Repository: PaddlePaddle/PaddleSpeech
Branch: develop
Commit: 3afe871a8768
Files: 3420
Total size: 15.9 MB

Directory structure:
gitextract_h_rw1s1r/

├── .clang-format
├── .flake8
├── .gitconfig
├── .github/
│   ├── CODE_OF_CONDUCT.md
│   ├── CONTRIBUTING.md
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug-report-s2t.md
│   │   ├── bug-report-tts.md
│   │   ├── feature-request.md
│   │   ├── others.md
│   │   └── question.md
│   ├── PULL_REQUEST_TEMPLATE.md
│   └── stale.yml
├── .gitignore
├── .mergify.yml
├── .pre-commit-config.yaml
├── .pre-commit-hooks/
│   ├── clang-format.hook
│   └── copyright-check.hook
├── .readthedocs.yml
├── .style.yapf
├── .travis.yml
├── LICENSE
├── MANIFEST.in
├── README.md
├── README_cn.md
├── audio/
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── cmake/
│   │   ├── FindGFortranLibs.cmake
│   │   ├── external/
│   │   │   └── openblas.cmake
│   │   ├── pybind.cmake
│   │   └── summary.cmake
│   ├── paddleaudio/
│   │   ├── CMakeLists.txt
│   │   ├── __init__.py
│   │   ├── _extension.py
│   │   ├── _internal/
│   │   │   ├── __init__.py
│   │   │   └── module_utils.py
│   │   ├── backends/
│   │   │   ├── __init__.py
│   │   │   ├── common.py
│   │   │   ├── no_backend.py
│   │   │   ├── soundfile_backend.py
│   │   │   ├── sox_io_backend.py
│   │   │   └── utils.py
│   │   ├── compliance/
│   │   │   ├── __init__.py
│   │   │   ├── kaldi.py
│   │   │   └── librosa.py
│   │   ├── datasets/
│   │   │   ├── __init__.py
│   │   │   ├── dataset.py
│   │   │   ├── esc50.py
│   │   │   ├── gtzan.py
│   │   │   ├── hey_snips.py
│   │   │   ├── rirs_noises.py
│   │   │   ├── tess.py
│   │   │   ├── urban_sound.py
│   │   │   └── voxceleb.py
│   │   ├── features/
│   │   │   ├── __init__.py
│   │   │   └── layers.py
│   │   ├── functional/
│   │   │   ├── __init__.py
│   │   │   ├── functional.py
│   │   │   └── window.py
│   │   ├── kaldi/
│   │   │   ├── __init__.py
│   │   │   └── kaldi.py
│   │   ├── metric/
│   │   │   ├── __init__.py
│   │   │   └── eer.py
│   │   ├── sox_effects/
│   │   │   ├── __init__.py
│   │   │   └── sox_effects.py
│   │   ├── src/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── optional/
│   │   │   │   ├── COPYING
│   │   │   │   └── optional.hpp
│   │   │   ├── pybind/
│   │   │   │   ├── kaldi/
│   │   │   │   │   ├── feature_common.h
│   │   │   │   │   ├── feature_common_inl.h
│   │   │   │   │   ├── kaldi_feature.cc
│   │   │   │   │   ├── kaldi_feature.h
│   │   │   │   │   ├── kaldi_feature_wrapper.cc
│   │   │   │   │   └── kaldi_feature_wrapper.h
│   │   │   │   ├── pybind.cpp
│   │   │   │   └── sox/
│   │   │   │       ├── effects.cpp
│   │   │   │       ├── effects.h
│   │   │   │       ├── effects_chain.cpp
│   │   │   │       ├── effects_chain.h
│   │   │   │       ├── io.cpp
│   │   │   │       ├── io.h
│   │   │   │       ├── types.cpp
│   │   │   │       ├── types.h
│   │   │   │       ├── utils.cpp
│   │   │   │       └── utils.h
│   │   │   └── utils.cpp
│   │   ├── third_party/
│   │   │   ├── .gitignore
│   │   │   ├── CMakeLists.txt
│   │   │   ├── kaldi-native-fbank/
│   │   │   │   └── csrc/
│   │   │   │       ├── CMakeLists.txt
│   │   │   │       ├── feature-fbank.cc
│   │   │   │       ├── feature-fbank.h
│   │   │   │       ├── feature-functions.cc
│   │   │   │       ├── feature-functions.h
│   │   │   │       ├── feature-window.cc
│   │   │   │       ├── feature-window.h
│   │   │   │       ├── fftsg.c
│   │   │   │       ├── log.cc
│   │   │   │       ├── log.h
│   │   │   │       ├── mel-computations.cc
│   │   │   │       ├── mel-computations.h
│   │   │   │       ├── rfft.cc
│   │   │   │       └── rfft.h
│   │   │   ├── patches/
│   │   │   │   ├── config.guess
│   │   │   │   ├── config.sub
│   │   │   │   ├── libmad.patch
│   │   │   │   └── sox.patch
│   │   │   └── sox/
│   │   │       └── CMakeLists.txt
│   │   └── utils/
│   │       ├── __init__.py
│   │       ├── download.py
│   │       ├── env.py
│   │       ├── error.py
│   │       ├── log.py
│   │       ├── numeric.py
│   │       ├── sox_utils.py
│   │       ├── tensor_utils.py
│   │       └── time.py
│   ├── setup.py
│   ├── tests/
│   │   ├── backends/
│   │   │   ├── base.py
│   │   │   ├── common.py
│   │   │   ├── soundfile/
│   │   │   │   ├── base.py
│   │   │   │   ├── common.py
│   │   │   │   ├── info_test.py
│   │   │   │   ├── load_test.py
│   │   │   │   ├── save_test.py
│   │   │   │   └── test_io.py
│   │   │   └── sox_io/
│   │   │       ├── common.py
│   │   │       ├── info_test.py
│   │   │       ├── load_test.py
│   │   │       ├── save_test.py
│   │   │       ├── smoke_test.py
│   │   │       ├── sox_effect_test.py
│   │   │       └── sox_effect_test_args.jsonl
│   │   ├── benchmark/
│   │   │   ├── README.md
│   │   │   ├── log_melspectrogram.py
│   │   │   ├── melspectrogram.py
│   │   │   └── mfcc.py
│   │   ├── common_utils/
│   │   │   ├── __init__.py
│   │   │   ├── case_utils.py
│   │   │   ├── data_utils.py
│   │   │   ├── parameterized_utils.py
│   │   │   ├── sox_utils.py
│   │   │   └── wav_utils.py
│   │   └── features/
│   │       ├── __init__.py
│   │       ├── base.py
│   │       ├── test_istft.py
│   │       ├── test_kaldi.py
│   │       ├── test_kaldi_feat.py
│   │       ├── test_librosa.py
│   │       ├── test_log_melspectrogram.py
│   │       ├── test_spectrogram.py
│   │       ├── test_stft.py
│   │       └── testdata/
│   │           ├── fbank_feat.ark
│   │           ├── fbank_feat_txt.ark
│   │           ├── pitch_feat.ark
│   │           └── pitch_feat_txt.ark
│   └── tools/
│       └── setup_helpers/
│           ├── __init__.py
│           └── extension.py
├── dataset/
│   ├── aishell/
│   │   ├── .gitignore
│   │   └── aishell.py
│   ├── aishell3/
│   │   └── README.md
│   ├── chime3_background/
│   │   └── chime3_background.py
│   ├── gigaspeech/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── gigaspeech.py
│   │   └── run.sh
│   ├── librispeech/
│   │   ├── .gitignore
│   │   └── librispeech.py
│   ├── magicdata/
│   │   └── README.md
│   ├── mini_librispeech/
│   │   ├── .gitignore
│   │   └── mini_librispeech.py
│   ├── multi_cn/
│   │   └── README.md
│   ├── primewords/
│   │   └── README.md
│   ├── rir_noise/
│   │   ├── .gitignore
│   │   └── rir_noise.py
│   ├── st-cmds/
│   │   └── README.md
│   ├── tal_cs/
│   │   ├── README.md
│   │   └── tal_cs.py
│   ├── ted_en_zh/
│   │   ├── .gitignore
│   │   └── ted_en_zh.py
│   ├── thchs30/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   └── thchs30.py
│   ├── timit/
│   │   ├── .gitignore
│   │   ├── timit.py
│   │   └── timit_kaldi_standard_split.py
│   ├── voxceleb/
│   │   ├── README.md
│   │   ├── voxceleb1.py
│   │   └── voxceleb2.py
│   └── voxforge/
│       ├── run_data.sh
│       └── voxforge.py
├── demos/
│   ├── README.md
│   ├── README_cn.md
│   ├── TTSAndroid/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── app/
│   │   │   ├── .gitignore
│   │   │   ├── build.gradle
│   │   │   ├── proguard-rules.pro
│   │   │   └── src/
│   │   │       ├── androidTest/
│   │   │       │   └── java/
│   │   │       │       └── com/
│   │   │       │           └── baidu/
│   │   │       │               └── paddle/
│   │   │       │                   └── lite/
│   │   │       │                       └── demo/
│   │   │       │                           └── tts/
│   │   │       │                               └── ExampleInstrumentedTest.java
│   │   │       ├── main/
│   │   │       │   ├── AndroidManifest.xml
│   │   │       │   ├── java/
│   │   │       │   │   └── com/
│   │   │       │   │       └── baidu/
│   │   │       │   │           └── paddle/
│   │   │       │   │               └── lite/
│   │   │       │   │                   └── demo/
│   │   │       │   │                       └── tts/
│   │   │       │   │                           ├── AppCompatPreferenceActivity.java
│   │   │       │   │                           ├── MainActivity.java
│   │   │       │   │                           ├── Predictor.java
│   │   │       │   │                           ├── SettingsActivity.java
│   │   │       │   │                           └── Utils.java
│   │   │       │   └── res/
│   │   │       │       ├── drawable/
│   │   │       │       │   └── button_drawable.xml
│   │   │       │       ├── layout/
│   │   │       │       │   └── activity_main.xml
│   │   │       │       ├── menu/
│   │   │       │       │   └── menu_action_options.xml
│   │   │       │       ├── values/
│   │   │       │       │   ├── arrays.xml
│   │   │       │       │   ├── colors.xml
│   │   │       │       │   ├── strings.xml
│   │   │       │       │   └── styles.xml
│   │   │       │       └── xml/
│   │   │       │           └── settings.xml
│   │   │       └── test/
│   │   │           └── java/
│   │   │               └── com/
│   │   │                   └── baidu/
│   │   │                       └── paddle/
│   │   │                           └── lite/
│   │   │                               └── demo/
│   │   │                                   └── tts/
│   │   │                                       └── ExampleUnitTest.java
│   │   ├── build.gradle
│   │   ├── gradle/
│   │   │   └── wrapper/
│   │   │       ├── gradle-wrapper.jar
│   │   │       └── gradle-wrapper.properties
│   │   ├── gradle.properties
│   │   ├── gradlew
│   │   ├── gradlew.bat
│   │   └── settings.gradle
│   ├── TTSArmLinux/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── build.sh
│   │   ├── clean.sh
│   │   ├── config.sh
│   │   ├── download.sh
│   │   ├── front.conf
│   │   ├── run.sh
│   │   └── src/
│   │       ├── CMakeLists.txt
│   │       ├── Predictor.hpp
│   │       └── main.cc
│   ├── TTSCppFrontend/
│   │   ├── .gitignore
│   │   ├── CMakeLists.txt
│   │   ├── README.md
│   │   ├── build-depends.sh
│   │   ├── build.sh
│   │   ├── clean.sh
│   │   ├── download.sh
│   │   ├── front_demo/
│   │   │   ├── front.conf
│   │   │   ├── front_demo.cpp
│   │   │   └── gentools/
│   │   │       ├── gen_dict_paddlespeech.py
│   │   │       ├── genid.py
│   │   │       └── word2phones.py
│   │   ├── run_front_demo.sh
│   │   ├── src/
│   │   │   ├── base/
│   │   │   │   ├── type_conv.cpp
│   │   │   │   └── type_conv.h
│   │   │   └── front/
│   │   │       ├── front_interface.cpp
│   │   │       ├── front_interface.h
│   │   │       ├── text_normalize.cpp
│   │   │       └── text_normalize.h
│   │   └── third-party/
│   │       └── CMakeLists.txt
│   ├── asr_deployment/
│   │   ├── README.md
│   │   └── README_cn.md
│   ├── audio_content_search/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   ├── acs_clinet.py
│   │   ├── conf/
│   │   │   ├── acs_application.yaml
│   │   │   ├── words.txt
│   │   │   ├── ws_conformer_application.yaml
│   │   │   └── ws_conformer_wenetspeech_application.yaml
│   │   ├── requirements.txt
│   │   ├── run.sh
│   │   └── streaming_asr_server.py
│   ├── audio_searching/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   ├── docker-compose.yaml
│   │   ├── requirements.txt
│   │   └── src/
│   │       ├── audio_search.py
│   │       ├── config.py
│   │       ├── encode.py
│   │       ├── logs.py
│   │       ├── milvus_helpers.py
│   │       ├── mysql_helpers.py
│   │       ├── operations/
│   │       │   ├── __init__.py
│   │       │   ├── count.py
│   │       │   ├── drop.py
│   │       │   ├── load.py
│   │       │   └── search.py
│   │       ├── test_audio_search.py
│   │       ├── test_vpr_search.py
│   │       └── vpr_search.py
│   ├── audio_tagging/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   └── run.sh
│   ├── automatic_video_subtitiles/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   ├── recognize.py
│   │   └── run.sh
│   ├── custom_streaming_asr/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   ├── path.sh
│   │   ├── setup_docker.sh
│   │   ├── websocket_client.sh
│   │   └── websocket_server.sh
│   ├── keyword_spotting/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   └── run.sh
│   ├── metaverse/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   ├── path.sh
│   │   ├── run.sh
│   │   └── sentences.txt
│   ├── punctuation_restoration/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   └── run.sh
│   ├── speaker_verification/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   └── run.sh
│   ├── speech_recognition/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   └── run.sh
│   ├── speech_server/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   ├── asr_client.sh
│   │   ├── cls_client.sh
│   │   ├── conf/
│   │   │   ├── application.yaml
│   │   │   └── conformer_talcs_application.yaml
│   │   ├── server.sh
│   │   ├── sid_client.sh
│   │   ├── start_multi_progress_server.py
│   │   ├── text_client.sh
│   │   └── tts_client.sh
│   ├── speech_ssl/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   └── run.sh
│   ├── speech_translation/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   └── run.sh
│   ├── speech_web/
│   │   ├── .gitignore
│   │   ├── API.md
│   │   ├── README.md
│   │   ├── speech_server/
│   │   │   ├── conf/
│   │   │   │   ├── tts3_finetune.yaml
│   │   │   │   ├── tts_online_application.yaml
│   │   │   │   └── ws_conformer_wenetspeech_application_faster.yaml
│   │   │   ├── main.py
│   │   │   ├── requirements.txt
│   │   │   ├── src/
│   │   │   │   ├── AudioManeger.py
│   │   │   │   ├── SpeechBase/
│   │   │   │   │   ├── asr.py
│   │   │   │   │   ├── nlp.py
│   │   │   │   │   ├── sql_helper.py
│   │   │   │   │   ├── tts.py
│   │   │   │   │   ├── vpr.py
│   │   │   │   │   └── vpr_encode.py
│   │   │   │   ├── WebsocketManeger.py
│   │   │   │   ├── ernie_sat.py
│   │   │   │   ├── finetune.py
│   │   │   │   ├── ge2e_clone.py
│   │   │   │   ├── robot.py
│   │   │   │   ├── tdnn_clone.py
│   │   │   │   └── util.py
│   │   │   └── vc.py
│   │   └── web_client/
│   │       ├── .gitignore
│   │       ├── index.html
│   │       ├── package.json
│   │       ├── src/
│   │       │   ├── App.vue
│   │       │   ├── api/
│   │       │   │   ├── API.js
│   │       │   │   ├── ApiASR.js
│   │       │   │   ├── ApiNLP.js
│   │       │   │   ├── ApiTTS.js
│   │       │   │   ├── ApiVC.js
│   │       │   │   └── ApiVPR.js
│   │       │   ├── components/
│   │       │   │   ├── Content/
│   │       │   │   │   ├── Header/
│   │       │   │   │   │   ├── Header.vue
│   │       │   │   │   │   └── style.less
│   │       │   │   │   └── Tail/
│   │       │   │   │       ├── Tail.vue
│   │       │   │   │       └── style.less
│   │       │   │   ├── Experience.vue
│   │       │   │   ├── SubMenu/
│   │       │   │   │   ├── ASR/
│   │       │   │   │   │   ├── ASR.vue
│   │       │   │   │   │   ├── ASRT.vue
│   │       │   │   │   │   ├── AudioFile/
│   │       │   │   │   │   │   ├── AudioFileIdentification.vue
│   │       │   │   │   │   │   └── style.less
│   │       │   │   │   │   ├── EndToEnd/
│   │       │   │   │   │   │   ├── EndToEndIdentification.vue
│   │       │   │   │   │   │   └── style.less
│   │       │   │   │   │   ├── RealTime/
│   │       │   │   │   │   │   ├── RealTime.vue
│   │       │   │   │   │   │   └── style.less
│   │       │   │   │   │   └── style.less
│   │       │   │   │   ├── ChatBot/
│   │       │   │   │   │   ├── ChatT.vue
│   │       │   │   │   │   └── style.less
│   │       │   │   │   ├── ERNIE_SAT/
│   │       │   │   │   │   └── ERNIE_SAT.vue
│   │       │   │   │   ├── FineTune/
│   │       │   │   │   │   └── FineTune.vue
│   │       │   │   │   ├── IE/
│   │       │   │   │   │   ├── IET.vue
│   │       │   │   │   │   └── style.less
│   │       │   │   │   ├── TTS/
│   │       │   │   │   │   ├── TTST.vue
│   │       │   │   │   │   └── style.less
│   │       │   │   │   ├── VPR/
│   │       │   │   │   │   ├── VPRT.vue
│   │       │   │   │   │   └── style.less
│   │       │   │   │   └── VoiceClone/
│   │       │   │   │       └── VoiceClone.vue
│   │       │   │   └── style.less
│   │       │   └── main.js
│   │       └── vite.config.js
│   ├── story_talker/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   ├── ocr.py
│   │   ├── path.sh
│   │   └── run.sh
│   ├── streaming_asr_server/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   ├── conf/
│   │   │   ├── application.yaml
│   │   │   ├── punc_application.yaml
│   │   │   ├── ws_conformer_application.yaml
│   │   │   ├── ws_conformer_talcs_application.yaml
│   │   │   ├── ws_conformer_wenetspeech_application.yaml
│   │   │   ├── ws_conformer_wenetspeech_application_faster.yaml
│   │   │   └── ws_ds2_application.yaml
│   │   ├── local/
│   │   │   ├── punc_server.py
│   │   │   ├── rtf_from_log.py
│   │   │   ├── streaming_asr_server.py
│   │   │   ├── test.sh
│   │   │   ├── websocket_client.py
│   │   │   └── websocket_client_srt.py
│   │   ├── run.sh
│   │   ├── server.sh
│   │   ├── test.sh
│   │   └── web/
│   │       ├── index.html
│   │       └── readme.md
│   ├── streaming_tts_server/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   ├── client.sh
│   │   ├── conf/
│   │   │   ├── tts_online_application.yaml
│   │   │   └── tts_online_ws_application.yaml
│   │   └── server.sh
│   ├── streaming_tts_serving_fastdeploy/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   └── streaming_tts_serving/
│   │       ├── 1/
│   │       │   └── model.py
│   │       ├── config.pbtxt
│   │       └── stream_client.py
│   ├── style_fs2/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   ├── path.sh
│   │   ├── run.sh
│   │   ├── sentences.txt
│   │   └── style_syn.py
│   ├── text_to_speech/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   └── run.sh
│   └── whisper/
│       ├── README.md
│       ├── README_cn.md
│       └── run.sh
├── docker/
│   ├── ubuntu16-gpu/
│   │   └── Dockerfile
│   ├── ubuntu18-cpu/
│   │   └── Dockerfile
│   └── ubuntu20-cpu/
│       └── Dockerfile
├── docs/
│   ├── Makefile
│   ├── requirements.txt
│   ├── source/
│   │   ├── _static/
│   │   │   └── custom.css
│   │   ├── api/
│   │   │   ├── modules.rst
│   │   │   ├── paddlespeech.audio.features.layers.rst
│   │   │   ├── paddlespeech.audio.features.rst
│   │   │   ├── paddlespeech.audio.io.rst
│   │   │   ├── paddlespeech.audio.rst
│   │   │   ├── paddlespeech.audio.streamdata.autodecode.rst
│   │   │   ├── paddlespeech.audio.streamdata.cache.rst
│   │   │   ├── paddlespeech.audio.streamdata.compat.rst
│   │   │   ├── paddlespeech.audio.streamdata.extradatasets.rst
│   │   │   ├── paddlespeech.audio.streamdata.filters.rst
│   │   │   ├── paddlespeech.audio.streamdata.gopen.rst
│   │   │   ├── paddlespeech.audio.streamdata.handlers.rst
│   │   │   ├── paddlespeech.audio.streamdata.mix.rst
│   │   │   ├── paddlespeech.audio.streamdata.paddle_utils.rst
│   │   │   ├── paddlespeech.audio.streamdata.pipeline.rst
│   │   │   ├── paddlespeech.audio.streamdata.rst
│   │   │   ├── paddlespeech.audio.streamdata.shardlists.rst
│   │   │   ├── paddlespeech.audio.streamdata.tariterators.rst
│   │   │   ├── paddlespeech.audio.streamdata.utils.rst
│   │   │   ├── paddlespeech.audio.streamdata.writer.rst
│   │   │   ├── paddlespeech.audio.text.rst
│   │   │   ├── paddlespeech.audio.text.text_featurizer.rst
│   │   │   ├── paddlespeech.audio.text.utility.rst
│   │   │   ├── paddlespeech.audio.transform.add_deltas.rst
│   │   │   ├── paddlespeech.audio.transform.channel_selector.rst
│   │   │   ├── paddlespeech.audio.transform.cmvn.rst
│   │   │   ├── paddlespeech.audio.transform.functional.rst
│   │   │   ├── paddlespeech.audio.transform.perturb.rst
│   │   │   ├── paddlespeech.audio.transform.rst
│   │   │   ├── paddlespeech.audio.transform.spec_augment.rst
│   │   │   ├── paddlespeech.audio.transform.spectrogram.rst
│   │   │   ├── paddlespeech.audio.transform.transform_interface.rst
│   │   │   ├── paddlespeech.audio.transform.transformation.rst
│   │   │   ├── paddlespeech.audio.transform.wpe.rst
│   │   │   ├── paddlespeech.audio.utils.check_kwargs.rst
│   │   │   ├── paddlespeech.audio.utils.download.rst
│   │   │   ├── paddlespeech.audio.utils.dynamic_import.rst
│   │   │   ├── paddlespeech.audio.utils.error.rst
│   │   │   ├── paddlespeech.audio.utils.log.rst
│   │   │   ├── paddlespeech.audio.utils.numeric.rst
│   │   │   ├── paddlespeech.audio.utils.rst
│   │   │   ├── paddlespeech.audio.utils.tensor_utils.rst
│   │   │   ├── paddlespeech.audio.utils.time.rst
│   │   │   ├── paddlespeech.cli.asr.infer.rst
│   │   │   ├── paddlespeech.cli.asr.rst
│   │   │   ├── paddlespeech.cli.base_commands.rst
│   │   │   ├── paddlespeech.cli.cls.infer.rst
│   │   │   ├── paddlespeech.cli.cls.rst
│   │   │   ├── paddlespeech.cli.download.rst
│   │   │   ├── paddlespeech.cli.entry.rst
│   │   │   ├── paddlespeech.cli.executor.rst
│   │   │   ├── paddlespeech.cli.kws.infer.rst
│   │   │   ├── paddlespeech.cli.kws.rst
│   │   │   ├── paddlespeech.cli.log.rst
│   │   │   ├── paddlespeech.cli.rst
│   │   │   ├── paddlespeech.cli.st.infer.rst
│   │   │   ├── paddlespeech.cli.st.rst
│   │   │   ├── paddlespeech.cli.text.infer.rst
│   │   │   ├── paddlespeech.cli.text.rst
│   │   │   ├── paddlespeech.cli.tts.infer.rst
│   │   │   ├── paddlespeech.cli.tts.rst
│   │   │   ├── paddlespeech.cli.utils.rst
│   │   │   ├── paddlespeech.cli.vector.infer.rst
│   │   │   ├── paddlespeech.cli.vector.rst
│   │   │   ├── paddlespeech.cls.exps.panns.deploy.rst
│   │   │   ├── paddlespeech.cls.exps.panns.rst
│   │   │   ├── paddlespeech.cls.exps.rst
│   │   │   ├── paddlespeech.cls.models.panns.classifier.rst
│   │   │   ├── paddlespeech.cls.models.panns.panns.rst
│   │   │   ├── paddlespeech.cls.models.panns.rst
│   │   │   ├── paddlespeech.cls.models.rst
│   │   │   ├── paddlespeech.cls.rst
│   │   │   ├── paddlespeech.kws.exps.mdtc.collate.rst
│   │   │   ├── paddlespeech.kws.exps.mdtc.compute_det.rst
│   │   │   ├── paddlespeech.kws.exps.mdtc.plot_det_curve.rst
│   │   │   ├── paddlespeech.kws.exps.mdtc.rst
│   │   │   ├── paddlespeech.kws.exps.mdtc.score.rst
│   │   │   ├── paddlespeech.kws.exps.mdtc.train.rst
│   │   │   ├── paddlespeech.kws.exps.rst
│   │   │   ├── paddlespeech.kws.models.loss.rst
│   │   │   ├── paddlespeech.kws.models.mdtc.rst
│   │   │   ├── paddlespeech.kws.models.rst
│   │   │   ├── paddlespeech.kws.rst
│   │   │   ├── paddlespeech.resource.model_alias.rst
│   │   │   ├── paddlespeech.resource.pretrained_models.rst
│   │   │   ├── paddlespeech.resource.resource.rst
│   │   │   ├── paddlespeech.resource.rst
│   │   │   ├── paddlespeech.rst
│   │   │   ├── paddlespeech.s2t.decoders.beam_search.batch_beam_search.rst
│   │   │   ├── paddlespeech.s2t.decoders.beam_search.beam_search.rst
│   │   │   ├── paddlespeech.s2t.decoders.beam_search.rst
│   │   │   ├── paddlespeech.s2t.decoders.ctcdecoder.decoders_deprecated.rst
│   │   │   ├── paddlespeech.s2t.decoders.ctcdecoder.rst
│   │   │   ├── paddlespeech.s2t.decoders.ctcdecoder.swig_wrapper.rst
│   │   │   ├── paddlespeech.s2t.decoders.recog.rst
│   │   │   ├── paddlespeech.s2t.decoders.rst
│   │   │   ├── paddlespeech.s2t.decoders.scorers.ctc.rst
│   │   │   ├── paddlespeech.s2t.decoders.scorers.ctc_prefix_score.rst
│   │   │   ├── paddlespeech.s2t.decoders.scorers.length_bonus.rst
│   │   │   ├── paddlespeech.s2t.decoders.scorers.rst
│   │   │   ├── paddlespeech.s2t.decoders.scorers.scorer_interface.rst
│   │   │   ├── paddlespeech.s2t.decoders.utils.rst
│   │   │   ├── paddlespeech.s2t.exps.deepspeech2.bin.deploy.rst
│   │   │   ├── paddlespeech.s2t.exps.deepspeech2.bin.deploy.runtime.rst
│   │   │   ├── paddlespeech.s2t.exps.deepspeech2.bin.deploy.server.rst
│   │   │   ├── paddlespeech.s2t.exps.deepspeech2.bin.export.rst
│   │   │   ├── paddlespeech.s2t.exps.deepspeech2.bin.rst
│   │   │   ├── paddlespeech.s2t.exps.deepspeech2.bin.test.rst
│   │   │   ├── paddlespeech.s2t.exps.deepspeech2.bin.test_export.rst
│   │   │   ├── paddlespeech.s2t.exps.deepspeech2.bin.test_wav.rst
│   │   │   ├── paddlespeech.s2t.exps.deepspeech2.bin.train.rst
│   │   │   ├── paddlespeech.s2t.exps.deepspeech2.model.rst
│   │   │   ├── paddlespeech.s2t.exps.deepspeech2.rst
│   │   │   ├── paddlespeech.s2t.exps.rst
│   │   │   ├── paddlespeech.s2t.exps.u2.bin.alignment.rst
│   │   │   ├── paddlespeech.s2t.exps.u2.bin.export.rst
│   │   │   ├── paddlespeech.s2t.exps.u2.bin.rst
│   │   │   ├── paddlespeech.s2t.exps.u2.bin.test.rst
│   │   │   ├── paddlespeech.s2t.exps.u2.bin.test_wav.rst
│   │   │   ├── paddlespeech.s2t.exps.u2.bin.train.rst
│   │   │   ├── paddlespeech.s2t.exps.u2.model.rst
│   │   │   ├── paddlespeech.s2t.exps.u2.rst
│   │   │   ├── paddlespeech.s2t.exps.u2_kaldi.bin.rst
│   │   │   ├── paddlespeech.s2t.exps.u2_kaldi.bin.test.rst
│   │   │   ├── paddlespeech.s2t.exps.u2_kaldi.bin.train.rst
│   │   │   ├── paddlespeech.s2t.exps.u2_kaldi.model.rst
│   │   │   ├── paddlespeech.s2t.exps.u2_kaldi.rst
│   │   │   ├── paddlespeech.s2t.exps.u2_st.bin.export.rst
│   │   │   ├── paddlespeech.s2t.exps.u2_st.bin.rst
│   │   │   ├── paddlespeech.s2t.exps.u2_st.bin.test.rst
│   │   │   ├── paddlespeech.s2t.exps.u2_st.bin.train.rst
│   │   │   ├── paddlespeech.s2t.exps.u2_st.model.rst
│   │   │   ├── paddlespeech.s2t.exps.u2_st.rst
│   │   │   ├── paddlespeech.s2t.frontend.audio.rst
│   │   │   ├── paddlespeech.s2t.frontend.augmentor.augmentation.rst
│   │   │   ├── paddlespeech.s2t.frontend.augmentor.base.rst
│   │   │   ├── paddlespeech.s2t.frontend.augmentor.impulse_response.rst
│   │   │   ├── paddlespeech.s2t.frontend.augmentor.noise_perturb.rst
│   │   │   ├── paddlespeech.s2t.frontend.augmentor.online_bayesian_normalization.rst
│   │   │   ├── paddlespeech.s2t.frontend.augmentor.resample.rst
│   │   │   ├── paddlespeech.s2t.frontend.augmentor.rst
│   │   │   ├── paddlespeech.s2t.frontend.augmentor.shift_perturb.rst
│   │   │   ├── paddlespeech.s2t.frontend.augmentor.spec_augment.rst
│   │   │   ├── paddlespeech.s2t.frontend.augmentor.speed_perturb.rst
│   │   │   ├── paddlespeech.s2t.frontend.augmentor.volume_perturb.rst
│   │   │   ├── paddlespeech.s2t.frontend.featurizer.audio_featurizer.rst
│   │   │   ├── paddlespeech.s2t.frontend.featurizer.rst
│   │   │   ├── paddlespeech.s2t.frontend.featurizer.speech_featurizer.rst
│   │   │   ├── paddlespeech.s2t.frontend.featurizer.text_featurizer.rst
│   │   │   ├── paddlespeech.s2t.frontend.normalizer.rst
│   │   │   ├── paddlespeech.s2t.frontend.rst
│   │   │   ├── paddlespeech.s2t.frontend.speech.rst
│   │   │   ├── paddlespeech.s2t.frontend.utility.rst
│   │   │   ├── paddlespeech.s2t.io.batchfy.rst
│   │   │   ├── paddlespeech.s2t.io.collator.rst
│   │   │   ├── paddlespeech.s2t.io.converter.rst
│   │   │   ├── paddlespeech.s2t.io.dataloader.rst
│   │   │   ├── paddlespeech.s2t.io.dataset.rst
│   │   │   ├── paddlespeech.s2t.io.reader.rst
│   │   │   ├── paddlespeech.s2t.io.rst
│   │   │   ├── paddlespeech.s2t.io.sampler.rst
│   │   │   ├── paddlespeech.s2t.io.utility.rst
│   │   │   ├── paddlespeech.s2t.models.asr_interface.rst
│   │   │   ├── paddlespeech.s2t.models.ds2.conv.rst
│   │   │   ├── paddlespeech.s2t.models.ds2.deepspeech2.rst
│   │   │   ├── paddlespeech.s2t.models.ds2.rst
│   │   │   ├── paddlespeech.s2t.models.lm.dataset.rst
│   │   │   ├── paddlespeech.s2t.models.lm.rst
│   │   │   ├── paddlespeech.s2t.models.lm.transformer.rst
│   │   │   ├── paddlespeech.s2t.models.lm_interface.rst
│   │   │   ├── paddlespeech.s2t.models.rst
│   │   │   ├── paddlespeech.s2t.models.st_interface.rst
│   │   │   ├── paddlespeech.s2t.models.u2.rst
│   │   │   ├── paddlespeech.s2t.models.u2.u2.rst
│   │   │   ├── paddlespeech.s2t.models.u2.updater.rst
│   │   │   ├── paddlespeech.s2t.models.u2_st.rst
│   │   │   ├── paddlespeech.s2t.models.u2_st.u2_st.rst
│   │   │   ├── paddlespeech.s2t.modules.activation.rst
│   │   │   ├── paddlespeech.s2t.modules.align.rst
│   │   │   ├── paddlespeech.s2t.modules.attention.rst
│   │   │   ├── paddlespeech.s2t.modules.cmvn.rst
│   │   │   ├── paddlespeech.s2t.modules.conformer_convolution.rst
│   │   │   ├── paddlespeech.s2t.modules.crf.rst
│   │   │   ├── paddlespeech.s2t.modules.ctc.rst
│   │   │   ├── paddlespeech.s2t.modules.decoder.rst
│   │   │   ├── paddlespeech.s2t.modules.decoder_layer.rst
│   │   │   ├── paddlespeech.s2t.modules.embedding.rst
│   │   │   ├── paddlespeech.s2t.modules.encoder.rst
│   │   │   ├── paddlespeech.s2t.modules.encoder_layer.rst
│   │   │   ├── paddlespeech.s2t.modules.initializer.rst
│   │   │   ├── paddlespeech.s2t.modules.loss.rst
│   │   │   ├── paddlespeech.s2t.modules.mask.rst
│   │   │   ├── paddlespeech.s2t.modules.positionwise_feed_forward.rst
│   │   │   ├── paddlespeech.s2t.modules.rst
│   │   │   ├── paddlespeech.s2t.modules.subsampling.rst
│   │   │   ├── paddlespeech.s2t.rst
│   │   │   ├── paddlespeech.s2t.training.cli.rst
│   │   │   ├── paddlespeech.s2t.training.extensions.evaluator.rst
│   │   │   ├── paddlespeech.s2t.training.extensions.extension.rst
│   │   │   ├── paddlespeech.s2t.training.extensions.plot.rst
│   │   │   ├── paddlespeech.s2t.training.extensions.rst
│   │   │   ├── paddlespeech.s2t.training.gradclip.rst
│   │   │   ├── paddlespeech.s2t.training.optimizer.rst
│   │   │   ├── paddlespeech.s2t.training.reporter.rst
│   │   │   ├── paddlespeech.s2t.training.rst
│   │   │   ├── paddlespeech.s2t.training.scheduler.rst
│   │   │   ├── paddlespeech.s2t.training.timer.rst
│   │   │   ├── paddlespeech.s2t.training.trainer.rst
│   │   │   ├── paddlespeech.s2t.training.triggers.compare_value_trigger.rst
│   │   │   ├── paddlespeech.s2t.training.triggers.interval_trigger.rst
│   │   │   ├── paddlespeech.s2t.training.triggers.limit_trigger.rst
│   │   │   ├── paddlespeech.s2t.training.triggers.rst
│   │   │   ├── paddlespeech.s2t.training.triggers.time_trigger.rst
│   │   │   ├── paddlespeech.s2t.training.triggers.utils.rst
│   │   │   ├── paddlespeech.s2t.training.updaters.rst
│   │   │   ├── paddlespeech.s2t.training.updaters.standard_updater.rst
│   │   │   ├── paddlespeech.s2t.training.updaters.updater.rst
│   │   │   ├── paddlespeech.s2t.utils.asr_utils.rst
│   │   │   ├── paddlespeech.s2t.utils.bleu_score.rst
│   │   │   ├── paddlespeech.s2t.utils.check_kwargs.rst
│   │   │   ├── paddlespeech.s2t.utils.checkpoint.rst
│   │   │   ├── paddlespeech.s2t.utils.cli_readers.rst
│   │   │   ├── paddlespeech.s2t.utils.cli_utils.rst
│   │   │   ├── paddlespeech.s2t.utils.cli_writers.rst
│   │   │   ├── paddlespeech.s2t.utils.ctc_utils.rst
│   │   │   ├── paddlespeech.s2t.utils.dynamic_import.rst
│   │   │   ├── paddlespeech.s2t.utils.dynamic_pip_install.rst
│   │   │   ├── paddlespeech.s2t.utils.error_rate.rst
│   │   │   ├── paddlespeech.s2t.utils.layer_tools.rst
│   │   │   ├── paddlespeech.s2t.utils.log.rst
│   │   │   ├── paddlespeech.s2t.utils.mp_tools.rst
│   │   │   ├── paddlespeech.s2t.utils.profiler.rst
│   │   │   ├── paddlespeech.s2t.utils.rst
│   │   │   ├── paddlespeech.s2t.utils.socket_server.rst
│   │   │   ├── paddlespeech.s2t.utils.spec_augment.rst
│   │   │   ├── paddlespeech.s2t.utils.tensor_utils.rst
│   │   │   ├── paddlespeech.s2t.utils.text_grid.rst
│   │   │   ├── paddlespeech.s2t.utils.utility.rst
│   │   │   ├── paddlespeech.server.base_commands.rst
│   │   │   ├── paddlespeech.server.bin.paddlespeech_client.rst
│   │   │   ├── paddlespeech.server.bin.paddlespeech_server.rst
│   │   │   ├── paddlespeech.server.bin.rst
│   │   │   ├── paddlespeech.server.engine.acs.python.rst
│   │   │   ├── paddlespeech.server.engine.acs.rst
│   │   │   ├── paddlespeech.server.engine.asr.online.ctc_endpoint.rst
│   │   │   ├── paddlespeech.server.engine.asr.online.ctc_search.rst
│   │   │   ├── paddlespeech.server.engine.asr.online.onnx.asr_engine.rst
│   │   │   ├── paddlespeech.server.engine.asr.online.onnx.rst
│   │   │   ├── paddlespeech.server.engine.asr.online.paddleinference.asr_engine.rst
│   │   │   ├── paddlespeech.server.engine.asr.online.paddleinference.rst
│   │   │   ├── paddlespeech.server.engine.asr.online.python.asr_engine.rst
│   │   │   ├── paddlespeech.server.engine.asr.online.python.rst
│   │   │   ├── paddlespeech.server.engine.asr.online.rst
│   │   │   ├── paddlespeech.server.engine.asr.paddleinference.asr_engine.rst
│   │   │   ├── paddlespeech.server.engine.asr.paddleinference.rst
│   │   │   ├── paddlespeech.server.engine.asr.python.asr_engine.rst
│   │   │   ├── paddlespeech.server.engine.asr.python.rst
│   │   │   ├── paddlespeech.server.engine.asr.rst
│   │   │   ├── paddlespeech.server.engine.base_engine.rst
│   │   │   ├── paddlespeech.server.engine.cls.paddleinference.cls_engine.rst
│   │   │   ├── paddlespeech.server.engine.cls.paddleinference.rst
│   │   │   ├── paddlespeech.server.engine.cls.python.cls_engine.rst
│   │   │   ├── paddlespeech.server.engine.cls.python.rst
│   │   │   ├── paddlespeech.server.engine.cls.rst
│   │   │   ├── paddlespeech.server.engine.engine_factory.rst
│   │   │   ├── paddlespeech.server.engine.engine_pool.rst
│   │   │   ├── paddlespeech.server.engine.engine_warmup.rst
│   │   │   ├── paddlespeech.server.engine.rst
│   │   │   ├── paddlespeech.server.engine.text.python.rst
│   │   │   ├── paddlespeech.server.engine.text.python.text_engine.rst
│   │   │   ├── paddlespeech.server.engine.text.rst
│   │   │   ├── paddlespeech.server.engine.tts.online.onnx.rst
│   │   │   ├── paddlespeech.server.engine.tts.online.onnx.tts_engine.rst
│   │   │   ├── paddlespeech.server.engine.tts.online.python.rst
│   │   │   ├── paddlespeech.server.engine.tts.online.python.tts_engine.rst
│   │   │   ├── paddlespeech.server.engine.tts.online.rst
│   │   │   ├── paddlespeech.server.engine.tts.paddleinference.rst
│   │   │   ├── paddlespeech.server.engine.tts.paddleinference.tts_engine.rst
│   │   │   ├── paddlespeech.server.engine.tts.python.rst
│   │   │   ├── paddlespeech.server.engine.tts.python.tts_engine.rst
│   │   │   ├── paddlespeech.server.engine.tts.rst
│   │   │   ├── paddlespeech.server.engine.vector.python.rst
│   │   │   ├── paddlespeech.server.engine.vector.python.vector_engine.rst
│   │   │   ├── paddlespeech.server.engine.vector.rst
│   │   │   ├── paddlespeech.server.entry.rst
│   │   │   ├── paddlespeech.server.executor.rst
│   │   │   ├── paddlespeech.server.restful.acs_api.rst
│   │   │   ├── paddlespeech.server.restful.api.rst
│   │   │   ├── paddlespeech.server.restful.asr_api.rst
│   │   │   ├── paddlespeech.server.restful.cls_api.rst
│   │   │   ├── paddlespeech.server.restful.request.rst
│   │   │   ├── paddlespeech.server.restful.response.rst
│   │   │   ├── paddlespeech.server.restful.rst
│   │   │   ├── paddlespeech.server.restful.text_api.rst
│   │   │   ├── paddlespeech.server.restful.tts_api.rst
│   │   │   ├── paddlespeech.server.restful.vector_api.rst
│   │   │   ├── paddlespeech.server.rst
│   │   │   ├── paddlespeech.server.tests.asr.offline.http_client.rst
│   │   │   ├── paddlespeech.server.tests.asr.offline.rst
│   │   │   ├── paddlespeech.server.tests.asr.rst
│   │   │   ├── paddlespeech.server.tests.rst
│   │   │   ├── paddlespeech.server.util.rst
│   │   │   ├── paddlespeech.server.utils.audio_handler.rst
│   │   │   ├── paddlespeech.server.utils.audio_process.rst
│   │   │   ├── paddlespeech.server.utils.buffer.rst
│   │   │   ├── paddlespeech.server.utils.config.rst
│   │   │   ├── paddlespeech.server.utils.errors.rst
│   │   │   ├── paddlespeech.server.utils.exception.rst
│   │   │   ├── paddlespeech.server.utils.onnx_infer.rst
│   │   │   ├── paddlespeech.server.utils.paddle_predictor.rst
│   │   │   ├── paddlespeech.server.utils.rst
│   │   │   ├── paddlespeech.server.utils.util.rst
│   │   │   ├── paddlespeech.server.utils.vad.rst
│   │   │   ├── paddlespeech.server.ws.api.rst
│   │   │   ├── paddlespeech.server.ws.asr_api.rst
│   │   │   ├── paddlespeech.server.ws.rst
│   │   │   ├── paddlespeech.server.ws.tts_api.rst
│   │   │   ├── paddlespeech.t2s.audio.audio.rst
│   │   │   ├── paddlespeech.t2s.audio.codec.rst
│   │   │   ├── paddlespeech.t2s.audio.rst
│   │   │   ├── paddlespeech.t2s.audio.spec_normalizer.rst
│   │   │   ├── paddlespeech.t2s.datasets.am_batch_fn.rst
│   │   │   ├── paddlespeech.t2s.datasets.batch.rst
│   │   │   ├── paddlespeech.t2s.datasets.data_table.rst
│   │   │   ├── paddlespeech.t2s.datasets.dataset.rst
│   │   │   ├── paddlespeech.t2s.datasets.get_feats.rst
│   │   │   ├── paddlespeech.t2s.datasets.ljspeech.rst
│   │   │   ├── paddlespeech.t2s.datasets.preprocess_utils.rst
│   │   │   ├── paddlespeech.t2s.datasets.rst
│   │   │   ├── paddlespeech.t2s.datasets.sampler.rst
│   │   │   ├── paddlespeech.t2s.datasets.vocoder_batch_fn.rst
│   │   │   ├── paddlespeech.t2s.exps.ernie_sat.align.rst
│   │   │   ├── paddlespeech.t2s.exps.ernie_sat.normalize.rst
│   │   │   ├── paddlespeech.t2s.exps.ernie_sat.preprocess.rst
│   │   │   ├── paddlespeech.t2s.exps.ernie_sat.rst
│   │   │   ├── paddlespeech.t2s.exps.ernie_sat.synthesize.rst
│   │   │   ├── paddlespeech.t2s.exps.ernie_sat.synthesize_e2e.rst
│   │   │   ├── paddlespeech.t2s.exps.ernie_sat.train.rst
│   │   │   ├── paddlespeech.t2s.exps.ernie_sat.utils.rst
│   │   │   ├── paddlespeech.t2s.exps.fastspeech2.gen_gta_mel.rst
│   │   │   ├── paddlespeech.t2s.exps.fastspeech2.normalize.rst
│   │   │   ├── paddlespeech.t2s.exps.fastspeech2.preprocess.rst
│   │   │   ├── paddlespeech.t2s.exps.fastspeech2.rst
│   │   │   ├── paddlespeech.t2s.exps.fastspeech2.train.rst
│   │   │   ├── paddlespeech.t2s.exps.fastspeech2.vc2_infer.rst
│   │   │   ├── paddlespeech.t2s.exps.gan_vocoder.hifigan.rst
│   │   │   ├── paddlespeech.t2s.exps.gan_vocoder.hifigan.train.rst
│   │   │   ├── paddlespeech.t2s.exps.gan_vocoder.multi_band_melgan.rst
│   │   │   ├── paddlespeech.t2s.exps.gan_vocoder.multi_band_melgan.train.rst
│   │   │   ├── paddlespeech.t2s.exps.gan_vocoder.normalize.rst
│   │   │   ├── paddlespeech.t2s.exps.gan_vocoder.parallelwave_gan.rst
│   │   │   ├── paddlespeech.t2s.exps.gan_vocoder.parallelwave_gan.synthesize_from_wav.rst
│   │   │   ├── paddlespeech.t2s.exps.gan_vocoder.parallelwave_gan.train.rst
│   │   │   ├── paddlespeech.t2s.exps.gan_vocoder.preprocess.rst
│   │   │   ├── paddlespeech.t2s.exps.gan_vocoder.rst
│   │   │   ├── paddlespeech.t2s.exps.gan_vocoder.style_melgan.rst
│   │   │   ├── paddlespeech.t2s.exps.gan_vocoder.style_melgan.train.rst
│   │   │   ├── paddlespeech.t2s.exps.gan_vocoder.synthesize.rst
│   │   │   ├── paddlespeech.t2s.exps.inference.rst
│   │   │   ├── paddlespeech.t2s.exps.inference_streaming.rst
│   │   │   ├── paddlespeech.t2s.exps.ort_predict.rst
│   │   │   ├── paddlespeech.t2s.exps.ort_predict_e2e.rst
│   │   │   ├── paddlespeech.t2s.exps.ort_predict_streaming.rst
│   │   │   ├── paddlespeech.t2s.exps.rst
│   │   │   ├── paddlespeech.t2s.exps.speedyspeech.gen_gta_mel.rst
│   │   │   ├── paddlespeech.t2s.exps.speedyspeech.inference.rst
│   │   │   ├── paddlespeech.t2s.exps.speedyspeech.normalize.rst
│   │   │   ├── paddlespeech.t2s.exps.speedyspeech.preprocess.rst
│   │   │   ├── paddlespeech.t2s.exps.speedyspeech.rst
│   │   │   ├── paddlespeech.t2s.exps.speedyspeech.synthesize_e2e.rst
│   │   │   ├── paddlespeech.t2s.exps.speedyspeech.train.rst
│   │   │   ├── paddlespeech.t2s.exps.stream_play_tts.rst
│   │   │   ├── paddlespeech.t2s.exps.syn_utils.rst
│   │   │   ├── paddlespeech.t2s.exps.synthesize.rst
│   │   │   ├── paddlespeech.t2s.exps.synthesize_e2e.rst
│   │   │   ├── paddlespeech.t2s.exps.synthesize_streaming.rst
│   │   │   ├── paddlespeech.t2s.exps.tacotron2.normalize.rst
│   │   │   ├── paddlespeech.t2s.exps.tacotron2.preprocess.rst
│   │   │   ├── paddlespeech.t2s.exps.tacotron2.rst
│   │   │   ├── paddlespeech.t2s.exps.tacotron2.train.rst
│   │   │   ├── paddlespeech.t2s.exps.transformer_tts.normalize.rst
│   │   │   ├── paddlespeech.t2s.exps.transformer_tts.preprocess.rst
│   │   │   ├── paddlespeech.t2s.exps.transformer_tts.rst
│   │   │   ├── paddlespeech.t2s.exps.transformer_tts.synthesize.rst
│   │   │   ├── paddlespeech.t2s.exps.transformer_tts.synthesize_e2e.rst
│   │   │   ├── paddlespeech.t2s.exps.transformer_tts.train.rst
│   │   │   ├── paddlespeech.t2s.exps.vits.normalize.rst
│   │   │   ├── paddlespeech.t2s.exps.vits.preprocess.rst
│   │   │   ├── paddlespeech.t2s.exps.vits.rst
│   │   │   ├── paddlespeech.t2s.exps.vits.synthesize.rst
│   │   │   ├── paddlespeech.t2s.exps.vits.synthesize_e2e.rst
│   │   │   ├── paddlespeech.t2s.exps.vits.train.rst
│   │   │   ├── paddlespeech.t2s.exps.vits.voice_cloning.rst
│   │   │   ├── paddlespeech.t2s.exps.voice_cloning.rst
│   │   │   ├── paddlespeech.t2s.exps.waveflow.config.rst
│   │   │   ├── paddlespeech.t2s.exps.waveflow.ljspeech.rst
│   │   │   ├── paddlespeech.t2s.exps.waveflow.preprocess.rst
│   │   │   ├── paddlespeech.t2s.exps.waveflow.rst
│   │   │   ├── paddlespeech.t2s.exps.waveflow.synthesize.rst
│   │   │   ├── paddlespeech.t2s.exps.waveflow.train.rst
│   │   │   ├── paddlespeech.t2s.exps.wavernn.rst
│   │   │   ├── paddlespeech.t2s.exps.wavernn.synthesize.rst
│   │   │   ├── paddlespeech.t2s.exps.wavernn.train.rst
│   │   │   ├── paddlespeech.t2s.frontend.arpabet.rst
│   │   │   ├── paddlespeech.t2s.frontend.g2pw.dataset.rst
│   │   │   ├── paddlespeech.t2s.frontend.g2pw.onnx_api.rst
│   │   │   ├── paddlespeech.t2s.frontend.g2pw.rst
│   │   │   ├── paddlespeech.t2s.frontend.g2pw.utils.rst
│   │   │   ├── paddlespeech.t2s.frontend.generate_lexicon.rst
│   │   │   ├── paddlespeech.t2s.frontend.mix_frontend.rst
│   │   │   ├── paddlespeech.t2s.frontend.normalizer.abbrrviation.rst
│   │   │   ├── paddlespeech.t2s.frontend.normalizer.acronyms.rst
│   │   │   ├── paddlespeech.t2s.frontend.normalizer.normalizer.rst
│   │   │   ├── paddlespeech.t2s.frontend.normalizer.numbers.rst
│   │   │   ├── paddlespeech.t2s.frontend.normalizer.rst
│   │   │   ├── paddlespeech.t2s.frontend.normalizer.width.rst
│   │   │   ├── paddlespeech.t2s.frontend.phonectic.rst
│   │   │   ├── paddlespeech.t2s.frontend.punctuation.rst
│   │   │   ├── paddlespeech.t2s.frontend.rst
│   │   │   ├── paddlespeech.t2s.frontend.tone_sandhi.rst
│   │   │   ├── paddlespeech.t2s.frontend.vocab.rst
│   │   │   ├── paddlespeech.t2s.frontend.zh_frontend.rst
│   │   │   ├── paddlespeech.t2s.frontend.zh_normalization.char_convert.rst
│   │   │   ├── paddlespeech.t2s.frontend.zh_normalization.chronology.rst
│   │   │   ├── paddlespeech.t2s.frontend.zh_normalization.constants.rst
│   │   │   ├── paddlespeech.t2s.frontend.zh_normalization.num.rst
│   │   │   ├── paddlespeech.t2s.frontend.zh_normalization.phonecode.rst
│   │   │   ├── paddlespeech.t2s.frontend.zh_normalization.quantifier.rst
│   │   │   ├── paddlespeech.t2s.frontend.zh_normalization.rst
│   │   │   ├── paddlespeech.t2s.frontend.zh_normalization.text_normlization.rst
│   │   │   ├── paddlespeech.t2s.models.ernie_sat.ernie_sat.rst
│   │   │   ├── paddlespeech.t2s.models.ernie_sat.ernie_sat_updater.rst
│   │   │   ├── paddlespeech.t2s.models.ernie_sat.rst
│   │   │   ├── paddlespeech.t2s.models.fastspeech2.fastspeech2.rst
│   │   │   ├── paddlespeech.t2s.models.fastspeech2.fastspeech2_updater.rst
│   │   │   ├── paddlespeech.t2s.models.fastspeech2.rst
│   │   │   ├── paddlespeech.t2s.models.hifigan.hifigan.rst
│   │   │   ├── paddlespeech.t2s.models.hifigan.hifigan_updater.rst
│   │   │   ├── paddlespeech.t2s.models.hifigan.rst
│   │   │   ├── paddlespeech.t2s.models.melgan.melgan.rst
│   │   │   ├── paddlespeech.t2s.models.melgan.multi_band_melgan_updater.rst
│   │   │   ├── paddlespeech.t2s.models.melgan.rst
│   │   │   ├── paddlespeech.t2s.models.melgan.style_melgan.rst
│   │   │   ├── paddlespeech.t2s.models.melgan.style_melgan_updater.rst
│   │   │   ├── paddlespeech.t2s.models.parallel_wavegan.parallel_wavegan.rst
│   │   │   ├── paddlespeech.t2s.models.parallel_wavegan.parallel_wavegan_updater.rst
│   │   │   ├── paddlespeech.t2s.models.parallel_wavegan.rst
│   │   │   ├── paddlespeech.t2s.models.rst
│   │   │   ├── paddlespeech.t2s.models.speedyspeech.rst
│   │   │   ├── paddlespeech.t2s.models.speedyspeech.speedyspeech.rst
│   │   │   ├── paddlespeech.t2s.models.speedyspeech.speedyspeech_updater.rst
│   │   │   ├── paddlespeech.t2s.models.tacotron2.rst
│   │   │   ├── paddlespeech.t2s.models.tacotron2.tacotron2.rst
│   │   │   ├── paddlespeech.t2s.models.tacotron2.tacotron2_updater.rst
│   │   │   ├── paddlespeech.t2s.models.transformer_tts.rst
│   │   │   ├── paddlespeech.t2s.models.transformer_tts.transformer_tts.rst
│   │   │   ├── paddlespeech.t2s.models.transformer_tts.transformer_tts_updater.rst
│   │   │   ├── paddlespeech.t2s.models.vits.duration_predictor.rst
│   │   │   ├── paddlespeech.t2s.models.vits.flow.rst
│   │   │   ├── paddlespeech.t2s.models.vits.generator.rst
│   │   │   ├── paddlespeech.t2s.models.vits.monotonic_align.core.rst
│   │   │   ├── paddlespeech.t2s.models.vits.monotonic_align.rst
│   │   │   ├── paddlespeech.t2s.models.vits.monotonic_align.setup.rst
│   │   │   ├── paddlespeech.t2s.models.vits.posterior_encoder.rst
│   │   │   ├── paddlespeech.t2s.models.vits.residual_coupling.rst
│   │   │   ├── paddlespeech.t2s.models.vits.rst
│   │   │   ├── paddlespeech.t2s.models.vits.text_encoder.rst
│   │   │   ├── paddlespeech.t2s.models.vits.transform.rst
│   │   │   ├── paddlespeech.t2s.models.vits.vits.rst
│   │   │   ├── paddlespeech.t2s.models.vits.vits_updater.rst
│   │   │   ├── paddlespeech.t2s.models.vits.wavenet.residual_block.rst
│   │   │   ├── paddlespeech.t2s.models.vits.wavenet.rst
│   │   │   ├── paddlespeech.t2s.models.vits.wavenet.wavenet.rst
│   │   │   ├── paddlespeech.t2s.models.waveflow.rst
│   │   │   ├── paddlespeech.t2s.models.wavernn.rst
│   │   │   ├── paddlespeech.t2s.models.wavernn.wavernn.rst
│   │   │   ├── paddlespeech.t2s.models.wavernn.wavernn_updater.rst
│   │   │   ├── paddlespeech.t2s.modules.activation.rst
│   │   │   ├── paddlespeech.t2s.modules.causal_conv.rst
│   │   │   ├── paddlespeech.t2s.modules.conformer.convolution.rst
│   │   │   ├── paddlespeech.t2s.modules.conformer.encoder_layer.rst
│   │   │   ├── paddlespeech.t2s.modules.conformer.rst
│   │   │   ├── paddlespeech.t2s.modules.conv.rst
│   │   │   ├── paddlespeech.t2s.modules.geometry.rst
│   │   │   ├── paddlespeech.t2s.modules.layer_norm.rst
│   │   │   ├── paddlespeech.t2s.modules.losses.rst
│   │   │   ├── paddlespeech.t2s.modules.masked_fill.rst
│   │   │   ├── paddlespeech.t2s.modules.nets_utils.rst
│   │   │   ├── paddlespeech.t2s.modules.normalizer.rst
│   │   │   ├── paddlespeech.t2s.modules.positional_encoding.rst
│   │   │   ├── paddlespeech.t2s.modules.pqmf.rst
│   │   │   ├── paddlespeech.t2s.modules.predictor.duration_predictor.rst
│   │   │   ├── paddlespeech.t2s.modules.predictor.length_regulator.rst
│   │   │   ├── paddlespeech.t2s.modules.predictor.rst
│   │   │   ├── paddlespeech.t2s.modules.predictor.variance_predictor.rst
│   │   │   ├── paddlespeech.t2s.modules.residual_block.rst
│   │   │   ├── paddlespeech.t2s.modules.residual_stack.rst
│   │   │   ├── paddlespeech.t2s.modules.rst
│   │   │   ├── paddlespeech.t2s.modules.style_encoder.rst
│   │   │   ├── paddlespeech.t2s.modules.tacotron2.attentions.rst
│   │   │   ├── paddlespeech.t2s.modules.tacotron2.decoder.rst
│   │   │   ├── paddlespeech.t2s.modules.tacotron2.encoder.rst
│   │   │   ├── paddlespeech.t2s.modules.tacotron2.rst
│   │   │   ├── paddlespeech.t2s.modules.tade_res_block.rst
│   │   │   ├── paddlespeech.t2s.modules.transformer.attention.rst
│   │   │   ├── paddlespeech.t2s.modules.transformer.decoder.rst
│   │   │   ├── paddlespeech.t2s.modules.transformer.decoder_layer.rst
│   │   │   ├── paddlespeech.t2s.modules.transformer.embedding.rst
│   │   │   ├── paddlespeech.t2s.modules.transformer.encoder.rst
│   │   │   ├── paddlespeech.t2s.modules.transformer.encoder_layer.rst
│   │   │   ├── paddlespeech.t2s.modules.transformer.lightconv.rst
│   │   │   ├── paddlespeech.t2s.modules.transformer.mask.rst
│   │   │   ├── paddlespeech.t2s.modules.transformer.multi_layer_conv.rst
│   │   │   ├── paddlespeech.t2s.modules.transformer.positionwise_feed_forward.rst
│   │   │   ├── paddlespeech.t2s.modules.transformer.repeat.rst
│   │   │   ├── paddlespeech.t2s.modules.transformer.rst
│   │   │   ├── paddlespeech.t2s.modules.transformer.subsampling.rst
│   │   │   ├── paddlespeech.t2s.modules.upsample.rst
│   │   │   ├── paddlespeech.t2s.rst
│   │   │   ├── paddlespeech.t2s.training.cli.rst
│   │   │   ├── paddlespeech.t2s.training.default_config.rst
│   │   │   ├── paddlespeech.t2s.training.experiment.rst
│   │   │   ├── paddlespeech.t2s.training.extension.rst
│   │   │   ├── paddlespeech.t2s.training.extensions.evaluator.rst
│   │   │   ├── paddlespeech.t2s.training.extensions.rst
│   │   │   ├── paddlespeech.t2s.training.extensions.snapshot.rst
│   │   │   ├── paddlespeech.t2s.training.extensions.visualizer.rst
│   │   │   ├── paddlespeech.t2s.training.optimizer.rst
│   │   │   ├── paddlespeech.t2s.training.reporter.rst
│   │   │   ├── paddlespeech.t2s.training.rst
│   │   │   ├── paddlespeech.t2s.training.seeding.rst
│   │   │   ├── paddlespeech.t2s.training.trainer.rst
│   │   │   ├── paddlespeech.t2s.training.trigger.rst
│   │   │   ├── paddlespeech.t2s.training.triggers.interval_trigger.rst
│   │   │   ├── paddlespeech.t2s.training.triggers.limit_trigger.rst
│   │   │   ├── paddlespeech.t2s.training.triggers.rst
│   │   │   ├── paddlespeech.t2s.training.triggers.time_trigger.rst
│   │   │   ├── paddlespeech.t2s.training.updater.rst
│   │   │   ├── paddlespeech.t2s.training.updaters.rst
│   │   │   ├── paddlespeech.t2s.training.updaters.standard_updater.rst
│   │   │   ├── paddlespeech.t2s.utils.checkpoint.rst
│   │   │   ├── paddlespeech.t2s.utils.display.rst
│   │   │   ├── paddlespeech.t2s.utils.error_rate.rst
│   │   │   ├── paddlespeech.t2s.utils.h5_utils.rst
│   │   │   ├── paddlespeech.t2s.utils.internals.rst
│   │   │   ├── paddlespeech.t2s.utils.layer_tools.rst
│   │   │   ├── paddlespeech.t2s.utils.mp_tools.rst
│   │   │   ├── paddlespeech.t2s.utils.profiler.rst
│   │   │   ├── paddlespeech.t2s.utils.rst
│   │   │   ├── paddlespeech.t2s.utils.scheduler.rst
│   │   │   ├── paddlespeech.text.exps.ernie_linear.avg_model.rst
│   │   │   ├── paddlespeech.text.exps.ernie_linear.punc_restore.rst
│   │   │   ├── paddlespeech.text.exps.ernie_linear.rst
│   │   │   ├── paddlespeech.text.exps.ernie_linear.test.rst
│   │   │   ├── paddlespeech.text.exps.ernie_linear.train.rst
│   │   │   ├── paddlespeech.text.exps.rst
│   │   │   ├── paddlespeech.text.models.ernie_crf.model.rst
│   │   │   ├── paddlespeech.text.models.ernie_crf.rst
│   │   │   ├── paddlespeech.text.models.ernie_linear.dataset.rst
│   │   │   ├── paddlespeech.text.models.ernie_linear.ernie_linear.rst
│   │   │   ├── paddlespeech.text.models.ernie_linear.ernie_linear_updater.rst
│   │   │   ├── paddlespeech.text.models.ernie_linear.rst
│   │   │   ├── paddlespeech.text.models.rst
│   │   │   ├── paddlespeech.text.rst
│   │   │   ├── paddlespeech.utils.dynamic_import.rst
│   │   │   ├── paddlespeech.utils.env.rst
│   │   │   ├── paddlespeech.utils.rst
│   │   │   ├── paddlespeech.vector.cluster.diarization.rst
│   │   │   ├── paddlespeech.vector.cluster.plda.rst
│   │   │   ├── paddlespeech.vector.cluster.rst
│   │   │   ├── paddlespeech.vector.exps.ge2e.audio_processor.rst
│   │   │   ├── paddlespeech.vector.exps.ge2e.config.rst
│   │   │   ├── paddlespeech.vector.exps.ge2e.dataset_processors.rst
│   │   │   ├── paddlespeech.vector.exps.ge2e.inference.rst
│   │   │   ├── paddlespeech.vector.exps.ge2e.preprocess.rst
│   │   │   ├── paddlespeech.vector.exps.ge2e.random_cycle.rst
│   │   │   ├── paddlespeech.vector.exps.ge2e.rst
│   │   │   ├── paddlespeech.vector.exps.ge2e.speaker_verification_dataset.rst
│   │   │   ├── paddlespeech.vector.exps.ge2e.train.rst
│   │   │   ├── paddlespeech.vector.exps.rst
│   │   │   ├── paddlespeech.vector.io.augment.rst
│   │   │   ├── paddlespeech.vector.io.batch.rst
│   │   │   ├── paddlespeech.vector.io.dataset.rst
│   │   │   ├── paddlespeech.vector.io.dataset_from_json.rst
│   │   │   ├── paddlespeech.vector.io.embedding_norm.rst
│   │   │   ├── paddlespeech.vector.io.rst
│   │   │   ├── paddlespeech.vector.io.signal_processing.rst
│   │   │   ├── paddlespeech.vector.models.ecapa_tdnn.rst
│   │   │   ├── paddlespeech.vector.models.lstm_speaker_encoder.rst
│   │   │   ├── paddlespeech.vector.models.rst
│   │   │   ├── paddlespeech.vector.modules.layer.rst
│   │   │   ├── paddlespeech.vector.modules.loss.rst
│   │   │   ├── paddlespeech.vector.modules.rst
│   │   │   ├── paddlespeech.vector.modules.sid_model.rst
│   │   │   ├── paddlespeech.vector.rst
│   │   │   ├── paddlespeech.vector.training.rst
│   │   │   ├── paddlespeech.vector.training.scheduler.rst
│   │   │   ├── paddlespeech.vector.training.seeding.rst
│   │   │   ├── paddlespeech.vector.utils.rst
│   │   │   ├── paddlespeech.vector.utils.time.rst
│   │   │   ├── paddlespeech.vector.utils.vector_utils.rst
│   │   │   └── paddlespeech.version.rst
│   │   ├── asr/
│   │   │   ├── PPASR.md
│   │   │   ├── PPASR_cn.md
│   │   │   ├── data_preparation.md
│   │   │   ├── feature_list.md
│   │   │   ├── models_introduction.md
│   │   │   ├── ngram_lm.md
│   │   │   └── quick_start.md
│   │   ├── audio/
│   │   │   ├── _static/
│   │   │   │   └── custom.css
│   │   │   ├── _templates/
│   │   │   │   ├── module.rst_t
│   │   │   │   ├── package.rst_t
│   │   │   │   └── toc.rst_t
│   │   │   ├── conf.py
│   │   │   └── index.rst
│   │   ├── audio_api/
│   │   │   ├── modules.rst
│   │   │   ├── paddleaudio.backends.common.rst
│   │   │   ├── paddleaudio.backends.no_backend.rst
│   │   │   ├── paddleaudio.backends.rst
│   │   │   ├── paddleaudio.backends.soundfile_backend.rst
│   │   │   ├── paddleaudio.backends.sox_io_backend.rst
│   │   │   ├── paddleaudio.backends.utils.rst
│   │   │   ├── paddleaudio.compliance.kaldi.rst
│   │   │   ├── paddleaudio.compliance.librosa.rst
│   │   │   ├── paddleaudio.compliance.rst
│   │   │   ├── paddleaudio.datasets.dataset.rst
│   │   │   ├── paddleaudio.datasets.esc50.rst
│   │   │   ├── paddleaudio.datasets.gtzan.rst
│   │   │   ├── paddleaudio.datasets.hey_snips.rst
│   │   │   ├── paddleaudio.datasets.rirs_noises.rst
│   │   │   ├── paddleaudio.datasets.rst
│   │   │   ├── paddleaudio.datasets.tess.rst
│   │   │   ├── paddleaudio.datasets.urban_sound.rst
│   │   │   ├── paddleaudio.datasets.voxceleb.rst
│   │   │   ├── paddleaudio.features.layers.rst
│   │   │   ├── paddleaudio.features.rst
│   │   │   ├── paddleaudio.functional.functional.rst
│   │   │   ├── paddleaudio.functional.rst
│   │   │   ├── paddleaudio.functional.window.rst
│   │   │   ├── paddleaudio.kaldi.kaldi.rst
│   │   │   ├── paddleaudio.kaldi.rst
│   │   │   ├── paddleaudio.metric.eer.rst
│   │   │   ├── paddleaudio.metric.rst
│   │   │   ├── paddleaudio.rst
│   │   │   ├── paddleaudio.sox_effects.rst
│   │   │   ├── paddleaudio.sox_effects.sox_effects.rst
│   │   │   ├── paddleaudio.utils.download.rst
│   │   │   ├── paddleaudio.utils.env.rst
│   │   │   ├── paddleaudio.utils.error.rst
│   │   │   ├── paddleaudio.utils.log.rst
│   │   │   ├── paddleaudio.utils.numeric.rst
│   │   │   ├── paddleaudio.utils.rst
│   │   │   ├── paddleaudio.utils.sox_utils.rst
│   │   │   ├── paddleaudio.utils.tensor_utils.rst
│   │   │   └── paddleaudio.utils.time.rst
│   │   ├── cls/
│   │   │   ├── custom_dataset.md
│   │   │   └── quick_start.md
│   │   ├── conf.py
│   │   ├── demo_video.rst
│   │   ├── dependencies.md
│   │   ├── index.rst
│   │   ├── install.md
│   │   ├── install_cn.md
│   │   ├── introduction.md
│   │   ├── reference.md
│   │   ├── released_model.md
│   │   ├── streaming_asr_demo_video.rst
│   │   ├── streaming_tts_demo_video.rst
│   │   ├── tts/
│   │   │   ├── PPTTS.md
│   │   │   ├── PPTTS_cn.md
│   │   │   ├── README.md
│   │   │   ├── advanced_usage.md
│   │   │   ├── demo.rst
│   │   │   ├── demo_2.rst
│   │   │   ├── gan_vocoder.md
│   │   │   ├── models_introduction.md
│   │   │   ├── quick_start.md
│   │   │   ├── quick_start_cn.md
│   │   │   ├── svs_music_score.md
│   │   │   ├── test_sentence.txt
│   │   │   ├── tts_datasets.md
│   │   │   ├── tts_papers.md
│   │   │   └── zh_text_frontend.md
│   │   ├── tts_demo_video.rst
│   │   └── vpr/
│   │       ├── PPVPR.md
│   │       └── PPVPR_cn.md
│   ├── topic/
│   │   ├── ctc/
│   │   │   ├── ctc_loss.ipynb
│   │   │   ├── ctc_loss_compare.ipynb
│   │   │   └── ctc_loss_speed_compare.ipynb
│   │   ├── frontend/
│   │   │   └── g2p.md
│   │   ├── gan_vocoder/
│   │   │   └── gan_vocoder.ipynb
│   │   └── package_release/
│   │       └── python_package_release.md
│   └── tutorial/
│       ├── .gitkeep
│       ├── asr/
│       │   ├── tutorial_deepspeech2.ipynb
│       │   └── tutorial_transformer.ipynb
│       ├── cls/
│       │   └── cls_tutorial.ipynb
│       ├── st/
│       │   └── st_tutorial.ipynb
│       └── tts/
│           └── tts_tutorial.ipynb
├── examples/
│   ├── aishell/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── asr0/
│   │   │   ├── .gitignore
│   │   │   ├── README.md
│   │   │   ├── RESULTS.md
│   │   │   ├── conf/
│   │   │   │   ├── deepspeech2.yaml
│   │   │   │   ├── deepspeech2_online.yaml
│   │   │   │   ├── preprocess.yaml
│   │   │   │   └── tuning/
│   │   │   │       ├── chunk_decode.yaml
│   │   │   │       └── decode.yaml
│   │   │   ├── local/
│   │   │   │   ├── data.sh
│   │   │   │   ├── download_lm_ch.sh
│   │   │   │   ├── export.sh
│   │   │   │   ├── test.sh
│   │   │   │   ├── test_export.sh
│   │   │   │   ├── test_wav.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── asr1/
│   │   │   ├── .gitignore
│   │   │   ├── README.md
│   │   │   ├── RESULTS.md
│   │   │   ├── conf/
│   │   │   │   ├── augmentation.json
│   │   │   │   ├── chunk_conformer.yaml
│   │   │   │   ├── chunk_roformer.yaml
│   │   │   │   ├── chunk_roformer_bidecoder.yaml
│   │   │   │   ├── chunk_squeezeformer.yaml
│   │   │   │   ├── conformer.yaml
│   │   │   │   ├── preprocess.yaml
│   │   │   │   ├── squeezeformer.yaml
│   │   │   │   ├── transformer.yaml
│   │   │   │   └── tuning/
│   │   │   │       ├── chunk_decode.yaml
│   │   │   │       └── decode.yaml
│   │   │   ├── local/
│   │   │   │   ├── aishell_train_lms.sh
│   │   │   │   ├── align.sh
│   │   │   │   ├── data.sh
│   │   │   │   ├── export.sh
│   │   │   │   ├── test.sh
│   │   │   │   ├── test_wav.sh
│   │   │   │   ├── tlg.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   └── asr3/
│   │       ├── README.md
│   │       ├── RESULT.md
│   │       ├── cmd.sh
│   │       ├── conf/
│   │       │   ├── preprocess.yaml
│   │       │   ├── train_with_wav2vec.yaml
│   │       │   ├── tuning/
│   │       │   │   └── decode.yaml
│   │       │   ├── wav2vec2ASR.yaml
│   │       │   └── wav2vec2ASR_adadelta.yaml
│   │       ├── local/
│   │       │   ├── aishell_prepare.py
│   │       │   ├── data.sh
│   │       │   ├── test.sh
│   │       │   ├── test_wav.sh
│   │       │   └── train.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── aishell3/
│   │   ├── README.md
│   │   ├── ernie_sat/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   ├── synthesize_e2e.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── tts3/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   ├── conformer.yaml
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── inference.sh
│   │   │   │   ├── lite_predict.sh
│   │   │   │   ├── ort_predict.sh
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   ├── synthesize_e2e.sh
│   │   │   │   └── train.sh
│   │   │   └── run.sh
│   │   ├── vc0/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   ├── train.sh
│   │   │   │   └── voice_cloning.sh
│   │   │   └── run.sh
│   │   ├── vc1/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   └── voice_cloning.sh
│   │   │   └── run.sh
│   │   ├── vc2/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── preprocess.sh
│   │   │   │   └── voice_cloning.sh
│   │   │   └── run.sh
│   │   ├── vits/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   ├── synthesize_e2e.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── vits-vc/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   ├── train.sh
│   │   │   │   └── voice_cloning.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── voc1/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   └── preprocess.sh
│   │   │   └── run.sh
│   │   └── voc5/
│   │       ├── README.md
│   │       ├── conf/
│   │       │   └── default.yaml
│   │       └── run.sh
│   ├── aishell3_vctk/
│   │   ├── README.md
│   │   └── ernie_sat/
│   │       ├── README.md
│   │       ├── conf/
│   │       │   └── default.yaml
│   │       ├── local/
│   │       │   ├── preprocess.sh
│   │       │   └── synthesize_e2e.sh
│   │       └── run.sh
│   ├── ami/
│   │   ├── README.md
│   │   └── sd0/
│   │       ├── .gitignore
│   │       ├── README.md
│   │       ├── conf/
│   │       │   └── ecapa_tdnn.yaml
│   │       ├── local/
│   │       │   ├── ami_prepare.py
│   │       │   ├── ami_splits.py
│   │       │   ├── compute_embdding.py
│   │       │   ├── dataio.py
│   │       │   ├── experiment.py
│   │       │   └── process.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── callcenter/
│   │   ├── README.md
│   │   └── asr1/
│   │       ├── .gitignore
│   │       ├── RESULTS.md
│   │       ├── conf/
│   │       │   ├── augmentation.json
│   │       │   ├── chunk_conformer.yaml
│   │       │   ├── conformer.yaml
│   │       │   ├── preprocess.yaml
│   │       │   └── tuning/
│   │       │       ├── chunk_decode.yaml
│   │       │       └── decode.yaml
│   │       ├── local/
│   │       │   ├── align.sh
│   │       │   ├── data.sh
│   │       │   ├── download_lm_ch.sh
│   │       │   ├── export.sh
│   │       │   ├── test.sh
│   │       │   └── train.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── canton/
│   │   └── tts3/
│   │       ├── README.md
│   │       ├── conf/
│   │       │   └── default.yaml
│   │       ├── local/
│   │       │   ├── inference.sh
│   │       │   ├── ort_predict.sh
│   │       │   ├── preprocess.sh
│   │       │   └── synthesize_e2e.sh
│   │       └── run.sh
│   ├── csmsc/
│   │   ├── README.md
│   │   ├── jets/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── inference.sh
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   ├── synthesize_e2e.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── tts0/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── inference.sh
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   ├── synthesize_e2e.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── tts2/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── inference.sh
│   │   │   │   ├── inference_mlu.sh
│   │   │   │   ├── inference_npu.sh
│   │   │   │   ├── inference_xpu.sh
│   │   │   │   ├── lite_predict.sh
│   │   │   │   ├── ort_predict.sh
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   ├── synthesize_e2e.sh
│   │   │   │   ├── synthesize_e2e_mlu.sh
│   │   │   │   ├── synthesize_e2e_npu.sh
│   │   │   │   ├── synthesize_e2e_xpu.sh
│   │   │   │   ├── synthesize_mlu.sh
│   │   │   │   ├── synthesize_npu.sh
│   │   │   │   ├── synthesize_xpu.sh
│   │   │   │   ├── train.sh
│   │   │   │   ├── train_mlu.sh
│   │   │   │   ├── train_npu.sh
│   │   │   │   └── train_xpu.sh
│   │   │   ├── path.sh
│   │   │   ├── run.sh
│   │   │   ├── run_mlu.sh
│   │   │   ├── run_npu.sh
│   │   │   └── run_xpu.sh
│   │   ├── tts3/
│   │   │   ├── README.md
│   │   │   ├── README_cn.md
│   │   │   ├── conf/
│   │   │   │   ├── cnndecoder.yaml
│   │   │   │   ├── conformer.yaml
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── PTQ_dynamic.sh
│   │   │   │   ├── PTQ_static.sh
│   │   │   │   ├── export2lite.sh
│   │   │   │   ├── inference.sh
│   │   │   │   ├── inference_streaming.sh
│   │   │   │   ├── inference_xpu.sh
│   │   │   │   ├── lite_predict.sh
│   │   │   │   ├── lite_predict_streaming.sh
│   │   │   │   ├── ort_predict.sh
│   │   │   │   ├── ort_predict_streaming.sh
│   │   │   │   ├── paddle2onnx.sh
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── simple.lexicon
│   │   │   │   ├── synthesize.sh
│   │   │   │   ├── synthesize_e2e.sh
│   │   │   │   ├── synthesize_e2e_xpu.sh
│   │   │   │   ├── synthesize_streaming.sh
│   │   │   │   ├── synthesize_xpu.sh
│   │   │   │   ├── train.sh
│   │   │   │   └── train_xpu.sh
│   │   │   ├── path.sh
│   │   │   ├── run.sh
│   │   │   ├── run_cnndecoder.sh
│   │   │   └── run_xpu.sh
│   │   ├── tts3_rhy/
│   │   │   ├── README.md
│   │   │   ├── local/
│   │   │   │   └── synthesize_e2e.sh
│   │   │   └── run.sh
│   │   ├── vits/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── inference.sh
│   │   │   │   ├── lite_predict.sh
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   ├── synthesize_e2e.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── voc1/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── PTQ_static.sh
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   ├── synthesize_e2e.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── voc3/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   ├── default.yaml
│   │   │   │   └── finetune.yaml
│   │   │   ├── local/
│   │   │   │   ├── synthesize.sh
│   │   │   │   └── synthesize_e2e.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── voc4/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   └── synthesize.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── voc5/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   ├── default.yaml
│   │   │   │   ├── finetune.yaml
│   │   │   │   └── iSTFT.yaml
│   │   │   ├── finetune.sh
│   │   │   ├── iSTFTNet.md
│   │   │   ├── local/
│   │   │   │   ├── synthesize.sh
│   │   │   │   └── synthesize_e2e.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   └── voc6/
│   │       ├── README.md
│   │       ├── conf/
│   │       │   └── default.yaml
│   │       ├── local/
│   │       │   ├── preprocess.sh
│   │       │   └── synthesize.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── esc50/
│   │   ├── README.md
│   │   ├── RESULTS.md
│   │   └── cls0/
│   │       ├── conf/
│   │       │   └── panns.yaml
│   │       ├── local/
│   │       │   ├── export.sh
│   │       │   ├── infer.sh
│   │       │   ├── static_model_infer.sh
│   │       │   └── train.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── hey_snips/
│   │   ├── README.md
│   │   └── kws0/
│   │       ├── README.md
│   │       ├── conf/
│   │       │   └── mdtc.yaml
│   │       ├── local/
│   │       │   ├── plot.sh
│   │       │   ├── score.sh
│   │       │   └── train.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── iwslt2012/
│   │   └── punc0/
│   │       ├── README.md
│   │       ├── RESULTS.md
│   │       ├── conf/
│   │       │   ├── default.yaml
│   │       │   ├── ernie-3.0-base.yaml
│   │       │   ├── ernie-3.0-medium.yaml
│   │       │   ├── ernie-3.0-mini.yaml
│   │       │   ├── ernie-3.0-nano-zh.yaml
│   │       │   └── ernie-tiny.yaml
│   │       ├── local/
│   │       │   ├── data.sh
│   │       │   ├── preprocess.py
│   │       │   ├── punc_restore.sh
│   │       │   ├── test.sh
│   │       │   └── train.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── librispeech/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── asr0/
│   │   │   ├── README.md
│   │   │   ├── RESULTS.md
│   │   │   ├── conf/
│   │   │   │   ├── deepspeech2.yaml
│   │   │   │   ├── deepspeech2_online.yaml
│   │   │   │   ├── preprocess.yaml
│   │   │   │   └── tuning/
│   │   │   │       ├── chunk_decode.yaml
│   │   │   │       └── decode.yaml
│   │   │   ├── local/
│   │   │   │   ├── data.sh
│   │   │   │   ├── download_lm_en.sh
│   │   │   │   ├── export.sh
│   │   │   │   ├── test.sh
│   │   │   │   ├── test_wav.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── asr1/
│   │   │   ├── .gitignore
│   │   │   ├── README.md
│   │   │   ├── RESULTS.md
│   │   │   ├── cmd.sh
│   │   │   ├── conf/
│   │   │   │   ├── augmentation.json
│   │   │   │   ├── chunk_conformer.yaml
│   │   │   │   ├── chunk_transformer.yaml
│   │   │   │   ├── conformer.yaml
│   │   │   │   ├── preprocess.yaml
│   │   │   │   ├── transformer.yaml
│   │   │   │   └── tuning/
│   │   │   │       ├── chunk_decode.yaml
│   │   │   │       └── decode.yaml
│   │   │   ├── local/
│   │   │   │   ├── align.sh
│   │   │   │   ├── data.sh
│   │   │   │   ├── download_lm_en.sh
│   │   │   │   ├── export.sh
│   │   │   │   ├── test.sh
│   │   │   │   ├── test_wav.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── asr2/
│   │   │   ├── .gitignore
│   │   │   ├── README.md
│   │   │   ├── RESULTS.md
│   │   │   ├── cmd.sh
│   │   │   ├── conf/
│   │   │   │   ├── augmentation.json
│   │   │   │   ├── decode/
│   │   │   │   │   ├── decode.yaml
│   │   │   │   │   ├── decode_att.yaml
│   │   │   │   │   ├── decode_base.yaml
│   │   │   │   │   ├── decode_ctc.yaml
│   │   │   │   │   └── decode_wo_lm.yaml
│   │   │   │   ├── fbank.conf
│   │   │   │   ├── lm/
│   │   │   │   │   └── transformer.yaml
│   │   │   │   ├── pitch.conf
│   │   │   │   ├── preprocess.yaml
│   │   │   │   └── transformer.yaml
│   │   │   ├── local/
│   │   │   │   ├── align.sh
│   │   │   │   ├── cacu_perplexity.sh
│   │   │   │   ├── data.sh
│   │   │   │   ├── data_prep.sh
│   │   │   │   ├── download_lm_en.sh
│   │   │   │   ├── espnet_json_to_manifest.py
│   │   │   │   ├── export.sh
│   │   │   │   ├── recog.sh
│   │   │   │   ├── test.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── asr3/
│   │   │   ├── README.md
│   │   │   ├── RESULTS.md
│   │   │   ├── cmd.sh
│   │   │   ├── conf/
│   │   │   │   ├── preprocess.yaml
│   │   │   │   ├── tuning/
│   │   │   │   │   └── decode.yaml
│   │   │   │   └── wav2vec2ASR.yaml
│   │   │   ├── local/
│   │   │   │   ├── data.sh
│   │   │   │   ├── test.sh
│   │   │   │   ├── test_wav.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── asr4/
│   │   │   ├── README.md
│   │   │   ├── RESULTS.md
│   │   │   ├── cmd.sh
│   │   │   ├── conf/
│   │   │   │   ├── config.json
│   │   │   │   ├── hubertASR.yaml
│   │   │   │   ├── preprocess.yaml
│   │   │   │   ├── preprocessor_config.json
│   │   │   │   └── tuning/
│   │   │   │       └── decode.yaml
│   │   │   ├── local/
│   │   │   │   ├── data.sh
│   │   │   │   ├── test.sh
│   │   │   │   ├── test_wav.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   └── asr5/
│   │       ├── README.md
│   │       ├── RESULTS.md
│   │       ├── avg.sh
│   │       ├── cmd.sh
│   │       ├── compute_wer.py
│   │       ├── conf/
│   │       │   ├── preprocess.yaml
│   │       │   ├── preprocessor_config.json
│   │       │   ├── tuning/
│   │       │   │   └── decode.yaml
│   │       │   └── wavlmASR.yaml
│   │       ├── local/
│   │       │   ├── data.sh
│   │       │   ├── test.sh
│   │       │   ├── test_wav.sh
│   │       │   └── train.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── ljspeech/
│   │   ├── README.md
│   │   ├── tts0/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   └── synthesize_e2e.sh
│   │   │   └── run.sh
│   │   ├── tts1/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   ├── synthesize_e2e.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── tts3/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── inference.sh
│   │   │   │   ├── lite_predict.sh
│   │   │   │   ├── ort_predict.sh
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   └── synthesize_e2e.sh
│   │   │   └── run.sh
│   │   ├── voc0/
│   │   │   ├── README.md
│   │   │   ├── local/
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── voc1/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   └── preprocess.sh
│   │   │   └── run.sh
│   │   └── voc5/
│   │       ├── README.md
│   │       ├── conf/
│   │       │   └── default.yaml
│   │       └── run.sh
│   ├── mustc/
│   │   └── st1/
│   │       ├── cmd.sh
│   │       ├── conf/
│   │       │   ├── fbank.conf
│   │       │   ├── pitch.conf
│   │       │   ├── transformer_de.yaml
│   │       │   ├── transformer_es.yaml
│   │       │   ├── transformer_fr.yaml
│   │       │   ├── transformer_it.yaml
│   │       │   ├── transformer_nl.yaml
│   │       │   ├── transformer_pt.yaml
│   │       │   ├── transformer_ro.yaml
│   │       │   └── transformer_ru.yaml
│   │       ├── local/
│   │       │   ├── augmentation.json
│   │       │   ├── data.sh
│   │       │   ├── data_prep.sh
│   │       │   ├── divide_lang.sh
│   │       │   ├── remove_punctuation.pl
│   │       │   ├── test.sh
│   │       │   └── train.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── opencpop/
│   │   ├── README.md
│   │   ├── svs1/
│   │   │   ├── README.md
│   │   │   ├── README_cn.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── pinyin_to_phone.txt
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   ├── synthesize_e2e.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── voc1/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── dygraph_to_static.sh
│   │   │   │   └── preprocess.sh
│   │   │   └── run.sh
│   │   └── voc5/
│   │       ├── conf/
│   │       │   ├── default.yaml
│   │       │   └── finetune.yaml
│   │       ├── finetune.sh
│   │       ├── local/
│   │       │   └── dygraph_to_static.sh
│   │       └── run.sh
│   ├── other/
│   │   ├── augmentation/
│   │   │   └── augmentation.json
│   │   ├── cc-cedict/
│   │   │   ├── .gitignore
│   │   │   ├── README.md
│   │   │   ├── local/
│   │   │   │   └── parser.py
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── g2p/
│   │   │   ├── README.md
│   │   │   ├── compare_badcase.py
│   │   │   ├── get_g2p_data.py
│   │   │   ├── path.sh
│   │   │   ├── run.sh
│   │   │   └── test_g2p.py
│   │   ├── ge2e/
│   │   │   ├── README.md
│   │   │   ├── local/
│   │   │   │   ├── inference.sh
│   │   │   │   ├── preprocess.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── mfa/
│   │   │   ├── README.md
│   │   │   ├── local/
│   │   │   │   ├── detect_oov.py
│   │   │   │   ├── generate_canton_lexicon_wavlabs.py
│   │   │   │   ├── generate_lexicon.py
│   │   │   │   ├── reorganize_aishell3.py
│   │   │   │   ├── reorganize_baker.py
│   │   │   │   ├── reorganize_ljspeech.py
│   │   │   │   └── reorganize_vctk.py
│   │   │   ├── run.sh
│   │   │   └── run_canton.sh
│   │   ├── ngram_lm/
│   │   │   ├── .gitignore
│   │   │   ├── README.md
│   │   │   └── s0/
│   │   │       ├── .gitignore
│   │   │       ├── README.md
│   │   │       ├── data/
│   │   │       │   ├── README.md
│   │   │       │   ├── custom_confusion.txt
│   │   │       │   └── text_correct.txt
│   │   │       ├── local/
│   │   │       │   ├── build_zh_lm.sh
│   │   │       │   ├── download_lm_zh.sh
│   │   │       │   └── kenlm_score_test.py
│   │   │       ├── path.sh
│   │   │       ├── requirements.txt
│   │   │       └── run.sh
│   │   ├── punctuation_restoration/
│   │   │   └── README.md
│   │   ├── rhy/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── data/
│   │   │   │   └── rhy_token
│   │   │   ├── local/
│   │   │   │   ├── data.sh
│   │   │   │   ├── pre_for_sp_aishell.py
│   │   │   │   ├── pre_for_sp_csmsc.py
│   │   │   │   ├── rhy_predict.sh
│   │   │   │   ├── test.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── spm/
│   │   │   ├── .gitignore
│   │   │   ├── README.md
│   │   │   ├── path.sh
│   │   │   ├── run.sh
│   │   │   └── text
│   │   ├── tn/
│   │   │   ├── README.md
│   │   │   ├── data/
│   │   │   │   └── textnorm_test_cases.txt
│   │   │   ├── get_textnorm_data.py
│   │   │   ├── path.sh
│   │   │   ├── run.sh
│   │   │   └── test_textnorm.py
│   │   └── tts_finetune/
│   │       └── tts3/
│   │           ├── README.md
│   │           ├── conf/
│   │           │   ├── fastspeech2_layers.txt
│   │           │   └── finetune.yaml
│   │           ├── local/
│   │           │   ├── check_oov.py
│   │           │   ├── extract_feature.py
│   │           │   ├── finetune.py
│   │           │   ├── generate_duration.py
│   │           │   ├── get_mfa_result.py
│   │           │   └── prepare_env.py
│   │           ├── path.sh
│   │           ├── run.sh
│   │           ├── run_en.sh
│   │           └── run_mix.sh
│   ├── tal_cs/
│   │   └── asr1/
│   │       ├── README.md
│   │       ├── RESULTS.md
│   │       ├── conf/
│   │       │   ├── chunk_conformer.yaml
│   │       │   ├── conformer.yaml
│   │       │   ├── preprocess.yaml
│   │       │   └── tuning/
│   │       │       ├── chunk_decode.yaml
│   │       │       └── decode.yaml
│   │       ├── local/
│   │       │   ├── data.sh
│   │       │   ├── test.sh
│   │       │   ├── test_wav.sh
│   │       │   └── train.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── ted_en_zh/
│   │   ├── README.md
│   │   ├── st0/
│   │   │   ├── .gitignore
│   │   │   ├── README.md
│   │   │   ├── RESULTS.md
│   │   │   ├── conf/
│   │   │   │   ├── preprocess.yaml
│   │   │   │   ├── transformer.yaml
│   │   │   │   ├── transformer_mtl_noam.yaml
│   │   │   │   └── tuning/
│   │   │   │       └── decode.yaml
│   │   │   ├── local/
│   │   │   │   ├── data.sh
│   │   │   │   ├── test.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   └── st1/
│   │       ├── .gitignore
│   │       ├── README.md
│   │       ├── RESULTS.md
│   │       ├── cmd.sh
│   │       ├── conf/
│   │       │   ├── fbank.conf
│   │       │   ├── pitch.conf
│   │       │   ├── preprocess.yaml
│   │       │   ├── transformer.yaml
│   │       │   ├── transformer_mtl_noam.yaml
│   │       │   └── tuning/
│   │       │       └── decode.yaml
│   │       ├── local/
│   │       │   ├── convert_torch_to_paddle.py
│   │       │   ├── data.sh
│   │       │   ├── divide_lang.sh
│   │       │   ├── download_pretrain.sh
│   │       │   ├── remove_punctuation.pl
│   │       │   ├── ted_en_zh.py
│   │       │   ├── test.sh
│   │       │   └── train.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── tess/
│   │   ├── README.md
│   │   └── cls0/
│   │       ├── conf/
│   │       │   ├── panns_logmelspectrogram.yaml
│   │       │   ├── panns_melspectrogram.yaml
│   │       │   ├── panns_mfcc.yaml
│   │       │   └── panns_spectrogram.yaml
│   │       ├── local/
│   │       │   ├── train.py
│   │       │   └── train.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── thchs30/
│   │   ├── README.md
│   │   └── align0/
│   │       ├── README.md
│   │       ├── data/
│   │       │   └── dict/
│   │       │       └── syllable.lexicon
│   │       ├── local/
│   │       │   ├── data.sh
│   │       │   ├── gen_word2phone.py
│   │       │   └── reorganize_thchs30.py
│   │       ├── path.sh
│   │       └── run.sh
│   ├── timit/
│   │   ├── README.md
│   │   └── asr1/
│   │       ├── .gitignore
│   │       ├── README.md
│   │       ├── RESULTS.md
│   │       ├── conf/
│   │       │   ├── augmentation.json
│   │       │   ├── dev_spk.list
│   │       │   ├── preprocess.yaml
│   │       │   ├── test_spk.list
│   │       │   ├── transformer.yaml
│   │       │   └── tuning/
│   │       │       └── decode.yaml
│   │       ├── local/
│   │       │   ├── align.sh
│   │       │   ├── data.sh
│   │       │   ├── export.sh
│   │       │   ├── test.sh
│   │       │   ├── timit_data_prep.sh
│   │       │   ├── timit_norm_trans.pl
│   │       │   └── train.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── tiny/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── asr0/
│   │   │   ├── .gitignore
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   ├── deepspeech2.yaml
│   │   │   │   ├── deepspeech2_online.yaml
│   │   │   │   ├── preprocess.yaml
│   │   │   │   └── tuning/
│   │   │   │       ├── chunk_decode.yaml
│   │   │   │       └── decode.yaml
│   │   │   ├── local/
│   │   │   │   ├── data.sh
│   │   │   │   ├── download_lm_en.sh
│   │   │   │   ├── export.sh
│   │   │   │   ├── test.sh
│   │   │   │   └── train.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   └── asr1/
│   │       ├── .gitignore
│   │       ├── README.md
│   │       ├── conf/
│   │       │   ├── augmentation.json
│   │       │   ├── chunk_confermer.yaml
│   │       │   ├── chunk_transformer.yaml
│   │       │   ├── conformer.yaml
│   │       │   ├── preprocess.yaml
│   │       │   ├── transformer.yaml
│   │       │   └── tuning/
│   │       │       ├── chunk_decode.yaml
│   │       │       └── decode.yaml
│   │       ├── local/
│   │       │   ├── align.sh
│   │       │   ├── data.sh
│   │       │   ├── export.sh
│   │       │   ├── test.sh
│   │       │   └── train.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── vctk/
│   │   ├── README.md
│   │   ├── ernie_sat/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   └── synthesize_e2e.sh
│   │   │   └── run.sh
│   │   ├── tts3/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── inference.sh
│   │   │   │   ├── lite_predict.sh
│   │   │   │   ├── ort_predict.sh
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── synthesize.sh
│   │   │   │   └── synthesize_e2e.sh
│   │   │   └── run.sh
│   │   ├── vc3/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   ├── preprocess.sh
│   │   │   │   ├── train.sh
│   │   │   │   └── voice_conversion.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── voc1/
│   │   │   ├── README.md
│   │   │   ├── conf/
│   │   │   │   └── default.yaml
│   │   │   ├── local/
│   │   │   │   └── preprocess.sh
│   │   │   └── run.sh
│   │   └── voc5/
│   │       ├── README.md
│   │       ├── conf/
│   │       │   └── default.yaml
│   │       └── run.sh
│   ├── voxceleb/
│   │   ├── README.md
│   │   └── sv0/
│   │       ├── README.md
│   │       ├── RESULT.md
│   │       ├── conf/
│   │       │   ├── ecapa_tdnn.yaml
│   │       │   └── ecapa_tdnn_small.yaml
│   │       ├── local/
│   │       │   ├── convert.sh
│   │       │   ├── data.sh
│   │       │   ├── data_prepare.py
│   │       │   ├── emb.sh
│   │       │   ├── make_rirs_noise_csv_dataset_from_json.py
│   │       │   ├── make_vox_csv_dataset_from_json.py
│   │       │   ├── make_voxceleb_kaldi_trial.py
│   │       │   ├── test.sh
│   │       │   └── train.sh
│   │       ├── path.sh
│   │       └── run.sh
│   ├── wenetspeech/
│   │   ├── README.md
│   │   ├── asr0/
│   │   │   └── RESULTS.md
│   │   └── asr1/
│   │       ├── .gitignore
│   │       ├── README.md
│   │       ├── RESULTS.md
│   │       ├── conf/
│   │       │   ├── chunk_conformer.yaml
│   │       │   ├── chunk_conformer_u2pp.yaml
│   │       │   ├── conformer.yaml
│   │       │   ├── preprocess.yaml
│   │       │   └── tuning/
│   │       │       ├── chunk_decode.yaml
│   │       │       └── decode.yaml
│   │       ├── local/
│   │       │   ├── data.sh
│   │       │   ├── export.sh
│   │       │   ├── extract_meta.py
│   │       │   ├── process_opus.py
│   │       │   ├── quant.sh
│   │       │   ├── test.sh
│   │       │   ├── test_wav.sh
│   │       │   ├── train.sh
│   │       │   └── wenetspeech_data_prep.sh
│   │       ├── path.sh
│   │       └── run.sh
│   └── zh_en_tts/
│       └── tts3/
│           ├── .gitignore
│           ├── README.md
│           ├── conf/
│           │   └── default.yaml
│           ├── local/
│           │   ├── inference.sh
│           │   ├── mfa_download.sh
│           │   ├── model_download.sh
│           │   ├── ort_predict.sh
│           │   ├── preprocess.sh
│           │   ├── synthesize.sh
│           │   └── synthesize_e2e.sh
│           └── run.sh
├── paddlespeech/
│   ├── __init__.py
│   ├── audio/
│   │   ├── .gitignore
│   │   ├── __init__.py
│   │   ├── backends/
│   │   │   ├── __init__.py
│   │   │   ├── common.py
│   │   │   └── soundfile_backend.py
│   │   ├── compliance/
│   │   │   ├── __init__.py
│   │   │   ├── kaldi.py
│   │   │   └── librosa.py
│   │   ├── datasets/
│   │   │   ├── __init__.py
│   │   │   ├── dataset.py
│   │   │   ├── esc50.py
│   │   │   └── voxceleb.py
│   │   ├── functional/
│   │   │   ├── __init__.py
│   │   │   ├── functional.py
│   │   │   └── window.py
│   │   ├── streamdata/
│   │   │   ├── __init__.py
│   │   │   ├── autodecode.py
│   │   │   ├── cache.py
│   │   │   ├── compat.py
│   │   │   ├── extradatasets.py
│   │   │   ├── filters.py
│   │   │   ├── gopen.py
│   │   │   ├── handlers.py
│   │   │   ├── mix.py
│   │   │   ├── paddle_utils.py
│   │   │   ├── pipeline.py
│   │   │   ├── shardlists.py
│   │   │   ├── soundfile.py
│   │   │   ├── tariterators.py
│   │   │   ├── utils.py
│   │   │   └── writer.py
│   │   ├── text/
│   │   │   ├── __init__.py
│   │   │   ├── text_featurizer.py
│   │   │   └── utility.py
│   │   ├── transform/
│   │   │   ├── __init__.py
│   │   │   ├── add_deltas.py
│   │   │   ├── channel_selector.py
│   │   │   ├── cmvn.py
│   │   │   ├── functional.py
│   │   │   ├── perturb.py
│   │   │   ├── spec_augment.py
│   │   │   ├── spectrogram.py
│   │   │   ├── transform_interface.py
│   │   │   ├── transformation.py
│   │   │   └── wpe.py
│   │   └── utils/
│   │       ├── __init__.py
│   │       ├── check_kwargs.py
│   │       ├── download.py
│   │       ├── dynamic_import.py
│   │       ├── error.py
│   │       ├── log.py
│   │       ├── numeric.py
│   │       ├── tensor_utils.py
│   │       └── time.py
│   ├── audiotools/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── core/
│   │   │   ├── __init__.py
│   │   │   ├── _julius.py
│   │   │   ├── audio_signal.py
│   │   │   ├── display.py
│   │   │   ├── dsp.py
│   │   │   ├── effects.py
│   │   │   ├── ffmpeg.py
│   │   │   ├── loudness.py
│   │   │   └── util.py
│   │   ├── data/
│   │   │   ├── __init__.py
│   │   │   ├── datasets.py
│   │   │   ├── preprocess.py
│   │   │   └── transforms.py
│   │   ├── metrics/
│   │   │   ├── __init__.py
│   │   │   └── quality.py
│   │   ├── ml/
│   │   │   ├── __init__.py
│   │   │   ├── accelerator.py
│   │   │   ├── basemodel.py
│   │   │   └── decorators.py
│   │   └── post.py
│   ├── cli/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   ├── __init__.py
│   │   ├── asr/
│   │   │   ├── __init__.py
│   │   │   └── infer.py
│   │   ├── base_commands.py
│   │   ├── cls/
│   │   │   ├── __init__.py
│   │   │   └── infer.py
│   │   ├── download.py
│   │   ├── entry.py
│   │   ├── executor.py
│   │   ├── kws/
│   │   │   ├── __init__.py
│   │   │   └── infer.py
│   │   ├── log.py
│   │   ├── ssl/
│   │   │   ├── __init__.py
│   │   │   └── infer.py
│   │   ├── st/
│   │   │   ├── __init__.py
│   │   │   └── infer.py
│   │   ├── text/
│   │   │   ├── __init__.py
│   │   │   └── infer.py
│   │   ├── tts/
│   │   │   ├── __init__.py
│   │   │   └── infer.py
│   │   ├── utils.py
│   │   ├── vector/
│   │   │   ├── __init__.py
│   │   │   └── infer.py
│   │   └── whisper/
│   │       ├── __init__.py
│   │       └── infer.py
│   ├── cls/
│   │   ├── __init__.py
│   │   ├── exps/
│   │   │   ├── __init__.py
│   │   │   └── panns/
│   │   │       ├── __init__.py
│   │   │       ├── deploy/
│   │   │       │   ├── __init__.py
│   │   │       │   └── predict.py
│   │   │       ├── export_model.py
│   │   │       ├── predict.py
│   │   │       └── train.py
│   │   └── models/
│   │       ├── __init__.py
│   │       └── panns/
│   │           ├── __init__.py
│   │           ├── classifier.py
│   │           └── panns.py
│   ├── dataset/
│   │   ├── __init__.py
│   │   ├── aidatatang_200zh/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   └── aidatatang_200zh.py
│   │   ├── aishell/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   └── aishell.py
│   │   ├── download.py
│   │   └── s2t/
│   │       ├── __init__.py
│   │       ├── avg_model.py
│   │       ├── build_vocab.py
│   │       ├── compute_mean_std.py
│   │       ├── compute_wer.py
│   │       ├── format_data.py
│   │       └── format_rsl.py
│   ├── kws/
│   │   ├── __init__.py
│   │   ├── exps/
│   │   │   ├── __init__.py
│   │   │   └── mdtc/
│   │   │       ├── __init__.py
│   │   │       ├── collate.py
│   │   │       ├── compute_det.py
│   │   │       ├── plot_det_curve.py
│   │   │       ├── score.py
│   │   │       └── train.py
│   │   └── models/
│   │       ├── __init__.py
│   │       ├── loss.py
│   │       └── mdtc.py
│   ├── resource/
│   │   ├── __init__.py
│   │   ├── model_alias.py
│   │   ├── pretrained_models.py
│   │   └── resource.py
│   ├── s2t/
│   │   ├── __init__.py
│   │   ├── decoders/
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── beam_search/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── batch_beam_search.py
│   │   │   │   └── beam_search.py
│   │   │   ├── ctcdecoder/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── decoders_deprecated.py
│   │   │   │   ├── scorer_deprecated.py
│   │   │   │   ├── swig_wrapper.py
│   │   │   │   └── tests/
│   │   │   │       └── test_decoders.py
│   │   │   ├── recog.py
│   │   │   ├── recog_bin.py
│   │   │   ├── scorers/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── ctc.py
│   │   │   │   ├── ctc_prefix_score.py
│   │   │   │   ├── length_bonus.py
│   │   │   │   ├── ngram.py
│   │   │   │   └── scorer_interface.py
│   │   │   └── utils.py
│   │   ├── exps/
│   │   │   ├── __init__.py
│   │   │   ├── deepspeech2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── bin/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── deploy/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   ├── client.py
│   │   │   │   │   │   ├── record.py
│   │   │   │   │   │   ├── runtime.py
│   │   │   │   │   │   ├── send.py
│   │   │   │   │   │   └── server.py
│   │   │   │   │   ├── export.py
│   │   │   │   │   ├── test.py
│   │   │   │   │   ├── test_export.py
│   │   │   │   │   ├── test_wav.py
│   │   │   │   │   └── train.py
│   │   │   │   └── model.py
│   │   │   ├── hubert/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── bin/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── test.py
│   │   │   │   │   ├── test_wav.py
│   │   │   │   │   └── train.py
│   │   │   │   └── model.py
│   │   │   ├── lm/
│   │   │   │   └── transformer/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── bin/
│   │   │   │       │   ├── __init__.py
│   │   │   │       │   └── cacu_perplexity.py
│   │   │   │       └── lm_cacu_perplexity.py
│   │   │   ├── u2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── bin/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── alignment.py
│   │   │   │   │   ├── export.py
│   │   │   │   │   ├── quant.py
│   │   │   │   │   ├── test.py
│   │   │   │   │   ├── test_wav.py
│   │   │   │   │   └── train.py
│   │   │   │   ├── model.py
│   │   │   │   └── trainer.py
│   │   │   ├── u2_kaldi/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── bin/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── recog.py
│   │   │   │   │   ├── test.py
│   │   │   │   │   └── train.py
│   │   │   │   └── model.py
│   │   │   ├── u2_st/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── bin/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── export.py
│   │   │   │   │   ├── test.py
│   │   │   │   │   └── train.py
│   │   │   │   └── model.py
│   │   │   ├── wav2vec2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── bin/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── test.py
│   │   │   │   │   ├── test_wav.py
│   │   │   │   │   └── train.py
│   │   │   │   └── model.py
│   │   │   ├── wavlm/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── bin/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── test.py
│   │   │   │   │   ├── test_wav.py
│   │   │   │   │   └── train.py
│   │   │   │   └── model.py
│   │   │   └── whisper/
│   │   │       └── test_wav.py
│   │   ├── frontend/
│   │   │   ├── __init__.py
│   │   │   ├── audio.py
│   │   │   ├── augmentor/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── augmentation.py
│   │   │   │   ├── base.py
│   │   │   │   ├── impulse_response.py
│   │   │   │   ├── noise_perturb.py
│   │   │   │   ├── online_bayesian_normalization.py
│   │   │   │   ├── resample.py
│   │   │   │   ├── shift_perturb.py
│   │   │   │   ├── spec_augment.py
│   │   │   │   ├── speed_perturb.py
│   │   │   │   └── volume_perturb.py
│   │   │   ├── featurizer/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── audio_featurizer.py
│   │   │   │   ├── speech_featurizer.py
│   │   │   │   └── text_featurizer.py
│   │   │   ├── normalizer.py
│   │   │   ├── speech.py
│   │   │   └── utility.py
│   │   ├── io/
│   │   │   ├── __init__.py
│   │   │   ├── batchfy.py
│   │   │   ├── collator.py
│   │   │   ├── converter.py
│   │   │   ├── dataloader.py
│   │   │   ├── dataset.py
│   │   │   ├── reader.py
│   │   │   ├── sampler.py
│   │   │   ├── speechbrain/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── batch.py
│   │   │   │   ├── data_pipeline.py
│   │   │   │   ├── data_utils.py
│   │   │   │   ├── dataio.py
│   │   │   │   ├── dataloader.py
│   │   │   │   ├── dataset.py
│   │   │   │   ├── depgraph.py
│   │   │   │   ├── make_dataloader.py
│   │   │   │   ├── sampler.py
│   │   │   │   └── sb_pipeline.py
│   │   │   └── utility.py
│   │   ├── models/
│   │   │   ├── __init__.py
│   │   │   ├── asr_interface.py
│   │   │   ├── ds2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── conv.py
│   │   │   │   └── deepspeech2.py
│   │   │   ├── hubert/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── hubert_ASR.py
│   │   │   │   └── modules/
│   │   │   │       ├── __init__.py
│   │   │   │       └── hubert_model.py
│   │   │   ├── lm/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── dataset.py
│   │   │   │   └── transformer.py
│   │   │   ├── lm_interface.py
│   │   │   ├── st_interface.py
│   │   │   ├── u2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── u2.py
│   │   │   │   └── updater.py
│   │   │   ├── u2_st/
│   │   │   │   ├── __init__.py
│   │   │   │   └── u2_st.py
│   │   │   ├── wav2vec2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── modules/
│   │   │   │   │   ├── VanillaNN.py
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── activations.py
│   │   │   │   │   ├── containers.py
│   │   │   │   │   ├── linear.py
│   │   │   │   │   ├── modeling_outputs.py
│   │   │   │   │   ├── modeling_wav2vec2.py
│   │   │   │   │   ├── normalization.py
│   │   │   │   │   └── wav2vec2_model.py
│   │   │   │   ├── processing/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── signal_processing.py
│   │   │   │   │   └── speech_augmentation.py
│   │   │   │   └── wav2vec2_ASR.py
│   │   │   ├── wavlm/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── modules/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── activations.py
│   │   │   │   │   ├── functional.py
│   │   │   │   │   └── modules.py
│   │   │   │   ├── wavlm_asr.py
│   │   │   │   └── wavlm_paddle.py
│   │   │   └── whisper/
│   │   │       ├── __init__.py
│   │   │       ├── tokenizer.py
│   │   │       ├── utils.py
│   │   │       ├── whisper.py
│   │   │       └── whisper_LICENSE
│   │   ├── modules/
│   │   │   ├── __init__.py
│   │   │   ├── activation.py
│   │   │   ├── align.py
│   │   │   ├── attention.py
│   │   │   ├── cmvn.py
│   │   │   ├── conformer_convolution.py
│   │   │   ├── conv2d.py
│   │   │   ├── crf.py
│   │   │   ├── ctc.py
│   │   │   ├── decoder.py
│   │   │   ├── decoder_layer.py
│   │   │   ├── embedding.py
│   │   │   ├── encoder.py
│   │   │   ├── encoder_layer.py
│   │   │   ├── fbank.py
│   │   │   ├── initializer.py
│   │   │   ├── loss.py
│   │   │   ├── mask.py
│   │   │   ├── positionwise_feed_forward.py
│   │   │   ├── subsampling.py
│   │   │   └── time_reduction.py
│   │   ├── training/
│   │   │   ├── __init__.py
│   │   │   ├── cli.py
│   │   │   ├── extensions/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── evaluator.py
│   │   │   │   ├── extension.py
│   │   │   │   ├── plot.py
│   │   │   │   ├── snapshot.py
│   │   │   │   └── visualizer.py
│   │   │   ├── optimizer/
│   │   │   │   ├── __init__.py
│   │   │   │   └── adadelta.py
│   │   │   ├── reporter.py
│   │   │   ├── scheduler.py
│   │   │   ├── timer.py
│   │   │   ├── trainer.py
│   │   │   ├── triggers/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── compare_value_trigger.py
│   │   │   │   ├── interval_trigger.py
│   │   │   │   ├── limit_trigger.py
│   │   │   │   ├── time_trigger.py
│   │   │   │   └── utils.py
│   │   │   └── updaters/
│   │   │       ├── __init__.py
│   │   │       ├── standard_updater.py
│   │   │       ├── trainer.py
│   │   │       └── updater.py
│   │   └── utils/
│   │       ├── __init__.py
│   │       ├── asr_utils.py
│   │       ├── bleu_score.py
│   │       ├── check_kwargs.py
│   │       ├── checkpoint.py
│   │       ├── cli_readers.py
│   │       ├── cli_utils.py
│   │       ├── cli_writers.py
│   │       ├── ctc_utils.py
│   │       ├── dynamic_import.py
│   │       ├── dynamic_pip_install.py
│   │       ├── error_rate.py
│   │       ├── layer_tools.py
│   │       ├── log.py
│   │       ├── mp_tools.py
│   │       ├── profiler.py
│   │       ├── socket_server.py
│   │       ├── spec_augment.py
│   │       ├── tensor_utils.py
│   │       ├── text_grid.py
│   │       └── utility.py
│   ├── server/
│   │   ├── README.md
│   │   ├── README_cn.md
│   │   ├── __init__.py
│   │   ├── base_commands.py
│   │   ├── bin/
│   │   │   ├── __init__.py
│   │   │   ├── paddlespeech_client.py
│   │   │   └── paddlespeech_server.py
│   │   ├── conf/
│   │   │   ├── application.yaml
│   │   │   ├── tts_online_application.yaml
│   │   │   ├── vector_application.yaml
│   │   │   ├── ws_conformer_application.yaml
│   │   │   ├── ws_conformer_wenetspeech_application_faster.yaml
│   │   │   └── ws_ds2_application.yaml
│   │   ├── engine/
│   │   │   ├── __init__.py
│   │   │   ├── acs/
│   │   │   │   ├── __init__.py
│   │   │   │   └── python/
│   │   │   │       ├── __init__.py
│   │   │   │       └── acs_engine.py
│   │   │   ├── asr/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── online/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── ctc_endpoint.py
│   │   │   │   │   ├── ctc_search.py
│   │   │   │   │   ├── onnx/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── asr_engine.py
│   │   │   │   │   ├── paddleinference/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── asr_engine.py
│   │   │   │   │   └── python/
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       └── asr_engine.py
│   │   │   │   ├── paddleinference/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── asr_engine.py
│   │   │   │   └── python/
│   │   │   │       ├── __init__.py
│   │   │   │       └── asr_engine.py
│   │   │   ├── base_engine.py
│   │   │   ├── cls/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── paddleinference/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── cls_engine.py
│   │   │   │   └── python/
│   │   │   │       ├── __init__.py
│   │   │   │       └── cls_engine.py
│   │   │   ├── engine_factory.py
│   │   │   ├── engine_pool.py
│   │   │   ├── engine_warmup.py
│   │   │   ├── text/
│   │   │   │   ├── __init__.py
│   │   │   │   └── python/
│   │   │   │       ├── __init__.py
│   │   │   │       └── text_engine.py
│   │   │   ├── tts/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── online/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── onnx/
│   │   │   │   │   │   ├── __init__.py
│   │   │   │   │   │   └── tts_engine.py
│   │   │   │   │   └── python/
│   │   │   │   │       ├── __init__.py
│   │   │   │   │       └── tts_engine.py
│   │   │   │   ├── paddleinference/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── tts_engine.py
│   │   │   │   └── python/
│   │   │   │       ├── __init__.py
│   │   │   │       └── tts_engine.py
│   │   │   └── vector/
│   │   │       ├── __init__.py
│   │   │       └── python/
│   │   │           ├── __init__.py
│   │   │           └── vector_engine.py
│   │   ├── entry.py
│   │   ├── executor.py
│   │   ├── restful/
│   │   │   ├── __init__.py
│   │   │   ├── acs_api.py
│   │   │   ├── api.py
│   │   │   ├── asr_api.py
│   │   │   ├── cls_api.py
│   │   │   ├── request.py
│   │   │   ├── response.py
│   │   │   ├── text_api.py
│   │   │   ├── tts_api.py
│   │   │   └── vector_api.py
│   │   ├── tests/
│   │   │   ├── __init__.py
│   │   │   ├── asr/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── offline/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── http_client.py
│   │   │   │   └── online/
│   │   │   │       ├── README.md
│   │   │   │       ├── README_cn.md
│   │   │   │       └── microphone_client.py
│   │   │   ├── text/
│   │   │   │   └── http_client.py
│   │   │   └── tts/
│   │   │       ├── offline/
│   │   │       │   └── http_client.py
│   │   │       └── online/
│   │   │           ├── http_client.py
│   │   │           └── ws_client.py
│   │   ├── util.py
│   │   ├── utils/
│   │   │   ├── __init__.py
│   │   │   ├── audio_handler.py
│   │   │   ├── audio_process.py
│   │   │   ├── buffer.py
│   │   │   ├── config.py
│   │   │   ├── errors.py
│   │   │   ├── exception.py
│   │   │   ├── onnx_infer.py
│   │   │   ├── paddle_predictor.py
│   │   │   ├── util.py
│   │   │   └── vad.py
│   │   └── ws/
│   │       ├── __init__.py
│   │       ├── api.py
│   │       ├── asr_api.py
│   │       └── tts_api.py
│   ├── t2s/
│   │   ├── __init__.py
│   │   ├── assets/
│   │   │   ├── __init__.py
│   │   │   ├── csmsc_test.txt
│   │   │   ├── sentences.txt
│   │   │   ├── sentences_canton.txt
│   │   │   ├── sentences_en.txt
│   │   │   ├── sentences_mix.txt
│   │   │   ├── sentences_sing.txt
│   │   │   └── sentences_ssml.txt
│   │   ├── audio/
│   │   │   ├── __init__.py
│   │   │   ├── audio.py
│   │   │   ├── codec.py
│   │   │   └── spec_normalizer.py
│   │   ├── datasets/
│   │   │   ├── __init__.py
│   │   │   ├── am_batch_fn.py
│   │   │   ├── batch.py
│   │   │   ├── data_table.py
│   │   │   ├── dataset.py
│   │   │   ├── get_feats.py
│   │   │   ├── ljspeech.py
│   │   │   ├── preprocess_utils.py
│   │   │   ├── sampler.py
│   │   │   └── vocoder_batch_fn.py
│   │   ├── exps/
│   │   │   ├── PTQ_dynamic.py
│   │   │   ├── PTQ_static.py
│   │   │   ├── __init__.py
│   │   │   ├── diffsinger/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── gen_gta_mel.py
│   │   │   │   ├── get_minmax.py
│   │   │   │   ├── normalize.py
│   │   │   │   ├── preprocess.py
│   │   │   │   └── train.py
│   │   │   ├── dygraph_to_static.py
│   │   │   ├── ernie_sat/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── align.py
│   │   │   │   ├── normalize.py
│   │   │   │   ├── preprocess.py
│   │   │   │   ├── synthesize.py
│   │   │   │   ├── synthesize_e2e.py
│   │   │   │   ├── train.py
│   │   │   │   └── utils.py
│   │   │   ├── fastspeech2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── gen_gta_mel.py
│   │   │   │   ├── normalize.py
│   │   │   │   ├── preprocess.py
│   │   │   │   ├── train.py
│   │   │   │   └── vc2_infer.py
│   │   │   ├── gan_vocoder/
│   │   │   │   ├── README.md
│   │   │   │   ├── __init__.py
│   │   │   │   ├── hifigan/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── train.py
│   │   │   │   ├── multi_band_melgan/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── train.py
│   │   │   │   ├── normalize.py
│   │   │   │   ├── parallelwave_gan/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── synthesize_from_wav.py
│   │   │   │   │   └── train.py
│   │   │   │   ├── preprocess.py
│   │   │   │   ├── style_melgan/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── train.py
│   │   │   │   └── synthesize.py
│   │   │   ├── inference.py
│   │   │   ├── inference_streaming.py
│   │   │   ├── jets/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── inference.py
│   │   │   │   ├── normalize.py
│   │   │   │   ├── preprocess.py
│   │   │   │   ├── synthesize.py
│   │   │   │   ├── synthesize_e2e.py
│   │   │   │   └── train.py
│   │   │   ├── lite_predict.py
│   │   │   ├── lite_predict_streaming.py
│   │   │   ├── lite_syn_utils.py
│   │   │   ├── ort_predict.py
│   │   │   ├── ort_predict_e2e.py
│   │   │   ├── ort_predict_streaming.py
│   │   │   ├── speedyspeech/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── gen_gta_mel.py
│   │   │   │   ├── inference.py
│   │   │   │   ├── normalize.py
│   │   │   │   ├── preprocess.py
│   │   │   │   ├── synthesize_e2e.py
│   │   │   │   └── train.py
│   │   │   ├── starganv2_vc/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── normalize.py
│   │   │   │   ├── preprocess.py
│   │   │   │   ├── train.py
│   │   │   │   └── vc.py
│   │   │   ├── stream_play_tts.py
│   │   │   ├── syn_utils.py
│   │   │   ├── synthesize.py
│   │   │   ├── synthesize_e2e.py
│   │   │   ├── synthesize_streaming.py
│   │   │   ├── tacotron2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── preprocess.py
│   │   │   │   └── train.py
│   │   │   ├── transformer_tts/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── normalize.py
│   │   │   │   ├── preprocess.py
│   │   │   │   ├── synthesize.py
│   │   │   │   ├── synthesize_e2e.py
│   │   │   │   └── train.py
│   │   │   ├── vits/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── inference.py
│   │   │   │   ├── lite_predict.py
│   │   │   │   ├── normalize.py
│   │   │   │   ├── preprocess.py
│   │   │   │   ├── synthesize.py
│   │   │   │   ├── synthesize_e2e.py
│   │   │   │   ├── train.py
│   │   │   │   └── voice_cloning.py
│   │   │   ├── voice_cloning.py
│   │   │   ├── waveflow/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── config.py
│   │   │   │   ├── ljspeech.py
│   │   │   │   ├── preprocess.py
│   │   │   │   ├── synthesize.py
│   │   │   │   └── train.py
│   │   │   └── wavernn/
│   │   │       ├── __init__.py
│   │   │       ├── synthesize.py
│   │   │       └── train.py
│   │   ├── frontend/
│   │   │   ├── __init__.py
│   │   │   ├── arpabet.py
│   │   │   ├── canton_frontend.py
│   │   │   ├── en_frontend.py
│   │   │   ├── g2pw/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── dataset.py
│   │   │   │   ├── onnx_api.py
│   │   │   │   └── utils.py
│   │   │   ├── generate_lexicon.py
│   │   │   ├── mix_frontend.py
│   │   │   ├── normalizer/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── abbrrviation.py
│   │   │   │   ├── acronyms.py
│   │   │   │   ├── normalizer.py
│   │   │   │   ├── numbers.py
│   │   │   │   └── width.py
│   │   │   ├── phonectic.py
│   │   │   ├── polyphonic.py
│   │   │   ├── polyphonic.yaml
│   │   │   ├── punctuation.py
│   │   │   ├── rhy_prediction/
│   │   │   │   ├── __init__.py
│   │   │   │   └── rhy_predictor.py
│   │   │   ├── sing_frontend.py
│   │   │   ├── ssml/
│   │   │   │   ├── __init__.py
│   │   │   │   └── xml_processor.py
│   │   │   ├── tone_sandhi.py
│   │   │   ├── vocab.py
│   │   │   ├── zh_frontend.py
│   │   │   └── zh_normalization/
│   │   │       ├── README.md
│   │   │       ├── __init__.py
│   │   │       ├── char_convert.py
│   │   │       ├── chronology.py
│   │   │       ├── constants.py
│   │   │       ├── num.py
│   │   │       ├── phonecode.py
│   │   │       ├── quantifier.py
│   │   │       └── text_normlization.py
│   │   ├── models/
│   │   │   ├── __init__.py
│   │   │   ├── diffsinger/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── diffsinger.py
│   │   │   │   ├── diffsinger_updater.py
│   │   │   │   └── fastspeech2midi.py
│   │   │   ├── ernie_sat/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── ernie_sat.py
│   │   │   │   └── ernie_sat_updater.py
│   │   │   ├── fastspeech2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── fastspeech2.py
│   │   │   │   └── fastspeech2_updater.py
│   │   │   ├── hifigan/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── hifigan.py
│   │   │   │   └── hifigan_updater.py
│   │   │   ├── jets/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── alignments.py
│   │   │   │   ├── generator.py
│   │   │   │   ├── jets.py
│   │   │   │   ├── jets_updater.py
│   │   │   │   └── length_regulator.py
│   │   │   ├── melgan/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── melgan.py
│   │   │   │   ├── multi_band_melgan_updater.py
│   │   │   │   ├── style_melgan.py
│   │   │   │   └── style_melgan_updater.py
│   │   │   ├── parallel_wavegan/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── parallel_wavegan.py
│   │   │   │   └── parallel_wavegan_updater.py
│   │   │   ├── speedyspeech/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── speedyspeech.py
│   │   │   │   └── speedyspeech_updater.py
│   │   │   ├── starganv2_vc/
│   │   │   │   ├── AuxiliaryASR/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── config.yml
│   │   │   │   │   ├── layers.py
│   │   │   │   │   └── model.py
│   │   │   │   ├── JDCNet/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   └── model.py
│   │   │   │   ├── __init__.py
│   │   │   │   ├── losses.py
│   │   │   │   ├── starganv2_vc.py
│   │   │   │   ├── starganv2_vc_updater.py
│   │   │   │   └── transforms.py
│   │   │   ├── tacotron2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── tacotron2.py
│   │   │   │   └── tacotron2_updater.py
│   │   │   ├── transformer_tts/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── transformer_tts.py
│   │   │   │   └── transformer_tts_updater.py
│   │   │   ├── vits/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── duration_predictor.py
│   │   │   │   ├── flow.py
│   │   │   │   ├── generator.py
│   │   │   │   ├── monotonic_align/
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── core.pyx
│   │   │   │   │   └── setup.py
│   │   │   │   ├── posterior_encoder.py
│   │   │   │   ├── residual_coupling.py
│   │   │   │   ├── text_encoder.py
│   │   │   │   ├── transform.py
│   │   │   │   ├── vits.py
│   │   │   │   ├── vits_updater.py
│   │   │   │   └── wavenet/
│   │   │   │       ├── __init__.py
│   │   │   │       ├── residual_block.py
│   │   │   │       └── wavenet.py
│   │   │   ├── waveflow.py
│   │   │   └── wavernn/
│   │   │       ├── __init__.py
│   │   │       ├── wavernn.py
│   │   │       └── wavernn_updater.py
│   │   ├── modules/
│   │   │   ├── __init__.py
│   │   │   ├── activation.py
│   │   │   ├── adversarial_loss/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── gradient_reversal.py
│   │   │   │   └── speaker_classifier.py
│   │   │   ├── causal_conv.py
│   │   │   ├── conformer/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── convolution.py
│   │   │   │   └── encoder_layer.py
│   │   │   ├── conv.py
│   │   │   ├── diffnet.py
│   │   │   ├── diffusion.py
│   │   │   ├── fftconv1d.py
│   │   │   ├── geometry.py
│   │   │   ├── layer_norm.py
│   │   │   ├── losses.py
│   │   │   ├── masked_fill.py
│   │   │   ├── nets_utils.py
│   │   │   ├── normalizer.py
│   │   │   ├── positional_encoding.py
│   │   │   ├── pqmf.py
│   │   │   ├── predictor/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── duration_predictor.py
│   │   │   │   ├── length_regulator.py
│   │   │   │   └── variance_predictor.py
│   │   │   ├── residual_block.py
│   │   │   ├── residual_stack.py
│   │   │   ├── style_encoder.py
│   │   │   ├── tacotron2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── attentions.py
│   │   │   │   ├── decoder.py
│   │   │   │   └── encoder.py
│   │   │   ├── tade_res_block.py
│   │   │   ├── transformer/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── attention.py
│   │   │   │   ├── decoder.py
│   │   │   │   ├── decoder_layer.py
│   │   │   │   ├── embedding.py
│   │   │   │   ├── encoder.py
│   │   │   │   ├── encoder_layer.py
│   │   │   │   ├── lightconv.py
│   │   │   │   ├── mask.py
│   │   │   │   ├── multi_layer_conv.py
│   │   │   │   ├── positionwise_feed_forward.py
│   │   │   │   ├── repeat.py
│   │   │   │   └── subsampling.py
│   │   │   ├── upsample.py
│   │   │   └── wavenet_denoiser.py
│   │   ├── training/
│   │   │   ├── __init__.py
│   │   │   ├── cli.py
│   │   │   ├── default_config.py
│   │   │   ├── experiment.py
│   │   │   ├── extension.py
│   │   │   ├── extensions/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── evaluator.py
│   │   │   │   ├── snapshot.py
│   │   │   │   └── visualizer.py
│   │   │   ├── optimizer.py
│   │   │   ├── reporter.py
│   │   │   ├── seeding.py
│   │   │   ├── trainer.py
│   │   │   ├── trigger.py
│   │   │   ├── triggers/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── interval_trigger.py
│   │   │   │   ├── limit_trigger.py
│   │   │   │   └── time_trigger.py
│   │   │   ├── updater.py
│   │   │   └── updaters/
│   │   │       ├── __init__.py
│   │   │       └── standard_updater.py
│   │   └── utils/
│   │       ├── __init__.py
│   │       ├── checkpoint.py
│   │       ├── display.py
│   │       ├── error_rate.py
│   │       ├── h5_utils.py
│   │       ├── internals.py
│   │       ├── layer_tools.py
│   │       ├── mp_tools.py
│   │       ├── profiler.py
│   │       └── scheduler.py
│   ├── text/
│   │   ├── __init__.py
│   │   ├── exps/
│   │   │   ├── __init__.py
│   │   │   └── ernie_linear/
│   │   │       ├── __init__.py
│   │   │       ├── avg_model.py
│   │   │       ├── punc_restore.py
│   │   │       ├── test.py
│   │   │       └── train.py
│   │   └── models/
│   │       ├── __init__.py
│   │       ├── ernie_crf/
│   │       │   ├── __init__.py
│   │       │   └── model.py
│   │       └── ernie_linear/
│   │           ├── __init__.py
│   │           ├── dataset.py
│   │           ├── ernie_linear.py
│   │           └── ernie_linear_updater.py
│   ├── utils/
│   │   ├── __init__.py
│   │   ├── argparse.py
│   │   ├── dynamic_import.py
│   │   ├── env.py
│   │   └── initialize.py
│   └── vector/
│       ├── __init__.py
│       ├── cluster/
│       │   ├── __init__.py
│       │   ├── diarization.py
│       │   └── plda.py
│       ├── exps/
│       │   ├── __init__.py
│       │   ├── ecapa_tdnn/
│       │   │   ├── extract_emb.py
│       │   │   ├── test.py
│       │   │   └── train.py
│       │   └── ge2e/
│       │       ├── __init__.py
│       │       ├── audio_processor.py
│       │       ├── config.py
│       │       ├── dataset_processors.py
│       │       ├── inference.py
│       │       ├── preprocess.py
│       │       ├── random_cycle.py
│       │       ├── speaker_verification_dataset.py
│       │       └── train.py
│       ├── io/
│       │   ├── __init__.py
│       │   ├── augment.py
│       │   ├── batch.py
│       │   ├── dataset.py
│       │   ├── dataset_from_json.py
│       │   ├── embedding_norm.py
│       │   └── signal_processing.py
│       ├── models/
│       │   ├── __init__.py
│       │   ├── ecapa_tdnn.py
│       │   └── lstm_speaker_encoder.py
│       ├── modules/
│       │   ├── __init__.py
│       │   ├── layer.py
│       │   ├── loss.py
│       │   └── sid_model.py
│       ├── training/
│       │   ├── __init__.py
│       │   ├── scheduler.py
│       │   └── seeding.py
│       └── utils/
│           ├── __init__.py
│           ├── time.py
│           └── vector_utils.py
├── runtime/
│   ├── .clang-format
│   ├── .gitignore
│   ├── CMakeLists.txt
│   ├── README.md
│   ├── build.sh
│   ├── build_android.sh
│   ├── build_ios.sh
│   ├── cmake/
│   │   ├── EnableCMP0048.cmake
│   │   ├── EnableCMP0077.cmake
│   │   ├── FindGFortranLibs.cmake
│   │   ├── absl.cmake
│   │   ├── boost.cmake
│   │   ├── eigen.cmake
│   │   ├── fastdeploy.cmake
│   │   ├── gflags.cmake
│   │   ├── glog.cmake
│   │   ├── gtest.cmake
│   │   ├── kenlm.cmake
│   │   ├── libsndfile.cmake
│   │   ├── openblas.cmake
│   │   ├── openfst.cmake
│   │   ├── paddleinference.cmake
│   │   ├── pybind.cmake
│   │   ├── summary.cmake
│   │   └── system.cmake
│   ├── docker/
│   │   └── .gitkeep
│   ├── engine/
│   │   ├── CMakeLists.txt
│   │   ├── asr/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── decoder/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── common.h
│   │   │   │   ├── ctc_beam_search_opt.h
│   │   │   │   ├── ctc_prefix_beam_search_decoder.cc
│   │   │   │   ├── ctc_prefix_beam_search_decoder.h
│   │   │   │   ├── ctc_prefix_beam_search_decoder_main.cc
│   │   │   │   ├── ctc_prefix_beam_search_score.h
│   │   │   │   ├── ctc_tlg_decoder.cc
│   │   │   │   ├── ctc_tlg_decoder.h
│   │   │   │   ├── ctc_tlg_decoder_main.cc
│   │   │   │   ├── decoder_itf.h
│   │   │   │   └── param.h
│   │   │   ├── nnet/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── decodable.cc
│   │   │   │   ├── decodable.h
│   │   │   │   ├── nnet_itf.h
│   │   │   │   ├── nnet_producer.cc
│   │   │   │   ├── nnet_producer.h
│   │   │   │   ├── u2_nnet.cc
│   │   │   │   ├── u2_nnet.h
│   │   │   │   ├── u2_nnet_main.cc
│   │   │   │   ├── u2_nnet_thread_main.cc
│   │   │   │   ├── u2_onnx_nnet.cc
│   │   │   │   └── u2_onnx_nnet.h
│   │   │   ├── recognizer/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── recognizer.cc
│   │   │   │   ├── recognizer.h
│   │   │   │   ├── recognizer_batch_main.cc
│   │   │   │   ├── recognizer_batch_main2.cc
│   │   │   │   ├── recognizer_controller.cc
│   │   │   │   ├── recognizer_controller.h
│   │   │   │   ├── recognizer_controller_impl.cc
│   │   │   │   ├── recognizer_controller_impl.h
│   │   │   │   ├── recognizer_instance.cc
│   │   │   │   ├── recognizer_instance.h
│   │   │   │   ├── recognizer_main.cc
│   │   │   │   └── recognizer_resource.h
│   │   │   └── server/
│   │   │       ├── CMakeLists.txt
│   │   │       └── websocket/
│   │   │           ├── CMakeLists.txt
│   │   │           ├── websocket_client.cc
│   │   │           ├── websocket_client.h
│   │   │           ├── websocket_client_main.cc
│   │   │           ├── websocket_server.cc
│   │   │           ├── websocket_server.h
│   │   │           └── websocket_server_main.cc
│   │   ├── audio_classification/
│   │   │   ├── CMakeLists.txt
│   │   │   └── nnet/
│   │   │       ├── CMakeLists.txt
│   │   │       ├── panns_interface.cc
│   │   │       ├── panns_interface.h
│   │   │       ├── panns_nnet.cc
│   │   │       ├── panns_nnet.h
│   │   │       └── panns_nnet_main.cc
│   │   ├── codelab/
│   │   │   ├── CMakeLists.txt
│   │   │   └── README.md
│   │   ├── common/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── base/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── basic_types.h
│   │   │   │   ├── common.h
│   │   │   │   ├── config.h
│   │   │   │   ├── flags.h.in
│   │   │   │   ├── glog_utils.cc
│   │   │   │   ├── glog_utils.h
│   │   │   │   ├── log.h.in
│   │   │   │   ├── log_impl.cc
│   │   │   │   ├── log_impl.h
│   │   │   │   ├── macros.h
│   │   │   │   ├── safe_queue.h
│   │   │   │   ├── safe_queue_inl.h
│   │   │   │   └── thread_pool.h
│   │   │   ├── frontend/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── assembler.cc
│   │   │   │   ├── assembler.h
│   │   │   │   ├── audio_cache.cc
│   │   │   │   ├── audio_cache.h
│   │   │   │   ├── cmvn.cc
│   │   │   │   ├── cmvn.h
│   │   │   │   ├── compute_fbank_main.cc
│   │   │   │   ├── compute_linear_spectrogram_main.cc
│   │   │   │   ├── data_cache.h
│   │   │   │   ├── db_norm.cc
│   │   │   │   ├── db_norm.h
│   │   │   │   ├── fbank.h
│   │   │   │   ├── feature-fbank.cc
│   │   │   │   ├── feature-fbank.h
│   │   │   │   ├── feature-functions.cc
│   │   │   │   ├── feature-functions.h
│   │   │   │   ├── feature-window.cc
│   │   │   │   ├── feature-window.h
│   │   │   │   ├── feature_cache.cc
│   │   │   │   ├── feature_cache.h
│   │   │   │   ├── feature_common.h
│   │   │   │   ├── feature_common_inl.h
│   │   │   │   ├── feature_pipeline.cc
│   │   │   │   ├── feature_pipeline.h
│   │   │   │   ├── fftsg.c
│   │   │   │   ├── frontend_itf.h
│   │   │   │   ├── linear_spectrogram.cc
│   │   │   │   ├── linear_spectrogram.h
│   │   │   │   ├── mel-computations.cc
│   │   │   │   ├── mel-computations.h
│   │   │   │   ├── normalizer.h
│   │   │   │   ├── rfft.cc
│   │   │   │   ├── rfft.h
│   │   │   │   ├── wave-reader.cc
│   │   │   │   └── wave-reader.h
│   │   │   ├── matrix/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── kaldi-matrix-inl.h
│   │   │   │   ├── kaldi-matrix.cc
│   │   │   │   ├── kaldi-matrix.h
│   │   │   │   ├── kaldi-vector-inl.h
│   │   │   │   ├── kaldi-vector.cc
│   │   │   │   ├── kaldi-vector.h
│   │   │   │   └── matrix-common.h
│   │   │   └── utils/
│   │   │       ├── CMakeLists.txt
│   │   │       ├── audio_process.cc
│   │   │       ├── audio_process.h
│   │   │       ├── blank_process_test.cc
│   │   │       ├── file_utils.cc
│   │   │       ├── file_utils.h
│   │   │       ├── math.cc
│   │   │       ├── math.h
│   │   │       ├── picojson.h
│   │   │       ├── strings.cc
│   │   │       ├── strings.h
│   │   │       ├── strings_test.cc
│   │   │       ├── timer.cc
│   │   │       └── timer.h
│   │   ├── kaldi/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── base/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── io-funcs-inl.h
│   │   │   │   ├── io-funcs.cc
│   │   │   │   ├── io-funcs.h
│   │   │   │   ├── kaldi-common.h
│   │   │   │   ├── kaldi-error.cc
│   │   │   │   ├── kaldi-error.h
│   │   │   │   ├── kaldi-math.cc
│   │   │   │   ├── kaldi-math.h
│   │   │   │   ├── kaldi-types.h
│   │   │   │   ├── kaldi-utils.cc
│   │   │   │   ├── kaldi-utils.h
│   │   │   │   ├── timer.cc
│   │   │   │   ├── timer.h
│   │   │   │   └── version.h
│   │   │   ├── decoder/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── decodable-itf.h
│   │   │   │   ├── lattice-faster-decoder.cc
│   │   │   │   ├── lattice-faster-decoder.h
│   │   │   │   ├── lattice-faster-online-decoder.cc
│   │   │   │   └── lattice-faster-online-decoder.h
│   │   │   ├── fstbin/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── fstaddselfloops.cc
│   │   │   │   ├── fstdeterminizestar.cc
│   │   │   │   ├── fstisstochastic.cc
│   │   │   │   ├── fstminimizeencoded.cc
│   │   │   │   └── fsttablecompose.cc
│   │   │   ├── fstext/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── determinize-lattice-inl.h
│   │   │   │   ├── determinize-lattice.h
│   │   │   │   ├── determinize-star-inl.h
│   │   │   │   ├── determinize-star.h
│   │   │   │   ├── fstext-lib.h
│   │   │   │   ├── fstext-utils-inl.h
│   │   │   │   ├── fstext-utils.h
│   │   │   │   ├── kaldi-fst-io-inl.h
│   │   │   │   ├── kaldi-fst-io.cc
│   │   │   │   ├── kaldi-fst-io.h
│   │   │   │   ├── lattice-utils-inl.h
│   │   │   │   ├── lattice-utils.h
│   │   │   │   ├── lattice-weight.h
│   │   │   │   ├── pre-determinize-inl.h
│   │   │   │   ├── pre-determinize.h
│   │   │   │   ├── remove-eps-local-inl.h
│   │   │   │   ├── remove-eps-local.h
│   │   │   │   └── table-matcher.h
│   │   │   ├── lat/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── determinize-lattice-pruned.cc
│   │   │   │   ├── determinize-lattice-pruned.h
│   │   │   │   ├── kaldi-lattice.cc
│   │   │   │   ├── kaldi-lattice.h
│   │   │   │   ├── lattice-functions.cc
│   │   │   │   └── lattice-functions.h
│   │   │   ├── lm/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   ├── arpa-file-parser.cc
│   │   │   │   ├── arpa-file-parser.h
│   │   │   │   ├── arpa-lm-compiler.cc
│   │   │   │   └── arpa-lm-compiler.h
│   │   │   ├── lmbin/
│   │   │   │   ├── CMakeLists.txt
│   │   │   │   └── arpa2fst.cc
│   │   │   └── util/
│   │   │       ├── CMakeLists.txt
│   │   │       ├── basic-filebuf.h
│   │   │       ├── common-utils.h
│   │   │       ├── const-integer-set-inl.h
│   │   │       ├── const-integer-set.h
│   │   │       ├── edit-distance-inl.h
│   │   │       ├── edit-distance.h
│   │   │       ├── hash-list-inl.h
│   │   │       ├── hash-list.h
│   │   │       ├── kaldi-cygwin-io-inl.h
│   │   │       ├── kaldi-holder-inl.h
│   │   │       ├── kaldi-holder.cc
│   │   │       ├── kaldi-holder.h
│   │   │       ├── kaldi-io-inl.h
│   │   │       ├── kaldi-io.cc
│   │   │       ├── kaldi-io.h
│   │   │       ├── kaldi-pipebuf.h
│   │   │       ├── kaldi-semaphore.cc
│   │   │       ├── kaldi-semaphore.h
│   │   │       ├── kaldi-table-inl.h
│   │   │       ├── kaldi-table.cc
│   │   │       ├── kaldi-table.h
│   │   │       ├── kaldi-thread.cc
│   │   │       ├── kaldi-thread.h
│   │   │       ├── options-itf.h
│   │   │       ├── parse-options.cc
│   │   │       ├── parse-options.h
│   │   │       ├── simple-io-funcs.cc
│   │   │       ├── simple-io-funcs.h
│   │   │       ├── simple-options.cc
│   │   │       ├── simple-options.h
│   │   │       ├── stl-utils.h
│   │   │       ├── table-types.h
│   │   │       ├── text-utils.cc
│   │   │       └── text-utils.h
│   │   └── vad/
│   │       ├── CMakeLists.txt
│   │       ├── frontend/
│   │       │   └── wav.h
│   │       ├── interface/
│   │       │   ├── CMakeLists.txt
│   │       │   ├── vad_interface.cc
│   │       │   ├── vad_interface.h
│   │       │   └── vad_interface_main.cc
│   │       └── nnet/
│   │           ├── CMakeLists.txt
│   │           ├── vad.cc
│   │           ├── vad.h
│   │           └── vad_nnet_main.cc
│   ├── examples/
│   │   ├── .gitignore
│   │   ├── README.md
│   │   ├── android/
│   │   │   └── VadJni/
│   │   │       ├── .gitignore
│   │   │       ├── app/
│   │   │       │   ├── .gitignore
│   │   │       │   ├── build.gradle
│   │   │       │   ├── libs/
│   │   │       │   │   └── .gitkeep
│   │   │       │   ├── proguard-rules.pro
│   │   │       │   └── src/
│   │   │       │       ├── androidTest/
│   │   │       │       │   └── java/
│   │   │       │       │       └── com/
│   │   │       │       │           └── baidu/
│   │   │       │       │               └── paddlespeech/
│   │   │       │       │                   └── vadjni/
│   │   │       │       │                       └── ExampleInstrumentedTest.java
│   │   │       │       └── main/
│   │   │       │           ├── AndroidManifest.xml
│   │   │       │           ├── assets/
│   │   │       │           │   └── .gitkeep
│   │   │       │           ├── cpp/
│   │   │       │           │   ├── CMakeLists.txt
│   │   │       │           │   ├── native-lib.cpp
│   │   │       │           │   └── vad_interface.h
│   │   │       │           ├── java/
│   │   │       │           │   └── com/
│   │   │       │           │       └── baidu/
│   │   │       │           │           └── paddlespeech/
│   │   │       │           │               └── vadjni/
│   │   │       │           │                   └── MainActivity.java
│   │   │       │           └── res/
│   │   │       │               ├── drawable/
│   │   │       │               │   └── ic_launcher_background.xml
│   │   │       │               ├── drawable-v24/
│   │   │       │               │   └── ic_launcher_foreground.xml
│   │   │       │               ├── layout/
│   │   │       │               │   └── activity_main.xml
│   │   │       │               ├── mipmap-anydpi-v26/
│   │   │       │               │   ├── ic_launcher.xml
│   │   │       │               │   └── ic_launcher_round.xml
│   │   │       │               ├── mipmap-anydpi-v33/
│   │   │       │               │   └── ic_launcher.xml
│   │   │       │               ├── values/
│   │   │       │               │   ├── colors.xml
│   │   │       │               │   ├── strings.xml
│   │   │       │               │   └── themes.xml
│   │   │       │               ├── values-night/
│   │   │       │               │   └── themes.xml
│   │   │       │               └── xml/
│   │   │       │                   ├── backup_rules.xml
│   │   │       │                   └── data_extraction_rules.xml
│   │   │       ├── build.gradle
│   │   │       ├── gradle/
│   │   │       │   └── wrapper/
│   │   │       │       ├── gradle-wrapper.jar
│   │   │       │       └── gradle-wrapper.properties
│   │   │       ├── gradle.properties
│   │   │       ├── gradlew
│   │   │       ├── gradlew.bat
│   │   │       └── settings.gradle
│   │   ├── audio_classification/
│   │   │   ├── README.md
│   │   │   ├── android_demo/
│   │   │   │   ├── .gitignore
│   │   │   │   ├── app/
│   │   │   │   │   ├── .gitignore
│   │   │   │   │   ├── build.gradle
│   │   │   │   │   ├── proguard-rules.pro
│   │   │   │   │   └── src/
│   │   │   │   │       ├── androidTest/
│   │   │   │   │       │   └── java/
│   │   │   │   │       │       └── com/
│   │   │   │   │       │           └── example/
│   │   │   │   │       │               └── cls/
│   │   │   │   │       │                   └── ExampleInstrumentedTest.kt
│   │   │   │   │       └── main/
│   │   │   │   │           ├── AndroidManifest.xml
│   │   │   │   │           ├── cpp/
│   │   │   │   │           │   ├── CMakeLists.txt
│   │   │   │   │           │   ├── includes/
│   │   │   │   │           │   │   └── panns_interface.h
│   │   │   │   │           │   └── native-lib.cpp
│   │   │   │   │           ├── java/
│   │   │   │   │           │   └── com/
│   │   │   │   │           │       └── example/
│   │   │   │   │           │           └── cls/
│   │   │   │   │           │               └── MainActivity.kt
│   │   │   │   │           └── res/
│   │   │   │   │               ├── drawable/
│   │   │   │   │               │   └── ic_launcher_background.xml
│   │   │   │   │               ├── drawable-v24/
│   │   │   │   │               │   └── ic_launcher_foreground.xml
│   │   │   │   │               ├── layout/
│   │   │   │   │               │   └── activity_main.xml
│   │   │   │   │               ├── mipmap-anydpi-v26/
│   │   │   │   │               │   ├── ic_launcher.xml
│   │   │   │   │               │   └── ic_launcher_round.xml
│   │   │   │   │               ├── values/
│   │   │   │   │               │   ├── colors.xml
│   │   │   │   │               │   ├── strings.xml
│   │   │   │   │               │   └── themes.xml
│   │   │   │   │               ├── values-night/
│   │   │   │   │               │   └── themes.xml
│   │   │   │   │               └── xml/
│   │   │   │   │                   ├── backup_rules.xml
│   │   │   │   │                   └── data_extraction_rules.xml
│   │   │   │   ├── build.gradle
│   │   │   │   ├── gradle/
│   │   │   │   │   └── wrapper/
│   │   │   │   │       ├── gradle-wrapper.jar
│   │   │   │   │       └── gradle-wrapper.properties
│   │   │   │   ├── gradle.properties
│   │   │   │   ├── gradlew
│   │   │   │   ├── gradlew.bat
│   │   │   │   └── settings.gradle
│   │   │   ├── conf
│   │   │   ├── label_list
│   │   │   └── scp
│   │   ├── codelab/
│   │   │   ├── README.md
│   │   │   ├── decoder/
│   │   │   │   ├── .gitignore
│   │   │   │   ├── README.md
│   │   │   │   ├── path.sh
│   │   │   │   ├── run.sh
│   │   │   │   └── valgrind.sh
│   │   │   ├── feat/
│   │   │   │   ├── .gitignore
│   │   │   │   ├── README.md
│   │   │   │   ├── path.sh
│   │   │   │   ├── run.sh
│   │   │   │   └── valgrind.sh
│   │   │   ├── nnet/
│   │   │   │   ├── .gitignore
│   │   │   │   ├── README.md
│   │   │   │   ├── path.sh
│   │   │   │   ├── run.sh
│   │   │   │   └── valgrind.sh
│   │   │   └── u2/
│   │   │       ├── .gitignore
│   │   │       ├── README.md
│   │   │       ├── local/
│   │   │       │   ├── decode.sh
│   │   │       │   ├── feat.sh
│   │   │       │   ├── nnet.sh
│   │   │       │   └── recognizer.sh
│   │   │       ├── path.sh
│   │   │       └── run.sh
│   │   ├── custom_asr/
│   │   │   ├── README.md
│   │   │   ├── local/
│   │   │   │   ├── compile_lexicon_token_fst.sh
│   │   │   │   ├── mk_slot_graph.sh
│   │   │   │   ├── mk_tlg_with_slot.sh
│   │   │   │   └── train_lm_with_slot.sh
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── text_lm/
│   │   │   ├── .gitignore
│   │   │   ├── README.md
│   │   │   ├── local/
│   │   │   │   └── mmseg.py
│   │   │   ├── path.sh
│   │   │   └── run.sh
│   │   ├── u2pp_ol/
│   │   │   ├── README.md
│   │   │   └── wenetspeech/
│   │   │       ├── .gitignore
│   │   │       ├── README.md
│   │   │       ├── RESULTS.md
│   │   │       ├── local/
│   │   │       │   ├── aishell_train_lms.sh
│   │   │       │   ├── decode.sh
│   │   │       │   ├── feat.sh
│   │   │       │   ├── nnet.sh
│   │   │       │   ├── recognizer.sh
│   │   │       │   ├── recognizer_fastdeploy.sh
│   │   │       │   ├── recognizer_quant.sh
│   │   │       │   ├── recognizer_wfst.sh
│   │   │       │   ├── recognizer_wfst_fastdeploy.sh
│   │   │       │   ├── run_build_tlg.sh
│   │   │       │   └── split_data.sh
│   │   │       ├── path.sh
│   │   │       └── run.sh
│   │   └── vad/
│   │       ├── .gitignore
│   │       ├── README.md
│   │       ├── conf/
│   │       │   └── vad.ini
│   │       ├── local/
│   │       │   ├── build.sh
│   │       │   ├── build_android.sh
│   │       │   ├── decode.sh
│   │       │   └── download.sh
│   │       ├── path.sh
│   │       ├── run.sh
│   │       └── vad-android-demo/
│   │           ├── .gradle/
│   │           │   ├── 6.1.1/
│   │           │   │   └── gc.properties
│   │           │   ├── buildOutputCleanup/
│   │           │   │   └── cache.properties
│   │           │   └── vcs-1/
│   │           │       └── gc.properties
│   │           ├── LICENSE.md
│   │           ├── README
│   │           ├── README.md
│   │           ├── build.gradle
│   │           ├── example/
│   │           │   ├── .gitignore
│   │           │   ├── build.gradle
│   │           │   ├── local.properties
│   │           │   ├── proguard-rules.pro
│   │           │   └── src/
│   │           │       ├── androidTest/
│   │           │       │   └── java/
│   │           │       │       └── com/
│   │           │       │           └── konovalov/
│   │           │       │               └── vad/
│   │           │       │                   └── example/
│   │           │       │                       └── ExampleInstrumentedTest.java
│   │           │       └── main/
│   │           │           ├── AndroidManifest.xml
│   │           │           ├── java/
│   │           │           │   └── com/
│   │           │           │       └── konovalov/
│   │           │           │           └── vad/
│   │           │           │               └── example/
│   │           │           │                   ├── MainActivity.java
│   │           │           │                   └── recorder/
│   │           │           │                       ├── VoiceRecorder.java
│   │           │           │                       └── VoiceRecorderConfig.java
│   │           │           └── res/
│   │           │               ├── drawable/
│   │           │               │   └── ic_launcher_background.xml
│   │           │               ├── drawable-v24/
│   │           │               │   └── ic_launcher_foreground.xml
│   │           │               ├── layout/
│   │           │               │   └── activity_main.xml
│   │           │               ├── mipmap-anydpi-v26/
│   │           │               │   ├── ic_launcher.xml
│   │           │               │   └── ic_launcher_round.xml
│   │           │               └── values/
│   │           │                   ├── colors.xml
│   │           │                   ├── strings.xml
│   │           │                   └── styles.xml
│   │           ├── gradle/
│   │           │   └── wrapper/
│   │           │       ├── gradle-wrapper.jar
│   │           │       └── gradle-wrapper.properties
│   │           ├── gradle.properties
│   │           ├── gradlew
│   │           ├── gradlew.bat
│   │           ├── local.properties
│   │           ├── settings.gradle
│   │           └── vad/
│   │               ├── .gitignore
│   │               ├── build.gradle
│   │               ├── consumer-rules.pro
│   │               ├── proguard-rules.pro
│   │               └── src/
│   │                   ├── androidTest/
│   │                   │   └── java/
│   │                   │       └── com/
│   │                   │           └── konovalov/
│   │                   │               └── vad/
│   │                   │                   └── ExampleInstrumentedTest.java
│   │                   └── main/
│   │                       ├── AndroidManifest.xml
│   │                       ├── cpp/
│   │                       │   ├── CMakeLists.txt
│   │                       │   ├── includes/
│   │                       │   │   └── vad_interface.h
│   │                       │   └── native-lib.cpp
│   │                       ├── java/
│   │                       │   └── com/
│   │                       │       └── konovalov/
│   │                       │           └── vad/
│   │                       │               ├── Vad.java
│   │                       │               └── VadListener.java
│   │                       └── res/
│   │                           └── values/
│   │                               └── strings.xml
│   ├── patch/
│   │   ├── CPPLINT.cfg
│   │   ├── README.md
│   │   └── openfst/
│   │       └── src/
│   │           ├── include/
│   │           │   └── fst/
│   │           │       ├── flags.h
│   │           │       └── log.h
│   │           └── lib/
│   │               └── flags.cc
│   └── tools/
│       ├── clang-format.sh
│       ├── setup_valgrind.sh
│       └── venv.sh
├── setup.cfg
├── setup.py
├── tests/
│   ├── benchmark/
│   │   ├── conformer/
│   │   │   ├── README.md
│   │   │   ├── prepare.sh
│   │   │   ├── run.sh
│   │   │   └── run_benchmark.sh
│   │   └── pwgan/
│   │       ├── README.md
│   │       ├── run_all.sh
│   │       └── run_benchmark.sh
│   ├── chains/
│   │   ├── ds2/
│   │   │   ├── README.md
│   │   │   ├── ds2_params_lite_train_infer.txt
│   │   │   ├── ds2_params_whole_train_infer.txt
│   │   │   ├── lite_train_infer.sh
│   │   │   ├── prepare.sh
│   │   │   ├── speedyspeech_params_lite.txt
│   │   │   ├── test.sh
│   │   │   └── whole_train_infer.sh
│   │   └── speedyspeech/
│   │       ├── README.md
│   │       ├── infer.sh
│   │       ├── lite_train_infer.sh
│   │       ├── prepare.sh
│   │       ├── speedyspeech_params_lite_multi_gpu.txt
│   │       ├── speedyspeech_params_lite_single_gpu.txt
│   │       ├── speedyspeech_params_whole_multi_gpu.txt
│   │       ├── speedyspeech_params_whole_single_gpu.txt
│   │       ├── test.sh
│   │       └── whole_train_infer.sh
│   ├── test_tipc/
│   │   ├── barrier.sh
│   │   ├── benchmark_train.sh
│   │   ├── common_func.sh
│   │   ├── configs/
│   │   │   ├── conformer/
│   │   │   │   └── train_infer_python.txt
│   │   │   ├── mdtc/
│   │   │   │   └── train_infer_python.txt
│   │   │   └── pwgan/
│   │   │       └── train_infer_python.txt
│   │   ├── conformer/
│   │   │   └── scripts/
│   │   │       └── aishell_tiny.py
│   │   ├── docs/
│   │   │   └── benchmark_train.md
│   │   ├── prepare.sh
│   │   └── test_train_inference_python.sh
│   └── unit/
│       ├── asr/
│       │   ├── deepspeech2_model_test.py
│       │   ├── deepspeech2_online_model_test.py
│       │   ├── deepspeech2_online_model_test.sh
│       │   ├── error_rate_test.py
│       │   ├── mask_test.py
│       │   ├── reverse_pad_list.py
│       │   └── u2_model_test.py
│       ├── audiotools/
│       │   ├── core/
│       │   │   ├── test_audio_signal.py
│       │   │   ├── test_bands.py
│       │   │   ├── test_display.py
│       │   │   ├── test_dsp.py
│       │   │   ├── test_effects.py
│       │   │   ├── test_fftconv.py
│       │   │   ├── test_grad.py
│       │   │   ├── test_highpass.py
│       │   │   ├── test_loudness.py
│       │   │   ├── test_lowpass.py
│       │   │   └── test_util.py
│       │   ├── data/
│       │   │   ├── test_datasets.py
│       │   │   ├── test_preprocess.py
│       │   │   └── test_transforms.py
│       │   ├── ml/
│       │   │   ├── test_decorators.py
│       │   │   └── test_model.py
│       │   ├── test_audiotools.sh
│       │   └── test_post.py
│       ├── ci.sh
│       ├── cli/
│       │   ├── aishell_test_prepare.py
│       │   ├── calc_RTF_CER_by_aishell.sh
│       │   ├── path.sh
│       │   └── test_cli.sh
│       ├── doc/
│       │   └── test_cli.md
│       ├── server/
│       │   ├── offline/
│       │   │   ├── change_yaml.py
│       │   │   ├── conf/
│       │   │   │   └── application.yaml
│       │   │   └── test_server_client.sh
│       │   └── online/
│       │       └── tts/
│       │           ├── check_server/
│       │           │   ├── change_yaml.py
│       │           │   ├── conf/
│       │           │   │   └── application.yaml
│       │           │   ├── test.sh
│       │           │   ├── test_all.sh
│       │           │   └── tts_online_application.yaml
│       │           └── test_server/
│       │               └── test_http_client.py
│       ├── tts/
│       │   ├── test_data_table.py
│       │   ├── test_enfrontend.py
│       │   ├── test_expansion.py
│       │   ├── test_fftconv1d.py
│       │   ├── test_losses.py
│       │   ├── test_mixfrontend.py
│       │   ├── test_optimizer.py
│       │   ├── test_pwg.py
│       │   ├── test_raise.py
│       │   ├── test_reporter.py
│       │   ├── test_snapshot.py
│       │   ├── test_ssml.py
│       │   ├── test_stft.py
│       │   └── test_to_static.py
│       └── vector/
│           ├── conftest.py
│           └── test_augment.py
├── third_party/
│   ├── README.md
│   ├── __init__.py
│   ├── ctc_decoders/
│   │   ├── .gitignore
│   │   ├── COPYING.APACHE2.0
│   │   ├── COPYING.LESSER.3
│   │   ├── LICENSE
│   │   ├── __init__.py
│   │   ├── ctc_beam_search_decoder.cpp
│   │   ├── ctc_beam_search_decoder.h
│   │   ├── ctc_greedy_decoder.cpp
│   │   ├── ctc_greedy_decoder.h
│   │   ├── decoder_utils.cpp
│   │   ├── decoder_utils.h
│   │   ├── decoders.i
│   │   ├── path_trie.cpp
│   │   ├── path_trie.h
│   │   ├── scorer.cpp
│   │   ├── scorer.h
│   │   ├── setup.py
│   │   └── setup.sh
│   ├── install.sh
│   ├── install_win_ctc.bat
│   └── python_kaldi_features/
│       ├── .gitignore
│       ├── LICENSE
│       ├── MANIFEST
│       ├── README.rst
│       ├── docs/
│       │   ├── Makefile
│       │   ├── make.bat
│       │   └── source/
│       │       ├── conf.py
│       │       └── index.rst
│       ├── example.py
│       ├── python_speech_features/
│       │   ├── __init__.py
│       │   ├── base.py
│       │   ├── base_orig.py
│       │   ├── sigproc.py
│       │   └── sigproc_orig.py
│       ├── requirements.txt
│       ├── setup.py
│       └── test/
│           └── test_sigproc.py
├── tools/
│   ├── Dockerfile
│   ├── Makefile
│   ├── extras/
│   │   ├── README.md
│   │   ├── install_autolog.sh
│   │   ├── install_gcc.sh
│   │   ├── install_kaldi.sh
│   │   ├── install_kenlm.sh
│   │   ├── install_liblbfgs.sh
│   │   ├── install_mfa_v1.sh
│   │   ├── install_mfa_v2.sh
│   │   ├── install_miniconda.sh
│   │   ├── install_mkl.sh
│   │   ├── install_ngram.sh
│   │   ├── install_openblas.sh
│   │   ├── install_openfst.sh
│   │   ├── install_pynini.sh
│   │   ├── install_sclite.sh
│   │   ├── install_soundfile.sh
│   │   ├── install_sox.sh
│   │   ├── install_srilm.sh
│   │   ├── install_venv.sh
│   │   └── srilm.patch
│   ├── get_contributors.ipynb
│   ├── pre_commit.sh
│   ├── release_note.py
│   ├── setup_anaconda.sh
│   └── watermark.py
└── utils/
    ├── DER.py
    ├── README.md
    ├── __init__.py
    ├── addjson.py
    ├── apply-cmvn.py
    ├── avg.sh
    ├── avg_model.py
    ├── build_kenlm_model_from_arpa.sh
    ├── build_vocab.py
    ├── caculate_rtf.py
    ├── compute-cmvn-stats.py
    ├── compute-wer.py
    ├── compute_mean_std.py
    ├── compute_statistics.py
    ├── copy-feats.py
    ├── data2json.sh
    ├── dump.sh
    ├── dump_manifest.py
    ├── duration_from_maniefst.sh
    ├── espnet_json_to_manifest.py
    ├── feat-to-shape.py
    ├── feat_to_shape.sh
    ├── filter.py
    ├── filter_scp.pl
    ├── format_data.py
    ├── format_rsl.py
    ├── format_triplet_data.py
    ├── fst/
    │   ├── add_lex_disambig.pl
    │   ├── compile_lexicon_token_fst.sh
    │   ├── ctc_token_fst.py
    │   ├── ctc_token_fst_corrected.py
    │   ├── eps2disambig.pl
    │   ├── make_lexicon_fst.pl
    │   ├── make_tlg.sh
    │   ├── prepare_dict.py
    │   ├── remove_oovs.pl
    │   ├── rnnt_token_fst.py
    │   └── s2eps.pl
    ├── gen_duration_from_textgrid.py
    ├── generate_infer_yaml.py
    ├── json2trn.py
    ├── link_wav.py
    ├── log.sh
    ├── manifest_key_value.py
    ├── md-eval.pl
    ├── merge_scp2json.py
    ├── ngram_train.sh
    ├── pack_model.sh
    ├── parallel/
    │   └── run.pl
    ├── parse_options.sh
    ├── pd_env_collect.sh
    ├── profile.sh
    ├── reduce_data_dir.sh
    ├── remove_longshortdata.py
    ├── remove_longshortdata.sh
    ├── score_sclite.sh
    ├── scp2json.py
    ├── show_results.sh
    ├── spk2utt_to_utt2spk.pl
    ├── split_data.sh
    ├── split_json.sh
    ├── split_scp.pl
    ├── spm_decode
    ├── spm_encode
    ├── spm_train
    ├── tarball.sh
    ├── text2token.py
    ├── text_to_lexicon.py
    ├── tokenizer.perl
    ├── train_arpa_with_kenlm.sh
    ├── update_json.sh
    ├── utility.sh
    ├── utt2spk_to_spk2utt.pl
    └── zh_tn.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .clang-format
================================================
# This file is used by clang-format to autoformat paddle source code
#
# The clang-format is part of llvm toolchain.
# You need to install llvm and clang to format the source code.
#
# The basic usage is,
#   clang-format -i -style=file PATH/TO/SOURCE/CODE
#
# The -style=file option implicitly uses the ".clang-format" file located in
# one of the parent directories.
# The -i option means the files are changed in place.
#
# The documentation of clang-format is at
#   http://clang.llvm.org/docs/ClangFormat.html
#   http://clang.llvm.org/docs/ClangFormatStyleOptions.html
---
Language:        Cpp
BasedOnStyle:  Google
IndentWidth:     4
TabWidth:        4
ContinuationIndentWidth: 4
MaxEmptyLinesToKeep: 2
AccessModifierOffset: -2  # Access modifiers (private/protected/public) are outdented by 2 from the member indent
Standard:  Cpp11
AllowAllParametersOfDeclarationOnNextLine: true
BinPackParameters: false
BinPackArguments: false
...


================================================
FILE: .flake8
================================================
[flake8]

########## OPTIONS ##########
# Set the maximum length that any line (with some exceptions) may be.
max-line-length = 120


################### FILE PATTERNS ##########################
# Provide a comma-separated list of glob patterns to exclude from checks.
exclude =
    # git folder
    .git,
    # python cache
    __pycache__,
    # third party
    utils/compute-wer.py,
    third_party/,
# Provide a comma-separated list of glob patterns to include for checks.
filename =
    *.py


########## RULES ##########

# ERROR CODES
#
# E/W  - PEP8 errors/warnings (pycodestyle)
# F    - linting errors (pyflakes)
# C    - McCabe complexity error (mccabe)
#
# W503 - line break before binary operator

# Specify a list of codes to ignore.
ignore =
    W503
    E252,E262,E127,E265,E126,E266,E241,E261,E128,E125,E129
    W291,W293,W605
    E203,E305,E402,E501,E721,E741,F403,F405,F821,F841,F999,W503,W504,C408,E302,W291,E303,
    # shebang has extra meaning in fbcode lints, so I think it's not worth trying
    # to line this up with executable bit
    EXE001,
    # these ignores are from flake8-bugbear; please fix!
    B007,B008,
    # these ignores are from flake8-comprehensions; please fix!
    C400,C401,C402,C403,C404,C405,C407,C411,C413,C414,C415


per-file-ignores =
    */__init__.py: F401

# Specify the list of error codes you wish Flake8 to report.
select =
    E,
    W,
    F,
    C


================================================
FILE: .gitconfig
================================================
[alias]
  st = status
  ci = commit
  br = branch
  co = checkout
  df = diff
  l = log --pretty=format:\"%h %ad | %s%d [%an]\" --graph --date=short
  ll = log --stat

[merge]
  tool = vimdiff

[core]
  excludesfile = ~/.gitignore
  editor = vim

[color]
  branch = auto
  diff = auto
  status = auto

[color "branch"]
  current = yellow reverse
  local = yellow
  remote = green

[color "diff"]
  meta = yellow bold
  frag = magenta bold
  old = red bold
  new = green bold

[color "status"]
  added = yellow
  changed = green
  untracked = cyan

[push]
  default = matching

[credential]
  helper = store

[user]
  name =
  email =


================================================
FILE: .github/CODE_OF_CONDUCT.md
================================================
# Contributor Covenant Code of Conduct

## Our Pledge

In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to making participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, sex characteristics, gender identity and expression,
level of experience, education, socio-economic status, nationality, personal
appearance, race, religion, or sexual identity and orientation.

## Our Standards

Examples of behavior that contributes to creating a positive environment
include:

* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members

Examples of unacceptable behavior by participants include:

* The use of sexualized language or imagery and unwelcome sexual attention or
 advances
* Racial or political allusions          
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
 address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
 professional setting

## Our Responsibilities

Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.

Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.

## Scope

This Code of Conduct applies both within project spaces and in public spaces
when an individual is representing the project or its community. Examples of
representing a project or community include using an official project e-mail
address, posting via an official social media account, or acting as an appointed
representative at an online or offline event. Representation of a project may be
further defined and clarified by project maintainers.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at paddlespeech@baidu.com. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.

Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html

[homepage]: https://www.contributor-covenant.org

For answers to common questions about this code of conduct, see
https://www.contributor-covenant.org/faq


================================================
FILE: .github/CONTRIBUTING.md
================================================
# 💡 paddlespeech 提交代码须知

### Discussed in https://github.com/PaddlePaddle/PaddleSpeech/discussions/1326

<div type='discussions-op-text'>

<sup>Originally posted by **yt605155624** January 12, 2022</sup>
1. 写完代码之后可以用我们的 pre-commit 检查一下代码格式，注意只改自己修改的代码的格式即可，其他的代码有可能也被改了格式，不要 add 就好
```
pip install pre-commit
pre-commit run --files 你修改的代码
```
2. 提交 commit 中增加必要信息跳过不必要的 CI
- 提交 asr 相关代码
```text
git commit -m "xxxxxx, test=asr"
```
- 提交 tts 相关代码
```text
git commit -m "xxxxxx, test=tts"
```
- 仅修改文档
```text
git commit -m "xxxxxx, test=doc"
```
注意：
1. 虽然跳过了 CI，但是还要先排队排到才能跳过，所以非自己方向看到 pending 不要着急 🤣
2. 在 `git commit --amend` 的时候才加 `test=xxx` 可能不太有效
3. 一个 pr 多次提交 commit 注意每次都要加 `test=xxx`，因为每个 commit 都会触发 CI
4. 删除 python 环境中已经安装好的 paddlespeech，否则可能会影响 import paddlespeech 的顺序</div>


================================================
FILE: .github/ISSUE_TEMPLATE/bug-report-s2t.md
================================================
---
name: "\U0001F41B S2T Bug Report"
about: Create a report to help us improve
title: "[S2T]XXXX"
labels: Bug, S2T
assignees: zh794390558

---

For support and discussions, please use our [GitHub Discussions](https://github.com/PaddlePaddle/PaddleSpeech/discussions).

If you've found a bug then please create an issue with the following information:

**Describe the bug**
A clear and concise description of what the bug is.

**To Reproduce**
Steps to reproduce the behavior:
1. Go to '...'
2. Click on '....'
3. Scroll down to '....'
4. See error

**Expected behavior**
A clear and concise description of what you expected to happen.

**Screenshots**
If applicable, add screenshots to help explain your problem.

**Environment (please complete the following information):**
 - OS: [e.g. Ubuntu]
 - GCC/G++ Version [e.g. 8.3]
 - Python Version [e.g. 3.7]
 - PaddlePaddle Version [e.g. 2.0.0]
 - Model Version [e.g. 2.0.0]
 - GPU/DRIVER Information [e.g. Tesla V100-SXM2-32GB/440.64.00]
 - CUDA/CUDNN Version [e.g. cuda-10.2]
 - MKL Version
 - TensorRT Version

**Additional context**
Add any other context about the problem here.


================================================
FILE: .github/ISSUE_TEMPLATE/bug-report-tts.md
================================================
---
name: "\U0001F41B TTS Bug Report"
about: Create a report to help us improve
title: "[TTS]XXXX"
labels: Bug, T2S

---

For support and discussions, please use our [GitHub Discussions](https://github.com/PaddlePaddle/PaddleSpeech/discussions).

If you've found a bug then please create an issue with the following information:

**Describe the bug**
A clear and concise description of what the bug is.

**To Reproduce**
Steps to reproduce the behavior:
1. Go to '...'
2. Click on '....'
3. Scroll down to '....'
4. See error

**Expected behavior**
A clear and concise description of what you expected to happen.

**Screenshots**
If applicable, add screenshots to help explain your problem.

**Environment (please complete the following information):**
 - OS: [e.g. Ubuntu]
 - GCC/G++ Version [e.g. 8.3]
 - Python Version [e.g. 3.7]
 - PaddlePaddle Version [e.g. 2.0.0]
 - Model Version [e.g. 2.0.0]
 - GPU/DRIVER Information [e.g. Tesla V100-SXM2-32GB/440.64.00]
 - CUDA/CUDNN Version [e.g. cuda-10.2]
 - MKL Version
 - TensorRT Version

**Additional context**
Add any other context about the problem here.


================================================
FILE: .github/ISSUE_TEMPLATE/feature-request.md
================================================
---
name: "\U0001F680 Feature Request"
about: As a user, I want to request a New Feature on the product.
title: ''
labels: feature request
assignees: D-DanielYang, iftaken

---

## Feature Request

**Is your feature request related to a problem? Please describe:**
<!-- A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] -->

**Describe the feature you'd like:**
<!-- A clear and concise description of what you want to happen. -->

**Describe alternatives you've considered:**
<!-- A clear and concise description of any alternative solutions or features you've considered. -->


================================================
FILE: .github/ISSUE_TEMPLATE/others.md
================================================
---
name: "\U0001F9E9 Others"
about: Report any other non-support related issues.
title: ''
labels: ''
assignees: ''

---

## Others

<!--
你可以在这里提出任何前面几类模板不适用的问题，包括但不限于：优化性建议、框架使用体验反馈、版本兼容性问题、报错信息不清楚等。
You can report any issues that are not applicable to the previous types of templates, including but not limited to: enhancement suggestions, feedback on the use of the framework, version compatibility issues, unclear error information, etc.
-->


================================================
FILE: .github/ISSUE_TEMPLATE/question.md
================================================
---
name: "\U0001F914 Ask a Question"
about: I want to ask a question.
title: ''
labels: Question
assignees: ''

---

## General Question

<!--
Before asking a question, make sure you have:
- Searched Baidu/Google for your question.
- Searched open and closed [GitHub issues](https://github.com/PaddlePaddle/PaddleSpeech/issues?q=is%3Aissue)
- Read the documentation:
  - [Readme](https://github.com/PaddlePaddle/PaddleSpeech)
  - [Doc](https://paddlespeech.readthedocs.io/)
-->


================================================
FILE: .github/PULL_REQUEST_TEMPLATE.md
================================================
<!-- Demo: https://github.com/PaddlePaddle/PaddleNLP/pull/26 -->
### PR types
<!-- One of [ New features | Bug fixes | Function optimization | Performance optimization | Breaking changes | Others ] -->

### PR changes
<!-- One of [ Models | APIs | Docs | Others ] -->

### Describe
<!-- Describe what this PR does -->


================================================
FILE: .github/stale.yml
================================================
# Number of days of inactivity before an issue becomes stale
daysUntilStale: 45 
# Number of days of inactivity before a stale issue is closed
daysUntilClose: 30
# Issues with these labels will never be considered stale
exemptLabels:
  - Roadmap 
  - Bug
  - feature request
  - Tips
# Label to use when marking an issue as stale
staleLabel: Stale
# Comment to post when marking an issue as stale. Set to `false` to disable
markComment: >
  This issue has been automatically marked as stale because it has not had
  recent activity. It will be closed if no further activity occurs. Thank you
  for your contributions.
unmarkComment: false
# Comment to post when closing a stale issue. Set to `false` to disable
closeComment: >
  This issue is closed. Please re-open if needed.


================================================
FILE: .gitignore
================================================
.DS_Store
*.pyc
.vscode
*.log
*.wav
*.pdmodel
*.pdiparams*
*.zip
*.tar
*.tar.gz
.ipynb_checkpoints
*.npz
*.done
*.whl
*.egg-info
build
*output/
.history
.idea

audio/dist/
audio/fc_patch/

docs/build/
docs/topic/ctc/warp-ctc/

tools/venv
tools/kenlm
tools/sox-14.4.2
tools/soxbindings
tools/montreal-forced-aligner/
tools/Montreal-Forced-Aligner/
tools/sctk
tools/sctk-20159b5/
tools/kaldi
tools/OpenBLAS/
tools/Miniconda3-latest-Linux-x86_64.sh
tools/activate_python.sh
tools/miniconda.sh
tools/CRF++-0.58/
tools/liblbfgs-1.10/
tools/srilm/
tools/env.sh
tools/openfst-1.8.1/
tools/libsndfile/
tools/python-soundfile/
tools/onnx
tools/onnxruntime
tools/Paddle2ONNX
tools/onnx-simplifier/

speechx/fc_patch/

third_party/ctc_decoders/paddlespeech_ctcdecoders.py

kernel_meta/


================================================
FILE: .mergify.yml
================================================
pull_request_rules:
  - name: automatic merge for develop when CI passes and 1 reviews
    conditions:
      - "approved-reviews-by>=1"
      - check-success=Travis CI - Pull Request
      - base=develop
    actions:
      merge:
        method: merge
  - name: delete head branch after merged
    conditions:
      - merged
    actions:
      delete_head_branch: {}
  - name: "add label=auto-merge for PR by mergify"
    conditions:
      - author=mergify[bot]
    actions:
      label:
        add: ["auto-merge"]
  - name: warn on conflicts
    conditions:
      - conflict
    actions:
      comment:
        message: This pull request is now in conflict :(
      label:
        add: ["conflicts"]
  - name: unlabel conflicts
    conditions:
      - -conflict
    actions:
      label:
        remove: ["conflicts"]
  - name: "auto add label=Dataset"
    conditions:
      - files~=^dataset/
    actions:
      label:
        add: ["Dataset"]
  - name: "auto add label=S2T"
    conditions:
      - files~=^paddlespeech/s2t/
    actions:
      label:
        add: ["S2T"]
  - name: "auto add label=T2S"
    conditions:
      - files~=^paddlespeech/t2s/
    actions:
      label:
        add: ["T2S"]
  - name: "auto add label=Audio"
    conditions:
      - files~=^paddlespeech/audio/
    actions:
      label:
        add: ["Audio"]
  - name: "auto add label=Vector"
    conditions:
      - files~=^paddlespeech/vector/
    actions:
      label:
        add: ["Vector"]
  - name: "auto add label=Text"
    conditions:
      - files~=^paddlespeech/text/
    actions:
      label:
        add: ["Text"]
  - name: "auto add label=Example"
    conditions:
      - files~=^examples/
    actions:
      label:
        add: ["Example"]
  - name: "auto add label=CLI"
    conditions:
      - files~=^paddlespeech/cli
    actions:
      label:
        add: ["CLI"]
  - name: "auto add label=Server"
    conditions:
      - files~=^paddlespeech/server
    actions:
      label:
        add: ["Server"]
  - name: "auto add label=Demo"
    conditions:
      - files~=^demos/
    actions:
      label:
        add: ["Demo"]
  - name: "auto add label=README"
    conditions:
      # Label PRs touching either the English or the Chinese README.
      - files~=(README.md|README_cn.md)
    actions:
      label:
        add: ["README"]
  - name: "auto add label=Documentation"
    conditions:
      - files~=^(docs/|CHANGELOG.md)
    actions:
      label:
        add: ["Documentation"]
  - name: "auto add label=CI"
    conditions:
      - files~=^(.circleci/|ci/|.github/|.travis.yml|.travis|env.sh)
    actions:
      label:
        add: ["CI"]
  - name: "auto add label=Installation"
    conditions:
      - files~=^(tools/|setup.py|setup.cfg|setup_audio.py)
    actions:
      label:
        add: ["Installation"]
  - name: "auto add label=Test"
    conditions:
      - files~=^(tests/)
    actions:
      label:
        add: ["Test"]
  - name: "auto add label=mergify"
    conditions:
      - files~=^.mergify.yml
    actions:
      label:
        add: ["mergify"]
  - name: "auto add label=Docker"
    conditions:
      - files~=^docker/
    actions:
      label:
        add: ["Docker"]
  - name: "auto add label=Deployment"
    conditions:
      - files~=^runtime/
    actions:
      label:
        add: ["Deployment"]


================================================
FILE: .pre-commit-config.yaml
================================================
repos:
-   repo: https://github.com/pre-commit/mirrors-yapf.git
    rev: v0.16.0
    hooks:
    -   id: yapf
        files: \.py$
        exclude: (?=runtime/engine/kaldi|audio/paddleaudio/src|third_party).*(\.cpp|\.cc|\.h\.hpp|\.py)$

-   repo: https://github.com/pre-commit/pre-commit-hooks
    rev: a11d9314b22d8f8c7556443875b731ef05965464
    hooks:
    -   id: check-merge-conflict
    -   id: check-symlinks
    -   id: detect-private-key
        files: (?!.*paddle)^.*$
    -   id: end-of-file-fixer
        files: \.md$
    #-   id: trailing-whitespace
    #    files: \.md$
    -   id: requirements-txt-fixer
        exclude: (?=third_party).*$
    -   id: check-yaml
    -   id: check-json
    -   id: pretty-format-json
        args:
        - --no-sort-keys
        - --autofix
    -   id: check-merge-conflict
      #    -   id: flake8
      #        args:
      #        -  --ignore=E501,E228,E226,E261,E266,E128,E402,W503
      #        -  --builtins=G,request
      #        -  --jobs=1
      #        exclude: (?=runtime/engine/kaldi|audio/paddleaudio/src|third_party).*(\.cpp|\.cc|\.h\.hpp|\.py)$

-   repo : https://github.com/Lucas-C/pre-commit-hooks
    rev: v1.0.1
    hooks:
    -   id: forbid-crlf
        files: \.md$
    -   id: remove-crlf
        files: \.md$
    -   id: forbid-tabs
        files: \.md$
    -   id: remove-tabs
        files: \.md$

-   repo: local
    hooks:
    -   id: clang-format
        name: clang-format
        description: Format files with ClangFormat
        entry: bash .pre-commit-hooks/clang-format.hook -i
        language: system
        files: \.(h\+\+|h|hh|hxx|hpp|cuh|c|cc|cpp|cu|c\+\+|cxx|tpp|txx)$
        exclude: (?=runtime/engine/kaldi|audio/paddleaudio/src|runtime/patch|runtime/tools/fstbin|runtime/tools/lmbin|third_party/ctc_decoders|runtime/engine/common/utils).*(\.cpp|\.cc|\.h|\.hpp|\.py)$ 
    -   id: cpplint
        name: cpplint
        description: Static code analysis of C/C++ files
        language: python
        files: \.(h\+\+|h|hh|hxx|hpp|cuh|c|cc|cpp|cu|c\+\+|cxx|tpp|txx)$
        exclude: (?=runtime/engine/kaldi|runtime/engine/common/matrix|audio/paddleaudio/src|runtime/patch|runtime/tools/fstbin|runtime/tools/lmbin|third_party/ctc_decoders|runtime/engine/common/utils).*(\.cpp|\.cc|\.h|\.hpp|\.py)$ 
        entry: cpplint --filter=-build,-whitespace,+whitespace/comma,-whitespace/indent
-   repo: https://github.com/asottile/reorder_python_imports
    rev: v2.4.0
    hooks:
      - id: reorder-python-imports
        exclude: (?=runtime/engine/kaldi|audio/paddleaudio/src|runtime/patch|runtime/tools/fstbin|runtime/tools/lmbin|third_party/ctc_decoders).*(\.cpp|\.cc|\.h\.hpp|\.py)$


================================================
FILE: .pre-commit-hooks/clang-format.hook
================================================
#!/usr/bin/env bash
# Pre-commit entry point that forwards all of its arguments to clang-format.
set -e

readonly VERSION="3.9"

# Because of `set -e`, this also aborts the hook early when clang-format
# cannot be executed.
version=$(clang-format -version)

# if ! [[ $version == *"$VERSION"* ]]; then
#     echo "clang-format version check failed."
#     echo "a version contains '$VERSION' is needed, but get '$version'"
#     echo "you can install the right version, and make an soft-link to '\$PATH' env"
#     exit -1
# fi

# Quote "$@" so file names containing spaces or glob characters reach
# clang-format as single, unmodified arguments.
clang-format "$@"


================================================
FILE: .pre-commit-hooks/copyright-check.hook
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import datetime
import io, re
import sys, os
import subprocess
import platform

# License template. The first and last lines are empty; the second line is
# the "Copyright (c) <year> ..." line that COPYRIGHT_HEADER is derived from.
COPYRIGHT = '''
Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
'''

# Module-level placeholder; generate_copyright() assigns its own local marker.
LANG_COMMENT_MARK = None

# Always '\n': the template literal above only contains '\n', files are read
# in text mode (which normalises line endings to '\n'), and text-mode writes
# translate '\n' back to the platform line ending. Using "\r\n" on Windows
# would fail to split the template and would be doubled on write.
NEW_LINE_MARK = '\n'

# Copyright line of the template with its year replaced by the current local
# year. Computed on every platform (previously left as None on Windows) and
# without spawning an external `date` process.
COPYRIGHT_HEADER = COPYRIGHT.split(NEW_LINE_MARK)[1]
_template_year = re.search(r'(\d{4})', COPYRIGHT_HEADER).group(0)
COPYRIGHT_HEADER = COPYRIGHT_HEADER.replace(
    _template_year, str(datetime.date.today().year))


def generate_copyright(template, lang='C'):
    """Render the license template as a block of line comments.

    Args:
        template: License text; it is split on NEW_LINE_MARK. Its first two
            lines and its last line are not copied (the header line is taken
            from COPYRIGHT_HEADER instead).
        lang: 'Python' selects '#' comments; any other value selects '//'.

    Returns:
        The commented header followed by one extra "\\n".
    """
    mark = '#' if lang == 'Python' else "//"

    rows = template.split(NEW_LINE_MARK)
    final_index = len(rows) - 1

    # The year-adjusted copyright line always comes first.
    pieces = [mark + " " + COPYRIGHT_HEADER + NEW_LINE_MARK]
    for index, text in enumerate(rows):
        if index in (0, 1, final_index):
            continue
        # Empty template lines become a bare comment marker with no
        # trailing space.
        gap = " " if len(text) != 0 else ""
        pieces.append(mark + gap + text + NEW_LINE_MARK)

    return "".join(pieces) + "\n"


def lang_type(filename):
    """Map a file name to the comment style used for its copyright header.

    Args:
        filename: Path or name of the file; only its suffix is inspected.

    Returns:
        "Python" for ``.py`` files, "C" for the other supported suffixes
        (these all receive ``//`` comments).

    Raises:
        SystemExit: with status 0 when the suffix is not supported, after
            printing a message naming the file.
    """
    if filename.endswith(".py"):
        return "Python"
    if filename.endswith((".h", ".c", ".hpp", ".cc", ".cpp", ".cu", ".cuh",
                          ".go", ".proto")):
        return "C"

    # Interpolate the file name; the previous call passed it as a second
    # print() argument, so the output contained a literal "%s".
    print("Unsupported filetype %s" % filename)
    # Status 0 is kept on purpose: an unsupported file stops the run without
    # reporting a hook failure.
    sys.exit(0)


# Matches a comment line that carries a source-encoding declaration: optional
# leading whitespace, '#', then "coding" followed by ':' or '=' and an
# encoding name (e.g. "# -*- coding: utf-8 -*-").
PYTHON_ENCODE = re.compile("^[ \t\v]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)")


def main(argv=None):
    """Prepend a copyright header to every listed file that lacks one.

    A file is left untouched when its first line contains "COPYRIGHT (C)"
    (case-insensitive), when its first line starts with "#!", or when either
    of its first two lines matches PYTHON_ENCODE.

    Args:
        argv: Argument list to parse; ``None`` lets argparse use the
            process arguments.

    Returns:
        1 if at least one file was rewritten, otherwise 0.
    """
    parser = argparse.ArgumentParser(
        description='Checker for copyright declaration.')
    parser.add_argument('filenames', nargs='*', help='Filenames to check')
    args = parser.parse_args(argv)

    retv = 0
    for filename in args.filenames:
        # A context manager guarantees the handle is closed on every path,
        # including the early `continue`s below.
        with io.open(filename, encoding="utf-8") as fd:
            first_line = fd.readline()
            second_line = fd.readline()
            if "COPYRIGHT (C)" in first_line.upper():
                continue
            if (first_line.startswith("#!") or
                    PYTHON_ENCODE.match(second_line) is not None or
                    PYTHON_ENCODE.match(first_line) is not None):
                continue
            # Reuse the already-open handle instead of opening the file a
            # second time.
            original_contents = first_line + second_line + fd.read()

        new_contents = generate_copyright(
            COPYRIGHT, lang_type(filename)) + original_contents
        print('Auto Insert Copyright Header {}'.format(filename))
        retv = 1
        # Write with the same encoding used for reading so non-ASCII content
        # survives regardless of the locale's default encoding.
        with io.open(filename, 'w', encoding="utf-8") as output_file:
            output_file.write(new_contents)

    return retv


# When run as a script, exit with main()'s return value as the status.
if __name__ == '__main__':
    exit(main())


================================================
FILE: .readthedocs.yml
================================================
# .readthedocs.yml
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

# Required
version: 2

# Build documentation in the docs/ directory with Sphinx
sphinx:
  configuration: docs/source/conf.py

# Build documentation with MkDocs
#mkdocs:
#  configuration: mkdocs.yml

# Optionally build your docs in additional formats such as PDF
formats: []

# Optionally set the version of Python and requirements required to build your docs
python:
  version: 3.7
  install:
    - requirements: docs/requirements.txt
    - method: setuptools
      path: .
  system_packages: true


================================================
FILE: .style.yapf
================================================
[style]
based_on_style = pep8
column_limit = 80


================================================
FILE: .travis.yml
================================================
language: cpp
cache: ccache
sudo: required
dist: Bionic 
services:
  - docker
os:
  - linux
env:
  - JOB=PRE_COMMIT

addons:
  apt:
    packages:
      - git
      - python3-pip
      - python3-dev

before_install:
  -  python3 --version
  -  python3 -m pip --version
  -  pip3 --version
  -  sudo pip3 install -U virtualenv pre-commit pip
  -  docker pull paddlepaddle/paddle:latest

script:
  - exit_code=0
  - docker run -i --rm -v "$PWD:/py_unittest" paddlepaddle/paddle:latest /bin/bash -c
    'cd /py_unittest && bash .travis/precommit.sh && source env.sh && bash .travis/unittest.sh' || exit_code=$(( exit_code | $? ))
    exit $exit_code

notifications:
  email:
    on_success: change
    on_failure: always


================================================
FILE: LICENSE
================================================
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: MANIFEST.in
================================================
include paddlespeech/t2s/exps/*.txt
include paddlespeech/t2s/frontend/*.yaml

================================================
FILE: README.md
================================================
([简体中文](./README_cn.md)|English)
<p align="center">
  <img src="./docs/images/PaddleSpeech_logo.png" />
</p>

<p align="center">
    <a href="./LICENSE"><img src="https://img.shields.io/badge/license-Apache%202-red.svg"></a>
    <a href="https://github.com/PaddlePaddle/PaddleSpeech/releases"><img src="https://img.shields.io/github/v/release/PaddlePaddle/PaddleSpeech?color=ffa"></a>
    <a href="support os"><img src="https://img.shields.io/badge/os-linux%2C%20win%2C%20mac-pink.svg"></a>
    <a href=""><img src="https://img.shields.io/badge/python-3.8+-aff.svg"></a>
    <a href="https://github.com/PaddlePaddle/PaddleSpeech/graphs/contributors"><img src="https://img.shields.io/github/contributors/PaddlePaddle/PaddleSpeech?color=9ea"></a>
    <a href="https://github.com/PaddlePaddle/PaddleSpeech/commits"><img src="https://img.shields.io/github/commit-activity/m/PaddlePaddle/PaddleSpeech?color=3af"></a>
    <a href="https://github.com/PaddlePaddle/PaddleSpeech/issues"><img src="https://img.shields.io/github/issues/PaddlePaddle/PaddleSpeech?color=9cc"></a>
    <a href="https://github.com/PaddlePaddle/PaddleSpeech/stargazers"><img src="https://img.shields.io/github/stars/PaddlePaddle/PaddleSpeech?color=ccf"></a>
    <a href="https://pypi.org/project/paddlespeech/"><img src="https://img.shields.io/pypi/dm/PaddleSpeech"></a>
    <a href="https://pypi.org/project/paddlespeech/"><img src="https://static.pepy.tech/badge/paddlespeech"></a>
    <a href="https://huggingface.co/spaces"><img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue"></a>
</p>
<div align="center">  
<h4>
    <a href="#quick-start"> Quick Start </a>
  | <a href="#documents"> Documents </a>
  | <a href="#model-list"> Models List </a>
  | <a href="https://aistudio.baidu.com/aistudio/course/introduce/25130"> AIStudio Courses </a>
  | <a href="https://arxiv.org/abs/2205.12007"> NAACL2022 Best Demo Award Paper </a>
  | <a href="https://gitee.com/paddlepaddle/PaddleSpeech"> Gitee </a>
</h4>
</div>

------------------------------------------------------------------------------------

**PaddleSpeech** is an open-source toolkit on the [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) platform for a variety of critical tasks in speech and audio, with state-of-the-art and influential models.

**PaddleSpeech** won the [NAACL2022 Best Demo Award](https://2022.naacl.org/blog/best-demo-award/), please check out our paper on [Arxiv](https://arxiv.org/abs/2205.12007).

##### Speech Recognition

<div align = "center">
<table style="width:100%">
  <thead>
    <tr>
      <th> Input Audio  </th>
      <th width="550"> Recognition Result  </th>
    </tr>
  </thead>
  <tbody>
   <tr>
      <td align = "center">
      <a href="https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav" rel="nofollow">
            <img align="center" src="./docs/images/audio_icon.png" width="200" style="max-width: 100%;"></a><br>
      </td>
      <td >I knocked at the door on the ancient side of the building.</td>
    </tr>
    <tr>
      <td align = "center">
      <a href="https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav" rel="nofollow">
            <img align="center" src="./docs/images/audio_icon.png" width="200" style="max-width: 100%;"></a><br>
      </td>
      <td>我认为跑步最重要的就是给我带来了身体健康。</td>
    </tr>
  </tbody>
</table>

</div>

##### Speech Translation (English to Chinese)

<div align = "center">
<table style="width:100%">
  <thead>
    <tr>
      <th> Input Audio  </th>
      <th width="550"> Translations Result  </th>
    </tr>
  </thead>
  <tbody>
   <tr>
      <td align = "center">
      <a href="https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav" rel="nofollow">
            <img align="center" src="./docs/images/audio_icon.png" width="200" style="max-width: 100%;"></a><br>
      </td>
      <td >我 在 这栋 建筑 的 古老 门上 敲门。</td>
    </tr>
  </tbody>
</table>

</div>

##### Text-to-Speech
<div align = "center">
<table style="width:100%">
  <thead>
    <tr>
      <th width="550" > Input Text</th>
      <th>Synthetic Audio</th>
    </tr>
  </thead>
  <tbody>
   <tr>
      <td>Life was like a box of chocolates, you never know what you're gonna get.</td>
      <td align = "center">
      <a href="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/tacotron2_ljspeech_waveflow_samples_0.2/sentence_1.wav" rel="nofollow">
            <img align="center" src="./docs/images/audio_icon.png" width="200" style="max-width: 100%;"></a><br>
      </td>
    </tr>
    <tr>
      <td>早上好，今天是2020/10/29，最低温度是-3°C。</td>
      <td align = "center">
      <a href="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/001.wav" rel="nofollow">
            <img align="center" src="./docs/images/audio_icon.png" width="200" style="max-width: 100%;"></a><br>
      </td>
    </tr>
    <tr>
      <td>季姬寂，集鸡，鸡即棘鸡。棘鸡饥叽，季姬及箕稷济鸡。鸡既济，跻姬笈，季姬忌，急咭鸡，鸡急，继圾几，季姬急，即籍箕击鸡，箕疾击几伎，伎即齑，鸡叽集几基，季姬急极屐击鸡，鸡既殛，季姬激，即记《季姬击鸡记》。</td>
      <td align = "center">
      <a href="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/jijiji.wav" rel="nofollow">
            <img align="center" src="./docs/images/audio_icon.png" width="200" style="max-width: 100%;"></a><br>
      </td>
    </tr>
    <tr>
      <td>大家好，我是 parrot 虚拟老师，我们来读一首诗，我与春风皆过客，I and the spring breeze are passing by，你携秋水揽星河，you take the autumn water to take the galaxy。</td>
      <td align = "center">
      <a href="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/labixiaoxin.wav" rel="nofollow">
            <img align="center" src="./docs/images/audio_icon.png" width="200" style="max-width: 100%;"></a><br>
      </td>
    </tr>
    <tr>
      <td>宜家唔系事必要你讲，但系你所讲嘅说话将会变成呈堂证供。</td>
      <td align = "center">
      <a href="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/chengtangzhenggong.wav" rel="nofollow">
            <img align="center" src="./docs/images/audio_icon.png" width="200" style="max-width: 100%;"></a><br>
      </td>
    </tr>
    <tr>
      <td>各个国家有各个国家嘅国歌</td>
      <td align = "center">
      <a href="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/gegege.wav" rel="nofollow">
            <img align="center" src="./docs/images/audio_icon.png" width="200" style="max-width: 100%;"></a><br>
      </td>
    </tr>
  </tbody>
</table>

</div>

For more synthesized audios, please refer to [PaddleSpeech Text-to-Speech samples](https://paddlespeech.readthedocs.io/en/latest/tts/demo.html).

##### Punctuation Restoration
<div align = "center">
<table style="width:100%">
  <thead>
    <tr>
      <th width="390"> Input Text </th>
      <th width="390"> Output Text </th>
    </tr>
  </thead>
  <tbody>
   <tr>
      <td>今天的天气真不错啊你下午有空吗我想约你一起去吃饭</td>
      <td>今天的天气真不错啊！你下午有空吗？我想约你一起去吃饭。</td>
    </tr>
  </tbody>
</table>

</div>


### Features

Through an easy-to-use, efficient, flexible and scalable implementation, our vision is to empower both industrial applications and academic research, covering training, inference & testing modules, and the deployment process. More specifically, this toolkit features:
- 📦  **Ease of Use**: low barrier to installation; [CLI](#quick-start), [Server](#quick-start-server), and [Streaming Server](#quick-start-streaming-server) are available to quick-start your journey.
- 🏆  **Align to the State-of-the-Art**: we provide high-speed and ultra-lightweight models, and also cutting-edge technology. 
- 🏆  **Streaming ASR and TTS System**: we provide production-ready streaming ASR and streaming TTS systems.
- 💯  **Rule-based Chinese frontend**: our frontend contains Text Normalization and Grapheme-to-Phoneme (G2P, including Polyphone and Tone Sandhi). Moreover, we use self-defined linguistic rules to adapt Chinese context.
- 📦  **Varieties of Functions that Vitalize both Industry and Academia**:
  - 🛎️  *Implementation of critical audio tasks*: this toolkit contains audio functions like  Automatic Speech Recognition, Text-to-Speech Synthesis, Speaker Verification, KeyWord Spotting, Audio Classification, and Speech Translation, etc.
  - 🔬  *Integration of mainstream models and datasets*: the toolkit implements modules that participate in the whole pipeline of the speech tasks, and uses mainstream datasets like LibriSpeech, LJSpeech, AIShell, CSMSC, etc. See also [model list](#model-list) for more details.
  - 🧩  *Cascaded models application*: as an extension of the typical traditional audio tasks, we combine the workflows of the aforementioned tasks with other fields like Natural language processing (NLP) and Computer Vision (CV).

### Recent Update
- 🎉 2025.09.01: Add [Whisper large v3 and turbo model](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos/whisper).
- 🤗 2025.08.11: Add [code-switch online model and server demo](./examples/tal_cs/asr1/).
- 👑 2023.05.31: Add [WavLM ASR-en](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/examples/librispeech/asr5), WavLM fine-tuning for ASR on LibriSpeech.
- 🎉 2023.05.18: Add [Squeezeformer](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell/asr1), Squeezeformer training for ASR on Aishell.
- 👑 2023.05.04: Add [HuBERT ASR-en](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/examples/librispeech/asr4), HuBERT fine-tuning for ASR on LibriSpeech.
- ⚡ 2023.04.28: Fix [0-d tensor](https://github.com/PaddlePaddle/PaddleSpeech/pull/3214), with the upgrade of paddlepaddle==2.5, the problem of modifying 0-d tensor has been solved.
- 👑 2023.04.25: Add [AMP for U2 conformer](https://github.com/PaddlePaddle/PaddleSpeech/pull/3167).
- 🔥 2023.04.06: Add [subtitle file (.srt format) generation example](./demos/streaming_asr_server).
- 🔥 2023.03.14: Add SVS(Singing Voice Synthesis) examples with Opencpop dataset, including [DiffSinger](./examples/opencpop/svs1)、[PWGAN](./examples/opencpop/voc1) and [HiFiGAN](./examples/opencpop/voc5), the effect is continuously optimized.
- 👑 2023.03.09: Add [Wav2vec2ASR-zh](./examples/aishell/asr3).
- 🎉 2023.03.07: Add [TTS ARM Linux C++ Demo (with C++ Chinese Text Frontend)](./demos/TTSArmLinux).
- 🔥 2023.03.03: Add Voice Conversion [StarGANv2-VC synthesize pipeline](./examples/vctk/vc3).
- 🎉 2023.02.16: Add [Cantonese TTS](./examples/canton/tts3).
- 🔥 2023.01.10: Add [code-switch asr CLI and Demos](./demos/speech_recognition).
- 👑 2023.01.06: Add [code-switch asr tal_cs recipe](./examples/tal_cs/asr1/).
- 🎉 2022.12.02: Add [end-to-end Prosody Prediction pipeline](./examples/csmsc/tts3_rhy) (including using prosody labels in Acoustic Model).
- 🎉 2022.11.30: Add [TTS Android Demo](./demos/TTSAndroid).
- 🤗 2022.11.28: PP-TTS and PP-ASR demos are available in [AIStudio](https://aistudio.baidu.com/aistudio/modelsoverview) and [official website of paddlepaddle](https://www.paddlepaddle.org.cn/models).
- 👑 2022.11.18: Add [Whisper CLI and Demos](https://github.com/PaddlePaddle/PaddleSpeech/pull/2640), support multi language recognition and translation.
- 🔥 2022.11.18: Add [Wav2vec2 CLI and Demos](./demos/speech_ssl), Support ASR and Feature Extraction.
- 🎉 2022.11.17: Add [male voice for TTS](https://github.com/PaddlePaddle/PaddleSpeech/pull/2660).
- 🔥 2022.11.07: Add [U2/U2++ C++ High Performance Streaming ASR Deployment](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/runtime/examples/u2pp_ol/wenetspeech).
- 👑 2022.11.01: Add [Adversarial Loss](https://arxiv.org/pdf/1907.04448.pdf) for [Chinese English mixed TTS](./examples/zh_en_tts/tts3).
- 🔥 2022.10.26: Add [Prosody Prediction](./examples/other/rhy) for TTS.
- 🎉 2022.10.21: Add [SSML](https://github.com/PaddlePaddle/PaddleSpeech/discussions/2538) for TTS Chinese Text Frontend.
- 👑 2022.10.11: Add [Wav2vec2ASR-en](./examples/librispeech/asr3), wav2vec2.0 fine-tuning for ASR on LibriSpeech.
- 🔥 2022.09.26: Add Voice Cloning, TTS finetune, and [ERNIE-SAT](https://arxiv.org/abs/2211.03545) in [PaddleSpeech Web Demo](./demos/speech_web).
- ⚡ 2022.09.09: Add AISHELL-3 Voice Cloning [example](./examples/aishell3/vc2) with ECAPA-TDNN speaker encoder.
- ⚡ 2022.08.25: Release TTS [finetune](./examples/other/tts_finetune/tts3) example.
- 🔥 2022.08.22: Add [ERNIE-SAT](https://arxiv.org/abs/2211.03545) models: [ERNIE-SAT-vctk](./examples/vctk/ernie_sat)、[ERNIE-SAT-aishell3](./examples/aishell3/ernie_sat)、[ERNIE-SAT-zh_en](./examples/aishell3_vctk/ernie_sat).
- 🔥 2022.08.15: Add [g2pW](https://github.com/GitYCC/g2pW) into TTS Chinese Text Frontend.
- 🔥 2022.08.09: Release [Chinese English mixed TTS](./examples/zh_en_tts/tts3).
- ⚡ 2022.08.03: Add ONNXRuntime infer for  TTS CLI.
- 🎉 2022.07.18: Release VITS: [VITS-csmsc](./examples/csmsc/vits)、[VITS-aishell3](./examples/aishell3/vits)、[VITS-VC](./examples/aishell3/vits-vc).
- 🎉 2022.06.22: All TTS models support ONNX format.
- 🍀 2022.06.17: Add [PaddleSpeech Web Demo](./demos/speech_web).
- 👑 2022.05.13: Release [PP-ASR](./docs/source/asr/PPASR.md)、[PP-TTS](./docs/source/tts/PPTTS.md)、[PP-VPR](docs/source/vpr/PPVPR.md).
- 👏🏻 2022.05.06: `PaddleSpeech Streaming Server` is available for `Streaming ASR` with `Punctuation Restoration` and `Token Timestamp` and `Text-to-Speech`.
- 👏🏻 2022.05.06: `PaddleSpeech Server` is available for `Audio Classification`, `Automatic Speech Recognition` and `Text-to-Speech`, `Speaker Verification` and `Punctuation Restoration`.
- 👏🏻 2022.03.28: `PaddleSpeech CLI` is available for `Speaker Verification`.
- 👏🏻 2021.12.10: `PaddleSpeech CLI` is available for `Audio Classification`, `Automatic Speech Recognition`, `Speech Translation (English to Chinese)` and `Text-to-Speech`.

### Community
- Scan the QR code below with WeChat to join the official technical exchange group, where you can get bonus materials (more than 20GB of learning materials, such as papers, code and videos) and the live links to the lessons. We look forward to your participation.

<div align="center">
<img src="https://user-images.githubusercontent.com/30135920/212860467-9e943cc3-8be8-49a4-97fd-7c94aad8e979.jpg"  width = "200"  />
</div>

## Installation

We strongly recommend installing PaddleSpeech on **Linux** with *python>=3.8*.

### **Dependency Introduction**

+ gcc >= 4.8.5
+ paddlepaddle
+ python >= 3.8
+ OS support: Linux (recommended), Windows, macOS

PaddleSpeech depends on paddlepaddle. For installation, please refer to the official website of [paddlepaddle](https://www.paddlepaddle.org.cn/en) and choose according to your own machine. Here is an example of the cpu version.

```bash
pip install paddlepaddle -i https://mirror.baidu.com/pypi/simple
```
You can also specify the version of paddlepaddle or install the develop version. 
```bash
# install 2.4.1 version. Note, 2.4.1 is just an example, please follow the minimum dependency of paddlepaddle for your selection
pip install paddlepaddle==2.4.1 -i https://mirror.baidu.com/pypi/simple
# install develop version
pip install paddlepaddle==0.0.0 -f https://www.paddlepaddle.org.cn/whl/linux/cpu-mkl/develop.html
```

There are two quick installation methods for PaddleSpeech, one is pip installation, and the other is source code compilation (recommended).
### pip install

```shell
pip install pytest-runner
pip install paddlespeech
```

### source code compilation

```shell
git clone https://github.com/PaddlePaddle/PaddleSpeech.git
cd PaddleSpeech
pip install pytest-runner
pip install .
# If you need to install in editable mode, you need to use --use-pep517. The command is as follows:
# pip install -e . --use-pep517
```

For other installation problems, such as conda environments, librosa dependencies, gcc problems, kaldi installation, etc., please refer to this [installation document](./docs/source/install.md). If you encounter problems during installation, you can leave a message on [#2150](https://github.com/PaddlePaddle/PaddleSpeech/issues/2150) and look for related problems there.


<a name="quickstart"></a>
## Quick Start

Developers can try our models with the [PaddleSpeech Command Line](./paddlespeech/cli/README.md) or Python. Change `--input` to test your own audio/text; audio input must be in 16k wav format.

**You can also quickly experience it in AI Studio 👉🏻 [PaddleSpeech API Demo](https://aistudio.baidu.com/aistudio/projectdetail/4353348?sUid=2470186&shared=1&ts=1660876445786)**


Test audio sample download

```shell
wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav
wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav
```

### Automatic Speech Recognition

<details><summary>&emsp;（Click to expand）Open Source Speech Recognition</summary>

**command line experience**

```shell
paddlespeech asr --lang zh --input zh.wav
```

**Python API experience**

```python
>>> from paddlespeech.cli.asr.infer import ASRExecutor
>>> asr = ASRExecutor()
>>> result = asr(audio_file="zh.wav")
>>> print(result)
我认为跑步最重要的就是给我带来了身体健康
```
</details>

### Text-to-Speech

<details><summary>&emsp;Open Source Speech Synthesis</summary>

Output 24k sample rate wav format audio


**command line experience**

```shell
paddlespeech tts --input "你好，欢迎使用百度飞桨深度学习框架！" --output output.wav
```

**Python API experience**

```python
>>> from paddlespeech.cli.tts.infer import TTSExecutor
>>> tts = TTSExecutor()
>>> tts(text="今天天气十分不错。", output="output.wav")
```
- You can experience in [Huggingface Spaces](https://huggingface.co/spaces) [TTS Demo](https://huggingface.co/spaces/KPatrick/PaddleSpeechTTS)

</details>

### Audio Classification

<details><summary>&emsp;An open-domain sound classification tool</summary>

Sound classification model based on 527 categories of AudioSet dataset

**command line experience**

```shell
paddlespeech cls --input zh.wav
```

**Python API experience**

```python
>>> from paddlespeech.cli.cls.infer import CLSExecutor
>>> cls = CLSExecutor()
>>> result = cls(audio_file="zh.wav")
>>> print(result)
Speech 0.9027186632156372
```

</details>

### Voiceprint Extraction

<details><summary>&emsp;Industrial-grade voiceprint extraction tool</summary>

**command line experience**

```shell
paddlespeech vector --task spk --input zh.wav
```

**Python API experience**

```python
>>> from paddlespeech.cli.vector import VectorExecutor
>>> vec = VectorExecutor()
>>> result = vec(audio_file="zh.wav")
>>> print(result) # 187-dimensional vector
[ -0.19083306   9.474295   -14.122263    -2.0916545    0.04848729
   4.9295826    1.4780062    0.3733844   10.695862     3.2697146
  -4.48199     -0.6617882   -9.170393   -11.1568775   -1.2358263 ...]
```

</details>

### Punctuation Restoration

<details><summary>&emsp;Quick recovery of text punctuation, works with ASR models</summary>

**command line experience**

```shell
paddlespeech text --task punc --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭
```

**Python API experience**

```python
>>> from paddlespeech.cli.text.infer import TextExecutor
>>> text_punc = TextExecutor()
>>> result = text_punc(text="今天的天气真不错啊你下午有空吗我想约你一起去吃饭")
>>> print(result)
今天的天气真不错啊！你下午有空吗？我想约你一起去吃饭。
```

</details>

### Speech Translation

<details><summary>&emsp;End-to-end English to Chinese Speech Translation Tool</summary>

This tool uses pre-compiled kaldi-related tools, so it is only supported on Ubuntu systems.

**command line experience**

```shell
paddlespeech st --input en.wav
```

**Python API experience**

```python
>>> from paddlespeech.cli.st.infer import STExecutor
>>> st = STExecutor()
>>> result = st(audio_file="en.wav")
>>> print(result)
['我 在 这栋 建筑 的 古老 门上 敲门 。']
```

</details>


<a name="quickstartserver"></a>
## Quick Start Server

Developers can have a try of our speech server with [PaddleSpeech Server Command Line](./paddlespeech/server/README.md).

**You can try it quickly in AI Studio (recommended): [SpeechServer](https://aistudio.baidu.com/aistudio/projectdetail/4354592?sUid=2470186&shared=1&ts=1660877827034)**

**Start server**     

```shell
paddlespeech_server start --config_file ./demos/speech_server/conf/application.yaml
```

**Access Speech Recognition Services**     

```shell
paddlespeech_client asr --server_ip 127.0.0.1 --port 8090 --input input_16k.wav
```

**Access Text to Speech Services**     

```shell
paddlespeech_client tts --server_ip 127.0.0.1 --port 8090 --input "您好，欢迎使用百度飞桨语音合成服务。" --output output.wav
```

**Access Audio Classification Services**     
```shell
paddlespeech_client cls --server_ip 127.0.0.1 --port 8090 --input input.wav
```


For more information about server command lines, please see: [speech server demos](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos/speech_server)


<a name="quickstartstreamingserver"></a>
## Quick Start Streaming Server

Developers can have a try of  [streaming asr](./demos/streaming_asr_server/README.md) and [streaming tts](./demos/streaming_tts_server/README.md) server.

**Start Streaming Speech Recognition Server**

```
paddlespeech_server start --config_file ./demos/streaming_asr_server/conf/application.yaml
```

**Access Streaming Speech Recognition Services**     

```
paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --input input_16k.wav
```

**Start Streaming Text to Speech  Server**

```
paddlespeech_server start --config_file ./demos/streaming_tts_server/conf/tts_online_application.yaml
```

**Access Streaming Text to Speech Services**     

```
paddlespeech_client tts_online --server_ip 127.0.0.1 --port 8092 --protocol http --input "您好，欢迎使用百度飞桨语音合成服务。" --output output.wav
```

For more information please see:  [streaming asr](./demos/streaming_asr_server/README.md) and [streaming tts](./demos/streaming_tts_server/README.md) 

<a name="ModelList"></a>

## Model List

PaddleSpeech supports a series of most popular models. They are summarized in [released models](./docs/source/released_model.md) and attached with available pretrained models.

<a name="SpeechToText"></a>

**Speech-to-Text** contains *Acoustic Model*, *Language Model*, and *Speech Translation*, with the following details:

<table style="width:100%">
  <thead>
    <tr>
      <th>Speech-to-Text Module Type</th>
      <th>Dataset</th>
      <th>Model Type</th>
      <th>Example</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td rowspan="4">Speech Recognition</td>
      <td rowspan="2" >Aishell</td>
      <td >DeepSpeech2 RNN + Conv based Models</td>
      <td>
      <a href = "./examples/aishell/asr0">deepspeech2-aishell</a>
      </td>
    </tr>
    <tr>
      <td>Transformer based Attention Models </td>
      <td>
      <a href = "./examples/aishell/asr1">u2.transformer.conformer-aishell</a>
      </td>
    </tr>
    <tr>
      <td> Librispeech</td>
      <td>Transformer based Attention Models </td>
      <td>
      <a href = "./examples/librispeech/asr0">deepspeech2-librispeech</a> / <a href = "./examples/librispeech/asr1">transformer.conformer.u2-librispeech</a>  / <a href = "./examples/librispeech/asr2">transformer.conformer.u2-kaldi-librispeech</a>
      </td>
    </tr>
  <tr>
      <td>TIMIT</td>
      <td>Unified Streaming & Non-streaming Two-pass</td>
      <td>
    <a href = "./examples/timit/asr1"> u2-timit</a>
      </td>
  </tr>
  <tr>
  <td>Alignment</td>
  <td>THCHS30</td>
  <td>MFA</td>
  <td>
  <a href = "./examples/thchs30/align0">mfa-thchs30</a>
  </td>
  </tr>
   <tr>
      <td rowspan="1">Language Model</td>
      <td colspan = "2">Ngram Language Model</td>
      <td>
      <a href = "./examples/other/ngram_lm">kenlm</a>
      </td>
    </tr>
  <tr>
      <td rowspan="2">Speech Translation (English to Chinese)</td> 
      <td rowspan="2">TED En-Zh</td>
      <td>Transformer + ASR MTL</td>
      <td>
      <a href = "./examples/ted_en_zh/st0">transformer-ted</a>
      </td>
  </tr>
  <tr>
      <td>FAT + Transformer + ASR MTL</td>
      <td>
      <a href = "./examples/ted_en_zh/st1">fat-st-ted</a>
      </td>
  </tr>
  </tbody>
</table>

<a name="TextToSpeech"></a>

**Text-to-Speech** in PaddleSpeech mainly contains three modules: *Text Frontend*, *Acoustic Model* and *Vocoder*. Acoustic Model and Vocoder models are listed as follows:

<table>
  <thead>
    <tr>
      <th> Text-to-Speech Module Type </th>
      <th> Model Type </th>
      <th> Dataset </th>
      <th> Example </th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td> Text Frontend </td>
      <td colspan="2"> &emsp; </td>
      <td>
      <a href = "./examples/other/tn">tn</a> / <a href = "./examples/other/g2p">g2p</a>
      </td>
    </tr>
    <tr>
      <td rowspan="6">Acoustic Model</td>
      <td>Tacotron2</td>
      <td>LJSpeech / CSMSC</td>
      <td>
      <a href = "./examples/ljspeech/tts0">tacotron2-ljspeech</a> / <a href = "./examples/csmsc/tts0">tacotron2-csmsc</a>
      </td>
    </tr>
    <tr>
      <td>Transformer TTS</td>
      <td>LJSpeech</td>
      <td>
      <a href = "./examples/ljspeech/tts1">transformer-ljspeech</a>
      </td>
    </tr>
    <tr>
      <td>SpeedySpeech</td>
      <td>CSMSC</td>
      <td >
      <a href = "./examples/csmsc/tts2">speedyspeech-csmsc</a>
      </td>
    </tr>
    <tr>
      <td>FastSpeech2</td>
      <td>LJSpeech / VCTK / CSMSC / AISHELL-3 / ZH_EN / finetune</td>
      <td>
      <a href = "./examples/ljspeech/tts3">fastspeech2-ljspeech</a> / <a href = "./examples/vctk/tts3">fastspeech2-vctk</a> / <a href = "./examples/csmsc/tts3">fastspeech2-csmsc</a> / <a href = "./examples/aishell3/tts3">fastspeech2-aishell3</a> / <a href = "./examples/zh_en_tts/tts3">fastspeech2-zh_en</a> / <a href = "./examples/other/tts_finetune/tts3">fastspeech2-finetune</a>
      </td>
    </tr>
    <tr>
      <td><a href = "https://arxiv.org/abs/2211.03545">ERNIE-SAT</a></td>
      <td>VCTK / AISHELL-3 / ZH_EN</td>
      <td>
      <a href = "./examples/vctk/ernie_sat">ERNIE-SAT-vctk</a> / <a href = "./examples/aishell3/ernie_sat">ERNIE-SAT-aishell3</a> / <a href = "./examples/aishell3_vctk/ernie_sat">ERNIE-SAT-zh_en</a>
      </td>
    </tr>
    <tr>
      <td>DiffSinger</td>
      <td>Opencpop</td>
      <td>
      <a href = "./examples/opencpop/svs1">DiffSinger-opencpop</a>
      </td>
   </tr>
   <tr>
      <td rowspan="6">Vocoder</td>
      <td >WaveFlow</td>
      <td >LJSpeech</td>
      <td>
      <a href = "./examples/ljspeech/voc0">waveflow-ljspeech</a>
      </td>
    </tr>
    <tr>
      <td >Parallel WaveGAN</td>
      <td >LJSpeech / VCTK / CSMSC / AISHELL-3 / Opencpop</td>
      <td>
      <a href = "./examples/ljspeech/voc1">PWGAN-ljspeech</a> / <a href = "./examples/vctk/voc1">PWGAN-vctk</a> / <a href = "./examples/csmsc/voc1">PWGAN-csmsc</a> /  <a href = "./examples/aishell3/voc1">PWGAN-aishell3</a> / <a href = "./examples/opencpop/voc1">PWGAN-opencpop</a>
      </td>
    </tr>
    <tr>
      <td >Multi Band MelGAN</td>
      <td >CSMSC</td>
      <td>
      <a href = "./examples/csmsc/voc3">Multi Band MelGAN-csmsc</a> 
      </td>
    </tr> 
    <tr>
      <td >Style MelGAN</td>
      <td >CSMSC</td>
      <td>
      <a href = "./examples/csmsc/voc4">Style MelGAN-csmsc</a> 
      </td>
    </tr>
    <tr>
      <td>HiFiGAN</td>
      <td>LJSpeech / VCTK / CSMSC / AISHELL-3 / Opencpop</td>
      <td>
      <a href = "./examples/ljspeech/voc5">HiFiGAN-ljspeech</a> / <a href = "./examples/vctk/voc5">HiFiGAN-vctk</a> / <a href = "./examples/csmsc/voc5">HiFiGAN-csmsc</a> / <a href = "./examples/aishell3/voc5">HiFiGAN-aishell3</a> / <a href = "./examples/opencpop/voc5">HiFiGAN-opencpop</a>
      </td>
    </tr>
    <tr>
      <td>WaveRNN</td>
      <td>CSMSC</td>
      <td>
      <a href = "./examples/csmsc/voc6">WaveRNN-csmsc</a>
      </td>
    </tr>
    <tr>
      <td rowspan="5">Voice Cloning</td>
      <td>GE2E</td>
      <td >Librispeech, etc.</td>
      <td>
      <a href = "./examples/other/ge2e">GE2E</a>
      </td>
    </tr>
    <tr>
      <td>SV2TTS (GE2E + Tacotron2)</td>
      <td>AISHELL-3</td>
      <td>
      <a href = "./examples/aishell3/vc0">VC0</a>
      </td>
    </tr>
    <tr>
      <td>SV2TTS (GE2E + FastSpeech2)</td>
      <td>AISHELL-3</td>
      <td>
      <a href = "./examples/aishell3/vc1">VC1</a>
      </td>
    </tr>
    <tr>
      <td>SV2TTS (ECAPA-TDNN + FastSpeech2)</td>
      <td>AISHELL-3</td>
      <td>
      <a href = "./examples/aishell3/vc2">VC2</a>
      </td>
    </tr>
    <tr>
      <td>GE2E + VITS</td>
      <td>AISHELL-3</td>
      <td>
      <a href = "./examples/aishell3/vits-vc">VITS-VC</a>
      </td>
    </tr>
    <tr>
      <td rowspan="1">End-to-End</td>
      <td>VITS</td>
      <td>CSMSC / AISHELL-3</td>
      <td>
      <a href = "./examples/csmsc/vits">VITS-csmsc</a> / <a href = "./examples/aishell3/vits">VITS-aishell3</a>
      </td>
    </tr>
  </tbody>
</table>

<a name="AudioClassification"></a>

**Audio Classification**

<table style="width:100%">
  <thead>
    <tr>
      <th> Task </th>
      <th> Dataset </th>
      <th> Model Type </th>
      <th> Example </th>
    </tr>
  </thead>
  <tbody>
  <tr>
      <td>Audio Classification</td>
      <td>ESC-50</td>
      <td>PANN</td>
      <td>
      <a href = "./examples/esc50/cls0">pann-esc50</a>
      </td>
    </tr>
  </tbody>
</table>

<a name="KeywordSpotting"></a>

**Keyword Spotting**

<table style="width:100%">
  <thead>
    <tr>
      <th> Task </th>
      <th> Dataset </th>
      <th> Model Type </th>
      <th> Example </th>
    </tr>
  </thead>
  <tbody>
  <tr>
      <td>Keyword Spotting</td>
      <td>hey-snips</td>
      <td>MDTC</td>
      <td>
      <a href = "./examples/hey_snips/kws0">mdtc-hey-snips</a>
      </td>
    </tr>
  </tbody>
</table>

<a name="SpeakerVerification"></a>

**Speaker Verification**

<table style="width:100%">
  <thead>
    <tr>
      <th> Task </th>
      <th> Dataset </th>
      <th> Model Type </th>
      <th> Example </th>
    </tr>
  </thead>
  <tbody>
  <tr>
      <td>Speaker Verification</td>
      <td>VoxCeleb1/2</td>
      <td>ECAPA-TDNN</td>
      <td>
      <a href = "./examples/voxceleb/sv0">ecapa-tdnn-voxceleb12</a>
      </td>
    </tr>
  </tbody>
</table>

<a name="SpeakerDiarization"></a>

**Speaker Diarization**

<table style="width:100%">
  <thead>
    <tr>
      <th> Task </th>
      <th> Dataset </th>
      <th> Model Type </th>
      <th> Example </th>
    </tr>
  </thead>
  <tbody>
  <tr>
      <td>Speaker Diarization</td>
     <td>AMI</td>
      <td>ECAPA-TDNN + AHC / SC</td>
      <td>
      <a href = "./examples/ami/sd0">ecapa-tdnn-ami</a>
      </td>
    </tr>
  </tbody>
</table>

<a name="PunctuationRestoration"></a>

**Punctuation Restoration**

<table style="width:100%">
  <thead>
    <tr>
      <th> Task </th>
      <th> Dataset </th>
      <th> Model Type </th>
      <th> Example </th>
    </tr>
  </thead>
  <tbody>
  <tr>
      <td>Punctuation Restoration</td>
      <td>IWSLT2012_zh</td>
      <td>Ernie Linear</td>
      <td>
      <a href = "./examples/iwslt2012/punc0">iwslt2012-punc0</a>
      </td>
    </tr>
  </tbody>
</table>

## Documents

Normally, [Speech SoTA](https://paperswithcode.com/area/speech), [Audio SoTA](https://paperswithcode.com/area/audio) and [Music SoTA](https://paperswithcode.com/area/music) give you an overview of the hot academic topics in the related area. To focus on the tasks in PaddleSpeech, you will find the following guidelines are helpful to grasp the core ideas.

- [Installation](./docs/source/install.md)
- [Quick Start](#quickstart)
- [Some Demos](./demos/README.md)
- Tutorials
  - [Automatic Speech Recognition](./docs/source/asr/quick_start.md)
    - [Introduction](./docs/source/asr/models_introduction.md)
    - [Data Preparation](./docs/source/asr/data_preparation.md)
    - [Ngram LM](./docs/source/asr/ngram_lm.md)
  - [Text-to-Speech](./docs/source/tts/quick_start.md)
    - [Introduction](./docs/source/tts/models_introduction.md)
    - [Advanced Usage](./docs/source/tts/advanced_usage.md)
    - [Chinese Rule Based Text Frontend](./docs/source/tts/zh_text_frontend.md)
    - [Test Audio Samples](https://paddlespeech.readthedocs.io/en/latest/tts/demo.html)
  - Speaker Verification
    - [Audio Searching](./demos/audio_searching/README.md)
    - [Speaker Verification](./demos/speaker_verification/README.md)
  - [Audio Classification](./demos/audio_tagging/README.md)
  - [Speech Translation](./demos/speech_translation/README.md)
  - [Speech Server](./demos/speech_server/README.md)
- [Released Models](./docs/source/released_model.md)
  - [Speech-to-Text](#SpeechToText)
  - [Text-to-Speech](#TextToSpeech)
  - [Audio Classification](#AudioClassification)
  - [Speaker Verification](#SpeakerVerification)
  - [Speaker Diarization](#SpeakerDiarization)
  - [Punctuation Restoration](#PunctuationRestoration)
- [Community](#Community)
- [Welcome to contribute](#contribution)
- [License](#License)

The Text-to-Speech module was originally called [Parakeet](https://github.com/PaddlePaddle/Parakeet), and has now been merged into this repository. If you are interested in academic research about this task, please see [TTS research overview](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/docs/source/tts#overview). Also, [this document](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/tts/models_introduction.md) is a good guideline for the pipeline components.


## ⭐ Examples
- **[PaddleBoBo](https://github.com/JiehangXie/PaddleBoBo): Use PaddleSpeech TTS to generate virtual human voice.**
  
<div align="center"><a href="https://www.bilibili.com/video/BV1cL411V71o?share_source=copy_web"><img src="https://ai-studio-static-online.cdn.bcebos.com/06fd746ab32042f398fb6f33f873e6869e846fe63c214596ae37860fe8103720" width="500px" /></a></div>

- [PaddleSpeech Demo Video](https://paddlespeech.readthedocs.io/en/latest/demo_video.html)

- **[VTuberTalk](https://github.com/jerryuhoo/VTuberTalk): Use PaddleSpeech TTS and ASR to clone voice from videos.**


## Citation

To cite PaddleSpeech for research, please use the following format.

```text
@inproceedings{zhang2022paddlespeech,
    title = {PaddleSpeech: An Easy-to-Use All-in-One Speech Toolkit},
    author = {Hui Zhang and Tian Yuan and Junkun Chen and Xintong Li and Renjie Zheng and Yuxin Huang and Xiaojie Chen and Enlei Gong and Zeyu Chen and Xiaoguang Hu and Dianhai Yu and Yanjun Ma and Liang Huang},
    booktitle = {Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Demonstrations},
    year = {2022},
    publisher = {Association for Computational Linguistics},
}

@InProceedings{pmlr-v162-bai22d,
  title = {{A}$^3${T}: Alignment-Aware Acoustic and Text Pretraining for Speech Synthesis and Editing},
  author = {Bai, He and Zheng, Renjie and Chen, Junkun and Ma, Mingbo and Li, Xintong and Huang, Liang},
  booktitle = {Proceedings of the 39th International Conference on Machine Learning},
  pages = {1399--1411},
  year = {2022},
  volume = {162},
  series = {Proceedings of Machine Learning Research},
  month = {17--23 Jul},
  publisher = {PMLR},
  pdf = {https://proceedings.mlr.press/v162/bai22d/bai22d.pdf},
  url = {https://proceedings.mlr.press/v162/bai22d.html},
}

@inproceedings{zheng2021fused,
  title={Fused acoustic and text encoding for multimodal bilingual pretraining and speech translation},
  author={Zheng, Renjie and Chen, Junkun and Ma, Mingbo and Huang, Liang},
  booktitle={International Conference on Machine Learning},
  pages={12736--12746},
  year={2021},
  organization={PMLR}
}
```

<a name="contribution"></a>
## Contribute to PaddleSpeech

You are warmly welcome to submit questions in [discussions](https://github.com/PaddlePaddle/PaddleSpeech/discussions) and bug reports in [issues](https://github.com/PaddlePaddle/PaddleSpeech/issues)! Also, we would highly appreciate it if you are willing to contribute to this project!

### Contributors
<p align="center">
<a href="https://github.com/zh794390558"><img src="https://avatars.githubusercontent.com/u/3038472?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/Jackwaterveg"><img src="https://avatars.githubusercontent.com/u/87408988?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/yt605155624"><img src="https://avatars.githubusercontent.com/u/24568452?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/Honei"><img src="https://avatars.githubusercontent.com/u/11361692?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/KPatr1ck"><img src="https://avatars.githubusercontent.com/u/22954146?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/kuke"><img src="https://avatars.githubusercontent.com/u/3064195?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/lym0302"><img src="https://avatars.githubusercontent.com/u/34430015?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/SmileGoat"><img src="https://avatars.githubusercontent.com/u/56786796?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/xinghai-sun"><img src="https://avatars.githubusercontent.com/u/7038341?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/pkuyym"><img src="https://avatars.githubusercontent.com/u/5782283?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/LittleChenCc"><img src="https://avatars.githubusercontent.com/u/10339970?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/qingen"><img src="https://avatars.githubusercontent.com/u/3139179?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/D-DanielYang"><img src="https://avatars.githubusercontent.com/u/23690325?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/Mingxue-Xu"><img src="https://avatars.githubusercontent.com/u/92848346?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/745165806"><img src="https://avatars.githubusercontent.com/u/20623194?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/jerryuhoo"><img src="https://avatars.githubusercontent.com/u/24245709?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/WilliamZhang06"><img src="https://avatars.githubusercontent.com/u/97937340?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/chrisxu2016"><img src="https://avatars.githubusercontent.com/u/18379485?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/iftaken"><img src="https://avatars.githubusercontent.com/u/30135920?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/lfchener"><img src="https://avatars.githubusercontent.com/u/6771821?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/BarryKCL"><img src="https://avatars.githubusercontent.com/u/48039828?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/mmglove"><img src="https://avatars.githubusercontent.com/u/38800877?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/gongel"><img src="https://avatars.githubusercontent.com/u/24390500?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/luotao1"><img src="https://avatars.githubusercontent.com/u/6836917?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/wanghaoshuang"><img src="https://avatars.githubusercontent.com/u/7534971?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/kslz"><img src="https://avatars.githubusercontent.com/u/54951765?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/JiehangXie"><img src="https://avatars.githubusercontent.com/u/51190264?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/david-95"><img src="https://avatars.githubusercontent.com/u/15189190?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/THUzyt21"><img src="https://avatars.githubusercontent.com/u/91456992?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/buchongyu2"><img src="https://avatars.githubusercontent.com/u/29157444?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/iclementine"><img src="https://avatars.githubusercontent.com/u/16222986?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/phecda-xu"><img src="https://avatars.githubusercontent.com/u/46859427?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/freeliuzc"><img src="https://avatars.githubusercontent.com/u/23568094?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/ZeyuChen"><img src="https://avatars.githubusercontent.com/u/1371212?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/ccrrong"><img src="https://avatars.githubusercontent.com/u/101700995?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/AK391"><img src="https://avatars.githubusercontent.com/u/81195143?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/qingqing01"><img src="https://avatars.githubusercontent.com/u/7845005?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/0x45f"><img src="https://avatars.githubusercontent.com/u/23097963?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/vpegasus"><img src="https://avatars.githubusercontent.com/u/22723154?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/ericxk"><img src="https://avatars.githubusercontent.com/u/4719594?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/Betterman-qs"><img src="https://avatars.githubusercontent.com/u/61459181?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/sneaxiy"><img src="https://avatars.githubusercontent.com/u/32832641?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/Doubledongli"><img src="https://avatars.githubusercontent.com/u/20540661?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/apps/dependabot"><img src="https://avatars.githubusercontent.com/in/29110?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/kvinwang"><img src="https://avatars.githubusercontent.com/u/6442159?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/chenkui164"><img src="https://avatars.githubusercontent.com/u/34813030?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/PaddleZhang"><img src="https://avatars.githubusercontent.com/u/97284124?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/billishyahao"><img src="https://avatars.githubusercontent.com/u/96406262?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/BrightXiaoHan"><img src="https://avatars.githubusercontent.com/u/25839309?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/jiqiren11"><img src="https://avatars.githubusercontent.com/u/82639260?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/ryanrussell"><img src="https://avatars.githubusercontent.com/u/523300?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/GT-ZhangAcer"><img src="https://avatars.githubusercontent.com/u/46156734?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/tensor-tang"><img src="https://avatars.githubusercontent.com/u/21351065?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/hysunflower"><img src="https://avatars.githubusercontent.com/u/52739577?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/oyjxer"><img src="https://avatars.githubusercontent.com/u/16233945?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/JamesLim-sy"><img src="https://avatars.githubusercontent.com/u/61349199?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/limpidezza"><img src="https://avatars.githubusercontent.com/u/71760778?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/windstamp"><img src="https://avatars.githubusercontent.com/u/34057289?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/AshishKarel"><img src="https://avatars.githubusercontent.com/u/58069375?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/chesterkuo"><img src="https://avatars.githubusercontent.com/u/6285069?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/YDX-2147483647"><img src="https://avatars.githubusercontent.com/u/73375426?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/AdamBear"><img src="https://avatars.githubusercontent.com/u/2288870?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/wwhu"><img src="https://avatars.githubusercontent.com/u/6081200?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/lispc"><img src="https://avatars.githubusercontent.com/u/2833376?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/harisankarh"><img src="https://avatars.githubusercontent.com/u/1307053?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/pengzhendong"><img src="https://avatars.githubusercontent.com/u/10704539?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/Jackiexiao"><img src="https://avatars.githubusercontent.com/u/18050469?s=60&v=4" width=75 height=75></a>
</p>

## Acknowledgement
- Many thanks to [HighCWu](https://github.com/HighCWu) for adding [VITS-aishell3](./examples/aishell3/vits) and [VITS-VC](./examples/aishell3/vits-vc) examples.
- Many thanks to [david-95](https://github.com/david-95) for fixing the multi-punctuation bug, contributing multiple programs and data, and adding [SSML](https://github.com/PaddlePaddle/PaddleSpeech/discussions/2538) for TTS Chinese Text Frontend.
- Many thanks to [BarryKCL](https://github.com/BarryKCL) for improving TTS Chinese Frontend based on [G2PW](https://github.com/GitYCC/g2pW).
- Many thanks to [yeyupiaoling](https://github.com/yeyupiaoling)/[PPASR](https://github.com/yeyupiaoling/PPASR)/[PaddlePaddle-DeepSpeech](https://github.com/yeyupiaoling/PaddlePaddle-DeepSpeech)/[VoiceprintRecognition-PaddlePaddle](https://github.com/yeyupiaoling/VoiceprintRecognition-PaddlePaddle)/[AudioClassification-PaddlePaddle](https://github.com/yeyupiaoling/AudioClassification-PaddlePaddle) for years of attention, constructive advice and great help.
- Many thanks to [mymagicpower](https://github.com/mymagicpower) for the Java implementation of ASR upon [short](https://github.com/mymagicpower/AIAS/tree/main/3_audio_sdks/asr_sdk) and [long](https://github.com/mymagicpower/AIAS/tree/main/3_audio_sdks/asr_long_audio_sdk) audio files.
- Many thanks to [JiehangXie](https://github.com/JiehangXie)/[PaddleBoBo](https://github.com/JiehangXie/PaddleBoBo) for developing Virtual Uploader(VUP)/Virtual YouTuber(VTuber) with PaddleSpeech TTS function.
- Many thanks to [745165806](https://github.com/745165806)/[PaddleSpeechTask](https://github.com/745165806/PaddleSpeechTask) for contributing Punctuation Restoration model.
- Many thanks to [kslz](https://github.com/kslz) for supplementary Chinese documents.
- Many thanks to [awmmmm](https://github.com/awmmmm) for contributing fastspeech2 aishell3 conformer pretrained model.
- Many thanks to [phecda-xu](https://github.com/phecda-xu)/[PaddleDubbing](https://github.com/phecda-xu/PaddleDubbing) for developing a dubbing tool with GUI based on PaddleSpeech TTS model.
- Many thanks to [jerryuhoo](https://github.com/jerryuhoo)/[VTuberTalk](https://github.com/jerryuhoo/VTuberTalk) for developing a GUI tool based on PaddleSpeech TTS and code for making datasets from videos based on PaddleSpeech ASR.
- Many thanks to [vpegasus](https://github.com/vpegasus)/[xuesebot](https://github.com/vpegasus/xuesebot) for developing a rasa chatbot, which is able to speak and listen thanks to PaddleSpeech.
- Many thanks to [chenkui164](https://github.com/chenkui164)/[FastASR](https://github.com/chenkui164/FastASR) for the C++ inference implementation of PaddleSpeech ASR.
- Many thanks to [heyudage](https://github.com/heyudage)/[VoiceTyping](https://github.com/heyudage/VoiceTyping) for the real-time voice typing tool implementation of PaddleSpeech ASR streaming services.
- Many thanks to [EscaticZheng](https://github.com/EscaticZheng)/[ps3.9wheel-install](https://github.com/EscaticZheng/ps3.9wheel-install) for the python3.9 prebuilt wheel for PaddleSpeech installation in Windows without Visual Studio.

Besides, PaddleSpeech depends on a lot of open source repositories. See [references](./docs/source/reference.md) for more information.

- Many thanks to [chinobing](https://github.com/chinobing)/[FastAPI-PaddleSpeech-Audio-To-Text](https://github.com/chinobing/FastAPI-PaddleSpeech-Audio-To-Text) for converting audio to text based on FastAPI and PaddleSpeech.
- Many thanks to [MistEO](https://github.com/MistEO)/[Pallas-Bot](https://github.com/MistEO/Pallas-Bot) for QQ bot based on PaddleSpeech TTS.

<a name="License"></a>
## License

PaddleSpeech is provided under the [Apache-2.0 License](./LICENSE).

## Stargazers over time

[![Stargazers over time](https://starchart.cc/PaddlePaddle/PaddleSpeech.svg)](https://starchart.cc/PaddlePaddle/PaddleSpeech)


================================================
FILE: README_cn.md
================================================
(简体中文|[English](./README.md))
<p align="center">
  <img src="./docs/images/PaddleSpeech_logo.png" />
</p>


<p align="center">
    <a href="./LICENSE"><img src="https://img.shields.io/badge/license-Apache%202-red.svg"></a>
    <a href="https://github.com/PaddlePaddle/PaddleSpeech/releases"><img src="https://img.shields.io/github/v/release/PaddlePaddle/PaddleSpeech?color=ffa"></a>
    <a href="support os"><img src="https://img.shields.io/badge/os-linux%2C%20win%2C%20mac-pink.svg"></a>
    <a href=""><img src="https://img.shields.io/badge/python-3.8+-aff.svg"></a>
    <a href="https://github.com/PaddlePaddle/PaddleSpeech/graphs/contributors"><img src="https://img.shields.io/github/contributors/PaddlePaddle/PaddleSpeech?color=9ea"></a>
    <a href="https://github.com/PaddlePaddle/PaddleSpeech/commits"><img src="https://img.shields.io/github/commit-activity/m/PaddlePaddle/PaddleSpeech?color=3af"></a>
    <a href="https://github.com/PaddlePaddle/PaddleSpeech/issues"><img src="https://img.shields.io/github/issues/PaddlePaddle/PaddleSpeech?color=9cc"></a>
    <a href="https://github.com/PaddlePaddle/PaddleSpeech/stargazers"><img src="https://img.shields.io/github/stars/PaddlePaddle/PaddleSpeech?color=ccf"></a>
    <a href="https://pypi.org/project/paddlespeech/"><img src="https://img.shields.io/pypi/dm/PaddleSpeech"></a>
    <a href="https://pypi.org/project/paddlespeech/"><img src="https://static.pepy.tech/badge/paddlespeech"></a>
    <a href="https://huggingface.co/spaces"><img src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue"></a>
</p>
<div align="center">  
<h4>
    <a href="#安装"> 安装 </a>
  | <a href="#快速开始"> 快速开始 </a>
  | <a href="#教程文档"> 教程文档 </a>
  | <a href="#模型列表"> 模型列表 </a>
  | <a href="https://aistudio.baidu.com/aistudio/course/introduce/25130"> AIStudio 课程 </a>
  | <a href="https://arxiv.org/abs/2205.12007"> NAACL2022 论文 </a>
  | <a href="https://gitee.com/paddlepaddle/PaddleSpeech"> Gitee </a>
</h4>
</div>


------------------------------------------------------------------------------------

**PaddleSpeech** 是基于飞桨 [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) 的语音方向的开源模型库，用于语音和音频中的各种关键任务的开发，包含大量基于深度学习前沿和有影响力的模型，一些典型的应用示例如下：

**PaddleSpeech** 荣获 [NAACL2022 Best Demo Award](https://2022.naacl.org/blog/best-demo-award/), 请访问 [Arxiv](https://arxiv.org/abs/2205.12007) 论文。
  
### 效果展示

##### 语音识别

<div align = "center">
<table style="width:100%">
  <thead>
    <tr>
      <th> 输入音频  </th>
      <th width="550"> 识别结果 </th>
    </tr>
  </thead>
  <tbody>
   <tr>
      <td align = "center">
      <a href="https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav" rel="nofollow">
            <img align="center" src="./docs/images/audio_icon.png" width="200" style="max-width: 100%;"></a><br>
      </td>
      <td >I knocked at the door on the ancient side of the building.</td>
    </tr>
    <tr>
      <td align = "center">
      <a href="https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav" rel="nofollow">
            <img align="center" src="./docs/images/audio_icon.png" width="200" style="max-width: 100%;"></a><br>
      </td>
      <td>我认为跑步最重要的就是给我带来了身体健康。</td>
    </tr>
  </tbody>
</table>

</div>

##### 语音翻译 (英译中)

<div align = "center">
<table style="width:100%">
  <thead>
    <tr>
      <th> 输入音频 </th>
      <th width="550"> 翻译结果 </th>
    </tr>
  </thead>
  <tbody>
   <tr>
      <td align = "center">
      <a href="https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav" rel="nofollow">
            <img align="center" src="./docs/images/audio_icon.png" width="200" style="max-width: 100%;"></a><br>
      </td>
      <td >我 在 这栋 建筑 的 古老 门上 敲门。</td>
    </tr>
  </tbody>
</table>

</div>

##### 语音合成
<div align = "center">
<table style="width:100%">
  <thead>
    <tr>
      <th width="550">输入文本</th>
      <th>合成音频</th>
    </tr>
  </thead>
  <tbody>
   <tr>
      <td >Life was like a box of chocolates, you never know what you're gonna get.</td>
      <td align = "center">
      <a href="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/tacotron2_ljspeech_waveflow_samples_0.2/sentence_1.wav" rel="nofollow">
            <img align="center" src="./docs/images/audio_icon.png" width="200" style="max-width: 100%;"></a><br>
      </td>
    </tr>
    <tr>
      <td >早上好，今天是2020/10/29，最低温度是-3°C。</td>
      <td align = "center">
      <a href="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/001.wav" rel="nofollow">
            <img align="center" src="./docs/images/audio_icon.png" width="200" style="max-width: 100%;"></a><br>
      </td>
    </tr>
    <tr>
      <td >季姬寂，集鸡，鸡即棘鸡。棘鸡饥叽，季姬及箕稷济鸡。鸡既济，跻姬笈，季姬忌，急咭鸡，鸡急，继圾几，季姬急，即籍箕击鸡，箕疾击几伎，伎即齑，鸡叽集几基，季姬急极屐击鸡，鸡既殛，季姬激，即记《季姬击鸡记》。</td>
      <td align = "center">
      <a href="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/jijiji.wav" rel="nofollow">
            <img align="center" src="./docs/images/audio_icon.png" width="200" style="max-width: 100%;"></a><br>
      </td>
    </tr>
    <tr>
      <td>大家好，我是 parrot 虚拟老师，我们来读一首诗，我与春风皆过客，I and the spring breeze are passing by，你携秋水揽星河，you take the autumn water to take the galaxy。</td>
      <td align = "center">
      <a href="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/labixiaoxin.wav" rel="nofollow">
            <img align="center" src="./docs/images/audio_icon.png" width="200" style="max-width: 100%;"></a><br>
      </td>
    </tr>
    <tr>
      <td>宜家唔系事必要你讲，但系你所讲嘅说话将会变成呈堂证供。</td>
      <td align = "center">
      <a href="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/chengtangzhenggong.wav" rel="nofollow">
            <img align="center" src="./docs/images/audio_icon.png" width="200" style="max-width: 100%;"></a><br>
      </td>
    </tr>
    <tr>
      <td>各个国家有各个国家嘅国歌</td>
      <td align = "center">
      <a href="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/gegege.wav" rel="nofollow">
            <img align="center" src="./docs/images/audio_icon.png" width="200" style="max-width: 100%;"></a><br>
      </td>
    </tr>
  </tbody>
</table>

</div>

更多合成音频，可以参考 [PaddleSpeech 语音合成音频示例](https://paddlespeech.readthedocs.io/en/latest/tts/demo.html)。

##### 标点恢复
<div align = "center">
<table style="width:100%">
  <thead>
    <tr>
      <th width="390"> 输入文本 </th>
      <th width="390"> 输出文本 </th>
    </tr>
  </thead>
  <tbody>
   <tr>
      <td>今天的天气真不错啊你下午有空吗我想约你一起去吃饭</td>
      <td>今天的天气真不错啊！你下午有空吗？我想约你一起去吃饭。</td>
    </tr>
  </tbody>
</table>

</div>


### 特性

本项目采用了易用、高效、灵活以及可扩展的实现，旨在为工业应用、学术研究提供更好的支持，实现的功能包含训练、推断以及测试模块，以及部署过程，主要包括
- 📦 **易用性**: 安装门槛低，可使用 [CLI](#快速开始) 快速开始。
- 🏆 **对标 SoTA**: 提供了高速、轻量级模型，且借鉴了最前沿的技术。
- 🏆 **流式 ASR 和 TTS 系统**：工业级的端到端流式识别、流式合成系统。
- 💯 **基于规则的中文前端**: 我们的前端包含文本正则化和字音转换（G2P）。此外，我们使用自定义语言规则来适应中文语境。
- **多种工业界以及学术界主流功能支持**:
  - 🛎️ 典型音频任务: 本工具包提供了音频任务如音频分类、语音翻译、自动语音识别、文本转语音、语音合成、声纹识别、KWS等任务的实现。
  - 🔬 主流模型及数据集: 本工具包实现了参与整条语音任务流水线的各个模块，并且采用了主流数据集如 LibriSpeech、LJSpeech、AIShell、CSMSC，详情请见 [模型列表](#模型列表)。
  - 🧩 级联模型应用: 作为传统语音任务的扩展，我们结合了自然语言处理、计算机视觉等任务，实现更接近实际需求的产业级应用。

### 近期更新
- 🎉 2025.09.01: 新增 [Whisper large v3 与 turbo 模型](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos/whisper).
- 🤗 2025.08.11: 新增 [流式中英混合 tal_cs 识别模型](./examples/tal_cs/asr1/).
- 👑 2023.05.31: 新增 [WavLM ASR-en](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/examples/librispeech/asr5), 基于WavLM的英语识别微调，使用LibriSpeech数据集
- 🎉 2023.05.18: 新增 [Squeezeformer](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell/asr1), 使用Squeezeformer进行训练，使用Aishell数据集
- 👑 2023.05.04: 新增 [HuBERT ASR-en](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/examples/librispeech/asr4), 基于HuBERT的英语识别微调，使用LibriSpeech数据集
- ⚡ 2023.04.28: 修正 [0-d tensor](https://github.com/PaddlePaddle/PaddleSpeech/pull/3214), 配合PaddlePaddle2.5升级修改了0-d tensor的问题。
- 👑 2023.04.25: 新增 [U2 conformer 的 AMP 训练](https://github.com/PaddlePaddle/PaddleSpeech/pull/3167).
- 👑 2023.04.06: 新增 [srt格式字幕生成功能](./demos/streaming_asr_server)。
- 🔥 2023.03.14: 新增基于 Opencpop 数据集的 SVS (歌唱合成) 示例，包含 [DiffSinger](./examples/opencpop/svs1)、[PWGAN](./examples/opencpop/voc1) 和 [HiFiGAN](./examples/opencpop/voc5)，效果持续优化中。
- 👑 2023.03.09: 新增 [Wav2vec2ASR-zh](./examples/aishell/asr3)。
- 🎉 2023.03.07: 新增 [TTS ARM Linux C++ 部署示例 (包含 C++ 中文文本前端模块)](./demos/TTSArmLinux)。
- 🔥 2023.03.03: 新增声音转换模型 [StarGANv2-VC 合成流程](./examples/vctk/vc3)。
- 🎉 2023.02.16: 新增[粤语语音合成](./examples/canton/tts3)。
- 🔥 2023.01.10: 新增[中英混合 ASR CLI 和 Demos](./demos/speech_recognition)。
- 👑 2023.01.06: 新增 [ASR 中英混合 tal_cs 训练推理流程](./examples/tal_cs/asr1/)。
- 🎉 2022.12.02: 新增[端到端韵律预测全流程](./examples/csmsc/tts3_rhy) (包含在声学模型中使用韵律标签)。
- 🎉 2022.11.30: 新增 [TTS Android 部署示例](./demos/TTSAndroid)。
- 🤗 2022.11.28: PP-TTS 和 PP-ASR 示例可在 [AIStudio](https://aistudio.baidu.com/aistudio/modelsoverview) 和[飞桨官网](https://www.paddlepaddle.org.cn/models)体验！
- 👑 2022.11.18: 新增 [Whisper CLI 和 Demos](https://github.com/PaddlePaddle/PaddleSpeech/pull/2640), 支持多种语言的识别与翻译。
- 🔥 2022.11.18: 新增 [Wav2vec2 CLI 和 Demos](./demos/speech_ssl), 支持 ASR 和特征提取。
- 🎉 2022.11.17: TTS 新增[高质量男性音色](https://github.com/PaddlePaddle/PaddleSpeech/pull/2660)。
- 🔥 2022.11.07: 新增 [U2/U2++ 高性能流式 ASR C++ 部署](./speechx/examples/u2pp_ol/wenetspeech)。
- 👑 2022.11.01: [中英文混合 TTS](./examples/zh_en_tts/tts3) 新增 [Adversarial Loss](https://arxiv.org/pdf/1907.04448.pdf) 模块。
- 🔥 2022.10.26: TTS 新增[韵律预测](./examples/other/rhy)功能。
- 🎉 2022.10.21: TTS 中文文本前端新增 [SSML](https://github.com/PaddlePaddle/PaddleSpeech/discussions/2538) 功能。
- 👑 2022.10.11: 新增 [Wav2vec2ASR-en](./examples/librispeech/asr3), 在 LibriSpeech 上针对 ASR 任务对 wav2vec2.0 的 finetuning。
- 🔥 2022.09.26: 新增 Voice Cloning, TTS finetune 和 [ERNIE-SAT](https://arxiv.org/abs/2211.03545) 到 [PaddleSpeech 网页应用](./demos/speech_web)。
- ⚡ 2022.09.09: 新增基于 ECAPA-TDNN 声纹模型的 AISHELL-3 Voice Cloning [示例](./examples/aishell3/vc2)。
- ⚡ 2022.08.25: 发布 TTS [finetune](./examples/other/tts_finetune/tts3) 示例。
- 🔥 2022.08.22: 新增 [ERNIE-SAT](https://arxiv.org/abs/2211.03545) 模型: [ERNIE-SAT-vctk](./examples/vctk/ernie_sat)、[ERNIE-SAT-aishell3](./examples/aishell3/ernie_sat)、[ERNIE-SAT-zh_en](./examples/aishell3_vctk/ernie_sat)。
- 🔥 2022.08.15: 将 [g2pW](https://github.com/GitYCC/g2pW) 引入 TTS 中文文本前端。
- 🔥 2022.08.09: 发布[中英文混合 TTS](./examples/zh_en_tts/tts3)。
- ⚡ 2022.08.03: TTS CLI 新增 ONNXRuntime 推理方式。
- 🎉 2022.07.18: 发布 VITS 模型: [VITS-csmsc](./examples/csmsc/vits)、[VITS-aishell3](./examples/aishell3/vits)、[VITS-VC](./examples/aishell3/vits-vc)。
- 🎉 2022.06.22: 所有 TTS 模型支持了 ONNX 格式。
- 🍀 2022.06.17: 新增 [PaddleSpeech 网页应用](./demos/speech_web)。
- 👑 2022.05.13: PaddleSpeech 发布 [PP-ASR](./docs/source/asr/PPASR_cn.md) 流式语音识别系统、[PP-TTS](./docs/source/tts/PPTTS_cn.md) 流式语音合成系统、[PP-VPR](docs/source/vpr/PPVPR_cn.md) 全链路声纹识别系统
- 👏🏻 2022.05.06: PaddleSpeech Streaming Server 上线！覆盖了语音识别（标点恢复、时间戳）和语音合成。
- 👏🏻 2022.05.06: PaddleSpeech Server 上线！覆盖了声音分类、语音识别、语音合成、声纹识别，标点恢复。
- 👏🏻 2022.03.28: PaddleSpeech CLI 覆盖声音分类、语音识别、语音翻译（英译中）、语音合成和声纹验证。
- 👏🏻 2021.12.10: PaddleSpeech CLI 支持语音分类, 语音识别, 语音翻译（英译中）和语音合成。


 ### 🔥 加入技术交流群获取入群福利

 - 3 日直播课链接: 深度解读 【一句话语音合成】【小样本语音合成】【定制化语音识别】语音交互技术
 - 20G 学习大礼包：视频课程、前沿论文与学习资料
  
微信扫描二维码关注公众号，点击“马上报名”填写问卷加入官方交流群，获得更高效的问题答疑，与各行各业开发者充分交流，期待您的加入。

<div align="center">
<img src="https://user-images.githubusercontent.com/30135920/212860467-9e943cc3-8be8-49a4-97fd-7c94aad8e979.jpg"  width = "200"  />
</div>

<a name="安装"></a>
## 安装

我们强烈建议用户在 **Linux** 环境下，*3.8* 以上版本的 *python* 上安装 PaddleSpeech。

### 相关依赖
+ gcc >= 4.8.5
+ paddlepaddle
+ python >= 3.8
+ linux(推荐), mac, windows

PaddleSpeech 依赖于 paddlepaddle，安装可以参考[ paddlepaddle 官网](https://www.paddlepaddle.org.cn/)，根据自己机器的情况进行选择。这里给出 cpu 版本示例，其它版本大家可以根据自己机器的情况进行安装。

```shell
pip install paddlepaddle -i https://mirror.baidu.com/pypi/simple
```
你也可以安装指定版本的paddlepaddle，或者安装 develop 版本。
```bash
# 安装2.4.1版本. 注意：2.4.1只是一个示例，请按照对paddlepaddle的最小依赖进行选择。
pip install paddlepaddle==2.4.1 -i https://mirror.baidu.com/pypi/simple
# 安装 develop 版本
pip install paddlepaddle==0.0.0 -f https://www.paddlepaddle.org.cn/whl/linux/cpu-mkl/develop.html
```
PaddleSpeech 快速安装方式有两种，一种是 pip 安装，一种是源码编译（推荐）。

### pip 安装
```shell
pip install pytest-runner
pip install paddlespeech
```

### 源码编译
```shell
git clone https://github.com/PaddlePaddle/PaddleSpeech.git
cd PaddleSpeech
pip install pytest-runner
pip install .
# 如果需要在可编辑模式下安装，需要使用 --use-pep517，命令如下
# pip install -e . --use-pep517
```

更多关于安装问题，如 conda 环境，librosa 依赖的系统库，gcc 环境问题，kaldi 安装等，可以参考这篇[安装文档](docs/source/install_cn.md)，如安装上遇到问题可以在 [#2150](https://github.com/PaddlePaddle/PaddleSpeech/issues/2150) 上留言以及查找相关问题

<a name="快速开始"></a>
## 快速开始
安装完成后，开发者可以通过命令行或者 Python 快速开始，命令行模式下改变 `--input` 可以尝试用自己的音频或文本测试，支持 16k wav 格式音频。

你也可以在 `aistudio` 中快速体验 👉🏻[一键预测，快速上手 Speech 开发任务](https://aistudio.baidu.com/aistudio/projectdetail/4353348?sUid=2470186&shared=1&ts=1660878142250)。

测试音频示例下载
```shell
wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav
wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav
```

### 语音识别
<details><summary>&emsp;（点击可展开）开源中文语音识别</summary>

命令行一键体验

```shell
paddlespeech asr --lang zh --input zh.wav
```

Python API 一键预测

```python
>>> from paddlespeech.cli.asr.infer import ASRExecutor
>>> asr = ASRExecutor()
>>> result = asr(audio_file="zh.wav")
>>> print(result)
我认为跑步最重要的就是给我带来了身体健康
```
</details>

### 语音合成

<details><summary>&emsp;开源中文语音合成</summary>

输出 24k 采样率wav格式音频


命令行一键体验

```shell
paddlespeech tts --input "你好，欢迎使用百度飞桨深度学习框架！" --output output.wav
```

Python API 一键预测

```python
>>> from paddlespeech.cli.tts.infer import TTSExecutor
>>> tts = TTSExecutor()
>>> tts(text="今天天气十分不错。", output="output.wav")
```
- 语音合成的 web demo 已经集成进了 [Huggingface Spaces](https://huggingface.co/spaces). 请参考: [TTS Demo](https://huggingface.co/spaces/KPatrick/PaddleSpeechTTS)

</details>

### 声音分类   

<details><summary>&emsp;适配多场景的开放领域声音分类工具</summary>

基于 AudioSet 数据集 527 个类别的声音分类模型

命令行一键体验

```shell
paddlespeech cls --input zh.wav
```

python API 一键预测

```python
>>> from paddlespeech.cli.cls.infer import CLSExecutor
>>> cls = CLSExecutor()
>>> result = cls(audio_file="zh.wav")
>>> print(result)
Speech 0.9027186632156372
```

</details>

### 声纹提取

<details><summary>&emsp;工业级声纹提取工具</summary>

命令行一键体验

```shell
paddlespeech vector --task spk --input zh.wav
```

Python API 一键预测

```python
>>> from paddlespeech.cli.vector import VectorExecutor
>>> vec = VectorExecutor()
>>> result = vec(audio_file="zh.wav")
>>> print(result) # 187维向量
[ -0.19083306   9.474295   -14.122263    -2.0916545    0.04848729
   4.9295826    1.4780062    0.3733844   10.695862     3.2697146
  -4.48199     -0.6617882   -9.170393   -11.1568775   -1.2358263 ...]
```

</details>

### 标点恢复 

<details><summary>&emsp;一键恢复文本标点，可与ASR模型配合使用</summary>

命令行一键体验

```shell
paddlespeech text --task punc --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭
```

Python API 一键预测

```python
>>> from paddlespeech.cli.text.infer import TextExecutor
>>> text_punc = TextExecutor()
>>> result = text_punc(text="今天的天气真不错啊你下午有空吗我想约你一起去吃饭")
今天的天气真不错啊！你下午有空吗？我想约你一起去吃饭。
```

</details>

### 语音翻译

<details><summary>&emsp;端到端英译中语音翻译工具</summary>

使用预编译的 kaldi 相关工具，只支持在 Ubuntu 系统中体验

命令行一键体验

```shell
paddlespeech st --input en.wav
```

python API 一键预测

```python
>>> from paddlespeech.cli.st.infer import STExecutor
>>> st = STExecutor()
>>> result = st(audio_file="en.wav")
['我 在 这栋 建筑 的 古老 门上 敲门 。']
```

</details>


<a name="快速使用服务"></a>
## 快速使用服务
安装完成后，开发者可以通过命令行一键启动语音识别，语音合成，音频分类等多种服务。

你可以在 AI Studio 中快速体验：[SpeechServer 一键部署](https://aistudio.baidu.com/aistudio/projectdetail/4354592?sUid=2470186&shared=1&ts=1660878208266)

**启动服务**     
```shell
paddlespeech_server start --config_file ./demos/speech_server/conf/application.yaml
```

**访问语音识别服务**     
```shell
paddlespeech_client asr --server_ip 127.0.0.1 --port 8090 --input input_16k.wav
```

**访问语音合成服务**     
```shell
paddlespeech_client tts --server_ip 127.0.0.1 --port 8090 --input "您好，欢迎使用百度飞桨语音合成服务。" --output output.wav
```

**访问音频分类服务**     
```shell
paddlespeech_client cls --server_ip 127.0.0.1 --port 8090 --input input.wav
```

更多服务相关的命令行使用信息，请参考 [demos](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos/speech_server)

<a name="快速使用流式服务"></a>
## 快速使用流式服务

开发者可以尝试 [流式 ASR](./demos/streaming_asr_server/README.md) 和 [流式 TTS](./demos/streaming_tts_server/README.md) 服务.

**启动流式 ASR 服务**

```
paddlespeech_server start --config_file ./demos/streaming_asr_server/conf/application.yaml
```

**访问流式 ASR 服务**     

```
paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --input input_16k.wav
```

**启动流式 TTS 服务**

```
paddlespeech_server start --config_file ./demos/streaming_tts_server/conf/tts_online_application.yaml
```

**访问流式 TTS 服务**     

```
paddlespeech_client tts_online --server_ip 127.0.0.1 --port 8092 --protocol http --input "您好，欢迎使用百度飞桨语音合成服务。" --output output.wav
```

更多信息参看： [流式 ASR](./demos/streaming_asr_server/README.md) 和 [流式 TTS](./demos/streaming_tts_server/README.md) 

<a name="模型列表"></a>
## 模型列表
PaddleSpeech 支持很多主流的模型，并提供了预训练模型，详情请见[模型列表](./docs/source/released_model.md)。

<a name="语音识别模型"></a>

PaddleSpeech 的 **语音转文本** 包含语音识别声学模型、语音识别语言模型和语音翻译, 详情如下：

<table style="width:100%">
  <thead>
    <tr>
      <th>语音转文本模块类型</th>
      <th>数据集</th>
      <th>模型类型</th>
      <th>脚本</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td rowspan="4">语音识别</td>
      <td rowspan="2" >Aishell</td>
      <td >DeepSpeech2 RNN + Conv based Models</td>
      <td>
      <a href = "./examples/aishell/asr0">deepspeech2-aishell</a>
      </td>
    </tr>
    <tr>
      <td>Transformer based Attention Models </td>
      <td>
      <a href = "./examples/aishell/asr1">u2.transformer.conformer-aishell</a>
      </td>
    </tr>
      <tr>
      <td> Librispeech</td>
      <td>Transformer based Attention Models </td>
      <td>
      <a href = "./examples/librispeech/asr0">deepspeech2-librispeech</a> / <a href = "./examples/librispeech/asr1">transformer.conformer.u2-librispeech</a>  / <a href = "./examples/librispeech/asr2">transformer.conformer.u2-kaldi-librispeech</a>
      </td>
    </tr>
    <tr>
      <td>TIMIT</td>
      <td>Unified Streaming & Non-streaming Two-pass</td>
      <td>
    <a href = "./examples/timit/asr1"> u2-timit</a>
      </td>
    </tr>
  <tr>
  <td>对齐</td>
  <td>THCHS30</td>
  <td>MFA</td>
  <td>
  <a href = "./examples/thchs30/align0">mfa-thchs30</a>
  </td>
  </tr>
   <tr>
      <td rowspan="1">语言模型</td>
      <td colspan = "2">Ngram 语言模型</td>
      <td>
      <a href = "./examples/other/ngram_lm">kenlm</a>
      </td>
    </tr>
    <tr>
      <td rowspan="2">语音翻译（英译中）</td> 
      <td rowspan="2">TED En-Zh</td>
      <td>Transformer + ASR MTL</td>
      <td>
      <a href = "./examples/ted_en_zh/st0">transformer-ted</a>
      </td>
  </tr>
  <tr>
      <td>FAT + Transformer + ASR MTL</td>
      <td>
      <a href = "./examples/ted_en_zh/st1">fat-st-ted</a>
      </td>
  </tr>
  </tbody>
</table>

<a name="语音合成模型"></a>

PaddleSpeech 的 **语音合成** 主要包含三个模块：文本前端、声学模型和声码器。声学模型和声码器模型如下：

<table>
  <thead>
    <tr>
      <th> 语音合成模块类型 </th>
      <th> 模型类型 </th>
      <th> 数据集  </th>
      <th> 脚本  </th>
    </tr>
  </thead>
  <tbody>
    <tr>
    <td> 文本前端</td>
    <td colspan="2"> &emsp; </td>
    <td>
    <a href = "./examples/other/tn">tn</a> / <a href = "./examples/other/g2p">g2p</a>
    </td>
   </tr>
   <tr>
      <td rowspan="6">声学模型</td>
      <td>Tacotron2</td>
      <td>LJSpeech / CSMSC</td>
      <td>
      <a href = "./examples/ljspeech/tts0">tacotron2-ljspeech</a> / <a href = "./examples/csmsc/tts0">tacotron2-csmsc</a>
      </td>
   </tr>
   <tr>
      <td>Transformer TTS</td>
      <td>LJSpeech</td>
      <td>
      <a href = "./examples/ljspeech/tts1">transformer-ljspeech</a>
      </td>
   </tr>
   <tr>
      <td>SpeedySpeech</td>
      <td>CSMSC</td>
      <td >
      <a href = "./examples/csmsc/tts2">speedyspeech-csmsc</a>
      </td>
   </tr>
   <tr>
      <td>FastSpeech2</td>
      <td>LJSpeech / VCTK / CSMSC / AISHELL-3 / ZH_EN / finetune</td>
      <td>
      <a href = "./examples/ljspeech/tts3">fastspeech2-ljspeech</a> / <a href = "./examples/vctk/tts3">fastspeech2-vctk</a> / <a href = "./examples/csmsc/tts3">fastspeech2-csmsc</a> / <a href = "./examples/aishell3/tts3">fastspeech2-aishell3</a> / <a href = "./examples/zh_en_tts/tts3">fastspeech2-zh_en</a> / <a href = "./examples/other/tts_finetune/tts3">fastspeech2-finetune</a>
      </td>
   </tr>
   <tr>
      <td><a href = "https://arxiv.org/abs/2211.03545">ERNIE-SAT</a></td>
      <td>VCTK / AISHELL-3 / ZH_EN</td>
      <td>
      <a href = "./examples/vctk/ernie_sat">ERNIE-SAT-vctk</a> / <a href = "./examples/aishell3/ernie_sat">ERNIE-SAT-aishell3</a> / <a href = "./examples/aishell3_vctk/ernie_sat">ERNIE-SAT-zh_en</a>
      </td>
   </tr>
   <tr>
      <td>DiffSinger</td>
      <td>Opencpop</td>
      <td>
      <a href = "./examples/opencpop/svs1">DiffSinger-opencpop</a>
      </td>
   </tr>
   <tr>
      <td rowspan="6">声码器</td>
      <td >WaveFlow</td>
      <td >LJSpeech</td>
      <td>
      <a href = "./examples/ljspeech/voc0">waveflow-ljspeech</a>
      </td>
    </tr>
    <tr>
      <td >Parallel WaveGAN</td>
      <td >LJSpeech / VCTK / CSMSC / AISHELL-3 / Opencpop</td>
      <td>
      <a href = "./examples/ljspeech/voc1">PWGAN-ljspeech</a> / <a href = "./examples/vctk/voc1">PWGAN-vctk</a> / <a href = "./examples/csmsc/voc1">PWGAN-csmsc</a> /  <a href = "./examples/aishell3/voc1">PWGAN-aishell3</a> / <a href = "./examples/opencpop/voc1">PWGAN-opencpop</a>
      </td>
    </tr>
    <tr>
      <td >Multi Band MelGAN</td>
      <td >CSMSC</td>
      <td>
      <a href = "./examples/csmsc/voc3">Multi Band MelGAN-csmsc</a> 
      </td>
    </tr>
    <tr>
      <td >Style MelGAN</td>
      <td >CSMSC</td>
      <td>
      <a href = "./examples/csmsc/voc4">Style MelGAN-csmsc</a> 
      </td>
    </tr>
    <tr>
      <td >HiFiGAN</td>
      <td >LJSpeech / VCTK / CSMSC / AISHELL-3 / Opencpop</td>
      <td>
      <a href = "./examples/ljspeech/voc5">HiFiGAN-ljspeech</a> / <a href = "./examples/vctk/voc5">HiFiGAN-vctk</a> / <a href = "./examples/csmsc/voc5">HiFiGAN-csmsc</a> / <a href = "./examples/aishell3/voc5">HiFiGAN-aishell3</a> / <a href = "./examples/opencpop/voc5">HiFiGAN-opencpop</a>
      </td>
    </tr>
    <tr>
      <td >WaveRNN</td>
      <td >CSMSC</td>
      <td>
      <a href = "./examples/csmsc/voc6">WaveRNN-csmsc</a>
      </td>
    </tr>
    <tr>
      <td rowspan="5">声音克隆</td>
      <td>GE2E</td>
      <td >Librispeech, etc.</td>
      <td>
      <a href = "./examples/other/ge2e">GE2E</a>
      </td>
    </tr>
    <tr>
      <td>SV2TTS (GE2E + Tacotron2)</td>
      <td>AISHELL-3</td>
      <td>
      <a href = "./examples/aishell3/vc0">VC0</a>
      </td>
    </tr>
    <tr>
      <td>SV2TTS (GE2E + FastSpeech2)</td>
      <td>AISHELL-3</td>
      <td>
      <a href = "./examples/aishell3/vc1">VC1</a>
      </td>
    </tr>
    <tr>
      <td>SV2TTS (ECAPA-TDNN + FastSpeech2)</td>
      <td>AISHELL-3</td>
      <td>
      <a href = "./examples/aishell3/vc2">VC2</a>
      </td>
    </tr>
    <tr>
      <td>GE2E + VITS</td>
      <td>AISHELL-3</td>
      <td>
      <a href = "./examples/aishell3/vits-vc">VITS-VC</a>
      </td>
    </tr>
     <tr>
      <td rowspan="3">端到端</td>
      <td>VITS</td>
      <td>CSMSC / AISHELL-3</td>
      <td>
      <a href = "./examples/csmsc/vits">VITS-csmsc</a> / <a href = "./examples/aishell3/vits">VITS-aishell3</a>
      </td>
    </tr>
  </tbody>
</table>


<a name="声音分类模型"></a>
**声音分类**

<table style="width:100%">
  <thead>
    <tr>
      <th> 任务 </th>
      <th> 数据集 </th>
      <th> 模型类型 </th>
      <th> 脚本</th>
    </tr>
  </thead>
  <tbody>
  <tr>
      <td>声音分类</td>
      <td>ESC-50</td>
      <td>PANN</td>
      <td>
      <a href = "./examples/esc50/cls0">pann-esc50</a>
      </td>
    </tr>
  </tbody>
</table>


<a name="语音唤醒模型"></a>

**语音唤醒**

<table style="width:100%">
  <thead>
    <tr>
      <th> 任务 </th>
      <th> 数据集 </th>
      <th> 模型类型 </th>
      <th> 脚本 </th>
    </tr>
  </thead>
  <tbody>
  <tr>
      <td>语音唤醒</td>
      <td>hey-snips</td>
      <td>MDTC</td>
      <td>
      <a href = "./examples/hey_snips/kws0">mdtc-hey-snips</a>
      </td>
    </tr>
  </tbody>
</table>

<a name="声纹识别模型"></a>

**声纹识别**

<table style="width:100%">
  <thead>
    <tr>
      <th> 任务 </th>
      <th> 数据集 </th>
      <th> 模型类型 </th>
      <th> 脚本 </th>
    </tr>
  </thead>
  <tbody>
  <tr>
      <td>声纹识别</td>
      <td>VoxCeleb1/2</td>
      <td>ECAPA-TDNN</td>
      <td>
      <a href = "./examples/voxceleb/sv0">ecapa-tdnn-voxceleb12</a>
      </td>
    </tr>
  </tbody>
</table>

<a name="说话人日志模型"></a>

**说话人日志**

<table style="width:100%">
  <thead>
    <tr>
      <th> 任务 </th>
      <th> 数据集 </th>
      <th> 模型类型 </th>
      <th> 脚本 </th>
    </tr>
  </thead>
  <tbody>
  <tr>
      <td>说话人日志</td>
      <td>AMI</td>
      <td>ECAPA-TDNN + AHC / SC</td>
      <td>
      <a href = "./examples/ami/sd0">ecapa-tdnn-ami</a>
      </td>
    </tr>
  </tbody>
</table>

<a name="标点恢复模型"></a>

**标点恢复**

<table style="width:100%">
  <thead>
    <tr>
      <th> 任务 </th>
      <th> 数据集 </th>
      <th> 模型类型 </th>
      <th> 脚本 </th>
    </tr>
  </thead>
  <tbody>
  <tr>
      <td>标点恢复</td>
      <td>IWSLT2012_zh</td>
      <td>Ernie Linear</td>
      <td>
      <a href = "./examples/iwslt2012/punc0">iwslt2012-punc0</a>
      </td>
    </tr>
  </tbody>
</table>

<a name="教程文档"></a>
## 教程文档

对于 PaddleSpeech 的所关注的任务，以下指南有助于帮助开发者快速入门，了解语音相关核心思想。

- [下载安装](./docs/source/install_cn.md)
- [快速开始](#快速开始)
- Notebook基础教程
  - [声音分类](./docs/tutorial/cls/cls_tutorial.ipynb)
  - [语音识别](./docs/tutorial/asr/tutorial_transformer.ipynb)
  - [语音翻译](./docs/tutorial/st/st_tutorial.ipynb)
  - [声音合成](./docs/tutorial/tts/tts_tutorial.ipynb)
  - [示例Demo](./demos/README.md)
- 进阶文档  
  - [语音识别自定义训练](./docs/source/asr/quick_start.md)
    - [简介](./docs/source/asr/models_introduction.md)
    - [数据准备](./docs/source/asr/data_preparation.md)
    - [Ngram 语言模型](./docs/source/asr/ngram_lm.md)
  - [语音合成自定义训练](./docs/source/tts/quick_start.md)
    - [简介](./docs/source/tts/models_introduction.md)
    - [进阶用法](./docs/source/tts/advanced_usage.md)
    - [中文文本前端](./docs/source/tts/zh_text_frontend.md)
    - [测试语音样本](https://paddlespeech.readthedocs.io/en/latest/tts/demo.html)
  - 声纹识别
    - [声纹识别](./demos/speaker_verification/README_cn.md)
    - [音频检索](./demos/audio_searching/README_cn.md)
  - [声音分类](./demos/audio_tagging/README_cn.md)
  - [语音翻译](./demos/speech_translation/README_cn.md)
  - [服务化部署](./demos/speech_server/README_cn.md)
- [模型列表](#模型列表)
  - [语音识别](#语音识别模型)
  - [语音合成](#语音合成模型)
  - [声音分类](#声音分类模型)
  - [声纹识别](#声纹识别模型)
  - [说话人日志](#说话人日志模型)
  - [标点恢复](#标点恢复模型)
- [技术交流群](#技术交流群)
- [欢迎贡献](#欢迎贡献)
- [License](#License)


语音合成模块最初被称为 [Parakeet](https://github.com/PaddlePaddle/Parakeet)，现在与此仓库合并。如果您对该任务的学术研究感兴趣，请参阅 [TTS 研究概述](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/docs/source/tts#overview)。此外，[模型介绍](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/tts/models_introduction.md) 是了解语音合成流程的一个很好的指南。


## ⭐ 应用案例
- **[PaddleBoBo](https://github.com/JiehangXie/PaddleBoBo): 使用 PaddleSpeech 的语音合成模块生成虚拟人的声音。**
  
<div align="center"><a href="https://www.bilibili.com/video/BV1cL411V71o?share_source=copy_web"><img src="https://ai-studio-static-online.cdn.bcebos.com/06fd746ab32042f398fb6f33f873e6869e846fe63c214596ae37860fe8103720" width="500px" /></a></div>

- [PaddleSpeech 示例视频](https://paddlespeech.readthedocs.io/en/latest/demo_video.html)


- **[VTuberTalk](https://github.com/jerryuhoo/VTuberTalk): 使用 PaddleSpeech 的语音合成和语音识别从视频中克隆人声。**

<div align="center">
<img src="https://raw.githubusercontent.com/jerryuhoo/VTuberTalk/main/gui/gui.png"  width = "500px"  />
</div>


## 引用

要引用 PaddleSpeech 进行研究，请使用以下格式进行引用。
```text
@InProceedings{pmlr-v162-bai22d,
  title = {{A}$^3${T}: Alignment-Aware Acoustic and Text Pretraining for Speech Synthesis and Editing},
  author = {Bai, He and Zheng, Renjie and Chen, Junkun and Ma, Mingbo and Li, Xintong and Huang, Liang},
  booktitle = {Proceedings of the 39th International Conference on Machine Learning},
  pages = {1399--1411},
  year = {2022},
  volume = {162},
  series = {Proceedings of Machine Learning Research},
  month = {17--23 Jul},
  publisher = {PMLR},
  pdf = {https://proceedings.mlr.press/v162/bai22d/bai22d.pdf},
  url = {https://proceedings.mlr.press/v162/bai22d.html},
}

@inproceedings{zhang2022paddlespeech,
    title = {PaddleSpeech: An Easy-to-Use All-in-One Speech Toolkit},
    author = {Hui Zhang, Tian Yuan, Junkun Chen, Xintong Li, Renjie Zheng, Yuxin Huang, Xiaojie Chen, Enlei Gong, Zeyu Chen, Xiaoguang Hu, dianhai yu, Yanjun Ma, Liang Huang},
    booktitle = {Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies: Demonstrations},
    year = {2022},
    publisher = {Association for Computational Linguistics},
}

@inproceedings{zheng2021fused,
  title={Fused acoustic and text encoding for multimodal bilingual pretraining and speech translation},
  author={Zheng, Renjie and Chen, Junkun and Ma, Mingbo and Huang, Liang},
  booktitle={International Conference on Machine Learning},
  pages={12736--12746},
  year={2021},
  organization={PMLR}
}
```

<a name="欢迎贡献"></a>
## 参与 PaddleSpeech 的开发

热烈欢迎您在 [Discussions](https://github.com/PaddlePaddle/PaddleSpeech/discussions) 中提交问题，并在 [Issues](https://github.com/PaddlePaddle/PaddleSpeech/issues) 中指出发现的 bug。此外，我们非常希望您参与到 PaddleSpeech 的开发中！

### 贡献者
<p align="center">
<a href="https://github.com/zh794390558"><img src="https://avatars.githubusercontent.com/u/3038472?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/Jackwaterveg"><img src="https://avatars.githubusercontent.com/u/87408988?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/yt605155624"><img src="https://avatars.githubusercontent.com/u/24568452?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/Honei"><img src="https://avatars.githubusercontent.com/u/11361692?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/KPatr1ck"><img src="https://avatars.githubusercontent.com/u/22954146?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/kuke"><img src="https://avatars.githubusercontent.com/u/3064195?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/lym0302"><img src="https://avatars.githubusercontent.com/u/34430015?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/SmileGoat"><img src="https://avatars.githubusercontent.com/u/56786796?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/xinghai-sun"><img src="https://avatars.githubusercontent.com/u/7038341?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/pkuyym"><img src="https://avatars.githubusercontent.com/u/5782283?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/LittleChenCc"><img src="https://avatars.githubusercontent.com/u/10339970?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/qingen"><img src="https://avatars.githubusercontent.com/u/3139179?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/D-DanielYang"><img src="https://avatars.githubusercontent.com/u/23690325?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/Mingxue-Xu"><img src="https://avatars.githubusercontent.com/u/92848346?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/745165806"><img src="https://avatars.githubusercontent.com/u/20623194?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/jerryuhoo"><img src="https://avatars.githubusercontent.com/u/24245709?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/WilliamZhang06"><img src="https://avatars.githubusercontent.com/u/97937340?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/chrisxu2016"><img src="https://avatars.githubusercontent.com/u/18379485?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/iftaken"><img src="https://avatars.githubusercontent.com/u/30135920?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/lfchener"><img src="https://avatars.githubusercontent.com/u/6771821?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/BarryKCL"><img src="https://avatars.githubusercontent.com/u/48039828?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/mmglove"><img src="https://avatars.githubusercontent.com/u/38800877?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/gongel"><img src="https://avatars.githubusercontent.com/u/24390500?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/luotao1"><img src="https://avatars.githubusercontent.com/u/6836917?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/wanghaoshuang"><img src="https://avatars.githubusercontent.com/u/7534971?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/kslz"><img src="https://avatars.githubusercontent.com/u/54951765?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/JiehangXie"><img src="https://avatars.githubusercontent.com/u/51190264?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/david-95"><img src="https://avatars.githubusercontent.com/u/15189190?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/THUzyt21"><img src="https://avatars.githubusercontent.com/u/91456992?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/buchongyu2"><img src="https://avatars.githubusercontent.com/u/29157444?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/iclementine"><img src="https://avatars.githubusercontent.com/u/16222986?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/phecda-xu"><img src="https://avatars.githubusercontent.com/u/46859427?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/freeliuzc"><img src="https://avatars.githubusercontent.com/u/23568094?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/ZeyuChen"><img src="https://avatars.githubusercontent.com/u/1371212?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/ccrrong"><img src="https://avatars.githubusercontent.com/u/101700995?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/AK391"><img src="https://avatars.githubusercontent.com/u/81195143?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/qingqing01"><img src="https://avatars.githubusercontent.com/u/7845005?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/0x45f"><img src="https://avatars.githubusercontent.com/u/23097963?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/vpegasus"><img src="https://avatars.githubusercontent.com/u/22723154?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/ericxk"><img src="https://avatars.githubusercontent.com/u/4719594?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/Betterman-qs"><img src="https://avatars.githubusercontent.com/u/61459181?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/sneaxiy"><img src="https://avatars.githubusercontent.com/u/32832641?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/Doubledongli"><img src="https://avatars.githubusercontent.com/u/20540661?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/apps/dependabot"><img src="https://avatars.githubusercontent.com/in/29110?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/kvinwang"><img src="https://avatars.githubusercontent.com/u/6442159?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/chenkui164"><img src="https://avatars.githubusercontent.com/u/34813030?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/PaddleZhang"><img src="https://avatars.githubusercontent.com/u/97284124?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/billishyahao"><img src="https://avatars.githubusercontent.com/u/96406262?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/BrightXiaoHan"><img src="https://avatars.githubusercontent.com/u/25839309?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/jiqiren11"><img src="https://avatars.githubusercontent.com/u/82639260?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/ryanrussell"><img src="https://avatars.githubusercontent.com/u/523300?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/GT-ZhangAcer"><img src="https://avatars.githubusercontent.com/u/46156734?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/tensor-tang"><img src="https://avatars.githubusercontent.com/u/21351065?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/hysunflower"><img src="https://avatars.githubusercontent.com/u/52739577?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/oyjxer"><img src="https://avatars.githubusercontent.com/u/16233945?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/JamesLim-sy"><img src="https://avatars.githubusercontent.com/u/61349199?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/limpidezza"><img src="https://avatars.githubusercontent.com/u/71760778?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/windstamp"><img src="https://avatars.githubusercontent.com/u/34057289?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/AshishKarel"><img src="https://avatars.githubusercontent.com/u/58069375?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/chesterkuo"><img src="https://avatars.githubusercontent.com/u/6285069?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/YDX-2147483647"><img src="https://avatars.githubusercontent.com/u/73375426?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/AdamBear"><img src="https://avatars.githubusercontent.com/u/2288870?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/wwhu"><img src="https://avatars.githubusercontent.com/u/6081200?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/lispc"><img src="https://avatars.githubusercontent.com/u/2833376?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/harisankarh"><img src="https://avatars.githubusercontent.com/u/1307053?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/pengzhendong"><img src="https://avatars.githubusercontent.com/u/10704539?s=60&v=4" width=75 height=75></a>
<a href="https://github.com/Jackiexiao"><img src="https://avatars.githubusercontent.com/u/18050469?s=60&v=4" width=75 height=75></a>
</p>

## 致谢
- 非常感谢 [HighCWu](https://github.com/HighCWu) 新增 [VITS-aishell3](./examples/aishell3/vits) 和 [VITS-VC](./examples/aishell3/vits-vc) 代码示例。
- 非常感谢 [david-95](https://github.com/david-95) 修复 TTS 句尾多标点符号出错的问题，贡献补充多条程序和数据。为 TTS 中文文本前端新增 [SSML](https://github.com/PaddlePaddle/PaddleSpeech/discussions/2538) 功能。
- 非常感谢 [BarryKCL](https://github.com/BarryKCL) 基于 [G2PW](https://github.com/GitYCC/g2pW) 对 TTS 中文文本前端的优化。
- 非常感谢 [yeyupiaoling](https://github.com/yeyupiaoling)/[PPASR](https://github.com/yeyupiaoling/PPASR)/[PaddlePaddle-DeepSpeech](https://github.com/yeyupiaoling/PaddlePaddle-DeepSpeech)/[VoiceprintRecognition-PaddlePaddle](https://github.com/yeyupiaoling/VoiceprintRecognition-PaddlePaddle)/[AudioClassification-PaddlePaddle](https://github.com/yeyupiaoling/AudioClassification-PaddlePaddle) 多年来的关注和建议，以及在诸多问题上的帮助。
- 非常感谢 [mymagicpower](https://github.com/mymagicpower) 采用PaddleSpeech 对 ASR 的[短语音](https://github.com/mymagicpower/AIAS/tree/main/3_audio_sdks/asr_sdk)及[长语音](https://github.com/mymagicpower/AIAS/tree/main/3_audio_sdks/asr_long_audio_sdk)进行 Java 实现。
- 非常感谢 [JiehangXie](https://github.com/JiehangXie)/[PaddleBoBo](https://github.com/JiehangXie/PaddleBoBo) 采用 PaddleSpeech 语音合成功能实现 Virtual Uploader(VUP)/Virtual YouTuber(VTuber) 虚拟主播。
- 非常感谢 [745165806](https://github.com/745165806)/[PaddleSpeechTask](https://github.com/745165806/PaddleSpeechTask) 贡献标点重建相关模型。
- 非常感谢 [kslz](https://github.com/kslz) 补充中文文档。
- 非常感谢 [awmmmm](https://github.com/awmmmm) 提供 fastspeech2 aishell3 conformer 预训练模型。
- 非常感谢 [phecda-xu](https://github.com/phecda-xu)/[PaddleDubbing](https://github.com/phecda-xu/PaddleDubbing) 基于 PaddleSpeech 的 TTS 模型搭建带 GUI 操作界面的配音工具。
- 非常感谢 [jerryuhoo](https://github.com/jerryuhoo)/[VTuberTalk](https://github.com/jerryuhoo/VTuberTalk) 基于 PaddleSpeech 的 TTS GUI 界面和基于 ASR 制作数据集的相关代码。
- 非常感谢 [vpegasus](https://github.com/vpegasus)/[xuesebot](https://github.com/vpegasus/xuesebot) 基于 PaddleSpeech 的 ASR 与 TTS 设计的可听、说对话机器人。
- 非常感谢 [chenkui164](https://github.com/chenkui164)/[FastASR](https://github.com/chenkui164/FastASR) 对 PaddleSpeech 的 ASR 进行 C++ 推理实现。
- 非常感谢 [heyudage](https://github.com/heyudage)/[VoiceTyping](https://github.com/heyudage/VoiceTyping) 基于 PaddleSpeech 的 ASR 流式服务实现的实时语音输入法工具。
- 非常感谢 [EscaticZheng](https://github.com/EscaticZheng)/[ps3.9wheel-install](https://github.com/EscaticZheng/ps3.9wheel-install) 对 PaddleSpeech 在 Windows 下的安装提供了无需 Visual Studio、基于 Python 3.9 的预编译依赖安装包。
- 非常感谢 [chinobing](https://github.com/chinobing)/[FastAPI-PaddleSpeech-Audio-To-Text](https://github.com/chinobing/FastAPI-PaddleSpeech-Audio-To-Text) 利用 FastAPI 实现 PaddleSpeech 语音转文字，文件上传、分割、转换进度显示、后台更新任务并以 csv 格式输出。
- 非常感谢 [MistEO](https://github.com/MistEO)/[Pallas-Bot](https://github.com/MistEO/Pallas-Bot) 基于 PaddleSpeech TTS 的 QQ Bot 项目。

此外，PaddleSpeech 依赖于许多开源存储库。有关更多信息，请参阅 [references](./docs/source/reference.md)。

## License

PaddleSpeech 在 [Apache-2.0 许可](./LICENSE) 下提供。

## Stargazers over time

[![Stargazers over time](https://starchart.cc/PaddlePaddle/PaddleSpeech.svg)](https://starchart.cc/PaddlePaddle/PaddleSpeech)


================================================
FILE: audio/CMakeLists.txt
================================================
# Require CMake 3.16 or newer for this project.
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)

# CMP0010: bad variable reference syntax is an error.
# CMP0025: use compiler ID "AppleClang" instead of "Clang" for XCode.
# Not setting CMP0025 sometimes makes the XCode C compiler get detected as
# "Clang", even when the C++ one is detected as "AppleClang".
cmake_policy(SET CMP0010 NEW)
cmake_policy(SET CMP0025 NEW)

# Suppress warning flags in default MSVC configuration (CMP0092).  It's not
# mandatory that we do this (and we don't if cmake is old), but it's
# nice when it's possible, and it's possible on our Windows configs.
# NOTE(review): with the 3.16 minimum declared above, this version guard is
# always true.
if(NOT CMAKE_VERSION VERSION_LESS 3.15.0)
  cmake_policy(SET CMP0092 NEW)
endif()

# Declare the project; the policies above are set before this call.
project(paddleaudio)

# Check CMAKE_CXX_FLAGS for a user-supplied "-std=c++..." flag and warn about
# it, because the C++ standard is set explicitly via CMAKE_CXX_STANDARD below.
# string(FIND) stores -1 in env_cxx_standard when the substring is absent.
string(FIND "${CMAKE_CXX_FLAGS}" "-std=c++" env_cxx_standard)
if(env_cxx_standard GREATER -1)
  # message() joins its string arguments with no separator, so the first
  # fragment must end with a space to keep the sentences apart.
  message(
      WARNING "C++ standard version definition detected in environment variable. "
      "paddleaudio requires -std=c++14. Please remove -std=c++ settings in your environment.")
endif()


# Language standards used for all targets: C++14 and C11.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_C_STANDARD 11)

# Emit compile_commands.json, build position-independent code, and make the
# generated build files print the full commands they run.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_VERBOSE_MAKEFILE ON)

# Options (all default to ON)
option(BUILD_SOX "Build libsox statically" ON)
option(BUILD_MAD "Enable libmad" ON)
option(BUILD_KALDI "Build kaldi statically" ON)
option(BUILD_PADDLEAUDIO_PYTHON_EXTENSION "Build Python extension" ON)


# Extend the module search path with this project's cmake/ and
# cmake/external/ directories so include()/find_package() can locate them.
set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${PROJECT_SOURCE_DIR}/cmake;${PROJECT_SOURCE_DIR}/cmake/external")

# fc_patch dir: resolve "fc_patch" relative to the top-level source directory
# and use it both as the FetchContent base directory and as THIRD_PARTY_PATH.
# FETCHCONTENT_QUIET is turned off.
set(FETCHCONTENT_QUIET off)
get_filename_component(fc_patch "fc_patch" REALPATH BASE_DIR "${CMAKE_SOURCE_DIR}")
set(FETCHCONTENT_BASE_DIR ${fc_patch})
set(THIRD_PARTY_PATH ${fc_patch})

# PY_VERSION is not defined in this file.
# NOTE(review): presumably passed in by the caller (e.g. -DPY_VERSION=...) — confirm.
set(PYBIND11_PYTHON_VERSION ${PY_VERSION})
include(cmake/pybind.cmake)
# NOTE(review): PYTHON_INCLUDE_DIR is not set in this file; presumably
# provided by cmake/pybind.cmake or the caller — confirm.
include_directories(${PYTHON_INCLUDE_DIR})

# Make the bundled third-party headers visible to all targets.
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/paddleaudio/third_party/)

# packages
find_package(Python3 COMPONENTS Interpreter Development)

# Debug-build flags, intentionally left disabled:
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -O0 -Wall -g")
add_subdirectory(paddleaudio)

# Summary
# NOTE(review): onnx_print_configuration_summary() is not defined in this
# file; presumably it comes from cmake/summary.cmake — confirm.
include(cmake/summary.cmake)
onnx_print_configuration_summary()


================================================
FILE: audio/README.md
================================================
# PaddleAudio

安装方式： pip install paddleaudio

目前支持的平台：Linux, Mac, Windows

## Environment

## Build wheel
cmd: python setup.py bdist_wheel

Linux test build whl environment:
* os - Ubuntu 16.04.7 LTS
* gcc/g++ - 8.2.0
* cmake - 3.18.0 (need install)

MAC：test build whl environment：
* os 
* gcc/g++ 12.2.0
* cpu Intel Xeon E5 x86_64

Windows：
The paddleaudio C++ extension libraries (sox io, kaldi native fbank) are not supported.


================================================
FILE: audio/cmake/FindGFortranLibs.cmake
================================================
#.rst:
# FindGFortranLibs
# --------
#  https://github.com/Argonne-National-Laboratory/PIPS/blob/master/cmake/Modules/FindGFortranLibs.cmake
#  https://enccs.github.io/cmake-workshop/cxx-fortran/
#
# Find gcc Fortran compiler & library paths
#
# The module defines the following variables:
#
# ::
#
#
#   GFORTRANLIBS_FOUND - true if system has gfortran
#   LIBGFORTRAN_LIBRARIES - path to libgfortran
#   LIBQUADMATH_LIBRARIES - path to libquadmath
#   GFORTRAN_LIBRARIES_DIR - directory containing libgfortran, libquadmath
#   GFORTRAN_INCLUDE_DIR - directory containing gfortran/gcc headers
#   LIBGOMP_LIBRARIES - path to libgomp
#   LIBGOMP_INCLUDE_DIR - directory containing omp.h header
#   GFORTRAN_VERSION_STRING - version of gfortran found
#
# Honour find_package(GFortranLibs QUIET): find_package() exposes the QUIET
# keyword as <PackageName>_FIND_QUIETLY, i.e. GFortranLibs_FIND_QUIETLY for
# this module.  (The previous LIBIOMP_FIND_QUIETLY belongs to a different
# find module and is never set for this package.)
set(CMAKE_REQUIRED_QUIET ${GFortranLibs_FIND_QUIETLY})

if(NOT CMAKE_REQUIRED_QUIET)
  message(STATUS "Looking for gfortran related libraries...")
endif()

# Enable Fortran so CMAKE_Fortran_COMPILER / CMAKE_Fortran_COMPILER_ID are
# available; the detection below only runs for the GNU compiler (gfortran).
enable_language(Fortran)
if(CMAKE_Fortran_COMPILER_ID MATCHES "GNU")

  # Basically, call "gfortran -v" to dump compiler info to the string
  # GFORTRAN_VERBOSE_STR, which will be used to get necessary paths.
  # The output is captured from stderr (ERROR_VARIABLE).
  message(STATUS "Extracting library and header information by calling 'gfortran -v'...")
  execute_process(COMMAND "${CMAKE_Fortran_COMPILER}" "-v" ERROR_VARIABLE
    GFORTRAN_VERBOSE_STR RESULT_VARIABLE FLAG)

  # For debugging
  message(STATUS "'gfortran -v' returned:")
  message(STATUS "${GFORTRAN_VERBOSE_STR}")

  # Detect gfortran version
  string(REGEX MATCH "gcc version [^\t\n ]+" GFORTRAN_VER_STR "${GFORTRAN_VERBOSE_STR}")
  string(REGEX REPLACE "gcc version ([^\t\n ]+)" "\\1" GFORTRAN_VERSION_STRING "${GFORTRAN_VER_STR}")
  message(STATUS "Detected gfortran version ${GFORTRAN_VERSION_STRING}")
  unset(GFORTRAN_VER_STR)

  set(MATCH_REGEX "[^\t\n ]+[\t\n ]+")
  set(REPLACE_REGEX "([^\t\n ]+)")

  # Find architecture for compiler
  string(REGEX MATCH "Target: [^\t\n ]+"
    GFORTRAN_ARCH_STR "${GFORTRAN_VERBOSE_STR}")
  message(STATUS "Architecture string: ${GFORTRAN_ARCH_STR}")
  string(REGEX REPLACE "Target: ([^\t\n ]+)" "\\1"
    GFORTRAN_ARCH "${GFORTRAN_ARCH_STR}")
  message(STATUS "Detected gfortran architecture: ${GFORTRAN_ARCH}")
  unset(GFORTRAN_ARCH_STR)

  # Find install prefix, if it exists; if not, use default
  string(REGEX MATCH  "--prefix=[^\t\n ]+[\t\n ]+"
    GFORTRAN_PREFIX_STR "${GFORTRAN_VERBOSE_STR}")
  if(NOT GFORTRAN_PREFIX_STR)
    message(STATUS "Detected default gfortran prefix")
    set(GFORTRAN_PREFIX_DIR "/usr/local") # default prefix for gcc install
  else()
    string(REGEX REPLACE "--prefix=([^\t\n ]+)" "\\1"
      GFORTRAN_PREFIX_DIR "${GFORTRAN_PREFIX_STR}")
    # The match regex also captures the whitespace following the flag, and
    # the replace leaves it in place; drop it so the path is clean.
    string(STRIP "${GFORTRAN_PREFIX_DIR}" GFORTRAN_PREFIX_DIR)
  endif()
  message(STATUS "Detected gfortran prefix: ${GFORTRAN_PREFIX_DIR}")
  unset(GFORTRAN_PREFIX_STR)

  # Find install exec-prefix, if it exists; if not, use default.
  # string(REGEX MATCH <regex> <out-var> <input>...): the output variable
  # must directly follow the regex.  A stray "\\1" used to sit in that slot,
  # so GFORTRAN_EXEC_PREFIX_STR was never set and the default was always used.
  string(REGEX MATCH "--exec-prefix=[^\t\n ]+[\t\n ]+"
    GFORTRAN_EXEC_PREFIX_STR "${GFORTRAN_VERBOSE_STR}")
  if(NOT GFORTRAN_EXEC_PREFIX_STR)
    message(STATUS "Detected default gfortran exec-prefix")
    set(GFORTRAN_EXEC_PREFIX_DIR "${GFORTRAN_PREFIX_DIR}")
  else()
    string(REGEX REPLACE "--exec-prefix=([^\t\n ]+)" "\\1"
      GFORTRAN_EXEC_PREFIX_DIR "${GFORTRAN_EXEC_PREFIX_STR}")
    # Same trailing-whitespace cleanup as for the prefix above.
    string(STRIP "${GFORTRAN_EXEC_PREFIX_DIR}" GFORTRAN_EXEC_PREFIX_DIR)
  endif()
  message(STATUS "Detected gfortran exec-prefix: ${GFORTRAN_EXEC_PREFIX_DIR}")
  unset(GFORTRAN_EXEC_PREFIX_STR)

  # Find library directory and include directory, if library directory specified
  string(REGEX MATCH "--libdir=[^\t\n ]+"
    GFORTRAN_LIB_DIR_STR "${GFORTRAN_VERBOSE_STR}")
  if(NOT GFORTRAN_LIB_DIR_STR)
    message(STATUS "Found --libdir flag -- not found")
    message(STATUS "Using default gfortran library & include directory paths")
    # Fall back to <prefix>/lib64 and <prefix>/include.
    string(STRIP "${GFORTRAN_PREFIX_DIR}" TMPLIBDIR)
    set(GFORTRAN_LIBRARIES_DIR "${TMPLIBDIR}/lib64")
    set(GFORTRAN_INCLUDE_DIR "${TMPLIBDIR}/include")
  else()
    message(STATUS "Found --libdir flag -- yes")
    string(REGEX REPLACE "--libdir=([^\t\n ]+)" "\\1"
      GFORTRAN_LIBRARIES_DIR "${GFORTRAN_LIB_DIR_STR}")
    # Headers are looked up under <libdir>/gcc/<arch>/<version>/include.
    string(CONCAT GFORTRAN_INCLUDE_DIR "${GFORTRAN_LIBRARIES_DIR}" "/gcc/" "${GFORTRAN_ARCH}" "/" "${GFORTRAN_VERSION_STRING}" "/include")
  endif()
  message(STATUS "gfortran libraries path: ${GFORTRAN_LIBRARIES_DIR}")
  message(STATUS "gfortran include path dir: ${GFORTRAN_INCLUDE_DIR}")
  unset(GFORTRAN_LIB_DIR_STR)

  # There are lots of other build options for gcc & gfortran. For now, the
  # options implemented above should cover a lot of common use cases.

  # Clean up by deleting the output string from "gfortran -v"
  unset(GFORTRAN_VERBOSE_STR)

  # Find paths for libgfortran, libquadmath, libgomp
  # libgomp needed for OpenMP support without Clang
  find_library(LIBGFORTRAN_LIBRARIES NAMES gfortran libgfortran
    HINTS ${GFORTRAN_LIBRARIES_DIR})
  find_library(LIBQUADMATH_LIBRARIES NAMES quadmath libquadmath
    HINTS ${GFORTRAN_LIBRARIES_DIR})
  find_library(LIBGOMP_LIBRARIES NAMES gomp libgomp
    HINTS ${GFORTRAN_LIBRARIES_DIR})

  # Find OpenMP headers
  find_path(LIBGOMP_INCLUDE_DIR NAMES omp.h HINTS ${GFORTRAN_INCLUDE_DIR})

else()
  message(STATUS "CMAKE_Fortran_COMPILER_ID does not match 'GNU'!")
endif()

# Let the standard helper validate the required variables and report the
# found / not-found state for the GFortranLibs package.
include(FindPackageHandleStandardArgs)

# Required: libgfortran, libquadmath, path for gfortran libraries
# Optional: libgomp, path for OpenMP headers, path for gcc/gfortran headers
find_package_handle_standard_args(GFortranLibs
  REQUIRED_VARS LIBGFORTRAN_LIBRARIES LIBQUADMATH_LIBRARIES GFORTRAN_LIBRARIES_DIR
  VERSION_VAR GFORTRAN_VERSION_STRING)

# NOTE(review): this checks the upper-case GFORTRANLIBS_FOUND rather than
# GFortranLibs_FOUND; confirm the helper sets the upper-case variant on the
# CMake versions in use.
if(GFORTRANLIBS_FOUND)
  message(STATUS "Looking for gfortran libraries -- found")
  message(STATUS "gfortran version: ${GFORTRAN_VERSION_STRING}")
else()
  message(STATUS "Looking for gfortran libraries -- not found")
endif()

# Hide the located paths from the default (non-advanced) cache view.
mark_as_advanced(LIBGFORTRAN_LIBRARIES LIBQUADMATH_LIBRARIES
  LIBGOMP_LIBRARIES LIBGOMP_INCLUDE_DIR
  GFORTRAN_LIBRARIES_DIR GFORTRAN_INCLUDE_DIR)
# End of the find logic; the statements below only print the results.


# Unconditionally print the resolved variables (runs whether or not the
# libraries were found).
message(STATUS LIBGFORTRAN_LIBRARIES= ${LIBGFORTRAN_LIBRARIES})
message(STATUS LIBQUADMATH_LIBRARIES= ${LIBQUADMATH_LIBRARIES})
message(STATUS LIBGOMP_LIBRARIES= ${LIBGOMP_LIBRARIES})
message(STATUS LIBGOMP_INCLUDE_DIR= ${LIBGOMP_INCLUDE_DIR})
message(STATUS GFORTRAN_LIBRARIES_DIR= ${GFORTRAN_LIBRARIES_DIR})
message(STATUS GFORTRAN_INCLUDE_DIR= ${GFORTRAN_INCLUDE_DIR})


================================================
FILE: audio/cmake/external/openblas.cmake
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

include(ExternalProject)

# Source and install locations of the OpenBLAS external project, both below
# THIRD_PARTY_PATH.
set(CBLAS_PREFIX_DIR ${THIRD_PARTY_PATH}/openblas)
set(CBLAS_INSTALL_DIR ${THIRD_PARTY_PATH}/install/openblas)
# The git repository and tag are only consumed by the WIN32 branch below; the
# non-Windows branch downloads a zip archive instead.
set(CBLAS_REPOSITORY https://github.com/xianyi/OpenBLAS.git)
set(CBLAS_TAG v0.3.10)

if(NOT WIN32)
  # Non-Windows: build OpenBLAS in-source with its own Makefile.
  set(CBLAS_LIBRARIES
      "${CBLAS_INSTALL_DIR}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}openblas${CMAKE_STATIC_LIBRARY_SUFFIX}"
      CACHE FILEPATH "openblas library." FORCE)
  set(CBLAS_INC_DIR
      "${CBLAS_INSTALL_DIR}/include"
      CACHE PATH "openblas include directory." FORCE)
  set(OPENBLAS_CC
      "${CMAKE_C_COMPILER} -Wno-unused-but-set-variable -Wno-unused-variable")

  # On Apple the compiler command above is replaced by one carrying the sysroot.
  if(APPLE)
    set(OPENBLAS_CC "${CMAKE_C_COMPILER} -isysroot ${CMAKE_OSX_SYSROOT}")
  endif()
  set(OPTIONAL_ARGS "")
  set(COMMON_ARGS "")

  # Extra make arguments are only set on Apple; elsewhere both lists stay
  # empty, so OPENBLAS_CC is not passed to make.
  if(APPLE)
    if(CMAKE_SYSTEM_PROCESSOR MATCHES "^x86(_64)?$")
      set(OPTIONAL_ARGS DYNAMIC_ARCH=1 NUM_THREADS=64)
    endif()
    set(COMMON_ARGS CC=${OPENBLAS_CC} NO_SHARED=1)
  endif()

  # Download the source archive, build it with make and install it into
  # INSTALL_DIR; there is no configure step.
  # NOTE(review): GIT_SHALLOW is an option of the git download method; with a
  # URL download it presumably has no effect -- confirm and drop if so.
  ExternalProject_Add(
    OPENBLAS
    URL "https://paddleaudio.bj.bcebos.com/build/OpenBLAS-0.3.10.zip"
    GIT_SHALLOW YES
    DOWNLOAD_DIR ${CBLAS_PREFIX_DIR}
    SOURCE_DIR ${CBLAS_PREFIX_DIR}
    INSTALL_DIR ${CBLAS_INSTALL_DIR}
    BUILD_IN_SOURCE 1
    BUILD_COMMAND make -j${NPROC} ${COMMON_ARGS} ${OPTIONAL_ARGS}
    INSTALL_COMMAND make install PREFIX=<INSTALL_DIR>
    UPDATE_COMMAND ""
    CONFIGURE_COMMAND ""
    BUILD_BYPRODUCTS ${CBLAS_LIBRARIES})

    # Imported static target `openblas` pointing at the installed archive; it
    # depends on the external project so the archive exists before it is used.
    ExternalProject_Get_Property(OPENBLAS INSTALL_DIR)
    set(OpenBLAS_INSTALL_PREFIX ${INSTALL_DIR})
    add_library(openblas STATIC IMPORTED)
    add_dependencies(openblas OPENBLAS)
    set_target_properties(openblas PROPERTIES IMPORTED_LINK_INTERFACE_LANGUAGES Fortran)
    set_target_properties(openblas PROPERTIES IMPORTED_LOCATION ${OpenBLAS_INSTALL_PREFIX}/lib/libopenblas.a)

    # Directory-wide link and include search paths for the installed OpenBLAS.
    link_directories(${OpenBLAS_INSTALL_PREFIX}/lib)
    include_directories(${OpenBLAS_INSTALL_PREFIX}/include)

    set(OPENBLAS_LIBRARIES
        ${OpenBLAS_INSTALL_PREFIX}/lib/libopenblas.a
    )

    # Interface target `libopenblas` that carries the include directory and the
    # static archive as usage requirements.
    add_library(libopenblas INTERFACE)
    add_dependencies(libopenblas openblas)
    target_include_directories(libopenblas INTERFACE ${OpenBLAS_INSTALL_PREFIX}/include/openblas)
    target_link_libraries(libopenblas INTERFACE ${OPENBLAS_LIBRARIES})
else()
  # Windows: build OpenBLAS from git through its CMake build, using clang-cl
  # and flang as compilers.
  set(CBLAS_LIBRARIES
      "${CBLAS_INSTALL_DIR}/lib/openblas${CMAKE_STATIC_LIBRARY_SUFFIX}"
      CACHE FILEPATH "openblas library." FORCE)
  set(CBLAS_INC_DIR
      "${CBLAS_INSTALL_DIR}/include/openblas"
      CACHE PATH "openblas include directory." FORCE)
  ExternalProject_Add(
    extern_openblas
    ${EXTERNAL_PROJECT_LOG_ARGS}
    GIT_REPOSITORY ${CBLAS_REPOSITORY}
    GIT_TAG ${CBLAS_TAG}
    PREFIX ${CBLAS_PREFIX_DIR}
    INSTALL_DIR ${CBLAS_INSTALL_DIR}
    BUILD_IN_SOURCE 0
    UPDATE_COMMAND ""
    CMAKE_ARGS -DCMAKE_C_COMPILER=clang-cl
               -DCMAKE_CXX_COMPILER=clang-cl
               -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
               -DCMAKE_INSTALL_PREFIX=${CBLAS_INSTALL_DIR}
               -DCMAKE_BUILD_TYPE=Release #${THIRD_PARTY_BUILD_TYPE}
               -DCMAKE_MT=mt
               -DUSE_THREAD=OFF
               -DBUILD_WITHOUT_LAPACK=NO
               -DCMAKE_Fortran_COMPILER=flang
               -DNOFORTRAN=0
               -DDYNAMIC_ARCH=ON
               #${EXTERNAL_OPTIONAL_ARGS}
    CMAKE_CACHE_ARGS
      -DCMAKE_INSTALL_PREFIX:PATH=${CBLAS_INSTALL_DIR}
      -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
      -DCMAKE_BUILD_TYPE:STRING=Release #${THIRD_PARTY_BUILD_TYPE}
    # ninja need to know where openblas.lib comes from
    BUILD_BYPRODUCTS ${CBLAS_LIBRARIES})
  set(OPENBLAS_SHARED_LIB
      ${CBLAS_INSTALL_DIR}/bin/openblas${CMAKE_SHARED_LIBRARY_SUFFIX})

  # Interface target `openblas` that only orders the build after the external
  # project; include path and library are added directory-wide instead.
  add_library(openblas INTERFACE)
  add_dependencies(openblas extern_openblas)
  include_directories(${CBLAS_INC_DIR})
  link_libraries(${CBLAS_LIBRARIES})
endif()


================================================
FILE: audio/cmake/pybind.cmake
================================================
#the pybind11 is from:https://github.com/pybind/pybind11
# Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>, All rights reserved.

# Fetch pybind11 v2.10.0 as a zip archive into FETCHCONTENT_BASE_DIR, unpack
# it to ${FETCHCONTENT_BASE_DIR}/pybind11 and add its headers to the include
# path. Both steps are skipped when their output already exists.
SET(PYBIND_ZIP "v2.10.0.zip")
SET(LOCAL_PYBIND_ZIP ${FETCHCONTENT_BASE_DIR}/${PYBIND_ZIP})
SET(PYBIND_SRC ${FETCHCONTENT_BASE_DIR}/pybind11)
SET(DOWNLOAD_URL "https://paddleaudio.bj.bcebos.com/build/v2.10.0.zip")
SET(PYBIND_TIMEOUT 600 CACHE STRING "Timeout in seconds when downloading pybind.")

IF(NOT EXISTS ${LOCAL_PYBIND_ZIP})
    FILE(DOWNLOAD ${DOWNLOAD_URL}
      ${LOCAL_PYBIND_ZIP}
      TIMEOUT ${PYBIND_TIMEOUT}
      STATUS ERR
      SHOW_PROGRESS
    )

    # STATUS is a two-element list: numeric error code, then a message.
    LIST(GET ERR 0 ERR_CODE)
    LIST(GET ERR 1 ERR_MESSAGE)

    IF(ERR_CODE EQUAL 0)
        MESSAGE(STATUS "download pybind success")
    ELSE()
        # A failed download can leave an empty or partial file behind. Remove
        # it so the next configure run retries instead of trying to unpack it.
        FILE(REMOVE ${LOCAL_PYBIND_ZIP})
        MESSAGE(FATAL_ERROR "download pybind fail: ${ERR_MESSAGE}")
    ENDIF()
ENDIF()

IF(NOT EXISTS ${PYBIND_SRC})
    EXECUTE_PROCESS(
      COMMAND ${CMAKE_COMMAND} -E tar xfz ${LOCAL_PYBIND_ZIP}
      WORKING_DIRECTORY ${FETCHCONTENT_BASE_DIR}
      RESULT_VARIABLE tar_result
    )

    # Check the extraction result before touching the extracted directory.
    # EQUAL is a numeric comparison; MATCHES would also accept e.g. "10".
    IF(tar_result EQUAL 0)
        MESSAGE(STATUS "unzip pybind success")
    ELSE()
        MESSAGE(FATAL_ERROR "unzip pybind fail")
    ENDIF()

    # The archive unpacks to pybind11-2.10.0; give it a version-free name.
    FILE(RENAME ${FETCHCONTENT_BASE_DIR}/pybind11-2.10.0 ${PYBIND_SRC})
ENDIF()

include_directories(${PYBIND_SRC}/include)


================================================
FILE: audio/cmake/summary.cmake
================================================
# SPDX-License-Identifier: Apache-2.0

# Prints accumulated ONNX configuration summary
#
# Takes no arguments and sets no variables; it only emits STATUS messages with
# the current values of the variables named below. Variables that are not set
# in the calling scope are printed as empty strings.
# NOTE(review): the function name and the ONNX_* / ONNXIFI_* entries look like
# they were carried over from another project -- confirm whether this build
# sets any of them.
function (onnx_print_configuration_summary)
  # General toolchain and build settings.
  message(STATUS "")
  message(STATUS "******** Summary ********")
  message(STATUS "  CMake version             : ${CMAKE_VERSION}")
  message(STATUS "  CMake command             : ${CMAKE_COMMAND}")
  message(STATUS "  System                    : ${CMAKE_SYSTEM_NAME}")
  message(STATUS "  C++ compiler              : ${CMAKE_CXX_COMPILER}")
  message(STATUS "  C++ compiler version      : ${CMAKE_CXX_COMPILER_VERSION}")
  message(STATUS "  CXX flags                 : ${CMAKE_CXX_FLAGS}")
  message(STATUS "  Build type                : ${CMAKE_BUILD_TYPE}")
  # Compile definitions are read from the directory scope of PROJECT_SOURCE_DIR.
  get_directory_property(tmp DIRECTORY ${PROJECT_SOURCE_DIR} COMPILE_DEFINITIONS)
  message(STATUS "  Compile definitions       : ${tmp}")
  message(STATUS "  CMAKE_PREFIX_PATH         : ${CMAKE_PREFIX_PATH}")
  message(STATUS "  CMAKE_INSTALL_PREFIX      : ${CMAKE_INSTALL_PREFIX}")
  message(STATUS "  CMAKE_MODULE_PATH         : ${CMAKE_MODULE_PATH}")
  message(STATUS "")
  # ONNX-specific options.
  message(STATUS "  ONNX version              : ${ONNX_VERSION}")
  message(STATUS "  ONNX NAMESPACE            : ${ONNX_NAMESPACE}")
  message(STATUS "  ONNX_USE_LITE_PROTO       : ${ONNX_USE_LITE_PROTO}")
  message(STATUS "  USE_PROTOBUF_SHARED_LIBS  : ${ONNX_USE_PROTOBUF_SHARED_LIBS}")
  message(STATUS "  Protobuf_USE_STATIC_LIBS  : ${Protobuf_USE_STATIC_LIBS}")
  message(STATUS "  ONNX_DISABLE_EXCEPTIONS   : ${ONNX_DISABLE_EXCEPTIONS}")
  message(STATUS "  ONNX_WERROR               : ${ONNX_WERROR}")
  message(STATUS "  ONNX_BUILD_TESTS          : ${ONNX_BUILD_TESTS}")
  message(STATUS "  ONNX_BUILD_BENCHMARKS     : ${ONNX_BUILD_BENCHMARKS}")
  message(STATUS "  ONNXIFI_DUMMY_BACKEND     : ${ONNXIFI_DUMMY_BACKEND}")
  message(STATUS "  ONNXIFI_ENABLE_EXT        : ${ONNXIFI_ENABLE_EXT}")
  message(STATUS "")
  # Protobuf, Python and pybind11 discovery results.
  message(STATUS "  Protobuf compiler         : ${PROTOBUF_PROTOC_EXECUTABLE}")
  message(STATUS "  Protobuf includes         : ${PROTOBUF_INCLUDE_DIRS}")
  message(STATUS "  Protobuf libraries        : ${PROTOBUF_LIBRARIES}")
  message(STATUS "  BUILD_ONNX_PYTHON         : ${BUILD_ONNX_PYTHON}")
  message(STATUS "    Python version        : ${Python_VERSION}")
  message(STATUS "    Python executable     : ${Python_EXECUTABLE}")
  message(STATUS "    Python includes       : ${Python_INCLUDE_DIR}")
  message(STATUS "    Python libraries      : ${Python_LIBRARY}")
  message(STATUS "  PYBIND11                  : ${pybind11_FOUND}")
  message(STATUS "    Pybind11 version        : ${pybind11_VERSION}")
  message(STATUS "    Pybind11 include        : ${pybind11_INCLUDE_DIR}")
  message(STATUS "    Pybind11 includes       : ${pybind11_INCLUDE_DIRS}")
  message(STATUS "    Pybind11 libraries      : ${pybind11_LIBRARIES}")
endfunction()

================================================
FILE: audio/paddleaudio/CMakeLists.txt
================================================

# Descend into the two subdirectories of this package, third_party first and
# then src.
add_subdirectory(third_party)
add_subdirectory(src)


================================================
FILE: audio/paddleaudio/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import _extension
from . import backends
from . import compliance
from . import datasets
from . import features
from . import functional
from . import metric
from . import sox_effects
from . import utils


================================================
FILE: audio/paddleaudio/_extension.py
================================================
import contextlib
import ctypes
import os
import sys
import types
import warnings
from pathlib import Path

from ._internal import module_utils as _mod_utils  # noqa: F401

# Query `hasattr` only once.
_SET_GLOBAL_FLAGS = hasattr(sys, 'getdlopenflags') and hasattr(sys,
                                                               'setdlopenflags')


@contextlib.contextmanager
def dl_open_guard():
    """Temporarily add ``RTLD_GLOBAL`` to the interpreter's dlopen flags.

    See https://manpages.debian.org/bullseye/manpages-dev/dlopen.3.en.html

    Used as a context manager around opening a shared library that carries
    custom operators. On interpreters without ``sys.getdlopenflags`` /
    ``sys.setdlopenflags`` the body simply runs with nothing changed.

    The previous flags are restored when the block exits, including when the
    body raises (for example when ``ctypes.CDLL`` fails to load a library).
    """
    if not _SET_GLOBAL_FLAGS:
        yield
        return

    old_flags = sys.getdlopenflags()
    sys.setdlopenflags(old_flags | ctypes.RTLD_GLOBAL)
    try:
        yield
    finally:
        # Without this, an exception in the body would leave RTLD_GLOBAL set
        # for every later import in the process.
        sys.setdlopenflags(old_flags)


def resolve_library_path(path: str) -> str:
    """Return the canonical form of ``path`` as given by ``os.path.realpath``.

    Args:
        path (str): Path to a library file.

    Returns:
        str: The canonical path, with symbolic links resolved.
    """
    canonical = os.path.realpath(path)
    return canonical


class _Ops(types.ModuleType):
    #__file__ = '_ops.py'

    def __init__(self):
        super(_Ops, self).__init__('paddleaudio.ops')
        self.loaded_libraries = set()

    def load_library(self, path):
        """
        Loads a shared library from the given path into the current process.
        This allows dynamically loading custom operators. For this, 
        you should compile your operator and 
        the static registration code into a shared library object, and then
        call ``paddleaudio.ops.load_library('path/to/libcustom.so')`` to load the
        shared object.
        After the library is loaded, it is added to the
        ``paddleaudio.ops.loaded_libraries`` attribute, a set that may be inspected
        for the paths of all libraries loaded using this function.
        Args:
            path (str): A path to a shared library to load.
        """
        path = resolve_library_path(path)
        with dl_open_guard():
            # https://docs.python.org/3/library/ctypes.html?highlight=ctypes#loading-shared-libraries
            # Import the shared library into the process, thus running its
            # static (global) initialization code in order to register custom
            # operators with the JIT.
            ctypes.CDLL(path)
        self.loaded_libraries.add(path)


_LIB_DIR = Path(__file__).parent / "lib"


def _get_lib_path(lib: str):
    suffix = "pyd" if os.name == "nt" else "so"
    path = _LIB_DIR / f"{lib}.{suffix}"
    return path


def _load_lib(lib: str) -> bool:
    """Load the extension library ``lib`` if its file exists.

    Note:
        When `paddleaudio` is deployed in `pex` format, the library file is
        not in a standard location. In that case `libpaddleaudio` is expected
        to be reachable through the dynamic loader's search path, so that
        importing `_paddleaudio` lets the loader find and load it. This is why
        a missing file only produces a warning rather than an error.

    Args:
        lib (str): Library name without extension, looked up via
            ``_get_lib_path``.

    Returns:
        bool:
            True if the library file exists AND it was loaded without failure.
            False if the library file does not exist (a warning is issued).

    Raises:
        Exception:
            If the file exists but loading fails, the exception from the
            loader is propagated unchanged. The expected case is `OSError`
            from `ctypes.CDLL` when a dynamic dependency is missing. Such a
            failure is not recoverable here: the user has to install the
            missing dependency.
    """
    lib_file = _get_lib_path(lib)
    if lib_file.exists():
        ops.load_library(lib_file)
        return True
    warnings.warn("lib path is not exists:" + str(lib_file))
    return False


_FFMPEG_INITIALIZED = False


def _init_ffmpeg():
    global _FFMPEG_INITIALIZED
    if _FFMPEG_INITIALIZED:
        return

    if not paddleaudio._paddlleaudio.is_ffmpeg_available():
        raise RuntimeError(
            "paddlleaudio is not compiled with FFmpeg integration. Please set USE_FFMPEG=1 when compiling paddlleaudio."
        )

    try:
        _load_lib("libpaddlleaudio_ffmpeg")
    except OSError as err:
        raise ImportError(
            "FFmpeg libraries are not found. Please install FFmpeg.") from err

    import paddllespeech.audio._paddlleaudio_ffmpeg  # noqa

    paddleaudio._paddlleaudio.ffmpeg_init()
    if paddleaudio._paddlleaudio.ffmpeg_get_log_level() > 8:
        paddleaudio._paddlleaudio.ffmpeg_set_log_level(8)

    _FFMPEG_INITIALIZED = True


def _init_extension():
    """Load the C++ extension if present; warn and return if it is not.

    Steps, in order: check that ``paddleaudio._paddleaudio`` can be located,
    load ``libpaddleaudio``, import the extension module, then attempt the
    FFmpeg initialization. Failures of the last step are ignored.
    """
    unavailable_msg = "paddleaudio C++ extension is not available. sox_io, sox_effect, kaldi raw feature is not supported!!!"

    if not _mod_utils.is_module_available("paddleaudio._paddleaudio"):
        warnings.warn(unavailable_msg)
        return

    _load_lib("libpaddleaudio")
    # Importing the module initializes the methods registered via PyBind11.
    # It must come after the base library has been loaded.
    try:
        from paddleaudio import _paddleaudio  # noqa
    except Exception:
        warnings.warn(unavailable_msg)
        return

    # This runs as part of `import paddleaudio`, so an FFmpeg initialization
    # failure is deliberately ignored here. If the integration is not properly
    # initialized, a detailed error is raised later when client code tries to
    # use the dedicated feature.
    with contextlib.suppress(Exception):
        _init_ffmpeg()


# Module-level instance used by `_load_lib`; it has to exist before
# `_init_extension()` runs below.
ops = _Ops()

# Executed on import of this module: tries to load the C++ extension.
_init_extension()


================================================
FILE: audio/paddleaudio/_internal/__init__.py
================================================


================================================
FILE: audio/paddleaudio/_internal/module_utils.py
================================================
import importlib.util
import platform
import warnings
from functools import wraps
from typing import Optional

#code is from https://github.com/pytorch/audio/blob/main/torchaudio/_internal/module_utils.py with modification.


def is_module_available(*modules: str) -> bool:
    r"""Return whether every named module can be located, **without**
    importing the modules themselves.

    This is generally safer than a try/except block around ``import X``: it
    avoids third-party libraries breaking assumptions of some of our tests,
    e.g. setting the multiprocessing start method when imported
    (see librosa/#747, torchvision/#544).

    For a dotted name such as ``"pkg.sub"``, ``importlib.util.find_spec``
    imports the parent package in order to search it. If that parent cannot
    be imported, the module is reported as unavailable instead of the import
    error propagating to the caller.

    Args:
        *modules (str): Module names to look up.

    Returns:
        bool: True if all modules were found (also when no name is given),
            False as soon as one of them is missing.
    """
    for name in modules:
        try:
            if importlib.util.find_spec(name) is None:
                return False
        except ImportError:
            # Raised (as ModuleNotFoundError) when the parent of a dotted
            # name does not exist or fails to import.
            return False
    return True


def requires_module(*modules: str):
    """Build a decorator that reports missing optional modules on call.

    Availability is checked once, when this function is called. If every
    module is available the returned decorator hands the function back
    unchanged. Otherwise the decorated function is replaced by a stub that
    raises ``RuntimeError`` naming the missing module(s), which is clearer
    than a ``NameError:  name 'module' is not defined`` at some random place.

    Args:
        *modules (str): Names of the modules the decorated function needs.

    Returns:
        Callable: The decorator to apply to the function.
    """
    missing = [name for name in modules if not is_module_available(name)]

    if missing:
        if len(missing) == 1:
            req = f"module: {missing[0]}"
        else:
            req = f"modules: {missing}"

        def decorator(func):
            @wraps(func)
            def wrapped(*args, **kwargs):
                raise RuntimeError(
                    f"{func.__module__}.{func.__name__} requires {req}")

            return wrapped

    else:
        # Nothing is missing, so there is no need to wrap the function.
        def decorator(func):
            return func

    return decorator


def deprecated(direction: str, version: Optional[str]=None):
    """Build a decorator that emits a deprecation warning on every call.

    Args:
        direction (str): Migration steps to be given to users.
        version (str or int): The version when the object will be removed.
            When None, the message says "future" instead.

    Returns:
        Callable: A decorator; the wrapped function warns and then forwards
            all arguments to the original function, returning its result.
    """

    def decorator(func):
        @wraps(func)
        def wrapped(*args, **kwargs):
            removal = "future" if version is None else version
            message = (
                f"{func.__module__}.{func.__name__} has been deprecated "
                f"and will be removed from {removal} release. "
                f"{direction}")
            # stacklevel=2 attributes the warning to the caller of `wrapped`.
            warnings.warn(message, stacklevel=2)
            return func(*args, **kwargs)

        return wrapped

    return decorator


def is_kaldi_available():
    """Return True if ``paddleaudio._paddleaudio`` can be imported.

    Any exception raised by the import is treated as "not available".
    """
    try:
        from paddleaudio import _paddleaudio  # noqa: F401
    except Exception:
        return False
    return True

def requires_kaldi():
    """Build a decorator that guards functions relying on the kaldi build.

    Availability is checked once, when this function is called. When
    ``is_kaldi_available()`` is true the decorator returns the function
    unchanged; otherwise the function is replaced by a stub that raises
    ``RuntimeError`` when called.
    """
    if not is_kaldi_available():

        def decorator(func):
            @wraps(func)
            def wrapped(*args, **kwargs):
                raise RuntimeError(
                    f"{func.__module__}.{func.__name__} requires libpaddleaudio build with kaldi")

            return wrapped

    else:

        def decorator(func):
            return func

    return decorator


def _check_soundfile_importable():
    """Check whether the ``soundfile`` package can actually be imported.

    Returns:
        bool: False if the module cannot be located, or if it is located but
            importing it raises (a warning is issued in that case). True when
            the import succeeds.
    """
    if is_module_available("soundfile"):
        try:
            import soundfile  # noqa: F401
        except Exception:
            warnings.warn(
                "Failed to import soundfile. 'soundfile' backend is not available.")
        else:
            return True
    return False


# Evaluated once when this module is imported; the result is reused by
# `is_soundfile_available`.
_is_soundfile_importable = _check_soundfile_importable()


def is_soundfile_available():
    """Return the import-time result of ``_check_soundfile_importable()``."""
    return _is_soundfile_importable


def requires_soundfile():
    """Build a decorator that guards functions relying on ``soundfile``.

    When ``is_soundfile_available()`` is true the decorator returns the
    function unchanged; otherwise the function is replaced by a stub that
    raises ``RuntimeError`` when called.
    """
    if not is_soundfile_available():

        def decorator(func):
            @wraps(func)
            def wrapped(*args, **kwargs):
                raise RuntimeError(
                    f"{func.__module__}.{func.__name__} requires soundfile")

            return wrapped
    else:

        def decorator(func):
            return func

    return decorator


def is_sox_available():
    """Return True if ``paddleaudio._paddleaudio`` can be imported.

    Any exception raised by the import is treated as "not available".
    """
    try:
        from paddleaudio import _paddleaudio  # noqa: F401
    except Exception:
        return False
    return True


def requires_sox():
    """Build a decorator that guards functions relying on the sox build.

    When ``is_sox_available()`` is true the decorator returns the function
    unchanged; otherwise the function is replaced by a stub that raises
    ``RuntimeError`` when called.
    """
    if not is_sox_available():

        def decorator(func):
            @wraps(func)
            def wrapped(*args, **kwargs):
                raise RuntimeError(
                    f"{func.__module__}.{func.__name__} requires libpaddleaudio build with sox")

            return wrapped
    else:

        def decorator(func):
            return func

    return decorator


================================================
FILE: audio/paddleaudio/backends/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import utils
from .soundfile_backend import depth_convert
from .soundfile_backend import normalize
from .soundfile_backend import resample
from .soundfile_backend import soundfile_load
from .soundfile_backend import soundfile_save
from .soundfile_backend import to_mono
from .utils import get_audio_backend
from .utils import list_audio_backends
from .utils import set_audio_backend

# Runs once when the `backends` package is imported.
# NOTE(review): presumably selects the default audio backend -- confirm in utils.
utils._init_audio_backend()


================================================
FILE: audio/paddleaudio/backends/common.py
================================================
# Taken from https://github.com/pytorch/audio/blob/main/torchaudio/backend/common.py with modification.


class AudioInfo:
    """Return type of the ``info`` function.

    This class is used by :ref:`"sox_io" backend<sox_io_backend>` and
    :ref:`"soundfile" backend with the new interface<soundfile_backend>`.

    :ivar int sample_rate: Sample rate
    :ivar int num_frames: The number of frames
    :ivar int num_channels: The number of channels
    :ivar int bits_per_sample: The number of bits per sample. This is 0 for lossy formats,
        or when it cannot be accurately inferred.
    :ivar str encoding: Audio encoding
        The values encoding can take are one of the following:

            * ``PCM_S``: Signed integer linear PCM
            * ``PCM_U``: Unsigned integer linear PCM
            * ``PCM_F``: Floating point linear PCM
            * ``FLAC``: Flac, Free Lossless Audio Codec
            * ``ULAW``: Mu-law
            * ``ALAW``: A-law
            * ``MP3`` : MP3, MPEG-1 Audio Layer III
            * ``VORBIS``: OGG Vorbis
            * ``AMR_WB``: Adaptive Multi-Rate Wideband
            * ``AMR_NB``: Adaptive Multi-Rate Narrowband
            * ``OPUS``: Opus
            * ``HTK``: Single channel 16-bit PCM
            * ``UNKNOWN`` : None of above
    """

    def __init__(
            self,
            sample_rate: int,
            num_frames: int,
            num_channels: int,
            bits_per_sample: int,
            encoding: str, ):
        self.sample_rate = sample_rate
        self.num_frames = num_frames
        self.num_channels = num_channels
        self.bits_per_sample = bits_per_sample
        self.encoding = encoding

    def __str__(self):
        # Use the actual class name; the text used to print the unrelated
        # name "AudioMetaData".
        return (f"{type(self).__name__}("
                f"sample_rate={self.sample_rate}, "
                f"num_frames={self.num_frames}, "
                f"num_channels={self.num_channels}, "
                f"bits_per_sample={self.bits_per_sample}, "
                f"encoding={self.encoding}"
                f")")


================================================
FILE: audio/paddleaudio/backends/no_backend.py
================================================
from pathlib import Path
from typing import Callable
from typing import Optional
from typing import Tuple
from typing import Union

from paddle import Tensor

#code is from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/no_backend.py


def load(
        filepath: Union[str, Path],
        out: Optional[Tensor]=None,
        normalization: Union[bool, float, Callable]=True,
        channels_first: bool=True,
        num_frames: int=0,
        offset: int=0,
        filetype: Optional[str]=None, ) -> Tuple[Tensor, int]:
    """Placeholder ``load``; all arguments are ignored.

    Raises:
        RuntimeError: Always, because no audio I/O backend is available.
    """
    raise RuntimeError("No audio I/O backend is available.")


def save(filepath: str,
         src: Tensor,
         sample_rate: int,
         precision: int=16,
         channels_first: bool=True) -> None:
    """Placeholder ``save``; all arguments are ignored.

    Raises:
        RuntimeError: Always, because no audio I/O backend is available.
    """
    raise RuntimeError("No audio I/O backend is available.")


def info(filepath: str) -> None:
    """Placeholder ``info``; the argument is ignored.

    Raises:
        RuntimeError: Always, because no audio I/O backend is available.
    """
    raise RuntimeError("No audio I/O backend is available.")


================================================
FILE: audio/paddleaudio/backends/soundfile_backend.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import warnings
from typing import Optional
from typing import Tuple

import numpy as np
import paddle
import resampy
import soundfile
from scipy.io import wavfile

from ..utils import depth_convert
from ..utils import ParameterError
from .common import AudioInfo

# Public names of this module.
# NOTE(review): 'save', 'load' and 'info' are not defined in the part of the
# file reviewed here -- confirm they are defined further down.
__all__ = [
    'resample',
    'to_mono',
    'normalize',
    'save',
    'soundfile_save',
    'load',
    'soundfile_load',
    'info',
]
# Normalization types; listed in the error message raised by `normalize`.
NORMALMIZE_TYPES = ['linear', 'gaussian']
# Merge strategies accepted by `to_mono`.
MERGE_TYPES = ['ch0', 'ch1', 'random', 'average']
# Filter names accepted by `resample`.
RESAMPLE_MODES = ['kaiser_best', 'kaiser_fast']
# Small constant used by `normalize` to keep its divisors away from zero.
EPS = 1e-8


def resample(y: np.ndarray,
             src_sr: int,
             target_sr: int,
             mode: str='kaiser_fast') -> np.ndarray:
    """Audio resampling.

    Args:
        y (np.ndarray): Input waveform array in 1D or 2D.
        src_sr (int): Source sample rate.
        target_sr (int): Target sample rate.
        mode (str, optional): The resampling filter to use, one of
            `RESAMPLE_MODES`. Defaults to 'kaiser_fast'.

    Returns:
        np.ndarray: `y` resampled to `target_sr`

    Raises:
        ParameterError: If `y` is not a numpy array or `mode` is not one of
            `RESAMPLE_MODES`.
    """

    if mode == 'kaiser_best':
        warnings.warn(
            f'Using resampy in kaiser_best to {src_sr}=>{target_sr}. This function is pretty slow, \
        we recommend the mode kaiser_fast in large scale audio training')

    if not isinstance(y, np.ndarray):
        # The `f` prefix was missing, so the message printed the literal
        # text "{type(y)}" instead of the received type.
        raise ParameterError(
            f'Only support numpy np.ndarray, but received y in {type(y)}')

    if mode not in RESAMPLE_MODES:
        raise ParameterError(f'resample mode must in {RESAMPLE_MODES}')

    return resampy.resample(y, src_sr, target_sr, filter=mode)


def to_mono(y: np.ndarray, merge_type: str='average') -> np.ndarray:
    """Convert stereo audio to mono.

    Channels are indexed along the first axis of a 2D input. Only the first
    two channels are considered: 'ch0' and 'ch1' select one of them, 'random'
    picks one of the two at random, and 'average' returns their mean.

    Args:
        y (np.ndarray): Input waveform array in 1D or 2D.
        merge_type (str, optional): Merge type to generate mono waveform,
            one of `MERGE_TYPES`. Defaults to 'average'.

    Returns:
        np.ndarray: `y` with mono channel. A 1D input is returned unchanged.

    Raises:
        ParameterError: If `merge_type` is unknown, `y` has more than two
            dimensions, or the dtype cannot be averaged.
    """

    if merge_type not in MERGE_TYPES:
        raise ParameterError(
            f'Unsupported merge type {merge_type}, available types are {MERGE_TYPES}'
        )
    if y.ndim > 2:
        raise ParameterError(
            f'Unsupported audio array,  y.ndim > 2, the shape is {y.shape}')
    if y.ndim == 1:  # nothing to merge
        return y

    if merge_type == 'ch0':
        return y[0]
    if merge_type == 'ch1':
        return y[1]
    if merge_type == 'random':
        return y[np.random.randint(0, 2)]

    # Averaging depends on the dtype.
    if np.issubdtype(y.dtype, np.floating):
        # Any float dtype (e.g. float32, float64) keeps its dtype here.
        return (y[0] + y[1]) * 0.5

    if np.issubdtype(y.dtype, np.integer) and y.dtype.itemsize < 8:
        # Widen before adding so the sum cannot overflow. The floored mean of
        # two in-range values is itself in range, so casting back is lossless.
        widened = y.astype('int64')
        return ((widened[0] + widened[1]) // 2).astype(y.dtype)

    raise ParameterError(f'Unsupported dtype: {y.dtype}')


def soundfile_load_(file: os.PathLike,
                    offset: Optional[float]=None,
                    dtype: str='int16',
                    duration: Optional[int]=None) -> Tuple[np.ndarray, int]:
    """Read a waveform from ``file`` through the soundfile library (libsndfile).

    Args:
        file (os.PathLike): File of waveform.
        offset (Optional[float], optional): Offset to the start of waveform;
            it is multiplied by the native sample rate to get the frame to
            seek to. Defaults to None.
        dtype (str, optional): Data type of waveform. Defaults to 'int16'.
        duration (Optional[int], optional): Duration of waveform to read; it
            is multiplied by the native sample rate to get the frame count.
            None reads to the end. Defaults to None.

    Returns:
        Tuple[np.ndarray, int]: Transposed waveform as returned by soundfile,
            and the file's native sample rate.
    """
    with soundfile.SoundFile(file) as sf_desc:
        sr_native = sf_desc.samplerate
        if offset:
            sf_desc.seek(int(offset * sr_native))
        # -1 asks soundfile to read everything up to the end of the file.
        frame_duration = -1 if duration is None else int(duration * sr_native)
        y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T

    return y, sr_native


def normalize(y: np.ndarray, norm_type: str='linear',
              mul_factor: float=1.0) -> np.ndarray:
    """Normalize an input audio with additional multiplier.

    'linear' divides by the peak absolute value (plus `EPS`); 'gaussian'
    subtracts the mean and divides by the standard deviation (at least
    `EPS`). Both results are then scaled by `mul_factor`.

    Args:
        y (np.ndarray): Input waveform array in 1D or 2D.
        norm_type (str, optional): Type of normalization. Defaults to 'linear'.
        mul_factor (float, optional): Scaling factor. Defaults to 1.0.

    Returns:
        np.ndarray: `y` after normalization.

    Raises:
        NotImplementedError: If `norm_type` is neither 'linear' nor 'gaussian'.
    """

    if norm_type == 'gaussian':
        centered = y - np.mean(y)
        spread = max(np.std(y), EPS)
        return mul_factor * centered / spread

    if norm_type != 'linear':
        raise NotImplementedError(f'norm_type should be in {NORMALMIZE_TYPES}')

    peak = np.max(np.abs(y))
    scale = 1.0 / (peak + EPS)
    return y * scale * mul_factor


def soundfile_save(y: np.ndarray, sr: int, file: os.PathLike) -> None:
    """Save audio file to disk. This function saves audio to disk using scipy.io.wavfile, with additional step to convert input waveform to int16.

    Args:
        y (np.ndarray): Input waveform array in 1D or 2D.
        sr (int): Sample rate.
        file (os.PathLike): Path of audio file to save, given as a string or
            any path-like object (e.g. `pathlib.Path`).

    Raises:
        ParameterError: If the file name does not end with '.wav' or `sr` is
            not positive.
    """
    # Accept path-like objects as the annotation promises; calling
    # `.endswith` directly on a `pathlib.Path` raises AttributeError.
    file_name = os.fspath(file)

    if not file_name.endswith('.wav'):
        raise ParameterError(
            f'only .wav file supported, but dst file name is: {file}')

    if sr <= 0:
        raise ParameterError(
            f'Sample rate should be larger than 0, received sr = {sr}')

    if y.dtype not in ['int16', 'int8']:
        warnings.warn(
            f'input data type is {y.dtype}, will convert data to int16 format before saving'
        )
        y_out = depth_convert(y, 'int16')
    else:
        y_out = y

    wavfile.write(file_name, sr, y_out)


def soundfile_load(
        file: os.PathLike,
        sr: Optional[int]=None,
        mono: bool=True,
        merge_type: str='average',  # ch0,ch1,random,average
        normal: bool=True,
        norm_type: str='linear',
        norm_mul_factor: float=1.0,
        offset: float=0.0,
        duration: Optional[int]=None,
        dtype: str='float32',
        resample_mode: str='kaiser_fast') -> Tuple[np.ndarray, int]:
    """Read an audio file and post-process it into the requested form.

    The raw samples come from ``soundfile_load_``. They are then optionally
    merged to mono, resampled, normalized and finally converted to `dtype`
    with ``depth_convert``.

    Args:
        file (os.PathLike): Path of audio file to load.
        sr (Optional[int], optional): Sample rate of loaded waveform. When it
            differs from the file's rate the waveform is resampled. Defaults to None.
        mono (bool, optional): Return waveform with mono channel. Defaults to True.
        merge_type (str, optional): Merge type of multi-channels waveform. Defaults to 'average'.
        normal (bool, optional): Waveform normalization. Defaults to True.
        norm_type (str, optional): Type of normalization. Defaults to 'linear'.
        norm_mul_factor (float, optional): Scaling factor. Defaults to 1.0.
        offset (float, optional): Offset to the start of waveform. Defaults to 0.0.
        duration (Optional[int], optional): Duration of waveform to read. Defaults to None.
        dtype (str, optional): Data type of waveform. Defaults to 'float32'.
        resample_mode (str, optional): The resampling filter to use. Defaults to 'kaiser_fast'.

    Returns:
        Tuple[np.ndarray, int]: Waveform in ndarray and its samplerate.

    Raises:
        ParameterError: If the file yields no samples.
    """
    wav, rate = soundfile_load_(
        file, offset=offset, dtype=dtype, duration=duration)

    # Valid data is either a non-empty 1D array or a 2D array whose first row
    # is non-empty.
    has_samples = (wav.ndim == 1 and len(wav) > 0) or (wav.ndim == 2 and
                                                       len(wav[0]) > 0)
    if not has_samples:
        raise ParameterError(f'audio file {file} looks empty')

    if mono:
        wav = to_mono(wav, merge_type)

    wants_other_rate = sr is not None and sr != rate
    if wants_other_rate:
        wav = resample(wav, rate, sr, mode=resample_mode)
        rate = sr

    if normal:
        wav = normalize(wav, norm_type, norm_mul_factor)
    elif dtype in ['int8', 'int16']:
        # Integer targets still need a linear normalization before the depth
        # conversion, even when the caller asked for no normalization.
        wav = normalize(wav, 'linear', 1.0)

    return depth_convert(wav, dtype), rate


#The code below is taken from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/soundfile_backend.py, with some modifications.


def _get_subtype_for_wav(dtype: paddle.dtype,
                         encoding: str,
                         bits_per_sample: int):
    """Choose the soundfile subtype string for writing a WAV file.

    Args:
        dtype (paddle.dtype): dtype of the tensor to save. Only consulted when
            neither `encoding` nor `bits_per_sample` is given.
        encoding (str): Requested encoding ("PCM_S", "PCM_U", "PCM_F", "ULAW",
            "ALAW"), or a falsy value to infer it.
        bits_per_sample (int): Requested bit depth, or a falsy value to infer it.

    Returns:
        str: The subtype name.

    Raises:
        ValueError: If the dtype, encoding or bit depth is not supported for WAV.
    """
    if not encoding:
        if bits_per_sample:
            # Only the bit depth was given: 8-bit WAV is unsigned, the rest signed.
            return "PCM_U8" if bits_per_sample == 8 else f"PCM_{bits_per_sample}"
        # Nothing was requested: fall back to the tensor's dtype.
        by_dtype = {
            paddle.uint8: "PCM_U8",
            paddle.int16: "PCM_16",
            paddle.int32: "PCM_32",
            paddle.float32: "FLOAT",
            paddle.float64: "DOUBLE",
        }
        inferred = by_dtype.get(dtype)
        if not inferred:
            raise ValueError(f"Unsupported dtype for wav: {dtype}")
        return inferred

    if encoding == "PCM_S":
        if not bits_per_sample:
            return "PCM_32"
        if bits_per_sample == 8:
            raise ValueError("wav does not support 8-bit signed PCM encoding.")
        return f"PCM_{bits_per_sample}"

    if encoding == "PCM_F":
        if bits_per_sample == 64:
            return "DOUBLE"
        if bits_per_sample in (None, 32):
            return "FLOAT"
        raise ValueError("wav only supports 32/64-bit float PCM encoding.")

    # The remaining encodings are all restricted to 8 bits (or unspecified).
    eight_bit_ok = bits_per_sample in (None, 8)

    if encoding == "PCM_U":
        if not eight_bit_ok:
            raise ValueError("wav only supports 8-bit unsigned PCM encoding.")
        return "PCM_U8"

    if encoding == "ULAW":
        if not eight_bit_ok:
            raise ValueError("wav only supports 8-bit mu-law encoding.")
        return "ULAW"

    if encoding == "ALAW":
        if not eight_bit_ok:
            raise ValueError("wav only supports 8-bit a-law encoding.")
        return "ALAW"

    raise ValueError(f"wav does not support {encoding}.")


def _get_subtype_for_sphere(encoding: str, bits_per_sample: int):
    if encoding in (None, "PCM_S"):
        return f"PCM_{bits_per_sample}" if bits_per_sample else "PCM_32"
    if encoding in ("PCM_U", "PCM_F"):
        raise ValueError(f"sph does not support {encoding} encoding.")
    if encoding == "ULAW":
        if bits_per_sample in (None, 8):
            return "ULAW"
        raise ValueError("sph only supports 8-bit for mu-law encoding.")
    if encoding == "ALAW":
        return "ALAW"
    raise ValueError(f"sph does not support {encoding}.")


def _get_subtype(dtype: paddle.dtype,
                 format: str,
                 encoding: str,
                 bits_per_sample: int):
    """Choose the soundfile subtype string for the given container format.

    Args:
        dtype (paddle.dtype): dtype of the tensor to save (used for "wav" only).
        format (str): Lower-case format name: "wav", "flac", "ogg", "vorbis",
            "sph", "nis" or "nist".
        encoding (str): Requested encoding, or a falsy value.
        bits_per_sample (int): Requested bit depth, or a falsy value.

    Returns:
        str: The subtype name.

    Raises:
        ValueError: If the format is unknown or the encoding / bit depth is
            not supported by the format.
    """
    if format == "wav":
        return _get_subtype_for_wav(dtype, encoding, bits_per_sample)

    if format == "sph":
        return _get_subtype_for_sphere(encoding, bits_per_sample)

    if format in ("nis", "nist"):
        return "PCM_16"

    if format in ("ogg", "vorbis"):
        if encoding or bits_per_sample:
            raise ValueError(
                "ogg/vorbis does not support encoding/bits_per_sample.")
        return "VORBIS"

    if format != "flac":
        raise ValueError(f"Unsupported format: {format}")

    # Only "flac" reaches this point.
    if encoding:
        raise ValueError("flac does not support encoding.")
    if not bits_per_sample:
        return "PCM_16"
    if bits_per_sample > 24:
        raise ValueError("flac does not support bits_per_sample > 24.")
    if bits_per_sample == 8:
        return "PCM_S8"
    return f"PCM_{bits_per_sample}"


def save(
        filepath: str,
        src: paddle.Tensor,
        sample_rate: int,
        channels_first: bool=True,
        compression: Optional[float]=None,
        format: Optional[str]=None,
        encoding: Optional[str]=None,
        bits_per_sample: Optional[int]=None, ):
    """Save audio data to file.

    Note:
        The formats this function can handle depend on the soundfile installation.
        This function is tested on the following formats:

        * WAV

            * 32-bit floating-point
            * 32-bit signed integer
            * 16-bit signed integer
            * 8-bit unsigned integer

        * FLAC
        * OGG/VORBIS
        * SPHERE

    Note:
        ``filepath`` argument is intentionally annotated as ``str`` only, even though it accepts
        ``pathlib.Path`` object as well. This is for the consistency with ``"sox_io"`` backend.

    Args:
        filepath (str or pathlib.Path): Path to audio file.
        src (paddle.Tensor): Audio data to save. Must be a 2D tensor.
        sample_rate (int): Sampling rate.
        channels_first (bool, optional): If ``True``, the given tensor is interpreted as `[channel, time]`,
            otherwise `[time, channel]`.
        compression (float or None, optional): Not used.
            It is here only for interface compatibility reason with "sox_io" backend.
        format (str or None, optional): Override the audio format.
            When ``filepath`` argument is path-like object, audio format is
            inferred from file extension. If the file extension is missing or
            different, you can specify the correct format with this argument.

            When ``filepath`` argument is file-like object,
            this argument is required.

            Valid values are ``"wav"``, ``"ogg"``, ``"vorbis"``,
            ``"flac"`` and ``"sph"``.
        encoding (str or None, optional): Changes the encoding for supported formats.
            This argument is effective only for supported formats, such as
            ``"wav"``, ``"flac"`` and ``"sph"``. Valid values are:

                - ``"PCM_S"`` (signed integer Linear PCM)
                - ``"PCM_U"`` (unsigned integer Linear PCM)
                - ``"PCM_F"`` (floating point PCM)
                - ``"ULAW"`` (mu-law)
                - ``"ALAW"`` (a-law)

        bits_per_sample (int or None, optional): Changes the bit depth for the
            supported formats.
            When ``format`` is one of ``"wav"``, ``"flac"`` or ``"sph"``,
            you can change the bit depth.
            Valid values are ``8``, ``16``, ``24``, ``32`` and ``64``.

    Raises:
        ValueError: If ``src`` is not 2D, ``bits_per_sample`` is invalid, or the
            format / encoding / bit depth combination is not supported.
        RuntimeError: If ``filepath`` is a file-like object and ``format`` is missing.

    Supported formats/encodings/bit depth/compression are:

    ``"wav"``
        - 32-bit floating-point PCM
        - 32-bit signed integer PCM
        - 24-bit signed integer PCM
        - 16-bit signed integer PCM
        - 8-bit unsigned integer PCM
        - 8-bit mu-law
        - 8-bit a-law

        Note:
            Default encoding/bit depth is determined by the dtype of
            the input Tensor.

    ``"flac"``
        - 8-bit
        - 16-bit (default)
        - 24-bit

    ``"ogg"``, ``"vorbis"``
        - Doesn't accept changing configuration.

    ``"sph"``
        - 8-bit signed integer PCM
        - 16-bit signed integer PCM
        - 24-bit signed integer PCM
        - 32-bit signed integer PCM (default)
        - 8-bit mu-law
        - 8-bit a-law
        - 16-bit a-law
        - 24-bit a-law
        - 32-bit a-law

    """
    if src.ndim != 2:
        raise ValueError(f"Expected 2D Tensor, got {src.ndim}D.")
    if compression is not None:
        warnings.warn(
            '`save` function of "soundfile" backend does not support "compression" parameter. '
            "The argument is silently ignored.")
    if hasattr(filepath, "write"):
        # File-like destinations carry no extension, so the format must be explicit.
        if format is None:
            raise RuntimeError(
                "`format` is required when saving to file object.")
        ext = format.lower()
    else:
        ext = str(filepath).split(".")[-1].lower()

    if bits_per_sample not in (None, 8, 16, 24, 32, 64):
        raise ValueError("Invalid bits_per_sample.")
    if bits_per_sample == 24:
        warnings.warn(
            "Saving audio with 24 bits per sample might warp samples near -1. "
            "Using 16 bits per sample might be able to avoid this.")
    subtype = _get_subtype(src.dtype, ext, encoding, bits_per_sample)

    # sph is an extension used in TED-LIUM but soundfile does not recognize it
    # as NIST format, so the SPHERE aliases are mapped to "NIST" manually here.
    # This also covers an explicitly passed alias (e.g. format="sph", which is
    # mandatory for file objects): soundfile would otherwise reject it as an
    # unknown format. Any other explicit `format` is left untouched.
    sphere_aliases = ("nis", "nist", "sph")
    if ext in sphere_aliases and (format is None or
                                  format.lower() in sphere_aliases):
        format = "NIST"

    if channels_first:
        # soundfile expects `[time, channel]` data.
        src = src.t()

    soundfile.write(
        file=filepath,
        data=src,
        samplerate=sample_rate,
        subtype=subtype,
        format=format)


# Maps a soundfile subtype name to the dtype string that `load` passes to
# `SoundFile.read` when un-normalized WAV data is requested. Subtypes missing
# from this table make `load` raise ValueError in that mode.
# NOTE(review): confirm the installed soundfile accepts 'int8'/'uint8' as a
# read dtype; its documentation lists float64, float32, int32 and int16.
_SUBTYPE2DTYPE = {
    "PCM_S8": "int8",
    "PCM_U8": "uint8",
    "PCM_16": "int16",
    "PCM_32": "int32",
    "FLOAT": "float32",
    "DOUBLE": "float64",
}


def load(
        filepath: str,
        frame_offset: int=0,
        num_frames: int=-1,
        normalize: bool=True,
        channels_first: bool=True,
        format: Optional[str]=None, ) -> Tuple[paddle.Tensor, int]:
    """Load audio data from file.

    Note:
        The formats this function can handle depend on the soundfile installation.
        This function is tested on the following formats:

        * WAV

            * 32-bit floating-point
            * 32-bit signed integer
            * 16-bit signed integer
            * 8-bit unsigned integer

        * FLAC
        * OGG/VORBIS
        * SPHERE

    With the defaults (``normalize=True``, ``channels_first=True``) the result is a
    ``float32`` Tensor of shape `[channel, time]` whose samples are normalized to
    ``[-1.0, 1.0]``.

    For integer WAV input (32-bit signed, 16-bit signed, 8-bit unsigned; 24-bit signed
    is not supported), passing ``normalize=False`` returns an integer Tensor covering
    the whole range of the matching dtype: ``int32`` for 32-bit signed PCM, ``int16``
    for 16-bit signed PCM and ``uint8`` for 8-bit unsigned PCM.

    ``normalize`` has no effect on 32-bit floating-point WAV and on other formats such
    as ``flac`` and ``mp3``; for those the result is always a ``float32`` Tensor with
    values normalized to ``[-1.0, 1.0]``.

    Note:
        ``filepath`` argument is intentionally annotated as ``str`` only, even though it accepts
        ``pathlib.Path`` object as well. This is for the consistency with ``"sox_io"`` backend.

    Args:
        filepath (path-like object or file-like object):
            Source of audio data.
        frame_offset (int, optional):
            Number of frames to skip before start reading data.
        num_frames (int, optional):
            Maximum number of frames to read. ``-1`` reads all the remaining samples,
            starting from ``frame_offset``.
            Fewer frames may be returned if the file does not contain enough of them.
        normalize (bool, optional):
            When ``True``, this function always returns ``float32``, and sample values are
            normalized to ``[-1.0, 1.0]``.
            If input file is integer WAV, giving ``False`` will change the resulting Tensor type to
            integer type.
            This argument has no effect for formats other than integer WAV type.
        channels_first (bool, optional):
            When True, the returned Tensor has dimension `[channel, time]`.
            Otherwise, the returned Tensor's dimension is `[time, channel]`.
        format (str or None, optional):
            Not used. PySoundFile does not accept format hint.

    Returns:
        (paddle.Tensor, int): Resulting Tensor and sample rate.
            If the input file has integer wav format and normalization is off, then it has
            integer type, else ``float32`` type. If ``channels_first=True``, it has
            `[channel, time]` else `[time, channel]`.

    Raises:
        ValueError: If un-normalized WAV data is requested for a subtype that has no
            entry in ``_SUBTYPE2DTYPE``.
    """
    with soundfile.SoundFile(filepath, "r") as reader:
        # Native dtypes are only preserved for WAV input with normalization off.
        keep_native = reader.format == "WAV" and not normalize
        if not keep_native:
            read_dtype = "float32"
        else:
            read_dtype = _SUBTYPE2DTYPE.get(reader.subtype)
            if read_dtype is None:
                raise ValueError(f"Unsupported subtype: {reader.subtype}")

        # Seeks to `frame_offset` and resolves how many frames to read.
        frame_count = reader._prepare_read(frame_offset, None, num_frames)
        samples = reader.read(frame_count, read_dtype, always_2d=True)
        rate = reader.samplerate

    tensor = paddle.to_tensor(samples)
    if channels_first:
        # soundfile yields `[time, channel]`; swap to `[channel, time]`.
        tensor = paddle.transpose(tensor, perm=[1, 0])
    return tensor, rate


# Mapping from soundfile subtype to number of bits per sample, consulted by
# `_get_bit_depth`.
# This is mostly heuristic: the value is set to 0 when the bit depth is
# irrelevant (lossy formats) or when it cannot be inferred.
# For ADPCM (and G72X) subtypes, it is hard to infer the bit depth because it is not part of the standard:
# According to https://en.wikipedia.org/wiki/Adaptive_differential_pulse-code_modulation#In_telephony,
# the default seems to be 8 bits but it can be compressed further to 4 bits.
# The dict is inspired by
# https://github.com/bastibe/python-soundfile/blob/744efb4b01abc72498a96b09115b42a4cabd85e4/soundfile.py#L66-L94
_SUBTYPE_TO_BITS_PER_SAMPLE = {
    "PCM_S8": 8,  # Signed 8 bit data
    "PCM_16": 16,  # Signed 16 bit data
    "PCM_24": 24,  # Signed 24 bit data
    "PCM_32": 32,  # Signed 32 bit data
    "PCM_U8": 8,  # Unsigned 8 bit data (WAV and RAW only)
    "FLOAT": 32,  # 32 bit float data
    "DOUBLE": 64,  # 64 bit float data
    "ULAW": 8,  # U-Law encoded. See https://en.wikipedia.org/wiki/G.711#Types
    "ALAW": 8,  # A-Law encoded. See https://en.wikipedia.org/wiki/G.711#Types
    "IMA_ADPCM": 0,  # IMA ADPCM.
    "MS_ADPCM": 0,  # Microsoft ADPCM.
    "GSM610":
    0,  # GSM 6.10 encoding. (Wikipedia says 1.625 bit depth?? https://en.wikipedia.org/wiki/Full_Rate)
    "VOX_ADPCM": 0,  # OKI / Dialogix ADPCM
    "G721_32": 0,  # 32kbs G721 ADPCM encoding.
    "G723_24": 0,  # 24kbs G723 ADPCM encoding.
    "G723_40": 0,  # 40kbs G723 ADPCM encoding.
    "DWVW_12": 12,  # 12 bit Delta Width Variable Word encoding.
    "DWVW_16": 16,  # 16 bit Delta Width Variable Word encoding.
    "DWVW_24": 24,  # 24 bit Delta Width Variable Word encoding.
    "DWVW_N": 0,  # N bit Delta Width Variable Word encoding.
    "DPCM_8": 8,  # 8 bit differential PCM (XI only)
    "DPCM_16": 16,  # 16 bit differential PCM (XI only)
    "VORBIS": 0,  # Xiph Vorbis encoding. (lossy)
    "ALAC_16": 16,  # Apple Lossless Audio Codec (16 bit).
    "ALAC_20": 20,  # Apple Lossless Audio Codec (20 bit).
    "ALAC_24": 24,  # Apple Lossless Audio Codec (24 bit).
    "ALAC_32": 32,  # Apple Lossless Audio Codec (32 bit).
}


def _get_bit_depth(subtype):
    if subtype not in _SUBTYPE_TO_BITS_PER_SAMPLE:
        warnings.warn(
            f"The {subtype} subtype is unknown to PaddleAudio. As a result, the bits_per_sample "
            "attribute will be set to 0. If you are seeing this warning, please "
            "report by opening an issue on github (after checking for existing/closed ones). "
            "You may otherwise ignore this warning.")
    return _SUBTYPE_TO_BITS_PER_SAMPLE.get(subtype, 0)


# Maps a soundfile subtype name to the encoding label reported by `info`
# (through `_get_encoding`). Subtypes missing here are reported as "UNKNOWN".
_SUBTYPE_TO_ENCODING = {
    "PCM_S8": "PCM_S",
    "PCM_16": "PCM_S",
    "PCM_24": "PCM_S",
    "PCM_32": "PCM_S",
    "PCM_U8": "PCM_U",
    "FLOAT": "PCM_F",
    "DOUBLE": "PCM_F",
    "ULAW": "ULAW",
    "ALAW": "ALAW",
    "VORBIS": "VORBIS",
}


def _get_encoding(format: str, subtype: str):
    if format == "FLAC":
        return "FLAC"
    return _SUBTYPE_TO_ENCODING.get(subtype, "UNKNOWN")


def info(filepath: str, format: Optional[str]=None) -> AudioInfo:
    """Get signal information of an audio file.

    Note:
        ``filepath`` argument is intentionally annotated as ``str`` only, even though it accepts
        ``pathlib.Path`` object as well. This is for the consistency with ``"sox_io"`` backend.

    Args:
        filepath (path-like object or file-like object):
            Source of audio data.
        format (str or None, optional):
            Not used. PySoundFile does not accept format hint.

    Returns:
        AudioInfo: Metadata of the given audio: sample rate, frame count,
        channel count, bits per sample and encoding.

    """
    meta = soundfile.info(filepath)
    bit_depth = _get_bit_depth(meta.subtype)
    encoding_label = _get_encoding(meta.format, meta.subtype)
    return AudioInfo(
        meta.samplerate,
        meta.frames,
        meta.channels,
        bits_per_sample=bit_depth,
        encoding=encoding_label, )


================================================
FILE: audio/paddleaudio/backends/sox_io_backend.py
================================================
import os
from typing import Optional
from typing import Tuple

import paddle
import paddleaudio
from paddle import Tensor
from paddleaudio._internal import module_utils as _mod_utils

from .common import AudioInfo

#https://github.com/pytorch/audio/blob/main/torchaudio/backend/sox_io_backend.py


def _fail_info(filepath: str, format: Optional[str]) -> AudioInfo:
    """Fallback for `info` on a path: always raises RuntimeError.

    `format` is accepted for signature compatibility and is not used.
    """
    message = "Failed to fetch metadata from {}".format(filepath)
    raise RuntimeError(message)


def _fail_info_fileobj(fileobj, format: Optional[str]) -> AudioInfo:
    """Fallback for `info` on a file object: always raises RuntimeError.

    `format` is accepted for signature compatibility and is not used.
    """
    message = "Failed to fetch metadata from {}".format(fileobj)
    raise RuntimeError(message)


# Note: kept TorchScript-friendly like the upstream code -- fully annotated
# and free of f-strings.
def _fail_load(
        filepath: str,
        frame_offset: int=0,
        num_frames: int=-1,
        normalize: bool=True,
        channels_first: bool=True,
        format: Optional[str]=None, ) -> Tuple[Tensor, int]:
    """Fallback for `load` on a path: always raises RuntimeError.

    Every argument other than `filepath` only mirrors the signature of `load`
    and is not used.
    """
    message = "Failed to load audio from {}".format(filepath)
    raise RuntimeError(message)


def _fail_load_fileobj(fileobj, *args, **kwargs):
    raise RuntimeError(f"Failed to load audio from {fileobj}")


# Handlers that `load` and `info` delegate to when the sox extension returns
# ``None`` instead of a result.
_fallback_info = _fail_info
_fallback_info_fileobj = _fail_info_fileobj
_fallback_load = _fail_load
# `load` refers to this handler as `_fallback_load_fileobj`; the name was
# previously only defined with a typo, which turned the file-object failure
# path into a NameError instead of the intended RuntimeError.
_fallback_load_fileobj = _fail_load_fileobj
# Misspelled alias kept so that existing references to the old name still work.
_fallback_load_filebj = _fallback_load_fileobj


@_mod_utils.requires_sox()
def load(
        filepath: str,
        frame_offset: int=0,
        num_frames: int=-1,
        normalize: bool=True,
        channels_first: bool=True,
        format: Optional[str]=None, ) -> Tuple[Tensor, int]:
    """Load audio through the sox extension.

    File-like sources (objects with a ``read`` attribute) go through
    ``load_audio_fileobj``; everything else is converted with ``os.fspath``
    and goes through ``sox_io_load_audio_file``. When the extension returns
    ``None`` the call is delegated to the matching fallback handler.

    Args:
        filepath: Path-like object or file-like object to read from.
        frame_offset (int, optional): Forwarded to the extension. Defaults to 0.
        num_frames (int, optional): Forwarded to the extension. Defaults to -1.
        normalize (bool, optional): Forwarded to the extension. Defaults to True.
        channels_first (bool, optional): Forwarded to the extension. Defaults to True.
        format (str or None, optional): Forwarded to the extension. Defaults to None.

    Returns:
        Tuple[Tensor, int]: The first element of the extension's result
        converted with ``paddle.to_tensor``, and its second element unchanged.
    """
    if not hasattr(filepath, "read"):
        path = os.fspath(filepath)
        loaded = paddleaudio._paddleaudio.sox_io_load_audio_file(
            path, frame_offset, num_frames, normalize, channels_first, format)
        if loaded is None:
            return _fallback_load(path, frame_offset, num_frames, normalize,
                                  channels_first, format)
        return (paddle.to_tensor(loaded[0]), loaded[1])

    loaded = paddleaudio._paddleaudio.load_audio_fileobj(
        filepath, frame_offset, num_frames, normalize, channels_first, format)
    if loaded is None:
        return _fallback_load_fileobj(filepath, frame_offset, num_frames,
                                      normalize, channels_first, format)
    return (paddle.to_tensor(loaded[0]), loaded[1])


@_mod_utils.requires_sox()
def save(
        filepath: str,
        src: Tensor,
        sample_rate: int,
        channels_first: bool=True,
        compression: Optional[float]=None,
        format: Optional[str]=None,
        encoding: Optional[str]=None,
        bits_per_sample: Optional[int]=None, ):
    """Save audio through the sox extension.

    The tensor is converted with ``src.numpy()`` first. File-like
    destinations (objects with a ``write`` attribute) are written with
    ``save_audio_fileobj``; everything else is converted with ``os.fspath``
    and written with ``sox_io_save_audio_file``.

    Args:
        filepath: Path-like object or file-like object to write to.
        src (Tensor): Audio data to save.
        sample_rate (int): Forwarded to the extension.
        channels_first (bool, optional): Forwarded to the extension. Defaults to True.
        compression (float or None, optional): Forwarded to the extension.
        format (str or None, optional): Forwarded to the extension.
        encoding (str or None, optional): Forwarded to the extension.
        bits_per_sample (int or None, optional): Forwarded to the extension.
    """
    samples = src.numpy()

    if not hasattr(filepath, "write"):
        path = os.fspath(filepath)
        paddleaudio._paddleaudio.sox_io_save_audio_file(
            path, samples, sample_rate, channels_first, compression, format,
            encoding, bits_per_sample)
        return

    paddleaudio._paddleaudio.save_audio_fileobj(
        filepath, samples, sample_rate, channels_first, compression, format,
        encoding, bits_per_sample)


@_mod_utils.requires_sox()
def info(
        filepath: str,
        format: Optional[str]=None, ) -> AudioInfo:
    """Fetch audio metadata through the sox extension.

    File-like sources (objects with a ``read`` attribute) are queried with
    ``get_info_fileobj``; everything else is converted with ``os.fspath`` and
    queried with ``get_info_file``. When the extension returns ``None`` the
    call is delegated to the matching fallback handler.

    Args:
        filepath: Path-like object or file-like object to inspect.
        format (str or None, optional): Forwarded to the extension.

    Returns:
        AudioInfo: Built by unpacking the extension's result.
    """
    if not hasattr(filepath, "read"):
        path = os.fspath(filepath)
        fields = paddleaudio._paddleaudio.get_info_file(path, format)
        if fields is None:
            return _fallback_info(path, format)
        return AudioInfo(*fields)

    fields = paddleaudio._paddleaudio.get_info_fileobj(filepath, format)
    if fields is None:
        return _fallback_info_fileobj(filepath, format)
    return AudioInfo(*fields)


================================================
FILE: audio/paddleaudio/backends/utils.py
================================================
"""Defines utilities for switching audio backends"""
#code is from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/utils.py
import warnings
from typing import List
from typing import Optional

import paddleaudio
from paddleaudio._internal import module_utils as _mod_utils

from . import no_backend
from . import soundfile_backend
from . import sox_io_backend

# Names exported by `from ... import *`: the backend discovery and
# selection helpers defined below.
__all__ = [
    "list_audio_backends",
    "get_audio_backend",
    "set_audio_backend",
]


def list_audio_backends() -> List[str]:
    """List available backends

    ``"soundfile"`` is reported when the ``soundfile`` module is available and
    ``"sox_io"`` when sox is available; the entries always appear in that order.

    Returns:
        List[str]: The list of available backends.
    """
    # Each probe is evaluated lazily, in the order in which names are reported.
    probes = (
        ("soundfile", lambda: _mod_utils.is_module_available("soundfile")),
        ("sox_io", lambda: _mod_utils.is_sox_available()), )
    return [name for name, is_available in probes if is_available()]


def set_audio_backend(backend: Optional[str]):
    """Set the backend for I/O operation

    The ``save``, ``load`` and ``info`` functions of the selected backend
    module are installed as attributes of the ``paddleaudio`` package.

    Args:
        backend (str or None): Name of the backend.
            One of ``"sox_io"`` or ``"soundfile"`` based on availability
            of the system. If ``None`` is provided the current backend is unassigned.

    Raises:
        RuntimeError: If `backend` is not among the available backends.
        NotImplementedError: If `backend` is available but has no module mapped here.
    """
    if backend is None:
        selected = no_backend
    else:
        if backend not in list_audio_backends():
            raise RuntimeError(f'Backend "{backend}" is not one of '
                               f"available backends: {list_audio_backends()}.")
        if backend == "soundfile":
            selected = soundfile_backend
        elif backend == "sox_io":
            selected = sox_io_backend
        else:
            raise NotImplementedError(f'Unexpected backend "{backend}"')

    # Re-bind the package-level entry points to the selected implementation.
    for entry_point in ("save", "load", "info"):
        setattr(paddleaudio, entry_point, getattr(selected, entry_point))


def _init_audio_backend():
    backends = list_audio_backends()
    if "soundfile" in backends:
        set_audio_backend("soundfile")
    elif "sox_io" in backends:
        set_audio_backend("sox_io")
    else:
        warnings.warn("No audio backend is available.")
        set_audio_backend(None)


def get_audio_backend() -> Optional[str]:
    """Get the name of the current backend

    The backend is identified by comparing ``paddleaudio.load`` with the
    ``load`` function of each known backend module.

    Returns:
        Optional[str]: The name of the current backend or ``None`` if no backend is assigned.

    Raises:
        ValueError: If ``paddleaudio.load`` matches none of the known backends.
    """
    known_backends = (
        (no_backend, None),
        (sox_io_backend, "sox_io"),
        (soundfile_backend, "soundfile"), )
    for module, backend_name in known_backends:
        if paddleaudio.load == module.load:
            return backend_name
    raise ValueError("Unknown backend.")


================================================
FILE: audio/paddleaudio/compliance/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import kaldi
from . import librosa


================================================
FILE: audio/paddleaudio/compliance/kaldi.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from torchaudio(https://github.com/pytorch/audio)
import math
from typing import Tuple

import paddle
from paddle import Tensor

from ..functional import create_dct
from ..functional.window import get_window

# Names exported by `from ... import *`.
__all__ = [
    'spectrogram',
    'fbank',
    'mfcc',
]

# Window type identifiers. Their values are the strings that
# `_feature_window_function` compares its `window_type` argument against.
HANNING = 'hann'
HAMMING = 'hamming'
POVEY = 'povey'
RECTANGULAR = 'rect'
BLACKMAN = 'blackman'


def _get_epsilon(dtype):
    return paddle.to_tensor(1e-07, dtype=dtype)


def _next_power_of_2(x: int) -> int:
    return 1 if x == 0 else 2**(x - 1).bit_length()


def _get_strided(waveform: Tensor,
                 window_size: int,
                 window_shift: int,
                 snip_edges: bool) -> Tensor:
    """Cut a 1D waveform into overlapping frames.

    Args:
        waveform (Tensor): 1D input signal.
        window_size (int): Number of samples per frame.
        window_shift (int): Number of samples between the starts of adjacent frames.
        snip_edges (bool): If True, only frames that fit entirely inside the
            signal are produced. If False, the signal is extended with
            reflected samples before framing.

    Returns:
        Tensor: The frames, one per row. An empty `(0, 0)` tensor is returned
        when `snip_edges` is True and the signal is shorter than one window.
    """
    assert waveform.dim() == 1
    total_samples = waveform.shape[0]

    if snip_edges:
        if total_samples < window_size:
            return paddle.empty((0, 0), dtype=waveform.dtype)
        frame_count = 1 + (total_samples - window_size) // window_shift
        signal = waveform
    else:
        mirrored = paddle.flip(waveform, [0])
        frame_count = (total_samples + (window_shift // 2)) // window_shift
        left_pad = window_size // 2 - window_shift // 2
        if left_pad > 0:
            # Prepend reflected samples and append the reflected signal.
            pieces = (mirrored[-left_pad:], waveform, mirrored)
        else:
            # Non-positive padding: drop `-left_pad` leading samples instead.
            pieces = (waveform[-left_pad:], mirrored)
        signal = paddle.concat(pieces, axis=0)

    framed = paddle.signal.frame(signal, window_size, window_shift)
    return framed[:, :frame_count].T


def _feature_window_function(
        window_type: str,
        window_size: int,
        blackman_coeff: float,
        dtype: int, ) -> Tensor:
    """Build the analysis window applied to each frame.

    Args:
        window_type (str): One of "hann", "hamming", "povey", "rect" or "blackman".
        window_size (int): Number of samples in the window.
        blackman_coeff (float): Coefficient of the Blackman window; only used
            when `window_type` is "blackman".
        dtype: dtype of the returned window.

    Returns:
        Tensor: The window, of length `window_size`.

    Raises:
        Exception: If `window_type` is not one of the supported names.
    """
    if window_type == "rect":
        return paddle.ones([window_size], dtype=dtype)

    if window_type == "hann":
        return get_window('hann', window_size, fftbins=False, dtype=dtype)

    if window_type == "hamming":
        return get_window('hamming', window_size, fftbins=False, dtype=dtype)

    if window_type == "povey":
        # The povey window is a Hann window raised to the power 0.85.
        hann = get_window('hann', window_size, fftbins=False, dtype=dtype)
        return hann.pow(0.85)

    if window_type == "blackman":
        step = 2 * math.pi / (window_size - 1)
        positions = paddle.arange(window_size, dtype=dtype)
        first_harmonic = 0.5 * paddle.cos(step * positions)
        second_harmonic = (0.5 - blackman_coeff) * paddle.cos(
            2 * step * positions)
        return (blackman_coeff - first_harmonic + second_harmonic
                ).astype(dtype)

    raise Exception('Invalid window type ' + window_type)


def _get_log_energy(strided_input: Tensor, epsilon: Tensor,
                    energy_floor: float) -> Tensor:
    """Compute the log energy of every frame.

    Args:
        strided_input (Tensor): Frames, one per row.
        epsilon (Tensor): Lower bound applied to the energy before the log.
        energy_floor (float): Floor on the energy; its log bounds the result
            from below. A value of 0.0 disables the floor.

    Returns:
        Tensor: One log-energy value per frame.
    """
    frame_energy = strided_input.pow(2).sum(1)
    log_energy = paddle.maximum(frame_energy, epsilon).log()
    if energy_floor == 0.0:
        return log_energy

    log_floor = paddle.to_tensor(
        math.log(energy_floor), dtype=strided_input.dtype)
    return paddle.maximum(log_energy, log_floor)


def _get_waveform_and_window_properties(
        waveform: Tensor,
        channel: int,
        sr: int,
        frame_shift: float,
        frame_length: float,
        round_to_power_of_two: bool,
        preemphasis_coefficient: float) -> Tuple[Tensor, int, int, int]:
    """Select one channel and derive the framing parameters in samples.

    Args:
        waveform (Tensor): Input of shape `(C, T)`.
        channel (int): Channel index; negative values select channel 0.
        sr (int): Sample rate.
        frame_shift (float): Shift between adjacent frames in milliseconds.
        frame_length (float): Frame length in milliseconds.
        round_to_power_of_two (bool): Whether the padded window size is the
            next power of two of the window size.
        preemphasis_coefficient (float): Only validated here; must lie in [0, 1].

    Returns:
        Tuple[Tensor, int, int, int]: The selected 1D channel, the window shift,
        the window size and the padded window size (all in samples).
    """
    channel = max(channel, 0)
    num_channels = waveform.shape[0]
    assert channel < num_channels, (
        f'Invalid channel {channel} for size {waveform.shape[0]}')
    waveform = waveform[channel, :]  # size (n)

    # frame_shift and frame_length are given in milliseconds.
    window_shift = int(sr * frame_shift * 0.001)
    window_size = int(sr * frame_length * 0.001)
    if round_to_power_of_two:
        padded_window_size = _next_power_of_2(window_size)
    else:
        padded_window_size = window_size

    assert 2 <= window_size <= len(waveform), (
        f'choose a window size {window_size} that is [2, {len(waveform)}]')
    assert 0 < window_shift, '`window_shift` must be greater than 0'
    assert padded_window_size % 2 == 0, (
        'the padded `window_size` must be divisible by two.'
        ' use `round_to_power_of_two` or change `frame_length`')
    assert 0. <= preemphasis_coefficient <= 1.0, '`preemphasis_coefficient` must be between [0,1]'
    assert sr > 0, '`sr` must be greater than zero'
    return waveform, window_shift, window_size, padded_window_size


def _get_window(waveform: Tensor,
                padded_window_size: int,
                window_size: int,
                window_shift: int,
                window_type: str,
                blackman_coeff: float,
                snip_edges: bool,
                raw_energy: bool,
                energy_floor: float,
                dither: float,
                remove_dc_offset: bool,
                preemphasis_coefficient: float) -> Tuple[Tensor, Tensor]:
    """Frame a waveform and apply the per-frame pre-processing chain.

    The steps run in this order: framing, optional dithering, optional DC
    offset removal, optional pre-emphasis, windowing and zero-padding up to
    `padded_window_size`. The log energy is taken either right after the DC
    offset removal (`raw_energy=True`) or at the very end.

    Args:
        waveform (Tensor): 1D input signal.
        padded_window_size (int): Frame length after zero-padding.
        window_size (int): Number of samples per frame.
        window_shift (int): Number of samples between adjacent frames.
        window_type (str): Window name understood by `_feature_window_function`.
        blackman_coeff (float): Coefficient for the Blackman window.
        snip_edges (bool): Forwarded to `_get_strided`.
        raw_energy (bool): Whether the energy is computed before pre-emphasis
            and windowing.
        energy_floor (float): Floor on the energy; 0.0 disables it.
        dither (float): Scale of the Gaussian noise added to the frames;
            0.0 disables dithering.
        remove_dc_offset (bool): Whether to subtract the mean of every frame.
        preemphasis_coefficient (float): Pre-emphasis coefficient; 0.0 disables it.

    Returns:
        Tuple[Tensor, Tensor]: The processed frames of shape
        `(m, padded_window_size)` and the log energy of shape `(m)`.
    """
    dtype = waveform.dtype
    epsilon = _get_epsilon(dtype)

    # (m, window_size)
    strided_input = _get_strided(waveform, window_size, window_shift,
                                 snip_edges)

    if dither != 0.0:
        # Box-Muller transform: sqrt(-2 ln u1) * cos(2 pi u2) is standard
        # normal only when u1 and u2 are independent uniform samples. Reusing
        # one sample for both terms yields a skewed, non-Gaussian value, so
        # the two factors are drawn separately. u1 is floored at epsilon to
        # keep the logarithm finite.
        noise_shape = strided_input.shape
        radius_uniform = paddle.maximum(epsilon,
                                        paddle.rand(noise_shape, dtype=dtype))
        angle_uniform = paddle.rand(noise_shape, dtype=dtype)
        rand_gauss = paddle.sqrt(-2 * radius_uniform.log()) * paddle.cos(
            2 * math.pi * angle_uniform)
        strided_input = strided_input + rand_gauss * dither

    if remove_dc_offset:
        row_means = paddle.mean(strided_input, axis=1).unsqueeze(1)  # (m, 1)
        strided_input = strided_input - row_means

    if raw_energy:
        # Energy of the frames before pre-emphasis and windowing.
        signal_log_energy = _get_log_energy(strided_input, epsilon,
                                            energy_floor)  # (m)

    if preemphasis_coefficient != 0.0:
        # Replicate the first sample of every frame so that each sample has a
        # predecessor to subtract.
        offset_strided_input = paddle.nn.functional.pad(
            strided_input.unsqueeze(0), (1, 0),
            data_format='NCL',
            mode='replicate').squeeze(0)  # (m, window_size + 1)
        strided_input = (strided_input - preemphasis_coefficient *
                         offset_strided_input[:, :-1])

    window_function = _feature_window_function(
        window_type, window_size, blackman_coeff,
        dtype).unsqueeze(0)  # (1, window_size)
    strided_input = strided_input * window_function  # (m, window_size)

    # (m, padded_window_size)
    if padded_window_size != window_size:
        padding_right = padded_window_size - window_size
        strided_input = paddle.nn.functional.pad(
            strided_input.unsqueeze(0), (0, padding_right),
            data_format='NCL',
            mode='constant',
            value=0).squeeze(0)

    if not raw_energy:
        # Energy of the fully processed (windowed, padded) frames.
        signal_log_energy = _get_log_energy(strided_input, epsilon,
                                            energy_floor)  # size (m)

    return strided_input, signal_log_energy


def _subtract_column_mean(tensor: Tensor, subtract_mean: bool) -> Tensor:
    """Optionally remove the per-column mean from a 2-D feature tensor.

    Args:
        tensor (Tensor): Feature tensor; the mean is taken along axis 0.
        subtract_mean (bool): When False the input is returned untouched.

    Returns:
        Tensor: `tensor` itself, or `tensor` minus its mean over axis 0.
    """
    if not subtract_mean:
        return tensor
    # Keep a leading axis of size 1 so the mean broadcasts over every row.
    return tensor - paddle.mean(tensor, axis=0).unsqueeze(0)


def spectrogram(waveform: Tensor,
                blackman_coeff: float=0.42,
                channel: int=-1,
                dither: float=0.0,
                energy_floor: float=1.0,
                frame_length: float=25.0,
                frame_shift: float=10.0,
                preemphasis_coefficient: float=0.97,
                raw_energy: bool=True,
                remove_dc_offset: bool=True,
                round_to_power_of_two: bool=True,
                sr: int=16000,
                snip_edges: bool=True,
                subtract_mean: bool=False,
                window_type: str="povey") -> Tensor:
    """Compute a Kaldi-style log power spectrogram of a waveform.

    The waveform is framed and windowed, a real FFT is taken per frame, and
    the log of the (floored) squared magnitude is returned. Column 0 of the
    result is overwritten with the per-frame log energy.

    Args:
        waveform (Tensor): A waveform tensor with shape `(C, T)`.
        blackman_coeff (float, optional): Coefficient for the Blackman window. Defaults to 0.42.
        channel (int, optional): Channel of the waveform to use. Defaults to -1.
        dither (float, optional): Dithering constant; 0.0 disables dithering. Defaults to 0.0.
        energy_floor (float, optional): Floor on the energy of the output spectrogram. Defaults to 1.0.
        frame_length (float, optional): Frame length in milliseconds. Defaults to 25.0.
        frame_shift (float, optional): Shift between adjacent frames in milliseconds. Defaults to 10.0.
        preemphasis_coefficient (float, optional): Preemphasis coefficient applied to each frame. Defaults to 0.97.
        raw_energy (bool, optional): If True, the energy is computed before preemphasis and windowing.
            Defaults to True.
        remove_dc_offset (bool, optional): Whether to subtract the mean of every frame. Defaults to True.
        round_to_power_of_two (bool, optional): If True, round the window size up to a power of two by
            zero-padding the FFT input. Defaults to True.
        sr (int, optional): Sample rate of the input waveform. Defaults to 16000.
        snip_edges (bool, optional): If True, drop trailing samples that do not fill a whole frame.
            Otherwise reflect padding is applied at the end of the waveform. Defaults to True.
        subtract_mean (bool, optional): Whether to subtract the per-column mean of the output. Defaults to False.
        window_type (str, optional): Type of window used before the FFT. Defaults to "povey".

    Returns:
        Tensor: A spectrogram tensor with shape `(m, padded_window_size // 2 + 1)`, where the number of
            frames `m` depends on frame_length and frame_shift.
    """
    dtype = waveform.dtype
    epsilon = _get_epsilon(dtype)

    waveform, window_shift, window_size, padded_window_size = _get_waveform_and_window_properties(
        waveform, channel, sr, frame_shift, frame_length, round_to_power_of_two,
        preemphasis_coefficient)

    # frames: (m, padded_window_size), log_energy: (m)
    frames, log_energy = _get_window(
        waveform, padded_window_size, window_size, window_shift, window_type,
        blackman_coeff, snip_edges, raw_energy, energy_floor, dither,
        remove_dc_offset, preemphasis_coefficient)

    # Complex spectrum per frame: (m, padded_window_size // 2 + 1)
    complex_spectrum = paddle.fft.rfft(frames)

    # Floor the power at epsilon before the log so it never sees zero.
    power = complex_spectrum.abs().pow(2.)
    log_power = paddle.maximum(power, epsilon).log()

    # The first bin is replaced by the frame log energy.
    log_power[:, 0] = log_energy

    return _subtract_column_mean(log_power, subtract_mean)


def _inverse_mel_scale_scalar(mel_freq: float) -> float:
    return 700.0 * (math.exp(mel_freq / 1127.0) - 1.0)


def _inverse_mel_scale(mel_freq: Tensor) -> Tensor:
    return 700.0 * ((mel_freq / 1127.0).exp() - 1.0)


def _mel_scale_scalar(freq: float) -> float:
    return 1127.0 * math.log(1.0 + freq / 700.0)


def _mel_scale(freq: Tensor) -> Tensor:
    return 1127.0 * (1.0 + freq / 700.0).log()


def _vtln_warp_freq(vtln_low_cutoff: float,
                    vtln_high_cutoff: float,
                    low_freq: float,
                    high_freq: float,
                    vtln_warp_factor: float,
                    freq: Tensor) -> Tensor:
    """Apply a piecewise-linear VTLN warp to a tensor of frequencies.

    With `l = vtln_low_cutoff * max(1, warp)` and
    `h = vtln_high_cutoff * min(1, warp)`, each element `f` of `freq` maps to:

    * `f` unchanged when `f < low_freq` or `f > high_freq`;
    * `low_freq + scale_left * (f - low_freq)` when `f < l`;
    * `f / vtln_warp_factor` when `l <= f < h`;
    * `high_freq + scale_right * (f - high_freq)` when `f >= h`.

    `scale_left` and `scale_right` are chosen so the outer segments meet the
    middle segment at `l` and `h`.

    Args:
        vtln_low_cutoff (float): Lower inflection point of the warp; must be above `low_freq`.
        vtln_high_cutoff (float): Upper inflection point of the warp; must be below `high_freq`.
        low_freq (float): Lower frequency bound; frequencies below it are not warped.
        high_freq (float): Upper frequency bound; frequencies above it are not warped.
        vtln_warp_factor (float): Warp factor; the middle segment is scaled by its reciprocal.
        freq (Tensor): Frequencies to warp.

    Returns:
        Tensor: Warped frequencies, created with `paddle.empty_like(freq)` and filled by the masks below.
    """
    assert vtln_low_cutoff > low_freq, 'be sure to set the vtln_low option higher than low_freq'
    assert vtln_high_cutoff < high_freq, 'be sure to set the vtln_high option lower than high_freq [or negative]'
    # Inflection points of the warp on the input (unwarped) axis.
    l = vtln_low_cutoff * max(1.0, vtln_warp_factor)
    h = vtln_high_cutoff * min(1.0, vtln_warp_factor)
    scale = 1.0 / vtln_warp_factor
    # Values of the middle segment at the two inflection points.
    Fl = scale * l
    Fh = scale * h
    assert l > low_freq and h < high_freq
    # Slopes of the outer segments: (low_freq, low_freq) -> (l, Fl) and
    # (h, Fh) -> (high_freq, high_freq).
    scale_left = (Fl - low_freq) / (l - low_freq)
    scale_right = (high_freq - Fh) / (high_freq - h)
    res = paddle.empty_like(freq)

    outside_low_high_freq = paddle.less_than(freq, paddle.to_tensor(low_freq)) \
        | paddle.greater_than(freq, paddle.to_tensor(high_freq))
    before_l = paddle.less_than(freq, paddle.to_tensor(l))
    before_h = paddle.less_than(freq, paddle.to_tensor(h))
    after_h = paddle.greater_equal(freq, paddle.to_tensor(h))

    # The masks overlap (before_l is a subset of before_h, and the "outside"
    # mask overlaps before_l / after_h), so the order of the assignments
    # matters: later writes override earlier ones.
    res[after_h] = high_freq + scale_right * (freq[after_h] - high_freq)
    res[before_h] = scale * freq[before_h]
    res[before_l] = low_freq + scale_left * (freq[before_l] - low_freq)
    res[outside_low_high_freq] = freq[outside_low_high_freq]

    return res


def _vtln_warp_mel_freq(vtln_low_cutoff: float,
                        vtln_high_cutoff: float,
                        low_freq,
                        high_freq: float,
                        vtln_warp_factor: float,
                        mel_freq: Tensor) -> Tensor:
    """Apply the VTLN warp to frequencies given on the mel scale.

    The input is converted from mel to Hz, warped with `_vtln_warp_freq`,
    and converted back to mel.

    Args:
        vtln_low_cutoff (float): Lower inflection point of the warp, in Hz.
        vtln_high_cutoff (float): Upper inflection point of the warp, in Hz.
        low_freq: Lower frequency bound, in Hz.
        high_freq (float): Upper frequency bound, in Hz.
        vtln_warp_factor (float): VTLN warp factor.
        mel_freq (Tensor): Frequencies on the mel scale.

    Returns:
        Tensor: Warped frequencies on the mel scale.
    """
    hz = _inverse_mel_scale(mel_freq)
    warped_hz = _vtln_warp_freq(vtln_low_cutoff, vtln_high_cutoff, low_freq,
                                high_freq, vtln_warp_factor, hz)
    return _mel_scale(warped_hz)


def _get_mel_banks(num_bins: int,
                   window_length_padded: int,
                   sample_freq: float,
                   low_freq: float,
                   high_freq: float,
                   vtln_low: float,
                   vtln_high: float,
                   vtln_warp_factor: float) -> Tuple[Tensor, Tensor]:
    """Build triangular mel filter banks over the FFT bins.

    Args:
        num_bins (int): Number of mel filters; must be at least 3.
        window_length_padded (int): FFT size (padded window length); must be even.
        sample_freq (float): Sample rate in Hz.
        low_freq (float): Lower edge of the first filter in Hz.
        high_freq (float): Upper edge of the last filter in Hz. Values `<= 0`
            are taken as an offset from the Nyquist frequency.
        vtln_low (float): Lower inflection point of the VTLN warp in Hz.
        vtln_high (float): Upper inflection point of the VTLN warp in Hz.
            Negative values are taken as an offset from the Nyquist frequency.
        vtln_warp_factor (float): VTLN warp factor; 1.0 disables warping.

    Returns:
        Tuple[Tensor, Tensor]: The filter weights with shape
            `(num_bins, window_length_padded // 2)` and the filter center
            frequencies in Hz with shape `(num_bins, 1)`.
    """
    # The condition now agrees with its own message (and with Kaldi, which
    # only rejects num_bins < 3).
    assert num_bins >= 3, 'Must have at least 3 mel bins'
    assert window_length_padded % 2 == 0
    # A bin count is an integer; floor division avoids handing a float to
    # paddle.arange below.
    num_fft_bins = window_length_padded // 2
    nyquist = 0.5 * sample_freq

    if high_freq <= 0.0:
        high_freq += nyquist

    assert (0.0 <= low_freq < nyquist) and (0.0 < high_freq <= nyquist) and (low_freq < high_freq), \
        ('Bad values in options: low-freq {} and high-freq {} vs. nyquist {}'.format(low_freq, high_freq, nyquist))

    fft_bin_width = sample_freq / window_length_padded
    mel_low_freq = _mel_scale_scalar(low_freq)
    mel_high_freq = _mel_scale_scalar(high_freq)

    # num_bins triangles, each overlapping its neighbours by half, need
    # num_bins + 1 equal steps between the two mel edges.
    mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins + 1)

    if vtln_high < 0.0:
        vtln_high += nyquist

    assert vtln_warp_factor == 1.0 or ((low_freq < vtln_low < high_freq) and
                                       (0.0 < vtln_high < high_freq) and (vtln_low < vtln_high)), \
        ('Bad values in options: vtln-low {} and vtln-high {}, versus '
         'low-freq {} and high-freq {}'.format(vtln_low, vtln_high, low_freq, high_freq))

    bin = paddle.arange(num_bins, dtype=paddle.float32).unsqueeze(1)
    # Left edge, center and right edge of every triangle, each (num_bins, 1).
    # Center and right edge are obtained by adding the step to the previous
    # point instead of recomputing them from `bin`.
    left_mel = mel_low_freq + bin * mel_freq_delta
    center_mel = left_mel + mel_freq_delta
    right_mel = center_mel + mel_freq_delta

    if vtln_warp_factor != 1.0:
        left_mel = _vtln_warp_mel_freq(vtln_low, vtln_high, low_freq, high_freq,
                                       vtln_warp_factor, left_mel)
        center_mel = _vtln_warp_mel_freq(vtln_low, vtln_high, low_freq,
                                         high_freq, vtln_warp_factor,
                                         center_mel)
        right_mel = _vtln_warp_mel_freq(vtln_low, vtln_high, low_freq,
                                        high_freq, vtln_warp_factor, right_mel)

    center_freqs = _inverse_mel_scale(center_mel)  # (num_bins, 1)
    # Mel value of every FFT bin: (1, num_fft_bins)
    mel = _mel_scale(fft_bin_width * paddle.arange(
        num_fft_bins, dtype=paddle.float32)).unsqueeze(0)

    # Rising and falling edges of every triangle: (num_bins, num_fft_bins)
    up_slope = (mel - left_mel) / (center_mel - left_mel)
    down_slope = (right_mel - mel) / (right_mel - center_mel)

    if vtln_warp_factor == 1.0:
        # Without warping the triangle is min(up, down) clipped at zero.
        bins = paddle.maximum(
            paddle.zeros([1]), paddle.minimum(up_slope, down_slope))
    else:
        # With warping the two edges are filled in separately, using strict
        # inequalities on both sides of the center.
        bins = paddle.zeros_like(up_slope)
        up_idx = paddle.greater_than(mel, left_mel) & paddle.less_than(
            mel, center_mel)
        down_idx = paddle.greater_than(mel, center_mel) & paddle.less_than(
            mel, right_mel)
        bins[up_idx] = up_slope[up_idx]
        bins[down_idx] = down_slope[down_idx]

    return bins, center_freqs


def fbank(waveform: Tensor,
          blackman_coeff: float=0.42,
          channel: int=-1,
          dither: float=0.0,
          energy_floor: float=1.0,
          frame_length: float=25.0,
          frame_shift: float=10.0,
          high_freq: float=0.0,
          htk_compat: bool=False,
          low_freq: float=20.0,
          n_mels: int=23,
          preemphasis_coefficient: float=0.97,
          raw_energy: bool=True,
          remove_dc_offset: bool=True,
          round_to_power_of_two: bool=True,
          sr: int=16000,
          snip_edges: bool=True,
          subtract_mean: bool=False,
          use_energy: bool=False,
          use_log_fbank: bool=True,
          use_power: bool=True,
          vtln_high: float=-500.0,
          vtln_low: float=100.0,
          vtln_warp: float=1.0,
          window_type: str="povey") -> Tensor:
    """Compute Kaldi-style mel filter bank features of a waveform.

    The waveform is framed and windowed, a real FFT is taken per frame, and
    the (power or magnitude) spectrum is projected onto triangular mel
    filters. Optionally the log is taken and a log-energy column is added.

    Args:
        waveform (Tensor): A waveform tensor with shape `(C, T)`.
        blackman_coeff (float, optional): Coefficient for the Blackman window. Defaults to 0.42.
        channel (int, optional): Channel of the waveform to use. Defaults to -1.
        dither (float, optional): Dithering constant; 0.0 disables dithering. Defaults to 0.0.
        energy_floor (float, optional): Floor on the frame energy. Defaults to 1.0.
        frame_length (float, optional): Frame length in milliseconds. Defaults to 25.0.
        frame_shift (float, optional): Shift between adjacent frames in milliseconds. Defaults to 10.0.
        high_freq (float, optional): The upper cut-off frequency. Defaults to 0.0.
        htk_compat (bool, optional): If True, the energy column is placed last instead of first.
            Defaults to False.
        low_freq (float, optional): The lower cut-off frequency. Defaults to 20.0.
        n_mels (int, optional): Number of output mel bins. Defaults to 23.
        preemphasis_coefficient (float, optional): Preemphasis coefficient applied to each frame. Defaults to 0.97.
        raw_energy (bool, optional): If True, the energy is computed before preemphasis and windowing.
            Defaults to True.
        remove_dc_offset (bool, optional): Whether to subtract the mean of every frame. Defaults to True.
        round_to_power_of_two (bool, optional): If True, round the window size up to a power of two by
            zero-padding the FFT input. Defaults to True.
        sr (int, optional): Sample rate of the input waveform. Defaults to 16000.
        snip_edges (bool, optional): If True, drop trailing samples that do not fill a whole frame.
            Otherwise reflect padding is applied at the end of the waveform. Defaults to True.
        subtract_mean (bool, optional): Whether to subtract the per-column mean of the output. Defaults to False.
        use_energy (bool, optional): Add a column holding the frame log energy to the output. Defaults to False.
        use_log_fbank (bool, optional): Return log filter bank energies when True. Defaults to True.
        use_power (bool, optional): Use the power spectrum instead of the magnitude spectrum. Defaults to True.
        vtln_high (float, optional): High inflection point of the piecewise linear VTLN warping function.
            Defaults to -500.0.
        vtln_low (float, optional): Low inflection point of the piecewise linear VTLN warping function.
            Defaults to 100.0.
        vtln_warp (float, optional): VTLN warp factor. Defaults to 1.0.
        window_type (str, optional): Type of window used before the FFT. Defaults to "povey".

    Returns:
        Tensor: A filter bank tensor with shape `(m, n_mels)`, or `(m, n_mels + 1)` when
            `use_energy` is True.
    """
    dtype = waveform.dtype

    waveform, window_shift, window_size, padded_window_size = _get_waveform_and_window_properties(
        waveform, channel, sr, frame_shift, frame_length, round_to_power_of_two,
        preemphasis_coefficient)

    # frames: (m, padded_window_size), log_energy: (m)
    frames, log_energy = _get_window(
        waveform, padded_window_size, window_size, window_shift, window_type,
        blackman_coeff, snip_edges, raw_energy, energy_floor, dither,
        remove_dc_offset, preemphasis_coefficient)

    # Magnitude spectrum: (m, padded_window_size // 2 + 1)
    magnitude = paddle.fft.rfft(frames).abs()
    spectrum = magnitude.pow(2.) if use_power else magnitude

    # Filter weights: (n_mels, padded_window_size // 2)
    filterbank, _ = _get_mel_banks(n_mels, padded_window_size, sr, low_freq,
                                   high_freq, vtln_low, vtln_high, vtln_warp)
    # The filters are not cast here; they must already be in the waveform dtype.
    assert filterbank.dtype == dtype

    # Append one zero column so the filters also cover the last FFT bin:
    # (n_mels, padded_window_size // 2 + 1)
    filterbank = paddle.nn.functional.pad(
        filterbank.unsqueeze(0), (0, 1),
        data_format='NCL',
        mode='constant',
        value=0).squeeze(0)

    # Project the spectrum onto the filters: (m, n_mels)
    feats = paddle.mm(spectrum, filterbank.T)
    if use_log_fbank:
        # Floor at epsilon so the log never sees zero.
        feats = paddle.maximum(feats, _get_epsilon(dtype)).log()

    if use_energy:
        energy_column = log_energy.unsqueeze(1)  # (m, 1)
        # HTK layout puts the energy last, the default layout puts it first.
        if htk_compat:
            pieces = (feats, energy_column)
        else:
            pieces = (energy_column, feats)
        feats = paddle.concat(pieces, axis=1)  # (m, n_mels + 1)

    return _subtract_column_mean(feats, subtract_mean)


def _get_dct_matrix(n_mfcc: int, n_mels: int) -> Tensor:
    """Return the first `n_mfcc` columns of the `(n_mels, n_mels)` DCT matrix.

    The matrix comes from `create_dct(n_mels, n_mels, 'ortho')`; its first
    column is overwritten with the constant `sqrt(1 / n_mels)` before the
    columns are truncated.

    Returns:
        Tensor: DCT matrix with shape `(n_mels, n_mfcc)`.
    """
    full_dct = create_dct(n_mels, n_mels, 'ortho')
    full_dct[:, 0] = math.sqrt(1 / float(n_mels))
    return full_dct[:, :n_mfcc]


def _get_lifter_coeffs(n_mfcc: int, cepstral_lifter: float) -> Tensor:
    """Return the cepstral liftering coefficients.

    Coefficient `i` is `1 + 0.5 * Q * sin(pi * i / Q)` with
    `Q = cepstral_lifter`, for `i` in `[0, n_mfcc)`.

    Returns:
        Tensor: Liftering coefficients with shape `(n_mfcc,)`.
    """
    index = paddle.arange(n_mfcc)
    phase = math.pi * index / cepstral_lifter
    half_q = 0.5 * cepstral_lifter
    return 1.0 + half_q * paddle.sin(phase)


def mfcc(waveform: Tensor,
         blackman_coeff: float=0.42,
         cepstral_lifter: float=22.0,
         channel: int=-1,
         dither: float=0.0,
         energy_floor: float=1.0,
         frame_length: float=25.0,
         frame_shift: float=10.0,
         high_freq: float=0.0,
         htk_compat: bool=False,
         low_freq: float=20.0,
         n_mfcc: int=13,
         n_mels: int=23,
         preemphasis_coefficient: float=0.97,
         raw_energy: bool=True,
         remove_dc_offset: bool=True,
         round_to_power_of_two: bool=True,
         sr: int=16000,
         snip_edges: bool=True,
         subtract_mean: bool=False,
         use_energy: bool=False,
         vtln_high: float=-500.0,
         vtln_low: float=100.0,
         vtln_warp: float=1.0,
         window_type: str="povey") -> Tensor:
    """Compute Kaldi-style mel frequency cepstral coefficients of a waveform.

    Log power mel filter bank energies are computed with `fbank`, transformed
    with a DCT, optionally liftered, and optionally rearranged into the HTK
    column layout.

    Args:
        waveform (Tensor): A waveform tensor with shape `(C, T)`.
        blackman_coeff (float, optional): Coefficient for the Blackman window. Defaults to 0.42.
        cepstral_lifter (float, optional): Liftering constant that scales the output MFCCs; 0.0 disables
            liftering. Defaults to 22.0.
        channel (int, optional): Channel of the waveform to use. Defaults to -1.
        dither (float, optional): Dithering constant; 0.0 disables dithering. Defaults to 0.0.
        energy_floor (float, optional): Floor on the frame energy. Defaults to 1.0.
        frame_length (float, optional): Frame length in milliseconds. Defaults to 25.0.
        frame_shift (float, optional): Shift between adjacent frames in milliseconds. Defaults to 10.0.
        high_freq (float, optional): The upper cut-off frequency. Defaults to 0.0.
        htk_compat (bool, optional): If True, the first column (energy or C0) is moved to the end.
            Defaults to False.
        low_freq (float, optional): The lower cut-off frequency. Defaults to 20.0.
        n_mfcc (int, optional): Number of cepstra in the output; must not exceed `n_mels`. Defaults to 13.
        n_mels (int, optional): Number of mel bins. Defaults to 23.
        preemphasis_coefficient (float, optional): Preemphasis coefficient applied to each frame. Defaults to 0.97.
        raw_energy (bool, optional): If True, the energy is computed before preemphasis and windowing.
            Defaults to True.
        remove_dc_offset (bool, optional): Whether to subtract the mean of every frame. Defaults to True.
        round_to_power_of_two (bool, optional): If True, round the window size up to a power of two by
            zero-padding the FFT input. Defaults to True.
        sr (int, optional): Sample rate of the input waveform. Defaults to 16000.
        snip_edges (bool, optional): If True, drop trailing samples that do not fill a whole frame.
            Otherwise reflect padding is applied at the end of the waveform. Defaults to True.
        subtract_mean (bool, optional): Whether to subtract the per-column mean of the output. Defaults to False.
        use_energy (bool, optional): Replace the first cepstral coefficient with the frame log energy.
            Defaults to False.
        vtln_high (float, optional): High inflection point of the piecewise linear VTLN warping function.
            Defaults to -500.0.
        vtln_low (float, optional): Low inflection point of the piecewise linear VTLN warping function.
            Defaults to 100.0.
        vtln_warp (float, optional): VTLN warp factor. Defaults to 1.0.
        window_type (str, optional): Type of window used before the FFT. Defaults to "povey".

    Returns:
        Tensor: A mel frequency cepstral coefficients tensor with shape `(m, n_mfcc)`.
    """
    assert n_mfcc <= n_mels, 'n_mfcc cannot be larger than n_mels: %d vs %d' % (
        n_mfcc, n_mels)

    dtype = waveform.dtype

    # Log power mel energies, (m, n_mels + use_energy). Mean subtraction is
    # switched off here because it is applied once at the very end.
    mel_feats = fbank(
        waveform=waveform,
        blackman_coeff=blackman_coeff,
        channel=channel,
        dither=dither,
        energy_floor=energy_floor,
        frame_length=frame_length,
        frame_shift=frame_shift,
        high_freq=high_freq,
        htk_compat=htk_compat,
        low_freq=low_freq,
        n_mels=n_mels,
        preemphasis_coefficient=preemphasis_coefficient,
        raw_energy=raw_energy,
        remove_dc_offset=remove_dc_offset,
        round_to_power_of_two=round_to_power_of_two,
        sr=sr,
        snip_edges=snip_edges,
        subtract_mean=False,
        use_energy=use_energy,
        use_log_fbank=True,
        use_power=True,
        vtln_high=vtln_high,
        vtln_low=vtln_low,
        vtln_warp=vtln_warp,
        window_type=window_type)

    if use_energy:
        # fbank put the energy last (HTK layout) or first (default layout);
        # split it off and keep only the n_mels filter outputs.
        energy_column = n_mels if htk_compat else 0
        first_mel = int(not htk_compat)
        signal_log_energy = mel_feats[:, energy_column]  # (m)
        mel_feats = mel_feats[:, first_mel:(n_mels + first_mel)]

    # (n_mels, n_mfcc)
    dct = _get_dct_matrix(n_mfcc, n_mels).astype(dtype=dtype)

    # (m, n_mfcc)
    cepstra = mel_feats.matmul(dct)

    if cepstral_lifter != 0.0:
        # (1, n_mfcc), broadcast over all frames
        lifter = _get_lifter_coeffs(n_mfcc, cepstral_lifter).unsqueeze(0)
        cepstra *= lifter.astype(dtype=dtype)

    if use_energy:
        # The frame log energy takes the place of C0.
        cepstra[:, 0] = signal_log_energy

    if htk_compat:
        leading = cepstra[:, 0].unsqueeze(1)  # (m, 1)
        trailing = cepstra[:, 1:]  # (m, n_mfcc - 1)
        if not use_energy:
            # C0 is rescaled by sqrt(2) in the HTK layout.
            leading *= math.sqrt(2)

        cepstra = paddle.concat((trailing, leading), axis=1)

    return _subtract_column_mean(cepstra, subtract_mean)


================================================
FILE: audio/paddleaudio/compliance/librosa.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from librosa(https://github.com/librosa/librosa)
import warnings
from typing import List
from typing import Optional
from typing import Union

import numpy as np
import scipy
from numpy.lib.stride_tricks import as_strided
from scipy import signal

from ..backends import depth_convert
from ..utils import ParameterError

# Names exported by `from <this module> import *`, grouped by purpose.
__all__ = [
    # dsp
    'stft',
    'mfcc',
    'hz_to_mel',
    'mel_to_hz',
    'mel_frequencies',
    'power_to_db',
    'compute_fbank_matrix',
    'melspectrogram',
    'spectrogram',
    'mu_encode',
    'mu_decode',
    # augmentation
    'depth_augment',
    'spect_augment',
    'random_crop1d',
    'random_crop2d',
    'adaptive_spect_augment',
]


def _pad_center(data: np.ndarray, size: int, axis: int=-1,
                **kwargs) -> np.ndarray:
    """Pad an array to a target length along a target axis.

    This differs from `np.pad` by centering the data prior to padding,
    analogous to `str.center`
    """

    kwargs.setdefault("mode", "constant")
    n = data.shape[axis]
    lpad = int((size - n) // 2)
    lengths = [(0, 0)] * data.ndim
    lengths[axis] = (lpad, int(size - n - lpad))

    if lpad < 0:
        raise ParameterError(("Target size ({size:d}) must be "
                              "at least input size ({n:d})"))

    return np.pad(data, lengths, **kwargs)


def _split_frames(x: np.ndarray,
                  frame_length: int,
                  hop_length: int,
                  axis: int=-1) -> np.ndarray:
    """Slice a data array into (overlapping) frames.

    This function is aligned with librosa.frame

    Args:
        x (np.ndarray): Array to be framed.
        frame_length (int): Number of samples per frame.
        hop_length (int): Number of samples between the starts of adjacent frames; must be at least 1.
        axis (int, optional): Axis along which to frame; only 0 and -1 are supported. Defaults to -1.

    Returns:
        np.ndarray: Strided array of frames. For `axis=-1` the shape is
            `x.shape[:-1] + (frame_length, n_frames)`; for `axis=0` it is
            `(n_frames, frame_length) + x.shape[1:]`, where
            `n_frames = 1 + (x.shape[axis] - frame_length) // hop_length`.

    Raises:
        ParameterError: If `x` is not an ndarray, is shorter than `frame_length` along `axis`,
            `hop_length < 1`, or `axis` is neither 0 nor -1.
    """

    if not isinstance(x, np.ndarray):
        raise ParameterError(
            f"Input must be of type numpy.ndarray, given type(x)={type(x)}")

    if x.shape[axis] < frame_length:
        raise ParameterError(f"Input is too short (n={x.shape[axis]:d})"
                             f" for frame_length={frame_length:d}")

    if hop_length < 1:
        raise ParameterError(f"Invalid hop_length: {hop_length:d}")

    # The stride arithmetic below needs a particular memory layout per axis:
    # Fortran-contiguous for axis=-1, C-contiguous for axis=0. Other layouts
    # are copied (with a warning) before framing.
    if axis == -1 and not x.flags["F_CONTIGUOUS"]:
        warnings.warn(f"librosa.util.frame called with axis={axis} "
                      "on a non-contiguous input. This will result in a copy.")
        x = np.asfortranarray(x)
    elif axis == 0 and not x.flags["C_CONTIGUOUS"]:
        warnings.warn(f"librosa.util.frame called with axis={axis} "
                      "on a non-contiguous input. This will result in a copy.")
        x = np.ascontiguousarray(x)

    # Only complete frames are produced; trailing samples are dropped.
    n_frames = 1 + (x.shape[axis] - frame_length) // hop_length
    strides = np.asarray(x.strides)

    # Byte step, derived from the positive strides of x, that is multiplied
    # by hop_length to get the stride of the new frame axis.
    new_stride = np.prod(strides[strides > 0] // x.itemsize) * x.itemsize

    if axis == -1:
        # The frame index becomes a new trailing axis.
        shape = list(x.shape)[:-1] + [frame_length, n_frames]
        strides = list(strides) + [hop_length * new_stride]

    elif axis == 0:
        # The frame index becomes a new leading axis.
        shape = [n_frames, frame_length] + list(x.shape)[1:]
        strides = [hop_length * new_stride] + list(strides)

    else:
        raise ParameterError(f"Frame axis={axis} must be either 0 or -1")

    # Build the framed array with the computed shape and strides.
    return as_strided(x, shape=shape, strides=strides)


def _check_audio(y, mono=True) -> bool:
    """Determine whether a variable contains valid audio data.

    The audio y must be a np.ndarray, ether 1-channel or two channel
    """
    if not isinstance(y, np.ndarray):
        raise ParameterError("Audio data must be of type numpy.ndarray")
    if y.ndim > 2:
        raise ParameterError(
            f"Invalid shape for audio ndim={y.ndim:d}, shape={y.shape}")

    if mono and y.ndim == 2:
        raise ParameterError(
            f"Invalid shape for mono audio ndim={y.ndim:d}, shape={y.shape}")

    if (mono and len(y) == 0) or (not mono and y.shape[1] < 0):
        raise ParameterError(f"Audio is empty ndim={y.ndim:d}, shape={y.shape}")

    if not np.issubdtype(y.dtype, np.floating):
        raise ParameterError("Audio data must be floating-point")

    if not np.isfinite(y).all():
        raise ParameterError("Audio buffer is not finite everywhere")

    return True


def hz_to_mel(frequencies: Union[float, List[float], np.ndarray],
              htk: bool=False) -> np.ndarray:
    """Convert frequencies in Hz to the mel scale.

    With `htk=True` the HTK formula `2595 * log10(1 + f / 700)` is used.
    Otherwise the scale is linear below 1000 Hz and logarithmic above.

    Args:
        frequencies (Union[float, List[float], np.ndarray]): Frequencies in Hz.
        htk (bool, optional): Use htk scaling. Defaults to False.

    Returns:
        np.ndarray: Frequency in mels.
    """
    hz = np.asanyarray(frequencies)

    if htk:
        return 2595.0 * np.log10(1.0 + hz / 700.0)

    # Constants of the piecewise scale.
    f_min = 0.0
    f_sp = 200.0 / 3
    min_log_hz = 1000.0  # first frequency of the log region (Hz)
    min_log_mel = (min_log_hz - f_min) / f_sp  # the same point in mels
    logstep = np.log(6.4) / 27.0  # step size inside the log region

    # Start with the linear mapping everywhere ...
    mels = (hz - f_min) / f_sp

    # ... then overwrite whatever falls into the log region.
    if hz.ndim:
        # Array input: handle all log-region entries at once.
        in_log_region = hz >= min_log_hz
        mels[in_log_region] = min_log_mel + \
            np.log(hz[in_log_region] / min_log_hz) / logstep
    elif hz >= min_log_hz:
        # Scalar input: a plain comparison is enough.
        mels = min_log_mel + np.log(hz / min_log_hz) / logstep

    return mels


def mel_to_hz(mels: Union[float, List[float], np.ndarray],
              htk: bool=False) -> np.ndarray:
    """Convert mel bin numbers to frequencies.

    With `htk=True` the HTK formula `700 * (10 ** (m / 2595) - 1)` is used.
    Otherwise the scale is linear below 1000 Hz and logarithmic above.

    Args:
        mels (Union[float, List[float], np.ndarray]): Frequency in mels.
        htk (bool, optional): Use htk scaling. Defaults to False.

    Returns:
        np.ndarray: Frequencies in Hz.
    """
    mel_array = np.asanyarray(mels)

    if htk:
        return 700.0 * (10.0**(mel_array / 2595.0) - 1.0)

    # Fill in the linear scale
    f_min = 0.0
    f_sp = 200.0 / 3
    freqs = f_min + f_sp * mel_array

    # And now the nonlinear scale
    min_log_hz = 1000.0  # beginning of log region (Hz)
    min_log_mel = (min_log_hz - f_min) / f_sp  # same (Mels)
    logstep = np.log(6.4) / 27.0  # step size for log region

    if mel_array.ndim:
        # If we have vector data, vectorize
        log_t = mel_array >= min_log_mel
        freqs[log_t] = min_log_hz * \
            np.exp(logstep * (mel_array[log_t] - min_log_mel))
    elif mel_array >= min_log_mel:
        # If we have scalar data, check directly
        freqs = min_log_hz * np.exp(logstep * (mel_array - min_log_mel))

    return freqs


def mel_frequencies(n_mels: int=128,
                    fmin: float=0.0,
                    fmax: float=11025.0,
                    htk: bool=False) -> np.ndarray:
    """Compute frequencies that are evenly spaced on the mel scale.

    Args:
        n_mels (int, optional): Number of mel bins. Defaults to 128.
        fmin (float, optional): Minimum frequency in Hz. Defaults to 0.0.
        fmax (float, optional): Maximum frequency in Hz. Defaults to 11025.0.
        htk (bool, optional): Use htk scaling. Defaults to False.

    Returns:
        np.ndarray: Vector of n_mels frequencies in Hz with shape `(n_mels,)`.
    """
    # Map both limits to mels, lay out an even grid between them (both ends
    # included), then map the grid back to Hz.
    lowest_mel = hz_to_mel(fmin, htk=htk)
    highest_mel = hz_to_mel(fmax, htk=htk)
    mel_grid = np.linspace(lowest_mel, highest_mel, n_mels)
    return mel_to_hz(mel_grid, htk=htk)


def fft_frequencies(sr: int, n_fft: int) -> np.ndarray:
    """Compute the center frequencies of the non-negative FFT bins.

    Args:
        sr (int): Sample rate.
        n_fft (int): FFT size.

    Returns:
        np.ndarray: FFT frequencies in Hz with shape `(n_fft//2 + 1,)`.
    """
    nyquist = float(sr) / 2
    n_bins = int(1 + n_fft // 2)
    # Evenly spaced from 0 Hz up to and including the Nyquist frequency.
    return np.linspace(0, nyquist, n_bins, endpoint=True)


def compute_fbank_matrix(sr: int,
                         n_fft: int,
                         n_mels: int=128,
                         fmin: float=0.0,
                         fmax: Optional[float]=None,
                         htk: bool=False,
                         norm: str="slaney",
                         dtype: type=np.float32) -> np.ndarray:
    """Build a matrix of triangular mel filters over the FFT bins.

    Args:
        sr (int): Sample rate.
        n_fft (int): FFT size.
        n_mels (int, optional): Number of mel bins. Defaults to 128.
        fmin (float, optional): Minimum frequency in Hz. Defaults to 0.0.
        fmax (Optional[float], optional): Maximum frequency in Hz; `sr / 2` is used when None.
            Defaults to None.
        htk (bool, optional): Use htk scaling. Defaults to False.
        norm (str, optional): Type of normalization; only "slaney" is accepted. Defaults to "slaney".
        dtype (type, optional): Data type of the returned matrix. Defaults to np.float32.

    Returns:
        np.ndarray: Mel transform matrix with shape `(n_mels, n_fft//2 + 1)`.

    Raises:
        ParameterError: If `norm` is not "slaney".
    """
    if norm != "slaney":
        raise ParameterError('norm must set to slaney')

    fmax = float(sr) / 2 if fmax is None else fmax

    n_mels = int(n_mels)
    n_freqs = int(1 + n_fft // 2)
    weights = np.zeros((n_mels, n_freqs), dtype=dtype)

    # Center frequency of every FFT bin.
    fftfreqs = fft_frequencies(sr=sr, n_fft=n_fft)

    # Edges and centers of the mel bands: n_mels filters need n_mels + 2 points.
    mel_f = mel_frequencies(n_mels + 2, fmin=fmin, fmax=fmax, htk=htk)

    band_steps = np.diff(mel_f)
    ramps = np.subtract.outer(mel_f, fftfreqs)

    for band in range(n_mels):
        # Rising edge from the left point and falling edge to the right point.
        rising = -ramps[band] / band_steps[band]
        falling = ramps[band + 2] / band_steps[band + 1]

        # The triangle is the smaller of the two edges, clipped at zero.
        weights[band] = np.maximum(0, np.minimum(rising, falling))

    # `norm` can only be "slaney" at this point: scale every filter by
    # 2 / bandwidth so the energy per channel is roughly constant.
    enorm = 2.0 / (mel_f[2:n_mels + 2] - mel_f[:n_mels])
    weights *= enorm[:, np.newaxis]

    # Every filter whose left edge is not at 0 Hz must have a non-zero peak.
    if not np.all((mel_f[:-2] == 0) | (weights.max(axis=1) > 0)):
        # This means we have an empty channel somewhere
        warnings.warn("Empty filters detected in mel frequency basis. "
                      "Some channels will produce empty responses. "
                      "Try increasing your sampling rate (and fmax) or "
                      "reducing n_mels.")

    return weights


def stft(x: np.ndarray,
         n_fft: int=2048,
         hop_length: Optional[int]=None,
         win_length: Optional[int]=None,
         window: str="hann",
         center: bool=True,
         dtype: type=np.complex64,
         pad_mode: str="reflect") -> np.ndarray:
    """Short-time Fourier transform (STFT).

    Args:
        x (np.ndarray): Input waveform in one dimension.
        n_fft (int, optional): FFT size. Defaults to 2048.
        hop_length (Optional[int], optional): Number of steps to advance between adjacent windows. When None, `win_length // 4` is used. Defaults to None.
        win_length (Optional[int], optional): The size of window. When None, `n_fft` is used. Defaults to None.
        window (str, optional): A string of window specification. Defaults to "hann".
        center (bool, optional): Whether to pad `x` with `n_fft // 2` samples so that the `t`-th frame is centered at sample `t * hop_length`. Defaults to True.
        dtype (type, optional): Data type of STFT results. Defaults to np.complex64.
        pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to "reflect".

    Returns:
        np.ndarray: The complex STFT output with shape `(n_fft//2 + 1, num_frames)`.

    Raises:
        ParameterError: If `center` is False and `n_fft` is larger than the
            number of samples in `x`.
    """
    _check_audio(x)

    # By default, the analysis window spans the entire FFT frame.
    if win_length is None:
        win_length = n_fft

    # Default hop is a quarter of the window.
    if hop_length is None:
        hop_length = int(win_length // 4)

    fft_window = signal.get_window(window, win_length, fftbins=True)

    # Pad the window out to n_fft size
    fft_window = _pad_center(fft_window, n_fft)

    # Column vector so it broadcasts across the frames (columns) of x_frames.
    fft_window = fft_window.reshape((-1, 1))

    # Pad the time series so that frames are centered
    if center:
        if n_fft > x.shape[-1]:
            # With padding the transform can still be computed, so only warn.
            warnings.warn(
                f"n_fft={n_fft} is too large for input signal of length={x.shape[-1]}"
            )
        x = np.pad(x, int(n_fft // 2), mode=pad_mode)

    elif n_fft > x.shape[-1]:
        # Without padding not even a single full frame fits in the signal.
        raise ParameterError(
            f"n_fft={n_fft} is too large for input signal of length={x.shape[-1]}"
        )

    # Window the time series.
    x_frames = _split_frames(x, frame_length=n_fft, hop_length=hop_length)
    # Pre-allocate the STFT matrix
    stft_matrix = np.empty(
        (int(1 + n_fft // 2), x_frames.shape[1]), dtype=dtype, order="F")
    # Constrain STFT block sizes to 256 KB
    MAX_MEM_BLOCK = 2**8 * 2**10
    # how many columns can we fit within MAX_MEM_BLOCK? (always at least one)
    n_columns = MAX_MEM_BLOCK // (stft_matrix.shape[0] * stft_matrix.itemsize)
    n_columns = max(n_columns, 1)

    # Transform the frames block by block to bound the temporary memory use.
    for bl_s in range(0, stft_matrix.shape[1], n_columns):
        bl_t = min(bl_s + n_columns, stft_matrix.shape[1])
        stft_matrix[:, bl_s:bl_t] = np.fft.rfft(
            fft_window * x_frames[:, bl_s:bl_t], axis=0)

    return stft_matrix


def power_to_db(spect: np.ndarray,
                ref: float=1.0,
                amin: float=1e-10,
                top_db: Optional[float]=80.0) -> np.ndarray:
    """Convert a power spectrogram (amplitude squared) to decibel (dB) units.

    The result is `10 * log10(max(amin, spect)) - 10 * log10(max(amin, ref))`,
    optionally floored at `top_db` below its own maximum.

    Args:
        spect (np.ndarray): STFT power spectrogram of an input waveform. Complex input is reduced to its magnitude (with a warning).
        ref (float, optional): The reference value; its absolute value is used. A callable is also accepted and is applied to the spectrogram to obtain the reference. Defaults to 1.0.
        amin (float, optional): Minimum threshold applied to both `spect` and the reference before taking the logarithm. Defaults to 1e-10.
        top_db (Optional[float], optional): Threshold the output at `top_db` below the peak. None disables the thresholding. Defaults to 80.0.

    Returns:
        np.ndarray: Power spectrogram in db scale.

    Raises:
        ParameterError: If `amin` is not strictly positive or `top_db` is negative.
    """
    power = np.asarray(spect)

    if amin <= 0:
        raise ParameterError("amin must be strictly positive")

    if np.issubdtype(power.dtype, np.complexfloating):
        warnings.warn(
            "power_to_db was called on complex input so phase "
            "information will be discarded. To suppress this warning, "
            "call power_to_db(np.abs(D)**2) instead.")
        power = np.abs(power)

    # A callable reference is evaluated on the (real-valued) spectrogram.
    reference = ref(power) if callable(ref) else np.abs(ref)

    db = 10.0 * np.log10(np.maximum(amin, power))
    # Subtract in place so the result keeps the dtype of the first term.
    db -= 10.0 * np.log10(np.maximum(amin, reference))

    if top_db is None:
        return db

    if top_db < 0:
        raise ParameterError("top_db must be non-negative")
    floor = db.max() - top_db
    return np.maximum(db, floor)


def mfcc(x: np.ndarray,
         sr: int=16000,
         spect: Optional[np.ndarray]=None,
         n_mfcc: int=20,
         dct_type: int=2,
         norm: str="ortho",
         lifter: int=0,
         **kwargs) -> np.ndarray:
    """Mel-frequency cepstral coefficients (MFCCs)

    Args:
        x (np.ndarray): Input waveform in one dimension. Only used when `spect` is None.
        sr (int, optional): Sample rate. Defaults to 16000.
        spect (Optional[np.ndarray], optional): Input log-power Mel spectrogram. When None, it is computed with `melspectrogram(x, sr=sr, **kwargs)`. Defaults to None.
        n_mfcc (int, optional): Number of cepstra in MFCC. Defaults to 20.
        dct_type (int, optional): Discrete cosine transform (DCT) type. Defaults to 2.
        norm (str, optional): Type of normalization. Defaults to "ortho".
        lifter (int, optional): Cepstral filtering. When positive, coefficient `n` (1-based) is scaled by `1 + (lifter / 2) * sin(pi * n / lifter)`. 0 disables liftering. Defaults to 0.
        **kwargs: Extra keyword arguments forwarded to `melspectrogram` when `spect` is None.

    Returns:
        np.ndarray: Mel frequency cepstral coefficients array with shape `(n_mfcc, num_frames)`.

    Raises:
        ParameterError: If `lifter` is negative.
    """
    # Validate before doing any spectrogram/DCT work.
    if lifter < 0:
        raise ParameterError(
            f"MFCC lifter={lifter} must be a non-negative number")

    if spect is None:
        spect = melspectrogram(x, sr=sr, **kwargs)

    M = scipy.fftpack.dct(spect, axis=0, type=dct_type, norm=norm)[:n_mfcc]

    if lifter == 0:
        return M

    # Sinusoidal liftering. The weights are sized from M itself because the
    # slice above yields fewer than n_mfcc rows when the spectrogram has
    # fewer bands than n_mfcc.
    order = np.arange(1, 1 + M.shape[0], dtype=M.dtype)
    factor = 1 + (lifter / 2) * np.sin(np.pi * order / lifter)
    return M * factor[:, np.newaxis]


def melspectrogram(x: np.ndarray,
                   sr: int=16000,
                   window_size: int=512,
                   hop_length: int=320,
                   n_mels: int=64,
                   fmin: float=50.0,
                   fmax: Optional[float]=None,
                   window: str='hann',
                   center: bool=True,
                   pad_mode: str='reflect',
                   power: float=2.0,
                   to_db: bool=True,
                   ref: float=1.0,
                   amin: float=1e-10,
                   top_db: Optional[float]=None) -> np.ndarray:
    """Compute mel-spectrogram.

    Args:
        x (np.ndarray): Input waveform in one dimension.
        sr (int, optional): Sample rate. Defaults to 16000.
        window_size (int, optional): Size of FFT and window length. Defaults to 512.
        hop_length (int, optional): Number of steps to advance between adjacent windows. Defaults to 320.
        n_mels (int, optional): Number of mel bins. Defaults to 64.
        fmin (float, optional): Minimum frequency in Hz. Must satisfy `0 <= fmin < fmax`. Defaults to 50.0.
        fmax (Optional[float], optional): Maximum frequency in Hz. When None, `sr // 2` is used. Defaults to None.
        window (str, optional): A string of window specification. Defaults to "hann".
        center (bool, optional): Whether to pad `x` so that the `t`-th frame is centered at sample `t * hop_length`. Defaults to True.
        pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to "reflect".
        power (float, optional): Exponent for the magnitude melspectrogram. Defaults to 2.0.
        to_db (bool, optional): Enable db scale. Defaults to True.
        ref (float, optional): The reference value. If smaller than 1.0, the db level of the signal will be pulled up accordingly. Otherwise, the db level is pushed down. Defaults to 1.0.
        amin (float, optional): Minimum threshold. Defaults to 1e-10.
        top_db (Optional[float], optional): Threshold the output at `top_db` below the peak. Defaults to None.

    Returns:
        np.ndarray: The mel-spectrogram in power scale or db scale with shape `(n_mels, num_frames)`.

    Raises:
        ParameterError: If the waveform is empty, or if `fmin`/`fmax` do not
            satisfy `0 <= fmin < fmax`.
    """
    _check_audio(x, mono=True)
    if len(x) <= 0:
        raise ParameterError('The input waveform is empty')

    if fmax is None:
        fmax = sr // 2
    if fmin < 0 or fmin >= fmax:
        # fmin == 0 is accepted by the check, so the message says "<=".
        raise ParameterError('fmin and fmax must satisfy 0<=fmin<fmax')

    s = stft(
        x,
        n_fft=window_size,
        hop_length=hop_length,
        win_length=window_size,
        window=window,
        center=center,
        pad_mode=pad_mode)

    # Magnitude raised to `power`, then projected onto the mel filter bank.
    spect_power = np.abs(s)**power
    fb_matrix = compute_fbank_matrix(
        sr=sr, n_fft=window_size, n_mels=n_mels, fmin=fmin, fmax=fmax)
    mel_spect = np.matmul(fb_matrix, spect_power)
    if to_db:
        return power_to_db(mel_spect, ref=ref, amin=amin, top_db=top_db)
    else:
        return mel_spect


def spectrogram(x: np.ndarray,
                sr: int=16000,
                window_size: int=512,
                hop_length: int=320,
                window: str='hann',
                center: bool=True,
                pad_mode: str='reflect',
                power: float=2.0) -> np.ndarray:
    """Compute spectrogram.

    The STFT of `x` is taken with the FFT size and the window length both set
    to `window_size`; its magnitude is then raised to `power`.

    Args:
        x (np.ndarray): Input waveform in one dimension.
        sr (int, optional): Sample rate. Not used by the computation. Defaults to 16000.
        window_size (int, optional): Size of FFT and window length. Defaults to 512.
        hop_length (int, optional): Number of steps to advance between adjacent windows. Defaults to 320.
        window (str, optional): A string of window specification. Defaults to "hann".
        center (bool, optional): Whether to pad `x` so that the `t`-th frame is centered at sample `t * hop_length`. Defaults to True.
        pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to "reflect".
        power (float, optional): Exponent for the magnitude spectrogram. Defaults to 2.0.

    Returns:
        np.ndarray: The STFT spectrogram in power scale `(n_fft//2 + 1, num_frames)`.
    """
    stft_options = dict(
        n_fft=window_size,
        hop_length=hop_length,
        win_length=window_size,
        window=window,
        center=center,
        pad_mode=pad_mode)
    magnitude = np.abs(stft(x, **stft_options))
    return magnitude**power


def mu_encode(x: np.ndarray, mu: int=255, quantized: bool=True) -> np.ndarray:
    """Mu-law encoding. Encode waveform based on mu-law companding. When quantized is True, the result holds integer-valued codes in range `[0,mu]`. Otherwise, the resulting waveform is in range `[-1,1]`.

    Args:
        x (np.ndarray): The input waveform to encode, expected in range `[-1,1]`.
        mu (int, optional): The encoding parameter. Defaults to 255.
        quantized (bool, optional): If `True`, quantize the encoded values into `1 + mu` distinct integer values. Defaults to True.

    Returns:
        np.ndarray: The mu-law encoded waveform.
    """
    # Compand: sign(x) * ln(1 + mu*|x|) / ln(1 + mu), using the caller's `mu`.
    y = np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu)
    if quantized:
        # Map [-1, 1] onto the codes 0..mu, rounding to the nearest level.
        y = np.floor((y + 1) / 2 * mu + 0.5)
    return y


def mu_decode(y: np.ndarray, mu: int=255, quantized: bool=True) -> np.ndarray:
    """Mu-law decoding. Compute the mu-law decoding given an input code. It assumes that the input `y` is in range `[0,mu]` when quantized is True and `[-1,1]` otherwise.

    Args:
        y (np.ndarray): The encoded waveform.
        mu (int, optional): The encoding parameter; use the same value that was used for encoding. Defaults to 255.
        quantized (bool, optional): If `True`, the input is assumed to be quantized to `1 + mu` distinct integer values. Defaults to True.

    Returns:
        np.ndarray: The mu-law decoded waveform.

    Raises:
        ParameterError: If `mu` is smaller than 1.
    """
    if mu < 1:
        raise ParameterError('mu is typically set as 2**k-1, k=1, 2, 3,...')

    if quantized:  # undo the quantization: codes 0..mu -> [-1, 1]
        y = y * 2 / mu - 1
    # Expand: sign(y) * ((1 + mu)**|y| - 1) / mu, the inverse of the companding.
    x = np.sign(y) / mu * ((1 + mu)**np.abs(y) - 1)
    return x


def _randint(high: int) -> int:
    """Generate one random integer in range [0 high)

     This is a helper function for random data augmentation
    """
    return int(np.random.randint(0, high=high))


def depth_augment(y: np.ndarray,
                  choices: List=['int8', 'int16'],
                  probs: List[float]=[0.5, 0.5]) -> np.ndarray:
    """ Audio depth augmentation. Simulate the distortion brought by quantization.

    One entry of `choices` is drawn at random according to `probs`; the
    waveform is converted to it with `depth_convert` and then converted back
    to its original dtype.

    Args:
        y (np.ndarray): Input waveform array in 1D or 2D.
        choices (List, optional): A list of data type to depth conversion. Defaults to ['int8', 'int16'].
        probs (List[float], optional): Probabilities to depth conversion, one per entry of `choices`. Defaults to [0.5, 0.5].

    Returns:
        np.ndarray: The augmented waveform.
    """
    assert len(probs) == len(choices), (
        'number of choices {} must be equal to size of probs {}'.format(
            len(choices), len(probs)))

    target_depth = np.random.choice(choices, p=probs)
    original_depth = y.dtype

    # Round trip through the drawn depth and back to the source dtype.
    degraded = depth_convert(y, target_depth)
    return depth_convert(degraded, original_depth)


def adaptive_spect_augment(spect: np.ndarray,
                           tempo_axis: int=0,
                           level: float=0.1) -> np.ndarray:
    """Do adaptive spectrogram augmentation. The level of the augmentation is governed by the parameter level, ranging from 0 to 1, with 0 representing no augmentation.

    `int(10 * level)` bands are zeroed along the time axis and as many along
    the frequency axis; each band is `int(size * level * 0.5)` wide, where
    `size` is the length of the masked axis. The input is modified in place.

    Args:
        spect (np.ndarray): Input spectrogram.
        tempo_axis (int, optional): Indicate the tempo axis. Defaults to 0.
        level (float, optional): The level factor of masking. Defaults to 0.1.

    Returns:
        np.ndarray: The augmented spectrogram (the same object as `spect`).
    """
    assert spect.ndim == 2., 'only supports 2d tensor or numpy array'

    time_first = tempo_axis == 0
    if time_first:
        nt, nf = spect.shape
    else:
        nf, nt = spect.shape
    time_axis = 0 if time_first else 1
    freq_axis = 1 - time_axis

    time_width = int(nt * level * 0.5)
    freq_width = int(nf * level * 0.5)
    n_time_masks = int(10 * level)
    n_freq_masks = int(10 * level)

    def _zero_band(axis, begin, width):
        # Zero the slab [begin, begin + width) along `axis`, in place.
        index = [slice(None), slice(None)]
        index[axis] = slice(begin, begin + width)
        spect[tuple(index)] = 0

    # Time masks are drawn before frequency masks.
    for _ in range(n_time_masks):
        _zero_band(time_axis, _randint(nt - time_width), time_width)
    for _ in range(n_freq_masks):
        _zero_band(freq_axis, _randint(nf - freq_width), freq_width)

    return spect


def spect_augment(spect: np.ndarray,
                  tempo_axis: int=0,
                  max_time_mask: int=3,
                  max_freq_mask: int=3,
                  max_time_mask_width: int=30,
                  max_freq_mask_width: int=20) -> np.ndarray:
    """Do spectrogram augmentation in both time and freq axis.

    The number of masks and the mask width are drawn at random for each axis
    (each strictly below its `max_*` bound), then that many bands are zeroed
    at random positions. The input is modified in place.

    Args:
        spect (np.ndarray): Input spectrogram.
        tempo_axis (int, optional): Indicate the tempo axis. Defaults to 0.
        max_time_mask (int, optional): Exclusive upper bound on the number of time masks. Defaults to 3.
        max_freq_mask (int, optional): Exclusive upper bound on the number of frequency masks. Defaults to 3.
        max_time_mask_width (int, optional): Exclusive upper bound on the width of time masking. Defaults to 30.
        max_freq_mask_width (int, optional): Exclusive upper bound on the width of frequency masking. Defaults to 20.

    Returns:
        np.ndarray: The augmented spectrogram (the same object as `spect`).
    """
    assert spect.ndim == 2., 'only supports 2d tensor or numpy array'
    if tempo_axis == 0:
        nt, nf = spect.shape
        time_axis, freq_axis = 0, 1
    else:
        nf, nt = spect.shape
        time_axis, freq_axis = 1, 0

    # The draw order (counts, then widths, then start positions) is kept so
    # seeded runs on inputs that already worked produce the same masks.
    num_time_mask = _randint(max_time_mask)
    num_freq_mask = _randint(max_freq_mask)

    time_mask_width = _randint(max_time_mask_width)
    freq_mask_width = _randint(max_freq_mask_width)

    def _apply_masks(axis, size, count, width):
        # Nothing to mask on an empty axis.
        if size < 1:
            return
        # A width >= size would leave no valid start position and make the
        # random draw fail, so short spectrograms get a narrower mask.
        width = min(width, size - 1)
        for _ in range(count):
            start = _randint(size - width)
            index = [slice(None), slice(None)]
            index[axis] = slice(start, start + width)
            spect[tuple(index)] = 0

    _apply_masks(time_axis, nt, num_time_mask, time_mask_width)
    _apply_masks(freq_axis, nf, num_freq_mask, freq_mask_width)

    return spect


def random_crop1d(y: np.ndarray, crop_len: int) -> np.ndarray:
    """ Random cropping on a input waveform.

    Args:
        y (np.ndarray): Input waveform array in 1D.
        crop_len (int): Length of waveform to crop. Must not exceed `len(y)`.

    Returns:
        np.ndarray: The cropped waveform.

    Raises:
        ParameterError: If `y` is not one-dimensional.
    """
    if y.ndim != 1:
        raise ParameterError('only accept 1d tensor or numpy array')
    n = len(y)
    # Valid start positions are 0..n-crop_len inclusive, hence the "+ 1":
    # the final window can be drawn and crop_len == n returns the whole input.
    idx = _randint(n - crop_len + 1)
    return y[idx:idx + crop_len]


def random_crop2d(s: np.ndarray, crop_len: int,
                  tempo_axis: int=0) -> np.ndarray:
    """ Random cropping on a spectrogram.

    Args:
        s (np.ndarray): Input spectrogram in 2D.
        crop_len (int): Length of spectrogram to crop along `tempo_axis`. Must not exceed the size of that axis.
        tempo_axis (int, optional): Indicate the tempo axis. Defaults to 0.

    Returns:
        np.ndarray: The cropped spectrogram.

    Raises:
        ParameterError: If `tempo_axis` is not smaller than `s.ndim`.
    """
    if tempo_axis >= s.ndim:
        raise ParameterError('axis out of range')

    n = s.shape[tempo_axis]
    # Valid start positions are 0..n-crop_len inclusive, hence the "+ 1":
    # the final window can be drawn and crop_len == n returns the whole axis.
    idx = _randint(high=n - crop_len + 1)
    # Full slices everywhere except the cropped axis.
    sli = [slice(None)] * s.ndim
    sli[tempo_axis] = slice(idx, idx + crop_len)
    return s[tuple(sli)]


================================================
FILE: audio/paddleaudio/datasets/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .esc50 import ESC50
from .gtzan import GTZAN
from .hey_snips import HeySnips
from .rirs_noises import OpenRIRNoise
from .tess import TESS
from .urban_sound import UrbanSound8K
from .voxceleb import VoxCeleb


================================================
FILE: audio/paddleaudio/datasets/dataset.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List

import numpy as np
import paddle

from ..backends.soundfile_backend import soundfile_load as load_audio
from ..compliance.kaldi import fbank as kaldi_fbank
from ..compliance.kaldi import mfcc as kaldi_mfcc
from ..compliance.librosa import melspectrogram
from ..compliance.librosa import mfcc

# Maps every supported `feat_type` name to the function that extracts it.
# 'raw' maps to None: no extractor is applied and the waveform is used as is.
feat_funcs = {
    'raw': None,
    'melspectrogram': melspectrogram,
    'mfcc': mfcc,
    'kaldi_fbank': kaldi_fbank,
    'kaldi_mfcc': kaldi_mfcc,
}


class AudioClassificationDataset(paddle.io.Dataset):
    """
    Base class of audio classification dataset.

    Holds parallel lists of audio files and labels; each item is loaded from
    disk on access and optionally turned into a feature selected by `feat_type`.
    """

    def __init__(self,
                 files: List[str],
                 labels: List[int],
                 feat_type: str='raw',
                 sample_rate: int=None,
                 **kwargs):
        """
        Args:
            files (:obj:`List[str]`): A list of absolute path of audio files.
            labels (:obj:`List[int]`): Labels of audio files.
            feat_type (:obj:`str`, `optional`, defaults to `raw`):
                It identifies the feature type that user wants to extract of an audio file.
                Must be one of the keys of `feat_funcs`.
            sample_rate (:obj:`int`, `optional`, defaults to `None`):
                Sample rate requested from the audio loader. When `None`, the loader is
                called without a target rate.
            **kwargs: Extra keyword arguments forwarded to the feature function.

        Raises:
            RuntimeError: If `feat_type` is not a key of `feat_funcs`.
        """
        super(AudioClassificationDataset, self).__init__()

        if feat_type not in feat_funcs.keys():
            raise RuntimeError(
                f"Unknown feat_type: {feat_type}, it must be one in {list(feat_funcs.keys())}"
            )

        self.files = files
        self.labels = labels

        self.feat_type = feat_type
        self.sample_rate = sample_rate
        self.feat_config = kwargs  # Pass keyword arguments to customize feature config

    def _get_data(self, input_file: str):
        """Collect the files and labels of a dataset; implemented by subclasses."""
        raise NotImplementedError

    def _convert_to_record(self, idx):
        """Load sample `idx` and return a dict with its 'feat' and 'label'."""
        file, label = self.files[idx], self.labels[idx]

        if self.sample_rate is None:
            waveform, sample_rate = load_audio(file)
        else:
            waveform, sample_rate = load_audio(file, sr=self.sample_rate)

        feat_func = feat_funcs[self.feat_type]

        record = {}
        if self.feat_type in ['kaldi_fbank', 'kaldi_mfcc']:
            waveform = paddle.to_tensor(waveform).unsqueeze(0)  # (C, T)
            # When no rate was configured, pass the rate reported by the
            # loader instead of forwarding None to the extractor.
            feat_sr = sample_rate if self.sample_rate is None else self.sample_rate
            record['feat'] = feat_func(
                waveform=waveform, sr=feat_sr, **self.feat_config)
        else:
            # 'raw' has no feature function: the waveform itself is the feature.
            record['feat'] = feat_func(
                waveform, sample_rate,
                **self.feat_config) if feat_func else waveform
        record['label'] = label
        return record

    def __getitem__(self, idx):
        """Return sample `idx`.

        For the kaldi feature types the result is `(key, feat, label)`;
        otherwise it is `(transposed feat array, int64 label array)`.
        """
        record = self._convert_to_record(idx)
        if self.feat_type in ['kaldi_fbank', 'kaldi_mfcc']:
            # `keys` is only provided by subclasses that define utterance ids;
            # fall back to the file path so other datasets do not fail here.
            keys = getattr(self, 'keys', None)
            key = keys[idx] if keys is not None else self.files[idx]
            return key, record['feat'], record['label']
        else:
            return np.array(record['feat']).transpose(), np.array(
                record['label'], dtype=np.int64)

    def __len__(self):
        """Number of samples in the dataset."""
        return len(self.files)


================================================
FILE: audio/paddleaudio/datasets/esc50.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import os
from typing import List
from typing import Tuple

from ..utils.download import download_and_decompress
from ..utils.env import DATA_HOME
from .dataset import AudioClassificationDataset

# Names exported by `from <module> import *`.
__all__ = ['ESC50']


class ESC50(AudioClassificationDataset):
    """
    The ESC-50 dataset is a labeled collection of 2000 environmental audio recordings
    suitable for benchmarking methods of environmental sound classification. The dataset
    consists of 5-second-long recordings organized into 50 semantical classes (with
    40 examples per class)

    Reference:
        ESC: Dataset for Environmental Sound Classification
        http://dx.doi.org/10.1145/2733373.2806390
    """

    # Archive(s) fetched into DATA_HOME when the data is not found locally.
    archives = [
        {
            'url':
            'https://paddleaudio.bj.bcebos.com/datasets/ESC-50-master.zip',
            'md5': '7771e4b9d86d0945acce719c7a59305a',
        },
    ]
    label_list = [
        # Animals
        'Dog',
        'Rooster',
        'Pig',
        'Cow',
        'Frog',
        'Cat',
        'Hen',
        'Insects (flying)',
        'Sheep',
        'Crow',
        # Natural soundscapes & water sounds
        'Rain',
        'Sea waves',
        'Crackling fire',
        'Crickets',
        'Chirping birds',
        'Water drops',
        'Wind',
        'Pouring water',
        'Toilet flush',
        'Thunderstorm',
        # Human, non-speech sounds
        'Crying baby',
        'Sneezing',
        'Clapping',
        'Breathing',
        'Coughing',
        'Footsteps',
        'Laughing',
        'Brushing teeth',
        'Snoring',
        'Drinking, sipping',
        # Interior/domestic sounds
        'Door knock',
        'Mouse click',
        'Keyboard typing',
        'Door, wood creaks',
        'Can opening',
        'Washing machine',
        'Vacuum cleaner',
        'Clock alarm',
        'Clock tick',
        'Glass breaking',
        # Exterior/urban noises
        'Helicopter',
        'Chainsaw',
        'Siren',
        'Car horn',
        'Engine',
        'Train',
        'Church bells',
        'Airplane',
        'Fireworks',
        'Hand saw',
    ]
    # Paths below are relative to DATA_HOME.
    meta = os.path.join('ESC-50-master', 'meta', 'esc50.csv')
    # One row of the metadata CSV.
    meta_info = collections.namedtuple(
        'META_INFO',
        ('filename', 'fold', 'target', 'category', 'esc10', 'src_file', 'take'))
    audio_path = os.path.join('ESC-50-master', 'audio')

    def __init__(self,
                 mode: str='train',
                 split: int=1,
                 feat_type: str='raw',
                 **kwargs):
        """
        Args:
            mode (:obj:`str`, `optional`, defaults to `train`):
                It identifies the dataset mode (train or dev).
            split (:obj:`int`, `optional`, defaults to 1):
                It specify the fold of dev dataset.
            feat_type (:obj:`str`, `optional`, defaults to `raw`):
                It identifies the feature type that user wants to extract of an audio file.
        """
        wav_files, targets = self._get_data(mode, split)
        super().__init__(
            files=wav_files, labels=targets, feat_type=feat_type, **kwargs)

    def _get_meta_info(self) -> List[collections.namedtuple]:
        """Parse the metadata CSV into `meta_info` rows, skipping its first line."""
        csv_path = os.path.join(DATA_HOME, self.meta)
        with open(csv_path, 'r') as rf:
            rows = rf.readlines()[1:]
        return [self.meta_info(*row.strip().split(',')) for row in rows]

    def _get_data(self, mode: str, split: int) -> Tuple[List[str], List[int]]:
        """Return the audio paths and integer targets selected by `mode` and `split`.

        `train` keeps every sample whose fold differs from `split`; any other
        mode keeps only the samples whose fold equals `split`.
        """
        audio_dir = os.path.join(DATA_HOME, self.audio_path)
        csv_path = os.path.join(DATA_HOME, self.meta)
        # Fetch the archive unless both the audio folder and the CSV exist.
        if not (os.path.isdir(audio_dir) and os.path.isfile(csv_path)):
            download_and_decompress(self.archives, DATA_HOME)

        training = mode == 'train'
        files, labels = [], []
        for row in self._get_meta_info():
            in_dev_fold = int(row.fold) == split
            # Exactly one of "training" and "in the dev fold" must hold.
            if training != in_dev_fold:
                files.append(
                    os.path.join(DATA_HOME, self.audio_path, row.filename))
                labels.append(int(row.target))

        return files, labels


================================================
FILE: audio/paddleaudio/datasets/gtzan.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import os
import random
from typing import List
from typing import Tuple

from ..utils.download import download_and_decompress
from ..utils.env import DATA_HOME
from .dataset import AudioClassificationDataset

# Names exported by `from <module> import *`.
__all__ = ['GTZAN']


class GTZAN(AudioClassificationDataset):
    """
    The GTZAN dataset consists of 1000 audio tracks each 30 seconds long. It contains 10 genres,
    each represented by 100 tracks. The dataset is the most-used public dataset for evaluation
    in machine listening research for music genre recognition (MGR).

    Reference:
        Musical genre classification of audio signals
        https://ieeexplore.ieee.org/document/1021072/
    """

    # Archive(s) fetched into DATA_HOME when the data is not found locally.
    archives = [
        {
            'url': 'http://opihi.cs.uvic.ca/sound/genres.tar.gz',
            'md5': '5b3d6dddb579ab49814ab86dba69e7c7',
        },
    ]
    label_list = [
        'blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal',
        'pop', 'reggae', 'rock'
    ]
    # Paths below are relative to DATA_HOME.
    meta = os.path.join('genres', 'input.mf')
    # One tab-separated line of the manifest.
    meta_info = collections.namedtuple('META_INFO', ('file_path', 'label'))
    audio_path = 'genres'

    def __init__(self,
                 mode='train',
                 seed=0,
                 n_folds=5,
                 split=1,
                 feat_type='raw',
                 **kwargs):
        """
        Args:
            mode (:obj:`str`, `optional`, defaults to `train`):
                It identifies the dataset mode (train or dev).
            seed (:obj:`int`, `optional`, defaults to 0):
                Set the random seed to shuffle samples.
            n_folds (:obj:`int`, `optional`, defaults to 5):
                Split the dataset into n folds. 1 fold for dev dataset and n-1 for train dataset.
            split (:obj:`int`, `optional`, defaults to 1):
                It specify the fold of dev dataset.
            feat_type (:obj:`str`, `optional`, defaults to `raw`):
                It identifies the feature type that user wants to extract of an audio file.
        """
        assert split <= n_folds, f'The selected split should not be larger than n_fold, but got {split} > {n_folds}'
        files, labels = self._get_data(mode, seed, n_folds, split)
        super(GTZAN, self).__init__(
            files=files, labels=labels, feat_type=feat_type, **kwargs)

    def _get_meta_info(self) -> List[collections.namedtuple]:
        """Parse the manifest into `meta_info` rows, ignoring blank lines."""
        ret = []
        with open(os.path.join(DATA_HOME, self.meta), 'r') as rf:
            for line in rf:
                line = line.strip()
                if not line:
                    # A blank (e.g. trailing) line carries no sample.
                    continue
                ret.append(self.meta_info(*line.split('\t')))
        return ret

    def _get_data(self, mode, seed, n_folds,
                  split) -> Tuple[List[str], List[int]]:
        """Return the audio paths and integer targets selected by `mode` and `split`.

        Samples are shuffled with `seed` and cut into `n_folds` consecutive
        folds numbered from 1. `train` keeps every fold except `split`; any
        other mode keeps only fold `split`.
        """
        if not os.path.isdir(os.path.join(DATA_HOME, self.audio_path)) or \
            not os.path.isfile(os.path.join(DATA_HOME, self.meta)):
            download_and_decompress(self.archives, DATA_HOME)

        meta_info = self._get_meta_info()
        # A dedicated generator yields the same permutation as seeding the
        # module-level one, so train and dev built with the same seed still
        # agree, but the caller's global random state is left untouched.
        random.Random(seed).shuffle(meta_info)

        files = []
        labels = []
        # At least one sample per fold, so the division below cannot be by zero.
        n_samples_per_fold = max(len(meta_info) // n_folds, 1)
        for idx, sample in enumerate(meta_info):
            file_path, label = sample
            filename = os.path.basename(file_path)
            target = self.label_list.index(label)
            # Leftover samples (count not divisible by n_folds) join the last
            # fold instead of landing in a fold number that does not exist.
            fold = min(idx // n_samples_per_fold + 1, n_folds)

            if mode == 'train' and fold != split:
                files.append(
                    os.path.join(DATA_HOME, self.audio_path, label, filename))
                labels.append(target)

            if mode != 'train' and fold == split:
                files.append(
                    os.path.join(DATA_HOME, self.audio_path, label, filename))
                labels.append(target)

        return files, labels


================================================
FILE: audio/paddleaudio/datasets/hey_snips.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import json
import os
from typing import List
from typing import Tuple

from .dataset import AudioClassificationDataset

__all__ = ['HeySnips']


class HeySnips(AudioClassificationDataset):
    """Audio classification dataset described by JSON manifests.

    A manifest named `<mode>.json` is read from `data_dir`. Each entry with a
    positive `duration` becomes one sample; entries whose `is_hotword` field
    equals 1 get label 0, every other entry gets label -1.
    """
    meta_info = collections.namedtuple('META_INFO',
                                       ('key', 'label', 'duration', 'wav'))

    def __init__(self,
                 data_dir: os.PathLike,
                 mode: str='train',
                 feat_type: str='kaldi_fbank',
                 sample_rate: int=16000,
                 **kwargs):
        """
        Args:
            data_dir (os.PathLike): Directory holding the `<mode>.json` manifest;
                audio paths in the manifest are joined onto it.
            mode (str, optional): Manifest name without extension. Defaults to 'train'.
            feat_type (str, optional): Forwarded to the parent class. Defaults to 'kaldi_fbank'.
            sample_rate (int, optional): Forwarded to the parent class. Defaults to 16000.
            **kwargs: Forwarded to the parent class.
        """
        # data_dir must be set before _get_data, which reads it.
        self.data_dir = data_dir
        wav_files, targets = self._get_data(mode)
        super().__init__(
            files=wav_files,
            labels=targets,
            feat_type=feat_type,
            sample_rate=sample_rate,
            **kwargs)

    def _get_meta_info(self, mode) -> List[collections.namedtuple]:
        """Load `<mode>.json` and return one META_INFO tuple per usable entry."""
        manifest = os.path.join(self.data_dir, '{}.json'.format(mode))
        with open(manifest, 'r') as f:
            entries = json.load(f)

        # Entries with a non-positive duration are dropped.
        return [
            self.meta_info(
                item['id'],
                0 if item['is_hotword'] == 1 else -1,
                item['duration'],
                os.path.join(self.data_dir, item['audio_file_path']))
            for item in entries if item['duration'] > 0
        ]

    def _get_data(self, mode: str) -> Tuple[List[str], List[int]]:
        """Return (files, labels) and record per-sample keys and durations on self."""
        records = self._get_meta_info(mode)

        self.keys = [rec.key for rec in records]
        self.durations = [float(rec.duration) for rec in records]
        wav_files = [rec.wav for rec in records]
        targets = [int(rec.label) for rec in records]

        return wav_files, targets


================================================
FILE: audio/paddleaudio/datasets/rirs_noises.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import csv
import os
import random
from typing import List

from paddle.io import Dataset
from tqdm import tqdm

from ..backends.soundfile_backend import soundfile_load as load_audio
from ..backends.soundfile_backend import soundfile_save as save_wav
from ..utils import DATA_HOME
from ..utils.download import download_and_decompress
from .dataset import feat_funcs

__all__ = ['OpenRIRNoise']


class OpenRIRNoise(Dataset):
    archives = [
        {
            'url': 'http://www.openslr.org/resources/28/rirs_noises.zip',
            'md5': 'e6f48e257286e05de56413b4779d8ffb',
        },
    ]

    sample_rate = 16000
    meta_info = collections.namedtuple('META_INFO', ('id', 'duration', 'wav'))
    base_path = os.path.join(DATA_HOME, 'open_rir_noise')
    wav_path = os.path.join(base_path, 'RIRS_NOISES')
    csv_path = os.path.join(base_path, 'csv')
    subsets = ['rir', 'noise']

    def __init__(self,
                 subset: str='rir',
                 feat_type: str='raw',
                 target_dir=None,
                 random_chunk: bool=True,
                 chunk_duration: float=3.0,
                 seed: int=0,
                 **kwargs):
        """Validate the subset, store the options and load the subset records.

        Args:
            subset (str, optional): One of `subsets`. Defaults to 'rir'.
            feat_type (str, optional): Key into `feat_funcs`, used when a record is fetched. Defaults to 'raw'.
            target_dir (optional): If set, csv files are kept under `<target_dir>/open_rir_noise/csv`. Defaults to None.
            random_chunk (bool, optional): Stored on the instance. Defaults to True.
            chunk_duration (float, optional): Chunk length in seconds used when the csv files are generated. Defaults to 3.0.
            seed (int, optional): Accepted but not used; the seeding call is disabled. Defaults to 0.
            **kwargs: Kept as `feat_config` and forwarded to the feature function.
        """
        assert subset in self.subsets, \
            'Dataset subset must be one in {}, but got {}'.format(self.subsets, subset)

        self.feat_config = kwargs
        self.feat_type = feat_type
        self.subset = subset
        self.chunk_duration = chunk_duration
        self.random_chunk = random_chunk

        # The csv location is stored on the class, so it is shared by every instance.
        if target_dir:
            csv_dir = os.path.join(target_dir, "open_rir_noise", "csv")
        else:
            csv_dir = self.csv_path
        OpenRIRNoise.csv_path = csv_dir

        self._data = self._get_data()
        super(OpenRIRNoise, self).__init__()

        # Seeding is intentionally left disabled:
        # random.seed(seed)

    def _get_data(self):
        # Download audio files.
        print(f"rirs noises base path: {self.base_path}")
        if not os.path.isdir(self.base_path):
            download_and_decompress(
                self.archives, self.base_path, decompress=True)
        else:
            print(
                f"{self.base_path} already exists, we will not download and decompress again"
            )

        # Data preparation.
        print(f"prepare the csv to {self.csv_path}")
        if not os.path.isdir(self.csv_path):
            os.makedirs(self.csv_path)
            self.prepare_data()

        data = []
        with open(os.path.join(self.csv_path, f'{self.subset}.csv'), 'r') as rf:
            for line in rf.readlines()[1:]:
                audio_id, duration, wav = line.strip().split(',')
                data.append(self.meta_info(audio_id, float(duration), wav))

        random.shuffle(data)
        return data

    def _convert_to_record(self, idx: int):
        """Build the record dict for sample `idx`.

        The record holds every field of the stored META_INFO tuple plus a
        'feat' entry: the output of the configured feature function, or the
        loaded waveform itself when `feat_funcs[feat_type]` is falsy.
        """
        sample = self._data[idx]

        # Copy each namedtuple field into a plain dict.
        record = {name: getattr(sample, name) for name in type(sample)._fields}

        waveform, sr = load_audio(record['wav'])

        assert self.feat_type in feat_funcs.keys(), \
            f"Unknown feat_type: {self.feat_type}, it must be one in {list(feat_funcs.keys())}"
        extractor = feat_funcs[self.feat_type]
        if extractor:
            feat = extractor(waveform, sr=sr, **self.feat_config)
        else:
            feat = waveform

        record['feat'] = feat
        return record

    @staticmethod
    def _get_chunks(seg_dur, audio_id, audio_duration):
        num_chunks = int(audio_duration / seg_dur)  # all in milliseconds

        chunk_lst = [
            audio_id + "_" + str(i * seg_dur) + "_" + str(i * seg_dur + seg_dur)
            for i in range(num_chunks)
        ]
        return chunk_lst

    def _get_audio_info(self, wav_file: str,
                        split_chunks: bool) -> List[List[str]]:
        """Return the csv rows (id, duration, wav) for one audio file.

        When `split_chunks` is set and the audio is longer than
        `self.chunk_duration`, every full chunk is written to its own wav file
        under `base_path` and gets its own row; otherwise a single row for the
        whole file is returned.
        """
        waveform, sr = load_audio(wav_file)
        # The id is the path below ".../open_rir_noise/", cut at the first dot.
        audio_id = wav_file.split("/open_rir_noise/")[-1].split(".")[0]
        audio_duration = waveform.shape[0] / sr

        if not (split_chunks and audio_duration > self.chunk_duration):
            # Keep whole audio.
            return [[audio_id, audio_duration, wav_file]]

        rows = []
        chunk_ids = self._get_chunks(self.chunk_duration, audio_id,
                                     audio_duration)
        for chunk_no, chunk_id in enumerate(chunk_ids, start=1):
            # The last two '_'-separated tokens are the start and end timestamps.
            start_sec, end_sec = chunk_id.split("_")[-2:]
            first = int(float(start_sec) * sr)
            last = int(float(end_sec) * sr)
            chunk_wav = os.path.join(self.base_path,
                                     audio_id + f'_chunk_{chunk_no:02}.wav')
            save_wav(waveform[first:last], sr, chunk_wav)
            # id, duration, new_wav
            rows.append([chunk_id, self.chunk_duration, chunk_wav])
        return rows

    def generate_csv(self,
                     wav_files: List[str],
                     output_file: str,
                     split_chunks: bool=True):
        """Write a csv with the columns id, duration, wav for `wav_files`.

        Args:
            wav_files (List[str]): Audio files to describe.
            output_file (str): Path of the csv file to (over)write.
            split_chunks (bool, optional): Forwarded to `_get_audio_info`. Defaults to True.
        """
        print(f'Generating csv: {output_file}')
        header = ["id", "duration", "wav"]

        # One list of rows per input file, with a progress bar.
        infos = list(
            tqdm(
                map(self._get_audio_info, wav_files, [split_chunks] * len(
                    wav_files)),
                total=len(wav_files)))

        # newline="" lets the csv module control line endings; without it an
        # extra '\r' is written on platforms that translate '\n'.
        with open(output_file, mode="w", newline="") as csv_f:
            csv_writer = csv.writer(
                csv_f, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
            csv_writer.writerow(header)
            for info in infos:
                csv_writer.writerows(info)

    def prepare_data(self):
        rir_list = os.path.join(self.wav_path, "real_rirs_isotropic_noises",
                                "rir_list")
        rir_files = []
        with open(rir_list, 'r') as f:
            for line in f.readlines():
                rir_file = line.strip().split(' ')[-1]
                rir_files.append(os.path.join(self.base_path, rir_file))

        noise_list = os.path.join(self.wav_path, "pointsource_noises",
                                  "noise_list")
        noise_files = []
        with open(noise_list, 'r') as f:
            for line in f.readlines():
                noise_file = line.strip().split(' ')[-1]
                noise_files.append(os.path.join(self.base_path, noise_file))

        self.generate_csv(rir_files, os.path.join(self.csv_path, 'rir.csv'))
        self.generate_csv(noise_files, os.path.join(self.csv_path, 'noise.csv'))

    def __getitem__(self, idx):
        """Return the record dict built by `_convert_to_record` for sample `idx`."""
        return self._convert_to_record(idx)

    def __len__(self):
        """Return the number of records loaded for the selected subset."""
        return len(self._data)


================================================
FILE: audio/paddleaudio/datasets/tess.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import os
import random
from typing import List
from typing import Tuple

from ..utils.download import download_and_decompress
from ..utils.env import DATA_HOME
from .dataset import AudioClassificationDataset

__all__ = ['TESS']


class TESS(AudioClassificationDataset):
    """
    TESS is a set of 200 target words spoken in the carrier phrase
    "Say the word _____" by two actresses (aged 26 and 64 years);
    recordings were made of the set portraying each of seven emotions (anger,
    disgust, fear, happiness, pleasant surprise, sadness, and neutral).
    There are 2800 stimuli in total.

    Reference:
        Toronto emotional speech set (TESS)
        https://doi.org/10.5683/SP2/E8H2MF
    """

    archives = [
        {
            'url':
            'https://bj.bcebos.com/paddleaudio/datasets/TESS_Toronto_emotional_speech_set.zip',
            'md5':
            '1465311b24d1de704c4c63e4ccc470c7',
        },
    ]
    label_list = [
        'angry',
        'disgust',
        'fear',
        'happy',
        'neutral',
        'ps',  # pleasant surprise
        'sad',
    ]
    meta_info = collections.namedtuple('META_INFO',
                                       ('speaker', 'word', 'emotion'))
    audio_path = 'TESS_Toronto_emotional_speech_set'

    def __init__(self,
                 mode='train',
                 seed=0,
                 n_folds=5,
                 split=1,
                 feat_type='raw',
                 **kwargs):
        """
        Args:
            mode (:obj:`str`, `optional`, defaults to `train`):
                It identifies the dataset mode (train or dev).
            seed (:obj:`int`, `optional`, defaults to 0):
                Set the random seed to shuffle samples.
            n_folds (:obj:`int`, `optional`, defaults to 5):
                Split the dataset into n folds. 1 fold for dev dataset and n-1 for train dataset.
            split (:obj:`int`, `optional`, defaults to 1):
                It specifies the fold (1-based) used as the dev dataset.
            feat_type (:obj:`str`, `optional`, defaults to `raw`):
                It identifies the feature type that user wants to extract of an audio file.
        """
        assert split <= n_folds, f'The selected split should not be larger than n_fold, but got {split} > {n_folds}'
        files, labels = self._get_data(mode, seed, n_folds, split)
        super(TESS, self).__init__(
            files=files, labels=labels, feat_type=feat_type, **kwargs)

    def _get_meta_info(self, files) -> List[collections.namedtuple]:
        """Parse `<speaker>_<word>_<emotion>` file stems into META_INFO tuples."""
        ret = []
        for file in files:
            stem, _ = os.path.splitext(os.path.basename(file))
            ret.append(self.meta_info(*stem.split('_')))
        return ret

    def _get_data(self, mode, seed, n_folds,
                  split) -> Tuple[List[str], List[int]]:
        """Collect the wav files and labels that belong to the requested split.

        Args:
            mode: 'train' selects every fold except `split`; any other value
                selects fold `split` only.
            seed: Seed passed to `random.seed` before shuffling.
            n_folds: Number of folds the shuffled file list is divided into.
            split: 1-based index of the dev fold.

        Returns:
            Tuple[List[str], List[int]]: file paths and their indices in `label_list`.
        """
        data_root = os.path.join(DATA_HOME, self.audio_path)
        if not os.path.isdir(data_root):
            download_and_decompress(self.archives, DATA_HOME)

        wav_files = []
        for root, _, files in os.walk(data_root):
            for file in files:
                if file.endswith('.wav'):
                    wav_files.append(os.path.join(root, file))
        # os.walk yields entries in arbitrary, filesystem-dependent order. Sort
        # first so that the same seed always produces the same shuffle, and the
        # train and dev datasets are guaranteed to be complementary.
        wav_files.sort()

        random.seed(seed)  # shuffle samples to split data
        random.shuffle(
            wav_files
        )  # make sure using the same seed to create train and dev dataset
        meta_info = self._get_meta_info(wav_files)

        files = []
        labels = []
        # At least one sample per fold, so the division below cannot fail when
        # there are fewer samples than folds.
        n_samples_per_fold = max(1, len(meta_info) // n_folds)
        for idx, sample in enumerate(meta_info):
            _, _, emotion = sample
            target = self.label_list.index(emotion)
            # Samples left over from the integer division go to the last fold
            # instead of a non-existent fold n_folds + 1.
            fold = min(idx // n_samples_per_fold + 1, n_folds)

            # train -> everything outside the dev fold; otherwise -> dev fold only.
            in_dev_fold = fold == split
            if (mode == 'train') != in_dev_fold:
                files.append(wav_files[idx])
                labels.append(target)

        return files, labels


================================================
FILE: audio/paddleaudio/datasets/urban_sound.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import os
from typing import List
from typing import Tuple

from ..utils.download import download_and_decompress
from ..utils.env import DATA_HOME
from .dataset import AudioClassificationDataset

__all__ = ['UrbanSound8K']


class UrbanSound8K(AudioClassificationDataset):
    """
    UrbanSound8K dataset contains 8732 labeled sound excerpts (<=4s) of urban
    sounds from 10 classes: air_conditioner, car_horn, children_playing, dog_bark,
    drilling, engine_idling, gun_shot, jackhammer, siren, and street_music. The
    classes are drawn from the urban sound taxonomy.

    Reference:
        A Dataset and Taxonomy for Urban Sound Research
        https://dl.acm.org/doi/10.1145/2647868.2655045
    """

    archives = [
        {
            'url':
            'https://zenodo.org/record/1203745/files/UrbanSound8K.tar.gz',
            'md5': '9aa69802bbf37fb986f71ec1483a196e',
        },
    ]
    label_list = [
        "air_conditioner", "car_horn", "children_playing", "dog_bark",
        "drilling", "engine_idling", "gun_shot", "jackhammer", "siren",
        "street_music"
    ]
    meta = os.path.join('UrbanSound8K', 'metadata', 'UrbanSound8K.csv')
    meta_info = collections.namedtuple(
        'META_INFO', ('filename', 'fsid', 'start', 'end', 'salience', 'fold',
                      'class_id', 'label'))
    audio_path = os.path.join('UrbanSound8K', 'audio')

    def __init__(self,
                 mode: str='train',
                 split: int=1,
                 feat_type: str='raw',
                 **kwargs):
        """
        Args:
            mode (:obj:`str`, `optional`, defaults to `train`):
                It identifies the dataset mode (train or dev).
            split (:obj:`int`, `optional`, defaults to 1):
                It specifies the fold used as the dev dataset.
            feat_type (:obj:`str`, `optional`, defaults to `raw`):
                It identifies the feature type that user wants to extract of an audio file.
        """
        files, labels = self._get_data(mode, split)
        super(UrbanSound8K, self).__init__(
            files=files, labels=labels, feat_type=feat_type, **kwargs)

    def _get_meta_info(self):
        """Read the metadata csv and return one META_INFO tuple per data row."""
        ret = []
        with open(os.path.join(DATA_HOME, self.meta), 'r') as rf:
            next(rf, None)  # skip the header row
            for line in rf:
                line = line.strip()
                if not line:  # tolerate blank lines, e.g. at the end of the file
                    continue
                ret.append(self.meta_info(*line.split(',')))
        return ret

    def _get_data(self, mode: str, split: int) -> Tuple[List[str], List[int]]:
        """Collect the audio files and class ids that belong to the requested split.

        Args:
            mode (str): 'train' selects every fold except `split`; any other
                value selects fold `split` only.
            split (int): Fold number used as the dev dataset.

        Returns:
            Tuple[List[str], List[int]]: file paths and their integer class ids.
        """
        if not os.path.isdir(os.path.join(DATA_HOME, self.audio_path)) or \
            not os.path.isfile(os.path.join(DATA_HOME, self.meta)):
            download_and_decompress(self.archives, DATA_HOME)

        meta_info = self._get_meta_info()

        files = []
        labels = []
        for sample in meta_info:
            filename, _, _, _, _, fold, target, _ = sample
            # train -> everything outside the dev fold; otherwise -> dev fold only.
            in_dev_fold = int(fold) == split
            if (mode == 'train') != in_dev_fold:
                files.append(
                    os.path.join(DATA_HOME, self.audio_path, f'fold{fold}',
                                 filename))
                labels.append(int(target))

        return files, labels


================================================
FILE: audio/paddleaudio/datasets/voxceleb.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import csv
import glob
import os
import random
from multiprocessing import cpu_count
from typing import List

from paddle.io import Dataset
from pathos.multiprocessing import Pool
from tqdm import tqdm

from ..backends.soundfile_backend import soundfile_load as load_audio
from ..utils import DATA_HOME
from ..utils import decompress
from ..utils.download import download_and_decompress
from .dataset import feat_funcs

__all__ = ['VoxCeleb']


class VoxCeleb(Dataset):
    source_url = 'https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/'
    archives_audio_dev = [
        {
            'url': source_url + 'vox1_dev_wav_partaa',
            'md5': 'e395d020928bc15670b570a21695ed96',
        },
        {
            'url': source_url + 'vox1_dev_wav_partab',
            'md5': 'bbfaaccefab65d82b21903e81a8a8020',
        },
        {
            'url': source_url + 'vox1_dev_wav_partac',
            'md5': '017d579a2a96a077f40042ec33e51512',
        },
        {
            'url': source_url + 'vox1_dev_wav_partad',
            'md5': '7bb1e9f70fddc7a678fa998ea8b3ba19',
        },
    ]
    archives_audio_test = [
        {
            'url': source_url + 'vox1_test_wav.zip',
            'md5': '185fdc63c3c739954633d50379a3d102',
        },
    ]
    archives_meta = [
        {
            'url':
            'https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/veri_test2.txt',
            'md5':
            'b73110731c9223c1461fe49cb48dddfc',
        },
    ]

    num_speakers = 1211  # 1211 vox1, 5994 vox2, 7205 vox1+2, test speakers: 41
    sample_rate = 16000
    meta_info = collections.namedtuple(
        'META_INFO', ('id', 'duration', 'wav', 'start', 'stop', 'spk_id'))
    base_path = os.path.join(DATA_HOME, 'vox1')
    wav_path = os.path.join(base_path, 'wav')
    meta_path = os.path.join(base_path, 'meta')
    veri_test_file = os.path.join(meta_path, 'veri_test2.txt')
    csv_path = os.path.join(base_path, 'csv')
    subsets = ['train', 'dev', 'enroll', 'test']

    def __init__(
            self,
            subset: str='train',
            feat_type: str='raw',
            random_chunk: bool=True,
            chunk_duration: float=3.0,  # seconds
            split_ratio: float=0.9,  # train split ratio
            seed: int=0,
            target_dir: str=None,
            vox2_base_path=None,
            **kwargs):
        """Prepare the VoxCeleb data and load the audio info of one subset.

        Args:
            subset (str, optional): dataset name, such as train, dev, enroll or test. Defaults to 'train'.
            feat_type (str, optional): feature type, such as raw, melspectrogram(fbank) or mfcc. Defaults to 'raw'.
            random_chunk (bool, optional): randomly select a chunk from the audio. Defaults to True.
            chunk_duration (float, optional): chunk duration in seconds. Defaults to 3.0.
            split_ratio (float, optional): fraction of the files assigned to train when the csv files are generated. Defaults to 0.9.
            seed (int, optional): accepted but not used; the seeding call is disabled. Defaults to 0.
            target_dir (str, optional): data dir, audio info will be stored in this directory. Defaults to None.
            vox2_base_path (optional): vox2 directory. vox2 data must be converted from m4a to wav. Defaults to None.
            **kwargs: kept as `feat_config` and forwarded to the feature function.
        """
        assert subset in self.subsets, \
            'Dataset subset must be one in {}, but got {}'.format(self.subsets, subset)

        self.subset = subset
        self.feat_type = feat_type
        self.feat_config = kwargs
        self.random_chunk = random_chunk
        self.chunk_duration = chunk_duration
        self.split_ratio = split_ratio
        self.vox2_base_path = vox2_base_path
        self.target_dir = target_dir or VoxCeleb.base_path
        self.spk_id2label = {}

        # With a target dir, the csv and meta locations move below it. They are
        # stored on the class, so the change is shared by every instance.
        if target_dir:
            info_root = os.path.join(target_dir, "voxceleb")
            VoxCeleb.csv_path = os.path.join(info_root, 'csv')
            VoxCeleb.meta_path = os.path.join(info_root, 'meta')
        VoxCeleb.veri_test_file = os.path.join(VoxCeleb.meta_path,
                                               'veri_test2.txt')

        self._data = self._get_data()
        super(VoxCeleb, self).__init__()

        # Seeding is intentionally left disabled:
        # random.seed(seed)

    def _get_data(self):
        # Download audio files.
        # We need the users to decompress all vox1/dev/wav and vox1/test/wav/ to vox1/wav/ dir
        # so, we check the vox1/wav dir status
        print(f"wav base path: {self.wav_path}")
        if not os.path.isdir(self.wav_path):
            print("start to download the voxceleb1 dataset")
            download_and_decompress(  # multi-zip parts concatenate to vox1_dev_wav.zip
                self.archives_audio_dev,
                self.base_path,
                decompress=False)
            download_and_decompress(  # download the vox1_test_wav.zip and unzip
                self.archives_audio_test,
                self.base_path,
                decompress=True)

            # Download all parts and concatenate the files into one zip file.
            dev_zipfile = os.path.join(self.base_path, 'vox1_dev_wav.zip')
            print(f'Concatenating all parts to: {dev_zipfile}')
            os.system(
                f'cat {os.path.join(self.base_path, "vox1_dev_wav_parta*")} > {dev_zipfile}'
            )

            # Extract all audio files of dev and test set.
            decompress(dev_zipfile, self.base_path)

        # Download meta files.
        if not os.path.isdir(self.meta_path):
            print("prepare the meta data")
            download_and_decompress(
                self.archives_meta, self.meta_path, decompress=False)

        # Data preparation.
        if not os.path.isdir(self.csv_path):
            os.makedirs(self.csv_path)
            self.prepare_data()

        data = []
        print(
            f"read the {self.subset} from {os.path.join(self.csv_path, f'{self.subset}.csv')}"
        )
        with open(os.path.join(self.csv_path, f'{self.subset}.csv'), 'r') as rf:
            for line in rf.readlines()[1:]:
                audio_id, duration, wav, start, stop, spk_id = line.strip(
                ).split(',')
                data.append(
                    self.meta_info(audio_id,
                                   float(duration), wav,
                                   int(start), int(stop), spk_id))

        with open(os.path.join(self.meta_path, 'spk_id2label.txt'), 'r') as f:
            for line in f.readlines():
                spk_id, label = line.strip().split(' ')
                self.spk_id2label[spk_id] = int(label)

        return data

    def _convert_to_record(self, idx: int):
        """Build the record dict for sample `idx`.

        The record holds every field of the stored META_INFO tuple, a 'feat'
        entry computed from the selected part of the waveform and, for the
        train and dev subsets, a 'label' entry looked up in `spk_id2label`.

        With `random_chunk` set, a window of `chunk_duration * sr` samples is
        taken at a random offset; otherwise the stored start/stop are used.
        """
        sample = self._data[idx]

        record = {}
        # To show all fields in a namedtuple: `type(sample)._fields`
        for field in type(sample)._fields:
            record[field] = getattr(sample, field)

        waveform, sr = load_audio(record['wav'])

        # random select a chunk audio samples from the audio
        if self.random_chunk:
            num_wav_samples = waveform.shape[0]
            num_chunk_samples = int(self.chunk_duration * sr)
            # random.randint raises ValueError on an empty range, which used to
            # happen for audio no longer than one chunk. Clamp the upper bound
            # so such audio is taken from offset 0 (the slice below is simply
            # truncated at the end of the waveform).
            max_start = max(0, num_wav_samples - num_chunk_samples - 1)
            start = random.randint(0, max_start)
            stop = start + num_chunk_samples
        else:
            start = record['start']
            stop = record['stop']

        waveform = waveform[start:stop]

        assert self.feat_type in feat_funcs.keys(), \
            f"Unknown feat_type: {self.feat_type}, it must be one in {list(feat_funcs.keys())}"
        feat_func = feat_funcs[self.feat_type]
        feat = feat_func(
            waveform, sr=sr, **self.feat_config) if feat_func else waveform

        record.update({'feat': feat})
        if self.subset in ['train',
                           'dev']:  # Labels are available in train and dev.
            record.update({'label': self.spk_id2label[record['spk_id']]})

        return record

    @staticmethod
    def _get_chunks(seg_dur, audio_id, audio_duration):
        num_chunks = int(audio_duration / seg_dur)  # all in milliseconds

        chunk_lst = [
            audio_id + "_" + str(i * seg_dur) + "_" + str(i * seg_dur + seg_dur)
            for i in range(num_chunks)
        ]
        return chunk_lst

    def _get_audio_info(self, wav_file: str,
                        split_chunks: bool) -> List[List[str]]:
        """Return the csv rows (id, duration, wav, start, stop, spk_id) for one file.

        The last three '/'-separated path components are read as speaker,
        session and utterance. With `split_chunks`, one row per full chunk of
        `self.chunk_duration` is produced, with start/stop given as sample
        offsets; otherwise a single row covers the whole waveform.
        """
        waveform, sr = load_audio(wav_file)
        spk_id, sess_id, utt_id = wav_file.split("/")[-3:]
        audio_id = '-'.join([spk_id, sess_id, utt_id.split(".")[0]])
        audio_duration = waveform.shape[0] / sr

        if not split_chunks:
            # Keep whole audio.
            return [[
                audio_id, audio_duration, wav_file, 0, waveform.shape[0], spk_id
            ]]

        rows = []
        for chunk_id in self._get_chunks(self.chunk_duration, audio_id,
                                         audio_duration):
            # The last two '_'-separated tokens are the start and end timestamps.
            start_sec, end_sec = chunk_id.split("_")[-2:]
            first = int(float(start_sec) * sr)
            last = int(float(end_sec) * sr)
            rows.append(
                [chunk_id, audio_duration, wav_file, first, last, spk_id])
        return rows

    def generate_csv(self,
                     wav_files: List[str],
                     output_file: str,
                     split_chunks: bool=True):
        """Write a csv with id, duration, wav, start, stop, spk_id for `wav_files`.

        Args:
            wav_files (List[str]): Audio files to describe.
            output_file (str): Path of the csv file to (over)write.
            split_chunks (bool, optional): Forwarded to `_get_audio_info`. Defaults to True.
        """
        print(f'Generating csv: {output_file}')
        header = ["id", "duration", "wav", "start", "stop", "spk_id"]
        # Note: a C++ exception may be reported here, but the program still
        # runs to completion, so it can be ignored.
        with Pool(cpu_count()) as p:
            infos = list(
                tqdm(
                    p.imap(lambda x: self._get_audio_info(x, split_chunks),
                           wav_files),
                    total=len(wav_files)))

        # newline="" lets the csv module control line endings; without it an
        # extra '\r' is written on platforms that translate '\n'.
        with open(output_file, mode="w", newline="") as csv_f:
            csv_writer = csv.writer(
                csv_f, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
            csv_writer.writerow(header)
            for info in infos:
                csv_writer.writerows(info)

    def prepare_data(self):
        """Generate `spk_id2label.txt` and the train/dev/enroll/test csv files.

        Enroll and test files come from the trial list in `veri_test_file`.
        Every wav found under `wav_path` (and `vox2_base_path`, when usable)
        whose speaker does not occur in that list is shuffled and divided into
        train and dev according to `split_ratio`.
        """
        # Audio of speakers in veri_test_file should not be included in training set.
        print("start to prepare the data csv file")
        enroll_files = set()
        test_files = set()
        # get the enroll and test audio file path
        # (each line: <first field, ignored> <enroll file> <test file>)
        with open(self.veri_test_file, 'r') as f:
            for line in f.readlines():
                _, enrol_file, test_file = line.strip().split(' ')
                enroll_files.add(os.path.join(self.wav_path, enrol_file))
                test_files.add(os.path.join(self.wav_path, test_file))
            # sets are turned into sorted lists for a stable order
            enroll_files = sorted(enroll_files)
            test_files = sorted(test_files)

        # get the enroll and test speakers
        test_spks = set()
        for file in (enroll_files + test_files):
            # speaker id = first path component after '/wav/'
            spk = file.split('/wav/')[1].split('/')[0]
            test_spks.add(spk)

        # get all the train and dev audios file path
        audio_files = []
        speakers = set()
        print("Getting file list...")
        for path in [self.wav_path, self.vox2_base_path]:
            # A path that is unset or does not exist is skipped.
            # NOTE(review): the message below mentions vox2 even when the
            # skipped path is wav_path.
            if not path or not os.path.exists(path):
                print(f"{path} is an invalid path, please check again, "
                      "and we will ignore the vox2 base path")
                continue
            for file in glob.glob(
                    os.path.join(path, "**", "*.wav"), recursive=True):
                spk = file.split('/wav/')[1].split('/')[0]
                # speakers of the verification trials are kept out of train/dev
                if spk in test_spks:
                    continue
                speakers.add(spk)
                audio_files.append(file)

        print(
            f"start to generate the {os.path.join(self.meta_path, 'spk_id2label.txt')}"
        )
        # encode the train and dev speakers label to spk_id2label.txt
        # (label = position of the speaker id in sorted order)
        with open(os.path.join(self.meta_path, 'spk_id2label.txt'), 'w') as f:
            for label, spk_id in enumerate(
                    sorted(speakers)):  # 1211 vox1, 5994 vox2, 7205 vox1+2
                f.write(f'{spk_id} {label}\n')

        # NOTE(review): no seed is set in this method, so the train/dev
        # partition depends on the state of the global `random` generator.
        audio_files = sorted(audio_files)
        random.shuffle(audio_files)
        split_idx = int(self.split_ratio * len(audio_files))
        # the first split_ratio share goes to train, the rest to dev
        train_files, dev_files = audio_files[:split_idx], audio_files[
            split_idx:]

        # train and dev are written with the default chunk splitting
        self.generate_csv(train_files, os.path.join(self.csv_path, 'train.csv'))
        self.generate_csv(dev_files, os.path.join(self.csv_path, 'dev.csv'))

        # enroll and test keep each audio whole
        self.generate_csv(
            enroll_files,
            os.path.join(self.csv_path, 'enroll.csv'),
            split_chunks=False)
        self.generate_csv(
            test_files,
            os.path.join(self.csv_path, 'test.csv'),
            split_chunks=False)

    def __getitem__(self, idx):
        """Return the record dict built by `_convert_to_record` for sample `idx`."""
        return self._convert_to_record(idx)

    def __len__(self):
        """Return the number of records loaded for the selected subset."""
        return len(self._data)


================================================
FILE: audio/paddleaudio/features/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .layers import LogMelSpectrogram
from .layers import MelSpectrogram
from .layers import MFCC
from .layers import Spectrogram


================================================
FILE: audio/paddleaudio/features/layers.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from functools import partial
from typing import Optional
from typing import Union

import paddle
import paddle.nn as nn
from paddle import Tensor

from ..functional import compute_fbank_matrix
from ..functional import create_dct
from ..functional import power_to_db
from ..functional.window import get_window

__all__ = [
    'Spectrogram',
    'MelSpectrogram',
    'LogMelSpectrogram',
    'MFCC',
]


class Spectrogram(nn.Layer):
    r"""Spectrogram layer for batches of signals, typically audio waveforms.
    The output is the magnitude of the short-time Fourier transform (STFT) raised to `power`.

    Args:
        n_fft (int, optional): The number of frequency components of the discrete Fourier transform. Defaults to 512.
        hop_length (Optional[int], optional): The hop length of the short time FFT. The value is forwarded unchanged to `paddle.signal.stft`, so that function's own default applies when it is `None`. Defaults to None.
        win_length (Optional[int], optional): The window length of the short time FFT. If `None`, it is set to same as `n_fft`. Defaults to None.
        window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. Defaults to 'hann'.
        power (float, optional): Exponent for the magnitude spectrogram. Must be > 0. Defaults to 2.0.
        center (bool, optional): Whether to pad `x` so that the `t`-th frame is centered at :math:`t \times hop\_length`. Defaults to True.
        pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to 'reflect'.
        dtype (str, optional): Data type of input and window. Defaults to 'float32'.
    """

    def __init__(self,
                 n_fft: int=512,
                 hop_length: Optional[int]=None,
                 win_length: Optional[int]=None,
                 window: str='hann',
                 power: float=2.0,
                 center: bool=True,
                 pad_mode: str='reflect',
                 dtype: str='float32') -> None:
        super().__init__()

        assert power > 0, 'Power of spectrogram must be > 0.'
        self.power = power

        # The window covers the whole FFT frame unless a length is given.
        effective_win_length = n_fft if win_length is None else win_length

        self.fft_window = get_window(
            window, effective_win_length, fftbins=True, dtype=dtype)

        # Freeze every STFT option so that `forward` only has to supply `x`.
        stft_options = dict(
            n_fft=n_fft,
            hop_length=hop_length,
            win_length=effective_win_length,
            window=self.fft_window,
            center=center,
            pad_mode=pad_mode)
        self._stft = partial(paddle.signal.stft, **stft_options)
        self.register_buffer('fft_window', self.fft_window)

    def forward(self, x: Tensor) -> Tensor:
        """
        Args:
            x (Tensor): Tensor of waveforms with shape `(N, T)`

        Returns:
            Tensor: Spectrograms with shape `(N, n_fft//2 + 1, num_frames)`.
        """
        complex_frames = self._stft(x)
        magnitude = paddle.abs(complex_frames)
        return paddle.pow(magnitude, self.power)


class MelSpectrogram(nn.Layer):
    r"""Compute the melspectrogram of given signals, typically audio waveforms. It is computed by multiplying spectrogram with Mel filter bank matrix.

    Args:
        sr (int, optional): Sample rate. Defaults to 22050.
        n_fft (int, optional): The number of frequency components of the discrete Fourier transform. Defaults to 512.
        hop_length (Optional[int], optional): The hop length of the short time FFT. The value is forwarded unchanged to the underlying `Spectrogram` layer. Defaults to None.
        win_length (Optional[int], optional): The window length of the short time FFT. If `None`, it is set to same as `n_fft`. Defaults to None.
        window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. Defaults to 'hann'.
        power (float, optional): Exponent for the magnitude spectrogram. Defaults to 2.0.
        center (bool, optional): Whether to pad `x` so that the `t`-th frame is centered at :math:`t \times hop\_length`. Defaults to True.
        pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to 'reflect'.
        n_mels (int, optional): Number of mel bins. Defaults to 64.
        f_min (float, optional): Minimum frequency in Hz. Defaults to 50.0.
        f_max (Optional[float], optional): Maximum frequency in Hz. If `None`, the Nyquist frequency `sr / 2` is used for the filter bank. Defaults to None.
        htk (bool, optional): Use HTK formula in computing fbank matrix. Defaults to False.
        norm (Union[str, float], optional): Type of normalization in computing fbank matrix. Slaney-style is used by default. You can specify norm=1.0/2.0 to use customized p-norm normalization. Defaults to 'slaney'.
        dtype (str, optional): Data type of input and window. Defaults to 'float32'.
    """

    def __init__(self,
                 sr: int=22050,
                 n_fft: int=512,
                 hop_length: Optional[int]=None,
                 win_length: Optional[int]=None,
                 window: str='hann',
                 power: float=2.0,
                 center: bool=True,
                 pad_mode: str='reflect',
                 n_mels: int=64,
                 f_min: float=50.0,
                 f_max: Optional[float]=None,
                 htk: bool=False,
                 norm: Union[str, float]='slaney',
                 dtype: str='float32') -> None:
        super(MelSpectrogram, self).__init__()

        self._spectrogram = Spectrogram(
            n_fft=n_fft,
            hop_length=hop_length,
            win_length=win_length,
            window=window,
            power=power,
            center=center,
            pad_mode=pad_mode,
            dtype=dtype)
        # The attributes keep the values exactly as passed by the caller
        # (`self.f_max` stays `None` when no maximum was given).
        self.n_mels = n_mels
        self.f_min = f_min
        self.f_max = f_max
        self.htk = htk
        self.norm = norm
        if f_max is None:
            # Nyquist frequency. True division keeps the extra half Hz for odd
            # sample rates and matches the default used by
            # `compute_fbank_matrix` itself.
            f_max = float(sr) / 2
        # The filter bank is built in `dtype`, i.e. the same type as the
        # spectrogram it is multiplied with in `forward`.
        self.fbank_matrix = compute_fbank_matrix(
            sr=sr,
            n_fft=n_fft,
            n_mels=n_mels,
            f_min=f_min,
            f_max=f_max,
            htk=htk,
            norm=norm,
            dtype=dtype)
        self.register_buffer('fbank_matrix', self.fbank_matrix)

    def forward(self, x: Tensor) -> Tensor:
        """
        Args:
            x (Tensor): Tensor of waveforms with shape `(N, T)`

        Returns:
            Tensor: Mel spectrograms with shape `(N, n_mels, num_frames)`.
        """
        spect_feature = self._spectrogram(x)
        # (n_mels, n_fft//2 + 1) x (N, n_fft//2 + 1, num_frames)
        mel_feature = paddle.matmul(self.fbank_matrix, spect_feature)
        return mel_feature


class LogMelSpectrogram(nn.Layer):
    r"""Log-mel-spectrogram layer: a `MelSpectrogram` followed by `power_to_db`.

    Args:
        sr (int, optional): Sample rate. Defaults to 22050.
        n_fft (int, optional): The number of frequency components of the discrete Fourier transform. Defaults to 512.
        hop_length (Optional[int], optional): The hop length of the short time FFT. The value is forwarded unchanged to the underlying `MelSpectrogram` layer. Defaults to None.
        win_length (Optional[int], optional): The window length of the short time FFT. If `None`, it is set to same as `n_fft`. Defaults to None.
        window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. Defaults to 'hann'.
        power (float, optional): Exponent for the magnitude spectrogram. Defaults to 2.0.
        center (bool, optional): Whether to pad `x` so that the `t`-th frame is centered at :math:`t \times hop\_length`. Defaults to True.
        pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to 'reflect'.
        n_mels (int, optional): Number of mel bins. Defaults to 64.
        f_min (float, optional): Minimum frequency in Hz. Defaults to 50.0.
        f_max (Optional[float], optional): Maximum frequency in Hz. Defaults to None.
        htk (bool, optional): Use HTK formula in computing fbank matrix. Defaults to False.
        norm (Union[str, float], optional): Type of normalization in computing fbank matrix. Slaney-style is used by default. You can specify norm=1.0/2.0 to use customized p-norm normalization. Defaults to 'slaney'.
        ref_value (float, optional): The reference value. If smaller than 1.0, the db level of the signal will be pulled up accordingly. Otherwise, the db level is pushed down. Defaults to 1.0.
        amin (float, optional): The minimum value of input magnitude. Defaults to 1e-10.
        top_db (Optional[float], optional): The maximum db value of spectrogram. Defaults to None.
        dtype (str, optional): Data type of input and window. Defaults to 'float32'.
    """

    def __init__(self,
                 sr: int=22050,
                 n_fft: int=512,
                 hop_length: Optional[int]=None,
                 win_length: Optional[int]=None,
                 window: str='hann',
                 power: float=2.0,
                 center: bool=True,
                 pad_mode: str='reflect',
                 n_mels: int=64,
                 f_min: float=50.0,
                 f_max: Optional[float]=None,
                 htk: bool=False,
                 norm: Union[str, float]='slaney',
                 ref_value: float=1.0,
                 amin: float=1e-10,
                 top_db: Optional[float]=None,
                 dtype: str='float32') -> None:
        super().__init__()

        # Everything related to the mel spectrogram is handled by the sublayer.
        mel_options = dict(
            sr=sr,
            n_fft=n_fft,
            hop_length=hop_length,
            win_length=win_length,
            window=window,
            power=power,
            center=center,
            pad_mode=pad_mode,
            n_mels=n_mels,
            f_min=f_min,
            f_max=f_max,
            htk=htk,
            norm=norm,
            dtype=dtype)
        self._melspectrogram = MelSpectrogram(**mel_options)

        # Settings consumed by `power_to_db` in `forward`.
        self.ref_value = ref_value
        self.amin = amin
        self.top_db = top_db

    def forward(self, x: Tensor) -> Tensor:
        """
        Args:
            x (Tensor): Tensor of waveforms with shape `(N, T)`

        Returns:
            Tensor: Log mel spectrograms with shape `(N, n_mels, num_frames)`.
        """
        return power_to_db(
            self._melspectrogram(x),
            ref_value=self.ref_value,
            amin=self.amin,
            top_db=self.top_db)


class MFCC(nn.Layer):
    r"""Compute mel frequency cepstral coefficients(MFCCs) feature of given waveforms.
    The log-mel-spectrogram is projected onto a DCT basis created by `create_dct`.

    Args:
        sr (int, optional): Sample rate. Defaults to 22050.
        n_mfcc (int, optional): Number of cepstral coefficients to return. Must not be larger than `n_mels`. Defaults to 40.
        n_fft (int, optional): The number of frequency components of the discrete Fourier transform. Defaults to 512.
        hop_length (Optional[int], optional): The hop length of the short time FFT. The value is forwarded unchanged to the underlying `LogMelSpectrogram` layer. Defaults to None.
        win_length (Optional[int], optional): The window length of the short time FFT. If `None`, it is set to same as `n_fft`. Defaults to None.
        window (str, optional): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'kaiser', 'gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. Defaults to 'hann'.
        power (float, optional): Exponent for the magnitude spectrogram. Defaults to 2.0.
        center (bool, optional): Whether to pad `x` so that the `t`-th frame is centered at :math:`t \times hop\_length`. Defaults to True.
        pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to 'reflect'.
        n_mels (int, optional): Number of mel bins. Defaults to 64.
        f_min (float, optional): Minimum frequency in Hz. Defaults to 50.0.
        f_max (Optional[float], optional): Maximum frequency in Hz. Defaults to None.
        htk (bool, optional): Use HTK formula in computing fbank matrix. Defaults to False.
        norm (Union[str, float], optional): Type of normalization in computing fbank matrix. Slaney-style is used by default. You can specify norm=1.0/2.0 to use customized p-norm normalization. Defaults to 'slaney'.
        ref_value (float, optional): The reference value. If smaller than 1.0, the db level of the signal will be pulled up accordingly. Otherwise, the db level is pushed down. Defaults to 1.0.
        amin (float, optional): The minimum value of input magnitude. Defaults to 1e-10.
        top_db (Optional[float], optional): The maximum db value of spectrogram. Defaults to None.
        dtype (str, optional): Data type of input and window. Defaults to 'float32'.
    """

    def __init__(self,
                 sr: int=22050,
                 n_mfcc: int=40,
                 n_fft: int=512,
                 hop_length: Optional[int]=None,
                 win_length: Optional[int]=None,
                 window: str='hann',
                 power: float=2.0,
                 center: bool=True,
                 pad_mode: str='reflect',
                 n_mels: int=64,
                 f_min: float=50.0,
                 f_max: Optional[float]=None,
                 htk: bool=False,
                 norm: Union[str, float]='slaney',
                 ref_value: float=1.0,
                 amin: float=1e-10,
                 top_db: Optional[float]=None,
                 dtype: str='float32') -> None:
        super(MFCC, self).__init__()
        assert n_mfcc <= n_mels, 'n_mfcc cannot be larger than n_mels: %d vs %d' % (
            n_mfcc, n_mels)
        self._log_melspectrogram = LogMelSpectrogram(
            sr=sr,
            n_fft=n_fft,
            hop_length=hop_length,
            win_length=win_length,
            window=window,
            power=power,
            center=center,
            pad_mode=pad_mode,
            n_mels=n_mels,
            f_min=f_min,
            f_max=f_max,
            htk=htk,
            norm=norm,
            ref_value=ref_value,
            amin=amin,
            top_db=top_db,
            dtype=dtype)
        # DCT basis with shape (n_mels, n_mfcc), stored as a buffer.
        self.dct_matrix = create_dct(n_mfcc=n_mfcc, n_mels=n_mels, dtype=dtype)
        self.register_buffer('dct_matrix', self.dct_matrix)

    def forward(self, x: Tensor) -> Tensor:
        """
        Args:
            x (Tensor): Tensor of waveforms with shape `(N, T)`

        Returns:
            Tensor: Mel frequency cepstral coefficients with shape `(N, n_mfcc, num_frames)`.
        """
        log_mel_feature = self._log_melspectrogram(x)  # (B, n_mels, L)
        # Move the mel axis last, project it onto the DCT basis, then move the
        # coefficient axis back to position 1.
        mfcc = paddle.matmul(
            log_mel_feature.transpose((0, 2, 1)), self.dct_matrix).transpose(
                (0, 2, 1))  # (B, n_mfcc, L)
        return mfcc


================================================
FILE: audio/paddleaudio/functional/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .functional import compute_fbank_matrix
from .functional import create_dct
from .functional import fft_frequencies
from .functional import hz_to_mel
from .functional import mel_frequencies
from .functional import mel_to_hz
from .functional import power_to_db


================================================
FILE: audio/paddleaudio/functional/functional.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from librosa(https://github.com/librosa/librosa)
import math
from typing import Optional
from typing import Union

import paddle
from paddle import Tensor

__all__ = [
    'hz_to_mel',
    'mel_to_hz',
    'mel_frequencies',
    'fft_frequencies',
    'compute_fbank_matrix',
    'power_to_db',
    'create_dct',
]


def hz_to_mel(freq: Union[Tensor, float],
              htk: bool=False) -> Union[Tensor, float]:
    """Map frequencies in Hz onto the mel scale.

    With `htk=True` the HTK formula `2595 * log10(1 + f / 700)` is used.
    Otherwise the scale is linear below 1000 Hz and logarithmic above it.

    Args:
        freq (Union[Tensor, float]): The input tensor with arbitrary shape, or a scalar.
        htk (bool, optional): Use htk scaling. Defaults to False.

    Returns:
        Union[Tensor, float]: Frequency in mels.
    """
    is_tensor = isinstance(freq, Tensor)

    if htk:
        log10 = paddle.log10 if is_tensor else math.log10
        return 2595.0 * log10(1.0 + freq / 700.0)

    # Constants of the piecewise scale.
    lin_origin_hz = 0.0
    lin_step_hz = 200.0 / 3
    knee_hz = 1000.0  # beginning of log region (Hz)
    knee_mel = (knee_hz - lin_origin_hz) / lin_step_hz  # same (Mels)
    log_step = math.log(6.4) / 27.0  # step size for log region

    # Linear part, valid below the knee.
    linear_mels = (freq - lin_origin_hz) / lin_step_hz

    if not is_tensor:
        # Scalars switch to the log formula at and above the knee.
        if freq >= knee_hz:
            return knee_mel + math.log(freq / knee_hz + 1e-10) / log_step
        return linear_mels

    # Tensors: evaluate both formulas everywhere and blend them with a 0/1
    # mask (strictly above the knee). The 1e-10 keeps the log finite at 0 Hz.
    log_mels = knee_mel + paddle.log(freq / knee_hz + 1e-10) / log_step
    in_log_region = (freq > knee_hz).astype(freq.dtype)
    return log_mels * in_log_region + linear_mels * (1 - in_log_region)


def mel_to_hz(mel: Union[float, Tensor],
              htk: bool=False) -> Union[float, Tensor]:
    """Map mel values back to frequencies in Hz.

    With `htk=True` the HTK formula `700 * (10 ** (m / 2595) - 1)` is used.
    Otherwise the scale is linear up to the mel value that corresponds to
    1000 Hz and exponential above it.

    Args:
        mel (Union[float, Tensor]): The mel frequency represented as a tensor with arbitrary shape, or a scalar.
        htk (bool, optional): Use htk scaling. Defaults to False.

    Returns:
        Union[float, Tensor]: Frequencies in Hz.
    """
    if htk:
        return 700.0 * (10.0**(mel / 2595.0) - 1.0)

    # Constants of the piecewise scale.
    lin_origin_hz = 0.0
    lin_step_hz = 200.0 / 3
    knee_hz = 1000.0  # beginning of log region (Hz)
    knee_mel = (knee_hz - lin_origin_hz) / lin_step_hz  # same (Mels)
    log_step = math.log(6.4) / 27.0  # step size for log region

    # Linear part, valid below the knee.
    linear_hz = lin_origin_hz + lin_step_hz * mel

    if not isinstance(mel, Tensor):
        # Scalars switch to the exponential formula at and above the knee.
        if mel >= knee_mel:
            return knee_hz * math.exp(log_step * (mel - knee_mel))
        return linear_hz

    # Tensors: evaluate both formulas everywhere and blend them with a 0/1
    # mask (strictly above the knee).
    log_region_hz = knee_hz * paddle.exp(log_step * (mel - knee_mel))
    in_log_region = (mel > knee_mel).astype(mel.dtype)
    return log_region_hz * in_log_region + linear_hz * (1 - in_log_region)


def mel_frequencies(n_mels: int=64,
                    f_min: float=0.0,
                    f_max: float=11025.0,
                    htk: bool=False,
                    dtype: str='float32') -> Tensor:
    """Return `n_mels` frequencies that are evenly spaced on the mel scale.

    Args:
        n_mels (int, optional): Number of mel bins. Defaults to 64.
        f_min (float, optional): Minimum frequency in Hz. Defaults to 0.0.
        f_max (float, optional): Maximum frequency in Hz. Defaults to 11025.0.
        htk (bool, optional): Use htk scaling. Defaults to False.
        dtype (str, optional): The data type of the return frequencies. Defaults to 'float32'.

    Returns:
        Tensor: Tensor of n_mels frequencies in Hz with shape `(n_mels,)`.
    """
    # Convert both band limits to mels, lay out a uniform grid between them,
    # and map the grid back to Hz.
    mel_lo = hz_to_mel(f_min, htk=htk)
    mel_hi = hz_to_mel(f_max, htk=htk)
    mel_grid = paddle.linspace(mel_lo, mel_hi, n_mels, dtype=dtype)
    return mel_to_hz(mel_grid, htk=htk)


def fft_frequencies(sr: int, n_fft: int, dtype: str='float32') -> Tensor:
    """Return evenly spaced frequencies from 0 Hz up to `sr / 2`.

    Args:
        sr (int): Sample rate.
        n_fft (int): Number of fft bins.
        dtype (str, optional): The data type of the return frequencies. Defaults to 'float32'.

    Returns:
        Tensor: FFT frequencies in Hz with shape `(n_fft//2 + 1,)`.
    """
    nyquist_hz = float(sr) / 2
    num_bins = int(1 + n_fft // 2)
    return paddle.linspace(0, nyquist_hz, num_bins, dtype=dtype)


def compute_fbank_matrix(sr: int,
                         n_fft: int,
                         n_mels: int=64,
                         f_min: float=0.0,
                         f_max: Optional[float]=None,
                         htk: bool=False,
                         norm: Union[str, float]='slaney',
                         dtype: str='float32') -> Tensor:
    """Compute fbank matrix.

    Each row is a triangular filter over the FFT bin frequencies. Its corners
    are three consecutive points of a grid of `n_mels + 2` frequencies that is
    uniform on the mel scale.

    Args:
        sr (int): Sample rate.
        n_fft (int): Number of fft bins.
        n_mels (int, optional): Number of mel bins. Defaults to 64.
        f_min (float, optional): Minimum frequency in Hz. Defaults to 0.0.
        f_max (Optional[float], optional): Maximum frequency in Hz. If `None`, `sr / 2` is used. Defaults to None.
        htk (bool, optional): Use htk scaling. Defaults to False.
        norm (Union[str, float], optional): Type of normalization. 'slaney' scales each filter by `2 / (upper_edge - lower_edge)`; a number `p` applies p-norm normalization along each row; any other value leaves the filters unnormalized. Defaults to 'slaney'.
        dtype (str, optional): The data type of the return matrix. Defaults to 'float32'.

    Returns:
        Tensor: Mel transform matrix with shape `(n_mels, n_fft//2 + 1)`.
    """

    if f_max is None:
        f_max = float(sr) / 2

    # Center freqs of each FFT bin
    fftfreqs = fft_frequencies(sr=sr, n_fft=n_fft, dtype=dtype)

    # 'Center freqs' of mel bands - uniformly spaced between limits
    mel_f = mel_frequencies(
        n_mels + 2, f_min=f_min, f_max=f_max, htk=htk, dtype=dtype)

    fdiff = mel_f[1:] - mel_f[:-1]  # same as np.diff(mel_f)
    # same as np.subtract.outer(mel_f, fftfreqs); shape (n_mels + 2, n_bins)
    ramps = mel_f.unsqueeze(1) - fftfreqs.unsqueeze(0)

    # Rising and falling slopes of all filters at once. Filter `i` rises
    # between grid points i and i + 1 and falls between i + 1 and i + 2.
    lower = -ramps[:n_mels] / fdiff[:n_mels].unsqueeze(1)
    upper = ramps[2:n_mels + 2] / fdiff[1:n_mels + 1].unsqueeze(1)

    # .. then intersect them with each other and zero
    weights = paddle.maximum(
        paddle.zeros_like(lower), paddle.minimum(lower, upper))

    # Slaney-style mel is scaled to be approx constant energy per channel
    if norm == 'slaney':
        enorm = 2.0 / (mel_f[2:n_mels + 2] - mel_f[:n_mels])
        weights = weights * enorm.unsqueeze(1)
    elif isinstance(norm, (int, float)):
        weights = paddle.nn.functional.normalize(weights, p=norm, axis=-1)

    return weights


def power_to_db(spect: Tensor,
                ref_value: float=1.0,
                amin: float=1e-10,
                top_db: Optional[float]=None) -> Tensor:
    """Convert a power spectrogram (amplitude squared) to decibel (dB) units. The function computes the scaling `10 * log10(x / ref)` in a numerically stable way.

    Args:
        spect (Tensor): STFT power spectrogram.
        ref_value (float, optional): The reference value. If smaller than 1.0, the db level of the signal will be pulled up accordingly. Otherwise, the db level is pushed down. Defaults to 1.0.
        amin (float, optional): Minimum threshold. Values of `spect` below it are replaced by it before the logarithm is taken. Defaults to 1e-10.
        top_db (Optional[float], optional): Threshold the output at `top_db` below the peak, where the peak is the maximum over the whole output tensor. Defaults to None.

    Returns:
        Tensor: Power spectrogram in db scale.

    Raises:
        ValueError: If `amin` or `ref_value` is not strictly positive, or if
            `top_db` is negative.
    """
    # Validate every argument before doing any tensor work.
    if amin <= 0:
        raise ValueError("amin must be strictly positive")

    if ref_value <= 0:
        raise ValueError("ref_value must be strictly positive")

    if top_db is not None and top_db < 0:
        raise ValueError("top_db must be non-negative")

    ones = paddle.ones_like(spect)
    # Clamp from below so that the logarithm never sees values under `amin`.
    log_spec = 10.0 * paddle.log10(paddle.maximum(ones * amin, spect))
    log_spec -= 10.0 * math.log10(max(ref_value, amin))

    if top_db is not None:
        # Limit the dynamic range to `top_db` below the global peak.
        log_spec = paddle.maximum(log_spec, ones * (log_spec.max() - top_db))

    return log_spec


def create_dct(n_mfcc: int,
               n_mels: int,
               norm: Optional[str]='ortho',
               dtype: str='float32') -> Tensor:
    """Build the cosine basis of a discrete cosine transform(DCT).

    Args:
        n_mfcc (int): Number of mel frequency cepstral coefficients.
        n_mels (int): Number of mel filterbanks.
        norm (Optional[str], optional): Normalization type, either 'ortho' or `None`. Defaults to 'ortho'.
        dtype (str, optional): The data type of the return matrix. Defaults to 'float32'.

    Returns:
        Tensor: The DCT matrix with shape `(n_mels, n_mfcc)`.
    """
    mel_index = paddle.arange(n_mels, dtype=dtype)
    coef_index = paddle.arange(n_mfcc, dtype=dtype).unsqueeze(1)
    # Broadcasts to size (n_mfcc, n_mels)
    basis = paddle.cos(math.pi / float(n_mels) * (mel_index + 0.5) *
                       coef_index)
    if norm is not None:
        assert norm == "ortho"
        # The first row gets an extra 1/sqrt(2) on top of the common scale.
        basis[0] *= 1.0 / math.sqrt(2.0)
        basis *= math.sqrt(2.0 / float(n_mels))
    else:
        basis *= 2.0
    return basis.T


================================================
FILE: audio/paddleaudio/functional/window.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
from typing import List
from typing import Tuple
from typing import Union

import paddle
from paddle import Tensor


class WindowFunctionRegister(object):
    """Registry that stores functions under their `__name__`."""

    def __init__(self):
        # Maps a function name to the function object.
        self._functions_dict = {}

    def register(self):
        """Return a decorator that records the decorated function and hands it back unchanged."""

        def _record(func):
            self._functions_dict[func.__name__] = func
            return func

        return _record

    def get(self, name):
        """Return the function registered as `name`; a missing name raises `KeyError`."""
        return self._functions_dict[name]


window_function_register = WindowFunctionRegister()


@window_function_register.register()
def _cat(x: List[Tensor], data_type: str) -> Tensor:
    """Convert every item of `x` to a tensor of `data_type` and concatenate the results."""
    tensors = [paddle.to_tensor(item, data_type) for item in x]
    return paddle.concat(tensors)


@window_function_register.register()
def _acosh(x: Union[Tensor, float]) -> Union[Tensor, float]:
    """Inverse hyperbolic cosine of `x`.

    Python numbers (`int` or `float`) are handled by `math.acosh` and give a
    Python float. Any other input is treated as a tensor and evaluated as
    `log(x + sqrt(x^2 - 1))` with paddle ops.

    Raises:
        ValueError: For a Python number smaller than 1 (raised by `math.acosh`).
    """
    if isinstance(x, (int, float)):
        return math.acosh(x)
    return paddle.log(x + paddle.sqrt(paddle.square(x) - 1))


@window_function_register.register()
def _extend(M: int, sym: bool) -> Tuple[int, bool]:
    """Extend window by 1 sample if needed for DFT-even symmetry.

    Returns:
        Tuple[int, bool]: The window length to compute and a flag telling
        whether the result must be truncated by one sample afterwards.
        A symmetric window (`sym=True`) is left as is; otherwise the length
        grows by one and the flag is `True`.
    """
    if sym:
        return M, False
    return M + 1, True


@window_function_register.register()
def _len_guards(M: int) -> bool:
    """Validate the window length `M` and report whether it is trivial.

    Returns:
        bool: `True` when `M <= 1`.

    Raises:
        ValueError: If `M` is negative or not integer-valued.
    """
    is_invalid = int(M) != M or M < 0
    if is_invalid:
        raise ValueError('Window length M must be a non-negative integer')

    is_trivial = M <= 1
    return is_trivial


@window_function_register.register()
def _truncate(w: Tensor, needed: bool) -> Tensor:
    """Drop the last sample of `w` when `needed` is true; otherwise return `w` unchanged."""
    return w[:-1] if needed else w


@window_function_register.register()
def _general_gaussian(
    M: int, p, sig, sym: bool = True, dtype: str = 'float64'
) -> Tensor:
    """Generalized Gaussian window `exp(-0.5 * |n / sig| ** (2 * p))`.
    This function is consistent with scipy.signal.windows.general_gaussian().
    """
    if _len_guards(M):
        return paddle.ones((M,), dtype=dtype)
    length, drop_last = _extend(M, sym)

    # Sample positions centered around zero.
    offsets = paddle.arange(0, length, dtype=dtype) - (length - 1.0) / 2.0
    scaled = paddle.abs(offsets / sig)
    window = paddle.exp(-0.5 * scaled ** (2 * p))

    return _truncate(window, drop_last)


@window_function_register.register()
def _general_cosine(
    M: int, a: List[float], sym: bool = True, dtype: str = 'float64'
) -> Tensor:
    """Compute a generic weighted sum of cosine terms window.
    This function is consistent with scipy.signal.windows.general_cosine().

    Args:
        M (int): Window length.
        a (List[float]): Weights of the cosine terms; `a[k]` multiplies `cos(k * x)`.
        sym (bool, optional): Passed to `_extend`; when `False` the window is computed with one extra sample that is dropped again at the end. Defaults to True.
        dtype (str, optional): Data type of the returned window. Defaults to 'float64'.
    """
    if _len_guards(M):
        return paddle.ones((M,), dtype=dtype)
    M, needs_trunc = _extend(M, sym)
    fac = paddle.linspace(-math.pi, math.pi, M, dtype=dtype)
    w = paddle.zeros((M,), dtype=dtype)
    for k, coeff in enumerate(a):
        w += coeff * paddle.cos(k * fac)
    return _truncate(w, needs_trunc)


@window_function_register.register()
def _general_hamming(
    M: int, alpha: float, sym: bool = True, dtype: str = 'float64'
) -> Tensor:
    """Generalized Hamming window, built as a two-term cosine sum.
    This function is consistent with scipy.signal.windows.general_hamming()
    """
    # Weights of the constant term and of the first cosine term.
    coefficients = [alpha, 1.0 - alpha]
    return _general_cosine(M, coefficients, sym, dtype=dtype)


@window_function_register.register()
def _taylor(
    M: int, nbar=4, sll=30, norm=True, sym: bool = True, dtype: str = 'float64'
) -> Tensor:
    """Compute a Taylor window.
    The Taylor window taper function approximates the Dolph-Chebyshev window's
    constant sidelobe level for a parameterized number of near-in sidelobes.

    Args:
        M (int): Window length.
        nbar (int, optional): Controls the number of cosine terms; `nbar - 1` coefficients are computed. Defaults to 4.
        sll (float, optional): Sidelobe level parameter, given as a positive number (presumably in dB, since it enters as `10 ** (sll / 20)` - confirm). Defaults to 30.
        norm (bool, optional): When true, the window is divided by its value at position `(M - 1) / 2`. Defaults to True.
        sym (bool, optional): Passed to `_extend`; when `False` the window is computed with one extra sample that is dropped again at the end. Defaults to True.
        dtype (str, optional): Data type of the tensors created here. Defaults to 'float64'.
    """
    if _len_guards(M):
        return paddle.ones((M,), dtype=dtype)
    M, needs_trunc = _extend(M, sym)
    # Original text uses a negative sidelobe level parameter and then negates
    # it in the calculation of B. To keep consistent with other methods we
    # assume the sidelobe level parameter to be positive.
    B = 10 ** (sll / 20)
    A = _acosh(B) / math.pi
    s2 = nbar**2 / (A**2 + (nbar - 0.5) ** 2)
    # Term indices 1 .. nbar - 1.
    ma = paddle.arange(1, nbar, dtype=dtype)

    # Fm holds one coefficient per term; it is filled in the loop below.
    Fm = paddle.empty((nbar - 1,), dtype=dtype)
    # Alternating signs +1, -1, +1, ... for the numerators.
    signs = paddle.empty_like(ma)
    signs[::2] = 1
    signs[1::2] = -1
    m2 = ma * ma
    for mi in range(len(ma)):
        numer = signs[mi] * paddle.prod(
            1 - m2[mi] / s2 / (A**2 + (ma - 0.5) ** 2)
        )
        # The denominator is a product over all other terms (index mi itself
        # is left out), so the first and last index only have one side.
        if mi == 0:
            denom = 2 * paddle.prod(1 - m2[mi] / m2[mi + 1 :])
        elif mi == len(ma) - 1:
            denom = 2 * paddle.prod(1 - m2[mi] / m2[:mi])
        else:
            denom = (
                2
                * paddle.prod(1 - m2[mi] / m2[:mi])
                * paddle.prod(1 - m2[mi] / m2[mi + 1 :])
            )

        Fm[mi] = numer / denom

    # Window value at position(s) n: 1 plus twice the Fm-weighted cosine sum.
    def W(n):
        return 1 + 2 * paddle.matmul(
            Fm.unsqueeze(0),
            paddle.cos(2 * math.pi * ma.unsqueeze(1) * (n - M / 2.0 + 0.5) / M),
        )

    w = W(paddle.arange(0, M, dtype=dtype))

    # Normalize by the value at the window center. This step is noted as not
    # being part of the original text the formula comes from.
    if norm:
        scale = 1.0 / W((M - 1) / 2)
        w *= scale
    # Drop the leading axis introduced by the matmul in W.
    w = w.squeeze()
    return _truncate(w, needs_trunc)


@window_function_register.register()
def _hamming(M: int, sym: bool = True, dtype: str = 'float64') -> Tensor:
    """Compute a Hamming window.

    This is the generalized Hamming window with its coefficient fixed at
    0.54: a raised cosine with non-zero endpoints, optimized to minimize
    the nearest side lobe.
    """
    hamming_alpha = 0.54
    return _general_hamming(M, hamming_alpha, sym, dtype=dtype)


@window_function_register.register()
def _hann(M: int, sym: bool = True, dtype: str = 'float64') -> Tensor:
    """Compute a Hann window.

    This is the generalized Hamming window with its coefficient fixed at
    0.5: a raised cosine (sine-squared) taper whose ends touch zero.
    """
    hann_alpha = 0.5
    return _general_hamming(M, hann_alpha, sym, dtype=dtype)


@window_function_register.register()
def _tukey(
    M: int, alpha=0.5, sym: bool = True, dtype: str = 'float64'
) -> Tensor:
    """Compute a Tukey window.
    The Tukey window is also known as a tapered cosine window.

    Args:
        M (int): Number of points requested for the window.
        alpha (float, optional): Fraction of the window inside the cosine
            tapered region. ``alpha <= 0`` returns an all-ones (rectangular)
            window and ``alpha >= 1`` returns a Hann window. Defaults to 0.5.
        sym (bool, optional): Forwarded to ``_extend`` (and to ``_hann`` for
            the fully tapered case). Defaults to True.
        dtype (str, optional): Data type of the returned window.

    Returns:
        Tensor: The window after ``_truncate`` has been applied.
    """
    if _len_guards(M):
        return paddle.ones((M,), dtype=dtype)

    if alpha <= 0:
        return paddle.ones((M,), dtype=dtype)
    elif alpha >= 1.0:
        # Fully tapered case. The implementation in this module is `_hann`
        # (the bare name `hann` is not defined here), and the requested dtype
        # must be forwarded so the result does not silently fall back to the
        # default 'float64'.
        return _hann(M, sym=sym, dtype=dtype)

    M, needs_trunc = _extend(M, sym)

    n = paddle.arange(0, M, dtype=dtype)
    # Number of samples (minus one) in each cosine-tapered edge.
    width = int(alpha * (M - 1) / 2.0)
    n1 = n[0 : width + 1]  # rising taper
    n2 = n[width + 1 : M - width - 1]  # flat top
    n3 = n[M - width - 1 :]  # falling taper

    w1 = 0.5 * (1 + paddle.cos(math.pi * (-1 + 2.0 * n1 / alpha / (M - 1))))
    w2 = paddle.ones(n2.shape, dtype=dtype)
    w3 = 0.5 * (
        1
        + paddle.cos(math.pi * (-2.0 / alpha + 1 + 2.0 * n3 / alpha / (M - 1)))
    )
    w = paddle.concat([w1, w2, w3])

    return _truncate(w, needs_trunc)


@window_function_register.register()
def _gaussian(
    M: int, std: float, sym: bool = True, dtype: str = 'float64'
) -> Tensor:
    """Compute a Gaussian window.

    The Gaussian window has a Gaussian shape whose width is set by the
    standard deviation ``std``; sample ``n`` (measured from the window
    centre) has the value ``exp(-n**2 / (2 * std**2))``.
    """
    if _len_guards(M):
        return paddle.ones((M,), dtype=dtype)
    M, needs_trunc = _extend(M, sym)

    # Sample positions measured from the centre of the window.
    midpoint = (M - 1.0) / 2.0
    offsets = paddle.arange(0, M, dtype=dtype) - midpoint
    two_variance = 2 * std * std
    window = paddle.exp(-(offsets**2) / two_variance)

    return _truncate(window, needs_trunc)


@window_function_register.register()
def _exponential(
    M: int, center=None, tau=1.0, sym: bool = True, dtype: str = 'float64'
) -> Tensor:
    """Compute an exponential (or Poisson) window.

    Each sample has the value ``exp(-|n - center| / tau)``. When ``center`` is
    not supplied it defaults to the midpoint ``(M - 1) / 2`` of the (possibly
    extended) window. An explicit ``center`` is rejected with ``ValueError``
    when ``sym`` is True.
    """
    has_center = center is not None
    if sym and has_center:
        raise ValueError("If sym==True, center must be None.")
    if _len_guards(M):
        return paddle.ones((M,), dtype=dtype)
    M, needs_trunc = _extend(M, sym)

    peak = center if has_center else (M - 1) / 2

    positions = paddle.arange(0, M, dtype=dtype)
    distance = paddle.abs(positions - peak)
    window = paddle.exp(-distance / tau)

    return _truncate(window, needs_trunc)


@window_function_register.register()
def _triang(M: int, sym: bool = True, dtype: str = 'float64') -> Tensor:
    """Compute a triangular window.

    The rising half is built first and then mirrored; for an odd length the
    peak sample is not repeated in the mirrored half.
    """
    if _len_guards(M):
        return paddle.ones((M,), dtype=dtype)
    M, needs_trunc = _extend(M, sym)

    ramp = paddle.arange(1, (M + 1) // 2 + 1, dtype=dtype)
    even_length = M % 2 == 0
    if even_length:
        rising = (2 * ramp - 1.0) / M
        falling = rising[::-1]
    else:
        rising = 2 * ramp / (M + 1.0)
        # Skip the last (peak) element so it appears only once.
        falling = rising[-2::-1]
    window = paddle.concat([rising, falling])

    return _truncate(window, needs_trunc)


@window_function_register.register()
def _bohman(M: int, sym: bool = True, dtype: str = 'float64') -> Tensor:
    """Compute a Bohman window.

    The Bohman window is the autocorrelation of a cosine window. The interior
    samples are evaluated on ``|x|`` for ``x`` in ``linspace(-1, 1, M)`` with
    both endpoints removed, and the endpoints are then set to zero.
    """
    if _len_guards(M):
        return paddle.ones((M,), dtype=dtype)
    M, needs_trunc = _extend(M, sym)

    grid = paddle.linspace(-1, 1, M, dtype=dtype)
    fac = paddle.abs(grid[1:-1])
    cos_part = (1 - fac) * paddle.cos(math.pi * fac)
    sin_part = 1.0 / math.pi * paddle.sin(math.pi * fac)
    interior = cos_part + sin_part
    window = _cat([0, interior, 0], dtype)

    return _truncate(window, needs_trunc)


@window_function_register.register()
def _blackman(M: int, sym: bool = True, dtype: str = 'float64') -> Tensor:
    """Compute a Blackman window.

    The Blackman window is a taper formed from the first three terms of a
    summation of cosines, with coefficients 0.42, 0.50 and 0.08. It was
    designed to have close to the minimal leakage possible; it is close to
    optimal, only slightly worse than a Kaiser window.
    """
    blackman_coeffs = [0.42, 0.50, 0.08]
    return _general_cosine(M, blackman_coeffs, sym, dtype=dtype)


@window_function_register.register()
def _cosine(M: int, sym: bool = True, dtype: str = 'float64') -> Tensor:
    """Compute a window with a simple cosine shape.

    Sample ``n`` has the value ``sin(pi / M * (n + 0.5))``.
    """
    if _len_guards(M):
        return paddle.ones((M,), dtype=dtype)
    M, needs_trunc = _extend(M, sym)

    positions = paddle.arange(0, M, dtype=dtype)
    window = paddle.sin(math.pi / M * (positions + 0.5))

    return _truncate(window, needs_trunc)


def get_window(
    window: Union[str, Tuple[str, float]],
    win_length: int,
    fftbins: bool = True,
    dtype: str = 'float64',
) -> Tensor:
    """Return a window of a given length and type.

    Args:
        window (Union[str, Tuple[str, float]]): The window function applied to the signal before the Fourier transform. Supported window functions: 'hamming', 'hann', 'gaussian', 'general_gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. Pass a tuple ``(name, param, ...)`` to supply extra positional parameters to the window function.
        win_length (int): Number of samples.
        fftbins (bool, optional): If True, create a "periodic" window. Otherwise, create a "symmetric" window, for use in filter design. Defaults to True.
        dtype (str, optional): The data type of the return window. Defaults to 'float64'.

    Returns:
        Tensor: The window represented as a tensor.

    Raises:
        ValueError: If ``window`` is neither a string nor a non-empty tuple
            whose first element is a string, if 'gaussian' or 'exponential'
            is given as a bare string, or if the window name is not registered.

    Examples:
        .. code-block:: python

            import paddle

            n_fft = 512
            cosine_window = paddle.audio.functional.get_window('cosine', n_fft)

            std = 7
            gaussian_window = paddle.audio.functional.get_window(('gaussian',std), n_fft)
    """
    sym = not fftbins

    args = ()
    if isinstance(window, tuple):
        # An empty tuple has no window name; report it as invalid input
        # instead of letting `window[0]` raise a bare IndexError.
        if len(window) == 0:
            raise ValueError(
                "An empty tuple as window type is not supported."
            )
        winstr = window[0]
        if len(window) > 1:
            args = window[1:]
        # The name is concatenated with '_' below, so it has to be a string.
        if not isinstance(winstr, str):
            raise ValueError(
                "%s as window type is not supported." % str(type(winstr))
            )
    elif isinstance(window, str):
        if window in ['gaussian', 'exponential']:
            raise ValueError(
                "The '" + window + "' window needs one or "
                "more parameters -- pass a tuple."
            )
        else:
            winstr = window
    else:
        raise ValueError(
            "%s as window type is not supported." % str(type(window))
        )

    # Window implementations are registered under their private, underscore
    # prefixed function names.
    try:
        winfunc = window_function_register.get('_' + winstr)
    except KeyError as e:
        raise ValueError("Unknown window type.") from e

    params = (win_length,) + args
    kwargs = {'sym': sym}
    return winfunc(*params, dtype=dtype, **kwargs)


================================================
FILE: audio/paddleaudio/kaldi/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .kaldi import fbank
#from .kaldi import pitch


================================================
FILE: audio/paddleaudio/kaldi/kaldi.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddleaudio
from paddleaudio._internal import module_utils

# Public names of this module; only `fbank` is exported (the `pitch` wrapper
# further down in this file is commented out).
__all__ = [
    'fbank',
]


@module_utils.requires_kaldi()
def fbank(
        wav,
        samp_freq: int=16000,
        frame_shift_ms: float=10.0,
        frame_length_ms: float=25.0,
        dither: float=0.0,
        preemph_coeff: float=0.97,
        remove_dc_offset: bool=True,
        window_type: str='povey',
        round_to_power_of_two: bool=True,
        blackman_coeff: float=0.42,
        snip_edges: bool=True,
        max_feature_vectors: int=-1,
        num_bins: int=23,
        low_freq: float=20,
        high_freq: float=0,
        vtln_low: float=100,
        vtln_high: float=-500,
        debug_mel: bool=False,
        htk_mode: bool=False,
        use_energy: bool=False,  # fbank opts
        energy_floor: float=0.0,
        raw_energy: bool=True,
        htk_compat: bool=False,
        use_log_fbank: bool=True,
        use_power: bool=True):
    """Compute filter-bank features for ``wav`` via the compiled extension.

    Every keyword argument is copied onto the attribute of the same name on
    one of three option objects created from ``paddleaudio._paddleaudio``:

    * ``FrameExtractionOptions``: ``samp_freq`` through ``max_feature_vectors``
    * ``MelBanksOptions``: ``num_bins`` through ``htk_mode``
    * ``FbankOptions``: ``use_energy`` through ``use_power``

    The option objects and ``wav`` are then handed to ``ComputeFbank`` and its
    result is returned unchanged.
    """
    ext = paddleaudio._paddleaudio
    frame_opts = ext.FrameExtractionOptions()
    mel_opts = ext.MelBanksOptions()
    fbank_opts = ext.FbankOptions()

    # Framing / windowing settings.
    frame_settings = {
        'samp_freq': samp_freq,
        'frame_shift_ms': frame_shift_ms,
        'frame_length_ms': frame_length_ms,
        'dither': dither,
        'preemph_coeff': preemph_coeff,
        'remove_dc_offset': remove_dc_offset,
        'window_type': window_type,
        'round_to_power_of_two': round_to_power_of_two,
        'blackman_coeff': blackman_coeff,
        'snip_edges': snip_edges,
        'max_feature_vectors': max_feature_vectors,
    }
    # Mel filter-bank settings.
    mel_settings = {
        'num_bins': num_bins,
        'low_freq': low_freq,
        'high_freq': high_freq,
        'vtln_low': vtln_low,
        'vtln_high': vtln_high,
        'debug_mel': debug_mel,
        'htk_mode': htk_mode,
    }
    # Fbank-specific settings.
    fbank_settings = {
        'use_energy': use_energy,
        'energy_floor': energy_floor,
        'raw_energy': raw_energy,
        'htk_compat': htk_compat,
        'use_log_fbank': use_log_fbank,
        'use_power': use_power,
    }

    for target, settings in ((frame_opts, frame_settings),
                             (mel_opts, mel_settings),
                             (fbank_opts, fbank_settings)):
        for field, value in settings.items():
            setattr(target, field, value)

    return ext.ComputeFbank(frame_opts, mel_opts, fbank_opts, wav)


#@module_utils.requires_kaldi()
#def pitch(wav,
#samp_freq: int=16000,
#frame_shift_ms: float=10.0,
#frame_length_ms: float=25.0,
#preemph_coeff: float=0.0,
#min_f0: int=50,
#max_f0: int=400,
#soft_min_f0: float=10.0,
#penalty_factor: float=0.1,
#lowpass_cutoff: int=1000,
#resample_freq: int=4000,
#delta_pitch: float=0.005,
#nccf_ballast: int=7000,
#lowpass_filter_width: int=1,
#upsample_filter_width: int=5,
#max_frames_latency: int=0,
#frames_per_chunk: int=0,
#simulate_first_pass_online: bool=False,
#recompute_frame: int=500,
#nccf_ballast_online: bool=False,
#snip_edges: bool=True):
#pitch_opts = paddleaudio._paddleaudio.PitchExtractionOptions()
#pitch_opts.samp_freq = samp_freq
#pitch_opts.frame_shift_ms = frame_shift_ms
#pitch_opts.frame_length_ms = frame_length_ms
#pitch_opts.preemph_coeff = preemph_coeff
#pitch_opts.min_f0 = min_f0
#pitch_opts.max_f0 = max_f0
#pitch_opts.soft_min_f0 = soft_min_f0
#pitch_opts.penalty_factor = penalty_factor
#pitch_opts.lowpass_cutoff = lowpass_cutoff
#pitch_opts.resample_freq = resample_freq
#pitch_opts.delta_pitch = delta_pitch
#pitch_opts.nccf_ballast = nccf_ballast
#pitch_opts.lowpass_filter_width = lowpass_filter_width
#pitch_opts.upsample_filter_width = upsample_filter_width
#pitch_opts.max_frames_latency = max_frames_latency
#pitch_opts.frames_per_chunk = frames_per_chunk
#pitch_opts.simulate_first_pass_online = simulate_first_pass_online
#pitch_opts.recompute_frame = recompute_frame
#pitch_opts.nccf_ballast_online = nccf_ballast_online
#pitch_opts.snip_edges = snip_edges
#pitch = paddleaudio._paddleaudio.ComputeKaldiPitch(pitch_opts, wav)
#return pitch


================================================
FILE: audio/paddleaudio/metric/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .eer import compute_eer
from .eer import compute_minDCF


================================================
FILE: audio/paddleaudio/metric/eer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List

import numpy as np
import paddle
from sklearn.metrics import roc_curve


def compute_eer(labels: np.ndarray, scores: np.ndarray) -> List[float]:
    """Compute the equal error rate (EER) and the threshold at which it occurs.

    The operating point is the ROC point where the false negative rate
    (``1 - tpr``) and the false positive rate are closest to each other.

    Args:
        labels (np.ndarray): the trial label, shape: [N], one-dimension, N refer to the samples num
        scores (np.ndarray): the trial scores, shape: [N], one-dimension, N refer to the samples num

    Returns:
        List[float]: eer and the specific threshold (returned as a 2-tuple
        ``(eer, eer_threshold)``)
    """
    fpr, tpr, thresholds = roc_curve(y_true=labels, y_score=scores)
    fnr = 1 - tpr
    # Index of the ROC point where |FNR - FPR| is smallest, ignoring NaNs.
    eer_index = np.nanargmin(np.absolute(fnr - fpr))
    return fpr[eer_index], thresholds[eer_index]


def compute_minDCF(positive_scores,
                   negative_scores,
                   c_miss=1.0,
                   c_fa=1.0,
                   p_target=0.01):
    """
    This is modified from SpeechBrain
    https://github.com/speechbrain/speechbrain/blob/085be635c07f16d42cd1295045bc46c407f1e15b/speechbrain/utils/metric_stats.py#L509
    Computes the minDCF metric normally used to evaluate speaker verification
    systems. The min_DCF is the minimum of the following C_det function computed
    within the defined threshold range:

    C_det =  c_miss * p_miss * p_target + c_fa * p_fa * (1 -p_target)

    where p_miss is the missing probability and p_fa is the probability of having
    a false alarm.

    Candidate thresholds are the unique score values plus the midpoints
    between consecutive unique values.

    Args:
        positive_scores (Paddle.Tensor): The scores from entries of the same class.
        negative_scores (Paddle.Tensor): The scores from entries of different classes.
        c_miss (float, optional): Cost assigned to a missing error (default 1.0).
        c_fa (float, optional): Cost assigned to a false alarm (default 1.0).
        p_target (float, optional): Prior probability of having a target (default 0.01).

    Returns:
        List[float]: min dcf and the specific threshold (returned as a
        2-tuple of Python floats)
    """
    # Flatten inputs that carry extra singleton dimensions.
    if len(positive_scores.shape) > 1:
        positive_scores = positive_scores.squeeze()

    if len(negative_scores.shape) > 1:
        negative_scores = negative_scores.squeeze()

    # Candidate thresholds: every distinct score ...
    all_scores = paddle.concat([positive_scores, negative_scores])
    thresholds = paddle.unique(paddle.sort(all_scores))

    # ... plus the midpoint between each pair of neighbouring scores.
    midpoints = (thresholds[0:-1] + thresholds[1:]) / 2
    thresholds = paddle.sort(paddle.concat([thresholds, midpoints]))
    num_thresholds = len(thresholds)

    # False rejection rate (miss detection): positives at or below threshold.
    tiled_pos = paddle.concat(
        num_thresholds * [positive_scores.unsqueeze(0)])
    miss_mask = tiled_pos.transpose(perm=[1, 0]) <= thresholds
    p_miss = miss_mask.sum(0).astype("float32") / tiled_pos.shape[1]
    del tiled_pos
    del miss_mask

    # False acceptance rate (false alarm): negatives above threshold.
    tiled_neg = paddle.concat(
        num_thresholds * [negative_scores.unsqueeze(0)])
    fa_mask = tiled_neg.transpose(perm=[1, 0]) > thresholds
    p_fa = fa_mask.sum(0).astype("float32") / tiled_neg.shape[1]
    del tiled_neg
    del fa_mask

    # Detection cost per threshold; report its minimum and where it occurs.
    c_det = c_miss * p_miss * p_target + c_fa * p_fa * (1 - p_target)
    best_cost = paddle.min(c_det, axis=0)
    best_index = paddle.argmin(c_det, axis=0)
    return float(best_cost), float(thresholds[best_index])


================================================
FILE: audio/paddleaudio/sox_effects/__init__.py
================================================
from paddleaudio._internal import module_utils as _mod_utils

from .sox_effects import apply_effects_file
from .sox_effects import apply_effects_tensor
from .sox_effects import effect_names
from .sox_effects import init_sox_effects
from .sox_effects import shutdown_sox_effects

# Import-time side effect: when `is_sox_available()` reports that sox support
# is present, initialise the sox effects once and register the matching
# shutdown with `atexit` so it runs at interpreter exit.
if _mod_utils.is_sox_available():
    import atexit

    init_sox_effects()
    atexit.register(shutdown_sox_effects)

# Names exported by this package.
__all__ = [
    "init_sox_effects",
    "shutdown_sox_effects",
    "effect_names",
    "apply_effects_tensor",
    "apply_effects_file",
]


================================================
FILE: audio/paddleaudio/sox_effects/sox_effects.py
================================================
import os
from typing import List
from typing import Optional
from typing import Tuple

import paddle
import paddleaudio
from paddleaudio._internal import module_utils as _mod_utils
from paddleaudio.utils.sox_utils import list_effects

#code is from: https://github.com/pytorch/audio/blob/main/torchaudio/sox_effects/sox_effects.py


@_mod_utils.requires_sox()
def init_sox_effects():
    """Initialize resources required to use sox effects.

    Delegates to ``paddleaudio._paddleaudio.sox_effects_initialize_sox_effects``.

    Note:
        You do not need to call this function manually. It is called automatically.

    Once initialized, there is no need to call this function again between
    uses of sox effects, although doing so is safe as long as
    :func:`shutdown_sox_effects` has not been called yet.
    Once :func:`shutdown_sox_effects` has been called, SoX effects can no
    longer be used and initializing again will result in an error.
    """
    paddleaudio._paddleaudio.sox_effects_initialize_sox_effects()


@_mod_utils.requires_sox()
def shutdown_sox_effects():
    """Clean up resources required to use sox effects.

    Delegates to ``paddleaudio._paddleaudio.sox_effects_shutdown_sox_effects``.

    Note:
        You do not need to call this function manually. It is called automatically.

    It is safe to call this function multiple times.
    Once :py:func:`shutdown_sox_effects` has been called, SoX effects can no
    longer be used and initializing again will result in an error.
    """
    paddleaudio._paddleaudio.sox_effects_shutdown_sox_effects()


@_mod_utils.requires_sox()
def effect_names() -> List[str]:
    """Return the names of the sox effects that can be used.

    The names are the keys of the mapping returned by ``list_effects()``.

    Returns:
        List[str]: list of available effect names.

    Example
        >>> paddleaudio.sox_effects.effect_names()
        ['allpass', 'band', 'bandpass', ... ]
    """
    available_effects = list_effects()
    return list(available_effects.keys())


@_mod_utils.requires_sox()
def apply_effects_tensor(
        tensor: paddle.Tensor,
        sample_rate: int,
        effects: List[List[str]],
        channels_first: bool=True, ) -> Tuple[paddle.Tensor, int]:
    """Apply sox effects to given Tensor

    .. devices:: CPU

    Note:
        This function only works on CPU Tensors.
        It behaves much like the ``sox`` command, with slight differences.
        For example, the ``sox`` command adds certain effects automatically
        (such as a ``rate`` effect after ``speed``, ``pitch`` and other
        effects), whereas this function applies only the effects it is given.
        Therefore, to actually apply a ``speed`` effect, you also need to give
        a ``rate`` effect with the desired sampling rate.

    Args:
        tensor (paddle.Tensor): Input 2D CPU Tensor.
        sample_rate (int): Sample rate
        effects (List[List[str]]): List of effects.
        channels_first (bool, optional): Indicates if the input Tensor's dimension is
            `[channels, time]` or `[time, channels]`

    Returns:
        (Tensor, int): Resulting Tensor and sample rate.
        The resulting Tensor has the same ``dtype`` as the input Tensor, and
        the same channels order. The shape of the Tensor can be different based on the
        effects applied. Sample rate can also be different based on the effects applied.

    Raises:
        RuntimeError: If the underlying extension call returns ``None``.

    Example - Basic usage
        >>>
        >>> # Defines the effects to apply
        >>> effects = [
        ...     ['gain', '-n'],  # normalises to 0dB
        ...     ['pitch', '5'],  # 5 cent pitch shift
        ...     ['rate', '8000'],  # resample to 8000 Hz
        ... ]
        >>>
        >>> # Generate pseudo wave:
        >>> # normalized, channels first, 2ch, sampling rate 16000, 1 second
        >>> sample_rate = 16000
        >>> waveform = 2 * paddle.rand([2, sample_rate * 1]) - 1
        >>> waveform.shape
        [2, 16000]
        >>>
        >>> # Apply effects
        >>> waveform, sample_rate = apply_effects_tensor(
        ...     waveform, sample_rate, effects, channels_first=True)
        >>>
        >>> # Check the result
        >>> # The new waveform is sampling rate 8000, 1 second.
        >>> # normalization and channel order are preserved
        >>> waveform.shape
        [2, 8000]
        >>> sample_rate
        8000

    """
    # The extension works on numpy arrays, so convert on the way in and out.
    samples = tensor.numpy()
    result = paddleaudio._paddleaudio.sox_effects_apply_effects_tensor(
        samples, sample_rate, effects, channels_first)
    if result is None:
        raise RuntimeError("Failed to apply sox effect")
    return (paddle.to_tensor(result[0]), result[1])


@_mod_utils.requires_sox()
def apply_effects_file(
        path: str,
        effects: List[List[str]],
        normalize: bool=True,
        channels_first: bool=True,
        format: Optional[str]=None, ) -> Tuple[paddle.Tensor, int]:
    """Apply sox effects to the audio file and load the resulting data as Tensor

    Note:
        This function behaves much like the ``sox`` command, with slight
        differences. For example, the ``sox`` command adds certain effects
        automatically (such as a ``rate`` effect after ``speed``, ``pitch``
        etc), whereas this function applies only the effects it is given.
        Therefore, to actually apply a ``speed`` effect, you also need to give
        a ``rate`` effect with the desired sampling rate, because internally
        the ``speed`` effect only alters the sampling rate and leaves the
        samples untouched.

    Args:
        path (path-like object or file-like object):
            Source of the audio. An object with a ``read`` attribute is treated
            as a file-like object; anything else is converted with ``os.fspath``.
        effects (List[List[str]]): List of effects.
        normalize (bool, optional):
            When ``True``, this function always return ``float32``, and sample values are
            normalized to ``[-1.0, 1.0]``.
            If input file is integer WAV, giving ``False`` will change the resulting Tensor type to
            integer type. This argument has no effect for formats other
            than integer WAV type.
        channels_first (bool, optional): When True, the returned Tensor has dimension `[channel, time]`.
            Otherwise, the returned Tensor's dimension is `[time, channel]`.
        format (str or None, optional):
            Override the format detection with the given format.
            Providing the argument might help when libsox can not infer the format
            from header or extension.

    Returns:
        (Tensor, int): Resulting Tensor and sample rate.
        If ``normalize=True``, the resulting Tensor is always ``float32`` type.
        If ``normalize=False`` and the input audio file is of integer WAV file, then the
        resulting Tensor has corresponding integer type. (Note 24 bit integer type is not supported)
        If ``channels_first=True``, the resulting Tensor has dimension `[channel, time]`,
        otherwise `[time, channel]`.

    Raises:
        RuntimeError: If the underlying extension call returns ``None``.

    Example - Basic usage
        >>>
        >>> # Defines the effects to apply
        >>> effects = [
        ...     ['gain', '-n'],  # normalises to 0dB
        ...     ['pitch', '5'],  # 5 cent pitch shift
        ...     ['rate', '8000'],  # resample to 8000 Hz
        ... ]
        >>>
        >>> # Apply effects and load data with channels_first=True
        >>> waveform, sample_rate = apply_effects_file("data.wav", effects, channels_first=True)
        >>>
        >>> # Check the result
        >>> waveform.shape
        [2, 8000]
        >>> sample_rate
        8000

    Example - Apply random speed perturbation to dataset
        >>>
        >>> # Load data from file, apply random speed perturbation.
        >>> # Suppose all the input files are at least one second long.
        >>> class RandomPerturbationFile(paddle.io.Dataset):
        ...     def __init__(self, flist: List[str], sample_rate: int):
        ...         super().__init__()
        ...         self.flist = flist
        ...         self.sample_rate = sample_rate
        ...
        ...     def __getitem__(self, index):
        ...         speed = 0.5 + 1.5 * random.random()
        ...         effects = [
        ...             ['gain', '-n', '-10'],  # apply 10 db attenuation
        ...             ['remix', '-'],  # merge all the channels
        ...             ['speed', f'{speed:.5f}'],  # duration is now 0.5 ~ 2.0 seconds.
        ...             ['rate', f'{self.sample_rate}'],
        ...             ['pad', '0', '1.5'],  # add 1.5 seconds silence at the end
        ...             ['trim', '0', '2'],  # get the first 2 seconds
        ...         ]
        ...         waveform, _ = paddleaudio.sox_effects.apply_effects_file(
        ...             self.flist[index], effects)
        ...         return waveform
        ...
        ...     def __len__(self):
        ...         return len(self.flist)
        ...
        >>> dataset = RandomPerturbationFile(file_list, sample_rate=8000)
        >>> loader = paddle.io.DataLoader(dataset, batch_size=32)
        >>> for batch in loader:
        >>>     pass
    """
    is_file_object = hasattr(path, "read")
    if is_file_object:
        # File-like objects go through the dedicated file-object entry point.
        result = paddleaudio._paddleaudio.apply_effects_fileobj(
            path, effects, normalize, channels_first, format)
    else:
        path = os.fspath(path)
        result = paddleaudio._paddleaudio.sox_effects_apply_effects_file(
            path, effects, normalize, channels_first, format)

    if result is None:
        raise RuntimeError("Failed to load audio from {}".format(path))
    return (paddle.to_tensor(result[0]), result[1])


================================================
FILE: audio/paddleaudio/src/CMakeLists.txt
================================================
# With MSVC, ask CMake to export all symbols from shared libraries.
if (MSVC)
  set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
endif()

# On Apple platforms, give shared libraries a ".so" suffix.
if(APPLE)
set(CMAKE_SHARED_LIBRARY_SUFFIX ".so")
endif(APPLE)

################################################################################
# libpaddleaudio
################################################################################
# Source files compiled into libpaddleaudio.
set(
  LIBPADDLEAUDIO_SOURCES
  utils.cpp
  )

# Include directories used when compiling libpaddleaudio.
set(
  LIBPADDLEAUDIO_INCLUDE_DIRS
  ${PROJECT_SOURCE_DIR}
  )

# Libraries to link against; starts empty and is extended by the optional
# feature sections below.
set(
  LIBPADDLEAUDIO_LINK_LIBRARIES
  )

# Compile definitions; starts empty and is extended by the optional feature
# sections below.
set(
  LIBPADDLEAUDIO_COMPILE_DEFINITIONS)

#------------------------------------------------------------------------------#
# START OF CUSTOMIZATION LOGICS
#------------------------------------------------------------------------------#

# Optional sox support: link libsox and define INCLUDE_SOX.
if(BUILD_SOX)
  list(
    APPEND
    LIBPADDLEAUDIO_LINK_LIBRARIES
    libsox
    )
  # NOTE(review): this APPEND lists no elements, so it currently adds no
  # sources -- looks like a placeholder; confirm whether sox-specific sources
  # were meant to be listed here.
  list(
    APPEND
    LIBPADDLEAUDIO_SOURCES
    )
  list(
    APPEND
    LIBPADDLEAUDIO_COMPILE_DEFINITIONS
    INCLUDE_SOX
    )
endif()


# Optional Kaldi support: link kaldi-native-fbank-core and define
# INCLUDE_KALDI and COMPILE_WITHOUT_OPENFST.
if(BUILD_KALDI)
  list(
    APPEND
    LIBPADDLEAUDIO_LINK_LIBRARIES
    kaldi-native-fbank-core
  )
  list(
    APPEND
    LIBPADDLEAUDIO_COMPILE_DEFINITIONS
    INCLUDE_KALDI
    COMPILE_WITHOUT_OPENFST
  )
endif()

#------------------------------------------------------------------------------#
# END OF CUSTOMIZATION LOGICS
#------------------------------------------------------------------------------#

# define_library(<name> <source> <include_dirs> <link_libraries> <compile_defs>)
#
# Adds the SHARED library target <name> built from <source>, with private
# include directories and compile definitions, linked against
# <link_libraries>. The output file gets no prefix (and a ".pyd" suffix on
# MSVC) and is installed into "lib".
function(define_library name source include_dirs link_libraries compile_defs)
  add_library(${name} SHARED ${source})
  target_include_directories(${name} PRIVATE ${include_dirs})
  target_link_libraries(${name} ${link_libraries})
  target_compile_definitions(${name} PRIVATE ${compile_defs})

  if(MSVC)
    set_target_properties(${name} PROPERTIES PREFIX "" SUFFIX ".pyd")
  else()
    set_target_properties(${name} PROPERTIES PREFIX "")
  endif()

  # RUNTIME is the artifact kind used for DLLs on Windows.
  install(TARGETS ${name}
    LIBRARY DESTINATION lib
    RUNTIME DESTINATION lib)
endfunction()


# Instantiate the core library from the lists assembled above.
define_library(libpaddleaudio
  "${LIBPADDLEAUDIO_SOURCES}"
  "${LIBPADDLEAUDIO_INCLUDE_DIRS}"
  "${LIBPADDLEAUDIO_LINK_LIBRARIES}"
  "${LIBPADDLEAUDIO_COMPILE_DEFINITIONS}")

# AUDIO_LIBRARY is the link item for the core library. Outside Apple platforms
# it is wrapped in --no-as-needed/--as-needed linker flags.
if(NOT APPLE)
  set(AUDIO_LIBRARY -Wl,--no-as-needed libpaddleaudio -Wl,--as-needed CACHE INTERNAL "")
else()
  set(AUDIO_LIBRARY libpaddleaudio CACHE INTERNAL "")
endif()

  ################################################################################
# _paddleaudio.so
################################################################################
if (BUILD_PADDLEAUDIO_PYTHON_EXTENSION)
# On Windows, locate the Development component of exactly ${PYTHON_VERSION}
# and record the imported Python3::Python target in ADDITIONAL_ITEMS, which
# define_extension() passes to target_link_libraries().
if (WIN32)
  find_package(Python3 ${PYTHON_VERSION} EXACT COMPONENTS Development)
  set(ADDITIONAL_ITEMS Python3::Python)
endif()
# define_extension(<name> <sources> <include_dirs> <libraries> <definitions>)
#
# Adds the SHARED library target <name> for a Python extension module. It is
# compiled with <definitions>, sees the project, Python and pybind11 include
# directories plus <include_dirs>, and links <libraries>, ${PYTHON_LIBRARY}
# and ${ADDITIONAL_ITEMS}. The output has no prefix (".pyd" suffix on MSVC)
# and is installed into the install-prefix root.
function(define_extension name sources include_dirs libraries definitions)
  add_library(${name} SHARED ${sources})
  target_compile_definitions(${name} PRIVATE "${definitions}")
  target_include_directories(${name} PRIVATE
    ${PROJECT_SOURCE_DIR}
    ${Python_INCLUDE_DIR}
    ${pybind11_INCLUDE_DIR}
    ${include_dirs})
  target_link_libraries(${name}
    ${libraries} ${PYTHON_LIBRARY} ${ADDITIONAL_ITEMS})

  set_target_properties(${name} PROPERTIES PREFIX "")
  if(MSVC)
    set_target_properties(${name} PROPERTIES SUFFIX ".pyd")
  endif()

  if(APPLE)
    # Let undefined symbols be resolved when the module is loaded; see
    # https://github.com/facebookarchive/caffe2/issues/854#issuecomment-364538485
    # https://github.com/pytorch/pytorch/commit/73f6715f4725a0723d8171d3131e09ac7abf0666
    set_target_properties(${name} PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
  endif()

  # RUNTIME is the artifact kind used for DLLs on Windows.
  install(TARGETS ${name}
    LIBRARY DESTINATION .
    RUNTIME DESTINATION .)
endfunction()

# Sources of the _paddleaudio extension; feature-specific bindings are
# appended below.
set(EXTENSION_SOURCES pybind/pybind.cpp)
#----------------------------------------------------------------------------#
# START OF CUSTOMIZATION LOGICS
#----------------------------------------------------------------------------#
if(BUILD_SOX)
  list(APPEND EXTENSION_SOURCES
    pybind/sox/effects.cpp
    pybind/sox/effects_chain.cpp
    pybind/sox/io.cpp
    pybind/sox/types.cpp
    pybind/sox/utils.cpp)
endif()

if(BUILD_KALDI)
  list(APPEND EXTENSION_SOURCES
    pybind/kaldi/kaldi_feature_wrapper.cc
    pybind/kaldi/kaldi_feature.cc)
endif()
#----------------------------------------------------------------------------#
# END OF CUSTOMIZATION LOGICS
#----------------------------------------------------------------------------#
# The extension links the core library and reuses its compile definitions; no
# extra include directories are passed.
define_extension(_paddleaudio
  "${EXTENSION_SOURCES}"
  ""
  libpaddleaudio
  "${LIBPADDLEAUDIO_COMPILE_DEFINITIONS}")
# if(BUILD_CTC_DECODER)
#   set(
#     DECODER_EXTENSION_SOURCES
#     decoder/bindings/pybind.cpp
#     )
#   define_extension(
#     _paddleaudio_decoder
#     "${DECODER_EXTENSION_SOURCES}"
#     ""
#     "libpaddleaudio_decoder"
#     "${LIBPADDLEAUDIO_DECODER_DEFINITIONS}"
#     )
# endif()
# if(USE_FFMPEG)
#   set(
#     FFMPEG_EXTENSION_SOURCES
#     ffmpeg/pybind/typedefs.cpp
#     ffmpeg/pybind/pybind.cpp
#     ffmpeg/pybind/stream_reader.cpp
#     )
#   define_extension(
#     _paddleaudio_ffmpeg
#     "${FFMPEG_EXTENSION_SOURCES}"
#     "${FFMPEG_INCLUDE_DIRS}"
#     "libpaddleaudio_ffmpeg"
#     "${LIBPADDLEAUDIO_DECODER_DEFINITIONS}"
#     )
# endif()
endif()


================================================
FILE: audio/paddleaudio/src/optional/COPYING
================================================
Creative Commons Legal Code

CC0 1.0 Universal

    CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
    LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
    ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
    INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
    REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
    PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
    THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
    HEREUNDER.

Statement of Purpose

The laws of most jurisdictions throughout the world automatically confer
exclusive Copyright and Related Rights (defined below) upon the creator
and subsequent owner(s) (each and all, an "owner") of an original work of
authorship and/or a database (each, a "Work").

Certain owners wish to permanently relinquish those rights to a Work for
the purpose of contributing to a commons of creative, cultural and
scientific works ("Commons") that the public can reliably and without fear
of later claims of infringement build upon, modify, incorporate in other
works, reuse and redistribute as freely as possible in any form whatsoever
and for any purposes, including without limitation commercial purposes.
These owners may contribute to the Commons to promote the ideal of a free
culture and the further production of creative, cultural and scientific
works, or to gain reputation or greater distribution for their Work in
part through the use and efforts of others.

For these and/or other purposes and motivations, and without any
expectation of additional consideration or compensation, the person
associating CC0 with a Work (the "Affirmer"), to the extent that he or she
is an owner of Copyright and Related Rights in the Work, voluntarily
elects to apply CC0 to the Work and publicly distribute the Work under its
terms, with knowledge of his or her Copyright and Related Rights in the
Work and the meaning and intended legal effect of CC0 on those rights.

1. Copyright and Related Rights. A Work made available under CC0 may be
protected by copyright and related or neighboring rights ("Copyright and
Related Rights"). Copyright and Related Rights include, but are not
limited to, the following:

  i. the right to reproduce, adapt, distribute, perform, display,
     communicate, and translate a Work;
 ii. moral rights retained by the original author(s) and/or performer(s);
iii. publicity and privacy rights pertaining to a person's image or
     likeness depicted in a Work;
 iv. rights protecting against unfair competition in regards to a Work,
     subject to the limitations in paragraph 4(a), below;
  v. rights protecting the extraction, dissemination, use and reuse of data
     in a Work;
 vi. database rights (such as those arising under Directive 96/9/EC of the
     European Parliament and of the Council of 11 March 1996 on the legal
     protection of databases, and under any national implementation
     thereof, including any amended or successor version of such
     directive); and
vii. other similar, equivalent or corresponding rights throughout the
     world based on applicable law or treaty, and any national
     implementations thereof.

2. Waiver. To the greatest extent permitted by, but not in contravention
of, applicable law, Affirmer hereby overtly, fully, permanently,
irrevocably and unconditionally waives, abandons, and surrenders all of
Affirmer's Copyright and Related Rights and associated claims and causes
of action, whether now known or unknown (including existing as well as
future claims and causes of action), in the Work (i) in all territories
worldwide, (ii) for the maximum duration provided by applicable law or
treaty (including future time extensions), (iii) in any current or future
medium and for any number of copies, and (iv) for any purpose whatsoever,
including without limitation commercial, advertising or promotional
purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
member of the public at large and to the detriment of Affirmer's heirs and
successors, fully intending that such Waiver shall not be subject to
revocation, rescission, cancellation, termination, or any other legal or
equitable action to disrupt the quiet enjoyment of the Work by the public
as contemplated by Affirmer's express Statement of Purpose.

3. Public License Fallback. Should any part of the Waiver for any reason
be judged legally invalid or ineffective under applicable law, then the
Waiver shall be preserved to the maximum extent permitted taking into
account Affirmer's express Statement of Purpose. In addition, to the
extent the Waiver is so judged Affirmer hereby grants to each affected
person a royalty-free, non transferable, non sublicensable, non exclusive,
irrevocable and unconditional license to exercise Affirmer's Copyright and
Related Rights in the Work (i) in all territories worldwide, (ii) for the
maximum duration provided by applicable law or treaty (including future
time extensions), (iii) in any current or future medium and for any number
of copies, and (iv) for any purpose whatsoever, including without
limitation commercial, advertising or promotional purposes (the
"License"). The License shall be deemed effective as of the date CC0 was
applied by Affirmer to the Work. Should any part of the License for any
reason be judged legally invalid or ineffective under applicable law, such
partial invalidity or ineffectiveness shall not invalidate the remainder
of the License, and in such case Affirmer hereby affirms that he or she
will not (i) exercise any of his or her remaining Copyright and Related
Rights in the Work or (ii) assert any associated claims and causes of
action with respect to the Work, in either case contrary to Affirmer's
express Statement of Purpose.

4. Limitations and Disclaimers.

 a. No trademark or patent rights held by Affirmer are waived, abandoned,
    surrendered, licensed or otherwise affected by this document.
 b. Affirmer offers the Work as-is and makes no representations or
    warranties of any kind concerning the Work, express, implied,
    statutory or otherwise, including without limitation warranties of
    title, merchantability, fitness for a particular purpose, non
    infringement, or the absence of latent or other defects, accuracy, or
    the present or absence of errors, whether or not discoverable, all to
    the greatest extent permissible under applicable law.
 c. Affirmer disclaims responsibility for clearing rights of other persons
    that may apply to the Work or any use thereof, including without
    limitation any person's Copyright and Related Rights in the Work.
    Further, Affirmer disclaims responsibility for obtaining any necessary
    consents, permissions or other rights required for any use of the
    Work.
 d. Affirmer understands and acknowledges that Creative Commons is not a
    party to this document and has no duty or obligation with respect to
    this CC0 or use of the Work.


================================================
FILE: audio/paddleaudio/src/optional/optional.hpp
================================================

///
// optional - An implementation of std::optional with extensions
// Written in 2017 by Sy Brand (tartanllama@gmail.com, @TartanLlama)
//
// Documentation available at https://tl.tartanllama.xyz/
//
// To the extent possible under law, the author(s) have dedicated all
// copyright and related and neighboring rights to this software to the
// public domain worldwide. This software is distributed without any warranty.
//
// You should have received a copy of the CC0 Public Domain Dedication
// along with this software. If not, see
// <http://creativecommons.org/publicdomain/zero/1.0/>.
// https://github.com/TartanLlama/optional
///

#ifndef TL_OPTIONAL_HPP
#define TL_OPTIONAL_HPP

// Library version, split into major.minor.patch components (1.0.0).
#define TL_OPTIONAL_VERSION_MAJOR 1
#define TL_OPTIONAL_VERSION_MINOR 0
#define TL_OPTIONAL_VERSION_PATCH 0

#include <exception>
#include <functional>
#include <new>
#include <type_traits>
#include <utility>

// Compiler-detection flags. Each macro is defined (with no value) only when
// the corresponding compiler/version test matches.

// MSVC with _MSC_VER exactly 1900.
#if (defined(_MSC_VER) && _MSC_VER == 1900)
#define TL_OPTIONAL_MSVC2015
#endif

// GCC 4.x with minor version <= 9. clang is excluded explicitly.
#if (defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ <= 9 && \
     !defined(__clang__))
#define TL_OPTIONAL_GCC49
#endif

// GCC 5.x with minor version <= 4. clang is excluded explicitly.
#if (defined(__GNUC__) && __GNUC__ == 5 && __GNUC_MINOR__ <= 4 && \
     !defined(__clang__))
#define TL_OPTIONAL_GCC54
#endif

// GCC 5.x with minor version <= 5. clang is excluded explicitly.
#if (defined(__GNUC__) && __GNUC__ == 5 && __GNUC_MINOR__ <= 5 && \
     !defined(__clang__))
#define TL_OPTIONAL_GCC55
#endif

// Selects, per compiler, how the three "is trivially ..." queries used by the
// optional base classes are spelled:
//   TL_OPTIONAL_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T)
//   TL_OPTIONAL_IS_TRIVIALLY_COPY_ASSIGNABLE(T)
//   TL_OPTIONAL_IS_TRIVIALLY_DESTRUCTIBLE(T)
#if (defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ <= 9 && \
     !defined(__clang__))
// GCC < 5 doesn't support overloading on const&& for member functions
#define TL_OPTIONAL_NO_CONSTRR

// GCC < 5 doesn't support some standard C++11 type traits
#define TL_OPTIONAL_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) \
    std::has_trivial_copy_constructor<T>::value
#define TL_OPTIONAL_IS_TRIVIALLY_COPY_ASSIGNABLE(T) \
    std::has_trivial_copy_assign<T>::value

// This one will be different for GCC 5.7 if it's ever supported
#define TL_OPTIONAL_IS_TRIVIALLY_DESTRUCTIBLE(T) \
    std::is_trivially_destructible<T>::value

// GCC 5 < v < 8 has a bug in is_trivially_copy_constructible which breaks
// std::vector for non-copyable types
#elif (defined(__GNUC__) && __GNUC__ < 8 && !defined(__clang__))
#ifndef TL_GCC_LESS_8_TRIVIALLY_COPY_CONSTRUCTIBLE_MUTEX
#define TL_GCC_LESS_8_TRIVIALLY_COPY_CONSTRUCTIBLE_MUTEX
namespace tl {
namespace detail {
// Forwards to the standard trait for every type except std::vector.
template <class T>
struct is_trivially_copy_constructible
    : std::is_trivially_copy_constructible<T> {};
#ifdef _GLIBCXX_VECTOR
// Only available when libstdc++'s <vector> was included before this header.
// std::vector always has a user-provided copy constructor, so it is never
// trivially copy constructible, whatever its element type. Answering with the
// element's trait instead would wrongly report e.g. std::vector<int> as
// trivially copyable and select the trivial copy base for it.
template <class T, class A>
struct is_trivially_copy_constructible<std::vector<T, A>> : std::false_type {};
#endif
}
}
#endif

#define TL_OPTIONAL_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) \
    tl::detail::is_trivially_copy_constructible<T>::value
#define TL_OPTIONAL_IS_TRIVIALLY_COPY_ASSIGNABLE(T) \
    std::is_trivially_copy_assignable<T>::value
#define TL_OPTIONAL_IS_TRIVIALLY_DESTRUCTIBLE(T) \
    std::is_trivially_destructible<T>::value
#else
// Every other compiler: use the standard traits directly.
#define TL_OPTIONAL_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) \
    std::is_trivially_copy_constructible<T>::value
#define TL_OPTIONAL_IS_TRIVIALLY_COPY_ASSIGNABLE(T) \
    std::is_trivially_copy_assignable<T>::value
#define TL_OPTIONAL_IS_TRIVIALLY_DESTRUCTIBLE(T) \
    std::is_trivially_destructible<T>::value
#endif

// Defined when compiling as anything newer than C++11.
#if __cplusplus > 201103L
#define TL_OPTIONAL_CXX14
#endif

// constexpr implies const in C++11, not C++14
// TL_OPTIONAL_11_CONSTEXPR is therefore empty in exact C++11 mode, on MSVC
// 2015 and on GCC <= 4.9, and expands to `constexpr` everywhere else, so it
// can decorate non-const member functions and constructors.
#if (__cplusplus == 201103L || defined(TL_OPTIONAL_MSVC2015) || \
     defined(TL_OPTIONAL_GCC49))
#define TL_OPTIONAL_11_CONSTEXPR
#else
#define TL_OPTIONAL_11_CONSTEXPR constexpr
#endif

namespace tl {
// The macro guard makes sure these types are defined at most once per
// translation unit.
// NOTE(review): presumably the same guard appears in sibling tl:: headers
// that define identical types - confirm.
#ifndef TL_MONOSTATE_INPLACE_MUTEX
#define TL_MONOSTATE_INPLACE_MUTEX
/// Used to represent an optional with no data; essentially a bool
class monostate {};

///  A tag type to tell optional to construct its value in-place
struct in_place_t {
    // explicit, so the tag cannot be created from an empty braced list `{}`
    // by implicit conversion.
    explicit in_place_t() = default;
};
/// A tag to tell optional to construct its value in-place
static constexpr in_place_t in_place{};
#endif

// Forward declaration, so the detail traits below can name optional<T>
// before the class itself is defined.
template <class T>
class optional;

namespace detail {
#ifndef TL_TRAITS_MUTEX
#define TL_TRAITS_MUTEX
// Local equivalents of the C++14 `_t` alias templates, so the rest of the
// header can use the short spellings when built as C++11.
template <typename Type>
using remove_const_t = typename std::remove_const<Type>::type;
template <typename Type>
using remove_reference_t = typename std::remove_reference<Type>::type;
template <typename Type>
using decay_t = typename std::decay<Type>::type;
template <bool Enabled, typename Type = void>
using enable_if_t = typename std::enable_if<Enabled, Type>::type;
template <bool Condition, typename IfTrue, typename IfFalse>
using conditional_t =
    typename std::conditional<Condition, IfTrue, IfFalse>::type;

// Stand-in for C++17 std::conjunction. An empty list yields true_type; a
// single trait yields that trait; otherwise the result derives from the first
// trait whose ::value converts to false, or from the last trait if none does.
template <class... Traits>
struct conjunction : std::true_type {};
template <class Only>
struct conjunction<Only> : Only {};
template <class Head, class... Tail>
struct conjunction<Head, Tail...>
    : std::conditional<static_cast<bool>(Head::value),
                       conjunction<Tail...>,
                       Head>::type {};

// The workaround below is only enabled for libc++ in exact C++11 mode.
#if defined(_LIBCPP_VERSION) && __cplusplus == 201103L
#define TL_TRAITS_LIBCXX_MEM_FN_WORKAROUND
#endif

// In C++11 mode, there's an issue in libc++'s std::mem_fn
// which results in a hard-error when using it in a noexcept expression
// in some cases. This is a check to workaround the common failing case.
#ifdef TL_TRAITS_LIBCXX_MEM_FN_WORKAROUND
// True for pointers to member functions that are not const-qualified. The
// specializations cover the unqualified, &, &&, volatile, volatile & and
// volatile && forms; every other type is false.
template <class T>
struct is_pointer_to_non_const_member_func : std::false_type {};
template <class T, class Ret, class... Args>
struct is_pointer_to_non_const_member_func<Ret (T::*)(Args...)>
    : std::true_type {};
template <class T, class Ret, class... Args>
struct is_pointer_to_non_const_member_func<Ret (T::*)(Args...) &>
    : std::true_type {};
template <class T, class Ret, class... Args>
struct is_pointer_to_non_const_member_func<Ret (T::*)(Args...) &&>
    : std::true_type {};
template <class T, class Ret, class... Args>
struct is_pointer_to_non_const_member_func<Ret (T::*)(Args...) volatile>
    : std::true_type {};
template <class T, class Ret, class... Args>
struct is_pointer_to_non_const_member_func<Ret (T::*)(Args...) volatile &>
    : std::true_type {};
template <class T, class Ret, class... Args>
struct is_pointer_to_non_const_member_func<Ret (T::*)(Args...) volatile &&>
    : std::true_type {};

// True when T is `X const &` or `X const`; false for every other type.
template <class T>
struct is_const_or_const_ref : std::false_type {};
template <class T>
struct is_const_or_const_ref<T const &> : std::true_type {};
template <class T>
struct is_const_or_const_ref<T const> : std::true_type {};
#endif

// std::invoke from C++17
// https://stackoverflow.com/questions/38288042/c11-14-invoke-workaround
//
// Overload for member pointers (member functions and data members): the
// callable is wrapped with std::mem_fn and applied to the forwarded
// arguments. The noexcept specification and return type mirror that call.
template <
    typename Fn,
    typename... Args,
#ifdef TL_TRAITS_LIBCXX_MEM_FN_WORKAROUND
    // Removes this overload when a non-const member function pointer is
    // combined with a const argument, the case the workaround is meant to
    // keep away from libc++'s std::mem_fn.
    typename = enable_if_t<!(is_pointer_to_non_const_member_func<Fn>::value &&
                             is_const_or_const_ref<Args...>::value)>,
#endif
    typename = enable_if_t<std::is_member_pointer<decay_t<Fn>>::value>,
    // The extra defaulted non-type parameter gives this overload a template
    // parameter list different from the one below.
    int = 0>
constexpr auto invoke(Fn &&f, Args &&... args) noexcept(
    noexcept(std::mem_fn(f)(std::forward<Args>(args)...)))
    -> decltype(std::mem_fn(f)(std::forward<Args>(args)...)) {
    return std::mem_fn(f)(std::forward<Args>(args)...);
}

// Overload for everything that is not a member pointer: the callable is
// perfectly forwarded and called directly with the forwarded arguments.
template <typename Fn,
          typename... Args,
          typename = enable_if_t<!std::is_member_pointer<decay_t<Fn>>::value>>
constexpr auto invoke(Fn &&f, Args &&... args) noexcept(
    noexcept(std::forward<Fn>(f)(std::forward<Args>(args)...)))
    -> decltype(std::forward<Fn>(f)(std::forward<Args>(args)...)) {
    return std::forward<Fn>(f)(std::forward<Args>(args)...);
}

// std::invoke_result from C++17
// Primary template is declared but never defined, so invoke_result<F, Us...>
// has no `type` member when detail::invoke(F, Us...) is not a valid call.
template <class F, class, class... Us>
struct invoke_result_impl;

// Selected only when the invoke expression is well-formed: the decltype in
// the second argument evaluates to void (comma operator with `void()`),
// matching the `void` that invoke_result passes in that position.
template <class F, class... Us>
struct invoke_result_impl<
    F,
    decltype(detail::invoke(std::declval<F>(), std::declval<Us>()...), void()),
    Us...> {
    using type =
        decltype(detail::invoke(std::declval<F>(), std::declval<Us>()...));
};

// invoke_result<F, Us...>::type is the type returned by invoking F with Us.
template <class F, class... Us>
using invoke_result = invoke_result_impl<F, void, Us...>;

// Shorthand for invoke_result<F, Us...>::type.
template <class F, class... Us>
using invoke_result_t = typename invoke_result<F, Us...>::type;

// is_swappable<T, U> / is_nothrow_swappable<T, U>: whether an unqualified
// `swap(T&, U&)` call is valid, and whether it is noexcept.
#if defined(_MSC_VER) && _MSC_VER <= 1900
// TODO make a version which works with MSVC 2015
// On these MSVC versions both traits simply report true.
template <class T, class U = T>
struct is_swappable : std::true_type {};

template <class T, class U = T>
struct is_nothrow_swappable : std::true_type {};
#else
// https://stackoverflow.com/questions/26744589/what-is-a-proper-way-to-implement-is-swappable-to-test-for-the-swappable-concept
namespace swap_adl_tests {
// if swap ADL finds this then it would call std::swap otherwise (same
// signature)
struct tag {};

// Declarations only (never defined): they take part in overload resolution
// inside decltype/noexcept and return `tag` so their selection is detectable.
template <class T>
tag swap(T &, T &);
template <class T, std::size_t N>
tag swap(T (&a)[N], T (&b)[N]);

// helper functions to test if an unqualified swap is possible, and if it
// becomes std::swap
// Fallback chosen when the swap expression is ill-formed; explicitly
// noexcept(false) so is_adl_swap_noexcept reports false for it.
template <class, class>
std::false_type can_swap(...) noexcept(false);
// Preferred (int beats ...) when `swap(T&, U&)` compiles; its own noexcept
// specification mirrors that of the swap call.
template <class T,
          class U,
          class = decltype(swap(std::declval<T &>(), std::declval<U &>()))>
std::true_type can_swap(int) noexcept(noexcept(swap(std::declval<T &>(),
                                                    std::declval<U &>())));

// uses_std<T, U>(0) has a true ::value when the swap call resolves to one of
// the `tag`-returning declarations above.
template <class, class>
std::false_type uses_std(...);
template <class T, class U>
std::is_same<decltype(swap(std::declval<T &>(), std::declval<U &>())), tag>
uses_std(int);

// True when T is both nothrow move constructible and nothrow move
// assignable; arrays defer to their element type.
template <class T>
struct is_std_swap_noexcept
    : std::integral_constant<bool,
                             std::is_nothrow_move_constructible<T>::value &&
                                 std::is_nothrow_move_assignable<T>::value> {};

template <class T, std::size_t N>
struct is_std_swap_noexcept<T[N]> : is_std_swap_noexcept<T> {};

// True when the can_swap overload selected for (T, U) is noexcept.
template <class T, class U>
struct is_adl_swap_noexcept
    : std::integral_constant<bool, noexcept(can_swap<T, U>(0))> {};
}  // namespace swap_adl_tests

// Swappable when the swap call compiles and, if it resolved to the `tag`
// placeholder, T is also move assignable and move constructible.
template <class T, class U = T>
struct is_swappable
    : std::integral_constant<
          bool,
          decltype(detail::swap_adl_tests::can_swap<T, U>(0))::value &&
              (!decltype(detail::swap_adl_tests::uses_std<T, U>(0))::value ||
               (std::is_move_assignable<T>::value &&
                std::is_move_constructible<T>::value))> {};

// Arrays: same rule, except the placeholder case defers to the element type.
template <class T, std::size_t N>
struct is_swappable<T[N], T[N]>
    : std::integral_constant<
          bool,
          decltype(detail::swap_adl_tests::can_swap<T[N], T[N]>(0))::value &&
              (!decltype(
                   detail::swap_adl_tests::uses_std<T[N], T[N]>(0))::value ||
               is_swappable<T, T>::value)> {};

// Nothrow swappable when swappable and either the placeholder was selected
// and is_std_swap_noexcept<T> holds, or another swap was selected and that
// call is noexcept.
template <class T, class U = T>
struct is_nothrow_swappable
    : std::integral_constant<
          bool,
          is_swappable<T, U>::value &&
              ((decltype(detail::swap_adl_tests::uses_std<T, U>(0))::value
                    &&detail::swap_adl_tests::is_std_swap_noexcept<T>::value) ||
               (!decltype(detail::swap_adl_tests::uses_std<T, U>(0))::value &&
                    detail::swap_adl_tests::is_adl_swap_noexcept<T,
                                                                 U>::value))> {
};
#endif
#endif

// Stand-in for C++17 std::void_t: maps any list of types to void. It goes
// through a helper struct rather than aliasing void directly.
template <class... Ignored>
struct voider {
    typedef void type;
};
template <class... Types>
using void_t = typename voider<Types...>::type;

// Detects tl::optional: is_optional<X>::value is true exactly when X, once
// decayed (references and cv-qualifiers stripped), is optional<V> for some V.
template <class Candidate>
struct is_optional_impl : std::false_type {};
template <class Value>
struct is_optional_impl<optional<Value>> : std::true_type {};
template <class Candidate>
using is_optional = is_optional_impl<decay_t<Candidate>>;

// Change void to tl::monostate
// Any non-void U is passed through unchanged.
template <class U>
using fixup_void = conditional_t<std::is_void<U>::value, monostate, U>;

// optional<R>, where R is the result of invoking F with U (void replaced by
// monostate). The defaulted third parameter makes the alias ill-formed, and
// thus SFINAE-friendly, when F cannot be invoked with U.
template <class F, class U, class = invoke_result_t<F, U>>
using get_map_return = optional<fixup_void<invoke_result_t<F, U>>>;

// Check if invoking F for some Us returns void
// The primary template is declared but not defined; the partial
// specialization exists only when F is invocable with U..., so
// returns_void<F, U...>::value is unavailable for non-invocable combinations.
template <class F, class = void, class... U>
struct returns_void_impl;
template <class F, class... U>
struct returns_void_impl<F, void_t<invoke_result_t<F, U...>>, U...>
    : std::is_void<invoke_result_t<F, U...>> {};
template <class F, class... U>
using returns_void = returns_void_impl<F, void, U...>;

// SFINAE helper: well-formed (void) only when invoking T&& with U... returns
// void.
template <class T, class... U>
using enable_if_ret_void = enable_if_t<returns_void<T &&, U...>::value>;

// SFINAE helper: well-formed (void) only when invoking T&& with U... returns
// something other than void.
template <class T, class... U>
using disable_if_ret_void = enable_if_t<!returns_void<T &&, U...>::value>;

// Constraint for constructing an optional<T> from a value of type U: T must
// be constructible from U&&, and U (decayed) must be neither the in_place_t
// tag nor optional<T> itself.
template <class T, class U>
using enable_forward_value =
    detail::enable_if_t<std::is_constructible<T, U &&>::value &&
                        !std::is_same<detail::decay_t<U>, in_place_t>::value &&
                        !std::is_same<optional<T>, detail::decay_t<U>>::value>;

// Constraint for constructing an optional<T> from an optional<U>: T must be
// constructible from Other, and T must be neither constructible nor
// convertible from any cv/ref form of optional<U> itself.
// NOTE(review): Other is presumably `const U &` or `U &&` depending on the
// overload - confirm at the call sites.
template <class T, class U, class Other>
using enable_from_other = detail::enable_if_t<
    std::is_constructible<T, Other>::value &&
    !std::is_constructible<T, optional<U> &>::value &&
    !std::is_constructible<T, optional<U> &&>::value &&
    !std::is_constructible<T, const optional<U> &>::value &&
    !std::is_constructible<T, const optional<U> &&>::value &&
    !std::is_convertible<optional<U> &, T>::value &&
    !std::is_convertible<optional<U> &&, T>::value &&
    !std::is_convertible<const optional<U> &, T>::value &&
    !std::is_convertible<const optional<U> &&, T>::value>;

// Constraint for assigning a value of type U to an optional<T>: U (decayed)
// must not be optional<T>, the case "T is scalar and U decays to T" is
// excluded, and T must be both constructible and assignable from U.
template <class T, class U>
using enable_assign_forward = detail::enable_if_t<
    !std::is_same<optional<T>, detail::decay_t<U>>::value &&
    !detail::conjunction<std::is_scalar<T>,
                         std::is_same<T, detail::decay_t<U>>>::value &&
    std::is_constructible<T, U>::value && std::is_assignable<T &, U>::value>;

// Constraint for assigning an optional<U> to an optional<T>: T must be
// constructible and assignable from Other, and must be neither constructible,
// convertible nor assignable from any cv/ref form of optional<U> itself.
// NOTE(review): Other is presumably `const U &` or `U &&` depending on the
// overload - confirm at the call sites.
template <class T, class U, class Other>
using enable_assign_from_other = detail::enable_if_t<
    std::is_constructible<T, Other>::value &&
    std::is_assignable<T &, Other>::value &&
    !std::is_constructible<T, optional<U> &>::value &&
    !std::is_constructible<T, optional<U> &&>::value &&
    !std::is_constructible<T, const optional<U> &>::value &&
    !std::is_constructible<T, const optional<U> &&>::value &&
    !std::is_convertible<optional<U> &, T>::value &&
    !std::is_convertible<optional<U> &&, T>::value &&
    !std::is_convertible<const optional<U> &, T>::value &&
    !std::is_convertible<const optional<U> &&, T>::value &&
    !std::is_assignable<T &, optional<U> &>::value &&
    !std::is_assignable<T &, optional<U> &&>::value &&
    !std::is_assignable<T &, const optional<U> &>::value &&
    !std::is_assignable<T &, const optional<U> &&>::value>;

// Raw storage for an optional: a union holding either a placeholder or a T,
// plus a flag saying which one is live. The bool template parameter selects
// between this primary template, used when T is NOT trivially destructible
// and so needs a user-written destructor, and the <T, true> specialization.
template <class T, bool = ::std::is_trivially_destructible<T>::value>
struct optional_storage_base {
    // Empty state: only the placeholder member is initialized.
    TL_OPTIONAL_11_CONSTEXPR optional_storage_base() noexcept
        : m_dummy(),
          m_has_value(false) {}

    // Engaged state: constructs the T directly from the forwarded arguments.
    template <class... Args>
    TL_OPTIONAL_11_CONSTEXPR optional_storage_base(in_place_t, Args &&... args)
        : m_value(std::forward<Args>(args)...), m_has_value(true) {}

    // Runs T's destructor only if a value is actually stored.
    ~optional_storage_base() {
        if (!m_has_value) {
            return;
        }
        m_value.~T();
        m_has_value = false;
    }

    struct dummy {};
    union {
        dummy m_dummy;
        T m_value;
    };

    bool m_has_value;
};

// Storage variant for trivially destructible T. It declares no destructor,
// which keeps the storage (and whatever derives from it) trivially
// destructible as well.
template <class T>
struct optional_storage_base<T, true> {
    // Empty state: only the placeholder member is initialized.
    TL_OPTIONAL_11_CONSTEXPR optional_storage_base() noexcept
        : m_dummy(),
          m_has_value(false) {}

    // Engaged state: constructs the T directly from the forwarded arguments.
    template <class... Args>
    TL_OPTIONAL_11_CONSTEXPR optional_storage_base(in_place_t, Args &&... args)
        : m_value(std::forward<Args>(args)...), m_has_value(true) {}

    // Intentionally no destructor here (see the comment above the struct).

    struct dummy {};
    union {
        dummy m_dummy;
        T m_value;
    };

    bool m_has_value = false;
};

// This base class provides some handy member functions which can be used in
// further derived classes: destroying / constructing the stored value in
// place, optional-to-optional assignment, and accessors for the raw storage.
template <class T>
struct optional_operations_base : optional_storage_base<T> {
    using optional_storage_base<T>::optional_storage_base;

    // Destroys the stored value and marks the storage empty. T's destructor
    // is run unconditionally, so this must only be called while a value is
    // present.
    void hard_reset() noexcept {
        get().~T();
        this->m_has_value = false;
    }

    // Constructs a T in the storage from the forwarded arguments and marks
    // the storage engaged. The storage must not already hold a live value.
    //
    // Not noexcept: T's constructor may throw (e.g. std::bad_alloc while
    // copying a std::string), and that exception has to reach the caller
    // instead of ending in std::terminate. The flag is only set after
    // construction succeeded, so a throw leaves the storage empty.
    template <class... Args>
    void construct(Args &&... args) {
        new (std::addressof(this->m_value)) T(std::forward<Args>(args)...);
        this->m_has_value = true;
    }

    // Makes *this hold what rhs holds:
    //   both engaged      -> assign the value
    //   only *this engaged -> destroy the value, become empty
    //   only rhs engaged   -> construct a value from rhs
    //   both empty         -> nothing to do
    // Opt's value category is forwarded, so an rvalue rhs is moved from.
    template <class Opt>
    void assign(Opt &&rhs) {
        if (this->has_value()) {
            if (rhs.has_value()) {
                this->m_value = std::forward<Opt>(rhs).get();
            } else {
                this->m_value.~T();
                this->m_has_value = false;
            }
        }

        else if (rhs.has_value()) {
            construct(std::forward<Opt>(rhs).get());
        }
    }

    // Whether a value is currently stored.
    bool has_value() const { return this->m_has_value; }

    // Unchecked access to the stored value; the overloads propagate the
    // const-ness and value category of *this. No check that a value is held.
    TL_OPTIONAL_11_CONSTEXPR T &get() & { return this->m_value; }
    TL_OPTIONAL_11_CONSTEXPR const T &get() const & { return this->m_value; }
    TL_OPTIONAL_11_CONSTEXPR T &&get() && { return std::move(this->m_value); }
#ifndef TL_OPTIONAL_NO_CONSTRR
    // Omitted on compilers that cannot overload on const&&.
    constexpr const T &&get() const && { return std::move(this->m_value); }
#endif
};

// This class manages conditionally having a trivial copy constructor
// This primary template is used when T is trivially copy constructible: it
// declares no copy constructor of its own and only inherits the base
// constructors.
template <class T, bool = TL_OPTIONAL_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T)>
struct optional_copy_base : optional_operations_base<T> {
    using optional_operations_base<T>::optional_operations_base;
};

// Variant for T that is not trivially copy constructible: the copy
// constructor is written by hand, everything else stays defaulted.
template <class T>
struct optional_copy_base<T, false> : optional_operations_base<T> {
    using optional_operations_base<T>::optional_operations_base;

    optional_copy_base() = default;

    // Start from an empty base, then copy-construct the value in place when
    // the source holds one.
    optional_copy_base(const optional_copy_base &other)
        : optional_operations_base<T>() {
        if (!other.has_value()) {
            this->m_has_value = false;
            return;
        }
        this->construct(other.get());
    }

    optional_copy_base(optional_copy_base &&other) = default;
    optional_copy_base &operator=(const optional_copy_base &other) = default;
    optional_copy_base &operator=(optional_copy_base &&other) = default;
};

// This class manages conditionally having a trivial move constructor
// Unfortunately there's no way to achieve this in GCC < 5 AFAIK, since it
// doesn't implement an analogue to std::is_trivially_move_constructible. We
// have to make do with a non-trivial move constructor even if T is trivially
// move constructible
#ifndef TL_OPTIONAL_GCC49
// Primary template, used when T is trivially move constructible: no move
// constructor is declared here, only the base constructors are inherited.
template <class T, bool = std::is_trivially_move_constructible<T>::value>
struct optional_move_base : optional_copy_base<T> {
    using optional_copy_base<T>::optional_copy_base;
};
#else
// GCC <= 4.9: only declared, with the flag defaulting to false, so the
// <T, false> specialization is always the one used.
template <class T, bool = false>
struct optional_move_base;
#endif
// Variant with a hand-written move constructor; all other special members
// stay defaulted.
template <class T>
struct optional_move_base<T, false> : optional_copy_base<T> {
    using optional_copy_base<T>::optional_copy_base;

    optional_move_base() = default;
    optional_move_base(const optional_move_base &other) = default;

    // The base starts out empty; when the source holds a value it is
    // move-constructed in place. noexcept exactly when T's move constructor
    // is.
    optional_move_base(optional_move_base &&other) noexcept(
        std::is_nothrow_move_constructible<T>::value) {
        if (!other.has_value()) {
            this->m_has_value = false;
            return;
        }
        this->construct(std::move(other.get()));
    }

    optional_move_base &operator=(const optional_move_base &other) = default;
    optional_move_base &operator=(optional_move_base &&other) = default;
};

// This class manages conditionally having a trivial copy assignment operator
// Primary template, used when T is trivially copy assignable, trivially copy
// constructible and trivially destructible: no assignment operator is
// declared here, only the base constructors are inherited.
template <class T,
          bool = TL_OPTIONAL_IS_TRIVIALLY_COPY_ASSIGNABLE(T) &&
                 TL_OPTIONAL_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) &&
                 TL_OPTIONAL_IS_TRIVIALLY_DESTRUCTIBLE(T)>
struct optional_copy_assign_base : optional_move_base<T> {
    using optional_move_base<T>::optional_move_base;
};

// Variant with a hand-written copy assignment operator, used when any of the
// three triviality conditions on T fails. Everything else is defaulted.
template <class T>
struct optional_copy_assign_base<T, false> : optional_move_base<T> {
    using optional_move_base<T>::optional_move_base;

    optional_copy_assign_base() = default;
    optional_copy_assign_base(const optional_copy_assign_base &rhs) = default;

    optional_copy_assign_base(optional_copy_assign_base &&rhs) = default;
    // Delegates to assign(), which covers all four engaged/empty
    // combinations of *this and rhs.
    optional_copy_assign_base &operator=(const optional_copy_assign_base &rhs) {
        this->assign(rhs);
        return *this;
    }
    optional_copy_assign_base &operator=(optional_copy_assign_base &&rhs) =
        default;
};

// This class manages conditionally having a trivial move assignment operator
// Unfortunately there's no way to achieve this in GCC < 5 AFAIK, since it
// doesn't implement an analogue to std::is_trivially_move_assignable. We have
// to make do with a non-trivial move assignment operator even if T is trivially
// move assignable
#ifndef TL_OPTIONAL_GCC49
// Primary template, used when T is trivially destructible, trivially move
// constructible and trivially move assignable: no assignment operator is
// declared here, only the base constructors are inherited.
template <class T,
          bool = std::is_trivially_destructible<T>::value
              &&std::is_trivially_move_constructible<T>::value
                  &&std::is_trivially_move_assignable<T>::value>
struct optional_move_assign_base : optional_copy_assign_base<T> {
    using optional_copy_assign_base<T>::optional_copy_assign_base;
};
#else
// GCC <= 4.9: only declared, with the flag defaulting to false, so the
// <T, false> specialization is always the one used.
template <class T, bool = false>
struct optional_move_assign_base;
#endif

template <class T>
struct optional_move_assign_base<T, false> : optional_copy_assign_base<T> {
    using optional_copy_assign_base<T>::optional_copy_assign_base;

    optional_move_assign_base() = default;
    optional_move_assign_base(const optional_move_assign_base &rhs) = default;

    optional_move_assign_base(optional_move_assign_base &&rhs) = default;

    optional_move_assign_base &operator=(const optional_move_assign_base &rhs) =
        default;

    optional_move_assign_base &
    operator=(optional_move_assign_base &&rhs) noexcept(
        std::is_nothrow_move_constructible<T>::value
            &&std::is_nothrow_move_assignable<T>::value) {
        this->assign(std::move(rhs));
        return *this;
    }
};

// optional_delete_ctor_base is an empty mix-in whose copy and move
// constructors are defaulted or deleted according to EnableCopy / EnableMove.
// By default those flags follow whether T is copy / move constructible.
// Assignment operators are always defaulted here.
//
// Primary template: copy and move construction both enabled.
template <class T,
          bool EnableCopy = std::is_copy_constructible<T>::value,
          bool EnableMove = std::is_move_constructible<T>::value>
struct optional_delete_ctor_base {
    optional_delete_ctor_base() = default;

    optional_delete_ctor_base(optional_delete_ctor_base &&) noexcept = default;
    optional_delete_ctor_base(const optional_delete_ctor_base &) = default;

    optional_delete_ctor_base &operator=(
        optional_delete_ctor_base &&) noexcept = default;
    optional_delete_ctor_base &operator=(const optional_delete_ctor_base &) =
        default;
};

// Copy construction enabled, move construction deleted.
template <class T>
struct optional_delete_ctor_base<T, true, false> {
    optional_delete_ctor_base() = default;

    optional_delete_ctor_base(optional_delete_ctor_base &&) noexcept = delete;
    optional_delete_ctor_base(const optional_delete_ctor_base &) = default;

    optional_delete_ctor_base &operator=(
        optional_delete_ctor_base &&) noexcept = default;
    optional_delete_ctor_base &operator=(const optional_delete_ctor_base &) =
        default;
};

// Copy construction deleted, move construction enabled.
template <class T>
struct optional_delete_ctor_base<T, false, true> {
    optional_delete_ctor_base() = default;

    optional_delete_ctor_base(optional_delete_ctor_base &&) noexcept = default;
    optional_delete_ctor_base(const optional_delete_ctor_base &) = delete;

    optional_delete_ctor_base &operator=(
        optional_delete_ctor_base &&) noexcept = default;
    optional_delete_ctor_base &operator=(const optional_delete_ctor_base &) =
        default;
};

// Copy and move construction both deleted.
template <class T>
struct optional_delete_ctor_base<T, false, false> {
    optional_delete_ctor_base() = default;

    optional_delete_ctor_base(optional_delete_ctor_base &&) noexcept = delete;
    optional_delete_ctor_base(const optional_delete_ctor_base &) = delete;

    optional_delete_ctor_base &operator=(
        optional_delete_ctor_base &&) noexcept = default;
    optional_delete_ctor_base &operator=(const optional_delete_ctor_base &) =
        default;
};

// optional_delete_assign_base is an empty mix-in whose copy and move
// ASSIGNMENT OPERATORS are defaulted or deleted according to EnableCopy /
// EnableMove. By default a flag is on only when T is both constructible and
// assignable in the corresponding way. Constructors are always defaulted here.
//
// Primary template: copy and move assignment both enabled.
template <class T,
          bool EnableCopy = (std::is_copy_constructible<T>::value &&
                             std::is_copy_assignable<T>::value),
          bool EnableMove = (std::is_move_constructible<T>::value &&
                             std::is_move_assignable<T>::value)>
struct optional_delete_assign_base {
    optional_delete_assign_base() = default;
    optional_delete_assign_base(optional_delete_assign_base &&) noexcept =
        default;
    optional_delete_assign_base(const optional_delete_assign_base &) = default;

    optional_delete_assign_base &operator=(
        optional_delete_assign_base &&) noexcept = default;
    optional_delete_assign_base &operator=(
        const optional_delete_assign_base &) = default;
};

// Copy assignment enabled, move assignment deleted.
template <class T>
struct optional_delete_assign_base<T, true, false> {
    optional_delete_assign_base() = default;
    optional_delete_assign_base(optional_delete_assign_base &&) noexcept =
        default;
    optional_delete_assign_base(const optional_delete_assign_base &) = default;

    optional_delete_assign_base &operator=(
        optional_delete_assign_base &&) noexcept = delete;
    optional_delete_assign_base &operator=(
        const optional_delete_assign_base &) = default;
};

// Copy assignment deleted, move assignment enabled.
template <class T>
struct optional_delete_assign_base<T, false, true> {
    optional_delete_assign_base() = default;
    optional_delete_assign_base(optional_delete_assign_base &&) noexcept =
        default;
    optional_delete_assign_base(const optional_delete_assign_base &) = default;

    optional_delete_assign_base &operator=(
        optional_delete_assign_base &&) noexcept = default;
    optional_delete_assign_base &operator=(
        const optional_delete_assign_base &) = delete;
};

// Copy and move assignment both deleted.
template <class T>
struct optional_delete_assign_base<T, false, false> {
    optional_delete_assign_base() = default;
    optional_delete_assign_base(optional_delete_assign_base &&) noexcept =
        default;
    optional_delete_assign_base(const optional_delete_assign_base &) = default;

    optional_delete_assign_base &operator=(
        optional_delete_assign_base &&) noexcept = delete;
    optional_delete_assign_base &operator=(
        const optional_delete_assign_base &) = delete;
};

}  // namespace detail

/// Tag type used to denote an optional that holds no value.
///
/// It has no default constructor: the only constructor takes two
/// `do_not_use` arguments, so an instance cannot be created from `{}`.
struct nullopt_t {
    /// Placeholder argument type for the constructor below.
    struct do_not_use {};

    explicit constexpr nullopt_t(do_not_use /*unused*/,
                                 do_not_use /*unused*/) noexcept {}
};

/// The canonical `nullopt_t` instance; pass it wherever an empty optional is
/// meant.
static constexpr nullopt_t nullopt{nullopt_t::do_not_use{},
                                   nullopt_t::do_not_use{}};

/// Exception thrown by `optional::value()` when the optional is empty.
class bad_optional_access : public std::exception {
  public:
    bad_optional_access() = default;

    /// Returns a fixed, statically allocated description of the error.
    ///
    /// Marked `override` so the compiler rejects the declaration if it ever
    /// stops matching `std::exception::what()` instead of silently hiding it.
    const char *what() const noexcept override {
        return "Optional has no value";
    }
};

/// An optional object is an object that contains the storage for another
/// object and manages the lifetime of this contained object, if any. The
/// contained object may be initialized after the optional object has been
/// initialized, and may be destroyed before the optional object has been
/// destroyed. The initialization state of the contained object is tracked by
/// the optional object.
template <class T>
class optional : private detail::optional_move_assign_base<T>,
                 private detail::optional_delete_ctor_base<T>,
                 private detail::optional_delete_assign_base<T> {
    using base = detail::optional_move_assign_base<T>;

    static_assert(!std::is_same<T, in_place_t>::value,
                  "instantiation of optional with in_place_t is ill-formed");
    static_assert(!std::is_same<detail::decay_t<T>, nullopt_t>::value,
                  "instantiation of optional with nullopt_t is ill-formed");

  public:
// The different versions for C++14 and 11 are needed because deduced return
// types are not SFINAE-safe. This provides better support for things like
// generic lambdas. C.f.
// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0826r0.html
//
// Both preprocessor branches below define the same four `and_then` overloads
// (one per value category of `*this`); they differ only in whether the return
// type is deduced (`auto`) or spelled out with `detail::invoke_result_t`.
#if defined(TL_OPTIONAL_CXX14) && !defined(TL_OPTIONAL_GCC49) && \
    !defined(TL_OPTIONAL_GCC54) && !defined(TL_OPTIONAL_GCC55)
    /// Carries out some operation which returns an optional on the stored
    /// object if there is one.
    ///
    /// If `*this` holds a value, returns the result of invoking `f` with the
    /// stored value (as `T &`); otherwise returns an empty optional of the
    /// type `f` would have returned. Compilation fails with a static_assert
    /// if that type does not satisfy `detail::is_optional`.
    template <class F>
    TL_OPTIONAL_11_CONSTEXPR auto and_then(F &&f) & {
        using result = detail::invoke_result_t<F, T &>;
        static_assert(detail::is_optional<result>::value,
                      "F must return an optional");

        return has_value() ? detail::invoke(std::forward<F>(f), **this)
                           : result(nullopt);
    }

    /// Rvalue overload: `f` receives the stored value as `T &&`.
    template <class F>
    TL_OPTIONAL_11_CONSTEXPR auto and_then(F &&f) && {
        using result = detail::invoke_result_t<F, T &&>;
        static_assert(detail::is_optional<result>::value,
                      "F must return an optional");

        return has_value()
                   ? detail::invoke(std::forward<F>(f), std::move(**this))
                   : result(nullopt);
    }

    /// Const lvalue overload: `f` receives the stored value as `const T &`.
    template <class F>
    constexpr auto and_then(F &&f) const & {
        using result = detail::invoke_result_t<F, const T &>;
        static_assert(detail::is_optional<result>::value,
                      "F must return an optional");

        return has_value() ? detail::invoke(std::forward<F>(f), **this)
                           : result(nullopt);
    }

#ifndef TL_OPTIONAL_NO_CONSTRR
    /// Const rvalue overload: `f` receives the stored value as `const T &&`.
    /// Compiled out when TL_OPTIONAL_NO_CONSTRR is defined.
    template <class F>
    constexpr auto and_then(F &&f) const && {
        using result = detail::invoke_result_t<F, const T &&>;
        static_assert(detail::is_optional<result>::value,
                      "F must return an optional");

        return has_value()
                   ? detail::invoke(std::forward<F>(f), std::move(**this))
                   : result(nullopt);
    }
#endif
#else
    /// Carries out some operation which returns an optional on the stored
    /// object if there is one.
    ///
    /// Same contract as the deduced-return-type branch above, but the return
    /// type is written explicitly as `detail::invoke_result_t<F, T &>`.
    template <class F>
    TL_OPTIONAL_11_CONSTEXPR detail::invoke_result_t<F, T &> and_then(F &&f) & {
        using result = detail::invoke_result_t<F, T &>;
        static_assert(detail::is_optional<result>::value,
                      "F must return an optional");

        return has_value() ? detail::invoke(std::forward<F>(f), **this)
                           : result(nullopt);
    }

    /// Rvalue overload: `f` receives the stored value as `T &&`.
    template <class F>
    TL_OPTIONAL_11_CONSTEXPR detail::invoke_result_t<F, T &&> and_then(
        F &&f) && {
        using result = detail::invoke_result_t<F, T &&>;
        static_assert(detail::is_optional<result>::value,
                      "F must return an optional");

        return has_value()
                   ? detail::invoke(std::forward<F>(f), std::move(**this))
                   : result(nullopt);
    }

    /// Const lvalue overload: `f` receives the stored value as `const T &`.
    template <class F>
    constexpr detail::invoke_result_t<F, const T &> and_then(F &&f) const & {
        using result = detail::invoke_result_t<F, const T &>;
        static_assert(detail::is_optional<result>::value,
                      "F must return an optional");

        return has_value() ? detail::invoke(std::forward<F>(f), **this)
                           : result(nullopt);
    }

#ifndef TL_OPTIONAL_NO_CONSTRR
    /// Const rvalue overload: `f` receives the stored value as `const T &&`.
    /// Compiled out when TL_OPTIONAL_NO_CONSTRR is defined.
    template <class F>
    constexpr detail::invoke_result_t<F, const T &&> and_then(F &&f) const && {
        using result = detail::invoke_result_t<F, const T &&>;
        static_assert(detail::is_optional<result>::value,
                      "F must return an optional");

        return has_value()
                   ? detail::invoke(std::forward<F>(f), std::move(**this))
                   : result(nullopt);
    }
#endif
#endif

#if defined(TL_OPTIONAL_CXX14) && !defined(TL_OPTIONAL_GCC49) && \
    !defined(TL_OPTIONAL_GCC54) && !defined(TL_OPTIONAL_GCC55)
    /// Carries out some operation on the stored object if there is one.
    ///
    /// Every overload forwards `*this` (keeping its value category) and `f`
    /// to `optional_map_impl`, which is defined outside this class, and
    /// returns whatever that helper returns.
    // NOTE(review): presumably the helper yields an optional wrapping
    // `f(value)`, or an empty optional when `*this` is empty -- confirm
    // against optional_map_impl.
    template <class F>
    TL_OPTIONAL_11_CONSTEXPR auto map(F &&f) & {
        return optional_map_impl(*this, std::forward<F>(f));
    }

    /// Rvalue overload: passes `std::move(*this)` to the helper.
    template <class F>
    TL_OPTIONAL_11_CONSTEXPR auto map(F &&f) && {
        return optional_map_impl(std::move(*this), std::forward<F>(f));
    }

    /// Const lvalue overload.
    template <class F>
    constexpr auto map(F &&f) const & {
        return optional_map_impl(*this, std::forward<F>(f));
    }

    /// Const rvalue overload.
    // NOTE(review): unlike the #else branch, this overload is not guarded by
    // TL_OPTIONAL_NO_CONSTRR.
    template <class F>
    constexpr auto map(F &&f) const && {
        return optional_map_impl(std::move(*this), std::forward<F>(f));
    }
#else
    /// Carries out some operation on the stored object if there is one.
    ///
    /// Same forwarding to `optional_map_impl` as the branch above; the return
    /// type is spelled with `decltype` instead of being deduced.
    template <class F>
    TL_OPTIONAL_11_CONSTEXPR decltype(
        optional_map_impl(std::declval<optional &>(), std::declval<F &&>()))
    map(F &&f) & {
        return optional_map_impl(*this, std::forward<F>(f));
    }

    /// Rvalue overload: passes `std::move(*this)` to the helper.
    template <class F>
    TL_OPTIONAL_11_CONSTEXPR decltype(
        optional_map_impl(std::declval<optional &&>(), std::declval<F &&>()))
    map(F &&f) && {
        return optional_map_impl(std::move(*this), std::forward<F>(f));
    }

    /// Const lvalue overload.
    template <class F>
    constexpr decltype(optional_map_impl(std::declval<const optional &>(),
                                         std::declval<F &&>()))
    map(F &&f) const & {
        return optional_map_impl(*this, std::forward<F>(f));
    }

#ifndef TL_OPTIONAL_NO_CONSTRR
    /// Const rvalue overload. Compiled out when TL_OPTIONAL_NO_CONSTRR is
    /// defined.
    template <class F>
    constexpr decltype(optional_map_impl(std::declval<const optional &&>(),
                                         std::declval<F &&>()))
    map(F &&f) const && {
        return optional_map_impl(std::move(*this), std::forward<F>(f));
    }
#endif
#endif

#if defined(TL_OPTIONAL_CXX14) && !defined(TL_OPTIONAL_GCC49) && \
    !defined(TL_OPTIONAL_GCC54) && !defined(TL_OPTIONAL_GCC55)
    /// Carries out some operation on the stored object if there is one.
    ///
    /// `transform` has the same bodies as `map`: every overload forwards
    /// `*this` (keeping its value category) and `f` to `optional_map_impl`,
    /// which is defined outside this class, and returns its result.
    template <class F>
    TL_OPTIONAL_11_CONSTEXPR auto transform(F &&f) & {
        return optional_map_impl(*this, std::forward<F>(f));
    }

    /// Rvalue overload: passes `std::move(*this)` to the helper.
    template <class F>
    TL_OPTIONAL_11_CONSTEXPR auto transform(F &&f) && {
        return optional_map_impl(std::move(*this), std::forward<F>(f));
    }

    /// Const lvalue overload.
    template <class F>
    constexpr auto transform(F &&f) const & {
        return optional_map_impl(*this, std::forward<F>(f));
    }

    /// Const rvalue overload.
    // NOTE(review): unlike the #else branch, this overload is not guarded by
    // TL_OPTIONAL_NO_CONSTRR.
    template <class F>
    constexpr auto transform(F &&f) const && {
        return optional_map_impl(std::move(*this), std::forward<F>(f));
    }
#else
    /// Carries out some operation on the stored object if there is one.
    ///
    /// Same forwarding to `optional_map_impl` as the branch above; the return
    /// type is spelled with `decltype` instead of being deduced.
    template <class F>
    TL_OPTIONAL_11_CONSTEXPR decltype(
        optional_map_impl(std::declval<optional &>(), std::declval<F &&>()))
    transform(F &&f) & {
        return optional_map_impl(*this, std::forward<F>(f));
    }

    /// Rvalue overload: passes `std::move(*this)` to the helper.
    template <class F>
    TL_OPTIONAL_11_CONSTEXPR decltype(
        optional_map_impl(std::declval<optional &&>(), std::declval<F &&>()))
    transform(F &&f) && {
        return optional_map_impl(std::move(*this), std::forward<F>(f));
    }

    /// Const lvalue overload.
    template <class F>
    constexpr decltype(optional_map_impl(std::declval<const optional &>(),
                                         std::declval<F &&>()))
    transform(F &&f) const & {
        return optional_map_impl(*this, std::forward<F>(f));
    }

#ifndef TL_OPTIONAL_NO_CONSTRR
    /// Const rvalue overload. Compiled out when TL_OPTIONAL_NO_CONSTRR is
    /// defined.
    template <class F>
    constexpr decltype(optional_map_impl(std::declval<const optional &&>(),
                                         std::declval<F &&>()))
    transform(F &&f) const && {
        return optional_map_impl(std::move(*this), std::forward<F>(f));
    }
#endif
#endif

    /// Invokes `f` when the optional is empty.
    ///
    /// Two flavours exist per value category, selected on whether `f`
    /// returns void:
    ///  - void `f`: a non-empty optional is returned unchanged; otherwise
    ///    `f` is called for its side effects and an empty optional results.
    ///  - non-void `f`: a non-empty optional is returned unchanged;
    ///    otherwise the result of `f()` is returned.
    template <class F, detail::enable_if_ret_void<F> * = nullptr>
    optional<T> TL_OPTIONAL_11_CONSTEXPR or_else(F &&f) & {
        if (!has_value()) {
            std::forward<F>(f)();
            return nullopt;
        }
        return *this;
    }

    template <class F, detail::disable_if_ret_void<F> * = nullptr>
    optional<T> TL_OPTIONAL_11_CONSTEXPR or_else(F &&f) & {
        return has_value() ? *this : std::forward<F>(f)();
    }

    /// Rvalue overloads: a held value is moved into the result.
    template <class F, detail::enable_if_ret_void<F> * = nullptr>
    optional<T> or_else(F &&f) && {
        if (!has_value()) {
            std::forward<F>(f)();
            return nullopt;
        }
        return std::move(*this);
    }

    template <class F, detail::disable_if_ret_void<F> * = nullptr>
    optional<T> TL_OPTIONAL_11_CONSTEXPR or_else(F &&f) && {
        return has_value() ? std::move(*this) : std::forward<F>(f)();
    }

    /// Const lvalue overloads.
    template <class F, detail::enable_if_ret_void<F> * = nullptr>
    optional<T> or_else(F &&f) const & {
        if (!has_value()) {
            std::forward<F>(f)();
            return nullopt;
        }
        return *this;
    }

    template <class F, detail::disable_if_ret_void<F> * = nullptr>
    optional<T> TL_OPTIONAL_11_CONSTEXPR or_else(F &&f) const & {
        return has_value() ? *this : std::forward<F>(f)();
    }

#ifndef TL_OPTIONAL_NO_CONSTRR
    /// Const rvalue overloads; compiled out when TL_OPTIONAL_NO_CONSTRR is
    /// defined.
    template <class F, detail::enable_if_ret_void<F> * = nullptr>
    optional<T> or_else(F &&f) const && {
        if (!has_value()) {
            std::forward<F>(f)();
            return nullopt;
        }
        return std::move(*this);
    }

    template <class F, detail::disable_if_ret_void<F> * = nullptr>
    optional<T> or_else(F &&f) const && {
        return has_value() ? std::move(*this) : std::forward<F>(f)();
    }
#endif

    /// Maps the stored value with `f` if there is one, otherwise returns `u`.
    ///
    /// The return type is the deduced template argument `U` itself, so the
    /// result of `f` must be convertible to it. Because `u` is a forwarding
    /// reference, `U` is an lvalue reference type when an lvalue is passed.
    template <class F, class U>
    U map_or(F &&f, U &&u) & {
        return has_value() ? detail::invoke(std::forward<F>(f), **this)
                           : std::forward<U>(u);
    }

    /// Rvalue overload: `f` receives the stored value as an rvalue.
    template <class F, class U>
    U map_or(F &&f, U &&u) && {
        return has_value()
                   ? detail::invoke(std::forward<F>(f), std::move(**this))
                   : std::forward<U>(u);
    }

    /// Const lvalue overload: `f` receives the stored value as `const T &`.
    template <class F, class U>
    U map_or(F &&f, U &&u) const & {
        return has_value() ? detail::invoke(std::forward<F>(f), **this)
                           : std::forward<U>(u);
    }

#ifndef TL_OPTIONAL_NO_CONSTRR
    /// Const rvalue overload; compiled out when TL_OPTIONAL_NO_CONSTRR is
    /// defined.
    template <class F, class U>
    U map_or(F &&f, U &&u) const && {
        return has_value()
                   ? detail::invoke(std::forward<F>(f), std::move(**this))
                   : std::forward<U>(u);
    }
#endif

    /// Maps the stored value with `f` if there is one, otherwise calls
    /// `u` and returns the result.
    ///
    /// The return type is whatever invoking `u` with no arguments yields
    /// (`detail::invoke_result_t<U>`), so the result of `f` must be
    /// convertible to that type. Only one of `f` / `u` is called.
    template <class F, class U>
    detail::invoke_result_t<U> map_or_else(F &&f, U &&u) & {
        return has_value() ? detail::invoke(std::forward<F>(f), **this)
                           : std::forward<U>(u)();
    }

    /// Rvalue overload: `f` receives the stored value as an rvalue.
    template <class F, class U>
    detail::invoke_result_t<U> map_or_else(F &&f, U &&u) && {
        return has_value()
                   ? detail::invoke(std::forward<F>(f), std::move(**this))
                   : std::forward<U>(u)();
    }

    /// Const lvalue overload: `f` receives the stored value as `const T &`.
    template <class F, class U>
    detail::invoke_result_t<U> map_or_else(F &&f, U &&u) const & {
        return has_value() ? detail::invoke(std::forward<F>(f), **this)
                           : std::forward<U>(u)();
    }

#ifndef TL_OPTIONAL_NO_CONSTRR
    /// Const rvalue overload; compiled out when TL_OPTIONAL_NO_CONSTRR is
    /// defined.
    template <class F, class U>
    detail::invoke_result_t<U> map_or_else(F &&f, U &&u) const && {
        return has_value()
                   ? detail::invoke(std::forward<F>(f), std::move(**this))
                   : std::forward<U>(u)();
    }
#endif

    /// Returns `u` if `*this` has a value, otherwise an empty optional.
    ///
    /// The result is an `optional` of the decayed type of `u`. Note that `u`
    /// is used as an lvalue in the body (it is not forwarded), so the value
    /// is copied into the result even when an rvalue is passed.
    template <class U>
    constexpr optional<typename std::decay<U>::type> conjunction(U &&u) const {
        using result = optional<detail::decay_t<U>>;
        return has_value() ? result{u} : result{nullopt};
    }

    /// Returns `rhs` if `*this` is empty, otherwise the current value.
    ///
    /// Overloads taking `const optional &rhs` copy `rhs` when it is chosen;
    /// the overloads for each value category of `*this` follow.
    TL_OPTIONAL_11_CONSTEXPR optional disjunction(const optional &rhs) & {
        return has_value() ? *this : rhs;
    }

    constexpr optional disjunction(const optional &rhs) const & {
        return has_value() ? *this : rhs;
    }

    // Rvalue `*this`: the ternary mixes an rvalue with the const lvalue `rhs`
    TL_OPTIONAL_11_CONSTEXPR optional disjunction(const optional &rhs) && {
        return has_value() ? std::move(*this) : rhs;
    }

#ifndef TL_OPTIONAL_NO_CONSTRR
    // Const rvalue `*this`; compiled out when TL_OPTIONAL_NO_CONSTRR is defined
    constexpr optional disjunction(const optional &rhs) const && {
        return has_value() ? std::move(*this) : rhs;
    }
#endif

    /// Overloads taking `optional &&rhs`: `rhs` is moved from when it is
    /// chosen.
    TL_OPTIONAL_11_CONSTEXPR optional disjunction(optional &&rhs) & {
        return has_value() ? *this : std::move(rhs);
    }

    constexpr optional disjunction(optional &&rhs) const & {
        return has_value() ? *this : std::move(rhs);
    }

    // Both `*this` and `rhs` are rvalues here
    TL_OPTIONAL_11_CONSTEXPR optional disjunction(optional &&rhs) && {
        return has_value() ? std::move(*this) : std::move(rhs);
    }

#ifndef TL_OPTIONAL_NO_CONSTRR
    // Const rvalue `*this`; compiled out when TL_OPTIONAL_NO_CONSTRR is defined
    constexpr optional disjunction(optional &&rhs) const && {
        return has_value() ? std::move(*this) : std::move(rhs);
    }
#endif

    /// Moves the current contents into a new optional, empties `*this`, and
    /// returns the new optional.
    optional take() {
        optional extracted = std::move(*this);
        // The move leaves `*this` in its moved-from state; reset() then
        // destroys any remaining value and marks `*this` empty.
        this->reset();
        return extracted;
    }

    using value_type = T;

    /// Constructs an optional that does not contain a value.
    constexpr optional() noexcept = default;

    /// Constructs an empty optional from the `nullopt` tag.
    constexpr optional(nullopt_t) noexcept {}

    /// Copy constructor
    ///
    /// If `rhs` contains a value, the stored value is direct-initialized with
    /// it. Otherwise, the constructed optional is empty.
    TL_OPTIONAL_11_CONSTEXPR optional(const optional &rhs) = default;

    /// Move constructor
    ///
    /// If `rhs` contains a value, the stored value is direct-initialized with
    /// it. Otherwise, the constructed optional is empty.
    TL_OPTIONAL_11_CONSTEXPR optional(optional &&rhs) = default;

    /// Constructs the stored value in-place using the given arguments.
    ///
    /// The first parameter is `in_place_t`, wrapped in `enable_if_t` so that
    /// this overload only participates when `T` is constructible from
    /// `Args...`. The arguments are forwarded to the base class's in-place
    /// constructor.
    template <class... Args>
    constexpr explicit optional(
        detail::enable_if_t<std::is_constructible<T, Args...>::value,
                            in_place_t>,
        Args &&... args)
        : base(in_place, std::forward<Args>(args)...) {}

    /// In-place construction from an initializer list plus further
    /// arguments; participates only when `T` is constructible from
    /// `std::initializer_list<U> &, Args &&...`.
    template <class U, class... Args>
    TL_OPTIONAL_11_CONSTEXPR explicit optional(
        detail::enable_if_t<std::is_constructible<T,
                                                  std::initializer_list<U> &,
                                                  Args &&...>::value,
                            in_place_t>,
        std::initializer_list<U> il,
        Args &&... args) {
        // The base is default-constructed first; the value is then built by
        // the inherited construct() helper.
        this->construct(il, std::forward<Args>(args)...);
    }

    /// Constructs the stored value with `u`.
    ///
    /// Implicit form, chosen when `U &&` is implicitly convertible to `T`.
    template <
        class U = T,
        detail::enable_if_t<std::is_convertible<U &&, T>::value> * = nullptr,
        detail::enable_forward_value<T, U> * = nullptr>
    constexpr optional(U &&u) : base(in_place, std::forward<U>(u)) {}

    /// Explicit form of the constructor above, chosen when `U &&` is not
    /// implicitly convertible to `T`.
    template <
        class U = T,
        detail::enable_if_t<!std::is_convertible<U &&, T>::value> * = nullptr,
        detail::enable_forward_value<T, U> * = nullptr>
    constexpr explicit optional(U &&u) : base(in_place, std::forward<U>(u)) {}

    /// Converting copy constructor.
    template <class U,
              detail::enable_from_other<T, U, const U &> * = nullptr,
              detail::enable_if_t<std::is_convertible<const U &, T>::value> * =
                  nullptr>
    optional(const optional<U> &rhs) {
        if (rhs.has_value()) {
            this->construct(*rhs);
        }
    }

    template <class U,
              detail::enable_from_other<T, U, const U &> * = nullptr,
              detail::enable_if_t<!std::is_convertible<const U &, T>::value> * =
                  nullptr>
    explicit optional(const optional<U> &rhs) {
        if (rhs.has_value()) {
            this->construct(*rhs);
        }
    }

    /// Converting move constructor.
    template <
        class U,
        detail::enable_from_other<T, U, U &&> * = nullptr,
        detail::enable_if_t<std::is_convertible<U &&, T>::value> * = nullptr>
    optional(optional<U> &&rhs) {
        if (rhs.has_value()) {
            this->construct(std::move(*rhs));
        }
    }

    template <
        class U,
        detail::enable_from_other<T, U, U &&> * = nullptr,
        detail::enable_if_t<!std::is_convertible<U &&, T>::value> * = nullptr>
    explicit optional(optional<U> &&rhs) {
        if (rhs.has_value()) {
            this->construct(std::move(*rhs));
        }
    }

    /// Destroys the stored value if there is one.
    ~optional() = default;

    /// Assignment from `nullopt`.
    ///
    /// Destroys the held value, if any, and leaves the optional empty.
    optional &operator=(nullopt_t) noexcept {
        if (!has_value()) return *this;

        this->m_value.~T();
        this->m_has_value = false;
        return *this;
    }

    /// Copy assignment.
    ///
    /// Defaulted here; the behaviour comes from the base classes.
    optional &operator=(const optional &rhs) = default;

    /// Move assignment.
    ///
    /// Defaulted here; the behaviour comes from the base classes.
    optional &operator=(optional &&rhs) = default;

    /// Assignment from a value.
    ///
    /// Assigns `u` to the held value when there is one; otherwise constructs
    /// a new value from `u` via the inherited construct() helper.
    template <class U = T, detail::enable_assign_forward<T, U> * = nullptr>
    optional &operator=(U &&u) {
        if (!has_value()) {
            this->construct(std::forward<U>(u));
            return *this;
        }

        this->m_value = std::forward<U>(u);
        return *this;
    }

    /// Converting copy assignment operator.
    ///
    /// Copies the value from `rhs` if there is one. Otherwise resets the stored
    /// value in `*this`.
    ///
    /// The four cases are:
    ///  - both hold a value: the held value is assigned from `*rhs`;
    ///  - only `*this` holds a value: it is dropped via hard_reset();
    ///  - only `rhs` holds a value: a new value is constructed from `*rhs`;
    ///  - neither holds a value: nothing happens.
    template <class U,
              detail::enable_assign_from_other<T, U, const U &> * = nullptr>
    optional &operator=(const optional<U> &rhs) {
        if (has_value()) {
            if (rhs.has_value()) {
                this->m_value = *rhs;
            } else {
                this->hard_reset();
            }
        } else if (rhs.has_value()) {
            // Construct only when *this was empty. Running this step after
            // the assignment above would placement-construct a second T on
            // top of the live one without destroying it.
            this->construct(*rhs);
        }

        return *this;
    }

    // TODO check exception guarantee
    /// Converting move assignment operator.
    ///
    /// Moves the value from `rhs` if there is one. Otherwise resets the stored
    /// value in `*this`.
    ///
    /// Same four cases as the converting copy assignment, with `*rhs` moved
    /// from instead of copied. `rhs` still reports having a value afterwards.
    template <class U, detail::enable_assign_from_other<T, U, U> * = nullptr>
    optional &operator=(optional<U> &&rhs) {
        if (has_value()) {
            if (rhs.has_value()) {
                this->m_value = std::move(*rhs);
            } else {
                this->hard_reset();
            }
        } else if (rhs.has_value()) {
            // Construct only when *this was empty. Otherwise the value that
            // was just move-assigned would be overwritten by a fresh object
            // built from the already moved-from `*rhs`.
            this->construct(std::move(*rhs));
        }

        return *this;
    }

    /// Replaces the contents with a value constructed in place from `args`.
    ///
    /// Any currently held value is destroyed first. Returns a reference to
    /// the newly stored value.
    template <class... Args>
    T &emplace(Args &&... args) {
        static_assert(std::is_constructible<T, Args &&...>::value,
                      "T must be constructible with Args");

        // Drop the old value (if any) before building the new one
        reset();
        this->construct(std::forward<Args>(args)...);
        return value();
    }

    /// Initializer-list flavour of emplace(); participates only when `T` is
    /// constructible from `std::initializer_list<U> &, Args &&...`.
    template <class U, class... Args>
    detail::enable_if_t<
        std::is_constructible<T, std::initializer_list<U> &, Args &&...>::value,
        T &>
    emplace(std::initializer_list<U> il, Args &&... args) {
        reset();
        this->construct(il, std::forward<Args>(args)...);
        return value();
    }

    /// Exchanges the contents of `*this` and `rhs`.
    ///
    ///  - Both empty: nothing changes (the flags swapped are equal).
    ///  - Both engaged: the two values are swapped with an unqualified
    ///    `swap` call, with `std::swap` made visible by a using-declaration.
    ///  - Exactly one engaged: its value is move-constructed into the other
    ///    optional's storage and the source value is destroyed.
    /// The engaged flags are swapped last.
    void swap(optional &rhs) noexcept(
        std::is_nothrow_move_constructible<T>::value
            &&detail::is_nothrow_swappable<T>::value) {
        using std::swap;

        const bool lhs_engaged = has_value();
        const bool rhs_engaged = rhs.has_value();

        if (lhs_engaged && rhs_engaged) {
            swap(**this, *rhs);
        } else if (lhs_engaged) {
            // Relocate our value into rhs's raw storage
            new (std::addressof(rhs.m_value)) T(std::move(this->m_value));
            this->m_value.T::~T();
        } else if (rhs_engaged) {
            // Relocate rhs's value into our raw storage
            new (std::addressof(this->m_value)) T(std::move(rhs.m_value));
            rhs.m_value.T::~T();
        }

        swap(this->m_has_value, rhs.m_has_value);
    }

    /// Returns a pointer to the stored value
    ///
    /// No check is made that a value is present; the address of the storage
    /// is returned unconditionally.
    constexpr const T *operator->() const {
        return std::addressof(this->m_value);
    }

    /// Non-const overload of operator->.
    TL_OPTIONAL_11_CONSTEXPR T *operator->() {
        return std::addressof(this->m_value);
    }

    /// Returns the stored value
    ///
    /// No check is made that a value is present. One overload exists per
    /// value category of `*this`; the rvalue overloads return an rvalue
    /// reference to the stored value.
    TL_OPTIONAL_11_CONSTEXPR T &operator*() & { return this->m_value; }

    constexpr const T &operator*() const & { return this->m_value; }

    TL_OPTIONAL_11_CONSTEXPR T &&operator*() && {
        return std::move(this->m_value);
    }

#ifndef TL_OPTIONAL_NO_CONSTRR
    // Const rvalue overload; compiled out when TL_OPTIONAL_NO_CONSTRR is
    // defined
    constexpr const T &&operator*() const && {
        return std::move(this->m_value);
    }
#endif

    /// Returns whether or not the optional has a value
    constexpr bool has_value() const noexcept { return this->m_has_value; }

    /// Explicit conversion to bool; yields the same flag as has_value().
    constexpr explicit operator bool() const noexcept {
        return this->m_has_value;
    }

    /// Checked access to the contained value.
    ///
    /// Throws bad_optional_access when the optional is empty; otherwise
    /// returns a reference to the stored value whose kind follows the value
    /// category and constness of `*this`.
    TL_OPTIONAL_11_CONSTEXPR T &value() & {
        if (!has_value()) throw bad_optional_access();
        return this->m_value;
    }
    TL_OPTIONAL_11_CONSTEXPR const T &value() const & {
        if (!has_value()) throw bad_optional_access();
        return this->m_value;
    }
    TL_OPTIONAL_11_CONSTEXPR T &&value() && {
        if (!has_value()) throw bad_optional_access();
        return std::move(this->m_value);
    }

#ifndef TL_OPTIONAL_NO_CONSTRR
    // Const rvalue overload; compiled out when TL_OPTIONAL_NO_CONSTRR is
    // defined
    TL_OPTIONAL_11_CONSTEXPR const T &&value() const && {
        if (!has_value()) throw bad_optional_access();
        return std::move(this->m_value);
    }
#endif

    /// Returns the stored value if there is one, otherwise returns `u`
    ///
    /// Const lvalue overload: the stored value is copied into the result, so
    /// `T` must be copy constructible. `u` is converted to `T` only when the
    /// optional is empty.
    template <class U>
    constexpr T value_or(U &&u) const & {
        static_assert(std::is_copy_constructible<T>::value &&
                          std::is_convertible<U &&, T>::value,
                      "T must be copy constructible and convertible from U");
        return has_value() ? **this : static_cast<T>(std::forward<U>(u));
    }

    /// Rvalue overload: the stored value is moved into the result, so `T`
    /// only needs to be move constructible (as the static_assert states).
    template <class U>
    TL_OPTIONAL_11_CONSTEXPR T value_or(U &&u) && {
        static_assert(std::is_move_constructible<T>::value &&
                          std::is_convertible<U &&, T>::value,
                      "T must be move constructible and convertible from U");
        // `**this` is an lvalue inside a member function even for an rvalue
        // object, so it has to be moved explicitly; otherwise the value is
        // copied and move-only types fail to compile.
        return has_value() ? std::move(**this)
                           : static_cast<T>(std::forward<U>(u));
    }

    /// Empties the optional: runs the stored value's destructor if a value
    /// is present and clears the engaged flag. Does nothing when already
    /// empty.
    void reset() noexcept {
        if (!has_value()) return;

        this->m_value.~T();
        this->m_has_value = false;
    }
};  // namespace tl

/// Relational operators between two optionals.
///
/// An empty optional compares equal to another empty optional and less than
/// any engaged one; two engaged optionals compare by their stored values.
/// The stored values are only dereferenced when both sides are engaged.
template <class T, class U>
inline constexpr bool operator==(const optional<T> &lhs,
                                 const optional<U> &rhs) {
    return static_cast<bool>(lhs) == static_cast<bool>(rhs) &&
           (!static_cast<bool>(lhs) || *lhs == *rhs);
}
template <class T, class U>
inline constexpr bool operator!=(const optional<T> &lhs,
                                 const optional<U> &rhs) {
    return static_cast<bool>(lhs) != static_cast<bool>(rhs) ||
           (static_cast<bool>(lhs) && *lhs != *rhs);
}
template <class T, class U>
inline constexpr bool operator<(const optional<T> &lhs,
                                const optional<U> &rhs) {
    return static_cast<bool>(rhs) &&
           (!static_cast<bool>(lhs) || *lhs < *rhs);
}
template <class T, class U>
inline constexpr bool operator>(const optional<T> &lhs,
                                const optional<U> &rhs) {
    return static_cast<bool>(lhs) &&
           (!static_cast<bool>(rhs) || *lhs > *rhs);
}
template <class T, class U>
inline constexpr bool operator<=(const optional<T> &lhs,
                                 const optional<U> &rhs) {
    return !static_cast<bool>(lhs) ||
           (static_cast<bool>(rhs) && *lhs <= *rhs);
}
template <class T, class U>
inline constexpr bool operator>=(const optional<T> &lhs,
                                 const optional<U> &rhs) {
    return !static_cast<bool>(rhs) ||
           (static_cast<bool>(lhs) && *lhs >= *rhs);
}

/// Relational operators between an optional and `nullopt`.
///
/// `nullopt` behaves like an empty optional: it equals an empty optional and
/// is less than any engaged one. The stored value is never inspected.
template <class T>
inline constexpr bool operator==(const optional<T> &lhs, nullopt_t) noexcept {
    return !static_cast<bool>(lhs);
}
template <class T>
inline constexpr bool operator==(nullopt_t, const optional<T> &rhs) noexcept {
    return !static_cast<bool>(rhs);
}
template <class T>
inline constexpr bool operator!=(const optional<T> &lhs, nullopt_t) noexcept {
    return static_cast<bool>(lhs);
}
template <class T>
inline constexpr bool operator!=(nullopt_t, const optional<T> &rhs) noexcept {
    return static_cast<bool>(rhs);
}
// Nothing is less than nullopt
template <class T>
inline constexpr bool operator<(const optional<T> &, nullopt_t) noexcept {
    return false;
}
template <class T>
inline constexpr bool operator<(nullopt_t, const optional<T> &rhs) noexcept {
    return static_cast<bool>(rhs);
}
template <class T>
inline constexpr bool operator<=(const optional<T> &lhs, nullopt_t) noexcept {
    return !static_cast<bool>(lhs);
}
// nullopt is less than or equal to everything
template <class T>
inline constexpr bool operator<=(nullopt_t, const optional<T> &) noexcept {
    return true;
}
template <class T>
inline constexpr bool operator>(const optional<T> &lhs, nullopt_t) noexcept {
    return static_cast<bool>(lhs);
}
// nullopt is greater than nothing
template <class T>
inline constexpr bool operator>(nullopt_t, const optional<T> &) noexcept {
    return false;
}
// Everything is greater than or equal to nullopt
template <class T>
inline constexpr bool operator>=(const optional<T> &, nullopt_t) noexcept {
    return true;
}
template <class T>
inline constexpr bool operator>=(nullopt_t, const optional<T> &rhs) noexcept {
    return !static_cast<bool>(rhs);
}

/// Comparisons between an optional and a plain value.
///
/// An empty optional never equals a value and orders before every value;
/// an engaged optional compares through its contained value.
template <class T, class U>
inline constexpr bool operator==(const optional<T> &lhs, const U &rhs) {
    return lhs.has_value() && (*lhs == rhs);
}
template <class T, class U>
inline constexpr bool operator==(const U &lhs, const optional<T> &rhs) {
    return rhs.has_value() && (lhs == *rhs);
}
template <class T, class U>
inline constexpr bool operator!=(const optional<T> &lhs, const U &rhs) {
    return !lhs.has_value() || (*lhs != rhs);
}
template <class T, class U>
inline constexpr bool operator!=(const U &lhs, const optional<T> &rhs) {
    return !rhs.has_value() || (lhs != *rhs);
}
template <class T, class U>
inline constexpr bool operator<(const optional<T> &lhs, const U &rhs) {
    return !lhs.has_value() || (*lhs < rhs);
}
template <class T, class U>
inline constexpr bool operator<(const U &lhs, const optional<T> &rhs) {
    return rhs.has_value() && (lhs < *rhs);
}
template <class T, class U>
inline constexpr bool operator<=(const optional<T> &lhs, const U &rhs) {
    return !lhs.has_value() || (*lhs <= rhs);
}
template <class T, class U>
inline constexpr bool operator<=(const U &lhs, const optional<T> &rhs) {
    return rhs.has_value() && (lhs <= *rhs);
}
template <class T, class U>
inline constexpr bool operator>(const optional<T> &lhs, const U &rhs) {
    return lhs.has_value() && (*lhs > rhs);
}
template <class T, class U>
inline constexpr bool operator>(const U &lhs, const optional<T> &rhs) {
    return !rhs.has_value() || (lhs > *rhs);
}
template <class T, class U>
inline constexpr bool operator>=(const optional<T> &lhs, const U &rhs) {
    return lhs.has_value() && (*lhs >= rhs);
}
template <class T, class U>
inline constexpr bool operator>=(const U &lhs, const optional<T> &rhs) {
    return !rhs.has_value() || (lhs >= *rhs);
}

/// Free-function `swap` for optionals; forwards to the member `swap`.
///
/// Only participates in overload resolution when `T` is move constructible
/// and swappable. The exception specification mirrors the member `swap`.
template <class T,
          detail::enable_if_t<std::is_move_constructible<T>::value> * = nullptr,
          detail::enable_if_t<detail::is_swappable<T>::value> * = nullptr>
void swap(optional<T> &lhs,
          optional<T> &rhs) noexcept(noexcept(lhs.swap(rhs))) {
    lhs.swap(rhs);
}

namespace detail {
// Placeholder type used as the default for `make_optional`'s first template
// parameter, so the overload below can tell whether the caller named `T`.
struct i_am_secret {};
}  // namespace detail

/// Creates an optional holding `v`.
///
/// If `T` is left at its default, the stored type is `decay_t<U>`;
/// otherwise the stored type is the explicitly given `T`.
template <class T = detail::i_am_secret,
          class U,
          class Ret =
              detail::conditional_t<std::is_same<T, detail::i_am_secret>::value,
                                    detail::decay_t<U>,
                                    T>>
inline constexpr optional<Ret> make_optional(U &&v) {
    return optional<Ret>(std::forward<U>(v));
}

/// Creates an `optional<T>` whose value is constructed in place from `args`.
template <class T, class... Args>
inline constexpr optional<T> make_optional(Args &&... args) {
    return optional<T>(in_place, std::forward<Args>(args)...);
}
/// Creates an `optional<T>` whose value is constructed in place from the
/// initializer list `il` followed by `args`.
template <class T, class U, class... Args>
inline constexpr optional<T> make_optional(std::initializer_list<U> il,
                                           Args &&... args) {
    return optional<T>(in_place, il, std::forward<Args>(args)...);
}

#if __cplusplus >= 201703L
// Deduction guide (only compiled for C++17 and later): constructing an
// `optional` from a `T` deduces `optional<T>`.
template <class T>
optional(T)->optional<T>;
#endif

/// \exclude
namespace detail {
// Shared implementation behind `map`/`transform`.
//
// Two overloads are selected by whether invoking `f` on the dereferenced
// optional yields void. The TL_OPTIONAL_CXX14 branch uses deduced return
// types; the fallback branch spells the return type out.
#ifdef TL_OPTIONAL_CXX14
// Non-void result: invoke `f` on the contained value when there is one,
// otherwise produce an empty `optional<Ret>`.
template <class Opt,
          class F,
          class Ret = decltype(detail::invoke(std::declval<F>(),
                                              *std::declval<Opt>())),
          detail::enable_if_t<!std::is_void<Ret>::value> * = nullptr>
constexpr auto optional_map_impl(Opt &&opt, F &&f) {
    return opt.has_value()
               ? detail::invoke(std::forward<F>(f), *std::forward<Opt>(opt))
               : optional<Ret>(nullopt);
}

// Void result: invoke `f` for its side effects when there is a value and
// report that with an engaged `optional<monostate>`; otherwise return an
// empty `optional<monostate>`.
template <class Opt,
          class F,
          class Ret = decltype(detail::invoke(std::declval<F>(),
                                              *std::declval<Opt>())),
          detail::enable_if_t<std::is_void<Ret>::value> * = nullptr>
auto optional_map_impl(Opt &&opt, F &&f) {
    if (opt.has_value()) {
        detail::invoke(std::forward<F>(f), *std::forward<Opt>(opt));
        return make_optional(monostate{});
    }

    return optional<monostate>(nullopt);
}
#else
// Non-void result, explicit `optional<Ret>` return type.
template <class Opt,
          class F,
          class Ret = decltype(detail::invoke(std::declval<F>(),
                                              *std::declval<Opt>())),
          detail::enable_if_t<!std::is_void<Ret>::value> * = nullptr>

constexpr auto optional_map_impl(Opt &&opt, F &&f) -> optional<Ret> {
    return opt.has_value()
               ? detail::invoke(std::forward<F>(f), *std::forward<Opt>(opt))
               : optional<Ret>(nullopt);
}

// Void result, explicit `optional<monostate>` return type.
template <class Opt,
          class F,
          class Ret = decltype(detail::invoke(std::declval<F>(),
                                              *std::declval<Opt>())),
          detail::enable_if_t<std::is_void<Ret>::value> * = nullptr>

auto optional_map_impl(Opt &&opt, F &&f) -> optional<monostate> {
    if (opt.has_value()) {
        detail::invoke(std::forward<F>(f), *std::forward<Opt>(opt));
        return monostate{};
    }

    return nullopt;
}
#endif
}  // namespace detail

/// Specialization for when `T` is a reference. `optional<T&>` acts similarly
/// to a `T*`, but provides more operations and shows intent more clearly.
///
/// The referee is tracked through a raw, non-owning pointer (`m_value`); a
/// null pointer means the optional is empty. Assignment rebinds the pointer,
/// it never assigns through to the referee.
template <class T>
class optional<T &> {
  public:
// The different versions for C++14 and 11 are needed because deduced return
// types are not SFINAE-safe. This provides better support for things like
// generic lambdas. C.f.
// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/p0826r0.html
#if defined(TL_OPTIONAL_CXX14) && !defined(TL_OPTIONAL_GCC49) && \
    !defined(TL_OPTIONAL_GCC54) && !defined(TL_OPTIONAL_GCC55)

    /// Carries out some operation which returns an optional on the stored
    /// object if there is one.
    template <class F>
    TL_OPTIONAL_11_CONSTEXPR auto and_then(F &&f) & {
        using result = detail::invoke_result_t<F, T &>;
        static_assert(detail::is_optional<result>::value,
                      "F must return an optional");

        return has_value() ? detail::invoke(std::forward<F>(f), **this)
                           : result(nullopt);
    }

    template <class F>
    TL_OPTIONAL_11_CONSTEXPR auto and_then(F &&f) && {
        using result = detail::invoke_result_t<F, T &>;
        static_assert(detail::is_optional<result>::value,
                      "F must return an optional");

        return has_value() ? detail::invoke(std::forward<F>(f), **this)
                           : result(nullopt);
    }

    template <class F>
    constexpr auto and_then(F &&f) const & {
        using result = detail::invoke_result_t<F, const T &>;
        static_assert(detail::is_optional<result>::value,
                      "F must return an optional");

        return has_value() ? detail::invoke(std::forward<F>(f), **this)
                           : result(nullopt);
    }

#ifndef TL_OPTIONAL_NO_CONSTRR
    template <class F>
    constexpr auto and_then(F &&f) const && {
        using result = detail::invoke_result_t<F, const T &>;
        static_assert(detail::is_optional<result>::value,
                      "F must return an optional");

        return has_value() ? detail::invoke(std::forward<F>(f), **this)
                           : result(nullopt);
    }
#endif
#else
    /// Carries out some operation which returns an optional on the stored
    /// object if there is one.
    template <class F>
    TL_OPTIONAL_11_CONSTEXPR detail::invoke_result_t<F, T &> and_then(F &&f) & {
        using result = detail::invoke_result_t<F, T &>;
        static_assert(detail::is_optional<result>::value,
                      "F must return an optional");

        return has_value() ? detail::invoke(std::forward<F>(f), **this)
                           : result(nullopt);
    }

    template <class F>
    TL_OPTIONAL_11_CONSTEXPR detail::invoke_result_t<F, T &> and_then(
        F &&f) && {
        using result = detail::invoke_result_t<F, T &>;
        static_assert(detail::is_optional<result>::value,
                      "F must return an optional");

        return has_value() ? detail::invoke(std::forward<F>(f), **this)
                           : result(nullopt);
    }

    template <class F>
    constexpr detail::invoke_result_t<F, const T &> and_then(F &&f) const & {
        using result = detail::invoke_result_t<F, const T &>;
        static_assert(detail::is_optional<result>::value,
                      "F must return an optional");

        return has_value() ? detail::invoke(std::forward<F>(f), **this)
                           : result(nullopt);
    }

#ifndef TL_OPTIONAL_NO_CONSTRR
    template <class F>
    constexpr detail::invoke_result_t<F, const T &> and_then(F &&f) const && {
        using result = detail::invoke_result_t<F, const T &>;
        static_assert(detail::is_optional<result>::value,
                      "F must return an optional");

        return has_value() ? detail::invoke(std::forward<F>(f), **this)
                           : result(nullopt);
    }
#endif
#endif

#if defined(TL_OPTIONAL_CXX14) && !defined(TL_OPTIONAL_GCC49) && \
    !defined(TL_OPTIONAL_GCC54) && !defined(TL_OPTIONAL_GCC55)
    /// Carries out some operation on the stored object if there is one.
    template <class F>
    TL_OPTIONAL_11_CONSTEXPR auto map(F &&f) & {
        return detail::optional_map_impl(*this, std::forward<F>(f));
    }

    template <class F>
    TL_OPTIONAL_11_CONSTEXPR auto map(F &&f) && {
        return detail::optional_map_impl(std::move(*this), std::forward<F>(f));
    }

    template <class F>
    constexpr auto map(F &&f) const & {
        return detail::optional_map_impl(*this, std::forward<F>(f));
    }

    template <class F>
    constexpr auto map(F &&f) const && {
        return detail::optional_map_impl(std::move(*this), std::forward<F>(f));
    }
#else
    /// Carries out some operation on the stored object if there is one.
    template <class F>
    TL_OPTIONAL_11_CONSTEXPR decltype(detail::optional_map_impl(
        std::declval<optional &>(), std::declval<F &&>()))
    map(F &&f) & {
        return detail::optional_map_impl(*this, std::forward<F>(f));
    }

    template <class F>
    TL_OPTIONAL_11_CONSTEXPR decltype(detail::optional_map_impl(
        std::declval<optional &&>(), std::declval<F &&>()))
    map(F &&f) && {
        return detail::optional_map_impl(std::move(*this), std::forward<F>(f));
    }

    template <class F>
    constexpr decltype(detail::optional_map_impl(
        std::declval<const optional &>(), std::declval<F &&>()))
    map(F &&f) const & {
        return detail::optional_map_impl(*this, std::forward<F>(f));
    }

#ifndef TL_OPTIONAL_NO_CONSTRR
    template <class F>
    constexpr decltype(detail::optional_map_impl(
        std::declval<const optional &&>(), std::declval<F &&>()))
    map(F &&f) const && {
        return detail::optional_map_impl(std::move(*this), std::forward<F>(f));
    }
#endif
#endif

#if defined(TL_OPTIONAL_CXX14) && !defined(TL_OPTIONAL_GCC49) && \
    !defined(TL_OPTIONAL_GCC54) && !defined(TL_OPTIONAL_GCC55)
    /// Carries out some operation on the stored object if there is one.
    template <class F>
    TL_OPTIONAL_11_CONSTEXPR auto transform(F &&f) & {
        return detail::optional_map_impl(*this, std::forward<F>(f));
    }

    template <class F>
    TL_OPTIONAL_11_CONSTEXPR auto transform(F &&f) && {
        return detail::optional_map_impl(std::move(*this), std::forward<F>(f));
    }

    template <class F>
    constexpr auto transform(F &&f) const & {
        return detail::optional_map_impl(*this, std::forward<F>(f));
    }

    template <class F>
    constexpr auto transform(F &&f) const && {
        return detail::optional_map_impl(std::move(*this), std::forward<F>(f));
    }
#else
    /// Carries out some operation on the stored object if there is one.
    template <class F>
    TL_OPTIONAL_11_CONSTEXPR decltype(detail::optional_map_impl(
        std::declval<optional &>(), std::declval<F &&>()))
    transform(F &&f) & {
        return detail::optional_map_impl(*this, std::forward<F>(f));
    }

    /// \group map
    /// \synopsis template <class F> auto transform(F &&f) &&;
    template <class F>
    TL_OPTIONAL_11_CONSTEXPR decltype(detail::optional_map_impl(
        std::declval<optional &&>(), std::declval<F &&>()))
    transform(F &&f) && {
        return detail::optional_map_impl(std::move(*this), std::forward<F>(f));
    }

    template <class F>
    constexpr decltype(detail::optional_map_impl(
        std::declval<const optional &>(), std::declval<F &&>()))
    transform(F &&f) const & {
        return detail::optional_map_impl(*this, std::forward<F>(f));
    }

#ifndef TL_OPTIONAL_NO_CONSTRR
    template <class F>
    constexpr decltype(detail::optional_map_impl(
        std::declval<const optional &&>(), std::declval<F &&>()))
    transform(F &&f) const && {
        return detail::optional_map_impl(std::move(*this), std::forward<F>(f));
    }
#endif
#endif

    /// Calls `f` if the optional is empty
    template <class F, detail::enable_if_ret_void<F> * = nullptr>
    optional<T> TL_OPTIONAL_11_CONSTEXPR or_else(F &&f) & {
        if (has_value()) return *this;

        std::forward<F>(f)();
        return nullopt;
    }

    template <class F, detail::disable_if_ret_void<F> * = nullptr>
    optional<T> TL_OPTIONAL_11_CONSTEXPR or_else(F &&f) & {
        return has_value() ? *this : std::forward<F>(f)();
    }

    template <class F, detail::enable_if_ret_void<F> * = nullptr>
    optional<T> or_else(F &&f) && {
        if (has_value()) return std::move(*this);

        std::forward<F>(f)();
        return nullopt;
    }

    template <class F, detail::disable_if_ret_void<F> * = nullptr>
    optional<T> TL_OPTIONAL_11_CONSTEXPR or_else(F &&f) && {
        return has_value() ? std::move(*this) : std::forward<F>(f)();
    }

    template <class F, detail::enable_if_ret_void<F> * = nullptr>
    optional<T> or_else(F &&f) const & {
        if (has_value()) return *this;

        std::forward<F>(f)();
        return nullopt;
    }

    template <class F, detail::disable_if_ret_void<F> * = nullptr>
    optional<T> TL_OPTIONAL_11_CONSTEXPR or_else(F &&f) const & {
        return has_value() ? *this : std::forward<F>(f)();
    }

#ifndef TL_OPTIONAL_NO_CONSTRR
    template <class F, detail::enable_if_ret_void<F> * = nullptr>
    optional<T> or_else(F &&f) const && {
        if (has_value()) return std::move(*this);

        std::forward<F>(f)();
        return nullopt;
    }

    template <class F, detail::disable_if_ret_void<F> * = nullptr>
    optional<T> or_else(F &&f) const && {
        return has_value() ? std::move(*this) : std::forward<F>(f)();
    }
#endif

    /// Maps the stored value with `f` if there is one, otherwise returns `u`
    template <class F, class U>
    U map_or(F &&f, U &&u) & {
        return has_value() ? detail::invoke(std::forward<F>(f), **this)
                           : std::forward<U>(u);
    }

    template <class F, class U>
    U map_or(F &&f, U &&u) && {
        return has_value()
                   ? detail::invoke(std::forward<F>(f), std::move(**this))
                   : std::forward<U>(u);
    }

    template <class F, class U>
    U map_or(F &&f, U &&u) const & {
        return has_value() ? detail::invoke(std::forward<F>(f), **this)
                           : std::forward<U>(u);
    }

#ifndef TL_OPTIONAL_NO_CONSTRR
    template <class F, class U>
    U map_or(F &&f, U &&u) const && {
        return has_value()
                   ? detail::invoke(std::forward<F>(f), std::move(**this))
                   : std::forward<U>(u);
    }
#endif

    /// Maps the stored value with `f` if there is one, otherwise calls
    /// `u` and returns the result.
    template <class F, class U>
    detail::invoke_result_t<U> map_or_else(F &&f, U &&u) & {
        return has_value() ? detail::invoke(std::forward<F>(f), **this)
                           : std::forward<U>(u)();
    }

    template <class F, class U>
    detail::invoke_result_t<U> map_or_else(F &&f, U &&u) && {
        return has_value()
                   ? detail::invoke(std::forward<F>(f), std::move(**this))
                   : std::forward<U>(u)();
    }

    template <class F, class U>
    detail::invoke_result_t<U> map_or_else(F &&f, U &&u) const & {
        return has_value() ? detail::invoke(std::forward<F>(f), **this)
                           : std::forward<U>(u)();
    }

#ifndef TL_OPTIONAL_NO_CONSTRR
    template <class F, class U>
    detail::invoke_result_t<U> map_or_else(F &&f, U &&u) const && {
        return has_value()
                   ? detail::invoke(std::forward<F>(f), std::move(**this))
                   : std::forward<U>(u)();
    }
#endif

    /// Returns `u` if `*this` has a value, otherwise an empty optional.
    template <class U>
    constexpr optional<typename std::decay<U>::type> conjunction(U &&u) const {
        using result = optional<detail::decay_t<U>>;
        return has_value() ? result{u} : result{nullopt};
    }

    /// Returns `rhs` if `*this` is empty, otherwise the current value.
    TL_OPTIONAL_11_CONSTEXPR optional disjunction(const optional &rhs) & {
        return has_value() ? *this : rhs;
    }

    constexpr optional disjunction(const optional &rhs) const & {
        return has_value() ? *this : rhs;
    }

    TL_OPTIONAL_11_CONSTEXPR optional disjunction(const optional &rhs) && {
        return has_value() ? std::move(*this) : rhs;
    }

#ifndef TL_OPTIONAL_NO_CONSTRR
    constexpr optional disjunction(const optional &rhs) const && {
        return has_value() ? std::move(*this) : rhs;
    }
#endif

    TL_OPTIONAL_11_CONSTEXPR optional disjunction(optional &&rhs) & {
        return has_value() ? *this : std::move(rhs);
    }

    constexpr optional disjunction(optional &&rhs) const & {
        return has_value() ? *this : std::move(rhs);
    }

    TL_OPTIONAL_11_CONSTEXPR optional disjunction(optional &&rhs) && {
        return has_value() ? std::move(*this) : std::move(rhs);
    }

#ifndef TL_OPTIONAL_NO_CONSTRR
    constexpr optional disjunction(optional &&rhs) const && {
        return has_value() ? std::move(*this) : std::move(rhs);
    }
#endif

    /// Takes the value out of the optional, leaving it empty
    optional take() {
        optional ret = std::move(*this);
        reset();
        return ret;
    }

    using value_type = T &;

    /// Constructs an optional that does not contain a value.
    constexpr optional() noexcept : m_value(nullptr) {}

    constexpr optional(nullopt_t) noexcept : m_value(nullptr) {}

    /// Copy constructor
    ///
    /// If `rhs` contains a value, the stored value is direct-initialized with
    /// it. Otherwise, the constructed optional is empty.
    TL_OPTIONAL_11_CONSTEXPR optional(const optional &rhs) noexcept = default;

    /// Move constructor
    ///
    /// If `rhs` contains a value, the stored value is direct-initialized with
    /// it. Otherwise, the constructed optional is empty.
    TL_OPTIONAL_11_CONSTEXPR optional(optional &&rhs) = default;

    /// Constructs the stored value with `u`.
    ///
    /// `u` must be an lvalue (enforced by the static_assert); the optional
    /// stores its address.
    template <class U = T,
              detail::enable_if_t<
                  !detail::is_optional<detail::decay_t<U>>::value> * = nullptr>
    constexpr optional(U &&u) noexcept : m_value(std::addressof(u)) {
        static_assert(std::is_lvalue_reference<U>::value,
                      "U must be an lvalue");
    }

    /// Converting constructor.
    ///
    /// Binds to the value held by `rhs` if there is one. Otherwise the
    /// constructed optional is empty; `rhs` is never dereferenced while
    /// empty.
    template <class U>
    constexpr explicit optional(const optional<U> &rhs) noexcept
        : m_value(rhs.has_value() ? std::addressof(*rhs) : nullptr) {}

    /// No-op
    ~optional() = default;

    /// Assignment to empty.
    ///
    /// Destroys the current value if there is one.
    optional &operator=(nullopt_t) noexcept {
        m_value = nullptr;
        return *this;
    }

    /// Copy assignment.
    ///
    /// Rebinds this optional to the referee of `rhs` if there is one. Otherwise
    /// resets the stored value in `*this`.
    optional &operator=(const optional &rhs) = default;

    /// Rebinds this optional to `u`.
    template <class U = T,
              detail::enable_if_t<
                  !detail::is_optional<detail::decay_t<U>>::value> * = nullptr>
    optional &operator=(U &&u) {
        static_assert(std::is_lvalue_reference<U>::value,
                      "U must be an lvalue");
        m_value = std::addressof(u);
        return *this;
    }

    /// Converting copy assignment operator.
    ///
    /// Rebinds this optional to the referee of `rhs` if there is one. Otherwise
    /// resets the stored value in `*this`.
    template <class U>
    optional &operator=(const optional<U> &rhs) noexcept {
        // Check has_value() instead of calling value(): value() throws on an
        // empty optional, which would terminate inside this noexcept function.
        m_value = rhs.has_value() ? std::addressof(*rhs) : nullptr;
        return *this;
    }

    /// Rebinds this optional to `u`.
    template <class U = T,
              detail::enable_if_t<
                  !detail::is_optional<detail::decay_t<U>>::value> * = nullptr>
    optional &emplace(U &&u) noexcept {
        return *this = std::forward<U>(u);
    }

    /// Exchanges the referees of `*this` and `rhs` by swapping the pointers.
    void swap(optional &rhs) noexcept { std::swap(m_value, rhs.m_value); }

    /// Returns a pointer to the stored value
    constexpr const T *operator->() const noexcept { return m_value; }

    TL_OPTIONAL_11_CONSTEXPR T *operator->() noexcept { return m_value; }

    /// Returns the stored value
    ///
    /// No emptiness check is performed here; use `value()` for a checked
    /// access.
    TL_OPTIONAL_11_CONSTEXPR T &operator*() noexcept { return *m_value; }

    constexpr const T &operator*() const noexcept { return *m_value; }

    /// Returns whether a referee is currently bound.
    constexpr bool has_value() const noexcept { return m_value != nullptr; }

    constexpr explicit operator bool() const noexcept {
        return m_value != nullptr;
    }

    /// Returns the contained value if there is one, otherwise throws
    /// bad_optional_access
    TL_OPTIONAL_11_CONSTEXPR T &value() {
        if (has_value()) return *m_value;
        throw bad_optional_access();
    }
    TL_OPTIONAL_11_CONSTEXPR const T &value() const {
        if (has_value()) return *m_value;
        throw bad_optional_access();
    }

    /// Returns the stored value if there is one, otherwise returns `u`
    template <class U>
    constexpr T value_or(U &&u) const &noexcept {
        static_assert(std::is_copy_constructible<T>::value &&
                          std::is_convertible<U &&, T>::value,
                      "T must be copy constructible and convertible from U");
        return has_value() ? **this : static_cast<T>(std::forward<U>(u));
    }

    /// \group value_or
    template <class U>
    TL_OPTIONAL_11_CONSTEXPR T value_or(U &&u) && noexcept {
        static_assert(std::is_move_constructible<T>::value &&
                          std::is_convertible<U &&, T>::value,
                      "T must be move constructible and convertible from U");
        return has_value() ? **this : static_cast<T>(std::forward<U>(u));
    }

    /// Destroys the stored value if one exists, making the optional empty
    void reset() noexcept { m_value = nullptr; }

  private:
    // Non-owning pointer to the referee; nullptr when the optional is empty.
    T *m_value;
};  // class optional<T &>


}  // namespace tl

namespace std {
// TODO SFINAE
/// Hash support for `tl::optional<T>`.
///
/// An empty optional hashes to 0; an engaged one hashes to whatever
/// `std::hash` of the (const-stripped) value type yields for the value.
template <class T>
struct hash<tl::optional<T>> {
    ::std::size_t operator()(const tl::optional<T> &opt) const {
        using value_hasher = std::hash<tl::detail::remove_const_t<T>>;

        if (opt.has_value()) {
            return value_hasher()(*opt);
        }
        return 0;
    }
};
}  // namespace std

#endif


================================================
FILE: audio/paddleaudio/src/pybind/kaldi/feature_common.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "pybind11/pybind11.h"
#include "pybind11/numpy.h"
#include "kaldi-native-fbank/csrc/feature-window.h"

namespace paddleaudio {
namespace kaldi {

namespace py = pybind11;

/// Streaming feature extractor parameterised on a feature computer `F`.
///
/// `F` supplies the nested `Options` type, is constructed from those options,
/// and performs the per-frame computation. Samples that are not yet consumed
/// by a call to `ComputeFeature` are buffered in `remained_wav_` and
/// prepended to the next chunk.
template <class F>
class StreamingFeatureTpl {
  public:
    using Options = typename F::Options;

    StreamingFeatureTpl(const Options& opts);

    /// Appends `wav` to the buffered samples and writes the features of the
    /// resulting frames into `feats`.
    bool ComputeFeature(const std::vector<float>& wav,
                        std::vector<float>* feats);

    /// Discards every sample buffered from previous calls.
    void Reset() { remained_wav_.clear(); }

    /// Feature dimension as reported by the underlying computer.
    int Dim() { return computer_.Dim(); }

  private:
    /// Computes features for every frame contained in `waves`.
    bool Compute(const std::vector<float>& waves,
                 std::vector<float>* feats);

    // Members are initialized in the order they are declared here.
    Options opts_;
    knf::FeatureWindowFunction window_function_;
    std::vector<float> remained_wav_;  // samples carried over between calls
    F computer_;
};

}  // namespace kaldi
}  // namespace paddleaudio

#include "feature_common_inl.h"


================================================
FILE: audio/paddleaudio/src/pybind/kaldi/feature_common_inl.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


namespace paddleaudio {
namespace kaldi {

/// Builds the streaming extractor from `opts`.
///
/// The initializer list follows the member declaration order
/// (opts_, window_function_, computer_), which is the order in which the
/// members are actually constructed.
template <class F>
StreamingFeatureTpl<F>::StreamingFeatureTpl(const Options& opts)
    : opts_(opts), window_function_(opts.frame_opts), computer_(opts) {
    // window_function_ must be built from opts.frame_opts and not from
    // computer_.GetFrameOptions(): window_function_ is declared before
    // computer_, so computer_ has not been constructed yet at that point
    // (the original note here observed "the opt set to zero").
}

/// Feeds one chunk of samples into the streaming extractor.
///
/// The chunk is appended to the samples left over from the previous call,
/// the features of the resulting frames are written to `feats`, and the
/// samples past the last frame shift are cached for the next call.
///
/// @param wav    new samples; an empty chunk is rejected
/// @param feats  output buffer, filled by `Compute`
/// @return false if `wav` is empty (nothing is touched), otherwise the
///         result of `Compute`
template <class F>
bool StreamingFeatureTpl<F>::ComputeFeature(
    const std::vector<float>& wav,
    std::vector<float>* feats) {
    if (wav.empty()) return false;

    // Prepend the samples remaining from the previous call. Using vector
    // insertion avoids passing a possibly-null data() pointer to memcpy
    // when the cache is empty.
    std::vector<float> waves;
    waves.reserve(remained_wav_.size() + wav.size());
    waves.insert(waves.end(), remained_wav_.begin(), remained_wav_.end());
    waves.insert(waves.end(), wav.begin(), wav.end());

    // Cache the samples that lie beyond the frames computed below.
    const knf::FrameExtractionOptions& frame_opts = computer_.GetFrameOptions();
    const int num_frames = knf::NumFrames(waves.size(), frame_opts);
    const int frame_shift = frame_opts.WindowShift();

    // Clamp to the buffer size: if the frame count is rounded up, the
    // shift * frames product can exceed the number of available samples,
    // which previously produced a negative size for resize().
    std::size_t consumed = 0;
    if (num_frames > 0 && frame_shift > 0) {
        consumed = static_cast<std::size_t>(frame_shift) *
                   static_cast<std::size_t>(num_frames);
    }
    if (consumed > waves.size()) consumed = waves.size();
    remained_wav_.assign(waves.begin() + static_cast<std::ptrdiff_t>(consumed),
                         waves.end());

    // compute speech feature
    return Compute(waves, feats);
}

/// Computes the features of every frame contained in `waves`.
///
/// `feats` receives `num_frames * Dim()` values, one row of `Dim()` values
/// per frame. If `waves` is shorter than one window, `feats` is emptied so
/// that the caller never sees stale content.
///
/// @param waves  samples to frame and process
/// @param feats  output buffer, resized by this function
/// @return always true
template <class F>
bool StreamingFeatureTpl<F>::Compute(const std::vector<float>& waves,
                                     std::vector<float>* feats) {
    const knf::FrameExtractionOptions& frame_opts = computer_.GetFrameOptions();
    const int num_samples = waves.size();
    const int frame_length = frame_opts.WindowSize();
    if (num_samples < frame_length) {
        feats->clear();
        return true;
    }

    const int num_frames = knf::NumFrames(num_samples, frame_opts);
    const int dim = Dim();
    // Zero-fill the whole output so every frame starts from a zeroed row,
    // exactly like the per-frame scratch buffer this replaces.
    feats->assign(num_frames * dim, 0.0f);

    std::vector<float> window;
    const bool need_raw_log_energy = computer_.NeedRawLogEnergy();
    const float vtln_warp = 1.0;
    for (int frame = 0; frame < num_frames; ++frame) {
        std::fill(window.begin(), window.end(), 0);
        float raw_log_energy = 0.0;
        knf::ExtractWindow(0,
                           waves,
                           frame,
                           frame_opts,
                           window_function_,
                           &window,
                           need_raw_log_energy ? &raw_log_energy : nullptr);

        // Write the frame's features straight into its row of the output.
        computer_.Compute(raw_log_energy,
                          vtln_warp,
                          &window,
                          feats->data() + frame * dim);
    }
    return true;
}

}  // namespace kaldi
}  // namespace paddleaudio


================================================
FILE: audio/paddleaudio/src/pybind/kaldi/kaldi_feature.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddleaudio/src/pybind/kaldi/kaldi_feature.h"
//#include "feat/pitch-functions.h"

namespace paddleaudio {
namespace kaldi {

// Translates the Python-facing option structs into a knf::FbankOptions and
// (re)initializes the shared KaldiFeatureWrapper instance with it.
// Always returns true.
bool InitFbank(
    knf::FrameExtractionOptions frame_opts,
    knf::MelBanksOptions mel_opts,
    FbankOptions fbank_opts) {
    knf::FbankOptions knf_opts;

    // Nested option groups.
    knf_opts.frame_opts = frame_opts;
    knf_opts.mel_opts = mel_opts;

    // Energy-related switches.
    knf_opts.use_energy = fbank_opts.use_energy;
    knf_opts.energy_floor = fbank_opts.energy_floor;
    knf_opts.raw_energy = fbank_opts.raw_energy;
    knf_opts.htk_compat = fbank_opts.htk_compat;

    // Output scaling switches.
    knf_opts.use_log_fbank = fbank_opts.use_log_fbank;
    knf_opts.use_power = fbank_opts.use_power;

    KaldiFeatureWrapper::GetInstance()->InitFbank(knf_opts);
    return true;
}

// Forwards `wav` to the shared KaldiFeatureWrapper instance and returns the
// array it produces. The wrapper is not initialized or reset here.
py::array_t<float> ComputeFbankStreaming(const py::array_t<float>& wav) {
    return KaldiFeatureWrapper::GetInstance()->ComputeFbank(wav);
}

// One-shot fbank computation: initializes the shared wrapper with the given
// options, computes the features of `wav`, then resets the wrapper's fbank
// state before returning the features.
py::array_t<float> ComputeFbank(
    knf::FrameExtractionOptions frame_opts,
    knf::MelBanksOptions mel_opts,
    FbankOptions fbank_opts,
    const py::array_t<float>& wav) {
    InitFbank(frame_opts, mel_opts, fbank_opts);

    py::array_t<float> feats = ComputeFbankStreaming(wav);

    KaldiFeatureWrapper::GetInstance()->ResetFbank();
    return feats;
}

void ResetFbank() {
    paddleaudio::kaldi::KaldiFeatureWrapper::GetInstance()->ResetFbank();
}

//py::array_t<float> ComputeKaldiPitch(
  //const ::kaldi::PitchExtractionOptions& opts,
  //const py::array_t<float>& wav) {
    //py::buffer_info info = wav.request();
    //::kaldi::SubVector<::kaldi::BaseFloat> input_wav((float*)info.ptr, info.size);
   
    //::kaldi::Matrix<::kaldi::BaseFloat> features;
    //::kaldi::ComputeKaldiPitch(opts, input_wav, &features);
    //auto result = py::array_t<float>({features.NumRows(), features.NumCols()});
    //for (int row_idx = 0; row_idx < features.NumRows(); ++row_idx) {
        //std::memcpy(result.mutable_data(row_idx), features.Row(row_idx).Data(),
                    //sizeof(float)*features.NumCols());
    //}
   //return result;
//}

}  // namespace kaldi
}  // namespace paddleaudio


================================================
FILE: audio/paddleaudio/src/pybind/kaldi/kaldi_feature.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <string>

#include "paddleaudio/src/pybind/kaldi/kaldi_feature_wrapper.h"
//#include "feat/pitch-functions.h"

namespace py = pybind11;

namespace paddleaudio {
namespace kaldi {

// Fbank-specific switches exposed to Python. The defaults below are the values
// a default-constructed instance carries.
struct FbankOptions {
  // Append an extra dimension with energy to the filter banks.
  bool use_energy = false;
  float energy_floor = 0.0f;
  // If true, compute energy before preemphasis and windowing.
  bool raw_energy = true;
  // If true, put energy last (if using energy).
  bool htk_compat = false;
  // If true (default), produce log-filterbank, else linear.
  bool use_log_fbank = true;
  bool use_power = true;

  FbankOptions() = default;
};

// Copies the given options into a knf::FbankOptions and (re)creates the fbank
// extractor owned by the KaldiFeatureWrapper singleton. Always returns true.
bool InitFbank(
    knf::FrameExtractionOptions frame_opts,
    knf::MelBanksOptions mel_opts,
    FbankOptions fbank_opts);

// One-shot helper: calls InitFbank() with the given options, computes features
// for `wav` through ComputeFbankStreaming(), resets the extractor and returns
// the features.
py::array_t<float> ComputeFbank(
    knf::FrameExtractionOptions frame_opts,
    knf::MelBanksOptions mel_opts,
    FbankOptions fbank_opts,
    const py::array_t<float>& wav);

// Runs the already-initialized singleton extractor on `wav`. InitFbank() must
// have been called first.
py::array_t<float> ComputeFbankStreaming(const py::array_t<float>& wav);

// Resets the singleton's fbank extractor.
void ResetFbank();

//py::array_t<float> ComputeKaldiPitch(
    //const ::kaldi::PitchExtractionOptions& opts,
    //const py::array_t<float>& wav);

}  // namespace kaldi
}  // namespace paddleaudio


================================================
FILE: audio/paddleaudio/src/pybind/kaldi/kaldi_feature_wrapper.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddleaudio/src/pybind/kaldi/kaldi_feature_wrapper.h"

#include <cstring>
#include <stdexcept>
#include <vector>

namespace paddleaudio {
namespace kaldi {

// Returns the single shared wrapper. It lives in a function-local static, so
// it is constructed the first time this function runs.
KaldiFeatureWrapper* KaldiFeatureWrapper::GetInstance() {
    static KaldiFeatureWrapper singleton;
    KaldiFeatureWrapper* const handle = &singleton;
    return handle;
}

// Replaces the current extractor (if any) with a new one built from `opts`.
// Always returns true.
bool KaldiFeatureWrapper::InitFbank(knf::FbankOptions opts) {
    // Build the replacement first; the previous extractor is destroyed when
    // the new one is moved into place.
    std::unique_ptr<Fbank> fresh(new Fbank(opts));
    fbank_ = std::move(fresh);
    return true;
}

// Computes fbank features for `wav` with the current extractor.
//
// Returns a 2-D float array of shape (feats.size() / Dim(), Dim()), or an
// empty array when the extractor reports failure or yields no features.
//
// Throws std::runtime_error when InitFbank() has not been called yet, or when
// `wav` cannot be viewed as a contiguous float array.
py::array_t<float> KaldiFeatureWrapper::ComputeFbank(
    const py::array_t<float> wav) {
    if (!fbank_) {
        throw std::runtime_error(
            "Fbank is not initialized; call InitFbank() first.");
    }

    // `py::array_t<float>` also accepts strided (non-contiguous) arrays, whose
    // raw buffer is not a flat run of `size()` samples. Ask for a C-contiguous
    // view; this is a cheap new reference when `wav` is already contiguous and
    // a copy otherwise.
    auto dense =
        py::array_t<float, py::array::c_style | py::array::forcecast>::ensure(
            wav);
    if (!dense) {
        throw std::runtime_error(
            "Failed to convert wav to a contiguous float array.");
    }
    const float* samples = dense.data();
    std::vector<float> input_wav(samples, samples + dense.size());

    std::vector<float> feats;
    const bool ok = fbank_->ComputeFeature(input_wav, &feats);
    if (!ok || feats.empty()) return py::array_t<float>();

    // Copy the flat feature vector into a new array, then view it as
    // (num_frames, dim).
    auto result = py::array_t<float>(feats.size());
    std::memcpy(
        result.mutable_data(), feats.data(), sizeof(float) * feats.size());
    const int dim = Dim();
    std::vector<int> shape{static_cast<int>(feats.size() / dim), dim};
    return result.reshape(shape);
}

}  // namespace kaldi
}  // namespace paddleaudio


================================================
FILE: audio/paddleaudio/src/pybind/kaldi/kaldi_feature_wrapper.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "paddleaudio/third_party/kaldi-native-fbank/csrc/feature-fbank.h"
#include "paddleaudio/src/pybind/kaldi/feature_common.h"

namespace paddleaudio {
namespace kaldi {

// Streaming feature extractor instantiated with kaldi-native-fbank's
// FbankComputer.
typedef StreamingFeatureTpl<knf::FbankComputer> Fbank;

// Process-wide holder of one Fbank extractor, accessed through GetInstance().
class KaldiFeatureWrapper {
  public:
    // Returns the shared instance.
    static KaldiFeatureWrapper* GetInstance();
    // (Re)creates the extractor from `opts`.
    bool InitFbank(knf::FbankOptions opts);
    // Computes features for `wav`; requires a prior InitFbank().
    py::array_t<float> ComputeFbank(const py::array_t<float> wav);
    // Feature dimension of the current extractor; requires a prior
    // InitFbank().
    int Dim() { return fbank_->Dim(); }
    // Resets the extractor. Does nothing when no extractor has been created
    // yet, so calling it before InitFbank() is safe.
    void ResetFbank() {
        if (fbank_) fbank_->Reset();
    }

  private:
    std::unique_ptr<paddleaudio::kaldi::Fbank> fbank_;
};

}  // namespace kaldi
}  // namespace paddleaudio


================================================
FILE: audio/paddleaudio/src/pybind/pybind.cpp
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.

#ifdef INCLUDE_KALDI
#include "paddleaudio/src/pybind/kaldi/kaldi_feature.h"
#include "paddleaudio/third_party/kaldi-native-fbank/csrc/feature-fbank.h"
#endif

#ifdef INCLUDE_SOX
#include "paddleaudio/src/pybind/sox/io.h"
#include "paddleaudio/src/pybind/sox/effects.h"
#endif

#include <pybind11/stl.h>
#include <pybind11/pybind11.h>

// `tl::optional` 
#ifdef INCLUDE_SOX
// Make `tl::optional<T>` usable in bound signatures by reusing pybind11's
// generic `optional_caster` for it. Only needed by the sox bindings, hence the
// surrounding INCLUDE_SOX guard.
namespace pybind11 { namespace detail {
   template <typename T>
   struct type_caster<tl::optional<T>> : optional_caster<tl::optional<T>> {};
}}
#endif

// Python extension module `_paddleaudio`.
//
// Sox bindings are compiled in when INCLUDE_SOX is defined, kaldi fbank
// bindings when INCLUDE_KALDI is defined.
PYBIND11_MODULE(_paddleaudio, m) {
#ifdef INCLUDE_SOX
    m.def("get_info_file",
          &paddleaudio::sox_io::get_info_file,
          "Get metadata of audio file.");
    // support obj later
    m.def("get_info_fileobj",
          &paddleaudio::sox_io::get_info_fileobj,
          "Get metadata of audio in file object.");
    m.def("load_audio_fileobj",
          &paddleaudio::sox_io::load_audio_fileobj,
          "Load audio from file object.");
    m.def("save_audio_fileobj",
          &paddleaudio::sox_io::save_audio_fileobj,
          "Save audio to file obj.");

    // sox io
    m.def("sox_io_get_info", &paddleaudio::sox_io::get_info_file);
    m.def("sox_io_load_audio_file", &paddleaudio::sox_io::load_audio_file);
    m.def("sox_io_save_audio_file", &paddleaudio::sox_io::save_audio_file);

    // sox utils
    m.def("sox_utils_set_seed", &paddleaudio::sox_utils::set_seed);
    m.def("sox_utils_set_verbosity", &paddleaudio::sox_utils::set_verbosity);
    m.def("sox_utils_set_use_threads",
          &paddleaudio::sox_utils::set_use_threads);
    m.def("sox_utils_set_buffer_size",
          &paddleaudio::sox_utils::set_buffer_size);
    m.def("sox_utils_list_effects", &paddleaudio::sox_utils::list_effects);
    m.def("sox_utils_list_read_formats",
          &paddleaudio::sox_utils::list_read_formats);
    m.def("sox_utils_list_write_formats",
          &paddleaudio::sox_utils::list_write_formats);
    m.def("sox_utils_get_buffer_size",
          &paddleaudio::sox_utils::get_buffer_size);

    // effect
    m.def("apply_effects_fileobj",
          &paddleaudio::sox_effects::apply_effects_fileobj,
          "Decode audio data from file-like obj and apply effects.");
    m.def("sox_effects_initialize_sox_effects",
          &paddleaudio::sox_effects::initialize_sox_effects);
    m.def("sox_effects_shutdown_sox_effects",
          &paddleaudio::sox_effects::shutdown_sox_effects);
    m.def("sox_effects_apply_effects_tensor",
          &paddleaudio::sox_effects::apply_effects_tensor);
    m.def("sox_effects_apply_effects_file",
          &paddleaudio::sox_effects::apply_effects_file);
#endif

#ifdef INCLUDE_KALDI
    //py::class_<kaldi::PitchExtractionOptions>(m, "PitchExtractionOptions")
        //.def(py::init<>())
        //.def_readwrite("samp_freq", &kaldi::PitchExtractionOptions::samp_freq)
        //.def_readwrite("frame_shift_ms", &kaldi::PitchExtractionOptions::frame_shift_ms)
        //.def_readwrite("frame_length_ms", &kaldi::PitchExtractionOptions::frame_length_ms)
        //.def_readwrite("preemph_coeff", &kaldi::PitchExtractionOptions::preemph_coeff)
        //.def_readwrite("min_f0", &kaldi::PitchExtractionOptions::min_f0)
        //.def_readwrite("max_f0", &kaldi::PitchExtractionOptions::max_f0)
        //.def_readwrite("soft_min_f0", &kaldi::PitchExtractionOptions::soft_min_f0)
        //.def_readwrite("penalty_factor", &kaldi::PitchExtractionOptions::penalty_factor)
        //.def_readwrite("lowpass_cutoff", &kaldi::PitchExtractionOptions::lowpass_cutoff)
        //.def_readwrite("resample_freq", &kaldi::PitchExtractionOptions::resample_freq)
        //.def_readwrite("delta_pitch", &kaldi::PitchExtractionOptions::delta_pitch)
        //.def_readwrite("nccf_ballast", &kaldi::PitchExtractionOptions::nccf_ballast)
        //.def_readwrite("lowpass_filter_width", &kaldi::PitchExtractionOptions::lowpass_filter_width)
        //.def_readwrite("upsample_filter_width", &kaldi::PitchExtractionOptions::upsample_filter_width)
        //.def_readwrite("max_frames_latency", &kaldi::PitchExtractionOptions::max_frames_latency)
        //.def_readwrite("frames_per_chunk", &kaldi::PitchExtractionOptions::frames_per_chunk)
        //.def_readwrite("simulate_first_pass_online", &kaldi::PitchExtractionOptions::simulate_first_pass_online)
        //.def_readwrite("recompute_frame", &kaldi::PitchExtractionOptions::recompute_frame)
        //.def_readwrite("nccf_ballast_online", &kaldi::PitchExtractionOptions::nccf_ballast_online)
        //.def_readwrite("snip_edges", &kaldi::PitchExtractionOptions::snip_edges);
    //m.def("ComputeKaldiPitch", &paddleaudio::kaldi::ComputeKaldiPitch, "compute kaldi pitch");

    // Option classes are registered before the function that takes them:
    // pybind11 renders a function's signature docstring at `def` time, and
    // argument types that are not registered yet show up there under their
    // raw C++ names.
    py::class_<knf::FrameExtractionOptions>(m, "FrameExtractionOptions")
        .def(py::init<>())
        .def_readwrite("samp_freq", &knf::FrameExtractionOptions::samp_freq)
        .def_readwrite("frame_shift_ms", &knf::FrameExtractionOptions::frame_shift_ms)
        .def_readwrite("frame_length_ms", &knf::FrameExtractionOptions::frame_length_ms)
        .def_readwrite("dither", &knf::FrameExtractionOptions::dither)
        .def_readwrite("preemph_coeff", &knf::FrameExtractionOptions::preemph_coeff)
        .def_readwrite("remove_dc_offset", &knf::FrameExtractionOptions::remove_dc_offset)
        .def_readwrite("window_type", &knf::FrameExtractionOptions::window_type)
        .def_readwrite("round_to_power_of_two", &knf::FrameExtractionOptions::round_to_power_of_two)
        .def_readwrite("blackman_coeff", &knf::FrameExtractionOptions::blackman_coeff)
        .def_readwrite("snip_edges", &knf::FrameExtractionOptions::snip_edges)
        .def_readwrite("max_feature_vectors", &knf::FrameExtractionOptions::max_feature_vectors);
    py::class_<knf::MelBanksOptions>(m, "MelBanksOptions")
        .def(py::init<>())
        .def_readwrite("num_bins", &knf::MelBanksOptions::num_bins)
        .def_readwrite("low_freq", &knf::MelBanksOptions::low_freq)
        .def_readwrite("high_freq", &knf::MelBanksOptions::high_freq)
        .def_readwrite("vtln_low", &knf::MelBanksOptions::vtln_low)
        .def_readwrite("vtln_high", &knf::MelBanksOptions::vtln_high)
        .def_readwrite("debug_mel", &knf::MelBanksOptions::debug_mel)
        .def_readwrite("htk_mode", &knf::MelBanksOptions::htk_mode);

    py::class_<paddleaudio::kaldi::FbankOptions>(m, "FbankOptions")
        .def(py::init<>())
        .def_readwrite("use_energy", &paddleaudio::kaldi::FbankOptions::use_energy)
        .def_readwrite("energy_floor", &paddleaudio::kaldi::FbankOptions::energy_floor)
        .def_readwrite("raw_energy", &paddleaudio::kaldi::FbankOptions::raw_energy)
        .def_readwrite("htk_compat", &paddleaudio::kaldi::FbankOptions::htk_compat)
        .def_readwrite("use_log_fbank", &paddleaudio::kaldi::FbankOptions::use_log_fbank)
        .def_readwrite("use_power", &paddleaudio::kaldi::FbankOptions::use_power);

    m.def("ComputeFbank", &paddleaudio::kaldi::ComputeFbank, "compute fbank");
#endif

}


================================================
FILE: audio/paddleaudio/src/pybind/sox/effects.cpp
================================================
// the code is from https://github.com/pytorch/audio/blob/main/torchaudio/csrc/sox/effects.cpp  with modification.

#include <mutex>
#include <sox.h>

#include "paddleaudio/src/pybind/sox/effects.h"
#include "paddleaudio/src/pybind/sox/effects_chain.h"
#include "paddleaudio/src/pybind/sox/utils.h"

using namespace paddleaudio::sox_utils;

namespace paddleaudio::sox_effects {

// Streaming decoding over file-like object is tricky because libsox operates on
// FILE pointer. The following is what `sox` and `play` commands do
//  - file input -> FILE pointer
//  - URL input -> call wget in subprocess and pipe the data -> FILE pointer
//  - stdin -> FILE pointer
//
// We want to, instead, fetch byte strings chunk by chunk, consume them, and
// discard.
//
// Here is the approach
// 1. Initialize sox_format_t using sox_open_mem_read, providing the initial
// chunk of byte string
//    This will perform header-based format detection, if necessary, then fill
//    the metadata of sox_format_t. Internally, sox_open_mem_read uses fmemopen,
//    which returns FILE* which points the buffer of the provided byte string.
// 2. Each time sox reads a chunk from the FILE*, we update the underlying
// buffer in a way that it
//    starts with unseen data, and append the new data read from the given
//    fileobj. This will trick libsox as if it keeps reading from the FILE*
//    continuously.
// For Step 2. see `fileobj_input_drain` function in effects_chain.cpp
auto apply_effects_fileobj(
    py::object fileobj,
    const std::vector<std::vector<std::string>>& effects,
    tl::optional<bool> normalize,
    tl::optional<bool> channels_first,
    tl::optional<std::string> format)
    -> tl::optional<std::tuple<py::array, int64_t>> {
  // Set up the buffer that lives as long as the SoxEffectChain does.
  //
  // For some formats (FLAC, for instance) libsox keeps reading at
  // initialization until it hits EOF, even after the header has been parsed.
  // (With a buffer of 8192 bytes, far larger than the header, libsox consumed
  // the whole buffer while opening the file.) The buffer therefore always has
  // to hold valid data, except after EOF. The size defaults to
  // `sox_get_globals()->bufsiz`*, and we first check whether enough data is
  // available to fill it. `read_fileobj` keeps calling `read` until it has the
  // requested number of bytes or reaches EOF; getting fewer bytes than
  // requested means the whole audio has been fetched.
  //
  // * This can be changed with `paddleaudio.utils.sox_utils.set_buffer_size`.
  const auto capacity = [&]() {
    // NOTE:
    // Go through the abstraction provided by `libpaddleaudio` to read the
    // global libsox config. Calling `sox_get_globals` directly would retrieve
    // the static variable defined in `_paddleaudio`, which is not correct.
    const int64_t kMinCapacityInBytes = 256;
    const auto configured = get_buffer_size();
    return (configured > kMinCapacityInBytes) ? configured
                                              : kMinCapacityInBytes;
  }();
  std::string buffer(capacity, '\0');
  auto* in_buf = const_cast<char*>(buffer.data());
  auto num_read = read_fileobj(&fileobj, capacity, in_buf);
  // libsox cannot read the header from fewer than 256 bytes, so never report
  // a smaller buffer.
  auto in_buffer_size = (num_read > 256) ? num_read : 256;

  // Open the stream (this starts reading the header).
  // Two functions can touch FILE* while a file is being opened:
  // * `auto_detect_format`
  //   https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/formats.c#L43
  // * the `startread` handler of the detected format.
  //   https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/formats.c#L574
  // The handler of a particular format lives in
  //   https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/<FORMAT>.c
  // For example, vorbis can be found at
  //   https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/vorbis.c#L97-L158
  const char* filetype = format.has_value() ? format.value().c_str() : nullptr;
  SoxFormat sf(sox_open_mem_read(
      in_buf,
      in_buffer_size,
      /*signal=*/nullptr,
      /*encoding=*/nullptr,
      /*filetype=*/filetype));

  // Bail out with an empty optional when the stream could not be opened or
  // its encoding is unknown. (For streamed data the length can be 0.)
  if (static_cast<sox_format_t*>(sf) == nullptr) {
    return {};
  }
  if (sf->encoding.encoding == SOX_ENCODING_UNKNOWN) {
    return {};
  }

  // Output samples are collected here.
  std::vector<sox_sample_t> out_buffer;
  out_buffer.reserve(sf->signal.length);

  // Assemble the chain: file-object input -> effects -> output buffer.
  const auto dtype = get_dtype(sf->encoding.encoding, sf->signal.precision);
  paddleaudio::sox_effects_chain::SoxEffectsChainPyBind chain(
      /*input_encoding=*/sf->encoding,
      /*output_encoding=*/get_tensor_encodinginfo(dtype));
  chain.addInputFileObj(sf, in_buf, in_buffer_size, &fileobj);
  for (const auto& effect_args : effects) {
    chain.addEffect(effect_args);
  }
  chain.addOutputBuffer(&out_buffer);
  chain.run();

  // Turn the collected samples into an array.
  const bool want_channels_first = channels_first.value_or(true);
  const bool want_normalize = normalize.value_or(true);
  auto tensor = convert_to_tensor(
      /*buffer=*/out_buffer.data(),
      /*num_samples=*/out_buffer.size(),
      /*num_channels=*/chain.getOutputNumChannels(),
      dtype,
      want_normalize,
      want_channels_first);

  return std::forward_as_tuple(
      tensor, static_cast<int64_t>(chain.getOutputSampleRate()));
}

namespace {

// Lifecycle of the libsox effects resources as tracked by this file:
// NotInitialized -> Initialized (initialize_sox_effects) -> ShutDown
// (shutdown_sox_effects). Once ShutDown, initialization is refused.
enum SoxEffectsResourceState { NotInitialized, Initialized, ShutDown };
// Current state; read and written only while holding the mutex below.
SoxEffectsResourceState SOX_RESOURCE_STATE = NotInitialized;
// Guards SOX_RESOURCE_STATE in initialize_sox_effects / shutdown_sox_effects.
std::mutex SOX_RESOURCE_STATE_MUTEX;

} // namespace

/// Initializes libsox effects once.
///
/// Calling it again while initialized is a no-op. Throws std::runtime_error
/// when `sox_init` fails or when the effects were already shut down.
void initialize_sox_effects() {
  const std::lock_guard<std::mutex> lock(SOX_RESOURCE_STATE_MUTEX);

  if (SOX_RESOURCE_STATE == ShutDown) {
    throw std::runtime_error(
        "SoX Effects has been shut down. Cannot initialize again.");
  }

  if (SOX_RESOURCE_STATE == NotInitialized) {
    const bool started = (sox_init() == SOX_SUCCESS);
    if (!started) {
      throw std::runtime_error("Failed to initialize sox effects.");
    }
    SOX_RESOURCE_STATE = Initialized;
  }
  // Already Initialized: nothing to do.
}

/// Shuts down libsox effects.
///
/// Calling it again after a successful shutdown is a no-op. Throws
/// std::runtime_error when the effects were never initialized or when
/// `sox_quit` fails.
void shutdown_sox_effects() {
  const std::lock_guard<std::mutex> lock(SOX_RESOURCE_STATE_MUTEX);

  switch (SOX_RESOURCE_STATE) {
    case NotInitialized:
      throw std::runtime_error(
          "SoX Effects is not initialized. Cannot shutdown.");
    case Initialized:
      if (sox_quit() != SOX_SUCCESS) {
        // The message used to say "initialize", copied from the init path.
        throw std::runtime_error("Failed to shut down sox effects.");
      }
      SOX_RESOURCE_STATE = ShutDown;
      break;
    case ShutDown:
      break;
  }
}

/// Runs `effects` over an in-memory waveform.
///
/// The input is validated first. The result has the same dtype as the input
/// and is not normalized. Returns the processed array together with the
/// chain's output sample rate.
auto apply_effects_tensor(
    py::array waveform,
    int64_t sample_rate,
    const std::vector<std::vector<std::string>>& effects,
    bool channels_first) -> std::tuple<py::array, int64_t> {
  validate_input_tensor(waveform);

  // Input and output share the waveform's dtype.
  const auto dtype = waveform.dtype();
  paddleaudio::sox_effects_chain::SoxEffectsChain chain(
      /*input_encoding=*/get_tensor_encodinginfo(dtype),
      /*output_encoding=*/get_tensor_encodinginfo(dtype));

  // Samples produced by the chain end up here.
  std::vector<sox_sample_t> processed;
  processed.reserve(waveform.size());

  // Chain layout: tensor input -> effects -> output buffer.
  chain.addInputTensor(&waveform, sample_rate, channels_first);
  for (const auto& effect_args : effects) {
    chain.addEffect(effect_args);
  }
  chain.addOutputBuffer(&processed);
  chain.run();

  // Convert the collected samples back into an array.
  auto out_tensor = convert_to_tensor(
      /*buffer=*/processed.data(),
      /*num_samples=*/processed.size(),
      /*num_channels=*/chain.getOutputNumChannels(),
      dtype,
      /*normalize=*/false,
      channels_first);

  return std::tuple<py::array, int64_t>(
      out_tensor, chain.getOutputSampleRate());
}

/// Decodes the audio file at `path` and runs `effects` over it.
///
/// Returns an empty optional when the file cannot be opened or its encoding is
/// unknown; otherwise the processed array and the chain's output sample rate.
/// `normalize` and `channels_first` both default to true when not given.
auto apply_effects_file(
    const std::string& path,
    const std::vector<std::vector<std::string>>& effects,
    tl::optional<bool> normalize,
    tl::optional<bool> channels_first,
    const tl::optional<std::string>& format)
    -> tl::optional<std::tuple<py::array, int64_t>> {
  // Open the input file, forcing the file type only when one was given.
  const char* filetype = format.has_value() ? format.value().c_str() : nullptr;
  SoxFormat sf(sox_open_read(
      path.c_str(),
      /*signal=*/nullptr,
      /*encoding=*/nullptr,
      /*filetype=*/filetype));

  if (static_cast<sox_format_t*>(sf) == nullptr) {
    return {};
  }
  if (sf->encoding.encoding == SOX_ENCODING_UNKNOWN) {
    return {};
  }

  const auto dtype = get_dtype(sf->encoding.encoding, sf->signal.precision);

  // Samples produced by the chain end up here.
  std::vector<sox_sample_t> processed;
  processed.reserve(sf->signal.length);

  // Chain layout: file input -> effects -> output buffer.
  paddleaudio::sox_effects_chain::SoxEffectsChain chain(
      /*input_encoding=*/sf->encoding,
      /*output_encoding=*/get_tensor_encodinginfo(dtype));

  chain.addInputFile(sf);
  for (const auto& effect_args : effects) {
    chain.addEffect(effect_args);
  }
  chain.addOutputBuffer(&processed);
  chain.run();

  // Convert the collected samples into an array.
  const bool want_channels_first = channels_first.value_or(true);
  const bool want_normalize = normalize.value_or(true);
  auto tensor = convert_to_tensor(
      /*buffer=*/processed.data(),
      /*num_samples=*/processed.size(),
      /*num_channels=*/chain.getOutputNumChannels(),
      dtype,
      want_normalize,
      want_channels_first);

  return std::tuple<py::array, int64_t>(
      tensor, chain.getOutputSampleRate());
}

} // namespace paddleaudio::sox_effects


================================================
FILE: audio/paddleaudio/src/pybind/sox/effects.h
================================================
// the code is from https://github.com/pytorch/audio/blob/main/torchaudio/csrc/sox/effects.h  with modification.
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>

#include "paddleaudio/src/optional/optional.hpp"

namespace py = pybind11;

namespace paddleaudio::sox_effects {

// Decodes audio read from a Python file-like object and applies `effects`.
// Returns an empty optional when the stream cannot be opened or its encoding
// is unknown; otherwise (array, sample_rate). `normalize` and
// `channels_first` default to true when not given.
auto apply_effects_fileobj(
    py::object fileobj,
    const std::vector<std::vector<std::string>>& effects,
    tl::optional<bool> normalize,
    tl::optional<bool> channels_first,
    tl::optional<std::string> format)
    -> tl::optional<std::tuple<py::array, int64_t>>;

// Initializes libsox effects; a repeated call is a no-op. Throws
// std::runtime_error on failure or after shutdown.
void initialize_sox_effects();

// Shuts libsox effects down; a repeated call is a no-op. Throws
// std::runtime_error when called before initialization or on failure.
void shutdown_sox_effects();

// Applies `effects` to an in-memory waveform and returns
// (array, sample_rate). The output keeps the input dtype and is not
// normalized.
auto apply_effects_tensor(
    py::array waveform,
    int64_t sample_rate,
    const std::vector<std::vector<std::string>>& effects,
    bool channels_first) -> std::tuple<py::array, int64_t>;

// Decodes the file at `path` and applies `effects`. Returns an empty optional
// when the file cannot be opened or its encoding is unknown; otherwise
// (array, sample_rate). `normalize` and `channels_first` default to true when
// not given.
auto apply_effects_file(
    const std::string& path,
    const std::vector<std::vector<std::string>>& effects,
    tl::optional<bool> normalize,
    tl::optional<bool> channels_first,
    const tl::optional<std::string>& format)
    -> tl::optional<std::tuple<py::array, int64_t>>;

} // namespace paddleaudio::sox_effects


================================================
FILE: audio/paddleaudio/src/pybind/sox/effects_chain.cpp
================================================
// the code is from https://github.com/pytorch/audio/blob/main/torchaudio/csrc/sox/effects_chain.cpp with modification.

#include <sox.h>
#include <iostream>
#include <vector>
#include "paddleaudio/src/pybind/sox/effects_chain.h"
#include "paddleaudio/src/pybind/sox/utils.h"

using namespace paddleaudio::sox_utils;

namespace paddleaudio::sox_effects_chain {

namespace {

/// helper classes for passing the location of input tensor and output buffer
///
/// drain/flow callback functions require plain C style function signature and
/// the way to pass extra data is to attach data to sox_effect_t::priv pointer.
/// The following structs will be assigned to sox_effect_t::priv pointer which
/// gives sox_effect_t an access to input Tensor and output buffer object.
// State of the "input_tensor" effect; stored in sox_effect_t::priv.
struct TensorInputPriv {
  // Number of samples already handed to the chain (counted across all
  // channels); advanced by tensor_input_drain.
  size_t index;
  // Source array; not owned.
  py::array* waveform;
  int64_t sample_rate;
  // True when the array is indexed as (channel, frame), false for
  // (frame, channel).
  bool channels_first;
};

// State of the "output_tensor" effect: where incoming samples are appended.
struct TensorOutputPriv {
  std::vector<sox_sample_t>* buffer;
};
// State of the "output_file" effect: the sox format handle written to.
struct FileOutputPriv {
  sox_format_t* sf;
};

/// Callback function to feed Tensor data to SoxEffectChain.
/// Callback function to feed Tensor data to SoxEffectChain.
///
/// Writes up to `*osamp` interleaved samples (frame-major, channel-minor) to
/// `obuf`, converted to sox_sample_t, and updates `*osamp` to the number
/// actually written. The array is indexed with two indices, (channel, frame)
/// or (frame, channel) depending on `channels_first`.
///
/// Supported numpy dtype numbers: 11 (float32), 5 (int32), 3 (int16) and
/// 1 (byte). Any other dtype throws std::runtime_error.
///
/// Returns SOX_EOF once every sample has been consumed, SOX_SUCCESS otherwise.
int tensor_input_drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp) {
  // Retrieve the input Tensor and current index
  auto priv = static_cast<TensorInputPriv*>(effp->priv);
  const size_t index = priv->index;
  auto tensor = *(priv->waveform);
  const auto num_channels = effp->out_signal.channels;
  const bool channels_first = priv->channels_first;

  // Adjust the number of samples to read
  const size_t num_samples = tensor.size();
  if (index + *osamp > num_samples) {
    *osamp = num_samples - index;
  }

  // Ensure that it's a multiple of the number of channels
  *osamp -= *osamp % num_channels;

  // One converted sample per requested output sample.
  std::vector<sox_sample_t> chunk(*osamp);

  // Address of the source element for the `offset`-th sample of this chunk.
  auto element = [&](size_t offset) -> const void* {
    const size_t pos = index + offset;
    const int frame_idx = static_cast<int>(pos / num_channels);
    const int channel_idx = static_cast<int>(pos % num_channels);
    return channels_first ? tensor.data(channel_idx, frame_idx)
                          : tensor.data(frame_idx, channel_idx);
  };

  // Convert to sox_sample_t (int32_t)
  switch (tensor.dtype().num()) {
    // float32
    case 11: {
      // Scale in 64-bit floating point so that values around INT32_MIN/MAX
      // are handled correctly, then clamp into the int32 range.
      for (size_t i = 0; i < chunk.size(); ++i) {
        const double scaled =
            static_cast<double>(*static_cast<const float*>(element(i))) *
            2147483648.;
        if (scaled > INT32_MAX) {
          chunk[i] = INT32_MAX;
        } else if (scaled < INT32_MIN) {
          chunk[i] = INT32_MIN;
        } else {
          chunk[i] = static_cast<sox_sample_t>(scaled);
        }
      }
      break;
    }
    // int32: already in sox_sample_t range.
    case 5: {
      for (size_t i = 0; i < chunk.size(); ++i) {
        chunk[i] = *static_cast<const int32_t*>(element(i));
      }
      break;
    }
    // int16: shift into the upper 16 bits.
    case 3: {
      for (size_t i = 0; i < chunk.size(); ++i) {
        const int16_t value = *static_cast<const int16_t*>(element(i));
        chunk[i] = static_cast<sox_sample_t>(value) * 65536;
      }
      break;
    }
    // byte: the `- 128` offset treats the stored byte as unsigned 8-bit PCM.
    // Reading it as int8_t made `(elem - 128) * 16777216` overflow a signed
    // int for every negative byte. Reading the raw byte as uint8_t keeps the
    // product inside the int32 range and yields the value the overflowing
    // expression wrapped to.
    case 1: {
      for (size_t i = 0; i < chunk.size(); ++i) {
        const uint8_t raw = *static_cast<const uint8_t*>(element(i));
        chunk[i] = (static_cast<sox_sample_t>(raw) - 128) * 16777216;
      }
      break;
    }
    default:
      throw std::runtime_error("Unexpected dtype.");
  }

  // Write to buffer
  if (!chunk.empty()) {
    memcpy(obuf, chunk.data(), chunk.size() * sizeof(sox_sample_t));
  }
  priv->index += *osamp;
  return (priv->index == num_samples) ? SOX_EOF : SOX_SUCCESS;
}

/// Callback function to fetch data from SoxEffectChain.
/// Flow callback of the "output_tensor" effect: appends the `*isamp` incoming
/// samples to the buffer stored in the effect's priv data. Nothing is passed
/// downstream, so `*osamp` is set to 0. Always returns SOX_SUCCESS.
int tensor_output_flow(
    sox_effect_t* effp,
    sox_sample_t const* ibuf,
    sox_sample_t* obuf LSX_UNUSED,
    size_t* isamp,
    size_t* osamp) {
  *osamp = 0;

  std::vector<sox_sample_t>& sink =
      *static_cast<TensorOutputPriv*>(effp->priv)->buffer;
  sox_sample_t const* const first = ibuf;
  sox_sample_t const* const last = ibuf + *isamp;
  sink.insert(sink.end(), first, last);
  return SOX_SUCCESS;
}

/// Flow callback of the "output_file" effect: writes the `*isamp` incoming
/// samples to the sox format handle stored in the effect's priv data.
///
/// Returns SOX_SUCCESS when there was nothing to write or everything was
/// written. On a short write it throws std::runtime_error if the handle
/// carries an error code, and returns SOX_EOF otherwise.
int file_output_flow(
    sox_effect_t* effp,
    sox_sample_t const* ibuf,
    sox_sample_t* obuf LSX_UNUSED,
    size_t* isamp,
    size_t* osamp) {
  *osamp = 0;
  if (*isamp == 0) {
    return SOX_SUCCESS;
  }

  auto sf = static_cast<FileOutputPriv*>(effp->priv)->sf;
  const size_t written = sox_write(sf, ibuf, *isamp);
  if (written == *isamp) {
    return SOX_SUCCESS;
  }

  if (sf->sox_errno) {
    std::ostringstream message;
    message << sf->sox_errstr << " " << sox_strerror(sf->sox_errno) << " "
            << sf->filename;
    throw std::runtime_error(message.str());
  }
  return SOX_EOF;
}

/// Handler for the "input_tensor" effect: a source that only implements
/// `drain` and keeps its state in a TensorInputPriv.
sox_effect_handler_t* get_tensor_input_handler() {
  static sox_effect_handler_t input_handler{
      /*name=*/"input_tensor",
      /*usage=*/nullptr,
      /*flags=*/SOX_EFF_MCHAN,
      /*getopts=*/nullptr,
      /*start=*/nullptr,
      /*flow=*/nullptr,
      /*drain=*/tensor_input_drain,
      /*stop=*/nullptr,
      /*kill=*/nullptr,
      /*priv_size=*/sizeof(TensorInputPriv)};
  return &input_handler;
}

/// Handler for the "output_tensor" effect: a sink that only implements `flow`
/// and keeps its state in a TensorOutputPriv.
sox_effect_handler_t* get_tensor_output_handler() {
  static sox_effect_handler_t output_handler{
      /*name=*/"output_tensor",
      /*usage=*/nullptr,
      /*flags=*/SOX_EFF_MCHAN,
      /*getopts=*/nullptr,
      /*start=*/nullptr,
      /*flow=*/tensor_output_flow,
      /*drain=*/nullptr,
      /*stop=*/nullptr,
      /*kill=*/nullptr,
      /*priv_size=*/sizeof(TensorOutputPriv)};
  return &output_handler;
}

/// Handler for the "output_file" effect: a sink that only implements `flow`
/// and keeps its state in a FileOutputPriv.
sox_effect_handler_t* get_file_output_handler() {
  static sox_effect_handler_t file_handler{
      /*name=*/"output_file",
      /*usage=*/nullptr,
      /*flags=*/SOX_EFF_MCHAN,
      /*getopts=*/nullptr,
      /*start=*/nullptr,
      /*flow=*/file_output_flow,
      /*drain=*/nullptr,
      /*stop=*/nullptr,
      /*kill=*/nullptr,
      /*priv_size=*/sizeof(FileOutputPriv)};
  return &file_handler;
}

} // namespace

// Takes ownership of `se`; it is released with free() in the destructor.
SoxEffect::SoxEffect(sox_effect_t* se) noexcept : se_(se) {}

// Releases the wrapped effect, if there is one.
SoxEffect::~SoxEffect() {
  if (se_ == nullptr) {
    return;
  }
  free(se_);
}

// Implicit access to the raw pointer, for passing to libsox functions.
SoxEffect::operator sox_effect_t*() const {
  return se_;
}

// Member access on the wrapped effect.
auto SoxEffect::operator->() noexcept -> sox_effect_t* {
  return se_;
}

// Stores both encodings, value-initializes the signal infos and creates the
// underlying libsox effects chain from the stored encodings.
// Throws std::runtime_error when libsox fails to create the chain.
SoxEffectsChain::SoxEffectsChain(
    sox_encodinginfo_t input_encoding,
    sox_encodinginfo_t output_encoding)
    : in_enc_(input_encoding),
      out_enc_(output_encoding),
      in_sig_(),
      interm_sig_(),
      out_sig_(),
      sec_(sox_create_effects_chain(&in_enc_, &out_enc_)) {
  if (sec_ == nullptr) {
    throw std::runtime_error("Failed to create effect chain.");
  }
}

// Deletes the libsox chain if one was created.
SoxEffectsChain::~SoxEffectsChain() {
  if (sec_ == nullptr) {
    return;
  }
  sox_delete_effects_chain(sec_);
}

// Runs the whole chain without a progress callback. The status returned by
// libsox is not inspected.
void SoxEffectsChain::run() {
  sox_flow_effects(sec_, /*callback=*/NULL, /*client_data=*/NULL);
}

// Adds the "input_tensor" effect, which feeds `waveform` into the chain.
//
// The signal info derived from the array becomes both the chain input signal
// and the current intermediate signal. The effect keeps the raw `waveform`
// pointer, so the array must outlive the chain run.
// Throws std::runtime_error when libsox rejects the effect.
void SoxEffectsChain::addInputTensor(
    py::array* waveform,
    int64_t sample_rate,
    bool channels_first) {
  const auto signal =
      get_signalinfo(waveform, sample_rate, "wav", channels_first);
  in_sig_ = signal;
  interm_sig_ = signal;

  SoxEffect e(sox_create_effect(get_tensor_input_handler()));
  auto* state = static_cast<TensorInputPriv*>(e->priv);
  state->waveform = waveform;
  state->sample_rate = sample_rate;
  state->channels_first = channels_first;
  state->index = 0;

  const auto status = sox_add_effect(sec_, e, &interm_sig_, &in_sig_);
  if (status != SOX_SUCCESS) {
    throw std::runtime_error(
        "Internal Error: Failed to add effect: input_tensor");
  }
}

// Adds the "output_tensor" effect whose private state points at
// `output_buffer`. The pointer is stored as-is, so the vector must outlive
// the chain run.
// Throws std::runtime_error when libsox rejects the effect.
void SoxEffectsChain::addOutputBuffer(
    std::vector<sox_sample_t>* output_buffer) {
  SoxEffect e(sox_create_effect(get_tensor_output_handler()));
  auto* state = static_cast<TensorOutputPriv*>(e->priv);
  state->buffer = output_buffer;

  const auto status = sox_add_effect(sec_, e, &interm_sig_, &in_sig_);
  if (status != SOX_SUCCESS) {
    throw std::runtime_error(
        "Internal Error: Failed to add effect: output_tensor");
  }
}

// Adds libsox's built-in "input" effect, reading from the opened file `sf`.
//
// The signal info of `sf` becomes both the chain input signal and the current
// intermediate signal.
// Throws std::runtime_error when the effect cannot be configured or added.
void SoxEffectsChain::addInputFile(sox_format_t* sf) {
  in_sig_ = sf->signal;
  interm_sig_ = in_sig_;
  SoxEffect e(sox_create_effect(sox_find_effect("input")));
  // The "input" effect receives the sox_format_t* disguised as its single
  // string option.
  char* opts[] = {(char*)sf};
  // Do not ignore a configuration failure: an unconfigured input effect would
  // otherwise be added to the chain and only misbehave later, during run().
  if (sox_effect_options(e, 1, opts) != SOX_SUCCESS) {
    std::ostringstream stream;
    stream << "Internal Error: Failed to configure effect: input "
           << sf->filename;
    throw std::runtime_error(stream.str());
  }
  if (sox_add_effect(sec_, e, &interm_sig_, &in_sig_) != SOX_SUCCESS) {
    std::ostringstream stream;
    stream << "Internal Error: Failed to add effect: input " << sf->filename;
    throw std::runtime_error(stream.str());
  }
}

// Adds the "output_file" effect, which writes to the opened file `sf`.
// The signal info of `sf` becomes the chain output signal.
// Throws std::runtime_error when libsox rejects the effect.
void SoxEffectsChain::addOutputFile(sox_format_t* sf) {
  out_sig_ = sf->signal;

  SoxEffect e(sox_create_effect(get_file_output_handler()));
  auto* state = static_cast<FileOutputPriv*>(e->priv);
  state->sf = sf;

  const auto status = sox_add_effect(sec_, e, &interm_sig_, &out_sig_);
  if (status != SOX_SUCCESS) {
    std::ostringstream stream;
    stream << "Internal Error: Failed to add effect: output " << sf->filename;
    throw std::runtime_error(stream.str());
  }
}

// Adds a named libsox effect to the chain.
//
// `effect[0]` is the effect name, the remaining elements are its options.
// Throws std::runtime_error when the list is empty, the effect is listed in
// UNSUPPORTED_EFFECTS or unknown to libsox, the options are rejected, or the
// effect cannot be added to the chain.
void SoxEffectsChain::addEffect(const std::vector<std::string> effect) {
  if (effect.empty()) {
    throw std::runtime_error("Invalid argument: empty effect.");
  }
  const std::string& name = effect.front();

  // Both rejection paths report the same message.
  const auto throw_unsupported = [&name]() {
    std::ostringstream stream;
    stream << "Unsupported effect: " << name;
    throw std::runtime_error(stream.str());
  };

  if (UNSUPPORTED_EFFECTS.find(name) != UNSUPPORTED_EFFECTS.end()) {
    throw_unsupported();
  }
  const auto handler = sox_find_effect(name.c_str());
  if (handler == nullptr) {
    throw_unsupported();
  }
  SoxEffect e(sox_create_effect(handler));

  // Everything after the name is handed to libsox as a C-style argv.
  const size_t num_options = effect.size() - 1;
  std::vector<char*> opts;
  opts.reserve(num_options);
  for (auto it = effect.begin() + 1; it != effect.end(); ++it) {
    opts.push_back(const_cast<char*>(it->c_str()));
  }
  char** argv = opts.empty() ? nullptr : opts.data();

  if (sox_effect_options(e, num_options, argv) != SOX_SUCCESS) {
    std::ostringstream stream;
    stream << "Invalid effect option:";
    for (const auto& token : effect) {
      stream << " " << token;
    }
    throw std::runtime_error(stream.str());
  }

  if (sox_add_effect(sec_, e, &interm_sig_, &in_sig_) != SOX_SUCCESS) {
    std::ostringstream stream;
    stream << "Internal Error: Failed to add effect: \"" << name;
    for (auto it = effect.begin() + 1; it != effect.end(); ++it) {
      stream << " " << *it;
    }
    stream << "\"";
    throw std::runtime_error(stream.str());
  }
}

// Channel count of the current intermediate signal.
int64_t SoxEffectsChain::getOutputNumChannels() {
  return static_cast<int64_t>(interm_sig_.channels);
}

// Sample rate of the current intermediate signal, converted to int64_t.
int64_t SoxEffectsChain::getOutputSampleRate() {
  return static_cast<int64_t>(interm_sig_.rate);
}

namespace {

/// Helper classes for passing a file-like object to SoxEffectsChain.

/// Private state of the "input_fileobj_object" effect; filled in by
/// SoxEffectsChainPyBind::addInputFileObj and used by fileobj_input_drain.
struct FileObjInputPriv {
  // Format handle that decodes from the in-memory buffer.
  sox_format_t* sf;
  // Python file-like object from which additional bytes are read.
  py::object* fileobj;
  // Set once a read from `fileobj` returns fewer bytes than requested.
  bool eof_reached;
  // Start of the memory region that is refilled on every drain call.
  char* buffer;
  // Size of `buffer` in bytes.
  uint64_t buffer_size;
};

/// Private state of the "output_fileobj_object" effect; filled in by
/// SoxEffectsChainPyBind::addOutputFileObj and used by fileobj_output_flow.
struct FileObjOutputPriv {
  // Format handle that encodes the samples.
  sox_format_t* sf;
  // Python file-like object whose `write` method receives the encoded bytes.
  py::object* fileobj;
  // Address of the pointer to the encoded data; dereferenced on each flow
  // call.
  char** buffer;
  // Address of the size that accompanies `buffer`; stored here but not read
  // by fileobj_output_flow.
  size_t* buffer_size;
};

/// Callback function to feed byte string
/// https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/sox.h#L1268-L1278
///
/// Drain callback of the "input_fileobj_object" effect. Each call
///   1. compacts the in-memory buffer and refills it from the Python file-like
///      object, then
///   2. decodes samples from that buffer into `obuf` with sox_read.
///
/// `*osamp` is read as the output capacity on entry and is overwritten with
/// the number of samples actually decoded.
/// Returns SOX_EOF once the file object is exhausted and no sample could be
/// decoded, SOX_SUCCESS otherwise.
/// Throws std::runtime_error when ftell fails or reports a position past the
/// end of the buffer.
auto fileobj_input_drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp)
    -> int {
  auto priv = static_cast<FileObjInputPriv*>(effp->priv);
  auto sf = priv->sf;
  auto buffer = priv->buffer;

  // 1. Refresh the buffer
  //
  // NOTE:
  //   Since the underlying FILE* was opened with `fmemopen`, the only way
  //   libsox detect EOF is reaching the end of the buffer. (null byte won't
  //   help) Therefore we need to align the content at the end of buffer,
  //   otherwise, libsox will keep reading the content beyond intended length.
  //
  // Before:
  //
  //     |<-------consumed------>|<---remaining--->|
  //     |***********************|-----------------|
  //                             ^ ftell
  //
  // After:
  //
  //     |<-offset->|<---remaining--->|<-new data->|
  //     |**********|-----------------|++++++++++++|
  //                ^ ftell

  // NOTE:
  //   Do not use `sf->tell_off` here. Presumably, `tell_off` and `fseek` are
  //   supposed to be in sync, but there are cases (Vorbis) they are not
  //   in sync and `tell_off` has seemingly uninitialized value, which
  //   leads num_remain to be negative and cause segmentation fault
  //   in `memmove`.
  const auto tell = ftell((FILE*)sf->fp);
  if (tell < 0) {
    throw std::runtime_error("Internal Error: ftell failed.");
  }
  const auto num_consumed = static_cast<size_t>(tell);
  if (num_consumed > priv->buffer_size) {
    throw std::runtime_error("Internal Error: buffer overrun.");
  }

  const auto num_remain = priv->buffer_size - num_consumed;

  // 1.1. Fetch the data to see if there is data to fill the buffer
  size_t num_refill = 0;
  std::string chunk(num_consumed, '\0');
  if (num_consumed && !priv->eof_reached) {
    num_refill = read_fileobj(
        priv->fileobj, num_consumed, const_cast<char*>(chunk.data()));
    if (num_refill < num_consumed) {
      priv->eof_reached = true;
    }
  }
  const auto offset = num_consumed - num_refill;

  // 1.2. Move the unconsumed data towards the beginning of buffer.
  if (num_remain) {
    auto src = static_cast<void*>(buffer + num_consumed);
    auto dst = static_cast<void*>(buffer + offset);
    memmove(dst, src, num_remain);
  }

  // 1.3. Refill the remaining buffer.
  if (num_refill) {
    auto src = static_cast<void*>(const_cast<char*>(chunk.c_str()));
    auto dst = buffer + offset + num_remain;
    memcpy(dst, src, num_refill);
  }

  // 1.4. Set the file pointer to the new offset
  sf->tell_off = offset;
  fseek((FILE*)sf->fp, offset, SEEK_SET);

  // 2. Perform decoding operation
  // The following part is practically same as "input" effect
  // https://github.com/dmkrepo/libsox/blob/b9dd1a86e71bbd62221904e3e59dfaa9e5e72046/src/input.c#L30-L48

  // At this point, osamp represents the buffer size in bytes,
  // but sox_read expects the maximum number of samples ready to read.
  // Normally, this is fine, but in case when the samples are not 4-byte
  // aligned, (e.g. sample is 24bits), the resulting signal is not correct.
  // https://github.com/pytorch/audio/issues/2083
  //
  // The integer division yields 0 for encodings narrower than one byte
  // (0 < bits_per_sample < 8). Skip the conversion in that case, exactly as
  // for bits_per_sample == 0, instead of dividing by zero.
  const auto bytes_per_sample = sf->encoding.bits_per_sample / 8;
  if (bytes_per_sample > 0) {
    *osamp /= bytes_per_sample;
  }

  // Ensure that it's a multiple of the number of channels
  *osamp -= *osamp % effp->out_signal.channels;

  // Read up to *osamp samples into obuf;
  // store the actual number read back to *osamp
  *osamp = sox_read(sf, obuf, *osamp);

  // Decoding is finished when fileobject is exhausted and sox can no longer
  // decode a sample.
  return (priv->eof_reached && !*osamp) ? SOX_EOF : SOX_SUCCESS;
}

/// Flow callback of the "output_fileobj_object" effect.
///
/// Encodes the `*isamp` samples in `ibuf` through `sf`, passes the bytes
/// encoded so far to the Python object's `write` method and rewinds the
/// stream so that the next call starts at offset 0 again. This effect never
/// emits samples downstream, so `*osamp` is always set to 0.
///
/// Returns SOX_EOF when fewer samples than requested were written without
/// libsox reporting an error, SOX_SUCCESS otherwise.
/// Throws std::runtime_error when the stream position cannot be determined or
/// when libsox reports a write error.
auto fileobj_output_flow(
    sox_effect_t* effp,
    sox_sample_t const* ibuf,
    sox_sample_t* obuf LSX_UNUSED,
    size_t* isamp,
    size_t* osamp) -> int {
  *osamp = 0;
  if (*isamp) {
    auto priv = static_cast<FileObjOutputPriv*>(effp->priv);
    auto sf = priv->sf;
    auto fp = static_cast<FILE*>(sf->fp);
    auto fileobj = priv->fileobj;
    auto buffer = priv->buffer;

    // Encode chunk
    auto num_samples_written = sox_write(sf, ibuf, *isamp);
    fflush(fp);

    // ftell reports failure with a negative value; used unchecked as a
    // length it would turn into an enormous byte count.
    const auto num_bytes = ftell(fp);
    if (num_bytes < 0) {
      throw std::runtime_error("Internal Error: ftell failed.");
    }

    // Copy the encoded chunk to python object.
    fileobj->attr("write")(
        py::bytes(*buffer, static_cast<size_t>(num_bytes)));

    // Reset FILE*
    sf->tell_off = 0;
    fseek(fp, 0, SEEK_SET);

    if (num_samples_written != *isamp) {
      if (sf->sox_errno) {
        std::ostringstream stream;
        stream << sf->sox_errstr << " " << sox_strerror(sf->sox_errno) << " "
               << sf->filename;
        throw std::runtime_error(stream.str());
      }
      return SOX_EOF;
    }
  }
  return SOX_SUCCESS;
}

// Returns the handler of the custom "input_fileobj_object" effect.
// Only the drain callback is provided; the handler object is a function-local
// static, so every call yields the same address.
auto get_fileobj_input_handler() -> sox_effect_handler_t* {
  static sox_effect_handler_t fileobj_input_handler = {
      /*name=*/"input_fileobj_object",
      /*usage=*/nullptr,
      /*flags=*/SOX_EFF_MCHAN,
      /*getopts=*/nullptr,
      /*start=*/nullptr,
      /*flow=*/nullptr,
      /*drain=*/fileobj_input_drain,
      /*stop=*/nullptr,
      /*kill=*/nullptr,
      /*priv_size=*/sizeof(FileObjInputPriv)};
  return &fileobj_input_handler;
}

// Returns the handler of the custom "output_fileobj_object" effect.
// Only the flow callback is provided; the handler object is a function-local
// static, so every call yields the same address.
auto get_fileobj_output_handler() -> sox_effect_handler_t* {
  static sox_effect_handler_t fileobj_output_handler = {
      /*name=*/"output_fileobj_object",
      /*usage=*/nullptr,
      /*flags=*/SOX_EFF_MCHAN,
      /*getopts=*/nullptr,
      /*start=*/nullptr,
      /*flow=*/fileobj_output_flow,
      /*drain=*/nullptr,
      /*stop=*/nullptr,
      /*kill=*/nullptr,
      /*priv_size=*/sizeof(FileObjOutputPriv)};
  return &fileobj_output_handler;
}

} // namespace

// Adds the "input_fileobj_object" effect, which decodes audio from a Python
// file-like object through the in-memory `buffer`.
//
// The signal info of `sf` becomes both the chain input signal and the current
// intermediate signal. All pointers are stored as-is in the effect's private
// state, so they must stay valid while the chain runs.
// Throws std::runtime_error when libsox rejects the effect.
void SoxEffectsChainPyBind::addInputFileObj(
    sox_format_t* sf,
    char* buffer,
    uint64_t buffer_size,
    py::object* fileobj) {
  const auto signal = sf->signal;
  in_sig_ = signal;
  interm_sig_ = signal;

  SoxEffect e(sox_create_effect(get_fileobj_input_handler()));
  auto* state = static_cast<FileObjInputPriv*>(e->priv);
  state->buffer = buffer;
  state->buffer_size = buffer_size;
  state->fileobj = fileobj;
  state->sf = sf;
  state->eof_reached = false;

  const auto status = sox_add_effect(sec_, e, &interm_sig_, &in_sig_);
  if (status != SOX_SUCCESS) {
    throw std::runtime_error(
        "Internal Error: Failed to add effect: input fileobj");
  }
}

// Adds the "output_fileobj_object" effect, which encodes through `sf` and
// forwards the encoded bytes to a Python file-like object.
//
// The signal info of `sf` becomes the chain output signal. All pointers are
// stored as-is in the effect's private state, so they must stay valid while
// the chain runs.
// Throws std::runtime_error when libsox rejects the effect.
void SoxEffectsChainPyBind::addOutputFileObj(
    sox_format_t* sf,
    char** buffer,
    size_t* buffer_size,
    py::object* fileobj) {
  out_sig_ = sf->signal;

  SoxEffect e(sox_create_effect(get_fileobj_output_handler()));
  auto* state = static_cast<FileObjOutputPriv*>(e->priv);
  state->buffer = buffer;
  state->buffer_size = buffer_size;
  state->fileobj = fileobj;
  state->sf = sf;

  const auto status = sox_add_effect(sec_, e, &interm_sig_, &out_sig_);
  if (status != SOX_SUCCESS) {
    throw std::runtime_error(
        "Internal Error: Failed to add effect: output fileobj");
  }
}

} // namespace paddleaudio::sox_effects_chain


================================================
FILE: audio/paddleaudio/src/pybind/sox/effects_chain.h
================================================
// the code is from https://github.com/pytorch/audio/blob/main/torchaudio/csrc/sox/effects_chain.h with modification.

#pragma once

#include <sox.h>
#include "paddleaudio/src/pybind/sox/utils.h"

namespace paddleaudio::sox_effects_chain {

// Helper struct to safely close sox_effect_t* pointer returned by
// sox_create_effect.
//
// The wrapper is neither copyable nor movable, so exactly one object is
// responsible for a given pointer.
struct SoxEffect {
  // Wraps `se` (may be null).
  explicit SoxEffect(sox_effect_t* se) noexcept;
  SoxEffect(const SoxEffect& other) = delete;
  SoxEffect(const SoxEffect&& other) = delete;
  auto operator=(const SoxEffect& other) -> SoxEffect& = delete;
  auto operator=(SoxEffect&& other) -> SoxEffect& = delete;
  // Releases the wrapped pointer.
  ~SoxEffect();
  // Implicit conversion, so the wrapper can be passed where libsox expects a
  // sox_effect_t*.
  operator sox_effect_t*() const;
  // Member access on the wrapped effect.
  auto operator->() noexcept -> sox_effect_t*;

 private:
  // The wrapped effect pointer.
  sox_effect_t* se_;
};

// Helper class to safely close sox_effects_chain_t with handy methods.
//
// Typical use, as seen in the callers: construct with the input/output
// encodings, add one input effect, optionally add processing effects, add one
// output effect, then call run().
class SoxEffectsChain {
  // Encodings handed to sox_create_effects_chain; kept as members because the
  // chain is created from pointers to them.
  const sox_encodinginfo_t in_enc_;
  const sox_encodinginfo_t out_enc_;

 protected:
  // Signal info of the chain input; set by the addInput* methods.
  sox_signalinfo_t in_sig_;
  // Signal info passed to sox_add_effect as the incoming signal of every
  // effect; also the source of getOutputNumChannels/getOutputSampleRate.
  sox_signalinfo_t interm_sig_;
  // Signal info of the chain output; set by the addOutputFile* methods.
  sox_signalinfo_t out_sig_;
  // The owned libsox effects chain.
  sox_effects_chain_t* sec_;

 public:
  // Creates the underlying chain; throws std::runtime_error on failure.
  explicit SoxEffectsChain(
      sox_encodinginfo_t input_encoding,
      sox_encodinginfo_t output_encoding);
  SoxEffectsChain(const SoxEffectsChain& other) = delete;
  SoxEffectsChain(const SoxEffectsChain&& other) = delete;
  SoxEffectsChain& operator=(const SoxEffectsChain& other) = delete;
  SoxEffectsChain& operator=(SoxEffectsChain&& other) = delete;
  // Deletes the underlying chain.
  ~SoxEffectsChain();
  // Runs all effects that were added to the chain.
  void run();
  // Uses `waveform` as the chain input. The pointer is stored, not copied.
  void addInputTensor(
      py::array* waveform,
      int64_t sample_rate,
      bool channels_first);
  // Uses the opened file `sf` as the chain input.
  void addInputFile(sox_format_t* sf);
  // Attaches `output_buffer` to the output effect. The pointer is stored.
  void addOutputBuffer(std::vector<sox_sample_t>* output_buffer);
  // Uses the opened file `sf` as the chain output.
  void addOutputFile(sox_format_t* sf);
  // Adds a libsox effect; `effect[0]` is its name, the rest are its options.
  // Throws std::runtime_error for empty, unsupported or invalid effects.
  void addEffect(const std::vector<std::string> effect);
  // Channel count of the current intermediate signal.
  int64_t getOutputNumChannels();
  // Sample rate of the current intermediate signal.
  int64_t getOutputSampleRate();
};

// Extension of SoxEffectsChain whose input/output can be a Python file-like
// object.
class SoxEffectsChainPyBind : public SoxEffectsChain {
  // Inherit the base class constructors.
  using SoxEffectsChain::SoxEffectsChain;

 public:
  // Uses a Python file-like object as the chain input. `sf` decodes from the
  // memory region `buffer` of `buffer_size` bytes, which is refilled from
  // `fileobj` while the chain runs. All pointers are stored, not copied.
  void addInputFileObj(
      sox_format_t* sf,
      char* buffer,
      uint64_t buffer_size,
      py::object* fileobj);

  // Uses a Python file-like object as the chain output. Bytes encoded through
  // `sf` are read via `*buffer` and passed to the `write` method of
  // `fileobj`. All pointers are stored, not copied.
  void addOutputFileObj(
      sox_format_t* sf,
      char** buffer,
      size_t* buffer_size,
      py::object* fileobj);
};

} // namespace paddleaudio::sox_effects_chain


================================================
FILE: audio/paddleaudio/src/pybind/sox/io.cpp
================================================
// the code is from https://github.com/pytorch/audio/blob/main/torchaudio/csrc/sox/io.cpp with modification.

#include "paddleaudio/src/pybind/sox/io.h"
#include "paddleaudio/src/pybind/sox/effects.h"
#include "paddleaudio/src/pybind/sox/types.h"
#include "paddleaudio/src/pybind/sox/effects_chain.h"
#include "paddleaudio/src/pybind/sox/utils.h"
#include "paddleaudio/src/optional/optional.hpp"

using namespace paddleaudio::sox_utils;

namespace paddleaudio {
namespace sox_io {

// Opens the audio file at `path` and reports its basic properties.
//
// `format` optionally overrides the file type passed to libsox.
// Returns (sample rate, number of frames, number of channels,
// bits per sample, encoding name); the frame count is the total sample count
// divided by the channel count.
auto get_info_file(const std::string &path, 
                   const tl::optional<std::string> &format)
    -> std::tuple<int64_t, int64_t, int64_t, int64_t, std::string> {
    const char *filetype =
        format.has_value() ? format.value().c_str() : nullptr;

    SoxFormat sf(sox_open_read(path.c_str(),
                               /*signal=*/nullptr,
                               /*encoding=*/nullptr,
                               /*filetype=*/filetype));

    validate_input_file(sf, path);

    const auto sample_rate = static_cast<int64_t>(sf->signal.rate);
    const auto num_frames =
        static_cast<int64_t>(sf->signal.length / sf->signal.channels);
    const auto num_channels = static_cast<int64_t>(sf->signal.channels);
    const auto bits_per_sample =
        static_cast<int64_t>(sf->encoding.bits_per_sample);

    return std::make_tuple(sample_rate,
                           num_frames,
                           num_channels,
                           bits_per_sample,
                           get_encoding(sf->encoding.encoding));
}

// Translates an optional frame offset / frame count into a list of sox
// effects.
//
// A missing `frame_offset` means 0, a missing `num_frames` means -1 (all).
// The result is empty, or holds a single "trim" effect whose positions carry
// the "s" suffix.
// Throws std::runtime_error for a negative offset, or a frame count that is 0
// or below -1.
std::vector<std::vector<std::string>> get_effects(
    const tl::optional<int64_t>& frame_offset,
    const tl::optional<int64_t>& num_frames) {
  const auto offset = frame_offset.value_or(0);
  if (offset < 0) {
    throw std::runtime_error(
        "Invalid argument: frame_offset must be non-negative.");
  }
  const auto frames = num_frames.value_or(-1);
  if (frames == 0 || frames < -1) {
    throw std::runtime_error(
        "Invalid argument: num_frames must be -1 or greater than 0.");
  }

  // Formats `<prefix><value>s`.
  const auto as_samples = [](const char* prefix, int64_t value) {
    std::ostringstream os;
    os << prefix << value << "s";
    return os.str();
  };

  std::vector<std::vector<std::string>> effects;
  const bool limit_length = (frames != -1);
  if (limit_length) {
    effects.emplace_back(std::vector<std::string>{
        "trim", as_samples("", offset), as_samples("+", frames)});
  } else if (offset != 0) {
    effects.emplace_back(
        std::vector<std::string>{"trim", as_samples("", offset)});
  }
  return effects;
}

// Reads the beginning of a Python file-like object and reports the basic
// properties of the audio it contains.
//
// `format` optionally overrides the file type passed to libsox.
// Returns (sample rate, number of frames, number of channels,
// bits per sample, encoding name); the frame count is the total sample count
// divided by the channel count.
auto get_info_fileobj(py::object fileobj, 
                      const tl::optional<std::string> &format)
    -> std::tuple<int64_t, int64_t, int64_t, int64_t, std::string> {
    // Read at least 4096 bytes, or the configured sox buffer size if larger.
    const int64_t kDefaultCapacityInBytes = 4096;
    const auto bufsiz = get_buffer_size();
    const auto capacity =
        (bufsiz > kDefaultCapacityInBytes) ? bufsiz : kDefaultCapacityInBytes;

    std::string buffer(capacity, '\0');
    auto *buf = const_cast<char *>(buffer.data());
    const auto num_read = read_fileobj(&fileobj, capacity, buf);
    // If the file is shorter than 256, then libsox cannot read the header.
    const auto buf_size = (num_read > 256) ? num_read : 256;

    const char *filetype =
        format.has_value() ? format.value().c_str() : nullptr;
    SoxFormat sf(sox_open_mem_read(buf,
                                   buf_size,
                                   /*signal=*/nullptr,
                                   /*encoding=*/nullptr,
                                   /*filetype=*/filetype));

    // In case of streamed data, length can be 0
    validate_input_memfile(sf);

    const auto sample_rate = static_cast<int64_t>(sf->signal.rate);
    const auto num_frames =
        static_cast<int64_t>(sf->signal.length / sf->signal.channels);
    const auto num_channels = static_cast<int64_t>(sf->signal.channels);
    const auto bits_per_sample =
        static_cast<int64_t>(sf->encoding.bits_per_sample);

    return std::make_tuple(sample_rate,
                           num_frames,
                           num_channels,
                           bits_per_sample,
                           get_encoding(sf->encoding.encoding));
}

// Loads audio from a Python file-like object.
//
// The requested frame range is expressed as a "trim" effect (see get_effects)
// and the actual decoding is delegated to sox_effects::apply_effects_fileobj.
tl::optional<std::tuple<py::array, int64_t>> load_audio_fileobj(
    py::object fileobj,
    const tl::optional<int64_t>& frame_offset,
    const tl::optional<int64_t>& num_frames,
    tl::optional<bool> normalize,
    tl::optional<bool> channels_first,
    const tl::optional<std::string>& format) {
  auto trim_effects = get_effects(frame_offset, num_frames);
  return paddleaudio::sox_effects::apply_effects_fileobj(
      std::move(fileobj),
      trim_effects,
      normalize,
      channels_first,
      std::move(format));
}

// Loads audio from the file at `path`.
//
// The requested frame range is expressed as a "trim" effect (see get_effects)
// and the actual decoding is delegated to sox_effects::apply_effects_file.
tl::optional<std::tuple<py::array, int64_t>> load_audio_file(
    const std::string& path,
    const tl::optional<int64_t>& frame_offset,
    const tl::optional<int64_t>& num_frames,
    tl::optional<bool> normalize,
    tl::optional<bool> channels_first,
    const tl::optional<std::string>& format) {
    auto trim_effects = get_effects(frame_offset, num_frames);
    return paddleaudio::sox_effects::apply_effects_file(
        path, trim_effects, normalize, channels_first, format);
}

// Encodes `tensor` and writes it to the file at `path`.
//
// The file type is `format` when given, otherwise it is derived from `path`
// via get_filetype. `compression`, `encoding` and `bits_per_sample` are
// forwarded to get_encodinginfo_for_save.
//
// Throws std::runtime_error when
//   - "amr-nb", "htk" or "gsm" is requested for multi-channel audio,
//   - "gsm" is requested with a sample rate other than 8000, or
//   - the output file cannot be opened.
// These format constraints are reported as exceptions (as in
// save_audio_fileobj) rather than with assert, so they are also enforced in
// builds that define NDEBUG and never abort the process.
void save_audio_file(const std::string& path,
                     py::array tensor,
                     int64_t sample_rate,
                     bool channels_first,
                     tl::optional<double> compression,
                     tl::optional<std::string> format,
                     tl::optional<std::string> encoding,
                     tl::optional<int64_t> bits_per_sample) {
    validate_input_tensor(tensor);

    const auto filetype = [&]() {
        if (format.has_value()) return format.value();
        return get_filetype(path);
    }();

    if (filetype == "amr-nb") {
        const auto num_channels = tensor.shape(channels_first ? 0 : 1);
        if (num_channels != 1) {
            throw std::runtime_error(
                "amr-nb format only supports single channel audio.");
        }
    } else if (filetype == "htk") {
        const auto num_channels = tensor.shape(channels_first ? 0 : 1);
        if (num_channels != 1) {
            throw std::runtime_error(
                "htk format only supports single channel audio.");
        }
    } else if (filetype == "gsm") {
        const auto num_channels = tensor.shape(channels_first ? 0 : 1);
        if (num_channels != 1) {
            throw std::runtime_error(
                "gsm format only supports single channel audio.");
        }
        if (sample_rate != 8000) {
            throw std::runtime_error(
                "gsm format only supports a sampling rate of 8kHz.");
        }
    }
    const auto signal_info =
        get_signalinfo(&tensor, sample_rate, filetype, channels_first);
    const auto encoding_info = get_encodinginfo_for_save(
        filetype, tensor.dtype(), compression, encoding, bits_per_sample);

    SoxFormat sf(sox_open_write(path.c_str(),
                                &signal_info,
                                &encoding_info,
                                /*filetype=*/filetype.c_str(),
                                /*oob=*/nullptr,
                                /*overwrite_permitted=*/nullptr));

    if (static_cast<sox_format_t*>(sf) == nullptr) {
        throw std::runtime_error(
            "Error saving audio file: failed to open file " + path);
    }

    // Tensor -> file: the chain input uses the tensor's encoding, the output
    // uses the encoding of the opened file.
    paddleaudio::sox_effects_chain::SoxEffectsChain chain(
        /*input_encoding=*/get_tensor_encodinginfo(tensor.dtype()),
        /*output_encoding=*/sf->encoding);
    chain.addInputTensor(&tensor, sample_rate, channels_first);
    chain.addOutputFile(sf);
    chain.run();
}

namespace {
// Helper that frees a malloc-style buffer when it goes out of scope; used by
// save_audio_fileobj. It starts out empty, and `ptr`/`size` are filled in
// through their addresses.
struct AutoReleaseBuffer {
  char* ptr;
  size_t size;

  AutoReleaseBuffer() : ptr{nullptr}, size{0} {}
  AutoReleaseBuffer(const AutoReleaseBuffer& other) = delete;
  AutoReleaseBuffer(AutoReleaseBuffer&& other) = delete;
  auto operator=(const AutoReleaseBuffer& other) -> AutoReleaseBuffer& = delete;
  auto operator=(AutoReleaseBuffer&& other) -> AutoReleaseBuffer& = delete;
  ~AutoReleaseBuffer() {
    if (ptr == nullptr) {
      return;
    }
    free(ptr);
  }
};

} // namespace

// Encodes `tensor` and writes the result to a Python file-like object.
//
// `format` is mandatory here. `compression`, `encoding` and `bits_per_sample`
// are forwarded to get_encodinginfo_for_save.
//
// Throws std::runtime_error when
//   - `format` is missing,
//   - "amr-nb", "htk" or "gsm" is requested for multi-channel audio,
//   - "gsm" is requested with a sample rate other than 8000, or
//   - the memory stream cannot be opened.
void save_audio_fileobj(
    py::object fileobj,
    py::array tensor,
    int64_t sample_rate,
    bool channels_first,
    tl::optional<double> compression,
    tl::optional<std::string> format,
    tl::optional<std::string> encoding,
    tl::optional<int64_t> bits_per_sample) {

  if (!format.has_value()) {
    throw std::runtime_error(
        "`format` is required when saving to file object.");
  }
  const auto filetype = format.value();

  // Throws `message` unless the tensor holds exactly one channel.
  const auto require_mono = [&](const char* message) {
    if (tensor.shape(channels_first ? 0 : 1) != 1) {
      throw std::runtime_error(message);
    }
  };

  if (filetype == "amr-nb") {
    require_mono("amr-nb format only supports single channel audio.");
  } else if (filetype == "htk") {
    require_mono("htk format only supports single channel audio.");
  } else if (filetype == "gsm") {
    require_mono("gsm format only supports single channel audio.");
    if (sample_rate != 8000) {
      throw std::runtime_error(
          "gsm format only supports a sampling rate of 8kHz.");
    }
  }

  const auto signal_info =
      get_signalinfo(&tensor, sample_rate, filetype, channels_first);
  const auto encoding_info = get_encodinginfo_for_save(
      filetype,
      tensor.dtype(),
      compression,
      std::move(encoding),
      bits_per_sample);

  // Receives the encoded bytes; freed automatically on scope exit.
  AutoReleaseBuffer buffer;

  SoxFormat sf(sox_open_memstream_write(
      &buffer.ptr,
      &buffer.size,
      &signal_info,
      &encoding_info,
      filetype.c_str(),
      /*oob=*/nullptr));

  if (static_cast<sox_format_t*>(sf) == nullptr) {
    throw std::runtime_error(
        "Error saving audio file: failed to open memory stream.");
  }

  // Tensor -> file object: the chain input uses the tensor's encoding, the
  // output uses the encoding of the opened memory stream.
  paddleaudio::sox_effects_chain::SoxEffectsChainPyBind chain(
      /*input_encoding=*/get_tensor_encodinginfo(tensor.dtype()),
      /*output_encoding=*/sf->encoding);
  chain.addInputTensor(&tensor, sample_rate, channels_first);
  chain.addOutputFileObj(sf, &buffer.ptr, &buffer.size, &fileobj);
  chain.run();

  // Closing the sox_format_t is necessary for flushing the last chunk to the
  // buffer
  sf.close();
  fileobj.attr("write")(py::bytes(buffer.ptr, buffer.size));
}

}  // namespace sox_io
}  // namespace paddleaudio


================================================
FILE: audio/paddleaudio/src/pybind/sox/io.h
================================================
// the code is from https://github.com/pytorch/audio/blob/main/torchaudio/csrc/sox/io.h with modification.
#pragma once

#include "paddleaudio/src/pybind/sox/utils.h"

namespace py = pybind11;

namespace paddleaudio {
namespace sox_io {

// Opens the audio file at `path` (optionally forcing the file type `format`)
// and returns (sample rate, number of frames, number of channels,
// bits per sample, encoding name).
auto get_info_file(const std::string &path, 
                   const tl::optional<std::string> &format)
    -> std::tuple<int64_t, int64_t, int64_t, int64_t, std::string>;

// Same as get_info_file, but reads the beginning of a Python file-like
// object instead of a file on disk. For streamed data the reported frame
// count can be 0.
auto get_info_fileobj(py::object fileobj,
                   const tl::optional<std::string> &format)
    -> std::tuple<int64_t, int64_t, int64_t, int64_t, std::string>;

// Loads audio from a Python file-like object.
// `frame_offset` / `num_frames` select the range to load (see get_effects for
// the accepted values); the remaining arguments are forwarded to
// sox_effects::apply_effects_fileobj, whose result is returned.
tl::optional<std::tuple<py::array, int64_t>> load_audio_fileobj(
    py::object fileobj,
    const tl::optional<int64_t>& frame_offset,
    const tl::optional<int64_t>& num_frames,
    tl::optional<bool> normalize,
    tl::optional<bool> channels_first,
    const tl::optional<std::string>& format);

// Encodes `tensor` and writes the bytes to the `write` method of `fileobj`.
// `format` is required; std::runtime_error is thrown when it is missing.
// "amr-nb", "htk" and "gsm" accept single-channel audio only, and "gsm"
// additionally requires a sample rate of 8000.
void save_audio_fileobj(
    py::object fileobj,
    py::array tensor,
    int64_t sample_rate,
    bool channels_first,
    tl::optional<double> compression,
    tl::optional<std::string> format,
    tl::optional<std::string> encoding,
    tl::optional<int64_t> bits_per_sample);

// Builds the sox effect list that selects a frame range: empty, or a single
// "trim" effect. A missing `frame_offset` means 0 and a missing `num_frames`
// means -1 (no limit). Throws std::runtime_error when `frame_offset` is
// negative or `num_frames` is 0 or below -1.
auto get_effects(const tl::optional<int64_t>& frame_offset,
                 const tl::optional<int64_t>& num_frames)
    -> std::vector<std::vector<std::string>>;


// Loads audio from the file at `path`.
// `frame_offset` / `num_frames` select the range to load (see get_effects for
// the accepted values); the remaining arguments are forwarded to
// sox_effects::apply_effects_file, whose result is returned.
tl::optional<std::tuple<py::array, int64_t>> load_audio_file(
    const std::string& path,
    const tl::optional<int64_t>& frame_offset,
    const tl::optional<int64_t>& num_frames,
    tl::optional<bool> normalize,
    tl::optional<bool> channels_first,
    const tl::optional<std::string>& format);

// Encodes `tensor` and writes it to the file at `path`.
// The file type is `format` when given, otherwise it is derived from `path`.
// "amr-nb", "htk" and "gsm" are meant for single-channel audio only, and
// "gsm" additionally for a sample rate of 8000.
// Throws std::runtime_error when the output file cannot be opened.
void save_audio_file(const std::string& path,
                     py::array tensor,
                     int64_t sample_rate,
                     bool channels_first,
                     tl::optional<double> compression,
                     tl::optional<std::string> format,
                     tl::optional<std::string> encoding,
                     tl::optional<int64_t> bits_per_sample);    


}  // namespace sox_io
}  // namespace paddleaudio


================================================
FILE: audio/paddleaudio/src/pybind/sox/types.cpp
================================================
//code is from: https://github.com/pytorch/audio/blob/main/torchaudio/csrc/sox/types.cpp

#include "paddleaudio/src/pybind/sox/types.h"
#include <ostream>
#include <sstream>

namespace paddleaudio {
namespace sox_utils {

// Maps a format name to the corresponding Format enumerator.
// "ogg" and "vorbis" both map to Format::VORBIS.
// Throws std::runtime_error for any other name.
Format get_format_from_string(const std::string& format) {
  struct Entry {
    const char* name;
    Format value;
  };
  static const Entry kEntries[] = {
      {"wav", Format::WAV},
      {"mp3", Format::MP3},
      {"flac", Format::FLAC},
      {"ogg", Format::VORBIS},
      {"vorbis", Format::VORBIS},
      {"amr-nb", Format::AMR_NB},
      {"amr-wb", Format::AMR_WB},
      {"amb", Format::AMB},
      {"sph", Format::SPHERE},
      {"htk", Format::HTK},
      {"gsm", Format::GSM},
  };
  for (const auto& entry : kEntries) {
    if (format == entry.name) {
      return entry.value;
    }
  }
  std::ostringstream stream;
  stream << "Internal Error: unexpected format value: " << format;
  throw std::runtime_error(stream.str());
}

// Returns the textual name of an Encoding value.
// Throws std::runtime_error for values without a name, which includes
// Encoding::NOT_PROVIDED.
std::string to_string(Encoding v) {
  const char* label = nullptr;
  switch (v) {
    case Encoding::UNKNOWN:
      label = "UNKNOWN";
      break;
    case Encoding::PCM_SIGNED:
      label = "PCM_S";
      break;
    case Encoding::PCM_UNSIGNED:
      label = "PCM_U";
      break;
    case Encoding::PCM_FLOAT:
      label = "PCM_F";
      break;
    case Encoding::FLAC:
      label = "FLAC";
      break;
    case Encoding::ULAW:
      label = "ULAW";
      break;
    case Encoding::ALAW:
      label = "ALAW";
      break;
    case Encoding::MP3:
      label = "MP3";
      break;
    case Encoding::VORBIS:
      label = "VORBIS";
      break;
    case Encoding::AMR_WB:
      label = "AMR_WB";
      break;
    case Encoding::AMR_NB:
      label = "AMR_NB";
      break;
    case Encoding::OPUS:
      label = "OPUS";
      break;
    default:
      break;
  }
  if (label == nullptr) {
    throw std::runtime_error("Internal Error: unexpected encoding.");
  }
  return label;
}

// Converts an optional encoding name into an Encoding value.
// An empty option yields Encoding::NOT_PROVIDED. Accepted names are "PCM_S",
// "PCM_U", "PCM_F", "ULAW" and "ALAW"; any other name throws
// std::runtime_error.
Encoding get_encoding_from_option(const tl::optional<std::string> encoding) {
  if (!encoding.has_value()) {
    return Encoding::NOT_PROVIDED;
  }
  const std::string& requested = encoding.value();

  struct Choice {
    const char* label;
    Encoding value;
  };
  static const Choice kChoices[] = {
      {"PCM_S", Encoding::PCM_SIGNED},
      {"PCM_U", Encoding::PCM_UNSIGNED},
      {"PCM_F", Encoding::PCM_FLOAT},
      {"ULAW", Encoding::ULAW},
      {"ALAW", Encoding::ALAW},
  };
  for (const auto& choice : kChoices) {
    if (requested == choice.label) {
      return choice.value;
    }
  }

  std::ostringstream stream;
  stream << "Internal Error: unexpected encoding value: " << requested;
  throw std::runtime_error(stream.str());
}

// Converts an optional bit depth into a BitDepth value.
// An empty option yields BitDepth::NOT_PROVIDED. Accepted values are 8, 16,
// 24, 32 and 64; any other value throws std::runtime_error.
BitDepth get_bit_depth_from_option(const tl::optional<int64_t> bit_depth) {
  if (!bit_depth.has_value()) {
    return BitDepth::NOT_PROVIDED;
  }
  const int64_t bits = bit_depth.value();
  if (bits == 8) {
    return BitDepth::B8;
  }
  if (bits == 16) {
    return BitDepth::B16;
  }
  if (bits == 24) {
    return BitDepth::B24;
  }
  if (bits == 32) {
    return BitDepth::B32;
  }
  if (bits == 64) {
    return BitDepth::B64;
  }
  std::ostringstream s;
  s << "Internal Error: unexpected bit depth value: " << bits;
  throw std::runtime_error(s.str());
}

// Returns the textual name of a libsox encoding.
// Encodings without a dedicated name are reported as "UNKNOWN".
std::string get_encoding(sox_encoding_t encoding) {
  const char* label = "UNKNOWN";
  switch (encoding) {
    case SOX_ENCODING_SIGN2:
      label = "PCM_S";
      break;
    case SOX_ENCODING_UNSIGNED:
      label = "PCM_U";
      break;
    case SOX_ENCODING_FLOAT:
      label = "PCM_F";
      break;
    case SOX_ENCODING_FLAC:
      label = "FLAC";
      break;
    case SOX_ENCODING_ULAW:
      label = "ULAW";
      break;
    case SOX_ENCODING_ALAW:
      label = "ALAW";
      break;
    case SOX_ENCODING_MP3:
      label = "MP3";
      break;
    case SOX_ENCODING_VORBIS:
      label = "VORBIS";
      break;
    case SOX_ENCODING_AMR_WB:
      label = "AMR_WB";
      break;
    case SOX_ENCODING_AMR_NB:
      label = "AMR_NB";
      break;
    case SOX_ENCODING_OPUS:
      label = "OPUS";
      break;
    case SOX_ENCODING_GSM:
      label = "GSM";
      break;
    default:
      // SOX_ENCODING_UNKNOWN and everything not listed above.
      break;
  }
  return label;
}

} // namespace sox_utils
} // namespace paddleaudio


================================================
FILE: audio/paddleaudio/src/pybind/sox/types.h
================================================
//code is from: https://github.com/pytorch/audio/blob/main/torchaudio/csrc/sox/types.h
#pragma once

#include <sox.h>
#include "paddleaudio/src/optional/optional.hpp"

namespace paddleaudio {
namespace sox_utils {

// Audio file formats known to this module. The trailing comments give the
// names accepted by get_format_from_string.
enum class Format {
  WAV,     // "wav"
  MP3,     // "mp3"
  FLAC,    // "flac"
  VORBIS,  // "ogg" or "vorbis"
  AMR_NB,  // "amr-nb"
  AMR_WB,  // "amr-wb"
  AMB,     // "amb"
  SPHERE,  // "sph"
  GSM,     // "gsm"
  HTK,     // "htk"
};

// Maps a format name (e.g. "wav") to a Format value; throws
// std::runtime_error for an unrecognised name.
Format get_format_from_string(const std::string& format);

// Sample encodings known to this module.
enum class Encoding {
  // Returned by get_encoding_from_option when no encoding was specified.
  NOT_PROVIDED,
  UNKNOWN,
  PCM_SIGNED,    // option name "PCM_S"
  PCM_UNSIGNED,  // option name "PCM_U"
  PCM_FLOAT,     // option name "PCM_F"
  FLAC,
  ULAW,  // option name "ULAW"
  ALAW,  // option name "ALAW"
  MP3,
  VORBIS,
  AMR_WB,
  AMR_NB,
  OPUS,
};

// Returns the textual name of `v` (e.g. "PCM_S"); throws std::runtime_error
// for values without a name, such as Encoding::NOT_PROVIDED.
std::string to_string(Encoding v);
// Converts an optional encoding name ("PCM_S", "PCM_U", "PCM_F", "ULAW",
// "ALAW") to an Encoding value; an empty option yields
// Encoding::NOT_PROVIDED and any other name throws std::runtime_error.
Encoding get_encoding_from_option(const tl::optional<std::string> encoding);

// Supported bit depths. Each enumerator's numeric value equals its number of
// bits; NOT_PROVIDED (0) is returned by get_bit_depth_from_option when no
// bit depth was specified.
enum class BitDepth : unsigned {
  NOT_PROVIDED = 0,
  B8 = 8,
  B16 = 16,
  B24 = 24,
  B32 = 32,
  B64 = 64,
};

// Converts an optional bit depth (8, 16, 24, 32 or 64) to a BitDepth value;
// an empty option yields BitDepth::NOT_PROVIDED and any other value throws
// std::runtime_error.
BitDepth get_bit_depth_from_option(const tl::optional<int64_t> bit_depth);

// Returns the textual name of a libsox encoding; encodings without a
// dedicated name are reported as "UNKNOWN".
std::string get_encoding(sox_encoding_t encoding);

} // namespace sox_utils
} // namespace paddleaudio


================================================
FILE: audio/paddleaudio/src/pybind/sox/utils.cpp
================================================
//code is from: https://github.com/pytorch/audio/blob/main/torchaudio/csrc/sox/utils.cpp with modification.
#include <sox.h>

#include "paddleaudio/src/pybind/sox/utils.h"
#include "paddleaudio/src/pybind/sox/types.h"

#include <sstream>

namespace paddleaudio {
namespace sox_utils {

// Reads up to `size` bytes from a Python file-like object into `buffer`.
//
// The object's `read` method is called repeatedly, each time with the number
// of bytes still missing, until `size` bytes were received or an empty result
// signals the end of the data.
// Returns the number of bytes stored in `buffer`.
// Throws std::runtime_error when `read` returns more bytes than requested.
auto read_fileobj(py::object *fileobj, const uint64_t size, char *buffer)
    -> uint64_t {
    uint64_t total = 0;
    char *cursor = buffer;
    while (total < size) {
        const uint64_t wanted = size - total;
        const auto piece = static_cast<std::string>(
            static_cast<py::bytes>(fileobj->attr("read")(wanted)));
        const auto received = piece.length();
        if (received == 0) {
            // Nothing more to read.
            break;
        }
        if (received > wanted) {
            std::ostringstream message;
            message
                << "Requested up to " << wanted << " bytes but, "
                << "received " << received << " bytes. "
                << "The given object does not confirm to read protocol of file "
                   "object.";
            throw std::runtime_error(message.str());
        }
        memcpy(cursor, piece.data(), received);
        cursor += received;
        total += received;
    }
    return total;
}


// Stores `seed`, converted to sox_int32_t, in the `ranqd1` field of the
// libsox globals.
void set_seed(const int64_t seed) {
  auto* globals = sox_get_globals();
  globals->ranqd1 = static_cast<sox_int32_t>(seed);
}

// Stores `verbosity`, converted to unsigned, in the libsox globals.
void set_verbosity(const int64_t verbosity) {
  auto* globals = sox_get_globals();
  globals->verbosity = static_cast<unsigned>(verbosity);
}

void set_use_threads(const bool use_threads) {
  // Mirror the flag into the `use_threads` field of the libsox globals.
  auto* globals = sox_get_globals();
  globals->use_threads = static_cast<sox_bool>(use_threads);
}

void set_buffer_size(const int64_t buffer_size) {
  // Store the size (converted to size_t) in the `bufsiz` field of the libsox
  // globals.
  auto* globals = sox_get_globals();
  globals->bufsiz = static_cast<size_t>(buffer_size);
}

int64_t get_buffer_size() {
  // Read back the `bufsiz` field of the libsox globals.
  const auto* globals = sox_get_globals();
  return globals->bufsiz;
}

std::vector<std::vector<std::string>> list_effects() {
  std::vector<std::vector<std::string>> effects;
  for (const sox_effect_fn_t* fns = sox_get_effect_fns(); *fns; ++fns) {
    const sox_effect_handler_t* handler = (*fns)();
    if (handler && handler->name) {
      if (UNSUPPORTED_EFFECTS.find(handler->name) ==
          UNSUPPORTED_EFFECTS.end()) {
        effects.emplace_back(std::vector<std::string>{
            handler->name,
            handler->usage ? std::string(handler->usage) : std::string("")});
      }
    }
  }
  return effects;
}

std::vector<std::string> list_write_formats() {
  std::vector<std::string> formats;
  for (const sox_format_tab_t* fns = sox_get_format_fns(); fns->fn; ++fns) {
    const sox_format_handler_t* handler = fns->fn();
    for (const char* const* names = handler->names; *names; ++names) {
      if (!strchr(*names, '/') && handler->write)
        formats.emplace_back(*names);
    }
  }
  return formats;
}

std::vector<std::string> list_read_formats() {
  std::vector<std::string> formats;
  for (const sox_format_tab_t* fns = sox_get_format_fns(); fns->fn; ++fns) {
    const sox_format_handler_t* handler = fns->fn();
    for (const char* const* names = handler->names; *names; ++names) {
      if (!strchr(*names, '/') && handler->read)
        formats.emplace_back(*names);
    }
  }
  return formats;
}

/// Wrap an already opened libsox handle. A null pointer is allowed; close()
/// is then a no-op.
SoxFormat::SoxFormat(sox_format_t* fd) noexcept : fd_(fd) {}

/// Close the wrapped handle if it has not been closed yet.
SoxFormat::~SoxFormat() {
  this->close();
}

/// Member access on the wrapped handle.
sox_format_t* SoxFormat::operator->() const noexcept {
  return this->fd_;
}

/// Implicit access to the raw handle, e.g. for passing it to libsox calls.
SoxFormat::operator sox_format_t*() const noexcept {
  return this->fd_;
}

/// Close the handle once; repeated calls do nothing because the pointer is
/// reset to nullptr after the first sox_close().
void SoxFormat::close() {
  if (fd_ == nullptr) {
    return;
  }
  sox_close(fd_);
  fd_ = nullptr;
}

void validate_input_file(const SoxFormat& sf, const std::string& path) {
  if (static_cast<sox_format_t*>(sf) == nullptr) {
    throw std::runtime_error(
        "Error loading audio file: failed to open file " + path);
  }
  if (sf->encoding.encoding == SOX_ENCODING_UNKNOWN) {
    throw std::runtime_error("Error loading audio file: unknown encoding.");
  }
}

void validate_input_memfile(const SoxFormat &sf) {
    // Same checks as for a file on disk; the placeholder name only shows up
    // in the "failed to open" error message.
    const std::string placeholder("<in memory buffer>");
    validate_input_file(sf, placeholder);
}

void validate_input_tensor(const py::array tensor) {
  // Only 2-D arrays are accepted.
  if (tensor.ndim() != 2) {
    throw std::runtime_error("Input tensor has to be 2D.");
  }

  // NOTE(review): the accepted dtype character codes are 'f', 'd', 'l' and
  // 'i', which does not match the types named in the error message below,
  // nor the dtype numbers handled by get_precision()/get_tensor_encodinginfo()
  // in this file. Confirm which set is intended.
  switch (tensor.dtype().char_()) {
    case 'f':
    case 'd':
    case 'l':
    case 'i':
      return;
    default:
      throw std::runtime_error(
          "Input tensor has to be one of float32, int32, int16 or uint8 type.");
  }
}

/// Choose the numpy dtype used to hand decoded audio back to Python.
///
/// @param encoding  libsox encoding of the source.
/// @param precision Bits per sample reported by libsox; only consulted for
///                  signed PCM.
/// @return uint8 for unsigned PCM, int16 or C int for signed PCM, and float32
///         for every other encoding.
/// @throws std::runtime_error for signed PCM with a precision other than
///         16, 24 or 32.
py::dtype get_dtype(
    const sox_encoding_t encoding,
    const unsigned precision) {
    switch (encoding) {
      case SOX_ENCODING_UNSIGNED: // 8-bit PCM WAV
        // Must be the string "u1". The previous 'u1' was a multi-character
        // char constant, i.e. an implementation-defined int, which selects
        // the integer-typenum constructor instead of naming uint8.
        return py::dtype("u1");
      case SOX_ENCODING_SIGN2: // 16-bit, 24-bit, or 32-bit PCM WAV
        switch (precision) {
          case 16:
            return py::dtype("i2");
          case 24: // Cast 24-bit to 32-bit.
          case 32:
            // Single character type code 'i' (C int); convert_to_tensor()
            // dispatches on exactly this code.
            return py::dtype('i');
          default:
            throw std::runtime_error(
                "Only 16, 24, and 32 bits are supported for signed PCM.");
        }
      default:
        // default to float32 for the other formats, including
        // 32-bit floating-point WAV,
        // MP3,
        // FLAC,
        // VORBIS etc...
        return py::dtype("f");
    }
}

namespace {

/// Copy an interleaved sample buffer into a newly allocated numpy array.
///
/// @tparam T          C++ element type written into the array.
/// @param buffer      Interleaved samples: the sample of channel `c` in frame
///                    `f` lives at index `f * num_channels + c`.
/// @param num_frames  Number of frames (samples per channel).
/// @param num_channels Number of channels.
/// @param dtype       dtype of the array to allocate; its item size must
///                    match sizeof(T).
/// @param channels_first When true the result has shape
///                    [num_channels, num_frames], otherwise
///                    [num_frames, num_channels].
/// @param convert     Callable mapping one sox_sample_t to one T.
template <typename T, typename Converter>
py::array samples_to_array(
    const sox_sample_t* buffer,
    const int32_t num_frames,
    const int32_t num_channels,
    const py::dtype& dtype,
    const bool channels_first,
    Converter convert) {
  if (!channels_first) {
    py::array out(dtype, {num_frames, num_channels});
    // mutable_data() without indices is valid even for an empty array,
    // whereas mutable_data(0, 0) raises an index error in that case.
    T* dst = static_cast<T*>(out.mutable_data());
    const int64_t total = static_cast<int64_t>(num_frames) * num_channels;
    for (int64_t i = 0; i < total; ++i) {
      dst[i] = convert(buffer[i]);
    }
    return out;
  }

  // Write straight into the transposed layout instead of building a
  // [frames, channels] array first and copying it element by element.
  py::array out(dtype, {num_channels, num_frames});
  T* dst = static_cast<T*>(out.mutable_data());
  for (int32_t frame = 0; frame < num_frames; ++frame) {
    const sox_sample_t* src =
        buffer + static_cast<int64_t>(frame) * num_channels;
    for (int32_t ch = 0; ch < num_channels; ++ch) {
      dst[static_cast<int64_t>(ch) * num_frames + frame] = convert(src[ch]);
    }
  }
  return out;
}

}  // namespace

/// Convert a sox_sample_t buffer into a numpy array.
///
/// @param buffer       Interleaved samples produced by libsox; only read.
/// @param num_samples  Total number of samples in `buffer` (all channels).
///                     Samples beyond the last complete frame are ignored.
/// @param num_channels Number of channels; must be positive.
/// @param dtype        Requested output dtype, dispatched on its character
///                     code: 'f' (float32), 'i' (int), 'h' (int16),
///                     'b'/'B' (8-bit).
/// @param normalize    When true the output is always float32, regardless of
///                     `dtype`.
/// @param channels_first When true the output shape is
///                     [num_channels, num_frames], otherwise
///                     [num_frames, num_channels].
/// @throws std::runtime_error if `num_channels` is not positive or if the
///         dtype is unsupported while `normalize` is false.
py::array convert_to_tensor(
    sox_sample_t* buffer,
    const int32_t num_samples,
    const int32_t num_channels,
    const py::dtype dtype,
    const bool normalize,
    const bool channels_first) {
  if (num_channels <= 0) {
    // Guards the integer division below.
    throw std::runtime_error("The number of channels must be positive.");
  }
  const int32_t num_frames = num_samples / num_channels;
  const char code = dtype.char_();

  if (normalize || code == 'f') {
    // The float path must allocate a float32 array. Allocating with the
    // caller's dtype (e.g. int16) and then storing floats would write past
    // the end of the allocation and mislabel the data.
    auto to_float = [](sox_sample_t sample) -> float {
      SOX_SAMPLE_LOCALS;
      uint64_t clips = 0;  // clip counter required by the macro; unused
      return SOX_SAMPLE_TO_FLOAT_32BIT(sample, clips);
    };
    return samples_to_array<float>(
        buffer, num_frames, num_channels, py::dtype("f"), channels_first,
        to_float);
  }
  if (code == 'i') {
    // sox_sample_t values are copied unchanged.
    auto to_int = [](sox_sample_t sample) -> int {
      return static_cast<int>(sample);
    };
    return samples_to_array<int>(
        buffer, num_frames, num_channels, dtype, channels_first, to_int);
  }
  if (code == 'h') {  // int16
    auto to_int16 = [](sox_sample_t sample) -> int16_t {
      SOX_SAMPLE_LOCALS;
      uint64_t clips = 0;
      return SOX_SAMPLE_TO_SIGNED_16BIT(sample, clips);
    };
    return samples_to_array<int16_t>(
        buffer, num_frames, num_channels, dtype, channels_first, to_int16);
  }
  if (code == 'b' || code == 'B') {
    // 'B' is numpy's code for uint8; 'b' (int8) is kept because it was the
    // only 8-bit code accepted before.
    auto to_uint8 = [](sox_sample_t sample) -> uint8_t {
      SOX_SAMPLE_LOCALS;
      uint64_t clips = 0;
      return SOX_SAMPLE_TO_UNSIGNED_8BIT(sample, clips);
    };
    return samples_to_array<uint8_t>(
        buffer, num_frames, num_channels, dtype, channels_first, to_uint8);
  }
  throw std::runtime_error("Unsupported dtype.");
}

/// Return the lower-cased text that follows the last '.' in `path`.
/// When `path` contains no '.', the whole path is returned lower-cased; when
/// it ends with '.', the result is empty.
const std::string get_filetype(const std::string path) {
  // find_last_of returns npos when there is no dot; npos + 1 wraps to 0, so
  // substr then yields the complete string.
  std::string ext = path.substr(path.find_last_of(".") + 1);
  // tolower is only defined for values representable as unsigned char (or
  // EOF), so bytes >= 0x80 must not be passed as a possibly negative char.
  std::transform(ext.begin(), ext.end(), ext.begin(), [](unsigned char c) {
    return static_cast<char>(::tolower(c));
  });
  return ext;
}

namespace {

/// Resolve the libsox encoding and bit depth used when saving WAV-like data.
///
/// @param format          Format name; only used in error messages.
/// @param dtype           dtype of the array being saved; only consulted when
///                        neither encoding nor bit depth was requested.
/// @param encoding        Requested encoding (NOT_PROVIDED when absent).
/// @param bits_per_sample Requested bit depth (NOT_PROVIDED when absent).
/// @return (encoding, bits per sample) pair.
/// @throws std::runtime_error for unsupported combinations.
std::tuple<sox_encoding_t, unsigned> get_save_encoding_for_wav(
    const std::string format,
    py::dtype dtype,
    const Encoding& encoding,
    const BitDepth& bits_per_sample) {
  switch (encoding) {
    case Encoding::NOT_PROVIDED:
      switch (bits_per_sample) {
        case BitDepth::NOT_PROVIDED:
          // Nothing requested: derive both values from the array dtype.
          // NOTE(review): the literals are numpy type numbers; 1 is NPY_BYTE
          // (int8) while uint8 is NPY_UBYTE (2) - confirm which one callers
          // actually pass for 8-bit data.
          switch (dtype.num()) {
            case 11: // float32 (NPY_FLOAT)
              return std::make_tuple<>(SOX_ENCODING_FLOAT, 32);
            case 5: // C int (NPY_INT)
              return std::make_tuple<>(SOX_ENCODING_SIGN2, 32);
            case 3: // int16 (NPY_SHORT)
              return std::make_tuple<>(SOX_ENCODING_SIGN2, 16);
            case 1: // 8-bit (NPY_BYTE), saved as unsigned PCM
              return std::make_tuple<>(SOX_ENCODING_UNSIGNED, 8);
            default:
              throw std::runtime_error("Internal Error: Unexpected dtype.");
          }
        case BitDepth::B8:
          // 8-bit without explicit encoding is saved as unsigned PCM.
          return std::make_tuple<>(SOX_ENCODING_UNSIGNED, 8);
        default:
          // Any other explicit bit depth is saved as signed PCM.
          return std::make_tuple<>(
              SOX_ENCODING_SIGN2, static_cast<unsigned>(bits_per_sample));
      }
    case Encoding::PCM_SIGNED:
      switch (bits_per_sample) {
        case BitDepth::NOT_PROVIDED:
          return std::make_tuple<>(SOX_ENCODING_SIGN2, 32);
        case BitDepth::B8:
          throw std::runtime_error(
              format + " does not support 8-bit signed PCM encoding.");
        default:
          return std::make_tuple<>(
              SOX_ENCODING_SIGN2, static_cast<unsigned>(bits_per_sample));
      }
    case Encoding::PCM_UNSIGNED:
      switch (bits_per_sample) {
        case BitDepth::NOT_PROVIDED:
        case BitDepth::B8:
          return std::make_tuple<>(SOX_ENCODING_UNSIGNED, 8);
        default:
          throw std::runtime_error(
              format + " only supports 8-bit for unsigned PCM encoding.");
      }
    case Encoding::PCM_FLOAT:
      switch (bits_per_sample) {
        case BitDepth::NOT_PROVIDED:
        case BitDepth::B32:
          return std::make_tuple<>(SOX_ENCODING_FLOAT, 32);
        case BitDepth::B64:
          return std::make_tuple<>(SOX_ENCODING_FLOAT, 64);
        default:
          throw std::runtime_error(
              format +
              " only supports 32-bit or 64-bit for floating-point PCM encoding.");
      }
    case Encoding::ULAW:
      switch (bits_per_sample) {
        case BitDepth::NOT_PROVIDED:
        case BitDepth::B8:
          return std::make_tuple<>(SOX_ENCODING_ULAW, 8);
        default:
          throw std::runtime_error(
              format + " only supports 8-bit for mu-law encoding.");
      }
    case Encoding::ALAW:
      switch (bits_per_sample) {
        case BitDepth::NOT_PROVIDED:
        case BitDepth::B8:
          return std::make_tuple<>(SOX_ENCODING_ALAW, 8);
        default:
          throw std::runtime_error(
              format + " only supports 8-bit for a-law encoding.");
      }
    default:
      throw std::runtime_error(
          format + " does not support encoding: " + to_string(encoding));
  }
}

/// Resolve the libsox encoding and bit depth for saving in `format`.
///
/// @param format          Format name, parsed with get_format_from_string().
/// @param dtype           dtype of the array being saved; only forwarded to
///                        the WAV/AMB handling.
/// @param encoding        Optional encoding name requested by the caller.
/// @param bits_per_sample Optional bit depth requested by the caller.
/// @return (encoding, bits per sample) pair.
/// @throws std::runtime_error when the format is unsupported or when an
///         option is given that the format does not accept.
std::tuple<sox_encoding_t, unsigned> get_save_encoding(
    const std::string& format,
    const py::dtype dtype,
    const tl::optional<std::string> encoding,
    const tl::optional<int64_t> bits_per_sample) {
  const Format fmt = get_format_from_string(format);
  const Encoding enc = get_encoding_from_option(encoding);
  const BitDepth bps = get_bit_depth_from_option(bits_per_sample);

  switch (fmt) {
    case Format::WAV:
    case Format::AMB:
      return get_save_encoding_for_wav(format, dtype, enc, bps);
    // MP3, HTK, VORBIS, AMR-NB and GSM accept neither option and always
    // report 16 bits.
    case Format::MP3:
      if (enc != Encoding::NOT_PROVIDED)
        throw std::runtime_error("mp3 does not support `encoding` option.");
      if (bps != BitDepth::NOT_PROVIDED)
        throw std::runtime_error(
            "mp3 does not support `bits_per_sample` option.");
      return std::make_tuple<>(SOX_ENCODING_MP3, 16);
    case Format::HTK:
      if (enc != Encoding::NOT_PROVIDED)
        throw std::runtime_error("htk does not support `encoding` option.");
      if (bps != BitDepth::NOT_PROVIDED)
        throw std::runtime_error(
            "htk does not support `bits_per_sample` option.");
      return std::make_tuple<>(SOX_ENCODING_SIGN2, 16);
    case Format::VORBIS:
      if (enc != Encoding::NOT_PROVIDED)
        throw std::runtime_error("vorbis does not support `encoding` option.");
      if (bps != BitDepth::NOT_PROVIDED)
        throw std::runtime_error(
            "vorbis does not support `bits_per_sample` option.");
      return std::make_tuple<>(SOX_ENCODING_VORBIS, 16);
    case Format::AMR_NB:
      if (enc != Encoding::NOT_PROVIDED)
        throw std::runtime_error("amr-nb does not support `encoding` option.");
      if (bps != BitDepth::NOT_PROVIDED)
        throw std::runtime_error(
            "amr-nb does not support `bits_per_sample` option.");
      return std::make_tuple<>(SOX_ENCODING_AMR_NB, 16);
    case Format::FLAC:
      if (enc != Encoding::NOT_PROVIDED)
        throw std::runtime_error("flac does not support `encoding` option.");
      switch (bps) {
        case BitDepth::B32:
        case BitDepth::B64:
          throw std::runtime_error(
              "flac does not support `bits_per_sample` larger than 24.");
        default:
          // Includes NOT_PROVIDED, which is passed on as a bit depth of 0.
          return std::make_tuple<>(
              SOX_ENCODING_FLAC, static_cast<unsigned>(bps));
      }
    case Format::SPHERE:
      switch (enc) {
        case Encoding::NOT_PROVIDED:
        case Encoding::PCM_SIGNED:
          switch (bps) {
            case BitDepth::NOT_PROVIDED:
              return std::make_tuple<>(SOX_ENCODING_SIGN2, 32);
            default:
              return std::make_tuple<>(
                  SOX_ENCODING_SIGN2, static_cast<unsigned>(bps));
          }
        case Encoding::PCM_UNSIGNED:
          throw std::runtime_error(
              "sph does not support unsigned integer PCM.");
        case Encoding::PCM_FLOAT:
          throw std::runtime_error("sph does not support floating point PCM.");
        case Encoding::ULAW:
          switch (bps) {
            case BitDepth::NOT_PROVIDED:
            case BitDepth::B8:
              return std::make_tuple<>(SOX_ENCODING_ULAW, 8);
            default:
              throw std::runtime_error(
                  "sph only supports 8-bit for mu-law encoding.");
          }
        case Encoding::ALAW:
          switch (bps) {
            case BitDepth::NOT_PROVIDED:
            case BitDepth::B8:
              return std::make_tuple<>(SOX_ENCODING_ALAW, 8);
            default:
              // NOTE(review): unlike the mu-law case above, other bit depths
              // are passed through here instead of being rejected - confirm
              // that this asymmetry is intended.
              return std::make_tuple<>(
                  SOX_ENCODING_ALAW, static_cast<unsigned>(bps));
          }
        default:
          throw std::runtime_error(
              "sph does not support encoding: " + encoding.value());
      }
    case Format::GSM:
      if (enc != Encoding::NOT_PROVIDED)
        throw std::runtime_error("gsm does not support `encoding` option.");
      if (bps != BitDepth::NOT_PROVIDED)
        throw std::runtime_error(
            "gsm does not support `bits_per_sample` option.");
      return std::make_tuple<>(SOX_ENCODING_GSM, 16);

    default:
      throw std::runtime_error("Unsupported format: " + format);
  }
}

/// Map a file type (and, for wav/amb, the array dtype) to the precision in
/// bits stored in sox_signalinfo_t. Throws std::runtime_error for unknown
/// file types and for wav/amb with an unhandled dtype.
unsigned get_precision(const std::string filetype, py::dtype dtype) {
  // Formats whose precision is left unspecified.
  const bool unspecified =
      filetype == "mp3" || filetype == "ogg" || filetype == "vorbis";
  if (unspecified) {
    return SOX_UNSPEC;
  }
  if (filetype == "flac") {
    return 24;
  }
  // wav/amb: precision follows the numpy type number of the array.
  const bool wav_like = filetype == "wav" || filetype == "amb";
  if (wav_like) {
    const int type_num = dtype.num();
    if (type_num == 1) {  // byte in numpy dtype num
      return 8;
    }
    if (type_num == 3) {  // short, in numpy dtype num
      return 16;
    }
    if (type_num == 5 || type_num == 11) {  // int / float, numpy dtype
      return 32;
    }
    throw std::runtime_error("Unsupported dtype.");
  }
  if (filetype == "sph") {
    return 32;
  }
  // Fixed 16-bit formats.
  const bool fixed_16 =
      filetype == "amr-nb" || filetype == "gsm" || filetype == "htk";
  if (fixed_16) {
    return 16;
  }
  throw std::runtime_error("Unsupported file type: " + filetype);
}

} // namespace

/// Describe `waveform` as a sox_signalinfo_t: sample rate, channel count
/// (taken from axis 0 when `channels_first`, otherwise axis 1), precision
/// derived from file type and dtype, and the total element count as length.
sox_signalinfo_t get_signalinfo(
    const py::array* waveform,
    const int64_t sample_rate,
    const std::string filetype,
    const bool channels_first) {
  const auto rate = static_cast<sox_rate_t>(sample_rate);
  const int channel_axis = channels_first ? 0 : 1;
  const auto channels = static_cast<unsigned>(waveform->shape(channel_axis));
  const unsigned precision = get_precision(filetype, waveform->dtype());
  const auto length = static_cast<uint64_t>(waveform->size());
  return sox_signalinfo_t{
      /*rate=*/rate,
      /*channels=*/channels,
      /*precision=*/precision,
      /*length=*/length};
}

/// Build the sox_encodinginfo_t that describes raw array data of `dtype`.
/// The encoding and bit depth are chosen from the numpy type number; any
/// other dtype raises std::runtime_error("Unsupported dtype.").
sox_encodinginfo_t get_tensor_encodinginfo(py::dtype dtype) {
  sox_encoding_t encoding = SOX_ENCODING_UNKNOWN;
  unsigned bits_per_sample = 0;
  switch (dtype.num()) {
    case 1:  // byte
      encoding = SOX_ENCODING_UNSIGNED;
      bits_per_sample = 8;
      break;
    case 3:  // short
      encoding = SOX_ENCODING_SIGN2;
      bits_per_sample = 16;
      break;
    case 5:  // int32
      encoding = SOX_ENCODING_SIGN2;
      bits_per_sample = 32;
      break;
    case 11:  // float
      encoding = SOX_ENCODING_FLOAT;
      bits_per_sample = 32;
      break;
    default:
      throw std::runtime_error("Unsupported dtype.");
  }
  return sox_encodinginfo_t{
      /*encoding=*/encoding,
      /*bits_per_sample=*/bits_per_sample,
      /*compression=*/HUGE_VAL,
      /*reverse_bytes=*/sox_option_default,
      /*reverse_nibbles=*/sox_option_default,
      /*reverse_bits=*/sox_option_default,
      /*opposite_endian=*/sox_false};
}

/// Build the sox_encodinginfo_t used when saving: encoding and bit depth come
/// from get_save_encoding(), compression falls back to HUGE_VAL when the
/// caller did not provide one.
sox_encodinginfo_t get_encodinginfo_for_save(
    const std::string& format,
    const py::dtype dtype,
    const tl::optional<double> compression,
    const tl::optional<std::string> encoding,
    const tl::optional<int64_t> bits_per_sample) {
  const std::tuple<sox_encoding_t, unsigned> resolved =
      get_save_encoding(format, dtype, encoding, bits_per_sample);
  const sox_encoding_t sox_encoding = std::get<0>(resolved);
  const unsigned bits = std::get<1>(resolved);
  const double level = compression.value_or(HUGE_VAL);
  return sox_encodinginfo_t{
      /*encoding=*/sox_encoding,
      /*bits_per_sample=*/bits,
      /*compression=*/level,
      /*reverse_bytes=*/sox_option_default,
      /*reverse_nibbles=*/sox_option_default,
      /*reverse_bits=*/sox_option_default,
      /*opposite_endian=*/sox_false};
}

}  // namespace sox_utils
}  // namespace paddleaudio


================================================
FILE: audio/paddleaudio/src/pybind/sox/utils.h
================================================
//code is from: https://github.com/pytorch/audio/blob/main/torchaudio/csrc/sox/utils.h with modification.
#pragma once

#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <sox.h>
#include "paddleaudio/src/optional/optional.hpp"

namespace py = pybind11;

namespace paddleaudio {
namespace sox_utils {

/// Read up to `size` bytes from a Python file-like object into `buffer` by
/// calling its `read` method repeatedly; returns the number of bytes copied.
auto read_fileobj(py::object *fileobj, uint64_t size, char *buffer) -> uint64_t;

/// Store `seed` in the `ranqd1` field of the libsox globals.
void set_seed(const int64_t seed);

/// Store `verbosity` in the libsox globals.
void set_verbosity(const int64_t verbosity);

/// Store the `use_threads` flag in the libsox globals.
void set_use_threads(const bool use_threads);

/// Store `buffer_size` in the `bufsiz` field of the libsox globals.
void set_buffer_size(const int64_t buffer_size);

/// Return the `bufsiz` field of the libsox globals.
int64_t get_buffer_size();

/// List {name, usage} pairs of the libsox effects, excluding the names in
/// UNSUPPORTED_EFFECTS.
std::vector<std::vector<std::string>> list_effects();

/// List the format names whose libsox handler has a read callback.
std::vector<std::string> list_read_formats();

/// List the format names whose libsox handler has a write callback.
std::vector<std::string> list_write_formats();

////////////////////////////////////////////////////////////////////////////////
// Utilities for sox_io / sox_effects implementations
////////////////////////////////////////////////////////////////////////////////

/// Effect names that list_effects() leaves out of its result.
const std::unordered_set<std::string> UNSUPPORTED_EFFECTS =
    {"input", "output", "spectrogram", "noiseprof", "noisered", "splice"};

/// Helper class to automatically close a sox_format_t*.
/// Copying and moving are disabled, so the wrapped handle is closed at most
/// once: either by an explicit close() or by the destructor.
struct SoxFormat {
  /// Wrap `fd`; a null pointer is accepted and makes close() a no-op.
  explicit SoxFormat(sox_format_t* fd) noexcept;
  SoxFormat(const SoxFormat& other) = delete;
  SoxFormat(SoxFormat&& other) = delete;
  SoxFormat& operator=(const SoxFormat& other) = delete;
  SoxFormat& operator=(SoxFormat&& other) = delete;
  /// Calls close().
  ~SoxFormat();
  /// Member access on the wrapped handle.
  sox_format_t* operator->() const noexcept;
  /// Implicit conversion to the raw handle.
  operator sox_format_t*() const noexcept;

  /// Close the handle with sox_close() and reset it to nullptr.
  void close();

 private:
  sox_format_t* fd_;  // wrapped handle; nullptr once closed
};

///
/// Verify that the input array is 2-D and that its dtype character code is
/// one of 'f', 'd', 'l' or 'i'; throws std::runtime_error otherwise.
void validate_input_tensor(const py::array);

/// Throw std::runtime_error when `sf` wraps a null handle (the message
/// includes `path`) or when the opened file reports an unknown encoding.
void validate_input_file(const SoxFormat& sf, const std::string& path);

/// Same checks as validate_input_file() for an in-memory source.
void validate_input_memfile(const SoxFormat &sf);
///
/// Get target dtype for the given encoding and precision.
py::dtype get_dtype(
    const sox_encoding_t encoding,
    const unsigned precision);

///
/// Convert a sox_sample_t buffer to a uint8/int16/int32/float32 numpy array.
/// NOTE(review): the remark that this function might modify the input buffer
/// was inherited from the torchaudio original; the implementation in
/// utils.cpp only reads from `buffer`.
/// @param buffer Pointer to buffer that contains audio data.
/// @param num_samples The number of samples to read.
/// @param num_channels The number of channels. Used to reshape the resulting
/// array.
/// @param dtype Target dtype. Determines the output dtype and value range in
/// conjunction with normalization.
/// @param normalize Perform normalization. Only effective when dtype is not
/// float32. When effective, the output is meant to be float32 with a value
/// range of [-1.0, 1.0].
/// @param channels_first When true, the output array has shape
/// [num_channels, num_frames]; otherwise [num_frames, num_channels].
py::array convert_to_tensor(
    sox_sample_t* buffer,
    const int32_t num_samples,
    const int32_t num_channels,
    const py::dtype dtype,
    const bool normalize,
    const bool channels_first);

/// Extract the extension from a file path: the lower-cased text after the
/// last '.'. A path without any '.' is returned lower-cased as a whole.
const std::string get_filetype(const std::string path);

/// Get sox_signalinfo_t for passing a py::array object.
/// The channel count is read from axis 0 when `channels_first` is true and
/// from axis 1 otherwise.
sox_signalinfo_t get_signalinfo(
    const py::array* waveform,
    const int64_t sample_rate,
    const std::string filetype,
    const bool channels_first);

/// Get sox_encodinginfo_t for array I/O, derived from the array dtype.
sox_encodinginfo_t get_tensor_encodinginfo(const py::dtype dtype);

/// Get sox_encodinginfo_t for saving to file/file object.
/// `compression`, `encoding` and `bits_per_sample` are optional; unsupported
/// combinations for `format` raise std::runtime_error.
sox_encodinginfo_t get_encodinginfo_for_save(
    const std::string& format,
    const py::dtype dtype,
    const tl::optional<double> compression,
    const tl::optional<std::string> encoding,
    const tl::optional<int64_t> bits_per_sample);

}  // namespace sox_utils
}  // namespace paddleaudio


================================================
FILE: audio/paddleaudio/src/utils.cpp
================================================
// this is from: https://github.com/pytorch/audio/blob/main/torchaudio/csrc/utils.cpp with modification.

namespace paddleaudio {

namespace {

// Reports whether this build was compiled with INCLUDE_SOX defined.
bool is_sox_available() {
#ifdef INCLUDE_SOX
    const bool compiled_in = true;
#else
    const bool compiled_in = false;
#endif
    return compiled_in;
}

// Reports whether this build was compiled with INCLUDE_KALDI defined.
bool is_kaldi_available() {
#ifdef INCLUDE_KALDI
    const bool compiled_in = true;
#else
    const bool compiled_in = false;
#endif
    return compiled_in;
}

// Reports whether paddleaudio was compiled with ffmpeg support (USE_FFMPEG
// defined). This says nothing about availability at run time.
bool is_ffmpeg_available() {
#ifdef USE_FFMPEG
    const bool compiled_in = true;
#else
    const bool compiled_in = false;
#endif
    return compiled_in;
}

}  // namespace

}  // namespace paddleaudio


================================================
FILE: audio/paddleaudio/third_party/.gitignore
================================================
archives/
install/


================================================
FILE: audio/paddleaudio/third_party/CMakeLists.txt
================================================
# Append -fvisibility=hidden to the C++ compiler flags, so symbols are hidden
# unless explicitly exported.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden")

################################################################################
# sox
################################################################################
# Only descend into the sox directory when BUILD_SOX is enabled.
if (BUILD_SOX)
  add_subdirectory(sox)
endif()

################################################################################
# kaldi
################################################################################
# When BUILD_KALDI is enabled, put this directory on the include path (the
# kaldi-native-fbank sources include "kaldi-native-fbank/csrc/...") and build
# the library.
if (BUILD_KALDI)
  include_directories(${CMAKE_CURRENT_SOURCE_DIR})
  add_subdirectory(kaldi-native-fbank/csrc)
endif()


================================================
FILE: audio/paddleaudio/third_party/kaldi-native-fbank/csrc/CMakeLists.txt
================================================
# Add the directory two levels up to the include path, so that includes of the
# form "kaldi-native-fbank/csrc/<header>" resolve.
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../)
# Library target with the feature extraction sources.
add_library(kaldi-native-fbank-core
  feature-fbank.cc
  feature-functions.cc
  feature-window.cc
  fftsg.c
  log.cc
  mel-computations.cc
  rfft.cc
)
# We are using std::call_once() in log.h, which requires us to link with -pthread
if(NOT WIN32)
  target_link_libraries(kaldi-native-fbank-core -pthread)
endif()

# Forward the KNF_HAVE_EXECINFO_H setting to the sources as a compile
# definition when it is set.
if(KNF_HAVE_EXECINFO_H)
  target_compile_definitions(kaldi-native-fbank-core PRIVATE KNF_HAVE_EXECINFO_H=1)
endif()

# Forward the KNF_HAVE_CXXABI_H setting to the sources as a compile definition
# when it is set.
if(KNF_HAVE_CXXABI_H)
  target_compile_definitions(kaldi-native-fbank-core PRIVATE KNF_HAVE_CXXABI_H=1)
endif()


================================================
FILE: audio/paddleaudio/third_party/kaldi-native-fbank/csrc/feature-fbank.cc
================================================
/**
 * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
 *
 * See LICENSE for clarification regarding multiple authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// This file is copied/modified from kaldi/src/feat/feature-fbank.cc
//
#include "kaldi-native-fbank/csrc/feature-fbank.h"

#include <cmath>

#include "kaldi-native-fbank/csrc/feature-functions.h"

namespace knf {

// Replace each of the first n elements of in_out by its square root.
static void Sqrt(float *in_out, int32_t n) {
  float *cur = in_out;
  float *const stop = in_out + n;
  while (cur != stop) {
    *cur = std::sqrt(*cur);
    ++cur;
  }
}

// Stream the textual form produced by FbankOptions::ToString().
std::ostream &operator<<(std::ostream &os, const FbankOptions &opts) {
  return os << opts.ToString();
}

// Build a computer for the given options. The FFT is sized to the padded
// window size, and the mel filterbank for VTLN warp factor 1.0 is created
// eagerly.
FbankComputer::FbankComputer(const FbankOptions &opts)
    : opts_(opts),
      // Always give the member a defined value. It is only read when
      // opts_.energy_floor > 0, but leaving it indeterminate makes any copy
      // or dump of the object read an uninitialized float.
      log_energy_floor_(0.0f),
      rfft_(opts.frame_opts.PaddedWindowSize()) {
  if (opts.energy_floor > 0.0f) {
    log_energy_floor_ = logf(opts.energy_floor);
  }

  // We'll definitely need the filterbanks info for VTLN warping factor 1.0.
  // [note: this call caches it.]
  GetMelBanks(1.0f);
}

// Free every cached MelBanks object; the map owns them through raw pointers.
FbankComputer::~FbankComputer() {
  for (auto &entry : mel_banks_) {
    delete entry.second;
  }
}

// Return the mel filterbank for `vtln_warp`, creating and caching it on first
// use. The returned pointer stays owned by this object.
const MelBanks *FbankComputer::GetMelBanks(float vtln_warp) {
  const auto cached = mel_banks_.find(vtln_warp);
  if (cached != mel_banks_.end()) {
    return cached->second;
  }

  MelBanks *created =
      new MelBanks(opts_.mel_opts, opts_.frame_opts, vtln_warp);
  mel_banks_[vtln_warp] = created;
  return created;
}

// Compute one frame of filterbank features.
//
// signal_raw_log_energy: log-energy supplied by the caller; it is overwritten
//   below when energy is requested but raw_energy is false.
// vtln_warp: VTLN warp factor used to select (or create) the mel filterbank.
// signal_frame: one windowed frame of size PaddedWindowSize(); used as
//   workspace and overwritten.
// feature: output buffer; receives num_bins mel energies plus one energy
//   value when use_energy is set.
void FbankComputer::Compute(float signal_raw_log_energy, float vtln_warp,
                            std::vector<float> *signal_frame, float *feature) {
  const MelBanks &mel_banks = *(GetMelBanks(vtln_warp));

  KNF_CHECK_EQ(signal_frame->size(), opts_.frame_opts.PaddedWindowSize());

  // Compute energy after window function (not the raw one).
  // The inner product is floored at epsilon so the log stays finite.
  if (opts_.use_energy && !opts_.raw_energy) {
    signal_raw_log_energy = std::log(
        std::max<float>(InnerProduct(signal_frame->data(), signal_frame->data(),
                                     signal_frame->size()),
                        std::numeric_limits<float>::epsilon()));
  }
  rfft_.Compute(signal_frame->data());  // signal_frame is modified in-place
  // Turns the FFT output into a power spectrum, again in place; the first
  // size/2 + 1 entries are the ones used below.
  ComputePowerSpectrum(signal_frame);

  // Use magnitude instead of power if requested.
  if (!opts_.use_power) {
    Sqrt(signal_frame->data(), signal_frame->size() / 2 + 1);
  }

  // With energy in front (use_energy and not htk_compat) the mel energies
  // start at index 1 so that slot 0 stays free for the energy value.
  int32_t mel_offset = ((opts_.use_energy && !opts_.htk_compat) ? 1 : 0);

  // Its length is opts_.mel_opts.num_bins
  float *mel_energies = feature + mel_offset;

  // Sum with mel filter banks over the power spectrum
  mel_banks.Compute(signal_frame->data(), mel_energies);

  if (opts_.use_log_fbank) {
    // Avoid log of zero (which should be prevented anyway by dithering).
    for (int32_t i = 0; i != opts_.mel_opts.num_bins; ++i) {
      auto t = std::max(mel_energies[i], std::numeric_limits<float>::epsilon());
      mel_energies[i] = std::log(t);
    }
  }

  // Copy energy as first value (or the last, if htk_compat == true).
  if (opts_.use_energy) {
    // Apply the energy floor only when one was configured.
    if (opts_.energy_floor > 0.0 && signal_raw_log_energy < log_energy_floor_) {
      signal_raw_log_energy = log_energy_floor_;
    }
    int32_t energy_index = opts_.htk_compat ? opts_.mel_opts.num_bins : 0;
    feature[energy_index] = signal_raw_log_energy;
  }
}

}  // namespace knf


================================================
FILE: audio/paddleaudio/third_party/kaldi-native-fbank/csrc/feature-fbank.h
================================================
/**
 * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
 *
 * See LICENSE for clarification regarding multiple authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// This file is copied/modified from kaldi/src/feat/feature-fbank.h

#ifndef KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
#define KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_

#include <map>

#include "kaldi-native-fbank/csrc/feature-window.h"
#include "kaldi-native-fbank/csrc/mel-computations.h"
#include "kaldi-native-fbank/csrc/rfft.h"

namespace knf {

// Options controlling filterbank feature computation.
struct FbankOptions {
  FrameExtractionOptions frame_opts;
  MelBanksOptions mel_opts;

  // Append an extra dimension with energy to the filter banks.
  bool use_energy = false;

  // Floor applied to the energy; active iff use_energy==true.
  float energy_floor = 0.0f;

  // If true, compute log_energy before preemphasis and windowing.
  // If false, compute log_energy after preemphasis and windowing.
  // Active iff use_energy==true.
  bool raw_energy = true;

  // If true, put energy last (if using energy); if false, put energy first.
  // Active iff use_energy==true.
  bool htk_compat = false;

  // If true (default), produce log-filterbank, else linear.
  bool use_log_fbank = true;

  // If true (default), use power in filterbank analysis, else magnitude.
  bool use_power = true;

  // The default number of mel bins for fbank features is 23.
  FbankOptions() { mel_opts.num_bins = 23; }

  // Human-readable, multi-line dump of all options.
  std::string ToString() const {
    std::ostringstream os;
    os << "frame_opts: \n" << frame_opts << "\n\n";
    os << "mel_opts: \n" << mel_opts << "\n";
    os << "use_energy: " << use_energy << "\n"
       << "energy_floor: " << energy_floor << "\n"
       << "raw_energy: " << raw_energy << "\n"
       << "htk_compat: " << htk_compat << "\n"
       << "use_log_fbank: " << use_log_fbank << "\n"
       << "use_power: " << use_power << "\n";
    return os.str();
  }
};

// Writes opts.ToString() to the stream.
std::ostream &operator<<(std::ostream &os, const FbankOptions &opts);

// Computes filterbank features frame by frame. Mel filterbanks are created
// per VTLN warp factor on demand and cached for the lifetime of the object.
//
// NOTE(review): the class owns the cached MelBanks through raw pointers and
// deletes them in its destructor, but copy construction/assignment are not
// disabled or customized here - copying an instance looks unsafe; confirm
// that no caller copies it.
class FbankComputer {
 public:
  using Options = FbankOptions;

  explicit FbankComputer(const FbankOptions &opts);
  ~FbankComputer();

  // Feature dimension: number of mel bins, plus one when energy is appended.
  int32_t Dim() const {
    return opts_.mel_opts.num_bins + (opts_.use_energy ? 1 : 0);
  }

  // if true, compute log_energy_pre_window but after dithering and dc removal
  bool NeedRawLogEnergy() const { return opts_.use_energy && opts_.raw_energy; }

  const FrameExtractionOptions &GetFrameOptions() const {
    return opts_.frame_opts;
  }

  const FbankOptions &GetOptions() const { return opts_; }

  /**
     Function that computes one frame of features from
     one frame of signal.

     @param [in] signal_raw_log_energy The log-energy of the frame of the signal
         prior to windowing and pre-emphasis, or
         log(numeric_limits<float>::min()), whichever is greater.  Must be
         ignored by this function if this class returns false from
         this->NeedRawLogEnergy().
     @param [in] vtln_warp  The VTLN warping factor that the user wants
         to be applied when computing features for this utterance.  Will
         normally be 1.0, meaning no warping is to be done.  The value will
         be ignored for feature types that don't support VTLN, such as
         spectrogram features.
     @param [in] signal_frame  One frame of the signal,
       as extracted using the function ExtractWindow() using the options
       returned by this->GetFrameOptions().  The function will use the
       vector as a workspace, which is why it's a non-const pointer.
     @param [out] feature  Pointer to a vector of size this->Dim(), to which
         the computed feature will be written. It should be pre-allocated.
  */
  void Compute(float signal_raw_log_energy, float vtln_warp,
               std::vector<float> *signal_frame, float *feature);

 private:
  // Returns the cached filterbank for vtln_warp, creating it if necessary.
  const MelBanks *GetMelBanks(float vtln_warp);

  FbankOptions opts_;
  float log_energy_floor_;  // log(opts_.energy_floor); set only if floor > 0
  std::map<float, MelBanks *> mel_banks_;  // float is VTLN coefficient.
  Rfft rfft_;
};

}  // namespace knf

#endif  // KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_


================================================
FILE: audio/paddleaudio/third_party/kaldi-native-fbank/csrc/feature-functions.cc
================================================
/**
 * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
 *
 * See LICENSE for clarification regarding multiple authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// This file is copied/modified from kaldi/src/feat/feature-functions.cc

#include "kaldi-native-fbank/csrc/feature-functions.h"

#include <cstdint>
#include <vector>

namespace knf {

// Converts the packed half-spectrum of a real FFT into a power spectrum,
// in place.
//
// On entry *complex_fft holds `dim` values laid out as
//   [real0, realN/2, real1, im1, real2, im2, ...]
// On return, elements [0, dim/2] hold the energy (re^2 + im^2) of each bin,
// with the DC energy at index 0 and the realN/2 energy at index dim/2. The
// contents of the remaining elements are unspecified.
//
// A null pointer or a vector with fewer than two elements has no packed
// (real0, realN/2) pair; such input is left untouched instead of being read
// and written out of bounds.
void ComputePowerSpectrum(std::vector<float> *complex_fft) {
  if (complex_fft == nullptr || complex_fft->size() < 2) {
    return;
  }

  int32_t dim = static_cast<int32_t>(complex_fft->size());

  // now we have in complex_fft, first half of complex spectrum
  // it's stored as [real0, realN/2, real1, im1, real2, im2, ...]

  float *p = complex_fft->data();
  int32_t half_dim = dim / 2;

  // Slots 0 and 1 are overwritten by the loop / final stores below, so their
  // energies are captured first.
  float first_energy = p[0] * p[0];
  float last_energy = p[1] * p[1];  // handle this special case

  // Bin i reads slots 2i and 2i+1 and writes slot i; since 2i > i for i >= 1
  // the write never clobbers input that is still to be read.
  for (int32_t i = 1; i < half_dim; ++i) {
    float real = p[i * 2];
    float im = p[i * 2 + 1];
    p[i] = real * real + im * im;
  }
  p[0] = first_energy;
  p[half_dim] = last_energy;  // Will actually never be used, and anyway
  // if the signal has been bandlimited sensibly this should be zero.
}

}  // namespace knf


================================================
FILE: audio/paddleaudio/third_party/kaldi-native-fbank/csrc/feature-functions.h
================================================
/**
 * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
 *
 * See LICENSE for clarification regarding multiple authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// This file is copied/modified from kaldi/src/feat/feature-functions.h
#ifndef KALDI_NATIVE_FBANK_CSRC_FEATURE_FUNCTIONS_H
#define KALDI_NATIVE_FBANK_CSRC_FEATURE_FUNCTIONS_H

#include <vector>
namespace knf {

// ComputePowerSpectrum takes a complex FFT (as produced by the FFT
// functions in csrc/rfft.h) and converts it, in place, into
// a power spectrum.  If the complex FFT is a vector of size n (representing
// half of the complex FFT of a real signal of size n, as described there),
// this function computes in the first (n/2) + 1 elements of it, the
// energies of the fft bins from zero to the Nyquist frequency.  Contents of the
// remaining (n/2) - 1 elements are undefined at output.

void ComputePowerSpectrum(std::vector<float> *complex_fft);

}  // namespace knf

#endif  // KALDI_NATIVE_FBANK_CSRC_FEATURE_FUNCTIONS_H


================================================
FILE: audio/paddleaudio/third_party/kaldi-native-fbank/csrc/feature-window.cc
================================================
// kaldi-native-fbank/csrc/feature-window.cc
//
// Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)

// This file is copied/modified from kaldi/src/feat/feature-window.cc

#include "kaldi-native-fbank/csrc/feature-window.h"

#include <algorithm>
#include <cmath>
#include <vector>

#ifndef M_2PI
#define M_2PI 6.283185307179586476925286766559005
#endif

namespace knf {

// Writes the multi-line dump produced by opts.ToString() to `os` and returns
// the same stream so that insertions can be chained.
std::ostream &operator<<(std::ostream &os, const FrameExtractionOptions &opts) {
  return os << opts.ToString();
}

// Precomputes one coefficient per sample of the analysis window.
//
// The shape is selected by opts.window_type ("hanning", "sine", "hamming",
// "povey", "rectangular" or "blackman"); any other name is reported through
// KNF_LOG(FATAL). The window length is opts.WindowSize(), which must be > 0.
FeatureWindowFunction::FeatureWindowFunction(const FrameExtractionOptions &opts)
    : window_(opts.WindowSize()) {
  const int32_t num_points = opts.WindowSize();
  KNF_CHECK_GT(num_points, 0);

  // Angular increment between neighbouring samples: 2*pi / (N - 1).
  const double step = M_2PI / (num_points - 1);
  const std::string &type = opts.window_type;

  float *coeffs = window_.data();
  for (int32_t n = 0; n < num_points; n++) {
    const double x = static_cast<double>(n);

    // Starts at 0.0, which is also what the value-initialised vector holds,
    // so an unrecognised type leaves the slot unchanged.
    double value = 0.0;
    if (type == "hanning") {
      value = 0.5 - 0.5 * cos(step * x);
    } else if (type == "sine") {
      // Compared with the usual textbook form, note that
      // 0.5 * step == M_PI / (N - 1).
      value = sin(0.5 * step * x);
    } else if (type == "hamming") {
      value = 0.54 - 0.46 * cos(step * x);
    } else if (type == "povey") {
      // like hamming but goes to zero at edges.
      value = pow(0.5 - 0.5 * cos(step * x), 0.85);
    } else if (type == "rectangular") {
      value = 1.0;
    } else if (type == "blackman") {
      value = opts.blackman_coeff - 0.5 * cos(step * x) +
              (0.5 - opts.blackman_coeff) * cos(2 * step * x);
    } else {
      KNF_LOG(FATAL) << "Invalid window type " << opts.window_type;
    }
    coeffs[n] = static_cast<float>(value);
  }
}

// Scales wave[k] by window_[k] for every k in [0, window_.size()), in place.
// `wave` must point to at least window_.size() writable floats.
void FeatureWindowFunction::Apply(float *wave) const {
  const float *coeff = window_.data();
  const float *const coeff_end = coeff + window_.size();
  for (; coeff != coeff_end; ++coeff, ++wave) {
    *wave *= *coeff;
  }
}

// Index (into the whole waveform) of the first sample of frame `frame`.
//
// With opts.snip_edges the frames start at multiples of the window shift.
// Otherwise each frame is centred on the middle of its shift interval, so the
// result can be negative for the first frames.
int64_t FirstSampleOfFrame(int32_t frame, const FrameExtractionOptions &opts) {
  const int64_t shift = opts.WindowShift();
  if (opts.snip_edges) {
    return frame * shift;
  }
  const int64_t midpoint = shift * frame + shift / 2;
  return midpoint - opts.WindowSize() / 2;
}

// Number of frames obtainable from `num_samples` samples under `opts`.
//
// `flush` only matters when opts.snip_edges is false: with flush == false,
// frames that would extend past the end of the available samples are not
// counted.
int32_t NumFrames(int64_t num_samples, const FrameExtractionOptions &opts,
                  bool flush /*= true*/) {
  const int64_t shift = opts.WindowShift();
  const int64_t length = opts.WindowSize();

  if (opts.snip_edges) {
    // HTK-like counting: every frame must fit entirely inside the waveform
    // and the first frame starts at sample zero.
    if (num_samples < length) return 0;
    // (num_samples - length) is the room available for sliding the frame;
    // dividing by the shift (rounding down) gives the number of extra frames.
    return (1 + ((num_samples - length) / shift));
  }

  // snip_edges == false: round (num_samples / shift) to the nearest integer.
  // Integer division truncates, so half a shift is added before dividing.
  int32_t count = (num_samples + (shift / 2)) / shift;
  if (flush) return count;

  // 'end' means one past the last sample of the last counted frame.
  int64_t last_end = FirstSampleOfFrame(count - 1, opts) + length;

  // Without flushing, drop trailing frames that reach past the samples we
  // have so far. Written for clarity rather than speed.
  for (; count > 0 && last_end > num_samples; --count) {
    last_end -= shift;
  }
  return count;
}

// Extracts frame `f` of the waveform into `*window` and then applies
// ProcessWindow() to it.
//
// `wave` holds the samples that follow the first `sample_offset` (already
// discarded) samples of the full waveform. `*window` is resized to
// opts.PaddedWindowSize(); its first opts.WindowSize() entries receive the
// frame and the remaining padding entries are set to zero. Samples that fall
// outside `wave` are obtained by reflection at the wave boundaries. If
// `log_energy_pre_window` is non-null it is forwarded to ProcessWindow().
void ExtractWindow(int64_t sample_offset, const std::vector<float> &wave,
                   int32_t f, const FrameExtractionOptions &opts,
                   const FeatureWindowFunction &window_function,
                   std::vector<float> *window,
                   float *log_energy_pre_window /*= nullptr*/) {
  KNF_CHECK(sample_offset >= 0 && wave.size() != 0);

  int32_t frame_length = opts.WindowSize();
  int32_t frame_length_padded = opts.PaddedWindowSize();

  int64_t num_samples = sample_offset + wave.size();
  int64_t start_sample = FirstSampleOfFrame(f, opts);
  int64_t end_sample = start_sample + frame_length;

  if (opts.snip_edges) {
    KNF_CHECK(start_sample >= sample_offset && end_sample <= num_samples);
  } else {
    KNF_CHECK(sample_offset == 0 || start_sample >= sample_offset);
  }

  if (window->size() != frame_length_padded) {
    window->resize(frame_length_padded);
  }

  // wave_start and wave_end are start and end indexes into 'wave', for the
  // piece of wave that we're trying to extract.
  int32_t wave_start = int32_t(start_sample - sample_offset);
  int32_t wave_end = wave_start + frame_length;

  if (wave_start >= 0 && wave_end <= wave.size()) {
    // the normal case-- no edge effects to consider.
    std::copy(wave.begin() + wave_start,
              wave.begin() + wave_start + frame_length, window->data());
  } else {
    // Deal with any end effects by reflection, if needed.  This code will only
    // be reached for about two frames per utterance, so we don't concern
    // ourselves excessively with efficiency.
    int32_t wave_dim = wave.size();
    for (int32_t s = 0; s < frame_length; ++s) {
      int32_t s_in_wave = s + wave_start;
      while (s_in_wave < 0 || s_in_wave >= wave_dim) {
        // reflect around the beginning or end of the wave.
        // e.g. -1 -> 0, -2 -> 1.
        // dim -> dim - 1, dim + 1 -> dim - 2.
        // the code supports repeated reflections, although this
        // would only be needed in pathological cases.
        if (s_in_wave < 0)
          s_in_wave = -s_in_wave - 1;
        else
          s_in_wave = 2 * wave_dim - 1 - s_in_wave;
      }
      (*window)[s] = wave[s_in_wave];
    }
  }

  // Only the first frame_length entries were written above. When the caller
  // reuses the same buffer across frames (e.g. after an in-place FFT) the
  // padding would otherwise still hold stale values, so clear it explicitly.
  if (frame_length_padded > frame_length) {
    std::fill(window->begin() + frame_length, window->end(), 0.0f);
  }

  ProcessWindow(opts, window_function, window->data(), log_energy_pre_window);
}

// Subtracts the arithmetic mean of d[0 .. n) from each of those n values,
// in place.
static void RemoveDcOffset(float *d, int32_t n) {
  float *const end = d + n;

  float total = 0;
  for (const float *p = d; p != end; ++p) {
    total += *p;
  }

  const float offset = total / n;

  for (float *p = d; p != end; ++p) {
    *p -= offset;
  }
}

// Returns the sum over i in [0, n) of a[i] * b[i], accumulated in single
// precision in index order. Returns 0 when n is 0.
float InnerProduct(const float *a, const float *b, int32_t n) {
  float acc = 0;
  const float *const a_end = a + n;
  for (; a != a_end; ++a, ++b) {
    acc += *a * *b;
  }
  return acc;
}

// In-place pre-emphasis filter over d[0 .. n):
//   d[i] -= preemph_coeff * d[i - 1]  for i = n-1 down to 1,
//   d[0] -= preemph_coeff * d[0].
// Does nothing when preemph_coeff is 0; otherwise the coefficient is checked
// to lie in [0, 1].
static void Preemphasize(float *d, int32_t n, float preemph_coeff) {
  if (preemph_coeff == 0.0) return;

  KNF_CHECK(preemph_coeff >= 0.0 && preemph_coeff <= 1.0);

  // Walk from the back so that d[i - 1] is still the unfiltered sample when
  // it is read.
  int32_t i = n;
  while (--i > 0) {
    d[i] -= preemph_coeff * d[i - 1];
  }
  d[0] -= preemph_coeff * d[0];
}

// Runs the per-frame processing steps on window[0 .. opts.WindowSize()), in
// this order: optional DC-offset removal, optional log-energy measurement,
// optional pre-emphasis, multiplication by the window function.
//
// Dithering is not implemented: opts.dither is checked to be 0.
// When `log_energy_pre_window` is non-null it receives the log of the frame's
// sum of squares, floored at std::numeric_limits<float>::epsilon().
void ProcessWindow(const FrameExtractionOptions &opts,
                   const FeatureWindowFunction &window_function, float *window,
                   float *log_energy_pre_window /*= nullptr*/) {
  const int32_t num_samples = opts.WindowSize();

  // TODO(fangjun): Remove dither
  KNF_CHECK_EQ(opts.dither, 0);

  if (opts.remove_dc_offset) RemoveDcOffset(window, num_samples);

  if (log_energy_pre_window != nullptr) {
    const float raw_energy = InnerProduct(window, window, num_samples);
    const float floored_energy = std::max<float>(
        raw_energy, std::numeric_limits<float>::epsilon());
    *log_energy_pre_window = std::log(floored_energy);
  }

  if (opts.preemph_coeff != 0.0) {
    Preemphasize(window, num_samples, opts.preemph_coeff);
  }

  window_function.Apply(window);
}

}  // namespace knf


================================================
FILE: audio/paddleaudio/third_party/kaldi-native-fbank/csrc/feature-window.h
================================================
// kaldi-native-fbank/csrc/feature-window.h
//
// Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)

// This file is copied/modified from kaldi/src/feat/feature-window.h

#ifndef KALDI_NATIVE_FEAT_CSRC_FEATURE_WINDOW_H_
#define KALDI_NATIVE_FEAT_CSRC_FEATURE_WINDOW_H_

#include <sstream>
#include <string>
#include <vector>

#include "kaldi-native-fbank/csrc/log.h"

namespace knf {

// Returns the smallest power of two that is >= n. n must be > 0.
inline int32_t RoundUpToNearestPowerOfTwo(int32_t n) {
  // copied from kaldi/src/base/kaldi-math.cc
  KNF_CHECK_GT(n, 0);

  // Copy the highest set bit of (n - 1) into every lower bit position
  // (shifts of 1, 2, 4, 8, 16 cover all 32 bits); adding one then yields the
  // next power of two.
  int32_t bits = n - 1;
  for (int32_t shift = 1; shift <= 16; shift <<= 1) {
    bits |= bits >> shift;
  }
  return bits + 1;
}

// Settings that control how a waveform is cut into frames and how each frame
// is pre-processed before the FFT.
struct FrameExtractionOptions {
  float samp_freq = 16000;
  float frame_shift_ms = 10.0f;   // in milliseconds.
  float frame_length_ms = 25.0f;  // in milliseconds.
  float dither = 1.0f;            // Amount of dithering, 0.0 means no dither.
  float preemph_coeff = 0.97f;    // Preemphasis coefficient.
  bool remove_dc_offset = true;   // Subtract mean of wave before FFT.
  // One of "hamming", "rectangular", "povey", "hanning", "sine", "blackman".
  // "povey" resembles Hamming but goes to zero at the edges:
  // pow((0.5 - 0.5*cos(n/N*2*pi)), 0.85).
  std::string window_type = "povey";
  bool round_to_power_of_two = true;
  float blackman_coeff = 0.42f;
  bool snip_edges = true;
  // bool allow_downsample = false;
  // bool allow_upsample = false;

  // Used for streaming feature extraction. It indicates the number
  // of feature frames to keep in the recycling vector. -1 means to
  // keep all feature frames.
  int32_t max_feature_vectors = -1;

  // Frame shift in samples (milliseconds converted at samp_freq, truncated).
  int32_t WindowShift() const {
    const float shift_in_samples = samp_freq * 0.001f * frame_shift_ms;
    return static_cast<int32_t>(shift_in_samples);
  }

  // Frame length in samples (milliseconds converted at samp_freq, truncated).
  int32_t WindowSize() const {
    const float length_in_samples = samp_freq * 0.001f * frame_length_ms;
    return static_cast<int32_t>(length_in_samples);
  }

  // WindowSize(), rounded up to a power of two when round_to_power_of_two is
  // set.
  int32_t PaddedWindowSize() const {
    if (round_to_power_of_two) {
      return RoundUpToNearestPowerOfTwo(WindowSize());
    }
    return WindowSize();
  }

  // One "name: value" line per option, in declaration order.
  std::string ToString() const {
    std::ostringstream os;
    os << "samp_freq: " << samp_freq << "\n";
    os << "frame_shift_ms: " << frame_shift_ms << "\n";
    os << "frame_length_ms: " << frame_length_ms << "\n";
    os << "dither: " << dither << "\n";
    os << "preemph_coeff: " << preemph_coeff << "\n";
    os << "remove_dc_offset: " << remove_dc_offset << "\n";
    os << "window_type: " << window_type << "\n";
    os << "round_to_power_of_two: " << round_to_power_of_two << "\n";
    os << "blackman_coeff: " << blackman_coeff << "\n";
    os << "snip_edges: " << snip_edges << "\n";
    // allow_downsample / allow_upsample are not printed (fields disabled).
    os << "max_feature_vectors: " << max_feature_vectors << "\n";
    return os.str();
  }
};

std::ostream &operator<<(std::ostream &os, const FrameExtractionOptions &opts);

// Holds the per-sample coefficients of an analysis window and applies them
// to a frame of samples.
class FeatureWindowFunction {
 public:
  // Leaves window_ empty; Apply() on such an object touches no samples.
  FeatureWindowFunction() = default;
  // Builds the window described by `opts` (see the definition in
  // feature-window.cc).
  explicit FeatureWindowFunction(const FrameExtractionOptions &opts);
  /**
   * @param wave Pointer to a 1-D array of shape [window_size].
   *             It is modified in-place: wave[i] = wave[i] * window_[i].
   */
  void Apply(float *wave) const;

 private:
  std::vector<float> window_;  // of size opts.WindowSize()
};

int64_t FirstSampleOfFrame(int32_t frame, const FrameExtractionOptions &opts);

/**
   This function returns the number of frames that we can extract from a wave
   file with the given number of samples in it (assumed to have the same
   sampling rate as specified in 'opts').

      @param [in] num_samples  The number of samples in the wave file.
      @param [in] opts     The frame-extraction options class

      @param [in] flush   True if we are asserting that this number of samples
   is 'all there is', false if we are expecting more data to possibly come in.
   This only makes a difference to the answer
   if opts.snip_edges == false.  For offline feature extraction you always want
   flush == true.  In an online-decoding context, once you know (or decide) that
   no more data is coming in, you'd call it with flush == true at the end to
   flush out any remaining data.
*/
int32_t NumFrames(int64_t num_samples, const FrameExtractionOptions &opts,
                  bool flush = true);

/*
  ExtractWindow() extracts a windowed frame of waveform (possibly with a
  power-of-two, padded size, depending on the config), including all the
  processing done by ProcessWindow().

  @param [in] sample_offset  If 'wave' is not the entire waveform, but
                   part of it to the left has been discarded, then the
                   number of samples prior to 'wave' that we have
                   already discarded.  Set this to zero if you are
                   processing the entire waveform in one piece, or
                   if you get 'no matching function' compilation
                   errors when updating the code.
  @param [in] wave  The waveform
  @param [in] f     The frame index to be extracted, with
                    0 <= f < NumFrames(sample_offset + wave.size(), opts, true)
  @param [in] opts  The options class to be used
  @param [in] window_function  The windowing function, as derived from the
                    options class.
  @param [out] window  The windowed, possibly-padded waveform to be
                     extracted.  Will be resized as needed.
  @param [out] log_energy_pre_window  If non-NULL, the log-energy of
                   the signal prior to pre-emphasis and multiplying by
                   the windowing function will be written to here.
*/
void ExtractWindow(int64_t sample_offset, const std::vector<float> &wave,
                   int32_t f, const FrameExtractionOptions &opts,
                   const FeatureWindowFunction &window_function,
                   std::vector<float> *window,
                   float *log_energy_pre_window = nullptr);

/**
  This function does all the windowing steps after actually
  extracting the windowed signal: depending on the
  configuration, it does dithering, dc offset removal,
  preemphasis, and multiplication by the windowing function.
   @param [in] opts  The options class to be used
   @param [in] window_function  The windowing function-- should have
                    been initialized using 'opts'.
   @param [in,out] window  A vector of size opts.WindowSize().  Note:
      it will typically be a sub-vector of a larger vector of size
      opts.PaddedWindowSize(), with the remaining samples zero,
      as the FFT code is more efficient if it operates on data with
      power-of-two size.
   @param [out]   log_energy_pre_window If non-NULL, then after dithering and
      DC offset removal, this function will write to this pointer the log of
      the total energy (i.e. sum-squared) of the frame.
 */
void ProcessWindow(const FrameExtractionOptions &opts,
                   const FeatureWindowFunction &window_function, float *window,
                   float *log_energy_pre_window = nullptr);

// Compute the inner product of two vectors
float InnerProduct(const float *a, const float *b, int32_t n);

}  // namespace knf

#endif  // KALDI_NATIVE_FEAT_CSRC_FEATURE_WINDOW_H_


================================================
FILE: audio/paddleaudio/third_party/kaldi-native-fbank/csrc/fftsg.c
================================================
/* This file is copied from
 * https://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
 */
/*
Fast Fourier/Cosine/Sine Transform
    dimension   :one
    data length :power of 2
    decimation  :frequency
    radix       :split-radix
    data        :inplace
    table       :use
functions
    cdft: Complex Discrete Fourier Transform
    rdft: Real Discrete Fourier Transform
    ddct: Discrete Cosine Transform
    ddst: Discrete Sine Transform
    dfct: Cosine Transform of RDFT (Real Symmetric DFT)
    dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
function prototypes
    void cdft(int, int, double *, int *, double *);
    void rdft(int, int, double *, int *, double *);
    void ddct(int, int, double *, int *, double *);
    void ddst(int, int, double *, int *, double *);
    void dfct(int, double *, double *, int *, double *);
    void dfst(int, double *, double *, int *, double *);
macro definitions
    USE_CDFT_PTHREADS : default=not defined
        CDFT_THREADS_BEGIN_N  : must be >= 512, default=8192
        CDFT_4THREADS_BEGIN_N : must be >= 512, default=65536
    USE_CDFT_WINTHREADS : default=not defined
        CDFT_THREADS_BEGIN_N  : must be >= 512, default=32768
        CDFT_4THREADS_BEGIN_N : must be >= 512, default=524288


-------- Complex DFT (Discrete Fourier Transform) --------
    [definition]
        <case1>
            X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
        <case2>
            X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
        (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
    [usage]
        <case1>
            ip[0] = 0; // first time only
            cdft(2*n, 1, a, ip, w);
        <case2>
            ip[0] = 0; // first time only
            cdft(2*n, -1, a, ip, w);
    [parameters]
        2*n            :data length (int)
                        n >= 1, n = power of 2
        a[0...2*n-1]   :input/output data (double *)
                        input data
                            a[2*j] = Re(x[j]),
                            a[2*j+1] = Im(x[j]), 0<=j<n
                        output data
                            a[2*k] = Re(X[k]),
                            a[2*k+1] = Im(X[k]), 0<=k<n
        ip[0...*]      :work area for bit reversal (int *)
                        length of ip >= 2+sqrt(n)
                        strictly,
                        length of ip >=
                            2+(1<<(int)(log(n+0.5)/log(2))/2).
                        ip[0],ip[1] are pointers of the cos/sin table.
        w[0...n/2-1]   :cos/sin table (double *)
                        w[],ip[] are initialized if ip[0] == 0.
    [remark]
        Inverse of
            cdft(2*n, -1, a, ip, w);
        is
            cdft(2*n, 1, a, ip, w);
            for (j = 0; j <= 2 * n - 1; j++) {
                a[j] *= 1.0 / n;
            }
        .


-------- Real DFT / Inverse of Real DFT --------
    [definition]
        <case1> RDFT
            R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
            I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
        <case2> IRDFT (excluding scale)
            a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
                   sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
                   sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
    [usage]
        <case1>
            ip[0] = 0; // first time only
            rdft(n, 1, a, ip, w);
        <case2>
            ip[0] = 0; // first time only
            rdft(n, -1, a, ip, w);
    [parameters]
        n              :data length (int)
                        n >= 2, n = power of 2
        a[0...n-1]     :input/output data (double *)
                        <case1>
                            output data
                                a[2*k] = R[k], 0<=k<n/2
                                a[2*k+1] = I[k], 0<k<n/2
                                a[1] = R[n/2]
                        <case2>
                            input data
                                a[2*j] = R[j], 0<=j<n/2
                                a[2*j+1] = I[j], 0<j<n/2
                                a[1] = R[n/2]
        ip[0...*]      :work area for bit reversal (int *)
                        length of ip >= 2+sqrt(n/2)
                        strictly,
                        length of ip >=
                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
                        ip[0],ip[1] are pointers of the cos/sin table.
        w[0...n/2-1]   :cos/sin table (double *)
                        w[],ip[] are initialized if ip[0] == 0.
    [remark]
        Inverse of
            rdft(n, 1, a, ip, w);
        is
            rdft(n, -1, a, ip, w);
            for (j = 0; j <= n - 1; j++) {
                a[j] *= 2.0 / n;
            }
        .


-------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
    [definition]
        <case1> IDCT (excluding scale)
            C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
        <case2> DCT
            C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
    [usage]
        <case1>
            ip[0] = 0; // first time only
            ddct(n, 1, a, ip, w);
        <case2>
            ip[0] = 0; // first time only
            ddct(n, -1, a, ip, w);
    [parameters]
        n              :data length (int)
                        n >= 2, n = power of 2
        a[0...n-1]     :input/output data (double *)
                        output data
                            a[k] = C[k], 0<=k<n
        ip[0...*]      :work area for bit reversal (int *)
                        length of ip >= 2+sqrt(n/2)
                        strictly,
                        length of ip >=
                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
                        ip[0],ip[1] are pointers of the cos/sin table.
        w[0...n*5/4-1] :cos/sin table (double *)
                        w[],ip[] are initialized if ip[0] == 0.
    [remark]
        Inverse of
            ddct(n, -1, a, ip, w);
        is
            a[0] *= 0.5;
            ddct(n, 1, a, ip, w);
            for (j = 0; j <= n - 1; j++) {
                a[j] *= 2.0 / n;
            }
        .


-------- DST (Discrete Sine Transform) / Inverse of DST --------
    [definition]
        <case1> IDST (excluding scale)
            S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
        <case2> DST
            S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
    [usage]
        <case1>
            ip[0] = 0; // first time only
            ddst(n, 1, a, ip, w);
        <case2>
            ip[0] = 0; // first time only
            ddst(n, -1, a, ip, w);
    [parameters]
        n              :data length (int)
                        n >= 2, n = power of 2
        a[0...n-1]     :input/output data (double *)
                        <case1>
                            input data
                                a[j] = A[j], 0<j<n
                                a[0] = A[n]
                            output data
                                a[k] = S[k], 0<=k<n
                        <case2>
                            output data
                                a[k] = S[k], 0<k<n
                                a[0] = S[n]
        ip[0...*]      :work area for bit reversal (int *)
                        length of ip >= 2+sqrt(n/2)
                        strictly,
                        length of ip >=
                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
                        ip[0],ip[1] are pointers of the cos/sin table.
        w[0...n*5/4-1] :cos/sin table (double *)
                        w[],ip[] are initialized if ip[0] == 0.
    [remark]
        Inverse of
            ddst(n, -1, a, ip, w);
        is
            a[0] *= 0.5;
            ddst(n, 1, a, ip, w);
            for (j = 0; j <= n - 1; j++) {
                a[j] *= 2.0 / n;
            }
        .


-------- Cosine Transform of RDFT (Real Symmetric DFT) --------
    [definition]
        C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
    [usage]
        ip[0] = 0; // first time only
        dfct(n, a, t, ip, w);
    [parameters]
        n              :data length - 1 (int)
                        n >= 2, n = power of 2
        a[0...n]       :input/output data (double *)
                        output data
                            a[k] = C[k], 0<=k<=n
        t[0...n/2]     :work area (double *)
        ip[0...*]      :work area for bit reversal (int *)
                        length of ip >= 2+sqrt(n/4)
                        strictly,
                        length of ip >=
                            2+(1<<(int)(log(n/4+0.5)/log(2))/2).
                        ip[0],ip[1] are pointers of the cos/sin table.
        w[0...n*5/8-1] :cos/sin table (double *)
                        w[],ip[] are initialized if ip[0] == 0.
    [remark]
        Inverse of
            a[0] *= 0.5;
            a[n] *= 0.5;
            dfct(n, a, t, ip, w);
        is
            a[0] *= 0.5;
            a[n] *= 0.5;
            dfct(n, a, t, ip, w);
            for (j = 0; j <= n; j++) {
                a[j] *= 2.0 / n;
            }
        .


-------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
    [definition]
        S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
    [usage]
        ip[0] = 0; // first time only
        dfst(n, a, t, ip, w);
    [parameters]
        n              :data length + 1 (int)
                        n >= 2, n = power of 2
        a[0...n-1]     :input/output data (double *)
                        output data
                            a[k] = S[k], 0<k<n
                        (a[0] is used for work area)
        t[0...n/2-1]   :work area (double *)
        ip[0...*]      :work area for bit reversal (int *)
                        length of ip >= 2+sqrt(n/4)
                        strictly,
                        length of ip >=
                            2+(1<<(int)(log(n/4+0.5)/log(2))/2).
                        ip[0],ip[1] are pointers of the cos/sin table.
        w[0...n*5/8-1] :cos/sin table (double *)
                        w[],ip[] are initialized if ip[0] == 0.
    [remark]
        Inverse of
            dfst(n, a, t, ip, w);
        is
            dfst(n, a, t, ip, w);
            for (j = 1; j <= n - 1; j++) {
                a[j] *= 2.0 / n;
            }
        .


Appendix :
    The cos/sin table is recalculated when a larger table is required.
    w[] and ip[] are compatible with all routines.
*/


/*
 * Complex DFT entry point. The layout of a[], ip[] and w[] is described in
 * the "Complex DFT" section of the header comment of this file.
 *
 *   n    : data length as passed by the caller (the header documents it as
 *          2 * number of complex points)
 *   isgn : isgn >= 0 dispatches to cftfsub(), isgn < 0 to cftbsub()
 *   a    : input/output data, transformed in place
 *   ip   : integer work area; ip[0] is read here as the current table size
 *   w    : cos/sin table
 */
void cdft(int n, int isgn, double *a, int *ip, double *w) {
    /* Helpers are defined elsewhere in this file; declared locally. */
    void makewt(int nw, int *ip, double *w);
    void cftfsub(int n, double *a, int *ip, int nw, double *w);
    void cftbsub(int n, double *a, int *ip, int nw, double *w);
    int nw;

    /* (Re)build the table via makewt() whenever n exceeds 4 * ip[0]. With
     * ip[0] == 0, as the usage notes require before the first call, this
     * branch is taken for any n > 0. */
    nw = ip[0];
    if (n > (nw << 2)) {
        nw = n >> 2;
        makewt(nw, ip, w);
    }
    if (isgn >= 0) {
        cftfsub(n, a, ip, nw, w);
    } else {
        cftbsub(n, a, ip, nw, w);
    }
}


/*
 * Real DFT (isgn >= 0) / inverse real DFT excluding scale (isgn < 0). The
 * layout of a[], ip[] and w[] is described in the "Real DFT" section of the
 * header comment of this file.
 *
 *   n    : data length
 *   isgn : selects the forward (>= 0) or backward (< 0) branch below
 *   a    : input/output data, transformed in place
 *   ip   : integer work area; ip[0] and ip[1] are read here as the current
 *          sizes nw and nc of the two tables
 *   w    : table storage; the second table is addressed as w + nw
 */
void rdft(int n, int isgn, double *a, int *ip, double *w) {
    /* Helpers are defined elsewhere in this file; declared locally. */
    void makewt(int nw, int *ip, double *w);
    void makect(int nc, int *ip, double *c);
    void cftfsub(int n, double *a, int *ip, int nw, double *w);
    void cftbsub(int n, double *a, int *ip, int nw, double *w);
    void rftfsub(int n, double *a, int nc, double *c);
    void rftbsub(int n, double *a, int nc, double *c);
    int nw, nc;
    double xi;

    /* Rebuild either table when n exceeds four times its recorded size. */
    nw = ip[0];
    if (n > (nw << 2)) {
        nw = n >> 2;
        makewt(nw, ip, w);
    }
    nc = ip[1];
    if (n > (nc << 2)) {
        nc = n >> 2;
        makect(nc, ip, w + nw);
    }
    if (isgn >= 0) {
        /* Forward: complex sub-transform, then rftfsub() for n > 4. */
        if (n > 4) {
            cftfsub(n, a, ip, nw, w);
            rftfsub(n, a, nc, w + nw);
        } else if (n == 4) {
            cftfsub(n, a, ip, nw, w);
        }
        /* Replace (a[0], a[1]) by their sum and difference; the header
         * documents the resulting a[1] as R[n/2]. */
        xi = a[0] - a[1];
        a[0] += a[1];
        a[1] = xi;
    } else {
        /* Backward: first undo the sum/difference packing of a[0], a[1],
         * then run the sub-steps in the opposite order to the forward case. */
        a[1] = 0.5 * (a[0] - a[1]);
        a[0] -= a[1];
        if (n > 4) {
            rftbsub(n, a, nc, w + nw);
            cftbsub(n, a, ip, nw, w);
        } else if (n == 4) {
            cftbsub(n, a, ip, nw, w);
        }
    }
}


/*
 * ddct: top-level driver that transforms a[] in place.
 *
 * Table handling: the makewt() table (size cached in ip[0]) is rebuilt when
 * n > 4 * ip[0]; the makect() table that follows it in w[] (size cached in
 * ip[1]) is rebuilt when n > ip[1].
 *
 * isgn <  0: neighbouring elements of a[] are combined from the top down,
 *            rftbsub()/cftbsub() run, and dctsub() is applied last.
 * isgn >= 0: dctsub() is applied first, cftfsub()/rftfsub() run, and
 *            neighbouring elements are combined from the bottom up.
 */
void ddct(int n, int isgn, double *a, int *ip, double *w) {
    void makewt(int nw, int *ip, double *w);
    void makect(int nc, int *ip, double *c);
    void cftfsub(int n, double *a, int *ip, int nw, double *w);
    void cftbsub(int n, double *a, int *ip, int nw, double *w);
    void rftfsub(int n, double *a, int nc, double *c);
    void rftbsub(int n, double *a, int nc, double *c);
    void dctsub(int n, double *a, int nc, double *c);
    int idx, wt_len, ct_len;
    double held;

    wt_len = ip[0];
    if ((wt_len << 2) < n) {
        wt_len = n >> 2;
        makewt(wt_len, ip, w);
    }
    /* ip[1] must be read after makewt(), which resets it. */
    ct_len = ip[1];
    if (ct_len < n) {
        ct_len = n;
        makect(ct_len, ip, w + wt_len);
    }

    if (isgn < 0) {
        held = a[n - 1];
        idx = n - 2;
        while (idx >= 2) {
            a[idx + 1] = a[idx] - a[idx - 1];
            a[idx] += a[idx - 1];
            idx -= 2;
        }
        a[1] = a[0] - held;
        a[0] += held;
        if (n > 4) {
            rftbsub(n, a, ct_len, w + wt_len);
        }
        if (n >= 4) {
            cftbsub(n, a, ip, wt_len, w);
        }
    }

    dctsub(n, a, ct_len, w + wt_len);

    if (isgn >= 0) {
        if (n >= 4) {
            cftfsub(n, a, ip, wt_len, w);
            if (n > 4) {
                rftfsub(n, a, ct_len, w + wt_len);
            }
        }
        held = a[0] - a[1];
        a[0] += a[1];
        for (idx = 2; idx < n; idx += 2) {
            a[idx - 1] = a[idx] - a[idx + 1];
            a[idx] += a[idx + 1];
        }
        a[n - 1] = held;
    }
}


/*
 * ddst: top-level driver that transforms a[] in place.
 *
 * Table handling: the makewt() table (size cached in ip[0]) is rebuilt when
 * n > 4 * ip[0]; the makect() table that follows it in w[] (size cached in
 * ip[1]) is rebuilt when n > ip[1].
 *
 * isgn <  0: neighbouring elements of a[] are combined (with sign flips) from
 *            the top down, rftbsub()/cftbsub() run, and dstsub() comes last.
 * isgn >= 0: dstsub() is applied first, cftfsub()/rftfsub() run, and
 *            neighbouring elements are combined from the bottom up.
 */
void ddst(int n, int isgn, double *a, int *ip, double *w) {
    void makewt(int nw, int *ip, double *w);
    void makect(int nc, int *ip, double *c);
    void cftfsub(int n, double *a, int *ip, int nw, double *w);
    void cftbsub(int n, double *a, int *ip, int nw, double *w);
    void rftfsub(int n, double *a, int nc, double *c);
    void rftbsub(int n, double *a, int nc, double *c);
    void dstsub(int n, double *a, int nc, double *c);
    int idx, wt_len, ct_len;
    double held;

    wt_len = ip[0];
    if ((wt_len << 2) < n) {
        wt_len = n >> 2;
        makewt(wt_len, ip, w);
    }
    /* ip[1] must be read after makewt(), which resets it. */
    ct_len = ip[1];
    if (ct_len < n) {
        ct_len = n;
        makect(ct_len, ip, w + wt_len);
    }

    if (isgn < 0) {
        held = a[n - 1];
        idx = n - 2;
        while (idx >= 2) {
            a[idx + 1] = -a[idx] - a[idx - 1];
            a[idx] -= a[idx - 1];
            idx -= 2;
        }
        a[1] = a[0] + held;
        a[0] -= held;
        if (n > 4) {
            rftbsub(n, a, ct_len, w + wt_len);
        }
        if (n >= 4) {
            cftbsub(n, a, ip, wt_len, w);
        }
    }

    dstsub(n, a, ct_len, w + wt_len);

    if (isgn >= 0) {
        if (n >= 4) {
            cftfsub(n, a, ip, wt_len, w);
            if (n > 4) {
                rftfsub(n, a, ct_len, w + wt_len);
            }
        }
        held = a[0] - a[1];
        a[0] += a[1];
        for (idx = 2; idx < n; idx += 2) {
            a[idx - 1] = -a[idx] - a[idx + 1];
            a[idx] -= a[idx + 1];
        }
        a[n - 1] = -held;
    }
}


/*
 * dfct: in-place transform of a[0..n] that uses t[] as scratch space.
 *
 * NOTE(review): the name and the package header (outside this block) suggest
 * this is the cosine transform of a real symmetric DFT -- confirm there.
 *
 *   n  : transform size; a[0] through a[n] (inclusive) are read and written.
 *   a  : input/output data, overwritten.
 *   t  : work area; entries up to t[n >> 1] are written by this routine.
 *   ip : integer work area; ip[0] and ip[1] cache the current table sizes.
 *   w  : table area; the makewt() table comes first, the makect() table
 *        follows at w + nw.
 *
 * The statements below operate in place and rely on their exact order.
 */
void dfct(int n, double *a, double *t, int *ip, double *w) {
    void makewt(int nw, int *ip, double *w);
    void makect(int nc, int *ip, double *c);
    void cftfsub(int n, double *a, int *ip, int nw, double *w);
    void rftfsub(int n, double *a, int nc, double *c);
    void dctsub(int n, double *a, int nc, double *c);
    int j, k, l, m, mh, nw, nc;
    double xr, xi, yr, yi;

    /* Rebuild the makewt() table when n exceeds 8x the cached size. */
    nw = ip[0];
    if (n > (nw << 3)) {
        nw = n >> 3;
        makewt(nw, ip, w);
    }
    /* Rebuild the makect() table when n exceeds 2x the cached size
       (ip[1] is read after makewt(), which resets it). */
    nc = ip[1];
    if (n > (nc << 1)) {
        nc = n >> 1;
        makect(nc, ip, w + nw);
    }
    /* End points and midpoint: the difference of the ends stays in a[0],
       sum-based terms go to t[0] and t[m]. */
    m = n >> 1;
    yi = a[m];
    xi = a[0] + a[n];
    a[0] -= a[n];
    t[0] = xi - yi;
    t[m] = xi + yi;
    if (n > 2) {
        /* Fold a[] about its midpoint: differences a[j] - a[n - j] remain in
           a[], combinations of the sums a[j] + a[n - j] are stored in t[]. */
        mh = m >> 1;
        for (j = 1; j < mh; j++) {
            k = m - j;
            xr = a[j] - a[n - j];
            xi = a[j] + a[n - j];
            yr = a[k] - a[n - k];
            yi = a[k] + a[n - k];
            a[j] = xr;
            a[k] = yr;
            t[j] = xi - yi;
            t[k] = xi + yi;
        }
        t[mh] = a[mh] + a[n - mh];
        a[mh] -= a[n - mh];
        /* Length-m pass over the folded differences held in a[0..m). */
        dctsub(m, a, nc, w + nw);
        if (m > 4) {
            cftfsub(m, a, ip, nw, w);
            rftfsub(m, a, nc, w + nw);
        } else if (m == 4) {
            cftfsub(m, a, ip, nw, w);
        }
        /* Spread the length-m result over the odd-indexed slots of a[];
           walking j downwards keeps unread inputs from being overwritten. */
        a[n - 1] = a[0] - a[1];
        a[1] = a[0] + a[1];
        for (j = m - 2; j >= 2; j -= 2) {
            a[2 * j + 1] = a[j] + a[j + 1];
            a[2 * j - 1] = a[j] - a[j + 1];
        }
        /* Repeat on t[] with the length halved each round; l is the spacing
           used when results are written back into a[]. */
        l = 2;
        m = mh;
        while (m >= 2) {
            dctsub(m, t, nc, w + nw);
            if (m > 4) {
                cftfsub(m, t, ip, nw, w);
                rftfsub(m, t, nc, w + nw);
            } else if (m == 4) {
                cftfsub(m, t, ip, nw, w);
            }
            a[n - l] = t[0] - t[1];
            a[l] = t[0] + t[1];
            k = 0;
            for (j = 2; j < m; j += 2) {
                k += l << 2;
                a[k - l] = t[j] - t[j + 1];
                a[k + l] = t[j] + t[j + 1];
            }
            l <<= 1;
            /* Fold the entries t[m..2m] down into t[0..m] for the next round. */
            mh = m >> 1;
            for (j = 0; j < mh; j++) {
                k = m - j;
                t[j] = t[m + k] - t[m + j];
                t[k] = t[m + k] + t[m + j];
            }
            t[mh] = t[m + mh];
            m = mh;
        }
        /* Final three values left in t[0..2]. */
        a[l] = t[0];
        a[n] = t[2] - t[1];
        a[0] = t[2] + t[1];
    } else {
        /* n <= 2: the result is assembled directly from a[0], t[0], t[1]. */
        a[1] = a[0];
        a[2] = t[0];
        a[0] = t[1];
    }
}


/*
 * dfst: in-place transform of a[] that uses t[] as scratch space.
 *
 * NOTE(review): the name and the package header (outside this block) suggest
 * this is the sine transform of a real anti-symmetric DFT -- confirm there.
 *
 *   n  : transform size; for n > 2 elements a[1] through a[n - 1] are read
 *        and a[0] through a[n - 1] are written.
 *   a  : input/output data, overwritten; a[0] is always set to 0 on exit.
 *   t  : work area.
 *   ip : integer work area; ip[0] and ip[1] cache the current table sizes.
 *   w  : table area; the makewt() table comes first, the makect() table
 *        follows at w + nw.
 *
 * The statements below operate in place and rely on their exact order.
 */
void dfst(int n, double *a, double *t, int *ip, double *w) {
    void makewt(int nw, int *ip, double *w);
    void makect(int nc, int *ip, double *c);
    void cftfsub(int n, double *a, int *ip, int nw, double *w);
    void rftfsub(int n, double *a, int nc, double *c);
    void dstsub(int n, double *a, int nc, double *c);
    int j, k, l, m, mh, nw, nc;
    double xr, xi, yr, yi;

    /* Rebuild the makewt() table when n exceeds 8x the cached size. */
    nw = ip[0];
    if (n > (nw << 3)) {
        nw = n >> 3;
        makewt(nw, ip, w);
    }
    /* Rebuild the makect() table when n exceeds 2x the cached size
       (ip[1] is read after makewt(), which resets it). */
    nc = ip[1];
    if (n > (nc << 1)) {
        nc = n >> 1;
        makect(nc, ip, w + nw);
    }
    if (n > 2) {
        /* Fold a[] about its midpoint: sums a[j] + a[n - j] remain in a[],
           combinations of the differences a[j] - a[n - j] go to t[]. */
        m = n >> 1;
        mh = m >> 1;
        for (j = 1; j < mh; j++) {
            k = m - j;
            xr = a[j] + a[n - j];
            xi = a[j] - a[n - j];
            yr = a[k] + a[n - k];
            yi = a[k] - a[n - k];
            a[j] = xr;
            a[k] = yr;
            t[j] = xi + yi;
            t[k] = xi - yi;
        }
        t[0] = a[mh] - a[n - mh];
        a[mh] += a[n - mh];
        a[0] = a[m];
        /* Length-m pass over the folded sums held in a[0..m). */
        dstsub(m, a, nc, w + nw);
        if (m > 4) {
            cftfsub(m, a, ip, nw, w);
            rftfsub(m, a, nc, w + nw);
        } else if (m == 4) {
            cftfsub(m, a, ip, nw, w);
        }
        /* Spread the length-m result over the odd-indexed slots of a[];
           walking j downwards keeps unread inputs from being overwritten. */
        a[n - 1] = a[1] - a[0];
        a[1] = a[0] + a[1];
        for (j = m - 2; j >= 2; j -= 2) {
            a[2 * j + 1] = a[j] - a[j + 1];
            a[2 * j - 1] = -a[j] - a[j + 1];
        }
        /* Repeat on t[] with the length halved each round; l is the spacing
           used when results are written back into a[]. */
        l = 2;
        m = mh;
        while (m >= 2) {
            dstsub(m, t, nc, w + nw);
            if (m > 4) {
                cftfsub(m, t, ip, nw, w);
                rftfsub(m, t, nc, w + nw);
            } else if (m == 4) {
                cftfsub(m, t, ip, nw, w);
            }
            a[n - l] = t[1] - t[0];
            a[l] = t[0] + t[1];
            k = 0;
            for (j = 2; j < m; j += 2) {
                k += l << 2;
                a[k - l] = -t[j] - t[j + 1];
                a[k + l] = t[j] - t[j + 1];
            }
            l <<= 1;
            /* Fold the entries above t[m] down into t[0..m) for the next
               round. */
            mh = m >> 1;
            for (j = 1; j < mh; j++) {
                k = m - j;
                t[j] = t[m + k] + t[m + j];
                t[k] = t[m + k] - t[m + j];
            }
            t[0] = t[m + mh];
            m = mh;
        }
        /* Last remaining value. */
        a[l] = t[0];
    }
    /* a[0] is cleared for every n, including n <= 2. */
    a[0] = 0;
}


/* -------- initializing routines -------- */


#include <math.h>

/*
 * makewt: fills w[] with the cos/sin table of size nw and records the sizes
 * in ip[] (ip[0] = nw, ip[1] = 1).
 *
 * Nothing else is written when nw <= 2.  Otherwise the first nw/2 entries are
 * computed with cos()/sin(); when nw/2 > 4, makeipt() also fills the index
 * table in ip[].  Every following sub-table is half as long as the one before
 * it and is built by copying every other group of four entries from its
 * predecessor (no further trigonometric calls).
 */
void makewt(int nw, int *ip, double *w) {
    void makeipt(int nw, int *ip);
    int i, half, src, dst;
    double step, root_half;

    ip[0] = nw;
    ip[1] = 1;
    if (nw <= 2) {
        return;
    }

    half = nw >> 1;
    step = atan(1.0) / half;
    root_half = cos(step * half);
    w[0] = 1;
    w[1] = root_half;
    if (half == 4) {
        w[2] = cos(step * 2);
        w[3] = sin(step * 2);
    } else if (half > 4) {
        makeipt(nw, ip);
        w[2] = 0.5 / cos(step * 2);
        w[3] = 0.5 / cos(step * 6);
        for (i = 4; i < half; i += 4) {
            w[i] = cos(step * i);
            w[i + 1] = sin(step * i);
            w[i + 2] = cos(3 * step * i);
            w[i + 3] = -sin(3 * step * i);
        }
    }

    /* Derive each shorter sub-table from the previous one.  The source range
       [src, src + 2 * half) ends where the destination begins, so the copies
       never overlap. */
    src = 0;
    while (half > 2) {
        dst = src + half;
        half >>= 1;
        w[dst] = 1;
        w[dst + 1] = root_half;
        if (half == 4) {
            w[dst + 2] = w[src + 4];
            w[dst + 3] = w[src + 5];
        } else if (half > 4) {
            w[dst + 2] = 0.5 / w[src + 4];
            w[dst + 3] = 0.5 / w[src + 6];
            for (i = 4; i < half; i += 4) {
                w[dst + i] = w[src + 2 * i];
                w[dst + i + 1] = w[src + 2 * i + 1];
                w[dst + i + 2] = w[src + 2 * i + 2];
                w[dst + i + 3] = w[src + 2 * i + 3];
            }
        }
        src = dst;
    }
}


/*
 * makeipt: builds the integer index table stored in ip[2..].
 *
 * ip[2] and ip[3] are seeded with 0 and 16.  While the remaining span (nw,
 * divided by four each round) is above 32, the current group of `width`
 * entries is expanded into two new groups: each entry times four, and each
 * entry times four plus an offset of 16 * width.
 */
void makeipt(int nw, int *ip) {
    int idx, span, width, next_width, base, offset;

    ip[2] = 0;
    ip[3] = 16;
    width = 2;
    span = nw;
    while (span > 32) {
        next_width = width << 1;
        offset = next_width << 3;
        for (idx = width; idx < next_width; idx++) {
            base = ip[idx] << 2;
            ip[width + idx] = base;
            ip[next_width + idx] = base + offset;
        }
        width = next_width;
        span >>= 2;
    }
}


void makect(int nc, int *ip, double *c) {
    int j, nch;
    double delta;

    ip[1] = nc;
    if (nc > 1) {
        nch = nc >> 1;
        delta = atan(1.0) / nch;
        c[0] = cos(delta * nch);
        c[nch] = 0.5 * c[0];
        for (j = 1; j < nch; j++) {
            c[j] = 0.5 * cos(delta * j);
            c[nc - j] = 0.5 * sin(delta * j);
        }
    }
}


/* -------- child routines -------- */


/*
 * POSIX-threads configuration, active only when USE_CDFT_PTHREADS is defined.
 * It turns on USE_CDFT_THREADS, supplies default size thresholds (each can be
 * overridden by defining it before this point) and wraps thread creation and
 * joining.
 *
 * CDFT_THREADS_BEGIN_N  : cftfsub()/cftbsub() call cftrec4_th() instead of the
 *                         single-threaded routines when n exceeds this value.
 * CDFT_4THREADS_BEGIN_N : NOTE(review): presumably the size above which four
 *                         threads are used; its consumer is not in this part
 *                         of the file -- confirm.
 *
 * Both wrappers print "cdft thread error" to stderr and call exit(1) when the
 * underlying pthread call returns non-zero.
 */
#ifdef USE_CDFT_PTHREADS
#define USE_CDFT_THREADS
#ifndef CDFT_THREADS_BEGIN_N
#define CDFT_THREADS_BEGIN_N 8192
#endif
#ifndef CDFT_4THREADS_BEGIN_N
#define CDFT_4THREADS_BEGIN_N 65536
#endif
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#define cdft_thread_t pthread_t
/* Starts func(argp) on a new thread whose handle is stored through thp. */
#define cdft_thread_create(thp, func, argp)                       \
    {                                                             \
        if (pthread_create(thp, NULL, func, (void *)argp) != 0) { \
            fprintf(stderr, "cdft thread error\n");               \
            exit(1);                                              \
        }                                                         \
    }
/* Joins thread th; its return value is discarded. */
#define cdft_thread_wait(th)                        \
    {                                               \
        if (pthread_join(th, NULL) != 0) {          \
            fprintf(stderr, "cdft thread error\n"); \
            exit(1);                                \
        }                                           \
    }
#endif /* USE_CDFT_PTHREADS */


/*
 * Windows-threads configuration, active only when USE_CDFT_WINTHREADS is
 * defined.  It turns on USE_CDFT_THREADS, supplies default size thresholds
 * (each can be overridden by defining it before this point) and wraps thread
 * creation and waiting.
 *
 * CDFT_THREADS_BEGIN_N  : cftfsub()/cftbsub() call cftrec4_th() instead of the
 *                         single-threaded routines when n exceeds this value.
 * CDFT_4THREADS_BEGIN_N : NOTE(review): presumably the size above which four
 *                         threads are used; its consumer is not in this part
 *                         of the file -- confirm.
 */
#ifdef USE_CDFT_WINTHREADS
#define USE_CDFT_THREADS
#ifndef CDFT_THREADS_BEGIN_N
#define CDFT_THREADS_BEGIN_N 32768
#endif
#ifndef CDFT_4THREADS_BEGIN_N
#define CDFT_4THREADS_BEGIN_N 524288
#endif
#include <stdio.h>
#include <stdlib.h>
#include <windows.h>
#define cdft_thread_t HANDLE
/* Starts func(argp) on a new thread and stores the handle through thp.
   Prints "cdft thread error" to stderr and calls exit(1) when CreateThread
   yields a zero handle.  func is cast to LPTHREAD_START_ROUTINE. */
#define cdft_thread_create(thp, func, argp)                                 \
    {                                                                       \
        DWORD thid;                                                         \
        *(thp) = CreateThread(                                              \
            NULL, 0, (LPTHREAD_START_ROUTINE)func, (LPVOID)argp, 0, &thid); \
        if (*(thp) == 0) {                                                  \
            fprintf(stderr, "cdft thread error\n");                         \
            exit(1);                                                        \
        }                                                                   \
    }
/* Waits without a timeout for thread th, then closes its handle.  Unlike the
   creation wrapper, the results of these calls are not checked. */
#define cdft_thread_wait(th)               \
    {                                      \
        WaitForSingleObject(th, INFINITE); \
        CloseHandle(th);                   \
    }
#endif /* USE_CDFT_WINTHREADS */


/*
 * cftfsub: forward-direction dispatcher that picks the routine matching the
 * length n of a[].
 *
 *   n > 32       : cftf1st(), then one of cftrec4_th() (threaded builds, when
 *                  n > CDFT_THREADS_BEGIN_N), cftrec4() (n > 512),
 *                  cftleaf() (n > 128) or cftfx41(); finally bitrv2().
 *   n == 32      : cftf161() + bitrv216().
 *   8 < n < 32   : cftf081() + bitrv208().
 *   n == 8       : cftf040().
 *   n == 4       : cftx020().
 *   anything else: nothing is done.
 */
void cftfsub(int n, double *a, int *ip, int nw, double *w) {
    void bitrv2(int n, int *ip, double *a);
    void bitrv216(double *a);
    void bitrv208(double *a);
    void cftf1st(int n, double *a, double *w);
    void cftrec4(int n, double *a, int nw, double *w);
    void cftleaf(int n, int isplt, double *a, int nw, double *w);
    void cftfx41(int n, double *a, int nw, double *w);
    void cftf161(double *a, double *w);
    void cftf081(double *a, double *w);
    void cftf040(double *a);
    void cftx020(double *a);
#ifdef USE_CDFT_THREADS
    void cftrec4_th(int n, double *a, int nw, double *w);
#endif /* USE_CDFT_THREADS */

    if (n > 32) {
        /* The first stage reads the table slice that starts n/4 entries
           before the end of the nw-entry table. */
        cftf1st(n, a, &w[nw - (n >> 2)]);
#ifdef USE_CDFT_THREADS
        if (n > CDFT_THREADS_BEGIN_N) {
            cftrec4_th(n, a, nw, w);
        } else
#endif /* USE_CDFT_THREADS */
        {
            if (n > 512) {
                cftrec4(n, a, nw, w);
            } else if (n > 128) {
                cftleaf(n, 1, a, nw, w);
            } else {
                cftfx41(n, a, nw, w);
            }
        }
        bitrv2(n, ip, a);
    } else if (n == 32) {
        cftf161(a, &w[nw - 8]);
        bitrv216(a);
    } else if (n > 8) {
        cftf081(a, w);
        bitrv208(a);
    } else if (n == 8) {
        cftf040(a);
    } else if (n == 4) {
        cftx020(a);
    }
}


/*
 * cftbsub: backward-direction dispatcher that picks the routine matching the
 * length n of a[].
 *
 *   n > 32       : cftb1st(), then one of cftrec4_th() (threaded builds, when
 *                  n > CDFT_THREADS_BEGIN_N), cftrec4() (n > 512),
 *                  cftleaf() (n > 128) or cftfx41(); finally bitrv2conj().
 *   n == 32      : cftf161() + bitrv216neg().
 *   8 < n < 32   : cftf081() + bitrv208neg().
 *   n == 8       : cftb040().
 *   n == 4       : cftx020().
 *   anything else: nothing is done.
 */
void cftbsub(int n, double *a, int *ip, int nw, double *w) {
    void bitrv2conj(int n, int *ip, double *a);
    void bitrv216neg(double *a);
    void bitrv208neg(double *a);
    void cftb1st(int n, double *a, double *w);
    void cftrec4(int n, double *a, int nw, double *w);
    void cftleaf(int n, int isplt, double *a, int nw, double *w);
    void cftfx41(int n, double *a, int nw, double *w);
    void cftf161(double *a, double *w);
    void cftf081(double *a, double *w);
    void cftb040(double *a);
    void cftx020(double *a);
#ifdef USE_CDFT_THREADS
    void cftrec4_th(int n, double *a, int nw, double *w);
#endif /* USE_CDFT_THREADS */

    if (n > 32) {
        /* The first stage reads the table slice that starts n/4 entries
           before the end of the nw-entry table. */
        cftb1st(n, a, &w[nw - (n >> 2)]);
#ifdef USE_CDFT_THREADS
        if (n > CDFT_THREADS_BEGIN_N) {
            cftrec4_th(n, a, nw, w);
        } else
#endif /* USE_CDFT_THREADS */
        {
            if (n > 512) {
                cftrec4(n, a, nw, w);
            } else if (n > 128) {
                cftleaf(n, 1, a, nw, w);
            } else {
                cftfx41(n, a, nw, w);
            }
        }
        bitrv2conj(n, ip, a);
    } else if (n == 32) {
        cftf161(a, &w[nw - 8]);
        bitrv216neg(a);
    } else if (n > 8) {
        cftf081(a, w);
        bitrv208neg(a);
    } else if (n == 8) {
        cftb040(a);
    } else if (n == 4) {
        cftx020(a);
    }
}


/* Exchanges the pair a[p], a[p + 1] with the pair a[q], a[q + 1].
   All four values are read before any of them is written. */
static void bitrv2_swap_pair(double *a, int p, int q) {
    double pr = a[p];
    double pi = a[p + 1];
    double qr = a[q];
    double qi = a[q + 1];

    a[p] = qr;
    a[p + 1] = qi;
    a[q] = pr;
    a[q + 1] = pi;
}


/*
 * bitrv2: reorders the n doubles of a[] (treated as n/2 adjacent pairs) in
 * place by exchanging pairs whose positions are derived from the index table
 * ip[m..2m), where m is obtained by doubling once for every factor of four
 * by which n/4 exceeds 8.
 *
 * Two walks exist, chosen by the remainder left after that reduction: the
 * `rem == 8` walk performs 16 exchanges per off-diagonal (row, col)
 * combination and 6 per row on the diagonal; the other walk performs 8 and 2.
 */
void bitrv2(int n, int *ip, double *a) {
    int col, row, p, q, rem, m, nh, nm;

    m = 1;
    rem = n >> 2;
    while (rem > 8) {
        m <<= 1;
        rem >>= 2;
    }
    nh = n >> 1;
    nm = 4 * m;

    if (rem == 8) {
        for (row = 0; row < m; row++) {
            for (col = 0; col < row; col++) {
                p = 4 * col + 2 * ip[m + row];
                q = 4 * row + 2 * ip[m + col];
                bitrv2_swap_pair(a, p, q);
                p += nm;
                q += 2 * nm;
                bitrv2_swap_pair(a, p, q);
                p += nm;
                q -= nm;
                bitrv2_swap_pair(a, p, q);
                p += nm;
                q += 2 * nm;
                bitrv2_swap_pair(a, p, q);
                p += nh;
                q += 2;
                bitrv2_swap_pair(a, p, q);
                p -= nm;
                q -= 2 * nm;
                bitrv2_swap_pair(a, p, q);
                p -= nm;
                q += nm;
                bitrv2_swap_pair(a, p, q);
                p -= nm;
                q -= 2 * nm;
                bitrv2_swap_pair(a, p, q);
                p += 2;
                q += nh;
                bitrv2_swap_pair(a, p, q);
                p += nm;
                q += 2 * nm;
                bitrv2_swap_pair(a, p, q);
                p += nm;
                q -= nm;
                bitrv2_swap_pair(a, p, q);
                p += nm;
                q += 2 * nm;
                bitrv2_swap_pair(a, p, q);
                p -= nh;
                q -= 2;
                bitrv2_swap_pair(a, p, q);
                p -= nm;
                q -= 2 * nm;
                bitrv2_swap_pair(a, p, q);
                p -= nm;
                q += nm;
                bitrv2_swap_pair(a, p, q);
                p -= nm;
                q -= 2 * nm;
                bitrv2_swap_pair(a, p, q);
            }
            /* Diagonal entry (col == row). */
            q = 4 * row + 2 * ip[m + row];
            p = q + 2;
            q += nh;
            bitrv2_swap_pair(a, p, q);
            p += nm;
            q += 2 * nm;
            bitrv2_swap_pair(a, p, q);
            p += nm;
            q -= nm;
            bitrv2_swap_pair(a, p, q);
            p -= 2;
            q -= nh;
            bitrv2_swap_pair(a, p, q);
            p += nh + 2;
            q += nh + 2;
            bitrv2_swap_pair(a, p, q);
            p -= nh - nm;
            q += 2 * nm - 2;
            bitrv2_swap_pair(a, p, q);
        }
    } else {
        for (row = 0; row < m; row++) {
            for (col = 0; col < row; col++) {
                p = 4 * col + ip[m + row];
                q = 4 * row + ip[m + col];
                bitrv2_swap_pair(a, p, q);
                p += nm;
                q += nm;
                bitrv2_swap_pair(a, p, q);
                p += nh;
                q += 2;
                bitrv2_swap_pair(a, p, q);
                p -= nm;
                q -= nm;
                bitrv2_swap_pair(a, p, q);
                p += 2;
                q += nh;
                bitrv2_swap_pair(a, p, q);
                p += nm;
                q += nm;
                bitrv2_swap_pair(a, p, q);
                p -= nh;
                q -= 2;
                bitrv2_swap_pair(a, p, q);
                p -= nm;
                q -= nm;
                bitrv2_swap_pair(a, p, q);
            }
            /* Diagonal entry (col == row). */
            q = 4 * row + ip[m + row];
            p = q + 2;
            q += nh;
            bitrv2_swap_pair(a, p, q);
            p += nm;
            q += nm;
            bitrv2_swap_pair(a, p, q);
        }
    }
}


/* Exchanges the pair a[p], a[p + 1] with the pair a[q], a[q + 1] and flips
   the sign of the second element of each pair while doing so.
   All four values are read before any of them is written. */
static void bitrv2conj_swap_pair(double *a, int p, int q) {
    double pr = a[p];
    double pi = -a[p + 1];
    double qr = a[q];
    double qi = -a[q + 1];

    a[p] = qr;
    a[p + 1] = qi;
    a[q] = pr;
    a[q + 1] = pi;
}


/*
 * bitrv2conj: same index walk as bitrv2(), but every exchanged pair also has
 * the sign of its second element flipped.  On the diagonal, a few extra
 * odd-indexed entries that take part in no exchange are negated as well
 * (the a[p - 1] and a[q + 3] statements).
 *
 * m is obtained by doubling once for every factor of four by which n/4
 * exceeds 8; the index table is read from ip[m..2m).
 */
void bitrv2conj(int n, int *ip, double *a) {
    int col, row, p, q, rem, m, nh, nm;

    m = 1;
    rem = n >> 2;
    while (rem > 8) {
        m <<= 1;
        rem >>= 2;
    }
    nh = n >> 1;
    nm = 4 * m;

    if (rem == 8) {
        for (row = 0; row < m; row++) {
            for (col = 0; col < row; col++) {
                p = 4 * col + 2 * ip[m + row];
                q = 4 * row + 2 * ip[m + col];
                bitrv2conj_swap_pair(a, p, q);
                p += nm;
                q += 2 * nm;
                bitrv2conj_swap_pair(a, p, q);
                p += nm;
                q -= nm;
                bitrv2conj_swap_pair(a, p, q);
                p += nm;
                q += 2 * nm;
                bitrv2conj_swap_pair(a, p, q);
                p += nh;
                q += 2;
                bitrv2conj_swap_pair(a, p, q);
                p -= nm;
                q -= 2 * nm;
                bitrv2conj_swap_pair(a, p, q);
                p -= nm;
                q += nm;
                bitrv2conj_swap_pair(a, p, q);
                p -= nm;
                q -= 2 * nm;
                bitrv2conj_swap_pair(a, p, q);
                p += 2;
                q += nh;
                bitrv2conj_swap_pair(a, p, q);
                p += nm;
                q += 2 * nm;
                bitrv2conj_swap_pair(a, p, q);
                p += nm;
                q -= nm;
                bitrv2conj_swap_pair(a, p, q);
                p += nm;
                q += 2 * nm;
                bitrv2conj_swap_pair(a, p, q);
                p -= nh;
                q -= 2;
                bitrv2conj_swap_pair(a, p, q);
                p -= nm;
                q -= 2 * nm;
                bitrv2conj_swap_pair(a, p, q);
                p -= nm;
                q += nm;
                bitrv2conj_swap_pair(a, p, q);
                p -= nm;
                q -= 2 * nm;
                bitrv2conj_swap_pair(a, p, q);
            }
            /* Diagonal entry (col == row). */
            q = 4 * row + 2 * ip[m + row];
            p = q + 2;
            q += nh;
            a[p - 1] = -a[p - 1];
            bitrv2conj_swap_pair(a, p, q);
            a[q + 3] = -a[q + 3];
            p += nm;
            q += 2 * nm;
            bitrv2conj_swap_pair(a, p, q);
            p += nm;
            q -= nm;
            bitrv2conj_swap_pair(a, p, q);
            p -= 2;
            q -= nh;
            bitrv2conj_swap_pair(a, p, q);
            p += nh + 2;
            q += nh + 2;
            bitrv2conj_swap_pair(a, p, q);
            p -= nh - nm;
            q += 2 * nm - 2;
            a[p - 1] = -a[p - 1];
            bitrv2conj_swap_pair(a, p, q);
            a[q + 3] = -a[q + 3];
        }
    } else {
        for (row = 0; row < m; row++) {
            for (col = 0; col < row; col++) {
                p = 4 * col + ip[m + row];
                q = 4 * row + ip[m + col];
                bitrv2conj_swap_pair(a, p, q);
                p += nm;
                q += nm;
                bitrv2conj_swap_pair(a, p, q);
                p += nh;
                q += 2;
                bitrv2conj_swap_pair(a, p, q);
                p -= nm;
                q -= nm;
                bitrv2conj_swap_pair(a, p, q);
                p += 2;
                q += nh;
                bitrv2conj_swap_pair(a, p, q);
                p += nm;
                q += nm;
                bitrv2conj_swap_pair(a, p, q);
                p -= nh;
                q -= 2;
                bitrv2conj_swap_pair(a, p, q);
                p -= nm;
                q -= nm;
                bitrv2conj_swap_pair(a, p, q);
            }
            /* Diagonal entry (col == row). */
            q = 4 * row + ip[m + row];
            p = q + 2;
            q += nh;
            a[p - 1] = -a[p - 1];
            bitrv2conj_swap_pair(a, p, q);
            a[q + 3] = -a[q + 3];
            p += nm;
            q += nm;
            a[p - 1] = -a[p - 1];
            bitrv2conj_swap_pair(a, p, q);
            a[q + 3] = -a[q + 3];
        }
    }
}


/*
 * In-place 4-bit bit-reversal reordering of 16 elements, where element k
 * occupies the two doubles a[2k] and a[2k + 1].
 *
 * Only indices whose 4-bit reversal differs from themselves move; they are
 * exchanged pairwise.  Elements 0, 6, 9 and 15 are never read or written.
 */
void bitrv216(double *a) {
    /* (k, bit-reversed k) pairs with k < reversed k. */
    static const int exchange[6][2] = {
        {1, 8}, {2, 4}, {3, 12}, {5, 10}, {7, 14}, {11, 13}};
    int p;

    for (p = 0; p < 6; p++) {
        double *first = a + 2 * exchange[p][0];
        double *second = a + 2 * exchange[p][1];
        double keep_r = first[0];
        double keep_i = first[1];

        first[0] = second[0];
        first[1] = second[1];
        second[0] = keep_r;
        second[1] = keep_i;
    }
}


/*
 * In-place reordering of 16 elements (element k = a[2k], a[2k + 1]).
 *
 * The element originally at index s ends up at index (16 - rev4(s)) mod 16,
 * where rev4 reverses the low four bits.  Element 0 (a[0], a[1]) is left
 * untouched; every other element is rewritten.
 */
void bitrv216neg(double *a) {
    /* origin[d - 1] is the source element that lands at destination d. */
    static const int origin[15] = {
        15, 7, 11, 3, 13, 5, 9, 1, 14, 6, 10, 2, 12, 4, 8};
    double snapshot[32];
    int d;
    int i;

    /* Capture everything that will be overwritten before storing anything. */
    for (i = 2; i < 32; i++) {
        snapshot[i] = a[i];
    }
    for (d = 1; d <= 15; d++) {
        const int s = origin[d - 1];

        a[2 * d] = snapshot[2 * s];
        a[2 * d + 1] = snapshot[2 * s + 1];
    }
}


/*
 * In-place 3-bit bit-reversal reordering of 8 elements, where element k
 * occupies a[2k] and a[2k + 1].
 *
 * Elements 1 <-> 4 and 3 <-> 6 are exchanged; elements 0, 2, 5 and 7 are
 * never read or written.
 */
void bitrv208(double *a) {
    static const int exchange[2][2] = {{1, 4}, {3, 6}};
    int p;

    for (p = 0; p < 2; p++) {
        double *first = a + 2 * exchange[p][0];
        double *second = a + 2 * exchange[p][1];
        double keep_r = first[0];
        double keep_i = first[1];

        first[0] = second[0];
        first[1] = second[1];
        second[0] = keep_r;
        second[1] = keep_i;
    }
}


/*
 * In-place reordering of 8 elements (element k = a[2k], a[2k + 1]).
 *
 * The element originally at index s ends up at index (8 - rev3(s)) mod 8,
 * where rev3 reverses the low three bits.  Element 0 (a[0], a[1]) is left
 * untouched; every other element is rewritten.
 */
void bitrv208neg(double *a) {
    /* origin[d - 1] is the source element that lands at destination d. */
    static const int origin[7] = {7, 3, 5, 1, 6, 2, 4};
    double snapshot[16];
    int d;
    int i;

    /* Capture everything that will be overwritten before storing anything. */
    for (i = 2; i < 16; i++) {
        snapshot[i] = a[i];
    }
    for (d = 1; d <= 7; d++) {
        const int s = origin[d - 1];

        a[2 * d] = snapshot[2 * s];
        a[2 * d + 1] = snapshot[2 * s + 1];
    }
}


void cftf1st(int n, double *a, double *w) {
    int j, j0, j1, j2, j3, k, m, mh;
    double wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i;
    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r, y2i,
        y3r, y3i;

    mh = n >> 3;
    m = 2 * mh;
    j1 = m;
    j2 = j1 + m;
    j3 = j2 + m;
    x0r = a[0] + a[j2];
    x0i = a[1] + a[j2 + 1];
    x1r = a[0] - a[j2];
    x1i = a[1] - a[j2 + 1];
    x2r = a[j1] + a[j3];
    x2i = a[j1 + 1] + a[j3 + 1];
    x3r = a[j1] - a[j3];
    x3i = a[j1 + 1] - a[j3 + 1];
    a[0] = x0r + x2r;
    a[1] = x0i + x2i;
    a[j1] = x0r - x2r;
    a[j1 + 1] = x0i - x2i;
    a[j2] = x1r - x3i;
    a[j2 + 1] = x1i + x3r;
    a[j3] = x1r + x3i;
    a[j3 + 1] = x1i - x3r;
    wn4r = w[1];
    csc1 = w[2];
    csc3 = w[3];
    wd1r = 1;
    wd1i = 0;
    wd3r = 1;
    wd3i = 0;
    k = 0;
    for (j = 2; j < mh - 2; j += 4) {
        k += 4;
        wk1r = csc1 * (wd1r + w[k]);
        wk1i = csc1 * (wd1i + w[k + 1]);
        wk3r = csc3 * (wd3r + w[k + 2]);
        wk3i = csc3 * (wd3i + w[k + 3]);
        wd1r = w[k];
        wd1i = w[k + 1];
        wd3r = w[k + 2];
        wd3i = w[k + 3];
        j1 = j + m;
        j2 = j1 + m;
        j3 = j2 + m;
        x0r = a[j] + a[j2];
        x0i = a[j + 1] + a[j2 + 1];
        x1r = a[j] - a[j2];
        x1i = a[j + 1] - a[j2 + 1];
        y0r = a[j + 2] + a[j2 + 2];
        y0i = a[j + 3] + a[j2 + 3];
        y1r = a[j + 2] - a[j2 + 2];
        y1i = a[j + 3] - a[j2 + 3];
        x2r = a[j1] + a[j3];
        x2i = a[j1 + 1] + a[j3 + 1];
        x3r = a[j1] - a[j3];
        x3i = a[j1 + 1] - a[j3 + 1];
        y2r = a[j1 + 2] + a[j3 + 2];
        y2i = a[j1 + 3] + a[j3 + 3];
        y3r = a[j1 + 2] - a[j3 + 2];
        y3i = a[j1 + 3] - a[j3 + 3];
        a[j] = x0r + x2r;
        a[j + 1] = x0i + x2i;
        a[j + 2] = y0r + y2r;
        a[j + 3] = y0i + y2i;
        a[j1] = x0r - x2r;
        a[j1 + 1] = x0i - x2i;
        a[j1 + 2] = y0r - y2r;
        a[j1 + 3] = y0i - y2i;
        x0r = x1r - x3i;
        x0i = x1i + x3r;
        a[j2] = wk1r * x0r - wk1i * x0i;
        a[j2 + 1] = wk1r * x0i + wk1i * x0r;
        x0r = y1r - y3i;
        x0i = y1i + y3r;
        a[j2 + 2] = wd1r * x0r - wd1i * x0i;
        a[j2 + 3] = wd1r * x0i + wd1i * x0r;
        x0r = x1r + x3i;
        x0i = x1i - x3r;
        a[j3] = wk3r * x0r + wk3i * x0i;
        a[j3 + 1] = wk3r * x0i - wk3i * x0r;
        x0r = y1r + y3i;
        x0i = y1i - y3r;
        a[j3 + 2] = wd3r * x0r + wd3i * x0i;
        a[j3 + 3] = wd3r * x0i - wd3i * x0r;
        j0 = m - j;
        j1 = j0 + m;
        j2 = j1 + m;
        j3 = j2 + m;
        x0r = a[j0] + a[j2];
        x0i = a[j0 + 1] + a[j2 + 1];
        x1r = a[j0] - a[j2];
        x1i = a[j0 + 1] - a[j2 + 1];
        y0r = a[j0 - 2] + a[j2 - 2];
        y0i = a[j0 - 1] + a[j2 - 1];
        y1r = a[j0 - 2] - a[j2 - 2];
        y1i = a[j0 - 1] - a[j2 - 1];
        x2r = a[j1] + a[j3];
        x2i = a[j1 + 1] + a[j3 + 1];
        x3r = a[j1] - a[j3];
        x3i = a[j1 + 1] - a[j3 + 1];
        y2r = a[j1 - 2] + a[j3 - 2];
        y2i = a[j1 - 1] + a[j3 - 1];
        y3r = a[j1 - 2] - a[j3 - 2];
        y3i = a[j1 - 1] - a[j3 - 1];
        a[j0] = x0r + x2r;
        a[j0 + 1] = x0i + x2i;
        a[j0 - 2] = y0r + y2r;
        a[j0 - 1] = y0i + y2i;
        a[j1] = x0r - x2r;
        a[j1 + 1] = x0i - x2i;
        a[j1 - 2] = y0r - y2r;
        a[j1 - 1] = y0i - y2i;
        x0r = x1r - x3i;
        x0i = x1i + x3r;
        a[j2] = wk1i * x0r - wk1r * x0i;
        a[j2 + 1] = wk1i * x0i + wk1r * x0r;
        x0r = y1r - y3i;
        x0i = y1i + y3r;
        a[j2 - 2] = wd1i * x0r - wd1r * x0i;
        a[j2 - 1] = wd1i * x0i + wd1r * x0r;
        x0r = x1r + x3i;
        x0i = x1i - x3r;
        a[j3] = wk3i * x0r + wk3r * x0i;
        a[j3 + 1] = wk3i * x0i - wk3r * x0r;
        x0r = y1r + y3i;
        x0i = y1i - y3r;
        a[j3 - 2] = wd3i * x0r + wd3r * x0i;
        a[j3 - 1] = wd3i * x0i - wd3r * x0r;
    }
    wk1r = csc1 * (wd1r + wn4r);
    wk1i = csc1 * (wd1i + wn4r);
    wk3r = csc3 * (wd3r - wn4r);
    wk3i = csc3 * (wd3i - wn4r);
    j0 = mh;
    j1 = j0 + m;
    j2 = j1 + m;
    j3 = j2 + m;
    x0r = a[j0 - 2] + a[j2 - 2];
    x0i = a[j0 - 1] + a[j2 - 1];
    x1r = a[j0 - 2] - a[j2 - 2];
    x1i = a[j0 - 1] - a[j2 - 1];
    x2r = a[j1 - 2] + a[j3 - 2];
    x2i = a[j1 - 1] + a[j3 - 1];
    x3r = a[j1 - 2] - a[j3 - 2];
    x3i = a[j1 - 1] - a[j3 - 1];
    a[j0 - 2] = x0r + x2r;
    a[j0 - 1] = x0i + x2i;
    a[j1 - 2] = x0r - x2r;
    a[j1 - 1] = x0i - x2i;
    x0r = x1r - x3i;
    x0i = x1i + x3r;
    a[j2 - 2] = wk1r * x0r - wk1i * x0i;
    a[j2 - 1] = wk1r * x0i + wk1i * x0r;
    x0r = x1r + x3i;
    x0i = x1i - x3r;
    a[j3 - 2] = wk3r * x0r + wk3i * x0i;
    a[j3 - 1] = wk3r * x0i - wk3i * x0r;
    x0r = a[j0] + a[j2];
    x0i = a[j0 + 1] + a[j2 + 1];
    x1r = a[j0] - a[j2];
    x1i = a[j0 + 1] - a[j2 + 1];
    x2r = a[j1] + a[j3];
    x2i = a[j1 + 1] + a[j3 + 1];
    x3r = a[j1] - a[j3];
    x3i = a[j1 + 1] - a[j3 + 1];
    a[j0] = x0r + x2r;
    a[j0 + 1] = x0i + x2i;
    a[j1] = x0r - x2r;
    a[j1 + 1] = x0i - x2i;
    x0r = x1r - x3i;
    x0i = x1i + x3r;
    a[j2] = wn4r * (x0r - x0i);
    a[j2 + 1] = wn4r * (x0i + x0r);
    x0r = x1r + x3i;
    x0i = x1i - x3r;
    a[j3] = -wn4r * (x0r + x0i);
    a[j3 + 1] = -wn4r * (x0i - x0r);
    x0r = a[j0 + 2] + a[j2 + 2];
    x0i = a[j0 + 3] + a[j2 + 3];
    x1r = a[j0 + 2] - a[j2 + 2];
    x1i = a[j0 + 3] - a[j2 + 3];
    x2r = a[j1 + 2] + a[j3 + 2];
    x2i = a[j1 + 3] + a[j3 + 3];
    x3r = a[j1 + 2] - a[j3 + 2];
    x3i = a[j1 + 3] - a[j3 + 3];
    a[j0 + 2] = x0r + x2r;
    a[j0 + 3] = x0i + x2i;
    a[j1 + 2] = x0r - x2r;
    a[j1 + 3] = x0i - x2i;
    x0r = x1r - x3i;
    x0i = x1i + x3r;
    a[j2 + 2] = wk1i * x0r - wk1r * x0i;
    a[j2 + 3] = wk1i * x0i + wk1r * x0r;
    x0r = x1r + x3i;
    x0i = x1i - x3r;
    a[j3 + 2] = wk3i * x0r + wk3r * x0i;
    a[j3 + 3] = wk3i * x0i - wk3r * x0r;
}


void cftb1st(int n, double *a, double *w) {
    int j, j0, j1, j2, j3, k, m, mh;
    double wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i;
    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r, y2i,
        y3r, y3i;

    mh = n >> 3;
    m = 2 * mh;
    j1 = m;
    j2 = j1 + m;
    j3 = j2 + m;
    x0r = a[0] + a[j2];
    x0i = -a[1] - a[j2 + 1];
    x1r = a[0] - a[j2];
    x1i = -a[1] + a[j2 + 1];
    x2r = a[j1] + a[j3];
    x2i = a[j1 + 1] + a[j3 + 1];
    x3r = a[j1] - a[j3];
    x3i = a[j1 + 1] - a[j3 + 1];
    a[0] = x0r + x2r;
    a[1] = x0i - x2i;
    a[j1] = x0r - x2r;
    a[j1 + 1] = x0i + x2i;
    a[j2] = x1r + x3i;
    a[j2 + 1] = x1i + x3r;
    a[j3] = x1r - x3i;
    a[j3 + 1] = x1i - x3r;
    wn4r = w[1];
    csc1 = w[2];
    csc3 = w[3];
    wd1r = 1;
    wd1i = 0;
    wd3r = 1;
    wd3i = 0;
    k = 0;
    for (j = 2; j < mh - 2; j += 4) {
        k += 4;
        wk1r = csc1 * (wd1r + w[k]);
        wk1i = csc1 * (wd1i + w[k + 1]);
        wk3r = csc3 * (wd3r + w[k + 2]);
        wk3i = csc3 * (wd3i + w[k + 3]);
        wd1r = w[k];
        wd1i = w[k + 1];
        wd3r = w[k + 2];
        wd3i = w[k + 3];
        j1 = j + m;
        j2 = j1 + m;
        j3 = j2 + m;
        x0r = a[j] + a[j2];
        x0i = -a[j + 1] - a[j2 + 1];
        x1r = a[j] - a[j2];
        x1i = -a[j + 1] + a[j2 + 1];
        y0r = a[j + 2] + a[j2 + 2];
        y0i = -a[j + 3] - a[j2 + 3];
        y1r = a[j + 2] - a[j2 + 2];
        y1i = -a[j + 3] + a[j2 + 3];
        x2r = a[j1] + a[j3];
        x2i = a[j1 + 1] + a[j3 + 1];
        x3r = a[j1] - a[j3];
        x3i = a[j1 + 1] - a[j3 + 1];
        y2r = a[j1 + 2] + a[j3 + 2];
        y2i = a[j1 + 3] + a[j3 + 3];
        y3r = a[j1 + 2] - a[j3 + 2];
        y3i = a[j1 + 3] - a[j3 + 3];
        a[j] = x0r + x2r;
        a[j + 1] = x0i - x2i;
        a[j + 2] = y0r + y2r;
        a[j + 3] = y0i - y2i;
        a[j1] = x0r - x2r;
        a[j1 + 1] = x0i + x2i;
        a[j1 + 2] = y0r - y2r;
        a[j1 + 3] = y0i + y2i;
        x0r = x1r + x3i;
        x0i = x1i + x3r;
        a[j2] = wk1r * x0r - wk1i * x0i;
        a[j2 + 1] = wk1r * x0i + wk1i * x0r;
        x0r = y1r + y3i;
        x0i = y1i + y3r;
        a[j2 + 2] = wd1r * x0r - wd1i * x0i;
        a[j2 + 3] = wd1r * x0i + wd1i * x0r;
        x0r = x1r - x3i;
        x0i = x1i - x3r;
        a[j3] = wk3r * x0r + wk3i * x0i;
        a[j3 + 1] = wk3r * x0i - wk3i * x0r;
        x0r = y1r - y3i;
        x0i = y1i - y3r;
        a[j3 + 2] = wd3r * x0r + wd3i * x0i;
        a[j3 + 3] = wd3r * x0i - wd3i * x0r;
        j0 = m - j;
        j1 = j0 + m;
        j2 = j1 + m;
        j3 = j2 + m;
        x0r = a[j0] + a[j2];
        x0i = -a[j0 + 1] - a[j2 + 1];
        x1r = a[j0] - a[j2];
        x1i = -a[j0 + 1] + a[j2 + 1];
        y0r = a[j0 - 2] + a[j2 - 2];
        y0i = -a[j0 - 1] - a[j2 - 1];
        y1r = a[j0 - 2] - a[j2 - 2];
        y1i = -a[j0 - 1] + a[j2 - 1];
        x2r = a[j1] + a[j3];
        x2i = a[j1 + 1] + a[j3 + 1];
        x3r = a[j1] - a[j3];
        x3i = a[j1 + 1] - a[j3 + 1];
        y2r = a[j1 - 2] + a[j3 - 2];
        y2i = a[j1 - 1] + a[j3 - 1];
        y3r = a[j1 - 2] - a[j3 - 2];
        y3i = a[j1 - 1] - a[j3 - 1];
        a[j0] = x0r + x2r;
        a[j0 + 1] = x0i - x2i;
        a[j0 - 2] = y0r + y2r;
        a[j0 - 1] = y0i - y2i;
        a[j1] = x0r - x2r;
        a[j1 + 1] = x0i + x2i;
        a[j1 - 2] = y0r - y2r;
        a[j1 - 1] = y0i + y2i;
        x0r = x1r + x3i;
        x0i = x1i + x3r;
        a[j2] = wk1i * x0r - wk1r * x0i;
        a[j2 + 1] = wk1i * x0i + wk1r * x0r;
        x0r = y1r + y3i;
        x0i = y1i + y3r;
        a[j2 - 2] = wd1i * x0r - wd1r * x0i;
        a[j2 - 1] = wd1i * x0i + wd1r * x0r;
        x0r = x1r - x3i;
        x0i = x1i - x3r;
        a[j3] = wk3i * x0r + wk3r * x0i;
        a[j3 + 1] = wk3i * x0i - wk3r * x0r;
        x0r = y1r - y3i;
        x0i = y1i - y3r;
        a[j3 - 2] = wd3i * x0r + wd3r * x0i;
        a[j3 - 1] = wd3i * x0i - wd3r * x0r;
    }
    wk1r = csc1 * (wd1r + wn4r);
    wk1i = csc1 * (wd1i + wn4r);
    wk3r = csc3 * (wd3r - wn4r);
    wk3i = csc3 * (wd3i - wn4r);
    j0 = mh;
    j1 = j0 + m;
    j2 = j1 + m;
    j3 = j2 + m;
    x0r = a[j0 - 2] + a[j2 - 2];
    x0i = -a[j0 - 1] - a[j2 - 1];
    x1r = a[j0 - 2] - a[j2 - 2];
    x1i = -a[j0 - 1] + a[j2 - 1];
    x2r = a[j1 - 2] + a[j3 - 2];
    x2i = a[j1 - 1] + a[j3 - 1];
    x3r = a[j1 - 2] - a[j3 - 2];
    x3i = a[j1 - 1] - a[j3 - 1];
    a[j0 - 2] = x0r + x2r;
    a[j0 - 1] = x0i - x2i;
    a[j1 - 2] = x0r - x2r;
    a[j1 - 1] = x0i + x2i;
    x0r = x1r + x3i;
    x0i = x1i + x3r;
    a[j2 - 2] = wk1r * x0r - wk1i * x0i;
    a[j2 - 1] = wk1r * x0i + wk1i * x0r;
    x0r = x1r - x3i;
    x0i = x1i - x3r;
    a[j3 - 2] = wk3r * x0r + wk3i * x0i;
    a[j3 - 1] = wk3r * x0i - wk3i * x0r;
    x0r = a[j0] + a[j2];
    x0i = -a[j0 + 1] - a[j2 + 1];
    x1r = a[j0] - a[j2];
    x1i = -a[j0 + 1] + a[j2 + 1];
    x2r = a[j1] + a[j3];
    x2i = a[j1 + 1] + a[j3 + 1];
    x3r = a[j1] - a[j3];
    x3i = a[j1 + 1] - a[j3 + 1];
    a[j0] = x0r + x2r;
    a[j0 + 1] = x0i - x2i;
    a[j1] = x0r - x2r;
    a[j1 + 1] = x0i + x2i;
    x0r = x1r + x3i;
    x0i = x1i + x3r;
    a[j2] = wn4r * (x0r - x0i);
    a[j2 + 1] = wn4r * (x0i + x0r);
    x0r = x1r - x3i;
    x0i = x1i - x3r;
    a[j3] = -wn4r * (x0r + x0i);
    a[j3 + 1] = -wn4r * (x0i - x0r);
    x0r = a[j0 + 2] + a[j2 + 2];
    x0i = -a[j0 + 3] - a[j2 + 3];
    x1r = a[j0 + 2] - a[j2 + 2];
    x1i = -a[j0 + 3] + a[j2 + 3];
    x2r = a[j1 + 2] + a[j3 + 2];
    x2i = a[j1 + 3] + a[j3 + 3];
    x3r = a[j1 + 2] - a[j3 + 2];
    x3i = a[j1 + 3] - a[j3 + 3];
    a[j0 + 2] = x0r + x2r;
    a[j0 + 3] = x0i - x2i;
    a[j1 + 2] = x0r - x2r;
    a[j1 + 3] = x0i + x2i;
    x0r = x1r + x3i;
    x0i = x1i + x3r;
    a[j2 + 2] = wk1i * x0r - wk1r * x0i;
    a[j2 + 3] = wk1i * x0i + wk1r * x0r;
    x0r = x1r - x3i;
    x0i = x1i - x3r;
    a[j3 + 2] = wk3i * x0r + wk3r * x0i;
    a[j3 + 3] = wk3i * x0i - wk3r * x0r;
}


#ifdef USE_CDFT_THREADS
/*
 * Argument bundle handed (as void *) to the worker routines cftrec1_th()
 * and cftrec2_th(); one instance per worker is filled in by cftrec4_th().
 */
struct cdft_arg_st {
    /* Length n that was passed to cftrec4_th(), identical for all workers. */
    int n0;
    /* Length of this worker's segment (n / 2 or n / 4 in cftrec4_th()). */
    int n;
    /* Start of this worker's segment inside the caller's array. */
    double *a;
    /* nw as passed to cftrec4_th(), forwarded unchanged. */
    int nw;
    /* w as passed to cftrec4_th(), forwarded unchanged and shared. */
    double *w;
};
typedef struct cdft_arg_st cdft_arg_t;


/*
 * Threaded driver: splits a[0 .. n-1] into equal contiguous segments, starts
 * one worker per segment and waits for all of them before returning.
 *
 * Two workers (segment length n / 2) are used by default; when
 * n > CDFT_4THREADS_BEGIN_N four workers (segment length n / 4) are used.
 * The worker whose index equals idiv4 (index 0 with two workers, index 1
 * with four) runs cftrec2_th(); every other worker runs cftrec1_th().
 *
 * Thread creation and joining go through the cdft_thread_create /
 * cdft_thread_wait facilities defined elsewhere in the file.
 */
void cftrec4_th(int n, double *a, int nw, double *w) {
    void *cftrec1_th(void *p);
    void *cftrec2_th(void *p);
    int i, idiv4, m, nthread;
    cdft_thread_t th[4];
    cdft_arg_t ag[4];

    nthread = 2;
    idiv4 = 0;
    m = n >> 1;
    if (n > CDFT_4THREADS_BEGIN_N) {
        nthread = 4;
        idiv4 = 1;
        m >>= 1;
    }
    for (i = 0; i < nthread; i++) {
        /* ag[] lives on this stack frame; it stays valid because every
         * worker is waited for below before the function returns. */
        ag[i].n0 = n;
        ag[i].n = m;
        ag[i].a = &a[i * m];
        ag[i].nw = nw;
        ag[i].w = w;
        if (i != idiv4) {
            cdft_thread_create(&th[i], cftrec1_th, &ag[i]);
        } else {
            cdft_thread_create(&th[i], cftrec2_th, &ag[i]);
        }
    }
    for (i = 0; i < nthread; i++) {
        cdft_thread_wait(th[i]);
    }
}


void *cftrec1_th(void *p) {
    int cfttree(int n, int j, int k, double *a, int nw, double *w);
    void cftleaf(int n, int isplt, double *a, int nw, double *w);
    void cftmdl1(int n, double *a, double *w);
    int isplt, j, k, m, n, n0, nw;
    double *a, *w;

    n0 = ((cdft_arg_t *)p)->n0;
    n = ((cdft_arg_t *)p)->n;
    a = ((cdft_arg_t *)p)->a;
    nw = ((cdft_arg_t *)p)->nw;
    w = ((cdft_arg_t *)p)->w;
    m = n0;
    while (m > 512) {
        m >>= 2;
        cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]);
    }
    cftleaf(m, 1, &a[n - m], nw, w);
    k = 0;
    for (j = n - m; j > 0; j -= m) {
        k++;
        isplt = cfttree(m, j, k, a, nw, w);
        cftleaf(m, isplt, &a[j - m], nw, w);
    }
    return (void *)0;
}


void *cftrec2_th(void *p) {
    int cfttree(int n, int j, int k, double *a, int nw, double *w);
    void cftleaf(int n, int isplt, double *a, int nw, double *w);
    void cftmdl2(int n, double *a, double *w);
    int isplt, j, k, m, n, n0, nw;
    double *a, *w;

    n0 = ((cdft_arg_t *)p)->n0;
    n = ((cdft_arg_t *)p)->n;
    a = ((cdft_arg_t *)p)->a;
    nw = ((cdft_arg_t *)p)->nw;
    w = ((cdft_arg_t *)p)->w;
    k = 1;
    m = n0;
    while (m > 512) {
        m >>= 2;
        k <<= 2;
        cftmdl2(m, &a[n - m], &w[nw - m]);
    }
    cftleaf(m, 0, &a[n - m], nw, w);
    k >>= 1;
    for (j = n - m; j > 0; j -= m) {
        k++;
        isplt = cfttree(m, j, k, a, nw, w);
        cftleaf(m, isplt, &a[j - m], nw, w);
    }
    return (void *)0;
}
#endif /* USE_CDFT_THREADS */


/*
 * Single-threaded driver over a[0 .. n-1].
 *
 * The sub-block length starts at n and is divided by four (applying
 * cftmdl1() to the tail of the array each time) until it is at most 512.
 * The last sub-block is then finished with cftleaf(), and the remaining
 * sub-blocks are processed from the end of the array towards its start with
 * cfttree() + cftleaf().
 *
 * nw and w are forwarded to the helpers; cftmdl1() receives the table
 * offset nw - blk / 2.
 */
void cftrec4(int n, double *a, int nw, double *w) {
    int cfttree(int n, int j, int k, double *a, int nw, double *w);
    void cftleaf(int n, int isplt, double *a, int nw, double *w);
    void cftmdl1(int n, double *a, double *w);
    int blk = n;
    int count = 0;
    int end, split;

    while (blk > 512) {
        blk >>= 2;
        cftmdl1(blk, a + (n - blk), w + (nw - (blk >> 1)));
    }
    cftleaf(blk, 1, a + (n - blk), nw, w);

    /* Walk the remaining sub-blocks back to front; count is 1-based. */
    end = n - blk;
    while (end > 0) {
        ++count;
        split = cfttree(blk, end, count, a, nw, w);
        cftleaf(blk, split, a + (end - blk), nw, w);
        end -= blk;
    }
}


/*
 * Applies the cftmdl1()/cftmdl2() passes needed for the sub-block that ends
 * at a[j], chosen from the running sub-block counter k.
 *
 * - If k is not a multiple of 4, a single pass of length n is applied to
 *   a[j - n ..]: cftmdl1() when k is odd, cftmdl2() when k is even.
 * - Otherwise the length is multiplied by 4 for every factor of 4 in k, the
 *   remaining quotient decides between cftmdl1() (odd) and cftmdl2() (even),
 *   and that routine is applied for lengths m, m / 4, ... while m > 128,
 *   each time on a[j - m ..].
 *
 * Returns 1 if cftmdl1() was selected, 0 if cftmdl2() was selected.
 * Callers in this file pass k >= 1.
 */
int cfttree(int n, int j, int k, double *a, int nw, double *w) {
    void cftmdl1(int n, double *a, double *w);
    void cftmdl2(int n, double *a, double *w);
    int span, quot, odd;

    if ((k & 3) != 0) {
        odd = k & 1;
        if (odd != 0) {
            cftmdl1(n, a + (j - n), w + (nw - (n >> 1)));
        } else {
            cftmdl2(n, a + (j - n), w + (nw - n));
        }
        return odd;
    }

    /* Strip factors of four from k, growing the span by the same factor. */
    span = n;
    quot = k;
    while ((quot & 3) == 0) {
        span <<= 2;
        quot >>= 2;
    }
    odd = quot & 1;
    for (; span > 128; span >>= 2) {
        if (odd != 0) {
            cftmdl1(span, a + (j - span), w + (nw - (span >> 1)));
        } else {
            cftmdl2(span, a + (j - span), w + (nw - span));
        }
    }
    return odd;
}


/*
 * Finishes one leaf sub-block of a[] with a fixed sequence of kernel calls.
 *
 * For n == 512 the block is treated as four quarters of 128 doubles, each
 * processed by cftmdl1()/cftmdl2() followed by cftf161()/cftf162() on its
 * 32-double pieces.  For any other n the same layout is used with quarters
 * of 64 doubles and the cftf081()/cftf082() kernels on 16-double pieces.
 *
 * isplt only affects the last quarter: non-zero selects cftmdl1() and the
 * "1" kernel for its final piece, zero selects cftmdl2() and the "2" kernel.
 *
 * The kernels receive pointers into the tail of w[], at fixed distances
 * (8, 32, 64 or 128 doubles) before w[nw].
 */
void cftleaf(int n, int isplt, double *a, int nw, double *w) {
    void cftmdl1(int n, double *a, double *w);
    void cftmdl2(int n, double *a, double *w);
    void cftf161(double *a, double *w);
    void cftf162(double *a, double *w);
    void cftf081(double *a, double *w);
    void cftf082(double *a, double *w);
    double *tw8, *tw32, *tw64, *tw128;

    tw8 = w + (nw - 8);
    tw32 = w + (nw - 32);
    tw64 = w + (nw - 64);

    if (n != 512) {
        /* Quarter 0. */
        cftmdl1(64, a, tw32);
        cftf081(a, tw8);
        cftf082(a + 16, tw8);
        cftf081(a + 32, tw8);
        cftf081(a + 48, tw8);
        /* Quarter 1. */
        cftmdl2(64, a + 64, tw64);
        cftf081(a + 64, tw8);
        cftf082(a + 80, tw8);
        cftf081(a + 96, tw8);
        cftf082(a + 112, tw8);
        /* Quarter 2. */
        cftmdl1(64, a + 128, tw32);
        cftf081(a + 128, tw8);
        cftf082(a + 144, tw8);
        cftf081(a + 160, tw8);
        cftf081(a + 176, tw8);
        /* Quarter 3: variant depends on isplt. */
        if (isplt != 0) {
            cftmdl1(64, a + 192, tw32);
            cftf081(a + 240, tw8);
        } else {
            cftmdl2(64, a + 192, tw64);
            cftf082(a + 240, tw8);
        }
        cftf081(a + 192, tw8);
        cftf082(a + 208, tw8);
        cftf081(a + 224, tw8);
        return;
    }

    tw128 = w + (nw - 128);
    /* Quarter 0. */
    cftmdl1(128, a, tw64);
    cftf161(a, tw8);
    cftf162(a + 32, tw32);
    cftf161(a + 64, tw8);
    cftf161(a + 96, tw8);
    /* Quarter 1. */
    cftmdl2(128, a + 128, tw128);
    cftf161(a + 128, tw8);
    cftf162(a + 160, tw32);
    cftf161(a + 192, tw8);
    cftf162(a + 224, tw32);
    /* Quarter 2. */
    cftmdl1(128, a + 256, tw64);
    cftf161(a + 256, tw8);
    cftf162(a + 288, tw32);
    cftf161(a + 320, tw8);
    cftf161(a + 352, tw8);
    /* Quarter 3: variant depends on isplt. */
    if (isplt != 0) {
        cftmdl1(128, a + 384, tw64);
        cftf161(a + 480, tw8);
    } else {
        cftmdl2(128, a + 384, tw128);
        cftf162(a + 480, tw32);
    }
    cftf161(a + 384, tw8);
    cftf162(a + 416, tw32);
    cftf161(a + 448, tw8);
}


/*
 * One in-place radix-4 butterfly pass over a[0 .. n-1].
 *
 * a[] is handled as interleaved (real, imaginary) pairs in four blocks at
 * offsets 0, m, 2m and 3m, with m = 2 * (n >> 3).  With A0..A3 the complex
 * values at j, j + m, j + 2m, j + 3m, each butterfly stores
 *     a[j]      <- (A0 + A2) + (A1 + A3)
 *     a[j + m]  <- (A0 + A2) - (A1 + A3)
 *     a[j + 2m] <- f1 * ((A0 - A2) + i * (A1 - A3))
 *     a[j + 3m] <- conj(f3) * ((A0 - A2) - i * (A1 - A3))
 *
 * The factors are read directly from w[]: w[1] is the scalar wn4r, and for
 * the k-th loop step w[4k .. 4k+3] hold (f1 real, f1 imag, f3 real, f3 imag).
 */
void cftmdl1(int n, double *a, double *w) {
    int j, j0, j1, j2, j3, k, m, mh;
    double wn4r, wk1r, wk1i, wk3r, wk3i;
    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;

    mh = n >> 3;
    m = 2 * mh;
    /* Position 0: butterfly with no multiplying factor. */
    j1 = m;
    j2 = j1 + m;
    j3 = j2 + m;
    x0r = a[0] + a[j2];
    x0i = a[1] + a[j2 + 1];
    x1r = a[0] - a[j2];
    x1i = a[1] - a[j2 + 1];
    x2r = a[j1] + a[j3];
    x2i = a[j1 + 1] + a[j3 + 1];
    x3r = a[j1] - a[j3];
    x3i = a[j1 + 1] - a[j3 + 1];
    a[0] = x0r + x2r;
    a[1] = x0i + x2i;
    a[j1] = x0r - x2r;
    a[j1 + 1] = x0i - x2i;
    a[j2] = x1r - x3i;
    a[j2 + 1] = x1i + x3r;
    a[j3] = x1r + x3i;
    a[j3 + 1] = x1i - x3r;
    wn4r = w[1];
    k = 0;
    /* Each iteration handles position j and its mirror m - j. */
    for (j = 2; j < mh; j += 2) {
        k += 4;
        wk1r = w[k];
        wk1i = w[k + 1];
        wk3r = w[k + 2];
        wk3i = w[k + 3];
        j1 = j + m;
        j2 = j1 + m;
        j3 = j2 + m;
        x0r = a[j] + a[j2];
        x0i = a[j + 1] + a[j2 + 1];
        x1r = a[j] - a[j2];
        x1i = a[j + 1] - a[j2 + 1];
        x2r = a[j1] + a[j3];
        x2i = a[j1 + 1] + a[j3 + 1];
        x3r = a[j1] - a[j3];
        x3i = a[j1 + 1] - a[j3 + 1];
        a[j] = x0r + x2r;
        a[j + 1] = x0i + x2i;
        a[j1] = x0r - x2r;
        a[j1 + 1] = x0i - x2i;
        x0r = x1r - x3i;
        x0i = x1i + x3r;
        a[j2] = wk1r * x0r - wk1i * x0i;
        a[j2 + 1] = wk1r * x0i + wk1i * x0r;
        x0r = x1r + x3i;
        x0i = x1i - x3r;
        a[j3] = wk3r * x0r + wk3i * x0i;
        a[j3 + 1] = wk3r * x0i - wk3i * x0r;
        /* Mirror position: same factors with real/imag parts exchanged. */
        j0 = m - j;
        j1 = j0 + m;
        j2 = j1 + m;
        j3 = j2 + m;
        x0r = a[j0] + a[j2];
        x0i = a[j0 + 1] + a[j2 + 1];
        x1r = a[j0] - a[j2];
        x1i = a[j0 + 1] - a[j2 + 1];
        x2r = a[j1] + a[j3];
        x2i = a[j1 + 1] + a[j3 + 1];
        x3r = a[j1] - a[j3];
        x3i = a[j1 + 1] - a[j3 + 1];
        a[j0] = x0r + x2r;
        a[j0 + 1] = x0i + x2i;
        a[j1] = x0r - x2r;
        a[j1 + 1] = x0i - x2i;
        x0r = x1r - x3i;
        x0i = x1i + x3r;
        a[j2] = wk1i * x0r - wk1r * x0i;
        a[j2 + 1] = wk1i * x0i + wk1r * x0r;
        x0r = x1r + x3i;
        x0i = x1i - x3r;
        a[j3] = wk3i * x0r + wk3r * x0i;
        a[j3 + 1] = wk3i * x0i - wk3r * x0r;
    }
    /* Middle position mh: factors are wn4r * (1 + i) and -wn4r * (1 - i). */
    j0 = mh;
    j1 = j0 + m;
    j2 = j1 + m;
    j3 = j2 + m;
    x0r = a[j0] + a[j2];
    x0i = a[j0 + 1] + a[j2 + 1];
    x1r = a[j0] - a[j2];
    x1i = a[j0 + 1] - a[j2 + 1];
    x2r = a[j1] + a[j3];
    x2i = a[j1 + 1] + a[j3 + 1];
    x3r = a[j1] - a[j3];
    x3i = a[j1 + 1] - a[j3 + 1];
    a[j0] = x0r + x2r;
    a[j0 + 1] = x0i + x2i;
    a[j1] = x0r - x2r;
    a[j1 + 1] = x0i - x2i;
    x0r = x1r - x3i;
    x0i = x1i + x3r;
    a[j2] = wn4r * (x0r - x0i);
    a[j2 + 1] = wn4r * (x0i + x0r);
    x0r = x1r + x3i;
    x0i = x1i - x3r;
    a[j3] = -wn4r * (x0r + x0i);
    a[j3 + 1] = -wn4r * (x0i - x0r);
}


/*
 * One in-place butterfly pass over a[0 .. n-1], second variant.
 *
 * a[] is handled as interleaved (real, imaginary) pairs in four blocks at
 * offsets 0, m, 2m and 3m, with m = 2 * (n >> 3).  With A0..A3 the complex
 * values at j, j + m, j + 2m, j + 3m, the inputs are first combined as
 *     x0 = A0 + i * A2,  x1 = A0 - i * A2,
 *     x2 = A1 + i * A3,  x3 = A1 - i * A3
 * and, unlike cftmdl1(), every one of x0..x3 is multiplied by a factor
 * before the final sum/difference is stored.
 *
 * Factors come from w[]: w[1] is the scalar wn4r; inside the loop one set is
 * read at ascending index k (wk*) and a second set at descending index kr,
 * starting below 2 * m (wd*, stored with imaginary part first).  The middle
 * position uses w[m], w[m + 1].
 */
void cftmdl2(int n, double *a, double *w) {
    int j, j0, j1, j2, j3, k, kr, m, mh;
    double wn4r, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i;
    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y2r, y2i;

    mh = n >> 3;
    m = 2 * mh;
    wn4r = w[1];
    /* Position 0: x2 and x3 are scaled by wn4r * (1 + i). */
    j1 = m;
    j2 = j1 + m;
    j3 = j2 + m;
    x0r = a[0] - a[j2 + 1];
    x0i = a[1] + a[j2];
    x1r = a[0] + a[j2 + 1];
    x1i = a[1] - a[j2];
    x2r = a[j1] - a[j3 + 1];
    x2i = a[j1 + 1] + a[j3];
    x3r = a[j1] + a[j3 + 1];
    x3i = a[j1 + 1] - a[j3];
    y0r = wn4r * (x2r - x2i);
    y0i = wn4r * (x2i + x2r);
    a[0] = x0r + y0r;
    a[1] = x0i + y0i;
    a[j1] = x0r - y0r;
    a[j1 + 1] = x0i - y0i;
    y0r = wn4r * (x3r - x3i);
    y0i = wn4r * (x3i + x3r);
    a[j2] = x1r - y0i;
    a[j2 + 1] = x1i + y0r;
    a[j3] = x1r + y0i;
    a[j3 + 1] = x1i - y0r;
    k = 0;
    kr = 2 * m;
    /* Each iteration handles position j and its mirror m - j. */
    for (j = 2; j < mh; j += 2) {
        k += 4;
        wk1r = w[k];
        wk1i = w[k + 1];
        wk3r = w[k + 2];
        wk3i = w[k + 3];
        kr -= 4;
        /* Note the order: the imaginary part is read first for wd*. */
        wd1i = w[kr];
        wd1r = w[kr + 1];
        wd3i = w[kr + 2];
        wd3r = w[kr + 3];
        j1 = j + m;
        j2 = j1 + m;
        j3 = j2 + m;
        x0r = a[j] - a[j2 + 1];
        x0i = a[j + 1] + a[j2];
        x1r = a[j] + a[j2 + 1];
        x1i = a[j + 1] - a[j2];
        x2r = a[j1] - a[j3 + 1];
        x2i = a[j1 + 1] + a[j3];
        x3r = a[j1] + a[j3 + 1];
        x3i = a[j1 + 1] - a[j3];
        /* y0 = wk1 * x0, y2 = wd1 * x2. */
        y0r = wk1r * x0r - wk1i * x0i;
        y0i = wk1r * x0i + wk1i * x0r;
        y2r = wd1r * x2r - wd1i * x2i;
        y2i = wd1r * x2i + wd1i * x2r;
        a[j] = y0r + y2r;
        a[j + 1] = y0i + y2i;
        a[j1] = y0r - y2r;
        a[j1 + 1] = y0i - y2i;
        /* y0 = conj(wk3) * x1, y2 = conj(wd3) * x3. */
        y0r = wk3r * x1r + wk3i * x1i;
        y0i = wk3r * x1i - wk3i * x1r;
        y2r = wd3r * x3r + wd3i * x3i;
        y2i = wd3r * x3i - wd3i * x3r;
        a[j2] = y0r + y2r;
        a[j2 + 1] = y0i + y2i;
        a[j3] = y0r - y2r;
        a[j3 + 1] = y0i - y2i;
        /*
         * Mirror position m - j: the roles of wk* and wd* are swapped and
         * each factor is applied with real/imag parts exchanged.
         */
        j0 = m - j;
        j1 = j0 + m;
        j2 = j1 + m;
        j3 = j2 + m;
        x0r = a[j0] - a[j2 + 1];
        x0i = a[j0 + 1] + a[j2];
        x1r = a[j0] + a[j2 + 1];
        x1i = a[j0 + 1] - a[j2];
        x2r = a[j1] - a[j3 + 1];
        x2i = a[j1 + 1] + a[j3];
        x3r = a[j1] + a[j3 + 1];
        x3i = a[j1 + 1] - a[j3];
        y0r = wd1i * x0r - wd1r * x0i;
        y0i = wd1i * x0i + wd1r * x0r;
        y2r = wk1i * x2r - wk1r * x2i;
        y2i = wk1i * x2i + wk1r * x2r;
        a[j0] = y0r + y2r;
        a[j0 + 1] = y0i + y2i;
        a[j1] = y0r - y2r;
        a[j1 + 1] = y0i - y2i;
        y0r = wd3i * x1r + wd3r * x1i;
        y0i = wd3i * x1i - wd3r * x1r;
        y2r = wk3i * x3r + wk3r * x3i;
        y2i = wk3i * x3i - wk3r * x3r;
        a[j2] = y0r + y2r;
        a[j2 + 1] = y0i + y2i;
        a[j3] = y0r - y2r;
        a[j3 + 1] = y0i - y2i;
    }
    /* Middle position mh: single factor (w[m], w[m + 1]) in both forms. */
    wk1r = w[m];
    wk1i = w[m + 1];
    j0 = mh;
    j1 = j0 + m;
    j2 = j1 + m;
    j3 = j2 + m;
    x0r = a[j0] - a[j2 + 1];
    x0i = a[j0 + 1] + a[j2];
    x1r = a[j0] + a[j2 + 1];
    x1i = a[j0 + 1] - a[j2];
    x2r = a[j1] - a[j3 + 1];
    x2i = a[j1 + 1] + a[j3];
    x3r = a[j1] + a[j3 + 1];
    x3i = a[j1 + 1] - a[j3];
    y0r = wk1r * x0r - wk1i * x0i;
    y0i = wk1r * x0i + wk1i * x0r;
    y2r = wk1i * x2r - wk1r * x2i;
    y2i = wk1i * x2i + wk1r * x2r;
    a[j0] = y0r + y2r;
    a[j0 + 1] = y0i + y2i;
    a[j1] = y0r - y2r;
    a[j1 + 1] = y0i - y2i;
    y0r = wk1i * x1r - wk1r * x1i;
    y0i = wk1i * x1i + wk1r * x1r;
    y2r = wk1r * x3r - wk1i * x3i;
    y2i = wk1r * x3i + wk1i * x3r;
    /* Here the difference goes to block 2 and the sum to block 3. */
    a[j2] = y0r - y2r;
    a[j2 + 1] = y0i - y2i;
    a[j3] = y0r + y2r;
    a[j3 + 1] = y0i + y2i;
}


/*
 * Runs the fixed-size kernels over four consecutive pieces of a[].
 *
 * For n == 128 the pieces are 32 doubles each and are handled, in order, by
 * cftf161(), cftf162(), cftf161(), cftf161().  For any other n the pieces are
 * 16 doubles each and are handled by cftf081(), cftf082(), cftf081(),
 * cftf081().
 *
 * The kernels receive pointers into the tail of w[]: 8 doubles before w[nw],
 * except cftf162(), which receives the pointer 32 doubles before w[nw].
 */
void cftfx41(int n, double *a, int nw, double *w) {
    void cftf161(double *a, double *w);
    void cftf162(double *a, double *w);
    void cftf081(double *a, double *w);
    void cftf082(double *a, double *w);
    double *tw8 = w + (nw - 8);

    if (n != 128) {
        cftf081(a, tw8);
        cftf082(a + 16, tw8);
        cftf081(a + 32, tw8);
        cftf081(a + 48, tw8);
        return;
    }
    cftf161(a, tw8);
    cftf162(a + 32, w + (nw - 32));
    cftf161(a + 64, tw8);
    cftf161(a + 96, tw8);
}


/*
 * Fully unrolled in-place kernel over 16 complex values stored as
 * interleaved (real, imaginary) pairs in a[0 .. 31].
 *
 * The work is done in two layers of radix-4 butterflies:
 *   1. elements (e, e + 4, e + 8, e + 12) for e = 0..3 are combined into the
 *      intermediates y0..y15, applying factors built from w[1] (wn4r) and
 *      the complex value (w[2], w[3]) = (wk1r, wk1i);
 *   2. the groups y12..y15, y8..y11, y4..y7 and y0..y3 are combined and
 *      written to a[24..31], a[16..23], a[8..15] and a[0..7] respectively.
 *
 * Only w[1], w[2] and w[3] are read.
 */
void cftf161(double *a, double *w) {
    double wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i,
        y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i,
        y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, y12r, y12i, y13r, y13i,
        y14r, y14i, y15r, y15i;

    wn4r = w[1];
    wk1r = w[2];
    wk1i = w[3];
    /* Layer 1, elements 0, 4, 8, 12: no multiplying factor. */
    x0r = a[0] + a[16];
    x0i = a[1] + a[17];
    x1r = a[0] - a[16];
    x1i = a[1] - a[17];
    x2r = a[8] + a[24];
    x2i = a[9] + a[25];
    x3r = a[8] - a[24];
    x3i = a[9] - a[25];
    y0r = x0r + x2r;
    y0i = x0i + x2i;
    y4r = x0r - x2r;
    y4i = x0i - x2i;
    y8r = x1r - x3i;
    y8i = x1i + x3r;
    y12r = x1r + x3i;
    y12i = x1i - x3r;
    /* Layer 1, elements 1, 5, 9, 13: factor wk1 (and wk1 with r/i swapped). */
    x0r = a[2] + a[18];
    x0i = a[3] + a[19];
    x1r = a[2] - a[18];
    x1i = a[3] - a[19];
    x2r = a[10] + a[26];
    x2i = a[11] + a[27];
    x3r = a[10] - a[26];
    x3i = a[11] - a[27];
    y1r = x0r + x2r;
    y1i = x0i + x2i;
    y5r = x0r - x2r;
    y5i = x0i - x2i;
    x0r = x1r - x3i;
    x0i = x1i + x3r;
    y9r = wk1r * x0r - wk1i * x0i;
    y9i = wk1r * x0i + wk1i * x0r;
    x0r = x1r + x3i;
    x0i = x1i - x3r;
    y13r = wk1i * x0r - wk1r * x0i;
    y13i = wk1i * x0i + wk1r * x0r;
    /* Layer 1, elements 2, 6, 10, 14: factors wn4r * (1 + i), wn4r * (1 - i). */
    x0r = a[4] + a[20];
    x0i = a[5] + a[21];
    x1r = a[4] - a[20];
    x1i = a[5] - a[21];
    x2r = a[12] + a[28];
    x2i = a[13] + a[29];
    x3r = a[12] - a[28];
    x3i = a[13] - a[29];
    y2r = x0r + x2r;
    y2i = x0i + x2i;
    y6r = x0r - x2r;
    y6i = x0i - x2i;
    x0r = x1r - x3i;
    x0i = x1i + x3r;
    y10r = wn4r * (x0r - x0i);
    y10i = wn4r * (x0i + x0r);
    x0r = x1r + x3i;
    x0i = x1i - x3r;
    y14r = wn4r * (x0r + x0i);
    y14i = wn4r * (x0i - x0r);
    /* Layer 1, elements 3, 7, 11, 15: factor wk1 with r/i swapped (and wk1). */
    x0r = a[6] + a[22];
    x0i = a[7] + a[23];
    x1r = a[6] - a[22];
    x1i = a[7] - a[23];
    x2r = a[14] + a[30];
    x2i = a[15] + a[31];
    x3r = a[14] - a[30];
    x3i = a[15] - a[31];
    y3r = x0r + x2r;
    y3i = x0i + x2i;
    y7r = x0r - x2r;
    y7i = x0i - x2i;
    x0r = x1r - x3i;
    x0i = x1i + x3r;
    y11r = wk1i * x0r - wk1r * x0i;
    y11i = wk1i * x0i + wk1r * x0r;
    x0r = x1r + x3i;
    x0i = x1i - x3r;
    y15r = wk1r * x0r - wk1i * x0i;
    y15i = wk1r * x0i + wk1i * x0r;
    /* Layer 2, y12..y15 -> a[24..31]. */
    x0r = y12r - y14r;
    x0i = y12i - y14i;
    x1r = y12r + y14r;
    x1i = y12i + y14i;
    x2r = y13r - y15r;
    x2i = y13i - y15i;
    x3r = y13r + y15r;
    x3i = y13i + y15i;
    a[24] = x0r + x2r;
    a[25] = x0i + x2i;
    a[26] = x0r - x2r;
    a[27] = x0i - x2i;
    a[28] = x1r - x3i;
    a[29] = x1i + x3r;
    a[30] = x1r + x3i;
    a[31] = x1i - x3r;
    /* Layer 2, y8..y11 -> a[16..23]. */
    x0r = y8r + y10r;
    x0i = y8i + y10i;
    x1r = y8r - y10r;
    x1i = y8i - y10i;
    x2r = y9r + y11r;
    x2i = y9i + y11i;
    x3r = y9r - y11r;
    x3i = y9i - y11i;
    a[16] = x0r + x2r;
    a[17] = x0i + x2i;
    a[18] = x0r - x2r;
    a[19] = x0i - x2i;
    a[20] = x1r - x3i;
    a[21] = x1i + x3r;
    a[22] = x1r + x3i;
    a[23] = x1i - x3r;
    /* Layer 2, y4..y7 -> a[8..15]; uses wn4r * (1 + i) on two terms. */
    x0r = y5r - y7i;
    x0i = y5i + y7r;
    x2r = wn4r * (x0r - x0i);
    x2i = wn4r * (x0i + x0r);
    x0r = y5r + y7i;
    x0i = y5i - y7r;
    x3r = wn4r * (x0r - x0i);
    x3i = wn4r * (x0i + x0r);
    x0r = y4r - y6i;
    x0i = y4i + y6r;
    x1r = y4r + y6i;
    x1i = y4i - y6r;
    a[8] = x0r + x2r;
    a[9] = x0i + x2i;
    a[10] = x0r - x2r;
    a[11] = x0i - x2i;
    a[12] = x1r - x3i;
    a[13] = x1i + x3r;
    a[14] = x1r + x3i;
    a[15] = x1i - x3r;
    /* Layer 2, y0..y3 -> a[0..7]: no multiplying factor. */
    x0r = y0r + y2r;
    x0i = y0i + y2i;
    x1r = y0r - y2r;
    x1i = y0i - y2i;
    x2r = y1r + y3r;
    x2i = y1i + y3i;
    x3r = y1r - y3r;
    x3i = y1i - y3i;
    a[0] = x0r + x2r;
    a[1] = x0i + x2i;
    a[2] = x0r - x2r;
    a[3] = x0i - x2i;
    a[4] = x1r - x3i;
    a[5] = x1i + x3r;
    a[6] = x1r + x3i;
    a[7] = x1i - x3r;
}


/*
 * In-place butterfly over the 32 doubles a[0..31], using coefficients read
 * from w[1] and w[4..9] (w[7] is negated on load).
 *
 * The r/i suffixes of the temporaries and the even/odd indexing suggest that
 * a[] holds 16 complex values as interleaved (re, im) pairs and that w[] is
 * a precomputed twiddle table -- NOTE(review): confirm against the FFT
 * package this file was taken from.
 *
 * The routine has two phases: the first reads a[] and fills y0..y15, the
 * second combines the y values and writes every element of a[0..31].
 */
void cftf162(double *a, double *w) {
    double wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i, x0r, x0i, x1r, x1i, x2r,
        x2i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r,
        y6i, y7r, y7i, y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, y12r, y12i,
        y13r, y13i, y14r, y14i, y15r, y15i;

    /* Load the coefficients; note the sign flip applied to w[7]. */
    wn4r = w[1];
    wk1r = w[4];
    wk1i = w[5];
    wk3r = w[6];
    wk3i = -w[7];
    wk2r = w[8];
    wk2i = w[9];
    /* Phase 1: read a[] and build the intermediate values y0..y15. */
    x1r = a[0] - a[17];
    x1i = a[1] + a[16];
    x0r = a[8] - a[25];
    x0i = a[9] + a[24];
    x2r = wn4r * (x0r - x0i);
    x2i = wn4r * (x0i + x0r);
    y0r = x1r + x2r;
    y0i = x1i + x2i;
    y4r = x1r - x2r;
    y4i = x1i - x2i;
    x1r = a[0] + a[17];
    x1i = a[1] - a[16];
    x0r = a[8] + a[25];
    x0i = a[9] - a[24];
    x2r = wn4r * (x0r - x0i);
    x2i = wn4r * (x0i + x0r);
    y8r = x1r - x2i;
    y8i = x1i + x2r;
    y12r = x1r + x2i;
    y12i = x1i - x2r;
    x0r = a[2] - a[19];
    x0i = a[3] + a[18];
    x1r = wk1r * x0r - wk1i * x0i;
    x1i = wk1r * x0i + wk1i * x0r;
    x0r = a[10] - a[27];
    x0i = a[11] + a[26];
    x2r = wk3i * x0r - wk3r * x0i;
    x2i = wk3i * x0i + wk3r * x0r;
    y1r = x1r + x2r;
    y1i = x1i + x2i;
    y5r = x1r - x2r;
    y5i = x1i - x2i;
    x0r = a[2] + a[19];
    x0i = a[3] - a[18];
    x1r = wk3r * x0r - wk3i * x0i;
    x1i = wk3r * x0i + wk3i * x0r;
    x0r = a[10] + a[27];
    x0i = a[11] - a[26];
    x2r = wk1r * x0r + wk1i * x0i;
    x2i = wk1r * x0i - wk1i * x0r;
    y9r = x1r - x2r;
    y9i = x1i - x2i;
    y13r = x1r + x2r;
    y13i = x1i + x2i;
    x0r = a[4] - a[21];
    x0i = a[5] + a[20];
    x1r = wk2r * x0r - wk2i * x0i;
    x1i = wk2r * x0i + wk2i * x0r;
    x0r = a[12] - a[29];
    x0i = a[13] + a[28];
    x2r = wk2i * x0r - wk2r * x0i;
    x2i = wk2i * x0i + wk2r * x0r;
    y2r = x1r + x2r;
    y2i = x1i + x2i;
    y6r = x1r - x2r;
    y6i = x1i - x2i;
    x0r = a[4] + a[21];
    x0i = a[5] - a[20];
    x1r = wk2i * x0r - wk2r * x0i;
    x1i = wk2i * x0i + wk2r * x0r;
    x0r = a[12] + a[29];
    x0i = a[13] - a[28];
    x2r = wk2r * x0r - wk2i * x0i;
    x2i = wk2r * x0i + wk2i * x0r;
    y10r = x1r - x2r;
    y10i = x1i - x2i;
    y14r = x1r + x2r;
    y14i = x1i + x2i;
    x0r = a[6] - a[23];
    x0i = a[7] + a[22];
    x1r = wk3r * x0r - wk3i * x0i;
    x1i = wk3r * x0i + wk3i * x0r;
    x0r = a[14] - a[31];
    x0i = a[15] + a[30];
    x2r = wk1i * x0r - wk1r * x0i;
    x2i = wk1i * x0i + wk1r * x0r;
    y3r = x1r + x2r;
    y3i = x1i + x2i;
    y7r = x1r - x2r;
    y7i = x1i - x2i;
    x0r = a[6] + a[23];
    x0i = a[7] - a[22];
    x1r = wk1i * x0r + wk1r * x0i;
    x1i = wk1i * x0i - wk1r * x0r;
    x0r = a[14] + a[31];
    x0i = a[15] - a[30];
    x2r = wk3i * x0r - wk3r * x0i;
    x2i = wk3i * x0i + wk3r * x0r;
    y11r = x1r + x2r;
    y11i = x1i + x2i;
    y15r = x1r - x2r;
    y15i = x1i - x2i;
    /* Phase 2: combine the y values and store the results back into a[]. */
    /* a[0..7] from y0..y3 */
    x1r = y0r + y2r;
    x1i = y0i + y2i;
    x2r = y1r + y3r;
    x2i = y1i + y3i;
    a[0] = x1r + x2r;
    a[1] = x1i + x2i;
    a[2] = x1r - x2r;
    a[3] = x1i - x2i;
    x1r = y0r - y2r;
    x1i = y0i - y2i;
    x2r = y1r - y3r;
    x2i = y1i - y3i;
    a[4] = x1r - x2i;
    a[5] = x1i + x2r;
    a[6] = x1r + x2i;
    a[7] = x1i - x2r;
    /* a[8..15] from y4..y7 */
    x1r = y4r - y6i;
    x1i = y4i + y6r;
    x0r = y5r - y7i;
    x0i = y5i + y7r;
    x2r = wn4r * (x0r - x0i);
    x2i = wn4r * (x0i + x0r);
    a[8] = x1r + x2r;
    a[9] = x1i + x2i;
    a[10] = x1r - x2r;
    a[11] = x1i - x2i;
    x1r = y4r + y6i;
    x1i = y4i - y6r;
    x0r = y5r + y7i;
    x0i = y5i - y7r;
    x2r = wn4r * (x0r - x0i);
    x2i = wn4r * (x0i + x0r);
    a[12] = x1r - x2i;
    a[13] = x1i + x2r;
    a[14] = x1r + x2i;
    a[15] = x1i - x2r;
    /* a[16..23] from y8..y11 */
    x1r = y8r + y10r;
    x1i = y8i + y10i;
    x2r = y9r - y11r;
    x2i = y9i - y11i;
    a[16] = x1r + x2r;
    a[17] = x1i + x2i;
    a[18] = x1r - x2r;
    a[19] = x1i - x2i;
    x1r = y8r - y10r;
    x1i = y8i - y10i;
    x2r = y9r + y11r;
    x2i = y9i + y11i;
    a[20] = x1r - x2i;
    a[21] = x1i + x2r;
    a[22] = x1r + x2i;
    a[23] = x1i - x2r;
    /* a[24..31] from y12..y15 */
    x1r = y12r - y14i;
    x1i = y12i + y14r;
    x0r = y13r + y15i;
    x0i = y13i - y15r;
    x2r = wn4r * (x0r - x0i);
    x2i = wn4r * (x0i + x0r);
    a[24] = x1r + x2r;
    a[25] = x1i + x2i;
    a[26] = x1r - x2r;
    a[27] = x1i - x2i;
    x1r = y12r + y14i;
    x1i = y12i - y14r;
    x0r = y13r - y15i;
    x0i = y13i + y15r;
    x2r = wn4r * (x0r - x0i);
    x2i = wn4r * (x0i + x0r);
    a[28] = x1r - x2i;
    a[29] = x1i + x2r;
    a[30] = x1r + x2i;
    a[31] = x1i - x2r;
}


/*
 * In-place butterfly over the 16 doubles a[0..15]; the only coefficient used
 * is w[1].
 *
 * The r/i suffixes and even/odd indexing suggest a[] holds 8 complex values
 * as interleaved (re, im) pairs -- NOTE(review): confirm against the FFT
 * package this file was taken from.
 */
void cftf081(double *a, double *w) {
    double wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i,
        y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;

    wn4r = w[1];
    /* y0..y3: sums/differences of a[0..1], a[4..5], a[8..9], a[12..13]. */
    x0r = a[0] + a[8];
    x0i = a[1] + a[9];
    x1r = a[0] - a[8];
    x1i = a[1] - a[9];
    x2r = a[4] + a[12];
    x2i = a[5] + a[13];
    x3r = a[4] - a[12];
    x3i = a[5] - a[13];
    y0r = x0r + x2r;
    y0i = x0i + x2i;
    y2r = x0r - x2r;
    y2i = x0i - x2i;
    y1r = x1r - x3i;
    y1i = x1i + x3r;
    y3r = x1r + x3i;
    y3i = x1i - x3r;
    /* y4..y7: same pattern on a[2..3], a[6..7], a[10..11], a[14..15]. */
    x0r = a[2] + a[10];
    x0i = a[3] + a[11];
    x1r = a[2] - a[10];
    x1i = a[3] - a[11];
    x2r = a[6] + a[14];
    x2i = a[7] + a[15];
    x3r = a[6] - a[14];
    x3i = a[7] - a[15];
    y4r = x0r + x2r;
    y4i = x0i + x2i;
    y6r = x0r - x2r;
    y6i = x0i - x2i;
    x0r = x1r - x3i;
    x0i = x1i + x3r;
    x2r = x1r + x3i;
    x2i = x1i - x3r;
    /* y5 and y7 are additionally scaled by wn4r. */
    y5r = wn4r * (x0r - x0i);
    y5i = wn4r * (x0r + x0i);
    y7r = wn4r * (x2r - x2i);
    y7i = wn4r * (x2r + x2i);
    /* Combine the two halves and store; a[8..15] is written before a[0..7]. */
    a[8] = y1r + y5r;
    a[9] = y1i + y5i;
    a[10] = y1r - y5r;
    a[11] = y1i - y5i;
    a[12] = y3r - y7i;
    a[13] = y3i + y7r;
    a[14] = y3r + y7i;
    a[15] = y3i - y7r;
    a[0] = y0r + y4r;
    a[1] = y0i + y4i;
    a[2] = y0r - y4r;
    a[3] = y0i - y4i;
    a[4] = y2r - y6i;
    a[5] = y2i + y6r;
    a[6] = y2r + y6i;
    a[7] = y2i - y6r;
}


/*
 * In-place butterfly over the 16 doubles a[0..15], using the coefficients
 * w[1], w[2] and w[3].
 *
 * The r/i suffixes and even/odd indexing suggest a[] holds 8 complex values
 * as interleaved (re, im) pairs -- NOTE(review): confirm against the FFT
 * package this file was taken from.
 */
void cftf082(double *a, double *w) {
    double wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i, y0r, y0i, y1r, y1i, y2r, y2i,
        y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;

    wn4r = w[1];
    wk1r = w[2];
    wk1i = w[3];
    /* First phase: read a[] and build y0..y7. */
    y0r = a[0] - a[9];
    y0i = a[1] + a[8];
    y1r = a[0] + a[9];
    y1i = a[1] - a[8];
    x0r = a[4] - a[13];
    x0i = a[5] + a[12];
    y2r = wn4r * (x0r - x0i);
    y2i = wn4r * (x0i + x0r);
    x0r = a[4] + a[13];
    x0i = a[5] - a[12];
    y3r = wn4r * (x0r - x0i);
    y3i = wn4r * (x0i + x0r);
    x0r = a[2] - a[11];
    x0i = a[3] + a[10];
    y4r = wk1r * x0r - wk1i * x0i;
    y4i = wk1r * x0i + wk1i * x0r;
    x0r = a[2] + a[11];
    x0i = a[3] - a[10];
    y5r = wk1i * x0r - wk1r * x0i;
    y5i = wk1i * x0i + wk1r * x0r;
    x0r = a[6] - a[15];
    x0i = a[7] + a[14];
    y6r = wk1i * x0r - wk1r * x0i;
    y6i = wk1i * x0i + wk1r * x0r;
    x0r = a[6] + a[15];
    x0i = a[7] - a[14];
    y7r = wk1r * x0r - wk1i * x0i;
    y7i = wk1r * x0i + wk1i * x0r;
    /* Second phase: combine the y values and store into a[0..15]. */
    x0r = y0r + y2r;
    x0i = y0i + y2i;
    x1r = y4r + y6r;
    x1i = y4i + y6i;
    a[0] = x0r + x1r;
    a[1] = x0i + x1i;
    a[2] = x0r - x1r;
    a[3] = x0i - x1i;
    x0r = y0r - y2r;
    x0i = y0i - y2i;
    x1r = y4r - y6r;
    x1i = y4i - y6i;
    a[4] = x0r - x1i;
    a[5] = x0i + x1r;
    a[6] = x0r + x1i;
    a[7] = x0i - x1r;
    x0r = y1r - y3i;
    x0i = y1i + y3r;
    x1r = y5r - y7r;
    x1i = y5i - y7i;
    a[8] = x0r + x1r;
    a[9] = x0i + x1i;
    a[10] = x0r - x1r;
    a[11] = x0i - x1i;
    x0r = y1r + y3i;
    x0i = y1i - y3r;
    x1r = y5r + y7r;
    x1i = y5i + y7i;
    a[12] = x0r - x1i;
    a[13] = x0i + x1r;
    a[14] = x0r + x1i;
    a[15] = x0i - x1r;
}


/*
 * In-place 4-value butterfly on a[0..7], read as four (re, im) pairs
 * z0 = (a[0], a[1]), z1 = (a[2], a[3]), z2 = (a[4], a[5]), z3 = (a[6], a[7]).
 *
 * On return:
 *   (a[0], a[1]) = (z0 + z2) + (z1 + z3)
 *   (a[2], a[3]) = (z0 - z2) + i * (z1 - z3)
 *   (a[4], a[5]) = (z0 + z2) - (z1 + z3)
 *   (a[6], a[7]) = (z0 - z2) - i * (z1 - z3)
 */
void cftf040(double *a) {
    const double sum02r = a[0] + a[4];
    const double sum02i = a[1] + a[5];
    const double dif02r = a[0] - a[4];
    const double dif02i = a[1] - a[5];
    const double sum13r = a[2] + a[6];
    const double sum13i = a[3] + a[7];
    const double dif13r = a[2] - a[6];
    const double dif13i = a[3] - a[7];

    /* Outputs built from the two sums. */
    a[0] = sum02r + sum13r;
    a[1] = sum02i + sum13i;
    a[4] = sum02r - sum13r;
    a[5] = sum02i - sum13i;

    /* Outputs built from the two differences (multiplication by +/- i). */
    a[2] = dif02r - dif13i;
    a[3] = dif02i + dif13r;
    a[6] = dif02r + dif13i;
    a[7] = dif02i - dif13r;
}


/*
 * In-place 4-value butterfly on a[0..7], read as four (re, im) pairs
 * z0 = (a[0], a[1]), z1 = (a[2], a[3]), z2 = (a[4], a[5]), z3 = (a[6], a[7]).
 * Same as cftf040 except that the sign of the i-multiplication is flipped.
 *
 * On return:
 *   (a[0], a[1]) = (z0 + z2) + (z1 + z3)
 *   (a[2], a[3]) = (z0 - z2) - i * (z1 - z3)
 *   (a[4], a[5]) = (z0 + z2) - (z1 + z3)
 *   (a[6], a[7]) = (z0 - z2) + i * (z1 - z3)
 */
void cftb040(double *a) {
    const double sum02r = a[0] + a[4];
    const double sum02i = a[1] + a[5];
    const double dif02r = a[0] - a[4];
    const double dif02i = a[1] - a[5];
    const double sum13r = a[2] + a[6];
    const double sum13i = a[3] + a[7];
    const double dif13r = a[2] - a[6];
    const double dif13i = a[3] - a[7];

    /* Outputs built from the two sums. */
    a[0] = sum02r + sum13r;
    a[1] = sum02i + sum13i;
    a[4] = sum02r - sum13r;
    a[5] = sum02i - sum13i;

    /* Outputs built from the two differences (multiplication by -/+ i). */
    a[2] = dif02r + dif13i;
    a[3] = dif02i - dif13r;
    a[6] = dif02r - dif13i;
    a[7] = dif02i + dif13r;
}


/*
 * In-place 2-value butterfly on a[0..3], read as two (re, im) pairs:
 * (a[0], a[1]) becomes the sum and (a[2], a[3]) the difference of the pairs.
 */
void cftx020(double *a) {
    const double re0 = a[0];
    const double im0 = a[1];
    const double re1 = a[2];
    const double im1 = a[3];

    a[0] = re0 + re1;
    a[1] = im0 + im1;
    a[2] = re0 - re1;
    a[3] = im0 - im1;
}


/*
 * In-place pass over a[2..n-1] that pairs entry (a[j], a[j+1]) with its
 * mirror (a[n-j], a[n-j+1]) for j = 2, 4, ... < n/2 and applies a correction
 * built from the table c[0..nc].
 *
 * For each pair the weights are wkr = 0.5 - c[nc - kk] and wki = c[kk], with
 * kk advancing in steps of 2 * nc / (n / 2).  a[0], a[1] and the middle
 * entry are left untouched.
 */
void rftfsub(int n, double *a, int nc, double *c) {
    const int half = n >> 1;
    const int stride = 2 * nc / half;
    int lo = 2;           /* index of the lower pair */
    int hi = n - 2;       /* index of the mirrored pair */
    int tab = stride;     /* current offset into c[] */

    while (lo < half) {
        const double wr = 0.5 - c[nc - tab];
        const double wi = c[tab];
        const double dr = a[lo] - a[hi];
        const double di = a[lo + 1] + a[hi + 1];
        const double cr = wr * dr - wi * di;
        const double ci = wr * di + wi * dr;

        a[lo] = a[lo] - cr;
        a[lo + 1] = a[lo + 1] - ci;
        a[hi] = a[hi] + cr;
        a[hi + 1] = a[hi + 1] - ci;

        lo += 2;
        hi -= 2;
        tab += stride;
    }
}


/*
 * Counterpart of rftfsub with the sign of the wki terms flipped.
 *
 * Walks the pairs (a[j], a[j+1]) and (a[n-j], a[n-j+1]) for
 * j = 2, 4, ... < n/2, with weights wkr = 0.5 - c[nc - kk] and wki = c[kk],
 * kk advancing in steps of 2 * nc / (n / 2), and updates both pairs in place.
 */
void rftbsub(int n, double *a, int nc, double *c) {
    const int half = n >> 1;
    const int stride = 2 * nc / half;
    int lo = 2;           /* index of the lower pair */
    int hi = n - 2;       /* index of the mirrored pair */
    int tab = stride;     /* current offset into c[] */

    while (lo < half) {
        const double wr = 0.5 - c[nc - tab];
        const double wi = c[tab];
        const double dr = a[lo] - a[hi];
        const double di = a[lo + 1] + a[hi + 1];
        const double cr = wr * dr + wi * di;
        const double ci = wr * di - wi * dr;

        a[lo] = a[lo] - cr;
        a[lo + 1] = a[lo + 1] - ci;
        a[hi] = a[hi] + cr;
        a[hi + 1] = a[hi + 1] - ci;

        lo += 2;
        hi -= 2;
        tab += stride;
    }
}


/*
 * In-place pass that mixes a[j] with its mirror a[n - j] for j = 1 .. n/2 - 1
 * using weights derived from the table c[0..nc], then scales the middle
 * element a[n/2] by c[0].
 *
 * With wkr = c[kk] - c[nc - kk] and wki = c[kk] + c[nc - kk]
 * (kk = j * (nc / n)):
 *   a[j]     <- wkr * a[j] + wki * a[n - j]
 *   a[n - j] <- wki * a[j] - wkr * a[n - j]      (old values on the right)
 */
void dctsub(int n, double *a, int nc, double *c) {
    const int mid = n >> 1;
    const int stride = nc / n;
    int lo, hi, tab;

    for (lo = 1, hi = n - 1, tab = stride; lo < mid;
         lo++, hi--, tab += stride) {
        const double wdiff = c[tab] - c[nc - tab];
        const double wsum = c[tab] + c[nc - tab];
        const double old_lo = a[lo];
        const double old_hi = a[hi];

        a[lo] = wdiff * old_lo + wsum * old_hi;
        a[hi] = wsum * old_lo - wdiff * old_hi;
    }
    a[mid] *= c[0];
}


/*
 * Counterpart of dctsub with the roles of a[j] and a[n - j] swapped.
 *
 * With wkr = c[kk] - c[nc - kk] and wki = c[kk] + c[nc - kk]
 * (kk = j * (nc / n)), for j = 1 .. n/2 - 1:
 *   a[n - j] <- wkr * a[n - j] + wki * a[j]
 *   a[j]     <- wki * a[n - j] - wkr * a[j]      (old values on the right)
 * Finally the middle element a[n/2] is scaled by c[0].
 */
void dstsub(int n, double *a, int nc, double *c) {
    const int mid = n >> 1;
    const int stride = nc / n;
    int lo, hi, tab;

    for (lo = 1, hi = n - 1, tab = stride; lo < mid;
         lo++, hi--, tab += stride) {
        const double wdiff = c[tab] - c[nc - tab];
        const double wsum = c[tab] + c[nc - tab];
        const double old_lo = a[lo];
        const double old_hi = a[hi];

        a[hi] = wdiff * old_hi + wsum * old_lo;
        a[lo] = wsum * old_hi - wdiff * old_lo;
    }
    a[mid] *= c[0];
}


================================================
FILE: audio/paddleaudio/third_party/kaldi-native-fbank/csrc/log.cc
================================================
/**
 * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
 *
 * See LICENSE for clarification regarding multiple authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Stack trace related stuff is from kaldi.
 * Refer to
 * https://github.com/kaldi-asr/kaldi/blob/master/src/base/kaldi-error.cc
 */

#include "kaldi-native-fbank/csrc/log.h"

#ifdef KNF_HAVE_EXECINFO_H
#include <execinfo.h> // To get stack trace in error messages.
#ifdef KNF_HAVE_CXXABI_H
#include <cxxabi.h> // For name demangling.
// Useful to decode the stack trace, but only used if we have execinfo.h
#endif // KNF_HAVE_CXXABI_H
#endif // KNF_HAVE_EXECINFO_H

#include <stdlib.h>

#include <ctime>
#include <iomanip>
#include <string>

namespace knf {

// Returns the current local time formatted as "yyyy-mm-dd hh:mm:ss".
//
// Returns an empty string if the current time cannot be converted to a
// calendar time (std::localtime reports failure with a null pointer, which
// must not be dereferenced).
std::string GetDateTimeStr() {
  const std::time_t now = std::time(nullptr);
  const std::tm *local = std::localtime(&now);
  if (local == nullptr) {
    return std::string();
  }
  // Copy right away: std::localtime returns a pointer to shared static
  // storage that later calls may overwrite.
  const std::tm tm = *local;

  std::ostringstream os;
  os << std::put_time(&tm, "%F %T");  // yyyy-mm-dd hh:mm:ss
  return os.str();
}

// Locates the (possibly mangled) symbol inside one backtrace line.
//
// The symbol is taken to start at the first '_' that directly follows a ' '
// or a '(' and to end just before the next ' ' or '+'.
//
// On success returns true with [*begin, *end) delimiting the symbol.
// Returns false when no such '_' exists (*begin is set to npos and *end is
// left untouched) or when no terminating ' ' / '+' follows it (*end is set
// to npos).
static bool LocateSymbolRange(const std::string &trace_name, std::size_t *begin,
                              std::size_t *end) {
  // Index 0 can never qualify because it has no preceding character.
  std::size_t pos = trace_name.find('_', 1);
  while (pos != std::string::npos) {
    const char before = trace_name[pos - 1];
    if (before == ' ' || before == '(') {
      break;
    }
    pos = trace_name.find('_', pos + 1);
  }

  *begin = pos;
  if (pos == std::string::npos) {
    return false;
  }

  *end = trace_name.find_first_of(" +", pos);
  return *end != std::string::npos;
}

#ifdef KNF_HAVE_EXECINFO_H
// Returns `trace_name` with its mangled C++ symbol replaced by the demangled
// form. The input is returned unchanged when cxxabi.h is unavailable or when
// no symbol can be located; if demangling itself fails, the mangled symbol is
// kept as is.
//
// Backtrace line formats this is meant to handle:
//
// Linux:
//   ./kaldi-error-test(_ZN5kaldi13UnitTestErrorEv+0xb) [0x804965d]
//
// Mac:
//   0 server 0x000000010f67614d _ZNK5kaldi13MessageLogger10LogMessageEv + 813
//
// i.e. extract a name such as '_ZN5kaldi13UnitTestErrorEv' and turn it into
// a readable one such as kaldi::UnitTestError.
static std::string Demangle(const std::string &trace_name) {
#ifdef KNF_HAVE_CXXABI_H
  std::size_t begin = 0;
  std::size_t end = 0;
  if (!LocateSymbolRange(trace_name, &begin, &end)) {
    return trace_name;
  }

  const std::size_t length = end - begin;
  std::string symbol = trace_name.substr(begin, length);

  int status = 0;
  char *demangled =
      abi::__cxa_demangle(symbol.c_str(), nullptr, nullptr, &status);
  if (status == 0 && demangled != nullptr) {
    symbol = demangled;
    free(demangled);  // the buffer returned by __cxa_demangle is malloc'ed
  }

  // Splice the (possibly demangled) symbol back into the original line.
  std::string result = trace_name;
  result.replace(begin, length, symbol);
  return result;
#else   // KNF_HAVE_CXXABI_H
  return trace_name;
#endif  // KNF_HAVE_CXXABI_H
}
#endif  // KNF_HAVE_EXECINFO_H

// Builds a human-readable stack trace of the calling thread.
//
// Returns an empty string when execinfo.h is not available (i.e.
// KNF_HAVE_EXECINFO_H is not defined) or when backtrace_symbols() fails.
// Otherwise the result starts with "[ Stack-Trace: ]" followed by one
// demangled frame per line; if more than kMaxTracePrint frames were
// captured, only the first and last kMaxTracePrint / 2 are listed.
std::string GetStackTrace() {
  std::string ans;
#ifdef KNF_HAVE_EXECINFO_H
  constexpr std::size_t kMaxTraceSize = 50;
  constexpr std::size_t kMaxTracePrint = 50;  // Must be even.

  // Capture the raw return addresses, then resolve them to symbol strings.
  void *frames[kMaxTraceSize];
  const std::size_t num_frames = backtrace(frames, kMaxTraceSize);
  char **symbols = backtrace_symbols(frames, num_frames);
  if (symbols == nullptr) {
    return ans;
  }

  // Appends frames [first, last) to `ans`, one per line.
  auto append_frames = [&ans, symbols](std::size_t first, std::size_t last) {
    for (std::size_t i = first; i < last; ++i) {
      ans += Demangle(symbols[i]) + "\n";
    }
  };

  ans += "[ Stack-Trace: ]\n";
  if (num_frames > kMaxTracePrint) {
    // Too many frames: print the head and the tail with an ellipsis between.
    append_frames(0, kMaxTracePrint / 2);
    ans += ".\n.\n.\n";
    append_frames(num_frames - kMaxTracePrint / 2, num_frames);
    if (num_frames == kMaxTraceSize) {
      ans += ".\n.\n.\n";  // Stack was too long, probably a bug.
    }
  } else {
    append_frames(0, num_frames);
  }

  // backtrace_symbols() allocates only the array of pointers; the strings
  // live inside that same allocation and must not be freed individually.
  free(symbols);
#endif  // KNF_HAVE_EXECINFO_H
  return ans;
}

} // namespace knf


================================================
FILE: audio/paddleaudio/third_party/kaldi-native-fbank/csrc/log.h
================================================
/**
 * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
 *
 * See LICENSE for clarification regarding multiple authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// The content in this file is copied/modified from
// https://github.com/k2-fsa/k2/blob/master/k2/csrc/log.h
#ifndef KALDI_NATIVE_FBANK_CSRC_LOG_H_
#define KALDI_NATIVE_FBANK_CSRC_LOG_H_

#include <stdio.h>

#include <cstdint>
#include <cstdlib>
#include <mutex>  // NOLINT
#include <sstream>
#include <stdexcept>
#include <string>

namespace knf {

// True when this translation unit is compiled with NDEBUG defined; the
// KNF_DCHECK*/KNF_DLOG macros test it to skip their work.
#ifdef NDEBUG
constexpr bool kDisableDebug = true;
#else
constexpr bool kDisableDebug = false;
#endif

// Severity of a log record, ordered from least (kTrace) to most (kFatal)
// severe. The numeric order matters: levels are compared with `<=` to decide
// whether a record is printed.
enum class LogLevel {
  kTrace = 0,
  kDebug = 1,
  kInfo = 2,
  kWarning = 3,
  kError = 4,
  // Print the message and abort the program.
  kFatal = 5,
};

// They are used in KNF_LOG(xxx), so their names
// do not follow the google c++ code style
//
// You can use them in the following way:
//
//  KNF_LOG(TRACE) << "some message";
//  KNF_LOG(DEBUG) << "some message";
// Short upper-case aliases for the LogLevel enumerators. With MSVC they are
// provided as preprocessor macros, everywhere else as constexpr constants.
#if defined(_MSC_VER)
#define TRACE LogLevel::kTrace
#define DEBUG LogLevel::kDebug
#define INFO LogLevel::kInfo
#define WARNING LogLevel::kWarning
#define ERROR LogLevel::kError
#define FATAL LogLevel::kFatal
#else
constexpr LogLevel TRACE = LogLevel::kTrace;
constexpr LogLevel DEBUG = LogLevel::kDebug;
constexpr LogLevel INFO = LogLevel::kInfo;
constexpr LogLevel WARNING = LogLevel::kWarning;
constexpr LogLevel ERROR = LogLevel::kError;
constexpr LogLevel FATAL = LogLevel::kFatal;
#endif

// Returns a printable stack trace of the caller, or an empty string when no
// trace is available. Logger's destructor prints it for FATAL records.
std::string GetStackTrace();

/* Return the current log level.

   The level is read once, on the first call, from the environment variable
   KNF_LOG_LEVEL and cached for the rest of the process. Accepted values are
   TRACE, DEBUG, INFO, WARNING, ERROR and FATAL. If the variable is unset the
   level is INFO; if it holds any other value a diagnostic is written to
   stderr and the level stays INFO.

   If the current log level is TRACE, then all logged messages are printed out.

   If the current log level is DEBUG, log messages with "TRACE" level are not
   shown and all other levels are printed out.

   Similarly, if the current log level is INFO, log message with "TRACE" and
   "DEBUG" are not shown and all other levels are printed out.

   If it is FATAL, then only FATAL messages are shown.
 */
inline LogLevel GetCurrentLogLevel() {
  static LogLevel log_level = INFO;
  static std::once_flag init_flag;
  std::call_once(init_flag, []() {
    const char *env_log_level = std::getenv("KNF_LOG_LEVEL");
    if (env_log_level == nullptr) return;

    const std::string s = env_log_level;
    if (s == "TRACE") {
      log_level = TRACE;
    } else if (s == "DEBUG") {
      log_level = DEBUG;
    } else if (s == "INFO") {
      log_level = INFO;
    } else if (s == "WARNING") {
      log_level = WARNING;
    } else if (s == "ERROR") {
      log_level = ERROR;
    } else if (s == "FATAL") {
      log_level = FATAL;
    } else {
      // Terminate the diagnostic with a newline so that whatever is written
      // to stderr next does not end up glued to it.
      fprintf(stderr,
              "Unknown KNF_LOG_LEVEL: %s"
              "\nSupported values are: "
              "TRACE, DEBUG, INFO, WARNING, ERROR, FATAL\n",
              s.c_str());
    }
  });
  return log_level;
}

// Reports whether the environment variable KNF_ABORT is set (to any value).
// The environment is consulted only on the first call; the answer is cached
// for the lifetime of the process.
inline bool EnableAbort() {
  // Function-local statics are initialized exactly once, even when several
  // threads race on the first call.
  static const bool enable_abort = (std::getenv("KNF_ABORT") != nullptr);
  return enable_abort;
}

class Logger {
 public:
  Logger(const char *filename, const char *func_name, uint32_t line_num,
         LogLevel level)
      : filename_(filename),
        func_name_(func_name),
        line_num_(line_num),
        level_(level) {
    cur_level_ = GetCurrentLogLevel();
    fprintf(stderr, "here\n");
    switch (level) {
      case TRACE:
        if (cur_level_ <= TRACE) fprintf(stderr, "[T] ");
        break;
      case DEBUG:
        if (cur_level_ <= DEBUG) fprintf(stderr, "[D] ");
        break;
      case INFO:
        if (cur_level_ <= INFO) fprintf(stderr, "[I] ");
        break;
      case WARNING:
        if (cur_level_ <= WARNING) fprintf(stderr, "[W] ");
        break;
      case ERROR:
        if (cur_level_ <= ERROR) fprintf(stderr, "[E] ");
        break;
      case FATAL:
        if (cur_level_ <= FATAL) fprintf(stderr, "[F] ");
        break;
    }

    if (cur_level_ <= level_) {
      fprintf(stderr, "%s:%u:%s ", filename, line_num, func_name);
    }
  }

  ~Logger() noexcept(false) {
    static constexpr const char *kErrMsg = R"(
    Some bad things happened. Please read the above error messages and stack
    trace. If you are using Python, the following command may be helpful:

      gdb --args python /path/to/your/code.py

    (You can use `gdb` to debug the code. Please consider compiling
    a debug version of KNF.).

    If you are unable to fix it, please open an issue at:

      https://github.com/csukuangfj/kaldi-native-fbank/issues/new
    )";
    fprintf(stderr, "\n");
    if (level_ == FATAL) {
      std::string stack_trace = GetStackTrace();
      if (!stack_trace.empty()) {
        fprintf(stderr, "\n\n%s\n", stack_trace.c_str());
      }

      fflush(nullptr);

#ifndef __ANDROID_API__
      if (EnableAbort()) {
        // NOTE: abort() will terminate the program immediately without
        // printing the Python stack backtrace.
        abort();
      }

      throw std::runtime_error(kErrMsg);
#else
      abort();
#endif
    }
  }

  const Logger &operator<<(bool b) const {
    if (cur_level_ <= level_) {
      fprintf(stderr, b ? "true" : "false");
    }
    return *this;
  }

  const Logger &operator<<(int8_t i) const {
    if (cur_level_ <= level_) fprintf(stderr, "%d", i);
    return *this;
  }

  const Logger &operator<<(const char *s) const {
    if (cur_level_ <= level_) fprintf(stderr, "%s", s);
    return *this;
  }

  const Logger &operator<<(int32_t i) const {
    if (cur_level_ <= level_) fprintf(stderr, "%d", i);
    return *this;
  }

  const Logger &operator<<(uint32_t i) const {
    if (cur_level_ <= level_) fprintf(stderr, "%u", i);
    return *this;
  }

  const Logger &operator<<(uint64_t i) const {
    if (cur_level_ <= level_)
      fprintf(stderr, "%llu", (long long unsigned int)i);  // NOLINT
    return *this;
  }

  const Logger &operator<<(int64_t i) const {
    if (cur_level_ <= level_)
      fprintf(stderr, "%lli", (long long int)i);  // NOLINT
    return *this;
  }

  const Logger &operator<<(float f) const {
    if (cur_level_ <= level_) fprintf(stderr, "%f", f);
    return *this;
  }

  const Logger &operator<<(double d) const {
    if (cur_level_ <= level_) fprintf(stderr, "%f", d);
    return *this;
  }

  template <typename T>
  const Logger &operator<<(const T &t) const {
    // require T overloads operator<<
    std::ostringstream os;
    os << t;
    return *this << os.str().c_str();
  }

  // specialization to fix compile error: `stringstream << nullptr` is ambiguous
  const Logger &operator<<(const std::nullptr_t &null) const {
    if (cur_level_ <= level_) *this << "(null)";
    return *this;
  }

 private:
  const char *filename_;
  const char *func_name_;
  uint32_t line_num_;
  LogLevel level_;
  LogLevel cur_level_;
};

// Helper used by the check/log macros: `Voidifier() & logger` consumes a
// Logger expression and yields `void`, so that both arms of the macros'
// conditional operator have the same type.
class Voidifier {
 public:
  void operator&(const Logger &) const {
    // Intentionally empty: only the resulting type matters.
  }
};

}  // namespace knf

// KNF_FUNC expands to the name of the enclosing function. The logging and
// check macros in this header pass it to ::knf::Logger as `func_name`.
// NOTE(review): __PRETTY_FUNCTION__ is presumably not a preprocessor macro
// on the targeted compilers, in which case the last `defined(...)` term
// never contributes -- confirm before relying on it.
#if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) || \
    defined(__PRETTY_FUNCTION__)
// for clang and GCC
#define KNF_FUNC __PRETTY_FUNCTION__
#else
// for other compilers
#define KNF_FUNC __func__
#endif

// Compile-time assertion with an empty message.
#define KNF_STATIC_ASSERT(x) static_assert(x, "")

// Runtime assertion. Evaluates `x`; if it is false, a FATAL ::knf::Logger is
// created whose message starts with "Check failed: <x> ". More context can be
// streamed after the macro:
//
//    KNF_CHECK(ptr != nullptr) << "some message";
//
// The expansion is deliberately left unparenthesized so that the trailing
// `<< ...` binds to the Logger; ::knf::Voidifier turns that arm into `void`
// to match the `(void)0` arm.
#define KNF_CHECK(x)                                                  \
  (x) ? (void)0                                                       \
      : ::knf::Voidifier() &                                          \
            ::knf::Logger(__FILE__, KNF_FUNC, __LINE__, ::knf::FATAL) \
                << "Check failed: " << #x << " "

// WARNING: x and y may be evaluated multiple times, but this happens only
// when the check fails. Since the program aborts if it fails, we don't think
// the extra evaluation of x and y matters.
//
// CAUTION: we recommend the following use case:
//
//      auto x = Foo();
//      auto y = Bar();
//      KNF_CHECK_EQ(x, y) << "Some message";
//
//  And please avoid
//
//      KNF_CHECK_EQ(Foo(), Bar());
//
//  if `Foo()` or `Bar()` causes some side effects, e.g., changing some
//  local static variables or global variables.
// Implementation detail of the KNF_CHECK_xx macros: checks `(x) op (y)` and,
// on failure, creates a FATAL ::knf::Logger whose message contains both the
// source text of the comparison and the values of x and y. The values are
// streamed into the Logger, so their types must be accepted by one of
// Logger's operator<< overloads.
#define _KNF_CHECK_OP(x, y, op)                                              \
  ((x)op(y)) ? (void)0                                                       \
             : ::knf::Voidifier() &                                          \
                   ::knf::Logger(__FILE__, KNF_FUNC, __LINE__, ::knf::FATAL) \
                       << "Check failed: " << #x << " " << #op << " " << #y  \
                       << " (" << (x) << " vs. " << (y) << ") "

// Binary comparison checks: EQ (==), NE (!=), LT (<), LE (<=), GT (>),
// GE (>=).
#define KNF_CHECK_EQ(x, y) _KNF_CHECK_OP(x, y, ==)
#define KNF_CHECK_NE(x, y) _KNF_CHECK_OP(x, y, !=)
#define KNF_CHECK_LT(x, y) _KNF_CHECK_OP(x, y, <)
#define KNF_CHECK_LE(x, y) _KNF_CHECK_OP(x, y, <=)
#define KNF_CHECK_GT(x, y) _KNF_CHECK_OP(x, y, >)
#define KNF_CHECK_GE(x, y) _KNF_CHECK_OP(x, y, >=)

// Creates a Logger for level `x`, where `x` is one of TRACE, DEBUG, INFO,
// WARNING, ERROR or FATAL, e.g. KNF_LOG(INFO) << "some message";
#define KNF_LOG(x) ::knf::Logger(__FILE__, KNF_FUNC, __LINE__, ::knf::x)

// ------------------------------------------------------------
//       For debug check
// ------------------------------------------------------------
// If you define the macro "-D NDEBUG" while compiling kaldi-native-fbank,
// the following macros reduce to a no-op and do nothing.

// Debug-only variants of KNF_CHECK / KNF_CHECK_xx / KNF_LOG. Each one tests
// the constexpr flag ::knf::kDisableDebug first; when the flag is true the
// conditional operator selects `(void)0` and the check or log expression on
// the right-hand side is not evaluated.

#define KNF_DCHECK(x) ::knf::kDisableDebug ? (void)0 : KNF_CHECK(x)

#define KNF_DCHECK_EQ(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_EQ(x, y)

#define KNF_DCHECK_NE(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_NE(x, y)

#define KNF_DCHECK_LT(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_LT(x, y)

#define KNF_DCHECK_LE(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_LE(x, y)

#define KNF_DCHECK_GT(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_GT(x, y)

#define KNF_DCHECK_GE(x, y) ::knf::kDisableDebug ? (void)0 : KNF_CHECK_GE(x, y)

// Debug-only logging; ::knf::Voidifier makes the logging arm `void` so that
// it matches the `(void)0` arm.
#define KNF_DLOG(x) \
  ::knf::kDisableDebug ? (void)0 : ::knf::Voidifier() & KNF_LOG(x)

#endif  // KALDI_NATIVE_FBANK_CSRC_LOG_H_


================================================
FILE: audio/paddleaudio/third_party/kaldi-native-fbank/csrc/mel-computations.cc
================================================
/**
 * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
 *
 * See LICENSE for clarification regarding multiple authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// This file is copied/modified from kaldi/src/feat/mel-computations.cc

#include "kaldi-native-fbank/csrc/mel-computations.h"

#include <algorithm>
#include <sstream>

#include "kaldi-native-fbank/csrc/feature-window.h"

namespace knf {

// Writes the textual form of `opts` (as produced by opts.ToString()) to `os`
// and returns the stream to allow chaining.
std::ostream &operator<<(std::ostream &os, const MelBanksOptions &opts) {
  return os << opts.ToString();
}

float MelBanks::VtlnWarpFreq(
    float vtln_low_cutoff,  // upper+lower frequency cutoffs for VTLN.
    float vtln_high_cutoff,
    float low_freq,  // upper+lower frequency cutoffs in mel computation
    float high_freq, float vtln_warp_factor, float freq) {
  /// Evaluates a VTLN warping function F at `freq`. F differs from HTK's
  /// warping function but takes similar inputs, and has the advantage of
  /// never producing empty bins.
  ///
  /// F is defined on [low_freq, high_freq], is continuous and piecewise
  /// linear with two inflection points l < h (both measured in unwarped
  /// frequency), and satisfies
  ///   F(low_freq)  == low_freq
  ///   F(high_freq) == high_freq
  ///   F(f)         == f / vtln_warp_factor      for l <= f <= h.
  ///
  /// The inflection points follow from the cutoffs:
  ///   * max(h, F(h)) == vtln_high_cutoff. With F(h) == h / vtln_warp_factor
  ///     this gives h = vtln_high_cutoff / max(1, 1 / vtln_warp_factor)
  ///                  = vtln_high_cutoff * min(1, vtln_warp_factor).
  ///   * min(l, F(l)) == vtln_low_cutoff, which gives
  ///     l = vtln_low_cutoff / min(1, 1 / vtln_warp_factor)
  ///       = vtln_low_cutoff * max(1, vtln_warp_factor).

  // Frequencies outside the mel range are passed through unchanged, in case
  // this gets called for them.
  if (freq < low_freq || freq > high_freq) {
    return freq;
  }

  KNF_CHECK_GT(vtln_low_cutoff, low_freq);
  KNF_CHECK_LT(vtln_high_cutoff, high_freq);

  const float unity = 1.0f;
  // NOTE: `l` and `h` keep their short names because KNF_CHECK embeds the
  // checked expression verbatim in its failure message.
  const float l = vtln_low_cutoff * std::max(unity, vtln_warp_factor);
  const float h = vtln_high_cutoff * std::min(unity, vtln_warp_factor);
  // Slope of the centre segment of the 3-piece linear function.
  const float center_slope = 1.0f / vtln_warp_factor;
  KNF_CHECK(l > low_freq && h < high_freq);

  if (freq < l) {
    // Left segment: joins (low_freq, low_freq) to (l, F(l)).
    const float warped_l = center_slope * l;  // F(l)
    const float left_slope = (warped_l - low_freq) / (l - low_freq);
    return low_freq + left_slope * (freq - low_freq);
  }

  if (freq < h) {
    // Centre segment.
    return center_slope * freq;
  }

  // Right segment (freq >= h): joins (h, F(h)) to (high_freq, high_freq).
  const float warped_h = center_slope * h;  // F(h)
  const float right_slope = (high_freq - warped_h) / (high_freq - h);
  return high_freq + right_slope * (freq - high_freq);
}

// Mel-domain wrapper around VtlnWarpFreq(): the mel value is mapped back to
// a linear frequency, warped there, and the result is mapped to mel again.
//
// vtln_low_cutoff / vtln_high_cutoff are the cutoffs of the VTLN warping
// function; low_freq / high_freq are the cutoffs used in the mel computation.
float MelBanks::VtlnWarpMelFreq(float vtln_low_cutoff, float vtln_high_cutoff,
                                float low_freq, float high_freq,
                                float vtln_warp_factor, float mel_freq) {
  const float linear_freq = InverseMelScale(mel_freq);
  const float warped_freq =
      VtlnWarpFreq(vtln_low_cutoff, vtln_high_cutoff, low_freq, high_freq,
                   vtln_warp_factor, linear_freq);
  return MelScale(warped_freq);
}

// Builds the triangular mel filterbank.
//
// For every mel bin this stores, in bins_, the index of the first FFT bin
// with a nonzero weight together with the contiguous run of weights up to
// the last nonzero one; center_freqs_ receives the bin's center frequency
// converted back from mel with InverseMelScale().
//
// @param opts              Filterbank options (bin count, frequency cutoffs,
//                          VTLN cutoffs, debug/HTK flags).
// @param frame_opts        Supplies samp_freq and PaddedWindowSize(); the
//                          padded window size must be even.
// @param vtln_warp_factor  VTLN warp factor; the value 1.0 disables warping
//                          (and the VTLN option validation).
//
// Invalid option combinations are reported through KNF_LOG(FATAL) /
// KNF_CHECK.
MelBanks::MelBanks(const MelBanksOptions &opts,
                   const FrameExtractionOptions &frame_opts,
                   float vtln_warp_factor)
    : htk_mode_(opts.htk_mode) {
  int32_t num_bins = opts.num_bins;
  if (num_bins < 3) KNF_LOG(FATAL) << "Must have at least 3 mel bins";

  float sample_freq = frame_opts.samp_freq;
  int32_t window_length_padded = frame_opts.PaddedWindowSize();
  KNF_CHECK_EQ(window_length_padded % 2, 0);

  // Only FFT bins 0 .. window_length_padded/2 - 1 are considered below, i.e.
  // the bin that would sit exactly at the Nyquist frequency is left out.
  int32_t num_fft_bins = window_length_padded / 2;
  float nyquist = 0.5f * sample_freq;

  // A positive opts.high_freq is used as is; zero or a negative value is an
  // offset added to the Nyquist frequency (so 0 means "up to Nyquist").
  float low_freq = opts.low_freq, high_freq;
  if (opts.high_freq > 0.0f)
    high_freq = opts.high_freq;
  else
    high_freq = nyquist + opts.high_freq;

  if (low_freq < 0.0f || low_freq >= nyquist || high_freq <= 0.0f ||
      high_freq > nyquist || high_freq <= low_freq) {
    KNF_LOG(FATAL) << "Bad values in options: low-freq " << low_freq
                   << " and high-freq " << high_freq << " vs. nyquist "
                   << nyquist;
  }

  float fft_bin_width = sample_freq / window_length_padded;
  // fft-bin width [think of it as Nyquist-freq / half-window-length]

  float mel_low_freq = MelScale(low_freq);
  float mel_high_freq = MelScale(high_freq);

  debug_ = opts.debug_mel;

  // divide by num_bins+1 in next line because of end-effects where the bins
  // spread out to the sides.
  float mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins + 1);

  // A negative vtln_high is, like high_freq, an offset from Nyquist.
  float vtln_low = opts.vtln_low, vtln_high = opts.vtln_high;
  if (vtln_high < 0.0f) {
    vtln_high += nyquist;
  }

  // The VTLN cutoffs are only validated when warping is actually requested;
  // they must lie strictly inside (low_freq, high_freq) and be ordered.
  if (vtln_warp_factor != 1.0f &&
      (vtln_low < 0.0f || vtln_low <= low_freq || vtln_low >= high_freq ||
       vtln_high <= 0.0f || vtln_high >= high_freq || vtln_high <= vtln_low)) {
    KNF_LOG(FATAL) << "Bad values in options: vtln-low " << vtln_low
                   << " and vtln-high " << vtln_high << ", versus "
                   << "low-freq " << low_freq << " and high-freq " << high_freq;
  }

  bins_.resize(num_bins);
  center_freqs_.resize(num_bins);

  for (int32_t bin = 0; bin < num_bins; ++bin) {
    // Adjacent triangles overlap by half: the left/center/right corners of
    // bin b sit at offsets b, b+1 and b+2 times mel_freq_delta.
    float left_mel = mel_low_freq + bin * mel_freq_delta,
          center_mel = mel_low_freq + (bin + 1) * mel_freq_delta,
          right_mel = mel_low_freq + (bin + 2) * mel_freq_delta;

    if (vtln_warp_factor != 1.0f) {
      left_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
                                 vtln_warp_factor, left_mel);
      center_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
                                   vtln_warp_factor, center_mel);
      right_mel = VtlnWarpMelFreq(vtln_low, vtln_high, low_freq, high_freq,
                                  vtln_warp_factor, right_mel);
    }
    center_freqs_[bin] = InverseMelScale(center_mel);

    // this_bin will be a vector of coefficients that is only
    // nonzero where this mel bin is active.
    std::vector<float> this_bin(num_fft_bins);

    // first_index / last_index bracket the FFT bins that fall strictly
    // inside (left_mel, right_mel); -1 means "none found yet".
    int32_t first_index = -1, last_index = -1;
    for (int32_t i = 0; i < num_fft_bins; ++i) {
      float freq = (fft_bin_width * i);  // Center frequency of this fft
                                         // bin.
      float mel = MelScale(freq);
      if (mel > left_mel && mel < right_mel) {
        // Linear ramp up to the center, linear ramp down after it.
        float weight;
        if (mel <= center_mel)
          weight = (mel - left_mel) / (center_mel - left_mel);
        else
          weight = (right_mel - mel) / (right_mel - center_mel);
        this_bin[i] = weight;
        if (first_index == -1) first_index = i;
        last_index = i;
      }
    }
    // Fails when no FFT bin landed inside this triangle.
    KNF_CHECK(first_index != -1 && last_index >= first_index &&
              "You may have set num_mel_bins too large.");

    // Keep only the [first_index, last_index] slice of the weights.
    bins_[bin].first = first_index;
    int32_t size = last_index + 1 - first_index;
    bins_[bin].second.insert(bins_[bin].second.end(),
                             this_bin.begin() + first_index,
                             this_bin.begin() + first_index + size);

    // Replicate a bug in HTK, for testing purposes.
    if (opts.htk_mode && bin == 0 && mel_low_freq != 0.0f) {
      bins_[bin].second[0] = 0.0;
    }
  }  // for (int32_t bin = 0; bin < num_bins; ++bin) {

  // With debug_mel set, dump every bin's offset and weights to the log.
  if (debug_) {
    std::ostringstream os;
    for (size_t i = 0; i < bins_.size(); i++) {
      os << "bin " << i << ", offset = " << bins_[i].first << ", vec = ";
      for (auto k : bins_[i].second) os << k << ", ";
      os << "\n";
    }
    KNF_LOG(INFO) << os.str();
  }
}

// Applies every mel filter to "power_spectrum" (the fft energies) and writes
// one (non-log) energy per mel bin into "mel_energies_out".
void MelBanks::Compute(const float *power_spectrum,
                       float *mel_energies_out) const {
  const int32_t num_bins = bins_.size();

  for (int32_t bin = 0; bin < num_bins; ++bin) {
    // The stored weights line up with the spectrum starting at the filter's
    // first nonzero fft bin.
    int32_t spectrum_index = bins_[bin].first;
    float energy = 0;
    for (float weight : bins_[bin].second) {
      energy += weight * power_spectrum[spectrum_index++];
    }

    // HTK-like flooring- for testing purposes (we prefer dither)
    if (htk_mode_ && energy < 1.0) energy = 1.0;

    mel_energies_out[bin] = energy;

    // NaN is the only value that compares unequal to itself; this check was
    // added to detect early a problem once seen with OpenBlas (a bug in that
    // library).
    KNF_CHECK_EQ(energy, energy);
  }

  if (!debug_) return;

  fprintf(stderr, "MEL BANKS:\n");
  for (const float *e = mel_energies_out; e != mel_energies_out + num_bins;
       ++e) {
    fprintf(stderr, " %f", *e);
  }
  fprintf(stderr, "\n");
}

}  // namespace knf


================================================
FILE: audio/paddleaudio/third_party/kaldi-native-fbank/csrc/mel-computations.h
================================================
/**
 * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
 *
 * See LICENSE for clarification regarding multiple authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// This file is copied/modified from kaldi/src/feat/mel-computations.h
#ifndef KALDI_NATIVE_FBANK_CSRC_MEL_COMPUTATIONS_H_
#define KALDI_NATIVE_FBANK_CSRC_MEL_COMPUTATIONS_H_

#include <cmath>
#include <string>

#include "kaldi-native-fbank/csrc/feature-window.h"

namespace knf {

// Options controlling how MelBanks lays out its triangular mel filters.
struct MelBanksOptions {
  // Number of triangular bins, e.g. 25.
  int32_t num_bins = 25;

  // Lower frequency cutoff, e.g. 20.
  float low_freq = 20;

  // Upper frequency cutoff. 0 means no cutoff; a negative value is added to
  // the Nyquist frequency to obtain the cutoff.
  float high_freq = 0;

  // Lower cutoff of the VTLN warping function.
  float vtln_low = 100;

  // Upper cutoff of the VTLN warping function. A negative value is added to
  // the Nyquist frequency to obtain the cutoff.
  float vtln_high = -500;

  bool debug_mel = false;

  // "Hidden" config that does not show up on the command line. Enables more
  // exact compatibility with HTK, for testing purposes: it affects
  // mel-energy flooring and reproduces a bug in HTK.
  bool htk_mode = false;

  // Returns all options as "name: value" lines, one per field, in
  // declaration order.
  std::string ToString() const {
    std::ostringstream text;
    text << "num_bins: " << num_bins << "\n"
         << "low_freq: " << low_freq << "\n"
         << "high_freq: " << high_freq << "\n"
         << "vtln_low: " << vtln_low << "\n"
         << "vtln_high: " << vtln_high << "\n"
         << "debug_mel: " << debug_mel << "\n"
         << "htk_mode: " << htk_mode << "\n";
    return text.str();
  }
};

std::ostream &operator<<(std::ostream &os, const MelBanksOptions &opts);

// Triangular mel filterbank: built once from the options, then applied to
// FFT energies with Compute().
class MelBanks {
 public:
  // Mel value -> linear frequency; the exact inverse formula of MelScale().
  static inline float InverseMelScale(float mel_freq) {
    return 700.0f * (expf(mel_freq / 1127.0f) - 1.0f);
  }

  // Linear frequency -> mel value: 1127 * ln(1 + freq / 700).
  static inline float MelScale(float freq) {
    return 1127.0f * logf(1.0f + freq / 700.0f);
  }

  // VTLN warping of a linear frequency "freq" (piecewise linear; see the
  // definition for the derivation of the inflection points).
  static float VtlnWarpFreq(
      float vtln_low_cutoff,
      float vtln_high_cutoff,  // discontinuities in warp func
      float low_freq,
      float high_freq,  // upper+lower frequency cutoffs in
      // the mel computation
      float vtln_warp_factor, float freq);

  // Same warping as VtlnWarpFreq(), but input and output are mel values.
  static float VtlnWarpMelFreq(float vtln_low_cutoff, float vtln_high_cutoff,
                               float low_freq, float high_freq,
                               float vtln_warp_factor, float mel_freq);

  // TODO(fangjun): Remove vtln_warp_factor
  MelBanks(const MelBanksOptions &opts,
           const FrameExtractionOptions &frame_opts, float vtln_warp_factor);

  /// Compute Mel energies (note: not log energies).
  /// At input, "fft_energies" contains the FFT energies (not log).
  ///
  /// @param fft_energies 1-D array of size num_fft_bins/2+1
  ///        NOTE(review): "num_fft_bins" here presumably means the padded
  ///        window (FFT) size - confirm against the caller.
  /// @param mel_energies_out  1-D array of size num_mel_bins
  void Compute(const float *fft_energies, float *mel_energies_out) const;

  // Number of mel bins, i.e. the number of values Compute() writes.
  int32_t NumBins() const { return bins_.size(); }

 private:
  // center frequencies of bins, numbered from 0 ... num_bins-1.
  // Needed by GetCenterFreqs().
  // NOTE(review): no GetCenterFreqs() accessor is declared in this class.
  std::vector<float> center_freqs_;

  // the "bins_" vector is a vector, one for each bin, of a pair:
  // (the first nonzero fft-bin), (the vector of weights).
  std::vector<std::pair<int32_t, std::vector<float>>> bins_;

  // TODO(fangjun): Remove debug_ and htk_mode_
  bool debug_;     // when set, Compute() prints the mel energies to stderr
  bool htk_mode_;  // when set, Compute() floors each mel energy at 1.0
};

}  // namespace knf

#endif  // KALDI_NATIVE_FBANK_CSRC_MEL_COMPUTATIONS_H_


================================================
FILE: audio/paddleaudio/third_party/kaldi-native-fbank/csrc/rfft.cc
================================================
/**
 * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
 *
 * See LICENSE for clarification regarding multiple authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "kaldi-native-fbank/csrc/rfft.h"

#include <cmath>
#include <vector>

#include "kaldi-native-fbank/csrc/log.h"

// see fftsg.c
#ifdef __cplusplus
extern "C" void rdft(int n, int isgn, double *a, int *ip, double *w);
#else
void rdft(int n, int isgn, double *a, int *ip, double *w);
#endif

namespace knf {
// Private implementation of Rfft: owns the work areas required by rdft()
// (see fftsg.c) and forwards the actual transform to it.
class Rfft::RfftImpl {
 public:
  // @param n Transform size; must be a power of 2 and at least 2.
  //
  // The size is validated before any work area is sized from it. The bare
  // power-of-two test (n & (n - 1)) == 0 is also satisfied by n == 0 and
  // n == 1, for which the n-element buffer handed to rdft() would be too
  // small, so n >= 2 is checked explicitly.
  explicit RfftImpl(int32_t n)
      : n_(ValidatedSize(n)),
        // rdft() needs at least 2 + sqrt(n / 2) ints for its bit-reversal
        // work area. The vector is zero-initialized, so ip_[0] == 0 on the
        // first call, which is how rdft() knows it must build its tables.
        ip_(2 + static_cast<int32_t>(std::sqrt(n_ / 2))),
        // rdft() needs n / 2 doubles for its cos/sin table.
        w_(n_ / 2) {}

  // In-place forward transform of n_ floats. The data is widened to double
  // for the transform and narrowed back to float afterwards.
  void Compute(float *in_out) {
    std::vector<double> buffer(in_out, in_out + n_);

    Compute(buffer.data());

    for (int32_t i = 0; i < n_; ++i) {
      in_out[i] = static_cast<float>(buffer[i]);
    }
  }

  // In-place forward transform of n_ doubles.
  void Compute(double *in_out) {
    // 1 means forward fft
    rdft(n_, 1, in_out, ip_.data(), w_.data());
  }

 private:
  // Returns n unchanged after checking that it is a power of 2 and >= 2.
  static int32_t ValidatedSize(int32_t n) {
    KNF_CHECK_GT(n, 1);
    KNF_CHECK_EQ(n & (n - 1), 0);
    return n;
  }

  // NOTE: n_ must stay declared before ip_ and w_, because their
  // initializers read it.
  int32_t n_;
  std::vector<int32_t> ip_;
  std::vector<double> w_;
};

// Creates the private implementation for an n-point transform; the
// validation of n happens in RfftImpl's constructor.
Rfft::Rfft(int32_t n) : impl_(std::make_unique<RfftImpl>(n)) {}

// Defaulted here rather than in the header: RfftImpl is only forward
// declared there, and std::unique_ptr needs the complete type to destroy it.
Rfft::~Rfft() = default;

// Both overloads transform in place and simply forward to the implementation.
void Rfft::Compute(float *in_out) { impl_->Compute(in_out); }
void Rfft::Compute(double *in_out) { impl_->Compute(in_out); }

}  // namespace knf


================================================
FILE: audio/paddleaudio/third_party/kaldi-native-fbank/csrc/rfft.h
================================================
/**
 * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
 *
 * See LICENSE for clarification regarding multiple authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef KALDI_NATIVE_FBANK_CSRC_RFFT_H_
#define KALDI_NATIVE_FBANK_CSRC_RFFT_H_

#include <memory>

namespace knf {

// n-point Real discrete Fourier transform
// where n is a power of 2. n >= 2
//
//  R[k] = sum_j=0^n-1 in[j]*cos(2*pi*j*k/n), 0<=k<=n/2
//  I[k] = sum_j=0^n-1 in[j]*sin(2*pi*j*k/n), 0<k<n/2
// Public handle for the real FFT; the implementation (and its work buffers)
// lives in the forward-declared RfftImpl.
class Rfft {
 public:
  // @param n Number of input points of the transform. It should be a power
  //          of 2.
  explicit Rfft(int32_t n);
  ~Rfft();

  /** In-place forward transform.
   *
   *  @param in_out A 1-D array of size n.
   *             On return:
   *               in_out[0] = R[0]
   *               in_out[1] = R[n/2]
   *               for 0 < k < n/2,
   *                 in_out[2*k] = R[k]
   *                 in_out[2*k+1] = I[k]
   *
   */
  void Compute(float *in_out);
  void Compute(double *in_out);

 private:
  class RfftImpl;
  std::unique_ptr<RfftImpl> impl_;
};

}  // namespace knf

#endif  // KALDI_NATIVE_FBANK_CSRC_RFFT_H_


================================================
FILE: audio/paddleaudio/third_party/patches/config.guess
================================================
#! /bin/sh
# Attempt to guess a canonical system name.
#   Copyright 1992-2022 Free Software Foundation, Inc.

# shellcheck disable=SC2006,SC2268 # see below for rationale

timestamp='2022-01-09'

# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <https://www.gnu.org/licenses/>.
#
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that
# program.  This Exception is an additional permission under section 7
# of the GNU General Public License, version 3 ("GPLv3").
#
# Originally written by Per Bothner; maintained since 2000 by Ben Elliston.
#
# You can get the latest version of this script from:
# https://git.savannah.gnu.org/cgit/config.git/plain/config.guess
#
# Please send patches to <config-patches@gnu.org>.


# The "shellcheck disable" line above the timestamp inhibits complaints
# about features and limitations of the classic Bourne shell that were
# superseded or lifted in POSIX.  However, this script identifies a wide
# variety of pre-POSIX systems that do not have POSIX shells at all, and
# even some reasonably current systems (Solaris 10 as case-in-point) still
# have a pre-POSIX /bin/sh.


# Name this script was invoked as, with any leading directory removed.
me=`echo "$0" | sed -e 's,.*/,,'`

# Text printed for --help.
usage="\
Usage: $0 [OPTION]

Output the configuration name of the system \`$me' is run on.

Options:
  -h, --help         print this help, then exit
  -t, --time-stamp   print date of last modification, then exit
  -v, --version      print version number, then exit

Report bugs and patches to <config-patches@gnu.org>."

# Text printed for --version.
version="\
GNU config.guess ($timestamp)

Originally written by Per Bothner.
Copyright 1992-2022 Free Software Foundation, Inc.

This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."

# Hint appended to error messages.
help="
Try \`$me --help' for more information."

# Parse command line
# Every branch below either exits or leaves the loop, so only the first
# argument is ever examined here.
while test $# -gt 0 ; do
  case $1 in
    --time-stamp | --time* | -t )
       echo "$timestamp" ; exit ;;
    --version | -v )
       echo "$version" ; exit ;;
    --help | --h* | -h )
       echo "$usage"; exit ;;
    -- )     # Stop option processing
       shift; break ;;
    - )	# Use stdin as input.
       break ;;
    -* )
       echo "$me: invalid option $1$help" >&2
       exit 1 ;;
    * )
       break ;;
  esac
done

# Anything still left on the command line at this point is rejected.
if test $# != 0; then
  echo "$me: too many arguments$help" >&2
  exit 1
fi

# Just in case it came from the environment.
GUESS=

# CC_FOR_BUILD -- compiler used by this script. Note that the use of a
# compiler to aid in system detection is discouraged as it requires
# temporary files to be created and, as you can see below, it is a
# headache to deal with in a portable fashion.

# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still
# use `HOST_CC' if defined, but it is deprecated.

# Portable tmp directory creation inspired by the Autoconf team.

tmp=
# shellcheck disable=SC2172
trap 'test -z "$tmp" || rm -fr "$tmp"' 0 1 2 13 15

# Create the private temporary directory ($tmp, with $dummy as the base name
# for scratch files inside it) and choose the compiler stored in
# CC_FOR_BUILD.
#
# The temporary directory is created with the first of these that works:
#   1. mktemp -d
#   2. mkdir of a name built from the PID and $RANDOM
#   3. mkdir of a name built from the PID only (prints a warning)
# If none works, an error is printed and the script exits with status 1.
#
# Compiler selection, based on which of CC_FOR_BUILD, HOST_CC and CC are
# non-empty:
#   - none of them: try cc, gcc, c89 and c99 in that order on a one-line C
#     file and keep the first that compiles it, else "no_compiler_found";
#   - only CC (CC_FOR_BUILD and HOST_CC empty): use $CC;
#   - HOST_CC but not CC_FOR_BUILD: use $HOST_CC;
#   - CC_FOR_BUILD already set: no pattern matches and it is left untouched.
set_cc_for_build() {
    # prevent multiple calls if $tmp is already set
    test "$tmp" && return 0
    # Default TMPDIR to /tmp when it is unset.
    : "${TMPDIR=/tmp}"
    # shellcheck disable=SC2039,SC3028
    { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } ||
	{ test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir "$tmp" 2>/dev/null) ; } ||
	{ tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir "$tmp" 2>/dev/null) && echo "Warning: creating insecure temp directory" >&2 ; } ||
	{ echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; }
    dummy=$tmp/dummy
    case ${CC_FOR_BUILD-},${HOST_CC-},${CC-} in
	,,)    echo "int x;" > "$dummy.c"
	       for driver in cc gcc c89 c99 ; do
		   if ($driver -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then
		       CC_FOR_BUILD=$driver
		       break
		   fi
	       done
	       if test x"$CC_FOR_BUILD" = x ; then
		   CC_FOR_BUILD=no_compiler_found
	       fi
	       ;;
	,,*)   CC_FOR_BUILD=$CC ;;
	,*,*)  CC_FOR_BUILD=$HOST_CC ;;
    esac
}

# This is needed to find uname on a Pyramid OSx when run in the BSD universe.
# (ghazi@noc.rutgers.edu 1994-08-24)
if test -f /.attbin/uname ; then
	PATH=$PATH:/.attbin ; export PATH
fi

# Collect the four uname fields that the detection below matches on.  Each
# one falls back to the literal "unknown" when the uname call fails.
UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown
UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown
UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown
UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown

# On Linux and GNU systems, work out which C library is in use and record it
# in LIBC (uclibc, dietlibc, gnu or musl).  Other systems skip this step.
case $UNAME_SYSTEM in
Linux|GNU|GNU/*)
	LIBC=unknown

	# First attempt: run only the preprocessor over a small file whose
	# conditionals leave a single "LIBC=..." line depending on the macros
	# the C library headers define.  That line is extracted from the
	# preprocessor output, stripped of spaces and eval'ed to set LIBC.
	set_cc_for_build
	cat <<-EOF > "$dummy.c"
	#include <features.h>
	#if defined(__UCLIBC__)
	LIBC=uclibc
	#elif defined(__dietlibc__)
	LIBC=dietlibc
	#elif defined(__GLIBC__)
	LIBC=gnu
	#else
	#include <stdarg.h>
	/* First heuristic to detect musl libc.  */
	#ifdef __DEFINED_va_list
	LIBC=musl
	#endif
	#endif
	EOF
	cc_set_libc=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g'`
	eval "$cc_set_libc"

	# Second heuristic to detect musl libc.
	# Used only when the preprocessor test left LIBC undecided: an ldd
	# whose version output starts with "musl" identifies musl.
	if [ "$LIBC" = unknown ] &&
	   command -v ldd >/dev/null &&
	   ldd --version 2>&1 | grep -q ^musl; then
		LIBC=musl
	fi

	# If the system lacks a compiler, then just pick glibc.
	# We could probably try harder.
	if [ "$LIBC" = unknown ]; then
		LIBC=gnu
	fi
	;;
esac

# Note: order is significant - the case branches are not exclusive.

case $UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION in
    *:NetBSD:*:*)
	# NetBSD (nbsd) targets should (where applicable) match one or
	# more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,
	# *-*-netbsdecoff* and *-*-netbsd*.  For targets that recently
	# switched to ELF, *-*-netbsd* would select the old
	# object file format.  This provides both forward
	# compatibility and a consistent mechanism for selecting the
	# object file format.
	#
	# Note: NetBSD doesn't particularly care about the vendor
	# portion of the name.  We always set it to "unknown".
	UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \
	    /sbin/sysctl -n hw.machine_arch 2>/dev/null || \
	    /usr/sbin/sysctl -n hw.machine_arch 2>/dev/null || \
	    echo unknown)`
	case $UNAME_MACHINE_ARCH in
	    aarch64eb) machine=aarch64_be-unknown ;;
	    armeb) machine=armeb-unknown ;;
	    arm*) machine=arm-unknown ;;
	    sh3el) machine=shl-unknown ;;
	    sh3eb) machine=sh-unknown ;;
	    sh5el) machine=sh5le-unknown ;;
	    earmv*)
		arch=`echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,'`
		endian=`echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p'`
		machine=${arch}${endian}-unknown
		;;
	    *) machine=$UNAME_MACHINE_ARCH-unknown ;;
	esac
	# The Operating System including object format, if it has switched
	# to ELF recently (or will in the future) and ABI.
	case $UNAME_MACHINE_ARCH in
	    earm*)
		os=netbsdelf
		;;
	    arm*|i386|m68k|ns32k|sh3*|sparc|vax)
		set_cc_for_build
		if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
			| grep -q __ELF__
		then
		    # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout).
		    # Return netbsd for either.  FIX?
		    os=netbsd
		else
		    os=netbsdelf
		fi
		;;
	    *)
		os=netbsd
		;;
	esac
	# Determine ABI tags.
	case $UNAME_MACHINE_ARCH in
	    earm*)
		expr='s/^earmv[0-9]/-eabi/;s/eb$//'
		abi=`echo "$UNAME_MACHINE_ARCH" | sed -e "$expr"`
		;;
	esac
	# The OS release
	# Debian GNU/NetBSD machines have a different userland, and
	# thus, need a distinct triplet. However, they do not need
	# kernel version information, so it can be replaced with a
	# suitable tag, in the style of linux-gnu.
	case $UNAME_VERSION in
	    Debian*)
		release='-gnu'
		;;
	    *)
		release=`echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. -f1,2`
		;;
	esac
	# Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM:
	# contains redundant information, the shorter form:
	# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
	GUESS=$machine-${os}${release}${abi-}
	;;
    *:Bitrig:*:*)
	UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'`
	GUESS=$UNAME_MACHINE_ARCH-unknown-bitrig$UNAME_RELEASE
	;;
    *:OpenBSD:*:*)
	UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`
	GUESS=$UNAME_MACHINE_ARCH-unknown-openbsd$UNAME_RELEASE
	;;
    *:SecBSD:*:*)
	UNAME_MACHINE_ARCH=`arch | sed 's/SecBSD.//'`
	GUESS=$UNAME_MACHINE_ARCH-unknown-secbsd$UNAME_RELEASE
	;;
    *:LibertyBSD:*:*)
	UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'`
	GUESS=$UNAME_MACHINE_ARCH-unknown-libertybsd$UNAME_RELEASE
	;;
    *:MidnightBSD:*:*)
	GUESS=$UNAME_MACHINE-unknown-midnightbsd$UNAME_RELEASE
	;;
    *:ekkoBSD:*:*)
	GUESS=$UNAME_MACHINE-unknown-ekkobsd$UNAME_RELEASE
	;;
    *:SolidBSD:*:*)
	GUESS=$UNAME_MACHINE-unknown-solidbsd$UNAME_RELEASE
	;;
    *:OS108:*:*)
	GUESS=$UNAME_MACHINE-unknown-os108_$UNAME_RELEASE
	;;
    macppc:MirBSD:*:*)
	GUESS=powerpc-unknown-mirbsd$UNAME_RELEASE
	;;
    *:MirBSD:*:*)
	GUESS=$UNAME_MACHINE-unknown-mirbsd$UNAME_RELEASE
	;;
    *:Sortix:*:*)
	GUESS=$UNAME_MACHINE-unknown-sortix
	;;
    *:Twizzler:*:*)
	GUESS=$UNAME_MACHINE-unknown-twizzler
	;;
    *:Redox:*:*)
	GUESS=$UNAME_MACHINE-unknown-redox
	;;
    mips:OSF1:*.*)
	GUESS=mips-dec-osf1
	;;
    alpha:OSF1:*:*)
	# Reset EXIT trap before exiting to avoid spurious non-zero exit code.
	trap '' 0
	case $UNAME_RELEASE in
	*4.0)
		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'`
		;;
	*5.*)
		UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'`
		;;
	esac
	# According to Compaq, /usr/sbin/psrinfo has been available on
	# OSF/1 and Tru64 systems produced since 1995.  I hope that
	# covers most systems running today.  This code pipes the CPU
	# types through head -n 1, so we only detect the type of CPU 0.
	ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^  The alpha \(.*\) processor.*$/\1/p' | head -n 1`
	case $ALPHA_CPU_TYPE in
	    "EV4 (21064)")
		UNAME_MACHINE=alpha ;;
	    "EV4.5 (21064)")
		UNAME_MACHINE=alpha ;;
	    "LCA4 (21066/21068)")
		UNAME_MACHINE=alpha ;;
	    "EV5 (21164)")
		UNAME_MACHINE=alphaev5 ;;
	    "EV5.6 (21164A)")
		UNAME_MACHINE=alphaev56 ;;
	    "EV5.6 (21164PC)")
		UNAME_MACHINE=alphapca56 ;;
	    "EV5.7 (21164PC)")
		UNAME_MACHINE=alphapca57 ;;
	    "EV6 (21264)")
		UNAME_MACHINE=alphaev6 ;;
	    "EV6.7 (21264A)")
		UNAME_MACHINE=alphaev67 ;;
	    "EV6.8CB (21264C)")
		UNAME_MACHINE=alphaev68 ;;
	    "EV6.8AL (21264B)")
		UNAME_MACHINE=alphaev68 ;;
	    "EV6.8CX (21264D)")
		UNAME_MACHINE=alphaev68 ;;
	    "EV6.9A (21264/EV69A)")
		UNAME_MACHINE=alphaev69 ;;
	    "EV7 (21364)")
		UNAME_MACHINE=alphaev7 ;;
	    "EV7.9 (21364A)")
		UNAME_MACHINE=alphaev79 ;;
	esac
	# A Pn.n version is a patched version.
	# A Vn.n version is a released version.
	# A Tn.n version is a released field test version.
	# A Xn.n version is an unreleased experimental baselevel.
	# 1.2 uses "1.2" for uname -r.
	OSF_REL=`echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`
	GUESS=$UNAME_MACHINE-dec-osf$OSF_REL
	;;
    Amiga*:UNIX_System_V:4.0:*)
	GUESS=m68k-unknown-sysv4
	;;
    *:[Aa]miga[Oo][Ss]:*:*)
	GUESS=$UNAME_MACHINE-unknown-amigaos
	;;
    *:[Mm]orph[Oo][Ss]:*:*)
	GUESS=$UNAME_MACHINE-unknown-morphos
	;;
    *:OS/390:*:*)
	GUESS=i370-ibm-openedition
	;;
    *:z/VM:*:*)
	GUESS=s390-ibm-zvmoe
	;;
    *:OS400:*:*)
	GUESS=powerpc-ibm-os400
	;;
    arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
	GUESS=arm-acorn-riscix$UNAME_RELEASE
	;;
    arm*:riscos:*:*|arm*:RISCOS:*:*)
	GUESS=arm-unknown-riscos
	;;
    SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*)
	GUESS=hppa1.1-hitachi-hiuxmpp
	;;
    Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*)
	# akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE.
	case `(/bin/universe) 2>/dev/null` in
	    att) GUESS=pyramid-pyramid-sysv3 ;;
	    *)   GUESS=pyramid-pyramid-bsd   ;;
	esac
	;;
    NILE*:*:*:dcosx)
	GUESS=pyramid-pyramid-svr4
	;;
    DRS?6000:unix:4.0:6*)
	GUESS=sparc-icl-nx6
	;;
    DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*)
	case `/usr/bin/uname -p` in
	    sparc) GUESS=sparc-icl-nx7 ;;
	esac
	;;
    s390x:SunOS:*:*)
	SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`
	GUESS=$UNAME_MACHINE-ibm-solaris2$SUN_REL
	;;
    sun4H:SunOS:5.*:*)
	SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`
	GUESS=sparc-hal-solaris2$SUN_REL
	;;
    sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
	SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`
	GUESS=sparc-sun-solaris2$SUN_REL
	;;
    i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*)
	GUESS=i386-pc-auroraux$UNAME_RELEASE
	;;
    i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
	set_cc_for_build
	SUN_ARCH=i386
	# If there is a compiler, see if it is configured for 64-bit objects.
	# Note that the Sun cc does not turn __LP64__ into 1 like gcc does.
	# This test works for both compilers.
	if test "$CC_FOR_BUILD" != no_compiler_found; then
	    if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \
		(CCOPTS="" $CC_FOR_BUILD -m64 -E - 2>/dev/null) | \
		grep IS_64BIT_ARCH >/dev/null
	    then
		SUN_ARCH=x86_64
	    fi
	fi
	SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`
	GUESS=$SUN_ARCH-pc-solaris2$SUN_REL
	;;
    sun4*:SunOS:6*:*)
	# According to config.sub, this is the proper way to canonicalize
	# SunOS6.  Hard to guess exactly what SunOS6 will be like, but
	# it's likely to be more like Solaris than SunOS4.
	SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`
	GUESS=sparc-sun-solaris3$SUN_REL
	;;
    sun4*:SunOS:*:*)
	case `/usr/bin/arch -k` in
	    Series*|S4*)
		UNAME_RELEASE=`uname -v`
		;;
	esac
	# Japanese Language versions have a version number like `4.1.3-JL'.
	SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/-/_/'`
	GUESS=sparc-sun-sunos$SUN_REL
	;;
    sun3*:SunOS:*:*)
	GUESS=m68k-sun-sunos$UNAME_RELEASE
	;;
    sun*:*:4.2BSD:*)
	UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`
	test "x$UNAME_RELEASE" = x && UNAME_RELEASE=3
	case `/bin/arch` in
	    sun3)
		GUESS=m68k-sun-sunos$UNAME_RELEASE
		;;
	    sun4)
		GUESS=sparc-sun-sunos$UNAME_RELEASE
		;;
	esac
	;;
    aushp:SunOS:*:*)
	GUESS=sparc-auspex-sunos$UNAME_RELEASE
	;;
    # The situation for MiNT is a little confusing.  The machine name
    # can be virtually everything (everything which is not
    # "atarist" or "atariste" at least should have a processor
    # > m68000).  The system name ranges from "MiNT" over "FreeMiNT"
    # to the lowercase version "mint" (or "freemint").  Finally
    # the system name "TOS" denotes a system which is actually not
    # MiNT.  But MiNT is downward compatible to TOS, so this should
    # be no problem.
    atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
	GUESS=m68k-atari-mint$UNAME_RELEASE
	;;
    atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
	GUESS=m68k-atari-mint$UNAME_RELEASE
	;;
    *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
	GUESS=m68k-atari-mint$UNAME_RELEASE
	;;
    milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
	GUESS=m68k-milan-mint$UNAME_RELEASE
	;;
    hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
	GUESS=m68k-hades-mint$UNAME_RELEASE
	;;
    *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
	GUESS=m68k-unknown-mint$UNAME_RELEASE
	;;
    m68k:machten:*:*)
	GUESS=m68k-apple-machten$UNAME_RELEASE
	;;
    powerpc:machten:*:*)
	GUESS=powerpc-apple-machten$UNAME_RELEASE
	;;
    RISC*:Mach:*:*)
	GUESS=mips-dec-mach_bsd4.3
	;;
    RISC*:ULTRIX:*:*)
	GUESS=mips-dec-ultrix$UNAME_RELEASE
	;;
    VAX*:ULTRIX*:*:*)
	GUESS=vax-dec-ultrix$UNAME_RELEASE
	;;
    2020:CLIX:*:* | 2430:CLIX:*:*)
	GUESS=clipper-intergraph-clix$UNAME_RELEASE
	;;
    mips:*:*:UMIPS | mips:*:*:RISCos)
	set_cc_for_build
	sed 's/^	//' << EOF > "$dummy.c"
#ifdef __cplusplus
#include <stdio.h>  /* for printf() prototype */
	int main (int argc, char *argv[]) {
#else
	int main (argc, argv) int argc; char *argv[]; {
#endif
	#if defined (host_mips) && defined (MIPSEB)
	#if defined (SYSTYPE_SYSV)
	  printf ("mips-mips-riscos%ssysv\\n", argv[1]); exit (0);
	#endif
	#if defined (SYSTYPE_SVR4)
	  printf ("mips-mips-riscos%ssvr4\\n", argv[1]); exit (0);
	#endif
	#if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD)
	  printf ("mips-mips-riscos%sbsd\\n", argv[1]); exit (0);
	#endif
	#endif
	  exit (-1);
	}
EOF
	$CC_FOR_BUILD -o "$dummy" "$dummy.c" &&
	  dummyarg=`echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p'` &&
	  SYSTEM_NAME=`"$dummy" "$dummyarg"` &&
	    { echo "$SYSTEM_NAME"; exit; }
	GUESS=mips-mips-riscos$UNAME_RELEASE
	;;
    Motorola:PowerMAX_OS:*:*)
	GUESS=powerpc-motorola-powermax
	;;
    Motorola:*:4.3:PL8-*)
	GUESS=powerpc-harris-powermax
	;;
    Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*)
	GUESS=powerpc-harris-powermax
	;;
    Night_Hawk:Power_UNIX:*:*)
	GUESS=powerpc-harris-powerunix
	;;
    m88k:CX/UX:7*:*)
	GUESS=m88k-harris-cxux7
	;;
    m88k:*:4*:R4*)
	GUESS=m88k-motorola-sysv4
	;;
    m88k:*:3*:R3*)
	GUESS=m88k-motorola-sysv3
	;;
    AViiON:dgux:*:*)
	# DG/UX returns AViiON for all architectures
	UNAME_PROCESSOR=`/usr/bin/uname -p`
	if test "$UNAME_PROCESSOR" = mc88100 || test "$UNAME_PROCESSOR" = mc88110
	then
	    if test "$TARGET_BINARY_INTERFACE"x = m88kdguxelfx || \
	       test "$TARGET_BINARY_INTERFACE"x = x
	    then
		GUESS=m88k-dg-dgux$UNAME_RELEASE
	    else
		GUESS=m88k-dg-dguxbcs$UNAME_RELEASE
	    fi
	else
	    GUESS=i586-dg-dgux$UNAME_RELEASE
	fi
	;;
    M88*:DolphinOS:*:*)	# DolphinOS (SVR3)
	GUESS=m88k-dolphin-sysv3
	;;
    M88*:*:R3*:*)
	# Delta 88k system running SVR3
	GUESS=m88k-motorola-sysv3
	;;
    XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3)
	GUESS=m88k-tektronix-sysv3
	;;
    Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD)
	GUESS=m68k-tektronix-bsd
	;;
    *:IRIX*:*:*)
	IRIX_REL=`echo "$UNAME_RELEASE" | sed -e 's/-/_/g'`
	GUESS=mips-sgi-irix$IRIX_REL
	;;
    ????????:AIX?:[12].1:2)   # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX.
	GUESS=romp-ibm-aix    # uname -m gives an 8 hex-code CPU id
	;;                    # Note that: echo "'`uname -s`'" gives 'AIX '
    i*86:AIX:*:*)
	GUESS=i386-ibm-aix
	;;
    ia64:AIX:*:*)
	if test -x /usr/bin/oslevel ; then
		IBM_REV=`/usr/bin/oslevel`
	else
		IBM_REV=$UNAME_VERSION.$UNAME_RELEASE
	fi
	GUESS=$UNAME_MACHINE-ibm-aix$IBM_REV
	;;
    *:AIX:2:3)
	if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then
		set_cc_for_build
		sed 's/^		//' << EOF > "$dummy.c"
		#include <sys/systemcfg.h>

		main()
			{
			if (!__power_pc())
				exit(1);
			puts("powerpc-ibm-aix3.2.5");
			exit(0);
			}
EOF
		if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"`
		then
			GUESS=$SYSTEM_NAME
		else
			GUESS=rs6000-ibm-aix3.2.5
		fi
	elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then
		GUESS=rs6000-ibm-aix3.2.4
	else
		GUESS=rs6000-ibm-aix3.2
	fi
	;;
    *:AIX:*:[4567])
	# Take the first processor reported as available and inspect its
	# attributes: a " POWER" entry selects rs6000, anything else powerpc.
	IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
	if /usr/sbin/lsattr -El "$IBM_CPU_ID" | grep ' POWER' >/dev/null 2>&1; then
		IBM_ARCH=rs6000
	else
		IBM_ARCH=powerpc
	fi
	# Prefer the level of bos.rte.libc (third colon-separated field, with
	# its trailing digits replaced by 0); fall back to uname values.
	# The sed script is quoted so the shell never treats it as a glob.
	if test -x /usr/bin/lslpp ; then
		IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc | \
			   awk -F: '{ print $3 }' | sed 's/[0-9]*$/0/'`
	else
		IBM_REV=$UNAME_VERSION.$UNAME_RELEASE
	fi
	GUESS=$IBM_ARCH-ibm-aix$IBM_REV
	;;
    *:AIX:*:*)
	GUESS=rs6000-ibm-aix
	;;
    ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*)
	GUESS=romp-ibm-bsd4.4
	;;
    ibmrt:*BSD:*|romp-ibm:BSD:*)            # covers RT/PC BSD and
	GUESS=romp-ibm-bsd$UNAME_RELEASE    # 4.3 with uname added to
	;;                                  # report: romp-ibm BSD 4.3
    *:BOSX:*:*)
	GUESS=rs6000-bull-bosx
	;;
    DPX/2?00:B.O.S.:*:*)
	GUESS=m68k-bull-sysv3
	;;
    9000/[34]??:4.3bsd:1.*:*)
	GUESS=m68k-hp-bsd
	;;
    hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*)
	GUESS=m68k-hp-bsd4.4
	;;
    9000/[34678]??:HP-UX:*:*)
	HPUX_REV=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*.[0B]*//'`
	case $UNAME_MACHINE in
	    9000/31?)            HP_ARCH=m68000 ;;
	    9000/[34]??)         HP_ARCH=m68k ;;
	    9000/[678][0-9][0-9])
		if test -x /usr/bin/getconf; then
		    sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
		    sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
		    case $sc_cpu_version in
		      523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0
		      528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1
		      532)                      # CPU_PA_RISC2_0
			case $sc_kernel_bits in
			  32) HP_ARCH=hppa2.0n ;;
			  64) HP_ARCH=hppa2.0w ;;
			  '') HP_ARCH=hppa2.0 ;;   # HP-UX 10.20
			esac ;;
		    esac
		fi
		if test "$HP_ARCH" = ""; then
		    set_cc_for_build
		    sed 's/^		//' << EOF > "$dummy.c"

		#define _HPUX_SOURCE
		#include <stdlib.h>
		#include <unistd.h>

		int main ()
		{
		#if defined(_SC_KERNEL_BITS)
		    long bits = sysconf(_SC_KERNEL_BITS);
		#endif
		    long cpu  = sysconf (_SC_CPU_VERSION);

		    switch (cpu)
			{
			case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
			case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
			case CPU_PA_RISC2_0:
		#if defined(_SC_KERNEL_BITS)
			    switch (bits)
				{
				case 64: puts ("hppa2.0w"); break;
				case 32: puts ("hppa2.0n"); break;
				default: puts ("hppa2.0"); break;
				} break;
		#else  /* !defined(_SC_KERNEL_BITS) */
			    puts ("hppa2.0"); break;
		#endif
			default: puts ("hppa1.0"); break;
			}
		    exit (0);
		}
EOF
		    (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=`"$dummy"`
		    test -z "$HP_ARCH" && HP_ARCH=hppa
		fi ;;
	esac
	if test "$HP_ARCH" = hppa2.0w
	then
	    set_cc_for_build

	    # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating
	    # 32-bit code.  hppa64-hp-hpux* has the same kernel and a compiler
	    # generating 64-bit code.  GNU and HP use different nomenclature:
	    #
	    # $ CC_FOR_BUILD=cc ./config.guess
	    # => hppa2.0w-hp-hpux11.23
	    # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess
	    # => hppa64-hp-hpux11.23

	    if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) |
		grep -q __LP64__
	    then
		HP_ARCH=hppa2.0w
	    else
		HP_ARCH=hppa64
	    fi
	fi
	GUESS=$HP_ARCH-hp-hpux$HPUX_REV
	;;
    ia64:HP-UX:*:*)
	HPUX_REV=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*.[0B]*//'`
	GUESS=ia64-hp-hpux$HPUX_REV
	;;
    3050*:HI-UX:*:*)
	set_cc_for_build
	sed 's/^	//' << EOF > "$dummy.c"
	#include <unistd.h>
	int
	main ()
	{
	  long cpu = sysconf (_SC_CPU_VERSION);
	  /* The order matters, because CPU_IS_HP_MC68K erroneously returns
	     true for CPU_PA_RISC1_0.  CPU_IS_PA_RISC returns correct
	     results, however.  */
	  if (CPU_IS_PA_RISC (cpu))
	    {
	      switch (cpu)
		{
		  case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break;
		  case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break;
		  case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break;
		  default: puts ("hppa-hitachi-hiuxwe2"); break;
		}
	    }
	  else if (CPU_IS_HP_MC68K (cpu))
	    puts ("m68k-hitachi-hiuxwe2");
	  else puts ("unknown-hitachi-hiuxwe2");
	  exit (0);
	}
EOF
	$CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` &&
		{ echo "$SYSTEM_NAME"; exit; }
	GUESS=unknown-hitachi-hiuxwe2
	;;
    9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*)
	GUESS=hppa1.1-hp-bsd
	;;
    9000/8??:4.3bsd:*:*)
	GUESS=hppa1.0-hp-bsd
	;;
    *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*)
	GUESS=hppa1.0-hp-mpeix
	;;
    hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*)
	GUESS=hppa1.1-hp-osf
	;;
    hp8??:OSF1:*:*)
	GUESS=hppa1.0-hp-osf
	;;
    i*86:OSF1:*:*)
	if test -x /usr/sbin/sysversion ; then
	    GUESS=$UNAME_MACHINE-unknown-osf1mk
	else
	    GUESS=$UNAME_MACHINE-unknown-osf1
	fi
	;;
    parisc*:Lites*:*:*)
	GUESS=hppa1.1-hp-lites
	;;
    C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
	GUESS=c1-convex-bsd
	;;
    C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
	if getsysinfo -f scalar_acc
	then echo c32-convex-bsd
	else echo c2-convex-bsd
	fi
	exit ;;
    C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
	GUESS=c34-convex-bsd
	;;
    C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
	GUESS=c38-convex-bsd
	;;
    C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
	GUESS=c4-convex-bsd
	;;
    CRAY*Y-MP:*:*:*)
	CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'`
	GUESS=ymp-cray-unicos$CRAY_REL
	;;
    CRAY*[A-Z]90:*:*:*)
	echo "$UNAME_MACHINE"-cray-unicos"$UNAME_RELEASE" \
	| sed -e 's/CRAY.*\([A-Z]90\)/\1/' \
	      -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \
	      -e 's/\.[^.]*$/.X/'
	exit ;;
    CRAY*TS:*:*:*)
	CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'`
	GUESS=t90-cray-unicos$CRAY_REL
	;;
    CRAY*T3E:*:*:*)
	CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'`
	GUESS=alphaev5-cray-unicosmk$CRAY_REL
	;;
    CRAY*SV1:*:*:*)
	CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'`
	GUESS=sv1-cray-unicos$CRAY_REL
	;;
    *:UNICOS/mp:*:*)
	CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'`
	GUESS=craynv-cray-unicosmp$CRAY_REL
	;;
    F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
	FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz`
	FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'`
	FUJITSU_REL=`echo "$UNAME_RELEASE" | sed -e 's/ /_/'`
	GUESS=${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}
	;;
    5000:UNIX_System_V:4.*:*)
	FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'`
	FUJITSU_REL=`echo "$UNAME_RELEASE" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'`
	GUESS=sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}
	;;
    i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
	GUESS=$UNAME_MACHINE-pc-bsdi$UNAME_RELEASE
	;;
    sparc*:BSD/OS:*:*)
	GUESS=sparc-unknown-bsdi$UNAME_RELEASE
	;;
    *:BSD/OS:*:*)
	GUESS=$UNAME_MACHINE-unknown-bsdi$UNAME_RELEASE
	;;
    arm:FreeBSD:*:*)
	# The release is cut at the first "-" or "(".  The ABI suffix depends
	# on whether the token __ARM_PCS_VFP survives preprocessing unchanged
	# (gnueabi) or not (gnueabihf).
	UNAME_PROCESSOR=`uname -p`
	FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'`
	set_cc_for_build
	if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
	    | grep -q __ARM_PCS_VFP
	then
	    FREEBSD_ABI=gnueabi
	else
	    FREEBSD_ABI=gnueabihf
	fi
	GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL-$FREEBSD_ABI
	;;
    *:FreeBSD:*:*)
	UNAME_PROCESSOR=`/usr/bin/uname -p`
	case $UNAME_PROCESSOR in
	    amd64)
		UNAME_PROCESSOR=x86_64 ;;
	    i386)
		UNAME_PROCESSOR=i586 ;;
	esac
	FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'`
	GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL
	;;
    i*:CYGWIN*:*)
	GUESS=$UNAME_MACHINE-pc-cygwin
	;;
    *:MINGW64*:*)
	GUESS=$UNAME_MACHINE-pc-mingw64
	;;
    *:MINGW*:*)
	GUESS=$UNAME_MACHINE-pc-mingw32
	;;
    *:MSYS*:*)
	GUESS=$UNAME_MACHINE-pc-msys
	;;
    i*:PW*:*)
	GUESS=$UNAME_MACHINE-pc-pw32
	;;
    *:SerenityOS:*:*)
        GUESS=$UNAME_MACHINE-pc-serenity
        ;;
    *:Interix*:*)
	case $UNAME_MACHINE in
	    x86)
		GUESS=i586-pc-interix$UNAME_RELEASE
		;;
	    authenticamd | genuineintel | EM64T)
		GUESS=x86_64-unknown-interix$UNAME_RELEASE
		;;
	    IA64)
		GUESS=ia64-unknown-interix$UNAME_RELEASE
		;;
	esac ;;
    i*:UWIN*:*)
	GUESS=$UNAME_MACHINE-pc-uwin
	;;
    amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*)
	GUESS=x86_64-pc-cygwin
	;;
    prep*:SunOS:5.*:*)
	SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'`
	GUESS=powerpcle-unknown-solaris2$SUN_REL
	;;
    *:GNU:*:*)
	# the GNU system
	GNU_ARCH=`echo "$UNAME_MACHINE" | sed -e 's,[-/].*$,,'`
	GNU_REL=`echo "$UNAME_RELEASE" | sed -e 's,/.*$,,'`
	GUESS=$GNU_ARCH-unknown-$LIBC$GNU_REL
	;;
    *:GNU/*:*:*)
	# other systems with GNU libc and userland
	GNU_SYS=`echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"`
	GNU_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'`
	GUESS=$UNAME_MACHINE-unknown-$GNU_SYS$GNU_REL-$LIBC
	;;
    *:Minix:*:*)
	GUESS=$UNAME_MACHINE-unknown-minix
	;;
    aarch64:Linux:*:*)
	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
	;;
    aarch64_be:Linux:*:*)
	UNAME_MACHINE=aarch64_be
	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
	;;
    alpha:Linux:*:*)
	# Refine the machine name from the "cpu model" line of /proc/cpuinfo;
	# unknown or missing models leave UNAME_MACHINE untouched.
	case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null` in
	  EV5)   UNAME_MACHINE=alphaev5 ;;
	  EV56)  UNAME_MACHINE=alphaev56 ;;
	  PCA56) UNAME_MACHINE=alphapca56 ;;
	  PCA57) UNAME_MACHINE=alphapca56 ;;
	  EV6)   UNAME_MACHINE=alphaev6 ;;
	  EV67)  UNAME_MACHINE=alphaev67 ;;
	  EV68*) UNAME_MACHINE=alphaev68 ;;
	esac
	# A /bin/sh whose private headers mention ld.so.1 selects gnulibc1.
	# objdump diagnostics (e.g. the tool is not installed) are discarded,
	# like those of the other probes in this script.
	if objdump --private-headers /bin/sh 2>/dev/null | grep -q ld.so.1 ; then
	    LIBC=gnulibc1
	fi
	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
	;;
    arc:Linux:*:* | arceb:Linux:*:* | arc32:Linux:*:* | arc64:Linux:*:*)
	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
	;;
    arm*:Linux:*:*)
	set_cc_for_build
	if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
	    | grep -q __ARM_EABI__
	then
	    GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
	else
	    if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
		| grep -q __ARM_PCS_VFP
	    then
		GUESS=$UNAME_MACHINE-unknown-linux-${LIBC}eabi
	    else
		GUESS=$UNAME_MACHINE-unknown-linux-${LIBC}eabihf
	    fi
	fi
	;;
    avr32*:Linux:*:*)
	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
	;;
    cris:Linux:*:*)
	GUESS=$UNAME_MACHINE-axis-linux-$LIBC
	;;
    crisv32:Linux:*:*)
	GUESS=$UNAME_MACHINE-axis-linux-$LIBC
	;;
    e2k:Linux:*:*)
	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
	;;
    frv:Linux:*:*)
	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
	;;
    hexagon:Linux:*:*)
	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
	;;
    i*86:Linux:*:*)
	GUESS=$UNAME_MACHINE-pc-linux-$LIBC
	;;
    ia64:Linux:*:*)
	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
	;;
    k1om:Linux:*:*)
	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
	;;
    loongarch32:Linux:*:* | loongarch64:Linux:*:* | loongarchx32:Linux:*:*)
	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
	;;
    m32r*:Linux:*:*)
	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
	;;
    m68*:Linux:*:*)
	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
	;;
    mips:Linux:*:* | mips64:Linux:*:*)
	set_cc_for_build
	IS_GLIBC=0
	test x"${LIBC}" = xgnu && IS_GLIBC=1
	sed 's/^	//' << EOF > "$dummy.c"
	#undef CPU
	#undef mips
	#undef mipsel
	#undef mips64
	#undef mips64el
	#if ${IS_GLIBC} && defined(_ABI64)
	LIBCABI=gnuabi64
	#else
	#if ${IS_GLIBC} && defined(_ABIN32)
	LIBCABI=gnuabin32
	#else
	LIBCABI=${LIBC}
	#endif
	#endif

	#if ${IS_GLIBC} && defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6
	CPU=mipsisa64r6
	#else
	#if ${IS_GLIBC} && !defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6
	CPU=mipsisa32r6
	#else
	#if defined(__mips64)
	CPU=mips64
	#else
	CPU=mips
	#endif
	#endif
	#endif

	#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
	MIPS_ENDIAN=el
	#else
	#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
	MIPS_ENDIAN=
	#else
	MIPS_ENDIAN=
	#endif
	#endif
EOF
	cc_set_vars=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU\|^MIPS_ENDIAN\|^LIBCABI'`
	eval "$cc_set_vars"
	test "x$CPU" != x && { echo "$CPU${MIPS_ENDIAN}-unknown-linux-$LIBCABI"; exit; }
	;;
    mips64el:Linux:*:*)
	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
	;;
    openrisc*:Linux:*:*)
	GUESS=or1k-unknown-linux-$LIBC
	;;
    or32:Linux:*:* | or1k*:Linux:*:*)
	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
	;;
    padre:Linux:*:*)
	GUESS=sparc-unknown-linux-$LIBC
	;;
    parisc64:Linux:*:* | hppa64:Linux:*:*)
	GUESS=hppa64-unknown-linux-$LIBC
	;;
    parisc:Linux:*:* | hppa:Linux:*:*)
	# Look for CPU level
	case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in
	  PA7*) GUESS=hppa1.1-unknown-linux-$LIBC ;;
	  PA8*) GUESS=hppa2.0-unknown-linux-$LIBC ;;
	  *)    GUESS=hppa-unknown-linux-$LIBC ;;
	esac
	;;
    ppc64:Linux:*:*)
	GUESS=powerpc64-unknown-linux-$LIBC
	;;
    ppc:Linux:*:*)
	GUESS=powerpc-unknown-linux-$LIBC
	;;
    ppc64le:Linux:*:*)
	GUESS=powerpc64le-unknown-linux-$LIBC
	;;
    ppcle:Linux:*:*)
	GUESS=powerpcle-unknown-linux-$LIBC
	;;
    riscv32:Linux:*:* | riscv32be:Linux:*:* | riscv64:Linux:*:* | riscv64be:Linux:*:*)
	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
	;;
    s390:Linux:*:* | s390x:Linux:*:*)
	GUESS=$UNAME_MACHINE-ibm-linux-$LIBC
	;;
    sh64*:Linux:*:*)
	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
	;;
    sh*:Linux:*:*)
	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
	;;
    sparc:Linux:*:* | sparc64:Linux:*:*)
	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
	;;
    tile*:Linux:*:*)
	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
	;;
    vax:Linux:*:*)
	GUESS=$UNAME_MACHINE-dec-linux-$LIBC
	;;
    x86_64:Linux:*:*)
	set_cc_for_build
	LIBCABI=$LIBC
	if test "$CC_FOR_BUILD" != no_compiler_found; then
	    if (echo '#ifdef __ILP32__'; echo IS_X32; echo '#endif') | \
		(CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
		grep IS_X32 >/dev/null
	    then
		LIBCABI=${LIBC}x32
	    fi
	fi
	GUESS=$UNAME_MACHINE-pc-linux-$LIBCABI
	;;
    xtensa*:Linux:*:*)
	GUESS=$UNAME_MACHINE-unknown-linux-$LIBC
	;;
    i*86:DYNIX/ptx:4*:*)
	# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
	# earlier versions are messed up and put the nodename in both
	# sysname and nodename.
	GUESS=i386-sequent-sysv4
	;;
    i*86:UNIX_SV:4.2MP:2.*)
	# Unixware is an offshoot of SVR4, but it has its own version
	# number series starting with 2...
	# I am not positive that other SVR4 systems won't match this,
	# I just have to hope.  -- rms.
	# Use sysv4.2uw... so that sysv4* matches it.
	GUESS=$UNAME_MACHINE-pc-sysv4.2uw$UNAME_VERSION
	;;
    i*86:OS/2:*:*)
	# If we were able to find `uname', then EMX Unix compatibility
	# is probably installed.
	GUESS=$UNAME_MACHINE-pc-os2-emx
	;;
    i*86:XTS-300:*:STOP)
	GUESS=$UNAME_MACHINE-unknown-stop
	;;
    i*86:atheos:*:*)
	GUESS=$UNAME_MACHINE-unknown-atheos
	;;
    i*86:syllable:*:*)
	GUESS=$UNAME_MACHINE-pc-syllable
	;;
    i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*)
	GUESS=i386-unknown-lynxos$UNAME_RELEASE
	;;
    i*86:*DOS:*:*)
	GUESS=$UNAME_MACHINE-pc-msdosdjgpp
	;;
    i*86:*:4.*:*)
	UNAME_REL=`echo "$UNAME_RELEASE" | sed 's/\/MP$//'`
	if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then
		GUESS=$UNAME_MACHINE-univel-sysv$UNAME_REL
	else
		GUESS=$UNAME_MACHINE-pc-sysv$UNAME_REL
	fi
	;;
    i*86:*:5:[678]*)
	# UnixWare 7.x, OpenUNIX and OpenServer 6.
	case `/bin/uname -X | grep "^Machine"` in
	    *486*)	     UNAME_MACHINE=i486 ;;
	    *Pentium)	     UNAME_MACHINE=i586 ;;
	    *Pent*|*Celeron) UNAME_MACHINE=i686 ;;
	esac
	GUESS=$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}
	;;
    i*86:*:3.2:*)
	if test -f /usr/options/cb.name; then
		UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name`
		GUESS=$UNAME_MACHINE-pc-isc$UNAME_REL
	elif /bin/uname -X 2>/dev/null >/dev/null ; then
		UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')`
		(/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486
		(/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \
			&& UNAME_MACHINE=i586
		(/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \
			&& UNAME_MACHINE=i686
		(/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \
			&& UNAME_MACHINE=i686
		GUESS=$UNAME_MACHINE-pc-sco$UNAME_REL
	else
		GUESS=$UNAME_MACHINE-pc-sysv32
	fi
	;;
    pc:*:*:*)
	# Left here for compatibility:
	# uname -m prints for DJGPP always 'pc', but it prints nothing about
	# the processor, so we play safe by assuming i586.
	# Note: whatever this is, it MUST be the same as what config.sub
	# prints for the "djgpp" host, or else GDB configure will decide that
	# this is a cross-build.
	GUESS=i586-pc-msdosdjgpp
	;;
    Intel:Mach:3*:*)
	GUESS=i386-pc-mach3
	;;
    paragon:*:*:*)
	GUESS=i860-intel-osf1
	;;
    i860:*:4.*:*) # i860-SVR4
	if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then
	  GUESS=i860-stardent-sysv$UNAME_RELEASE    # Stardent Vistra i860-SVR4
	else # Add other i860-SVR4 vendors below as they are discovered.
	  GUESS=i860-unknown-sysv$UNAME_RELEASE     # Unknown i860-SVR4
	fi
	;;
    mini*:CTIX:SYS*5:*)
	# "miniframe"
	GUESS=m68010-convergent-sysv
	;;
    mc68k:UNIX:SYSTEM5:3.51m)
	GUESS=m68k-convergent-sysv
	;;
    M680?0:D-NIX:5.3:*)
	GUESS=m68k-diab-dnix
	;;
    M68*:*:R3V[5678]*:*)
	test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;;
    3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0)
	OS_REL=''
	test -r /etc/.relid \
	&& OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
	  && { echo i486-ncr-sysv4.3"$OS_REL"; exit; }
	/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
	  && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;;
    3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
	  && { echo i486-ncr-sysv4; exit; } ;;
    NCR*:*:4.2:* | MPRAS*:*:4.2:*)
	OS_REL='.3'
	test -r /etc/.relid \
	    && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid`
	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
	    && { echo i486-ncr-sysv4.3"$OS_REL"; exit; }
	/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
	    && { echo i586-ncr-sysv4.3"$OS_REL"; exit; }
	/bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \
	    && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;;
    m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*)
	GUESS=m68k-unknown-lynxos$UNAME_RELEASE
	;;
    mc68030:UNIX_System_V:4.*:*)
	GUESS=m68k-atari-sysv4
	;;
    TSUNAMI:LynxOS:2.*:*)
	GUESS=sparc-unknown-lynxos$UNAME_RELEASE
	;;
    rs6000:LynxOS:2.*:*)
	GUESS=rs6000-unknown-lynxos$UNAME_RELEASE
	;;
    PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*)
	GUESS=powerpc-unknown-lynxos$UNAME_RELEASE
	;;
    SM[BE]S:UNIX_SV:*:*)
	GUESS=mips-dde-sysv$UNAME_RELEASE
	;;
    RM*:ReliantUNIX-*:*:*)
	GUESS=mips-sni-sysv4
	;;
    RM*:SINIX-*:*:*)
	GUESS=mips-sni-sysv4
	;;
    *:SINIX-*:*:*)
	if uname -p 2>/dev/null >/dev/null ; then
		UNAME_MACHINE=`(uname -p) 2>/dev/null`
		GUESS=$UNAME_MACHINE-sni-sysv4
	else
		GUESS=ns32k-sni-sysv
	fi
	;;
    PENTIUM:*:4.0*:*)	# Unisys `ClearPath HMP IX 4000' SVR4/MP effort
			# says <Richard.M.Bartel@ccMail.Census.GOV>
	GUESS=i586-unisys-sysv4
	;;
    *:UNIX_System_V:4*:FTX*)
	# From Gerald Hewes <hewes@openmarket.com>.
	# How about differentiating between stratus architectures? -djm
	GUESS=hppa1.1-stratus-sysv4
	;;
    *:*:*:FTX*)
	# From seanf@swdc.stratus.com.
	GUESS=i860-stratus-sysv4
	;;
    i*86:VOS:*:*)
	# From Paul.Green@stratus.com.
	GUESS=$UNAME_MACHINE-stratus-vos
	;;
    *:VOS:*:*)
	# From Paul.Green@stratus.com.
	GUESS=hppa1.1-stratus-vos
	;;
    mc68*:A/UX:*:*)
	GUESS=m68k-apple-aux$UNAME_RELEASE
	;;
    news*:NEWS-OS:6*:*)
	GUESS=mips-sony-newsos6
	;;
    R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
	if test -d /usr/nec; then
		GUESS=mips-nec-sysv$UNAME_RELEASE
	else
		GUESS=mips-unknown-sysv$UNAME_RELEASE
	fi
	;;
    BeBox:BeOS:*:*)	# BeOS running on hardware made by Be, PPC only.
	GUESS=powerpc-be-beos
	;;
    BeMac:BeOS:*:*)	# BeOS running on Mac or Mac clone, PPC only.
	GUESS=powerpc-apple-beos
	;;
    BePC:BeOS:*:*)	# BeOS running on Intel PC compatible.
	GUESS=i586-pc-beos
	;;
    BePC:Haiku:*:*)	# Haiku running on Intel PC compatible.
	GUESS=i586-pc-haiku
	;;
    x86_64:Haiku:*:*)
	GUESS=x86_64-unknown-haiku
	;;
    SX-4:SUPER-UX:*:*)
	GUESS=sx4-nec-superux$UNAME_RELEASE
	;;
    SX-5:SUPER-UX:*:*)
	GUESS=sx5-nec-superux$UNAME_RELEASE
	;;
    SX-6:SUPER-UX:*:*)
	GUESS=sx6-nec-superux$UNAME_RELEASE
	;;
    SX-7:SUPER-UX:*:*)
	GUESS=sx7-nec-superux$UNAME_RELEASE
	;;
    SX-8:SUPER-UX:*:*)
	GUESS=sx8-nec-superux$UNAME_RELEASE
	;;
    SX-8R:SUPER-UX:*:*)
	GUESS=sx8r-nec-superux$UNAME_RELEASE
	;;
    SX-ACE:SUPER-UX:*:*)
	GUESS=sxace-nec-superux$UNAME_RELEASE
	;;
    Power*:Rhapsody:*:*)
	GUESS=powerpc-apple-rhapsody$UNAME_RELEASE
	;;
    *:Rhapsody:*:*)
	GUESS=$UNAME_MACHINE-apple-rhapsody$UNAME_RELEASE
	;;
    arm64:Darwin:*:*)
	GUESS=aarch64-apple-darwin$UNAME_RELEASE
	;;
    *:Darwin:*:*)
	UNAME_PROCESSOR=`uname -p`
	case $UNAME_PROCESSOR in
	    unknown) UNAME_PROCESSOR=powerpc ;;
	esac
	if command -v xcode-select > /dev/null 2> /dev/null && \
		! xcode-select --print-path > /dev/null 2> /dev/null ; then
	    # Avoid executing cc if there is no toolchain installed as
	    # cc will be a stub that puts up a graphical alert
	    # prompting the user to install developer tools.
	    CC_FOR_BUILD=no_compiler_found
	else
	    set_cc_for_build
	fi
	if test "$CC_FOR_BUILD" != no_compiler_found; then
	    if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
		   (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
		   grep IS_64BIT_ARCH >/dev/null
	    then
		case $UNAME_PROCESSOR in
		    i386) UNAME_PROCESSOR=x86_64 ;;
		    powerpc) UNAME_PROCESSOR=powerpc64 ;;
		esac
	    fi
	    # On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc
	    if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \
		   (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
		   grep IS_PPC >/dev/null
	    then
		UNAME_PROCESSOR=powerpc
	    fi
	elif test "$UNAME_PROCESSOR" = i386 ; then
	    # uname -m returns i386 or x86_64
	    UNAME_PROCESSOR=$UNAME_MACHINE
	fi
	GUESS=$UNAME_PROCESSOR-apple-darwin$UNAME_RELEASE
	;;
    *:procnto*:*:* | *:QNX:[0123456789]*:*)
	UNAME_PROCESSOR=`uname -p`
	if test "$UNAME_PROCESSOR" = x86; then
		UNAME_PROCESSOR=i386
		UNAME_MACHINE=pc
	fi
	GUESS=$UNAME_PROCESSOR-$UNAME_MACHINE-nto-qnx$UNAME_RELEASE
	;;
    *:QNX:*:4*)
	GUESS=i386-pc-qnx
	;;
    NEO-*:NONSTOP_KERNEL:*:*)
	GUESS=neo-tandem-nsk$UNAME_RELEASE
	;;
    NSE-*:NONSTOP_KERNEL:*:*)
	GUESS=nse-tandem-nsk$UNAME_RELEASE
	;;
    NSR-*:NONSTOP_KERNEL:*:*)
	GUESS=nsr-tandem-nsk$UNAME_RELEASE
	;;
    NSV-*:NONSTOP_KERNEL:*:*)
	GUESS=nsv-tandem-nsk$UNAME_RELEASE
	;;
    NSX-*:NONSTOP_KERNEL:*:*)
	GUESS=nsx-tandem-nsk$UNAME_RELEASE
	;;
    *:NonStop-UX:*:*)
	GUESS=mips-compaq-nonstopux
	;;
    BS2000:POSIX*:*:*)
	GUESS=bs2000-siemens-sysv
	;;
    DS/*:UNIX_System_V:*:*)
	GUESS=$UNAME_MACHINE-$UNAME_SYSTEM-$UNAME_RELEASE
	;;
    *:Plan9:*:*)
	# "uname -m" is not consistent, so use $cputype instead. 386
	# is converted to i386 for consistency with other x86
	# operating systems.
	if test "${cputype-}" = 386; then
	    UNAME_MACHINE=i386
	elif test "x${cputype-}" != x; then
	    UNAME_MACHINE=$cputype
	fi
	GUESS=$UNAME_MACHINE-unknown-plan9
	;;
    *:TOPS-10:*:*)
	GUESS=pdp10-unknown-tops10
	;;
    *:TENEX:*:*)
	GUESS=pdp10-unknown-tenex
	;;
    KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*)
	GUESS=pdp10-dec-tops20
	;;
    XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*)
	GUESS=pdp10-xkl-tops20
	;;
    *:TOPS-20:*:*)
	GUESS=pdp10-unknown-tops20
	;;
    *:ITS:*:*)
	GUESS=pdp10-unknown-its
	;;
    SEI:*:*:SEIUX)
	GUESS=mips-sei-seiux$UNAME_RELEASE
	;;
    *:DragonFly:*:*)
	DRAGONFLY_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'`
	GUESS=$UNAME_MACHINE-unknown-dragonfly$DRAGONFLY_REL
	;;
    *:*VMS:*:*)
	UNAME_MACHINE=`(uname -p) 2>/dev/null`
	case $UNAME_MACHINE in
	    A*) GUESS=alpha-dec-vms ;;
	    I*) GUESS=ia64-dec-vms ;;
	    V*) GUESS=vax-dec-vms ;;
	esac ;;
    *:XENIX:*:SysV)
	GUESS=i386-pc-xenix
	;;
    i*86:skyos:*:*)
	SKYOS_REL=`echo "$UNAME_RELEASE" | sed -e 's/ .*$//'`
	GUESS=$UNAME_MACHINE-pc-skyos$SKYOS_REL
	;;
    i*86:rdos:*:*)
	GUESS=$UNAME_MACHINE-pc-rdos
	;;
    i*86:Fiwix:*:*)
	GUESS=$UNAME_MACHINE-pc-fiwix
	;;
    *:AROS:*:*)
	GUESS=$UNAME_MACHINE-unknown-aros
	;;
    x86_64:VMkernel:*:*)
	GUESS=$UNAME_MACHINE-unknown-esx
	;;
    amd64:Isilon\ OneFS:*:*)
	GUESS=x86_64-unknown-onefs
	;;
    *:Unleashed:*:*)
	GUESS=$UNAME_MACHINE-unknown-unleashed$UNAME_RELEASE
	;;
esac

# If the uname dispatch produced a guess, print it and finish; an empty
# GUESS falls through to the remaining probes.
test "x$GUESS" = x || {
    echo "$GUESS"
    exit
}

# No uname command or uname output not recognized.
set_cc_for_build
cat > "$dummy.c" <<EOF
#ifdef _SEQUENT_
#include <sys/types.h>
#include <sys/utsname.h>
#endif
#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__)
#if defined (vax) || defined (__vax) || defined (__vax__) || defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__)
#include <signal.h>
#if defined(_SIZE_T_) || defined(SIGLOST)
#include <sys/utsname.h>
#endif
#endif
#endif
main ()
{
#if defined (sony)
#if defined (MIPSEB)
  /* BFD wants "bsd" instead of "newsos".  Perhaps BFD should be changed,
     I don't know....  */
  printf ("mips-sony-bsd\n"); exit (0);
#else
#include <sys/param.h>
  printf ("m68k-sony-newsos%s\n",
#ifdef NEWSOS4
  "4"
#else
  ""
#endif
  ); exit (0);
#endif
#endif

#if defined (NeXT)
#if !defined (__ARCHITECTURE__)
#define __ARCHITECTURE__ "m68k"
#endif
  int version;
  version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`;
  if (version < 4)
    printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version);
  else
    printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version);
  exit (0);
#endif

#if defined (MULTIMAX) || defined (n16)
#if defined (UMAXV)
  printf ("ns32k-encore-sysv\n"); exit (0);
#else
#if defined (CMU)
  printf ("ns32k-encore-mach\n"); exit (0);
#else
  printf ("ns32k-encore-bsd\n"); exit (0);
#endif
#endif
#endif

#if defined (__386BSD__)
  printf ("i386-pc-bsd\n"); exit (0);
#endif

#if defined (sequent)
#if defined (i386)
  printf ("i386-sequent-dynix\n"); exit (0);
#endif
#if defined (ns32000)
  printf ("ns32k-sequent-dynix\n"); exit (0);
#endif
#endif

#if defined (_SEQUENT_)
  struct utsname un;

  uname(&un);
  if (strncmp(un.version, "V2", 2) == 0) {
    printf ("i386-sequent-ptx2\n"); exit (0);
  }
  if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */
    printf ("i386-sequent-ptx1\n"); exit (0);
  }
  printf ("i386-sequent-ptx\n"); exit (0);
#endif

#if defined (vax)
#if !defined (ultrix)
#include <sys/param.h>
#if defined (BSD)
#if BSD == 43
  printf ("vax-dec-bsd4.3\n"); exit (0);
#else
#if BSD == 199006
  printf ("vax-dec-bsd4.3reno\n"); exit (0);
#else
  printf ("vax-dec-bsd\n"); exit (0);
#endif
#endif
#else
  printf ("vax-dec-bsd\n"); exit (0);
#endif
#else
#if defined(_SIZE_T_) || defined(SIGLOST)
  struct utsname un;
  uname (&un);
  printf ("vax-dec-ultrix%s\n", un.release); exit (0);
#else
  printf ("vax-dec-ultrix\n"); exit (0);
#endif
#endif
#endif
#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__)
#if defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__)
#if defined(_SIZE_T_) || defined(SIGLOST)
  /* uname() fills a caller-provided struct, so un must be an object,
     not an uninitialized pointer.  */
  struct utsname un;
  uname (&un);
  printf ("mips-dec-ultrix%s\n", un.release); exit (0);
#else
  printf ("mips-dec-ultrix\n"); exit (0);
#endif
#endif
#endif

#if defined (alliant) && defined (i860)
  printf ("i860-alliant-bsd\n"); exit (0);
#endif

  exit (1);
}
EOF

# Compile the C probe source in $dummy.c; when it builds and then runs
# successfully, its standard output is the system name: print it and stop.
if $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null; then
	if SYSTEM_NAME=`"$dummy"`; then
		echo "$SYSTEM_NAME"
		exit
	fi
fi

# Apollo machines are recognized by the /usr/apollo directory; the system
# type is then taken from the ISP and SYSTYPE environment variables.
if test -d /usr/apollo; then
	echo "$ISP-apollo-$SYSTYPE"
	exit
fi

# Failure path: nothing above produced a system name.  Everything from here
# on writes diagnostics to stderr and the script ends with exit status 1.
echo "$0: unable to guess system type" >&2

case $UNAME_MACHINE:$UNAME_SYSTEM in
    mips:Linux | mips64:Linux)
	# If we got here on MIPS GNU/Linux, output extra information.
	cat >&2 <<EOF

NOTE: MIPS GNU/Linux systems require a C compiler to fully recognize
the system type. Please install a C compiler and try again.
EOF
	;;
esac

# Generic advice for every unrecognized system: refresh both scripts.
cat >&2 <<EOF

This script (version $timestamp), has failed to recognize the
operating system you are using. If your script is old, overwrite *all*
copies of config.guess and config.sub with the latest versions from:

  https://git.savannah.gnu.org/cgit/config.git/plain/config.guess
and
  https://git.savannah.gnu.org/cgit/config.git/plain/config.sub
EOF

# Age of this script in whole calendar years: the text of $timestamp before
# its first '-' is taken as the year and subtracted from the current year.
our_year=`echo $timestamp | sed 's,-.*,,'`
thisyear=`date +%Y`
# shellcheck disable=SC2003
script_age=`expr "$thisyear" - "$our_year"`
# Only solicit a report (with a dump of the probed values) when the script is
# less than three years old.
if test "$script_age" -lt 3 ; then
   cat >&2 <<EOF

If $0 has already been updated, send the following data and any
information you think might be pertinent to config-patches@gnu.org to
provide the necessary information to handle your system.

config.guess timestamp = $timestamp

uname -m = `(uname -m) 2>/dev/null || echo unknown`
uname -r = `(uname -r) 2>/dev/null || echo unknown`
uname -s = `(uname -s) 2>/dev/null || echo unknown`
uname -v = `(uname -v) 2>/dev/null || echo unknown`

/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null`
/bin/uname -X     = `(/bin/uname -X) 2>/dev/null`

hostinfo               = `(hostinfo) 2>/dev/null`
/bin/universe          = `(/bin/universe) 2>/dev/null`
/usr/bin/arch -k       = `(/usr/bin/arch -k) 2>/dev/null`
/bin/arch              = `(/bin/arch) 2>/dev/null`
/usr/bin/oslevel       = `(/usr/bin/oslevel) 2>/dev/null`
/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null`

UNAME_MACHINE = "$UNAME_MACHINE"
UNAME_RELEASE = "$UNAME_RELEASE"
UNAME_SYSTEM  = "$UNAME_SYSTEM"
UNAME_VERSION = "$UNAME_VERSION"
EOF
fi

# The system type could not be determined: always a failing exit status.
exit 1

# Local variables:
# eval: (add-hook 'before-save-hook 'time-stamp)
# time-stamp-start: "timestamp='"
# time-stamp-format: "%:y-%02m-%02d"
# time-stamp-end: "'"
# End:


================================================
FILE: audio/paddleaudio/third_party/patches/config.sub
================================================
#! /bin/sh
# Configuration validation subroutine script.
#   Copyright 1992-2022 Free Software Foundation, Inc.

# shellcheck disable=SC2006,SC2268 # see below for rationale

timestamp='2022-01-03'

# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <https://www.gnu.org/licenses/>.
#
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that
# program.  This Exception is an additional permission under section 7
# of the GNU General Public License, version 3 ("GPLv3").


# Please send patches to <config-patches@gnu.org>.
#
# Configuration subroutine to validate and canonicalize a configuration type.
# Supply the specified configuration type as an argument.
# If it is invalid, we print an error message on stderr and exit with code 1.
# Otherwise, we print the canonical config type on stdout and succeed.

# You can get the latest version of this script from:
# https://git.savannah.gnu.org/cgit/config.git/plain/config.sub

# This file is supposed to be the same for all GNU packages
# and recognize all the CPU types, system types and aliases
# that are meaningful with *any* GNU software.
# Each package is responsible for reporting which valid configurations
# it does not support.  The user should be able to distinguish
# a failure to support a valid configuration from a meaningless
# configuration.

# The goal of this file is to map all the various variations of a given
# machine specification into a single specification in the form:
#	CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
# or in some cases, the newer four-part form:
#	CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
# It is wrong to echo any other type of specification.

# The "shellcheck disable" line above the timestamp inhibits complaints
# about features and limitations of the classic Bourne shell that were
# superseded or lifted in POSIX.  However, this script identifies a wide
# variety of pre-POSIX systems that do not have POSIX shells at all, and
# even some reasonably current systems (Solaris 10 as case-in-point) still
# have a pre-POSIX /bin/sh.

# Name of this script with any leading directory components stripped; used
# as the prefix of diagnostics and in the --help hint.
me=`echo "$0" | sed -e 's,.*/,,'`

# Text printed for --help.
usage="\
Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS

Canonicalize a configuration name.

Options:
  -h, --help         print this help, then exit
  -t, --time-stamp   print date of last modification, then exit
  -v, --version      print version number, then exit

Report bugs and patches to <config-patches@gnu.org>."

# Text printed for --version.
version="\
GNU config.sub ($timestamp)

Copyright 1992-2022 Free Software Foundation, Inc.

This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."

# Suffix appended to usage errors.  It starts with a newline so that it can be
# concatenated directly after an error message.
help="
Try \`$me --help' for more information."

# Parse command line
# Only the first argument is ever examined: every arm either exits or breaks
# out of the loop, so the loop body runs at most once.  The order of the arms
# matters because the patterns overlap (for example "-*" would also match
# "--help" and "-").
while test $# -gt 0 ; do
  case $1 in
    --time-stamp | --time* | -t )
       echo "$timestamp" ; exit ;;
    --version | -v )
       echo "$version" ; exit ;;
    --help | --h* | -h )
       echo "$usage"; exit ;;
    -- )     # Stop option processing
       shift; break ;;
    - )	# Use stdin as input.
       break ;;
    -* )
       # Any other dash-prefixed word is an unknown option.
       echo "$me: invalid option $1$help" >&2
       exit 1 ;;

    *local*)
       # First pass through any local machine types.
       echo "$1"
       exit ;;

    * )
       # Anything else is the configuration name; leave it in $1.
       break ;;
  esac
done

# Exactly one configuration name must remain after option parsing; report a
# usage error and fail for zero or for more than one.
if test $# -eq 0 ; then
    echo "$me: missing argument$help" >&2
    exit 1
elif test $# -ne 1 ; then
    echo "$me: too many arguments$help" >&2
    exit 1
fi

# Split fields of configuration type
# The argument is fed to `read' through a here-document with IFS set to "-"
# for that one command, so field1..field3 receive the first three
# dash-separated components and field4 receives everything that remains.
# IFS is saved beforehand and restored afterwards.
# shellcheck disable=SC2162
saved_IFS=$IFS
IFS="-" read field1 field2 field3 field4 <<EOF
$1
EOF
IFS=$saved_IFS

# Separate into logical components for further validation
# The number of dashes in $1 selects how the fields are grouped.  Every path
# that does not exit sets two variables:
#   basic_machine  - a CPU, CPU-VENDOR, or machine alias, decoded further below
#   basic_os       - the OS (or KERNEL-OS) part; empty when none was given
# Arms are tried from most dashes to fewest; the order is significant because
# "*-*" would also match inputs containing more dashes.
case $1 in
	*-*-*-*-*)
		echo Invalid configuration \`"$1"\': more than four components >&2
		exit 1
		;;
	*-*-*-*)
		# Four components: CPU-VENDOR and KERNEL-OS.
		basic_machine=$field1-$field2
		basic_os=$field3-$field4
		;;
	*-*-*)
		# Ambiguous whether COMPANY is present, or skipped and KERNEL-OS is two
		# parts
		maybe_os=$field2-$field3
		case $maybe_os in
			nto-qnx* | linux-* | uclinux-uclibc* \
			| uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* \
			| netbsd*-eabi* | kopensolaris*-gnu* | cloudabi*-eabi* \
			| storm-chaos* | os2-emx* | rtmk-nova*)
				# Known KERNEL-OS pairs: the vendor was omitted.
				basic_machine=$field1
				basic_os=$maybe_os
				;;
			android-linux)
				# Reversed spelling; rewritten to linux-android.
				basic_machine=$field1-unknown
				basic_os=linux-android
				;;
			*)
				basic_machine=$field1-$field2
				basic_os=$field3
				;;
		esac
		;;
	*-*)
		# A lone config we happen to match not fitting any pattern
		case $field1-$field2 in
			decstation-3100)
				basic_machine=mips-dec
				basic_os=
				;;
			*-*)
				# Second component is usually, but not always the OS
				case $field2 in
					# Prevent following clause from handling this valid os
					sun*os*)
						basic_machine=$field1
						basic_os=$field2
						;;
					zephyr*)
						basic_machine=$field1-unknown
						basic_os=$field2
						;;
					# Manufacturers
					dec* | mips* | sequent* | encore* | pc533* | sgi* | sony* \
					| att* | 7300* | 3300* | delta* | motorola* | sun[234]* \
					| unicom* | ibm* | next | hp | isi* | apollo | altos* \
					| convergent* | ncr* | news | 32* | 3600* | 3100* \
					| hitachi* | c[123]* | convex* | sun | crds | omron* | dg \
					| ultra | tti* | harris | dolphin | highlevel | gould \
					| cbm | ns | masscomp | apple | axis | knuth | cray \
					| microblaze* | sim | cisco \
					| oki | wec | wrs | winbond)
						basic_machine=$field1-$field2
						basic_os=
						;;
					*)
						basic_machine=$field1
						basic_os=$field2
						;;
				esac
			;;
		esac
		;;
	*)
		# Convert single-component short-hands not valid as part of
		# multi-component configurations.
		case $field1 in
			386bsd)
				basic_machine=i386-pc
				basic_os=bsd
				;;
			a29khif)
				basic_machine=a29k-amd
				basic_os=udi
				;;
			adobe68k)
				basic_machine=m68010-adobe
				basic_os=scout
				;;
			alliant)
				basic_machine=fx80-alliant
				basic_os=
				;;
			altos | altos3068)
				basic_machine=m68k-altos
				basic_os=
				;;
			am29k)
				basic_machine=a29k-none
				basic_os=bsd
				;;
			amdahl)
				basic_machine=580-amdahl
				basic_os=sysv
				;;
			amiga)
				basic_machine=m68k-unknown
				basic_os=
				;;
			amigaos | amigados)
				basic_machine=m68k-unknown
				basic_os=amigaos
				;;
			amigaunix | amix)
				basic_machine=m68k-unknown
				basic_os=sysv4
				;;
			apollo68)
				basic_machine=m68k-apollo
				basic_os=sysv
				;;
			apollo68bsd)
				basic_machine=m68k-apollo
				basic_os=bsd
				;;
			aros)
				basic_machine=i386-pc
				basic_os=aros
				;;
			aux)
				basic_machine=m68k-apple
				basic_os=aux
				;;
			balance)
				basic_machine=ns32k-sequent
				basic_os=dynix
				;;
			blackfin)
				basic_machine=bfin-unknown
				basic_os=linux
				;;
			cegcc)
				basic_machine=arm-unknown
				basic_os=cegcc
				;;
			convex-c1)
				basic_machine=c1-convex
				basic_os=bsd
				;;
			convex-c2)
				basic_machine=c2-convex
				basic_os=bsd
				;;
			convex-c32)
				basic_machine=c32-convex
				basic_os=bsd
				;;
			convex-c34)
				basic_machine=c34-convex
				basic_os=bsd
				;;
			convex-c38)
				basic_machine=c38-convex
				basic_os=bsd
				;;
			cray)
				basic_machine=j90-cray
				basic_os=unicos
				;;
			crds | unos)
				basic_machine=m68k-crds
				basic_os=
				;;
			da30)
				basic_machine=m68k-da30
				basic_os=
				;;
			decstation | pmax | pmin | dec3100 | decstatn)
				basic_machine=mips-dec
				basic_os=
				;;
			delta88)
				basic_machine=m88k-motorola
				basic_os=sysv3
				;;
			dicos)
				basic_machine=i686-pc
				basic_os=dicos
				;;
			djgpp)
				basic_machine=i586-pc
				basic_os=msdosdjgpp
				;;
			ebmon29k)
				basic_machine=a29k-amd
				basic_os=ebmon
				;;
			es1800 | OSE68k | ose68k | ose | OSE)
				basic_machine=m68k-ericsson
				basic_os=ose
				;;
			gmicro)
				basic_machine=tron-gmicro
				basic_os=sysv
				;;
			go32)
				basic_machine=i386-pc
				basic_os=go32
				;;
			h8300hms)
				basic_machine=h8300-hitachi
				basic_os=hms
				;;
			h8300xray)
				basic_machine=h8300-hitachi
				basic_os=xray
				;;
			h8500hms)
				basic_machine=h8500-hitachi
				basic_os=hms
				;;
			harris)
				basic_machine=m88k-harris
				basic_os=sysv3
				;;
			hp300 | hp300hpux)
				basic_machine=m68k-hp
				basic_os=hpux
				;;
			hp300bsd)
				basic_machine=m68k-hp
				basic_os=bsd
				;;
			hppaosf)
				basic_machine=hppa1.1-hp
				basic_os=osf
				;;
			hppro)
				basic_machine=hppa1.1-hp
				basic_os=proelf
				;;
			i386mach)
				basic_machine=i386-mach
				basic_os=mach
				;;
			isi68 | isi)
				basic_machine=m68k-isi
				basic_os=sysv
				;;
			m68knommu)
				basic_machine=m68k-unknown
				basic_os=linux
				;;
			magnum | m3230)
				basic_machine=mips-mips
				basic_os=sysv
				;;
			merlin)
				basic_machine=ns32k-utek
				basic_os=sysv
				;;
			mingw64)
				basic_machine=x86_64-pc
				basic_os=mingw64
				;;
			mingw32)
				basic_machine=i686-pc
				basic_os=mingw32
				;;
			mingw32ce)
				basic_machine=arm-unknown
				basic_os=mingw32ce
				;;
			monitor)
				basic_machine=m68k-rom68k
				basic_os=coff
				;;
			morphos)
				basic_machine=powerpc-unknown
				basic_os=morphos
				;;
			moxiebox)
				basic_machine=moxie-unknown
				basic_os=moxiebox
				;;
			msdos)
				basic_machine=i386-pc
				basic_os=msdos
				;;
			msys)
				basic_machine=i686-pc
				basic_os=msys
				;;
			mvs)
				basic_machine=i370-ibm
				basic_os=mvs
				;;
			nacl)
				basic_machine=le32-unknown
				basic_os=nacl
				;;
			ncr3000)
				basic_machine=i486-ncr
				basic_os=sysv4
				;;
			netbsd386)
				basic_machine=i386-pc
				basic_os=netbsd
				;;
			netwinder)
				basic_machine=armv4l-rebel
				basic_os=linux
				;;
			news | news700 | news800 | news900)
				basic_machine=m68k-sony
				basic_os=newsos
				;;
			news1000)
				basic_machine=m68030-sony
				basic_os=newsos
				;;
			necv70)
				basic_machine=v70-nec
				basic_os=sysv
				;;
			nh3000)
				basic_machine=m68k-harris
				basic_os=cxux
				;;
			nh[45]000)
				basic_machine=m88k-harris
				basic_os=cxux
				;;
			nindy960)
				basic_machine=i960-intel
				basic_os=nindy
				;;
			mon960)
				basic_machine=i960-intel
				basic_os=mon960
				;;
			nonstopux)
				basic_machine=mips-compaq
				basic_os=nonstopux
				;;
			os400)
				basic_machine=powerpc-ibm
				basic_os=os400
				;;
			OSE68000 | ose68000)
				basic_machine=m68000-ericsson
				basic_os=ose
				;;
			os68k)
				basic_machine=m68k-none
				basic_os=os68k
				;;
			paragon)
				basic_machine=i860-intel
				basic_os=osf
				;;
			parisc)
				basic_machine=hppa-unknown
				basic_os=linux
				;;
			psp)
				basic_machine=mipsallegrexel-sony
				basic_os=psp
				;;
			pw32)
				basic_machine=i586-unknown
				basic_os=pw32
				;;
			rdos | rdos64)
				basic_machine=x86_64-pc
				basic_os=rdos
				;;
			rdos32)
				basic_machine=i386-pc
				basic_os=rdos
				;;
			rom68k)
				basic_machine=m68k-rom68k
				basic_os=coff
				;;
			sa29200)
				basic_machine=a29k-amd
				basic_os=udi
				;;
			sei)
				basic_machine=mips-sei
				basic_os=seiux
				;;
			sequent)
				basic_machine=i386-sequent
				basic_os=
				;;
			sps7)
				basic_machine=m68k-bull
				basic_os=sysv2
				;;
			st2000)
				basic_machine=m68k-tandem
				basic_os=
				;;
			stratus)
				basic_machine=i860-stratus
				basic_os=sysv4
				;;
			sun2)
				basic_machine=m68000-sun
				basic_os=
				;;
			sun2os3)
				basic_machine=m68000-sun
				basic_os=sunos3
				;;
			sun2os4)
				basic_machine=m68000-sun
				basic_os=sunos4
				;;
			sun3)
				basic_machine=m68k-sun
				basic_os=
				;;
			sun3os3)
				basic_machine=m68k-sun
				basic_os=sunos3
				;;
			sun3os4)
				basic_machine=m68k-sun
				basic_os=sunos4
				;;
			sun4)
				basic_machine=sparc-sun
				basic_os=
				;;
			sun4os3)
				basic_machine=sparc-sun
				basic_os=sunos3
				;;
			sun4os4)
				basic_machine=sparc-sun
				basic_os=sunos4
				;;
			sun4sol2)
				basic_machine=sparc-sun
				basic_os=solaris2
				;;
			sun386 | sun386i | roadrunner)
				basic_machine=i386-sun
				basic_os=
				;;
			sv1)
				basic_machine=sv1-cray
				basic_os=unicos
				;;
			symmetry)
				basic_machine=i386-sequent
				basic_os=dynix
				;;
			t3e)
				basic_machine=alphaev5-cray
				basic_os=unicos
				;;
			t90)
				basic_machine=t90-cray
				basic_os=unicos
				;;
			toad1)
				basic_machine=pdp10-xkl
				basic_os=tops20
				;;
			tpf)
				basic_machine=s390x-ibm
				basic_os=tpf
				;;
			udi29k)
				basic_machine=a29k-amd
				basic_os=udi
				;;
			ultra3)
				basic_machine=a29k-nyu
				basic_os=sym1
				;;
			v810 | necv810)
				basic_machine=v810-nec
				basic_os=none
				;;
			vaxv)
				basic_machine=vax-dec
				basic_os=sysv
				;;
			vms)
				basic_machine=vax-dec
				basic_os=vms
				;;
			vsta)
				basic_machine=i386-pc
				basic_os=vsta
				;;
			vxworks960)
				basic_machine=i960-wrs
				basic_os=vxworks
				;;
			vxworks68)
				basic_machine=m68k-wrs
				basic_os=vxworks
				;;
			vxworks29k)
				basic_machine=a29k-wrs
				basic_os=vxworks
				;;
			xbox)
				basic_machine=i686-pc
				basic_os=mingw32
				;;
			ymp)
				basic_machine=ymp-cray
				basic_os=unicos
				;;
			*)
				# Not a known short-hand: pass the word through as
				# the machine with no OS; it is decoded further below.
				basic_machine=$1
				basic_os=
				;;
		esac
		;;
esac

# Decode 1-component or ad-hoc basic machines
# Every arm sets cpu and vendor; some arms also set or default basic_os.
# Arms are tried in order and the first match wins, so specific aliases
# (e.g. "3b1") must stay ahead of the broader patterns that would also match
# them (e.g. "3b*"), and the named aliases must stay ahead of the generic
# "*-*" split near the end.
case $basic_machine in
	# Here we handle the default manufacturer of certain CPU types.  It is in
	# some cases the only manufacturer, in others, it is the most popular.
	w89k)
		cpu=hppa1.1
		vendor=winbond
		;;
	op50n)
		cpu=hppa1.1
		vendor=oki
		;;
	op60c)
		cpu=hppa1.1
		vendor=oki
		;;
	ibm*)
		cpu=i370
		vendor=ibm
		;;
	orion105)
		cpu=clipper
		vendor=highlevel
		;;
	mac | mpw | mac-mpw)
		cpu=m68k
		vendor=apple
		;;
	pmac | pmac-mpw)
		cpu=powerpc
		vendor=apple
		;;

	# Recognize the various machine names and aliases which stand
	# for a CPU type and a company and sometimes even an OS.
	3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc)
		cpu=m68000
		vendor=att
		;;
	3b*)
		cpu=we32k
		vendor=att
		;;
	bluegene*)
		cpu=powerpc
		vendor=ibm
		basic_os=cnk
		;;
	decsystem10* | dec10*)
		cpu=pdp10
		vendor=dec
		basic_os=tops10
		;;
	decsystem20* | dec20*)
		cpu=pdp10
		vendor=dec
		basic_os=tops20
		;;
	delta | 3300 | motorola-3300 | motorola-delta \
	      | 3300-motorola | delta-motorola)
		cpu=m68k
		vendor=motorola
		;;
	dpx2*)
		cpu=m68k
		vendor=bull
		basic_os=sysv3
		;;
	encore | umax | mmax)
		cpu=ns32k
		vendor=encore
		;;
	elxsi)
		cpu=elxsi
		vendor=elxsi
		basic_os=${basic_os:-bsd}
		;;
	fx2800)
		cpu=i860
		vendor=alliant
		;;
	genix)
		cpu=ns32k
		vendor=ns
		;;
	h3050r* | hiux*)
		cpu=hppa1.1
		vendor=hitachi
		basic_os=hiuxwe2
		;;
	hp3k9[0-9][0-9] | hp9[0-9][0-9])
		cpu=hppa1.0
		vendor=hp
		;;
	hp9k2[0-9][0-9] | hp9k31[0-9])
		cpu=m68000
		vendor=hp
		;;
	hp9k3[2-9][0-9])
		cpu=m68k
		vendor=hp
		;;
	hp9k6[0-9][0-9] | hp6[0-9][0-9])
		cpu=hppa1.0
		vendor=hp
		;;
	hp9k7[0-79][0-9] | hp7[0-79][0-9])
		cpu=hppa1.1
		vendor=hp
		;;
	hp9k78[0-9] | hp78[0-9])
		# FIXME: really hppa2.0-hp
		cpu=hppa1.1
		vendor=hp
		;;
	hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893)
		# FIXME: really hppa2.0-hp
		cpu=hppa1.1
		vendor=hp
		;;
	hp9k8[0-9][13679] | hp8[0-9][13679])
		cpu=hppa1.1
		vendor=hp
		;;
	hp9k8[0-9][0-9] | hp8[0-9][0-9])
		cpu=hppa1.0
		vendor=hp
		;;
	# For the i*86 aliases below, the CPU is the original argument with
	# everything after the first "86" removed.
	i*86v32)
		cpu=`echo "$1" | sed -e 's/86.*/86/'`
		vendor=pc
		basic_os=sysv32
		;;
	i*86v4*)
		cpu=`echo "$1" | sed -e 's/86.*/86/'`
		vendor=pc
		basic_os=sysv4
		;;
	i*86v)
		cpu=`echo "$1" | sed -e 's/86.*/86/'`
		vendor=pc
		basic_os=sysv
		;;
	i*86sol2)
		cpu=`echo "$1" | sed -e 's/86.*/86/'`
		vendor=pc
		basic_os=solaris2
		;;
	j90 | j90-cray)
		cpu=j90
		vendor=cray
		basic_os=${basic_os:-unicos}
		;;
	iris | iris4d)
		cpu=mips
		vendor=sgi
		# Keep an explicit irix* OS; anything else becomes irix4.
		case $basic_os in
		    irix*)
			;;
		    *)
			basic_os=irix4
			;;
		esac
		;;
	miniframe)
		cpu=m68000
		vendor=convergent
		;;
	*mint | mint[0-9]* | *MiNT | *MiNT[0-9]*)
		cpu=m68k
		vendor=atari
		basic_os=mint
		;;
	news-3600 | risc-news)
		cpu=mips
		vendor=sony
		basic_os=newsos
		;;
	next | m*-next)
		cpu=m68k
		vendor=next
		# Keep an explicit openstep*/nextstep* OS, map ns2* to nextstep2,
		# and default everything else to nextstep3.
		case $basic_os in
		    openstep*)
		        ;;
		    nextstep*)
			;;
		    ns2*)
		      basic_os=nextstep2
			;;
		    *)
		      basic_os=nextstep3
			;;
		esac
		;;
	np1)
		cpu=np1
		vendor=gould
		;;
	op50n-* | op60c-*)
		cpu=hppa1.1
		vendor=oki
		basic_os=proelf
		;;
	pa-hitachi)
		cpu=hppa1.1
		vendor=hitachi
		basic_os=hiuxwe2
		;;
	pbd)
		cpu=sparc
		vendor=tti
		;;
	pbb)
		cpu=m68k
		vendor=tti
		;;
	pc532)
		cpu=ns32k
		vendor=pc532
		;;
	pn)
		cpu=pn
		vendor=gould
		;;
	power)
		cpu=power
		vendor=ibm
		;;
	ps2)
		cpu=i386
		vendor=ibm
		;;
	rm[46]00)
		cpu=mips
		vendor=siemens
		;;
	rtpc | rtpc-*)
		cpu=romp
		vendor=ibm
		;;
	sde)
		cpu=mipsisa32
		vendor=sde
		basic_os=${basic_os:-elf}
		;;
	simso-wrs)
		cpu=sparclite
		vendor=wrs
		basic_os=vxworks
		;;
	tower | tower-32)
		cpu=m68k
		vendor=ncr
		;;
	vpp*|vx|vx-*)
		cpu=f301
		vendor=fujitsu
		;;
	w65)
		cpu=w65
		vendor=wdc
		;;
	w89k-*)
		cpu=hppa1.1
		vendor=winbond
		basic_os=proelf
		;;
	none)
		cpu=none
		vendor=none
		;;
	leon|leon[3-9])
		cpu=sparc
		vendor=$basic_machine
		;;
	leon-*|leon[3-9]-*)
		cpu=sparc
		# The vendor is the text before the first dash.
		vendor=`echo "$basic_machine" | sed 's/-.*//'`
		;;

	*-*)
		# Generic CPU-VENDOR form: split at the first dash; the same
		# IFS save/restore technique as the field split near the top.
		# shellcheck disable=SC2162
		saved_IFS=$IFS
		IFS="-" read cpu vendor <<EOF
$basic_machine
EOF
		IFS=$saved_IFS
		;;
	# We use `pc' rather than `unknown'
	# because (1) that's what they normally are, and
	# (2) the word "unknown" tends to confuse beginning users.
	i*86 | x86_64)
		cpu=$basic_machine
		vendor=pc
		;;
	# These rules are duplicated from below for sake of the special case above;
	# i.e. things that normalized to x86 arches should also default to "pc"
	pc98)
		cpu=i386
		vendor=pc
		;;
	x64 | amd64)
		cpu=x86_64
		vendor=pc
		;;
	# Recognize the basic CPU types without company name.
	*)
		cpu=$basic_machine
		vendor=unknown
		;;
esac

# basic_machine has been fully decoded into cpu and vendor; drop it.
unset -v basic_machine

# Decode basic machines in the full and proper CPU-Company form.
# Inputs:  cpu, vendor (set by the decoding step above), basic_os (possibly
#          empty), and $1 (used only in the error message).
# Effects: may rewrite cpu and/or vendor to their canonical spelling and may
#          set or default basic_os.  A CPU that matches no arm and is not in
#          the list of canonical CPU types is rejected: a message goes to
#          stderr and the script exits with status 1.
# Arms are tried in order and the first match wins.
case $cpu-$vendor in
	# Here we handle the default manufacturer of certain CPU types in canonical form. It is in
	# some cases the only manufacturer, in others, it is the most popular.
	craynv-unknown)
		vendor=cray
		basic_os=${basic_os:-unicosmp}
		;;
	c90-unknown | c90-cray)
		vendor=cray
		# Keep an OS the caller supplied; fall back to unicos only when
		# none was given.  Shell variable names are case-sensitive, so
		# the expansion must name basic_os exactly: a differently-cased
		# name is always unset and would replace every OS with unicos.
		basic_os=${basic_os:-unicos}
		;;
	fx80-unknown)
		vendor=alliant
		;;
	romp-unknown)
		vendor=ibm
		;;
	mmix-unknown)
		vendor=knuth
		;;
	microblaze-unknown | microblazeel-unknown)
		vendor=xilinx
		;;
	rs6000-unknown)
		vendor=ibm
		;;
	vax-unknown)
		vendor=dec
		;;
	pdp11-unknown)
		vendor=dec
		;;
	we32k-unknown)
		vendor=att
		;;
	cydra-unknown)
		vendor=cydrome
		;;
	i370-ibm*)
		vendor=ibm
		;;
	orion-unknown)
		vendor=highlevel
		;;
	xps-unknown | xps100-unknown)
		cpu=xps100
		vendor=honeywell
		;;

	# Here we normalize CPU types with a missing or matching vendor
	armh-unknown | armh-alt)
		cpu=armv7l
		vendor=alt
		basic_os=${basic_os:-linux-gnueabihf}
		;;
	dpx20-unknown | dpx20-bull)
		cpu=rs6000
		vendor=bull
		basic_os=${basic_os:-bosx}
		;;

	# Here we normalize CPU types irrespective of the vendor
	amd64-*)
		cpu=x86_64
		;;
	blackfin-*)
		cpu=bfin
		basic_os=linux
		;;
	c54x-*)
		cpu=tic54x
		;;
	c55x-*)
		cpu=tic55x
		;;
	c6x-*)
		cpu=tic6x
		;;
	e500v[12]-*)
		cpu=powerpc
		# Append "spe" to whatever OS (possibly none) was given.
		basic_os=${basic_os}"spe"
		;;
	mips3*-*)
		cpu=mips64
		;;
	ms1-*)
		cpu=mt
		;;
	m68knommu-*)
		cpu=m68k
		basic_os=linux
		;;
	m9s12z-* | m68hcs12z-* | hcs12z-* | s12z-*)
		cpu=s12z
		;;
	openrisc-*)
		cpu=or32
		;;
	parisc-*)
		cpu=hppa
		basic_os=linux
		;;
	pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*)
		cpu=i586
		;;
	pentiumpro-* | p6-* | 6x86-* | athlon-* | athalon_*-*)
		cpu=i686
		;;
	pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*)
		cpu=i686
		;;
	pentium4-*)
		cpu=i786
		;;
	pc98-*)
		cpu=i386
		;;
	ppc-* | ppcbe-*)
		cpu=powerpc
		;;
	ppcle-* | powerpclittle-*)
		cpu=powerpcle
		;;
	ppc64-*)
		cpu=powerpc64
		;;
	ppc64le-* | powerpc64little-*)
		cpu=powerpc64le
		;;
	sb1-*)
		cpu=mipsisa64sb1
		;;
	sb1el-*)
		cpu=mipsisa64sb1el
		;;
	sh5e[lb]-*)
		# Move the endianness letter in front of the "e":
		# sh5el -> sh5le, sh5eb -> sh5be.
		cpu=`echo "$cpu" | sed 's/^\(sh.\)e\(.\)$/\1\2e/'`
		;;
	spur-*)
		cpu=spur
		;;
	strongarm-* | thumb-*)
		cpu=arm
		;;
	tx39-*)
		cpu=mipstx39
		;;
	tx39el-*)
		cpu=mipstx39el
		;;
	x64-*)
		cpu=x86_64
		;;
	xscale-* | xscalee[bl]-*)
		# Replace the leading "xscale" with "arm", keeping any suffix.
		cpu=`echo "$cpu" | sed 's/^xscale/arm/'`
		;;
	arm64-* | aarch64le-*)
		cpu=aarch64
		;;

	# Recognize the canonical CPU Types that limit and/or modify the
	# company names they are paired with.
	cr16-*)
		basic_os=${basic_os:-elf}
		;;
	crisv32-* | etraxfs*-*)
		cpu=crisv32
		vendor=axis
		;;
	cris-* | etrax*-*)
		cpu=cris
		vendor=axis
		;;
	crx-*)
		basic_os=${basic_os:-elf}
		;;
	neo-tandem)
		cpu=neo
		vendor=tandem
		;;
	nse-tandem)
		cpu=nse
		vendor=tandem
		;;
	nsr-tandem)
		cpu=nsr
		vendor=tandem
		;;
	nsv-tandem)
		cpu=nsv
		vendor=tandem
		;;
	nsx-tandem)
		cpu=nsx
		vendor=tandem
		;;
	mipsallegrexel-sony)
		cpu=mipsallegrexel
		vendor=sony
		;;
	tile*-*)
		basic_os=${basic_os:-linux-gnu}
		;;

	*)
		# Recognize the canonical CPU types that are allowed with any
		# company name.
		case $cpu in
			1750a | 580 \
			| a29k \
			| aarch64 | aarch64_be \
			| abacus \
			| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] \
			| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] \
			| alphapca5[67] | alpha64pca5[67] \
			| am33_2.0 \
			| amdgcn \
			| arc | arceb | arc32 | arc64 \
			| arm | arm[lb]e | arme[lb] | armv* \
			| avr | avr32 \
			| asmjs \
			| ba \
			| be32 | be64 \
			| bfin | bpf | bs2000 \
			| c[123]* | c30 | [cjt]90 | c4x \
			| c8051 | clipper | craynv | csky | cydra \
			| d10v | d30v | dlx | dsp16xx \
			| e2k | elxsi | epiphany \
			| f30[01] | f700 | fido | fr30 | frv | ft32 | fx80 \
			| h8300 | h8500 \
			| hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
			| hexagon \
			| i370 | i*86 | i860 | i960 | ia16 | ia64 \
			| ip2k | iq2000 \
			| k1om \
			| le32 | le64 \
			| lm32 \
			| loongarch32 | loongarch64 | loongarchx32 \
			| m32c | m32r | m32rle \
			| m5200 | m68000 | m680[012346]0 | m68360 | m683?2 | m68k \
			| m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x \
			| m88110 | m88k | maxq | mb | mcore | mep | metag \
			| microblaze | microblazeel \
			| mips | mipsbe | mipseb | mipsel | mipsle \
			| mips16 \
			| mips64 | mips64eb | mips64el \
			| mips64octeon | mips64octeonel \
			| mips64orion | mips64orionel \
			| mips64r5900 | mips64r5900el \
			| mips64vr | mips64vrel \
			| mips64vr4100 | mips64vr4100el \
			| mips64vr4300 | mips64vr4300el \
			| mips64vr5000 | mips64vr5000el \
			| mips64vr5900 | mips64vr5900el \
			| mipsisa32 | mipsisa32el \
			| mipsisa32r2 | mipsisa32r2el \
			| mipsisa32r3 | mipsisa32r3el \
			| mipsisa32r5 | mipsisa32r5el \
			| mipsisa32r6 | mipsisa32r6el \
			| mipsisa64 | mipsisa64el \
			| mipsisa64r2 | mipsisa64r2el \
			| mipsisa64r3 | mipsisa64r3el \
			| mipsisa64r5 | mipsisa64r5el \
			| mipsisa64r6 | mipsisa64r6el \
			| mipsisa64sb1 | mipsisa64sb1el \
			| mipsisa64sr71k | mipsisa64sr71kel \
			| mipsr5900 | mipsr5900el \
			| mipstx39 | mipstx39el \
			| mmix \
			| mn10200 | mn10300 \
			| moxie \
			| mt \
			| msp430 \
			| nds32 | nds32le | nds32be \
			| nfp \
			| nios | nios2 | nios2eb | nios2el \
			| none | np1 | ns16k | ns32k | nvptx \
			| open8 \
			| or1k* \
			| or32 \
			| orion \
			| picochip \
			| pdp10 | pdp11 | pj | pjl | pn | power \
			| powerpc | powerpc64 | powerpc64le | powerpcle | powerpcspe \
			| pru \
			| pyramid \
			| riscv | riscv32 | riscv32be | riscv64 | riscv64be \
			| rl78 | romp | rs6000 | rx \
			| s390 | s390x \
			| score \
			| sh | shl \
			| sh[1234] | sh[24]a | sh[24]ae[lb] | sh[23]e | she[lb] | sh[lb]e \
			| sh[1234]e[lb] |  sh[12345][lb]e | sh[23]ele | sh64 | sh64le \
			| sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet \
			| sparclite \
			| sparcv8 | sparcv9 | sparcv9b | sparcv9v | sv1 | sx* \
			| spu \
			| tahoe \
			| thumbv7* \
			| tic30 | tic4x | tic54x | tic55x | tic6x | tic80 \
			| tron \
			| ubicom32 \
			| v70 | v850 | v850e | v850e1 | v850es | v850e2 | v850e2v3 \
			| vax \
			| visium \
			| w65 \
			| wasm32 | wasm64 \
			| we32k \
			| x86 | x86_64 | xc16x | xgate | xps100 \
			| xstormy16 | xtensa* \
			| ymp \
			| z8k | z80)
				;;

			*)
				echo Invalid configuration \`"$1"\': machine \`"$cpu-$vendor"\' not recognized 1>&2
				exit 1
				;;
		esac
		;;
esac

# Map long-form manufacturer names onto their canonical short vendor names;
# any other vendor is left exactly as it is.
case $vendor in
	digital*)   vendor=dec ;;
	commodore*) vendor=cbm ;;
esac

# Decode manufacturer-specific aliases for certain operating systems.
# Input:   basic_os (possibly empty), cpu, vendor.
# Output:  kernel (possibly empty) and os.
# When an OS was given it is split into kernel/os and normalized; when none
# was given a default is chosen from the cpu-vendor pair.

# The operand is quoted so that an OS field containing whitespace or glob
# characters is still compared as a single word.  Unquoted, such a value
# makes `test' fail with a usage error, which would send control to the
# "no OS given" branch and silently substitute a default OS.
if test "x$basic_os" != x
then

# First recognize some ad-hoc cases, or perhaps split kernel-os, or else just
# set os.
case $basic_os in
	gnu/linux*)
		kernel=linux
		os=`echo "$basic_os" | sed -e 's|gnu/linux|gnu|'`
		;;
	os2-emx)
		kernel=os2
		os=`echo "$basic_os" | sed -e 's|os2-emx|emx|'`
		;;
	nto-qnx*)
		kernel=nto
		os=`echo "$basic_os" | sed -e 's|nto-qnx|qnx|'`
		;;
	*-*)
		# Generic KERNEL-OS form: split at the first dash.
		# shellcheck disable=SC2162
		saved_IFS=$IFS
		IFS="-" read kernel os <<EOF
$basic_os
EOF
		IFS=$saved_IFS
		;;
	# Default OS when just kernel was specified
	nto*)
		kernel=nto
		os=`echo "$basic_os" | sed -e 's|nto|qnx|'`
		;;
	linux*)
		kernel=linux
		os=`echo "$basic_os" | sed -e 's|linux|gnu|'`
		;;
	*)
		kernel=
		os=$basic_os
		;;
esac

# Now, normalize the OS (knowing we just have one component, it's not a kernel,
# etc.)
# Arms are tried in order; several exist only to stop a later, broader
# pattern from matching (see the "Don't match below" style comments).
case $os in
	# First match some system type aliases that might get confused
	# with valid system types.
	# solaris* is a basic system type, with this one exception.
	auroraux)
		os=auroraux
		;;
	bluegene*)
		os=cnk
		;;
	solaris1 | solaris1.*)
		os=`echo "$os" | sed -e 's|solaris1|sunos4|'`
		;;
	solaris)
		os=solaris2
		;;
	unixware*)
		os=sysv4.2uw
		;;
	# es1800 is here to avoid being matched by es* (a different OS)
	es1800*)
		os=ose
		;;
	# Some version numbers need modification
	chorusos*)
		os=chorusos
		;;
	isc)
		os=isc2.2
		;;
	sco6)
		os=sco5v6
		;;
	sco5)
		os=sco3.2v5
		;;
	sco4)
		os=sco3.2v4
		;;
	sco3.2.[4-9]*)
		os=`echo "$os" | sed -e 's/sco3.2./sco3.2v/'`
		;;
	sco*v* | scout)
		# Don't match below
		;;
	sco*)
		os=sco3.2v2
		;;
	psos*)
		os=psos
		;;
	qnx*)
		os=qnx
		;;
	hiux*)
		os=hiuxwe2
		;;
	lynx*178)
		os=lynxos178
		;;
	lynx*5)
		os=lynxos5
		;;
	lynxos*)
		# don't get caught up in next wildcard
		;;
	lynx*)
		os=lynxos
		;;
	mac[0-9]*)
		os=`echo "$os" | sed -e 's|mac|macos|'`
		;;
	opened*)
		os=openedition
		;;
	os400*)
		os=os400
		;;
	sunos5*)
		os=`echo "$os" | sed -e 's|sunos5|solaris2|'`
		;;
	sunos6*)
		os=`echo "$os" | sed -e 's|sunos6|solaris3|'`
		;;
	wince*)
		os=wince
		;;
	utek*)
		os=bsd
		;;
	dynix*)
		os=bsd
		;;
	acis*)
		os=aos
		;;
	atheos*)
		os=atheos
		;;
	syllable*)
		os=syllable
		;;
	386bsd)
		os=bsd
		;;
	ctix* | uts*)
		os=sysv
		;;
	nova*)
		os=rtmk-nova
		;;
	ns2)
		os=nextstep2
		;;
	# Preserve the version number of sinix5.
	sinix5.*)
		os=`echo "$os" | sed -e 's|sinix|sysv|'`
		;;
	sinix*)
		os=sysv4
		;;
	tpf*)
		os=tpf
		;;
	triton*)
		os=sysv3
		;;
	oss*)
		os=sysv3
		;;
	svr4*)
		os=sysv4
		;;
	svr3)
		os=sysv3
		;;
	sysvr4)
		os=sysv4
		;;
	ose*)
		os=ose
		;;
	*mint | mint[0-9]* | *MiNT | MiNT[0-9]*)
		os=mint
		;;
	dicos*)
		os=dicos
		;;
	pikeos*)
		# Until real need of OS specific support for
		# particular features comes up, bare metal
		# configurations are quite functional.
		case $cpu in
		    arm*)
			os=eabi
			;;
		    *)
			os=elf
			;;
		esac
		;;
	*)
		# No normalization, but not necessarily accepted, that comes below.
		;;
esac

else

# Here we handle the default operating systems that come with various machines.
# The value should be what the vendor currently ships out the door with their
# machine or put another way, the most popular os provided with the machine.

# Note that if you're going to try to match "-MANUFACTURER" here (say,
# "-sun"), then you have to tell the case statement up towards the top
# that MANUFACTURER isn't an operating system.  Otherwise, code above
# will signal an error saying that MANUFACTURER isn't an operating
# system, and we'll never get to this point.

kernel=
case $cpu-$vendor in
	score-*)
		os=elf
		;;
	spu-*)
		os=elf
		;;
	*-acorn)
		os=riscix1.2
		;;
	arm*-rebel)
		kernel=linux
		os=gnu
		;;
	arm*-semi)
		os=aout
		;;
	c4x-* | tic4x-*)
		os=coff
		;;
	c8051-*)
		os=elf
		;;
	clipper-intergraph)
		os=clix
		;;
	hexagon-*)
		os=elf
		;;
	tic54x-*)
		os=coff
		;;
	tic55x-*)
		os=coff
		;;
	tic6x-*)
		os=coff
		;;
	# This must come before the *-dec entry.
	pdp10-*)
		os=tops20
		;;
	pdp11-*)
		os=none
		;;
	*-dec | vax-*)
		os=ultrix4.2
		;;
	m68*-apollo)
		os=domain
		;;
	i386-sun)
		os=sunos4.0.2
		;;
	m68000-sun)
		os=sunos3
		;;
	m68*-cisco)
		os=aout
		;;
	mep-*)
		os=elf
		;;
	mips*-cisco)
		os=elf
		;;
	mips*-*)
		os=elf
		;;
	or32-*)
		os=coff
		;;
	*-tti)	# must be before sparc entry or we get the wrong os.
		os=sysv3
		;;
	sparc-* | *-sun)
		os=sunos4.1.1
		;;
	pru-*)
		os=elf
		;;
	*-be)
		os=beos
		;;
	*-ibm)
		os=aix
		;;
	*-knuth)
		os=mmixware
		;;
	*-wec)
		os=proelf
		;;
	*-winbond)
		os=proelf
		;;
	*-oki)
		os=proelf
		;;
	*-hp)
		os=hpux
		;;
	*-hitachi)
		os=hiux
		;;
	i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent)
		os=sysv
		;;
	*-cbm)
		os=amigaos
		;;
	*-dg)
		os=dgux
		;;
	*-dolphin)
		os=sysv3
		;;
	m68k-ccur)
		os=rtu
		;;
	m88k-omron*)
		os=luna
		;;
	*-next)
		os=nextstep
		;;
	*-sequent)
		os=ptx
		;;
	*-crds)
		os=unos
		;;
	*-ns)
		os=genix
		;;
	i370-*)
		os=mvs
		;;
	*-gould)
		os=sysv
		;;
	*-highlevel)
		os=bsd
		;;
	*-encore)
		os=bsd
		;;
	*-sgi)
		os=irix
		;;
	*-siemens)
		os=sysv4
		;;
	*-masscomp)
		os=rtu
		;;
	f30[01]-fujitsu | f700-fujitsu)
		os=uxpv
		;;
	*-rom68k)
		os=coff
		;;
	*-*bug)
		os=coff
		;;
	*-apple)
		os=macos
		;;
	*-atari*)
		os=mint
		;;
	*-wrs)
		os=vxworks
		;;
	*)
		os=none
		;;
esac

fi

# Now, validate our (potentially fixed-up) OS.
# Every accepted pattern has an empty body; only the final catch-all branch
# prints a diagnostic to stderr and exits with status 1.
case $os in
	# Sometimes we do "kernel-libc", so those need to count as OSes.
	musl* | newlib* | relibc* | uclibc*)
		;;
	# Likewise for "kernel-abi"
	eabi* | gnueabi*)
		;;
	# VxWorks passes extra cpu info in the 4th field.
	simlinux | simwindows | spe)
		;;
	# Now accept the basic system types.
	# The portable systems come first.
	# Each alternative MUST end in a * to match a version number.
	gnu* | android* | bsd* | mach* | minix* | genix* | ultrix* | irix* \
	     | *vms* | esix* | aix* | cnk* | sunos | sunos[34]* \
	     | hpux* | unos* | osf* | luna* | dgux* | auroraux* | solaris* \
	     | sym* |  plan9* | psp* | sim* | xray* | os68k* | v88r* \
	     | hiux* | abug | nacl* | netware* | windows* \
	     | os9* | macos* | osx* | ios* \
	     | mpw* | magic* | mmixware* | mon960* | lnews* \
	     | amigaos* | amigados* | msdos* | newsos* | unicos* | aof* \
	     | aos* | aros* | cloudabi* | sortix* | twizzler* \
	     | nindy* | vxsim* | vxworks* | ebmon* | hms* | mvs* \
	     | clix* | riscos* | uniplus* | iris* | isc* | rtu* | xenix* \
	     | mirbsd* | netbsd* | dicos* | openedition* | ose* \
	     | bitrig* | openbsd* | secbsd* | solidbsd* | libertybsd* | os108* \
	     | ekkobsd* | freebsd* | riscix* | lynxos* | os400* \
	     | bosx* | nextstep* | cxux* | aout* | elf* | oabi* \
	     | ptx* | coff* | ecoff* | winnt* | domain* | vsta* \
	     | udi* | lites* | ieee* | go32* | aux* | hcos* \
	     | chorusrdb* | cegcc* | glidix* | serenity* \
	     | cygwin* | msys* | pe* | moss* | proelf* | rtems* \
	     | midipix* | mingw32* | mingw64* | mint* \
	     | uxpv* | beos* | mpeix* | udk* | moxiebox* \
	     | interix* | uwin* | mks* | rhapsody* | darwin* \
	     | openstep* | oskit* | conix* | pw32* | nonstopux* \
	     | storm-chaos* | tops10* | tenex* | tops20* | its* \
	     | os2* | vos* | palmos* | uclinux* | nucleus* | morphos* \
	     | scout* | superux* | sysv* | rtmk* | tpf* | windiss* \
	     | powermax* | dnix* | nx6 | nx7 | sei* | dragonfly* \
	     | skyos* | haiku* | rdos* | toppers* | drops* | es* \
	     | onefs* | tirtos* | phoenix* | fuchsia* | redox* | bme* \
	     | midnightbsd* | amdhsa* | unleashed* | emscripten* | wasi* \
	     | nsk* | powerunix* | genode* | zvmoe* | qnx* | emx* | zephyr* \
	     | fiwix* )
		;;
	# This one is extra strict with allowed versions
	sco3.2v2 | sco3.2v[4-9]* | sco5v6*)
		# Don't forget version if it is 3.2v4 or newer.
		;;
	# "none" is accepted as-is.
	none)
		;;
	# Anything that reached this point is rejected: report the full input
	# ($1) and the offending OS field on stderr.
	*)
		echo Invalid configuration \`"$1"\': OS \`"$os"\' not recognized 1>&2
		exit 1
		;;
esac

# As a final step for OS-related things, validate the OS-kernel combination
# (given a valid OS), if there is a kernel.
# The patterns are tried in order against "$kernel-$os"; when $kernel is empty
# the string starts with "-", which is what the "-..." patterns below match.
case $kernel-$os in
	# Linux kernel together with one of the listed userland/libc names.
	linux-gnu* | linux-dietlibc* | linux-android* | linux-newlib* \
		   | linux-musl* | linux-relibc* | linux-uclibc* )
		;;
	uclinux-uclibc* )
		;;
	# Empty kernel combined with a bare libc name: rejected.
	-dietlibc* | -newlib* | -musl* | -relibc* | -uclibc* )
		# These are just libc implementations, not actual OSes, and thus
		# require a kernel.
		echo "Invalid configuration \`$1': libc \`$os' needs explicit kernel." 1>&2
		exit 1
		;;
	kfreebsd*-gnu* | kopensolaris*-gnu*)
		;;
	vxworks-simlinux | vxworks-simwindows | vxworks-spe)
		;;
	nto-qnx*)
		;;
	os2-emx)
		;;
	# Any kernel (including none) is accepted with an eabi/gnueabi OS field.
	*-eabi* | *-gnueabi*)
		;;
	-*)
		# Blank kernel with real OS is always fine.
		;;
	# A non-empty kernel that matched none of the pairings above.
	*-*)
		echo "Invalid configuration \`$1': Kernel \`$kernel' not known to work with OS \`$os'." 1>&2
		exit 1
		;;
esac

# Here we handle the case where we know the os, and the CPU type, but not the
# manufacturer.  We pick the logical manufacturer.
# Only a vendor of exactly "unknown" is rewritten; the inner case has no
# catch-all, so an unmatched cpu-os pair leaves vendor=unknown.
case $vendor in
	unknown)
		case $cpu-$os in
			*-riscix*)
				vendor=acorn
				;;
			*-sunos*)
				vendor=sun
				;;
			*-cnk* | *-aix*)
				vendor=ibm
				;;
			*-beos*)
				vendor=be
				;;
			*-hpux*)
				vendor=hp
				;;
			*-mpeix*)
				vendor=hp
				;;
			*-hiux*)
				vendor=hitachi
				;;
			*-unos*)
				vendor=crds
				;;
			*-dgux*)
				vendor=dg
				;;
			*-luna*)
				vendor=omron
				;;
			*-genix*)
				vendor=ns
				;;
			*-clix*)
				vendor=intergraph
				;;
			*-mvs* | *-opened*)
				vendor=ibm
				;;
			*-os400*)
				vendor=ibm
				;;
			# The only entry keyed on the CPU rather than on the OS.
			s390-* | s390x-*)
				vendor=ibm
				;;
			*-ptx*)
				vendor=sequent
				;;
			*-tpf*)
				vendor=ibm
				;;
			*-vxsim* | *-vxworks* | *-windiss*)
				vendor=wrs
				;;
			*-aux*)
				vendor=apple
				;;
			*-hms*)
				vendor=hitachi
				;;
			*-mpw* | *-macos*)
				vendor=apple
				;;
			*-*mint | *-mint[0-9]* | *-*MiNT | *-MiNT[0-9]*)
				vendor=atari
				;;
			*-vos*)
				vendor=stratus
				;;
		esac
		;;
esac

# Print the canonical name. ${kernel:+$kernel-} expands to "$kernel-" only when
# kernel is set and non-empty, so the result is either cpu-vendor-os or
# cpu-vendor-kernel-os.
echo "$cpu-$vendor-${kernel:+$kernel-}$os"
exit

# Local variables:
# eval: (add-hook 'before-save-hook 'time-stamp)
# time-stamp-start: "timestamp='"
# time-stamp-format: "%:y-%02m-%02d"
# time-stamp-end: "'"
# End:


================================================
FILE: audio/paddleaudio/third_party/patches/libmad.patch
================================================
See the following links for the origin of this patch:
http://www.linuxfromscratch.org/blfs/view/svn/multimedia/libmad.html
http://www.linuxfromscratch.org/patches/blfs/svn/libmad-0.15.1b-fixes-1.patch
--- src/libmad/configure	2004-02-05 09:34:07.000000000 +0000
+++ src/libmad/configure.new	2020-06-30 21:10:28.528018931 +0000
@@ -19083,71 +19083,7 @@
 
 if test "$GCC" = yes
 then
-    if test -z "$arch"
-    then
-	case "$host" in
-	    i386-*)           ;;
-	    i?86-*)           arch="-march=i486" ;;
-	    arm*-empeg-*)     arch="-march=armv4 -mtune=strongarm1100" ;;
-	    armv4*-*)         arch="-march=armv4 -mtune=strongarm" ;;
-	    powerpc-*)        ;;
-	    mips*-agenda-*)   arch="-mcpu=vr4100" ;;
-	    mips*-luxsonor-*) arch="-mips1 -mcpu=r3000 -Wa,-m4010" ;;
-	esac
-    fi
-
-    case "$optimize" in
-	-O|"-O "*)
-	    optimize="-O"
-	    optimize="$optimize -fforce-mem"
-	    optimize="$optimize -fforce-addr"
-	    : #x optimize="$optimize -finline-functions"
-	    : #- optimize="$optimize -fstrength-reduce"
-	    optimize="$optimize -fthread-jumps"
-	    optimize="$optimize -fcse-follow-jumps"
-	    optimize="$optimize -fcse-skip-blocks"
-	    : #x optimize="$optimize -frerun-cse-after-loop"
-	    : #x optimize="$optimize -frerun-loop-opt"
-	    : #x optimize="$optimize -fgcse"
-	    optimize="$optimize -fexpensive-optimizations"
-	    optimize="$optimize -fregmove"
-	    : #* optimize="$optimize -fdelayed-branch"
-	    : #x optimize="$optimize -fschedule-insns"
-	    optimize="$optimize -fschedule-insns2"
-	    : #? optimize="$optimize -ffunction-sections"
-	    : #? optimize="$optimize -fcaller-saves"
-	    : #> optimize="$optimize -funroll-loops"
-	    : #> optimize="$optimize -funroll-all-loops"
-	    : #x optimize="$optimize -fmove-all-movables"
-	    : #x optimize="$optimize -freduce-all-givs"
-	    : #? optimize="$optimize -fstrict-aliasing"
-	    : #* optimize="$optimize -fstructure-noalias"
-
-	    case "$host" in
-		arm*-*)
-		    optimize="$optimize -fstrength-reduce"
-		    ;;
-		mips*-*)
-		    optimize="$optimize -fstrength-reduce"
-		    optimize="$optimize -finline-functions"
-		    ;;
-		i?86-*)
-		    optimize="$optimize -fstrength-reduce"
-		    ;;
-		powerpc-apple-*)
-		    # this triggers an internal compiler error with gcc2
-		    : #optimize="$optimize -fstrength-reduce"
-
-		    # this is really only beneficial with gcc3
-		    : #optimize="$optimize -finline-functions"
-		    ;;
-		*)
-		    # this sometimes provokes bugs in gcc 2.95.2
-		    : #optimize="$optimize -fstrength-reduce"
-		    ;;
-	    esac
-	    ;;
-    esac
+    optimize="-O2"
 fi
 
 case "$host" in
@@ -21497,6 +21433,7 @@
 then
     case "$host" in
 	i?86-*)     FPM="INTEL"  ;;
+	x86_64*)    FPM="64BIT"  ;;
 	arm*-*)     FPM="ARM"    ;;
 	mips*-*)    FPM="MIPS"   ;;
 	sparc*-*)   FPM="SPARC"  ;;


================================================
FILE: audio/paddleaudio/third_party/patches/sox.patch
================================================
See https://github.com/pytorch/audio/pull/1297
diff -ru sox/src/formats.c sox/src/formats.c
--- sox/src/formats.c	2014-10-26 19:55:50.000000000 -0700
+++ sox/src/formats.c	2021-02-22 16:01:02.833144070 -0800
@@ -333,6 +333,10 @@
   assert(ft);
   if (!ft->fp)
     return sox_false;
-  fstat(fileno((FILE*)ft->fp), &st);
+  int fd = fileno((FILE*)ft->fp);
+  if (fd < 0)
+    return sox_false;
+  if (fstat(fd, &st) < 0)
+    return sox_false;
   return ((st.st_mode & S_IFMT) == S_IFREG);
 }


================================================
FILE: audio/paddleaudio/third_party/sox/CMakeLists.txt
================================================
# Shared settings for the ExternalProject builds declared in this file.
find_package(PkgConfig REQUIRED)

include(ExternalProject)

# Everything is installed into a sibling "install" prefix, downloaded tarballs
# are kept in "archives", and local patches/config scripts live in "patches".
set(INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../install)
set(ARCHIVE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../archives)
set(patch_dir ${CMAKE_CURRENT_SOURCE_DIR}/../patches)
# Configure flags passed to every project: static, position-independent
# libraries installed under ${INSTALL_DIR}, without docs or examples.
set(COMMON_ARGS --quiet --disable-shared --enable-static --prefix=${INSTALL_DIR} --with-pic --disable-dependency-tracking --disable-debug --disable-examples --disable-doc)

# To pass custom environment variables to ExternalProject_Add command,
# we need to do `${CMAKE_COMMAND} -E env ${envs} <COMMAND>`.
# https://stackoverflow.com/a/62437353
# We construct the custom environment variables here
# (the install prefix is put first; LDFLAGS/CFLAGS from the caller's
# environment are appended after it).
set(envs
  "PKG_CONFIG_PATH=${INSTALL_DIR}/lib/pkgconfig"
  "LDFLAGS=-L${INSTALL_DIR}/lib $ENV{LDFLAGS}"
  "CFLAGS=-I${INSTALL_DIR}/include -fvisibility=hidden $ENV{CFLAGS}"
)

# libmad 0.15.1b, built only when BUILD_MAD is enabled.
# The patch step applies libmad.patch and then overwrites the tarball's
# config.guess/config.sub with the copies kept in ${patch_dir}.
if (BUILD_MAD)
  ExternalProject_Add(mad
    PREFIX ${CMAKE_CURRENT_BINARY_DIR}
    DOWNLOAD_DIR ${ARCHIVE_DIR}
    URL https://downloads.sourceforge.net/project/mad/libmad/0.15.1b/libmad-0.15.1b.tar.gz
    URL_HASH SHA256=bbfac3ed6bfbc2823d3775ebb931087371e142bb0e9bb1bee51a76a6e0078690
    PATCH_COMMAND patch < ${patch_dir}/libmad.patch && cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/mad/
    CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/mad/configure ${COMMON_ARGS}
    DOWNLOAD_NO_PROGRESS ON
    LOG_DOWNLOAD ON
    LOG_UPDATE ON
    LOG_CONFIGURE ON
    LOG_BUILD ON
    LOG_INSTALL ON
    LOG_MERGED_STDOUTERR ON
    LOG_OUTPUT_ON_FAILURE ON
  )
endif (BUILD_MAD)

# opencore-amr 0.1.5. No source patch is applied; the patch step only replaces
# config.guess/config.sub before configure runs with the shared environment.
ExternalProject_Add(amr
  PREFIX ${CMAKE_CURRENT_BINARY_DIR}
  DOWNLOAD_DIR ${ARCHIVE_DIR}
  URL https://sourceforge.net/projects/opencore-amr/files/opencore-amr/opencore-amr-0.1.5.tar.gz
  URL_HASH SHA256=2c006cb9d5f651bfb5e60156dbff6af3c9d35c7bbcc9015308c0aff1e14cd341
  PATCH_COMMAND cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/amr/
  CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/amr/configure ${COMMON_ARGS}
  DOWNLOAD_NO_PROGRESS ON
  LOG_DOWNLOAD ON
  LOG_UPDATE ON
  LOG_CONFIGURE ON
  LOG_BUILD ON
  LOG_INSTALL ON
  LOG_MERGED_STDOUTERR ON
  LOG_OUTPUT_ON_FAILURE ON
)

# LAME 3.99.5. Same recipe as the other codecs, with --enable-nasm added to
# the common configure arguments.
ExternalProject_Add(lame
  PREFIX ${CMAKE_CURRENT_BINARY_DIR}
  DOWNLOAD_DIR ${ARCHIVE_DIR}
  URL https://downloads.sourceforge.net/project/lame/lame/3.99/lame-3.99.5.tar.gz
  URL_HASH SHA256=24346b4158e4af3bd9f2e194bb23eb473c75fb7377011523353196b19b9a23ff
  PATCH_COMMAND cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/lame/
  CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/lame/configure ${COMMON_ARGS} --enable-nasm
  DOWNLOAD_NO_PROGRESS ON
  LOG_DOWNLOAD ON
  LOG_UPDATE ON
  LOG_CONFIGURE ON
  LOG_BUILD ON
  LOG_INSTALL ON
  LOG_MERGED_STDOUTERR ON
  LOG_OUTPUT_ON_FAILURE ON
)

# libogg 1.3.3. The flac, vorbis and opus projects in this file declare a
# dependency on this target.
ExternalProject_Add(ogg
  PREFIX ${CMAKE_CURRENT_BINARY_DIR}
  DOWNLOAD_DIR ${ARCHIVE_DIR}
  URL https://ftp.osuosl.org/pub/xiph/releases/ogg/libogg-1.3.3.tar.gz
  URL_HASH SHA256=c2e8a485110b97550f453226ec644ebac6cb29d1caef2902c007edab4308d985
  PATCH_COMMAND cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/ogg/
  CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/ogg/configure ${COMMON_ARGS}
  DOWNLOAD_NO_PROGRESS ON
  LOG_DOWNLOAD ON
  LOG_UPDATE ON
  LOG_CONFIGURE ON
  LOG_BUILD ON
  LOG_INSTALL ON
  LOG_MERGED_STDOUTERR ON
  LOG_OUTPUT_ON_FAILURE ON
)

# FLAC 1.3.2, built after ogg and configured with --with-ogg; the C++
# libraries are turned off with --disable-cpplibs.
ExternalProject_Add(flac
  PREFIX ${CMAKE_CURRENT_BINARY_DIR}
  DEPENDS ogg
  DOWNLOAD_DIR ${ARCHIVE_DIR}
  URL https://ftp.osuosl.org/pub/xiph/releases/flac/flac-1.3.2.tar.xz
  URL_HASH SHA256=91cfc3ed61dc40f47f050a109b08610667d73477af6ef36dcad31c31a4a8d53f
  PATCH_COMMAND cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/flac/
  CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/flac/configure ${COMMON_ARGS} --with-ogg --disable-cpplibs
  DOWNLOAD_NO_PROGRESS ON
  LOG_DOWNLOAD ON
  LOG_UPDATE ON
  LOG_CONFIGURE ON
  LOG_BUILD ON
  LOG_INSTALL ON
  LOG_MERGED_STDOUTERR ON
  LOG_OUTPUT_ON_FAILURE ON
)

# libvorbis 1.3.6, built after ogg and configured with --with-ogg.
ExternalProject_Add(vorbis
  PREFIX ${CMAKE_CURRENT_BINARY_DIR}
  DEPENDS ogg
  DOWNLOAD_DIR ${ARCHIVE_DIR}
  URL https://ftp.osuosl.org/pub/xiph/releases/vorbis/libvorbis-1.3.6.tar.gz
  URL_HASH SHA256=6ed40e0241089a42c48604dc00e362beee00036af2d8b3f46338031c9e0351cb
  PATCH_COMMAND cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/vorbis/
  CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/vorbis/configure ${COMMON_ARGS} --with-ogg
  DOWNLOAD_NO_PROGRESS ON
  LOG_DOWNLOAD ON
  LOG_UPDATE ON
  LOG_CONFIGURE ON
  LOG_BUILD ON
  LOG_INSTALL ON
  LOG_MERGED_STDOUTERR ON
  LOG_OUTPUT_ON_FAILURE ON
)

# Opus 1.3.1, built after ogg and configured with --with-ogg.
ExternalProject_Add(opus
  PREFIX ${CMAKE_CURRENT_BINARY_DIR}
  DEPENDS ogg
  DOWNLOAD_DIR ${ARCHIVE_DIR}
  URL https://ftp.osuosl.org/pub/xiph/releases/opus/opus-1.3.1.tar.gz
  URL_HASH SHA256=65b58e1e25b2a114157014736a3d9dfeaad8d41be1c8179866f144a2fb44ff9d
  PATCH_COMMAND cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/opus/
  CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/opus/configure ${COMMON_ARGS} --with-ogg
  DOWNLOAD_NO_PROGRESS ON
  LOG_DOWNLOAD ON
  LOG_UPDATE ON
  LOG_CONFIGURE ON
  LOG_BUILD ON
  LOG_INSTALL ON
  LOG_MERGED_STDOUTERR ON
  LOG_OUTPUT_ON_FAILURE ON
)

# opusfile 0.12, built after opus (which itself depends on ogg). HTTP support
# is switched off with --disable-http.
ExternalProject_Add(opusfile
  PREFIX ${CMAKE_CURRENT_BINARY_DIR}
  DEPENDS opus
  DOWNLOAD_DIR ${ARCHIVE_DIR}
  URL https://ftp.osuosl.org/pub/xiph/releases/opus/opusfile-0.12.tar.gz
  URL_HASH SHA256=118d8601c12dd6a44f52423e68ca9083cc9f2bfe72da7a8c1acb22a80ae3550b
  PATCH_COMMAND cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/opusfile/
  CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/opusfile/configure ${COMMON_ARGS} --disable-http
  DOWNLOAD_NO_PROGRESS ON
  LOG_DOWNLOAD ON
  LOG_UPDATE ON
  LOG_CONFIGURE ON
  LOG_BUILD ON
  LOG_INSTALL ON
  LOG_MERGED_STDOUTERR ON
  LOG_OUTPUT_ON_FAILURE ON
)

# OpenMP is by default compiled against GNU OpenMP, which conflicts with the version of OpenMP that PyTorch uses.
# See https://github.com/pytorch/audio/pull/1026
# TODO: Add flags like https://github.com/suphoff/pytorch_parallel_extension_cpp/blob/master/setup.py
# Extra configure switches for sox itself, on top of COMMON_ARGS.
set(SOX_OPTIONS
  --disable-openmp
  --with-amrnb
  --with-amrwb
  --with-flac
  --with-lame
  --with-oggvorbis
  --with-opus
  --without-alsa
  --without-ao
  --without-coreaudio
  --without-oss
  --without-id3tag
  --without-ladspa
  --without-magic
  --without-png
  --without-pulseaudio
  --without-sndfile
  --without-sndio
  --without-sunaudio
  --without-waveaudio
  --without-wavpack
  --without-twolame
  )

# Static archives expected under ${INSTALL_DIR}/lib once all projects are
# installed; libsox.a is listed first.
set(SOX_LIBRARIES
  ${INSTALL_DIR}/lib/libsox.a
  ${INSTALL_DIR}/lib/libopencore-amrnb.a
  ${INSTALL_DIR}/lib/libopencore-amrwb.a
  ${INSTALL_DIR}/lib/libmp3lame.a
  ${INSTALL_DIR}/lib/libFLAC.a
  ${INSTALL_DIR}/lib/libopusfile.a
  ${INSTALL_DIR}/lib/libopus.a
  ${INSTALL_DIR}/lib/libvorbisenc.a
  ${INSTALL_DIR}/lib/libvorbisfile.a
  ${INSTALL_DIR}/lib/libvorbis.a
  ${INSTALL_DIR}/lib/libogg.a
  )

# External projects that must be built before sox is configured.
set(sox_depends
  ogg flac vorbis opusfile lame amr
  )

# With BUILD_MAD, add the configure switch, the archive and the dependency for
# libmad; otherwise tell sox's configure explicitly not to use it.
if (BUILD_MAD)
  list(
    APPEND
    SOX_OPTIONS
    --with-mad
    )
  list(
    APPEND
    SOX_LIBRARIES
    ${INSTALL_DIR}/lib/libmad.a
    )
  list(
    APPEND
    sox_depends
    mad
    )
else ()
  list(
    APPEND
    SOX_OPTIONS
    --without-mad
    )  
endif (BUILD_MAD)

# SoX 14.4.2, built after every project in ${sox_depends}. The patch step
# applies sox.patch with -p1 and replaces config.guess/config.sub.
# BUILD_BYPRODUCTS declares all archives in SOX_LIBRARIES as outputs of this
# target.
ExternalProject_Add(sox
  PREFIX ${CMAKE_CURRENT_BINARY_DIR}
  DEPENDS ${sox_depends}
  DOWNLOAD_DIR ${ARCHIVE_DIR}
  URL https://downloads.sourceforge.net/project/sox/sox/14.4.2/sox-14.4.2.tar.bz2
  URL_HASH SHA256=81a6956d4330e75b5827316e44ae381e6f1e8928003c6aa45896da9041ea149c
  PATCH_COMMAND patch -p1 < ${patch_dir}/sox.patch && cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/sox/
  CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/sox/configure ${COMMON_ARGS} ${SOX_OPTIONS}
  BUILD_BYPRODUCTS ${SOX_LIBRARIES}
  DOWNLOAD_NO_PROGRESS ON
  LOG_DOWNLOAD ON
  LOG_UPDATE ON
  LOG_CONFIGURE ON
  LOG_BUILD ON
  LOG_INSTALL ON
  LOG_MERGED_STDOUTERR ON
  LOG_OUTPUT_ON_FAILURE ON
)

# Interface target for consumers: linking against `libsox` adds the install
# include directory and all static archives, and depends on the sox build.
add_library(libsox INTERFACE)
add_dependencies(libsox sox)
target_include_directories(libsox INTERFACE ${INSTALL_DIR}/include)
target_link_libraries(libsox INTERFACE ${SOX_LIBRARIES})

================================================
FILE: audio/paddleaudio/utils/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .download import decompress
from .download import download_and_decompress
from .download import load_state_dict_from_url
from .env import DATA_HOME
from .env import MODEL_HOME
from .env import PPAUDIO_HOME
from .env import USER_HOME
from .error import ParameterError
from .log import Logger
from .log import logger
from .numeric import depth_convert
from .numeric import pcm16to32
from .time import seconds_to_hms
from .time import Timer


================================================
FILE: audio/paddleaudio/utils/download.py
================================================
# Copyright (c) 2021  PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from typing import Dict
from typing import List

from paddle.framework import load as load_state_dict
from paddle.utils import download

from .log import logger

download.logger = logger

__all__ = [
    'decompress',
    'download_and_decompress',
    'load_state_dict_from_url',
]


def decompress(file: str):
    """
    Unpack every member of the archive stored at `file`.

    The path must point to an existing regular file; the extraction itself is
    delegated to `download._decompress`.
    """
    is_regular_file = os.path.isfile(file)
    assert is_regular_file, "File: {} not exists.".format(file)
    download._decompress(file)


def download_and_decompress(archives: List[Dict[str, str]],
                            path: str,
                            decompress: bool=True):
    """
    Download archives and decompress to specific path.

    Args:
        archives (List[Dict[str, str]]): One dict per archive; each must carry
            the keys "url" and "md5".
        path (str): Directory the archives are downloaded into. It is created
            when it does not exist yet.
        decompress (bool): Forwarded as the `decompress` keyword of
            `download.get_path_from_url`. Defaults to True. Note that inside
            this function the name shadows the module-level `decompress`.

    Raises:
        AssertionError: If an archive dict lacks the "url" or "md5" key.
    """
    # exist_ok closes the window in which another process could create the
    # directory between an "is it there?" check and the creation itself.
    os.makedirs(path, exist_ok=True)

    for archive in archives:
        # The message is an f-string so the offending keys are actually shown.
        assert 'url' in archive and 'md5' in archive, \
            f'Dictionary keys of "url" and "md5" are required in the archive, but got: {list(archive.keys())}'
        download.get_path_from_url(
            archive['url'], path, archive['md5'], decompress=decompress)


def load_state_dict_from_url(url: str, path: str, md5: str=None):
    """
    Download and load a state dict from url

    Args:
        url (str): Location of the file to download.
        path (str): Directory the file is downloaded into. It is created when
            it does not exist yet.
        md5 (str): Passed through to `download.get_path_from_url` as its third
            positional argument. Defaults to None.

    Returns:
        The object returned by `paddle.framework.load` for the file named
        after the last component of `url` inside `path`.
    """
    # exist_ok avoids a FileExistsError when several processes create the
    # same directory at the same time.
    os.makedirs(path, exist_ok=True)

    download.get_path_from_url(url, path, md5)
    local_file = os.path.join(path, os.path.basename(url))
    return load_state_dict(local_file)


================================================
FILE: audio/paddleaudio/utils/env.py
================================================
# Copyright (c) 2021  PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''
This module is used to store environmental variables in PaddleAudio.
PPAUDIO_HOME     -->  the root directory for storing PaddleAudio related data. Default to ~/.paddleaudio. Users can change the
├                            default value through the PPAUDIO_HOME environment variable.
├─ MODEL_HOME    -->  Store model files.
└─ DATA_HOME     -->  Store automatically downloaded datasets.
'''
import os

__all__ = [
    'USER_HOME',
    'PPAUDIO_HOME',
    'MODEL_HOME',
    'DATA_HOME',
]


def _get_user_home():
    return os.path.expanduser('~')


def _get_ppaudio_home():
    if 'PPAUDIO_HOME' in os.environ:
        home_path = os.environ['PPAUDIO_HOME']
        if os.path.exists(home_path):
            if os.path.isdir(home_path):
                return home_path
            else:
                raise RuntimeError(
                    'The environment variable PPAUDIO_HOME {} is not a directory.'.
                    format(home_path))
        else:
            return home_path
    return os.path.join(_get_user_home(), '.paddleaudio')


def _get_sub_home(directory):
    """Return `<PaddleAudio home>/<directory>`, creating it when it is missing.

    Args:
        directory (str): Name of the sub-directory below the PaddleAudio home.

    Returns:
        str: The joined path.
    """
    home = os.path.join(_get_ppaudio_home(), directory)
    if not os.path.exists(home):
        # This runs at import time, so several processes may get here at
        # once; exist_ok keeps the loser of that race from raising
        # FileExistsError.
        os.makedirs(home, exist_ok=True)
    return home


# Resolved once, at import time. Computing MODEL_HOME and DATA_HOME goes through
# _get_sub_home, which creates the directory when it does not exist yet.
USER_HOME = _get_user_home()
PPAUDIO_HOME = _get_ppaudio_home()
MODEL_HOME = _get_sub_home('models')
DATA_HOME = _get_sub_home('datasets')


================================================
FILE: audio/paddleaudio/utils/error.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__all__ = ['ParameterError']


class ParameterError(Exception):
    """Error type raised when a parameter fails validation."""


================================================
FILE: audio/paddleaudio/utils/log.py
================================================
# Copyright (c) 2021  PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import contextlib
import functools
import logging
import threading
import time

import colorlog

__all__ = [
    'Logger',
    'logger',
]

# Level table used by Logger: level name -> numeric logging level and the
# colorlog color used for it. TRAIN (21) and EVAL (22) are extra levels that sit
# between INFO (20) and WARNING (30).
log_config = {
    'DEBUG': {
        'level': 10,
        'color': 'purple'
    },
    'INFO': {
        'level': 20,
        'color': 'green'
    },
    'TRAIN': {
        'level': 21,
        'color': 'cyan'
    },
    'EVAL': {
        'level': 22,
        'color': 'blue'
    },
    'WARNING': {
        'level': 30,
        'color': 'yellow'
    },
    'ERROR': {
        'level': 40,
        'color': 'red'
    },
    'CRITICAL': {
        'level': 50,
        'color': 'bold_red'
    }
}


class Logger(object):
    '''
    Default logger in PaddleAudio.

    For every entry of `log_config` a callable is attached to the instance
    under both the upper-case and the lower-case level name, so that e.g.
    `logger.info(msg)` and `logger.TRAIN(msg)` log at the matching level.

    Args:
        name(str) : Logger name, default is 'PaddleAudio'
    '''

    def __init__(self, name: str=None):
        name = 'PaddleAudio' if not name else name
        self.logger = logging.getLogger(name)

        for key, conf in log_config.items():
            logging.addLevelName(conf['level'], key)
            # Bind the numeric level so the attribute only needs the message.
            self.__dict__[key] = functools.partial(self.__call__, conf['level'])
            self.__dict__[key.lower()] = functools.partial(self.__call__,
                                                           conf['level'])

        self.format = colorlog.ColoredFormatter(
            '%(log_color)s[%(asctime)-15s] [%(levelname)8s]%(reset)s - %(message)s',
            log_colors={key: conf['color']
                        for key, conf in log_config.items()})

        self.handler = logging.StreamHandler()
        self.handler.setFormatter(self.format)

        self.logger.addHandler(self.handler)
        self.logLevel = 'DEBUG'
        self.logger.setLevel(logging.DEBUG)
        # Keep records away from the root logger's handlers.
        self.logger.propagate = False
        self._is_enable = True

    def disable(self):
        '''Turn logging through this object off.'''
        self._is_enable = False

    def enable(self):
        '''Turn logging through this object back on.'''
        self._is_enable = True

    @property
    def is_enable(self) -> bool:
        '''Whether calls to this logger currently emit records.'''
        return self._is_enable

    def __call__(self, log_level: int, msg: str):
        '''
        Emit `msg` at `log_level` unless the logger is disabled.
        Args:
            log_level(int): Numeric logging level (the 'level' values of `log_config`).
            msg(str): Message to be logged.
        '''
        if not self.is_enable:
            return

        self.logger.log(log_level, msg)

    @contextlib.contextmanager
    def use_terminator(self, terminator: str):
        '''
        Temporarily replace the line terminator of the stream handler.
        Args:
            terminator(str): Terminator used while the context is active.
        '''
        old_terminator = self.handler.terminator
        self.handler.terminator = terminator
        try:
            yield
        finally:
            # Restore even when the body raises, so one failure does not
            # leave every later record with the temporary terminator.
            self.handler.terminator = old_terminator

    @contextlib.contextmanager
    def processing(self, msg: str, interval: float=0.1):
        '''
        Continuously print a progress bar with rotating special effects.
        Args:
            msg(str): Message to be printed.
            interval(float): Rotation interval. Default to 0.1.
        '''
        end = False

        def _printer():
            index = 0
            flags = ['\\', '|', '/', '-']
            # `end` is read from the enclosing scope; it is flipped below
            # once the body of the `with` statement has finished.
            while not end:
                flag = flags[index % len(flags)]
                with self.use_terminator('\r'):
                    self.info('{}: {}'.format(msg, flag))
                time.sleep(interval)
                index += 1

        t = threading.Thread(target=_printer)
        t.start()
        try:
            yield
        finally:
            # Stop the spinner on normal exit and on exceptions alike, and
            # wait for it so it cannot keep printing (or keep the process
            # alive) after the context has been left.
            end = True
            t.join()


# Module-level default instance (named 'PaddleAudio'), created at import time.
logger = Logger()


================================================
FILE: audio/paddleaudio/utils/numeric.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Union

import numpy as np

from .error import ParameterError

__all__ = ["pcm16to32", "depth_convert"]


def pcm16to32(audio: np.ndarray) -> np.ndarray:
    """Scale int16 PCM samples to float32.

    Arrays of any other dtype are handed back untouched.

    Args:
        audio (np.ndarray): Waveform with dtype of int16.

    Returns:
        np.ndarray: Waveform with dtype of float32.
    """
    if audio.dtype != np.int16:
        return audio

    # Divide by 2**15, the magnitude of the most negative int16 value.
    full_scale = 2**(np.iinfo(np.int16).bits - 1)
    return audio.astype("float32") / full_scale


def _safe_cast(y: np.ndarray, dtype: Union[type, str]) -> np.ndarray:
    """Data type casting in a safe way, i.e., prevent overflow or underflow.

    Args:
        y (np.ndarray): Input waveform array in 1D or 2D.
        dtype (Union[type, str]): Data type of waveform.

    Returns:
        np.ndarray: `y` after safe casting.
    """
    if 'float' in str(y.dtype):
        return np.clip(y, np.finfo(dtype).min,
                       np.finfo(dtype).max).astype(dtype)
    else:
        return np.clip(y, np.iinfo(dtype).min,
                       np.iinfo(dtype).max).astype(dtype)


def depth_convert(y: np.ndarray, dtype: Union[type, str]) -> np.ndarray:
    """Convert audio array to target dtype safely.
    This function converts an audio waveform to a target dtype, with additional
    steps of preventing overflow/underflow and preserving audio range.

    Args:
        y (np.ndarray): Input waveform array in 1D or 2D. Its dtype must be one
            of int8, int16, float32 or float64.
        dtype (Union[type, str]): Target data type, given either as a name
            ('int16') or as a type object (np.int16). Must resolve to one of
            int8, int16, float32 or float64.

    Returns:
        np.ndarray: `y` after safe casting. When the dtype already matches,
        `y` itself is returned.

    Raises:
        ParameterError: If the dtype of `y` or the target dtype is unsupported.
    """

    SUPPORT_DTYPE = ['int16', 'int8', 'float32', 'float64']
    FLOAT_DTYPE = ('float32', 'float64')
    # The int8 -> int16 branch used a name `EPS` that this module never
    # defined. NOTE(review): 1e-8 is assumed to be the intended value; confirm.
    eps = 1e-8

    source = y.dtype.name
    if source not in SUPPORT_DTYPE:
        raise ParameterError(
            'Unsupported audio dtype, '
            f'y.dtype is {y.dtype}, supported dtypes are {SUPPORT_DTYPE}')

    # Normalise to the dtype name so that 'int16' and np.int16 are treated
    # alike. None is rejected explicitly because np.dtype(None) is float64.
    try:
        target = None if dtype is None else np.dtype(dtype).name
    except (TypeError, ValueError):
        target = None
    if target not in SUPPORT_DTYPE:
        raise ParameterError(
            'Unsupported audio dtype, '
            f'target dtype  is {dtype}, supported dtypes are {SUPPORT_DTYPE}')

    if target == source:
        return y

    if source in FLOAT_DTYPE:
        if target in FLOAT_DTYPE:
            return _safe_cast(y, target)
        # float -> int: scale by the integer maximum, then clip before casting.
        limits = np.iinfo(target)
        return np.clip(y * limits.max, limits.min, limits.max).astype(target)

    # From here on the source is int8 or int16.
    if target in FLOAT_DTYPE:
        return y.astype(target) / np.iinfo(source).max

    if target == 'int16':  # int8 -> int16
        factor = np.iinfo('int16').max / np.iinfo('int8').max - eps
        return (y.astype('float32') * factor).astype('int16')

    # int16 -> int8; widen first so the multiplication cannot overflow.
    scaled = y.astype('int32') * np.iinfo('int8').max / np.iinfo('int16').max
    return scaled.astype('int8')


================================================
FILE: audio/paddleaudio/utils/sox_utils.py
================================================
from typing import Dict
from typing import List

import paddleaudio
from paddleaudio._internal import module_utils as _mod_utils


@_mod_utils.requires_sox()
def set_seed(seed: int):
    """Seed the pseudo-random number generator used by libsox.

    Args:
        seed (int): seed value. valid range is int32.

    See Also:
        http://sox.sourceforge.net/sox.html
    """
    # Forward the value unchanged to the compiled extension.
    backend = paddleaudio._paddleaudio
    backend.sox_utils_set_seed(seed)


@_mod_utils.requires_sox()
def set_verbosity(verbosity: int):
    """Configure how much diagnostic output libsox produces.

    Args:
        verbosity (int): Set verbosity level of libsox.

            * ``1`` failure messages
            * ``2`` warnings
            * ``3`` details of processing
            * ``4``-``6`` increasing levels of debug messages

    See Also:
        http://sox.sourceforge.net/sox.html
    """
    # Forward the value unchanged to the compiled extension.
    backend = paddleaudio._paddleaudio
    backend.sox_utils_set_verbosity(verbosity)


@_mod_utils.requires_sox()
def set_buffer_size(buffer_size: int):
    """Configure the buffer size of the sox effect chain.

    Args:
        buffer_size (int): Set the size in bytes of the buffers used for processing audio.

    See Also:
        http://sox.sourceforge.net/sox.html
    """
    # Forward the value unchanged to the compiled extension.
    backend = paddleaudio._paddleaudio
    backend.sox_utils_set_buffer_size(buffer_size)


@_mod_utils.requires_sox()
def set_use_threads(use_threads: bool):
    """Toggle the multithread option of the sox effect chain.

    Args:
        use_threads (bool): When ``True``, enables ``libsox``'s parallel effects channels processing.
            To use multithread, the underlying ``libsox`` has to be compiled with OpenMP support.

    See Also:
        http://sox.sourceforge.net/sox.html
    """
    # Forward the flag unchanged to the compiled extension.
    backend = paddleaudio._paddleaudio
    backend.sox_utils_set_use_threads(use_threads)


@_mod_utils.requires_sox()
def list_effects() -> Dict[str, str]:
    """Enumerate the sox effects that are available.

    Returns:
        Dict[str, str]: Mapping from ``effect name`` to ``usage``
    """
    # The extension's result is converted into a plain ``dict`` before
    # being handed back to the caller.
    raw_effects = paddleaudio._paddleaudio.sox_utils_list_effects()
    return dict(raw_effects)


@_mod_utils.requires_sox()
def list_read_formats() -> List[str]:
    """Enumerate the audio formats that can be read.

    Returns:
        List[str]: List of supported audio formats
    """
    readable = paddleaudio._paddleaudio.sox_utils_list_read_formats()
    return readable


@_mod_utils.requires_sox()
def list_write_formats() -> List[str]:
    """Enumerate the audio formats that can be written.

    Returns:
        List[str]: List of supported audio formats
    """
    writable = paddleaudio._paddleaudio.sox_utils_list_write_formats()
    return writable


@_mod_utils.requires_sox()
def get_buffer_size() -> int:
    """Query the buffer size of the sox effect chain.

    Returns:
        int: size in bytes of buffers used for processing audio.
    """
    current_size = paddleaudio._paddleaudio.sox_utils_get_buffer_size()
    return current_size


================================================
FILE: audio/paddleaudio/utils/tensor_utils.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility functions for Transformer."""
from typing import List
from typing import Tuple

import paddle

from .log import Logger

__all__ = ["pad_sequence", "add_sos_eos", "th_accuracy", "has_tensor"]

logger = Logger(__name__)


def has_tensor(val):
    """Tell whether ``val`` is, or recursively contains, a paddle Tensor.

    Lists, tuples and dicts (values only) are searched recursively; any other
    object is checked directly with ``paddle.is_tensor``.

    Args:
        val: Arbitrary object, possibly a nested list/tuple/dict structure.

    Returns:
        bool: ``True`` if a Tensor was found, ``False`` otherwise. Containers
        without any Tensor (including empty ones) yield ``False`` rather than
        falling through and implicitly returning ``None``.
    """
    if isinstance(val, (list, tuple)):
        return any(has_tensor(item) for item in val)
    if isinstance(val, dict):
        # Only the values are inspected; keys are ignored (and no longer
        # printed to stdout as a debugging side effect).
        return any(has_tensor(item) for item in val.values())
    return paddle.is_tensor(val)


def pad_sequence(sequences: List[paddle.Tensor],
                 batch_first: bool=False,
                 padding_value: float=0.0) -> paddle.Tensor:
    r"""Pad a list of variable length Tensors with ``padding_value``

    ``pad_sequence`` stacks a list of Tensors along a new dimension,
    and pads them to equal length. For example, if the input is a list of
    sequences with size ``L x *``, the output has size ``T x B x *`` when
    ``batch_first`` is False, and ``B x T x *`` otherwise.

    `B` is batch size. It is equal to the number of elements in ``sequences``.
    `T` is length of the longest sequence.
    `L` is length of the sequence.
    `*` is any number of trailing dimensions, including none.

    Example:
        >>> a = paddle.ones([25, 300])
        >>> b = paddle.ones([22, 300])
        >>> c = paddle.ones([15, 300])
        >>> pad_sequence([a, b, c]).shape
        [25, 3, 300]

    Note:
        This function returns a Tensor of size ``T x B x *`` or ``B x T x *``
        where `T` is the length of the longest sequence. This function assumes
        trailing dimensions and type of all the Tensors in sequences are same.

    Args:
        sequences (list[Tensor]): list of variable length sequences.
        batch_first (bool, optional): output will be in ``B x T x *`` if True, or in
            ``T x B x *`` otherwise
        padding_value (float, optional): value for padded elements. Default: 0.

    Returns:
        Tensor of size ``T x B x *`` if :attr:`batch_first` is ``False``.
        Tensor of size ``B x T x *`` otherwise
    """

    # assuming trailing dimensions and type of all the Tensors
    # in sequences are same and fetching those from sequences[0]
    max_size = paddle.shape(sequences[0])
    # (TODO Hui Zhang): slice not support `end==start`
    # trailing_dims = max_size[1:]
    # For 1-D inputs there are no trailing dims, so the slice is skipped.
    trailing_dims = tuple(
        max_size[1:].numpy().tolist()) if sequences[0].ndim >= 2 else ()
    max_len = max([s.shape[0] for s in sequences])
    if batch_first:
        out_dims = (len(sequences), max_len) + trailing_dims
    else:
        out_dims = (max_len, len(sequences)) + trailing_dims
    # Start from a tensor filled with the padding value, then copy each
    # sequence into its slot; dtype is taken from the first sequence.
    out_tensor = paddle.full(out_dims, padding_value, sequences[0].dtype)
    for i, tensor in enumerate(sequences):
        length = tensor.shape[0]
        # use index notation to prevent duplicate references to the tensor
        if batch_first:
            # TODO (Hui Zhang): set_value op not support `end==start`
            # TODO (Hui Zhang): set_value op not support int16
            # TODO (Hui Zhang): set_varbase 2 rank not support [0,0,...]
            # out_tensor[i, :length, ...] = tensor
            if length != 0:
                out_tensor[i, :length] = tensor
            else:
                # NOTE(review): for an empty sequence this assigns to index 0
                # instead of an empty slice; presumably a workaround for the
                # set_value limitation noted above -- confirm it is a no-op.
                out_tensor[i, length] = tensor
        else:
            # TODO (Hui Zhang): set_value op not support `end==start`
            # out_tensor[:length, i, ...] = tensor
            if length != 0:
                out_tensor[:length, i] = tensor
            else:
                # NOTE(review): same empty-sequence workaround as the
                # batch_first branch -- confirm behaviour.
                out_tensor[length, i] = tensor

    return out_tensor


def add_sos_eos(ys_pad: paddle.Tensor, sos: int, eos: int,
                ignore_id: int) -> Tuple[paddle.Tensor, paddle.Tensor]:
    """Add <sos> and <eos> labels.

    ``ys_in`` is ``ys_pad`` with ``sos`` prepended and padding replaced by
    ``eos``; ``ys_out`` is ``ys_pad`` with ``eos`` placed right after the last
    non-padding label and the remaining positions set to ``ignore_id``.

    Args:
        ys_pad (paddle.Tensor): batch of padded target sequences (B, Lmax)
        sos (int): index of <sos>
        eos (int): index of <eos>
        ignore_id (int): index of padding
    Returns:
        ys_in (paddle.Tensor) : (B, Lmax + 1)
        ys_out (paddle.Tensor) : (B, Lmax + 1)
    Examples:
        >>> sos_id = 10
        >>> eos_id = 11
        >>> ignore_id = -1
        >>> ys_pad
        tensor([[ 1,  2,  3,  4,  5],
                [ 4,  5,  6, -1, -1],
                [ 7,  8,  9, -1, -1]], dtype=paddle.int32)
        >>> ys_in,ys_out=add_sos_eos(ys_pad, sos_id , eos_id, ignore_id)
        >>> ys_in
        tensor([[10,  1,  2,  3,  4,  5],
                [10,  4,  5,  6, 11, 11],
                [10,  7,  8,  9, 11, 11]])
        >>> ys_out
        tensor([[ 1,  2,  3,  4,  5, 11],
                [ 4,  5,  6, 11, -1, -1],
                [ 7,  8,  9, 11, -1, -1]])
    """
    # TODO(Hui Zhang): using comment code,
    #_sos = paddle.to_tensor(
    #    [sos], dtype=paddle.long, stop_gradient=True, place=ys_pad.place)
    #_eos = paddle.to_tensor(
    #    [eos], dtype=paddle.long, stop_gradient=True, place=ys_pad.place)
    #ys = [y[y != ignore_id] for y in ys_pad]  # parse padded ys
    #ys_in = [paddle.cat([_sos, y], dim=0) for y in ys]
    #ys_out = [paddle.cat([y, _eos], dim=0) for y in ys]
    #return pad_sequence(ys_in, padding_value=eos), pad_sequence(ys_out, padding_value=ignore_id)
    # NOTE(review): `paddle.cat` and `Tensor.masked_fill` are presumably
    # provided by project-level patches of paddle -- confirm they are
    # installed before this module is used.
    B = ys_pad.shape[0]
    # One column of sos / eos ids per batch row.
    _sos = paddle.ones([B, 1], dtype=ys_pad.dtype) * sos
    _eos = paddle.ones([B, 1], dtype=ys_pad.dtype) * eos
    ys_in = paddle.cat([_sos, ys_pad], dim=1)
    # mask_pad marks padding in ys_in, i.e. it is shifted right by one column
    # relative to the padding positions of ys_pad / ys_out.
    mask_pad = (ys_in == ignore_id)
    ys_in = ys_in.masked_fill(mask_pad, eos)

    ys_out = paddle.cat([ys_pad, _eos], dim=1)
    # Step 1: overwrite everything after the first padding slot with eos.
    ys_out = ys_out.masked_fill(mask_pad, eos)
    # Step 2: the only ignore_id left per row is the first padding slot,
    # which is where the real <eos> belongs.
    mask_eos = (ys_out == ignore_id)
    ys_out = ys_out.masked_fill(mask_eos, eos)
    # Step 3: restore ignore_id on the positions after that <eos>.
    ys_out = ys_out.masked_fill(mask_pad, ignore_id)
    return ys_in, ys_out


def th_accuracy(pad_outputs: paddle.Tensor,
                pad_targets: paddle.Tensor,
                ignore_label: int) -> float:
    """Compute the label accuracy over the non-ignored target positions.

    Args:
        pad_outputs (Tensor): Prediction tensors (B * Lmax, D).
        pad_targets (LongTensor): Target label tensors (B, Lmax, D).
        ignore_label (int): Ignore label id.
    Returns:
        float: Accuracy value (0.0 - 1.0).
    """
    batch_size = pad_targets.shape[0]
    max_len = pad_targets.shape[1]
    num_classes = pad_outputs.shape[1]
    # Restore the batch/time layout, then take the best class per position.
    predictions = pad_outputs.reshape(
        [batch_size, max_len, num_classes]).argmax(2)
    valid = pad_targets != ignore_label
    # TODO(Hui Zhang): sum not support bool type, hence the casts below.
    hits = predictions.masked_select(valid) == pad_targets.masked_select(
        valid)
    correct_count = paddle.sum(hits.type_as(pad_targets))
    valid_count = paddle.sum(valid.type_as(pad_targets))
    return float(correct_count) / float(valid_count)


================================================
FILE: audio/paddleaudio/utils/time.py
================================================
# Copyright (c) 2021  PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import time

__all__ = [
    'Timer',
    'seconds_to_hms',
]


class Timer(object):
    '''Calculate running speed and estimated time of arrival(ETA).

    Typical usage: create with the total number of steps, call ``start()``,
    call ``count()`` once per finished step, and read ``timing`` / ``eta``
    whenever progress is reported.
    '''

    def __init__(self, total_step: int):
        self.total_step = total_step
        self.last_start_step = 0
        self.current_step = 0
        self._is_running = True
        # Initialise the clocks here as well, so that reading `timing` or
        # `eta` before `start()` does not raise AttributeError.
        now = time.time()
        self.last_time = now
        self.start_time = now

    def start(self):
        '''(Re)start the clocks used by ``timing`` and ``eta``.'''
        now = time.time()
        self.last_time = now
        self.start_time = now

    def stop(self):
        '''Mark the timer as finished and record the end time.'''
        self._is_running = False
        self.end_time = time.time()

    def count(self) -> int:
        '''Advance by one step (capped at ``total_step``) and return the step.'''
        if self.current_step < self.total_step:
            self.current_step += 1
        return self.current_step

    @property
    def timing(self) -> float:
        '''Steps per second since the previous read of this property.

        Reading it resets the measuring window. Returns ``0.0`` when no
        measurable time has elapsed, instead of dividing by zero.
        '''
        now = time.time()
        run_steps = self.current_step - self.last_start_step
        self.last_start_step = self.current_step
        time_used = now - self.last_time
        self.last_time = now
        if time_used <= 0:
            return 0.0
        return run_steps / time_used

    @property
    def is_running(self) -> bool:
        '''``True`` until ``stop()`` has been called.'''
        return self._is_running

    @property
    def eta(self) -> str:
        '''Estimated remaining time formatted as ``hh:mm:ss``.

        Returns ``'00:00:00'`` once stopped and ``'--:--:--'`` while no step
        has been counted yet (no estimate is possible and the previous code
        raised ZeroDivisionError).
        '''
        if not self.is_running:
            return '00:00:00'
        if self.current_step <= 0:
            return '--:--:--'
        elapsed = time.time() - self.start_time
        # Remaining time = average time per finished step * steps left.
        # (The previous formula scaled by total/current, which yields the
        # projected *total* duration rather than the time still to go.)
        remaining_steps = max(self.total_step - self.current_step, 0)
        remaining_time = elapsed * remaining_steps / self.current_step
        return seconds_to_hms(remaining_time)


def seconds_to_hms(seconds: int) -> str:
    '''Format a duration given in seconds as hh:mm:ss (fields zero-padded to 2).'''
    hours = math.floor(seconds / 3600)
    leftover = seconds - hours * 3600
    minutes = math.floor(leftover / 60)
    secs = int(leftover - minutes * 60)
    return f'{hours:0>2}:{minutes:0>2}:{secs:0>2}'


================================================
FILE: audio/setup.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import contextlib
import inspect
import io
import os
import platform
import subprocess as sp
import sys
from pathlib import Path
from typing import List
from typing import Tuple
from typing import Union

import distutils.command.clean
from setuptools import Command
from setuptools import find_packages
from setuptools import setup
from setuptools.command.develop import develop
from setuptools.command.test import test

from tools import setup_helpers

# Absolute path of the directory that contains this setup.py.
ROOT_DIR = Path(__file__).parent.resolve()

# Base release version; `_get_version` below may replace it with the
# BUILD_VERSION environment variable or append a short git sha.
VERSION = '1.2.0'
# NOTE(review): COMMITID is not referenced in the visible part of this file.
COMMITID = 'none'

# Runtime dependencies installed with the package.
base = [
    # paddleaudio align with librosa==0.8.1, which need numpy==1.23.x
    "librosa==0.8.1",
    "numpy==1.23.5",
    "kaldiio",
    "pathos",
    "pybind11",
    "parameterized",
]

# "install" feeds `install_requires`; "develop" feeds the `develop` extra.
requirements = {
    "install": base,
    "develop": [
        "sox",
        "soxbindings",
        "pre-commit",
    ],
}


def check_call(cmd: str, shell=False, executable=None):
    """Run ``cmd`` (split on whitespace) and re-raise on a non-zero exit.

    When ``shell`` is true the executable is forced to ``/bin/bash``;
    otherwise ``executable`` is passed through as given. On failure the
    command is reported on stderr before the error is re-raised.
    """
    argv = cmd.split()
    chosen_executable = "/bin/bash" if shell else executable
    try:
        sp.check_call(argv, shell=shell, executable=chosen_executable)
    except sp.CalledProcessError as err:
        location = f"{__file__}:{inspect.currentframe().f_lineno}: CMD: {cmd}, Error:"
        print(location, err.output, file=sys.stderr)
        raise err


def check_output(cmd: Union[str, List[str], Tuple[str]], shell=False):
    """Run a command and return its stdout as a stripped UTF-8 string.

    Args:
        cmd: Either an argv sequence (list/tuple) or a single string that is
            split on whitespace.
        shell: Accepted for backward compatibility only; it is not forwarded
            to ``subprocess`` and has no effect.

    Returns:
        str: The decoded, stripped standard output. If the command exits with
        a non-zero status, the failure is reported on stderr and whatever
        output was produced before the failure is returned (no exception).
    """
    if isinstance(cmd, (list, tuple)):
        cmds = cmd
    else:
        cmds = cmd.split()

    try:
        out_bytes = sp.check_output(cmds)
    except sp.CalledProcessError as e:
        out_bytes = e.output  # Output generated before error
        print(
            f"{__file__}:{inspect.currentframe().f_lineno}: CMD: {cmd}, Error:",
            out_bytes,
            file=sys.stderr)
    return out_bytes.strip().decode('utf8')


def _run_cmd(cmd):
    try:
        return subprocess.check_output(
            cmd, cwd=ROOT_DIR,
            stderr=subprocess.DEVNULL).decode("ascii").strip()
    except Exception:
        return None


@contextlib.contextmanager
def pushd(new_dir):
    """Temporarily change the working directory to ``new_dir``.

    The previous working directory is restored when the ``with`` block exits,
    including when the block raises (the restore happens in ``finally``).
    Both directory changes are echoed to stdout.
    """
    old_dir = os.getcwd()
    os.chdir(new_dir)
    print(new_dir)
    try:
        yield
    finally:
        os.chdir(old_dir)
        print(old_dir)


def read(*names, **kwargs):
    """Return the text of a file addressed relative to this file's directory.

    ``names`` are joined onto the directory of ``__file__``; the optional
    ``encoding`` keyword (default ``"utf8"``) selects the text encoding.
    """
    base_dir = os.path.dirname(__file__)
    target = os.path.join(base_dir, *names)
    encoding = kwargs.get("encoding", "utf8")
    with io.open(target, encoding=encoding) as handle:
        content = handle.read()
    return content


def _remove(files: List[Path]) -> None:
    # Delete every given file. Each item must provide `.unlink()`
    # (e.g. pathlib.Path objects); plain strings would fail here.
    for f in files:
        f.unlink()


################################# Install ##################################


def _post_install(install_lib_dir):
    # Placeholder hook executed after `develop`; currently does nothing and
    # ignores `install_lib_dir`.
    pass


class DevelopCommand(develop):
    """`develop` command that triggers the post-install hook afterwards."""

    def run(self):
        super().run()
        # The hook has to run after the stock develop step, or packages
        # installed by shell will not be visible.
        hook_args = (self.install_lib, )
        self.execute(_post_install, hook_args, msg="Post Install...")


class TestCommand(test):
    """`python setup.py test` command that delegates to nose."""

    def finalize_options(self):
        test.finalize_options(self)
        self.test_args = []
        self.test_suite = True

    def run_tests(self):
        # Run nose ensuring that argv simulates running nosetests directly
        import nose
        nose.run_exit(argv=['nosetests', '-w', 'tests'])

    def run_benchmark(self):
        """Run every benchmark script under tests/benchmark with pytest."""
        # `glob` is not imported at module level, so import it here; without
        # this the method raised NameError as soon as it was called.
        import glob
        for benchmark_item in glob.glob('tests/benchmark/*py'):
            os.system(f'pytest {benchmark_item}')


# cmd: python setup.py upload
class UploadCommand(Command):
    """Build a source distribution and upload it with twine.

    Invoked as ``python setup.py upload``.
    """
    description = "Build and publish the package."
    user_options = []

    def initialize_options(self):
        pass

    def finalize_options(self):
        pass

    def run(self):
        # `shutil` is not imported at module level; without this local import
        # the rmtree call raised NameError, which `except OSError` does not
        # catch, so the command aborted before building anything.
        import shutil
        try:
            print("Removing previous dist/ ...")
            shutil.rmtree(str(ROOT_DIR / "dist"))
        except OSError:
            # A missing dist/ directory is fine: there is nothing to remove.
            pass
        print("Building source distribution...")
        sp.check_call([sys.executable, "setup.py", "sdist"])
        print("Uploading package to PyPi...")
        sp.check_call(["twine", "upload", "dist/*"])
        sys.exit()


################################# Version ##################################
def _get_version(sha):
    version = VERSION
    if os.getenv("BUILD_VERSION"):
        version = os.getenv("BUILD_VERSION")
    elif sha is not None:
        version += "+" + sha[:7]
    return version


def _make_version_file(version, sha):
    sha = "Unknown" if sha is None else sha
    version_path = ROOT_DIR / "paddleaudio" / "__init__.py"
    with open(version_path, "a") as f:
        f.write(f"__version__ = '{version}'\n")


def _rm_version():
    file_ = ROOT_DIR / "paddleaudio" / "__init__.py"
    with open(file_, "r") as f:
        lines = f.readlines()
    with open(file_, "w") as f:
        for line in lines:
            if "__version__" not in line:
                f.write(line)


################################# Setup ##################################
class clean(distutils.command.clean.clean):
    """`clean` command that also removes built extensions and build/."""

    def run(self):
        # `shutil` is not imported at module level; import it here so that
        # removing an existing build directory does not raise NameError.
        import shutil

        # Run default behavior first
        distutils.command.clean.clean.run(self)

        # Remove paddleaudio extension
        for path in (ROOT_DIR / "paddleaudio").glob("**/*.so"):
            print(f"removing '{path}'")
            path.unlink()
        # Remove build directory
        build_dirs = [
            ROOT_DIR / "build",
        ]
        for path in build_dirs:
            if path.exists():
                print(f"removing '{path}' (and everything under it)")
                shutil.rmtree(str(path), ignore_errors=True)


def main():
    """Collect git/version metadata and invoke ``setuptools.setup``.

    A ``__version__`` line is temporarily appended to
    ``paddleaudio/__init__.py`` for the duration of the build and removed
    again afterwards, even when the build fails.
    """
    sha = _run_cmd(["git", "rev-parse", "HEAD"])  # commit id
    branch = _run_cmd(["git", "rev-parse", "--abbrev-ref", "HEAD"])
    tag = _run_cmd(["git", "describe", "--tags", "--exact-match", "@"])
    print("-- Git branch:", branch)
    print("-- Git SHA:", sha)
    print("-- Git tag:", tag)
    version = _get_version(sha)
    print("-- Building version", version)
    # Drop any stale __version__ line left behind by a previous run.
    _rm_version()

    _make_version_file(version, sha)
    try:
        lib_package_data = {}
        if platform.system() != 'Windows' and platform.system() != 'Linux':
            lib_package_data = {'paddleaudio': ['lib/libgcc_s.1.1.dylib']}

        #if platform.system() == 'Linux':
        #    lib_package_data = {'paddleaudio': ['lib/lib*']}

        setup_info = dict(
            # Metadata
            name='paddleaudio',
            # NOTE(review): the package metadata uses the base VERSION while
            # __init__.py receives the computed `version` -- confirm this
            # difference is intended.
            version=VERSION,
            author='PaddlePaddle Speech and Language Team',
            author_email='paddlesl@baidu.com',
            url='https://github.com/PaddlePaddle/PaddleSpeech/audio',
            license='Apache 2.0',
            description='Speech audio tools based on Paddlepaddle',
            # The comma between the two keywords was missing, which silently
            # concatenated them into "audio processpaddlepaddle".
            keywords=[
                "audio process",
                "paddlepaddle",
            ],
            python_requires='>=3.7',
            install_requires=requirements["install"],
            extras_require={
                'develop': requirements["develop"],
                #'test': ["nose", "torchaudio==0.10.2", "pytest-benchmark", "librosa=0.8.1", "parameterized", "paddlepaddle"],
            },
            cmdclass={
                "build_ext": setup_helpers.CMakeBuild,
                'develop': DevelopCommand,
                'test': TestCommand,
                'upload': UploadCommand,
                "clean": clean,
            },

            # Package info
            packages=find_packages(include=['paddleaudio*']),
            package_data=lib_package_data,
            ext_modules=setup_helpers.get_ext_modules(),
            zip_safe=True,
            # The Python 3.6 classifier was dropped because it contradicted
            # python_requires='>=3.7' above.
            classifiers=[
                'Development Status :: 5 - Production/Stable',
                'Intended Audience :: Developers',
                'Intended Audience :: Science/Research',
                'Topic :: Scientific/Engineering :: Artificial Intelligence',
                'License :: OSI Approved :: Apache Software License',
                'Programming Language :: Python',
                'Programming Language :: Python :: 3',
                'Programming Language :: Python :: 3.7',
                'Programming Language :: Python :: 3.8',
                'Programming Language :: Python :: 3.9',
                'Programming Language :: Python :: 3.10',
            ], )

        setup(**setup_info)
    finally:
        # Always restore __init__.py, including when the build fails.
        _rm_version()


# Script entry point: only run the build when executed directly.
if __name__ == '__main__':
    main()


================================================
FILE: audio/tests/backends/base.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
import urllib.request

# Remote sample recordings that BackendTest downloads on demand.
mono_channel_wav = 'https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav'
multi_channels_wav = 'https://paddlespeech.cdn.bcebos.com/PaddleAudio/cat.wav'


class BackendTest(unittest.TestCase):
    """Base test case that makes the sample wav files available locally."""

    def setUp(self):
        self.initWavInput()

    def initWavInput(self):
        """Fill ``self.files`` with the local names of the sample wavs.

        A file is fetched into the current working directory only when no
        file of that name is present there yet.
        """
        self.files = []
        for url in (mono_channel_wav, multi_channels_wav):
            local_name = os.path.basename(url)
            already_present = os.path.isfile(local_name)
            if not already_present:
                urllib.request.urlretrieve(url, local_name)
            self.files.append(local_name)

    def initParams(self):
        """Hook for subclasses; the base implementation is not usable."""
        raise NotImplementedError


================================================
FILE: audio/tests/backends/common.py
================================================

def get_encoding(ext, dtype):
    """Return the expected encoding name for a file extension and dtype.

    For "mp3", "flac" and "vorbis" the upper-cased extension is returned;
    for every other extension the value is looked up by ``dtype``.
    """
    compressed_formats = frozenset(("mp3", "flac", "vorbis"))
    if ext in compressed_formats:
        return ext.upper()
    pcm_by_dtype = dict(
        float32="PCM_F",
        int32="PCM_S",
        int16="PCM_S",
        uint8="PCM_U", )
    return pcm_by_dtype[dtype]


def get_bit_depth(dtype):
    """Return the number of bits per sample for a dtype name.

    Unknown dtype names raise ``KeyError``.
    """
    depth_by_dtype = dict(float32=32, int32=32, int16=16, uint8=8)
    return depth_by_dtype[dtype]

def get_bits_per_sample(ext, dtype):
    """Return the expected bits-per-sample for an extension and dtype.

    "flac", "mp3" and "vorbis" have fixed values; any other extension uses
    the bit depth of ``dtype``. The dtype lookup always runs first, so an
    unknown dtype raises even for the fixed-value extensions.
    """
    dtype_depth = get_bit_depth(dtype)
    fixed_depths = dict(flac=24, mp3=0, vorbis=0)
    if ext in fixed_depths:
        return fixed_depths[ext]
    return dtype_depth


================================================
FILE: audio/tests/backends/soundfile/base.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
import urllib.request

# Remote sample recordings that BackendTest downloads on demand.
mono_channel_wav = 'https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav'
multi_channels_wav = 'https://paddlespeech.cdn.bcebos.com/PaddleAudio/cat.wav'


class BackendTest(unittest.TestCase):
    """Base test case that makes the sample wav files available locally."""

    def setUp(self):
        self.initWavInput()

    def initWavInput(self):
        """Fill ``self.files`` with the local names of the sample wavs.

        A file is fetched into the current working directory only when no
        file of that name is present there yet.
        """
        self.files = []
        for url in (mono_channel_wav, multi_channels_wav):
            local_name = os.path.basename(url)
            already_present = os.path.isfile(local_name)
            if not already_present:
                urllib.request.urlretrieve(url, local_name)
            self.files.append(local_name)

    def initParams(self):
        """Hook for subclasses; the base implementation is not usable."""
        raise NotImplementedError


================================================
FILE: audio/tests/backends/soundfile/common.py
================================================
import itertools
from unittest import skipIf

from paddleaudio._internal.module_utils import is_module_available
from parameterized import parameterized


def name_func(func, _, params):
    """Build a test name from the function name and its stringified args."""
    arg_labels = [str(arg) for arg in params.args]
    suffix = "_".join(arg_labels)
    return f'{func.__name__}_{suffix}'


def dtype2subtype(dtype):
    """Map a dtype name to its subtype string; unknown names raise KeyError."""
    subtype_by_dtype = dict(
        float64="DOUBLE",
        float32="FLOAT",
        int32="PCM_32",
        int16="PCM_16",
        uint8="PCM_U8",
        int8="PCM_S8", )
    return subtype_by_dtype[dtype]


def skipIfFormatNotSupported(fmt):
    """Return a skip decorator for tests that need soundfile format ``fmt``.

    The test is skipped when the ``soundfile`` module is unavailable, or when
    ``fmt`` is not among the formats soundfile reports as available.
    """
    if not is_module_available("soundfile"):
        return skipIf(True, '"soundfile" not available.')

    import soundfile

    supported = soundfile.available_formats()
    return skipIf(fmt not in supported,
                  f'"{fmt}" is not supported by soundfile')


def parameterize(*params):
    """Expand a test over the cartesian product of the given value lists."""
    combinations = list(itertools.product(*params))
    return parameterized.expand(combinations, name_func=name_func)


def fetch_wav_subtype(dtype, encoding, bits_per_sample):
    """Pick the wav subtype for an (encoding, bits_per_sample) combination.

    When both are ``None`` the subtype is derived from ``dtype``. The dtype
    lookup always runs, whichever combination is requested. Unsupported
    combinations raise ``ValueError``.
    """
    dtype_default = dtype2subtype(dtype)
    table = {
        (None, None): dtype_default,
        (None, 8): "PCM_U8",
        ("PCM_U", None): "PCM_U8",
        ("PCM_U", 8): "PCM_U8",
        ("PCM_S", None): "PCM_32",
        ("PCM_S", 16): "PCM_16",
        ("PCM_S", 32): "PCM_32",
        ("PCM_F", None): "FLOAT",
        ("PCM_F", 32): "FLOAT",
        ("PCM_F", 64): "DOUBLE",
        ("ULAW", None): "ULAW",
        ("ULAW", 8): "ULAW",
        ("ALAW", None): "ALAW",
        ("ALAW", 8): "ALAW",
    }
    chosen = table.get((encoding, bits_per_sample))
    if not chosen:
        raise ValueError(
            f"wav does not support ({encoding}, {bits_per_sample}).")
    return chosen

def get_encoding(ext, dtype):
    """Return the expected encoding name for a file extension and dtype.

    For "mp3", "flac" and "vorbis" the upper-cased extension is returned;
    for every other extension the value is looked up by ``dtype``.
    """
    compressed_formats = frozenset(("mp3", "flac", "vorbis"))
    if ext in compressed_formats:
        return ext.upper()
    pcm_by_dtype = dict(
        float32="PCM_F",
        int32="PCM_S",
        int16="PCM_S",
        uint8="PCM_U", )
    return pcm_by_dtype[dtype]


def get_bit_depth(dtype):
    """Return the number of bits per sample for a dtype name.

    Unknown dtype names raise ``KeyError``.
    """
    depth_by_dtype = dict(float32=32, int32=32, int16=16, uint8=8)
    return depth_by_dtype[dtype]

def get_bits_per_sample(ext, dtype):
    """Return the expected bits-per-sample for an extension and dtype.

    "flac", "mp3" and "vorbis" have fixed values; any other extension uses
    the bit depth of ``dtype``. The dtype lookup always runs first, so an
    unknown dtype raises even for the fixed-value extensions.
    """
    dtype_depth = get_bit_depth(dtype)
    fixed_depths = dict(flac=24, mp3=0, vorbis=0)
    if ext in fixed_depths:
        return fixed_depths[ext]
    return dtype_depth


================================================
FILE: audio/tests/backends/soundfile/info_test.py
================================================
#this code is from: https://github.com/pytorch/audio/blob/main/test/torchaudio_unittest/backend/soundfile/info_test.py
import tarfile
import unittest
import warnings
from unittest.mock import patch

import paddle
import soundfile
from common import get_bits_per_sample
from common import get_encoding
from common import parameterize
from common import skipIfFormatNotSupported
from common_utils import get_wav_data
from common_utils import nested_params
from common_utils import save_wav
from common_utils import TempDirMixin
from paddleaudio.backends import soundfile_backend


class TestInfo(TempDirMixin, unittest.TestCase):
    """Checks the metadata reported by `soundfile_backend.info`.

    Each test writes a small audio file into a temporary directory and then
    compares the reported sample rate, frame count, channel count,
    bits-per-sample and encoding with the values used to create it.
    """

    @parameterize(
        ["float32", "int32"],
        [8000, 16000],
        [1, 2], )
    def test_wav(self, dtype, sample_rate, num_channels):
        """`soundfile_backend.info` can check wav file correctly"""
        duration = 1
        path = self.get_temp_path("data.wav")
        data = get_wav_data(
            dtype,
            num_channels,
            normalize=False,
            num_frames=duration * sample_rate)
        save_wav(path, data, sample_rate)
        info = soundfile_backend.info(path)
        assert info.sample_rate == sample_rate
        assert info.num_frames == sample_rate * duration
        assert info.num_channels == num_channels
        assert info.bits_per_sample == get_bits_per_sample("wav", dtype)
        assert info.encoding == get_encoding("wav", dtype)

    @parameterize([8000, 16000], [1, 2])
    @skipIfFormatNotSupported("FLAC")
    def test_flac(self, sample_rate, num_channels):
        """`soundfile_backend.info` can check flac file correctly"""
        duration = 1
        num_frames = sample_rate * duration
        #data = torch.randn(num_frames, num_channels).numpy()
        data = paddle.randn(shape=[num_frames, num_channels]).numpy()

        path = self.get_temp_path("data.flac")
        # No subtype is passed, so soundfile chooses its default for FLAC.
        soundfile.write(path, data, sample_rate)

        info = soundfile_backend.info(path)
        assert info.sample_rate == sample_rate
        assert info.num_frames == num_frames
        assert info.num_channels == num_channels
        assert info.bits_per_sample == 16
        assert info.encoding == "FLAC"

    # NOTE: the OGG test below is disabled (kept commented out, including
    # its leftover debug prints).
    #@parameterize([8000, 16000], [1, 2])
    #@skipIfFormatNotSupported("OGG")
    #def test_ogg(self, sample_rate, num_channels):
    #"""`soundfile_backend.info` can check ogg file correctly"""
    #duration = 1
    #num_frames = sample_rate * duration
    ##data = torch.randn(num_frames, num_channels).numpy()
    #data = paddle.randn(shape=[num_frames, num_channels]).numpy()
    #print(len(data))
    #path = self.get_temp_path("data.ogg")
    #soundfile.write(path, data, sample_rate)

    #info = soundfile_backend.info(path)
    #print(info)
    #assert info.sample_rate == sample_rate
    #print("info")
    #print(info.num_frames)
    #print("jiji")
    #print(sample_rate*duration)
    ##assert info.num_frames == sample_rate * duration
    #assert info.num_channels == num_channels
    #assert info.bits_per_sample == 0
    #assert info.encoding == "VORBIS"

    @nested_params(
        [8000, 16000],
        [1, 2],
        [("PCM_24", 24), ("PCM_32", 32)], )
    @skipIfFormatNotSupported("NIST")
    def test_sphere(self, sample_rate, num_channels, subtype_and_bit_depth):
        """`soundfile_backend.info` can check sph file correctly"""
        duration = 1
        num_frames = sample_rate * duration
        #data = torch.randn(num_frames, num_channels).numpy()
        data = paddle.randn(shape=[num_frames, num_channels]).numpy()
        path = self.get_temp_path("data.nist")
        # Each parameter pairs a soundfile subtype with its expected bit depth.
        subtype, bits_per_sample = subtype_and_bit_depth
        soundfile.write(path, data, sample_rate, subtype=subtype)

        info = soundfile_backend.info(path)
        assert info.sample_rate == sample_rate
        assert info.num_frames == sample_rate * duration
        assert info.num_channels == num_channels
        assert info.bits_per_sample == bits_per_sample
        assert info.encoding == "PCM_S"

    def test_unknown_subtype_warning(self):
        """soundfile_backend.info issues a warning when the subtype is unknown

        This will happen if a new subtype is supported in SoundFile: the _SUBTYPE_TO_BITS_PER_SAMPLE
        dict should be updated.
        """

        # Stand-in for `soundfile.info` that reports a subtype the backend
        # has never seen.
        def _mock_info_func(_):
            class MockSoundFileInfo:
                samplerate = 8000
                frames = 356
                channels = 2
                subtype = "UNSEEN_SUBTYPE"
                format = "UNKNOWN"

            return MockSoundFileInfo()

        with patch("soundfile.info", _mock_info_func):
            with warnings.catch_warnings(record=True) as w:
                info = soundfile_backend.info("foo")
                # Exactly one warning is expected, and bits_per_sample is
                # expected to be reported as 0 for the unknown subtype.
                assert len(w) == 1
                assert "UNSEEN_SUBTYPE subtype is unknown to PaddleAudio" in str(
                    w[-1].message)
                assert info.bits_per_sample == 0


class TestFileObject(TempDirMixin, unittest.TestCase):
    """Checks that `soundfile_backend.info` can query audio through file-like objects."""

    def _test_fileobj(self, ext, subtype, bits_per_sample):
        """Write a 2-second stereo file and query it through an open binary file handle.

        Args:
            ext: File extension used for the temporary file ("wav", "flac", ...).
            subtype: soundfile subtype used when writing the file.
            bits_per_sample: Bit depth `info` is expected to report.
        """
        duration = 2
        sample_rate = 16000
        num_channels = 2
        num_frames = sample_rate * duration
        path = self.get_temp_path(f"test.{ext}")

        data = paddle.randn(shape=[num_frames, num_channels]).numpy()
        soundfile.write(path, data, sample_rate, subtype=subtype)

        with open(path, "rb") as fileobj:
            info = soundfile_backend.info(fileobj)
        assert info.sample_rate == sample_rate
        assert info.num_frames == num_frames
        assert info.num_channels == num_channels
        assert info.bits_per_sample == bits_per_sample
        # The parentheses are required: without them the statement parses as
        # `assert (info.encoding == "FLAC") if ext == "flac" else "PCM_S"`,
        # which is always true for non-FLAC input.
        assert info.encoding == ("FLAC" if ext == "flac" else "PCM_S")

    def test_fileobj_wav(self):
        """Querying a WAV file via file-like object works"""
        self._test_fileobj("wav", "PCM_16", 16)

    @skipIfFormatNotSupported("FLAC")
    def test_fileobj_flac(self):
        """Querying a FLAC file via file-like object works"""
        self._test_fileobj("flac", "PCM_16", 16)

    def _test_tarobj(self, ext, subtype, bits_per_sample):
        """Write a 2-second stereo file, pack it into a tar archive and query the archive member.

        Args:
            ext: File extension used for the temporary file ("wav", "flac", ...).
            subtype: soundfile subtype used when writing the file.
            bits_per_sample: Bit depth `info` is expected to report.
        """
        duration = 2
        sample_rate = 16000
        num_channels = 2
        num_frames = sample_rate * duration
        audio_file = f"test.{ext}"
        audio_path = self.get_temp_path(audio_file)
        archive_path = self.get_temp_path("archive.tar.gz")

        data = paddle.randn(shape=[num_frames, num_channels]).numpy()
        soundfile.write(audio_path, data, sample_rate, subtype=subtype)

        with tarfile.TarFile(archive_path, "w") as tarobj:
            tarobj.add(audio_path, arcname=audio_file)
        with tarfile.TarFile(archive_path, "r") as tarobj:
            # The member must be queried while the archive is still open.
            fileobj = tarobj.extractfile(audio_file)
            info = soundfile_backend.info(fileobj)
        assert info.sample_rate == sample_rate
        assert info.num_frames == num_frames
        assert info.num_channels == num_channels
        assert info.bits_per_sample == bits_per_sample
        # Parenthesised so the comparison covers both branches (see _test_fileobj).
        assert info.encoding == ("FLAC" if ext == "flac" else "PCM_S")

    def test_tarobj_wav(self):
        """Querying a WAV archive member via file-like object works"""
        self._test_tarobj("wav", "PCM_16", 16)

    @skipIfFormatNotSupported("FLAC")
    def test_tarobj_flac(self):
        """Querying a FLAC archive member via file-like object works"""
        self._test_tarobj("flac", "PCM_16", 16)


# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: audio/tests/backends/soundfile/load_test.py
================================================
#this code is from: https://github.com/pytorch/audio/blob/main/test/torchaudio_unittest/backend/soundfile/load_test.py
import os
import tarfile
import unittest
from unittest.mock import patch

import numpy as np
import paddle
import soundfile
from common import dtype2subtype
from common import parameterize
from common import skipIfFormatNotSupported
from common_utils import get_wav_data
from common_utils import load_wav
from common_utils import normalize_wav
from common_utils import save_wav
from common_utils import TempDirMixin
from paddleaudio.backends import soundfile_backend
from parameterized import parameterized


def _get_mock_path(
        ext: str,
        dtype: str,
        sample_rate: int,
        num_channels: int,
        num_frames: int, ):
    return f"{dtype}_{sample_rate}_{num_channels}_{num_frames}.{ext}"


def _get_mock_params(path: str):
    filename, ext = path.split(".")
    parts = filename.split("_")
    return {
        "ext": ext,
        "dtype": parts[0],
        "sample_rate": int(parts[1]),
        "num_channels": int(parts[2]),
        "num_frames": int(parts[3]),
    }


class SoundFileMock:
    """Stand-in for `soundfile.SoundFile` whose metadata is decoded from the file name.

    The path must follow the naming scheme understood by `_get_mock_params`.
    Only read mode is supported.
    """

    def __init__(self, path, mode):
        assert mode == "r"
        self.path = path
        self._params = _get_mock_params(path)
        # Set by `_prepare_read`; `read` slices from this offset.
        self._start = None

    @property
    def samplerate(self):
        """Sample rate encoded in the mock path."""
        return self._params["sample_rate"]

    @property
    def format(self):
        """Container format name derived from the extension, or None if unrecognised."""
        format_by_ext = {
            "wav": "WAV",
            "flac": "FLAC",
            "ogg": "OGG",
            "sph": "NIST",
            "nis": "NIST",
            "nist": "NIST",
        }
        return format_by_ext.get(self._params["ext"])

    @property
    def subtype(self):
        """"VORBIS" for ogg, otherwise the subtype mapped from the encoded dtype."""
        if self._params["ext"] != "ogg":
            return dtype2subtype(self._params["dtype"])
        return "VORBIS"

    def _prepare_read(self, start, stop, frames):
        """Remember the start offset and echo back the requested frame count."""
        assert stop is None
        self._start = start
        return frames

    def read(self, frames, dtype, always_2d):
        """Generate (frames, channels) data and return the slice beginning at the stored offset."""
        assert always_2d
        params = self._params
        generated = get_wav_data(
            dtype,
            params["num_channels"],
            normalize=False,
            num_frames=params["num_frames"],
            channels_first=False, ).numpy()
        begin = self._start
        return generated[begin:begin + frames]

    def __enter__(self):
        return self

    def __exit__(self, *args, **kwargs):
        # Nothing to release: no real file is ever opened.
        pass


class MockedLoadTest(unittest.TestCase):
    """Checks the dtype returned by `soundfile_backend.load` with `soundfile.SoundFile` mocked out."""

    def assert_dtype(self, ext, dtype, sample_rate, num_channels, normalize,
                     channels_first):
        """Load a mocked file and verify the dtype and sample rate of the result.

        The native dtype is expected only when `normalize` is False and the
        extension is "wav" or "nist"; in every other case float32 is expected.
        """
        if normalize or ext not in ["wav", "nist"]:
            expected_dtype = paddle.float32
        else:
            expected_dtype = getattr(paddle, dtype)

        total_frames = 3 * sample_rate
        mock_path = _get_mock_path(ext, dtype, sample_rate, num_channels,
                                   total_frames)
        with patch("soundfile.SoundFile", SoundFileMock):
            loaded, loaded_rate = soundfile_backend.load(
                mock_path, normalize=normalize, channels_first=channels_first)
            assert loaded.dtype == expected_dtype
            assert sample_rate == loaded_rate

    @parameterize(
        ["int32", "float32", "float64"],
        [8000, 16000],
        [1, 2],
        [True, False],
        [True, False], )
    def test_wav(self, dtype, sample_rate, num_channels, normalize,
                 channels_first):
        """WAV: native dtype when normalize=False, float32 otherwise."""
        self.assert_dtype("wav", dtype, sample_rate, num_channels, normalize,
                          channels_first)

    @parameterize(
        ["int32"],
        [8000, 16000],
        [1, 2],
        [True, False],
        [True, False], )
    def test_sphere(self, dtype, sample_rate, num_channels, normalize,
                    channels_first):
        """A ".sph" path is always expected to come back as float32."""
        self.assert_dtype("sph", dtype, sample_rate, num_channels, normalize,
                          channels_first)

    @parameterize([8000, 16000], [1, 2], [True, False], [True, False])
    def test_ogg(self, sample_rate, num_channels, normalize, channels_first):
        """OGG is always expected to come back as float32."""
        self.assert_dtype("ogg", "int16", sample_rate, num_channels, normalize,
                          channels_first)

    @parameterize([8000, 16000], [1, 2], [True, False], [True, False])
    def test_flac(self, sample_rate, num_channels, normalize, channels_first):
        """FLAC is always expected to come back as float32."""
        self.assert_dtype("flac", "int16", sample_rate, num_channels, normalize,
                          channels_first)


class LoadTestBase(TempDirMixin, unittest.TestCase):
    """Reusable assertions that round-trip audio through a temp file and `soundfile_backend.load`."""

    def assert_wav(
            self,
            dtype,
            sample_rate,
            num_channels,
            normalize,
            channels_first=True,
            duration=1, ):
        """Save a WAV file and check that `soundfile_backend.load` agrees with `load_wav`.

        `load_wav` serves as the reference reader (presumably scipy-based --
        confirm in `common_utils`).
        """
        wav_path = self.get_temp_path("reference.wav")
        waveform = get_wav_data(
            dtype,
            num_channels,
            normalize=normalize,
            num_frames=duration * sample_rate,
            channels_first=channels_first, )
        save_wav(wav_path, waveform, sample_rate, channels_first=channels_first)

        reference = load_wav(
            wav_path, normalize=normalize, channels_first=channels_first)[0]
        loaded, loaded_rate = soundfile_backend.load(
            wav_path, normalize=normalize, channels_first=channels_first)

        assert loaded_rate == sample_rate
        np.testing.assert_array_almost_equal(loaded.numpy(), reference.numpy())

    def assert_sphere(
            self,
            dtype,
            sample_rate,
            num_channels,
            channels_first=True,
            duration=1, ):
        """Write a NIST/SPHERE file with soundfile and check the normalized data loaded back."""
        sph_path = self.get_temp_path("reference.sph")
        # Data is generated as (frames, channels), the layout soundfile.write expects.
        frames_by_channels = get_wav_data(
            dtype,
            num_channels,
            num_frames=duration * sample_rate,
            normalize=False,
            channels_first=False, )
        soundfile.write(
            sph_path,
            frames_by_channels,
            sample_rate,
            subtype=dtype2subtype(dtype),
            format="NIST")

        if channels_first:
            reference = normalize_wav(frames_by_channels.t())
        else:
            reference = normalize_wav(frames_by_channels)
        loaded, loaded_rate = soundfile_backend.load(
            sph_path, channels_first=channels_first)

        assert loaded_rate == sample_rate
        np.testing.assert_array_almost_equal(loaded.numpy(), reference.numpy())

    def assert_flac(
            self,
            dtype,
            sample_rate,
            num_channels,
            channels_first=True,
            duration=1, ):
        """Write a FLAC file with soundfile and check the normalized data loaded back."""
        flac_path = self.get_temp_path("reference.flac")
        # Data is generated as (frames, channels), the layout soundfile.write expects.
        frames_by_channels = get_wav_data(
            dtype,
            num_channels,
            num_frames=duration * sample_rate,
            normalize=False,
            channels_first=False, )
        soundfile.write(flac_path, frames_by_channels, sample_rate)

        if channels_first:
            reference = normalize_wav(frames_by_channels.t())
        else:
            reference = normalize_wav(frames_by_channels)
        loaded, loaded_rate = soundfile_backend.load(
            flac_path, channels_first=channels_first)

        assert loaded_rate == sample_rate
        np.testing.assert_array_almost_equal(loaded.numpy(), reference.numpy())


class TestLoad(LoadTestBase):
    """Correctness of `soundfile_backend.load` across dtypes, rates and channel counts."""

    @parameterize(
        ["float32", "int32"],
        [8000, 16000],
        [1, 2],
        [False, True],
        [False, True], )
    def test_wav(self, dtype, sample_rate, num_channels, normalize,
                 channels_first):
        """WAV files are loaded correctly."""
        self.assert_wav(
            dtype,
            sample_rate,
            num_channels,
            normalize,
            channels_first, )

    @parameterize(
        ["int32"],
        [16000],
        [2],
        [False], )
    def test_wav_large(self, dtype, sample_rate, num_channels, normalize):
        """A two-hour WAV file is loaded correctly."""
        seconds_in_two_hours = 2 * 60 * 60
        self.assert_wav(
            dtype,
            sample_rate,
            num_channels,
            normalize,
            duration=seconds_in_two_hours)

    @parameterize(["float32", "int32"], [4, 8, 16, 32], [False, True])
    def test_multiple_channels(self, dtype, num_channels, channels_first):
        """WAV files with more than two channels are loaded correctly."""
        self.assert_wav(
            dtype,
            8000,  # sample rate
            num_channels,
            False,  # normalize
            channels_first, )

    # The sphere and flac load tests below are disabled in this port.

    #@parameterize(["int32"], [8000, 16000], [1, 2], [False, True])
    #@skipIfFormatNotSupported("NIST")
    #def test_sphere(self, dtype, sample_rate, num_channels, channels_first):
    #"""`soundfile_backend.load` can load sphere format correctly."""
    #self.assert_sphere(dtype, sample_rate, num_channels, channels_first)

    #@parameterize(["int32"], [8000, 16000], [1, 2], [False, True])
    #@skipIfFormatNotSupported("FLAC")
    #def test_flac(self, dtype, sample_rate, num_channels, channels_first):
    #"""`soundfile_backend.load` can load flac format correctly."""
    #self.assert_flac(dtype, sample_rate, num_channels, channels_first)


class TestLoadFormat(TempDirMixin, unittest.TestCase):
    """`soundfile_backend.load` can load files whose name carries no extension."""

    original = None
    path = None

    def _make_file(self, format_):
        """Write a stereo file as ``test.<format_>`` and then strip its extension.

        Returns:
            (path, expected): the extension-less path and the channels-first
            float32 data read back with soundfile before the rename.
        """
        sample_rate = 8000
        path_with_ext = self.get_temp_path(f"test.{format_}")
        data = get_wav_data("float32", num_channels=2).numpy().T
        soundfile.write(path_with_ext, data, sample_rate)
        expected = soundfile.read(path_with_ext, dtype="float32")[0].T
        path = os.path.splitext(path_with_ext)[0]
        os.rename(path_with_ext, path)
        return path, expected

    def _test_format(self, format_):
        """A file written in `format_` can be read back after its extension is removed."""
        path, expected = self._make_file(format_)
        found, _ = soundfile_backend.load(path)
        # Convert explicitly, as the other comparisons in this module do,
        # rather than relying on numpy's implicit handling of the tensor.
        np.testing.assert_array_almost_equal(found.numpy(), expected)

    @parameterized.expand([
        ("WAV", ),
        ("wav", ),
    ])
    def test_wav(self, format_):
        """WAV content is detected without a file extension."""
        self._test_format(format_)

    @parameterized.expand([
        ("FLAC", ),
        ("flac", ),
    ])
    @skipIfFormatNotSupported("FLAC")
    def test_flac(self, format_):
        """FLAC content is detected without a file extension."""
        self._test_format(format_)


class TestFileObject(TempDirMixin, unittest.TestCase):
    """`soundfile_backend.load` accepts file-like objects, including tar archive members."""

    def _test_fileobj(self, ext):
        """Write a stereo file and load it through an open binary file handle."""
        sample_rate = 16000
        path = self.get_temp_path(f"test.{ext}")

        data = get_wav_data("float32", num_channels=2).numpy().T
        soundfile.write(path, data, sample_rate)
        # Reference data, read by soundfile and transposed to channels-first.
        expected = soundfile.read(path, dtype="float32")[0].T

        with open(path, "rb") as fileobj:
            found, sr = soundfile_backend.load(fileobj)
        assert sr == sample_rate
        # Explicit conversion, consistent with _test_tarfile below.
        np.testing.assert_array_almost_equal(found.numpy(), expected)

    def test_fileobj_wav(self):
        """Loading WAV audio via file-like object works"""
        self._test_fileobj("wav")

    # Skipped when the installed libsndfile lacks FLAC support, like the
    # other FLAC tests in this module.
    @skipIfFormatNotSupported("FLAC")
    def test_fileobj_flac(self):
        """Loading FLAC audio via file-like object works"""
        self._test_fileobj("flac")

    def _test_tarfile(self, ext):
        """Write a stereo file, pack it into a tar archive and load the archive member."""
        sample_rate = 16000
        audio_file = f"test.{ext}"
        audio_path = self.get_temp_path(audio_file)
        archive_path = self.get_temp_path("archive.tar.gz")

        data = get_wav_data("float32", num_channels=2).numpy().T
        soundfile.write(audio_path, data, sample_rate)
        # Reference data, read by soundfile and transposed to channels-first.
        expected = soundfile.read(audio_path, dtype="float32")[0].T

        with tarfile.TarFile(archive_path, "w") as tarobj:
            tarobj.add(audio_path, arcname=audio_file)
        with tarfile.TarFile(archive_path, "r") as tarobj:
            # The member must be read while the archive is still open.
            fileobj = tarobj.extractfile(audio_file)
            found, sr = soundfile_backend.load(fileobj)

        assert sr == sample_rate
        np.testing.assert_array_almost_equal(found.numpy(), expected)

    def test_tarfile_wav(self):
        """Loading WAV audio from a tar archive member works"""
        self._test_tarfile("wav")

    @skipIfFormatNotSupported("FLAC")
    def test_tarfile_flac(self):
        """Loading FLAC audio from a tar archive member works"""
        self._test_tarfile("flac")


# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: audio/tests/backends/soundfile/save_test.py
================================================
import io
import unittest
from unittest.mock import patch

import numpy as np
import paddle
import soundfile
from common import fetch_wav_subtype
from common import parameterize
from common import skipIfFormatNotSupported
from common_utils import get_wav_data
from common_utils import load_wav
from common_utils import nested_params
from common_utils import TempDirMixin
from paddleaudio.backends import soundfile_backend


class MockedSaveTest(unittest.TestCase):
    """Checks the arguments `soundfile_backend.save` forwards to a mocked `soundfile.write`."""

    # `@patch` sits closest to the function, so the mock is appended after the
    # parameters supplied by `@nested_params` and arrives as `mocked_write`.
    @nested_params(
        ["float32", "int32"],
        [8000, 16000],
        [1, 2],
        [False, True],
        [
            (None, None),
            ("PCM_U", None),
            ("PCM_U", 8),
            ("PCM_S", None),
            ("PCM_S", 16),
            ("PCM_S", 32),
            ("PCM_F", None),
            ("PCM_F", 32),
            ("PCM_F", 64),
            ("ULAW", None),
            ("ULAW", 8),
            ("ALAW", None),
            ("ALAW", 8),
        ], )
    @patch("soundfile.write")
    def test_wav(self, dtype, sample_rate, num_channels, channels_first,
                 enc_params, mocked_write):
        """soundfile_backend.save passes correct subtype to soundfile.write when WAV"""
        filepath = "foo.wav"
        input_tensor = get_wav_data(
            dtype,
            num_channels,
            num_frames=3 * sample_rate,
            normalize=dtype == "float32",
            channels_first=channels_first, )
        # NOTE(review): the generated tensor is transposed before being saved,
        # and transposed back below only when channels_first is set -- confirm
        # this is the intended layout.
        input_tensor = paddle.transpose(input_tensor, [1, 0])

        encoding, bits_per_sample = enc_params
        soundfile_backend.save(
            filepath,
            input_tensor,
            sample_rate,
            channels_first=channels_first,
            encoding=encoding,
            bits_per_sample=bits_per_sample, )

        # on +Py3.8 call_args.kwargs is more descriptive
        args = mocked_write.call_args[1]
        assert args["file"] == filepath
        assert args["samplerate"] == sample_rate
        assert args["subtype"] == fetch_wav_subtype(dtype, encoding,
                                                    bits_per_sample)
        assert args["format"] is None
        tensor_result = paddle.transpose(
            input_tensor, [1, 0]) if channels_first else input_tensor
        #self.assertEqual(args["data"], tensor_result.numpy())
        np.testing.assert_array_almost_equal(args["data"].numpy(),
                                             tensor_result.numpy())

    # The mock is injected positionally after the five positional arguments
    # callers pass; `encoding` and `bits_per_sample` are passed by keyword.
    @patch("soundfile.write")
    def assert_non_wav(
            self,
            fmt,
            dtype,
            sample_rate,
            num_channels,
            channels_first,
            mocked_write,
            encoding=None,
            bits_per_sample=None, ):
        """soundfile_backend.save passes the expected file, sample rate, format and data
        to soundfile.write for a non-WAV extension.

        The format is expected to be "NIST" for the sph/nist/nis extensions and
        None for every other extension.
        """
        filepath = f"foo.{fmt}"
        input_tensor = get_wav_data(
            dtype,
            num_channels,
            num_frames=3 * sample_rate,
            normalize=False,
            channels_first=channels_first, )
        input_tensor = paddle.transpose(input_tensor, [1, 0])

        expected_data = paddle.transpose(
            input_tensor, [1, 0]) if channels_first else input_tensor

        soundfile_backend.save(
            filepath,
            input_tensor,
            sample_rate,
            channels_first,
            encoding=encoding,
            bits_per_sample=bits_per_sample, )

        # on +Py3.8 call_args.kwargs is more descriptive
        args = mocked_write.call_args[1]
        assert args["file"] == filepath
        assert args["samplerate"] == sample_rate
        if fmt in ["sph", "nist", "nis"]:
            assert args["format"] == "NIST"
        else:
            assert args["format"] is None
        np.testing.assert_array_almost_equal(args["data"].numpy(),
                                             expected_data.numpy())
        #self.assertEqual(args["data"], expected_data)

    @nested_params(
        ["sph", "nist", "nis"],
        ["int32"],
        [8000, 16000],
        [1, 2],
        [False, True],
        [
            ("PCM_S", 8),
            ("PCM_S", 16),
            ("PCM_S", 24),
            ("PCM_S", 32),
            ("ULAW", 8),
            ("ALAW", 8),
            ("ALAW", 16),
            ("ALAW", 24),
            ("ALAW", 32),
        ], )
    def test_sph(self, fmt, dtype, sample_rate, num_channels, channels_first,
                 enc_params):
        """soundfile_backend.save passes format "NIST" to soundfile.write for the
        sph/nist/nis extensions"""
        encoding, bits_per_sample = enc_params
        self.assert_non_wav(
            fmt,
            dtype,
            sample_rate,
            num_channels,
            channels_first,
            encoding=encoding,
            bits_per_sample=bits_per_sample)

    @parameterize(
        ["int32"],
        [8000, 16000],
        [1, 2],
        [False, True],
        [8, 16, 24], )
    def test_flac(self, dtype, sample_rate, num_channels, channels_first,
                  bits_per_sample):
        """soundfile_backend.save passes the default format (None) to
        soundfile.write for FLAC"""
        self.assert_non_wav(
            "flac",
            dtype,
            sample_rate,
            num_channels,
            channels_first,
            bits_per_sample=bits_per_sample)

    @parameterize(
        ["int32"],
        [8000, 16000],
        [1, 2],
        [False, True], )
    def test_ogg(self, dtype, sample_rate, num_channels, channels_first):
        """soundfile_backend.save passes the default format (None) to
        soundfile.write for OGG"""
        self.assert_non_wav("ogg", dtype, sample_rate, num_channels,
                            channels_first)


class SaveTestBase(TempDirMixin, unittest.TestCase):
    """Reusable assertions that write audio with `soundfile_backend.save` and inspect the result."""

    def assert_wav(self, dtype, sample_rate, num_channels, num_frames):
        """Save un-normalized data as WAV and check that `load_wav` reads the same data back."""
        wav_path = self.get_temp_path("data.wav")
        original = get_wav_data(
            dtype, num_channels, num_frames=num_frames, normalize=False)
        soundfile_backend.save(wav_path, original, sample_rate)

        reloaded, reloaded_rate = load_wav(wav_path, normalize=False)
        assert sample_rate == reloaded_rate
        np.testing.assert_array_almost_equal(reloaded.numpy(),
                                             original.numpy())

    def _assert_non_wav(self, fmt, dtype, sample_rate, num_channels):
        """Save three seconds of data in a non-WAV format and validate its metadata.

        Because of precision mismatch, and because there is no way to decode
        the resulting file without soundfile itself, only the metadata reported
        by `soundfile.info` is checked.
        """
        out_path = self.get_temp_path(f"data.{fmt}")
        waveform = get_wav_data(
            dtype, num_channels, num_frames=sample_rate * 3, normalize=False)
        soundfile_backend.save(out_path, waveform, sample_rate)

        file_info = soundfile.info(out_path)
        assert file_info.format == fmt.upper()
        # NOTE: the frame-count check (frames == sample_rate * 3) is disabled
        # because it currently fails.
        assert file_info.channels == num_channels
        assert file_info.samplerate == sample_rate

    def assert_flac(self, dtype, sample_rate, num_channels):
        """`soundfile_backend.save` can write FLAC."""
        self._assert_non_wav("flac", dtype, sample_rate, num_channels)

    def assert_sphere(self, dtype, sample_rate, num_channels):
        """`soundfile_backend.save` can write NIST/SPHERE."""
        self._assert_non_wav("nist", dtype, sample_rate, num_channels)

    def assert_ogg(self, dtype, sample_rate, num_channels):
        """`soundfile_backend.save` can write OGG.

        OGG is lossy, so only the metadata is inspected.
        """
        self._assert_non_wav("ogg", dtype, sample_rate, num_channels)


class TestSave(SaveTestBase):
    """Writes real files with `soundfile_backend.save` for each supported container."""

    @parameterize(
        ["float32", "int32"],
        [8000, 16000],
        [1, 2], )
    def test_wav(self, dtype, sample_rate, num_channels):
        """WAV output round-trips."""
        frames = None  # the frame count is left to get_wav_data
        self.assert_wav(dtype, sample_rate, num_channels, num_frames=frames)

    @parameterize(
        ["float32", "int32"],
        [4, 8, 16, 32], )
    def test_multiple_channels(self, dtype, num_channels):
        """WAV output with more than two channels round-trips."""
        self.assert_wav(dtype, 8000, num_channels, num_frames=None)

    @parameterize(
        ["int32"],
        [8000, 16000],
        [1, 2], )
    @skipIfFormatNotSupported("NIST")
    def test_sphere(self, dtype, sample_rate, num_channels):
        """NIST/SPHERE output carries the expected metadata."""
        self.assert_sphere(dtype, sample_rate, num_channels)

    @parameterize(
        [8000, 16000],
        [1, 2], )
    @skipIfFormatNotSupported("FLAC")
    def test_flac(self, sample_rate, num_channels):
        """FLAC output carries the expected metadata."""
        source_dtype = "float32"
        self.assert_flac(source_dtype, sample_rate, num_channels)

    @parameterize(
        [8000, 16000],
        [1, 2], )
    @skipIfFormatNotSupported("OGG")
    def test_ogg(self, sample_rate, num_channels):
        """OGG/Vorbis output carries the expected metadata."""
        source_dtype = "float32"
        self.assert_ogg(source_dtype, sample_rate, num_channels)


class TestSaveParams(TempDirMixin, unittest.TestCase):
    """Optional parameters of `soundfile_backend.save` behave as documented."""

    @parameterize([True, False])
    def test_channels_first(self, channels_first):
        """Saving with either layout yields the same channels-first data from `load_wav`."""
        wav_path = self.get_temp_path("data.wav")
        waveform = get_wav_data("int32", 2, channels_first=channels_first)
        soundfile_backend.save(
            wav_path, waveform, 8000, channels_first=channels_first)

        reloaded = load_wav(wav_path)[0]
        if channels_first:
            reference = waveform
        else:
            # Bring (frames, channels) data into the layout load_wav returns.
            reference = waveform.transpose([1, 0])
        np.testing.assert_array_almost_equal(reloaded.numpy(),
                                             reference.numpy())


class TestFileObject(TempDirMixin, unittest.TestCase):
    """`soundfile_backend.save` can write into an in-memory file-like object."""

    def _test_fileobj(self, ext):
        """Compare data saved to a `BytesIO` buffer with a reference file written by soundfile."""
        sample_rate = 16000
        reference_path = self.get_temp_path(f"test.{ext}")

        # Only WAV gets an explicit subtype; other formats use soundfile's default.
        if ext == "wav":
            subtype = "FLOAT"
        else:
            subtype = None
        waveform = get_wav_data("float32", num_channels=2)
        soundfile.write(
            reference_path, waveform.numpy().T, sample_rate, subtype=subtype)
        expected = soundfile.read(reference_path, dtype="float32")[0]

        buffer = io.BytesIO()
        soundfile_backend.save(buffer, waveform, sample_rate, format=ext)
        buffer.seek(0)  # rewind so soundfile reads from the beginning
        found, found_rate = soundfile.read(buffer, dtype="float32")

        assert found_rate == sample_rate
        np.testing.assert_array_almost_equal(found, expected)

    def test_fileobj_wav(self):
        """Saving WAV to a file-like object works"""
        self._test_fileobj("wav")

    @skipIfFormatNotSupported("FLAC")
    def test_fileobj_flac(self):
        """Saving FLAC to a file-like object works"""
        self._test_fileobj("flac")

    @skipIfFormatNotSupported("NIST")
    def test_fileobj_nist(self):
        """Saving NIST to a file-like object works"""
        self._test_fileobj("NIST")

    @skipIfFormatNotSupported("OGG")
    def test_fileobj_ogg(self):
        """Saving OGG to a file-like object works"""
        self._test_fileobj("OGG")


# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: audio/tests/backends/soundfile/test_io.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import filecmp
import os
import unittest

import numpy as np
from paddleaudio.backends import soundfile_load as load
from paddleaudio.backends import soundfile_save as save
import soundfile as sf

from base import BackendTest


class TestIO(BackendTest):
    """Compares `soundfile_load` / `soundfile_save` against direct `soundfile` calls.

    `self.files` is expected to be provided by `BackendTest` (presumably a
    mono file at index 0 and a multi-channel file at index 1 -- confirm in
    `base.py`).
    """

    def test_load_mono_channel(self):
        """Loading `self.files[0]` matches `soundfile.read` in dtype, rate and data."""
        sf_data, sf_sr = sf.read(self.files[0])
        pa_data, pa_sr = load(
            self.files[0], normal=False, dtype='float64')

        self.assertEqual(sf_data.dtype, pa_data.dtype)
        self.assertEqual(sf_sr, pa_sr)
        np.testing.assert_array_almost_equal(sf_data, pa_data)

    def test_load_multi_channels(self):
        """Loading `self.files[1]` with mono=False matches the transposed `soundfile.read` data."""
        sf_data, sf_sr = sf.read(self.files[1])
        sf_data = sf_data.T  # Channel dim first
        pa_data, pa_sr = load(
            self.files[1], mono=False, normal=False, dtype='float64')

        self.assertEqual(sf_data.dtype, pa_data.dtype)
        self.assertEqual(sf_sr, pa_sr)
        np.testing.assert_array_almost_equal(sf_data, pa_data)

    def _assert_save_matches_soundfile(self, waveform, sr):
        """Write `waveform` with soundfile and with `save`, then require identical files.

        The temporary files are removed even when the comparison fails, so a
        failing run leaves nothing behind in the working directory.
        """
        sf_tmp_file = 'sf_tmp.wav'
        pa_tmp_file = 'pa_tmp.wav'
        try:
            sf.write(sf_tmp_file, waveform, sr)
            save(waveform, sr, pa_tmp_file)
            # shallow=False forces a byte-by-byte comparison. The default
            # treats files with matching size and mtime as equal without
            # reading them, which can hide real differences here.
            self.assertTrue(
                filecmp.cmp(sf_tmp_file, pa_tmp_file, shallow=False))
        finally:
            for file in (sf_tmp_file, pa_tmp_file):
                if os.path.exists(file):
                    os.remove(file)

    def test_save_mono_channel(self):
        """Saving a random mono int16 signal produces the same file as `soundfile.write`."""
        waveform, sr = np.random.randint(
            low=-32768, high=32768, size=(48000), dtype=np.int16), 16000
        self._assert_save_matches_soundfile(waveform, sr)

    def test_save_multi_channels(self):
        """Saving a random two-channel int16 signal produces the same file as `soundfile.write`."""
        waveform, sr = np.random.randint(
            low=-32768, high=32768, size=(2, 48000), dtype=np.int16), 16000
        # Both writers receive the data transposed to (frames, channels).
        self._assert_save_matches_soundfile(waveform.T, sr)


# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: audio/tests/backends/sox_io/common.py
================================================
import itertools
from unittest import skipIf

from paddleaudio._internal.module_utils import is_module_available
from parameterized import parameterized


def name_func(func, _, params):
    """Build a test name from the function name and its positional parameters.

    Args:
        func: The test function being parameterized.
        _: Unused (second argument supplied by `parameterized`).
        params: Object exposing the positional arguments as `params.args`.

    Returns:
        ``"<func name>_<arg1>_<arg2>_..."`` with every argument passed through `str`.
    """
    rendered_args = "_".join(map(str, params.args))
    return func.__name__ + "_" + rendered_args


def dtype2subtype(dtype):
    """Map a dtype name to the corresponding soundfile subtype name.

    Raises:
        KeyError: if `dtype` is not one of the six supported names.
    """
    subtype_by_dtype = dict(
        float64="DOUBLE",
        float32="FLOAT",
        int32="PCM_32",
        int16="PCM_16",
        uint8="PCM_U8",
        int8="PCM_S8", )
    return subtype_by_dtype[dtype]


def skipIfFormatNotSupported(fmt):
    """Return a decorator that skips a test when soundfile cannot handle `fmt`.

    The test is also skipped when the `soundfile` module is not available.
    """
    if not is_module_available("soundfile"):
        return skipIf(True, '"soundfile" not available.')

    # Imported lazily so this module can be loaded without soundfile installed.
    import soundfile

    supported = soundfile.available_formats()
    reason = f'"{fmt}" is not supported by soundfile'
    return skipIf(fmt not in supported, reason)


def parameterize(*params):
    """Expand a test over the Cartesian product of the given parameter lists.

    Generated tests are named by `name_func`.
    """
    combinations = [combo for combo in itertools.product(*params)]
    return parameterized.expand(combinations, name_func=name_func)


def fetch_wav_subtype(dtype, encoding, bits_per_sample):
    """Return the soundfile subtype expected for a WAV save request.

    Args:
        dtype: dtype name of the data; decides the subtype when neither
            `encoding` nor `bits_per_sample` is given.
        encoding: Requested encoding name, or None.
        bits_per_sample: Requested bit depth, or None.

    Raises:
        KeyError: if `dtype` is unknown to `dtype2subtype`.
        ValueError: if the (encoding, bits_per_sample) pair is not supported.
    """
    # Resolved first: an unknown dtype raises KeyError whatever the other
    # arguments are.
    native_subtype = dtype2subtype(dtype)
    table = {
        (None, None): native_subtype,
        (None, 8): "PCM_U8",
        ("PCM_U", None): "PCM_U8",
        ("PCM_U", 8): "PCM_U8",
        ("PCM_S", None): "PCM_32",
        ("PCM_S", 16): "PCM_16",
        ("PCM_S", 32): "PCM_32",
        ("PCM_F", None): "FLOAT",
        ("PCM_F", 32): "FLOAT",
        ("PCM_F", 64): "DOUBLE",
        ("ULAW", None): "ULAW",
        ("ULAW", 8): "ULAW",
        ("ALAW", None): "ALAW",
        ("ALAW", 8): "ALAW",
    }
    request = (encoding, bits_per_sample)
    chosen = table.get(request)
    if not chosen:
        raise ValueError(
            f"wav does not support ({encoding}, {bits_per_sample}).")
    return chosen

def get_encoding(ext, dtype):
    """Return the encoding name expected for a file extension and dtype.

    For "mp3", "flac" and "vorbis" the upper-cased extension is returned;
    otherwise the encoding is looked up from `dtype`.

    Raises:
        KeyError: if the dtype lookup is needed and `dtype` is not supported.
    """
    named_after_extension = frozenset(("mp3", "flac", "vorbis"))
    if ext in named_after_extension:
        return ext.upper()

    encoding_by_dtype = dict(
        float32="PCM_F",
        int32="PCM_S",
        int16="PCM_S",
        uint8="PCM_U", )
    return encoding_by_dtype[dtype]


def get_bit_depth(dtype):
    """Return the sample width in bits for a dtype name.

    Raises:
        KeyError: if ``dtype`` is not one of the four names in the table.
    """
    bit_depths = {
        "float32": 32,
        "int32": 32,
        "int16": 16,
        "uint8": 8,
    }
    return bit_depths[dtype]


def get_bits_per_sample(ext, dtype):
    """Return the bits-per-sample value expected for a file extension and dtype.

    "flac", "mp3" and "vorbis" have a fixed value that does not depend on
    ``dtype``; every other extension falls back to ``get_bit_depth(dtype)``.

    Raises:
        KeyError: if the fallback is needed and ``dtype`` is unknown.
    """
    bits_per_samples = {
        "flac": 24,
        "mp3": 0,
        "vorbis": 0,
    }
    # Look the dtype up only when it is actually needed: passing it as the
    # default of ``dict.get`` would evaluate it eagerly and raise KeyError for
    # an unknown dtype even when the extension alone decides the answer.
    if ext in bits_per_samples:
        return bits_per_samples[ext]
    return get_bit_depth(dtype)


================================================
FILE: audio/tests/backends/sox_io/info_test.py
================================================
import io
import itertools
import os
import platform
import tarfile
import unittest
from contextlib import contextmanager
# On Windows, emit a warning and stop before the sox_io_backend import below
# is reached.
# NOTE(review): `exit()` is the helper installed by the `site` module and
# raises SystemExit at import time of this test module.
if platform.system() == "Windows":
    import warnings
    warnings.warn("sox io not support in Windows, please skip test.")
    exit()

from parameterized import parameterized
from common import get_bits_per_sample, get_encoding

from paddleaudio.backends import sox_io_backend

from common_utils import (
    get_wav_data,
    save_wav,
    TempDirMixin,
    sox_utils, )

#code is from:https://github.com/pytorch/audio/blob/main/torchaudio/test/torchaudio_unittest/backend/sox_io/info_test.py


class TestInfo(TempDirMixin, unittest.TestCase):
    """`sox_io_backend.info` checks on files written into the temp directory."""

    @parameterized.expand(
        list(itertools.product(
            ["float32", "int32"],
            [8000, 16000],
            [1, 2], )))
    def test_wav(self, dtype, sample_rate, num_channels):
        """`sox_io_backend.info` reports the metadata of a wav file correctly"""
        seconds = 1
        wav_path = self.get_temp_path("data.wav")
        samples = get_wav_data(
            dtype,
            num_channels,
            normalize=False,
            num_frames=seconds * sample_rate)
        save_wav(wav_path, samples, sample_rate)

        meta = sox_io_backend.info(wav_path)
        assert meta.sample_rate == sample_rate
        assert meta.num_frames == sample_rate * seconds
        assert meta.num_channels == num_channels
        assert meta.bits_per_sample == sox_utils.get_bit_depth(dtype)
        assert meta.encoding == get_encoding("wav", dtype)

    @parameterized.expand(
        list(itertools.product(
            ["float32", "int32"],
            [8000, 16000],
            [4, 8, 16, 32], )))
    def test_wav_multiple_channels(self, dtype, sample_rate, num_channels):
        """`sox_io_backend.info` handles wav files with more than 2 channels"""
        seconds = 1
        wav_path = self.get_temp_path("data.wav")
        samples = get_wav_data(
            dtype,
            num_channels,
            normalize=False,
            num_frames=seconds * sample_rate)
        save_wav(wav_path, samples, sample_rate)

        meta = sox_io_backend.info(wav_path)
        assert meta.sample_rate == sample_rate
        assert meta.num_frames == sample_rate * seconds
        assert meta.num_channels == num_channels
        assert meta.bits_per_sample == sox_utils.get_bit_depth(dtype)

    def test_ulaw(self):
        """`sox_io_backend.info` reports the metadata of a u-law file correctly"""
        seconds, channels, rate = 1, 1, 8000
        wav_path = self.get_temp_path("data.wav")
        sox_utils.gen_audio_file(
            wav_path,
            sample_rate=rate,
            num_channels=channels,
            bit_depth=8,
            encoding="u-law",
            duration=seconds)

        meta = sox_io_backend.info(wav_path)
        assert meta.sample_rate == rate
        assert meta.num_frames == rate * seconds
        assert meta.num_channels == channels
        assert meta.bits_per_sample == 8
        assert meta.encoding == "ULAW"

    def test_alaw(self):
        """`sox_io_backend.info` reports the metadata of an a-law file correctly"""
        seconds, channels, rate = 1, 1, 8000
        wav_path = self.get_temp_path("data.wav")
        sox_utils.gen_audio_file(
            wav_path,
            sample_rate=rate,
            num_channels=channels,
            bit_depth=8,
            encoding="a-law",
            duration=seconds)

        meta = sox_io_backend.info(wav_path)
        assert meta.sample_rate == rate
        assert meta.num_frames == rate * seconds
        assert meta.num_channels == channels
        assert meta.bits_per_sample == 8
        assert meta.encoding == "ALAW"


#class TestInfoOpus(unittest.TestCase):
#@parameterized.expand(
#list(
#itertools.product(
#["96k"],
#[1, 2],
#[0, 5, 10],
#)
#),
#)
#def test_opus(self, bitrate, num_channels, compression_level):
#"""`sox_io_backend.info` can check opus file correctly"""
#path = data_utils.get_asset_path("io", f"{bitrate}_{compression_level}_{num_channels}ch.opus")
#info = sox_io_backend.info(path)
#assert info.sample_rate == 48000
#assert info.num_frames == 32768
#assert info.num_channels == num_channels
#assert info.bits_per_sample == 0  # bit_per_sample is irrelevant for compressed formats
#assert info.encoding == "OPUS"


class FileObjTestBase(TempDirMixin):
    """Helpers that generate audio files (and comment files) in the temp directory."""

    def _gen_file(self,
                  ext,
                  dtype,
                  sample_rate,
                  num_channels,
                  num_frames,
                  *,
                  comments=None):
        """Generate ``test.<ext>`` with `sox_utils.gen_audio_file` and return its path.

        The duration handed to the generator is ``num_frames / sample_rate``.
        A comment file is written and passed along only when ``comments`` is
        truthy.
        """
        target = self.get_temp_path(f"test.{ext}")
        depth = sox_utils.get_bit_depth(dtype)
        seconds = num_frames / sample_rate
        if comments:
            comment_path = self._gen_comment_file(comments)
        else:
            comment_path = None

        sox_utils.gen_audio_file(
            target,
            sample_rate,
            num_channels=num_channels,
            encoding=sox_utils.get_encoding(dtype),
            bit_depth=depth,
            duration=seconds,
            comment_file=comment_path, )
        return target

    def _gen_comment_file(self, comments):
        """Write ``comments`` to ``comment.txt`` in the temp dir and return its path."""
        destination = self.get_temp_path("comment.txt")
        with open(destination, "w") as handle:
            handle.writelines(comments)
        return destination


class Unseekable:
    """Wrapper around a file-like object that exposes only a ``read`` method."""

    def __init__(self, fileobj):
        # Kept as a plain attribute; reads are delegated to it.
        self.fileobj = fileobj

    def read(self, n):
        """Delegate ``read(n)`` to the wrapped object and return its result."""
        chunk = self.fileobj.read(n)
        return chunk


class TestFileObject(FileObjTestBase, unittest.TestCase):
    """`sox_io_backend.info` on file-like inputs: file object, BytesIO, tar member."""

    def _query_fileobj(self,
                       ext,
                       dtype,
                       sample_rate,
                       num_channels,
                       num_frames,
                       *,
                       comments=None):
        """Generate an audio file and query it through a binary file object."""
        path = self._gen_file(
            ext,
            dtype,
            sample_rate,
            num_channels,
            num_frames,
            comments=comments)
        # Only "mp3" is passed as an explicit format; everything else gets None.
        format_ = ext if ext in ["mp3"] else None
        with open(path, "rb") as fileobj:
            return sox_io_backend.info(fileobj, format_)

    def _query_bytesio(self, ext, dtype, sample_rate, num_channels, num_frames):
        """Generate an audio file, load it fully into a BytesIO and query that."""
        path = self._gen_file(ext, dtype, sample_rate, num_channels, num_frames)
        format_ = ext if ext in ["mp3"] else None
        with open(path, "rb") as file_:
            fileobj = io.BytesIO(file_.read())
        return sox_io_backend.info(fileobj, format_)

    def _query_tarfile(self, ext, dtype, sample_rate, num_channels, num_frames):
        """Generate an audio file, pack it into a tar archive and query the member."""
        audio_path = self._gen_file(ext, dtype, sample_rate, num_channels,
                                    num_frames)
        audio_file = os.path.basename(audio_path)
        archive_path = self.get_temp_path("archive.tar.gz")
        with tarfile.TarFile(archive_path, "w") as tarobj:
            tarobj.add(audio_path, arcname=audio_file)
        format_ = ext if ext in ["mp3"] else None
        with tarfile.TarFile(archive_path, "r") as tarobj:
            fileobj = tarobj.extractfile(audio_file)
            return sox_io_backend.info(fileobj, format_)

    @contextmanager
    def _set_buffer_size(self, buffer_size):
        """Temporarily switch the buffer size, restoring the previous value on exit.

        NOTE(review): `get_buffer_size` / `set_buffer_size` are not imported in
        the visible part of this module and no test here uses this helper --
        confirm where they are expected to come from before relying on it.
        """
        # Read the current value *before* entering the try block: if this call
        # fails there is nothing to restore, and the `finally` clause must not
        # hide the real error behind an unbound `original_buffer_size`.
        original_buffer_size = get_buffer_size()
        try:
            set_buffer_size(buffer_size)
            yield
        finally:
            set_buffer_size(original_buffer_size)

    def _assert_sinfo(self, sinfo, ext, dtype, sample_rate, num_channels,
                      num_frames, zero_frame_exts):
        """Compare the queried metadata against the expected values.

        For extensions listed in ``zero_frame_exts`` the expected frame count
        is 0 instead of ``num_frames``.
        """
        bits_per_sample = get_bits_per_sample(ext, dtype)
        num_frames = 0 if ext in zero_frame_exts else num_frames

        assert sinfo.sample_rate == sample_rate
        assert sinfo.num_channels == num_channels
        assert sinfo.num_frames == num_frames
        assert sinfo.bits_per_sample == bits_per_sample
        assert sinfo.encoding == get_encoding(ext, dtype)

    @parameterized.expand([
        ("wav", "float32"),
        ("wav", "int32"),
        ("wav", "int16"),
        ("wav", "uint8"),
    ])
    def test_fileobj(self, ext, dtype):
        """Querying audio via file object works"""
        sample_rate = 16000
        num_frames = 3 * sample_rate
        num_channels = 2
        sinfo = self._query_fileobj(ext, dtype, sample_rate, num_channels,
                                    num_frames)
        self._assert_sinfo(sinfo, ext, dtype, sample_rate, num_channels,
                           num_frames, ["mp3", "vorbis"])

    @parameterized.expand([
        ("wav", "float32"),
        ("wav", "int32"),
        ("wav", "int16"),
        ("wav", "uint8"),
    ])
    def test_bytesio(self, ext, dtype):
        """Querying audio via BytesIO object works"""
        sample_rate = 16000
        num_frames = 3 * sample_rate
        num_channels = 2
        sinfo = self._query_bytesio(ext, dtype, sample_rate, num_channels,
                                    num_frames)
        self._assert_sinfo(sinfo, ext, dtype, sample_rate, num_channels,
                           num_frames, ["mp3", "vorbis"])

    @parameterized.expand([
        ("wav", "float32"),
        ("wav", "int32"),
        ("wav", "int16"),
        ("wav", "uint8"),
    ])
    def test_bytesio_tiny(self, ext, dtype):
        """Querying audio via BytesIO object works for small data"""
        sample_rate = 8000
        num_frames = 4
        num_channels = 2
        sinfo = self._query_bytesio(ext, dtype, sample_rate, num_channels,
                                    num_frames)
        self._assert_sinfo(sinfo, ext, dtype, sample_rate, num_channels,
                           num_frames, ["mp3", "vorbis"])

    @parameterized.expand([
        ("wav", "float32"),
        ("wav", "int32"),
        ("wav", "int16"),
        ("wav", "uint8"),
        ("flac", "float32"),
        ("vorbis", "float32"),
        ("amb", "int16"),
    ])
    def test_tarfile(self, ext, dtype):
        """Querying compressed audio via file-like object works"""
        sample_rate = 16000
        num_frames = 3.0 * sample_rate
        num_channels = 2
        sinfo = self._query_tarfile(ext, dtype, sample_rate, num_channels,
                                    num_frames)
        self._assert_sinfo(sinfo, ext, dtype, sample_rate, num_channels,
                           num_frames, ["vorbis"])


# Run the tests with unittest when this module is executed as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: audio/tests/backends/sox_io/load_test.py
================================================
import itertools
import platform
import unittest
# On Windows, emit a warning and stop before the sox_io_backend import below
# is reached.
# NOTE(review): `exit()` is the helper installed by the `site` module and
# raises SystemExit at import time of this test module.
if platform.system() == "Windows":
    import warnings
    warnings.warn("sox io not support in Windows, please skip test.")
    exit()

from parameterized import parameterized
import numpy as np
from paddleaudio.backends import sox_io_backend

from common_utils import (
    get_wav_data,
    load_wav,
    save_wav, )

#code is from:https://github.com/pytorch/audio/blob/main/torchaudio/test/torchaudio_unittest/backend/sox_io/load_test.py


class TestLoad(unittest.TestCase):
    """Compare `sox_io_backend.load` with the reference `load_wav` helper."""

    def assert_wav(self, dtype, sample_rate, num_channels, normalize, duration):
        """`sox_io_backend.load` can load wav format correctly.

        Wav data loaded with sox_io backend should match those with scipy

        The wav file is written into a fresh temporary directory instead of a
        fixed relative path, so the test depends neither on the current
        working directory nor on a pre-existing ``testdata`` folder, and it
        leaves no file behind.
        """
        import os
        import tempfile

        data = get_wav_data(
            dtype,
            num_channels,
            normalize=normalize,
            num_frames=duration * sample_rate)
        with tempfile.TemporaryDirectory() as tmp_dir:
            path = os.path.join(tmp_dir, 'reference.wav')
            save_wav(path, data, sample_rate)
            expected = load_wav(path, normalize=normalize)[0]
            data, sr = sox_io_backend.load(path, normalize=normalize)
        assert sr == sample_rate
        np.testing.assert_array_almost_equal(data, expected, decimal=4)

    @parameterized.expand(
        list(
            itertools.product(
                [
                    "float64",
                    "float32",
                    "int32",
                ],
                [8000, 16000],
                [1, 2],
                [False, True], )), )
    def test_wav(self, dtype, sample_rate, num_channels, normalize):
        """`sox_io_backend.load` can load wav format correctly."""
        self.assert_wav(dtype, sample_rate, num_channels, normalize, duration=1)


# Run the tests with unittest when this module is executed as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: audio/tests/backends/sox_io/save_test.py
================================================
import io
import platform
import unittest
# On Windows, emit a warning and stop before the sox_io_backend import below
# is reached.
# NOTE(review): `exit()` is the helper installed by the `site` module and
# raises SystemExit at import time of this test module.
if platform.system() == "Windows":
    import warnings
    warnings.warn("sox io not support in Windows, please skip test.")
    exit()

import numpy as np
from paddleaudio.backends import sox_io_backend

from common_utils import (get_wav_data, load_wav, save_wav, nested_params,
                          TempDirMixin, sox_utils)

#code is from:https://github.com/pytorch/audio/blob/main/torchaudio/test/torchaudio_unittest/backend/sox_io/save_test.py


def _get_sox_encoding(encoding):
    encodings = {
        "PCM_F": "floating-point",
        "PCM_S": "signed-integer",
        "PCM_U": "unsigned-integer",
        "ULAW": "u-law",
        "ALAW": "a-law",
    }
    return encodings.get(encoding)


class TestSaveBase(TempDirMixin):
    """Shared machinery for comparing `sox_io_backend.save` with `sox_utils` conversions."""

    def assert_save_consistency(
            self,
            format: str,
            *,
            compression: float=None,
            encoding: str=None,
            bits_per_sample: int=None,
            sample_rate: float=8000,
            num_channels: int=2,
            num_frames: float=3 * 8000,
            src_dtype: str="int32",
            test_mode: str="path", ):
        """Check that `save` output matches the equivalent `sox_utils` conversion.

        Files in the target format are not read back directly. Both branches
        are converted to 32-bit floating-point wav through
        `sox_utils.convert_audio_file` and then read with `load_wav`:

            source wav (step 1)
              |-- 2.1 load_wav + sox_io_backend.save -> target format
              |       2.2 convert to wav
              |       2.3 load_wav                   -> found
              |
              `-- 3.1 convert to target format
                      3.2 convert to wav
                      3.3 load_wav                   -> expected

        ``found`` and ``expected`` are compared with
        `np.testing.assert_array_almost_equal`.

        ``test_mode`` selects how `save` receives its destination: "path"
        (a path string, no explicit format), "fileobj" (an opened binary file)
        or "bytesio" (an in-memory buffer later dumped to the target path).
        Any other value raises ``ValueError``.
        """
        reference_encoding = "floating-point"
        reference_bit_depth = 32

        src_path = self.get_temp_path("1.source.wav")
        tgt_path = self.get_temp_path(f"2.1.paddleaudio.{format}")
        tst_path = self.get_temp_path("2.2.result.wav")
        sox_path = self.get_temp_path(f"3.1.sox.{format}")
        ref_path = self.get_temp_path("3.2.ref.wav")

        # Step 1: write the source wav.
        source = get_wav_data(
            src_dtype, num_channels, normalize=False, num_frames=num_frames)
        save_wav(src_path, source, sample_rate)

        # Step 2.1: read the source back and save it in the target format.
        waveform = load_wav(src_path, normalize=False)[0]
        save_options = dict(
            compression=compression,
            encoding=encoding,
            bits_per_sample=bits_per_sample)
        if test_mode == "path":
            sox_io_backend.save(tgt_path, waveform, sample_rate,
                                **save_options)
        elif test_mode == "fileobj":
            with open(tgt_path, "bw") as sink:
                sox_io_backend.save(
                    sink, waveform, sample_rate, format=format, **save_options)
        elif test_mode == "bytesio":
            buffer = io.BytesIO()
            sox_io_backend.save(
                buffer, waveform, sample_rate, format=format, **save_options)
            buffer.seek(0)
            with open(tgt_path, "bw") as sink:
                sink.write(buffer.read())
        else:
            raise ValueError(f"Unexpected test mode: {test_mode}")

        # Steps 2.2 / 2.3: bring the saved file back to wav and load it.
        sox_utils.convert_audio_file(
            tgt_path,
            tst_path,
            encoding=reference_encoding,
            bit_depth=reference_bit_depth)
        found = load_wav(tst_path, normalize=False)[0]

        # Step 3.1: produce the same target format through sox_utils.
        sox_utils.convert_audio_file(
            src_path,
            sox_path,
            compression=compression,
            encoding=_get_sox_encoding(encoding),
            bit_depth=bits_per_sample)
        # Steps 3.2 / 3.3: bring the reference file back to wav and load it.
        sox_utils.convert_audio_file(
            sox_path,
            ref_path,
            encoding=reference_encoding,
            bit_depth=reference_bit_depth)
        expected = load_wav(ref_path, normalize=False)[0]

        np.testing.assert_array_almost_equal(found, expected)


class TestSave(TestSaveBase, unittest.TestCase):
    """Consistency checks for saving wav files through a path."""

    @nested_params(
        ["path"],
        [
            ("PCM_U", 8),
            ("PCM_S", 16),
            ("PCM_S", 32),
            ("PCM_F", 32),
            ("PCM_F", 64),
            ("ULAW", 8),
            ("ALAW", 8),
        ], )
    def test_save_wav(self, test_mode, enc_params):
        """Save wav with each (encoding, bits_per_sample) pair and compare."""
        enc_name, bit_width = enc_params
        self.assert_save_consistency(
            "wav",
            test_mode=test_mode,
            encoding=enc_name,
            bits_per_sample=bit_width)

    @nested_params(
        ["path"],
        [
            ("float32", ),
            ("int32", ),
        ], )
    def test_save_wav_dtype(self, test_mode, params):
        """Save wav from each source dtype with default encoding settings and compare."""
        [source_dtype] = params
        self.assert_save_consistency(
            "wav", test_mode=test_mode, src_dtype=source_dtype)


# Run the tests with unittest when this module is executed as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: audio/tests/backends/sox_io/smoke_test.py
================================================
import io
import itertools
import platform
import unittest
# On Windows, emit a warning and stop before the sox_io_backend import below
# is reached.
# NOTE(review): `exit()` is the helper installed by the `site` module and
# raises SystemExit at import time of this test module.
if platform.system() == "Windows":
    import warnings
    warnings.warn("sox io not support in Windows, please skip test.")
    exit()

from parameterized import parameterized
from paddleaudio.backends import sox_io_backend
from common_utils import (get_wav_data, TempDirMixin, name_func)


class SmokeTest(TempDirMixin, unittest.TestCase):
    """Smoke tests for save / info / load through file paths.

    These tests only check that the sox_io_backend calls run and report the
    expected sample rate and channel count. They are meant to work without
    extra tools (such as the sox command); without such tools the correctness
    of the audio content itself is not verified.
    """

    def run_smoke_test(self,
                       ext,
                       sample_rate,
                       num_channels,
                       *,
                       compression=None,
                       dtype="float32"):
        """Save generated data as ``test.<ext>``, then run info and load on it."""
        seconds = 1
        frame_count = sample_rate * seconds
        target = self.get_temp_path(f"test.{ext}")
        waveform = get_wav_data(
            dtype, num_channels, normalize=False, num_frames=frame_count)

        # Step 1: save
        sox_io_backend.save(
            target, waveform, sample_rate, compression=compression)

        # Step 2: info
        meta = sox_io_backend.info(target)
        assert meta.sample_rate == sample_rate
        assert meta.num_channels == num_channels

        # Step 3: load
        decoded, decoded_rate = sox_io_backend.load(target, normalize=False)
        assert decoded_rate == sample_rate
        assert decoded.shape[0] == num_channels

    @parameterized.expand(
        list(itertools.product(
            ["float32", "int32"],
            #["float32", "int32", "int16", "uint8"],
            [8000, 16000],
            [1, 2], )),
        name_func=name_func, )
    def test_wav(self, dtype, sample_rate, num_channels):
        """Smoke test for the wav format"""
        self.run_smoke_test("wav", sample_rate, num_channels, dtype=dtype)

    #@parameterized.expand(
    #list(
    #itertools.product(
    #[8000, 16000],
    #[1, 2],
    #[-4.2, -0.2, 0, 0.2, 96, 128, 160, 192, 224, 256, 320],
    #)
    #)
    #)
    #def test_mp3(self, sample_rate, num_channels, bit_rate):
    #"""Run smoke test on mp3 format"""
    #self.run_smoke_test("mp3", sample_rate, num_channels, compression=bit_rate)

    #@parameterized.expand(
    #list(
    #itertools.product(
    #[8000, 16000],
    #[1, 2],
    #[-1, 0, 1, 2, 3, 3.6, 5, 10],
    #)
    #)
    #)
    #def test_vorbis(self, sample_rate, num_channels, quality_level):
    #"""Run smoke test on vorbis format"""
    #self.run_smoke_test("vorbis", sample_rate, num_channels, compression=quality_level)

    @parameterized.expand(
        list(itertools.product(
            [8000, 16000],
            [1, 2],
            list(range(9)), )),
        name_func=name_func, )
    def test_flac(self, sample_rate, num_channels, compression_level):
        """Smoke test for the flac format"""
        self.run_smoke_test(
            "flac", sample_rate, num_channels, compression=compression_level)


class SmokeTestFileObj(unittest.TestCase):
    """Smoke tests for save / info / load through an in-memory file object.

    These tests only check that the sox_io_backend calls run and report the
    expected sample rate and channel count. They are meant to work without
    extra tools (such as the sox command); without such tools the correctness
    of the audio content itself is not verified.
    """

    def run_smoke_test(self,
                       ext,
                       sample_rate,
                       num_channels,
                       *,
                       compression=None,
                       dtype="float32"):
        """Save generated data into a BytesIO, then run info and load on it."""
        seconds = 1
        frame_count = sample_rate * seconds
        waveform = get_wav_data(
            dtype, num_channels, normalize=False, num_frames=frame_count)

        buffer = io.BytesIO()

        # Step 1: save
        sox_io_backend.save(
            buffer, waveform, sample_rate, compression=compression, format=ext)

        # Step 2: info (rewind first so the reader starts at the beginning)
        buffer.seek(0)
        meta = sox_io_backend.info(buffer, format=ext)
        assert meta.sample_rate == sample_rate
        assert meta.num_channels == num_channels

        # Step 3: load (rewind again after info consumed the buffer position)
        buffer.seek(0)
        decoded, decoded_rate = sox_io_backend.load(
            buffer, normalize=False, format=ext)
        assert decoded_rate == sample_rate
        assert decoded.shape[0] == num_channels

    @parameterized.expand(
        list(itertools.product(
            ["float32", "int32"],
            [8000, 16000],
            [1, 2], )),
        name_func=name_func, )
    def test_wav(self, dtype, sample_rate, num_channels):
        """Smoke test for the wav format"""
        self.run_smoke_test("wav", sample_rate, num_channels, dtype=dtype)

    # not supported yet
    #@parameterized.expand(
    #list(
    #itertools.product(
    #[8000, 16000],
    #[1, 2],
    #[-4.2, -0.2, 0, 0.2, 96, 128, 160, 192, 224, 256, 320],
    #)
    #)
    #)
    #def test_mp3(self, sample_rate, num_channels, bit_rate):
    #"""Run smoke test on mp3 format"""
    #self.run_smoke_test("mp3", sample_rate, num_channels, compression=bit_rate)

    #@parameterized.expand(
    #list(
    #itertools.product(
    #[8000, 16000],
    #[1, 2],
    #[-1, 0, 1, 2, 3, 3.6, 5, 10],
    #)
    #)
    #)
    #def test_vorbis(self, sample_rate, num_channels, quality_level):
    #"""Run smoke test on vorbis format"""
    #self.run_smoke_test("vorbis", sample_rate, num_channels, compression=quality_level)

    @parameterized.expand(
        list(itertools.product(
            [8000, 16000],
            [1, 2],
            list(range(9)), )),
        name_func=name_func, )
    def test_flac(self, sample_rate, num_channels, compression_level):
        """Smoke test for the flac format"""
        self.run_smoke_test(
            "flac", sample_rate, num_channels, compression=compression_level)


# Run the tests with unittest when this module is executed as a script.
if __name__ == '__main__':
    #test_func()
    unittest.main()


================================================
FILE: audio/tests/backends/sox_io/sox_effect_test.py
================================================
#code is from: https://github.com/pytorch/audio/blob/main/test/torchaudio_unittest/sox_effect/sox_effect_test.py
import io
import itertools
import platform
import tarfile
import unittest
from pathlib import Path

import numpy as np
# On Windows, emit a warning and stop before the sox_effects import below is
# reached.
# NOTE(review): `exit()` is the helper installed by the `site` module and
# raises SystemExit at import time of this test module.
if platform.system() == "Windows":
    import warnings
    warnings.warn("sox io not support in Windows, please skip test.")
    exit()

from parameterized import parameterized
from paddleaudio import sox_effects
from common_utils import (get_sinusoid, get_wav_data, load_wav, save_wav,
                          sox_utils, TempDirMixin, load_effects_params)


class TestSoxEffects(unittest.TestCase):
    """Initialization checks for the `sox_effects` module."""

    def test_init(self):
        """Calling init_sox_effects multiple times does not crash"""
        remaining_calls = 3
        while remaining_calls > 0:
            sox_effects.init_sox_effects()
            remaining_calls -= 1


class TestSoxEffectsTensor(TempDirMixin, unittest.TestCase):
    """Tests for the `apply_effects_tensor` function"""

    @parameterized.expand(
        list(itertools.product(
            ["float32", "int32"],
            [8000, 16000],
            [1, 2, 4, 8],
            [True, False], )))
    def test_apply_no_effect(self, dtype, sample_rate, num_channels,
                             channels_first):
        """With an empty effect list, `apply_effects_tensor` returns data equal to its input"""
        original = get_wav_data(
            dtype, num_channels, channels_first=channels_first)
        expected = original.clone()

        no_effects = []
        found, result_rate = sox_effects.apply_effects_tensor(
            expected, sample_rate, no_effects, channels_first)

        assert result_rate == sample_rate

        # The tensor passed in must still hold the original values.
        np.testing.assert_array_almost_equal(original.numpy(), expected.numpy())

        # The result must be a different object from the input ...
        assert found is not expected
        # ... whose values nevertheless match the input.
        np.testing.assert_array_almost_equal(expected.numpy(), found.numpy())

    @parameterized.expand(
        load_effects_params("sox_effect_test_args.jsonl"),
        name_func=lambda f, i, p: f'{f.__name__}_{i}_{p.args[0]["effects"][0][0]}',
    )
    def test_apply_effects(self, args):
        """`apply_effects_tensor` matches the output of `sox_utils.run_sox_effect`"""
        effects = args["effects"]
        num_channels = args.get("num_channels", 2)
        input_sr = args.get("input_sample_rate", 8000)
        output_sr = args.get("output_sample_rate")

        input_path = self.get_temp_path("input.wav")
        reference_path = self.get_temp_path("reference.wav")

        # Reference branch: write a sinusoid and process it via sox_utils.
        sinusoid = get_sinusoid(
            frequency=800,
            sample_rate=input_sr,
            n_channels=num_channels,
            dtype="float32")
        save_wav(input_path, sinusoid, input_sr)
        sox_utils.run_sox_effect(
            input_path, reference_path, effects, output_sample_rate=output_sr)
        expected, expected_sr = load_wav(reference_path)

        # Branch under test: apply the same effects directly on the tensor.
        found, found_sr = sox_effects.apply_effects_tensor(sinusoid, input_sr,
                                                           effects)

        assert found_sr == expected_sr
        np.testing.assert_array_almost_equal(expected.numpy(), found.numpy())


class TestSoxEffectsFile(TempDirMixin, unittest.TestCase):
    """Tests for the `apply_effects_file` function"""

    @parameterized.expand(
        list(itertools.product(
            ["float32", "int32"],
            [8000, 16000],
            [1, 2, 4, 8],
            [False, True], )),
        #name_func=name_func,
    )
    def test_apply_no_effect(self, dtype, sample_rate, num_channels,
                             channels_first):
        """With an empty effect list, `apply_effects_file` returns data equal to the file content"""
        wav_path = self.get_temp_path("input.wav")
        expected = get_wav_data(
            dtype, num_channels, channels_first=channels_first)
        save_wav(wav_path, expected, sample_rate, channels_first=channels_first)

        found, result_rate = sox_effects.apply_effects_file(
            wav_path, [], normalize=False, channels_first=channels_first)

        assert result_rate == sample_rate
        np.testing.assert_array_almost_equal(expected.numpy(), found.numpy())

    @parameterized.expand(
        load_effects_params("sox_effect_test_args.jsonl"),
        #name_func=lambda f, i, p: f'{f.__name__}_{i}_{p.args[0]["effects"][0][0]}',
    )
    def test_apply_effects_str(self, args):
        """`apply_effects_file` matches `sox_utils.run_sox_effect` when given a path string"""
        dtype, channels_first = "int32", True
        effects = args["effects"]
        num_channels = args.get("num_channels", 2)
        input_sr = args.get("input_sample_rate", 8000)
        output_sr = args.get("output_sample_rate")

        input_path = self.get_temp_path("input.wav")
        reference_path = self.get_temp_path("reference.wav")

        # Reference branch: write the input and process it via sox_utils.
        samples = get_wav_data(
            dtype, num_channels, channels_first=channels_first)
        save_wav(input_path, samples, input_sr, channels_first=channels_first)
        sox_utils.run_sox_effect(
            input_path, reference_path, effects, output_sample_rate=output_sr)
        expected, expected_sr = load_wav(reference_path)

        # Branch under test: apply the same effects on the input file.
        found, found_sr = sox_effects.apply_effects_file(
            input_path, effects, normalize=False, channels_first=channels_first)

        assert found_sr == expected_sr
        np.testing.assert_array_almost_equal(expected.numpy(), found.numpy())

    def test_apply_effects_path(self):
        """`apply_effects_file` matches `sox_utils.run_sox_effect` when given a `Path` object"""
        dtype, channels_first = "int32", True
        effects = [["hilbert"]]
        num_channels = 2
        input_sr = 8000
        output_sr = 8000

        input_path = self.get_temp_path("input.wav")
        reference_path = self.get_temp_path("reference.wav")

        # Reference branch: write the input and process it via sox_utils.
        samples = get_wav_data(
            dtype, num_channels, channels_first=channels_first)
        save_wav(input_path, samples, input_sr, channels_first=channels_first)
        sox_utils.run_sox_effect(
            input_path, reference_path, effects, output_sample_rate=output_sr)
        expected, expected_sr = load_wav(reference_path)

        # Branch under test: same file, addressed through pathlib.Path.
        found, found_sr = sox_effects.apply_effects_file(
            Path(input_path),
            effects,
            normalize=False,
            channels_first=channels_first)

        assert found_sr == expected_sr
        np.testing.assert_array_almost_equal(expected.numpy(), found.numpy())


class TestFileFormats(TempDirMixin, unittest.TestCase):
    """Compare `apply_effects_file` with the sox command on several wav encodings"""

    @parameterized.expand(
        list(itertools.product(
            ["float32", "int32"],
            [8000, 16000],
            [1, 2], )), )
    def test_wav(self, dtype, sample_rate, num_channels):
        """`apply_effects_file` handles each dtype / sample rate / channel count combination"""
        channels_first = True
        effects = [["band", "300", "10"]]

        src_path = self.get_temp_path("input.wav")
        ref_path = self.get_temp_path("reference.wav")

        # Generate the input file and the reference produced by the sox command.
        waveform = get_wav_data(
            dtype, num_channels, channels_first=channels_first)
        save_wav(src_path, waveform, sample_rate, channels_first=channels_first)
        sox_utils.run_sox_effect(src_path, ref_path, effects)
        expected, expected_sr = load_wav(ref_path)

        # Apply the same effect chain through the Python API.
        found, sr = sox_effects.apply_effects_file(
            src_path,
            effects,
            normalize=False,
            channels_first=channels_first)

        assert sr == expected_sr
        np.testing.assert_array_almost_equal(found.numpy(), expected.numpy())

    #not support now
    #@parameterized.expand(
    #list(
    #itertools.product(
    #[8000, 16000],
    #[1, 2],
    #)
    #),
    ##name_func=lambda f, _, p: f'{f.__name__}_{"_".join(str(arg) for arg in p.args)}',
    #)
    #def test_flac(self, sample_rate, num_channels):
    #"""`apply_effects_file` works on various flac format"""
    #channels_first = True
    #effects = [["band", "300", "10"]]

    #input_path = self.get_temp_path("input.flac")
    #reference_path = self.get_temp_path("reference.wav")
    #sox_utils.gen_audio_file(input_path, sample_rate, num_channels)
    #sox_utils.run_sox_effect(input_path, reference_path, effects, output_bitdepth=32)

    #expected, expected_sr = load_wav(reference_path)
    #found, sr = sox_effects.apply_effects_file(input_path, effects, channels_first=channels_first)
    #save_wav(self.get_temp_path("result.wav"), found, sr, channels_first=channels_first)

    #assert sr == expected_sr
    ##self.assertEqual(found, expected)
    #np.testing.assert_array_almost_equal(found.numpy(), expected.numpy())

    #@parameterized.expand(
    #list(
    #itertools.product(
    #[8000, 16000],
    #[1, 2],
    #)
    #),
    ##name_func=lambda f, _, p: f'{f.__name__}_{"_".join(str(arg) for arg in p.args)}',
    #)
    #def test_vorbis(self, sample_rate, num_channels):
    #"""`apply_effects_file` works on various vorbis format"""
    #channels_first = True
    #effects = [["band", "300", "10"]]

    #input_path = self.get_temp_path("input.vorbis")
    #reference_path = self.get_temp_path("reference.wav")
    #sox_utils.gen_audio_file(input_path, sample_rate, num_channels)
    #sox_utils.run_sox_effect(input_path, reference_path, effects, output_bitdepth=32)

    #expected, expected_sr = load_wav(reference_path)
    #found, sr = sox_effects.apply_effects_file(input_path, effects, channels_first=channels_first)
    #save_wav(self.get_temp_path("result.wav"), found, sr, channels_first=channels_first)

    #assert sr == expected_sr
    ##self.assertEqual(found, expected)
    #np.testing.assert_array_almost_equal(found.numpy(), expected.numpy())


    #@skipIfNoExec("sox")
    #@skipIfNoSox
class TestFileObject(TempDirMixin, unittest.TestCase):
    """`apply_effects_file` accepts file-like inputs and agrees with the sox command."""

    def _prepare_reference(self, ext, effects, sample_rate, channels_first):
        """Write the input audio file and the sox-processed reference for it.

        Args:
            ext: File extension of the generated input file.
            effects: Effect chain handed to the sox command.
            sample_rate: Sample rate used when saving the input file.
            channels_first: Layout flag forwarded to the wav helpers.

        Returns:
            tuple: ``(input_path, expected, expected_sr)`` where ``expected``
            and ``expected_sr`` are what ``load_wav`` returns for the reference.
        """
        input_path = self.get_temp_path(f"input.{ext}")
        reference_path = self.get_temp_path("reference.wav")
        data = get_wav_data("int32", 2, channels_first=channels_first)
        save_wav(input_path, data, sample_rate, channels_first=channels_first)
        sox_utils.run_sox_effect(
            input_path, reference_path, effects, output_bitdepth=32)
        expected, expected_sr = load_wav(reference_path)
        return input_path, expected, expected_sr

    def _check_result(self, found, sr, expected, expected_sr, channels_first):
        """Save the result for inspection and compare it with the reference."""
        save_wav(
            self.get_temp_path("result.wav"),
            found,
            sr,
            channels_first=channels_first)
        # assertEqual is not stripped under `python -O` and reports both values.
        self.assertEqual(sr, expected_sr)
        np.testing.assert_array_almost_equal(found.numpy(), expected.numpy())

    @parameterized.expand([
        ("wav", None),
    ])
    def test_fileobj(self, ext, compression):
        """Applying effects via file object works"""
        sample_rate = 16000
        channels_first = True
        effects = [["band", "300", "10"]]
        input_path, expected, expected_sr = self._prepare_reference(
            ext, effects, sample_rate, channels_first)

        with open(input_path, "rb") as fileobj:
            found, sr = sox_effects.apply_effects_file(
                fileobj, effects, channels_first=channels_first)
        self._check_result(found, sr, expected, expected_sr, channels_first)

    @parameterized.expand([
        ("wav", None),
    ])
    def test_bytesio(self, ext, compression):
        """Applying effects via BytesIO object works"""
        sample_rate = 16000
        channels_first = True
        effects = [["band", "300", "10"]]
        input_path, expected, expected_sr = self._prepare_reference(
            ext, effects, sample_rate, channels_first)

        # Load the whole file into memory so the effect runs on a BytesIO buffer.
        with open(input_path, "rb") as file_:
            fileobj = io.BytesIO(file_.read())
        found, sr = sox_effects.apply_effects_file(
            fileobj, effects, channels_first=channels_first)
        self._check_result(found, sr, expected, expected_sr, channels_first)

    @parameterized.expand([
        ("wav", None),
    ])
    def test_tarfile(self, ext, compression):
        """Applying effects to audio read from a tar archive member works"""
        sample_rate = 16000
        channels_first = True
        effects = [["band", "300", "10"]]
        audio_file = f"input.{ext}"

        input_path, expected, expected_sr = self._prepare_reference(
            ext, effects, sample_rate, channels_first)
        archive_path = self.get_temp_path("archive.tar.gz")

        # Pack the input into an archive, then read it back as a file-like member.
        with tarfile.TarFile(archive_path, "w") as tarobj:
            tarobj.add(input_path, arcname=audio_file)
        with tarfile.TarFile(archive_path, "r") as tarobj:
            fileobj = tarobj.extractfile(audio_file)
            found, sr = sox_effects.apply_effects_file(
                fileobj, effects, channels_first=channels_first)
        self._check_result(found, sr, expected, expected_sr, channels_first)


# Run the test cases of this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: audio/tests/backends/sox_io/sox_effect_test_args.jsonl
================================================
{"effects": [["allpass", "300", "10"]]}
{"effects": [["band", "300", "10"]]}
{"effects": [["bandpass", "300", "10"]]}
{"effects": [["bandreject", "300", "10"]]}
{"effects": [["bass", "-10"]]}
{"effects": [["biquad", "0.4", "0.2", "0.9", "0.7", "0.2", "0.6"]]}
{"effects": [["chorus", "0.7", "0.9", "55", "0.4", "0.25", "2", "-t"]]}
{"effects": [["chorus", "0.6", "0.9", "50", "0.4", "0.25", "2", "-t", "60", "0.32", "0.4", "1.3", "-s"]]}
{"effects": [["chorus", "0.5", "0.9", "50", "0.4", "0.25", "2", "-t", "60", "0.32", "0.4", "2.3", "-t", "40", "0.3", "0.3", "1.3", "-s"]]}
{"effects": [["channels", "1"]]}
{"effects": [["channels", "2"]]}
{"effects": [["channels", "3"]]}
{"effects": [["compand", "0.3,1", "6:-70,-60,-20", "-5", "-90", "0.2"]]}
{"effects": [["compand", ".1,.2", "-inf,-50.1,-inf,-50,-50", "0", "-90", ".1"]]}
{"effects": [["compand", ".1,.1", "-45.1,-45,-inf,0,-inf", "45", "-90", ".1"]]}
{"effects": [["contrast", "0"]]}
{"effects": [["contrast", "25"]]}
{"effects": [["contrast", "50"]]}
{"effects": [["contrast", "75"]]}
{"effects": [["contrast", "100"]]}
{"effects": [["dcshift", "1.0"]]}
{"effects": [["dcshift", "-1.0"]]}
{"effects": [["deemph"]], "input_sample_rate": 44100}
{"effects": [["dither", "-s"]]}
{"effects": [["dither", "-S"]]}
{"effects": [["divide"]]}
{"effects": [["downsample", "2"]], "input_sample_rate": 8000, "output_sample_rate": 4000}
{"effects": [["earwax"]], "input_sample_rate": 44100}
{"effects": [["echo", "0.8", "0.88", "60", "0.4"]]}
{"effects": [["echo", "0.8", "0.88", "6", "0.4"]]}
{"effects": [["echo", "0.8", "0.9", "1000", "0.3"]]}
{"effects": [["echo", "0.8", "0.9", "1000", "0.3", "1800", "0.25"]]}
{"effects": [["echos", "0.8", "0.7", "700", "0.25", "700", "0.3"]]}
{"effects": [["echos", "0.8", "0.7", "700", "0.25", "900", "0.3"]]}
{"effects": [["echos", "0.8", "0.7", "40", "0.25", "63", "0.3"]]}
{"effects": [["equalizer", "300", "10", "5"]]}
{"effects": [["fade", "q", "3"]]}
{"effects": [["fade", "h", "3"]]}
{"effects": [["fade", "t", "3"]]}
{"effects": [["fade", "l", "3"]]}
{"effects": [["fade", "p", "3"]]}
{"effects": [["fir", "0.0195", "-0.082", "0.234", "0.891", "-0.145", "0.043"]]}
{"effects": [["flanger"]]}
{"effects": [["gain", "-l", "-6"]]}
{"effects": [["highpass", "-1", "300"]]}
{"effects": [["highpass", "-2", "300"]]}
{"effects": [["hilbert"]]}
{"effects": [["loudness"]]}
{"effects": [["lowpass", "-1", "300"]]}
{"effects": [["lowpass", "-2", "300"]]}
{"effects": [["mcompand", "0.005,0.1 -47,-40,-34,-34,-17,-33", "100", "0.003,0.05 -47,-40,-34,-34,-17,-33", "400", "0.000625,0.0125 -47,-40,-34,-34,-15,-33", "1600", "0.0001,0.025 -47,-40,-34,-34,-31,-31,-0,-30", "6400", "0,0.025 -38,-31,-28,-28,-0,-25"]], "input_sample_rate": 44100}
{"effects": [["oops"]]}
{"effects": [["overdrive"]]}
{"effects": [["pad"]]}
{"effects": [["phaser"]]}
{"effects": [["remix", "6", "7", "8", "0"]], "num_channels": 8}
{"effects": [["remix", "1-3,7", "3"]], "num_channels": 8}
{"effects": [["repeat"]]}
{"effects": [["reverb"]]}
{"effects": [["reverse"]]}
{"effects": [["riaa"]], "input_sample_rate": 44100}
{"effects": [["silence", "0"]]}
{"effects": [["speed", "1.3"]], "input_sample_rate": 4000, "output_sample_rate": 5200}
{"effects": [["speed", "0.7"]], "input_sample_rate": 4000, "output_sample_rate": 2800}
{"effects": [["stat"]]}
{"effects": [["stats"]]}
{"effects": [["stretch"]]}
{"effects": [["swap"]]}
{"effects": [["synth"]]}
{"effects": [["tempo", "0.9"]]}
{"effects": [["tempo", "1.1"]]}
{"effects": [["treble", "3"]]}
{"effects": [["tremolo", "300", "40"]]}
{"effects": [["tremolo", "300", "50"]]}
{"effects": [["trim", "0", "0.1"]]}
{"effects": [["upsample", "2"]], "input_sample_rate": 8000, "output_sample_rate": 16000}
{"effects": [["vol", "3"]]}


================================================
FILE: audio/tests/benchmark/README.md
================================================
# 1. Prepare
First, install `pytest-benchmark` via pip.
```sh
pip install pytest-benchmark
```

# 2. Run
Run the benchmark script you want to profile with `pytest`, for example:
```sh
pytest melspectrogram.py
```

Result:
```sh
========================================================================== test session starts ==========================================================================
platform linux -- Python 3.7.7, pytest-7.0.1, pluggy-1.0.0
benchmark: 3.4.1 (defaults: timer=time.perf_counter disable_gc=False min_rounds=5 min_time=0.000005 max_time=1.0 calibration_precision=10 warmup=False warmup_iterations=100000)
rootdir: /ssd3/chenxiaojie06/PaddleSpeech/DeepSpeech/paddleaudio
plugins: typeguard-2.12.1, benchmark-3.4.1, anyio-3.5.0
collected 4 items

melspectrogram.py ....                                                                                                                                            [100%]


-------------------------------------------------------------------------------------------------- benchmark: 4 tests -------------------------------------------------------------------------------------------------
Name (time in us)                        Min                    Max                   Mean              StdDev                 Median                 IQR            Outliers         OPS            Rounds  Iterations
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
test_melspect_gpu_torchaudio        202.0765 (1.0)         360.6230 (1.0)         218.1168 (1.0)       16.3022 (1.0)         214.2871 (1.0)       21.8451 (1.0)          40;3  4,584.7001 (1.0)         286           1
test_melspect_gpu                   657.8509 (3.26)        908.0470 (2.52)        724.2545 (3.32)     106.5771 (6.54)        669.9096 (3.13)     113.4719 (5.19)          1;0  1,380.7300 (0.30)          5           1
test_melspect_cpu_torchaudio      1,247.6053 (6.17)      2,892.5799 (8.02)      1,443.2853 (6.62)     345.3732 (21.19)     1,262.7263 (5.89)     221.6385 (10.15)       56;53    692.8637 (0.15)        399           1
test_melspect_cpu                20,326.2549 (100.59)   20,607.8682 (57.15)    20,473.4125 (93.86)     63.8654 (3.92)     20,467.0429 (95.51)     68.4294 (3.13)          8;1     48.8438 (0.01)         29           1
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

Legend:
  Outliers: 1 Standard Deviation from Mean; 1.5 IQR (InterQuartile Range) from 1st Quartile and 3rd Quartile.
  OPS: Operations Per Second, computed as 1 / Mean
========================================================================== 4 passed in 21.12s ===========================================================================

```


================================================
FILE: audio/tests/benchmark/log_melspectrogram.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import urllib.request

import librosa
import numpy as np
import paddle
import paddleaudio
import torch
import torchaudio

# Sample audio used by every benchmark in this file. It is downloaded into the
# current working directory only when a file with that name is not there yet.
wav_url = 'https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav'
if not os.path.isfile(os.path.basename(wav_url)):
    urllib.request.urlretrieve(wav_url, os.path.basename(wav_url))

# Load the audio once and keep a paddle and a torch copy, each with an extra
# leading axis added by `unsqueeze(0)`.
waveform, sr = paddleaudio.backends.soundfile_load(
    os.path.abspath(os.path.basename(wav_url)))
waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0)
waveform_tensor_torch = torch.from_numpy(waveform).unsqueeze(0)

# Mel-spectrogram settings passed to both the paddle extractor and librosa.
mel_conf = {
    'sr': sr,
    'n_fft': 512,
    'hop_length': 128,
    'n_mels': 40,
}

# The same settings under the keyword names used by torchaudio.
mel_conf_torchaudio = {
    'sample_rate': sr,
    'n_fft': 512,
    'hop_length': 128,
    'n_mels': 40,
    'norm': 'slaney',
    'mel_scale': 'slaney',
}


def enable_cpu_device():
    """Select the CPU as the paddle device."""
    paddle.set_device('cpu')


def enable_gpu_device():
    """Select the GPU as the paddle device."""
    paddle.set_device('gpu')


# Paddle log-mel extractor, built once at import time and reused by the
# benchmarked function below; it uses the dtype of the input tensor.
log_mel_extractor = paddle.audio.features.LogMelSpectrogram(
    **mel_conf, f_min=0.0, top_db=80.0, dtype=waveform_tensor.dtype)


def log_melspectrogram():
    """Run the paddle log-mel extractor on the sample waveform and squeeze axis 0."""
    features = log_mel_extractor(waveform_tensor)
    return features.squeeze(0)


def test_log_melspect_cpu(benchmark):
    """Benchmark the paddle log-mel extractor on CPU and compare it with librosa.

    Args:
        benchmark: The `pytest-benchmark` fixture; it runs the callable and its
            result is used here as the extracted feature.
    """
    enable_cpu_device()
    feature_paddleaudio = benchmark(log_melspectrogram)
    # The audio is passed as `y=`: librosa >= 0.10 only accepts it by keyword.
    feature_librosa = librosa.feature.melspectrogram(y=waveform, **mel_conf)
    feature_librosa = librosa.power_to_db(feature_librosa, top_db=80.0)
    np.testing.assert_array_almost_equal(
        feature_librosa, feature_paddleaudio, decimal=3)


def test_log_melspect_gpu(benchmark):
    """Benchmark the paddle log-mel extractor on GPU and compare it with librosa.

    Args:
        benchmark: The `pytest-benchmark` fixture; it runs the callable and its
            result is used here as the extracted feature.
    """
    enable_gpu_device()
    feature_paddleaudio = benchmark(log_melspectrogram)
    # The audio is passed as `y=`: librosa >= 0.10 only accepts it by keyword.
    feature_librosa = librosa.feature.melspectrogram(y=waveform, **mel_conf)
    feature_librosa = librosa.power_to_db(feature_librosa, top_db=80.0)
    # The GPU result is compared with a looser tolerance than the CPU one.
    np.testing.assert_array_almost_equal(
        feature_librosa, feature_paddleaudio, decimal=2)


# torchaudio counterparts of the paddle extractor: a mel-spectrogram transform
# followed by a power-to-dB conversion with the same `top_db`.
mel_extractor_torchaudio = torchaudio.transforms.MelSpectrogram(
    **mel_conf_torchaudio, f_min=0.0)
amplitude_to_DB = torchaudio.transforms.AmplitudeToDB('power', top_db=80.0)


def melspectrogram_torchaudio():
    """Run the torchaudio mel-spectrogram transform on the sample waveform and squeeze axis 0."""
    mel_specgram = mel_extractor_torchaudio(waveform_tensor_torch)
    return mel_specgram.squeeze(0)


def log_melspectrogram_torchaudio():
    """Run the torchaudio mel-spectrogram and dB transforms on the sample waveform and squeeze axis 0."""
    return amplitude_to_DB(
        mel_extractor_torchaudio(waveform_tensor_torch)).squeeze(0)


def test_log_melspect_cpu_torchaudio(benchmark):
    """Benchmark the torchaudio log-mel pipeline on CPU and compare it with librosa.

    The module-level transforms and input tensor are rebound to their CPU
    versions so the benchmarked function picks them up.

    Args:
        benchmark: The `pytest-benchmark` fixture; it runs the callable and its
            result is used here as the extracted feature.
    """
    global waveform_tensor_torch, mel_extractor_torchaudio, amplitude_to_DB

    mel_extractor_torchaudio = mel_extractor_torchaudio.to('cpu')
    waveform_tensor_torch = waveform_tensor_torch.to('cpu')
    amplitude_to_DB = amplitude_to_DB.to('cpu')

    feature_torchaudio = benchmark(log_melspectrogram_torchaudio)
    # The audio is passed as `y=`: librosa >= 0.10 only accepts it by keyword.
    feature_librosa = librosa.feature.melspectrogram(y=waveform, **mel_conf)
    feature_librosa = librosa.power_to_db(feature_librosa, top_db=80.0)
    np.testing.assert_array_almost_equal(
        feature_librosa, feature_torchaudio, decimal=3)


def test_log_melspect_gpu_torchaudio(benchmark):
    """Benchmark the torchaudio log-mel pipeline on CUDA and compare it with librosa.

    The module-level transforms and input tensor are rebound to their CUDA
    versions so the benchmarked function picks them up.

    Args:
        benchmark: The `pytest-benchmark` fixture; it runs the callable and its
            result is used here as the extracted feature.
    """
    global waveform_tensor_torch, mel_extractor_torchaudio, amplitude_to_DB

    mel_extractor_torchaudio = mel_extractor_torchaudio.to('cuda')
    waveform_tensor_torch = waveform_tensor_torch.to('cuda')
    amplitude_to_DB = amplitude_to_DB.to('cuda')

    feature_torchaudio = benchmark(log_melspectrogram_torchaudio)
    # The audio is passed as `y=`: librosa >= 0.10 only accepts it by keyword.
    feature_librosa = librosa.feature.melspectrogram(y=waveform, **mel_conf)
    feature_librosa = librosa.power_to_db(feature_librosa, top_db=80.0)
    # Bring the result back to host memory before the numpy comparison.
    np.testing.assert_array_almost_equal(
        feature_librosa, feature_torchaudio.cpu(), decimal=2)


================================================
FILE: audio/tests/benchmark/melspectrogram.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import urllib.request

import librosa
import numpy as np
import paddle
import paddleaudio
import torch
import torchaudio

# Sample audio used by every benchmark in this file. It is downloaded into the
# current working directory only when a file with that name is not there yet.
wav_url = 'https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav'
if not os.path.isfile(os.path.basename(wav_url)):
    urllib.request.urlretrieve(wav_url, os.path.basename(wav_url))

# Load the audio once and keep a paddle and a torch copy, each with an extra
# leading axis added by `unsqueeze(0)`.
waveform, sr = paddleaudio.backends.soundfile_load(
    os.path.abspath(os.path.basename(wav_url)))
waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0)
waveform_tensor_torch = torch.from_numpy(waveform).unsqueeze(0)

# Mel-spectrogram settings passed to both the paddle extractor and librosa.
mel_conf = {
    'sr': sr,
    'n_fft': 512,
    'hop_length': 128,
    'n_mels': 40,
}

# The same settings under the keyword names used by torchaudio.
mel_conf_torchaudio = {
    'sample_rate': sr,
    'n_fft': 512,
    'hop_length': 128,
    'n_mels': 40,
    'norm': 'slaney',
    'mel_scale': 'slaney',
}


def enable_cpu_device():
    """Select the CPU as the paddle device."""
    paddle.set_device('cpu')


def enable_gpu_device():
    """Select the GPU as the paddle device."""
    paddle.set_device('gpu')


# Paddle mel-spectrogram extractor, built once at import time and reused by the
# benchmarked function below; it uses the dtype of the input tensor.
mel_extractor = paddle.audio.features.MelSpectrogram(
    **mel_conf, f_min=0.0, dtype=waveform_tensor.dtype)


def melspectrogram():
    """Run the paddle mel-spectrogram extractor on the sample waveform and squeeze axis 0."""
    features = mel_extractor(waveform_tensor)
    return features.squeeze(0)


def test_melspect_cpu(benchmark):
    """Benchmark the paddle mel-spectrogram extractor on CPU and compare it with librosa.

    Args:
        benchmark: The `pytest-benchmark` fixture; it runs the callable and its
            result is used here as the extracted feature.
    """
    enable_cpu_device()
    feature_paddleaudio = benchmark(melspectrogram)
    # The audio is passed as `y=`: librosa >= 0.10 only accepts it by keyword.
    feature_librosa = librosa.feature.melspectrogram(y=waveform, **mel_conf)
    np.testing.assert_array_almost_equal(
        feature_librosa, feature_paddleaudio, decimal=3)


def test_melspect_gpu(benchmark):
    """Benchmark the paddle mel-spectrogram extractor on GPU and compare it with librosa.

    Args:
        benchmark: The `pytest-benchmark` fixture; it runs the callable and its
            result is used here as the extracted feature.
    """
    enable_gpu_device()
    feature_paddleaudio = benchmark(melspectrogram)
    # The audio is passed as `y=`: librosa >= 0.10 only accepts it by keyword.
    feature_librosa = librosa.feature.melspectrogram(y=waveform, **mel_conf)
    np.testing.assert_array_almost_equal(
        feature_librosa, feature_paddleaudio, decimal=3)


# torchaudio counterpart of the paddle extractor, built once at import time.
mel_extractor_torchaudio = torchaudio.transforms.MelSpectrogram(
    **mel_conf_torchaudio, f_min=0.0)


def melspectrogram_torchaudio():
    """Run the torchaudio mel-spectrogram transform on the sample waveform and squeeze axis 0."""
    mel_specgram = mel_extractor_torchaudio(waveform_tensor_torch)
    return mel_specgram.squeeze(0)


def test_melspect_cpu_torchaudio(benchmark):
    """Benchmark the torchaudio mel-spectrogram transform on CPU and compare it with librosa.

    The module-level transform and input tensor are rebound to their CPU
    versions so the benchmarked function picks them up.

    Args:
        benchmark: The `pytest-benchmark` fixture; it runs the callable and its
            result is used here as the extracted feature.
    """
    global waveform_tensor_torch, mel_extractor_torchaudio
    mel_extractor_torchaudio = mel_extractor_torchaudio.to('cpu')
    waveform_tensor_torch = waveform_tensor_torch.to('cpu')
    feature_torchaudio = benchmark(melspectrogram_torchaudio)
    # The audio is passed as `y=`: librosa >= 0.10 only accepts it by keyword.
    feature_librosa = librosa.feature.melspectrogram(y=waveform, **mel_conf)
    np.testing.assert_array_almost_equal(
        feature_librosa, feature_torchaudio, decimal=3)


def test_melspect_gpu_torchaudio(benchmark):
    """Benchmark the torchaudio mel-spectrogram transform on CUDA and compare it with librosa.

    The module-level transform and input tensor are rebound to their CUDA
    versions so the benchmarked function picks them up.

    Args:
        benchmark: The `pytest-benchmark` fixture; it runs the callable and its
            result is used here as the extracted feature.
    """
    global waveform_tensor_torch, mel_extractor_torchaudio
    mel_extractor_torchaudio = mel_extractor_torchaudio.to('cuda')
    waveform_tensor_torch = waveform_tensor_torch.to('cuda')
    feature_torchaudio = benchmark(melspectrogram_torchaudio)
    # The audio is passed as `y=`: librosa >= 0.10 only accepts it by keyword.
    feature_librosa = librosa.feature.melspectrogram(y=waveform, **mel_conf)
    # Bring the result back to host memory before the numpy comparison.
    np.testing.assert_array_almost_equal(
        feature_librosa, feature_torchaudio.cpu(), decimal=3)


================================================
FILE: audio/tests/benchmark/mfcc.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import urllib.request

import librosa
import numpy as np
import paddle
import paddleaudio
import torch
import torchaudio

# Sample audio used by every benchmark in this file. It is downloaded into the
# current working directory only when a file with that name is not there yet.
wav_url = 'https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav'
if not os.path.isfile(os.path.basename(wav_url)):
    urllib.request.urlretrieve(wav_url, os.path.basename(wav_url))

# Load the audio once and keep a paddle and a torch copy, each with an extra
# leading axis added by `unsqueeze(0)`.
waveform, sr = paddleaudio.backends.soundfile_load(
    os.path.abspath(os.path.basename(wav_url)))
waveform_tensor = paddle.to_tensor(waveform).unsqueeze(0)
waveform_tensor_torch = torch.from_numpy(waveform).unsqueeze(0)

# Mel-spectrogram settings; passed to librosa and merged into `mfcc_conf`.
mel_conf = {
    'sr': sr,
    'n_fft': 512,
    'hop_length': 128,
    'n_mels': 40,
}
# MFCC-specific settings for the paddle extractor, completed with the mel
# settings above.
mfcc_conf = {
    'n_mfcc': 20,
    'top_db': 80.0,
}
mfcc_conf.update(mel_conf)

# The mel settings under the keyword names used by torchaudio.
mel_conf_torchaudio = {
    'sample_rate': sr,
    'n_fft': 512,
    'hop_length': 128,
    'n_mels': 40,
    'norm': 'slaney',
    'mel_scale': 'slaney',
}
# Top-level arguments of the torchaudio MFCC transform.
mfcc_conf_torchaudio = {
    'sample_rate': sr,
    'n_mfcc': 20,
}


def enable_cpu_device():
    """Select the CPU as the paddle device."""
    paddle.set_device('cpu')


def enable_gpu_device():
    """Select the GPU as the paddle device."""
    paddle.set_device('gpu')


# Paddle MFCC extractor, built once at import time and reused by the
# benchmarked function below; it uses the dtype of the input tensor.
mfcc_extractor = paddle.audio.features.MFCC(
    **mfcc_conf, f_min=0.0, dtype=waveform_tensor.dtype)


def mfcc():
    """Run the paddle MFCC extractor on the sample waveform and squeeze axis 0."""
    features = mfcc_extractor(waveform_tensor)
    return features.squeeze(0)


def test_mfcc_cpu(benchmark):
    """Benchmark the paddle MFCC extractor on CPU and compare it with librosa.

    Args:
        benchmark: The `pytest-benchmark` fixture; it runs the callable and its
            result is used here as the extracted feature.
    """
    enable_cpu_device()
    feature_paddleaudio = benchmark(mfcc)
    # The audio is passed as `y=`: librosa >= 0.10 only accepts it by keyword.
    feature_librosa = librosa.feature.mfcc(y=waveform, **mel_conf)
    np.testing.assert_array_almost_equal(
        feature_librosa, feature_paddleaudio, decimal=3)


def test_mfcc_gpu(benchmark):
    """Benchmark the paddle MFCC extractor on GPU and compare it with librosa.

    Args:
        benchmark: The `pytest-benchmark` fixture; it runs the callable and its
            result is used here as the extracted feature.
    """
    enable_gpu_device()
    feature_paddleaudio = benchmark(mfcc)
    # The audio is passed as `y=`: librosa >= 0.10 only accepts it by keyword.
    feature_librosa = librosa.feature.mfcc(y=waveform, **mel_conf)
    np.testing.assert_array_almost_equal(
        feature_librosa, feature_paddleaudio, decimal=3)


# torchaudio MFCC transform, built once at import time. `sample_rate` is removed
# from the mel settings before they are passed as `melkwargs`; presumably the
# transform forwards its own `sample_rate` to the mel stage - TODO confirm.
del mel_conf_torchaudio['sample_rate']
mfcc_extractor_torchaudio = torchaudio.transforms.MFCC(
    **mfcc_conf_torchaudio, melkwargs=mel_conf_torchaudio)


def mfcc_torchaudio():
    """Run the torchaudio MFCC transform on the sample waveform and squeeze axis 0."""
    coefficients = mfcc_extractor_torchaudio(waveform_tensor_torch)
    return coefficients.squeeze(0)


def test_mfcc_cpu_torchaudio(benchmark):
    """Benchmark the torchaudio MFCC transform on CPU and compare it with librosa.

    The module-level transform and input tensor are rebound to their CPU
    versions so the benchmarked function picks them up.

    Args:
        benchmark: The `pytest-benchmark` fixture; it runs the callable and its
            result is used here as the extracted feature.
    """
    global waveform_tensor_torch, mfcc_extractor_torchaudio

    # Rebind the declared global rather than a stray local name.
    mfcc_extractor_torchaudio = mfcc_extractor_torchaudio.to('cpu')
    waveform_tensor_torch = waveform_tensor_torch.to('cpu')

    feature_torchaudio = benchmark(mfcc_torchaudio)
    # The audio is passed as `y=`: librosa >= 0.10 only accepts it by keyword.
    feature_librosa = librosa.feature.mfcc(y=waveform, **mel_conf)
    np.testing.assert_array_almost_equal(
        feature_librosa, feature_torchaudio, decimal=3)


def test_mfcc_gpu_torchaudio(benchmark):
    """Benchmark the torchaudio MFCC transform on CUDA and compare it with librosa.

    The module-level transform and input tensor are rebound to their CUDA
    versions so the benchmarked function picks them up.

    Args:
        benchmark: The `pytest-benchmark` fixture; it runs the callable and its
            result is used here as the extracted feature.
    """
    global waveform_tensor_torch, mfcc_extractor_torchaudio

    # Rebind the declared global rather than a stray local name.
    mfcc_extractor_torchaudio = mfcc_extractor_torchaudio.to('cuda')
    waveform_tensor_torch = waveform_tensor_torch.to('cuda')

    feature_torchaudio = benchmark(mfcc_torchaudio)
    # The audio is passed as `y=`: librosa >= 0.10 only accepts it by keyword.
    feature_librosa = librosa.feature.mfcc(y=waveform, **mel_conf)
    # Bring the result back to host memory before the numpy comparison.
    np.testing.assert_array_almost_equal(
        feature_librosa, feature_torchaudio.cpu(), decimal=3)


================================================
FILE: audio/tests/common_utils/__init__.py
================================================
from .case_utils import name_func
from .case_utils import TempDirMixin
from .data_utils import get_sinusoid
from .data_utils import load_effects_params
from .data_utils import load_params
from .parameterized_utils import nested_params
from .wav_utils import get_wav_data
from .wav_utils import load_wav
from .wav_utils import normalize_wav
from .wav_utils import save_wav

# Public names of the test helper package. Every helper imported above is
# listed, including `TempDirMixin`, so a star-import exposes all of them.
__all__ = [
    "get_wav_data", "load_wav", "save_wav", "normalize_wav", "load_params",
    "nested_params", "get_sinusoid", "name_func", "load_effects_params",
    "TempDirMixin"
]


================================================
FILE: audio/tests/common_utils/case_utils.py
================================================
import os.path
import tempfile

#code is from:https://github.com/pytorch/audio/blob/main/test/torchaudio_unittest/common_utils/case_utils.py


def name_func(func, _, params):
    """Build a test name from the function name and its positional arguments.

    The result is the function name, an underscore, and the string form of each
    entry of ``params.args`` joined by underscores.
    """
    suffix = "_".join(map(str, params.args))
    return "{}_{}".format(func.__name__, suffix)


class TempDirMixin:
    """Mixin that gives test cases a temporary directory to write files into"""

    temp_dir_ = None

    @classmethod
    def get_base_temp_dir(cls):
        """Return the base directory under which temp paths are created."""
        # Setting PADDLEAUDIO_TEST_TEMP_DIR overrides the temporary directory,
        # which keeps the generated files around for debugging.
        override = os.environ.get("PADDLEAUDIO_TEST_TEMP_DIR")
        if override is not None:
            return override
        if cls.temp_dir_ is None:
            cls.temp_dir_ = tempfile.TemporaryDirectory()
        return cls.temp_dir_.name

    @classmethod
    def tearDownClass(cls):
        """Remove the temporary directory, if one was created, then defer to the parent class."""
        holder = cls.temp_dir_
        if holder is not None:
            try:
                holder.cleanup()
            except PermissionError:
                # On Windows there is a known issue with `shutil.rmtree`,
                # which fails intermittently:
                #
                # https://github.com/python/cpython/issues/74168
                #
                # It was observed on CircleCI, where the Windows job raised
                # PermissionError. Following that thread, the error is ignored.
                pass
            else:
                cls.temp_dir_ = None
        super().tearDownClass()

    def get_temp_path(self, *paths):
        """Return a path below a per-test directory, creating its parent directories."""
        full_path = os.path.join(self.get_base_temp_dir(), self.id(), *paths)
        os.makedirs(os.path.dirname(full_path), exist_ok=True)
        return full_path


================================================
FILE: audio/tests/common_utils/data_utils.py
================================================
import json
import os.path

import paddle
from parameterized import param
#code is from:https://github.com/pytorch/audio/blob/main/test/torchaudio_unittest/common_utils/data_utils.py with modification.

# Resolved parent of the directory that holds this file.
_TEST_DIR_PATH = os.path.realpath(os.path.join(os.path.dirname(__file__), ".."))


def get_asset_path(*paths):
    """Return the full path of a test asset below the ``assets`` directory"""
    assets_root = os.path.join(_TEST_DIR_PATH, "assets")
    return os.path.join(assets_root, *paths)


def load_params(*paths):
    """Read a JSON-lines asset file and wrap each decoded line in a ``param``."""
    asset_file = get_asset_path(*paths)
    with open(asset_file, "r") as handle:
        return [param(json.loads(row)) for row in handle]


def load_effects_params(*paths):
    """Load sox effect test cases from a JSON-lines file.

    Args:
        *paths: Components of the file path. They are joined with
            ``os.path.join``, so a single full path works as before and
            several components no longer end up as the ``open`` mode.

    Returns:
        list: One ``param`` per non-blank line, wrapping the decoded dict.
        Effect arguments that start with ``<ASSET_DIR>`` have that placeholder
        replaced by the asset directory.
    """
    params = []
    with open(os.path.join(*paths), "r") as file:
        for line in file:
            # Tolerate blank lines, e.g. trailing newlines at the end of the file.
            if not line.strip():
                continue
            data = json.loads(line)
            for effect in data["effects"]:
                for i, arg in enumerate(effect):
                    if arg.startswith("<ASSET_DIR>"):
                        effect[i] = arg.replace("<ASSET_DIR>", get_asset_path())
            params.append(param(data))
    return params


def convert_tensor_encoding(
        tensor: paddle.tensor,
        dtype: paddle.dtype, ):
    """Convert input tensor with values between -1 and 1 to integer encoding.

    Positive and negative values are scaled by different factors (for example
    32767 and 32768 for int16); uint8 output is additionally offset by 128.
    For any other ``dtype`` no scaling is applied and only the final
    conversion to ``dtype`` takes place.

    Args:
        tensor: input tensor, assumed between -1 and 1.
            NOTE(review): the scaling uses in-place operators (``*=``, ``+=``),
            so the caller's tensor is presumably modified too - confirm.
        dtype: desired output tensor dtype
    Returns:
        Tensor: the scaled values converted with ``paddle.to_tensor(tensor, dtype)``.
    """
    if dtype == paddle.int32:
        # Scale positives by the int32 maximum and negatives by the magnitude of its minimum.
        tensor *= (tensor > 0) * 2147483647 + (tensor < 0) * 2147483648
    if dtype == paddle.int16:
        tensor *= (tensor > 0) * 32767 + (tensor < 0) * 32768
    if dtype == paddle.uint8:
        # Scale to [-128, 127], then shift by 128 for the unsigned encoding.
        tensor *= (tensor > 0) * 127 + (tensor < 0) * 128
        tensor += 128
    tensor = paddle.to_tensor(tensor, dtype)
    return tensor


#def get_whitenoise(
#*,
#sample_rate: int = 16000,
#duration: float = 1,  # seconds
#n_channels: int = 1,
#seed: int = 0,
#dtype: Union[str, paddle.dtype] = "float32",
#device: Union[str, paddle.device] = "cpu",
#channels_first=True,
#scale_factor: float = 1,
#):
#"""Generate pseudo audio data with whitenoise
#Args:
#sample_rate: Sampling rate
#duration: Length of the resulting Tensor in seconds.
#n_channels: Number of channels
#seed: Seed value used for random number generation.
#Note that this function does not modify global random generator state.
#dtype: Torch dtype
#device: device
#channels_first: whether first dimension is n_channels
#scale_factor: scale the Tensor before clamping and quantization
#Returns:
#Tensor: shape of (n_channels, sample_rate * duration)
#"""
#if isinstance(dtype, str):
#dtype = getattr(paddle, dtype)
#if dtype not in [paddle.float64, paddle.float32, paddle.int32, paddle.int16, paddle.uint8]:
#raise NotImplementedError(f"dtype {dtype} is not supported.")
## According to the doc, forking rng on all CUDA devices is slow when there are many CUDA devices,
## so we only fork on CPU, generate values and move the data to the given device
#with paddle.random.fork_rng([]):
#paddle.random.manual_seed(seed)
#tensor = paddle.randn([n_channels, int(sample_rate * duration)], dtype=paddle.float32, device="cpu")
#tensor /= 2.0
#tensor *= scale_factor
#tensor.clamp_(-1.0, 1.0)
#if not channels_first:
#tensor = tensor.t()

#tensor = tensor.to(device)

#return convert_tensor_encoding(tensor, dtype)


def get_sinusoid(
        *,
        frequency: float=300,
        sample_rate: int=16000,
        duration: float=1,  # seconds
        n_channels: int=1,
        dtype: str="float32",
        device: str="cpu",
        channels_first: bool=True, ):
    """Build a multi-channel sine wave for use as synthetic audio.

    The first channel starts at phase zero; every other channel gets a
    random initial phase (a standard-normal sample scaled by 2*pi).

    Args:
        frequency: Frequency of the sine wave.
        sample_rate: Sampling rate.
        duration: Length of the signal in seconds.
        n_channels: Number of channels.
        dtype: Target dtype, either a paddle dtype or its attribute name on
            the ``paddle`` module (e.g. ``"float32"``).
        device: Accepted for signature compatibility; it is not referenced
            anywhere in this function.
        channels_first: When False, the generated tensor is transposed with
            ``paddle.t`` before encoding.

    Returns:
        The output of ``convert_tensor_encoding`` applied to the sine tensor,
        which is built as (n_channels, int(sample_rate * duration)).
    """
    target_dtype = getattr(paddle, dtype) if isinstance(dtype, str) else dtype
    two_pi = 2 * 3.141592653589793
    final_phase = two_pi * frequency * duration
    frame_count = int(sample_rate * duration)
    # One random phase offset per channel; the first channel is pinned to 0.
    phase_offset = two_pi * paddle.randn([n_channels, 1], dtype=paddle.float32)
    phase_offset[0, :] = 0
    phase_ramp = paddle.linspace(
        0, final_phase, frame_count, dtype=paddle.float32)
    # Broadcasting (n_channels, 1) + (frame_count,) gives one row per channel.
    wave = paddle.sin(phase_offset + phase_ramp)
    if not channels_first:
        wave = paddle.t(wave)
    return convert_tensor_encoding(wave, target_dtype)


================================================
FILE: audio/tests/common_utils/parameterized_utils.py
================================================
from itertools import product

from parameterized import param
from parameterized import parameterized


def _name_func(func, _, params):
    """Build a test name from the function name and its positional params.

    Tuple arguments are expanded element by element; every piece is joined
    with underscores and passed through ``parameterized.to_safe_name``.
    """
    pieces = [
        "_".join(str(item) for item in arg)
        if isinstance(arg, tuple) else str(arg) for arg in params.args
    ]
    suffix = "_".join(pieces)
    # sanitize the test name
    return parameterized.to_safe_name(f"{func.__name__}_{suffix}")


def nested_params(*params_set, name_func=_name_func):
    """Generate the cartesian product of the given list of parameters.

    Args:
        params_set (list of parameters): Parameters. When using ``parameterized.param`` class,
            all the parameters have to be specified with the class, only using kwargs.
        name_func (callable): Function used to name the generated test cases
            when the parameters are plain objects. It is forwarded to
            ``parameterized.expand``. It is not applied to the
            ``parameterized.param`` form, which keeps the default naming of
            ``parameterized.expand``.

    Returns:
        The decorator produced by ``parameterized.expand``.

    Raises:
        TypeError: If ``param`` objects are mixed with plain objects.
        ValueError: If any ``param`` object carries positional arguments.
    """
    flatten = [p for params in params_set for p in params]

    # Parameters to be nested are given as list of plain objects
    if all(not isinstance(p, param) for p in flatten):
        args = list(product(*params_set))
        # Honour the caller-supplied name_func; previously the module default
        # was hard-coded here and the keyword argument was silently ignored.
        return parameterized.expand(args, name_func=name_func)

    # Parameters to be nested are given as list of `parameterized.param`
    if not all(isinstance(p, param) for p in flatten):
        raise TypeError("When using ``parameterized.param``, "
                        "all the parameters have to be of the ``param`` type.")
    if any(p.args for p in flatten):
        raise ValueError(
            "When using ``parameterized.param``, "
            "all the parameters have to be provided as keyword argument.")
    # Merge the kwargs of one param from each set, building the product
    # incrementally starting from an empty param.
    args = [param()]
    for params in params_set:
        args = [param(**x.kwargs, **y.kwargs) for x in args for y in params]
    return parameterized.expand(args)


================================================
FILE: audio/tests/common_utils/sox_utils.py
================================================
import subprocess
import sys
import warnings


def get_encoding(dtype):
    """Return the sox encoding name for a dtype string.

    Raises:
        KeyError: If ``dtype`` is not one of float32, int32, int16, uint8.
    """
    signed = "signed-integer"
    by_dtype = dict(
        float32="floating-point",
        int32=signed,
        int16=signed,
        uint8="unsigned-integer", )
    return by_dtype[dtype]


def get_bit_depth(dtype):
    """Return the number of bits per sample for a dtype string.

    Raises:
        KeyError: If ``dtype`` is not one of float32, int32, int16, uint8.
    """
    widths = dict(float32=32, int32=32, int16=16, uint8=8)
    return widths[dtype]


def gen_audio_file(
        path,
        sample_rate,
        num_channels,
        *,
        encoding=None,
        bit_depth=None,
        compression=None,
        attenuation=None,
        duration=1,
        comment_file=None, ):
    """Generate synthetic audio file with `sox` command.

    Args:
        path: Output file path (string or path-like object).
        sample_rate: Value passed to ``--rate``.
        num_channels: Value passed to ``--channels``.
        encoding: Optional value for ``--encoding``.
        bit_depth: Optional value for ``--bits``; when given it is emitted
            both before and after the ``--null`` input argument.
        compression: Optional value for ``--compression``.
        attenuation: Optional attenuation in dB, applied with the ``vol``
            effect as ``-<attenuation>dB``.
        duration: Length in seconds handed to the ``synth`` effect.
        comment_file: Optional value for ``--comment-file``.

    Raises:
        subprocess.CalledProcessError: If ``sox`` exits with a non-zero status.
    """
    # Normalise once so that path-like objects (e.g. pathlib.Path) work with
    # the ``endswith`` check below as well as with the command line.
    path = str(path)
    if path.endswith(".wav"):
        warnings.warn(
            "Use get_wav_data and save_wav to generate wav file for accurate result."
        )
    command = [
        "sox",
        "-V3",  # verbose
        "--no-dither",  # disable automatic dithering
        "-R",
        # -R is supposed to be repeatable, though the implementation looks suspicious
        # and not setting the seed to a fixed value.
        # https://fossies.org/dox/sox-14.4.2/sox_8c_source.html
        # search "sox_globals.repeatable"
    ]
    if bit_depth is not None:
        command += ["--bits", str(bit_depth)]
    command += [
        "--rate",
        str(sample_rate),
        "--null",  # no input
        "--channels",
        str(num_channels),
    ]
    if compression is not None:
        command += ["--compression", str(compression)]
    if bit_depth is not None:
        command += ["--bits", str(bit_depth)]
    if encoding is not None:
        command += ["--encoding", str(encoding)]
    if comment_file is not None:
        command += ["--comment-file", str(comment_file)]
    command += [
        path,
        "synth",
        str(duration),  # synthesizes for the given duration [sec]
        "sawtooth",
        "1",
        # saw tooth covers the both ends of value range, which is a good property for test.
        # similar to linspace(-1., 1.)
        # this introduces bigger boundary effect than sine when converted to mp3
    ]
    if attenuation is not None:
        command += ["vol", f"-{attenuation}dB"]
    print(" ".join(command), file=sys.stderr)
    subprocess.run(command, check=True)


def convert_audio_file(src_path,
                       dst_path,
                       *,
                       encoding=None,
                       bit_depth=None,
                       compression=None):
    """Convert audio file with `sox` command.

    Args:
        src_path: Input file path (string or path-like object).
        dst_path: Output file path (string or path-like object).
        encoding: Optional value for ``--encoding``.
        bit_depth: Optional value for ``--bits``.
        compression: Optional value for ``--compression``.

    Raises:
        subprocess.CalledProcessError: If ``sox`` exits with a non-zero status.
    """
    command = ["sox", "-V3", "--no-dither", "-R", str(src_path)]
    if encoding is not None:
        command += ["--encoding", str(encoding)]
    if bit_depth is not None:
        command += ["--bits", str(bit_depth)]
    if compression is not None:
        command += ["--compression", str(compression)]
    # Stringify the destination like the source so that path-like objects
    # do not break the " ".join() used for logging below.
    command += [str(dst_path)]
    print(" ".join(command), file=sys.stderr)
    subprocess.run(command, check=True)


def _flatten(effects):
    """Flatten a list of effect argument lists into a single list.

    An empty/falsy input, or one whose first element is already a string,
    is returned unchanged (the very same object).
    """
    if not effects or isinstance(effects[0], str):
        return effects
    merged = []
    for group in effects:
        merged.extend(group)
    return merged


def run_sox_effect(input_file,
                   output_file,
                   effect,
                   *,
                   output_sample_rate=None,
                   output_bitdepth=None):
    """Run sox effects

    Args:
        input_file: Input file path (string or path-like object).
        output_file: Output file path (string or path-like object).
        effect: Effect arguments, either a flat list of strings or a list of
            such lists (flattened with ``_flatten``).
        output_sample_rate: When truthy, a trailing ``rate`` effect with this
            value is appended.
        output_bitdepth: When truthy, passed as ``--bits`` for the output.

    Raises:
        subprocess.CalledProcessError: If ``sox`` exits with a non-zero status.
    """
    effect = _flatten(effect)
    # Every argument is stringified so that path-like inputs and non-string
    # effect values do not break the " ".join() used for logging below.
    command = ["sox", "-V", "--no-dither", str(input_file)]
    if output_bitdepth:
        command += ["--bits", str(output_bitdepth)]
    command += [str(output_file)] + [str(item) for item in effect]
    if output_sample_rate:
        command += ["rate", str(output_sample_rate)]
    print(" ".join(command))
    subprocess.run(command, check=True)


================================================
FILE: audio/tests/common_utils/wav_utils.py
================================================
from typing import Optional
from typing import Tuple

import paddle
import scipy.io.wavfile


def normalize_wav(tensor: paddle.Tensor) -> paddle.Tensor:
    """Rescale integer PCM samples to float32.

    float32 input, and any dtype other than int32/int16/uint8, is returned
    untouched. Integer input is cast to float32 (uint8 is additionally
    shifted by -128); positive values are then divided by the type's
    positive maximum and negative values by the magnitude of its minimum.
    """
    kind = tensor.dtype
    if kind == paddle.float32:
        return tensor

    shift = None
    if kind == paddle.int32:
        pos_scale, neg_scale = 2147483647.0, 2147483648.0
    elif kind == paddle.int16:
        pos_scale, neg_scale = 32767.0, 32768.0
    elif kind == paddle.uint8:
        pos_scale, neg_scale = 127.0, 128.0
        shift = 128
    else:
        # Unrecognised dtype: pass through unchanged.
        return tensor

    tensor = paddle.cast(tensor, paddle.float32)
    if shift is not None:
        # Re-centre unsigned samples around zero before scaling.
        tensor = tensor - shift
    tensor[tensor > 0] /= pos_scale
    tensor[tensor < 0] /= neg_scale
    return tensor


def get_wav_data(
        dtype: str,
        num_channels: int,
        *,
        num_frames: Optional[int]=None,
        normalize: bool=True,
        channels_first: bool=True, ):
    """Generate a linear ramp signal of the given dtype and channel count.

    Value range of the ramp:
        [-1.0, 1.0] for float32 and float64,
        [-2147483648, 2147483647] for int32.

    Any other dtype (including int16 and uint8) currently raises
    ``NotImplementedError``; the original note in this function says paddle
    linspace does not support uint8, int8 or int16.

    Args:
        dtype: Name of a dtype attribute on the ``paddle`` module.
        num_channels: Number of times the ramp is repeated along the channel
            axis.
        num_frames: Number of points in the ramp. Defaults to 256 for
            "uint8" and ``1 << 16`` otherwise (``1 << 16`` covers the whole
            int16 value range).
        normalize: When True, the result is passed through ``normalize_wav``.
        channels_first: When False, the two axes are swapped before
            normalisation.

    Raises:
        NotImplementedError: If ``dtype`` is not float32, float64 or int32.
    """
    paddle_dtype = getattr(paddle, dtype)

    if num_frames is None:
        num_frames = 256 if dtype == "uint8" else 1 << 16

    # paddle linspace not support uint8, int8, int16
    if dtype in ("float32", "float64"):
        low, high = -1.0, 1.0
    elif dtype == "int32":
        low, high = -2147483648, 2147483647
    else:
        raise NotImplementedError(f"Unsupported dtype {dtype}")

    ramp = paddle.linspace(low, high, num_frames, dtype=paddle_dtype)
    signal = ramp.tile([num_channels, 1])
    if not channels_first:
        signal = signal.transpose([1, 0])
    return normalize_wav(signal) if normalize else signal


def load_wav(path: str, normalize=True,
             channels_first=True) -> Tuple[paddle.Tensor, int]:
    """Load wav file without paddleaudio

    Args:
        path: Path of the wav file, read with ``scipy.io.wavfile.read``.
        normalize: When True, samples are passed through ``normalize_wav``.
        channels_first: When True, the two axes of the loaded data are
            swapped before returning.

    Returns:
        A ``(data, sample_rate)`` tuple. Single-dimensional data gets a
        trailing axis of size 1 before the optional transpose.
    """
    sample_rate, data = scipy.io.wavfile.read(path)
    data = paddle.to_tensor(data.copy())
    if data.ndim == 1:
        data = data.unsqueeze(1)
    if normalize:
        data = normalize_wav(data)
    if channels_first:
        data = data.transpose([1, 0])
    return data, sample_rate


def save_wav(path, data, sample_rate, channels_first=True):
    """Save wav file without paddleaudio

    When ``channels_first`` is True the two axes of ``data`` are swapped
    before the samples are handed to ``scipy.io.wavfile.write``.
    """
    to_write = data.transpose([1, 0]) if channels_first else data
    scipy.io.wavfile.write(path, sample_rate, to_write.numpy())


================================================
FILE: audio/tests/features/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: audio/tests/features/base.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
import urllib.request

import numpy as np
import paddle
from paddleaudio.backends import soundfile_load as load

# Default sample wav; FeatTest.initWavInput downloads it into the current
# working directory when no file with the same base name exists there.
wav_url = 'https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav'


class FeatTest(unittest.TestCase):
    """Base class for feature tests.

    ``setUp`` calls ``initParams`` (which subclasses must override), loads
    the reference waveform into ``self.waveform`` / ``self.sr`` and selects
    the paddle device.
    """

    def setUp(self):
        self.initParams()
        self.initWavInput()
        self.setUpDevice()

    def setUpDevice(self, device='cpu'):
        """Select the paddle device used by the test."""
        paddle.set_device(device)

    def initWavInput(self, url=wav_url):
        """Fetch the wav at ``url`` if not present locally and load it.

        The file is stored under its base name in the current working
        directory. The loaded waveform is cast to float32 and, when it is
        one-dimensional, given a leading axis of size 1.
        """
        local_name = os.path.basename(url)
        if not os.path.isfile(local_name):
            urllib.request.urlretrieve(url, local_name)
        raw_waveform, self.sr = load(os.path.abspath(local_name))
        # paddlespeech.audio.transform.spectrogram only supports float32
        self.waveform = raw_waveform.astype(np.float32)

        rank = len(self.waveform.shape)
        assert rank in [1, 2]
        if rank == 1:
            self.waveform = np.expand_dims(self.waveform, 0)

    def initParams(self):
        """Hook for subclasses to define their test parameters."""
        raise NotImplementedError


================================================
FILE: audio/tests/features/test_istft.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest

import numpy as np
import paddle
from paddleaudio.functional.window import get_window

from .base import FeatTest
from paddlespeech.audio.transform.spectrogram import IStft
from paddlespeech.audio.transform.spectrogram import Stft


class TestIstft(FeatTest):
    """Compare paddlespeech ``IStft`` against ``paddle.signal.istft``."""

    def initParams(self):
        self.n_fft = 512
        self.hop_length = 128
        self.window_str = 'hann'

    def test_istft(self):
        # Forward transform with the paddlespeech implementation.
        stft_op = Stft(self.n_fft, self.hop_length)
        spectrum = stft_op(
            self.waveform.T).squeeze(1).T  # (n_fft//2 + 1, n_frames)
        spectrum_tensor = paddle.to_tensor(spectrum)

        # Inverse with paddlespeech.
        istft_op = IStft(self.hop_length)
        restored_ps = istft_op(spectrum.T)

        # Inverse with paddle, using the same window type.
        analysis_window = get_window(
            self.window_str, self.n_fft, dtype=self.waveform.dtype)
        restored_pd = paddle.signal.istft(
            spectrum_tensor,
            self.n_fft,
            self.hop_length,
            window=analysis_window)

        np.testing.assert_array_almost_equal(
            restored_ps, restored_pd, decimal=5)


# Run the tests in this module when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: audio/tests/features/test_kaldi.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest

import numpy as np
import paddle
import paddleaudio
import torch
import torchaudio
from base import FeatTest


class TestKaldi(FeatTest):
    """Compare paddleaudio windows and Kaldi-style features with torch/torchaudio."""

    def initParams(self):
        self.window_size = 1024
        self.dtype = 'float32'

    def test_window(self):
        """Symmetric hann, hamming and povey windows must match torch's."""
        # Resolve the dtype objects by attribute lookup instead of eval().
        torch_dtype = getattr(torch, self.dtype)
        paddle_dtype = getattr(paddle, self.dtype)

        t_hann_window = torch.hann_window(
            self.window_size, periodic=False, dtype=torch_dtype)
        t_hamm_window = torch.hamming_window(
            self.window_size,
            periodic=False,
            alpha=0.54,
            beta=0.46,
            dtype=torch_dtype)
        # The povey window is built here as a hann window raised to 0.85.
        t_povey_window = torch.hann_window(
            self.window_size, periodic=False, dtype=torch_dtype).pow(0.85)

        p_hann_window = paddleaudio.functional.window.get_window(
            'hann', self.window_size, fftbins=False, dtype=paddle_dtype)
        p_hamm_window = paddleaudio.functional.window.get_window(
            'hamming', self.window_size, fftbins=False, dtype=paddle_dtype)
        p_povey_window = paddleaudio.functional.window.get_window(
            'hann', self.window_size, fftbins=False,
            dtype=paddle_dtype).pow(0.85)

        np.testing.assert_array_almost_equal(t_hann_window, p_hann_window)
        np.testing.assert_array_almost_equal(t_hamm_window, p_hamm_window)
        np.testing.assert_array_almost_equal(t_povey_window, p_povey_window)

    def test_fbank(self):
        """Kaldi-compliant fbank must agree with torchaudio to 4 decimals."""
        ta_features = torchaudio.compliance.kaldi.fbank(
            torch.from_numpy(self.waveform.astype(self.dtype)))
        pa_features = paddleaudio.compliance.kaldi.fbank(
            paddle.to_tensor(self.waveform.astype(self.dtype)))
        np.testing.assert_array_almost_equal(
            ta_features, pa_features, decimal=4)

    def test_mfcc(self):
        """Kaldi-compliant mfcc must agree with torchaudio to 4 decimals."""
        ta_features = torchaudio.compliance.kaldi.mfcc(
            torch.from_numpy(self.waveform.astype(self.dtype)))
        pa_features = paddleaudio.compliance.kaldi.mfcc(
            paddle.to_tensor(self.waveform.astype(self.dtype)))
        np.testing.assert_array_almost_equal(
            ta_features, pa_features, decimal=4)


# Run the tests in this module when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: audio/tests/features/test_kaldi_feat.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#import platform
import unittest

import kaldiio
import numpy as np
from kaldiio import ReadHelper
from paddleaudio.kaldi import fbank as fbank
#from paddleaudio.kaldi import pitch as pitch

# The ground-truth features were computed with the Kaldi commands below.
#compute-fbank-feats  --dither=0 scp:$wav_scp ark,t:fbank_feat.ark
#compute-kaldi-pitch-feats --sample-frequency=16000 scp:$wav_scp ark,t:pitch_feat.ark


class TestKaldiFbank(unittest.TestCase):
    """Check ``paddleaudio.kaldi.fbank`` against features stored in an ark file."""

    def test_fbank(self):
        # Collect the reference features keyed by utterance id.
        with ReadHelper('ark:testdata/fbank_feat.ark') as reader:
            reference = {utt_id: feats for utt_id, feats in reader}

        _, samples = kaldiio.wavio.read_wav('testdata/test.wav')
        computed = fbank(samples)
        expected = reference['test_wav']
        np.testing.assert_array_almost_equal(computed, expected, decimal=4)

    #def test_pitch(self):
    #    pitch_groundtruth = {}
    #    if platform.system() != "Linux":
    #        pass
    #    with ReadHelper('ark:testdata/pitch_feat.ark') as reader:
    #        for key, feat in reader:
    #            pitch_groundtruth[key] = feat

    #    wav_rate, wav = kaldiio.wavio.read_wav('testdata/test.wav')
    #    pitch_feat = pitch(wav)
    #    pitch_check = pitch_groundtruth['test_wav']
    #    np.testing.assert_array_almost_equal(pitch_feat, pitch_check, decimal=4)


# Run the tests in this module when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: audio/tests/features/test_librosa.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest

import librosa
import numpy as np
import paddle
import paddleaudio
from base import FeatTest
from paddleaudio.functional.window import get_window


class TestLibrosa(FeatTest):
    """Compare paddle / paddleaudio features against librosa references."""

    def initParams(self):
        self.n_fft = 512
        self.hop_length = 128
        self.n_mels = 40
        self.n_mfcc = 20
        self.fmin = 0.0
        self.window_str = 'hann'
        self.pad_mode = 'reflect'
        self.top_db = 80.0

    def _squeeze_channel_axis(self):
        """Drop the leading axis of a 2-D waveform in place and return it.

        The librosa calls in these tests are fed a 1-D signal.
        """
        if len(self.waveform.shape) == 2:  # (C, T)
            self.waveform = self.waveform.squeeze(0)
        return self.waveform

    def test_stft(self):
        signal = self._squeeze_channel_axis()

        expected = librosa.core.stft(
            y=signal,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            win_length=None,
            window=self.window_str,
            center=True,
            dtype=None,
            pad_mode=self.pad_mode, )

        batched = paddle.to_tensor(signal).unsqueeze(0)
        analysis_window = get_window(
            self.window_str, self.n_fft, dtype=batched.dtype)
        actual = paddle.signal.stft(
            x=batched,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            win_length=None,
            window=analysis_window,
            center=True,
            pad_mode=self.pad_mode,
            normalized=False,
            onesided=True, ).squeeze(0)

        np.testing.assert_array_almost_equal(expected, actual, decimal=5)

    def test_istft(self):
        signal = self._squeeze_channel_axis()

        # Get stft result from librosa.
        spectrum = librosa.core.stft(
            y=signal,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            win_length=None,
            window=self.window_str,
            center=True,
            pad_mode=self.pad_mode, )

        expected = librosa.core.istft(
            stft_matrix=spectrum,
            hop_length=self.hop_length,
            win_length=None,
            window=self.window_str,
            center=True,
            dtype=None,
            length=None, )

        batched = paddle.to_tensor(spectrum).unsqueeze(0)
        # The window dtype follows the dtype paddle assigns to the waveform.
        window_dtype = paddle.to_tensor(signal).dtype
        synthesis_window = get_window(
            self.window_str, self.n_fft, dtype=window_dtype)
        actual = paddle.signal.istft(
            x=batched,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            win_length=None,
            window=synthesis_window,
            center=True,
            normalized=False,
            onesided=True,
            length=None,
            return_complex=False, ).squeeze(0)

        np.testing.assert_array_almost_equal(expected, actual, decimal=5)

    def test_mel(self):
        expected = librosa.filters.mel(
            sr=self.sr,
            n_fft=self.n_fft,
            n_mels=self.n_mels,
            fmin=self.fmin,
            fmax=None,
            htk=False,
            norm='slaney',
            dtype=self.waveform.dtype, )

        from_compliance = paddleaudio.compliance.librosa.compute_fbank_matrix(
            sr=self.sr,
            n_fft=self.n_fft,
            n_mels=self.n_mels,
            fmin=self.fmin,
            fmax=None,
            htk=False,
            norm='slaney',
            dtype=self.waveform.dtype, )

        waveform_tensor = paddle.to_tensor(self.waveform)
        from_functional = paddleaudio.functional.compute_fbank_matrix(
            sr=self.sr,
            n_fft=self.n_fft,
            n_mels=self.n_mels,
            f_min=self.fmin,
            f_max=None,
            htk=False,
            norm='slaney',
            dtype=waveform_tensor.dtype, )

        np.testing.assert_array_almost_equal(expected, from_compliance)
        np.testing.assert_array_almost_equal(expected, from_functional)

    def test_melspect(self):
        signal = self._squeeze_channel_axis()

        # librosa:
        expected = librosa.feature.melspectrogram(
            y=signal,
            sr=self.sr,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            fmin=self.fmin)

        # paddleaudio.compliance.librosa:
        from_compliance = paddleaudio.compliance.librosa.melspectrogram(
            x=signal,
            sr=self.sr,
            window_size=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            fmin=self.fmin,
            to_db=False)

        # paddle.audio.features layer
        batched = paddle.to_tensor(
            signal, dtype=paddle.float64).unsqueeze(0)  # Add batch dim.
        mel_layer = paddle.audio.features.MelSpectrogram(
            sr=self.sr,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            f_min=self.fmin,
            dtype=batched.dtype)
        from_layer = mel_layer(batched).squeeze(0).numpy()

        np.testing.assert_array_almost_equal(
            expected, from_compliance, decimal=5)
        np.testing.assert_array_almost_equal(expected, from_layer, decimal=5)

    def test_log_melspect(self):
        signal = self._squeeze_channel_axis()

        # librosa:
        mel_power = librosa.feature.melspectrogram(
            y=signal,
            sr=self.sr,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            fmin=self.fmin)
        expected = librosa.power_to_db(mel_power, top_db=None)

        # paddleaudio.compliance.librosa:
        from_compliance = paddleaudio.compliance.librosa.melspectrogram(
            x=signal,
            sr=self.sr,
            window_size=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            fmin=self.fmin)

        # paddle.audio.features layer
        batched = paddle.to_tensor(
            signal, dtype=paddle.float64).unsqueeze(0)  # Add batch dim.
        log_mel_layer = paddle.audio.features.LogMelSpectrogram(
            sr=self.sr,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            f_min=self.fmin,
            dtype=batched.dtype)
        from_layer = log_mel_layer(batched).squeeze(0).numpy()

        np.testing.assert_array_almost_equal(
            expected, from_compliance, decimal=5)
        np.testing.assert_array_almost_equal(expected, from_layer, decimal=4)

    def test_mfcc(self):
        signal = self._squeeze_channel_axis()

        # librosa:
        expected = librosa.feature.mfcc(
            y=signal,
            sr=self.sr,
            S=None,
            n_mfcc=self.n_mfcc,
            dct_type=2,
            norm='ortho',
            lifter=0,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            fmin=self.fmin)

        # paddleaudio.compliance.librosa:
        from_compliance = paddleaudio.compliance.librosa.mfcc(
            x=signal,
            sr=self.sr,
            n_mfcc=self.n_mfcc,
            dct_type=2,
            norm='ortho',
            lifter=0,
            window_size=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            fmin=self.fmin,
            top_db=self.top_db)

        # paddle.audio.features layer
        batched = paddle.to_tensor(
            signal, dtype=paddle.float64).unsqueeze(0)  # Add batch dim.
        mfcc_layer = paddle.audio.features.MFCC(
            sr=self.sr,
            n_mfcc=self.n_mfcc,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            f_min=self.fmin,
            top_db=self.top_db,
            dtype=batched.dtype)
        from_layer = mfcc_layer(batched).squeeze(0).numpy()

        np.testing.assert_array_almost_equal(
            expected, from_compliance, decimal=4)
        np.testing.assert_array_almost_equal(expected, from_layer, decimal=4)


# Run the tests in this module when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: audio/tests/features/test_log_melspectrogram.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest

import numpy as np
import paddle
import paddleaudio

from .base import FeatTest
from paddlespeech.audio.transform.spectrogram import LogMelSpectrogram


class TestLogMelSpectrogram(FeatTest):
    """Compare paddlespeech ``LogMelSpectrogram`` with the paddleaudio layer."""

    def initParams(self):
        self.n_fft = 512
        self.hop_length = 128
        self.n_mels = 40

    def test_log_melspect(self):
        # paddlespeech transform
        speech_transform = LogMelSpectrogram(self.sr, self.n_mels, self.n_fft,
                                             self.hop_length)
        expected = speech_transform(self.waveform.T).squeeze(1).T

        # paddleaudio layer; its output is divided by 10.0 before comparing.
        waveform_tensor = paddle.to_tensor(self.waveform)
        audio_layer = paddleaudio.features.LogMelSpectrogram(
            self.sr,
            self.n_fft,
            self.hop_length,
            power=1.0,
            n_mels=self.n_mels,
            f_min=0.0)
        scaled = audio_layer(waveform_tensor) / 10.0
        actual = scaled.squeeze(0).numpy()

        np.testing.assert_array_almost_equal(expected, actual, decimal=5)


# Run the tests in this module when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: audio/tests/features/test_spectrogram.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest

import numpy as np
import paddle
import paddleaudio

from .base import FeatTest
from paddlespeech.audio.transform.spectrogram import Spectrogram


class TestSpectrogram(FeatTest):
    """Cross-check of two magnitude spectrogram implementations.

    The paddlespeech ``Spectrogram`` transform is compared against
    ``paddle.audio.features.Spectrogram`` on the same waveform.

    ``self.waveform`` is not assigned in this class; it is presumably
    provided by ``FeatTest`` (see ``.base``) -- TODO confirm.
    """

    def initParams(self):
        """Set the STFT parameters used by both implementations."""
        self.n_fft, self.hop_length = 512, 128

    def test_spectrogram(self):
        """Both implementations must agree to 5 decimal places."""
        # paddlespeech side (magnitude): called on the transposed waveform,
        # then the axis at index 1 is squeezed out and the result transposed.
        ps_extractor = Spectrogram(self.n_fft, self.hop_length)
        ps_feat = ps_extractor(self.waveform.T)
        ps_res = ps_feat.squeeze(1).T

        # NOTE(review): the reference layer comes from ``paddle.audio`` and
        # not from the ``paddleaudio`` package this module imports --
        # confirm that is intended.
        wav_tensor = paddle.to_tensor(self.waveform)
        pa_extractor = paddle.audio.features.Spectrogram(
            self.n_fft, self.hop_length, power=1.0)
        pa_feat = pa_extractor(wav_tensor)
        pa_res = pa_feat.squeeze(0).numpy()

        np.testing.assert_array_almost_equal(ps_res, pa_res, decimal=5)


# Run this module's tests through unittest when it is executed as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: audio/tests/features/test_stft.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest

import numpy as np
import paddle
from paddleaudio.functional.window import get_window

from .base import FeatTest
from paddlespeech.audio.transform.spectrogram import Stft


class TestStft(FeatTest):
    """Cross-check of the paddlespeech ``Stft`` transform against
    ``paddle.signal.stft`` on the same waveform.

    ``self.waveform`` is not assigned in this class; it is presumably
    provided by ``FeatTest`` (see ``.base``) -- TODO confirm.
    """

    def initParams(self):
        """Set the STFT parameters and the window name for the reference."""
        self.n_fft, self.hop_length = 512, 128
        self.window_str = 'hann'

    def test_stft(self):
        """Both implementations must agree to 5 decimal places."""
        # paddlespeech side: called on the transposed waveform; after the
        # squeeze and transpose the layout is (n_fft//2 + 1, n_frames).
        ps_transform = Stft(self.n_fft, self.hop_length)
        ps_out = ps_transform(self.waveform.T)
        ps_res = ps_out.squeeze(1).T

        # paddle side: the window is built explicitly with the tensor's dtype.
        # NOTE(review): ``Stft`` above is given no window argument, so this
        # assumes its default matches ``self.window_str`` -- confirm.
        wav_tensor = paddle.to_tensor(self.waveform)
        win = get_window(self.window_str, self.n_fft, dtype=wav_tensor.dtype)
        pd_out = paddle.signal.stft(
            wav_tensor, self.n_fft, self.hop_length, window=win)
        pd_res = pd_out.squeeze(0).numpy()

        np.testing.assert_array_almost_equal(ps_res, pd_res, decimal=5)


# Run this module's tests through unittest when it is executed as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: audio/tests/features/testdata/fbank_feat_txt.ark
================================================
test_wav  [
  8.86961 7.025289 6.664165 7.169617 7.317829 7.188704 8.351522 8.843228 7.711394 7.231504 6.903938 7.053499 7.293597 8.331067 7.871729 9.206844 9.434045 9.768963 10.01864 10.25888 10.68228 10.55968 10.62156 
  8.364346 7.526375 6.915925 6.705005 7.641569 7.827819 8.253532 7.794802 7.522578 7.222802 7.388284 7.493527 8.257078 9.141049 8.994849 9.348937 9.015431 9.343955 10.42236 10.13459 10.40709 10.39534 10.22199 
  7.230361 6.771988 6.422344 7.535786 7.164408 6.342811 8.723886 8.481328 6.804535 7.276428 7.471786 7.581892 8.757826 8.764767 8.570841 8.741215 9.756334 9.515329 9.720121 10.26671 10.67728 10.5581 10.61378 
  8.872903 7.371414 6.360067 6.153208 7.333708 8.0974 8.851793 8.730195 7.622618 7.169075 7.85181 7.04872 7.978426 8.408302 8.802312 8.395834 9.217923 8.381662 9.777567 10.40261 10.37856 10.31888 10.34441 
  7.532231 6.751142 8.175496 7.593341 8.063697 8.369373 7.881088 8.15251 7.428252 7.103447 7.989072 7.949497 8.124873 8.496659 8.553727 8.761693 9.109408 9.4684 10.13223 10.16827 9.71019 10.59482 10.90851 
  6.746207 6.833902 8.067636 7.6485 7.013279 7.693223 8.147296 8.030097 7.067122 8.186153 7.784977 8.756321 8.6457 8.458344 7.769485 8.669812 9.332602 9.097817 9.444702 9.950351 10.18657 10.94016 11.36237 
  7.128079 7.711221 6.469111 6.649592 6.508276 7.082622 7.440871 7.623552 7.594537 7.354738 8.278585 8.652099 8.709033 8.605441 8.353497 8.4307 9.50789 9.312969 9.785786 10.18853 10.35708 10.71134 11.13716 
  8.201805 8.373339 7.420624 6.559644 6.318965 6.861043 7.299667 7.459548 7.392651 7.774329 7.917755 8.39668 8.304465 7.503887 8.319722 9.310571 9.144365 9.811728 9.738833 9.825593 10.65616 10.42954 10.28108 
  8.140867 8.803907 7.842072 6.760153 6.040633 7.099484 7.671337 7.230731 7.280159 7.825505 8.67013 7.970374 7.691161 7.603088 8.121623 8.186304 8.884687 9.764359 9.774008 10.14217 10.57618 10.70955 10.46593 
  9.335081 7.443659 6.302314 6.274453 6.648484 7.101899 8.31965 7.279731 7.011406 7.738722 8.111496 7.826912 9.217668 8.704701 8.520703 9.237556 9.141415 9.966189 10.13588 10.02818 10.32232 10.5961 10.39195 
  8.623323 6.642638 6.092153 6.010561 6.961139 7.523609 7.631736 6.81785 6.3952 7.437364 7.784822 8.421501 8.569756 8.931039 8.394889 8.730085 8.568606 9.672934 10.10908 10.22258 10.30489 10.11329 10.44942 
  8.340079 7.404711 6.73463 6.621991 6.641872 6.501914 7.429152 7.325783 7.075316 7.198759 7.573832 7.726879 8.173958 8.713857 8.947656 9.52945 9.495684 10.04432 9.70799 9.853218 10.34904 10.31011 10.33581 
  9.458065 6.449831 5.764122 6.112749 6.633307 6.588135 7.683532 7.854395 7.497962 6.587214 7.522266 8.246718 7.842532 8.194103 8.769976 9.494311 9.410578 9.623136 9.262513 9.66307 9.958323 10.58842 10.94138 
  9.336168 7.658177 6.552251 4.668932 5.951582 7.329723 6.881893 7.673193 8.018956 7.765876 8.092113 7.657069 8.385877 8.804426 8.99403 9.033966 9.84664 9.652982 9.678547 9.901299 10.55994 10.98264 10.42628 
  8.845008 7.827737 6.934293 6.809871 7.603892 7.622493 7.295815 7.316114 7.707997 8.28838 7.455571 7.749361 8.357333 8.413839 8.780228 9.111949 8.773423 9.546294 9.316511 9.7621 9.853085 10.24652 10.58686 
  9.845795 7.653487 6.760835 6.417203 6.483883 8.249362 8.985138 7.782502 8.038197 7.994664 7.512061 7.712332 8.276911 8.76414 8.027122 8.730929 8.986988 9.50342 9.225771 9.793655 10.35338 10.19652 10.67655 
  8.068243 7.419188 7.323668 6.883723 7.588089 7.267737 7.464292 8.121238 7.117096 7.165044 7.638491 7.958453 8.368897 8.497821 8.06803 8.636445 9.186031 9.296571 9.991373 10.35782 10.44223 10.60756 10.9512 
  8.540399 7.109134 6.417945 6.851756 7.50145 7.613665 7.395747 8.489678 7.192803 8.40198 8.48846 7.516579 8.291675 9.133558 8.94935 9.040503 9.475376 9.886353 10.04679 10.23742 10.22118 10.67988 10.29023 
  9.943547 6.745187 7.141617 7.058182 7.203416 8.045156 7.670315 7.748672 7.009519 7.208478 7.261959 8.346151 8.425858 8.822375 8.973361 9.907825 9.710265 9.542497 9.383007 9.832958 10.30413 10.9831 10.99566 
  9.077311 7.305787 7.036552 6.220779 6.492191 6.642952 8.301676 8.177285 7.706949 7.897906 7.814847 7.765959 8.228884 8.499186 8.701291 8.90225 9.000106 9.510703 9.477421 9.869934 10.31142 10.33504 10.62863 
  9.609183 6.470312 6.850113 7.247727 6.606174 7.178535 7.569305 7.858948 7.907071 7.322339 7.393857 8.411836 8.555615 8.416198 8.268435 8.814535 9.016805 9.221167 8.856338 9.819329 10.63624 10.67038 10.4818 
  8.896682 6.96038 7.062835 6.841669 6.556721 7.257597 9.19041 8.196911 7.560658 7.475944 7.570982 8.699837 8.718691 8.698103 9.604801 9.410757 9.482757 9.489608 10.0253 10.21278 10.42956 10.25078 10.40438 
  10.05012 7.997848 8.523865 7.810099 7.064989 7.438704 8.866325 8.385101 8.702444 7.532755 6.970853 8.658536 8.904259 8.098039 8.240754 9.383977 9.924636 9.825594 10.16586 9.929379 10.33848 10.6166 10.88825 
  9.945673 7.943526 8.236642 7.982066 7.059935 7.337771 8.282602 7.669298 6.812744 7.241785 7.059946 8.290605 8.874635 8.206081 9.037805 9.692253 9.771944 9.658941 9.887684 9.687504 10.12401 10.5467 10.46784 
  9.24334 8.005972 8.029324 7.080775 6.692482 7.339699 6.330315 7.749064 7.191765 7.614157 7.584374 8.707373 8.633794 7.661246 8.89745 8.717183 8.727042 9.601094 9.761832 9.91165 10.89605 10.7561 10.4439 
  9.008655 7.696863 8.051159 6.21505 6.986367 7.75211 7.125566 8.008561 8.631255 7.692895 7.423654 8.070612 7.934104 7.481667 7.878049 8.699003 9.587379 9.81943 9.638152 10.14519 10.48706 10.67093 10.95845 
  9.56725 7.086317 7.604103 6.621353 6.861667 6.762026 7.150949 8.061196 7.548547 6.552682 7.445788 8.408651 8.276496 7.768121 8.235985 9.36837 9.303123 9.568222 9.907539 10.04948 10.3193 10.38864 11.01985 
  9.435867 7.065077 8.260999 7.606821 7.11696 6.622849 7.453804 7.729028 6.969114 7.593372 8.076168 8.142076 7.987474 7.750668 8.709545 9.194512 9.28443 9.696207 10.08809 10.43397 10.95729 10.62421 10.32698 
  7.326074 6.968657 7.991836 8.057238 8.277269 6.64677 8.417533 8.390658 7.716748 7.254366 8.046806 8.952176 8.943592 8.626341 8.475414 8.366508 9.379741 9.649915 9.956218 10.14107 10.0686 10.3009 10.63128 
  8.867029 7.436584 8.325071 8.151552 7.802304 7.204567 7.817641 8.41904 8.079826 7.800119 7.738661 8.019406 8.496906 8.371117 8.895446 8.874495 9.541505 9.603241 9.702045 9.520963 10.07405 10.22188 10.69203 
  7.892157 7.706676 8.708323 8.090714 7.298046 7.119635 7.677824 8.568966 9.065309 7.950867 7.361037 8.006032 7.704475 8.136228 9.241001 9.342211 9.766371 9.645364 10.53355 10.35027 10.26263 10.8308 10.88063 
  9.17825 6.552885 7.865911 7.314655 7.429624 7.467305 7.331274 8.097523 8.513691 7.603092 8.123087 8.988563 8.24368 7.665757 8.043156 9.323641 9.559673 10.4114 10.44213 10.46176 10.31303 10.41219 10.04096 
  9.499084 6.913695 8.680465 8.913202 7.983476 7.54614 8.20214 8.433187 7.619872 7.079637 7.59503 7.827682 8.021211 8.954014 8.512682 8.685501 9.039448 9.882102 10.09629 9.995301 10.25326 10.52707 10.41024 
  8.841949 7.852749 8.220187 8.067208 6.958006 6.525739 7.606658 8.743006 8.618779 7.857424 7.85938 6.839745 8.494206 7.93556 8.554915 9.17892 9.411014 9.403722 9.75618 9.915223 10.4127 10.62058 10.75261 
  8.80931 7.176473 8.226482 8.048065 6.875594 7.035853 8.159007 8.788584 7.998541 7.745961 7.02769 7.343524 8.768233 8.742394 8.993815 8.919962 8.948602 9.299537 9.644719 9.328181 10.0551 10.60812 10.26714 
  8.416738 7.433433 8.202932 7.654713 7.487177 7.454067 7.807778 8.21654 7.973643 7.745943 7.477229 7.699207 8.724244 8.921854 9.167027 9.329788 9.198338 9.449234 9.350556 9.504007 10.01113 10.77754 10.79311 
  7.265522 8.85788 8.794858 6.660202 5.630237 7.668158 8.690257 8.2572 7.200497 7.342714 7.748627 8.173976 8.632828 8.53996 9.053345 9.001765 9.227647 10.09744 9.63631 10.25264 9.908823 11.11253 11.10346 
  8.641815 8.838676 8.314915 6.847168 5.620167 7.186635 8.486069 8.301935 7.863872 7.790708 7.733249 8.113005 8.49118 7.827488 8.389672 8.932463 9.147495 10.10519 10.22344 10.33928 10.09212 10.47646 10.13575 
  7.634632 6.384978 8.24638 7.503924 6.99588 7.640463 7.480204 7.821477 7.610888 7.87086 8.010333 7.981211 7.871301 8.551961 8.90114 9.304276 8.653571 9.205483 9.727321 10.48769 10.20595 10.13756 10.41514 
  8.518332 7.862745 8.034936 8.2016 6.675735 7.573159 8.171587 8.14386 7.991887 6.929758 7.718138 7.759238 8.233613 7.833398 9.010793 8.673923 9.241878 10.12235 10.55527 10.50808 10.58219 10.75335 10.4664 
  8.728762 7.574676 6.733976 7.824086 8.000705 7.302856 7.860464 8.410678 7.924881 6.743472 7.201906 7.431501 7.915576 8.187505 8.869449 8.608752 9.654437 10.20065 9.895975 10.05015 10.0937 11.01951 10.82668 
  7.629794 6.486794 6.295053 7.050656 7.523815 7.511558 8.234637 8.500476 8.005258 7.928689 8.219633 8.856939 8.562517 8.864034 9.246393 9.440809 9.380711 10.00752 9.569251 9.957119 10.54277 10.3938 10.80297 
  9.755882 6.397912 7.59745 7.644662 7.147341 7.108955 8.413054 8.151018 7.909472 7.656272 9.413692 11.06461 10.69808 10.64652 11.38675 11.21603 11.1705 10.6691 9.91959 10.95611 11.29983 11.17031 10.97226 
  9.474765 7.803607 8.385871 6.384162 7.948147 7.78756 8.428943 8.824086 8.393611 8.634716 8.208453 8.799019 9.028046 9.011028 9.807654 10.60488 10.43945 10.06425 10.27431 9.998198 11.03344 10.98869 10.06162 
  8.450254 7.49551 6.799858 6.310681 7.20258 8.050933 8.205685 8.776224 7.973586 7.784925 7.730099 8.780946 9.105856 8.773993 8.182259 8.562423 9.90097 9.80205 10.24598 10.17385 10.20839 10.46425 10.26094 
  8.252859 7.287729 7.475165 7.372538 6.989303 7.400277 6.899447 8.107825 7.968466 7.659332 8.521437 9.400009 9.208941 8.744918 8.120193 8.727738 9.995106 9.742892 9.950342 9.993467 10.03006 10.28127 10.45164 
  8.592541 7.811454 7.721195 6.033926 7.451138 7.954978 7.936564 8.032441 8.207817 7.761642 7.824575 8.806037 8.584102 9.029242 9.405202 9.570443 10.0666 9.799012 10.14801 10.00847 10.26264 10.47974 10.30221 
  8.263712 7.622368 7.388706 6.853238 7.028842 8.005108 7.877354 8.477866 8.796792 8.305426 8.403102 8.24081 8.906131 8.901518 9.546893 9.517485 9.934557 9.7798 10.04766 9.671112 10.1303 10.74265 10.63667 
  9.357657 8.026215 7.907611 7.254067 6.912889 7.510985 8.215575 8.492505 8.151805 7.310562 8.163457 8.056206 8.009975 8.118791 8.972518 9.303927 9.412671 9.438314 9.8068 9.750667 10.35458 10.35285 10.09931 
  8.511848 7.523149 8.195268 7.966453 7.744322 8.485562 8.004887 7.922952 8.245237 7.383193 7.844249 8.127041 8.127259 8.443523 8.102531 8.774108 8.681726 9.477551 8.882012 9.804171 10.55685 10.48077 10.37192 
  7.975676 7.082146 7.933741 6.951503 6.272567 7.088233 7.917744 7.379583 8.238628 7.820266 8.475443 8.424726 8.05715 8.067741 8.94787 8.867351 9.374059 9.266661 9.49703 10.01138 10.3168 10.71612 10.27346 
  8.813721 8.656951 7.681757 7.203363 7.488046 8.216457 8.221495 7.714307 7.938663 7.708951 8.019526 7.926672 7.864351 7.698409 8.801851 8.555806 9.788485 9.553381 9.62245 10.2158 9.915847 10.40117 10.80687 
  6.477304 7.734454 8.373713 6.965827 8.077723 8.442548 7.893295 8.407219 7.909376 8.080144 8.55372 7.803288 6.868239 8.449481 8.860373 8.910608 9.437862 9.165442 9.82246 10.30201 10.48472 10.77984 11.18594 
  9.42877 10.45913 10.29159 9.716755 9.220295 8.658374 6.794528 6.873018 7.217855 8.288869 8.809176 8.809996 8.093638 8.87556 9.207784 9.647738 9.825712 10.08347 9.918015 10.10553 10.54475 11.02415 10.92163 
  13.23045 15.72255 15.14105 16.25381 15.65043 12.23466 10.81911 10.62931 10.80597 11.77175 12.40585 12.68368 13.26326 11.76209 10.96095 14.30546 15.08591 12.37251 12.23275 11.97119 10.67747 10.71765 10.87486 
  13.65519 16.94076 16.0239 18.20588 16.9831 13.7648 12.16126 11.07043 11.80799 12.73412 12.98298 12.90398 14.25156 11.9087 10.24638 14.79149 15.7653 13.01829 12.92099 13.79269 11.3934 11.02932 11.13173 
  13.2973 17.06046 16.08663 18.53358 17.69795 13.95008 12.40663 11.98961 12.74475 13.79677 14.19347 14.3372 14.89816 13.25463 12.57261 16.00417 16.9676 13.95466 13.9528 14.54457 12.07387 11.57656 11.49943 
  11.75321 16.53652 16.49979 19.33843 18.63615 13.93179 11.64835 11.64008 12.97903 13.87796 14.90638 15.14878 15.24947 14.3757 13.73179 15.41584 15.93701 14.80291 14.12144 14.58253 12.19397 12.21274 12.09839 
  12.51654 17.17333 16.62761 20.22687 19.77942 13.27525 11.65 12.57212 13.83558 14.21572 15.06257 15.13174 14.75701 15.54657 14.15343 15.183 16.24345 14.94379 14.57995 14.90003 12.3515 12.90885 12.08299 
  12.59919 17.27024 16.59142 20.83065 20.46106 14.01313 12.53942 13.63936 14.82475 14.79233 15.39594 15.28064 15.61105 15.19362 13.62629 15.96059 17.22206 15.25036 15.57805 15.82821 13.8464 14.34314 12.32957 
  12.87339 17.40207 16.69853 20.75185 20.27472 14.82241 13.57454 13.89783 14.86771 15.49655 15.69082 15.79947 15.61258 16.25987 14.84013 16.51847 18.01414 15.01476 15.3407 16.01681 14.13993 14.62834 12.70863 
  13.17359 17.59177 16.96258 19.95944 18.9106 15.22701 13.8382 13.31798 14.14737 14.76138 15.86723 16.08653 15.98195 16.09414 14.82789 16.4943 17.65386 15.15134 15.58843 16.19427 14.51803 15.25478 14.10639 
  13.61701 17.7865 17.24287 20.31397 19.26783 15.3012 14.27533 14.00652 14.06157 13.97732 15.41972 16.40127 16.03523 14.89164 14.54936 15.81458 16.71577 14.59401 15.69445 17.08231 15.19946 15.35905 13.67907 
  14.00184 17.83053 17.10755 20.32385 19.31859 16.05173 14.2608 13.70262 13.87365 13.57939 14.82414 15.85239 16.70191 15.73202 13.95968 14.90556 15.42773 15.31231 15.90333 16.87693 15.50014 15.44649 13.9608 
  14.57378 17.84243 16.76657 19.45782 18.36666 15.36792 13.44019 13.72313 14.92875 14.36976 14.25026 15.64569 15.89312 14.45428 13.3161 14.53451 15.62068 14.59393 15.80624 17.03548 14.96751 15.02374 13.4909 
  15.21447 18.18304 16.96745 19.16579 18.69295 14.50879 13.17371 13.65467 15.22889 14.17074 13.75207 14.95903 15.48484 14.01785 11.96252 13.67882 15.37274 14.30946 15.06645 16.43822 14.03593 14.029 12.96432 
  15.69557 18.73573 17.84295 20.32482 19.18899 14.03427 12.50394 12.18307 15.27329 13.74666 13.09154 14.42644 14.96374 13.87214 11.83368 12.93985 14.45116 14.00281 15.06362 16.09592 13.61044 13.31885 12.74485 
  15.98183 18.61076 17.23318 18.58419 17.75739 14.66514 11.80884 13.04507 15.05399 13.23005 12.31126 13.64777 13.9603 12.94314 11.54242 12.23427 13.46654 13.42021 14.35025 15.38576 13.45529 13.28467 13.43179 
  16.36983 18.49153 17.46072 19.55406 17.8304 14.26844 11.65388 12.54786 13.89278 12.02204 11.70842 12.86929 13.92763 13.19448 11.47678 11.02568 13.00155 13.05625 13.99241 14.93269 13.36193 12.62123 11.25201 
  16.91053 19.31395 17.91098 18.89643 16.86432 13.72694 11.87185 13.24254 14.00205 12.05257 10.76908 12.87949 13.28356 11.94633 11.05437 11.12533 12.75738 12.05501 13.33129 14.07069 12.3796 11.7192 11.05169 
  17.29174 19.6846 17.73157 17.35478 16.16297 14.15115 11.11073 12.40668 13.28362 11.18836 11.07174 12.54022 13.02938 11.88706 10.44743 11.13548 12.77041 12.04656 12.83903 12.8876 11.77514 11.40439 10.56045 
  17.23432 19.42053 17.20756 16.95235 16.25754 13.89139 11.49117 11.78105 11.14721 10.20588 10.31909 12.48863 12.60464 11.83299 12.36004 11.31453 12.29337 12.36164 12.43958 12.93882 11.9782 12.27852 11.67843 
  16.53074 18.71063 16.51751 16.94945 16.05041 12.74791 10.03071 10.72647 10.99932 9.982744 10.96635 12.68731 12.83623 11.30921 11.54913 10.90253 11.23106 11.50233 12.71762 13.18253 11.76622 12.38223 11.70104 
  15.69217 17.89815 15.7483 14.97377 14.53612 11.96516 11.12983 9.131196 10.96767 10.9673 12.05442 12.21859 12.5895 10.55106 10.79283 10.54876 11.01498 10.7606 11.54235 11.51353 10.85205 11.89782 11.11372 
  13.86923 16.57632 15.26331 14.85788 14.39067 12.7171 11.17243 11.00468 11.68423 13.25407 12.60769 12.59903 12.12048 12.90249 13.94144 13.63238 13.21792 12.05534 12.87838 14.90839 15.73015 16.02667 15.69104 
  13.61382 16.34682 14.90382 14.8893 14.40084 13.49422 12.6097 13.05071 13.73629 15.75304 15.30508 13.85447 13.98672 15.82224 16.32998 16.46949 16.51073 13.91432 14.74784 17.0687 18.13454 18.42613 18.47258 
  14.55605 16.97081 15.03501 15.3369 14.16967 11.95557 10.91977 11.08843 11.75058 13.24282 12.85878 12.96099 13.28478 15.59657 14.72278 15.30267 15.28392 13.16171 14.36229 17.22016 18.41889 19.45104 19.20117 
  15.47327 17.72926 15.78447 15.42446 15.40768 12.5729 10.11994 10.89366 12.10496 13.59284 13.03747 13.40623 12.94038 16.30528 16.32386 14.95233 14.70298 13.22234 14.0664 16.60508 17.89858 18.17508 18.00281 
  16.2211 18.26995 17.28866 17.57735 15.27353 13.3695 11.53266 11.68451 11.35421 11.92297 12.33619 13.06362 12.45224 15.49412 16.06547 15.92675 15.58576 12.58488 13.5124 15.40464 16.18043 15.33247 15.62229 
  16.40175 18.42591 17.84651 18.44906 15.41768 13.78307 12.2946 12.64999 11.22093 11.41927 11.63084 10.93246 11.98123 14.54343 16.20716 16.31189 15.47426 12.76656 13.13507 15.26896 15.09722 14.0725 14.46431 
  16.6757 18.54147 17.61383 18.17967 15.46035 13.83175 11.8658 12.33601 11.13389 11.20397 11.12461 11.22868 12.22488 13.94469 15.89769 16.34622 15.46319 12.74337 14.20451 15.74678 15.10971 14.52905 14.4963 
  16.76283 18.5887 17.68788 18.20937 15.80549 13.99685 12.14814 12.54838 10.8432 11.06379 11.41749 11.17507 11.48255 12.85203 14.78319 16.05849 16.08326 13.06919 13.94136 15.39909 14.80749 14.61425 14.66702 
  16.64995 18.30085 17.58634 18.3342 16.15151 14.58886 11.89366 12.30556 11.47828 11.79887 11.09667 11.30176 11.16868 14.35984 14.87677 16.13587 16.49911 13.06481 14.12372 15.51614 15.46192 15.01677 14.90916 
  16.52672 18.18384 17.63734 18.262 16.74109 15.33377 12.32343 12.43404 12.25445 12.18885 12.2143 11.90199 11.90509 14.61265 15.28587 16.9969 17.03483 13.91487 14.51653 15.79068 15.46326 15.15117 14.32447 
  16.60033 18.01448 17.0511 17.71783 17.75788 16.23282 12.81127 12.78385 12.44913 12.3739 12.64835 12.44617 11.88231 14.26506 14.75798 16.41036 16.44767 13.79646 14.08188 15.74754 15.21961 14.5229 14.27368 
  16.58554 18.17001 15.6192 15.79108 17.83907 16.49691 13.58467 13.87136 13.15251 15.34053 15.64338 14.75528 12.38052 13.428 14.05307 15.71102 16.18547 13.51842 13.2032 15.82146 15.4119 15.28212 14.9756 
  16.81706 18.37561 15.55623 15.33322 17.88172 16.29072 14.11057 14.21804 13.08518 13.69146 14.14797 14.73615 13.03893 13.11906 14.03352 14.95837 16.48228 13.51765 14.07395 15.32152 14.80149 14.38036 14.81253 
  16.95721 18.65568 15.88247 14.83851 17.85907 16.19847 14.35859 14.35917 13.97949 13.80073 14.48404 15.46158 12.86709 12.57524 13.31753 13.57888 14.20235 13.62078 13.80603 14.58132 14.26628 15.57101 15.81091 
  16.92929 18.59748 16.18815 16.21428 17.81978 16.08954 14.27445 14.10925 13.96899 14.1386 14.35728 14.62649 12.22072 11.41585 11.79407 12.69996 13.94304 14.1476 14.00437 14.08617 14.21517 16.62167 16.08522 
  16.71347 18.34732 16.52386 16.70664 17.32837 15.66043 14.25163 14.13859 14.01087 14.8577 14.49389 14.02374 12.22193 12.02307 12.09513 12.59532 14.0639 13.72341 13.91703 14.04354 14.41245 16.46469 16.29546 
  16.4208 18.01264 16.88099 17.1442 16.57642 14.64041 13.85327 13.94551 14.64954 14.75776 14.21291 14.0806 12.50921 12.25238 11.75339 12.30866 14.22572 14.74261 14.3222 14.56139 14.66986 16.32808 16.07767 
  16.02585 17.41488 16.75061 17.14062 15.60216 13.43982 13.3218 13.5059 15.12315 15.74747 15.12732 14.97423 11.94713 11.64201 11.86428 12.81159 14.43116 14.51448 14.0855 13.91175 13.53605 16.79836 16.27676 
  15.54676 16.81251 16.59787 16.86721 13.93739 12.5265 13.02697 13.58001 14.63541 15.0335 15.03953 14.87171 11.96232 11.17344 11.23595 12.35256 13.94297 14.62026 14.15592 14.0969 13.14401 15.85678 16.06257 
  15.29335 16.5896 16.2428 16.40902 13.19328 11.96381 13.15584 13.4573 13.83275 14.18077 14.74952 13.4333 11.54537 11.45848 11.20981 11.44254 13.91985 14.09482 13.83819 13.72444 12.47239 15.62782 15.47463 
  15.03695 16.41206 15.76506 15.74993 14.99031 12.53887 14.11596 13.88085 13.56149 13.86856 14.16082 13.18733 10.81969 11.83342 11.28586 11.12295 13.29487 13.72911 13.43846 13.1814 11.87428 13.31521 14.49123 
  15.19216 16.60078 14.61114 14.46758 15.01078 13.24106 14.01335 13.57776 12.27156 13.13906 14.44345 13.87597 11.4695 11.93211 11.51881 11.86933 13.54847 13.27814 13.21632 13.90559 14.1398 14.70719 14.13614 
  14.67921 16.36944 14.31438 14.29885 14.20024 12.58013 12.82334 12.93028 12.20578 13.60437 14.77291 14.12951 12.00097 12.08059 12.8358 12.58611 13.60779 13.56823 13.54014 15.80377 16.93598 17.07594 16.38759 
  14.09909 15.53763 14.35333 14.36311 13.46532 12.24172 11.28518 11.79358 12.87418 12.31148 14.00558 14.61462 12.33662 12.94618 13.78474 13.02037 13.37858 14.04721 15.38267 18.08268 19.1378 18.01291 17.87174 
  13.13866 14.28506 13.46247 13.33514 12.55707 11.26553 10.44495 10.7674 12.86463 12.67321 13.31184 12.77522 11.95182 13.00975 13.90651 13.2495 14.2042 14.69992 15.82987 18.51886 19.10749 19.2898 19.35597 
  12.87447 13.63893 12.77628 12.87282 11.82412 10.49559 11.2738 11.53943 12.46402 13.76956 13.33128 13.70105 13.19503 13.69975 14.5716 14.70375 14.28935 14.60292 15.83705 18.41346 19.52948 19.5496 19.50181 
  12.43892 14.00917 11.94452 12.23295 13.09062 11.04633 10.81847 11.03293 12.85549 13.9011 14.34009 14.35291 13.72982 13.88251 14.83228 14.69408 14.73889 15.0092 16.64098 19.92425 20.72796 20.62129 20.78949 
  12.57589 14.14755 12.76697 12.63588 12.86826 10.93601 10.81407 11.39892 13.70268 15.24565 15.25113 15.90769 15.04436 13.95307 14.30912 15.37132 15.7328 15.43253 17.88644 20.36236 20.81531 21.52759 20.81256 
  11.55697 13.42533 11.86985 12.06834 12.89067 12.34525 12.49656 12.66325 13.77704 15.66433 16.10908 16.22246 15.24011 14.59829 15.35136 15.66786 15.75713 15.76261 17.19726 20.20177 20.41369 21.18811 20.27664 
  11.83781 11.78476 10.88169 12.16515 12.8652 12.27359 12.28256 13.37679 14.12796 16.37236 16.86594 16.74899 13.97586 14.41109 15.27517 15.25704 16.5749 15.36801 16.12666 18.94821 19.08416 20.14678 19.19424 
  11.29138 12.42908 11.55172 11.38864 12.08098 11.84389 12.93314 13.31962 14.90897 16.77657 16.34854 15.3447 13.96065 14.13565 14.2487 14.35624 16.70179 14.96482 14.82867 17.15551 17.93615 17.96055 17.92477 
  11.65481 13.19278 11.53873 12.37848 11.75132 12.30172 13.64941 13.70718 15.32799 15.79656 16.56388 15.52489 14.06691 13.22153 13.50002 13.39991 15.37756 14.06801 14.822 16.09739 16.44575 16.48426 16.86766 
  10.83643 12.90215 11.20963 11.4503 12.48443 12.65709 14.0135 14.90167 14.40853 15.14135 14.69142 14.34933 13.56003 13.23642 13.07663 13.29242 14.56171 13.91872 14.96106 16.0597 15.73718 16.22322 16.35061 
  11.76764 14.94607 14.002 12.59242 13.17363 12.93256 14.04444 14.95929 14.73403 14.32073 13.673 13.46165 12.49231 11.98037 12.40751 12.22861 14.2252 13.65674 14.01322 15.1333 14.95369 15.47503 16.08985 
  13.89615 16.76128 15.37205 14.68637 14.55084 14.06249 14.60906 15.02118 14.96586 14.17659 13.32582 13.36398 12.40657 11.75675 12.20584 11.6482 13.49479 13.45353 13.88642 14.68982 14.64923 14.41646 14.38884 
  14.84046 17.53472 16.00968 16.66092 14.74903 14.52959 14.44483 14.51026 14.96541 13.62469 12.74818 12.63656 11.79429 10.7945 10.28362 10.71255 12.82129 13.17442 13.48379 14.43379 14.1143 13.92664 13.57136 
  15.24041 17.86895 16.36832 17.24933 15.26022 14.57038 14.41827 14.01725 15.12896 14.63774 13.50198 13.16672 10.8975 10.52911 10.55219 10.53394 12.4472 12.95718 13.381 14.41105 14.18102 13.43132 13.21059 
  15.70714 18.11394 16.63966 17.54475 15.6258 14.62249 13.44291 14.33535 15.30156 14.73038 12.71182 11.88729 11.21848 11.80892 11.28598 11.00321 12.07642 12.61302 12.96624 13.64967 13.5497 13.66227 15.18429 
  16.24144 18.45406 16.74632 17.26887 16.00562 15.20356 14.67932 14.5265 15.42036 15.9411 12.72794 11.28165 11.22411 11.92239 11.05458 10.47411 11.84302 12.45643 12.32757 13.31341 13.65614 14.01642 15.6444 
  16.61691 18.75608 16.65908 16.05809 16.49335 15.44349 14.24317 13.74861 15.73102 16.03167 12.45979 11.43863 12.06159 11.43628 11.58031 10.96982 12.73052 13.25921 12.43133 13.44257 13.17499 13.61122 15.05092 
  16.80972 18.82384 16.56598 15.89633 16.39415 14.86858 13.51579 13.62421 15.78119 15.96636 12.04702 12.23535 12.08618 11.75746 11.64274 11.03938 12.40111 12.2805 12.29179 13.94364 13.40477 14.06813 15.88864 
  16.63538 18.58344 16.62993 16.47061 16.84248 15.18759 14.40616 13.94959 15.81456 15.56275 13.3875 12.99495 11.75405 11.29325 11.58946 10.95365 11.4328 12.09967 12.70522 13.64252 13.03335 14.81887 16.09799 
  16.17998 17.74599 16.20119 16.20937 17.15302 15.38168 15.02195 14.70119 15.58851 15.20672 13.59827 13.00949 12.00444 11.9139 12.20626 11.42136 12.15257 13.27923 12.24949 13.81115 13.19789 14.65822 15.93385 
  15.63498 16.83412 15.9795 16.01633 16.73568 14.96041 15.20592 14.9941 15.25987 15.44159 13.90219 13.12603 12.1955 12.00728 11.73402 11.36467 12.2128 13.21522 12.46231 13.59565 12.94152 14.60943 15.73831 
  15.64667 16.88672 16.15878 16.30288 14.82288 12.78746 14.38334 14.24222 15.33887 15.6051 14.19205 13.4702 11.48957 11.67185 11.36558 11.50844 11.67504 12.6779 12.27801 13.62284 12.4663 13.766 14.50554 
  16.1116 17.70094 16.61898 16.63179 14.50537 13.00403 14.49458 14.1091 14.77081 16.03117 14.69265 13.87188 11.65564 11.77306 11.69971 10.80465 11.53343 12.87404 11.97025 13.6426 11.97943 14.26168 15.0657 
  16.30089 17.96895 16.63384 16.78599 15.04495 13.30599 14.34233 14.00725 14.74239 15.8376 14.84421 13.98984 11.77159 11.99564 11.91329 11.26901 12.10066 14.10714 12.30086 12.97296 11.96393 13.5807 13.9249 
  16.11331 17.73792 16.50445 16.31737 15.14762 13.18915 13.33717 13.48801 15.0379 14.98231 14.6054 13.371 10.87303 10.89191 11.57216 11.3191 12.55595 13.71982 12.37777 13.36162 12.34734 12.24435 12.79598 
  15.58554 16.86545 15.76826 16.14976 14.84043 12.69869 13.17145 12.83138 13.56788 13.88323 12.82279 11.39667 10.49319 10.13156 10.41932 9.690503 11.98269 12.59705 11.74903 12.81073 11.81398 12.05253 12.92546 
  15.40506 16.56629 15.7819 16.15461 15.12686 13.06547 13.38418 13.28866 11.46526 12.2537 12.2489 12.08193 10.35587 10.30065 10.11564 10.48493 11.5637 11.96607 11.58659 11.82748 10.88299 11.80397 12.83247 
  15.64443 17.13461 15.25182 15.23267 14.96755 12.78958 12.95011 12.96703 12.13067 12.01262 11.68263 11.51331 10.19018 10.40131 10.45652 9.893003 10.72811 11.31659 10.82685 11.00283 10.72475 11.77716 12.83387 
  15.71785 17.45413 15.5854 15.01698 14.3412 11.71642 10.96577 11.03219 11.98035 12.71681 11.61681 10.26505 9.601998 11.02885 11.42186 10.48678 11.01715 11.48228 11.22338 11.53144 10.79359 11.5774 11.99054 
  15.35344 17.22778 14.87986 13.97491 13.06646 10.3242 10.27509 10.32014 10.23042 10.61678 9.737593 10.01495 9.896219 9.359041 9.507567 9.67006 10.14279 10.65076 10.4929 10.57693 10.33899 11.15093 10.91731 
  14.73678 16.47514 14.2816 13.68988 13.41362 11.12034 10.6353 10.23782 10.21636 10.79559 10.0661 10.10203 9.259908 9.090837 9.185848 9.923974 9.884459 10.10081 10.24038 10.84077 10.28913 10.66675 10.87501 
  13.34543 15.10439 13.62741 13.43057 13.13955 11.26525 10.73161 10.48361 8.931073 10.65594 10.13154 10.01275 7.661518 8.794694 8.97205 9.528334 9.832764 10.47726 10.19211 10.24871 10.08061 10.30916 10.30075 
  10.66386 12.08491 13.25964 13.39106 12.19863 10.12935 11.24597 11.23391 9.032432 10.34383 9.701096 9.622985 8.836775 8.976686 8.384194 8.839561 9.518727 10.43447 10.18111 9.936262 10.31167 10.56191 10.62938 
  9.864835 11.36032 12.31589 12.7327 11.80678 10.03503 10.97492 10.7874 9.752045 8.775429 8.095765 8.616365 8.481807 8.58884 9.057997 9.505718 10.19115 10.05057 10.04556 10.14044 10.41077 10.46338 10.77466 
  8.924618 11.12906 12.26638 12.40822 11.61309 9.230251 10.80107 10.44947 9.52743 9.636349 8.786553 9.209707 8.387288 8.182937 8.243385 9.326593 9.774284 10.1685 10.02218 10.17269 10.70011 10.49688 10.61065 
  10.46031 12.47213 11.88194 11.74302 11.8674 10.72511 11.42089 11.52294 12.66962 13.42245 12.90012 11.16923 10.26218 10.81529 10.76803 10.16122 9.826 9.915348 9.86139 10.30795 11.00059 11.42232 11.84945 
  12.26625 14.16075 12.72903 14.60278 14.37007 13.44235 12.87462 13.86045 15.57594 16.44231 16.44123 15.36868 13.77279 15.25738 15.21306 14.52211 13.27622 12.97607 12.30169 14.6017 16.36056 17.2046 17.79084 
  12.88817 14.4487 12.22028 13.27812 13.9757 13.02691 12.24403 13.3259 14.29199 15.57484 15.93701 16.14005 13.74489 14.87244 15.91617 16.46932 15.27049 13.81515 13.83679 16.70215 18.24577 19.36378 19.67374 
  12.00124 15.37659 15.7605 14.22926 13.0174 13.10144 11.59302 12.30477 13.87144 16.48501 16.24954 14.65436 13.44942 13.95702 15.1756 15.80813 15.05493 13.84618 12.97811 14.7254 15.89062 17.37466 17.80906 
  11.40193 16.41908 17.10656 18.07596 18.44994 14.86982 11.46446 13.08391 15.70103 16.6888 15.61081 14.1348 12.79728 13.32965 13.90944 13.99649 13.78145 12.89573 11.9253 13.4449 14.24946 14.72222 15.25539 
  11.44434 16.72986 17.13328 18.86685 20.06189 16.01924 13.17847 12.7762 17.20245 16.84163 15.03655 13.80424 11.7222 13.07261 13.83397 13.96507 13.21282 12.76647 11.96956 12.39547 12.90304 13.23651 13.36372 
  10.10038 17.35346 17.76299 18.8912 20.46964 16.98438 14.85764 12.72846 16.86143 16.67736 14.81743 13.0213 12.2606 12.59028 13.18198 14.63323 13.8833 13.29084 11.56619 12.33959 12.88427 13.18531 13.93617 
  11.39528 17.46929 17.92314 19.14353 20.62991 16.99307 15.65434 13.20797 15.76896 15.81176 13.97282 12.77619 11.2707 11.88343 12.92847 13.32276 13.7076 13.56002 11.07274 11.85992 12.23533 13.28798 15.04418 
  11.27983 17.707 18.24305 19.08735 20.56552 17.11422 16.33149 13.43726 14.52213 14.48029 13.66881 12.0331 11.1849 10.85902 12.006 12.63326 13.76072 13.75976 11.0928 11.39149 11.49694 13.85464 15.78272 
  10.4093 17.90809 18.4453 18.32235 20.13852 17.31362 17.00359 13.08082 13.30086 13.77682 13.61921 11.83928 11.08163 10.67048 11.6871 11.93983 13.32355 13.21966 11.60241 12.79556 10.68321 13.65361 15.63675 
  11.84527 18.06657 18.71371 18.15455 20.01794 17.42912 17.4063 13.26461 13.40163 13.82001 13.56594 12.01353 11.27739 11.76238 11.80556 12.06823 13.41709 13.49274 11.33103 12.29501 11.13933 13.40397 15.07014 
  11.17124 18.38477 19.04682 18.43799 20.28102 17.52682 17.30856 12.02493 13.38366 14.0256 12.96288 11.21294 11.00717 11.76898 11.5427 11.69634 12.18315 13.27228 12.24852 12.80867 11.54512 13.54795 14.91226 
  11.79911 18.40328 19.16708 18.38249 20.19378 17.33193 16.52722 12.74763 13.76983 14.43673 13.70339 10.60393 10.94744 11.23283 11.7214 10.93908 12.70267 13.61965 12.34297 12.93185 11.72894 13.06544 14.49717 
  11.53892 18.40511 19.1988 17.54394 19.24091 16.508 15.51558 11.58755 12.57494 12.96985 12.25272 10.15904 10.6575 10.79901 10.68679 10.70864 12.52 13.14616 11.88523 13.06559 11.90712 13.5841 14.07163 
  11.24507 18.51727 19.28405 15.55707 16.47013 15.3894 15.69263 12.4398 12.47237 12.42525 11.96187 10.67544 11.41625 11.04646 10.61635 10.52443 12.70697 13.38649 11.35408 12.26194 11.28505 13.45126 13.82557 
  11.66839 18.54571 19.34097 15.28023 16.99143 15.16772 15.61421 11.90284 12.48574 13.0267 11.92334 10.24209 11.53736 11.04919 10.13369 10.87534 12.32853 13.07113 11.1777 12.35379 11.42702 13.24903 13.47682 
  11.33042 18.52498 19.28227 15.3864 16.97915 14.79772 15.07273 11.31091 12.86657 13.35474 11.33974 9.940867 11.48389 10.81812 9.995729 10.97024 12.09934 12.44435 10.87501 12.38766 11.07067 13.29669 13.62211 
  11.3056 18.38368 19.13547 14.39207 16.36372 14.8709 14.99244 11.87787 12.69045 13.26806 11.46194 8.540175 10.51303 10.84584 10.27139 10.64636 11.47711 11.95679 10.57964 11.26044 10.76696 12.47983 12.5918 
  11.55642 18.09599 18.73145 15.37899 17.526 15.3725 15.14406 11.61225 12.45738 12.59422 10.7795 8.732772 9.834822 10.28893 9.671982 10.17871 10.94767 11.20057 10.26523 10.61838 10.52447 10.65674 11.38504 
  10.35312 17.61216 18.04445 15.89868 17.93677 15.43736 14.92465 10.95503 10.74119 11.10365 10.10367 8.769894 9.549344 9.448477 10.16095 9.991289 10.12919 10.67581 10.30081 10.2757 10.61878 11.08467 11.66054 
  10.57029 16.80257 17.43232 15.33712 17.40169 14.69754 13.7979 10.32226 10.54766 10.62139 9.538151 8.558439 8.763721 9.43328 9.823753 9.941569 10.20876 10.29988 9.839418 10.4947 10.67628 10.63656 11.36743 
  10.88069 16.40481 16.94452 14.63999 16.55998 13.75831 12.04815 10.07983 9.779847 9.41483 9.367539 8.638872 7.551649 8.591156 9.22356 9.253392 9.312204 10.12255 9.771526 10.66726 10.28885 10.70903 10.99041 
  10.68995 15.9614 16.66981 12.87561 15.32809 13.38852 13.34967 10.08965 9.267484 8.510974 8.937634 8.066225 7.492024 8.559677 9.379137 8.934621 9.395594 9.947574 9.762503 10.16802 10.52471 10.62313 10.52163 
  10.10682 15.73363 16.55744 12.98704 15.02876 13.22308 13.64594 9.306398 8.513678 8.840482 8.639846 8.813548 8.690471 9.372956 9.21609 9.271674 10.22423 10.17199 9.91317 10.44354 10.81481 10.71306 10.58567 
  10.53498 15.85452 16.48894 13.69206 15.73015 14.98417 13.35866 12.12415 12.66079 12.8636 12.57128 11.65822 10.42173 9.932479 10.30401 10.88577 10.69672 11.60742 10.67614 10.5448 11.20179 12.58635 13.41792 
  10.96499 15.49205 16.39072 14.58421 16.09194 14.12068 12.85947 10.9668 11.6846 11.61401 11.32033 10.1519 9.595464 9.126978 9.282423 9.871722 10.47498 11.93141 10.77083 10.79808 10.82797 12.26179 13.4949 
  10.02513 14.95054 15.62905 12.19269 13.6047 13.04639 12.49895 9.641859 10.20514 10.12763 9.829097 9.059594 8.440591 8.796917 9.133607 9.019018 10.19306 11.17522 10.55359 10.32446 10.81375 11.08818 12.5863 
  9.422004 14.69329 15.27407 12.65151 13.94985 12.99975 11.70039 9.886936 9.611949 10.09684 9.314202 9.766679 9.321723 8.8813 8.354034 8.528647 9.770875 10.59045 9.79849 10.28119 10.74567 10.74199 12.01325 
  11.32263 15.05788 15.25787 15.88041 15.08022 14.13194 12.75217 11.34698 9.656963 9.717936 9.331264 8.585823 9.124638 8.87212 8.442982 8.890512 9.943332 10.48704 10.23122 10.30991 10.68609 11.49976 11.52623 
  13.40004 15.62923 17.28662 19.34645 17.7522 14.88166 15.30356 13.29276 11.17599 10.1756 8.496209 8.687905 8.767036 7.138587 7.905816 9.155358 9.870092 11.34072 10.64145 10.56284 10.93426 11.27267 11.83103 
  14.40832 16.92628 18.28235 19.92572 17.42484 16.51931 17.17439 13.85876 11.32172 10.88191 9.843261 8.557543 8.702249 8.43864 8.905317 9.214272 10.79284 11.19663 10.4797 10.33966 11.11496 11.59549 12.62567 
  15.07293 17.23655 18.7453 20.03891 18.20394 17.32358 16.8287 15.43667 13.0998 12.50691 10.92344 10.46035 9.055417 9.849059 9.764614 11.06528 13.47688 13.99895 12.79779 12.12013 14.56175 14.29823 14.93593 
  15.40606 17.37957 18.26671 19.24368 19.16639 17.99058 16.59817 14.71303 12.86606 12.04343 11.7385 9.705905 9.335643 9.112566 9.626029 10.41576 13.39695 13.71364 12.28225 11.58019 13.76471 13.65422 14.2695 
  15.64262 17.64327 17.19793 17.43849 19.78349 18.46711 16.14888 16.3467 13.90007 12.84516 12.72527 10.46768 9.428706 9.794108 10.62751 11.56921 13.06464 12.97483 12.06074 12.42412 15.20449 15.17994 15.41442 
  15.52258 17.33565 17.81485 18.67915 20.30933 18.47157 16.206 16.37829 16.18498 14.41841 13.32267 11.18768 9.268024 10.2243 10.31914 12.25662 14.13956 13.31647 12.61077 13.04033 15.24885 15.47131 15.91226 
  14.72044 16.24851 17.39948 18.02982 19.91796 18.15676 16.86558 16.89674 18.34361 15.89882 13.7806 11.83443 10.31598 9.947871 10.88693 13.04681 14.78731 14.09249 13.07614 13.30486 14.96555 15.89365 16.21339 
  12.94438 14.262 17.79223 17.94734 18.41548 17.99884 16.59502 17.46818 19.89834 17.3267 14.68132 12.74071 10.74997 11.11081 11.97623 14.37372 15.06054 13.62777 12.74747 14.17785 13.22865 15.07573 15.67369 
  11.9413 14.24815 17.19899 17.78853 19.11386 17.95886 17.36403 17.78174 19.56396 18.41936 15.83994 13.5232 11.30285 11.13494 12.42168 14.83165 15.21284 13.93472 14.22501 15.48852 14.69647 14.61711 15.37134 
  13.80032 15.34109 17.01766 17.4297 17.96654 17.89771 17.45484 16.8976 18.80153 19.64696 17.77114 13.78843 12.49898 11.85447 13.19477 15.72658 16.27253 14.45719 14.41123 16.36125 15.85711 15.05885 15.80895 
  14.42734 15.95719 16.8165 17.40023 17.72437 18.32386 18.0716 16.22936 18.2776 19.88119 18.43486 14.89068 13.09634 12.62326 14.15393 16.27906 16.78207 14.59756 14.30995 16.8557 16.74701 13.89337 15.01136 
  13.1981 15.42621 17.47243 17.46376 16.60345 17.87163 18.14639 15.80742 17.8999 18.86058 18.78569 15.2696 12.82393 12.26846 14.426 16.64136 16.84205 14.60646 14.58007 16.73913 16.8489 13.78548 15.02061 
  14.53758 16.3012 17.77195 17.30224 18.26668 17.24618 16.88194 15.98161 16.4734 18.86712 18.13484 16.05925 12.78708 12.62607 14.48131 16.29807 16.06342 14.38513 14.33487 16.47281 16.5306 13.28973 14.15116 
  13.29329 15.40082 17.00095 16.67248 17.51397 17.87264 17.22511 15.87745 16.17892 17.66627 18.2972 16.80437 14.20589 12.99962 14.83743 16.62644 15.83631 14.91555 14.99209 17.03086 17.57478 15.26948 15.94518 
  12.60118 14.90419 15.67589 16.70849 16.94395 16.61985 16.03358 15.05487 15.99355 17.37237 17.7354 17.29265 14.00175 12.25772 14.06873 15.4484 15.89154 14.37778 13.64605 15.54983 15.959 14.05933 14.62753 
  13.46687 15.90945 16.95177 16.9344 16.38556 16.47206 16.12689 15.33399 16.03906 17.06773 16.99802 17.40662 13.44633 12.45146 13.97935 15.02217 15.57034 14.04656 14.18954 15.08655 15.59686 13.81427 14.97765 
  13.90775 15.34541 16.23319 17.18387 17.43214 17.35052 16.137 15.33103 15.5793 15.9622 16.35075 16.71717 13.52683 12.29098 13.29745 15.11607 15.76651 13.92764 14.35643 15.45294 15.56918 14.29957 14.75815 
  12.64945 15.05234 16.66335 17.67402 17.42719 17.03349 15.29472 13.20675 14.79218 16.15494 16.06722 15.88496 14.37258 12.09099 13.10366 14.55416 15.20259 14.15096 14.36049 15.3579 14.84148 14.77236 15.21402 
  13.63886 15.13448 17.12833 17.25077 17.57319 16.51949 14.79358 14.33071 15.40003 15.31295 14.707 15.51841 15.19862 12.16728 12.38076 13.97288 15.01122 13.84911 14.36896 15.99493 15.19653 15.71203 15.79622 
  14.08951 15.27791 16.67466 17.38719 17.48137 15.49804 14.64399 13.99431 14.06412 14.96719 14.45313 15.07616 15.22869 11.85608 11.80214 13.4829 14.47193 13.28767 13.82474 15.54498 14.68076 15.78218 15.55037 
  13.55869 15.54187 15.30116 16.07854 15.19114 14.89549 14.28598 13.40435 12.58551 12.71723 13.51417 14.44852 13.74717 11.34321 11.73631 12.8637 13.46484 12.86962 13.4271 14.9927 16.06477 17.438 17.00769 
  14.03767 15.96629 15.7353 15.65322 14.90481 14.81491 13.83562 13.2442 12.39499 12.31491 12.90394 14.30139 13.67956 10.38161 11.00745 11.82061 12.68529 12.26423 13.03794 13.27276 13.62125 14.79159 14.69412 
  14.69835 16.23426 15.85394 14.956 15.55784 15.1201 13.12001 11.77338 12.35275 13.03831 12.70205 13.89809 13.49535 10.74578 10.39047 11.01446 12.48515 11.93507 12.14398 12.33028 12.54093 12.23906 11.828 
  13.74119 15.01042 14.63525 14.21064 15.04048 14.53166 13.30235 11.51442 11.54563 12.75932 12.87387 12.97661 12.22388 9.737226 8.855085 10.95898 12.41875 11.5811 11.65182 12.16752 11.78344 11.6825 11.58462 
  13.79247 14.65056 14.39971 14.56042 14.50872 14.48325 12.4725 11.32357 11.69767 11.76651 11.72759 11.75854 11.63687 9.03346 8.632442 10.05544 11.80099 11.283 11.6399 11.7433 11.51521 11.30795 11.44739 
  12.4817 13.96644 14.94901 14.22443 14.7217 14.6918 12.00653 11.63244 11.49402 12.39865 11.62017 11.95335 10.76735 9.714913 9.972126 10.44865 11.5843 10.77267 11.39896 11.29488 11.80279 12.45774 12.97729 
  10.99601 12.48925 13.91558 13.39563 12.76022 12.97499 10.67527 11.2872 11.05415 11.66541 11.35227 12.07333 10.91728 8.470509 9.390297 9.98071 10.65596 10.24184 11.05679 11.06191 11.15756 11.02771 10.34172 
  11.55889 13.16734 13.01277 13.71551 11.48029 11.84606 12.16008 11.11222 11.96976 12.21278 12.98672 12.49252 11.65213 12.38936 13.03628 13.42355 12.62503 11.73549 11.11625 12.92801 14.35214 14.86793 15.02906 
  10.42775 12.77446 12.08269 12.53821 11.88333 11.98844 12.13725 11.12525 12.48612 13.17795 13.38511 12.48855 11.81891 12.69561 13.0315 13.13174 12.79269 11.96432 12.52114 14.89753 16.66616 17.80948 17.48282 
  11.87147 13.35178 12.29651 13.07613 13.38078 12.15471 11.58152 10.57638 11.48294 11.66988 10.83331 10.95618 10.80718 12.60706 13.70898 13.6584 13.12783 12.36247 13.81388 16.36983 18.19867 19.32249 19.17922 
  11.74805 13.03258 11.83647 10.99839 11.53884 11.58733 9.78068 10.00441 11.77152 12.2512 12.23087 13.11207 12.52222 13.45929 14.83094 14.73086 14.22945 13.92837 15.33029 17.7077 19.34477 20.7841 21.06674 
  12.91207 13.92127 12.60293 11.2707 11.34953 11.66406 10.61612 10.41087 11.90737 12.03745 12.62049 12.73641 12.31181 15.63897 15.29306 15.0111 14.84808 15.03202 16.69465 18.62388 20.75715 22.16574 21.82943 
  11.44404 13.17524 11.91981 11.4003 11.81657 10.4973 11.08138 10.91362 11.54249 11.68209 11.96895 12.71395 12.69716 15.29071 16.25213 15.87629 15.68893 14.98099 17.12889 18.67869 20.67893 22.07556 22.39026 
  12.4943 14.38828 15.40293 14.53412 12.63508 10.61958 11.04276 10.82849 11.58219 11.86627 13.09368 11.76773 11.85476 15.56555 15.54773 14.94056 15.46294 14.71608 16.38725 18.32511 19.88208 20.835 20.72138 
  12.06502 16.88607 18.26798 18.51961 18.70255 16.0556 11.92916 12.43737 13.55841 13.30871 12.92406 12.8846 12.13316 15.64249 15.85884 15.73912 15.52897 14.45106 15.96146 17.69323 18.59188 19.02813 19.20515 
  11.12339 17.63247 18.67913 18.09006 20.21992 17.90995 12.6137 12.48078 12.48313 12.308 12.5629 13.46077 13.18955 15.1113 15.49067 16.01841 16.40285 15.20319 16.30907 17.38044 16.9833 16.74973 17.5288 
  11.91933 17.71219 18.231 18.7819 20.81494 18.03944 12.80576 12.62215 13.70358 14.11044 12.98072 13.73609 12.88998 16.17216 15.98503 15.40534 16.2627 14.74855 14.94189 16.8815 16.57269 16.14612 17.22886 
  10.80071 17.88858 18.37441 19.57571 20.85023 16.92813 14.54551 14.11315 14.60858 14.76622 14.24647 13.50114 13.44903 15.14018 15.77174 16.55495 16.70045 14.63158 14.95225 16.897 16.51554 16.39054 16.95824 
  10.11735 17.24759 17.42141 19.76315 20.66429 16.31513 15.35933 15.46266 14.2526 14.75529 14.8134 13.60172 13.60854 15.0717 15.27205 15.8339 16.31969 14.84996 15.4993 17.3387 16.82842 15.77378 16.33088 
  11.58894 15.94406 15.80265 19.82332 19.81112 16.33015 15.83328 16.12075 14.11878 13.67492 14.12749 13.19047 13.91129 14.98374 14.91681 15.74825 16.61319 15.2014 15.57807 17.29531 16.63654 15.11069 16.41315 
  12.11817 16.37547 16.11933 19.74942 19.52616 15.253 15.60251 15.62886 13.73787 12.94972 14.53512 13.22932 13.03554 14.42677 14.02384 14.75253 15.97637 14.65869 15.00835 17.03261 16.1345 14.50001 16.3125 
  12.40414 17.09673 16.56095 19.30345 18.83337 15.53385 15.46334 15.14222 13.0195 13.65549 14.05797 13.27337 13.1787 13.58766 13.74914 13.84215 14.70384 13.81669 14.36711 16.36557 16.29565 15.23389 17.42854 
  13.11347 16.62823 16.08442 18.65703 19.09037 15.8935 15.2197 14.41059 14.00731 14.22315 14.2603 13.98392 12.74996 12.83496 12.62025 12.43474 14.70534 14.04083 13.31598 15.78194 16.0346 15.06308 16.98965 
  13.64474 17.90531 17.46792 19.65909 19.84337 16.36866 15.0947 14.08054 14.56519 14.61388 14.04171 13.45949 11.97165 12.38503 12.46181 12.36982 13.73913 13.27664 12.86066 14.10406 13.75239 13.30816 15.46512 
  14.14659 17.85538 16.63506 17.69785 17.37612 14.4621 14.21311 14.33 14.55272 14.0266 13.34313 12.6474 11.67851 12.07374 11.39458 12.00585 13.00983 13.07051 12.53945 13.83851 13.02929 13.78781 15.03182 
  14.42228 17.95903 16.79436 17.2867 17.37424 14.4423 12.56357 13.39425 14.97358 14.38745 12.16811 12.31725 11.24478 11.62687 11.66598 11.82592 13.17022 13.03793 12.44336 12.93122 12.50479 13.24247 14.70499 
  14.74926 17.10213 16.10866 18.41386 17.76688 14.66671 12.0519 12.36404 14.64657 13.54079 11.52991 11.67943 10.9411 11.91207 12.82024 12.68861 12.79371 12.74705 12.14265 12.87091 12.87852 13.3739 14.64138 
  14.77377 17.52547 16.45933 18.03106 17.99345 14.37569 12.28482 12.38711 13.49167 12.96972 12.2464 11.9446 12.06746 13.32547 13.42399 12.804 13.77554 13.8069 12.38337 13.585 13.03204 13.04344 13.17559 
  14.33936 16.05802 15.20399 17.29784 16.82894 14.67938 13.40503 14.50885 16.62795 16.48669 15.85748 14.00688 13.75563 13.0535 13.17885 13.77508 14.15859 14.48487 13.08349 14.30881 13.77487 13.4736 14.49444 
  14.44479 16.99548 15.77001 17.10963 16.52903 14.38944 13.44239 14.06731 16.73449 17.81567 16.47066 14.45036 13.36218 11.99206 12.16488 12.52671 13.19155 13.33347 12.81071 14.34716 14.2478 14.17321 15.78081 
  14.02579 17.08305 15.59479 15.89314 16.72221 14.24861 13.47705 14.10365 17.43085 18.94127 17.04 14.55132 12.30284 11.67238 12.13895 11.74947 12.48997 12.62799 12.61127 14.23425 14.47474 14.48953 16.6846 
  12.98601 16.43487 15.30097 15.82755 15.67639 13.93307 12.06077 14.01533 17.31017 17.03305 16.06872 14.63635 13.17454 12.51504 12.19607 11.30268 12.40507 12.31048 13.08197 14.41822 14.24439 14.75511 16.61187 
  12.36717 14.63769 12.81204 14.45165 13.75739 14.0802 13.48325 14.81929 17.03195 17.97369 16.36871 14.08131 13.45929 13.48154 13.22458 13.06752 12.79095 12.71402 13.04852 14.30993 14.63789 14.94942 16.90777 
  11.93372 14.51598 12.94251 14.97276 14.5037 13.14109 12.6004 15.28386 17.44633 17.41194 15.19083 13.97468 12.30335 11.49983 12.01657 12.22904 12.7571 12.17981 13.11903 15.56299 14.76678 16.0535 16.8217 
  12.47566 14.19673 11.79418 12.79603 13.27397 13.03157 14.51644 15.58098 16.45773 16.79338 14.77104 13.59812 12.80408 11.51495 12.39793 12.11381 12.55917 12.43729 13.31075 15.56109 14.98054 15.78551 17.06624 
  12.2613 15.1282 13.99666 13.71514 14.27644 13.59662 14.62273 14.96381 16.1037 15.93762 14.79321 13.20536 12.09543 11.00414 11.58279 11.8383 12.9191 12.51326 13.47569 16.26331 15.60605 16.11314 16.97496 
  12.43962 14.93872 12.93799 13.37484 14.13616 13.94297 13.31241 15.01747 17.49583 16.37276 14.13916 12.80372 11.91654 10.96919 11.34828 11.55524 13.11632 12.65655 13.44765 15.82003 15.211 15.98718 16.41879 
  12.20704 14.56072 12.76487 14.66424 14.36893 13.81488 12.67005 14.58511 15.8139 15.15328 13.72443 12.0736 11.75713 10.39046 10.3162 11.07518 13.42291 12.18517 12.6754 14.56121 13.61762 14.73406 15.99769 
  12.26719 14.83184 13.45157 13.9031 13.37387 13.42337 13.16261 14.01834 13.45282 14.0881 12.42882 10.71366 10.67623 9.888527 9.556181 10.42893 12.93971 12.08191 12.30767 14.6603 13.51334 14.00465 15.0682 
  13.72396 15.37463 14.90107 14.14883 14.71334 14.23447 14.62967 15.41844 13.9625 13.62733 12.02162 10.61794 9.693156 9.38163 9.607562 10.24143 12.10383 11.62002 11.61677 13.37127 12.76904 12.98495 13.14606 
  12.20455 15.08801 16.28204 15.02526 14.91302 14.54155 16.05799 15.24317 13.17851 13.23946 11.95253 10.98236 9.365788 9.214337 9.505456 10.4926 10.92743 11.19956 12.08282 12.89608 12.90349 12.21022 12.78775 
  13.37813 14.82368 16.03482 15.19005 16.00402 15.02282 15.84676 15.76265 13.34728 12.43027 11.99226 11.34601 9.200621 9.239662 9.540243 11.00728 12.16874 11.82533 11.95636 13.50077 12.4501 12.26641 12.92042 
  12.66223 14.38916 15.40475 14.94944 16.37963 15.2842 15.38037 15.8695 13.61753 13.21642 11.7801 11.01452 9.542067 9.19785 9.696535 10.70263 12.5226 12.46992 12.17822 13.79243 13.24585 13.34737 13.80106 
  11.79797 13.75565 14.85456 15.8272 16.72862 15.18555 15.07867 15.58954 14.36672 13.03338 12.42223 11.01965 9.557907 9.226222 9.848079 10.37486 11.38179 11.42391 12.02948 13.78323 12.73048 13.11424 13.94043 
  12.11479 14.20692 15.95607 15.77075 15.94264 15.04848 14.68504 15.58499 16.73163 14.876 13.04782 11.34189 9.554831 9.312258 9.405445 10.54039 12.08341 11.93015 12.28186 14.35123 13.82728 13.614 15.07887 
  11.63898 13.24526 15.91903 15.04351 15.44301 14.33202 14.69388 15.04394 16.66434 15.8622 13.70809 11.39564 9.984374 9.255429 9.771338 10.38831 12.17067 12.11052 12.20296 14.49279 14.43369 13.75218 14.74203 
  12.24345 14.6031 16.65611 14.82507 16.38684 15.87055 14.16241 14.53406 16.72972 16.78175 14.29714 11.9692 10.40499 9.319419 10.12237 10.74756 13.3062 12.89304 12.51923 15.03736 15.23477 14.65409 15.5749 
  12.77314 15.07554 16.98285 15.57631 16.88236 15.60186 13.31313 14.22181 15.56985 16.22102 14.96699 12.77368 10.60659 9.910019 11.08181 11.14588 13.15701 12.80074 12.45314 14.14474 14.5937 14.61545 14.75511 
  11.81499 14.35236 15.86184 15.37682 15.91093 14.51173 12.01213 12.23065 14.53028 15.26377 14.16712 12.59848 10.46429 9.104154 9.218637 9.824287 12.90368 12.61128 11.938 13.29911 13.48286 12.61785 12.90715 
  13.13115 13.74104 13.82935 14.62105 13.24596 12.30486 11.74188 12.57558 13.35646 14.14618 12.51182 9.92897 9.892784 8.203323 8.843456 9.73543 11.10688 11.40638 11.2225 11.48934 11.82256 11.33256 11.55696 
  13.5014 14.46064 13.74532 13.76357 14.51184 13.47387 12.36466 13.09748 12.37077 13.78026 12.56234 10.40441 9.652602 9.258573 9.640882 10.48315 11.37579 11.28799 11.69723 12.14306 11.93345 12.913 12.47232 
  12.11707 14.41063 14.2667 13.93633 13.81357 11.92622 11.39311 12.56514 11.91435 12.2172 11.83334 10.78043 8.693699 9.34746 10.5232 10.74675 11.27402 11.17691 11.69285 11.59679 11.87287 12.48645 13.45397 
  12.73287 14.28872 14.12864 14.03616 13.14685 12.7475 12.18416 12.10903 11.98855 12.79712 14.14206 13.79637 12.14143 12.49956 13.05192 13.06696 12.86376 11.90433 12.45795 13.79512 14.72743 15.6082 16.03671 
  13.99284 14.59781 15.86497 15.46929 15.35721 14.4506 13.86872 13.70982 12.77764 13.87713 16.70483 16.12365 13.74394 14.14882 14.72046 14.79044 14.55702 13.26187 13.4715 15.52675 16.2828 17.03688 16.70895 
  13.34487 14.76827 16.66223 16.6201 16.95358 16.41091 14.59964 14.13615 14.98613 15.52808 16.94203 16.33436 13.65864 12.67898 12.89719 13.15985 13.87055 13.7345 13.81801 15.46476 15.5521 15.17291 16.18393 
  11.05816 13.38546 16.02215 16.0239 16.26703 16.0995 16.00097 15.06918 15.39436 16.33123 17.5566 17.69474 14.73381 12.76004 13.46956 13.62351 14.9516 14.46614 13.86967 15.92621 15.77614 15.14017 16.05052 
  13.44674 14.49088 16.24643 15.87667 15.80228 16.08157 16.27578 16.11953 15.71647 16.77408 17.38955 17.5013 14.82175 12.91548 12.99751 13.53425 14.71743 14.50213 14.5002 15.80028 15.50727 14.7684 16.64701 
  14.21669 14.68321 15.45795 15.69659 15.90129 16.00284 16.18194 16.61241 15.75285 16.67905 17.6036 17.33829 14.63122 13.52661 13.50696 13.30114 14.47372 14.06641 14.2557 16.1715 16.02145 14.45178 16.56786 
  13.89403 13.90886 13.8904 15.63989 16.239 16.28278 16.61728 16.93481 16.17262 16.97161 18.37757 18.09198 15.23442 13.31642 13.7317 13.67162 14.61277 14.06051 14.15388 16.12131 16.52321 14.41898 16.15249 
  13.62738 14.29071 14.26743 15.39755 16.17412 15.93856 16.81614 17.35265 17.03794 17.52091 19.20687 18.71931 15.41284 13.25959 14.02276 14.49864 15.36796 14.59919 14.92057 16.34993 16.91476 14.57673 16.4135 
  13.91304 14.38333 15.62968 15.92608 15.88913 15.46424 17.23949 17.6737 17.75764 18.32361 19.38836 18.81548 14.75313 13.59059 14.47715 15.14762 15.94664 14.38755 14.47487 16.32241 16.61687 13.88076 16.03177 
  13.85443 14.63751 16.63829 16.22158 16.00957 14.22576 17.54068 17.94837 17.70362 18.35151 19.07424 18.78005 15.38741 13.65261 14.89409 15.66142 16.46598 15.04117 14.94388 16.69312 17.13902 14.49912 16.41131 
  14.16736 15.20732 17.17613 15.65355 14.81104 14.93489 17.35307 17.69239 17.67073 17.9412 18.96013 18.70328 15.56409 14.20707 14.99696 15.9745 17.04941 15.52152 15.42804 16.55275 17.19493 14.82115 16.61891 
  14.55787 15.56332 17.40187 15.79714 15.16877 15.23872 17.43352 17.88713 17.42919 17.76297 19.16017 18.89678 16.21667 14.49569 15.32538 16.22569 17.27599 15.67997 14.60294 16.08368 16.37376 14.28088 16.07994 
  14.28819 15.29579 16.9726 15.32225 14.96721 14.86766 17.3646 17.55653 16.88566 16.39165 17.97582 17.84254 15.61118 13.325 13.99773 15.29101 16.41687 14.61869 14.14026 15.54824 15.78234 13.5513 15.05351 
  12.89534 14.19264 16.16715 15.45553 14.97155 14.18898 16.54575 16.86909 17.18435 16.91364 17.95696 17.44936 14.9484 14.08921 14.70956 16.02505 16.75773 14.69309 14.29639 15.83831 15.93069 13.20353 15.10151 
  11.07287 14.01731 15.98684 15.85503 15.7026 14.17124 15.66209 16.81634 16.69689 17.06315 17.20102 16.85413 14.60714 13.95707 14.35706 15.67642 16.10085 14.48048 13.75637 15.7046 15.97279 12.9786 14.57301 
  13.20215 14.88027 16.1391 15.30331 14.56342 14.65112 16.31849 16.70032 16.30379 16.66877 17.15785 17.35987 14.50481 13.62881 13.80476 16.18898 16.30147 13.83495 13.66053 14.89267 14.77247 12.55925 14.45503 
  13.06126 13.95905 14.72861 15.52299 15.57437 14.80034 15.9755 16.06871 15.0617 16.14234 17.04153 17.21765 14.65441 13.15693 13.40964 15.61284 16.12287 13.8438 13.76604 15.52066 15.88898 13.41924 15.27078 
  12.30977 13.65725 15.62276 15.12077 15.0009 14.32717 15.56222 15.53802 15.59415 16.0091 15.70288 16.7213 14.29013 13.25526 13.82748 14.57323 15.08817 13.3787 13.22771 15.28969 15.54399 12.92044 15.89454 
  13.2355 12.87303 14.13755 14.8824 14.98317 14.09578 15.47559 14.69907 14.95037 15.00035 15.11841 15.08056 14.14125 13.50607 13.85954 13.90839 15.27832 13.46804 13.2138 15.63453 15.70406 13.72386 16.19327 
  14.09465 13.80971 14.12421 15.16251 15.51726 14.75089 15.25305 14.35832 14.27529 14.6144 14.28123 15.06938 13.85847 13.27495 13.16495 13.6391 14.85534 13.8216 13.19121 14.84105 14.6136 13.33247 14.90938 
  14.17169 14.05926 14.38466 15.14356 15.18883 14.75758 14.63411 13.4688 13.57265 14.68271 14.24092 14.8144 14.59175 13.15273 12.52619 13.557 14.39644 13.34105 12.86075 15.03354 14.76084 13.2114 14.36096 
  12.94696 14.0161 14.38512 14.93382 15.28973 13.96826 13.00536 14.16216 12.7634 13.37025 13.69937 14.27973 14.28271 12.20191 11.58065 13.0691 13.86918 12.99897 12.07017 12.88836 13.36119 13.8559 13.72427 
  14.00163 14.74522 13.96561 13.84176 13.14617 12.28493 13.21515 13.64575 12.21422 12.96211 13.65326 13.52429 13.38388 11.56683 10.93623 12.6721 12.94885 11.75394 11.93754 12.60298 12.66667 13.39652 13.23497 
  14.01837 14.65643 14.34931 12.89079 12.44645 12.47288 12.34212 12.92817 12.88704 12.13856 13.52651 14.26985 13.20721 11.13747 10.50563 12.04486 13.04915 12.82268 12.21847 14.11812 14.85144 15.37979 15.23997 
  13.68709 14.73938 13.74758 13.45861 13.16367 13.00546 12.78111 12.54126 11.39514 11.72749 12.81818 13.32782 12.47606 10.72879 11.57598 13.16603 12.89361 13.47785 12.73763 14.78968 15.50553 18.04643 17.43831 
  12.58177 13.59478 14.05649 13.31381 13.11898 12.32821 13.0021 12.62218 11.5436 12.16554 12.48717 11.81394 10.50265 8.973746 10.09016 12.06378 12.39155 12.9348 12.42843 14.2313 14.86099 18.31309 17.32769 
  12.75411 12.6887 12.49659 12.86026 12.59906 11.34449 12.41792 12.40671 10.25577 10.55363 11.62938 11.7192 11.39313 9.014958 9.833184 11.20295 11.54293 12.10884 12.57974 14.2528 15.04182 16.35394 17.43106 
  13.07687 12.93932 12.05013 11.10465 10.92198 10.16203 11.9019 12.00359 11.20652 11.09058 11.98129 10.94307 10.32444 9.561 10.09412 11.1652 11.89071 12.46157 13.37886 14.81377 15.79583 16.46983 16.3099 
  12.44068 12.16542 12.749 11.51701 11.75818 10.98072 11.45177 11.79877 11.41194 10.86352 11.39708 11.80494 10.21916 10.17022 10.94074 12.73651 13.22818 13.90771 14.318 16.45604 17.52512 18.12885 17.99037 
  12.84129 12.74346 11.62644 11.11886 11.60124 9.904121 10.04816 10.7376 10.31966 11.05361 11.45463 12.15746 11.15053 10.69711 11.84554 13.98456 14.60289 15.31413 15.49967 16.84238 18.05069 19.01266 18.84834 
  12.53398 11.96433 9.244778 8.572387 10.26614 9.39289 10.5042 10.72253 10.65647 12.75043 13.54582 13.20332 12.30947 11.8775 13.39289 15.73803 16.03198 17.65893 17.08116 18.5041 19.5771 20.24899 20.35165 
  12.00563 12.20351 10.276 9.838881 10.18145 9.774684 9.775256 10.85217 11.57201 12.9943 13.72266 13.57533 12.36394 12.41757 14.64912 16.40559 16.40833 17.94178 17.0117 19.56192 20.42689 21.10626 20.54123 
  11.91491 12.22656 10.11695 11.68905 13.45684 12.16579 11.17595 11.02882 13.19395 13.89767 15.23678 15.18915 13.55568 14.68092 15.77575 16.75733 17.13061 17.34155 17.21397 19.74595 20.57611 22.05666 21.36359 
  11.47355 11.28028 10.45217 11.66897 13.02617 12.40421 12.3838 12.38136 13.96514 15.68431 16.50488 16.14618 15.39929 15.91039 16.42896 17.51905 18.45585 18.59583 17.75705 19.62965 20.81224 22.27122 21.80147 
  12.52662 11.94858 10.55505 12.42413 13.77102 13.32065 12.9342 13.66854 14.39565 16.63391 17.94618 17.4248 17.46566 16.57822 16.48766 17.86062 18.27674 18.54208 17.23242 19.31924 20.2264 21.92178 21.3364 
  12.02873 11.26655 9.571183 11.58556 13.19479 12.65055 12.59585 14.40476 14.95008 16.4713 18.34317 18.11756 17.86362 16.10263 16.13897 16.93494 18.05787 17.58092 17.09885 18.80086 19.4799 20.65734 19.97151 
  10.86476 9.108546 9.552567 11.91356 12.38227 12.85921 11.95166 14.44887 16.19041 17.26532 17.9296 17.84677 17.18984 16.35882 15.73214 17.40462 17.33592 16.97078 16.5151 16.88156 17.47563 18.54997 17.5454 
  12.60442 11.37389 10.2908 11.55008 13.28062 14.39302 13.83146 14.80365 16.86215 17.7065 17.82295 16.90414 16.20468 15.93856 16.19014 16.84039 17.83888 16.61141 15.98524 16.43695 16.56059 17.5152 17.1277 
  12.41213 11.59318 10.9033 11.81183 13.5534 12.9817 12.88539 14.86807 16.76102 18.08833 17.84195 17.58156 15.95464 15.7454 16.94265 17.86619 18.44193 16.60484 16.43938 17.78119 16.30627 18.12034 18.17815 
  13.1731 10.92382 11.44889 11.58021 12.51424 12.61171 11.91807 14.35635 16.45222 18.2344 18.02634 18.44849 16.43503 15.31518 16.52777 16.50167 17.21136 16.07273 16.42845 17.90891 16.48678 18.03056 17.71963 
  12.32748 11.78882 9.929072 11.31927 12.64543 11.5685 10.86378 14.32529 17.28631 17.74572 17.08748 16.90873 15.3218 14.87652 15.57612 15.72205 15.87119 15.72217 15.76832 16.80227 16.46172 17.64037 17.64002 
  12.19527 11.26126 10.91108 11.51357 11.88646 12.82152 12.50746 15.10742 17.22483 18.49776 18.38831 17.32627 15.58188 14.17714 15.13854 16.44323 16.22381 16.09768 16.18013 17.33267 16.986 16.6601 17.38706 
  12.38316 10.9315 10.43465 10.56535 12.23314 11.90101 12.91362 14.65029 16.51646 18.22181 17.65647 16.37604 14.34329 13.71848 14.16311 15.41569 16.0204 15.15731 15.41514 16.67949 17.00598 15.89455 16.97392 
  11.30577 10.28441 9.945932 11.39331 12.30649 11.96362 11.83533 14.81513 17.35564 18.33072 18.05184 15.06798 14.07956 13.20424 13.25025 14.4972 15.11539 14.80095 14.70875 16.30007 16.03745 14.93294 16.70602 
  11.03609 11.63719 10.21918 10.6167 10.88359 11.86974 11.941 14.54243 18.29357 19.38234 16.87861 14.88691 13.15115 12.30057 13.38792 13.84875 14.85546 14.83342 14.70834 15.97845 15.37833 14.04472 16.18783 
  13.34986 14.78248 13.79236 13.17295 12.78349 12.70567 12.45625 14.42082 17.43404 18.18635 15.59477 14.50266 12.80653 12.16918 12.35644 12.73297 13.97416 14.38635 13.72448 14.86592 14.04201 13.30526 16.08755 
  13.90898 15.20264 15.9369 14.62706 13.72583 13.84371 14.70597 16.07141 18.13611 18.26731 15.36589 14.14722 12.81018 11.74895 11.73215 12.83022 13.8598 14.30199 14.02788 15.60954 14.73503 13.21847 15.28059 
  12.65622 13.64794 16.40841 15.1159 12.92702 14.19901 14.66136 16.53231 18.85972 18.87691 15.47901 14.02315 13.04305 11.90804 12.02419 12.18788 13.65167 14.87863 14.52754 15.9854 15.23429 13.42042 14.61205 
  13.16586 14.09224 15.83176 14.90314 13.58085 14.20237 14.74656 16.52872 18.05439 17.30598 14.93832 14.26765 12.49032 11.86487 11.99516 12.17831 14.06437 14.65313 14.55127 15.61084 14.85131 13.26807 13.69388 
  13.4008 14.57262 15.16415 15.02138 13.99929 13.74122 14.89686 16.43497 18.12129 17.57994 14.38773 13.34021 11.77359 11.37652 10.77275 11.62928 13.2342 14.23693 14.04607 15.04497 14.55735 12.52344 13.15258 
  12.58941 13.8753 16.66035 16.01036 13.94624 12.86215 14.91205 15.55473 17.18862 16.32199 14.95177 13.18178 11.81485 11.68483 12.10907 10.99695 12.64363 13.92735 13.98247 14.74136 14.57911 14.06635 14.69018 
  12.09153 13.84327 16.73779 16.22157 14.56728 12.37056 15.10593 15.10421 16.23268 15.88102 14.39125 12.42256 11.36509 10.85059 10.98364 10.62551 11.97378 13.79792 13.98639 13.31582 13.83742 14.01882 14.74063 
  14.35417 15.46001 15.97281 15.97872 15.05301 13.271 14.85252 14.8182 15.14593 14.99852 13.87889 12.85231 11.08199 10.78992 11.17402 10.49343 12.2052 13.27493 13.57413 13.50568 12.8814 12.89545 14.14766 
  15.11954 16.42943 16.03817 15.8695 14.94205 13.81331 14.11605 13.90077 13.72665 14.46259 13.50304 12.15606 10.48165 10.17465 10.4161 9.912415 11.29775 13.10989 13.52075 12.40094 12.22526 13.01442 14.24785 
  15.51706 16.90478 15.31721 15.63563 14.91287 12.8936 13.76248 14.32526 14.25805 14.36927 12.66967 12.0761 10.97497 9.48582 10.20874 10.5375 11.10218 12.73337 12.90427 12.4544 12.51494 12.39871 13.89166 
  15.83766 17.44094 15.59742 15.76098 14.73724 13.33175 13.09907 14.17982 13.79805 13.84968 12.17967 12.07957 11.02122 10.35668 10.95256 10.26702 11.17187 12.12873 12.05996 12.1097 11.7091 12.38269 13.66632 
  16.28121 17.97894 15.37001 15.30069 15.23669 13.93175 13.33285 13.58994 12.95194 12.94088 12.26932 12.17642 10.4537 10.94146 11.22272 10.16105 10.75595 11.85875 11.60317 11.8685 11.6532 12.81746 14.03694 
  16.65678 18.48001 16.02887 15.72611 14.88948 13.75212 13.00821 13.84919 13.9075 12.9391 12.38781 11.60378 10.67275 10.77736 11.13959 10.14937 11.14159 12.06991 11.99633 12.57936 11.69493 12.77214 13.83402 
  16.88027 18.81418 16.32965 15.89062 14.08585 12.9283 13.01355 14.09655 14.04627 13.70805 12.55457 12.14791 10.23291 10.64731 10.54774 10.34701 11.31384 11.85837 11.31565 12.61369 11.56602 12.69893 13.17958 
  16.86315 18.90851 16.74496 16.61073 13.71159 11.98781 12.37382 13.89134 13.772 14.01257 12.72022 12.62209 10.90394 10.19925 10.34741 10.43047 10.72284 11.89819 11.36431 12.45117 11.00326 12.7707 13.43279 
  16.55088 18.7428 16.48301 16.17187 14.55713 13.0492 13.09207 14.58359 13.44237 13.92513 13.25501 12.62462 10.7068 10.42912 10.70771 10.98847 11.20761 12.1386 10.86133 11.76409 10.89257 12.65522 13.48968 
  16.31133 18.53433 16.33694 15.77417 14.38188 12.95522 13.08407 14.4475 12.69577 13.27632 12.01401 11.38529 10.35604 10.744 10.78147 10.85441 10.54302 12.20752 11.1479 11.46902 11.70803 13.0295 13.62829 
  16.08484 18.22621 16.04434 15.1363 12.58238 11.89639 12.57116 13.53181 14.12426 14.20648 11.83826 11.15889 9.9625 10.32529 9.984441 10.63607 11.00616 12.5833 11.28041 11.6917 11.89614 12.90713 13.29698 
  15.68179 17.81505 16.00374 14.72812 14.84708 11.26076 12.04008 13.00523 14.13205 13.28368 11.21361 10.097 9.955743 10.26499 9.792438 9.863858 11.07145 12.3472 10.51815 10.99274 11.61275 13.19388 13.72785 
  14.55198 16.86638 15.5845 14.5592 14.73209 12.26241 10.81983 11.97698 13.21972 12.68782 10.91807 10.4126 9.59472 10.19615 9.584566 9.555467 10.64191 12.16234 10.07995 11.50145 12.38194 12.66189 12.71277 
  13.01004 15.59743 15.64693 14.08796 13.8486 11.90227 10.5667 10.79841 12.75414 11.89225 9.598926 9.206168 9.479238 8.989665 8.048623 9.163903 10.12297 11.2002 10.10107 10.59673 10.9262 11.29577 11.6629 
  13.91573 16.00849 15.0316 13.62253 12.95412 9.966305 10.1017 9.991467 9.596074 9.605503 9.343888 9.190509 8.865524 8.973734 8.919617 9.209815 10.08192 10.85753 10.00404 9.867455 10.36516 11.38496 11.67881 
  14.8278 16.76171 14.70668 13.6729 13.04084 9.941384 9.693533 10.51062 10.44051 10.19218 9.282916 9.032 8.986729 8.863693 9.409104 9.42415 9.590022 10.41179 9.549519 9.795614 10.16903 11.44894 11.5452 
  14.85072 16.98533 15.18039 13.29803 13.33594 9.623359 9.158119 10.2534 10.32673 9.353215 9.035355 9.315903 9.19397 9.060982 9.5213 9.576943 9.395948 9.738444 9.638402 9.812303 10.30602 10.88667 10.86766 
  14.6904 17.03413 15.28008 14.86307 14.88218 11.42855 9.846802 11.10628 10.92814 10.19661 10.62403 11.59551 10.50513 9.933274 11.18404 11.79196 12.68107 11.0639 9.99734 11.25623 10.75933 12.35911 12.92669 
  14.66271 16.80819 14.93047 15.11656 14.37493 12.24518 9.898335 10.41227 10.94259 9.562234 9.82956 10.52575 10.91687 11.3688 12.16531 13.12481 13.59379 11.50523 10.77534 12.16117 11.03448 13.25059 13.33833 
  14.15415 16.37178 13.99285 14.13001 14.35129 12.60274 10.18616 9.21813 9.342664 9.430008 10.54196 10.54733 10.69073 12.74719 13.28319 14.68884 15.19119 12.57943 11.37738 12.86609 11.79551 14.11093 13.75384 
  13.90892 16.035 14.79413 14.31352 13.37496 11.89549 10.30609 9.863048 9.564677 9.050714 9.766609 9.916749 9.736766 12.76469 13.29292 14.07608 15.34341 12.62214 11.65098 12.8247 11.93356 14.02104 13.58228 
  13.60849 15.51042 14.56466 14.77384 14.10931 12.00589 9.922771 10.00272 9.233529 8.90524 9.197906 9.287741 9.224826 11.10517 11.75283 12.47408 14.07385 11.81659 10.66715 11.48033 10.96836 11.95531 11.53177 
  12.63871 14.4074 14.43942 15.18537 14.02158 12.20864 10.17488 10.41652 8.884479 8.93423 10.05001 9.872001 10.05358 11.18607 11.13514 11.16448 12.71593 10.94455 10.9008 11.7045 11.48672 11.77495 11.38987 
  9.470507 13.9183 15.38312 15.29352 14.31409 12.62442 10.19246 9.844234 9.015466 8.645416 9.272799 8.95722 9.29404 9.842471 10.79908 11.95937 12.56295 10.62304 10.97254 11.55685 10.80731 10.67443 10.93567 
  9.689436 12.1227 15.04778 14.65507 12.59609 11.3594 10.29826 9.403631 8.485927 8.846539 8.928617 8.97279 8.096455 9.521686 11.13479 12.4645 12.32434 10.56936 10.17078 10.82339 10.85006 11.1649 11.38804 
  10.98928 14.05137 14.64129 16.00715 14.09278 11.13883 9.941337 9.854771 8.722589 9.290672 8.857082 8.991954 8.21919 9.559203 12.24082 13.63578 12.84687 11.02113 10.59677 11.14553 10.55739 11.74764 11.36405 
  12.63155 15.67096 14.68544 16.10676 14.76892 11.35606 9.204973 8.561191 8.504937 9.011736 8.932889 8.941728 8.647584 9.663374 12.39362 13.74842 12.88115 11.69389 10.94999 12.21108 11.19174 12.30093 11.63438 
  12.10139 15.86091 14.87627 15.61644 14.75041 11.31224 8.924519 8.241544 8.472549 8.927956 8.740888 9.051448 8.190333 9.251281 12.88241 13.90231 12.65679 11.89317 11.16821 12.41968 11.02865 12.32125 11.54047 
  11.38513 15.89259 15.55978 16.49071 15.93176 11.26818 9.120329 8.472344 9.292308 9.625569 9.129047 9.180802 8.588961 11.19132 14.33207 15.09559 13.56647 11.99036 11.61518 12.64687 10.69325 12.63932 11.8825 
  11.33489 16.01806 16.14634 17.10612 17.0976 11.94706 10.30982 9.953162 10.28553 10.87142 10.28722 9.505128 9.044598 12.70718 15.12737 15.38648 14.23596 12.67216 11.409 11.88481 10.85025 11.92107 11.80873 
  11.93121 16.69381 17.38387 17.6749 17.98742 13.28621 11.70719 10.43649 10.91934 11.77147 11.88422 11.13052 11.38345 13.80934 14.99402 15.98011 14.9559 12.99995 11.91326 12.93166 10.28887 13.55714 13.66834 
  11.91258 16.81928 16.99542 18.31518 19.14895 14.40946 11.01381 11.08671 11.39375 13.21682 13.19212 12.36498 12.30863 14.04659 13.71266 15.38462 14.6618 12.61307 12.04058 12.94022 10.91476 13.20159 13.73922 
  11.46789 17.38041 17.82477 18.2752 19.65061 15.74574 11.42097 11.61401 12.88716 14.96064 15.02348 12.79885 12.33007 13.6574 13.31771 14.41018 14.14105 12.70052 11.94269 13.00976 10.96539 14.06782 14.93854 
  10.82074 17.51527 17.82471 18.72139 20.24971 16.65067 13.19277 11.47689 13.75297 16.15981 15.89948 12.82615 11.78095 12.68962 13.03443 13.99312 14.44622 12.8687 12.09715 12.6253 11.04033 14.82887 15.41104 
  9.789128 17.60623 17.95166 19.12147 20.36291 16.45957 14.5175 11.95895 14.00584 15.58451 15.58284 13.09798 11.42349 12.38721 12.11636 14.10925 15.14098 13.66024 12.47174 12.31593 11.41416 15.35977 16.09952 
  10.03751 16.87819 17.14559 18.82435 19.69844 15.54911 14.925 13.47285 13.4415 14.02402 13.91976 12.55425 11.28404 11.29449 11.12666 12.90026 14.52994 14.08993 12.81467 12.58953 11.19166 14.11535 15.23785 
  10.93798 15.34559 15.48075 18.89332 19.13994 15.17251 14.3534 14.04348 13.22518 13.32802 12.83179 11.63252 10.8536 11.23855 11.16474 11.8378 13.7653 13.51799 12.46745 12.35123 10.79845 14.09694 15.23058 
  11.39907 15.94585 15.80858 18.99041 19.07972 14.78049 14.1376 14.13164 12.62071 13.95744 12.93874 11.53029 10.4269 11.58948 11.19517 11.35314 12.57921 12.77587 11.52973 10.72114 11.27394 14.35466 15.65102 
  12.50065 16.98081 16.2958 18.53826 18.03814 14.84136 14.63556 13.97874 13.23756 13.85454 13.35822 12.12458 10.75648 11.73495 10.99577 10.73339 12.26947 13.05336 11.80032 11.86604 11.52577 14.2759 16.10515 
  14.11917 17.50495 16.44348 18.44194 18.71619 15.23897 15.50647 14.00227 12.22151 13.66027 13.43431 11.87765 10.321 11.40836 10.86816 10.51086 12.05384 13.00603 11.8335 11.91356 11.91522 14.54532 16.75152 
  14.99379 17.92895 16.8975 18.70519 18.95995 15.73521 15.49823 12.87329 13.83215 13.62185 13.06669 12.05937 10.42773 10.73961 11.01464 10.88707 11.64818 12.58473 11.62818 11.58135 11.51648 14.55052 16.08885 
  15.54145 17.85107 16.98327 18.49837 17.73919 15.18074 14.75038 12.77507 14.06172 13.54413 12.30926 11.52931 10.36564 11.26507 11.33629 10.336 12.06124 12.62648 11.26266 11.69004 11.93004 14.61818 15.63297 
  16.0619 17.99921 16.89255 18.13565 17.83411 15.88005 14.93022 13.40796 14.59884 14.0794 12.40723 11.15436 10.18998 10.83631 11.137 10.82108 12.08101 13.16273 11.88929 11.08195 13.06093 14.60555 15.1582 
  16.41592 18.74591 17.24427 17.45222 17.14775 15.69397 14.63761 13.72616 14.99289 14.96222 12.8571 11.36845 10.90592 10.93947 10.72949 10.99991 12.06396 12.11599 11.73193 12.9482 13.76328 14.24961 14.51857 
  16.59169 18.33644 16.6729 17.92939 17.99472 15.11923 13.68678 12.95239 14.18037 14.98073 13.72894 12.48612 11.10576 11.19653 10.8321 11.57918 12.45367 12.50688 11.21697 13.96683 14.07828 13.39284 13.05171 
  16.37931 18.53983 17.11295 17.0422 17.68007 15.11545 13.4979 12.7253 12.83473 13.30683 14.44711 13.53261 11.2399 11.29604 10.66569 11.51261 12.52057 12.61519 11.1287 14.42613 14.21188 12.91073 13.62521 
  15.77711 16.81826 15.82284 16.26417 14.77699 13.92578 12.86666 12.48366 12.62949 12.08785 13.24057 12.46911 10.74334 10.64687 10.51021 10.52875 12.20348 12.29009 10.30058 13.55652 13.26817 13.30325 14.35986 
  15.82015 17.46225 15.50791 15.47983 15.53145 14.01977 13.48505 11.83372 11.23159 11.60364 11.24516 11.02851 9.994411 10.127 9.874244 9.952956 11.45186 11.57752 10.71201 11.31057 11.925 12.24183 13.13455 
  15.76488 17.33266 15.47503 15.67266 14.76409 13.55282 13.16781 10.86405 10.00527 10.30678 10.84636 10.38855 9.692888 10.62687 10.82284 10.76016 11.56705 11.6752 10.60772 11.25967 11.79259 12.14699 12.2975 
  15.79346 17.73031 15.68137 14.88974 13.31264 11.70068 10.75016 9.699461 11.59397 11.09019 10.58869 9.898376 8.929638 9.302467 9.637378 10.49889 11.22649 10.7973 10.0415 11.52228 10.99211 11.29036 11.60892 
  15.62056 17.43279 15.06023 14.50031 13.94001 11.11427 10.53508 10.24727 10.84226 10.2417 9.97341 9.786078 8.522814 8.529009 8.403295 8.864924 9.860689 10.12548 10.1109 10.64103 10.51032 10.91088 10.99742 
  14.92246 17.0135 15.04857 14.46992 14.52418 12.18941 11.24207 11.29555 12.0192 13.37018 13.99349 13.40953 12.65679 13.28639 14.09664 14.8326 14.6762 13.23665 11.35561 12.54946 14.12629 15.38962 14.67778 
  14.08548 16.16105 14.25179 13.72728 14.2951 12.56719 11.60147 10.79406 11.7847 13.46562 14.78914 15.1487 13.70664 13.32854 14.55988 16.36461 17.04605 15.91949 14.81793 16.99095 18.20212 19.37614 18.60077 
  13.56568 15.41561 13.65116 13.80278 12.35432 10.98944 10.19834 9.714845 10.88012 12.79503 15.31259 16.04332 15.07261 13.3139 16.37029 18.55497 19.72721 18.21986 16.88817 18.66231 19.42877 21.17449 21.34429 
  12.81052 14.82067 12.89596 12.89064 12.7278 9.884626 10.07492 10.31998 11.56146 13.61393 16.51963 16.62613 15.85953 13.70052 15.65685 20.09129 21.61243 19.28339 18.15581 19.5183 20.06223 21.13614 22.15731 
  12.4148 13.93226 11.91046 12.1541 12.28745 11.01894 10.5866 10.27591 12.31247 14.79986 17.28344 17.50844 16.46629 14.32873 15.63348 20.89158 21.659 18.84053 18.39215 19.79963 20.31273 21.56329 22.12888 
  12.91297 14.79987 12.71113 13.29154 13.44041 11.56215 10.49496 10.6033 12.03684 15.29325 17.49444 18.31451 16.88695 15.08641 17.40228 20.39707 21.17206 19.49123 17.8341 19.26415 20.69028 21.98025 21.82807 
  13.12175 14.64588 12.15094 11.35538 12.11017 10.95849 11.28366 11.61484 12.85101 16.09893 18.0477 18.62724 16.99242 15.1852 17.51782 19.87419 20.87705 19.48903 17.96088 19.17698 20.07501 21.74238 22.1084 
  13.00411 14.90964 12.85213 12.4642 12.19896 10.9285 11.4324 11.96293 13.98125 16.61734 18.89468 19.34385 16.26028 15.6907 16.13703 19.2197 20.6384 19.75793 18.03179 18.44206 20.21319 21.34424 21.91418 
  12.53915 14.56936 12.39721 13.01271 12.77172 11.97516 11.71099 11.24299 14.61106 16.86008 19.35301 19.43951 16.80698 15.8419 16.63168 19.33833 20.47225 19.18408 17.29919 18.15383 19.49329 20.5814 20.88459 
  12.15284 13.47757 10.60758 12.20281 13.0343 11.11187 10.97446 10.88111 14.17981 17.2116 19.27656 19.46944 16.21806 14.89002 15.36739 17.92939 19.41214 17.94695 16.58801 16.62714 18.35245 19.04712 19.02948 
  11.57865 13.99112 12.93595 11.18491 10.64367 10.56507 10.97282 12.65517 14.05695 15.92091 18.14322 18.23843 15.92175 14.87526 15.68978 16.53125 18.24236 17.28429 16.56872 15.94291 16.28687 17.53417 17.20242 
  11.28529 12.6209 11.65753 11.60241 11.58626 10.83581 10.27027 12.12041 14.267 15.48161 18.20242 17.39253 15.21907 14.75703 15.08057 16.2598 17.26633 17.07322 16.03636 15.15166 15.24807 16.5594 16.19338 
  10.91567 13.44223 11.93005 10.71006 11.31781 11.7807 10.22369 12.09495 13.98265 15.85598 16.988 16.82117 15.11764 13.72713 14.96652 15.93681 17.34565 16.43155 15.34467 14.63535 14.80754 16.85585 15.56012 
  11.37683 12.73687 10.39089 9.336987 9.831561 11.17227 10.51462 11.394 13.52753 17.06892 18.22289 16.58073 14.50473 13.4536 14.32212 15.69642 17.59365 16.24478 14.84533 14.05617 14.99095 16.48585 15.65335 
  11.15164 13.128 10.98207 9.10566 10.39393 10.70634 10.84463 11.06166 13.15317 16.16613 17.48376 16.13078 14.46833 12.60861 13.22436 15.39284 16.83496 15.80455 14.54673 13.42426 14.11045 16.32096 15.15128 
  11.15148 13.08558 11.24186 10.75418 11.27332 10.83193 10.16917 10.71663 12.71503 15.8612 16.86666 15.34324 13.83714 12.93292 12.91148 14.20864 16.26399 15.44005 14.26241 13.12186 13.5547 15.73553 15.21248 
  10.32435 11.80502 10.71903 10.12998 9.243139 10.32276 10.51204 9.885305 13.37798 15.24079 15.56827 14.59323 13.25549 12.08573 12.08292 13.83259 15.49082 14.31401 13.56349 12.58398 12.93209 15.15023 14.39397 
  9.55196 13.14198 12.09491 11.57915 11.7868 10.55393 9.688849 10.1536 12.66342 15.51936 15.60778 14.23114 13.59677 11.5554 12.35998 13.28365 14.88144 13.88197 13.16487 12.75849 12.51151 14.26958 14.14925 
  11.14226 15.27234 14.74488 14.24864 14.20611 12.25802 11.94632 12.10342 13.43625 15.97755 15.82204 14.68259 12.32215 10.68163 11.67134 13.07826 14.39713 13.78414 12.79398 12.32595 12.68034 13.86888 14.63672 
  11.82192 16.13304 15.37651 16.57404 16.01068 12.65684 12.84768 13.69928 14.84385 16.28752 15.21064 13.4215 11.93544 10.11553 10.96781 12.73654 13.93202 13.7597 12.7366 13.53805 13.78219 14.34586 15.65966 
  11.16597 15.60295 14.91919 17.65105 17.25035 13.94595 13.03636 13.82953 15.81298 16.99026 16.38386 13.44202 10.67358 10.54342 11.52007 12.54199 15.2073 14.85921 13.04932 13.6851 13.8579 15.4532 16.68387 
  10.51662 15.43471 14.99521 18.16862 17.72067 15.69889 14.12805 14.29307 16.52756 17.99657 16.64942 14.49547 11.61017 10.99339 11.64379 12.93506 15.5309 15.33103 13.2612 14.26317 14.55778 16.36224 17.4065 
  11.90673 16.09601 15.16525 18.22427 17.79523 16.65013 14.95471 14.57647 17.00873 18.48756 16.99238 14.55247 11.84007 11.13602 12.00641 12.06635 14.78719 14.60945 13.60264 14.37275 14.25254 15.76529 16.61269 
  11.77753 15.94984 15.05677 17.97174 17.373 17.41905 15.6169 14.73355 17.46013 18.81142 17.14954 15.49692 12.20508 11.44368 12.23865 12.73304 15.18181 14.84058 13.51544 14.6217 13.58304 15.94587 16.794 
  10.41407 15.72303 15.14866 17.70807 17.07916 17.71496 15.88798 14.64032 17.55777 18.87132 17.8488 15.62309 12.12702 11.63624 12.23076 13.35091 14.845 14.49619 13.87168 14.96588 14.34432 16.30333 17.20194 
  11.354 16.18204 15.44579 17.66172 16.96994 17.44448 15.66464 14.72198 17.39568 18.71137 18.22394 15.33202 11.91729 11.85658 12.49095 13.32615 14.09961 14.23158 14.58506 15.50219 16.31929 16.47316 17.44572 
  11.67818 16.44463 15.64772 17.72272 16.90357 16.48343 15.1468 14.66261 17.37626 18.67665 18.43763 16.17672 12.68573 12.12047 12.56644 13.53683 15.69811 15.11192 15.10006 15.99792 16.72847 15.30137 16.12518 
  11.81452 16.51302 15.8351 18.05065 17.12857 14.78708 14.79863 14.54577 17.14968 18.40389 18.33661 16.81234 12.45416 12.26822 13.33935 13.42922 16.02875 15.44887 14.45614 16.00752 17.02212 15.07159 15.84839 
  11.66775 16.09326 15.45326 18.56066 17.73905 16.56577 15.80819 14.98165 16.93577 18.54793 18.7565 16.84025 13.44262 11.95572 12.87634 13.43538 15.80291 15.6723 14.43189 16.38626 16.87126 15.30465 16.12741 
  11.18154 15.3121 14.98711 18.80246 18.00036 17.79539 16.50552 15.6597 16.54539 18.6693 18.88139 16.73647 13.84423 12.21505 12.68858 12.8891 14.96932 14.98377 14.47017 15.99248 16.69073 16.01393 17.04851 
  11.39253 14.84143 14.70809 18.75023 17.97756 18.1698 16.63218 15.90916 15.77523 18.6932 18.87343 16.30272 14.07881 12.17943 12.72991 13.99461 15.09284 14.90023 14.86968 15.68508 16.95284 15.82605 17.11721 
  11.86377 15.31133 14.59444 18.38207 17.6422 17.60731 16.2254 15.42659 15.86426 17.45585 17.54583 14.96311 13.32989 12.32241 12.82136 13.80614 16.04843 15.70225 14.60214 15.79691 16.55345 14.48477 16.21475 
  11.73533 15.70459 14.84644 17.80892 16.99964 15.85337 15.77551 15.02913 15.56236 17.03756 17.23155 14.80542 13.2883 11.4158 12.61335 13.34564 15.46394 15.25104 14.61511 15.47291 15.86379 15.43297 16.33346 
  10.81638 15.52154 14.9054 17.31289 16.38354 15.1073 15.92739 15.73638 16.55172 18.13293 18.54304 15.54773 13.22721 12.31508 13.16266 14.46879 16.11985 15.48509 14.86392 14.97309 15.37611 15.07204 15.87638 
  10.26267 15.25113 14.67625 17.21563 16.31719 14.77018 14.92522 15.68053 17.32211 18.43587 17.91074 15.57454 13.49049 12.60371 13.12548 14.11297 15.84021 15.45053 14.63433 14.96083 15.49248 13.76896 15.98103 
  10.69819 14.90186 14.28961 17.07143 16.24204 13.46402 14.20674 15.63925 17.09035 18.04039 17.88895 15.37756 12.4007 12.11112 12.55157 12.96375 15.69617 15.59222 14.15425 15.03544 15.54571 13.44852 15.65409 
  9.915021 13.82648 13.51551 16.30905 15.34114 15.36788 15.25857 15.57581 16.88137 17.62993 16.70799 14.8589 12.48502 11.65991 12.48844 12.89608 15.70182 15.3114 14.08729 15.36026 15.73317 12.51652 15.22122 
  8.68051 12.45413 12.77622 15.34519 14.32071 14.90527 13.81296 14.36697 15.78108 17.00771 16.51527 13.94231 11.84495 11.75392 12.35656 13.10109 14.77411 14.72279 13.45488 14.19114 14.58668 12.4133 15.44579 
  10.05589 13.40399 13.02372 15.95843 15.3619 14.07744 14.53119 14.84178 15.96589 16.57776 15.83888 13.69824 11.78194 11.49591 11.9167 13.30375 14.69968 14.48295 12.85517 14.20968 14.40717 12.00698 13.85596 
  11.27062 14.20996 13.00546 15.81915 15.0768 14.00317 14.15812 14.63979 15.93456 16.13581 15.80585 14.0422 10.93639 11.04755 11.28759 13.27676 14.53805 14.24715 12.69541 13.41502 13.21583 11.7954 14.09292 
  11.82292 14.00475 12.70146 14.15521 13.96183 13.86355 13.82776 14.55097 15.977 16.62114 15.52749 14.43003 11.57255 11.82603 11.46633 12.81295 13.52481 13.20844 12.48258 12.9837 12.70388 12.01538 14.10332 
  11.81733 14.02889 12.67105 14.56858 13.83397 13.15577 12.52465 13.75595 15.65288 15.53403 15.14229 13.78594 11.95139 10.92552 12.43795 12.72144 13.77396 13.37051 12.03247 13.05484 13.05454 12.07504 13.45452 
  11.53573 14.69543 13.47587 14.54975 14.09588 12.23716 11.75021 12.82979 16.3925 16.57847 15.83274 15.41968 12.53094 12.0738 12.46284 13.14491 13.65205 13.70883 12.48045 13.69684 14.24071 12.90326 13.78031 
  10.46279 13.28604 12.97831 14.62308 13.95662 12.26015 12.11266 13.03825 15.73407 15.80908 16.49609 15.14229 11.56523 11.61488 11.99993 13.00464 14.1577 13.95174 12.51486 14.32073 14.42474 13.37241 14.48748 
  10.48366 14.08517 13.39815 12.98214 12.39788 11.79398 12.84884 13.57857 15.40684 15.60652 15.99256 14.35739 12.09316 12.1796 13.10014 13.46264 14.51066 14.02861 12.81611 14.63739 14.12812 13.86828 14.78857 
  10.19182 14.69601 13.93462 12.37111 11.73293 10.65019 12.18164 13.14647 14.66128 15.14137 15.29343 13.7568 11.9698 12.07825 12.39925 13.19032 14.63404 14.42319 13.39776 15.18882 14.68679 13.48536 15.13381 
  10.34476 14.25978 13.3268 12.39499 12.02569 10.45536 10.55758 11.70175 15.12028 16.20647 14.50963 12.94513 11.49872 11.71179 12.57724 12.75515 13.61092 13.75198 13.41445 15.13885 14.47829 14.33477 15.76451 
  10.20487 12.92972 12.66008 12.42735 11.86889 10.36294 10.88357 12.17473 14.84048 16.0851 14.19029 12.64169 11.7309 11.68961 12.55493 12.70489 13.73792 13.65674 13.2528 14.73102 13.98869 14.62711 16.16679 
  9.423117 12.70624 12.09342 10.86123 10.24333 9.513165 11.43117 11.95184 14.92405 16.06582 14.44456 13.55055 11.02903 11.31061 12.30381 12.54599 13.94664 13.53971 13.29417 14.64221 13.89266 14.74018 16.82738 
  9.589171 12.47112 11.4502 9.131975 10.2197 10.17758 11.90142 12.39148 15.51737 16.71535 15.28426 13.4031 11.43478 11.36541 12.49403 13.41569 14.58072 13.76381 13.98231 14.93766 14.71383 14.92337 16.24941 
  9.503296 10.60735 9.271545 9.716817 9.667707 10.52781 11.40006 11.39293 14.60591 15.02798 15.03264 13.64719 11.30165 10.74731 12.65038 13.62129 14.64687 13.53882 15.08017 15.38536 14.1648 14.99333 16.10574 
  9.115908 11.43826 10.47298 10.14771 9.880108 9.726262 10.82772 12.05128 14.10969 15.54203 15.91383 13.69349 11.85291 11.131 13.34072 14.18404 15.38566 14.57817 14.72091 15.19623 14.41386 14.31567 15.83257 
  9.310917 10.28504 9.811528 10.01983 10.05451 9.4747 10.45637 11.62435 13.92147 16.01029 15.57332 13.24274 11.89735 11.36156 13.09891 14.07911 15.26106 14.44157 15.20087 15.08729 14.25182 14.66974 15.79354 
  9.030734 10.77806 10.31404 10.89988 11.18181 10.48961 10.15753 11.69851 14.42295 15.29466 14.48458 12.67693 11.36603 10.84135 11.80535 13.1063 15.03863 14.38514 14.50504 14.37171 13.37179 14.37903 15.7146 
  7.50856 11.28577 10.72007 10.24735 10.17675 10.65663 10.4188 11.49671 14.89423 16.14469 14.11847 12.56653 11.28391 10.65832 11.76379 12.46559 14.1188 13.54819 13.30948 13.31488 13.49415 14.57029 15.60327 
  8.540401 11.55971 10.55634 10.50483 10.84564 11.33433 10.85704 11.34962 13.89402 15.64289 14.49759 12.96604 10.80758 9.884405 10.34586 11.19268 13.79956 13.10055 13.12975 13.08347 12.67242 13.66006 14.28579 
  11.08979 14.00895 13.28101 13.3758 13.54885 12.20103 11.48327 11.56215 13.08175 15.66193 14.28371 12.17407 9.616703 9.735795 10.51737 10.66526 12.92028 12.44681 12.94642 12.86102 11.83186 12.5984 13.619 
  13.14398 15.67154 15.34065 16.24163 15.0646 12.54676 11.68441 12.30696 13.41859 15.11965 13.7802 11.35566 9.2933 9.63155 9.780502 10.87477 12.77262 12.19879 12.55715 12.45354 11.55077 12.7023 13.38533 
  14.07658 16.64854 16.01492 17.56435 15.44176 13.57488 11.04171 12.65898 13.55481 14.86006 13.82938 10.95063 9.030316 9.329757 10.07974 11.01514 14.26132 13.92831 12.94692 12.58781 11.56843 13.6175 14.11914 
  14.75746 16.99999 16.77552 18.02952 15.90656 14.6823 12.81055 14.08961 14.8099 15.94903 14.67507 12.65691 10.4475 10.15641 10.20949 12.54788 15.05997 14.94959 15.0851 14.83846 12.45193 15.8906 16.3022 
  15.11706 17.16695 16.8549 17.89547 16.15163 14.86321 13.19364 14.13913 14.60327 15.88921 14.64402 11.80295 9.991564 9.918747 10.29794 12.53781 15.5761 15.41474 14.87424 14.64059 12.25671 15.93182 16.2683 
  15.40835 17.45925 16.58469 16.7639 16.27665 14.75572 13.4018 13.85402 13.12921 15.10079 14.71053 11.99744 9.913205 9.705381 10.4648 12.13035 15.2068 15.025 13.92012 13.05841 12.5278 16.01011 16.19179 
  15.45827 17.28377 16.69241 16.92484 16.34339 14.48242 13.334 13.81861 13.36756 14.88102 14.33365 11.80426 9.697133 9.240011 10.16598 11.92516 14.61208 14.43869 14.29321 13.34631 12.60551 16.59318 16.53483 
  15.13229 16.73378 16.89245 17.1393 17.00265 14.96519 13.72409 14.02927 13.25723 15.83806 14.40103 12.19008 10.30029 9.71996 9.842927 12.35917 14.15221 13.89012 14.0526 13.6017 11.93211 16.53617 16.72011 
  14.57705 15.6379 16.80176 16.86314 16.66628 14.47865 13.58029 14.06467 13.77963 16.02653 14.70159 12.60434 10.41194 10.06278 10.46712 12.41631 14.66149 14.67236 13.84786 12.99362 12.37743 16.0895 16.24062 
  14.77465 16.17302 17.21569 17.31233 15.77169 13.20695 14.42118 14.81845 14.40012 16.4256 15.06712 12.46396 10.4278 10.54279 11.23261 13.11649 15.14829 15.03464 14.55053 13.6049 12.95183 16.86971 16.66895 
  15.05606 16.52065 17.57998 17.69038 16.2915 14.25601 14.84489 15.36414 15.11178 16.8064 15.81776 13.49685 11.06283 10.37466 11.31837 13.62332 15.70944 15.07266 14.53792 14.15806 14.11397 17.4218 17.03832 
  14.91461 16.54216 17.72679 17.76228 16.31701 14.88375 14.68025 15.63748 16.55472 17.25992 17.16825 15.38784 12.14931 11.18112 11.89017 14.01893 16.31654 15.77574 15.18395 14.93204 14.24931 17.65414 17.32909 
  14.06235 15.76193 17.18796 17.04869 16.16309 15.03378 14.50389 15.27548 16.01657 17.05204 16.83701 15.13547 11.94728 10.58079 11.4934 13.36317 15.81689 15.14827 14.4666 14.55218 13.48098 17.22405 16.73607 
  12.20449 13.63824 16.85458 16.60645 16.1426 16.12964 15.33291 15.36466 16.87278 18.04861 17.6068 15.85586 12.63215 11.99263 12.60324 14.04203 16.81243 16.22995 15.30362 15.6595 14.4412 17.11238 17.13072 
  10.72764 13.3548 16.83893 16.19029 15.82427 16.1692 15.66567 15.07302 15.77989 16.54188 16.30075 15.60935 12.34325 10.73788 11.86701 13.85012 16.31892 15.11082 14.34861 14.96774 14.47254 14.20635 15.01912 
  11.60129 14.09942 16.75742 16.20039 15.56324 15.11675 15.08574 14.86573 15.50838 17.15246 16.86691 15.52252 12.93029 10.73168 11.6597 14.13603 16.52929 15.7967 14.49399 14.30547 13.43089 14.71824 14.45458 
  14.02859 15.28391 15.8077 15.34497 15.03183 15.79032 16.02132 15.8215 15.79886 17.51942 17.64467 15.42086 12.91273 10.42257 12.27133 14.28747 16.08209 15.12793 13.71037 14.9558 14.61221 14.93433 14.82792 
  14.18848 15.49555 16.91608 15.82519 14.52658 13.71498 16.10671 15.94508 15.64377 17.88175 17.91087 15.55968 12.45656 10.9743 12.02333 13.80416 15.67876 14.38375 13.71993 15.67794 15.32651 14.47271 14.63666 
  12.46225 13.96751 15.86773 14.64494 14.40544 14.4847 15.3736 16.16685 15.92646 17.85421 18.09699 16.27834 12.37715 10.08867 11.90742 13.48527 15.4419 14.10502 13.62834 14.96597 14.83753 15.32119 15.06888 
  13.05211 14.65883 16.07732 15.72431 14.58461 14.83052 15.30428 15.05263 15.02916 16.23183 15.43935 14.29991 11.75905 11.07148 12.31495 13.43882 14.98901 13.66362 12.9674 14.51913 14.48279 14.95528 14.8023 
  12.55515 14.89801 14.75364 14.68671 14.14907 14.13834 14.59004 13.6544 13.79198 16.03756 16.11797 13.08031 11.24561 10.83997 11.88435 12.2136 13.93883 13.77705 12.87251 14.5708 14.71351 14.24566 14.39286 
  13.83093 15.56555 14.57585 14.22723 13.8579 13.36604 14.29341 13.54445 13.8807 14.93316 14.51684 12.76485 10.79458 10.58632 11.6082 12.35924 13.70661 13.07612 12.48223 12.69072 12.71655 12.41708 13.33185 
  14.18465 15.79808 14.80991 14.02056 12.90899 12.66223 13.70633 13.82998 14.50648 14.96112 15.03471 13.67727 10.52017 10.59638 11.55017 11.89342 12.99354 12.42688 11.95904 12.05783 11.865 12.08287 12.86125 
  13.25803 14.87615 13.40272 12.26935 12.22413 11.50442 12.41747 13.64918 15.23524 15.79496 15.65018 14.92247 10.86561 10.38056 10.77693 10.56645 12.19267 12.05079 11.84969 12.24822 11.36017 11.14598 12.25312 
  12.45111 13.44244 13.22482 12.22398 12.6735 12.62569 12.88445 12.80736 13.84655 14.60308 14.62763 13.62638 10.40659 11.13378 10.61906 9.895057 11.55708 10.91639 10.84196 11.58675 11.38106 11.41171 12.38584 
  11.51846 12.357 13.54436 12.86645 11.85491 11.5113 12.90373 11.86775 12.45533 13.17408 13.00485 11.74634 9.947218 9.551273 9.567058 9.890096 11.33025 11.05918 10.6337 10.56511 10.17946 10.49385 10.70985 
  10.98355 12.91067 13.64736 13.10762 10.84069 10.44838 11.49005 13.12592 12.14138 12.09276 13.07966 12.63638 9.730008 9.848679 9.347866 9.414342 10.63352 10.80758 10.35773 10.48653 10.42719 11.00619 10.87458 
  11.5681 13.21478 13.99549 13.15111 9.779534 10.84289 11.58694 12.34813 12.21469 12.28861 12.12051 10.8032 10.08394 10.18741 9.829264 9.272332 10.70571 10.90198 10.80318 10.46099 11.22602 10.77159 11.26807 
  10.92434 12.3015 13.37657 11.97633 9.846909 10.4243 11.08952 11.22303 10.78325 12.41337 12.66613 11.84448 9.250697 9.51819 9.804947 9.046823 10.62793 10.31114 10.24323 10.33592 10.83164 10.2066 10.32049 
  10.66577 12.14629 11.68034 9.369942 10.3728 9.903213 10.77546 11.68394 11.14858 12.63394 12.94571 11.37593 10.00287 9.558322 9.845166 10.15729 10.41532 9.908718 9.831599 10.41129 10.63024 10.63221 10.3509 
  11.6184 13.30724 11.55282 10.44678 10.41798 10.1706 10.90822 10.41916 10.55029 11.9752 13.67633 11.79162 10.79402 9.887626 10.8025 10.48727 10.37634 10.34983 10.57088 10.24781 10.26391 11.38719 11.21457 
  12.47586 13.6613 11.17755 10.26591 9.585545 8.900165 11.20114 11.92466 11.11244 11.51987 12.53722 10.63915 9.778065 10.18182 10.7475 10.43095 11.06562 10.47551 9.800613 9.590487 10.01961 10.74588 10.42597 
  10.77701 12.82747 11.43204 10.35398 8.763439 9.405526 10.82308 10.63848 11.23412 12.39658 12.69621 11.6574 9.802265 10.72791 10.38412 9.456212 10.21749 10.77777 10.52131 10.53893 10.67069 11.10977 11.42564 
  11.11245 11.89655 11.72092 11.20401 9.289854 9.801051 10.75733 10.49686 11.86391 12.7017 13.00109 11.37016 10.12052 10.20385 9.899943 10.12168 10.17335 10.37608 10.25884 10.2047 10.5204 11.22245 11.59654 
  10.07293 9.634727 11.54807 11.43805 9.092267 9.683016 10.62169 11.24899 11.31559 11.98399 12.95545 12.45842 10.71746 10.80228 11.2789 9.754478 10.40236 10.25889 10.37339 9.958531 10.25048 11.00226 11.04672 
  10.42046 11.45978 11.63636 10.63461 9.173406 8.947865 9.959992 9.868871 11.08009 11.51371 12.75934 12.10706 11.59692 10.57697 11.32946 10.17161 10.1469 10.40671 10.2813 10.1093 10.52484 11.39623 11.14704 
  10.24291 11.15639 10.16661 9.884758 9.216879 9.206462 10.319 10.26303 10.83201 12.79486 13.33239 11.75389 10.45811 9.76089 10.24667 9.601921 10.60552 10.51551 10.36004 10.27134 10.3252 11.17595 10.70015 
  9.422491 10.00307 9.304786 9.541505 9.374505 9.497077 9.545082 9.764844 10.62842 12.44954 12.87034 11.195 9.428076 10.17063 10.55802 9.774949 10.74514 10.18642 10.23282 10.14036 10.41611 10.48228 10.65022 
  9.431662 9.925019 10.80831 9.614099 8.896067 8.509177 8.498697 8.813649 10.72018 11.46818 11.79037 10.94821 10.24577 10.29312 10.4467 10.298 11.07519 10.74677 10.3395 10.43149 10.75678 12.64705 11.43539 
  9.448807 10.96071 9.576197 7.308518 8.883255 8.301652 8.766844 8.846759 10.22622 11.76619 12.58748 11.67644 10.72528 9.9915 10.21584 10.17676 11.38863 11.19318 9.839796 10.4516 11.10404 13.23044 11.87121 
  10.62994 11.5706 10.67056 9.265289 10.66641 8.995152 7.84528 9.58221 9.973509 10.98804 12.24823 11.57158 10.72447 10.14804 9.541183 9.675903 11.06965 10.92531 10.45405 11.09323 10.41595 11.13436 11.58708 
  10.39545 11.44841 10.55975 9.705544 10.74701 9.747647 8.344221 10.04484 10.6118 11.2215 12.51557 12.40113 10.53294 10.54167 10.73098 10.17531 11.36208 11.10819 10.5466 10.93015 10.72956 11.21366 11.42109 
  9.808684 10.54995 8.585596 8.609066 9.104267 8.886292 8.315665 9.341417 10.22613 11.00258 12.79302 12.32948 11.03078 10.42751 10.86653 10.40495 11.54998 10.99841 10.25156 10.56426 10.5004 11.59432 11.25886 
  9.783687 9.554473 9.224267 8.305874 9.829804 8.624431 9.106644 9.878937 9.975676 11.34404 12.81888 12.35782 11.02464 10.09095 10.79956 10.69735 11.30436 10.86825 10.91094 10.57113 10.4626 11.75406 11.67169 
  9.964293 8.91133 9.597994 8.38109 10.30375 10.12096 9.371438 9.138732 10.23402 11.95144 12.71362 11.4016 10.57952 10.13566 10.54746 10.2636 10.74369 10.67116 10.78272 10.88782 11.01974 11.17345 11.2694 
  7.987907 8.382757 9.251793 8.551911 8.960312 9.169497 9.091488 10.20918 11.97596 10.86922 12.52002 12.28877 10.51484 10.44818 10.56632 10.83528 10.84797 10.92687 10.48957 10.72594 10.87492 11.48538 11.44997 
  9.208866 8.503493 9.972482 9.071673 9.462955 8.664878 7.662143 9.897245 10.64185 10.91716 11.13872 12.12932 11.36929 11.34245 11.2402 10.83909 11.03287 10.97415 10.68303 10.55697 10.5056 11.69601 11.68266 
  9.830456 7.644547 9.789823 9.708093 10.68528 10.30543 9.51989 9.725322 10.94882 11.60964 13.0219 12.9686 12.24573 11.09353 11.58634 11.25884 10.88388 10.70734 11.0613 10.88753 10.72593 12.06754 11.55765 
  10.1434 9.600281 9.03609 9.28277 9.389798 9.782507 9.522714 9.155346 10.51925 11.83723 12.88527 13.05917 12.5973 11.04044 12.4058 11.76693 11.41435 11.29793 11.33523 11.57119 11.21824 12.67894 11.60467 
  9.044415 9.35461 8.125829 8.155436 10.21073 9.420408 8.485256 9.519516 10.80159 11.03508 12.98993 12.86469 12.11976 11.60614 11.22908 11.30737 11.26812 11.07872 11.49184 12.23885 12.01657 12.81706 11.95831 
  9.557627 10.25238 8.452591 8.784152 10.9992 9.823544 9.510536 10.38744 11.27521 11.65287 13.02484 12.76013 11.33446 11.90731 11.69084 11.35221 11.25088 11.1325 11.48025 11.96735 11.96839 12.37707 11.87469 
  10.23095 9.161502 8.99103 8.923839 9.805343 9.837078 10.10092 9.621623 10.84666 11.88772 13.02984 13.09626 11.90776 11.65839 11.98773 11.63274 11.56222 10.86827 11.27272 11.77922 11.91178 12.68476 12.05236 
  8.874165 7.809851 8.695865 8.223797 8.763299 9.566321 8.770155 9.008782 11.19149 11.83618 13.52786 13.19089 11.91928 11.96163 12.16576 11.49836 11.79749 10.94192 10.88142 11.00608 11.66361 12.51995 11.84705 
  8.437733 8.480018 7.988172 8.590461 9.420288 9.875575 9.504238 9.388609 10.90206 11.65753 13.35867 13.6816 12.48646 11.95356 12.16133 11.43572 11.42155 11.1403 10.60003 11.26259 11.62654 12.52154 11.82845 
  9.964341 8.923758 9.201259 9.687395 9.499485 9.23505 8.127786 8.023572 10.40248 11.75716 12.92915 12.92148 11.44932 11.59385 12.10262 10.90831 11.27934 11.05236 10.57544 10.54616 11.69068 12.66987 12.08684 
  7.982019 9.21278 8.003357 9.466326 10.96768 9.979166 8.679527 8.800684 10.64961 10.21235 12.66046 12.89555 12.06681 11.04834 11.27791 11.41866 11.11113 10.78568 10.56767 10.6112 11.04461 12.21314 11.62118 
  8.391383 9.194236 9.299546 9.597569 11.07862 9.571204 8.833894 8.325615 10.67207 11.36724 11.90812 12.3979 11.69843 11.26144 10.98187 10.62434 10.55542 10.79755 10.96708 11.02939 11.2887 11.80884 11.33321 
  8.614913 7.849565 7.212549 9.800451 10.48496 9.617368 9.147942 9.296861 10.13177 11.20514 12.3896 11.95667 11.57815 10.64794 11.444 10.93266 11.42423 10.72211 10.74878 10.75023 10.98787 11.26347 11.04169 
  7.535953 8.379319 8.465982 9.019888 8.474906 8.644684 9.380523 9.011086 9.468113 10.66582 11.96134 11.67335 10.88397 10.53693 10.82633 10.25402 10.21263 10.32749 10.5247 10.19471 11.01541 11.35801 10.77586 
  8.953902 8.443554 8.80369 8.623836 9.60961 8.834076 8.999572 8.835788 10.4686 10.42089 11.60229 11.5832 11.33114 10.05566 11.09636 10.20237 10.258 10.62062 10.48857 10.14489 11.40099 11.07088 10.64445 
  8.500578 8.544858 7.815175 8.512371 8.136365 8.380095 9.461661 9.8258 9.200838 10.49927 11.48638 11.7742 10.49608 9.742468 9.315075 9.044184 9.661458 9.323888 9.979736 10.51669 10.50003 10.75289 10.55279 
  8.140522 7.9291 8.467335 7.948473 7.84909 8.878713 9.228608 9.341762 9.541382 9.544453 9.793461 9.858119 10.06971 9.451279 8.51177 8.683918 9.586095 9.493937 10.0905 10.45376 10.50862 10.45298 10.89592 
  9.174277 7.910554 8.41324 7.574174 8.196404 8.136746 8.121965 7.858237 8.164356 9.198856 10.01476 9.891054 9.341989 8.451241 8.975142 8.96706 9.390165 9.745036 9.87119 9.820464 10.01453 10.3652 10.31961 
  9.053761 8.623909 7.898996 7.378081 7.484929 8.12253 7.869719 8.111196 7.660993 7.883986 8.894523 9.96628 9.412325 8.602212 8.978284 9.126224 9.584894 9.770082 10.27369 10.15531 10.25294 10.89211 10.43277 
  8.431073 9.066086 7.605909 6.630337 6.98281 6.810944 7.623994 7.17003 7.13573 8.053357 9.209491 9.415495 8.662474 9.053435 9.397181 9.446578 9.86334 10.01845 10.17869 10.7758 10.44624 10.38814 10.5821 
  8.942445 8.941487 8.016459 8.513492 8.322972 7.758252 7.450797 7.878129 7.665907 7.39168 7.762891 8.652268 8.399451 8.6668 9.206373 9.121785 10.34451 9.926603 10.24554 10.46929 10.76244 10.86812 10.72856 
  8.17054 7.23271 7.703678 7.791962 8.005906 6.999223 7.472554 7.710637 7.925411 7.891777 8.577125 8.739526 8.736339 9.344995 9.341995 9.167021 10.40602 9.640493 9.765063 10.79863 10.77817 10.97881 10.41767 
  8.539558 8.259498 8.565295 7.196165 6.610144 8.112316 7.271636 6.927933 7.946772 8.102273 7.875247 8.079875 9.375752 11.33052 10.81717 10.21815 10.39465 10.33572 9.731151 10.2445 10.49189 11.12053 10.76395 
  8.240433 8.050691 8.147851 7.085478 6.498564 7.444286 7.85869 7.59498 7.437051 8.215886 8.160721 8.345738 8.091083 9.35317 9.591409 9.448403 10.31442 11.06631 10.24245 10.9984 10.90544 11.08032 10.89513 
  8.550308 7.967917 8.127562 7.298366 7.333465 6.49415 7.352107 7.673339 7.976416 7.147206 8.484519 8.401263 8.720249 9.223616 9.384628 9.444779 9.571202 10.1814 10.25545 10.58296 10.57897 10.69769 10.3299 
  8.480454 8.143271 8.400046 8.316236 8.42107 7.541131 8.236753 8.256838 7.837097 7.3845 8.376999 8.782026 8.372799 9.166592 9.061806 8.605547 9.607065 10.09919 10.09029 10.41691 10.74461 10.34597 10.42431 
  8.515889 7.59104 8.670294 8.427706 8.82935 8.922324 7.844193 8.359896 10.19205 11.45637 11.92006 11.58537 10.71724 11.57984 12.44317 12.37348 11.76033 10.8328 10.71005 12.17016 13.12123 13.49667 12.32615 
  9.425822 7.528139 8.268925 8.60403 9.334135 8.938853 8.31205 8.903502 10.90029 12.15107 12.53318 12.27021 11.24565 12.047 13.05301 12.96288 12.37509 11.36883 10.95156 12.79793 14.03581 15.59215 14.59917 
  9.623832 8.175732 8.02983 8.535868 7.534457 7.092595 7.973546 8.616755 9.133872 9.538013 9.353437 10.20192 10.09133 10.06388 10.93718 11.69838 11.60221 10.81748 11.46689 14.67081 16.11157 18.09854 18.4955 
  9.071504 7.593831 8.569519 7.444993 7.027435 7.670381 7.727194 8.79417 8.78412 10.26749 10.49481 10.5346 10.10987 10.96909 11.81418 12.57517 12.30537 11.48952 13.16938 15.3521 18.07163 19.78831 20.41618 
  9.247075 8.342739 8.020856 8.513025 8.168929 8.344195 8.860204 9.17792 10.17791 11.50909 11.07768 11.59347 11.83874 13.10125 13.83476 13.61449 13.87037 13.02653 13.97317 17.72324 19.60029 21.56919 21.07852 
  9.474851 6.999008 7.357038 8.650279 9.284972 10.27271 10.72347 10.36143 11.41015 11.70805 12.40401 12.9837 12.64802 14.57075 15.13742 14.90884 14.86531 14.43093 15.03153 18.73455 20.89188 22.41504 21.75938 
  8.43823 7.602589 7.820417 9.932999 10.68121 10.14239 10.38401 10.35759 12.41399 12.78729 13.28866 13.30149 12.69712 15.16579 15.84278 16.40749 16.29351 14.39652 16.20181 19.23029 21.44838 22.89708 22.22457 
  8.946985 7.907045 8.086035 9.623978 11.20953 10.96355 11.34051 11.00923 13.56718 13.85 13.94107 14.30479 14.27669 15.13896 16.49326 16.82891 16.4699 15.615 17.10016 20.28425 22.26842 23.02636 22.56798 
  8.171475 8.090663 8.552567 10.50998 10.79045 10.79585 11.77593 11.28996 13.5619 14.10327 14.95813 15.71215 14.54693 15.72432 17.80629 18.37472 18.3487 16.05388 17.43175 20.30505 22.645 23.84378 23.11084 
  8.994973 8.401255 10.09553 11.87671 11.69541 11.35841 11.40016 11.24762 13.69226 14.48686 14.93769 14.94588 14.56451 15.99824 18.65324 18.06892 17.89767 16.18208 18.38502 20.50726 22.87932 22.78703 22.46937 
  9.782323 8.487134 9.374578 10.02132 10.83674 10.82025 10.9053 12.09562 13.35448 14.63213 14.8919 14.47755 14.00885 15.77607 18.01674 17.6033 18.13421 16.40895 17.83689 19.94169 22.52357 22.9064 22.88938 
  9.201106 9.515572 10.08597 10.27731 11.7706 10.57605 11.69659 11.87735 13.37034 14.23273 14.85306 14.77176 14.40004 15.35916 18.42156 17.62954 17.38562 16.92805 18.14824 19.61706 22.00475 23.03317 22.16275 
  8.748121 9.011187 9.970596 11.7954 12.21095 11.81142 11.55084 12.17591 13.77476 14.28944 14.6072 13.23592 13.31399 15.84934 19.25628 18.41424 18.67715 16.80241 18.52868 20.36193 21.57055 22.12268 21.53455 
  8.204464 8.748878 10.92591 11.5593 13.19048 12.10626 12.02502 12.05263 13.87249 14.59625 14.2399 13.71816 14.30343 15.58732 18.69723 18.77998 18.86568 17.82364 18.05045 20.36054 21.73694 21.81034 20.88315 
  9.566998 9.260836 10.80363 11.55462 12.66976 12.26979 12.27326 11.70452 12.91361 13.86102 14.35423 14.02273 14.13222 14.11232 18.20366 18.85984 18.12046 16.65557 17.61575 19.08671 20.27447 20.7323 19.79643 
  9.615409 8.908094 10.52764 12.07648 12.35761 11.91719 11.31676 10.78619 11.90151 13.13435 13.57572 13.40028 13.16485 14.1586 17.97005 18.6574 17.49142 16.61602 16.97221 18.26481 19.8641 19.66534 19.13474 
  9.518407 13.66414 14.68301 12.44051 12.49297 11.79643 11.65911 11.69031 11.55598 11.38682 13.41629 12.76745 13.40397 14.54726 16.66532 17.59948 16.87245 15.5036 16.39759 17.45155 18.24678 18.04825 17.68169 
  9.886844 16.85537 18.44436 16.27383 15.36497 13.84907 13.35042 12.87622 12.51681 12.60914 13.06418 12.81159 12.38205 13.68456 16.01586 16.22343 16.41093 15.50471 16.12335 16.51148 17.60962 16.92897 15.91486 
  9.932881 17.91137 19.72267 16.92243 17.54066 16.3813 13.78773 13.66247 14.68949 11.97803 13.91343 13.337 12.36077 13.84169 16.9629 16.03707 15.96878 15.15771 15.55823 16.98711 16.99094 16.69534 16.27666 
  9.659884 18.09613 19.96937 17.05172 18.13224 17.0855 14.13896 14.20781 15.18947 12.32284 14.15654 13.44278 11.52995 14.65633 17.3638 16.08185 14.93075 14.58424 15.63039 16.85744 16.92874 16.18911 15.86921 
  9.225732 17.84486 19.66064 16.73664 18.33948 17.20544 14.32049 13.99798 14.7793 14.12439 13.80436 13.0112 10.64079 14.43213 16.94347 15.76767 14.88731 14.56431 14.75913 16.57955 16.82982 14.99926 14.78576 
  9.978578 17.50287 18.95299 15.45618 18.21201 17.02547 14.4027 13.69167 14.05373 14.97102 13.77975 12.47722 10.86374 13.84392 16.04429 14.79358 15.0554 14.68087 14.3245 15.75972 15.80098 14.99877 15.23887 
  10.18868 17.52852 18.87184 15.18826 17.86486 16.54828 14.55269 13.6402 14.14156 15.5371 13.91196 12.7452 11.54375 13.17193 15.15623 14.38001 14.19828 14.53728 13.81415 15.57458 15.68111 14.43799 15.17661 
  9.751181 17.77567 19.34327 16.0554 17.61001 16.27066 14.56316 13.73765 14.17065 15.73401 13.81508 12.20863 11.4907 13.34967 15.23476 13.96594 13.74054 13.87504 13.52333 15.54135 15.94359 14.1128 15.21993 
  10.12562 17.83921 19.52574 16.31928 17.54741 16.23771 14.63482 13.75617 14.15524 15.65129 13.92926 12.40173 12.0656 13.88922 14.18397 13.18738 13.58043 13.53881 13.6731 15.04371 15.07353 14.68031 15.49483 
  9.599173 17.72381 19.33587 16.07591 17.93183 16.65643 14.42071 13.79618 14.27678 15.20349 13.67884 12.52839 12.27971 14.40655 13.7573 12.9882 12.78122 13.05482 13.01771 15.88482 15.94137 15.41255 15.99297 
  9.698954 17.42312 18.87054 15.56586 18.05615 16.68462 13.77028 13.44723 13.88304 14.32237 13.28011 12.27115 12.59856 13.77862 13.29578 12.70767 12.83421 13.53693 13.18715 15.76992 15.76956 15.14107 15.93729 
  10.26758 17.18085 18.16345 14.96107 18.03324 16.52345 12.76308 12.70064 12.66801 12.16031 13.20089 12.79833 12.22538 12.70561 13.22355 12.84058 12.97821 13.38026 12.87051 16.21716 16.48619 14.9863 15.6656 
  10.44387 17.23746 17.66788 14.44783 17.53398 15.96145 11.02331 12.6539 12.60111 13.22309 12.45506 12.53511 12.75447 13.26548 13.35413 13.15652 13.2005 13.69604 12.86835 15.36887 16.235 16.85772 16.54977 
  10.23759 17.50456 18.4933 14.91491 16.87596 15.31868 10.46777 12.26849 12.64224 12.68833 12.15624 12.53137 13.16832 14.12793 14.15696 13.49824 13.72296 13.93223 13.8926 16.50182 18.04177 18.58524 18.28993 
  10.35166 17.05918 18.34675 14.85144 15.89987 14.52373 9.76105 11.89735 12.64335 14.01029 13.73979 13.24587 13.72216 15.26234 15.57124 15.82822 14.74431 15.00797 14.97392 17.71632 19.5425 20.54453 20.59761 
  9.555297 15.74934 17.18219 13.77729 15.08703 13.69022 10.26886 10.90417 12.99767 13.61335 14.09484 13.58124 14.1276 15.82747 16.31124 16.64211 16.18265 15.21884 15.42664 17.76999 19.3903 20.87875 20.63521 
  8.862854 14.02321 15.96503 13.04325 13.75009 12.35207 10.97646 11.18398 12.65906 13.14713 13.12646 13.15454 14.30518 15.27275 15.80141 16.52305 15.9915 14.97509 15.27901 17.70121 19.40024 21.01748 20.90414 
  9.332595 14.84705 16.37164 13.3482 13.17087 10.29789 10.88544 11.37841 12.42703 13.47049 13.44906 13.32345 14.28644 15.11956 16.00913 15.9466 15.03316 14.63348 14.81898 18.33994 19.95979 22.53777 21.45701 
  9.877868 15.61059 16.83839 13.17729 12.35922 10.87731 12.04013 11.68683 13.13918 13.59089 14.2367 13.87884 13.71417 14.67861 15.89329 16.04236 15.43196 14.25506 15.13684 17.87797 19.84326 21.79107 21.3449 
  9.140958 14.76068 16.10624 13.06766 13.47845 11.71019 11.03197 11.56545 12.67004 13.27332 13.428 13.15621 14.76991 15.18839 16.24062 16.5626 15.80795 14.79566 15.80218 18.18081 20.55774 22.59585 22.16014 
  9.849629 12.1764 13.81905 12.20043 12.96303 11.37443 10.69466 11.47991 12.80477 12.91667 12.75835 13.21933 15.38656 15.95259 16.823 16.35283 16.43909 15.31986 16.49224 18.66559 20.4817 22.71685 21.85016 
  9.36202 14.08019 15.09082 12.18999 12.97359 12.29903 11.47363 11.54493 12.6398 12.84276 13.85428 13.64815 15.08565 15.69742 16.96827 16.94289 16.80222 15.64644 16.48269 19.1488 21.81447 23.17466 22.23106 
  9.57612 15.02501 16.06492 12.43449 13.88631 12.50848 11.44948 10.40349 11.57291 13.3126 14.31746 13.71188 14.76625 16.03081 16.07044 15.89003 16.94098 15.98808 16.14021 18.78211 21.02096 21.56885 20.88732 
  11.78685 15.01811 15.52112 13.93588 14.43564 12.86158 11.35952 11.50686 11.3272 13.30817 13.81913 13.40519 15.05978 15.84919 15.35929 13.89001 15.52159 15.23993 15.40555 17.11906 18.97794 19.44685 18.4641 
  13.22307 17.17885 17.15026 17.71776 17.11022 14.76405 13.44761 13.67636 13.50036 13.58129 13.95018 13.70605 14.88567 15.03857 14.84337 14.45563 14.86374 14.4649 14.85144 16.43289 16.89828 16.35405 16.94752 
  13.76669 17.1444 16.05253 18.63818 17.65204 16.25583 13.92566 13.3862 14.08959 14.32102 14.93346 14.31664 14.22204 15.10196 14.79939 14.6737 15.79121 14.93165 14.95335 16.65422 16.83254 16.26995 16.42411 
  14.16293 17.56638 16.68997 18.26259 16.92739 16.86658 14.25883 13.70589 14.65646 14.7077 15.98879 14.10393 13.49225 14.60994 14.55858 14.16977 16.02258 15.63895 14.65991 16.49418 17.11003 16.32096 16.05037 
  15.02538 17.72275 16.8508 18.08271 16.43288 16.42471 14.93453 14.1509 15.1513 14.99477 16.14239 14.24235 14.14056 14.526 13.32078 13.45539 15.79795 15.33149 14.69157 15.69744 16.36747 15.60381 15.84035 
  15.73365 18.03275 17.27561 18.75753 16.95327 15.80555 14.91612 15.18357 15.67709 14.97079 15.72672 15.15915 14.04085 14.47075 13.372 12.99742 14.7936 14.90177 14.24704 15.20238 16.2678 14.74167 16.11823 
  16.26338 18.46109 17.27797 17.99683 16.66476 16.05012 15.40509 15.53155 15.96098 15.33267 16.30002 15.72512 14.53671 14.58817 13.3534 12.64955 14.37179 14.83991 14.59077 15.26721 16.07784 14.28749 15.199 
  16.74382 18.91121 17.16111 18.04957 17.40871 16.48565 15.41961 15.17963 15.49974 15.08093 16.65548 15.9269 13.91849 13.88327 12.97235 12.65223 14.07203 14.75737 14.22784 14.64774 15.34663 14.63228 16.0888 
  16.98529 19.11541 16.87011 16.74349 16.9392 15.88544 15.29093 14.62053 14.88953 15.09566 16.51639 15.45968 13.03472 13.23992 12.823 11.87619 13.33451 14.86192 14.20521 15.69702 15.84917 14.75232 16.39235 
  17.02896 19.19595 17.16109 16.8834 16.3556 15.42284 14.74861 14.76104 14.92473 14.9596 16.6881 15.93081 13.33428 12.97397 11.86678 11.63746 13.04319 14.28844 13.94617 15.04595 15.0869 13.90257 15.89565 
  16.85907 18.78141 16.70892 17.00507 15.92624 15.45061 14.55362 14.95892 14.77704 14.90913 16.98108 16.19264 12.85906 12.48413 11.65127 11.22398 12.58972 13.57485 13.65335 14.72739 15.04537 13.98265 15.01211 
  16.64772 18.64282 16.71573 17.18554 14.0683 13.95587 14.57919 14.46328 14.58978 14.59518 17.19131 16.57361 11.92534 12.44241 11.72602 10.9928 12.40186 12.90819 13.48149 13.89237 14.56885 13.63314 15.65915 
  16.43158 18.50433 16.55654 16.99913 14.52069 14.72103 15.24015 14.21969 14.87307 14.52989 16.81562 16.34734 11.70553 12.3317 12.12857 10.71785 11.98174 13.78292 13.32322 13.64274 14.45671 12.9734 15.85995 
  16.15336 18.23414 16.24944 17.09164 15.74857 15.23684 15.1064 14.90766 14.93522 14.25292 15.3692 15.24385 11.05891 11.63416 11.56629 10.73565 11.83266 13.19534 12.7056 12.56191 13.70245 13.0052 15.80045 
  15.78317 18.05012 16.20395 16.93727 15.87394 15.19709 14.3524 15.32856 14.63064 14.19885 14.98944 14.75296 10.62751 12.19092 12.06983 10.93399 11.87565 13.48435 13.07779 12.09982 12.47317 12.27248 14.84082 
  15.39456 17.87456 16.5168 17.58305 15.61139 14.72246 13.21522 14.96642 14.20902 13.41669 14.09352 13.92773 10.64034 11.88319 11.45076 10.29098 11.03321 12.29812 12.70397 11.7358 12.26939 12.86226 14.78897 
  15.02351 17.78964 16.85471 18.13364 15.47449 12.54772 11.64264 14.01794 14.02666 13.12667 13.50438 13.3572 10.43241 11.16833 10.74877 10.2707 11.00853 12.42801 11.92629 11.09674 11.43357 12.86431 14.27416 
  15.09622 17.92495 16.85561 18.0988 15.42922 13.02023 12.68088 13.53072 14.48343 13.28728 12.15022 11.93546 9.953141 10.79692 10.91492 10.17909 10.60862 11.80252 11.56975 11.04416 11.2531 12.42208 13.60621 
  15.60607 18.24389 16.80309 17.81159 15.3917 13.49877 12.40576 13.14706 13.09449 12.85986 11.56458 11.51797 9.652337 9.949935 10.32372 9.620278 10.69048 12.02814 11.59302 11.66032 11.30074 12.12424 13.24895 
  15.90996 18.41689 16.64116 16.99631 14.64567 13.13194 12.07542 12.5822 11.69349 12.01927 12.62355 12.76301 9.990061 10.90189 10.44079 10.43505 10.66998 11.15739 10.76031 11.42792 10.89405 11.56491 12.52887 
  16.00483 18.30446 16.08208 15.86133 13.48314 11.61676 10.97823 11.11799 11.98649 11.92625 11.69026 11.50517 9.732758 11.60136 12.52013 10.93729 10.51151 10.81287 10.82617 11.56538 11.44386 11.92824 12.41918 
  15.85966 18.15599 15.9375 14.95248 13.4534 12.87702 10.83218 10.35821 11.16184 9.718279 11.04042 10.5775 9.815519 11.36835 11.88853 10.33835 10.50751 11.11677 11.1463 10.79816 10.42655 11.12749 11.29204 
  15.60883 17.95237 15.71562 14.06708 12.83226 12.50489 11.20979 11.6847 10.72093 10.36459 9.917531 9.80342 9.078652 10.03732 10.6124 10.40085 10.80684 10.88086 10.69822 11.10227 10.46698 10.97048 11.10912 
  15.2283 17.69786 15.57806 13.4353 12.19316 11.6246 11.0224 10.72823 9.161061 9.050728 9.18773 8.855824 9.341613 10.0488 10.13517 9.546819 9.585426 10.50744 10.39803 10.67031 10.51961 11.0685 11.14912 
  14.64353 17.16451 15.15834 12.35796 12.72608 12.63046 11.68167 12.3668 14.25992 15.93695 17.47775 17.61803 15.56757 12.9737 13.57319 14.56981 13.7044 12.71877 12.86225 13.4201 14.1629 15.60515 15.65097 
  14.06131 16.47077 14.28449 12.84858 13.9689 13.82744 13.50231 13.78073 15.74885 17.04282 18.63243 18.85933 16.36653 13.78124 14.21553 15.24145 14.55391 13.56051 13.44235 13.90322 15.24372 16.40675 16.67416 
  13.27166 15.83467 13.93871 14.37551 15.34901 13.98736 12.92254 13.76627 16.36158 17.10549 18.30305 19.29653 16.39378 14.40754 14.97054 15.40539 15.09013 14.31383 14.41104 14.81105 16.28816 17.46544 17.82826 
  12.96865 15.35536 13.58534 12.4107 13.17324 13.02235 12.97369 12.78875 14.50413 17.08331 17.24843 18.6091 16.95518 13.8684 14.65208 15.11388 14.85909 14.55639 14.21002 14.85613 16.15392 16.62638 16.38885 
  12.74014 14.99901 13.14317 12.02542 13.41206 14.03033 13.80731 13.07016 15.74668 17.29949 18.08332 19.69819 16.80057 14.75249 15.27074 15.78106 15.3902 15.52286 15.18476 16.46467 17.64916 18.49447 18.44848 
  12.97496 15.42951 13.52378 12.25876 12.9795 12.56979 12.22692 13.57497 16.76153 17.23584 17.87868 19.12806 16.46191 14.3858 15.09691 16.4803 16.39127 15.58957 15.17072 17.00169 18.0875 17.41795 17.79514 
  12.85048 15.10298 13.23063 12.87181 13.15208 14.49708 13.07669 14.13431 17.66776 18.05823 18.7889 19.8481 17.92514 14.92354 15.70432 16.74818 16.99346 15.81199 15.74879 17.21499 17.70545 17.515 17.36108 
  12.53143 14.44646 12.04487 12.66221 12.77981 14.32154 14.07838 15.03848 17.27874 19.03686 20.9606 21.08079 18.90958 15.76664 16.3242 16.48548 17.68536 17.00244 15.70976 17.18387 17.96568 17.45182 17.34422 
  12.27433 14.13071 11.95814 11.10963 12.29782 13.32947 14.22352 14.91252 17.05256 19.26613 20.56975 21.0718 19.55671 16.45315 16.53867 17.16694 17.57012 16.58561 15.94327 18.1118 18.44273 17.04756 17.29078 
  11.13274 13.31547 11.17836 10.36571 11.99675 13.38614 12.98195 13.65712 17.52398 18.83635 20.82496 21.25018 19.02944 15.45844 17.22033 16.9855 17.61625 16.53855 16.01951 17.28067 17.80919 16.29823 16.60528 
  11.49348 12.72149 11.09698 10.74121 11.50064 13.47943 13.28319 14.12894 16.37674 18.3519 20.4639 21.47652 19.33566 15.77035 17.06349 16.64114 18.07568 16.64302 15.36201 16.04336 16.91956 15.94561 15.99434 
  9.608139 11.75429 10.06571 11.10138 12.11786 11.58483 13.1205 14.21301 17.56386 19.13147 20.32322 20.12503 18.29022 15.28772 16.86212 17.37056 17.82328 16.0193 14.99997 15.78904 16.12305 14.55896 16.01875 
  10.37425 13.20676 13.2479 12.19929 12.48866 11.74895 13.30715 14.54824 16.49951 18.31167 18.70988 19.54097 18.35619 15.11479 16.51697 15.98994 17.09819 15.56539 14.59282 15.14828 15.75255 14.43601 15.72609 
  12.24577 13.50046 14.6557 13.99299 13.82843 14.16929 14.48501 15.5354 17.66883 19.51937 19.39577 20.05754 18.37926 15.92839 17.23025 16.90665 17.33165 15.74956 14.90289 15.95183 16.37263 14.70161 16.83718 
  11.82463 14.14506 14.91792 15.10521 13.43633 14.76357 14.50471 16.30964 18.37582 20.10591 19.70662 20.12249 18.50193 16.44523 17.84676 17.22989 18.06538 15.9933 14.78116 15.97121 16.63473 14.1447 16.62147 
  11.49515 12.57675 14.99955 15.12591 14.04319 14.90118 15.1428 16.90608 19.3396 19.58758 19.89336 20.47232 19.38027 16.83281 17.80051 16.78543 17.68006 16.1482 15.12499 16.29321 17.28958 14.39812 16.74356 
  12.83119 13.36858 15.33085 15.8993 14.98862 15.06884 16.27656 17.09787 19.89 19.45495 19.66687 20.41065 20.05285 16.93578 17.84101 17.22516 16.6539 15.47889 14.51853 15.41234 16.16337 13.92638 16.21105 
  12.24501 13.81182 15.54418 15.71802 14.7434 15.25031 16.42093 16.59528 18.99483 18.75741 18.65719 19.56692 19.45428 16.5445 17.94703 16.85806 17.52571 15.87006 14.62513 16.25237 16.75192 14.53815 16.44307 
  12.43977 14.4055 15.44666 14.61817 14.49417 15.49514 16.47301 15.8417 18.29481 18.40584 18.14786 18.99505 18.48756 16.02132 17.57674 16.21739 17.56367 15.96092 14.12129 15.69186 16.08254 13.29479 15.69123 
  13.40359 14.56592 14.56405 14.75467 14.80557 15.61075 16.30825 16.00495 17.7576 17.7054 18.44081 18.78086 17.91715 15.75736 16.79029 16.09669 16.95103 15.45958 14.6285 16.45296 17.08261 14.81537 16.62719 
  13.40314 14.79066 14.44038 14.9208 15.28192 15.93543 16.12896 15.63424 17.53443 16.71537 17.66586 18.26663 18.14948 15.38054 15.57821 15.0981 16.37489 14.9254 14.11771 16.22453 16.63408 13.88445 16.03982 
  13.27263 15.37152 14.98211 15.65071 16.10147 16.25443 16.39878 15.22288 16.07312 15.48051 16.95177 17.88365 17.64783 15.38212 14.85438 14.25418 15.50375 13.87315 13.85102 15.71543 15.58055 13.48126 13.54171 
  12.79769 14.03458 15.15382 15.77783 16.13641 15.98903 15.71088 14.83081 15.42676 14.42686 16.35059 17.19747 15.89601 14.84456 14.84353 13.56014 14.43848 14.02768 13.56937 15.50516 15.01935 13.35548 13.45835 
  12.93903 13.22154 14.53004 14.98958 15.29645 14.57587 14.04077 13.26272 15.32759 15.32236 15.08416 15.42689 14.78734 13.71587 13.79163 12.75518 14.19596 13.34965 13.19663 14.03743 14.96614 16.49541 15.33635 
  12.45864 14.61374 13.87632 13.62215 13.72294 14.31611 14.38086 14.30659 15.43989 15.61096 14.15577 14.35514 14.81355 13.64131 13.45614 12.72456 14.13968 12.8555 13.59385 14.17748 16.39845 18.38371 17.80732 
  12.33578 14.00891 13.07828 13.71424 14.10664 14.0687 13.13889 13.63885 14.8914 14.61784 13.90929 15.45194 14.73162 13.03892 13.2657 13.5637 13.76148 13.25706 15.10444 16.29706 16.97831 19.44946 19.79165 
  11.96159 13.51957 13.01035 13.54761 13.69153 14.00353 13.68937 12.58661 13.48017 14.13214 13.91707 15.31068 14.23771 12.71703 12.92456 13.6957 14.43063 13.71029 15.22686 17.15446 18.45062 20.47192 20.89673 
  12.05161 13.9109 13.16452 12.97099 13.18609 13.13105 12.99765 11.58859 13.5218 14.01596 14.89017 16.47881 15.28746 13.67678 14.92654 14.96245 15.16147 15.0592 16.54627 18.17285 20.02958 21.51884 20.96085 
  11.84209 13.52904 12.77278 13.65398 13.20718 12.63755 11.65749 11.51693 13.74348 13.71783 15.14928 15.87488 15.38048 14.01578 15.41916 16.25989 16.82 15.85239 17.44685 18.49073 20.68714 23.03419 22.3119 
  10.61903 12.43421 12.6958 13.1518 12.79633 12.09934 11.18174 12.15249 13.5683 12.78554 14.49859 16.1356 16.38138 15.60392 15.1943 16.6934 17.43444 17.15856 17.69735 18.73776 20.3603 23.0533 22.83157 
  11.10656 11.3087 12.01439 12.55888 12.73079 12.43724 11.43508 11.91604 11.70938 13.99057 15.63961 17.28322 16.47677 15.20305 15.66617 17.61813 17.6751 17.60305 17.77451 19.25667 19.94949 21.90983 21.98059 
  10.16949 12.0564 11.52991 9.989189 11.37004 11.68872 11.52673 12.63663 12.73647 14.62583 15.9687 17.945 17.5506 15.1612 15.45209 17.22151 17.94404 18.35554 18.20267 19.54291 20.5292 21.7872 21.39606 
  10.79198 11.45736 9.874234 11.29701 11.26837 11.21837 10.89179 11.62866 12.76508 14.6835 16.18573 17.36658 16.85448 15.94918 15.41981 17.08667 18.88033 18.45577 17.89176 18.7529 19.93383 20.91602 20.71407 
  10.99965 11.83611 10.76372 12.36669 12.88359 12.50686 12.02554 11.77562 12.96449 14.7419 17.49727 19.05647 17.76691 16.14889 15.8502 16.94702 18.54008 18.61212 17.60375 18.40122 18.75871 20.56727 20.2013 
  10.71721 12.24653 11.40098 12.01936 13.33127 11.90686 10.65947 11.82579 13.32248 15.28793 17.01124 18.64284 17.72797 16.53176 16.11108 16.70494 17.7008 16.87293 16.90321 17.01193 17.9865 19.62047 18.54438 
  10.84808 11.77401 10.30343 12.64884 13.14566 11.23923 10.88098 13.11721 13.34063 16.31784 16.83382 18.21345 17.3839 15.27081 15.50685 15.6567 16.72981 16.33526 16.45064 16.3113 16.49539 17.15864 16.46489 
  9.56905 11.07326 11.29717 12.00945 12.56924 11.74636 11.67797 12.27109 12.33903 15.01359 17.29026 17.98109 16.23312 14.9431 14.57544 15.20214 16.10517 16.19224 15.99478 15.25868 15.40402 16.06674 15.44316 
  10.508 10.89142 10.47173 10.95241 11.76593 10.98585 10.76927 12.52917 13.36483 14.31004 15.60877 16.93384 15.8426 14.30149 13.70551 14.91785 15.92855 15.53096 15.40199 15.04334 15.08857 15.01525 14.25574 
  10.63981 10.66823 10.20522 11.10935 11.52312 11.02348 10.33216 11.91914 13.66731 13.93949 16.71332 17.01327 15.03499 14.47763 14.38019 14.41362 15.83647 15.67047 14.47045 14.31423 14.19904 14.82268 14.1559 
  8.488654 10.10538 10.92675 10.09366 10.85918 11.26827 12.20304 11.80998 13.27558 13.73623 16.86435 16.49012 14.37312 13.21352 13.43559 13.28338 14.71591 15.0856 14.91768 13.89271 13.95088 14.00356 13.01124 
  10.81898 13.143 12.69162 12.18221 12.55161 11.64523 12.4468 13.02303 11.64384 13.77602 16.63419 16.27282 14.54465 12.69009 12.62936 13.17467 13.86224 14.07909 14.24895 13.57493 13.15678 13.56058 14.07112 
  11.59634 16.15282 16.04291 14.96902 14.3159 12.56915 13.37283 13.76863 13.54054 15.07096 17.31351 15.84202 14.1527 12.01037 12.44869 12.54331 13.90306 14.87695 13.81047 14.15678 13.85537 13.21027 14.43797 
  11.50468 16.01534 15.65023 16.86644 16.7308 13.15539 13.75249 14.19167 14.84136 15.6804 16.669 15.56741 13.27599 12.18581 12.46388 13.30746 14.70786 14.89385 13.59378 14.1329 13.90083 13.44039 14.82801 
  10.83374 15.32171 15.06138 17.48699 17.51079 13.80905 13.93069 14.59176 15.34084 16.82994 18.43996 15.90578 13.45436 11.4224 12.79343 13.04898 15.73146 15.33158 13.21905 14.24352 13.77445 13.27233 15.71018 
  10.87577 15.47127 14.70888 17.73984 17.70155 15.23704 14.3281 14.20854 15.22783 16.9904 18.81393 15.66493 12.64094 11.61629 12.53687 12.72079 15.53799 15.56939 13.56948 13.97123 13.99972 13.84164 15.74528 
  9.678106 15.093 14.73196 18.04266 17.9735 15.98976 14.84349 14.29395 15.38192 17.14681 18.48132 15.55414 11.81447 11.31875 12.51214 13.46786 15.07734 14.72639 12.89217 14.18221 13.942 14.46083 16.30935 
  9.27702 15.11397 14.6864 18.26455 18.06617 16.64905 15.23202 14.50276 15.79894 17.31784 17.91948 15.71456 11.84832 11.10479 12.36307 13.02193 14.84241 13.94248 12.61262 14.14687 13.61714 14.61513 16.65529 
  10.31116 15.20001 14.41779 18.20655 17.87181 17.37104 15.80053 14.73345 16.33781 17.26945 17.61073 15.76432 12.14948 11.87968 12.84734 13.35572 15.99964 15.05299 12.98151 13.59093 13.31608 14.26525 16.48788 
  11.03995 15.58457 14.74849 18.04535 17.62146 17.83399 16.13437 15.02326 16.80089 17.59974 17.02293 15.01341 11.93478 12.57936 13.6127 14.46719 15.84169 14.35661 12.21926 13.44742 13.43839 15.56678 17.01775 
  10.68538 15.81025 15.13098 18.21277 17.8306 18.10699 16.28154 15.16807 17.16661 18.57889 16.92452 14.87336 11.62055 12.73409 14.09998 15.07829 15.77258 14.38338 12.46635 13.38107 13.42031 15.66041 16.70012 
  10.80705 16.10119 15.46497 18.4098 18.0212 17.97135 16.10877 15.19762 17.24214 18.58634 16.91249 15.42679 11.88585 13.91432 14.45943 15.74949 15.78372 14.54293 11.42183 13.62915 13.34823 14.55129 16.14689 
  11.94579 16.89884 16.24403 18.55092 17.98724 17.20735 15.26665 14.65565 17.00868 18.47542 17.43419 16.03572 12.83943 14.0368 13.85004 14.82996 15.17481 14.47794 11.56922 13.58228 13.62983 13.59302 13.86895 
  12.38524 17.37505 16.7696 18.47446 17.26857 15.96833 14.24874 13.03042 16.26422 17.79273 17.16697 15.42565 13.5957 14.48988 15.57542 16.0727 14.58922 13.24106 11.54757 13.28529 14.40883 13.49071 12.73336 
  13.03429 17.5047 16.87468 18.99148 17.70904 16.97621 14.13228 12.62222 14.45247 16.32981 16.61537 14.76507 12.27121 13.29012 13.69145 14.77818 13.90872 12.32405 11.65774 13.06149 13.20633 12.79723 14.08691 
  13.543 17.20046 16.71364 19.24344 17.89848 16.23823 13.61446 12.58731 13.79848 15.94572 15.83083 13.94334 12.33336 12.76893 13.26038 13.66663 13.53549 12.52763 10.97694 12.18569 12.12396 12.52927 14.61442 
  13.93316 16.19917 16.60253 18.37887 16.34868 15.3344 13.60281 12.46623 14.00873 15.9596 15.67228 13.47866 13.02688 12.05685 12.97747 13.6599 13.39636 12.42592 11.40474 12.42729 12.39029 12.35651 14.67778 
  14.61915 17.17112 17.5472 19.37409 17.49985 15.90717 13.63423 11.95899 12.93315 15.14544 15.01936 13.25283 12.0639 11.61034 12.99201 13.5629 12.46684 11.77172 11.48846 12.21882 12.39871 12.28264 13.6378 
  15.01957 17.45677 17.19109 18.36874 17.45291 15.49117 13.40853 11.98515 13.43848 14.47075 13.50971 13.02251 11.71136 11.68848 13.16979 13.33672 12.58453 12.35258 10.77152 11.58895 12.66962 12.02104 13.51065 
  14.3445 16.08383 16.46618 18.37986 17.49754 15.51561 12.724 11.01192 12.56261 14.75061 13.70792 13.23755 11.97928 11.47003 12.88496 13.19709 12.51916 12.02072 11.23445 11.98222 12.57755 12.58092 13.76349 
  14.65777 17.34103 16.21595 17.87756 17.01974 14.35497 12.22902 10.17611 12.75461 14.43687 13.74576 11.96353 11.00994 10.55103 11.22951 12.06966 11.58648 11.32696 10.70369 11.09274 11.97977 11.15652 12.0545 
  13.84403 16.75669 15.79734 15.90669 15.87204 12.81713 11.09628 9.869988 11.99412 13.88495 12.05193 12.04093 9.865374 9.98888 10.54978 11.08329 11.51721 11.287 10.19415 10.19129 11.11317 10.82287 11.26695 
  12.03434 15.45859 15.11167 15.11641 15.16317 14.43024 11.65586 9.923259 11.36602 12.3641 11.81006 10.42863 9.54241 9.423545 10.06809 10.60886 9.986245 10.09022 9.895988 9.939384 10.39964 10.69043 10.78549 
  11.62159 13.6024 13.63344 15.00115 13.6637 13.22205 10.87602 8.768591 9.799749 12.06331 11.48621 10.03431 9.139547 9.069712 9.670283 10.33705 9.896729 9.732963 9.743416 9.657762 10.36899 10.26934 10.40027 
  11.81183 14.29564 13.63866 13.6298 13.18893 11.65243 10.08644 8.713807 10.15905 11.12224 10.93487 10.20999 9.452074 8.578758 9.197603 9.436418 9.835726 10.35768 10.15316 9.921681 10.65349 10.78003 10.5952 
  12.90228 14.67107 13.85628 14.29541 14.35086 11.63753 8.860591 8.425612 9.22795 10.41009 10.2974 10.03016 9.485574 8.96593 9.532566 10.04701 10.0559 10.14437 9.809213 10.09447 10.73098 10.48613 10.97839 
  12.14247 14.62703 13.12694 13.76217 13.40734 11.91112 10.03432 10.69592 11.79923 14.96772 14.89136 11.78498 11.03141 11.00658 12.59778 13.80232 13.4921 12.70364 11.11213 11.89696 12.66712 11.61696 12.02989 
  12.4993 14.9535 13.76636 14.01113 13.60918 11.75265 9.318426 8.455474 10.17039 14.04918 14.17533 10.41443 9.469222 8.986806 10.21098 11.43141 11.99639 11.92311 10.10373 10.33753 11.20988 10.83258 10.75896 
  12.68394 14.34959 12.56896 14.07434 13.22602 10.90166 8.476089 7.972132 8.819757 12.4768 12.05638 10.18155 9.379935 9.701678 9.932637 10.54168 10.334 10.5734 9.954249 10.09657 10.31382 10.49024 10.26939 
  11.75025 13.98052 12.16752 14.10251 13.03628 9.846658 8.836266 7.344621 8.59577 10.93273 10.1303 9.196149 9.388488 9.020345 10.06814 10.79366 10.47544 10.58243 10.03372 10.03065 10.42077 10.64656 10.34247 
  11.83129 14.04893 12.96955 13.20662 12.82525 9.696637 7.323474 8.177329 8.925125 10.40487 10.10953 8.585592 8.994603 8.708897 9.25738 9.843696 9.578411 9.801521 10.00724 10.09079 10.59092 10.64287 10.58762 
  9.790665 12.15129 12.64895 13.58438 12.95563 9.711924 8.575097 9.001645 8.701345 10.52208 10.09897 8.517282 8.199652 8.714253 9.041263 10.04511 10.11496 10.00863 9.805107 10.16271 11.0758 11.04546 10.75644 
  9.414765 12.83946 11.98675 12.726 12.46979 9.267152 9.039246 8.176429 8.053612 10.21454 10.41292 8.622193 7.66354 8.369621 8.642668 9.53128 9.865145 9.638438 9.991666 10.25773 10.73875 10.72115 10.65728 
  8.779566 11.93363 11.50436 10.50424 9.622196 9.267383 8.582248 8.53771 7.890744 10.04505 10.20443 8.095118 8.612964 8.879345 8.707826 9.227075 9.176577 8.984758 10.03803 10.03335 10.5386 10.95927 10.46486 
  10.31262 11.88174 11.99282 11.90945 9.594206 8.671169 8.318475 8.181697 8.13483 10.15049 10.25244 7.071458 7.442567 8.785149 9.691864 9.679538 9.605824 9.961757 10.42259 10.17275 10.75009 11.18037 10.72504 
  9.364248 11.96104 11.57661 12.01562 10.75383 6.945975 7.367255 7.736284 7.312432 8.969963 9.153383 7.523694 7.927206 8.369026 8.890183 8.625024 9.480546 9.502405 9.561119 10.14811 10.62848 10.7046 10.59589 
  8.967705 11.99061 11.26416 11.1752 9.580898 7.045003 7.880908 7.853495 8.063289 8.442639 8.374674 7.542927 8.712921 9.363231 8.787829 8.940651 8.762001 9.656484 9.560316 9.75022 9.998157 10.96855 10.83991 
  10.79417 10.95528 10.71563 9.979272 8.763993 8.042848 7.088778 6.67021 7.448594 8.206177 8.038567 8.22692 8.547086 7.783141 9.548368 9.043281 9.180702 10.15911 10.00978 9.834736 10.7707 10.82731 10.66534 
  9.814768 12.13082 11.35585 9.269568 8.184406 7.729397 6.804678 6.910729 7.282489 8.309008 8.415974 8.394214 7.598199 8.050271 7.802004 8.228464 9.248132 9.948934 9.981988 9.834879 10.65781 10.51871 10.32499 
  9.652328 11.7325 10.80005 9.37675 9.013214 7.384077 7.177258 6.372046 7.152434 7.877509 7.752526 8.609852 8.207765 8.032439 8.76918 8.827532 10.00151 9.213638 9.988755 9.774078 9.915665 9.921049 10.41844 
  9.466437 10.99616 10.88723 9.492371 8.961466 7.342728 7.358507 7.339148 7.301462 8.448383 7.55255 8.734881 8.417077 8.13668 8.47903 9.036981 9.332474 9.148084 9.903509 9.827729 9.972055 10.26808 10.0221 
  9.64337 12.05854 10.97252 10.21088 9.235108 7.101156 7.432653 8.052295 7.081014 6.580639 7.278504 8.679576 8.280871 7.73062 8.75071 8.532309 9.329696 9.757581 9.649004 9.204338 10.25986 10.47111 10.64369 
  8.506092 11.0128 10.68274 8.38584 7.542353 7.152045 7.326688 7.308694 7.642912 8.45325 8.044881 7.742783 7.756063 7.661537 8.275199 9.033102 9.546831 10.0107 9.767799 9.575422 10.45731 10.67947 10.68756 
  9.190603 9.530713 10.03561 9.930932 8.509491 7.016098 7.502091 7.892929 8.24372 8.307619 7.979042 8.022539 7.607509 8.970244 9.02056 9.152427 9.395686 9.654662 10.00138 9.774302 10.25611 10.34001 10.50237 
  8.055989 10.48079 9.946203 8.668516 8.405702 6.663228 7.840029 7.953751 7.417573 8.407804 8.318707 8.09545 8.268252 8.237886 8.408285 9.147687 9.502143 9.359609 9.642401 10.35548 10.85097 10.35253 10.09162 
  8.523983 10.58812 10.45586 9.346948 8.423094 6.955203 7.860735 7.061844 7.640428 8.098751 8.684567 8.348521 8.667695 8.787177 8.56147 9.256539 9.863892 9.601056 9.600217 10.26275 10.53992 10.67695 10.53019 
  8.54944 10.105 9.919378 8.700562 8.344156 7.651311 7.920049 8.247942 7.364244 7.392271 7.95518 8.640728 8.450232 9.633104 9.684301 9.891527 10.32047 10.18789 10.04646 9.441429 9.973019 10.92726 11.09868 
  8.736779 10.67164 10.37507 8.660881 7.410948 6.964188 8.062383 8.657798 8.175787 8.124825 8.552548 8.639585 9.223927 9.4434 9.095827 9.018662 9.108647 9.592525 9.690062 9.847681 10.15555 10.88235 11.25451 
  9.102138 10.24407 9.305552 7.972584 7.585355 7.130199 8.330059 8.346169 7.620663 7.047201 7.893063 7.923952 8.201685 8.679209 8.079769 8.701505 9.398625 9.830337 10.10372 10.45054 10.39745 10.80141 11.24109 
  8.334327 8.800347 7.70179 9.206788 7.814823 6.755373 8.229139 8.560562 8.269526 8.002664 8.831103 8.463894 8.141541 9.049789 8.658106 9.396982 9.033927 9.765074 9.622846 9.758422 10.27184 10.49305 10.98647 
  8.128156 10.12106 8.910188 8.762326 7.666012 6.949401 7.950629 8.1798 7.218674 8.2833 9.230454 8.431417 8.010346 8.432215 9.385073 9.408096 9.264348 9.842328 9.402944 10.16326 10.44512 10.27258 10.61046 
  9.777776 10.17425 9.162596 8.756527 9.402295 7.245045 6.706331 7.813528 7.5786 6.985806 8.648697 8.302122 8.295655 9.284019 9.676419 9.26071 9.179568 9.636398 10.1599 10.62567 10.66345 10.83598 11.00543 
  9.4869 9.121305 8.684687 8.700015 9.641527 9.05888 7.269274 8.791707 8.11694 7.627497 7.894124 8.112581 8.470985 9.387035 9.151163 9.710186 10.16911 10.16346 9.604074 9.987885 10.31005 10.77668 10.24039 
  7.866471 9.883156 9.715327 8.677911 9.516367 8.34246 6.843951 7.838072 7.583541 7.852625 8.79549 7.970304 7.762091 8.846987 8.563057 9.384929 9.697865 10.01375 9.721039 9.735113 10.16057 10.63224 10.43368 
  9.080318 8.246053 9.041588 9.417509 9.518696 7.989542 8.403717 7.463982 7.206106 7.981693 8.578013 8.107899 8.514518 7.873508 8.147607 8.765822 9.033931 9.63969 10.04347 10.20652 10.35496 10.39911 10.52451 
  9.13128 9.074238 9.046019 9.162777 9.203651 7.817907 8.81809 9.218652 8.689803 8.523691 8.246085 7.754849 8.768758 8.798193 9.144502 9.340219 8.860093 9.865606 10.48086 10.25486 10.56614 10.47177 10.82067 
  7.529797 9.080559 9.025809 9.092778 9.069881 7.946015 8.707859 8.578739 7.658888 7.688912 8.116105 8.22648 8.638034 8.394612 8.661798 9.266599 9.497275 10.0152 10.51752 10.32173 10.39726 10.61129 10.75329 
  8.053345 7.806293 9.592606 9.250229 10.12409 9.191865 7.827196 8.112508 8.535282 8.097424 9.000211 8.186003 8.92979 8.464402 8.670289 9.216187 8.820874 9.527074 9.87499 10.41622 10.59681 10.51637 10.55027 
  8.95764 9.454929 9.582647 10.45938 9.56032 9.433331 8.428452 8.183547 8.032114 8.346304 9.071006 7.965348 8.02034 8.698916 8.684159 9.147419 9.093694 9.894665 9.954333 9.885933 10.35833 10.71119 10.45288 
  8.644167 9.48673 9.193418 10.94477 10.83592 10.44026 9.28124 8.387064 8.084641 8.072067 8.342614 7.222877 7.512965 8.233858 8.808452 9.225642 9.137754 9.547808 9.455416 9.747941 9.971579 10.64146 10.5402 
  9.362679 9.369581 9.729918 10.51496 11.36854 10.06961 9.509526 8.469991 7.580379 7.402191 8.035908 8.038517 7.526035 8.179871 9.229591 9.114322 9.371479 8.994488 10.0342 9.992566 10.37911 10.48729 10.1093 
  9.234426 10.02822 10.1721 10.36807 11.38841 10.48724 9.894856 8.55321 7.932958 7.556772 6.919079 7.997473 8.420939 8.136166 8.576433 8.605513 9.379736 9.246395 9.551528 10.12443 10.13093 10.29004 10.01479 
  7.140229 7.340988 9.690434 11.22617 10.54758 9.907132 10.23349 7.846205 7.38694 7.756484 8.138117 7.949685 8.286037 7.875541 8.682808 9.680047 8.736238 9.250605 9.806664 9.929686 10.29157 10.88262 10.53922 
  7.609503 8.446361 7.927704 10.19232 9.981958 9.750368 10.16796 8.132822 7.723164 7.520811 7.855199 7.349501 7.666749 8.55197 8.498571 9.580915 9.49983 9.968874 9.624427 10.19383 9.79729 10.48853 10.77697 
  8.643137 8.622834 9.379179 10.04107 10.14056 9.3327 9.441696 7.66965 6.918215 7.325087 8.433827 7.403569 7.932473 9.355262 8.961336 9.433534 8.987287 9.442863 9.373944 9.75194 9.910201 10.3459 10.44656 
  8.102217 8.064889 10.64028 12.49552 11.73663 10.07672 9.110008 8.321954 7.095238 7.711002 7.663853 7.130478 7.74688 9.022202 9.092591 9.239534 9.148391 9.078291 9.648328 9.942706 10.50384 10.56971 10.62924 
  9.490426 8.373809 10.40654 12.21494 12.46647 10.44266 9.246642 8.188566 7.077143 7.664156 7.372887 8.266142 8.489851 8.568101 8.872865 9.148156 8.987501 9.957211 10.15845 9.909985 10.00002 10.26888 11.01089 
  8.798372 9.025386 11.31425 11.42679 10.40138 9.216152 9.422606 8.502447 7.214077 7.507916 7.581383 7.426453 7.74972 8.678537 8.401515 9.35175 9.522068 9.689368 10.34969 10.18144 10.10829 10.22423 10.66045 
  8.333779 8.557695 11.21921 11.96504 10.46627 9.176018 9.10099 8.930181 6.847103 7.599282 7.893191 7.544654 8.840031 8.585423 8.405005 8.922948 9.007273 9.288807 9.946635 10.77755 10.62705 10.41732 10.66157 
  7.559142 9.412998 10.48524 11.38948 11.17182 9.945296 9.195613 7.687301 7.509073 7.597322 8.275542 8.369865 7.823867 8.430854 8.52591 8.722355 9.220226 9.689714 9.526666 9.805052 10.34926 10.81544 10.53098 
  11.4918 14.00084 16.17197 16.42617 15.349 12.41796 11.39773 11.66957 13.16477 16.12451 15.81399 11.20098 8.846268 9.76712 11.6804 12.1915 12.09149 11.27532 10.66243 11.10545 11.89396 11.18614 11.62189 
  12.7754 14.8082 17.40473 16.74525 14.93741 12.71457 10.89088 11.37673 13.37231 16.26893 15.81663 11.01917 8.918041 9.566416 11.83768 12.05284 12.66729 11.85449 10.60257 11.00357 11.39586 10.96889 11.1296 
  13.36541 15.34315 17.2086 17.66689 14.65784 13.48884 12.4741 12.45936 13.42722 15.72825 15.79719 14.43064 12.889 10.51247 12.3019 12.30175 12.62188 12.04293 10.42985 10.74694 10.95862 12.90817 13.67789 
  14.24038 16.24807 17.4501 18.67464 18.25082 16.41788 13.84389 13.58693 15.07749 17.34456 17.16127 16.20167 15.06416 11.77578 12.49449 13.45164 13.06607 12.11281 10.48501 10.73691 11.01208 15.49132 16.34476 
  15.46261 17.19522 17.5746 18.0107 18.14651 16.75021 13.80587 13.41014 14.9592 17.26956 16.178 14.19134 11.93522 11.6239 12.82757 14.12765 13.70803 12.50571 10.75355 11.29876 11.34688 16.21528 16.64844 
  15.85424 17.58728 17.14469 17.19608 18.15673 16.2019 13.98389 14.04934 14.00954 16.88185 15.97725 14.16334 13.0995 11.42474 11.70505 13.47377 14.31889 12.8265 11.54771 11.79184 11.55733 14.441 14.84616 
  16.01069 17.67813 17.21562 17.56873 18.24154 16.43555 14.16117 14.16953 13.69155 17.32711 16.4162 13.46104 12.36254 11.61799 11.27409 13.07141 14.62038 12.76868 11.54521 12.79095 12.39311 14.58929 14.84247 
  16.09941 17.7363 16.53284 17.27621 18.18447 16.58593 14.33624 14.35762 14.19332 17.44104 17.00833 13.70702 12.45344 12.21988 13.11712 14.56761 16.01712 13.74598 12.13099 13.26468 13.10818 15.27417 15.56255 
  16.42318 17.98415 16.44273 16.57448 17.62833 16.28909 14.36828 14.6948 13.78254 16.22694 15.95521 14.12572 12.26701 11.85083 12.46605 14.29742 16.13456 13.84185 12.26186 12.63923 12.99921 15.56506 15.71164 
  16.66626 18.41392 16.23549 16.69583 17.69269 16.16858 13.91554 14.21553 13.17823 15.73793 16.4865 15.50647 12.3832 11.67269 11.71129 12.98511 15.19916 14.09333 12.14127 13.53028 13.66361 16.01434 15.8486 
  16.93131 18.80218 16.81364 17.33029 16.89452 15.53996 14.12277 13.81145 13.03909 15.09264 16.55081 16.00311 12.3552 12.25566 11.77334 12.40888 14.75922 13.7299 12.90318 13.40209 14.23598 16.61812 16.00201 
  17.09422 18.93463 16.77671 17.12045 16.87137 15.12399 13.50143 13.57805 12.80646 15.09097 16.1608 15.57011 12.32053 11.78571 11.45637 11.75474 14.11037 13.6446 12.81923 13.51655 14.85688 16.60715 15.62787 
  17.18843 19.15834 16.90851 16.13819 16.45988 14.99436 12.40748 12.04965 12.14001 13.26226 14.0558 13.51968 12.57948 11.99215 11.86336 12.04673 13.09016 13.15625 12.4888 12.85295 14.979 16.59832 15.41977 
  17.2978 19.31523 17.34639 17.72629 15.35329 13.78927 12.61048 11.38221 11.57769 13.06238 13.21393 12.3947 11.35336 11.29097 10.76642 10.99077 12.29214 12.08838 12.20228 12.01544 11.86961 12.86051 13.13075 
  17.29455 19.39194 17.52913 18.08251 14.94142 11.73378 12.26912 13.0758 11.84843 11.48248 13.48376 13.39672 11.70031 11.98227 11.31891 10.61537 11.5765 11.5006 12.25064 12.40025 11.37915 12.90368 12.65543 
  17.11082 19.30302 17.25127 17.65386 14.90019 12.53856 11.45692 13.26181 11.62685 11.53579 13.36939 12.93143 11.07778 11.12064 10.86599 10.39632 11.41168 11.49042 11.78741 12.00982 11.57957 12.68554 12.28857 
  16.81596 19.11267 17.10374 17.62917 15.10644 12.49786 11.83172 13.11271 12.08539 11.51897 12.27075 12.25029 10.05081 10.51726 10.23621 9.723547 11.47403 11.38655 11.75697 12.38226 11.68046 12.32908 12.25521 
  16.18891 18.7452 17.12043 17.90645 15.35898 12.87751 11.93357 11.34936 12.12841 12.00346 12.70253 13.15777 10.02708 9.598401 9.508228 9.663599 10.49346 10.55344 11.21704 12.04108 11.25043 12.10459 11.84318 
  15.3587 18.25852 17.01021 17.94506 15.36013 11.68553 10.90639 9.600526 11.48932 11.83768 11.86593 12.23622 10.37516 10.22231 9.242025 9.690022 10.79908 10.51446 10.66708 11.09983 11.20814 11.95468 11.32153 
  14.94686 18.0086 16.66558 16.91915 14.43537 11.78186 10.37191 10.1009 10.64833 11.51232 11.21707 11.44729 10.38132 10.89793 11.71739 11.39077 10.99349 11.08029 10.52239 10.70206 11.1781 11.64449 11.60586 
  15.2814 17.80077 15.7858 15.0733 13.19273 10.99025 11.4507 10.23576 11.88874 11.69319 11.04233 10.65287 11.20542 12.47354 12.96547 12.31134 12.59687 12.5441 11.05464 11.68961 12.18429 12.52163 12.24349 
  15.48732 17.81838 15.57828 14.95755 13.17332 11.15086 11.35037 10.27524 11.15464 11.83677 10.82353 10.86964 10.84648 12.2185 12.01063 11.35811 12.14788 12.13265 10.27622 11.16301 10.85313 11.33077 11.19722 
  15.64028 18.04706 15.90875 13.46693 12.71794 10.47495 10.41679 10.41669 11.51304 10.97092 10.52732 10.51946 10.18956 11.28483 11.5248 11.06633 10.79301 11.08132 10.48834 11.23098 10.90977 10.93761 11.2342 
  15.691 18.08693 15.95903 14.5647 13.08687 11.96817 11.98348 10.27445 10.82906 10.65066 10.7676 10.49903 10.69828 10.99068 11.62179 10.48239 10.64531 10.73122 10.30917 10.84842 11.01107 11.19445 10.96024 
  15.70235 17.87411 15.54562 14.17106 13.73759 13.39945 12.94871 11.86754 12.32136 13.34113 14.04813 13.64429 12.8817 14.03526 13.78847 14.31763 14.08296 12.28475 12.26945 13.41342 15.16624 15.82314 14.65862 
  15.111 17.38759 15.23328 14.86325 16.06344 16.10236 14.34899 14.72656 14.66246 16.20999 16.94175 17.2274 15.76645 16.78901 16.12683 16.99647 16.62846 13.99079 13.86968 16.12244 18.25901 19.34782 17.66724 
  15.03301 16.61184 14.17075 13.25705 14.54141 14.69707 13.71418 15.49278 15.08241 16.18828 17.16152 16.65724 15.34151 13.99803 13.97429 15.01542 15.35275 14.87416 14.55297 15.45949 16.31717 18.42344 16.47798 
  13.45951 15.78569 13.98534 13.04344 13.92377 14.24774 14.06853 15.00025 15.36809 16.57527 17.38427 17.02029 15.19632 14.64439 14.65536 14.64023 15.68649 15.49167 14.9104 15.35289 16.33175 18.0865 16.37584 
  13.07347 14.7915 12.67954 13.23848 14.1819 13.9683 13.21752 14.69234 17.90232 17.24359 17.5149 18.04125 15.55333 15.03565 14.85524 15.23376 15.60953 15.83141 15.17968 15.23778 16.42048 17.88067 16.92698 
  12.37712 14.9033 13.8624 13.76913 13.30324 14.3478 14.02929 14.99968 18.80645 18.39549 18.51047 18.57759 16.0035 14.5608 15.19788 16.24825 16.87722 16.25792 15.09623 15.11478 16.32289 17.68389 16.08194 
  12.66646 14.60857 12.63233 12.64944 13.4228 13.81045 14.2454 15.89087 17.83408 17.56272 18.75677 18.45625 15.79576 15.1664 15.52465 16.30447 17.33408 16.53073 14.91176 15.41308 16.24126 17.18274 15.97926 
  12.81794 14.79919 12.64581 13.73917 14.27735 12.86728 13.65656 16.11627 17.94633 17.98828 18.37813 18.78796 17.11118 16.00847 15.83047 16.21775 16.80949 15.5775 14.92074 15.60607 15.80383 16.84433 15.66324 
  12.5034 15.02809 13.34704 12.59922 13.16282 13.18943 13.6498 16.22042 17.74181 18.11487 19.01717 18.76182 16.68096 15.37317 16.47843 15.88386 16.60799 16.23045 15.78561 15.60193 15.58282 16.25681 16.40265 
  13.06095 14.47213 11.57626 11.60358 12.57563 13.76851 13.97735 14.38274 16.31401 17.55055 18.43916 18.5317 17.04531 14.99704 16.53685 17.02856 17.58887 16.26225 14.9763 14.45201 15.51912 15.21459 16.6817 
  12.86505 15.85786 14.75419 13.01475 12.67307 14.4433 15.20655 15.15495 16.09785 16.51936 17.95182 18.31379 16.75537 15.01732 15.75735 16.82045 17.44093 15.66828 14.33217 14.62619 15.22149 15.17155 17.20671 
  13.12523 16.5502 15.50594 15.34845 14.73641 15.80987 16.79601 17.06586 17.48354 16.89506 16.9989 17.87918 16.51353 15.33819 15.52597 17.10384 16.88856 15.24374 15.05034 15.51818 15.27853 14.60615 17.90415 
  12.69545 15.62294 14.66864 16.31116 15.64141 15.63712 16.38996 17.50281 17.90165 17.66326 17.61484 17.62272 16.00488 15.10813 14.94744 17.00407 17.49522 15.62474 15.2027 15.69592 15.58376 14.26855 17.33854 
  11.70221 15.91556 15.21819 16.73549 15.93421 15.34525 15.80453 16.35826 17.22446 17.98482 17.83851 17.86388 16.83107 16.10304 15.82976 17.13237 17.91541 15.26026 15.10075 15.98048 15.72246 14.22937 16.43557 
  12.58039 16.54533 15.48762 17.14302 16.20825 15.26482 15.79737 16.80904 17.53083 17.92554 17.57588 16.92835 16.21835 16.24897 16.26789 16.09962 16.68681 15.19278 14.3501 15.73407 15.31177 14.58012 16.44425 
  12.52427 16.68682 15.6768 17.41134 16.48487 16.26741 15.93352 16.79063 17.23722 17.75588 17.59963 16.55783 16.45605 16.19305 16.34012 15.8233 16.01214 14.49679 14.05411 15.83327 15.75738 14.70327 16.81463 
  13.08901 16.92737 15.88609 17.48892 16.60913 16.92618 15.98914 16.84229 16.96335 17.23575 17.33563 17.23502 16.5829 16.31644 16.85165 16.5601 16.59091 14.8695 14.04865 15.1961 15.04456 15.03072 16.76081 
  13.25131 16.82976 15.65168 17.33111 16.53403 16.99053 16.30228 16.67911 16.8145 17.26553 17.52997 17.40855 16.6225 16.19843 16.2693 16.43582 16.03738 13.90187 14.50136 15.9589 15.88366 15.52198 16.96512 
  12.86011 16.4959 15.32254 16.57291 15.98928 16.51817 16.59031 17.21095 17.32273 17.24012 17.26693 17.4594 16.63605 16.95958 17.0414 16.83618 16.73441 14.66106 14.54188 15.98279 15.59871 15.42995 16.82753 
  12.3734 16.38073 15.23618 15.71347 15.76168 16.80319 16.83652 17.01589 17.01722 16.75942 16.97372 17.74806 16.4648 16.73439 16.26514 16.33158 16.27448 13.83779 14.71891 15.37935 14.90931 15.09158 16.92881 
  13.16311 16.98702 15.90418 16.86047 16.45071 17.14736 16.50004 16.52925 16.95103 16.78541 16.71283 17.1481 15.68781 16.3924 15.7757 16.42943 16.24413 13.86384 14.62436 15.48191 14.93738 14.84517 16.21951 
  13.59746 17.23912 16.12656 17.50044 16.93601 17.43536 16.78477 16.41955 16.75596 16.41606 17.25539 17.49482 16.25328 16.44511 15.88677 16.52536 16.53255 14.46775 14.81169 15.79123 14.8385 15.43922 17.17949 
  13.25004 16.94229 15.93826 17.73337 17.23859 17.89079 17.34496 15.97857 15.19893 15.15988 16.91029 17.25496 16.62859 16.25416 15.48432 15.56355 16.21918 14.83444 15.0622 15.81797 15.64522 15.32866 16.89268 
  12.62537 16.47641 15.74364 17.85862 17.47602 18.16114 17.55453 15.95657 15.11439 15.08639 16.68707 16.74309 16.72464 16.83375 15.94381 16.12422 16.87457 15.30209 14.94832 16.63556 16.4691 15.57759 17.3871 
  12.6226 16.10576 15.56736 18.04713 17.56757 18.11768 17.59097 16.34696 15.375 14.14986 15.13968 15.70977 16.85783 17.35108 16.21252 16.49694 16.90446 14.97466 14.87337 16.36794 16.10093 15.90252 17.58858 
  12.7855 15.88645 15.67985 18.15505 17.37622 17.52381 17.42296 16.19477 15.50058 13.98604 15.38529 15.37528 15.93515 16.47197 15.3381 15.97862 16.40454 14.95582 15.0605 16.63037 15.63746 16.15078 17.13511 
  12.65207 15.56413 15.72229 18.15676 17.02139 16.67873 16.7427 15.08276 14.19359 12.79751 14.48611 15.64405 16.11738 15.6327 14.42156 14.5413 15.50612 14.09361 14.9137 16.91826 15.45331 16.1494 17.52712 
  12.56314 15.1539 15.81251 18.0403 17.20735 16.9832 15.43454 13.55827 13.10696 12.59839 13.23814 15.90523 16.60976 15.61129 13.46397 13.59857 14.55115 13.29045 14.68942 16.51844 14.84454 16.06714 16.90926 
  12.64222 15.16426 16.14207 18.01224 17.03122 16.82287 13.88338 12.87623 12.53724 12.11878 13.98522 15.42566 15.64351 14.8041 13.28969 13.65548 14.46269 13.32662 13.92385 14.84129 13.31076 15.09256 15.82777 
  12.85059 15.56986 16.29726 18.16373 16.31842 15.27977 12.30446 12.48871 12.05114 12.02475 13.32861 15.53581 15.44006 14.6188 12.19517 12.34837 13.52474 12.41385 13.52724 14.1898 12.92852 15.22741 15.49917 
  12.59761 14.4621 15.45572 17.18511 15.81542 15.78824 11.84854 11.87338 12.32981 11.94361 12.86972 14.26315 14.89518 14.27585 11.97713 12.49144 13.58669 12.65414 13.09633 13.75735 14.88413 16.18767 15.3166 
  12.64882 14.81147 14.45962 16.38583 15.72507 15.59489 12.376 11.95343 12.09104 11.86625 12.24266 13.68499 13.91463 13.24626 10.7595 12.1026 13.03325 11.85887 12.62869 13.85928 16.20368 18.33456 16.91249 
  12.12053 15.05175 14.22981 15.73134 14.97451 14.68129 13.12028 11.97819 11.38219 11.20992 11.37042 12.24603 13.25563 13.08652 10.43595 10.82252 11.63596 11.33452 11.86574 13.24061 15.52648 17.4119 16.81245 
  11.19065 14.21162 13.51477 14.70132 14.9847 15.25536 14.19143 12.48769 11.34493 9.814653 9.99059 11.53341 12.29802 11.784 10.16933 10.75036 11.20462 10.74964 11.48954 12.01296 13.41964 14.80476 14.38404 
  10.00999 12.74599 12.14843 14.51654 15.13391 15.2609 13.72731 11.66748 10.16199 9.022615 10.54656 11.04408 11.06597 10.65407 9.941639 9.674333 10.40516 10.81487 11.25098 11.6855 11.23007 11.83496 12.85696 
  10.08466 11.83495 11.54785 14.59242 13.85901 13.03223 12.44154 11.23707 9.762304 8.823195 10.68114 10.62345 10.76976 10.03955 9.436696 9.586202 11.22563 10.83237 11.0967 10.70838 10.57485 11.49634 11.70702 
  10.48889 12.83749 11.55844 14.30776 13.98957 13.29756 10.27051 11.19239 10.73191 8.282329 9.798191 11.28903 11.10856 10.43637 9.679453 9.696969 10.23166 10.5042 10.2436 10.84797 10.9094 11.14272 11.04821 
  10.19536 11.74172 11.33944 13.41399 13.06223 13.24977 10.40681 9.719469 9.504123 8.071562 9.28103 10.37499 11.08919 10.83884 9.125082 9.426393 10.12565 10.30615 10.62199 11.02502 10.98999 11.49767 10.76202 
  10.88223 12.977 11.56636 10.88618 12.75476 12.96493 10.24409 9.20118 8.650577 7.399164 8.780226 9.993423 11.50119 11.16004 9.309609 9.911741 10.38707 9.787494 10.5507 10.78142 10.8794 11.07239 10.47661 
  10.47173 13.69682 12.45944 12.35382 12.58801 12.46847 10.00025 10.27305 11.53972 12.61611 13.00516 13.0168 12.24968 12.74772 13.46355 13.35037 12.67452 11.07662 10.77348 12.34726 13.52226 13.54476 12.45269 
  10.39999 12.38218 11.96567 12.83998 11.78783 11.14596 10.65086 9.983682 11.39105 12.32884 12.81081 12.60661 11.70481 12.39304 13.20282 13.14799 12.67365 11.12111 11.64005 15.3785 17.26889 19.2809 18.58731 
  10.06194 11.97701 11.10652 11.85843 11.02539 11.2265 10.7828 9.873035 10.32476 11.25525 11.96012 12.71968 11.64478 11.76205 12.47315 13.18736 12.99789 12.38262 13.63724 17.6046 20.06792 21.4036 21.43807 
  9.509731 11.85916 11.10777 12.50954 12.18235 11.80091 11.27237 10.29496 12.2601 12.32322 13.42 14.12483 13.16052 13.71877 13.45849 13.22503 14.22825 13.49225 15.37626 19.22064 21.55962 22.73142 22.24018 
  9.15207 11.07175 10.86126 11.87928 12.69756 12.46886 11.5245 13.08445 13.88625 14.80505 14.99481 15.52319 15.05282 15.02273 14.87423 15.29251 15.91438 14.93134 16.80274 21.40077 22.54545 22.58663 22.29738 
  8.860111 10.74111 10.50416 12.93807 12.55079 13.06175 12.17259 13.51095 14.48125 15.87022 17.08365 15.86352 15.00874 14.95488 15.34727 15.18754 16.02795 15.03839 16.93264 19.53366 21.6056 21.50261 21.28786 
  10.75877 10.37344 10.58075 12.34408 12.94353 11.96778 11.4975 12.68404 14.95128 16.74927 16.77153 15.63198 13.98707 14.3226 14.68699 15.35497 16.36192 15.49951 15.98009 17.65331 18.19219 18.41393 18.1715 
  9.39485 10.52738 11.42019 12.21228 12.45682 12.18025 11.33037 12.44518 14.51794 15.84477 14.62877 14.42839 14.07113 13.2896 13.65752 13.82688 15.6779 15.75582 15.67209 17.08607 16.55664 17.27306 16.92216 
  9.381274 11.83695 11.8863 11.62455 11.99966 11.92766 12.16977 12.76743 14.2169 15.01731 14.82383 13.0397 13.42383 13.10484 13.42112 13.71131 14.73178 14.64882 14.79978 16.09834 16.16719 17.4647 16.77292 
  9.585428 10.993 10.5035 11.81598 11.44083 12.43314 11.64416 12.2522 14.10823 14.41961 14.19406 12.61582 11.76546 12.10949 13.28017 13.21444 14.49621 14.27315 14.40105 16.25966 16.55149 17.44991 16.81313 
  8.175888 11.18484 11.35472 11.47145 12.36835 12.95294 11.74024 12.37499 14.70968 14.42316 13.73902 12.62927 11.59597 12.07641 12.50259 12.79706 14.06515 14.19124 14.13676 15.15194 15.65369 18.23908 17.15416 
  9.260625 10.70815 9.263671 9.806908 11.83224 12.31308 11.73443 13.4353 14.60034 15.05972 13.83386 12.88395 11.57649 12.02801 12.20291 12.59455 14.31687 14.47969 14.6405 15.52516 15.67438 18.1055 17.65921 
  9.911335 10.50611 10.12131 9.937845 12.0217 12.06643 12.41577 13.58587 15.64432 14.32141 13.08726 12.30006 11.95351 12.71829 11.26942 12.65187 14.17917 13.53116 13.70112 15.10717 14.98607 17.59751 17.46946 
  9.024209 8.987809 9.160165 10.86254 12.75719 12.29259 12.99271 14.01397 14.93609 15.38431 14.07591 12.15879 11.33199 11.99755 11.29723 12.28741 13.99432 13.90781 13.2847 14.09958 14.27385 16.84444 16.68974 
  11.9299 13.47678 12.837 11.34953 12.98505 12.79702 14.93977 14.99091 14.8044 15.23901 13.70815 12.30987 11.19355 11.14724 11.14456 11.97139 13.38016 13.39131 13.10051 13.16111 13.63137 15.1045 15.48153 
  14.41228 16.11685 15.7402 15.5474 13.80078 14.53132 14.65797 14.65964 15.91533 16.35481 13.96598 12.8188 11.46816 10.90168 10.61743 11.47446 13.77556 13.64553 13.89567 13.91599 13.57935 15.20967 15.69138 
  15.08783 16.8548 16.49383 16.41818 14.09665 15.30241 14.7049 14.89885 15.83918 18.0921 16.22671 14.06109 11.65885 11.18969 11.55042 12.64634 15.04636 14.6538 14.17054 14.18222 15.13279 16.13375 16.41827 
  15.16109 16.7801 16.56729 16.43837 15.12475 14.8846 14.26196 15.39288 16.01123 17.1432 17.07575 15.71479 12.39699 11.93664 11.86322 12.98023 15.49663 15.14287 14.6057 14.86261 15.40321 16.85805 17.07797 
  15.09084 16.57223 16.80362 16.58635 15.20792 15.10697 14.90998 15.40153 15.35094 16.88487 17.96318 16.37918 12.55137 12.20782 12.30409 12.03328 14.67583 15.03111 14.56669 14.42755 15.211 17.10543 17.10053 
  14.69408 15.81541 16.46612 16.37843 15.2229 14.59921 13.61355 14.3778 14.58977 16.18752 17.5792 16.62926 12.41473 12.08434 12.35257 11.9434 14.27981 14.79067 13.80497 13.96159 14.25573 17.45439 16.78282 
  14.43095 15.41959 16.21408 16.07892 15.18807 14.0085 13.96235 14.37799 14.08463 15.0561 16.641 15.33981 12.0121 10.58257 10.67117 11.07191 13.45955 14.30978 13.5234 13.39615 13.00758 15.30398 14.59562 
  14.58677 15.82187 15.58251 15.65558 15.2996 14.34471 13.48909 13.53928 12.62957 13.79965 15.50717 14.88582 12.17444 11.02395 10.55875 10.28314 11.87372 12.80804 12.80521 12.87114 12.06493 14.00245 13.72964 
  14.95683 16.3266 15.16857 15.4292 15.51012 13.59986 12.23133 12.66695 12.21721 13.14255 14.3418 13.26337 11.4309 11.01019 10.59134 10.42654 11.62912 13.19194 13.29006 13.23644 11.81969 14.27526 13.76118 
  15.59626 17.08241 15.06971 15.65379 14.82292 13.31317 11.43343 11.9617 11.85537 12.2777 14.29009 13.45866 10.72894 10.13381 9.537953 9.800446 11.53536 12.31185 12.60432 11.92848 11.34388 12.84455 12.54369 
  16.04304 17.82648 15.79309 16.24393 14.73321 13.42411 10.97739 10.73002 11.65459 12.07827 13.2604 12.44436 10.10683 9.841578 9.451722 9.291868 11.41494 11.8923 11.88242 11.91761 11.27107 12.20158 12.1325 
  16.25625 18.22469 16.04192 16.12708 14.96282 13.00054 10.65593 11.17855 11.14046 12.55776 12.52015 11.42063 9.433226 9.00628 9.585645 9.290247 10.70464 11.49628 11.68974 12.07785 11.17581 11.06459 11.13168 
  16.04482 18.14673 15.86384 14.91284 13.05532 12.45239 11.72008 11.51588 11.29592 11.78518 12.2973 11.38977 10.29393 9.548064 9.722668 10.09043 10.84775 11.8718 11.26131 11.56782 11.02544 11.18756 11.03338 
  15.26615 17.28764 15.16505 14.44016 13.28143 12.07429 10.97578 11.15899 11.50891 11.01097 11.61723 11.36335 10.16722 9.822009 9.901525 10.20638 10.18853 11.82158 11.44402 10.68624 10.50383 11.11971 11.41882 
  13.81127 16.00517 14.23292 13.67491 13.21358 11.73995 9.486867 10.56567 9.862735 10.49043 11.29869 10.67811 9.464268 9.29777 9.709767 10.42091 10.48922 10.93209 10.42687 11.04964 12.2851 12.72577 12.39185 
  12.14262 14.94287 14.0802 14.32626 12.80348 10.66512 9.237948 9.739904 9.596913 10.61011 11.65724 10.63011 9.505305 9.886621 10.47523 11.0215 10.59563 10.78076 11.00128 13.60528 15.06756 16.77889 16.86555 
  10.83472 14.31258 12.90632 12.77348 12.20279 9.816229 9.277019 10.18015 9.096459 9.997322 11.51482 10.87203 9.44985 10.83187 12.01145 12.17475 12.17965 11.82202 12.18132 14.90107 17.46739 18.73794 18.04218 
  11.36189 14.46365 13.13914 12.11007 11.7593 10.0019 9.75862 10.77737 11.03628 12.26316 12.78038 11.30242 11.24927 12.86973 13.40784 13.56777 13.70109 13.02316 12.83176 16.09006 18.6455 20.18859 19.64919 
  12.72338 15.11341 13.32158 12.74039 11.63489 10.73177 10.17774 10.56085 11.74206 12.8467 13.64448 12.64196 13.06981 14.0199 13.50293 14.09397 14.45456 13.70243 13.05322 16.6385 19.08248 21.01362 20.21127 
  14.20404 15.91786 14.92538 15.12413 14.77285 12.89441 12.79587 11.85586 12.40155 12.80069 14.29893 14.05416 12.61579 13.57712 13.58947 13.30546 13.65021 12.84343 13.35256 14.62488 16.85373 18.83813 18.01679 
  14.53132 17.33943 16.26635 18.11593 17.92229 16.41659 14.97751 13.63034 14.93866 15.25947 16.20634 15.82948 14.33713 12.11738 13.05697 13.88322 14.07832 13.04026 13.09391 14.61522 14.58644 16.06195 17.62827 
  14.11968 16.69542 15.12335 18.32276 17.78468 17.74669 15.92803 14.50374 15.56971 16.23568 16.50482 16.08389 14.88408 13.72153 14.5031 14.91234 14.72964 13.03544 12.99891 14.99354 14.95278 15.26584 17.54221 
  12.58309 15.06906 15.59264 18.20675 17.21118 17.89225 16.83617 15.57529 15.21058 16.55922 16.91265 16.39316 15.46596 14.28004 14.31382 15.09121 15.5357 13.50103 13.16001 15.68369 15.05554 15.30483 17.25786 
  13.38149 16.1832 16.11864 18.42098 17.48438 17.89621 18.30592 16.22846 16.11395 16.84072 17.24339 16.6131 15.43338 14.83396 15.42686 15.86161 16.32101 15.09858 13.3338 15.44106 14.9978 14.34465 16.55215 
  14.37599 16.83687 16.94507 18.2267 16.84422 18.06699 18.55002 15.54701 16.76327 16.83081 17.27568 17.00946 16.32395 15.44895 15.63621 15.85781 16.74874 15.47261 13.73446 15.28013 15.02808 14.89976 16.14604 
  15.05994 16.78429 17.38232 18.71122 17.29902 17.89909 18.18792 17.06817 16.19571 16.4189 16.93696 17.02257 16.01795 15.9466 15.9529 16.66254 17.07906 14.79625 13.80007 15.45906 15.62278 14.89583 16.3634 
  15.31985 17.11125 16.90094 17.50673 18.12416 17.26594 18.61826 17.81363 16.92988 17.33131 17.22307 17.6853 17.16475 17.8327 17.79674 18.01369 18.75657 16.41342 14.83692 16.64419 17.08493 16.10439 17.74908 
  14.81287 16.67113 17.24438 17.58853 16.56671 17.4409 19.14317 17.1272 17.22141 17.37194 17.30178 17.35553 16.81259 17.59592 17.2289 17.99676 18.54756 16.97488 15.1711 17.01239 17.41453 16.61493 18.02877 
  14.10214 16.48185 16.70473 17.83863 17.09863 17.92168 18.60074 17.03748 17.01916 17.1662 17.45386 16.9503 17.22705 17.88758 17.25076 18.06163 18.82708 17.06891 15.6639 17.32082 17.86867 16.99368 18.4935 
  14.47095 16.27239 17.20447 16.68737 16.69925 16.92639 17.55458 16.4129 16.37072 16.70506 17.13801 16.74328 16.66209 17.66596 17.03108 17.52262 18.5179 16.57609 15.3233 16.59836 16.57953 15.76227 17.69633 
  13.24856 14.56392 16.16074 15.96923 16.11824 17.0615 17.37887 16.89696 16.49023 16.93359 17.04477 16.61666 17.3206 18.46787 17.4531 17.96162 19.10358 16.9934 15.63334 17.35444 17.27749 16.49334 18.51469 
  13.37477 14.40732 15.3166 16.12179 16.0549 17.9857 18.24229 15.97747 16.51573 16.80781 17.66976 16.77187 17.07391 18.37435 17.8941 17.99025 18.43793 16.58463 15.94918 17.33995 17.58286 16.71625 18.26147 
  13.93172 15.95009 16.12278 16.62806 16.77155 17.70332 18.30991 16.70022 16.12438 16.63647 16.96805 16.56441 16.83017 17.72108 17.19194 17.83681 18.34489 16.32563 15.5139 17.0459 17.14361 16.61393 17.9541 
  13.97243 15.71532 14.24013 15.94024 16.42779 17.24181 18.20842 16.16203 15.17454 15.77178 16.21501 15.67723 16.35604 17.39831 16.76721 17.22853 17.88738 15.95963 14.72727 16.41531 16.48671 16.12189 17.57236 
  13.12301 15.36403 14.92359 14.51013 15.03061 17.25054 18.02694 15.68099 14.72595 15.05695 15.51794 15.62031 15.82822 17.21599 16.45444 16.81317 17.53438 15.40422 14.59688 15.75735 15.82233 15.50435 17.19834 
  13.63734 15.75846 15.76891 16.33092 16.02224 15.02408 15.51878 13.95916 13.93933 15.34064 15.0723 15.51362 15.4393 16.63021 16.81165 16.50006 17.1815 15.69489 14.01058 16.13823 15.96146 15.71183 17.02876 
  13.19862 15.13811 14.20608 14.87641 13.86195 15.69343 16.0471 14.37471 12.70754 13.07089 13.74677 13.72397 14.30151 15.42236 14.87322 14.77012 16.19817 14.61831 14.36353 14.7163 14.1343 13.27196 14.94497 
  13.07108 14.96432 15.33887 15.41621 16.32788 16.2543 15.70077 13.64809 12.23586 13.97429 14.36621 14.99106 14.89284 15.73454 15.09936 16.08811 17.21342 14.54043 14.93736 16.18698 15.30752 15.68343 17.1031 
  12.17307 13.8663 14.74691 13.82833 15.29623 15.13898 14.94032 14.15431 13.22405 12.60576 13.50053 13.70859 14.2374 14.95605 14.1026 13.79868 15.32834 13.637 13.26627 13.85343 12.49579 13.00945 15.18009 
  12.25888 13.0473 13.98215 15.20883 14.53955 14.94806 15.02145 12.94323 12.06552 12.47472 12.65622 12.89004 14.31748 15.0515 13.45358 13.49286 14.73266 13.38384 12.97834 13.98633 13.04967 14.18903 15.84389 
  12.96655 13.67219 15.57411 16.78977 16.06032 13.93955 14.09687 13.42214 11.74005 12.39577 12.6212 13.11872 14.73195 14.7253 13.47528 13.54747 14.39114 12.98527 13.10725 14.80631 13.07693 14.86841 16.53499 
  10.92069 13.71753 13.87495 14.55451 14.22817 13.57006 13.76763 12.60664 11.17306 11.62393 12.25004 11.81037 13.68605 13.56421 12.86382 12.15489 13.24436 12.0221 12.38628 13.08033 11.34459 12.62064 14.02351 
  11.5769 13.79373 14.03928 14.87669 13.80563 13.71551 14.04329 12.57617 11.19479 11.37623 10.92707 11.98935 12.46576 12.69549 12.09219 11.78906 13.35041 11.86954 12.23488 13.13238 10.95979 13.09149 14.30729 
  10.74847 13.18716 13.24313 12.67642 13.39273 12.77279 12.68126 11.46781 10.00143 9.96587 10.43531 11.69773 12.84946 12.46179 11.33073 10.75771 12.40113 11.34042 11.7729 12.49863 11.20993 11.8419 11.45071 
  11.98419 13.92745 12.27599 12.76547 12.93638 12.34464 12.64864 11.34802 10.23944 9.272176 10.22678 11.26323 11.94163 12.36338 11.0064 10.31246 12.27957 11.23496 11.376 12.09842 11.01261 11.91821 11.30618 
  11.78683 13.45012 12.74603 13.96315 13.49791 12.03139 12.34809 11.16278 9.914504 8.632698 10.02429 10.49186 10.53503 11.25449 10.37139 10.6025 11.64441 10.73072 11.01634 10.47035 9.727706 10.35247 10.77614 
  11.52162 12.46663 11.89697 14.03308 13.64052 11.73958 11.48232 9.956855 8.571227 8.986975 9.7626 10.39963 10.45375 10.46915 10.78598 10.67382 11.57074 10.30212 9.981796 10.63711 10.12906 10.43168 10.83299 
  10.47112 11.29303 12.18044 11.88793 10.8442 10.59727 11.06464 9.595164 9.016017 8.599607 8.99171 9.042683 10.83571 10.81611 9.935513 10.24435 10.53577 9.991421 10.32891 10.49404 10.43093 10.59912 10.33619 
  11.11114 12.00806 12.1118 11.56301 11.28773 11.11582 11.11947 10.14109 9.354194 9.394133 8.459903 9.571765 10.62552 10.94563 10.39587 10.17176 11.07636 10.36478 10.18448 10.5289 10.58135 11.55 12.22885 
  9.4698 10.58114 11.28397 11.78697 11.88663 11.19909 11.94986 8.947301 8.886659 9.0145 8.525748 9.001356 10.4408 10.03337 9.662758 9.838831 10.64479 9.835432 9.941095 11.35219 12.8765 13.6802 13.69769 
  9.431818 10.37749 9.402466 11.65605 12.08712 11.54609 11.73875 8.867339 9.922234 10.61318 10.76134 10.87708 10.16173 10.49702 11.13492 10.97119 10.87446 9.942475 10.69347 12.76756 14.78555 15.63049 15.3683 
  9.377582 10.94667 11.15134 12.46202 11.73515 10.88063 11.25175 9.65536 11.14065 11.88232 11.94413 11.89391 10.90223 11.40995 12.1747 11.62114 11.59762 10.78957 11.37593 13.46099 15.81433 17.16356 17.15998 
  9.576265 11.13453 10.96931 10.33049 10.23858 10.44758 10.76839 8.92027 9.525447 10.34433 9.90531 9.858282 9.904203 11.16824 11.88748 11.96853 11.37809 11.49845 11.76391 14.43392 16.65162 18.26472 17.6829 
  9.068971 10.9832 11.37302 11.22971 9.851135 9.574883 9.735991 9.078069 9.105554 10.11883 10.55576 10.29943 9.432137 11.80125 12.00973 12.56134 12.2724 12.0964 12.37772 14.91978 17.71165 19.49631 18.86863 
  10.06087 11.30585 10.13232 9.214253 8.846772 9.325168 10.23292 9.699574 10.61134 11.65377 12.32304 11.8669 11.56203 13.31346 13.75598 14.85218 14.10346 13.62769 13.95514 16.67099 18.7434 19.90597 20.01846 
  9.883128 9.034005 10.26573 9.922016 9.431784 8.751758 10.03959 10.56669 12.11162 13.17491 13.19539 13.8256 12.93738 14.20389 14.85687 14.53677 14.83226 13.86686 14.76633 17.12251 20.07998 22.08923 21.37822 
  9.735471 10.76163 10.26859 10.09679 10.99103 9.163989 9.649745 10.54053 11.75659 12.85315 12.67314 13.1372 12.42263 13.90682 15.30728 15.41667 14.59862 14.48736 15.16387 17.94337 20.98712 23.08864 22.34081 
  8.208311 9.269901 9.024371 9.993098 11.19902 9.208381 9.769848 11.40966 12.96586 14.02102 14.22982 14.19959 13.32056 14.34479 16.31928 16.57144 15.62925 14.52523 16.18148 18.67118 20.88604 23.1573 22.48557 
  8.409505 9.858494 9.497688 10.31458 11.88834 9.982258 10.32621 11.68756 12.89914 13.39427 14.21135 14.64743 13.3156 14.29278 16.14828 15.96886 15.35177 15.5271 16.91822 18.72905 21.57679 23.61708 23.15364 
  8.934703 9.573559 9.441955 8.765581 10.14542 9.618201 10.5309 11.58863 12.7027 13.85705 14.36015 14.13195 13.7818 15.45178 16.22048 15.6483 15.75696 15.26135 16.86555 19.28024 22.11215 24.40104 23.93007 
  8.684306 8.866581 8.434036 8.029984 10.91321 11.22698 10.23968 11.33583 12.85784 13.39227 14.42832 14.13936 13.58225 16.27074 17.07727 16.51017 16.3993 16.14433 16.99274 19.69513 23.2365 24.59908 24.04222 
  8.643711 9.807123 9.50042 9.324292 10.83774 11.15303 10.39372 11.22418 12.95454 13.35421 14.3774 13.54205 13.72393 17.05115 17.68932 16.12386 15.83425 16.01268 17.58204 19.77008 23.01275 24.67944 24.14307 
  7.688036 8.535404 9.236469 10.83574 11.49044 11.28016 11.46384 12.16683 13.50681 13.8899 13.76011 13.80902 13.72299 16.50299 18.20104 16.61069 16.52741 16.1986 18.36386 20.11685 22.81937 23.6178 23.35131 
  10.17826 8.205004 9.347319 12.3267 13.34488 11.22051 10.90597 11.83232 13.47991 13.05177 12.89349 13.7592 13.9114 16.77988 18.02056 16.96051 16.41222 16.26488 18.08109 20.25217 22.95943 23.70821 22.32704 
  9.040493 8.760096 8.527176 9.89751 10.56365 9.972799 10.10701 10.67076 12.86245 12.45882 13.2768 14.06905 14.12293 17.41031 18.83172 16.48821 16.42317 16.09177 17.95219 20.02211 23.01787 23.78878 22.42188 
  9.399467 7.837672 9.216712 10.6783 10.54392 10.52287 11.50509 11.89538 12.42534 13.73793 12.68215 13.53249 13.51577 16.23136 18.68464 17.29062 16.49525 15.83475 18.22473 19.69581 22.15342 22.9642 22.44792 
  8.776668 9.244227 9.925127 11.38309 10.78001 10.8051 10.342 11.05655 12.77748 14.84761 13.69246 13.08996 13.35411 16.14378 18.69108 17.32467 17.7679 16.45478 17.57494 19.5618 22.46428 22.97581 21.49464 
  8.894381 9.427493 10.64883 11.46896 10.78124 9.074279 10.21656 11.47114 12.82468 12.99811 12.95543 12.60641 13.26145 15.72201 18.2295 17.42356 17.95378 16.7246 17.54635 19.24829 21.3367 21.933 20.77688 
  9.042042 8.76036 10.59839 11.72806 12.2252 10.01876 10.65936 11.28481 12.38915 12.18661 11.73761 12.19487 12.95884 15.87654 18.47887 17.35869 17.52493 16.28156 16.84913 18.45013 20.72539 20.79898 19.75169 
  10.06297 12.90782 12.88049 10.60552 11.71332 10.51397 9.21656 10.82334 11.41065 12.40258 13.10792 13.42447 12.83565 15.32679 18.56764 17.4046 16.69835 15.57237 16.41321 17.4268 18.71982 19.50741 18.1189 
  9.921656 16.52995 17.53824 15.70275 15.32331 12.92085 12.47987 12.10413 12.42868 12.15174 12.5522 11.8612 12.07436 13.91121 17.06873 16.3721 16.04488 15.49017 15.58386 17.01062 17.89955 17.60539 17.31772 
  11.36264 17.33499 18.40477 17.00244 18.4658 15.7809 13.04894 11.71813 12.67283 13.29064 13.457 12.057 11.66851 13.66247 17.68314 16.57116 15.98984 14.92666 16.06848 17.17213 17.39293 17.66684 17.4516 
  10.62282 17.60542 18.32822 17.09484 19.33393 16.77328 12.53772 11.13757 13.03852 14.02408 14.38111 13.17555 12.76845 14.38874 17.52058 16.74355 16.38338 15.24556 15.39419 16.26055 16.64132 16.97384 16.89183 
  10.47606 17.70844 18.43419 17.68979 19.75669 16.95199 11.87117 12.01979 13.45656 14.36295 14.81179 13.49354 12.68675 14.77314 17.4999 16.20375 15.68843 15.2202 15.67017 16.32603 16.47589 16.68805 16.65318 
  10.98051 17.72518 18.46542 18.05853 19.93608 16.77472 12.27509 12.2058 13.27803 14.52329 15.10997 13.82026 12.10452 13.67547 16.55763 15.52405 15.14251 14.49046 15.37576 16.78892 16.64076 16.92625 17.4085 
  10.61574 17.7157 18.246 18.44892 20.22108 17.01719 13.5045 12.65141 12.83893 14.40987 15.02312 13.29914 13.55334 15.01744 16.19461 15.84118 15.6518 14.68441 15.11373 16.25138 16.23415 16.90338 17.4352 
  10.69256 17.79112 18.42277 18.3324 20.11359 16.95714 13.75263 13.08895 13.11321 14.44533 15.22537 13.40903 13.27475 15.41946 15.46931 15.14482 15.65731 14.71926 15.28069 17.00951 16.92454 17.32482 17.49486 
  10.73899 17.75406 18.27556 17.87709 19.47082 15.97545 13.59863 13.1574 13.07014 14.04729 14.62672 13.32558 12.45246 15.42138 14.76762 14.0249 15.21964 14.52955 15.10038 17.08111 17.13165 16.41658 17.86931 
  9.618335 17.47857 17.98661 17.55681 18.80152 14.7733 13.48202 12.65518 13.07832 13.56103 13.67241 12.99241 12.99369 14.52666 14.21892 14.50008 15.0451 14.01138 14.70819 17.09094 17.17188 17.34347 17.56815 
  11.57962 16.64433 17.44956 16.3053 18.07066 14.92957 13.12261 12.80251 13.23727 13.17543 12.1275 12.36365 13.38163 14.66714 14.47845 14.50121 14.2356 14.29019 15.07313 17.368 19.52579 21.40561 20.27804 
  10.79007 16.8456 17.36958 16.39471 18.20171 15.12339 12.79474 10.63306 11.83869 12.54484 13.22725 12.38338 12.43016 14.38861 14.98121 15.43519 15.47331 14.8543 16.05373 18.43741 20.38091 22.39174 21.13035 
  11.29912 15.8077 16.54896 16.36229 18.221 15.09686 11.11965 9.979053 10.83024 13.19693 13.55953 12.65395 12.45233 12.89222 15.0179 15.60445 14.92871 14.3497 15.49676 18.35275 19.81606 22.24131 21.26899 
  10.58634 15.52423 16.11585 16.42515 18.17402 15.00347 10.48176 11.49412 11.44849 12.50072 12.81663 12.37796 12.68344 12.88461 13.50673 14.09901 14.95052 14.7693 14.78594 17.34875 19.04901 21.56851 20.89043 
  9.614183 15.8431 16.38121 15.86494 17.58311 14.49032 11.02957 10.22538 9.527282 10.68686 12.11254 12.43469 12.31795 13.20986 13.51692 13.65216 14.22145 14.17002 14.99944 16.96408 18.17498 20.48696 20.18502 
  10.00105 14.93148 15.57334 14.18313 15.84909 13.41543 10.91846 10.40649 10.04232 11.3954 11.08972 10.65496 11.28233 12.84103 13.64318 13.3043 13.51652 13.92885 14.5559 16.84107 18.5327 20.81325 20.21984 
  8.364077 14.47285 15.08788 13.55744 15.35808 12.38914 10.11354 10.69198 10.48146 11.39112 12.41379 12.30688 11.5294 12.61159 13.42383 14.04588 13.40064 13.34902 14.46458 16.70078 18.35919 20.76412 19.943 
  9.641496 14.35918 14.95244 14.19986 15.70295 12.1898 10.18897 9.93488 11.01549 11.42438 12.44854 12.86371 11.5565 13.17326 13.65934 14.53895 13.90649 12.75882 14.03429 16.0668 18.27451 20.19135 19.50024 
  8.998137 15.16441 15.34586 14.19052 15.65328 11.88099 10.49521 10.64151 11.44318 12.29878 12.59886 12.93051 11.39418 13.67889 14.56925 14.55365 14.10461 13.24354 14.07761 16.63885 18.53308 20.30708 20.31402 
  8.946667 14.58632 14.90333 13.99691 15.40372 11.7403 9.425845 11.23817 12.30968 13.13296 12.68324 12.09499 11.59824 14.54168 14.32594 15.0352 14.37218 13.71023 14.06014 17.93824 19.09838 21.4241 20.9504 
  9.447927 13.22877 13.59622 13.29706 14.72807 10.7798 10.31542 10.73058 12.23221 13.26059 12.40936 13.92871 12.75949 13.78177 15.05717 15.43973 15.04269 14.58284 15.24181 18.27596 20.08396 22.0071 21.56488 
  8.942842 12.3533 12.72897 12.85406 14.50646 11.68498 10.16205 11.14519 12.01001 13.16905 12.63423 12.60056 13.09376 15.80599 15.64569 15.43857 15.37898 14.46924 16.22836 18.90742 20.43498 22.41726 21.382 
  9.982429 12.81441 13.19554 12.47804 14.03025 11.90199 10.44281 10.78242 11.68254 11.92076 12.58015 12.23102 12.68594 15.57787 14.6662 15.05085 15.23528 13.82845 15.80674 17.82709 19.04893 21.38214 19.97395 
  9.869902 15.98693 17.0225 15.82257 16.53093 14.39392 13.55346 13.0189 13.5896 13.15744 13.18752 12.97133 12.9559 14.33525 13.73748 13.04741 14.4363 13.70424 15.57391 16.99634 17.09068 17.86176 18.46799 
  11.20318 16.74047 17.93852 17.55423 19.5529 17.38312 15.2714 13.70905 14.90142 15.34691 15.5448 12.94791 14.09873 15.03901 14.75121 14.84058 15.57433 14.77669 15.42783 17.23933 17.08564 16.54724 18.42886 
  10.00581 16.99308 17.64376 17.35759 19.84209 17.72609 16.06901 13.94531 14.6718 16.30286 16.95507 13.571 14.54512 15.59432 15.28288 16.09251 16.91068 14.48281 15.62211 16.89838 16.95857 16.07769 17.91624 
  10.26451 17.10342 17.68718 17.12966 18.80752 16.72883 17.11706 14.18921 15.08757 17.02866 17.63015 14.70868 14.18901 15.36524 15.08402 16.53765 17.53717 14.94885 15.30448 16.46144 16.25202 15.7747 17.48356 
  10.57615 17.0614 17.78706 16.76842 18.19138 17.44588 18.12214 14.26881 15.37736 17.35711 17.82368 16.12888 14.66693 15.406 15.11084 16.10291 17.21349 14.85879 14.25072 15.92479 15.79147 15.51069 17.19426 
  9.48652 16.911 17.18489 17.25456 18.75069 18.27182 18.70464 15.1519 15.75929 18.05139 18.79077 18.19505 14.61217 15.21042 14.50809 16.6024 17.5759 14.97376 14.62691 15.54537 15.83502 14.67196 16.36763 
  9.985225 16.74782 17.24112 17.25887 18.27095 18.51624 18.83493 16.13771 15.24101 18.14103 19.13589 18.64982 14.49468 13.72041 14.16209 16.20668 16.41984 14.58272 14.20643 15.07785 16.00739 15.01148 16.37612 
  8.453218 16.44852 16.53209 17.24354 18.39434 18.03302 18.01833 17.08058 14.58579 18.43371 19.00238 18.1741 14.07615 12.95393 12.84746 15.36702 15.92542 14.33508 13.597 14.46982 16.11521 14.52893 17.01237 
  9.352406 16.0076 16.35685 16.47454 16.98188 17.873 17.68334 17.51781 14.67671 18.76287 18.72811 17.72148 13.38931 12.69698 13.39213 14.65827 15.30858 15.00858 14.27974 14.66115 15.79502 13.98777 16.22205 
  9.672524 14.51215 14.49281 17.57202 18.71115 18.5533 18.56325 18.01528 15.20035 18.96259 18.58512 17.13366 13.41415 12.05879 12.61457 14.21013 14.71272 14.4392 14.22963 14.40962 15.35258 14.32724 15.84399 
  9.294844 14.85708 15.12689 17.73894 18.71831 18.47003 18.26656 17.68312 15.53754 18.16326 17.79104 15.80158 12.8884 11.75445 11.60362 14.12353 14.61159 13.81479 13.50152 14.23705 15.36493 14.26479 16.47873 
  8.768968 15.45139 15.69153 17.49653 18.18467 18.7153 18.61305 16.95576 17.30786 17.01988 16.78725 15.96744 13.20623 11.91497 10.71408 13.16159 14.11679 14.18742 13.99787 14.10843 14.59204 14.09456 16.28277 
  9.966001 15.72716 15.85344 17.40161 18.01343 18.25241 18.0155 17.06878 17.53317 16.64179 16.41474 15.78998 13.04946 11.22962 10.81054 12.68965 14.08202 14.16896 13.95166 14.16778 15.26385 14.41844 15.19306 
  9.675585 15.72316 15.94752 16.87626 17.34341 17.19278 16.94996 16.84417 17.60019 16.79139 15.83696 14.38576 11.87001 11.4011 11.60792 12.10766 13.4803 13.23835 12.92072 14.22253 15.03658 14.08519 14.59369 
  8.843268 15.32752 15.28943 16.05486 16.22001 15.98866 16.2473 16.98972 17.01772 16.9006 16.29029 14.78596 10.99794 11.25592 11.78223 12.48373 13.76172 13.57825 13.22475 14.14124 14.77772 14.18768 15.47867 
  9.988932 15.02293 15.06712 16.10529 16.68101 14.5116 15.2082 16.47553 16.51361 17.52498 17.05329 14.73698 11.47603 11.26132 11.55777 12.60618 14.0904 13.70418 12.92093 14.22556 15.54817 14.19895 15.33703 
  9.373691 14.60107 14.44204 16.04573 16.79593 15.65233 15.8361 15.7082 15.76369 17.70542 17.72066 15.61394 12.20694 11.29502 12.29862 12.74661 13.96791 13.84649 13.18555 14.58438 16.68427 15.04027 15.47512 
  9.678563 14.24788 13.43199 15.21713 15.649 16.42124 16.40302 16.3937 15.09498 16.95411 17.4481 16.14875 12.54013 12.54267 13.3262 13.59261 15.50308 14.9336 13.64636 14.634 16.40599 14.55039 15.61549 
  10.3574 15.14911 14.75568 15.57308 15.29968 16.94623 16.488 16.12799 15.32639 15.1618 16.76408 16.33082 12.41711 12.65482 13.66632 13.70024 15.7187 15.12583 13.7085 15.76237 17.63751 14.65858 15.68508 
  10.7973 15.52072 14.91968 15.44418 16.11236 16.58336 16.3113 15.46747 14.99532 15.4524 15.88347 15.21543 12.29164 12.56826 12.42722 12.49476 14.77974 14.30123 13.07562 15.28318 17.01124 14.17749 15.07545 
  11.72055 14.70813 14.33016 15.86267 16.10855 16.00652 15.79218 14.71058 14.24063 14.49434 15.89563 14.62785 13.76968 14.33625 13.71839 14.10913 14.64376 14.01509 13.78574 15.50584 16.63034 17.31616 17.19594 
  11.63814 14.6362 14.03121 15.15585 15.78361 15.6864 15.34269 14.2795 14.29758 15.06682 15.55843 14.9841 14.52794 14.38037 14.58176 15.49519 16.01623 14.98994 16.1685 17.89346 19.66196 20.41212 19.85152 
  11.20878 14.61073 14.74267 14.05825 15.03689 14.27688 13.934 13.7751 13.63184 13.97793 14.57368 14.39714 14.21094 14.06395 14.52638 15.53573 16.57571 15.51742 17.78471 19.37989 21.206 22.15632 21.33591 
  10.45609 14.74396 14.32149 13.18418 13.62141 14.69138 14.50593 13.22618 13.50317 13.10417 13.68227 13.81558 14.411 13.72738 14.54855 15.41036 16.20938 15.58582 17.08364 18.51379 20.73615 22.85728 22.03742 
  9.892098 14.23401 13.93373 13.37699 13.63826 14.14518 13.86748 12.95281 12.58174 13.15587 13.58239 13.93001 14.0541 14.45245 14.81557 15.24402 15.24497 14.92806 16.91828 19.23318 21.00042 22.89181 22.22411 
  8.341251 13.11608 13.08356 12.63929 12.80791 12.68776 12.75128 13.05065 11.75436 13.81135 13.41625 12.83436 13.37474 14.62833 15.19703 15.31099 14.71062 15.49001 16.62827 18.99271 20.98364 22.86999 22.26387 
  9.033696 14.0476 13.55966 12.04009 12.19411 12.87305 12.72984 12.89095 12.6282 13.35929 13.01105 12.84826 12.64545 13.73565 15.17793 15.06093 14.961 14.88946 16.90281 18.75511 20.67654 22.07957 22.50922 
  9.733017 14.11259 13.47146 12.15287 12.6395 12.34159 11.76064 12.4338 12.91527 13.87755 13.5646 12.60468 13.0273 14.35512 15.41535 15.23291 15.34092 14.8519 15.97584 18.7154 21.41322 22.42959 23.02157 
  8.697401 12.64109 12.03506 13.12382 13.84305 11.54991 11.58626 11.05973 11.39041 14.03995 14.19523 13.01843 13.23257 14.71688 15.60102 14.75217 15.33405 14.82154 16.59491 18.86729 21.02257 22.58431 22.67589 
  9.467981 13.12758 12.64267 12.87043 13.40577 11.28999 10.91053 11.14885 11.37754 13.09841 14.32014 13.18549 12.73418 14.28728 14.77119 14.96639 15.80654 15.12319 16.99688 18.39054 20.01874 22.73198 22.52024 
  9.764517 13.51877 13.10358 11.6141 12.21243 11.77503 11.65425 10.68032 11.29402 14.13506 14.52542 13.75776 13.12655 14.90901 14.66903 15.17769 14.88138 14.35127 15.96097 17.97906 20.40209 22.40177 22.05331 
  9.448538 12.1722 12.48105 11.88218 12.70931 12.26048 11.73737 10.74498 12.15391 13.1719 13.41782 12.89102 12.99866 15.54638 15.04468 15.65555 15.56007 14.52773 15.27511 18.04018 19.91089 21.79332 21.80087 
  8.763973 11.93478 11.62999 11.1273 12.46282 11.85779 11.41145 11.12057 12.67596 13.37468 12.94005 12.73368 12.64572 15.58685 15.55944 15.38471 14.74936 14.22819 16.26686 18.11139 19.70222 21.42123 21.45517 
  8.306178 9.252882 9.844025 11.26888 10.83866 11.36586 11.08982 10.34481 11.93284 12.10324 12.52144 12.75002 12.61329 14.85645 14.61 14.17808 15.05804 14.43231 15.56323 17.80058 18.97718 20.20973 19.62204 
  11.66475 14.36481 14.23808 12.98364 11.47848 11.98761 11.70588 10.32386 11.46336 12.05531 11.33844 11.47684 12.1066 15.10926 14.7728 13.86291 14.54506 14.13791 14.81665 16.32619 17.66183 18.56561 18.54115 
  13.551 15.17671 16.76833 15.87209 15.30873 11.94541 10.92486 11.23132 11.48584 13.04966 12.27409 11.97255 11.65486 14.969 14.62882 13.83866 13.70533 13.39387 13.99928 15.66928 16.7587 16.72407 16.5293 
  12.76456 14.3368 17.05488 16.533 16.53849 12.9081 12.13053 12.54419 11.06146 11.80947 11.34655 11.66301 11.78405 14.4737 14.33242 13.79723 14.23168 13.66543 14.59762 15.88466 16.25202 16.19663 16.058 
  12.61326 13.91244 16.62031 16.31223 16.49494 13.073 12.46359 13.13346 11.08299 12.0898 12.36925 12.15002 11.55262 14.73402 14.74845 14.4159 15.61065 14.59098 14.15156 15.8146 16.68856 15.90823 15.70445 
  13.72736 14.60473 16.47353 16.42584 16.23803 13.49946 13.32173 13.55856 11.47504 12.57557 13.47919 12.9309 12.37103 15.21231 15.2276 15.20738 16.28225 14.82661 15.0314 16.40727 17.3156 15.99237 15.84188 
  13.94386 14.66504 15.96983 16.09722 16.08472 13.28472 14.21239 13.74801 11.81877 13.22909 13.79301 13.41157 12.9921 15.82657 15.1773 15.28221 16.49038 15.39782 15.09806 16.5119 17.39903 16.71843 16.71772 
  13.7535 15.06458 15.29338 15.41305 15.38942 12.72157 14.62464 13.94845 13.23048 13.06216 13.54 12.74474 13.10539 14.58909 14.21166 14.60993 15.12524 14.18825 14.24555 15.87136 16.5764 16.25047 16.18505 
  12.87529 15.11237 17.02908 16.62773 14.56104 13.15497 14.62326 14.26014 13.4457 13.14097 14.58538 14.2613 13.35599 14.02586 13.7997 13.72621 14.95759 14.69201 13.92311 15.1955 15.75672 15.51821 15.26852 
  14.88954 16.34912 15.96387 16.39929 15.98874 13.18963 13.59016 13.90227 13.35802 13.87966 14.67649 14.61434 13.03115 13.09564 12.67227 13.00849 14.46988 13.8509 13.29925 14.3982 14.9851 14.69154 15.44168 
  15.86816 17.34939 15.38919 15.75551 14.90487 13.54091 13.4477 14.09079 13.59264 14.21146 13.69315 13.91447 12.34339 11.63899 11.57346 11.78274 13.74251 13.4482 12.77586 13.67141 13.67339 14.87321 15.2977 
  16.05614 17.80696 15.5341 15.78276 14.98103 13.54131 13.31379 14.18915 14.42894 15.16783 13.61487 12.74171 12.90643 12.58465 11.50915 11.18896 13.20965 13.26229 12.49681 12.70356 12.91538 14.117 14.82414 
  16.10782 18.11083 15.80454 15.79372 14.83904 13.20489 12.75853 14.28803 15.24095 14.56751 12.98957 12.72844 12.02183 12.00667 11.41799 10.49253 12.19255 12.55818 12.08304 12.07552 12.59875 13.70636 14.76297 
  16.48818 18.47473 16.28432 16.31223 13.50066 12.83804 13.33812 14.4014 15.45115 14.3461 12.77997 12.5514 11.41195 11.27457 9.940119 10.18195 11.79267 12.34199 12.40519 12.08422 12.70067 13.04217 14.32282 
  16.58727 18.6493 16.57899 16.56668 13.67641 12.70696 13.48661 14.16076 15.01744 13.59957 12.72013 12.734 11.13864 11.15946 10.10025 10.58426 12.08072 12.54692 12.42924 12.06971 13.08699 12.87339 13.88818 
  16.46552 18.60275 16.49226 16.2148 14.75385 13.87186 13.3091 13.94653 14.61146 13.24284 12.09616 11.93655 11.40253 11.15538 10.07787 9.789737 11.60745 12.03494 11.88888 12.21645 12.83724 12.63594 13.60312 
  16.42601 18.49523 16.10097 14.80393 15.19385 14.14109 13.04128 13.45862 14.21167 13.62841 11.52437 10.68034 10.83395 10.6881 10.08846 10.43773 10.89225 11.42747 11.60774 13.2441 13.97224 11.69935 13.44734 
  16.38114 18.33183 15.98755 14.96587 15.34151 13.83746 12.48781 12.56362 13.59895 13.62195 11.67679 11.84699 11.38903 10.70972 10.37174 10.57339 10.79502 11.32389 11.35826 13.23451 14.09176 12.10806 13.03413 
  16.22431 17.94065 15.84532 15.77651 16.14402 14.11528 12.80109 13.16647 14.27707 13.8602 11.68355 11.39642 10.84387 10.61396 9.958416 9.830719 10.69155 11.12258 11.26303 13.12325 13.99851 11.67061 12.9628 
  15.79015 17.19271 15.68581 15.83383 16.8037 14.45622 12.3988 12.65228 14.56971 13.79517 10.82631 9.563471 10.30857 9.767759 10.45248 9.569944 10.59113 11.10918 11.09264 12.86746 13.68986 11.89357 12.99938 
  15.2913 16.56188 15.66566 15.6369 15.88488 12.91912 13.74727 13.40825 14.74673 12.99693 11.53082 10.84703 10.13508 9.678087 9.518613 9.16087 10.43609 11.18994 10.68452 11.71982 12.54992 12.5922 13.53928 
  14.92816 16.64891 16.45783 16.25064 16.19631 13.19242 13.77824 14.43863 15.26204 15.19201 12.83485 11.38921 10.91256 11.21886 10.82799 12.13972 13.95731 12.54356 12.07864 12.44756 12.12163 12.97568 14.7286 
  15.13246 17.03524 15.97898 15.40096 15.36011 13.36627 13.54038 14.25134 15.57904 16.03234 13.77775 12.25592 10.30315 10.69148 10.83694 12.56628 14.37519 12.44555 12.39819 13.07718 11.56385 13.12282 15.2996 
  15.5982 17.27533 16.06111 15.55091 15.21003 14.51325 13.98331 14.05154 14.17753 16.04241 15.2899 13.87621 11.14674 10.69581 11.96811 14.34262 14.9851 13.41596 13.65708 14.32451 13.39169 12.8552 14.58668 
  14.91011 16.69463 15.57334 15.01008 15.36094 14.74225 14.26171 13.92481 13.91756 15.95652 16.69785 16.57439 12.48547 11.33714 12.08421 15.43829 15.81007 13.38737 13.30834 14.67949 14.26769 13.08782 14.67955 
  13.62063 15.99206 15.72027 15.10223 15.1442 15.15948 14.64559 13.86851 12.91738 14.75704 17.01463 17.44593 14.92344 12.40641 12.71938 16.50565 16.63255 13.53405 12.98962 15.20941 15.12311 13.42216 15.25566 
  15.30581 16.69621 15.66549 15.66445 16.12411 15.10698 15.31674 14.56899 12.28834 15.10705 16.23727 16.72542 15.70731 13.46484 13.77255 16.57919 16.63644 13.61368 13.20484 14.98691 15.30538 13.48375 14.71632 
  14.74377 15.90853 16.02791 15.53424 15.54271 15.17513 15.70076 15.01335 12.41409 14.9216 15.6982 15.6553 14.80186 13.63735 14.12234 16.54076 16.64301 13.9333 13.65155 15.33424 15.71972 14.00131 14.723 
  13.63359 14.52747 16.50115 15.59644 14.78498 14.94671 15.83353 15.04419 12.83702 15.15406 14.95185 15.14444 14.53343 14.07447 14.4541 16.43644 16.78634 13.96131 13.85571 15.59888 16.54558 14.7505 15.17269 
  15.09095 15.96606 16.31398 15.20485 14.33777 14.76441 15.66189 15.22405 12.65838 15.33522 14.93109 15.00384 14.89501 14.88041 14.65054 16.52218 17.02337 14.22087 14.82588 15.54361 17.17436 15.60863 15.80521 
  15.26671 15.98887 16.00072 15.11115 14.80698 14.81987 15.41057 15.06448 12.76331 14.64497 15.39146 15.59635 15.41956 14.94571 14.81527 16.34526 16.81958 14.88231 15.31733 15.54397 16.68404 16.08439 16.50137 
  14.82031 15.1955 15.81392 14.74509 13.80763 14.37639 15.23381 14.08953 13.05277 14.46634 15.04908 15.01031 14.62417 15.87493 14.96298 16.19891 16.81858 14.21854 14.53428 15.09545 15.81948 15.23545 15.83264 
  14.47235 14.64184 15.65243 14.61598 13.88489 14.16747 15.15573 13.50578 13.0122 14.29219 15.03921 14.0275 13.43212 15.84878 14.55854 14.92103 15.28403 13.08439 14.17978 15.17877 15.71674 13.99384 15.05782 
  14.84271 15.37902 15.59052 14.53613 13.88661 14.49238 15.42805 13.71516 12.43119 13.77503 14.90729 14.78802 14.2214 15.8298 14.60928 14.95287 15.55999 13.44495 13.74132 15.10681 16.23884 14.13648 15.53918 
  14.90855 15.54294 15.92132 14.83505 13.52632 14.82017 15.56321 12.84901 12.36878 12.84481 13.59268 13.83251 13.89308 14.76446 13.99949 14.81193 15.35496 13.92693 14.23314 14.60406 16.01696 13.7629 15.32229 
  14.34874 14.65815 15.8946 14.89865 13.93546 14.51131 15.38126 12.69744 12.18602 11.85635 12.55572 13.3946 13.59089 14.50925 13.44721 14.08821 15.0949 13.18603 13.76213 14.69819 15.71466 14.58852 16.02038 
  14.12023 14.95996 14.94988 14.08591 14.26389 15.05177 15.42382 14.23856 13.6525 13.4122 13.92378 13.58581 13.71874 15.37159 13.65612 14.78279 15.35144 12.51747 13.10743 13.96306 14.74565 13.60848 15.42061 
  13.03794 14.3534 14.61717 14.4276 14.20311 14.88333 16.29688 13.85629 12.45 12.66146 13.211 13.02482 13.61957 15.25426 13.65302 14.40839 15.28184 12.64547 12.91868 13.65031 15.02586 13.60051 14.85588 
  12.80224 14.75209 15.02113 14.23392 13.57626 14.24956 14.97642 13.44714 11.19146 11.87919 12.94502 13.08048 12.71222 14.35021 13.33305 13.61649 14.14324 12.44998 12.97652 14.02602 14.98486 13.33818 14.75948 
  13.16971 14.54832 13.68839 12.53332 12.31186 13.85297 14.5408 12.63638 10.70129 10.50308 11.22596 12.15875 12.87752 14.50729 13.75705 13.03189 14.20643 12.62912 13.04198 13.98892 14.39815 13.00242 14.93295 
  12.32784 14.45602 13.87575 12.9689 12.90031 12.28457 12.79731 12.50595 11.64002 10.49457 12.00499 11.84997 12.20587 13.69985 12.74907 13.07555 13.99215 12.42866 12.85618 13.70903 14.67477 12.98654 15.42268 
  13.97398 15.76731 14.26718 13.93614 13.3635 12.53315 12.47679 12.46972 10.95234 10.91325 11.5195 11.66877 12.05166 13.99423 12.40961 12.80348 13.87326 12.39891 12.30221 12.87925 14.37371 12.68297 14.16082 
  14.184 16.03344 14.21303 12.96472 13.494 13.50742 12.40183 12.13428 11.0977 10.09598 11.73288 11.88649 11.97245 12.51878 11.91088 12.78705 13.84648 12.43722 11.65642 12.11694 12.9002 12.40637 14.5552 
  13.45738 15.55 14.00745 12.82178 12.33664 11.91138 12.54709 12.3248 11.18794 10.89101 11.58366 11.65439 12.35607 12.53853 11.86051 12.31624 13.99491 12.18095 11.57812 11.95029 13.21074 12.14984 13.58227 
  12.75044 14.95871 13.36009 13.20988 12.31619 11.93608 11.93395 12.31846 11.38457 11.1864 11.68532 11.97781 12.34169 12.93438 11.78984 12.08069 13.22225 11.86906 11.43046 11.13731 12.37698 12.51735 13.13258 
  12.4284 14.15335 13.07597 11.76668 11.4075 11.61571 12.59232 11.93633 12.10711 12.10692 12.07862 13.06988 12.64771 14.07678 12.7958 11.8246 12.44421 11.69291 11.27304 11.0924 11.61102 12.44975 12.95033 
  11.18342 11.99406 12.91778 11.29505 11.66648 10.58673 12.03916 12.36362 11.94302 11.53715 12.30084 12.70467 12.66107 14.14579 13.0996 11.72703 12.5057 11.78472 11.09719 10.43786 12.19886 12.96147 12.51465 
  11.16282 12.16246 10.93769 9.637576 10.93914 11.07357 12.48322 11.72473 10.95556 11.30609 12.062 12.24674 12.28014 13.23953 12.19423 12.05279 12.69395 11.68082 11.26663 10.09986 10.75675 11.19461 11.49739 
  10.68674 13.24407 11.97083 9.004447 9.364768 10.10863 12.20102 11.69279 10.02772 10.04208 11.35098 11.3541 12.00796 12.02115 10.81907 10.48873 11.81506 11.11288 11.23438 10.76176 10.97295 11.27559 11.05051 
  11.25764 13.04875 10.96754 9.594095 9.950509 9.515109 10.90454 10.44351 10.30407 10.58162 11.61821 10.79663 11.97438 12.49545 10.3045 10.45207 10.77822 10.23204 10.59221 10.55611 10.73 11.25474 10.92693 
  10.77957 12.84101 10.96046 9.825205 9.921305 10.27295 11.00374 9.069146 9.143576 9.619534 9.886018 9.683996 11.43353 11.68437 10.21345 9.418591 10.17027 9.919155 9.729475 10.12781 10.13701 10.20216 10.49461 
  10.50037 12.81704 10.93899 8.919902 9.181169 9.454815 9.864502 9.160638 7.950092 8.281847 9.608268 9.700049 10.10044 10.43497 10.07498 9.374721 10.09255 9.838331 9.777655 9.972161 10.5668 10.3991 10.44382 
  10.9499 12.69608 10.32951 8.873781 9.134046 8.758868 9.037214 8.199131 7.569058 8.374478 9.34644 9.717125 9.732412 10.43906 8.821301 8.579359 9.148021 9.602486 9.728423 9.711402 10.63859 10.64856 10.91956 
  11.27755 12.95559 10.74122 9.436187 9.256186 9.484769 9.586822 8.388662 7.160058 8.068262 9.397726 9.698923 10.2531 10.30162 9.136081 8.90623 9.838286 9.443312 9.586123 9.845777 10.52771 11.16902 11.16297 
  10.61487 12.45117 10.35181 8.28381 7.78183 7.990805 8.725731 8.746444 7.518696 7.472842 8.455659 9.625546 9.859873 9.179219 8.927357 9.205382 9.615867 10.14538 10.18613 10.31581 10.64848 10.60155 10.51831 
  9.637716 11.20578 9.314525 7.651236 7.868614 8.413736 9.356231 8.070017 8.155 7.390232 8.749864 9.116447 9.567814 9.369207 9.694729 9.147233 9.482315 9.444982 9.887435 9.599484 9.862085 10.45017 10.60415 
  8.626204 9.187364 9.029099 8.493851 7.209723 7.690259 8.042856 7.316092 8.150468 8.128954 8.491919 8.446475 8.681947 8.747696 8.789898 9.233255 10.31805 9.660691 9.486577 10.23518 9.970778 9.988548 10.34961 
  9.442098 8.689657 9.283001 7.998808 6.22693 6.962261 9.189577 8.19746 7.75057 8.493724 8.455694 7.799927 8.001998 8.729837 9.923102 9.574437 9.835016 9.547566 9.595923 10.27687 9.859653 10.14931 10.31981 
  9.97291 9.826776 8.408665 7.496465 8.021371 7.480401 6.859789 7.656764 8.673873 8.172565 8.807829 9.007799 9.039057 8.638908 9.110216 9.078871 9.454023 9.512172 9.644451 9.953197 10.47481 10.57586 11.22771 
  7.733074 9.88445 8.777633 8.313414 8.081938 7.271777 8.92068 8.634503 8.78035 8.908507 9.615075 9.58618 10.19903 9.804263 9.953383 9.936666 10.38816 9.741985 10.35782 10.95726 11.52999 11.21175 11.48313 
  10.96227 9.154578 8.698281 7.998504 7.728884 6.602579 8.022117 8.025263 9.123106 8.844633 8.908731 10.21477 9.655914 9.417415 9.496919 9.789172 10.42569 10.3659 11.02518 10.92264 11.54033 11.66203 11.41841 
  10.86337 9.16935 9.55076 7.872849 7.942523 7.244263 7.779666 7.115378 7.89739 8.183571 8.867866 9.697963 9.289954 9.11062 9.085766 9.426685 9.012211 9.846148 10.90698 10.7715 10.9265 11.21198 11.22182 
  9.851903 10.36412 8.908376 7.846486 7.46843 7.405447 7.65377 8.196727 7.064144 8.249885 8.761197 8.942713 9.204334 8.628345 8.864152 9.710718 9.490798 9.545791 10.44929 10.69282 10.93337 10.86275 10.97815 
  11.25423 10.87261 8.889456 7.461558 7.015344 7.351716 8.382879 8.503271 7.68911 7.91639 8.643186 8.87805 9.065163 9.099637 9.386837 9.786633 10.39724 10.21951 10.45584 10.46524 10.56583 10.76281 10.16936 
  10.68273 9.817769 8.26171 8.016673 8.054502 7.516357 7.65291 7.820787 7.519141 7.720293 7.444949 8.414156 9.215623 8.999539 9.341202 10.16718 9.897247 10.21318 10.05819 10.82438 10.51383 10.53982 10.3831 
  9.478124 9.167519 9.010949 7.99378 7.74503 6.204545 7.584774 7.112559 7.285814 7.478474 7.668393 7.974492 8.957026 8.854042 8.923244 10.00307 9.395614 9.709485 9.681889 10.5162 10.42807 10.61794 10.64966 
  10.47888 7.862177 7.167226 8.245425 7.75247 7.6353 7.317435 8.184895 7.887283 7.203204 8.485103 8.665626 8.602202 8.789666 8.851316 9.74954 9.710799 9.547979 9.842075 10.54527 10.60706 10.56509 10.56258 
  9.801606 8.885911 7.033187 7.478241 6.67696 7.042121 7.39953 7.7513 7.696868 7.563337 8.205177 8.162902 8.607606 8.607259 8.436271 9.256256 9.444992 9.138235 9.183046 10.3544 10.70144 10.78459 10.73416 
  8.729084 9.680941 8.473826 7.128935 7.117252 7.535285 7.976354 8.395122 7.388021 7.665706 7.663038 7.935491 7.918374 8.014401 9.072487 9.747068 9.442183 10.02906 9.621786 10.21271 10.62509 10.38278 10.59681 
  9.071483 8.072448 8.148285 8.208888 6.865083 7.357923 7.7726 7.642717 7.373379 7.314355 6.811155 8.184822 8.202291 9.007961 8.424119 9.344726 9.500482 9.494846 9.845454 10.21647 10.59387 10.59069 10.30458 
  9.551182 8.782715 9.017637 8.455041 7.292755 6.371716 8.139261 7.807406 6.868361 7.754928 8.373298 7.639658 8.648994 8.402566 9.127618 9.492344 9.512679 9.582137 10.01061 10.36339 10.5303 10.40906 10.77627 
  10.07956 9.123581 9.239129 7.188252 7.983877 7.36659 7.689083 7.349831 6.549315 7.351971 7.765708 8.071753 8.22062 8.234403 9.252606 9.165942 9.141379 9.528788 10.10328 10.09754 10.46661 10.03632 10.45256 
  10.45638 9.177458 8.172346 6.487576 7.374524 7.649885 7.573339 6.959404 7.260113 6.734559 7.447418 8.466086 8.382314 8.260194 8.807273 9.042309 9.807338 9.552331 9.757251 10.22166 10.39291 10.50788 10.47725 
  11.13613 7.758188 7.694408 7.91339 6.591579 7.690708 7.883243 6.817697 6.937893 6.617546 7.995504 8.199288 8.606889 7.930028 8.27558 9.028001 9.202659 9.807508 10.00867 10.30622 10.30117 10.22799 10.23392 
  10.84375 8.658377 8.964039 7.577753 5.997756 6.243629 6.59042 7.667848 7.77693 7.160565 7.672915 8.214157 7.767068 8.187228 8.741798 9.008192 8.761507 9.775722 9.991219 10.50324 10.35022 10.56752 10.42405 
  9.856415 8.33345 7.864082 7.394323 7.037837 7.429994 7.053414 8.148975 7.734139 7.73136 8.126728 8.250671 8.107525 8.775521 8.947971 9.013912 8.709566 9.952392 10.0177 10.55204 10.14417 10.48263 10.89007 
  11.22424 8.785028 8.384821 6.571542 6.668182 6.510378 7.012306 7.705905 6.736544 7.154219 8.193804 7.920104 8.426934 8.557131 8.854219 9.46376 10.04807 9.964184 9.701237 9.969363 10.31233 9.983346 10.42036 
  11.32489 8.361799 9.032561 7.834023 7.204043 6.074617 7.471241 8.117975 7.885279 7.726513 8.305443 8.50788 8.930935 8.875918 8.786529 9.071368 10.33387 9.977633 10.02416 10.06347 10.81268 11.20751 10.80103 
  10.06941 6.52937 7.571665 6.465108 6.881132 6.903685 7.100048 7.664837 8.100976 8.205462 8.043177 8.448032 8.847051 8.371434 9.149493 9.756526 10.0323 9.265886 10.12252 10.04926 10.18163 10.70782 10.76523 
  10.31685 7.024517 5.869543 5.841137 6.383105 6.215681 7.949264 7.746029 7.835628 7.861153 8.296052 8.204115 8.655869 8.276096 8.929706 8.994547 9.447945 9.700706 10.37251 10.27933 10.33522 10.72122 10.48557 
  10.91509 7.91675 8.244367 6.124669 6.732698 7.492036 8.363312 8.124249 8.108838 7.358902 8.053522 7.411808 8.716305 8.342316 8.457804 9.005593 9.336573 10.05767 9.578779 10.344 10.4169 10.55184 10.60104 
  10.32075 8.261191 8.136519 6.961864 6.799195 7.198459 7.713956 6.972926 6.764237 7.75519 7.804876 7.912869 8.563591 8.987835 9.309792 9.610885 9.492855 9.925476 10.05713 10.52021 10.62411 10.47692 10.59566 
  9.86818 8.572285 8.530177 7.359343 7.294422 6.582515 6.642591 7.496963 7.95584 7.202435 7.878167 8.503516 8.365258 9.081302 8.992274 9.197975 9.750833 9.722472 9.760623 9.966175 10.59453 10.34588 10.6256 
  10.25583 7.347171 7.843901 6.244213 6.5353 6.617674 7.064937 7.198507 6.530531 7.619411 7.706642 8.672579 8.665023 9.01842 9.258142 9.454097 9.65053 9.569846 10.16275 10.25629 10.61747 10.79843 11.0936 
  11.3455 8.169881 7.368921 6.081739 7.016907 7.807951 8.699952 7.736558 6.748681 7.378449 7.852139 8.727378 9.049452 8.352005 9.397895 9.694369 9.896957 9.87398 9.838742 10.29402 10.58788 9.819412 10.6098 
  11.90842 7.299352 8.247597 6.929464 5.668571 7.252095 8.041783 7.685021 7.479499 7.446598 7.486227 8.122455 8.321239 8.863303 8.986836 9.896532 10.01282 9.700945 10.13044 10.36466 10.79011 10.65473 10.96276 
  11.22572 8.534163 7.996068 7.321532 6.95424 7.306089 8.242614 7.205853 7.210618 7.631306 8.128415 8.210221 7.745347 8.572201 9.164312 9.454391 9.143291 9.996038 9.919705 10.61216 10.94615 11.0497 10.50996 
  11.99271 8.205185 9.120809 8.732939 7.727026 7.459247 9.017488 8.611053 9.431357 8.641321 9.48832 9.588237 9.180689 8.96218 9.460557 9.78753 10.30156 10.65017 10.35131 10.94515 12.2452 13.12846 12.19351 
  11.79253 9.627811 9.291315 7.524522 6.52378 7.294258 8.705289 7.135509 8.226054 8.742827 9.064022 9.066143 8.709431 8.971881 9.728841 9.269897 9.861171 10.09128 10.07978 10.35271 10.13663 11.0843 11.13849 
  10.56259 9.232968 9.15976 6.620336 6.150416 6.181613 7.583303 6.586105 7.54056 7.118359 8.163677 8.156444 8.354664 8.431437 9.365588 9.699803 9.425529 9.986162 10.02214 9.757886 9.986296 10.37123 11.12805 
  11.46303 8.064131 8.81135 8.263533 7.772588 6.787446 7.045297 7.507244 7.852684 7.8141 8.464418 8.278731 8.368163 9.374079 9.125239 9.165322 8.79129 9.501587 10.15753 9.792738 10.41069 10.7875 10.81474 
  11.05755 8.402949 8.103877 8.311771 7.382164 7.518039 8.082945 8.166703 8.587493 8.333583 7.051155 8.128522 9.163286 9.241158 9.282046 9.011143 9.606009 9.214875 9.92196 10.35358 10.19762 11.04288 11.07783 
  9.634427 8.981845 9.008856 8.496973 8.026294 6.744306 7.501451 7.233029 7.99301 7.752292 7.717514 8.286538 9.62907 9.19305 8.897845 9.250588 9.401032 9.651338 10.04432 10.55263 10.60681 10.55907 10.44911 
  9.16023 9.718575 9.712869 9.499995 9.118845 7.099794 7.596916 7.821012 7.377513 7.772183 8.825283 8.444467 8.374529 8.278049 8.336656 9.146471 9.616967 9.900914 9.985411 10.23762 10.56904 10.3051 10.82687 
  10.26259 9.707577 7.891645 7.938838 8.196815 7.030812 7.729892 8.539984 7.097323 7.639617 8.26931 8.103879 8.820785 8.756143 8.088627 7.633537 8.861978 9.411267 9.507934 9.617286 10.103 10.32123 10.60248 
  9.416736 8.067968 8.121611 8.702229 8.153371 7.398215 8.101689 7.075943 7.48716 7.682926 7.930077 7.562144 7.981254 8.482229 8.007603 8.850947 9.587449 9.971848 10.00796 10.0423 10.69032 11.02298 10.9979 
  9.875791 9.180859 8.33131 7.329909 7.138772 6.531456 7.820087 7.730562 7.006549 7.355982 8.221857 7.397998 8.619684 9.434539 8.325085 8.88515 9.217641 10.11225 10.50192 10.39947 10.4549 10.84156 10.37869 
  10.21343 8.826467 8.089176 7.33913 7.855656 7.318372 7.243741 7.606412 7.226522 7.431902 7.899468 8.143455 8.827127 9.14143 8.887341 8.847593 9.56572 9.84055 9.684552 9.931477 10.32066 10.90385 10.82047 
  8.685722 8.634503 8.177417 7.393768 7.107471 7.263133 7.5967 7.938039 6.885909 7.359814 8.428015 8.861872 8.920212 7.885601 8.544594 9.326226 9.538568 9.941797 10.37305 10.73129 10.24279 10.99198 10.71185 
  9.49738 9.815642 8.553341 7.461448 7.640052 7.98072 7.311526 8.038465 7.255682 6.560006 7.371539 7.967708 8.035154 7.652845 8.489419 9.47836 9.636602 9.799182 10.27001 10.54407 10.1862 10.5361 11.08353 
  9.237267 8.263541 7.987866 7.640958 7.269582 6.937474 6.849837 7.717149 7.656352 7.173209 7.225511 7.958437 8.102967 8.547636 8.357112 8.845057 9.423195 9.327521 10.10193 10.10455 10.16159 10.10588 10.53935 
  8.52638 7.134833 7.512567 7.138206 6.746409 7.367229 7.365469 6.383105 7.514528 7.68226 8.600569 8.164186 8.062187 8.582131 9.225581 9.787975 9.365478 9.480648 9.960615 9.841322 9.842931 10.30676 10.41945 
  9.151896 6.975066 6.058979 7.529419 6.584132 6.413368 7.757835 7.395633 7.860129 7.49466 8.109066 7.873742 8.698033 9.225484 9.379223 9.214903 9.393662 9.46548 9.963161 10.11975 10.1148 10.09052 10.40652 
  9.582273 6.791395 7.215298 7.381511 7.378876 7.854954 7.890674 7.242115 7.359837 7.425533 8.024413 7.492979 8.248519 8.686938 8.807583 9.178555 9.128061 9.372928 9.666212 9.78712 9.796471 10.31979 10.85505 ]


================================================
FILE: audio/tests/features/testdata/pitch_feat_txt.ark
================================================
test_wav  [
  0.4300044 238.1164 
  0.7310953 238.1164 
  0.0589752 238.1164 
  0.3706925 238.1164 
  0.6353879 238.1164 
  0.4168843 238.1164 
  0.7615743 238.1164 
  0.7359886 238.1164 
  0.5104555 238.1164 
  0.843815 238.1164 
  0.574707 238.1164 
  0.8757079 238.1164 
  0.8545787 238.1164 
  0.5284878 238.1164 
  0.8398617 238.1164 
  0.8405749 238.1164 
  0.7291093 238.1164 
  0.3530312 238.1164 
  0.8516902 238.1164 
  0.8366287 238.1164 
  0.7860549 238.1164 
  0.7996263 238.1164 
  0.8769377 238.1164 
  0.8062987 238.1164 
  0.5502667 238.1164 
  0.8050084 236.9318 
  0.8435217 235.753 
  0.6781067 234.5801 
  0.6275977 233.4131 
  0.8354951 232.2518 
  0.7898548 231.0963 
  0.6786529 229.9466 
  0.5784115 228.8026 
  0.8078744 227.6642 
  0.7860623 226.5316 
  0.3529067 225.4046 
  0.7398534 224.2832 
  0.7554479 223.1673 
  0.3688865 222.057 
  0.431939 220.9523 
  -0.2559481 219.853 
  0.5161331 218.7592 
  0.736354 217.6709 
  0.2649688 216.5879 
  0.6187224 215.5104 
  0.8040497 214.4382 
  0.4176693 213.3714 
  0.7362868 212.3098 
  0.6412426 211.2535 
  0.01994592 210.2025 
  -0.09901931 209.1568 
  0.4139394 208.1162 
  0.7458671 207.0808 
  0.7015585 206.0505 
  0.8989595 206.0505 
  0.9356874 209.1568 
  0.9370709 212.3098 
  0.9640602 216.5879 
  0.9851464 217.6709 
  0.9874667 217.6709 
  0.9662011 215.5104 
  0.9561557 211.2535 
  0.9835509 209.1568 
  0.9764188 209.1568 
  0.8779365 208.1162 
  0.8911879 207.0808 
  0.8716732 205.0254 
  0.8663442 197.9909 
  0.9105747 191.1978 
  0.9614732 191.1978 
  0.9704162 187.4211 
  0.9690547 186.4887 
  0.9596871 187.4211 
  0.9392474 190.2465 
  0.8951484 192.1537 
  0.8852764 190.2465 
  0.9345272 185.5609 
  0.9304817 182.8051 
  0.9600235 180.0902 
  0.9829449 178.3027 
  0.9876611 178.3027 
  0.9889374 178.3027 
  0.9911457 178.3027 
  0.9835688 176.533 
  0.9755108 175.6547 
  0.9868198 174.7808 
  0.9932452 175.6547 
  0.9902206 175.6547 
  0.9920143 176.533 
  0.9922135 175.6547 
  0.9892406 174.7808 
  0.9888865 172.185 
  0.9796383 172.185 
  0.9825094 170.476 
  0.9640273 170.476 
  0.9416605 173.046 
  0.9690976 173.9112 
  0.9527848 173.9112 
  0.8625391 173.046 
  0.8571138 173.9112 
  0.826855 174.7808 
  0.8046249 176.533 
  0.6064064 178.3027 
  0.4831207 180.9906 
  0.4654697 183.7191 
  0.6392145 186.4887 
  0.6342434 190.2465 
  0.8626602 194.0801 
  0.953752 194.0801 
  0.987371 191.1978 
  0.9915444 188.3582 
  0.9920763 186.4887 
  0.9898383 184.6377 
  0.9908906 181.8956 
  0.9892344 180.9906 
  0.9779232 179.1942 
  0.962887 175.6547 
  0.9623674 170.476 
  0.9719465 171.3284 
  0.9843823 173.046 
  0.9776363 173.9112 
  0.9711902 173.9112 
  0.9477384 172.185 
  0.9629893 172.185 
  0.9716213 173.9112 
  0.9675733 176.533 
  0.9643332 178.3027 
  0.9592766 180.0902 
  0.8798139 181.8956 
  0.7133937 185.5609 
  0.558998 190.2465 
  0.3460312 196.0257 
  0.645723 201.9805 
  0.538826 208.1162 
  0.6076745 216.5879 
  0.5921855 228.8026 
  0.8826846 238.1164 
  0.9501736 242.9146 
  0.9898381 245.3498 
  0.9941772 245.3498 
  0.9901065 246.5766 
  0.9880084 247.8095 
  0.9941741 249.0485 
  0.9959649 249.0485 
  0.9930359 250.2938 
  0.9856881 252.8029 
  0.9906433 252.8029 
  0.9973371 252.8029 
  0.9980655 252.8029 
  0.9958057 251.5452 
  0.9909865 250.2938 
  0.990723 249.0485 
  0.987911 249.0485 
  0.9759061 251.5452 
  0.9722236 251.5452 
  0.9578422 251.5452 
  0.936386 250.2938 
  0.9094891 245.3498 
  0.8420637 235.753 
  0.6358631 218.7592 
  0.757064 198.9809 
  0.8944787 192.1537 
  0.9415626 188.3582 
  0.9400396 183.7191 
  0.9019431 180.9906 
  0.904547 175.6547 
  0.8742625 173.046 
  0.7475201 169.6279 
  0.5156456 164.6269 
  0.4781345 161.3751 
  0.4000205 149.7427 
  0.4614965 147.5188 
  0.4422437 141.7487 
  0.3527509 136.2042 
  0.191848 130.2255 
  0.3072545 128.2915 
  0.5506754 127.0181 
  0.6923887 129.5776 
  0.6101584 132.8496 
  0.3040284 138.2575 
  0.1897903 155.8383 
  0.4685728 164.6269 
  0.5444003 164.6269 
  0.5138596 162.9929 
  0.5802024 160.5723 
  0.2090085 160.5723 
  0.2416739 162.9929 
  0.2459603 168.784 
  0.6345571 176.533 
  0.5914032 184.6377 
  0.4447052 193.1145 
  0.2762254 201.9805 
  0.2842087 212.3098 
  -0.1974148 224.2832 
  0.5095772 238.1164 
  0.7442543 252.8029 
  0.8915138 252.8029 
  0.9569594 247.8095 
  0.9625007 242.9146 
  0.9194686 235.753 
  0.8994824 227.6642 
  0.9381443 218.7592 
  0.7226841 219.853 
  0.6949252 223.1673 
  0.6777956 222.057 
  0.7978759 214.4382 
  0.8507771 209.1568 
  0.7713854 211.2535 
  0.6789038 211.2535 
  0.7500904 206.0505 
  0.7494768 204.0054 
  0.7971382 206.0505 
  0.8222839 206.0505 
  0.7602305 200.9756 
  0.6428912 197.0059 
  0.6536841 193.1145 
  0.7034875 188.3582 
  0.5804744 181.8956 
  0.4417492 173.046 
  0.3494109 161.3751 
  0.1981726 148.2564 
  0.6261529 140.3417 
  0.7587391 136.8852 
  0.3861219 134.8523 
  0.2585415 138.2575 
  0.4469885 141.7487 
  0.7737626 140.3417 
  0.7246874 138.2575 
  0.6511324 136.8852 
  0.4254677 136.8852 
  0.1780608 141.7487 
  0.4341104 146.7849 
  0.2834992 149.7427 
  0.4845652 151.2439 
  0.4879717 151.2439 
  0.5601908 152.0001 
  0.7019184 152.0001 
  0.8482625 152.0001 
  0.8672082 152.0001 
  0.8626106 151.2439 
  0.8626055 149.7427 
  0.9013379 148.2564 
  0.8793075 146.7849 
  0.8914734 144.605 
  0.8311929 141.7487 
  0.6839114 139.6435 
  0.5378032 136.8852 
  0.3579432 131.531 
  0.3721773 123.8897 
  0.259927 118.4515 
  0.4204291 116.1118 
  0.7305987 117.2759 
  0.6386622 122.6601 
  0.4521356 127.0181 
  0.1688575 132.1886 
  0.2404094 136.8852 
  0.161655 139.6435 
  0.361738 140.3417 
  -0.02500387 138.2575 
  0.4358978 136.2042 
  0.4791997 134.8523 
  0.5491226 134.1814 
  0.6218376 133.5138 
  0.566164 133.5138 
  0.6004029 134.1814 
  0.4282176 134.8523 
  0.2462277 135.5266 
  0.468562 136.8852 
  0.1709644 137.5696 
  -0.1866998 138.2575 
  -0.2322609 138.9488 
  -0.3413201 138.9488 
  -0.5628413 138.9488 
  -0.3278477 138.2575 
  -0.1193225 138.9488 
  0.04384596 140.3417 
  0.04935479 142.4574 
  0.1841608 144.605 
  0.5726393 147.5188 
  0.6960948 151.2439 
  0.7003101 153.5239 
  0.7147154 155.8383 
  0.7900128 158.9785 
  0.7915136 160.5723 
  0.8635668 162.9929 
  0.9153162 167.1087 
  0.9632365 168.784 
  0.9837555 173.046 
  0.9895447 174.7808 
  0.9936979 178.3027 
  0.9939244 180.0902 
  0.9943298 180.9906 
  0.9935455 183.7191 
  0.9959602 184.6377 
  0.9956325 184.6377 
  0.9592334 185.5609 
  0.8768551 189.3 
  0.7307228 193.1145 
  0.511133 192.1537 
  0.6857247 188.3582 
  0.9158598 184.6377 
  0.9108561 186.4887 
  0.9116812 187.4211 
  0.9223366 187.4211 
  0.8974369 188.3582 
  0.8092315 189.3 
  0.6424754 190.2465 
  0.415496 193.1145 
  -0.09486372 194.0801 
  0.06201403 196.0257 
  0.7022306 199.9758 
  0.8596367 206.0505 
  0.8891434 212.3098 
  0.8837855 220.9523 
  0.7532349 229.9466 
  0.8273478 235.753 
  0.8926229 240.5036 
  0.9618158 242.9146 
  0.9802947 242.9146 
  0.9757999 241.7061 
  0.9556377 236.9318 
  0.9495205 228.8026 
  0.8714783 222.057 
  0.6913882 217.6709 
  0.5996734 214.4382 
  0.5818065 196.0257 
  0.7346321 182.8051 
  0.8720678 184.6377 
  0.8043494 181.8956 
  0.7913815 184.6377 
  0.8019897 179.1942 
  0.8272967 177.4156 
  0.917188 173.9112 
  0.948779 178.3027 
  0.9491397 179.1942 
  0.9660939 181.8956 
  0.948925 181.8956 
  0.9438759 183.7191 
  0.9082419 182.8051 
  0.8157918 182.8051 
  0.7726267 183.7191 
  0.7925529 184.6377 
  0.82323 185.5609 
  0.7710455 186.4887 
  0.7245523 187.4211 
  0.6725792 188.3582 
  0.6098945 190.2465 
  0.6053364 192.1537 
  0.5669799 194.0801 
  0.6425812 196.0257 
  0.5920788 198.9809 
  0.5982969 201.9805 
  0.5845767 206.0505 
  0.8673239 210.2025 
  0.8958703 213.3714 
  0.9322492 215.5104 
  0.9763456 216.5879 
  0.9871499 217.6709 
  0.9847117 216.5879 
  0.9803717 215.5104 
  0.9864718 214.4382 
  0.9891353 213.3714 
  0.9886542 212.3098 
  0.9887235 211.2535 
  0.9895412 211.2535 
  0.987739 210.2025 
  0.9813522 211.2535 
  0.979372 210.2025 
  0.9715059 211.2535 
  0.9659958 212.3098 
  0.9582934 213.3714 
  0.9457313 214.4382 
  0.9001306 214.4382 
  0.8375491 214.4382 
  0.8580287 213.3714 
  0.8090529 212.3098 
  0.7204515 211.2535 
  0.727868 211.2535 
  0.8134795 211.2535 
  0.7847017 212.3098 
  0.855688 212.3098 
  0.9308017 212.3098 
  0.8832355 211.2535 
  0.7976366 210.2025 
  0.6125828 208.1162 
  0.7335693 206.0505 
  0.5974885 204.0054 
  0.5784881 201.9805 
  0.626056 199.9758 
  0.3227086 197.9909 
  0.4882276 196.0257 
  0.5797763 194.0801 
  0.6834005 191.1978 
  0.8976591 190.2465 
  0.9618267 188.3582 
  0.9768698 185.5609 
  0.9630268 181.8956 
  0.951674 178.3027 
  0.9476838 173.9112 
  0.9517156 172.185 
  0.9504529 169.6279 
  0.9870636 169.6279 
  0.9741296 168.784 
  0.9508372 168.784 
  0.90883 165.4501 
  0.8581914 161.3751 
  0.8267851 156.6175 
  0.6312259 150.4914 
  0.546525 146.7849 
  0.5260749 146.7849 
  0.4342186 153.5239 
  0.492891 160.5723 
  0.6401276 166.2773 
  0.7607003 168.784 
  0.8006893 169.6279 
  0.754485 169.6279 
  0.3563844 168.784 
  0.2301813 168.784 
  0.5213931 168.784 
  0.4302804 169.6279 
  0.203999 170.476 
  0.5803608 172.185 
  0.799207 173.9112 
  0.7838959 175.6547 
  0.6272278 177.4156 
  0.5649745 179.1942 
  0.1334038 180.9906 
  0.4665532 182.8051 
  0.7017906 184.6377 
  0.574991 186.4887 
  0.1776394 188.3582 
  0.7058933 190.2465 
  0.7285945 192.1537 
  0.5696192 194.0801 
  0.5714518 196.0257 
  0.8047431 198.9809 
  0.7406042 200.9756 
  0.3201705 202.9904 
  0.7420282 205.0254 
  0.8154832 207.0808 
  0.7317356 209.1568 
  0.3067544 211.2535 
  0.8014479 213.3714 
  0.8102676 215.5104 
  0.4439816 217.6709 
  0.8234195 219.853 
  0.7885534 222.057 
  0.4245847 224.2832 
  0.00418177 226.5316 
  0.3035149 228.8026 
  0.6538261 231.0963 
  0.847946 233.4131 
  0.6219971 235.753 
  0.858582 236.9318 
  0.8284138 239.307 
  0.4044586 240.5036 
  0.7807371 241.7061 
  0.6421613 242.9146 
  0.4366452 244.1292 
  0.8172922 245.3498 
  0.7077252 246.5766 
  0.7137164 247.8095 
  0.8416377 249.0485 
  0.8084357 250.2938 
  0.6429872 251.5452 
  0.8479958 252.8029 
  0.8376269 254.067 
  0.5148344 255.3373 
  0.8253111 256.614 
  0.792549 257.8971 
  0.7535735 259.1866 
  0.08307214 260.4825 
  0.7227954 261.7849 
  0.7564813 264.4093 
  0.4621202 265.7313 
  0.4633192 267.06 
  0.7247673 268.3953 
  0.7581604 269.7372 
  0.5511292 271.0859 
  0.9282034 272.4414 
  0.9706463 271.0859 
  0.9854098 273.8036 
  0.9971125 273.8036 
  0.9954729 272.4414 
  0.9888761 269.7372 
  0.9950762 267.06 
  0.9968371 269.7372 
  0.9980098 269.7372 
  0.997749 268.3953 
  0.9915248 265.7313 
  0.9653835 259.1866 
  0.9663866 256.614 
  0.9707376 259.1866 
  0.9852746 263.0938 
  0.982106 265.7313 
  0.9469596 267.06 
  0.9675582 264.4093 
  0.9636477 261.7849 
  0.9427549 257.8971 
  0.7895892 251.5452 
  0.8654758 245.3498 
  0.5468963 235.753 
  0.683558 220.9523 
  0.8124123 211.2535 
  0.9160304 205.0254 
  0.9279389 197.0059 
  0.9523526 191.1978 
  0.9704132 186.4887 
  0.9692958 185.5609 
  0.9680378 184.6377 
  0.9765167 184.6377 
  0.9860514 182.8051 
  0.9883636 182.8051 
  0.9909548 181.8956 
  0.991558 183.7191 
  0.9909708 185.5609 
  0.9903755 187.4211 
  0.9908676 189.3 
  0.9932029 191.1978 
  0.9957952 191.1978 
  0.9942914 191.1978 
  0.9921359 189.3 
  0.9943052 187.4211 
  0.9959483 186.4887 
  0.995445 188.3582 
  0.9905357 190.2465 
  0.9810765 189.3 
  0.9172173 189.3 
  0.8372061 189.3 
  0.7548154 189.3 
  0.7888815 189.3 
  0.7481033 187.4211 
  0.598231 181.8956 
  0.4594265 180.9906 
  0.3907863 181.8956 
  0.3352165 180.9906 
  0.1176405 178.3027 
  0.07078327 173.046 
  -0.04527154 169.6279 
  -0.1348021 168.784 
  0.04466751 167.1087 
  0.09953013 164.6269 
  0.0682964 162.182 
  0.06978174 163.8079 
  0.1322864 165.4501 
  0.2013361 169.6279 
  0.2920707 174.7808 
  0.3333177 176.533 
  0.3332107 176.533 
  0.3530797 177.4156 
  0.4650808 179.1942 
  0.5486825 180.9906 
  0.4580497 182.8051 
  0.4908407 184.6377 
  0.4697383 186.4887 
  0.3304503 188.3582 
  0.3914385 190.2465 
  0.6257187 192.1537 
  0.3618582 195.0505 
  0.1043633 197.9909 
  0.3943349 200.9756 
  0.5306126 204.0054 
  0.1373097 207.0808 
  0.2010625 210.2025 
  0.3670651 213.3714 
  0.3145425 216.5879 
  0.8130425 219.853 
  0.8798357 222.057 
  0.9403205 223.1673 
  0.97297 224.2832 
  0.9836287 223.1673 
  0.9911686 222.057 
  0.9915301 220.9523 
  0.9894016 219.853 
  0.9908847 218.7592 
  0.9930059 217.6709 
  0.9904228 217.6709 
  0.962741 213.3714 
  0.9670218 209.1568 
  0.9779282 206.0505 
  0.9113222 202.9904 
  0.863717 196.0257 
  0.7813085 193.1145 
  0.7102375 197.0059 
  0.6673803 201.9805 
  0.7775867 202.9904 
  0.8043374 204.0054 
  0.7297218 200.9756 
  0.5714095 197.0059 
  0.5821126 194.0801 
  0.7140544 193.1145 
  0.7262678 194.0801 
  0.7389845 195.0505 
  0.7598057 196.0257 
  0.7466297 197.9909 
  0.6394933 199.9758 
  0.6129577 201.9805 
  0.672642 202.9904 
  0.715472 202.9904 
  0.7394825 202.9904 
  0.5446303 202.9904 
  0.7562882 202.9904 
  0.7478679 202.9904 
  0.3738182 201.9805 
  0.7470279 200.9756 
  0.7631992 199.9758 
  0.4866134 198.9809 
  0.6727687 197.9909 
  0.6740197 197.0059 
  0.5121869 196.0257 
  0.5960711 195.0505 
  0.4775652 194.0801 
  0.7727789 193.1145 
  0.7569727 192.1537 
  0.3645771 190.2465 
  0.7480979 188.3582 
  0.669458 186.4887 
  0.06009803 184.6377 
  -0.2068706 182.8051 
  0.2833839 180.9906 
  0.6422815 179.1942 
  0.3581112 177.4156 
  0.2500266 175.6547 
  0.3498747 173.9112 
  0.569603 172.185 
  0.097353 170.476 
  0.3730714 168.784 
  -0.1027659 167.1087 
  -0.1343261 165.4501 
  -0.1674749 163.8079 
  -0.09346908 162.182 
  0.02017644 160.5723 
  0.01829195 158.9785 
  0.2345988 157.4006 
  0.254218 155.8383 
  0.4619125 154.2915 
  0.2792292 165.4501 
  0.6047359 176.533 
  0.7835823 178.3027 
  0.8670148 175.6547 
  0.9359789 173.9112 
  0.9631585 175.6547 
  0.9587292 176.533 
  0.9695755 176.533 
  0.9835575 179.1942 
  0.9900097 179.1942 
  0.9886937 180.0902 
  0.9895495 180.9906 
  0.9944302 183.7191 
  0.9966992 183.7191 
  0.9946688 186.4887 
  0.992237 189.3 
  0.9917749 191.1978 
  0.9863276 192.1537 
  0.9785607 192.1537 
  0.9806378 190.2465 
  0.9924863 189.3 
  0.99017 188.3582 
  0.9820646 187.4211 
  0.9470521 188.3582 
  0.8216711 187.4211 
  0.7153458 190.2465 
  0.7442878 193.1145 
  0.5942695 196.0257 
  0.5665525 197.0059 
  0.535006 195.0505 
  0.6266476 193.1145 
  0.6059493 194.0801 
  0.6425897 198.9809 
  0.8118222 206.0505 
  0.8551767 210.2025 
  0.8889339 211.2535 
  0.947816 211.2535 
  0.9787855 209.1568 
  0.988611 208.1162 
  0.987321 207.0808 
  0.9843316 205.0254 
  0.9737096 204.0054 
  0.9765185 202.9904 
  0.9856598 202.9904 
  0.9898718 204.0054 
  0.9886413 204.0054 
  0.989678 202.9904 
  0.9851701 200.9756 
  0.9817263 198.9809 
  0.9766667 196.0257 
  0.9717003 194.0801 
  0.9652438 193.1145 
  0.9332103 193.1145 
  0.9138572 194.0801 
  0.8846416 194.0801 
  0.9482771 195.0505 
  0.9305538 196.0257 
  0.8390872 197.0059 
  0.7701216 197.9909 
  0.7264634 197.9909 
  0.7708728 197.9909 
  0.8309863 197.9909 
  0.8339165 197.9909 
  0.723191 197.0059 
  0.7247626 196.0257 
  0.608819 195.0505 
  0.3213559 194.0801 
  0.2234745 193.1145 
  -0.02573975 191.1978 
  0.05725723 189.3 
  0.218053 187.4211 
  0.4777128 185.5609 
  0.5337029 182.8051 
  0.2318052 180.0902 
  0.4889631 177.4156 
  0.08109105 174.7808 
  0.7427355 172.185 
  0.8433576 171.3284 
  0.938632 170.476 
  0.9607551 168.784 
  0.9642593 167.1087 
  0.963613 166.2773 
  0.9651926 167.1087 
  0.9591027 168.784 
  0.9754831 172.185 
  0.9820764 176.533 
  0.9875088 180.0902 
  0.9851621 181.8956 
  0.9825414 183.7191 
  0.9723955 185.5609 
  0.9567259 187.4211 
  0.8835074 189.3 
  0.8628154 192.1537 
  0.9157982 195.0505 
  0.5938084 198.9809 
  0.6943321 209.1568 
  0.7714629 212.3098 
  0.8662375 211.2535 
  0.849728 205.0254 
  0.7945137 196.0257 
  0.7933522 188.3582 
  0.6368725 178.3027 
  0.5897154 167.1087 
  0.3229582 158.1876 
  0.2969382 144.605 
  0.3351896 141.7487 
  0.384491 146.0546 
  0.04096958 151.2439 
  0.1369363 172.185 
  0.3727793 180.9906 
  0.4194036 183.7191 
  0.4396896 185.5609 
  0.5275242 185.5609 
  0.2498598 186.4887 
  0.05898202 190.2465 
  0.04901373 195.0505 
  0.1100158 197.9909 
  0.2187118 198.9809 
  0.4847157 197.9909 
  0.4972816 197.9909 
  0.5150168 198.9809 
  0.5341532 200.9756 
  0.2631531 202.9904 
  0.1344986 205.0254 
  0.1769232 207.0808 
  0.2019505 209.1568 
  0.3217678 211.2535 
  0.366561 213.3714 
  0.3891162 215.5104 
  0.3456501 217.6709 
  0.3461981 219.853 
  0.1349857 222.057 
  0.06558777 224.2832 
  0.397542 226.5316 
  0.697022 228.8026 
  0.4396819 231.0963 
  0.7819827 233.4131 
  0.7121504 235.753 
  0.2208821 238.1164 
  0.7816582 240.5036 
  0.7002975 242.9146 
  0.03593514 245.3498 
  0.1999688 247.8095 
  0.6519101 249.0485 
  0.5185443 251.5452 
  0.9164662 254.067 
  0.9069665 255.3373 
  0.9549389 254.067 
  0.9885621 252.8029 
  0.9925307 251.5452 
  0.9942389 249.0485 
  0.996495 249.0485 
  0.9955549 247.8095 
  0.9936098 246.5766 
  0.980476 245.3498 
  0.9671 246.5766 
  0.9642795 249.0485 
  0.9722587 249.0485 
  0.974653 249.0485 
  0.9627596 247.8095 
  0.9594576 247.8095 
  0.9352374 247.8095 
  0.9516878 246.5766 
  0.9676836 245.3498 
  0.9551129 246.5766 
  0.9023392 249.0485 
  0.8472683 251.5452 
  0.8856258 254.067 
  0.8884159 255.3373 
  0.9254211 255.3373 
  0.9612991 254.067 
  0.9673759 250.2938 
  0.9728466 245.3498 
  0.967782 242.9146 
  0.9654288 240.5036 
  0.9444144 238.1164 
  0.9269778 236.9318 
  0.9488637 236.9318 
  0.9771202 236.9318 
  0.9830479 235.753 
  0.97499 234.5801 
  0.968623 233.4131 
  0.9521769 233.4131 
  0.9390221 233.4131 
  0.9339334 233.4131 
  0.887956 232.2518 
  0.9046763 231.0963 
  0.8827981 229.9466 
  0.8236388 231.0963 
  0.7853005 231.0963 
  0.8529194 229.9466 
  0.8421693 227.6642 
  0.880672 224.2832 
  0.8143209 219.853 
  0.8765387 215.5104 
  0.8224545 210.2025 
  0.5476995 204.0054 
  0.3984698 197.0059 
  0.2524756 189.3 
  0.0114817 180.9906 
  0.2040667 172.185 
  -0.1669366 163.8079 
  0.504205 154.2915 
  0.7180874 151.2439 
  0.8632782 151.2439 
  0.8973259 150.4914 
  0.9185986 151.2439 
  0.7587776 154.2915 
  0.7147576 159.7734 
  0.7589291 165.4501 
  0.8840488 171.3284 
  0.972356 175.6547 
  0.9816653 179.1942 
  0.988443 180.9906 
  0.9923041 181.8956 
  0.9943824 182.8051 
  0.9964477 183.7191 
  0.9936554 181.8956 
  0.9887792 180.0902 
  0.9771134 175.6547 
  0.9618319 170.476 
  0.9328524 167.9442 
  0.819974 168.784 
  0.8450117 170.476 
  0.8293109 171.3284 
  0.6963592 171.3284 
  0.6338254 164.6269 
  0.7285098 158.9785 
  0.7664092 154.2915 
  0.8064316 152.0001 
  0.9278697 152.0001 
  0.9533199 151.2439 
  0.9601526 149.7427 
  0.9562845 148.9977 
  0.963026 149.7427 
  0.9602929 149.7427 
  0.8744306 150.4914 
  0.5927675 154.2915 
  0.3564559 159.7734 
  0.2405773 164.6269 
  0.2860079 169.6279 
  0.6529107 173.9112 
  0.835753 176.533 
  0.8849658 179.1942 
  0.875807 180.9906 
  0.823069 182.8051 
  0.6626108 184.6377 
  0.2836408 187.4211 
  0.6051244 190.2465 
  0.7253854 193.1145 
  0.765985 196.0257 
  0.8194041 198.9809 
  0.6263421 201.9805 
  0.8017131 205.0254 
  0.682602 209.1568 
  0.5036187 213.3714 
  0.5814967 218.7592 
  0.814868 224.2832 
  0.8269848 229.9466 
  0.7358219 235.753 
  0.4268066 241.7061 
  0.8835183 247.8095 
  0.8267587 254.067 
  0.5491355 259.1866 
  0.8746379 264.4093 
  0.8389229 269.7372 
  0.6826822 275.1726 
  0.9028191 280.7175 
  0.8286572 286.3741 
  0.4525082 292.1447 
  0.7583002 298.0316 
  0.9073849 304.037 
  0.9051184 310.1635 
  0.64346 316.4135 
  0.9253378 322.7894 
  0.9132795 329.2938 
  0.6598362 334.2579 
  0.9356085 339.2969 
  0.9236958 344.4118 
  0.8228285 349.6039 
  0.9376769 354.8742 
  0.9356694 360.224 
  0.9184375 365.6544 
  0.9058462 371.1667 
  0.9012623 374.8877 
  0.9611984 378.6459 
  0.9567629 382.4419 
  0.7675866 384.3541 
  0.9645271 386.2758 
  0.9546855 388.2072 
  0.7844603 388.2072 
  0.9640375 388.2072 
  0.9452955 388.2072 
  0.5753929 388.2072 
  0.394645 388.2072 
  0.5464906 388.2072 
  0.7876784 388.2072 
  0.7982122 388.2072 
  0.5932955 388.2072 
  0.4936025 388.2072 
  0.7250561 388.2072 
  0.6559195 388.2072 
  0.7656435 388.2072 
  0.9420093 388.2072 
  0.7749707 388.2072 ]


================================================
FILE: audio/tools/setup_helpers/__init__.py
================================================
from .extension import *


================================================
FILE: audio/tools/setup_helpers/extension.py
================================================
import os
import platform
import subprocess
from pathlib import Path

import distutils.sysconfig
from setuptools import Extension
from setuptools.command.build_ext import build_ext

__all__ = [
    "get_ext_modules",
    "CMakeBuild",
]

_THIS_DIR = Path(__file__).parent.resolve()
_ROOT_DIR = _THIS_DIR.parent.parent.resolve()
_PADDLESPEECH_DIR = _ROOT_DIR / "paddleaudio"


def _get_build(var, default=False):
    if var not in os.environ:
        return default

    val = os.environ.get(var, "0")
    trues = ["1", "true", "TRUE", "on", "ON", "yes", "YES"]
    falses = ["0", "false", "FALSE", "off", "OFF", "no", "NO"]
    if val in trues:
        return True
    if val not in falses:
        print(f"WARNING: Unexpected environment variable value `{var}={val}`. "
              f"Expected one of {trues + falses}")
    return False


# Build switches resolved once at import time from the environment.
# SOX and KALDI are forced off on Windows (the environment is not even
# consulted there); elsewhere they default to enabled. MAD defaults to off.
_BUILD_SOX = (platform.system() != "Windows") and _get_build("BUILD_SOX", True)
_BUILD_MAD = _get_build("BUILD_MAD", False)
_BUILD_KALDI = (platform.system() != "Windows") and _get_build(
    "BUILD_KALDI", True)
# Raw CUDA architecture list as given by the user, or None when unset.
_PADDLESPEECH_CUDA_ARCH_LIST = os.environ.get("PADDLESPEECH_CUDA_ARCH_LIST")

def get_ext_modules():
    """Return the placeholder ``Extension`` objects for the native libraries.

    Returns:
        An empty list on Windows; otherwise one source-less ``Extension`` for
        ``paddleaudio.lib.libpaddleaudio`` followed by one for
        ``paddleaudio._paddleaudio``.
    """
    if platform.system() == "Windows":
        return []

    ext_names = (
        "paddleaudio.lib.libpaddleaudio",
        "paddleaudio._paddleaudio",
    )
    # The extensions carry no sources: they only register the output names.
    return [Extension(name=ext_name, sources=[]) for ext_name in ext_names]


# Based off of
# https://github.com/pybind/cmake_example/blob/580c5fd29d4651db99d8874714b07c0c49a53f8a/setup.py
class CMakeBuild(build_ext):
    """``build_ext`` command that builds the native libraries through CMake.

    The CMake configure and build steps are run once, when the
    ``paddleaudio._paddleaudio`` extension is processed; the build installs
    its artifacts into the extension output directory.
    """

    def run(self):
        """Verify that ``cmake`` can be launched, then run the normal build.

        Raises:
            RuntimeError: if launching ``cmake --version`` fails with
                ``OSError`` (for example, the executable is not found).
        """
        try:
            subprocess.check_output(["cmake", "--version"])
        except OSError:
            raise RuntimeError("CMake is not available.") from None
        super().run()

    @staticmethod
    def _cmake_cuda_architectures(arch_list):
        """Convert a ``MAJOR.MINOR[+PTX]`` list to CMake's architecture syntax.

        Entries may be separated by spaces and/or semicolons. An entry with a
        ``+PTX`` suffix becomes the bare number; any other entry gets a
        ``-real`` suffix. For example ``"7.5 8.0+PTX"`` yields
        ``["75-real", "80"]``.

        Empty entries (repeated or trailing separators, or an empty string)
        are skipped so they do not turn into a bogus ``-real`` architecture.

        Args:
            arch_list: The raw architecture list string.

        Returns:
            A list of architecture strings, possibly empty.
        """
        normalized = arch_list.replace(".", "").replace(" ", ";")
        entries = [entry for entry in normalized.split(";") if entry]
        return [
            entry[:-4] if entry.endswith("+PTX") else f"{entry}-real"
            for entry in entries
        ]

    def build_extension(self, ext):
        """Configure and build the whole native project with CMake.

        Args:
            ext: The ``Extension`` being built. Only
                ``paddleaudio._paddleaudio`` triggers the CMake run; any other
                extension returns immediately.

        Raises:
            subprocess.CalledProcessError: if the configure or build command
                exits with a non-zero status.
        """
        # Since two library files (libpaddleaudio and _paddleaudio) need to be
        # recognized by setuptools, we instantiate `Extension` twice. (see `get_ext_modules`)
        # This leads to the situation where this `build_extension` method is called twice.
        # However, the following `cmake` command will build all of them at the same time,
        # so, we do not need to perform `cmake` twice.
        # Therefore we call `cmake` only for `paddleaudio._paddleaudio`.
        if ext.name != "paddleaudio._paddleaudio":
            return

        extdir = os.path.abspath(
            os.path.dirname(self.get_ext_fullpath(ext.name)))

        # required for auto-detection of auxiliary "native" libs
        if not extdir.endswith(os.path.sep):
            extdir += os.path.sep

        cfg = "Debug" if self.debug else "Release"

        cmake_args = [
            f"-DCMAKE_BUILD_TYPE={cfg}",
            # f"-DCMAKE_PREFIX_PATH={torch.utils.cmake_prefix_path}",
            f"-DCMAKE_INSTALL_PREFIX={extdir}",
            "-DCMAKE_VERBOSE_MAKEFILE=ON",
            f"-DPYTHON_INCLUDE_DIR={distutils.sysconfig.get_python_inc()}",
            #f"-DPYTHON_LIBRARY={distutils.sysconfig.get_config_var('LIBDIR')}",
            f"-DBUILD_SOX:BOOL={'ON' if _BUILD_SOX else 'OFF'}",
            f"-DBUILD_MAD:BOOL={'ON' if _BUILD_MAD else 'OFF'}",
            f"-DBUILD_KALDI:BOOL={'ON' if _BUILD_KALDI else 'OFF'}",
            # f"-DBUILD_RNNT:BOOL={'ON' if _BUILD_RNNT else 'OFF'}",
            # f"-DBUILD_CTC_DECODER:BOOL={'ON' if _BUILD_CTC_DECODER else 'OFF'}",
            "-DBUILD_PADDLEAUDIO_PYTHON_EXTENSION:BOOL=ON",
            # f"-DUSE_ROCM:BOOL={'ON' if _USE_ROCM else 'OFF'}",
            # f"-DUSE_CUDA:BOOL={'ON' if _USE_CUDA else 'OFF'}",
            # f"-DUSE_OPENMP:BOOL={'ON' if _USE_OPENMP else 'OFF'}",
            # f"-DUSE_FFMPEG:BOOL={'ON' if _USE_FFMPEG else 'OFF'}",
        ]
        build_args = ["--target", "install"]
        # Pass CUDA architecture to cmake
        if _PADDLESPEECH_CUDA_ARCH_LIST is not None:
            # Convert MAJOR.MINOR[+PTX] list to new style one
            # defined at https://cmake.org/cmake/help/latest/prop_tgt/CUDA_ARCHITECTURES.html
            _arches = self._cmake_cuda_architectures(
                _PADDLESPEECH_CUDA_ARCH_LIST)
            # Only pass the flag when at least one architecture was given.
            if _arches:
                cmake_args += [
                    f"-DCMAKE_CUDA_ARCHITECTURES={';'.join(_arches)}"
                ]

        # Default to Ninja: used when CMAKE_GENERATOR is not set, and always
        # on Windows.
        if "CMAKE_GENERATOR" not in os.environ or platform.system() == "Windows":
            cmake_args += ["-GNinja"]

        if platform.system() == "Windows":
            import sys

            python_version = sys.version_info
            cmake_args += [
                "-DCMAKE_C_COMPILER=cl",
                "-DCMAKE_CXX_COMPILER=cl",
                f"-DPYTHON_VERSION={python_version.major}.{python_version.minor}",
            ]

        # Set CMAKE_BUILD_PARALLEL_LEVEL to control the parallel build level
        # across all generators.
        if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ:
            # self.parallel is a Python 3 only way to set parallel jobs by hand
            # using -j in the build_ext call, not supported by pip or PyPA-build.
            if hasattr(self, "parallel") and self.parallel:
                # CMake 3.12+ only.
                build_args += [f"-j{self.parallel}"]

        # exist_ok avoids failing if the directory appears between a separate
        # existence check and the creation call.
        os.makedirs(self.build_temp, exist_ok=True)

        print(
            f"cmake {_ROOT_DIR} {' '.join(cmake_args)}, cwd={self.build_temp}")
        subprocess.check_call(
            ["cmake", str(_ROOT_DIR)] + cmake_args, cwd=self.build_temp)
        print(f"cmake --build . {' '.join(build_args)}, cwd={self.build_temp}")
        subprocess.check_call(
            ["cmake", "--build", "."] + build_args, cwd=self.build_temp)

    def get_ext_filename(self, fullname):
        """Return the extension file name with its middle tag removed.

        The default file name is split on ``"."`` and the second-to-last
        component (the interpreter/ABI tag in names such as
        ``pkg/mod.<tag>.so``) is dropped, giving ``pkg/mod.so``.

        Args:
            fullname: Dotted full name of the extension.

        Returns:
            The file name without the tag. A name with fewer than three
            dot-separated components has no tag to drop and is returned
            unchanged (dropping a component there would discard the module
            name itself).
        """
        ext_filename = super().get_ext_filename(fullname)
        ext_filename_parts = ext_filename.split(".")
        if len(ext_filename_parts) < 3:
            return ext_filename
        without_abi = ext_filename_parts[:-2] + ext_filename_parts[-1:]
        return ".".join(without_abi)


================================================
FILE: dataset/aishell/.gitignore
================================================
data_aishell*
*.meta
manifest.*
*.tgz
resource_aishell


================================================
FILE: dataset/aishell/aishell.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Prepare Aishell mandarin dataset

Download, unpack and create manifest files.
Manifest file is a json-format file with each line containing the
meta data (i.e. audio filepath, transcript and audio duration)
of each audio file in the data set.
"""
from paddlespeech.dataset.aishell import aishell_main

# Script entry point: all work is delegated to aishell_main, which is imported
# above from paddlespeech.dataset.aishell.
if __name__ == '__main__':
    aishell_main()


================================================
FILE: dataset/aishell3/README.md
================================================
# [Aishell3](http://www.openslr.org/93/)

AISHELL-3 is a large-scale and high-fidelity multi-speaker Mandarin speech corpus which could be used to train multi-speaker Text-to-Speech (TTS) systems. The corpus contains roughly **85 hours** of emotion-neutral recordings spoken by 218 native Mandarin Chinese speakers, with a total of 88035 utterances. Their auxiliary attributes such as gender, age group and native accents are explicitly marked and provided in the corpus. Accordingly, transcripts at the Chinese character level and pinyin level are provided along with the recordings. The word & tone transcription accuracy rate is above 98%, achieved through professional speech annotation and strict quality inspection for tone and prosody. (This database is free for academic research; commercial use is not allowed without permission.)


================================================
FILE: dataset/chime3_background/chime3_background.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Prepare CHiME3 background data.

Download, unpack and create manifest files.
Manifest file is a json-format file with each line containing the
meta data (i.e. audio filepath, transcript and audio duration)
of each audio file in the data set.
"""
import argparse
import io
import json
import os
import zipfile

import soundfile
import wget
from paddle.v2.dataset.common import md5file

# Base directory for the default --target_dir. The cache-directory variant is
# kept below for reference but is disabled, so the default resolves relative
# to the current working directory.
# DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
DATA_HOME = os.path.expanduser('.')

# Download address of the background package and the MD5 digest the
# downloaded file is compared against.
URL = "https://d4s.myairbridge.com/packagev2/AG0Y3DNBE5IWRRTV/?dlid=W19XG7T0NNHB027139H0EQ"
MD5 = "c3ff512618d7a67d4f85566ea1bc39ec"

# Command-line interface; the module docstring is reused as the description.
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
    "--target_dir",
    default=DATA_HOME + "/chime3_background",
    type=str,
    help="Directory to save the dataset. (default: %(default)s)")
parser.add_argument(
    "--manifest_filepath",
    default="manifest.chime3.background",
    type=str,
    help="Filepath for output manifests. (default: %(default)s)")
# NOTE: parsing happens at import time, so importing this module from another
# program parses that program's sys.argv.
args = parser.parse_args()


def download(url, md5sum, target_dir, filename=None):
    """Download ``url`` into ``target_dir`` and verify the file's MD5 checksum.

    Args:
        url: Address of the file to fetch.
        md5sum: Expected MD5 hex digest of the file.
        target_dir: Directory the file is stored in; created when missing.
        filename: Name to store the file under. Defaults to the last
            "/"-separated component of ``url``.

    Returns:
        The local path ``os.path.join(target_dir, filename)``.

    Raises:
        RuntimeError: If the file on disk does not match ``md5sum`` after
            the download.
    """
    if filename is None:
        filename = url.split("/")[-1]
    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    filepath = os.path.join(target_dir, filename)

    if os.path.exists(filepath) and md5file(filepath) == md5sum:
        print("File exists, skip downloading. (%s)" % filepath)
        return filepath

    # A file that is present but fails the checksum is a stale or partial
    # download. Remove it so the fresh copy lands on exactly ``filepath``
    # (the wget package is understood to pick an alternate name when the
    # destination already exists -- confirm).
    if os.path.isfile(filepath):
        os.remove(filepath)

    print("Downloading %s ..." % url)
    # Write to the explicit path so the requested ``filename`` is honoured;
    # passing only the directory lets wget choose its own name, and the
    # checksum below would then inspect a path that was never written.
    wget.download(url, out=filepath)
    print("\nMD5 Checksum %s ..." % filepath)
    if not md5file(filepath) == md5sum:
        raise RuntimeError("MD5 checksum failed.")
    return filepath


def unpack(filepath, target_dir):
    """Unpack the archive at ``filepath`` into ``target_dir``.

    The archive type is chosen from the file name: ``.zip`` files are read
    with ``zipfile``; ``.tar`` and ``.tar.gz`` files are read with
    ``tarfile``.

    Args:
        filepath: Path of the archive to extract.
        target_dir: Directory the archive members are extracted into.

    Raises:
        ValueError: If ``filepath`` has none of the supported extensions.
    """
    # Imported locally because this file's top-level imports do not include
    # tarfile.
    import tarfile

    print("Unpacking %s ..." % filepath)
    if filepath.endswith('.zip'):
        # Context managers close the archive even if extraction fails.
        with zipfile.ZipFile(filepath, 'r') as archive:
            archive.extractall(target_dir)
    elif filepath.endswith(('.tar', '.tar.gz')):
        # tarfile (not zipfile) provides open(); compression is auto-detected.
        with tarfile.open(filepath) as archive:
            archive.extractall(target_dir)
    else:
        raise ValueError("File format is not supported for unpacking.")


def create_manifest(data_dir, manifest_path):
    """Write a JSON-lines manifest describing every ``.wav`` file under ``data_dir``.

    Each output line is a JSON object with the keys ``utt`` (file name
    without extension), ``feat`` (path of the audio file), ``feat_shape``
    (a one-element sequence holding the duration in seconds) and ``type``
    (always ``'background'``).

    Args:
        data_dir: Directory that is walked recursively for ``.wav`` files.
        manifest_path: Path of the manifest file to write (UTF-8).
    """
    print("Creating manifest %s ..." % manifest_path)
    json_lines = []
    for subfolder, _, filelist in sorted(os.walk(data_dir)):
        for filename in filelist:
            if not filename.endswith('.wav'):
                continue
            # os.walk already yields ``subfolder`` prefixed with ``data_dir``;
            # joining ``data_dir`` again would duplicate the prefix whenever
            # ``data_dir`` is a relative path.
            filepath = os.path.join(subfolder, filename)
            audio_data, samplerate = soundfile.read(filepath)
            duration = float(len(audio_data)) / samplerate
            json_lines.append(
                json.dumps({
                    'utt': os.path.splitext(os.path.basename(filepath))[0],
                    'feat': filepath,
                    'feat_shape': (duration, ),  # second
                    'type': 'background'
                }))
    with io.open(manifest_path, mode='w', encoding='utf8') as out_file:
        for line in json_lines:
            out_file.write(line + '\n')


def prepare_chime3(url, md5sum, target_dir, manifest_path):
    """Fetch and extract the CHiME3 background archives if needed, then write the manifest.

    Downloading and unpacking are skipped when ``target_dir`` already
    contains a "CHiME3" entry. The manifest is (re)created in either case.
    """
    if os.path.exists(os.path.join(target_dir, "CHiME3")):
        print("Skip downloading and unpacking. Data already exists in %s." %
              target_dir)
    else:
        # The outer package is fetched first and extracted in place ...
        outer_archive = download(url, md5sum, target_dir,
                                 "myairbridge-AG0Y3DNBE5IWRRTV.zip")
        unpack(outer_archive, target_dir)
        # ... then each per-environment archive is extracted in this order.
        inner_archives = (
            'CHiME3_background_bus.zip',
            'CHiME3_background_caf.zip',
            'CHiME3_background_ped.zip',
            'CHiME3_background_str.zip', )
        for archive_name in inner_archives:
            unpack(os.path.join(target_dir, archive_name), target_dir)
    # create manifest json file
    create_manifest(target_dir, manifest_path)


def main():
    """Prepare the CHiME3 background data using the parsed command-line options."""
    prepare_chime3(URL, MD5, args.target_dir, args.manifest_filepath)


if __name__ == '__main__':
    main()


================================================
FILE: dataset/gigaspeech/.gitignore
================================================
GigaSpeech/


================================================
FILE: dataset/gigaspeech/README.md
================================================
# [GigaSpeech](https://github.com/SpeechColab/GigaSpeech)

```
git clone https://github.com/SpeechColab/GigaSpeech.git

cd GigaSpeech
utils/download_gigaspeech.sh /disk1/audio_data/gigaspeech
toolkits/kaldi/gigaspeech_data_prep.sh --train-subset XL /disk1/audio_data/gigaspeech ../data
cd ..
```


================================================
FILE: dataset/gigaspeech/gigaspeech.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: dataset/gigaspeech/run.sh
================================================
#!/bin/bash

# Clone the GigaSpeech tooling when it is not present yet, then run its
# download script with the directory this script was started from as the
# destination.

set -e

# Remember the invocation directory; pushd below changes the working directory.
curdir="$PWD"

# Clone only when no GigaSpeech checkout exists here.
test -d GigaSpeech || git clone https://github.com/SpeechColab/GigaSpeech.git


pushd GigaSpeech
source env_vars.sh
# Quoted so that a working directory containing whitespace is passed as a
# single argument.
./utils/download_gigaspeech.sh "${curdir}/"
#toolkits/kaldi/gigaspeech_data_prep.sh --train-subset XL /disk1/audio_data/gigaspeech ../data
popd


================================================
FILE: dataset/librispeech/.gitignore
================================================
dev-clean
dev-other
test-clean
test-other
train-clean-100
train-clean-360
train-other-500
*.meta
manifest.*


================================================
FILE: dataset/librispeech/librispeech.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Prepare Librispeech ASR datasets.

Download, unpack and create manifest files.
Manifest file is a json-format file with each line containing the
meta data (i.e. audio filepath, transcript and audio duration)
of each audio file in the data set.
"""
import argparse
import codecs
import io
import json
import os
from multiprocessing.pool import Pool

import distutils.util
import soundfile

from paddlespeech.dataset.download import download
from paddlespeech.dataset.download import unpack
from paddlespeech.utils.argparse import strtobool

# Base address of the LibriSpeech archives; the commented-out line is an
# alternative mirror kept for reference.
URL_ROOT = "http://openslr.elda.org/resources/12"
#URL_ROOT = "https://openslr.magicdatatech.com/resources/12"
# One archive per subset.
URL_TEST_CLEAN = URL_ROOT + "/test-clean.tar.gz"
URL_TEST_OTHER = URL_ROOT + "/test-other.tar.gz"
URL_DEV_CLEAN = URL_ROOT + "/dev-clean.tar.gz"
URL_DEV_OTHER = URL_ROOT + "/dev-other.tar.gz"
URL_TRAIN_CLEAN_100 = URL_ROOT + "/train-clean-100.tar.gz"
URL_TRAIN_CLEAN_360 = URL_ROOT + "/train-clean-360.tar.gz"
URL_TRAIN_OTHER_500 = URL_ROOT + "/train-other-500.tar.gz"

# MD5 digests passed to download() together with the matching URL above.
MD5_TEST_CLEAN = "32fa31d27d2e1cad72775fee3f4849a9"
MD5_TEST_OTHER = "fb5a50374b501bb3bac4815ee91d3135"
MD5_DEV_CLEAN = "42e2234ba48799c1f50f24a7926300a1"
MD5_DEV_OTHER = "c8d0bcc9cca99d4f8b62fcc847357931"
MD5_TRAIN_CLEAN_100 = "2a93770f6d5c6c964bc36631d331a522"
MD5_TRAIN_CLEAN_360 = "c0e676e450a7ff2f54aeade5171606fa"
MD5_TRAIN_OTHER_500 = "d1a0fd59409feb2c614ce4d30c387708"

# Command-line interface; the module docstring is reused as the description.
parser = argparse.ArgumentParser(description=__doc__)
# A leading "~" in this value is expanded in main().
parser.add_argument(
    "--target_dir",
    default='~/.cache/paddle/dataset/speech/libri',
    type=str,
    help="Directory to save the dataset. (default: %(default)s)")
parser.add_argument(
    "--manifest_prefix",
    default="manifest",
    type=str,
    help="Filepath prefix for output manifests. (default: %(default)s)")
# The value is converted by strtobool from paddlespeech.utils.argparse
# (presumably mapping strings such as "True"/"False" to a boolean -- confirm).
# NOTE(review): the help text lists train-clean-100 as part of the minimal
# set, but main() only schedules train-clean-100 when full_download is true.
parser.add_argument(
    "--full_download",
    default="True",
    type=strtobool,
    help="Download all datasets for Librispeech."
    " If False, only download a minimal requirement (test-clean, dev-clean"
    " train-clean-100). (default: %(default)s)")
# NOTE: parsing happens at import time, so importing this module from another
# program parses that program's sys.argv.
args = parser.parse_args()


def create_manifest(data_dir, manifest_path):
    """Write a JSON-lines manifest and a ``.meta`` summary for ``data_dir``.

    ``data_dir`` is walked recursively. In every directory that contains a
    file whose name ends in ``trans.txt``, the first such file is read; each
    non-blank line is ``<utterance-id> <word> <word> ...`` and the audio is
    expected at ``<utterance-id>.flac`` in the same directory.

    Each manifest line is a JSON object with the keys ``utt``, ``utt2spk``
    (first two "-"-separated fields of ``utt``), ``feat`` (absolute audio
    path), ``feat_shape`` (duration in seconds) and ``text`` (lower-cased
    transcript).

    A summary (utterance count, hours, word count and two ratios) is written
    to ``<dirname(manifest_path)>/<basename(data_dir)>.meta``.

    Args:
        data_dir: Root directory of one extracted subset.
        manifest_path: Path of the manifest file to write (UTF-8).
    """
    print("Creating manifest %s ..." % manifest_path)
    json_lines = []
    total_sec = 0.0
    total_char = 0.0
    total_num = 0

    for subfolder, _, filelist in sorted(os.walk(data_dir)):
        text_filelist = [
            filename for filename in filelist if filename.endswith('trans.txt')
        ]
        if not text_filelist:
            continue
        text_filepath = os.path.join(subfolder, text_filelist[0])
        # ``with`` closes the transcript file once it has been consumed.
        with io.open(text_filepath, encoding="utf8") as text_file:
            for line in text_file:
                segments = line.strip().split()
                if not segments:
                    # Blank line: there is no utterance id to look up.
                    continue
                # Counts whitespace-separated tokens, not characters.
                nchars = len(segments[1:])
                text = ' '.join(segments[1:]).lower()

                audio_filepath = os.path.abspath(
                    os.path.join(subfolder, segments[0] + '.flac'))
                audio_data, samplerate = soundfile.read(audio_filepath)
                duration = float(len(audio_data)) / samplerate

                utt = os.path.splitext(os.path.basename(audio_filepath))[0]
                utt2spk = '-'.join(utt.split('-')[:2])

                json_lines.append(
                    json.dumps({
                        'utt': utt,
                        'utt2spk': utt2spk,
                        'feat': audio_filepath,
                        'feat_shape': (duration, ),  # second
                        'text': text,
                    }))

                total_sec += duration
                total_char += nchars
                total_num += 1

    with codecs.open(manifest_path, 'w', 'utf-8') as out_file:
        for line in json_lines:
            out_file.write(line + '\n')

    # Guard the ratios so an empty subset yields a summary instead of a
    # ZeroDivisionError.
    char_per_sec = total_char / total_sec if total_sec > 0 else 0.0
    sec_per_utt = total_sec / total_num if total_num > 0 else 0.0

    subset = os.path.splitext(manifest_path)[1][1:]
    manifest_dir = os.path.dirname(manifest_path)
    data_dir_name = os.path.split(data_dir)[-1]
    meta_path = os.path.join(manifest_dir, data_dir_name) + '.meta'
    with open(meta_path, 'w') as f:
        print(f"{subset}:", file=f)
        print(f"{total_num} utts", file=f)
        print(f"{total_sec / (60*60)} h", file=f)
        print(f"{total_char} char", file=f)
        print(f"{char_per_sec} char/sec", file=f)
        print(f"{sec_per_utt} sec/utt", file=f)


def prepare_dataset(url, md5sum, target_dir, manifest_path):
    """Make one subset available locally, then write its manifest.

    Download and extraction are skipped when ``target_dir`` already holds a
    "LibriSpeech" entry; the manifest is created in either case.
    """
    extracted_root = os.path.join(target_dir, "LibriSpeech")
    if os.path.exists(extracted_root):
        print("Skip downloading and unpacking. Data already exists in %s." %
              target_dir)
    else:
        archive_path = download(url, md5sum, target_dir)
        unpack(archive_path, target_dir)
    # create manifest json file
    create_manifest(target_dir, manifest_path)


def main():
    """Prepare the selected LibriSpeech subsets in a pool of worker processes.

    test-clean and dev-clean are always prepared; the remaining five subsets
    are added when ``args.full_download`` is true.
    """
    if args.target_dir.startswith('~'):
        args.target_dir = os.path.expanduser(args.target_dir)

    def _task(url, md5sum, subset):
        # One argument tuple for prepare_dataset(url, md5sum, target_dir,
        # manifest_path).
        return (url, md5sum, os.path.join(args.target_dir, subset),
                args.manifest_prefix + "." + subset)

    tasks = [
        _task(URL_TEST_CLEAN, MD5_TEST_CLEAN, "test-clean"),
        _task(URL_DEV_CLEAN, MD5_DEV_CLEAN, "dev-clean"),
    ]
    if args.full_download:
        tasks += [
            _task(URL_TRAIN_CLEAN_100, MD5_TRAIN_CLEAN_100, "train-clean-100"),
            _task(URL_TEST_OTHER, MD5_TEST_OTHER, "test-other"),
            _task(URL_DEV_OTHER, MD5_DEV_OTHER, "dev-other"),
            _task(URL_TRAIN_CLEAN_360, MD5_TRAIN_CLEAN_360, "train-clean-360"),
            _task(URL_TRAIN_OTHER_500, MD5_TRAIN_OTHER_500, "train-other-500"),
        ]

    with Pool(7) as pool:
        pool.starmap(prepare_dataset, tasks)

    print("Data download and manifest prepare done!")


if __name__ == '__main__':
    main()


================================================
FILE: dataset/magicdata/README.md
================================================
# [MagicData](http://openslr.elda.org/68/)

MAGICDATA Mandarin Chinese Read Speech Corpus was developed by MAGIC DATA Technology Co., Ltd. and freely published for non-commercial use.
The contents and the corresponding descriptions of the corpus include:

* The corpus contains 755 hours of speech data, which is mostly mobile recorded data.
* 1080 speakers from different accent areas in China are invited to participate in the recording.
* The sentence transcription accuracy is higher than 98%.
* Recordings are conducted in a quiet indoor environment.
* The database is divided into training set, validation set, and testing set in a ratio of 51: 1: 2.
* Detailed information such as speech data coding and speaker information is preserved in the metadata file.
* The domain of recording texts is diversified, including interactive Q&A, music search, SNS messages, home command and control, etc.
* Segmented transcripts are also provided.

The corpus aims to support researchers in speech recognition, machine translation, speaker recognition, and other speech-related fields. Therefore, the corpus is totally free for academic use.


================================================
FILE: dataset/mini_librispeech/.gitignore
================================================
dev-clean/
manifest.dev-clean
manifest.train-clean
train-clean/
*.meta


================================================
FILE: dataset/mini_librispeech/mini_librispeech.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Prepare Librispeech ASR datasets.

Download, unpack and create manifest files.
Manifest file is a json-format file with each line containing the
meta data (i.e. audio filepath, transcript and audio duration)
of each audio file in the data set.
"""
import argparse
import codecs
import io
import json
import os
from multiprocessing.pool import Pool

import soundfile

from paddlespeech.dataset.download import download
from paddlespeech.dataset.download import unpack

# Base address of the archives and one URL per subset.
URL_ROOT = "http://openslr.elda.org/resources/31"
URL_TRAIN_CLEAN = URL_ROOT + "/train-clean-5.tar.gz"
URL_DEV_CLEAN = URL_ROOT + "/dev-clean-2.tar.gz"

# MD5 digests passed to download() together with the matching URL above.
MD5_TRAIN_CLEAN = "5df7d4e78065366204ca6845bb08f490"
MD5_DEV_CLEAN = "6d7ab67ac6a1d2c993d050e16d61080d"

# Command-line interface; the module docstring is reused as the description.
parser = argparse.ArgumentParser(description=__doc__)
# A leading "~" in this value is expanded in main().
parser.add_argument(
    "--target_dir",
    default='~/.cache/paddle/dataset/speech/libri',
    type=str,
    help="Directory to save the dataset. (default: %(default)s)")
parser.add_argument(
    "--manifest_prefix",
    default="manifest",
    type=str,
    help="Filepath prefix for output manifests. (default: %(default)s)")
# NOTE: parsing happens at import time, so importing this module from another
# program parses that program's sys.argv.
args = parser.parse_args()


def create_manifest(data_dir, manifest_path):
    """Write a JSON-lines manifest and a ``.meta`` summary for ``data_dir``.

    ``data_dir`` is walked recursively. In every directory that contains a
    file whose name ends in ``trans.txt``, the first such file is read; each
    non-blank line is ``<utterance-id> <word> <word> ...`` and the audio is
    expected at ``<utterance-id>.flac`` in the same directory.

    Each manifest line is a JSON object with the keys ``utt``, ``utt2spk``
    (first two "-"-separated fields of ``utt``), ``feat`` (audio path as
    joined from the walked directory), ``feat_shape`` (duration in seconds)
    and ``text`` (lower-cased transcript).

    A summary (utterance count, hours, transcript length and two ratios) is
    written to ``<dirname(manifest_path)>/<basename(data_dir)>.meta``.

    Args:
        data_dir: Root directory of one extracted subset.
        manifest_path: Path of the manifest file to write (UTF-8).
    """
    print("Creating manifest %s ..." % manifest_path)
    json_lines = []
    total_sec = 0.0
    total_text = 0.0
    total_num = 0

    for subfolder, _, filelist in sorted(os.walk(data_dir)):
        text_filelist = [
            filename for filename in filelist if filename.endswith('trans.txt')
        ]
        if not text_filelist:
            continue
        text_filepath = os.path.join(subfolder, text_filelist[0])
        # ``with`` closes the transcript file once it has been consumed.
        with io.open(text_filepath, encoding="utf8") as text_file:
            for line in text_file:
                segments = line.strip().split()
                if not segments:
                    # Blank line: there is no utterance id to look up.
                    continue
                text = ' '.join(segments[1:]).lower()
                audio_filepath = os.path.join(subfolder, segments[0] + '.flac')
                audio_data, samplerate = soundfile.read(audio_filepath)
                duration = float(len(audio_data)) / samplerate

                utt = os.path.splitext(os.path.basename(audio_filepath))[0]
                utt2spk = '-'.join(utt.split('-')[:2])
                json_lines.append(
                    json.dumps({
                        'utt': utt,
                        'utt2spk': utt2spk,
                        'feat': audio_filepath,
                        'feat_shape': (duration, ),  #second
                        'text': text,
                    }))

                total_sec += duration
                # Length of the transcript string, spaces included.
                total_text += len(text)
                total_num += 1

    with codecs.open(manifest_path, 'w', 'utf-8') as out_file:
        for line in json_lines:
            out_file.write(line + '\n')

    # Guard the ratios so an empty subset yields a summary instead of a
    # ZeroDivisionError.
    text_per_sec = total_text / total_sec if total_sec > 0 else 0.0
    sec_per_utt = total_sec / total_num if total_num > 0 else 0.0

    subset = os.path.splitext(manifest_path)[1][1:]
    manifest_dir = os.path.dirname(manifest_path)
    data_dir_name = os.path.split(data_dir)[-1]
    meta_path = os.path.join(manifest_dir, data_dir_name) + '.meta'
    with open(meta_path, 'w') as f:
        print(f"{subset}:", file=f)
        print(f"{total_num} utts", file=f)
        print(f"{total_sec / (60*60)} h", file=f)
        print(f"{total_text} text", file=f)
        print(f"{text_per_sec} text/sec", file=f)
        print(f"{sec_per_utt} sec/utt", file=f)


def prepare_dataset(url, md5sum, target_dir, manifest_path):
    """Fetch and extract one subset unless it is already present, then build its manifest.

    The presence check looks for a "LibriSpeech" entry inside ``target_dir``.
    """
    needs_fetch = not os.path.exists(os.path.join(target_dir, "LibriSpeech"))
    if needs_fetch:
        # download, then unpack next to the archive
        unpack(download(url, md5sum, target_dir), target_dir)
    else:
        print("Skip downloading and unpacking. Data already exists in %s." %
              target_dir)
    # create manifest json file
    create_manifest(target_dir, manifest_path)


def main():
    """Prepare the train-clean and dev-clean subsets in two worker processes."""
    if args.target_dir.startswith('~'):
        args.target_dir = os.path.expanduser(args.target_dir)

    subsets = (
        (URL_TRAIN_CLEAN, MD5_TRAIN_CLEAN, "train-clean"),
        (URL_DEV_CLEAN, MD5_DEV_CLEAN, "dev-clean"), )
    # One (url, md5sum, target_dir, manifest_path) tuple per subset.
    tasks = [(url, md5sum, os.path.join(args.target_dir, name),
              args.manifest_prefix + "." + name)
             for url, md5sum, name in subsets]

    with Pool(2) as pool:
        pool.starmap(prepare_dataset, tasks)

    print("Data download and manifest prepare done!")


if __name__ == '__main__':
    main()


================================================
FILE: dataset/multi_cn/README.md
================================================
# multi-cn

This is a Chinese speech recognition recipe that trains on all Chinese corpora on OpenSLR, including:

* Aidatatang (140 hours)
* Aishell (151 hours)
* MagicData (712 hours)
* Primewords (99 hours)
* ST-CMDS (110 hours)
* THCHS-30 (26 hours)
* optional AISHELL2 (~1000 hours) if available


================================================
FILE: dataset/primewords/README.md
================================================
# [Primewords](http://openslr.elda.org/47/)

This free Chinese Mandarin speech corpus set is released by Shanghai Primewords Information Technology Co., Ltd.
The corpus was recorded with smartphones by 296 native Chinese speakers. The transcription accuracy is higher than 98%, at a confidence level of 95%. It is free for academic use.

The mapping between the transcript and utterance is given in JSON format.


================================================
FILE: dataset/rir_noise/.gitignore
================================================
RIRS_NOISES/
manifest.pointsource_noises
manifest.real_rirs_isotropic_noises
manifest.simulated_rirs
rirs_noises.zip


================================================
FILE: dataset/rir_noise/rir_noise.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Prepare the RIRS_NOISES dataset (room impulse responses and noises)

Download, unpack and create manifest files.
Manifest file is a json-format file with each line containing the
meta data (i.e. audio filepath, transcript and audio duration)
of each audio file in the data set.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import codecs
import json
import os

import soundfile

from paddlespeech.dataset.download import download
from paddlespeech.dataset.download import unzip

# Root cache directory used for the default --target_dir.
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')

# URL_ROOT is the resource directory only; the archive name is appended once
# in DATA_URL. Previously URL_ROOT already ended in "/rirs_noises.zip", so
# DATA_URL carried the file name twice.
# The leading "--no-check-certificate " is kept as part of the string
# (presumably the download helper splices the URL into a wget command line
# -- confirm).
URL_ROOT = '--no-check-certificate https://us.openslr.org/resources/28'
DATA_URL = URL_ROOT + '/rirs_noises.zip'
MD5_DATA = 'e6f48e257286e05de56413b4779d8ffb'

# Command-line interface; the module docstring is reused as the description.
parser = argparse.ArgumentParser(description=__doc__)
# A leading "~" in this value is expanded in main().
parser.add_argument(
    "--target_dir",
    default=DATA_HOME + "/rirs_noise",
    type=str,
    help="Directory to save the dataset. (default: %(default)s)")
# create_manifest() appends ".<data type>" to this prefix for each manifest.
parser.add_argument(
    "--manifest_prefix",
    default="manifest",
    type=str,
    help="Filepath prefix for output manifests. (default: %(default)s)")
# NOTE: parsing happens at import time, so importing this module from another
# program parses that program's sys.argv.
args = parser.parse_args()


def create_manifest(data_dir, manifest_path_prefix):
    """Write one JSON-lines manifest per data type found under ``data_dir``.

    For each of ``pointsource_noises``, ``real_rirs_isotropic_noises`` and
    ``simulated_rirs`` the directory ``<data_dir>/<type>`` is walked
    recursively and every ``.wav`` file becomes one line in
    ``<manifest_path_prefix>.<type>``. Each line is a JSON object with the
    keys ``utt`` (file name without extension), ``feat`` (audio path),
    ``feat_shape`` (duration in seconds) and ``type`` (the data type).

    Args:
        data_dir: Directory containing the per-type sub-directories.
        manifest_path_prefix: Path prefix of the manifests; its directory
            part, when present, is created if missing.
    """
    print("Creating manifest %s ..." % manifest_path_prefix)
    data_types = [
        'pointsource_noises', 'real_rirs_isotropic_noises', 'simulated_rirs'
    ]
    for dtype in data_types:
        # Fresh list per data type so manifests do not mix entries.
        json_lines = []
        audio_dir = os.path.join(data_dir, dtype)
        for subfolder, _, filelist in sorted(os.walk(audio_dir)):
            for fname in filelist:
                audio_path = os.path.join(subfolder, fname)
                if not audio_path.endswith('.wav'):
                    continue
                audio_data, samplerate = soundfile.read(audio_path)
                duration = float(len(audio_data)) / samplerate
                json_lines.append(
                    json.dumps(
                        {
                            'utt':
                            os.path.splitext(os.path.basename(audio_path))[0],
                            'feat':
                            audio_path,
                            'feat_shape': (duration, ),  #second
                            'type':
                            dtype,
                        },
                        ensure_ascii=False))
        manifest_path = manifest_path_prefix + '.' + dtype

        # A bare prefix such as the default "manifest" has an empty directory
        # part; os.makedirs("") raises, so only create a directory when one
        # is actually named.
        manifest_dir = os.path.dirname(manifest_path)
        if manifest_dir and not os.path.exists(manifest_dir):
            os.makedirs(manifest_dir)

        with codecs.open(manifest_path, 'w', 'utf-8') as fout:
            for line in json_lines:
                fout.write(line + '\n')


def prepare_dataset(url, md5sum, target_dir, manifest_path):
    """Fetch and unzip the archive unless "RIRS_NOISES" already exists, then write the manifests."""
    data_dir = os.path.join(target_dir, 'RIRS_NOISES')
    if os.path.exists(data_dir):
        print("Skip downloading and unpacking. Data already exists in %s." %
              target_dir)
    else:
        archive_path = download(url, md5sum, target_dir)
        unzip(archive_path, target_dir)
    create_manifest(data_dir, manifest_path)


def main():
    """Expand a leading "~" in the target directory and prepare the dataset."""
    target = args.target_dir
    if target.startswith('~'):
        target = os.path.expanduser(target)
        args.target_dir = target

    prepare_dataset(DATA_URL, MD5_DATA, target, args.manifest_prefix)


if __name__ == '__main__':
    main()


================================================
FILE: dataset/st-cmds/README.md
================================================
# [FreeST](http://openslr.elda.org/38/)


================================================
FILE: dataset/tal_cs/README.md
================================================
# [TAL_CSASR](https://ai.100tal.com/dataset/)

This data set is TAL English class audio, including mixed Chinese and English speech. Each audio has only one speaker, and this data set has more than 100 speakers. (File 63.36G) This data contains the sample of intra sentence and inter sentence mixing. The ratio between Chinese characters and English words in the data is 13:1. 

- Total data: 587H (train_set: 555.9H, dev_set: 8H, test_set: 23.6H)
- Sample rate: 16000
- Sample bit: 16
- Recording device: microphone
- Speaker number: 200+
- Recording time: 2019
- Data format: audio: .wav; text: .txt
- Audio duration: 1-60s
- Data type: audio of English teachers' teaching


================================================
FILE: dataset/tal_cs/tal_cs.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Prepare TALCS ASR datasets.

create manifest files.
Manifest file is a json-format file with each line containing the
meta data (i.e. audio filepath, transcript and audio duration)
of each audio file in the data set.
"""
import argparse
import codecs
import io
import json
import os

import soundfile

# Command-line interface; the module docstring is reused as the description.
# Neither option declares a default, so "%(default)s" in the help text renders
# as None, and omitting an option makes the os.path.join calls below fail.
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
    "--target_dir",
    type=str,
    help="Directory to save the dataset. (default: %(default)s)")
parser.add_argument(
    "--manifest_prefix",
    type=str,
    help="Filepath prefix for output manifests. (default: %(default)s)")
# NOTE: parsing happens at import time, so importing this module from another
# program parses that program's sys.argv.
args = parser.parse_args()

# Per-split data directories, derived from the raw --target_dir value at
# import time (i.e. before main() runs).
TRAIN_SET = os.path.join(args.target_dir, "train_set")
DEV_SET = os.path.join(args.target_dir, "dev_set")
TEST_SET = os.path.join(args.target_dir, "test_set")

# Output manifests; --manifest_prefix is used as a directory here.
manifest_train_path = os.path.join(args.manifest_prefix, "manifest.train.raw")
manifest_dev_path = os.path.join(args.manifest_prefix, "manifest.dev.raw")
manifest_test_path = os.path.join(args.manifest_prefix, "manifest.test.raw")


def create_manifest(data_dir, manifest_path):
    """Write a JSON-lines manifest and a ``.meta`` summary for one data split.

    ``<data_dir>/label.txt`` is read once; each non-blank line is
    ``<utterance-id> <token> <token> ...``. The audio for an utterance is
    looked up by file name (``<utterance-id>.wav``) anywhere below
    ``<data_dir>/wav``; when no such file was found the path directly inside
    ``<data_dir>/wav`` is used, so the audio reader reports the missing file.

    Each manifest line is a JSON object with the keys ``utt``, ``utt2spk``
    (first two "-"-separated fields of ``utt``), ``feat`` (absolute audio
    path), ``feat_shape`` (duration in seconds) and ``text`` (lower-cased
    transcript).

    A summary (utterance count, hours, token count and two ratios) is written
    to ``<dirname(manifest_path)>/<basename(data_dir)>.meta``.

    Args:
        data_dir: Directory holding ``wav/`` and ``label.txt``.
        manifest_path: Path of the manifest file to write (UTF-8).
    """
    print("Creating manifest %s ..." % manifest_path)
    json_lines = []
    total_sec = 0.0
    total_char = 0.0
    total_num = 0
    wav_dir = os.path.join(data_dir, 'wav')
    text_filepath = os.path.join(data_dir, 'label.txt')

    # Index every wav file by utterance id once, instead of re-reading the
    # whole label file for each walked directory and assuming every utterance
    # lives in that directory.
    suffix = '.wav'
    wav_index = {}
    for subfolder, _, filelist in sorted(os.walk(wav_dir)):
        for fname in filelist:
            if fname.endswith(suffix):
                # First occurrence wins if the same name appears twice.
                wav_index.setdefault(
                    fname[:-len(suffix)],
                    os.path.abspath(os.path.join(subfolder, fname)))

    # ``with`` closes the label file once it has been consumed.
    with io.open(text_filepath, encoding="utf8") as text_file:
        for line in text_file:
            segments = line.strip().split()
            if not segments:
                # Blank line: there is no utterance id to look up.
                continue
            # Counts whitespace-separated tokens, not characters.
            nchars = len(segments[1:])
            text = ' '.join(segments[1:]).lower()

            audio_filepath = wav_index.get(segments[0])
            if audio_filepath is None:
                audio_filepath = os.path.abspath(
                    os.path.join(wav_dir, segments[0] + suffix))
            audio_data, samplerate = soundfile.read(audio_filepath)
            duration = float(len(audio_data)) / samplerate

            utt = os.path.splitext(os.path.basename(audio_filepath))[0]
            utt2spk = '-'.join(utt.split('-')[:2])

            json_lines.append(
                json.dumps({
                    'utt': utt,
                    'utt2spk': utt2spk,
                    'feat': audio_filepath,
                    'feat_shape': (duration, ),  # second
                    'text': text,
                }))

            total_sec += duration
            total_char += nchars
            total_num += 1

    with codecs.open(manifest_path, 'w', 'utf-8') as out_file:
        for line in json_lines:
            out_file.write(line + '\n')

    # Guard the ratios so an empty split yields a summary instead of a
    # ZeroDivisionError.
    char_per_sec = total_char / total_sec if total_sec > 0 else 0.0
    sec_per_utt = total_sec / total_num if total_num > 0 else 0.0

    subset = os.path.splitext(manifest_path)[1][1:]
    manifest_dir = os.path.dirname(manifest_path)
    data_dir_name = os.path.split(data_dir)[-1]
    meta_path = os.path.join(manifest_dir, data_dir_name) + '.meta'
    with open(meta_path, 'w') as f:
        print(f"{subset}:", file=f)
        print(f"{total_num} utts", file=f)
        print(f"{total_sec / (60*60)} h", file=f)
        print(f"{total_char} char", file=f)
        print(f"{char_per_sec} char/sec", file=f)
        print(f"{sec_per_utt} sec/utt", file=f)


def main():
    """Script entry point: build the train, dev and test manifests.

    A leading ``~`` in ``args.target_dir`` is expanded to the user's home
    directory first; the three manifests are then created in train, dev,
    test order.
    """
    target_dir = args.target_dir
    if target_dir.startswith('~'):
        args.target_dir = os.path.expanduser(target_dir)

    # One manifest per subset, written in a fixed order.
    create_manifest(TRAIN_SET, manifest_train_path)
    create_manifest(DEV_SET, manifest_dev_path)
    create_manifest(TEST_SET, manifest_test_path)

    print("Data download and manifest prepare done!")


if __name__ == '__main__':
    main()


================================================
FILE: dataset/ted_en_zh/.gitignore
================================================
*.tar.gz.*
manifest.*
*.md
EN-ZH/
train-split/
test-segment/

================================================
FILE: dataset/ted_en_zh/ted_en_zh.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Prepare Ted-En-Zh speech translation dataset

Create manifest files from the split dataset.
dev set: tst2010, test set: tst2015
Manifest file is a json-format file with each line containing the
meta data (i.e. audio filepath, transcript and audio duration)
of each audio file in the data set.
"""
import argparse
import codecs
import json
import os

import soundfile

# Command-line interface. The module docstring is reused as the --help
# description. NOTE: the arguments are parsed at import time, so importing
# this module consumes sys.argv.
parser = argparse.ArgumentParser(description=__doc__)
# Root directory of the dataset; create_manifest() joins the per-split audio
# and text paths onto it. argparse exposes "--src-dir" as `args.src_dir`.
parser.add_argument(
    "--src-dir",
    default="",
    type=str,
    help="Directory to kaldi splited data. (default: %(default)s)")
# Prefix of the output files; create_manifest() appends ".<split>.raw" to it.
parser.add_argument(
    "--manifest_prefix",
    default="manifest",
    type=str,
    help="Filepath prefix for output manifests. (default: %(default)s)")
args = parser.parse_args()


def create_manifest(data_dir, manifest_path_prefix):
    """Write one raw manifest per split (train/dev/test) of the Ted-En-Zh data.

    For every split the TAB-separated text file is read line by line; each
    non-empty line must hold exactly three fields: audio file name,
    transcription and translation. An utterance is kept only when its audio
    file exists under the split's audio directory and is at least 30000 bytes
    large. Each kept utterance becomes one JSON line with the keys ``utt``,
    ``feat`` (audio path), ``feat_shape`` (duration in seconds) and ``text``
    (``[translation, transcription]``, whitespace-normalized).

    Args:
        data_dir (str): Root directory containing the ``En-Zh`` text files and
            the ``train-split`` / ``test-segment`` audio directories.
        manifest_path_prefix (str): Output prefix; the manifest of a split is
            written to ``<prefix>.<split>.raw``.

    Raises:
        ValueError: If a non-empty text line does not have exactly three
            TAB-separated fields.
    """
    print("Creating manifest %s ..." % manifest_path_prefix)

    # (split name, audio directory, text file), all relative to data_dir.
    data_types_infos = [
        ('train', 'train-split/train-segment', 'En-Zh/train.en-zh'),
        ('dev', 'test-segment/tst2010', 'En-Zh/tst2010.en-zh'),
        ('test', 'test-segment/tst2015', 'En-Zh/tst2015.en-zh')
    ]
    for dtype, audio_relative_dir, text_relative_path in data_types_infos:
        json_lines = []
        total_num = 0

        text_path = os.path.join(data_dir, text_relative_path)
        audio_dir = os.path.join(data_dir, audio_relative_dir)

        # `with` guarantees the text file is closed even if a line is malformed.
        with codecs.open(text_path, 'r', 'utf-8', errors='ignore') as fin:
            for line in fin:
                line = line.strip()
                if len(line) < 1:
                    continue
                audio_id, trancription, translation = line.split('\t')
                utt = audio_id.split('.')[0]

                audio_path = os.path.join(audio_dir, audio_id)
                # Utterances without audio, or with very small audio files,
                # are silently dropped.
                if not os.path.exists(audio_path):
                    continue
                if os.path.getsize(audio_path) < 30000:
                    continue

                audio_data, samplerate = soundfile.read(audio_path)
                duration = float(len(audio_data) / samplerate)

                translation_str = " ".join(translation.split())
                trancription_str = " ".join(trancription.split())
                json_lines.append(
                    json.dumps(
                        {
                            'utt': utt,
                            'feat': audio_path,
                            'feat_shape': (duration, ),  # second
                            'text': [translation_str, trancription_str],
                        },
                        ensure_ascii=False))

                total_num += 1
                # Progress report every 1000 kept utterances.
                if not total_num % 1000:
                    print(dtype, 'Processed:', total_num)

        manifest_path = manifest_path_prefix + '.' + dtype + '.raw'
        with codecs.open(manifest_path, 'w', 'utf-8') as fout:
            for line in json_lines:
                fout.write(line + '\n')


def prepare_dataset(src_dir, manifest_path=None):
    """Create the manifest files for the dataset found in ``src_dir``.

    Args:
        src_dir (str): Root directory of the prepared dataset.
        manifest_path (str, optional): Output prefix for the manifests. When it
            names an existing directory, the files are written inside it with
            the prefix ``manifest``. When ``None`` or empty, nothing is done.
    """
    # Test for None/empty first: os.path.isdir(None) raises TypeError, which
    # made the documented default argument unusable.
    if not manifest_path:
        return
    if os.path.isdir(manifest_path):
        manifest_path = os.path.join(manifest_path, 'manifest')
    create_manifest(src_dir, manifest_path)


def main():
    """Script entry point: expand ``~`` in ``args.src_dir`` and build the manifests."""
    src_dir = args.src_dir
    if src_dir.startswith('~'):
        src_dir = os.path.expanduser(src_dir)
        args.src_dir = src_dir

    prepare_dataset(src_dir=src_dir, manifest_path=args.manifest_prefix)

    print("manifest prepare done!")


if __name__ == '__main__':
    main()


================================================
FILE: dataset/thchs30/.gitignore
================================================
*.tgz
manifest.*
data_thchs30
resource
test-noise
*.meta


================================================
FILE: dataset/thchs30/README.md
================================================
# [THCHS30](http://openslr.elda.org/18/)

This is the *data part* of the `THCHS30 2015` acoustic data
& scripts dataset.

The dataset is described in more detail in the paper ``THCHS-30 : A Free
Chinese Speech Corpus`` by Dong Wang, Xuewei Zhang.

A paper (if it can be called a paper) 13 years ago regarding the database:

Dong Wang, Dalei Wu, Xiaoyan Zhu, ``TCMSD: A new Chinese Continuous Speech Database``,
International Conference on Chinese Computing (ICCC'01), 2001, Singapore.

The layout of this data pack is the following:

  ``data``
      ``*.wav``
        audio data

      ``*.wav.trn``  
        transcriptions

  ``{train,dev,test}``
    contain symlinks into the ``data`` directory for both audio and
    transcription files. Contents of these directories define the
    train/dev/test split of the data.

  ``{lm_word}``
       ``word.3gram.lm``
         trigram LM based on word
        ``lexicon.txt``
         lexicon based on word

   ``{lm_phone}``
       ``phone.3gram.lm``
         trigram LM based on phone
        ``lexicon.txt``
         lexicon based on phone

  ``README.TXT``
    this file


Data statistics
===============

Statistics for the data are as follows:

    ===========  ==========  ==========  ===========
    **dataset**  **audio**   **#sents**  **#words**
    ===========  ==========  ==========  ===========
        train        25        10,000      198,252
        dev         2:14         893        17,743
        test        6:15        2,495       49,085
    ===========  ==========  ==========  ===========


================================================
FILE: dataset/thchs30/thchs30.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Prepare THCHS-30 mandarin dataset

Download, unpack and create manifest files.
Manifest file is a json-format file with each line containing the
meta data (i.e. audio filepath, transcript and audio duration)
of each audio file in the data set.
"""
import argparse
import codecs
import json
import os
from multiprocessing.pool import Pool
from pathlib import Path

import soundfile

from paddlespeech.dataset.download import download
from paddlespeech.dataset.download import unpack

# Default root directory under which the dataset is stored.
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')

# Base URL of the three archives (main data, test noise, resources).
URL_ROOT = 'http://openslr.elda.org/resources/18'
# Alternative base URL, kept for reference:
# URL_ROOT = 'https://openslr.magicdatatech.com/resources/18'
DATA_URL = URL_ROOT + '/data_thchs30.tgz'
TEST_NOISE_URL = URL_ROOT + '/test-noise.tgz'
RESOURCE_URL = URL_ROOT + '/resource.tgz'
# MD5 checksums passed to download() together with the matching URL.
MD5_DATA = '2d2252bde5c8429929e1841d4cb95e90'
MD5_TEST_NOISE = '7e8a985fb965b84141b68c68556c2030'
MD5_RESOURCE = 'c0b2a565b4970a0c4fe89fefbf2d97e1'

# Command-line interface. The module docstring is reused as the --help
# description. NOTE: the arguments are parsed at import time.
parser = argparse.ArgumentParser(description=__doc__)
# Directory the archives are downloaded to and unpacked in.
parser.add_argument(
    "--target_dir",
    default=DATA_HOME + "/THCHS30",
    type=str,
    help="Directory to save the dataset. (default: %(default)s)")
# Prefix of the manifest files; create_manifest() appends ".<split>" to it.
parser.add_argument(
    "--manifest_prefix",
    default="manifest",
    type=str,
    help="Filepath prefix for output manifests. (default: %(default)s)")
args = parser.parse_args()


def read_trn(filepath):
    """Read a ``.trn`` transcript file.

    The file must contain exactly three lines: the word text, the syllable
    text and the phoneme text, in that order.

    Args:
        filepath (str): Path of the ``.trn`` file.

    Returns:
        list(str): ``[word, syllable, phone]``. All whitespace is removed from
        the word text (giving a plain character string); the syllable and
        phone lines are returned unchanged.

    Raises:
        AssertionError: If the file does not contain exactly three lines.
    """
    # The transcripts hold Chinese text; decode explicitly as UTF-8 so the
    # result does not depend on the locale's default encoding.
    with open(filepath, 'r', encoding='utf-8') as f:
        lines = f.read().strip().split('\n')
    assert len(lines) == 3, lines

    # character text, remove whitespace
    texts = [''.join(lines[0].split())]
    texts.extend(lines[1:])
    return texts


def resolve_symlink(filepath):
    """Resolve a "symlink" that is stored as a regular text file.

    The file at ``filepath`` contains a relative path as its text content.
    That path is interpreted relative to the directory of ``filepath``.

    Args:
        filepath (str): Path of the regular file holding the link target.

    Returns:
        pathlib.Path: The resolved path of the link target.
    """
    link_file = Path(filepath)
    target = Path(link_file.read_text().strip())
    return (link_file.parent / target).resolve()


def create_manifest(data_dir, manifest_path_prefix):
    """Write a manifest and a ``.meta`` statistics file for each split.

    For each of ``train``, ``dev`` and ``test`` the directory
    ``<data_dir>/<split>`` is walked. Every ``*.wav`` file becomes one JSON
    line with the keys ``utt``, ``utt2spk``, ``feat``, ``feat_shape``
    (duration in seconds), ``text``, ``syllable`` and ``phone``. The
    transcript is read from the file that ``<wav>.trn`` points to.

    Args:
        data_dir (str): Directory containing the ``train``/``dev``/``test``
            sub-directories.
        manifest_path_prefix (str): Output prefix; a split's manifest is
            written to ``<prefix>.<split>`` and its statistics to
            ``<dirname(prefix)>/<split>.meta``.

    Raises:
        AssertionError: If an audio file or its resolved transcript is missing.
    """
    print("Creating manifest %s ..." % manifest_path_prefix)
    data_types = ['train', 'dev', 'test']
    for dtype in data_types:
        json_lines = []
        total_sec = 0.0
        total_text = 0.0
        total_num = 0

        audio_dir = os.path.join(data_dir, dtype)
        for subfolder, _, filelist in sorted(os.walk(audio_dir)):
            # os.walk lists files in arbitrary order; sort them so the
            # manifest is reproducible across runs and file systems.
            for fname in sorted(filelist):
                if not fname.endswith('.wav'):
                    continue
                audio_path = os.path.abspath(os.path.join(subfolder, fname))
                text_path = resolve_symlink(audio_path + '.trn')

                assert os.path.exists(audio_path) and os.path.exists(text_path)

                audio_id = os.path.basename(audio_path)[:-4]
                # Speaker id is the part of the file name before the first '_'.
                spk = audio_id.split('_')[0]

                word_text, syllable_text, phone_text = read_trn(text_path)
                audio_data, samplerate = soundfile.read(audio_path)
                duration = float(len(audio_data) / samplerate)

                # not dump alignment infos
                json_lines.append(
                    json.dumps(
                        {
                            'utt': audio_id,
                            'utt2spk': spk,
                            'feat': audio_path,
                            'feat_shape': (duration, ),  # second
                            'text': word_text,  # character
                            'syllable': syllable_text,
                            'phone': phone_text,
                        },
                        ensure_ascii=False))

                total_sec += duration
                total_text += len(word_text)
                total_num += 1

        manifest_path = manifest_path_prefix + '.' + dtype
        with codecs.open(manifest_path, 'w', 'utf-8') as fout:
            for line in json_lines:
                fout.write(line + '\n')

        # An empty split would otherwise raise ZeroDivisionError below.
        text_per_sec = total_text / total_sec if total_sec else 0.0
        sec_per_utt = total_sec / total_num if total_num else 0.0

        manifest_dir = os.path.dirname(manifest_path_prefix)
        meta_path = os.path.join(manifest_dir, dtype) + '.meta'
        with open(meta_path, 'w') as f:
            print(f"{dtype}:", file=f)
            print(f"{total_num} utts", file=f)
            print(f"{total_sec / (60*60)} h", file=f)
            print(f"{total_text} text", file=f)
            print(f"{text_per_sec} text/sec", file=f)
            print(f"{sec_per_utt} sec/utt", file=f)


def prepare_dataset(url, md5sum, target_dir, manifest_path, subset):
    """Fetch and unpack one archive; build manifests for the main data subset.

    Args:
        url (str): Download URL of the archive.
        md5sum (str): Checksum handed to ``download`` together with ``url``.
        target_dir (str): Directory the archive is downloaded to and unpacked in.
        manifest_path (str): Prefix of the manifest files.
        subset (str): Name of the directory expected under ``target_dir``.
            Manifests are only created when it equals ``'data_thchs30'``.
    """
    datadir = os.path.join(target_dir, subset)
    if os.path.exists(datadir):
        print("Skip downloading and unpacking. Data already exists in %s." %
              target_dir)
    else:
        archive = download(url, md5sum, target_dir)
        unpack(archive, target_dir)

    # Only the main data archive has transcripts to index.
    if subset == 'data_thchs30':
        create_manifest(datadir, manifest_path)


def main():
    """Script entry point: process the three archives in a process pool."""
    if args.target_dir.startswith('~'):
        args.target_dir = os.path.expanduser(args.target_dir)

    # (url, md5, sub-directory expected after unpacking)
    archives = (
        (DATA_URL, MD5_DATA, "data_thchs30"),
        (TEST_NOISE_URL, MD5_TEST_NOISE, "test-noise"),
        (RESOURCE_URL, MD5_RESOURCE, "resource"),
    )
    tasks = [(url, md5, args.target_dir, args.manifest_prefix, subset)
             for url, md5, subset in archives]

    with Pool(7) as pool:
        pool.starmap(prepare_dataset, tasks)

    print("Data download and manifest prepare done!")


if __name__ == '__main__':
    main()


================================================
FILE: dataset/timit/.gitignore
================================================
TIMIT.*
TIMIT
manifest.*
*.meta


================================================
FILE: dataset/timit/timit.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Prepare TIMIT dataset.

Download, unpack and create manifest files.
Manifest file is a json-format file with each line containing the
meta data (i.e. audio filepath, transcript and audio duration)
of each audio file in the data set.
"""
import argparse
import codecs
import json
import os
import re
import string
from pathlib import Path

import soundfile

from paddlespeech.dataset.download import unzip

# No download URL is configured: prepare_dataset() receives this value but
# expects TIMIT.zip to be placed in the target directory by hand.
URL_ROOT = ""
# Checksum verified against TIMIT.zip before it is unpacked.
MD5_DATA = "45c68037c7fdfe063a43c851f181fb2d"

# Command-line interface. The module docstring is reused as the --help
# description. NOTE: the arguments are parsed at import time.
parser = argparse.ArgumentParser(description=__doc__)
# Directory that must contain TIMIT.zip; a leading "~" is expanded in main().
parser.add_argument(
    "--target_dir",
    default='~/.cache/paddle/dataset/speech/timit',
    type=str,
    help="Directory to save the dataset. (default: %(default)s)")
# Prefix of the manifest files; create_manifest() appends ".<split>" to it.
parser.add_argument(
    "--manifest_prefix",
    default="manifest",
    type=str,
    help="Filepath prefix for output manifests. (default: %(default)s)")
args = parser.parse_args()

#: A string containing Chinese punctuation marks (non-stops).
non_stops = (
    # Fullwidth ASCII variants
    '\uFF02\uFF03\uFF04\uFF05\uFF06\uFF07\uFF08\uFF09\uFF0A\uFF0B\uFF0C\uFF0D'
    '\uFF0F\uFF1A\uFF1B\uFF1C\uFF1D\uFF1E\uFF20\uFF3B\uFF3C\uFF3D\uFF3E\uFF3F'
    '\uFF40\uFF5B\uFF5C\uFF5D\uFF5E\uFF5F\uFF60'

    # Halfwidth CJK punctuation
    '\uFF62\uFF63\uFF64'

    # CJK symbols and punctuation
    '\u3000\u3001\u3003'

    # CJK angle and corner brackets
    '\u3008\u3009\u300A\u300B\u300C\u300D\u300E\u300F\u3010\u3011'

    # CJK brackets and symbols/punctuation
    '\u3014\u3015\u3016\u3017\u3018\u3019\u301A\u301B\u301C\u301D\u301E\u301F'

    # Other CJK symbols
    '\u3030'

    # Special CJK indicators
    '\u303E\u303F'

    # Dashes
    '\u2013\u2014'

    # Quotation marks and apostrophe
    '\u2018\u2019\u201B\u201C\u201D\u201E\u201F'

    # General punctuation
    '\u2026\u2027'

    # Overscores and underscores
    '\uFE4F'

    # Small form variants
    '\uFE51\uFE54'

    # Latin punctuation
    '\u00B7')

#: A string of Chinese stops.
stops = (
    '\uFF01'  # Fullwidth exclamation mark
    '\uFF1F'  # Fullwidth question mark
    '\uFF61'  # Halfwidth ideographic full stop
    '\u3002'  # Ideographic full stop
)

#: A string containing all Chinese punctuation.
punctuation = non_stops + stops


def tn(text):
    """Normalize text: lowercase it and strip all punctuation.

    Both the Chinese punctuation listed in ``punctuation`` and the ASCII
    punctuation of ``string.punctuation`` are removed. Whitespace is kept.

    Args:
        text (str): Raw text.

    Returns:
        str: Lowercased text without punctuation characters.
    """
    # lower text
    text = text.lower()
    # remove punc
    # The characters are escaped before being placed in the character class:
    # string.punctuation contains the sequence `\]`, which an unescaped class
    # reads as an escaped `]`, so a literal backslash was never removed.
    text = re.sub(f'[{re.escape(punctuation + string.punctuation)}]', "", text)
    return text


def read_txt(filepath: str) -> str:
    """Read a transcript file and return its normalized text.

    The first two whitespace-separated fields of the file content are dropped;
    the remainder is passed through ``tn``.

    Args:
        filepath (str): Path of the transcript file.

    Returns:
        str: The text after ``tn`` normalization.
    """
    with open(filepath, 'r') as f:
        content = f.read()
        # fields[0] and fields[1] are discarded; fields[2] is the sentence.
        fields = content.strip().split(maxsplit=2)
        return tn(fields[2])


def read_algin(filepath: str) -> str:
    """Read a word or phone alignment file and return its tokens.

    Every non-blank line has the form
    ``<start-sample> <end-sample> <token><newline>``.
    Lines whose token is ``h#`` are skipped.

    Args:
        filepath (str): Path of the alignment file.

    Returns:
        str: The remaining tokens, in file order, separated by one space.

    Raises:
        IndexError: If a non-blank line has fewer than three fields.
    """
    tokens = []
    with open(filepath, 'r') as f:
        for line in f:
            items = line.strip().split()
            # Tolerate blank lines (e.g. a trailing empty line) instead of
            # failing on items[2].
            if not items:
                continue
            # for phone: (Note: beginning and ending silence regions are marked with h#)
            if items[2].strip() == 'h#':
                continue
            tokens.append(items[2])
    return ' '.join(tokens)


def create_manifest(data_dir, manifest_path_prefix):
    """Create a manifest json file summarizing the data set, with each line
    containing the meta data (i.e. audio filepath, transcription text, audio
    duration) of each audio file within the data set.

    The ``TRAIN`` and ``TEST`` directories under ``data_dir`` are searched
    recursively for ``*.WAV`` files. The sibling ``.TXT`` and ``.PHN`` files
    supply the word and phone text. A ``.meta`` statistics file is written
    next to each manifest.

    Args:
        data_dir (str): Directory containing ``TRAIN`` and ``TEST``.
        manifest_path_prefix (str): Output prefix; a split's manifest is
            written to ``<prefix>.<split lowercased>`` and its statistics to
            ``<dirname(prefix)>/<split lowercased>.meta``.
    """
    print("Creating manifest %s ..." % manifest_path_prefix)
    # File stems already emitted; shared by both splits.
    utts = set()

    data_types = ['TRAIN', 'TEST']
    for dtype in data_types:
        json_lines = []
        total_sec = 0.0
        total_text = 0.0
        total_num = 0

        audio_dir = Path(os.path.join(data_dir, dtype))
        for fname in sorted(audio_dir.rglob('*.WAV')):
            audio_path = fname.resolve()  # .WAV
            audio_id = audio_path.stem
            # if uttid exists, then skipped
            # NOTE(review): the check uses the bare file stem, not the
            # speaker-qualified `utt_id` built below, so a stem that occurs
            # under several speaker directories is kept only once. Confirm
            # this is intended before changing it.
            if audio_id in utts:
                continue

            utts.add(audio_id)
            text_path = audio_path.with_suffix('.TXT')
            phone_path = audio_path.with_suffix('.PHN')

            audio_data, samplerate = soundfile.read(
                str(audio_path), dtype='int16')
            duration = float(len(audio_data) / samplerate)
            word_text = read_txt(text_path)
            phone_text = read_algin(phone_path)

            # The parent directory name is split into its first character
            # (stored as gender) and the rest (stored as speaker id).
            gender_spk = str(audio_path.parent.stem)
            spk = gender_spk[1:]
            gender = gender_spk[0]
            utt_id = '_'.join([spk, gender, audio_id])
            # not dump alignment infos
            json_lines.append(
                json.dumps(
                    {
                        'utt': utt_id,
                        'utt2spk': spk,
                        'utt2gender': gender,
                        'feat': str(audio_path),
                        'feat_shape': (duration, ),  # second
                        'text': word_text,  # word
                        'phone': phone_text,
                    },
                    ensure_ascii=False))

            total_sec += duration
            total_text += len(word_text.split())
            total_num += 1

        manifest_path = manifest_path_prefix + '.' + dtype.lower()
        with codecs.open(manifest_path, 'w', 'utf-8') as fout:
            for line in json_lines:
                fout.write(line + '\n')

        # An empty split would otherwise raise ZeroDivisionError below.
        text_per_sec = total_text / total_sec if total_sec else 0.0
        sec_per_utt = total_sec / total_num if total_num else 0.0

        manifest_dir = os.path.dirname(manifest_path_prefix)
        meta_path = os.path.join(manifest_dir, dtype.lower()) + '.meta'
        with open(meta_path, 'w') as f:
            print(f"{dtype}:", file=f)
            print(f"{total_num} utts", file=f)
            print(f"{total_sec / (60*60)} h", file=f)
            print(f"{total_text} text", file=f)
            print(f"{text_per_sec} text/sec", file=f)
            print(f"{sec_per_utt} sec/utt", file=f)


def prepare_dataset(url, md5sum, target_dir, manifest_path):
    """Verify and unpack a manually downloaded TIMIT.zip, then build manifests.

    Nothing is downloaded: ``TIMIT.zip`` must already be present in
    ``target_dir``. It is checked and unpacked only when
    ``<target_dir>/TIMIT`` does not exist yet.

    Args:
        url (str): Unused; kept for signature compatibility.
        md5sum (str): Expected MD5 checksum of ``TIMIT.zip``.
        target_dir (str): Directory holding ``TIMIT.zip``.
        manifest_path (str): Prefix of the manifest files.

    Raises:
        FileNotFoundError: If ``TIMIT.zip`` is missing from ``target_dir``.
        AssertionError: If the checksum of ``TIMIT.zip`` does not match.
    """
    filepath = os.path.join(target_dir, "TIMIT.zip")
    if not os.path.exists(filepath):
        print(f"Please download TIMIT.zip into {target_dir}.")
        raise FileNotFoundError(f"TIMIT.zip not found in {target_dir}")

    if not os.path.exists(os.path.join(target_dir, "TIMIT")):
        # check_md5sum is not imported at module level (only `unzip` is), so
        # this branch used to fail with NameError. Import it here, from the
        # module the file already depends on.
        from paddlespeech.dataset.download import check_md5sum

        # check md5sum
        assert check_md5sum(filepath,
                            md5sum), f"{filepath} MD5 checksum failed"
        # unpack
        unzip(filepath, target_dir)
    else:
        print("Skip downloading and unpacking. Data already exists in %s." %
              target_dir)
    # create manifest json file
    create_manifest(os.path.join(target_dir, "TIMIT"), manifest_path)


def main():
    """Script entry point: expand ``~`` in the target directory and prepare TIMIT."""
    target_dir = args.target_dir
    if target_dir.startswith('~'):
        target_dir = os.path.expanduser(target_dir)
        args.target_dir = target_dir

    prepare_dataset(URL_ROOT, MD5_DATA, target_dir, args.manifest_prefix)
    print("Data download and manifest prepare done!")


if __name__ == '__main__':
    main()


================================================
FILE: dataset/timit/timit_kaldi_standard_split.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Prepare TIMIT dataset (Standard split from Kaldi)

Create manifest files from the split dataset.
Manifest file is a json-format file with each line containing the
meta data (i.e. audio filepath, transcript and audio duration)
of each audio file in the data set.
"""
import argparse
import codecs
import json
import os
from pathlib import Path

import soundfile

# Command-line interface. The module docstring is reused as the --help
# description. NOTE: the arguments are parsed at import time.
parser = argparse.ArgumentParser(description=__doc__)
# Directory holding the per-split "<split>.text" and "<split>_sph.scp" files
# read by create_manifest().
parser.add_argument(
    "--src_dir",
    default="",
    type=str,
    help="Directory to kaldi splited data. (default: %(default)s)")
# Prefix of the output files; create_manifest() appends ".<split>.raw" to it.
parser.add_argument(
    "--manifest_prefix",
    default="manifest",
    type=str,
    help="Filepath prefix for output manifests. (default: %(default)s)")
args = parser.parse_args()


def create_manifest(data_dir, manifest_path_prefix):
    """Write one raw manifest per split (train/dev/test).

    For each split two files are read from ``data_dir``:

    * ``<split>.text``: one ``<utt id> <text>`` pair per line;
    * ``<split>_sph.scp``: one ``<utt id> <audio path>`` pair per line.

    Every scp entry becomes one JSON line with the keys ``utt``, ``utt2spk``,
    ``utt2gender``, ``feat``, ``feat_shape`` (duration in seconds) and
    ``text``. Blank lines are ignored in both files.

    Args:
        data_dir (str): Directory containing the text and scp files.
        manifest_path_prefix (str): Output prefix; the manifest of a split is
            written to ``<prefix>.<split>.raw``.

    Raises:
        AssertionError: If an scp entry has no matching transcription.
    """
    print("Creating manifest %s ..." % manifest_path_prefix)

    data_types = ['train', 'dev', 'test']
    for dtype in data_types:
        json_lines = []

        # utterance id -> transcription
        phn_path = os.path.join(data_dir, dtype + '.text')
        phn_dict = {}
        with codecs.open(phn_path, 'r', 'utf-8') as fin:
            for line in fin:
                line = line.strip()
                if line == '':
                    continue
                audio_id, text = line.split(' ', 1)
                phn_dict[audio_id] = text

        scp_path = os.path.join(data_dir, dtype + '_sph.scp')
        with codecs.open(scp_path, 'r', 'utf-8') as fin:
            for line in fin:
                line = line.strip()
                # Skip blank lines here as well, mirroring the text file
                # handling above, instead of failing on tuple unpacking.
                if line == '':
                    continue
                audio_id, audio_path = line.split()
                # if no transcription for audio then raise error
                assert audio_id in phn_dict
                audio_data, samplerate = soundfile.read(audio_path)
                duration = float(len(audio_data) / samplerate)
                text = phn_dict[audio_id]

                # The parent directory name is split into its first character
                # (stored as gender) and the rest (stored as speaker id).
                gender_spk = str(Path(audio_path).parent.stem)
                spk = gender_spk[1:]
                gender = gender_spk[0]
                json_lines.append(
                    json.dumps(
                        {
                            'utt': audio_id,
                            'utt2spk': spk,
                            'utt2gender': gender,
                            'feat': audio_path,
                            'feat_shape': (duration, ),  # second
                            'text': text
                        },
                        ensure_ascii=False))

        manifest_path = manifest_path_prefix + '.' + dtype + '.raw'
        with codecs.open(manifest_path, 'w', 'utf-8') as fout:
            for line in json_lines:
                fout.write(line + '\n')


def prepare_dataset(src_dir, manifest_path=None):
    """Create the manifest files for the split data found in ``src_dir``.

    Args:
        src_dir (str): Directory holding the per-split text and scp files.
        manifest_path (str, optional): Output prefix for the manifests. When it
            names an existing directory, the files are written inside it with
            the prefix ``manifest``. When ``None`` or empty, nothing is done.
    """
    # Test for None/empty first: os.path.isdir(None) raises TypeError, which
    # made the documented default argument unusable.
    if not manifest_path:
        return
    if os.path.isdir(manifest_path):
        manifest_path = os.path.join(manifest_path, 'manifest')
    create_manifest(src_dir, manifest_path)


def main():
    """Script entry point: expand ``~`` in ``args.src_dir`` and build the manifests."""
    src_dir = args.src_dir
    if src_dir.startswith('~'):
        src_dir = os.path.expanduser(src_dir)
        args.src_dir = src_dir

    prepare_dataset(src_dir=src_dir, manifest_path=args.manifest_prefix)

    print("manifest prepare done!")


if __name__ == '__main__':
    main()


================================================
FILE: dataset/voxceleb/README.md
================================================
# [VoxCeleb](http://www.robots.ox.ac.uk/~vgg/data/voxceleb/)
VoxCeleb is an audio-visual dataset consisting of short clips of human speech, extracted from interview videos uploaded to YouTube.

VoxCeleb contains speech from speakers spanning a wide range of different ethnicities, accents, professions and ages.
All speaking face-tracks are captured "in the wild", with background chatter, laughter, overlapping speech, pose variation and different lighting conditions.
VoxCeleb consists of both audio and video. Each segment is at least 3 seconds long.

The dataset consists of two versions, VoxCeleb1 and VoxCeleb2. Each version has its own train/test split. For each we provide YouTube URLs, face detections and tracks, audio files, cropped face videos and speaker meta-data. There is no overlap between the two versions.

For more details, see http://www.robots.ox.ac.uk/~vgg/data/voxceleb/


================================================
FILE: dataset/voxceleb/voxceleb1.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Prepare VoxCeleb1 dataset

create manifest files.
Manifest file is a json-format file with each line containing the
meta data (i.e. audio filepath, transcript and audio duration)
of each audio file in the data set.

researchers should download the voxceleb1 dataset yourselves
through google form to get the username & password and unpack the data
"""
import argparse
import codecs
import glob
import json
import os
import subprocess
from pathlib import Path

import soundfile

from paddlespeech.dataset.download import check_md5sum
from paddlespeech.dataset.download import download
from paddlespeech.dataset.download import unzip

# By default all data is downloaded below the current working directory.
DATA_HOME = os.path.expanduser('.')

# If http://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/ is used as the
# download base URL, a username and password are required (obtained via the
# Google form).

# If https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a is used as the
# download base URL, --no-check-certificate is needed to connect to it;
# prepare_dataset() prepends that flag to every URL it passes to download().

BASE_URL = "https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a"

# dev data: archive part name -> MD5 checksum
DEV_LIST = {
    "vox1_dev_wav_partaa": "e395d020928bc15670b570a21695ed96",
    "vox1_dev_wav_partab": "bbfaaccefab65d82b21903e81a8a8020",
    "vox1_dev_wav_partac": "017d579a2a96a077f40042ec33e51512",
    "vox1_dev_wav_partad": "7bb1e9f70fddc7a678fa998ea8b3ba19",
}
# Three whitespace-separated fields, split by prepare_dataset():
# "<shell pattern of the parts> <name of the combined zip> <MD5 of the zip>"
DEV_TARGET_DATA = "vox1_dev_wav_parta* vox1_dev_wav.zip ae63e55b951748cc486645f532ba230b"

# test data: same layout as the dev entries above
TEST_LIST = {"vox1_test_wav.zip": "185fdc63c3c739954633d50379a3d102"}
TEST_TARGET_DATA = "vox1_test_wav.zip vox1_test_wav.zip 185fdc63c3c739954633d50379a3d102"

# voxceleb trial lists: file name -> MD5 checksum

TRIAL_BASE_URL = "https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/"
TRIAL_LIST = {
    "veri_test.txt": "29fc7cc1c5d59f0816dc15d6e8be60f7",  # voxceleb1
    "veri_test2.txt": "b73110731c9223c1461fe49cb48dddfc",  # voxceleb1(cleaned)
    "list_test_hard.txt": "21c341b6b2168eea2634df0fb4b8fff1",  # voxceleb1-H
    "list_test_hard2.txt":
    "857790e09d579a68eb2e339a090343c8",  # voxceleb1-H(cleaned)
    "list_test_all.txt": "b9ecf7aa49d4b656aa927a8092844e4a",  # voxceleb1-E
    "list_test_all2.txt":
    "a53e059deb562ffcfc092bf5d90d9f3a"  # voxceleb1-E(cleaned)
}

# Command-line interface. The module docstring is reused as the --help
# description. NOTE: the arguments are parsed at import time.
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
    "--target_dir",
    default=DATA_HOME + "/voxceleb1/",
    type=str,
    help="Directory to save the voxceleb1 dataset. (default: %(default)s)")
# Prefix of the manifest files; create_manifest() appends ".<data dir name>".
parser.add_argument(
    "--manifest_prefix",
    default="manifest",
    type=str,
    help="Filepath prefix for output manifests. (default: %(default)s)")

args = parser.parse_args()


def create_manifest(data_dir, manifest_path_prefix):
    """Write the manifest and a ``.meta`` statistics file for one data directory.

    All ``*.wav`` files below ``<data_dir>/wav`` are listed recursively. Each
    becomes one JSON line with the keys ``utt`` (the last three path
    components joined by ``-``), ``utt2spk`` (the third-to-last component),
    ``feat``, ``feat_shape`` (duration in seconds) and an empty ``text``.

    Args:
        data_dir (str): Directory containing the ``wav`` folder; its base name
            is appended to the manifest prefix.
        manifest_path_prefix (str): Output prefix; the manifest is written to
            ``<prefix>.<data dir name>`` and the statistics to
            ``<dirname(prefix)>/voxceleb1.<data dir name>.meta``.
    """
    print(f"Creating manifest {manifest_path_prefix} from {data_dir}")
    json_lines = []
    data_path = os.path.join(data_dir, "wav", "**", "*.wav")
    total_sec = 0.0
    total_text = 0.0
    total_num = 0
    speakers = set()
    # glob returns matches in arbitrary order; sort for a reproducible manifest.
    for audio_path in sorted(glob.glob(data_path, recursive=True)):
        # Split on the platform separator rather than a hard-coded "/".
        parts = os.path.normpath(audio_path).split(os.sep)
        audio_id = "-".join(parts[-3:])
        utt2spk = parts[-3]
        duration = soundfile.info(audio_path).duration
        text = ""
        json_lines.append(
            json.dumps(
                {
                    "utt": audio_id,
                    "utt2spk": str(utt2spk),
                    "feat": audio_path,
                    "feat_shape": (duration, ),
                    "text": text  # compatible with asr data format
                },
                ensure_ascii=False))

        total_sec += duration
        total_text += len(text)
        total_num += 1
        speakers.add(utt2spk)

    # data_dir_name refer to dev or test
    # voxceleb1 is given explicit in the path
    data_dir_name = Path(data_dir).name
    manifest_path_prefix = manifest_path_prefix + "." + data_dir_name
    manifest_dir = os.path.dirname(manifest_path_prefix)
    # A bare prefix such as the default "manifest" has an empty dirname, and
    # os.makedirs('') raises FileNotFoundError, so only create a directory
    # when there is one to create.
    if manifest_dir:
        os.makedirs(manifest_dir, exist_ok=True)

    with codecs.open(manifest_path_prefix, 'w', encoding='utf-8') as f:
        for line in json_lines:
            f.write(line + "\n")

    # An empty data directory would otherwise raise ZeroDivisionError below.
    text_per_sec = total_text / total_sec if total_sec else 0.0
    sec_per_utt = total_sec / total_num if total_num else 0.0

    meta_path = os.path.join(manifest_dir, "voxceleb1." +
                             data_dir_name) + ".meta"
    with codecs.open(meta_path, 'w', encoding='utf-8') as f:
        print(f"{total_num} utts", file=f)
        print(f"{len(speakers)} speakers", file=f)
        print(f"{total_sec / (60 * 60)} h", file=f)
        print(f"{total_text} text", file=f)
        print(f"{text_per_sec} text/sec", file=f)
        print(f"{sec_per_utt} sec/utt", file=f)


def prepare_dataset(base_url, data_list, target_dir, manifest_path,
                    target_data):
    """Download, assemble, verify and unpack one VoxCeleb1 subset, then
    create its manifest.

    Downloading and unpacking are skipped when ``<target_dir>/wav`` already
    exists; the manifest is (re)created in every case.

    Args:
        base_url (str): url prefix, every key of ``data_list`` is appended.
        data_list (dict): maps a remote file name to its md5 checksum.
        target_dir (str): directory that receives the downloads and the
            unpacked data.
        manifest_path (str): manifest path prefix handed to
            ``create_manifest``.
        target_data (str): three whitespace separated fields
            ``<part glob> <packed file name> <packed file md5>``.

    Raises:
        RuntimeError: if the parts cannot be concatenated or the packed file
            does not match its md5 checksum.
    """
    os.makedirs(target_dir, exist_ok=True)

    # nothing to download when the wav directory already exists
    if not os.path.exists(os.path.join(target_dir, "wav")):
        # download all dataset parts
        print(f"start to download the vox1 zip package to {target_dir}")
        for zip_part, md5sum in data_list.items():
            download_url = " --no-check-certificate " + base_url + "/" + zip_part
            download(url=download_url, md5sum=md5sum, target_dir=target_dir)

        # concatenate all parts into the target zip file
        all_target_part, target_name, target_md5sum = target_data.split()
        target_name = os.path.join(target_dir, target_name)
        if not os.path.exists(target_name):
            pack_part_cmd = "cat {}/{} > {}".format(target_dir, all_target_part,
                                                    target_name)
            if subprocess.call(pack_part_cmd, shell=True) != 0:
                # The shell redirection creates the target even when cat
                # fails. Remove it, otherwise the next run would skip the
                # packing step and fail the md5 check forever.
                if os.path.exists(target_name):
                    os.remove(target_name)
                raise RuntimeError(
                    "Failed to pack {} from {}/{}".format(
                        target_name, target_dir, all_target_part))

        # check the target zip file md5sum
        if not check_md5sum(target_name, target_md5sum):
            raise RuntimeError("{} MD5 checksum failed".format(target_name))
        print("Check {} md5sum successfully".format(target_name))

        # unzip the packed file
        if target_name.endswith(".zip"):
            unzip(target_name, target_dir)

    # create the manifest file
    create_manifest(data_dir=target_dir, manifest_path_prefix=manifest_path)


def prepare_trial(base_url, data_list, target_dir):
    """Download every trial list that is not yet present in ``target_dir``.

    Args:
        base_url (str): url prefix, the trial file name is appended to it.
        data_list (dict): maps a trial file name to its md5 checksum.
        target_dir (str): directory that receives the trial files. An empty
            string (e.g. the dirname of a bare manifest prefix) means the
            current directory.
    """
    # os.makedirs("") raises, so map the empty path to the current directory
    target_dir = target_dir or os.curdir
    os.makedirs(target_dir, exist_ok=True)

    for trial, md5sum in data_list.items():
        target_trial = os.path.join(target_dir, trial)
        if not os.path.exists(target_trial):
            download_url = " --no-check-certificate " + base_url + "/" + trial
            download(url=download_url, md5sum=md5sum, target_dir=target_dir)


def main():
    """Prepare the VoxCeleb1 dev and test subsets and fetch the trial lists.

    Reads the module level ``args``: ``target_dir`` is the download root
    (a leading ``~`` is expanded) and ``manifest_prefix`` is both the manifest
    path prefix and, through its dirname, the trial directory.
    """
    # expanduser() leaves paths that do not start with '~' untouched
    args.target_dir = os.path.expanduser(args.target_dir)

    # (sub-directory, part list, packed file description) of every subset
    subsets = (
        ("dev", DEV_LIST, DEV_TARGET_DATA),
        ("test", TEST_LIST, TEST_TARGET_DATA), )
    for subset, part_list, packed_info in subsets:
        prepare_dataset(
            base_url=BASE_URL,
            data_list=part_list,
            target_dir=os.path.join(args.target_dir, subset),
            manifest_path=args.manifest_prefix,
            target_data=packed_info)

    # the trial lists are stored next to the manifests
    trial_dir = os.path.dirname(args.manifest_prefix)
    prepare_trial(
        base_url=TRIAL_BASE_URL, data_list=TRIAL_LIST, target_dir=trial_dir)

    print("Manifest prepare done!")


# Run the preparation only when this file is executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: dataset/voxceleb/voxceleb2.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Prepare VoxCeleb2 dataset

Download and unpack the voxceleb2 data files.
Voxceleb2 data is stored as the m4a format, 
so we need convert the m4a to wav with the convert.sh scripts
"""
import argparse
import codecs
import glob
import json
import os
import subprocess
from pathlib import Path

import soundfile

from paddlespeech.dataset.download import check_md5sum
from paddlespeech.dataset.download import download
from paddlespeech.dataset.download import unzip

# By default the data is downloaded below the current working directory.
# (os.path.expanduser('.') returns '.' unchanged.)
DATA_HOME = os.path.expanduser('.')

# NOTE(review): download_dataset() prepends " --no-check-certificate " and
# appends "/" + <part name> to this value, so the flag ends up twice and the
# path gets a double slash in the final download argument - confirm this is
# harmless for paddlespeech.dataset.download.download.
BASE_URL = "--no-check-certificate https://www.robots.ox.ac.uk/~vgg/data/voxceleb/data/"

# dev data: part file name -> md5 checksum
DEV_LIST = {
    "vox2_dev_aac_partaa": "da070494c573e5c0564b1d11c3b20577",
    "vox2_dev_aac_partab": "17fe6dab2b32b48abaf1676429cdd06f",
    "vox2_dev_aac_partac": "1de58e086c5edf63625af1cb6d831528",
    "vox2_dev_aac_partad": "5a043eb03e15c5a918ee6a52aad477f9",
    "vox2_dev_aac_partae": "cea401b624983e2d0b2a87fb5d59aa60",
    "vox2_dev_aac_partaf": "fc886d9ba90ab88e7880ee98effd6ae9",
    "vox2_dev_aac_partag": "d160ecc3f6ee3eed54d55349531cb42e",
    "vox2_dev_aac_partah": "6b84a81b9af72a9d9eecbb3b1f602e65",
}

# "<part glob> <packed zip name> <packed zip md5>"
DEV_TARGET_DATA = "vox2_dev_aac_parta* vox2_dev_aac.zip bbc063c46078a602ca71605645c2a402"

# test data: the test set is a single zip file
TEST_LIST = {"vox2_test_aac.zip": "0d2b3ea430a821c33263b5ea37ede312"}
TEST_TARGET_DATA = "vox2_test_aac.zip vox2_test_aac.zip 0d2b3ea430a821c33263b5ea37ede312"

parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
    "--target_dir",
    default=DATA_HOME + "/voxceleb2/",
    type=str,
    help="Directory to save the voxceleb2 dataset. (default: %(default)s)")
parser.add_argument(
    "--manifest_prefix",
    default="manifest",
    type=str,
    help="Filepath prefix for output manifests. (default: %(default)s)")
parser.add_argument(
    "--download",
    default=False,
    action="store_true",
    help="Download the voxceleb2 dataset. (default: %(default)s)")
parser.add_argument(
    "--generate",
    default=False,
    action="store_true",
    help="Generate the manifest files. (default: %(default)s)")
# Parsed from sys.argv when the module is loaded; main() reads this namespace.
args = parser.parse_args()


def create_manifest(data_dir, manifest_path_prefix):
    """Generate the voxceleb2 dataset manifest file.

    Every ``*.wav`` file found recursively below ``data_dir`` yields one JSON
    line, so the dev and test wav info end up in one manifest file. The
    manifest is written to ``<manifest_path_prefix>.<basename of data_dir>``
    and a summary to ``voxceleb2.<basename of data_dir>.meta`` in the same
    directory.

    Args:
        data_dir (str): voxceleb2 wav directory, which include dev and test subdataset
        manifest_path_prefix (str): manifest file prefix; it may be a bare
            file name, in which case the current directory is used.
    """
    print("Creating manifest %s ..." % manifest_path_prefix)
    json_lines = []
    data_path = os.path.join(data_dir, "**", "*.wav")
    total_sec = 0.0
    total_text = 0.0
    total_num = 0
    speakers = set()
    # glob returns files in arbitrary order; sort for a reproducible manifest
    for audio_path in sorted(glob.glob(data_path, recursive=True)):
        path_parts = audio_path.split("/")
        # the last three path components are joined into the utterance id,
        # the third from last one is used as the speaker label
        audio_id = "-".join(path_parts[-3:])
        utt2spk = path_parts[-3]
        duration = soundfile.info(audio_path).duration
        text = ""
        json_lines.append(
            json.dumps(
                {
                    "utt": audio_id,
                    "utt2spk": str(utt2spk),
                    "feat": audio_path,
                    "feat_shape": (duration, ),
                    "text": text  # compatible with asr data format
                },
                ensure_ascii=False))

        total_sec += duration
        total_text += len(text)
        total_num += 1
        speakers.add(utt2spk)

    # the basename of data_dir is appended to the manifest prefix and is
    # also part of the meta file name
    data_dir_name = Path(data_dir).name
    manifest_path_prefix = manifest_path_prefix + "." + data_dir_name

    manifest_dir = os.path.dirname(manifest_path_prefix)
    # the default prefix "manifest" has an empty dirname, and
    # os.makedirs("") would raise
    if manifest_dir:
        os.makedirs(manifest_dir, exist_ok=True)
    with codecs.open(manifest_path_prefix, 'w', encoding='utf-8') as f:
        for line in json_lines:
            f.write(line + "\n")

    # avoid ZeroDivisionError when no audio was found
    text_per_sec = total_text / total_sec if total_sec else 0.0
    sec_per_utt = total_sec / total_num if total_num else 0.0

    meta_path = os.path.join(manifest_dir, "voxceleb2." +
                             data_dir_name) + ".meta"
    with codecs.open(meta_path, 'w', encoding='utf-8') as f:
        print(f"{total_num} utts", file=f)
        print(f"{len(speakers)} speakers", file=f)
        print(f"{total_sec / (60 * 60)} h", file=f)
        print(f"{total_text} text", file=f)
        print(f"{text_per_sec} text/sec", file=f)
        print(f"{sec_per_utt} sec/utt", file=f)


def download_dataset(base_url, data_list, target_data, target_dir, dataset):
    """Download the voxceleb2 zip package

    The parts listed in ``data_list`` are downloaded, concatenated into the
    packed zip described by ``target_data``, verified and unpacked. Nothing
    is done when ``<target_dir>/<dataset>`` already exists.

    Args:
        base_url (str): the voxceleb2 dataset download baseline url
        data_list (dict): the dataset part zip package and the md5 value
        target_data (str): the final dataset zip info, three whitespace
            separated fields ``<part glob> <zip name> <zip md5>``
        target_dir (str): the dataset stored directory
        dataset (str): the dataset name, dev or test

    Raises:
        RuntimeError: the parts cannot be concatenated or the md5sum of the
            packed zip does not match
    """
    os.makedirs(target_dir, exist_ok=True)

    print("target dir {}".format(os.path.join(target_dir, dataset)))
    # Unzipping the dev package creates the dev directory by itself, but the
    # test package unpacks to aac, so the test package is unpacked into
    # ${target_dir}/test explicitly (see below).
    if not os.path.exists(os.path.join(target_dir, dataset)):
        print(f"start to download the vox2 zip package to {target_dir}")
        for zip_part, md5sum in data_list.items():
            download_url = " --no-check-certificate " + base_url + "/" + zip_part
            download(url=download_url, md5sum=md5sum, target_dir=target_dir)

        # concatenate all parts into the target zip file
        all_target_part, target_name, target_md5sum = target_data.split()
        target_name = os.path.join(target_dir, target_name)
        if not os.path.exists(target_name):
            pack_part_cmd = "cat {}/{} > {}".format(target_dir, all_target_part,
                                                    target_name)
            if subprocess.call(pack_part_cmd, shell=True) != 0:
                # The shell redirection creates the target even when cat
                # fails. Remove it, otherwise the next run would skip the
                # packing step and fail the md5 check forever.
                if os.path.exists(target_name):
                    os.remove(target_name)
                raise RuntimeError(
                    "Failed to pack {} from {}/{}".format(
                        target_name, target_dir, all_target_part))

        # check the target zip file md5sum
        if not check_md5sum(target_name, target_md5sum):
            raise RuntimeError("{} MD5 checksum failed".format(target_name))
        print("Check {} md5sum successfully".format(target_name))

        if dataset == "test":
            # we need make the test directory
            unzip(target_name, os.path.join(target_dir, "test"))
        else:
            # unzip dev zip package and will create the dev directory
            unzip(target_name, target_dir)


def main():
    """Download VoxCeleb2 and/or generate its manifest.

    Driven by the module level ``args``: ``--download`` fetches the dev and
    the test subset into ``target_dir``, ``--generate`` writes the manifest
    for everything below ``target_dir``.
    """
    # expanduser() leaves paths that do not start with '~' untouched
    args.target_dir = os.path.expanduser(args.target_dir)

    print("download: {}".format(args.download))
    if args.download:
        # (subset name, part list, packed zip description)
        subsets = (
            ("dev", DEV_LIST, DEV_TARGET_DATA),
            ("test", TEST_LIST, TEST_TARGET_DATA), )
        for subset, part_list, packed_info in subsets:
            download_dataset(
                base_url=BASE_URL,
                data_list=part_list,
                target_data=packed_info,
                target_dir=args.target_dir,
                dataset=subset)

        print("VoxCeleb2 download is done!")

    if args.generate:
        create_manifest(
            data_dir=args.target_dir,
            manifest_path_prefix=args.manifest_prefix)


# Run the download / manifest generation only when executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: dataset/voxforge/run_data.sh
================================================
#! /usr/bin/env bash

# Download the VoxForge corpus and generate its manifests below
# ${MAIN_ROOT}/dataset/voxforge.

# Fail early with a clear message instead of silently working below
# "/dataset" when MAIN_ROOT is unset or empty.
: "${MAIN_ROOT:?MAIN_ROOT must point to the repository root}"

TARGET_DIR="${MAIN_ROOT}/dataset/voxforge"
mkdir -p "${TARGET_DIR}"

# download data, generate manifests
# (all expansions are quoted so that paths containing spaces keep working)
if ! python "${MAIN_ROOT}/dataset/voxforge/voxforge.py" \
    --manifest_prefix="${TARGET_DIR}/manifest" \
    --target_dir="${TARGET_DIR}" \
    --is_merge_dialect=True \
    --dialects 'american' 'british' 'australian' 'european' 'irish' 'canadian' 'indian'
then
    echo "Prepare VoxForge failed. Terminated."
    exit 1
fi

echo "VoxForge Data preparation done."
exit 0


================================================
FILE: dataset/voxforge/voxforge.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Prepare VoxForge dataset

Download, unpack and create manifest files.
Manifest file is a json-format file with each line containing the
meta data (i.e. audio filepath, transcript and audio duration)
of each audio file in the data set.
"""
import argparse
import codecs
import datetime
import json
import os
import shutil
import subprocess

import soundfile

from paddlespeech.dataset.download import download_multi
from paddlespeech.dataset.download import getfile_insensitive
from paddlespeech.dataset.download import unpack

DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')

DATA_URL = 'http://www.repository.voxforge1.org/downloads/SpeechCorpus/Trunk/' \
           'Audio/Main/16kHz_16bit'


def _str2bool(value):
    """Convert a command line string to bool.

    ``type=bool`` must not be used with argparse: bool() of any non-empty
    string, including "False", is True.
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ('true', 't', 'yes', 'y', '1'):
        return True
    if lowered in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected, got %r.' % value)


parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
    "--target_dir",
    default=DATA_HOME + "/VoxForge",
    type=str,
    help="Directory to save the dataset. (default: %(default)s)")
parser.add_argument(
    "--dialects",
    default=[
        'american', 'british', 'australian', 'european', 'irish', 'canadian',
        'indian'
    ],
    nargs='+',
    type=str,
    help="Dialect types. (default: %(default)s)")
parser.add_argument(
    "--is_merge_dialect",
    default=True,
    type=_str2bool,
    help="If set True, manifests of american dialect and canadian dialect will "
    "be merged to american-canadian dialect; manifests of british "
    "dialect, irish dialect and australian dialect will be merged to "
    "commonwealth dialect. (default: %(default)s)")
parser.add_argument(
    "--manifest_prefix",
    default="manifest",
    type=str,
    help="Filepath prefix for output manifests. (default: %(default)s)")
# Parsed from sys.argv when the module is loaded; main() reads this namespace.
args = parser.parse_args()


def download_and_unpack(target_dir, url):
    """Fetch the tgz archives from ``url`` and unpack them.

    The archives are stored in ``<target_dir>/tgz``; every ``*.tgz`` found
    there is unpacked into ``<target_dir>/audio``. A failed download is only
    reported, nothing is unpacked in that case.
    """
    archive_dir = os.path.join(target_dir, 'tgz')
    status = download_multi(
        url, archive_dir, '-q -l 1 -N -nd -c -e robots=off -A tgz -r -np')
    if status != 0:
        print('Download tgz audio files failed with exit code %d.' % status)
        return

    print('Download done, start unpacking ...')
    unpack_dir = os.path.join(target_dir, 'audio')
    for parent, _subdirs, names in os.walk(archive_dir):
        for name in names:
            print(name)
            if not name.endswith('.tgz'):
                continue
            unpack(os.path.join(parent, name), unpack_dir)


def select_dialects(target_dir, dialect_list):
    """Classify audio files by dialect.

    ``<target_dir>/dialect`` is recreated from scratch. For every dialect a
    sub-directory is filled with symbolic links to those directories below
    ``<target_dir>/audio`` whose ``etc/readme*`` file contains a line matching
    ``pronunciation dialect.*<dialect>`` (case-insensitive).

    Args:
        target_dir (str): directory that contains the ``audio`` directory.
        dialect_list (list): dialect names to select.
    """
    dialect_root_dir = os.path.join(target_dir, 'dialect')
    if os.path.exists(dialect_root_dir):
        shutil.rmtree(dialect_root_dir)
    os.mkdir(dialect_root_dir)
    audio_dir = os.path.abspath(os.path.join(target_dir, 'audio'))
    for dialect in dialect_list:
        # filter files by dialect; an argument list (no shell) keeps paths
        # with spaces or shell metacharacters intact
        command = [
            'find', audio_dir, '-iwholename', '*etc/readme*', '-exec', 'grep',
            '-E', '-iHl', 'pronunciation dialect.*%s' % dialect, '{}', ';'
        ]
        p = subprocess.Popen(
            command, stdin=subprocess.DEVNULL, stdout=subprocess.PIPE)
        output, _ = p.communicate()
        dialect_dir = os.path.join(dialect_root_dir, dialect)
        if os.path.exists(dialect_dir):
            shutil.rmtree(dialect_dir)
        os.mkdir(dialect_dir)
        # Popen yields bytes; decode to str paths, joining str and bytes
        # paths raises TypeError
        for path in os.fsdecode(output).splitlines():
            src_dir = os.path.dirname(os.path.dirname(path))
            link = os.path.join(dialect_dir,
                                os.path.basename(os.path.normpath(src_dir)))
            # several readme files of one directory may match
            if not os.path.lexists(link):
                os.symlink(src_dir, link)


def generate_manifest(data_dir, manifest_path):
    """Create the JSON-lines manifest for one dialect directory.

    Every entry of ``data_dir`` must be a symbolic link to a recording
    directory that holds a ``wav`` or ``flac`` sub-directory plus
    ``etc/PROMPTS`` and ``etc/README``. Each usable PROMPTS line
    (``<utterance id> <transcript>``) yields one manifest line; directories
    and utterances that cannot be used are reported and skipped.

    Args:
        data_dir (str): directory with one symbolic link per recording
            directory.
        manifest_path (str): output manifest file.
    """
    json_lines = []

    for path in os.listdir(data_dir):
        audio_link = os.path.join(data_dir, path)
        assert os.path.islink(
            audio_link), '%s should be symbolic link.' % audio_link
        actual_audio_dir = os.path.abspath(os.readlink(audio_link))

        if os.path.isdir(os.path.join(actual_audio_dir, 'wav')):
            audio_type = 'wav'
        elif os.path.isdir(os.path.join(actual_audio_dir, 'flac')):
            audio_type = 'flac'
        else:
            print('Unknown audio type, skipped processing %s.' %
                  actual_audio_dir)
            continue

        etc_dir = os.path.join(actual_audio_dir, 'etc')
        prompts_file = os.path.join(etc_dir, 'PROMPTS')
        if not os.path.isfile(prompts_file):
            print('PROMPTS file missing, skip processing %s.' %
                  actual_audio_dir)
            continue

        readme_file = getfile_insensitive(os.path.join(etc_dir, 'README'))
        if readme_file is None:
            print('README file missing, skip processing %s.' % actual_audio_dir)
            continue

        # Best-effort speaker label for utterance ids that do not carry a
        # "<speaker>-<date>-<suffix>" component: first '-' separated field of
        # the recording directory name.
        default_speaker = os.path.basename(actual_audio_dir).split('-')[0]

        # file() only exists in Python 2; read through codecs like the rest
        # of this module and close the handle deterministically
        with codecs.open(prompts_file, 'r', 'utf-8', errors='replace') as fin:
            prompt_lines = fin.readlines()

        for line in prompt_lines:
            fields = line.strip().split(None, 1)
            if not fields:
                continue  # blank line
            if len(fields) != 2:
                print('Malformed PROMPTS line, skip processing %s.' %
                      line.strip())
                continue
            u, trans = fields
            u_parts = u.split('/')

            # The speaker is taken from the third path component from the
            # end when it has exactly three '-' separated fields; otherwise
            # fall back instead of reusing the previous utterance's speaker.
            speaker = default_speaker
            if len(u_parts) >= 3:
                session_fields = u_parts[-3].split('-')
                if len(session_fields) == 3:
                    speaker = session_fields[0]

            # only the last two components locate the audio file:
            # <recording dir>/<audio type>/<utterance>.<audio type>
            if len(u_parts) < 2:
                u_parts = [audio_type] + u_parts
            u_parts[-2] = audio_type
            u_parts[-1] += '.' + audio_type
            u = os.path.join(actual_audio_dir, '/'.join(u_parts[-2:]))

            if not os.path.isfile(u):
                print('Audio file missing, skip processing %s.' % u)
                continue

            if os.stat(u).st_size == 0:
                print('Empty audio file, skip processing %s.' % u)
                continue

            trans = trans.strip().replace('-', ' ')
            if not trans.isupper() or \
                not trans.strip().replace(' ', '').replace("'", "").isalpha():
                print("Transcript not normalized properly, skip processing %s."
                      % u)
                continue

            audio_data, samplerate = soundfile.read(u)
            duration = float(len(audio_data)) / samplerate

            utt = os.path.splitext(os.path.basename(u))[0]
            json_lines.append(
                json.dumps({
                    'utt': utt,
                    'utt2spk': speaker,
                    'feat': u,
                    'feat_shape': (duration, ),  #second
                    'text': trans.lower()
                }))

    with codecs.open(manifest_path, 'w', 'utf-8') as fout:
        for line in json_lines:
            fout.write(line + '\n')


def merge_manifests(manifest_files, save_path):
    """Concatenate several manifest files into ``save_path``.

    Args:
        manifest_files (list): manifest files, merged in the given order.
        save_path (str): output manifest file.
    """
    lines = []
    for manifest_file in manifest_files:
        # the context manager closes every input file deterministically
        with codecs.open(manifest_file, 'r', 'utf-8') as fin:
            lines.extend(fin.readlines())

    with codecs.open(save_path, 'w', 'utf-8') as fout:
        fout.writelines(lines)


def prepare_dataset(url, dialects, target_dir, manifest_prefix, is_merge):
    """Download VoxForge, sort it by dialect and write the manifests.

    One manifest ``<manifest_prefix>.<dialect>`` is generated per dialect.
    When ``is_merge`` is true, the american/canadian manifests are merged
    into ``<manifest_prefix>.american-canadian`` and the
    australian/british/irish ones into ``<manifest_prefix>.commonwealth``.
    """
    download_and_unpack(target_dir, url)
    select_dialects(target_dir, dialects)

    # (merged manifest suffix, member dialects, collected manifest paths)
    merge_groups = (
        ('american-canadian', ('american', 'canadian'), []),
        ('commonwealth', ('australian', 'british', 'irish'), []), )

    for dialect in dialects:
        manifest_fpath = manifest_prefix + '.' + dialect
        for _suffix, members, collected in merge_groups:
            if dialect in members:
                collected.append(manifest_fpath)
        generate_manifest(
            os.path.join(target_dir, 'dialect', dialect), manifest_fpath)

    if is_merge:
        for suffix, _members, collected in merge_groups:
            if collected:
                merge_manifests(collected, manifest_prefix + '.' + suffix)


def main():
    """Prepare the VoxForge dataset as requested by the module level ``args``."""
    # expanduser() leaves paths that do not start with '~' untouched
    args.target_dir = os.path.expanduser(args.target_dir)

    prepare_dataset(
        url=DATA_URL,
        dialects=args.dialects,
        target_dir=args.target_dir,
        manifest_prefix=args.manifest_prefix,
        is_merge=args.is_merge_dialect)


# Run the dataset preparation only when executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: demos/README.md
================================================
# Speech Application based on PaddleSpeech

([简体中文](./README_cn.md)|English)

This directory contains many speech applications in multiple scenarios.

* audio searching - mass audio similarity retrieval
* audio tagging - multi-label tagging of an audio file
* automatic_video_subtitles - generate subtitles from a video
* metaverse - 2D AR with TTS  
* punctuation_restoration - restore punctuation from raw text
* speech recognition - recognize text of an audio file 
* speech server - Server for Speech Task, e.g. ASR,TTS,CLS
* streaming asr server - receive an audio stream over websocket and transcribe it to text.
* streaming tts server - receive text over http or websocket and stream back the synthesized audio.
* speech translation - end to end speech translation  
* story talker - book reader based on OCR and TTS  
* style_fs2 - multi style control for FastSpeech2 model  
* text_to_speech - convert text into speech 
* self supervised pretraining - speech feature extraction and speech recognition based on wav2vec2
* Whisper - speech recognition and translation based on the Whisper model


================================================
FILE: demos/README_cn.md
================================================
# PaddleSpeech 语音应用 Demo

(简体中文|[English](./README.md))

该目录包含基于 PaddleSpeech 开发的不同场景的语音应用 Demo：

* 声音检索 - 海量音频相似性检索。
* 声音分类 - 基于 AudioSet 的 527 类标签的音频多标签分类。 
* 视频字幕生成 - 识别视频中语音的文本，并进行文本后处理。
* 元宇宙 - 基于语音合成的 2D 增强现实。
* 标点恢复 - 通常作为语音识别的文本后处理任务，为一段无标点的纯文本添加相应的标点符号。
* 语音识别 - 识别一段音频中包含的语音文字。
* 语音服务 - 离线语音服务，包括ASR、TTS、CLS等。
* 流式语音识别服务 - 流式输入语音数据流识别音频中的文字。
* 流式语音合成服务 - 根据待合成文本流式生成合成音频数据流。
* 语音翻译 - 实时识别音频中的语言，并同时翻译成目标语言。
* 会说话的故事书 - 基于 OCR 和语音合成的会说话的故事书。
* 个性化语音合成 - 基于 FastSpeech2 模型的个性化语音合成。 
* 语音合成 - 基于给定的文本生成语音音频。
* 自监督预训练模型 - 基于wav2vec2的语音特征提取和语音识别。
* Whisper - 基于Whisper模型的语音识别与翻译。


================================================
FILE: demos/TTSAndroid/.gitignore
================================================
*.iml
.gradle
/local.properties
/.idea/caches
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
/.idea/navEditor.xml
/.idea/assetWizardSettings.xml
.DS_Store
/build
/captures
.externalNativeBuild


================================================
FILE: demos/TTSAndroid/README.md
================================================
# 语音合成 Java API Demo 使用指南

在 Android 上实现语音合成功能，此 Demo 有很好的易用性和开放性，如在 Demo 中跑自己训练好的模型等。

本文主要介绍语音合成 Demo 运行方法。

## 如何运行语音合成 Demo

### 环境准备

1. 在本地环境安装好 Android Studio 工具，详细安装方法请见 [Android Studio 官网](https://developer.android.com/studio)。
2. 准备一部 Android 手机，并开启 USB 调试模式。开启方法: `手机设置 -> 查找开发者选项 -> 打开开发者选项和 USB 调试模式`。

**注意**：
> 如果您的 Android Studio 尚未配置 NDK ，请根据 Android Studio 用户指南中的[安装及配置 NDK 和 CMake ](https://developer.android.com/studio/projects/install-ndk)内容，预先配置好 NDK 。您可以选择最新的 NDK 版本，或者使用 Paddle Lite 预测库版本一样的 NDK。

### 部署步骤

1. 用 Android Studio 打开 TTSAndroid 工程。
2. 手机连接电脑，打开 USB 调试和文件传输模式，并在 Android Studio 上连接自己的手机设备（手机需要开启允许从 USB 安装软件权限）。

**注意：**
>1. 如果您在导入项目、编译或者运行过程中遇到 NDK 配置错误的提示，请打开 `File > Project Structure > SDK Location`，修改 `Android NDK location` 为您本机配置的 NDK 所在路径。
>2. 如果您是通过 Android Studio 的 SDK Tools 下载的 NDK (见本章节"环境准备")，可以直接点击下拉框选择默认路径。
>3. 还有一种 NDK 配置方法，你可以在 `TTSAndroid/local.properties` 文件中手动添加 NDK 路径配置 `ndk.dir=/root/android-ndk-r20b`
>4. 如果以上步骤仍旧无法解决 NDK 配置错误，请尝试根据 Android Studio 官方文档中的[更新 Android Gradle 插件](https://developer.android.com/studio/releases/gradle-plugin?hl=zh-cn#updating-plugin)章节，尝试更新 Android Gradle plugin 版本。

3. 点击 Run 按钮，自动编译 APP 并安装到手机。(该过程会自动下载 Paddle Lite 预测库和模型，需要联网)
   成功后效果如下：
    - pic 1：APP 安装到手机。
    - pic 2：APP 打开后的效果，在下拉框中选择待合成的文本。
    - pic 3：合成后点击按钮播放音频。

<p align="center"><img width="350" height="500"  src="https://user-images.githubusercontent.com/24568452/204450217-d166588a-5341-4565-8662-0f8129284bba.png"/><img width="350" height="500" src="https://user-images.githubusercontent.com/24568452/204450231-d6f3105c-276a-4af5-a3ba-864d9f5ee24e.png"/><img width="350" height="500" src="https://user-images.githubusercontent.com/24568452/204450269-0ddf46ec-eedd-4c90-8a0d-e915622fdf3e.png"/></p>

## 更新预测库

* Paddle Lite
  项目：[https://github.com/PaddlePaddle/Paddle-Lite](https://github.com/PaddlePaddle/Paddle-Lite)。


参考 [Paddle Lite 源码编译文档](https://www.paddlepaddle.org.cn/lite/v2.11/source_compile/compile_env.html)，编译
Android 预测库。

* 编译最终产物位于 `build.lite.xxx.xxx.xxx` 下的 `inference_lite_lib.xxx.xxx`
* 替换 java 库
    * jar 包
      将生成的 `build.lite.android.xxx.gcc/inference_lite_lib.android.xxx/java/jar/PaddlePredictor.jar`
      替换 Demo 中的 `TTSAndroid/app/libs/PaddlePredictor.jar`。
    * Java so
        * arm64-v8a
          将生成的 `build.lite.android.armv8.gcc/inference_lite_lib.android.armv8/java/so/libpaddle_lite_jni.so`
          库替换 Demo 中的 `TTSAndroid/app/src/main/jniLibs/arm64-v8a/libpaddle_lite_jni.so`。

## Demo 内容介绍

先整体介绍下语音合成 Demo 的代码结构，然后介绍 Java 各功能模块的功能。

<p align="center">
<img width="442" alt="image" src="https://user-images.githubusercontent.com/24568452/204455080-4f96fe55-6058-4235-bb92-cc98cfcc8bb6.png">
</p>

### 重点关注内容

1. `Predictor.java`： 预测代码。

```bash
# 位置：
TTSAndroid/app/src/main/java/com/baidu/paddle/lite/demo/tts/Predictor.java
```

2. `fastspeech2_csmsc_arm.nb`  和 `mb_melgan_csmsc_arm.nb`: 模型文件 (opt 工具转化后 Paddle Lite 模型)
   ，分别来自 [fastspeech2_cnndecoder_csmsc_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_pdlite_1.3.0.zip)
   和 [mb_melgan_csmsc_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_csmsc_pdlite_1.3.0.zip)。

```bash
# 位置：
TTSAndroid/app/src/main/assets/models/cpu/fastspeech2_csmsc_arm.nb
TTSAndroid/app/src/main/assets/models/cpu/mb_melgan_csmsc_arm.nb
```

3. `libpaddle_lite_jni.so`、`PaddlePredictor.jar`：Paddle Lite Java 预测库与 jar 包。

```bash
# 位置
TTSAndroid/app/src/main/jniLibs/arm64-v8a/libpaddle_lite_jni.so
TTSAndroid/app/libs/PaddlePredictor.jar
```

> 如果要替换动态库 so 和 jar 文件，则将新的动态库 so 更新到 `TTSAndroid/app/src/main/jniLibs/arm64-v8a/` 目录下 新的 jar 文件更新到 `TTSAndroid/app/libs/` 目录下

4. `build.gradle` : 定义编译过程的 gradle 脚本。（不用改动，定义了自动下载 Paddle Lite 预测和模型的过程）

```bash
# 位置
TTSAndroid/app/build.gradle
```

如果需要手动更新模型和预测库，则可将 gradle 脚本中的 `download*` 接口注释即可, 将新的预测库替换至相应目录下

### Java 端

* 模型存放，将下载好的模型解压存放在 `app/src/main/assets/models` 目录下。
* TTSAndroid Java 包在 `app/src/main/java/com/baidu/paddle/lite/demo/tts` 目录下，实现 APP 界面消息事件。
* MainActivity 实现 APP 的创建、运行、释放功能，重点关注 `onLoadModel` 和 `onRunModel` 函数，实现 APP 界面值传递和推理处理。

     ```java
    public boolean onLoadModel() {
        return predictor.init(MainActivity.this, modelPath, AMmodelName, VOCmodelName, cpuThreadNum,
                cpuPowerMode);
    }
     
    public boolean onRunModel() {
        return predictor.isLoaded() && predictor.runModel(phones);
    }
     ```

* SettingActivity 实现设置界面各个元素的更新与显示如模型地址、线程数、输入 shape 大小等，如果新增/删除界面的某个元素，均在这个类里面实现：
    - 参数的默认值可在 `app/src/main/res/values/strings.xml` 查看
    - 每个元素的 ID 和 value 是对应 `app/src/main/res/xml/settings.xml`
      和 `app/src/main/res/values/strings.xml` 文件中的值
    - 这部分内容不建议修改，如果有新增属性，可以按照此格式进行添加

* Predictor 使用 Java API 实现语音合成模型的预测功能，重点关注 `init`、和 `runModel` 函数，实现 Paddle Lite 端侧推理功能：
     ```java
     // 初始化函数，完成预测器初始化
     public boolean init(Context appCtx, String modelPath, String AMmodelName, String VOCmodelName, int cpuThreadNum, String cpuPowerMode);
     // 模型推理函数
     public boolean runModel(float[] phones);
     ```

## 代码讲解 （使用 Paddle Lite `Java API` 执行预测）

Android 示例基于 Java API 开发，调用 Paddle Lite `Java API` 包括以下五步。更详细的 `API`
描述参考：[Paddle Lite Java API ](https://www.paddlepaddle.org.cn/lite/v2.11/api_reference/java_api_doc.html)。

## 如何更新模型和输入

### 更新模型

1. 将优化后的模型存放到目录 `TTSAndroid/app/src/main/assets/models/cpu/`
   下，可任意换成 [released_model.md](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/released_model.md)
   中的 `*_pdlite_*.zip/*_arm.nb`
   格式的声学模型和声码器，注意更换声学模型需要对应修改 `TTSAndroid/app/src/main/java/com/baidu/paddle/lite/demo/tts/MainActivity.java`
   中的 `sentencesToChoose` 数组。
2. 如果模型名字跟工程中模型名字一模一样，即均是使用`fastspeech2_csmsc_arm.nb` （假设声学模型的 `phone_id_map.txt`
   也一样）和 `mb_melgan_csmsc_arm.nb`
   ，则代码不需更新；否则，需要修改  `TTSAndroid/app/src/main/java/com/baidu/paddle/lite/demo/tts/MainActivity.java`
   中的 `AMmodelName` 和 `VOCmodelName`：

<p align="center">
<img src="https://user-images.githubusercontent.com/24568452/204458299-25e305a6-7cbb-4308-86ee-03f146bb938e.png">
</p>

3. 如果更新模型的输入/输出 Tensor 个数、shape 和 Dtype
   发生更新，需要更新文件 `TTSAndroid/app/src/main/java/com/baidu/paddle/lite/demo/tts/Predictor.java`。

### 更新输入

**本 Demo 不包含文本前端模块**，通过下拉框选择预先设置好的文本，在代码中映射成对应的 phone_id，**如需文本前端模块请自行处理**，可参考：
- C++ 中文前端 [lym0302/paddlespeech_tts_cpp](https://github.com/lym0302/paddlespeech_tts_cpp)
- C++ 英文 g2p [yazone/g2pE_mobile](https://github.com/yazone/g2pE_mobile)

`phone_id_map.txt` 请参考 [fastspeech2_cnndecoder_csmsc_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_pdlite_1.3.0.zip)。

## 通过 setting 界面更新语音合成的相关参数

### setting 界面参数介绍

可通过 APP 上的 Settings 按钮，实现语音合成 Demo 中参数的更新，目前支持以下参数的更新：
参数的默认值可在 `app/src/main/res/values/strings.xml` 查看

- CPU setting：
    - power_mode 默认是 `LITE_POWER_HIGH`
    - thread_num 默认是 1

### setting 界面参数更新

1. 打开 APP，点击右上角的 `:` 符号，选择 `Settings..` 选项，打开 setting 界面；
2. 再将 setting 界面的 Enable custom settings 选中☑️，然后更新部分参数；
3. 假设更新线程数据，将 CPU Thread Num 设置为 4，更新后，返回原界面，APP 将自动重新加载模型，在下拉框中选择文本会进行合成，合成结束后会打印 4 线程的耗时和结果

## 性能优化方法

如果你觉得当前性能不符合需求，想进一步提升模型性能，可参考[性能优化文档](https://github.com/PaddlePaddle/Paddle-Lite-Demo#%E6%80%A7%E8%83%BD%E4%BC%98%E5%8C%96)完成性能优化。

## Release

[2022-11-29-app-release.apk](https://paddlespeech.cdn.bcebos.com/demos/TTSAndroid/2022-11-29-app-release.apk)

## More
本 Demo 合并自 [yt605155624/TTSAndroid](https://github.com/yt605155624/TTSAndroid)。


================================================
FILE: demos/TTSAndroid/app/.gitignore
================================================
/build


================================================
FILE: demos/TTSAndroid/app/build.gradle
================================================
import java.security.MessageDigest

apply plugin: 'com.android.application'

// Android application settings of the TTS demo.
android {
    // SDK level the app is compiled against
    compileSdkVersion 28
    defaultConfig {
        applicationId "com.baidu.paddle.lite.demo.tts"
        minSdkVersion 15
        targetSdkVersion 28
        versionCode 1
        versionName "1.0"
        // instrumentation runner of the (pre-AndroidX) support test library
        testInstrumentationRunner "android.support.test.runner.AndroidJUnitRunner"
    }
    buildTypes {
        release {
            // code shrinking is switched off for release builds; the
            // ProGuard files below are still registered
            minifyEnabled false
            proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
        }
    }
}

// Libraries the app is built and tested with.
dependencies {
    // every jar placed in app/libs
    implementation fileTree(include: ['*.jar'], dir: 'libs')
    implementation 'com.android.support:appcompat-v7:28.0.0'
    implementation 'com.android.support.constraint:constraint-layout:1.1.3'
    implementation 'com.android.support:design:28.0.0'
    testImplementation 'junit:junit:4.12'
    androidTestImplementation 'com.android.support.test:runner:1.0.2'
    androidTestImplementation 'com.android.support.test.espresso:espresso-core:3.0.2'
    // NOTE(review): libs/PaddlePredictor.jar also matches the '*.jar'
    // fileTree above, so this explicit entry looks redundant - confirm
    // before removing.
    implementation files('libs/PaddlePredictor.jar')
}

// Archive containing the Paddle Lite Java API jar and the native JNI libraries.
def paddleLiteLibs = 'https://paddlespeech.cdn.bcebos.com/demos/TTSAndroid/paddle_lite_libs_68b66fd3.tar.gz'

// Fetches the Paddle Lite archive into cache/ (once), unpacks it (once) and copies the
// jar and the arm64-v8a JNI libraries into the module when they are not there yet.
task downloadAndExtractPaddleLiteLibs(type: DefaultTask) {
    doFirst {
        println "Downloading and extracting Paddle Lite libs"
    }
    doLast {
        // Make sure the cache folder exists
        def cacheFolder = file("cache")
        if (!cacheFolder.exists()) {
            mkdir "cache"
        }

        // The cache entry is named after the MD5 of the download URL (base-32 encoded)
        def md5 = MessageDigest.getInstance('MD5')
        md5.update(paddleLiteLibs.bytes)
        String cacheName = new BigInteger(1, md5.digest()).toString(32)

        def archive = file("cache/${cacheName}.tar.gz")
        def unpackedDir = file("cache/${cacheName}")
        def predictorJar = file("libs/PaddlePredictor.jar")
        def jniLib = file("src/main/jniLibs/arm64-v8a/libpaddle_lite_jni.so")

        // Download the archive only when it is not cached
        if (!archive.exists()) {
            ant.get(src: paddleLiteLibs, dest: archive)
        }

        // Unpack the archive only when it has not been unpacked before
        if (!unpackedDir.exists()) {
            copy {
                from tarTree(archive)
                into unpackedDir
            }
        }

        // Install PaddlePredictor.jar into libs/
        if (!predictorJar.exists()) {
            copy {
                from "cache/${cacheName}/java/PaddlePredictor.jar"
                into "libs"
            }
        }

        // Install the arm64-v8a JNI libraries
        if (!jniLib.exists()) {
            copy {
                from "cache/${cacheName}/java/libs/arm64-v8a/"
                into "src/main/jniLibs/arm64-v8a"
            }
        }
    }
}
preBuild.dependsOn downloadAndExtractPaddleLiteLibs

// Model archives to install: 'src' is the download URL, 'dest' the folder the archive
// content is extracted into.
def paddleLiteModels = [
        ['src' : 'https://paddlespeech.cdn.bcebos.com/demos/TTSAndroid/fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz',
         'dest': 'src/main/assets/models'],
]

// Downloads every model archive into cache/ (once) and extracts it into its destination
// when the destination is missing or the archive has just been downloaded.
task downloadAndExtractPaddleLiteModels(type: DefaultTask) {
    doFirst {
        println "Downloading and extracting Paddle Lite models"
    }
    doLast {
        // Make sure the cache folder exists
        String cacheRoot = "cache"
        if (!file(cacheRoot).exists()) {
            mkdir cacheRoot
        }

        paddleLiteModels.each { model ->
            // The cache entry is named after the MD5 of the download URL (base-32 encoded)
            def md5 = MessageDigest.getInstance('MD5')
            md5.update(model.src.bytes)
            String cacheName = new BigInteger(1, md5.digest()).toString(32)
            def archive = file("${cacheRoot}/${cacheName}.tar.gz")

            // Extract when the destination does not exist yet ...
            boolean needsExtract = !file(model.dest).exists()
            if (!archive.exists()) {
                ant.get(src: model.src, dest: archive)
                // ... or when a fresh archive has just been fetched
                needsExtract = true
            }

            if (needsExtract) {
                copy {
                    from tarTree(archive)
                    into model.dest
                }
            }
        }
    }
}
preBuild.dependsOn downloadAndExtractPaddleLiteModels


================================================
FILE: demos/TTSAndroid/app/proguard-rules.pro
================================================
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
#   http://developer.android.com/guide/developing/tools/proguard.html

# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
#   public *;
#}

# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable

# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile


================================================
FILE: demos/TTSAndroid/app/src/androidTest/java/com/baidu/paddle/lite/demo/tts/ExampleInstrumentedTest.java
================================================
package com.baidu.paddle.lite.demo.tts;

import android.content.Context;
import android.support.test.InstrumentationRegistry;
import android.support.test.runner.AndroidJUnit4;

import org.junit.Test;
import org.junit.runner.RunWith;

import static org.junit.Assert.*;

/**
 * Instrumented test, which will execute on an Android device.
 * <p>
 * Verifies that the context of the app under test reports the application id of the
 * TTS demo.
 *
 * @see <a href="http://d.android.com/tools/testing">Testing documentation</a>
 */
@RunWith(AndroidJUnit4.class)
public class ExampleInstrumentedTest {
    /**
     * The target context's package name must equal the applicationId configured in
     * app/build.gradle ("com.baidu.paddle.lite.demo.tts").
     */
    @Test
    public void useAppContext() {
        // Context of the app under test.
        Context appContext = InstrumentationRegistry.getTargetContext();

        assertEquals("com.baidu.paddle.lite.demo.tts", appContext.getPackageName());
    }
}


================================================
FILE: demos/TTSAndroid/app/src/main/AndroidManifest.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<!-- Manifest of the Paddle Lite TTS demo: declares the storage permissions and the two
     activities (main screen and settings screen). -->
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
    package="com.baidu.paddle.lite.demo.tts">

    <!-- External storage access: MainActivity saves the synthesized wav file under the
         external storage directory and plays it back from there. -->
    <uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" />
    <uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE" />

    <application
        android:allowBackup="true"
        android:icon="@drawable/logo"
        android:label="@string/app_name"
        android:roundIcon="@drawable/logo"
        android:supportsRtl="true"
        android:theme="@style/AppTheme">
        <!-- Launcher entry point of the app. -->
        <activity android:name="com.baidu.paddle.lite.demo.tts.MainActivity">
            <intent-filter>
                <action android:name="android.intent.action.MAIN" />

                <category android:name="android.intent.category.LAUNCHER" />
            </intent-filter>
        </activity>
        <!-- Settings screen, started from MainActivity's options menu. -->
        <activity
            android:name="com.baidu.paddle.lite.demo.tts.SettingsActivity"
            android:label="Settings"></activity>
    </application>

</manifest>

================================================
FILE: demos/TTSAndroid/app/src/main/java/com/baidu/paddle/lite/demo/tts/AppCompatPreferenceActivity.java
================================================
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.baidu.paddle.lite.demo.tts;

import android.content.res.Configuration;
import android.os.Bundle;
import android.preference.PreferenceActivity;
import android.support.annotation.LayoutRes;
import android.support.v7.app.ActionBar;
import android.support.v7.app.AppCompatDelegate;
import android.view.MenuInflater;
import android.view.View;
import android.view.ViewGroup;

/**
 * A {@link android.preference.PreferenceActivity} that forwards its lifecycle, content-view,
 * menu and title calls to an {@link AppCompatDelegate}, so that AppCompat features such as
 * the support action bar are available on a preference screen.
 * <p>
 * The same proxying technique works for any {@link android.app.Activity} subclass, not only
 * for {@link android.preference.PreferenceActivity}.
 */
public abstract class AppCompatPreferenceActivity extends PreferenceActivity {
    /** Created lazily on first use; see {@link #appCompat()}. */
    private AppCompatDelegate appCompatDelegate;

    /** Returns the delegate, creating it on the first call. */
    private AppCompatDelegate appCompat() {
        if (appCompatDelegate != null) {
            return appCompatDelegate;
        }
        appCompatDelegate = AppCompatDelegate.create(this, null);
        return appCompatDelegate;
    }

    // ---- Lifecycle callbacks ----

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        // The delegate installs its view factory and is created before the framework
        // activity itself.
        AppCompatDelegate delegate = appCompat();
        delegate.installViewFactory();
        delegate.onCreate(savedInstanceState);
        super.onCreate(savedInstanceState);
    }

    @Override
    protected void onPostCreate(Bundle savedInstanceState) {
        super.onPostCreate(savedInstanceState);
        appCompat().onPostCreate(savedInstanceState);
    }

    @Override
    protected void onPostResume() {
        super.onPostResume();
        appCompat().onPostResume();
    }

    @Override
    public void onConfigurationChanged(Configuration newConfig) {
        super.onConfigurationChanged(newConfig);
        appCompat().onConfigurationChanged(newConfig);
    }

    @Override
    protected void onTitleChanged(CharSequence title, int color) {
        super.onTitleChanged(title, color);
        appCompat().setTitle(title);
    }

    @Override
    protected void onStop() {
        super.onStop();
        appCompat().onStop();
    }

    @Override
    protected void onDestroy() {
        super.onDestroy();
        appCompat().onDestroy();
    }

    // ---- Content view: always routed through the delegate ----

    @Override
    public void setContentView(@LayoutRes int layoutResID) {
        appCompat().setContentView(layoutResID);
    }

    @Override
    public void setContentView(View view) {
        appCompat().setContentView(view);
    }

    @Override
    public void setContentView(View view, ViewGroup.LayoutParams params) {
        appCompat().setContentView(view, params);
    }

    @Override
    public void addContentView(View view, ViewGroup.LayoutParams params) {
        appCompat().addContentView(view, params);
    }

    // ---- Action bar and options menu ----

    /** Returns the support action bar provided by the delegate. */
    public ActionBar getSupportActionBar() {
        return appCompat().getSupportActionBar();
    }

    @Override
    public MenuInflater getMenuInflater() {
        return appCompat().getMenuInflater();
    }

    public void invalidateOptionsMenu() {
        appCompat().invalidateOptionsMenu();
    }
}


================================================
FILE: demos/TTSAndroid/app/src/main/java/com/baidu/paddle/lite/demo/tts/MainActivity.java
================================================
package com.baidu.paddle.lite.demo.tts;

import android.Manifest;
import android.app.ProgressDialog;
import android.content.Intent;
import android.content.SharedPreferences;
import android.content.pm.PackageManager;
import android.media.MediaPlayer;
import android.os.Bundle;
import android.os.Environment;
import android.os.Handler;
import android.os.HandlerThread;
import android.os.Message;
import android.preference.PreferenceManager;
import android.support.annotation.NonNull;
import android.support.v4.app.ActivityCompat;
import android.support.v4.content.ContextCompat;
import android.support.v7.app.AppCompatActivity;
import android.text.method.ScrollingMovementMethod;
import android.util.Log;
import android.view.Menu;
import android.view.MenuInflater;
import android.view.MenuItem;
import android.view.View;
import android.widget.AdapterView;
import android.widget.ArrayAdapter;
import android.widget.Button;
import android.widget.Spinner;
import android.widget.TextView;
import android.widget.Toast;

import java.io.File;
import java.io.IOException;

/**
 * Main screen of the TTS demo.
 * <p>
 * The user picks a sentence from a spinner; the matching phone-id sequence is synthesized
 * by {@link Predictor} on a background {@link HandlerThread}, the waveform is written to a
 * wav file on external storage and can then be played, paused and stopped with a
 * {@link MediaPlayer}. The model is (re)loaded in {@link #onResume()} whenever the model
 * path, thread count or power mode stored in the shared preferences differ from the values
 * currently in use.
 */
public class MainActivity extends AppCompatActivity implements View.OnClickListener, MediaPlayer.OnPreparedListener, MediaPlayer.OnErrorListener, AdapterView.OnItemSelectedListener {
    // Commands sent to the worker thread
    public static final int REQUEST_LOAD_MODEL = 0;
    public static final int REQUEST_RUN_MODEL = 1;
    // Results reported back from the worker thread
    public static final int RESPONSE_LOAD_MODEL_SUCCESSED = 0;
    public static final int RESPONSE_LOAD_MODEL_FAILED = 1;
    public static final int RESPONSE_RUN_MODEL_SUCCESSED = 2;
    public static final int RESPONSE_RUN_MODEL_FAILED = 3;
    public MediaPlayer mediaPlayer = new MediaPlayer();
    private static final String TAG = MainActivity.class.getSimpleName();
    protected ProgressDialog pbLoadModel = null;
    protected ProgressDialog pbRunModel = null;
    // Receive messages from worker thread
    protected Handler receiver = null;
    // Send command to worker thread
    protected Handler sender = null;
    // Worker thread to load&run model
    protected HandlerThread worker = null;
    // UI components
    protected TextView tvInputSetting;
    protected TextView tvInferenceTime;
    protected Button btn_play;
    protected Button btn_pause;
    protected Button btn_stop;
    // Model settings currently in use
    protected String modelPath = "";
    protected int cpuThreadNum = 1;
    protected String cpuPowerMode = "";
    protected Predictor predictor = new Predictor();
    // Sample rate used for the wav header and for the RTF computation
    int sampleRate = 24000;
    private final String wavName = "tts_output.wav";
    private final String wavFile = Environment.getExternalStorageDirectory() + File.separator + wavName;
    private final String AMmodelName = "fastspeech2_csmsc_arm.nb";
    private final String VOCmodelName = "mb_melgan_csmsc_arm.nb";
    // Phone ids of the sentence selected last; consumed by onRunModel()
    private float[] phones = {};
    // Phone-id sequences of the selectable sentences. Spinner position p (p >= 1) maps to
    // row p - 1; position 0 never triggers synthesis. Each row is preceded by its utterance
    // id and an English rendering of the sentence.
    private final float[][] sentencesToChoose = {
            // 009901 Yesterday, the "injured person" and the doctors were all placed under criminal detention by the police in accordance with the law.
            {261, 231, 175, 116, 179, 262, 44, 154, 126, 177, 19, 262, 42, 241, 72, 177, 56, 174, 245, 37, 186, 37, 49, 151, 127, 69, 19, 179, 72, 69, 4, 260, 126, 177, 116, 151, 239, 153, 141},
            // 009902 Qian Weichang's wish to come to Shanghai to run a school was carefully thought through.
            {174, 83, 213, 39, 20, 260, 89, 40, 30, 177, 22, 71, 9, 153, 8, 37, 17, 260, 251, 260, 99, 179, 177, 116, 151, 125, 70, 233, 177, 51, 176, 108, 177, 184, 153, 242, 40, 45},
            // 009903 She scolded me the moment I came in, scolded me at meals too, until I could not hold my head up.
            {182, 2, 151, 85, 232, 73, 151, 123, 154, 52, 151, 143, 154, 5, 179, 39, 113, 69, 17, 177, 114, 105, 154, 5, 179, 154, 5, 40, 45, 232, 182, 8, 37, 186, 174, 74, 182, 168},
            // 009904 Before leaving, Li Shude said only one sentence: "Zhutuo killed father."
            {153, 74, 177, 186, 40, 42, 261, 10, 153, 73, 152, 7, 262, 113, 174, 83, 179, 262, 115, 177, 230, 153, 45, 73, 151, 242, 180, 262, 186, 182, 231, 177, 2, 69, 186, 174, 124, 153, 45},
            // 009905 Selling this kind of ticket bundled with an insurance policy amounts to a duplicate purchase.
            {262, 44, 262, 163, 39, 41, 173, 99, 71, 42, 37, 28, 260, 84, 40, 14, 179, 152, 220, 37, 21, 39, 183, 177, 170, 179, 177, 185, 240, 39, 162, 69, 186, 260, 128, 70, 170, 154, 9},
            // 009906 Penny Tai's boyfriend Ximilu took over singing the love song, which made her very happy.
            {40, 10, 173, 49, 155, 72, 40, 45, 155, 15, 142, 260, 72, 154, 74, 153, 186, 179, 151, 103, 39, 22, 174, 126, 70, 41, 179, 175, 22, 182, 2, 69, 46, 39, 20, 152, 7, 260, 120},
            // 009907 Observing the big trends, planning the big picture and producing major strategies has always been the institute's guiding policy.
            {70, 199, 40, 5, 177, 116, 154, 168, 40, 5, 151, 240, 179, 39, 183, 40, 5, 38, 44, 179, 177, 115, 262, 161, 177, 116, 70, 7, 247, 40, 45, 37, 17, 247, 69, 19, 262, 51},
            // 009908 They rode motorcycles home, just in time to help their parents during the busy farming season.
            {182, 2, 154, 55, 174, 73, 262, 45, 154, 157, 182, 230, 71, 212, 151, 77, 180, 262, 59, 71, 29, 214, 155, 162, 154, 20, 177, 114, 40, 45, 69, 186, 154, 185, 37, 19, 154, 20},
            // 009909 But not yet having reached retirement age, one can only count the days on one's fingers.
            {40, 17, 177, 116, 120, 214, 71, 8, 154, 47, 40, 30, 182, 214, 260, 140, 155, 83, 153, 126, 180, 262, 115, 155, 57, 37, 7, 262, 45, 262, 115, 182, 171, 8, 175, 116, 261, 112},
            // 009910 It has rained nonstop these days; people would rather stay home and not go out.
            {262, 44, 151, 74, 182, 82, 240, 177, 213, 37, 184, 40, 202, 180, 175, 52, 154, 55, 71, 54, 37, 186, 40, 42, 40, 7, 261, 10, 151, 77, 153, 74, 37, 186, 39, 183, 154, 52}

    };

    /** Handles the play / pause / stop buttons. */
    @Override
    public void onClick(View v) {
        switch (v.getId()) {
            case R.id.btn_play:
                if (!mediaPlayer.isPlaying()) {
                    mediaPlayer.start();
                }
                break;
            case R.id.btn_pause:
                if (mediaPlayer.isPlaying()) {
                    mediaPlayer.pause();
                }
                break;
            case R.id.btn_stop:
                if (mediaPlayer.isPlaying()) {
                    // Reloading the file rewinds playback to the beginning.
                    initMediaPlayer();
                }
                break;
            default:
                break;
        }
    }

    /**
     * (Re)loads {@link #wavFile} into the media player and leaves it prepared, so that a
     * later click on the play button can start playback immediately.
     */
    private void initMediaPlayer() {
        try {
            // setDataSource() is only legal in the Idle state. This method runs again after
            // every synthesis and from the stop button, so always reset first.
            mediaPlayer.reset();
            // Path of the audio file to play
            mediaPlayer.setDataSource(new File(wavFile).getPath());
            // Prepare synchronously. Do not additionally call prepareAsync(): it throws
            // IllegalStateException on a player that is already prepared.
            mediaPlayer.prepare();
        } catch (Exception e) {
            Log.e(TAG, "Failed to prepare MediaPlayer for " + wavFile, e);
        }
    }

    /** Starts playback as soon as the given player is prepared. */
    @Override
    public void onPrepared(MediaPlayer player) {
        player.start();
    }

    /** Recovers from a player error by reloading the wav file. */
    @Override
    public boolean onError(MediaPlayer mp, int what, int extra) {
        // The MediaPlayer has moved to the Error state, must be reset!
        // initMediaPlayer() performs the reset before reloading the file.
        initMediaPlayer();
        return true;
    }

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        requestAllPermissions();
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        // Sentence spinner: the entries come from the R.array.text string array and are
        // shown with the system drop-down item layout.
        Spinner spinner = findViewById(R.id.spinner1);
        String[] sentences = getResources().getStringArray(R.array.text);
        ArrayAdapter<String> adapter = new ArrayAdapter<String>(this, android.R.layout.simple_spinner_dropdown_item, sentences);
        spinner.setAdapter(adapter);
        spinner.setOnItemSelectedListener(this);

        btn_play = findViewById(R.id.btn_play);
        btn_pause = findViewById(R.id.btn_pause);
        btn_stop = findViewById(R.id.btn_stop);

        btn_play.setOnClickListener(this);
        btn_pause.setOnClickListener(this);
        btn_stop.setOnClickListener(this);

        // The playback buttons stay hidden until a synthesis has succeeded.
        btn_play.setVisibility(View.INVISIBLE);
        btn_pause.setVisibility(View.INVISIBLE);
        btn_stop.setVisibility(View.INVISIBLE);


        // Clear all setting items to avoid app crashing due to the incorrect settings
        SharedPreferences sharedPreferences = PreferenceManager.getDefaultSharedPreferences(this);
        SharedPreferences.Editor editor = sharedPreferences.edit();
        editor.clear();
        editor.commit();

        // Handler that receives the worker's results on the thread running onCreate()
        receiver = new Handler() {
            @Override
            public void handleMessage(Message msg) {
                switch (msg.what) {
                    case RESPONSE_LOAD_MODEL_SUCCESSED:
                        pbLoadModel.dismiss();
                        onLoadModelSuccessed();
                        break;
                    case RESPONSE_LOAD_MODEL_FAILED:
                        pbLoadModel.dismiss();
                        Toast.makeText(MainActivity.this, "Load model failed!", Toast.LENGTH_SHORT).show();
                        onLoadModelFailed();
                        break;
                    case RESPONSE_RUN_MODEL_SUCCESSED:
                        pbRunModel.dismiss();
                        onRunModelSuccessed();
                        break;
                    case RESPONSE_RUN_MODEL_FAILED:
                        pbRunModel.dismiss();
                        Toast.makeText(MainActivity.this, "Run model failed!", Toast.LENGTH_SHORT).show();
                        onRunModelFailed();
                        break;
                    default:
                        break;
                }
            }
        };

        // Worker thread (and its command handler) for model loading and inference
        worker = new HandlerThread("Predictor Worker");
        worker.start();
        sender = new Handler(worker.getLooper()) {
            @Override
            public void handleMessage(Message msg) {
                switch (msg.what) {
                    case REQUEST_LOAD_MODEL:
                        // Load the models and report the outcome
                        if (onLoadModel()) {
                            receiver.sendEmptyMessage(RESPONSE_LOAD_MODEL_SUCCESSED);
                        } else {
                            receiver.sendEmptyMessage(RESPONSE_LOAD_MODEL_FAILED);
                        }
                        break;
                    case REQUEST_RUN_MODEL:
                        // Run model if model is loaded
                        if (onRunModel()) {
                            receiver.sendEmptyMessage(RESPONSE_RUN_MODEL_SUCCESSED);
                        } else {
                            receiver.sendEmptyMessage(RESPONSE_RUN_MODEL_FAILED);
                        }
                        break;
                    default:
                        break;
                }
            }
        };

        // Setup the UI components
        tvInputSetting = findViewById(R.id.tv_input_setting);
        tvInferenceTime = findViewById(R.id.tv_inference_time);
        tvInputSetting.setMovementMethod(ScrollingMovementMethod.getInstance());
    }

    /**
     * Compares the stored settings with the ones in use and reloads the model when any of
     * them changed. The fields start out empty, so the first call always loads the model.
     */
    @Override
    protected void onResume() {
        super.onResume();
        boolean settingsChanged = false;
        SharedPreferences sharedPreferences = PreferenceManager.getDefaultSharedPreferences(this);
        String model_path = sharedPreferences.getString(getString(R.string.MODEL_PATH_KEY),
                getString(R.string.MODEL_PATH_DEFAULT));

        settingsChanged |= !model_path.equalsIgnoreCase(modelPath);

        int cpu_thread_num = Integer.parseInt(sharedPreferences.getString(getString(R.string.CPU_THREAD_NUM_KEY),
                getString(R.string.CPU_THREAD_NUM_DEFAULT)));
        settingsChanged |= cpu_thread_num != cpuThreadNum;
        String cpu_power_mode =
                sharedPreferences.getString(getString(R.string.CPU_POWER_MODE_KEY),
                        getString(R.string.CPU_POWER_MODE_DEFAULT));
        settingsChanged |= !cpu_power_mode.equalsIgnoreCase(cpuPowerMode);

        if (settingsChanged) {
            modelPath = model_path;
            cpuThreadNum = cpu_thread_num;
            cpuPowerMode = cpu_power_mode;
            // Update UI
            tvInputSetting.setText("Model: " + modelPath.substring(modelPath.lastIndexOf("/") + 1) + "\n" + "CPU" +
                    " Thread Num: " + cpuThreadNum + "\n" + "CPU Power Mode: " + cpuPowerMode + "\n");
            tvInputSetting.scrollTo(0, 0);
            // Reload model if configure has been changed
            loadModel();
        }
    }

    /** Shows a progress dialog and asks the worker thread to load the models. */
    public void loadModel() {
        pbLoadModel = ProgressDialog.show(this, "", "Loading model...", false, false);
        sender.sendEmptyMessage(REQUEST_LOAD_MODEL);
    }

    /** Shows a progress dialog and asks the worker thread to synthesize {@link #phones}. */
    public void runModel() {
        pbRunModel = ProgressDialog.show(this, "", "Running model...", false, false);
        sender.sendEmptyMessage(REQUEST_RUN_MODEL);
    }

    /** Runs on the worker thread: loads both models with the current settings. */
    public boolean onLoadModel() {
        return predictor.init(MainActivity.this, modelPath, AMmodelName, VOCmodelName, cpuThreadNum,
                cpuPowerMode);
    }

    /** Runs on the worker thread: synthesizes the selected sentence if the model is loaded. */
    public boolean onRunModel() {
        return predictor.isLoaded() && predictor.runModel(phones);
    }

    /** Called after a successful model load; nothing is synthesized until a sentence is picked. */
    public boolean onLoadModelSuccessed() {
        return true;
    }

    public void onLoadModelFailed() {
    }

    /**
     * Shows the timing of the finished synthesis, saves the waveform as a wav file and
     * prepares the media player for it.
     */
    public void onRunModelSuccessed() {
        // Obtain results and update UI
        btn_play.setVisibility(View.VISIBLE);
        btn_pause.setVisibility(View.VISIBLE);
        btn_stop.setVisibility(View.VISIBLE);
        // Real-time factor = inference time / audio duration. The denominator is computed
        // in float so that long outputs cannot overflow an int multiplication.
        float rtf = predictor.inferenceTime() * sampleRate / (predictor.wav.length * 1000f);
        tvInferenceTime.setText("Inference done！\nInference time: " + predictor.inferenceTime() + " ms"
                + "\nRTF: " + rtf + "\nAudio saved in " + wavFile);
        if (ContextCompat.checkSelfPermission(MainActivity.this,
                Manifest.permission.WRITE_EXTERNAL_STORAGE) != PackageManager.PERMISSION_GRANTED) {
            // Without the permission the file cannot be written, so ask for it and stop here.
            ActivityCompat.requestPermissions(MainActivity.this, new String[]{Manifest.permission.WRITE_EXTERNAL_STORAGE}, 1);
            return;
        }
        try {
            Utils.rawToWave(wavFile, predictor.wav, sampleRate);
        } catch (IOException e) {
            // Do not hand a missing or stale file to the media player.
            Log.e(TAG, "Failed to write " + wavFile, e);
            return;
        }
        // Load the freshly written file into the MediaPlayer
        initMediaPlayer();
    }

    public void onRunModelFailed() {
    }


    /** Opens the settings screen. */
    public void onSettingsClicked() {
        startActivity(new Intent(MainActivity.this, SettingsActivity.class));
    }

    @Override
    public boolean onCreateOptionsMenu(Menu menu) {
        MenuInflater inflater = getMenuInflater();
        inflater.inflate(R.menu.menu_action_options, menu);
        return true;
    }

    @Override
    public boolean onOptionsItemSelected(MenuItem item) {
        switch (item.getItemId()) {
            case android.R.id.home:
                finish();
                break;
            case R.id.settings:
                onSettingsClicked();
                break;
            default:
                break;
        }
        return super.onOptionsItemSelected(item);
    }

    @Override
    public void onRequestPermissionsResult(int requestCode, @NonNull String[] permissions,
                                           @NonNull int[] grantResults) {

        super.onRequestPermissionsResult(requestCode, permissions, grantResults);
        // An interrupted request is delivered with empty arrays; treat it like a denial
        // instead of indexing into the empty array.
        if (grantResults.length == 0 || grantResults[0] != PackageManager.PERMISSION_GRANTED) {
            Toast.makeText(this, "Permission Denied", Toast.LENGTH_SHORT).show();
        }
    }


    @Override
    protected void onDestroy() {
        if (predictor != null) {
            predictor.releaseModel();
        }
        if (worker != null) {
            worker.quit();
        }
        super.onDestroy();
        if (mediaPlayer != null) {
            // release() is legal in every player state and ends playback; a preceding
            // stop() would be an invalid call when no file was ever loaded.
            mediaPlayer.release();
        }
    }

    /**
     * Requests the external-storage write permission when it has not been granted yet.
     *
     * @return true if the permission is already granted, false if a request was issued
     */
    private boolean requestAllPermissions() {
        // Only the storage permission is declared in the manifest and used by this app.
        if (ContextCompat.checkSelfPermission(this, Manifest.permission.WRITE_EXTERNAL_STORAGE)
                != PackageManager.PERMISSION_GRANTED) {
            ActivityCompat.requestPermissions(this, new String[]{Manifest.permission.WRITE_EXTERNAL_STORAGE},
                    0);
            return false;
        }
        return true;
    }


    /** Synthesizes the sentence that belongs to the selected spinner entry. */
    @Override
    public void onItemSelected(AdapterView<?> parent, View view, int position, long id) {
        // Position 0 does not trigger synthesis; positions beyond the table are ignored so
        // that extra spinner entries cannot index past sentencesToChoose.
        if (position > 0 && position <= sentencesToChoose.length) {
            phones = sentencesToChoose[position - 1];
            runModel();
        }

    }

    @Override
    public void onNothingSelected(AdapterView<?> parent) {

    }
}


================================================
FILE: demos/TTSAndroid/app/src/main/java/com/baidu/paddle/lite/demo/tts/Predictor.java
================================================
package com.baidu.paddle.lite.demo.tts;

import android.content.Context;
import android.util.Log;

import com.baidu.paddle.lite.MobileConfig;
import com.baidu.paddle.lite.PaddlePredictor;
import com.baidu.paddle.lite.PowerMode;
import com.baidu.paddle.lite.Tensor;

import java.io.File;
import java.util.Date;


/**
 * Wraps the two Paddle Lite predictors used for speech synthesis: an acoustic model (AM)
 * that takes a phone-id sequence and a vocoder (VOC) that takes the acoustic model's
 * output tensor and produces the waveform samples.
 */
public class Predictor {
    private static final String TAG = Predictor.class.getSimpleName();
    // True once both predictors have been created by init()
    public boolean isLoaded = false;
    // Settings of the currently loaded models; reset to these defaults by releaseModel()
    public int cpuThreadNum = 1;
    public String cpuPowerMode = "LITE_POWER_HIGH";
    public String modelPath = "";
    protected PaddlePredictor AMPredictor = null;
    protected PaddlePredictor VOCPredictor = null;
    // Duration of the last runModel() call in milliseconds
    protected float inferenceTime = 0;
    // Waveform produced by the last runModel() call
    protected float[] wav;

    /**
     * Releases any loaded models and loads the acoustic model and the vocoder.
     *
     * @param appCtx       context used to reach the assets and the cache directory
     * @param modelPath    absolute directory (starting with '/') or an assets sub-directory
     * @param AMmodelName  file name of the acoustic model inside {@code modelPath}
     * @param VOCmodelName file name of the vocoder model inside {@code modelPath}
     * @param cpuThreadNum number of threads handed to Paddle Lite
     * @param cpuPowerMode one of the {@code LITE_POWER_*} names (case-insensitive)
     * @return true when both models were loaded; false otherwise, in which case nothing
     * stays loaded
     */
    public boolean init(Context appCtx, String modelPath, String AMmodelName, String VOCmodelName, int cpuThreadNum, String cpuPowerMode) {
        // Release model if exists
        releaseModel();

        // Build both predictors in locals first so that a failure of the second load does
        // not leave a half-initialized state behind.
        PaddlePredictor am = loadModel(appCtx, modelPath, AMmodelName, cpuThreadNum, cpuPowerMode);
        if (am == null) {
            return false;
        }
        PaddlePredictor voc = loadModel(appCtx, modelPath, VOCmodelName, cpuThreadNum, cpuPowerMode);
        if (voc == null) {
            return false;
        }
        AMPredictor = am;
        VOCPredictor = voc;
        // Remember the settings the models were loaded with.
        this.modelPath = modelPath;
        this.cpuThreadNum = cpuThreadNum;
        this.cpuPowerMode = cpuPowerMode;
        isLoaded = true;
        return true;
    }

    /**
     * Creates one predictor for {@code modelName} located in {@code modelPath}.
     *
     * @return the predictor, or null when the path is empty or the power mode is unknown
     */
    protected PaddlePredictor loadModel(Context appCtx, String modelPath, String modelName, int cpuThreadNum, String cpuPowerMode) {
        if (modelPath == null || modelPath.isEmpty()) {
            return null;
        }
        // Validate the power mode before any file is copied.
        PowerMode powerMode = parsePowerMode(cpuPowerMode);
        if (powerMode == null) {
            Log.e(TAG, "Unknown cpu power mode!");
            return null;
        }
        String realPath = modelPath;
        if (modelPath.charAt(0) != '/') {
            // A path starting with '/' is a custom location on the device and is used
            // as is; any other path names an assets directory that is first copied into
            // the app's cache directory.
            realPath = appCtx.getCacheDir() + "/" + modelPath;
            Utils.copyDirectoryFromAssets(appCtx, modelPath, realPath);
        }
        String modelFile = realPath + File.separator + modelName;
        MobileConfig config = new MobileConfig();
        config.setModelFromFile(modelFile);
        // Informational message, so it is not logged at error level.
        Log.i(TAG, "File:" + modelFile);
        config.setThreads(cpuThreadNum);
        config.setPowerMode(powerMode);
        return PaddlePredictor.createPaddlePredictor(config);
    }

    /** Maps a {@code LITE_POWER_*} name (case-insensitive) to its constant; null if unknown. */
    private static PowerMode parsePowerMode(String name) {
        if ("LITE_POWER_HIGH".equalsIgnoreCase(name)) {
            return PowerMode.LITE_POWER_HIGH;
        } else if ("LITE_POWER_LOW".equalsIgnoreCase(name)) {
            return PowerMode.LITE_POWER_LOW;
        } else if ("LITE_POWER_FULL".equalsIgnoreCase(name)) {
            return PowerMode.LITE_POWER_FULL;
        } else if ("LITE_POWER_NO_BIND".equalsIgnoreCase(name)) {
            return PowerMode.LITE_POWER_NO_BIND;
        } else if ("LITE_POWER_RAND_HIGH".equalsIgnoreCase(name)) {
            return PowerMode.LITE_POWER_RAND_HIGH;
        } else if ("LITE_POWER_RAND_LOW".equalsIgnoreCase(name)) {
            return PowerMode.LITE_POWER_RAND_LOW;
        }
        return null;
    }

    /** Drops both predictors and resets the settings fields to their defaults. */
    public void releaseModel() {
        AMPredictor = null;
        VOCPredictor = null;
        isLoaded = false;
        cpuThreadNum = 1;
        cpuPowerMode = "LITE_POWER_HIGH";
        modelPath = "";
    }

    /**
     * Synthesizes the given phone-id sequence; the samples are stored in {@link #wav} and
     * the elapsed time in {@link #inferenceTime}.
     *
     * @param phones phone ids of the sentence
     * @return false when no model is loaded or {@code phones} is null or empty, true otherwise
     */
    public boolean runModel(float[] phones) {
        if (!isLoaded() || phones == null || phones.length == 0) {
            return false;
        }
        // nanoTime() is monotonic, unlike the wall clock; the result is kept in whole
        // milliseconds.
        long startNs = System.nanoTime();
        Tensor am_output_handle = getAMOutput(phones, AMPredictor);
        wav = getVOCOutput(am_output_handle, VOCPredictor);
        inferenceTime = (System.nanoTime() - startNs) / 1000000L;
        return true;
    }

    /**
     * Feeds {@code phones} to the acoustic model and runs it.
     *
     * @return the model's first output tensor; the original comments give its shape as
     * [?, 80] (mel frames). The tensor itself is returned, not a flat float array,
     * because the vocoder needs to know the input shape.
     */
    public Tensor getAMOutput(float[] phones, PaddlePredictor am_predictor) {
        Tensor phones_handle = am_predictor.getInput(0);
        long[] dims = {phones.length};
        phones_handle.resize(dims);
        phones_handle.setData(phones);
        am_predictor.run();
        return am_predictor.getOutput(0);
    }

    /**
     * Feeds the acoustic model output to the vocoder and runs it.
     *
     * @param input output tensor of the acoustic model
     * @return the float data of the vocoder's first output; the original comments give its
     * shape as [? x 300, 1]
     */
    public float[] getVOCOutput(Tensor input, PaddlePredictor voc_predictor) {
        Tensor mel_handle = voc_predictor.getInput(0);
        // The vocoder input takes over the shape of the acoustic model output
        mel_handle.resize(input.shape());
        mel_handle.setData(input.getFloatData());
        voc_predictor.run();
        Tensor voc_output_handle = voc_predictor.getOutput(0);
        return voc_output_handle.getFloatData();
    }


    /** @return true when both predictors exist and init() completed successfully */
    public boolean isLoaded() {
        return AMPredictor != null && VOCPredictor != null && isLoaded;
    }


    /** @return duration of the last successful runModel() call in milliseconds */
    public float inferenceTime() {
        return inferenceTime;
    }

}


================================================
FILE: demos/TTSAndroid/app/src/main/java/com/baidu/paddle/lite/demo/tts/SettingsActivity.java
================================================
package com.baidu.paddle.lite.demo.tts;

import android.content.SharedPreferences;
import android.os.Bundle;
import android.preference.CheckBoxPreference;
import android.preference.EditTextPreference;
import android.preference.ListPreference;
import android.support.v7.app.ActionBar;

import java.util.ArrayList;
import java.util.List;

/**
 * Preference screen of the TTS demo.
 *
 * Lets the user pick a pre-installed model or, when "custom settings" is
 * enabled, edit the model path, CPU thread count and CPU power mode directly.
 * While custom settings are disabled, those three values are overwritten with
 * the ones that belong to the selected pre-installed model.
 */
public class SettingsActivity extends AppCompatPreferenceActivity implements SharedPreferences.OnSharedPreferenceChangeListener {
    ListPreference lpChoosePreInstalledModel = null;
    CheckBoxPreference cbEnableCustomSettings = null;
    EditTextPreference etModelPath = null;
    ListPreference lpCPUThreadNum = null;
    ListPreference lpCPUPowerMode = null;

    // Parallel lists: index i of each list describes the i-th pre-installed model.
    List<String> preInstalledModelPaths = null;
    List<String> preInstalledCPUThreadNums = null;
    List<String> preInstalledCPUPowerModes = null;


    /**
     * Inflates the preference hierarchy, registers the pre-installed model
     * and looks up the preference widgets used by this screen.
     */
    @Override
    public void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        addPreferencesFromResource(R.xml.settings);
        ActionBar supportActionBar = getSupportActionBar();
        if (supportActionBar != null) {
            supportActionBar.setDisplayHomeAsUpEnabled(true);
        }

        // Initialize pre-installed models
        preInstalledModelPaths = new ArrayList<String>();
        preInstalledCPUThreadNums = new ArrayList<String>();
        preInstalledCPUPowerModes = new ArrayList<String>();
        preInstalledModelPaths.add(getString(R.string.MODEL_PATH_DEFAULT));
        preInstalledCPUThreadNums.add(getString(R.string.CPU_THREAD_NUM_DEFAULT));
        preInstalledCPUPowerModes.add(getString(R.string.CPU_POWER_MODE_DEFAULT));


        // Setup UI components
        lpChoosePreInstalledModel = (ListPreference) findPreference(getString(R.string.CHOOSE_PRE_INSTALLED_MODEL_KEY));
        // The displayed entry is the last path segment; the stored value is the full path.
        String[] preInstalledModelNames = new String[preInstalledModelPaths.size()];
        for (int i = 0; i < preInstalledModelPaths.size(); i++) {
            String path = preInstalledModelPaths.get(i);
            preInstalledModelNames[i] = path.substring(path.lastIndexOf("/") + 1);
        }
        lpChoosePreInstalledModel.setEntries(preInstalledModelNames);
        lpChoosePreInstalledModel.setEntryValues(preInstalledModelPaths.toArray(new String[0]));
        lpCPUThreadNum = (ListPreference) findPreference(getString(R.string.CPU_THREAD_NUM_KEY));
        lpCPUPowerMode = (ListPreference) findPreference(getString(R.string.CPU_POWER_MODE_KEY));
        cbEnableCustomSettings = (CheckBoxPreference) findPreference(getString(R.string.ENABLE_CUSTOM_SETTINGS_KEY));
        etModelPath = (EditTextPreference) findPreference(getString(R.string.MODEL_PATH_KEY));
        etModelPath.setTitle("Model Path (SDCard: " + Utils.getSDCardDirectory() + ")");
    }

    /**
     * Re-reads the stored preferences and brings every widget in line with them.
     *
     * When custom settings are disabled, the model path, thread count and power
     * mode are first reset to the values of the selected pre-installed model.
     */
    private void reloadPreferenceAndUpdateUI() {
        SharedPreferences sharedPreferences = getPreferenceScreen().getSharedPreferences();
        boolean enableCustomSettings = sharedPreferences.getBoolean(getString(R.string.ENABLE_CUSTOM_SETTINGS_KEY), false);
        String modelPath = sharedPreferences.getString(getString(R.string.CHOOSE_PRE_INSTALLED_MODEL_KEY), getString(R.string.MODEL_PATH_DEFAULT));
        int modelIdx = lpChoosePreInstalledModel.findIndexOfValue(modelPath);
        if (modelIdx >= 0 && modelIdx < preInstalledModelPaths.size()) {
            if (!enableCustomSettings) {
                SharedPreferences.Editor editor = sharedPreferences.edit();
                editor.putString(getString(R.string.MODEL_PATH_KEY), preInstalledModelPaths.get(modelIdx));
                editor.putString(getString(R.string.CPU_THREAD_NUM_KEY), preInstalledCPUThreadNums.get(modelIdx));
                editor.putString(getString(R.string.CPU_POWER_MODE_KEY), preInstalledCPUPowerModes.get(modelIdx));
                // apply() updates the in-memory values immediately (the reads below
                // see them) and writes to disk asynchronously; the commit() result
                // was never checked, so nothing is lost by not blocking on the write.
                editor.apply();
            }
            lpChoosePreInstalledModel.setSummary(modelPath);
        }
        cbEnableCustomSettings.setChecked(enableCustomSettings);
        etModelPath.setEnabled(enableCustomSettings);
        lpCPUThreadNum.setEnabled(enableCustomSettings);
        lpCPUPowerMode.setEnabled(enableCustomSettings);
        modelPath = sharedPreferences.getString(getString(R.string.MODEL_PATH_KEY), getString(R.string.MODEL_PATH_DEFAULT));
        String cpuThreadNum = sharedPreferences.getString(getString(R.string.CPU_THREAD_NUM_KEY), getString(R.string.CPU_THREAD_NUM_DEFAULT));
        String cpuPowerMode = sharedPreferences.getString(getString(R.string.CPU_POWER_MODE_KEY), getString(R.string.CPU_POWER_MODE_DEFAULT));

        etModelPath.setSummary(modelPath);
        etModelPath.setText(modelPath);
        lpCPUThreadNum.setValue(cpuThreadNum);
        lpCPUThreadNum.setSummary(cpuThreadNum);
        lpCPUPowerMode.setValue(cpuPowerMode);
        lpCPUPowerMode.setSummary(cpuPowerMode);
    }

    /** Starts listening for preference changes and refreshes the UI. */
    @Override
    protected void onResume() {
        super.onResume();
        getPreferenceScreen().getSharedPreferences().registerOnSharedPreferenceChangeListener(this);
        reloadPreferenceAndUpdateUI();
    }

    /** Stops listening for preference changes while the screen is not in the foreground. */
    @Override
    protected void onPause() {
        super.onPause();
        getPreferenceScreen().getSharedPreferences().unregisterOnSharedPreferenceChangeListener(this);
    }

    /**
     * Reacts to a changed preference. Choosing a pre-installed model switches
     * custom settings off; every change refreshes the UI.
     *
     * @param sharedPreferences the preferences that changed
     * @param key               key of the changed preference; may be null (the platform
     *                          documents a null key when preferences are cleared)
     */
    @Override
    public void onSharedPreferenceChanged(SharedPreferences sharedPreferences, String key) {
        // Compare from the constant side so a null key cannot throw.
        if (getString(R.string.CHOOSE_PRE_INSTALLED_MODEL_KEY).equals(key)) {
            SharedPreferences.Editor editor = sharedPreferences.edit();
            editor.putBoolean(getString(R.string.ENABLE_CUSTOM_SETTINGS_KEY), false);
            editor.apply();
        }
        reloadPreferenceAndUpdateUI();
    }
}


================================================
FILE: demos/TTSAndroid/app/src/main/java/com/baidu/paddle/lite/demo/tts/Utils.java
================================================
package com.baidu.paddle.lite.demo.tts;

import static java.lang.Math.abs;

import android.content.Context;
import android.os.Environment;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

/**
 * Static helpers for the TTS demo: copying bundled assets to the file system
 * and writing synthesized audio as a 16-bit mono PCM WAV file.
 */
public class Utils {
    /**
     * Copies a single asset to a file. I/O failures are printed, not thrown.
     *
     * @param appCtx  context whose AssetManager is used to open the asset
     * @param srcPath asset path; nothing happens if it is empty
     * @param dstPath destination file path; nothing happens if it is empty
     */
    public static void copyFileFromAssets(Context appCtx, String srcPath, String dstPath) {
        if (srcPath.isEmpty() || dstPath.isEmpty()) {
            return;
        }
        InputStream is = null;
        OutputStream os = null;
        try {
            is = new BufferedInputStream(appCtx.getAssets().open(srcPath));
            os = new BufferedOutputStream(new FileOutputStream(new File(dstPath)));
            byte[] buffer = new byte[1024];
            int length;
            while ((length = is.read(buffer)) != -1) {
                os.write(buffer, 0, length);
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Either stream may still be null if opening failed, and a failure to
            // close one stream must not prevent closing the other.
            if (os != null) {
                try {
                    os.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (is != null) {
                try {
                    is.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Recursively copies an asset directory to the file system. Failures are
     * printed, not thrown.
     *
     * @param appCtx context whose AssetManager is used to list and open assets
     * @param srcDir asset directory; nothing happens if it is empty
     * @param dstDir destination directory, created if missing; nothing happens if it is empty
     */
    public static void copyDirectoryFromAssets(Context appCtx, String srcDir, String dstDir) {
        if (srcDir.isEmpty() || dstDir.isEmpty()) {
            return;
        }
        try {
            File dstDirFile = new File(dstDir);
            if (!dstDirFile.exists()) {
                dstDirFile.mkdirs();
            }
            String[] fileNames = appCtx.getAssets().list(srcDir);
            if (fileNames == null) {
                return;
            }
            for (String fileName : fileNames) {
                String srcSubPath = srcDir + File.separator + fileName;
                String dstSubPath = dstDir + File.separator + fileName;
                // Asset paths are not file-system paths, so java.io.File cannot tell
                // whether an asset is a directory. An asset entry that lists children
                // is treated as a directory instead.
                String[] children = appCtx.getAssets().list(srcSubPath);
                if (children != null && children.length > 0) {
                    copyDirectoryFromAssets(appCtx, srcSubPath, dstSubPath);
                } else {
                    copyFileFromAssets(appCtx, srcSubPath, dstSubPath);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }


    /**
     * @return absolute path of the directory reported by
     *         {@code Environment.getExternalStorageDirectory()}
     */
    public static String getSDCardDirectory() {
        return Environment.getExternalStorageDirectory().getAbsolutePath();
    }

    /**
     * Writes float samples as a mono, 16-bit PCM WAV file.
     *
     * The samples are peak-normalized by {@link #FloatArray2ShortArray(float[])}
     * before being written; {@code data} itself is not modified.
     *
     * @param file       path of the WAV file to create or overwrite
     * @param data       audio samples
     * @param samplerate sample rate written into the header, in Hz
     * @throws IOException if the file cannot be opened or written
     */
    public static void rawToWave(String file, float[] data, int samplerate) throws IOException {
        short[] samples = FloatArray2ShortArray(data);
        int dataBytes = samples.length * 2;
        // FileOutputStream creates the file when needed. The stream is buffered
        // because the header and samples are emitted one byte at a time.
        DataOutputStream output = null;
        try {
            output = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(new File(file))));
            // WAVE header
            // chunk id
            writeString(output, "RIFF");
            // chunk size
            writeInt(output, 36 + dataBytes);
            // format
            writeString(output, "WAVE");
            // subchunk 1 id
            writeString(output, "fmt ");
            // subchunk 1 size
            writeInt(output, 16);
            // audio format (1 = PCM)
            writeShort(output, (short) 1);
            // number of channels
            writeShort(output, (short) 1);
            // sample rate
            writeInt(output, samplerate);
            // byte rate (sample rate * 2 bytes per mono sample)
            writeInt(output, samplerate * 2);
            // block align
            writeShort(output, (short) 2);
            // bits per sample
            writeShort(output, (short) 16);
            // subchunk 2 id
            writeString(output, "data");
            // subchunk 2 size
            writeInt(output, dataBytes);
            for (short sample : samples) {
                writeShort(output, sample);
            }
        } finally {
            if (output != null) {
                // close() flushes the buffered bytes before closing the file.
                output.close();
            }
        }
    }

    /** Writes {@code value} as 4 bytes, least significant byte first. */
    private static void writeInt(final DataOutputStream output, final int value) throws IOException {
        // write(int) emits only the low 8 bits of its argument.
        output.write(value);
        output.write(value >> 8);
        output.write(value >> 16);
        output.write(value >> 24);
    }

    /** Writes {@code value} as 2 bytes, least significant byte first. */
    private static void writeShort(final DataOutputStream output, final short value) throws IOException {
        output.write(value);
        output.write(value >> 8);
    }

    /** Writes the low 8 bits of each character of {@code value}. */
    private static void writeString(final DataOutputStream output, final String value) throws IOException {
        for (int i = 0; i < value.length(); i++) {
            output.write(value.charAt(i));
        }
    }

    /**
     * Converts float samples to 16-bit samples, scaling so that the largest
     * magnitude maps to 32767.
     *
     * The peak used for scaling is never taken to be below 0.01, so near-silent
     * input is not amplified to full scale. The input array is left untouched
     * (earlier versions rescaled it in place).
     *
     * @param values float samples
     * @return a new array with one short per input sample
     */
    public static short[] FloatArray2ShortArray(float[] values) {
        float mmax = (float) 0.01;
        short[] ret = new short[values.length];

        for (int i = 0; i < values.length; i++) {
            if (abs(values[i]) > mmax) {
                mmax = abs(values[i]);
            }
        }

        // Loop-invariant scale factor, hoisted out of the conversion loop.
        float scale = 32767 / mmax;
        for (int i = 0; i < values.length; i++) {
            ret[i] = (short) (values[i] * scale);
        }
        return ret;
    }

}


================================================
FILE: demos/TTSAndroid/app/src/main/res/drawable/button_drawable.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<selector xmlns:android="http://schemas.android.com/apk/res/android">
    <item android:state_pressed="false"><!-- when the button is not pressed -->
        <shape android:shape="rectangle"><!-- button shape -->
            <solid android:color="#008577" /><!-- button background fill color -->
            <corners android:radius="10dp" />
            <stroke android:width="1dp" android:color="#009688" /><!-- button border -->
        </shape>
    </item>

    <item android:state_pressed="true"><!-- while the button is pressed -->
        <shape android:shape="rectangle"><!-- button shape -->
            <solid android:color="#C3009688" /><!-- button background fill color -->
            <corners android:radius="10dp" />
            <stroke android:width="1dp" android:color="#009688" /><!-- button border -->
        </shape>
    </item>

</selector>


================================================
FILE: demos/TTSAndroid/app/src/main/res/layout/activity_main.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<android.support.constraint.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    tools:context=".MainActivity">

    <RelativeLayout
        android:layout_width="match_parent"
        android:layout_height="match_parent">

        <ImageView
            android:id="@+id/logo"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:layout_marginTop="20dp"
            android:src="@drawable/paddlespeech_logo" />

        <LinearLayout
            android:id="@+id/v_input_info"
            android:layout_width="fill_parent"
            android:layout_height="wrap_content"
            android:layout_below="@+id/logo"
            android:layout_alignParentTop="true"
            android:layout_marginTop="120dp"
            android:orientation="vertical">

            <TextView
                android:id="@+id/tv_input_setting"
                android:layout_width="wrap_content"
                android:layout_height="wrap_content"
                android:layout_marginLeft="12dp"
                android:layout_marginTop="10dp"
                android:layout_marginRight="12dp"
                android:layout_marginBottom="5dp"
                android:lineSpacingExtra="4dp"
                android:maxLines="6"
                android:scrollbars="vertical"
                android:singleLine="false"
                android:text=""
                android:textColor="#3C3C3C" />

            <Spinner
                android:id="@+id/spinner1"
                android:layout_width="wrap_content"
                android:layout_height="wrap_content"
                android:dropDownSelector="#63D81B60"
                android:spinnerMode="dropdown" />

            <TextView
                android:id="@+id/tv_inference_time"
                android:layout_width="wrap_content"
                android:layout_height="wrap_content"
                android:layout_below="@+id/spinner1"
                android:layout_centerHorizontal="true"
                android:layout_centerVertical="true"
                android:layout_marginLeft="12dp"
                android:layout_marginTop="50dp"
                android:layout_marginRight="12dp"
                android:layout_marginBottom="5dp"
                android:gravity="start"
                android:lineSpacingExtra="4dp"
                android:maxLines="6"
                android:textColor="#3C3C3C" />

            <LinearLayout
                android:id="@+id/btns"
                android:layout_width="match_parent"
                android:layout_height="match_parent"
                android:layout_below="@+id/tv_inference_time"
                android:layout_marginLeft="10dp"
                android:layout_marginTop="30dp">

                <Button
                    android:id="@+id/btn_play"
                    android:layout_width="60dp"
                    android:layout_height="40dp"
                    android:background="@drawable/button_drawable"
                    android:text="Play"
                    android:textAllCaps="false"
                    android:textColor="#ffffff" />

                <Button
                    android:id="@+id/btn_pause"
                    android:layout_width="60dp"
                    android:layout_height="40dp"
                    android:layout_marginLeft="3dp"
                    android:background="@drawable/button_drawable"
                    android:text="Pause"
                    android:textAllCaps="false"
                    android:textColor="#ffffff" />

                <Button
                    android:id="@+id/btn_stop"
                    android:layout_width="60dp"
                    android:layout_height="40dp"
                    android:layout_marginLeft="3dp"
                    android:background="@drawable/button_drawable"
                    android:text="Stop"
                    android:textAllCaps="false"
                    android:textColor="#ffffff" />

            </LinearLayout>


        </LinearLayout>


    </RelativeLayout>


</android.support.constraint.ConstraintLayout>

================================================
FILE: demos/TTSAndroid/app/src/main/res/menu/menu_action_options.xml
================================================
<menu xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto">
    <group>
        <item
            android:id="@+id/settings"
            android:title="Settings..."
            app:showAsAction="withText" />
    </group>
</menu>


================================================
FILE: demos/TTSAndroid/app/src/main/res/values/arrays.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>
    <string-array name="cpu_thread_num_entries">
        <item>1 threads</item>
        <item>2 threads</item>
        <item>4 threads</item>
        <item>8 threads</item>
    </string-array>
    <string-array name="cpu_thread_num_values">
        <item>1</item>
        <item>2</item>
        <item>4</item>
        <item>8</item>
    </string-array>
    <string-array name="cpu_power_mode_entries">
        <item>HIGH(only big cores)</item>
        <item>LOW(only LITTLE cores)</item>
        <item>FULL(all cores)</item>
        <item>NO_BIND(depends on system)</item>
        <item>RAND_HIGH</item>
        <item>RAND_LOW</item>
    </string-array>
    <string-array name="cpu_power_mode_values">
        <item>LITE_POWER_HIGH</item>
        <item>LITE_POWER_LOW</item>
        <item>LITE_POWER_FULL</item>
        <item>LITE_POWER_NO_BIND</item>
        <item>LITE_POWER_RAND_HIGH</item>
        <item>LITE_POWER_RAND_LOW</item>
    </string-array>
    <string-array name="text">
        <item>Please select a sentence to be synthesized</item>
        <item>昨日，这名“伤者”与医生全部被警方依法刑事拘留。</item>
        <item>钱伟长想到上海来办学校是经过深思熟虑的。</item>
        <item>她见我一进门就骂，吃饭时也骂，骂得我抬不起头。</item>
        <item>李述德在离开之前，只说了一句“柱驼杀父亲了”。</item>
        <item>这种车票和保险单捆绑出售属于重复性购买。</item>
        <item>戴佩妮的男友西米露接唱情歌，让她非常开心。</item>
        <item>观大势、谋大局、出大策始终是该院的办院方针。</item>
        <item>他们骑着摩托回家，正好为农忙时的父母帮忙。</item>
        <item>但是因为还没到退休年龄，只能掰着指头捱日子。</item>
        <item>这几天雨水不断，人们恨不得待在家里不出门。</item>
    </string-array>
</resources>

================================================
FILE: demos/TTSAndroid/app/src/main/res/values/colors.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>
    <color name="colorPrimary">#008577</color>
    <color name="colorPrimaryDark">#00574B</color>
    <color name="colorAccent">#D81B60</color>
</resources>


================================================
FILE: demos/TTSAndroid/app/src/main/res/values/strings.xml
================================================
<resources>
    <string name="app_name">TTS</string>
    <string name="CHOOSE_PRE_INSTALLED_MODEL_KEY">CHOOSE_PRE_INSTALLED_MODEL_KEY</string>
    <string name="ENABLE_CUSTOM_SETTINGS_KEY">ENABLE_CUSTOM_SETTINGS_KEY</string>
    <string name="MODEL_PATH_KEY">MODEL_PATH_KEY</string>
    <string name="CPU_THREAD_NUM_KEY">CPU_THREAD_NUM_KEY</string>
    <string name="CPU_POWER_MODE_KEY">CPU_POWER_MODE_KEY</string>
    <string name="MODEL_PATH_DEFAULT">models/cpu</string>
    <string name="CPU_THREAD_NUM_DEFAULT">1</string>
    <string name="CPU_POWER_MODE_DEFAULT">LITE_POWER_HIGH</string>
</resources>


================================================
FILE: demos/TTSAndroid/app/src/main/res/values/styles.xml
================================================
<resources>

    <!-- Base application theme. -->
    <style name="AppTheme" parent="Theme.AppCompat.Light.DarkActionBar">
        <!-- Customize your theme here. -->
        <item name="colorPrimary">@color/colorPrimary</item>
        <item name="colorPrimaryDark">@color/colorPrimaryDark</item>
        <item name="colorAccent">@color/colorAccent</item>
        <item name="actionOverflowMenuStyle">@style/OverflowMenuStyle</item>
    </style>

    <style name="OverflowMenuStyle" parent="Widget.AppCompat.Light.PopupMenu.Overflow">
        <item name="overlapAnchor">false</item>
    </style>

</resources>


================================================
FILE: demos/TTSAndroid/app/src/main/res/xml/settings.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<PreferenceScreen xmlns:android="http://schemas.android.com/apk/res/android">
    <PreferenceCategory android:title="Model Settings">
        <ListPreference
            android:defaultValue="@string/MODEL_PATH_DEFAULT"
            android:key="@string/CHOOSE_PRE_INSTALLED_MODEL_KEY"
            android:negativeButtonText="@null"
            android:positiveButtonText="@null"
            android:title="Choose pre-installed models" />
        <CheckBoxPreference
            android:defaultValue="false"
            android:key="@string/ENABLE_CUSTOM_SETTINGS_KEY"
            android:summaryOff="Disable"
            android:summaryOn="Enable"
            android:title="Enable custom settings" />
        <EditTextPreference
            android:defaultValue="@string/MODEL_PATH_DEFAULT"
            android:key="@string/MODEL_PATH_KEY"
            android:title="Model Path" />
    </PreferenceCategory>
    <PreferenceCategory android:title="CPU Settings">
        <ListPreference
            android:defaultValue="@string/CPU_THREAD_NUM_DEFAULT"
            android:entries="@array/cpu_thread_num_entries"
            android:entryValues="@array/cpu_thread_num_values"
            android:key="@string/CPU_THREAD_NUM_KEY"
            android:negativeButtonText="@null"
            android:positiveButtonText="@null"
            android:title="CPU Thread Num" />
        <ListPreference
            android:defaultValue="@string/CPU_POWER_MODE_DEFAULT"
            android:entries="@array/cpu_power_mode_entries"
            android:entryValues="@array/cpu_power_mode_values"
            android:key="@string/CPU_POWER_MODE_KEY"
            android:negativeButtonText="@null"
            android:positiveButtonText="@null"
            android:title="CPU Power Mode" />
    </PreferenceCategory>
</PreferenceScreen>


================================================
FILE: demos/TTSAndroid/app/src/test/java/com/baidu/paddle/lite/demo/tts/ExampleUnitTest.java
================================================
package com.baidu.paddle.lite.demo.tts;

import static org.junit.Assert.assertEquals;

import org.junit.Test;

/**
 * Sample local unit test; it runs on the development machine (host) rather
 * than on a device.
 *
 * @see <a href="http://d.android.com/tools/testing">Testing documentation</a>
 */
public class ExampleUnitTest {
    /** Sanity check: 2 + 2 must equal 4. */
    @Test
    public void addition_isCorrect() {
        final int expected = 4;
        final int actual = 2 + 2;
        assertEquals(expected, actual);
    }
}

================================================
FILE: demos/TTSAndroid/build.gradle
================================================
// Top-level build file where you can add configuration options common to all sub-projects/modules.

buildscript {
    repositories {
        google()
        jcenter()

    }
    dependencies {
        classpath 'com.android.tools.build:gradle:4.1.0'

        // NOTE: Do not place your application dependencies here; they belong
        // in the individual module build.gradle files
    }
}

allprojects {
    repositories {
        google()
        jcenter()

    }
}

task clean(type: Delete) {
    delete rootProject.buildDir
}


================================================
FILE: demos/TTSAndroid/gradle/wrapper/gradle-wrapper.properties
================================================
#Wed Jun 16 14:31:28 CST 2021
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-7.0-all.zip


================================================
FILE: demos/TTSAndroid/gradle.properties
================================================
# Project-wide Gradle settings.
# IDE (e.g. Android Studio) users:
# Gradle settings configured through the IDE *will override*
# any settings specified in this file.
# For more details on how to configure your build environment visit
# http://www.gradle.org/docs/current/userguide/build_environment.html
# Specifies the JVM arguments used for the daemon process.
# The setting is particularly useful for tweaking memory settings.
org.gradle.jvmargs=-Xmx1536m
# When configured, Gradle will run in incubating parallel mode.
# This option should only be used with decoupled projects. More details, visit
# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
# org.gradle.parallel=true


================================================
FILE: demos/TTSAndroid/gradlew
================================================
#!/usr/bin/env sh

##############################################################################
##
##  Gradle start up script for UN*X
##
##############################################################################

# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
        PRG="$link"
    else
        PRG=`dirname "$PRG"`"/$link"
    fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null

APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS=""

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

# Print all arguments, joined into a single string, on standard output.
warn () {
    echo "$*"
}

# Print all arguments as one message surrounded by blank lines, then
# terminate the script with exit status 1.
die () {
    echo
    echo "$*"
    echo
    exit 1
}

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
  CYGWIN* )
    cygwin=true
    ;;
  Darwin* )
    darwin=true
    ;;
  MINGW* )
    msys=true
    ;;
  NONSTOP* )
    nonstop=true
    ;;
esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar

# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD="$JAVA_HOME/jre/sh/java"
    else
        JAVACMD="$JAVA_HOME/bin/java"
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD="java"
    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
    MAX_FD_LIMIT=`ulimit -H -n`
    if [ $? -eq 0 ] ; then
        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
            MAX_FD="$MAX_FD_LIMIT"
        fi
        ulimit -n $MAX_FD
        if [ $? -ne 0 ] ; then
            warn "Could not set maximum file descriptor limit: $MAX_FD"
        fi
    else
        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
    fi
fi

# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin, switch paths to Windows format before running java
if $cygwin ; then
    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
    JAVACMD=`cygpath --unix "$JAVACMD"`

    # We build the pattern for arguments to be converted via cygpath
    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
    SEP=""
    for dir in $ROOTDIRSRAW ; do
        ROOTDIRS="$ROOTDIRS$SEP$dir"
        SEP="|"
    done
    OURCYGPATTERN="(^($ROOTDIRS))"
    # Add a user-defined pattern to the cygpath arguments
    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
    fi
    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    i=0
    for arg in "$@" ; do
        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option

        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
        else
            eval `echo args$i`="\"$arg\""
        fi
        i=$((i+1))
    done
    case $i in
        (0) set -- ;;
        (1) set -- "$args0" ;;
        (2) set -- "$args0" "$args1" ;;
        (3) set -- "$args0" "$args1" "$args2" ;;
        (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
        (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
        (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
        (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
        (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
        (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
    esac
fi

# Escape application args.
# Prints each argument wrapped in single quotes (embedded single quotes are
# rewritten as '\'') and followed by " \", then a final line holding a single
# space. The output is captured into APP_ARGS and re-parsed by the
# `eval set --` below.
save () {
    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
    echo " "
}
APP_ARGS=$(save "$@")

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
  cd "$(dirname "$0")"
fi

exec "$JAVACMD" "$@"


================================================
FILE: demos/TTSAndroid/gradlew.bat
================================================
@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem  Finds java.exe (from JAVA_HOME when it is defined, otherwise from PATH)
@rem  and runs org.gradle.wrapper.GradleWrapperMain from the wrapper jar under
@rem  gradle\wrapper, passing the command-line arguments through.
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

@rem Directory of this script, with the current directory as a fallback
set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
@rem Script name without extension, passed to the JVM as org.gradle.appname
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS=

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

@rem JAVA_HOME is not defined: check that a java.exe on the PATH can be run
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto init

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
@rem Remove any double quotes from JAVA_HOME before building the java.exe path
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto init

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:init
@rem Get command-line arguments, handling Windows variants

@rem NOTE(review): the jump target is the label on the next line, so both
@rem outcomes of this test continue with the same code.
if not "%OS%" == "Windows_NT" goto win9xME_args

:win9xME_args
@rem Slurp the command line arguments.
set CMD_LINE_ARGS=
@rem NOTE(review): _SKIP is assigned here but not read anywhere in this script.
set _SKIP=2

:win9xME_args_slurp
@rem With no first argument CMD_LINE_ARGS stays empty
if "x%~1" == "x" goto execute

set CMD_LINE_ARGS=%*

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar

@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%

:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega


================================================
FILE: demos/TTSAndroid/settings.gradle
================================================
// Projects that take part in this Gradle build: only ':app' is included.
include ':app'


================================================
FILE: demos/TTSArmLinux/.gitignore
================================================
# Directories
build/
output/
libs/
models/

# Symbolic link
dict


================================================
FILE: demos/TTSArmLinux/README.md
================================================
# TTS ARM Linux C++ Demo

修改自 [demos/TTSAndroid](../TTSAndroid)，模型也来自该安卓 Demo。

### 配置编译选项

打开 [config.sh](config.sh) 按需修改配置。

默认编译 64 位版本，如果要编译 32 位版本，把 `ARM_ABI=armv8` 改成 `ARM_ABI=armv7hf` 。

### 安装依赖

```bash
# Ubuntu
sudo apt install build-essential cmake pkg-config wget tar unzip

# CentOS
sudo yum groupinstall "Development Tools"
sudo yum install cmake wget tar unzip
```

### 下载 Paddle Lite 库文件和模型文件

预编译的二进制使用与安卓 Demo 版本相同的 Paddle Lite 推理库（[Paddle-Lite:68b66fd35](https://github.com/PaddlePaddle/Paddle-Lite/tree/68b66fd356c875c92167d311ad458e6093078449)）和模型（[fs2cnn_mbmelgan_cpu_v1.3.0](https://paddlespeech.cdn.bcebos.com/demos/TTSAndroid/fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz)）。

可用以下命令下载：

```bash
./download.sh
```

### 编译 Demo

```bash
./build.sh
```

预编译的二进制兼容 Ubuntu 16.04 到 20.04。

如果编译或链接失败，说明发行版与预编译库不兼容，请尝试手动编译 Paddle Lite 库，具体步骤在最下面。

### 运行

你可以修改 `./front.conf` 中 `--phone2id_path` 参数为你自己的声学模型的 `phone_id_map.txt` 。

```bash
./run.sh
./run.sh --sentence "语音合成测试"
./run.sh --sentence "输出到指定的音频文件" --output_wav ./output/test.wav
./run.sh --help
```

目前只支持中文合成，出现任何英文都会导致程序崩溃。

如果未指定 `--output_wav`，默认输出到 `./output/tts.wav`。

## 手动编译 Paddle Lite 库

预编译的二进制兼容 Ubuntu 16.04 到 20.04，如果你的发行版与其不兼容，可以自行从源代码编译。

注意，我们只能保证 [Paddle-Lite:68b66fd35](https://github.com/PaddlePaddle/Paddle-Lite/tree/68b66fd356c875c92167d311ad458e6093078449) 与通过 `download.sh` 下载的模型兼容。
如果使用其他版本的 Paddle Lite 库，可能需要用对应版本的 opt 工具重新导出模型。

此外，[Paddle-Lite 2.12](https://github.com/PaddlePaddle/Paddle-Lite/releases/tag/v2.12) 与 TTS 不兼容，无法导出或运行 TTS 模型，需要使用更新的版本（比如 `develop` 分支中的代码）。
但 `develop` 分支中的代码可能与通过 `download.sh` 下载的模型不兼容，Demo 运行起来可能会崩溃。

### 安装 Paddle Lite 的编译依赖

```bash
# Ubuntu
sudo apt install build-essential cmake git python

# CentOS
sudo yum groupinstall "Development Tools"
sudo yum install cmake git python
```

### 编译 Paddle Lite 68b66fd35

```
git clone https://github.com/PaddlePaddle/Paddle-Lite.git
cd Paddle-Lite
git checkout 68b66fd356c875c92167d311ad458e6093078449
./lite/tools/build_linux.sh --with_extra=ON
```

编译完成后，打开 Demo 的 [config.sh](config.sh)，把 `PADDLE_LITE_DIR` 改成以下值即可（注意替换 `/path/to/` 为实际目录）：

```
PADDLE_LITE_DIR="/path/to/Paddle-Lite/build.lite.linux.${ARM_ABI}.gcc/inference_lite_lib.armlinux.${ARM_ABI}/cxx"
```


================================================
FILE: demos/TTSArmLinux/build.sh
================================================
#!/bin/bash
# Build the TTS ARM Linux demo.
#
# Steps: load config.sh, build the dependencies through build-depends.sh, then
# configure and build ./src inside ./build. Any arguments given to this script
# are forwarded to build-depends.sh and to make; with no arguments make runs
# with one job per CPU reported by nproc.
set -ex

# Work from the directory that holds this script (symlinks resolved).
script_path="$(realpath "$0")"
cd "$(dirname "$script_path")"

BASE_DIR="$PWD"

# Load ARM_ABI, PADDLE_LITE_DIR and the other settings.
. ./config.sh

echo "ARM_ABI is ${ARM_ABI}"
echo "PADDLE_LITE_DIR is ${PADDLE_LITE_DIR}"

echo "Build depends..."
./build-depends.sh "$@"

# Out-of-source build directory.
mkdir -p "$BASE_DIR/build"
cd "$BASE_DIR/build"
cmake -DPADDLE_LITE_DIR="${PADDLE_LITE_DIR}" -DARM_ABI="${ARM_ABI}" ../src

if [ -n "$*" ]; then
    # Caller supplied make arguments: use them as given.
    make "$@"
else
    make "-j$(nproc)"
fi

echo "make successful!"


================================================
FILE: demos/TTSArmLinux/clean.sh
================================================
#!/bin/bash
# Remove everything produced by download.sh, build.sh and run.sh:
# the output, libs, models and build directories, the TTSCppFrontend build
# artifacts, and the ./dict symbolic link.
set -e
set -x

cd "$(dirname "$(realpath "$0")")"

BASE_DIR="$PWD"

# load configure
. ./config.sh

# remove dirs
rm -rf "$OUTPUT_DIR"
rm -rf "$LIBS_DIR"
rm -rf "$MODELS_DIR"
rm -rf "$BASE_DIR/build"

"$BASE_DIR/src/TTSCppFrontend/clean.sh"

# Symbolic link created by download.sh.
# -f: a missing link (clean run twice, or before download.sh) must not make
# the script fail under `set -e`.
rm -f "$BASE_DIR/dict"


================================================
FILE: demos/TTSArmLinux/config.sh
================================================
# configuration
#
# Shared settings sourced by build.sh, clean.sh, download.sh and run.sh.
# Every path is derived from ${PWD}; each of those scripts changes into its
# own directory before sourcing this file.

# Target ABI: armv8 (64-bit, default) or armv7hf (32-bit)
ARM_ABI=armv8
#ARM_ABI=armv7hf

# Directories for the downloaded models and libraries and for the demo output
MODELS_DIR="${PWD}/models"
LIBS_DIR="${PWD}/libs"
OUTPUT_DIR="${PWD}/output"

# Paddle Lite C++ library directory handed to CMake by build.sh.
# The commented alternative is for a Paddle Lite build made from source.
PADDLE_LITE_DIR="${LIBS_DIR}/inference_lite_lib.armlinux.${ARM_ABI}.gcc.with_extra.with_cv/cxx"
#PADDLE_LITE_DIR="/path/to/Paddle-Lite/build.lite.linux.${ARM_ABI}.gcc/inference_lite_lib.armlinux.${ARM_ABI}/cxx"

# Model files and front-end flag file passed to the demo binary by run.sh
ACOUSTIC_MODEL_PATH="${MODELS_DIR}/cpu/fastspeech2_csmsc_arm.nb"
VOCODER_PATH="${MODELS_DIR}/cpu/mb_melgan_csmsc_arm.nb"
FRONT_CONF="${PWD}/front.conf"


================================================
FILE: demos/TTSArmLinux/download.sh
================================================
#!/bin/bash
# Download the Paddle Lite libraries, the TTS models and the dictionary files
# needed by the TTS ARM Linux demo.
set -e

cd "$(dirname "$(realpath "$0")")"

BASE_DIR="$PWD"

# load configure
. ./config.sh

mkdir -p "$LIBS_DIR" "$MODELS_DIR"

# download FILE URL MD5 DIR
#
# Change into DIR, make sure FILE is present there with the expected MD5
# checksum (downloading it from URL when it is missing or does not match),
# then extract it with tar.
#
# Returns non-zero, without extracting, when the downloaded file still does
# not match MD5. Because the script runs under `set -e` this aborts the
# script instead of unpacking a corrupt archive.
# Note: the working directory stays at DIR after the call.
download() {
    local file="$1"
    local url="$2"
    local md5="$3"
    local dir="$4"
    local fileMd5

    cd "$dir"

    if [ -f "$file" ] && [ "$(md5sum "$file" | awk '{ print $1 }')" = "$md5" ]; then
        echo "File $file (MD5: $md5) has been downloaded."
    else
        echo "Downloading $file..."
        wget -O "$file" "$url"

        # MD5 verify
        fileMd5="$(md5sum "$file" | awk '{ print $1 }')"
        if [ "$fileMd5" = "$md5" ]; then
            echo "File $file (MD5: $md5) has been downloaded."
        else
            echo "MD5 mismatch, file may be corrupt" >&2
            echo "$file MD5: $fileMd5, it should be $md5" >&2
            return 1
        fi
    fi

    echo "Extracting $file..."
    echo '-----------------------'
    tar -vxf "$file"
    echo '======================='
}

########################################

# Paddle Lite libraries for both supported ABIs go to $LIBS_DIR, the models
# go to $MODELS_DIR.
echo "Download models..."

download 'inference_lite_lib.armlinux.armv8.gcc.with_extra.with_cv.tar.gz' \
    'https://paddlespeech.cdn.bcebos.com/demos/TTSArmLinux/inference_lite_lib.armlinux.armv8.gcc.with_extra.with_cv.tar.gz' \
    '39e0c6604f97c70f5d13c573d7e709b9' \
    "$LIBS_DIR"

download 'inference_lite_lib.armlinux.armv7hf.gcc.with_extra.with_cv.tar.gz' \
    'https://paddlespeech.cdn.bcebos.com/demos/TTSArmLinux/inference_lite_lib.armlinux.armv7hf.gcc.with_extra.with_cv.tar.gz' \
    'f5ceb509f0b610dafb8379889c5f36f8' \
    "$LIBS_DIR"

download 'fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz' \
    'https://paddlespeech.cdn.bcebos.com/demos/TTSAndroid/fs2cnn_mbmelgan_cpu_v1.3.0.tar.gz' \
    '93ef17d44b498aff3bea93e2c5c09a1e' \
    "$MODELS_DIR"

echo "Done."

########################################

echo "Download dictionary files..."

# Link ./dict to the dictionary directory of the C++ front end.
# -f/-n replace a link left by a previous run; a plain `ln -s` fails with
# "File exists" on a rerun and, under `set -e`, stops the script before the
# dictionary files are downloaded.
ln -sfn src/TTSCppFrontend/front_demo/dict "$BASE_DIR/dict"

"$BASE_DIR/src/TTSCppFrontend/download.sh"


================================================
FILE: demos/TTSArmLinux/front.conf
================================================
# Flag file for the text front end. All paths below are relative paths that
# start at ./dict.

# jieba conf
--jieba_dict_path=./dict/jieba/jieba.dict.utf8
--jieba_hmm_path=./dict/jieba/hmm_model.utf8
--jieba_user_dict_path=./dict/jieba/user.dict.utf8
--jieba_idf_path=./dict/jieba/idf.utf8
--jieba_stop_word_path=./dict/jieba/stop_words.utf8

# dict conf fastspeech2_0.4 (active)
# NOTE(review): tone2id_path names the same file as word2phone_path while
# separate_tone=false - confirm this is intended.
--separate_tone=false
--word2phone_path=./dict/fastspeech2_nosil_baker_ckpt_0.4/word2phone_fs2.dict
--phone2id_path=./dict/fastspeech2_nosil_baker_ckpt_0.4/phone_id_map.txt
--tone2id_path=./dict/fastspeech2_nosil_baker_ckpt_0.4/word2phone_fs2.dict

# dict conf speedyspeech_0.5 (disabled alternative)
#--separate_tone=true
#--word2phone_path=./dict/speedyspeech_nosil_baker_ckpt_0.5/word2phone.dict
#--phone2id_path=./dict/speedyspeech_nosil_baker_ckpt_0.5/phone_id_map.txt
#--tone2id_path=./dict/speedyspeech_nosil_baker_ckpt_0.5/tone_id_map.txt

# dict of tranditional_to_simplified
--trand2simpd_path=./dict/tranditional_to_simplified/trand2simp.txt


================================================
FILE: demos/TTSArmLinux/run.sh
================================================
#!/bin/bash
# Run the demo binary from ./build with the front-end flag file and model
# paths defined in config.sh. Arguments given to this script are appended to
# the command line.
set -e

# Work from the directory that holds this script (symlinks resolved)
cd "$(dirname "$(realpath "$0")")"

# load configure
. ./config.sh

# create dir
mkdir -p "$OUTPUT_DIR"

# run
# Trace the final command so the effective arguments are visible
set -x
./build/paddlespeech_tts_demo \
    --front_conf "$FRONT_CONF" \
    --acoustic_model "$ACOUSTIC_MODEL_PATH" \
    --vocoder "$VOCODER_PATH" \
    "$@"
# end


================================================
FILE: demos/TTSArmLinux/src/CMakeLists.txt
================================================
# Build script for the paddlespeech_tts_demo executable.
# Expects -DARM_ABI=<armv8|armv7hf> and -DPADDLE_LITE_DIR=<path> on the cmake
# command line.
cmake_minimum_required(VERSION 3.10)
project(paddlespeech_tts_demo)


########## Global Options ##########

# The front demo of the TTSCppFrontend subdirectory is switched off here
option(WITH_FRONT_DEMO "Build front demo" OFF)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(ABSL_PROPAGATE_CXX_STD ON)


########## ARM Options ##########

# Select the processor name from ARM_ABI; any other value is a fatal error.
# The commented compiler settings name the cross toolchains for each ABI.
set(CMAKE_SYSTEM_NAME Linux)
if(ARM_ABI STREQUAL "armv8")
    set(CMAKE_SYSTEM_PROCESSOR aarch64)
    #set(CMAKE_C_COMPILER "aarch64-linux-gnu-gcc")
    #set(CMAKE_CXX_COMPILER "aarch64-linux-gnu-g++")
elseif(ARM_ABI STREQUAL "armv7hf")
    set(CMAKE_SYSTEM_PROCESSOR arm)
    #set(CMAKE_C_COMPILER "arm-linux-gnueabihf-gcc")
    #set(CMAKE_CXX_COMPILER "arm-linux-gnueabihf-g++")
else()
    message(FATAL_ERROR "Unknown arch abi ${ARM_ABI}, only support armv8 and armv7hf.")
    # NOTE(review): FATAL_ERROR already stops processing, so this return()
    # looks unreachable.
    return()
endif()


########## Paddle Lite Options ##########

message(STATUS "TARGET ARCH ABI: ${ARM_ABI}")
message(STATUS "PADDLE LITE DIR: ${PADDLE_LITE_DIR}")

# Headers and library search paths of the Paddle Lite package
include_directories(${PADDLE_LITE_DIR}/include)
link_directories(${PADDLE_LITE_DIR}/libs/${ARM_ABI})
link_directories(${PADDLE_LITE_DIR}/lib)

# Architecture flags are put in front of any flags that are already set
if(ARM_ABI STREQUAL "armv8")
    set(CMAKE_CXX_FLAGS "-march=armv8-a ${CMAKE_CXX_FLAGS}")
    set(CMAKE_C_FLAGS "-march=armv8-a ${CMAKE_C_FLAGS}")
elseif(ARM_ABI STREQUAL "armv7hf")
    set(CMAKE_CXX_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4 ${CMAKE_CXX_FLAGS}")
    set(CMAKE_C_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4 ${CMAKE_C_FLAGS}" )
endif()


########## Dependencies ##########

# OpenMP is mandatory; its compiler flags are appended to the global flags
find_package(OpenMP REQUIRED)
if(OpenMP_FOUND OR OpenMP_CXX_FOUND)
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
    message(STATUS "Found OpenMP ${OpenMP_VERSION} ${OpenMP_CXX_VERSION}")
    message(STATUS "OpenMP C flags:  ${OpenMP_C_FLAGS}")
    message(STATUS "OpenMP CXX flags:  ${OpenMP_CXX_FLAGS}")
    message(STATUS "OpenMP OpenMP_CXX_LIB_NAMES:  ${OpenMP_CXX_LIB_NAMES}")
    message(STATUS "OpenMP OpenMP_CXX_LIBRARIES:  ${OpenMP_CXX_LIBRARIES}")
else()
    message(FATAL_ERROR "Could not found OpenMP!")
    return()
endif()


############### tts cpp frontend ###############

# Builds the paddlespeech_tts_front library that the demo links against
add_subdirectory(TTSCppFrontend)

# Front-end headers plus the cppjieba and limonp headers
include_directories(
    TTSCppFrontend/src
    third-party/build/src/cppjieba/include
    third-party/build/src/limonp/include
)


############### paddlespeech_tts_demo ###############

add_executable(paddlespeech_tts_demo main.cc)
target_link_libraries(paddlespeech_tts_demo paddle_light_api_shared paddlespeech_tts_front)


================================================
FILE: demos/TTSArmLinux/src/Predictor.hpp
================================================
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <chrono>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include "paddle_api.h"

using namespace paddle::lite_api;

// Abstract interface of the TTS predictor. It lets callers use a Predictor
// without knowing the WAV sample type chosen as its template argument.
class PredictorInterface {
  public:
    // Defined inside the class (implicitly inline) so that this header can be
    // included from more than one translation unit without a
    // multiple-definition link error. The class is still abstract because of
    // the pure virtual functions below.
    virtual ~PredictorInterface() = default;

    // Loads the acoustic model and the vocoder.
    // wavSampleRate: WAV sample rate (must match the model output).
    // Change it if playback speed or pitch is wrong.
    // Common sample rates: 16000, 24000, 32000, 44100, 48000, 96000
    virtual bool Init(const std::string &AcousticModelPath,
                      const std::string &VocoderPath,
                      PowerMode cpuPowerMode,
                      int cpuThreadNum,
                      uint32_t wavSampleRate) = 0;
    virtual std::shared_ptr<PaddlePredictor> LoadModel(
        const std::string &modelPath,
        int cpuThreadNum,
        PowerMode cpuPowerMode) = 0;
    virtual void ReleaseModel() = 0;
    // Runs the whole pipeline for a sequence of phone ids.
    virtual bool RunModel(const std::vector<int64_t> &phones) = 0;
    virtual std::unique_ptr<const Tensor> GetAcousticModelOutput(
        const std::vector<int64_t> &phones) = 0;
    virtual std::unique_ptr<const Tensor> GetVocoderOutput(
        std::unique_ptr<const Tensor> &&amOutput) = 0;
    virtual void VocoderOutputToWav(
        std::unique_ptr<const Tensor> &&vocOutput) = 0;
    virtual void SaveFloatWav(float *floatWav, int64_t size) = 0;
    virtual bool IsLoaded() = 0;
    virtual float GetInferenceTime() = 0;
    virtual int GetWavSize() = 0;
    // WAV duration in milliseconds
    virtual float GetWavDuration() = 0;
    // RTF (synthesis time / audio duration)
    virtual float GetRTF() = 0;
    virtual void ReleaseWav() = 0;
    virtual bool WriteWavToFile(const std::string &wavPath) = 0;
};

// WavDataType: WAV数据类型
// 可在 int16_t 和 float 之间切换，
// 用于生成 16-bit PCM 或 32-bit IEEE float 格式的 WAV
// Two-stage TTS predictor: runs an acoustic model and then a vocoder through
// Paddle Lite and keeps the resulting samples in memory until they are
// written to a WAV file.
//
// WavDataType: sample type of the stored WAV data.
// Switch between int16_t and float to produce a 16-bit PCM or a
// 32-bit IEEE float WAV.
template <typename WavDataType>
class Predictor : public PredictorInterface {
  public:
    // Drops any loaded models, then loads the acoustic model and the vocoder.
    // Returns false as soon as one of them cannot be loaded.
    //
    // wavSampleRate: WAV sample rate (must match the model output).
    // Change it if playback speed or pitch is wrong.
    // Common sample rates: 16000, 24000, 32000, 44100, 48000, 96000
    bool Init(const std::string &AcousticModelPath,
              const std::string &VocoderPath,
              PowerMode cpuPowerMode,
              int cpuThreadNum,
              uint32_t wavSampleRate) override {
        // Release model if exists
        ReleaseModel();

        acoustic_model_predictor_ =
            LoadModel(AcousticModelPath, cpuThreadNum, cpuPowerMode);
        if (acoustic_model_predictor_ == nullptr) {
            return false;
        }
        vocoder_predictor_ = LoadModel(VocoderPath, cpuThreadNum, cpuPowerMode);
        if (vocoder_predictor_ == nullptr) {
            return false;
        }

        wav_sample_rate_ = wavSampleRate;

        return true;
    }

    ~Predictor() override {
        ReleaseModel();
        ReleaseWav();
    }

    // Creates a Paddle Lite predictor for the model file at modelPath.
    // Returns nullptr when modelPath is empty.
    std::shared_ptr<PaddlePredictor> LoadModel(
        const std::string &modelPath,
        int cpuThreadNum,
        PowerMode cpuPowerMode) override {
        if (modelPath.empty()) {
            return nullptr;
        }

        // Set up the MobileConfig
        MobileConfig config;
        config.set_model_from_file(modelPath);
        config.set_threads(cpuThreadNum);
        config.set_power_mode(cpuPowerMode);

        return CreatePaddlePredictor<MobileConfig>(config);
    }

    void ReleaseModel() override {
        acoustic_model_predictor_ = nullptr;
        vocoder_predictor_ = nullptr;
    }

    // Runs acoustic model -> vocoder -> WAV conversion for the given phone
    // ids and records the elapsed time. Returns false when the models are not
    // loaded.
    bool RunModel(const std::vector<int64_t> &phones) override {
        if (!IsLoaded()) {
            return false;
        }

        // Start timing. steady_clock is monotonic, so the measured interval
        // cannot be distorted by adjustments of the system time.
        const auto start = std::chrono::steady_clock::now();

        // Run inference
        VocoderOutputToWav(GetVocoderOutput(GetAcousticModelOutput(phones)));

        // Stop timing
        const auto end = std::chrono::steady_clock::now();

        // Elapsed time
        const std::chrono::duration<float> duration = end - start;
        inference_time_ = duration.count() * 1000;  // unit: milliseconds

        return true;
    }

    // Feeds the phone ids to the acoustic model, runs it and returns its
    // first output tensor. The output shape is printed to stdout.
    std::unique_ptr<const Tensor> GetAcousticModelOutput(
        const std::vector<int64_t> &phones) override {
        auto phones_handle = acoustic_model_predictor_->GetInput(0);
        phones_handle->Resize({static_cast<int64_t>(phones.size())});
        phones_handle->CopyFromCpu(phones.data());
        acoustic_model_predictor_->Run();

        // Fetch the output tensor
        auto am_output_handle = acoustic_model_predictor_->GetOutput(0);
        // Print the shape of the output tensor
        std::cout << "Acoustic Model Output shape: ";
        auto shape = am_output_handle->shape();
        for (auto s : shape) {
            std::cout << s << ", ";
        }
        std::cout << std::endl;

        return am_output_handle;
    }

    // Copies the acoustic model output into the vocoder input, runs the
    // vocoder and returns its first output tensor. The output shape is
    // printed to stdout.
    std::unique_ptr<const Tensor> GetVocoderOutput(
        std::unique_ptr<const Tensor> &&amOutput) override {
        auto mel_handle = vocoder_predictor_->GetInput(0);
        // [?, 80]
        auto dims = amOutput->shape();
        mel_handle->Resize(dims);
        auto am_output_data = amOutput->mutable_data<float>();
        mel_handle->CopyFromCpu(am_output_data);
        vocoder_predictor_->Run();

        // Fetch the output tensor
        auto voc_output_handle = vocoder_predictor_->GetOutput(0);
        // Print the shape of the output tensor
        std::cout << "Vocoder Output shape: ";
        auto shape = voc_output_handle->shape();
        for (auto s : shape) {
            std::cout << s << ", ";
        }
        std::cout << std::endl;

        return voc_output_handle;
    }

    // Stores the vocoder output as WAV samples via SaveFloatWav.
    void VocoderOutputToWav(
        std::unique_ptr<const Tensor> &&vocOutput) override {
        // Number of elements = product of all dimensions
        int64_t output_size = 1;
        for (auto dim : vocOutput->shape()) {
            output_size *= dim;
        }
        auto output_data = vocOutput->mutable_data<float>();

        SaveFloatWav(output_data, output_size);
    }

    // Defined only through the explicit specializations for int16_t and float.
    void SaveFloatWav(float *floatWav, int64_t size) override;

    bool IsLoaded() override {
        return acoustic_model_predictor_ != nullptr &&
               vocoder_predictor_ != nullptr;
    }

    // Duration of the last RunModel call in milliseconds
    float GetInferenceTime() override { return inference_time_; }

    const std::vector<WavDataType> &GetWav() { return wav_; }

    // Size of the sample data in bytes (without the WAV header)
    int GetWavSize() override { return wav_.size() * sizeof(WavDataType); }

    // WAV duration in milliseconds
    float GetWavDuration() override {
        return static_cast<float>(GetWavSize()) / sizeof(WavDataType) /
               static_cast<float>(wav_sample_rate_) * 1000;
    }

    // RTF (synthesis time / audio duration)
    float GetRTF() override { return GetInferenceTime() / GetWavDuration(); }

    void ReleaseWav() override { wav_.clear(); }

    // Writes the header and the stored samples to wavPath.
    // Returns false when the file cannot be opened or when writing fails.
    bool WriteWavToFile(const std::string &wavPath) override {
        std::ofstream fout(wavPath, std::ios::binary);
        if (!fout.is_open()) {
            return false;
        }

        // Write the header
        WavHeader header;
        header.audio_format = GetWavAudioFormat();
        header.data_size = GetWavSize();
        header.size = sizeof(header) - 8 + header.data_size;
        header.sample_rate = wav_sample_rate_;
        header.byte_rate = header.sample_rate * header.num_channels *
                           header.bits_per_sample / 8;
        header.block_align = header.num_channels * header.bits_per_sample / 8;
        fout.write(reinterpret_cast<const char *>(&header), sizeof(header));

        // Write the sample data
        fout.write(reinterpret_cast<const char *>(wav_.data()),
                   header.data_size);

        // A failed write or flush (for example a full disk) sets the stream's
        // error state; report it instead of claiming success.
        fout.close();
        return !fout.fail();
    }

  protected:
    struct WavHeader {
        // RIFF header
        char riff[4] = {'R', 'I', 'F', 'F'};
        uint32_t size = 0;
        char wave[4] = {'W', 'A', 'V', 'E'};

        // FMT header
        char fmt[4] = {'f', 'm', 't', ' '};
        uint32_t fmt_size = 16;
        uint16_t audio_format = 0;
        uint16_t num_channels = 1;
        uint32_t sample_rate = 0;
        uint32_t byte_rate = 0;
        uint16_t block_align = 0;
        uint16_t bits_per_sample = sizeof(WavDataType) * 8;

        // DATA header
        char data[4] = {'d', 'a', 't', 'a'};
        uint32_t data_size = 0;
    };

    enum WavAudioFormat {
        WAV_FORMAT_16BIT_PCM = 1,   // 16-bit PCM format
        WAV_FORMAT_32BIT_FLOAT = 3  // 32-bit IEEE float format
    };

  protected:
    // The return value depends on WavDataType through template specialization
    inline uint16_t GetWavAudioFormat();

    inline float Abs(float number) { return (number < 0) ? -number : number; }

  protected:
    float inference_time_ = 0;
    uint32_t wav_sample_rate_ = 0;
    std::vector<WavDataType> wav_;
    std::shared_ptr<PaddlePredictor> acoustic_model_predictor_ = nullptr;
    std::shared_ptr<PaddlePredictor> vocoder_predictor_ = nullptr;
};

// Explicit specializations of Predictor members.
// A full specialization is an ordinary function definition, so each one is
// marked `inline`: without it, including this header from more than one
// translation unit produces multiple-definition link errors.

template <>
inline uint16_t Predictor<int16_t>::GetWavAudioFormat() {
    return Predictor::WAV_FORMAT_16BIT_PCM;
}

template <>
inline uint16_t Predictor<float>::GetWavAudioFormat() {
    return Predictor::WAV_FORMAT_32BIT_FLOAT;
}

// Store the samples as 16-bit PCM
template <>
inline void Predictor<int16_t>::SaveFloatWav(float *floatWav, int64_t size) {
    wav_.resize(size);
    // Lower bound for the peak, so near-silent output is not amplified
    // without limit and the division below never uses zero.
    float maxSample = 0.01;
    // Find the largest absolute sample value
    for (int64_t i = 0; i < size; i++) {
        float sample = Abs(floatWav[i]);
        if (sample > maxSample) {
            maxSample = sample;
        }
    }
    // Scale the samples to the int16 range
    for (int64_t i = 0; i < size; i++) {
        wav_[i] = floatWav[i] * 32767.0f / maxSample;
    }
}

// Store the samples as 32-bit IEEE float
template <>
inline void Predictor<float>::SaveFloatWav(float *floatWav, int64_t size) {
    wav_.resize(size);
    std::copy_n(floatWav, size, wav_.data());
}


================================================
FILE: demos/TTSArmLinux/src/main.cc
================================================
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <front/front_interface.h>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <paddle_api.h>
#include <cstdlib>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include "Predictor.hpp"

using namespace paddle::lite_api;

// Command-line flags of the demo (gflags).
DEFINE_string(
    sentence,
    "你好，欢迎使用语音合成服务",
    "Text to be synthesized (Chinese only. English will crash the program.)");
DEFINE_string(front_conf, "./front.conf", "Front configuration file");
DEFINE_string(acoustic_model,
              "./models/cpu/fastspeech2_csmsc_arm.nb",
              "Acoustic model .nb file");
// The default must name the vocoder model (the same file as VOCODER_PATH in
// config.sh), not the acoustic model.
DEFINE_string(vocoder,
              "./models/cpu/mb_melgan_csmsc_arm.nb",
              "vocoder .nb file");
DEFINE_string(output_wav, "./output/tts.wav", "Output WAV file");
DEFINE_string(wav_bit_depth,
              "16",
              "WAV bit depth, 16 (16-bit PCM) or 32 (32-bit IEEE float)");
DEFINE_string(wav_sample_rate,
              "24000",
              "WAV sample rate, should match the output of the vocoder");
DEFINE_string(cpu_thread, "1", "CPU thread numbers");

int main(int argc, char *argv[]) {
    gflags::ParseCommandLineFlags(&argc, &argv, true);

    PredictorInterface *predictor;

    if (FLAGS_wav_bit_depth == "16") {
        predictor = new Predictor<int16_t>();
    } else if (FLAGS_wav_bit_depth == "32") {
        predictor = new Predictor<float>();
    } else {
        LOG(ERROR) << "Unsupported WAV bit depth: " << FLAGS_wav_bit_depth;
        return -1;
    }


    /////////////////////////// 前端：文本转音素 ///////////////////////////

    // 实例化文本前端引擎
    ppspeech::FrontEngineInterface *front_inst = nullptr;
    front_inst = new ppspeech::FrontEngineInterface(FLAGS_front_conf);
    if ((!front_inst) || (front_inst->init())) {
        LOG(ERROR) << "Creater tts engine failed!";
        if (front_inst != nullptr) {
            delete front_inst;
        }
        front_inst = nullptr;
        return -1;
    }

    std::wstring ws_sentence = ppspeech::utf8string2wstring(FLAGS_sentence);

    // 繁体转简体
    std::wstring sentence_simp;
    front_inst->Trand2Simp(ws_sentence, &sentence_simp);
    ws_sentence = sentence_simp;

    std::string s_sentence;
    std::vector<std::wstring> sentence_part;
    std::vector<int> phoneids = {};
    std::vector<int> toneids = {};

    // 根据标点进行分句
    LOG(INFO) << "Start to segment sentences by punctuation";
    front_inst->SplitByPunc(ws_sentence, &sentence_part);
    LOG(INFO) << "Segment sentences through punctuation successfully";

    // 分句后获取音素id
    LOG(INFO)
        << "Start to get the phoneme and tone id sequence of each sentence";
    for (int i = 0; i < sentence_part.size(); i++) {
        LOG(INFO) << "Raw sentence is: "
                  << ppspeech::wstring2utf8string(sentence_part[i]);
        front_inst->SentenceNormalize(&sentence_part[i]);
        s_sentence = ppspeech::wstring2utf8string(sentence_part[i]);
        LOG(INFO) << "After normalization sentence is: " << s_sentence;

        if (0 != front_inst->GetSentenceIds(s_sentence, &phoneids, &toneids)) {
            LOG(ERROR) << "TTS inst get sentence phoneids and toneids failed";
            return -1;
        }
    }
    LOG(INFO) << "The phoneids of the sentence is: "
              << limonp::Join(phoneids.begin(), phoneids.end(), " ");
    LOG(INFO) << "The toneids of the sentence is: "
              << limonp::Join(toneids.begin(), toneids.end(), " ");
    LOG(INFO) << "Get the phoneme id sequence of each sentence successfully";


    /////////////////////////// 后端：音素转音频 ///////////////////////////

    // WAV采样率（必须与模型输出匹配）
    // 如果播放速度和音调异常，请修改采样率
    // 常见采样率：16000, 24000, 32000, 44100, 48000, 96000
    const uint32_t wavSampleRate = std::stoul(FLAGS_wav_sample_rate);

    // CPU线程数
    const int cpuThreadNum = std::stol(FLAGS_cpu_thread);

    // CPU电源模式
    const PowerMode cpuPowerMode = PowerMode::LITE_POWER_HIGH;

    if (!predictor->Init(FLAGS_acoustic_model,
                         FLAGS_vocoder,
                         cpuPowerMode,
                         cpuThreadNum,
                         wavSampleRate)) {
        LOG(ERROR) << "predictor init failed" << std::endl;
        return -1;
    }

    std::vector<int64_t> phones(phoneids.size());
    std::transform(phoneids.begin(), phoneids.end(), phones.begin(), [](int x) {
        return static_cast<int64_t>(x);
    });

    if (!predictor->RunModel(phones)) {
        LOG(ERROR) << "predictor run model failed" << std::endl;
        return -1;
    }

    LOG(INFO) << "Inference time: " << predictor->GetInferenceTime() << " ms, "
              << "WAV size (without header): " << predictor->GetWavSize()
              << " bytes, "
              << "WAV duration: " << predictor->GetWavDuration() << " ms, "
              << "RTF: " << predictor->GetRTF() << std::endl;

    if (!predictor->WriteWavToFile(FLAGS_output_wav)) {
        LOG(ERROR) << "write wav file failed" << std::endl;
        return -1;
    }

    delete predictor;

    return 0;
}


================================================
FILE: demos/TTSCppFrontend/.gitignore
================================================
# Build directories
build/
# Dictionary directories
dict/


================================================
FILE: demos/TTSCppFrontend/CMakeLists.txt
================================================
# Build script for the paddlespeech_tts_front static library and, optionally,
# the tts_front_demo executable.
cmake_minimum_required(VERSION 3.10)
project(paddlespeech_tts_cpp)


########## Global Options ##########

option(WITH_FRONT_DEMO "Build front demo" ON)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(ABSL_PROPAGATE_CXX_STD ON)


########## Dependencies ##########

# Location of the dependencies built by build-depends.sh.
# CMAKE_CURRENT_SOURCE_DIR (the directory of this file) is used instead of
# CMAKE_SOURCE_DIR: the latter is the top-level project directory, which is a
# different directory when this project is included with add_subdirectory().
set(TTS_FRONT_THIRD_PARTY_DIR "${CMAKE_CURRENT_SOURCE_DIR}/third-party/build")

set(ENV{PKG_CONFIG_PATH} "${TTS_FRONT_THIRD_PARTY_DIR}/lib/pkgconfig:${TTS_FRONT_THIRD_PARTY_DIR}/lib64/pkgconfig")
find_package(PkgConfig REQUIRED)

# It is hard to load xxx-config.cmake in a custom location, so use pkgconfig instead.
pkg_check_modules(ABSL   REQUIRED absl_strings IMPORTED_TARGET)
pkg_check_modules(GFLAGS REQUIRED gflags       IMPORTED_TARGET)
pkg_check_modules(GLOG   REQUIRED libglog      IMPORTED_TARGET)

# load header-only libraries
include_directories(
    ${TTS_FRONT_THIRD_PARTY_DIR}/src/cppjieba/include
    ${TTS_FRONT_THIRD_PARTY_DIR}/src/limonp/include
)

find_package(Threads REQUIRED)


########## paddlespeech_tts_front ##########

include_directories(src)

file(GLOB FRONT_SOURCES
    ./src/base/*.cpp
    ./src/front/*.cpp
)
add_library(paddlespeech_tts_front STATIC ${FRONT_SOURCES})

# PUBLIC: targets that link the front-end library also get these libraries
target_link_libraries(
    paddlespeech_tts_front
    PUBLIC
    PkgConfig::GFLAGS
    PkgConfig::GLOG
    PkgConfig::ABSL
    Threads::Threads
)


########## tts_front_demo ##########

if (WITH_FRONT_DEMO)

    file(GLOB FRONT_DEMO_SOURCES front_demo/*.cpp)
    add_executable(tts_front_demo ${FRONT_DEMO_SOURCES})

    target_include_directories(tts_front_demo PRIVATE ./front_demo)
    target_link_libraries(tts_front_demo PRIVATE paddlespeech_tts_front)

endif ()


================================================
FILE: demos/TTSCppFrontend/README.md
================================================
# PaddleSpeech TTS CPP Frontend

A TTS frontend that implements text-to-phoneme conversion.

Currently it only supports Chinese; any English word will crash the demo.

## Install Build Tools

```bash
# Ubuntu
sudo apt install build-essential cmake pkg-config

# CentOS
sudo yum groupinstall "Development Tools"
sudo yum install cmake
```

If your cmake version is too old, you can go here to download a precompiled new version: https://cmake.org/download/

## Build

```bash
# Build with all CPU cores
./build.sh

# Build with 1 core
./build.sh -j1
```

Dependent libraries will be automatically downloaded to the `third-party/build` folder.

If the download speed is too slow, you can open [third-party/CMakeLists.txt](third-party/CMakeLists.txt) and modify `GIT_REPOSITORY` URLs.

## Download dictionary files

```bash
./download.sh
```

## Run
You can change `--phone2id_path` in `./front_demo/front.conf` to the `phone_id_map.txt` of your own acoustic model.

```bash
./run_front_demo.sh
./run_front_demo.sh --help
./run_front_demo.sh --sentence "这是语音合成服务的文本前端，用于将文本转换为音素序号数组。"
./run_front_demo.sh --front_conf ./front_demo/front.conf --sentence "你还需要一个语音合成后端才能将其转换为实际的声音。"
```

## Clean

```bash
./clean.sh
```

The folders `front_demo/dict`, `build` and `third-party/build` will be deleted.


================================================
FILE: demos/TTSCppFrontend/build-depends.sh
================================================
#!/bin/bash
# Configure and build the third-party dependencies inside ./third-party/build.
# Arguments are forwarded to make; with no arguments make runs with one job
# per CPU reported by nproc.
set -ex

# Go to the third-party directory next to this script (symlinks resolved).
script_dir="$(dirname "$(realpath "$0")")"
cd "$script_dir"
cd ./third-party

mkdir -p build
cd build

cmake ..

if [ -n "$*" ]; then
    # Caller supplied make arguments: use them as given.
    make "$@"
else
    make "-j$(nproc)"
fi

echo "Done."


================================================
FILE: demos/TTSCppFrontend/build.sh
================================================
#!/bin/bash
# Build the dependencies, then the front-end library and demo inside ./build.
# Arguments are forwarded to build-depends.sh and to make; with no arguments
# make runs with one job per CPU reported by nproc.
set -ex

# Work from the directory that holds this script (symlinks resolved).
script_dir="$(dirname "$(realpath "$0")")"
cd "$script_dir"

echo "************* Download & Build Dependencies *************"
./build-depends.sh "$@"

echo "************* Build Front Lib and Demo *************"
mkdir -p ./build
cd ./build
cmake ..

if [ -n "$*" ]; then
    # Caller supplied make arguments: use them as given.
    make "$@"
else
    make "-j$(nproc)"
fi

echo "Done."


================================================
FILE: demos/TTSCppFrontend/clean.sh
================================================
#!/bin/bash
# Delete the downloaded dictionaries and both build directories.
set -ex

# Work from the directory that holds this script (symlinks resolved).
script_dir="$(dirname "$(realpath "$0")")"
cd "$script_dir"

for target in "./front_demo/dict" "./build" "./third-party/build"; do
    rm -rf "$target"
done

echo "Done."


================================================
FILE: demos/TTSCppFrontend/download.sh
================================================
#!/bin/bash
# Download and unpack the dictionary files used by the text front end.
set -e

cd "$(dirname "$(realpath "$0")")"

# download FILE URL MD5 DIR
#
# Change into DIR, make sure FILE is present there with the expected MD5
# checksum (downloading it from URL when it is missing or does not match),
# then extract it with tar.
#
# Returns non-zero, without extracting, when the downloaded file still does
# not match MD5. Because the script runs under `set -e` this aborts the
# script instead of unpacking a corrupt archive.
# Note: the working directory stays at DIR after the call.
download() {
    local file="$1"
    local url="$2"
    local md5="$3"
    local dir="$4"
    local fileMd5

    cd "$dir"

    if [ -f "$file" ] && [ "$(md5sum "$file" | awk '{ print $1 }')" = "$md5" ]; then
        echo "File $file (MD5: $md5) has been downloaded."
    else
        echo "Downloading $file..."
        wget -O "$file" "$url"

        # MD5 verify
        fileMd5="$(md5sum "$file" | awk '{ print $1 }')"
        if [ "$fileMd5" = "$md5" ]; then
            echo "File $file (MD5: $md5) has been downloaded."
        else
            echo "MD5 mismatch, file may be corrupt" >&2
            echo "$file MD5: $fileMd5, it should be $md5" >&2
            return 1
        fi
    fi

    echo "Extracting $file..."
    echo '-----------------------'
    tar -vxf "$file"
    echo '======================='
}

########################################

# All archives are unpacked into the dictionary directory of the front demo.
DIST_DIR="$PWD/front_demo/dict"

mkdir -p "$DIST_DIR"

# Every archive lives under the same URL prefix; each entry below is
# "<archive name> <expected MD5>".
BASE_URL='https://paddlespeech.cdn.bcebos.com/t2s/text_frontend'

for entry in \
    'fastspeech2_nosil_baker_ckpt_0.4.tar.gz 7bf1bab1737375fa123c413eb429c573' \
    'speedyspeech_nosil_baker_ckpt_0.5.tar.gz 0b7754b21f324789aef469c61f4d5b8f' \
    'jieba.tar.gz 6d30f426bd8c0025110a483f051315ca' \
    'tranditional_to_simplified.tar.gz 258f5b59d5ebfe96d02007ca1d274a7f'
do
    archive="${entry% *}"
    checksum="${entry#* }"
    download "$archive" "$BASE_URL/$archive" "$checksum" "$DIST_DIR"
done

echo "Done."


================================================
FILE: demos/TTSCppFrontend/front_demo/front.conf
================================================
# Flag file for the text front end. All paths below are relative paths that
# start at ./front_demo/dict.

# jieba conf
--jieba_dict_path=./front_demo/dict/jieba/jieba.dict.utf8
--jieba_hmm_path=./front_demo/dict/jieba/hmm_model.utf8
--jieba_user_dict_path=./front_demo/dict/jieba/user.dict.utf8
--jieba_idf_path=./front_demo/dict/jieba/idf.utf8
--jieba_stop_word_path=./front_demo/dict/jieba/stop_words.utf8

# dict conf fastspeech2_0.4 (active)
# NOTE(review): tone2id_path names the same file as word2phone_path while
# separate_tone=false - confirm this is intended.
--separate_tone=false
--word2phone_path=./front_demo/dict/fastspeech2_nosil_baker_ckpt_0.4/word2phone_fs2.dict
--phone2id_path=./front_demo/dict/fastspeech2_nosil_baker_ckpt_0.4/phone_id_map.txt
--tone2id_path=./front_demo/dict/fastspeech2_nosil_baker_ckpt_0.4/word2phone_fs2.dict

# dict conf speedyspeech_0.5 (disabled alternative)
#--separate_tone=true
#--word2phone_path=./front_demo/dict/speedyspeech_nosil_baker_ckpt_0.5/word2phone.dict
#--phone2id_path=./front_demo/dict/speedyspeech_nosil_baker_ckpt_0.5/phone_id_map.txt
#--tone2id_path=./front_demo/dict/speedyspeech_nosil_baker_ckpt_0.5/tone_id_map.txt

# dict of tranditional_to_simplified
--trand2simpd_path=./front_demo/dict/tranditional_to_simplified/trand2simp.txt


================================================
FILE: demos/TTSCppFrontend/front_demo/front_demo.cpp
================================================
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <gflags/gflags.h>
#include <glog/logging.h>
#include <map>
#include <string>
#include "front/front_interface.h"

DEFINE_string(sentence, "你好，欢迎使用语音合成服务", "Text to be synthesized");
DEFINE_string(front_conf, "./front_demo/front.conf", "Front conf file");
// DEFINE_string(separate_tone, "true", "If true, get phoneids and tonesid");


// Demo entry point: converts --sentence into phoneme (and, when the front-end
// is configured with separate tones, tone) id sequences and logs them.
//
// Returns EXIT_SUCCESS on success and -1 when the engine cannot be
// initialised or a sentence part cannot be converted.
int main(int argc, char** argv) {
    gflags::ParseCommandLineFlags(&argc, &argv, true);

    // Instantiate the text front-end engine. Automatic storage guarantees the
    // engine is destroyed on every exit path; the previous heap allocation
    // was leaked on success and on the early return inside the loop below.
    ppspeech::FrontEngineInterface front_inst(FLAGS_front_conf);
    if (0 != front_inst.init()) {
        LOG(ERROR) << "Creater tts engine failed!";
        return -1;
    }

    std::wstring ws_sentence = ppspeech::utf8string2wstring(FLAGS_sentence);

    // Convert traditional Chinese characters to simplified ones.
    std::wstring sentence_simp;
    front_inst.Trand2Simp(ws_sentence, &sentence_simp);
    ws_sentence = sentence_simp;

    std::string s_sentence;
    std::vector<std::wstring> sentence_part;
    std::vector<int> phoneids = {};
    std::vector<int> toneids = {};

    // Split the text into sentence parts at punctuation marks.
    LOG(INFO) << "Start to segment sentences by punctuation";
    front_inst.SplitByPunc(ws_sentence, &sentence_part);
    LOG(INFO) << "Segment sentences through punctuation successfully";

    // Normalise every part and append its ids to phoneids / toneids, so the
    // vectors end up holding the ids of the whole input.
    LOG(INFO)
        << "Start to get the phoneme and tone id sequence of each sentence";
    for (size_t i = 0; i < sentence_part.size(); i++) {
        LOG(INFO) << "Raw sentence is: "
                  << ppspeech::wstring2utf8string(sentence_part[i]);
        front_inst.SentenceNormalize(&sentence_part[i]);
        s_sentence = ppspeech::wstring2utf8string(sentence_part[i]);
        LOG(INFO) << "After normalization sentence is: " << s_sentence;

        if (0 != front_inst.GetSentenceIds(s_sentence, &phoneids, &toneids)) {
            LOG(ERROR) << "TTS inst get sentence phoneids and toneids failed";
            return -1;
        }
    }
    LOG(INFO) << "The phoneids of the sentence is: "
              << limonp::Join(phoneids.begin(), phoneids.end(), " ");
    LOG(INFO) << "The toneids of the sentence is: "
              << limonp::Join(toneids.begin(), toneids.end(), " ");
    LOG(INFO) << "Get the phoneme id sequence of each sentence successfully";

    return EXIT_SUCCESS;
}


================================================
FILE: demos/TTSCppFrontend/front_demo/gentools/gen_dict_paddlespeech.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import configparser

from paddlespeech.t2s.frontend.zh_frontend import Frontend


def get_phone(frontend,
              word,
              merge_sentences=True,
              print_info=False,
              robot=False,
              get_tone_ids=False):
    """Ask ``frontend`` for the phones (and optionally tones) of ``word``.

    Args:
        frontend: object exposing ``get_phonemes`` and ``_get_phone_tone``.
        word: text to convert.
        merge_sentences: forwarded to ``frontend.get_phonemes``.
        print_info: forwarded to ``frontend.get_phonemes``.
        robot: forwarded to ``frontend.get_phonemes``.
        get_tone_ids: forwarded to ``frontend._get_phone_tone``.

    Returns:
        A ``(phones, tones)`` tuple built from the first entry returned by
        ``frontend.get_phonemes``.
    """
    all_phonemes = frontend.get_phonemes(word, merge_sentences, print_info,
                                         robot)
    # Only the first entry is used.
    first_entry = all_phonemes[0]
    phones, tones = frontend._get_phone_tone(first_entry, get_tone_ids)
    return phones, tones


def gen_word2phone_dict(frontend,
                        jieba_words_dict,
                        word2phone_dict,
                        get_tone=False):
    """Write a ``word phone phone ...`` dictionary for every jieba word.

    Args:
        frontend: frontend object handed to ``get_phone``.
        jieba_words_dict: path of the input dictionary; the first
            whitespace-separated field of each line is the word.
        word2phone_dict: path of the output dictionary (overwritten).
        get_tone: forwarded to ``get_phone`` as ``get_tone_ids``; when tones
            are returned each phone is written with its tone appended.

    Both files are read/written as UTF-8 regardless of the system locale.
    Blank input lines are skipped.
    """
    with open(jieba_words_dict, "r", encoding="utf-8") as f1, \
            open(word2phone_dict, "w+", encoding="utf-8") as f2:
        for line in f1:
            fields = line.split()
            if not fields:
                # Blank line: nothing to convert.
                continue
            # split() also drops the trailing newline of single-field lines.
            word = fields[0]
            phone, tone = get_phone(frontend, word, get_tone_ids=get_tone)

            if tone:
                assert (len(phone) == len(tone))
                tokens = [p + t for p, t in zip(phone, tone)]
                silence = "sp0"
            else:
                tokens = list(phone)
                silence = "sp"

            # Drop one trailing silence token. str.strip("sp0") was used
            # before, but it strips a *set of characters*, so it also ate
            # the ends of real tokens such as "sh0" or anything ending in
            # "s"/"p".
            if tokens and tokens[-1] == silence:
                tokens.pop()
            phone_str = " ".join(tokens).strip(" ")
            print(phone_str)
            f2.write(word + " " + phone_str + "\n")
    print("Generate word2phone dict successfully.")


def main():
    """Command-line entry point: build the word2phone dict for one acoustic model.

    ``--config`` names an INI file; its ``jieba`` section provides
    ``jieba_words_dict`` and the section named after ``--am_type``
    (``fastspeech2`` or ``speedyspeech``) provides the vocabulary and output
    paths. Any other ``--am_type`` only prints a hint.
    """
    parser = argparse.ArgumentParser(description="Generate dictionary")
    parser.add_argument(
        "--config", type=str, default="./config.ini", help="config file.")
    parser.add_argument(
        "--am_type",
        type=str,
        default="fastspeech2",
        help="fastspeech2 or speedyspeech")
    options = parser.parse_args()

    # Load the INI file and fetch the jieba word list path.
    config = configparser.ConfigParser()
    config.read(options.config)
    words_dict_path = config.get("jieba", "jieba_words_dict")

    section = options.am_type
    if section not in ("fastspeech2", "speedyspeech"):
        print("Please set correct am type, fastspeech2 or speedyspeech.")
        return

    # Only speedyspeech uses a separate tone vocabulary.
    with_tone = section == "speedyspeech"
    vocab_kwargs = {"phone_vocab_path": config.get(section, "phone2id_dict")}
    if with_tone:
        vocab_kwargs["tone_vocab_path"] = config.get(section, "tone2id_dict")
    output_path = config.get(section, "word2phone_dict")

    frontend = Frontend(**vocab_kwargs)
    print("frontend done!")

    gen_word2phone_dict(
        frontend, words_dict_path, output_path, get_tone=with_tone)


if __name__ == "__main__":
    main()


================================================
FILE: demos/TTSCppFrontend/front_demo/gentools/genid.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Input list of phones (one per line) and the phone -> id table generated from it.
PHONESFILE = "./dict/phones.txt"
PHONES_ID_FILE = "./dict/phonesid.dict"
# Input list of tones (one per line) and the tone -> id table generated from it.
TONESFILE = "./dict/tones.txt"
TONES_ID_FILE = "./dict/tonesid.dict"


def GenIdFile(file, idfile):
    """Generate a ``symbol id`` table from a one-symbol-per-line file.

    Args:
        file: path of the input list; surrounding whitespace on each line is
            ignored and blank lines are skipped.
        idfile: path of the output table (overwritten).

    The output always starts with ``<pad> 0`` and ``<unk> 1``; the symbols
    read from ``file`` are numbered consecutively from 2. Each written entry
    is also printed. Both files are handled as UTF-8.
    """
    next_id = 2  # ids 0 and 1 are reserved for <pad> and <unk>
    with open(file, 'r', encoding="utf-8") as f1, \
            open(idfile, "w+", encoding="utf-8") as f2:
        f2.write("<pad> 0\n")
        f2.write("<unk> 1\n")
        for line in f1:
            phone = line.strip()
            if not phone:
                # A blank line would otherwise emit a malformed " <id>" entry
                # and consume an id.
                continue
            entry = phone + " " + str(next_id) + "\n"
            print(entry)
            f2.write(entry)
            next_id += 1


# When run as a script, generate the phone id table and then the tone id table.
if __name__ == "__main__":
    GenIdFile(PHONESFILE, PHONES_ID_FILE)
    GenIdFile(TONESFILE, TONES_ID_FILE)


================================================
FILE: demos/TTSCppFrontend/front_demo/gentools/word2phones.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re

from pypinyin import lazy_pinyin
from pypinyin import Style

# Input word list (first space-separated field of each line is the word).
worddict = "./dict/jieba_part.dict.utf8"
# Output dictionary: one "word phone phone ..." line per input word.
newdict = "./dict/word_phones.dict"


def GenPhones(initials, finals, separate=True):
    """Interleave per-character initials and finals into one phone list.

    Args:
        initials: initial of each character ('' when there is none).
        finals: final of each character, paired with ``initials`` by position.
        separate: ``True`` appends '0' to every initial, ``False`` keeps the
            initial as is; any other value prints a notice and drops the
            initial.

    Returns:
        The list of phones: for each character its (non-empty) initial
        followed by its (non-empty) final. A final of the form ``i<digit>``
        has its 'i' rewritten to 'ii' after z/c/s and to 'iii' after
        zh/ch/sh/r.
    """
    ii_initials = ('z', 'c', 's')
    iii_initials = ('zh', 'ch', 'sh', 'r')

    result = []
    for initial, final in zip(initials, finals):
        if re.match(r'i\d', final):
            if initial in ii_initials:
                final = re.sub('i', 'ii', final)
            elif initial in iii_initials:
                final = re.sub('i', 'iii', final)

        if initial:
            if separate is True:
                result.append(initial + '0')
            elif separate is False:
                result.append(initial)
            else:
                print("Not sure whether phone and tone need to be separated")

        if final:
            result.append(final)
    return result


# Convert every word of `worddict` to phones and write "word phones" lines to
# `newdict`. Both files are handled as UTF-8 so the result does not depend on
# the system locale; blank lines are skipped.
with open(worddict, "r", encoding="utf-8") as f1, \
        open(newdict, "w+", encoding="utf-8") as f2:
    for line in f1:
        fields = line.split()
        if not fields:
            continue
        # split() also removes the trailing newline of single-field lines.
        word = fields[0]
        initials = lazy_pinyin(
            word, neutral_tone_with_five=True, style=Style.INITIALS)
        finals = lazy_pinyin(
            word, neutral_tone_with_five=True, style=Style.FINALS_TONE3)

        phones = GenPhones(initials, finals, True)

        temp = " ".join(phones)
        f2.write(word + " " + temp + "\n")


================================================
FILE: demos/TTSCppFrontend/run_front_demo.sh
================================================
#!/bin/bash
# Abort on the first failing command.
set -e
# Echo each command before it is executed.
set -x

# Switch to the directory that contains this script.
cd "$(dirname "$(realpath "$0")")"

# Run the demo binary, forwarding all command-line arguments unchanged.
./build/tts_front_demo "$@"


================================================
FILE: demos/TTSCppFrontend/src/base/type_conv.cpp
================================================
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "base/type_conv.h"

namespace ppspeech {

// Encodes a wide string as UTF-8 bytes.
std::string wstring2utf8string(const std::wstring& str) {
    // One converter is created on first use and reused by later calls.
    static std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
    std::string utf8_bytes = converter.to_bytes(str);
    return utf8_bytes;
}

// Decodes UTF-8 bytes into a wide string.
std::wstring utf8string2wstring(const std::string& str) {
    // One converter is created on first use and reused by later calls.
    static std::wstring_convert<std::codecvt_utf8<wchar_t>> converter;
    std::wstring wide_chars = converter.from_bytes(str);
    return wide_chars;
}

}  // namespace ppspeech


================================================
FILE: demos/TTSCppFrontend/src/base/type_conv.h
================================================
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef BASE_TYPE_CONVC_H
#define BASE_TYPE_CONVC_H

#include <codecvt>
#include <locale>
#include <string>


namespace ppspeech {
// Converts a wide string to a UTF-8 encoded std::string.
std::string wstring2utf8string(const std::wstring& str);

// Converts a UTF-8 encoded std::string to a wide string.
std::wstring utf8string2wstring(const std::string& str);
}  // namespace ppspeech

#endif  // BASE_TYPE_CONVC_H

================================================
FILE: demos/TTSCppFrontend/src/front/front_interface.cpp
================================================
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "front/front_interface.h"

namespace ppspeech {

// Initialises the front-end: reads the conf file, creates the jieba
// segmenter, fills the punctuation / erhua / neutral-tone word tables and
// loads the word->phone, phone->id, (optionally) tone->id and
// traditional->simplified dictionaries.
//
// Returns 0 on success (or when already initialised) and -1 when the conf
// file or any required dictionary cannot be loaded.
int FrontEngineInterface::init() {
    // Already initialised: nothing to do.
    if (_initialed) {
        return 0;
    }
    if (0 != ReadConfFile()) {
        LOG(ERROR) << "Read front conf file failed";
        return -1;
    }

    _jieba = new cppjieba::Jieba(_jieba_dict_path,
                                 _jieba_hmm_path,
                                 _jieba_user_dict_path,
                                 _jieba_idf_path,
                                 _jieba_stop_word_path);

    // Punctuation marks (full-width and ASCII) that GetWordsIds turns into a
    // silence phone.
    _punc = {"，",
             "。",
             "、",
             "？",
             "：",
             "；",
             "~",
             "！",
             ",",
             ".",
             "?",
             "!",
             ":",
             ";",
             "/",
             "\\"};
    // Quotation marks that GetWordsIds skips entirely.
    // NOTE(review): the ASCII double quote is listed twice.
    _punc_omit = {"“", "”", "\"", "\""};

    // Words that require erhua handling
    must_erhua = {
        "小院儿", "胡同儿", "范儿", "老汉儿", "撒欢儿", "寻老礼儿", "妥妥儿"};
    // Words ending in "儿" that must not receive erhua handling
    not_erhua = {"虐儿",   "为儿",   "护儿",   "瞒儿",   "救儿",   "替儿",
                 "有儿",   "一儿",   "我儿",   "俺儿",   "妻儿",   "拐儿",
                 "聋儿",   "乞儿",   "患儿",   "幼儿",   "孤儿",   "婴儿",
                 "婴幼儿", "连体儿", "脑瘫儿", "流浪儿", "体弱儿", "混血儿",
                 "蜜雪儿", "舫儿",   "祖儿",   "美儿",   "应采儿", "可儿",
                 "侄儿",   "孙儿",   "侄孙儿", "女儿",   "男儿",   "红孩儿",
                 "花儿",   "虫儿",   "马儿",   "鸟儿",   "猪儿",   "猫儿",
                 "狗儿"};

    // Words that must not be read with a neutral tone
    must_not_neural_tone_words = {
        "男子", "女子", "分子", "原子", "量子", "莲子", "石子", "瓜子", "电子"};
    // Words that require neutral-tone handling
    must_neural_tone_words = {
        "麻烦", "麻利", "鸳鸯", "高粱", "骨头", "骆驼", "马虎", "首饰", "馒头",
        "馄饨", "风筝", "难为", "队伍", "阔气", "闺女", "门道", "锄头", "铺盖",
        "铃铛", "铁匠", "钥匙", "里脊", "里头", "部分", "那么", "道士", "造化",
        "迷糊", "连累", "这么", "这个", "运气", "过去", "软和", "转悠", "踏实",
        "跳蚤", "跟头", "趔趄", "财主", "豆腐", "讲究", "记性", "记号", "认识",
        "规矩", "见识", "裁缝", "补丁", "衣裳", "衣服", "衙门", "街坊", "行李",
        "行当", "蛤蟆", "蘑菇", "薄荷", "葫芦", "葡萄", "萝卜", "荸荠", "苗条",
        "苗头", "苍蝇", "芝麻", "舒服", "舒坦", "舌头", "自在", "膏药", "脾气",
        "脑袋", "脊梁", "能耐", "胳膊", "胭脂", "胡萝", "胡琴", "胡同", "聪明",
        "耽误", "耽搁", "耷拉", "耳朵", "老爷", "老实", "老婆", "老头", "老太",
        "翻腾", "罗嗦", "罐头", "编辑", "结实", "红火", "累赘", "糨糊", "糊涂",
        "精神", "粮食", "簸箕", "篱笆", "算计", "算盘", "答应", "笤帚", "笑语",
        "笑话", "窟窿", "窝囊", "窗户", "稳当", "稀罕", "称呼", "秧歌", "秀气",
        "秀才", "福气", "祖宗", "砚台", "码头", "石榴", "石头", "石匠", "知识",
        "眼睛", "眯缝", "眨巴", "眉毛", "相声", "盘算", "白净", "痢疾", "痛快",
        "疟疾", "疙瘩", "疏忽", "畜生", "生意", "甘蔗", "琵琶", "琢磨", "琉璃",
        "玻璃", "玫瑰", "玄乎", "狐狸", "状元", "特务", "牲口", "牙碜", "牌楼",
        "爽快", "爱人", "热闹", "烧饼", "烟筒", "烂糊", "点心", "炊帚", "灯笼",
        "火候", "漂亮", "滑溜", "溜达", "温和", "清楚", "消息", "浪头", "活泼",
        "比方", "正经", "欺负", "模糊", "槟榔", "棺材", "棒槌", "棉花", "核桃",
        "栅栏", "柴火", "架势", "枕头", "枇杷", "机灵", "本事", "木头", "木匠",
        "朋友", "月饼", "月亮", "暖和", "明白", "时候", "新鲜", "故事", "收拾",
        "收成", "提防", "挖苦", "挑剔", "指甲", "指头", "拾掇", "拳头", "拨弄",
        "招牌", "招呼", "抬举", "护士", "折腾", "扫帚", "打量", "打算", "打点",
        "打扮", "打听", "打发", "扎实", "扁担", "戒指", "懒得", "意识", "意思",
        "情形", "悟性", "怪物", "思量", "怎么", "念头", "念叨", "快活", "忙活",
        "志气", "心思", "得罪", "张罗", "弟兄", "开通", "应酬", "庄稼", "干事",
        "帮手", "帐篷", "希罕", "师父", "师傅", "巴结", "巴掌", "差事", "工夫",
        "岁数", "屁股", "尾巴", "少爷", "小气", "小伙", "将就", "对头", "对付",
        "寡妇", "家伙", "客气", "实在", "官司", "学问", "学生", "字号", "嫁妆",
        "媳妇", "媒人", "婆家", "娘家", "委屈", "姑娘", "姐夫", "妯娌", "妥当",
        "妖精", "奴才", "女婿", "头发", "太阳", "大爷", "大方", "大意", "大夫",
        "多少", "多么", "外甥", "壮实", "地道", "地方", "在乎", "困难", "嘴巴",
        "嘱咐", "嘟囔", "嘀咕", "喜欢", "喇嘛", "喇叭", "商量", "唾沫", "哑巴",
        "哈欠", "哆嗦", "咳嗽", "和尚", "告诉", "告示", "含糊", "吓唬", "后头",
        "名字", "名堂", "合同", "吆喝", "叫唤", "口袋", "厚道", "厉害", "千斤",
        "包袱", "包涵", "匀称", "勤快", "动静", "动弹", "功夫", "力气", "前头",
        "刺猬", "刺激", "别扭", "利落", "利索", "利害", "分析", "出息", "凑合",
        "凉快", "冷战", "冤枉", "冒失", "养活", "关系", "先生", "兄弟", "便宜",
        "使唤", "佩服", "作坊", "体面", "位置", "似的", "伙计", "休息", "什么",
        "人家", "亲戚", "亲家", "交情", "云彩", "事情", "买卖", "主意", "丫头",
        "丧气", "两口", "东西", "东家", "世故", "不由", "不在", "下水", "下巴",
        "上头", "上司", "丈夫", "丈人", "一辈", "那个", "菩萨", "父亲", "母亲",
        "咕噜", "邋遢", "费用", "冤家", "甜头", "介绍", "荒唐", "大人", "泥鳅",
        "幸福", "熟悉", "计划", "扑腾", "蜡烛", "姥爷", "照顾", "喉咙", "吉他",
        "弄堂", "蚂蚱", "凤凰", "拖沓", "寒碜", "糟蹋", "倒腾", "报复", "逻辑",
        "盘缠", "喽啰", "牢骚", "咖喱", "扫把", "惦记"};


    // Build the lexicon (word -> phones)
    if (0 != GenDict(_word2phone_path, &word_phone_map)) {
        LOG(ERROR) << "Generate word2phone dict failed";
        return -1;
    }

    // Build the phone dictionary (phone -> phone id)
    if (0 != GenDict(_phone2id_path, &phone_id_map)) {
        LOG(ERROR) << "Generate phone2id dict failed";
        return -1;
    }

    // Build the tone dictionary (tone -> tone id); only needed when phones
    // and tones are kept separate
    if (_separate_tone == "true") {
        if (0 != GenDict(_tone2id_path, &tone_id_map)) {
            LOG(ERROR) << "Generate tone2id dict failed";
            return -1;
        }
    }

    // Build the traditional -> simplified Chinese dictionary
    if (0 != GenDict(_trand2simp_path, &trand_simp_map)) {
        LOG(ERROR) << "Generate trand2simp dict failed";
        return -1;
    }

    _initialed = true;
    return 0;
}

int FrontEngineInterface::ReadConfFile() {
    std::ifstream is(_conf_file.c_str(), std::ifstream::in);
    if (!is.good()) {
        LOG(ERROR) << "Cannot open config file: " << _conf_file;
        return -1;
    }
    std::string line, key, value;
    while (std::getline(is, line)) {
        if (line.substr(0, 2) == "--") {
            size_t pos = line.find_first_of("=", 0);
            std::string key = line.substr(2, pos - 2);
            std::string value = line.substr(pos + 1);
            conf_map[key] = value;
            LOG(INFO) << "Key: " << key << "; Value: " << value;
        }
    }

    // jieba conf path
    _jieba_dict_path = conf_map["jieba_dict_path"];
    _jieba_hmm_path = conf_map["jieba_hmm_path"];
    _jieba_user_dict_path = conf_map["jieba_user_dict_path"];
    _jieba_idf_path = conf_map["jieba_idf_path"];
    _jieba_stop_word_path = conf_map["jieba_stop_word_path"];

    // dict path
    _separate_tone = conf_map["separate_tone"];
    _word2phone_path = conf_map["word2phone_path"];
    _phone2id_path = conf_map["phone2id_path"];
    _tone2id_path = conf_map["tone2id_path"];
    _trand2simp_path = conf_map["trand2simpd_path"];

    return 0;
}

// Appends to *sentence_simp a copy of `sentence` in which every character
// found in the traditional->simplified table is replaced by its mapped
// value; other characters are copied unchanged. Always returns 0.
int FrontEngineInterface::Trand2Simp(const std::wstring &sentence,
                                     std::wstring *sentence_simp) {
    for (const wchar_t ch : sentence) {
        const std::wstring single_char(1, ch);
        const std::string utf8_char = ppspeech::wstring2utf8string(single_char);
        // Is this single character in the traditional->simplified table?
        const auto entry = trand_simp_map.find(utf8_char);
        if (entry != trand_simp_map.end()) {
            sentence_simp->append(ppspeech::utf8string2wstring(entry->second));
        } else {
            sentence_simp->append(single_char);
        }
    }

    return 0;
}

int FrontEngineInterface::GenDict(const std::string &dict_file,
                                  std::map<std::string, std::string> *map) {
    std::ifstream is(dict_file.c_str(), std::ifstream::in);
    if (!is.good()) {
        LOG(ERROR) << "Cannot open dict file: " << dict_file;
        return -1;
    }
    std::string line, key, value;
    while (std::getline(is, line)) {
        size_t pos = line.find_first_of(" ", 0);
        key = line.substr(0, pos);
        value = line.substr(pos + 1);
        (*map)[key] = value;
    }
    return 0;
}

// Appends the first element of every pair in *seg to *seg_words.
// Always returns 0.
int FrontEngineInterface::GetSegResult(
    std::vector<std::pair<std::string, std::string>> *seg,
    std::vector<std::string> *seg_words) {
    for (const auto &item : *seg) {
        seg_words->push_back(item.first);
    }
    return 0;
}

// Segments `sentence` and appends the ids of its words to *phoneids and
// *toneids. Returns 0 on success, -1 when segmentation or id conversion
// fails.
int FrontEngineInterface::GetSentenceIds(const std::string &sentence,
                                         std::vector<int> *phoneids,
                                         std::vector<int> *toneids) {
    // Segmentation result: (word, part-of-speech) pairs.
    std::vector<std::pair<std::string, std::string>> tagged_words;

    const int cut_status = Cut(sentence, &tagged_words);
    if (cut_status != 0) {
        LOG(ERROR) << "Cut sentence: \"" << sentence << "\" failed";
        return -1;
    }

    const int ids_status = GetWordsIds(tagged_words, phoneids, toneids);
    if (ids_status != 0) {
        LOG(ERROR) << "Get words phoneids failed";
        return -1;
    }
    return 0;
}

int FrontEngineInterface::GetWordsIds(
    const std::vector<std::pair<std::string, std::string>> &cut_result,
    std::vector<int> *phoneids,
    std::vector<int> *toneids) {
    std::string word;
    std::string pos;
    std::vector<std::string> word_initials;
    std::vector<std::string> word_finals;
    std::string phone;
    for (int i = 0; i < cut_result.size(); i++) {
        word = cut_result[i].first;
        pos = cut_result[i].second;
        if (std::find(_punc_omit.begin(), _punc_omit.end(), word) ==
            _punc_omit.end()) {  // 非可忽略的标点
            word_initials = {};
            word_finals = {};
            phone = "";
            // 判断是否在标点符号集合中
            if (std::find(_punc.begin(), _punc.end(), word) ==
                _punc.end()) {  // 文字
                // 获取字词的声母韵母列表
                if (0 !=
                    GetInitialsFinals(word, &word_initials, &word_finals)) {
                    LOG(ERROR)
                        << "Generate the word_initials and word_finals of "
                        << word << " failed";
                    return -1;
                }

                // 对读音进行修改
                if (0 != ModifyTone(word, pos, &word_finals)) {
                    LOG(ERROR) << "Failed to modify tone.";
                }

                // 对儿化音进行修改
                std::vector<std::vector<std::string>> new_initals_finals =
                    MergeErhua(word_initials, word_finals, word, pos);
                word_initials = new_initals_finals[0];
                word_finals = new_initals_finals[1];

                // 将声母和韵母合并成音素
                assert(word_initials.size() == word_finals.size());
                std::string temp_phone;
                for (int j = 0; j < word_initials.size(); j++) {
                    if (word_initials[j] != "") {
                        temp_phone = word_initials[j] + " " + word_finals[j];
                    } else {
                        temp_phone = word_finals[j];
                    }
                    if (j == 0) {
                        phone += temp_phone;
                    } else {
                        phone += (" " + temp_phone);
                    }
                }
            } else {  // 标点符号
                if (_separate_tone == "true") {
                    phone = "sp0";  // speedyspeech
                } else {
                    phone = "sp";  // fastspeech2
                }
            }

            // 音素到音素id
            if (0 != Phone2Phoneid(phone, phoneids, toneids)) {
                LOG(ERROR) << "Generate the phone id of " << word << " failed";
                return -1;
            }
        }
    }
    return 0;
}

// Tags `sentence` with jieba and passes the tagged words through
// MergeforModify, which fills *cut_result.
// Returns 0 on success, -1 when MergeforModify fails.
int FrontEngineInterface::Cut(
    const std::string &sentence,
    std::vector<std::pair<std::string, std::string>> *cut_result) {
    // jieba segmentation with part-of-speech tags
    std::vector<std::pair<std::string, std::string>> tagged;
    _jieba->Tag(sentence, tagged);

    // Consolidate the segmentation result
    const int merge_status = MergeforModify(&tagged, cut_result);
    if (merge_status == 0) {
        return 0;
    }

    LOG(ERROR) << "Failed to modify  for word segmentation result.";
    return -1;
}

int FrontEngineInterface::GetPhone(const std::string &word,
                                   std::string *phone) {
    // 判断 word 在不在 词典里，如果不在，进行CutAll分词
    if (word_phone_map.find(word) == word_phone_map.end()) {
        std::vector<std::string> wordcut;
        _jieba->CutAll(word, wordcut);
        phone->assign(word_phone_map[wordcut[0]]);
        for (int i = 1; i < wordcut.size(); i++) {
            phone->assign((*phone) + (" " + word_phone_map[wordcut[i]]));
        }
    } else {
        phone->assign(word_phone_map[word]);
    }

    return 0;
}

int FrontEngineInterface::Phone2Phoneid(const std::string &phone,
                                        std::vector<int> *phoneid,
                                        std::vector<int> *toneid) {
    std::vector<std::string> phone_vec;
    phone_vec = absl::StrSplit(phone, " ");
    std::string temp_phone;
    for (int i = 0; i < phone_vec.size(); i++) {
        temp_phone = phone_vec[i];
        if (_separate_tone == "true") {
            phoneid->push_back(atoi(
                (phone_id_map[temp_phone.substr(0, temp_phone.length() - 1)])
                    .c_str()));
            toneid->push_back(
                atoi((tone_id_map[temp_phone.substr(temp_phone.length() - 1,
                                                    temp_phone.length())])
                         .c_str()));
        } else {
            phoneid->push_back(atoi((phone_id_map[temp_phone]).c_str()));
        }
    }
    return 0;
}


// Decides from the finals whether every character of a word is read with the
// third tone. Returns true when each final ends with '3' (also for an empty
// list), false otherwise.
bool FrontEngineInterface::AllToneThree(
    const std::vector<std::string> &finals) {
    for (const std::string &final_str : finals) {
        // '3' is ASCII 51: any other last character means "not third tone".
        if (final_str.back() != '3') {
            return false;
        }
    }
    return true;
}

// Tells whether `word` is a reduplication, i.e. consists of exactly two
// identical characters.
bool FrontEngineInterface::IsReduplication(const std::string &word) {
    const std::wstring chars = ppspeech::utf8string2wstring(word);
    return chars.length() == 2 && chars[0] == chars[1];
}

// 获取每个字词的声母和韵母列表， word_initials 为声母列表，word_finals
// 为韵母列表
int FrontEngineInterface::GetInitialsFinals(
    const std::string &word,
    std::vector<std::string> *word_initials,
    std::vector<std::string> *word_finals) {
    std::string phone;
    GetPhone(word, &phone);  //获取字词对应的音素
    std::vector<std::string> phone_vec = absl::StrSplit(phone, " ");
    //获取韵母，每个字的音素有1或者2个，start为单个字音素的起始位置。
    int start = 0;
    while (start < phone_vec.size()) {
        if (phone_vec[start] == "sp" || phone_vec[start] == "sp0") {
            start += 1;
        } else if (isdigit(phone_vec[start].back()) == 0 ||
                   static_cast<int>(phone_vec[start].back()) == 48) {
            word_initials->push_back(phone_vec[start]);
            word_finals->push_back(phone_vec[start + 1]);
            start += 2;
        } else {
            word_initials->push_back("");
            word_finals->push_back(phone_vec[start]);
            start += 1;
        }
    }

    assert(word_finals->size() == ppspeech::utf8string2wstring(word).length() &&
           word_finals->size() == word_initials->size());

    return 0;
}

// Appends the finals of `word` to *word_finals; the initials produced by
// GetInitialsFinals are discarded.
// Returns 0 on success, -1 when GetInitialsFinals fails.
int FrontEngineInterface::GetFinals(const std::string &word,
                                    std::vector<std::string> *word_finals) {
    std::vector<std::string> unused_initials;
    const int status = GetInitialsFinals(word, &unused_initials, word_finals);
    if (status == 0) {
        return 0;
    }

    LOG(ERROR) << "Failed to get word finals";
    return -1;
}

// Splits `word` into its characters and appends each one to *wordvec as a
// one-character wide string. Always returns 0.
int FrontEngineInterface::Word2WordVec(const std::string &word,
                                       std::vector<std::wstring> *wordvec) {
    const std::wstring wide_word = ppspeech::utf8string2wstring(word);
    for (const wchar_t ch : wide_word) {
        wordvec->push_back(std::wstring(1, ch));
    }
    return 0;
}

// yuantian01解释：把一个词再进行分词找到。例子：小雨伞 --> 小 雨伞 或者 小雨 伞
int FrontEngineInterface::SplitWord(const std::string &word,
                                    std::vector<std::string> *new_word_vec) {
    std::vector<std::string> word_vec;
    std::string second_subword;
    _jieba->CutForSearch(word, word_vec);
    // 升序
    std::sort(word_vec.begin(),
              word_vec.end(),
              [](std::string a, std::string b) { return a.size() > b.size(); });
    std::string first_subword = word_vec[0];  // 提取长度最短的字符串
    int first_begin_idx = word.find_first_of(first_subword);
    if (first_begin_idx == 0) {
        second_subword = word.substr(first_subword.length());
        new_word_vec->push_back(first_subword);
        new_word_vec->push_back(second_subword);
    } else {
        second_subword = word.substr(0, word.length() - first_subword.length());
        new_word_vec->push_back(second_subword);
        new_word_vec->push_back(first_subword);
    }

    return 0;
}


// Merges a stand-alone "不" into the word that follows it.
// example: 不 一起 --> 不一起
// A "不" that ends the sequence is kept as its own entry with tag "d".
std::vector<std::pair<std::string, std::string>> FrontEngineInterface::MergeBu(
    std::vector<std::pair<std::string, std::string>> *seg_result) {
    std::vector<std::pair<std::string, std::string>> merged;
    std::string previous;

    for (const auto &item : *seg_result) {
        std::string current = item.first;
        if (previous == "不") {
            current = previous + current;
        }
        // A lone "不" is held back until the next word is known.
        if (current != "不") {
            merged.emplace_back(current, item.second);
        }
        previous = current;
    }

    if (previous == "不") {
        merged.emplace_back(previous, "d");
    }

    return merged;
}

// Merges "一" with its neighbours in two passes:
//   1. verb 一 verb (same verb on both sides): 听 一 听 --> 听一听
//   2. a remaining stand-alone "一" absorbs the following word: 一 你 --> 一你
//
// The intermediate list now lives on the stack (it used to be allocated
// with `new` and never freed). Pass 1 updates the last entry emitted so far;
// the old code indexed the intermediate list with the input index, which
// could run out of bounds after entries had been dropped.
std::vector<std::pair<std::string, std::string>> FrontEngineInterface::Mergeyi(
    std::vector<std::pair<std::string, std::string>> *seg_result) {
    const std::vector<std::pair<std::string, std::string>> &seg = *seg_result;
    const size_t seg_size = seg.size();

    // function 1  example: 听 一 听 --> 听一听
    std::vector<std::pair<std::string, std::string>> result_temp;
    for (size_t i = 0; i < seg_size; i++) {
        const std::string &word = seg[i].first;
        const std::string &pos = seg[i].second;

        const bool yi_between_same_verbs =
            (i >= 1) && (word == "一") && (i + 1 < seg_size) &&
            (seg[i - 1].first == seg[i + 1].first) &&
            (seg[i - 1].second == "v");
        if (yi_between_same_verbs) {
            // The preceding verb is the last entry emitted so far.
            if (!result_temp.empty()) {
                const std::string verb = result_temp.back().first;
                result_temp.back().first = verb + "一" + verb;
            }
            continue;
        }

        // The second verb of "verb 一 verb" is dropped here.
        const bool verb_after_yi = (i >= 2) && (seg[i - 1].first == "一") &&
                                   (seg[i - 2].first == word) && (pos == "v");
        if (verb_after_yi) {
            continue;
        }
        result_temp.push_back(std::make_pair(word, pos));
    }

    // function 2  example: 一 你 -->  一你
    std::vector<std::pair<std::string, std::string>> result;
    for (const auto &item : result_temp) {
        if (!result.empty() && result.back().first == "一") {
            result.back().first += item.first;
        } else {
            result.push_back(item);
        }
    }

    return result;
}

// Fuses an entry into the previous output entry when both words are equal,
// e.g. 你 你 --> 你你. The part-of-speech of the first entry is kept.
std::vector<std::pair<std::string, std::string>>
FrontEngineInterface::MergeReduplication(
    std::vector<std::pair<std::string, std::string>> *seg_result) {
    std::vector<std::pair<std::string, std::string>> merged;

    for (const auto &entry : *seg_result) {
        const bool repeats_previous =
            !merged.empty() && merged.back().first == entry.first;
        if (!repeats_previous) {
            merged.push_back(entry);
            continue;
        }
        // Compared against the (possibly already fused) previous output word.
        merged.back().first += entry.first;
    }

    return merged;
}

// Merges two neighbouring entries when AllToneThree() holds for the finals of
// both of them (i.e. the first and the second word are entirely third tone).
//
// A merge only happens when the previous entry was not itself produced by a
// merge, is not a reduplication (those must be left for the neutral-tone
// sandhi), and the two words together are at most 3 characters long.
// A trailing punctuation entry (member of _punc) is excluded from the tone
// analysis and appended unchanged.
//
// seg_result: (word, part-of-speech) pairs; it is only read.
// Returns the regrouped pairs; an empty input yields an empty result.
std::vector<std::pair<std::string, std::string>>
FrontEngineInterface::MergeThreeTones(
    std::vector<std::pair<std::string, std::string>> *seg_result) {
    std::vector<std::pair<std::string, std::string>> result;
    // With no entries there is no "last entry" to inspect; indexing it would
    // read element -1.
    if (seg_result->empty()) {
        return result;
    }

    const std::vector<std::pair<std::string, std::string>> &seg = *seg_result;
    std::vector<std::vector<std::string>> finals;  // finals of every entry
    std::vector<std::string> word_final;
    std::vector<bool> merge_last(seg.size(), false);

    // If the last entry is punctuation its initials/finals are not looked at.
    int word_num = seg.size() - 1;
    if (std::find(_punc.begin(), _punc.end(), seg[word_num].first) ==
        _punc.end()) {  // the last entry is not punctuation
        word_num += 1;
    }

    // Collect the finals of every analysed entry.
    for (int i = 0; i < word_num; i++) {
        word_final = {};
        const std::string &word = seg[i].first;
        if (std::find(_punc_omit.begin(), _punc_omit.end(), word) ==
            _punc_omit.end()) {  // not an ignorable punctuation, i.e. text
            if (0 != GetFinals(word, &word_final)) {
                LOG(ERROR) << "Failed to get the final of word.";
            }
        }

        finals.push_back(word_final);
    }
    assert(static_cast<size_t>(word_num) == finals.size());

    // Merge neighbouring third-tone entries.
    for (int i = 0; i < word_num; i++) {
        const std::string &word = seg[i].first;
        const std::string &pos = seg[i].second;
        const bool both_tone_three = i - 1 >= 0 &&
                                     AllToneThree(finals[i - 1]) &&
                                     AllToneThree(finals[i]) &&
                                     !merge_last[i - 1];
        // if the last word is reduplication, not merge, because
        // reduplication need to be _neural_sandhi
        if (both_tone_three && !IsReduplication(seg[i - 1].first) &&
            ppspeech::utf8string2wstring(seg[i - 1].first).length() +
                    ppspeech::utf8string2wstring(word).length() <=
                3) {
            // i >= 1 here, and entry 0 is always appended, so `result` is
            // non-empty.
            result.back().first += word;
            merge_last[i] = true;
        } else {
            result.emplace_back(word, pos);
        }
    }

    // Re-attach the trailing punctuation entry, if one was set aside.
    if (static_cast<size_t>(word_num) < seg.size()) {
        result.push_back(seg[word_num]);
    }

    return result;
}

// Merges two neighbouring entries when the last final of the first word and
// the first final of the second word both end in "3" (third tone).
//
// A merge only happens when the previous entry was not itself produced by a
// merge, is not a reduplication (those must be left for the neutral-tone
// sandhi), and the two words together are at most 3 characters long.
// A trailing punctuation entry (member of _punc) is excluded from the tone
// analysis and appended unchanged.
//
// seg_result: (word, part-of-speech) pairs; it is only read.
// Returns the regrouped pairs; an empty input yields an empty result.
std::vector<std::pair<std::string, std::string>>
FrontEngineInterface::MergeThreeTones2(
    std::vector<std::pair<std::string, std::string>> *seg_result) {
    std::vector<std::pair<std::string, std::string>> result;
    // With no entries there is no "last entry" to inspect; indexing it would
    // read element -1.
    if (seg_result->empty()) {
        return result;
    }

    const std::vector<std::pair<std::string, std::string>> &seg = *seg_result;
    std::vector<std::vector<std::string>> finals;  // finals of every entry
    std::vector<std::string> word_final;
    std::vector<bool> merge_last(seg.size(), false);

    // Check whether the last entry is punctuation.
    int word_num = seg.size() - 1;
    if (std::find(_punc.begin(), _punc.end(), seg[word_num].first) ==
        _punc.end()) {  // the last entry is not punctuation
        word_num += 1;
    }

    // Collect the finals of every analysed entry.
    for (int i = 0; i < word_num; i++) {
        word_final = {};
        const std::string &word = seg[i].first;
        // Text gets its finals looked up; ignorable punctuation such as
        // quotation marks is skipped and keeps an empty list.
        if (std::find(_punc_omit.begin(), _punc_omit.end(), word) ==
            _punc_omit.end()) {
            if (0 != GetFinals(word, &word_final)) {
                LOG(ERROR) << "Failed to get the final of word.";
            }
        }

        finals.push_back(word_final);
    }
    assert(static_cast<size_t>(word_num) == finals.size());

    // Merge entries whose touching syllables are both third tone.
    for (int i = 0; i < word_num; i++) {
        const std::string &word = seg[i].first;
        const std::string &pos = seg[i].second;
        const bool touching_tone_three =
            i - 1 >= 0 && !finals[i - 1].empty() &&
            absl::EndsWith(finals[i - 1].back(), "3") &&
            !finals[i].empty() && absl::EndsWith(finals[i].front(), "3") &&
            !merge_last[i - 1];
        // if the last word is reduplication, not merge, because
        // reduplication need to be _neural_sandhi
        if (touching_tone_three && !IsReduplication(seg[i - 1].first) &&
            ppspeech::utf8string2wstring(seg[i - 1].first).length() +
                    ppspeech::utf8string2wstring(word).length() <=
                3) {
            // i >= 1 here, and entry 0 is always appended, so `result` is
            // non-empty.
            result.back().first += word;
            merge_last[i] = true;
        } else {
            result.emplace_back(word, pos);
        }
    }

    // Re-attach the trailing punctuation entry, if one was set aside.
    if (static_cast<size_t>(word_num) < seg.size()) {
        result.push_back(seg[word_num]);
    }

    return result;
}

// Appends a stand-alone "儿" entry to the entry before it,
// e.g. 吃饭 儿 --> 吃饭儿. A "儿" in first position is kept as its own entry.
std::vector<std::pair<std::string, std::string>> FrontEngineInterface::MergeEr(
    std::vector<std::pair<std::string, std::string>> *seg_result) {
    std::vector<std::pair<std::string, std::string>> merged;
    const size_t total = seg_result->size();

    for (size_t idx = 0; idx < total; ++idx) {
        const std::pair<std::string, std::string> &entry = (*seg_result)[idx];
        if (idx == 0 || entry.first != "儿") {
            merged.push_back(entry);
        } else {
            // idx >= 1, and entry 0 is always appended, so back() is valid.
            merged.back().first += entry.first;
        }
    }

    return merged;
}

// Runs the whole merge pipeline over a segmentation result, in this order:
// MergeBu, Mergeyi, MergeReduplication, MergeThreeTones, MergeThreeTones2,
// MergeEr. The word list is logged before and after.
//
// seg_word_type:        input (word, part-of-speech) pairs.
// modify_seg_word_type: receives the output of the last merge step.
// Always returns 0.
int FrontEngineInterface::MergeforModify(
    std::vector<std::pair<std::string, std::string>> *seg_word_type,
    std::vector<std::pair<std::string, std::string>> *modify_seg_word_type) {
    std::vector<std::string> words;
    GetSegResult(seg_word_type, &words);
    LOG(INFO) << "Before merge, seg result is: "
              << limonp::Join(words.begin(), words.end(), "/");

    // Each step reads the previous output and replaces it with its own.
    *modify_seg_word_type = MergeBu(seg_word_type);
    *modify_seg_word_type = Mergeyi(modify_seg_word_type);
    *modify_seg_word_type = MergeReduplication(modify_seg_word_type);
    *modify_seg_word_type = MergeThreeTones(modify_seg_word_type);
    *modify_seg_word_type = MergeThreeTones2(modify_seg_word_type);
    *modify_seg_word_type = MergeEr(modify_seg_word_type);

    words.clear();
    GetSegResult(modify_seg_word_type, &words);
    LOG(INFO) << "After merge, seg result is: "
              << limonp::Join(words.begin(), words.end(), "/");

    return 0;
}


// Applies the tone sandhi of "不" to the finals of one word, in place.
//
//  - three-character word with "不" in the middle (e.g. 看不懂): the tone
//    digit of the second final becomes 5;
//  - otherwise every "不" followed by a final ending in "4" (e.g. 不怕) gets
//    tone digit 2.
//
// word:   UTF-8 word.
// finals: finals of the word; the tone is the last character of each entry.
// Returns 0 on success, -1 when the word cannot be split into characters.
int FrontEngineInterface::BuSandi(const std::string &word,
                                  std::vector<std::string> *finals) {
    const std::wstring bu = L"不";
    std::vector<std::wstring> wordvec;
    // Split the word into one element per character.
    if (0 != Word2WordVec(word, &wordvec)) {
        LOG(ERROR) << "Failed to get word vector";
        return -1;
    }

    // Overwrites the trailing tone digit. An empty final is left untouched
    // (replace() at position length() - 1 would throw for it).
    auto set_tone = [](std::string *final_str, char tone) {
        if (!final_str->empty()) {
            final_str->back() = tone;
        }
    };
    // The finals list is not guaranteed to have one entry per character
    // (MergeErhua checks for exactly that mismatch), so every index into it
    // is bounds-checked.
    const size_t final_num = finals->size();

    // e.g. 看不懂   b u4  -->  b u5
    if (wordvec.size() == 3 && wordvec[1] == bu) {
        if (final_num > 1) {
            set_tone(&(*finals)[1], '5');
        }
    } else {
        // e.g. 不怕  b u4 --> b u2
        for (size_t i = 0; i + 1 < wordvec.size() && i + 1 < final_num; i++) {
            if (wordvec[i] == bu && absl::EndsWith((*finals)[i + 1], "4")) {
                set_tone(&(*finals)[i], '2');
            }
        }
    }

    return 0;
}


// Applies the tone sandhi of "一" to the finals of one word, in place.
//
//  1. "一" inside a pure number sequence (e.g. 一零零, 二一零): unchanged.
//  2. "一" between a repeated character (e.g. 看一看): tone digit 5.
//  3. word starting with "第一": tone digit 1.
//  4. otherwise every "一" that has a following character gets tone digit 2
//     when the next final ends in "4" (e.g. 一段) and 4 if not (e.g. 一天).
//
// word:   UTF-8 word.
// finals: finals of the word; the tone is the last character of each entry.
// Returns 0 on success, -1 when the word cannot be split into characters.
int FrontEngineInterface::YiSandhi(const std::string &word,
                                   std::vector<std::string> *finals) {
    const std::wstring yi = L"一";
    std::vector<std::wstring> wordvec;
    // Split the word into one element per character.
    if (0 != Word2WordVec(word, &wordvec)) {
        LOG(ERROR) << "Failed to get word vector";
        return -1;
    }
    if (wordvec.empty()) {
        return 0;  // nothing to retune; back()/[0] below need an element
    }

    // Overwrites the trailing tone digit. An empty final is left untouched
    // (replace() at position length() - 1 would throw for it).
    auto set_tone = [](std::string *final_str, char tone) {
        if (!final_str->empty()) {
            final_str->back() = tone;
        }
    };
    const size_t word_num = wordvec.size();
    const size_t final_num = finals->size();

    // Case 1: "一" in number sequences, e.g. 一零零, 二一零.
    // "五" was missing from the digit list; it is appended at the end.
    const std::wstring num_wstr = L"零一二三四六七八九五";
    const std::wstring word_wstr = ppspeech::utf8string2wstring(word);
    if (word_wstr.find(yi) != word_wstr.npos && wordvec.back() != yi) {
        bool all_digits = true;
        for (size_t j = 0; j < word_num; j++) {
            if (num_wstr.find(wordvec[j]) == num_wstr.npos) {
                all_digits = false;
                break;
            }
        }
        if (all_digits) {
            return 0;
        }
        // Not a number sequence: fall through to the rules below. This check
        // used to be the head of the else-if chain, so a word such as 一定 or
        // 看一看 entered it, failed the digit test and skipped every rule.
    }

    if (word_num == 3 && wordvec[1] == yi && wordvec[0] == wordvec[2]) {
        // "一" between reduplication words should be yi5, e.g. 看一看
        if (final_num > 1) {
            set_tone(&(*finals)[1], '5');
        }
    } else if (word_num >= 2 && wordvec[0] == L"第" && wordvec[1] == yi) {
        // ordinal: the word starts with 第一
        if (final_num > 1) {
            set_tone(&(*finals)[1], '1');
        }
    } else {
        for (size_t i = 0; i + 1 < word_num && i + 1 < final_num; i++) {
            if (wordvec[i] != yi) {
                continue;
            }
            if (absl::EndsWith((*finals)[i + 1], "4")) {
                // "一" before tone4 should be yi2, e.g. 一段
                set_tone(&(*finals)[i], '2');
            } else {
                // "一" before non-tone4 should be yi4, e.g. 一天
                set_tone(&(*finals)[i], '4');
            }
        }
    }

    return 0;
}

// Applies the neutral-tone (tone digit 5) rules to the finals of one word,
// in place.
//
//  1. Reduplication: when the part-of-speech starts with n, v or a, a
//     character equal to the one before it gets tone 5 (e.g. 奶奶, 试试).
//  2. The last final gets tone 5 when the word ends with a modal particle,
//     with 的/地/得, is an aspect particle 了/着/过, ends with 们/子, with a
//     locative 上/下/里, with a directional 来/去 after 上下进出回过起开, contains a
//     quantified 个, or is listed in must_neural_tone_words (whole word or
//     its last two characters).
//  3. The word is split in two with SplitWord(); a sub-word listed in
//     must_neural_tone_words gets tone 5 on its last final.
//
// word:   UTF-8 word.
// pos:    part-of-speech tag of the word.
// finals: finals of the word; the tone is the last character of each entry.
// Returns 0 on success, -1 when the word cannot be split.
int FrontEngineInterface::NeuralSandhi(const std::string &word,
                                       const std::string &pos,
                                       std::vector<std::string> *finals) {
    std::wstring word_wstr = ppspeech::utf8string2wstring(word);
    std::vector<std::wstring> wordvec;
    // Split the word into one element per character.
    if (0 != Word2WordVec(word, &wordvec)) {
        LOG(ERROR) << "Failed to get word vector";
        return -1;
    }
    int word_num = wordvec.size();
    assert(word_num == word_wstr.length());

    // Nothing to retune; also keeps every back() below well defined.
    if (word_num == 0 || finals->empty()) {
        return 0;
    }

    // Overwrites the trailing tone digit. An empty final is left untouched
    // (replace() at position length() - 1 would throw for it).
    auto set_tone = [](std::string *final_str, char tone) {
        if (!final_str->empty()) {
            final_str->back() = tone;
        }
    };
    auto contains = [](const std::vector<std::string> &list,
                       const std::string &item) {
        return std::find(list.begin(), list.end(), item) != list.end();
    };

    // Case 1: reduplication words for n. and v. e.g. 奶奶, 试试, 旺旺
    const std::string inits = "nva";
    if (!pos.empty() && inits.find(pos[0]) != inits.npos) {
        for (size_t j = 1; j < wordvec.size() && j < finals->size(); j++) {
            if (wordvec[j] == wordvec[j - 1]) {
                set_tone(&(*finals)[j], '5');
            }
        }
    }

    // Case 2: character classes that make the last syllable neutral.
    const std::wstring yuqici = L"吧呢哈啊呐噻嘛吖嗨呐哦哒额滴哩哟喽啰耶喔诶";
    const std::wstring de = L"的地得";
    const std::wstring le = L"了着过";
    const std::vector<std::string> le_pos = {"ul", "uz", "ug"};
    const std::wstring men = L"们子";
    const std::vector<std::string> men_pos = {"r", "n"};
    const std::wstring weizhi = L"上下里";
    const std::vector<std::string> weizhi_pos = {"s", "l", "f"};
    const std::wstring dong = L"来去";
    const std::wstring fangxiang = L"上下进出回过起开";
    const std::wstring ge = L"个";
    // "五" was missing from the digits; it is appended at the end.
    const std::wstring xiushi = L"几有两半多各整每做是零一二三四六七八九五";
    const auto ge_idx = word_wstr.find_first_of(ge);  // first position of 个
    const std::wstring &last_char = wordvec.back();

    bool neutral_last = false;
    if (yuqici.find(last_char) != yuqici.npos) {
        neutral_last = true;
    } else if (de.find(last_char) != de.npos) {
        neutral_last = true;
    } else if (word_num == 1 && le.find(wordvec[0]) != le.npos &&
               contains(le_pos, pos)) {
        neutral_last = true;
    } else if (word_num > 1 && men.find(last_char) != men.npos &&
               contains(men_pos, pos) &&
               !contains(must_not_neural_tone_words, word)) {
        // The exclusion list holds words that must NOT take the neutral tone;
        // the test was inverted and applied the rule only to listed words.
        neutral_last = true;
    } else if (word_num > 1 && weizhi.find(last_char) != weizhi.npos &&
               contains(weizhi_pos, pos)) {
        neutral_last = true;
    } else if (word_num > 1 && dong.find(last_char) != dong.npos &&
               fangxiang.find(wordvec[word_num - 2]) != fangxiang.npos) {
        neutral_last = true;
    } else if ((ge_idx != word_wstr.npos && ge_idx >= 1 &&
                ge_idx - 1 < wordvec.size() &&
                xiushi.find(wordvec[ge_idx - 1]) != xiushi.npos) ||
               word_wstr == ge) {
        neutral_last = true;
    } else if (contains(must_neural_tone_words, word) ||
               (word_num >= 2 &&
                contains(must_neural_tone_words,
                         ppspeech::wstring2utf8string(
                             word_wstr.substr(word_num - 2))))) {
        neutral_last = true;
    }
    if (neutral_last) {
        set_tone(&finals->back(), '5');
    }

    // Case 3: split the word further so long words become shorter pieces.
    std::vector<std::string> word_list;
    if (0 != SplitWord(word, &word_list)) {
        LOG(ERROR) << "Failed to split word.";
        return -1;
    }
    if (word_list.empty()) {
        return 0;
    }

    // Finals [0, first_len) belong to the first sub-word, the rest to the
    // second one. They are edited directly in the caller's vector: the former
    // code rebound `finals` to a fresh heap vector, so these edits were both
    // leaked and invisible to the caller.
    const size_t first_wlen =
        ppspeech::utf8string2wstring(word_list[0]).length();
    const size_t first_len =
        first_wlen < finals->size() ? first_wlen : finals->size();
    const size_t sub_begin[2] = {0, first_len};
    const size_t sub_end[2] = {first_len, finals->size()};
    for (size_t i = 0; i < 2 && i < word_list.size(); i++) {
        if (sub_begin[i] == sub_end[i]) {
            continue;  // this sub-word owns no final
        }
        const std::wstring temp_wstr =
            ppspeech::utf8string2wstring(word_list[i]);
        if (contains(must_neural_tone_words, word_list[i]) ||
            (temp_wstr.length() >= 2 &&
             contains(must_neural_tone_words,
                      ppspeech::wstring2utf8string(
                          temp_wstr.substr(temp_wstr.length() - 2))))) {
            set_tone(&(*finals)[sub_end[i] - 1], '5');
        }
    }

    return 0;
}

// Applies third-tone sandhi to the finals of one word, in place.
//
//  - 2 characters, all third tone: the first final gets tone digit 2.
//  - 3 characters: the word is split in two with SplitWord().
//      * all third tone, 2+1 split (e.g. 蒙古/包): finals 0 and 1 get tone 2;
//      * all third tone, 1+2 split (e.g. 纸/老虎): final 1 gets tone 2;
//      * otherwise each half is handled on its own (e.g. 所有/人), and a third
//        tone at the end of the first half in front of a third tone at the
//        start of the second half becomes tone 2.
//  - 4 characters: treated as two 2-character halves; a half that is all
//    third tone gets tone 2 on its first final.
//
// word:   UTF-8 word.
// finals: finals of the word; the tone is the last character of each entry.
// Returns 0 on success, -1 when the word cannot be split.
int FrontEngineInterface::ThreeSandhi(const std::string &word,
                                      std::vector<std::string> *finals) {
    std::wstring word_wstr = ppspeech::utf8string2wstring(word);
    std::vector<std::wstring> wordvec;
    // Split the word into one element per character.
    if (0 != Word2WordVec(word, &wordvec)) {
        LOG(ERROR) << "Failed to get word vector";
        return -1;
    }
    int word_num = wordvec.size();
    assert(word_num == word_wstr.length());

    // Overwrites the trailing tone digit. An empty final is left untouched
    // (replace() at position length() - 1 would throw for it).
    auto set_tone = [](std::string *final_str, char tone) {
        if (!final_str->empty()) {
            final_str->back() = tone;
        }
    };
    const size_t final_num = finals->size();

    if (word_num == 2) {
        if (final_num > 0 && AllToneThree(*finals)) {
            set_tone(&(*finals)[0], '2');
        }
    } else if (word_num == 3) {
        // Split the word further so long words become shorter pieces.
        std::vector<std::string> word_list;
        if (0 != SplitWord(word, &word_list)) {
            LOG(ERROR) << "Failed to split word.";
            return -1;
        }
        if (word_list.empty()) {
            return 0;
        }
        const size_t first_wlen =
            ppspeech::utf8string2wstring(word_list[0]).length();

        if (AllToneThree(*finals)) {
            if (first_wlen == 2) {
                // disyllabic + monosyllabic, e.g. 蒙古/包
                if (final_num > 0) {
                    set_tone(&(*finals)[0], '2');
                }
                if (final_num > 1) {
                    set_tone(&(*finals)[1], '2');
                }
            } else if (first_wlen == 1) {
                // monosyllabic + disyllabic, e.g. 纸/老虎
                if (final_num > 1) {
                    set_tone(&(*finals)[1], '2');
                }
            }
        } else {
            // Finals of the two sub-words, as independent copies.
            const size_t split = first_wlen < final_num ? first_wlen : final_num;
            std::vector<std::vector<std::string>> parts;
            parts.emplace_back(finals->begin(), finals->begin() + split);
            parts.emplace_back(finals->begin() + split, finals->end());

            for (size_t i = 0; i < parts.size(); i++) {
                if (parts[i].size() == 2 && AllToneThree(parts[i])) {
                    // e.g. 所有/人
                    set_tone(&parts[i][0], '2');
                } else if (i == 1 && !parts[0].empty() && !parts[1].empty() &&
                           !AllToneThree(parts[1]) &&
                           absl::EndsWith(parts[1][0], "3") &&
                           absl::EndsWith(parts[0].back(), "3")) {
                    set_tone(&parts[0].back(), '2');
                }
            }

            // Write the result back into the caller's vector. The former code
            // rebound `finals` to a fresh heap vector, which leaked it and
            // discarded the changes.
            finals->clear();
            finals->insert(finals->end(), parts[0].begin(), parts[0].end());
            finals->insert(finals->end(), parts[1].begin(), parts[1].end());
        }
    } else if (word_num == 4) {
        // Split the idiom into two words of length 2.
        if (final_num < 2) {
            return 0;  // too few finals to form the first half
        }
        std::vector<std::vector<std::string>> parts;
        parts.emplace_back(finals->begin(), finals->begin() + 2);
        parts.emplace_back(finals->begin() + 2, finals->end());

        // Same write-back as above: edit the copies, then refill the caller's
        // vector instead of a leaked temporary.
        finals->clear();
        for (size_t j = 0; j < parts.size(); j++) {
            if (!parts[j].empty() && AllToneThree(parts[j])) {
                set_tone(&parts[j][0], '2');
            }
            finals->insert(finals->end(), parts[j].begin(), parts[j].end());
        }
    }

    return 0;
}

// Runs all tone-sandhi passes over the finals of one word, in place, in the
// order BuSandi, YiSandhi, NeuralSandhi, ThreeSandhi. The first pass that
// reports an error stops the sequence.
//
// Returns 0 when every pass succeeded, -1 otherwise.
int FrontEngineInterface::ModifyTone(const std::string &word,
                                     const std::string &pos,
                                     std::vector<std::string> *finals) {
    // `&&` short-circuits, so a failed pass prevents the later ones from
    // running.
    bool ok = (0 == BuSandi(word, finals));
    ok = ok && (0 == YiSandhi(word, finals));
    ok = ok && (0 == NeuralSandhi(word, pos, finals));
    ok = ok && (0 == ThreeSandhi(word, finals));

    if (ok) {
        return 0;
    }

    LOG(ERROR) << "Failed to modify tone of the word: " << word;
    return -1;
}

// Folds a trailing "儿" syllable into the syllable before it (erhua).
//
// The input is returned unchanged when the word is not in must_erhua and is
// either listed in not_erhua or tagged "a", "j" or "nr", and also when the
// number of finals differs from the number of characters. Otherwise, if the
// last character is "儿" with final "er2" or "er5", the word has at least two
// characters and its last two characters are not in not_erhua, an "r" is
// inserted in front of the last character of the previous final and the
// "儿" initial/final pair is dropped.
//
// Returns a two-element vector: {initials, finals}.
std::vector<std::vector<std::string>> FrontEngineInterface::MergeErhua(
    const std::vector<std::string> &initials,
    const std::vector<std::string> &finals,
    const std::string &word,
    const std::string &pos) {
    const std::vector<std::string> skip_pos = {"a", "j", "nr"};
    const std::wstring wide_word = ppspeech::utf8string2wstring(word);
    std::vector<std::wstring> chars;
    // Split the word into one element per character.
    if (0 != Word2WordVec(word, &chars)) {
        LOG(ERROR) << "Failed to get word vector";
    }
    const int char_num = chars.size();

    std::vector<std::vector<std::string>> out;

    const bool forced =
        std::find(must_erhua.begin(), must_erhua.end(), word) !=
        must_erhua.end();
    const bool forbidden =
        std::find(not_erhua.begin(), not_erhua.end(), word) !=
            not_erhua.end() ||
        std::find(skip_pos.begin(), skip_pos.end(), pos) != skip_pos.end();
    if ((!forced && forbidden) || finals.size() != char_num) {
        out.push_back(initials);
        out.push_back(finals);
        return out;
    }

    assert(finals.size() == char_num);
    std::vector<std::string> kept_initials;
    std::vector<std::string> kept_finals;
    for (size_t k = 0; k < finals.size(); ++k) {
        const bool fold =
            k == finals.size() - 1 && chars[k] == L"儿" &&
            (finals[k] == "er2" || finals[k] == "er5") && char_num >= 2 &&
            std::find(not_erhua.begin(),
                      not_erhua.end(),
                      ppspeech::wstring2utf8string(wide_word.substr(
                          wide_word.length() - 2))) == not_erhua.end() &&
            !kept_finals.empty();
        if (!fold) {
            kept_initials.push_back(initials[k]);
            kept_finals.push_back(finals[k]);
            continue;
        }
        // Insert "r" in front of the last character of the previous final.
        std::string &prev = kept_finals.back();
        prev = prev.substr(0, prev.length() - 1) + "r" +
               prev.substr(prev.length() - 1);
    }
    out.push_back(kept_initials);
    out.push_back(kept_finals);

    return out;
}
}  // namespace ppspeech


================================================
FILE: demos/TTSCppFrontend/src/front/front_interface.h
================================================
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef PADDLE_TTS_SERVING_FRONT_FRONT_INTERFACE_H
#define PADDLE_TTS_SERVING_FRONT_FRONT_INTERFACE_H

#include <glog/logging.h>
#include <fstream>
#include <map>
#include <memory>
#include <string>
//#include "utils/dir_utils.h"
#include <cppjieba/Jieba.hpp>
#include "absl/strings/str_split.h"
#include "front/text_normalize.h"


namespace ppspeech {

class FrontEngineInterface : public TextNormalizer {
  public:
    explicit FrontEngineInterface(std::string conf) : _conf_file(conf) {
        TextNormalizer();
        _jieba = nullptr;
        _initialed = false;
        init();
    }

    int init();
    ~FrontEngineInterface() {}

    // 读取配置文件
    int ReadConfFile();

    // 简体转繁体
    int Trand2Simp(const std::wstring &sentence, std::wstring *sentence_simp);

    // 生成字典
    int GenDict(const std::string &file,
                std::map<std::string, std::string> *map);

    // 由 词+词性的分词结果转为仅包含词的结果
    int GetSegResult(std::vector<std::pair<std::string, std::string>> *seg,
                     std::vector<std::string> *seg_words);

    // 生成句子的音素，音调id。如果音素和音调未分开，则 toneids
    // 为空（fastspeech2），反之则不为空(speedyspeech)
    int GetSentenceIds(const std::string &sentence,
                       std::vector<int> *phoneids,
                       std::vector<int> *toneids);

    // 根据分词结果获取词的音素，音调id，并对读音进行适当修改
    // (ModifyTone)。如果音素和音调未分开，则 toneids
    // 为空（fastspeech2），反之则不为空(speedyspeech)
    int GetWordsIds(
        const std::vector<std::pair<std::string, std::string>> &cut_result,
        std::vector<int> *phoneids,
        std::vector<int> *toneids);

    // 结巴分词生成包含词和词性的分词结果，再对分词结果进行适当修改
    // (MergeforModify)
    int Cut(const std::string &sentence,
            std::vector<std::pair<std::string, std::string>> *cut_result);

    // 字词到音素的映射，查找字典
    int GetPhone(const std::string &word, std::string *phone);

    // 音素到音素id
    int Phone2Phoneid(const std::string &phone,
                      std::vector<int> *phoneid,
                      std::vector<int> *toneids);


    // 根据韵母判断该词中每个字的读音都为第三声。true表示词中每个字都是第三声
    bool AllToneThree(const std::vector<std::string> &finals);

    // 判断词是否是叠词
    bool IsReduplication(const std::string &word);

    // 获取每个字词的声母韵母列表
    int GetInitialsFinals(const std::string &word,
                          std::vector<std::string> *word_initials,
                          std::vector<std::string> *word_finals);

    // 获取每个字词的韵母列表
    int GetFinals(const std::string &word,
                  std::vector<std::string> *word_finals);

    // 整个词转成向量形式，向量的每个元素对应词的一个字
    int Word2WordVec(const std::string &word,
                     std::vector<std::wstring> *wordvec);

    // 将整个词重新进行 full cut，分词后，各个词会在词典中
    int SplitWord(const std::string &word,
                  std::vector<std::string> *fullcut_word);

    // 对分词结果进行处理：对包含“不”字的分词结果进行整理
    std::vector<std::pair<std::string, std::string>> MergeBu(
        std::vector<std::pair<std::string, std::string>> *seg_result);

    // 对分词结果进行处理：对包含“一”字的分词结果进行整理
    std::vector<std::pair<std::string, std::string>> Mergeyi(
        std::vector<std::pair<std::string, std::string>> *seg_result);

    // 对分词结果进行处理：对前后相同的两个字进行合并
    std::vector<std::pair<std::string, std::string>> MergeReduplication(
        std::vector<std::pair<std::string, std::string>> *seg_result);

    // 对一个词和后一个词他们的读音均为第三声的两个词进行合并
    std::vector<std::pair<std::string, std::string>> MergeThreeTones(
        std::vector<std::pair<std::string, std::string>> *seg_result);

    // 对一个词的最后一个读音和后一个词的第一个读音为第三声的两个词进行合并
    std::vector<std::pair<std::string, std::string>> MergeThreeTones2(
        std::vector<std::pair<std::string, std::string>> *seg_result);

    // 对分词结果进行处理：对包含“儿”字的分词结果进行整理
    std::vector<std::pair<std::string, std::string>> MergeEr(
        std::vector<std::pair<std::string, std::string>> *seg_result);

    // 对分词结果进行处理、修改
    int MergeforModify(
        std::vector<std::pair<std::string, std::string>> *seg_result,
        std::vector<std::pair<std::string, std::string>> *merge_seg_result);


    // 对包含“不”字的相关词音调进行修改
    int BuSandi(const std::string &word, std::vector<std::string> *finals);

    // 对包含“一”字的相关词音调进行修改
    int YiSandhi(const std::string &word, std::vector<std::string> *finals);

    // 对一些特殊词（包括量词，语助词等）的相关词音调进行修改
    int NeuralSandhi(const std::string &word,
                     const std::string &pos,
                     std::vector<std::string> *finals);

    // 对包含第三声的相关词音调进行修改
    int ThreeSandhi(const std::string &word, std::vector<std::string> *finals);

    // 对字词音调进行处理、修改
    int ModifyTone(const std::string &word,
                   const std::string &pos,
                   std::vector<std::string> *finals);


    // 对儿化音进行处理
    std::vector<std::vector<std::string>> MergeErhua(
        const std::vector<std::string> &initials,
        const std::vector<std::string> &finals,
        const std::string &word,
        const std::string &pos);


  private:
    bool _initialed;
    cppjieba::Jieba *_jieba;
    std::vector<std::string> _punc;
    std::vector<std::string> _punc_omit;

    std::string _conf_file;
    std::map<std::string, std::string> conf_map;
    std::map<std::string, std::string> word_phone_map;
    std::map<std::string, std::string> phone_id_map;
    std::map<std::string, std::string> tone_id_map;
    std::map<std::string, std::string> trand_simp_map;


    std::string _jieba_dict_path;
    std::string _jieba_hmm_path;
    std::string _jieba_user_dict_path;
    std::string _jieba_idf_path;
    std::string _jieba_stop_word_path;

    std::string _separate_tone;
    std::string _word2phone_path;
    std::string _phone2id_path;
    std::string _tone2id_path;
    std::string _trand2simp_path;

    std::vector<std::string> must_erhua;
    std::vector<std::string> not_erhua;

    std::vector<std::string> must_not_neural_tone_words;
    std::vector<std::string> must_neural_tone_words;
};
}  // namespace ppspeech
#endif

================================================
FILE: demos/TTSCppFrontend/src/front/text_normalize.cpp
================================================
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "front/text_normalize.h"

namespace ppspeech {

// Fills digits_map ("0".."9" -> Chinese numeral) and units_map (number of
// trailing digits -> Chinese unit: 1 十, 2 百, 3 千, 4 万, 8 亿).
// Always returns 0.
int TextNormalizer::InitMap() {
    static const char *const kDigitText[10] = {
        "零", "一", "二", "三", "四", "五", "六", "七", "八", "九"};
    for (int d = 0; d < 10; ++d) {
        // Key is the one-character ASCII digit string.
        digits_map[std::string(1, static_cast<char>('0' + d))] = kDigitText[d];
    }

    static const int kUnitPower[5] = {1, 2, 3, 4, 8};
    static const char *const kUnitText[5] = {"十", "百", "千", "万", "亿"};
    for (int u = 0; u < 5; ++u) {
        units_map[kUnitPower[u]] = kUnitText[u];
    }

    return 0;
}

// Replaces `len` characters of *sentence starting at `pos` with `repstr`.
// Always returns 0.
int TextNormalizer::Replace(std::wstring *sentence,
                            const int &pos,
                            const int &len,
                            const std::wstring &repstr) {
    // erase() returns the string itself, so the new text is inserted at the
    // same position right after the old span has been removed.
    sentence->erase(pos, len).insert(pos, repstr);
    return 0;
}

// Splits a sentence after each punctuation mark of the set ：，；。？！,;?!
// Every piece keeps its terminating mark; text left after the last mark is
// appended as a final piece. Pieces are appended to *sentence_part.
// Always returns 0.
int TextNormalizer::SplitByPunc(const std::wstring &sentence,
                                std::vector<std::wstring> *sentence_part) {
    std::wstring rest = sentence;
    std::wregex punc_re(L"[：，；。？！,;?!]");
    std::wsmatch hit;

    for (;;) {
        if (!std::regex_search(rest, hit, punc_re)) {
            break;
        }
        // Number of characters up to and including the punctuation mark.
        const auto cut = hit.position(0) + hit.length(0);
        sentence_part->push_back(rest.substr(0, cut));
        Replace(&rest, 0, cut, L"");
    }
    // Trailing text without a closing punctuation mark.
    if (!(rest == L"")) {
        sentence_part->push_back(rest);
    }
    return 0;
}

// Verbalize a string of decimal digits as a Chinese cardinal number,
// e.g. "10200" -> "一万零二百".
//
// num_str:  ASCII digits; leading zeros are allowed.
// use_zero: when true, a value that reduces to a single digit after its
//           leading zeros are removed is prefixed with "零" ("02" -> "零二").
//
// Returns an empty string when num_str is empty or consists only of zeros;
// callers decide how a bare zero should be spoken.
std::string TextNormalizer::CreateTextValue(const std::string &num_str,
                                            bool use_zero) {
    // Remove ALL leading zeros. absl::StripPrefix(num_str, "0") removes at
    // most one, which made all-zero tails such as the "00" in "100" come out
    // as "零零" ("一百零零").
    const std::string::size_type first_nonzero =
        num_str.find_first_not_of('0');
    if (first_nonzero == std::string::npos) {
        return "";
    }
    const std::string num_lstrip = num_str.substr(first_nonzero);
    const std::string::size_type len = num_lstrip.length();

    if (len == 1) {
        // Look the digit up without operator[] so that an unexpected
        // character does not insert a new (empty) entry into digits_map.
        const std::map<std::string, std::string>::const_iterator digit =
            digits_map.find(num_lstrip);
        const std::string digit_text =
            digit == digits_map.end() ? std::string() : digit->second;
        if (use_zero && len < num_str.length()) {
            return digits_map["0"] + digit_text;
        }
        return digit_text;
    }

    // Largest unit that is strictly smaller than the number of significant
    // digits: 十 (1), 百 (2), 千 (3), 万 (4) or 亿 (8).
    int largest_unit = 0;
    if (len <= 2) {
        largest_unit = 1;
    } else if (len <= 3) {
        largest_unit = 2;
    } else if (len <= 4) {
        largest_unit = 3;
    } else if (len <= 8) {
        largest_unit = 4;
    } else {
        largest_unit = 8;
    }

    // Split the ORIGINAL string (leading zeros included) so that the
    // recursive calls can tell where a "零" placeholder is needed.
    const std::string first_part =
        num_str.substr(0, num_str.length() - largest_unit);
    const std::string second_part =
        num_str.substr(num_str.length() - largest_unit);

    return CreateTextValue(first_part, use_zero) + units_map[largest_unit] +
           CreateTextValue(second_part, use_zero);
}

// 数字一个一个对应，可直接用于年份，电话，手机，
std::string TextNormalizer::SingleDigit2Text(const std::string &num_str,
                                             bool alt_one) {
    std::string text = "";
    if (alt_one) {
        digits_map["1"] = "幺";
    } else {
        digits_map["1"] = "一";
    }

    for (size_t i = 0; i < num_str.size(); i++) {
        std::string num_int(1, num_str[i]);
        if (digits_map.find(num_int) == digits_map.end()) {
            LOG(ERROR) << "digits_map doesn't have key: " << num_int;
        }
        text += digits_map[num_int];
    }

    return text;
}

// Wide-string convenience overload: converts `num` to UTF-8 and forwards to
// the std::string overload.
std::string TextNormalizer::SingleDigit2Text(const std::wstring &num,
                                             bool alt_one) {
    return SingleDigit2Text(std::string(wstring2utf8string(num)), alt_one);
}

// Verbalize a digit string as one whole number; suitable for months, days and
// the integer part of a value.
//
// num_str:  ASCII digits.
// alt_one:  when true, "1" is spoken as "幺" instead of "一".
// use_zero: forwarded to CreateTextValue.
//
// Returns "" for empty input and "零" for input made only of zeros.
// A leading "一十" is abbreviated to "十" ("一十八" -> "十八").
// Side effect: overwrites digits_map["1"] according to alt_one.
std::string TextNormalizer::MultiDigit2Text(const std::string &num_str,
                                            bool alt_one,
                                            bool use_zero) {
    digits_map["1"] = alt_one ? "幺" : "一";

    if (num_str.empty()) {
        return "";
    }
    // CreateTextValue yields nothing usable for a bare zero, so handle it
    // here; otherwise "0" would silently vanish from the sentence.
    if (num_str.find_first_not_of('0') == std::string::npos) {
        return digits_map["0"];
    }

    const std::wstring result =
        utf8string2wstring(CreateTextValue(num_str, use_zero));

    // Only inspect the first two characters when they exist; indexing
    // result[1] on an empty string is undefined behaviour.
    if (result.length() >= 2) {
        const std::wstring one = utf8string2wstring(digits_map["1"]);
        const std::wstring ten = utf8string2wstring(units_map[1]);
        if (result.substr(0, 1) == one && result.substr(1, 1) == ten) {
            return wstring2utf8string(result.substr(1));
        }
    }
    return wstring2utf8string(result);
}

// Wide-string convenience overload: converts `num` to UTF-8 and forwards to
// the std::string overload.
std::string TextNormalizer::MultiDigit2Text(const std::wstring &num,
                                            bool alt_one,
                                            bool use_zero) {
    return MultiDigit2Text(
        std::string(wstring2utf8string(num)), alt_one, use_zero);
}

// 数字转文本，包括整数和小数
std::string TextNormalizer::Digits2Text(const std::string &num_str) {
    std::string text;
    std::vector<std::string> integer_decimal;
    integer_decimal = absl::StrSplit(num_str, ".");

    if (integer_decimal.size() == 1) {  // 整数
        text = MultiDigit2Text(integer_decimal[0]);
    } else if (integer_decimal.size() == 2) {  // 小数
        if (integer_decimal[0] == "") {  // 无整数的小数类型，例如：.22
            text = "点" +
                   SingleDigit2Text(
                       std::string(absl::StripSuffix(integer_decimal[1], "0"))
                           .data());
        } else {  // 常规小数类型，例如：12.34
            text = MultiDigit2Text(integer_decimal[0]) + "点" +
                   SingleDigit2Text(
                       std::string(absl::StripSuffix(integer_decimal[1], "0"))
                           .data());
        }
    } else {
        return "The value does not conform to the numeric format";
    }

    return text;
}

// Wide-string convenience overload: converts `num` to UTF-8 and forwards to
// the std::string overload.
std::string TextNormalizer::Digits2Text(const std::wstring &num) {
    return Digits2Text(std::string(wstring2utf8string(num)));
}

// Dates written with 年/月/日, e.g. 2021年8月18日 -> 二零二一年八月十八日.
// The year is read digit by digit; month and day are read as whole numbers.
// Rewrites every occurrence in *sentence in place. Always returns 0.
int TextNormalizer::ReData(std::wstring *sentence) {
    const std::wregex date_re(
        L"(\\d{4}|\\d{2})年((0?[1-9]|1[0-2])月)?(((0?[1-9])|((1|2)[0-9])|30|31)"
        L"([日号]))?");
    std::wsmatch hit;

    while (std::regex_search(*sentence, hit, date_re)) {
        // Group 1: year digits.
        std::string spoken = SingleDigit2Text(hit[1]) + "年";
        // Group 3: month digits (optional).
        if (hit[3].length() > 0) {
            spoken += MultiDigit2Text(hit[3], false, false) + "月";
        }
        // Group 5: day digits (optional); group 9: the 日/号 suffix.
        if (hit[5].length() > 0) {
            spoken += MultiDigit2Text(hit[5], false, false) +
                      wstring2utf8string(hit[9]);
        }

        Replace(sentence,
                hit.position(0),
                hit.length(0),
                utf8string2wstring(spoken));
    }

    return 0;
}


// Numeric dates with a repeated separator (one of '-', ' ', '/', '.'),
// e.g. 2021/08/18 -> 二零二一年八月十八日.
// Rewrites every occurrence in *sentence in place. Always returns 0.
int TextNormalizer::ReData2(std::wstring *sentence) {
    const std::wregex date_re(
        L"(\\d{4})([- /.])(0[1-9]|1[012])\\2(0[1-9]|[12][0-9]|3[01])");
    std::wsmatch hit;

    while (std::regex_search(*sentence, hit, date_re)) {
        // Year digit by digit, month and day as whole numbers.
        std::string spoken = SingleDigit2Text(hit[1]) + "年";
        spoken += MultiDigit2Text(hit[3], false, false) + "月";
        spoken += MultiDigit2Text(hit[4], false, false) + "日";

        Replace(sentence,
                hit.position(0),
                hit.length(0),
                utf8string2wstring(spoken));
    }

    return 0;
}

// XX:XX:XX   09:09:02 --> 九点零九分零二秒
int TextNormalizer::ReTime(std::wstring *sentence) {
    std::wregex reg(L"([0-1]?[0-9]|2[0-3]):([0-5][0-9])(:([0-5][0-9]))?");
    std::wsmatch match;
    std::string rep;

    while (std::regex_search(*sentence, match, reg)) {
        rep = "";
        rep += (MultiDigit2Text(match[1], false, false) + "点");
        if (absl::StartsWith(wstring2utf8string(match[2]), "0")) {
            rep += "零";
        }
        rep += (MultiDigit2Text(match[2]) + "分");
        if (absl::StartsWith(wstring2utf8string(match[4]), "0")) {
            rep += "零";
        }
        rep += (MultiDigit2Text(match[4]) + "秒");

        Replace(sentence,
                match.position(0),
                match.length(0),
                utf8string2wstring(rep));
    }

    return 0;
}

// Temperatures, e.g. -24.3℃ -> 零下二十四点三度.
// A leading '-' is spoken as "零下" (below zero). The unit is spoken as
// "摄氏度" only when it is written that way; every other accepted unit
// (°C, ℃, 度) becomes "度".
// Rewrites every occurrence in *sentence in place. Always returns 0.
int TextNormalizer::ReTemperature(std::wstring *sentence) {
    std::wregex reg(L"(-?)(\\d+(\\.\\d+)?)(°C|℃|度|摄氏度)");
    std::wsmatch match;

    while (std::regex_search(*sentence, match, reg)) {
        // Matches the documented reading above; the previous "负" did not.
        const std::string sign = (match[1] == L"-") ? "零下" : "";
        const std::string unit = (match[4] == L"摄氏度") ? "摄氏度" : "度";
        const std::string rep = sign + Digits2Text(match[2]) + unit;

        Replace(sentence,
                match.position(0),
                match.length(0),
                utf8string2wstring(rep));
    }

    return 0;
}

// Fractions, e.g. 1/3 -> 三分之一 (denominator is spoken first).
// A leading '-' is spoken as "负".
// Rewrites every occurrence in *sentence in place. Always returns 0.
int TextNormalizer::ReFrac(std::wstring *sentence) {
    const std::wregex frac_re(L"(-?)(\\d+)/(\\d+)");
    std::wsmatch hit;

    while (std::regex_search(*sentence, hit, frac_re)) {
        const std::string sign = (hit[1] == L"-") ? "负" : "";
        // Group 3 is the denominator, group 2 the numerator.
        const std::string denominator = MultiDigit2Text(hit[3]);
        const std::string numerator = MultiDigit2Text(hit[2]);
        const std::string spoken = sign + denominator + "分之" + numerator;

        Replace(sentence,
                hit.position(0),
                hit.length(0),
                utf8string2wstring(spoken));
    }

    return 0;
}

// Percentages, e.g. 45.5% -> 百分之四十五点五. A leading '-' is spoken as "负".
// Rewrites every occurrence in *sentence in place. Always returns 0.
int TextNormalizer::RePercentage(std::wstring *sentence) {
    const std::wregex percent_re(L"(-?)(\\d+(\\.\\d+)?)%");
    std::wsmatch hit;

    while (std::regex_search(*sentence, hit, percent_re)) {
        const std::string sign = (hit[1] == L"-") ? "负" : "";
        const std::string spoken = sign + "百分之" + Digits2Text(hit[2]);

        Replace(sentence,
                hit.position(0),
                hit.length(0),
                utf8string2wstring(spoken));
    }

    return 0;
}

// Mobile phone numbers, e.g. +86 18883862235 -> 八六幺八八八三八六二二三五.
// Every digit of the match is read on its own, with "1" spoken as "幺".
// Rewrites every occurrence in *sentence in place. Always returns 0.
//
// NOTE(review): the optional (\d) groups at both ends look like a stand-in
// for "not preceded/followed by a digit"; as written, an adjacent digit is
// simply included in the match and read out too. Confirm the intent.
int TextNormalizer::ReMobilePhone(std::wstring *sentence) {
    std::wregex reg(
        L"(\\d)?((\\+?86 ?)?1([38]\\d|5[0-35-9]|7[678]|9[89])\\d{8})(\\d)?");
    std::wsmatch match;

    while (std::regex_search(*sentence, match, reg)) {
        // Keep only the digits: the '+' and the optional space after the
        // country code have no spoken form, and passing them on would only
        // produce "missing key" errors in the digit lookup. The number
        // itself is deliberately not logged.
        const std::string matched = wstring2utf8string(match[0]);
        std::string digits;
        for (const char ch : matched) {
            if (ch >= '0' && ch <= '9') {
                digits += ch;
            }
        }
        const std::string rep = SingleDigit2Text(digits, true);

        Replace(sentence,
                match.position(0),
                match.length(0),
                utf8string2wstring(rep));
    }

    return 0;
}

// Land-line numbers, e.g. 010-51093154 -> 零幺零五幺零九三幺五四.
// The match is split on '-' and each piece is read digit by digit with "1"
// spoken as "幺".
// Rewrites every occurrence in *sentence in place. Always returns 0.
int TextNormalizer::RePhone(std::wstring *sentence) {
    const std::wregex phone_re(
        L"(\\d)?((0(10|2[1-3]|[3-9]\\d{2})-?)?[1-9]\\d{6,7})(\\d)?");
    std::wsmatch hit;

    while (std::regex_search(*sentence, hit, phone_re)) {
        const std::string matched = wstring2utf8string(hit[0]);
        const std::vector<std::string> pieces = absl::StrSplit(matched, "-");

        std::string spoken;
        for (const std::string &piece : pieces) {
            spoken += SingleDigit2Text(piece, true);
        }

        Replace(sentence,
                hit.position(0),
                hit.length(0),
                utf8string2wstring(spoken));
    }

    return 0;
}

// Numeric ranges joined by '-' or '~', e.g. 60~90 -> 六十到九十.
// Each side may be a signed number or a bare fraction such as ".5".
// Rewrites every occurrence in *sentence in place. Always returns 0.
int TextNormalizer::ReRange(std::wstring *sentence) {
    const std::wregex range_re(
        L"((-?)((\\d+)(\\.\\d+)?)|(\\.(\\d+)))[-~]((-?)((\\d+)(\\.\\d+)?)|(\\.("
        L"\\d+)))");
    std::wsmatch hit;

    while (std::regex_search(*sentence, hit, range_re)) {
        // Left side: group 2 = sign, group 3 = number, group 6 = ".digits".
        const std::string left_sign = (hit[2] == L"-") ? "负" : "";
        const std::wstring left_num =
            (hit[6].length() > 0) ? hit[6].str() : hit[3].str();
        std::string spoken = left_sign + Digits2Text(left_num) + "到";

        // Right side: group 9 = sign, group 10 = number, group 13 = ".digits".
        const std::string right_sign = (hit[9] == L"-") ? "负" : "";
        const std::wstring right_num =
            (hit[13].length() > 0) ? hit[13].str() : hit[10].str();
        spoken += right_sign + Digits2Text(right_num);

        Replace(sentence,
                hit.position(0),
                hit.length(0),
                utf8string2wstring(spoken));
    }

    return 0;
}

// Integers with a minus sign, e.g. -10 -> 负十.
// Rewrites every occurrence in *sentence in place. Always returns 0.
int TextNormalizer::ReInterger(std::wstring *sentence) {
    const std::wregex negative_re(L"(-)(\\d+)");
    std::wsmatch hit;

    while (std::regex_search(*sentence, hit, negative_re)) {
        const std::string spoken = "负" + MultiDigit2Text(hit[2]);
        Replace(sentence,
                hit.position(0),
                hit.length(0),
                utf8string2wstring(spoken));
    }

    return 0;
}

// Decimal numbers: either "[-]digits.digits" or a bare ".digits".
// A leading '-' is spoken as "负".
// Rewrites every occurrence in *sentence in place. Always returns 0.
int TextNormalizer::ReDecimalNum(std::wstring *sentence) {
    const std::wregex decimal_re(L"(-?)((\\d+)(\\.\\d+))|(\\.(\\d+))");
    std::wsmatch hit;

    while (std::regex_search(*sentence, hit, decimal_re)) {
        const std::string sign = (hit[1] == L"-") ? "负" : "";
        // Group 5 is the bare ".digits" form, group 2 the full decimal.
        const std::wstring number =
            (hit[5].length() > 0) ? hit[5].str() : hit[2].str();
        const std::string spoken = sign + Digits2Text(number);

        Replace(sentence,
                hit.position(0),
                hit.length(0),
                utf8string2wstring(spoken));
    }

    return 0;
}

// Positive integer followed by a measure word (optionally with 多/余/几 in
// between). Only the digits are rewritten; the measure word stays as it is.
// Rewrites every occurrence in *sentence in place. Always returns 0.
int TextNormalizer::RePositiveQuantifiers(std::wstring *sentence) {
    const std::wstring common_quantifiers =
        L"(朵|匹|张|座|回|场|尾|条|个|首|阙|阵|网|炮|顶|丘|棵|只|支|袭|辆|挑|"
        L"担|颗|壳|窠|曲|墙|群|腔|砣|座|客|贯|扎|捆|刀|令|打|手|罗|坡|山|岭|江|"
        L"溪|钟|队|单|双|对|出|口|头|脚|板|跳|枝|件|贴|针|线|管|名|位|身|堂|课|"
        L"本|页|家|户|层|丝|毫|厘|分|钱|两|斤|担|铢|石|钧|锱|忽|(千|毫|微)克|"
        L"毫|厘|(公)分|分|寸|尺|丈|里|寻|常|铺|程|(千|分|厘|毫|微)米|米|撮|勺|"
        L"合|升|斗|石|盘|碗|碟|叠|桶|笼|盆|盒|杯|钟|斛|锅|簋|篮|盘|桶|罐|瓶|壶|"
        L"卮|盏|箩|箱|煲|啖|袋|钵|年|月|日|季|刻|时|周|天|秒|分|旬|纪|岁|世|更|"
        L"夜|春|夏|秋|冬|代|伏|辈|丸|泡|粒|颗|幢|堆|条|根|支|道|面|片|张|颗|块|"
        L"元|(亿|千万|百万|万|千|百)|(亿|千万|百万|万|千|百|美|)元|(亿|千万|"
        L"百万|万|千|百|)块|角|毛|分)";

    std::wstring pattern = L"(\\d+)([多余几])?";
    pattern += common_quantifiers;
    const std::wregex quantity_re(pattern);
    std::wsmatch hit;

    while (std::regex_search(*sentence, hit, quantity_re)) {
        // Replace group 1 (the digits) only.
        const std::string spoken = MultiDigit2Text(hit[1]);
        Replace(sentence,
                hit.position(1),
                hit.length(1),
                utf8string2wstring(spoken));
    }

    return 0;
}

// Identifier-like numbers (three or more digits), read digit by digit,
// e.g. 89757 -> 八九七五七.
// Rewrites every occurrence in *sentence in place. Always returns 0.
int TextNormalizer::ReDefalutNum(std::wstring *sentence) {
    const std::wregex id_re(L"\\d{3}\\d*");
    std::wsmatch hit;

    while (std::regex_search(*sentence, hit, id_re)) {
        const std::wstring spoken =
            utf8string2wstring(SingleDigit2Text(hit[0]));
        Replace(sentence, hit.position(0), hit.length(0), spoken);
    }

    return 0;
}

// Any remaining number: "[-]digits[.digits]" or a bare ".digits".
// A leading '-' is spoken as "负".
// Rewrites every occurrence in *sentence in place. Always returns 0.
int TextNormalizer::ReNumber(std::wstring *sentence) {
    const std::wregex number_re(L"(-?)((\\d+)(\\.\\d+)?)|(\\.(\\d+))");
    std::wsmatch hit;

    while (std::regex_search(*sentence, hit, number_re)) {
        const std::string sign = (hit[1] == L"-") ? "负" : "";
        // Group 5 is the bare ".digits" form, group 2 the regular number.
        const std::wstring number =
            (hit[5].length() > 0) ? hit[5].str() : hit[2].str();
        const std::string spoken = sign + Digits2Text(number);

        Replace(sentence,
                hit.position(0),
                hit.length(0),
                utf8string2wstring(spoken));
    }
    return 0;
}

// 整体正则，按顺序
int TextNormalizer::SentenceNormalize(std::wstring *sentence) {
    ReData(sentence);
    ReData2(sentence);
    ReTime(sentence);
    ReTemperature(sentence);
    ReFrac(sentence);
    RePercentage(sentence);
    ReMobilePhone(sentence);
    RePhone(sentence);
    ReRange(sentence);
    ReInterger(sentence);
    ReDecimalNum(sentence);
    RePositiveQuantifiers(sentence);
    ReDefalutNum(sentence);
    ReNumber(sentence);
    return 0;
}
}  // namespace ppspeech

================================================
FILE: demos/TTSCppFrontend/src/front/text_normalize.h
================================================
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef PADDLE_TTS_SERVING_FRONT_TEXT_NORMALIZE_H
#define PADDLE_TTS_SERVING_FRONT_TEXT_NORMALIZE_H

#include <glog/logging.h>
#include <codecvt>
#include <map>
#include <regex>
#include <string>
#include "absl/strings/str_split.h"
#include "absl/strings/strip.h"
#include "base/type_conv.h"

namespace ppspeech {

// Rule-based normalizer that rewrites digits inside Chinese text (dates,
// times, phone numbers, percentages, plain numbers, ...) into Chinese
// numerals. All Re* passes edit the given wide string in place and return 0.
//
// The digit helpers overwrite the shared digits_map entry for "1" on every
// call, so an instance carries mutable state between calls.
class TextNormalizer {
  public:
    // Fills the lookup tables via InitMap().
    TextNormalizer() { InitMap(); }
    ~TextNormalizer() {}

    // (Re)builds digits_map and units_map.
    int InitMap();
    // Replaces `len` characters of *sentence starting at `pos` with `repstr`.
    int Replace(std::wstring *sentence,
                const int &pos,
                const int &len,
                const std::wstring &repstr);
    // Splits `sentence` after each punctuation mark and appends the pieces
    // to *sentence_part.
    int SplitByPunc(const std::wstring &sentence,
                    std::vector<std::wstring> *sentence_part);

    // Cardinal reading of a digit string; recursive worker behind
    // MultiDigit2Text.
    std::string CreateTextValue(const std::string &num, bool use_zero = true);
    // Digit-by-digit reading; alt_one selects "幺" instead of "一" for "1".
    std::string SingleDigit2Text(const std::string &num_str,
                                 bool alt_one = false);
    std::string SingleDigit2Text(const std::wstring &num, bool alt_one = false);
    // Whole-number reading, with a leading "一十" shortened to "十".
    std::string MultiDigit2Text(const std::string &num_str,
                                bool alt_one = false,
                                bool use_zero = true);
    std::string MultiDigit2Text(const std::wstring &num,
                                bool alt_one = false,
                                bool use_zero = true);
    // Reading of a number that may contain one decimal point.
    std::string Digits2Text(const std::string &num_str);
    std::string Digits2Text(const std::wstring &num);

    // Individual rewrite passes; SentenceNormalize runs them in this order.
    int ReData(std::wstring *sentence);                 // 2021年8月18日
    int ReData2(std::wstring *sentence);                // 2021/08/18
    int ReTime(std::wstring *sentence);                 // 09:09:02
    int ReTemperature(std::wstring *sentence);          // -24.3℃
    int ReFrac(std::wstring *sentence);                 // 1/3
    int RePercentage(std::wstring *sentence);           // 45.5%
    int ReMobilePhone(std::wstring *sentence);          // +86 18883862235
    int RePhone(std::wstring *sentence);                // 010-51093154
    int ReRange(std::wstring *sentence);                // 60~90
    int ReInterger(std::wstring *sentence);             // -10
    int ReDecimalNum(std::wstring *sentence);           // 12.34, .5
    int RePositiveQuantifiers(std::wstring *sentence);  // digits + measure word
    int ReDefalutNum(std::wstring *sentence);           // 89757
    int ReNumber(std::wstring *sentence);               // any remaining number
    // Applies all passes above to *sentence.
    int SentenceNormalize(std::wstring *sentence);


  private:
    // ASCII digit ("0".."9") -> Chinese numeral; the "1" entry is rewritten
    // by the digit helpers depending on alt_one.
    std::map<std::string, std::string> digits_map;
    // Unit size in digits (1, 2, 3, 4, 8) -> 十, 百, 千, 万, 亿.
    std::map<int, std::string> units_map;
};
}  // namespace ppspeech

#endif

================================================
FILE: demos/TTSCppFrontend/third-party/CMakeLists.txt
================================================
# Fetches the third-party libraries used by the TTS C++ frontend demo.
# Each dependency is declared as an ExternalProject pinned to a git tag and
# rooted in this project's binary directory.
cmake_minimum_required(VERSION 3.10)
project(tts_third_party_libs)

include(ExternalProject)

# gflags: built as a shared library only (static libs off) and installed into
# the binary directory.
ExternalProject_Add(gflags
    GIT_REPOSITORY https://github.com/gflags/gflags.git
    GIT_TAG        v2.2.2
    PREFIX         ${CMAKE_CURRENT_BINARY_DIR}
    INSTALL_DIR    ${CMAKE_CURRENT_BINARY_DIR}
    CMAKE_ARGS     -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>
                   -DCMAKE_POSITION_INDEPENDENT_CODE=ON
                   -DBUILD_STATIC_LIBS=OFF
                   -DBUILD_SHARED_LIBS=ON
)

# glog: built after gflags (see DEPENDS) and installed into the same prefix.
ExternalProject_Add(
    glog
    GIT_REPOSITORY https://github.com/google/glog.git
    GIT_TAG        v0.6.0
    PREFIX         ${CMAKE_CURRENT_BINARY_DIR}
    INSTALL_DIR    ${CMAKE_CURRENT_BINARY_DIR}
    CMAKE_ARGS     -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>
                   -DCMAKE_POSITION_INDEPENDENT_CODE=ON
    DEPENDS        gflags
)

# abseil: ABSL_PROPAGATE_CXX_STD=ON is passed through to abseil's own build.
ExternalProject_Add(
    abseil
    GIT_REPOSITORY https://github.com/abseil/abseil-cpp.git
    GIT_TAG        20230125.1
    PREFIX         ${CMAKE_CURRENT_BINARY_DIR}
    INSTALL_DIR    ${CMAKE_CURRENT_BINARY_DIR}
    CMAKE_ARGS     -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>
                   -DCMAKE_POSITION_INDEPENDENT_CODE=ON
                   -DABSL_PROPAGATE_CXX_STD=ON
)

# cppjieba (header-only): sources are only downloaded; the configure, build,
# install and test steps are disabled.
ExternalProject_Add(
    cppjieba
    GIT_REPOSITORY https://github.com/yanyiwu/cppjieba.git
    GIT_TAG        v5.0.3
    PREFIX         ${CMAKE_CURRENT_BINARY_DIR}
    CONFIGURE_COMMAND ""
    BUILD_COMMAND     ""
    INSTALL_COMMAND   ""
    TEST_COMMAND      ""
)

# limonp (header-only): sources are only downloaded; the configure, build,
# install and test steps are disabled.
ExternalProject_Add(
    limonp
    GIT_REPOSITORY https://github.com/yanyiwu/limonp.git
    GIT_TAG        v0.6.6
    PREFIX         ${CMAKE_CURRENT_BINARY_DIR}
    CONFIGURE_COMMAND ""
    BUILD_COMMAND     ""
    INSTALL_COMMAND   ""
    TEST_COMMAND      ""
)


================================================
FILE: demos/asr_deployment/README.md
================================================
([简体中文](./README_cn.md)|English)
# ASR Deployment by SpeechX

## Introduction

ASR deployment supports U2/U2++/DeepSpeech2 ASR models using C++, which is good practice for industrial deployment.

More info about SpeechX, please see [here](../../speechx/README.md).

## Usage
### 1. Environment

* python - 3.7
* docker - `registry.baidubce.com/paddlepaddle/paddle:2.2.2-gpu-cuda10.2-cudnn7`
* os - Ubuntu 16.04.7 LTS
* gcc/g++/gfortran - 8.2.0
* cmake - 3.16.0

More info please see [here](../../speechx/README.md).

### 2. Compile SpeechX

Please see [here](../../speechx/README.md).

### 3. Usage

For a U2++ ASR deployment example, please see [here](../../speechx/examples/u2pp_ol/wenetspeech/).

First go to `speechx/speechx/examples/u2pp_ol/wenetspeech` dir.

- Source path.sh
  ```bash
  source path.sh
  ```

- Download Model, Prepare test data and cmvn
  ```bash
  run.sh --stage 0 --stop_stage 1
  ```

- Decode with WAV
  
  ```bash
  # FP32
  ./local/recognizer.sh

  # INT8
  ./local/recognizer_quant.sh
  ```

  Output:
  ```bash
  I1026 16:13:24.683531 48038 u2_recognizer_main.cc:55] utt: BAC009S0916W0495
  I1026 16:13:24.683578 48038 u2_recognizer_main.cc:56] wav dur: 4.17119 sec.
  I1026 16:13:24.683595 48038 u2_recognizer_main.cc:64] wav len (sample): 66739
  I1026 16:13:25.037652 48038 u2_recognizer_main.cc:87] Pratial result: 3 这令
  I1026 16:13:25.043697 48038 u2_recognizer_main.cc:87] Pratial result: 4 这令
  I1026 16:13:25.222124 48038 u2_recognizer_main.cc:87] Pratial result: 5 这令被贷款
  I1026 16:13:25.228385 48038 u2_recognizer_main.cc:87] Pratial result: 6 这令被贷款
  I1026 16:13:25.414669 48038 u2_recognizer_main.cc:87] Pratial result: 7 这令被贷款的员工
  I1026 16:13:25.420714 48038 u2_recognizer_main.cc:87] Pratial result: 8 这令被贷款的员工
  I1026 16:13:25.608129 48038 u2_recognizer_main.cc:87] Pratial result: 9 这令被贷款的员工们请
  I1026 16:13:25.801620 48038 u2_recognizer_main.cc:87] Pratial result: 10 这令被贷款的员工们请食难安
  I1026 16:13:25.804101 48038 feature_cache.h:44] set finished
  I1026 16:13:25.804128 48038 feature_cache.h:51] compute last feats done.
  I1026 16:13:25.948771 48038 u2_recognizer_main.cc:87] Pratial result: 11 这令被贷款的员工们请食难安
  I1026 16:13:26.246963 48038 u2_recognizer_main.cc:113] BAC009S0916W0495 这令被贷款的员工们请食难安
  ```

## Result

> CER is computed on aishell-test.
> RTF is computed over both feature extraction and decoding, which makes it closer to end-to-end.
> Machine: Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz avx512_vnni

### FP32

```
Overall -> 5.75 % N=104765 C=99035 S=5587 D=143 I=294
Mandarin -> 5.75 % N=104762 C=99035 S=5584 D=143 I=294
English -> 0.00 % N=0 C=0 S=0 D=0 I=0
Other -> 100.00 % N=3 C=0 S=3 D=0 I=0
```

```
RTF is: 0.315337
```

### INT8

```
Overall -> 5.83 % N=104765 C=98943 S=5675 D=147 I=286
Mandarin -> 5.83 % N=104762 C=98943 S=5672 D=147 I=286
English -> 0.00 % N=0 C=0 S=0 D=0 I=0
Other -> 100.00 % N=3 C=0 S=3 D=0 I=0
```

```
RTF is: 0.269674
```


================================================
FILE: demos/asr_deployment/README_cn.md
================================================
(简体中文|[English](./README.md))
# 基于SpeechX 的 ASR 部署 

## 简介

支持 U2/U2++/Deepspeech2 模型的 C++ 部署，其在工业实践中经常被用到。

更多 Speechx 信息可以参看[文档](../../speechx/README.md)。

## 使用
### 1. 环境

* python - 3.7
* docker - `registry.baidubce.com/paddlepaddle/paddle:2.2.2-gpu-cuda10.2-cudnn7`
* os - Ubuntu 16.04.7 LTS
* gcc/g++/gfortran - 8.2.0
* cmake - 3.16.0

更多信息可以参看[文档](../../speechx/README.md)。

### 2. 编译 SpeechX

更多信息可以参看[文档](../../speechx/README.md)。

### 3. 例子

u2++ 识别部署参看[这里](../../speechx/examples/u2pp_ol/wenetspeech/)。

以下操作均在 `speechx/speechx/examples/u2pp_ol/wenetspeech` 目录下进行。

- Source path.sh
  ```bash
  source path.sh
  ```

- 下载模型，准备测试数据和cmvn文件
  ```bash
  run.sh --stage 0 --stop_stage 1
  ```

- 解码
  
  ```bash
  # FP32
  ./local/recognizer.sh

  # INT8
  ./local/recognizer_quant.sh
  ```

  输出:
  ```bash
  I1026 16:13:24.683531 48038 u2_recognizer_main.cc:55] utt: BAC009S0916W0495
  I1026 16:13:24.683578 48038 u2_recognizer_main.cc:56] wav dur: 4.17119 sec.
  I1026 16:13:24.683595 48038 u2_recognizer_main.cc:64] wav len (sample): 66739
  I1026 16:13:25.037652 48038 u2_recognizer_main.cc:87] Pratial result: 3 这令
  I1026 16:13:25.043697 48038 u2_recognizer_main.cc:87] Pratial result: 4 这令
  I1026 16:13:25.222124 48038 u2_recognizer_main.cc:87] Pratial result: 5 这令被贷款
  I1026 16:13:25.228385 48038 u2_recognizer_main.cc:87] Pratial result: 6 这令被贷款
  I1026 16:13:25.414669 48038 u2_recognizer_main.cc:87] Pratial result: 7 这令被贷款的员工
  I1026 16:13:25.420714 48038 u2_recognizer_main.cc:87] Pratial result: 8 这令被贷款的员工
  I1026 16:13:25.608129 48038 u2_recognizer_main.cc:87] Pratial result: 9 这令被贷款的员工们请
  I1026 16:13:25.801620 48038 u2_recognizer_main.cc:87] Pratial result: 10 这令被贷款的员工们请食难安
  I1026 16:13:25.804101 48038 feature_cache.h:44] set finished
  I1026 16:13:25.804128 48038 feature_cache.h:51] compute last feats done.
  I1026 16:13:25.948771 48038 u2_recognizer_main.cc:87] Pratial result: 11 这令被贷款的员工们请食难安
  I1026 16:13:26.246963 48038 u2_recognizer_main.cc:113] BAC009S0916W0495 这令被贷款的员工们请食难安
  ```

## 结果

> CER 测试集为 aishell-test
> RTF 计算包含提特征和解码
> 测试机器： Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz avx512_vnni

### FP32

```
Overall -> 5.75 % N=104765 C=99035 S=5587 D=143 I=294
Mandarin -> 5.75 % N=104762 C=99035 S=5584 D=143 I=294
English -> 0.00 % N=0 C=0 S=0 D=0 I=0
Other -> 100.00 % N=3 C=0 S=3 D=0 I=0
```

```
RTF is: 0.315337
```

### INT8

```
Overall -> 5.87 % N=104765 C=98909 S=5711 D=145 I=289
Mandarin -> 5.86 % N=104762 C=98909 S=5708 D=145 I=289
English -> 0.00 % N=0 C=0 S=0 D=0 I=0
Other -> 100.00 % N=3 C=0 S=3 D=0 I=0
```


================================================
FILE: demos/audio_content_search/README.md
================================================
([简体中文](./README_cn.md)|English)
# ACS (Audio Content Search)

## Introduction
ACS, or Audio Content Search, refers to the problem of obtaining keyword timestamps from automatically transcribed spoken language (speech-to-text).

This demo is an implementation of obtaining the keyword timestamp in the text from a given audio file. It can be done by a single command or a few lines in python using `PaddleSpeech`. 
Currently, the search words in this demo are:
```
我
康
```
## Usage
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose either the medium or the hard way to install paddlespeech.

The dependency refers to the requirements.txt, and install the dependency as follows:

```
pip install -r requirements.txt 
```

### 2. Prepare Input File
The input of this demo should be a WAV file(`.wav`), and the sample rate must be the same as the model.

Here are sample files for this demo that can be downloaded:
```bash
wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav
```

### 3. run paddlespeech_server
Before using the client, it is necessary to start paddlespeech_server.

Here is a sample server configuration:
```bash
bash demos/audio_content_search/run.sh
```
The logs of the two services will be recorded in 'acs.log' and 'streaming_asr.log' in this configuration.

### 4. Usage
- Command Line(Recommended)
  ```bash
  # Chinese
  paddlespeech_client acs --server_ip 127.0.0.1 --port 8490 --input ./zh.wav 
  ```
  
  Usage:
  ```bash
  paddlespeech_client acs --help
  ```
  Arguments:
  - `input`(required): Audio file to recognize.
  - `server_ip`: the server ip.
  - `port`: the server port.
  - `lang`: the language type of the model. Default: `zh`.
  - `sample_rate`: Sample rate of the model. Default: `16000`.
  - `audio_format`: The audio format.

  Output:
  ```bash
  [2022-05-15 15:00:58,185] [    INFO] - acs http client start
  [2022-05-15 15:00:58,185] [    INFO] - endpoint: http://127.0.0.1:8490/paddlespeech/asr/search
  [2022-05-15 15:01:03,220] [    INFO] - acs http client finished
  [2022-05-15 15:01:03,221] [    INFO] - ACS result: {'transcription': '我认为跑步最重要的就是给我带来了身体健康', 'acs': [{'w': '我', 'bg': 0, 'ed': 1.6800000000000002}, {'w': '我', 'bg': 2.1, 'ed': 4.28}, {'w': '康', 'bg': 3.2, 'ed': 4.92}]}
  [2022-05-15 15:01:03,221] [    INFO] - Response time 5.036084 s.
  ```

- Python API
  ```python
  from paddlespeech.server.bin.paddlespeech_client import ACSClientExecutor

  acs_executor = ACSClientExecutor()
  res = acs_executor(
      input='./zh.wav',
      server_ip="127.0.0.1",
      port=8490,)
  print(res)
  ```

  Output:
  ```bash
  [2022-05-15 15:08:13,955] [    INFO] - acs http client start
  [2022-05-15 15:08:13,956] [    INFO] - endpoint: http://127.0.0.1:8490/paddlespeech/asr/search
  [2022-05-15 15:08:19,026] [    INFO] - acs http client finished
  {'transcription': '我认为跑步最重要的就是给我带来了身体健康', 'acs': [{'w': '我', 'bg': 0, 'ed': 1.6800000000000002}, {'w': '我', 'bg': 2.1, 'ed': 4.28}, {'w': '康', 'bg': 3.2, 'ed': 4.92}]}
  ```


================================================
FILE: demos/audio_content_search/README_cn.md
================================================
(简体中文|[English](./README.md))

# 语音内容搜索
## 介绍
语音内容搜索是一项用计算机程序获取转录语音内容关键词时间戳的技术。

这个 demo 是一个从给定音频文件获取其文本中关键词时间戳的实现，它可以通过使用 `PaddleSpeech` 的单个命令或 python 中的几行代码来实现。

当前示例中检索词是
```
我
康
```
## 使用方法
### 1. 安装
请看[安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install_cn.md)。

你可以从 medium，hard 两种方式中选择一种方式安装。
依赖参见 requirements.txt, 安装依赖

```
pip install -r requirements.txt
```

### 2. 准备输入
这个 demo 的输入应该是一个 WAV 文件（`.wav`），并且采样率必须与模型的采样率相同。

可以下载此 demo 的示例音频：
```bash
wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav
```

### 3. 启动 server
使用 client 之前需要先启动 paddlespeech_server。

可以使用默认 server 配置：
```bash
bash demos/audio_content_search/run.sh
```
该配置下两个服务的日志会被记录在 `acs.log` 和 `streaming_asr.log` 中。

### 4. 使用方法
- 命令行 (推荐使用)
  ```bash
  # 中文
  paddlespeech_client acs --server_ip 127.0.0.1 --port 8490 --input ./zh.wav 
  ```
  
  使用方法：
  ```bash
  paddlespeech_client acs --help
  ```
  参数：
  - `input`(必须输入)：用于识别的音频文件。
  - `server_ip`: 服务的ip。
  - `port`：服务的端口。
  - `lang`：模型语言，默认值：`zh`。
  - `sample_rate`：音频采样率，默认值：`16000`。
  - `audio_format`: 音频的格式。

  输出：
  ```bash
  [2022-05-15 15:00:58,185] [    INFO] - acs http client start
  [2022-05-15 15:00:58,185] [    INFO] - endpoint: http://127.0.0.1:8490/paddlespeech/asr/search
  [2022-05-15 15:01:03,220] [    INFO] - acs http client finished
  [2022-05-15 15:01:03,221] [    INFO] - ACS result: {'transcription': '我认为跑步最重要的就是给我带来了身体健康', 'acs': [{'w': '我', 'bg': 0, 'ed': 1.6800000000000002}, {'w': '我', 'bg': 2.1, 'ed': 4.28}, {'w': '康', 'bg': 3.2, 'ed': 4.92}]}
  [2022-05-15 15:01:03,221] [    INFO] - Response time 5.036084 s.
  ```

- Python API
  ```python
  from paddlespeech.server.bin.paddlespeech_client import ACSClientExecutor

  acs_executor = ACSClientExecutor()
  res = acs_executor(
      input='./zh.wav',
      server_ip="127.0.0.1",
      port=8490,)
  print(res)
  ```

  输出：
  ```bash
  [2022-05-15 15:08:13,955] [    INFO] - acs http client start
  [2022-05-15 15:08:13,956] [    INFO] - endpoint: http://127.0.0.1:8490/paddlespeech/asr/search
  [2022-05-15 15:08:19,026] [    INFO] - acs http client finished
  {'transcription': '我认为跑步最重要的就是给我带来了身体健康', 'acs': [{'w': '我', 'bg': 0, 'ed': 1.6800000000000002}, {'w': '我', 'bg': 2.1, 'ed': 4.28}, {'w': '康', 'bg': 3.2, 'ed': 4.92}]}
  ```


================================================
FILE: demos/audio_content_search/acs_clinet.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse

from paddlespeech.cli.log import logger
from paddlespeech.server.utils.audio_handler import ASRHttpHandler


def main(args):
    """Send one wav file to the configured server endpoint and log the reply.

    Args:
        args: parsed command-line namespace; ``server_ip``, ``port``,
            ``endpoint`` and ``wavfile`` are read from it.
    """
    logger.info("asr http client start")

    # Fixed request parameters of this demo client:
    # audio format, sample rate and language, in the order run() receives them.
    request_params = ("wav", 16000, "zh")

    client = ASRHttpHandler(
        server_ip=args.server_ip, port=args.port, endpoint=args.endpoint)
    response = client.run(args.wavfile, *request_params)

    # The response is logged as returned; no field is extracted from it.
    logger.info(f"the final result: {response}")


if __name__ == "__main__":
    # Command-line entry point of the demo client: gather the server address,
    # the endpoint and the wav file to send, then pass them to main().
    # NOTE(review): the default port (8090) differs from the port set in
    # conf/acs_application.yaml (8490) — confirm which one is intended.
    cli = argparse.ArgumentParser(description="audio content search client")
    cli.add_argument(
        '--server_ip', help='server ip', default='127.0.0.1', type=str)
    cli.add_argument('--port', help='server port', default=8090, type=int)
    cli.add_argument(
        "--wavfile",
        default="./16_audio.wav",
        help="wav file path ",
        action="store")
    cli.add_argument(
        '--endpoint',
        help='server endpoint',
        default='/paddlespeech/asr/search',
        type=str)

    main(cli.parse_args())


================================================
FILE: demos/audio_content_search/conf/acs_application.yaml
================================================
#################################################################################
#                             SERVER SETTING                                    #
#################################################################################
host: 0.0.0.0
port: 8490

# The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['acs_python']
# protocol = ['http'] (only one can be selected).
# http only supports the offline engine type.
protocol: 'http'
engine_list: ['acs_python']


#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### ACS #########################################
################### acs task: engine_type: python ###############################
acs_python:
    task: acs
    asr_protocol: 'websocket' # 'websocket'
    offset: 1.0 # second
    asr_server_ip: 127.0.0.1
    asr_server_port: 8390
    lang: 'zh'
    word_list: "./conf/words.txt"
    sample_rate: 16000
    device: 'cpu' # set 'gpu:id' or 'cpu'
    ping_timeout: 100 # seconds


================================================
FILE: demos/audio_content_search/conf/words.txt
================================================
我
康

================================================
FILE: demos/audio_content_search/conf/ws_conformer_application.yaml
================================================
#################################################################################
#                             SERVER SETTING                                    #
#################################################################################
host: 0.0.0.0
port: 8390

# The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['asr_online']
# protocol = ['websocket'] (only one can be selected).
# websocket only supports the online engine type.
protocol: 'websocket'
engine_list: ['asr_online']


#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### ASR #########################################
################### speech task: asr; engine_type: online #######################
asr_online:
    model_type: 'conformer_online_multicn'
    am_model: # the pdmodel file of am static model [optional]
    am_params:  # the pdiparams file of am static model [optional]
    lang: 'zh'
    sample_rate: 16000
    cfg_path: 
    decode_method: 'attention_rescoring' 
    num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
    force_yes: True
    device: 'cpu' # cpu or gpu:id
    continuous_decoding: False # disable continuous decoding when an endpoint is detected
    am_predictor_conf:
        device:  # set 'gpu:id' or 'cpu'
        switch_ir_optim: True
        glog_info: False  # True -> print glog
        summary: True  # False -> do not show predictor config

    chunk_buffer_conf:
        window_n: 7     # frame
        shift_n: 4      # frame
        window_ms: 25   # ms
        shift_ms: 10    # ms
        sample_rate: 16000
        sample_width: 2

================================================
FILE: demos/audio_content_search/conf/ws_conformer_wenetspeech_application.yaml
================================================
# This is the parameter configuration file for PaddleSpeech Serving.

#################################################################################
#                             SERVER SETTING                                    #
#################################################################################
host: 0.0.0.0
port: 8390

# The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['asr_online']
# protocol = ['websocket'] (only one can be selected).
# websocket only supports the online engine type.
protocol: 'websocket'
engine_list: ['asr_online']


#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### ASR #########################################
################### speech task: asr; engine_type: online #######################
asr_online:
    model_type: 'conformer_online_wenetspeech'
    am_model: # the pdmodel file of am static model [optional]
    am_params:  # the pdiparams file of am static model [optional]
    lang: 'zh'
    sample_rate: 16000
    cfg_path: 
    force_yes: True
    device: 'cpu' # cpu or gpu:id
    decode_method: "attention_rescoring"
    num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
    am_predictor_conf:
        device:  # set 'gpu:id' or 'cpu'
        switch_ir_optim: True
        glog_info: False  # True -> print glog
        summary: True  # False -> do not show predictor config

    chunk_buffer_conf:
        window_n: 7     # frame
        shift_n: 4      # frame
        window_ms: 25   # ms
        shift_ms: 10    # ms
        sample_rate: 16000
        sample_width: 2


================================================
FILE: demos/audio_content_search/requirements.txt
================================================
websocket-client

================================================
FILE: demos/audio_content_search/run.sh
================================================
#!/bin/bash
# Launch the two services used by the audio content search demo. Both are
# started in the background with nohup and write their output to a log file
# in the current directory.

# Select the CUDA devices visible to the servers. The variable read by CUDA is
# CUDA_VISIBLE_DEVICES (plural); the singular spelling is silently ignored.
export CUDA_VISIBLE_DEVICES=0,1,2,3
# we need the streaming asr server
nohup python3 streaming_asr_server.py --config_file conf/ws_conformer_application.yaml > streaming_asr.log  2>&1  &

# start the acs server
nohup paddlespeech_server start --config_file conf/acs_application.yaml > acs.log 2>&1 &


================================================
FILE: demos/audio_content_search/streaming_asr_server.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse

from paddlespeech.cli.log import logger
from paddlespeech.server.bin.paddlespeech_server import ServerExecutor
if __name__ == "__main__":
    # Entry point: read the config file and log file paths from the command
    # line, then launch the server through ServerExecutor.
    arg_parser = argparse.ArgumentParser(
        add_help=True, prog='paddlespeech_server.start')
    arg_parser.add_argument(
        "--config_file",
        required=True,
        default=None,
        help="yaml file of the app",
        action="store")
    arg_parser.add_argument(
        "--log_file",
        default="./log/paddlespeech.log",
        help="log file",
        action="store")

    logger.info("start to parse the args")
    cli_args = arg_parser.parse_args()

    logger.info("start to launch the streaming asr server")
    launch_server = ServerExecutor()
    launch_server(
        config_file=cli_args.config_file, log_file=cli_args.log_file)


================================================
FILE: demos/audio_searching/README.md
================================================
([简体中文](./README_cn.md)|English)

# Audio Searching

## Introduction
As the Internet continues to evolve, unstructured data such as emails, social media photos, live videos, and customer service voice calls have become increasingly common. If we want to process the data on a computer, we need to use embedding technology to transform the data into vector and store, index, and query it.

However, when there is a large amount of data, such as hundreds of millions of audio tracks, it is more difficult to do a similarity search. The exhaustive method is feasible, but very time consuming.  For this scenario, this demo will introduce how to build an audio similarity retrieval system using the open source vector database Milvus.

Audio retrieval (speech, music, speaker, etc.) enables querying and finding similar sounds (or the same speaker) in a large amount of audio data.  The audio similarity retrieval system can be used to identify similar sound effects, minimize intellectual property infringement, quickly retrieve the voice print library, and help enterprises control fraud and identity theft. Audio retrieval also plays an important role in the classification and statistical analysis of audio data.

In this demo, you will learn how to build an audio retrieval system to retrieve similar sound snippets. The uploaded audio clips are converted into vector data using paddlespeech-based pre-training models (audio classification model, speaker recognition model, etc.) and stored in Milvus. Milvus automatically generates a unique ID for each vector; the ID and the corresponding audio information (audio ID, audio speaker ID, etc.) are then stored in MySQL, which completes the library construction. During retrieval, users upload a test audio clip to obtain its vector and then run a vector similarity search in Milvus. The retrieval result returned by Milvus is a vector ID, and the corresponding audio information can be looked up in MySQL by that ID.

![Workflow of an audio searching system](./img/audio_searching.png)

Note: this demo uses the [CN-Celeb](http://openslr.org/82/) dataset, which contains at least 650,000 audio entries and 3000 speakers, to build the audio vector library; audio (or speakers) is then retrieved using a preset distance calculation. Other datasets can be used instead as needed, e.g. Librispeech, VoxCeleb, UrbanSound, GloVe, MNIST, etc.

## Usage
### 1. Prepare PaddleSpeech
Audio vector extraction requires PaddleSpeech training model, so please make sure that PaddleSpeech has been installed before running. Specific installation steps: See [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).  

You can choose one way from easy, medium and hard to install paddlespeech.

### 2. Prepare MySQL and Milvus services by docker-compose
The audio similarity search system requires the Milvus and MySQL services. We can start these containers with one click through [docker-compose.yaml](./docker-compose.yaml), so please make sure you have [installed Docker Engine](https://docs.docker.com/engine/install/) and [Docker Compose](https://docs.docker.com/compose/install/) before running. Then run:

```bash
## Enter the audio_searching directory for the following example
cd ~/PaddleSpeech/demos/audio_searching/

## Then start the related services within the container
docker-compose -f docker-compose.yaml up -d
```

You will see that all containers are created:

```bash
Creating network "quick_deploy_app_net" with driver "bridge"
Creating milvus-minio    ... done
Creating milvus-etcd     ... done
Creating audio-mysql     ... done
Creating milvus-standalone ... done
Creating audio-webclient     ... done
```

Show all containers with `docker ps`; you can also use `docker logs audio-mysql` to get the logs of the server container:

```bash
CONTAINER ID  IMAGE COMMAND CREATED STATUS  PORTS NAMES
b2bcf279e599  milvusdb/milvus:v2.0.1  "/tini -- milvus run…"  22 hours ago  Up 22 hours 0.0.0.0:19530->19530/tcp  milvus-standalone
d8ef4c84e25c  mysql:5.7 "docker-entrypoint.s…"  22 hours ago  Up 22 hours 0.0.0.0:3306->3306/tcp, 33060/tcp audio-mysql
8fb501edb4f3  quay.io/coreos/etcd:v3.5.0  "etcd -advertise-cli…"  22 hours ago  Up 22 hours 2379-2380/tcp milvus-etcd
ffce340b3790  minio/minio:RELEASE.2020-12-03T00-03-10Z  "/usr/bin/docker-ent…"  22 hours ago  Up 22 hours (healthy) 9000/tcp  milvus-minio
15c84a506754  paddlepaddle/paddlespeech-audio-search-client:2.3  "/bin/bash -c '/usr/…"  22 hours ago  Up 22 hours (healthy) 0.0.0.0:8068->80/tcp  audio-webclient
```

### 3. Start API Server
Then start the system server, which provides the HTTP backend services.

- Install the Python packages

  ```bash
  pip install -r requirements.txt
  ```
- Set configuration(In the case of local running, you can skip this step.)

  ```bash
  ## Method 1: Modify the source file
  vim src/config.py

  ## Method 2: Modify the environment variables, as shown in
  export MILVUS_HOST=127.0.0.1
  export MYSQL_HOST=127.0.0.1
  ```

  Listed here are some of the parameters that need to be set; for more information please refer to [config.py](./src/config.py).

  | **Parameter**    |**Description**         | **Default setting** |
  | ---------------- | -----------------------| ------------------- |
  | MILVUS_HOST      | The IP address of Milvus, you can get it by ifconfig. If running everything on one machine, most likely 127.0.0.1 | 127.0.0.1           |
  | MILVUS_PORT      | Port of Milvus.    | 19530               |
  | VECTOR_DIMENSION | Dimension of the vectors.        | 192                 |
  | MYSQL_HOST       | The IP address of Mysql.    | 127.0.0.1           |
  | MYSQL_PORT       | Port of Mysql.        | 3306                |
  | DEFAULT_TABLE    | The milvus and mysql default collection name.  | audio_table          |

- Run the code

  Then start the server with Fastapi.

  ```bash
  export PYTHONPATH=$PYTHONPATH:./src
  python src/audio_search.py
  ```

  Then you will see the Application is started:

  ```bash
  INFO:     Started server process [13352]
  2022-03-26 22:45:30,838 ｜ INFO ｜ server.py ｜ serve ｜ 75 ｜ Started server process [13352]
  INFO:     Waiting for application startup.
  2022-03-26 22:45:30,839 ｜ INFO ｜ on.py ｜ startup ｜ 45 ｜ Waiting for application startup.
  INFO:     Application startup complete.
  2022-03-26 22:45:30,839 ｜ INFO ｜ on.py ｜ startup ｜ 59 ｜ Application startup complete.
  INFO:     Uvicorn running on http://0.0.0.0:8002 (Press CTRL+C to quit)
  2022-03-26 22:45:30,840 ｜ INFO ｜ server.py ｜ _log_started_message ｜ 206 ｜ Uvicorn running on http://0.0.0.0:8002 (Press CTRL+C to quit)
  ```

### 4. Usage
- Prepare data
  ```bash
  wget -c https://www.openslr.org/resources/82/cn-celeb_v2.tar.gz && tar -xvf cn-celeb_v2.tar.gz 
  ```
  **Note**: If you want to build a quick demo, you can use the `download_audio_data` function in ./src/test_audio_search.py, which downloads 20 audio files. The results shown below use this collection as an example.

- Prepare model(Skip this step if you use the default model.)
  ```bash
  ## Modify model configuration parameters. Currently, only ecapatdnn_voxceleb12 is supported, and multiple types will be supported in the future
  vim ./src/encode.py
  ```
 
- Scripts test (Recommended)

    The script downloads the data, loads the paddlespeech model, extracts the embeddings, builds the library, runs the retrieval and finally drops the library.  
    ```bash
    python ./src/test_audio_search.py
    ```

    Output：
    ```bash
    Downloading https://paddlespeech.cdn.bcebos.com/vector/audio/example_audio.tar.gz ...
    ...
    Unpacking ./example_audio.tar.gz ...
    [2022-03-26 22:50:54,987] [    INFO] - checking the aduio file format......
    [2022-03-26 22:50:54,987] [    INFO] - The sample rate is 16000
    [2022-03-26 22:50:54,987] [    INFO] - The audio file format is right
    [2022-03-26 22:50:54,988] [    INFO] - device type: cpu
    [2022-03-26 22:50:54,988] [    INFO] - load the pretrained model: ecapatdnn_voxceleb12-16k
    [2022-03-26 22:50:54,990] [    INFO] - Downloading sv0_ecapa_tdnn_voxceleb12_ckpt_0_1_0.tar.gz from https://paddlespeech.cdn.bcebos.com/vector/voxceleb/sv0_ecapa_tdnn_voxceleb12_ckpt_0_1_0.tar.gz
    ...
    [2022-03-26 22:51:17,285] [    INFO] - start to dynamic import the model class
    [2022-03-26 22:51:17,285] [    INFO] - model name ecapatdnn
    [2022-03-26 22:51:23,864] [    INFO] - start to set the model parameters to model
    [2022-03-26 22:54:08,115] [    INFO] - create the model instance success
    [2022-03-26 22:54:08,116] [    INFO] - Preprocess audio file: /home/zhaoqingen/PaddleSpeech/demos/audio_
    searching/example_audio/knife_hit_iron3.wav
    [2022-03-26 22:54:08,116] [    INFO] - load the audio sample points, shape is: (11012,)
    [2022-03-26 22:54:08,150] [    INFO] - extract the audio feat, shape is: (80, 69)
    [2022-03-26 22:54:08,152] [    INFO] - feats shape: [1, 80, 69]
    [2022-03-26 22:54:08,154] [    INFO] - audio extract the feat success
    [2022-03-26 22:54:08,155] [    INFO] - start to do backbone network model forward
    [2022-03-26 22:54:08,155] [    INFO] - feats shape:[1, 80, 69], lengths shape: [1]
    [2022-03-26 22:54:08,433] [    INFO] - embedding size: (192,)
    Extracting feature from audio No. 1 , 20 audios in total
    [2022-03-26 22:54:08,435] [    INFO] - checking the aduio file format......
    [2022-03-26 22:54:08,435] [    INFO] - The sample rate is 16000
    [2022-03-26 22:54:08,436] [    INFO] - The audio file format is right
    [2022-03-26 22:54:08,436] [    INFO] - device type: cpu
    [2022-03-26 22:54:08,436] [    INFO] - Model has been initialized
    [2022-03-26 22:54:08,436] [    INFO] - Preprocess audio file: /home/zhaoqingen/PaddleSpeech/demos/audio_searching/example_audio/sword_wielding.wav
    [2022-03-26 22:54:08,436] [    INFO] - load the audio sample points, shape is: (6391,)
    [2022-03-26 22:54:08,452] [    INFO] - extract the audio feat, shape is: (80, 40)
    [2022-03-26 22:54:08,454] [    INFO] - feats shape: [1, 80, 40]
    [2022-03-26 22:54:08,454] [    INFO] - audio extract the feat success
    [2022-03-26 22:54:08,454] [    INFO] - start to do backbone network model forward
    [2022-03-26 22:54:08,455] [    INFO] - feats shape:[1, 80, 40], lengths shape: [1]
    [2022-03-26 22:54:08,633] [    INFO] - embedding size: (192,)
    Extracting feature from audio No. 2 , 20 audios in total
    ...
    2022-03-26 22:54:15,892 ｜ INFO ｜ audio_search.py ｜ load_audios ｜ 85 ｜ Successfully loaded data, total count: 20
    2022-03-26 22:54:15,908 ｜ INFO ｜ audio_search.py ｜ count_audio ｜ 148 ｜ Successfully count the number of data!
    [2022-03-26 22:54:15,916] [    INFO] - checking the aduio file format......
    [2022-03-26 22:54:15,916] [    INFO] - The sample rate is 16000
    [2022-03-26 22:54:15,916] [    INFO] - The audio file format is right
    [2022-03-26 22:54:15,916] [    INFO] - device type: cpu
    [2022-03-26 22:54:15,916] [    INFO] - Model has been initialized
    [2022-03-26 22:54:15,916] [    INFO] - Preprocess audio file: /home/zhaoqingen/PaddleSpeech/demos/audio_searching/example_audio/test.wav
    [2022-03-26 22:54:15,917] [    INFO] - load the audio sample points, shape is: (8456,)
    [2022-03-26 22:54:15,923] [    INFO] - extract the audio feat, shape is: (80, 53)
    [2022-03-26 22:54:15,924] [    INFO] - feats shape: [1, 80, 53]
    [2022-03-26 22:54:15,924] [    INFO] - audio extract the feat success
    [2022-03-26 22:54:15,924] [    INFO] - start to do backbone network model forward
    [2022-03-26 22:54:15,924] [    INFO] - feats shape:[1, 80, 53], lengths shape: [1]
    [2022-03-26 22:54:16,051] [    INFO] - embedding size: (192,)
    ...
    2022-03-26 22:54:16,086 ｜ INFO ｜ audio_search.py ｜ search_local_audio ｜ 132 ｜ search result http://testserver/data?audio_path=./example_audio/test.wav, score 100.0
    2022-03-26 22:54:16,087 ｜ INFO ｜ audio_search.py ｜ search_local_audio ｜ 132 ｜ search result http://testserver/data?audio_path=./example_audio/knife_chopping.wav, score 29.182177782058716
    2022-03-26 22:54:16,087 ｜ INFO ｜ audio_search.py ｜ search_local_audio ｜ 132 ｜ search result http://testserver/data?audio_path=./example_audio/knife_cut_into_body.wav, score 22.73637056350708
    ...
    2022-03-26 22:54:16,088 ｜ INFO ｜ audio_search.py ｜ search_local_audio ｜ 136 ｜ Successfully searched similar audio!
    2022-03-26 22:54:17,164 ｜ INFO ｜ audio_search.py ｜ drop_tables ｜ 160 ｜ Successfully drop tables in Milvus and MySQL!
    ```
- GUI test (Optional)
  
    Navigate to 127.0.0.1:8068 in your browser to access the front-end interface.

    **Note**: If the browser and the service are not on the same machine, then the IP needs to be changed to the IP of the machine where the service is located, and the corresponding API_URL in docker-compose.yaml needs to be changed, and the docker-compose.yaml file needs to be re-executed for the change to take effect.

    - Insert data

      Download the data on the server and decompress it to a file, for example, /home/speech/data/. Then enter /home/speech/data/ in the address bar of the upload page to upload the data.
    
      ![](./img/insert.png)

    - Search for similar audio

      Select the magnifying glass icon on the left side of the interface. Then, press the "Default Target Audio File" button and upload a .wav sound file from the client you'd like to search. Results will be displayed.

      ![](./img/search.png)

### 5.Result

 machine configuration：
- OS: CentOS release 7.6 
- kernel：4.17.11-1.el7.elrepo.x86_64
- CPU：Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz 
- memory：132G

dataset：
- CN-Celeb, train size 650,000, test size 10,000, dimension 192, distance L2

recall and elapsed time statistics are shown in the following figure：

  ![](./img/result.png)


The retrieval framework based on Milvus takes about 2.9 milliseconds to retrieve on the premise of 90% recall rate, and it takes about 500 milliseconds for feature extraction (testing audio takes about 5 seconds), that is, a single audio test takes about 503 milliseconds in total, which can meet most application scenarios.

* compute embedding takes 500 ms
* retrieval with cosine takes 2.9 ms
* total takes 503 ms

> test audio is 5 sec

### 6.Pretrained Models

Here is a list of pretrained models released by PaddleSpeech :

| Model | Sample Rate
| :--- | :---: 
| ecapa_tdnn | 16000


================================================
FILE: demos/audio_searching/README_cn.md
================================================

(简体中文|[English](./README.md))

# 音频相似性检索
## 介绍

随着互联网不断发展，电子邮件、社交媒体照片、直播视频、客服语音等非结构化数据已经变得越来越普遍。如果想要使用计算机来处理这些数据，需要使用 embedding 技术将这些数据转化为向量 vector，然后进行存储、建索引、并查询。

但是，当数据量很大，比如上亿条音频要做相似度搜索，就比较困难了。穷举法固然可行，但非常耗时。针对这种场景，该 demo 将介绍如何使用开源向量数据库 Milvus 搭建音频相似度检索系统。

音频检索（如演讲、音乐、说话人等检索）实现了在海量音频数据中查询并找出相似声音（或相同说话人）片段。音频相似性检索系统可用于识别相似的音效、最大限度减少知识产权侵权等，还可以快速的检索声纹库、帮助企业控制欺诈和身份盗用等。在音频数据的分类和统计分析中，音频检索也发挥着重要作用。

在本 demo 中，你将学会如何构建一个音频检索系统，用来检索相似的声音片段。使用基于 PaddleSpeech 预训练模型（音频分类模型，说话人识别模型等）将上传的音频片段转换为向量数据，并存储在 Milvus 中。Milvus 自动为每个向量生成唯一的 ID，然后将 ID 和 相应的音频信息（音频id，音频的说话人id等等）存储在 MySQL，这样就完成建库的工作。用户在检索时，上传测试音频，得到向量，然后在 Milvus 中进行向量相似度搜索，Milvus 返回的检索结果为向量 ID，通过 ID 在 MySQL 内部查询相应的音频信息即可。

![音频检索流程图](./img/audio_searching.png)

注：该 demo 使用 [CN-Celeb](http://openslr.org/82/) 数据集，包括至少 650000 条音频，3000 个说话人，来建立音频向量库（音频特征，或音频说话人特征），然后通过预设的距离计算方式进行音频（或说话人）检索，这里面数据集也可以使用其他的，根据需要调整，如Librispeech，VoxCeleb，UrbanSound，GloVe，MNIST等。

## 使用方法
### 1. PaddleSpeech 安装
音频向量的提取需要用到基于 PaddleSpeech 训练的模型，所以请确保在运行之前已经安装了 PaddleSpeech，具体安装步骤，详见[安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install_cn.md)。

你可以从 easy，medium，hard 三种方式中选择一种方式安装。

### 2. MySQL 和 Milvus 安装
音频相似性的检索需要用到 Milvus, MySQL 服务。 我们可以通过 [docker-compose.yaml](./docker-compose.yaml) 一键启动这些容器，所以请确保在运行之前已经安装了 [Docker Engine](https://docs.docker.com/engine/install/) 和 [Docker Compose](https://docs.docker.com/compose/install/)。 即

```bash
## 先进入到 audio_searching 目录，如下示例
cd ~/PaddleSpeech/demos/audio_searching/

## 然后启动容器内的相关服务
docker-compose -f docker-compose.yaml up -d
```

你会看到所有的容器都被创建:

```bash
Creating network "quick_deploy_app_net" with driver "bridge"
Creating milvus-minio    ... done
Creating milvus-etcd     ... done
Creating audio-mysql     ... done
Creating milvus-standalone ... done
Creating audio-webclient     ... done
```

可以采用'docker ps'来显示所有的容器，还可以使用'docker logs audio-mysql'来获取服务器容器的日志：

```bash
CONTAINER ID  IMAGE COMMAND CREATED STATUS  PORTS NAMES
b2bcf279e599  milvusdb/milvus:v2.0.1  "/tini -- milvus run…"  22 hours ago  Up 22 hours 0.0.0.0:19530->19530/tcp  milvus-standalone
d8ef4c84e25c  mysql:5.7 "docker-entrypoint.s…"  22 hours ago  Up 22 hours 0.0.0.0:3306->3306/tcp, 33060/tcp audio-mysql
8fb501edb4f3  quay.io/coreos/etcd:v3.5.0  "etcd -advertise-cli…"  22 hours ago  Up 22 hours 2379-2380/tcp milvus-etcd
ffce340b3790  minio/minio:RELEASE.2020-12-03T00-03-10Z  "/usr/bin/docker-ent…"  22 hours ago  Up 22 hours (healthy) 9000/tcp  milvus-minio
15c84a506754  paddlepaddle/paddlespeech-audio-search-client:2.3  "/bin/bash -c '/usr/…"  22 hours ago  Up 22 hours (healthy) 0.0.0.0:8068->80/tcp  audio-webclient

```

### 3. 配置并启动 API 服务
启动系统服务程序，它会提供基于 HTTP 后端服务。

- 安装服务依赖的 python 基础包

  ```bash
  pip install -r requirements.txt
  ```
- 修改配置(本地运行情况下，一般不用修改，可以跳过该步骤)

  ```bash
  ## 方法一：修改源码文件
  vim src/config.py

  ## 方法二：修改环境变量，如下所示
  export MILVUS_HOST=127.0.0.1
  export MYSQL_HOST=127.0.0.1
  ```

  这里列出了一些需要设置的参数，更多信息请参考 [config.py](./src/config.py)

  | **参数**    | **描述**                | **默认设置** |
  | ---------------- | -------------------- | ------------------- |
  | MILVUS_HOST      | Milvus 服务的 IP 地址 | 127.0.0.1           |
  | MILVUS_PORT      | Milvus 服务的端口号   | 19530               |
  | VECTOR_DIMENSION | 特征向量的维度        | 192                 |
  | MYSQL_HOST       | Mysql 服务的 IP 地址  | 127.0.0.1           |
  | MYSQL_PORT       | Mysql 服务的端口号    | 3306                |
  | DEFAULT_TABLE    | 默认存储的表名        | audio_table         |

- 运行程序

  启动用 Fastapi 构建的服务

  ```bash
  export PYTHONPATH=$PYTHONPATH:./src
  python src/audio_search.py
  ```

  然后你会看到应用程序启动:

  ```bash
  INFO:     Started server process [13352]
  2022-03-26 22:45:30,838 ｜ INFO ｜ server.py ｜ serve ｜ 75 ｜ Started server process [13352]
  INFO:     Waiting for application startup.
  2022-03-26 22:45:30,839 ｜ INFO ｜ on.py ｜ startup ｜ 45 ｜ Waiting for application startup.
  INFO:     Application startup complete.
  2022-03-26 22:45:30,839 ｜ INFO ｜ on.py ｜ startup ｜ 59 ｜ Application startup complete.
  INFO:     Uvicorn running on http://0.0.0.0:8002 (Press CTRL+C to quit)
  2022-03-26 22:45:30,840 ｜ INFO ｜ server.py ｜ _log_started_message ｜ 206 ｜ Uvicorn running on http://0.0.0.0:8002 (Press CTRL+C to quit)
  ```

### 4. 测试方法
- 准备数据
  ```bash
  wget -c https://www.openslr.org/resources/82/cn-celeb_v2.tar.gz && tar -xvf cn-celeb_v2.tar.gz 
  ```
  **注**：如果希望快速搭建 demo，可以采用 ./src/test_audio_search.py:download_audio_data 内部的 20 条音频，另外后续结果展示以该集合为例

- 准备模型（如果使用默认模型，可以跳过此步骤）
  ```bash
  ## 修改模型配置参数，目前 model 仅支持 ecapatdnn_voxceleb12，后续将支持多种类型
  vim ./src/encode.py
  ```
 
 - 脚本测试（推荐）

    ```bash
    python ./src/test_audio_search.py
    ```
    注：内部将依次下载数据，加载 paddlespeech 模型，提取 embedding，存储建库，检索，删库

    输出：
    ```bash
    Downloading https://paddlespeech.cdn.bcebos.com/vector/audio/example_audio.tar.gz ...
    ...
    Unpacking ./example_audio.tar.gz ...
    [2022-03-26 22:50:54,987] [    INFO] - checking the aduio file format......
    [2022-03-26 22:50:54,987] [    INFO] - The sample rate is 16000
    [2022-03-26 22:50:54,987] [    INFO] - The audio file format is right
    [2022-03-26 22:50:54,988] [    INFO] - device type: cpu
    [2022-03-26 22:50:54,988] [    INFO] - load the pretrained model: ecapatdnn_voxceleb12-16k
    [2022-03-26 22:50:54,990] [    INFO] - Downloading sv0_ecapa_tdnn_voxceleb12_ckpt_0_1_0.tar.gz from https://paddlespeech.cdn.bcebos.com/vector/voxceleb/sv0_ecapa_tdnn_voxceleb12_ckpt_0_1_0.tar.gz
    ...
    [2022-03-26 22:51:17,285] [    INFO] - start to dynamic import the model class
    [2022-03-26 22:51:17,285] [    INFO] - model name ecapatdnn
    [2022-03-26 22:51:23,864] [    INFO] - start to set the model parameters to model
    [2022-03-26 22:54:08,115] [    INFO] - create the model instance success
    [2022-03-26 22:54:08,116] [    INFO] - Preprocess audio file: /home/zhaoqingen/PaddleSpeech/demos/audio_
    searching/example_audio/knife_hit_iron3.wav
    [2022-03-26 22:54:08,116] [    INFO] - load the audio sample points, shape is: (11012,)
    [2022-03-26 22:54:08,150] [    INFO] - extract the audio feat, shape is: (80, 69)
    [2022-03-26 22:54:08,152] [    INFO] - feats shape: [1, 80, 69]
    [2022-03-26 22:54:08,154] [    INFO] - audio extract the feat success
    [2022-03-26 22:54:08,155] [    INFO] - start to do backbone network model forward
    [2022-03-26 22:54:08,155] [    INFO] - feats shape:[1, 80, 69], lengths shape: [1]
    [2022-03-26 22:54:08,433] [    INFO] - embedding size: (192,)
    Extracting feature from audio No. 1 , 20 audios in total
    [2022-03-26 22:54:08,435] [    INFO] - checking the aduio file format......
    [2022-03-26 22:54:08,435] [    INFO] - The sample rate is 16000
    [2022-03-26 22:54:08,436] [    INFO] - The audio file format is right
    [2022-03-26 22:54:08,436] [    INFO] - device type: cpu
    [2022-03-26 22:54:08,436] [    INFO] - Model has been initialized
    [2022-03-26 22:54:08,436] [    INFO] - Preprocess audio file: /home/zhaoqingen/PaddleSpeech/demos/audio_searching/example_audio/sword_wielding.wav
    [2022-03-26 22:54:08,436] [    INFO] - load the audio sample points, shape is: (6391,)
    [2022-03-26 22:54:08,452] [    INFO] - extract the audio feat, shape is: (80, 40)
    [2022-03-26 22:54:08,454] [    INFO] - feats shape: [1, 80, 40]
    [2022-03-26 22:54:08,454] [    INFO] - audio extract the feat success
    [2022-03-26 22:54:08,454] [    INFO] - start to do backbone network model forward
    [2022-03-26 22:54:08,455] [    INFO] - feats shape:[1, 80, 40], lengths shape: [1]
    [2022-03-26 22:54:08,633] [    INFO] - embedding size: (192,)
    Extracting feature from audio No. 2 , 20 audios in total
    ...
    2022-03-26 22:54:15,892 ｜ INFO ｜ audio_search.py ｜ load_audios ｜ 85 ｜ Successfully loaded data, total count: 20
    2022-03-26 22:54:15,908 ｜ INFO ｜ audio_search.py ｜ count_audio ｜ 148 ｜ Successfully count the number of data!
    [2022-03-26 22:54:15,916] [    INFO] - checking the aduio file format......
    [2022-03-26 22:54:15,916] [    INFO] - The sample rate is 16000
    [2022-03-26 22:54:15,916] [    INFO] - The audio file format is right
    [2022-03-26 22:54:15,916] [    INFO] - device type: cpu
    [2022-03-26 22:54:15,916] [    INFO] - Model has been initialized
    [2022-03-26 22:54:15,916] [    INFO] - Preprocess audio file: /home/zhaoqingen/PaddleSpeech/demos/audio_searching/example_audio/test.wav
    [2022-03-26 22:54:15,917] [    INFO] - load the audio sample points, shape is: (8456,)
    [2022-03-26 22:54:15,923] [    INFO] - extract the audio feat, shape is: (80, 53)
    [2022-03-26 22:54:15,924] [    INFO] - feats shape: [1, 80, 53]
    [2022-03-26 22:54:15,924] [    INFO] - audio extract the feat success
    [2022-03-26 22:54:15,924] [    INFO] - start to do backbone network model forward
    [2022-03-26 22:54:15,924] [    INFO] - feats shape:[1, 80, 53], lengths shape: [1]
    [2022-03-26 22:54:16,051] [    INFO] - embedding size: (192,)
    ...
    2022-03-26 22:54:16,086 ｜ INFO ｜ audio_search.py ｜ search_local_audio ｜ 132 ｜ search result http://testserver/data?audio_path=./example_audio/test.wav, score 100.0
    2022-03-26 22:54:16,087 ｜ INFO ｜ audio_search.py ｜ search_local_audio ｜ 132 ｜ search result http://testserver/data?audio_path=./example_audio/knife_chopping.wav, score 29.182177782058716
    2022-03-26 22:54:16,087 ｜ INFO ｜ audio_search.py ｜ search_local_audio ｜ 132 ｜ search result http://testserver/data?audio_path=./example_audio/knife_cut_into_body.wav, score 22.73637056350708
    ...
    2022-03-26 22:54:16,088 ｜ INFO ｜ audio_search.py ｜ search_local_audio ｜ 136 ｜ Successfully searched similar audio!
    2022-03-26 22:54:17,164 ｜ INFO ｜ audio_search.py ｜ drop_tables ｜ 160 ｜ Successfully drop tables in Milvus and MySQL!
    ```

  - 前端测试（可选）
  
    在浏览器中输入 127.0.0.1:8068 访问前端页面
    
    **注**：如果浏览器和服务不在同一台机器上，那么 IP 需要修改成服务所在的机器 IP，并且 docker-compose.yaml 中相应的 API_URL 也要修改，然后重新执行 docker-compose.yaml 文件，使修改生效。

    - 上传音频
    
      在服务端下载数据并解压到一个文件夹，假设为 /home/speech/data/，那么在上传页面地址栏输入 /home/speech/data/ 进行数据上传
    
      ![](./img/insert.png)

    - 检索相似音频

      选择左上角放大镜，点击 “Default Target Audio File” 按钮，从客户端上传测试音频，接着你将看到检索结果

      ![](./img/search.png)

### 5. 结果

机器配置：
- 操作系统: CentOS release 7.6 
- 内核：4.17.11-1.el7.elrepo.x86_64
- 处理器：Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz 
- 内存：132G

数据集：
- CN-Celeb, 训练集 65万, 测试集 1万，向量维度 192，距离计算方式 L2

召回和耗时统计如下图：

  ![](./img/result.png)

基于 Milvus 的检索框架在召回率 90% 的前提下，检索耗时约 2.9 毫秒，加上特征提取(Embedding)耗时约 500 毫秒(测试音频时长约 5 秒)，即单条音频测试总共耗时约 503 毫秒，可以满足大多数应用场景。

### 6. 预训练模型

以下是 PaddleSpeech 提供的预训练模型列表：

| 模型 | 采样率
| :--- | :---: 
| ecapa_tdnn| 16000


================================================
FILE: demos/audio_searching/docker-compose.yaml
================================================
# Backing services for the audio-search demo: a standalone Milvus (plus the
# etcd and MinIO containers it is configured to use), MySQL, and the web client.
# All containers are attached to the app_net bridge network declared at the bottom.
version: '3.5'

services:
  # etcd instance referenced by the Milvus container through ETCD_ENDPOINTS.
  etcd:
    container_name: milvus-etcd
    image: quay.io/coreos/etcd:v3.5.0
    networks:
      app_net:
    environment:
      - ETCD_AUTO_COMPACTION_MODE=revision
      - ETCD_AUTO_COMPACTION_RETENTION=1000
      - ETCD_QUOTA_BACKEND_BYTES=4294967296
    volumes:
      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd
    command: etcd -advertise-client-urls=http://127.0.0.1:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd

  # MinIO instance referenced by the Milvus container through MINIO_ADDRESS.
  minio:
    container_name: milvus-minio
    image: minio/minio:RELEASE.2020-12-03T00-03-10Z
    networks:
      app_net:
    environment:
      MINIO_ACCESS_KEY: minioadmin
      MINIO_SECRET_KEY: minioadmin
    volumes:
      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data
    command: minio server /minio_data
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
      interval: 30s
      timeout: 20s
      retries: 3

  # Milvus vector database. Port 19530 is published on the host, which is the
  # default MILVUS_PORT the Python service reads in src/config.py.
  standalone:
    container_name: milvus-standalone
    image: milvusdb/milvus:v2.0.1
    networks:
      app_net:
        ipv4_address: 172.16.23.10
    command: ["milvus", "run", "standalone"]
    environment:
      ETCD_ENDPOINTS: etcd:2379
      MINIO_ADDRESS: minio:9000
    volumes:
      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus
    ports:
      - "19530:19530"
    depends_on:
      - "etcd"
      - "minio"
  
  # MySQL database. The root password and published port 3306 match the
  # MYSQL_PWD / MYSQL_PORT defaults in src/config.py.
  mysql:
    container_name: audio-mysql
    image: mysql:5.7
    networks:
      app_net:
        ipv4_address: 172.16.23.11
    environment:
      - MYSQL_ROOT_PASSWORD=123456
    volumes:
      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/mysql:/var/lib/mysql
    ports:
      - "3306:3306"

  # Browser front end, published on host port 8068.
  # API_URL points at the search API (src/audio_search.py listens on port 8002).
  # Per the README, replace 127.0.0.1 with the server's IP when the browser runs
  # on a different machine, then re-apply this compose file.
  webclient:
    container_name: audio-webclient
    image: paddlepaddle/paddlespeech-audio-search-client:2.3
    networks:
      app_net:
        ipv4_address: 172.16.23.13
    environment:
      API_URL: 'http://127.0.0.1:8002'
    ports:
      - "8068:80"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost/"]
      interval: 30s
      timeout: 20s
      retries: 3

# Bridge network with a fixed subnet so the services above can be given
# static ipv4_address values.
networks:
  app_net:
    driver: bridge
    ipam:
      driver: default
      config:
        - subnet: 172.16.23.0/24
          gateway: 172.16.23.1


================================================
FILE: demos/audio_searching/requirements.txt
================================================
diskcache
fastapi
librosa==0.8.0
numpy==1.22.0
pydantic
pymilvus==2.0.1
pymysql
python-multipart
soundfile==0.10.3.post1
starlette
typing
uvicorn


================================================
FILE: demos/audio_searching/src/audio_search.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from typing import Optional

import uvicorn
from config import UPLOAD_PATH
from diskcache import Cache
from fastapi import FastAPI
from fastapi import File
from fastapi import UploadFile
from logs import LOGGER
from milvus_helpers import MilvusHelper
from mysql_helpers import MySQLHelper
from operations.count import do_count
from operations.drop import do_drop
from operations.load import do_load
from operations.search import do_search
from pydantic import BaseModel
from starlette.middleware.cors import CORSMiddleware
from starlette.requests import Request
from starlette.responses import FileResponse
from starlette.responses import JSONResponse

# FastAPI application object; the route decorators below register on it.
app = FastAPI()
# CORS is opened to every origin, method and header so the separately served
# web client can call this API from the browser.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"])

# Shared helper instances, created once at import time and passed to every
# operation.
MILVUS_CLI = MilvusHelper()
MYSQL_CLI = MySQLHelper()

# Make sure the upload directory (default 'tmp/audio-data') exists.
# exist_ok=True keeps start-up from crashing if another process creates the
# directory between the existence check and the makedirs call.
if not os.path.exists(UPLOAD_PATH):
    os.makedirs(UPLOAD_PATH, exist_ok=True)
    LOGGER.info(f"Mkdir the path: {UPLOAD_PATH}")


@app.get('/data')
def audio_path(audio_path):
    """
    Return the audio file located at ``audio_path`` on the server.

    Args:
        audio_path: Path of the file to send, taken from the query string.

    Returns:
        A ``FileResponse`` streaming the file, or an HTTP 400 JSON body
        ``{'status': False, 'msg': <error text>}`` when the file cannot be
        served.
    """
    # NOTE(security): audio_path is caller-controlled, so this endpoint can
    # read any file the service account can read. It is left unrestricted here
    # because search results link back to arbitrary user-loaded directories;
    # restrict it to an allow-listed root before exposing the service.
    try:
        # FileResponse only touches the file when the response is sent, so a
        # missing path has to be detected here to reach the handler below.
        if not os.path.isfile(audio_path):
            raise FileNotFoundError(f"No such audio file: {audio_path}")
        LOGGER.info(f"Successfully load audio: {audio_path}")
        return FileResponse(audio_path)
    except Exception as e:
        LOGGER.error(f"upload audio error: {e}")
        # FastAPI does not treat a (body, status) tuple Flask-style; an
        # explicit response object is needed to send a 400 status.
        return JSONResponse(
            status_code=400, content={'status': False,
                                      'msg': str(e)})


@app.get('/progress')
def get_progress():
    """
    Report how far the current data-loading job has progressed.

    Reads the ``current`` and ``total`` keys from the disk cache at ``./tmp``.

    Returns:
        The string ``"current: <n>, total: <m>"``, or an HTTP 400 JSON body
        ``{'status': False, 'msg': <error text>}`` if the cache cannot be read
        (for example when the keys have not been written yet).
    """
    try:
        # The context manager closes the cache handle after each request
        # instead of leaving one open per call.
        with Cache('./tmp') as cache:
            return f"current: {cache['current']}, total: {cache['total']}"
    except Exception as e:
        LOGGER.error(f"Upload data error: {e}")
        # FastAPI does not treat a (body, status) tuple Flask-style; an
        # explicit response object is needed to send a 400 status.
        return JSONResponse(
            status_code=400, content={'status': False,
                                      'msg': str(e)})


# Request body accepted by POST /audio/load.
class Item(BaseModel):
    # Optional target table name; forwarded to do_load as None when omitted.
    Table: Optional[str] = None
    # Forwarded to do_load as the location of the audio to insert
    # (the README example uses a server-side directory).
    File: str


@app.post('/audio/load')
async def load_audios(item: Item):
    """
    Insert the audio found under ``item.File`` into Milvus and MySQL.

    Args:
        item: Request body carrying the optional table name and the file path.

    Returns:
        ``{'status': True, 'msg': ...}`` on success, or an HTTP 400 JSON body
        ``{'status': False, 'msg': <error text>}`` on failure.
    """
    try:
        total_num = do_load(item.Table, item.File, MILVUS_CLI, MYSQL_CLI)
        LOGGER.info(f"Successfully loaded data, total count: {total_num}")
        return {'status': True, 'msg': "Successfully loaded data!"}
    except Exception as e:
        LOGGER.error(e)
        # FastAPI does not treat a (body, status) tuple Flask-style; an
        # explicit response object is needed to send a 400 status.
        return JSONResponse(
            status_code=400, content={'status': False,
                                      'msg': str(e)})


@app.post('/audio/search')
async def search_audio(request: Request,
                       table_name: str=None,
                       audio: UploadFile=File(...)):
    """
    Search Milvus/MySQL for audio similar to an uploaded file.

    The upload is stored under ``UPLOAD_PATH`` and then handed to
    ``do_search``.

    Args:
        request: Incoming request; its ``host`` header is passed to do_search.
        table_name: Optional table to search.
        audio: The uploaded query audio.

    Returns:
        A list of ``(path, (file_name, score))`` pairs ordered by descending
        score, or an HTTP 400 JSON body
        ``{'status': False, 'msg': <error text>}`` on failure.
    """
    try:
        # The client controls the upload's filename; keep only its final
        # component so a name like "../../x" cannot escape UPLOAD_PATH.
        safe_name = os.path.basename(audio.filename or "")
        if not safe_name:
            raise ValueError("Uploaded audio has no usable file name")
        # Save the upload data to server.
        content = await audio.read()
        query_audio_path = os.path.join(UPLOAD_PATH, safe_name)
        with open(query_audio_path, "wb") as f:
            f.write(content)
        host = request.headers['host']
        _, paths, distances = do_search(host, table_name, query_audio_path,
                                        MILVUS_CLI, MYSQL_CLI)
        names = []
        for path, score in zip(paths, distances):
            names.append(os.path.basename(path))
            LOGGER.info(f"search result {path}, score {score}")
        res = dict(zip(paths, zip(names, distances)))
        # Largest score first. The README sample log shows 100.0 for an exact
        # match, so the values appear to be similarity scores rather than raw
        # distances -- TODO confirm against operations/search.py.
        res = sorted(res.items(), key=lambda item: item[1][1], reverse=True)
        LOGGER.info("Successfully searched similar audio!")
        return res
    except Exception as e:
        LOGGER.error(e)
        # FastAPI does not treat a (body, status) tuple Flask-style; an
        # explicit response object is needed to send a 400 status.
        return JSONResponse(
            status_code=400, content={'status': False,
                                      'msg': str(e)})


@app.post('/audio/search/local')
async def search_local_audio(request: Request,
                             query_audio_path: str,
                             table_name: str=None):
    """
    Search Milvus/MySQL for audio similar to a file already on the server.

    Args:
        request: Incoming request; its ``host`` header is passed to do_search.
        query_audio_path: Server-side path of the query audio.
        table_name: Optional table to search.

    Returns:
        A list of ``(path, (file_name, score))`` pairs ordered by descending
        score, or an HTTP 400 JSON body
        ``{'status': False, 'msg': <error text>}`` on failure.
    """
    # NOTE(security): query_audio_path is caller-controlled and is handed to
    # do_search unchanged; restrict it before exposing the service publicly.
    try:
        host = request.headers['host']
        _, paths, distances = do_search(host, table_name, query_audio_path,
                                        MILVUS_CLI, MYSQL_CLI)
        names = []
        for path, score in zip(paths, distances):
            names.append(os.path.basename(path))
            LOGGER.info(f"search result {path}, score {score}")
        res = dict(zip(paths, zip(names, distances)))
        # Largest score first. The README sample log shows 100.0 for an exact
        # match, so the values appear to be similarity scores rather than raw
        # distances -- TODO confirm against operations/search.py.
        res = sorted(res.items(), key=lambda item: item[1][1], reverse=True)
        LOGGER.info("Successfully searched similar audio!")
        return res
    except Exception as e:
        LOGGER.error(e)
        # FastAPI does not treat a (body, status) tuple Flask-style; an
        # explicit response object is needed to send a 400 status.
        return JSONResponse(
            status_code=400, content={'status': False,
                                      'msg': str(e)})


@app.get('/audio/count')
async def count_audio(table_name: str=None):
    """
    Return the number of vectors stored for ``table_name``.

    Args:
        table_name: Optional table to count.

    Returns:
        The value produced by ``do_count``, or an HTTP 400 JSON body
        ``{'status': False, 'msg': <error text>}`` on failure.
    """
    try:
        num = do_count(table_name, MILVUS_CLI)
        LOGGER.info("Successfully count the number of data!")
        return num
    except Exception as e:
        LOGGER.error(e)
        # FastAPI does not treat a (body, status) tuple Flask-style; an
        # explicit response object is needed to send a 400 status.
        return JSONResponse(
            status_code=400, content={'status': False,
                                      'msg': str(e)})


@app.post('/audio/drop')
async def drop_tables(table_name: str=None):
    """
    Drop the Milvus collection and MySQL table for ``table_name``.

    Args:
        table_name: Optional table to drop.

    Returns:
        The status produced by ``do_drop``, or an HTTP 400 JSON body
        ``{'status': False, 'msg': <error text>}`` on failure.
    """
    try:
        status = do_drop(table_name, MILVUS_CLI, MYSQL_CLI)
        LOGGER.info("Successfully drop tables in Milvus and MySQL!")
        return status
    except Exception as e:
        LOGGER.error(e)
        # FastAPI does not treat a (body, status) tuple Flask-style; an
        # explicit response object is needed to send a 400 status.
        return JSONResponse(
            status_code=400, content={'status': False,
                                      'msg': str(e)})


# When executed as a script, serve the API on every interface at port 8002
# (the port the web client's API_URL in docker-compose.yaml points to).
if __name__ == '__main__':
    uvicorn.run(app=app, host='0.0.0.0', port=8002)


================================================
FILE: demos/audio_searching/src/config.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

# Every setting below is read from an environment variable of the same name
# and falls back to the default shown when the variable is unset.

############### Milvus Configuration ###############
MILVUS_HOST = os.getenv("MILVUS_HOST", "127.0.0.1")
MILVUS_PORT = int(os.getenv("MILVUS_PORT", "19530"))
# Dimension of the embedding vectors stored in Milvus.
VECTOR_DIMENSION = int(os.getenv("VECTOR_DIMENSION", "192"))
INDEX_FILE_SIZE = int(os.getenv("INDEX_FILE_SIZE", "1024"))
# Metric used both when building the index and when searching.
METRIC_TYPE = os.getenv("METRIC_TYPE", "L2")
# Table/collection name used when a request does not name one.
DEFAULT_TABLE = os.getenv("DEFAULT_TABLE", "audio_table")
TOP_K = int(os.getenv("TOP_K", "10"))

############### MySQL Configuration ###############
MYSQL_HOST = os.getenv("MYSQL_HOST", "127.0.0.1")
MYSQL_PORT = int(os.getenv("MYSQL_PORT", "3306"))
MYSQL_USER = os.getenv("MYSQL_USER", "root")
MYSQL_PWD = os.getenv("MYSQL_PWD", "123456")
MYSQL_DB = os.getenv("MYSQL_DB", "mysql")

############### Data Path ###############
# Directory where uploaded query audio is stored.
UPLOAD_PATH = os.getenv("UPLOAD_PATH", "tmp/audio-data")

############### Number of Log Files ###############
# Used as the log handler's backupCount. Historically this was read from the
# lower-case "logs_num" variable; the upper-case name used by every other
# setting is now honoured first and the old name is kept as a fallback.
LOGS_NUM = int(os.getenv("LOGS_NUM", os.getenv("logs_num", "0")))


================================================
FILE: demos/audio_searching/src/encode.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from logs import LOGGER

from paddlespeech.cli.vector import VectorExecutor

# Single executor instance created at import time and reused by every call to
# get_audio_embedding().
vector_executor = VectorExecutor()


def get_audio_embedding(path):
    """
    Compute a unit-length embedding for the audio file at ``path``.

    The shared ``vector_executor`` is run with the 'ecapatdnn_voxceleb12'
    model and its output is divided by its own norm.

    Returns:
        The normalised embedding as a plain Python list, or ``None`` when
        anything goes wrong (the error is logged).
    """
    try:
        raw_vector = vector_executor(
            audio_file=path, model='ecapatdnn_voxceleb12')
        unit_vector = raw_vector / np.linalg.norm(raw_vector)
        return unit_vector.tolist()
    except Exception as err:
        LOGGER.error(f"Error with embedding:{err}")
        return None


================================================
FILE: demos/audio_searching/src/logs.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
import logging
import os
import re
import sys

from config import LOGS_NUM


class MultiprocessHandler(logging.FileHandler):
    """
    File handler that writes to ``./logs/<filename>-<timestamp>.log`` and
    switches to a new file whenever the timestamp for the chosen interval
    changes. Optionally prunes old files so that only ``backupCount`` remain.
    """

    def __init__(self,
                 filename,
                 when='D',
                 backupCount=0,
                 encoding=None,
                 delay=False):
        """
        Open the log file for the current interval.

        Args:
            filename: Prefix of the log file name.
            when: Rollover unit, case-insensitive: 'S', 'M', 'H' or 'D'.
            backupCount: Number of log files to keep; 0 disables pruning.
            encoding: Passed through to ``logging.FileHandler``.
            delay: Passed through to ``logging.FileHandler``.

        Exits the process with status 1 if ``when`` is not a known unit or the
        log directory cannot be created.
        """
        self.prefix = filename
        self.backupCount = backupCount
        self.when = when.upper()
        # Pattern a file's timestamp part must start with to count as one of
        # this handler's log files.
        self.extMath = r"^\d{4}-\d{2}-\d{2}"

        self.when_dict = {
            'S': "%Y-%m-%d-%H-%M-%S",
            'M': "%Y-%m-%d-%H-%M",
            'H': "%Y-%m-%d-%H",
            'D': "%Y-%m-%d"
        }

        # Look the unit up by its upper-cased form so that e.g. 'd' is
        # accepted, as the normalisation above intends.
        self.suffix = self.when_dict.get(self.when)
        if not self.suffix:
            print('The specified date interval unit is invalid: ', self.when)
            sys.exit(1)

        # strftime template for the log path; formatted with the current time
        # to obtain the file that should be written right now.
        self.filefmt = os.path.join('.', "logs",
                                    f"{self.prefix}-{self.suffix}.log")

        self.filePath = datetime.datetime.now().strftime(self.filefmt)

        _dir = os.path.dirname(self.filefmt)
        try:
            # exist_ok avoids failing when another process creates the
            # directory at the same moment.
            os.makedirs(_dir, exist_ok=True)
        except Exception as e:
            print('Failed to create log file: ', e)
            print("log_path：" + self.filePath)
            sys.exit(1)

        logging.FileHandler.__init__(self, self.filePath, 'a+', encoding, delay)

    def should_change_file_to_write(self):
        """
        Return True, and remember the new path, when the current time maps to
        a different log file than the one in use.
        """
        _filePath = datetime.datetime.now().strftime(self.filefmt)
        if _filePath != self.filePath:
            self.filePath = _filePath
            return True
        return False

    def do_change_file(self):
        """
        Point the handler at ``self.filePath``: close the current stream,
        reopen it unless ``delay`` is set, and prune old files when
        ``backupCount`` is positive.
        """
        self.baseFilename = os.path.abspath(self.filePath)
        if self.stream:
            self.stream.close()
            self.stream = None

        if not self.delay:
            self.stream = self._open()
        if self.backupCount > 0:
            for s in self.get_files_to_delete():
                os.remove(s)

    def get_files_to_delete(self):
        """
        Return the paths of this handler's log files that exceed
        ``backupCount``, oldest (by name) first.
        """
        dir_name, _ = os.path.split(self.baseFilename)
        prefix = self.prefix + '-'
        stamp_pattern = re.compile(self.extMath)
        result = []
        for file_name in os.listdir(dir_name):
            if not file_name.startswith(prefix):
                continue
            # Strip the prefix and the 4-character ".log" extension.
            suffix = file_name[len(prefix):-4]
            if stamp_pattern.match(suffix):
                result.append(os.path.join(dir_name, file_name))
        # Timestamps sort lexicographically, so the oldest files come first.
        result.sort()

        # Keep the newest backupCount files; everything before them goes.
        surplus = max(0, len(result) - self.backupCount)
        return result[:surplus]

    def emit(self, record):
        """
        Write ``record``, first switching to a new file if the interval has
        rolled over. Errors are routed to ``handleError``.
        """
        try:
            if self.should_change_file_to_write():
                self.do_change_file()
            logging.FileHandler.emit(self, record)
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception:
            self.handleError(record)

def write_log():
    """
    Configure the root logger and return it.

    Two handlers sharing one format are attached: a stdout handler at INFO
    level and a daily ``MultiprocessHandler`` file handler at DEBUG level
    that keeps ``LOGS_NUM`` files.
    """
    layout = logging.Formatter(
        '%(asctime)s ｜ %(levelname)s ｜ %(filename)s ｜ %(funcName)s ｜ %(lineno)s ｜ %(message)s'
    )

    # Console output: INFO and above.
    console = logging.StreamHandler(sys.stdout)
    console.setLevel(logging.INFO)
    console.setFormatter(layout)

    # File output: everything from DEBUG up, one file per day.
    to_file = MultiprocessHandler(
        "audio-searching", when='D', backupCount=LOGS_NUM)
    to_file.setLevel(logging.DEBUG)
    to_file.setFormatter(layout)
    to_file.do_change_file()

    root = logging.getLogger()
    root.setLevel(logging.DEBUG)
    for handler in (console, to_file):
        root.addHandler(handler)

    return root


# Logger shared by the rest of the service; configured once when this module
# is first imported.
LOGGER = write_log()

# Manual smoke test: emit one message at each of three levels.
if __name__ == "__main__":
    message = 'test writing logs'
    LOGGER.info(message)
    LOGGER.debug(message)
    LOGGER.error(message)


================================================
FILE: demos/audio_searching/src/milvus_helpers.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys

from config import METRIC_TYPE
from config import MILVUS_HOST
from config import MILVUS_PORT
from config import VECTOR_DIMENSION
from logs import LOGGER
from pymilvus import Collection
from pymilvus import CollectionSchema
from pymilvus import connections
from pymilvus import DataType
from pymilvus import FieldSchema
from pymilvus import utility


class MilvusHelper:
    """
    Thin wrapper around the PyMilvus calls used by the demo:

      1. connect to the Milvus server
      2. create a collection
      3. insert entities
      4. create an index
      5. search
      6. delete a collection

    Every method logs the error and terminates the process with
    ``sys.exit(1)`` when the underlying Milvus call fails.
    """

    def __init__(self):
        """Connect to Milvus at MILVUS_HOST:MILVUS_PORT."""
        try:
            # Handle of the collection most recently selected or created.
            self.collection = None
            connections.connect(host=MILVUS_HOST, port=MILVUS_PORT)
            LOGGER.debug(
                f"Successfully connect to Milvus with IP:{MILVUS_HOST} and PORT:{MILVUS_PORT}"
            )
        except Exception as e:
            LOGGER.error(f"Failed to connect Milvus: {e}")
            sys.exit(1)

    def set_collection(self, collection_name):
        """Bind ``self.collection`` to an existing collection; exit if absent."""
        try:
            if self.has_collection(collection_name):
                self.collection = Collection(name=collection_name)
            else:
                raise Exception(
                    f"There is no collection named:{collection_name}")
        except Exception as e:
            LOGGER.error(f"Failed to set collection in Milvus: {e}")
            sys.exit(1)

    def has_collection(self, collection_name):
        """Return whether Milvus has a collection named ``collection_name``."""
        try:
            return utility.has_collection(collection_name)
        except Exception as e:
            LOGGER.error(f"Failed to check state of collection in Milvus: {e}")
            sys.exit(1)

    def create_collection(self, collection_name):
        """
        Create the collection if it does not exist, otherwise select it.

        The schema has an auto-generated INT64 primary key ``id`` and a
        FLOAT_VECTOR field ``embedding`` of dimension VECTOR_DIMENSION.
        In both cases ``self.collection`` ends up bound to the collection.

        Returns:
            The string "OK".
        """
        try:
            if not self.has_collection(collection_name):
                field1 = FieldSchema(
                    name="id",
                    dtype=DataType.INT64,
                    description="int64",
                    is_primary=True,
                    auto_id=True)
                field2 = FieldSchema(
                    name="embedding",
                    dtype=DataType.FLOAT_VECTOR,
                    description="speaker embeddings",
                    dim=VECTOR_DIMENSION,
                    is_primary=False)
                schema = CollectionSchema(
                    fields=[field1, field2], description="embeddings info")
                self.collection = Collection(
                    name=collection_name, schema=schema)
                LOGGER.debug(f"Create Milvus collection: {collection_name}")
            else:
                self.set_collection(collection_name)
            return "OK"
        except Exception as e:
            LOGGER.error(f"Failed to create collection in Milvus: {e}")
            sys.exit(1)

    def insert(self, collection_name, vectors):
        """
        Insert ``vectors`` into the collection, creating it if needed.

        Returns:
            The primary keys Milvus assigned to the inserted rows.
        """
        try:
            # create_collection() leaves self.collection bound to
            # collection_name whether it created or merely selected it, so no
            # further set_collection() round-trip is needed.
            self.create_collection(collection_name)
            data = [vectors]
            mr = self.collection.insert(data)
            ids = mr.primary_keys
            self.collection.load()
            LOGGER.debug(
                f"Insert vectors to Milvus in collection: {collection_name} with {len(vectors)} rows"
            )
            return ids
        except Exception as e:
            LOGGER.error(f"Failed to insert data to Milvus: {e}")
            sys.exit(1)

    def create_index(self, collection_name):
        """
        Build an IVF_SQ8 index (nlist=16384, metric METRIC_TYPE) on the
        ``embedding`` field.

        Returns:
            The status object returned by Milvus when its ``code`` is falsy;
            a truthy code is treated as failure.
        """
        try:
            self.set_collection(collection_name)
            default_index = {
                "index_type": "IVF_SQ8",
                "metric_type": METRIC_TYPE,
                "params": {
                    "nlist": 16384
                }
            }
            status = self.collection.create_index(
                field_name="embedding", index_params=default_index)
            if not status.code:
                LOGGER.debug(
                    f"Successfully create index in collection:{collection_name} with param:{default_index}"
                )
                return status
            else:
                raise Exception(status.message)
        except Exception as e:
            LOGGER.error(f"Failed to create index: {e}")
            sys.exit(1)

    def delete_collection(self, collection_name):
        """
        Drop the collection.

        Returns:
            The string "ok".
        """
        try:
            self.set_collection(collection_name)
            self.collection.drop()
            # Do not keep a handle to a collection that no longer exists.
            self.collection = None
            LOGGER.debug("Successfully drop collection!")
            return "ok"
        except Exception as e:
            LOGGER.error(f"Failed to drop collection: {e}")
            sys.exit(1)

    def search_vectors(self, collection_name, vectors, top_k):
        """
        Search the ``embedding`` field for the ``top_k`` nearest neighbours of
        each query in ``vectors`` (metric METRIC_TYPE, nprobe=16).

        Returns:
            The raw search result returned by Milvus.
        """
        try:
            self.set_collection(collection_name)
            search_params = {
                "metric_type": METRIC_TYPE,
                "params": {
                    "nprobe": 16
                }
            }
            res = self.collection.search(
                vectors,
                anns_field="embedding",
                param=search_params,
                limit=top_k)
            LOGGER.debug(f"Successfully search in collection: {res}")
            return res
        except Exception as e:
            LOGGER.error(f"Failed to search vectors in Milvus: {e}")
            sys.exit(1)

    def count(self, collection_name):
        """Return the number of entities in the collection."""
        try:
            self.set_collection(collection_name)
            num = self.collection.num_entities
            LOGGER.debug(
                f"Successfully get the num:{num} of the collection:{collection_name}"
            )
            return num
        except Exception as e:
            LOGGER.error(f"Failed to count vectors in Milvus: {e}")
            sys.exit(1)


================================================
FILE: demos/audio_searching/src/mysql_helpers.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys

import numpy
import pymysql
from config import MYSQL_DB
from config import MYSQL_HOST
from config import MYSQL_PORT
from config import MYSQL_PWD
from config import MYSQL_USER
from logs import LOGGER


class MySQLHelper():
    """
    Thin wrapper around the PyMySQL calls used by the demo:

      1. connect to the MySQL server
      2. create a table
      3. insert data into a table
      4. search by Milvus ids
      5. delete a table

    Values are always sent as bound parameters and table names are
    backtick-quoted, because both can originate from API requests.
    Methods that run SQL log the error and terminate the process with
    ``sys.exit(1)`` when the statement fails.
    """

    def __init__(self):
        """Open the connection described by the MYSQL_* settings."""
        self._connect()

    def _connect(self):
        # (Re)create the connection and its cursor from the MYSQL_* settings.
        self.conn = pymysql.connect(
            host=MYSQL_HOST,
            user=MYSQL_USER,
            port=MYSQL_PORT,
            password=MYSQL_PWD,
            database=MYSQL_DB,
            local_infile=True)
        self.cursor = self.conn.cursor()

    @staticmethod
    def _quote_table(table_name):
        # Identifiers cannot be sent as bound parameters, so quote the name
        # with backticks and double any embedded backtick to keep it inert.
        if not isinstance(table_name, str):
            raise TypeError("table_name must be a str")
        return "`" + table_name.replace("`", "``") + "`"

    def test_connection(self):
        """Ping the server and open a fresh connection if the ping fails."""
        try:
            self.conn.ping()
        except Exception:
            self._connect()

    def create_mysql_table(self, table_name):
        """Create the (milvus_id, audio_path) table if it does not exist."""
        self.test_connection()
        sql = "create table if not exists " + self._quote_table(
            table_name) + "(milvus_id TEXT, audio_path TEXT);"
        try:
            self.cursor.execute(sql)
            LOGGER.debug(f"MYSQL create table: {table_name} with sql: {sql}")
        except Exception as e:
            LOGGER.error(f"MYSQL ERROR: {e} with sql: {sql}")
            sys.exit(1)

    def load_data_to_mysql(self, table_name, data):
        """Batch-insert ``(milvus_id, audio_path)`` rows and commit."""
        self.test_connection()
        sql = "insert into " + self._quote_table(
            table_name) + " (milvus_id,audio_path) values (%s,%s);"
        try:
            self.cursor.executemany(sql, data)
            self.conn.commit()
            LOGGER.debug(
                f"MYSQL loads data to table: {table_name} successfully")
        except Exception as e:
            LOGGER.error(f"MYSQL ERROR: {e} with sql: {sql}")
            sys.exit(1)

    def search_by_milvus_ids(self, ids, table_name):
        """
        Return the audio paths stored for ``ids``, in the order of ``ids``.

        An empty ``ids`` yields an empty list without touching the database
        (``IN ()`` would be a syntax error).
        """
        id_list = list(ids)
        if not id_list:
            return []
        self.test_connection()
        marks = ",".join(["%s"] * len(id_list))
        sql = "select audio_path from " + self._quote_table(
            table_name
        ) + " where milvus_id in (" + marks + ") order by field (milvus_id," + marks + ");"
        try:
            # The ids appear twice in the statement: once in IN (...) and once
            # in FIELD(...), which preserves the caller's ordering.
            self.cursor.execute(sql, id_list + id_list)
            results = self.cursor.fetchall()
            results = [res[0] for res in results]
            LOGGER.debug("MYSQL search by milvus id.")
            return results
        except Exception as e:
            LOGGER.error(f"MYSQL ERROR: {e} with sql: {sql}")
            sys.exit(1)

    def delete_table(self, table_name):
        """Drop the table if it exists."""
        self.test_connection()
        sql = "drop table if exists " + self._quote_table(table_name) + ";"
        try:
            self.cursor.execute(sql)
            LOGGER.debug(f"MYSQL delete table:{table_name}")
        except Exception as e:
            LOGGER.error(f"MYSQL ERROR: {e} with sql: {sql}")
            sys.exit(1)

    def delete_all_data(self, table_name):
        """Delete every row of the table and commit."""
        self.test_connection()
        sql = 'delete from ' + self._quote_table(table_name) + ';'
        try:
            self.cursor.execute(sql)
            self.conn.commit()
            LOGGER.debug(f"MYSQL delete all data in table:{table_name}")
        except Exception as e:
            LOGGER.error(f"MYSQL ERROR: {e} with sql: {sql}")
            sys.exit(1)

    def count_table(self, table_name):
        """Return ``count(spk_id)`` for the table."""
        self.test_connection()
        sql = "select count(spk_id) from " + self._quote_table(
            table_name) + ";"
        try:
            self.cursor.execute(sql)
            results = self.cursor.fetchall()
            LOGGER.debug(f"MYSQL count table:{results[0][0]}")
            return results[0][0]
        except Exception as e:
            LOGGER.error(f"MYSQL ERROR: {e} with sql: {sql}")
            sys.exit(1)

    def create_mysql_table_vpr(self, table_name):
        """Create the (spk_id, audio_path, embedding) table if missing."""
        self.test_connection()
        sql = "create table if not exists " + self._quote_table(
            table_name) + "(spk_id TEXT, audio_path TEXT, embedding TEXT);"
        try:
            self.cursor.execute(sql)
            LOGGER.debug(f"MYSQL create table: {table_name} with sql: {sql}")
        except Exception as e:
            LOGGER.error(f"MYSQL ERROR: {e} with sql: {sql}")
            sys.exit(1)

    def load_data_to_mysql_vpr(self, table_name, data):
        """Insert one ``(spk_id, audio_path, embedding)`` row and commit."""
        self.test_connection()
        sql = "insert into " + self._quote_table(
            table_name) + " (spk_id,audio_path,embedding) values (%s,%s,%s);"
        try:
            self.cursor.execute(sql, data)
            # Without a commit the row lives only in the open transaction and
            # is lost if the connection is re-established.
            self.conn.commit()
            LOGGER.debug(
                f"MYSQL loads data to table: {table_name} successfully")
        except Exception as e:
            LOGGER.error(f"MYSQL ERROR: {e} with sql: {sql}")
            sys.exit(1)

    def list_vpr(self, table_name):
        """
        Return every record of the table.

        Returns:
            ``(spk_ids, audio_paths, embeddings)``. Each embedding is a numpy
            array built by stripping the brackets from the stored text and
            splitting it on commas; the elements are therefore strings.
        """
        self.test_connection()
        sql = "select * from " + self._quote_table(table_name) + " ;"
        try:
            self.cursor.execute(sql)
            results = self.cursor.fetchall()
            self.conn.commit()
            spk_ids = [res[0] for res in results]
            audio_paths = [res[1] for res in results]
            embeddings = [
                numpy.array(
                    str(res[2]).replace('[', '').replace(']', '').split(","))
                for res in results
            ]
            return spk_ids, audio_paths, embeddings
        except Exception as e:
            LOGGER.error(f"MYSQL ERROR: {e} with sql: {sql}")
            sys.exit(1)

    def search_audio_vpr(self, table_name, spk_id):
        """Return the audio path of the first row whose spk_id matches."""
        self.test_connection()
        sql = "select audio_path from " + self._quote_table(
            table_name) + " where spk_id=%s ;"
        try:
            self.cursor.execute(sql, (spk_id, ))
            results = self.cursor.fetchall()
            LOGGER.debug(
                f"MYSQL search by spk id {spk_id} to get audio {results[0][0]}.")
            return results[0][0]
        except Exception as e:
            LOGGER.error(f"MYSQL ERROR: {e} with sql: {sql}")
            sys.exit(1)

    def delete_data_vpr(self, table_name, spk_id):
        """Delete the rows whose spk_id matches and commit."""
        self.test_connection()
        sql = "delete from " + self._quote_table(
            table_name) + " where spk_id=%s;"
        try:
            self.cursor.execute(sql, (spk_id, ))
            # Commit so the deletion is not lost with the open transaction.
            self.conn.commit()
            LOGGER.debug(
                f"MYSQL delete a record {spk_id} in table {table_name}")
        except Exception as e:
            LOGGER.error(f"MYSQL ERROR: {e} with sql: {sql}")
            sys.exit(1)


================================================
FILE: demos/audio_searching/src/operations/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: demos/audio_searching/src/operations/count.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys

from config import DEFAULT_TABLE
from logs import LOGGER


def do_count(table_name, milvus_cli):
    """
    Report how many vectors a Milvus collection holds.

    Falls back to DEFAULT_TABLE when table_name is empty. Returns None when
    the collection does not exist. Any error is logged and terminates the
    process with status 1.
    """
    collection = table_name if table_name else DEFAULT_TABLE
    try:
        if milvus_cli.has_collection(collection):
            return milvus_cli.count(collection)
        return None
    except Exception as e:
        LOGGER.error(f"Error attempting to count table {e}")
        sys.exit(1)


def do_count_vpr(table_name, mysql_cli):
    """
    Report how many speakers are stored in the MySQL table.

    Falls back to DEFAULT_TABLE when table_name is empty. Any error is
    logged and terminates the process with status 1.
    """
    target = table_name if table_name else DEFAULT_TABLE
    try:
        return mysql_cli.count_table(target)
    except Exception as e:
        LOGGER.error(f"Error attempting to count table {e}")
        sys.exit(1)


def do_list(table_name, mysql_cli):
    """
    Returns all vpr records (speaker ids and audio paths) in the system

    Falls back to DEFAULT_TABLE when table_name is empty. The third value
    returned by mysql_cli.list_vpr is discarded. Any error is logged and
    terminates the process with status 1.
    """
    if not table_name:
        table_name = DEFAULT_TABLE
    try:
        spk_ids, audio_paths, _ = mysql_cli.list_vpr(table_name)
        return spk_ids, audio_paths
    except Exception as e:
        # The message used to say "count table", copied from do_count.
        LOGGER.error(f"Error attempting to list table {e}")
        sys.exit(1)


def do_get(table_name, spk_id, mysql_cli):
    """
    Returns the audio path by spk_id in the system

    Falls back to DEFAULT_TABLE when table_name is empty. Any error is
    logged and terminates the process with status 1.
    """
    if not table_name:
        table_name = DEFAULT_TABLE
    try:
        audio_path = mysql_cli.search_audio_vpr(table_name, spk_id)
        return audio_path
    except Exception as e:
        # The message used to say "count table", copied from do_count.
        LOGGER.error(f"Error attempting to get audio path {e}")
        sys.exit(1)


================================================
FILE: demos/audio_searching/src/operations/drop.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys

from config import DEFAULT_TABLE
from logs import LOGGER


def do_drop(table_name, milvus_cli, mysql_cli):
    """
    Remove a collection from Milvus and the same-named table from MySQL.

    Falls back to DEFAULT_TABLE when table_name is empty. Returns the value
    produced by the Milvus deletion, or a fixed message when the collection
    is absent (MySQL is left untouched in that case). Any error is logged
    and terminates the process with status 1.
    """
    target = table_name if table_name else DEFAULT_TABLE
    try:
        if milvus_cli.has_collection(target):
            result = milvus_cli.delete_collection(target)
            mysql_cli.delete_table(target)
            return result
        return "Collection is not exist"
    except Exception as e:
        LOGGER.error(f"Error attempting to drop table: {e}")
        sys.exit(1)


def do_drop_vpr(table_name, mysql_cli):
    """
    Remove the vpr table from MySQL.

    Falls back to DEFAULT_TABLE when table_name is empty and returns "OK"
    on success. Any error is logged and terminates the process with
    status 1.
    """
    target = table_name if table_name else DEFAULT_TABLE
    try:
        mysql_cli.delete_table(target)
    except Exception as e:
        LOGGER.error(f"Error attempting to drop table: {e}")
        sys.exit(1)
    else:
        return "OK"


def do_delete(table_name, spk_id, mysql_cli):
    """
    Delete a record by spk_id in MySQL

    Falls back to DEFAULT_TABLE when table_name is empty and returns "OK"
    on success. Any error is logged and terminates the process with
    status 1.
    """
    if not table_name:
        table_name = DEFAULT_TABLE
    try:
        mysql_cli.delete_data_vpr(table_name, spk_id)
        return "OK"
    except Exception as e:
        # The message used to say "drop table", copied from do_drop.
        LOGGER.error(f"Error attempting to delete record: {e}")
        sys.exit(1)


================================================
FILE: demos/audio_searching/src/operations/load.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys

from config import DEFAULT_TABLE
from diskcache import Cache
from encode import get_audio_embedding
from logs import LOGGER


def get_audios(path):
    """
    List all supported audio files recursively under the path folder.

    A file is kept when its extension, compared case-insensitively, is one
    of .wav, .mp3, .ogg, .flac or .m4a. Results follow os.walk order; a
    missing folder yields an empty list.
    """
    supported_formats = {".wav", ".mp3", ".ogg", ".flac", ".m4a"}
    return [
        os.path.join(root, name)
        for root, _, names in os.walk(path) for name in names
        # Lower-case the extension so e.g. "clip.WAV" is not skipped.
        if os.path.splitext(name)[1].lower() in supported_formats
    ]


def extract_features(audio_dir):
    """
    Get the vector of every audio file found under audio_dir

    Returns a pair (feats, names): the embeddings and the matching audio
    paths encoded as bytes. Files whose embedding is None are skipped.
    Progress is written to the disk cache at './tmp' under the keys
    'total' and 'current'. Any error is logged and terminates the process
    with status 1.
    """
    try:
        cache = Cache('./tmp')
        feats = []
        names = []
        audio_list = get_audios(audio_dir)
        total = len(audio_list)
        cache['total'] = total
        for i, audio_path in enumerate(audio_list):
            norm_feat = get_audio_embedding(audio_path)
            # Count the file as processed even when it is skipped below;
            # otherwise 'current' can never reach 'total' once a file fails.
            cache['current'] = i + 1
            if norm_feat is None:
                continue
            feats.append(norm_feat)
            names.append(audio_path.encode())
            print(
                f"Extracting feature from audio No. {i + 1} , {total} audios in total"
            )
        return feats, names
    except Exception as e:
        LOGGER.error(f"Error with extracting feature from audio {e}")
        sys.exit(1)


def format_data(ids, names):
    """
    Pair every vector id (converted to str) with the audio name found at
    the same position, returning the pairs as a list of tuples.
    """
    # Index into names (rather than zip) so a too-short names list still
    # raises IndexError.
    return [(str(vec_id), names[pos]) for pos, vec_id in enumerate(ids)]


def do_load(table_name, audio_dir, milvus_cli, mysql_cli):
    """
    Extract embeddings for the audio under audio_dir, insert them into
    Milvus, and store the (id, name) pairs in MySQL.

    Falls back to DEFAULT_TABLE when table_name is empty. Returns the
    number of ids produced by the Milvus insert.
    """
    target = table_name if table_name else DEFAULT_TABLE
    vectors, names = extract_features(audio_dir)
    ids = milvus_cli.insert(target, vectors)
    milvus_cli.create_index(target)
    mysql_cli.create_mysql_table(target)
    rows = format_data(ids, names)
    mysql_cli.load_data_to_mysql(target, rows)
    return len(ids)


def do_enroll(table_name, spk_id, audio_path, mysql_cli):
    """
    Import spk_id,audio_path,embedding to Mysql

    Falls back to DEFAULT_TABLE when table_name is empty and returns "OK"
    on success.

    Raises:
        ValueError: if no embedding could be computed for audio_path.
    """
    if not table_name:
        table_name = DEFAULT_TABLE
    embedding = get_audio_embedding(audio_path)
    if embedding is None:
        # extract_features() treats a None embedding as a failed extraction.
        # Refuse to enroll rather than store the literal string "None".
        raise ValueError(
            f"Failed to extract embedding from audio: {audio_path}")
    mysql_cli.create_mysql_table_vpr(table_name)
    data = (spk_id, audio_path, str(embedding))
    mysql_cli.load_data_to_mysql_vpr(table_name, data)
    return "OK"


================================================
FILE: demos/audio_searching/src/operations/search.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys

import numpy
from config import DEFAULT_TABLE
from config import TOP_K
from encode import get_audio_embedding
from logs import LOGGER


def do_search(host, table_name, audio_path, milvus_cli, mysql_cli):
    """
    Look up the TOP_K nearest neighbours of an audio file in Milvus and
    resolve their ids to audio paths through MySQL.

    Returns (vids, paths, distances): ids as strings, paths rewritten as
    download URLs on host, and each distance d mapped to (1 - d) * 100.
    Any error is logged and terminates the process with status 1.
    """
    try:
        collection = table_name if table_name else DEFAULT_TABLE
        query_feat = get_audio_embedding(audio_path)
        vectors = milvus_cli.search_vectors(collection, [query_feat], TOP_K)
        vids = [str(hit.id) for hit in vectors[0]]
        paths = mysql_cli.search_by_milvus_ids(vids, collection)
        distances = [hit.distance for hit in vectors[0]]
        url_prefix = "http://" + str(host) + "/data?audio_path="
        # Rewrite both lists in place, one entry per returned path.
        for idx, raw_path in enumerate(paths):
            paths[idx] = url_prefix + str(raw_path)
            distances[idx] = (1 - distances[idx]) * 100
        return vids, paths, distances
    except Exception as e:
        LOGGER.error(f"Error with search: {e}")
        sys.exit(1)


def do_search_vpr(host, table_name, audio_path, mysql_cli):
    """
    Score an audio file against every enrolled speaker stored in MySQL.

    Returns (spk_ids, paths, scores): ids as strings, paths rewritten as
    download URLs on host, and each score being the dot product of the
    query embedding with the stored vector, multiplied by 100.
    Any error is logged and terminates the process with status 1.
    """
    try:
        target = table_name if table_name else DEFAULT_TABLE
        emb = numpy.array(get_audio_embedding(audio_path))
        spk_ids, paths, vectors = mysql_cli.list_vpr(target)
        scores = [
            numpy.dot(emb, stored.astype(numpy.float64)) for stored in vectors
        ]
        spk_ids = [str(spk) for spk in spk_ids]
        paths = [str(p) for p in paths]
        url_prefix = "http://" + str(host) + "/data?audio_path="
        # Rewrite both lists in place, one entry per returned path.
        for idx, raw_path in enumerate(paths):
            paths[idx] = url_prefix + str(raw_path)
            scores[idx] = scores[idx] * 100
        return spk_ids, paths, scores
    except Exception as e:
        LOGGER.error(f"Error with search: {e}")
        sys.exit(1)


================================================
FILE: demos/audio_searching/src/test_audio_search.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from audio_search import app
from fastapi.testclient import TestClient

from paddlespeech.dataset.download import download
from paddlespeech.dataset.download import unpack

# Test client bound to the audio_search FastAPI app; shared by every test.
client = TestClient(app)


def download_audio_data():
    """
    Fetch the example audio archive into the current directory and
    unpack it there.
    """
    archive_url = "https://paddlespeech.cdn.bcebos.com/vector/audio/example_audio.tar.gz"
    archive_md5 = "52ac69316c1aa1fdef84da7dd2c67b39"
    dest = "./"
    unpack(download(archive_url, archive_md5, dest), dest, True)


def test_drop():
    """
    POST /audio/drop and expect HTTP 200.
    """
    resp = client.post("/audio/drop")
    assert resp.status_code == 200


def test_load():
    """
    POST /audio/load for ./example_audio and expect the success payload.
    """
    expected = {'status': True, 'msg': "Successfully loaded data!"}
    resp = client.post("/audio/load", json={"File": "./example_audio"})
    assert resp.status_code == 200
    assert resp.json() == expected


def test_progress():
    """
    GET /progress and expect all 20 example files to be reported as done.
    """
    resp = client.get("/progress")
    assert resp.status_code == 200
    assert resp.json() == "current: 20, total: 20"


def test_count():
    """
    GET /audio/count and expect 20 stored vectors.
    """
    resp = client.get("/audio/count")
    assert resp.status_code == 200
    assert resp.json() == 20


def test_search():
    """
    POST /audio/search/local for ./example_audio/test.wav and expect
    ten results.
    """
    url = "/audio/search/local?query_audio_path=.%2Fexample_audio%2Ftest.wav"
    resp = client.post(url)
    assert resp.status_code == 200
    assert len(resp.json()) == 10


def test_data():
    """
    GET /data for ./example_audio/test.wav and expect HTTP 200.
    """
    url = "/data?audio_path=.%2Fexample_audio%2Ftest.wav"
    resp = client.get(url)
    assert resp.status_code == 200


if __name__ == "__main__":
    # Run the end-to-end scenario in order: fetch data, load, count,
    # search, then drop everything again.
    for step in (download_audio_data, test_load, test_count, test_search,
                 test_drop):
        step()


================================================
FILE: demos/audio_searching/src/test_vpr_search.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from fastapi.testclient import TestClient
from vpr_search import app

from paddlespeech.dataset.download import download
from paddlespeech.dataset.download import unpack

# Test client bound to the vpr_search FastAPI app; shared by every test.
client = TestClient(app)


def download_audio_data():
    """
    Fetch the example audio archive into the current directory and
    unpack it there.
    """
    archive_url = "https://paddlespeech.cdn.bcebos.com/vector/audio/example_audio.tar.gz"
    archive_md5 = "52ac69316c1aa1fdef84da7dd2c67b39"
    dest = "./"
    unpack(download(archive_url, archive_md5, dest), dest, True)


def test_drop():
    """
    POST /vpr/drop and expect HTTP 200.
    """
    resp = client.post("/vpr/drop")
    assert resp.status_code == 200


def test_enroll_local(spk: str, audio: str):
    """
    Enroll ./example_audio/<audio>.wav under speaker id <spk> through
    POST /vpr/enroll/local and expect the success payload.
    """
    url = "".join([
        "/vpr/enroll/local?spk_id=", spk, "&audio_path=.%2Fexample_audio%2F",
        audio, ".wav"
    ])
    expected = {'status': True, 'msg': "Successfully enroll data!"}
    resp = client.post(url)
    assert resp.status_code == 200
    assert resp.json() == expected


def test_search_local():
    """
    POST /vpr/recog/local for ./example_audio/test.wav and expect HTTP 200.
    """
    url = "/vpr/recog/local?audio_path=.%2Fexample_audio%2Ftest.wav"
    resp = client.post(url)
    assert resp.status_code == 200


def test_list():
    """
    GET /vpr/list and expect HTTP 200.
    """
    resp = client.get("/vpr/list")
    assert resp.status_code == 200


def test_data(spk: str):
    """
    Get the audio file by spk_id in MySQL
    """
    # Send the GET through the generic request() entry point: the
    # httpx-based TestClient does not accept a `json` argument on get(),
    # while request() takes it in both the older and newer client.
    response = client.request(
        "GET",
        "/vpr/data",
        json={"spk_id": spk}, )
    assert response.status_code == 200


def test_del(spk: str):
    """
    POST /vpr/del with the speaker id in the JSON body and expect HTTP 200.
    """
    payload = {"spk_id": spk}
    resp = client.post("/vpr/del", json=payload)
    assert resp.status_code == 200


def test_count():
    """
    GET /vpr/count and expect HTTP 200.
    """
    resp = client.get("/vpr/count")
    assert resp.status_code == 200


if __name__ == "__main__":
    download_audio_data()

    # Each entry is (callable, positional args); entries run strictly in
    # the listed order.
    scenario = [
        # Enroll three speakers and inspect the table.
        (test_enroll_local, ("spk1", "arms_strikes")),
        (test_enroll_local, ("spk2", "sword_wielding")),
        (test_enroll_local, ("spk3", "test")),
        (test_list, ()),
        (test_data, ("spk1", )),
        (test_count, ()),
        (test_search_local, ()),
        # Remove one speaker and query again.
        (test_del, ("spk1", )),
        (test_count, ()),
        (test_search_local, ()),
        # Re-enroll the removed speaker and query again.
        (test_enroll_local, ("spk1", "arms_strikes")),
        (test_count, ()),
        (test_search_local, ()),
        # Clean up.
        (test_drop, ()),
    ]
    for step, step_args in scenario:
        step(*step_args)


================================================
FILE: demos/audio_searching/src/vpr_search.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import uvicorn
from config import UPLOAD_PATH
from fastapi import FastAPI
from fastapi import File
from fastapi import Form
from fastapi import UploadFile
from logs import LOGGER
from mysql_helpers import MySQLHelper
from operations.count import do_count_vpr
from operations.count import do_get
from operations.count import do_list
from operations.drop import do_delete
from operations.drop import do_drop_vpr
from operations.load import do_enroll
from operations.search import do_search_vpr
from starlette.middleware.cors import CORSMiddleware
from starlette.requests import Request
from starlette.responses import FileResponse
from starlette.responses import JSONResponse

# Create the FastAPI application and register CORS middleware that accepts
# any origin, method and header.
app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"])

# Single MySQL helper instance passed to the operations by the endpoints.
MYSQL_CLI = MySQLHelper()

# Create the upload directory (UPLOAD_PATH) at import time if it is missing.
if not os.path.exists(UPLOAD_PATH):
    os.makedirs(UPLOAD_PATH)
    LOGGER.info(f"Mkdir the path: {UPLOAD_PATH}")


@app.post('/vpr/enroll')
async def vpr_enroll(table_name: str=None,
                     spk_id: str=Form(...),
                     audio: UploadFile=File(...)):
    # Enroll the uploaded audio with spk-id into MySQL.
    # The upload is first written under UPLOAD_PATH, then enrolled from
    # that path. Returns a {'status', 'msg'} dict in every case.
    try:
        if not spk_id:
            return {'status': False, 'msg': "spk_id can not be None"}
        # Save the upload data to server.
        content = await audio.read()
        # Keep only the last path component of the client-supplied filename
        # so a name such as "../../x" cannot be written outside UPLOAD_PATH.
        audio_path = os.path.join(UPLOAD_PATH,
                                  os.path.basename(audio.filename))
        with open(audio_path, "wb+") as f:
            f.write(content)
        do_enroll(table_name, spk_id, audio_path, MYSQL_CLI)
        LOGGER.info(f"Successfully enrolled {spk_id} online!")
        return {'status': True, 'msg': "Successfully enroll data!"}
    except Exception as e:
        LOGGER.error(e)
        # An exception object is not JSON-serialisable; send its text.
        return {'status': False, 'msg': str(e)}


@app.post('/vpr/enroll/local')
async def vpr_enroll_local(table_name: str=None,
                           spk_id: str=None,
                           audio_path: str=None):
    # Enroll the local audio with spk-id into MySQL.
    # Returns a success dict, or HTTP 400 with a {'status', 'msg'} body
    # when enrolling raises.
    try:
        do_enroll(table_name, spk_id, audio_path, MYSQL_CLI)
        LOGGER.info(f"Successfully enrolled {spk_id} locally!")
        return {'status': True, 'msg': "Successfully enroll data!"}
    except Exception as e:
        LOGGER.error(e)
        # FastAPI does not interpret a (body, status) tuple; build the
        # response explicitly and send the exception text, not the object.
        return JSONResponse(
            status_code=400, content={'status': False,
                                      'msg': str(e)})


@app.post('/vpr/recog')
async def vpr_recog(request: Request,
                    table_name: str=None,
                    audio: UploadFile=File(...)):
    # Voice print recognition online.
    # The upload is written under UPLOAD_PATH and scored against the
    # enrolled speakers. Returns a list of (spk_id, (path, score)) pairs,
    # or HTTP 400 with a {'status', 'msg'} body when an exception is raised.
    try:
        # Save the upload data to server.
        content = await audio.read()
        # Keep only the last path component of the client-supplied filename
        # so a name such as "../../x" cannot be written outside UPLOAD_PATH.
        query_audio_path = os.path.join(UPLOAD_PATH,
                                        os.path.basename(audio.filename))
        with open(query_audio_path, "wb+") as f:
            f.write(content)
        host = request.headers['host']
        spk_ids, paths, scores = do_search_vpr(host, table_name,
                                               query_audio_path, MYSQL_CLI)
        for spk_id, path, score in zip(spk_ids, paths, scores):
            LOGGER.info(f"spk {spk_id}, score {score}, audio path {path}, ")
        res = dict(zip(spk_ids, zip(paths, scores)))
        # Sort results by score, highest score first
        res = sorted(res.items(), key=lambda item: item[1][1], reverse=True)
        LOGGER.info("Successfully speaker recognition online!")
        return res
    except Exception as e:
        LOGGER.error(e)
        # FastAPI does not interpret a (body, status) tuple; build the
        # response explicitly and send the exception text, not the object.
        return JSONResponse(
            status_code=400, content={'status': False,
                                      'msg': str(e)})


@app.post('/vpr/recog/local')
async def vpr_recog_local(request: Request,
                          table_name: str=None,
                          audio_path: str=None):
    # Voice print recognition locally.
    # Scores the audio at audio_path against the enrolled speakers.
    # Returns a list of (spk_id, (path, score)) pairs, or HTTP 400 with a
    # {'status', 'msg'} body when an exception is raised.
    try:
        host = request.headers['host']
        spk_ids, paths, scores = do_search_vpr(host, table_name, audio_path,
                                               MYSQL_CLI)
        for spk_id, path, score in zip(spk_ids, paths, scores):
            LOGGER.info(f"spk {spk_id}, score {score}, audio path {path}, ")
        res = dict(zip(spk_ids, zip(paths, scores)))
        # Sort results by score, highest score first
        res = sorted(res.items(), key=lambda item: item[1][1], reverse=True)
        LOGGER.info("Successfully speaker recognition locally!")
        return res
    except Exception as e:
        LOGGER.error(e)
        # FastAPI does not interpret a (body, status) tuple; build the
        # response explicitly and send the exception text, not the object.
        return JSONResponse(
            status_code=400, content={'status': False,
                                      'msg': str(e)})


@app.post('/vpr/del')
async def vpr_del(table_name: str=None, spk_id: dict=None):
    # Delete a record by spk_id in MySQL.
    # The speaker id is read from the 'spk_id' key of the request body.
    # Returns a {'status', 'msg'} dict, with HTTP 400 when an exception
    # is raised.
    try:
        spk_id = spk_id['spk_id']
        if not spk_id:
            return {'status': False, 'msg': "spk_id can not be None"}
        do_delete(table_name, spk_id, MYSQL_CLI)
        LOGGER.info("Successfully delete a record by spk_id in MySQL")
        return {'status': True, 'msg': "Successfully delete data!"}
    except Exception as e:
        LOGGER.error(e)
        # FastAPI does not interpret a (body, status) tuple; build the
        # response explicitly and send the exception text, not the object.
        return JSONResponse(
            status_code=400, content={'status': False,
                                      'msg': str(e)})


@app.get('/vpr/list')
async def vpr_list(table_name: str=None):
    # Get all records in MySQL.
    # Returns the pair (spk_ids, audio_paths), or HTTP 400 with a
    # {'status', 'msg'} body when an exception is raised.
    try:
        spk_ids, audio_paths = do_list(table_name, MYSQL_CLI)
        for spk, path in zip(spk_ids, audio_paths):
            LOGGER.debug(f"spk {spk}, audio path {path}")
        LOGGER.info("Successfully list all records from mysql!")
        return spk_ids, audio_paths
    except Exception as e:
        LOGGER.error(e)
        # FastAPI does not interpret a (body, status) tuple; build the
        # response explicitly and send the exception text, not the object.
        return JSONResponse(
            status_code=400, content={'status': False,
                                      'msg': str(e)})


@app.get('/vpr/data')
async def vpr_data(
    table_name: str=None,
    spk_id: dict=None, ):
    # Get the audio file from path by spk_id in MySQL.
    # The speaker id is read from the 'spk_id' key of the request body.
    # Returns the file, or HTTP 400 with a {'status', 'msg'} body when an
    # exception is raised.
    try:
        spk_id = spk_id['spk_id']
        if not spk_id:
            return {'status': False, 'msg': "spk_id can not be None"}
        audio_path = do_get(table_name, spk_id, MYSQL_CLI)
        LOGGER.info(f"Successfully get audio path {audio_path}!")
        return FileResponse(audio_path)
    except Exception as e:
        LOGGER.error(e)
        # FastAPI does not interpret a (body, status) tuple; build the
        # response explicitly and send the exception text, not the object.
        return JSONResponse(
            status_code=400, content={'status': False,
                                      'msg': str(e)})


@app.get('/vpr/count')
async def vpr_count(table_name: str=None):
    # Get the total number of spk in MySQL.
    # Returns the count, or HTTP 400 with a {'status', 'msg'} body when an
    # exception is raised.
    try:
        num = do_count_vpr(table_name, MYSQL_CLI)
        LOGGER.info("Successfully count the number of spk!")
        return num
    except Exception as e:
        LOGGER.error(e)
        # FastAPI does not interpret a (body, status) tuple; build the
        # response explicitly and send the exception text, not the object.
        return JSONResponse(
            status_code=400, content={'status': False,
                                      'msg': str(e)})


@app.post('/vpr/drop')
async def drop_tables(table_name: str=None):
    # Delete the table of MySQL.
    # Returns a {'status', 'msg'} dict, with HTTP 400 when an exception
    # is raised.
    try:
        do_drop_vpr(table_name, MYSQL_CLI)
        LOGGER.info("Successfully drop tables in MySQL!")
        return {'status': True, 'msg': "Successfully drop tables!"}
    except Exception as e:
        LOGGER.error(e)
        # FastAPI does not interpret a (body, status) tuple; build the
        # response explicitly and send the exception text, not the object.
        return JSONResponse(
            status_code=400, content={'status': False,
                                      'msg': str(e)})


@app.get('/data')
def audio_path(audio_path):
    # Get the audio file from path.
    # NOTE(review): the path comes from the query string unchanged, so any
    # file the server process can read may be requested -- confirm this is
    # acceptable outside a demo setting.
    try:
        LOGGER.info(f"Successfully get audio: {audio_path}")
        return FileResponse(audio_path)
    except Exception as e:
        LOGGER.error(f"get audio error: {e}")
        # FastAPI does not interpret a (body, status) tuple; build the
        # response explicitly and send the exception text, not the object.
        return JSONResponse(
            status_code=400, content={'status': False,
                                      'msg': str(e)})


# When run as a script, serve the app with uvicorn on all interfaces,
# port 8002.
if __name__ == '__main__':
    uvicorn.run(app=app, host='0.0.0.0', port=8002)


================================================
FILE: demos/audio_tagging/README.md
================================================
([简体中文](./README_cn.md)|English)

# Audio Tagging

## Introduction
Audio tagging is the task of labeling an audio clip with one or more labels or tags, including music tagging, acoustic scene classification, audio event classification, etc.

This demo is an implementation to tag an audio file with 527 [AudioSet](https://research.google.com/audioset/) labels. It can be done by a single command or a few lines in python using `PaddleSpeech`. 

## Usage
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose one way from easy, medium and hard to install paddlespeech.

### 2. Prepare Input File
The input of this demo should be a WAV file (`.wav`).

Here are sample files for this demo that can be downloaded:
```bash
wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/cat.wav https://paddlespeech.cdn.bcebos.com/PaddleAudio/dog.wav
```

### 3. Usage
- Command Line(Recommended)
  ```bash
  paddlespeech cls --input ./cat.wav --topk 10
  ```
  Usage:
  ```bash
  paddlespeech cls --help
  ```
  Arguments:
  - `input`(required): The audio file to tag.
  - `model`: Model type of tagging task. Default: `panns_cnn14`.
  - `config`: Config of tagging task. Use a pretrained model when it is None. Default: `None`.
  - `ckpt_path`: Model checkpoint. Use a pretrained model when it is None. Default: `None`.
  - `label_file`: Label file of tagging task. Use audio set labels when it is None. Default: `None`.
  - `topk`: Show topk tagging labels of the result. Default: `1`.
  - `device`: Choose the device to execute model inference. Default: default device of paddlepaddle in the current environment.

  Output:
  ```bash
  [2021-12-08 14:49:40,671] [    INFO] [utils.py] [L225] - CLS Result:
  Cat: 0.8991316556930542
  Domestic animals, pets: 0.8806838393211365
  Meow: 0.8784668445587158
  Animal: 0.8776564598083496
  Caterwaul: 0.2232048511505127
  Speech: 0.03101264126598835
  Music: 0.02870696596801281
  Inside, small room: 0.016673989593982697
  Purr: 0.008387474343180656
  Bird: 0.006304860580712557
  ```

- Python API
  ```python
  import paddle
  from paddlespeech.cli.cls import CLSExecutor

  cls_executor = CLSExecutor()
  result = cls_executor(
      model='panns_cnn14',
      config=None,  # Set `config` and `ckpt_path` to None to use pretrained model.
      label_file=None,
      ckpt_path=None,
      audio_file='./cat.wav',
      topk=10,
      device=paddle.get_device())
  print('CLS Result: \n{}'.format(result))
  ```
  Output:
  ```bash
  CLS Result:
  Cat: 0.8991316556930542
  Domestic animals, pets: 0.8806838393211365
  Meow: 0.8784668445587158
  Animal: 0.8776564598083496
  Caterwaul: 0.2232048511505127
  Speech: 0.03101264126598835
  Music: 0.02870696596801281
  Inside, small room: 0.016673989593982697
  Purr: 0.008387474343180656
  Bird: 0.006304860580712557
  ```

### 4. Pretrained Models

Here is a list of pretrained models released by PaddleSpeech that can be used by command and python API:

| Model | Sample Rate
| :--- | :---: 
| panns_cnn6| 32000
| panns_cnn10| 32000
| panns_cnn14| 32000


================================================
FILE: demos/audio_tagging/README_cn.md
================================================

(简体中文|[English](./README.md))

# 声音分类
## 介绍
声音分类是为音频片段添加一个或多个标签的任务，包括音乐分类、声学场景分类、音频事件分类等。

这个 demo 使用 527 个 [AudioSet](https://research.google.com/audioset/) 数据集中的标签为音频进行分类，它可以通过使用 `PaddleSpeech` 的单个命令或 python 中的几行代码来实现。

## 使用方法
### 1. 安装
请看[安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install_cn.md)。

你可以从 easy，medium，hard 三种方式中选择一种方式安装。

### 2. 准备输入
这个 demo 的输入应该是一个 WAV 文件（`.wav`），

可以下载此 demo 的示例音频：
```bash
wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/cat.wav https://paddlespeech.cdn.bcebos.com/PaddleAudio/dog.wav
```

### 3. 使用方法
- 命令行 (推荐使用)
  ```bash
  paddlespeech cls --input ./cat.wav --topk 10
  ```
  使用方法：
  ```bash
  paddlespeech cls --help
  ```
  参数：
  - `input`(必须输入)： 用于分类的音频。
  - `model`： 声音分类任务的模型， 默认值： `panns_cnn14`.
  - `config`： 声音分类任务的配置文件，若不设置则使用预训练模型中的默认配置，  默认值： `None`。
  - `ckpt_path`：模型参数文件， 若不设置则下载预训练模型使用， 默认值： `None`。
  - `label_file`：声音分类任务的标签文件，若不设置则使用音频数据集标签，默认值： `None`。
  - `topk`：展示分类结果的 topk 个结果，默认值： `1`。
  - `device`：执行预测的设备，默认值：当前系统下 paddlepaddle 的默认 device。

  输出：
  ```bash
  [2021-12-08 14:49:40,671] [    INFO] [utils.py] [L225] - CLS Result:
  Cat: 0.8991316556930542
  Domestic animals, pets: 0.8806838393211365
  Meow: 0.8784668445587158
  Animal: 0.8776564598083496
  Caterwaul: 0.2232048511505127
  Speech: 0.03101264126598835
  Music: 0.02870696596801281
  Inside, small room: 0.016673989593982697
  Purr: 0.008387474343180656
  Bird: 0.006304860580712557
  ```

- Python API
  ```python
  import paddle
  from paddlespeech.cli.cls import CLSExecutor

  cls_executor = CLSExecutor()
  result = cls_executor(
      model='panns_cnn14',
      config=None,  # Set `config` and `ckpt_path` to None to use pretrained model.
      label_file=None,
      ckpt_path=None,
      audio_file='./cat.wav',
      topk=10,
      device=paddle.get_device())
  print('CLS Result: \n{}'.format(result))
  ```
  输出：
  ```bash
  CLS Result:
  Cat: 0.8991316556930542
  Domestic animals, pets: 0.8806838393211365
  Meow: 0.8784668445587158
  Animal: 0.8776564598083496
  Caterwaul: 0.2232048511505127
  Speech: 0.03101264126598835
  Music: 0.02870696596801281
  Inside, small room: 0.016673989593982697
  Purr: 0.008387474343180656
  Bird: 0.006304860580712557
  ```

### 4. 预训练模型

以下是 PaddleSpeech 提供的可以被命令行和 python api 使用的预训练模型列表：

| 模型 | 采样率
| :--- | :---: 
| panns_cnn6| 32000
| panns_cnn10| 32000
| panns_cnn14| 32000


================================================
FILE: demos/audio_tagging/run.sh
================================================
#!/bin/bash

# Fetch the two sample clips (resuming partial downloads), then print the
# ten highest-scoring tags for the cat clip.
wget -c \
    https://paddlespeech.cdn.bcebos.com/PaddleAudio/cat.wav \
    https://paddlespeech.cdn.bcebos.com/PaddleAudio/dog.wav
paddlespeech cls --input ./cat.wav --topk 10


================================================
FILE: demos/automatic_video_subtitiles/README.md
================================================
([简体中文](./README_cn.md)|English)
# Automatic Video Subtitles

## Introduction
Automatic video subtitles can generate subtitles from a specific video by using the Automatic Speech Recognition (ASR) system. 

This demo is an implementation that automatically generates subtitles from a video file. It can be done by a single command or a few lines in python using `PaddleSpeech`. 

## Usage
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md). 

You can choose one way from easy, medium and hard to install paddlespeech.

### 2. Prepare Input
Get a video file with the speech of the specific language:
```bash
wget -c https://paddlespeech.cdn.bcebos.com/demos/asr_demos/subtitle_demo1.mp4
```

Extract `.wav` with one channel and 16000 sample rate from the video:
```bash
ffmpeg -i subtitle_demo1.mp4 -ac 1 -ar 16000 -vn input.wav
```

### 3. Usage

- Python API
  ```python
  import paddle
  from paddlespeech.cli.asr import ASRExecutor
  from paddlespeech.cli.text import TextExecutor

  asr_executor = ASRExecutor()
  text_executor = TextExecutor()

  text = asr_executor(
      audio_file='input.wav',
      device=paddle.get_device())
  result = text_executor(
      text=text,
      task='punc',
      model='ernie_linear_p3_wudao',
      device=paddle.get_device())
  print('Text Result: \n{}'.format(result))
  ```
  Output:
  ```bash
  Text Result:
  当我说我可以把三十年的经验变成一个准确的算法，他们说不可能。当我说我们十个人就能实现对十九个城市变电站七乘二十四小时的实时监管，他们说不可能。
  ```


================================================
FILE: demos/automatic_video_subtitiles/README_cn.md
================================================
(简体中文|[English](./README.md))
# 视频字幕生成
## 介绍
视频字幕生成可以使用语音识别系统从特定视频生成字幕。

这个 demo 是一个为视频自动生成字幕的实现，它可以通过使用 `PaddleSpeech` 的单个命令或 python 中的几行代码来实现。
## 使用方法
### 1. 安装
请看[安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install_cn.md)。

你可以从 easy，medium，hard 三种方式中选择一种方式安装。

### 2. 准备输入
获取包含特定语言语音的视频文件：
```bash
wget -c https://paddlespeech.cdn.bcebos.com/demos/asr_demos/subtitle_demo1.mp4
```
从视频文件中提取单通道的 16kHz 采样率的 `.wav` 文件：
```bash
ffmpeg -i subtitle_demo1.mp4 -ac 1 -ar 16000 -vn input.wav
```
### 3. 使用方法
- Python API
  ```python
  import paddle
  from paddlespeech.cli.asr import ASRExecutor
  from paddlespeech.cli.text import TextExecutor

  asr_executor = ASRExecutor()
  text_executor = TextExecutor()

  text = asr_executor(
      audio_file='input.wav',
      device=paddle.get_device())
  result = text_executor(
      text=text,
      task='punc',
      model='ernie_linear_p3_wudao',
      device=paddle.get_device())
  print('Text Result: \n{}'.format(result))
  ```
  输出:
  ```bash
  Text Result:
  当我说我可以把三十年的经验变成一个准确的算法，他们说不可能。当我说我们十个人就能实现对十九个城市变电站七乘二十四小时的实时监管，他们说不可能。
  ```


================================================
FILE: demos/automatic_video_subtitiles/recognize.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os

import paddle

from paddlespeech.cli.asr import ASRExecutor
from paddlespeech.cli.text import TextExecutor

# yapf: disable
# Command-line interface of the demo.
# `__doc__` is passed as `description`: the first positional parameter of
# ArgumentParser is `prog`, so passing it positionally would replace the
# program name in the usage line as soon as the module gets a docstring.
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--input", type=str, required=True, help="Path of the audio file to run speech recognition on.")
parser.add_argument("--device", type=str, default=paddle.get_device(), help="Device handed to both executors. Default: paddle.get_device().")
args = parser.parse_args()
# yapf: enable

if __name__ == "__main__":
    asr_executor = ASRExecutor()
    text_executor = TextExecutor()

    # Step 1: speech recognition. The input path is expanded (`~`) and made
    # absolute before it is handed to the ASR executor.
    text = asr_executor(
        audio_file=os.path.abspath(os.path.expanduser(args.input)),
        device=args.device)

    # Step 2: run the 'punc' text task on the raw transcript.
    result = text_executor(
        text=text,
        task='punc',
        model='ernie_linear_p3_wudao',
        device=args.device)

    print('ASR Result: \n{}'.format(text))
    print('Text Result: \n{}'.format(result))


================================================
FILE: demos/automatic_video_subtitiles/run.sh
================================================
#!/bin/bash
# Demo driver: fetch the sample video, extract a single-channel 16 kHz wav
# from it, then hand that wav to recognize.py.

video_url=https://paddlespeech.cdn.bcebos.com/demos/asr_demos/subtitle_demo1.mp4
video_file=$(basename ${video_url})
# Audio name = video name up to its first '.', with a .wav extension.
audio_file=${video_file%%.*}.wav
num_channels=1
sr=16000

# Download video (skipped when the file is already present)
[ -f ${video_file} ] || wget -c ${video_url}

# Extract audio from video (skipped when the wav already exists)
[ -f ${audio_file} ] || ffmpeg -i ${video_file} -ac ${num_channels} -ar ${sr} -vn ${audio_file}

python -u recognize.py --input ${audio_file}
exit 0


================================================
FILE: demos/custom_streaming_asr/README.md
================================================
([简体中文](./README_cn.md)|English)

# Customized Auto Speech Recognition

## Introduction

In some cases, we need to recognize specific rare words with high accuracy, e.g. address recognition in navigation apps. Customized ASR can solve this problem.

This demo is customized for a taxi expense report scenario, which needs to recognize rare addresses.

The scripts are in https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/speechx/examples/custom_asr

* G with slot: 打车到 "address_slot"。  
![](https://ai-studio-static-online.cdn.bcebos.com/28d9ef132a7f47a895a65ae9e5c4f55b8f472c9f3dd24be8a2e66e0b88b173a4)

* This is the address slot WFST; you can add the addresses that you want to recognize.  
![](https://ai-studio-static-online.cdn.bcebos.com/47c89100ef8c465bac733605ffc53d76abefba33d62f4d818d351f8cea3c8fe2)

* After the replace operation, G = fstreplace(G_with_slot, address_slot), we get the customized graph.  
![](https://ai-studio-static-online.cdn.bcebos.com/60a3095293044f10b73039ab10c7950d139a6717580a44a3ba878c6e74de402b)  

## Usage
### 1. Installation
Install the paddle:2.2.2 docker image.
```
sudo docker pull registry.baidubce.com/paddlepaddle/paddle:2.2.2

sudo docker run --privileged  --net=host --ipc=host -it --rm -v $PWD:/paddle --name=paddle_demo_docker registry.baidubce.com/paddlepaddle/paddle:2.2.2 /bin/bash 
```

### 2. demo
* Run `websocket_server.sh`. This script downloads the resources and libs, and launches the service.
```
cd /paddle
bash websocket_server.sh
```
This script runs in two steps:  
1. Download resource.tar.gz; after it is extracted, the following directories can be found in the `resource` directory.  
model: acoustic model  
graph: the decoder graph (TLG.fst)  
lib: some libs  
bin: binary  
data: audio and wav.scp  

2. websocket_server_main launches the service.  
Some parameters:  
port: the service port  
graph_path: the decoder graph path  
model_path: acoustic model path  
For the other parameters, please refer to these files:  
PaddleSpeech/speechx/speechx/decoder/param.h  
PaddleSpeech/speechx/examples/ds2_ol/websocket/websocket_server_main.cc  

* In another terminal, run the script `websocket_client.sh`; the client will send data and get the results.
```
bash websocket_client.sh
```
websocket_client_main launches the client; wav_scp is the set of wav files to send, and port is the service port of the server.

* result:
In the log of client, you will see the message below:
```
I0513 10:58:13.827821 41768 recognizer_test_main.cc:56] wav len (sample): 70208
I0513 10:58:13.884493 41768 feature_cache.h:52] set finished
I0513 10:58:24.247171 41768 paddle_nnet.h:76] Tensor neml: 10240
I0513 10:58:24.247249 41768 paddle_nnet.h:76] Tensor neml: 10240
LOG ([5.5.544~2-f21d7]:main():decoder/recognizer_test_main.cc:90)  the result of case_10 is 五月十二日二十二点三十六分加班打车回家四十一元
```


================================================
FILE: demos/custom_streaming_asr/README_cn.md
================================================
(简体中文|[English](./README.md))

# 定制化语音识别演示
## 介绍
在一些场景中，识别系统需要高精度的识别一些稀有词，例如导航软件中地名识别。而通过定制化识别可以满足这一需求。  

这个 demo 是打车报销单的场景识别，需要识别一些稀有的地名，可以通过如下操作实现。

相关脚本:https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/speechx/examples/custom_asr

* G with slot: 打车到 "address_slot"。  
![](https://ai-studio-static-online.cdn.bcebos.com/28d9ef132a7f47a895a65ae9e5c4f55b8f472c9f3dd24be8a2e66e0b88b173a4)

* 这是 address slot wfst, 可以添加一些需要识别的地名.  
![](https://ai-studio-static-online.cdn.bcebos.com/47c89100ef8c465bac733605ffc53d76abefba33d62f4d818d351f8cea3c8fe2)

* 通过 replace 操作, G = fstreplace(G_with_slot, address_slot), 最终可以得到定制化的解码图。  
![](https://ai-studio-static-online.cdn.bcebos.com/60a3095293044f10b73039ab10c7950d139a6717580a44a3ba878c6e74de402b)  

## 使用方法
### 1. 配置环境
安装paddle:2.2.2 docker镜像。
```
sudo docker pull registry.baidubce.com/paddlepaddle/paddle:2.2.2

sudo docker run --privileged  --net=host --ipc=host -it --rm -v $PWD:/paddle --name=paddle_demo_docker registry.baidubce.com/paddlepaddle/paddle:2.2.2 /bin/bash 
```

### 2. 演示
* 运行如下命令，完成相关资源和库的下载和服务启动。
```
cd /paddle
bash websocket_server.sh
```
上面脚本完成了如下两个功能：
1. 完成 resource.tar.gz 下载，解压后,会在 resource 中发现如下目录：  
model: 声学模型  
graph: 解码构图  
lib: 相关库  
bin: 运行程序  
data: 语音数据  

2. 通过 websocket_server_main 来启动服务。
这里简单的介绍几个参数:  
port 是服务端口，  
graph_path 用来指定解码图文件，  
其他参数说明可参见代码：  
PaddleSpeech/speechx/speechx/decoder/param.h  
PaddleSpeech/speechx/examples/ds2_ol/websocket/websocket_server_main.cc  

* 在另一个终端中， 通过 client 发送数据，得到结果。运行如下命令：
```
bash websocket_client.sh
```
通过 websocket_client_main 来启动 client 服务，其中 wav_scp 是发送的语音句子集合，port 为服务端口。

* 结果：
client 的 log 中可以看到如下类似的结果
```
I0513 10:58:13.827821 41768 recognizer_test_main.cc:56] wav len (sample): 70208
I0513 10:58:13.884493 41768 feature_cache.h:52] set finished
I0513 10:58:24.247171 41768 paddle_nnet.h:76] Tensor neml: 10240
I0513 10:58:24.247249 41768 paddle_nnet.h:76] Tensor neml: 10240
LOG ([5.5.544~2-f21d7]:main():decoder/recognizer_test_main.cc:90)  the result of case_10 is 五月十二日二十二点三十六分加班打车回家四十一元
```


================================================
FILE: demos/custom_streaming_asr/path.sh
================================================
# Environment for the demo libraries and binaries under ./resource.
# NOTE: LD_LIBRARY_PATH is replaced (not extended); PATH is appended to.
export LD_LIBRARY_PATH="${PWD}/resource/lib"
export PATH="${PATH}:${PWD}/resource/bin"


================================================
FILE: demos/custom_streaming_asr/setup_docker.sh
================================================
# Start an interactive container from the paddle:2.2.2 image with the current
# directory mounted at /paddle; the container is removed on exit (--rm).
sudo nvidia-docker run \
    --privileged \
    --net=host \
    --ipc=host \
    -it \
    --rm \
    -v ${PWD}:/paddle \
    --name=paddle_demo_docker \
    registry.baidubce.com/paddlepaddle/paddle:2.2.2 \
    /bin/bash


================================================
FILE: demos/custom_streaming_asr/websocket_client.sh
================================================
#!/bin/bash
# Runs websocket_client_main on the wav list data/wav.scp against port 8881.
set +x
set -e

# Puts the demo binaries on PATH.
. path.sh

# glog: write log messages to stderr
export GLOG_logtostderr=1

# input: directory that holds the wav list, and the list's file name
data=${PWD}/data
wav_scp=wav.scp

# websocket client
websocket_client_main \
    --wav_rspecifier=scp:${data}/${wav_scp} \
    --streaming_chunk=0.36 \
    --port=8881


================================================
FILE: demos/custom_streaming_asr/websocket_server.sh
================================================
#!/bin/bash
# Downloads the demo resources when they are missing, then starts
# websocket_server_main on port 8881.
set +x
set -e

export GLOG_logtostderr=1

# Puts the demo binaries on PATH.
. path.sh

# Locations inside the unpacked resource archive.
model_dir=./resource/model
graph_dir=./resource/graph
cmvn=./data/cmvn.ark

# paddle_asr_online/resource.tar.gz
resource_url=https://paddlespeech.cdn.bcebos.com/s2t/paddle_asr_online/resource.tar.gz

# A missing cmvn file is taken to mean the resources were never fetched:
# download, unpack, and link ./resource/data into the current directory.
if [ ! -f ${cmvn} ]; then
    wget -c ${resource_url}
    tar xzfv resource.tar.gz
    ln -s ./resource/data .
fi

# websocket server
websocket_server_main \
    --cmvn_file=${cmvn} \
    --streaming_chunk=0.1 \
    --use_fbank=true \
    --model_path=${model_dir}/avg_10.jit.pdmodel \
    --param_path=${model_dir}/avg_10.jit.pdiparams \
    --model_cache_shapes="5-1-2048,5-1-2048" \
    --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
    --word_symbol_table=${graph_dir}/words.txt \
    --graph_path=${graph_dir}/TLG.fst \
    --max_active=7500 \
    --port=8881 \
    --acoustic_scale=12


================================================
FILE: demos/keyword_spotting/README.md
================================================
([简体中文](./README_cn.md)|English)
# KWS (Keyword Spotting)

## Introduction
KWS (Keyword Spotting) is a technique to recognize keywords in a given speech audio.

This demo is an implementation to recognize keyword from a specific audio file. It can be done by a single command or a few lines in python using `PaddleSpeech`. 

## Usage
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose one way from easy, medium and hard to install paddlespeech.

### 2. Prepare Input File
The input of this demo should be a WAV file(`.wav`), and the sample rate must be the same as the model.

Here are sample files for this demo that can be downloaded:
```bash
wget -c https://paddlespeech.cdn.bcebos.com/kws/hey_snips.wav https://paddlespeech.cdn.bcebos.com/kws/non-keyword.wav
```

### 3. Usage
- Command Line(Recommended)
  ```bash
  paddlespeech kws --input ./hey_snips.wav
  paddlespeech kws --input ./non-keyword.wav
  ```
  
  Usage:
  ```bash
  paddlespeech kws --help
  ```
  Arguments:
  - `input`(required): Audio file to recognize.
  - `threshold`: Score threshold for kws. Default: `0.8`.
  - `model`: Model type of kws task. Default: `mdtc_heysnips`.
  - `config`: Config of kws task. Use pretrained model when it is None. Default: `None`.
  - `ckpt_path`: Model checkpoint. Use pretrained model when it is None. Default: `None`.
  - `device`: Choose device to execute model inference. Default: default device of paddlepaddle in current environment.
  - `verbose`: Show the log information.

  Output:
  ```bash
  # Input file: ./hey_snips.wav
  Score: 1.000, Threshold: 0.8, Is keyword: True
  # Input file: ./non-keyword.wav
  Score: 0.000, Threshold: 0.8, Is keyword: False
  ```

- Python API
  ```python
  import paddle
  from paddlespeech.cli.kws import KWSExecutor

  kws_executor = KWSExecutor()
  result = kws_executor(
      audio_file='./hey_snips.wav',
      threshold=0.8,
      model='mdtc_heysnips',
      config=None,
      ckpt_path=None,
      device=paddle.get_device())
  print('KWS Result: \n{}'.format(result))
  ```

  Output:
  ```bash
  KWS Result:
  Score: 1.000, Threshold: 0.8, Is keyword: True
  ```

### 4. Pretrained Models

Here is a list of pretrained models released by PaddleSpeech that can be used by command and python API:

| Model | Language | Sample Rate
| :--- | :---: | :---: |
| mdtc_heysnips | en | 16k


================================================
FILE: demos/keyword_spotting/README_cn.md
================================================
(简体中文|[English](./README.md))

# 关键词识别
## 介绍
关键词识别是一项用于识别一段语音内是否包含特定关键词的技术。

这个 demo 是一个从给定音频文件识别特定关键词的实现，它可以通过使用 `PaddleSpeech` 的单个命令或 python 中的几行代码来实现。
## 使用方法
### 1. 安装
请看[安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install_cn.md)。

你可以从 easy，medium，hard 三种方式中选择一种方式安装。

### 2. 准备输入
这个 demo 的输入应该是一个 WAV 文件（`.wav`），并且采样率必须与模型的采样率相同。

可以下载此 demo 的示例音频：
```bash
wget -c https://paddlespeech.cdn.bcebos.com/kws/hey_snips.wav https://paddlespeech.cdn.bcebos.com/kws/non-keyword.wav
```
### 3. 使用方法
- 命令行 (推荐使用)
  ```bash
  paddlespeech kws --input ./hey_snips.wav
  paddlespeech kws --input ./non-keyword.wav
  ```
  
  使用方法：
  ```bash
  paddlespeech kws --help
  ```
  参数：
  - `input`(必须输入)：用于识别关键词的音频文件。
  - `threshold`：用于判别是否包含关键词的得分阈值，默认值：`0.8`。
  - `model`：KWS 任务的模型，默认值：`mdtc_heysnips`。
  - `config`：KWS 任务的参数文件，若不设置则使用预训练模型中的默认配置，默认值：`None`。
  - `ckpt_path`：模型参数文件，若不设置则下载预训练模型使用，默认值：`None`。
  - `device`：执行预测的设备，默认值：当前系统下 paddlepaddle 的默认 device。
  - `verbose`: 如果使用，显示 logger 信息。

  输出：
  ```bash
  # 输入为 ./hey_snips.wav
  Score: 1.000, Threshold: 0.8, Is keyword: True
  # 输入为 ./non-keyword.wav
  Score: 0.000, Threshold: 0.8, Is keyword: False
  ```

- Python API
  ```python
  import paddle
  from paddlespeech.cli.kws import KWSExecutor

  kws_executor = KWSExecutor()
  result = kws_executor(
      audio_file='./hey_snips.wav',
      threshold=0.8,
      model='mdtc_heysnips',
      config=None,
      ckpt_path=None,
      device=paddle.get_device())
  print('KWS Result: \n{}'.format(result))
  ```

  输出：
  ```bash
  KWS Result:
  Score: 1.000, Threshold: 0.8, Is keyword: True
  ```

### 4. 预训练模型
以下是 PaddleSpeech 提供的可以被命令行和 python API 使用的预训练模型列表：

| 模型 | 语言 | 采样率
| :--- | :---: | :---: |
| mdtc_heysnips | en | 16k


================================================
FILE: demos/keyword_spotting/run.sh
================================================
#!/bin/bash
# Download the two sample clips, then run keyword spotting on each of them.

base_url=https://paddlespeech.cdn.bcebos.com/kws
wget -c ${base_url}/hey_snips.wav ${base_url}/non-keyword.wav

# kws
for wav in ./hey_snips.wav non-keyword.wav; do
    paddlespeech kws --input ${wav}
done


================================================
FILE: demos/metaverse/README.md
================================================
([简体中文](./README_cn.md)|English)

# Metaverse
## Introduction
Metaverse is a new form of Internet application and social interaction that integrates a variety of new technologies to produce virtual reality.

This demo is an implementation to let a celebrity in an image "speak". With the composition of the `TTS` module of `PaddleSpeech` and `PaddleGAN`, we integrate the installation and the specific modules in a single shell script. 
## Usage

You can make your favorite person say the specified content with the `TTS` module of `PaddleSpeech` and `PaddleGAN`, and construct your virtual human.

Run `run.sh` to complete all the essential procedures, including the installation.  

```bash
./run.sh
```
In `run.sh`, `source path.sh` is executed first to set the environment variables.

If you would like to try your sentence, please replace the sentence in `sentences.txt`.

If you would like to try your image, please replace the image `download/Lamarr.png` in the shell script.

The result is shown in our [notebook](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/tutorial/tts/tts_tutorial.ipynb).


================================================
FILE: demos/metaverse/README_cn.md
================================================
(简体中文|[English](./README.md))

# Metaverse

## 简介

Metaverse 是一种新的互联网应用和社交形式，融合了多种新技术，产生了虚拟现实。

这个演示是一个让图片中的名人“说话”的实现。通过 `PaddleSpeech` 的 `TTS` 模块和 `PaddleGAN` 的组合，我们集成了安装和特定模块到一个 shell 脚本中。

## 使用

您可以使用 `PaddleSpeech` 的 `TTS` 模块和 `PaddleGAN` 让您最喜欢的人说出指定的内容，并构建您的虚拟人。

运行 `run.sh` 完成所有基本程序，包括安装。

```bash
./run.sh
```

在 `run.sh` 中，会先执行 `source path.sh` 来设置环境变量。

如果您想尝试您的句子，请替换 `sentences.txt` 中的句子。

如果您想尝试图像，请将图像替换 shell 脚本中的 `download/Lamarr.png` 。

结果已显示在我们的 [notebook](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/tutorial/tts/tts_tutorial.ipynb)。


================================================
FILE: demos/metaverse/path.sh
================================================
#!/bin/bash
# Environment setup for the metaverse demo.

# Two directories above the current working directory.
export MAIN_ROOT=$(realpath ${PWD}/../../)

# Make MAIN_ROOT and its utils/ directory reachable for executables and
# Python imports.
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

export LC_ALL=C
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONDONTWRITEBYTECODE=1

# Experiment directory of the selected TTS model.
MODEL=fastspeech2
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}


================================================
FILE: demos/metaverse/run.sh
================================================
#!/bin/bash
# Metaverse demo pipeline, split into numbered stages:
#   0: install PaddleGAN              1: download the wav2lip checkpoint
#   2: download TTS models and image  3: synthesize speech from sentences.txt
#   4: run wav2lip on the image and the synthesized audio
source path.sh

gpus=0
stage=0
stop_stage=100

# The stage range can be selected on the command line, for example
#   ./run.sh --stage 0 --stop-stage 0
# This cannot be mixed with positional arguments (`$1`, `$2`, ...).
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

# Succeeds when stage number $1 lies within [stage, stop_stage].
in_stage() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# Locations of the downloaded TTS checkpoints.
tts_model_url=https://paddlespeech.cdn.bcebos.com/Parakeet/released_models
am_dir=download/fastspeech2_nosil_baker_ckpt_0.4
voc_dir=download/pwg_baker_ckpt_0.4

mkdir -p download

if in_stage 0; then
    # install PaddleGAN
    git clone https://github.com/PaddlePaddle/PaddleGAN.git
    pip install -e PaddleGAN/
fi

if in_stage 1; then
    # download pretrained PaddleGAN model
    wget -P download https://paddlegan.bj.bcebos.com/models/wav2lip_hq.pdparams
fi

if in_stage 2; then
    # download pretrained tts models and unzip
    wget -P download ${tts_model_url}/pwgan/pwg_baker_ckpt_0.4.zip
    unzip -d download ${voc_dir}.zip
    wget -P download ${tts_model_url}/fastspeech2/fastspeech2_nosil_baker_ckpt_0.4.zip
    unzip -d download ${am_dir}.zip
    # download sources
    wget -P download https://paddlespeech.cdn.bcebos.com/demos/metaverse/Lamarr.png

fi

if in_stage 3; then
    # run tts
    CUDA_VISIBLE_DEVICES=${gpus} \
    python3 ${BIN_DIR}/../synthesize_e2e.py \
        --am=fastspeech2_csmsc \
        --am_config=${am_dir}/default.yaml \
        --am_ckpt=${am_dir}/snapshot_iter_76000.pdz \
        --am_stat=${am_dir}/speech_stats.npy  \
        --voc=pwgan_csmsc \
        --voc_config=${voc_dir}/pwg_default.yaml \
        --voc_ckpt=${voc_dir}/pwg_snapshot_iter_400000.pdz \
        --voc_stat=${voc_dir}/pwg_stats.npy \
        --lang=zh \
        --text=sentences.txt \
        --output_dir=output/wavs \
        --inference_dir=output/inference \
        --phones_dict=${am_dir}/phone_id_map.txt
    # output/inference holds the saved static models, which are not needed here
    rm -rf output/inference
fi

if in_stage 4; then
    # Only one audio is tested here because this step is slow
    CUDA_VISIBLE_DEVICES=${gpus} \
    python3 PaddleGAN/applications/tools/wav2lip.py \
        --checkpoint_path download/wav2lip_hq.pdparams \
        --face download/Lamarr.png \
        --audio output/wavs/000.wav \
        --outfile output/tts_lips.mp4 \
        --face_enhancement
fi


================================================
FILE: demos/metaverse/sentences.txt
================================================
000 谁知青蛙一落地，竟变成了一位英俊的王子。于是遵照国王的意思，他做了公主的亲密伴侣。


================================================
FILE: demos/punctuation_restoration/README.md
================================================
([简体中文](./README_cn.md)|English)
# Punctuation Restoration
## Introduction
Punctuation restoration is a common post-processing problem for Automatic Speech Recognition (ASR) systems. It is important to improve the readability of the transcribed text for the human reader and facilitate NLP tasks. 

This demo is an implementation to restore punctuation from raw text. It can be done by a single command or a few lines in python using `PaddleSpeech`. 

## Usage
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose one way from easy, medium and hard to install paddlespeech.

### 2. Prepare Input
The input of this demo should be a text of the specific language that can be passed via argument.

### 3. Usage
- Command Line(Recommended)
  ```bash
  paddlespeech text --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭
  ```
  Usage:
  ```bash
  paddlespeech text --help
  ```
  Arguments:
  - `input`(required): Input raw text.
  - `task`: Choose subtask. Default: `punc`.
  - `model`: Model type of text task. Default: `ernie_linear_p7_wudao`.
  - `lang`: Choose model language. Default: `zh`.
  - `config`: Config of text task. Use pretrained model when it is None. Default: `None`.
  - `ckpt_path`: Model checkpoint. Use pretrained model when it is None. Default: `None`.
  - `punc_vocab`: Vocabulary file of punctuation restoration task. Default: `None`.
  - `device`: Choose device to execute model inference. Default: default device of paddlepaddle in current environment.

  Output:
  ```bash
  [2021-12-14 19:50:22,200] [    INFO] [log.py] [L57] - Text Result:
  今天的天气真不错啊！你下午有空吗？我想约你一起去吃饭。
  ```

- Python API
  ```python
  import paddle
  from paddlespeech.cli.text import TextExecutor

  text_executor = TextExecutor()
  result = text_executor(
      text='今天的天气真不错啊你下午有空吗我想约你一起去吃饭',
      task='punc',
      model='ernie_linear_p7_wudao',
      lang='zh',
      config=None,
      ckpt_path=None,
      punc_vocab=None,
      device=paddle.get_device())
  print('Text Result: \n{}'.format(result))
  ```
  Output:
  ```bash
  Text Result:
  今天的天气真不错啊！你下午有空吗？我想约你一起去吃饭。
  ```

### 4. Pretrained Models
Here is a list of pretrained models released by PaddleSpeech that can be used by command and python API:

- Punctuation Restoration
  | Model | Language | Number of Punctuation Characters
  | :--- | :---: | :---: 
  | ernie_linear_p3_wudao| zh | 3(，。？)
  | ernie_linear_p7_wudao| zh | 7(，。！？、：；)


================================================
FILE: demos/punctuation_restoration/README_cn.md
================================================
(简体中文|[English](./README.md))

# 标点恢复
## 介绍

标点恢复是语音识别系统中常见的后处理步骤。提高转录文本的可读性对于人类阅读和后续的自然语言处理任务是非常重要的。

这个 demo 是一个为原始文本恢复标点的实现，它可以通过使用 `PaddleSpeech` 的单个命令或 python 中的几行代码来实现。

## 使用方法
### 1. 安装
请看[安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install_cn.md)。

你可以从 easy，medium，hard 三种方式中选择一种方式安装。

### 2. 准备输入
这个 demo 的输入是通过参数传递的特定语言的文本。

### 3. 使用方法
- 命令行 (推荐使用)
  ```bash
  paddlespeech text --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭
  ```
  使用方法:
  ```bash
  paddlespeech text --help
  ```
  参数：
  - `input`(必须输入)：原始文本。
  - `task`：子任务，默认值：`punc`。
  - `model`：文本模型类型，默认值：`ernie_linear_p7_wudao`。
  - `lang`：模型语言， 默认值：`zh`。
  - `config`：文本任务的配置文件，若不设置则使用预训练模型中的默认配置，默认值：`None`。
  - `ckpt_path`：模型参数文件， 若不设置则下载预训练模型使用，默认值：`None`。
  - `punc_vocab`：标点恢复任务的标点词表文件，默认值：`None`。
  - `device`：执行预测的设备，默认值：当前系统下 paddlepaddle 的默认 device。

  输出：
  ```bash
  [2021-12-14 19:50:22,200] [    INFO] [log.py] [L57] - Text Result:
  今天的天气真不错啊！你下午有空吗？我想约你一起去吃饭。
  ```

- Python API
  ```python
  import paddle
  from paddlespeech.cli.text import TextExecutor

  text_executor = TextExecutor()
  result = text_executor(
      text='今天的天气真不错啊你下午有空吗我想约你一起去吃饭',
      task='punc',
      model='ernie_linear_p7_wudao',
      lang='zh',
      config=None,
      ckpt_path=None,
      punc_vocab=None,
      device=paddle.get_device())
  print('Text Result: \n{}'.format(result))
  ```
  输出：
  ```bash
  Text Result:
  今天的天气真不错啊！你下午有空吗？我想约你一起去吃饭。
  ```

### 4. 预训练模型
以下是 PaddleSpeech 提供的可以被命令行和 python API 使用的预训练模型列表：

- 标点恢复
  | 模型 | 语言 | 标点类型数
  | :--- | :---: | :---: 
  | ernie_linear_p3_wudao| zh | 3(，。？)
  | ernie_linear_p7_wudao| zh | 7(，。！？、：；)


================================================
FILE: demos/punctuation_restoration/run.sh
================================================
#!/bin/bash

paddlespeech text --input 今天的天气真好啊你下午有空吗我想约你一起去吃饭


================================================
FILE: demos/speaker_verification/README.md
================================================
([简体中文](./README_cn.md)|English)
# Speaker Verification

## Introduction

Speaker verification refers to the problem of getting a speaker embedding from an audio.

This demo is an implementation to extract speaker embedding from a specific audio file. It can be done by a single command or a few lines in python using `PaddleSpeech`. 

## Usage
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose one way from easy, medium and hard to install paddlespeech.

### 2. Prepare Input File
The input of this cli demo should be a WAV file(`.wav`), and the sample rate must be the same as the model.

Here are sample files for this demo that can be downloaded:
```bash
wget -c https://paddlespeech.cdn.bcebos.com/vector/audio/85236145389.wav
wget -c https://paddlespeech.cdn.bcebos.com/vector/audio/123456789.wav
```

### 3. Usage
- Command Line(Recommended)
  ```bash
  paddlespeech vector --task spk --input 85236145389.wav

  echo -e "demo1 85236145389.wav" > vec.job
  paddlespeech vector --task spk --input vec.job

  echo -e "demo2 85236145389.wav \n demo3 85236145389.wav" | paddlespeech vector --task spk

  paddlespeech vector --task score --input "./85236145389.wav ./123456789.wav"
  
  echo -e "demo4 85236145389.wav 85236145389.wav \n demo5 85236145389.wav 123456789.wav" > vec.job
  paddlespeech vector --task score --input vec.job
  ```
  
  Usage:
  ```bash
  paddlespeech vector --help
  ```
  Arguments:
  - `input`(required): Audio file to recognize.
  - `task` (required): Specify `vector` task. Default: `spk`.
  - `model`: Model type of vector task. Default: `ecapatdnn_voxceleb12`.
  - `sample_rate`: Sample rate of the model. Default: `16000`.
  - `config`: Config of vector task. Use pretrained model when it is None. Default: `None`.
  - `ckpt_path`: Model checkpoint. Use pretrained model when it is None. Default: `None`.
  - `device`: Choose device to execute model inference. Default: default device of paddlepaddle in current environment.

  Output:

  ```bash
    demo [ -1.3251206    7.8606825   -4.620626     0.3000721    2.2648535
    -1.1931441    3.0647137    7.673595    -6.0044727  -12.02426
    -1.9496069    3.1269536    1.618838    -7.6383104   -1.2299773
  -12.338331     2.1373026   -5.3957124    9.717328     5.6752305
    3.7805123    3.0597172    3.429692     8.97601     13.174125
    -0.53132284   8.9424715    4.46511     -4.4262476   -9.726503
    8.399328     7.2239175   -7.435854     2.9441683   -4.3430395
  -13.886965    -1.6346735  -10.9027405   -5.311245     3.8007221
    3.8976038   -2.1230774   -2.3521194    4.151031    -7.4048667
    0.13911647   2.4626107    4.9664545    0.9897574    5.4839754
    -3.3574002   10.1340065   -0.6120171  -10.403095     4.6007543
    16.00935     -7.7836914   -4.1945305   -6.9368606    1.1789556
    11.490801     4.2380238    9.550931     8.375046     7.5089145
    -0.65707296  -0.30051577   2.8406055    3.0828028    0.730817
    6.148354     0.13766119 -13.424735    -7.7461405   -2.3227983
    -8.305252     2.9879124  -10.995229     0.15211068  -2.3820348
    -1.7984174    8.495629    -5.8522367   -3.755498     0.6989711
    -5.2702994   -2.6188622   -1.8828466   -4.64665     14.078544
    -0.5495333   10.579158    -3.2160501    9.349004    -4.381078
  -11.675817    -2.8630207    4.5721755    2.246612    -4.574342
    1.8610188    2.3767874    5.6257877   -9.784078     0.64967257
    -1.4579505    0.4263264   -4.9211264   -2.454784     3.4869802
    -0.42654222   8.341269     1.356552     7.0966883  -13.102829
    8.016734    -7.1159344    1.8699781    0.208721    14.699384
    -1.025278    -2.6107233   -2.5082312    8.427193     6.9138527
    -6.2912464    0.6157366    2.489688    -3.4668267    9.921763
    11.200815    -0.1966403    7.4916005   -0.62312716  -0.25848144
    -9.947997    -0.9611041    1.1649219   -2.1907122   -1.5028487
    -0.51926106  15.165954     2.4649463   -0.9980445    7.4416637
    -2.0768049    3.5896823   -7.3055434   -7.5620847    4.323335
    0.0804418   -6.56401     -2.3148053   -1.7642345   -2.4708817
    -7.675618    -9.548878    -1.0177554    0.16986446   2.5877135
    -1.8752296   -0.36614323  -6.0493784   -2.3965611   -5.9453387
    0.9424033  -13.155974    -7.457801     0.14658108  -3.742797
    5.8414927   -1.2872906    5.5694313   12.57059      1.0939219
    2.2142086    1.9181576    6.9914207   -5.888139     3.1409824
    -2.003628     2.4434285    9.973139     5.03668      2.0051203
    2.8615603    5.860224     2.9176188   -1.6311141    2.0292206
    -4.070415    -6.831437  ]
  ```

- Python API
  ```python
  # `paddle` must be imported: the calls below use paddle.get_device().
  import paddle
  from paddlespeech.cli.vector import VectorExecutor

  vector_executor = VectorExecutor()
  # Embedding of the first audio file.
  audio_emb = vector_executor(
      model='ecapatdnn_voxceleb12',
      sample_rate=16000,
      config=None,  # Set `config` and `ckpt_path` to None to use pretrained model.
      ckpt_path=None,
      audio_file='./85236145389.wav',
      device=paddle.get_device())
  print('Audio embedding Result: \n{}'.format(audio_emb))

  # Embedding of the second audio file, extracted with the same settings.
  test_emb = vector_executor(
      model='ecapatdnn_voxceleb12',
      sample_rate=16000,
      config=None,  # Set `config` and `ckpt_path` to None to use pretrained model.
      ckpt_path=None,
      audio_file='./123456789.wav',
      device=paddle.get_device())
  print('Test embedding Result: \n{}'.format(test_emb))

  # score range [0, 1]
  score = vector_executor.get_embeddings_score(audio_emb, test_emb)
  print(f"Eembeddings Score: {score}")
  ```

  Output：

  ```bash
  # Vector Result:
   Audio embedding Result:
    [ -1.3251206    7.8606825   -4.620626     0.3000721    2.2648535
      -1.1931441    3.0647137    7.673595    -6.0044727  -12.02426
      -1.9496069    3.1269536    1.618838    -7.6383104   -1.2299773
    -12.338331     2.1373026   -5.3957124    9.717328     5.6752305
      3.7805123    3.0597172    3.429692     8.97601     13.174125
      -0.53132284   8.9424715    4.46511     -4.4262476   -9.726503
      8.399328     7.2239175   -7.435854     2.9441683   -4.3430395
    -13.886965    -1.6346735  -10.9027405   -5.311245     3.8007221
      3.8976038   -2.1230774   -2.3521194    4.151031    -7.4048667
      0.13911647   2.4626107    4.9664545    0.9897574    5.4839754
      -3.3574002   10.1340065   -0.6120171  -10.403095     4.6007543
      16.00935     -7.7836914   -4.1945305   -6.9368606    1.1789556
      11.490801     4.2380238    9.550931     8.375046     7.5089145
      -0.65707296  -0.30051577   2.8406055    3.0828028    0.730817
      6.148354     0.13766119 -13.424735    -7.7461405   -2.3227983
      -8.305252     2.9879124  -10.995229     0.15211068  -2.3820348
      -1.7984174    8.495629    -5.8522367   -3.755498     0.6989711
      -5.2702994   -2.6188622   -1.8828466   -4.64665     14.078544
      -0.5495333   10.579158    -3.2160501    9.349004    -4.381078
    -11.675817    -2.8630207    4.5721755    2.246612    -4.574342
      1.8610188    2.3767874    5.6257877   -9.784078     0.64967257
      -1.4579505    0.4263264   -4.9211264   -2.454784     3.4869802
      -0.42654222   8.341269     1.356552     7.0966883  -13.102829
      8.016734    -7.1159344    1.8699781    0.208721    14.699384
      -1.025278    -2.6107233   -2.5082312    8.427193     6.9138527
      -6.2912464    0.6157366    2.489688    -3.4668267    9.921763
      11.200815    -0.1966403    7.4916005   -0.62312716  -0.25848144
      -9.947997    -0.9611041    1.1649219   -2.1907122   -1.5028487
      -0.51926106  15.165954     2.4649463   -0.9980445    7.4416637
      -2.0768049    3.5896823   -7.3055434   -7.5620847    4.323335
      0.0804418   -6.56401     -2.3148053   -1.7642345   -2.4708817
      -7.675618    -9.548878    -1.0177554    0.16986446   2.5877135
      -1.8752296   -0.36614323  -6.0493784   -2.3965611   -5.9453387
      0.9424033  -13.155974    -7.457801     0.14658108  -3.742797
      5.8414927   -1.2872906    5.5694313   12.57059      1.0939219
      2.2142086    1.9181576    6.9914207   -5.888139     3.1409824
      -2.003628     2.4434285    9.973139     5.03668      2.0051203
      2.8615603    5.860224     2.9176188   -1.6311141    2.0292206
      -4.070415    -6.831437  ]
    # get the test embedding
    Test embedding Result:
    [  2.5247195    5.119042    -4.335273     4.4583654    5.047907
      3.5059214    1.6159848    0.49364898 -11.6899185   -3.1014526
      -5.6589785   -0.42684984   2.674276   -11.937654     6.2248464
    -10.776924    -5.694543     1.112041     1.5709964    1.0961034
      1.3976512    2.324352     1.339981     5.279319    13.734659
      -2.5753925   13.651442    -2.2357535    5.1575427   -3.251567
      1.4023279    6.1191974   -6.0845175   -1.3646189   -2.6789894
    -15.220778     9.779349    -9.411551    -6.388947     6.8313975
      -9.245996     0.31196198   2.5509644   -4.413065     6.1649427
      6.793837     2.6328635    8.620976     3.4832475    0.52491665
      2.9115407    5.8392377    0.6702376   -3.2726715    2.6694255
      16.91701     -5.5811176    0.23362345  -4.5573606  -11.801059
      14.728292    -0.5198082   -3.999922     7.0927105   -7.0459595
      -5.4389      -0.46420583  -5.1085467   10.376568    -8.889225
      -0.37705845  -1.659806     2.6731026   -7.1909504    1.4608804
      -2.163136    -0.17949677   4.0241547    0.11319201   0.601279
      2.039692     3.1910992  -11.649526    -8.121584    -4.8707457
      0.3851982    1.4231744   -2.3321972    0.99332285  14.121717
      5.899413     0.7384519  -17.760096    10.555021     4.1366534
      -0.3391071   -0.20792882   3.208204     0.8847948   -8.721497
      -6.432868    13.006379     4.8956      -9.155822    -1.9441519
      5.7815638   -2.066733    10.425042    -0.8802383   -2.4314315
      -9.869258     0.35095334  -5.3549943    2.1076174   -8.290468
      8.4433365   -4.689333     9.334139    -2.172678    -3.0250976
      8.394216    -3.2110903   -7.93868      2.3960824   -2.3213403
      -1.4963245   -3.476059     4.132903   -10.893354     4.362673
      -0.45456508  10.258634    -1.1655927   -6.7799754    0.22885278
      -4.399287     2.333433    -4.84745     -4.2752337   -1.3577863
      -1.0685898    9.505196     7.3062205    0.08708266  12.927811
      -9.57974      1.3936648   -1.9444873    5.776769    15.251903
      10.6118355   -1.4903594   -9.535318    -3.6553776   -1.6699586
      -0.5933151    7.600357    -4.8815503   -8.698617   -15.855757
      0.25632986  -7.2235737    0.9506656    0.7128582   -9.051738
      8.74869     -1.6426028   -6.5762258    2.506905    -6.7431564
      5.129912   -12.189555    -3.6435068   12.068113    -6.0059533
      -2.3535995    2.9014351   22.3082      -1.5563312   13.193291
      2.7583609   -7.468798     1.3407065   -4.599617    -6.2345777
      10.7689295    7.137627     5.099476     0.3473359    9.647881
      -2.0484571   -5.8549366 ]
    # get the score between enroll and test
    Eembeddings Score: 0.45332613587379456
  ```

### 4.Pretrained Models

Here is a list of pretrained models released by PaddleSpeech that can be used by command and python API:

| Model | Sample Rate
| :--- | :---: |
| ecapatdnn_voxceleb12 | 16k


================================================
FILE: demos/speaker_verification/README_cn.md
================================================
(简体中文|[English](./README.md))

# 声纹识别
## 介绍
声纹识别是一项用计算机程序自动提取说话人特征的技术。

这个 demo 是从一个给定音频文件中提取说话人特征，它可以通过使用 `PaddleSpeech` 的单个命令或 python 中的几行代码来实现。

## 使用方法
### 1. 安装
请看[安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install_cn.md)。

你可以从 easy，medium，hard 三种方式中选择一种方式安装。

### 2. 准备输入
声纹cli demo 的输入应该是一个 WAV 文件（`.wav`），并且采样率必须与模型的采样率相同。

可以下载此 demo 的示例音频：
```bash
# 该音频的内容是数字串 85236145389
wget -c https://paddlespeech.cdn.bcebos.com/vector/audio/85236145389.wav
wget -c https://paddlespeech.cdn.bcebos.com/vector/audio/123456789.wav
```
### 3. 使用方法
- 命令行 (推荐使用)
  ```bash
  paddlespeech vector --task spk --input 85236145389.wav

  echo -e "demo1 85236145389.wav" > vec.job
  paddlespeech vector --task spk --input vec.job

  echo -e "demo2 85236145389.wav \n demo3 85236145389.wav" | paddlespeech vector --task spk
  
  paddlespeech vector --task score --input "./85236145389.wav ./123456789.wav"
  
  echo -e "demo4 85236145389.wav 85236145389.wav \n demo5 85236145389.wav 123456789.wav" > vec.job
  paddlespeech vector --task score --input vec.job
  ```
  
  使用方法：
  ```bash
  paddlespeech vector --help
  ```
  参数：
  - `input`(必须输入)：用于识别的音频文件。
  - `task` (必须输入): 用于指定 `vector` 处理的具体任务，默认是 `spk`。
  - `model`：声纹任务的模型，默认值：`ecapatdnn_voxceleb12`。
  - `sample_rate`：音频采样率，默认值：`16000`。
  - `config`：声纹任务的参数文件，若不设置则使用预训练模型中的默认配置，默认值：`None`。
  - `ckpt_path`：模型参数文件，若不设置则下载预训练模型使用，默认值：`None`。
  - `device`：执行预测的设备，默认值：当前系统下 paddlepaddle 的默认 device。

  输出：
  ```bash
    [ -1.3251206    7.8606825   -4.620626     0.3000721    2.2648535
    -1.1931441    3.0647137    7.673595    -6.0044727  -12.02426
    -1.9496069    3.1269536    1.618838    -7.6383104   -1.2299773
  -12.338331     2.1373026   -5.3957124    9.717328     5.6752305
    3.7805123    3.0597172    3.429692     8.97601     13.174125
    -0.53132284   8.9424715    4.46511     -4.4262476   -9.726503
    8.399328     7.2239175   -7.435854     2.9441683   -4.3430395
  -13.886965    -1.6346735  -10.9027405   -5.311245     3.8007221
    3.8976038   -2.1230774   -2.3521194    4.151031    -7.4048667
    0.13911647   2.4626107    4.9664545    0.9897574    5.4839754
    -3.3574002   10.1340065   -0.6120171  -10.403095     4.6007543
    16.00935     -7.7836914   -4.1945305   -6.9368606    1.1789556
    11.490801     4.2380238    9.550931     8.375046     7.5089145
    -0.65707296  -0.30051577   2.8406055    3.0828028    0.730817
    6.148354     0.13766119 -13.424735    -7.7461405   -2.3227983
    -8.305252     2.9879124  -10.995229     0.15211068  -2.3820348
    -1.7984174    8.495629    -5.8522367   -3.755498     0.6989711
    -5.2702994   -2.6188622   -1.8828466   -4.64665     14.078544
    -0.5495333   10.579158    -3.2160501    9.349004    -4.381078
  -11.675817    -2.8630207    4.5721755    2.246612    -4.574342
    1.8610188    2.3767874    5.6257877   -9.784078     0.64967257
    -1.4579505    0.4263264   -4.9211264   -2.454784     3.4869802
    -0.42654222   8.341269     1.356552     7.0966883  -13.102829
    8.016734    -7.1159344    1.8699781    0.208721    14.699384
    -1.025278    -2.6107233   -2.5082312    8.427193     6.9138527
    -6.2912464    0.6157366    2.489688    -3.4668267    9.921763
    11.200815    -0.1966403    7.4916005   -0.62312716  -0.25848144
    -9.947997    -0.9611041    1.1649219   -2.1907122   -1.5028487
    -0.51926106  15.165954     2.4649463   -0.9980445    7.4416637
    -2.0768049    3.5896823   -7.3055434   -7.5620847    4.323335
    0.0804418   -6.56401     -2.3148053   -1.7642345   -2.4708817
    -7.675618    -9.548878    -1.0177554    0.16986446   2.5877135
    -1.8752296   -0.36614323  -6.0493784   -2.3965611   -5.9453387
    0.9424033  -13.155974    -7.457801     0.14658108  -3.742797
    5.8414927   -1.2872906    5.5694313   12.57059      1.0939219
    2.2142086    1.9181576    6.9914207   -5.888139     3.1409824
    -2.003628     2.4434285    9.973139     5.03668      2.0051203
    2.8615603    5.860224     2.9176188   -1.6311141    2.0292206
    -4.070415    -6.831437  ]
  ```

- Python API
  ```python
  import paddle
  from paddlespeech.cli.vector import VectorExecutor

  vector_executor = VectorExecutor()
  audio_emb = vector_executor(
      model='ecapatdnn_voxceleb12',
      sample_rate=16000,
      config=None,  # Set `config` and `ckpt_path` to None to use pretrained model.
      ckpt_path=None,
      audio_file='./85236145389.wav',
      device=paddle.get_device())
  print('Audio embedding Result: \n{}'.format(audio_emb))

  test_emb = vector_executor(
      model='ecapatdnn_voxceleb12',
      sample_rate=16000,
      config=None,  # Set `config` and `ckpt_path` to None to use pretrained model.
      ckpt_path=None,
      audio_file='./123456789.wav',
      device=paddle.get_device())
  print('Test embedding Result: \n{}'.format(test_emb))

  # score range [0, 1]
  score = vector_executor.get_embeddings_score(audio_emb, test_emb)
  print(f"Eembeddings Score: {score}")
  ```

  输出：
  ```bash
  # Vector Result:
   Audio embedding Result:
    [ -1.3251206    7.8606825   -4.620626     0.3000721    2.2648535
      -1.1931441    3.0647137    7.673595    -6.0044727  -12.02426
      -1.9496069    3.1269536    1.618838    -7.6383104   -1.2299773
    -12.338331     2.1373026   -5.3957124    9.717328     5.6752305
      3.7805123    3.0597172    3.429692     8.97601     13.174125
      -0.53132284   8.9424715    4.46511     -4.4262476   -9.726503
      8.399328     7.2239175   -7.435854     2.9441683   -4.3430395
    -13.886965    -1.6346735  -10.9027405   -5.311245     3.8007221
      3.8976038   -2.1230774   -2.3521194    4.151031    -7.4048667
      0.13911647   2.4626107    4.9664545    0.9897574    5.4839754
      -3.3574002   10.1340065   -0.6120171  -10.403095     4.6007543
      16.00935     -7.7836914   -4.1945305   -6.9368606    1.1789556
      11.490801     4.2380238    9.550931     8.375046     7.5089145
      -0.65707296  -0.30051577   2.8406055    3.0828028    0.730817
      6.148354     0.13766119 -13.424735    -7.7461405   -2.3227983
      -8.305252     2.9879124  -10.995229     0.15211068  -2.3820348
      -1.7984174    8.495629    -5.8522367   -3.755498     0.6989711
      -5.2702994   -2.6188622   -1.8828466   -4.64665     14.078544
      -0.5495333   10.579158    -3.2160501    9.349004    -4.381078
    -11.675817    -2.8630207    4.5721755    2.246612    -4.574342
      1.8610188    2.3767874    5.6257877   -9.784078     0.64967257
      -1.4579505    0.4263264   -4.9211264   -2.454784     3.4869802
      -0.42654222   8.341269     1.356552     7.0966883  -13.102829
      8.016734    -7.1159344    1.8699781    0.208721    14.699384
      -1.025278    -2.6107233   -2.5082312    8.427193     6.9138527
      -6.2912464    0.6157366    2.489688    -3.4668267    9.921763
      11.200815    -0.1966403    7.4916005   -0.62312716  -0.25848144
      -9.947997    -0.9611041    1.1649219   -2.1907122   -1.5028487
      -0.51926106  15.165954     2.4649463   -0.9980445    7.4416637
      -2.0768049    3.5896823   -7.3055434   -7.5620847    4.323335
      0.0804418   -6.56401     -2.3148053   -1.7642345   -2.4708817
      -7.675618    -9.548878    -1.0177554    0.16986446   2.5877135
      -1.8752296   -0.36614323  -6.0493784   -2.3965611   -5.9453387
      0.9424033  -13.155974    -7.457801     0.14658108  -3.742797
      5.8414927   -1.2872906    5.5694313   12.57059      1.0939219
      2.2142086    1.9181576    6.9914207   -5.888139     3.1409824
      -2.003628     2.4434285    9.973139     5.03668      2.0051203
      2.8615603    5.860224     2.9176188   -1.6311141    2.0292206
      -4.070415    -6.831437  ]
    # get the test embedding
    Test embedding Result:
    [  2.5247195    5.119042    -4.335273     4.4583654    5.047907
      3.5059214    1.6159848    0.49364898 -11.6899185   -3.1014526
      -5.6589785   -0.42684984   2.674276   -11.937654     6.2248464
    -10.776924    -5.694543     1.112041     1.5709964    1.0961034
      1.3976512    2.324352     1.339981     5.279319    13.734659
      -2.5753925   13.651442    -2.2357535    5.1575427   -3.251567
      1.4023279    6.1191974   -6.0845175   -1.3646189   -2.6789894
    -15.220778     9.779349    -9.411551    -6.388947     6.8313975
      -9.245996     0.31196198   2.5509644   -4.413065     6.1649427
      6.793837     2.6328635    8.620976     3.4832475    0.52491665
      2.9115407    5.8392377    0.6702376   -3.2726715    2.6694255
      16.91701     -5.5811176    0.23362345  -4.5573606  -11.801059
      14.728292    -0.5198082   -3.999922     7.0927105   -7.0459595
      -5.4389      -0.46420583  -5.1085467   10.376568    -8.889225
      -0.37705845  -1.659806     2.6731026   -7.1909504    1.4608804
      -2.163136    -0.17949677   4.0241547    0.11319201   0.601279
      2.039692     3.1910992  -11.649526    -8.121584    -4.8707457
      0.3851982    1.4231744   -2.3321972    0.99332285  14.121717
      5.899413     0.7384519  -17.760096    10.555021     4.1366534
      -0.3391071   -0.20792882   3.208204     0.8847948   -8.721497
      -6.432868    13.006379     4.8956      -9.155822    -1.9441519
      5.7815638   -2.066733    10.425042    -0.8802383   -2.4314315
      -9.869258     0.35095334  -5.3549943    2.1076174   -8.290468
      8.4433365   -4.689333     9.334139    -2.172678    -3.0250976
      8.394216    -3.2110903   -7.93868      2.3960824   -2.3213403
      -1.4963245   -3.476059     4.132903   -10.893354     4.362673
      -0.45456508  10.258634    -1.1655927   -6.7799754    0.22885278
      -4.399287     2.333433    -4.84745     -4.2752337   -1.3577863
      -1.0685898    9.505196     7.3062205    0.08708266  12.927811
      -9.57974      1.3936648   -1.9444873    5.776769    15.251903
      10.6118355   -1.4903594   -9.535318    -3.6553776   -1.6699586
      -0.5933151    7.600357    -4.8815503   -8.698617   -15.855757
      0.25632986  -7.2235737    0.9506656    0.7128582   -9.051738
      8.74869     -1.6426028   -6.5762258    2.506905    -6.7431564
      5.129912   -12.189555    -3.6435068   12.068113    -6.0059533
      -2.3535995    2.9014351   22.3082      -1.5563312   13.193291
      2.7583609   -7.468798     1.3407065   -4.599617    -6.2345777
      10.7689295    7.137627     5.099476     0.3473359    9.647881
      -2.0484571   -5.8549366 ]
    # get the score between enroll and test
    Eembeddings Score: 0.45332613587379456
  ```

### 4.预训练模型
以下是 PaddleSpeech 提供的可以被命令行和 python API 使用的预训练模型列表：

| 模型 | 采样率
| :--- | :---: |
| ecapatdnn_voxceleb12 | 16k


================================================
FILE: demos/speaker_verification/run.sh
================================================
#!/bin/bash

# Download the two sample utterances used by this demo (resuming partial files).
for wav_name in 85236145389.wav 123456789.wav; do
    wget -c "https://paddlespeech.cdn.bcebos.com/vector/audio/${wav_name}"
done

# Run the `spk` task on a single utterance.
paddlespeech vector --task spk --input ./85236145389.wav

# Run the `score` task on the pair of utterances.
paddlespeech vector --task score --input "./85236145389.wav ./123456789.wav"


================================================
FILE: demos/speech_recognition/.gitignore
================================================
*.wav


================================================
FILE: demos/speech_recognition/README.md
================================================
([简体中文](./README_cn.md)|English)
# ASR (Automatic Speech Recognition)

## Introduction
ASR, or Automatic Speech Recognition, refers to the problem of getting a program to automatically transcribe spoken language (speech-to-text). 

This demo is an implementation to recognize text from a specific audio file. It can be done by a single command or a few lines in python using `PaddleSpeech`. 

## Usage
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose one way from easy, medium and hard to install paddlespeech.

### 2. Prepare Input File
The input of this demo should be a WAV file(`.wav`), and the sample rate must be the same as the model.

Here are sample files for this demo that can be downloaded:
```bash
wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav https://paddlespeech.cdn.bcebos.com/PaddleAudio/ch_zh_mix.wav
```

### 3. Usage
- Command Line(Recommended)
  ```bash
  # Chinese
  paddlespeech asr --input ./zh.wav -v
  # English
  paddlespeech asr --model transformer_librispeech --lang en --input ./en.wav -v
  # Code-Switch
  paddlespeech asr --model conformer_talcs --lang zh_en --codeswitch True --input ./ch_zh_mix.wav -v 
  # Chinese ASR + Punctuation Restoration
  paddlespeech asr --input ./zh.wav -v | paddlespeech text --task punc -v
  ```
  (If you don't want to see the log information, you can remove "-v". Besides, it doesn't matter if package `paddlespeech-ctcdecoders` is not found, this package is optional.)
  
  Usage:
  ```bash
  paddlespeech asr --help
  ```
  Arguments:
  - `input`(required): Audio file to recognize.
  - `model`: Model type of asr task. Default: `conformer_wenetspeech`.
  - `lang`: Model language. Default: `zh`.
  - `codeswitch`: Code Switch Model. Default: `False`
  - `sample_rate`: Sample rate of the model. Default: `16000`.
  - `config`: Config of asr task. Use pretrained model when it is None. Default: `None`.
  - `ckpt_path`: Model checkpoint. Use pretrained model when it is None. Default: `None`.
  - `yes`: No additional parameters required. Once this parameter is set, the program's requests are accepted by default, which includes converting the audio sample rate. Default: `False`.
  - `device`: Choose device to execute model inference. Default: default device of paddlepaddle in current environment.
  - `verbose`: Show the log information.

  Output:
  ```bash
  # Chinese
  [2021-12-08 13:12:34,063] [    INFO] [utils.py] [L225] - ASR Result: 我认为跑步最重要的就是给我带来了身体健康
  # English
  [2022-01-12 11:51:10,815] [    INFO] - ASR Result: i knocked at the door on the ancient side of the building
  ```

- Python API
  ```python
  import paddle
  from paddlespeech.cli.asr import ASRExecutor

  asr_executor = ASRExecutor()
  text = asr_executor(
      model='conformer_wenetspeech',
      lang='zh',
      sample_rate=16000,
      config=None,  # Set `config` and `ckpt_path` to None to use pretrained model.
      ckpt_path=None,
      audio_file='./zh.wav',
      force_yes=False,
      device=paddle.get_device())
  print('ASR Result: \n{}'.format(text))
  ```

  Output:
  ```bash
  ASR Result:
  我认为跑步最重要的就是给我带来了身体健康
  ```

### 4.Pretrained Models

Here is a list of pretrained models released by PaddleSpeech that can be used by command and python API:

| Model | Code Switch | Language | Sample Rate
| :--- | :---: | :---: | :---: |
| conformer_wenetspeech | False | zh | 16k
| conformer_online_multicn | False | zh | 16k
| conformer_aishell | False | zh | 16k
| conformer_online_aishell | False | zh | 16k
| transformer_librispeech | False | en | 16k
| deepspeech2online_wenetspeech | False | zh | 16k
| deepspeech2offline_aishell | False | zh| 16k
| deepspeech2online_aishell | False | zh | 16k
| deepspeech2offline_librispeech | False | en | 16k
| conformer_talcs | True | zh_en | 16k


================================================
FILE: demos/speech_recognition/README_cn.md
================================================
(简体中文|[English](./README.md))

# 语音识别
## 介绍
语音识别是一项用计算机程序自动转录语音的技术。

这个 demo 是一个从给定音频文件识别文本的实现，它可以通过使用 `PaddleSpeech` 的单个命令或 python 中的几行代码来实现。
## 使用方法
### 1. 安装
请看[安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install_cn.md)。

你可以从 easy，medium，hard 三种方式中选择一种方式安装。

### 2. 准备输入
这个 demo 的输入应该是一个 WAV 文件（`.wav`），并且采样率必须与模型的采样率相同。

可以下载此 demo 的示例音频：
```bash
wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav https://paddlespeech.cdn.bcebos.com/PaddleAudio/ch_zh_mix.wav
```
### 3. 使用方法
- 命令行 (推荐使用)
  ```bash
  # 中文
  paddlespeech asr --input ./zh.wav -v
  # 英文
  paddlespeech asr --model transformer_librispeech --lang en --input ./en.wav -v
  # 中英混合
  paddlespeech asr --model conformer_talcs --lang zh_en --codeswitch True --input ./ch_zh_mix.wav -v 
  # 中文 + 标点恢复
  paddlespeech asr --input ./zh.wav -v | paddlespeech text --task punc -v
  ```
  (如果不想显示 log 信息，可以不使用"-v", 另外如果显示 `paddlespeech-ctcdecoders` 这个 python 包没有找到的 Error，没有关系，这个包是非必须的。)
  
  使用方法：
  ```bash
  paddlespeech asr --help
  ```
  参数：
  - `input`(必须输入)：用于识别的音频文件。
  - `model`：ASR 任务的模型，默认值：`conformer_wenetspeech`。
  - `lang`：模型语言，默认值：`zh`。
  - `codeswitch`: 是否使用语言转换，默认值：`False`。
  - `sample_rate`：音频采样率，默认值：`16000`。
  - `config`：ASR 任务的参数文件，若不设置则使用预训练模型中的默认配置，默认值：`None`。
  - `ckpt_path`：模型参数文件，若不设置则下载预训练模型使用，默认值：`None`。
  - `yes`：不需要设置额外的参数，一旦设置了该参数，说明你默认同意程序的所有请求，其中包括自动转换输入音频的采样率。默认值：`False`。
  - `device`：执行预测的设备，默认值：当前系统下 paddlepaddle 的默认 device。
  - `verbose`: 如果使用，显示 logger 信息。

  输出：
  ```bash
  # 中文
  [2021-12-08 13:12:34,063] [    INFO] [utils.py] [L225] - ASR Result: 我认为跑步最重要的就是给我带来了身体健康
  # 英文
  [2022-01-12 11:51:10,815] [    INFO] - ASR Result: i knocked at the door on the ancient side of the building
  ```

- Python API
  ```python
  import paddle
  from paddlespeech.cli.asr import ASRExecutor

  asr_executor = ASRExecutor()
  text = asr_executor(
      model='conformer_wenetspeech',
      lang='zh',
      sample_rate=16000,
      config=None,  # Set `config` and `ckpt_path` to None to use pretrained model.
      ckpt_path=None,
      audio_file='./zh.wav',
      force_yes=False,
      device=paddle.get_device())
  print('ASR Result: \n{}'.format(text))
  ```

  输出：
  ```bash
  ASR Result:
  我认为跑步最重要的就是给我带来了身体健康
  ```

### 4.预训练模型
以下是 PaddleSpeech 提供的可以被命令行和 python API 使用的预训练模型列表：

| 模型 | 语言转换 | 语言 | 采样率
| :--- | :---: | :---: | :---: |
| conformer_wenetspeech | False | zh | 16k
| conformer_online_multicn | False | zh | 16k
| conformer_aishell | False | zh | 16k
| conformer_online_aishell | False | zh | 16k
| transformer_librispeech | False | en | 16k
| deepspeech2online_wenetspeech | False | zh | 16k
| deepspeech2offline_aishell | False | zh| 16k
| deepspeech2online_aishell | False | zh | 16k
| deepspeech2offline_librispeech | False | en | 16k
| conformer_talcs | True | zh_en | 16k


================================================
FILE: demos/speech_recognition/run.sh
================================================
#!/bin/bash

# Download the sample audio files used by this demo (resuming partial files).
for wav_name in zh.wav en.wav ch_zh_mix.wav; do
    wget -c "https://paddlespeech.cdn.bcebos.com/PaddleAudio/${wav_name}"
done

# Default ASR run on the zh.wav sample.
paddlespeech asr --input ./zh.wav

# ASR piped into the `punc` text task.
paddlespeech asr --input ./zh.wav | paddlespeech text --task punc

# Show the ASR sub-command help.
paddlespeech asr --help

# English ASR.
paddlespeech asr --lang en --model transformer_librispeech --input ./en.wav

# Code-switch ASR on the ch_zh_mix.wav sample.
paddlespeech asr --lang zh_en --codeswitch True --model conformer_talcs --input ./ch_zh_mix.wav

# Run the stats command for the asr task.
paddlespeech stats --task asr

# Show the top-level paddlespeech help.
paddlespeech --help


================================================
FILE: demos/speech_server/.gitignore
================================================
*.wav


================================================
FILE: demos/speech_server/README.md
================================================
([简体中文](./README_cn.md)|English)

# Speech Server

## Introduction
This demo is an implementation of starting the voice service and accessing the service. It can be achieved with a single command using `paddlespeech_server` and `paddlespeech_client` or a few lines of code in python.

For service interface definition, please check:
- [PaddleSpeech Server RESTful API](https://github.com/PaddlePaddle/PaddleSpeech/wiki/PaddleSpeech-Server-RESTful-API)


## Usage
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

It is recommended to use **paddlepaddle 2.4rc** or above.

You can choose one way from easy, medium and hard to install paddlespeech.

**If you install in easy mode, you need to prepare the yaml file by yourself, you can refer to the yaml file in the conf directory.**

### 2. Prepare config File
The configuration file can be found in `conf/application.yaml` .
Among them, `engine_list` indicates the speech engine that will be included in the service to be started, in the format of `<speech task>_<engine type>`.
At present, the speech tasks integrated by the service include: asr (speech recognition), tts (text to speech) and cls (audio classification).
Currently the engine type supports two forms: python and inference (Paddle Inference)
**Note:** If the service can be started normally in the container, but the client access IP is unreachable, you can try to replace the `host` address in the configuration file with the local IP address.

### 3. Server Usage
- Command Line (Recommended)

  ```bash
  # start the service
  paddlespeech_server start --config_file ./conf/application.yaml
  ```

  > **Note:** For mixed Chinese and English speech recognition, please use the `./conf/conformer_talcs_application.yaml` configuration file 

  Usage:
  
  ```bash
  paddlespeech_server start --help
  ```
  Arguments:
  - `config_file`: yaml file of the app, default: ./conf/application.yaml
  - `log_file`: log file. Default: ./log/paddlespeech.log

  Output:
  ```text
  [2022-02-23 11:17:32] [INFO] [server.py:64] Started server process [6384]
  INFO:     Waiting for application startup.
  [2022-02-23 11:17:32] [INFO] [on.py:26] Waiting for application startup.
  INFO:     Application startup complete.
  [2022-02-23 11:17:32] [INFO] [on.py:38] Application startup complete.
  INFO:     Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit)
  [2022-02-23 11:17:32] [INFO] [server.py:204] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit)
  ```

- Python API
  ```python
  from paddlespeech.server.bin.paddlespeech_server import ServerExecutor

  server_executor = ServerExecutor()
  server_executor(
      config_file="./conf/application.yaml", 
      log_file="./log/paddlespeech.log")
  ```

  Output:
  ```text
  INFO:     Started server process [529]
  [2022-02-23 14:57:56] [INFO] [server.py:64] Started server process [529]
  INFO:     Waiting for application startup.
  [2022-02-23 14:57:56] [INFO] [on.py:26] Waiting for application startup.
  INFO:     Application startup complete.
  [2022-02-23 14:57:56] [INFO] [on.py:38] Application startup complete.
  INFO:     Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit)
  [2022-02-23 14:57:56] [INFO] [server.py:204] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit)
  ```


### 4. ASR Client Usage

The input of the ASR client demo should be a WAV file (`.wav`), and the sample rate must be the same as the model.

Here are sample files for this ASR client demo that can be downloaded:
```bash
wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav
wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav
wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/ch_zh_mix.wav
```

**Note:** The response time will be slightly longer when using the client for the first time
- Command Line (Recommended)

   If `127.0.0.1` is not accessible, you need to use the actual service IP address.

   ```bash
   paddlespeech_client asr --server_ip 127.0.0.1 --port 8090 --input ./zh.wav
  
   # Chinese and English mixed speech recognition,  using `./conf/conformer_talcs_application.yaml` config file
   paddlespeech_client asr --server_ip 127.0.0.1 --port 8090 --input ./ch_zh_mix.wav
   ```

  Usage:
  
  ```bash
  paddlespeech_client asr --help
  ```
  Arguments:
  - `server_ip`: server ip. Default: 127.0.0.1
  - `port`: server port. Default: 8090
  - `input`(required): Audio file to be recognized.
  - `sample_rate`: Audio sampling rate, default: 16000.
  - `lang`: Language. Default: "zh_cn".
  - `audio_format`: Audio format. Default: "wav".

  Output:
  ```text
  [2022-08-01 07:54:01,646] [    INFO] - ASR result: 我认为跑步最重要的就是给我带来了身体健康
  [2022-08-01 07:54:01,646] [    INFO] - Response time 4.898965 s.
  ```

- Python API
  ```python
  from paddlespeech.server.bin.paddlespeech_client import ASRClientExecutor

  asrclient_executor = ASRClientExecutor()
  res = asrclient_executor(
      input="./zh.wav",
      server_ip="127.0.0.1",
      port=8090,
      sample_rate=16000,
      lang="zh_cn",
      audio_format="wav")
  print(res)
  ```
  Output:
  ```text
  我认为跑步最重要的就是给我带来了身体健康
  ```
 
### 5. TTS Client Usage
**Note:** The response time will be slightly longer when using the client for the first time
- Command Line (Recommended)

   If `127.0.0.1` is not accessible, you need to use the actual service IP address

   ```bash
   paddlespeech_client tts --server_ip 127.0.0.1 --port 8090 --input "您好，欢迎使用百度飞桨语音合成服务。" --output output.wav
   ```
     Usage:
  
    ```bash
    paddlespeech_client tts --help
    ```
    Arguments:
    - `server_ip`: server ip. Default: 127.0.0.1
    - `port`: server port. Default: 8090
    - `input`(required): Input text to generate.
    - `spk_id`: Speaker id for multi-speaker text to speech. Default: 0
    - `speed`: Audio speed, the value should be set between 0 and 3. Default: 1.0
    - `volume`: Audio volume, the value should be set between 0 and 3. Default: 1.0
    - `sample_rate`: Sampling rate, choice: [0, 8000, 16000], the default is the same as the model. Default: 0
    - `output`: Output wave filepath. Default: None, which means not to save the audio to the local.

    Output:
    ```text
    [2022-02-23 15:20:37,875] [    INFO] - Save synthesized audio successfully on output.wav.
    [2022-02-23 15:20:37,875] [    INFO] - Audio duration: 3.612500 s.
    [2022-02-23 15:20:37,875] [    INFO] - Response time: 0.348050 s.
    ```

- Python API
  ```python
  from paddlespeech.server.bin.paddlespeech_client import TTSClientExecutor
  import json

  ttsclient_executor = TTSClientExecutor()
  res = ttsclient_executor(
      input="您好，欢迎使用百度飞桨语音合成服务。",
      server_ip="127.0.0.1",
      port=8090,
      spk_id=0,
      speed=1.0,
      volume=1.0,
      sample_rate=0,
      output="./output.wav")

  response_dict = res.json()
  print(response_dict["message"])
  print("Save synthesized audio successfully on %s." % (response_dict['result']['save_path']))
  print("Audio duration: %f s." %(response_dict['result']['duration']))
  ```

  Output:
  ```text
  {'description': 'success.'}
  Save synthesized audio successfully on ./output.wav.
  Audio duration: 3.612500 s.
  ```

### 6. CLS Client Usage

Here are sample files for this CLS Client demo that can be downloaded:
```bash
wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav 
```

**Note:** The response time will be slightly longer when using the client for the first time
- Command Line (Recommended)

   If `127.0.0.1` is not accessible, you need to use the actual service IP address.

   ```bash
   paddlespeech_client cls --server_ip 127.0.0.1 --port 8090 --input ./zh.wav
   ```

  Usage:
  
  ```bash
  paddlespeech_client cls --help
  ```
  Arguments:
  - `server_ip`: server ip. Default: 127.0.0.1
  - `port`: server port. Default: 8090
  - `input`(required): Audio file to be classified.
  - `topk`: topk scores of classification result.

  Output:
  ```text
  [2022-03-09 20:44:39,974] [    INFO] - {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'topk': 1, 'results': [{'class_name': 'Speech', 'prob': 0.9027184844017029}]}}
  [2022-03-09 20:44:39,975] [    INFO] - Response time 0.104360 s.
  ```

- Python API
  ```python
  from paddlespeech.server.bin.paddlespeech_client import CLSClientExecutor
  import json

  clsclient_executor = CLSClientExecutor()
  res = clsclient_executor(
      input="./zh.wav",
      server_ip="127.0.0.1",
      port=8090,
      topk=1)
  print(res.json())
  ```

  Output:
  ```text
  {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'topk': 1, 'results': [{'class_name': 'Speech', 'prob': 0.9027184844017029}]}}
  ```


### 7. Speaker Verification Client Usage

Here are sample files for this Speaker Verification Client demo that can be downloaded:
```bash
wget -c https://paddlespeech.cdn.bcebos.com/vector/audio/85236145389.wav
wget -c https://paddlespeech.cdn.bcebos.com/vector/audio/123456789.wav
```

#### 7.1 Extract speaker embedding
**Note:** The response time will be slightly longer when using the client for the first time
- Command Line (Recommended)

  If `127.0.0.1` is not accessible, you need to use the actual service IP address.

  ``` bash
  paddlespeech_client vector --task spk  --server_ip 127.0.0.1 --port 8090 --input 85236145389.wav
  ```

  Usage:

  ``` bash
  paddlespeech_client vector --help
  ```

  Arguments:
    * server_ip: server ip. Default: 127.0.0.1
    * port: server port. Default: 8090
    * input(required): Input audio file.
    * task: the task of vector, can be 'spk' or 'score'. Default is 'spk'.
    * enroll: enroll audio
    * test: test audio

  Output:

  ```text
  [2022-08-01 09:01:22,151] [    INFO] - vector http client start
  [2022-08-01 09:01:22,152] [    INFO] - the input audio: 85236145389.wav
  [2022-08-01 09:01:22,152] [    INFO] - endpoint: http://127.0.0.1:8090/paddlespeech/vector
  [2022-08-01 09:01:27,093] [    INFO] - {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'vec': [1.4217487573623657, 5.626248836517334, -5.342073440551758, 1.177390217781067, 3.308061122894287, 1.7565997838974, 5.1678876876831055, 10.806346893310547, -3.822679042816162, -5.614130973815918, 2.6238481998443604, -0.8072965741157532, 1.963512659072876, -7.312864780426025, 0.011034967377781868, -9.723127365112305, 0.661963164806366, -6.976816654205322, 10.213465690612793, 7.494767189025879, 2.9105641841888428, 3.894925117492676, 3.7999846935272217, 7.106173992156982, 16.905324935913086, -7.149376392364502, 8.733112335205078, 3.423002004623413, -4.831653118133545, -11.403371810913086, 11.232216835021973, 7.127464771270752, -4.282831192016602, 2.4523589611053467, -5.13075065612793, -18.17765998840332, -2.611666440963745, -11.00034236907959, -6.731431007385254, 1.6564655303955078, 0.7618184685707092, 1.1253058910369873, -2.0838277339935303, 4.725739002227783, -8.782590866088867, -3.5398736000061035, 3.8142387866973877, 5.142062664031982, 2.162053346633911, 4.09642219543457, -6.416221618652344, 12.747454643249512, 1.9429889917373657, -15.152948379516602, 6.417416572570801, 16.097013473510742, -9.716649055480957, -1.9920448064804077, -3.364956855773926, -1.8719490766525269, 11.567351341247559, 3.6978795528411865, 11.258269309997559, 7.442364692687988, 9.183405876159668, 4.528151512145996, -1.2417811155319214, 4.395910263061523, 6.672768592834473, 5.889888763427734, 7.627115249633789, -0.6692016124725342, -11.889703750610352, -9.208883285522461, -7.427401542663574, -3.777655601501465, 6.917237758636475, -9.848749160766602, -2.094479560852051, -5.1351189613342285, 0.49564215540885925, 9.317541122436523, -5.9141845703125, -1.809845209121704, -0.11738205701112747, -7.169270992279053, -1.0578246116638184, -5.721685886383057, -5.117387294769287, 16.137670516967773, -4.473618984222412, 7.66243314743042, -0.5538089871406555, 9.631582260131836, 
-6.470466613769531, -8.54850959777832, 4.371622085571289, -0.7970349192619324, 4.479003429412842, -2.9758646488189697, 3.2721707820892334, 2.8382749557495117, 5.1345953941345215, -9.19078254699707, -0.5657423138618469, -4.874573230743408, 2.316561460494995, -5.984307289123535, -2.1798791885375977, 0.35541653633117676, -0.3178458511829376, 9.493547439575195, 2.114448070526123, 4.358088493347168, -12.089820861816406, 8.451695442199707, -7.925461769104004, 4.624246120452881, 4.428938388824463, 18.691999435424805, -2.620460033416748, -5.149182319641113, -0.3582168221473694, 8.488557815551758, 4.98148250579834, -9.326834678649902, -2.2544236183166504, 6.64176607131958, 1.2119656801223755, 10.977132797241211, 16.55504035949707, 3.323848247528076, 9.55185317993164, -1.6677050590515137, -0.7953923940658569, -8.605660438537598, -0.4735637903213501, 2.6741855144500732, -5.359188079833984, -2.6673784255981445, 0.6660736799240112, 15.443212509155273, 4.740597724914551, -3.4725306034088135, 11.592561721801758, -2.05450701713562, 1.7361239194869995, -8.26533031463623, -9.304476737976074, 5.406835079193115, -1.5180232524871826, -7.746610641479492, -6.089605331420898, 0.07112561166286469, -0.34904858469963074, -8.649889945983887, -9.998958587646484, -2.5648481845855713, -0.5399898886680603, 2.6018145084381104, -0.31927648186683655, -1.8815231323242188, -2.0721378326416016, -3.4105639457702637, -8.299802780151367, 1.4836379289627075, -15.366002082824707, -8.288193702697754, 3.884773015975952, -3.4876506328582764, 7.362995624542236, 0.4657321572303772, 3.1326000690460205, 12.438883781433105, -1.8337029218673706, 4.532927513122559, 2.726433277130127, 10.145345687866211, -6.521956920623779, 2.8971481323242188, -3.3925881385803223, 5.079156398773193, 7.759725093841553, 4.677562236785889, 5.8457818031311035, 2.4023921489715576, 7.707108974456787, 3.9711389541625977, -6.390035152435303, 6.126871109008789, -3.776031017303467, -11.118141174316406]}}
  [2022-08-01 09:01:27,094] [    INFO] - Response time 4.941739 s.
  ```

* Python API

  ``` python
  from paddlespeech.server.bin.paddlespeech_client import VectorClientExecutor
  import json

  vectorclient_executor = VectorClientExecutor()
  res = vectorclient_executor(
      input="85236145389.wav",
      server_ip="127.0.0.1",
      port=8090,
      task="spk")
  print(res.json())
  ```

  Output:

  ```text
  {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'vec': [1.4217487573623657, 5.626248836517334, -5.342073440551758, 1.177390217781067, 3.308061122894287, 1.7565997838974, 5.1678876876831055, 10.806346893310547, -3.822679042816162, -5.614130973815918, 2.6238481998443604, -0.8072965741157532, 1.963512659072876, -7.312864780426025, 0.011034967377781868, -9.723127365112305, 0.661963164806366, -6.976816654205322, 10.213465690612793, 7.494767189025879, 2.9105641841888428, 3.894925117492676, 3.7999846935272217, 7.106173992156982, 16.905324935913086, -7.149376392364502, 8.733112335205078, 3.423002004623413, -4.831653118133545, -11.403371810913086, 11.232216835021973, 7.127464771270752, -4.282831192016602, 2.4523589611053467, -5.13075065612793, -18.17765998840332, -2.611666440963745, -11.00034236907959, -6.731431007385254, 1.6564655303955078, 0.7618184685707092, 1.1253058910369873, -2.0838277339935303, 4.725739002227783, -8.782590866088867, -3.5398736000061035, 3.8142387866973877, 5.142062664031982, 2.162053346633911, 4.09642219543457, -6.416221618652344, 12.747454643249512, 1.9429889917373657, -15.152948379516602, 6.417416572570801, 16.097013473510742, -9.716649055480957, -1.9920448064804077, -3.364956855773926, -1.8719490766525269, 11.567351341247559, 3.6978795528411865, 11.258269309997559, 7.442364692687988, 9.183405876159668, 4.528151512145996, -1.2417811155319214, 4.395910263061523, 6.672768592834473, 5.889888763427734, 7.627115249633789, -0.6692016124725342, -11.889703750610352, -9.208883285522461, -7.427401542663574, -3.777655601501465, 6.917237758636475, -9.848749160766602, -2.094479560852051, -5.1351189613342285, 0.49564215540885925, 9.317541122436523, -5.9141845703125, -1.809845209121704, -0.11738205701112747, -7.169270992279053, -1.0578246116638184, -5.721685886383057, -5.117387294769287, 16.137670516967773, -4.473618984222412, 7.66243314743042, -0.5538089871406555, 9.631582260131836, -6.470466613769531, -8.54850959777832, 
4.371622085571289, -0.7970349192619324, 4.479003429412842, -2.9758646488189697, 3.2721707820892334, 2.8382749557495117, 5.1345953941345215, -9.19078254699707, -0.5657423138618469, -4.874573230743408, 2.316561460494995, -5.984307289123535, -2.1798791885375977, 0.35541653633117676, -0.3178458511829376, 9.493547439575195, 2.114448070526123, 4.358088493347168, -12.089820861816406, 8.451695442199707, -7.925461769104004, 4.624246120452881, 4.428938388824463, 18.691999435424805, -2.620460033416748, -5.149182319641113, -0.3582168221473694, 8.488557815551758, 4.98148250579834, -9.326834678649902, -2.2544236183166504, 6.64176607131958, 1.2119656801223755, 10.977132797241211, 16.55504035949707, 3.323848247528076, 9.55185317993164, -1.6677050590515137, -0.7953923940658569, -8.605660438537598, -0.4735637903213501, 2.6741855144500732, -5.359188079833984, -2.6673784255981445, 0.6660736799240112, 15.443212509155273, 4.740597724914551, -3.4725306034088135, 11.592561721801758, -2.05450701713562, 1.7361239194869995, -8.26533031463623, -9.304476737976074, 5.406835079193115, -1.5180232524871826, -7.746610641479492, -6.089605331420898, 0.07112561166286469, -0.34904858469963074, -8.649889945983887, -9.998958587646484, -2.5648481845855713, -0.5399898886680603, 2.6018145084381104, -0.31927648186683655, -1.8815231323242188, -2.0721378326416016, -3.4105639457702637, -8.299802780151367, 1.4836379289627075, -15.366002082824707, -8.288193702697754, 3.884773015975952, -3.4876506328582764, 7.362995624542236, 0.4657321572303772, 3.1326000690460205, 12.438883781433105, -1.8337029218673706, 4.532927513122559, 2.726433277130127, 10.145345687866211, -6.521956920623779, 2.8971481323242188, -3.3925881385803223, 5.079156398773193, 7.759725093841553, 4.677562236785889, 5.8457818031311035, 2.4023921489715576, 7.707108974456787, 3.9711389541625977, -6.390035152435303, 6.126871109008789, -3.776031017303467, -11.118141174316406]}}
  ```

#### 7.2 Get the score between speaker audio embedding

**Note:** The response time will be slightly longer when using the client for the first time

- Command Line (Recommended)

  If `127.0.0.1` is not accessible, you need to use the actual service IP address.

  ``` bash
  paddlespeech_client vector --task score  --server_ip 127.0.0.1 --port 8090 --enroll 85236145389.wav --test 123456789.wav
  ```

  Usage:

  ``` bash
  paddlespeech_client vector --help
  ```

  Arguments:
    * server_ip: server ip. Default: 127.0.0.1
    * port: server port. Default: 8090
    * input(required for the 'spk' task): Audio file from which the speaker embedding is extracted; not used when scoring.
    * task: the task of vector, can be 'spk' or 'score'. To get the score, this must be set to 'score'.
    * enroll: enroll audio
    * test: test audio
  
  Output:

  ```text
  [2022-08-01 09:04:42,275] [    INFO] - vector score http client start
  [2022-08-01 09:04:42,275] [    INFO] - enroll audio: 85236145389.wav, test audio: 123456789.wav
  [2022-08-01 09:04:42,275] [    INFO] - endpoint: http://127.0.0.1:8090/paddlespeech/vector/score
  [2022-08-01 09:04:44,611] [    INFO] - {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'score': 0.4292638897895813}}
  [2022-08-01 09:04:44,611] [    INFO] - Response time 2.336258 s.
  ```

* Python API

  ``` python 
  from paddlespeech.server.bin.paddlespeech_client import VectorClientExecutor
  import json

  vectorclient_executor = VectorClientExecutor()
  res = vectorclient_executor(
      input=None,
      enroll_audio="85236145389.wav",
      test_audio="123456789.wav",
      server_ip="127.0.0.1",
      port=8090,
      task="score")
  print(res.json())
  ```

  Output:

  ```text
  {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'score': 0.4292638897895813}}
  ```

### 8. Punctuation prediction
  
**Note:** The response time will be slightly longer when using the client for the first time

- Command Line (Recommended)

  If `127.0.0.1` is not accessible, you need to use the actual service IP address.

   ``` bash
   paddlespeech_client text --server_ip 127.0.0.1 --port 8090 --input "我认为跑步最重要的就是给我带来了身体健康"
   ```

  Usage:
  
  ```bash
  paddlespeech_client text --help
  ```
  Arguments:
  - `server_ip`: server ip. Default: 127.0.0.1
  - `port`: server port. Default: 8090
  - `input`(required): Input text to get punctuation.

  Output:
  ```text
  [2022-05-09 18:19:04,397] [    INFO] - The punc text: 我认为跑步最重要的就是给我带来了身体健康。
  [2022-05-09 18:19:04,397] [    INFO] - Response time 0.092407 s.
  ```

- Python API
  ```python
  from paddlespeech.server.bin.paddlespeech_client import TextClientExecutor

  textclient_executor = TextClientExecutor()
  res = textclient_executor(
      input="我认为跑步最重要的就是给我带来了身体健康",
      server_ip="127.0.0.1",
      port=8090,)
  print(res)
  ```

  Output:
  ```text
  我认为跑步最重要的就是给我带来了身体健康。
  ```

## Models supported by the service
### ASR model
Get all models supported by the ASR service via `paddlespeech_server stats --task asr`, where static models can be used for Paddle Inference.

### TTS model
Get all models supported by the TTS service via `paddlespeech_server stats --task tts`, where static models can be used for Paddle Inference.

### CLS model
Get all models supported by the CLS service via `paddlespeech_server stats --task cls`, where static models can be used for Paddle Inference.

### Vector model
Get all models supported by the Vector service via `paddlespeech_server stats --task vector`, where static models can be used for Paddle Inference.

### Text model
Get all models supported by the Text service via `paddlespeech_server stats --task text`, where static models can be used for Paddle Inference.


================================================
FILE: demos/speech_server/README_cn.md
================================================
(简体中文|[English](./README.md))

# 语音服务

## 介绍

这个 demo 是一个启动离线语音服务和访问服务的实现。它可以通过使用 `paddlespeech_server` 和 `paddlespeech_client` 的单个命令或 python 的几行代码来实现。


服务接口定义请参考:
- [PaddleSpeech Server RESTful API](https://github.com/PaddlePaddle/PaddleSpeech/wiki/PaddleSpeech-Server-RESTful-API)

## 使用方法
### 1. 安装
请看 [安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

推荐使用 **paddlepaddle 2.4rc** 或以上版本。

你可以从简单，中等，困难 几种方式中选择一种方式安装 PaddleSpeech。

**如果使用简单模式安装，需要自行准备 yaml 文件，可参考 conf 目录下的 yaml 文件。**

### 2. 准备配置文件
配置文件可参见 `conf/application.yaml` 。
其中，`engine_list` 表示即将启动的服务将会包含的语音引擎，格式为 <语音任务>_<引擎类型>。

目前服务集成的语音任务有： asr (语音识别)、tts (语音合成)、cls (音频分类)、vector (声纹识别)以及 text (文本处理)。

目前引擎类型支持两种形式：python 及 inference (Paddle Inference)。

**注意：** 如果在容器里可正常启动服务，但客户端访问 ip 不可达，可尝试将配置文件中 `host` 地址换成本地 ip 地址。

### 3. 服务端使用方法
- 命令行 (推荐使用)

  ```bash
  # 启动服务
  paddlespeech_server start --config_file ./conf/application.yaml
  ```

  > **注意：** 中英文混合语音识别请使用  `./conf/conformer_talcs_application.yaml` 配置文件

  使用方法：
  
  ```bash
  paddlespeech_server start --help
  ```
  参数:
  - `config_file`: 服务的配置文件，默认： ./conf/application.yaml
  - `log_file`: log 文件. 默认：./log/paddlespeech.log

  输出:
  ```text
  [2022-02-23 11:17:32] [INFO] [server.py:64] Started server process [6384]
  INFO:     Waiting for application startup.
  [2022-02-23 11:17:32] [INFO] [on.py:26] Waiting for application startup.
  INFO:     Application startup complete.
  [2022-02-23 11:17:32] [INFO] [on.py:38] Application startup complete.
  INFO:     Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit)
  [2022-02-23 11:17:32] [INFO] [server.py:204] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit)
  ```

- Python API
  ```python
  from paddlespeech.server.bin.paddlespeech_server import ServerExecutor

  server_executor = ServerExecutor()
  server_executor(
      config_file="./conf/application.yaml", 
      log_file="./log/paddlespeech.log")
  ```

  输出：
  ```text
  INFO:     Started server process [529]
  [2022-02-23 14:57:56] [INFO] [server.py:64] Started server process [529]
  INFO:     Waiting for application startup.
  [2022-02-23 14:57:56] [INFO] [on.py:26] Waiting for application startup.
  INFO:     Application startup complete.
  [2022-02-23 14:57:56] [INFO] [on.py:38] Application startup complete.
  INFO:     Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit)
  [2022-02-23 14:57:56] [INFO] [server.py:204] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit)
  ```


### 4. ASR 客户端使用方法

ASR 客户端的输入是一个 WAV 文件（`.wav`），并且采样率必须与模型的采样率相同。

可以下载 ASR 客户端的示例音频：
```bash
wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav
wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav
wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/ch_zh_mix.wav
```

**注意：** 初次使用客户端时响应时间会略长
- 命令行 (推荐使用)

  若 `127.0.0.1` 不能访问，则需要使用实际服务 IP 地址

  ```bash
  paddlespeech_client asr --server_ip 127.0.0.1 --port 8090 --input ./zh.wav
  
  # 中英文混合语音识别 , 请使用  `./conf/conformer_talcs_application.yaml` 配置文件
  paddlespeech_client asr --server_ip 127.0.0.1 --port 8090 --input ./ch_zh_mix.wav
  ```

  使用帮助:

  ```bash
  paddlespeech_client asr --help
  ```

  参数:
  - `server_ip`: 服务端 ip 地址，默认: 127.0.0.1。
  - `port`: 服务端口，默认: 8090。
  - `input`(必须输入): 用于识别的音频文件。
  - `sample_rate`: 音频采样率，默认值：16000。
  - `lang`: 模型语言，默认值：zh_cn。
  - `audio_format`: 音频格式，默认值：wav。

  输出:
  ```text
  [2022-08-01 07:54:01,646] [    INFO] - ASR result: 我认为跑步最重要的就是给我带来了身体健康
  [2022-08-01 07:54:01,646] [    INFO] - Response time 4.898965 s.
  ```

- Python API
  ```python
  from paddlespeech.server.bin.paddlespeech_client import ASRClientExecutor

  asrclient_executor = ASRClientExecutor()
  res = asrclient_executor(
      input="./zh.wav",
      server_ip="127.0.0.1",
      port=8090,
      sample_rate=16000,
      lang="zh_cn",
      audio_format="wav")
  print(res)
  ```

  输出:
  ```text
  我认为跑步最重要的就是给我带来了身体健康
  ```
 
### 5. TTS 客户端使用方法
**注意：** 初次使用客户端时响应时间会略长
- 命令行 (推荐使用)
  
  若 `127.0.0.1` 不能访问，则需要使用实际服务 IP 地址

  ```bash
  paddlespeech_client tts --server_ip 127.0.0.1 --port 8090 --input "您好，欢迎使用百度飞桨语音合成服务。" --output output.wav
  ```
  使用帮助:

  ```bash
  paddlespeech_client tts --help
  ```

  参数:
  - `server_ip`: 服务端ip地址，默认: 127.0.0.1。
  - `port`: 服务端口，默认: 8090。
  - `input`(必须输入): 待合成的文本。
  - `spk_id`: 说话人 id，用于多说话人语音合成，默认值： 0。
  - `speed`: 音频速度，该值应设置在 0 到 3 之间。 默认值：1.0
  - `volume`: 音频音量，该值应设置在 0 到 3 之间。 默认值： 1.0
  - `sample_rate`: 采样率，可选 [0, 8000, 16000]，默认与模型相同。 默认值：0
  - `output`: 输出音频的路径， 默认值：None，表示不保存音频到本地。

  输出:
  ```text
  [2022-02-23 15:20:37,875] [    INFO] - Save synthesized audio successfully on output.wav.
  [2022-02-23 15:20:37,875] [    INFO] - Audio duration: 3.612500 s.
  [2022-02-23 15:20:37,875] [    INFO] - Response time: 0.348050 s.
  ```

- Python API
  ```python
  from paddlespeech.server.bin.paddlespeech_client import TTSClientExecutor
  import json

  ttsclient_executor = TTSClientExecutor()
  res = ttsclient_executor(
      input="您好，欢迎使用百度飞桨语音合成服务。",
      server_ip="127.0.0.1",
      port=8090,
      spk_id=0,
      speed=1.0,
      volume=1.0,
      sample_rate=0,
      output="./output.wav")

  response_dict = res.json()
  print(response_dict["message"])
  print("Save synthesized audio successfully on %s." % (response_dict['result']['save_path']))
  print("Audio duration: %f s." %(response_dict['result']['duration']))
  ```

  输出:
  ```text
  {'description': 'success.'}
  Save synthesized audio successfully on ./output.wav.
  Audio duration: 3.612500 s.
  ```

### 6. CLS 客户端使用方法

可以下载 CLS 客户端的示例音频：
```bash
wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav
```

**注意：** 初次使用客户端时响应时间会略长

- 命令行 (推荐使用)

  若 `127.0.0.1` 不能访问，则需要使用实际服务 IP 地址

  ```bash
  paddlespeech_client cls --server_ip 127.0.0.1 --port 8090 --input ./zh.wav
  ```

  使用帮助:
  
  ```bash
  paddlespeech_client cls --help
  ```
  参数:
  - `server_ip`: 服务端 ip 地址，默认: 127.0.0.1。
  - `port`: 服务端口，默认: 8090。
  - `input`(必须输入): 用于分类的音频文件。
  - `topk`: 分类结果的topk。

  输出:
  ```text
  [2022-03-09 20:44:39,974] [    INFO] - {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'topk': 1, 'results': [{'class_name': 'Speech', 'prob': 0.9027184844017029}]}}
  [2022-03-09 20:44:39,975] [    INFO] - Response time 0.104360 s.
  ```

- Python API
  ```python
  from paddlespeech.server.bin.paddlespeech_client import CLSClientExecutor
  import json

  clsclient_executor = CLSClientExecutor()
  res = clsclient_executor(
      input="./zh.wav",
      server_ip="127.0.0.1",
      port=8090,
      topk=1)
  print(res.json())
  ```

  输出:
  ```text
  {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'topk': 1, 'results': [{'class_name': 'Speech', 'prob': 0.9027184844017029}]}}
  ```

### 7. 声纹客户端使用方法

可以下载声纹客户端的示例音频：
```bash
wget -c https://paddlespeech.cdn.bcebos.com/vector/audio/85236145389.wav
wget -c https://paddlespeech.cdn.bcebos.com/vector/audio/123456789.wav
```

#### 7.1 提取声纹特征
**注意：** 初次使用客户端时响应时间会略长
* 命令行 (推荐使用)

  若 `127.0.0.1` 不能访问，则需要使用实际服务 IP 地址

  ```bash
  paddlespeech_client vector --task spk  --server_ip 127.0.0.1 --port 8090 --input 85236145389.wav
  ```

  使用帮助:

  ``` bash
  paddlespeech_client vector --help
  ```
  参数:
  * server_ip: 服务端ip地址，默认: 127.0.0.1。
  * port: 服务端口，默认: 8090。
  * input(必须输入): 用于识别的音频文件。
  * task: vector 的任务，可选 spk 或者 score。默认是 spk。
  * enroll: 注册音频。
  * test: 测试音频。

  输出:
  ```text
  [2022-08-01 09:01:22,151] [    INFO] - vector http client start
  [2022-08-01 09:01:22,152] [    INFO] - the input audio: 85236145389.wav
  [2022-08-01 09:01:22,152] [    INFO] - endpoint: http://127.0.0.1:8090/paddlespeech/vector
  [2022-08-01 09:01:27,093] [    INFO] - {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'vec': [1.4217487573623657, 5.626248836517334, -5.342073440551758, 1.177390217781067, 3.308061122894287, 1.7565997838974, 5.1678876876831055, 10.806346893310547, -3.822679042816162, -5.614130973815918, 2.6238481998443604, -0.8072965741157532, 1.963512659072876, -7.312864780426025, 0.011034967377781868, -9.723127365112305, 0.661963164806366, -6.976816654205322, 10.213465690612793, 7.494767189025879, 2.9105641841888428, 3.894925117492676, 3.7999846935272217, 7.106173992156982, 16.905324935913086, -7.149376392364502, 8.733112335205078, 3.423002004623413, -4.831653118133545, -11.403371810913086, 11.232216835021973, 7.127464771270752, -4.282831192016602, 2.4523589611053467, -5.13075065612793, -18.17765998840332, -2.611666440963745, -11.00034236907959, -6.731431007385254, 1.6564655303955078, 0.7618184685707092, 1.1253058910369873, -2.0838277339935303, 4.725739002227783, -8.782590866088867, -3.5398736000061035, 3.8142387866973877, 5.142062664031982, 2.162053346633911, 4.09642219543457, -6.416221618652344, 12.747454643249512, 1.9429889917373657, -15.152948379516602, 6.417416572570801, 16.097013473510742, -9.716649055480957, -1.9920448064804077, -3.364956855773926, -1.8719490766525269, 11.567351341247559, 3.6978795528411865, 11.258269309997559, 7.442364692687988, 9.183405876159668, 4.528151512145996, -1.2417811155319214, 4.395910263061523, 6.672768592834473, 5.889888763427734, 7.627115249633789, -0.6692016124725342, -11.889703750610352, -9.208883285522461, -7.427401542663574, -3.777655601501465, 6.917237758636475, -9.848749160766602, -2.094479560852051, -5.1351189613342285, 0.49564215540885925, 9.317541122436523, -5.9141845703125, -1.809845209121704, -0.11738205701112747, -7.169270992279053, -1.0578246116638184, -5.721685886383057, -5.117387294769287, 16.137670516967773, -4.473618984222412, 7.66243314743042, -0.5538089871406555, 9.631582260131836, 
-6.470466613769531, -8.54850959777832, 4.371622085571289, -0.7970349192619324, 4.479003429412842, -2.9758646488189697, 3.2721707820892334, 2.8382749557495117, 5.1345953941345215, -9.19078254699707, -0.5657423138618469, -4.874573230743408, 2.316561460494995, -5.984307289123535, -2.1798791885375977, 0.35541653633117676, -0.3178458511829376, 9.493547439575195, 2.114448070526123, 4.358088493347168, -12.089820861816406, 8.451695442199707, -7.925461769104004, 4.624246120452881, 4.428938388824463, 18.691999435424805, -2.620460033416748, -5.149182319641113, -0.3582168221473694, 8.488557815551758, 4.98148250579834, -9.326834678649902, -2.2544236183166504, 6.64176607131958, 1.2119656801223755, 10.977132797241211, 16.55504035949707, 3.323848247528076, 9.55185317993164, -1.6677050590515137, -0.7953923940658569, -8.605660438537598, -0.4735637903213501, 2.6741855144500732, -5.359188079833984, -2.6673784255981445, 0.6660736799240112, 15.443212509155273, 4.740597724914551, -3.4725306034088135, 11.592561721801758, -2.05450701713562, 1.7361239194869995, -8.26533031463623, -9.304476737976074, 5.406835079193115, -1.5180232524871826, -7.746610641479492, -6.089605331420898, 0.07112561166286469, -0.34904858469963074, -8.649889945983887, -9.998958587646484, -2.5648481845855713, -0.5399898886680603, 2.6018145084381104, -0.31927648186683655, -1.8815231323242188, -2.0721378326416016, -3.4105639457702637, -8.299802780151367, 1.4836379289627075, -15.366002082824707, -8.288193702697754, 3.884773015975952, -3.4876506328582764, 7.362995624542236, 0.4657321572303772, 3.1326000690460205, 12.438883781433105, -1.8337029218673706, 4.532927513122559, 2.726433277130127, 10.145345687866211, -6.521956920623779, 2.8971481323242188, -3.3925881385803223, 5.079156398773193, 7.759725093841553, 4.677562236785889, 5.8457818031311035, 2.4023921489715576, 7.707108974456787, 3.9711389541625977, -6.390035152435303, 6.126871109008789, -3.776031017303467, -11.118141174316406]}}
  [2022-08-01 09:01:27,094] [    INFO] - Response time 4.941739 s.
  ```

* Python API

  ``` python
  from paddlespeech.server.bin.paddlespeech_client import VectorClientExecutor
  import json

  vectorclient_executor = VectorClientExecutor()
  res = vectorclient_executor(
      input="85236145389.wav",
      server_ip="127.0.0.1",
      port=8090,
      task="spk")
  print(res.json())
  ```

  输出:
  ```text
  {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'vec': [1.4217487573623657, 5.626248836517334, -5.342073440551758, 1.177390217781067, 3.308061122894287, 1.7565997838974, 5.1678876876831055, 10.806346893310547, -3.822679042816162, -5.614130973815918, 2.6238481998443604, -0.8072965741157532, 1.963512659072876, -7.312864780426025, 0.011034967377781868, -9.723127365112305, 0.661963164806366, -6.976816654205322, 10.213465690612793, 7.494767189025879, 2.9105641841888428, 3.894925117492676, 3.7999846935272217, 7.106173992156982, 16.905324935913086, -7.149376392364502, 8.733112335205078, 3.423002004623413, -4.831653118133545, -11.403371810913086, 11.232216835021973, 7.127464771270752, -4.282831192016602, 2.4523589611053467, -5.13075065612793, -18.17765998840332, -2.611666440963745, -11.00034236907959, -6.731431007385254, 1.6564655303955078, 0.7618184685707092, 1.1253058910369873, -2.0838277339935303, 4.725739002227783, -8.782590866088867, -3.5398736000061035, 3.8142387866973877, 5.142062664031982, 2.162053346633911, 4.09642219543457, -6.416221618652344, 12.747454643249512, 1.9429889917373657, -15.152948379516602, 6.417416572570801, 16.097013473510742, -9.716649055480957, -1.9920448064804077, -3.364956855773926, -1.8719490766525269, 11.567351341247559, 3.6978795528411865, 11.258269309997559, 7.442364692687988, 9.183405876159668, 4.528151512145996, -1.2417811155319214, 4.395910263061523, 6.672768592834473, 5.889888763427734, 7.627115249633789, -0.6692016124725342, -11.889703750610352, -9.208883285522461, -7.427401542663574, -3.777655601501465, 6.917237758636475, -9.848749160766602, -2.094479560852051, -5.1351189613342285, 0.49564215540885925, 9.317541122436523, -5.9141845703125, -1.809845209121704, -0.11738205701112747, -7.169270992279053, -1.0578246116638184, -5.721685886383057, -5.117387294769287, 16.137670516967773, -4.473618984222412, 7.66243314743042, -0.5538089871406555, 9.631582260131836, -6.470466613769531, -8.54850959777832, 
4.371622085571289, -0.7970349192619324, 4.479003429412842, -2.9758646488189697, 3.2721707820892334, 2.8382749557495117, 5.1345953941345215, -9.19078254699707, -0.5657423138618469, -4.874573230743408, 2.316561460494995, -5.984307289123535, -2.1798791885375977, 0.35541653633117676, -0.3178458511829376, 9.493547439575195, 2.114448070526123, 4.358088493347168, -12.089820861816406, 8.451695442199707, -7.925461769104004, 4.624246120452881, 4.428938388824463, 18.691999435424805, -2.620460033416748, -5.149182319641113, -0.3582168221473694, 8.488557815551758, 4.98148250579834, -9.326834678649902, -2.2544236183166504, 6.64176607131958, 1.2119656801223755, 10.977132797241211, 16.55504035949707, 3.323848247528076, 9.55185317993164, -1.6677050590515137, -0.7953923940658569, -8.605660438537598, -0.4735637903213501, 2.6741855144500732, -5.359188079833984, -2.6673784255981445, 0.6660736799240112, 15.443212509155273, 4.740597724914551, -3.4725306034088135, 11.592561721801758, -2.05450701713562, 1.7361239194869995, -8.26533031463623, -9.304476737976074, 5.406835079193115, -1.5180232524871826, -7.746610641479492, -6.089605331420898, 0.07112561166286469, -0.34904858469963074, -8.649889945983887, -9.998958587646484, -2.5648481845855713, -0.5399898886680603, 2.6018145084381104, -0.31927648186683655, -1.8815231323242188, -2.0721378326416016, -3.4105639457702637, -8.299802780151367, 1.4836379289627075, -15.366002082824707, -8.288193702697754, 3.884773015975952, -3.4876506328582764, 7.362995624542236, 0.4657321572303772, 3.1326000690460205, 12.438883781433105, -1.8337029218673706, 4.532927513122559, 2.726433277130127, 10.145345687866211, -6.521956920623779, 2.8971481323242188, -3.3925881385803223, 5.079156398773193, 7.759725093841553, 4.677562236785889, 5.8457818031311035, 2.4023921489715576, 7.707108974456787, 3.9711389541625977, -6.390035152435303, 6.126871109008789, -3.776031017303467, -11.118141174316406]}}
  ```

#### 7.2 音频声纹打分

**注意：** 初次使用客户端时响应时间会略长
* 命令行 (推荐使用)

  若 `127.0.0.1` 不能访问，则需要使用实际服务 IP 地址

  ``` bash
  paddlespeech_client vector --task score  --server_ip 127.0.0.1 --port 8090 --enroll 85236145389.wav --test 123456789.wav
  ```

  使用帮助:

  ``` bash
  paddlespeech_client vector --help
  ```

  参数:
  * server_ip: 服务端ip地址，默认: 127.0.0.1。
  * port: 服务端口，默认: 8090。
  * input(spk 任务必须输入): 用于提取声纹特征的音频文件；打分时无需提供。
  * task: vector 的任务，可选 spk 或者 score。进行打分时必须设置为 score。
  * enroll: 注册音频。
  * test: 测试音频。

  输出:
  ```text
  [2022-08-01 09:04:42,275] [    INFO] - vector score http client start
  [2022-08-01 09:04:42,275] [    INFO] - enroll audio: 85236145389.wav, test audio: 123456789.wav
  [2022-08-01 09:04:42,275] [    INFO] - endpoint: http://127.0.0.1:8090/paddlespeech/vector/score
  [2022-08-01 09:04:44,611] [    INFO] - {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'score': 0.4292638897895813}}
  [2022-08-01 09:04:44,611] [    INFO] - Response time 2.336258 s.
  ```

* Python API

  ```python 
  from paddlespeech.server.bin.paddlespeech_client import VectorClientExecutor
  import json

  vectorclient_executor = VectorClientExecutor()
  res = vectorclient_executor(
      input=None,
      enroll_audio="85236145389.wav",
      test_audio="123456789.wav",
      server_ip="127.0.0.1",
      port=8090,
      task="score")
  print(res.json())
  ```

  输出:
  ```text
  {'success': True, 'code': 200, 'message': {'description': 'success'}, 'result': {'score': 0.4292638897895813}}
  ```

### 8. 标点预测
  
  **注意：** 初次使用客户端时响应时间会略长
- 命令行 (推荐使用)

  若 `127.0.0.1` 不能访问，则需要使用实际服务 IP 地址

  ``` bash
  paddlespeech_client text --server_ip 127.0.0.1 --port 8090 --input "我认为跑步最重要的就是给我带来了身体健康"
  ```

  使用帮助:
  
  ```bash
  paddlespeech_client text --help
  ```
  参数:
  - `server_ip`: 服务端ip地址，默认: 127.0.0.1。
  - `port`: 服务端口，默认: 8090。
  - `input`(必须输入): 用于标点预测的文本内容。

  输出:
  ```text
  [2022-05-09 18:19:04,397] [    INFO] - The punc text: 我认为跑步最重要的就是给我带来了身体健康。
  [2022-05-09 18:19:04,397] [    INFO] - Response time 0.092407 s.
  ```

- Python API
  ```python
  from paddlespeech.server.bin.paddlespeech_client import TextClientExecutor

  textclient_executor = TextClientExecutor()
  res = textclient_executor(
      input="我认为跑步最重要的就是给我带来了身体健康",
      server_ip="127.0.0.1",
      port=8090,)
  print(res)
  ```

  输出:
  ```text
  我认为跑步最重要的就是给我带来了身体健康。
  ```

## 服务支持的模型
### ASR 支持的模型
通过 `paddlespeech_server stats --task asr` 获取 ASR 服务支持的所有模型，其中静态模型可用于 paddle inference 推理。 

### TTS 支持的模型
通过 `paddlespeech_server stats --task tts` 获取 TTS 服务支持的所有模型，其中静态模型可用于 paddle inference 推理。

### CLS 支持的模型
通过 `paddlespeech_server stats --task cls` 获取 CLS 服务支持的所有模型，其中静态模型可用于 paddle inference 推理。

### Vector 支持的模型
通过 `paddlespeech_server stats --task vector` 获取 Vector 服务支持的所有模型。

### Text支持的模型
通过 `paddlespeech_server stats --task text` 获取 Text 服务支持的所有模型。


================================================
FILE: demos/speech_server/asr_client.sh
================================================
#!/bin/bash

# Download the sample recordings; -c resumes a partially downloaded file.
audio_base=https://paddlespeech.cdn.bcebos.com/PaddleAudio
wget -c "${audio_base}/zh.wav" "${audio_base}/en.wav"

# Send zh.wav to the ASR service.
# If `127.0.0.1` is not accessible, you need to use the actual service IP address.
paddlespeech_client asr \
    --server_ip 127.0.0.1 \
    --port 8090 \
    --input ./zh.wav


================================================
FILE: demos/speech_server/cls_client.sh
================================================
#!/bin/bash

# Download the sample recordings; -c resumes a partially downloaded file.
audio_base=https://paddlespeech.cdn.bcebos.com/PaddleAudio
wget -c "${audio_base}/zh.wav" "${audio_base}/en.wav"

# Classify zh.wav with the CLS service and report only the top-1 class.
# If `127.0.0.1` is not accessible, you need to use the actual service IP address.
paddlespeech_client cls \
    --server_ip 127.0.0.1 \
    --port 8090 \
    --input ./zh.wav \
    --topk 1


================================================
FILE: demos/speech_server/conf/application.yaml
================================================
# This is the parameter configuration file for PaddleSpeech Offline Serving.

#################################################################################
#                             SERVER SETTING                                    #
#################################################################################
host: 0.0.0.0
port: 8090

# The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['asr_python', 'asr_inference', 'tts_python', 'tts_inference', 'cls_python', 'cls_inference', 'text_python', 'vector_python']
protocol: 'http'
engine_list: ['asr_python', 'tts_python', 'cls_python', 'text_python', 'vector_python']


#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### ASR #########################################
################### speech task: asr; engine_type: python #######################
asr_python:
    model: 'conformer_wenetspeech'
    lang: 'zh'
    sample_rate: 16000
    cfg_path: # [optional]
    ckpt_path: # [optional]
    decode_method: 'attention_rescoring'
    force_yes: True
    device:  # set 'gpu:id' or 'cpu'

################### speech task: asr; engine_type: inference #######################
asr_inference:
    # model_type choices=['deepspeech2offline_aishell']
    model_type: 'deepspeech2offline_aishell'
    am_model: # the pdmodel file of am static model [optional]
    am_params:  # the pdiparams file of am static model [optional]
    lang: 'zh'
    sample_rate: 16000
    cfg_path: 
    decode_method: 
    force_yes: True

    am_predictor_conf:
        device:  # set 'gpu:id' or 'cpu'
        switch_ir_optim: True
        glog_info: False  # True -> print glog
        summary: True  # False -> do not show predictor config


################################### TTS #########################################
################### speech task: tts; engine_type: python #######################
tts_python:
    # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc',
    #                             'fastspeech2_ljspeech', 'fastspeech2_aishell3',
    #                             'fastspeech2_vctk', 'fastspeech2_mix',
    #                             'tacotron2_csmsc', 'tacotron2_ljspeech']
    am: 'fastspeech2_csmsc'   
    am_config: 
    am_ckpt: 
    am_stat: 
    phones_dict: 
    tones_dict: 
    speaker_dict: 


    # voc (vocoder) choices=['pwgan_csmsc', 'pwgan_ljspeech', 'pwgan_aishell3',
    #                        'pwgan_vctk', 'mb_melgan_csmsc', 'style_melgan_csmsc',
    #                        'hifigan_csmsc', 'hifigan_ljspeech', 'hifigan_aishell3',
    #                        'hifigan_vctk', 'wavernn_csmsc']
    voc: 'mb_melgan_csmsc'
    voc_config: 
    voc_ckpt: 
    voc_stat: 

    # others
    lang: 'zh'
    device:  # set 'gpu:id' or 'cpu'


################### speech task: tts; engine_type: inference #######################
tts_inference:
    # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc']
    am: 'fastspeech2_csmsc'   
    am_model: # the pdmodel file of your am static model (XX.pdmodel)
    am_params: # the pdiparams file of your am static model (XX.pdiparams)
    am_sample_rate: 24000
    phones_dict: 
    tones_dict: 
    speaker_dict: 


    am_predictor_conf:
        device:  # set 'gpu:id' or 'cpu'
        switch_ir_optim: True
        glog_info: False # True -> print glog
        summary: True  # False -> do not show predictor config

    # voc (vocoder) choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc']
    voc: 'mb_melgan_csmsc'
    voc_model: # the pdmodel file of your vocoder static model (XX.pdmodel)
    voc_params: # the pdiparams file of your vocoder static model (XX.pdiparams)
    voc_sample_rate: 24000

    voc_predictor_conf:
        device:  # set 'gpu:id' or 'cpu'  
        switch_ir_optim: True  
        glog_info: False # True -> print glog
        summary: True  # False -> do not show predictor config

    # others
    lang: 'zh'


################################### CLS #########################################
################### speech task: cls; engine_type: python #######################
cls_python:
    # model choices=['panns_cnn14', 'panns_cnn10', 'panns_cnn6']
    model: 'panns_cnn14'
    cfg_path: # [optional] Config of cls task.
    ckpt_path: # [optional] Checkpoint file of model.
    label_file: # [optional] Label file of cls task.
    device:  # set 'gpu:id' or 'cpu'


################### speech task: cls; engine_type: inference #######################
cls_inference:
    # model_type choices=['panns_cnn14', 'panns_cnn10', 'panns_cnn6']
    model_type: 'panns_cnn14' 
    cfg_path: 
    model_path:  # the pdmodel file of am static model [optional]
    params_path:  # the pdiparams file of am static model [optional]
    label_file:  # [optional] Label file of cls task.

    predictor_conf:
        device:  # set 'gpu:id' or 'cpu'
        switch_ir_optim: True
        glog_info: False  # True -> print glog
        summary: True  # False -> do not show predictor config


################################### Text #########################################
################### text task: punc; engine_type: python #######################
text_python:
    task: punc
    model_type: 'ernie_linear_p3_wudao'
    lang: 'zh'
    sample_rate: 16000
    cfg_path: # [optional]
    ckpt_path: # [optional]
    vocab_file: # [optional]
    device:  # set 'gpu:id' or 'cpu'


################################### Vector ######################################
################### Vector task: spk; engine_type: python #######################
vector_python:
    task: spk
    model_type: 'ecapatdnn_voxceleb12'
    sample_rate: 16000
    cfg_path: # [optional]
    ckpt_path: # [optional]
    device:  # set 'gpu:id' or 'cpu'


================================================
FILE: demos/speech_server/conf/conformer_talcs_application.yaml
================================================
# This is the parameter configuration file for PaddleSpeech Offline Serving.

#################################################################################
#                             SERVER SETTING                                    #
#################################################################################
host: 0.0.0.0
port: 8090

# The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['asr_python', 'asr_inference', 'tts_python', 'tts_inference', 'cls_python', 'cls_inference', 'text_python', 'vector_python']
protocol: 'http'
engine_list: ['asr_python', 'tts_python', 'cls_python', 'text_python', 'vector_python']


#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### ASR #########################################
################### speech task: asr; engine_type: python #######################
asr_python:
    model: 'conformer_talcs'
    lang: 'zh_en'
    sample_rate: 16000
    cfg_path: # [optional]
    ckpt_path: # [optional]
    decode_method: 'attention_rescoring'
    force_yes: True
    codeswitch: True
    device:  # set 'gpu:id' or 'cpu'

################### speech task: asr; engine_type: inference #######################
asr_inference:
    # model_type choices=['deepspeech2offline_aishell']
    model_type: 'deepspeech2offline_aishell'
    am_model: # the pdmodel file of am static model [optional]
    am_params:  # the pdiparams file of am static model [optional]
    lang: 'zh'
    sample_rate: 16000
    cfg_path: 
    decode_method: 
    force_yes: True

    am_predictor_conf:
        device:  # set 'gpu:id' or 'cpu'
        switch_ir_optim: True
        glog_info: False  # True -> print glog
        summary: True  # False -> do not show predictor config


################################### TTS #########################################
################### speech task: tts; engine_type: python #######################
tts_python:
    # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc',
    #                             'fastspeech2_ljspeech', 'fastspeech2_aishell3',
    #                             'fastspeech2_vctk', 'fastspeech2_mix',
    #                             'tacotron2_csmsc', 'tacotron2_ljspeech']
    am: 'fastspeech2_csmsc'   
    am_config: 
    am_ckpt: 
    am_stat: 
    phones_dict: 
    tones_dict: 
    speaker_dict: 


    # voc (vocoder) choices=['pwgan_csmsc', 'pwgan_ljspeech', 'pwgan_aishell3',
    #                        'pwgan_vctk', 'mb_melgan_csmsc', 'style_melgan_csmsc',
    #                        'hifigan_csmsc', 'hifigan_ljspeech', 'hifigan_aishell3',
    #                        'hifigan_vctk', 'wavernn_csmsc']
    voc: 'mb_melgan_csmsc'
    voc_config: 
    voc_ckpt: 
    voc_stat: 

    # others
    lang: 'zh'
    device:  # set 'gpu:id' or 'cpu'


################### speech task: tts; engine_type: inference #######################
tts_inference:
    # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc']
    am: 'fastspeech2_csmsc'   
    am_model: # the pdmodel file of your am static model (XX.pdmodel)
    am_params: # the pdiparams file of your am static model (XX.pdiparams)
    am_sample_rate: 24000
    phones_dict: 
    tones_dict: 
    speaker_dict: 


    am_predictor_conf:
        device:  # set 'gpu:id' or 'cpu'
        switch_ir_optim: True
        glog_info: False # True -> print glog
        summary: True  # False -> do not show predictor config

    # voc (vocoder) choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc']
    voc: 'mb_melgan_csmsc'
    voc_model: # the pdmodel file of your vocoder static model (XX.pdmodel)
    voc_params: # the pdiparams file of your vocoder static model (XX.pdiparams)
    voc_sample_rate: 24000

    voc_predictor_conf:
        device:  # set 'gpu:id' or 'cpu'  
        switch_ir_optim: True  
        glog_info: False # True -> print glog
        summary: True  # False -> do not show predictor config

    # others
    lang: 'zh'


################################### CLS #########################################
################### speech task: cls; engine_type: python #######################
cls_python:
    # model choices=['panns_cnn14', 'panns_cnn10', 'panns_cnn6']
    model: 'panns_cnn14'
    cfg_path: # [optional] Config of cls task.
    ckpt_path: # [optional] Checkpoint file of model.
    label_file: # [optional] Label file of cls task.
    device:  # set 'gpu:id' or 'cpu'


################### speech task: cls; engine_type: inference #######################
cls_inference:
    # model_type choices=['panns_cnn14', 'panns_cnn10', 'panns_cnn6']
    model_type: 'panns_cnn14' 
    cfg_path: 
    model_path:  # the pdmodel file of am static model [optional]
    params_path:  # the pdiparams file of am static model [optional]
    label_file:  # [optional] Label file of cls task.

    predictor_conf:
        device:  # set 'gpu:id' or 'cpu'
        switch_ir_optim: True
        glog_info: False  # True -> print glog
        summary: True  # False -> do not show predictor config


################################### Text #########################################
################### text task: punc; engine_type: python #######################
text_python:
    task: punc
    model_type: 'ernie_linear_p3_wudao'
    lang: 'zh'
    sample_rate: 16000
    cfg_path: # [optional]
    ckpt_path: # [optional]
    vocab_file: # [optional]
    device:  # set 'gpu:id' or 'cpu'


################################### Vector ######################################
################### Vector task: spk; engine_type: python #######################
vector_python:
    task: spk
    model_type: 'ecapatdnn_voxceleb12'
    sample_rate: 16000
    cfg_path: # [optional]
    ckpt_path: # [optional]
    device:  # set 'gpu:id' or 'cpu'


================================================
FILE: demos/speech_server/server.sh
================================================
#!/bin/bash

# Launch the server in the background; stdout and stderr both go to server.log.
config_file=./conf/application.yaml
paddlespeech_server start --config_file "${config_file}" > server.log 2>&1 &


================================================
FILE: demos/speech_server/sid_client.sh
================================================
#!/bin/bash

# Sample recordings used for speaker embedding extraction and scoring.
audio_base_url=https://paddlespeech.cdn.bcebos.com/vector/audio
enroll_wav=85236145389.wav
test_wav=123456789.wav

wget -c "${audio_base_url}/${enroll_wav}"
wget -c "${audio_base_url}/${test_wav}"

server_ip=127.0.0.1
port=8090

# sid extract
paddlespeech_client vector \
    --server_ip "${server_ip}" \
    --port "${port}" \
    --task spk \
    --input "./${enroll_wav}"

# sid score
paddlespeech_client vector \
    --server_ip "${server_ip}" \
    --port "${port}" \
    --task score \
    --enroll "./${enroll_wav}" \
    --test "./${test_wav}"


================================================
FILE: demos/speech_server/start_multi_progress_server.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import warnings

import uvicorn
from fastapi import FastAPI
from starlette.middleware.cors import CORSMiddleware

from paddlespeech.server.engine.engine_pool import init_engine_pool
from paddlespeech.server.restful.api import setup_router as setup_http_router
from paddlespeech.server.utils.config import get_config
from paddlespeech.server.ws.api import setup_router as setup_ws_router
warnings.filterwarnings("ignore")
import sys

# Module-level application object. It is built at import time rather than under
# `__main__` because uvicorn is launched further down with the import string
# "start_multi_progress_server:app".
app = FastAPI(
    title="PaddleSpeech Serving API", description="Api", version="0.0.1")
# CORS is configured to accept any origin, method and header, with credentials allowed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"])

# change yaml file here
# The path is relative, so it is resolved against the current working directory.
config_file = "./conf/application.yaml"
config = get_config(config_file)

# init engine
# Abort with exit status -1 when init_engine_pool reports failure (a falsy return).
if not init_engine_pool(config):
    print("Failed to init engine.")
    sys.exit(-1)

# get api_router
# Each engine_list entry is named "<speech task>_<engine type>"; only the task
# prefix (the text before the first underscore) is passed to the router setup.
api_list = [engine.split("_")[0] for engine in config.engine_list]
if config.protocol == "websocket":
    api_router = setup_ws_router(api_list)
elif config.protocol == "http":
    api_router = setup_http_router(api_list)
else:
    # Fail at import time: without a router there is nothing to serve.
    raise Exception("unsupported protocol")

# app needs to operate outside the main function
# uvicorn is given the import string "start_multi_progress_server:app" below,
# presumably so that each worker process imports this module itself; the router
# must therefore be attached at module level, not under `__main__`.
app.include_router(api_router)

if __name__ == "__main__":
    # Command line: --workers N sets the number of uvicorn worker processes (default 1).
    parser = argparse.ArgumentParser(add_help=True)
    parser.add_argument(
        "--workers", type=int, help="workers of server", default=1)
    args = parser.parse_args()

    # Host and port come from the YAML config loaded above.
    # NOTE(review): `debug` may not be accepted by newer uvicorn releases —
    # confirm against the uvicorn version this demo is run with.
    uvicorn.run(
        "start_multi_progress_server:app",
        host=config.host,
        port=config.port,
        debug=True,
        workers=args.workers)


================================================
FILE: demos/speech_server/text_client.sh
================================================
#!/bin/bash

# Unpunctuated sentence sent to the text (punctuation restoration) service.
server_ip=127.0.0.1
port=8090
text="今天的天气真好啊你下午有空吗我想约你一起去吃饭"

paddlespeech_client text \
    --server_ip "${server_ip}" \
    --port "${port}" \
    --input "${text}"


================================================
FILE: demos/speech_server/tts_client.sh
================================================
#!/bin/bash

# If `127.0.0.1` is not accessible, you need to use the actual service IP address.
server_ip=127.0.0.1
port=8090
text="您好，欢迎使用百度飞桨语音合成服务。"

# Synthesize the sentence and write the audio to output.wav.
paddlespeech_client tts \
    --server_ip "${server_ip}" \
    --port "${port}" \
    --input "${text}" \
    --output output.wav


================================================
FILE: demos/speech_ssl/README.md
================================================
([简体中文](./README_cn.md)|English)
# Speech SSL (Self-Supervised Learning)

## Introduction
Speech SSL, or Self-Supervised Learning, refers to a training method on the large-scale unlabeled speech dataset. The model trained in this way can produce a good acoustic representation, and can be applied to other downstream speech tasks by fine-tuning on labeled datasets.

This demo is an implementation to recognize text or produce the acoustic representation from a specific audio file by speech ssl models. It can be done by a single command or a few lines in python using `PaddleSpeech`. 

## Usage
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose one way from easy, medium and hard to install paddlespeech.

### 2. Prepare Input File
The input of this demo should be a WAV file(`.wav`), and the sample rate must be the same as the model.

Here are sample files for this demo that can be downloaded:
```bash
wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav
```

### 3. Usage
- Command Line(Recommended)
  ```bash
  # to recognize text 
  paddlespeech ssl --task asr --lang en --input ./en.wav

  # to get acoustic representation
  paddlespeech ssl --task vector --lang en --input ./en.wav
  ```

  Usage:
  ```bash
  paddlespeech ssl --help
  ```
  Arguments:
  - `input`(required): Audio file to recognize.
  - `model`: Model type of asr task. Default: `wav2vec2`, choices: [wav2vec2, hubert, wavlm].
  - `task`: Output type. Default: `asr`.
  - `lang`: Model language. Default: `en`.
  - `sample_rate`: Sample rate of the model. Default: `16000`.
  - `config`: Config of asr task. Use pretrained model when it is None. Default: `None`.
  - `ckpt_path`: Model checkpoint. Use pretrained model when it is None. Default: `None`.
  - `yes`: No additional parameters required. Once this parameter is set, all requests from the program are accepted by default, including converting the audio sample rate. Default: `False`.
  - `device`: Choose device to execute model inference. Default: default device of paddlepaddle in current environment.
  - `verbose`: Show the log information.


- Python API
  ```python
  import paddle
  from paddlespeech.cli.ssl import SSLExecutor

  ssl_executor = SSLExecutor()

  # to recognize text 
  text = ssl_executor(
      model='wav2vec2',
      task='asr',
      lang='en',
      sample_rate=16000,
      config=None,  # Set `config` and `ckpt_path` to None to use pretrained model.
      ckpt_path=None,
      audio_file='./en.wav',
      device=paddle.get_device())
  print('ASR Result: \n{}'.format(text))

  # to get acoustic representation
  feature = ssl_executor(
      model='wav2vec2',
      task='vector',
      lang='en',
      sample_rate=16000,
      config=None,  # Set `config` and `ckpt_path` to None to use pretrained model.
      ckpt_path=None,
      audio_file='./en.wav',
      device=paddle.get_device())
  print('Representation: \n{}'.format(feature))
  ```

  Output:
  ```bash
  ASR Result:
  i knocked at the door on the ancient side of the building

  Representation:
  Tensor(shape=[1, 164, 1024], dtype=float32, place=Place(gpu:0), stop_gradient=True,
       [[[ 0.02351918, -0.12980647,  0.17868176, ...,  0.10118122,
          -0.04614586,  0.17853957],
         [ 0.02361383, -0.12978461,  0.17870593, ...,  0.10103855,
          -0.04638699,  0.17855372],
         [ 0.02345137, -0.12982975,  0.17883906, ...,  0.10104341,
          -0.04643029,  0.17856732],
         ...,
         [ 0.02313030, -0.12918393,  0.17845058, ...,  0.10073373,
          -0.04701405,  0.17862988],
         [ 0.02176583, -0.12929161,  0.17797582, ...,  0.10097728,
          -0.04687393,  0.17864393],
         [ 0.05269200,  0.01297141, -0.23336855, ..., -0.11257174,
          -0.17227529,  0.20338398]]])
  ```


================================================
FILE: demos/speech_ssl/README_cn.md
================================================
(简体中文|[English](./README.md))

# 语音自监督学习
## 介绍
语音自监督学习，指的是在大规模无标记的语音数据集上的训练方法。用这种方法训练出来的模型可以产生很好的声学表征。并且可以通过在有标签的数据集上进行微调，应用于其他下游的语音任务。

这个 demo 是通过语音自监督模型将一个特定的音频文件识别成文本或产生声学表征，它可以通过使用 `PaddleSpeech` 的单个命令或 python 中的几行代码来实现。

## 使用方法
### 1. 安装
请看[安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install_cn.md)。

你可以从 easy，medium，hard 三种方式中选择一种方式安装。

### 2. 准备输入
这个 demo 的输入应该是一个 WAV 文件（`.wav`），并且采样率必须与模型的采样率相同。

可以下载此 demo 的示例音频：
```bash
wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav
```
### 3. 使用方法
- 命令行 (推荐使用)
  ```bash

  # 识别文本
  paddlespeech ssl --task asr --lang en --input ./en.wav

  # 产生声学表征
  paddlespeech ssl --task vector --lang en --input ./en.wav
  ```
  
  使用方法：
  ```bash
  paddlespeech ssl --help
  ```
  参数：
  - `input`(必须输入)：用于识别的音频文件。
  - `model`：ASR 任务的模型，默认值：`wav2vec2`, 可选项：[wav2vec2, hubert, wavlm]。
  - `task`：输出类别，默认值：`asr`。
  - `lang`：模型语言，默认值：`en`。
  - `sample_rate`：音频采样率，默认值：`16000`。
  - `config`：ASR 任务的参数文件，若不设置则使用预训练模型中的默认配置，默认值：`None`。
  - `ckpt_path`：模型参数文件，若不设置则下载预训练模型使用，默认值：`None`。
  - `yes`：不需要设置额外的参数，一旦设置了该参数，说明你默认同意程序的所有请求，其中包括自动转换输入音频的采样率。默认值：`False`。
  - `device`：执行预测的设备，默认值：当前系统下 paddlepaddle 的默认 device。
  - `verbose`: 如果使用，显示 logger 信息。


- Python API
  ```python
  import paddle
  from paddlespeech.cli.ssl import SSLExecutor

  ssl_executor = SSLExecutor()

  # 识别文本
  text = ssl_executor(
      model='wav2vec2',
      task='asr',
      lang='en',
      sample_rate=16000,
      config=None,  # Set `config` and `ckpt_path` to None to use pretrained model.
      ckpt_path=None,
      audio_file='./en.wav',
      device=paddle.get_device())
  print('ASR Result: \n{}'.format(text))

  # 得到声学表征
  feature = ssl_executor(
      model='wav2vec2',
      task='vector',
      lang='en',
      sample_rate=16000,
      config=None,  # Set `config` and `ckpt_path` to None to use pretrained model.
      ckpt_path=None,
      audio_file='./en.wav',
      device=paddle.get_device())
  print('Representation: \n{}'.format(feature))
  ```


  输出：
  ```bash
  ASR Result:
  i knocked at the door on the ancient side of the building
  
  Representation:
  Tensor(shape=[1, 164, 1024], dtype=float32, place=Place(gpu:0), stop_gradient=True,
       [[[ 0.02351918, -0.12980647,  0.17868176, ...,  0.10118122,
          -0.04614586,  0.17853957],
         [ 0.02361383, -0.12978461,  0.17870593, ...,  0.10103855,
          -0.04638699,  0.17855372],
         [ 0.02345137, -0.12982975,  0.17883906, ...,  0.10104341,
          -0.04643029,  0.17856732],
         ...,
         [ 0.02313030, -0.12918393,  0.17845058, ...,  0.10073373,
          -0.04701405,  0.17862988],
         [ 0.02176583, -0.12929161,  0.17797582, ...,  0.10097728,
          -0.04687393,  0.17864393],
         [ 0.05269200,  0.01297141, -0.23336855, ..., -0.11257174,
          -0.17227529,  0.20338398]]])
  ```


================================================
FILE: demos/speech_ssl/run.sh
================================================
#!/bin/bash

# audio download
audio_url=https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav
wget -c "${audio_url}"

input_wav=./en.wav
lang=en

# to recognize text
paddlespeech ssl --task asr --lang "${lang}" --input "${input_wav}"

# to get acoustic representation
paddlespeech ssl --task vector --lang "${lang}" --input "${input_wav}"


================================================
FILE: demos/speech_translation/README.md
================================================
([简体中文](./README_cn.md)|English)
# Speech Translation
## Introduction
Speech translation is the process by which conversational spoken phrases are instantly translated and spoken aloud in a second language.

This demo is an implementation to recognize text from a specific audio file and translate it to the target language. It can be done by a single command or a few lines in python using `PaddleSpeech`. 

## Usage
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose one way from easy, medium and hard to install paddlespeech.


### 2. Prepare Input File
The input of this demo should be a WAV file(`.wav`).

Here are sample files for this demo that can be downloaded:
```bash
wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav
```

### 3. Usage (Windows is not supported yet)
- Command Line(Recommended)
  ```bash
  paddlespeech st --input ./en.wav
  ```
  Usage:
  ```bash
  paddlespeech st --help
  ```
  Arguments:
  - `input`(required): Audio file to recognize and translate.
  - `model`: Model type of st task. Default: `fat_st_ted`.
  - `src_lang`: Source language. Default: `en`.
  - `tgt_lang`: Target language. Default: `zh`.
  - `sample_rate`: Sample rate of the model. Default: `16000`.
  - `config`: Config of st task. Use pretrained model when it is None. Default: `None`.
  - `ckpt_path`: Model checkpoint. Use pretrained model when it is None. Default: `None`.
  - `device`: Choose device to execute model inference. Default: default device of paddlepaddle in current environment.

  Output:
  ```bash
  [2021-12-09 11:13:03,178] [    INFO] [utils.py] [L225] - ST Result: ['我 在 这栋 建筑 的 古老 门上 敲门 。']
  ```

- Python API
  ```python
  import paddle
  from paddlespeech.cli.st import STExecutor

  st_executor = STExecutor()
  text = st_executor(
      model='fat_st_ted',
      src_lang='en',
      tgt_lang='zh',
      sample_rate=16000,
      config=None,  # Set `config` and `ckpt_path` to None to use pretrained model.
      ckpt_path=None,
      audio_file='./en.wav',
      device=paddle.get_device())
  print('ST Result: \n{}'.format(text))
  ```

  Output:
  ```bash
  ST Result:
  ['我 在 这栋 建筑 的 古老 门上 敲门 。'] 
  ```

### 4. Pretrained Models
Here is a list of pretrained models released by PaddleSpeech that can be used by command and python API:

| Model | Source Language | Target Language
| :--- | :---: | :---: |
| fat_st_ted| en| zh


================================================
FILE: demos/speech_translation/README_cn.md
================================================
(简体中文|[English](./README.md))
# 语音翻译

## 介绍
语音翻译是将会话口语短语翻译成另一语言的过程。

该 Demo 是从特定音频文件中识别文本并将其翻译为目标语言的实现。它可以通过使用 `PaddleSpeech` 的单个命令或 python 中的几行代码来实现。

## 使用方法
### 1. 安装
请看[安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install_cn.md)。

你可以从 easy，medium，hard 三种方式中选择一种方式安装。

### 2. 准备输入
这个 Demo 的输入是 WAV(`.wav`) 语音文件

这里给出一些样例文件供 Demo 使用：
```bash
wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav
```

### 3. 使用方法 (暂不支持Windows)
- 命令行(推荐使用)
  ```bash
  paddlespeech st --input ./en.wav
  ```
  使用方法：
  ```bash
  paddlespeech st --help
  ```
  参数：
  - `input`(必须输入)：用于翻译的音频。
  - `model`： 语音翻译的模型类型. 默认：`fat_st_ted`。
  - `src_lang`： 源语言. 默认：`en`。
  - `tgt_lang`： 目标语言. 默认：`zh`。
  - `sample_rate`：输入音频的采样率. 默认：`16000`。
  - `config`：语音翻译任务的配置文件. 如果没有默认使用预训练模型的配置文件. 默认：`None`。
  - `ckpt_path`：模型文件. 如果没有默认使用预训练模型. 默认：`None`。
  - `device`：选择执行的设备. 默认： 当前环境 paddlepaddle 的默认设备。

  输出：
  ```bash
  [2021-12-09 11:13:03,178] [    INFO] [utils.py] [L225] - ST Result: ['我 在 这栋 建筑 的 古老 门上 敲门 。']
  ```

- Python API
  ```python
  import paddle
  from paddlespeech.cli.st import STExecutor
  
  st_executor = STExecutor()
  text = st_executor(
      model='fat_st_ted',
      src_lang='en',
      tgt_lang='zh',
      sample_rate=16000,
      config=None,  # Set `config` and `ckpt_path` to None to use pretrained model.
      ckpt_path=None,
      audio_file='./en.wav',
      device=paddle.get_device())
  print('ST Result: \n{}'.format(text))
  ```

  输出：
  ```bash
  ST Result:
  ['我 在 这栋 建筑 的 古老 门上 敲门 。'] 
  ```

### 4. 预训练模型

以下是 PaddleSpeech 提供的可以被命令行和 python API 使用的预训练模型列表：

| 模型 | 源语言 | 目标语言
| :--- | :---: | :---: |
| fat_st_ted| en| zh


================================================
FILE: demos/speech_translation/run.sh
================================================
#!/bin/bash

# Download the sample recordings; `-c` resumes a partially downloaded file.
audio_base_url=https://paddlespeech.cdn.bcebos.com/PaddleAudio
wget -c "${audio_base_url}/zh.wav" "${audio_base_url}/en.wav"

# Translate the English sample with the default speech translation model.
input_wav=./en.wav
paddlespeech st --input "${input_wav}"


================================================
FILE: demos/speech_web/.gitignore
================================================
*/.vscode/*
*.wav
*/resource/*
.Ds*
*.pyc
*.pcm
*.npy
*.diff
*.sqlite
*/static/*
*.pdparams
*.pdiparams*
*.pdmodel
*/source/*
*/PaddleSpeech/*
*/tmp*/*
*/duration.txt
*/oov_info.txt


================================================
FILE: demos/speech_web/API.md
================================================
# 接口文档

开启服务后可参照：

http://0.0.0.0:8010/docs

## ASR

### 【POST】/asr/offline

说明：上传 16k, 16bit wav 文件，返回 offline 语音识别模型识别结果

返回: JSON

前端接口： ASR-端到端识别，音频文件识别；语音指令-录音上传

示例:

```json
{
  "code": 0,
  "result": "你也喜欢这个天气吗",
  "message": "ok"
}
```

### 【POST】/asr/offlinefile

说明：上传16k,16bit wav文件，返回 offline 语音识别模型识别结果 + wav 数据的 base64

返回: JSON

前端接口： 音频文件识别(播放这段base64还原后记得添加 wav 头，采样率 16k, int16，添加后才能播放)

示例:

```json
{
  "code": 0,
  "result": {
    "asr_result": "今天天气真好",
    "wav_base64": "///+//3//f/8/////v/////////////////+/wAA//8AAAEAAQACAAIAAQABAP"
    },
  "message": "ok"
}
```


### 【POST】/asr/collectEnv

说明： 通过采集环境噪音，上传 16k, int16 wav 文件，来生成后台 VAD 的能量阈值， 返回阈值结果

前端接口：ASR-环境采样

返回: JSON

```json
{
  "code": 0,
  "result": 3624.93505859375,
  "message": "采集环境噪音成功"
}
```

### 【GET】/asr/stopRecord

说明：通过 GET 请求 /asr/stopRecord, 后台停止接收 offlineStream 中通过 WS 协议 上传的数据

前端接口：语音聊天-暂停录音（获取 NLP，播放 TTS 时暂停）

返回: JSON

```JSON
{
  "code": 0,
  "result": null,
  "message": "停止成功"
}
```

### 【GET】/asr/resumeRecord

说明：通过 GET 请求 /asr/resumeRecord, 后台恢复接收 offlineStream 中通过 WS 协议 上传的数据

前端接口：语音聊天-恢复录音（ TTS 播放完毕时，告诉后台恢复录音）

返回: JSON

```JSON
{
  "code": 0,
  "result": null,
  "message": "Online录音恢复"
}
```

### 【Websocket】/ws/asr/offlineStream

说明：通过 WS 协议，将前端音频持续上传到后台，前端采集 16k，Int16 类型的PCM片段，持续上传到后端

前端接口：语音聊天-开始录音，持续将麦克风语音传给后端，后端推送语音识别结果

返回：后端返回识别结果，offline 模型识别结果， 由WS推送


### 【Websocket】/ws/asr/onlineStream

说明：通过 WS 协议，将前端音频持续上传到后台，前端采集 16k，Int16 类型的 PCM 片段，持续上传到后端

前端接口：ASR-流式识别开始录音，持续将麦克风语音传给后端，后端推送语音识别结果

返回：后端返回识别结果，online 模型识别结果， 由 WS 推送

## NLP

### 【POST】/nlp/chat

说明：返回闲聊对话的结果

前端接口：语音聊天-获取到ASR识别结果后，向后端获取闲聊文本

上传示例：

```json
{
  "chat": "天气非常棒"
}
```

返回示例：

```json
{
  "code": 0,
  "result": "是的,我也挺喜欢的",
  "message": "ok"
}
```


### 【POST】/nlp/ie

说明：返回信息抽取结果

前端接口：语音指令-向后端获取信息抽取结果

上传示例：

```json
{
  "chat": "今天我从马来西亚出发去香港花了五十万元"
}
```

返回示例：

```json
{
  "code": 0,
  "result": [
    {
      "时间": [
        {
          "text": "今天",
          "start": 0,
          "end": 2,
          "probability": 0.9817976247505698
        }
      ],
      "出发地": [
        {
          "text": "马来西亚",
          "start": 4,
          "end": 8,
          "probability": 0.974892389414169
        }
      ],
      "目的地": [
        {
          "text": "马来西亚",
          "start": 4,
          "end": 8,
          "probability": 0.7347504438136951
        }
      ],
      "费用": [
        {
          "text": "五十万元",
          "start": 15,
          "end": 19,
          "probability": 0.9679076530644402
        }
      ]
    }
  ],
  "message": "ok"
}
```


## TTS

### 【POST】/tts/offline

说明：获取 TTS 离线模型音频

前端接口：TTS-端到端合成

上传示例：

```json
{
  "text": "天气非常棒"
}
```

返回示例：对应音频的 base64 编码

```json
{
  "code": 0,
  "result": "UklGRrzQAABXQVZFZm10IBAAAAABAAEAwF0AAIC7AAACABAAZGF0YZjQAAADAP7/BAADAAAA...",
  "message": "ok"
}
```

### 【POST】/tts/online

说明：流式获取语音合成音频

前端接口：流式合成

上传示例：
```json
{
  "text": "天气非常棒"
}

```

返回示例：

二进制PCM片段，16k Int 16类型

## VPR

### 【POST】/vpr/enroll

说明：声纹注册，通过表单上传 spk_id（字符串，非空）, 与 audio (文件)

前端接口：声纹识别-声纹注册

上传示例：

```text
curl -X 'POST' \
  'http://0.0.0.0:8010/vpr/enroll' \
  -H 'accept: application/json' \
  -H 'Content-Type: multipart/form-data' \
  -F 'spk_id=啦啦啦啦' \
  -F 'audio=@demo_16k.wav;type=audio/wav'
```

返回示例：

```json
{
  "status": true,
  "msg": "Successfully enroll data!"
}
```

### 【POST】/vpr/recog

说明：声纹识别，提取上传音频文件的声纹信息并与已注册的声纹做比对；音频需为 16k, int16 的 wav 格式

前端接口：声纹识别-上传音频，返回声纹识别结果

上传示例： 

```shell
curl -X 'POST' \
  'http://0.0.0.0:8010/vpr/recog' \
  -H 'accept: application/json' \
  -H 'Content-Type: multipart/form-data' \
  -F 'audio=@demo_16k.wav;type=audio/wav'
```

返回示例：

```json
[
  [
    "啦啦啦啦",
    [
      "",
      100
    ]
  ],
  [
    "test1",
    [
      "",
      11.64
    ]
  ],
  [
    "test2",
    [
      "",
      6.09
    ]
  ]
]

```


### 【POST】/vpr/del

说明： 根据 spk_id 删除用户数据

前端接口：声纹识别-删除用户数据

上传示例：
```json
{
 "spk_id":"啦啦啦啦"
}
```

返回示例：

```json
{
  "status": true,
  "msg": "Successfully delete data!"
}

```


### 【GET】/vpr/list

说明：查询用户列表数据，无需参数，返回 spk_id 与 vpr_id

前端接口：声纹识别-获取声纹数据列表

返回示例：

```json
[
  [
    "test1",
    "test2"
  ],
  [
    9,
    10
  ]
]

```


### 【GET】/vpr/data

说明： 根据 vpr_id 获取用户vpr时使用的音频

前端接口：声纹识别-获取vpr对应的音频

访问示例：

```shell
curl -X 'GET' \
  'http://0.0.0.0:8010/vpr/data?vprId=9' \
  -H 'accept: application/json'
```

返回示例：

对应音频文件

### 【GET】/vpr/database64

说明： 根据 vpr_id 获取用户 vpr 时注册使用音频转换成 16k, int16 类型的数组，返回 base64 编码

前端接口：声纹识别-获取 vpr 对应的音频（注意：播放时需要添加 wav头，16k,int16, 可参考 tts 播放时添加 wav 的方式，注意更改采样率）

访问示例：

```shell
curl -X 'GET' \
  'http://localhost:8010/vpr/database64?vprId=12' \
  -H 'accept: application/json'
```

返回示例：
```json
{
  "code": 0,
  "result":"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA",
  "message": "ok"
}
```


================================================
FILE: demos/speech_web/README.md
================================================
# Paddle Speech Demo

## 简介
Paddle Speech Demo 是一个以 PaddleSpeech 的语音交互功能为主体开发的 Demo 展示项目，用于帮助大家更好的上手 PaddleSpeech 以及使用 PaddleSpeech 构建自己的应用。

智能语音交互部分使用 PaddleSpeech，对话以及信息抽取部分使用 PaddleNLP，网页前端展示部分基于 Vue3 进行开发。

主要功能：

`main.py` 中包含功能
+ 语音聊天：PaddleSpeech 的语音识别能力+语音合成能力，对话部分基于 PaddleNLP 的闲聊功能
+ 声纹识别：PaddleSpeech 的声纹识别功能展示
+ 语音识别：支持【实时语音识别】，【端到端识别】，【音频文件识别】三种模式
+ 语音合成：支持【流式合成】与【端到端合成】两种方式
+ 语音指令：基于 PaddleSpeech 的语音识别能力与 PaddleNLP 的信息抽取，实现交通费的智能报销

`vc.py` 中包含功能
+ 一句话合成：基于 GE2E 和 ECAPA-TDNN 模型的一句话合成方案，可以模仿输入的音频的音色进行合成任务
  + GE2E 音色克隆方案可以参考： [【FastSpeech2 + AISHELL-3 Voice Cloning】](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/vc1)
  + ECAPA-TDNN 音色克隆方案可以参考: [【FastSpeech2 + AISHELL-3 Voice Cloning (ECAPA-TDNN)】](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/vc2)

+ 小数据微调：基于小数据集的微调方案，内置用12句话标贝中文女声微调示例，你也可以通过一键重置，录制自己的声音，注意在安静环境下录制，效果会更好。你可以在 [【Finetune your own AM based on FastSpeech2 with AISHELL-3】](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/tts_finetune/tts3)中尝试使用自己的数据集进行微调。

+ ERNIE-SAT：语言-语音跨模态大模型 ERNIE-SAT 可视化展示示例，支持个性化合成，跨语言语音合成（音频为中文则输入英文文本进行合成），语音编辑（修改音频文字中间的结果）功能。 ERNIE-SAT 更多实现细节，可以参考：
  + [【ERNIE-SAT with AISHELL-3 dataset】](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/ernie_sat)
  + [【ERNIE-SAT with AISHELL3 and VCTK datasets】](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3_vctk/ernie_sat)
  + [【ERNIE-SAT with VCTK dataset】](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/vctk/ernie_sat)

运行效果：

 ![效果](https://user-images.githubusercontent.com/30135920/196076507-7eb33d39-2345-4268-aee7-6270b9ac8b98.png)


## 基础环境安装

### 后端环境安装
```bash 
# 需要先安装 PaddleSpeech
cd speech_server
pip install -r requirements.txt -i https://mirror.baidu.com/pypi/simple
cd ../
```

### 前端环境安装
前端依赖 `node.js` ，需要提前安装，确保 `npm` 可用，`npm` 测试版本 `8.3.1`，建议下载[官网](https://nodejs.org/en/)稳定版的 `node.js`

如果因为网络问题，无法下载依赖库，可以参考 FAQ 部分的 `npm / yarn 配置淘宝镜像源`

```bash
# 进入前端目录
cd web_client
# 安装 `yarn`，已经安装可跳过
npm install -g yarn
# 使用yarn安装前端依赖
yarn install
cd ../
```


## 启动服务
【注意】目前只支持 `main.py` 和 `vc.py` 两者中选择开启一个后端服务。

### 启动 `main.py` 后端服务

#### 下载相关模型

只需手动下载语音指令所需模型即可，其他模型会自动下载。

```bash
cd speech_server
mkdir -p source/model
cd source/model
# 下载IE模型
wget https://bj.bcebos.com/paddlenlp/applications/speech-cmd-analysis/finetune/model_state.pdparams
cd ../../../

```
#### 启动后端服务

```
cd speech_server
# 默认8010端口
python main.py --port 8010
```


### 启动 `vc.py` 后端服务

参照下面的步骤自行配置项目所需环境。

Aistudio 在线体验小样本合成后端功能：[【PaddleSpeech进阶】PaddleSpeech小样本合成方案体验](https://aistudio.baidu.com/aistudio/projectdetail/4573549?sUid=2470186&shared=1&ts=1664174385948)

#### 下载相关模型和音频

```bash
cd speech_server

# 已创建则跳过
mkdir -p source/model
cd source
# 下载 & 解压 wav （包含VC测试音频）
wget https://paddlespeech.cdn.bcebos.com/demos/speech_web/wav_vc.zip
unzip wav_vc.zip

cd model
# 下载 GE2E 相关模型
wget https://bj.bcebos.com/paddlespeech/Parakeet/released_models/ge2e/ge2e_ckpt_0.3.zip
unzip ge2e_ckpt_0.3.zip
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_aishell3_ckpt_0.5.zip
unzip pwg_aishell3_ckpt_0.5.zip
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_aishell3_vc1_ckpt_0.5.zip
unzip fastspeech2_nosil_aishell3_vc1_ckpt_0.5.zip

# 下载 ECAPA-TDNN 相关模型
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_aishell3_ckpt_vc2_1.2.0.zip
unzip fastspeech2_aishell3_ckpt_vc2_1.2.0.zip

# 下载 ERNIE-SAT 相关模型
# aishell3 ERNIE-SAT
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/erniesat_aishell3_ckpt_1.2.0.zip
unzip erniesat_aishell3_ckpt_1.2.0.zip

# vctk ERNIE-SAT
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/erniesat_vctk_ckpt_1.2.0.zip
unzip erniesat_vctk_ckpt_1.2.0.zip

# aishell3_vctk ERNIE-SAT
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/erniesat_aishell3_vctk_ckpt_1.2.0.zip
unzip erniesat_aishell3_vctk_ckpt_1.2.0.zip

# 下载 finetune 相关模型
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_aishell3_ckpt_1.1.0.zip
unzip fastspeech2_aishell3_ckpt_1.1.0.zip

# 下载声码器
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_ckpt_0.2.0.zip
unzip hifigan_aishell3_ckpt_0.2.0.zip
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_ckpt_0.2.0.zip
unzip hifigan_vctk_ckpt_0.2.0.zip

cd ../../../
```

#### ERNIE-SAT 环境配置

ERNIE-SAT 体验依赖于 [examples/aishell3_vctk/ernie_sat](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3_vctk/ernie_sat) 的环境。参考 `examples/aishell3_vctk/ernie_sat` 下的 `README.md`， 确保 `examples/aishell3_vctk/ernie_sat` 下 `run.sh` 相关示例代码有效。
 
运行好 `examples/aishell3_vctk/ernie_sat` 后，回到当前目录，创建环境：
```bash
cd speech_server
ln -snf ../../../examples/aishell3_vctk/ernie_sat/download .
ln -snf ../../../examples/aishell3_vctk/ernie_sat/tools .
cd ../
```

#### finetune 环境配置

`finetune` 需要解压 `tools/aligner` 中的 `aishell3_model.zip`，finetune 过程需要使用到 `tools/aligner/aishell3_model/meta.yaml` 文件。

```bash
cd speech_server/tools/aligner
unzip aishell3_model.zip
cd -
```

#### 启动后端服务

```
cd speech_server
# 默认8010端口
python vc.py --port 8010
```

### 启动前端服务

```
cd web_client
yarn dev --port 8011
```

默认配置下，前端配置的后台地址信息是 `localhost`，确保后端服务器和打开页面的浏览器在同一台机器上，不在一台机器的配置方式见下方的 FAQ：【后端如果部署在其它机器或者别的端口如何修改】

#### 关于前端的一些说明

为了方便后期的维护，这里并没有给出打包好的 HTML 文件，而是 Vue3 的项目，使用 `yarn dev --port 8011` 的方式启动测试，方便大家debug，相当于是启动了一个前端服务器。

比如我们在本机启动这个前端服务（运行 `yarn dev --port 8011` ），就可以在浏览器中通过 `http://localhost:8011` 访问前端页面

如果我们在其它服务器上（例如：`*.*.*.*` ）启动这个前端服务（运行 `yarn dev --port 8011` ），就可以在浏览器中通过 `http://*.*.*.*:8011` 访问前端页面

那前端跟后端是什么关系呢？ 两个是独立的，只要前端能够通过代理访问到后端的接口，那就没有问题。你可以在 A 机器上部署后端服务，然后在 B 机器上部署前端服务。我们在 `./web_client/vite.config.js` 中将 `/api` 映射到的是 `http://localhost:8010`，你可以把它配置成任意你想要访问后端地址。

当通过 `*.*.*.*` 这类 IP 地址形式访问前端页面时，由于浏览器的安全限制，会禁止录音，需要重新配置浏览器的安全策略，可以看下面 FAQ 部分：【前端以IP地址的形式访问，无法录音】


## FAQ 

#### Q: 如何安装node.js

A： node.js的安装可以参考[【菜鸟教程】](https://www.runoob.com/nodejs/nodejs-install-setup.html), 确保 npm 可用

#### Q：后端如果部署在其它机器或者别的端口如何修改

A：后端的配置地址分散在两个文件中

修改第一个文件 `./web_client/vite.config.js`

```
server: {
    host: "0.0.0.0",
    proxy: {
      "/api": {
        target: "http://localhost:8010",  // 这里改成后端所在接口
        changeOrigin: true,
        rewrite: (path) => path.replace(/^\/api/, ""),
      },
    },
  }
```

修改第二个文件 `./web_client/src/api/API.js`（ Websocket 代理配置失败，所以需要在这个文件中修改）

```
// websocket （这里改成后端所在的接口）
CHAT_SOCKET_RECORD: 'ws://localhost:8010/ws/asr/offlineStream', // ChatBot websocket 接口
ASR_SOCKET_RECORD: 'ws://localhost:8010/ws/asr/onlineStream',  // Stream ASR 接口
TTS_SOCKET_RECORD: 'ws://localhost:8010/ws/tts/online', // Stream TTS 接口
```

#### Q：前端以IP地址的形式访问，无法录音

A：这里主要是浏览器安全策略的限制，需要配置浏览器后重启。浏览器修改配置可参考[使用js-audio-recorder报浏览器不支持getUserMedia](https://blog.csdn.net/YRY_LIKE_YOU/article/details/113745273)

chrome设置地址: chrome://flags/#unsafely-treat-insecure-origin-as-secure

#### Q: npm / yarn 配置淘宝镜像源

A: 配置淘宝镜像源，详细可以参考 [【yarn npm 设置淘宝镜像】](https://www.jianshu.com/p/f6f43e8f9d6b)

```bash
# npm 配置淘宝镜像源
npm config set registry https://registry.npmmirror.com

# yarn 配置淘宝镜像源
yarn config set registry https://registry.npmmirror.com
```

## 参考资料

vue实现录音参考资料：https://blog.csdn.net/qq_41619796/article/details/107865602#t1

前端流式播放音频参考仓库：

https://github.com/AnthumChris/fetch-stream-audio

https://bm.enthuses.me/buffered.php?bref=6677


================================================
FILE: demos/speech_web/speech_server/conf/tts3_finetune.yaml
================================================
###########################################################
#                 PARAS SETTING               #
###########################################################
# Set to -1 to indicate that the parameter is the same as the pretrained model configuration

batch_size: 10
learning_rate: 0.0001     # learning rate
num_snapshots: -1

# frozen_layers should be a list
# if you don't need to freeze, set frozen_layers to []
frozen_layers: ["encoder"]


================================================
FILE: demos/speech_web/speech_server/conf/tts_online_application.yaml
================================================
# This is the parameter configuration file for streaming tts server.

#################################################################################
#                             SERVER SETTING                                    #
#################################################################################
host: 0.0.0.0
port: 8092

# The task format in the engine_list is: <speech task>_<engine type>
# engine_list choices = ['tts_online', 'tts_online-onnx'], the inference speed of tts_online-onnx is faster than tts_online.
# protocol choices = ['websocket', 'http'] 
protocol: 'http'
engine_list: ['tts_online-onnx']


#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### TTS #########################################
################### speech task: tts; engine_type: online #######################
tts_online: 
    # am (acoustic model) choices=['fastspeech2_csmsc', 'fastspeech2_cnndecoder_csmsc']   
    # fastspeech2_cnndecoder_csmsc support streaming am infer.     
    am: 'fastspeech2_csmsc'   
    am_config: 
    am_ckpt: 
    am_stat: 
    phones_dict: 
    tones_dict: 
    speaker_dict: 
    spk_id: 0

    # voc (vocoder) choices=['mb_melgan_csmsc', 'hifigan_csmsc']
    # Both mb_melgan_csmsc and hifigan_csmsc support streaming voc inference
    voc: 'mb_melgan_csmsc'
    voc_config: 
    voc_ckpt: 
    voc_stat: 

    # others
    lang: 'zh'
    device: 'cpu' # set 'gpu:id' or 'cpu'
    # am_block and am_pad only for fastspeech2_cnndecoder_onnx model to streaming am infer,
    # when am_pad set 12, streaming synthetic audio is the same as non-streaming synthetic audio
    am_block: 72
    am_pad: 12
    # voc_pad and voc_block voc model to streaming voc infer,
    # when voc model is mb_melgan_csmsc, voc_pad set 14, streaming synthetic audio is the same as non-streaming synthetic audio; The minimum value of pad can be set to 7, streaming synthetic audio sounds normal
    # when voc model is hifigan_csmsc, voc_pad set 19, streaming synthetic audio is the same as non-streaming synthetic audio; voc_pad set 14, streaming synthetic audio sounds normal
    voc_block: 36
    voc_pad: 14
    

#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### TTS #########################################
################### speech task: tts; engine_type: online-onnx #######################
tts_online-onnx: 
    # am (acoustic model) choices=['fastspeech2_csmsc_onnx', 'fastspeech2_cnndecoder_csmsc_onnx']
    # fastspeech2_cnndecoder_csmsc_onnx support streaming am infer.        
    am: 'fastspeech2_cnndecoder_csmsc_onnx' 
    # am_ckpt is a list, if am is fastspeech2_cnndecoder_csmsc_onnx, am_ckpt = [encoder model, decoder model, postnet model];
    # if am is fastspeech2_csmsc_onnx, am_ckpt = [ckpt model];
    am_ckpt:   # list
    am_stat: 
    phones_dict: 
    tones_dict: 
    speaker_dict: 
    spk_id: 0
    am_sample_rate: 24000
    am_sess_conf:
        device: "cpu" # set 'gpu:id' or 'cpu'
        use_trt: False
        cpu_threads: 4

    # voc (vocoder) choices=['mb_melgan_csmsc_onnx', 'hifigan_csmsc_onnx']
    # Both mb_melgan_csmsc_onnx and hifigan_csmsc_onnx support streaming voc inference
    voc: 'hifigan_csmsc_onnx'
    voc_ckpt: 
    voc_sample_rate: 24000
    voc_sess_conf:
        device: "cpu" # set 'gpu:id' or 'cpu'
        use_trt: False
        cpu_threads: 4

    # others
    lang: 'zh'
    # am_block and am_pad only for fastspeech2_cnndecoder_onnx model to streaming am infer,
    # when am_pad set 12, streaming synthetic audio is the same as non-streaming synthetic audio
    am_block: 72
    am_pad: 12
    # voc_pad and voc_block voc model to streaming voc infer,
    # when voc model is mb_melgan_csmsc_onnx, voc_pad set 14, streaming synthetic audio is the same as non-streaming synthetic audio; The minimum value of pad can be set to 7, streaming synthetic audio sounds normal
    # when voc model is hifigan_csmsc_onnx, voc_pad set 19, streaming synthetic audio is the same as non-streaming synthetic audio; voc_pad set 14, streaming synthetic audio sounds normal
    voc_block: 36
    voc_pad: 14
    # voc_upsample should be same as n_shift on voc config.
    voc_upsample: 300
    

================================================
FILE: demos/speech_web/speech_server/conf/ws_conformer_wenetspeech_application_faster.yaml
================================================
# This is the parameter configuration file for PaddleSpeech Serving.

#################################################################################
#                             SERVER SETTING                                    #
#################################################################################
host: 0.0.0.0
port: 8090

# The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['asr_online']
# protocol = ['websocket'] (only one can be selected).
# websocket only support online engine type.
protocol: 'websocket'
engine_list: ['asr_online']


#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### ASR #########################################
################### speech task: asr; engine_type: online #######################
asr_online:
    model_type: 'conformer_online_wenetspeech'
    am_model: # the pdmodel file of am static model [optional]
    am_params:  # the pdiparams file of am static model [optional]
    lang: 'zh'
    sample_rate: 16000
    cfg_path: 
    force_yes: True
    device: 'cpu' # cpu or gpu:id
    # Declared exactly once: mapping keys must be unique, and a second (empty)
    # `decode_method` entry is rejected by strict YAML loaders.
    decode_method: "attention_rescoring"
    continuous_decoding: True # enable continue decoding when endpoint detected
    num_decoding_left_chunks: 16
    am_predictor_conf:
        device:  # set 'gpu:id' or 'cpu'
        switch_ir_optim: True
        glog_info: False  # True -> print glog
        summary: True  # False -> do not show predictor config

    chunk_buffer_conf:
        window_n: 7     # frame
        shift_n: 4      # frame
        window_ms: 25   # ms
        shift_ms: 10    # ms
        sample_rate: 16000
        sample_width: 2


================================================
FILE: demos/speech_web/speech_server/main.py
================================================
import argparse
import base64
import datetime
import json
import os
from typing import List

import aiofiles
import librosa
import soundfile as sf
import uvicorn
from fastapi import FastAPI
from fastapi import File
from fastapi import Form
from fastapi import UploadFile
from fastapi import WebSocket
from fastapi import WebSocketDisconnect
from fastapi.responses import JSONResponse
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from src.AudioManeger import AudioMannger
from src.robot import Robot
from src.SpeechBase.vpr import VPR
from src.util import *
from src.WebsocketManeger import ConnectionManager
from starlette.middleware.cors import CORSMiddleware
from starlette.requests import Request
from starlette.responses import FileResponse
from starlette.websockets import WebSocketState as WebSocketState

from paddlespeech.cli.tts.infer import TTSExecutor
from paddlespeech.server.engine.asr.online.python.asr_engine import PaddleASRConnectionHanddler
from paddlespeech.server.utils.audio_process import float2pcm

# Parse command-line options. This runs at module import time, so
# `parse_args()` reads the process arguments as soon as the module is loaded.
parser = argparse.ArgumentParser(prog='PaddleSpeechDemo', add_help=True)

parser.add_argument(
    "--port",
    action="store",
    type=int,
    help="port of the app",
    default=8010,
    required=False)

args = parser.parse_args()
# Port passed to uvicorn.run() in the __main__ guard at the end of the file.
port = args.port

# Configuration files and resources.
# NOTE(review): all paths are relative — presumably the server must be started
# from the speech_server directory; confirm against the README start-up steps.
tts_config = "conf/tts_online_application.yaml"
asr_config = "conf/ws_conformer_wenetspeech_application_faster.yaml"
asr_init_path = "source/demo/demo.wav"
db_path = "source/db/vpr.sqlite"
ie_model_path = "source/model"
tts_model = TTSExecutor()
# Directory configuration: uploaded VPR audio and generated/received wav files.
UPLOAD_PATH = "source/vpr"
WAV_PATH = "source/wav"

# Make sure the working directories exist before any endpoint writes to them.
base_sources = [UPLOAD_PATH, WAV_PATH]
for path in base_sources:
    os.makedirs(path, exist_ok=True)

# Initialization of the web app and the shared service objects.
app = FastAPI()
chatbot = Robot(
    asr_config, tts_config, asr_init_path, ie_model_path=ie_model_path)
manager = ConnectionManager()
aumanager = AudioMannger(chatbot)
aumanager.init()
vpr = VPR(db_path, dim=192, top_k=5)
# Run one synthesis at start-up (original comment: "initialize / download the
# model"); the result is written to test.wav in the working directory.
tts_model(
    text="今天天气准不错",
    output="test.wav",
    am='fastspeech2_mix',
    spk_id=174,
    voc='hifigan_csmsc',
    lang='mix', )


# Request schemas for the service endpoints.
# NlpBase: JSON body of the NLP endpoints (/nlp/chat and /nlp/ie); both read `chat`.
class NlpBase(BaseModel):
    chat: str  # input text


# TtsBase: JSON body of the HTTP TTS endpoints (/tts/offline and /tts/online).
class TtsBase(BaseModel):
    text: str  # text to synthesize


class Audios:
    """Mutable holder for raw audio bytes accumulated across requests."""

    def __init__(self) -> None:
        # Start with an empty byte buffer.
        self.audios = bytes()


# Module-level buffer shared by the ASR test/stop endpoints.
audios = Audios()

######################################################################
########################### ASR 服务 #################################
#####################################################################


# Receive an uploaded file and return its ASR result
@app.post("/asr/offline")
async def speech2textOffline(files: List[UploadFile]):
    """Store the first uploaded file under WAV_PATH and return its transcription.

    Only the first element of ``files`` is used. An empty list yields an
    ``ErrorRequest``.
    """
    if not files:
        return ErrorRequest(message="上传文件为空")

    upload = files[0]
    # File name: prefix + timestamp + random suffix
    stamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    out_file_path = os.path.join(WAV_PATH,
                                 "asr_offline_" + stamp + randName() + ".wav")
    async with aiofiles.open(out_file_path, 'wb') as out_file:
        payload = await upload.read()  # async read
        await out_file.write(payload)  # async write

    # Run recognition on the stored file and wrap the result
    return SuccessRequest(result=chatbot.speech2text(out_file_path))


# Receive a file and force-convert the wav to 16 kHz / int16
@app.post("/asr/offlinefile")
async def speech2textOfflineFile(files: List[UploadFile]):
    """Transcribe the first uploaded file after resampling it to 16 kHz int16.

    The response carries both the ASR text (``asr_result``) and the converted
    PCM samples as base64 (``wav_base64``). An empty list yields an
    ``ErrorRequest``.
    """
    if not files:
        return ErrorRequest(message="上传文件为空")

    upload = files[0]
    # Shared stem: prefix + timestamp + random suffix
    stem = "asr_offline_" + datetime.datetime.now().strftime(
        '%Y%m%d%H%M%S') + randName()
    raw_path = os.path.join(WAV_PATH, stem + ".wav")
    async with aiofiles.open(raw_path, 'wb') as out_file:
        payload = await upload.read()  # async read
        await out_file.write(payload)  # async write

    # Load resampled to 16 kHz, then convert float32 samples to int16
    samples, _ = librosa.load(raw_path, sr=16000)
    pcm = float2pcm(samples)
    wav_base64 = base64.b64encode(pcm.tobytes()).decode('utf8')

    # Write the converted audio next to the original upload
    out_file_path = os.path.join(WAV_PATH, stem + "_16k" + ".wav")
    sf.write(out_file_path, pcm, 16000)

    # Recognise the converted file
    asr_text = chatbot.speech2text(out_file_path)
    return SuccessRequest(
        result={"asr_result": asr_text,
                "wav_base64": wav_base64})


# Streaming-upload test endpoint
@app.post("/asr/online1")
async def speech2textOnlineRecive(files: List[UploadFile]):
    """Append the bytes of every uploaded file to the shared ``audios`` buffer."""
    chunks = [await upload.read() for upload in files]
    audios.audios += b''.join(chunks)
    print(f"audios长度变化: {len(audios.audios)}")
    return SuccessRequest(message="接收成功")


# Measure the ambient noise level
@app.post("/asr/collectEnv")
async def collectEnv(files: List[UploadFile]):
    """Compute the environment volume from the first uploaded wav file.

    Returns a ``SuccessRequest`` whose result is ``aumanager.vad_threshold``
    after the computation, or an ``ErrorRequest`` when no file was uploaded
    (previously this case fell through and returned ``None``).
    """
    if not files:
        return ErrorRequest(message="上传文件为空")

    content = await files[0].read()  # async read
    # The first 44 bytes of the wav are header information; skip them.
    aumanager.compute_env_volume(content[44:])
    vad_ = aumanager.vad_threshold
    return SuccessRequest(result=vad_, message="采集环境噪音成功")


# Stop recording
@app.get("/asr/stopRecord")
async def stopRecord():
    """Clear the shared audio buffer and stop the audio manager."""
    audios.audios = bytes()
    aumanager.stop()
    print("Online录音暂停")
    reply = SuccessRequest(message="停止成功")
    return reply


# Resume recording
@app.get("/asr/resumeRecord")
async def resumeRecord():
    """Resume the audio manager after a previous stop."""
    aumanager.resume()
    print("Online录音恢复")
    reply = SuccessRequest(message="Online录音恢复")
    return reply


# ASR used by the voice-chat feature
@app.websocket("/ws/asr/offlineStream")
async def websocket_endpoint(websocket: WebSocket):
    """Receive audio bytes over the websocket and push back ASR results.

    Every received chunk is handed to ``aumanager.stream_asr`` unless the
    manager is paused. A truthy result is sent to this client and the
    manager's ASR state is cleared afterwards.
    """
    await manager.connect(websocket)
    try:
        while True:
            chunk = await websocket.receive_bytes()
            if aumanager.is_pause:
                print("录音暂停")
                continue

            recognized = aumanager.stream_asr(chunk)
            if recognized:
                await manager.send_personal_message(recognized, websocket)
                aumanager.clear_asr()
    except WebSocketDisconnect:
        # Client went away: drop it from the connection manager.
        manager.disconnect(websocket)


# ASR used by streaming recognition
@app.websocket('/ws/asr/onlineStream')
async def websocket_endpoint_online(websocket: WebSocket):
    """PaddleSpeech Online ASR Server api

    Protocol, as implemented below:
      * text frame ``{"signal": "start"}`` creates a per-connection
        ``PaddleASRConnectionHanddler`` and answers ``server_ready``;
      * binary frames carry PCM data; each one is decoded and the current
        result is sent back as ``{"result": ...}``;
      * text frame ``{"signal": "end"}`` finalises decoding, sends the final
        result with ``"signal": "finished"`` and ends the loop.

    Frames that are not valid JSON, lack ``signal`` or carry an unknown signal
    are answered with the "no valid json data" message and the loop continues.
    ``end`` or PCM data received before ``start`` are answered with an error
    message instead of failing on the missing handler.

    Args:
        websocket (WebSocket): the websocket instance
    """

    #1. the interface wait to accept the websocket protocol header
    #   and only we receive the header, it establish the connection with specific thread
    await websocket.accept()

    #2. if we accept the websocket headers, we will get the online asr engine instance
    engine = chatbot.asr.engine

    #3. each websocket connection has its own PaddleASRConnectionHanddler;
    #   it is created only when the client sends the start signal
    connection_handler = None

    invalid_resp = {"status": "ok", "message": "no valid json data"}
    not_started_resp = {
        "status": "error",
        "message": "start signal not received"
    }

    try:
        #4. process the audio package by package according to the protocol;
        #   the loop ends when the client sends the end signal
        while True:
            # careful here, changed the source code from starlette.websockets
            # 4.1 we wait for the client signal for the specific action
            assert websocket.application_state == WebSocketState.CONNECTED
            message = await websocket.receive()
            websocket._raise_on_disconnect(message)

            #4.2 text for the action command and bytes for pcm data
            # (a key may be present with a None value, so test the value)
            if message.get("text") is not None:
                # parse the command; malformed JSON is treated as "no signal"
                try:
                    command = json.loads(message["text"])
                except ValueError:
                    command = None
                signal = command.get('signal') if isinstance(command,
                                                              dict) else None

                if signal == 'start':
                    # create the instance to process the audio
                    connection_handler = PaddleASRConnectionHanddler(engine)
                    await websocket.send_json({
                        "status": "ok",
                        "signal": "server_ready"
                    })
                elif signal == 'end':
                    if connection_handler is None:
                        # nothing to finalise: the client never sent start
                        await websocket.send_json(not_started_resp)
                        continue
                    # process all the remaining audio, return the final
                    # result, reset the handler and leave the loop
                    connection_handler.decode(is_finished=True)
                    connection_handler.rescoring()
                    asr_results = connection_handler.get_result()
                    connection_handler.reset()

                    resp = {
                        "status": "ok",
                        "signal": "finished",
                        'result': asr_results
                    }
                    await websocket.send_json(resp)
                    break
                else:
                    # missing or unknown signal: answer once and keep going
                    await websocket.send_json(invalid_resp)
            elif message.get("bytes") is not None:
                # bytes for the pcm data
                pcm = message["bytes"]
                print("###############")
                print("len message: ", len(pcm))
                print("###############")

                if connection_handler is None:
                    # audio before the start signal cannot be decoded
                    await websocket.send_json(not_started_resp)
                    continue

                # extract the features of this package and decode it
                connection_handler.extract_feat(pcm)
                connection_handler.decode(is_finished=False)
                asr_results = connection_handler.get_result()

                # return the current period result
                # if the engine create the vad instance, this connection will have many period results
                resp = {'result': asr_results}
                print(resp)
                await websocket.send_json(resp)
    except WebSocketDisconnect:
        # the client closed the connection; nothing to send back
        pass


######################################################################
########################### NLP 服务 #################################
#####################################################################


@app.post("/nlp/chat")
async def chatOffline(nlp_base: NlpBase):
    """Return the chatbot reply for ``nlp_base.chat``; empty text is an error."""
    query = nlp_base.chat
    if query:
        return SuccessRequest(result=chatbot.chat(query))
    return ErrorRequest(message="传入文本为空")


@app.post("/nlp/ie")
async def ieOffline(nlp_base: NlpBase):
    """Return the information-extraction result for ``nlp_base.chat``; empty text is an error."""
    query = nlp_base.chat
    if query:
        return SuccessRequest(result=chatbot.ie(query))
    return ErrorRequest(message="传入文本为空")


######################################################################
########################### TTS 服务 #################################
#####################################################################


# End-to-end synthesis
@app.post("/tts/offline")
async def text2speechOffline(tts_base: TtsBase):
    """Synthesize ``tts_base.text`` to a wav file and return it base64-encoded.

    The wav is written under WAV_PATH; empty text yields an ``ErrorRequest``.
    """
    text = tts_base.text
    if not text:
        return ErrorRequest(message="文本为空")

    stamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    out_file_path = os.path.join(WAV_PATH,
                                 "tts_" + stamp + randName() + ".wav")
    # Use the mixed Chinese/English CLI model
    synth_options = dict(
        am='fastspeech2_mix', spk_id=174, voc='hifigan_csmsc', lang='mix')
    tts_model(text=text, output=out_file_path, **synth_options)

    with open(out_file_path, "rb") as wav_file:
        encoded = base64.b64encode(wav_file.read())
    return SuccessRequest(result=encoded)


# Streaming TTS over HTTP
@app.post("/tts/online")
async def stream_tts(request_body: TtsBase):
    """Stream the audio produced by ``chatbot.text2speechStreamBytes`` for the given text."""
    audio_stream = chatbot.text2speechStreamBytes(text=request_body.text)
    return StreamingResponse(audio_stream)


# Streaming TTS over websocket
@app.websocket("/ws/tts/online")
async def stream_ttsWS(websocket: WebSocket):
    """Synthesize each received text and push the audio chunks to the client.

    For every non-empty text message, each chunk yielded by
    ``chatbot.text2speechStream`` is sent as ``{"wav": chunk, "done": False}``,
    followed by a terminating message with ``"done": True``. As before, the
    terminating message repeats the last chunk; when the generator yields
    nothing it now carries an empty string instead of raising on an unbound
    (or stale, from the previous request) ``sub_wav``.
    """
    await manager.connect(websocket)
    try:
        while True:
            text = await websocket.receive_text()
            if not text:
                continue

            # Reset per request so an empty synthesis cannot reuse old data.
            # NOTE(review): "" assumes chunks are strings — confirm against
            # chatbot.text2speechStream.
            sub_wav = ""
            for sub_wav in chatbot.text2speechStream(text=text):
                res = {"wav": sub_wav, "done": False}
                await websocket.send_json(res)

            # End of this synthesis
            res = {"wav": sub_wav, "done": True}
            await websocket.send_json(res)

    except WebSocketDisconnect:
        manager.disconnect(websocket)


######################################################################
########################### VPR 服务 #################################
#####################################################################

# CORS: accept requests from any origin, with any method and any header, and
# allow credentials.
# NOTE(review): this is fully open — presumably intended for the local demo
# only; confirm before exposing the server publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"])


@app.post('/vpr/enroll')
async def vpr_enroll(table_name: str=None,
                     spk_id: str=Form(...),
                     audio: UploadFile=File(...)):
    """Enroll an uploaded audio file under ``spk_id`` in the voiceprint store.

    The upload is saved under UPLOAD_PATH and registered through
    ``vpr.vpr_enroll``. ``table_name`` is accepted but not used here.

    Returns:
        dict: ``{'status': bool, 'msg': str}``. On failure ``msg`` is the
        exception text (the exception object itself is not a JSON string).
    """
    try:
        if not spk_id:
            return {'status': False, 'msg': "spk_id can not be None"}
        # Save the upload data to server.
        content = await audio.read()
        now_name = "vpr_enroll_" + datetime.datetime.strftime(
            datetime.datetime.now(), '%Y%m%d%H%M%S') + randName() + ".wav"
        audio_path = os.path.join(UPLOAD_PATH, now_name)

        with open(audio_path, "wb+") as f:
            f.write(content)
        vpr.vpr_enroll(username=spk_id, wav_path=audio_path)
        return {'status': True, 'msg': "Successfully enroll data!"}
    except Exception as e:
        return {'status': False, 'msg': str(e)}


@app.post('/vpr/recog')
async def vpr_recog(request: Request,
                    table_name: str=None,
                    audio: UploadFile=File(...)):
    """Search the voiceprint store with an uploaded audio file.

    The upload is saved under UPLOAD_PATH and passed to ``vpr.do_search_vpr``.
    The result is a list of ``(spk_id, (path, score))`` entries ordered by
    score, highest first. ``request`` and ``table_name`` are not used here.
    """
    # Save the upload data to server.
    payload = await audio.read()
    stamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    query_audio_path = os.path.join(UPLOAD_PATH,
                                    "vpr_query_" + stamp + randName() + ".wav")
    with open(query_audio_path, "wb+") as query_file:
        query_file.write(payload)

    spk_ids, paths, scores = vpr.do_search_vpr(query_audio_path)

    # Key by speaker id (a later duplicate id overwrites an earlier one),
    # then order by the score stored at position 1 of each value.
    by_speaker = dict(zip(spk_ids, zip(paths, scores)))
    ranked = sorted(
        by_speaker.items(), key=lambda entry: entry[1][1], reverse=True)
    return ranked


@app.post('/vpr/del')
async def vpr_del(spk_id: dict=None):
    """Delete the voiceprint record identified by ``spk_id['spk_id']``.

    Returns:
        ``{'status': bool, 'msg': str}``. Any exception (including a missing
        body or key) produces an HTTP 400 response with the exception text;
        a ``(dict, 400)`` tuple would be serialised by FastAPI as a JSON array
        with status 200.
    """
    try:
        target_id = spk_id['spk_id']
        if not target_id:
            return {'status': False, 'msg': "spk_id can not be None"}
        vpr.vpr_del(username=target_id)
        return {'status': True, 'msg': "Successfully delete data!"}
    except Exception as e:
        return JSONResponse(
            status_code=400, content={'status': False,
                                      'msg': str(e)})


@app.get('/vpr/list')
async def vpr_list():
    """List all enrolled voiceprint records.

    Returns:
        ``(spk_ids, vpr_ids)`` as produced by ``vpr.do_list()``, or
        ``({'status': False, 'msg': <text>}, 400)`` when an exception occurs.
    """
    try:
        spk_ids, vpr_ids = vpr.do_list()
        return spk_ids, vpr_ids
    except Exception as e:
        # Send the error text, not the exception object: an exception
        # instance does not serialize to a readable JSON value.
        return {'status': False, 'msg': str(e)}, 400


@app.get('/vpr/database64')
async def vpr_database64(vprId: int):
    """Return the enrolled audio of a record as base64-encoded 16-bit PCM.

    Args:
        vprId: Record id of the enrolled voiceprint.

    Returns:
        ``SuccessRequest(result=<base64 str>)`` on success, a status dict
        when ``vprId`` is falsy, or ``({'status': False, 'msg': <text>}, 400)``
        when an exception occurs.
    """
    try:
        if not vprId:
            return {'status': False, 'msg': "vpr_id can not be None"}
        audio_path = vpr.do_get_wav(vprId)

        # Resample to 16 kHz and convert to 16-bit PCM before encoding.
        wav, sr = librosa.load(audio_path, sr=16000)
        wav = float2pcm(wav)  # float32 to int16
        wav_bytes = wav.tobytes()  # to bytes
        wav_base64 = base64.b64encode(wav_bytes).decode('utf8')

        return SuccessRequest(result=wav_base64)
    except Exception as e:
        # Send the error text, not the exception object: an exception
        # instance does not serialize to a readable JSON value.
        return {'status': False, 'msg': str(e)}, 400


@app.get('/vpr/data')
async def vpr_data(vprId: int):
    """Return the enrolled audio file of a record.

    Args:
        vprId: Record id of the enrolled voiceprint.

    Returns:
        A ``FileResponse`` for the stored audio path on success, a status
        dict when ``vprId`` is falsy, or
        ``({'status': False, 'msg': <text>}, 400)`` when an exception occurs.
    """
    try:
        if not vprId:
            return {'status': False, 'msg': "vpr_id can not be None"}
        audio_path = vpr.do_get_wav(vprId)
        return FileResponse(audio_path)
    except Exception as e:
        # Send the error text, not the exception object: an exception
        # instance does not serialize to a readable JSON value.
        return {'status': False, 'msg': str(e)}, 400


# When executed as a script, serve `app` with uvicorn on all interfaces
# (0.0.0.0) using the module-level `port`.
if __name__ == '__main__':
    uvicorn.run(app=app, host='0.0.0.0', port=port)


================================================
FILE: demos/speech_web/speech_server/requirements.txt
================================================
aiofiles
faiss-cpu
praatio>=5.0.0
pydantic
python-multipart
starlette


================================================
FILE: demos/speech_web/speech_server/src/AudioManeger.py
================================================
import datetime
import os
import wave

import numpy as np

from .util import randName


class AudioMannger:
    """Buffer streamed PCM audio, detect speech by energy, and trigger ASR.

    Incoming 16-bit PCM bytes are scanned window by window. Windows whose
    mean absolute amplitude exceeds ``vad_threshold`` are treated as speech
    and accumulated; once enough consecutive silent windows follow, the
    accumulated audio is written to a wav file and passed to
    ``robot.speech2text``.
    """

    def __init__(self,
                 robot,
                 frame_length=160,
                 frame=10,
                 data_width=2,
                 vad_default=300):
        """Set up buffers and VAD parameters.

        Args:
            robot: Speech backend; must provide ``speech2text(file_path)``.
            frame_length: Samples per frame (160 samples is 10 ms at the
                16 kHz rate written by ``save_audio``).
            frame: Number of frames per VAD window.
            data_width: Bytes per sample (2 for int16).
            vad_default: Default/minimum energy threshold for speech.
        """
        # Raw PCM bytes accumulated for the current utterance.
        self.audios = b''
        self.asr_result = ""
        # Core speech backend.
        self.robot = robot

        self.file_dir = "source"
        os.makedirs(self.file_dir, exist_ok=True)
        # NOTE: the misspelled attribute name is kept for compatibility.
        self.vad_deafult = vad_default
        self.vad_threshold = vad_default
        self.vad_threshold_path = os.path.join(self.file_dir,
                                               "vad_threshold.npy")

        # One frame is 10 ms.
        self.frame_length = frame_length
        # VAD is evaluated once every `frame` frames.
        self.frame = frame
        # int16 samples: two bytes each.
        self.data_width = data_width
        # VAD window size in bytes.
        self.window_length = frame_length * frame * data_width

        # Whether an utterance is currently being recorded.
        self.on_asr = False
        self.silence_cnt = 0
        self.max_silence_cnt = 4
        self.is_pause = False  # recording paused / resumed

    def init(self):
        """Load a previously saved VAD threshold from disk, if one exists."""
        if os.path.exists(self.vad_threshold_path):
            self.vad_threshold = np.load(self.vad_threshold_path)

    def clear_audio(self):
        """Drop the accumulated PCM bytes."""
        self.audios = b''

    def clear_asr(self):
        """Reset the stored ASR result."""
        self.asr_result = ""

    def compute_chunk_volume(self, start_index, pcm_bins):
        """Return the mean absolute int16 amplitude of one window.

        Args:
            start_index: Byte offset of the window inside ``pcm_bins``.
            pcm_bins: Raw 16-bit PCM bytes.

        Returns:
            Mean absolute amplitude as ``np.float32``; ``0.0`` when the
            window holds no complete sample.
        """
        pcm_bin = pcm_bins[start_index:start_index + self.window_length]
        # np.frombuffer needs whole int16 samples; a stream cut in the middle
        # of a sample would otherwise raise ValueError.
        sample_bytes = np.dtype(np.int16).itemsize
        usable = len(pcm_bin) - len(pcm_bin) % sample_bytes
        if usable == 0:
            # Avoid the NaN (and RuntimeWarning) of a mean over no samples.
            return np.float32(0.0)
        pcm_np = np.frombuffer(pcm_bin[:usable], np.int16)
        # Convert to float before abs() so -32768 does not overflow.
        x = np.abs(pcm_np.astype(np.float32))
        return np.mean(x)

    def is_speech(self, start_index, pcm_bins):
        """Tell whether the window starting at ``start_index`` is speech.

        Returns:
            True when the window energy exceeds ``vad_threshold``; False
            otherwise, including when ``start_index`` is at or past the end
            of ``pcm_bins``.
        """
        # An offset equal to the buffer length also selects an empty window.
        if start_index >= len(pcm_bins):
            return False
        energy = self.compute_chunk_volume(
            start_index=start_index, pcm_bins=pcm_bins)
        return bool(energy > self.vad_threshold)

    def compute_env_volume(self, pcm_bins):
        """Calibrate ``vad_threshold`` from an ambient-noise recording.

        The threshold becomes the loudest window energy plus 100 when that
        energy exceeds the default, otherwise the default. The value is
        saved to ``vad_threshold_path``.
        """
        max_energy = 0
        start = 0
        while start < len(pcm_bins):
            energy = self.compute_chunk_volume(
                start_index=start, pcm_bins=pcm_bins)
            if energy > max_energy:
                max_energy = energy
            start += self.window_length
        self.vad_threshold = max_energy + 100 if max_energy > self.vad_deafult else self.vad_deafult

        # Persist the threshold so init() can restore it later.
        np.save(self.vad_threshold_path, self.vad_threshold)
        print(f"vad 阈值大小: {self.vad_threshold}")
        print(f"环境采样保存: {os.path.realpath(self.vad_threshold_path)}")

    def stream_asr(self, pcm_bin):
        """Feed one PCM chunk through endpoint detection and maybe run ASR.

        Args:
            pcm_bin: Raw 16-bit PCM bytes of the newest chunk.

        Returns:
            ``self.asr_result`` when the end of an utterance is detected in
            this chunk, otherwise an empty string.
        """
        start = 0
        while start < len(pcm_bin):
            if self.is_speech(start_index=start, pcm_bins=pcm_bin):
                self.on_asr = True
                self.silence_cnt = 0
                print("录音中")
                self.audios += pcm_bin[start:start + self.window_length]
            else:
                if self.on_asr:
                    self.silence_cnt += 1
                    if self.silence_cnt > self.max_silence_cnt:
                        self.on_asr = False
                        self.silence_cnt = 0
                        # Recording stopped.
                        print("录音停止")
                        # Only utterances longer than 32000 bytes (1 s of
                        # 16 kHz int16 audio) are saved and recognized.
                        # NOTE(review): for shorter audio the previous
                        # asr_result is returned unchanged - confirm callers
                        # clear it via clear_asr().
                        if len(self.audios) > 2 * 16000:
                            file_path = os.path.join(
                                self.file_dir,
                                "asr_" + datetime.datetime.strftime(
                                    datetime.datetime.now(),
                                    '%Y%m%d%H%M%S') + randName() + ".wav")
                            self.save_audio(file_path=file_path)
                            self.asr_result = self.robot.speech2text(file_path)
                        self.clear_audio()
                        return self.asr_result
                    else:
                        # Short pause inside an utterance: keep recording.
                        print("录音中 静音")
                        self.audios += pcm_bin[start:start + self.window_length]
            start += self.window_length
        return ""

    def save_audio(self, file_path):
        """Write the accumulated PCM bytes to ``file_path`` as a wav file.

        The file is mono, 16-bit, 16 kHz.
        """
        print("保存音频")
        # The context manager closes the file even if writing fails.
        with wave.open(file_path, 'wb') as wf:
            wf.setnchannels(1)  # mono
            wf.setsampwidth(2)  # 2 bytes per sample
            wf.setframerate(16000)  # 16 kHz sample rate
            wf.writeframes(self.audios)

    def end(self):
        """Save the accumulated audio to ``asr.wav`` and return its ASR result."""
        file_path = os.path.join(self.file_dir, "asr.wav")
        self.save_audio(file_path=file_path)
        return self.robot.speech2text(file_path)

    def stop(self):
        """Pause recording and discard the accumulated audio."""
        self.is_pause = True
        self.audios = b''

    def resume(self):
        """Resume recording after ``stop``."""
        self.is_pause = False


================================================
FILE: demos/speech_web/speech_server/src/SpeechBase/asr.py
================================================
import numpy as np

from paddlespeech.server.engine.asr.online.python.asr_engine import ASREngine
from paddlespeech.server.engine.asr.online.python.asr_engine import PaddleASRConnectionHanddler
from paddlespeech.server.utils.config import get_config


def readWave(samples):
    """Yield fixed-size chunks of ``samples``, zero-padding the last one.

    Each chunk holds 85 * 16 = 1360 samples (85 ms at 16 kHz). The input is
    padded with zeros of the same dtype so its length becomes a multiple of
    the chunk size.

    Args:
        samples: 1-D numpy array of audio samples.

    Yields:
        Consecutive arrays of 1360 samples each.
    """
    chunk_size = 85 * 16  # 1360 samples = 85 ms at a 16 kHz sample rate
    total = len(samples)

    # Zeros needed to reach the next multiple of chunk_size (0 if aligned).
    remainder = total % chunk_size
    pad_len = chunk_size - remainder if remainder != 0 else 0

    tail = np.zeros((pad_len), dtype=samples.dtype)
    padded = np.concatenate([samples, tail], axis=0)

    padded_len = total + pad_len
    assert padded_len % chunk_size == 0
    for begin in range(0, padded_len, chunk_size):
        yield padded[begin:begin + chunk_size]


class ASR:
    """Wrapper around the streaming ASR engine for offline and online use."""

    def __init__(
            self,
            config_path, ) -> None:
        """Load the ``asr_online`` section of the config and start the engine."""
        self.config = get_config(config_path)['asr_online']
        engine = ASREngine()
        self.engine = engine
        engine.init(self.config)
        self.connection_handler = PaddleASRConnectionHanddler(engine)

    def offlineASR(self, samples, sample_rate=16000):
        """Recognize a complete utterance in one pass.

        Args:
            samples: Audio samples handed to the engine's ``preprocess``.
            sample_rate: Sample rate of ``samples``.

        Returns:
            Whatever the engine's ``postprocess`` returns; the engine is
            reset afterwards.
        """
        engine = self.engine
        feats, feat_lens = engine.preprocess(
            samples=samples, sample_rate=sample_rate)
        engine.run(feats, feat_lens)
        text = engine.postprocess()
        engine.reset()
        return text

    def onlineASR(self, samples: bytes=None, is_finished=False):
        """Feed one streaming chunk, or finalize the stream.

        Args:
            samples: Audio bytes of the next chunk (ignored when finishing).
            is_finished: When true, run the final decode and rescoring and
                reset the connection handler.

        Returns:
            The current result from the connection handler.
        """
        handler = self.connection_handler
        if is_finished:
            # End of stream: final decode, rescoring, then reset state.
            handler.decode(is_finished=True)
            handler.rescoring()
            final_result = handler.get_result()
            handler.reset()
            return final_result

        # Stream in progress: extract features and decode incrementally.
        handler.extract_feat(samples)
        handler.decode(is_finished)
        return handler.get_result()


================================================
FILE: demos/speech_web/speech_server/src/SpeechBase/nlp.py
================================================
from paddlenlp import Taskflow


class NLP:
    """Thin wrapper over two Taskflow pipelines: information extraction and dialogue."""

    def __init__(self, ie_model_path=None):
        """Create the pipelines.

        Args:
            ie_model_path: Optional path passed to Taskflow as ``task_path``
                for the information-extraction model; omitted when falsy.
        """
        ie_options = {"schema": ["时间", "出发地", "目的地", "费用"]}
        if ie_model_path:
            ie_options["task_path"] = ie_model_path
        self.ie_model = Taskflow("information_extraction", **ie_options)

        self.dialogue_model = Taskflow("dialogue")

    def chat(self, text):
        """Return the dialogue model's first reply for ``text``."""
        replies = self.dialogue_model([text])
        return replies[0]

    def ie(self, text):
        """Run information extraction on ``text`` and return the raw result."""
        return self.ie_model(text)


================================================
FILE: demos/speech_web/speech_server/src/SpeechBase/sql_helper.py
================================================
import base64
import os
import sqlite3

import numpy as np


def dict_factory(cursor, row):
    """sqlite3 row factory that maps column names to the row's values.

    Args:
        cursor: Cursor whose ``description`` lists the result columns.
        row: Tuple of values for one result row.

    Returns:
        A dict keyed by column name.
    """
    return {
        column[0]: row[position]
        for position, column in enumerate(cursor.description)
    }


class DataBase(object):
    """SQLite-backed store of voiceprint records in the ``vprtable`` table.

    Each record holds an auto-incremented ``id``, a ``username``, a
    base64-encoded embedding ``vector`` and the ``wavpath`` of the enrolled
    audio. All queries pass user-supplied values as bound parameters so they
    cannot alter the SQL statement.
    """

    def __init__(self, db_path: str):
        """Open (creating if needed) the database at ``db_path``.

        Args:
            db_path: Path of the SQLite file; missing parent directories are
                created.
        """
        db_path = os.path.realpath(db_path)
        # No-op when the directory (and therefore possibly the file) exists.
        os.makedirs(os.path.dirname(db_path), exist_ok=True)
        self.db_path = db_path

        self.conn = sqlite3.connect(self.db_path)
        self.conn.row_factory = dict_factory
        self.cursor = self.conn.cursor()
        self.init_database()

    def init_database(self):
        """Create ``vprtable`` if it does not exist yet."""
        sql = """
        CREATE TABLE IF NOT EXISTS vprtable (
            `id` INTEGER PRIMARY KEY AUTOINCREMENT,
            `username` TEXT NOT NULL,
            `vector` TEXT NOT NULL,
            `wavpath` TEXT  NOT NULL
            ); 
        """
        self.cursor.execute(sql)
        self.conn.commit()

    def execute_base(self, sql, data_dict):
        """Execute ``sql`` with the bound parameters ``data_dict`` and commit."""
        self.cursor.execute(sql, data_dict)
        self.conn.commit()

    def insert_one(self, username, vector_base64: str, wav_path):
        """Insert one record.

        Args:
            username: Speaker name.
            vector_base64: Base64-encoded embedding.
            wav_path: Path of the enrolled audio; must exist on disk.

        Returns:
            ``(row_id, message)`` on success, ``(None, "wav not exists")``
            when ``wav_path`` is missing, or ``(None, exception)`` when the
            insert fails.
        """
        if not os.path.exists(wav_path):
            return None, "wav not exists"

        sql = """
            insert into 
            vprtable (username, vector, wavpath)
            values (?, ?, ?)
            """
        try:
            self.cursor.execute(sql, (username, vector_base64, wav_path))
            self.conn.commit()
            lastidx = self.cursor.lastrowid
            return lastidx, "data insert success"
        except Exception as e:
            print(e)
            return None, e

    def select_all(self):
        """Return every record as a list of row dicts."""
        sql = """
        SELECT * from vprtable
        """
        return self.cursor.execute(sql).fetchall()

    def select_by_id(self, vpr_id):
        """Return the records whose ``id`` equals ``vpr_id``."""
        # Bound parameter instead of string formatting: prevents SQL injection.
        sql = """
        SELECT * from vprtable WHERE `id` = ?
        """
        return self.cursor.execute(sql, (vpr_id, )).fetchall()

    def select_by_username(self, username):
        """Return the records whose ``username`` equals ``username``."""
        # Bound parameter instead of string formatting: prevents SQL injection
        # and handles names containing quotes.
        sql = """
        SELECT * from vprtable WHERE `username` = ?
        """
        return self.cursor.execute(sql, (username, )).fetchall()

    def drop_by_username(self, username):
        """Delete the records whose ``username`` equals ``username``."""
        # Bound parameter instead of string formatting: prevents SQL injection.
        sql = """
        DELETE from vprtable WHERE `username` = ?
        """
        self.cursor.execute(sql, (username, ))
        self.conn.commit()

    def drop_all(self):
        """Delete every record but keep the table."""
        sql = """
        DELETE from vprtable
        """

        self.cursor.execute(sql)
        self.conn.commit()

    def drop_table(self):
        """Drop ``vprtable`` itself."""
        sql = """
            DROP TABLE vprtable
        """

        self.cursor.execute(sql)
        self.conn.commit()

    def encode_vector(self, vector: np.ndarray):
        """Return the raw bytes of ``vector`` as a base64 string."""
        return base64.b64encode(vector).decode('utf8')

    def decode_vector(self, vector_base64, dtype=np.float32):
        """Decode a base64 string back into a 1-D array of ``dtype``."""
        raw = base64.b64decode(vector_base64)
        return np.frombuffer(raw, dtype=dtype)


================================================
FILE: demos/speech_web/speech_server/src/SpeechBase/tts.py
================================================
# tts 推理引擎，支持流式与非流式
# 精简化使用
# 用 onnxruntime 进行推理
# 1. 下载对应的模型
# 2. 加载模型
# 3. 端到端推理
# 4. 流式推理
import base64
import logging
import math

import numpy as np

from paddlespeech.server.engine.tts.online.onnx.tts_engine import TTSEngine
from paddlespeech.server.utils.audio_process import float2pcm
from paddlespeech.server.utils.config import get_config
from paddlespeech.server.utils.util import denorm
from paddlespeech.server.utils.util import get_chunks
from paddlespeech.t2s.frontend.zh_frontend import Frontend


class TTS:
    """Text-to-speech wrapper around the ONNX streaming TTS engine.

    Offers whole-utterance synthesis (``offlineTTS``) and chunked streaming
    synthesis (``streamTTS``, ``streamTTSBytes``).
    """

    def __init__(self, config_path):
        """Load the ``tts_online-onnx`` config section and start the engine."""
        self.config = get_config(config_path)['tts_online-onnx']
        # Override the vocoder block size from the config file.
        self.config['voc_block'] = 36
        self.engine = TTSEngine()
        self.engine.init(self.config)
        self.executor = self.engine.executor
        #self.engine.warm_up()

        # Text frontend, built on the engine's phone vocabulary.
        self.frontend = Frontend(
            phone_vocab_path=self.engine.executor.phones_dict,
            tone_vocab_path=None)

    def depadding(self, data, chunk_num, chunk_id, block, pad, upsample):
        """Strip the padded context from one chunk of streaming output.

        Args:
            data: Output of one chunk, sliced along its first axis.
            chunk_num: Total number of chunks.
            chunk_id: Zero-based index of this chunk.
            block: Chunk size without padding.
            pad: Padding size on each side of a chunk.
            upsample: Factor applied to ``block`` and the front padding when
                slicing ``data``.

        Returns:
            ``data`` with the padded region removed.
        """
        # The front padding cannot exceed what precedes this chunk.
        front_pad = min(chunk_id * block, pad)
        # first chunk
        if chunk_id == 0:
            data = data[:block * upsample]
        # last chunk
        elif chunk_id == chunk_num - 1:
            data = data[front_pad * upsample:]
        # middle chunk
        else:
            data = data[front_pad * upsample:(front_pad + block) * upsample]

        return data

    def offlineTTS(self, text):
        """Synthesize ``text`` sentence by sentence and return one waveform.

        For every sentence the encoder, decoder, postnet and vocoder sessions
        run in turn; the per-sentence waveforms are concatenated.
        """
        get_tone_ids = False
        merge_sentences = False

        input_ids = self.frontend.get_input_ids(
            text, merge_sentences=merge_sentences, get_tone_ids=get_tone_ids)
        phone_ids = input_ids["phone_ids"]
        wav_list = []
        for i in range(len(phone_ids)):
            orig_hs = self.engine.executor.am_encoder_infer_sess.run(
                None, input_feed={'text': phone_ids[i].numpy()})
            hs = orig_hs[0]
            am_decoder_output = self.engine.executor.am_decoder_sess.run(
                None, input_feed={'xs': hs})
            am_postnet_output = self.engine.executor.am_postnet_sess.run(
                None,
                input_feed={
                    'xs': np.transpose(am_decoder_output[0], (0, 2, 1))
                })
            # Add the postnet output (transposed back) to the decoder output.
            am_output_data = am_decoder_output + np.transpose(
                am_postnet_output[0], (0, 2, 1))
            normalized_mel = am_output_data[0][0]
            mel = denorm(normalized_mel, self.engine.executor.am_mu,
                         self.engine.executor.am_std)
            wav = self.engine.executor.voc_sess.run(
                output_names=None, input_feed={'logmel': mel})[0]
            wav_list.append(wav)
        wavs = np.concatenate(wav_list)
        return wavs

    def streamTTS(self, text):
        """Stream synthesized audio for ``text`` as base64 PCM chunks.

        Supports the ``fastspeech2_csmsc_onnx`` acoustic model (streaming
        vocoder only) and ``fastspeech2_cnndecoder_csmsc_onnx`` (streaming
        acoustic model and vocoder). Any other ``config.am`` only logs an
        error.

        Yields:
            Base64 strings produced by ``after_process``.
        """

        get_tone_ids = False
        merge_sentences = False

        # Text frontend: text -> phone ids, one entry per sentence.
        input_ids = self.frontend.get_input_ids(
            text, merge_sentences=merge_sentences, get_tone_ids=get_tone_ids)
        phone_ids = input_ids["phone_ids"]

        for i in range(len(phone_ids)):
            part_phone_ids = phone_ids[i].numpy()
            voc_chunk_id = 0

            # fastspeech2_csmsc
            if self.config.am == "fastspeech2_csmsc_onnx":
                # Acoustic model: full mel for the sentence in one run.
                mel = self.executor.am_sess.run(
                    output_names=None, input_feed={'text': part_phone_ids})
                mel = mel[0]

                # voc streaming
                mel_chunks = get_chunks(mel, self.config.voc_block,
                                        self.config.voc_pad, "voc")
                voc_chunk_num = len(mel_chunks)
                # NOTE: reuses the name `i`; the outer loop rebinds it from
                # range() on its next iteration.
                for i, mel_chunk in enumerate(mel_chunks):
                    sub_wav = self.executor.voc_sess.run(
                        output_names=None, input_feed={'logmel': mel_chunk})
                    sub_wav = self.depadding(
                        sub_wav[0], voc_chunk_num, i, self.config.voc_block,
                        self.config.voc_pad, self.config.voc_upsample)

                    yield self.after_process(sub_wav)

            # fastspeech2_cnndecoder_csmsc 
            elif self.config.am == "fastspeech2_cnndecoder_csmsc_onnx":
                # Acoustic model encoder.
                orig_hs = self.executor.am_encoder_infer_sess.run(
                    None, input_feed={'text': part_phone_ids})
                orig_hs = orig_hs[0]

                # streaming voc chunk info
                mel_len = orig_hs.shape[1]
                voc_chunk_num = math.ceil(mel_len / self.config.voc_block)
                start = 0
                end = min(self.config.voc_block + self.config.voc_pad, mel_len)

                # streaming am
                hss = get_chunks(orig_hs, self.config.am_block,
                                 self.config.am_pad, "am")
                am_chunk_num = len(hss)
                for i, hs in enumerate(hss):
                    am_decoder_output = self.executor.am_decoder_sess.run(
                        None, input_feed={'xs': hs})
                    am_postnet_output = self.executor.am_postnet_sess.run(
                        None,
                        input_feed={
                            'xs': np.transpose(am_decoder_output[0], (0, 2, 1))
                        })
                    am_output_data = am_decoder_output + np.transpose(
                        am_postnet_output[0], (0, 2, 1))
                    normalized_mel = am_output_data[0][0]

                    sub_mel = denorm(normalized_mel, self.executor.am_mu,
                                     self.executor.am_std)
                    sub_mel = self.depadding(sub_mel, am_chunk_num, i,
                                             self.config.am_block,
                                             self.config.am_pad, 1)

                    # Accumulate the mel frames produced so far.
                    if i == 0:
                        mel_streaming = sub_mel
                    else:
                        mel_streaming = np.concatenate(
                            (mel_streaming, sub_mel), axis=0)

                    # streaming voc
                    # Run the vocoder as soon as enough mel frames have been
                    # produced to fill the next vocoder chunk.
                    while (mel_streaming.shape[0] >= end and
                           voc_chunk_id < voc_chunk_num):
                        voc_chunk = mel_streaming[start:end, :]

                        sub_wav = self.executor.voc_sess.run(
                            output_names=None, input_feed={'logmel': voc_chunk})
                        sub_wav = self.depadding(
                            sub_wav[0], voc_chunk_num, voc_chunk_id,
                            self.config.voc_block, self.config.voc_pad,
                            self.config.voc_upsample)

                        yield self.after_process(sub_wav)

                        # Advance to the next vocoder chunk, including its
                        # padding on both sides and clamped to the mel range.
                        voc_chunk_id += 1
                        start = max(0, voc_chunk_id * self.config.voc_block -
                                    self.config.voc_pad)
                        end = min((voc_chunk_id + 1) * self.config.voc_block +
                                  self.config.voc_pad, mel_len)

            else:
                logging.error(
                    "Only support fastspeech2_csmsc or fastspeech2_cnndecoder_csmsc on streaming tts."
                )

    def streamTTSBytes(self, text):
        """Stream synthesized audio for ``text`` as raw int16 PCM bytes.

        Delegates chunking to the engine executor's ``infer`` generator.
        """
        for wav in self.engine.executor.infer(
                text=text,
                lang=self.engine.config.lang,
                am=self.engine.config.am,
                spk_id=0):
            wav = float2pcm(wav)  # float32 to int16
            wav_bytes = wav.tobytes()  # to bytes
            yield wav_bytes

    def after_process(self, wav):
        """Convert a float waveform to a base64 string of int16 PCM bytes."""
        # for tvm
        wav = float2pcm(wav)  # float32 to int16
        wav_bytes = wav.tobytes()  # to bytes
        wav_base64 = base64.b64encode(wav_bytes).decode('utf8')  # to base64
        return wav_base64

    def streamTTS_TVM(self, text):
        """Placeholder for a TVM-optimized streaming path; not implemented."""
        pass


================================================
FILE: demos/speech_web/speech_server/src/SpeechBase/vpr.py
================================================
# VPR demo: does not use MySQL or Milvus; intended only for the Docker demo.
import logging

import faiss
import numpy as np

from .sql_helper import DataBase
from .vpr_encode import get_audio_embedding


class VPR:
    """Voiceprint recognition demo backed by a SQL table and a faiss index.

    Embeddings are stored in the database (base64 text) and mirrored into an
    inner-product faiss index keyed by the database row id.
    """

    def __init__(self, db_path, dim, top_k) -> None:
        """Open the database and rebuild the faiss index from it.

        Args:
            db_path: Path of the database file.
            dim: Embedding dimension of the faiss index.
            top_k: Number of nearest neighbours returned by a search.
        """
        self.db_path = db_path
        self.dim = dim
        self.top_k = top_k
        self.dtype = np.float32
        self.vpr_idx = 0

        # Database.
        self.db = DataBase(db_path)

        # faiss inner-product index addressed by database row id.
        index_ip = faiss.IndexFlatIP(dim)
        self.index_ip = faiss.IndexIDMap(index_ip)
        self.init()

    def init(self):
        """Register every embedding stored in the database with faiss."""
        sql_dbs = self.db.select_all()
        if sql_dbs:
            for sql_db in sql_dbs:
                idx = sql_db['id']
                vc_bs64 = sql_db['vector']
                vc = self.db.decode_vector(vc_bs64)
                # faiss expects a 2-D (n, dim) array.
                if len(vc.shape) == 1:
                    vc = np.expand_dims(vc, axis=0)
                self.index_ip.add_with_ids(vc, np.array(
                    (idx, )).astype('int64'))
            logging.info("faiss 构建完毕")

    def faiss_enroll(self, idx, vc):
        """Add embedding ``vc`` to the faiss index under id ``idx``."""
        self.index_ip.add_with_ids(vc, np.array((idx, )).astype('int64'))

    def vpr_enroll(self, username, wav_path):
        """Enroll the voiceprint of ``wav_path`` under ``username``.

        Returns:
            The new database row id, or ``None`` when the embedding could not
            be computed or the insert failed.
        """
        emb = get_audio_embedding(wav_path)
        # Check for failure BEFORE expand_dims: expand_dims(None) yields an
        # object array, which would hide the failure.
        if emb is None:
            return None

        emb = np.expand_dims(emb, axis=0)
        emb_bs64 = self.db.encode_vector(emb)
        last_idx, _ = self.db.insert_one(username, emb_bs64, wav_path)
        if last_idx:
            # Mirror the new row into faiss.
            self.faiss_enroll(last_idx, emb)
        return last_idx

    def vpr_recog(self, wav_path):
        """Search the index for the speaker of ``wav_path``.

        Returns:
            A list of ``(score, row_id)`` pairs, where score is the inner
            product times 100 rounded to two decimals, or ``None`` when the
            embedding could not be computed.
        """
        emb_search = get_audio_embedding(wav_path)

        if emb_search is not None:
            emb_search = np.expand_dims(emb_search, axis=0)
            D, I = self.index_ip.search(emb_search, self.top_k)
            D = D.tolist()[0]
            I = I.tolist()[0]
            # faiss pads missing neighbours with id -1.
            return [(round(D[i] * 100, 2), I[i]) for i in range(len(D))
                    if I[i] != -1]
        else:
            logging.error("识别失败")
            return None

    def do_search_vpr(self, wav_path):
        """Return the best match per speaker for ``wav_path``.

        Returns:
            ``(spk_ids, paths, scores)``: unique usernames in search order,
            an empty string per speaker, and the score of each speaker's
            first hit. All three lists are empty when recognition fails.
        """
        spk_ids, paths, scores = [], [], []
        # vpr_recog returns None on failure; treat that as "no matches".
        recog_result = self.vpr_recog(wav_path) or []
        for score, idx in recog_result:
            rows = self.db.select_by_id(idx)
            if not rows:
                # Index entry without a database row; skip it.
                continue
            username = rows[0]['username']
            if username not in spk_ids:
                spk_ids.append(username)
                scores.append(score)
                paths.append("")
        return spk_ids, paths, scores

    def vpr_del(self, username):
        """Delete every voiceprint of ``username`` from faiss and the database."""
        res = self.db.select_by_username(username)
        for r in res:
            idx = r['id']
            self.index_ip.remove_ids(np.array((idx, )).astype('int64'))

        self.db.drop_by_username(username)

    def vpr_list(self):
        """Return all database rows."""
        return self.db.select_all()

    def do_list(self):
        """Return ``(spk_ids, vpr_ids)``: usernames and row ids of all records."""
        spk_ids, vpr_ids = [], []
        for res in self.db.select_all():
            spk_ids.append(res['username'])
            vpr_ids.append(res['id'])
        return spk_ids, vpr_ids

    def do_get_wav(self, vpr_idx):
        """Return the stored wav path of record ``vpr_idx``.

        Raises:
            IndexError: If no record has that id.
        """
        res = self.db.select_by_id(vpr_idx)
        return res[0]['wavpath']

    def vpr_data(self, idx):
        """Return the database rows with id ``idx``."""
        res = self.db.select_by_id(idx)
        return res

    def vpr_droptable(self):
        """Drop the database table and clear the faiss index."""
        self.db.drop_table()
        self.index_ip.reset()


================================================
FILE: demos/speech_web/speech_server/src/SpeechBase/vpr_encode.py
================================================
import logging

import numpy as np

from paddlespeech.cli.vector import VectorExecutor

vector_executor = VectorExecutor()


def get_audio_embedding(path):
    """Compute the L2-normalized speaker embedding of an audio file.

    Args:
        path: Path of the audio file handed to the vector executor.

    Returns:
        The embedding divided by its norm, or ``None`` if anything fails
        (the error is logged).
    """
    try:
        raw = vector_executor(audio_file=path, model='ecapatdnn_voxceleb12')
        norm = np.linalg.norm(raw)
        return raw / norm
    except Exception as e:
        logging.error(f"Error with embedding:{e}")
    return None


================================================
FILE: demos/speech_web/speech_server/src/WebsocketManeger.py
================================================
from typing import List

from fastapi import WebSocket


class ConnectionManager:
    """Keeps track of the currently accepted WebSocket connections."""

    def __init__(self):
        # WebSocket objects that have been accepted and not yet removed.
        self.active_connections: List[WebSocket] = []

    async def connect(self, ws: WebSocket):
        """Accept ``ws`` and remember it as an active connection."""
        await ws.accept()
        self.active_connections.append(ws)

    def disconnect(self, ws: WebSocket):
        """Forget ``ws``; call this when the connection is closed."""
        self.active_connections.remove(ws)

    @staticmethod
    async def send_personal_message(message: str, ws: WebSocket):
        """Send ``message`` as text to the single connection ``ws``."""
        await ws.send_text(message)

    async def broadcast(self, message: str):
        """Send ``message`` as text to every active connection, in order."""
        for peer in self.active_connections:
            await peer.send_text(message)


# Module-level instance created at import time.
manager = ConnectionManager()


================================================
FILE: demos/speech_web/speech_server/src/ernie_sat.py
================================================
import os

from .util import get_ngpu
from .util import MAIN_ROOT
from .util import run_cmd


class SAT:
    """Runs ERNIE-SAT speech synthesis/editing by invoking ``synthesize_e2e.py``.

    Each public method collects the model files for one language setup,
    builds a shell command with ``get_cmd`` and executes it via ``run_cmd``.
    """

    def __init__(self):
        # Pretrained ERNIE-SAT model directories.
        self.zh_pretrain_model_path = os.path.realpath(
            "source/model/erniesat_aishell3_ckpt_1.2.0")
        self.en_pretrain_model_path = os.path.realpath(
            "source/model/erniesat_vctk_ckpt_1.2.0")
        self.cross_pretrain_model_path = os.path.realpath(
            "source/model/erniesat_aishell3_vctk_ckpt_1.2.0")

        # Vocoder model directories.
        self.zh_voc_model_path = os.path.realpath(
            "source/model/hifigan_aishell3_ckpt_0.2.0")
        self.eb_voc_model_path = os.path.realpath(
            "source/model/hifigan_vctk_ckpt_0.2.0")
        self.cross_voc_model_path = os.path.realpath(
            "source/model/hifigan_aishell3_ckpt_0.2.0")

        self.BIN_DIR = os.path.join(MAIN_ROOT,
                                    "paddlespeech/t2s/exps/ernie_sat")

    def _synthesize(self, task_name, old_str, new_str, input_name,
                    output_name, source_lang, target_lang, pretrain_dir,
                    voc_dir, erniesat_ckpt_name):
        """Build the command for one model setup and run it."""
        cmd = self.get_cmd(
            task_name=task_name,
            input_name=input_name,
            old_str=old_str,
            new_str=new_str,
            config_path=os.path.join(pretrain_dir, "default.yaml"),
            phones_dict=os.path.join(pretrain_dir, "phone_id_map.txt"),
            erniesat_ckpt=os.path.join(pretrain_dir, erniesat_ckpt_name),
            erniesat_stat=os.path.join(pretrain_dir, "speech_stats.npy"),
            voc="hifigan_aishell3",
            voc_config=os.path.join(voc_dir, "default.yaml"),
            voc_ckpt=os.path.join(voc_dir, "snapshot_iter_2500000.pdz"),
            voc_stat=os.path.join(voc_dir, "feats_stats.npy"),
            output_name=output_name,
            source_lang=source_lang,
            target_lang=target_lang)

        return run_cmd(cmd, output_name)

    def zh_synthesize_edit(self,
                           old_str: str,
                           new_str: str,
                           input_name: os.PathLike,
                           output_name: os.PathLike,
                           task_name: str="synthesize",
                           erniesat_ckpt_name: str="snapshot_iter_289500.pdz"):
        """Chinese synthesis or editing.

        Args:
            old_str: Original transcript of ``input_name``.
            new_str: Target text.
            input_name: Input wav path.
            output_name: Output wav path.
            task_name: ``'synthesize'`` or ``'edit'``.
            erniesat_ckpt_name: Checkpoint file inside the zh model directory.

        Returns:
            The result of ``run_cmd``, or ``None`` for an unknown task name.
        """
        if task_name not in ['synthesize', 'edit']:
            print("task name only in ['edit', 'synthesize']")
            return None

        return self._synthesize(
            task_name=task_name,
            old_str=old_str,
            new_str=new_str,
            input_name=input_name,
            output_name=output_name,
            source_lang="zh",
            target_lang="zh",
            pretrain_dir=self.zh_pretrain_model_path,
            voc_dir=self.zh_voc_model_path,
            erniesat_ckpt_name=erniesat_ckpt_name)

    def crossclone(self,
                   old_str: str,
                   new_str: str,
                   input_name: os.PathLike,
                   output_name: os.PathLike,
                   source_lang: str,
                   target_lang: str,
                   erniesat_ckpt_name: str="snapshot_iter_489000.pdz"):
        """Cross-lingual synthesis from ``source_lang`` to ``target_lang``.

        Always runs the ``synthesize`` task with the cross-lingual model.

        Returns:
            The result of ``run_cmd``.
        """
        return self._synthesize(
            task_name="synthesize",
            old_str=old_str,
            new_str=new_str,
            input_name=input_name,
            output_name=output_name,
            source_lang=source_lang,
            target_lang=target_lang,
            pretrain_dir=self.cross_pretrain_model_path,
            voc_dir=self.cross_voc_model_path,
            erniesat_ckpt_name=erniesat_ckpt_name)

    def en_synthesize_edit(self,
                           old_str: str,
                           new_str: str,
                           input_name: os.PathLike,
                           output_name: os.PathLike,
                           task_name: str="synthesize",
                           erniesat_ckpt_name: str="snapshot_iter_199500.pdz"):
        """English synthesis or editing.

        Uses the English ERNIE-SAT model together with the aishell3 vocoder
        directory (``zh_voc_model_path``).

        Returns:
            The result of ``run_cmd``.
        """
        return self._synthesize(
            task_name=task_name,
            old_str=old_str,
            new_str=new_str,
            input_name=input_name,
            output_name=output_name,
            source_lang="en",
            target_lang="en",
            pretrain_dir=self.en_pretrain_model_path,
            voc_dir=self.zh_voc_model_path,
            erniesat_ckpt_name=erniesat_ckpt_name)

    def get_cmd(self,
                task_name: str,
                input_name: str,
                old_str: str,
                new_str: str,
                config_path: str,
                phones_dict: str,
                erniesat_ckpt: str,
                erniesat_stat: str,
                voc: str,
                voc_config: str,
                voc_ckpt: str,
                voc_stat: str,
                output_name: str,
                source_lang: str,
                target_lang: str):
        """Build the shell command line that runs ``synthesize_e2e.py``.

        Every value is shell-quoted, so text containing quotes, spaces or
        shell metacharacters is passed through literally instead of being
        interpreted by the shell.

        Returns:
            The command as a single-line string.
        """
        # Local import keeps this block self-contained.
        import shlex

        def quoted(value):
            return shlex.quote(str(value))

        options = [
            ("task_name", task_name),
            ("wav_path", input_name),
            ("old_str", old_str),
            ("new_str", new_str),
            ("source_lang", source_lang),
            ("target_lang", target_lang),
            ("erniesat_config", config_path),
            ("phones_dict", phones_dict),
            ("erniesat_ckpt", erniesat_ckpt),
            ("erniesat_stat", erniesat_stat),
            ("voc", voc),
            ("voc_config", voc_config),
            ("voc_ckpt", voc_ckpt),
            ("voc_stat", voc_stat),
            ("output_name", output_name),
            ("ngpu", get_ngpu()),
        ]
        parts = [
            "FLAGS_allocator_strategy=naive_best_fit",
            "FLAGS_fraction_of_gpu_memory_to_use=0.01",
            "python3",
            quoted(f"{self.BIN_DIR}/synthesize_e2e.py"),
        ]
        parts.extend(f"--{key}={quoted(value)}" for key, value in options)
        return " ".join(parts)


================================================
FILE: demos/speech_web/speech_server/src/finetune.py
================================================
import os

from .util import get_ngpu
from .util import MAIN_ROOT
from .util import run_cmd


def find_max_ckpt(model_path):
    """Return the largest iteration number among the ``.pdz`` files.

    The iteration is read from the last ``_``-separated field of the file
    name without its ``.pdz`` suffix (``snapshot_iter_2500.pdz`` -> 2500).
    Files that do not end in ``.pdz`` or whose last field is not an integer
    are ignored instead of aborting the scan.

    Args:
        model_path: directory that holds the checkpoint files.

    Returns:
        int: the highest iteration found, or 0 when there is none.
    """
    max_ckpt = 0
    for filename in os.listdir(model_path):
        if not filename.endswith('.pdz'):
            continue
        # Text after the final underscore of the stem.
        tail = filename[:-4].rpartition("_")[2]
        try:
            iteration = int(tail)
        except ValueError:
            # Not a numbered checkpoint (e.g. "best.pdz"); skip it.
            continue
        max_ckpt = max(max_ckpt, iteration)
    return max_ckpt


class FineTune:
    """Run FastSpeech2 fine-tuning and synthesis through shell commands.

    Both public methods assemble a command line and hand it to ``run_cmd``.
    The scripts used live under ``examples/other/tts_finetune/tts3`` and
    ``paddlespeech/t2s/exps`` relative to ``MAIN_ROOT``.
    """

    def __init__(self):
        self.now_file_path = os.path.dirname(__file__)
        # Directory holding the fine-tune helper scripts (local/*.py).
        self.PYTHONPATH = os.path.join(MAIN_ROOT,
                                       "examples/other/tts_finetune/tts3")
        self.BIN_DIR = os.path.join(MAIN_ROOT,
                                    "paddlespeech/t2s/exps/fastspeech2")
        # Model directories are resolved against the current working directory.
        self.pretrained_model_dir = os.path.realpath(
            "source/model/fastspeech2_aishell3_ckpt_1.1.0")
        self.voc_model_dir = os.path.realpath(
            "source/model/hifigan_aishell3_ckpt_0.2.0")
        self.finetune_config = os.path.join("conf/tts3_finetune.yaml")

    def finetune(self, input_dir, exp_dir='temp', epoch=100):
        """Run the fine-tune pipeline on the data in ``input_dir``.

        The steps follow examples/other/tts_finetune/tts3/run.sh: OOV check,
        MFA alignment, duration generation, feature extraction, environment
        preparation and fine-tuning.

        Args:
            input_dir: directory with the recordings to fine-tune on.
            exp_dir: experiment directory; ``mfa_result``, ``dump`` and
                ``exp`` are placed below it.
            epoch: value passed to the fine-tune script as ``--epoch``.

        Returns:
            Whatever ``run_cmd(cmd, exp_dir)`` returns.
        """
        newdir_name = "newdir"
        new_dir = os.path.join(input_dir, newdir_name)
        mfa_dir = os.path.join(exp_dir, 'mfa_result')
        dump_dir = os.path.join(exp_dir, 'dump')
        output_dir = os.path.join(exp_dir, 'exp')
        lang = "zh"
        ngpu = get_ngpu()

        # NOTE(review): the steps are separate shell commands, so an early
        # failure does not stop the later ones.
        cmd = f"""
            # check oov
            python3 {self.PYTHONPATH}/local/check_oov.py \
                --input_dir={input_dir} \
                --pretrained_model_dir={self.pretrained_model_dir} \
                --newdir_name={newdir_name} \
                --lang={lang}

            # get mfa result
            python3 {self.PYTHONPATH}/local/get_mfa_result.py \
                --input_dir={new_dir} \
                --mfa_dir={mfa_dir} \
                --lang={lang}

            # generate durations.txt
            python3 {self.PYTHONPATH}/local/generate_duration.py \
                --mfa_dir={mfa_dir}

            # extract feature
            python3 {self.PYTHONPATH}/local/extract_feature.py \
                --duration_file="./durations.txt" \
                --input_dir={new_dir} \
                --dump_dir={dump_dir} \
                --pretrained_model_dir={self.pretrained_model_dir}

            # create finetune env
            python3 {self.PYTHONPATH}/local/prepare_env.py \
                --pretrained_model_dir={self.pretrained_model_dir} \
                --output_dir={output_dir}

            # finetune
            python3 {self.PYTHONPATH}/local/finetune.py \
                --pretrained_model_dir={self.pretrained_model_dir} \
                --dump_dir={dump_dir} \
                --output_dir={output_dir} \
                --ngpu={ngpu} \
                --epoch={epoch} \
                --finetune_config={self.finetune_config}
        """

        print(cmd)

        return run_cmd(cmd, exp_dir)

    def synthesize(self, text, wav_name, out_wav_dir, exp_dir='temp'):
        """Synthesize ``text`` with the newest checkpoint found in ``exp_dir``.

        Args:
            text: sentence to synthesize.
            wav_name: utterance id; the output file is ``<wav_name>.wav``.
            out_wav_dir: directory passed as ``--output_dir``.
            exp_dir: experiment directory produced by ``finetune``.

        Returns:
            Whatever ``run_cmd`` returns for ``<out_wav_dir>/<wav_name>.wav``.
        """
        voc = "hifigan_aishell3"
        dump_dir = os.path.join(exp_dir, 'dump')
        output_dir = os.path.join(exp_dir, 'exp')
        text_path = os.path.join(exp_dir, 'sentences.txt')
        lang = "zh"
        ngpu = get_ngpu()

        model_path = f"{output_dir}/checkpoints"
        ckpt = find_max_ckpt(model_path)

        # Write the "<id> <sentence>" file that is passed as --text.
        with open(text_path, "w", encoding='utf8') as f:
            f.write(wav_name + " " + text)

        cmd = f"""
            FLAGS_allocator_strategy=naive_best_fit \
            FLAGS_fraction_of_gpu_memory_to_use=0.01 \
            python3 {self.BIN_DIR}/../synthesize_e2e.py \
                --am=fastspeech2_aishell3 \
                --am_config={self.pretrained_model_dir}/default.yaml \
                --am_ckpt={output_dir}/checkpoints/snapshot_iter_{ckpt}.pdz \
                --am_stat={self.pretrained_model_dir}/speech_stats.npy \
                --voc={voc} \
                --voc_config={self.voc_model_dir}/default.yaml \
                --voc_ckpt={self.voc_model_dir}/snapshot_iter_2500000.pdz \
                --voc_stat={self.voc_model_dir}/feats_stats.npy \
                --lang={lang} \
                --text={text_path} \
                --output_dir={out_wav_dir} \
                --phones_dict={dump_dir}/phone_id_map.txt \
                --speaker_dict={dump_dir}/speaker_id_map.txt \
                --spk_id=0 \
                --ngpu={ngpu}
        """

        out_path = os.path.join(out_wav_dir, f"{wav_name}.wav")

        return run_cmd(cmd, out_path)


================================================
FILE: demos/speech_web/speech_server/src/ge2e_clone.py
================================================
import os
import shutil

from .util import get_ngpu
from .util import MAIN_ROOT
from .util import run_cmd


class VoiceCloneGE2E():
    """Voice cloning via ``voice_cloning.py`` using a GE2E parameter file.

    All model paths are relative to the current working directory.
    """

    def __init__(self):
        # Directory that contains voice_cloning.py.
        self.BIN_DIR = os.path.join(MAIN_ROOT, "paddlespeech/t2s/exps")
        # am
        self.am = "fastspeech2_aishell3"
        self.am_config = "source/model/fastspeech2_nosil_aishell3_vc1_ckpt_0.5/default.yaml"
        self.am_ckpt = "source/model/fastspeech2_nosil_aishell3_vc1_ckpt_0.5/snapshot_iter_96400.pdz"
        self.am_stat = "source/model/fastspeech2_nosil_aishell3_vc1_ckpt_0.5/speech_stats.npy"
        self.phones_dict = "source/model/fastspeech2_nosil_aishell3_vc1_ckpt_0.5/phone_id_map.txt"
        # voc
        self.voc = "pwgan_aishell3"
        self.voc_config = "source/model/pwg_aishell3_ckpt_0.5/default.yaml"
        self.voc_ckpt = "source/model/pwg_aishell3_ckpt_0.5/snapshot_iter_1000000.pdz"
        self.voc_stat = "source/model/pwg_aishell3_ckpt_0.5/feats_stats.npy"
        # ge2e
        self.ge2e_params_path = "source/model/ge2e_ckpt_0.3/step-3000000.pdparams"

    def vc(self, text, input_wav, out_wav):
        """Clone the voice in ``input_wav`` and speak ``text`` with it.

        Args:
            text: sentence to synthesize; shell-quoted before use.
            input_wav: reference recording; copied into a private directory.
            out_wav: only its directory part is used (as ``--output-dir``).

        Returns:
            Whatever ``run_cmd`` returns for
            ``<dirname(out_wav)>/<basename(input_wav)>``.
        """
        # Function-scope import keeps the fix local to this class.
        import shlex

        # The reference wav must sit alone in its own temporary directory.
        _, full_file_name = os.path.split(input_wav)
        ref_audio_dir = os.path.realpath("tmp_dir/ge2e")
        if os.path.exists(ref_audio_dir):
            shutil.rmtree(ref_audio_dir)

        os.makedirs(ref_audio_dir, exist_ok=True)
        shutil.copy(input_wav, ref_audio_dir)

        output_dir = os.path.dirname(out_wav)
        ngpu = get_ngpu()

        # text comes from the caller and the command runs through a shell,
        # so it has to be quoted; the directories are quoted for safety too.
        cmd_parts = [
            f"python {self.BIN_DIR}/voice_cloning.py",
            f"--am={self.am}",
            f"--am_config={self.am_config}",
            f"--am_ckpt={self.am_ckpt}",
            f"--am_stat={self.am_stat}",
            f"--voc={self.voc}",
            f"--voc_config={self.voc_config}",
            f"--voc_ckpt={self.voc_ckpt}",
            f"--voc_stat={self.voc_stat}",
            f"--ge2e_params_path={self.ge2e_params_path}",
            f"--text={shlex.quote(text)}",
            f"--input-dir={shlex.quote(ref_audio_dir)}",
            f"--output-dir={shlex.quote(output_dir)}",
            f"--phones-dict={self.phones_dict}",
            f"--ngpu={ngpu}",
        ]
        cmd = " ".join(cmd_parts)

        print(cmd)

        output_name = os.path.join(output_dir, full_file_name)
        return run_cmd(cmd, output_name=output_name)


================================================
FILE: demos/speech_web/speech_server/src/robot.py
================================================
import os

import soundfile as sf
from src.SpeechBase.asr import ASR
from src.SpeechBase.nlp import NLP
from src.SpeechBase.tts import TTS

from paddlespeech.cli.asr.infer import ASRExecutor


class Robot:
    """Bundle the NLP, ASR and TTS helpers used by the chat-bot demo."""

    def __init__(self,
                 asr_config,
                 tts_config,
                 asr_init_path,
                 ie_model_path=None) -> None:
        self.nlp = NLP(ie_model_path=ie_model_path)
        self.asr = ASR(config_path=asr_config)
        self.tts = TTS(config_path=tts_config)
        self.tts_sample_rate = 24000
        self.asr_sample_rate = 16000

        # Streaming recognition is less accurate than the end-to-end model,
        # so a separate end-to-end executor is kept next to the streaming one.
        self.asr_model = ASRExecutor()
        self.asr_name = "conformer_wenetspeech"
        self.warm_up_asrmodel(asr_init_path)

    def warm_up_asrmodel(self, asr_init_path):
        """Run the ASR executor once on ``asr_init_path``, creating it if absent."""
        audio_missing = not os.path.exists(asr_init_path)
        if audio_missing:
            parent_dir = os.path.dirname(asr_init_path)
            if not os.path.exists(parent_dir):
                os.makedirs(parent_dir, exist_ok=True)

            # Produce the warm-up audio with TTS (written at 24000 Hz).
            self.text2speech("生成初始音频", asr_init_path)

        # First call initializes the ASR model.
        self.asr_model(
            asr_init_path,
            model=self.asr_name,
            lang='zh',
            sample_rate=16000,
            force_yes=True)

    def speech2text(self, audio_file):
        """Recognize ``audio_file`` and return the executor's postprocess result."""
        executor = self.asr_model
        executor.preprocess(self.asr_name, audio_file)
        executor.infer(self.asr_name)
        return executor.postprocess()

    def text2speech(self, text, outpath):
        """Synthesize ``text``, write it to ``outpath`` and return the waveform."""
        waveform = self.tts.offlineTTS(text)
        sf.write(outpath, waveform, samplerate=self.tts_sample_rate)
        return waveform

    def text2speechStream(self, text):
        """Yield the chunks produced by ``self.tts.streamTTS`` for ``text``."""
        yield from self.tts.streamTTS(text=text)

    def text2speechStreamBytes(self, text):
        """Yield the chunks produced by ``self.tts.streamTTSBytes`` for ``text``."""
        yield from self.tts.streamTTSBytes(text=text)

    def chat(self, text):
        """Return ``self.nlp.chat(text)``."""
        return self.nlp.chat(text)

    def ie(self, text):
        """Return ``self.nlp.ie(text)``."""
        return self.nlp.ie(text)


================================================
FILE: demos/speech_web/speech_server/src/tdnn_clone.py
================================================
import os
import shutil

from .util import get_ngpu
from .util import MAIN_ROOT
from .util import run_cmd


class VoiceCloneTDNN():
    """Voice cloning via ``voice_cloning.py`` with ``--use_ecapa=True``.

    All model paths are relative to the current working directory.
    """

    def __init__(self):
        # Directory that contains voice_cloning.py.
        self.BIN_DIR = os.path.join(MAIN_ROOT, "paddlespeech/t2s/exps")

        # am
        self.am = "fastspeech2_aishell3"
        self.am_config = "source/model/fastspeech2_aishell3_ckpt_vc2_1.2.0/default.yaml"
        self.am_ckpt = "source/model/fastspeech2_aishell3_ckpt_vc2_1.2.0/snapshot_iter_96400.pdz"
        self.am_stat = "source/model/fastspeech2_aishell3_ckpt_vc2_1.2.0/speech_stats.npy"
        self.phones_dict = "source/model/fastspeech2_aishell3_ckpt_vc2_1.2.0/phone_id_map.txt"
        # voc
        self.voc = "pwgan_aishell3"
        self.voc_config = "source/model/pwg_aishell3_ckpt_0.5/default.yaml"
        self.voc_ckpt = "source/model/pwg_aishell3_ckpt_0.5/snapshot_iter_1000000.pdz"
        self.voc_stat = "source/model/pwg_aishell3_ckpt_0.5/feats_stats.npy"

    def vc(self, text, input_wav, out_wav):
        """Clone the voice in ``input_wav`` and speak ``text`` with it.

        Args:
            text: sentence to synthesize; shell-quoted before use.
            input_wav: reference recording; copied into a private directory.
            out_wav: only its directory part is used (as ``--output-dir``).

        Returns:
            Whatever ``run_cmd`` returns for
            ``<dirname(out_wav)>/<basename(input_wav)>``.
        """
        # Function-scope import keeps the fix local to this class.
        import shlex

        # The reference wav must sit alone in its own temporary directory.
        _, full_file_name = os.path.split(input_wav)
        ref_audio_dir = os.path.realpath("tmp_dir/tdnn")
        if os.path.exists(ref_audio_dir):
            shutil.rmtree(ref_audio_dir)
        os.makedirs(ref_audio_dir, exist_ok=True)
        shutil.copy(input_wav, ref_audio_dir)

        output_dir = os.path.dirname(out_wav)
        ngpu = get_ngpu()

        # text comes from the caller and the command runs through a shell,
        # so it has to be quoted; the directories are quoted for safety too.
        text_arg = shlex.quote(text)
        input_dir_arg = shlex.quote(ref_audio_dir)
        output_dir_arg = shlex.quote(output_dir)

        cmd = f"""
            python3 {self.BIN_DIR}/voice_cloning.py \
                    --am={self.am} \
                    --am_config={self.am_config} \
                    --am_ckpt={self.am_ckpt} \
                    --am_stat={self.am_stat} \
                    --voc={self.voc} \
                    --voc_config={self.voc_config} \
                    --voc_ckpt={self.voc_ckpt} \
                    --voc_stat={self.voc_stat} \
                    --text={text_arg} \
                    --input-dir={input_dir_arg} \
                    --output-dir={output_dir_arg} \
                    --phones-dict={self.phones_dict} \
                    --use_ecapa=True \
                    --ngpu={ngpu}
        """

        output_name = os.path.join(output_dir, full_file_name)
        return run_cmd(cmd, output_name=output_name)


================================================
FILE: demos/speech_web/speech_server/src/util.py
================================================
import os
import random
import subprocess

import paddle

NOW_FILE_PATH = os.path.dirname(__file__)
MAIN_ROOT = os.path.realpath(os.path.join(NOW_FILE_PATH, "../../../../"))


def get_ngpu():
    """Return 0 when paddle's current device is "cpu", otherwise 1."""
    running_on_cpu = paddle.device.get_device() == "cpu"
    return 0 if running_on_cpu else 1


def randName(n=5):
    """Return ``n`` distinct random lowercase ASCII letters joined into a string."""
    alphabet = 'zyxwvutsrqponmlkjihgfedcba'
    picked = random.sample(alphabet, n)
    return "".join(picked)


def SuccessRequest(result=None, message="ok"):
    """Build the success response payload (``code`` 0)."""
    payload = dict(code=0, result=result, message=message)
    return payload


def ErrorRequest(result=None, message="error"):
    """Build the failure response payload (``code`` -1)."""
    payload = dict(code=-1, result=result, message=message)
    return payload


def run_cmd(cmd, output_name):
    """Run ``cmd`` through the shell and report the expected output path.

    Returns ``output_name`` when the command exits with status 0 and that
    path exists afterwards; returns ``None`` in every other case.
    """
    exit_code = subprocess.Popen(cmd, shell=True).wait()
    print(cmd)
    print("运行结果：", exit_code)
    # Success requires both a zero exit status and the output being present.
    if exit_code == 0 and os.path.exists(output_name):
        return output_name
    return None


================================================
FILE: demos/speech_web/speech_server/vc.py
================================================
import argparse
import base64
import datetime
import json
import os
from typing import List

import aiofiles
import librosa
import soundfile as sf
import uvicorn
from fastapi import FastAPI
from fastapi import UploadFile
from pydantic import BaseModel
from src.ernie_sat import SAT
from src.finetune import FineTune
from src.ge2e_clone import VoiceCloneGE2E
from src.tdnn_clone import VoiceCloneTDNN
from src.util import *
from starlette.responses import FileResponse

from paddlespeech.server.utils.audio_process import float2pcm

# Parse command-line options
parser = argparse.ArgumentParser(prog='PaddleSpeechDemo', add_help=True)

parser.add_argument(
    "--port",
    action="store",
    type=int,
    help="port of the app",
    default=8010,
    required=False)

args = parser.parse_args()
port = args.port

# Original note: creating these here affects finetune, which is why finetune
# is executed through shell commands.
vc_model = VoiceCloneGE2E()
vc_model_tdnn = VoiceCloneTDNN()

sat_model = SAT()
ft_model = FineTune()

# Configuration files
# NOTE(review): the five names below are not referenced in the visible part
# of this module.
tts_config = "conf/tts_online_application.yaml"
asr_config = "conf/ws_conformer_wenetspeech_application_faster.yaml"
asr_init_path = "source/demo/demo.wav"
db_path = "source/db/vc.sqlite"
ie_model_path = "source/model"

# Path configuration (all relative to the current working directory)
VC_UPLOAD_PATH = "source/wav/vc/upload"
VC_OUT_PATH = "source/wav/vc/out"

FT_UPLOAD_PATH = "source/wav/finetune/upload"
FT_OUT_PATH = "source/wav/finetune/out"
FT_LABEL_PATH = "source/wav/finetune/label.json"
FT_LABEL_TXT_PATH = "source/wav/finetune/labels.txt"
FT_DEFAULT_PATH = "source/wav/finetune/default"
FT_EXP_BASE_PATH = "tmp_dir/finetune"

SAT_UPLOAD_PATH = "source/wav/SAT/upload"
SAT_OUT_PATH = "source/wav/SAT/out"
SAT_LABEL_PATH = "source/wav/SAT/label.json"

# Load the saved SAT labels (file name -> transcript) if the file exists
if os.path.exists(SAT_LABEL_PATH):
    with open(SAT_LABEL_PATH, "r", encoding='utf8') as f:
        sat_label_dic = json.load(f)
else:
    sat_label_dic = {}

# Load the fine-tune labels if the file exists
if os.path.exists(FT_LABEL_PATH):
    with open(FT_LABEL_PATH, "r", encoding='utf8') as f:
        ft_label_dic = json.load(f)
else:
    ft_label_dic = {}

# Create the working directories
base_sources = [
    VC_UPLOAD_PATH,
    VC_OUT_PATH,
    FT_UPLOAD_PATH,
    FT_OUT_PATH,
    FT_DEFAULT_PATH,
    SAT_UPLOAD_PATH,
    SAT_OUT_PATH,
]
for path in base_sources:
    os.makedirs(path, exist_ok=True)
#####################################################################
########################### APP initialization ######################
#####################################################################
app = FastAPI()

######################################################################
########################### 接口类型  #################################
#####################################################################


# Request body schemas
# VcBase identifies one audio file. The download, file and delete endpoints
# in this module test and use wavPath; wavName is not read by them.
class VcBase(BaseModel):
    wavName: str
    wavPath: str


# Request body of /vc/clone_g2p: a reference recording plus the text to speak.
class VcBaseText(BaseModel):
    wavName: str  # name of the output file placed under VC_OUT_PATH
    wavPath: str  # path of the reference recording
    text: str  # sentence to synthesize
    func: str  # 'ge2e' selects the GE2E model; any other value selects TDNN


# Request body of /vc/clone_sat.
class VcBaseSAT(BaseModel):
    old_str: str  # transcript stored as the label for `filename`
    new_str: str
    language: str  # "zh" or "en"
    function: str  # "synthesize", "edit" or "crossclone"
    # NOTE(review): originally annotated as "base64 encoded", but VcCloneSAT
    # passes this value to os.path.realpath as the input file path.
    wav: str
    filename: str  # key in the SAT label dict and suffix of the output name


# Request body of /finetune/list.
class FTPath(BaseModel):
    dataPath: str  # "default" selects FT_DEFAULT_PATH; otherwise a directory


# Request body of /finetune/upload.
class VcBaseFT(BaseModel):
    wav: str  # base64-encoded file content (decoded with base64.b64decode)
    filename: str  # file name created inside wav_path
    wav_path: str  # target directory; created when missing


# Request body of /finetune/clone_finetune.
class VcBaseFTModel(BaseModel):
    wav_path: str  # 'default' selects FT_DEFAULT_PATH; otherwise the data dir


# Request body of /finetune/clone_finetune_syn.
class VcBaseFTSyn(BaseModel):
    exp_path: str  # experiment directory; must exist
    text: str  # sentence to synthesize


######################################################################
########################### 文件列表查询与保存服务 #################################
#####################################################################


def getVCList(path):
    """List every file below ``path`` as ``{'name', 'path'}`` dicts.

    The tree is walked bottom-up and the result is ordered by file name,
    descending.
    """
    entries = [{
        'name': fname,
        'path': os.path.join(dirpath, fname)
    }
               for dirpath, _subdirs, fnames in os.walk(path, topdown=False)
               for fname in fnames]
    entries.sort(key=lambda entry: entry['name'], reverse=True)
    return entries


async def saveFiles(files, SavePath):
    """Store uploaded audio files in ``SavePath`` and resample them to 16 kHz.

    Uploads whose name contains 'blob' get a generated ``HHMM<rand>.wav``
    name. For all other uploads only the final path component of the
    client-supplied name is used, so a name such as ``../x.wav`` cannot
    escape ``SavePath``.

    Args:
        files: iterable of upload objects with ``filename`` and async ``read``.
        SavePath: directory that receives the files.

    Returns:
        str: summary with the number of successes, failures and the
        per-file error messages.
    """
    right = 0
    error = 0
    error_info = "错误文件："
    for file in files:
        # filename may be missing; keep the error report usable in that case.
        upload_name = file.filename or ""
        try:
            if 'blob' in upload_name:
                stamp = datetime.datetime.strftime(datetime.datetime.now(),
                                                   '%H%M')
                target_name = stamp + randName(3) + ".wav"
            else:
                # Drop any directory part (either separator style).
                target_name = os.path.basename(upload_name.replace("\\", "/"))
                if target_name in ("", ".", ".."):
                    raise ValueError("invalid file name")
            out_file_path = os.path.join(SavePath, target_name)

            print("上传文件名:", out_file_path)
            async with aiofiles.open(out_file_path, 'wb') as out_file:
                content = await file.read()  # async read
                await out_file.write(content)  # async write
            # Rewrite the file in place, resampled to 16 kHz.
            wav, sr = librosa.load(out_file_path, sr=16000)
            sf.write(out_file_path, data=wav, samplerate=sr)
            right += 1
        except Exception as e:
            error += 1
            error_info = error_info + upload_name + " " + str(e) + "\n"
            continue
    return f"上传成功：{right}, 上传失败：{error}, 失败原因： {error_info}"


# Download an audio file: returns the file at wavPath, or an error payload
# when that path does not exist.
@app.post("/vc/download")
async def VcDownload(base: VcBase):
    if not os.path.exists(base.wavPath):
        return ErrorRequest(message="下载请求失败，文件不存在")
    return FileResponse(base.wavPath)


# Download an audio file as base64: the file is loaded at 16 kHz, converted
# with float2pcm and the raw sample bytes are base64-encoded.
@app.post("/vc/download_base64")
async def VcDownloadBase64(base: VcBase):
    if not os.path.exists(base.wavPath):
        return ErrorRequest(message="播放请求失败，文件不存在")
    samples, _rate = librosa.load(base.wavPath, sr=16000)
    pcm = float2pcm(samples)  # float32 to int16
    encoded = base64.b64encode(pcm.tobytes()).decode('utf8')
    return SuccessRequest(result=encoded)


######################################################################
########################### VC 服务 #################################
#####################################################################


# Upload reference recordings for voice cloning. The files are stored under
# VC_UPLOAD_PATH by the shared saveFiles helper, whose summary string is
# returned as the result.
@app.post("/vc/upload")
async def VcUpload(files: List[UploadFile]):
    # saveFiles is a coroutine function and therefore must be awaited.
    summary = await saveFiles(files, VC_UPLOAD_PATH)
    return SuccessRequest(result=summary)


# List the files found under VC_UPLOAD_PATH.
@app.get("/vc/list")
async def VcList():
    return SuccessRequest(result=getVCList(VC_UPLOAD_PATH))


# Fetch an audio file: returns the file at wavPath, or an error payload
# when that path does not exist.
@app.post("/vc/file")
async def VcFileGet(base: VcBase):
    if not os.path.exists(base.wavPath):
        return ErrorRequest(result="获取文件失败")
    return FileResponse(base.wavPath)


# Delete an audio file: removes wavPath when it exists, otherwise reports
# a failure.
@app.post("/vc/del")
async def VcFileDel(base: VcBase):
    if not os.path.exists(base.wavPath):
        return ErrorRequest(result="删除失败")
    os.remove(base.wavPath)
    return SuccessRequest(result="删除成功")


# Voice cloning: func == 'ge2e' uses the GE2E model, any other value uses the
# TDNN model. The output is written under VC_OUT_PATH.
@app.post("/vc/clone_g2p")
async def VcCloneG2P(base: VcBaseText):
    if not os.path.exists(base.wavPath):
        return ErrorRequest(message="克隆失败，音频不存在")
    try:
        cloner = vc_model if base.func == 'ge2e' else vc_model_tdnn
        wavName = base.wavName
        requested_path = os.path.join(VC_OUT_PATH, wavName)
        wavPath = cloner.vc(
            text=base.text, input_wav=base.wavPath, out_wav=requested_path)
        if not wavPath:
            # The command ran but did not produce the expected file.
            return ErrorRequest(message="克隆失败，检查克隆脚本是否有效")
        return SuccessRequest(result={"wavName": wavName, "wavPath": wavPath})
    except Exception as e:
        print(e)
        return ErrorRequest(message="克隆失败，合成过程报错")


######################################################################
########################### SAT 服务 #################################
#####################################################################
# ERNIE-SAT cloning: stores the transcript label for the file, then runs
# synthesize / edit / crossclone for Chinese ("zh") or English ("en").
@app.post("/vc/clone_sat")
async def VcCloneSAT(base: VcBaseSAT):
    # Refresh the stored label when it is new or has changed.
    if sat_label_dic.get(base.filename) != base.old_str:
        sat_label_dic[base.filename] = base.old_str
        with open(SAT_LABEL_PATH, "w", encoding='utf8') as f:
            json.dump(sat_label_dic, f, ensure_ascii=False, indent=4)

    input_file_path = base.wav

    # Validate the request; the language is checked before the function.
    if base.language not in ("zh", "en"):
        return ErrorRequest(message="请检查功能选项是否正确，仅支持中文和英文")
    # NOTE(review): the same "_zh_" prefixes are used for both languages.
    prefixes = {
        "synthesize": "sat_syn_zh_",
        "edit": "sat_edit_zh_",
        "crossclone": "sat_cross_zh_",
    }
    if base.function not in prefixes:
        return ErrorRequest(
            message="请检查功能选项是否正确，仅支持:synthesize, edit, crossclone")

    output_file_path = os.path.join(SAT_OUT_PATH,
                                    prefixes[base.function] + base.filename)
    source_wav = os.path.realpath(input_file_path)
    target_wav = os.path.realpath(output_file_path)

    if base.function == "crossclone":
        # Cross-lingual cloning targets the other language.
        other_lang = "en" if base.language == "zh" else "zh"
        sat_result = sat_model.crossclone(
            old_str=base.old_str,
            new_str=base.new_str,
            input_name=source_wav,
            output_name=target_wav,
            source_lang=base.language,
            target_lang=other_lang)
    else:
        if base.language == "zh":
            runner = sat_model.zh_synthesize_edit
        else:
            runner = sat_model.en_synthesize_edit
        sat_result = runner(
            old_str=base.old_str,
            new_str=base.new_str,
            input_name=source_wav,
            output_name=target_wav,
            task_name=base.function)

    if not sat_result:
        return ErrorRequest(message="SAT 合成失败，请从后台检查错误信息！")
    return SuccessRequest(result=sat_result, message="SAT合成成功")


# List the files under SAT_UPLOAD_PATH, each with its stored label
# (empty string when no label is known).
@app.get("/sat/list")
async def SatList():
    res = []
    for entry in getVCList(SAT_UPLOAD_PATH):
        entry['label'] = sat_label_dic.get(entry['name'], "")
        res.append(entry)
    return SuccessRequest(result=res)


# Upload SAT audio. The files are stored under SAT_UPLOAD_PATH by the shared
# saveFiles helper, whose summary string is returned as the result.
@app.post("/sat/upload")
async def SATUpload(files: List[UploadFile]):
    # saveFiles is a coroutine function and therefore must be awaited.
    summary = await saveFiles(files, SAT_UPLOAD_PATH)
    return SuccessRequest(result=summary)


######################################################################
########################### FineTune 服务 #################################
#####################################################################


# List the fine-tune sentences. Every entry of the fine-tune label dict is
# returned with its text, its name and the path of the matching recording
# inside the data directory (empty string when it has not been recorded).
@app.post("/finetune/list")
async def FineTuneList(Path: FTPath):
    dataPath = Path.dataPath
    # "default" selects the bundled default data directory.
    FT_PATH = FT_DEFAULT_PATH if dataPath == "default" else dataPath

    res = []
    for name, value in ft_label_dic.items():
        wav_path = os.path.join(FT_PATH, name)
        if not os.path.exists(wav_path):
            wav_path = ""
        res.append({'text': value['text'], 'name': name, 'path': wav_path})
    return SuccessRequest(result=res)


# Reset: create a new randomly named upload directory under FT_UPLOAD_PATH,
# copy labels.txt into it and return its path.
@app.get('/finetune/newdir')
async def FTGetNewDir():
    fresh_dir = os.path.join(FT_UPLOAD_PATH, randName(3))
    already_there = os.path.exists(fresh_dir)
    if not already_there:
        os.makedirs(fresh_dir, exist_ok=True)
    # Copy labels.txt into the directory.
    os.system(f"cp {FT_LABEL_TXT_PATH} {fresh_dir}")
    return SuccessRequest(result=fresh_dir)


# Fine-tune upload: decode the base64 payload and write it to
# <wav_path>/<filename>, creating wav_path when it is missing.
@app.post("/finetune/upload")
async def FTUpload(base: VcBaseFT):
    try:
        target_dir = base.wav_path
        # Make sure the target directory exists.
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        # Store the audio file.
        out_file_path = os.path.join(target_dir, base.filename)
        payload = base64.b64decode(base.wav)
        async with aiofiles.open(out_file_path, 'wb') as sink:
            await sink.write(payload)  # async write
    except Exception:
        return ErrorRequest(result="上传失败")
    return SuccessRequest(result="上传成功")


# Fine-tune a model on the recordings in wav_path ('default' selects
# FT_DEFAULT_PATH). The experiment directory is
# FT_EXP_BASE_PATH/<last component of the data path>.
@app.post("/finetune/clone_finetune")
async def FTModel(base: VcBaseFTModel):
    # Check that the data directory exists first.
    if base.wav_path == 'default':
        data_path = FT_DEFAULT_PATH
    else:
        data_path = base.wav_path
    if not os.path.exists(data_path):
        return ErrorRequest(message="数据文件夹不存在")

    # normpath removes a trailing separator, so "a/b/" still yields "b"
    # rather than an empty name that would map onto the shared base dir.
    data_base = os.path.basename(os.path.normpath(data_path))
    exp_dir = os.path.join(FT_EXP_BASE_PATH, data_base)
    try:
        exp_dir = ft_model.finetune(
            input_dir=os.path.realpath(data_path),
            exp_dir=os.path.realpath(exp_dir))
        if exp_dir:
            return SuccessRequest(result=exp_dir)
        else:
            return ErrorRequest(message="微调失败")
    except Exception as e:
        print(e)
        return ErrorRequest(message="微调失败")


# Synthesize text with a fine-tuned model stored in exp_path; the wav is
# written under FT_OUT_PATH with a random five-letter name.
@app.post("/finetune/clone_finetune_syn")
async def FTSyn(base: VcBaseFTSyn):
    try:
        if not os.path.exists(base.exp_path):
            return ErrorRequest(result="模型路径不存在")
        wav_name = randName(5)
        wav_path = ft_model.synthesize(
            text=base.text,
            wav_name=wav_name,
            out_wav_dir=os.path.realpath(FT_OUT_PATH),
            exp_dir=os.path.realpath(base.exp_path))
        if not wav_path:
            return ErrorRequest(message="音频合成失败")
        return SuccessRequest(
            result={"wavName": wav_name + ".wav",
                    "wavPath": wav_path})
    except Exception:
        return ErrorRequest(message="音频合成失败")


# Start the server when run as a script; it listens on all interfaces on the
# port taken from --port.
if __name__ == '__main__':
    uvicorn.run(app=app, host='0.0.0.0', port=port)


================================================
FILE: demos/speech_web/web_client/.gitignore
================================================
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*

node_modules
dist
dist-ssr
*.local

# Editor directories and files
# The negation must stay after the only `.vscode/*` rule: a later
# `.vscode/*` would ignore extensions.json again (last match wins).
.vscode/*
!.vscode/extensions.json
.idea
.DS_Store
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?


================================================
FILE: demos/speech_web/web_client/index.html
================================================
<!DOCTYPE html>
<!-- Single-page shell of the PaddleSpeech web client. -->
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <link rel="icon" href="/favicon.ico" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>飞桨PaddleSpeech</title>
  </head>
  <body>
    <!-- Empty container; presumably the mount target used by /src/main.js (not visible here). -->
    <div id="app"></div>
    <!-- Application entry point, loaded as an ES module. -->
    <script type="module" src="/src/main.js"></script>
  </body>
</html>


================================================
FILE: demos/speech_web/web_client/package.json
================================================
{
  "name": "paddlespeechwebclient",
  "private": true,
  "version": "0.0.0",
  "scripts": {
    "dev": "vite",
    "build": "vite build",
    "preview": "vite preview"
  },
  "dependencies": {
    "@element-plus/icons-vue": "^2.0.9",
    "ant-design-vue": "^2.2.8",
    "axios": "^1.8.2",
    "element-plus": "^2.1.9",
    "js-audio-recorder": "0.5.7",
    "lamejs": "^1.2.1",
    "less": "^4.1.2",
    "vue": "^3.2.25"
  },
  "devDependencies": {
    "@vitejs/plugin-vue": "^2.3.0",
    "vite": "^2.9.13",
    "@vue/compiler-sfc": "^3.1.0"
  }
}


================================================
FILE: demos/speech_web/web_client/src/App.vue
================================================
<script setup>
// Root component: renders the page header followed by the Experience view.
import Experience from './components/Experience.vue'
import Header from './components/Content/Header/Header.vue'
</script>

<template>
  <div class="app">
    <Header></Header>
    <Experience></Experience>
  </div>
</template>

<!-- The pre-processor is selected with the `lang` attribute. -->
<style lang="less">
.app {
  background: url("assets/image/在线体验-背景@2x.png") no-repeat;
}
</style>


================================================
FILE: demos/speech_web/web_client/src/api/API.js
================================================
// Table of backend endpoints used by the API wrapper modules in this directory.
export const apiURL = {
    // --- ASR ---
    ASR_OFFLINE: '/api/asr/offline',              // offline speech recognition result
    ASR_COLLECT_ENV: '/api/asr/collectEnv',       // collect environment noise
    ASR_STOP_RECORD: '/api/asr/stopRecord',       // pause recording on the backend
    ASR_RESUME_RECORD: '/api/asr/resumeRecord',   // resume recording on the backend

    // --- NLP ---
    NLP_CHAT: '/api/nlp/chat',                    // chit-chat endpoint
    NLP_IE: '/api/nlp/ie',                        // information extraction endpoint

    // --- TTS ---
    TTS_OFFLINE: '/api/tts/offline',              // fetch synthesized TTS audio

    // --- VPR (voiceprint recognition) ---
    VPR_RECOG: '/api/vpr/recog',                  // recognition, returns voiceprint similarity
    VPR_ENROLL: '/api/vpr/enroll',                // enroll a voiceprint
    VPR_LIST: '/api/vpr/list',                    // list enrolled voiceprints
    VPR_DEL: '/api/vpr/del',                      // delete a user's voiceprint
    VPR_DATA: '/api/vpr/database64?vprId=',       // enrolled audio as base64; the id is appended by the caller

    // --- WebSocket endpoints ---
    CHAT_SOCKET_RECORD: 'ws://localhost:8010/ws/asr/offlineStream',  // ChatBot websocket
    ASR_SOCKET_RECORD: 'ws://localhost:8010/ws/asr/onlineStream',    // streaming ASR
    TTS_SOCKET_RECORD: 'ws://localhost:8010/ws/tts/online',          // streaming TTS

    // --- Voice clone / ERNIE-SAT / fine-tune: listings ---
    VC_List: '/api/vc/list',
    SAT_List: '/api/sat/list',
    FineTune_List: '/api/finetune/list',

    // --- uploads ---
    VC_Upload: '/api/vc/upload',
    SAT_Upload: '/api/sat/upload',
    FineTune_Upload: '/api/finetune/upload',
    FineTune_NewDir: '/api/finetune/newdir',

    // --- download / delete ---
    VC_Download: '/api/vc/download',
    VC_Download_Base64: '/api/vc/download_base64',
    VC_Del: '/api/vc/del',

    // --- clone / synthesis ---
    VC_CloneG2p: '/api/vc/clone_g2p',
    VC_CloneSAT: '/api/vc/clone_sat',
    VC_CloneFineTune: '/api/finetune/clone_finetune',
    VC_CloneFineTuneSyn: '/api/finetune/clone_finetune_syn',
}


================================================
FILE: demos/speech_web/web_client/src/api/ApiASR.js
================================================
import axios from 'axios'
import {apiURL} from "./API.js"

// Upload an audio file and get the recognition result.
// Resolves with the axios response of the POST to apiURL.ASR_OFFLINE.
export async function asrOffline(params){
    return await axios.post(apiURL.ASR_OFFLINE, params)
}

// Upload the environment-noise sample.
// Posts to the dedicated collectEnv endpoint; the previous code posted to
// ASR_OFFLINE, which ran ordinary recognition instead of noise collection.
// Resolves with the axios response.
export async function asrCollentEnv(params){
    const result = await axios.post(
        apiURL.ASR_COLLECT_ENV, params
    )
    return result
}

// Pause recording: GET apiURL.ASR_STOP_RECORD and resolve with the axios response.
export async function asrStopRecord(){
    return await axios.get(apiURL.ASR_STOP_RECORD)
}

// Resume recording: GET apiURL.ASR_RESUME_RECORD and resolve with the axios response.
export async function asrResumeRecord(){
    return await axios.get(apiURL.ASR_RESUME_RECORD)
}

================================================
FILE: demos/speech_web/web_client/src/api/ApiNLP.js
================================================
import axios from 'axios'
import {apiURL} from "./API.js"

// Get the chit-chat reply: POST `{ chat: text }` to apiURL.NLP_CHAT.
export async function nlpChat(text){
    const payload = { chat : text }
    return await axios.post(apiURL.NLP_CHAT, payload)
}

// Get the information-extraction result: POST `{ chat: text }` to apiURL.NLP_IE.
export async function nlpIE(text){
    const payload = { chat : text }
    return await axios.post(apiURL.NLP_IE, payload)
}


================================================
FILE: demos/speech_web/web_client/src/api/ApiTTS.js
================================================
import axios from 'axios'
import {apiURL} from "./API.js"

// Request offline speech synthesis: POST `{ text }` to apiURL.TTS_OFFLINE
// and resolve with the axios response.
export async function ttsOffline(text){
    const payload = { text : text }
    return await axios.post(apiURL.TTS_OFFLINE, payload)
}


================================================
FILE: demos/speech_web/web_client/src/api/ApiVC.js
================================================
import axios from 'axios'
import {apiURL} from "./API.js"

// Upload audio (voice clone): POST params to apiURL.VC_Upload.
export async function vcUpload(params){
    return await axios.post(apiURL.VC_Upload, params)
}

// Upload audio (SAT): POST params to apiURL.SAT_Upload.
export async function satUpload(params){
    return await axios.post(apiURL.SAT_Upload, params)
}

// Upload audio (fine-tune): POST params to apiURL.FineTune_Upload.
export async function fineTuneUpload(params){
    return await axios.post(apiURL.FineTune_Upload, params)
}

// Delete audio: POST params to apiURL.VC_Del.
export async function vcDel(params){
    return await axios.post(apiURL.VC_Del, params)
}

// Get the audio list (voice clone): GET apiURL.VC_List.
export async function vcList(){
    return await axios.get(apiURL.VC_List)
}
// Get the audio list (SAT): GET apiURL.SAT_List.
export async function satList(){
    return await axios.get(apiURL.SAT_List)
}

// Get the audio list (fine-tune): POST params to apiURL.FineTune_List.
export async function fineTuneList(params){
    return await axios.post(apiURL.FineTune_List, params)
}

// Fine-tune one-click reset: GET apiURL.FineTune_NewDir to obtain a new folder.
export async function fineTuneNewDir(){
    return await axios.get(apiURL.FineTune_NewDir)
}

// Fetch audio data: POST params to apiURL.VC_Download.
export async function vcDownload(params){
    return await axios.post(apiURL.VC_Download, params)
}

// Fetch audio data as Base64: POST params to apiURL.VC_Download_Base64.
export async function vcDownloadBase64(params){
    return await axios.post(apiURL.VC_Download_Base64, params)
}


// Clone synthesis (G2P): POST params to apiURL.VC_CloneG2p.
export async function vcCloneG2P(params){
    return await axios.post(apiURL.VC_CloneG2p, params)
}

// Clone synthesis (SAT): POST params to apiURL.VC_CloneSAT.
export async function vcCloneSAT(params){
    return await axios.post(apiURL.VC_CloneSAT, params)
}

// Clone synthesis, fine-tuning step: POST params to apiURL.VC_CloneFineTune.
export async function vcCloneFineTune(params){
    return await axios.post(apiURL.VC_CloneFineTune, params)
}

// Clone synthesis, synthesis step after fine-tuning: POST params to apiURL.VC_CloneFineTuneSyn.
export async function vcCloneFineTuneSyn(params){
    return await axios.post(apiURL.VC_CloneFineTuneSyn, params)
}


================================================
FILE: demos/speech_web/web_client/src/api/ApiVPR.js
================================================
import axios from 'axios'
import {apiURL} from "./API.js"

// Enroll a voiceprint: POST params to apiURL.VPR_ENROLL.
export async function vprEnroll(params){
    return await axios.post(apiURL.VPR_ENROLL, params)
}

// Voiceprint recognition: POST params to apiURL.VPR_RECOG.
export async function vprRecog(params){
    return await axios.post(apiURL.VPR_RECOG, params)
}

// Delete a voiceprint: POST params to apiURL.VPR_DEL.
export async function vprDel(params){
    return await axios.post(apiURL.VPR_DEL, params)
}

// Get the list of enrolled voiceprints: GET apiURL.VPR_LIST.
export async function vprList(){
    return await axios.get(apiURL.VPR_LIST)
}

// Fetch the enrolled voiceprint audio.
// `params` is appended as the value of the `vprId` query parameter that
// apiURL.VPR_DATA ends with; it is URL-encoded so that a value containing
// reserved characters (&, #, spaces, ...) cannot corrupt the query string.
// Plain numeric ids are unaffected by the encoding.
// Resolves with the axios response.
export async function vprData(params){
    const result = await axios.get(apiURL.VPR_DATA + encodeURIComponent(params));
    return result
}


================================================
FILE: demos/speech_web/web_client/src/components/Content/Header/Header.vue
================================================
<template>
  <!-- Page header: title, project description and a link to the GitHub repository. -->
  <!-- Vue templates use `class`; `className` and `key={index}` were JSX leftovers. -->
  <div class="speech_header">
    <div class="speech_header_title">
      飞桨-PaddleSpeech
    </div>
    <div class="speech_header_describe">
      PaddleSpeech 是基于飞桨 PaddlePaddle 的语音方向的开源模型库，用于语音和音频中的各种关键任务的开发。支持语音识别，语音合成，声纹识别，声音分类，语音唤醒，语音翻译等多种语音任务，荣获 NAACL2022 Best Demo Award 。如果你喜欢这个示例，欢迎在 github 中 star 收藏鼓励。
    </div>
    <div class="speech_header_link_box">
      <a href="https://github.com/PaddlePaddle/PaddleSpeech" class="speech_header_link" target="_blank" rel="noreferrer">
        前往Github
      </a>
    </div>
  </div>
</template>

<script>
export default {
    name:"Header"
}
</script>

<style lang="less" scoped>
@import "./style.less";
</style>

================================================
FILE: demos/speech_web/web_client/src/components/Content/Header/style.less
================================================
// Styles for the page header (title, description, GitHub link and the
// optional module cards).
.speech_header {
    width: 1200px;
    margin: 0 auto;
    padding-top: 50px;
    // background: url("../../../assets/image/在线体验-背景@2x.png") no-repeat;
    box-sizing: border-box;

    // Clearfix pseudo-element.
    &::after {
        content: "";
        display: block;
        clear: both;
        visibility: hidden;
    }

    // background: pink;
    .speech_header_title {
        height: 57px;
        font-family: PingFangSC-Medium;
        font-size: 38px;
        color: #000000;
        letter-spacing: 0;
        line-height: 57px;
        font-weight: 500;
        margin-bottom: 15px;
    }

    .speech_header_describe {
        height: 26px;
        font-family: PingFangSC-Regular;
        font-size: 16px;
        color: #575757;
        line-height: 26px;
        font-weight: 400;
        margin-bottom: 24px;
    }

    .speech_header_link_box {
        height: 40px;
        margin-bottom: 40px;
        display: flex;
        align-items: center;
        margin-top: 40px;
    }

    // Pill-shaped link button.
    .speech_header_link {
        display: block;
        background: #2932E1;
        width: 120px;
        height: 40px;
        line-height: 40px;
        border-radius: 20px;
        font-family: PingFangSC-Medium;
        font-size: 14px;
        color: #FFFFFF;
        text-align: center;
        font-weight: 500;
        margin-right: 20px;
        // margin-bottom: 40px;

        &:hover {
            opacity: 0.9;
        }
    }

    .speech_header_divider {
        width: 1200px;
        height: 1px;
        background: #D1D1D1;
        margin-bottom: 40px;
    }

    .speech_header_content_wrapper {
        width: 1200px;
        margin: 0 auto;
        // background: pink;
        margin-bottom: 20px;
        display: flex;
        justify-content: space-between;
        flex-wrap: wrap;

        // One card inside the wrapper.
        .speech_header_module {
            width: 384px;
            background: #FFFFFF;
            border: 1px solid rgba(224, 224, 224, 1);
            box-shadow: 4px 8px 12px 0px rgba(0, 0, 0, 0.05);
            border-radius: 16px;
            padding: 30px 34px 0px 34px;
            box-sizing: border-box;
            display: flex;
            margin-bottom: 40px;

            .speech_header_background_img {
                width: 46px;
                height: 46px;
                background-size: 46px 46px;
                background-repeat: no-repeat;
                background-position: center;
                margin-right: 20px;
            }

            .speech_header_content {
                padding-top: 4px;
                margin-bottom: 32px;

                .speech_header_module_title {
                    height: 26px;
                    font-family: PingFangSC-Medium;
                    font-size: 20px;
                    color: #000000;
                    letter-spacing: 0;
                    line-height: 26px;
                    font-weight: 500;
                    margin-bottom: 10px;
                }

                .speech_header_module_introduce {
                    font-family: PingFangSC-Regular;
                    font-size: 16px;
                    color: #666666;
                    letter-spacing: 0;
                    font-weight: 400;
                }
            }
        }
    }
}


================================================
FILE: demos/speech_web/web_client/src/components/Content/Tail/Tail.vue
================================================


================================================
FILE: demos/speech_web/web_client/src/components/Content/Tail/style.less
================================================


================================================
FILE: demos/speech_web/web_client/src/components/Experience.vue
================================================
<script setup>
// Feature-experience page: one tab per demo sub-menu.
import ChatT from './SubMenu/ChatBot/ChatT.vue'
import ASRT from './SubMenu/ASR/ASRT.vue'
import TTST from './SubMenu/TTS/TTST.vue'
import VPRT from './SubMenu/VPR/VPRT.vue'
import IET from './SubMenu/IE/IET.vue'

import VoiceCloneT from './SubMenu/VoiceClone/VoiceClone.vue'
import ERNIE_SATT from './SubMenu/ERNIE_SAT/ERNIE_SAT.vue'
import FineTuneT from './SubMenu/FineTune/FineTune.vue'

</script>

<template>
    <!-- Vue templates use `class`; `className` is a JSX-only spelling. -->
    <div class="experience">
      <div class="experience_wrapper">
        <div class="experience_title">
          功能体验
        </div>
        <div class="experience_describe">
          体验前，请允许浏览器获取麦克风权限
        </div>
        <div class="experience_content">
          <el-tabs
            class="experience_tabs"
            type="border-card"
          >
            <el-tab-pane label="语音聊天" key="1">
              <ChatT></ChatT>
            </el-tab-pane>
            <el-tab-pane label="声纹识别" key="2">
              <VPRT></VPRT>
            </el-tab-pane>
            <el-tab-pane label="语音识别" key="3">
              <ASRT></ASRT>
            </el-tab-pane>
            <el-tab-pane label="语音合成" key="4">
              <TTST></TTST>
            </el-tab-pane>
            <el-tab-pane label="语音指令" key="5">
              <IET></IET>
            </el-tab-pane>
            <el-tab-pane label="一句话合成" key="6">
              <VoiceCloneT></VoiceCloneT>
            </el-tab-pane>
            <el-tab-pane label="小数据微调" key="7">
              <FineTuneT></FineTuneT>
            </el-tab-pane>
            <el-tab-pane label="ERNIE-SAT" key="8">
              <ERNIE_SATT></ERNIE_SATT>
            </el-tab-pane>
          </el-tabs>
        </div>
      </div>
    </div>
</template>


<style lang="less">
@import "./style.less";

</style>

================================================
FILE: demos/speech_web/web_client/src/components/SubMenu/ASR/ASR.vue
================================================
<template>
    <!-- ASR demo: two record buttons (streaming / end-to-end) and their results. -->
    <div class="asrbox">
        <h5> ASR 体验</h5>
        <div class="home" style="margin:1vw;">
            <el-button
                :type="recoType"
                style="margin:1vw;"
                @click="startRecorderChunk()"
            >{{ recoText }} (流式)</el-button>
            <el-button
                :type="recoType"
                style="margin:1vw;"
                @click="startRecorder()"
            >{{ recoText }} (端到端)</el-button>
        </div>
        <!-- Latest streaming result received over the websocket -->
        <a> asr_stream: {{ streamAsrResult }}</a>
        <br>
        <!-- Result of the offline (upload) request -->
        <a> asr_offline: {{ asrResultOffline }} </a>
    </div>
</template>

<script>
import Recorder from 'js-audio-recorder'

// Both recorders are built from the same settings; a fresh options object is
// created for each instance so they never share one config object.
const makeRecorderOptions = () => ({
  sampleBits: 16,      // sample bit depth: 8 or 16 are supported, default 16
  sampleRate: 16000,   // sample rate: 11025, 16000, 22050, 24000, 44100 or 48000; defaults to the browser's rate (48000 in the author's Chrome)
  numChannels: 1,      // channels: 1 or 2 are supported, default 1
  compiling: true
})

// Recorder polled chunk-by-chunk for streaming recognition.
const recorder_chunk = new Recorder(makeRecorderOptions())

// Recorder whose whole capture is exported as a WAV blob for offline recognition.
const recorder = new Recorder(makeRecorderOptions())

    // ASR demo component: streaming recognition over a websocket and
    // end-to-end recognition by uploading the recorded WAV.
    export default {
        name: "ASR",
        data(){
            return {
                streamAsrResult: '',
                recoType: "primary",
                recoText: "开始录音",
                playType: "success",
                asrResultOffline: '',
                onReco: false,
                ws:'',
                // Handle of the interval that polls recorder_chunk while streaming;
                // kept so it can be cleared when recording stops.
                chunkTimer: null,
            }
        },
        mounted (){
            // Open the streaming-ASR websocket.
            this.ws = new WebSocket("ws://localhost:8010/ws/asr/onlineStream")
            // Update the displayed streaming result whenever the server sends a new one.
            this.ws.addEventListener('message', (event) => {
                let temp
                try {
                    temp = JSON.parse(event.data)
                } catch (e) {
                    console.error('Ignoring websocket message that is not valid JSON', e)
                    return
                }
                if(temp && temp.result && (temp.result != this.streamAsrResult)){
                    this.streamAsrResult = temp.result
                    console.log('更新了')
                }
            })
        },
        beforeUnmount (){
            // Release the polling interval and the socket with the component.
            this.stopChunkTimer()
            if (this.ws && typeof this.ws.close === 'function') {
                this.ws.close()
            }
        },

        methods: {
            // Switch the button label/colour between "recording" and "idle".
            setRecordingState(active){
                this.onReco = active
                this.recoType = active ? "danger" : "primary"
                this.recoText = active ? "结束录音" : "开始录音"
            },

            // Stop polling recorder_chunk, if a poll is running.
            stopChunkTimer(){
                if (this.chunkTimer !== null) {
                    clearInterval(this.chunkTimer)
                    this.chunkTimer = null
                }
            },

            // Toggle end-to-end recording; on stop, upload the WAV for offline recognition.
            startRecorder () {
                if(!this.onReco){
                    recorder.clear()
                    recorder.start().then(() => {
                    }, (error) => {
                        console.log("录音出错");
                        // The microphone could not be started: do not leave the UI in "recording" state.
                        this.setRecordingState(false)
                    })
                    this.setRecordingState(true)
                } else {
                    // Stop recording.
                    recorder.stop()
                    this.setRecordingState(false)
                    // Export the audio as WAV and upload it to the server.
                    const wavs = recorder.getWAVBlob()
                    this.uploadFile(wavs, "/api/asr/offline")
                }
            },


            // Toggle streaming recording; while active, new audio chunks are sent every 500 ms.
            startRecorderChunk() {
                if(!this.onReco){
                    if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
                        // send() throws on a socket that is not open.
                        console.error("ASR websocket is not open; streaming recognition was not started")
                        return
                    }
                    // Tell the backend that streaming is starting.
                    var start = JSON.stringify({name:"test.wav", "nbest":5, signal:"start"})
                    this.ws.send(start)
                    recorder_chunk.start().then(() => {
                        if (!this.onReco) {
                            // Recording was stopped before the microphone became ready.
                            return
                        }
                        // Never keep more than one poll alive.
                        this.stopChunkTimer()
                        this.chunkTimer = setInterval(() => {
                            // Keep draining newly recorded data.
                            let newData = recorder_chunk.getNextData();
                            if (!newData.length) {
                                return;
                            }
                            this.uploadChunk(newData)
                        }, 500)
                    }, (error) => {
                        console.log("录音出错");
                        this.setRecordingState(false)
                    })
                    this.setRecordingState(true)
                } else {
                    // Stop recording; the poll must be cleared or it would run forever.
                    this.stopChunkTimer()
                    recorder_chunk.stop()
                    // Telling the backend that recording ended is intentionally disabled:
                    // var end = JSON.stringify({name:"test.wav", "nbest":5, signal:"end"})
                    // this.ws.send(end)
                    this.setRecordingState(false)
                    recorder_chunk.clear()
                }
            },

            // Send every recorded chunk over the websocket.
            uploadChunk(chunkDatas){
                if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
                    return
                }
                chunkDatas.forEach((chunkData) => {
                    this.ws.send(chunkData)
                })
            },

            // POST `file` as multipart field `files` to `post_url` and show the result.
            async uploadFile(file, post_url){
                const formData = new FormData()
                formData.append('files', file)
                const result = await this.$http.post(post_url, formData);
                if (result.data.code === 0) {
                    this.asrResultOffline = result.data.result
                    this.$message.success(result.data.message);
                } else {
                    this.$message.error(result.data.message);
                }
            },
        },
    }
</script>

<style lang='less' scoped>
// Red-bordered, scrollable container of the ASR demo.
.asrbox {
    border: 4px solid #F00;
    // position: fixed;
    top: 40%;
    width: 100%;
    height: 20%;
    overflow: auto;
}
</style>

================================================
FILE: demos/speech_web/web_client/src/components/SubMenu/ASR/ASRT.vue
================================================
<script setup>
// Speech-recognition page: real-time, end-to-end and audio-file recognition tabs.
import RealTime from "./RealTime/RealTime.vue"
import EndToEndIdentification from "./EndToEnd/EndToEndIdentification.vue";
import AudioFileIdentification from "./AudioFile/AudioFileIdentification.vue"
</script>

<template>
    <div class="speech_recognition">
        <div class="speech_recognition_tabs">
            <div class="frame"></div>
            <el-tabs class="speech_recognition_mytabs" type="border-card">
                <el-tab-pane label="实时语音识别" key="1">
                    <RealTime />
                </el-tab-pane>
                <el-tab-pane label="端到端识别" key="2">
                    <EndToEndIdentification />
                </el-tab-pane>
                <el-tab-pane label="音频文件识别" key="3">
                    <AudioFileIdentification />
                </el-tab-pane>
            </el-tabs>
        </div>
    </div>
</template>

<style lang="less" scoped>
@import "./style.less";
</style>

================================================
FILE: demos/speech_web/web_client/src/components/SubMenu/ASR/AudioFile/AudioFileIdentification.vue
================================================
<template>
    <div class="audioFileIdentification">
        <!-- Before upload: file picker -->
        <div v-if="uploadStatus === 0" class="public_recognition_speech">
            <el-upload
                :multiple="false"
                :accept="'.wav'"
                :limit="1"
                :auto-upload="false"
                :on-change="handleChange"
                :show-file-list="false"
            >
                <div class="upload_img">
                    <div class="upload_img_back"></div>
                </div>
            </el-upload>
            <div class="speech_text">
                上传文件
            </div>
            <div class="speech_text_prompt">
                支持50秒内的.wav文件
            </div>
        </div>
        <!-- Uploading -->
        <div v-else-if="uploadStatus === 1" class="on_the_cross_speech">
            <div class="on_the_upload_img">
                <div class="on_the_upload_img_back"></div>
            </div>
            <div class="on_the_speech_text">
                <!-- Same spinner component as the "recognizing" state below;
                     the former `<Spin indicator={antIcon} />` was JSX, not Vue template syntax. -->
                <span class="on_the_speech_loading"> <a-spin /></span> 上传中
            </div>
        </div>
        <!-- File selected -->
        <div v-else>
            <!-- Ready to start recognition -->
            <div v-if="recognitionStatus === 0" class="public_recognition_speech_start">
                <div class="public_recognition_speech_content">
                    <div
                        class="public_recognition_speech_title"
                    >
                        {{ filename }}
                    </div>
                    <div
                        class="public_recognition_speech_again"
                        @click="uploadAgain()"
                    >重新上传</div>
                    <div
                        class="public_recognition_speech_play"
                        @click="paly()"
                    >播放</div>
                </div>
                <div
                    class="speech_promp"
                    @click="beginToIdentify()"
                >
                    开始识别
                </div>
            </div>
            <!-- Recognition in progress -->
            <div v-else-if="recognitionStatus === 1" class="public_recognition_speech_identify">
                <div class="public_recognition_speech_identify_box">
                    <div
                        class="public_recognition_speech_identify_back_img"
                    >
                        <a-spin />
                    </div>

                    <div
                        class="public_recognition__identify_the_promp"
                    >识别中</div>
                </div>
            </div>
            <!-- Recognize again -->
            <div v-else class="public_recognition_speech_identify_ahain">
                <div class="public_recognition_speech_identify_box_btn">
                    <div
                        class="public_recognition__identify_the_btn"
                        @click="toIdentifyThe()"
                    >重新识别</div>
                </div>
            </div>
        </div>

        <!-- Pointer between the control panel and the result panel -->
        <div class="public_recognition_point_to">

        </div>
        <!-- Recognition result -->
        <div class="public_recognition_result">
            <div>识别结果</div>
            <div>{{ asrResult }}</div>
        </div>
    </div>
</template>

<script>
import { asrOffline } from '../../../../api/ApiASR'

// Module-level AudioContext used by playAudioData() to play the selected file.
// Created once when this module is loaded, with a 24 kHz sample rate and the
// 'interactive' latency hint.
let audioCtx = new AudioContext({
  latencyHint: 'interactive',
  sampleRate: 24000,
});

// Audio-file recognition: pick a local .wav file, optionally play it, and send
// it to the offline ASR endpoint.
export default {
    name:"",
    data(){
        return {
            // 0: nothing selected, 1: uploading, 2: file selected
            uploadStatus : 0,
            // 0: ready to recognize, 1: recognizing, 2: finished (offer "recognize again")
            recognitionStatus : 0,
            asrResult : "",
            indicator : "",

            filename: "",
            upfile: ""

        }
    },

    methods:{
        // el-upload change handler: remember the selected file.
        handleChange(file, fileList){
            this.uploadStatus = 2
            this.filename = file.name
            this.upfile = file
        },

        // Read `file` as a data URL; resolves with the FileReader (see `.result`).
        readFile(file) {
            return new Promise((resolve, reject) => {
                const fileReader = new FileReader();
                fileReader.onload = function () {
                    resolve(fileReader);
                };
                fileReader.onerror = function (err) {
                    reject(err);
                };
                fileReader.readAsDataURL(file);
            });
        },

        // Upload again: go back to the file picker and forget the current file.
        uploadAgain(){
            this.uploadStatus = 0
            this.upfile = ""
            this.filename = ""
            this.asrResult = ""
        },

        // Decode an ArrayBuffer of encoded audio and play it through audioCtx.
        playAudioData(wav_buffer){
            // A context created before any user gesture may start suspended.
            if (audioCtx.state === 'suspended') {
                audioCtx.resume().catch((e) => {
                    console.error('Failed to resume the audio context', e)
                })
            }
            audioCtx.decodeAudioData(wav_buffer, buffer => {
                let source = audioCtx.createBufferSource();
                source.buffer = buffer

                source.connect(audioCtx.destination);
                source.start();
            }, function (e) {
                // Previously swallowed silently; report undecodable files.
                console.error('Failed to decode the audio file', e)
            });
        },

        // Play the locally selected audio file.
        async paly(){
            if(!this.upfile || !this.upfile.raw){
                return
            }
            try {
                // Read the bytes directly instead of round-tripping through a
                // base64 data URL, which crashed when the URL prefix did not match.
                const wavBuffer = await this.upfile.raw.arrayBuffer()
                this.playAudioData(wavBuffer)
            } catch (e) {
                console.error('Failed to read the audio file', e)
            }
        },

        // Convert a base64 / base64url string to a Uint8Array.
        // An empty or missing input yields an empty array instead of throwing.
        base64ToUint8Array(base64String){
            if (!base64String) {
                return new Uint8Array(0)
            }
            const padding = '='.repeat((4 - base64String.length % 4) % 4);
            const base64 = (base64String + padding)
                .replace(/-/g, '+')
                .replace(/_/g, '/');

            const rawData = window.atob(base64);
            const outputArray = new Uint8Array(rawData.length);

            for (let i = 0; i < rawData.length; ++i) {
                outputArray[i] = rawData.charCodeAt(i);
            }
            return outputArray;
        },

        // Start recognition: upload the file to the offline ASR endpoint.
        async beginToIdentify(){
            if(!this.upfile || !this.upfile.raw){
                return
            }
            // Recognizing.
            this.recognitionStatus = 1

            const formData = new FormData();
            formData.append('files', this.upfile.raw);

            try {
                const result = await asrOffline(formData)
                if (result.data.code === 0) {
                    this.$message.success("识别成功")
                    // Store the recognized text.
                    this.asrResult = result.data.result
                } else {
                    // Failure must be reported as an error, not as a success toast.
                    this.$message.error("识别失败")
                }
            } catch (e) {
                console.error('Offline ASR request failed', e)
                this.$message.error("识别失败")
            } finally {
                // Always leave the "recognizing" state, even when the request rejects.
                this.recognitionStatus = 2
            }
        },

        // Recognize again: return to the initial state and clear the result.
        toIdentifyThe(){
            // this.uploadAgain()
            this.uploadStatus = 0
            this.recognitionStatus = 0
            this.asrResult = ""
        }

    }
}

</script>

<style lang="less" scoped>
@import "./style.less";


</style>

================================================
FILE: demos/speech_web/web_client/src/components/SubMenu/ASR/AudioFile/style.less
================================================
.audioFileIdentification {
    width: 1106px;
    height: 270px;
    // background-color: pink;
    padding-top: 40px;
    box-sizing: border-box;
    display: flex;
    // 开始上传
    .public_recognition_speech {
        width: 295px;
        height: 230px;
        padding-top: 32px;
        box-sizing: border-box;
        // 开始上传
        .upload_img {
            width: 116px;
            height: 116px;
            background: #2932E1;
            border-radius: 50%;
            margin-left: 98px;
            cursor: pointer;
            margin-bottom: 20px;
            display: flex;
            justify-content: center;
            align-items: center;
            .upload_img_back {
                width: 34.38px;
                height: 30.82px;
                background: #2932E1;
                background: url("../../../../assets/image/ic_大-上传文件.svg");
                background-repeat: no-repeat;
                background-position: center;
                background-size: 34.38px  30.82px;
                cursor: pointer;
            }
            &:hover {
                opacity: 0.9;
            };

        };
      
          
        .speech_text {
                height: 22px;
                font-family: PingFangSC-Medium;
                font-size: 16px;
                color: #000000;
                font-weight: 500;
                margin-left: 124px;
                margin-bottom: 10px;
        };
        .speech_text_prompt {
            height: 20px;
            font-family: PingFangSC-Regular;
            font-size: 14px;
            color: #999999;
            font-weight: 400;
            margin-left: 84px;
        };
    };
    // 上传中
    .on_the_cross_speech {
        width: 295px;
        height: 230px;
        padding-top: 32px;
        box-sizing: border-box;
        
        .on_the_upload_img {
            width: 116px;
            height: 116px;
           background: #7278F5;
            border-radius: 50%;
            margin-left: 98px;
            cursor: pointer;
            margin-bottom: 20px;
            display: flex;
            justify-content: center;
            align-items: center;
            
            .on_the_upload_img_back {
                width: 34.38px;
                height: 30.82px;
               background: #7278F5;
                background: url("../../../../assets/image/ic_大-上传文件.svg");
                background-repeat: no-repeat;
                background-position: center;
                background-size: 34.38px  30.82px;
                cursor: pointer;
              
            };
        };
      
          
        .on_the_speech_text {
                height: 22px;
                font-family: PingFangSC-Medium;
                font-size: 16px;
                color: #000000;
                font-weight: 500;
                margin-left: 124px;
                margin-bottom: 10px;
                display: flex;
                // justify-content: center;
                align-items: center;
                .on_the_speech_loading {
                    display: inline-block;
                    width: 16px;
                    height: 16px;
                   background: #7278F5;
                    // background: url("../../../../assets/image/ic_开始聊天.svg");
                    // background-repeat: no-repeat;
                    // background-position: center;
                    // background-size: 16px  16px;
                    margin-right: 8px;
                };
        };
    };

    // Start recognition
    .public_recognition_speech_start {
        position: relative;
        box-sizing: border-box;
        width: 295px;
        height: 230px;
        padding-top: 32px;

        // Full-width flex row, centred horizontally (left 50% + translateX)
        // and placed 40px below the top edge of the panel.
        .public_recognition_speech_content {
            position: absolute;
            top: 40px;
            left: 50%;
            transform: translateX(-50%);
            display: flex;
            justify-content: center;
            align-items: center;
            width: 100%;

            // Plain black 16px label.
            .public_recognition_speech_title {
                height: 22px;
                font-family: PingFangSC-Regular;
                font-size: 16px;
                font-weight: 400;
                color: #000000;
            }

            // Blue clickable text with a 30px left margin.
            .public_recognition_speech_again {
                height: 22px;
                margin-left: 30px;
                font-family: PingFangSC-Regular;
                font-size: 16px;
                font-weight: 400;
                color: #2932E1;
                cursor: pointer;
            }

            // Blue clickable text with a 20px left margin.
            .public_recognition_speech_play {
                height: 22px;
                margin-left: 20px;
                font-family: PingFangSC-Regular;
                font-size: 16px;
                font-weight: 400;
                color: #2932E1;
                cursor: pointer;
            }
        }

        // Pill-shaped blue button, horizontally centred at top: 112px;
        // line-height equals the height so the label is vertically centred.
        .speech_promp {
            position: absolute;
            top: 112px;
            left: 50%;
            transform: translateX(-50%);
            width: 142px;
            height: 44px;
            line-height: 44px;
            border-radius: 22px;
            background: #2932E1;
            font-family: PingFangSC-Medium;
            font-size: 14px;
            font-weight: 500;
            color: #FFFFFF;
            text-align: center;
            cursor: pointer;
        }
    }
    // Recognition in progress
    .public_recognition_speech_identify {
        position: relative;
        box-sizing: border-box;
        width: 295px;
        height: 230px;
        padding-top: 32px;

        // Pill centred on both axes of the panel (top/left 50% + translate),
        // laying out its children in a centred flex row.
        .public_recognition_speech_identify_box {
            position: absolute;
            top: 50%;
            left: 50%;
            transform: translate(-50%,-50%);
            display: flex;
            justify-content: center;
            align-items: center;
            width: 143px;
            height: 44px;
            border-radius: 22px;
            background: #7278F5;
            cursor: pointer;

            // 16x16 slot; no background is declared here (earlier fill / SVG
            // background declarations were left disabled).
            .public_recognition_speech_identify_back_img {
                width: 16px;
                height: 16px;
            }

            // White 14px label with a 12px left margin.
            .public_recognition__identify_the_promp {
                height: 20px;
                margin-left: 12px;
                font-family: PingFangSC-Medium;
                font-size: 14px;
                font-weight: 500;
                color: #FFFFFF;
            }
        }
    }
    // Recognise again
    // NOTE(review): "ahain" looks like a typo of "again"; the class name is kept
    // unchanged because markup outside this stylesheet presumably references it.
    .public_recognition_speech_identify_ahain {
        position: relative;
        box-sizing: border-box;
        width: 295px;
        height: 230px;
        padding-top: 32px;
        cursor: pointer;

        // Blue pill button centred on both axes of the panel.
        .public_recognition_speech_identify_box_btn {
            position: absolute;
            top: 50%;
            left: 50%;
            transform: translate(-50%,-50%);
            display: flex;
            justify-content: center;
            align-items: center;
            width: 143px;
            height: 44px;
            border-radius: 22px;
            background: #2932E1;
            cursor: pointer;

            // White 14px button label.
            .public_recognition__identify_the_btn {
                height: 20px;
                font-family: PingFangSC-Medium;
                font-size: 14px;
                font-weight: 500;
                color: #FFFFFF;
            }
        }
    }
    // Pointer arrow: a 47x67 box showing the arrow image once, centred,
    // scaled to the box size.
    .public_recognition_point_to {
        width: 47px;
        height: 67px;
        margin-top: 91px;
        margin-right: 67px;
        background: url("../../../../assets/image/步骤-箭头切图@2x.png") no-repeat;
        background-position: center;
        background-size: 47px 67px;
    }
    // Recognition result box
    .public_recognition_result {
        width: 680px;
        height: 230px;
        padding: 40px 50px 0px 50px;
        background: #FAFAFA;

        div {
            // First <div> among its siblings: same text style as the second,
            // plus a 20px gap underneath.
            &:nth-of-type(1) {
                height: 26px;
                line-height: 26px;
                margin-bottom: 20px;
                font-family: PingFangSC-Medium;
                font-size: 16px;
                font-weight: 500;
                color: #666666;
            }

            // Second <div> among its siblings.
            &:nth-of-type(2) {
                height: 26px;
                line-height: 26px;
                font-family: PingFangSC-Medium;
                font-size: 16px;
                font-weight: 500;
                color: #666666;
            }
        }
    }
};

================================================
FILE: demos/speech_web/web_client/src/components/SubMenu/ASR/EndToEnd/EndToEndIdentification.vue
================================================
<template>
    <div class="endToEndIdentification">
      <!-- Recorder column: stop button while onReco is true, start button otherwise -->
      <div  class="public_recognition_speech">
      
      <div v-if="onReco"> 
        <!-- Stop recording -->
        <div @click="endRecorder()" class="endToEndIdentification_end_recorder_img">
              <div class='endToEndIdentification_end_recorder_img_back'></div>
        </div>
      </div>
      <div v-else>
        <div @click="startRecorder()" class="endToEndIdentification_start_recorder_img"></div>
      </div>
      
        <div class="endToEndIdentification_prompt" >
            <div v-if="onReco">
                结束识别
            </div>
            <div v-else>
                开始识别
            </div>
        </div>
        <div class="speech_text_prompt">
            停止录音后得到识别结果
        </div>
      </div>
      <div class="public_recognition_point_to"></div>
      <!-- Result box: a heading followed by the text bound to asrResult -->
      <div class="public_recognition_result">
        <div>识别结果</div>
        <div> {{asrResult}} </div>
      </div>
    </div>
</template>

<script>
import Recorder from 'js-audio-recorder'
import { asrOffline } from '../../../../api/ApiASR'

// Module-level recorder shared by this component: mono, 16 kHz, 16-bit.
const recorder = new Recorder({
  numChannels: 1,     // channel count: 1 or 2 supported, default 1
  sampleRate: 16000,  // sample rate: 11025/16000/22050/24000/44100/48000 supported; defaults to the browser's rate (48000 in the author's Chrome)
  sampleBits: 16,     // bits per sample: 8 or 16 supported, default 16
  compiling: true     // NOTE(review): presumably enables convert-while-recording in js-audio-recorder — confirm
})

// End-to-end ASR panel: records a clip with the module-level `recorder`,
// uploads it as a WAV blob through `asrOffline`, and shows the returned text.
export default {
    data () {
        return {
            onReco: false,   // true while a recording is in progress (switches the start/stop UI)
            asrResult: "",   // recognition text rendered in the result box
        }
    },
    methods: {
        // Start recording.
        startRecorder(){
            this.onReco = true
            recorder.clear()
            // start() resolves once capture has begun; if it rejects (for example
            // when microphone access is refused) roll the UI back to the idle state
            // instead of leaving it stuck on "recording".
            recorder.start().then(() => {}, (error) => {
                console.log("录音出错", error)
                this.onReco = false
            })
        },

        // Stop recording, export the captured audio as WAV and upload it.
        endRecorder(){
            recorder.stop()
            this.onReco = false
            const wavBlob = recorder.getWAVBlob()
            this.uploadFile(wavBlob)
        },

        // Upload the recording under the form field "files" and display the outcome.
        // `file` is the Blob produced by recorder.getWAVBlob().
        async uploadFile(file){
            const formData = new FormData()
            formData.append('files', file)

            let result
            try {
                result = await asrOffline(formData)
            } catch (error) {
                // Request failed before a response body was available (network error,
                // server down, ...): tell the user rather than leaving an unhandled rejection.
                console.log(error)
                this.$message.error("识别请求失败，请检查后端服务是否正确开启")
                return
            }

            // code === 0 marks success; `result` carries the recognised text.
            if (result.data.code === 0) {
                this.asrResult = result.data.result
                this.$message.success(result.data.message);
            } else {
                this.$message.error(result.data.message);
            }
        },

    }
    
}
</script>

<style lang="less" scoped>
@import "./style.less";
</style>

================================================
FILE: demos/speech_web/web_client/src/components/SubMenu/ASR/EndToEnd/style.less
================================================
// End-to-end ASR panel: a flex row holding the recorder column, the arrow
// graphic and the result box.
.endToEndIdentification {
    width: 1106px;
    height: 270px;
    padding-top: 40px;
    box-sizing: border-box;
    display: flex;

    // Recorder column (start / stop recognition)
    .public_recognition_speech {
        width: 295px;
        height: 230px;
        padding-top: 32px;
        box-sizing: border-box;

        // Idle state: 116x116 "start" icon.
        .endToEndIdentification_start_recorder_img {
            width: 116px;
            height: 116px;
            background: url("../../../../assets/image/ic_开始聊天.svg");
            background-repeat: no-repeat;
            background-position: center;
            background-size: 116px 116px;
            margin-left: 98px;
            margin-bottom: 20px;
            cursor: pointer;

            &:hover {
                // Swap only the image: a `background` shorthand here would reset
                // the repeat / position / size declared above to their initial
                // values while hovered.
                background-image: url("../../../../assets/image/ic_开始聊天_hover.svg");
            }
        }

        // Recording state: blue disc with the waveform icon centred inside it.
        .endToEndIdentification_end_recorder_img {
            width: 116px;
            height: 116px;
            background: #2932E1;
            border-radius: 50%;
            display: flex;
            justify-content: center;
            align-items: center;
            margin-left: 98px;
            margin-bottom: 20px;
            cursor: pointer;

            .endToEndIdentification_end_recorder_img_back {
                width: 50px;
                height: 50px;
                background: url("../../../../assets/image/ic_大-声音波浪.svg");
                background-repeat: no-repeat;
                background-position: center;
                background-size: 50px 50px;

                &:hover {
                    opacity: 0.9;
                }
            }
        }

        // Caption under the button.
        .endToEndIdentification_prompt {
            height: 22px;
            font-family: PingFangSC-Medium;
            font-size: 16px;
            color: #000000;
            font-weight: 500;
            margin-left: 124px;
            margin-bottom: 10px;
        }

        // Grey hint line.
        .speech_text_prompt {
            height: 20px;
            font-family: PingFangSC-Regular;
            font-size: 14px;
            color: #999999;
            font-weight: 400;
            margin-left: 90px;
        }
    }

    // Pointer arrow
    .public_recognition_point_to {
        width: 47px;
        height: 67px;
        background: url("../../../../assets/image/步骤-箭头切图@2x.png") no-repeat;
        background-position: center;
        background-size: 47px 67px;
        margin-top: 91px;
        margin-right: 67px;
    }

    // Recognition result box
    .public_recognition_result {
        width: 680px;
        height: 230px;
        background: #FAFAFA;
        padding: 40px 50px 0px 50px;

        div {
            // First <div>: adds a 20px gap underneath.
            &:nth-of-type(1) {
                height: 26px;
                font-family: PingFangSC-Medium;
                font-size: 16px;
                color: #666666;
                line-height: 26px;
                font-weight: 500;
                margin-bottom: 20px;
            }
            // Second <div>.
            &:nth-of-type(2) {
                height: 26px;
                font-family: PingFangSC-Medium;
                font-size: 16px;
                color: #666666;
                line-height: 26px;
                font-weight: 500;
            }
        }
    }
}

================================================
FILE: demos/speech_web/web_client/src/components/SubMenu/ASR/RealTime/RealTime.vue
================================================
<template>
    <div class="realTime">
      <!-- Recorder column: stop button while onReco is true, start button otherwise -->
      <div  class="public_recognition_speech">
      
      <div v-if="onReco"> 
        <!-- Stop recording -->
        <div @click="endRecorder()" class="endToEndIdentification_end_recorder_img">
              <div class='endToEndIdentification_end_recorder_img_back'></div>
        </div>
      </div>
      <div v-else>
        <div @click="startRecorder()" class="endToEndIdentification_start_recorder_img"></div>
      </div>
      
        <div class="endToEndIdentification_prompt" >
            <div v-if="onReco">
                结束识别
            </div>
            <div v-else>
                开始识别
            </div>
        </div>

        <div class="speech_text_prompt">
            实时得到识别结果
        </div>

      </div>
      <div class="public_recognition_point_to"></div>
      <!-- Result box: a heading followed by the text bound to asrResult -->
      <div class="public_recognition_result">
        <div>识别结果</div>
        <div> {{asrResult}} </div>
      </div>
    </div>
  
</template>

<script>
import Recorder from 'js-audio-recorder'
import { apiURL } from '../../../../api/API'

// Module-level recorder shared by this component: mono, 16 kHz, 16-bit.
const recorder = new Recorder({
  numChannels: 1,     // channel count: 1 or 2 supported, default 1
  sampleRate: 16000,  // sample rate: 11025/16000/22050/24000/44100/48000 supported; defaults to the browser's rate (48000 in the author's Chrome)
  sampleBits: 16,     // bits per sample: 8 or 16 supported, default 16
  compiling: true     // NOTE(review): presumably enables convert-while-recording, which getNextData() relies on — confirm
})

// Streaming ASR panel: opens a WebSocket on mount, pushes recorded audio
// chunks to it every 300 ms while recording, and displays each `result`
// the server sends back.
export default {
    data () {
        return {
            onReco: false,     // true while recording (switches the start/stop UI)
            asrResult: "",     // latest recognition text shown in the result box
            wsUrl: "",         // socket URL, filled in from apiURL on mount
            ws: "",            // WebSocket instance, created on mount
            chunkTimer: null   // id of the setInterval that polls the recorder, or null
        }
    },
    mounted () {
        this.wsUrl = apiURL.ASR_SOCKET_RECORD
        this.ws = new WebSocket(this.wsUrl)
        this.ws.addEventListener('message', (event) => {
            let payload
            try {
                payload = JSON.parse(event.data)
            } catch (error) {
                // Ignore frames that are not JSON instead of throwing inside the listener.
                console.log("ws message is not valid JSON", error)
                return
            }
            // Only touch the reactive state when the text actually changed.
            if (payload && payload.result && payload.result != this.asrResult) {
                this.asrResult = payload.result
            }
        })

    },
    // Release the poller and the socket when the component goes away.
    // NOTE(review): `beforeUnmount` is the Vue 3 hook name; if this project runs
    // on Vue 2 the equivalent is `beforeDestroy` — confirm.
    beforeUnmount () {
        this.stopChunkTimer()
        if (this.ws && typeof this.ws.close === 'function') {
            this.ws.close()
        }
    },
    methods: {
        // Start recording and streaming.
        startRecorder(){
            // Refuse to start unless the socket is open.
            if(this.ws.readyState != this.ws.OPEN){
                this.$message.error("websocket 链接失败，请检查 Websocket 后端服务是否正确开启")
                return
            }

            this.onReco = true

            // Tell the backend a new utterance starts before any audio is sent.
            var start = JSON.stringify({name:"test.wav", "nbest":5, signal:"start"})
            this.ws.send(start)

            // Make sure no poller from an earlier session is still running.
            this.stopChunkTimer()

            recorder.start().then(() => {
                // The user may already have pressed stop while start() was pending.
                if (!this.onReco) {
                    return
                }
                this.chunkTimer = setInterval(() => {
                    // Fetch the audio captured since the previous poll.
                    let newData = recorder.getNextData();
                    if (!newData || !newData.length) {
                        return;
                    }
                    this.uploadChunk(newData)
                }, 300)
            }, (error) => {
                console.log("录音出错");
                // Recording never started: return the UI to the idle state.
                this.onReco = false
            })
        },
        
        // Stop recording and stop polling for new chunks.
        // NOTE(review): nothing is sent to the backend here to mark the end of the
        // stream; check whether the server expects an explicit end signal.
        endRecorder(){
            this.stopChunkTimer()
            recorder.stop()
            this.onReco = false
            recorder.clear()
        },

        // Cancel the chunk poller if one is active.
        stopChunkTimer(){
            if (this.chunkTimer !== null) {
                clearInterval(this.chunkTimer)
                this.chunkTimer = null
            }
        },

        // Send every chunk in `chunkDatas` over the socket, in order.
        uploadChunk(chunkDatas){
            // Skip sending when the socket is no longer open.
            if (this.ws.readyState != this.ws.OPEN) {
                return
            }
            chunkDatas.forEach((chunkData) => {
                this.ws.send(chunkData)
            })
        },
    },

}
</script>

<style lang="less" scoped>
@import "./style.less";
</style>

================================================
FILE: demos/speech_web/web_client/src/components/SubMenu/ASR/RealTime/style.less
================================================
// Real-time ASR panel: a flex row holding the recorder column, the arrow
// graphic and the result box.
.realTime {
    width: 1106px;
    height: 270px;
    padding-top: 40px;
    box-sizing: border-box;
    display: flex;

    // Recorder column (start / stop recognition)
    .public_recognition_speech {
        width: 295px;
        height: 230px;
        padding-top: 32px;
        box-sizing: border-box;

        // Idle state: 116x116 "start" icon.
        .endToEndIdentification_start_recorder_img {
            width: 116px;
            height: 116px;
            background: url("../../../../assets/image/ic_开始聊天.svg");
            background-repeat: no-repeat;
            background-position: center;
            background-size: 116px 116px;
            margin-left: 98px;
            margin-bottom: 20px;
            cursor: pointer;

            &:hover {
                // Swap only the image: a `background` shorthand here would reset
                // the repeat / position / size declared above to their initial
                // values while hovered.
                background-image: url("../../../../assets/image/ic_开始聊天_hover.svg");
            }
        }

        // Recording state: blue disc with the waveform icon centred inside it.
        .endToEndIdentification_end_recorder_img {
            width: 116px;
            height: 116px;
            background: #2932E1;
            border-radius: 50%;
            display: flex;
            justify-content: center;
            align-items: center;
            margin-left: 98px;
            margin-bottom: 20px;
            cursor: pointer;

            .endToEndIdentification_end_recorder_img_back {
                width: 50px;
                height: 50px;
                background: url("../../../../assets/image/ic_大-声音波浪.svg");
                background-repeat: no-repeat;
                background-position: center;
                background-size: 50px 50px;

                &:hover {
                    opacity: 0.9;
                }
            }
        }

        // Caption under the button.
        .endToEndIdentification_prompt {
            height: 22px;
            font-family: PingFangSC-Medium;
            font-size: 16px;
            color: #000000;
            font-weight: 500;
            margin-left: 124px;
            margin-bottom: 10px;
        }

        // Grey hint line.
        .speech_text_prompt {
            height: 20px;
            font-family: PingFangSC-Regular;
            font-size: 14px;
            color: #999999;
            font-weight: 400;
            margin-left: 105px;
        }
    }

    // Pointer arrow
    .public_recognition_point_to {
        width: 47px;
        height: 67px;
        background: url("../../../../assets/image/步骤-箭头切图@2x.png") no-repeat;
        background-position: center;
        background-size: 47px 67px;
        margin-top: 91px;
        margin-right: 67px;
    }

    // Recognition result box
    .public_recognition_result {
        width: 680px;
        height: 230px;
        background: #FAFAFA;
        padding: 40px 50px 0px 50px;

        div {
            // First <div>: adds a 20px gap underneath.
            &:nth-of-type(1) {
                height: 26px;
                font-family: PingFangSC-Medium;
                font-size: 16px;
                color: #666666;
                line-height: 26px;
                font-weight: 500;
                margin-bottom: 20px;
            }
            // Second <div>.
            &:nth-of-type(2) {
                height: 26px;
                font-family: PingFangSC-Medium;
                font-size: 16px;
                color: #666666;
                line-height: 26px;
                font-weight: 500;
            }
        }
    }
}

================================================
FILE: demos/speech_web/web_client/src/components/SubMenu/ASR/style.less
================================================
// Speech-recognition card and the pill-style tab bar inside it.
.speech_recognition {
    width: 1200px;
    height: 410px;
    background: #FFFFFF;
    padding: 40px 50px 50px 44px;
    position: relative;

    // Rounded 1px outline, absolutely positioned inside the card.
    .frame {
        width: 605px;
        height: 50px;
        border: 1px solid rgba(238,238,238,1);
        border-radius: 25px;
        position: absolute;
    }

    // Overrides for the tab component's `.ant-tabs-*` classes.
    .speech_recognition_mytabs {
        .ant-tabs-tab {
            position: relative;
            display: inline-flex;
            align-items: center;
            font-size: 14px;
            background: transparent;
            border: 0;
            outline: none;
            cursor: pointer;
            padding: 12px 26px;
            box-sizing: border-box;
        }

        // Selected tab: tinted pill.
        .ant-tabs-tab-active {
            height: 50px;
            background: #EEEFFD;
            border-radius: 25px;
            padding: 12px 26px;
            box-sizing: border-box;
        }

        // Make the ink bar invisible and non-interactive.
        .ant-tabs-ink-bar {
            position: absolute;
            background: transparent !important;
            pointer-events: none;
        }

        // Make the rule drawn by the nav's ::before transparent. The
        // position-qualified selectors are listed alongside the generic one so
        // each keeps the specificity it had as a separate rule.
        .ant-tabs-nav::before,
        .ant-tabs-top > .ant-tabs-nav::before,
        .ant-tabs-bottom > .ant-tabs-nav::before,
        .ant-tabs-top > div > .ant-tabs-nav::before,
        .ant-tabs-bottom > div > .ant-tabs-nav::before {
            position: absolute;
            right: 0;
            left: 0;
            border-bottom: 1px solid transparent !important;
            content: '';
        }
    }
}

================================================
FILE: demos/speech_web/web_client/src/components/SubMenu/ChatBot/ChatT.vue
================================================
<template>
    <div class="voice_chat">
        <!-- Idle state: start-chat button and hints -->
        <div v-if="!onReco" class="voice_chat_wrapper">
            <div class="voice_chat_btn"
                @click="startRecorder()"
            ></div>
            <div class="voice_chat_btn_title">点击开始聊天</div>
            <div class="voice_chat_btn_prompt">聊天前请允许浏览器获取麦克风权限</div>
        </div>
        <!-- Chatting state: latest bot reply, latest user utterance, end-chat button -->
        <div v-else class="voice_chat_dialog_wrapper">
            <div class="dialog_box" >
              <ul class="dialog_content" >
                <li id="speech_list">
                    <div class="dialog_content_img_pp"></div>
                    <div class="dialog_content_dialogue_pp">
                        {{ nlpResult }}
                    </div>
                </li>
                <li id="speech_list" class="move_dialogue">
                    
                    <div class="dialog_content_dialogue_user">
                        {{ asrResult }}
                    </div>
                    <div class="dialog_content_img_user"></div>
                </li>
              </ul>
            </div>
            <!-- startRecorder() toggles: while chatting it stops the session -->
            <div class="btn_end_dialog"
              @click="startRecorder()"
            >
              <span></span>
              <span>结束聊天</span>
            </div>
          </div>

    </div>
</template>

<script>
import { asrCollentEnv, asrOffline, asrResumeRecord, asrStopRecord } from '../../../api/ApiASR'
import { apiURL } from '../../../api/API'
import Recorder from 'js-audio-recorder'
import { nlpChat } from '../../../api/ApiNLP';


// Playback context used by playAudioData(); uses the prefixed constructor
// when the unprefixed one is unavailable.
// NOTE(review): 24000 presumably matches the sample rate of the TTS audio — confirm.
const AudioContextCtor = window.AudioContext || window.webkitAudioContext;
const audioCtx = new AudioContextCtor({
   sampleRate: 24000,
   latencyHint: 'interactive',
  });

// Module-level recorder shared by this component: mono, 16 kHz, 16-bit.
const recorder = new Recorder({
  numChannels: 1,     // channel count: 1 or 2 supported, default 1
  sampleRate: 16000,  // sample rate: 11025/16000/22050/24000/44100/48000 supported; defaults to the browser's rate (48000 in the author's Chrome)
  sampleBits: 16,     // bits per sample: 8 or 16 supported, default 16
  compiling: true     // NOTE(review): presumably enables convert-while-recording, which getNextData() relies on — confirm
})


// Voice chat panel. Audio chunks are streamed over a WebSocket; every message
// the socket delivers is treated as recognised text, sent to the NLP endpoint,
// and the reply is synthesised through the TTS endpoint and played back.
// Recording is paused while a reply is fetched/played and resumed when
// playback ends.
export default {
    data () {
        return {
            onReco: false,        // true while a chat session is active
            onRecoPause: false,   // true while recording is paused for NLP/TTS playback
            allResultList: [],    // recognised utterances collected during the session
            asrResult: "",        // text shown in the user bubble
            nlpResult: "",        // text shown in the bot bubble
            ws:"",                // WebSocket instance, created on mount
            chunkTimer: null,     // id of the setInterval that polls the recorder, or null

            initChatText: "欢迎使用飞桨语音对话系统，试试和我说话吧",
            speakingText: "我正在说话...",
            stopText: "等待音频播放结束..."
        }
    },
    mounted () {
        // Open the socket; each incoming message is a recognised utterance.
        this.ws = new WebSocket(apiURL.CHAT_SOCKET_RECORD);
        this.ws.addEventListener('message', (event) => {
            this.allResultList.push(
                {
                    value : event.data,
                    name : "asr"
                }
            )
            this.asrResult = event.data
            this.getNlp(event.data)
        })
    },
    // Release the poller and the socket when the component goes away.
    // NOTE(review): `beforeUnmount` is the Vue 3 hook name; if this project runs
    // on Vue 2 the equivalent is `beforeDestroy` — confirm.
    beforeUnmount () {
        this.stopChunkTimer()
        if (this.ws && typeof this.ws.close === 'function') {
            this.ws.close()
        }
    },
    methods: {
        // Toggle the chat session: start it when idle, end it when active.
        startRecorder(){
            if(this.ws.readyState != this.ws.OPEN){
                this.$message.error("websocket 链接失败，请检查 Websocket 后端服务是否正确开启")
                return
            }
            this.allResultList = []
            if(!this.onReco){
                this.asrResult = this.speakingText
                this.resumeRecordOnline()
                // Make sure no poller from an earlier session is still running.
                this.stopChunkTimer()
                recorder.start().then(() => {
                    // The session may already have been ended while start() was pending.
                    if (!this.onReco) {
                        return
                    }
                    this.chunkTimer = setInterval(() => {
                        // Version 1.0.7 of the recorder only declares getNextData()
                        // without implementing it, so the result may be undefined.
                        let newData = recorder.getNextData();
                        if (!newData || !newData.length) {
                            return;
                        }
                        // Stream the new chunks to the backend.
                        this.uploadChunk(newData)
                    }, 500)
                }, () => {
                    console.log("录音出错");
                })
                this.onReco = true
                // Play the greeting.
                this.initNLP()
            } else {
                // End the session.
                this.stopChunkTimer()
                recorder.stop()
                this.onReco = false
                // Clear the pause flag so a playback that finishes later does not
                // resume client/server recording after the chat has ended.
                this.onRecoPause = false
                this.asrResult = ""
                this.stopRecordOnline()
            }
        },

        // Cancel the chunk poller if one is active.
        stopChunkTimer(){
            if (this.chunkTimer !== null) {
                clearInterval(this.chunkTimer)
                this.chunkTimer = null
            }
        },

        // Send every chunk in `chunkDatas` over the socket, in order.
        uploadChunk(chunkDatas){
            chunkDatas.forEach((chunkData) => {
                this.ws.send(chunkData)
            })
        },


        // Ask the backend to resume recording.
        async resumeRecordOnline(){
            await asrResumeRecord();
        },
        // Ask the backend to pause recording.
        async stopRecordOnline(){
            await asrStopRecord();
        },
        // Clear the collected utterances.
        clearChat(){
            this.allResultList = []
        },

        // Pause recording on both sides, then show and speak the greeting.
        initNLP(){
            this.onRecoPause = true
            recorder.pause()
            this.stopRecordOnline()
            console.log('录音暂停')
            this.asrResult = this.stopText

            this.nlpResult = this.initChatText
            this.getTts(this.initChatText)

        },

        // Pause recording, fetch the chat reply for `text`, then show and speak it.
        // NOTE(review): if the request fails or returns a non-zero code, nothing
        // resumes recording; the session stays paused until restarted.
        async getNlp(text){
            this.onRecoPause = true
            recorder.pause()
            this.stopRecordOnline()
            console.log('录音暂停')

            const result = await nlpChat(text);
            if (result.data.code === 0) {
                this.nlpResult = result.data.result
                this.getTts(result.data.result)
            } else {
                this.$message.error(result.data.message);
            }
        },


        // Request synthesised speech for `nlpText` (returned base64-encoded) and play it.
        async getTts(nlpText){
            var result = await this.$http.post("/api/tts/offline", { text : nlpText});
            if (result.data.code === 0) {
                var typedArray = this.base64ToUint8Array(result.data.result)
                this.playAudioData( typedArray.buffer )
            } else {
                this.$message.error(result.data.message)
            }
            
        },

        // Decode a base64 (or URL-safe base64) string into a Uint8Array.
        // Missing '=' padding is restored before decoding.
        base64ToUint8Array(base64String) {
            const padding = '='.repeat((4 - base64String.length % 4) % 4);
            const base64 = (base64String + padding)
                            .replace(/-/g, '+')
                            .replace(/_/g, '/');

            const rawData = window.atob(base64);
            const outputArray = new Uint8Array(rawData.length);

            for (let i = 0; i < rawData.length; ++i) {
                outputArray[i] = rawData.charCodeAt(i);
            }
            return outputArray;
        },

        // Decode `wav_buffer` (an ArrayBuffer) and play it; when playback ends,
        // resume recording if it was paused for this reply.
        playAudioData( wav_buffer ) {
            audioCtx.decodeAudioData(wav_buffer, buffer => {
                var source = audioCtx.createBufferSource();
                source.onended = () => {
                    // Only resume if recording is still paused for playback.
                    if(this.onRecoPause){
                        console.log("恢复录音")
                        // Resume recording on the client.
                        this.onRecoPause = false
                        recorder.resume()
                        this.asrResult = this.speakingText

                        // Resume recording on the server.
                        this.resumeRecordOnline()
                    }
                }
                source.buffer = buffer;
                source.connect(audioCtx.destination);
                source.start();
            }, function(e) {
                Recorder.throwError(e);
            });
        },

    }
}
</script>

<style lang="less" scoped>
@import "./style.less";
</style>

================================================
FILE: demos/speech_web/web_client/src/components/SubMenu/ChatBot/style.less
================================================
// Styles for the voice chat panel: a start screen (.voice_chat_wrapper)
// and the running dialog (.voice_chat_dialog_wrapper).
.voice_chat {
    width: 1200px;
    height: 410px;
    background: #FFFFFF;
    position: relative;
    // Start-chat screen, centered in the panel
    .voice_chat_wrapper {
        top: 50%;
        left: 50%;
        transform: translate(-50%,-50%);
        position: absolute;
        .voice_chat_btn {
            width: 116px;
            height: 116px;
            margin-left: 54px;
            // background: #2932E1;
            border-radius: 50%;
            cursor: pointer;
            background: url("../../../assets/image/ic_开始聊天.svg");
            background-repeat: no-repeat;
            background-position: center;
            background-size: 116px 116px;
            margin-bottom: 17px;
            &:hover {
                width: 116px;
                height: 116px;
                background: url("../../../assets/image/ic_开始聊天_hover.svg");
                background-repeat: no-repeat;
                background-position: center;
                background-size: 116px 116px;
            };
           
        };
        .voice_chat_btn_title {
            height: 22px;
            font-family: PingFangSC-Medium;
            font-size: 16px;
            color: #000000;
            letter-spacing: 0;
            text-align: center;
            line-height: 22px;
            font-weight: 500;
            margin-bottom: 10px;
        };
        .voice_chat_btn_prompt {
            height: 24px;
            font-family: PingFangSC-Regular;
            font-size: 14px;
            color: #999999;
            letter-spacing: 0;
            text-align: center;
            line-height: 24px;
            font-weight: 400;
        };
    };
    .voice_chat_wrapper::after {
        content: "";
        display: block;
        clear: both;
        visibility: hidden;
    };
    // Dialog screen with the end-chat button
    .voice_chat_dialog_wrapper {
        width: 1200px;
        height: 410px;
        background: #FFFFFF;
        position: relative;
        .dialog_box {
            width: 100%;
            height: 410px;
            padding: 50px 198px 82px 199px;
            box-sizing: border-box;
           
            .dialog_content {
                width: 100%;
                height: 268px;
                // background: rgb(113, 144, 145);
                padding: 0px;  
                overflow: auto;           
                li {
                    list-style-type: none;
                    margin-bottom: 33px;
                    display: flex;
                    align-items: center;
                    // :last-of-type takes no argument; the previous
                    // `:last-of-type(1)` was an invalid selector
                    &:last-of-type {
                        margin-bottom: 0px;
                    };
                    .dialog_content_img_pp {
                        width: 60px;
                        height: 60px;
                        // transform: scaleX(-1);
                        background: url("../../../assets/image/飞桨头像@2x.png");
                        background-repeat: no-repeat;
                        background-position: center;
                        background-size: 60px 60px;
                        margin-right: 20px;
                    };
                    .dialog_content_img_user {
                        width: 60px;
                        height: 60px;
                        transform: scaleX(-1);
                        background: url("../../../assets/image/用户头像@2x.png");
                        background-repeat: no-repeat;
                        background-position: center;
                        background-size: 60px 60px;
                        margin-left: 20px;
                    };
                    .dialog_content_dialogue_pp {
                        height: 50px;
                        background: #F5F5F5;
                        border-radius: 25px;
                        font-family: PingFangSC-Regular;
                        font-size: 14px;
                        color: #000000;
                        line-height: 50px;
                        font-weight: 400;
                        padding: 0px 16px;
                        box-sizing: border-box;
                    };
                    .dialog_content_dialogue_user {
                        height: 50px;
                        background: rgba(41,50,225,0.90);
                        border-radius: 25px;
                        font-family: PingFangSC-Regular;
                        font-size: 14px;
                        color: #FFFFFF;
                        line-height: 50px;
                        font-weight: 400;
                        padding: 0px 16px;
                        box-sizing: border-box;
                    };
                };
            };
            .move_dialogue {
                justify-content: flex-end;
            };

        };
       
        .btn_end_dialog {
            width: 124px;
            height: 42px;
            line-height: 42px;
            background: #FFFFFF;
            box-shadow: 0px 4px 16px 0px rgba(0,0,0,0.09);
            border-radius: 21px;
            padding: 0px 24px;
            box-sizing: border-box;
            position: absolute;
            left: 50%;
            bottom: 40px;
            transform: translateX(-50%);
            display: flex;
            justify-content: space-between;
            align-items: center;
            cursor: pointer;
            span {
                display: inline-block;
                // First span: the icon
                &:nth-of-type(1) {
                    width: 16px;
                    height: 16px;
                    background: url("../../../assets/image/ic_小-结束.svg");
                    background-repeat: no-repeat;
                    background-position: center;
                    background-size: 16px 16px;
                   
                };
                // Second span: the label text
                &:nth-of-type(2) {
                    height: 20px;
                    font-family: PingFangSC-Regular;
                    font-size: 14px;
                    color: #F33E3E;
                    text-align: center;
                    font-weight: 400;
                    line-height: 20px;
                    margin-left: 4px;
                };
            };
        };
    };
};

================================================
FILE: demos/speech_web/web_client/src/components/SubMenu/ERNIE_SAT/ERNIE_SAT.vue
================================================
<template>
    <div class="sat">
      <el-row :gutter="20">
            <!-- Left column: record / upload controls and the audio file table -->
            <el-col :span="12"><div class="grid-content ep-bg-purple" />
                <el-row :gutter="60" class="btn_row_wav" justify="center">
                    <el-button class="ml-3" v-if="onEnrollRec === 0" @click="startRecorderEnroll()" type="primary">录制音频</el-button>
                    <el-button class="ml-3" v-else-if="onEnrollRec === 1" @click="stopRecorderEnroll()" type="danger">停止录音</el-button>
                    <el-button class="ml-3" v-else @click="uploadRecord()" type="success">上传录音</el-button>
                    <a>&#12288;</a>
                    <el-upload
                        :multiple="false"
                        :accept="'.wav'"
                        :auto-upload="false"
                        :on-change="handleChange"
                        :show-file-list="false"
                    >
                        <el-button class="ml-3" type="success">上传音频文件</el-button>
                    </el-upload>
                </el-row>
                <div class="recording_table">
                <el-table :data="vcDatas" border class="recording_table_box" scrollbar-always-on max-height="250px">
                    <!-- <el-table-column prop="wavId" label="序号" width="60"/> -->
                    <el-table-column prop="wavName" label="文件名" width="150"/>
                    <el-table-column label="文本">
                      <template #default="scope">
                            <el-input 
                              v-model="scope.row.label"
                              :autosize="{ minRows: 8, maxRows: 13 }" 
                              placeholder="Please input"
                              />
                            
                        </template>
                    </el-table-column>
                    <el-table-column label="操作" width="80">
                        <template #default="scope">
                            <div class="flex justify-space-between mb-4 flex-wrap gap-4">
                                <a @click="PlayTable(scope.row.wavId)"><el-icon><VideoPlay /></el-icon></a>
                                <a>&#12288;</a>
                                <a @click="delWav(scope.row.wavId)"><el-icon><DeleteFilled /></el-icon></a>
                            </div>
                        </template>
                    </el-table-column>
                    <el-table-column fixed="right" label="选择" width="70">
                        <template #default="scope">
                            <el-switch v-model="scope.row.status"  @click="choseWav(scope.row.wavId)"/>
                        </template>
                    </el-table-column>
                </el-table>
                </div>

            </el-col>
            <!-- Middle column: function mode and the text to synthesize -->
            <el-col :span="8"><div class="grid-content ep-bg-purple" />
                <el-space direction="vertical">
                    <el-card class="box-card" style="width: 250px; height:310px">
                        <template #header>
                            <div class="card-header">
                            <span>功能选择</span>
                            </div>
                        </template>  
                        <el-radio-group v-model="funcMode">
                          <el-radio label="1" size="default" border style="margin-bottom: 10px">个性化语音合成</el-radio>
                            <el-input
                              v-if="funcMode === '1'"
                              v-model="ttsText"
                              :autosize="{ minRows: 2, maxRows: 2 }"
                              type="textarea"
                              placeholder="Please input"
                              style="margin-bottom: 10px"
                              />
                          <el-radio label="2" size="default" border style="margin-bottom: 10px">跨语言语音合成</el-radio>
                            <el-input
                              v-if="funcMode === '2'"
                              v-model="ttsText"
                              :autosize="{ minRows: 2, maxRows: 2 }"
                              type="textarea"
                              placeholder="Please input"
                              style="margin-bottom: 10px"
                              />
                          <el-radio label="3" size="default" border style="margin-bottom: 10px">语音编辑</el-radio>
                            <el-input
                                v-if="funcMode === '3'"
                                v-model="ttsText"
                                :autosize="{ minRows: 2, maxRows: 2 }"
                                type="textarea"
                                placeholder="Please input"
                                style="margin-bottom: 10px"
                                />
                        </el-radio-group>
                    </el-card>                    
                </el-space>
            </el-col>
            <!-- Right column: synthesize, play and download -->
            <el-col :span="4"><div class="grid-content ep-bg-purple" />
                <div class="play_board">
                    <el-space direction="vertical">
                        <el-row :gutter="20">
                            <el-button size="large" v-if="onSyn === 0" type="primary" @click="SatSyn()">开始合成</el-button>
                            <el-button size="large" v-else :loading-icon="Eleme" type="danger">合成中</el-button>
                        </el-row>
                        <el-row :gutter="20">
                            <el-button v-if='this.cloneWav' type="success" @click="PlaySyn()">播放</el-button>
                            <el-button v-else disabled type="primary" @click="PlaySyn()">播放</el-button>
                            <el-button v-if='this.cloneWav' type="primary" @click="downLoadCloneWav()">下载</el-button>
                            <el-button v-else disabled type="primary" @click="downLoadCloneWav()">下载</el-button>
                        </el-row>
                    </el-space>
                </div>
            </el-col>
        </el-row>
</div>
</template>

<script>
import { vcCloneSAT, vcDownload, vcDownloadBase64, satUpload, satList, vcDel } from '../../../api/ApiVC'
import Recorder from 'js-audio-recorder'

// Shared playback context for this component
let audioCtx = new AudioContext({
    sampleRate: 24000,
    latencyHint: 'interactive',
});

// Set up the recorder
const recorder = new Recorder({
    // Bits per sample: 8 or 16 supported, default is 16
    sampleBits: 16,
    // Sample rate: 11025, 16000, 22050, 24000, 44100 or 48000 supported;
    // defaults to the browser's value (48000 in the original author's Chrome)
    sampleRate: 16000,
    // Channels: 1 or 2 supported, default is 1
    numChannels: 1,
    compiling: true
})

export default {
name:"",
data(){
    return {
        uploadStatus : 0,
        recognitionStatus : 0,
        asrResult : "",
        indicator : "",
        
        filename: "",
        upfile: "",
        mode: 1,
        language: 1,
        wav_input: "卡尔普陪外孙玩滑梯",
        new_input: "卡尔普陪外孙打滑梯",
        received_file:"",

        // 分割线
        onEnrollRec: 0,
        onSyn:0,
        vcDatas: [],
        funcMode: '1',
        selected_Id: -1,
        ttsText: '',
        cloneWav: '',
        wav:''
    }
},

mounted () {
        this.GetList()
    },

methods:{
    // 获取文件列表
    async GetList(){
            this.vcDatas =[]
            const result = await satList();
            console.log("List: ", result);
            for(let i=0; i < result.data.result.length; i++){
                this.vcDatas.push({
                    wavName: result.data.result[i]['name'],
                    wavId: i,
                    wavPath: result.data.result[i]['path'],
                    status: false,
                    label: result.data.result[i]['label']
                })
            }
            console.log("vcDatas: ", this.vcDatas);
            this.$nextTick(()=>{})
    },

    // 上传文件切换
    async handleChange(file, fileList){
      for(let i=0; i<fileList.length; i++){
        this.uploadFile(fileList[i])
      }
      this.GetList()
    },

    async uploadFile(file){
      let formData = new FormData();
      formData.append('files', file.raw);
      const result = await satUpload(formData);
      if (result.data.code === 0) {
          this.$message.success("音频上传成功")
          
      } else {
          this.$message.error("音频上传失败")
      }
    },

    // 开始录音
    startRecorderEnroll(){
            this.onEnrollRec = 1
            recorder.clear()
            recorder.start()
        },
    
    // 结束录音
    stopRecorderEnroll(){
        this.onEnrollRec = 2
        recorder.stop()
        this.wav = recorder.getWAVBlob()
    },

    // 上传录音
    async uploadRecord(){
            this.onEnrollRec = 0
            if(this.wav === ""){
                this.$message.error("未检测到录音，录音失败，请重新录制")
                return
            } else {
                if(this.wav === ''){
                    this.$message.error("请先完成录音");
                    this.onEnrollRec = 0
                    return
                } else {
                    let formData = new FormData();
                    formData.append('files', this.wav);
                    const result = await satUpload(formData);
                    console.log(result)
                    this.GetList() 
                }
                this.$message.success("录音上传成功")
            }
        }, 

    // 删除音频文件
    async delWav(wavId){
            console.log('wavId', wavId)
            // 删除文件
            const result = await vcDel(
                {
                  wavName: this.vcDatas[wavId]['wavName'],
                  wavPath: this.vcDatas[wavId]['wavPath']
                }
            );
            if(!result.data.code){
                this.$message.success("删除成功")
            } else {
                this.$message.error(result.data.msg)
            }
            this.GetList()
            this.reset()
        },
    
    // 播放表格
    async PlayTable(wavId){
        this.Play(this.vcDatas[wavId])
    },

    // 播放音频
    async Play(wavBase){
        // 获取音频数据
        const result = await vcDownloadBase64(wavBase);
        // console.log('play result', result)
        if (result.data.code === 0) {
            // base转换二进制数
            let typedArray = this.base64ToUint8Array(result.data.result)
            // 添加wav文件头
            let view = new DataView(typedArray.buffer);
            view = Recorder.encodeWAV(view, 16000, 16000, 1, 16, true);
            // 播放音频
            this.playAudioData(view.buffer);
        };
        },
    // chose wav
    choseWav(wavId){
            this.cloneWav = ''
            this.nowFile = this.vcDatas[wavId].wavName
            this.nowIndex = wavId
            // only wavId is true else false
            for(let i=0; i<this.vcDatas.length; i++){
                if(i==wavId){
                    this.vcDatas[wavId].status = true
                    this.selected_Id = wavId
                    this.ttsText = this.vcDatas[wavId]['label']
                } else {
                    this.vcDatas[i].status = false
                }
            }
            this.$nextTick(()=>{})
        },

    // 播放音频
    playAudioData(wav_buffer){
        audioCtx.decodeAudioData(wav_buffer, buffer => {
            let source = audioCtx.createBufferSource();
            source.buffer = buffer
            source.connect(audioCtx.destination);
            source.start();
        }, function (e) {
        });
    },


    base64ToUint8Array(base64String){
       const padding = '='.repeat((4 - base64String.length % 4) % 4);
        const base64 = (base64String + padding)
            .replace(/-/g, '+')
            .replace(/_/g, '/');
    
        const rawData = window.atob(base64);
        const outputArray = new Uint8Array(rawData.length);
    
        for (let i = 0; i < rawData.length; ++i) {
            outputArray[i] = rawData.charCodeAt(i);
        }
        return outputArray; 
    },

    // 检查是否包含中文
    hasChinese(str) {
      return /[\u4E00-\u9FA5]+/g.test(str)
    },

    // SAT合成
    async SatSyn(){
      // 检查 select id
      if(this.selected_Id < 0){
        return this.$message.error("请先选择音频文件！")
      }

      // 检查音频对应的文本
      if(!this.vcDatas[this.selected_Id]['label']){
        return this.$message.error("音频对应文本不可以为空！")
      }

      // 检查待合成文本
      if(!this.ttsText){
        return this.$message.error("合成文本不可以为空！")
      }

      // 合成中
      this.onSyn = 1
      // 重置 clone wav
      this.cloneWav = ""
  
      const old_str = this.vcDatas[this.selected_Id]['label']
      const new_str = this.ttsText
      let language = ""
      // 包含中文
      if(this.hasChinese(old_str)){
        language = "zh"
      } else{
        language = "en"
      }
      // 功能选择
      let func = ""
      if(this.funcMode === '1') {
        func = "synthesize"
      } else if(this.funcMode === '2'){
        func = "crossclone"
      } else {
        func = "edit"
      }
      
      let wav_path = this.vcDatas[this.selected_Id]['wavPath']
      let filename = this.vcDatas[this.selected_Id]['wavName']

      const data = {
        old_str: old_str,
        new_str: new_str,
        language: language,
        function: func,
        wav: wav_path,
        filename: filename

      }

      console.log("sat data: ", data)
      
      // sat 接口
      const result = await vcCloneSAT(data)
      // 合成完成
      this.onSyn = 0
      console.log(result);
      // debugger
      if (result.data.code === 0) {

        this.$message.success(result.data.message)
        // 获取识别文本
        this.cloneWav = result.data.result
        console.log("cloneWave", this.cloneWav);

      } else {
        this.$message.error(result.data.message)
      };
    },
    // 播放合成的音频
    // 播放音频
    async PlaySyn(){
        // 获取音频数据
        const data = {
          wavName: "sat_"+this.filename,
          wavPath: this.cloneWav
        }
        const result = await vcDownloadBase64(data);
        // console.log('play result', result)
        if (result.data.code === 0) {
            // base转换二进制数
            let typedArray = this.base64ToUint8Array(result.data.result)
            // 添加wav文件头
            let view = new DataView(typedArray.buffer);
            view = Recorder.encodeWAV(view, 16000, 16000, 1, 16, true);
            // 播放音频
            this.playAudioData(view.buffer);
        };
        },


    // 下载合成文件
    async downLoadCloneWav(){
    if(this.cloneWav  === ""){
        this.$message.error("音频合成完毕后再下载！")
    } else {
        // const result = await vcDownload(this.cloneWav);
        // 获取音频数据
        const data = {
          wavName: "sat_"+this.filename,
          wavPath: this.cloneWav
        }
        const result = await vcDownloadBase64(data);
        let view;
        // console.log('play result', result)
        if (result.data.code === 0) {
            // base转换二进制数
            let typedArray = this.base64ToUint8Array(result.data.result)
            // 添加wav文件头
            view = new DataView(typedArray.buffer);
            view = Recorder.encodeWAV(view, 16000, 16000, 1, 16, true);
            // 播放音频
            // this.playAudioData(view.buffer);
        }
        console.log(view.buffer)
        // debugger
        const blob = new Blob([view.buffer], { type: 'audio/wav' });
        const fileName = new Date().getTime() + '.wav';
        const down = document.createElement('a');
        down.download = fileName;
        down.style.display = 'none';//隐藏,没必要展示出来
        down.href = URL.createObjectURL(blob);
        document.body.appendChild(down);
        down.click();
        URL.revokeObjectURL(down.href); // 释放URL 对象
        document.body.removeChild(down);//下载完成移除
      }
    },

}
}   

</script>

<style lang="less" scoped>
// @import "./style.less";
/* Root panel of the SAT page */
.sat {
    box-sizing: border-box;
    width: 1200px;
    height: 410px;
    padding: 5px 80px 56px 80px;
    background: #FFFFFF;
}

/* Vertical spacing between rows */
.el-row {
    margin-bottom: 20px;
}

.grid-content {
    min-height: 36px;
    border-radius: 4px;
}

/* Vertically centers the synthesize / play / download buttons */
.play_board {
    display: flex;
    align-items: center;
    height: 100%;
}

</style>

================================================
FILE: demos/speech_web/web_client/src/components/SubMenu/FineTune/FineTune.vue
================================================
<template>
    <div class="finetune">
      <el-row :gutter="20"> 
        <!-- Left column: reset / fine-tune controls and the recording table -->
        <el-col :span="12"><div class="grid-content ep-bg-purple" />
          <el-row :gutter="60" class="btn_row_wav" justify="center">
              <el-button class="ml-3" @click="clearAll()" type="primary">一键重置</el-button>
              <el-button class="ml-3" @click="resetDefault()" type="primary">默认示例</el-button>
              <el-button v-if='onFinetune === 0' class="ml-3" @click="fineTuneModel()" type="primary">一键微调</el-button>
              <el-button v-else-if='onFinetune === 1' class="ml-3" @click="fineTuneModel()" type="danger">微调中</el-button>
              <el-button v-else-if='onFinetune === 2' class="ml-3" @click="resetFinetuneBtn()" type="success">微调成功</el-button>
              <el-button v-else class="ml-3" @click="resetFinetuneBtn()" type="success">微调失败</el-button>
              <!-- <el-button class="ml-3" @click="chooseHistory()" type="warning">历史数据选择</el-button> -->
        </el-row>

        <div class="recording_table">
            <el-table :data="vcDatas" border class="recording_table_box" scrollbar-always-on max-height="250px">
                <el-table-column prop="wavId" label="序号" width="60"/>
                <el-table-column prop="text" label="文本" />
                <el-table-column label="音频" width="80">
                    <template #default="scope">
                        <a v-if="scope.row.wavPath != ''">{{ scope.row.wavName }}</a>
                        <a v-else>
                            
                            <el-button class="ml-3" v-if="onEnrollRec === 0" @click="startRecorderEnroll()" type="primary" circle>
                                <el-icon><Microphone /></el-icon>
                            </el-button>
                            <el-button class="ml-3" v-else-if="onEnrollRec === 1" @click="stopRecorderEnroll()" type="danger" circle>
                                <el-icon><Microphone /></el-icon>
                            </el-button>
                            <el-button class="ml-3" v-else @click="uploadRecord(scope.row.wavId)" type="success" circle>
                                <el-icon><Upload /></el-icon>
                            </el-button>
                        </a>
                    </template>
                </el-table-column>
                <el-table-column label="操作" width="80" fixed="right">
                    <template #default="scope">
                        <div class="flex justify-space-between mb-4 flex-wrap gap-4">
                            <a @click="PlayTable(scope.row.wavId)"><el-icon><VideoPlay /></el-icon></a>
                            <a>&#12288;</a>
                            <a @click="delWav(scope.row.wavId)"><el-icon><DeleteFilled /></el-icon></a>
                        </div>
                    </template>
                </el-table-column>
            </el-table>
        </div>

            </el-col>
            <!-- Middle column: experiment path and the text to synthesize -->
            <el-col :span="8"><div class="grid-content ep-bg-purple" />
                <el-space direction="vertical">
                    <el-card class="box-card" style="width: 250px; height:310px">
                        <template #header>
                            
                            <div class="card-header">
                                <span>试验路径</span>
                                <el-input
                                    v-model="expPath"
                                    :autosize="{ minRows: 2, maxRows: 3 }"
                                    type="textarea"
                                    placeholder="一键微调自动生成，可使用历史试验路径"
                                    />
                            </div>
                        </template>
                        <span>请输入中文文本</span>
                        <el-input
                            v-model="ttsText"
                            :autosize="{ minRows: 5, maxRows: 6 }"
                            type="textarea"
                            placeholder="请输入待合成文本"
                            />
                    </el-card>                    
                </el-space>
            </el-col>
            <!-- Right column: synthesize, play and download -->
            <el-col :span="4"><div class="grid-content ep-bg-purple" />
                <div class="play_board">
                    <el-space direction="vertical">
                        <el-row :gutter="20">
                            <el-button size="large" v-if="onSyn === 0" type="primary" @click="fineTuneSyn()">开始合成</el-button>
                            <el-button size="large" v-else :loading-icon="Eleme" type="danger">合成中</el-button>
                        </el-row>

                        <el-row :gutter="20">
                            <el-button v-if='this.cloneWav' type="success" @click="PlaySyn()">播放</el-button>
                            <el-button v-else disabled type="primary" @click="PlaySyn()">播放</el-button>
                            <el-button v-if='this.cloneWav' type="primary" @click="downLoadCloneWav()">下载</el-button>
                            <el-button v-else disabled type="primary" @click="downLoadCloneWav()">下载</el-button>
                        </el-row>
                    </el-space>
                </div>
            </el-col>
        </el-row>
    </div>
    </template>
    
    <script>
    import Recorder from 'js-audio-recorder'
    import { vcDownload, vcDownloadBase64, vcCloneFineTune, vcCloneFineTuneSyn, fineTuneList, vcDel, fineTuneUpload, fineTuneNewDir } from '../../../api/ApiVC';
    
    // Set up the recorder
    const recorder = new Recorder({
        // Bits per sample: 8 or 16 supported, default is 16
        sampleBits: 16,
        // Sample rate: 11025, 16000, 22050, 24000, 44100 or 48000 supported;
        // defaults to the browser's value (48000 in the original author's Chrome)
        sampleRate: 16000,
        // Channels: 1 or 2 supported, default is 1
        numChannels: 1,
        compiling: true
    })

    // Set up the player
    const audioCtx = new AudioContext({
        sampleRate: 16000,
        latencyHint: 'interactive',
    });

    // Read a blob as a data URL and pass the resulting string to `callback`
    function blobToDataURL(blob, callback) {
        const reader = new FileReader();
        reader.onload = function (event) {
            const dataUrl = event.target.result;
            callback(dataUrl);
        }
        reader.readAsDataURL(blob);
    }

    
    export default {
        data(){
            return {
              vcDatas:[],
              defaultDataPath: 'default',
              nowDataPath: '',
              expPath: '',
              wav: '',
              wav_base64: '',
              ttsText: '欢迎使用飞桨语音套件',
              cloneWav: '',
              
              onEnrollRec: 0,  // 录音状态
              onFinetune: 0,  // 微调状态
              onSyn: 0, // 合成状态
            }
        },
        mounted () {
            this.nowDataPath = this.defaultDataPath
            this.GetList()
            
        },
        methods: {
            // 重置 btn 
            resetFinetuneBtn(){
                this.onFinetune = 0
            },
        
        // 一键重置
        async clearAll(){
            this.vcDatas = []
            const result = await fineTuneNewDir()
            console.log("clearALL: ", result.data.result);
            this.nowDataPath = result.data.result
            this.expPath = ''
            this.onFinetune = 0
            await this.GetList()
        },
        // 显示默认
        async resetDefault(){
            this.nowDataPath = this.defaultDataPath
            await this.GetList()
            this.expPath = ''
        },

        // 开始录音
        startRecorderEnroll(){
            this.onEnrollRec = 1
            recorder.clear()
            recorder.start()
        },
        // 结束录音
        stopRecorderEnroll(){
            this.onEnrollRec = 2
            recorder.stop()
            this.wav = recorder.getWAVBlob()
        },

        // 上传录音
        async uploadRecord(wavId){
            this.onEnrollRec = 0
            if(this.wav === ""){
                this.$message.error("未检测到录音，录音失败，请重新录制")
                return
            } else {
                if(this.wav === ''){
                    this.$message.error("请先完成录音");
                    this.onEnrollRec = 0
                    return
                } else {
                    let fileRes = ""
                    let fileString = ""
                    fileRes = await this.readFile(this.wav);
                    fileString = fileRes.result;
                    const audioBase64type = (fileString.match(/data:[^;]*;base64,/))?.[0] ?? '';
                    const isBase64 = !!fileString.match(/data:[^;]*;base64,/);
                    const uploadBase64 = fileString.substr(audioBase64type.length);
                    
                    // 上传时指定文件路径
                    const data = {
                        'wav': uploadBase64,
                        'filename': this.vcDatas[wavId]['wavName'],
                        'wav_path': this.nowDataPath
                    }

                    const result = await fineTuneUpload(data);
                    console.log(result)
                    this.GetList() 
                }
                this.$message.success("录音上传成功")
            }
        }, 
        // 读取文件和Blob
        readFile(file) {
            return new Promise((resolve, reject) => {
                const fileReader = new FileReader();
                fileReader.onload = function () {
                    resolve(fileReader);
                };
                fileReader.onerror = function (err) {
                    reject(err);
                };
                fileReader.readAsDataURL(file);
                });
            },

            // 获取文件列表
          async GetList(){
            this.vcDatas = []
            const result = await fineTuneList({
              dataPath: this.nowDataPath
            });
            console.log(result, result.data.result);
            for(let i=0; i<result.data.result.length; i++){
                this.vcDatas.push({
                  wavId: i,
                  text: result.data.result[i]['text'],
                  wavName: result.data.result[i]['name'],
                  wavPath: result.data.result[i]['path'],
                })
            }
            this.$nextTick(()=>{})
          },
                  // 播放音频
    playAudioData( wav_buffer ) {
        audioCtx.decodeAudioData(wav_buffer, buffer => {
            var source = audioCtx.createBufferSource();
            source.buffer = buffer;
            source.connect(audioCtx.destination);
            source.start();
        }, function(e) {
            Recorder.throwError(e);
            })
    },
        // base64解码
        base64ToUint8Array(base64String) {
        const padding = '='.repeat((4 - base64String.length % 4) % 4);
        const base64 = (base64String + padding)
                        .replace(/-/g, '+')
                        .replace(/_/g, '/');

        const rawData = window.atob(base64);
        const outputArray = new Uint8Array(rawData.length);

        for (let i = 0; i < rawData.length; ++i) {
                outputArray[i] = rawData.charCodeAt(i);
        }
        return outputArray;
    },
            // 播放表格
        async PlayTable(wavId){
            this.Play(this.vcDatas[wavId])
        },
        // 播放合成后的音频
        async PlaySyn(){
           
            if(this.cloneWav  === ""){
                this.$message.error("请合成音频后再播放！！")
                return
            } else {
                this.Play(this.cloneWav)
            }
        },
        // 播放音频
        async Play(wavBase){
                // 获取音频数据
                const result = await vcDownloadBase64(wavBase);
                // console.log('play result', result)
                if (result.data.code === 0) {
                    // base转换二进制数
                    let typedArray = this.base64ToUint8Array(result.data.result)
                    // 添加wav文件头
                    let view = new DataView(typedArray.buffer);
                    view = Recorder.encodeWAV(view, 16000, 16000, 1, 16, true);
                    // 播放音频
                    this.playAudioData(view.buffer);
                } else {
                    this.$message.error("获取音频文件失败")
                }
        },
                // 下载合成文件
        async downLoadCloneWav(){
            if(this.cloneWav  === ""){
                this.$message.error("音频合成完毕后再下载！")
            } else {
                // const result = await vcDownload(this.cloneWav);
                // 获取音频数据
                const result = await vcDownloadBase64(this.cloneWav);
                let view;
                // console.log('play result', result)
                if (result.data.code === 0) {
                    // base转换二进制数
                    let typedArray = this.base64ToUint8Array(result.data.result)
                    // 添加wav文件头
                    view = new DataView(typedArray.buffer);
                    view = Recorder.encodeWAV(view, 16000, 16000, 1, 16, true);
                    // 播放音频
                    // this.playAudioData(view.buffer);
                }
                console.log(view.buffer)
                // debugger
                const blob = new Blob([view.buffer], { type: 'audio/wav' });
                const fileName = new Date().getTime() + '.wav';
                const down = document.createElement('a');
                down.download = fileName;
                down.style.display = 'none';//隐藏,没必要展示出来
                down.href = URL.createObjectURL(blob);
                document.body.appendChild(down);
                down.click();
                URL.revokeObjectURL(down.href); // 释放URL 对象
                document.body.removeChild(down);//下载完成移除
            }
        },
        // 删除音频文件
        async delWav(wavId){
            if(this.nowDataPath === this.defaultDataPath){
                this.$message.error("默认音频不允许删除，可以一键重置，重新录音")
                return 
            }

            console.log('wavId', wavId)
            // 删除文件
            const result = await vcDel(
                {
                    wavName: this.vcDatas[wavId]['wavName'],
                    wavPath: this.vcDatas[wavId]['wavPath']
                }
            );
            if(!result.data.code){
                this.$message.success("删除成功")
                this.GetList()
            } else {
                this.$message.error("文件删除失败")
            }
        }, 
        // 微调模型
        async fineTuneModel(){
            // 先检查是否都有录音
            for(let i=0; i < this.vcDatas.length; i++){
                if(this.vcDatas['wavPath'] === ''){
                    return this.$message.error("还有录音未完成，请先完成录音！")
                }
            }
            this.onFinetune = 1
            const result = await vcCloneFineTune(
                {
                    wav_path: this.nowDataPath,
                }
            );
            if(!result.data.code){
                this.onFinetune = 2
                this.expPath = result.data.result
                console.log("this.expPath: ", this.expPath)
                this.$message.success("小数据微调成功")
            } else {
                this.onFinetune = 3
                this.$message.error(result.data.msg)
            }
        },
        // 合成音频
        async fineTuneSyn(){
            if(!this.expPath){
                return this.$message.error("请先微调生成模型后再生成！")
            }
            // 合成
            this.onSyn = 1
            const result = await vcCloneFineTuneSyn(
                {
                    exp_path: this.expPath,
                    text: this.ttsText
                }
            );
            this.onSyn = 0
            if(!result.data.code){
                this.cloneWav = result.data.result
                console.log("clone wav: ", this.cloneWav)
                this.$message.success("音色克隆成功")
            } else {
                this.$message.error(result.data.msg)
            }
            this.$nextTick(()=>{})
        }
},
};
</script>
    
<style lang="less" scoped>
// @import "./style.less";
// Panel container: fixed 1200x410 white box; padding is counted inside the size.
.finetune {
  width: 1200px;
  height: 410px;
  background: #FFFFFF;
  padding: 5px 80px 56px 80px;
  box-sizing: border-box;
}
// Vertical gap below every element carrying the .el-row class.
.el-row {
  margin-bottom: 20px;
}
// Rounded cell with a minimum height.
.grid-content {
  border-radius: 4px;
  min-height: 36px;
}
// Full-height flex box whose children are centered vertically.
.play_board{
    height: 100%;
    display: flex;
    align-items: center;
}
</style>

================================================
FILE: demos/speech_web/web_client/src/components/SubMenu/IE/IET.vue
================================================
<template>
    <div class="voice_commands">
      <!-- Left panel: record toggle button, its caption and an example sentence -->
      <div class="voice_commands_traffic">
        <div class="voice_commands_traffic_title">交通费报销</div>
        <div class="voice_commands_traffic_wrapper">
          <div class="voice_commands_traffic_wrapper_move">
            <div class="traffic_btn_img_btn">
                <!-- Stop recording (shown while onReco is true) -->
                <div v-if="onReco"
                @click="endRecorder()"
                class="end_recorder_img"
                ></div>
                <!-- Start recording -->
                <div v-else
                @click= "startRecorder()"
                class="start_recorder_img"
                ></div>
            </div>
            <div class="traffic_btn_prompt">
                <div v-if="onReco">
                    结束识别
                </div>
                <div v-else>
                    开始识别
                </div>
            </div>
            <div class="traffic_btn_list">试试说“早上八点，我从广州到北京花了四百二十六元”</div>
          </div>
        </div>
      </div>

      <div class="voice_point_to"></div>

      <!-- Recognition result: shown while postStatus is true, otherwise a spinner -->
      <!-- NOTE(review): the four field classes below start with "oice_" (missing "v"); the selectors in style.less use the same spelling, so rename both together if at all -->
      <div class="voice_commands_IdentifyTheResults">
        <div class="voice_commands_IdentifyTheResults_title">
          识别结果
        </div>
 
        <div v-if="postStatus" class="voice_commands_IdentifyTheResults_show">
            <div class="voice_commands_IdentifyTheResults_show_title">
              {{ asrResult }}
            </div>
            <div class="oice_commands_IdentifyTheResults_show_time">
              时间：{{voiceCommandsData.time}}
            </div>
            <div class="oice_commands_IdentifyTheResults_show_money">
              费用：{{voiceCommandsData.amount}}
            </div>
            <div class="oice_commands_IdentifyTheResults_show_origin">
              出发地：{{voiceCommandsData.outset}}
            </div>
            <div class="oice_commands_IdentifyTheResults_show_destination">
              目的地：{{voiceCommandsData.destination}}
            </div>
            </div>
        <div v-else class="voice_commands_IdentifyTheResults_show_loading">
                <a-spin />
        </div>
        
      </div>
    </div >

</template>

<script>
import Recorder from 'js-audio-recorder'
import { asrOffline } from '../../../api/ApiASR'
import { nlpIE } from '../../../api/ApiNLP'

// Module-level recorder used by startRecorder() / endRecorder() below.
const recorder = new Recorder({
  sampleBits: 16,                 // Bits per sample; 8 or 16 are supported, default is 16
  sampleRate: 16000,              // Sample rate; 11025, 16000, 22050, 24000, 44100 and 48000 are supported; defaults to the browser's value (48000 in the original author's Chrome)
  numChannels: 1,                 // Number of channels; 1 or 2 are supported, default is 1
  compiling: true                 // NOTE(review): meaning not visible here; check the js-audio-recorder docs
})


export default {
  
    data () {
        return {
            voiceCommandsData:{
                time:"",
                amount:"",
                outset:"",
                destination:""
            },
            asrDeafult : "语音识别结果",
            asrResult: "",
            postStatus:true,
            onReco:false
        }
    },
    mounted () {
      this.asrResult = this.asrDeafult
    },
    methods: {
      // reset
      reset(){
          this.asrResult = this.asrDeafult
          this.voiceCommandsData = {
                  time:"",
                  amount:"",
                  outset:"",
                  destination:""
              }
         },

      // 开始录音
        startRecorder(){
          this.reset()
          this.onReco = true
          recorder.clear()
          recorder.start()
        },
      // 停止录音
        endRecorder(){
            recorder.stop()
            this.onReco = false
            // this.$nextTick(()=>{})
            this.postStatus = false
            const wavs = recorder.getWAVBlob()
            this.uploadFile(wavs)
        },
      // 上传识别结果
        async uploadFile(file){
          const formData = new FormData();
          formData.append('files', file)
          const result = await asrOffline(formData)
            if (result.data.code === 0) {
                this.asrResult = result.data.result
                this.$message.success(result.data.message);
                this.informationExtract()
            } else {
                this.$message.error(result.data.message);
            }
        },
        // 信息抽取
        async informationExtract(){
                const result = await nlpIE(this.asrResult)

                if(result.data.result[0]['时间']){
                    this.voiceCommandsData.time = result.data.result[0]['时间'][0]['text']
                }
                
                if(result.data.result[0]['出发地']){
                    this.voiceCommandsData.outset = result.data.result[0]['出发地'][0]['text']
                }

                if(result.data.result[0]['目的地']){
                    this.voiceCommandsData.destination = result.data.result[0]['目的地'][0]['text']
                }

                if(result.data.result[0]['费用']){
                    this.voiceCommandsData.amount = result.data.result[0]['费用'][0]['text']
                }
                this.postStatus = true
            }
    }
}
</script>

<style lang="less" scoped>
@import "./style.less";
</style>

================================================
FILE: demos/speech_web/web_client/src/components/SubMenu/IE/style.less
================================================
// Styles for the component that imports this sheet (IET.vue): a fixed-size flex row
// holding the record panel, an arrow image and the recognition-result panel.
.voice_commands {
    width: 1200px;
    height: 410px;
    background: #FFFFFF;
    padding: 40px 50px 50px 50px;
    box-sizing: border-box;
    display: flex;
    // Transportation expense panel
    .voice_commands_traffic {
        width: 468px;
        height: 320px;
        .voice_commands_traffic_title {
            height: 26px;
            font-family: PingFangSC-Medium;
            font-size: 16px;
            color: #000000;
            letter-spacing: 0;
            line-height: 26px;
            font-weight: 500;
            margin-bottom: 30px;
            // background: pink;
        };
        .voice_commands_traffic_wrapper {
            width: 465px;
            height: 264px;
            // background: #FAFAFA;
            position: relative;
            // Centered inside the wrapper via absolute positioning + translate
            .voice_commands_traffic_wrapper_move {
                position: absolute;
                top: 50%;
                left: 50%;
                transform: translate(-50%,-50%);
            };
            // Round record button; lighter background and a different icon on hover
            .traffic_btn_img_btn {
                width: 116px;
                height: 116px;
                background: #2932E1;
                display: flex;
                justify-content: center;
                align-items: center;
                border-radius: 50%;
                cursor: pointer;
                margin-bottom: 20px;
                margin-left: 84px;
                &:hover {
                    width: 116px;
                    height: 116px;
                    background: #7278F5;
                   
                        .start_recorder_img{
                            width: 50px;
                            height: 50px;
                            background: url("../../../assets/image/ic_开始聊天_hover.svg") no-repeat;
                            background-position: center;
                            background-size: 50px 50px;
                        };
                   
                };
            
                    .start_recorder_img{
                        width: 50px;
                        height: 50px;
                        background: url("../../../assets/image/ic_开始聊天.svg") no-repeat;
                        background-position: center;
                        background-size: 50px 50px;
                    };
                
            };
            .traffic_btn_prompt {
                height: 22px;
                font-family: PingFangSC-Medium;
                font-size: 16px;
                color: #000000;
                font-weight: 500;
                margin-bottom: 16px;
                margin-left: 110px;
            };
            .traffic_btn_list {
                height: 20px;
                font-family: PingFangSC-Regular;
                font-size: 12px;
                color: #999999;
                font-weight: 400;
                width: 112%;
            };
        };
    };
    // Arrow image between the two panels
    .voice_point_to {
        width: 47px;
        height: 63px;
        background: url("../../../assets/image/步骤-箭头切图@2x.png") no-repeat;
        background-position: center;
        background-size: 47px 63px;
        margin-top: 164px;
        margin-right: 82px;
    };
    // Recognition result panel
    .voice_commands_IdentifyTheResults {
        .voice_commands_IdentifyTheResults_title {
            height: 26px;
            font-family: PingFangSC-Medium;
            font-size: 16px;
            color: #000000;
            line-height: 26px;
            font-weight: 500;
            margin-bottom: 30px;
        };
        // Result display box
        // NOTE(review): the four field selectors below start with "oice_" (missing "v");
        // IET.vue uses the same spelling, so they match as written.
        .voice_commands_IdentifyTheResults_show {
            width: 503px;
            height: 264px;
            background: #FAFAFA;
            padding: 40px 0px 0px 50px;
            box-sizing: border-box;
            .voice_commands_IdentifyTheResults_show_title {
                height: 22px;
                font-family: PingFangSC-Medium;
                font-size: 16px;
                color: #000000;
                // text-align: center;
                font-weight: 500;
                margin-bottom: 30px;
            };
            .oice_commands_IdentifyTheResults_show_time {
                height: 20px;
                font-family: PingFangSC-Medium;
                font-size: 14px;
                color: #666666;
                font-weight: 500;
                margin-bottom: 12px;
            };
            .oice_commands_IdentifyTheResults_show_money {
                height: 20px;
                font-family: PingFangSC-Medium;
                font-size: 14px;
                color: #666666;
                font-weight: 500;
                margin-bottom: 12px;
            };
            .oice_commands_IdentifyTheResults_show_origin {
                height: 20px;
                font-family: PingFangSC-Medium;
                font-size: 14px;
                color: #666666;
                font-weight: 500;
                margin-bottom: 12px;
            };
            .oice_commands_IdentifyTheResults_show_destination {
                height: 20px;
                font-family: PingFangSC-Medium;
                font-size: 14px;
                color: #666666;
                font-weight: 500;
            };
        };
        // Loading state (same box size, content centered)
        .voice_commands_IdentifyTheResults_show_loading {
            width: 503px;
            height: 264px;
            background: #FAFAFA;
            padding: 40px 0px 0px 50px;
            box-sizing: border-box;
            display: flex;
            justify-content: center;
            align-items: center;
        };
    };
    // Icon shown while recording; slightly transparent on hover
    .end_recorder_img {
        width: 50px;
        height: 50px;
        background: url("../../../assets/image/ic_大-声音波浪.svg") no-repeat;
        background-position: center;
        background-size: 50px 50px;
    };
    .end_recorder_img:hover {
        opacity: 0.9;
    };
};

================================================
FILE: demos/speech_web/web_client/src/components/SubMenu/TTS/TTST.vue
================================================
<template>
    <div class="speech_recognition">
      <!-- Chinese input text -->
      <div class="recognition_text">
        <div class="recognition_text_header">
          <div class="recognition_text_title">
            中文文本
          </div>
          <div class="recognition_text_random" @click="getRandomChineseWord()">
            <span></span><span>更换示例</span>
          </div>
        </div>

        <div class="recognition_text_field">

            <el-input
            v-model="textarea"
            :autosize="{ minRows: 13, maxRows: 13 }"
            type="textarea"
            placeholder="Please input"
            />

        </div>
      </div>
      <!-- Arrow between the two panels -->
      <div class="recognition_point_to"></div>
      <!-- Speech synthesis -->
      <div class="speech_recognition_new">
        <div class="speech_recognition_title">
          语音合成
        </div>
            <!-- Streaming synthesis: initial state -->
            <div  v-if="streamingOnInit" class="speech_recognition_streaming"
              @click="getTtsChunkWavWS()"
            >
              流式合成
            </div>
            <!-- Streaming synthesis: synthesizing / playing / paused -->
            <div v-else>
                <div v-if="streamingStopStatus" class="streaming_ing_box">
                    <div class="streaming_ing">
                        <div class="streaming_ing_img"></div>
                        <!-- <Spin indicator={antIcon} /> -->
                        <div class="streaming_ing_text">合成中</div>
                    </div>
                    <div class="streaming_time">响应时间：0ms</div>
                </div>
                <div v-else>
                    <div v-if="streamingContinueStatus" class="streaming_suspended_box">
                        <div class="streaming_suspended"
                        @click="streamingStop()"
                        >
                        <div class="streaming_suspended_img"></div>
                        <div class="streaming_suspended_text">暂停播放</div>

                        </div>
                        <div class="suspended_time">
                        响应时间：{{ Number(streamingAcceptStamp) - Number(streamingSendStamp) }}ms
                        </div>
                    </div>
                    <div v-else class="streaming_continue"
                        @click="streamingResume()"
                    >
                        <div class="streaming_continue_img"></div>
                        <div class="streaming_continue_text">继续播放</div>
                    </div>
                </div>
            </div>

            <!-- End-to-end synthesis -->
            <div v-if="endToEndOnInit" class="speech_recognition_end_to_end"
              @click="EndToEndSynthesis()"
            >
              端到端合成
            </div>
            <div v-else>
                <div  v-if="endToEndStopStatus"  class="end_to_end_ing_box">
                  <div class="end_to_end_ing">
                    <div class="end_to_end_ing_img"> </div>
                    <!-- <Spin indicator={antIcon}></Spin> -->
                    <div class="end_to_end_ing_text">合成中</div>

                  </div>
                  <div class="end_to_end_ing_time">响应时间：0s</div>
                </div>

                <div v-else class="end_to_end_suspended_box">
                    <!-- The pause handler must be bound with @click; an "onClick" event is never emitted by a plain div -->
                    <div v-if="endToEndContinueStatus" class="end_to_end_suspended"
                        @click="EndToEndStop()"
                    >
                    <div class="end_to_end_suspended_img"></div>
                    <div class="end_to_end_suspended_text">暂停播放</div>

                    </div>
                    <div v-else class="end_to_end_continue"
                      @click="EndToEndResume()"
                    >
                      <div class="end_to_end_continue_img"></div>
                      <div class="end_to_end_continue_text">继续播放</div>
                    </div>
                    <div class="end_to_end_ing_suspended_time">响应时间：{{Number(endToEndAcceptStamp) - Number(endToEndSendStamp) }}ms</div>
                </div>
            </div>

      </div>
    </div>
</template>

<script>
import Recorder from 'js-audio-recorder'
import { apiURL } from '../../../api/API'

// Module-level holder for streamed chunk blocks (reset to [] by getTtsChunkWavWS)
let chunks = []
let AudioContext = window.AudioContext || window.webkitAudioContext;
// Number of non-final WebSocket messages received for the current request
let chunk_index = 0
// NOTE(review): not referenced by the component code in this file
let palyIndex = 0
// Set to true when a message with `done` arrives
let reciveOver = false


// State of the streaming playback scheduler (see _schedulePlaybackWav)
let _audioSrcNodes = []
const _audioCtx = new (window.AudioContext || window.webkitAudioContext)({ latencyHint: 'interactive' });
let _playStartedAt = 0
let _totalTimeScheduled = 0

// Forget all scheduling state: the list of source nodes, the playback start time
// and the accumulated scheduled duration.
function _reset() {
    _audioSrcNodes = [];
    _totalTimeScheduled = 0;
    _playStartedAt = 0;
}


export default {
    name: "TTSTS",
    data () {
        return {
            textarea: "",
            audioCtx: '',
            source: '',
            typedArray: '',
            ttsResult: '',
            ws: '',

            // 控制播放状态
            streamingContinueStatus: true,
            endToEndContinueStatus: true,
            // 控制初始状态
            streamingOnInit: true, 
            endToEndOnInit: true, 
            // 控制是否开始
            streamingStopStatus: false,
            endToEndStopStatus: false,

            // 流式接收时间戳
            streamingAcceptStamp: '0',
            endToEndAcceptStamp: '0',
            // 流式发起时间戳
            streamingSendStamp: '0',
            endToEndSendStamp: '0'
            
        }
    },
    mounted(){
        this.getRandomChineseWord()
        
        this.ws = new WebSocket(apiURL.TTS_SOCKET_RECORD)
        var _that = this
        this.ws.addEventListener('message', function (event) {
            let temp = JSON.parse(event.data);
            if(chunk_index === 0){
                _that.streamingStopStatus = false
                _that.streamingAcceptStamp = Date.now()
            }

            // 接收的数据刷进播放器
            if(!temp.done){
                chunk_index += 1
                let chunk = temp.wav
                let arraybuffer = _that.base64ToUint8Array(chunk)
                let view = new DataView(arraybuffer.buffer);
                
                let length = view.buffer.byteLength / 2
                
                view = Recorder.encodeWAV(view, 24000, 24000, 1, 16, true) 
                _that._schedulePlaybackWav({
                    wavData: view.buffer,
                })
            } else {
                reciveOver = true
                // this.streamingOnInit = true
            }})
    },

    methods: {
        // 状态变量重置
        resetStatus(){
            this.streamingContinueStatus = true
            this.streamingOnInit = true
            this.streamingStopStatus = false

            this.endToEndContinueStatus = true
            this.endToEndOnInit = true
            this.endToEndStopStatus = false
        },

        // 生成随机文本
        getRandomChineseWord(){
            const resultChina = [
                "钱伟长想到上海来办学校是经过深思熟虑的。",
                "林荒大吼出声，即便十年挣扎，他也从未感到过如此无助。自己的身体一点点陷入岁月之门，却眼睁睁的看着君倾城一手持剑，雪白的身影决然凄厉。就这样孤身一人，于漫天风雪中，对阵数千武者。",
                "我们将继续成长，用行动回击那些只会说风凉话，不愿意和我们相向而行的害群之马。",
                "许多道理，人们已经证明过千遍万遍，为什么还要带着侥幸的心理再去试验一回呢？",
                "宫内整洁利索，廊柱门窗颜色鲜艳，几名电工正在维修线路。",
                "他身材矮小，颧骨突出，留着小胡子，说话一口浓重的福建口音。",
                "阿杰让阿悦看下剩下的盒饭合不合他的胃口。",
                "有网友问，能不能回忆几件刘洋在学校里的趣事或糗事。"
                ];
            let text = "";

            text = resultChina[Math.floor(Math.random() * 7)];
            this.textarea = text
        },
        // 基于WS的流式合成
        async getTtsChunkWavWS(){
            if(this.ws.readyState != this.ws.OPEN){
                this.$message.error("websocket 链接失败，请检查 Websocket 后端服务是否正确开启")
                return
            }
            // 初始化 chunks
            chunks = []
            chunk_index = 0
            reciveOver = false
            _reset()
            
            this.streamingOnInit = false
            this.streamingStopStatus = true
            this.streamingContinueStatus = true

            this.streamingSendStamp = Date.now()
            this.ws.send(this.textarea)
        },
        // 流式播放器
        _schedulePlaybackWav({wavData}) {
            var _that = this
            _audioCtx.decodeAudioData(wavData, audioBuffer => {
            const audioSrc = _audioCtx.createBufferSource()
            audioSrc.onended = () => {
                _audioSrcNodes.shift();
                if(_audioSrcNodes.length === 0){
                    _that.resetStatus()
                }
                };
            _audioSrcNodes.push(audioSrc);
            let startDelay = 0;
            if (!_playStartedAt) {
                startDelay = 10 / 1000;
                _playStartedAt = _audioCtx.currentTime + startDelay;
                }
            audioSrc.buffer = audioBuffer;
            audioSrc.connect(_audioCtx.destination);
            
            const startAt = _playStartedAt + _totalTimeScheduled;
            audioSrc.start(startAt);

            _totalTimeScheduled+= audioBuffer.duration;

            })    
        },

        // base64转换
        base64ToUint8Array(base64String) {
            const padding = '='.repeat((4 - base64String.length % 4) % 4);
            const base64 = (base64String + padding)
                            .replace(/-/g, '+')
                            .replace(/_/g, '/');

            const rawData = window.atob(base64);
            const outputArray = new Uint8Array(rawData.length);

            for (let i = 0; i < rawData.length; ++i) {
                    outputArray[i] = rawData.charCodeAt(i);
            }
            return outputArray;
        }, 
        
        // 暂停播放
        playerPaused(){
            _audioCtx.suspend()
        },

        // 恢复播放
        playerResume(){
            _audioCtx.resume()
        },

        // 流式播放暂停
        streamingStop(){
            this.playerPaused()
            // 切换为暂停状态
            this.streamingContinueStatus = false

        },
        // 流式播放恢复
        streamingResume(){
            this.playerResume()
            this.streamingContinueStatus = true
        },
        
        // 端到端合成
        async EndToEndSynthesis(){
            this.endToEndSendStamp = Date.now()
            this.endToEndOnInit = false
            this.endToEndStopStatus = true

            let ttsResult = await this.$http.post("/api/tts/offline", { text : this.textarea});
            
            if (ttsResult.status == 200) {
                this.endToEndAcceptStamp = Date.now()
                this.endToEndStopStatus = false
                this.endToEndContinueStatus = true
                // base转换二进制数
                console.log('res', ttsResult)
                let typedArray = this.base64ToUint8Array(ttsResult.data.result)
                // 播放音频
                this._schedulePlaybackWav({
                    wavData: typedArray.buffer,
                })                
            };
        },

        // 端到端播放暂停
        streamingStop(){
            this.playerPaused()
            // 切换为暂停状态
            this.endToEndContinueStatus = false

        },
        // 端到端播放恢复
        streamingResume(){
            this.playerResume()
            this.endToEndContinueStatus = true
        },


    }

}

</script>


<style lang="less" scoped>
@import "./style.less";
</style>

================================================
FILE: demos/speech_web/web_client/src/components/SubMenu/TTS/style.less
================================================
.speech_recognition {
    width: 1200px;
    height: 410px;
    background: #FFFFFF;
    padding: 40px 0px 50px 50px;
    box-sizing: border-box;
    display: flex;
    .recognition_text {
        width: 589px; 
        height: 320px;
        // background: pink;
       .recognition_text_header {
           margin-bottom: 30px;
           display: flex;
           justify-content: space-between;
           align-items: center;
           .recognition_text_title {
            height: 26px;
            font-family: PingFangSC-Medium;
            font-size: 16px;
            color: #000000;
            letter-spacing: 0;
            line-height: 26px;
            font-weight: 500;
           };
           .recognition_text_random {
               display: flex;
               align-items: center;
               cursor: pointer;
               span {
                display: inline-block;
                    &:nth-of-type(1) {
                        width: 20px;
                        height: 20px;
                        background: url("../../../assets/image/ic_更换示例.svg") no-repeat;
                        background-position: center;
                        background-size: 20px 20px;
                        margin-right: 5px;
                       
                    };
                    &:nth-of-type(2) {
                        height: 20px;
                        font-family: PingFangSC-Regular;
                        font-size: 14px;
                        color: #2932E1;
                        letter-spacing: 0;
                        font-weight: 400;
                    };
               };
           };
       };
       .recognition_text_field {
        width: 589px;
        height: 264px;
        background: #FAFAFA;
        .textToSpeech_content_show_text{
            width: 100%;
            height: 264px;
            padding: 0px 30px 30px 0px;
            box-sizing: border-box;
            .ant-input {
                height: 208px;
                resize: none;
                // margin-bottom: 230px;
                padding: 21px 20px;
            };
        };
       };
    };
    // Arrow graphic between the text input area and the synthesis controls
    .recognition_point_to {
        width: 47px;
        height: 63px;
        background: url("../../../assets/image/步骤-箭头切图@2x.png") no-repeat;
        background-position: center;
        background-size: 47px 63px;
        // margin-top was declared twice with the same value; one declaration is enough
        margin-top: 164px;
        margin-right: 101px;
        margin-left: 100px;
    };
    //    语音合成
    .speech_recognition_new {
        .speech_recognition_title {
            height: 26px;
            font-family: PingFangSC-Medium;
            font-size: 16px;
            color: #000000;
            line-height: 26px;
            font-weight: 500;
            margin-left: 32px;
            margin-bottom: 96px;
        };
        // 流式合成
        .speech_recognition_streaming {
            width: 136px;
            height: 44px;
            background: #2932E1;
            border-radius: 22px;
            font-family: PingFangSC-Medium;
            font-size: 14px;
            color: #FFFFFF;
            font-weight: 500;
            text-align: center;
            line-height: 44px;
            margin-bottom: 40px;
            cursor: pointer;
            &:hover {
                opacity: .9;
            };
        };
         // 合成中
        // Streaming synthesis in progress: button plus elapsed-time label
        .streaming_ing_box {
            display: flex;
            align-items: center;
            height: 44px;
            margin-bottom: 40px;

            // "Synthesizing" button
            .streaming_ing {
                width: 136px;
                height: 44px;
                background: #7278F5;
                border-radius: 22px;
                display: flex;
                justify-content: center;
                align-items: center;
                cursor: pointer;

                .streaming_ing_img {
                    width: 16px;
                    height: 16px;
                    // background: url("../../../assets/image/ic_小-录制语音.svg");
                    // background-repeat: no-repeat;
                    // background-position: center;
                    // background-size: 16px 16px;
                    // margin-right: 12px;
                }

                .streaming_ing_text {
                    height: 20px;
                    font-family: PingFangSC-Medium;
                    font-size: 14px;
                    color: #FFFFFF;
                    font-weight: 500;
                    margin-left: 12px;
                }
            }

            // Synthesis time text
            .streaming_time {
                height: 20px;
                font-family: PingFangSC-Medium;
                font-size: 14px;
                color: #000000;
                font-weight: 500;
                margin-left: 12px;
            }
        }
        

        // 暂停播放
        .streaming_suspended_box {
            display: flex;
            align-items: center;
            height: 44px;
            margin-bottom: 40px;
            .streaming_suspended {
                width: 136px;
                height: 44px;
                background: #2932E1;
                border-radius: 22px;
                display: flex;
                justify-content: center;
                align-items: center;
                cursor: pointer;
              
                .streaming_suspended_img {
                    width: 16px;
                    height: 16px;
                    background: url("../../../assets/image/ic_暂停（按钮）.svg");
                    background-repeat: no-repeat;
                    background-position: center;
                    background-size: 16px 16px;
                    margin-right: 12px;
                };
                .streaming_suspended_text {
                    height: 20px;
                    font-family: PingFangSC-Medium;
                    font-size: 14px;
                    color: #FFFFFF;
                    font-weight: 500;
                    margin-left: 12px;
                };
               
            };
            // 暂停获取时间
            .suspended_time {
                height: 20px;
                font-family: PingFangSC-Medium;
                font-size: 14px;
                color: #000000;
                font-weight: 500;
                margin-left: 12px;
            }
        };
       
        // 继续播放
        .streaming_continue {
            width: 136px;
            height: 44px;
            background: #2932E1;
            border-radius: 22px;
            display: flex;
            justify-content: center;
            align-items: center;
            cursor: pointer;
            margin-bottom: 40px;
            .streaming_continue_img {
                width: 16px;
                height: 16px;
                background: url("../../../assets/image/ic_播放（按钮）.svg");
                background-repeat: no-repeat;
                background-position: center;
                background-size: 16px 16px;
                margin-right: 12px;
            };
            .streaming_continue_text {
                height: 20px;
                font-family: PingFangSC-Medium;
                font-size: 14px;
                color: #FFFFFF;
                font-weight: 500;
            };
        };


        // 端到端合成
        .speech_recognition_end_to_end {
            width: 136px;
            height: 44px;
            background: #2932E1;
            border-radius: 22px;
            font-family: PingFangSC-Medium;
            font-size: 14px;
            color: #FFFFFF;
            font-weight: 500;
            text-align: center;
            line-height: 44px;
            cursor: pointer;
            &:hover {
                opacity: .9;
            };
        };
        // 合成中
        .end_to_end_ing_box {
            display: flex;
            align-items: center;
            height: 44px;
            .end_to_end_ing {
                width: 136px;
                height: 44px;
                background: #7278F5;
                border-radius: 22px;
                display: flex;
                justify-content: center;
                align-items: center;
                cursor: pointer;
                .end_to_end_ing_img {
                    width: 16px;
                    height: 16px;
                    // background: url("../../../assets/image/ic_小-录制语音.svg");
                    // background-repeat: no-repeat;
                    // background-position: center;
                    // background-size: 16px 16px;
                   
                };
                .end_to_end_ing_text {
                    height: 20px;
                    font-family: PingFangSC-Medium;
                    font-size: 14px;
                    color: #FFFFFF;
                    font-weight: 500;
                    margin-left: 12px;
                };
            };
            // 合成时间文本
            .end_to_end_ing_time {
                height: 20px;
                font-family: PingFangSC-Medium;
                font-size: 14px;
                color: #000000;
                font-weight: 500;
                margin-left: 12px;
            };
        };
       

        // 暂停播放
        .end_to_end_suspended_box {
            display: flex;
            align-items: center;
            height: 44px;
            .end_to_end_suspended {
                width: 136px;
                height: 44px;
                background: #2932E1;
                border-radius: 22px;
                display: flex;
                justify-content: center;
                align-items: center;
                cursor: pointer;
                .end_to_end_suspended_img {
                    width: 16px;
                    height: 16px;
                    background: url("../../../assets/image/ic_暂停（按钮）.svg");
                    background-repeat: no-repeat;
                    background-position: center;
                    background-size: 16px 16px;
                    margin-right: 12px;
                };
                .end_to_end_suspended_text {
                    height: 20px;
                    font-family: PingFangSC-Medium;
                    font-size: 14px;
                    color: #FFFFFF;
                    font-weight: 500;
                };
            };
            // 暂停播放时间
            .end_to_end_ing_suspended_time {
                height: 20px;
                font-family: PingFangSC-Medium;
                font-size: 14px;
                color: #000000;
                font-weight: 500;
                margin-left: 12px;
            };
        };
       
        // 继续播放
        .end_to_end_continue {
            width: 136px;
            height: 44px;
            background: #2932E1;
            border-radius: 22px;
            display: flex;
            justify-content: center;
            align-items: center;
            cursor: pointer;
            .end_to_end_continue_img {
                width: 16px;
                height: 16px;
                background: url("../../../assets/image/ic_播放（按钮）.svg");
                background-repeat: no-repeat;
                background-position: center;
                background-size: 16px 16px;
                margin-right: 12px;
            };
            .end_to_end_continue_text {
                height: 20px;
                font-family: PingFangSC-Medium;
                font-size: 14px;
                color: #FFFFFF;
                font-weight: 500;
            };
        };
    };
};

================================================
FILE: demos/speech_web/web_client/src/components/SubMenu/VPR/VPRT.vue
================================================
<template>
<div class="voiceprint">
        <div class="voiceprint_recording">
            <div class="recording_title">
                <div>1</div>
                <div>
                    录制声纹
                </div>
            </div>
            <div>
                试试对我说：欢迎使用飞桨声纹识别系统
            </div>
            <!-- 开始录音 -->
                <div v-if="onEnrollRec === 0 " class="recording_btn"
                    @click="startRecorderEnroll()"
                >
                    <div class="recording_img"></div>
                        <div class="recording_prompt">
                            录制声音
                        </div>
                </div>
                <!-- 结束录音 -->
                <div v-else-if="onEnrollRec === 1 " class="recording_btn_the_recording"
                    @click="stopRecorderEnroll(0)"
                >
                    <a-spin />
                    <div class="recording_prompt">
                            停止录音
                    </div>
                </div>

                <!-- Recording finished: register the voiceprint -->
                <div v-else class="complete_the_recording_btn"
                    @click="enrollVoicePrint()"
                >
                    <div class="complete_the_recording_img"></div>
                    <div class="complete_the_recording_prompt">
                        注册声纹
                    </div>
                </div>
            
            <!-- 用户名输入框 -->
            <div class="recording_input">
                <el-input v-model="enrollSpkId" class="w-50 m-2" autosize placeholder="请输入注册用户名" />
            </div>
            

            <!-- Table of enrolled voiceprints -->
            <div class="recording_table">

                <el-table :data="vpr_datas" border class="recording_table_box">
                    <el-table-column prop="spkId" label="用户" />
                    <el-table-column fixed="right" label="操作">
                        <template #default="scope">
                            <el-button @click="Play(scope.row.vprId)" type="text" size="small">播放</el-button>
                            <el-button @click="Del(scope.row.spkId)" type="text" size="small">删除</el-button>
                        </template>
                    </el-table-column>
                </el-table>

            </div>
        </div>

        <!-- Arrow pointing to the next step -->
        <div class="recording_point_to"></div>

        <!-- Voiceprint recognition -->
        <div class="voiceprint_identify">
            <div class="identify_title">
                <div>2</div>
                <div>
                    识别声纹
                </div>
            </div>
            <div>
                试试对我说：请识别一下我的声音
            </div>
                    <div v-if="onRegRec === 0" class="identify_btn"
                        @click="startRecorderRecog()"
                    >
                        <div class="identify_img"></div>
                        <div class="identify_prompt">
                            录制声音
                        </div>
                    </div>

                    <div v-else-if="onRegRec === 1" class="identify_btn_the_recording"
                    @click="stopRecorderRecog()">
                        <a-spin />
                        <div class="recording_prompt">
                                停止录音
                        </div>
                           
                    </div>
                    
                    <div v-else class="identify_complete_the_recording_btn"
                        @click="Recog()">
                        <div class="identify_complete_the_recording_img"></div>
                        <div class="identify_complete_the_recording_prompt">
                            开始识别
                        </div>
                    </div>

            <div class="identify_result">
                <div class="identify_result_content">
                    <div>识别结果</div>
                    <div>{{scoreResult}}</div>
                </div>
            </div>
        </div>
</div>
</template>

<script>
import Recorder from 'js-audio-recorder'
import { vprData, vprList, vprEnroll, vprRecog, vprDel } from '../../../api/ApiVPR';

// Initialise the recorder
const recorder = new Recorder({
  sampleBits: 16,                 // Sample bit depth: 8 or 16 supported, default 16
  sampleRate: 16000,              // Sample rate: 11025, 16000, 22050, 24000, 44100 or 48000 supported; defaults to the browser's rate (48000 in the author's Chrome)
  numChannels: 1,                 // Channels: 1 or 2 supported, default 1
  compiling: true
})

// Initialise the player
const audioCtx = new AudioContext({
    latencyHint: 'interactive',
    sampleRate: 16000,
});

export default {
    // Component state. The two recording-state fields take the values 0, 1 and 2:
    // the template shows the "record" button for 0, the "stop" button for 1 and
    // the enroll / recognise button otherwise.
    data(){
        return {
            onEnrollRec: 0,     // Recording state for enrollment
            onRegRec:0,         // Recording state for recognition

            scoreResult: "",   // Recognition result shown in the template
            enrollSpkId: "",    // Speaker id used for enrollment

            wav: '',            // Recorded audio ('' when nothing is recorded)

            scoreResults: [],   // Scored recognition results
            vpr_datas: []       // Enrolled voiceprint list


        }
    },
    // On mount: load the enrolled voiceprint list and pre-fill a random speaker id.
    mounted () {
        this.GetList()
        this.randomSpkId()
    },
    methods: {
        // Reset: clear the recorded audio and any previous recognition results
        reset(){
            this.wav = ''
            this.scoreResults = []
            this.scoreResult = ""
        },
        // random SpkName
        randomSpkId(){
            var e = 3;
            var t = "赵钱孙李周吴郑王冯陈褚卫蒋沈韩杨朱秦尤许何吕施张孔曹严华金魏陶姜戚谢邹喻柏水窦章云苏潘葛奚范彭郎鲁韦昌马苗凤花方俞任袁柳酆鲍史唐费廉岑薛雷贺倪汤滕殷罗毕郝邬安常乐于时傅皮卞齐康伍余元卜顾孟平黄",
            a = t.length,
            n = "";
            for (var i = 0; i < e; i++) n += t.charAt(Math.floor(Math.random() * a));
            this.enrollSpkId = n
            console.log("n", n)
        },
        // Start recording for voiceprint enrollment
        startRecorderEnroll(){
            this.onEnrollRec = 1
            recorder.clear()
            recorder.start()
        },
        // 注册声纹结束录音
        stopRecorderEnroll(){
            this,this.onEnrollRec = 2
            recorder.stop()
            this.wav = recorder.getWAVBlob()
        },

        // Start recording for voiceprint recognition
        startRecorderRecog(){
            // this.wav = ''
            this.onRegRec = 1
            // reset() clears the previous recording and recognition results
            this.reset()
            recorder.clear()
            recorder.start()
        },

        // 注册声纹结束录音
        stopRecorderRecog(){
            this,this.onRegRec = 2
            recorder.stop()
            this.wav = recorder.getWAVBlob()
        },

        // 注册声纹
        async enrollVoicePrint(){
                if(this.wav === ''){
                    this.$message.error("请先完成录音");
                    this.onEnrollRec = 0
                    return
                }
                if(this.enrollSpkId === ""){
                    this.$message.error("请输入声纹用户名")
                    this.onEnrollRec = 2
                    return
                }
                this.onEnrollRec = 0

                let formData = new FormData()
                formData.append('spk_id', this.enrollSpkId)
                formData.append('audio', this.wav)
                
                const result = await vprEnroll(formData)
                if (!result){
                    this.$message.error("请检查后端服务是否正确开启")
                    return 
                }
                if(result.data.status){
                    this.$message.success("声纹注册成功")
                } else {
                    this.$message.error(result.data.msg)
                }
                this.GetList()
                this.wav = ''
                this.randomSpkId()
            },

        // 识别声纹
        async Recog(){
            this.scoreResults = []
            this.onRegRec = 0
            if(this.wav === ''){
                this.$message.error("请先完成录音");
                return
            }
            if(this.vpr_datas.length == 0){
                this.$message.error("未查询到声纹数据，请先注册");
                return
            }
            let formData = new FormData()
            formData.append('audio', this.wav)
            const result = await vprRecog(formData);
            console.log(result)
            result.data.forEach(dat => {
                this.scoreResults.push({
                    spkId: dat[0],
                    score: dat[1][1]
                })
            });
            if(this.scoreResults.length > 0){
                this.scoreResult = this.scoreResults[0]['spkId']
            }
        },

        // 删除声纹
        async Del(spkId){
                console.log('spkId', spkId)
                // 删除用户
                const result = await vprDel({spk_id: spkId});
                if(result.data.status){
                    this.$message.success("删除成功")
                } else {
                    this.$message.error(result.data.msg)
                }
                this.GetList()
            },
        
        // 获取声纹列表
        async GetList(){
            this.vpr_datas =[]
            const result = await vprList();
            console.log("list", result)
            for(let i=0; i<result.data[0].length; i++){
                this.vpr_datas.push({
                    spkId: result.data[0][i],
                    vprId: result.data[1][i]
                })
            }
            this.$nextTick(()=>{})
        },

        // 播放音频
        async Play(vprId){
                console.log('vprId', vprId)
                // 获取音频数据
                const result = await vprData(vprId);
                console.log('play result', result)
                if (result.data.code == 0) {
                    // base转换二进制数
                    let typedArray = this.base64ToUint8Array(result.data.result)

                    // 添加wav文件头
                    let view = new DataView(typedArray.buffer);
                    view = Recorder.encodeWAV(view, 16000, 16000, 1, 16, true);

                    // 播放音频
                    this.playAudioData(view.buffer);
                };
        },

        // base64解码
        base64ToUint8Array(base64String) {
            const padding = '='.repeat((4 - base64String.length % 4) % 4);
            const base64 = (base64String + padding)
                            .replace(/-/g, '+')
                            .replace(/_/g, '/');

            const rawData = window.atob(base64);
            const outputArray = new Uint8Array(rawData.length);

            for (let i = 0; i < rawData.length; ++i) {
                    outputArray[i] = rawData.charCodeAt(i);
            }
            return outputArray;
        }, 
        // 播放音频
        playAudioData( wav_buffer ) {
        audioCtx.decodeAudioData(wav_buffer, buffer => {
            var source = audioCtx.createBufferSource();
            source.buffer = buffer;
            source.connect(audioCtx.destination);
            source.start();
        }, function(e) {
            Recorder.throwError(e);
            })
        }
    }
};
</script>

<style lang="less" scoped>
@import "./style.less";
</style>

================================================
FILE: demos/speech_web/web_client/src/components/SubMenu/VPR/style.less
================================================
.voiceprint {
    width: 1200px;
    height: 410px;
    background: #FFFFFF;
    padding: 41px 80px 56px 80px;
    box-sizing: border-box;
    display: flex;
    // 录制声纹
    .voiceprint_recording {
        width: 423px;
        height: 354px;
        margin-right: 66px;
        .recording_title {
            display: flex;
            align-items: center;
            margin-bottom: 20px;
            div {
                &:nth-of-type(1) {
                    width: 24px;
                    height: 24px;
                    background: rgba(41,50,225,0.70);
                    font-family: PingFangSC-Regular;
                    font-size: 16px;
                    color: #FFFFFF;
                    letter-spacing: 0;
                    text-align: center;
                    line-height: 24px;
                    font-weight: 400;
                    margin-right: 16px;
                    border-radius: 50%;
                };
                &:nth-of-type(2) {
                    height: 26px;
                    font-family: PingFangSC-Regular;
                    font-size: 16px;
                    color: #000000;
                    line-height: 26px;
                    font-weight: 400;
                };
            };
        };
        // 开始录音
        .recording_btn {
            width: 143px;
            height: 44px;
            cursor: pointer;
            background: #2932E1;
            padding: 0px  24px 0px 21px;
            box-sizing: border-box;
            border-radius: 22px;
            display: flex;
            align-items: center;
            margin-bottom: 20px;
            margin-top: 10px;
            
            &:hover {
                background: #7278F5;
                .recording_img {
                    width: 20px;
                    height: 20px;
                    background: url("../../../assets/image//icon_录制声音小语音1.svg");
                    background-repeat: no-repeat;
                    background-position: center;
                    background-size: 20px 20px;
                    margin-right: 8.26px;
                
                };
            }
            .recording_img {
                width: 20px;
                height: 20px;
                background: url("../../../assets/image//icon_录制声音小语音1.svg");
                background-repeat: no-repeat;
                background-position: center;
                background-size: 20px 20px;
                margin-right: 8.26px;
            
            };
            .recording_prompt {
                height: 20px;
                font-family: PingFangSC-Regular;
                font-size: 12px;
                color: #FFFFFF;
                font-weight: 400;
            };
           
        };
        // 录音中
        .recording_btn_the_recording {
            width: 143px;
            height: 44px;
            cursor: pointer;
            background: #7278F5;
            padding: 0px  24px 0px 21px;
            box-sizing: border-box;
            border-radius: 22px;
            display: flex;
            align-items: center;
            justify-content: center;
            margin-bottom: 40px;
            .recording_img_the_recording {
                width: 20px;
                height: 20px;
                background: url("../../../assets/image//icon_小-声音波浪.svg");
                background-repeat: no-repeat;
                background-position: center;
                background-size: 20px 20px;
                margin-right: 8.26px;
            };
            .recording_prompt {
                height: 20px;
                font-family: PingFangSC-Regular;
                font-size: 12px;
                color: #FFFFFF;
                font-weight: 400;
            };
        };
        // 完成录音
        .complete_the_recording_btn {
            width: 143px;
            height: 44px;
            cursor: pointer;
            background: #2932E1;
            padding: 0px  24px 0px 21px;
            box-sizing: border-box;
            border-radius: 22px;
            display: flex;
            align-items: center;
            margin-bottom: 40px;
            &:hover {
                background: #7278F5;
                .complete_the_recording_img {
                    width: 20px;
                    height: 20px;
                    background: url("../../../assets/image//icon_小-声音波浪.svg");
                    background-repeat: no-repeat;
                    background-position: center;
                    background-size: 20px 20px;
                    margin-right: 8.26px;
                
                };
            }
            .complete_the_recording_img {
                width: 20px;
                height: 20px;
                background: url("../../../assets/image//icon_小-声音波浪.svg");
                background-repeat: no-repeat;
                background-position: center;
                background-size: 20px 20px;
                margin-right: 8.26px;
            
            };
            .complete_the_recording_prompt {
                height: 20px;
                font-family: PingFangSC-Regular;
                font-size: 12px;
                color: #FFFFFF;
                font-weight: 400;
            };
           
        };
         // table
         .recording_table {
            width: 322px;
            // NOTE(review): these selectors target `.ant-table-*`, while the VPR template
            // in this repository renders an `el-table`; they may never match - confirm.
            .recording_table_box {
                .ant-table-thead > tr > th {
                    // The earlier `color: rgba(0, 0, 0, 0.85)` and `font-weight: 500`
                    // were overridden by later declarations in this same rule; only
                    // the effective values are kept.
                    text-align: left;
                    background: rgba(40,50,225,0.08);
                    border-bottom: none;
                    transition: background 0.3s ease;
                    height: 22px;
                    font-family: PingFangSC-Regular;
                    font-size: 16px;
                    color: #333333;
                    // text-align: center;
                    font-weight: 400;
                    &:nth-of-type(2) {
                        border-left: 2px solid white;
                    };
                };
                .ant-table-tbody > tr > td {
                    border-bottom: 1px solid #f0f0f0;
                    transition: background 0.3s;
                    height: 22px;
                    font-family: PingFangSC-Regular;
                    font-size: 16px;
                    color: #333333;
                    // text-align: center;
                    font-weight: 400;
                };
            };
         };
        // input
        .recording_input {
            width: 322px;
            margin-bottom: 20px;
        };
    };
    // Arrow graphic between the enrollment column and the recognition column
    .recording_point_to {
        width: 63px;
        height: 47px;
        background: url("../../../assets/image//步骤-箭头切图@2x.png");
        background-repeat: no-repeat;
        background-position: center;
        background-size: 63px 47px;
        margin-right: 66px;
        margin-top: 198px;
    };
    //识别声纹
    .voiceprint_identify {
        width: 423px;
        height: 354px;
        .identify_title {
            display: flex;
            align-items: center;
            margin-bottom: 20px;
            div {
                &:nth-of-type(1) {
                    width: 24px;
                    height: 24px;
                    background: rgba(41,50,225,0.70);
                    font-family: PingFangSC-Regular;
                    font-size: 16px;
                    color: #FFFFFF;
                    letter-spacing: 0;
                    text-align: center;
                    line-height: 24px;
                    font-weight: 400;
                    margin-right: 16px;
                    border-radius: 50%;
                };
                &:nth-of-type(2) {
                    height: 26px;
                    font-family: PingFangSC-Regular;
                    font-size: 16px;
                    color: #000000;
                    line-height: 26px;
                    font-weight: 400;
                };
            };
        };
        // 开始识别
        .identify_btn {
            // "Start recognition" button: a pill-shaped flex row holding an
            // icon (.identify_img) followed by a text prompt (.identify_prompt).
            width: 143px;
            height: 44px;
            cursor: pointer;
            background: #2932E1;
            padding: 0px  24px 0px 21px;
            box-sizing: border-box;
            border-radius: 22px;
            display: flex;
            align-items: center;
            margin-bottom: 40px;
            margin-top: 10px;
            &:hover {
                // Only the background changes on hover. The icon rule below
                // applies in every state, so it is not repeated here.
                background: #7278F5;
            }
            .identify_img {
                width: 20px;
                height: 20px;
                background: url("../../../assets/image//icon_录制声音小语音1.svg");
                background-repeat: no-repeat;
                background-position: center;
                background-size: 20px 20px;
                margin-right: 8.26px;
            };
            .identify_prompt {
                height: 20px;
                font-family: PingFangSC-Regular;
                font-size: 12px;
                color: #FFFFFF;
                font-weight: 400;
            };
        };
         // 识别中
         .identify_btn_the_recording {
            // "Recognition in progress" state of the button: same pill shape as
            // the idle button, drawn in the lighter colour with centred content.
            width: 143px;
            height: 44px;
            cursor: pointer;
            background: #7278F5;
            padding: 0px  24px 0px 21px;
            box-sizing: border-box;
            border-radius: 22px;
            display: flex;
            align-items: center;
            justify-content: center;
            margin-bottom: 40px;
            // 20x20 icon shown to the left of the prompt text.
            .identify_img_the_recording {
                width: 20px;
                height: 20px;
                background: url("../../../assets/image//icon_录制声音小语音1.svg");
                background-repeat: no-repeat;
                background-position: center;
                background-size: 20px 20px;
                margin-right: 8.26px;
            };
            // White 12px prompt text.
            .recording_prompt {
                height: 20px;
                font-family: PingFangSC-Regular;
                font-size: 12px;
                color: #FFFFFF;
                font-weight: 400;
            };
        };
        // 完成识别
        .identify_complete_the_recording_btn {
            // "Finish recognition" button: a pill-shaped flex row holding a
            // waveform icon followed by a text prompt.
            width: 143px;
            height: 44px;
            cursor: pointer;
            background: #2932E1;
            padding: 0px  24px 0px 21px;
            box-sizing: border-box;
            border-radius: 22px;
            display: flex;
            align-items: center;
            margin-bottom: 40px;
            &:hover {
                // Only the background changes on hover. The icon rule below
                // applies in every state, so it is not repeated here.
                background: #7278F5;
            }
            .identify_complete_the_recording_img {
                width: 20px;
                height: 20px;
                background: url("../../../assets/image//icon_小-声音波浪.svg");
                background-repeat: no-repeat;
                background-position: center;
                background-size: 20px 20px;
                margin-right: 8.26px;
            };
            .identify_complete_the_recording_prompt {
                height: 20px;
                font-family: PingFangSC-Regular;
                font-size: 12px;
                color: #FFFFFF;
                font-weight: 400;
            };
        };


        // 结果
        .identify_result {
            // Fixed-size result panel; line-height matches the panel height.
            position: relative;
            width: 422px;
            height: 184px;
            line-height: 184px;
            text-align: center;
            background: #FAFAFA;

            // Grey placeholder text.
            .identify_result_default {
                color: #999999;
                font-family: PingFangSC-Regular;
                font-size: 16px;
                font-weight: 400;
            };

            // Result content: first div is the smaller grey line, second div
            // is the larger black line.
            .identify_result_content {
                div {
                    &:nth-of-type(1) {
                        height: 22px;
                        margin-bottom: 10px;
                        color: #666666;
                        font-family: PingFangSC-Regular;
                        font-size: 16px;
                        font-weight: 400;
                    };
                    &:nth-of-type(2) {
                        height: 33px;
                        color: #000000;
                        font-family: PingFangSC-Medium;
                        font-size: 24px;
                        font-weight: 500;
                    };
                };
            };
        };
    };
    // Inline, link-styled action text (blue, pointer cursor).
    .action_btn {
        display: inline-block;
        height: 22px;
        font-family: PingFangSC-Regular;
        font-size: 16px;
        color: #2932E1;
        text-align: center;
        font-weight: 400;
        cursor: pointer;
    };
};

================================================
FILE: demos/speech_web/web_client/src/components/SubMenu/VoiceClone/VoiceClone.vue
================================================
<template>
    <div class="voiceclone">
        <el-row :gutter="20">
            <el-col :span="12"><div class="grid-content ep-bg-purple" />
                <el-row :gutter="60" class="btn_row_wav" justify="center">
                    <el-button class="ml-3" v-if="onEnrollRec === 0" @click="startRecorderEnroll()" type="primary">录制音频</el-button>
                    <el-button class="ml-3" v-else-if="onEnrollRec === 1" @click="stopRecorderEnroll()" type="danger">停止录音</el-button>
                    <el-button class="ml-3" v-else @click="uploadRecord()" type="success">上传录音</el-button>
                    <a>&#12288</a>
                    <el-upload
                        :multiple="false"
                        :accept="'.wav'"
                        :auto-upload="false"
                        :on-change="handleChange"
                        :show-file-list="false"
                    >
                        <el-button class="ml-3" type="success">上传音频文件</el-button>
                    </el-upload>
                </el-row>
                <div class="recording_table">
                <el-table :data="vcDatas" border class="recording_table_box" scrollbar-always-on max-height="250px">
                    <el-table-column prop="wavId" label="序号" width="60"/>
                    <el-table-column prop="wavName" label="文件名" />
                    <el-table-column label="操作" width="80">
                        <template #default="scope">
                            <div class="flex justify-space-between mb-4 flex-wrap gap-4">
                                <a @click="PlayTable(scope.row.wavId)"><el-icon><VideoPlay /></el-icon></a>
                                <a>&#12288</a>
                                <a @click="delWav(scope.row.wavId)"><el-icon><DeleteFilled /></el-icon></a>
                            </div>
                        </template>
                    </el-table-column>
                    <el-table-column fixed="right" label="选择" width="70">
                        <template #default="scope">
                            <el-switch v-model="scope.row.status"  @click="choseWav(scope.row.wavId)"/>
                        </template>
                    </el-table-column>
                </el-table>
                </div>

            </el-col>
            <el-col :span="8"><div class="grid-content ep-bg-purple" />
                <el-space direction="vertical">
                    <el-card class="box-card" style="width: 250px; height:310px">
                        <template #header>
                            <div class="card-header">
                            <span>请输入中文文本</span>
                            </div>
                        </template>
                        <div class="mb-2 flex items-center text-sm">
                            <el-radio-group v-model="func_radio" class="ml-4">
                            <el-radio label="1" size="large">GE2E</el-radio>
                            <el-radio label="2" size="large">ECAPA-TDNN</el-radio>
                            </el-radio-group>
                        </div>
                        <el-input
                            v-model="ttsText"
                            :autosize="{ minRows: 8, maxRows: 13 }"
                            type="textarea"
                            placeholder="Please input"
                            />
                    </el-card>                    
                </el-space>
            </el-col>
            <el-col :span="4"><div class="grid-content ep-bg-purple" />
                <div class="play_board">
                    <el-space direction="vertical">
                        <el-row :gutter="20">
                            <el-button size="large" v-if="g2pOnSys === 0" type="primary" @click="g2pClone()">开始合成</el-button>
                            <el-button size="large" v-else :loading-icon="Eleme" type="danger">合成中</el-button>
                        </el-row>

                        <el-row :gutter="20">
                            <el-button v-if='this.cloneWav' type="success" @click="PlaySyn()">播放</el-button>
                            <el-button v-else disabled type="primary" @click="PlaySyn()">播放</el-button>
                            <el-button v-if='this.cloneWav' type="primary" @click="downLoadCloneWav()">下载</el-button>
                            <el-button v-else disabled type="primary" @click="downLoadCloneWav()">下载</el-button>
                        </el-row>
                    </el-space>
                </div>
            </el-col>
        </el-row>
    </div>
</template>

<script>

import Recorder from 'js-audio-recorder'
import { vcCloneG2P, vcCloneSAT, vcDel, vcUpload, vcList, vcDownload, vcDownloadBase64 } from '../../../api/ApiVC';

// Module-level recorder used to capture enrollment audio.
const recorder = new Recorder({
  sampleBits: 16,                 // bits per sample; 8 or 16 supported, default is 16
  sampleRate: 16000,              // sample rate; supports 11025, 16000, 22050, 24000, 44100, 48000; defaults to the browser's rate (48000 on the author's Chrome)
  numChannels: 1,                 // number of channels; 1 or 2 supported, default is 1
  compiling: true
})

// Module-level audio context used by playAudioData for playback.
const audioCtx = new AudioContext({
    latencyHint: 'interactive',
    sampleRate: 16000,
});

export default {
    // Reactive component state.
    data(){
         return {
            onEnrollRec: 0,     // recording state: 0 shows "record", 1 shows "stop", any other value shows "upload recording"
            wav: '',            // recording result (set from recorder.getWAVBlob()); '' means no recording
            vcDatas: [],       // rows of the uploaded-audio table: { wavName, wavId, wavPath, status }
            nowFile: "",        // name of the currently selected audio
            ttsText: "欢迎使用飞桨语音套件",
            nowIndex: -1,       // index into vcDatas of the selected audio; -1 means nothing selected
            cloneWav: "",       // synthesis result returned by the clone request; "" means none yet
            g2pOnSys: 0,        // 1 while a synthesis request is in flight, otherwise 0
            func_radio: '1',    // selected method: '1' maps to 'ge2e', anything else to 'ecapa_tdnn'
         }
    },
    // Load the list of uploaded audio files as soon as the component is mounted.
    mounted () {
        this.GetList()
    },
    methods:{
        // 重置
        reset(){
            this.onEnrollRec = 0
            this.wav = ''
            this.vcDatas = []
            this.nowFile = ""
            this.ttsText = "欢迎使用飞桨语音套件"
            this.nowIndex = -1
        },
        // 开始录音
        startRecorderEnroll(){
            this.onEnrollRec = 1
            recorder.clear()
            recorder.start()
        },
        // 结束录音
        // Stop the recording, switch the UI to the "upload recording" state
        // and keep the captured audio as a WAV blob in this.wav.
        stopRecorderEnroll(){
            this.onEnrollRec = 2
            recorder.stop()
            this.wav = recorder.getWAVBlob()
        },
        // chose wav
        choseWav(wavId){
            this.cloneWav = ''
            this.nowFile = this.vcDatas[wavId].wavName
            this.nowIndex = wavId
            // only wavId is true else false
            for(let i=0; i<this.vcDatas.length; i++){
                if(i==wavId){
                    this.vcDatas[wavId].status = true
                } else {
                    this.vcDatas[i].status = false
                }
            }
            this.$nextTick(()=>{})
        },
        // 上传录音
        async uploadRecord(){
            this.onEnrollRec = 0
            if(this.wav === ""){
                this.$message.error("未检测到录音，录音失败，请重新录制")
                return
            } else {
                if(this.wav === ''){
                    this.$message.error("请先完成录音");
                    this.onEnrollRec = 0
                    return
                } else {
                    let formData = new FormData();
                    formData.append('files', this.wav);
                    const result = await vcUpload(formData);
                    console.log(result)
                    this.GetList() 
                }
                this.$message.success("录音上传成功")
            }
        }, 
        // 上传列表改变
        async handleChange(file, fileList){
            for(let i=0; i<fileList.length; i++){
                this.uploadFile(fileList[i])
            } 
        },

        // 上传音频
        async uploadFile(file){
            let formData = new FormData();
            formData.append('files', file.raw);
            const result = await vcUpload(formData);
            if (result.data.code === 0) {
                this.$message.success("音频上传成功")
                this.GetList()
            } else {
                this.$message.error("音频上传失败")
            }
        },
        // 获取文件列表
        async GetList(){
            this.vcDatas =[]
            const result = await vcList();
            for(let i=0; i<result.data.result.length; i++){
                this.vcDatas.push({
                    wavName: result.data.result[i]['name'],
                    wavId: i,
                    wavPath: result.data.result[i]['path'],
                    status: false
                })
            }
            this.$nextTick(()=>{})
        },
        // 删除音频文件
        async delWav(wavId){
            console.log('wavId', wavId)
            // 删除文件
            const result = await vcDel(
                {
                    wavName: this.vcDatas[wavId]['wavName'],
                    wavPath: this.vcDatas[wavId]['wavPath']
                }
            );
            if(!result.data.code){
                this.$message.success("删除成功")
            } else {
                this.$message.error(result.data.msg)
            }
            this.GetList()
            this.reset()
        },
        // 下载合成文件
        async downLoadCloneWav(){
            if(this.cloneWav  === ""){
                this.$message.error("音频合成完毕后再下载！")
            } else {
                // const result = await vcDownload(this.cloneWav);
                // 获取音频数据
                const result = await vcDownloadBase64(this.cloneWav);
                let view;
                // console.log('play result', result)
                if (result.data.code === 0) {
                    // base转换二进制数
                    let typedArray = this.base64ToUint8Array(result.data.result)
                    // 添加wav文件头
                    view = new DataView(typedArray.buffer);
                    view = Recorder.encodeWAV(view, 16000, 16000, 1, 16, true);
                    // 播放音频
                    // this.playAudioData(view.buffer);
                }
                console.log(view.buffer)
                // debugger
                const blob = new Blob([view.buffer], { type: 'audio/wav' });
                const fileName = new Date().getTime() + '.wav';
                const down = document.createElement('a');
                down.download = fileName;
                down.style.display = 'none';//隐藏,没必要展示出来
                down.href = URL.createObjectURL(blob);
                document.body.appendChild(down);
                down.click();
                URL.revokeObjectURL(down.href); // 释放URL 对象
                document.body.removeChild(down);//下载完成移除
            }
        },
        // g2p voice clone
        async g2pClone(){
            if(this.nowIndex === -1){
                return this.$message.error("请先录音并上传，选择音频后再点击合成")
            } else if (this.ttsText === ""){
                return this.$message.error("合成文本不可以为空")
            } else if (this.nowIndex >= this.vcDatas.length){
                return this.$message.error("当前序号不可以超过音频个数")
            }
            this.cloneWav = ""
            let func = ''
            if(this.func_radio === '1'){
                func = 'ge2e'
            } else {
                func = 'ecapa_tdnn'
            }
            console.log('func', func)

            // 合成
            this.g2pOnSys = 1
            const result = await vcCloneG2P(
                {
                    wavName: this.vcDatas[this.nowIndex]['wavName'],
                    wavPath: this.vcDatas[this.nowIndex]['wavPath'],
                    text: this.ttsText,
                    func: func
                }
            );
            this.g2pOnSys = 0
            if(result.data.code == 0){
                this.cloneWav = result.data.result
                console.log("clone wav: ", this.cloneWav)
                this.$message.success("音频合成成功")
            } else {
                this.$message.error("音频合成失败，请检查后台错误后重试！")
            }
        },
        // 播放表格
        // Play the audio of table row `wavId`; the whole row object is passed to Play.
        async PlayTable(wavId){
            this.Play(this.vcDatas[wavId])
        },
        // 播放合成后的音频
        async PlaySyn(){
            if(this.cloneWav  === ""){
                this.$message.error("请合成音频后再播放！！")
                return
            } else {
                this.Play(this.cloneWav)
            }
        },
        // 播放音频
        async Play(wavBase){
                // 获取音频数据
                const result = await vcDownloadBase64(wavBase);
                // console.log('play result', result)
                if (result.data.code === 0) {
                    // base转换二进制数
                    let typedArray = this.base64ToUint8Array(result.data.result)
                    // 添加wav文件头
                    let view = new DataView(typedArray.buffer);
                    view = Recorder.encodeWAV(view, 16000, 16000, 1, 16, true);
                    // 播放音频
                    this.playAudioData(view.buffer);
                };
        },
        // base64解码
        base64ToUint8Array(base64String) {
            const padding = '='.repeat((4 - base64String.length % 4) % 4);
            const base64 = (base64String + padding)
                            .replace(/-/g, '+')
                            .replace(/_/g, '/');

            const rawData = window.atob(base64);
            const outputArray = new Uint8Array(rawData.length);

            for (let i = 0; i < rawData.length; ++i) {
                    outputArray[i] = rawData.charCodeAt(i);
            }
            return outputArray;
        }, 
        // 播放音频
        playAudioData( wav_buffer ) {
        audioCtx.decodeAudioData(wav_buffer, buffer => {
            var source = audioCtx.createBufferSource();
            source.buffer = buffer;
            source.connect(audioCtx.destination);
            source.start();
        }, function(e) {
            Recorder.throwError(e);
            })
        },
    },
}
</script>

<style lang="less" scoped>
// @import "./style.less";
.voiceclone {
    width: 1200px;
    height: 410px;
    background: #FFFFFF;
    padding: 5px 80px 56px 80px;
    box-sizing: border-box;
}
.el-row {
  margin-bottom: 20px;
}
.grid-content {
  border-radius: 4px;
  min-height: 36px;
}
.play_board{
    height: 100%;
    display: flex;
    align-items: center;
}
</style>

================================================
FILE: demos/speech_web/web_client/src/components/style.less
================================================
// Full-width, fixed-height section containing a centred 1200px wrapper with
// a title, a description line and a tab area.
.experience {
    width: 100%;
    height: 709px;
    // background: url("../assets/image/在线体验-背景@2x.png") no-repeat;
    background-size: 100% 709px;
    background-position: initial;   
    // 
    .experience_wrapper {
        width: 1200px;
        height: 709px;
        margin: 0 auto;
        padding: 0px 0px 0px 0px;
        box-sizing: border-box;
        // background: red;
        // Section heading.
        .experience_title {
            height: 42px;
            font-family: PingFangSC-Semibold;
            font-size: 30px;
            color: #000000;
            font-weight: 600;
            line-height: 42px;
            text-align: center;
            margin-bottom: 10px;
        };
        // Smaller grey line under the heading.
        .experience_describe {
            height: 22px;
            font-family: PingFangSC-Regular;
            font-size: 14px;
            color: #666666;
            letter-spacing: 0;
            text-align: center;
            line-height: 22px;
            font-weight: 400;
            margin-bottom: 30px;
        };
        // Horizontally centred content row.
        .experience_content {
            width: 1200px;
            margin: 0 auto;
            display: flex;
            justify-content: center;
            .experience_tabs {
                
                    margin-top: 15px;
            
                    // Overrides for the direct-child ant-design tabs nav bar:
                    // centred tab list and larger tab labels.
                    & > .ant-tabs-nav {
                        margin-bottom: 20px;
                
                        &::before {
                            content: none;
                        }
                
                        .ant-tabs-nav-wrap {
                            justify-content: center;
                        }
                
                        .ant-tabs-tab {
                            font-size: 20px;
                        }
                
                        .ant-tabs-nav-list {
                            margin-right: -32px;
                            flex: none;
                        }
                    };
            
                // NOTE(review): this rule and the `&::before { content: none; }`
                // rule above both target the nav bar's ::before pseudo-element
                // with different `content` values; confirm which one is
                // intended to apply.
                .ant-tabs-nav::before {
                    position: absolute;
                    right: 0;
                    left: 0;
                    border-bottom: 1px solid #f6f7fe;
                    content: '';
                };
               
            };
        };
    };
};
// Empty, invisible block-level pseudo-element with `clear: both`, appended
// after the section's content so it sits below any floats.
.experience::after {
    content: "";
    display: block;
    clear: both;
    visibility: hidden;
}

================================================
FILE: demos/speech_web/web_client/src/main.js
================================================
import { createApp } from 'vue'
import ElementPlus from 'element-plus'
import * as ElementPlusIconsVue from '@element-plus/icons-vue'
import 'element-plus/dist/index.css'
import Antd from 'ant-design-vue';
import 'ant-design-vue/dist/antd.css';
import App from './App.vue'
import axios from 'axios'

// Create the root application and expose axios to components as `$http`.
const app = createApp(App)
app.config.globalProperties.$http = axios

// Register every Element Plus icon as a global component under its export name.
Object.entries(ElementPlusIconsVue).forEach(([iconName, iconComponent]) => {
    app.component(iconName, iconComponent)
})

// Install both UI libraries, then mount the application.
app.use(ElementPlus)
app.use(Antd)
app.mount('#app')


================================================
FILE: demos/speech_web/web_client/vite.config.js
================================================
import { defineConfig } from 'vite'
import vue from '@vitejs/plugin-vue'

// https://vitejs.dev/config/
export default defineConfig({
  plugins: [vue()],
  // NOTE(review): `preprocessorOptions` is keyed by preprocessor name; the
  // components in this project use `less`, so confirm that a `css` key with
  // `charset: false` has the intended effect.
  css: 
    { preprocessorOptions:
      { css: 
        { 
          charset: false 
        } 
      } 
    },
  build: {
      // Inline threshold in bytes (2kb); given as a number, which is the
      // documented type for this option.
      assetsInlineLimit: 2048
  },
  server: {
    // Listen on all interfaces.
    host: "0.0.0.0",
    proxy: {
      // Forward /api/* to the backend on port 8010, stripping the /api prefix.
      "/api": {
        target: "http://localhost:8010",
        changeOrigin: true,
        rewrite: (path) => path.replace(/^\/api/, ""),
      },
    },
  },
})


================================================
FILE: demos/story_talker/README.md
================================================
([简体中文](./README_cn.md)|English)

# Story Talker
## Introduction
Storybooks are important early-education books for children, but parents often don't have enough time to read them aloud. Very young children may not yet be able to read the Chinese characters in a storybook, and sometimes children simply want to "listen" rather than "read".

You can use `PaddleOCR` to get the text of a storybook and read it by the `TTS` module of `PaddleSpeech`.

## Usage
Run the following command line to get started:
```
./run.sh
```
The result is shown in our [notebook](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/tutorial/tts/tts_tutorial.ipynb).


================================================
FILE: demos/story_talker/README_cn.md
================================================

(简体中文|[English](./README.md))

# Story Talker

## 简介

故事书是非常重要的儿童启蒙书，但家长通常没有足够的时间为孩子读故事书。对于非常小的孩子，他们可能不理解故事书中的汉字。或有时，孩子们只是想“听”，而不想“读”。

您可以使用 `PaddleOCR` 获取故事书的文本，并通过 `PaddleSpeech` 的 `TTS` 模块进行阅读。

## 使用

运行以下命令行开始：

```
./run.sh
```

结果已显示在 [notebook](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/tutorial/tts/tts_tutorial.ipynb)。


================================================
FILE: demos/story_talker/ocr.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import re
from pathlib import Path

import paddle
from paddleocr import draw_ocr
from paddleocr import PaddleOCR
from PIL import Image


def evaluate(args, ocr):
    """Run OCR on every image in ``args.img_dir`` and export the results.

    For each image this writes an annotated copy (detected boxes, texts and
    scores drawn with ``draw_ocr``) to ``<output_dir>/imgs/<name>`` and
    appends one line ``"<id> <sentence>"`` to ``<output_dir>/sentences.txt``.
    ``<id>`` is the file name up to its first dot, and ``<sentence>`` is the
    concatenated recognized text with everything except Chinese characters
    and the punctuation ``，。？、`` removed.

    Args:
        args: Parsed arguments providing ``img_dir``, ``output_dir`` and
            ``font_path``.
        ocr: OCR engine exposing ``ocr(path, cls=True)``, whose result is
            iterated as ``(box, (text, score))`` entries.
    """
    img_dir = Path(args.img_dir)
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    img_out_dir = output_dir / "imgs"
    img_out_dir.mkdir(parents=True, exist_ok=True)
    # Keep only Chinese characters and the listed punctuation. The previous
    # class `[^(\u4e00-\u9fa5)+，。？、]` also kept literal "(", ")" and "+",
    # because those characters are not special inside a character class.
    pattern = re.compile(r'[^\u4e00-\u9fa5，。？、]')
    # Explicit UTF-8: the sentences are Chinese and must not depend on the
    # process locale (path.sh exports LC_ALL=C).
    with open(output_dir / "sentences.txt", "w", encoding="utf-8") as wf:
        # Sorted so the order of lines in sentences.txt is reproducible.
        for name in sorted(os.listdir(img_dir)):
            utt_id = name.split(".")[0]
            img_path = img_dir / name
            result = ocr.ocr(str(img_path), cls=True)
            # Draw the recognition result on top of the image.
            image = Image.open(img_path).convert('RGB')
            boxes = [line[0] for line in result]
            txts = [line[1][0] for line in result]
            scores = [line[1][1] for line in result]
            im_show = draw_ocr(
                image, boxes, txts, scores, font_path=args.font_path)
            im_show = Image.fromarray(im_show)
            sentence = pattern.sub('', "".join(txts))
            im_show.save(img_out_dir / name)
            wf.write(utt_id + " " + sentence + "\n")


def main():
    """Parse command-line options, load the OCR model and process the images.

    Options:
        --img-dir: directory containing the input images (default ``imgs``).
        --output-dir: directory receiving ``sentences.txt`` and the annotated
            images (default ``output``).
        --font-path: font file used when drawing recognized text
            (default ``simfang.ttf``).
        --device: device passed to ``paddle.set_device`` (default ``gpu``,
            which is what the script always used before).
    """
    parser = argparse.ArgumentParser(
        description="Run OCR on storybook images and export the recognized "
        "Chinese sentences.")
    parser.add_argument("--img-dir", default="imgs", type=str, help="img_dir.")
    parser.add_argument(
        "--output-dir",
        type=str,
        default="output",
        help="output sentences path.")
    parser.add_argument(
        "--font-path", type=str, default="simfang.ttf", help="font path")
    parser.add_argument(
        "--device",
        type=str,
        default="gpu",
        help="device to run on, e.g. 'gpu' or 'cpu'.")
    args = parser.parse_args()

    paddle.set_device(args.device)
    # need to run only once to download and load model into memory
    ocr = PaddleOCR(use_angle_cls=True, lang='ch')

    evaluate(args, ocr)


if __name__ == "__main__":
    main()


================================================
FILE: demos/story_talker/path.sh
================================================
#!/bin/bash
# Environment setup for the story_talker demo; sourced by run.sh.

# Repository root, two levels above the current working directory. Quoted so
# that a working directory containing spaces resolves correctly.
export MAIN_ROOT="$(realpath "${PWD}/../../")"

export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export LC_ALL=C

export PYTHONDONTWRITEBYTECODE=1
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

# Directory of the experiment scripts for the selected acoustic model.
MODEL=fastspeech2
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}


================================================
FILE: demos/story_talker/run.sh
================================================
#!/bin/bash
# Story-talker demo pipeline, split into stages:
#   0: install PaddleOCR
#   1: download pretrained TTS models, the font and a sample image
#   2: run OCR on the images (writes into output/)
#   3: synthesize speech from output/sentences.txt
source path.sh

gpus=0
stage=0
stop_stage=100

# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this can not be mixed use with `$1`, `$2` ...
source "${MAIN_ROOT}/utils/parse_options.sh" || exit 1

mkdir -p download

if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    # install PaddleOCR
    pip install "paddleocr>=2.0.1"
fi

if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    # download pretrained tts models and unzip
    # `wget -c` resumes or skips files that are already present, so re-running
    # this stage does not pile up `*.zip.1` duplicates; `unzip -o` overwrites
    # without stopping at an interactive prompt.
    wget -c -P download https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_baker_ckpt_0.4.zip
    unzip -o -d download download/pwg_baker_ckpt_0.4.zip
    wget -c -P download https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_ckpt_0.4.zip
    unzip -o -d download download/fastspeech2_nosil_baker_ckpt_0.4.zip
    # download sources
    wget -c -P download https://paddlespeech.cdn.bcebos.com/demos/story_talker/simfang.ttf
    wget -c -P download/imgs https://paddlespeech.cdn.bcebos.com/demos/story_talker/000.jpg
fi

if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    # run ocr
    CUDA_VISIBLE_DEVICES=${gpus} \
    python3 ocr.py --img-dir=download/imgs --output-dir=output --font-path=download/simfang.ttf
fi

if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    # run tts
    CUDA_VISIBLE_DEVICES=${gpus} \
    python3 "${BIN_DIR}/../synthesize_e2e.py" \
        --am=fastspeech2_csmsc \
        --am_config=download/fastspeech2_nosil_baker_ckpt_0.4/default.yaml \
        --am_ckpt=download/fastspeech2_nosil_baker_ckpt_0.4/snapshot_iter_76000.pdz \
        --am_stat=download/fastspeech2_nosil_baker_ckpt_0.4/speech_stats.npy  \
        --voc=pwgan_csmsc \
        --voc_config=download/pwg_baker_ckpt_0.4/pwg_default.yaml \
        --voc_ckpt=download/pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \
        --voc_stat=download/pwg_baker_ckpt_0.4/pwg_stats.npy \
        --lang=zh \
        --text=output/sentences.txt \
        --output_dir=output/wavs \
        --inference_dir=output/inference \
        --phones_dict=download/fastspeech2_nosil_baker_ckpt_0.4/phone_id_map.txt
    # output/inference only holds the exported static models, which this demo
    # does not need, so remove it
    rm -rf output/inference
fi


================================================
FILE: demos/streaming_asr_server/.gitignore
================================================
exp


================================================
FILE: demos/streaming_asr_server/README.md
================================================
([简体中文](./README_cn.md)|English)

> conf/ws_ds2_application.yaml needs onnxruntime>=1.11.0

# Streaming ASR Server

## Introduction
This demo is an implementation of starting the streaming speech service and accessing the service. It can be achieved with a single command using `paddlespeech_server` and `paddlespeech_client` or a few lines of code in python.

The streaming ASR server only supports the `websocket` protocol; it does not support the `http` protocol.

For service interface definitions, please refer to:
- [PaddleSpeech Streaming Server WebSocket API](https://github.com/PaddlePaddle/PaddleSpeech/wiki/PaddleSpeech-Server-WebSocket-API)

## Usage
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

It is recommended to use **paddlepaddle 2.4rc** or above.

You can choose one way from easy, medium and hard to install paddlespeech.

**If you install in easy mode, you need to prepare the yaml file by yourself; you can refer to the yaml files in the `conf` directory.**

### 2. Prepare config File
The configuration file can be found in `conf/ws_application.yaml` or `conf/ws_conformer_wenetspeech_application.yaml`.

At present, the models integrated for the speech task include DeepSpeech2 and Conformer.


The input of the ASR client demo should be a WAV file (`.wav`), and its sample rate must be the same as the model's.

Here is a sample file for this ASR client demo that can be downloaded:
```bash
wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav
```

### 3. Server Usage
- Command Line (Recommended)
  **Note:** The default deployment of the server is on the 'CPU' device, which can be deployed on the 'GPU' by modifying the 'device' parameter in the service configuration file.
  ```bash
  # in PaddleSpeech/demos/streaming_asr_server start the service
   paddlespeech_server start --config_file ./conf/ws_conformer_wenetspeech_application.yaml
  # if you want to increase decoding speed, you can use the config file below, it will increase decoding speed and reduce accuracy  
   paddlespeech_server start --config_file ./conf/ws_conformer_wenetspeech_application_faster.yaml
  ```

  Usage:
  
  ```bash
  paddlespeech_server start --help
  ```
  Arguments:
  - `config_file`: yaml file of the app, default: `./conf/application.yaml`
  - `log_file`: log file. Default: `./log/paddlespeech.log`

  Output:
  ```text
  [2022-05-14 04:56:13,086] [    INFO] - create the online asr engine instance
  [2022-05-14 04:56:13,086] [    INFO] - paddlespeech_server set the device: cpu
  [2022-05-14 04:56:13,087] [    INFO] - Load the pretrained model, tag = conformer_online_wenetspeech-zh-16k
  [2022-05-14 04:56:13,087] [    INFO] - File /root/.paddlespeech/models/conformer_online_wenetspeech-zh-16k/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar.gz md5        checking...
  [2022-05-14 04:56:17,542] [    INFO] - Use pretrained model stored in: /root/.paddlespeech/models/conformer_online_wenetspeech-zh-16k/asr1_chunk_conformer_wenetspeech_ckpt_1.  0.0a.model.tar
  [2022-05-14 04:56:17,543] [    INFO] - /root/.paddlespeech/models/conformer_online_wenetspeech-zh-16k/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar
  [2022-05-14 04:56:17,543] [    INFO] - /root/.paddlespeech/models/conformer_online_wenetspeech-zh-16k/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar/model.yaml
  [2022-05-14 04:56:17,543] [    INFO] - /root/.paddlespeech/models/conformer_online_wenetspeech-zh-16k/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar/exp/               chunk_conformer/checkpoints/avg_10.pdparams
  [2022-05-14 04:56:17,543] [    INFO] - /root/.paddlespeech/models/conformer_online_wenetspeech-zh-16k/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar/exp/               chunk_conformer/checkpoints/avg_10.pdparams
  [2022-05-14 04:56:17,852] [    INFO] - start to create the stream conformer asr engine
  [2022-05-14 04:56:17,863] [    INFO] - model name: conformer_online
  [2022-05-14 04:56:22,756] [    INFO] - create the transformer like model success
  [2022-05-14 04:56:22,758] [    INFO] - Initialize ASR server engine successfully.
  INFO:     Started server process [4242]
  [2022-05-14 04:56:22] [INFO] [server.py:75] Started server process [4242]
  INFO:     Waiting for application startup.
  [2022-05-14 04:56:22] [INFO] [on.py:45] Waiting for application startup.
  INFO:     Application startup complete.
  [2022-05-14 04:56:22] [INFO] [on.py:59] Application startup complete.
  INFO:     Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit)
  [2022-05-14 04:56:22] [INFO] [server.py:211] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit)
  ```

- Python API
  **Note:** The default deployment of the server is on the 'CPU' device, which can be deployed on the 'GPU' by modifying the 'device' parameter in the service configuration file.
  ```python
  # in PaddleSpeech/demos/streaming_asr_server directory
  from paddlespeech.server.bin.paddlespeech_server import ServerExecutor

  server_executor = ServerExecutor()
  server_executor(
      config_file="./conf/ws_conformer_wenetspeech_application_faster.yaml",
      log_file="./log/paddlespeech.log")
  ```

  Output:
  ```text
  [2022-05-14 04:56:13,086] [    INFO] - create the online asr engine instance
  [2022-05-14 04:56:13,086] [    INFO] - paddlespeech_server set the device: cpu
  [2022-05-14 04:56:13,087] [    INFO] - Load the pretrained model, tag = conformer_online_wenetspeech-zh-16k
  [2022-05-14 04:56:13,087] [    INFO] - File /root/.paddlespeech/models/conformer_online_wenetspeech-zh-16k/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar.gz md5        checking...
  [2022-05-14 04:56:17,542] [    INFO] - Use pretrained model stored in: /root/.paddlespeech/models/conformer_online_wenetspeech-zh-16k/asr1_chunk_conformer_wenetspeech_ckpt_1.  0.0a.model.tar
  [2022-05-14 04:56:17,543] [    INFO] - /root/.paddlespeech/models/conformer_online_wenetspeech-zh-16k/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar
  [2022-05-14 04:56:17,543] [    INFO] - /root/.paddlespeech/models/conformer_online_wenetspeech-zh-16k/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar/model.yaml
  [2022-05-14 04:56:17,543] [    INFO] - /root/.paddlespeech/models/conformer_online_wenetspeech-zh-16k/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar/exp/               chunk_conformer/checkpoints/avg_10.pdparams
  [2022-05-14 04:56:17,543] [    INFO] - /root/.paddlespeech/models/conformer_online_wenetspeech-zh-16k/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar/exp/               chunk_conformer/checkpoints/avg_10.pdparams
  [2022-05-14 04:56:17,852] [    INFO] - start to create the stream conformer asr engine
  [2022-05-14 04:56:17,863] [    INFO] - model name: conformer_online
  [2022-05-14 04:56:22,756] [    INFO] - create the transformer like model success
  [2022-05-14 04:56:22,758] [    INFO] - Initialize ASR server engine successfully.
  INFO:     Started server process [4242]
  [2022-05-14 04:56:22] [INFO] [server.py:75] Started server process [4242]
  INFO:     Waiting for application startup.
  [2022-05-14 04:56:22] [INFO] [on.py:45] Waiting for application startup.
  INFO:     Application startup complete.
  [2022-05-14 04:56:22] [INFO] [on.py:59] Application startup complete.
  INFO:     Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit)
  [2022-05-14 04:56:22] [INFO] [server.py:211] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit)
  ```


### 4. ASR Client Usage

**Note:** The response time will be slightly longer when using the client for the first time
- Command Line (Recommended)

  If `127.0.0.1` is not accessible, you need to use the actual service IP address.

  ```bash
  paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --input ./zh.wav
  ```

  Usage:
  
  ```bash
  paddlespeech_client asr_online --help
  ```

  Arguments:
  - `server_ip`: server ip. Default: 127.0.0.1
  - `port`: server port. Default: 8090
  - `input`(required): Audio file to be recognized.
  - `sample_rate`: Audio sampling rate, default: 16000.
  - `lang`: Language. Default: "zh_cn".
  - `audio_format`: Audio format. Default: "wav".
  - `punc.server_ip`: punctuation server ip. Default: None.
  - `punc.port`: punctuation server port. Default: None.

  Output:
  ```text
  [2022-05-06 21:10:35,598] [    INFO] - Start to do streaming asr client
  [2022-05-06 21:10:35,600] [    INFO] - asr websocket client start
  [2022-05-06 21:10:35,600] [    INFO] - endpoint: ws://127.0.0.1:8390/paddlespeech/asr/streaming
  [2022-05-06 21:10:35,600] [    INFO] - start to process the wavscp: ./zh.wav
  [2022-05-06 21:10:35,670] [    INFO] - client receive msg={"status": "ok", "signal": "server_ready"}
  [2022-05-06 21:10:35,699] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:35,713] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:35,726] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:35,738] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:35,750] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:35,762] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:35,774] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:35,786] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:36,387] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:36,398] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:36,407] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:36,416] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:36,425] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:36,434] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:36,442] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:36,930] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:10:36,938] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:10:36,946] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:10:36,954] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:10:36,962] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:10:36,970] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:10:36,977] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:10:36,985] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:10:37,484] [    INFO] - client receive msg={'result': '我认为跑步最重要的'}
  [2022-05-06 21:10:37,492] [    INFO] - client receive msg={'result': '我认为跑步最重要的'}
  [2022-05-06 21:10:37,500] [    INFO] - client receive msg={'result': '我认为跑步最重要的'}
  [2022-05-06 21:10:37,508] [    INFO] - client receive msg={'result': '我认为跑步最重要的'}
  [2022-05-06 21:10:37,517] [    INFO] - client receive msg={'result': '我认为跑步最重要的'}
  [2022-05-06 21:10:37,525] [    INFO] - client receive msg={'result': '我认为跑步最重要的'}
  [2022-05-06 21:10:37,532] [    INFO] - client receive msg={'result': '我认为跑步最重要的'}
  [2022-05-06 21:10:38,050] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:10:38,058] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:10:38,066] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:10:38,073] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:10:38,081] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:10:38,089] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:10:38,097] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:10:38,105] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:10:38,630] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给'}
  [2022-05-06 21:10:38,639] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给'}
  [2022-05-06 21:10:38,647] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给'}
  [2022-05-06 21:10:38,655] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给'}
  [2022-05-06 21:10:38,663] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给'}
  [2022-05-06 21:10:38,671] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给'}
  [2022-05-06 21:10:38,679] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给'}
  [2022-05-06 21:10:39,216] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:10:39,224] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:10:39,232] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:10:39,240] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:10:39,248] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:10:39,256] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:10:39,264] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:10:39,272] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:10:39,885] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康'}
  [2022-05-06 21:10:39,896] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康'}
  [2022-05-06 21:10:39,905] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康'}
  [2022-05-06 21:10:39,915] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康'}
  [2022-05-06 21:10:39,924] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康'}
  [2022-05-06 21:10:39,934] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康'}
  [2022-05-06 21:10:44,827] [    INFO] - client final receive msg={'status': 'ok', 'signal': 'finished', 'result': '我认为跑步最重要的就是给我带来了身体健康', 'times': [{'w': '我', 'bg': 0.0, 'ed': 0.7000000000000001}, {'w': '认', 'bg': 0.7000000000000001, 'ed': 0.84}, {'w': '为', 'bg': 0.84, 'ed': 1.0}, {'w': '跑', 'bg': 1.0, 'ed': 1.18}, {'w': '步', 'bg': 1.18, 'ed': 1.36}, {'w': '最', 'bg': 1.36, 'ed': 1.5}, {'w': '重', 'bg': 1.5, 'ed': 1.6400000000000001}, {'w': '要', 'bg': 1.6400000000000001, 'ed': 1.78}, {'w': '的', 'bg': 1.78, 'ed': 1.9000000000000001}, {'w': '就', 'bg': 1.9000000000000001, 'ed': 2.06}, {'w': '是', 'bg': 2.06, 'ed': 2.62}, {'w': '给', 'bg': 2.62, 'ed': 3.16}, {'w': '我', 'bg': 3.16, 'ed': 3.3200000000000003}, {'w': '带', 'bg': 3.3200000000000003, 'ed': 3.48}, {'w': '来', 'bg': 3.48, 'ed': 3.62}, {'w': '了', 'bg': 3.62, 'ed': 3.7600000000000002}, {'w': '身', 'bg': 3.7600000000000002, 'ed': 3.9}, {'w': '体', 'bg': 3.9, 'ed': 4.0600000000000005}, {'w': '健', 'bg': 4.0600000000000005, 'ed': 4.26}, {'w': '康', 'bg': 4.26, 'ed': 4.96}]}
  [2022-05-06 21:10:44,827] [    INFO] - audio duration: 4.9968125, elapsed time: 9.225094079971313, RTF=1.846195765794957
  [2022-05-06 21:10:44,828] [    INFO] - asr websocket client finished : 我认为跑步最重要的就是给我带来了身体健康
  ```

- Python API
  ```python
  from paddlespeech.server.bin.paddlespeech_client import ASROnlineClientExecutor

  asrclient_executor = ASROnlineClientExecutor()
  res = asrclient_executor(
      input="./zh.wav",
      server_ip="127.0.0.1",
      port=8090,
      sample_rate=16000,
      lang="zh_cn",
      audio_format="wav")
  print(res)
  ```

  Output:
  ```text
  [2022-05-06 21:14:03,137] [    INFO] - asr websocket client start
  [2022-05-06 21:14:03,137] [    INFO] - endpoint: ws://127.0.0.1:8390/paddlespeech/asr/streaming
  [2022-05-06 21:14:03,149] [    INFO] - client receive msg={"status": "ok", "signal": "server_ready"}
  [2022-05-06 21:14:03,167] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:03,181] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:03,194] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:03,207] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:03,219] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:03,230] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:03,241] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:03,252] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:03,768] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:03,776] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:03,784] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:03,792] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:03,800] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:03,807] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:03,815] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:04,301] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:14:04,309] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:14:04,317] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:14:04,325] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:14:04,333] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:14:04,341] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:14:04,349] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:14:04,356] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:14:04,855] [    INFO] - client receive msg={'result': '我认为跑步最重要的'}
  [2022-05-06 21:14:04,864] [    INFO] - client receive msg={'result': '我认为跑步最重要的'}
  [2022-05-06 21:14:04,871] [    INFO] - client receive msg={'result': '我认为跑步最重要的'}
  [2022-05-06 21:14:04,879] [    INFO] - client receive msg={'result': '我认为跑步最重要的'}
  [2022-05-06 21:14:04,887] [    INFO] - client receive msg={'result': '我认为跑步最重要的'}
  [2022-05-06 21:14:04,894] [    INFO] - client receive msg={'result': '我认为跑步最重要的'}
  [2022-05-06 21:14:04,902] [    INFO] - client receive msg={'result': '我认为跑步最重要的'}
  [2022-05-06 21:14:05,418] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:14:05,426] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:14:05,434] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:14:05,442] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:14:05,449] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:14:05,457] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:14:05,465] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:14:05,473] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:14:05,996] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给'}
  [2022-05-06 21:14:06,006] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给'}
  [2022-05-06 21:14:06,013] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给'}
  [2022-05-06 21:14:06,021] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给'}
  [2022-05-06 21:14:06,029] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给'}
  [2022-05-06 21:14:06,037] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给'}
  [2022-05-06 21:14:06,045] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给'}
  [2022-05-06 21:14:06,581] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:14:06,589] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:14:06,597] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:14:06,605] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:14:06,613] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:14:06,621] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:14:06,628] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:14:06,636] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:14:07,188] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康'}
  [2022-05-06 21:14:07,196] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康'}
  [2022-05-06 21:14:07,203] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康'}
  [2022-05-06 21:14:07,211] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康'}
  [2022-05-06 21:14:07,219] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康'}
  [2022-05-06 21:14:07,226] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康'}
  [2022-05-06 21:14:12,158] [    INFO] - client final receive msg={'status': 'ok', 'signal': 'finished', 'result': '我认为跑步最重要的就是给我带来了身体健康', 'times': [{'w': '我', 'bg': 0.0, 'ed': 0.7000000000000001}, {'w': '认', 'bg': 0.7000000000000001, 'ed': 0.84}, {'w': '为', 'bg': 0.84, 'ed': 1.0}, {'w': '跑', 'bg': 1.0, 'ed': 1.18}, {'w': '步', 'bg': 1.18, 'ed': 1.36}, {'w': '最', 'bg': 1.36, 'ed': 1.5}, {'w': '重', 'bg': 1.5, 'ed': 1.6400000000000001}, {'w': '要', 'bg': 1.6400000000000001, 'ed': 1.78}, {'w': '的', 'bg': 1.78, 'ed': 1.9000000000000001}, {'w': '就', 'bg': 1.9000000000000001, 'ed': 2.06}, {'w': '是', 'bg': 2.06, 'ed': 2.62}, {'w': '给', 'bg': 2.62, 'ed': 3.16}, {'w': '我', 'bg': 3.16, 'ed': 3.3200000000000003}, {'w': '带', 'bg': 3.3200000000000003, 'ed': 3.48}, {'w': '来', 'bg': 3.48, 'ed': 3.62}, {'w': '了', 'bg': 3.62, 'ed': 3.7600000000000002}, {'w': '身', 'bg': 3.7600000000000002, 'ed': 3.9}, {'w': '体', 'bg': 3.9, 'ed': 4.0600000000000005}, {'w': '健', 'bg': 4.0600000000000005, 'ed': 4.26}, {'w': '康', 'bg': 4.26, 'ed': 4.96}]}
  [2022-05-06 21:14:12,159] [    INFO] - audio duration: 4.9968125, elapsed time: 9.019973039627075, RTF=1.8051453881103354
  [2022-05-06 21:14:12,160] [    INFO] - asr websocket client finished
  ```


## Punctuation service

### 1. Server usage
 
- Command Line
  **Note:** The default deployment of the server is on the 'CPU' device, which can be deployed on the 'GPU' by modifying the 'device' parameter in the service configuration file.
  ```bash
  # in PaddleSpeech/demos/streaming_asr_server directory, launch the punctuation service
  paddlespeech_server start --config_file conf/punc_application.yaml
  ```

   Usage:
  ```bash
  paddlespeech_server start --help
  ```
  
  Arguments:
  - `config_file`: configuration file.
  - `log_file`: log file.


  Output:
  ```text
  [2022-05-02 17:59:26,285] [    INFO] - Create the TextEngine Instance
  [2022-05-02 17:59:26,285] [    INFO] - Init the text engine
  [2022-05-02 17:59:26,285] [    INFO] - Text Engine set the device: gpu:0
  [2022-05-02 17:59:26,286] [    INFO] - File /home/users/xiongxinlei/.paddlespeech/models/ernie_linear_p3_wudao-punc-zh/ernie_linear_p3_wudao-punc-zh.tar.gz md5 checking...
  [2022-05-02 17:59:30,810] [    INFO] - Use pretrained model stored in: /home/users/xiongxinlei/.paddlespeech/models/ernie_linear_p3_wudao-punc-zh/ernie_linear_p3_wudao-punc-zh.tar
  W0502 17:59:31.486552  9595 device_context.cc:447] Please NOTE: device: 0, GPU Compute Capability: 6.1, Driver API Version: 10.2, Runtime API Version: 10.2
  W0502 17:59:31.491360  9595 device_context.cc:465] device: 0, cuDNN Version: 7.6.
  [2022-05-02 17:59:34,688] [    INFO] - Already cached /home/users/xiongxinlei/.paddlenlp/models/ernie-1.0/vocab.txt
  [2022-05-02 17:59:34,701] [    INFO] - Init the text engine successfully
  INFO:     Started server process [9595]
  [2022-05-02 17:59:34] [INFO] [server.py:75] Started server process [9595]
  INFO:     Waiting for application startup.
  [2022-05-02 17:59:34] [INFO] [on.py:45] Waiting for application startup.
  INFO:     Application startup complete.
  [2022-05-02 17:59:34] [INFO] [on.py:59] Application startup complete.
  INFO:     Uvicorn running on http://0.0.0.0:8190 (Press CTRL+C to quit)
  [2022-05-02 17:59:34] [INFO] [server.py:206] Uvicorn running on http://0.0.0.0:8190 (Press CTRL+C to quit)
  ```

- Python API
  **Note:** The default deployment of the server is on the 'CPU' device, which can be deployed on the 'GPU' by modifying the 'device' parameter in the service configuration file.
  ```python
  # in PaddleSpeech/demos/streaming_asr_server directory
  from paddlespeech.server.bin.paddlespeech_server import ServerExecutor

  server_executor = ServerExecutor()
  server_executor(
      config_file="./conf/punc_application.yaml", 
      log_file="./log/paddlespeech.log")
  ```

  Output:
  ```text
  [2022-05-02 18:09:02,542] [    INFO] - Create the TextEngine Instance
  [2022-05-02 18:09:02,543] [    INFO] - Init the text engine
  [2022-05-02 18:09:02,543] [    INFO] - Text Engine set the device: gpu:0
  [2022-05-02 18:09:02,545] [    INFO] - File /home/users/xiongxinlei/.paddlespeech/models/ernie_linear_p3_wudao-punc-zh/ernie_linear_p3_wudao-punc-zh.tar.gz md5 checking...
  [2022-05-02 18:09:06,919] [    INFO] - Use pretrained model stored in: /home/users/xiongxinlei/.paddlespeech/models/ernie_linear_p3_wudao-punc-zh/ernie_linear_p3_wudao-punc-zh.tar
  W0502 18:09:07.523002 22615 device_context.cc:447] Please NOTE: device: 0, GPU Compute Capability: 6.1, Driver API Version: 10.2, Runtime API Version: 10.2
  W0502 18:09:07.527882 22615 device_context.cc:465] device: 0, cuDNN Version: 7.6.
  [2022-05-02 18:09:10,900] [    INFO] - Already cached /home/users/xiongxinlei/.paddlenlp/models/ernie-1.0/vocab.txt
  [2022-05-02 18:09:10,913] [    INFO] - Init the text engine successfully
  INFO:     Started server process [22615]
  [2022-05-02 18:09:10] [INFO] [server.py:75] Started server process [22615]
  INFO:     Waiting for application startup.
  [2022-05-02 18:09:10] [INFO] [on.py:45] Waiting for application startup.
  INFO:     Application startup complete.
  [2022-05-02 18:09:10] [INFO] [on.py:59] Application startup complete.
  INFO:     Uvicorn running on http://0.0.0.0:8190 (Press CTRL+C to quit)
  [2022-05-02 18:09:10] [INFO] [server.py:206] Uvicorn running on http://0.0.0.0:8190 (Press CTRL+C to quit)
  ```

### 2. Client usage
**Note:** The response time will be slightly longer when using the client for the first time

- Command line:

  If `127.0.0.1` is not accessible, you need to use the actual service IP address.

  ```bash
  paddlespeech_client text --server_ip 127.0.0.1 --port 8190 --input "我认为跑步最重要的就是给我带来了身体健康"
  ```
  
  Output:
  ```text
  [2022-05-02 18:12:29,767] [    INFO] - The punc text: 我认为跑步最重要的就是给我带来了身体健康。
  [2022-05-02 18:12:29,767] [    INFO] - Response time 0.096548 s.
  ```

- Python API

  ```python
  from paddlespeech.server.bin.paddlespeech_client import TextClientExecutor

  textclient_executor = TextClientExecutor()
  res = textclient_executor(
      input="我认为跑步最重要的就是给我带来了身体健康",
      server_ip="127.0.0.1",
      port=8190,)
  print(res)
  ```

  Output:
  ```text
  我认为跑步最重要的就是给我带来了身体健康。
  ```

## Join streaming asr and punctuation server

By default, each server is deployed on the 'CPU' device. Speech recognition and punctuation prediction can be deployed on different 'GPU' devices by modifying the 'device' parameter in their respective service configuration files.

We use two services, `streaming_asr_server.py` and `punc_server.py`, to launch the streaming speech recognition and punctuation prediction services respectively. The `websocket_client.py` script can then be used to call the streaming speech recognition and punctuation prediction services at the same time.

### 1. Start the two servers

```bash
# Note: streaming speech recognition and punctuation prediction are configured on different graphics cards through configuration files
bash server.sh
```

### 2. Call client
- Command line

  If `127.0.0.1` is not accessible, you need to use the actual service IP address.

  ```bash
  paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --punc.server_ip 127.0.0.1 --punc.port 8190 --input ./zh.wav
  ```
  Output:
  ```text
  [2022-05-07 11:21:47,060] [    INFO] - asr websocket client start
  [2022-05-07 11:21:47,060] [    INFO] - endpoint: ws://127.0.0.1:8490/paddlespeech/asr/streaming
  [2022-05-07 11:21:47,080] [    INFO] - client receive msg={"status": "ok", "signal": "server_ready"}
  [2022-05-07 11:21:47,096] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:47,108] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:47,120] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:47,131] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:47,142] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:47,152] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:47,163] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:47,173] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:47,705] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:47,713] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:47,721] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:47,728] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:47,736] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:47,743] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:47,751] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:48,459] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:21:48,572] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:21:48,681] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:21:48,790] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:21:48,898] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:21:49,005] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:21:49,112] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:21:49,219] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:21:49,935] [    INFO] - client receive msg={'result': '我认为，跑步最重要的。'}
  [2022-05-07 11:21:50,062] [    INFO] - client receive msg={'result': '我认为，跑步最重要的。'}
  [2022-05-07 11:21:50,186] [    INFO] - client receive msg={'result': '我认为，跑步最重要的。'}
  [2022-05-07 11:21:50,310] [    INFO] - client receive msg={'result': '我认为，跑步最重要的。'}
  [2022-05-07 11:21:50,435] [    INFO] - client receive msg={'result': '我认为，跑步最重要的。'}
  [2022-05-07 11:21:50,560] [    INFO] - client receive msg={'result': '我认为，跑步最重要的。'}
  [2022-05-07 11:21:50,686] [    INFO] - client receive msg={'result': '我认为，跑步最重要的。'}
  [2022-05-07 11:21:51,444] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:21:51,606] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:21:51,744] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:21:51,882] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:21:52,020] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:21:52,159] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:21:52,298] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:21:52,437] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:21:53,298] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是给。'}
  [2022-05-07 11:21:53,450] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是给。'}
  [2022-05-07 11:21:53,589] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是给。'}
  [2022-05-07 11:21:53,728] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是给。'}
  [2022-05-07 11:21:53,867] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是给。'}
  [2022-05-07 11:21:54,007] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是给。'}
  [2022-05-07 11:21:54,146] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是给。'}
  [2022-05-07 11:21:55,002] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:21:55,148] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:21:55,292] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:21:55,437] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:21:55,584] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:21:55,731] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:21:55,877] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:21:56,021] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:21:56,842] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康。'}
  [2022-05-07 11:21:57,013] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康。'}
  [2022-05-07 11:21:57,174] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康。'}
  [2022-05-07 11:21:57,336] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康。'}
  [2022-05-07 11:21:57,497] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康。'}
  [2022-05-07 11:21:57,659] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康。'}
  [2022-05-07 11:22:03,035] [    INFO] - client final receive msg={'status': 'ok', 'signal': 'finished', 'result': '我认为跑步最重要的就是给我带来了身体健康。', 'times': [{'w': '我', 'bg': 0.0, 'ed': 0.7000000000000001}, {'w': '认', 'bg': 0.7000000000000001, 'ed': 0.84}, {'w': '为', 'bg': 0.84, 'ed': 1.0}, {'w': '跑', 'bg': 1.0, 'ed': 1.18}, {'w': '步', 'bg': 1.18, 'ed': 1.36}, {'w': '最', 'bg': 1.36, 'ed': 1.5}, {'w': '重', 'bg': 1.5, 'ed': 1.6400000000000001}, {'w': '要', 'bg': 1.6400000000000001, 'ed': 1.78}, {'w': '的', 'bg': 1.78, 'ed': 1.9000000000000001}, {'w': '就', 'bg': 1.9000000000000001, 'ed': 2.06}, {'w': '是', 'bg': 2.06, 'ed': 2.62}, {'w': '给', 'bg': 2.62, 'ed': 3.16}, {'w': '我', 'bg': 3.16, 'ed': 3.3200000000000003}, {'w': '带', 'bg': 3.3200000000000003, 'ed': 3.48}, {'w': '来', 'bg': 3.48, 'ed': 3.62}, {'w': '了', 'bg': 3.62, 'ed': 3.7600000000000002}, {'w': '身', 'bg': 3.7600000000000002, 'ed': 3.9}, {'w': '体', 'bg': 3.9, 'ed': 4.0600000000000005}, {'w': '健', 'bg': 4.0600000000000005, 'ed': 4.26}, {'w': '康', 'bg': 4.26, 'ed': 4.96}]}
  [2022-05-07 11:22:03,035] [    INFO] - audio duration: 4.9968125, elapsed time: 15.974023818969727, RTF=3.1968427510477384
  [2022-05-07 11:22:03,037] [    INFO] - asr websocket client finished
  [2022-05-07 11:22:03,037] [    INFO] - 我认为跑步最重要的就是给我带来了身体健康。
  [2022-05-07 11:22:03,037] [    INFO] - Response time 15.977116 s.
  ```

- Use script

  If `127.0.0.1` is not accessible, you need to use the actual service IP address.

  ```bash
  python3 local/websocket_client.py --server_ip 127.0.0.1 --port 8090 --punc.server_ip 127.0.0.1 --punc.port 8190 --wavfile ./zh.wav
  ```
  Output:
  ```text
  [2022-05-07 11:11:02,984] [    INFO] - Start to do streaming asr client
  [2022-05-07 11:11:02,985] [    INFO] - asr websocket client start
  [2022-05-07 11:11:02,985] [    INFO] - endpoint: ws://127.0.0.1:8490/paddlespeech/asr/streaming
  [2022-05-07 11:11:02,986] [    INFO] - start to process the wavscp: ./zh.wav
  [2022-05-07 11:11:03,006] [    INFO] - client receive msg={"status": "ok", "signal": "server_ready"}
  [2022-05-07 11:11:03,021] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:03,034] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:03,046] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:03,058] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:03,070] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:03,081] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:03,092] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:03,102] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:03,629] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:03,638] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:03,645] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:03,653] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:03,661] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:03,668] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:03,676] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:04,402] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:11:04,510] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:11:04,619] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:11:04,743] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:11:04,849] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:11:04,956] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:11:05,063] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:11:05,170] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:11:05,876] [    INFO] - client receive msg={'result': '我认为，跑步最重要的。'}
  [2022-05-07 11:11:06,019] [    INFO] - client receive msg={'result': '我认为，跑步最重要的。'}
  [2022-05-07 11:11:06,184] [    INFO] - client receive msg={'result': '我认为，跑步最重要的。'}
  [2022-05-07 11:11:06,342] [    INFO] - client receive msg={'result': '我认为，跑步最重要的。'}
  [2022-05-07 11:11:06,537] [    INFO] - client receive msg={'result': '我认为，跑步最重要的。'}
  [2022-05-07 11:11:06,727] [    INFO] - client receive msg={'result': '我认为，跑步最重要的。'}
  [2022-05-07 11:11:06,871] [    INFO] - client receive msg={'result': '我认为，跑步最重要的。'}
  [2022-05-07 11:11:07,617] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:11:07,769] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:11:07,905] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:11:08,043] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:11:08,186] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:11:08,326] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:11:08,466] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:11:08,611] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:11:09,431] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是给。'}
  [2022-05-07 11:11:09,571] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是给。'}
  [2022-05-07 11:11:09,714] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是给。'}
  [2022-05-07 11:11:09,853] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是给。'}
  [2022-05-07 11:11:09,992] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是给。'}
  [2022-05-07 11:11:10,129] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是给。'}
  [2022-05-07 11:11:10,266] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是给。'}
  [2022-05-07 11:11:11,113] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:11:11,296] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:11:11,439] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:11:11,582] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:11:11,727] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:11:11,869] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:11:12,011] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:11:12,153] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:11:12,969] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康。'}
  [2022-05-07 11:11:13,137] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康。'}
  [2022-05-07 11:11:13,297] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康。'}
  [2022-05-07 11:11:13,456] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康。'}
  [2022-05-07 11:11:13,615] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康。'}
  [2022-05-07 11:11:13,776] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康。'}
  [2022-05-07 11:11:18,915] [    INFO] - client final receive msg={'status': 'ok', 'signal': 'finished', 'result': '我认为跑步最重要的就是给我带来了身体健康。', 'times': [{'w': '我', 'bg': 0.0, 'ed': 0.7000000000000001}, {'w': '认', 'bg': 0.7000000000000001, 'ed': 0.84}, {'w': '为', 'bg': 0.84, 'ed': 1.0}, {'w': '跑', 'bg': 1.0, 'ed': 1.18}, {'w': '步', 'bg': 1.18, 'ed': 1.36}, {'w': '最', 'bg': 1.36, 'ed': 1.5}, {'w': '重', 'bg': 1.5, 'ed': 1.6400000000000001}, {'w': '要', 'bg': 1.6400000000000001, 'ed': 1.78}, {'w': '的', 'bg': 1.78, 'ed': 1.9000000000000001}, {'w': '就', 'bg': 1.9000000000000001, 'ed': 2.06}, {'w': '是', 'bg': 2.06, 'ed': 2.62}, {'w': '给', 'bg': 2.62, 'ed': 3.16}, {'w': '我', 'bg': 3.16, 'ed': 3.3200000000000003}, {'w': '带', 'bg': 3.3200000000000003, 'ed': 3.48}, {'w': '来', 'bg': 3.48, 'ed': 3.62}, {'w': '了', 'bg': 3.62, 'ed': 3.7600000000000002}, {'w': '身', 'bg': 3.7600000000000002, 'ed': 3.9}, {'w': '体', 'bg': 3.9, 'ed': 4.0600000000000005}, {'w': '健', 'bg': 4.0600000000000005, 'ed': 4.26}, {'w': '康', 'bg': 4.26, 'ed': 4.96}]}
  [2022-05-07 11:11:18,915] [    INFO] - audio duration: 4.9968125, elapsed time: 15.928460597991943, RTF=3.187724293835709
  [2022-05-07 11:11:18,916] [    INFO] - asr websocket client finished : 我认为跑步最重要的就是给我带来了身体健康
  ```

## Generate corresponding subtitle (.srt format) from audio file (.wav format or .mp3 format)

By default, each server is deployed on the `CPU` device. Speech recognition and punctuation prediction can be deployed on different `GPU` devices by modifying the `device` parameter in their respective service configuration files.

We use two services, `streaming_asr_server.py` and `punc_server.py`, to launch the streaming speech recognition service and the punctuation prediction service, respectively. The `websocket_client_srt.py` script can then be used to call both services at the same time and generate the corresponding subtitle file (.srt format).

**You need to install ffmpeg before running this script.**

**You should be in the directory `.../demos/streaming_asr_server/`.**

### 1. Start the two servers

```bash
# Note: streaming speech recognition and punctuation prediction are configured on different graphics cards through their configuration files
paddlespeech_server start --config_file ./conf/ws_conformer_wenetspeech_application.yaml
```

Open another terminal and run the following command:
```bash
paddlespeech_server start --config_file conf/punc_application.yaml
```

### 2. Call client

  ```bash
  python3 local/websocket_client_srt.py --server_ip 127.0.0.1 --port 8090 --punc.server_ip 127.0.0.1 --punc.port 8190 --wavfile ../../data/认知.mp3
  ```
  Output:
  ```text
  [2023-03-30 23:26:13,991] [    INFO] - Start to do streaming asr client
[2023-03-30 23:26:13,994] [    INFO] - asr websocket client start
[2023-03-30 23:26:13,994] [    INFO] - endpoint: http://127.0.0.1:8190/paddlespeech/text
[2023-03-30 23:26:13,994] [    INFO] - endpoint: ws://127.0.0.1:8090/paddlespeech/asr/streaming
[2023-03-30 23:26:14,475] [    INFO] - /home/fxb/PaddleSpeech-develop/data/认知.mp3 converted to /home/fxb/PaddleSpeech-develop/data/认知.wav
[2023-03-30 23:26:14,476] [    INFO] - start to process the wavscp: /home/fxb/PaddleSpeech-develop/data/认知.wav
[2023-03-30 23:26:14,515] [    INFO] - client receive msg={"status": "ok", "signal": "server_ready"}
[2023-03-30 23:26:14,533] [    INFO] - client receive msg={'result': ''}
[2023-03-30 23:26:14,545] [    INFO] - client receive msg={'result': ''}
[2023-03-30 23:26:14,556] [    INFO] - client receive msg={'result': ''}
[2023-03-30 23:26:14,572] [    INFO] - client receive msg={'result': ''}
[2023-03-30 23:26:14,588] [    INFO] - client receive msg={'result': ''}
[2023-03-30 23:26:14,600] [    INFO] - client receive msg={'result': ''}
[2023-03-30 23:26:14,613] [    INFO] - client receive msg={'result': ''}
[2023-03-30 23:26:14,626] [    INFO] - client receive msg={'result': ''}
[2023-03-30 23:26:15,122] [    INFO] - client receive msg={'result': '第一部'}
[2023-03-30 23:26:15,135] [    INFO] - client receive msg={'result': '第一部'}
[2023-03-30 23:26:15,154] [    INFO] - client receive msg={'result': '第一部'}
[2023-03-30 23:26:15,163] [    INFO] - client receive msg={'result': '第一部'}
[2023-03-30 23:26:15,175] [    INFO] - client receive msg={'result': '第一部'}
[2023-03-30 23:26:15,185] [    INFO] - client receive msg={'result': '第一部'}
[2023-03-30 23:26:15,196] [    INFO] - client receive msg={'result': '第一部'}
[2023-03-30 23:26:15,637] [    INFO] - client receive msg={'result': '第一部分是认'}
[2023-03-30 23:26:15,648] [    INFO] - client receive msg={'result': '第一部分是认'}
[2023-03-30 23:26:15,657] [    INFO] - client receive msg={'result': '第一部分是认'}
[2023-03-30 23:26:15,666] [    INFO] - client receive msg={'result': '第一部分是认'}
[2023-03-30 23:26:15,676] [    INFO] - client receive msg={'result': '第一部分是认'}
[2023-03-30 23:26:15,683] [    INFO] - client receive msg={'result': '第一部分是认'}
[2023-03-30 23:26:15,691] [    INFO] - client receive msg={'result': '第一部分是认'}
[2023-03-30 23:26:15,703] [    INFO] - client receive msg={'result': '第一部分是认'}
[2023-03-30 23:26:16,146] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:16,159] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:16,167] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:16,177] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:16,187] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:16,197] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:16,210] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:16,694] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:16,704] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:16,713] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:16,725] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:16,737] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:16,749] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:16,759] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:16,770] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:17,279] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通'}
[2023-03-30 23:26:17,302] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通'}
[2023-03-30 23:26:17,316] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通'}
[2023-03-30 23:26:17,332] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通'}
[2023-03-30 23:26:17,343] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通'}
[2023-03-30 23:26:17,358] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通'}
[2023-03-30 23:26:17,373] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通'}
[2023-03-30 23:26:17,958] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图'}
[2023-03-30 23:26:17,971] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图'}
[2023-03-30 23:26:17,987] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图'}
[2023-03-30 23:26:18,000] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图'}
[2023-03-30 23:26:18,017] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图'}
[2023-03-30 23:26:18,028] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图'}
[2023-03-30 23:26:18,038] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图'}
[2023-03-30 23:26:18,049] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图'}
[2023-03-30 23:26:18,653] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本'}
[2023-03-30 23:26:18,689] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本'}
[2023-03-30 23:26:18,701] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本'}
[2023-03-30 23:26:18,712] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本'}
[2023-03-30 23:26:18,723] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本'}
[2023-03-30 23:26:18,750] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本'}
[2023-03-30 23:26:18,767] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本'}
[2023-03-30 23:26:19,295] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式'}
[2023-03-30 23:26:19,307] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式'}
[2023-03-30 23:26:19,323] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式'}
[2023-03-30 23:26:19,332] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式'}
[2023-03-30 23:26:19,342] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式'}
[2023-03-30 23:26:19,349] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式'}
[2023-03-30 23:26:19,373] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式'}
[2023-03-30 23:26:19,389] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式'}
[2023-03-30 23:26:20,046] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生'}
[2023-03-30 23:26:20,055] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生'}
[2023-03-30 23:26:20,067] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生'}
[2023-03-30 23:26:20,076] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生'}
[2023-03-30 23:26:20,094] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生'}
[2023-03-30 23:26:20,124] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生'}
[2023-03-30 23:26:20,135] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生'}
[2023-03-30 23:26:20,732] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解'}
[2023-03-30 23:26:20,742] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解'}
[2023-03-30 23:26:20,757] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解'}
[2023-03-30 23:26:20,770] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解'}
[2023-03-30 23:26:20,782] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解'}
[2023-03-30 23:26:20,798] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解'}
[2023-03-30 23:26:20,815] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解'}
[2023-03-30 23:26:20,834] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解'}
[2023-03-30 23:26:21,390] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感'}
[2023-03-30 23:26:21,405] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感'}
[2023-03-30 23:26:21,416] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感'}
[2023-03-30 23:26:21,428] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感'}
[2023-03-30 23:26:21,448] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感'}
[2023-03-30 23:26:21,459] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感'}
[2023-03-30 23:26:21,473] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感'}
[2023-03-30 23:26:22,065] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作'}
[2023-03-30 23:26:22,085] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作'}
[2023-03-30 23:26:22,110] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作'}
[2023-03-30 23:26:22,118] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作'}
[2023-03-30 23:26:22,137] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作'}
[2023-03-30 23:26:22,144] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作'}
[2023-03-30 23:26:22,154] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作'}
[2023-03-30 23:26:22,169] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作'}
[2023-03-30 23:26:22,698] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理'}
[2023-03-30 23:26:22,709] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理'}
[2023-03-30 23:26:22,731] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理'}
[2023-03-30 23:26:22,743] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理'}
[2023-03-30 23:26:22,755] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理'}
[2023-03-30 23:26:22,771] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理'}
[2023-03-30 23:26:22,782] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理'}
[2023-03-30 23:26:23,415] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生'}
[2023-03-30 23:26:23,430] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生'}
[2023-03-30 23:26:23,442] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生'}
[2023-03-30 23:26:23,456] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生'}
[2023-03-30 23:26:23,470] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生'}
[2023-03-30 23:26:23,487] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生'}
[2023-03-30 23:26:23,498] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生'}
[2023-03-30 23:26:23,524] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生'}
[2023-03-30 23:26:24,200] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备'}
[2023-03-30 23:26:24,210] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备'}
[2023-03-30 23:26:24,219] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备'}
[2023-03-30 23:26:24,231] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备'}
[2023-03-30 23:26:24,250] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备'}
[2023-03-30 23:26:24,262] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备'}
[2023-03-30 23:26:24,272] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备'}
[2023-03-30 23:26:24,898] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致'}
[2023-03-30 23:26:24,903] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致'}
[2023-03-30 23:26:24,907] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致'}
[2023-03-30 23:26:24,932] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致'}
[2023-03-30 23:26:24,957] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致'}
[2023-03-30 23:26:24,979] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致'}
[2023-03-30 23:26:24,991] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致'}
[2023-03-30 23:26:25,011] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致'}
[2023-03-30 23:26:25,616] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:25,625] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:25,648] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:25,658] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:25,669] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:25,681] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:25,690] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:25,707] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:26,378] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:26,384] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:26,389] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:26,397] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:26,402] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:26,415] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:26,428] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:27,008] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使'}
[2023-03-30 23:26:27,018] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使'}
[2023-03-30 23:26:27,026] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使'}
[2023-03-30 23:26:27,037] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使'}
[2023-03-30 23:26:27,046] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使'}
[2023-03-30 23:26:27,054] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使'}
[2023-03-30 23:26:27,062] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使'}
[2023-03-30 23:26:27,070] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使'}
[2023-03-30 23:26:27,735] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传'}
[2023-03-30 23:26:27,745] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传'}
[2023-03-30 23:26:27,755] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传'}
[2023-03-30 23:26:27,769] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传'}
[2023-03-30 23:26:27,783] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传'}
[2023-03-30 23:26:27,794] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传'}
[2023-03-30 23:26:27,804] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传'}
[2023-03-30 23:26:28,454] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内'}
[2023-03-30 23:26:28,472] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内'}
[2023-03-30 23:26:28,481] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内'}
[2023-03-30 23:26:28,489] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内'}
[2023-03-30 23:26:28,499] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内'}
[2023-03-30 23:26:28,533] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内'}
[2023-03-30 23:26:28,543] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内'}
[2023-03-30 23:26:28,556] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内'}
[2023-03-30 23:26:29,212] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图'}
[2023-03-30 23:26:29,222] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图'}
[2023-03-30 23:26:29,233] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图'}
[2023-03-30 23:26:29,246] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图'}
[2023-03-30 23:26:29,258] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图'}
[2023-03-30 23:26:29,270] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图'}
[2023-03-30 23:26:29,286] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图'}
[2023-03-30 23:26:30,003] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅'}
[2023-03-30 23:26:30,013] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅'}
[2023-03-30 23:26:30,038] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅'}
[2023-03-30 23:26:30,048] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅'}
[2023-03-30 23:26:30,062] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅'}
[2023-03-30 23:26:30,074] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅'}
[2023-03-30 23:26:30,114] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅'}
[2023-03-30 23:26:30,125] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅'}
[2023-03-30 23:26:30,856] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说'}
[2023-03-30 23:26:30,876] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说'}
[2023-03-30 23:26:30,885] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说'}
[2023-03-30 23:26:30,897] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说'}
[2023-03-30 23:26:30,914] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说'}
[2023-03-30 23:26:30,940] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说'}
[2023-03-30 23:26:30,952] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说'}
[2023-03-30 23:26:31,655] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明'}
[2023-03-30 23:26:31,696] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明'}
[2023-03-30 23:26:31,709] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明'}
[2023-03-30 23:26:31,718] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明'}
[2023-03-30 23:26:31,727] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明'}
[2023-03-30 23:26:31,740] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明'}
[2023-03-30 23:26:31,757] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明'}
[2023-03-30 23:26:31,768] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明'}
[2023-03-30 23:26:32,476] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助'}
[2023-03-30 23:26:32,486] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助'}
[2023-03-30 23:26:32,495] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助'}
[2023-03-30 23:26:32,549] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助'}
[2023-03-30 23:26:32,560] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助'}
[2023-03-30 23:26:32,574] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助'}
[2023-03-30 23:26:32,590] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助'}
[2023-03-30 23:26:33,338] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生'}
[2023-03-30 23:26:33,356] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生'}
[2023-03-30 23:26:33,368] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生'}
[2023-03-30 23:26:33,386] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生'}
[2023-03-30 23:26:33,397] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生'}
[2023-03-30 23:26:33,409] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生'}
[2023-03-30 23:26:33,424] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生'}
[2023-03-30 23:26:33,434] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生'}
[2023-03-30 23:26:34,352] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感'}
[2023-03-30 23:26:34,364] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感'}
[2023-03-30 23:26:34,377] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感'}
[2023-03-30 23:26:34,395] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感'}
[2023-03-30 23:26:34,410] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感'}
[2023-03-30 23:26:34,423] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感'}
[2023-03-30 23:26:34,434] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感'}
[2023-03-30 23:26:35,373] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有'}
[2023-03-30 23:26:35,397] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有'}
[2023-03-30 23:26:35,410] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有'}
[2023-03-30 23:26:35,420] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有'}
[2023-03-30 23:26:35,437] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有'}
[2023-03-30 23:26:35,448] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有'}
[2023-03-30 23:26:35,460] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有'}
[2023-03-30 23:26:35,473] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有'}
[2023-03-30 23:26:36,288] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的'}
[2023-03-30 23:26:36,297] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的'}
[2023-03-30 23:26:36,306] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的'}
[2023-03-30 23:26:36,326] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的'}
[2023-03-30 23:26:36,336] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的'}
[2023-03-30 23:26:36,351] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的'}
[2023-03-30 23:26:36,365] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的'}
[2023-03-30 23:26:37,164] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象'}
[2023-03-30 23:26:37,173] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象'}
[2023-03-30 23:26:37,182] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象'}
[2023-03-30 23:26:37,192] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象'}
[2023-03-30 23:26:37,204] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象'}
[2023-03-30 23:26:37,232] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象'}
[2023-03-30 23:26:37,238] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象'}
[2023-03-30 23:26:37,252] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象'}
[2023-03-30 23:26:38,084] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后'}
[2023-03-30 23:26:38,093] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后'}
[2023-03-30 23:26:38,106] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后'}
[2023-03-30 23:26:38,122] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后'}
[2023-03-30 23:26:38,140] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后'}
[2023-03-30 23:26:38,181] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后'}
[2023-03-30 23:26:38,206] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后'}
[2023-03-30 23:26:39,094] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合'}
[2023-03-30 23:26:39,111] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合'}
[2023-03-30 23:26:39,132] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合'}
[2023-03-30 23:26:39,150] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合'}
[2023-03-30 23:26:39,174] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合'}
[2023-03-30 23:26:39,190] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合'}
[2023-03-30 23:26:39,197] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合'}
[2023-03-30 23:26:39,212] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合'}
[2023-03-30 23:26:40,009] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实'}
[2023-03-30 23:26:40,094] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实'}
[2023-03-30 23:26:40,105] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实'}
[2023-03-30 23:26:40,128] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实'}
[2023-03-30 23:26:40,149] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实'}
[2023-03-30 23:26:40,173] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实'}
[2023-03-30 23:26:40,189] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实'}
[2023-03-30 23:26:40,200] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实'}
[2023-03-30 23:26:40,952] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用'}
[2023-03-30 23:26:40,973] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用'}
[2023-03-30 23:26:40,986] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用'}
[2023-03-30 23:26:40,999] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用'}
[2023-03-30 23:26:41,013] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用'}
[2023-03-30 23:26:41,022] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用'}
[2023-03-30 23:26:41,033] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用'}
[2023-03-30 23:26:41,819] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升'}
[2023-03-30 23:26:41,832] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升'}
[2023-03-30 23:26:41,845] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升'}
[2023-03-30 23:26:41,878] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升'}
[2023-03-30 23:26:41,886] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升'}
[2023-03-30 23:26:41,893] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升'}
[2023-03-30 23:26:41,925] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升'}
[2023-03-30 23:26:41,935] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升'}
[2023-03-30 23:26:42,562] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对'}
[2023-03-30 23:26:42,589] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对'}
[2023-03-30 23:26:42,621] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对'}
[2023-03-30 23:26:42,634] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对'}
[2023-03-30 23:26:42,644] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对'}
[2023-03-30 23:26:42,657] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对'}
[2023-03-30 23:26:42,668] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对'}
[2023-03-30 23:26:43,380] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴'}
[2023-03-30 23:26:43,389] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴'}
[2023-03-30 23:26:43,436] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴'}
[2023-03-30 23:26:43,448] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴'}
[2023-03-30 23:26:43,462] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴'}
[2023-03-30 23:26:43,472] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴'}
[2023-03-30 23:26:43,486] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴'}
[2023-03-30 23:26:43,496] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴'}
[2023-03-30 23:26:44,346] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴趣以'}
[2023-03-30 23:26:44,356] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴趣以'}
[2023-03-30 23:26:44,364] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴趣以'}
[2023-03-30 23:26:44,374] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴趣以'}
[2023-03-30 23:26:44,389] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴趣以'}
[2023-03-30 23:26:44,398] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴趣以'}
[2023-03-30 23:26:44,420] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴趣以'}
[2023-03-30 23:26:45,226] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴趣以及意义感'}
[2023-03-30 23:26:45,235] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴趣以及意义感'}
[2023-03-30 23:26:45,258] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴趣以及意义感'}
[2023-03-30 23:26:45,273] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴趣以及意义感'}
[2023-03-30 23:26:45,295] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴趣以及意义感'}
[2023-03-30 23:26:45,306] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴趣以及意义感'}
[2023-03-30 23:26:46,380] [    INFO] - client punctuation restored msg={'result': '第一部分是认知部分，该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理，让学生对设备有大致的认知。随后使用真实传感器的内部构造图，辅以文字说明，进一步帮助学生对传感器有更深刻的印象，最后结合具体的实践应用，提升学生对实训的兴趣以及意义感。'}
[2023-03-30 23:27:01,059] [    INFO] - client final receive msg={'status': 'ok', 'signal': 'finished', 'result': '第一部分是认知部分，该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理，让学生对设备有大致的认知。随后使用真实传感器的内部构造图，辅以文字说明，进一步帮助学生对传感器有更深刻的印象，最后结合具体的实践应用，提升学生对实训的兴趣以及意义感。', 'times': [{'w': '第', 'bg': 0.0, 'ed': 0.36}, {'w': '一', 'bg': 0.36, 'ed': 0.48}, {'w': '部', 'bg': 0.48, 'ed': 0.62}, {'w': '分', 'bg': 0.62, 'ed': 0.8200000000000001}, {'w': '是', 'bg': 0.8200000000000001, 'ed': 1.08}, {'w': '认', 'bg': 1.08, 'ed': 1.28}, {'w': '知', 'bg': 1.28, 'ed': 1.44}, {'w': '部', 'bg': 1.44, 'ed': 1.58}, {'w': '分', 'bg': 1.58, 'ed': 2.1}, {'w': '该', 'bg': 2.1, 'ed': 2.6}, {'w': '部', 'bg': 2.6, 'ed': 2.72}, {'w': '分', 'bg': 2.72, 'ed': 2.94}, {'w': '通', 'bg': 2.94, 'ed': 3.16}, {'w': '过', 'bg': 3.16, 'ed': 3.36}, {'w': '示', 'bg': 3.36, 'ed': 3.54}, {'w': '意', 'bg': 3.54, 'ed': 3.68}, {'w': '图', 'bg': 3.68, 'ed': 3.9}, {'w': '和', 'bg': 3.9, 'ed': 4.14}, {'w': '文', 'bg': 4.14, 'ed': 4.32}, {'w': '本', 'bg': 4.32, 'ed': 4.46}, {'w': '的', 'bg': 4.46, 'ed': 4.58}, {'w': '形', 'bg': 4.58, 'ed': 4.72}, {'w': '式', 'bg': 4.72, 'ed': 5.0}, {'w': '向', 'bg': 5.0, 'ed': 5.32}, {'w': '学', 'bg': 5.32, 'ed': 5.5}, {'w': '生', 'bg': 5.5, 'ed': 5.66}, {'w': '讲', 'bg': 5.66, 'ed': 5.86}, {'w': '解', 'bg': 5.86, 'ed': 6.18}, {'w': '主', 'bg': 6.18, 'ed': 6.46}, {'w': '要', 'bg': 6.46, 'ed': 6.62}, {'w': '传', 'bg': 6.62, 'ed': 6.8}, {'w': '感', 'bg': 6.8, 'ed': 7.0}, {'w': '器', 'bg': 7.0, 'ed': 7.16}, {'w': '的', 'bg': 7.16, 'ed': 7.28}, {'w': '工', 'bg': 7.28, 'ed': 7.44}, {'w': '作', 'bg': 7.44, 'ed': 7.6000000000000005}, {'w': '原', 'bg': 7.6000000000000005, 'ed': 7.74}, {'w': '理', 'bg': 7.74, 'ed': 8.06}, {'w': '让', 'bg': 8.06, 'ed': 8.44}, {'w': '学', 'bg': 8.44, 'ed': 8.64}, {'w': '生', 'bg': 8.64, 'ed': 8.84}, {'w': '对', 'bg': 8.84, 'ed': 9.06}, {'w': '设', 'bg': 9.06, 'ed': 9.24}, {'w': '备', 'bg': 9.24, 'ed': 9.52}, {'w': '有', 'bg': 9.52, 'ed': 9.86}, {'w': '大', 'bg': 9.86, 'ed': 10.1}, {'w': '致', 'bg': 10.1, 'ed': 10.24}, {'w': '的', 
'bg': 10.24, 'ed': 10.36}, {'w': '认', 'bg': 10.36, 'ed': 10.5}, {'w': '知', 'bg': 10.5, 'ed': 11.040000000000001}, {'w': '随', 'bg': 11.040000000000001, 'ed': 11.56}, {'w': '后', 'bg': 11.56, 'ed': 11.82}, {'w': '使', 'bg': 11.82, 'ed': 12.1}, {'w': '用', 'bg': 12.1, 'ed': 12.26}, {'w': '真', 'bg': 12.26, 'ed': 12.44}, {'w': '实', 'bg': 12.44, 'ed': 12.620000000000001}, {'w': '传', 'bg': 12.620000000000001, 'ed': 12.780000000000001}, {'w': '感', 'bg': 12.780000000000001, 'ed': 12.94}, {'w': '器', 'bg': 12.94, 'ed': 13.1}, {'w': '的', 'bg': 13.1, 'ed': 13.26}, {'w': '内', 'bg': 13.26, 'ed': 13.42}, {'w': '部', 'bg': 13.42, 'ed': 13.56}, {'w': '构', 'bg': 13.56, 'ed': 13.700000000000001}, {'w': '造', 'bg': 13.700000000000001, 'ed': 13.86}, {'w': '图', 'bg': 13.86, 'ed': 14.280000000000001}, {'w': '辅', 'bg': 14.280000000000001, 'ed': 14.66}, {'w': '以', 'bg': 14.66, 'ed': 14.82}, {'w': '文', 'bg': 14.82, 'ed': 15.0}, {'w': '字', 'bg': 15.0, 'ed': 15.16}, {'w': '说', 'bg': 15.16, 'ed': 15.32}, {'w': '明', 'bg': 15.32, 'ed': 15.72}, {'w': '进', 'bg': 15.72, 'ed': 16.1}, {'w': '一', 'bg': 16.1, 'ed': 16.2}, {'w': '步', 'bg': 16.2, 'ed': 16.32}, {'w': '帮', 'bg': 16.32, 'ed': 16.48}, {'w': '助', 'bg': 16.48, 'ed': 16.66}, {'w': '学', 'bg': 16.66, 'ed': 16.82}, {'w': '生', 'bg': 16.82, 'ed': 17.12}, {'w': '对', 'bg': 17.12, 'ed': 17.48}, {'w': '传', 'bg': 17.48, 'ed': 17.66}, {'w': '感', 'bg': 17.66, 'ed': 17.84}, {'w': '器', 'bg': 17.84, 'ed': 18.12}, {'w': '有', 'bg': 18.12, 'ed': 18.42}, {'w': '更', 'bg': 18.42, 'ed': 18.66}, {'w': '深', 'bg': 18.66, 'ed': 18.88}, {'w': '刻', 'bg': 18.88, 'ed': 19.04}, {'w': '的', 'bg': 19.04, 'ed': 19.16}, {'w': '印', 'bg': 19.16, 'ed': 19.3}, {'w': '象', 'bg': 19.3, 'ed': 19.8}, {'w': '最', 'bg': 19.8, 'ed': 20.3}, {'w': '后', 'bg': 20.3, 'ed': 20.62}, {'w': '结', 'bg': 20.62, 'ed': 20.96}, {'w': '合', 'bg': 20.96, 'ed': 21.14}, {'w': '具', 'bg': 21.14, 'ed': 21.3}, {'w': '体', 'bg': 21.3, 'ed': 21.42}, {'w': '的', 'bg': 21.42, 'ed': 21.580000000000002}, {'w': '实', 'bg': 
21.580000000000002, 'ed': 21.76}, {'w': '践', 'bg': 21.76, 'ed': 21.92}, {'w': '应', 'bg': 21.92, 'ed': 22.080000000000002}, {'w': '用', 'bg': 22.080000000000002, 'ed': 22.44}, {'w': '提', 'bg': 22.44, 'ed': 22.78}, {'w': '升', 'bg': 22.78, 'ed': 22.94}, {'w': '学', 'bg': 22.94, 'ed': 23.12}, {'w': '生', 'bg': 23.12, 'ed': 23.34}, {'w': '对', 'bg': 23.34, 'ed': 23.62}, {'w': '实', 'bg': 23.62, 'ed': 23.82}, {'w': '训', 'bg': 23.82, 'ed': 23.96}, {'w': '的', 'bg': 23.96, 'ed': 24.12}, {'w': '兴', 'bg': 24.12, 'ed': 24.3}, {'w': '趣', 'bg': 24.3, 'ed': 24.6}, {'w': '以', 'bg': 24.6, 'ed': 24.88}, {'w': '及', 'bg': 24.88, 'ed': 25.12}, {'w': '意', 'bg': 25.12, 'ed': 25.34}, {'w': '义', 'bg': 25.34, 'ed': 25.46}, {'w': '感', 'bg': 25.46, 'ed': 26.04}]}
[2023-03-30 23:27:01,060] [    INFO] - audio duration: 26.04, elapsed time: 46.581613540649414, RTF=1.7888484462614982
sentences:  ['第一部分是认知部分', '该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理', '让学生对设备有大致的认知', '随后使用真实传感器的内部构造图', '辅以文字说明', '进一步帮助学生对传感器有更深刻的印象', '最后结合具体的实践应用', '提升学生对实训的兴趣以及意义感']
relative_times:  [[0.0, 2.1], [2.1, 8.06], [8.06, 11.040000000000001], [11.040000000000001, 14.280000000000001], [14.280000000000001, 15.72], [15.72, 19.8], [19.8, 22.44], [22.44, 26.04]]
[2023-03-30 23:27:01,076] [    INFO] - results saved to /home/fxb/PaddleSpeech-develop/data/认知.srt
  ```


================================================
FILE: demos/streaming_asr_server/README_cn.md
================================================
([English](./README.md)|中文)

# 流式语音识别服务

## 介绍
这个 demo 演示如何启动流式语音识别服务并访问该服务。它可以通过 `paddlespeech_server` 和 `paddlespeech_client` 的单条命令或几行 Python 代码来实现。

**流式语音识别服务只支持 `websocket` 协议，不支持 `http` 协议。**

服务接口定义请参考:
- [PaddleSpeech Streaming Server WebSocket API](https://github.com/PaddlePaddle/PaddleSpeech/wiki/PaddleSpeech-Server-WebSocket-API)

## 使用方法
### 1. 安装
安装 PaddleSpeech 的详细过程请看 [安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md)。

推荐使用 **paddlepaddle 2.4rc** 或以上版本。

你可以从简单、中等、困难三种方式中选择一种来安装 PaddleSpeech。

**如果使用简单模式安装，需要自行准备 yaml 文件，可参考 conf 目录下的 yaml 文件。**

### 2. 准备配置文件

流式ASR的服务启动脚本和服务测试脚本存放在 `PaddleSpeech/demos/streaming_asr_server` 目录。
下载好 `PaddleSpeech` 之后，进入到 `PaddleSpeech/demos/streaming_asr_server` 目录。
配置文件可参见该目录下 `conf/ws_application.yaml` 和 `conf/ws_conformer_wenetspeech_application.yaml` 。

目前服务集成的模型有：DeepSpeech2 和 Conformer，对应的配置文件如下：
* DeepSpeech2: `conf/ws_application.yaml`
* Conformer: `conf/ws_conformer_wenetspeech_application.yaml`


这个 ASR client 的输入应该是一个 WAV 文件（`.wav`），并且采样率必须与模型的采样率相同。

可以下载此 ASR client 的示例音频：
```bash
wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav
```

### 3. 服务端使用方法
- 命令行 (推荐使用)
  **注意:** 默认部署在 `cpu` 设备上，可以通过修改服务配置文件中 `device` 参数部署在 `gpu` 上。
  ```bash
  # 在 PaddleSpeech/demos/streaming_asr_server 目录启动服务
  paddlespeech_server start --config_file ./conf/ws_conformer_wenetspeech_application.yaml
  # 如果你愿意牺牲一定的模型精度来换取更快的解码速度，可以使用如下命令启动服务
  paddlespeech_server start --config_file ./conf/ws_conformer_wenetspeech_application_faster.yaml
  ```

  使用方法：
  
  ```bash
  paddlespeech_server start --help
  ```
  参数:
  - `config_file`: 服务的配置文件，默认： `./conf/application.yaml`
  - `log_file`: log 文件，默认： `./log/paddlespeech.log`

  输出:
  ```text
  [2022-05-14 04:56:13,086] [    INFO] - create the online asr engine instance
  [2022-05-14 04:56:13,086] [    INFO] - paddlespeech_server set the device: cpu
  [2022-05-14 04:56:13,087] [    INFO] - Load the pretrained model, tag = conformer_online_wenetspeech-zh-16k
  [2022-05-14 04:56:13,087] [    INFO] - File /root/.paddlespeech/models/conformer_online_wenetspeech-zh-16k/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar.gz md5 checking...
  [2022-05-14 04:56:17,542] [    INFO] - Use pretrained model stored in: /root/.paddlespeech/models/conformer_online_wenetspeech-zh-16k/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar
  [2022-05-14 04:56:17,543] [    INFO] - /root/.paddlespeech/models/conformer_online_wenetspeech-zh-16k/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar
  [2022-05-14 04:56:17,543] [    INFO] - /root/.paddlespeech/models/conformer_online_wenetspeech-zh-16k/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar/model.yaml
  [2022-05-14 04:56:17,543] [    INFO] - /root/.paddlespeech/models/conformer_online_wenetspeech-zh-16k/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar/exp/chunk_conformer/checkpoints/avg_10.pdparams
  [2022-05-14 04:56:17,543] [    INFO] - /root/.paddlespeech/models/conformer_online_wenetspeech-zh-16k/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar/exp/chunk_conformer/checkpoints/avg_10.pdparams
  [2022-05-14 04:56:17,852] [    INFO] - start to create the stream conformer asr engine
  [2022-05-14 04:56:17,863] [    INFO] - model name: conformer_online
  [2022-05-14 04:56:22,756] [    INFO] - create the transformer like model success
  [2022-05-14 04:56:22,758] [    INFO] - Initialize ASR server engine successfully.
  INFO:     Started server process [4242]
  [2022-05-14 04:56:22] [INFO] [server.py:75] Started server process [4242]
  INFO:     Waiting for application startup.
  [2022-05-14 04:56:22] [INFO] [on.py:45] Waiting for application startup.
  INFO:     Application startup complete.
  [2022-05-14 04:56:22] [INFO] [on.py:59] Application startup complete.
  INFO:     Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit)
  [2022-05-14 04:56:22] [INFO] [server.py:211] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit)
  ```

- Python API
  **注意:** 默认部署在 `cpu` 设备上，可以通过修改服务配置文件中 `device` 参数部署在 `gpu` 上。
  ```python
  # 在 PaddleSpeech/demos/streaming_asr_server 目录
  from paddlespeech.server.bin.paddlespeech_server import ServerExecutor

  server_executor = ServerExecutor()
  server_executor(
      config_file="./conf/ws_conformer_wenetspeech_application_faster.yaml", 
      log_file="./log/paddlespeech.log")
  ```

  输出:
  ```text
  [2022-05-14 04:56:13,086] [    INFO] - create the online asr engine instance
  [2022-05-14 04:56:13,086] [    INFO] - paddlespeech_server set the device: cpu
  [2022-05-14 04:56:13,087] [    INFO] - Load the pretrained model, tag = conformer_online_wenetspeech-zh-16k
  [2022-05-14 04:56:13,087] [    INFO] - File /root/.paddlespeech/models/conformer_online_wenetspeech-zh-16k/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar.gz md5 checking...
  [2022-05-14 04:56:17,542] [    INFO] - Use pretrained model stored in: /root/.paddlespeech/models/conformer_online_wenetspeech-zh-16k/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar
  [2022-05-14 04:56:17,543] [    INFO] - /root/.paddlespeech/models/conformer_online_wenetspeech-zh-16k/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar
  [2022-05-14 04:56:17,543] [    INFO] - /root/.paddlespeech/models/conformer_online_wenetspeech-zh-16k/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar/model.yaml
  [2022-05-14 04:56:17,543] [    INFO] - /root/.paddlespeech/models/conformer_online_wenetspeech-zh-16k/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar/exp/chunk_conformer/checkpoints/avg_10.pdparams
  [2022-05-14 04:56:17,543] [    INFO] - /root/.paddlespeech/models/conformer_online_wenetspeech-zh-16k/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar/exp/chunk_conformer/checkpoints/avg_10.pdparams
  [2022-05-14 04:56:17,852] [    INFO] - start to create the stream conformer asr engine
  [2022-05-14 04:56:17,863] [    INFO] - model name: conformer_online
  [2022-05-14 04:56:22,756] [    INFO] - create the transformer like model success
  [2022-05-14 04:56:22,758] [    INFO] - Initialize ASR server engine successfully.
  INFO:     Started server process [4242]
  [2022-05-14 04:56:22] [INFO] [server.py:75] Started server process [4242]
  INFO:     Waiting for application startup.
  [2022-05-14 04:56:22] [INFO] [on.py:45] Waiting for application startup.
  INFO:     Application startup complete.
  [2022-05-14 04:56:22] [INFO] [on.py:59] Application startup complete.
  INFO:     Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit)
  [2022-05-14 04:56:22] [INFO] [server.py:211] Uvicorn running on http://0.0.0.0:8090 (Press CTRL+C to quit)
  ```

### 4. ASR 客户端使用方法

**注意：** 初次使用客户端时响应时间会略长。

- 命令行 (推荐使用)

  若 `127.0.0.1` 不能访问，则需要使用实际服务 IP 地址

  ```bash
  paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --input ./zh.wav
  ```

  使用帮助:

  ```bash
  paddlespeech_client asr_online --help
  ```

  参数:
  - `server_ip`: 服务端ip地址，默认: 127.0.0.1。
  - `port`: 服务端口，默认: 8090。
  - `input`(必须输入): 用于识别的音频文件。
  - `sample_rate`: 音频采样率，默认值：16000。
  - `lang`: 模型语言，默认值：zh_cn。
  - `audio_format`: 音频格式，默认值：wav。
  - `punc.server_ip`: 标点预测服务的 IP 地址，默认: None。
  - `punc.port`: 标点预测服务的端口，默认: None。

  输出:
  ```text
  [2022-05-06 21:10:35,598] [    INFO] - Start to do streaming asr client
  [2022-05-06 21:10:35,600] [    INFO] - asr websocket client start
  [2022-05-06 21:10:35,600] [    INFO] - endpoint: ws://127.0.0.1:8390/paddlespeech/asr/streaming
  [2022-05-06 21:10:35,600] [    INFO] - start to process the wavscp: ./zh.wav
  [2022-05-06 21:10:35,670] [    INFO] - client receive msg={"status": "ok", "signal": "server_ready"}
  [2022-05-06 21:10:35,699] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:35,713] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:35,726] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:35,738] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:35,750] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:35,762] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:35,774] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:35,786] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:36,387] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:36,398] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:36,407] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:36,416] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:36,425] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:36,434] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:36,442] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:10:36,930] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:10:36,938] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:10:36,946] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:10:36,954] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:10:36,962] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:10:36,970] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:10:36,977] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:10:36,985] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:10:37,484] [    INFO] - client receive msg={'result': '我认为跑步最重要的'}
  [2022-05-06 21:10:37,492] [    INFO] - client receive msg={'result': '我认为跑步最重要的'}
  [2022-05-06 21:10:37,500] [    INFO] - client receive msg={'result': '我认为跑步最重要的'}
  [2022-05-06 21:10:37,508] [    INFO] - client receive msg={'result': '我认为跑步最重要的'}
  [2022-05-06 21:10:37,517] [    INFO] - client receive msg={'result': '我认为跑步最重要的'}
  [2022-05-06 21:10:37,525] [    INFO] - client receive msg={'result': '我认为跑步最重要的'}
  [2022-05-06 21:10:37,532] [    INFO] - client receive msg={'result': '我认为跑步最重要的'}
  [2022-05-06 21:10:38,050] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:10:38,058] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:10:38,066] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:10:38,073] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:10:38,081] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:10:38,089] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:10:38,097] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:10:38,105] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:10:38,630] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给'}
  [2022-05-06 21:10:38,639] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给'}
  [2022-05-06 21:10:38,647] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给'}
  [2022-05-06 21:10:38,655] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给'}
  [2022-05-06 21:10:38,663] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给'}
  [2022-05-06 21:10:38,671] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给'}
  [2022-05-06 21:10:38,679] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给'}
  [2022-05-06 21:10:39,216] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:10:39,224] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:10:39,232] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:10:39,240] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:10:39,248] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:10:39,256] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:10:39,264] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:10:39,272] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:10:39,885] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康'}
  [2022-05-06 21:10:39,896] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康'}
  [2022-05-06 21:10:39,905] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康'}
  [2022-05-06 21:10:39,915] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康'}
  [2022-05-06 21:10:39,924] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康'}
  [2022-05-06 21:10:39,934] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康'}
  [2022-05-06 21:10:44,827] [    INFO] - client final receive msg={'status': 'ok', 'signal': 'finished', 'result': '我认为跑步最重要的就是给我带来了身体健康', 'times': [{'w': '我', 'bg': 0.0, 'ed': 0.7000000000000001}, {'w': '认', 'bg': 0.7000000000000001, 'ed': 0.84}, {'w': '为', 'bg': 0.84, 'ed': 1.0}, {'w': '跑', 'bg': 1.0, 'ed': 1.18}, {'w': '步', 'bg': 1.18, 'ed': 1.36}, {'w': '最', 'bg': 1.36, 'ed': 1.5}, {'w': '重', 'bg': 1.5, 'ed': 1.6400000000000001}, {'w': '要', 'bg': 1.6400000000000001, 'ed': 1.78}, {'w': '的', 'bg': 1.78, 'ed': 1.9000000000000001}, {'w': '就', 'bg': 1.9000000000000001, 'ed': 2.06}, {'w': '是', 'bg': 2.06, 'ed': 2.62}, {'w': '给', 'bg': 2.62, 'ed': 3.16}, {'w': '我', 'bg': 3.16, 'ed': 3.3200000000000003}, {'w': '带', 'bg': 3.3200000000000003, 'ed': 3.48}, {'w': '来', 'bg': 3.48, 'ed': 3.62}, {'w': '了', 'bg': 3.62, 'ed': 3.7600000000000002}, {'w': '身', 'bg': 3.7600000000000002, 'ed': 3.9}, {'w': '体', 'bg': 3.9, 'ed': 4.0600000000000005}, {'w': '健', 'bg': 4.0600000000000005, 'ed': 4.26}, {'w': '康', 'bg': 4.26, 'ed': 4.96}]}
  [2022-05-06 21:10:44,827] [    INFO] - audio duration: 4.9968125, elapsed time: 9.225094079971313, RTF=1.846195765794957
  [2022-05-06 21:10:44,828] [    INFO] - asr websocket client finished : 我认为跑步最重要的就是给我带来了身体健康
  ```

- Python API
  ```python
  from paddlespeech.server.bin.paddlespeech_client import ASROnlineClientExecutor

  asrclient_executor = ASROnlineClientExecutor()
  res = asrclient_executor(
      input="./zh.wav",
      server_ip="127.0.0.1",
      port=8090,
      sample_rate=16000,
      lang="zh_cn",
      audio_format="wav")
  print(res)
  ```

  输出:
  ```text
  [2022-05-06 21:14:03,137] [    INFO] - asr websocket client start
  [2022-05-06 21:14:03,137] [    INFO] - endpoint: ws://127.0.0.1:8390/paddlespeech/asr/streaming
  [2022-05-06 21:14:03,149] [    INFO] - client receive msg={"status": "ok", "signal": "server_ready"}
  [2022-05-06 21:14:03,167] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:03,181] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:03,194] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:03,207] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:03,219] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:03,230] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:03,241] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:03,252] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:03,768] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:03,776] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:03,784] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:03,792] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:03,800] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:03,807] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:03,815] [    INFO] - client receive msg={'result': ''}
  [2022-05-06 21:14:04,301] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:14:04,309] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:14:04,317] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:14:04,325] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:14:04,333] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:14:04,341] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:14:04,349] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:14:04,356] [    INFO] - client receive msg={'result': '我认为跑'}
  [2022-05-06 21:14:04,855] [    INFO] - client receive msg={'result': '我认为跑步最重要的'}
  [2022-05-06 21:14:04,864] [    INFO] - client receive msg={'result': '我认为跑步最重要的'}
  [2022-05-06 21:14:04,871] [    INFO] - client receive msg={'result': '我认为跑步最重要的'}
  [2022-05-06 21:14:04,879] [    INFO] - client receive msg={'result': '我认为跑步最重要的'}
  [2022-05-06 21:14:04,887] [    INFO] - client receive msg={'result': '我认为跑步最重要的'}
  [2022-05-06 21:14:04,894] [    INFO] - client receive msg={'result': '我认为跑步最重要的'}
  [2022-05-06 21:14:04,902] [    INFO] - client receive msg={'result': '我认为跑步最重要的'}
  [2022-05-06 21:14:05,418] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:14:05,426] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:14:05,434] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:14:05,442] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:14:05,449] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:14:05,457] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:14:05,465] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:14:05,473] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是'}
  [2022-05-06 21:14:05,996] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给'}
  [2022-05-06 21:14:06,006] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给'}
  [2022-05-06 21:14:06,013] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给'}
  [2022-05-06 21:14:06,021] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给'}
  [2022-05-06 21:14:06,029] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给'}
  [2022-05-06 21:14:06,037] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给'}
  [2022-05-06 21:14:06,045] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给'}
  [2022-05-06 21:14:06,581] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:14:06,589] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:14:06,597] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:14:06,605] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:14:06,613] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:14:06,621] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:14:06,628] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:14:06,636] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了'}
  [2022-05-06 21:14:07,188] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康'}
  [2022-05-06 21:14:07,196] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康'}
  [2022-05-06 21:14:07,203] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康'}
  [2022-05-06 21:14:07,211] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康'}
  [2022-05-06 21:14:07,219] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康'}
  [2022-05-06 21:14:07,226] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康'}
  [2022-05-06 21:14:12,158] [    INFO] - client final receive msg={'status': 'ok', 'signal': 'finished', 'result': '我认为跑步最重要的就是给我带来了身体健康', 'times': [{'w': '我', 'bg': 0.0, 'ed': 0.7000000000000001}, {'w': '认', 'bg': 0.7000000000000001, 'ed': 0.84}, {'w': '为', 'bg': 0.84, 'ed': 1.0}, {'w': '跑', 'bg': 1.0, 'ed': 1.18}, {'w': '步', 'bg': 1.18, 'ed': 1.36}, {'w': '最', 'bg': 1.36, 'ed': 1.5}, {'w': '重', 'bg': 1.5, 'ed': 1.6400000000000001}, {'w': '要', 'bg': 1.6400000000000001, 'ed': 1.78}, {'w': '的', 'bg': 1.78, 'ed': 1.9000000000000001}, {'w': '就', 'bg': 1.9000000000000001, 'ed': 2.06}, {'w': '是', 'bg': 2.06, 'ed': 2.62}, {'w': '给', 'bg': 2.62, 'ed': 3.16}, {'w': '我', 'bg': 3.16, 'ed': 3.3200000000000003}, {'w': '带', 'bg': 3.3200000000000003, 'ed': 3.48}, {'w': '来', 'bg': 3.48, 'ed': 3.62}, {'w': '了', 'bg': 3.62, 'ed': 3.7600000000000002}, {'w': '身', 'bg': 3.7600000000000002, 'ed': 3.9}, {'w': '体', 'bg': 3.9, 'ed': 4.0600000000000005}, {'w': '健', 'bg': 4.0600000000000005, 'ed': 4.26}, {'w': '康', 'bg': 4.26, 'ed': 4.96}]}
  [2022-05-06 21:14:12,159] [    INFO] - audio duration: 4.9968125, elapsed time: 9.019973039627075, RTF=1.8051453881103354
  [2022-05-06 21:14:12,160] [    INFO] - asr websocket client finished
  ```
## 标点预测

### 1. 服务端使用方法

- 命令行
  **注意:** 默认部署在 `cpu` 设备上，可以通过修改服务配置文件中 `device` 参数部署在 `gpu` 上。
  ```bash
  # 在 PaddleSpeech/demos/streaming_asr_server 目录下启动标点预测服务
  paddlespeech_server start --config_file conf/punc_application.yaml
  ```

  使用方法:
  ```bash
  paddlespeech_server start --help
  ```
  
  参数:
  - `config_file`: 服务的配置文件。
  - `log_file`: log 文件。


  输出:
  ```text
  [2022-05-02 17:59:26,285] [    INFO] - Create the TextEngine Instance
  [2022-05-02 17:59:26,285] [    INFO] - Init the text engine
  [2022-05-02 17:59:26,285] [    INFO] - Text Engine set the device: gpu:0
  [2022-05-02 17:59:26,286] [    INFO] - File /home/users/xiongxinlei/.paddlespeech/models/ernie_linear_p3_wudao-punc-zh/ernie_linear_p3_wudao-punc-zh.tar.gz md5 checking...
  [2022-05-02 17:59:30,810] [    INFO] - Use pretrained model stored in: /home/users/xiongxinlei/.paddlespeech/models/ernie_linear_p3_wudao-punc-zh/ernie_linear_p3_wudao-punc-zh.tar
  W0502 17:59:31.486552  9595 device_context.cc:447] Please NOTE: device: 0, GPU Compute Capability: 6.1, Driver API Version: 10.2, Runtime API Version: 10.2
  W0502 17:59:31.491360  9595 device_context.cc:465] device: 0, cuDNN Version: 7.6.
  [2022-05-02 17:59:34,688] [    INFO] - Already cached /home/users/xiongxinlei/.paddlenlp/models/ernie-1.0/vocab.txt
  [2022-05-02 17:59:34,701] [    INFO] - Init the text engine successfully
  INFO:     Started server process [9595]
  [2022-05-02 17:59:34] [INFO] [server.py:75] Started server process [9595]
  INFO:     Waiting for application startup.
  [2022-05-02 17:59:34] [INFO] [on.py:45] Waiting for application startup.
  INFO:     Application startup complete.
  [2022-05-02 17:59:34] [INFO] [on.py:59] Application startup complete.
  INFO:     Uvicorn running on http://0.0.0.0:8190 (Press CTRL+C to quit)
  [2022-05-02 17:59:34] [INFO] [server.py:206] Uvicorn running on http://0.0.0.0:8190 (Press CTRL+C to quit)
  ```

- Python API
  **注意:** 默认部署在 `cpu` 设备上，可以通过修改服务配置文件中 `device` 参数部署在 `gpu` 上。
  ```python
  # 在 PaddleSpeech/demos/streaming_asr_server 目录
  from paddlespeech.server.bin.paddlespeech_server import ServerExecutor

  server_executor = ServerExecutor()
  server_executor(
      config_file="./conf/punc_application.yaml", 
      log_file="./log/paddlespeech.log")
  ```

  输出:
  ```text
  [2022-05-02 18:09:02,542] [    INFO] - Create the TextEngine Instance
  [2022-05-02 18:09:02,543] [    INFO] - Init the text engine
  [2022-05-02 18:09:02,543] [    INFO] - Text Engine set the device: gpu:0
  [2022-05-02 18:09:02,545] [    INFO] - File /home/users/xiongxinlei/.paddlespeech/models/ernie_linear_p3_wudao-punc-zh/ernie_linear_p3_wudao-punc-zh.tar.gz md5 checking...
  [2022-05-02 18:09:06,919] [    INFO] - Use pretrained model stored in: /home/users/xiongxinlei/.paddlespeech/models/ernie_linear_p3_wudao-punc-zh/ernie_linear_p3_wudao-punc-zh.tar
  W0502 18:09:07.523002 22615 device_context.cc:447] Please NOTE: device: 0, GPU Compute Capability: 6.1, Driver API Version: 10.2, Runtime API Version: 10.2
  W0502 18:09:07.527882 22615 device_context.cc:465] device: 0, cuDNN Version: 7.6.
  [2022-05-02 18:09:10,900] [    INFO] - Already cached /home/users/xiongxinlei/.paddlenlp/models/ernie-1.0/vocab.txt
  [2022-05-02 18:09:10,913] [    INFO] - Init the text engine successfully
  INFO:     Started server process [22615]
  [2022-05-02 18:09:10] [INFO] [server.py:75] Started server process [22615]
  INFO:     Waiting for application startup.
  [2022-05-02 18:09:10] [INFO] [on.py:45] Waiting for application startup.
  INFO:     Application startup complete.
  [2022-05-02 18:09:10] [INFO] [on.py:59] Application startup complete.
  INFO:     Uvicorn running on http://0.0.0.0:8190 (Press CTRL+C to quit)
  [2022-05-02 18:09:10] [INFO] [server.py:206] Uvicorn running on http://0.0.0.0:8190 (Press CTRL+C to quit)
  ```

### 2. 标点预测客户端使用方法
**注意：** 初次使用客户端时响应时间会略长

- 命令行 (推荐使用)

  若 `127.0.0.1` 不能访问，则需要使用实际服务 IP 地址

  ```bash
  paddlespeech_client text --server_ip 127.0.0.1 --port 8190 --input "我认为跑步最重要的就是给我带来了身体健康"
  ```
  
  输出:
  ```text
  [2022-05-02 18:12:29,767] [    INFO] - The punc text: 我认为跑步最重要的就是给我带来了身体健康。
  [2022-05-02 18:12:29,767] [    INFO] - Response time 0.096548 s.
  ```

- Python API

  ```python
  from paddlespeech.server.bin.paddlespeech_client import TextClientExecutor

  textclient_executor = TextClientExecutor()
  res = textclient_executor(
      input="我认为跑步最重要的就是给我带来了身体健康",
      server_ip="127.0.0.1",
      port=8190,)
  print(res)
  ```

  输出:
  ```text
  我认为跑步最重要的就是给我带来了身体健康。
  ```

## 联合流式语音识别和标点预测
**注意:** 默认部署在 `cpu` 设备上，可以通过修改服务配置文件中 `device` 参数将语音识别和标点预测部署在不同的 `gpu` 上。

使用 `streaming_asr_server.py` 和 `punc_server.py` 两个服务，分别启动流式语音识别和标点预测服务。调用 `websocket_client.py` 脚本可以同时调用流式语音识别和标点预测服务。

### 1. 启动服务

```bash
# 注意：流式语音识别和标点预测通过配置文件配置到不同的显卡上
bash server.sh
```

### 2. 调用服务
- 使用命令行：

  若 `127.0.0.1` 不能访问，则需要使用实际服务 IP 地址

  ```bash
  paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --punc.server_ip 127.0.0.1 --punc.port 8190 --input ./zh.wav
  ```
  输出:
  ```text
  [2022-05-07 11:21:47,060] [    INFO] - asr websocket client start
  [2022-05-07 11:21:47,060] [    INFO] - endpoint: ws://127.0.0.1:8490/paddlespeech/asr/streaming
  [2022-05-07 11:21:47,080] [    INFO] - client receive msg={"status": "ok", "signal": "server_ready"}
  [2022-05-07 11:21:47,096] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:47,108] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:47,120] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:47,131] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:47,142] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:47,152] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:47,163] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:47,173] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:47,705] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:47,713] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:47,721] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:47,728] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:47,736] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:47,743] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:47,751] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:21:48,459] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:21:48,572] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:21:48,681] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:21:48,790] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:21:48,898] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:21:49,005] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:21:49,112] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:21:49,219] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:21:49,935] [    INFO] - client receive msg={'result': '我认为，跑步最重要的。'}
  [2022-05-07 11:21:50,062] [    INFO] - client receive msg={'result': '我认为，跑步最重要的。'}
  [2022-05-07 11:21:50,186] [    INFO] - client receive msg={'result': '我认为，跑步最重要的。'}
  [2022-05-07 11:21:50,310] [    INFO] - client receive msg={'result': '我认为，跑步最重要的。'}
  [2022-05-07 11:21:50,435] [    INFO] - client receive msg={'result': '我认为，跑步最重要的。'}
  [2022-05-07 11:21:50,560] [    INFO] - client receive msg={'result': '我认为，跑步最重要的。'}
  [2022-05-07 11:21:50,686] [    INFO] - client receive msg={'result': '我认为，跑步最重要的。'}
  [2022-05-07 11:21:51,444] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:21:51,606] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:21:51,744] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:21:51,882] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:21:52,020] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:21:52,159] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:21:52,298] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:21:52,437] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:21:53,298] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是给。'}
  [2022-05-07 11:21:53,450] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是给。'}
  [2022-05-07 11:21:53,589] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是给。'}
  [2022-05-07 11:21:53,728] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是给。'}
  [2022-05-07 11:21:53,867] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是给。'}
  [2022-05-07 11:21:54,007] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是给。'}
  [2022-05-07 11:21:54,146] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是给。'}
  [2022-05-07 11:21:55,002] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:21:55,148] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:21:55,292] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:21:55,437] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:21:55,584] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:21:55,731] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:21:55,877] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:21:56,021] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:21:56,842] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康。'}
  [2022-05-07 11:21:57,013] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康。'}
  [2022-05-07 11:21:57,174] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康。'}
  [2022-05-07 11:21:57,336] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康。'}
  [2022-05-07 11:21:57,497] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康。'}
  [2022-05-07 11:21:57,659] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康。'}
  [2022-05-07 11:22:03,035] [    INFO] - client final receive msg={'status': 'ok', 'signal': 'finished', 'result': '我认为跑步最重要的就是给我带来了身体健康。', 'times': [{'w': '我', 'bg': 0.0, 'ed': 0.7000000000000001}, {'w': '认', 'bg': 0.7000000000000001, 'ed': 0.84}, {'w': '为', 'bg': 0.84, 'ed': 1.0}, {'w': '跑', 'bg': 1.0, 'ed': 1.18}, {'w': '步', 'bg': 1.18, 'ed': 1.36}, {'w': '最', 'bg': 1.36, 'ed': 1.5}, {'w': '重', 'bg': 1.5, 'ed': 1.6400000000000001}, {'w': '要', 'bg': 1.6400000000000001, 'ed': 1.78}, {'w': '的', 'bg': 1.78, 'ed': 1.9000000000000001}, {'w': '就', 'bg': 1.9000000000000001, 'ed': 2.06}, {'w': '是', 'bg': 2.06, 'ed': 2.62}, {'w': '给', 'bg': 2.62, 'ed': 3.16}, {'w': '我', 'bg': 3.16, 'ed': 3.3200000000000003}, {'w': '带', 'bg': 3.3200000000000003, 'ed': 3.48}, {'w': '来', 'bg': 3.48, 'ed': 3.62}, {'w': '了', 'bg': 3.62, 'ed': 3.7600000000000002}, {'w': '身', 'bg': 3.7600000000000002, 'ed': 3.9}, {'w': '体', 'bg': 3.9, 'ed': 4.0600000000000005}, {'w': '健', 'bg': 4.0600000000000005, 'ed': 4.26}, {'w': '康', 'bg': 4.26, 'ed': 4.96}]}
  [2022-05-07 11:22:03,035] [    INFO] - audio duration: 4.9968125, elapsed time: 15.974023818969727, RTF=3.1968427510477384
  [2022-05-07 11:22:03,037] [    INFO] - asr websocket client finished
  [2022-05-07 11:22:03,037] [    INFO] - 我认为跑步最重要的就是给我带来了身体健康。
  [2022-05-07 11:22:03,037] [    INFO] - Response time 15.977116 s.
  ```

- 使用脚本调用
  
  若 `127.0.0.1` 不能访问，则需要使用实际服务 IP 地址

  ```bash
  python3 local/websocket_client.py --server_ip 127.0.0.1 --port 8090 --punc.server_ip 127.0.0.1 --punc.port 8190 --wavfile ./zh.wav
  ```
  输出:
  ```text
  [2022-05-07 11:11:02,984] [    INFO] - Start to do streaming asr client
  [2022-05-07 11:11:02,985] [    INFO] - asr websocket client start
  [2022-05-07 11:11:02,985] [    INFO] - endpoint: ws://127.0.0.1:8490/paddlespeech/asr/streaming
  [2022-05-07 11:11:02,986] [    INFO] - start to process the wavscp: ./zh.wav
  [2022-05-07 11:11:03,006] [    INFO] - client receive msg={"status": "ok", "signal": "server_ready"}
  [2022-05-07 11:11:03,021] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:03,034] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:03,046] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:03,058] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:03,070] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:03,081] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:03,092] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:03,102] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:03,629] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:03,638] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:03,645] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:03,653] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:03,661] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:03,668] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:03,676] [    INFO] - client receive msg={'result': ''}
  [2022-05-07 11:11:04,402] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:11:04,510] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:11:04,619] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:11:04,743] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:11:04,849] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:11:04,956] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:11:05,063] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:11:05,170] [    INFO] - client receive msg={'result': '我认为，跑'}
  [2022-05-07 11:11:05,876] [    INFO] - client receive msg={'result': '我认为，跑步最重要的。'}
  [2022-05-07 11:11:06,019] [    INFO] - client receive msg={'result': '我认为，跑步最重要的。'}
  [2022-05-07 11:11:06,184] [    INFO] - client receive msg={'result': '我认为，跑步最重要的。'}
  [2022-05-07 11:11:06,342] [    INFO] - client receive msg={'result': '我认为，跑步最重要的。'}
  [2022-05-07 11:11:06,537] [    INFO] - client receive msg={'result': '我认为，跑步最重要的。'}
  [2022-05-07 11:11:06,727] [    INFO] - client receive msg={'result': '我认为，跑步最重要的。'}
  [2022-05-07 11:11:06,871] [    INFO] - client receive msg={'result': '我认为，跑步最重要的。'}
  [2022-05-07 11:11:07,617] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:11:07,769] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:11:07,905] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:11:08,043] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:11:08,186] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:11:08,326] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:11:08,466] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:11:08,611] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是。'}
  [2022-05-07 11:11:09,431] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是给。'}
  [2022-05-07 11:11:09,571] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是给。'}
  [2022-05-07 11:11:09,714] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是给。'}
  [2022-05-07 11:11:09,853] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是给。'}
  [2022-05-07 11:11:09,992] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是给。'}
  [2022-05-07 11:11:10,129] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是给。'}
  [2022-05-07 11:11:10,266] [    INFO] - client receive msg={'result': '我认为，跑步最重要的就是给。'}
  [2022-05-07 11:11:11,113] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:11:11,296] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:11:11,439] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:11:11,582] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:11:11,727] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:11:11,869] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:11:12,011] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:11:12,153] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了。'}
  [2022-05-07 11:11:12,969] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康。'}
  [2022-05-07 11:11:13,137] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康。'}
  [2022-05-07 11:11:13,297] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康。'}
  [2022-05-07 11:11:13,456] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康。'}
  [2022-05-07 11:11:13,615] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康。'}
  [2022-05-07 11:11:13,776] [    INFO] - client receive msg={'result': '我认为跑步最重要的就是给我带来了身体健康。'}
  [2022-05-07 11:11:18,915] [    INFO] - client final receive msg={'status': 'ok', 'signal': 'finished', 'result': '我认为跑步最重要的就是给我带来了身体健康。', 'times': [{'w': '我', 'bg': 0.0, 'ed': 0.7000000000000001}, {'w': '认', 'bg': 0.7000000000000001, 'ed': 0.84}, {'w': '为', 'bg': 0.84, 'ed': 1.0}, {'w': '跑', 'bg': 1.0, 'ed': 1.18}, {'w': '步', 'bg': 1.18, 'ed': 1.36}, {'w': '最', 'bg': 1.36, 'ed': 1.5}, {'w': '重', 'bg': 1.5, 'ed': 1.6400000000000001}, {'w': '要', 'bg': 1.6400000000000001, 'ed': 1.78}, {'w': '的', 'bg': 1.78, 'ed': 1.9000000000000001}, {'w': '就', 'bg': 1.9000000000000001, 'ed': 2.06}, {'w': '是', 'bg': 2.06, 'ed': 2.62}, {'w': '给', 'bg': 2.62, 'ed': 3.16}, {'w': '我', 'bg': 3.16, 'ed': 3.3200000000000003}, {'w': '带', 'bg': 3.3200000000000003, 'ed': 3.48}, {'w': '来', 'bg': 3.48, 'ed': 3.62}, {'w': '了', 'bg': 3.62, 'ed': 3.7600000000000002}, {'w': '身', 'bg': 3.7600000000000002, 'ed': 3.9}, {'w': '体', 'bg': 3.9, 'ed': 4.0600000000000005}, {'w': '健', 'bg': 4.0600000000000005, 'ed': 4.26}, {'w': '康', 'bg': 4.26, 'ed': 4.96}]}
  [2022-05-07 11:11:18,915] [    INFO] - audio duration: 4.9968125, elapsed time: 15.928460597991943, RTF=3.187724293835709
  [2022-05-07 11:11:18,916] [    INFO] - asr websocket client finished : 我认为跑步最重要的就是给我带来了身体健康
  ```

## 从音频文件(.wav 格式 或者.mp3 格式)生成字幕文件 (.srt 格式)

**注意:** 默认部署在 `cpu` 设备上，可以通过修改服务配置文件中 `device` 参数将语音识别和标点预测部署在不同的 `gpu` 上。

使用 `streaming_asr_server.py` 和 `punc_server.py` 两个服务，分别启动流式语音识别和标点预测服务。调用 `local/websocket_client_srt.py` 脚本可以同时调用流式语音识别和标点预测服务，并生成对应的字幕文件(.srt 格式)。

**使用该脚本前需要安装 ffmpeg**

**应该在对应的`.../demos/streaming_asr_server/`目录下运行以下脚本**

### 1. 启动服务端

```bash
# 注意：流式语音识别和标点预测可以通过配置文件部署在不同的显卡上
paddlespeech_server start --config_file ./conf/ws_conformer_wenetspeech_application.yaml
```

打开另一个终端，运行以下命令：
```bash
paddlespeech_server start --config_file conf/punc_application.yaml
```

### 2. 启动客户端

  ```bash
  python3 local/websocket_client_srt.py --server_ip 127.0.0.1 --port 8090 --punc.server_ip 127.0.0.1 --punc.port 8190 --wavfile ../../data/认知.mp3
  ```
  Output:
  ```text
  [2023-03-30 23:26:13,991] [    INFO] - Start to do streaming asr client
[2023-03-30 23:26:13,994] [    INFO] - asr websocket client start
[2023-03-30 23:26:13,994] [    INFO] - endpoint: http://127.0.0.1:8190/paddlespeech/text
[2023-03-30 23:26:13,994] [    INFO] - endpoint: ws://127.0.0.1:8090/paddlespeech/asr/streaming
[2023-03-30 23:26:14,475] [    INFO] - /home/fxb/PaddleSpeech-develop/data/认知.mp3 converted to /home/fxb/PaddleSpeech-develop/data/认知.wav
[2023-03-30 23:26:14,476] [    INFO] - start to process the wavscp: /home/fxb/PaddleSpeech-develop/data/认知.wav
[2023-03-30 23:26:14,515] [    INFO] - client receive msg={"status": "ok", "signal": "server_ready"}
[2023-03-30 23:26:14,533] [    INFO] - client receive msg={'result': ''}
[2023-03-30 23:26:14,545] [    INFO] - client receive msg={'result': ''}
[2023-03-30 23:26:14,556] [    INFO] - client receive msg={'result': ''}
[2023-03-30 23:26:14,572] [    INFO] - client receive msg={'result': ''}
[2023-03-30 23:26:14,588] [    INFO] - client receive msg={'result': ''}
[2023-03-30 23:26:14,600] [    INFO] - client receive msg={'result': ''}
[2023-03-30 23:26:14,613] [    INFO] - client receive msg={'result': ''}
[2023-03-30 23:26:14,626] [    INFO] - client receive msg={'result': ''}
[2023-03-30 23:26:15,122] [    INFO] - client receive msg={'result': '第一部'}
[2023-03-30 23:26:15,135] [    INFO] - client receive msg={'result': '第一部'}
[2023-03-30 23:26:15,154] [    INFO] - client receive msg={'result': '第一部'}
[2023-03-30 23:26:15,163] [    INFO] - client receive msg={'result': '第一部'}
[2023-03-30 23:26:15,175] [    INFO] - client receive msg={'result': '第一部'}
[2023-03-30 23:26:15,185] [    INFO] - client receive msg={'result': '第一部'}
[2023-03-30 23:26:15,196] [    INFO] - client receive msg={'result': '第一部'}
[2023-03-30 23:26:15,637] [    INFO] - client receive msg={'result': '第一部分是认'}
[2023-03-30 23:26:15,648] [    INFO] - client receive msg={'result': '第一部分是认'}
[2023-03-30 23:26:15,657] [    INFO] - client receive msg={'result': '第一部分是认'}
[2023-03-30 23:26:15,666] [    INFO] - client receive msg={'result': '第一部分是认'}
[2023-03-30 23:26:15,676] [    INFO] - client receive msg={'result': '第一部分是认'}
[2023-03-30 23:26:15,683] [    INFO] - client receive msg={'result': '第一部分是认'}
[2023-03-30 23:26:15,691] [    INFO] - client receive msg={'result': '第一部分是认'}
[2023-03-30 23:26:15,703] [    INFO] - client receive msg={'result': '第一部分是认'}
[2023-03-30 23:26:16,146] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:16,159] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:16,167] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:16,177] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:16,187] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:16,197] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:16,210] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:16,694] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:16,704] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:16,713] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:16,725] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:16,737] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:16,749] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:16,759] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:16,770] [    INFO] - client receive msg={'result': '第一部分是认知部分'}
[2023-03-30 23:26:17,279] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通'}
[2023-03-30 23:26:17,302] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通'}
[2023-03-30 23:26:17,316] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通'}
[2023-03-30 23:26:17,332] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通'}
[2023-03-30 23:26:17,343] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通'}
[2023-03-30 23:26:17,358] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通'}
[2023-03-30 23:26:17,373] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通'}
[2023-03-30 23:26:17,958] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图'}
[2023-03-30 23:26:17,971] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图'}
[2023-03-30 23:26:17,987] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图'}
[2023-03-30 23:26:18,000] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图'}
[2023-03-30 23:26:18,017] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图'}
[2023-03-30 23:26:18,028] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图'}
[2023-03-30 23:26:18,038] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图'}
[2023-03-30 23:26:18,049] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图'}
[2023-03-30 23:26:18,653] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本'}
[2023-03-30 23:26:18,689] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本'}
[2023-03-30 23:26:18,701] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本'}
[2023-03-30 23:26:18,712] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本'}
[2023-03-30 23:26:18,723] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本'}
[2023-03-30 23:26:18,750] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本'}
[2023-03-30 23:26:18,767] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本'}
[2023-03-30 23:26:19,295] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式'}
[2023-03-30 23:26:19,307] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式'}
[2023-03-30 23:26:19,323] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式'}
[2023-03-30 23:26:19,332] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式'}
[2023-03-30 23:26:19,342] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式'}
[2023-03-30 23:26:19,349] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式'}
[2023-03-30 23:26:19,373] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式'}
[2023-03-30 23:26:19,389] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式'}
[2023-03-30 23:26:20,046] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生'}
[2023-03-30 23:26:20,055] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生'}
[2023-03-30 23:26:20,067] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生'}
[2023-03-30 23:26:20,076] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生'}
[2023-03-30 23:26:20,094] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生'}
[2023-03-30 23:26:20,124] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生'}
[2023-03-30 23:26:20,135] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生'}
[2023-03-30 23:26:20,732] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解'}
[2023-03-30 23:26:20,742] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解'}
[2023-03-30 23:26:20,757] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解'}
[2023-03-30 23:26:20,770] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解'}
[2023-03-30 23:26:20,782] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解'}
[2023-03-30 23:26:20,798] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解'}
[2023-03-30 23:26:20,815] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解'}
[2023-03-30 23:26:20,834] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解'}
[2023-03-30 23:26:21,390] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感'}
[2023-03-30 23:26:21,405] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感'}
[2023-03-30 23:26:21,416] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感'}
[2023-03-30 23:26:21,428] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感'}
[2023-03-30 23:26:21,448] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感'}
[2023-03-30 23:26:21,459] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感'}
[2023-03-30 23:26:21,473] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感'}
[2023-03-30 23:26:22,065] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作'}
[2023-03-30 23:26:22,085] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作'}
[2023-03-30 23:26:22,110] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作'}
[2023-03-30 23:26:22,118] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作'}
[2023-03-30 23:26:22,137] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作'}
[2023-03-30 23:26:22,144] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作'}
[2023-03-30 23:26:22,154] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作'}
[2023-03-30 23:26:22,169] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作'}
[2023-03-30 23:26:22,698] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理'}
[2023-03-30 23:26:22,709] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理'}
[2023-03-30 23:26:22,731] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理'}
[2023-03-30 23:26:22,743] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理'}
[2023-03-30 23:26:22,755] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理'}
[2023-03-30 23:26:22,771] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理'}
[2023-03-30 23:26:22,782] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理'}
[2023-03-30 23:26:23,415] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生'}
[2023-03-30 23:26:23,430] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生'}
[2023-03-30 23:26:23,442] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生'}
[2023-03-30 23:26:23,456] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生'}
[2023-03-30 23:26:23,470] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生'}
[2023-03-30 23:26:23,487] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生'}
[2023-03-30 23:26:23,498] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生'}
[2023-03-30 23:26:23,524] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生'}
[2023-03-30 23:26:24,200] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备'}
[2023-03-30 23:26:24,210] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备'}
[2023-03-30 23:26:24,219] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备'}
[2023-03-30 23:26:24,231] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备'}
[2023-03-30 23:26:24,250] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备'}
[2023-03-30 23:26:24,262] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备'}
[2023-03-30 23:26:24,272] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备'}
[2023-03-30 23:26:24,898] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致'}
[2023-03-30 23:26:24,903] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致'}
[2023-03-30 23:26:24,907] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致'}
[2023-03-30 23:26:24,932] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致'}
[2023-03-30 23:26:24,957] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致'}
[2023-03-30 23:26:24,979] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致'}
[2023-03-30 23:26:24,991] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致'}
[2023-03-30 23:26:25,011] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致'}
[2023-03-30 23:26:25,616] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:25,625] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:25,648] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:25,658] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:25,669] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:25,681] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:25,690] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:25,707] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:26,378] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:26,384] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:26,389] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:26,397] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:26,402] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:26,415] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:26,428] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知'}
[2023-03-30 23:26:27,008] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使'}
[2023-03-30 23:26:27,018] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使'}
[2023-03-30 23:26:27,026] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使'}
[2023-03-30 23:26:27,037] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使'}
[2023-03-30 23:26:27,046] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使'}
[2023-03-30 23:26:27,054] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使'}
[2023-03-30 23:26:27,062] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使'}
[2023-03-30 23:26:27,070] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使'}
[2023-03-30 23:26:27,735] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传'}
[2023-03-30 23:26:27,745] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传'}
[2023-03-30 23:26:27,755] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传'}
[2023-03-30 23:26:27,769] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传'}
[2023-03-30 23:26:27,783] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传'}
[2023-03-30 23:26:27,794] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传'}
[2023-03-30 23:26:27,804] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传'}
[2023-03-30 23:26:28,454] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内'}
[2023-03-30 23:26:28,472] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内'}
[2023-03-30 23:26:28,481] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内'}
[2023-03-30 23:26:28,489] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内'}
[2023-03-30 23:26:28,499] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内'}
[2023-03-30 23:26:28,533] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内'}
[2023-03-30 23:26:28,543] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内'}
[2023-03-30 23:26:28,556] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内'}
[2023-03-30 23:26:29,212] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图'}
[2023-03-30 23:26:29,222] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图'}
[2023-03-30 23:26:29,233] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图'}
[2023-03-30 23:26:29,246] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图'}
[2023-03-30 23:26:29,258] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图'}
[2023-03-30 23:26:29,270] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图'}
[2023-03-30 23:26:29,286] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图'}
[2023-03-30 23:26:30,003] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅'}
[2023-03-30 23:26:30,013] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅'}
[2023-03-30 23:26:30,038] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅'}
[2023-03-30 23:26:30,048] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅'}
[2023-03-30 23:26:30,062] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅'}
[2023-03-30 23:26:30,074] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅'}
[2023-03-30 23:26:30,114] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅'}
[2023-03-30 23:26:30,125] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅'}
[2023-03-30 23:26:30,856] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说'}
[2023-03-30 23:26:30,876] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说'}
[2023-03-30 23:26:30,885] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说'}
[2023-03-30 23:26:30,897] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说'}
[2023-03-30 23:26:30,914] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说'}
[2023-03-30 23:26:30,940] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说'}
[2023-03-30 23:26:30,952] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说'}
[2023-03-30 23:26:31,655] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明'}
[2023-03-30 23:26:31,696] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明'}
[2023-03-30 23:26:31,709] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明'}
[2023-03-30 23:26:31,718] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明'}
[2023-03-30 23:26:31,727] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明'}
[2023-03-30 23:26:31,740] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明'}
[2023-03-30 23:26:31,757] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明'}
[2023-03-30 23:26:31,768] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明'}
[2023-03-30 23:26:32,476] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助'}
[2023-03-30 23:26:32,486] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助'}
[2023-03-30 23:26:32,495] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助'}
[2023-03-30 23:26:32,549] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助'}
[2023-03-30 23:26:32,560] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助'}
[2023-03-30 23:26:32,574] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助'}
[2023-03-30 23:26:32,590] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助'}
[2023-03-30 23:26:33,338] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生'}
[2023-03-30 23:26:33,356] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生'}
[2023-03-30 23:26:33,368] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生'}
[2023-03-30 23:26:33,386] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生'}
[2023-03-30 23:26:33,397] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生'}
[2023-03-30 23:26:33,409] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生'}
[2023-03-30 23:26:33,424] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生'}
[2023-03-30 23:26:33,434] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生'}
[2023-03-30 23:26:34,352] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感'}
[2023-03-30 23:26:34,364] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感'}
[2023-03-30 23:26:34,377] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感'}
[2023-03-30 23:26:34,395] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感'}
[2023-03-30 23:26:34,410] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感'}
[2023-03-30 23:26:34,423] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感'}
[2023-03-30 23:26:34,434] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感'}
[2023-03-30 23:26:35,373] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有'}
[2023-03-30 23:26:35,397] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有'}
[2023-03-30 23:26:35,410] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有'}
[2023-03-30 23:26:35,420] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有'}
[2023-03-30 23:26:35,437] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有'}
[2023-03-30 23:26:35,448] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有'}
[2023-03-30 23:26:35,460] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有'}
[2023-03-30 23:26:35,473] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有'}
[2023-03-30 23:26:36,288] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的'}
[2023-03-30 23:26:36,297] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的'}
[2023-03-30 23:26:36,306] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的'}
[2023-03-30 23:26:36,326] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的'}
[2023-03-30 23:26:36,336] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的'}
[2023-03-30 23:26:36,351] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的'}
[2023-03-30 23:26:36,365] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的'}
[2023-03-30 23:26:37,164] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象'}
[2023-03-30 23:26:37,173] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象'}
[2023-03-30 23:26:37,182] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象'}
[2023-03-30 23:26:37,192] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象'}
[2023-03-30 23:26:37,204] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象'}
[2023-03-30 23:26:37,232] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象'}
[2023-03-30 23:26:37,238] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象'}
[2023-03-30 23:26:37,252] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象'}
[2023-03-30 23:26:38,084] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后'}
[2023-03-30 23:26:38,093] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后'}
[2023-03-30 23:26:38,106] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后'}
[2023-03-30 23:26:38,122] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后'}
[2023-03-30 23:26:38,140] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后'}
[2023-03-30 23:26:38,181] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后'}
[2023-03-30 23:26:38,206] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后'}
[2023-03-30 23:26:39,094] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合'}
[2023-03-30 23:26:39,111] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合'}
[2023-03-30 23:26:39,132] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合'}
[2023-03-30 23:26:39,150] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合'}
[2023-03-30 23:26:39,174] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合'}
[2023-03-30 23:26:39,190] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合'}
[2023-03-30 23:26:39,197] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合'}
[2023-03-30 23:26:39,212] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合'}
[2023-03-30 23:26:40,009] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实'}
[2023-03-30 23:26:40,094] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实'}
[2023-03-30 23:26:40,105] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实'}
[2023-03-30 23:26:40,128] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实'}
[2023-03-30 23:26:40,149] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实'}
[2023-03-30 23:26:40,173] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实'}
[2023-03-30 23:26:40,189] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实'}
[2023-03-30 23:26:40,200] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实'}
[2023-03-30 23:26:40,952] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用'}
[2023-03-30 23:26:40,973] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用'}
[2023-03-30 23:26:40,986] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用'}
[2023-03-30 23:26:40,999] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用'}
[2023-03-30 23:26:41,013] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用'}
[2023-03-30 23:26:41,022] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用'}
[2023-03-30 23:26:41,033] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用'}
[2023-03-30 23:26:41,819] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升'}
[2023-03-30 23:26:41,832] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升'}
[2023-03-30 23:26:41,845] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升'}
[2023-03-30 23:26:41,878] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升'}
[2023-03-30 23:26:41,886] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升'}
[2023-03-30 23:26:41,893] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升'}
[2023-03-30 23:26:41,925] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升'}
[2023-03-30 23:26:41,935] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升'}
[2023-03-30 23:26:42,562] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对'}
[2023-03-30 23:26:42,589] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对'}
[2023-03-30 23:26:42,621] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对'}
[2023-03-30 23:26:42,634] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对'}
[2023-03-30 23:26:42,644] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对'}
[2023-03-30 23:26:42,657] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对'}
[2023-03-30 23:26:42,668] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对'}
[2023-03-30 23:26:43,380] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴'}
[2023-03-30 23:26:43,389] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴'}
[2023-03-30 23:26:43,436] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴'}
[2023-03-30 23:26:43,448] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴'}
[2023-03-30 23:26:43,462] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴'}
[2023-03-30 23:26:43,472] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴'}
[2023-03-30 23:26:43,486] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴'}
[2023-03-30 23:26:43,496] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴'}
[2023-03-30 23:26:44,346] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴趣以'}
[2023-03-30 23:26:44,356] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴趣以'}
[2023-03-30 23:26:44,364] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴趣以'}
[2023-03-30 23:26:44,374] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴趣以'}
[2023-03-30 23:26:44,389] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴趣以'}
[2023-03-30 23:26:44,398] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴趣以'}
[2023-03-30 23:26:44,420] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴趣以'}
[2023-03-30 23:26:45,226] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴趣以及意义感'}
[2023-03-30 23:26:45,235] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴趣以及意义感'}
[2023-03-30 23:26:45,258] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴趣以及意义感'}
[2023-03-30 23:26:45,273] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴趣以及意义感'}
[2023-03-30 23:26:45,295] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴趣以及意义感'}
[2023-03-30 23:26:45,306] [    INFO] - client receive msg={'result': '第一部分是认知部分该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理让学生对设备有大致的认知随后使用真实传感器的内部构造图辅以文字说明进一步帮助学生对传感器有更深刻的印象最后结合具体的实践应用提升学生对实训的兴趣以及意义感'}
[2023-03-30 23:26:46,380] [    INFO] - client punctuation restored msg={'result': '第一部分是认知部分，该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理，让学生对设备有大致的认知。随后使用真实传感器的内部构造图，辅以文字说明，进一步帮助学生对传感器有更深刻的印象，最后结合具体的实践应用，提升学生对实训的兴趣以及意义感。'}
[2023-03-30 23:27:01,059] [    INFO] - client final receive msg={'status': 'ok', 'signal': 'finished', 'result': '第一部分是认知部分，该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理，让学生对设备有大致的认知。随后使用真实传感器的内部构造图，辅以文字说明，进一步帮助学生对传感器有更深刻的印象，最后结合具体的实践应用，提升学生对实训的兴趣以及意义感。', 'times': [{'w': '第', 'bg': 0.0, 'ed': 0.36}, {'w': '一', 'bg': 0.36, 'ed': 0.48}, {'w': '部', 'bg': 0.48, 'ed': 0.62}, {'w': '分', 'bg': 0.62, 'ed': 0.8200000000000001}, {'w': '是', 'bg': 0.8200000000000001, 'ed': 1.08}, {'w': '认', 'bg': 1.08, 'ed': 1.28}, {'w': '知', 'bg': 1.28, 'ed': 1.44}, {'w': '部', 'bg': 1.44, 'ed': 1.58}, {'w': '分', 'bg': 1.58, 'ed': 2.1}, {'w': '该', 'bg': 2.1, 'ed': 2.6}, {'w': '部', 'bg': 2.6, 'ed': 2.72}, {'w': '分', 'bg': 2.72, 'ed': 2.94}, {'w': '通', 'bg': 2.94, 'ed': 3.16}, {'w': '过', 'bg': 3.16, 'ed': 3.36}, {'w': '示', 'bg': 3.36, 'ed': 3.54}, {'w': '意', 'bg': 3.54, 'ed': 3.68}, {'w': '图', 'bg': 3.68, 'ed': 3.9}, {'w': '和', 'bg': 3.9, 'ed': 4.14}, {'w': '文', 'bg': 4.14, 'ed': 4.32}, {'w': '本', 'bg': 4.32, 'ed': 4.46}, {'w': '的', 'bg': 4.46, 'ed': 4.58}, {'w': '形', 'bg': 4.58, 'ed': 4.72}, {'w': '式', 'bg': 4.72, 'ed': 5.0}, {'w': '向', 'bg': 5.0, 'ed': 5.32}, {'w': '学', 'bg': 5.32, 'ed': 5.5}, {'w': '生', 'bg': 5.5, 'ed': 5.66}, {'w': '讲', 'bg': 5.66, 'ed': 5.86}, {'w': '解', 'bg': 5.86, 'ed': 6.18}, {'w': '主', 'bg': 6.18, 'ed': 6.46}, {'w': '要', 'bg': 6.46, 'ed': 6.62}, {'w': '传', 'bg': 6.62, 'ed': 6.8}, {'w': '感', 'bg': 6.8, 'ed': 7.0}, {'w': '器', 'bg': 7.0, 'ed': 7.16}, {'w': '的', 'bg': 7.16, 'ed': 7.28}, {'w': '工', 'bg': 7.28, 'ed': 7.44}, {'w': '作', 'bg': 7.44, 'ed': 7.6000000000000005}, {'w': '原', 'bg': 7.6000000000000005, 'ed': 7.74}, {'w': '理', 'bg': 7.74, 'ed': 8.06}, {'w': '让', 'bg': 8.06, 'ed': 8.44}, {'w': '学', 'bg': 8.44, 'ed': 8.64}, {'w': '生', 'bg': 8.64, 'ed': 8.84}, {'w': '对', 'bg': 8.84, 'ed': 9.06}, {'w': '设', 'bg': 9.06, 'ed': 9.24}, {'w': '备', 'bg': 9.24, 'ed': 9.52}, {'w': '有', 'bg': 9.52, 'ed': 9.86}, {'w': '大', 'bg': 9.86, 'ed': 10.1}, {'w': '致', 'bg': 10.1, 'ed': 10.24}, {'w': '的', 
'bg': 10.24, 'ed': 10.36}, {'w': '认', 'bg': 10.36, 'ed': 10.5}, {'w': '知', 'bg': 10.5, 'ed': 11.040000000000001}, {'w': '随', 'bg': 11.040000000000001, 'ed': 11.56}, {'w': '后', 'bg': 11.56, 'ed': 11.82}, {'w': '使', 'bg': 11.82, 'ed': 12.1}, {'w': '用', 'bg': 12.1, 'ed': 12.26}, {'w': '真', 'bg': 12.26, 'ed': 12.44}, {'w': '实', 'bg': 12.44, 'ed': 12.620000000000001}, {'w': '传', 'bg': 12.620000000000001, 'ed': 12.780000000000001}, {'w': '感', 'bg': 12.780000000000001, 'ed': 12.94}, {'w': '器', 'bg': 12.94, 'ed': 13.1}, {'w': '的', 'bg': 13.1, 'ed': 13.26}, {'w': '内', 'bg': 13.26, 'ed': 13.42}, {'w': '部', 'bg': 13.42, 'ed': 13.56}, {'w': '构', 'bg': 13.56, 'ed': 13.700000000000001}, {'w': '造', 'bg': 13.700000000000001, 'ed': 13.86}, {'w': '图', 'bg': 13.86, 'ed': 14.280000000000001}, {'w': '辅', 'bg': 14.280000000000001, 'ed': 14.66}, {'w': '以', 'bg': 14.66, 'ed': 14.82}, {'w': '文', 'bg': 14.82, 'ed': 15.0}, {'w': '字', 'bg': 15.0, 'ed': 15.16}, {'w': '说', 'bg': 15.16, 'ed': 15.32}, {'w': '明', 'bg': 15.32, 'ed': 15.72}, {'w': '进', 'bg': 15.72, 'ed': 16.1}, {'w': '一', 'bg': 16.1, 'ed': 16.2}, {'w': '步', 'bg': 16.2, 'ed': 16.32}, {'w': '帮', 'bg': 16.32, 'ed': 16.48}, {'w': '助', 'bg': 16.48, 'ed': 16.66}, {'w': '学', 'bg': 16.66, 'ed': 16.82}, {'w': '生', 'bg': 16.82, 'ed': 17.12}, {'w': '对', 'bg': 17.12, 'ed': 17.48}, {'w': '传', 'bg': 17.48, 'ed': 17.66}, {'w': '感', 'bg': 17.66, 'ed': 17.84}, {'w': '器', 'bg': 17.84, 'ed': 18.12}, {'w': '有', 'bg': 18.12, 'ed': 18.42}, {'w': '更', 'bg': 18.42, 'ed': 18.66}, {'w': '深', 'bg': 18.66, 'ed': 18.88}, {'w': '刻', 'bg': 18.88, 'ed': 19.04}, {'w': '的', 'bg': 19.04, 'ed': 19.16}, {'w': '印', 'bg': 19.16, 'ed': 19.3}, {'w': '象', 'bg': 19.3, 'ed': 19.8}, {'w': '最', 'bg': 19.8, 'ed': 20.3}, {'w': '后', 'bg': 20.3, 'ed': 20.62}, {'w': '结', 'bg': 20.62, 'ed': 20.96}, {'w': '合', 'bg': 20.96, 'ed': 21.14}, {'w': '具', 'bg': 21.14, 'ed': 21.3}, {'w': '体', 'bg': 21.3, 'ed': 21.42}, {'w': '的', 'bg': 21.42, 'ed': 21.580000000000002}, {'w': '实', 'bg': 
21.580000000000002, 'ed': 21.76}, {'w': '践', 'bg': 21.76, 'ed': 21.92}, {'w': '应', 'bg': 21.92, 'ed': 22.080000000000002}, {'w': '用', 'bg': 22.080000000000002, 'ed': 22.44}, {'w': '提', 'bg': 22.44, 'ed': 22.78}, {'w': '升', 'bg': 22.78, 'ed': 22.94}, {'w': '学', 'bg': 22.94, 'ed': 23.12}, {'w': '生', 'bg': 23.12, 'ed': 23.34}, {'w': '对', 'bg': 23.34, 'ed': 23.62}, {'w': '实', 'bg': 23.62, 'ed': 23.82}, {'w': '训', 'bg': 23.82, 'ed': 23.96}, {'w': '的', 'bg': 23.96, 'ed': 24.12}, {'w': '兴', 'bg': 24.12, 'ed': 24.3}, {'w': '趣', 'bg': 24.3, 'ed': 24.6}, {'w': '以', 'bg': 24.6, 'ed': 24.88}, {'w': '及', 'bg': 24.88, 'ed': 25.12}, {'w': '意', 'bg': 25.12, 'ed': 25.34}, {'w': '义', 'bg': 25.34, 'ed': 25.46}, {'w': '感', 'bg': 25.46, 'ed': 26.04}]}
[2023-03-30 23:27:01,060] [    INFO] - audio duration: 26.04, elapsed time: 46.581613540649414, RTF=1.7888484462614982
sentences:  ['第一部分是认知部分', '该部分通过示意图和文本的形式向学生讲解主要传感器的工作原理', '让学生对设备有大致的认知', '随后使用真实传感器的内部构造图', '辅以文字说明', '进一步帮助学生对传感器有更深刻的印象', '最后结合具体的实践应用', '提升学生对实训的兴趣以及意义感']
relative_times:  [[0.0, 2.1], [2.1, 8.06], [8.06, 11.040000000000001], [11.040000000000001, 14.280000000000001], [14.280000000000001, 15.72], [15.72, 19.8], [19.8, 22.44], [22.44, 26.04]]
[2023-03-30 23:27:01,076] [    INFO] - results saved to /home/fxb/PaddleSpeech-develop/data/认知.srt
  ```


================================================
FILE: demos/streaming_asr_server/conf/application.yaml
================================================
# This is the parameter configuration file for PaddleSpeech Serving.

#################################################################################
#                             SERVER SETTING                                    #
#################################################################################
host: 0.0.0.0
port: 8090

# The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['asr_online']
# protocol = ['websocket'] (only one can be selected).
# websocket only supports the online engine type.
protocol: 'websocket'
engine_list: ['asr_online']


#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### ASR #########################################
################### speech task: asr; engine_type: online #######################
asr_online:
    model_type: 'conformer_u2pp_online_wenetspeech'
    am_model: # the pdmodel file of am static model [optional]
    am_params:  # the pdiparams file of am static model [optional]
    lang: 'zh'
    sample_rate: 16000
    cfg_path:
    # decode_method is declared exactly once: YAML mapping keys must be unique.
    decode_method: "attention_rescoring"
    num_decoding_left_chunks: -1
    force_yes: True
    device: 'cpu' # cpu or gpu:id
    continuous_decoding: True # enable continuous decoding when an endpoint is detected

    am_predictor_conf:
        device:  # set 'gpu:id' or 'cpu'
        switch_ir_optim: True
        glog_info: False  # True -> print glog
        summary: True  # False -> do not show predictor config

    chunk_buffer_conf:
        window_n: 7     # frame
        shift_n: 4      # frame
        window_ms: 25   # ms
        shift_ms: 10    # ms
        sample_rate: 16000
        sample_width: 2


================================================
FILE: demos/streaming_asr_server/conf/punc_application.yaml
================================================
# This is the parameter configuration file for PaddleSpeech Serving.

#################################################################################
#                             SERVER SETTING                                    #
#################################################################################
host: 0.0.0.0
port: 8190

# The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['text_python']
# protocol = ['http'] (only one can be selected).
# http only supports the offline engine type.
protocol: 'http'
engine_list: ['text_python']


#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### Text #########################################
################### text task: punc; engine_type: python #######################
text_python:
    task: punc
    model_type: 'ernie_linear_p3_wudao'
    lang: 'zh'
    # NOTE(review): sample_rate looks like a leftover from the ASR configs;
    # confirm whether the text engine reads it.
    sample_rate: 16000
    cfg_path: # [optional]
    ckpt_path: # [optional]
    vocab_file: # [optional]
    device: 'cpu' # set 'gpu:id' or 'cpu'


================================================
FILE: demos/streaming_asr_server/conf/ws_conformer_application.yaml
================================================
# This is the parameter configuration file for PaddleSpeech Serving.

#################################################################################
#                             SERVER SETTING                                    #
#################################################################################
host: 0.0.0.0
port: 8091

# The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['asr_online']
# protocol = ['websocket'] (only one can be selected).
# websocket only supports the online engine type.
protocol: 'websocket'
engine_list: ['asr_online']


#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### ASR #########################################
################### speech task: asr; engine_type: online #######################
asr_online:
    model_type: 'conformer_online_multicn'
    am_model: # the pdmodel file of am static model [optional]
    am_params:  # the pdiparams file of am static model [optional]
    lang: 'zh'
    sample_rate: 16000
    cfg_path:
    # decode_method is declared exactly once: YAML mapping keys must be unique.
    decode_method: "attention_rescoring"
    num_decoding_left_chunks: -1
    force_yes: True
    device: 'cpu' # cpu or gpu:id
    continuous_decoding: True # enable continuous decoding when an endpoint is detected

    am_predictor_conf:
        device:  # set 'gpu:id' or 'cpu'
        switch_ir_optim: True
        glog_info: False  # True -> print glog
        summary: True  # False -> do not show predictor config

    chunk_buffer_conf:
        window_n: 7     # frame
        shift_n: 4      # frame
        window_ms: 25   # ms
        shift_ms: 10    # ms
        sample_rate: 16000
        sample_width: 2


================================================
FILE: demos/streaming_asr_server/conf/ws_conformer_talcs_application.yaml
================================================
# This is the parameter configuration file for PaddleSpeech Serving.

#################################################################################
#                             SERVER SETTING                                    #
#################################################################################
host: 0.0.0.0
port: 8090

# The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['asr_online']
# protocol = ['websocket'] (only one can be selected).
# websocket only supports the online engine type.
protocol: 'websocket'
engine_list: ['asr_online']


#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### ASR #########################################
################### speech task: asr; engine_type: online #######################
asr_online:
    model_type: 'conformer_online_talcs'
    am_model: # the pdmodel file of am static model [optional]
    am_params:  # the pdiparams file of am static model [optional]
    codeswitch: True
    lang: 'zh_en'
    sample_rate: 16000
    cfg_path:
    # decode_method is declared exactly once: YAML mapping keys must be unique.
    decode_method: "attention_rescoring"
    num_decoding_left_chunks: -1
    force_yes: True
    device: 'cpu' # cpu or gpu:id
    continuous_decoding: True # enable continuous decoding when an endpoint is detected

    am_predictor_conf:
        device:  # set 'gpu:id' or 'cpu'
        switch_ir_optim: True
        glog_info: False  # True -> print glog
        summary: True  # False -> do not show predictor config

    chunk_buffer_conf:
        window_n: 7     # frame
        shift_n: 4      # frame
        window_ms: 25   # ms
        shift_ms: 10    # ms
        sample_rate: 16000
        sample_width: 2


================================================
FILE: demos/streaming_asr_server/conf/ws_conformer_wenetspeech_application.yaml
================================================
# This is the parameter configuration file for PaddleSpeech Serving.

#################################################################################
#                             SERVER SETTING                                    #
#################################################################################
host: 0.0.0.0
port: 8090

# The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['asr_online']
# protocol = ['websocket'] (only one can be selected).
# websocket only supports the online engine type.
protocol: 'websocket'
engine_list: ['asr_online']


#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### ASR #########################################
################### speech task: asr; engine_type: online #######################
asr_online:
    model_type: 'conformer_online_wenetspeech'
    am_model: # the pdmodel file of am static model [optional]
    am_params:  # the pdiparams file of am static model [optional]
    lang: 'zh'
    sample_rate: 16000
    cfg_path:
    # decode_method is declared exactly once: YAML mapping keys must be unique.
    decode_method: "attention_rescoring"
    force_yes: True
    device: 'cpu' # cpu or gpu:id
    continuous_decoding: True # enable continuous decoding when an endpoint is detected
    num_decoding_left_chunks: -1

    am_predictor_conf:
        device:  # set 'gpu:id' or 'cpu'
        switch_ir_optim: True
        glog_info: False  # True -> print glog
        summary: True  # False -> do not show predictor config

    chunk_buffer_conf:
        window_n: 7     # frame
        shift_n: 4      # frame
        window_ms: 25   # ms
        shift_ms: 10    # ms
        sample_rate: 16000
        sample_width: 2


================================================
FILE: demos/streaming_asr_server/conf/ws_conformer_wenetspeech_application_faster.yaml
================================================
# This is the parameter configuration file for PaddleSpeech Serving.

#################################################################################
#                             SERVER SETTING                                    #
#################################################################################
host: 0.0.0.0
port: 8090

# The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['asr_online']
# protocol = ['websocket'] (only one can be selected).
# websocket only supports the online engine type.
protocol: 'websocket'
engine_list: ['asr_online']


#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### ASR #########################################
################### speech task: asr; engine_type: online #######################
asr_online:
    model_type: 'conformer_online_wenetspeech'
    am_model: # the pdmodel file of am static model [optional]
    am_params:  # the pdiparams file of am static model [optional]
    lang: 'zh'
    sample_rate: 16000
    cfg_path:
    # decode_method is declared exactly once: YAML mapping keys must be unique.
    decode_method: "attention_rescoring"
    force_yes: True
    device: 'cpu' # cpu or gpu:id
    continuous_decoding: True # enable continuous decoding when an endpoint is detected
    num_decoding_left_chunks: 16

    am_predictor_conf:
        device:  # set 'gpu:id' or 'cpu'
        switch_ir_optim: True
        glog_info: False  # True -> print glog
        summary: True  # False -> do not show predictor config

    chunk_buffer_conf:
        window_n: 7     # frame
        shift_n: 4      # frame
        window_ms: 25   # ms
        shift_ms: 10    # ms
        sample_rate: 16000
        sample_width: 2


================================================
FILE: demos/streaming_asr_server/conf/ws_ds2_application.yaml
================================================
# This is the parameter configuration file for PaddleSpeech Serving.

#################################################################################
#                             SERVER SETTING                                    #
#################################################################################
host: 0.0.0.0
port: 8090

# The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['asr_online-inference', 'asr_online-onnx']
# protocol = ['websocket'] (only one can be selected).
# websocket only supports the online engine types.
protocol: 'websocket'
engine_list: ['asr_online-onnx']


#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################


################################### ASR #########################################
################### speech task: asr; engine_type: online-onnx #######################
asr_online-onnx:
    model_type: 'deepspeech2online_wenetspeech'
    am_model:  # the pdmodel file of onnx am static model [optional]
    am_params:  # the pdiparams file of am static model [optional]
    lang: 'zh'
    sample_rate: 16000
    cfg_path:
    decode_method:
    num_decoding_left_chunks:
    force_yes: True
    device: 'cpu' # cpu or gpu:id

    # https://onnxruntime.ai/docs/api/python/api_summary.html#inferencesession
    am_predictor_conf:
        device: 'cpu' # set 'gpu:id' or 'cpu'
        graph_optimization_level: 0
        intra_op_num_threads: 0 # Sets the number of threads used to parallelize the execution within nodes.
        inter_op_num_threads: 0 # Sets the number of threads used to parallelize the execution of the graph (across nodes).
        log_severity_level: 2   # Log severity level. Applies to session load, initialization, etc. 0:Verbose, 1:Info, 2:Warning, 3:Error, 4:Fatal. Default is 2.
        log_verbosity_level: 0  # VLOG level if DEBUG build and session_log_severity_level is 0. Applies to session load, initialization, etc. Default is 0.

    chunk_buffer_conf:
        frame_duration_ms: 85
        sample_rate: 16000
        sample_width: 2
        window_n: 7     # frame
        shift_n: 4      # frame
        window_ms: 25   # ms
        # NOTE(review): shift_ms must appear only once in this mapping. A shadowed
        # duplicate (`shift_ms: 40`) was dropped; 10 is the value PyYAML already
        # used (last occurrence wins) -- confirm it is the intended shift.
        shift_ms: 10    # ms


################################### ASR #########################################
################### speech task: asr; engine_type: online-inference #######################
asr_online-inference:
    model_type: 'deepspeech2online_wenetspeech'
    am_model:    # the pdmodel file of am static model [optional]
    am_params:   # the pdiparams file of am static model [optional]
    lang: 'zh'
    sample_rate: 16000
    cfg_path:
    decode_method:
    num_decoding_left_chunks:
    force_yes: True
    device: 'cpu' # cpu or gpu:id

    am_predictor_conf:
        device:  # set 'gpu:id' or 'cpu'
        switch_ir_optim: True
        glog_info: False  # True -> print glog
        summary: True  # False -> do not show predictor config

    chunk_buffer_conf:
        frame_duration_ms: 85
        sample_rate: 16000
        sample_width: 2
        window_n: 7     # frame
        shift_n: 4      # frame
        window_ms: 25   # ms
        # NOTE(review): shift_ms must appear only once in this mapping. A shadowed
        # duplicate (`shift_ms: 40`) was dropped; 10 is the value PyYAML already
        # used (last occurrence wins) -- confirm it is the intended shift.
        shift_ms: 10    # ms

================================================
FILE: demos/streaming_asr_server/local/punc_server.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse

from paddlespeech.cli.log import logger
from paddlespeech.server.bin.paddlespeech_server import ServerExecutor
if __name__ == "__main__":
    # Command-line options: the service YAML (mandatory) and the log file path.
    cli = argparse.ArgumentParser(
        add_help=True, prog='paddlespeech_server.start')
    option_table = (
        ("--config_file", dict(
            action="store",
            help="yaml file of the app",
            default=None,
            required=True)),
        ("--log_file", dict(
            action="store",
            help="log file",
            default="./log/paddlespeech.log")),
    )
    for flag, spec in option_table:
        cli.add_argument(flag, **spec)

    logger.info("start to parse the args")
    options = cli.parse_args()

    logger.info("start to launch the punctuation server")
    # The executor instance is invoked directly with the parsed paths.
    launch = ServerExecutor()
    launch(config_file=options.config_file, log_file=options.log_file)


================================================
FILE: demos/streaming_asr_server/local/rtf_from_log.py
================================================
#!/usr/bin/env python3
import argparse

def parse_rtf_line(line):
    """Extract audio duration and processing time from one client log line.

    The expected payload looks like::

        audio duration: 6.126, elapsed time: 3.471978187561035, RTF=0.5667610492264177

    Args:
        line (str): a raw line of the websocket client log.

    Returns:
        dict | None: ``{'T': <audio duration>, 'P': <elapsed time>}`` as floats,
        or ``None`` when the line carries no ``RTF=`` record or is malformed.
    """
    if 'RTF=' not in line:
        return None

    line = line.strip()
    # Drop the logger prefix: everything before the first "audio".
    beg = line.find("audio")
    if beg < 0:
        return None

    vals = []
    for elem in line[beg:].split(','):
        if "RTF=" in elem:
            continue
        _, sep, val = elem.partition(":")
        if not sep:
            return None
        try:
            # float() instead of eval(): log text must never be executed.
            vals.append(float(val))
        except ValueError:
            return None

    if len(vals) < 2:
        return None
    return {'T': vals[0], 'P': vals[1]}


def summarize_rtf(records):
    """Aggregate parsed records into an overall real-time factor.

    Args:
        records (list[dict]): items produced by ``parse_rtf_line``.

    Returns:
        tuple[float, int]: ``(total P / total T, number of records)``. The RTF
        is ``nan`` when the accumulated audio duration is zero (e.g. no records).
    """
    T = 0.0
    P = 0.0
    n = 0
    for m in records:
        # not accurate, may have duplicate log
        n += 1
        T += m['T']
        P += m['P']

    rtf = P / T if T else float('nan')
    return rtf, n


if __name__ == '__main__':
    parser = argparse.ArgumentParser(prog=__doc__)
    parser.add_argument(
        '--logfile', type=str, required=True, help='ws client log file')

    args = parser.parse_args()

    rtfs = []
    with open(args.logfile, 'r') as f:
        for line in f:
            meta = parse_rtf_line(line)
            if meta is not None:
                rtfs.append(meta)

    rtf, n = summarize_rtf(rtfs)
    print(f"RTF: {rtf}, utts: {n}")


================================================
FILE: demos/streaming_asr_server/local/streaming_asr_server.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse

from paddlespeech.cli.log import logger
from paddlespeech.server.bin.paddlespeech_server import ServerExecutor
if __name__ == "__main__":
    # Command-line options: the service YAML (mandatory) and the log file path.
    cli = argparse.ArgumentParser(
        add_help=True, prog='paddlespeech_server.start')
    option_table = (
        ("--config_file", dict(
            action="store",
            help="yaml file of the app",
            default=None,
            required=True)),
        ("--log_file", dict(
            action="store",
            help="log file",
            default="./log/paddlespeech.log")),
    )
    for flag, spec in option_table:
        cli.add_argument(flag, **spec)

    logger.info("start to parse the args")
    options = cli.parse_args()

    logger.info("start to launch the streaming asr server")
    # The executor instance is invoked directly with the parsed paths.
    launch = ServerExecutor()
    launch(config_file=options.config_file, log_file=options.log_file)


================================================
FILE: demos/streaming_asr_server/local/test.sh
================================================
#!/bin/bash

# Run the websocket client over every utterance in a wav.scp file and compute
# the overall RTF from the client log.
#
# usage: local/test.sh wav_scp
#   wav_scp: text file with one "<utt_name> <wav_path>" pair per line; it can be
#            generated from `speechx/examples/ds2_ol/aishell`

if [ $# -ne 1 ]; then
    echo "usage: $0 wav_scp"
    # exit statuses must lie in 0..255; use a plain failure code
    exit 1
fi

scp=$1

# calc RTF
exp=exp
mkdir -p "$exp"

# Quote the expansions so paths containing spaces are passed as one argument.
python3 local/websocket_client.py --server_ip 127.0.0.1 --port 8090 --wavscp "$scp" &> "$exp/log.rsl"

python3 local/rtf_from_log.py --logfile "$exp/log.rsl"


================================================
FILE: demos/streaming_asr_server/local/websocket_client.py
================================================
#!/usr/bin/python
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# calc avg RTF(NOT Accurate): grep -rn RTF log.txt | awk '{print $NF}' | awk -F "=" '{sum += $NF} END {print "all time",sum, "audio num", NR,  "RTF", sum/NR}'
# python3 websocket_client.py --server_ip 127.0.0.1 --port 8290 --punc.server_ip 127.0.0.1 --punc.port 8190 --wavfile ./zh.wav
# python3 websocket_client.py --server_ip 127.0.0.1 --port 8290 --wavfile ./zh.wav
import argparse
import asyncio
import codecs
import os

from paddlespeech.cli.log import logger
from paddlespeech.server.utils.audio_handler import ASRWsAudioHandler


def main(args):
    """Stream audio to the ASR websocket server and report the transcripts.

    Args:
        args (argparse.Namespace): parsed options. Reads ``server_ip``,
            ``port``, ``endpoint``, ``punc_server_ip``, ``punc_server_port``,
            ``wavfile`` and ``wavscp``.

    A single file (``args.wavfile``) is recognized and its text is logged.
    A batch list (``args.wavscp``, one "<utt_name> <wav_path>" per line) is
    recognized line by line and written to ``result.txt``. Either input is
    skipped silently when unset or when the path does not exist.
    """
    logger.info("asr websocket client start")
    handler = ASRWsAudioHandler(
        args.server_ip,
        args.port,
        endpoint=args.endpoint,
        punc_server_ip=args.punc_server_ip,
        punc_server_port=args.punc_server_port)
    loop = asyncio.get_event_loop()

    # support to process single audio file
    if args.wavfile and os.path.exists(args.wavfile):
        logger.info(f"start to process the wavfile: {args.wavfile}")
        result = loop.run_until_complete(handler.run(args.wavfile))
        result = result["result"]
        logger.info(f"asr websocket client finished : {result}")

    # support to process batch audios from wav.scp
    if args.wavscp and os.path.exists(args.wavscp):
        logger.info(f"start to process the wavscp: {args.wavscp}")
        with codecs.open(args.wavscp, 'r', encoding='utf-8') as f,\
             codecs.open("result.txt", 'w', encoding='utf-8') as w:
            for line in f:
                line = line.strip()
                if not line:
                    # tolerate blank lines (e.g. a trailing newline at EOF)
                    continue
                # split once so a path containing whitespace stays intact
                fields = line.split(maxsplit=1)
                if len(fields) != 2:
                    logger.info(f"skip malformed wavscp line: {line}")
                    continue
                utt_name, utt_path = fields
                result = loop.run_until_complete(handler.run(utt_path))
                result = result["result"]
                w.write(f"{utt_name} {result}\n")


if __name__ == "__main__":
    logger.info("Start to do streaming asr client")
    cli = argparse.ArgumentParser()

    # ASR server address
    cli.add_argument(
        '--server_ip', help='server ip', default='127.0.0.1', type=str)
    cli.add_argument('--port', help='server port', default=8090, type=int)

    # Punctuation server address; the dotted flags are stored under plain names.
    cli.add_argument(
        '--punc.server_ip',
        dest="punc_server_ip",
        help='Punctuation server ip',
        default=None,
        type=str)
    cli.add_argument(
        '--punc.port',
        dest="punc_server_port",
        help='Punctuation server port',
        default=8091,
        type=int)

    cli.add_argument(
        "--endpoint",
        help="ASR websocket endpoint",
        default="/paddlespeech/asr/streaming",
        type=str)

    # Inputs: one audio file and/or a batch list
    cli.add_argument(
        "--wavfile",
        help="wav file path ",
        default="./16_audio.wav",
        action="store")
    cli.add_argument(
        "--wavscp", help="The batch audios dict text", default=None, type=str)

    main(cli.parse_args())


================================================
FILE: demos/streaming_asr_server/local/websocket_client_srt.py
================================================
#!/usr/bin/python
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# calc avg RTF(NOT Accurate): grep -rn RTF log.txt | awk '{print $NF}' | awk -F "=" '{sum += $NF} END {print "all time",sum, "audio num", NR,  "RTF", sum/NR}'
# python3 websocket_client_srt.py --server_ip 127.0.0.1 --port 8290 --punc.server_ip 127.0.0.1 --punc.port 8190 --wavfile ./zh.wav
# python3 websocket_client_srt.py --server_ip 127.0.0.1 --port 8290 --wavfile ./zh.wav
import argparse
import asyncio
import codecs
import os
from pydub import AudioSegment
import re

from paddlespeech.cli.log import logger
from paddlespeech.server.utils.audio_handler import ASRWsAudioHandler

def convert_to_wav(input_file):
    """Re-encode ``input_file`` as a mono, 16 kHz WAV file.

    The output is written next to the input, using the input path with its
    extension swapped for ``.wav``. Nothing is returned; the conversion is
    reported through the logger.

    Args:
        input_file (str): path of the audio file to convert.
    """
    # Decode, then down-mix to one channel and resample to 16 kHz.
    mono_16k = AudioSegment.from_file(input_file).set_channels(1).set_frame_rate(
        16000)

    stem, _ = os.path.splitext(input_file)
    wav_path = stem + ".wav"
    mono_16k.export(wav_path, format="wav")

    logger.info(f"{input_file} converted to {wav_path}")

def format_time(sec):
    """Convert a duration in seconds to the SRT timestamp format.

    Args:
        sec (int | float): non-negative time offset in seconds.

    Returns:
        str: ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds first and then split with
    integer arithmetic. Truncating the float fraction instead loses a
    millisecond for inputs such as 1.001, whose binary representation is
    slightly below the decimal value.
    """
    total_ms = int(round(sec * 1000))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f'{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}'

def results2srt(results, srt_file):
    """Convert results from paddlespeech to srt format for subtitle.

    Args:
        results (dict): results from paddlespeech. ``results['result']`` is the
            recognized text including punctuation; ``results['times']`` is a
            sequence with one ``{'bg': ..., 'ed': ...}`` entry per character of
            the text (start / end time passed to ``format_time``).
            NOTE(review): punctuation marks are assumed to have no entry in
            ``times`` - confirm against the server response.
        srt_file (str): path of the subtitle file to write (UTF-8).
    """
    # times contains start and end time of each word
    times = results['times']
    # result contains the whole sentence including punctuation
    result = results['result']
    # Split into sentences on sentence punctuation. Empty pieces (trailing or
    # consecutive punctuation) are dropped; a final sentence without closing
    # punctuation is kept.
    sentences = [s for s in re.split('[，。？！]', result) if s]

    # (start, end) of each sentence, or None when no timestamps are left for it
    spans = []
    word_i = 0
    for sentence in sentences:
        first = word_i
        word_i += len(sentence)
        # clamp to the available timestamps instead of raising IndexError
        last = min(word_i, len(times)) - 1
        if last < first:
            spans.append(None)
        else:
            spans.append((times[first]['bg'], times[last]['ed']))

    if word_i > len(times):
        logger.info(
            f"text has {word_i} words but only {len(times)} timestamps, "
            "subtitles without timing are skipped")

    # generate srt file according to the spans and sentences;
    # explicit utf-8 so Chinese text does not depend on the locale encoding
    with open(srt_file, 'w', encoding='utf-8') as f:
        index = 0
        for sentence, span in zip(sentences, spans):
            if span is None:
                continue
            index += 1
            # Write index number
            f.write(str(index) + '\n')

            # Write start and end times
            start = format_time(span[0])
            end = format_time(span[1])
            f.write(start + ' --> ' + end + '\n')

            # Write text
            f.write(sentence + '\n\n')
    logger.info(f"results saved to {srt_file}")

def main(args):
    """Recognize audio through the streaming ASR server and export subtitles.

    Args:
        args (argparse.Namespace): parsed options. Reads ``server_ip``,
            ``port``, ``endpoint``, ``punc_server_ip``, ``punc_server_port``,
            ``wavfile`` and ``wavscp``. ``args.wavfile`` is updated in place
            when an mp3 input is converted to wav.

    A single file (``args.wavfile``) is recognized and saved as an ``.srt``
    file next to it. A batch list (``args.wavscp``, one "<utt_name> <wav_path>"
    per line) is recognized line by line and written to ``result.txt``.
    """
    logger.info("asr websocket client start")
    handler = ASRWsAudioHandler(
        args.server_ip,
        args.port,
        endpoint=args.endpoint,
        punc_server_ip=args.punc_server_ip,
        punc_server_port=args.punc_server_port)
    loop = asyncio.get_event_loop()

    # check if the wav file is mp3 format
    # if so, convert it to wav format using convert_to_wav function
    if args.wavfile and os.path.exists(args.wavfile):
        if args.wavfile.lower().endswith(".mp3"):
            convert_to_wav(args.wavfile)
            # same rule convert_to_wav uses for its output name; only the
            # extension changes, never a ".mp3" elsewhere in the path
            args.wavfile = os.path.splitext(args.wavfile)[0] + ".wav"

    # support to process single audio file
    if args.wavfile and os.path.exists(args.wavfile):
        logger.info(f"start to process the wavfile: {args.wavfile}")
        result = loop.run_until_complete(handler.run(args.wavfile))
        # Derive the subtitle path from the extension so it can never equal
        # the audio path (str.replace left e.g. "a.WAV" unchanged and the
        # audio file was overwritten).
        srt_file = os.path.splitext(args.wavfile)[0] + ".srt"
        results2srt(result, srt_file)

    # support to process batch audios from wav.scp
    if args.wavscp and os.path.exists(args.wavscp):
        logger.info(f"start to process the wavscp: {args.wavscp}")
        with codecs.open(args.wavscp, 'r', encoding='utf-8') as f,\
             codecs.open("result.txt", 'w', encoding='utf-8') as w:
            for line in f:
                line = line.strip()
                if not line:
                    # tolerate blank lines (e.g. a trailing newline at EOF)
                    continue
                # split once so a path containing whitespace stays intact
                fields = line.split(maxsplit=1)
                if len(fields) != 2:
                    logger.info(f"skip malformed wavscp line: {line}")
                    continue
                utt_name, utt_path = fields
                result = loop.run_until_complete(handler.run(utt_path))
                result = result["result"]
                w.write(f"{utt_name} {result}\n")


if __name__ == "__main__":
    logger.info("Start to do streaming asr client")
    cli = argparse.ArgumentParser()

    # ASR server address
    cli.add_argument(
        '--server_ip', help='server ip', default='127.0.0.1', type=str)
    cli.add_argument('--port', help='server port', default=8090, type=int)

    # Punctuation server address; the dotted flags are stored under plain names.
    cli.add_argument(
        '--punc.server_ip',
        dest="punc_server_ip",
        help='Punctuation server ip',
        default=None,
        type=str)
    cli.add_argument(
        '--punc.port',
        dest="punc_server_port",
        help='Punctuation server port',
        default=8091,
        type=int)

    cli.add_argument(
        "--endpoint",
        help="ASR websocket endpoint",
        default="/paddlespeech/asr/streaming",
        type=str)

    # Inputs: one audio file and/or a batch list
    cli.add_argument(
        "--wavfile",
        help="wav file path ",
        default="./16_audio.wav",
        action="store")
    cli.add_argument(
        "--wavscp", help="The batch audios dict text", default=None, type=str)

    main(cli.parse_args())


================================================
FILE: demos/streaming_asr_server/run.sh
================================================
# Start the streaming ASR service in the foreground, configured by
# ./conf/ws_conformer_application.yaml (path is relative to the current directory).
paddlespeech_server start --config_file ./conf/ws_conformer_application.yaml

================================================
FILE: demos/streaming_asr_server/server.sh
================================================
# Optionally restrict the visible GPUs before starting the services.
#export CUDA_VISIBLE_DEVICES=0,1,2,3

# Start the punctuation service in the background; stdout and stderr go to punc.log.
# `> file 2>&1` is used instead of the bash-only `&>` because this script has no
# shebang: under a POSIX sh, `cmd &> file &` backgrounds cmd without redirecting it.
# Alternative: nohup python3 local/punc_server.py --config_file conf/punc_application.yaml > punc.log 2>&1 &
paddlespeech_server start --config_file conf/punc_application.yaml > punc.log 2>&1 &

# Start the streaming ASR service in the background; stdout and stderr go to streaming_asr.log.
# Alternative: nohup python3 local/streaming_asr_server.py --config_file conf/ws_conformer_wenetspeech_application.yaml > streaming_asr.log 2>&1 &
paddlespeech_server start --config_file conf/ws_conformer_wenetspeech_application.yaml > streaming_asr.log 2>&1 &


================================================
FILE: demos/streaming_asr_server/test.sh
================================================
# Download the test wav (-c resumes a partial download).
wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav 

# Send the wav to the streaming ASR service only.
# If `127.0.0.1` is not accessible, you need to use the actual service IP address.
paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --input ./zh.wav

# Send the wav to the streaming ASR service and also pass the punctuation service address.
# If `127.0.0.1` is not accessible, you need to use the actual service IP address.
paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8090 --punc.server_ip 127.0.0.1 --punc.port 8190 --input ./zh.wav


================================================
FILE: demos/streaming_asr_server/web/index.html
================================================
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <link rel="icon" href="./favicon.ico" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>飞桨PaddleSpeech</title>
    <script type="module" crossorigin>
//assets/index.c0d7cb1a.js
var m9=Object.defineProperty,g9=Object.defineProperties;var y9=Object.getOwnPropertyDescriptors;var Xf=Object.getOwnPropertySymbols;var Sk=Object.prototype.hasOwnProperty,kk=Object.prototype.propertyIsEnumerable;var wk=(e,t,n)=>t in e?m9(e,t,{enumerable:!0,configurable:!0,writable:!0,value:n}):e[t]=n,Te=(e,t)=>{for(var n in t||(t={}))Sk.call(t,n)&&wk(e,n,t[n]);if(Xf)for(var n of Xf(t))kk.call(t,n)&&wk(e,n,t[n]);return e},Ke=(e,t)=>g9(e,y9(t));var $k=(e,t)=>{var n={};for(var r in e)Sk.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&Xf)for(var r of Xf(e))t.indexOf(r)<0&&kk.call(e,r)&&(n[r]=e[r]);return n};var b9=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports);var cLe=b9((fa,Sr)=>{const C9=function(){const t=document.createElement("link").relList;if(t&&t.supports&&t.supports("modulepreload"))return;for(const a of document.querySelectorAll('link[rel="modulepreload"]'))r(a);new MutationObserver(a=>{for(const o of a)if(o.type==="childList")for(const i of o.addedNodes)i.tagName==="LINK"&&i.rel==="modulepreload"&&r(i)}).observe(document,{childList:!0,subtree:!0});function n(a){const o={};return a.integrity&&(o.integrity=a.integrity),a.referrerpolicy&&(o.referrerPolicy=a.referrerpolicy),a.crossorigin==="use-credentials"?o.credentials="include":a.crossorigin==="anonymous"?o.credentials="omit":o.credentials="same-origin",o}function r(a){if(a.ep)return;a.ep=!0;const o=n(a);fetch(a.href,o)}};C9();function H1(e,t){const n=Object.create(null),r=e.split(",");for(let a=0;a<r.length;a++)n[r[a]]=!0;return t?a=>!!n[a.toLowerCase()]:a=>!!n[a]}const w9="itemscope,allowfullscreen,formnovalidate,ismap,nomodule,novalidate,readonly",S9=H1(w9);function W_(e){return!!e||e===""}function Xe(e){if(pt(e)){const t={};for(let n=0;n<e.length;n++){const r=e[n],a=wt(r)?O9(r):Xe(r);if(a)for(const o in a)t[o]=a[o]}return t}else{if(wt(e))return e;if(zt(e))return e}}const k9=/;(?![^(]*\))/g,$9=/:(.+)/;function O9(e){const t={};return e.split(k9).forEach(n=>{if(n){const 
r=n.split($9);r.length>1&&(t[r[0].trim()]=r[1].trim())}}),t}function U(e){let t="";if(wt(e))t=e;else if(pt(e))for(let n=0;n<e.length;n++){const r=U(e[n]);r&&(t+=r+" ")}else if(zt(e))for(const n in e)e[n]&&(t+=n+" ");return t.trim()}function Za(e){if(!e)return null;let{class:t,style:n}=e;return t&&!wt(t)&&(e.class=U(t)),n&&(e.style=Xe(n)),e}function P9(e,t){if(e.length!==t.length)return!1;let n=!0;for(let r=0;n&&r<e.length;r++)n=Vu(e[r],t[r]);return n}function Vu(e,t){if(e===t)return!0;let n=Ok(e),r=Ok(t);if(n||r)return n&&r?e.getTime()===t.getTime():!1;if(n=pt(e),r=pt(t),n||r)return n&&r?P9(e,t):!1;if(n=zt(e),r=zt(t),n||r){if(!n||!r)return!1;const a=Object.keys(e).length,o=Object.keys(t).length;if(a!==o)return!1;for(const i in e){const l=e.hasOwnProperty(i),s=t.hasOwnProperty(i);if(l&&!s||!l&&s||!Vu(e[i],t[i]))return!1}}return String(e)===String(t)}function U_(e,t){return e.findIndex(n=>Vu(n,t))}const Me=e=>wt(e)?e:e==null?"":pt(e)||zt(e)&&(e.toString===q_||!Ct(e.toString))?JSON.stringify(e,Y_,2):String(e),Y_=(e,t)=>t&&t.__v_isRef?Y_(e,t.value):ku(t)?{[`Map(${t.size})`]:[...t.entries()].reduce((n,[r,a])=>(n[`${r} =>`]=a,n),{})}:hm(t)?{[`Set(${t.size})`]:[...t.values()]}:zt(t)&&!pt(t)&&!G_(t)?String(t):t,wn={},Su=[],Qt=()=>{},T9=()=>!1,x9=/^on[^a-z]/,fm=e=>x9.test(e),j1=e=>e.startsWith("onUpdate:"),fr=Object.assign,K1=(e,t)=>{const n=e.indexOf(t);n>-1&&e.splice(n,1)},_9=Object.prototype.hasOwnProperty,Mt=(e,t)=>_9.call(e,t),pt=Array.isArray,ku=e=>bf(e)==="[object Map]",hm=e=>bf(e)==="[object Set]",Ok=e=>e instanceof Date,Ct=e=>typeof e=="function",wt=e=>typeof e=="string",W1=e=>typeof e=="symbol",zt=e=>e!==null&&typeof e=="object",hs=e=>zt(e)&&Ct(e.then)&&Ct(e.catch),q_=Object.prototype.toString,bf=e=>q_.call(e),np=e=>bf(e).slice(8,-1),G_=e=>bf(e)==="[object 
Object]",U1=e=>wt(e)&&e!=="NaN"&&e[0]!=="-"&&""+parseInt(e,10)===e,rp=H1(",key,ref,ref_for,ref_key,onVnodeBeforeMount,onVnodeMounted,onVnodeBeforeUpdate,onVnodeUpdated,onVnodeBeforeUnmount,onVnodeUnmounted"),pm=e=>{const t=Object.create(null);return n=>t[n]||(t[n]=e(n))},E9=/-(\w)/g,_a=pm(e=>e.replace(E9,(t,n)=>n?n.toUpperCase():"")),M9=/\B([A-Z])/g,vl=pm(e=>e.replace(M9,"-$1").toLowerCase()),ia=pm(e=>e.charAt(0).toUpperCase()+e.slice(1)),ap=pm(e=>e?`on${ia(e)}`:""),Md=(e,t)=>!Object.is(e,t),op=(e,t)=>{for(let n=0;n<e.length;n++)e[n](t)},rv=(e,t,n)=>{Object.defineProperty(e,t,{configurable:!0,enumerable:!1,value:n})},av=e=>{const t=parseFloat(e);return isNaN(t)?e:t};let Pk;const I9=()=>Pk||(Pk=typeof globalThis!="undefined"?globalThis:typeof self!="undefined"?self:typeof window!="undefined"?window:typeof global!="undefined"?global:{});let ta;class X_{constructor(t=!1){this.active=!0,this.effects=[],this.cleanups=[],!t&&ta&&(this.parent=ta,this.index=(ta.scopes||(ta.scopes=[])).push(this)-1)}run(t){if(this.active){const n=ta;try{return ta=this,t()}finally{ta=n}}}on(){ta=this}off(){ta=this.parent}stop(t){if(this.active){let n,r;for(n=0,r=this.effects.length;n<r;n++)this.effects[n].stop();for(n=0,r=this.cleanups.length;n<r;n++)this.cleanups[n]();if(this.scopes)for(n=0,r=this.scopes.length;n<r;n++)this.scopes[n].stop(!0);if(this.parent&&!t){const a=this.parent.scopes.pop();a&&a!==this&&(this.parent.scopes[this.index]=a,a.index=this.index)}this.active=!1}}}function N9(e){return new X_(e)}function A9(e,t=ta){t&&t.active&&t.effects.push(e)}function D9(){return ta}function Z_(e){ta&&ta.cleanups.push(e)}const Y1=e=>{const t=new Set(e);return t.w=0,t.n=0,t},J_=e=>(e.w&il)>0,Q_=e=>(e.n&il)>0,R9=({deps:e})=>{if(e.length)for(let t=0;t<e.length;t++)e[t].w|=il},L9=e=>{const{deps:t}=e;if(t.length){let n=0;for(let r=0;r<t.length;r++){const a=t[r];J_(a)&&!Q_(a)?a.delete(e):t[n++]=a,a.w&=~il,a.n&=~il}t.length=n}},Fb=new WeakMap;let Qc=0,il=1;const Bb=30;let To;const 
es=Symbol(""),Vb=Symbol("");class q1{constructor(t,n=null,r){this.fn=t,this.scheduler=n,this.active=!0,this.deps=[],this.parent=void 0,A9(this,r)}run(){if(!this.active)return this.fn();let t=To,n=Ji;for(;t;){if(t===this)return;t=t.parent}try{return this.parent=To,To=this,Ji=!0,il=1<<++Qc,Qc<=Bb?R9(this):Tk(this),this.fn()}finally{Qc<=Bb&&L9(this),il=1<<--Qc,To=this.parent,Ji=n,this.parent=void 0}}stop(){this.active&&(Tk(this),this.onStop&&this.onStop(),this.active=!1)}}function Tk(e){const{deps:t}=e;if(t.length){for(let n=0;n<t.length;n++)t[n].delete(e);t.length=0}}let Ji=!0;const e6=[];function $s(){e6.push(Ji),Ji=!1}function Os(){const e=e6.pop();Ji=e===void 0?!0:e}function ha(e,t,n){if(Ji&&To){let r=Fb.get(e);r||Fb.set(e,r=new Map);let a=r.get(n);a||r.set(n,a=Y1()),t6(a)}}function t6(e,t){let n=!1;Qc<=Bb?Q_(e)||(e.n|=il,n=!J_(e)):n=!e.has(To),n&&(e.add(To),To.deps.push(e))}function mi(e,t,n,r,a,o){const i=Fb.get(e);if(!i)return;let l=[];if(t==="clear")l=[...i.values()];else if(n==="length"&&pt(e))i.forEach((s,c)=>{(c==="length"||c>=r)&&l.push(s)});else switch(n!==void 0&&l.push(i.get(n)),t){case"add":pt(e)?U1(n)&&l.push(i.get("length")):(l.push(i.get(es)),ku(e)&&l.push(i.get(Vb)));break;case"delete":pt(e)||(l.push(i.get(es)),ku(e)&&l.push(i.get(Vb)));break;case"set":ku(e)&&l.push(i.get(es));break}if(l.length===1)l[0]&&zb(l[0]);else{const s=[];for(const c of l)c&&s.push(...c);zb(Y1(s))}}function zb(e,t){for(const n of pt(e)?e:[...e])(n!==To||n.allowRecurse)&&(n.scheduler?n.scheduler():n.run())}const F9=H1("__proto__,__v_isRef,__isVue"),n6=new Set(Object.getOwnPropertyNames(Symbol).map(e=>Symbol[e]).filter(W1)),B9=G1(),V9=G1(!1,!0),z9=G1(!0),xk=H9();function H9(){const e={};return["includes","indexOf","lastIndexOf"].forEach(t=>{e[t]=function(...n){const r=Gt(this);for(let o=0,i=this.length;o<i;o++)ha(r,"get",o+"");const a=r[t](...n);return a===-1||a===!1?r[t](...n.map(Gt)):a}}),["push","pop","shift","unshift","splice"].forEach(t=>{e[t]=function(...n){$s();const 
r=Gt(this)[t].apply(this,n);return Os(),r}}),e}function G1(e=!1,t=!1){return function(r,a,o){if(a==="__v_isReactive")return!e;if(a==="__v_isReadonly")return e;if(a==="__v_isShallow")return t;if(a==="__v_raw"&&o===(e?t?aL:l6:t?i6:o6).get(r))return r;const i=pt(r);if(!e&&i&&Mt(xk,a))return Reflect.get(xk,a,o);const l=Reflect.get(r,a,o);return(W1(a)?n6.has(a):F9(a))||(e||ha(r,"get",a),t)?l:_n(l)?!i||!U1(a)?l.value:l:zt(l)?e?Cf(l):bt(l):l}}const j9=r6(),K9=r6(!0);function r6(e=!1){return function(n,r,a,o){let i=n[r];if(Id(i)&&_n(i)&&!_n(a))return!1;if(!e&&!Id(a)&&(s6(a)||(a=Gt(a),i=Gt(i)),!pt(n)&&_n(i)&&!_n(a)))return i.value=a,!0;const l=pt(n)&&U1(r)?Number(r)<n.length:Mt(n,r),s=Reflect.set(n,r,a,o);return n===Gt(o)&&(l?Md(a,i)&&mi(n,"set",r,a):mi(n,"add",r,a)),s}}function W9(e,t){const n=Mt(e,t);e[t];const r=Reflect.deleteProperty(e,t);return r&&n&&mi(e,"delete",t,void 0),r}function U9(e,t){const n=Reflect.has(e,t);return(!W1(t)||!n6.has(t))&&ha(e,"has",t),n}function Y9(e){return ha(e,"iterate",pt(e)?"length":es),Reflect.ownKeys(e)}const a6={get:B9,set:j9,deleteProperty:W9,has:U9,ownKeys:Y9},q9={get:z9,set(e,t){return!0},deleteProperty(e,t){return!0}},G9=fr({},a6,{get:V9,set:K9}),X1=e=>e,vm=e=>Reflect.getPrototypeOf(e);function Zf(e,t,n=!1,r=!1){e=e.__v_raw;const a=Gt(e),o=Gt(t);t!==o&&!n&&ha(a,"get",t),!n&&ha(a,"get",o);const{has:i}=vm(a),l=r?X1:n?Q1:Nd;if(i.call(a,t))return l(e.get(t));if(i.call(a,o))return l(e.get(o));e!==a&&e.get(t)}function Jf(e,t=!1){const n=this.__v_raw,r=Gt(n),a=Gt(e);return e!==a&&!t&&ha(r,"has",e),!t&&ha(r,"has",a),e===a?n.has(e):n.has(e)||n.has(a)}function Qf(e,t=!1){return e=e.__v_raw,!t&&ha(Gt(e),"iterate",es),Reflect.get(e,"size",e)}function _k(e){e=Gt(e);const t=Gt(this);return vm(t).has.call(t,e)||(t.add(e),mi(t,"add",e,e)),this}function Ek(e,t){t=Gt(t);const n=Gt(this),{has:r,get:a}=vm(n);let o=r.call(n,e);o||(e=Gt(e),o=r.call(n,e));const i=a.call(n,e);return n.set(e,t),o?Md(t,i)&&mi(n,"set",e,t):mi(n,"add",e,t),this}function 
Mk(e){const t=Gt(this),{has:n,get:r}=vm(t);let a=n.call(t,e);a||(e=Gt(e),a=n.call(t,e)),r&&r.call(t,e);const o=t.delete(e);return a&&mi(t,"delete",e,void 0),o}function Ik(){const e=Gt(this),t=e.size!==0,n=e.clear();return t&&mi(e,"clear",void 0,void 0),n}function eh(e,t){return function(r,a){const o=this,i=o.__v_raw,l=Gt(i),s=t?X1:e?Q1:Nd;return!e&&ha(l,"iterate",es),i.forEach((c,d)=>r.call(a,s(c),s(d),o))}}function th(e,t,n){return function(...r){const a=this.__v_raw,o=Gt(a),i=ku(o),l=e==="entries"||e===Symbol.iterator&&i,s=e==="keys"&&i,c=a[e](...r),d=n?X1:t?Q1:Nd;return!t&&ha(o,"iterate",s?Vb:es),{next(){const{value:f,done:p}=c.next();return p?{value:f,done:p}:{value:l?[d(f[0]),d(f[1])]:d(f),done:p}},[Symbol.iterator](){return this}}}}function Ai(e){return function(...t){return e==="delete"?!1:this}}function X9(){const e={get(o){return Zf(this,o)},get size(){return Qf(this)},has:Jf,add:_k,set:Ek,delete:Mk,clear:Ik,forEach:eh(!1,!1)},t={get(o){return Zf(this,o,!1,!0)},get size(){return Qf(this)},has:Jf,add:_k,set:Ek,delete:Mk,clear:Ik,forEach:eh(!1,!0)},n={get(o){return Zf(this,o,!0)},get size(){return Qf(this,!0)},has(o){return Jf.call(this,o,!0)},add:Ai("add"),set:Ai("set"),delete:Ai("delete"),clear:Ai("clear"),forEach:eh(!0,!1)},r={get(o){return Zf(this,o,!0,!0)},get size(){return Qf(this,!0)},has(o){return Jf.call(this,o,!0)},add:Ai("add"),set:Ai("set"),delete:Ai("delete"),clear:Ai("clear"),forEach:eh(!0,!0)};return["keys","values","entries",Symbol.iterator].forEach(o=>{e[o]=th(o,!1,!1),n[o]=th(o,!0,!1),t[o]=th(o,!1,!0),r[o]=th(o,!0,!0)}),[e,n,t,r]}const[Z9,J9,Q9,eL]=X9();function Z1(e,t){const n=t?e?eL:Q9:e?J9:Z9;return(r,a,o)=>a==="__v_isReactive"?!e:a==="__v_isReadonly"?e:a==="__v_raw"?r:Reflect.get(Mt(n,a)&&a in r?n:r,a,o)}const tL={get:Z1(!1,!1)},nL={get:Z1(!1,!0)},rL={get:Z1(!0,!1)},o6=new WeakMap,i6=new WeakMap,l6=new WeakMap,aL=new WeakMap;function oL(e){switch(e){case"Object":case"Array":return 1;case"Map":case"Set":case"WeakMap":case"WeakSet":return 
2;default:return 0}}function iL(e){return e.__v_skip||!Object.isExtensible(e)?0:oL(np(e))}function bt(e){return Id(e)?e:J1(e,!1,a6,tL,o6)}function lL(e){return J1(e,!1,G9,nL,i6)}function Cf(e){return J1(e,!0,q9,rL,l6)}function J1(e,t,n,r,a){if(!zt(e)||e.__v_raw&&!(t&&e.__v_isReactive))return e;const o=a.get(e);if(o)return o;const i=iL(e);if(i===0)return e;const l=new Proxy(e,i===2?r:n);return a.set(e,l),l}function $u(e){return Id(e)?$u(e.__v_raw):!!(e&&e.__v_isReactive)}function Id(e){return!!(e&&e.__v_isReadonly)}function s6(e){return!!(e&&e.__v_isShallow)}function u6(e){return $u(e)||Id(e)}function Gt(e){const t=e&&e.__v_raw;return t?Gt(t):e}function ps(e){return rv(e,"__v_skip",!0),e}const Nd=e=>zt(e)?bt(e):e,Q1=e=>zt(e)?Cf(e):e;function c6(e){Ji&&To&&(e=Gt(e),t6(e.dep||(e.dep=Y1())))}function eC(e,t){e=Gt(e),e.dep&&zb(e.dep)}function _n(e){return!!(e&&e.__v_isRef===!0)}function H(e){return d6(e,!1)}function Qn(e){return d6(e,!0)}function d6(e,t){return _n(e)?e:new sL(e,t)}class sL{constructor(t,n){this.__v_isShallow=n,this.dep=void 0,this.__v_isRef=!0,this._rawValue=n?t:Gt(t),this._value=n?t:Nd(t)}get value(){return c6(this),this._value}set value(t){t=this.__v_isShallow?t:Gt(t),Md(t,this._rawValue)&&(this._rawValue=t,this._value=this.__v_isShallow?t:Nd(t),eC(this))}}function Dc(e){eC(e)}function A(e){return _n(e)?e.value:e}const uL={get:(e,t,n)=>A(Reflect.get(e,t,n)),set:(e,t,n,r)=>{const a=e[t];return _n(a)&&!_n(n)?(a.value=n,!0):Reflect.set(e,t,n,r)}};function f6(e){return $u(e)?e:new Proxy(e,uL)}function or(e){const t=pt(e)?new Array(e.length):{};for(const n in e)t[n]=yn(e,n);return t}class cL{constructor(t,n,r){this._object=t,this._key=n,this._defaultValue=r,this.__v_isRef=!0}get value(){const t=this._object[this._key];return t===void 0?this._defaultValue:t}set value(t){this._object[this._key]=t}}function yn(e,t,n){const r=e[t];return _n(r)?r:new cL(e,t,n)}class dL{constructor(t,n,r,a){this._setter=n,this.dep=void 
0,this.__v_isRef=!0,this._dirty=!0,this.effect=new q1(t,()=>{this._dirty||(this._dirty=!0,eC(this))}),this.effect.computed=this,this.effect.active=this._cacheable=!a,this.__v_isReadonly=r}get value(){const t=Gt(this);return c6(t),(t._dirty||!t._cacheable)&&(t._dirty=!1,t._value=t.effect.run()),t._value}set value(t){this._setter(t)}}function fL(e,t,n=!1){let r,a;const o=Ct(e);return o?(r=e,a=Qt):(r=e.get,a=e.set),new dL(r,a,o||!a,n)}Promise.resolve();const ud=[];function hL(e,...t){$s();const n=ud.length?ud[ud.length-1].component:null,r=n&&n.appContext.config.warnHandler,a=pL();if(r)ci(r,n,11,[e+t.join(""),n&&n.proxy,a.map(({vnode:o})=>`at <${K6(n,o.type)}>`).join(`
`),a]);else{const o=[`[Vue warn]: ${e}`,...t];a.length&&o.push(`
`,...vL(a)),console.warn(...o)}Os()}function pL(){let e=ud[ud.length-1];if(!e)return[];const t=[];for(;e;){const n=t[0];n&&n.vnode===e?n.recurseCount++:t.push({vnode:e,recurseCount:0});const r=e.component&&e.component.parent;e=r&&r.vnode}return t}function vL(e){const t=[];return e.forEach((n,r)=>{t.push(...r===0?[]:[`
`],...mL(n))}),t}function mL({vnode:e,recurseCount:t}){const n=t>0?`... (${t} recursive calls)`:"",r=e.component?e.component.parent==null:!1,a=` at <${K6(e.component,e.type,r)}`,o=">"+n;return e.props?[a,...gL(e.props),o]:[a+o]}function gL(e){const t=[],n=Object.keys(e);return n.slice(0,3).forEach(r=>{t.push(...h6(r,e[r]))}),n.length>3&&t.push(" ..."),t}function h6(e,t,n){return wt(t)?(t=JSON.stringify(t),n?t:[`${e}=${t}`]):typeof t=="number"||typeof t=="boolean"||t==null?n?t:[`${e}=${t}`]:_n(t)?(t=h6(e,Gt(t.value),!0),n?t:[`${e}=Ref<`,t,">"]):Ct(t)?[`${e}=fn${t.name?`<${t.name}>`:""}`]:(t=Gt(t),n?t:[`${e}=`,t])}function ci(e,t,n,r){let a;try{a=r?e(...r):e()}catch(o){mm(o,t,n)}return a}function Pa(e,t,n,r){if(Ct(e)){const o=ci(e,t,n,r);return o&&hs(o)&&o.catch(i=>{mm(i,t,n)}),o}const a=[];for(let o=0;o<e.length;o++)a.push(Pa(e[o],t,n,r));return a}function mm(e,t,n,r=!0){const a=t?t.vnode:null;if(t){let o=t.parent;const i=t.proxy,l=n;for(;o;){const c=o.ec;if(c){for(let d=0;d<c.length;d++)if(c[d](e,i,l)===!1)return}o=o.parent}const s=t.appContext.config.errorHandler;if(s){ci(s,null,10,[e,i,l]);return}}yL(e,n,a,r)}function yL(e,t,n,r=!0){console.error(e)}let ov=!1,Hb=!1;const la=[];let ri=0;const cd=[];let ed=null,au=0;const dd=[];let Bi=null,ou=0;const p6=Promise.resolve();let tC=null,jb=null;function Ne(e){const t=tC||p6;return e?t.then(this?e.bind(this):e):t}function bL(e){let t=ri+1,n=la.length;for(;t<n;){const r=t+n>>>1;Ad(la[r])<e?t=r+1:n=r}return t}function v6(e){(!la.length||!la.includes(e,ov&&e.allowRecurse?ri+1:ri))&&e!==jb&&(e.id==null?la.push(e):la.splice(bL(e.id),0,e),m6())}function m6(){!ov&&!Hb&&(Hb=!0,tC=p6.then(b6))}function CL(e){const t=la.indexOf(e);t>ri&&la.splice(t,1)}function g6(e,t,n,r){pt(e)?n.push(...e):(!t||!t.includes(e,e.allowRecurse?r+1:r))&&n.push(e),m6()}function wL(e){g6(e,ed,cd,au)}function SL(e){g6(e,Bi,dd,ou)}function nC(e,t=null){if(cd.length){for(jb=t,ed=[...new 
Set(cd)],cd.length=0,au=0;au<ed.length;au++)ed[au]();ed=null,au=0,jb=null,nC(e,t)}}function y6(e){if(dd.length){const t=[...new Set(dd)];if(dd.length=0,Bi){Bi.push(...t);return}for(Bi=t,Bi.sort((n,r)=>Ad(n)-Ad(r)),ou=0;ou<Bi.length;ou++)Bi[ou]();Bi=null,ou=0}}const Ad=e=>e.id==null?1/0:e.id;function b6(e){Hb=!1,ov=!0,nC(e),la.sort((n,r)=>Ad(n)-Ad(r));const t=Qt;try{for(ri=0;ri<la.length;ri++){const n=la[ri];n&&n.active!==!1&&ci(n,null,14)}}finally{ri=0,la.length=0,y6(),ov=!1,tC=null,(la.length||cd.length||dd.length)&&b6(e)}}function kL(e,t,...n){const r=e.vnode.props||wn;let a=n;const o=t.startsWith("update:"),i=o&&t.slice(7);if(i&&i in r){const d=`${i==="modelValue"?"model":i}Modifiers`,{number:f,trim:p}=r[d]||wn;p?a=n.map(v=>v.trim()):f&&(a=n.map(av))}let l,s=r[l=ap(t)]||r[l=ap(_a(t))];!s&&o&&(s=r[l=ap(vl(t))]),s&&Pa(s,e,6,a);const c=r[l+"Once"];if(c){if(!e.emitted)e.emitted={};else if(e.emitted[l])return;e.emitted[l]=!0,Pa(c,e,6,a)}}function C6(e,t,n=!1){const r=t.emitsCache,a=r.get(e);if(a!==void 0)return a;const o=e.emits;let i={},l=!1;if(!Ct(e)){const s=c=>{const d=C6(c,t,!0);d&&(l=!0,fr(i,d))};!n&&t.mixins.length&&t.mixins.forEach(s),e.extends&&s(e.extends),e.mixins&&e.mixins.forEach(s)}return!o&&!l?(r.set(e,null),null):(pt(o)?o.forEach(s=>i[s]=null):fr(i,o),r.set(e,i),i)}function gm(e,t){return!e||!fm(t)?!1:(t=t.slice(2).replace(/Once$/,""),Mt(e,t[0].toLowerCase()+t.slice(1))||Mt(e,vl(t))||Mt(e,t))}let ua=null,ym=null;function iv(e){const t=ua;return ua=e,ym=e&&e.type.__scopeId||null,t}function rC(e){ym=e}function aC(){ym=null}function re(e,t=ua,n){if(!t||e._n)return e;const r=(...a)=>{r._d&&Kk(-1);const o=iv(t),i=e(...a);return iv(o),r._d&&Kk(1),i};return r._n=!0,r._c=!0,r._d=!0,r}function Ug(e){const{type:t,vnode:n,proxy:r,withProxy:a,props:o,propsOptions:[i],slots:l,attrs:s,emit:c,render:d,renderCache:f,data:p,setupState:v,ctx:m,inheritAttrs:y}=e;let b,C;const S=iv(e);try{if(n.shapeFlag&4){const k=a||r;b=ko(d.call(k,k,f,o,v,p,m)),C=s}else{const 
k=t;b=ko(k.length>1?k(o,{attrs:s,slots:l,emit:c}):k(o,null)),C=t.props?s:$L(s)}}catch(k){hd.length=0,mm(k,e,1),b=g(Ir)}let w=b;if(C&&y!==!1){const k=Object.keys(C),{shapeFlag:$}=w;k.length&&$&7&&(i&&k.some(j1)&&(C=OL(C,i)),w=hr(w,C))}return n.dirs&&(w.dirs=w.dirs?w.dirs.concat(n.dirs):n.dirs),n.transition&&(w.transition=n.transition),b=w,iv(S),b}const $L=e=>{let t;for(const n in e)(n==="class"||n==="style"||fm(n))&&((t||(t={}))[n]=e[n]);return t},OL=(e,t)=>{const n={};for(const r in e)(!j1(r)||!(r.slice(9)in t))&&(n[r]=e[r]);return n};function PL(e,t,n){const{props:r,children:a,component:o}=e,{props:i,children:l,patchFlag:s}=t,c=o.emitsOptions;if(t.dirs||t.transition)return!0;if(n&&s>=0){if(s&1024)return!0;if(s&16)return r?Nk(r,i,c):!!i;if(s&8){const d=t.dynamicProps;for(let f=0;f<d.length;f++){const p=d[f];if(i[p]!==r[p]&&!gm(c,p))return!0}}}else return(a||l)&&(!l||!l.$stable)?!0:r===i?!1:r?i?Nk(r,i,c):!0:!!i;return!1}function Nk(e,t,n){const r=Object.keys(t);if(r.length!==Object.keys(e).length)return!0;for(let a=0;a<r.length;a++){const o=r[a];if(t[o]!==e[o]&&!gm(n,o))return!0}return!1}function TL({vnode:e,parent:t},n){for(;t&&t.subTree===e;)(e=t.vnode).el=n,t=t.parent}const xL=e=>e.__isSuspense;function _L(e,t){t&&t.pendingBranch?pt(e)?t.effects.push(...e):t.effects.push(e):SL(e)}function ot(e,t){if(dr){let n=dr.provides;const r=dr.parent&&dr.parent.provides;r===n&&(n=dr.provides=Object.create(r)),n[e]=t}}function ve(e,t,n=!1){const r=dr||ua;if(r){const a=r.parent==null?r.vnode.appContext&&r.vnode.appContext.provides:r.parent.provides;if(a&&e in a)return a[e];if(arguments.length>1)return n&&Ct(t)?t.call(r.proxy):t}}function Wn(e,t){return oC(e,null,t)}const Ak={};function ce(e,t,n){return oC(e,t,n)}function oC(e,t,{immediate:n,deep:r,flush:a,onTrack:o,onTrigger:i}=wn){const l=dr;let s,c=!1,d=!1;if(_n(e)?(s=()=>e.value,c=s6(e)):$u(e)?(s=()=>e,r=!0):pt(e)?(d=!0,c=e.some($u),s=()=>e.map(C=>{if(_n(C))return C.value;if($u(C))return Wl(C);if(Ct(C))return 
ci(C,l,2)})):Ct(e)?t?s=()=>ci(e,l,2):s=()=>{if(!(l&&l.isUnmounted))return f&&f(),Pa(e,l,3,[p])}:s=Qt,t&&r){const C=s;s=()=>Wl(C())}let f,p=C=>{f=b.onStop=()=>{ci(C,l,4)}};if(Ld)return p=Qt,t?n&&Pa(t,l,3,[s(),d?[]:void 0,p]):s(),Qt;let v=d?[]:Ak;const m=()=>{if(!!b.active)if(t){const C=b.run();(r||c||(d?C.some((S,w)=>Md(S,v[w])):Md(C,v)))&&(f&&f(),Pa(t,l,3,[C,v===Ak?void 0:v,p]),v=C)}else b.run()};m.allowRecurse=!!t;let y;a==="sync"?y=m:a==="post"?y=()=>Rr(m,l&&l.suspense):y=()=>{!l||l.isMounted?wL(m):m()};const b=new q1(s,y);return t?n?m():v=b.run():a==="post"?Rr(b.run.bind(b),l&&l.suspense):b.run(),()=>{b.stop(),l&&l.scope&&K1(l.scope.effects,b)}}function EL(e,t,n){const r=this.proxy,a=wt(e)?e.includes(".")?w6(r,e):()=>r[e]:e.bind(r,r);let o;Ct(t)?o=t:(o=t.handler,n=t);const i=dr;zu(this);const l=oC(a,o.bind(r),n);return i?zu(i):ns(),l}function w6(e,t){const n=t.split(".");return()=>{let r=e;for(let a=0;a<n.length&&r;a++)r=r[n[a]];return r}}function Wl(e,t){if(!zt(e)||e.__v_skip||(t=t||new Set,t.has(e)))return e;if(t.add(e),_n(e))Wl(e.value,t);else if(pt(e))for(let n=0;n<e.length;n++)Wl(e[n],t);else if(hm(e)||ku(e))e.forEach(n=>{Wl(n,t)});else if(G_(e))for(const n in e)Wl(e[n],t);return e}function S6(){const e={isMounted:!1,isLeaving:!1,isUnmounting:!1,leavingVNodes:new Map};return et(()=>{e.isMounted=!0}),Lt(()=>{e.isUnmounting=!0}),e}const ba=[Function,Array],ML={name:"BaseTransition",props:{mode:String,appear:Boolean,persisted:Boolean,onBeforeEnter:ba,onEnter:ba,onAfterEnter:ba,onEnterCancelled:ba,onBeforeLeave:ba,onLeave:ba,onAfterLeave:ba,onLeaveCancelled:ba,onBeforeAppear:ba,onAppear:ba,onAfterAppear:ba,onAppearCancelled:ba},setup(e,{slots:t}){const n=$t(),r=S6();let a;return()=>{const o=t.default&&iC(t.default(),!0);if(!o||!o.length)return;const i=Gt(e),{mode:l}=i,s=o[0];if(r.isLeaving)return Yg(s);const c=Dk(s);if(!c)return Yg(s);const d=Dd(c,i,r,n);Rd(c,d);const f=n.subTree,p=f&&Dk(f);let v=!1;const{getTransitionKey:m}=c.type;if(m){const y=m();a===void 
0?a=y:y!==a&&(a=y,v=!0)}if(p&&p.type!==Ir&&(!Rl(c,p)||v)){const y=Dd(p,i,r,n);if(Rd(p,y),l==="out-in")return r.isLeaving=!0,y.afterLeave=()=>{r.isLeaving=!1,n.update()},Yg(s);l==="in-out"&&c.type!==Ir&&(y.delayLeave=(b,C,S)=>{const w=$6(r,p);w[String(p.key)]=p,b._leaveCb=()=>{C(),b._leaveCb=void 0,delete d.delayedLeave},d.delayedLeave=S})}return s}}},k6=ML;function $6(e,t){const{leavingVNodes:n}=e;let r=n.get(t.type);return r||(r=Object.create(null),n.set(t.type,r)),r}function Dd(e,t,n,r){const{appear:a,mode:o,persisted:i=!1,onBeforeEnter:l,onEnter:s,onAfterEnter:c,onEnterCancelled:d,onBeforeLeave:f,onLeave:p,onAfterLeave:v,onLeaveCancelled:m,onBeforeAppear:y,onAppear:b,onAfterAppear:C,onAppearCancelled:S}=t,w=String(e.key),k=$6(n,e),$=(T,_)=>{T&&Pa(T,r,9,_)},O={mode:o,persisted:i,beforeEnter(T){let _=l;if(!n.isMounted)if(a)_=y||l;else return;T._leaveCb&&T._leaveCb(!0);const I=k[w];I&&Rl(e,I)&&I.el._leaveCb&&I.el._leaveCb(),$(_,[T])},enter(T){let _=s,I=c,L=d;if(!n.isMounted)if(a)_=b||s,I=C||c,L=S||d;else return;let j=!1;const F=T._enterCb=N=>{j||(j=!0,N?$(L,[T]):$(I,[T]),O.delayedLeave&&O.delayedLeave(),T._enterCb=void 0)};_?(_(T,F),_.length<=1&&F()):F()},leave(T,_){const I=String(e.key);if(T._enterCb&&T._enterCb(!0),n.isUnmounting)return _();$(f,[T]);let L=!1;const j=T._leaveCb=F=>{L||(L=!0,_(),F?$(m,[T]):$(v,[T]),T._leaveCb=void 0,k[I]===e&&delete k[I])};k[I]=e,p?(p(T,j),p.length<=1&&j()):j()},clone(T){return Dd(T,t,n,r)}};return O}function Yg(e){if(bm(e))return e=hr(e),e.children=null,e}function Dk(e){return bm(e)?e.children?e.children[0]:void 0:e}function Rd(e,t){e.shapeFlag&6&&e.component?Rd(e.component.subTree,t):e.shapeFlag&128?(e.ssContent.transition=t.clone(e.ssContent),e.ssFallback.transition=t.clone(e.ssFallback)):e.transition=t}function iC(e,t=!1,n){let r=[],a=0;for(let o=0;o<e.length;o++){let i=e[o];const 
l=n==null?i.key:String(n)+String(i.key!=null?i.key:o);i.type===Fe?(i.patchFlag&128&&a++,r=r.concat(iC(i.children,t,l))):(t||i.type!==Ir)&&r.push(l!=null?hr(i,{key:l}):i)}if(a>1)for(let o=0;o<r.length;o++)r[o].patchFlag=-2;return r}function G(e){return Ct(e)?{setup:e,name:e.name}:e}const Kb=e=>!!e.type.__asyncLoader,bm=e=>e.type.__isKeepAlive;function O6(e,t){T6(e,"a",t)}function P6(e,t){T6(e,"da",t)}function T6(e,t,n=dr){const r=e.__wdc||(e.__wdc=()=>{let a=n;for(;a;){if(a.isDeactivated)return;a=a.parent}return e()});if(Cm(t,r,n),n){let a=n.parent;for(;a&&a.parent;)bm(a.parent.vnode)&&IL(r,t,n,a),a=a.parent}}function IL(e,t,n,r){const a=Cm(t,e,r,!0);Wr(()=>{K1(r[t],a)},n)}function Cm(e,t,n=dr,r=!1){if(n){const a=n[e]||(n[e]=[]),o=t.__weh||(t.__weh=(...i)=>{if(n.isUnmounted)return;$s(),zu(n);const l=Pa(t,n,e,i);return ns(),Os(),l});return r?a.unshift(o):a.push(o),o}}const ki=e=>(t,n=dr)=>(!Ld||e==="sp")&&Cm(e,t,n),hc=ki("bm"),et=ki("m"),wm=ki("bu"),ur=ki("u"),Lt=ki("bum"),Wr=ki("um"),NL=ki("sp"),AL=ki("rtg"),DL=ki("rtc");function RL(e,t=dr){Cm("ec",e,t)}let Wb=!0;function LL(e){const t=_6(e),n=e.proxy,r=e.ctx;Wb=!1,t.beforeCreate&&Rk(t.beforeCreate,e,"bc");const{data:a,computed:o,methods:i,watch:l,provide:s,inject:c,created:d,beforeMount:f,mounted:p,beforeUpdate:v,updated:m,activated:y,deactivated:b,beforeDestroy:C,beforeUnmount:S,destroyed:w,unmounted:k,render:$,renderTracked:O,renderTriggered:T,errorCaptured:_,serverPrefetch:I,expose:L,inheritAttrs:j,components:F,directives:N,filters:D}=t;if(c&&FL(c,r,null,e.appContext.config.unwrapInjectedRef),i)for(const M in i){const E=i[M];Ct(E)&&(r[M]=E.bind(n))}if(a){const M=a.call(n,n);zt(M)&&(e.data=bt(M))}if(Wb=!0,o)for(const M in o){const E=o[M],K=Ct(E)?E.bind(n,n):Ct(E.get)?E.get.bind(n,n):Qt,W=!Ct(E)&&Ct(E.set)?E.set.bind(n):Qt,Y=x({get:K,set:W});Object.defineProperty(r,M,{enumerable:!0,configurable:!0,get:()=>Y.value,set:q=>Y.value=q})}if(l)for(const M in l)x6(l[M],r,n,M);if(s){const 
M=Ct(s)?s.call(n):s;Reflect.ownKeys(M).forEach(E=>{ot(E,M[E])})}d&&Rk(d,e,"c");function B(M,E){pt(E)?E.forEach(K=>M(K.bind(n))):E&&M(E.bind(n))}if(B(hc,f),B(et,p),B(wm,v),B(ur,m),B(O6,y),B(P6,b),B(RL,_),B(DL,O),B(AL,T),B(Lt,S),B(Wr,k),B(NL,I),pt(L))if(L.length){const M=e.exposed||(e.exposed={});L.forEach(E=>{Object.defineProperty(M,E,{get:()=>n[E],set:K=>n[E]=K})})}else e.exposed||(e.exposed={});$&&e.render===Qt&&(e.render=$),j!=null&&(e.inheritAttrs=j),F&&(e.components=F),N&&(e.directives=N)}function FL(e,t,n=Qt,r=!1){pt(e)&&(e=Ub(e));for(const a in e){const o=e[a];let i;zt(o)?"default"in o?i=ve(o.from||a,o.default,!0):i=ve(o.from||a):i=ve(o),_n(i)&&r?Object.defineProperty(t,a,{enumerable:!0,configurable:!0,get:()=>i.value,set:l=>i.value=l}):t[a]=i}}function Rk(e,t,n){Pa(pt(e)?e.map(r=>r.bind(t.proxy)):e.bind(t.proxy),t,n)}function x6(e,t,n,r){const a=r.includes(".")?w6(n,r):()=>n[r];if(wt(e)){const o=t[e];Ct(o)&&ce(a,o)}else if(Ct(e))ce(a,e.bind(n));else if(zt(e))if(pt(e))e.forEach(o=>x6(o,t,n,r));else{const o=Ct(e.handler)?e.handler.bind(n):t[e.handler];Ct(o)&&ce(a,o,e)}}function _6(e){const t=e.type,{mixins:n,extends:r}=t,{mixins:a,optionsCache:o,config:{optionMergeStrategies:i}}=e.appContext,l=o.get(t);let s;return l?s=l:!a.length&&!n&&!r?s=t:(s={},a.length&&a.forEach(c=>lv(s,c,i,!0)),lv(s,t,i)),o.set(t,s),s}function lv(e,t,n,r=!1){const{mixins:a,extends:o}=t;o&&lv(e,o,n,!0),a&&a.forEach(i=>lv(e,i,n,!0));for(const i in t)if(!(r&&i==="expose")){const l=BL[i]||n&&n[i];e[i]=l?l(e[i],t[i]):t[i]}return e}const BL={data:Lk,props:Ml,emits:Ml,methods:Ml,computed:Ml,beforeCreate:xr,created:xr,beforeMount:xr,mounted:xr,beforeUpdate:xr,updated:xr,beforeDestroy:xr,beforeUnmount:xr,destroyed:xr,unmounted:xr,activated:xr,deactivated:xr,errorCaptured:xr,serverPrefetch:xr,components:Ml,directives:Ml,watch:zL,provide:Lk,inject:VL};function Lk(e,t){return t?e?function(){return fr(Ct(e)?e.call(this,this):e,Ct(t)?t.call(this,this):t)}:t:e}function VL(e,t){return 
Ml(Ub(e),Ub(t))}function Ub(e){if(pt(e)){const t={};for(let n=0;n<e.length;n++)t[e[n]]=e[n];return t}return e}function xr(e,t){return e?[...new Set([].concat(e,t))]:t}function Ml(e,t){return e?fr(fr(Object.create(null),e),t):t}function zL(e,t){if(!e)return t;if(!t)return e;const n=fr(Object.create(null),e);for(const r in t)n[r]=xr(e[r],t[r]);return n}function HL(e,t,n,r=!1){const a={},o={};rv(o,Sm,1),e.propsDefaults=Object.create(null),E6(e,t,a,o);for(const i in e.propsOptions[0])i in a||(a[i]=void 0);n?e.props=r?a:lL(a):e.type.props?e.props=a:e.props=o,e.attrs=o}function jL(e,t,n,r){const{props:a,attrs:o,vnode:{patchFlag:i}}=e,l=Gt(a),[s]=e.propsOptions;let c=!1;if((r||i>0)&&!(i&16)){if(i&8){const d=e.vnode.dynamicProps;for(let f=0;f<d.length;f++){let p=d[f];if(gm(e.emitsOptions,p))continue;const v=t[p];if(s)if(Mt(o,p))v!==o[p]&&(o[p]=v,c=!0);else{const m=_a(p);a[m]=Yb(s,l,m,v,e,!1)}else v!==o[p]&&(o[p]=v,c=!0)}}}else{E6(e,t,a,o)&&(c=!0);let d;for(const f in l)(!t||!Mt(t,f)&&((d=vl(f))===f||!Mt(t,d)))&&(s?n&&(n[f]!==void 0||n[d]!==void 0)&&(a[f]=Yb(s,l,f,void 0,e,!0)):delete a[f]);if(o!==l)for(const f in o)(!t||!Mt(t,f)&&!0)&&(delete o[f],c=!0)}c&&mi(e,"set","$attrs")}function E6(e,t,n,r){const[a,o]=e.propsOptions;let i=!1,l;if(t)for(let s in t){if(rp(s))continue;const c=t[s];let d;a&&Mt(a,d=_a(s))?!o||!o.includes(d)?n[d]=c:(l||(l={}))[d]=c:gm(e.emitsOptions,s)||(!(s in r)||c!==r[s])&&(r[s]=c,i=!0)}if(o){const s=Gt(n),c=l||wn;for(let d=0;d<o.length;d++){const f=o[d];n[f]=Yb(a,s,f,c[f],e,!Mt(c,f))}}return i}function Yb(e,t,n,r,a,o){const i=e[n];if(i!=null){const l=Mt(i,"default");if(l&&r===void 0){const s=i.default;if(i.type!==Function&&Ct(s)){const{propsDefaults:c}=a;n in c?r=c[n]:(zu(a),r=c[n]=s.call(null,t),ns())}else r=s}i[0]&&(o&&!l?r=!1:i[1]&&(r===""||r===vl(n))&&(r=!0))}return r}function M6(e,t,n=!1){const r=t.propsCache,a=r.get(e);if(a)return a;const o=e.props,i={},l=[];let s=!1;if(!Ct(e)){const 
d=f=>{s=!0;const[p,v]=M6(f,t,!0);fr(i,p),v&&l.push(...v)};!n&&t.mixins.length&&t.mixins.forEach(d),e.extends&&d(e.extends),e.mixins&&e.mixins.forEach(d)}if(!o&&!s)return r.set(e,Su),Su;if(pt(o))for(let d=0;d<o.length;d++){const f=_a(o[d]);Fk(f)&&(i[f]=wn)}else if(o)for(const d in o){const f=_a(d);if(Fk(f)){const p=o[d],v=i[f]=pt(p)||Ct(p)?{type:p}:p;if(v){const m=zk(Boolean,v.type),y=zk(String,v.type);v[0]=m>-1,v[1]=y<0||m<y,(m>-1||Mt(v,"default"))&&l.push(f)}}}const c=[i,l];return r.set(e,c),c}function Fk(e){return e[0]!=="$"}function Bk(e){const t=e&&e.toString().match(/^\s*function (\w+)/);return t?t[1]:e===null?"null":""}function Vk(e,t){return Bk(e)===Bk(t)}function zk(e,t){return pt(t)?t.findIndex(n=>Vk(n,e)):Ct(t)&&Vk(t,e)?0:-1}const I6=e=>e[0]==="_"||e==="$stable",lC=e=>pt(e)?e.map(ko):[ko(e)],KL=(e,t,n)=>{const r=re((...a)=>lC(t(...a)),n);return r._c=!1,r},N6=(e,t,n)=>{const r=e._ctx;for(const a in e){if(I6(a))continue;const o=e[a];if(Ct(o))t[a]=KL(a,o,r);else if(o!=null){const i=lC(o);t[a]=()=>i}}},A6=(e,t)=>{const n=lC(t);e.slots.default=()=>n},WL=(e,t)=>{if(e.vnode.shapeFlag&32){const n=t._;n?(e.slots=Gt(t),rv(t,"_",n)):N6(t,e.slots={})}else e.slots={},t&&A6(e,t);rv(e.slots,Sm,1)},UL=(e,t,n)=>{const{vnode:r,slots:a}=e;let o=!0,i=wn;if(r.shapeFlag&32){const l=t._;l?n&&l===1?o=!1:(fr(a,t),!n&&l===1&&delete a._):(o=!t.$stable,N6(t,a)),i=t}else t&&(A6(e,t),i={default:1});if(o)for(const l in a)!I6(l)&&!(l in i)&&delete a[l]};function at(e,t){const n=ua;if(n===null)return e;const r=km(n)||n.proxy,a=e.dirs||(e.dirs=[]);for(let o=0;o<t.length;o++){let[i,l,s,c=wn]=t[o];Ct(i)&&(i={mounted:i,updated:i}),i.deep&&Wl(l),a.push({dir:i,instance:r,value:l,oldValue:void 0,arg:s,modifiers:c})}return e}function Cl(e,t,n,r){const a=e.dirs,o=t&&t.dirs;for(let i=0;i<a.length;i++){const l=a[i];o&&(l.oldValue=o[i].value);let s=l.dir[r];s&&($s(),Pa(s,n,8,[e.el,l,e,t]),Os())}}function 
D6(){return{app:null,config:{isNativeTag:T9,performance:!1,globalProperties:{},optionMergeStrategies:{},errorHandler:void 0,warnHandler:void 0,compilerOptions:{}},mixins:[],components:{},directives:{},provides:Object.create(null),optionsCache:new WeakMap,propsCache:new WeakMap,emitsCache:new WeakMap}}let YL=0;function qL(e,t){return function(r,a=null){Ct(r)||(r=Object.assign({},r)),a!=null&&!zt(a)&&(a=null);const o=D6(),i=new Set;let l=!1;const s=o.app={_uid:YL++,_component:r,_props:a,_container:null,_context:o,_instance:null,version:vF,get config(){return o.config},set config(c){},use(c,...d){return i.has(c)||(c&&Ct(c.install)?(i.add(c),c.install(s,...d)):Ct(c)&&(i.add(c),c(s,...d))),s},mixin(c){return o.mixins.includes(c)||o.mixins.push(c),s},component(c,d){return d?(o.components[c]=d,s):o.components[c]},directive(c,d){return d?(o.directives[c]=d,s):o.directives[c]},mount(c,d,f){if(!l){const p=g(r,a);return p.appContext=o,d&&t?t(p,c):e(p,c,f),l=!0,s._container=c,c.__vue_app__=s,km(p.component)||p.component.proxy}},unmount(){l&&(e(null,s._container),delete s._container.__vue_app__)},provide(c,d){return o.provides[c]=d,s}};return s}}function qb(e,t,n,r,a=!1){if(pt(e)){e.forEach((p,v)=>qb(p,t&&(pt(t)?t[v]:t),n,r,a));return}if(Kb(r)&&!a)return;const o=r.shapeFlag&4?km(r.component)||r.component.proxy:r.el,i=a?null:o,{i:l,r:s}=e,c=t&&t.r,d=l.refs===wn?l.refs={}:l.refs,f=l.setupState;if(c!=null&&c!==s&&(wt(c)?(d[c]=null,Mt(f,c)&&(f[c]=null)):_n(c)&&(c.value=null)),Ct(s))ci(s,l,12,[i,d]);else{const p=wt(s),v=_n(s);if(p||v){const m=()=>{if(e.f){const y=p?d[s]:s.value;a?pt(y)&&K1(y,o):pt(y)?y.includes(o)||y.push(o):p?(d[s]=[o],Mt(f,s)&&(f[s]=d[s])):(s.value=[o],e.k&&(d[e.k]=s.value))}else p?(d[s]=i,Mt(f,s)&&(f[s]=i)):_n(s)&&(s.value=i,e.k&&(d[e.k]=i))};i?(m.id=-1,Rr(m,n)):m()}}}const Rr=_L;function GL(e){return XL(e)}function XL(e,t){const 
n=I9();n.__VUE__=!0;const{insert:r,remove:a,patchProp:o,createElement:i,createText:l,createComment:s,setText:c,setElementText:d,parentNode:f,nextSibling:p,setScopeId:v=Qt,cloneNode:m,insertStaticContent:y}=e,b=(te,ie,ge,ke=null,xe=null,Ie=null,ye=!1,pe=null,ue=!!ie.dynamicChildren)=>{if(te===ie)return;te&&!Rl(te,ie)&&(ke=de(te),J(te,xe,Ie,!0),te=null),ie.patchFlag===-2&&(ue=!1,ie.dynamicChildren=null);const{type:Ce,ref:je,shapeFlag:ee}=ie;switch(Ce){case Fo:C(te,ie,ge,ke);break;case Ir:S(te,ie,ge,ke);break;case qg:te==null&&w(ie,ge,ke,ye);break;case Fe:N(te,ie,ge,ke,xe,Ie,ye,pe,ue);break;default:ee&1?O(te,ie,ge,ke,xe,Ie,ye,pe,ue):ee&6?D(te,ie,ge,ke,xe,Ie,ye,pe,ue):(ee&64||ee&128)&&Ce.process(te,ie,ge,ke,xe,Ie,ye,pe,ue,Ee)}je!=null&&xe&&qb(je,te&&te.ref,Ie,ie||te,!ie)},C=(te,ie,ge,ke)=>{if(te==null)r(ie.el=l(ie.children),ge,ke);else{const xe=ie.el=te.el;ie.children!==te.children&&c(xe,ie.children)}},S=(te,ie,ge,ke)=>{te==null?r(ie.el=s(ie.children||""),ge,ke):ie.el=te.el},w=(te,ie,ge,ke)=>{[te.el,te.anchor]=y(te.children,ie,ge,ke,te.el,te.anchor)},k=({el:te,anchor:ie},ge,ke)=>{let xe;for(;te&&te!==ie;)xe=p(te),r(te,ge,ke),te=xe;r(ie,ge,ke)},$=({el:te,anchor:ie})=>{let ge;for(;te&&te!==ie;)ge=p(te),a(te),te=ge;a(ie)},O=(te,ie,ge,ke,xe,Ie,ye,pe,ue)=>{ye=ye||ie.type==="svg",te==null?T(ie,ge,ke,xe,Ie,ye,pe,ue):L(te,ie,xe,Ie,ye,pe,ue)},T=(te,ie,ge,ke,xe,Ie,ye,pe)=>{let ue,Ce;const{type:je,props:ee,shapeFlag:me,transition:He,patchFlag:lt,dirs:Ye}=te;if(te.el&&m!==void 0&&lt===-1)ue=te.el=m(te.el);else{if(ue=te.el=i(te.type,Ie,ee&&ee.is,ee),me&8?d(ue,te.children):me&16&&I(te.children,ue,null,ke,xe,Ie&&je!=="foreignObject",ye,pe),Ye&&Cl(te,null,ke,"created"),ee){for(const _e in ee)_e!=="value"&&!rp(_e)&&o(ue,_e,null,ee[_e],Ie,te.children,ke,xe,ae);"value"in ee&&o(ue,"value",null,ee.value),(Ce=ee.onVnodeBeforeMount)&&bo(Ce,ke,te)}_(ue,te,te.scopeId,ye,ke)}Ye&&Cl(te,null,ke,"beforeMount");const 
he=(!xe||xe&&!xe.pendingBranch)&&He&&!He.persisted;he&&He.beforeEnter(ue),r(ue,ie,ge),((Ce=ee&&ee.onVnodeMounted)||he||Ye)&&Rr(()=>{Ce&&bo(Ce,ke,te),he&&He.enter(ue),Ye&&Cl(te,null,ke,"mounted")},xe)},_=(te,ie,ge,ke,xe)=>{if(ge&&v(te,ge),ke)for(let Ie=0;Ie<ke.length;Ie++)v(te,ke[Ie]);if(xe){let Ie=xe.subTree;if(ie===Ie){const ye=xe.vnode;_(te,ye,ye.scopeId,ye.slotScopeIds,xe.parent)}}},I=(te,ie,ge,ke,xe,Ie,ye,pe,ue=0)=>{for(let Ce=ue;Ce<te.length;Ce++){const je=te[Ce]=pe?Ki(te[Ce]):ko(te[Ce]);b(null,je,ie,ge,ke,xe,Ie,ye,pe)}},L=(te,ie,ge,ke,xe,Ie,ye)=>{const pe=ie.el=te.el;let{patchFlag:ue,dynamicChildren:Ce,dirs:je}=ie;ue|=te.patchFlag&16;const ee=te.props||wn,me=ie.props||wn;let He;ge&&wl(ge,!1),(He=me.onVnodeBeforeUpdate)&&bo(He,ge,ie,te),je&&Cl(ie,te,ge,"beforeUpdate"),ge&&wl(ge,!0);const lt=xe&&ie.type!=="foreignObject";if(Ce?j(te.dynamicChildren,Ce,pe,ge,ke,lt,Ie):ye||K(te,ie,pe,null,ge,ke,lt,Ie,!1),ue>0){if(ue&16)F(pe,ie,ee,me,ge,ke,xe);else if(ue&2&&ee.class!==me.class&&o(pe,"class",null,me.class,xe),ue&4&&o(pe,"style",ee.style,me.style,xe),ue&8){const Ye=ie.dynamicProps;for(let he=0;he<Ye.length;he++){const _e=Ye[he],$e=ee[_e],Ve=me[_e];(Ve!==$e||_e==="value")&&o(pe,_e,$e,Ve,xe,te.children,ge,ke,ae)}}ue&1&&te.children!==ie.children&&d(pe,ie.children)}else!ye&&Ce==null&&F(pe,ie,ee,me,ge,ke,xe);((He=me.onVnodeUpdated)||je)&&Rr(()=>{He&&bo(He,ge,ie,te),je&&Cl(ie,te,ge,"updated")},ke)},j=(te,ie,ge,ke,xe,Ie,ye)=>{for(let pe=0;pe<ie.length;pe++){const ue=te[pe],Ce=ie[pe],je=ue.el&&(ue.type===Fe||!Rl(ue,Ce)||ue.shapeFlag&70)?f(ue.el):ge;b(ue,Ce,je,null,ke,xe,Ie,ye,!0)}},F=(te,ie,ge,ke,xe,Ie,ye)=>{if(ge!==ke){for(const pe in ke){if(rp(pe))continue;const ue=ke[pe],Ce=ge[pe];ue!==Ce&&pe!=="value"&&o(te,pe,Ce,ue,ye,ie.children,xe,Ie,ae)}if(ge!==wn)for(const pe in ge)!rp(pe)&&!(pe in ke)&&o(te,pe,ge[pe],null,ye,ie.children,xe,Ie,ae);"value"in ke&&o(te,"value",ge.value,ke.value)}},N=(te,ie,ge,ke,xe,Ie,ye,pe,ue)=>{const 
Ce=ie.el=te?te.el:l(""),je=ie.anchor=te?te.anchor:l("");let{patchFlag:ee,dynamicChildren:me,slotScopeIds:He}=ie;He&&(pe=pe?pe.concat(He):He),te==null?(r(Ce,ge,ke),r(je,ge,ke),I(ie.children,ge,je,xe,Ie,ye,pe,ue)):ee>0&&ee&64&&me&&te.dynamicChildren?(j(te.dynamicChildren,me,ge,xe,Ie,ye,pe),(ie.key!=null||xe&&ie===xe.subTree)&&sC(te,ie,!0)):K(te,ie,ge,je,xe,Ie,ye,pe,ue)},D=(te,ie,ge,ke,xe,Ie,ye,pe,ue)=>{ie.slotScopeIds=pe,te==null?ie.shapeFlag&512?xe.ctx.activate(ie,ge,ke,ye,ue):z(ie,ge,ke,xe,Ie,ye,ue):B(te,ie,ue)},z=(te,ie,ge,ke,xe,Ie,ye)=>{const pe=te.component=sF(te,ke,xe);if(bm(te)&&(pe.ctx.renderer=Ee),uF(pe),pe.asyncDep){if(xe&&xe.registerDep(pe,M),!te.el){const ue=pe.subTree=g(Ir);S(null,ue,ie,ge)}return}M(pe,te,ie,ge,xe,Ie,ye)},B=(te,ie,ge)=>{const ke=ie.component=te.component;if(PL(te,ie,ge))if(ke.asyncDep&&!ke.asyncResolved){E(ke,ie,ge);return}else ke.next=ie,CL(ke.update),ke.update();else ie.component=te.component,ie.el=te.el,ke.vnode=ie},M=(te,ie,ge,ke,xe,Ie,ye)=>{const pe=()=>{if(te.isMounted){let{next:je,bu:ee,u:me,parent:He,vnode:lt}=te,Ye=je,he;wl(te,!1),je?(je.el=lt.el,E(te,je,ye)):je=lt,ee&&op(ee),(he=je.props&&je.props.onVnodeBeforeUpdate)&&bo(he,He,je,lt),wl(te,!0);const _e=Ug(te),$e=te.subTree;te.subTree=_e,b($e,_e,f($e.el),de($e),te,xe,Ie),je.el=_e.el,Ye===null&&TL(te,_e.el),me&&Rr(me,xe),(he=je.props&&je.props.onVnodeUpdated)&&Rr(()=>bo(he,He,je,lt),xe)}else{let je;const{el:ee,props:me}=ie,{bm:He,m:lt,parent:Ye}=te,he=Kb(ie);if(wl(te,!1),He&&op(He),!he&&(je=me&&me.onVnodeBeforeMount)&&bo(je,Ye,ie),wl(te,!0),ee&&Be){const _e=()=>{te.subTree=Ug(te),Be(ee,te.subTree,te,xe,null)};he?ie.type.__asyncLoader().then(()=>!te.isUnmounted&&_e()):_e()}else{const _e=te.subTree=Ug(te);b(null,_e,ge,ke,te,xe,Ie),ie.el=_e.el}if(lt&&Rr(lt,xe),!he&&(je=me&&me.onVnodeMounted)){const _e=ie;Rr(()=>bo(je,Ye,_e),xe)}ie.shapeFlag&256&&te.a&&Rr(te.a,xe),te.isMounted=!0,ie=ge=ke=null}},ue=te.effect=new 
q1(pe,()=>v6(te.update),te.scope),Ce=te.update=ue.run.bind(ue);Ce.id=te.uid,wl(te,!0),Ce()},E=(te,ie,ge)=>{ie.component=te;const ke=te.vnode.props;te.vnode=ie,te.next=null,jL(te,ie.props,ke,ge),UL(te,ie.children,ge),$s(),nC(void 0,te.update),Os()},K=(te,ie,ge,ke,xe,Ie,ye,pe,ue=!1)=>{const Ce=te&&te.children,je=te?te.shapeFlag:0,ee=ie.children,{patchFlag:me,shapeFlag:He}=ie;if(me>0){if(me&128){Y(Ce,ee,ge,ke,xe,Ie,ye,pe,ue);return}else if(me&256){W(Ce,ee,ge,ke,xe,Ie,ye,pe,ue);return}}He&8?(je&16&&ae(Ce,xe,Ie),ee!==Ce&&d(ge,ee)):je&16?He&16?Y(Ce,ee,ge,ke,xe,Ie,ye,pe,ue):ae(Ce,xe,Ie,!0):(je&8&&d(ge,""),He&16&&I(ee,ge,ke,xe,Ie,ye,pe,ue))},W=(te,ie,ge,ke,xe,Ie,ye,pe,ue)=>{te=te||Su,ie=ie||Su;const Ce=te.length,je=ie.length,ee=Math.min(Ce,je);let me;for(me=0;me<ee;me++){const He=ie[me]=ue?Ki(ie[me]):ko(ie[me]);b(te[me],He,ge,null,xe,Ie,ye,pe,ue)}Ce>je?ae(te,xe,Ie,!0,!1,ee):I(ie,ge,ke,xe,Ie,ye,pe,ue,ee)},Y=(te,ie,ge,ke,xe,Ie,ye,pe,ue)=>{let Ce=0;const je=ie.length;let ee=te.length-1,me=je-1;for(;Ce<=ee&&Ce<=me;){const He=te[Ce],lt=ie[Ce]=ue?Ki(ie[Ce]):ko(ie[Ce]);if(Rl(He,lt))b(He,lt,ge,null,xe,Ie,ye,pe,ue);else break;Ce++}for(;Ce<=ee&&Ce<=me;){const He=te[ee],lt=ie[me]=ue?Ki(ie[me]):ko(ie[me]);if(Rl(He,lt))b(He,lt,ge,null,xe,Ie,ye,pe,ue);else break;ee--,me--}if(Ce>ee){if(Ce<=me){const He=me+1,lt=He<je?ie[He].el:ke;for(;Ce<=me;)b(null,ie[Ce]=ue?Ki(ie[Ce]):ko(ie[Ce]),ge,lt,xe,Ie,ye,pe,ue),Ce++}}else if(Ce>me)for(;Ce<=ee;)J(te[Ce],xe,Ie,!0),Ce++;else{const He=Ce,lt=Ce,Ye=new Map;for(Ce=lt;Ce<=me;Ce++){const Ft=ie[Ce]=ue?Ki(ie[Ce]):ko(ie[Ce]);Ft.key!=null&&Ye.set(Ft.key,Ce)}let he,_e=0;const $e=me-lt+1;let Ve=!1,st=0;const Dt=new Array($e);for(Ce=0;Ce<$e;Ce++)Dt[Ce]=0;for(Ce=He;Ce<=ee;Ce++){const Ft=te[Ce];if(_e>=$e){J(Ft,xe,Ie,!0);continue}let Jt;if(Ft.key!=null)Jt=Ye.get(Ft.key);else for(he=lt;he<=me;he++)if(Dt[he-lt]===0&&Rl(Ft,ie[he])){Jt=he;break}Jt===void 0?J(Ft,xe,Ie,!0):(Dt[Jt-lt]=Ce+1,Jt>=st?st=Jt:Ve=!0,b(Ft,ie[Jt],ge,null,xe,Ie,ye,pe,ue),_e++)}const 
Ut=Ve?ZL(Dt):Su;for(he=Ut.length-1,Ce=$e-1;Ce>=0;Ce--){const Ft=lt+Ce,Jt=ie[Ft],Mn=Ft+1<je?ie[Ft+1].el:ke;Dt[Ce]===0?b(null,Jt,ge,Mn,xe,Ie,ye,pe,ue):Ve&&(he<0||Ce!==Ut[he]?q(Jt,ge,Mn,2):he--)}}},q=(te,ie,ge,ke,xe=null)=>{const{el:Ie,type:ye,transition:pe,children:ue,shapeFlag:Ce}=te;if(Ce&6){q(te.component.subTree,ie,ge,ke);return}if(Ce&128){te.suspense.move(ie,ge,ke);return}if(Ce&64){ye.move(te,ie,ge,Ee);return}if(ye===Fe){r(Ie,ie,ge);for(let ee=0;ee<ue.length;ee++)q(ue[ee],ie,ge,ke);r(te.anchor,ie,ge);return}if(ye===qg){k(te,ie,ge);return}if(ke!==2&&Ce&1&&pe)if(ke===0)pe.beforeEnter(Ie),r(Ie,ie,ge),Rr(()=>pe.enter(Ie),xe);else{const{leave:ee,delayLeave:me,afterLeave:He}=pe,lt=()=>r(Ie,ie,ge),Ye=()=>{ee(Ie,()=>{lt(),He&&He()})};me?me(Ie,lt,Ye):Ye()}else r(Ie,ie,ge)},J=(te,ie,ge,ke=!1,xe=!1)=>{const{type:Ie,props:ye,ref:pe,children:ue,dynamicChildren:Ce,shapeFlag:je,patchFlag:ee,dirs:me}=te;if(pe!=null&&qb(pe,null,ge,te,!0),je&256){ie.ctx.deactivate(te);return}const He=je&1&&me,lt=!Kb(te);let Ye;if(lt&&(Ye=ye&&ye.onVnodeBeforeUnmount)&&bo(Ye,ie,te),je&6)Q(te.component,ge,ke);else{if(je&128){te.suspense.unmount(ge,ke);return}He&&Cl(te,null,ie,"beforeUnmount"),je&64?te.type.remove(te,ie,ge,xe,Ee,ke):Ce&&(Ie!==Fe||ee>0&&ee&64)?ae(Ce,ie,ge,!1,!0):(Ie===Fe&&ee&384||!xe&&je&16)&&ae(ue,ie,ge),ke&&ne(te)}(lt&&(Ye=ye&&ye.onVnodeUnmounted)||He)&&Rr(()=>{Ye&&bo(Ye,ie,te),He&&Cl(te,null,ie,"unmounted")},ge)},ne=te=>{const{type:ie,el:ge,anchor:ke,transition:xe}=te;if(ie===Fe){oe(ge,ke);return}if(ie===qg){$(te);return}const Ie=()=>{a(ge),xe&&!xe.persisted&&xe.afterLeave&&xe.afterLeave()};if(te.shapeFlag&1&&xe&&!xe.persisted){const{leave:ye,delayLeave:pe}=xe,ue=()=>ye(ge,Ie);pe?pe(te.el,Ie,ue):ue()}else Ie()},oe=(te,ie)=>{let 
ge;for(;te!==ie;)ge=p(te),a(te),te=ge;a(ie)},Q=(te,ie,ge)=>{const{bum:ke,scope:xe,update:Ie,subTree:ye,um:pe}=te;ke&&op(ke),xe.stop(),Ie&&(Ie.active=!1,J(ye,te,ie,ge)),pe&&Rr(pe,ie),Rr(()=>{te.isUnmounted=!0},ie),ie&&ie.pendingBranch&&!ie.isUnmounted&&te.asyncDep&&!te.asyncResolved&&te.suspenseId===ie.pendingId&&(ie.deps--,ie.deps===0&&ie.resolve())},ae=(te,ie,ge,ke=!1,xe=!1,Ie=0)=>{for(let ye=Ie;ye<te.length;ye++)J(te[ye],ie,ge,ke,xe)},de=te=>te.shapeFlag&6?de(te.component.subTree):te.shapeFlag&128?te.suspense.next():p(te.anchor||te.el),be=(te,ie,ge)=>{te==null?ie._vnode&&J(ie._vnode,null,null,!0):b(ie._vnode||null,te,ie,null,null,null,ge),y6(),ie._vnode=te},Ee={p:b,um:J,m:q,r:ne,mt:z,mc:I,pc:K,pbc:j,n:de,o:e};let Pe,Be;return t&&([Pe,Be]=t(Ee)),{render:be,hydrate:Pe,createApp:qL(be,Pe)}}function wl({effect:e,update:t},n){e.allowRecurse=t.allowRecurse=n}function sC(e,t,n=!1){const r=e.children,a=t.children;if(pt(r)&&pt(a))for(let o=0;o<r.length;o++){const i=r[o];let l=a[o];l.shapeFlag&1&&!l.dynamicChildren&&((l.patchFlag<=0||l.patchFlag===32)&&(l=a[o]=Ki(a[o]),l.el=i.el),n||sC(i,l))}}function ZL(e){const t=e.slice(),n=[0];let r,a,o,i,l;const s=e.length;for(r=0;r<s;r++){const c=e[r];if(c!==0){if(a=n[n.length-1],e[a]<c){t[r]=a,n.push(r);continue}for(o=0,i=n.length-1;o<i;)l=o+i>>1,e[n[l]]<c?o=l+1:i=l;c<e[n[o]]&&(o>0&&(t[r]=n[o-1]),n[o]=r)}}for(o=n.length,i=n[o-1];o-- >0;)n[o]=i,i=t[i];return n}const JL=e=>e.__isTeleport,fd=e=>e&&(e.disabled||e.disabled===""),Hk=e=>typeof SVGElement!="undefined"&&e instanceof SVGElement,Gb=(e,t)=>{const n=e&&e.to;return wt(n)?t?t(n):null:n},QL={__isTeleport:!0,process(e,t,n,r,a,o,i,l,s,c){const{mc:d,pc:f,pbc:p,o:{insert:v,querySelector:m,createText:y,createComment:b}}=c,C=fd(t.props);let{shapeFlag:S,children:w,dynamicChildren:k}=t;if(e==null){const $=t.el=y(""),O=t.anchor=y("");v($,n,r),v(O,n,r);const T=t.target=Gb(t.props,m),_=t.targetAnchor=y("");T&&(v(_,T),i=i||Hk(T));const 
I=(L,j)=>{S&16&&d(w,L,j,a,o,i,l,s)};C?I(n,O):T&&I(T,_)}else{t.el=e.el;const $=t.anchor=e.anchor,O=t.target=e.target,T=t.targetAnchor=e.targetAnchor,_=fd(e.props),I=_?n:O,L=_?$:T;if(i=i||Hk(O),k?(p(e.dynamicChildren,k,I,a,o,i,l),sC(e,t,!0)):s||f(e,t,I,L,a,o,i,l,!1),C)_||nh(t,n,$,c,1);else if((t.props&&t.props.to)!==(e.props&&e.props.to)){const j=t.target=Gb(t.props,m);j&&nh(t,j,null,c,0)}else _&&nh(t,O,T,c,1)}},remove(e,t,n,r,{um:a,o:{remove:o}},i){const{shapeFlag:l,children:s,anchor:c,targetAnchor:d,target:f,props:p}=e;if(f&&o(d),(i||!fd(p))&&(o(c),l&16))for(let v=0;v<s.length;v++){const m=s[v];a(m,t,n,!0,!!m.dynamicChildren)}},move:nh,hydrate:eF};function nh(e,t,n,{o:{insert:r},m:a},o=2){o===0&&r(e.targetAnchor,t,n);const{el:i,anchor:l,shapeFlag:s,children:c,props:d}=e,f=o===2;if(f&&r(i,t,n),(!f||fd(d))&&s&16)for(let p=0;p<c.length;p++)a(c[p],t,n,2);f&&r(l,t,n)}function eF(e,t,n,r,a,o,{o:{nextSibling:i,parentNode:l,querySelector:s}},c){const d=t.target=Gb(t.props,s);if(d){const f=d._lpa||d.firstChild;t.shapeFlag&16&&(fd(t.props)?(t.anchor=c(i(e),t,l(e),n,r,a,o),t.targetAnchor=f):(t.anchor=i(e),t.targetAnchor=c(f,t,d,n,r,a,o)),d._lpa=t.targetAnchor&&i(t.targetAnchor))}return t.anchor&&i(t.anchor)}const Ps=QL,uC="components",tF="directives";function we(e,t){return cC(uC,e,!0,t)||e}const R6=Symbol();function Kt(e){return wt(e)?cC(uC,e,!1)||e:e||R6}function pa(e){return cC(tF,e)}function cC(e,t,n=!0,r=!1){const a=ua||dr;if(a){const o=a.type;if(e===uC){const l=j6(o);if(l&&(l===t||l===_a(t)||l===ia(_a(t))))return o}const i=jk(a[e]||o[e],t)||jk(a.appContext[e],t);return!i&&r?o:i}}function jk(e,t){return e&&(e[t]||e[_a(t)]||e[ia(_a(t))])}const Fe=Symbol(void 0),Fo=Symbol(void 0),Ir=Symbol(void 0),qg=Symbol(void 0),hd=[];let ts=null;function R(e=!1){hd.push(ts=e?null:[])}function nF(){hd.pop(),ts=hd[hd.length-1]||null}let sv=1;function Kk(e){sv+=e}function L6(e){return e.dynamicChildren=sv>0?ts||Su:null,nF(),sv>0&&ts&&ts.push(e),e}function X(e,t,n,r,a,o){return 
L6(Z(e,t,n,r,a,o,!0))}function fe(e,t,n,r,a){return L6(g(e,t,n,r,a,!0))}function rn(e){return e?e.__v_isVNode===!0:!1}function Rl(e,t){return e.type===t.type&&e.key===t.key}const Sm="__vInternal",F6=({key:e})=>e!=null?e:null,ip=({ref:e,ref_key:t,ref_for:n})=>e!=null?wt(e)||_n(e)||Ct(e)?{i:ua,r:e,k:t,f:!!n}:e:null;function Z(e,t=null,n=null,r=0,a=null,o=e===Fe?0:1,i=!1,l=!1){const s={__v_isVNode:!0,__v_skip:!0,type:e,props:t,key:t&&F6(t),ref:t&&ip(t),scopeId:ym,slotScopeIds:null,children:n,component:null,suspense:null,ssContent:null,ssFallback:null,dirs:null,transition:null,el:null,anchor:null,target:null,targetAnchor:null,staticCount:0,shapeFlag:o,patchFlag:r,dynamicProps:a,dynamicChildren:null,appContext:null};return l?(dC(s,n),o&128&&e.normalize(s)):n&&(s.shapeFlag|=wt(n)?8:16),sv>0&&!i&&ts&&(s.patchFlag>0||o&6)&&s.patchFlag!==32&&ts.push(s),s}const g=rF;function rF(e,t=null,n=null,r=0,a=null,o=!1){if((!e||e===R6)&&(e=Ir),rn(e)){const l=hr(e,t,!0);return n&&dC(l,n),l}if(pF(e)&&(e=e.__vccOpts),t){t=ll(t);let{class:l,style:s}=t;l&&!wt(l)&&(t.class=U(l)),zt(s)&&(u6(s)&&!pt(s)&&(s=fr({},s)),t.style=Xe(s))}const i=wt(e)?1:xL(e)?128:JL(e)?64:zt(e)?4:Ct(e)?2:0;return Z(e,t,n,r,a,i,o,!0)}function ll(e){return e?u6(e)||Sm in e?fr({},e):e:null}function hr(e,t,n=!1){const{props:r,ref:a,patchFlag:o,children:i}=e,l=t?hn(r||{},t):r;return{__v_isVNode:!0,__v_skip:!0,type:e.type,props:l,key:l&&F6(l),ref:t&&t.ref?n&&a?pt(a)?a.concat(ip(t)):[a,ip(t)]:ip(t):a,scopeId:e.scopeId,slotScopeIds:e.slotScopeIds,children:i,target:e.target,targetAnchor:e.targetAnchor,staticCount:e.staticCount,shapeFlag:e.shapeFlag,patchFlag:t&&e.type!==Fe?o===-1?16:o|16:o,dynamicProps:e.dynamicProps,dynamicChildren:e.dynamicChildren,appContext:e.appContext,dirs:e.dirs,transition:e.transition,component:e.component,suspense:e.suspense,ssContent:e.ssContent&&hr(e.ssContent),ssFallback:e.ssFallback&&hr(e.ssFallback),el:e.el,anchor:e.anchor}}function yt(e=" ",t=0){return g(Fo,null,e,t)}function 
se(e="",t=!1){return t?(R(),fe(Ir,null,e)):g(Ir,null,e)}function ko(e){return e==null||typeof e=="boolean"?g(Ir):pt(e)?g(Fe,null,e.slice()):typeof e=="object"?Ki(e):g(Fo,null,String(e))}function Ki(e){return e.el===null||e.memo?e:hr(e)}function dC(e,t){let n=0;const{shapeFlag:r}=e;if(t==null)t=null;else if(pt(t))n=16;else if(typeof t=="object")if(r&65){const a=t.default;a&&(a._c&&(a._d=!1),dC(e,a()),a._c&&(a._d=!0));return}else{n=32;const a=t._;!a&&!(Sm in t)?t._ctx=ua:a===3&&ua&&(ua.slots._===1?t._=1:(t._=2,e.patchFlag|=1024))}else Ct(t)?(t={default:t,_ctx:ua},n=32):(t=String(t),r&64?(n=16,t=[yt(t)]):n=8);e.children=t,e.shapeFlag|=n}function hn(...e){const t={};for(let n=0;n<e.length;n++){const r=e[n];for(const a in r)if(a==="class")t.class!==r.class&&(t.class=U([t.class,r.class]));else if(a==="style")t.style=Xe([t.style,r.style]);else if(fm(a)){const o=t[a],i=r[a];i&&o!==i&&!(pt(o)&&o.includes(i))&&(t[a]=o?[].concat(o,i):i)}else a!==""&&(t[a]=r[a])}return t}function bo(e,t,n,r=null){Pa(e,t,7,[n,r])}function Rt(e,t,n,r){let a;const o=n&&n[r];if(pt(e)||wt(e)){a=new Array(e.length);for(let i=0,l=e.length;i<l;i++)a[i]=t(e[i],i,void 0,o&&o[i])}else if(typeof e=="number"){a=new Array(e);for(let i=0;i<e;i++)a[i]=t(i+1,i,void 0,o&&o[i])}else if(zt(e))if(e[Symbol.iterator])a=Array.from(e,(i,l)=>t(i,l,void 0,o&&o[l]));else{const i=Object.keys(e);a=new Array(i.length);for(let l=0,s=i.length;l<s;l++){const c=i[l];a[l]=t(e[c],c,l,o&&o[l])}}else a=[];return n&&(n[r]=a),a}function sl(e,t){for(let n=0;n<t.length;n++){const r=t[n];if(pt(r))for(let a=0;a<r.length;a++)e[r[a].name]=r[a].fn;else r&&(e[r.name]=r.fn)}return e}function Oe(e,t,n={},r,a){if(ua.isCE)return g("slot",t==="default"?null:{name:t},r&&r());let o=e[t];o&&o._c&&(o._d=!1),R();const i=o&&B6(o(n)),l=fe(Fe,{key:n.key||`_${t}`},i||(r?r():[]),i&&e._===1?64:-2);return!a&&l.scopeId&&(l.slotScopeIds=[l.scopeId+"-s"]),o&&o._c&&(o._d=!0),l}function B6(e){return 
e.some(t=>rn(t)?!(t.type===Ir||t.type===Fe&&!B6(t.children)):!0)?e:null}function aF(e){const t={};for(const n in e)t[ap(n)]=e[n];return t}const Xb=e=>e?V6(e)?km(e)||e.proxy:Xb(e.parent):null,uv=fr(Object.create(null),{$:e=>e,$el:e=>e.vnode.el,$data:e=>e.data,$props:e=>e.props,$attrs:e=>e.attrs,$slots:e=>e.slots,$refs:e=>e.refs,$parent:e=>Xb(e.parent),$root:e=>Xb(e.root),$emit:e=>e.emit,$options:e=>_6(e),$forceUpdate:e=>()=>v6(e.update),$nextTick:e=>Ne.bind(e.proxy),$watch:e=>EL.bind(e)}),oF={get({_:e},t){const{ctx:n,setupState:r,data:a,props:o,accessCache:i,type:l,appContext:s}=e;let c;if(t[0]!=="$"){const v=i[t];if(v!==void 0)switch(v){case 1:return r[t];case 2:return a[t];case 4:return n[t];case 3:return o[t]}else{if(r!==wn&&Mt(r,t))return i[t]=1,r[t];if(a!==wn&&Mt(a,t))return i[t]=2,a[t];if((c=e.propsOptions[0])&&Mt(c,t))return i[t]=3,o[t];if(n!==wn&&Mt(n,t))return i[t]=4,n[t];Wb&&(i[t]=0)}}const d=uv[t];let f,p;if(d)return t==="$attrs"&&ha(e,"get",t),d(e);if((f=l.__cssModules)&&(f=f[t]))return f;if(n!==wn&&Mt(n,t))return i[t]=4,n[t];if(p=s.config.globalProperties,Mt(p,t))return p[t]},set({_:e},t,n){const{data:r,setupState:a,ctx:o}=e;return a!==wn&&Mt(a,t)?(a[t]=n,!0):r!==wn&&Mt(r,t)?(r[t]=n,!0):Mt(e.props,t)||t[0]==="$"&&t.slice(1)in e?!1:(o[t]=n,!0)},has({_:{data:e,setupState:t,accessCache:n,ctx:r,appContext:a,propsOptions:o}},i){let l;return!!n[i]||e!==wn&&Mt(e,i)||t!==wn&&Mt(t,i)||(l=o[0])&&Mt(l,i)||Mt(r,i)||Mt(uv,i)||Mt(a.config.globalProperties,i)},defineProperty(e,t,n){return n.get!=null?e.$.accessCache[t]=0:Mt(n,"value")&&this.set(e,t,n.value,null),Reflect.defineProperty(e,t,n)}},iF=D6();let lF=0;function sF(e,t,n){const r=e.type,a=(t?t.appContext:e.appContext)||iF,o={uid:lF++,vnode:e,type:r,parent:t,appContext:a,root:null,next:null,subTree:null,effect:null,update:null,scope:new 
X_(!0),render:null,proxy:null,exposed:null,exposeProxy:null,withProxy:null,provides:t?t.provides:Object.create(a.provides),accessCache:null,renderCache:[],components:null,directives:null,propsOptions:M6(r,a),emitsOptions:C6(r,a),emit:null,emitted:null,propsDefaults:wn,inheritAttrs:r.inheritAttrs,ctx:wn,data:wn,props:wn,attrs:wn,slots:wn,refs:wn,setupState:wn,setupContext:null,suspense:n,suspenseId:n?n.pendingId:0,asyncDep:null,asyncResolved:!1,isMounted:!1,isUnmounted:!1,isDeactivated:!1,bc:null,c:null,bm:null,m:null,bu:null,u:null,um:null,bum:null,da:null,a:null,rtg:null,rtc:null,ec:null,sp:null};return o.ctx={_:o},o.root=t?t.root:o,o.emit=kL.bind(null,o),e.ce&&e.ce(o),o}let dr=null;const $t=()=>dr||ua,zu=e=>{dr=e,e.scope.on()},ns=()=>{dr&&dr.scope.off(),dr=null};function V6(e){return e.vnode.shapeFlag&4}let Ld=!1;function uF(e,t=!1){Ld=t;const{props:n,children:r}=e.vnode,a=V6(e);HL(e,n,a,t),WL(e,r);const o=a?cF(e,t):void 0;return Ld=!1,o}function cF(e,t){const n=e.type;e.accessCache=Object.create(null),e.proxy=ps(new Proxy(e.ctx,oF));const{setup:r}=n;if(r){const a=e.setupContext=r.length>1?H6(e):null;zu(e),$s();const o=ci(r,e,0,[e.props,a]);if(Os(),ns(),hs(o)){if(o.then(ns,ns),t)return o.then(i=>{Wk(e,i,t)}).catch(i=>{mm(i,e,0)});e.asyncDep=o}else Wk(e,o,t)}else z6(e,t)}function Wk(e,t,n){Ct(t)?e.type.__ssrInlineRender?e.ssrRender=t:e.render=t:zt(t)&&(e.setupState=f6(t)),z6(e,n)}let Uk;function z6(e,t,n){const r=e.type;if(!e.render){if(!t&&Uk&&!r.render){const a=r.template;if(a){const{isCustomElement:o,compilerOptions:i}=e.appContext.config,{delimiters:l,compilerOptions:s}=r,c=fr(fr({isCustomElement:o,delimiters:l},i),s);r.render=Uk(a,c)}}e.render=r.render||Qt}zu(e),$s(),LL(e),Os(),ns()}function dF(e){return new Proxy(e.attrs,{get(t,n){return ha(e,"get","$attrs"),t[n]}})}function H6(e){const t=r=>{e.exposed=r||{}};let n;return{get attrs(){return n||(n=dF(e))},slots:e.slots,emit:e.emit,expose:t}}function km(e){if(e.exposed)return e.exposeProxy||(e.exposeProxy=new 
Proxy(f6(ps(e.exposed)),{get(t,n){if(n in t)return t[n];if(n in uv)return uv[n](e)}}))}const fF=/(?:^|[-_])(\w)/g,hF=e=>e.replace(fF,t=>t.toUpperCase()).replace(/[-_]/g,"");function j6(e){return Ct(e)&&e.displayName||e.name}function K6(e,t,n=!1){let r=j6(t);if(!r&&t.__file){const a=t.__file.match(/([^/\\]+)\.\w+$/);a&&(r=a[1])}if(!r&&e&&e.parent){const a=o=>{for(const i in o)if(o[i]===t)return i};r=a(e.components||e.parent.type.components)||a(e.appContext.components)}return r?hF(r):n?"App":"Anonymous"}function pF(e){return Ct(e)&&"__vccOpts"in e}const x=(e,t)=>fL(e,t,Ld);function wf(){return U6().slots}function W6(){return U6().attrs}function U6(){const e=$t();return e.setupContext||(e.setupContext=H6(e))}function qe(e,t,n){const r=arguments.length;return r===2?zt(t)&&!pt(t)?rn(t)?g(e,null,[t]):g(e,t):g(e,null,t):(r>3?n=Array.prototype.slice.call(arguments,2):r===3&&rn(n)&&(n=[n]),g(e,t,n))}const vF="3.2.32",mF="http://www.w3.org/2000/svg",Ll=typeof document!="undefined"?document:null,Yk=Ll&&Ll.createElement("template"),gF={insert:(e,t,n)=>{t.insertBefore(e,n||null)},remove:e=>{const t=e.parentNode;t&&t.removeChild(e)},createElement:(e,t,n,r)=>{const a=t?Ll.createElementNS(mF,e):Ll.createElement(e,n?{is:n}:void 0);return e==="select"&&r&&r.multiple!=null&&a.setAttribute("multiple",r.multiple),a},createText:e=>Ll.createTextNode(e),createComment:e=>Ll.createComment(e),setText:(e,t)=>{e.nodeValue=t},setElementText:(e,t)=>{e.textContent=t},parentNode:e=>e.parentNode,nextSibling:e=>e.nextSibling,querySelector:e=>Ll.querySelector(e),setScopeId(e,t){e.setAttribute(t,"")},cloneNode(e){const t=e.cloneNode(!0);return"_value"in e&&(t._value=e._value),t},insertStaticContent(e,t,n,r,a,o){const i=n?n.previousSibling:t.lastChild;if(a&&(a===o||a.nextSibling))for(;t.insertBefore(a.cloneNode(!0),n),!(a===o||!(a=a.nextSibling)););else{Yk.innerHTML=r?`<svg>${e}</svg>`:e;const l=Yk.content;if(r){const 
s=l.firstChild;for(;s.firstChild;)l.appendChild(s.firstChild);l.removeChild(s)}t.insertBefore(l,n)}return[i?i.nextSibling:t.firstChild,n?n.previousSibling:t.lastChild]}};function yF(e,t,n){const r=e._vtc;r&&(t=(t?[t,...r]:[...r]).join(" ")),t==null?e.removeAttribute("class"):n?e.setAttribute("class",t):e.className=t}function bF(e,t,n){const r=e.style,a=wt(n);if(n&&!a){for(const o in n)Zb(r,o,n[o]);if(t&&!wt(t))for(const o in t)n[o]==null&&Zb(r,o,"")}else{const o=r.display;a?t!==n&&(r.cssText=n):t&&e.removeAttribute("style"),"_vod"in e&&(r.display=o)}}const qk=/\s*!important$/;function Zb(e,t,n){if(pt(n))n.forEach(r=>Zb(e,t,r));else if(t.startsWith("--"))e.setProperty(t,n);else{const r=CF(e,t);qk.test(n)?e.setProperty(vl(r),n.replace(qk,""),"important"):e[r]=n}}const Gk=["Webkit","Moz","ms"],Gg={};function CF(e,t){const n=Gg[t];if(n)return n;let r=_a(t);if(r!=="filter"&&r in e)return Gg[t]=r;r=ia(r);for(let a=0;a<Gk.length;a++){const o=Gk[a]+r;if(o in e)return Gg[t]=o}return t}const Xk="http://www.w3.org/1999/xlink";function wF(e,t,n,r,a){if(r&&t.startsWith("xlink:"))n==null?e.removeAttributeNS(Xk,t.slice(6,t.length)):e.setAttributeNS(Xk,t,n);else{const o=S9(t);n==null||o&&!W_(n)?e.removeAttribute(t):e.setAttribute(t,o?"":n)}}function SF(e,t,n,r,a,o,i){if(t==="innerHTML"||t==="textContent"){r&&i(r,a,o),e[t]=n==null?"":n;return}if(t==="value"&&e.tagName!=="PROGRESS"&&!e.tagName.includes("-")){e._value=n;const l=n==null?"":n;(e.value!==l||e.tagName==="OPTION")&&(e.value=l),n==null&&e.removeAttribute(t);return}if(n===""||n==null){const l=typeof e[t];if(l==="boolean"){e[t]=W_(n);return}else if(n==null&&l==="string"){e[t]="",e.removeAttribute(t);return}else if(l==="number"){try{e[t]=0}catch{}e.removeAttribute(t);return}}try{e[t]=n}catch{}}let cv=Date.now,Y6=!1;if(typeof window!="undefined"){cv()>document.createEvent("Event").timeStamp&&(cv=()=>performance.now());const e=navigator.userAgent.match(/firefox\/(\d+)/i);Y6=!!(e&&Number(e[1])<=53)}let Jb=0;const 
kF=Promise.resolve(),$F=()=>{Jb=0},OF=()=>Jb||(kF.then($F),Jb=cv());function Wi(e,t,n,r){e.addEventListener(t,n,r)}function PF(e,t,n,r){e.removeEventListener(t,n,r)}function TF(e,t,n,r,a=null){const o=e._vei||(e._vei={}),i=o[t];if(r&&i)i.value=r;else{const[l,s]=xF(t);if(r){const c=o[t]=_F(r,a);Wi(e,l,c,s)}else i&&(PF(e,l,i,s),o[t]=void 0)}}const Zk=/(?:Once|Passive|Capture)$/;function xF(e){let t;if(Zk.test(e)){t={};let n;for(;n=e.match(Zk);)e=e.slice(0,e.length-n[0].length),t[n[0].toLowerCase()]=!0}return[vl(e.slice(2)),t]}function _F(e,t){const n=r=>{const a=r.timeStamp||cv();(Y6||a>=n.attached-1)&&Pa(EF(r,n.value),t,5,[r])};return n.value=e,n.attached=OF(),n}function EF(e,t){if(pt(t)){const n=e.stopImmediatePropagation;return e.stopImmediatePropagation=()=>{n.call(e),e._stopped=!0},t.map(r=>a=>!a._stopped&&r&&r(a))}else return t}const Jk=/^on[a-z]/,MF=(e,t,n,r,a=!1,o,i,l,s)=>{t==="class"?yF(e,r,a):t==="style"?bF(e,n,r):fm(t)?j1(t)||TF(e,t,n,r,i):(t[0]==="."?(t=t.slice(1),!0):t[0]==="^"?(t=t.slice(1),!1):IF(e,t,r,a))?SF(e,t,r,o,i,l,s):(t==="true-value"?e._trueValue=r:t==="false-value"&&(e._falseValue=r),wF(e,t,r,a))};function IF(e,t,n,r){return r?!!(t==="innerHTML"||t==="textContent"||t in e&&Jk.test(t)&&Ct(n)):t==="spellcheck"||t==="draggable"||t==="form"||t==="list"&&e.tagName==="INPUT"||t==="type"&&e.tagName==="TEXTAREA"||Jk.test(t)&&wt(n)?!1:t in e}const Di="transition",Rc="animation",Vn=(e,{slots:t})=>qe(k6,G6(e),t);Vn.displayName="Transition";const q6={name:String,type:String,css:{type:Boolean,default:!0},duration:[String,Number,Object],enterFromClass:String,enterActiveClass:String,enterToClass:String,appearFromClass:String,appearActiveClass:String,appearToClass:String,leaveFromClass:String,leaveActiveClass:String,leaveToClass:String},NF=Vn.props=fr({},k6.props,q6),Sl=(e,t=[])=>{pt(e)?e.forEach(n=>n(...t)):e&&e(...t)},Qk=e=>e?pt(e)?e.some(t=>t.length>1):e.length>1:!1;function G6(e){const t={};for(const F in e)F in q6||(t[F]=e[F]);if(e.css===!1)return 
t;const{name:n="v",type:r,duration:a,enterFromClass:o=`${n}-enter-from`,enterActiveClass:i=`${n}-enter-active`,enterToClass:l=`${n}-enter-to`,appearFromClass:s=o,appearActiveClass:c=i,appearToClass:d=l,leaveFromClass:f=`${n}-leave-from`,leaveActiveClass:p=`${n}-leave-active`,leaveToClass:v=`${n}-leave-to`}=e,m=AF(a),y=m&&m[0],b=m&&m[1],{onBeforeEnter:C,onEnter:S,onEnterCancelled:w,onLeave:k,onLeaveCancelled:$,onBeforeAppear:O=C,onAppear:T=S,onAppearCancelled:_=w}=t,I=(F,N,D)=>{Il(F,N?d:l),Il(F,N?c:i),D&&D()},L=(F,N)=>{Il(F,v),Il(F,p),N&&N()},j=F=>(N,D)=>{const z=F?T:S,B=()=>I(N,F,D);Sl(z,[N,B]),e$(()=>{Il(N,F?s:o),Qo(N,F?d:l),Qk(z)||t$(N,r,y,B)})};return fr(t,{onBeforeEnter(F){Sl(C,[F]),Qo(F,o),Qo(F,i)},onBeforeAppear(F){Sl(O,[F]),Qo(F,s),Qo(F,c)},onEnter:j(!1),onAppear:j(!0),onLeave(F,N){const D=()=>L(F,N);Qo(F,f),Z6(),Qo(F,p),e$(()=>{Il(F,f),Qo(F,v),Qk(k)||t$(F,r,b,D)}),Sl(k,[F,D])},onEnterCancelled(F){I(F,!1),Sl(w,[F])},onAppearCancelled(F){I(F,!0),Sl(_,[F])},onLeaveCancelled(F){L(F),Sl($,[F])}})}function AF(e){if(e==null)return null;if(zt(e))return[Xg(e.enter),Xg(e.leave)];{const t=Xg(e);return[t,t]}}function Xg(e){return av(e)}function Qo(e,t){t.split(/\s+/).forEach(n=>n&&e.classList.add(n)),(e._vtc||(e._vtc=new Set)).add(t)}function Il(e,t){t.split(/\s+/).forEach(r=>r&&e.classList.remove(r));const{_vtc:n}=e;n&&(n.delete(t),n.size||(e._vtc=void 0))}function e$(e){requestAnimationFrame(()=>{requestAnimationFrame(e)})}let DF=0;function t$(e,t,n,r){const a=e._endId=++DF,o=()=>{a===e._endId&&r()};if(n)return setTimeout(o,n);const{type:i,timeout:l,propCount:s}=X6(e,t);if(!i)return r();const c=i+"end";let d=0;const f=()=>{e.removeEventListener(c,p),o()},p=v=>{v.target===e&&++d>=s&&f()};setTimeout(()=>{d<s&&f()},l+1),e.addEventListener(c,p)}function X6(e,t){const n=window.getComputedStyle(e),r=m=>(n[m]||"").split(", "),a=r(Di+"Delay"),o=r(Di+"Duration"),i=n$(a,o),l=r(Rc+"Delay"),s=r(Rc+"Duration"),c=n$(l,s);let 
d=null,f=0,p=0;t===Di?i>0&&(d=Di,f=i,p=o.length):t===Rc?c>0&&(d=Rc,f=c,p=s.length):(f=Math.max(i,c),d=f>0?i>c?Di:Rc:null,p=d?d===Di?o.length:s.length:0);const v=d===Di&&/\b(transform|all)(,|$)/.test(n[Di+"Property"]);return{type:d,timeout:f,propCount:p,hasTransform:v}}function n$(e,t){for(;e.length<t.length;)e=e.concat(e);return Math.max(...t.map((n,r)=>r$(n)+r$(e[r])))}function r$(e){return Number(e.slice(0,-1).replace(",","."))*1e3}function Z6(){return document.body.offsetHeight}const J6=new WeakMap,Q6=new WeakMap,RF={name:"TransitionGroup",props:fr({},NF,{tag:String,moveClass:String}),setup(e,{slots:t}){const n=$t(),r=S6();let a,o;return ur(()=>{if(!a.length)return;const i=e.moveClass||`${e.name||"v"}-move`;if(!VF(a[0].el,n.vnode.el,i))return;a.forEach(LF),a.forEach(FF);const l=a.filter(BF);Z6(),l.forEach(s=>{const c=s.el,d=c.style;Qo(c,i),d.transform=d.webkitTransform=d.transitionDuration="";const f=c._moveCb=p=>{p&&p.target!==c||(!p||/transform$/.test(p.propertyName))&&(c.removeEventListener("transitionend",f),c._moveCb=null,Il(c,i))};c.addEventListener("transitionend",f)})}),()=>{const i=Gt(e),l=G6(i);let s=i.tag||Fe;a=o,o=t.default?iC(t.default()):[];for(let c=0;c<o.length;c++){const d=o[c];d.key!=null&&Rd(d,Dd(d,l,r,n))}if(a)for(let c=0;c<a.length;c++){const d=a[c];Rd(d,Dd(d,l,r,n)),J6.set(d,d.el.getBoundingClientRect())}return g(s,null,o)}}},e8=RF;function LF(e){const t=e.el;t._moveCb&&t._moveCb(),t._enterCb&&t._enterCb()}function FF(e){Q6.set(e,e.el.getBoundingClientRect())}function BF(e){const t=J6.get(e),n=Q6.get(e),r=t.left-n.left,a=t.top-n.top;if(r||a){const o=e.el.style;return o.transform=o.webkitTransform=`translate(${r}px,${a}px)`,o.transitionDuration="0s",e}}function VF(e,t,n){const r=e.cloneNode();e._vtc&&e._vtc.forEach(i=>{i.split(/\s+/).forEach(l=>l&&r.classList.remove(l))}),n.split(/\s+/).forEach(i=>i&&r.classList.add(i)),r.style.display="none";const a=t.nodeType===1?t:t.parentNode;a.appendChild(r);const{hasTransform:o}=X6(r);return 
a.removeChild(r),o}const Hu=e=>{const t=e.props["onUpdate:modelValue"];return pt(t)?n=>op(t,n):t};function zF(e){e.target.composing=!0}function a$(e){const t=e.target;t.composing&&(t.composing=!1,HF(t,"input"))}function HF(e,t){const n=document.createEvent("HTMLEvents");n.initEvent(t,!0,!0),e.dispatchEvent(n)}const fC={created(e,{modifiers:{lazy:t,trim:n,number:r}},a){e._assign=Hu(a);const o=r||a.props&&a.props.type==="number";Wi(e,t?"change":"input",i=>{if(i.target.composing)return;let l=e.value;n?l=l.trim():o&&(l=av(l)),e._assign(l)}),n&&Wi(e,"change",()=>{e.value=e.value.trim()}),t||(Wi(e,"compositionstart",zF),Wi(e,"compositionend",a$),Wi(e,"change",a$))},mounted(e,{value:t}){e.value=t==null?"":t},beforeUpdate(e,{value:t,modifiers:{lazy:n,trim:r,number:a}},o){if(e._assign=Hu(o),e.composing||document.activeElement===e&&(n||r&&e.value.trim()===t||(a||e.type==="number")&&av(e.value)===t))return;const i=t==null?"":t;e.value!==i&&(e.value=i)}},dv={deep:!0,created(e,t,n){e._assign=Hu(n),Wi(e,"change",()=>{const r=e._modelValue,a=n8(e),o=e.checked,i=e._assign;if(pt(r)){const l=U_(r,a),s=l!==-1;if(o&&!s)i(r.concat(a));else if(!o&&s){const c=[...r];c.splice(l,1),i(c)}}else if(hm(r)){const l=new Set(r);o?l.add(a):l.delete(a),i(l)}else i(r8(e,o))})},mounted:o$,beforeUpdate(e,t,n){e._assign=Hu(n),o$(e,t,n)}};function o$(e,{value:t,oldValue:n},r){e._modelValue=t,pt(t)?e.checked=U_(t,r.props.value)>-1:hm(t)?e.checked=t.has(r.props.value):t!==n&&(e.checked=Vu(t,r8(e,!0)))}const t8={created(e,{value:t},n){e.checked=Vu(t,n.props.value),e._assign=Hu(n),Wi(e,"change",()=>{e._assign(n8(e))})},beforeUpdate(e,{value:t,oldValue:n},r){e._assign=Hu(r),t!==n&&(e.checked=Vu(t,r.props.value))}};function n8(e){return"_value"in e?e._value:e.value}function r8(e,t){const n=t?"_trueValue":"_falseValue";return n in e?e[n]:t}const 
jF=["ctrl","shift","alt","meta"],KF={stop:e=>e.stopPropagation(),prevent:e=>e.preventDefault(),self:e=>e.target!==e.currentTarget,ctrl:e=>!e.ctrlKey,shift:e=>!e.shiftKey,alt:e=>!e.altKey,meta:e=>!e.metaKey,left:e=>"button"in e&&e.button!==0,middle:e=>"button"in e&&e.button!==1,right:e=>"button"in e&&e.button!==2,exact:(e,t)=>jF.some(n=>e[`${n}Key`]&&!t.includes(n))},dt=(e,t)=>(n,...r)=>{for(let a=0;a<t.length;a++){const o=KF[t[a]];if(o&&o(n,t))return}return e(n,...r)},WF={esc:"escape",space:" ",up:"arrow-up",left:"arrow-left",right:"arrow-right",down:"arrow-down",delete:"backspace"},It=(e,t)=>n=>{if(!("key"in n))return;const r=vl(n.key);if(t.some(a=>a===r||WF[a]===r))return e(n)},_t={beforeMount(e,{value:t},{transition:n}){e._vod=e.style.display==="none"?"":e.style.display,n&&t?n.beforeEnter(e):Lc(e,t)},mounted(e,{value:t},{transition:n}){n&&t&&n.enter(e)},updated(e,{value:t,oldValue:n},{transition:r}){!t!=!n&&(r?t?(r.beforeEnter(e),Lc(e,!0),r.enter(e)):r.leave(e,()=>{Lc(e,!1)}):Lc(e,t))},beforeUnmount(e,{value:t}){Lc(e,t)}};function Lc(e,t){e.style.display=t?e._vod:"none"}const UF=fr({patchProp:MF},gF);let i$;function a8(){return i$||(i$=GL(UF))}const vs=(...e)=>{a8().render(...e)},$m=(...e)=>{const t=a8().createApp(...e),{mount:n}=t;return t.mount=r=>{const a=YF(r);if(!a)return;const o=t._component;!Ct(o)&&!o.render&&!o.template&&(o.template=a.innerHTML),a.innerHTML="";const i=n(a,!1,a instanceof SVGElement);return a instanceof Element&&(a.removeAttribute("v-cloak"),a.setAttribute("data-v-app","")),i},t};function YF(e){return wt(e)?document.querySelector(e):e}var qF=typeof global=="object"&&global&&global.Object===Object&&global,o8=qF,GF=typeof self=="object"&&self&&self.Object===Object&&self,XF=o8||GF||Function("return this")(),fo=XF,ZF=fo.Symbol,Ea=ZF,i8=Object.prototype,JF=i8.hasOwnProperty,QF=i8.toString,Fc=Ea?Ea.toStringTag:void 0;function eB(e){var t=JF.call(e,Fc),n=e[Fc];try{e[Fc]=void 0;var r=!0}catch{}var a=QF.call(e);return r&&(t?e[Fc]=n:delete 
e[Fc]),a}var tB=Object.prototype,nB=tB.toString;function rB(e){return nB.call(e)}var aB="[object Null]",oB="[object Undefined]",l$=Ea?Ea.toStringTag:void 0;function ml(e){return e==null?e===void 0?oB:aB:l$&&l$ in Object(e)?eB(e):rB(e)}function ao(e){return e!=null&&typeof e=="object"}var iB="[object Symbol]";function Om(e){return typeof e=="symbol"||ao(e)&&ml(e)==iB}function Pm(e,t){for(var n=-1,r=e==null?0:e.length,a=Array(r);++n<r;)a[n]=t(e[n],n,e);return a}var lB=Array.isArray,Nr=lB,sB=1/0,s$=Ea?Ea.prototype:void 0,u$=s$?s$.toString:void 0;function hC(e){if(typeof e=="string")return e;if(Nr(e))return Pm(e,hC)+"";if(Om(e))return u$?u$.call(e):"";var t=e+"";return t=="0"&&1/e==-sB?"-0":t}var uB=/\s/;function cB(e){for(var t=e.length;t--&&uB.test(e.charAt(t)););return t}var dB=/^\s+/;function fB(e){return e&&e.slice(0,cB(e)+1).replace(dB,"")}function Ar(e){var t=typeof e;return e!=null&&(t=="object"||t=="function")}var c$=0/0,hB=/^[-+]0x[0-9a-f]+$/i,pB=/^0b[01]+$/i,vB=/^0o[0-7]+$/i,mB=parseInt;function Qb(e){if(typeof e=="number")return e;if(Om(e))return c$;if(Ar(e)){var t=typeof e.valueOf=="function"?e.valueOf():e;e=Ar(t)?t+"":t}if(typeof e!="string")return e===0?e:+e;e=fB(e);var n=pB.test(e);return n||vB.test(e)?mB(e.slice(2),n?2:8):hB.test(e)?c$:+e}var d$=1/0,gB=17976931348623157e292;function yB(e){if(!e)return e===0?e:0;if(e=Qb(e),e===d$||e===-d$){var t=e<0?-1:1;return t*gB}return e===e?e:0}function Tm(e){var t=yB(e),n=t%1;return t===t?n?t-n:t:0}function pC(e){return e}var bB="[object AsyncFunction]",CB="[object Function]",wB="[object GeneratorFunction]",SB="[object Proxy]";function vC(e){if(!Ar(e))return!1;var t=ml(e);return t==CB||t==wB||t==bB||t==SB}var kB=fo["__core-js_shared__"],Zg=kB,f$=function(){var e=/[^.]+$/.exec(Zg&&Zg.keys&&Zg.keys.IE_PROTO||"");return e?"Symbol(src)_1."+e:""}();function $B(e){return!!f$&&f$ in e}var OB=Function.prototype,PB=OB.toString;function Ts(e){if(e!=null){try{return PB.call(e)}catch{}try{return e+""}catch{}}return""}var 
TB=/[\\^$.*+?()[\]{}|]/g,xB=/^\[object .+?Constructor\]$/,_B=Function.prototype,EB=Object.prototype,MB=_B.toString,IB=EB.hasOwnProperty,NB=RegExp("^"+MB.call(IB).replace(TB,"\\$&").replace(/hasOwnProperty|(function).*?(?=\\\()| for .+?(?=\\\])/g,"$1.*?")+"$");function AB(e){if(!Ar(e)||$B(e))return!1;var t=vC(e)?NB:xB;return t.test(Ts(e))}function DB(e,t){return e==null?void 0:e[t]}function xs(e,t){var n=DB(e,t);return AB(n)?n:void 0}var RB=xs(fo,"WeakMap"),e0=RB,h$=Object.create,LB=function(){function e(){}return function(t){if(!Ar(t))return{};if(h$)return h$(t);e.prototype=t;var n=new e;return e.prototype=void 0,n}}(),FB=LB;function BB(e,t,n){switch(n.length){case 0:return e.call(t);case 1:return e.call(t,n[0]);case 2:return e.call(t,n[0],n[1]);case 3:return e.call(t,n[0],n[1],n[2])}return e.apply(t,n)}function VB(){}function l8(e,t){var n=-1,r=e.length;for(t||(t=Array(r));++n<r;)t[n]=e[n];return t}var zB=800,HB=16,jB=Date.now;function KB(e){var t=0,n=0;return function(){var r=jB(),a=HB-(r-n);if(n=r,a>0){if(++t>=zB)return arguments[0]}else t=0;return e.apply(void 0,arguments)}}function WB(e){return function(){return e}}var UB=function(){try{var e=xs(Object,"defineProperty");return e({},"",{}),e}catch{}}(),fv=UB,YB=fv?function(e,t){return fv(e,"toString",{configurable:!0,enumerable:!1,value:WB(t),writable:!0})}:pC,qB=YB,GB=KB(qB),s8=GB;function XB(e,t){for(var n=-1,r=e==null?0:e.length;++n<r&&t(e[n],n,e)!==!1;);return e}function u8(e,t,n,r){for(var a=e.length,o=n+(r?1:-1);r?o--:++o<a;)if(t(e[o],o,e))return o;return-1}function ZB(e){return e!==e}function JB(e,t,n){for(var r=n-1,a=e.length;++r<a;)if(e[r]===t)return r;return-1}function c8(e,t,n){return t===t?JB(e,t,n):u8(e,ZB,n)}function d8(e,t){var n=e==null?0:e.length;return!!n&&c8(e,t,0)>-1}var QB=9007199254740991,eV=/^(?:0|[1-9]\d*)$/;function xm(e,t){var n=typeof e;return t=t==null?QB:t,!!t&&(n=="number"||n!="symbol"&&eV.test(e))&&e>-1&&e%1==0&&e<t}function 
mC(e,t,n){t=="__proto__"&&fv?fv(e,t,{configurable:!0,enumerable:!0,value:n,writable:!0}):e[t]=n}function Sf(e,t){return e===t||e!==e&&t!==t}var tV=Object.prototype,nV=tV.hasOwnProperty;function gC(e,t,n){var r=e[t];(!(nV.call(e,t)&&Sf(r,n))||n===void 0&&!(t in e))&&mC(e,t,n)}function pc(e,t,n,r){var a=!n;n||(n={});for(var o=-1,i=t.length;++o<i;){var l=t[o],s=r?r(n[l],e[l],l,n,e):void 0;s===void 0&&(s=e[l]),a?mC(n,l,s):gC(n,l,s)}return n}var p$=Math.max;function f8(e,t,n){return t=p$(t===void 0?e.length-1:t,0),function(){for(var r=arguments,a=-1,o=p$(r.length-t,0),i=Array(o);++a<o;)i[a]=r[t+a];a=-1;for(var l=Array(t+1);++a<t;)l[a]=r[a];return l[t]=n(i),BB(e,this,l)}}function yC(e,t){return s8(f8(e,t,pC),e+"")}var rV=9007199254740991;function bC(e){return typeof e=="number"&&e>-1&&e%1==0&&e<=rV}function vc(e){return e!=null&&bC(e.length)&&!vC(e)}function aV(e,t,n){if(!Ar(n))return!1;var r=typeof t;return(r=="number"?vc(n)&&xm(t,n.length):r=="string"&&t in n)?Sf(n[t],e):!1}function oV(e){return yC(function(t,n){var r=-1,a=n.length,o=a>1?n[a-1]:void 0,i=a>2?n[2]:void 0;for(o=e.length>3&&typeof o=="function"?(a--,o):void 0,i&&aV(n[0],n[1],i)&&(o=a<3?void 0:o,a=1),t=Object(t);++r<a;){var l=n[r];l&&e(t,l,r,o)}return t})}var iV=Object.prototype;function CC(e){var t=e&&e.constructor,n=typeof t=="function"&&t.prototype||iV;return e===n}function lV(e,t){for(var n=-1,r=Array(e);++n<e;)r[n]=t(n);return r}var sV="[object Arguments]";function v$(e){return ao(e)&&ml(e)==sV}var h8=Object.prototype,uV=h8.hasOwnProperty,cV=h8.propertyIsEnumerable,dV=v$(function(){return arguments}())?v$:function(e){return ao(e)&&uV.call(e,"callee")&&!cV.call(e,"callee")},Fd=dV;function fV(){return!1}var p8=typeof fa=="object"&&fa&&!fa.nodeType&&fa,m$=p8&&typeof Sr=="object"&&Sr&&!Sr.nodeType&&Sr,hV=m$&&m$.exports===p8,g$=hV?fo.Buffer:void 0,pV=g$?g$.isBuffer:void 0,vV=pV||fV,Bd=vV,mV="[object Arguments]",gV="[object Array]",yV="[object Boolean]",bV="[object Date]",CV="[object Error]",wV="[object 
Function]",SV="[object Map]",kV="[object Number]",$V="[object Object]",OV="[object RegExp]",PV="[object Set]",TV="[object String]",xV="[object WeakMap]",_V="[object ArrayBuffer]",EV="[object DataView]",MV="[object Float32Array]",IV="[object Float64Array]",NV="[object Int8Array]",AV="[object Int16Array]",DV="[object Int32Array]",RV="[object Uint8Array]",LV="[object Uint8ClampedArray]",FV="[object Uint16Array]",BV="[object Uint32Array]",In={};In[MV]=In[IV]=In[NV]=In[AV]=In[DV]=In[RV]=In[LV]=In[FV]=In[BV]=!0;In[mV]=In[gV]=In[_V]=In[yV]=In[EV]=In[bV]=In[CV]=In[wV]=In[SV]=In[kV]=In[$V]=In[OV]=In[PV]=In[TV]=In[xV]=!1;function VV(e){return ao(e)&&bC(e.length)&&!!In[ml(e)]}function _m(e){return function(t){return e(t)}}var v8=typeof fa=="object"&&fa&&!fa.nodeType&&fa,pd=v8&&typeof Sr=="object"&&Sr&&!Sr.nodeType&&Sr,zV=pd&&pd.exports===v8,Jg=zV&&o8.process,HV=function(){try{var e=pd&&pd.require&&pd.require("util").types;return e||Jg&&Jg.binding&&Jg.binding("util")}catch{}}(),ju=HV,y$=ju&&ju.isTypedArray,jV=y$?_m(y$):VV,wC=jV,KV=Object.prototype,WV=KV.hasOwnProperty;function m8(e,t){var n=Nr(e),r=!n&&Fd(e),a=!n&&!r&&Bd(e),o=!n&&!r&&!a&&wC(e),i=n||r||a||o,l=i?lV(e.length,String):[],s=l.length;for(var c in e)(t||WV.call(e,c))&&!(i&&(c=="length"||a&&(c=="offset"||c=="parent")||o&&(c=="buffer"||c=="byteLength"||c=="byteOffset")||xm(c,s)))&&l.push(c);return l}function g8(e,t){return function(n){return e(t(n))}}var UV=g8(Object.keys,Object),YV=UV,qV=Object.prototype,GV=qV.hasOwnProperty;function XV(e){if(!CC(e))return YV(e);var t=[];for(var n in Object(e))GV.call(e,n)&&n!="constructor"&&t.push(n);return t}function mc(e){return vc(e)?m8(e):XV(e)}function ZV(e){var t=[];if(e!=null)for(var n in Object(e))t.push(n);return t}var JV=Object.prototype,QV=JV.hasOwnProperty;function ez(e){if(!Ar(e))return ZV(e);var t=CC(e),n=[];for(var r in e)r=="constructor"&&(t||!QV.call(e,r))||n.push(r);return n}function kf(e){return vc(e)?m8(e,!0):ez(e)}var 
tz=/\.|\[(?:[^[\]]*|(["'])(?:(?!\1)[^\\]|\\.)*?\1)\]/,nz=/^\w*$/;function SC(e,t){if(Nr(e))return!1;var n=typeof e;return n=="number"||n=="symbol"||n=="boolean"||e==null||Om(e)?!0:nz.test(e)||!tz.test(e)||t!=null&&e in Object(t)}var rz=xs(Object,"create"),Vd=rz;function az(){this.__data__=Vd?Vd(null):{},this.size=0}function oz(e){var t=this.has(e)&&delete this.__data__[e];return this.size-=t?1:0,t}var iz="__lodash_hash_undefined__",lz=Object.prototype,sz=lz.hasOwnProperty;function uz(e){var t=this.__data__;if(Vd){var n=t[e];return n===iz?void 0:n}return sz.call(t,e)?t[e]:void 0}var cz=Object.prototype,dz=cz.hasOwnProperty;function fz(e){var t=this.__data__;return Vd?t[e]!==void 0:dz.call(t,e)}var hz="__lodash_hash_undefined__";function pz(e,t){var n=this.__data__;return this.size+=this.has(e)?0:1,n[e]=Vd&&t===void 0?hz:t,this}function ms(e){var t=-1,n=e==null?0:e.length;for(this.clear();++t<n;){var r=e[t];this.set(r[0],r[1])}}ms.prototype.clear=az;ms.prototype.delete=oz;ms.prototype.get=uz;ms.prototype.has=fz;ms.prototype.set=pz;function vz(){this.__data__=[],this.size=0}function Em(e,t){for(var n=e.length;n--;)if(Sf(e[n][0],t))return n;return-1}var mz=Array.prototype,gz=mz.splice;function yz(e){var t=this.__data__,n=Em(t,e);if(n<0)return!1;var r=t.length-1;return n==r?t.pop():gz.call(t,n,1),--this.size,!0}function bz(e){var t=this.__data__,n=Em(t,e);return n<0?void 0:t[n][1]}function Cz(e){return Em(this.__data__,e)>-1}function wz(e,t){var n=this.__data__,r=Em(n,e);return r<0?(++this.size,n.push([e,t])):n[r][1]=t,this}function $i(e){var t=-1,n=e==null?0:e.length;for(this.clear();++t<n;){var r=e[t];this.set(r[0],r[1])}}$i.prototype.clear=vz;$i.prototype.delete=yz;$i.prototype.get=bz;$i.prototype.has=Cz;$i.prototype.set=wz;var Sz=xs(fo,"Map"),zd=Sz;function kz(){this.size=0,this.__data__={hash:new ms,map:new(zd||$i),string:new ms}}function $z(e){var t=typeof e;return t=="string"||t=="number"||t=="symbol"||t=="boolean"?e!=="__proto__":e===null}function Mm(e,t){var 
n=e.__data__;return $z(t)?n[typeof t=="string"?"string":"hash"]:n.map}function Oz(e){var t=Mm(this,e).delete(e);return this.size-=t?1:0,t}function Pz(e){return Mm(this,e).get(e)}function Tz(e){return Mm(this,e).has(e)}function xz(e,t){var n=Mm(this,e),r=n.size;return n.set(e,t),this.size+=n.size==r?0:1,this}function Oi(e){var t=-1,n=e==null?0:e.length;for(this.clear();++t<n;){var r=e[t];this.set(r[0],r[1])}}Oi.prototype.clear=kz;Oi.prototype.delete=Oz;Oi.prototype.get=Pz;Oi.prototype.has=Tz;Oi.prototype.set=xz;var _z="Expected a function";function Im(e,t){if(typeof e!="function"||t!=null&&typeof t!="function")throw new TypeError(_z);var n=function(){var r=arguments,a=t?t.apply(this,r):r[0],o=n.cache;if(o.has(a))return o.get(a);var i=e.apply(this,r);return n.cache=o.set(a,i)||o,i};return n.cache=new(Im.Cache||Oi),n}Im.Cache=Oi;var Ez=500;function Mz(e){var t=Im(e,function(r){return n.size===Ez&&n.clear(),r}),n=t.cache;return t}var Iz=/[^.[\]]+|\[(?:(-?\d+(?:\.\d+)?)|(["'])((?:(?!\2)[^\\]|\\.)*?)\2)\]|(?=(?:\.|\[\])(?:\.|\[\]|$))/g,Nz=/\\(\\)?/g,Az=Mz(function(e){var t=[];return e.charCodeAt(0)===46&&t.push(""),e.replace(Iz,function(n,r,a,o){t.push(a?o.replace(Nz,"$1"):r||n)}),t}),Dz=Az;function kC(e){return e==null?"":hC(e)}function gc(e,t){return Nr(e)?e:SC(e,t)?[e]:Dz(kC(e))}var Rz=1/0;function yc(e){if(typeof e=="string"||Om(e))return e;var t=e+"";return t=="0"&&1/e==-Rz?"-0":t}function Nm(e,t){t=gc(t,e);for(var n=0,r=t.length;e!=null&&n<r;)e=e[yc(t[n++])];return n&&n==r?e:void 0}function vn(e,t,n){var r=e==null?void 0:Nm(e,t);return r===void 0?n:r}function $C(e,t){for(var n=-1,r=t.length,a=e.length;++n<r;)e[a+n]=t[n];return e}var b$=Ea?Ea.isConcatSpreadable:void 0;function Lz(e){return Nr(e)||Fd(e)||!!(b$&&e&&e[b$])}function Am(e,t,n,r,a){var o=-1,i=e.length;for(n||(n=Lz),a||(a=[]);++o<i;){var l=e[o];t>0&&n(l)?t>1?Am(l,t-1,n,r,a):$C(a,l):r||(a[a.length]=l)}return a}function Fz(e){var t=e==null?0:e.length;return t?Am(e,1):[]}function y8(e){return s8(f8(e,void 
0,Fz),e+"")}var Bz=g8(Object.getPrototypeOf,Object),OC=Bz,Vz="[object Object]",zz=Function.prototype,Hz=Object.prototype,b8=zz.toString,jz=Hz.hasOwnProperty,Kz=b8.call(Object);function bc(e){if(!ao(e)||ml(e)!=Vz)return!1;var t=OC(e);if(t===null)return!0;var n=jz.call(t,"constructor")&&t.constructor;return typeof n=="function"&&n instanceof n&&b8.call(n)==Kz}function C8(e,t,n){var r=-1,a=e.length;t<0&&(t=-t>a?0:a+t),n=n>a?a:n,n<0&&(n+=a),a=t>n?0:n-t>>>0,t>>>=0;for(var o=Array(a);++r<a;)o[r]=e[r+t];return o}function Wz(e,t,n){var r=e.length;return n=n===void 0?r:n,!t&&n>=r?e:C8(e,t,n)}var Uz="\\ud800-\\udfff",Yz="\\u0300-\\u036f",qz="\\ufe20-\\ufe2f",Gz="\\u20d0-\\u20ff",Xz=Yz+qz+Gz,Zz="\\ufe0e\\ufe0f",Jz="\\u200d",Qz=RegExp("["+Jz+Uz+Xz+Zz+"]");function PC(e){return Qz.test(e)}function eH(e){return e.split("")}var w8="\\ud800-\\udfff",tH="\\u0300-\\u036f",nH="\\ufe20-\\ufe2f",rH="\\u20d0-\\u20ff",aH=tH+nH+rH,oH="\\ufe0e\\ufe0f",iH="["+w8+"]",t0="["+aH+"]",n0="\\ud83c[\\udffb-\\udfff]",lH="(?:"+t0+"|"+n0+")",S8="[^"+w8+"]",k8="(?:\\ud83c[\\udde6-\\uddff]){2}",$8="[\\ud800-\\udbff][\\udc00-\\udfff]",sH="\\u200d",O8=lH+"?",P8="["+oH+"]?",uH="(?:"+sH+"(?:"+[S8,k8,$8].join("|")+")"+P8+O8+")*",cH=P8+O8+uH,dH="(?:"+[S8+t0+"?",t0,k8,$8,iH].join("|")+")",fH=RegExp(n0+"(?="+n0+")|"+dH+cH,"g");function hH(e){return e.match(fH)||[]}function pH(e){return PC(e)?hH(e):eH(e)}function Hd(){if(!arguments.length)return[];var e=arguments[0];return Nr(e)?e:[e]}function vH(){this.__data__=new $i,this.size=0}function mH(e){var t=this.__data__,n=t.delete(e);return this.size=t.size,n}function gH(e){return this.__data__.get(e)}function yH(e){return this.__data__.has(e)}var bH=200;function CH(e,t){var n=this.__data__;if(n instanceof $i){var r=n.__data__;if(!zd||r.length<bH-1)return r.push([e,t]),this.size=++n.size,this;n=this.__data__=new Oi(r)}return n.set(e,t),this.size=n.size,this}function Qa(e){var t=this.__data__=new 
$i(e);this.size=t.size}Qa.prototype.clear=vH;Qa.prototype.delete=mH;Qa.prototype.get=gH;Qa.prototype.has=yH;Qa.prototype.set=CH;function wH(e,t){return e&&pc(t,mc(t),e)}function SH(e,t){return e&&pc(t,kf(t),e)}var T8=typeof fa=="object"&&fa&&!fa.nodeType&&fa,C$=T8&&typeof Sr=="object"&&Sr&&!Sr.nodeType&&Sr,kH=C$&&C$.exports===T8,w$=kH?fo.Buffer:void 0,S$=w$?w$.allocUnsafe:void 0;function x8(e,t){if(t)return e.slice();var n=e.length,r=S$?S$(n):new e.constructor(n);return e.copy(r),r}function $H(e,t){for(var n=-1,r=e==null?0:e.length,a=0,o=[];++n<r;){var i=e[n];t(i,n,e)&&(o[a++]=i)}return o}function _8(){return[]}var OH=Object.prototype,PH=OH.propertyIsEnumerable,k$=Object.getOwnPropertySymbols,TH=k$?function(e){return e==null?[]:(e=Object(e),$H(k$(e),function(t){return PH.call(e,t)}))}:_8,TC=TH;function xH(e,t){return pc(e,TC(e),t)}var _H=Object.getOwnPropertySymbols,EH=_H?function(e){for(var t=[];e;)$C(t,TC(e)),e=OC(e);return t}:_8,E8=EH;function MH(e,t){return pc(e,E8(e),t)}function M8(e,t,n){var r=t(e);return Nr(e)?r:$C(r,n(e))}function r0(e){return M8(e,mc,TC)}function I8(e){return M8(e,kf,E8)}var IH=xs(fo,"DataView"),a0=IH,NH=xs(fo,"Promise"),o0=NH,AH=xs(fo,"Set"),Ou=AH,$$="[object Map]",DH="[object Object]",O$="[object Promise]",P$="[object Set]",T$="[object WeakMap]",x$="[object DataView]",RH=Ts(a0),LH=Ts(zd),FH=Ts(o0),BH=Ts(Ou),VH=Ts(e0),Nl=ml;(a0&&Nl(new a0(new ArrayBuffer(1)))!=x$||zd&&Nl(new zd)!=$$||o0&&Nl(o0.resolve())!=O$||Ou&&Nl(new Ou)!=P$||e0&&Nl(new e0)!=T$)&&(Nl=function(e){var t=ml(e),n=t==DH?e.constructor:void 0,r=n?Ts(n):"";if(r)switch(r){case RH:return x$;case LH:return $$;case FH:return O$;case BH:return P$;case VH:return T$}return t});var jd=Nl,zH=Object.prototype,HH=zH.hasOwnProperty;function jH(e){var t=e.length,n=new e.constructor(t);return t&&typeof e[0]=="string"&&HH.call(e,"index")&&(n.index=e.index,n.input=e.input),n}var KH=fo.Uint8Array,hv=KH;function xC(e){var t=new e.constructor(e.byteLength);return new hv(t).set(new 
hv(e)),t}function WH(e,t){var n=t?xC(e.buffer):e.buffer;return new e.constructor(n,e.byteOffset,e.byteLength)}var UH=/\w*$/;function YH(e){var t=new e.constructor(e.source,UH.exec(e));return t.lastIndex=e.lastIndex,t}var _$=Ea?Ea.prototype:void 0,E$=_$?_$.valueOf:void 0;function qH(e){return E$?Object(E$.call(e)):{}}function N8(e,t){var n=t?xC(e.buffer):e.buffer;return new e.constructor(n,e.byteOffset,e.length)}var GH="[object Boolean]",XH="[object Date]",ZH="[object Map]",JH="[object Number]",QH="[object RegExp]",ej="[object Set]",tj="[object String]",nj="[object Symbol]",rj="[object ArrayBuffer]",aj="[object DataView]",oj="[object Float32Array]",ij="[object Float64Array]",lj="[object Int8Array]",sj="[object Int16Array]",uj="[object Int32Array]",cj="[object Uint8Array]",dj="[object Uint8ClampedArray]",fj="[object Uint16Array]",hj="[object Uint32Array]";function pj(e,t,n){var r=e.constructor;switch(t){case rj:return xC(e);case GH:case XH:return new r(+e);case aj:return WH(e,n);case oj:case ij:case lj:case sj:case uj:case cj:case dj:case fj:case hj:return N8(e,n);case ZH:return new r;case JH:case tj:return new r(e);case QH:return YH(e);case ej:return new r;case nj:return qH(e)}}function A8(e){return typeof e.constructor=="function"&&!CC(e)?FB(OC(e)):{}}var vj="[object Map]";function mj(e){return ao(e)&&jd(e)==vj}var M$=ju&&ju.isMap,gj=M$?_m(M$):mj,yj=gj,bj="[object Set]";function Cj(e){return ao(e)&&jd(e)==bj}var I$=ju&&ju.isSet,wj=I$?_m(I$):Cj,Sj=wj,kj=1,$j=2,Oj=4,D8="[object Arguments]",Pj="[object Array]",Tj="[object Boolean]",xj="[object Date]",_j="[object Error]",R8="[object Function]",Ej="[object GeneratorFunction]",Mj="[object Map]",Ij="[object Number]",L8="[object Object]",Nj="[object RegExp]",Aj="[object Set]",Dj="[object String]",Rj="[object Symbol]",Lj="[object WeakMap]",Fj="[object ArrayBuffer]",Bj="[object DataView]",Vj="[object Float32Array]",zj="[object Float64Array]",Hj="[object Int8Array]",jj="[object Int16Array]",Kj="[object 
Int32Array]",Wj="[object Uint8Array]",Uj="[object Uint8ClampedArray]",Yj="[object Uint16Array]",qj="[object Uint32Array]",Pn={};Pn[D8]=Pn[Pj]=Pn[Fj]=Pn[Bj]=Pn[Tj]=Pn[xj]=Pn[Vj]=Pn[zj]=Pn[Hj]=Pn[jj]=Pn[Kj]=Pn[Mj]=Pn[Ij]=Pn[L8]=Pn[Nj]=Pn[Aj]=Pn[Dj]=Pn[Rj]=Pn[Wj]=Pn[Uj]=Pn[Yj]=Pn[qj]=!0;Pn[_j]=Pn[R8]=Pn[Lj]=!1;function Pu(e,t,n,r,a,o){var i,l=t&kj,s=t&$j,c=t&Oj;if(n&&(i=a?n(e,r,a,o):n(e)),i!==void 0)return i;if(!Ar(e))return e;var d=Nr(e);if(d){if(i=jH(e),!l)return l8(e,i)}else{var f=jd(e),p=f==R8||f==Ej;if(Bd(e))return x8(e,l);if(f==L8||f==D8||p&&!a){if(i=s||p?{}:A8(e),!l)return s?MH(e,SH(i,e)):xH(e,wH(i,e))}else{if(!Pn[f])return a?e:{};i=pj(e,f,l)}}o||(o=new Qa);var v=o.get(e);if(v)return v;o.set(e,i),Sj(e)?e.forEach(function(b){i.add(Pu(b,t,n,b,e,o))}):yj(e)&&e.forEach(function(b,C){i.set(C,Pu(b,t,n,C,e,o))});var m=c?s?I8:r0:s?kf:mc,y=d?void 0:m(e);return XB(y||e,function(b,C){y&&(C=b,b=e[C]),gC(i,C,Pu(b,t,n,C,e,o))}),i}var Gj=4;function Xj(e){return Pu(e,Gj)}var Zj=1,Jj=4;function lp(e){return Pu(e,Zj|Jj)}var Qj="__lodash_hash_undefined__";function eK(e){return this.__data__.set(e,Qj),this}function tK(e){return this.__data__.has(e)}function Ku(e){var t=-1,n=e==null?0:e.length;for(this.__data__=new Oi;++t<n;)this.add(e[t])}Ku.prototype.add=Ku.prototype.push=eK;Ku.prototype.has=tK;function nK(e,t){for(var n=-1,r=e==null?0:e.length;++n<r;)if(t(e[n],n,e))return!0;return!1}function pv(e,t){return e.has(t)}var rK=1,aK=2;function F8(e,t,n,r,a,o){var i=n&rK,l=e.length,s=t.length;if(l!=s&&!(i&&s>l))return!1;var c=o.get(e),d=o.get(t);if(c&&d)return c==t&&d==e;var f=-1,p=!0,v=n&aK?new Ku:void 0;for(o.set(e,t),o.set(t,e);++f<l;){var m=e[f],y=t[f];if(r)var b=i?r(y,m,f,t,e,o):r(m,y,f,e,t,o);if(b!==void 0){if(b)continue;p=!1;break}if(v){if(!nK(t,function(C,S){if(!pv(v,S)&&(m===C||a(m,C,n,r,o)))return v.push(S)})){p=!1;break}}else if(!(m===y||a(m,y,n,r,o))){p=!1;break}}return o.delete(e),o.delete(t),p}function oK(e){var t=-1,n=Array(e.size);return 
e.forEach(function(r,a){n[++t]=[a,r]}),n}function _C(e){var t=-1,n=Array(e.size);return e.forEach(function(r){n[++t]=r}),n}var iK=1,lK=2,sK="[object Boolean]",uK="[object Date]",cK="[object Error]",dK="[object Map]",fK="[object Number]",hK="[object RegExp]",pK="[object Set]",vK="[object String]",mK="[object Symbol]",gK="[object ArrayBuffer]",yK="[object DataView]",N$=Ea?Ea.prototype:void 0,Qg=N$?N$.valueOf:void 0;function bK(e,t,n,r,a,o,i){switch(n){case yK:if(e.byteLength!=t.byteLength||e.byteOffset!=t.byteOffset)return!1;e=e.buffer,t=t.buffer;case gK:return!(e.byteLength!=t.byteLength||!o(new hv(e),new hv(t)));case sK:case uK:case fK:return Sf(+e,+t);case cK:return e.name==t.name&&e.message==t.message;case hK:case vK:return e==t+"";case dK:var l=oK;case pK:var s=r&iK;if(l||(l=_C),e.size!=t.size&&!s)return!1;var c=i.get(e);if(c)return c==t;r|=lK,i.set(e,t);var d=F8(l(e),l(t),r,a,o,i);return i.delete(e),d;case mK:if(Qg)return Qg.call(e)==Qg.call(t)}return!1}var CK=1,wK=Object.prototype,SK=wK.hasOwnProperty;function kK(e,t,n,r,a,o){var i=n&CK,l=r0(e),s=l.length,c=r0(t),d=c.length;if(s!=d&&!i)return!1;for(var f=s;f--;){var p=l[f];if(!(i?p in t:SK.call(t,p)))return!1}var v=o.get(e),m=o.get(t);if(v&&m)return v==t&&m==e;var y=!0;o.set(e,t),o.set(t,e);for(var b=i;++f<s;){p=l[f];var C=e[p],S=t[p];if(r)var w=i?r(S,C,p,t,e,o):r(C,S,p,e,t,o);if(!(w===void 0?C===S||a(C,S,n,r,o):w)){y=!1;break}b||(b=p=="constructor")}if(y&&!b){var k=e.constructor,$=t.constructor;k!=$&&"constructor"in e&&"constructor"in t&&!(typeof k=="function"&&k instanceof k&&typeof $=="function"&&$ instanceof $)&&(y=!1)}return o.delete(e),o.delete(t),y}var $K=1,A$="[object Arguments]",D$="[object Array]",rh="[object Object]",OK=Object.prototype,R$=OK.hasOwnProperty;function PK(e,t,n,r,a,o){var i=Nr(e),l=Nr(t),s=i?D$:jd(e),c=l?D$:jd(t);s=s==A$?rh:s,c=c==A$?rh:c;var d=s==rh,f=c==rh,p=s==c;if(p&&Bd(e)){if(!Bd(t))return!1;i=!0,d=!1}if(p&&!d)return o||(o=new 
Qa),i||wC(e)?F8(e,t,n,r,a,o):bK(e,t,s,n,r,a,o);if(!(n&$K)){var v=d&&R$.call(e,"__wrapped__"),m=f&&R$.call(t,"__wrapped__");if(v||m){var y=v?e.value():e,b=m?t.value():t;return o||(o=new Qa),a(y,b,n,r,o)}}return p?(o||(o=new Qa),kK(e,t,n,r,a,o)):!1}function Dm(e,t,n,r,a){return e===t?!0:e==null||t==null||!ao(e)&&!ao(t)?e!==e&&t!==t:PK(e,t,n,r,Dm,a)}var TK=1,xK=2;function _K(e,t,n,r){var a=n.length,o=a,i=!r;if(e==null)return!o;for(e=Object(e);a--;){var l=n[a];if(i&&l[2]?l[1]!==e[l[0]]:!(l[0]in e))return!1}for(;++a<o;){l=n[a];var s=l[0],c=e[s],d=l[1];if(i&&l[2]){if(c===void 0&&!(s in e))return!1}else{var f=new Qa;if(r)var p=r(c,d,s,e,t,f);if(!(p===void 0?Dm(d,c,TK|xK,r,f):p))return!1}}return!0}function B8(e){return e===e&&!Ar(e)}function EK(e){for(var t=mc(e),n=t.length;n--;){var r=t[n],a=e[r];t[n]=[r,a,B8(a)]}return t}function V8(e,t){return function(n){return n==null?!1:n[e]===t&&(t!==void 0||e in Object(n))}}function MK(e){var t=EK(e);return t.length==1&&t[0][2]?V8(t[0][0],t[0][1]):function(n){return n===e||_K(n,e,t)}}function IK(e,t){return e!=null&&t in Object(e)}function NK(e,t,n){t=gc(t,e);for(var r=-1,a=t.length,o=!1;++r<a;){var i=yc(t[r]);if(!(o=e!=null&&n(e,i)))break;e=e[i]}return o||++r!=a?o:(a=e==null?0:e.length,!!a&&bC(a)&&xm(i,a)&&(Nr(e)||Fd(e)))}function z8(e,t){return e!=null&&NK(e,t,IK)}var AK=1,DK=2;function RK(e,t){return SC(e)&&B8(t)?V8(yc(e),t):function(n){var r=vn(n,e);return r===void 0&&r===t?z8(n,e):Dm(t,r,AK|DK)}}function H8(e){return function(t){return t==null?void 0:t[e]}}function LK(e){return function(t){return Nm(t,e)}}function FK(e){return SC(e)?H8(yc(e)):LK(e)}function Rm(e){return typeof e=="function"?e:e==null?pC:typeof e=="object"?Nr(e)?RK(e[0],e[1]):MK(e):FK(e)}function BK(e,t,n,r){for(var a=-1,o=e==null?0:e.length;++a<o;){var i=e[a];t(r,i,n(i),e)}return r}function VK(e){return function(t,n,r){for(var a=-1,o=Object(t),i=r(t),l=i.length;l--;){var s=i[e?l:++a];if(n(o[s],s,o)===!1)break}return t}}var zK=VK(),j8=zK;function HK(e,t){return 
e&&j8(e,t,mc)}function jK(e,t){return function(n,r){if(n==null)return n;if(!vc(n))return e(n,r);for(var a=n.length,o=t?a:-1,i=Object(n);(t?o--:++o<a)&&r(i[o],o,i)!==!1;);return n}}var KK=jK(HK),WK=KK;function UK(e,t,n,r){return WK(e,function(a,o,i){t(r,a,n(a),i)}),r}function YK(e,t){return function(n,r){var a=Nr(n)?BK:UK,o=t?t():{};return a(n,e,Rm(r),o)}}var qK=function(){return fo.Date.now()},ey=qK,GK="Expected a function",XK=Math.max,ZK=Math.min;function Yn(e,t,n){var r,a,o,i,l,s,c=0,d=!1,f=!1,p=!0;if(typeof e!="function")throw new TypeError(GK);t=Qb(t)||0,Ar(n)&&(d=!!n.leading,f="maxWait"in n,o=f?XK(Qb(n.maxWait)||0,t):o,p="trailing"in n?!!n.trailing:p);function v(O){var T=r,_=a;return r=a=void 0,c=O,i=e.apply(_,T),i}function m(O){return c=O,l=setTimeout(C,t),d?v(O):i}function y(O){var T=O-s,_=O-c,I=t-T;return f?ZK(I,o-_):I}function b(O){var T=O-s,_=O-c;return s===void 0||T>=t||T<0||f&&_>=o}function C(){var O=ey();if(b(O))return S(O);l=setTimeout(C,y(O))}function S(O){return l=void 0,p&&r?v(O):(r=a=void 0,i)}function w(){l!==void 0&&clearTimeout(l),c=0,r=s=a=l=void 0}function k(){return l===void 0?i:S(ey())}function $(){var O=ey(),T=b(O);if(r=arguments,a=this,s=O,T){if(l===void 0)return m(s);if(f)return clearTimeout(l),l=setTimeout(C,t),v(s)}return l===void 0&&(l=setTimeout(C,t)),i}return $.cancel=w,$.flush=k,$}function i0(e,t,n){(n!==void 0&&!Sf(e[t],n)||n===void 0&&!(t in e))&&mC(e,t,n)}function EC(e){return ao(e)&&vc(e)}function l0(e,t){if(!(t==="constructor"&&typeof e[t]=="function")&&t!="__proto__")return e[t]}function JK(e){return pc(e,kf(e))}function QK(e,t,n,r,a,o,i){var l=l0(e,n),s=l0(t,n),c=i.get(s);if(c){i0(e,n,c);return}var d=o?o(l,s,n+"",e,t,i):void 0,f=d===void 0;if(f){var p=Nr(s),v=!p&&Bd(s),m=!p&&!v&&wC(s);d=s,p||v||m?Nr(l)?d=l:EC(l)?d=l8(l):v?(f=!1,d=x8(s,!0)):m?(f=!1,d=N8(s,!0)):d=[]:bc(s)||Fd(s)?(d=l,Fd(l)?d=JK(l):(!Ar(l)||vC(l))&&(d=A8(s))):f=!1}f&&(i.set(s,d),a(d,s,r,o,i),i.delete(s)),i0(e,n,d)}function 
K8(e,t,n,r,a){e!==t&&j8(t,function(o,i){if(a||(a=new Qa),Ar(o))QK(e,t,i,n,K8,r,a);else{var l=r?r(l0(e,i),o,i+"",e,t,a):void 0;l===void 0&&(l=o),i0(e,i,l)}},kf)}function W8(e,t,n){for(var r=-1,a=e==null?0:e.length;++r<a;)if(n(t,e[r]))return!0;return!1}function eW(e){var t=e==null?0:e.length;return t?e[t-1]:void 0}function tW(e){return function(t,n,r){var a=Object(t);if(!vc(t)){var o=Rm(n);t=mc(t),n=function(l){return o(a[l],l,a)}}var i=e(t,n,r);return i>-1?a[o?t[i]:i]:void 0}}var nW=Math.max;function U8(e,t,n){var r=e==null?0:e.length;if(!r)return-1;var a=n==null?0:Tm(n);return a<0&&(a=nW(r+a,0)),u8(e,Rm(t),a)}var rW=tW(U8),aW=rW,oW=1/0;function iW(e){var t=e==null?0:e.length;return t?Am(e,oW):[]}function Y8(e){for(var t=-1,n=e==null?0:e.length,r={};++t<n;){var a=e[t];r[a[0]]=a[1]}return r}var lW=Math.max;function ty(e,t,n){var r=e==null?0:e.length;if(!r)return-1;var a=n==null?0:Tm(n);return a<0&&(a=lW(r+a,0)),c8(e,t,a)}var sW=Math.min;function uW(e,t,n){for(var r=n?W8:d8,a=e[0].length,o=e.length,i=o,l=Array(o),s=1/0,c=[];i--;){var d=e[i];i&&t&&(d=Pm(d,_m(t))),s=sW(d.length,s),l[i]=!n&&(t||a>=120&&d.length>=120)?new Ku(i&&d):void 0}d=e[0];var f=-1,p=l[0];e:for(;++f<a&&c.length<s;){var v=d[f],m=t?t(v):v;if(v=n||v!==0?v:0,!(p?pv(p,m):r(c,m,n))){for(i=o;--i;){var y=l[i];if(!(y?pv(y,m):r(e[i],m,n)))continue e}p&&p.push(m),c.push(v)}}return c}function cW(e){return EC(e)?e:[]}var dW=yC(function(e){var t=Pm(e,cW);return t.length&&t[0]===e[0]?uW(t):[]}),fW=dW;function hW(e,t){return t.length<2?e:Nm(e,C8(t,0,-1))}function er(e,t){return Dm(e,t)}var pW="[object Number]";function vW(e){return typeof e=="number"||ao(e)&&ml(e)==pW}function ul(e){return e==null}var mW=oV(function(e,t,n){K8(e,t,n)}),L$=mW;function gW(e,t){return t=gc(t,e),e=hW(e,t),e==null||delete e[yc(eW(t))]}function yW(e){return bc(e)?void 0:e}var bW=1,CW=2,wW=4,SW=y8(function(e,t){var n={};if(e==null)return n;var r=!1;t=Pm(t,function(o){return 
o=gc(o,e),r||(r=o.length>1),o}),pc(e,I8(e),n),r&&(n=Pu(n,bW|CW|wW,yW));for(var a=t.length;a--;)gW(n,t[a]);return n}),MC=SW;function q8(e,t,n,r){if(!Ar(e))return e;t=gc(t,e);for(var a=-1,o=t.length,i=o-1,l=e;l!=null&&++a<o;){var s=yc(t[a]),c=n;if(s==="__proto__"||s==="constructor"||s==="prototype")return e;if(a!=i){var d=l[s];c=r?r(d,s,l):void 0,c===void 0&&(c=Ar(d)?d:xm(t[a+1])?[]:{})}gC(l,s,c),l=l[s]}return e}function kW(e,t,n){for(var r=-1,a=t.length,o={};++r<a;){var i=t[r],l=Nm(e,i);n(l,i)&&q8(o,gc(i,e),l)}return o}var $W=9007199254740991,OW=Math.floor;function F$(e,t){var n="";if(!e||t<1||t>$W)return n;do t%2&&(n+=e),t=OW(t/2),t&&(e+=e);while(t);return n}var PW=H8("length"),TW=PW,G8="\\ud800-\\udfff",xW="\\u0300-\\u036f",_W="\\ufe20-\\ufe2f",EW="\\u20d0-\\u20ff",MW=xW+_W+EW,IW="\\ufe0e\\ufe0f",NW="["+G8+"]",s0="["+MW+"]",u0="\\ud83c[\\udffb-\\udfff]",AW="(?:"+s0+"|"+u0+")",X8="[^"+G8+"]",Z8="(?:\\ud83c[\\udde6-\\uddff]){2}",J8="[\\ud800-\\udbff][\\udc00-\\udfff]",DW="\\u200d",Q8=AW+"?",eE="["+IW+"]?",RW="(?:"+DW+"(?:"+[X8,Z8,J8].join("|")+")"+eE+Q8+")*",LW=eE+Q8+RW,FW="(?:"+[X8+s0+"?",s0,Z8,J8,NW].join("|")+")",B$=RegExp(u0+"(?="+u0+")|"+FW+LW,"g");function BW(e){for(var t=B$.lastIndex=0;B$.test(e);)++t;return t}function IC(e){return PC(e)?BW(e):TW(e)}var VW=Math.ceil;function tE(e,t){t=t===void 0?" 
":hC(t);var n=t.length;if(n<2)return n?F$(t,e):t;var r=F$(t,VW(e/IC(t)));return PC(t)?Wz(pH(r),0,e).join(""):r.slice(0,e)}function zW(e,t,n){e=kC(e),t=Tm(t);var r=t?IC(e):0;return t&&r<t?e+tE(t-r,n):e}function HW(e,t,n){e=kC(e),t=Tm(t);var r=t?IC(e):0;return t&&r<t?tE(t-r,n)+e:e}var jW=YK(function(e,t,n){e[n?0:1].push(t)},function(){return[[],[]]}),KW=jW;function WW(e,t){return kW(e,t,function(n,r){return z8(e,r)})}var UW=y8(function(e,t){return e==null?{}:WW(e,t)}),Yi=UW;function YW(e,t,n){return e==null?e:q8(e,t,n)}var qW="Expected a function";function Qi(e,t,n){var r=!0,a=!0;if(typeof e!="function")throw new TypeError(qW);return Ar(n)&&(r="leading"in n?!!n.leading:r,a="trailing"in n?!!n.trailing:a),Yn(e,t,{leading:r,maxWait:t,trailing:a})}var GW=1/0,XW=Ou&&1/_C(new Ou([,-0]))[1]==GW?function(e){return new Ou(e)}:VB,ZW=XW,JW=200;function NC(e,t,n){var r=-1,a=d8,o=e.length,i=!0,l=[],s=l;if(n)i=!1,a=W8;else if(o>=JW){var c=t?null:ZW(e);if(c)return _C(c);i=!1,a=pv,s=new Ku}else s=t?[]:l;e:for(;++r<o;){var d=e[r],f=t?t(d):d;if(d=n||d!==0?d:0,i&&f===f){for(var p=s.length;p--;)if(s[p]===f)continue e;t&&s.push(f),l.push(d)}else a(s,f,n)||(s!==l&&s.push(f),l.push(d))}return l}var QW=yC(function(e){return NC(Am(e,1,EC,!0))}),ny=QW;function eU(e){return e&&e.length?NC(e):[]}function tU(e,t){return e&&e.length?NC(e,Rm(t)):[]}const nU='a[href],button:not([disabled]),button:not([hidden]),:not([tabindex="-1"]),input:not([disabled]),input:not([type="hidden"]),select:not([disabled]),textarea:not([disabled])',rU=e=>getComputedStyle(e).position==="fixed"?!1:e.offsetParent!==null,V$=e=>Array.from(e.querySelectorAll(nU)).filter(t=>aU(t)&&rU(t)),aU=e=>{if(e.tabIndex>0||e.tabIndex===0&&e.getAttribute("tabIndex")!==null)return!0;if(e.disabled)return!1;switch(e.nodeName){case"A":return!!e.href&&e.rel!=="ignore";case"INPUT":return!(e.type==="hidden"||e.type==="file");case"BUTTON":case"SELECT":case"TEXTAREA":return!0;default:return!1}},sp=function(e,t,...n){let 
r;t.includes("mouse")||t.includes("click")?r="MouseEvents":t.includes("key")?r="KeyboardEvent":r="HTMLEvents";const a=document.createEvent(r);return a.initEvent(t,...n),e.dispatchEvent(a),e},/* nE: true when element e has no (or an empty) "aria-owns" attribute. */nE=e=>!e.getAttribute("aria-owns"),/* rE: looks up e among the elements under e.parentNode that match selector n and returns the match t positions away from it; null when e has no parent or nothing is at that position. */rE=(e,t,n)=>{const{parentNode:r}=e;if(!r)return null;const a=r.querySelectorAll(n),o=Array.prototype.indexOf.call(a,e);return a[o+t]||null},/* up: focuses e and additionally clicks it when nE(e) is false, i.e. when e carries a non-empty "aria-owns" attribute. No-op for a falsy e. */up=e=>{!e||(e.focus(),!nE(e)&&e.click())},/* gn / Bn: addEventListener / removeEventListener, executed only when target e, event name t and handler n are all truthy; r is forwarded as the third argument (default false). */gn=(e,t,n,r=!1)=>{e&&t&&n&&(e==null||e.addEventListener(t,n,r))},Bn=(e,t,n,r=!1)=>{e&&t&&n&&(e==null||e.removeEventListener(t,n,r))},/* oU: one-shot listener. The wrapper calls n (if given) with the original this/arguments and then unregisters itself via Bn. */oU=(e,t,n)=>{const r=function(...a){n&&n.apply(this,a),Bn(e,t,r)};gn(e,t,r)},/* Tn: builds an event handler that calls e first; t is called afterwards unless e returned a truthy value while checkForDefaultPrevented is left at its default (anything other than false). */Tn=(e,t,{checkForDefaultPrevented:n=!0}={})=>a=>{const o=e==null?void 0:e(a);if(n===!1||!o)return t==null?void 0:t(a)},/* z$: wraps handler e so that it only runs for events whose pointerType is "mouse". */z$=e=>t=>t.pointerType==="mouse"?e(t):void 0;/* iU..fU: object-merge helpers. j$ defines or assigns a single key, dU copies the own string keys and the own enumerable symbol keys of t onto e, fU defines all own property descriptors of t on e. NOTE(review): these look like compiler-generated object-spread shims; confirm before hand-editing. */var iU=Object.defineProperty,lU=Object.defineProperties,sU=Object.getOwnPropertyDescriptors,H$=Object.getOwnPropertySymbols,uU=Object.prototype.hasOwnProperty,cU=Object.prototype.propertyIsEnumerable,j$=(e,t,n)=>t in e?iU(e,t,{enumerable:!0,configurable:!0,writable:!0,value:n}):e[t]=n,dU=(e,t)=>{for(var n in t||(t={}))uU.call(t,n)&&j$(e,n,t[n]);if(H$)for(var n of H$(t))cU.call(t,n)&&j$(e,n,t[n]);return e},fU=(e,t)=>lU(e,sU(t));/* K$: creates a holder with Qn(), registers through Wn a callback that stores e() into holder.value (options t, with flush defaulting to "sync"), and returns Cf(holder). Qn, Wn and Cf are defined outside this chunk. */function K$(e,t){var n;const r=Qn();return Wn(()=>{r.value=e()},fU(dU({},t),{flush:(n=t==null?void 0:t.flush)!=null?n:"sync"})),Cf(r)}/* Lm: when D9() is truthy, passes e to Z_ and returns true; otherwise returns false. D9 and Z_ are defined outside this chunk. */function Lm(e){return D9()?(Z_(e),!0):!1}/* Bt: true when a global window exists. yr / Yt / hU: typeof checks for boolean / number / string. ry: no-op function. */const Bt=typeof window!="undefined",yr=e=>typeof e=="boolean",Yt=e=>typeof e=="number",hU=e=>typeof e=="string",ry=()=>{};/* aE: returns a wrapper around t. Each call hands filter e a thunk that invokes t with the wrapper's this/arguments, plus a descriptor {fn, thisArg, args}; the filter decides if and when to run the thunk. */function aE(e,t){function n(...r){e(()=>t.apply(this,r),{fn:t,thisArg:this,args:r})}return n}/* pU: debounce-style filter. Every call cancels the pending timer and schedules invoke-callback o after A(e) ms; when t.maxWait is set, a second timer forces o to run after A(t.maxWait) ms. A non-positive delay or maxWait runs o immediately. A is defined outside this chunk. */function pU(e,t={}){let n,r;return o=>{const i=A(e),l=A(t.maxWait);if(n&&clearTimeout(n),i<=0||l!==void 0&&l<=0)return r&&(clearTimeout(r),r=null),o();l&&!r&&(r=setTimeout(()=>{n&&clearTimeout(n),r=null,o()},l)),n=setTimeout(()=>{r&&clearTimeout(r),r=null,o()},i)}}function vU(e,t=!0,n=!0){let r=0,a,o=!n;const i=()=>{a&&(clearTimeout(a),a=void 0)};return s=>{const 
c=A(e),d=Date.now()-r;if(i(),c<=0)return r=Date.now(),s();d>c&&(r=Date.now(),o?o=!1:s()),t&&(a=setTimeout(()=>{r=Date.now(),n||(o=!0),i(),s()},c)),!n&&!a&&(a=setTimeout(()=>o=!0,c))}}/* mU: wraps function e with the filter produced by pU(t, n); the delay t defaults to 200. */function mU(e,t=200,n={}){return aE(pU(t,n),e)}/* gU: returns e itself when t <= 0. Otherwise creates H(e.value) and, whenever the ce callback registered on e fires, copies e.value into it through an mU-wrapped updater; the new holder is returned. H and ce are defined outside this chunk. */function gU(e,t=200,n={}){if(t<=0)return e;const r=H(e.value),a=mU(()=>{r.value=e.value},t,n);return ce(e,()=>a()),r}/* oE: wraps function e with the filter produced by vU(t, n, r); the delay t defaults to 200. */function oE(e,t=200,n=!0,r=!0){return aE(vU(t,n,r),e)}/* yU: when $t() is truthy passes e to et; otherwise calls e immediately if t is truthy, else passes it to Ne. $t, et and Ne are defined outside this chunk. */function yU(e,t=!0){$t()?et(e):t?e():Ne(e)}/* gs: timeout helper. start(...args) clears any pending timer, sets isPending.value to true and schedules e(...args) after A(t) ms; when the timer fires isPending.value becomes false. stop() clears the timer and resets isPending. With immediate (default true) isPending starts as true and start() is called right away if Bt is true. stop is also handed to Lm. */function gs(e,t,n={}){const{immediate:r=!0}=n,a=H(!1);let o=null;function i(){o&&(clearTimeout(o),o=null)}function l(){a.value=!1,i()}function s(...c){i(),a.value=!0,o=setTimeout(()=>{a.value=!1,o=null,e(...c)},A(t))}return r&&(a.value=!0,Bt&&s()),Lm(l),{isPending:a,start:s,stop:l}}/* eo: resolves A(e) and returns its $el property when that is not null/undefined, otherwise the resolved value itself. */function eo(e){var t;const n=A(e);return(t=n==null?void 0:n.$el)!=null?t:n}/* $f / bU: window / window.document when Bt is true, otherwise undefined. */const $f=Bt?window:void 0,bU=Bt?window.document:void 0;/* Hn: event-listener helper. Called as (event, listener, options) it targets $f; called as (target, event, listener, options) it targets eo(target). The listener is attached inside a ce callback (immediate, flush "post") that first removes the previous registration, so it follows target changes. Returns a stop function (also handed to Lm), or the no-op ry when there is no target. */function Hn(...e){let t,n,r,a;if(hU(e[0])?([n,r,a]=e,t=$f):[t,n,r,a]=e,!t)return ry;let o=ry;const i=ce(()=>eo(t),s=>{o(),s&&(s.addEventListener(n,r,a),o=()=>{s.removeEventListener(n,r,a),o=ry})},{immediate:!0,flush:"post"}),l=()=>{i(),o()};return Lm(l),l}/* vv: click-outside helper. Listens for "click" and "pointerdown" on the given window (default $f). t(event) is called for a click only if eo(e) exists, the click target and composedPath() do not include it, the preceding pointerdown also started outside it, and no entry of the optional ignore list is the target or on the path. Returns a function that removes both listeners; returns undefined when there is no window. */function vv(e,t,n={}){const{window:r=$f,ignore:a,capture:o=!0}=n;if(!r)return;const i=H(!0),s=[Hn(r,"click",d=>{const f=eo(e),p=d.composedPath();!f||f===d.target||p.includes(f)||!i.value||a&&a.length>0&&a.some(v=>{const m=eo(v);return m&&(d.target===m||p.includes(m))})||t(d)},{passive:!0,capture:o}),Hn(r,"pointerdown",d=>{const f=eo(e);i.value=!!f&&!d.composedPath().includes(f)},{passive:!0})];return()=>s.forEach(d=>d())}/* c0: the first available of globalThis, window, global, self, else a fresh object. The following statement makes sure c0["__vueuse_ssr_handlers__"] exists; the bare c0[d0] expression after it only reads the value and discards it. */const c0=typeof globalThis!="undefined"?globalThis:typeof window!="undefined"?window:typeof global!="undefined"?global:typeof self!="undefined"?self:{},d0="__vueuse_ssr_handlers__";c0[d0]=c0[d0]||{};c0[d0];/* CU: returns H(document.visibilityState), refreshed on every "visibilitychange" event; returns H("visible") when no document is available. */function CU({document:e=bU}={}){if(!e)return H("visible");const t=H(e.visibilityState);return Hn(e,"visibilitychange",()=>{t.value=e.visibilityState}),t}var 
W$=Object.getOwnPropertySymbols,wU=Object.prototype.hasOwnProperty,SU=Object.prototype.propertyIsEnumerable,/* kU: returns a copy of e without the keys listed in t. Own string keys are copied first, then own enumerable symbol keys. NOTE(review): looks like a compiler-generated object-rest shim; confirm before hand-editing. */kU=(e,t)=>{var n={};for(var r in e)wU.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&W$)for(var r of W$(e))t.indexOf(r)<0&&SU.call(e,r)&&(n[r]=e[r]);return n};/* Cc: ResizeObserver helper. Whenever eo(e) changes (ce callback, immediate, flush "post") the previous observer is disconnected and, if the window (default $f) exposes ResizeObserver and an element is present, a new observer with callback t is attached; all options except "window" are forwarded to observe(). Returns {isSupported, stop}; stop is also handed to Lm. */function Cc(e,t,n={}){const r=n,{window:a=$f}=r,o=kU(r,["window"]);let i;const l=a&&"ResizeObserver"in a,s=()=>{i&&(i.disconnect(),i=void 0)},c=ce(()=>eo(e),f=>{s(),l&&a&&f&&(i=new ResizeObserver(t),i.observe(f,o))},{immediate:!0,flush:"post"}),d=()=>{s(),c()};return Lm(d),{isSupported:l,stop:d}}/* U$: tracks the getBoundingClientRect() values of eo(e) in eight H() holders (height, bottom, left, right, top, width, x, y). update() re-reads them; when the element is missing they are reset to 0 if the reset option is on (default). update is triggered by Cc, by eo(e) becoming falsy, and by "scroll" / "resize" events unless windowScroll / windowResize are disabled. */function U$(e,t={}){const{reset:n=!0,windowResize:r=!0,windowScroll:a=!0}=t,o=H(0),i=H(0),l=H(0),s=H(0),c=H(0),d=H(0),f=H(0),p=H(0);function v(){const m=eo(e);if(!m){n&&(o.value=0,i.value=0,l.value=0,s.value=0,c.value=0,d.value=0,f.value=0,p.value=0);return}const y=m.getBoundingClientRect();o.value=y.height,i.value=y.bottom,l.value=y.left,s.value=y.right,c.value=y.top,d.value=y.width,f.value=y.x,p.value=y.y}return Cc(e,v),ce(()=>eo(e),m=>!m&&v()),a&&Hn("scroll",v,{passive:!0}),r&&Hn("resize",v,{passive:!0}),{height:o,bottom:i,left:l,right:s,top:c,width:d,x:f,y:p,update:v}}/* The expression statement below tests navigator.platform against /iP(ad|hone|od)/ but its result is not stored anywhere in this chunk. NOTE(review): presumably the remains of an unused platform flag left by tree-shaking; confirm. */var Y$,q$;Bt&&(window==null?void 0:window.navigator)&&((Y$=window==null?void 0:window.navigator)==null?void 0:Y$.platform)&&/iP(ad|hone|od)/.test((q$=window==null?void 0:window.navigator)==null?void 0:q$.platform);/* $U: returns a holder initialised from window.document.hasFocus() that is set to false on "blur" and true on "focus"; returns H(false) when no window is available. */function $U({window:e=$f}={}){if(!e)return H(!1);const t=H(e.document.hasFocus());return Hn(e,"blur",()=>{t.value=!1}),Hn(e,"focus",()=>{t.value=!0}),t}/* OU: returns {width, height} holders fed from window.innerWidth / innerHeight. They start at initialWidth / initialHeight (default Infinity), are read once immediately, once more through yU, and on every "resize" event. */function OU({window:e=$f,initialWidth:t=1/0,initialHeight:n=1/0}={}){const r=H(t),a=H(n),o=()=>{e&&(r.value=e.innerWidth,a.value=e.innerHeight)};return o(),yU(o),Hn("resize",o,{passive:!0}),{width:r,height:a}}const PU=(e,t)=>{if(!Bt||!e||!t)return!1;const n=e.getBoundingClientRect();let r;return t instanceof 
Element?r=t.getBoundingClientRect():r={top:0,right:window.innerWidth,bottom:window.innerHeight,left:0},n.top<r.bottom&&n.bottom>r.top&&n.right>r.left&&n.left<r.right},G$=e=>{let t=0,n=e;for(;n;)t+=n.offsetTop,n=n.offsetParent;return t},TU=(e,t)=>Math.abs(G$(e)-G$(t)),AC=e=>{let t,n;return e.type==="touchend"?(n=e.changedTouches[0].clientY,t=e.changedTouches[0].clientX):e.type.startsWith("touch")?(n=e.touches[0].clientY,t=e.touches[0].clientX):(n=e.clientY,t=e.clientX),{clientX:t,clientY:n}},xU=function(e){for(const t of e){const n=t.target.__resizeListeners__||[];n.length&&n.forEach(r=>{r()})}},wc=function(e,t){!Bt||!e||(e.__resizeListeners__||(e.__resizeListeners__=[],e.__ro__=new ResizeObserver(xU),e.__ro__.observe(e)),e.__resizeListeners__.push(t))},Sc=function(e,t){var n;!e||!e.__resizeListeners__||(e.__resizeListeners__.splice(e.__resizeListeners__.indexOf(t),1),e.__resizeListeners__.length||(n=e.__ro__)==null||n.disconnect())},sa=e=>e===void 0,Kd=e=>!e&&e!==0||pt(e)&&e.length===0||zt(e)&&!Object.keys(e).length,ys=e=>typeof Element=="undefined"?!1:e instanceof Element,_U=e=>ul(e),EU=(e="")=>e.replace(/[|\\{}()[\]^$+*?.]/g,"\\$&").replace(/-/g,"\\x2d"),f0=e=>Object.keys(e),MU=e=>Object.entries(e),cp=(e,t,n)=>({get value(){return vn(e,t,n)},set value(r){YW(e,t,r)}}),iE=(e="")=>e.split(" ").filter(t=>!!t.trim()),to=(e,t)=>{if(!e||!t)return!1;if(t.includes(" "))throw new Error("className should not contain space.");return e.classList.contains(t)},xo=(e,t)=>{!e||!t.trim()||e.classList.add(...iE(t))},Br=(e,t)=>{!e||!t.trim()||e.classList.remove(...iE(t))},$o=(e,t)=>{var n;if(!Bt||!e||!t)return"";_a(t);try{const r=e.style[t];if(r)return r;const a=(n=document.defaultView)==null?void 0:n.getComputedStyle(e,"");return a?a[t]:""}catch{return e.style[t]}},IU=(e,t)=>{if(!Bt)return!1;const n={undefined:"overflow",true:"overflow-y",false:"overflow-x"}[String(t)],r=$o(e,n);return["scroll","auto","overlay"].some(a=>r.includes(a))},DC=(e,t)=>{if(!Bt)return;let 
n=e;for(;n;){if([window,document,document.documentElement].includes(n))return window;if(IU(n,t))return n;n=n.parentNode}return n};let ah;const NU=()=>{var e;if(!Bt)return 0;if(ah!==void 0)return ah;const t=document.createElement("div");t.className="el-scrollbar__wrap",t.style.visibility="hidden",t.style.width="100px",t.style.position="absolute",t.style.top="-9999px",document.body.appendChild(t);const n=t.offsetWidth;t.style.overflow="scroll";const r=document.createElement("div");r.style.width="100%",t.appendChild(r);const a=r.offsetWidth;return(e=t.parentNode)==null||e.removeChild(t),ah=n-a,ah};function lE(e,t){if(!Bt)return;if(!t){e.scrollTop=0;return}const n=[];let r=t.offsetParent;for(;r!==null&&e!==r&&e.contains(r);)n.push(r),r=r.offsetParent;const a=t.offsetTop+n.reduce((s,c)=>s+c.offsetTop,0),o=a+t.offsetHeight,i=e.scrollTop,l=i+e.clientHeight;a<i?e.scrollTop=a:o>l&&(e.scrollTop=o-e.clientHeight)}var Xt=(e,t)=>{const n=e.__vccOpts||e;for(const[r,a]of t)n[r]=a;return n};const AU=G({name:"ArrowDown"}),DU={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},RU=Z("path",{fill:"currentColor",d:"M831.872 340.864 512 652.672 192.128 340.864a30.592 30.592 0 0 0-42.752 0 29.12 29.12 0 0 0 0 41.6L489.664 714.24a32 32 0 0 0 44.672 0l340.288-331.712a29.12 29.12 0 0 0 0-41.728 30.592 30.592 0 0 0-42.752 0z"},null,-1),LU=[RU];function FU(e,t,n,r,a,o){return R(),X("svg",DU,LU)}var _s=Xt(AU,[["render",FU]]);const BU=G({name:"ArrowLeft"}),VU={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},zU=Z("path",{fill:"currentColor",d:"M609.408 149.376 277.76 489.6a32 32 0 0 0 0 44.672l331.648 340.352a29.12 29.12 0 0 0 41.728 0 30.592 30.592 0 0 0 0-42.752L339.264 511.936l311.872-319.872a30.592 30.592 0 0 0 0-42.688 29.12 29.12 0 0 0-41.728 0z"},null,-1),HU=[zU];function jU(e,t,n,r,a,o){return R(),X("svg",VU,HU)}var Es=Xt(BU,[["render",jU]]);const KU=G({name:"ArrowRight"}),WU={viewBox:"0 0 1024 
1024",xmlns:"http://www.w3.org/2000/svg"},UU=Z("path",{fill:"currentColor",d:"M340.864 149.312a30.592 30.592 0 0 0 0 42.752L652.736 512 340.864 831.872a30.592 30.592 0 0 0 0 42.752 29.12 29.12 0 0 0 41.728 0L714.24 534.336a32 32 0 0 0 0-44.672L382.592 149.376a29.12 29.12 0 0 0-41.728 0z"},null,-1),YU=[UU];function qU(e,t,n,r,a,o){return R(),X("svg",WU,YU)}var Da=Xt(KU,[["render",qU]]);const GU=G({name:"ArrowUp"}),XU={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},ZU=Z("path",{fill:"currentColor",d:"m488.832 344.32-339.84 356.672a32 32 0 0 0 0 44.16l.384.384a29.44 29.44 0 0 0 42.688 0l320-335.872 319.872 335.872a29.44 29.44 0 0 0 42.688 0l.384-.384a32 32 0 0 0 0-44.16L535.168 344.32a32 32 0 0 0-46.336 0z"},null,-1),JU=[ZU];function QU(e,t,n,r,a,o){return R(),X("svg",XU,JU)}var Of=Xt(GU,[["render",QU]]);const eY=G({name:"Back"}),tY={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},nY=Z("path",{fill:"currentColor",d:"M224 480h640a32 32 0 1 1 0 64H224a32 32 0 0 1 0-64z"},null,-1),rY=Z("path",{fill:"currentColor",d:"m237.248 512 265.408 265.344a32 32 0 0 1-45.312 45.312l-288-288a32 32 0 0 1 0-45.312l288-288a32 32 0 1 1 45.312 45.312L237.248 512z"},null,-1),aY=[nY,rY];function oY(e,t,n,r,a,o){return R(),X("svg",tY,aY)}var iY=Xt(eY,[["render",oY]]);const lY=G({name:"Calendar"}),sY={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},uY=Z("path",{fill:"currentColor",d:"M128 384v512h768V192H768v32a32 32 0 1 1-64 0v-32H320v32a32 32 0 0 1-64 0v-32H128v128h768v64H128zm192-256h384V96a32 32 0 1 1 64 0v32h160a32 32 0 0 1 32 32v768a32 32 0 0 1-32 32H96a32 32 0 0 1-32-32V160a32 32 0 0 1 32-32h160V96a32 32 0 0 1 64 0v32zm-32 384h64a32 32 0 0 1 0 64h-64a32 32 0 0 1 0-64zm0 192h64a32 32 0 1 1 0 64h-64a32 32 0 1 1 0-64zm192-192h64a32 32 0 0 1 0 64h-64a32 32 0 0 1 0-64zm0 192h64a32 32 0 1 1 0 64h-64a32 32 0 1 1 0-64zm192-192h64a32 32 0 1 1 0 64h-64a32 32 0 1 1 0-64zm0 192h64a32 32 0 1 1 0 64h-64a32 32 0 1 1 0-64z"},null,-1),cY=[uY];function 
dY(e,t,n,r,a,o){return R(),X("svg",sY,cY)}var fY=Xt(lY,[["render",dY]]);const hY=G({name:"CaretRight"}),pY={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},vY=Z("path",{fill:"currentColor",d:"M384 192v640l384-320.064z"},null,-1),mY=[vY];function gY(e,t,n,r,a,o){return R(),X("svg",pY,mY)}var sE=Xt(hY,[["render",gY]]);const yY=G({name:"CaretTop"}),bY={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},CY=Z("path",{fill:"currentColor",d:"M512 320 192 704h639.936z"},null,-1),wY=[CY];function SY(e,t,n,r,a,o){return R(),X("svg",bY,wY)}var kY=Xt(yY,[["render",SY]]);const $Y=G({name:"Check"}),OY={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},PY=Z("path",{fill:"currentColor",d:"M406.656 706.944 195.84 496.256a32 32 0 1 0-45.248 45.248l256 256 512-512a32 32 0 0 0-45.248-45.248L406.592 706.944z"},null,-1),TY=[PY];function xY(e,t,n,r,a,o){return R(),X("svg",OY,TY)}var Wu=Xt($Y,[["render",xY]]);const _Y=G({name:"CircleCheckFilled"}),EY={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},MY=Z("path",{fill:"currentColor",d:"M512 64a448 448 0 1 1 0 896 448 448 0 0 1 0-896zm-55.808 536.384-99.52-99.584a38.4 38.4 0 1 0-54.336 54.336l126.72 126.72a38.272 38.272 0 0 0 54.336 0l262.4-262.464a38.4 38.4 0 1 0-54.272-54.336L456.192 600.384z"},null,-1),IY=[MY];function NY(e,t,n,r,a,o){return R(),X("svg",EY,IY)}var AY=Xt(_Y,[["render",NY]]);const DY=G({name:"CircleCheck"}),RY={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},LY=Z("path",{fill:"currentColor",d:"M512 896a384 384 0 1 0 0-768 384 384 0 0 0 0 768zm0 64a448 448 0 1 1 0-896 448 448 0 0 1 0 896z"},null,-1),FY=Z("path",{fill:"currentColor",d:"M745.344 361.344a32 32 0 0 1 45.312 45.312l-288 288a32 32 0 0 1-45.312 0l-160-160a32 32 0 1 1 45.312-45.312L480 626.752l265.344-265.408z"},null,-1),BY=[LY,FY];function VY(e,t,n,r,a,o){return R(),X("svg",RY,BY)}var mv=Xt(DY,[["render",VY]]);const zY=G({name:"CircleCloseFilled"}),HY={viewBox:"0 0 1024 
1024",xmlns:"http://www.w3.org/2000/svg"},jY=Z("path",{fill:"currentColor",d:"M512 64a448 448 0 1 1 0 896 448 448 0 0 1 0-896zm0 393.664L407.936 353.6a38.4 38.4 0 1 0-54.336 54.336L457.664 512 353.6 616.064a38.4 38.4 0 1 0 54.336 54.336L512 566.336 616.064 670.4a38.4 38.4 0 1 0 54.336-54.336L566.336 512 670.4 407.936a38.4 38.4 0 1 0-54.336-54.336L512 457.664z"},null,-1),KY=[jY];function WY(e,t,n,r,a,o){return R(),X("svg",HY,KY)}var RC=Xt(zY,[["render",WY]]);const UY=G({name:"CircleClose"}),YY={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},qY=Z("path",{fill:"currentColor",d:"m466.752 512-90.496-90.496a32 32 0 0 1 45.248-45.248L512 466.752l90.496-90.496a32 32 0 1 1 45.248 45.248L557.248 512l90.496 90.496a32 32 0 1 1-45.248 45.248L512 557.248l-90.496 90.496a32 32 0 0 1-45.248-45.248L466.752 512z"},null,-1),GY=Z("path",{fill:"currentColor",d:"M512 896a384 384 0 1 0 0-768 384 384 0 0 0 0 768zm0 64a448 448 0 1 1 0-896 448 448 0 0 1 0 896z"},null,-1),XY=[qY,GY];function ZY(e,t,n,r,a,o){return R(),X("svg",YY,XY)}var gi=Xt(UY,[["render",ZY]]);const JY=G({name:"Clock"}),QY={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},eq=Z("path",{fill:"currentColor",d:"M512 896a384 384 0 1 0 0-768 384 384 0 0 0 0 768zm0 64a448 448 0 1 1 0-896 448 448 0 0 1 0 896z"},null,-1),tq=Z("path",{fill:"currentColor",d:"M480 256a32 32 0 0 1 32 32v256a32 32 0 0 1-64 0V288a32 32 0 0 1 32-32z"},null,-1),nq=Z("path",{fill:"currentColor",d:"M480 512h256q32 0 32 32t-32 32H480q-32 0-32-32t32-32z"},null,-1),rq=[eq,tq,nq];function aq(e,t,n,r,a,o){return R(),X("svg",QY,rq)}var uE=Xt(JY,[["render",aq]]);const oq=G({name:"Close"}),iq={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},lq=Z("path",{fill:"currentColor",d:"M764.288 214.592 512 466.88 259.712 214.592a31.936 31.936 0 0 0-45.12 45.12L466.752 512 214.528 764.224a31.936 31.936 0 1 0 45.12 45.184L512 557.184l252.288 252.288a31.936 31.936 0 0 0 45.12-45.12L557.12 512.064l252.288-252.352a31.936 31.936 0 1 
0-45.12-45.184z"},null,-1),sq=[lq];function uq(e,t,n,r,a,o){return R(),X("svg",iq,sq)}var Ma=Xt(oq,[["render",uq]]);const cq=G({name:"DArrowLeft"}),dq={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},fq=Z("path",{fill:"currentColor",d:"M529.408 149.376a29.12 29.12 0 0 1 41.728 0 30.592 30.592 0 0 1 0 42.688L259.264 511.936l311.872 319.936a30.592 30.592 0 0 1-.512 43.264 29.12 29.12 0 0 1-41.216-.512L197.76 534.272a32 32 0 0 1 0-44.672l331.648-340.224zm256 0a29.12 29.12 0 0 1 41.728 0 30.592 30.592 0 0 1 0 42.688L515.264 511.936l311.872 319.936a30.592 30.592 0 0 1-.512 43.264 29.12 29.12 0 0 1-41.216-.512L453.76 534.272a32 32 0 0 1 0-44.672l331.648-340.224z"},null,-1),hq=[fq];function pq(e,t,n,r,a,o){return R(),X("svg",dq,hq)}var Fm=Xt(cq,[["render",pq]]);const vq=G({name:"DArrowRight"}),mq={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},gq=Z("path",{fill:"currentColor",d:"M452.864 149.312a29.12 29.12 0 0 1 41.728.064L826.24 489.664a32 32 0 0 1 0 44.672L494.592 874.624a29.12 29.12 0 0 1-41.728 0 30.592 30.592 0 0 1 0-42.752L764.736 512 452.864 192a30.592 30.592 0 0 1 0-42.688zm-256 0a29.12 29.12 0 0 1 41.728.064L570.24 489.664a32 32 0 0 1 0 44.672L238.592 874.624a29.12 29.12 0 0 1-41.728 0 30.592 30.592 0 0 1 0-42.752L508.736 512 196.864 192a30.592 30.592 0 0 1 0-42.688z"},null,-1),yq=[gq];function bq(e,t,n,r,a,o){return R(),X("svg",mq,yq)}var Bm=Xt(vq,[["render",bq]]);const Cq=G({name:"Delete"}),wq={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},Sq=Z("path",{fill:"currentColor",d:"M160 256H96a32 32 0 0 1 0-64h256V95.936a32 32 0 0 1 32-32h256a32 32 0 0 1 32 32V192h256a32 32 0 1 1 0 64h-64v672a32 32 0 0 1-32 32H192a32 32 0 0 1-32-32V256zm448-64v-64H416v64h192zM224 896h576V256H224v640zm192-128a32 32 0 0 1-32-32V416a32 32 0 0 1 64 0v320a32 32 0 0 1-32 32zm192 0a32 32 0 0 1-32-32V416a32 32 0 0 1 64 0v320a32 32 0 0 1-32 32z"},null,-1),kq=[Sq];function $q(e,t,n,r,a,o){return R(),X("svg",wq,kq)}var Oq=Xt(Cq,[["render",$q]]);const 
Pq=G({name:"Document"}),Tq={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},xq=Z("path",{fill:"currentColor",d:"M832 384H576V128H192v768h640V384zm-26.496-64L640 154.496V320h165.504zM160 64h480l256 256v608a32 32 0 0 1-32 32H160a32 32 0 0 1-32-32V96a32 32 0 0 1 32-32zm160 448h384v64H320v-64zm0-192h160v64H320v-64zm0 384h384v64H320v-64z"},null,-1),_q=[xq];function Eq(e,t,n,r,a,o){return R(),X("svg",Tq,_q)}var Mq=Xt(Pq,[["render",Eq]]);const Iq=G({name:"FullScreen"}),Nq={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},Aq=Z("path",{fill:"currentColor",d:"m160 96.064 192 .192a32 32 0 0 1 0 64l-192-.192V352a32 32 0 0 1-64 0V96h64v.064zm0 831.872V928H96V672a32 32 0 1 1 64 0v191.936l192-.192a32 32 0 1 1 0 64l-192 .192zM864 96.064V96h64v256a32 32 0 1 1-64 0V160.064l-192 .192a32 32 0 1 1 0-64l192-.192zm0 831.872-192-.192a32 32 0 0 1 0-64l192 .192V672a32 32 0 1 1 64 0v256h-64v-.064z"},null,-1),Dq=[Aq];function Rq(e,t,n,r,a,o){return R(),X("svg",Nq,Dq)}var Lq=Xt(Iq,[["render",Rq]]);const Fq=G({name:"Hide"}),Bq={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},Vq=Z("path",{d:"M876.8 156.8c0-9.6-3.2-16-9.6-22.4-6.4-6.4-12.8-9.6-22.4-9.6-9.6 0-16 3.2-22.4 9.6L736 220.8c-64-32-137.6-51.2-224-60.8-160 16-288 73.6-377.6 176C44.8 438.4 0 496 0 512s48 73.6 134.4 176c22.4 25.6 44.8 48 73.6 67.2l-86.4 89.6c-6.4 6.4-9.6 12.8-9.6 22.4 0 9.6 3.2 16 9.6 22.4 6.4 6.4 12.8 9.6 22.4 9.6 9.6 0 16-3.2 22.4-9.6l704-710.4c3.2-6.4 6.4-12.8 6.4-22.4Zm-646.4 528c-76.8-70.4-128-128-153.6-172.8 28.8-48 80-105.6 153.6-172.8C304 272 400 230.4 512 224c64 3.2 124.8 19.2 176 44.8l-54.4 54.4C598.4 300.8 560 288 512 288c-64 0-115.2 22.4-160 64s-64 96-64 160c0 48 12.8 89.6 35.2 124.8L256 707.2c-9.6-6.4-19.2-16-25.6-22.4Zm140.8-96c-12.8-22.4-19.2-48-19.2-76.8 0-44.8 16-83.2 48-112 32-28.8 67.2-48 112-48 28.8 0 54.4 6.4 73.6 19.2L371.2 588.8ZM889.599 336c-12.8-16-28.8-28.8-41.6-41.6l-48 48c73.6 67.2 124.8 124.8 150.4 169.6-28.8 48-80 105.6-153.6 172.8-73.6 67.2-172.8 108.8-284.8 
115.2-51.2-3.2-99.2-12.8-140.8-28.8l-48 48c57.6 22.4 118.4 38.4 188.8 44.8 160-16 288-73.6 377.6-176C979.199 585.6 1024 528 1024 512s-48.001-73.6-134.401-176Z",fill:"currentColor"},null,-1),zq=Z("path",{d:"M511.998 672c-12.8 0-25.6-3.2-38.4-6.4l-51.2 51.2c28.8 12.8 57.6 19.2 89.6 19.2 64 0 115.2-22.4 160-64 41.6-41.6 64-96 64-160 0-32-6.4-64-19.2-89.6l-51.2 51.2c3.2 12.8 6.4 25.6 6.4 38.4 0 44.8-16 83.2-48 112-32 28.8-67.2 48-112 48Z",fill:"currentColor"},null,-1),Hq=[Vq,zq];function jq(e,t,n,r,a,o){return R(),X("svg",Bq,Hq)}var Kq=Xt(Fq,[["render",jq]]);const Wq=G({name:"InfoFilled"}),Uq={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},Yq=Z("path",{fill:"currentColor",d:"M512 64a448 448 0 1 1 0 896.064A448 448 0 0 1 512 64zm67.2 275.072c33.28 0 60.288-23.104 60.288-57.344s-27.072-57.344-60.288-57.344c-33.28 0-60.16 23.104-60.16 57.344s26.88 57.344 60.16 57.344zM590.912 699.2c0-6.848 2.368-24.64 1.024-34.752l-52.608 60.544c-10.88 11.456-24.512 19.392-30.912 17.28a12.992 12.992 0 0 1-8.256-14.72l87.68-276.992c7.168-35.136-12.544-67.2-54.336-71.296-44.096 0-108.992 44.736-148.48 101.504 0 6.784-1.28 23.68.064 33.792l52.544-60.608c10.88-11.328 23.552-19.328 29.952-17.152a12.8 12.8 0 0 1 7.808 16.128L388.48 728.576c-10.048 32.256 8.96 63.872 55.04 71.04 67.84 0 107.904-43.648 147.456-100.416z"},null,-1),qq=[Yq];function Gq(e,t,n,r,a,o){return R(),X("svg",Uq,qq)}var LC=Xt(Wq,[["render",Gq]]);const Xq=G({name:"Loading"}),Zq={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},Jq=Z("path",{fill:"currentColor",d:"M512 64a32 32 0 0 1 32 32v192a32 32 0 0 1-64 0V96a32 32 0 0 1 32-32zm0 640a32 32 0 0 1 32 32v192a32 32 0 1 1-64 0V736a32 32 0 0 1 32-32zm448-192a32 32 0 0 1-32 32H736a32 32 0 1 1 0-64h192a32 32 0 0 1 32 32zm-640 0a32 32 0 0 1-32 32H96a32 32 0 0 1 0-64h192a32 32 0 0 1 32 32zM195.2 195.2a32 32 0 0 1 45.248 0L376.32 331.008a32 32 0 0 1-45.248 45.248L195.2 240.448a32 32 0 0 1 0-45.248zm452.544 452.544a32 32 0 0 1 45.248 0L828.8 783.552a32 32 0 0 
1-45.248 45.248L647.744 692.992a32 32 0 0 1 0-45.248zM828.8 195.264a32 32 0 0 1 0 45.184L692.992 376.32a32 32 0 0 1-45.248-45.248l135.808-135.808a32 32 0 0 1 45.248 0zm-452.544 452.48a32 32 0 0 1 0 45.248L240.448 828.8a32 32 0 0 1-45.248-45.248l135.808-135.808a32 32 0 0 1 45.248 0z"},null,-1),Qq=[Jq];function eG(e,t,n,r,a,o){return R(),X("svg",Zq,Qq)}var gl=Xt(Xq,[["render",eG]]);const tG=G({name:"Minus"}),nG={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},rG=Z("path",{fill:"currentColor",d:"M128 544h768a32 32 0 1 0 0-64H128a32 32 0 0 0 0 64z"},null,-1),aG=[rG];function oG(e,t,n,r,a,o){return R(),X("svg",nG,aG)}var iG=Xt(tG,[["render",oG]]);const lG=G({name:"MoreFilled"}),sG={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},uG=Z("path",{fill:"currentColor",d:"M176 416a112 112 0 1 1 0 224 112 112 0 0 1 0-224zm336 0a112 112 0 1 1 0 224 112 112 0 0 1 0-224zm336 0a112 112 0 1 1 0 224 112 112 0 0 1 0-224z"},null,-1),cG=[uG];function dG(e,t,n,r,a,o){return R(),X("svg",sG,cG)}var fG=Xt(lG,[["render",dG]]);const hG=G({name:"More"}),pG={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},vG=Z("path",{fill:"currentColor",d:"M176 416a112 112 0 1 0 0 224 112 112 0 0 0 0-224m0 64a48 48 0 1 1 0 96 48 48 0 0 1 0-96zm336-64a112 112 0 1 1 0 224 112 112 0 0 1 0-224zm0 64a48 48 0 1 0 0 96 48 48 0 0 0 0-96zm336-64a112 112 0 1 1 0 224 112 112 0 0 1 0-224zm0 64a48 48 0 1 0 0 96 48 48 0 0 0 0-96z"},null,-1),mG=[vG];function gG(e,t,n,r,a,o){return R(),X("svg",pG,mG)}var yG=Xt(hG,[["render",gG]]);const bG=G({name:"PictureFilled"}),CG={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},wG=Z("path",{fill:"currentColor",d:"M96 896a32 32 0 0 1-32-32V160a32 32 0 0 1 32-32h832a32 32 0 0 1 32 32v704a32 32 0 0 1-32 32H96zm315.52-228.48-68.928-68.928a32 32 0 0 0-45.248 0L128 768.064h778.688l-242.112-290.56a32 32 0 0 0-49.216 0L458.752 665.408a32 32 0 0 1-47.232 2.112zM256 384a96 96 0 1 0 192.064-.064A96 96 0 0 0 256 384z"},null,-1),SG=[wG];function 
kG(e,t,n,r,a,o){return R(),X("svg",CG,SG)}var $G=Xt(bG,[["render",kG]]);const OG=G({name:"Plus"}),PG={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},TG=Z("path",{fill:"currentColor",d:"M480 480V128a32 32 0 0 1 64 0v352h352a32 32 0 1 1 0 64H544v352a32 32 0 1 1-64 0V544H128a32 32 0 0 1 0-64h352z"},null,-1),xG=[TG];function _G(e,t,n,r,a,o){return R(),X("svg",PG,xG)}var cE=Xt(OG,[["render",_G]]);const EG=G({name:"QuestionFilled"}),MG={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},IG=Z("path",{fill:"currentColor",d:"M512 64a448 448 0 1 1 0 896 448 448 0 0 1 0-896zm23.744 191.488c-52.096 0-92.928 14.784-123.2 44.352-30.976 29.568-45.76 70.4-45.76 122.496h80.256c0-29.568 5.632-52.8 17.6-68.992 13.376-19.712 35.2-28.864 66.176-28.864 23.936 0 42.944 6.336 56.32 19.712 12.672 13.376 19.712 31.68 19.712 54.912 0 17.6-6.336 34.496-19.008 49.984l-8.448 9.856c-45.76 40.832-73.216 70.4-82.368 89.408-9.856 19.008-14.08 42.24-14.08 68.992v9.856h80.96v-9.856c0-16.896 3.52-31.68 10.56-45.76 6.336-12.672 15.488-24.64 28.16-35.2 33.792-29.568 54.208-48.576 60.544-55.616 16.896-22.528 26.048-51.392 26.048-86.592 0-42.944-14.08-76.736-42.24-101.376-28.16-25.344-65.472-37.312-111.232-37.312zm-12.672 406.208a54.272 54.272 0 0 0-38.72 14.784 49.408 49.408 0 0 0-15.488 38.016c0 15.488 4.928 28.16 15.488 38.016A54.848 54.848 0 0 0 523.072 768c15.488 0 28.16-4.928 38.72-14.784a51.52 51.52 0 0 0 16.192-38.72 51.968 51.968 0 0 0-15.488-38.016 55.936 55.936 0 0 0-39.424-14.784z"},null,-1),NG=[IG];function AG(e,t,n,r,a,o){return R(),X("svg",MG,NG)}var DG=Xt(EG,[["render",AG]]);const RG=G({name:"RefreshLeft"}),LG={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},FG=Z("path",{fill:"currentColor",d:"M289.088 296.704h92.992a32 32 0 0 1 0 64H232.96a32 32 0 0 1-32-32V179.712a32 32 0 0 1 64 0v50.56a384 384 0 0 1 643.84 282.88 384 384 0 0 1-383.936 384 384 384 0 0 1-384-384h64a320 320 0 1 0 640 0 320 320 0 0 0-555.712-216.448z"},null,-1),BG=[FG];function 
VG(e,t,n,r,a,o){return R(),X("svg",LG,BG)}var zG=Xt(RG,[["render",VG]]);const HG=G({name:"RefreshRight"}),jG={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},KG=Z("path",{fill:"currentColor",d:"M784.512 230.272v-50.56a32 32 0 1 1 64 0v149.056a32 32 0 0 1-32 32H667.52a32 32 0 1 1 0-64h92.992A320 320 0 1 0 524.8 833.152a320 320 0 0 0 320-320h64a384 384 0 0 1-384 384 384 384 0 0 1-384-384 384 384 0 0 1 643.712-282.88z"},null,-1),WG=[KG];function UG(e,t,n,r,a,o){return R(),X("svg",jG,WG)}var YG=Xt(HG,[["render",UG]]);const qG=G({name:"ScaleToOriginal"}),GG={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},XG=Z("path",{fill:"currentColor",d:"M813.176 180.706a60.235 60.235 0 0 1 60.236 60.235v481.883a60.235 60.235 0 0 1-60.236 60.235H210.824a60.235 60.235 0 0 1-60.236-60.235V240.94a60.235 60.235 0 0 1 60.236-60.235h602.352zm0-60.235H210.824A120.47 120.47 0 0 0 90.353 240.94v481.883a120.47 120.47 0 0 0 120.47 120.47h602.353a120.47 120.47 0 0 0 120.471-120.47V240.94a120.47 120.47 0 0 0-120.47-120.47zm-120.47 180.705a30.118 30.118 0 0 0-30.118 30.118v301.177a30.118 30.118 0 0 0 60.236 0V331.294a30.118 30.118 0 0 0-30.118-30.118zm-361.412 0a30.118 30.118 0 0 0-30.118 30.118v301.177a30.118 30.118 0 1 0 60.236 0V331.294a30.118 30.118 0 0 0-30.118-30.118zM512 361.412a30.118 30.118 0 0 0-30.118 30.117v30.118a30.118 30.118 0 0 0 60.236 0V391.53A30.118 30.118 0 0 0 512 361.412zM512 512a30.118 30.118 0 0 0-30.118 30.118v30.117a30.118 30.118 0 0 0 60.236 0v-30.117A30.118 30.118 0 0 0 512 512z"},null,-1),ZG=[XG];function JG(e,t,n,r,a,o){return R(),X("svg",GG,ZG)}var QG=Xt(qG,[["render",JG]]);const eX=G({name:"Search"}),tX={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},nX=Z("path",{fill:"currentColor",d:"m795.904 750.72 124.992 124.928a32 32 0 0 1-45.248 45.248L750.656 795.904a416 416 0 1 1 45.248-45.248zM480 832a352 352 0 1 0 0-704 352 352 0 0 0 0 704z"},null,-1),rX=[nX];function aX(e,t,n,r,a,o){return R(),X("svg",tX,rX)}var 
oX=Xt(eX,[["render",aX]]);const iX=G({name:"StarFilled"}),lX={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},sX=Z("path",{fill:"currentColor",d:"M283.84 867.84 512 747.776l228.16 119.936a6.4 6.4 0 0 0 9.28-6.72l-43.52-254.08 184.512-179.904a6.4 6.4 0 0 0-3.52-10.88l-255.104-37.12L517.76 147.904a6.4 6.4 0 0 0-11.52 0L392.192 379.072l-255.104 37.12a6.4 6.4 0 0 0-3.52 10.88L318.08 606.976l-43.584 254.08a6.4 6.4 0 0 0 9.28 6.72z"},null,-1),uX=[sX];function cX(e,t,n,r,a,o){return R(),X("svg",lX,uX)}var oh=Xt(iX,[["render",cX]]);const dX=G({name:"Star"}),fX={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},hX=Z("path",{fill:"currentColor",d:"m512 747.84 228.16 119.936a6.4 6.4 0 0 0 9.28-6.72l-43.52-254.08 184.512-179.904a6.4 6.4 0 0 0-3.52-10.88l-255.104-37.12L517.76 147.904a6.4 6.4 0 0 0-11.52 0L392.192 379.072l-255.104 37.12a6.4 6.4 0 0 0-3.52 10.88L318.08 606.976l-43.584 254.08a6.4 6.4 0 0 0 9.28 6.72L512 747.84zM313.6 924.48a70.4 70.4 0 0 1-102.144-74.24l37.888-220.928L88.96 472.96A70.4 70.4 0 0 1 128 352.896l221.76-32.256 99.2-200.96a70.4 70.4 0 0 1 126.208 0l99.2 200.96 221.824 32.256a70.4 70.4 0 0 1 39.04 120.064L774.72 629.376l37.888 220.928a70.4 70.4 0 0 1-102.144 74.24L512 820.096l-198.4 104.32z"},null,-1),pX=[hX];function vX(e,t,n,r,a,o){return R(),X("svg",fX,pX)}var mX=Xt(dX,[["render",vX]]);const gX=G({name:"SuccessFilled"}),yX={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},bX=Z("path",{fill:"currentColor",d:"M512 64a448 448 0 1 1 0 896 448 448 0 0 1 0-896zm-55.808 536.384-99.52-99.584a38.4 38.4 0 1 0-54.336 54.336l126.72 126.72a38.272 38.272 0 0 0 54.336 0l262.4-262.464a38.4 38.4 0 1 0-54.272-54.336L456.192 600.384z"},null,-1),CX=[bX];function wX(e,t,n,r,a,o){return R(),X("svg",yX,CX)}var dE=Xt(gX,[["render",wX]]);const SX=G({name:"View"}),kX={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},$X=Z("path",{fill:"currentColor",d:"M512 160c320 0 512 352 512 352S832 864 512 864 0 512 0 512s192-352 512-352zm0 
64c-225.28 0-384.128 208.064-436.8 288 52.608 79.872 211.456 288 436.8 288 225.28 0 384.128-208.064 436.8-288-52.608-79.872-211.456-288-436.8-288zm0 64a224 224 0 1 1 0 448 224 224 0 0 1 0-448zm0 64a160.192 160.192 0 0 0-160 160c0 88.192 71.744 160 160 160s160-71.808 160-160-71.744-160-160-160z"},null,-1),OX=[$X];function PX(e,t,n,r,a,o){return R(),X("svg",kX,OX)}var TX=Xt(SX,[["render",PX]]);const xX=G({name:"WarningFilled"}),_X={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},EX=Z("path",{fill:"currentColor",d:"M512 64a448 448 0 1 1 0 896 448 448 0 0 1 0-896zm0 192a58.432 58.432 0 0 0-58.24 63.744l23.36 256.384a35.072 35.072 0 0 0 69.76 0l23.296-256.384A58.432 58.432 0 0 0 512 256zm0 512a51.2 51.2 0 1 0 0-102.4 51.2 51.2 0 0 0 0 102.4z"},null,-1),MX=[EX];function IX(e,t,n,r,a,o){return R(),X("svg",_X,MX)}var Wd=Xt(xX,[["render",IX]]);const NX=G({name:"ZoomIn"}),AX={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},DX=Z("path",{fill:"currentColor",d:"m795.904 750.72 124.992 124.928a32 32 0 0 1-45.248 45.248L750.656 795.904a416 416 0 1 1 45.248-45.248zM480 832a352 352 0 1 0 0-704 352 352 0 0 0 0 704zm-32-384v-96a32 32 0 0 1 64 0v96h96a32 32 0 0 1 0 64h-96v96a32 32 0 0 1-64 0v-96h-96a32 32 0 0 1 0-64h96z"},null,-1),RX=[DX];function LX(e,t,n,r,a,o){return R(),X("svg",AX,RX)}var fE=Xt(NX,[["render",LX]]);const FX=G({name:"ZoomOut"}),BX={viewBox:"0 0 1024 1024",xmlns:"http://www.w3.org/2000/svg"},VX=Z("path",{fill:"currentColor",d:"m795.904 750.72 124.992 124.928a32 32 0 0 1-45.248 45.248L750.656 795.904a416 416 0 1 1 45.248-45.248zM480 832a352 352 0 1 0 0-704 352 352 0 0 0 0 704zM352 448h256a32 32 0 0 1 0 64H352a32 32 0 0 1 0-64z"},null,-1),zX=[VX];function HX(e,t,n,r,a,o){return R(),X("svg",BX,zX)}var jX=Xt(FX,[["render",HX]]);const h0=Symbol(),X$="__elPropsReservedKey";function ho(e,t){if(!zt(e)||!!e[X$])return e;const{values:n,required:r,default:a,type:o,validator:i}=e,l=n||i?c=>{let 
d=!1,f=[];if(n&&(f=Array.from(n),Mt(e,"default")&&f.push(a),d||(d=f.includes(c))),i&&(d||(d=i(c))),!d&&f.length>0){const p=[...new Set(f)].map(v=>JSON.stringify(v)).join(", ");hL(`Invalid prop: validation failed${t?` for prop "${t}"`:""}. Expected one of [${p}], got value ${JSON.stringify(c)}.`)}return d}:void 0,s={type:zt(o)&&Object.getOwnPropertySymbols(o).includes(h0)?o[h0]:o,required:!!r,validator:l,[X$]:!0};return Mt(e,"default")&&(s.default=a),s}const Ze=e=>Y8(Object.entries(e).map(([t,n])=>[t,ho(n,t)])),Le=e=>({[h0]:e}),wr=Le([String,Object,Function]),KX={Close:Ma},Vm={Close:Ma,SuccessFilled:dE,InfoFilled:LC,WarningFilled:Wd,CircleCloseFilled:RC},yi={success:dE,warning:Wd,error:RC,info:LC},hE={validating:gl,success:mv,error:gi},xt=(e,t)=>{if(e.install=n=>{for(const r of[e,...Object.values(t!=null?t:{})])n.component(r.name,r)},t)for(const[n,r]of Object.entries(t))e[n]=r;return e},pE=(e,t)=>(e.install=n=>{e._context=n._context,n.config.globalProperties[t]=e},e),En=e=>(e.install=Qt,e),FC=(...e)=>t=>{e.forEach(n=>{Ct(n)?n(t):n.value=t})};class WX extends Error{constructor(t){super(t),this.name="ElementPlusError"}}function qn(e,t){throw new WX(`[${e}] ${t}`)}function oo(e,t="px"){if(!e)return"";if(wt(e))return e;if(Yt(e))return`${e}${t}`}const Ge={tab:"Tab",enter:"Enter",space:"Space",left:"ArrowLeft",up:"ArrowUp",right:"ArrowRight",down:"ArrowDown",esc:"Escape",delete:"Delete",backspace:"Backspace",numpadEnter:"NumpadEnter",pageUp:"PageUp",pageDown:"PageDown",home:"Home",end:"End"},UX=["year","month","date","dates","week","datetime","datetimerange","daterange","monthrange"],ay=["sun","mon","tue","wed","thu","fri","sat"],Pt="update:modelValue",ir="change",cl="input",Bo=["","default","small","large"],YX={large:40,default:32,small:24},vE=(e="default")=>YX[e||"default"],va=e=>["",...Bo].includes(e),mE=e=>[...UX].includes(e);var 
ka=(e=>(e[e.TEXT=1]="TEXT",e[e.CLASS=2]="CLASS",e[e.STYLE=4]="STYLE",e[e.PROPS=8]="PROPS",e[e.FULL_PROPS=16]="FULL_PROPS",e[e.HYDRATE_EVENTS=32]="HYDRATE_EVENTS",e[e.STABLE_FRAGMENT=64]="STABLE_FRAGMENT",e[e.KEYED_FRAGMENT=128]="KEYED_FRAGMENT",e[e.UNKEYED_FRAGMENT=256]="UNKEYED_FRAGMENT",e[e.NEED_PATCH=512]="NEED_PATCH",e[e.DYNAMIC_SLOTS=1024]="DYNAMIC_SLOTS",e[e.HOISTED=-1]="HOISTED",e[e.BAIL=-2]="BAIL",e))(ka||{});function gE(e){return rn(e)&&e.type===Fe}function qX(e){return rn(e)&&e.type===Ir}function GX(e){return rn(e)&&!gE(e)&&!qX(e)}const XX=e=>{if(!rn(e))return{};const t=e.props||{},n=(rn(e.type)?e.type.props:void 0)||{},r={};return Object.keys(n).forEach(a=>{Mt(n[a],"default")&&(r[a]=n[a].default)}),Object.keys(t).forEach(a=>{r[_a(a)]=t[a]}),r},ZX=e=>{if(!pt(e)||e.length>1)throw new Error("expect to receive a single Vue element child");return e[0]},Z$=e=>e**3,JX=e=>e<.5?Z$(e*2)/2:1-Z$((1-e)*2)/2,J$=e=>[...new Set(e)],rs=e=>!e&&e!==0?[]:Array.isArray(e)?e:[e],BC=()=>Bt&&/firefox/i.test(window.navigator.userAgent),zm=e=>/([(\uAC00-\uD7AF)|(\u3130-\u318F)])+/gi.test(e),yE=e=>Bt?window.requestAnimationFrame(e):setTimeout(e,16),bE=e=>Bt?window.cancelAnimationFrame(e):clearTimeout(e),Pf=()=>Math.floor(Math.random()*1e4),xn=e=>e,QX=["class","style"],eZ=/^on[A-Z]/,VC=(e={})=>{const{excludeListeners:t=!1,excludeKeys:n=[]}=e,r=n.concat(QX),a=$t();return x(a?()=>{var o;return Y8(Object.entries((o=a.proxy)==null?void 
0:o.$attrs).filter(([i])=>!r.includes(i)&&!(t&&eZ.test(i))))}:()=>({}))},CE=Symbol("breadcrumbKey"),wE=Symbol("buttonGroupContextKey"),SE=Symbol("carouselContextKey"),kE=Symbol("collapseContextKey"),$E=Symbol(),OE=Symbol("dialogInjectionKey"),ga=Symbol("formContextKey"),Ia=Symbol("formItemContextKey"),PE=Symbol("elPaginationKey"),TE=Symbol("radioGroupKey"),xE=Symbol("rowContextKey"),_E=Symbol("scrollbarContextKey"),Hm=Symbol("tabsRootContextKey"),EE=Symbol("uploadContextKey"),zC=Symbol("popper"),ME=Symbol("popperContent"),jm=Symbol("tooltipV2"),IE=Symbol("tooltipV2Content"),oy="tooltip_v2.open",NE=e=>{const t=$t();return x(()=>{var n,r;return(r=(n=t.proxy)==null?void 0:n.$props[e])!=null?r:void 0})},gv=H();function kc(e,t=void 0){const n=$t()?ve($E,gv):gv;return e?x(()=>{var r,a;return(a=(r=n.value)==null?void 0:r[e])!=null?a:t}):n}const AE=(e,t,n=!1)=>{var r;const a=!!$t(),o=a?kc():void 0,i=(r=t==null?void 0:t.provide)!=null?r:a?ot:void 0;if(!i)return;const l=x(()=>{const s=A(e);return o!=null&&o.value?tZ(o.value,s):s});return i($E,l),(n||!gv.value)&&(gv.value=l.value),l},tZ=(e,t)=>{var n;const r=[...new Set([...f0(e),...f0(t)])],a={};for(const o of r)a[o]=(n=t[o])!=null?n:e[o];return a},Km=ho({type:String,values:Bo,required:!1}),Gn=(e,t={})=>{const n=H(void 0),r=t.prop?n:NE("size"),a=t.global?n:kc("size"),o=t.form?{size:void 0}:ve(ga,void 0),i=t.formItem?{size:void 0}:ve(Ia,void 0);return x(()=>r.value||A(e)||(i==null?void 0:i.size)||(o==null?void 0:o.size)||a.value||"")},Ms=e=>{const t=NE("disabled"),n=ve(ga,void 0);return x(()=>t.value||A(e)||(n==null?void 0:n.disabled)||!1)},Tf=({from:e,replacement:t,scope:n,version:r,ref:a,type:o="API"},i)=>{ce(()=>A(i),l=>{},{immediate:!0})},DE=(e,t,n)=>{let r={offsetX:0,offsetY:0};const a=l=>{const 
s=l.clientX,c=l.clientY,{offsetX:d,offsetY:f}=r,p=e.value.getBoundingClientRect(),v=p.left,m=p.top,y=p.width,b=p.height,C=document.documentElement.clientWidth,S=document.documentElement.clientHeight,w=-v+d,k=-m+f,$=C-v-y+d,O=S-m-b+f,T=I=>{const L=Math.min(Math.max(d+I.clientX-s,w),$),j=Math.min(Math.max(f+I.clientY-c,k),O);r={offsetX:L,offsetY:j},e.value.style.transform=`translate(${oo(L)}, ${oo(j)})`},_=()=>{document.removeEventListener("mousemove",T),document.removeEventListener("mouseup",_)};document.addEventListener("mousemove",T),document.addEventListener("mouseup",_)},o=()=>{t.value&&e.value&&t.value.addEventListener("mousedown",a)},i=()=>{t.value&&e.value&&t.value.removeEventListener("mousedown",a)};et(()=>{Wn(()=>{n.value?o():i()})}),Lt(()=>{i()})},nZ=e=>({focus:()=>{var t,n;(n=(t=e.value)==null?void 0:t.focus)==null||n.call(t)}}),$c=()=>{const e=ve(ga,void 0),t=ve(Ia,void 0);return{form:e,formItem:t}};var rZ={name:"en",el:{colorpicker:{confirm:"OK",clear:"Clear"},datepicker:{now:"Now",today:"Today",cancel:"Cancel",clear:"Clear",confirm:"OK",selectDate:"Select date",selectTime:"Select time",startDate:"Start Date",startTime:"Start Time",endDate:"End Date",endTime:"End Time",prevYear:"Previous Year",nextYear:"Next Year",prevMonth:"Previous Month",nextMonth:"Next Month",year:"",month1:"January",month2:"February",month3:"March",month4:"April",month5:"May",month6:"June",month7:"July",month8:"August",month9:"September",month10:"October",month11:"November",month12:"December",week:"week",weeks:{sun:"Sun",mon:"Mon",tue:"Tue",wed:"Wed",thu:"Thu",fri:"Fri",sat:"Sat"},months:{jan:"Jan",feb:"Feb",mar:"Mar",apr:"Apr",may:"May",jun:"Jun",jul:"Jul",aug:"Aug",sep:"Sep",oct:"Oct",nov:"Nov",dec:"Dec"}},select:{loading:"Loading",noMatch:"No matching data",noData:"No data",placeholder:"Select"},cascader:{noMatch:"No matching data",loading:"Loading",placeholder:"Select",noData:"No data"},pagination:{goto:"Go to",pagesize:"/page",total:"Total 
{total}",pageClassifier:"",deprecationWarning:"Deprecated usages detected, please refer to the el-pagination documentation for more details"},messagebox:{title:"Message",confirm:"OK",cancel:"Cancel",error:"Illegal input"},upload:{deleteTip:"press delete to remove",delete:"Delete",preview:"Preview",continue:"Continue"},table:{emptyText:"No Data",confirmFilter:"Confirm",resetFilter:"Reset",clearFilter:"All",sumText:"Sum"},tree:{emptyText:"No Data"},transfer:{noMatch:"No matching data",noData:"No data",titles:["List 1","List 2"],filterPlaceholder:"Enter keyword",noCheckedFormat:"{total} items",hasCheckedFormat:"{checked}/{total} checked"},image:{error:"FAILED"},pageHeader:{title:"Back"},popconfirm:{confirmButtonText:"Yes",cancelButtonText:"No"}}};const aZ=e=>(t,n)=>oZ(t,n,A(e)),oZ=(e,t,n)=>vn(n,e,e).replace(/\{(\w+)\}/g,(r,a)=>{var o;return`${(o=t==null?void 0:t[a])!=null?o:`{${a}}`}`}),iZ=e=>{const t=x(()=>A(e).name),n=_n(e)?e:H(e);return{lang:t,locale:n,t:aZ(e)}},ln=()=>{const e=kc("locale");return iZ(x(()=>e.value||rZ))},RE=e=>{if(_n(e)||qn("[useLockscreen]","You need to pass a ref param to this function"),!Bt||to(document.body,"el-popup-parent--hidden"))return;let t=0,n=!1,r="0",a=0;const o=()=>{Br(document.body,"el-popup-parent--hidden"),n&&(document.body.style.paddingRight=r)};ce(e,i=>{if(!i){o();return}n=!to(document.body,"el-popup-parent--hidden"),n&&(r=document.body.style.paddingRight,a=Number.parseInt($o(document.body,"paddingRight"),10)),t=NU();const l=document.documentElement.clientHeight<document.body.scrollHeight,s=$o(document.body,"overflowY");t>0&&(l||s==="scroll")&&n&&(document.body.style.paddingRight=`${a+t}px`),xo(document.body,"el-popup-parent--hidden")}),Z_(()=>o())},Tu=[],lZ=e=>{Tu.length!==0&&e.code===Ge.esc&&(e.stopPropagation(),Tu[Tu.length-1].handleClose())},LE=(e,t)=>{ce(t,n=>{n?Tu.push(e):Tu.splice(Tu.indexOf(e),1)})};Bt&&Hn(document,"keydown",lZ);const sZ=ho({type:Le(Boolean),default:null}),uZ=ho({type:Le(Function)}),cZ=e=>{const 
t={[e]:sZ,[`onUpdate:${e}`]:uZ},n=[`update:${e}`];return{useModelToggle:({indicator:a,shouldHideWhenRouteChanges:o,shouldProceed:i,onShow:l,onHide:s})=>{const c=$t(),d=c.props,{emit:f}=c,p=`update:${e}`,v=x(()=>Ct(d[`onUpdate:${e}`])),m=x(()=>d[e]===null),y=()=>{a.value!==!0&&(a.value=!0,Ct(l)&&l())},b=()=>{a.value!==!1&&(a.value=!1,Ct(s)&&s())},C=()=>{if(d.disabled===!0||Ct(i)&&!i())return;const $=v.value&&Bt;$&&f(p,!0),(m.value||!$)&&y()},S=()=>{if(d.disabled===!0||!Bt)return;const $=v.value&&Bt;$&&f(p,!1),(m.value||!$)&&b()},w=$=>{!yr($)||(d.disabled&&$?v.value&&f(p,!1):a.value!==$&&($?y():b()))},k=()=>{a.value?S():C()};return ce(()=>d[e],w),o&&c.appContext.config.globalProperties.$route!==void 0&&ce(()=>Te({},c.proxy.$route),()=>{o.value&&a.value&&S()}),et(()=>{w(d[e])}),{hide:S,show:C,toggle:k}},useModelToggleProps:t,useModelToggleEmits:n}},dZ=(e,t,n)=>{const r=o=>{n(o)&&o.stopImmediatePropagation()};let a;ce(()=>e.value,o=>{o?a=Hn(document,t,r,!0):a==null||a()},{immediate:!0})},FE=(e,t)=>{let n;ce(()=>e.value,r=>{var a,o;r?(n=document.activeElement,_n(t)&&((o=(a=t.value).focus)==null||o.call(a))):n.focus()})},HC=e=>{if(!e)return{onClick:Qt,onMousedown:Qt,onMouseup:Qt};let t=!1,n=!1;return{onClick:i=>{t&&n&&e(i),t=n=!1},onMousedown:i=>{t=i.target===i.currentTarget},onMouseup:i=>{n=i.target===i.currentTarget}}},fZ=(e,t=0)=>{if(t===0)return e;const n=H(!1);let r=0;const a=()=>{r&&clearTimeout(r),r=window.setTimeout(()=>{n.value=e.value},t)};return et(a),ce(()=>e.value,o=>{o?a():n.value=o}),n};function hZ(){let e;const t=(r,a)=>{n(),e=window.setTimeout(r,a)},n=()=>window.clearTimeout(e);return Lm(()=>n()),{registerTimeout:t,cancelTimeout:n}}const pZ={prefix:Math.floor(Math.random()*1e4),current:0},vZ=Symbol("elIdInjection"),jC=e=>{const t=ve(vZ,pZ);return x(()=>A(e)||`el-id-${t.prefix}-${t.current++}`)},mZ=e=>{const t=n=>{const r=n;r.key===Ge.esc&&(e==null||e(r))};et(()=>{gn(document,"keydown",t)}),Lt(()=>{Bn(document,"keydown",t)})};let Q$;const 
BE=`el-popper-container-${Pf()}`,VE=`#${BE}`,gZ=()=>{const e=document.createElement("div");return e.id=BE,document.body.appendChild(e),e},yZ=()=>{hc(()=>{!Bt||(!Q$||!document.body.querySelector(VE))&&(Q$=gZ())})},bZ=Ze({showAfter:{type:Number,default:0},hideAfter:{type:Number,default:200}}),CZ=({showAfter:e,hideAfter:t,open:n,close:r})=>{const{registerTimeout:a}=hZ();return{onOpen:()=>{a(()=>{n()},A(e))},onClose:()=>{a(()=>{r()},A(t))}}},zE=Symbol("elForwardRef"),wZ=e=>{ot(zE,{setForwardRef:n=>{e.value=n}})},SZ=e=>({mounted(t){e(t)},updated(t){e(t)},unmounted(){e(null)}}),kZ="el",$Z="is-",kl=(e,t,n,r,a)=>{let o=`${e}-${t}`;return n&&(o+=`-${n}`),r&&(o+=`__${r}`),a&&(o+=`--${a}`),o},De=e=>{const t=kc("namespace"),n=x(()=>t.value||kZ);return{namespace:n,b:(f="")=>kl(A(n),e,f,"",""),e:f=>f?kl(A(n),e,"",f,""):"",m:f=>f?kl(A(n),e,"","",f):"",be:(f,p)=>f&&p?kl(A(n),e,f,p,""):"",em:(f,p)=>f&&p?kl(A(n),e,"",f,p):"",bm:(f,p)=>f&&p?kl(A(n),e,f,"",p):"",bem:(f,p,v)=>f&&p&&v?kl(A(n),e,f,p,v):"",is:(f,...p)=>{const v=p.length>=1?p[0]:!0;return f&&v?`${$Z}${f}`:""}}},e4=H(0),Pi=()=>{const e=kc("zIndex",2e3),t=x(()=>e.value+e4.value);return{initialZIndex:e,currentZIndex:t,nextZIndex:()=>(e4.value++,t.value)}};function KC(e){return e.split("-")[0]}function HE(e){return e.split("-")[1]}function WC(e){return["top","bottom"].includes(KC(e))?"x":"y"}function jE(e){return e==="y"?"height":"width"}function t4(e,t,n){let{reference:r,floating:a}=e;const o=r.x+r.width/2-a.width/2,i=r.y+r.height/2-a.height/2,l=WC(t),s=jE(l),c=r[s]/2-a[s]/2,d=l==="x";let f;switch(KC(t)){case"top":f={x:o,y:r.y-a.height};break;case"bottom":f={x:o,y:r.y+r.height};break;case"right":f={x:r.x+r.width,y:i};break;case"left":f={x:r.x-a.width,y:i};break;default:f={x:r.x,y:r.y}}switch(HE(t)){case"start":f[l]-=c*(n&&d?-1:1);break;case"end":f[l]+=c*(n&&d?-1:1)}return f}const OZ=async(e,t,n)=>{const{placement:r="bottom",strategy:a="absolute",middleware:o=[],platform:i}=n,l=await(i.isRTL==null?void 0:i.isRTL(t));let 
s=await i.getElementRects({reference:e,floating:t,strategy:a}),{x:c,y:d}=t4(s,r,l),f=r,p={};for(let v=0;v<o.length;v++){const{name:m,fn:y}=o[v],{x:b,y:C,data:S,reset:w}=await y({x:c,y:d,initialPlacement:r,placement:f,strategy:a,middlewareData:p,rects:s,platform:i,elements:{reference:e,floating:t}});c=b!=null?b:c,d=C!=null?C:d,p=Ke(Te({},p),{[m]:Te(Te({},p[m]),S)}),w&&(typeof w=="object"&&(w.placement&&(f=w.placement),w.rects&&(s=w.rects===!0?await i.getElementRects({reference:e,floating:t,strategy:a}):w.rects),{x:c,y:d}=t4(s,f,l)),v=-1)}return{x:c,y:d,placement:f,strategy:a,middlewareData:p}};function PZ(e){return typeof e!="number"?function(t){return Te({top:0,right:0,bottom:0,left:0},t)}(e):{top:e,right:e,bottom:e,left:e}}function n4(e){return Ke(Te({},e),{top:e.y,left:e.x,right:e.x+e.width,bottom:e.y+e.height})}const TZ=Math.min,xZ=Math.max;function _Z(e,t,n){return xZ(e,TZ(t,n))}const EZ=e=>({name:"arrow",options:e,async fn(t){const{element:n,padding:r=0}=e!=null?e:{},{x:a,y:o,placement:i,rects:l,platform:s}=t;if(n==null)return{};const c=PZ(r),d={x:a,y:o},f=WC(i),p=jE(f),v=await s.getDimensions(n),m=f==="y"?"top":"left",y=f==="y"?"bottom":"right",b=l.reference[p]+l.reference[f]-d[f]-l.floating[p],C=d[f]-l.reference[f],S=await(s.getOffsetParent==null?void 0:s.getOffsetParent(n)),w=S?f==="y"?S.clientHeight||0:S.clientWidth||0:0,k=b/2-C/2,$=c[m],O=w-v[p]-c[y],T=w/2-v[p]/2+k,_=_Z($,T,O);return{data:{[f]:_,centerOffset:T-_}}}}),MZ=["top","right","bottom","left"];MZ.reduce((e,t)=>e.concat(t,t+"-start",t+"-end"),[]);const IZ=function(e){return e===void 0&&(e=0),{name:"offset",options:e,async fn(t){const{x:n,y:r,placement:a,rects:o,platform:i,elements:l}=t,s=function(c,d,f,p){p===void 0&&(p=!1);const v=KC(c),m=HE(c),y=WC(c)==="x",b=["left","top"].includes(v)?-1:1,C=p&&y?-1:1,S=typeof f=="function"?f(Ke(Te({},d),{placement:c})):f,w=typeof 
S=="number";let{mainAxis:k,crossAxis:$,alignmentAxis:O}=w?{mainAxis:S,crossAxis:0,alignmentAxis:null}:Te({mainAxis:0,crossAxis:0,alignmentAxis:null},S);return m&&typeof O=="number"&&($=m==="end"?-1*O:O),y?{x:$*C,y:k*b}:{x:k*b,y:$*C}}(a,o,e,await(i.isRTL==null?void 0:i.isRTL(l.floating)));return{x:n+s.x,y:r+s.y,data:s}}}};function KE(e){return e&&e.document&&e.location&&e.alert&&e.setInterval}function Ti(e){if(e==null)return window;if(!KE(e)){const t=e.ownerDocument;return t&&t.defaultView||window}return e}function xf(e){return Ti(e).getComputedStyle(e)}function di(e){return KE(e)?"":e?(e.nodeName||"").toLowerCase():""}function No(e){return e instanceof Ti(e).HTMLElement}function Uu(e){return e instanceof Ti(e).Element}function UC(e){return e instanceof Ti(e).ShadowRoot||e instanceof ShadowRoot}function Wm(e){const{overflow:t,overflowX:n,overflowY:r}=xf(e);return/auto|scroll|overlay|hidden/.test(t+r+n)}function NZ(e){return["table","td","th"].includes(di(e))}function r4(e){const t=navigator.userAgent.toLowerCase().includes("firefox"),n=xf(e);return n.transform!=="none"||n.perspective!=="none"||n.contain==="paint"||["transform","perspective"].includes(n.willChange)||t&&n.willChange==="filter"||t&&!!n.filter&&n.filter!=="none"}function WE(){return!/^((?!chrome|android).)*safari/i.test(navigator.userAgent)}const a4=Math.min,vd=Math.max,yv=Math.round;function as(e,t,n){var r,a,o,i;t===void 0&&(t=!1),n===void 0&&(n=!1);const l=e.getBoundingClientRect();let s=1,c=1;t&&No(e)&&(s=e.offsetWidth>0&&yv(l.width)/e.offsetWidth||1,c=e.offsetHeight>0&&yv(l.height)/e.offsetHeight||1);const d=Uu(e)?Ti(e):window,f=!WE()&&n,p=(l.left+(f&&(r=(a=d.visualViewport)==null?void 0:a.offsetLeft)!=null?r:0))/s,v=(l.top+(f&&(o=(i=d.visualViewport)==null?void 0:i.offsetTop)!=null?o:0))/c,m=l.width/s,y=l.height/c;return{width:m,height:y,top:v,right:p+m,bottom:v+y,left:p,x:p,y:v}}function el(e){return(t=e,(t instanceof Ti(t).Node?e.ownerDocument:e.document)||window.document).documentElement;var 
t}function Um(e){return Uu(e)?{scrollLeft:e.scrollLeft,scrollTop:e.scrollTop}:{scrollLeft:e.pageXOffset,scrollTop:e.pageYOffset}}function UE(e){return as(el(e)).left+Um(e).scrollLeft}function AZ(e,t,n){const r=No(t),a=el(t),o=as(e,r&&function(s){const c=as(s);return yv(c.width)!==s.offsetWidth||yv(c.height)!==s.offsetHeight}(t),n==="fixed");let i={scrollLeft:0,scrollTop:0};const l={x:0,y:0};if(r||!r&&n!=="fixed")if((di(t)!=="body"||Wm(a))&&(i=Um(t)),No(t)){const s=as(t,!0);l.x=s.x+t.clientLeft,l.y=s.y+t.clientTop}else a&&(l.x=UE(a));return{x:o.left+i.scrollLeft-l.x,y:o.top+i.scrollTop-l.y,width:o.width,height:o.height}}function YE(e){return di(e)==="html"?e:e.assignedSlot||e.parentNode||(UC(e)?e.host:null)||el(e)}function o4(e){return No(e)&&getComputedStyle(e).position!=="fixed"?e.offsetParent:null}function p0(e){const t=Ti(e);let n=o4(e);for(;n&&NZ(n)&&getComputedStyle(n).position==="static";)n=o4(n);return n&&(di(n)==="html"||di(n)==="body"&&getComputedStyle(n).position==="static"&&!r4(n))?t:n||function(r){let a=YE(r);for(UC(a)&&(a=a.host);No(a)&&!["html","body"].includes(di(a));){if(r4(a))return a;a=a.parentNode}return null}(e)||t}function i4(e){if(No(e))return{width:e.offsetWidth,height:e.offsetHeight};const t=as(e);return{width:t.width,height:t.height}}function qE(e){const t=YE(e);return["html","body","#document"].includes(di(t))?e.ownerDocument.body:No(t)&&Wm(t)?t:qE(t)}function GE(e,t){var n;t===void 0&&(t=[]);const r=qE(e),a=r===((n=e.ownerDocument)==null?void 0:n.body),o=Ti(r),i=a?[o].concat(o.visualViewport||[],Wm(r)?r:[]):r,l=t.concat(i);return a?l:l.concat(GE(i))}function l4(e,t,n){return t==="viewport"?n4(function(r,a){const o=Ti(r),i=el(r),l=o.visualViewport;let s=i.clientWidth,c=i.clientHeight,d=0,f=0;if(l){s=l.width,c=l.height;const p=WE();(p||!p&&a==="fixed")&&(d=l.offsetLeft,f=l.offsetTop)}return{width:s,height:c,x:d,y:f}}(e,n)):Uu(t)?function(r,a){const 
o=as(r,!1,a==="fixed"),i=o.top+r.clientTop,l=o.left+r.clientLeft;return{top:i,left:l,x:l,y:i,right:l+r.clientWidth,bottom:i+r.clientHeight,width:r.clientWidth,height:r.clientHeight}}(t,n):n4(function(r){var a;const o=el(r),i=Um(r),l=(a=r.ownerDocument)==null?void 0:a.body,s=vd(o.scrollWidth,o.clientWidth,l?l.scrollWidth:0,l?l.clientWidth:0),c=vd(o.scrollHeight,o.clientHeight,l?l.scrollHeight:0,l?l.clientHeight:0);let d=-i.scrollLeft+UE(r);const f=-i.scrollTop;return xf(l||o).direction==="rtl"&&(d+=vd(o.clientWidth,l?l.clientWidth:0)-s),{width:s,height:c,x:d,y:f}}(el(e)))}function DZ(e){const t=GE(e),n=["absolute","fixed"].includes(xf(e).position)&&No(e)?p0(e):e;return Uu(n)?t.filter(r=>Uu(r)&&function(a,o){const i=o==null||o.getRootNode==null?void 0:o.getRootNode();if(a!=null&&a.contains(o))return!0;if(i&&UC(i)){let l=o;do{if(l&&a===l)return!0;l=l.parentNode||l.host}while(l)}return!1}(r,n)&&di(r)!=="body"):[]}const RZ={getClippingRect:function(e){let{element:t,boundary:n,rootBoundary:r,strategy:a}=e;const o=[...n==="clippingAncestors"?DZ(t):[].concat(n),r],i=o[0],l=o.reduce((s,c)=>{const d=l4(t,c,a);return s.top=vd(d.top,s.top),s.right=a4(d.right,s.right),s.bottom=a4(d.bottom,s.bottom),s.left=vd(d.left,s.left),s},l4(t,i,a));return{width:l.right-l.left,height:l.bottom-l.top,x:l.left,y:l.top}},convertOffsetParentRelativeRectToViewportRelativeRect:function(e){let{rect:t,offsetParent:n,strategy:r}=e;const a=No(n),o=el(n);if(n===o)return t;let i={scrollLeft:0,scrollTop:0};const l={x:0,y:0};if((a||!a&&r!=="fixed")&&((di(n)!=="body"||Wm(o))&&(i=Um(n)),No(n))){const s=as(n,!0);l.x=s.x+n.clientLeft,l.y=s.y+n.clientTop}return 
Ke(Te({},t),{x:t.x-i.scrollLeft+l.x,y:t.y-i.scrollTop+l.y})},isElement:Uu,getDimensions:i4,getOffsetParent:p0,getDocumentElement:el,getElementRects:e=>{let{reference:t,floating:n,strategy:r}=e;return{reference:AZ(t,p0(n),r),floating:Ke(Te({},i4(n)),{x:0,y:0})}},getClientRects:e=>Array.from(e.getClientRects()),isRTL:e=>xf(e).direction==="rtl"},LZ=(e,t,n)=>OZ(e,t,Te({platform:RZ},n));Ze({});const FZ=e=>{if(!Bt)return;if(!e)return e;const t=eo(e);return t||(_n(e)?t:e)},BZ=({middleware:e,placement:t,strategy:n})=>{const r=H(),a=H(),o=H(),i=H(),l=H({}),s={x:o,y:i,placement:t,strategy:n,middlewareData:l},c=async()=>{if(!Bt)return;const d=FZ(r),f=eo(a);if(!d||!f)return;const p=await LZ(d,f,{placement:A(t),strategy:A(n),middleware:A(e)});Object.keys(s).forEach(v=>{s[v].value=p[v]})};return et(()=>{Wn(()=>{c()})}),Ke(Te({},s),{update:c,referenceRef:r,contentRef:a})},VZ=({arrowRef:e,padding:t})=>({name:"arrow",options:{element:e,padding:t},fn(n){const r=A(e);return r?EZ({element:r,padding:t}).fn(n):{}}}),zZ="2.1.9",s4=Symbol("INSTALLED_KEY"),HZ=(e=[])=>({version:zZ,install:(n,r)=>{n[s4]||(n[s4]=!0,e.forEach(a=>n.use(a)),r&&AE(r,n,!0))}}),jZ=Ze({zIndex:{type:Le([Number,String]),default:100},target:{type:String,default:""},offset:{type:Number,default:0},position:{type:String,values:["top","bottom"],default:"top"}}),KZ={scroll:({scrollTop:e,fixed:t})=>typeof e=="number"&&typeof t=="boolean",change:e=>typeof e=="boolean"};var Ae=(e,t)=>{const n=e.__vccOpts||e;for(const[r,a]of t)n[r]=a;return n};const WZ={name:"ElAffix"},UZ=G(Ke(Te({},WZ),{props:jZ,emits:KZ,setup(e,{expose:t,emit:n}){const r=e,a="ElAffix",o=De("affix"),i=Qn(),l=Qn(),s=Qn(),{height:c}=OU(),{height:d,width:f,top:p,bottom:v,update:m}=U$(l),y=U$(i),b=H(!1),C=H(0),S=H(0),w=x(()=>({height:b.value?`${d.value}px`:"",width:b.value?`${f.value}px`:""})),k=x(()=>{if(!b.value)return{};const 
T=r.offset?`${r.offset}px`:0;return{height:`${d.value}px`,width:`${f.value}px`,top:r.position==="top"?T:"",bottom:r.position==="bottom"?T:"",transform:S.value?`translateY(${S.value}px)`:"",zIndex:r.zIndex}}),$=()=>{if(!!s.value)if(C.value=s.value instanceof Window?document.documentElement.scrollTop:s.value.scrollTop||0,r.position==="top")if(r.target){const T=y.bottom.value-r.offset-d.value;b.value=r.offset>p.value&&y.bottom.value>0,S.value=T<0?T:0}else b.value=r.offset>p.value;else if(r.target){const T=c.value-y.top.value-r.offset-d.value;b.value=c.value-r.offset<v.value&&c.value>y.top.value,S.value=T<0?-T:0}else b.value=c.value-r.offset<v.value},O=()=>{n("scroll",{scrollTop:C.value,fixed:b.value})};return ce(b,T=>n("change",T)),et(()=>{var T;r.target?(i.value=(T=document.querySelector(r.target))!=null?T:void 0,i.value||qn(a,`Target is not existed: ${r.target}`)):i.value=document.documentElement,s.value=DC(l.value,!0),m()}),Hn(s,"scroll",O),Wn($),t({update:$}),(T,_)=>(R(),X("div",{ref_key:"root",ref:l,class:U(A(o).b()),style:Xe(A(w))},[Z("div",{class:U({[A(o).m("fixed")]:b.value}),style:Xe(A(k))},[Oe(T.$slots,"default")],6)],6))}}));var YZ=Ae(UZ,[["__file","/home/runner/work/element-plus/element-plus/packages/components/affix/src/affix.vue"]]);const qZ=xt(YZ),GZ=Ze({size:{type:Le([Number,String])},color:{type:String}}),XZ={name:"ElIcon",inheritAttrs:!1},ZZ=G(Ke(Te({},XZ),{props:GZ,setup(e){const t=e,n=De("icon"),r=x(()=>!t.size&&!t.color?{}:{fontSize:sa(t.size)?void 0:oo(t.size),"--color":t.color});return(a,o)=>(R(),X("i",hn({class:A(n).b(),style:A(r)},a.$attrs),[Oe(a.$slots,"default")],16))}}));var JZ=Ae(ZZ,[["__file","/home/runner/work/element-plus/element-plus/packages/components/icon/src/icon.vue"]]);const 
ft=xt(JZ),QZ=["light","dark"],eJ=Ze({title:{type:String,default:""},description:{type:String,default:""},type:{type:String,values:f0(yi),default:"info"},closable:{type:Boolean,default:!0},closeText:{type:String,default:""},showIcon:Boolean,center:Boolean,effect:{type:String,values:QZ,default:"light"}}),tJ={close:e=>e instanceof MouseEvent},nJ={name:"ElAlert"},rJ=G(Ke(Te({},nJ),{props:eJ,emits:tJ,setup(e,{emit:t}){const n=e,{Close:r}=Vm,a=wf(),o=De("alert"),i=H(!0),l=x(()=>yi[n.type]||yi.info),s=x(()=>n.description||{[o.is("big")]:a.default}),c=x(()=>n.description||{[o.is("bold")]:a.default}),d=f=>{i.value=!1,t("close",f)};return(f,p)=>(R(),fe(Vn,{name:A(o).b("fade")},{default:re(()=>[at(Z("div",{class:U([A(o).b(),A(o).m(f.type),A(o).is("center",f.center),A(o).is(f.effect)]),role:"alert"},[f.showIcon&&A(l)?(R(),fe(A(ft),{key:0,class:U([A(o).e("icon"),A(s)])},{default:re(()=>[(R(),fe(Kt(A(l))))]),_:1},8,["class"])):se("v-if",!0),Z("div",{class:U(A(o).e("content"))},[f.title||f.$slots.title?(R(),X("span",{key:0,class:U([A(o).e("title"),A(c)])},[Oe(f.$slots,"title",{},()=>[yt(Me(f.title),1)])],2)):se("v-if",!0),f.$slots.default||f.description?(R(),X("p",{key:1,class:U(A(o).e("description"))},[Oe(f.$slots,"default",{},()=>[yt(Me(f.description),1)])],2)):se("v-if",!0),f.closable?(R(),X(Fe,{key:2},[f.closeText?(R(),X("div",{key:0,class:U([A(o).e("close-btn"),A(o).is("customed")]),onClick:d},Me(f.closeText),3)):(R(),fe(A(ft),{key:1,class:U(A(o).e("close-btn")),onClick:d},{default:re(()=>[g(A(r))]),_:1},8,["class"]))],2112)):se("v-if",!0)],2)],2),[[_t,i.value]])]),_:3},8,["name"]))}}));var aJ=Ae(rJ,[["__file","/home/runner/work/element-plus/element-plus/packages/components/alert/src/alert.vue"]]);const oJ=xt(aJ);let Ha;const iJ=`
  height:0 !important;
  visibility:hidden !important;
  overflow:hidden !important;
  position:absolute !important;
  z-index:-1000 !important;
  top:0 !important;
  right:0 !important;
`,lJ=["letter-spacing","line-height","padding-top","padding-bottom","font-family","font-weight","font-size","text-rendering","text-transform","width","text-indent","padding-left","padding-right","border-width","box-sizing"];function sJ(e){const t=window.getComputedStyle(e),n=t.getPropertyValue("box-sizing"),r=Number.parseFloat(t.getPropertyValue("padding-bottom"))+Number.parseFloat(t.getPropertyValue("padding-top")),a=Number.parseFloat(t.getPropertyValue("border-bottom-width"))+Number.parseFloat(t.getPropertyValue("border-top-width"));return{contextStyle:lJ.map(i=>`${i}:${t.getPropertyValue(i)}`).join(";"),paddingSize:r,borderSize:a,boxSizing:n}}function u4(e,t=1,n){var r;Ha||(Ha=document.createElement("textarea"),document.body.appendChild(Ha));const{paddingSize:a,borderSize:o,boxSizing:i,contextStyle:l}=sJ(e);Ha.setAttribute("style",`${l};${iJ}`),Ha.value=e.value||e.placeholder||"";let s=Ha.scrollHeight;const c={};i==="border-box"?s=s+o:i==="content-box"&&(s=s-a),Ha.value="";const d=Ha.scrollHeight-a;if(Yt(t)){let f=d*t;i==="border-box"&&(f=f+a+o),s=Math.max(f,s),c.minHeight=`${f}px`}if(Yt(n)){let f=d*n;i==="border-box"&&(f=f+a+o),s=Math.min(f,s)}return c.height=`${s}px`,(r=Ha.parentNode)==null||r.removeChild(Ha),Ha=void 0,c}const 
uJ=Ze({size:Km,disabled:Boolean,modelValue:{type:Le([String,Number,Object]),default:""},type:{type:String,default:"text"},resize:{type:String,values:["none","both","horizontal","vertical"]},autosize:{type:Le([Boolean,Object]),default:!1},autocomplete:{type:String,default:"off"},placeholder:{type:String},form:{type:String,default:""},readonly:{type:Boolean,default:!1},clearable:{type:Boolean,default:!1},showPassword:{type:Boolean,default:!1},showWordLimit:{type:Boolean,default:!1},suffixIcon:{type:wr,default:""},prefixIcon:{type:wr,default:""},label:{type:String},tabindex:{type:[Number,String]},validateEvent:{type:Boolean,default:!0},inputStyle:{type:Le([Object,Array,String]),default:()=>xn({})}}),cJ={[Pt]:e=>wt(e),input:e=>wt(e),change:e=>wt(e),focus:e=>e instanceof FocusEvent,blur:e=>e instanceof FocusEvent,clear:()=>!0,mouseleave:e=>e instanceof MouseEvent,mouseenter:e=>e instanceof MouseEvent,keydown:e=>e instanceof Event,compositionstart:e=>e instanceof CompositionEvent,compositionupdate:e=>e instanceof CompositionEvent,compositionend:e=>e instanceof CompositionEvent},dJ=["type","disabled","readonly","autocomplete","tabindex","aria-label","placeholder"],fJ=["tabindex","disabled","readonly","autocomplete","aria-label","placeholder"],hJ={name:"ElInput",inheritAttrs:!1},pJ=G(Ke(Te({},hJ),{props:uJ,emits:cJ,setup(e,{expose:t,emit:n}){const r=e,a={suffix:"append",prefix:"prepend"},o=$t(),i=W6(),l=wf(),s=VC(),{form:c,formItem:d}=$c(),f=Gn(),p=Ms(),v=De("input"),m=De("textarea"),y=Qn(),b=Qn(),C=H(!1),S=H(!1),w=H(!1),k=H(!1),$=Qn(r.inputStyle),O=x(()=>y.value||b.value),T=x(()=>{var ee;return(ee=c==null?void 0:c.statusIcon)!=null?ee:!1}),_=x(()=>(d==null?void 
0:d.validateState)||""),I=x(()=>hE[_.value]),L=x(()=>k.value?TX:Kq),j=x(()=>[i.style,r.inputStyle]),F=x(()=>[r.inputStyle,$.value,{resize:r.resize}]),N=x(()=>ul(r.modelValue)?"":String(r.modelValue)),D=x(()=>r.clearable&&!p.value&&!r.readonly&&!!N.value&&(C.value||S.value)),z=x(()=>r.showPassword&&!p.value&&!r.readonly&&(!!N.value||C.value)),B=x(()=>r.showWordLimit&&!!s.value.maxlength&&(r.type==="text"||r.type==="textarea")&&!p.value&&!r.readonly&&!r.showPassword),M=x(()=>Array.from(N.value).length),E=x(()=>!!B.value&&M.value>Number(s.value.maxlength)),K=x(()=>!!l.suffix||!!r.suffixIcon||D.value||r.showPassword||B.value||!!_.value&&T.value),W=()=>{const{type:ee,autosize:me}=r;if(!(!Bt||ee!=="textarea"))if(me){const He=zt(me)?me.minRows:void 0,lt=zt(me)?me.maxRows:void 0;$.value=Te({},u4(b.value,He,lt))}else $.value={minHeight:u4(b.value).minHeight}},Y=()=>{const ee=O.value;!ee||ee.value===N.value||(ee.value=N.value)},q=ee=>{const{el:me}=o.vnode;if(!me)return;const lt=Array.from(me.querySelectorAll(`.${v.e(ee)}`)).find(he=>he.parentNode===me);if(!lt)return;const Ye=a[ee];l[Ye]?lt.style.transform=`translateX(${ee==="suffix"?"-":""}${me.querySelector(`.${v.be("group",Ye)}`).offsetWidth}px)`:lt.removeAttribute("style")},J=()=>{q("prefix"),q("suffix")},ne=async ee=>{const{value:me}=ee.target;w.value||me!==N.value&&(n(Pt,me),n("input",me),await Ne(),Y())},oe=ee=>{n("change",ee.target.value)},Q=ee=>{n("compositionstart",ee),w.value=!0},ae=ee=>{var me;n("compositionupdate",ee);const He=(me=ee.target)==null?void 0:me.value,lt=He[He.length-1]||"";w.value=!zm(lt)},de=ee=>{n("compositionend",ee),w.value&&(w.value=!1,ne(ee))},be=()=>{k.value=!k.value,Ee()},Ee=async()=>{var ee;await Ne(),(ee=O.value)==null||ee.focus()},Pe=()=>{var ee;return(ee=O.value)==null?void 0:ee.blur()},Be=ee=>{C.value=!0,n("focus",ee)},te=ee=>{var me;C.value=!1,n("blur",ee),r.validateEvent&&((me=d==null?void 0:d.validate)==null||me.call(d,"blur").catch(He=>void 
0))},ie=ee=>{S.value=!1,n("mouseleave",ee)},ge=ee=>{S.value=!0,n("mouseenter",ee)},ke=ee=>{n("keydown",ee)},xe=()=>{var ee;(ee=O.value)==null||ee.select()},Ie=()=>{n(Pt,""),n("change",""),n("clear"),n("input","")};ce(()=>r.modelValue,()=>{var ee;Ne(()=>W()),r.validateEvent&&((ee=d==null?void 0:d.validate)==null||ee.call(d,"change").catch(me=>void 0))}),ce(N,()=>Y()),ce(()=>r.type,async()=>{await Ne(),Y(),W(),J()});const ye=H(),pe=H(),ue=H({}),Ce=(ee,me)=>{if(ee.value){const He=ee.value.offsetWidth;return He>0?He+16:me}return me},je=()=>{ue.value=Te({paddingRight:`${Ce(ye,0)}px`,paddingLeft:`${Ce(pe,11)}px`},r.inputStyle)};return ce(D,()=>{Ne(()=>{je()})}),et(async()=>{je(),Y(),J(),await Ne(),W()}),ur(async()=>{await Ne(),J()}),t({input:y,textarea:b,ref:O,textareaStyle:F,autosize:yn(r,"autosize"),focus:Ee,blur:Pe,select:xe,clear:Ie,resizeTextarea:W}),(ee,me)=>at((R(),X("div",{class:U([ee.type==="textarea"?A(m).b():A(v).b(),A(v).m(A(f)),A(v).is("disabled",A(p)),A(v).is("exceed",A(E)),{[A(v).b("group")]:ee.$slots.prepend||ee.$slots.append,[A(v).bm("group","append")]:ee.$slots.append,[A(v).bm("group","prepend")]:ee.$slots.prepend,[A(v).m("prefix")]:ee.$slots.prefix||ee.prefixIcon,[A(v).m("suffix")]:ee.$slots.suffix||ee.suffixIcon||ee.clearable||ee.showPassword,[A(v).m("suffix--password-clear")]:A(D)&&A(z)},ee.$attrs.class]),style:Xe(A(j)),onMouseenter:ge,onMouseleave:ie},[se(" input "),ee.type!=="textarea"?(R(),X(Fe,{key:0},[se(" prepend slot "),ee.$slots.prepend?(R(),X("div",{key:0,class:U(A(v).be("group","prepend"))},[Oe(ee.$slots,"prepend")],2)):se("v-if",!0),Z("input",hn({ref_key:"input",ref:y,class:A(v).e("inner")},A(s),{type:ee.showPassword?k.value?"text":"password":ee.type,disabled:A(p),readonly:ee.readonly,autocomplete:ee.autocomplete,tabindex:ee.tabindex,"aria-label":ee.label,placeholder:ee.placeholder,style:ue.value,onCompositionstart:Q,onCompositionupdate:ae,onCompositionend:de,onInput:ne,onFocus:Be,onBlur:te,onChange:oe,onKeydown:ke}),null,16,dJ),se(" 
prefix slot "),ee.$slots.prefix||ee.prefixIcon?(R(),X("span",{key:1,class:U(A(v).e("prefix"))},[Z("span",{ref_key:"innerPrefixRef",ref:pe,class:U(A(v).e("prefix-inner"))},[Oe(ee.$slots,"prefix"),ee.prefixIcon?(R(),fe(A(ft),{key:0,class:U(A(v).e("icon"))},{default:re(()=>[(R(),fe(Kt(ee.prefixIcon)))]),_:1},8,["class"])):se("v-if",!0)],2)],2)):se("v-if",!0),se(" suffix slot "),A(K)?(R(),X("span",{key:2,class:U(A(v).e("suffix"))},[Z("span",{ref_key:"innerSuffixRef",ref:ye,class:U(A(v).e("suffix-inner"))},[!A(D)||!A(z)||!A(B)?(R(),X(Fe,{key:0},[Oe(ee.$slots,"suffix"),ee.suffixIcon?(R(),fe(A(ft),{key:0,class:U(A(v).e("icon"))},{default:re(()=>[(R(),fe(Kt(ee.suffixIcon)))]),_:1},8,["class"])):se("v-if",!0)],64)):se("v-if",!0),A(D)?(R(),fe(A(ft),{key:1,class:U([A(v).e("icon"),A(v).e("clear")]),onMousedown:me[0]||(me[0]=dt(()=>{},["prevent"])),onClick:Ie},{default:re(()=>[g(A(gi))]),_:1},8,["class"])):se("v-if",!0),A(z)?(R(),fe(A(ft),{key:2,class:U([A(v).e("icon"),A(v).e("clear")]),onClick:be},{default:re(()=>[(R(),fe(Kt(A(L))))]),_:1},8,["class"])):se("v-if",!0),A(B)?(R(),X("span",{key:3,class:U(A(v).e("count"))},[Z("span",{class:U(A(v).e("count-inner"))},Me(A(M))+" / "+Me(A(s).maxlength),3)],2)):se("v-if",!0)],2),A(_)&&A(I)&&A(T)?(R(),fe(A(ft),{key:0,class:U([A(v).e("icon"),A(v).e("validateIcon"),A(v).is("loading",A(_)==="validating")])},{default:re(()=>[(R(),fe(Kt(A(I))))]),_:1},8,["class"])):se("v-if",!0)],2)):se("v-if",!0),se(" append slot "),ee.$slots.append?(R(),X("div",{key:3,class:U(A(v).be("group","append"))},[Oe(ee.$slots,"append")],2)):se("v-if",!0)],64)):(R(),X(Fe,{key:1},[se(" textarea 
"),Z("textarea",hn({ref_key:"textarea",ref:b,class:A(m).e("inner")},A(s),{tabindex:ee.tabindex,disabled:A(p),readonly:ee.readonly,autocomplete:ee.autocomplete,style:A(F),"aria-label":ee.label,placeholder:ee.placeholder,onCompositionstart:Q,onCompositionupdate:ae,onCompositionend:de,onInput:ne,onFocus:Be,onBlur:te,onChange:oe,onKeydown:ke}),null,16,fJ),A(B)?(R(),X("span",{key:0,class:U(A(v).e("count"))},Me(A(M))+" / "+Me(A(s).maxlength),3)):se("v-if",!0)],64))],38)),[[_t,ee.type!=="hidden"]])}}));var vJ=Ae(pJ,[["__file","/home/runner/work/element-plus/element-plus/packages/components/input/src/input.vue"]]);const Ra=xt(vJ),XE={vertical:{offset:"offsetHeight",scroll:"scrollTop",scrollSize:"scrollHeight",size:"height",key:"vertical",axis:"Y",client:"clientY",direction:"top"},horizontal:{offset:"offsetWidth",scroll:"scrollLeft",scrollSize:"scrollWidth",size:"width",key:"horizontal",axis:"X",client:"clientX",direction:"left"}},mJ=({move:e,size:t,bar:n})=>({[n.size]:t,transform:`translate${n.axis}(${e}%)`}),gJ=Ze({vertical:Boolean,size:String,move:Number,ratio:{type:Number,required:!0},always:Boolean}),c4="Thumb",yJ=G({name:c4,props:gJ,setup(e){const t=ve(_E),n=De("scrollbar");t||qn(c4,"can not inject scrollbar context");const r=H(),a=H(),o=H({}),i=H(!1);let l=!1,s=!1,c=Bt?document.onselectstart:null;const d=x(()=>XE[e.vertical?"vertical":"horizontal"]),f=x(()=>mJ({size:e.size,move:e.move,bar:d.value})),p=x(()=>r.value[d.value.offset]**2/t.wrapElement[d.value.scrollSize]/e.ratio/a.value[d.value.offset]),v=$=>{var O;if($.stopPropagation(),$.ctrlKey||[1,2].includes($.button))return;(O=window.getSelection())==null||O.removeAllRanges(),y($);const T=$.currentTarget;!T||(o.value[d.value.axis]=T[d.value.offset]-($[d.value.client]-T.getBoundingClientRect()[d.value.direction]))},m=$=>{if(!a.value||!r.value||!t.wrapElement)return;const 
O=Math.abs($.target.getBoundingClientRect()[d.value.direction]-$[d.value.client]),T=a.value[d.value.offset]/2,_=(O-T)*100*p.value/r.value[d.value.offset];t.wrapElement[d.value.scroll]=_*t.wrapElement[d.value.scrollSize]/100},y=$=>{$.stopImmediatePropagation(),l=!0,document.addEventListener("mousemove",b),document.addEventListener("mouseup",C),c=document.onselectstart,document.onselectstart=()=>!1},b=$=>{if(!r.value||!a.value||l===!1)return;const O=o.value[d.value.axis];if(!O)return;const T=(r.value.getBoundingClientRect()[d.value.direction]-$[d.value.client])*-1,_=a.value[d.value.offset]-O,I=(T-_)*100*p.value/r.value[d.value.offset];t.wrapElement[d.value.scroll]=I*t.wrapElement[d.value.scrollSize]/100},C=()=>{l=!1,o.value[d.value.axis]=0,document.removeEventListener("mousemove",b),document.removeEventListener("mouseup",C),k(),s&&(i.value=!1)},S=()=>{s=!1,i.value=!!e.size},w=()=>{s=!0,i.value=l};Lt(()=>{k(),document.removeEventListener("mouseup",C)});const k=()=>{document.onselectstart!==c&&(document.onselectstart=c)};return Hn(yn(t,"scrollbarElement"),"mousemove",S),Hn(yn(t,"scrollbarElement"),"mouseleave",w),{ns:n,instance:r,thumb:a,bar:d,thumbStyle:f,visible:i,clickTrackHandler:m,clickThumbHandler:v}}});function bJ(e,t,n,r,a,o){return R(),fe(Vn,{name:e.ns.b("fade")},{default:re(()=>[at(Z("div",{ref:"instance",class:U([e.ns.e("bar"),e.ns.is(e.bar.key)]),onMousedown:t[1]||(t[1]=(...i)=>e.clickTrackHandler&&e.clickTrackHandler(...i))},[Z("div",{ref:"thumb",class:U(e.ns.e("thumb")),style:Xe(e.thumbStyle),onMousedown:t[0]||(t[0]=(...i)=>e.clickThumbHandler&&e.clickThumbHandler(...i))},null,38)],34),[[_t,e.always||e.visible]])]),_:1},8,["name"])}var CJ=Ae(yJ,[["render",bJ],["__file","/home/runner/work/element-plus/element-plus/packages/components/scrollbar/src/thumb.vue"]]);const 
wJ=Ze({always:{type:Boolean,default:!0},width:{type:String,default:""},height:{type:String,default:""},ratioX:{type:Number,default:1},ratioY:{type:Number,default:1}}),SJ=G({components:{Thumb:CJ},props:wJ,setup(e){const t=H(0),n=H(0),r=4;return{handleScroll:o=>{if(o){const i=o.offsetHeight-r,l=o.offsetWidth-r;n.value=o.scrollTop*100/i*e.ratioY,t.value=o.scrollLeft*100/l*e.ratioX}},moveX:t,moveY:n}}});function kJ(e,t,n,r,a,o){const i=we("thumb");return R(),X(Fe,null,[g(i,{move:e.moveX,ratio:e.ratioX,size:e.width,always:e.always},null,8,["move","ratio","size","always"]),g(i,{move:e.moveY,ratio:e.ratioY,size:e.height,vertical:"",always:e.always},null,8,["move","ratio","size","always"])],64)}var $J=Ae(SJ,[["render",kJ],["__file","/home/runner/work/element-plus/element-plus/packages/components/scrollbar/src/bar.vue"]]);const OJ=Ze({height:{type:[String,Number],default:""},maxHeight:{type:[String,Number],default:""},native:{type:Boolean,default:!1},wrapStyle:{type:Le([String,Object,Array]),default:""},wrapClass:{type:[String,Array],default:""},viewClass:{type:[String,Array],default:""},viewStyle:{type:[String,Array,Object],default:""},noresize:Boolean,tag:{type:String,default:"div"},always:{type:Boolean,default:!1},minSize:{type:Number,default:20}}),PJ={scroll:({scrollTop:e,scrollLeft:t})=>Yt(e)&&Yt(t)},TJ=G({name:"ElScrollbar",components:{Bar:$J},props:OJ,emits:PJ,setup(e,{emit:t}){const n=De("scrollbar");let r,a;const o=H(),i=H(),l=H(),s=H("0"),c=H("0"),d=H(),f=H(0),p=H(0),v=H(1),m=H(1),y=4,b=x(()=>{const O={};return e.height&&(O.height=oo(e.height)),e.maxHeight&&(O.maxHeight=oo(e.maxHeight)),[e.wrapStyle,O]}),C=()=>{var O;i.value&&((O=d.value)==null||O.handleScroll(i.value),t("scroll",{scrollTop:i.value.scrollTop,scrollLeft:i.value.scrollLeft}))};function S(O,T){zt(O)?i.value.scrollTo(O):Yt(O)&&Yt(T)&&i.value.scrollTo(O,T)}const w=O=>{!Yt(O)||(i.value.scrollTop=O)},k=O=>{!Yt(O)||(i.value.scrollLeft=O)},$=()=>{if(!i.value)return;const 
O=i.value.offsetHeight-y,T=i.value.offsetWidth-y,_=O**2/i.value.scrollHeight,I=T**2/i.value.scrollWidth,L=Math.max(_,e.minSize),j=Math.max(I,e.minSize);v.value=_/(O-_)/(L/(O-L)),m.value=I/(T-I)/(j/(T-j)),c.value=L+y<O?`${L}px`:"",s.value=j+y<T?`${j}px`:""};return ce(()=>e.noresize,O=>{O?(r==null||r(),a==null||a()):({stop:r}=Cc(l,$),a=Hn("resize",$))},{immediate:!0}),ce(()=>[e.maxHeight,e.height],()=>{e.native||Ne(()=>{var O;$(),i.value&&((O=d.value)==null||O.handleScroll(i.value))})}),ot(_E,bt({scrollbarElement:o,wrapElement:i})),et(()=>{e.native||Ne(()=>$())}),ur(()=>$()),{ns:n,scrollbar$:o,wrap$:i,resize$:l,barRef:d,moveX:f,moveY:p,ratioX:m,ratioY:v,sizeWidth:s,sizeHeight:c,style:b,update:$,handleScroll:C,scrollTo:S,setScrollTop:w,setScrollLeft:k}}});function xJ(e,t,n,r,a,o){const i=we("bar");return R(),X("div",{ref:"scrollbar$",class:U(e.ns.b())},[Z("div",{ref:"wrap$",class:U([e.wrapClass,e.ns.e("wrap"),{[e.ns.em("wrap","hidden-default")]:!e.native}]),style:Xe(e.style),onScroll:t[0]||(t[0]=(...l)=>e.handleScroll&&e.handleScroll(...l))},[(R(),fe(Kt(e.tag),{ref:"resize$",class:U([e.ns.e("view"),e.viewClass]),style:Xe(e.viewStyle)},{default:re(()=>[Oe(e.$slots,"default")]),_:3},8,["class","style"]))],38),e.native?se("v-if",!0):(R(),fe(i,{key:0,ref:"barRef",height:e.sizeHeight,width:e.sizeWidth,always:e.always,"ratio-x":e.ratioX,"ratio-y":e.ratioY},null,8,["height","width","always","ratio-x","ratio-y"]))],2)}var _J=Ae(TJ,[["render",xJ],["__file","/home/runner/work/element-plus/element-plus/packages/components/scrollbar/src/scrollbar.vue"]]);const xi=xt(_J),EJ={name:"ElPopperRoot",inheritAttrs:!1},MJ=G(Ke(Te({},EJ),{setup(e,{expose:t}){const n=H(),r=H(),a=H(),o=H(),i={triggerRef:n,popperInstanceRef:r,contentRef:a,referenceRef:o};return t(i),ot(zC,i),(l,s)=>Oe(l.$slots,"default")}}));var IJ=Ae(MJ,[["__file","/home/runner/work/element-plus/element-plus/packages/components/popper/src/popper.vue"]]);const 
ZE=Ze({arrowOffset:{type:Number,default:5}}),NJ={name:"ElPopperArrow",inheritAttrs:!1},AJ=G(Ke(Te({},NJ),{props:ZE,setup(e,{expose:t}){const n=e,r=De("popper"),{arrowOffset:a,arrowRef:o}=ve(ME,void 0);return ce(()=>n.arrowOffset,i=>{a.value=i}),Lt(()=>{o.value=void 0}),t({arrowRef:o}),(i,l)=>(R(),X("span",{ref_key:"arrowRef",ref:o,class:U(A(r).e("arrow")),"data-popper-arrow":""},null,2))}}));var DJ=Ae(AJ,[["__file","/home/runner/work/element-plus/element-plus/packages/components/popper/src/arrow.vue"]]);const RJ="ElOnlyChild",LJ=G({name:RJ,setup(e,{slots:t,attrs:n}){var r;const a=ve(zE),o=SZ((r=a==null?void 0:a.setForwardRef)!=null?r:Qt);return()=>{var i;const l=(i=t.default)==null?void 0:i.call(t,n);if(!l||l.length>1)return null;const s=JE(l);return s?at(hr(s,n),[[o]]):null}}});function JE(e){if(!e)return null;const t=e;for(const n of t){if(zt(n))switch(n.type){case Ir:continue;case Fo:return iy(n);case"svg":return iy(n);case Fe:return JE(n.children);default:return n}return iy(n)}return null}function iy(e){return g("span",{class:"el-only-child__content"},[e])}const QE=Ze({virtualRef:{type:Le(Object)},virtualTriggering:Boolean,onMouseenter:Function,onMouseleave:Function,onClick:Function,onKeydown:Function,onFocus:Function,onBlur:Function,onContextmenu:Function,id:String,open:Boolean}),FJ={name:"ElPopperTrigger",inheritAttrs:!1},BJ=G(Ke(Te({},FJ),{props:QE,setup(e,{expose:t}){const n=e,{triggerRef:r}=ve(zC,void 0);return wZ(r),et(()=>{ce(()=>n.virtualRef,a=>{a&&(r.value=eo(a))},{immediate:!0}),ce(()=>r.value,(a,o)=>{ys(a)&&["onMouseenter","onMouseleave","onClick","onKeydown","onFocus","onBlur","onContextmenu"].forEach(i=>{var l;const s=n[i];s&&(a.addEventListener(i.slice(2).toLowerCase(),s),(l=o==null?void 0:o.removeEventListener)==null||l.call(o,i.slice(2).toLowerCase(),s))})},{immediate:!0})}),t({triggerRef:r}),(a,o)=>a.virtualTriggering?se("v-if",!0):(R(),fe(A(LJ),hn({key:0},a.$attrs,{"aria-describedby":a.open?a.id:void 
0}),{default:re(()=>[Oe(a.$slots,"default")]),_:3},16,["aria-describedby"]))}}));var VJ=Ae(BJ,[["__file","/home/runner/work/element-plus/element-plus/packages/components/popper/src/trigger.vue"]]),Vr="top",Na="bottom",Aa="right",zr="left",YC="auto",_f=[Vr,Na,Aa,zr],Yu="start",Ud="end",zJ="clippingParents",eM="viewport",Bc="popper",HJ="reference",d4=_f.reduce(function(e,t){return e.concat([t+"-"+Yu,t+"-"+Ud])},[]),qC=[].concat(_f,[YC]).reduce(function(e,t){return e.concat([t,t+"-"+Yu,t+"-"+Ud])},[]),jJ="beforeRead",KJ="read",WJ="afterRead",UJ="beforeMain",YJ="main",qJ="afterMain",GJ="beforeWrite",XJ="write",ZJ="afterWrite",JJ=[jJ,KJ,WJ,UJ,YJ,qJ,GJ,XJ,ZJ];function Ao(e){return e?(e.nodeName||"").toLowerCase():null}function po(e){if(e==null)return window;if(e.toString()!=="[object Window]"){var t=e.ownerDocument;return t&&t.defaultView||window}return e}function qu(e){var t=po(e).Element;return e instanceof t||e instanceof Element}function Ta(e){var t=po(e).HTMLElement;return e instanceof t||e instanceof HTMLElement}function GC(e){if(typeof ShadowRoot=="undefined")return!1;var t=po(e).ShadowRoot;return e instanceof t||e instanceof ShadowRoot}function QJ(e){var t=e.state;Object.keys(t.elements).forEach(function(n){var r=t.styles[n]||{},a=t.attributes[n]||{},o=t.elements[n];!Ta(o)||!Ao(o)||(Object.assign(o.style,r),Object.keys(a).forEach(function(i){var l=a[i];l===!1?o.removeAttribute(i):o.setAttribute(i,l===!0?"":l)}))})}function eQ(e){var t=e.state,n={popper:{position:t.options.strategy,left:"0",top:"0",margin:"0"},arrow:{position:"absolute"},reference:{}};return Object.assign(t.elements.popper.style,n.popper),t.styles=n,t.elements.arrow&&Object.assign(t.elements.arrow.style,n.arrow),function(){Object.keys(t.elements).forEach(function(r){var a=t.elements[r],o=t.attributes[r]||{},i=Object.keys(t.styles.hasOwnProperty(r)?t.styles[r]:n[r]),l=i.reduce(function(s,c){return 
s[c]="",s},{});!Ta(a)||!Ao(a)||(Object.assign(a.style,l),Object.keys(o).forEach(function(s){a.removeAttribute(s)}))})}}var tQ={name:"applyStyles",enabled:!0,phase:"write",fn:QJ,effect:eQ,requires:["computeStyles"]};function Mo(e){return e.split("-")[0]}var os=Math.max,bv=Math.min,Gu=Math.round;function Xu(e,t){t===void 0&&(t=!1);var n=e.getBoundingClientRect(),r=1,a=1;if(Ta(e)&&t){var o=e.offsetHeight,i=e.offsetWidth;i>0&&(r=Gu(n.width)/i||1),o>0&&(a=Gu(n.height)/o||1)}return{width:n.width/r,height:n.height/a,top:n.top/a,right:n.right/r,bottom:n.bottom/a,left:n.left/r,x:n.left/r,y:n.top/a}}function XC(e){var t=Xu(e),n=e.offsetWidth,r=e.offsetHeight;return Math.abs(t.width-n)<=1&&(n=t.width),Math.abs(t.height-r)<=1&&(r=t.height),{x:e.offsetLeft,y:e.offsetTop,width:n,height:r}}function tM(e,t){var n=t.getRootNode&&t.getRootNode();if(e.contains(t))return!0;if(n&&GC(n)){var r=t;do{if(r&&e.isSameNode(r))return!0;r=r.parentNode||r.host}while(r)}return!1}function bi(e){return po(e).getComputedStyle(e)}function nQ(e){return["table","td","th"].indexOf(Ao(e))>=0}function yl(e){return((qu(e)?e.ownerDocument:e.document)||window.document).documentElement}function Ym(e){return Ao(e)==="html"?e:e.assignedSlot||e.parentNode||(GC(e)?e.host:null)||yl(e)}function f4(e){return!Ta(e)||bi(e).position==="fixed"?null:e.offsetParent}function rQ(e){var t=navigator.userAgent.toLowerCase().indexOf("firefox")!==-1,n=navigator.userAgent.indexOf("Trident")!==-1;if(n&&Ta(e)){var r=bi(e);if(r.position==="fixed")return null}var a=Ym(e);for(GC(a)&&(a=a.host);Ta(a)&&["html","body"].indexOf(Ao(a))<0;){var o=bi(a);if(o.transform!=="none"||o.perspective!=="none"||o.contain==="paint"||["transform","perspective"].indexOf(o.willChange)!==-1||t&&o.willChange==="filter"||t&&o.filter&&o.filter!=="none")return a;a=a.parentNode}return null}function Ef(e){for(var t=po(e),n=f4(e);n&&nQ(n)&&bi(n).position==="static";)n=f4(n);return 
n&&(Ao(n)==="html"||Ao(n)==="body"&&bi(n).position==="static")?t:n||rQ(e)||t}function ZC(e){return["top","bottom"].indexOf(e)>=0?"x":"y"}function md(e,t,n){return os(e,bv(t,n))}function aQ(e,t,n){var r=md(e,t,n);return r>n?n:r}function nM(){return{top:0,right:0,bottom:0,left:0}}function rM(e){return Object.assign({},nM(),e)}function aM(e,t){return t.reduce(function(n,r){return n[r]=e,n},{})}var oQ=function(t,n){return t=typeof t=="function"?t(Object.assign({},n.rects,{placement:n.placement})):t,rM(typeof t!="number"?t:aM(t,_f))};function iQ(e){var t,n=e.state,r=e.name,a=e.options,o=n.elements.arrow,i=n.modifiersData.popperOffsets,l=Mo(n.placement),s=ZC(l),c=[zr,Aa].indexOf(l)>=0,d=c?"height":"width";if(!(!o||!i)){var f=oQ(a.padding,n),p=XC(o),v=s==="y"?Vr:zr,m=s==="y"?Na:Aa,y=n.rects.reference[d]+n.rects.reference[s]-i[s]-n.rects.popper[d],b=i[s]-n.rects.reference[s],C=Ef(o),S=C?s==="y"?C.clientHeight||0:C.clientWidth||0:0,w=y/2-b/2,k=f[v],$=S-p[d]-f[m],O=S/2-p[d]/2+w,T=md(k,O,$),_=s;n.modifiersData[r]=(t={},t[_]=T,t.centerOffset=T-O,t)}}function lQ(e){var t=e.state,n=e.options,r=n.element,a=r===void 0?"[data-popper-arrow]":r;a!=null&&(typeof a=="string"&&(a=t.elements.popper.querySelector(a),!a)||!tM(t.elements.popper,a)||(t.elements.arrow=a))}var sQ={name:"arrow",enabled:!0,phase:"main",fn:iQ,effect:lQ,requires:["popperOffsets"],requiresIfExists:["preventOverflow"]};function Zu(e){return e.split("-")[1]}var uQ={top:"auto",right:"auto",bottom:"auto",left:"auto"};function cQ(e){var t=e.x,n=e.y,r=window,a=r.devicePixelRatio||1;return{x:Gu(t*a)/a||0,y:Gu(n*a)/a||0}}function h4(e){var t,n=e.popper,r=e.popperRect,a=e.placement,o=e.variation,i=e.offsets,l=e.position,s=e.gpuAcceleration,c=e.adaptive,d=e.roundOffsets,f=e.isFixed,p=i.x,v=p===void 0?0:p,m=i.y,y=m===void 0?0:m,b=typeof d=="function"?d({x:v,y}):{x:v,y};v=b.x,y=b.y;var C=i.hasOwnProperty("x"),S=i.hasOwnProperty("y"),w=zr,k=Vr,$=window;if(c){var 
O=Ef(n),T="clientHeight",_="clientWidth";if(O===po(n)&&(O=yl(n),bi(O).position!=="static"&&l==="absolute"&&(T="scrollHeight",_="scrollWidth")),O=O,a===Vr||(a===zr||a===Aa)&&o===Ud){k=Na;var I=f&&O===$&&$.visualViewport?$.visualViewport.height:O[T];y-=I-r.height,y*=s?1:-1}if(a===zr||(a===Vr||a===Na)&&o===Ud){w=Aa;var L=f&&O===$&&$.visualViewport?$.visualViewport.width:O[_];v-=L-r.width,v*=s?1:-1}}var j=Object.assign({position:l},c&&uQ),F=d===!0?cQ({x:v,y}):{x:v,y};if(v=F.x,y=F.y,s){var N;return Object.assign({},j,(N={},N[k]=S?"0":"",N[w]=C?"0":"",N.transform=($.devicePixelRatio||1)<=1?"translate("+v+"px, "+y+"px)":"translate3d("+v+"px, "+y+"px, 0)",N))}return Object.assign({},j,(t={},t[k]=S?y+"px":"",t[w]=C?v+"px":"",t.transform="",t))}function dQ(e){var t=e.state,n=e.options,r=n.gpuAcceleration,a=r===void 0?!0:r,o=n.adaptive,i=o===void 0?!0:o,l=n.roundOffsets,s=l===void 0?!0:l,c={placement:Mo(t.placement),variation:Zu(t.placement),popper:t.elements.popper,popperRect:t.rects.popper,gpuAcceleration:a,isFixed:t.options.strategy==="fixed"};t.modifiersData.popperOffsets!=null&&(t.styles.popper=Object.assign({},t.styles.popper,h4(Object.assign({},c,{offsets:t.modifiersData.popperOffsets,position:t.options.strategy,adaptive:i,roundOffsets:s})))),t.modifiersData.arrow!=null&&(t.styles.arrow=Object.assign({},t.styles.arrow,h4(Object.assign({},c,{offsets:t.modifiersData.arrow,position:"absolute",adaptive:!1,roundOffsets:s})))),t.attributes.popper=Object.assign({},t.attributes.popper,{"data-popper-placement":t.placement})}var fQ={name:"computeStyles",enabled:!0,phase:"beforeWrite",fn:dQ,data:{}},ih={passive:!0};function hQ(e){var t=e.state,n=e.instance,r=e.options,a=r.scroll,o=a===void 0?!0:a,i=r.resize,l=i===void 0?!0:i,s=po(t.elements.popper),c=[].concat(t.scrollParents.reference,t.scrollParents.popper);return 
o&&c.forEach(function(d){d.addEventListener("scroll",n.update,ih)}),l&&s.addEventListener("resize",n.update,ih),function(){o&&c.forEach(function(d){d.removeEventListener("scroll",n.update,ih)}),l&&s.removeEventListener("resize",n.update,ih)}}var pQ={name:"eventListeners",enabled:!0,phase:"write",fn:function(){},effect:hQ,data:{}},vQ={left:"right",right:"left",bottom:"top",top:"bottom"};function dp(e){return e.replace(/left|right|bottom|top/g,function(t){return vQ[t]})}var mQ={start:"end",end:"start"};function p4(e){return e.replace(/start|end/g,function(t){return mQ[t]})}function JC(e){var t=po(e),n=t.pageXOffset,r=t.pageYOffset;return{scrollLeft:n,scrollTop:r}}function QC(e){return Xu(yl(e)).left+JC(e).scrollLeft}function gQ(e){var t=po(e),n=yl(e),r=t.visualViewport,a=n.clientWidth,o=n.clientHeight,i=0,l=0;return r&&(a=r.width,o=r.height,/^((?!chrome|android).)*safari/i.test(navigator.userAgent)||(i=r.offsetLeft,l=r.offsetTop)),{width:a,height:o,x:i+QC(e),y:l}}function yQ(e){var t,n=yl(e),r=JC(e),a=(t=e.ownerDocument)==null?void 0:t.body,o=os(n.scrollWidth,n.clientWidth,a?a.scrollWidth:0,a?a.clientWidth:0),i=os(n.scrollHeight,n.clientHeight,a?a.scrollHeight:0,a?a.clientHeight:0),l=-r.scrollLeft+QC(e),s=-r.scrollTop;return bi(a||n).direction==="rtl"&&(l+=os(n.clientWidth,a?a.clientWidth:0)-o),{width:o,height:i,x:l,y:s}}function ew(e){var t=bi(e),n=t.overflow,r=t.overflowX,a=t.overflowY;return/auto|scroll|overlay|hidden/.test(n+a+r)}function oM(e){return["html","body","#document"].indexOf(Ao(e))>=0?e.ownerDocument.body:Ta(e)&&ew(e)?e:oM(Ym(e))}function gd(e,t){var n;t===void 0&&(t=[]);var r=oM(e),a=r===((n=e.ownerDocument)==null?void 0:n.body),o=po(r),i=a?[o].concat(o.visualViewport||[],ew(r)?r:[]):r,l=t.concat(i);return a?l:l.concat(gd(Ym(i)))}function v0(e){return Object.assign({},e,{left:e.x,top:e.y,right:e.x+e.width,bottom:e.y+e.height})}function bQ(e){var t=Xu(e);return 
t.top=t.top+e.clientTop,t.left=t.left+e.clientLeft,t.bottom=t.top+e.clientHeight,t.right=t.left+e.clientWidth,t.width=e.clientWidth,t.height=e.clientHeight,t.x=t.left,t.y=t.top,t}function v4(e,t){return t===eM?v0(gQ(e)):qu(t)?bQ(t):v0(yQ(yl(e)))}function CQ(e){var t=gd(Ym(e)),n=["absolute","fixed"].indexOf(bi(e).position)>=0,r=n&&Ta(e)?Ef(e):e;return qu(r)?t.filter(function(a){return qu(a)&&tM(a,r)&&Ao(a)!=="body"}):[]}function wQ(e,t,n){var r=t==="clippingParents"?CQ(e):[].concat(t),a=[].concat(r,[n]),o=a[0],i=a.reduce(function(l,s){var c=v4(e,s);return l.top=os(c.top,l.top),l.right=bv(c.right,l.right),l.bottom=bv(c.bottom,l.bottom),l.left=os(c.left,l.left),l},v4(e,o));return i.width=i.right-i.left,i.height=i.bottom-i.top,i.x=i.left,i.y=i.top,i}function iM(e){var t=e.reference,n=e.element,r=e.placement,a=r?Mo(r):null,o=r?Zu(r):null,i=t.x+t.width/2-n.width/2,l=t.y+t.height/2-n.height/2,s;switch(a){case Vr:s={x:i,y:t.y-n.height};break;case Na:s={x:i,y:t.y+t.height};break;case Aa:s={x:t.x+t.width,y:l};break;case zr:s={x:t.x-n.width,y:l};break;default:s={x:t.x,y:t.y}}var c=a?ZC(a):null;if(c!=null){var d=c==="y"?"height":"width";switch(o){case Yu:s[c]=s[c]-(t[d]/2-n[d]/2);break;case Ud:s[c]=s[c]+(t[d]/2-n[d]/2);break}}return s}function Yd(e,t){t===void 0&&(t={});var n=t,r=n.placement,a=r===void 0?e.placement:r,o=n.boundary,i=o===void 0?zJ:o,l=n.rootBoundary,s=l===void 0?eM:l,c=n.elementContext,d=c===void 0?Bc:c,f=n.altBoundary,p=f===void 0?!1:f,v=n.padding,m=v===void 0?0:v,y=rM(typeof m!="number"?m:aM(m,_f)),b=d===Bc?HJ:Bc,C=e.rects.popper,S=e.elements[p?b:d],w=wQ(qu(S)?S:S.contextElement||yl(e.elements.popper),i,s),k=Xu(e.elements.reference),$=iM({reference:k,element:C,strategy:"absolute",placement:a}),O=v0(Object.assign({},C,$)),T=d===Bc?O:k,_={top:w.top-T.top+y.top,bottom:T.bottom-w.bottom+y.bottom,left:w.left-T.left+y.left,right:T.right-w.right+y.right},I=e.modifiersData.offset;if(d===Bc&&I){var L=I[a];Object.keys(_).forEach(function(j){var 
F=[Aa,Na].indexOf(j)>=0?1:-1,N=[Vr,Na].indexOf(j)>=0?"y":"x";_[j]+=L[N]*F})}return _}function SQ(e,t){t===void 0&&(t={});var n=t,r=n.placement,a=n.boundary,o=n.rootBoundary,i=n.padding,l=n.flipVariations,s=n.allowedAutoPlacements,c=s===void 0?qC:s,d=Zu(r),f=d?l?d4:d4.filter(function(m){return Zu(m)===d}):_f,p=f.filter(function(m){return c.indexOf(m)>=0});p.length===0&&(p=f);var v=p.reduce(function(m,y){return m[y]=Yd(e,{placement:y,boundary:a,rootBoundary:o,padding:i})[Mo(y)],m},{});return Object.keys(v).sort(function(m,y){return v[m]-v[y]})}function kQ(e){if(Mo(e)===YC)return[];var t=dp(e);return[p4(e),t,p4(t)]}function $Q(e){var t=e.state,n=e.options,r=e.name;if(!t.modifiersData[r]._skip){for(var a=n.mainAxis,o=a===void 0?!0:a,i=n.altAxis,l=i===void 0?!0:i,s=n.fallbackPlacements,c=n.padding,d=n.boundary,f=n.rootBoundary,p=n.altBoundary,v=n.flipVariations,m=v===void 0?!0:v,y=n.allowedAutoPlacements,b=t.options.placement,C=Mo(b),S=C===b,w=s||(S||!m?[dp(b)]:kQ(b)),k=[b].concat(w).reduce(function(ne,oe){return ne.concat(Mo(oe)===YC?SQ(t,{placement:oe,boundary:d,rootBoundary:f,padding:c,flipVariations:m,allowedAutoPlacements:y}):oe)},[]),$=t.rects.reference,O=t.rects.popper,T=new Map,_=!0,I=k[0],L=0;L<k.length;L++){var j=k[L],F=Mo(j),N=Zu(j)===Yu,D=[Vr,Na].indexOf(F)>=0,z=D?"width":"height",B=Yd(t,{placement:j,boundary:d,rootBoundary:f,altBoundary:p,padding:c}),M=D?N?Aa:zr:N?Na:Vr;$[z]>O[z]&&(M=dp(M));var E=dp(M),K=[];if(o&&K.push(B[F]<=0),l&&K.push(B[M]<=0,B[E]<=0),K.every(function(ne){return ne})){I=j,_=!1;break}T.set(j,K)}if(_)for(var W=m?3:1,Y=function(oe){var Q=k.find(function(ae){var de=T.get(ae);if(de)return de.slice(0,oe).every(function(be){return be})});if(Q)return I=Q,"break"},q=W;q>0;q--){var J=Y(q);if(J==="break")break}t.placement!==I&&(t.modifiersData[r]._skip=!0,t.placement=I,t.reset=!0)}}var OQ={name:"flip",enabled:!0,phase:"main",fn:$Q,requiresIfExists:["offset"],data:{_skip:!1}};function m4(e,t,n){return n===void 
0&&(n={x:0,y:0}),{top:e.top-t.height-n.y,right:e.right-t.width+n.x,bottom:e.bottom-t.height+n.y,left:e.left-t.width-n.x}}function g4(e){return[Vr,Aa,Na,zr].some(function(t){return e[t]>=0})}function PQ(e){var t=e.state,n=e.name,r=t.rects.reference,a=t.rects.popper,o=t.modifiersData.preventOverflow,i=Yd(t,{elementContext:"reference"}),l=Yd(t,{altBoundary:!0}),s=m4(i,r),c=m4(l,a,o),d=g4(s),f=g4(c);t.modifiersData[n]={referenceClippingOffsets:s,popperEscapeOffsets:c,isReferenceHidden:d,hasPopperEscaped:f},t.attributes.popper=Object.assign({},t.attributes.popper,{"data-popper-reference-hidden":d,"data-popper-escaped":f})}var TQ={name:"hide",enabled:!0,phase:"main",requiresIfExists:["preventOverflow"],fn:PQ};function xQ(e,t,n){var r=Mo(e),a=[zr,Vr].indexOf(r)>=0?-1:1,o=typeof n=="function"?n(Object.assign({},t,{placement:e})):n,i=o[0],l=o[1];return i=i||0,l=(l||0)*a,[zr,Aa].indexOf(r)>=0?{x:l,y:i}:{x:i,y:l}}function _Q(e){var t=e.state,n=e.options,r=e.name,a=n.offset,o=a===void 0?[0,0]:a,i=qC.reduce(function(d,f){return d[f]=xQ(f,t.rects,o),d},{}),l=i[t.placement],s=l.x,c=l.y;t.modifiersData.popperOffsets!=null&&(t.modifiersData.popperOffsets.x+=s,t.modifiersData.popperOffsets.y+=c),t.modifiersData[r]=i}var EQ={name:"offset",enabled:!0,phase:"main",requires:["popperOffsets"],fn:_Q};function MQ(e){var t=e.state,n=e.name;t.modifiersData[n]=iM({reference:t.rects.reference,element:t.rects.popper,strategy:"absolute",placement:t.placement})}var IQ={name:"popperOffsets",enabled:!0,phase:"read",fn:MQ,data:{}};function NQ(e){return e==="x"?"y":"x"}function AQ(e){var t=e.state,n=e.options,r=e.name,a=n.mainAxis,o=a===void 0?!0:a,i=n.altAxis,l=i===void 0?!1:i,s=n.boundary,c=n.rootBoundary,d=n.altBoundary,f=n.padding,p=n.tether,v=p===void 0?!0:p,m=n.tetherOffset,y=m===void 0?0:m,b=Yd(t,{boundary:s,rootBoundary:c,padding:f,altBoundary:d}),C=Mo(t.placement),S=Zu(t.placement),w=!S,k=ZC(C),$=NQ(k),O=t.modifiersData.popperOffsets,T=t.rects.reference,_=t.rects.popper,I=typeof 
y=="function"?y(Object.assign({},t.rects,{placement:t.placement})):y,L=typeof I=="number"?{mainAxis:I,altAxis:I}:Object.assign({mainAxis:0,altAxis:0},I),j=t.modifiersData.offset?t.modifiersData.offset[t.placement]:null,F={x:0,y:0};if(!!O){if(o){var N,D=k==="y"?Vr:zr,z=k==="y"?Na:Aa,B=k==="y"?"height":"width",M=O[k],E=M+b[D],K=M-b[z],W=v?-_[B]/2:0,Y=S===Yu?T[B]:_[B],q=S===Yu?-_[B]:-T[B],J=t.elements.arrow,ne=v&&J?XC(J):{width:0,height:0},oe=t.modifiersData["arrow#persistent"]?t.modifiersData["arrow#persistent"].padding:nM(),Q=oe[D],ae=oe[z],de=md(0,T[B],ne[B]),be=w?T[B]/2-W-de-Q-L.mainAxis:Y-de-Q-L.mainAxis,Ee=w?-T[B]/2+W+de+ae+L.mainAxis:q+de+ae+L.mainAxis,Pe=t.elements.arrow&&Ef(t.elements.arrow),Be=Pe?k==="y"?Pe.clientTop||0:Pe.clientLeft||0:0,te=(N=j==null?void 0:j[k])!=null?N:0,ie=M+be-te-Be,ge=M+Ee-te,ke=md(v?bv(E,ie):E,M,v?os(K,ge):K);O[k]=ke,F[k]=ke-M}if(l){var xe,Ie=k==="x"?Vr:zr,ye=k==="x"?Na:Aa,pe=O[$],ue=$==="y"?"height":"width",Ce=pe+b[Ie],je=pe-b[ye],ee=[Vr,zr].indexOf(C)!==-1,me=(xe=j==null?void 0:j[$])!=null?xe:0,He=ee?Ce:pe-T[ue]-_[ue]-me+L.altAxis,lt=ee?pe+T[ue]+_[ue]-me-L.altAxis:je,Ye=v&&ee?aQ(He,pe,lt):md(v?He:Ce,pe,v?lt:je);O[$]=Ye,F[$]=Ye-pe}t.modifiersData[r]=F}}var DQ={name:"preventOverflow",enabled:!0,phase:"main",fn:AQ,requiresIfExists:["offset"]};function RQ(e){return{scrollLeft:e.scrollLeft,scrollTop:e.scrollTop}}function LQ(e){return e===po(e)||!Ta(e)?JC(e):RQ(e)}function FQ(e){var t=e.getBoundingClientRect(),n=Gu(t.width)/e.offsetWidth||1,r=Gu(t.height)/e.offsetHeight||1;return n!==1||r!==1}function BQ(e,t,n){n===void 0&&(n=!1);var r=Ta(t),a=Ta(t)&&FQ(t),o=yl(t),i=Xu(e,a),l={scrollLeft:0,scrollTop:0},s={x:0,y:0};return(r||!r&&!n)&&((Ao(t)!=="body"||ew(o))&&(l=LQ(t)),Ta(t)?(s=Xu(t,!0),s.x+=t.clientLeft,s.y+=t.clientTop):o&&(s.x=QC(o))),{x:i.left+l.scrollLeft-s.x,y:i.top+l.scrollTop-s.y,width:i.width,height:i.height}}function VQ(e){var t=new Map,n=new Set,r=[];e.forEach(function(o){t.set(o.name,o)});function a(o){n.add(o.name);var 
i=[].concat(o.requires||[],o.requiresIfExists||[]);i.forEach(function(l){if(!n.has(l)){var s=t.get(l);s&&a(s)}}),r.push(o)}return e.forEach(function(o){n.has(o.name)||a(o)}),r}function zQ(e){var t=VQ(e);return JJ.reduce(function(n,r){return n.concat(t.filter(function(a){return a.phase===r}))},[])}function HQ(e){var t;return function(){return t||(t=new Promise(function(n){Promise.resolve().then(function(){t=void 0,n(e())})})),t}}function jQ(e){var t=e.reduce(function(n,r){var a=n[r.name];return n[r.name]=a?Object.assign({},a,r,{options:Object.assign({},a.options,r.options),data:Object.assign({},a.data,r.data)}):r,n},{});return Object.keys(t).map(function(n){return t[n]})}var y4={placement:"bottom",modifiers:[],strategy:"absolute"};function b4(){for(var e=arguments.length,t=new Array(e),n=0;n<e;n++)t[n]=arguments[n];return!t.some(function(r){return!(r&&typeof r.getBoundingClientRect=="function")})}function KQ(e){e===void 0&&(e={});var t=e,n=t.defaultModifiers,r=n===void 0?[]:n,a=t.defaultOptions,o=a===void 0?y4:a;return function(l,s,c){c===void 0&&(c=o);var d={placement:"bottom",orderedModifiers:[],options:Object.assign({},y4,o),modifiersData:{},elements:{reference:l,popper:s},attributes:{},styles:{}},f=[],p=!1,v={state:d,setOptions:function(C){var S=typeof C=="function"?C(d.options):C;y(),d.options=Object.assign({},o,d.options,S),d.scrollParents={reference:qu(l)?gd(l):l.contextElement?gd(l.contextElement):[],popper:gd(s)};var w=zQ(jQ([].concat(r,d.options.modifiers)));return d.orderedModifiers=w.filter(function(k){return k.enabled}),m(),v.update()},forceUpdate:function(){if(!p){var C=d.elements,S=C.reference,w=C.popper;if(!!b4(S,w)){d.rects={reference:BQ(S,Ef(w),d.options.strategy==="fixed"),popper:XC(w)},d.reset=!1,d.placement=d.options.placement,d.orderedModifiers.forEach(function(L){return d.modifiersData[L.name]=Object.assign({},L.data)});for(var k=0;k<d.orderedModifiers.length;k++){if(d.reset===!0){d.reset=!1,k=-1;continue}var 
$=d.orderedModifiers[k],O=$.fn,T=$.options,_=T===void 0?{}:T,I=$.name;typeof O=="function"&&(d=O({state:d,options:_,name:I,instance:v})||d)}}}},update:HQ(function(){return new Promise(function(b){v.forceUpdate(),b(d)})}),destroy:function(){y(),p=!0}};if(!b4(l,s))return v;v.setOptions(c).then(function(b){!p&&c.onFirstUpdate&&c.onFirstUpdate(b)});function m(){d.orderedModifiers.forEach(function(b){var C=b.name,S=b.options,w=S===void 0?{}:S,k=b.effect;if(typeof k=="function"){var $=k({state:d,name:C,instance:v,options:w}),O=function(){};f.push($||O)}})}function y(){f.forEach(function(b){return b()}),f=[]}return v}}var WQ=[pQ,IQ,fQ,tQ,EQ,OQ,DQ,sQ,TQ],lM=KQ({defaultModifiers:WQ});const UQ=["fixed","absolute"],YQ=Ze({boundariesPadding:{type:Number,default:0},fallbackPlacements:{type:Le(Array),default:()=>[]},gpuAcceleration:{type:Boolean,default:!0},offset:{type:Number,default:12},placement:{type:String,values:qC,default:"bottom"},popperOptions:{type:Le(Object),default:()=>({})},strategy:{type:String,values:UQ,default:"absolute"}}),sM=Ze(Ke(Te({},YQ),{style:{type:Le([String,Array,Object])},className:{type:Le([String,Array,Object])},effect:{type:String,default:"dark"},visible:Boolean,enterable:{type:Boolean,default:!0},pure:Boolean,popperClass:{type:Le([String,Array,Object])},popperStyle:{type:Le([String,Array,Object])},referenceEl:{type:Le(Object)},stopPopperMouseEvent:{type:Boolean,default:!0},zIndex:Number})),C4=(e,t)=>{const{placement:n,strategy:r,popperOptions:a}=e,o=Ke(Te({placement:n,strategy:r},a),{modifiers:GQ(e)});return XQ(o,t),ZQ(o,a==null?void 0:a.modifiers),o},qQ=e=>{if(!!Bt)return eo(e)};function GQ(e){const{offset:t,gpuAcceleration:n,fallbackPlacements:r}=e;return[{name:"offset",options:{offset:[0,t!=null?t:12]}},{name:"preventOverflow",options:{padding:{top:2,bottom:2,left:5,right:5}}},{name:"flip",options:{padding:5,fallbackPlacements:r!=null?r:[]}},{name:"computeStyles",options:{gpuAcceleration:n,adaptive:n}}]}function 
XQ(e,{arrowEl:t,arrowOffset:n}){e.modifiers.push({name:"arrow",options:{element:t,padding:n!=null?n:5}})}function ZQ(e,t){t&&(e.modifiers=[...e.modifiers,...t!=null?t:[]])}const JQ={name:"ElPopperContent"},QQ=G(Ke(Te({},JQ),{props:sM,emits:["mouseenter","mouseleave"],setup(e,{expose:t}){const n=e,{popperInstanceRef:r,contentRef:a,triggerRef:o}=ve(zC,void 0),{nextZIndex:i}=Pi(),l=De("popper"),s=H(),c=H(),d=H();ot(ME,{arrowRef:c,arrowOffset:d});const f=H(n.zIndex||i()),p=x(()=>qQ(n.referenceEl)||A(o)),v=x(()=>[{zIndex:A(f)},n.popperStyle]),m=x(()=>[l.b(),l.is("pure",n.pure),l.is(n.effect),n.popperClass]),y=({referenceEl:S,popperContentEl:w,arrowEl:k})=>{const $=C4(n,{arrowEl:k,arrowOffset:A(d)});return lM(S,w,$)},b=()=>{var S;(S=A(r))==null||S.update(),f.value=n.zIndex||f.value||i()},C=()=>{var S,w;const k={name:"eventListeners",enabled:n.visible};(w=(S=A(r))==null?void 0:S.setOptions)==null||w.call(S,$=>Ke(Te({},$),{modifiers:[...$.modifiers||[],k]})),b()};return et(()=>{let S;ce(p,w=>{var k;S==null||S();const $=A(r);if((k=$==null?void 0:$.destroy)==null||k.call($),w){const O=A(s);a.value=O,r.value=y({referenceEl:w,popperContentEl:O,arrowEl:A(c)}),S=ce(()=>w.getBoundingClientRect(),()=>b(),{immediate:!0})}else r.value=void 0},{immediate:!0}),ce(()=>n.visible,C,{immediate:!0}),ce(()=>C4(n,{arrowEl:A(c),arrowOffset:A(d)}),w=>{var k;return(k=r.value)==null?void 0:k.setOptions(w)})}),t({popperContentRef:s,popperInstanceRef:r,updatePopper:b,contentStyle:v}),(S,w)=>(R(),X("div",{ref_key:"popperContentRef",ref:s,style:Xe(A(v)),class:U(A(m)),role:"tooltip",onMouseenter:w[0]||(w[0]=k=>S.$emit("mouseenter",k)),onMouseleave:w[1]||(w[1]=k=>S.$emit("mouseleave",k))},[Oe(S.$slots,"default")],38))}}));var eee=Ae(QQ,[["__file","/home/runner/work/element-plus/element-plus/packages/components/popper/src/content.vue"]]);const tee={LIGHT:"light",DARK:"dark"};Ze({autoClose:{type:Number,default:0},cutoff:{type:Boolean,default:!1},disabled:{type:Boolean,default:!1}});function 
Oc(e,t){const n=$t(),r=x(()=>yr(n.props[t])?n.props[t]:n.props.teleported);return Tf({scope:e,from:t,replacement:"teleported",version:"2.1.0",ref:"https://element-plus.org/en-US/component/tooltip.html#attributes"},x(()=>yr(n.props[t]))),{compatTeleported:r}}const uM=xt(IJ),nee=G({name:"ElVisuallyHidden",props:{style:{type:[String,Object,Array]}},setup(e){return{computedStyle:x(()=>[e.style,{position:"absolute",border:0,width:1,height:1,padding:0,margin:-1,overflow:"hidden",clip:"rect(0, 0, 0, 0)",whiteSpace:"nowrap",wordWrap:"normal"}])}}});function ree(e,t,n,r,a,o){return R(),X("span",hn(e.$attrs,{style:e.computedStyle}),[Oe(e.$slots,"default")],16)}var cM=Ae(nee,[["render",ree],["__file","/home/runner/work/element-plus/element-plus/packages/components/visual-hidden/src/visual-hidden.vue"]]);const mr=Ze(Ke(Te(Te({},bZ),sM),{appendTo:{type:Le([String,Object]),default:VE},content:{type:String,default:""},rawContent:{type:Boolean,default:!1},persistent:Boolean,ariaLabel:String,visible:{type:Le(Boolean),default:null},transition:{type:String,default:"el-fade-in-linear"},teleported:{type:Boolean,default:!0},disabled:{type:Boolean}})),qd=Ze(Ke(Te({},QE),{disabled:Boolean,trigger:{type:Le([String,Array]),default:"hover"}})),aee=Ze({openDelay:{type:Number},visibleArrow:{type:Boolean,default:void 0},hideAfter:{type:Number,default:200},showArrow:{type:Boolean,default:!0}}),qm=Symbol("elTooltip"),oee=G({name:"ElTooltipContent",components:{ElPopperContent:eee,ElVisuallyHidden:cM},inheritAttrs:!1,props:mr,setup(e){const t=H(null),n=H(!1),r=H(!1),a=H(!1),o=H(!1),{controlled:i,id:l,open:s,trigger:c,onClose:d,onOpen:f,onShow:p,onHide:v,onBeforeShow:m,onBeforeHide:y}=ve(qm,void 0),b=x(()=>e.persistent);Lt(()=>{o.value=!0});const C=x(()=>A(b)?!0:A(s)),S=x(()=>e.disabled?!1:A(s)),w=x(()=>{var N;return(N=e.style)!=null?N:{}}),k=x(()=>!A(s));mZ(d);const $=()=>{v()},O=()=>{if(A(i))return!0},T=Tn(O,()=>{e.enterable&&A(c)==="hover"&&f()}),_=Tn(O,()=>{A(c)==="hover"&&d()}),I=()=>{var 
N,D;(D=(N=t.value)==null?void 0:N.updatePopper)==null||D.call(N),m==null||m()},L=()=>{y==null||y()},j=()=>{p()};let F;return ce(()=>A(s),N=>{N?F=vv(x(()=>{var D;return(D=t.value)==null?void 0:D.popperContentRef}),()=>{if(A(i))return;A(c)!=="hover"&&d()}):F==null||F()},{flush:"post"}),{ariaHidden:k,entering:r,leaving:a,id:l,intermediateOpen:n,contentStyle:w,contentRef:t,destroyed:o,shouldRender:C,shouldShow:S,open:s,onAfterShow:j,onBeforeEnter:I,onBeforeLeave:L,onContentEnter:T,onContentLeave:_,onTransitionLeave:$}}});function iee(e,t,n,r,a,o){const i=we("el-visually-hidden"),l=we("el-popper-content");return R(),fe(Ps,{disabled:!e.teleported,to:e.appendTo},[g(Vn,{name:e.transition,onAfterLeave:e.onTransitionLeave,onBeforeEnter:e.onBeforeEnter,onAfterEnter:e.onAfterShow,onBeforeLeave:e.onBeforeLeave},{default:re(()=>[e.shouldRender?at((R(),fe(l,hn({key:0,ref:"contentRef"},e.$attrs,{"aria-hidden":e.ariaHidden,"boundaries-padding":e.boundariesPadding,"fallback-placements":e.fallbackPlacements,"gpu-acceleration":e.gpuAcceleration,offset:e.offset,placement:e.placement,"popper-options":e.popperOptions,strategy:e.strategy,effect:e.effect,enterable:e.enterable,pure:e.pure,"popper-class":e.popperClass,"popper-style":[e.popperStyle,e.contentStyle],"reference-el":e.referenceEl,visible:e.shouldShow,"z-index":e.zIndex,onMouseenter:e.onContentEnter,onMouseleave:e.onContentLeave}),{default:re(()=>[se(" Workaround bug #6378 "),e.destroyed?se("v-if",!0):(R(),X(Fe,{key:0},[Oe(e.$slots,"default"),g(i,{id:e.id,role:"tooltip"},{default:re(()=>[yt(Me(e.ariaLabel),1)]),_:1},8,["id"])],64))]),_:3},16,["aria-hidden","boundaries-padding","fallback-placements","gpu-acceleration","offset","placement","popper-options","strategy","effect","enterable","pure","popper-class","popper-style","reference-el","visible","z-index","onMouseenter","onMouseleave"])),[[_t,e.shouldShow]]):se("v-if",!0)]),_:3},8,["name","onAfterLeave","onBeforeEnter","onAfterEnter","onBeforeLeave"])],8,["disabled","to"])}var 
lee=Ae(oee,[["render",iee],["__file","/home/runner/work/element-plus/element-plus/packages/components/tooltip/src/content.vue"]]);const see=(e,t)=>pt(e)?e.includes(t):e===t,Ls=(e,t,n)=>r=>{see(A(e),t)&&n(r)},uee=G({name:"ElTooltipTrigger",components:{ElPopperTrigger:VJ},props:qd,setup(e){const t=De("tooltip"),{controlled:n,id:r,open:a,onOpen:o,onClose:i,onToggle:l}=ve(qm,void 0),s=H(null),c=()=>{if(A(n)||e.disabled)return!0},d=yn(e,"trigger"),f=Tn(c,Ls(d,"hover",o)),p=Tn(c,Ls(d,"hover",i)),v=Tn(c,Ls(d,"click",S=>{S.button===0&&l(S)})),m=Tn(c,Ls(d,"focus",o)),y=Tn(c,Ls(d,"focus",i)),b=Tn(c,Ls(d,"contextmenu",S=>{S.preventDefault(),l(S)})),C=Tn(c,S=>{const{code:w}=S;(w===Ge.enter||w===Ge.space)&&l(S)});return{onBlur:y,onContextMenu:b,onFocus:m,onMouseenter:f,onMouseleave:p,onClick:v,onKeydown:C,open:a,id:r,triggerRef:s,ns:t}}});function cee(e,t,n,r,a,o){const i=we("el-popper-trigger");return R(),fe(i,{id:e.id,"virtual-ref":e.virtualRef,open:e.open,"virtual-triggering":e.virtualTriggering,class:U(e.ns.e("trigger")),onBlur:e.onBlur,onClick:e.onClick,onContextmenu:e.onContextMenu,onFocus:e.onFocus,onMouseenter:e.onMouseenter,onMouseleave:e.onMouseleave,onKeydown:e.onKeydown},{default:re(()=>[Oe(e.$slots,"default")]),_:3},8,["id","virtual-ref","open","virtual-triggering","class","onBlur","onClick","onContextmenu","onFocus","onMouseenter","onMouseleave","onKeydown"])}var dee=Ae(uee,[["render",cee],["__file","/home/runner/work/element-plus/element-plus/packages/components/tooltip/src/trigger.vue"]]);const{useModelToggleProps:fee,useModelToggle:hee,useModelToggleEmits:pee}=cZ("visible"),vee=G({name:"ElTooltip",components:{ElPopper:uM,ElPopperArrow:DJ,ElTooltipContent:lee,ElTooltipTrigger:dee},props:Te(Te(Te(Te(Te({},fee),mr),qd),ZE),aee),emits:[...pee,"before-show","before-hide","show","hide"],setup(e,{emit:t}){yZ();const 
n=x(()=>(sa(e.openDelay),e.openDelay||e.showAfter)),r=x(()=>(sa(e.visibleArrow),yr(e.visibleArrow)?e.visibleArrow:e.showArrow)),a=jC(),o=H(null),i=()=>{var v;const m=A(o);m&&((v=m.popperInstanceRef)==null||v.update())},l=H(!1),{show:s,hide:c}=hee({indicator:l}),{onOpen:d,onClose:f}=CZ({showAfter:n,hideAfter:yn(e,"hideAfter"),open:s,close:c}),p=x(()=>yr(e.visible));return ot(qm,{controlled:p,id:a,open:Cf(l),trigger:yn(e,"trigger"),onOpen:d,onClose:f,onToggle:()=>{A(l)?f():d()},onShow:()=>{t("show")},onHide:()=>{t("hide")},onBeforeShow:()=>{t("before-show")},onBeforeHide:()=>{t("before-hide")},updatePopper:i}),ce(()=>e.disabled,v=>{v&&l.value&&(l.value=!1)}),{compatShowAfter:n,compatShowArrow:r,popperRef:o,open:l,hide:c,updatePopper:i,onOpen:d,onClose:f}}}),mee=["innerHTML"],gee={key:1};function yee(e,t,n,r,a,o){const i=we("el-tooltip-trigger"),l=we("el-popper-arrow"),s=we("el-tooltip-content"),c=we("el-popper");return R(),fe(c,{ref:"popperRef"},{default:re(()=>[g(i,{disabled:e.disabled,trigger:e.trigger,"virtual-ref":e.virtualRef,"virtual-triggering":e.virtualTriggering},{default:re(()=>[e.$slots.default?Oe(e.$slots,"default",{key:0}):se("v-if",!0)]),_:3},8,["disabled","trigger","virtual-ref","virtual-triggering"]),g(s,{"aria-label":e.ariaLabel,"boundaries-padding":e.boundariesPadding,content:e.content,disabled:e.disabled,effect:e.effect,enterable:e.enterable,"fallback-placements":e.fallbackPlacements,"hide-after":e.hideAfter,"gpu-acceleration":e.gpuAcceleration,offset:e.offset,persistent:e.persistent,"popper-class":e.popperClass,"popper-style":e.popperStyle,placement:e.placement,"popper-options":e.popperOptions,pure:e.pure,"raw-content":e.rawContent,"reference-el":e.referenceEl,"show-after":e.compatShowAfter,strategy:e.strategy,teleported:e.teleported,transition:e.transition,"z-index":e.zIndex,"append-to":e.appendTo},{default:re(()=>[Oe(e.$slots,"content",{},()=>[e.rawContent?(R(),X("span",{key:0,innerHTML:e.content},null,8,mee)):(R(),X("span",gee,Me(e.content),1))]
),e.compatShowArrow?(R(),fe(l,{key:0,"arrow-offset":e.arrowOffset},null,8,["arrow-offset"])):se("v-if",!0)]),_:3},8,["aria-label","boundaries-padding","content","disabled","effect","enterable","fallback-placements","hide-after","gpu-acceleration","offset","persistent","popper-class","popper-style","placement","popper-options","pure","raw-content","reference-el","show-after","strategy","teleported","transition","z-index","append-to"])]),_:3},512)}var bee=Ae(vee,[["render",yee],["__file","/home/runner/work/element-plus/element-plus/packages/components/tooltip/src/tooltip.vue"]]);const Ur=xt(bee),Cee=Ze({valueKey:{type:String,default:"value"},modelValue:{type:[String,Number],default:""},debounce:{type:Number,default:300},placement:{type:Le(String),values:["top","top-start","top-end","bottom","bottom-start","bottom-end"],default:"bottom-start"},fetchSuggestions:{type:Le([Function,Array]),default:Qt},popperClass:{type:String,default:""},triggerOnFocus:{type:Boolean,default:!0},selectWhenUnmatched:{type:Boolean,default:!1},hideLoading:{type:Boolean,default:!1},popperAppendToBody:{type:Boolean,default:void 0},teleported:mr.teleported,highlightFirstItem:{type:Boolean,default:!1}}),wee={[Pt]:e=>wt(e),input:e=>wt(e),change:e=>wt(e),focus:e=>e instanceof FocusEvent,blur:e=>e instanceof FocusEvent,clear:()=>!0,select:e=>zt(e)},See=["aria-expanded","aria-owns"],kee={key:0},$ee=["id","aria-selected","onClick"],Oee={name:"ElAutocomplete",inheritAttrs:!1},Pee=G(Ke(Te({},Oee),{props:Cee,emits:wee,setup(e,{expose:t,emit:n}){const r=e,a="ElAutocomplete",o=De("autocomplete"),{compatTeleported:i}=Oc(a,"popperAppendToBody");let l=!1;const s=VC(),c=W6(),d=H([]),f=H(-1),p=H(""),v=H(!1),m=H(!1),y=H(!1),b=H(),C=H(),S=H(),w=H(),k=x(()=>o.b(String(Pf()))),$=x(()=>c.style),O=x(()=>(pt(d.value)&&d.value.length>0||y.value)&&v.value),T=x(()=>!r.hideLoading&&y.value),_=()=>{Ne(()=>{O.value&&(p.value=`${b.value.$el.offsetWidth}px`)})},L=Yn(Y=>{if(m.value)return;y.value=!0;const 
q=J=>{y.value=!1,!m.value&&(pt(J)?(d.value=J,f.value=r.highlightFirstItem?0:-1):qn(a,"autocomplete suggestions must be an array"))};if(pt(r.fetchSuggestions))q(r.fetchSuggestions);else{const J=r.fetchSuggestions(Y,q);pt(J)?q(J):hs(J)&&J.then(q)}},r.debounce),j=Y=>{const q=Boolean(Y);if(n("input",Y),n(Pt,Y),m.value=!1,v.value||(v.value=l&&q),!r.triggerOnFocus&&!Y){m.value=!0,d.value=[];return}l&&q&&(l=!1),L(Y)},F=Y=>{n("change",Y)},N=Y=>{v.value=!0,n("focus",Y),r.triggerOnFocus&&L(String(r.modelValue))},D=Y=>{n("blur",Y)},z=()=>{v.value=!1,l=!0,n(Pt,""),n("clear")},B=()=>{O.value&&f.value>=0&&f.value<d.value.length?K(d.value[f.value]):r.selectWhenUnmatched&&(n("select",{value:r.modelValue}),Ne(()=>{d.value=[],f.value=-1}))},M=()=>{v.value=!1},E=()=>{var Y;(Y=b.value)==null||Y.focus()},K=Y=>{n("input",Y[r.valueKey]),n(Pt,Y[r.valueKey]),n("select",Y),Ne(()=>{d.value=[],f.value=-1})},W=Y=>{if(!O.value||y.value)return;if(Y<0){f.value=-1;return}Y>=d.value.length&&(Y=d.value.length-1);const q=C.value.querySelector(`.${o.be("suggestion","wrap")}`),ne=q.querySelectorAll(`.${o.be("suggestion","list")} li`)[Y],oe=q.scrollTop,{offsetTop:Q,scrollHeight:ae}=ne;Q+ae>oe+q.clientHeight&&(q.scrollTop+=ae),Q<oe&&(q.scrollTop-=ae),f.value=Y,b.value.ref.setAttribute("aria-activedescendant",`${k.value}-item-${f.value}`)};return 
vv(w,M),et(()=>{b.value.ref.setAttribute("role","textbox"),b.value.ref.setAttribute("aria-autocomplete","list"),b.value.ref.setAttribute("aria-controls","id"),b.value.ref.setAttribute("aria-activedescendant",`${k.value}-item-${f.value}`)}),t({highlightedIndex:f,activated:v,loading:y,inputRef:b,popperRef:S,suggestions:d,handleSelect:K,handleKeyEnter:B,focus:E,close:M,highlight:W}),(Y,q)=>(R(),fe(A(Ur),{ref_key:"popperRef",ref:S,visible:A(O),"onUpdate:visible":q[2]||(q[2]=J=>_n(O)?O.value=J:null),placement:Y.placement,"fallback-placements":["bottom-start","top-start"],"popper-class":[A(o).e("popper"),Y.popperClass],teleported:A(i),"gpu-acceleration":!1,pure:"","manual-mode":"",effect:"light",trigger:"click",transition:`${A(o).namespace.value}-zoom-in-top`,persistent:"",onBeforeShow:_},{content:re(()=>[Z("div",{ref_key:"regionRef",ref:C,class:U([A(o).b("suggestion"),A(o).is("loading",A(T))]),style:Xe({minWidth:p.value,outline:"none"}),role:"region"},[g(A(xi),{id:A(k),tag:"ul","wrap-class":A(o).be("suggestion","wrap"),"view-class":A(o).be("suggestion","list"),role:"listbox"},{default:re(()=>[A(T)?(R(),X("li",kee,[g(A(ft),{class:U(A(o).is("loading"))},{default:re(()=>[g(A(gl))]),_:1},8,["class"])])):(R(!0),X(Fe,{key:1},Rt(d.value,(J,ne)=>(R(),X("li",{id:`${A(k)}-item-${ne}`,key:ne,class:U({highlighted:f.value===ne}),role:"option","aria-selected":f.value===ne,onClick:oe=>K(J)},[Oe(Y.$slots,"default",{item:J},()=>[yt(Me(J[Y.valueKey]),1)])],10,$ee))),128))]),_:3},8,["id","wrap-class","view-class"])],6)]),default:re(()=>[Z("div",{ref_key:"listboxRef",ref:w,class:U([A(o).b(),Y.$attrs.class]),style:Xe(A($)),role:"combobox","aria-haspopup":"listbox","aria-expanded":A(O),"aria-owns":A(k)},[g(A(Ra),hn({ref_key:"inputRef",ref:b},A(s),{"model-value":Y.modelValue,onInput:j,onChange:F,onFocus:N,onBlur:D,onClear:z,onKeydown:[q[0]||(q[0]=It(dt(J=>W(f.value-1),["prevent"]),["up"])),q[1]||(q[1]=It(dt(J=>W(f.value+1),["prevent"]),["down"])),It(B,["enter"]),It(M,["tab"])]}),sl({_:2},[Y.$s
lots.prepend?{name:"prepend",fn:re(()=>[Oe(Y.$slots,"prepend")])}:void 0,Y.$slots.append?{name:"append",fn:re(()=>[Oe(Y.$slots,"append")])}:void 0,Y.$slots.prefix?{name:"prefix",fn:re(()=>[Oe(Y.$slots,"prefix")])}:void 0,Y.$slots.suffix?{name:"suffix",fn:re(()=>[Oe(Y.$slots,"suffix")])}:void 0]),1040,["model-value","onKeydown"])],14,See)]),_:3},8,["visible","placement","popper-class","teleported","transition"]))}}));var Tee=Ae(Pee,[["__file","/home/runner/work/element-plus/element-plus/packages/components/autocomplete/src/autocomplete.vue"]]);const xee=xt(Tee),_ee=Ze({size:{type:[Number,String],values:Bo,default:"",validator:e=>typeof e=="number"},shape:{type:String,values:["circle","square"],default:"circle"},icon:{type:wr},src:{type:String,default:""},alt:String,srcSet:String,fit:{type:Le(String),default:"cover"}}),Eee={error:e=>e instanceof Event},Mee=["src","alt","srcset"],Iee={name:"ElAvatar"},Nee=G(Ke(Te({},Iee),{props:_ee,emits:Eee,setup(e,{emit:t}){const n=e,r=De("avatar"),a=H(!1),o=x(()=>{const{size:c,icon:d,shape:f}=n,p=[r.b()];return wt(c)&&p.push(r.m(c)),d&&p.push(r.m("icon")),f&&p.push(r.m(f)),p}),i=x(()=>{const{size:c}=n;return Yt(c)?{"--el-avatar-size":oo(c)}:void 0}),l=x(()=>({objectFit:n.fit}));ce(()=>n.src,()=>a.value=!1);function s(c){a.value=!0,t("error",c)}return(c,d)=>(R(),X("span",{class:U(A(o)),style:Xe(A(i))},[(c.src||c.srcSet)&&!a.value?(R(),X("img",{key:0,src:c.src,alt:c.alt,srcset:c.srcSet,style:Xe(A(l)),onError:s},null,44,Mee)):c.icon?(R(),fe(A(ft),{key:1},{default:re(()=>[(R(),fe(Kt(c.icon)))]),_:1})):Oe(c.$slots,"default",{key:2})],6))}}));var Aee=Ae(Nee,[["__file","/home/runner/work/element-plus/element-plus/packages/components/avatar/src/avatar.vue"]]);const Dee=xt(Aee),Ree={visibilityHeight:{type:Number,default:200},target:{type:String,default:""},right:{type:Number,default:40},bottom:{type:Number,default:40}},Lee={click:e=>e instanceof 
MouseEvent},Fee=["onClick"],Bee={name:"ElBacktop"},Vee=G(Ke(Te({},Bee),{props:Ree,emits:Lee,setup(e,{emit:t}){const n=e,r="ElBacktop",a=De("backtop"),o=Qn(),i=Qn(),l=H(!1),s=x(()=>({right:`${n.right}px`,bottom:`${n.bottom}px`})),c=()=>{if(!o.value)return;const v=Date.now(),m=o.value.scrollTop,y=()=>{if(!o.value)return;const b=(Date.now()-v)/500;b<1?(o.value.scrollTop=m*(1-JX(b)),requestAnimationFrame(y)):o.value.scrollTop=0};requestAnimationFrame(y)},d=()=>{o.value&&(l.value=o.value.scrollTop>=n.visibilityHeight)},f=v=>{c(),t("click",v)},p=oE(d,300);return et(()=>{var v;i.value=document,o.value=document.documentElement,n.target&&(o.value=(v=document.querySelector(n.target))!=null?v:void 0,o.value||qn(r,`target is not existed: ${n.target}`),i.value=o.value),Hn(i,"scroll",p)}),(v,m)=>(R(),fe(Vn,{name:`${A(a).namespace.value}-fade-in`},{default:re(()=>[l.value?(R(),X("div",{key:0,style:Xe(A(s)),class:U(A(a).b()),onClick:dt(f,["stop"])},[Oe(v.$slots,"default",{},()=>[g(A(ft),{class:U(A(a).e("icon"))},{default:re(()=>[g(A(kY))]),_:1},8,["class"])])],14,Fee)):se("v-if",!0)]),_:3},8,["name"]))}}));var zee=Ae(Vee,[["__file","/home/runner/work/element-plus/element-plus/packages/components/backtop/src/backtop.vue"]]);const Hee=xt(zee),jee=Ze({value:{type:[String,Number],default:""},max:{type:Number,default:99},isDot:Boolean,hidden:Boolean,type:{type:String,values:["primary","success","warning","info","danger"],default:"danger"}}),Kee=["textContent"],Wee={name:"ElBadge"},Uee=G(Ke(Te({},Wee),{props:jee,setup(e,{expose:t}){const n=e,r=De("badge"),a=x(()=>n.isDot?"":Yt(n.value)&&Yt(n.max)?n.max<n.value?`${n.max}+`:`${n.value}`:`${n.value}`);return 
t({content:a}),(o,i)=>(R(),X("div",{class:U(A(r).b())},[Oe(o.$slots,"default"),g(Vn,{name:`${A(r).namespace.value}-zoom-in-center`},{default:re(()=>[at(Z("sup",{class:U([A(r).e("content"),A(r).em("content",o.type),A(r).is("fixed",!!o.$slots.default),A(r).is("dot",o.isDot)]),textContent:Me(A(a))},null,10,Kee),[[_t,!o.hidden&&(A(a)||A(a)==="0"||o.isDot)]])]),_:1},8,["name"])],2))}}));var Yee=Ae(Uee,[["__file","/home/runner/work/element-plus/element-plus/packages/components/badge/src/badge.vue"]]);const dM=xt(Yee),qee=Ze({separator:{type:String,default:"/"},separatorIcon:{type:wr,default:""}}),Gee={name:"ElBreadcrumb"},Xee=G(Ke(Te({},Gee),{props:qee,setup(e){const t=e,n=De("breadcrumb"),r=H();return ot(CE,t),et(()=>{const a=r.value.querySelectorAll(`.${n.e("item")}`);a.length&&a[a.length-1].setAttribute("aria-current","page")}),(a,o)=>(R(),X("div",{ref_key:"breadcrumb",ref:r,class:U(A(n).b()),"aria-label":"Breadcrumb",role:"navigation"},[Oe(a.$slots,"default")],2))}}));var Zee=Ae(Xee,[["__file","/home/runner/work/element-plus/element-plus/packages/components/breadcrumb/src/breadcrumb.vue"]]);const Jee=Ze({to:{type:Le([String,Object]),default:""},replace:{type:Boolean,default:!1}}),Qee={name:"ElBreadcrumbItem"},ete=G(Ke(Te({},Qee),{props:Jee,setup(e){const t=e,r=$t().appContext.config.globalProperties.$router,a=ve(CE,{}),o=De("breadcrumb"),{separator:i,separatorIcon:l}=a,s=H(),c=()=>{!t.to||!r||(t.replace?r.replace(t.to):r.push(t.to))};return(d,f)=>(R(),X("span",{class:U(A(o).e("item"))},[Z("span",{ref_key:"link",ref:s,class:U([A(o).e("inner"),A(o).is("link",!!d.to)]),role:"link",onClick:c},[Oe(d.$slots,"default")],2),A(l)?(R(),fe(A(ft),{key:0,class:U(A(o).e("separator"))},{default:re(()=>[(R(),fe(Kt(A(l))))]),_:1},8,["class"])):(R(),X("span",{key:1,class:U(A(o).e("separator")),role:"presentation"},Me(A(i)),3))],2))}}));var fM=Ae(ete,[["__file","/home/runner/work/element-plus/element-plus/packages/components/breadcrumb/src/breadcrumb-item.vue"]]);const 
tte=xt(Zee,{BreadcrumbItem:fM}),nte=En(fM),m0=["default","primary","success","warning","info","danger","text",""],rte=["button","submit","reset"],g0=Ze({size:Km,disabled:Boolean,type:{type:String,values:m0,default:""},icon:{type:wr,default:""},nativeType:{type:String,values:rte,default:"button"},loading:Boolean,loadingIcon:{type:wr,default:()=>gl},plain:Boolean,autofocus:Boolean,round:Boolean,circle:Boolean,color:String,dark:Boolean,autoInsertSpace:{type:Boolean,default:void 0}}),ate={click:e=>e instanceof MouseEvent};function br(e,t){ote(e)&&(e="100%");var n=ite(e);return e=t===360?e:Math.min(t,Math.max(0,parseFloat(e))),n&&(e=parseInt(String(e*t),10)/100),Math.abs(e-t)<1e-6?1:(t===360?e=(e<0?e%t+t:e%t)/parseFloat(String(t)):e=e%t/parseFloat(String(t)),e)}function lh(e){return Math.min(1,Math.max(0,e))}function ote(e){return typeof e=="string"&&e.indexOf(".")!==-1&&parseFloat(e)===1}function ite(e){return typeof e=="string"&&e.indexOf("%")!==-1}function hM(e){return e=parseFloat(e),(isNaN(e)||e<0||e>1)&&(e=1),e}function sh(e){return e<=1?"".concat(Number(e)*100,"%"):e}function Ul(e){return e.length===1?"0"+e:String(e)}function lte(e,t,n){return{r:br(e,255)*255,g:br(t,255)*255,b:br(n,255)*255}}function w4(e,t,n){e=br(e,255),t=br(t,255),n=br(n,255);var r=Math.max(e,t,n),a=Math.min(e,t,n),o=0,i=0,l=(r+a)/2;if(r===a)i=0,o=0;else{var s=r-a;switch(i=l>.5?s/(2-r-a):s/(r+a),r){case e:o=(t-n)/s+(t<n?6:0);break;case t:o=(n-e)/s+2;break;case n:o=(e-t)/s+4;break}o/=6}return{h:o,s:i,l}}function ly(e,t,n){return n<0&&(n+=1),n>1&&(n-=1),n<1/6?e+(t-e)*(6*n):n<1/2?t:n<2/3?e+(t-e)*(2/3-n)*6:e}function ste(e,t,n){var r,a,o;if(e=br(e,360),t=br(t,100),n=br(n,100),t===0)a=n,o=n,r=n;else{var i=n<.5?n*(1+t):n+t-n*t,l=2*n-i;r=ly(l,i,e+1/3),a=ly(l,i,e),o=ly(l,i,e-1/3)}return{r:r*255,g:a*255,b:o*255}}function y0(e,t,n){e=br(e,255),t=br(t,255),n=br(n,255);var r=Math.max(e,t,n),a=Math.min(e,t,n),o=0,i=r,l=r-a,s=r===0?0:l/r;if(r===a)o=0;else{switch(r){case e:o=(t-n)/l+(t<n?6:0);break;case 
t:o=(n-e)/l+2;break;case n:o=(e-t)/l+4;break}o/=6}return{h:o,s,v:i}}function ute(e,t,n){e=br(e,360)*6,t=br(t,100),n=br(n,100);var r=Math.floor(e),a=e-r,o=n*(1-t),i=n*(1-a*t),l=n*(1-(1-a)*t),s=r%6,c=[n,i,o,o,l,n][s],d=[l,n,n,i,o,o][s],f=[o,o,l,n,n,i][s];return{r:c*255,g:d*255,b:f*255}}function b0(e,t,n,r){var a=[Ul(Math.round(e).toString(16)),Ul(Math.round(t).toString(16)),Ul(Math.round(n).toString(16))];return r&&a[0].startsWith(a[0].charAt(1))&&a[1].startsWith(a[1].charAt(1))&&a[2].startsWith(a[2].charAt(1))?a[0].charAt(0)+a[1].charAt(0)+a[2].charAt(0):a.join("")}function cte(e,t,n,r,a){var o=[Ul(Math.round(e).toString(16)),Ul(Math.round(t).toString(16)),Ul(Math.round(n).toString(16)),Ul(dte(r))];return a&&o[0].startsWith(o[0].charAt(1))&&o[1].startsWith(o[1].charAt(1))&&o[2].startsWith(o[2].charAt(1))&&o[3].startsWith(o[3].charAt(1))?o[0].charAt(0)+o[1].charAt(0)+o[2].charAt(0)+o[3].charAt(0):o.join("")}function dte(e){return Math.round(parseFloat(e)*255).toString(16)}function S4(e){return ea(e)/255}function ea(e){return parseInt(e,16)}function fte(e){return{r:e>>16,g:(e&65280)>>8,b:e&255}}var 
C0={aliceblue:"#f0f8ff",antiquewhite:"#faebd7",aqua:"#00ffff",aquamarine:"#7fffd4",azure:"#f0ffff",beige:"#f5f5dc",bisque:"#ffe4c4",black:"#000000",blanchedalmond:"#ffebcd",blue:"#0000ff",blueviolet:"#8a2be2",brown:"#a52a2a",burlywood:"#deb887",cadetblue:"#5f9ea0",chartreuse:"#7fff00",chocolate:"#d2691e",coral:"#ff7f50",cornflowerblue:"#6495ed",cornsilk:"#fff8dc",crimson:"#dc143c",cyan:"#00ffff",darkblue:"#00008b",darkcyan:"#008b8b",darkgoldenrod:"#b8860b",darkgray:"#a9a9a9",darkgreen:"#006400",darkgrey:"#a9a9a9",darkkhaki:"#bdb76b",darkmagenta:"#8b008b",darkolivegreen:"#556b2f",darkorange:"#ff8c00",darkorchid:"#9932cc",darkred:"#8b0000",darksalmon:"#e9967a",darkseagreen:"#8fbc8f",darkslateblue:"#483d8b",darkslategray:"#2f4f4f",darkslategrey:"#2f4f4f",darkturquoise:"#00ced1",darkviolet:"#9400d3",deeppink:"#ff1493",deepskyblue:"#00bfff",dimgray:"#696969",dimgrey:"#696969",dodgerblue:"#1e90ff",firebrick:"#b22222",floralwhite:"#fffaf0",forestgreen:"#228b22",fuchsia:"#ff00ff",gainsboro:"#dcdcdc",ghostwhite:"#f8f8ff",goldenrod:"#daa520",gold:"#ffd700",gray:"#808080",green:"#008000",greenyellow:"#adff2f",grey:"#808080",honeydew:"#f0fff0",hotpink:"#ff69b4",indianred:"#cd5c5c",indigo:"#4b0082",ivory:"#fffff0",khaki:"#f0e68c",lavenderblush:"#fff0f5",lavender:"#e6e6fa",lawngreen:"#7cfc00",lemonchiffon:"#fffacd",lightblue:"#add8e6",lightcoral:"#f08080",lightcyan:"#e0ffff",lightgoldenrodyellow:"#fafad2",lightgray:"#d3d3d3",lightgreen:"#90ee90",lightgrey:"#d3d3d3",lightpink:"#ffb6c1",lightsalmon:"#ffa07a",lightseagreen:"#20b2aa",lightskyblue:"#87cefa",lightslategray:"#778899",lightslategrey:"#778899",lightsteelblue:"#b0c4de",lightyellow:"#ffffe0",lime:"#00ff00",limegreen:"#32cd32",linen:"#faf0e6",magenta:"#ff00ff",maroon:"#800000",mediumaquamarine:"#66cdaa",mediumblue:"#0000cd",mediumorchid:"#ba55d3",mediumpurple:"#9370db",mediumseagreen:"#3cb371",mediumslateblue:"#7b68ee",mediumspringgreen:"#00fa9a",mediumturquoise:"#48d1cc",mediumvioletred:"#c71585",midnightblue:"#191970",mint
cream:"#f5fffa",mistyrose:"#ffe4e1",moccasin:"#ffe4b5",navajowhite:"#ffdead",navy:"#000080",oldlace:"#fdf5e6",olive:"#808000",olivedrab:"#6b8e23",orange:"#ffa500",orangered:"#ff4500",orchid:"#da70d6",palegoldenrod:"#eee8aa",palegreen:"#98fb98",paleturquoise:"#afeeee",palevioletred:"#db7093",papayawhip:"#ffefd5",peachpuff:"#ffdab9",peru:"#cd853f",pink:"#ffc0cb",plum:"#dda0dd",powderblue:"#b0e0e6",purple:"#800080",rebeccapurple:"#663399",red:"#ff0000",rosybrown:"#bc8f8f",royalblue:"#4169e1",saddlebrown:"#8b4513",salmon:"#fa8072",sandybrown:"#f4a460",seagreen:"#2e8b57",seashell:"#fff5ee",sienna:"#a0522d",silver:"#c0c0c0",skyblue:"#87ceeb",slateblue:"#6a5acd",slategray:"#708090",slategrey:"#708090",snow:"#fffafa",springgreen:"#00ff7f",steelblue:"#4682b4",tan:"#d2b48c",teal:"#008080",thistle:"#d8bfd8",tomato:"#ff6347",turquoise:"#40e0d0",violet:"#ee82ee",wheat:"#f5deb3",white:"#ffffff",whitesmoke:"#f5f5f5",yellow:"#ffff00",yellowgreen:"#9acd32"};function iu(e){var t={r:0,g:0,b:0},n=1,r=null,a=null,o=null,i=!1,l=!1;return typeof e=="string"&&(e=vte(e)),typeof e=="object"&&(Yo(e.r)&&Yo(e.g)&&Yo(e.b)?(t=lte(e.r,e.g,e.b),i=!0,l=String(e.r).substr(-1)==="%"?"prgb":"rgb"):Yo(e.h)&&Yo(e.s)&&Yo(e.v)?(r=sh(e.s),a=sh(e.v),t=ute(e.h,r,a),i=!0,l="hsv"):Yo(e.h)&&Yo(e.s)&&Yo(e.l)&&(r=sh(e.s),o=sh(e.l),t=ste(e.h,r,o),i=!0,l="hsl"),Object.prototype.hasOwnProperty.call(e,"a")&&(n=e.a)),n=hM(n),{ok:i,format:e.format||l,r:Math.min(255,Math.max(t.r,0)),g:Math.min(255,Math.max(t.g,0)),b:Math.min(255,Math.max(t.b,0)),a:n}}var hte="[-\\+]?\\d+%?",pte="[-\\+]?\\d*\\.\\d+%?",qi="(?:".concat(pte,")|(?:").concat(hte,")"),sy="[\\s|\\(]+(".concat(qi,")[,|\\s]+(").concat(qi,")[,|\\s]+(").concat(qi,")\\s*\\)?"),uy="[\\s|\\(]+(".concat(qi,")[,|\\s]+(").concat(qi,")[,|\\s]+(").concat(qi,")[,|\\s]+(").concat(qi,")\\s*\\)?"),ja={CSS_UNIT:new RegExp(qi),rgb:new RegExp("rgb"+sy),rgba:new RegExp("rgba"+uy),hsl:new RegExp("hsl"+sy),hsla:new RegExp("hsla"+uy),hsv:new RegExp("hsv"+sy),hsva:new 
RegExp("hsva"+uy),hex3:/^#?([0-9a-fA-F]{1})([0-9a-fA-F]{1})([0-9a-fA-F]{1})$/,hex6:/^#?([0-9a-fA-F]{2})([0-9a-fA-F]{2})([0-9a-fA-F]{2})$/,hex4:/^#?([0-9a-fA-F]{1})([0-9a-fA-F]{1})([0-9a-fA-F]{1})([0-9a-fA-F]{1})$/,hex8:/^#?([0-9a-fA-F]{2})([0-9a-fA-F]{2})([0-9a-fA-F]{2})([0-9a-fA-F]{2})$/};function vte(e){if(e=e.trim().toLowerCase(),e.length===0)return!1;var t=!1;if(C0[e])e=C0[e],t=!0;else if(e==="transparent")return{r:0,g:0,b:0,a:0,format:"name"};var n=ja.rgb.exec(e);return n?{r:n[1],g:n[2],b:n[3]}:(n=ja.rgba.exec(e),n?{r:n[1],g:n[2],b:n[3],a:n[4]}:(n=ja.hsl.exec(e),n?{h:n[1],s:n[2],l:n[3]}:(n=ja.hsla.exec(e),n?{h:n[1],s:n[2],l:n[3],a:n[4]}:(n=ja.hsv.exec(e),n?{h:n[1],s:n[2],v:n[3]}:(n=ja.hsva.exec(e),n?{h:n[1],s:n[2],v:n[3],a:n[4]}:(n=ja.hex8.exec(e),n?{r:ea(n[1]),g:ea(n[2]),b:ea(n[3]),a:S4(n[4]),format:t?"name":"hex8"}:(n=ja.hex6.exec(e),n?{r:ea(n[1]),g:ea(n[2]),b:ea(n[3]),format:t?"name":"hex"}:(n=ja.hex4.exec(e),n?{r:ea(n[1]+n[1]),g:ea(n[2]+n[2]),b:ea(n[3]+n[3]),a:S4(n[4]+n[4]),format:t?"name":"hex8"}:(n=ja.hex3.exec(e),n?{r:ea(n[1]+n[1]),g:ea(n[2]+n[2]),b:ea(n[3]+n[3]),format:t?"name":"hex"}:!1)))))))))}function Yo(e){return Boolean(ja.CSS_UNIT.exec(String(e)))}var pM=function(){function e(t,n){t===void 0&&(t=""),n===void 0&&(n={});var r;if(t instanceof e)return t;typeof t=="number"&&(t=fte(t)),this.originalInput=t;var a=iu(t);this.originalInput=t,this.r=a.r,this.g=a.g,this.b=a.b,this.a=a.a,this.roundA=Math.round(100*this.a)/100,this.format=(r=n.format)!==null&&r!==void 0?r:a.format,this.gradientType=n.gradientType,this.r<1&&(this.r=Math.round(this.r)),this.g<1&&(this.g=Math.round(this.g)),this.b<1&&(this.b=Math.round(this.b)),this.isValid=a.ok}return e.prototype.isDark=function(){return this.getBrightness()<128},e.prototype.isLight=function(){return!this.isDark()},e.prototype.getBrightness=function(){var t=this.toRgb();return(t.r*299+t.g*587+t.b*114)/1e3},e.prototype.getLuminance=function(){var t=this.toRgb(),n,r,a,o=t.r/255,i=t.g/255,l=t.b/255;return 
o<=.03928?n=o/12.92:n=Math.pow((o+.055)/1.055,2.4),i<=.03928?r=i/12.92:r=Math.pow((i+.055)/1.055,2.4),l<=.03928?a=l/12.92:a=Math.pow((l+.055)/1.055,2.4),.2126*n+.7152*r+.0722*a},e.prototype.getAlpha=function(){return this.a},e.prototype.setAlpha=function(t){return this.a=hM(t),this.roundA=Math.round(100*this.a)/100,this},e.prototype.toHsv=function(){var t=y0(this.r,this.g,this.b);return{h:t.h*360,s:t.s,v:t.v,a:this.a}},e.prototype.toHsvString=function(){var t=y0(this.r,this.g,this.b),n=Math.round(t.h*360),r=Math.round(t.s*100),a=Math.round(t.v*100);return this.a===1?"hsv(".concat(n,", ").concat(r,"%, ").concat(a,"%)"):"hsva(".concat(n,", ").concat(r,"%, ").concat(a,"%, ").concat(this.roundA,")")},e.prototype.toHsl=function(){var t=w4(this.r,this.g,this.b);return{h:t.h*360,s:t.s,l:t.l,a:this.a}},e.prototype.toHslString=function(){var t=w4(this.r,this.g,this.b),n=Math.round(t.h*360),r=Math.round(t.s*100),a=Math.round(t.l*100);return this.a===1?"hsl(".concat(n,", ").concat(r,"%, ").concat(a,"%)"):"hsla(".concat(n,", ").concat(r,"%, ").concat(a,"%, ").concat(this.roundA,")")},e.prototype.toHex=function(t){return t===void 0&&(t=!1),b0(this.r,this.g,this.b,t)},e.prototype.toHexString=function(t){return t===void 0&&(t=!1),"#"+this.toHex(t)},e.prototype.toHex8=function(t){return t===void 0&&(t=!1),cte(this.r,this.g,this.b,this.a,t)},e.prototype.toHex8String=function(t){return t===void 0&&(t=!1),"#"+this.toHex8(t)},e.prototype.toRgb=function(){return{r:Math.round(this.r),g:Math.round(this.g),b:Math.round(this.b),a:this.a}},e.prototype.toRgbString=function(){var t=Math.round(this.r),n=Math.round(this.g),r=Math.round(this.b);return this.a===1?"rgb(".concat(t,", ").concat(n,", ").concat(r,")"):"rgba(".concat(t,", ").concat(n,", ").concat(r,", ").concat(this.roundA,")")},e.prototype.toPercentageRgb=function(){var t=function(n){return"".concat(Math.round(br(n,255)*100),"%")};return{r:t(this.r),g:t(this.g),b:t(this.b),a:this.a}},e.prototype.toPercentageRgbString=function(){var 
t=function(n){return Math.round(br(n,255)*100)};return this.a===1?"rgb(".concat(t(this.r),"%, ").concat(t(this.g),"%, ").concat(t(this.b),"%)"):"rgba(".concat(t(this.r),"%, ").concat(t(this.g),"%, ").concat(t(this.b),"%, ").concat(this.roundA,")")},e.prototype.toName=function(){if(this.a===0)return"transparent";if(this.a<1)return!1;for(var t="#"+b0(this.r,this.g,this.b,!1),n=0,r=Object.entries(C0);n<r.length;n++){var a=r[n],o=a[0],i=a[1];if(t===i)return o}return!1},e.prototype.toString=function(t){var n=Boolean(t);t=t!=null?t:this.format;var r=!1,a=this.a<1&&this.a>=0,o=!n&&a&&(t.startsWith("hex")||t==="name");return o?t==="name"&&this.a===0?this.toName():this.toRgbString():(t==="rgb"&&(r=this.toRgbString()),t==="prgb"&&(r=this.toPercentageRgbString()),(t==="hex"||t==="hex6")&&(r=this.toHexString()),t==="hex3"&&(r=this.toHexString(!0)),t==="hex4"&&(r=this.toHex8String(!0)),t==="hex8"&&(r=this.toHex8String()),t==="name"&&(r=this.toName()),t==="hsl"&&(r=this.toHslString()),t==="hsv"&&(r=this.toHsvString()),r||this.toHexString())},e.prototype.toNumber=function(){return(Math.round(this.r)<<16)+(Math.round(this.g)<<8)+Math.round(this.b)},e.prototype.clone=function(){return new e(this.toString())},e.prototype.lighten=function(t){t===void 0&&(t=10);var n=this.toHsl();return n.l+=t/100,n.l=lh(n.l),new e(n)},e.prototype.brighten=function(t){t===void 0&&(t=10);var n=this.toRgb();return n.r=Math.max(0,Math.min(255,n.r-Math.round(255*-(t/100)))),n.g=Math.max(0,Math.min(255,n.g-Math.round(255*-(t/100)))),n.b=Math.max(0,Math.min(255,n.b-Math.round(255*-(t/100)))),new e(n)},e.prototype.darken=function(t){t===void 0&&(t=10);var n=this.toHsl();return n.l-=t/100,n.l=lh(n.l),new e(n)},e.prototype.tint=function(t){return t===void 0&&(t=10),this.mix("white",t)},e.prototype.shade=function(t){return t===void 0&&(t=10),this.mix("black",t)},e.prototype.desaturate=function(t){t===void 0&&(t=10);var n=this.toHsl();return n.s-=t/100,n.s=lh(n.s),new 
e(n)},e.prototype.saturate=function(t){t===void 0&&(t=10);var n=this.toHsl();return n.s+=t/100,n.s=lh(n.s),new e(n)},e.prototype.greyscale=function(){return this.desaturate(100)},e.prototype.spin=function(t){var n=this.toHsl(),r=(n.h+t)%360;return n.h=r<0?360+r:r,new e(n)},e.prototype.mix=function(t,n){n===void 0&&(n=50);var r=this.toRgb(),a=new e(t).toRgb(),o=n/100,i={r:(a.r-r.r)*o+r.r,g:(a.g-r.g)*o+r.g,b:(a.b-r.b)*o+r.b,a:(a.a-r.a)*o+r.a};return new e(i)},e.prototype.analogous=function(t,n){t===void 0&&(t=6),n===void 0&&(n=30);var r=this.toHsl(),a=360/n,o=[this];for(r.h=(r.h-(a*t>>1)+720)%360;--t;)r.h=(r.h+a)%360,o.push(new e(r));return o},e.prototype.complement=function(){var t=this.toHsl();return t.h=(t.h+180)%360,new e(t)},e.prototype.monochromatic=function(t){t===void 0&&(t=6);for(var n=this.toHsv(),r=n.h,a=n.s,o=n.v,i=[],l=1/t;t--;)i.push(new e({h:r,s:a,v:o})),o=(o+l)%1;return i},e.prototype.splitcomplement=function(){var t=this.toHsl(),n=t.h;return[this,new e({h:(n+72)%360,s:t.s,l:t.l}),new e({h:(n+216)%360,s:t.s,l:t.l})]},e.prototype.onBackground=function(t){var n=this.toRgb(),r=new e(t).toRgb();return new e({r:r.r+(n.r-r.r)*n.a,g:r.g+(n.g-r.g)*n.a,b:r.b+(n.b-r.b)*n.a})},e.prototype.triad=function(){return this.polyad(3)},e.prototype.tetrad=function(){return this.polyad(4)},e.prototype.polyad=function(t){for(var n=this.toHsl(),r=n.h,a=[this],o=360/t,i=1;i<t;i++)a.push(new e({h:(r+i*o)%360,s:n.s,l:n.l}));return a},e.prototype.equals=function(t){return this.toRgbString()===new e(t).toRgbString()},e}();function Ri(e,t=20){return e.mix("#141414",t).toString()}function mte(e){const t=Ms();return x(()=>{let n={};const r=e.color;if(r){const a=new 
pM(r),o=e.dark?a.tint(20).toString():Ri(a,20);if(e.plain)n={"--el-button-bg-color":e.dark?Ri(a,90):a.tint(90).toString(),"--el-button-text-color":r,"--el-button-border-color":e.dark?Ri(a,50):a.tint(50).toString(),"--el-button-hover-text-color":"var(--el-color-white)","--el-button-hover-bg-color":r,"--el-button-hover-border-color":r,"--el-button-active-bg-color":o,"--el-button-active-text-color":"var(--el-color-white)","--el-button-active-border-color":o},t.value&&(n["--el-button-disabled-bg-color"]=e.dark?Ri(a,90):a.tint(90).toString(),n["--el-button-disabled-text-color"]=e.dark?Ri(a,50):a.tint(50).toString(),n["--el-button-disabled-border-color"]=e.dark?Ri(a,80):a.tint(80).toString());else{const i=e.dark?Ri(a,30):a.tint(30).toString(),l=a.isDark()?"var(--el-color-white)":"var(--el-color-black)";if(n={"--el-button-bg-color":r,"--el-button-text-color":l,"--el-button-border-color":r,"--el-button-hover-bg-color":i,"--el-button-hover-text-color":l,"--el-button-hover-border-color":i,"--el-button-active-bg-color":o,"--el-button-active-border-color":o},t.value){const s=e.dark?Ri(a,50):a.tint(50).toString();n["--el-button-disabled-bg-color"]=s,n["--el-button-disabled-text-color"]=e.dark?"rgba(255, 255, 255, 0.5)":"var(--el-color-white)",n["--el-button-disabled-border-color"]=s}}}return n})}const gte=["disabled","autofocus","type"],yte={name:"ElButton"},bte=G(Ke(Te({},yte),{props:g0,emits:ate,setup(e,{expose:t,emit:n}){const r=e,a=wf(),o=ve(wE,void 0),i=kc("button"),l=De("button"),{form:s}=$c(),c=Gn(x(()=>o==null?void 0:o.size)),d=Ms(),f=H(),p=x(()=>r.type||(o==null?void 0:o.type)||""),v=x(()=>{var C,S,w;return(w=(S=r.autoInsertSpace)!=null?S:(C=i.value)==null?void 0:C.autoInsertSpace)!=null?w:!1}),m=x(()=>{var C;const S=(C=a.default)==null?void 0:C.call(a);if(v.value&&(S==null?void 0:S.length)===1){const w=S[0];if((w==null?void 0:w.type)===Fo){const 
k=w.children;return/^\p{Unified_Ideograph}{2}$/u.test(k.trim())}}return!1}),y=mte(r),b=C=>{r.nativeType==="reset"&&(s==null||s.resetFields()),n("click",C)};return t({ref:f,size:c,type:p,disabled:d,shouldAddSpace:m}),(C,S)=>(R(),X("button",{ref_key:"_ref",ref:f,class:U([A(l).b(),A(l).m(A(p)),A(l).m(A(c)),A(l).is("disabled",A(d)),A(l).is("loading",C.loading),A(l).is("plain",C.plain),A(l).is("round",C.round),A(l).is("circle",C.circle)]),disabled:A(d)||C.loading,autofocus:C.autofocus,type:C.nativeType,style:Xe(A(y)),onClick:b},[C.loading?(R(),X(Fe,{key:0},[C.$slots.loading?Oe(C.$slots,"loading",{key:0}):(R(),fe(A(ft),{key:1,class:U(A(l).is("loading"))},{default:re(()=>[(R(),fe(Kt(C.loadingIcon)))]),_:1},8,["class"]))],2112)):C.icon||C.$slots.icon?(R(),fe(A(ft),{key:1},{default:re(()=>[C.icon?(R(),fe(Kt(C.icon),{key:0})):Oe(C.$slots,"icon",{key:1})]),_:3})):se("v-if",!0),C.$slots.default?(R(),X("span",{key:2,class:U({[A(l).em("text","expand")]:A(m)})},[Oe(C.$slots,"default")],2)):se("v-if",!0)],14,gte))}}));var Cte=Ae(bte,[["__file","/home/runner/work/element-plus/element-plus/packages/components/button/src/button.vue"]]);const wte={size:g0.size,type:g0.type},Ste={name:"ElButtonGroup"},kte=G(Ke(Te({},Ste),{props:wte,setup(e){const t=e;ot(wE,bt({size:yn(t,"size"),type:yn(t,"type")}));const n=De("button");return(r,a)=>(R(),X("div",{class:U(`${A(n).b("group")}`)},[Oe(r.$slots,"default")],2))}}));var vM=Ae(kte,[["__file","/home/runner/work/element-plus/element-plus/packages/components/button/src/button-group.vue"]]);const xa=xt(Cte,{ButtonGroup:vM}),mM=En(vM);var vo=typeof globalThis!="undefined"?globalThis:typeof window!="undefined"?window:typeof global!="undefined"?global:typeof self!="undefined"?self:{},gM={exports:{}};(function(e,t){(function(n,r){e.exports=r()})(vo,function(){var n=1e3,r=6e4,a=36e5,o="millisecond",i="second",l="minute",s="hour",c="day",d="week",f="month",p="quarter",v="year",m="date",y="Invalid 
Date",b=/^(\d{4})[-/]?(\d{1,2})?[-/]?(\d{0,2})[Tt\s]*(\d{1,2})?:?(\d{1,2})?:?(\d{1,2})?[.:]?(\d+)?$/,C=/\[([^\]]+)]|Y{1,4}|M{1,4}|D{1,2}|d{1,4}|H{1,2}|h{1,2}|a|A|m{1,2}|s{1,2}|Z{1,2}|SSS/g,S={name:"en",weekdays:"Sunday_Monday_Tuesday_Wednesday_Thursday_Friday_Saturday".split("_"),months:"January_February_March_April_May_June_July_August_September_October_November_December".split("_")},w=function(N,D,z){var B=String(N);return!B||B.length>=D?N:""+Array(D+1-B.length).join(z)+N},k={s:w,z:function(N){var D=-N.utcOffset(),z=Math.abs(D),B=Math.floor(z/60),M=z%60;return(D<=0?"+":"-")+w(B,2,"0")+":"+w(M,2,"0")},m:function N(D,z){if(D.date()<z.date())return-N(z,D);var B=12*(z.year()-D.year())+(z.month()-D.month()),M=D.clone().add(B,f),E=z-M<0,K=D.clone().add(B+(E?-1:1),f);return+(-(B+(z-M)/(E?M-K:K-M))||0)},a:function(N){return N<0?Math.ceil(N)||0:Math.floor(N)},p:function(N){return{M:f,y:v,w:d,d:c,D:m,h:s,m:l,s:i,ms:o,Q:p}[N]||String(N||"").toLowerCase().replace(/s$/,"")},u:function(N){return N===void 0}},$="en",O={};O[$]=S;var T=function(N){return N instanceof j},_=function N(D,z,B){var M;if(!D)return $;if(typeof D=="string"){var E=D.toLowerCase();O[E]&&(M=E),z&&(O[E]=z,M=E);var K=D.split("-");if(!M&&K.length>1)return N(K[0])}else{var W=D.name;O[W]=D,M=W}return!B&&M&&($=M),M||!B&&$},I=function(N,D){if(T(N))return N.clone();var z=typeof D=="object"?D:{};return z.date=N,z.args=arguments,new j(z)},L=k;L.l=_,L.i=T,L.w=function(N,D){return I(N,{locale:D.$L,utc:D.$u,x:D.$x,$offset:D.$offset})};var j=function(){function N(z){this.$L=_(z.locale,null,!0),this.parse(z)}var D=N.prototype;return D.parse=function(z){this.$d=function(B){var M=B.date,E=B.utc;if(M===null)return new Date(NaN);if(L.u(M))return new Date;if(M instanceof Date)return new Date(M);if(typeof M=="string"&&!/Z$/i.test(M)){var K=M.match(b);if(K){var W=K[2]-1||0,Y=(K[7]||"0").substring(0,3);return E?new Date(Date.UTC(K[1],W,K[3]||1,K[4]||0,K[5]||0,K[6]||0,Y)):new Date(K[1],W,K[3]||1,K[4]||0,K[5]||0,K[6]||0,Y)}}return 
new Date(M)}(z),this.$x=z.x||{},this.init()},D.init=function(){var z=this.$d;this.$y=z.getFullYear(),this.$M=z.getMonth(),this.$D=z.getDate(),this.$W=z.getDay(),this.$H=z.getHours(),this.$m=z.getMinutes(),this.$s=z.getSeconds(),this.$ms=z.getMilliseconds()},D.$utils=function(){return L},D.isValid=function(){return this.$d.toString()!==y},D.isSame=function(z,B){var M=I(z);return this.startOf(B)<=M&&M<=this.endOf(B)},D.isAfter=function(z,B){return I(z)<this.startOf(B)},D.isBefore=function(z,B){return this.endOf(B)<I(z)},D.$g=function(z,B,M){return L.u(z)?this[B]:this.set(M,z)},D.unix=function(){return Math.floor(this.valueOf()/1e3)},D.valueOf=function(){return this.$d.getTime()},D.startOf=function(z,B){var M=this,E=!!L.u(B)||B,K=L.p(z),W=function(de,be){var Ee=L.w(M.$u?Date.UTC(M.$y,be,de):new Date(M.$y,be,de),M);return E?Ee:Ee.endOf(c)},Y=function(de,be){return L.w(M.toDate()[de].apply(M.toDate("s"),(E?[0,0,0,0]:[23,59,59,999]).slice(be)),M)},q=this.$W,J=this.$M,ne=this.$D,oe="set"+(this.$u?"UTC":"");switch(K){case v:return E?W(1,0):W(31,11);case f:return E?W(1,J):W(0,J+1);case d:var Q=this.$locale().weekStart||0,ae=(q<Q?q+7:q)-Q;return W(E?ne-ae:ne+(6-ae),J);case c:case m:return Y(oe+"Hours",0);case s:return Y(oe+"Minutes",1);case l:return Y(oe+"Seconds",2);case i:return Y(oe+"Milliseconds",3);default:return this.clone()}},D.endOf=function(z){return this.startOf(z,!1)},D.$set=function(z,B){var M,E=L.p(z),K="set"+(this.$u?"UTC":""),W=(M={},M[c]=K+"Date",M[m]=K+"Date",M[f]=K+"Month",M[v]=K+"FullYear",M[s]=K+"Hours",M[l]=K+"Minutes",M[i]=K+"Seconds",M[o]=K+"Milliseconds",M)[E],Y=E===c?this.$D+(B-this.$W):B;if(E===f||E===v){var q=this.clone().set(m,1);q.$d[W](Y),q.init(),this.$d=q.set(m,Math.min(this.$D,q.daysInMonth())).$d}else W&&this.$d[W](Y);return this.init(),this},D.set=function(z,B){return this.clone().$set(z,B)},D.get=function(z){return this[L.p(z)]()},D.add=function(z,B){var M,E=this;z=Number(z);var K=L.p(B),W=function(J){var ne=I(E);return 
L.w(ne.date(ne.date()+Math.round(J*z)),E)};if(K===f)return this.set(f,this.$M+z);if(K===v)return this.set(v,this.$y+z);if(K===c)return W(1);if(K===d)return W(7);var Y=(M={},M[l]=r,M[s]=a,M[i]=n,M)[K]||1,q=this.$d.getTime()+z*Y;return L.w(q,this)},D.subtract=function(z,B){return this.add(-1*z,B)},D.format=function(z){var B=this,M=this.$locale();if(!this.isValid())return M.invalidDate||y;var E=z||"YYYY-MM-DDTHH:mm:ssZ",K=L.z(this),W=this.$H,Y=this.$m,q=this.$M,J=M.weekdays,ne=M.months,oe=function(be,Ee,Pe,Be){return be&&(be[Ee]||be(B,E))||Pe[Ee].substr(0,Be)},Q=function(be){return L.s(W%12||12,be,"0")},ae=M.meridiem||function(be,Ee,Pe){var Be=be<12?"AM":"PM";return Pe?Be.toLowerCase():Be},de={YY:String(this.$y).slice(-2),YYYY:this.$y,M:q+1,MM:L.s(q+1,2,"0"),MMM:oe(M.monthsShort,q,ne,3),MMMM:oe(ne,q),D:this.$D,DD:L.s(this.$D,2,"0"),d:String(this.$W),dd:oe(M.weekdaysMin,this.$W,J,2),ddd:oe(M.weekdaysShort,this.$W,J,3),dddd:J[this.$W],H:String(W),HH:L.s(W,2,"0"),h:Q(1),hh:Q(2),a:ae(W,Y,!0),A:ae(W,Y,!1),m:String(Y),mm:L.s(Y,2,"0"),s:String(this.$s),ss:L.s(this.$s,2,"0"),SSS:L.s(this.$ms,3,"0"),Z:K};return E.replace(C,function(be,Ee){return Ee||de[be]||K.replace(":","")})},D.utcOffset=function(){return 15*-Math.round(this.$d.getTimezoneOffset()/15)},D.diff=function(z,B,M){var E,K=L.p(B),W=I(z),Y=(W.utcOffset()-this.utcOffset())*r,q=this-W,J=L.m(this,W);return J=(E={},E[v]=J/12,E[f]=J,E[p]=J/3,E[d]=(q-Y)/6048e5,E[c]=(q-Y)/864e5,E[s]=q/a,E[l]=q/r,E[i]=q/n,E)[K]||q,M?J:L.a(J)},D.daysInMonth=function(){return this.endOf(f).$D},D.$locale=function(){return O[this.$L]},D.locale=function(z,B){if(!z)return this.$L;var M=this.clone(),E=_(z,B,!0);return E&&(M.$L=E),M},D.clone=function(){return L.w(this.$d,this)},D.toDate=function(){return new Date(this.valueOf())},D.toJSON=function(){return this.isValid()?this.toISOString():null},D.toISOString=function(){return this.$d.toISOString()},D.toString=function(){return this.$d.toUTCString()},N}(),F=j.prototype;return 
I.prototype=F,[["$ms",o],["$s",i],["$m",l],["$H",s],["$W",c],["$M",f],["$y",v],["$D",m]].forEach(function(N){F[N[1]]=function(D){return this.$g(D,N[0],N[1])}}),I.extend=function(N,D){return N.$i||(N(D,j,I),N.$i=!0),I},I.locale=_,I.isDayjs=T,I.unix=function(N){return I(1e3*N)},I.en=O[$],I.Ls=O,I.p={},I})})(gM);var mt=gM.exports,yM={exports:{}};(function(e,t){(function(n,r){e.exports=r()})(vo,function(){return function(n,r,a){var o=r.prototype,i=function(f){return f&&(f.indexOf?f:f.s)},l=function(f,p,v,m,y){var b=f.name?f:f.$locale(),C=i(b[p]),S=i(b[v]),w=C||S.map(function($){return $.substr(0,m)});if(!y)return w;var k=b.weekStart;return w.map(function($,O){return w[(O+(k||0))%7]})},s=function(){return a.Ls[a.locale()]},c=function(f,p){return f.formats[p]||function(v){return v.replace(/(\[[^\]]+])|(MMMM|MM|DD|dddd)/g,function(m,y,b){return y||b.slice(1)})}(f.formats[p.toUpperCase()])},d=function(){var f=this;return{months:function(p){return p?p.format("MMMM"):l(f,"months")},monthsShort:function(p){return p?p.format("MMM"):l(f,"monthsShort","months",3)},firstDayOfWeek:function(){return f.$locale().weekStart||0},weekdays:function(p){return p?p.format("dddd"):l(f,"weekdays")},weekdaysMin:function(p){return p?p.format("dd"):l(f,"weekdaysMin","weekdays",2)},weekdaysShort:function(p){return p?p.format("ddd"):l(f,"weekdaysShort","weekdays",3)},longDateFormat:function(p){return c(f.$locale(),p)},meridiem:this.$locale().meridiem,ordinal:this.$locale().ordinal}};o.localeData=function(){return d.bind(this)()},a.localeData=function(){var f=s();return{firstDayOfWeek:function(){return f.weekStart||0},weekdays:function(){return a.weekdays()},weekdaysShort:function(){return a.weekdaysShort()},weekdaysMin:function(){return a.weekdaysMin()},months:function(){return a.months()},monthsShort:function(){return a.monthsShort()},longDateFormat:function(p){return c(f,p)},meridiem:f.meridiem,ordinal:f.ordinal}},a.months=function(){return l(s(),"months")},a.monthsShort=function(){return 
l(s(),"monthsShort","months",3)},a.weekdays=function(f){return l(s(),"weekdays",null,null,f)},a.weekdaysShort=function(f){return l(s(),"weekdaysShort","weekdays",3,f)},a.weekdaysMin=function(f){return l(s(),"weekdaysMin","weekdays",2,f)}}})})(yM);var bM=yM.exports,CM={exports:{}};(function(e,t){(function(n,r){e.exports=r()})(vo,function(){var n={LTS:"h:mm:ss A",LT:"h:mm A",L:"MM/DD/YYYY",LL:"MMMM D, YYYY",LLL:"MMMM D, YYYY h:mm A",LLLL:"dddd, MMMM D, YYYY h:mm A"},r=/(\[[^[]*\])|([-:/.()\s]+)|(A|a|YYYY|YY?|MM?M?M?|Do|DD?|hh?|HH?|mm?|ss?|S{1,3}|z|ZZ?)/g,a=/\d\d/,o=/\d\d?/,i=/\d*[^\s\d-_:/()]+/,l={},s=function(y){return(y=+y)+(y>68?1900:2e3)},c=function(y){return function(b){this[y]=+b}},d=[/[+-]\d\d:?(\d\d)?|Z/,function(y){(this.zone||(this.zone={})).offset=function(b){if(!b||b==="Z")return 0;var C=b.match(/([+-]|\d\d)/g),S=60*C[1]+(+C[2]||0);return S===0?0:C[0]==="+"?-S:S}(y)}],f=function(y){var b=l[y];return b&&(b.indexOf?b:b.s.concat(b.f))},p=function(y,b){var C,S=l.meridiem;if(S){for(var w=1;w<=24;w+=1)if(y.indexOf(S(w,0,b))>-1){C=w>12;break}}else C=y===(b?"pm":"PM");return C},v={A:[i,function(y){this.afternoon=p(y,!1)}],a:[i,function(y){this.afternoon=p(y,!0)}],S:[/\d/,function(y){this.milliseconds=100*+y}],SS:[a,function(y){this.milliseconds=10*+y}],SSS:[/\d{3}/,function(y){this.milliseconds=+y}],s:[o,c("seconds")],ss:[o,c("seconds")],m:[o,c("minutes")],mm:[o,c("minutes")],H:[o,c("hours")],h:[o,c("hours")],HH:[o,c("hours")],hh:[o,c("hours")],D:[o,c("day")],DD:[a,c("day")],Do:[i,function(y){var b=l.ordinal,C=y.match(/\d+/);if(this.day=C[0],b)for(var S=1;S<=31;S+=1)b(S).replace(/\[|\]/g,"")===y&&(this.day=S)}],M:[o,c("month")],MM:[a,c("month")],MMM:[i,function(y){var b=f("months"),C=(f("monthsShort")||b.map(function(S){return S.substr(0,3)})).indexOf(y)+1;if(C<1)throw new Error;this.month=C%12||C}],MMMM:[i,function(y){var b=f("months").indexOf(y)+1;if(b<1)throw new 
Error;this.month=b%12||b}],Y:[/[+-]?\d+/,c("year")],YY:[a,function(y){this.year=s(y)}],YYYY:[/\d{4}/,c("year")],Z:d,ZZ:d};function m(y){var b,C;b=y,C=l&&l.formats;for(var S=(y=b.replace(/(\[[^\]]+])|(LTS?|l{1,4}|L{1,4})/g,function(I,L,j){var F=j&&j.toUpperCase();return L||C[j]||n[j]||C[F].replace(/(\[[^\]]+])|(MMMM|MM|DD|dddd)/g,function(N,D,z){return D||z.slice(1)})})).match(r),w=S.length,k=0;k<w;k+=1){var $=S[k],O=v[$],T=O&&O[0],_=O&&O[1];S[k]=_?{regex:T,parser:_}:$.replace(/^\[|\]$/g,"")}return function(I){for(var L={},j=0,F=0;j<w;j+=1){var N=S[j];if(typeof N=="string")F+=N.length;else{var D=N.regex,z=N.parser,B=I.substr(F),M=D.exec(B)[0];z.call(L,M),I=I.replace(M,"")}}return function(E){var K=E.afternoon;if(K!==void 0){var W=E.hours;K?W<12&&(E.hours+=12):W===12&&(E.hours=0),delete E.afternoon}}(L),L}}return function(y,b,C){C.p.customParseFormat=!0,y&&y.parseTwoDigitYear&&(s=y.parseTwoDigitYear);var S=b.prototype,w=S.parse;S.parse=function(k){var $=k.date,O=k.utc,T=k.args;this.$u=O;var _=T[1];if(typeof _=="string"){var I=T[2]===!0,L=T[3]===!0,j=I||L,F=T[2];L&&(F=T[2]),l=this.$locale(),!I&&F&&(l=C.Ls[F]),this.$d=function(B,M,E){try{if(["x","X"].indexOf(M)>-1)return new Date((M==="X"?1e3:1)*B);var K=m(M)(B),W=K.year,Y=K.month,q=K.day,J=K.hours,ne=K.minutes,oe=K.seconds,Q=K.milliseconds,ae=K.zone,de=new Date,be=q||(W||Y?1:de.getDate()),Ee=W||de.getFullYear(),Pe=0;W&&!Y||(Pe=Y>0?Y-1:de.getMonth());var Be=J||0,te=ne||0,ie=oe||0,ge=Q||0;return ae?new Date(Date.UTC(Ee,Pe,be,Be,te,ie,ge+60*ae.offset*1e3)):E?new Date(Date.UTC(Ee,Pe,be,Be,te,ie,ge)):new Date(Ee,Pe,be,Be,te,ie,ge)}catch{return new Date("")}}($,_,O),this.init(),F&&F!==!0&&(this.$L=this.locale(F).$L),j&&$!=this.format(_)&&(this.$d=new Date("")),l={}}else if(_ instanceof Array)for(var N=_.length,D=1;D<=N;D+=1){T[1]=_[D-1];var z=C.apply(this,T);if(z.isValid()){this.$d=z.$d,this.$L=z.$L,this.init();break}D===N&&(this.$d=new Date(""))}else w.call(this,k)}}})})(CM);var tw=CM.exports;const 
w0="HH:mm:ss",td="YYYY-MM-DD",$te={date:td,week:"gggg[w]ww",year:"YYYY",month:"YYYY-MM",datetime:`${td} ${w0}`,monthrange:"YYYY-MM",daterange:td,datetimerange:`${td} ${w0}`},nw={id:{type:[Array,String]},name:{type:[Array,String],default:""},popperClass:{type:String,default:""},format:{type:String},valueFormat:{type:String},type:{type:String,default:""},clearable:{type:Boolean,default:!0},clearIcon:{type:[String,Object],default:gi},editable:{type:Boolean,default:!0},prefixIcon:{type:[String,Object],default:""},size:{type:String,validator:va},readonly:{type:Boolean,default:!1},disabled:{type:Boolean,default:!1},placeholder:{type:String,default:""},popperOptions:{type:Object,default:()=>({})},modelValue:{type:[Date,Array,String,Number],default:""},rangeSeparator:{type:String,default:"-"},startPlaceholder:String,endPlaceholder:String,defaultValue:{type:[Date,Array]},defaultTime:{type:[Date,Array]},isRange:{type:Boolean,default:!1},disabledHours:{type:Function},disabledMinutes:{type:Function},disabledSeconds:{type:Function},disabledDate:{type:Function},cellClassName:{type:Function},shortcuts:{type:Array,default:()=>[]},arrowControl:{type:Boolean,default:!1},validateEvent:{type:Boolean,default:!0},unlinkPanels:Boolean},k4=function(e,t){const n=e instanceof Date,r=t instanceof Date;return n&&r?e.getTime()===t.getTime():!n&&!r?e===t:!1},$4=function(e,t){const n=Array.isArray(e),r=Array.isArray(t);return n&&r?e.length!==t.length?!1:e.every((a,o)=>k4(a,t[o])):!n&&!r?k4(e,t):!1},O4=function(e,t,n){const r=Kd(t)||t==="x"?mt(e).locale(n):mt(e,t).locale(n);return r.isValid()?r:void 0},P4=function(e,t,n){return 
Kd(t)?e:t==="x"?+e:mt(e).locale(n).format(t)},Ote=G({name:"Picker",components:{ElInput:Ra,ElTooltip:Ur,ElIcon:ft},props:nw,emits:["update:modelValue","change","focus","blur","calendar-change","panel-change","visible-change"],setup(e,t){const{lang:n}=ln(),r=De("date"),a=De("input"),o=De("range"),i=ve(ga,{}),l=ve(Ia,{}),s=ve("ElPopperOptions",{}),c=H(),d=H(),f=H(!1),p=H(!1),v=H(null);ce(f,ee=>{var me;ee?v.value=e.modelValue:(de.value=null,Ne(()=>{m(e.modelValue)}),t.emit("blur"),Ee(),e.validateEvent&&((me=l.validate)==null||me.call(l,"blur").catch(He=>void 0)))});const m=(ee,me)=>{var He;(me||!$4(ee,v.value))&&(t.emit("change",ee),e.validateEvent&&((He=l.validate)==null||He.call(l,"change").catch(lt=>void 0)))},y=ee=>{if(!$4(e.modelValue,ee)){let me;Array.isArray(ee)?me=ee.map(He=>P4(He,e.valueFormat,n.value)):ee&&(me=P4(ee,e.valueFormat,n.value)),t.emit("update:modelValue",ee&&me,n.value)}},b=x(()=>{if(d.value){const ee=J.value?d.value:d.value.$el;return Array.from(ee.querySelectorAll("input"))}return[]}),C=x(()=>b==null?void 0:b.value[0]),S=x(()=>b==null?void 0:b.value[1]),w=(ee,me,He)=>{const lt=b.value;!lt.length||(!He||He==="min"?(lt[0].setSelectionRange(ee,me),lt[0].focus()):He==="max"&&(lt[1].setSelectionRange(ee,me),lt[1].focus()))},k=(ee="",me=!1)=>{f.value=me;let He;Array.isArray(ee)?He=ee.map(lt=>lt.toDate()):He=ee&&ee.toDate(),de.value=null,y(He)},$=()=>{p.value=!0},O=()=>{t.emit("visible-change",!0)},T=()=>{p.value=!1,t.emit("visible-change",!1)},_=(ee=!0)=>{let me=C.value;!ee&&J.value&&(me=S.value),me&&me.focus()},I=ee=>{e.readonly||j.value||f.value||(f.value=!0,t.emit("focus",ee))},L=()=>{var ee;(ee=c.value)==null||ee.onClose(),Ee()},j=x(()=>e.disabled||i.disabled),F=x(()=>{let ee;if(W.value?pe.value.getDefaultValue&&(ee=pe.value.getDefaultValue()):Array.isArray(e.modelValue)?ee=e.modelValue.map(me=>O4(me,e.valueFormat,n.value)):ee=O4(e.modelValue,e.valueFormat,n.value),pe.value.getRangeAvailableTime){const 
me=pe.value.getRangeAvailableTime(ee);er(me,ee)||(ee=me,y(Array.isArray(ee)?ee.map(He=>He.toDate()):ee.toDate()))}return Array.isArray(ee)&&ee.some(me=>!me)&&(ee=[]),ee}),N=x(()=>{if(!pe.value.panelReady)return;const ee=Be(F.value);if(Array.isArray(de.value))return[de.value[0]||ee&&ee[0]||"",de.value[1]||ee&&ee[1]||""];if(de.value!==null)return de.value;if(!(!z.value&&W.value)&&!(!f.value&&W.value))return ee?B.value?ee.join(", "):ee:""}),D=x(()=>e.type.includes("time")),z=x(()=>e.type.startsWith("time")),B=x(()=>e.type==="dates"),M=x(()=>e.prefixIcon||(D.value?uE:fY)),E=H(!1),K=ee=>{e.readonly||j.value||E.value&&(ee.stopPropagation(),y(null),m(null,!0),E.value=!1,f.value=!1,pe.value.handleClear&&pe.value.handleClear())},W=x(()=>!e.modelValue||Array.isArray(e.modelValue)&&!e.modelValue.length),Y=()=>{e.readonly||j.value||!W.value&&e.clearable&&(E.value=!0)},q=()=>{E.value=!1},J=x(()=>e.type.includes("range")),ne=Gn(),oe=x(()=>{var ee,me;return(me=(ee=c.value)==null?void 0:ee.popperRef)==null?void 0:me.contentRef}),Q=x(()=>{var ee,me;return(me=(ee=A(c))==null?void 0:ee.popperRef)==null?void 0:me.contentRef}),ae=x(()=>{var ee;return A(J)?A(d):(ee=A(d))==null?void 0:ee.$el});vv(ae,ee=>{const me=A(Q),He=A(ae);me&&(ee.target===me||ee.composedPath().includes(me))||ee.target===He||ee.composedPath().includes(He)||(f.value=!1)});const de=H(null),be=()=>{if(de.value){const ee=Pe(N.value);ee&&te(ee)&&(y(Array.isArray(ee)?ee.map(me=>me.toDate()):ee.toDate()),de.value=null)}de.value===""&&(y(null),m(null),de.value=null)},Ee=()=>{b.value.forEach(ee=>ee.blur())},Pe=ee=>ee?pe.value.parseUserInput(ee):null,Be=ee=>ee?pe.value.formatToString(ee):null,te=ee=>pe.value.isValidValue(ee),ie=ee=>{const 
me=ee.code;if(me===Ge.esc){f.value=!1,ee.stopPropagation();return}if(me===Ge.tab){J.value?setTimeout(()=>{b.value.includes(document.activeElement)||(f.value=!1,Ee())},0):(be(),f.value=!1,ee.stopPropagation());return}if(me===Ge.enter||me===Ge.numpadEnter){(de.value===null||de.value===""||te(Pe(N.value)))&&(be(),f.value=!1),ee.stopPropagation();return}if(de.value){ee.stopPropagation();return}pe.value.handleKeydown&&pe.value.handleKeydown(ee)},ge=ee=>{de.value=ee},ke=ee=>{de.value?de.value=[ee.target.value,de.value[1]]:de.value=[ee.target.value,null]},xe=ee=>{de.value?de.value=[de.value[0],ee.target.value]:de.value=[null,ee.target.value]},Ie=()=>{const ee=Pe(de.value&&de.value[0]);if(ee&&ee.isValid()){de.value=[Be(ee),N.value[1]];const me=[ee,F.value&&F.value[1]];te(me)&&(y(me),de.value=null)}},ye=()=>{const ee=Pe(de.value&&de.value[1]);if(ee&&ee.isValid()){de.value=[N.value[0],Be(ee)];const me=[F.value&&F.value[0],ee];te(me)&&(y(me),de.value=null)}},pe=H({}),ue=ee=>{pe.value[ee[0]]=ee[1],pe.value.panelReady=!0},Ce=ee=>{t.emit("calendar-change",ee)},je=(ee,me,He)=>{t.emit("panel-change",ee,me,He)};return ot("EP_PICKER_BASE",{props:e}),{nsDate:r,nsInput:a,nsRange:o,elPopperOptions:s,isDatesPicker:B,handleEndChange:ye,handleStartChange:Ie,handleStartInput:ke,handleEndInput:xe,onUserInput:ge,handleChange:be,handleKeydown:ie,popperPaneRef:oe,onClickOutside:vv,pickerSize:ne,isRangeInput:J,onMouseLeave:q,onMouseEnter:Y,onClearIconClick:K,showClose:E,triggerIcon:M,onPick:k,handleFocus:I,handleBlur:L,pickerVisible:f,pickerActualVisible:p,displayValue:N,parsedValue:F,setSelectionRange:w,refPopper:c,inputRef:d,pickerDisabled:j,onSetPickerOption:ue,onCalendarChange:Ce,onPanelChange:je,focus:_,onShow:O,onBeforeShow:$,onHide:T}}}),Pte=["id","name","placeholder","value","disabled","readonly"],Tte=["id","name","placeholder","value","disabled","readonly"];function xte(e,t,n,r,a,o){const i=we("el-icon"),l=we("el-input"),s=we("el-tooltip");return 
R(),fe(s,hn({ref:"refPopper",visible:e.pickerVisible,"onUpdate:visible":t[17]||(t[17]=c=>e.pickerVisible=c),effect:"light",pure:"",trigger:"click"},e.$attrs,{"append-to-body":"",transition:`${e.nsDate.namespace.value}-zoom-in-top`,"popper-class":[`${e.nsDate.namespace.value}-picker__popper`,e.popperClass],"popper-options":e.elPopperOptions,"fallback-placements":["bottom","top","right","left"],"gpu-acceleration":!1,"stop-popper-mouse-event":!1,"hide-after":0,persistent:"",onBeforeShow:e.onBeforeShow,onShow:e.onShow,onHide:e.onHide}),{default:re(()=>[e.isRangeInput?(R(),X("div",{key:1,ref:"inputRef",class:U([e.nsDate.b("editor"),e.nsDate.bm("editor",e.type),e.nsInput.e("inner"),e.nsDate.is("disabled",e.pickerDisabled),e.nsDate.is("active",e.pickerVisible),e.nsRange.b("editor"),e.pickerSize?e.nsRange.bm("editor",e.pickerSize):"",e.$attrs.class]),style:Xe(e.$attrs.style),onClick:t[7]||(t[7]=(...c)=>e.handleFocus&&e.handleFocus(...c)),onMouseenter:t[8]||(t[8]=(...c)=>e.onMouseEnter&&e.onMouseEnter(...c)),onMouseleave:t[9]||(t[9]=(...c)=>e.onMouseLeave&&e.onMouseLeave(...c)),onKeydown:t[10]||(t[10]=(...c)=>e.handleKeydown&&e.handleKeydown(...c))},[e.triggerIcon?(R(),fe(i,{key:0,class:U([e.nsInput.e("icon"),e.nsRange.e("icon")]),onClick:e.handleFocus},{default:re(()=>[(R(),fe(Kt(e.triggerIcon)))]),_:1},8,["class","onClick"])):se("v-if",!0),Z("input",{id:e.id&&e.id[0],autocomplete:"off",name:e.name&&e.name[0],placeholder:e.startPlaceholder,value:e.displayValue&&e.displayValue[0],disabled:e.pickerDisabled,readonly:!e.editable||e.readonly,class:U(e.nsRange.b("input")),onInput:t[1]||(t[1]=(...c)=>e.handleStartInput&&e.handleStartInput(...c)),onChange:t[2]||(t[2]=(...c)=>e.handleStartChange&&e.handleStartChange(...c)),onFocus:t[3]||(t[3]=(...c)=>e.handleFocus&&e.handleFocus(...c))},null,42,Pte),Oe(e.$slots,"range-separator",{},()=>[Z("span",{class:U(e.nsRange.b("separator"))},Me(e.rangeSeparator),3)]),Z("input",{id:e.id&&e.id[1],autocomplete:"off",name:e.name&&e.name[1],placeho
lder:e.endPlaceholder,value:e.displayValue&&e.displayValue[1],disabled:e.pickerDisabled,readonly:!e.editable||e.readonly,class:U(e.nsRange.b("input")),onFocus:t[4]||(t[4]=(...c)=>e.handleFocus&&e.handleFocus(...c)),onInput:t[5]||(t[5]=(...c)=>e.handleEndInput&&e.handleEndInput(...c)),onChange:t[6]||(t[6]=(...c)=>e.handleEndChange&&e.handleEndChange(...c))},null,42,Tte),e.clearIcon?(R(),fe(i,{key:1,class:U([e.nsInput.e("icon"),e.nsRange.e("close-icon"),{[e.nsRange.e("close-icon--hidden")]:!e.showClose}]),onClick:e.onClearIconClick},{default:re(()=>[(R(),fe(Kt(e.clearIcon)))]),_:1},8,["class","onClick"])):se("v-if",!0)],38)):(R(),fe(l,{key:0,id:e.id,ref:"inputRef","model-value":e.displayValue,name:e.name,size:e.pickerSize,disabled:e.pickerDisabled,placeholder:e.placeholder,class:U([e.nsDate.b("editor"),e.nsDate.bm("editor",e.type),e.$attrs.class]),style:Xe(e.$attrs.style),readonly:!e.editable||e.readonly||e.isDatesPicker||e.type==="week",onInput:e.onUserInput,onFocus:e.handleFocus,onKeydown:e.handleKeydown,onChange:e.handleChange,onMouseenter:e.onMouseEnter,onMouseleave:e.onMouseLeave,onClick:t[0]||(t[0]=dt(()=>{},["stop"]))},{prefix:re(()=>[e.triggerIcon?(R(),fe(i,{key:0,class:U(e.nsInput.e("icon")),onClick:e.handleFocus},{default:re(()=>[(R(),fe(Kt(e.triggerIcon)))]),_:1},8,["class","onClick"])):se("v-if",!0)]),suffix:re(()=>[e.showClose&&e.clearIcon?(R(),fe(i,{key:0,class:U(`${e.nsInput.e("icon")} 
clear-icon`),onClick:e.onClearIconClick},{default:re(()=>[(R(),fe(Kt(e.clearIcon)))]),_:1},8,["class","onClick"])):se("v-if",!0)]),_:1},8,["id","model-value","name","size","disabled","placeholder","class","style","readonly","onInput","onFocus","onKeydown","onChange","onMouseenter","onMouseleave"]))]),content:re(()=>[Oe(e.$slots,"default",{visible:e.pickerVisible,actualVisible:e.pickerActualVisible,parsedValue:e.parsedValue,format:e.format,unlinkPanels:e.unlinkPanels,type:e.type,defaultValue:e.defaultValue,onPick:t[11]||(t[11]=(...c)=>e.onPick&&e.onPick(...c)),onSelectRange:t[12]||(t[12]=(...c)=>e.setSelectionRange&&e.setSelectionRange(...c)),onSetPickerOption:t[13]||(t[13]=(...c)=>e.onSetPickerOption&&e.onSetPickerOption(...c)),onCalendarChange:t[14]||(t[14]=(...c)=>e.onCalendarChange&&e.onCalendarChange(...c)),onPanelChange:t[15]||(t[15]=(...c)=>e.onPanelChange&&e.onPanelChange(...c)),onMousedown:t[16]||(t[16]=dt(()=>{},["stop"]))})]),_:3},16,["visible","transition","popper-class","popper-options","onBeforeShow","onShow","onHide"])}var wM=Ae(Ote,[["render",xte],["__file","/home/runner/work/element-plus/element-plus/packages/components/time-picker/src/common/picker.vue"]]);const Vi=new Map;let T4;Bt&&(document.addEventListener("mousedown",e=>T4=e),document.addEventListener("mouseup",e=>{for(const t of Vi.values())for(const{documentHandler:n}of t)n(e,T4)}));function x4(e,t){let n=[];return Array.isArray(t.arg)?n=t.arg:ys(t.arg)&&n.push(t.arg),function(r,a){const o=t.instance.popperRef,i=r.target,l=a==null?void 0:a.target,s=!t||!t.instance,c=!i||!l,d=e.contains(i)||e.contains(l),f=e===i,p=n.length&&n.some(m=>m==null?void 0:m.contains(i))||n.length&&n.includes(l),v=o&&(o.contains(i)||o.contains(l));s||c||d||f||p||v||t.value(r,a)}}const Is={beforeMount(e,t){Vi.has(e)||Vi.set(e,[]),Vi.get(e).push({documentHandler:x4(e,t),bindingFn:t.value})},updated(e,t){Vi.has(e)||Vi.set(e,[]);const 
n=Vi.get(e),r=n.findIndex(o=>o.bindingFn===t.oldValue),a={documentHandler:x4(e,t),bindingFn:t.value};r>=0?n.splice(r,1,a):n.push(a)},unmounted(e){Vi.delete(e)}};var SM={beforeMount(e,t){let n=null,r;const a=()=>t.value&&t.value(),o=()=>{Date.now()-r<100&&a(),clearInterval(n),n=null};gn(e,"mousedown",i=>{i.button===0&&(r=Date.now(),oU(document,"mouseup",o),clearInterval(n),n=setInterval(a,100))})}};const S0="_trap-focus-children",Yl=[],_4=e=>{if(Yl.length===0)return;const t=Yl[Yl.length-1][S0];if(t.length>0&&e.code===Ge.tab){if(t.length===1){e.preventDefault(),document.activeElement!==t[0]&&t[0].focus();return}const n=e.shiftKey,r=e.target===t[0],a=e.target===t[t.length-1];r&&n&&(e.preventDefault(),t[t.length-1].focus()),a&&!n&&(e.preventDefault(),t[0].focus())}},kM={beforeMount(e){e[S0]=V$(e),Yl.push(e),Yl.length<=1&&gn(document,"keydown",_4)},updated(e){Ne(()=>{e[S0]=V$(e)})},unmounted(){Yl.shift(),Yl.length===0&&Bn(document,"keydown",_4)}};var E4=!1,Fl,k0,$0,fp,hp,$M,pp,O0,P0,T0,OM,x0,_0,PM,TM;function Dr(){if(!E4){E4=!0;var e=navigator.userAgent,t=/(?:MSIE.(\d+\.\d+))|(?:(?:Firefox|GranParadiso|Iceweasel).(\d+\.\d+))|(?:Opera(?:.+Version.|.)(\d+\.\d+))|(?:AppleWebKit.(\d+(?:\.\d+)?))|(?:Trident\/\d+\.\d+.*rv:(\d+\.\d+))/.exec(e),n=/(Mac OS X)|(Windows)|(Linux)/.exec(e);if(x0=/\b(iPhone|iP[ao]d)/.exec(e),_0=/\b(iP[ao]d)/.exec(e),T0=/Android/i.exec(e),PM=/FBAN\/\w+;/i.exec(e),TM=/Mobile/i.exec(e),OM=!!/Win64/.exec(e),t){Fl=t[1]?parseFloat(t[1]):t[5]?parseFloat(t[5]):NaN,Fl&&document&&document.documentMode&&(Fl=document.documentMode);var r=/(?:Trident\/(\d+.\d+))/.exec(e);$M=r?parseFloat(r[1])+4:Fl,k0=t[2]?parseFloat(t[2]):NaN,$0=t[3]?parseFloat(t[3]):NaN,fp=t[4]?parseFloat(t[4]):NaN,fp?(t=/(?:Chrome\/(\d+\.\d+))/.exec(e),hp=t&&t[1]?parseFloat(t[1]):NaN):hp=NaN}else Fl=k0=$0=hp=fp=NaN;if(n){if(n[1]){var a=/(?:Mac OS X (\d+(?:[._]\d+)?))/.exec(e);pp=a?parseFloat(a[1].replace("_",".")):!0}else pp=!1;O0=!!n[2],P0=!!n[3]}else pp=O0=P0=!1}}var E0={ie:function(){return 
Dr()||Fl},ieCompatibilityMode:function(){return Dr()||$M>Fl},ie64:function(){return E0.ie()&&OM},firefox:function(){return Dr()||k0},opera:function(){return Dr()||$0},webkit:function(){return Dr()||fp},safari:function(){return E0.webkit()},chrome:function(){return Dr()||hp},windows:function(){return Dr()||O0},osx:function(){return Dr()||pp},linux:function(){return Dr()||P0},iphone:function(){return Dr()||x0},mobile:function(){return Dr()||x0||_0||T0||TM},nativeApp:function(){return Dr()||PM},android:function(){return Dr()||T0},ipad:function(){return Dr()||_0}},_te=E0,uh=!!(typeof window<"u"&&window.document&&window.document.createElement),Ete={canUseDOM:uh,canUseWorkers:typeof Worker<"u",canUseEventListeners:uh&&!!(window.addEventListener||window.attachEvent),canUseViewport:uh&&!!window.screen,isInWorker:!uh},xM=Ete,_M;xM.canUseDOM&&(_M=document.implementation&&document.implementation.hasFeature&&document.implementation.hasFeature("","")!==!0);function Mte(e,t){if(!xM.canUseDOM||t&&!("addEventListener"in document))return!1;var n="on"+e,r=n in document;if(!r){var a=document.createElement("div");a.setAttribute(n,"return;"),r=typeof a[n]=="function"}return!r&&_M&&e==="wheel"&&(r=document.implementation.hasFeature("Events.wheel","3.0")),r}var Ite=Mte,M4=10,I4=40,N4=800;function EM(e){var t=0,n=0,r=0,a=0;return"detail"in e&&(n=e.detail),"wheelDelta"in e&&(n=-e.wheelDelta/120),"wheelDeltaY"in e&&(n=-e.wheelDeltaY/120),"wheelDeltaX"in e&&(t=-e.wheelDeltaX/120),"axis"in e&&e.axis===e.HORIZONTAL_AXIS&&(t=n,n=0),r=t*M4,a=n*M4,"deltaY"in e&&(a=e.deltaY),"deltaX"in e&&(r=e.deltaX),(r||a)&&e.deltaMode&&(e.deltaMode==1?(r*=I4,a*=I4):(r*=N4,a*=N4)),r&&!t&&(t=r<1?-1:1),a&&!n&&(n=a<1?-1:1),{spinX:t,spinY:n,pixelX:r,pixelY:a}}EM.getEventType=function(){return _te.firefox()?"DOMMouseScroll":Ite("wheel")?"wheel":"mousewheel"};var Nte=EM;/**
* Checks if an event is supported in the current execution environment.
*
* NOTE: This will not work correctly for non-generic events such as `change`,
* `reset`, `load`, `error`, and `select`.
*
* Borrows from Modernizr.
*
* @param {string} eventNameSuffix Event name, e.g. "click".
* @param {?boolean} capture Check if the capture phase is supported.
* @return {boolean} True if the event is supported.
* @internal
* @license Modernizr 3.0.0pre (Custom Build) | MIT
*/const Ate=function(e,t){if(e&&e.addEventListener){const n=function(r){const a=Nte(r);t&&Reflect.apply(t,this,[r,a])};BC()?e.addEventListener("DOMMouseScroll",n):e.onmousewheel=n}},Dte={beforeMount(e,t){Ate(e,t.value)}},Rte={beforeMount(e,t){e._handleResize=()=>{var n;e&&((n=t.value)==null||n.call(t,e))},wc(e,e._handleResize)},beforeUnmount(e){Sc(e,e._handleResize)}},cy=(e,t,n)=>{const r=[],a=t&&n();for(let o=0;o<e;o++)r[o]=a?a.includes(o):!1;return r},dy=e=>e.map((t,n)=>t||n).filter(t=>t!==!0),MM=(e,t,n)=>({getHoursList:(i,l)=>cy(24,e,()=>e(i,l)),getMinutesList:(i,l,s)=>cy(60,t,()=>t(i,l,s)),getSecondsList:(i,l,s,c)=>cy(60,n,()=>n(i,l,s,c))}),IM=(e,t,n)=>{const{getHoursList:r,getMinutesList:a,getSecondsList:o}=MM(e,t,n);return{getAvailableHours:(c,d)=>dy(r(c,d)),getAvailableMinutes:(c,d,f)=>dy(a(c,d,f)),getAvailableSeconds:(c,d,f,p)=>dy(o(c,d,f,p))}},NM=e=>{const t=H(e.parsedValue);return ce(()=>e.visible,n=>{n||(t.value=e.parsedValue)}),t},Lte=G({directives:{repeatClick:SM},components:{ElScrollbar:xi,ElIcon:ft,ArrowUp:Of,ArrowDown:_s},props:{role:{type:String,required:!0},spinnerDate:{type:Object,required:!0},showSeconds:{type:Boolean,default:!0},arrowControl:Boolean,amPmMode:{type:String,default:""},disabledHours:{type:Function},disabledMinutes:{type:Function},disabledSeconds:{type:Function}},emits:["change","select-range","set-option"],setup(e,t){const n=De("time");let r=!1;const a=Yn(Q=>{r=!1,I(Q)},200),o=H(null),i=H(null),l=H(null),s=H(null),c={hours:i,minutes:l,seconds:s},d=x(()=>{const Q=["hours","minutes","seconds"];return e.showSeconds?Q:Q.slice(0,2)}),f=x(()=>e.spinnerDate.hour()),p=x(()=>e.spinnerDate.minute()),v=x(()=>e.spinnerDate.second()),m=x(()=>({hours:f,minutes:p,seconds:v})),y=x(()=>J(e.role)),b=x(()=>ne(f.value,e.role)),C=x(()=>oe(f.value,p.value,e.role)),S=x(()=>({hours:y,minutes:b,seconds:C})),w=x(()=>{const Q=f.value;return[Q>0?Q-1:void 0,Q,Q<23?Q+1:void 0]}),k=x(()=>{const Q=p.value;return[Q>0?Q-1:void 0,Q,Q<59?Q+1:void 0]}),$=x(()=>{const 
Q=v.value;return[Q>0?Q-1:void 0,Q,Q<59?Q+1:void 0]}),O=x(()=>({hours:w,minutes:k,seconds:$})),T=Q=>{if(!!!e.amPmMode)return"";const de=e.amPmMode==="A";let be=Q<12?" am":" pm";return de&&(be=be.toUpperCase()),be},_=Q=>{Q==="hours"?t.emit("select-range",0,2):Q==="minutes"?t.emit("select-range",3,5):Q==="seconds"&&t.emit("select-range",6,8),o.value=Q},I=Q=>{F(Q,m.value[Q].value)},L=()=>{I("hours"),I("minutes"),I("seconds")},j=Q=>Q.querySelector(`.${n.namespace.value}-scrollbar__wrap`),F=(Q,ae)=>{if(e.arrowControl)return;const de=c[Q];de&&de.$el&&(j(de.$el).scrollTop=Math.max(0,ae*N(Q)))},N=Q=>c[Q].$el.querySelector("li").offsetHeight,D=()=>{B(1)},z=()=>{B(-1)},B=Q=>{o.value||_("hours");const ae=o.value;let de=m.value[ae].value;const be=o.value==="hours"?24:60;de=(de+Q+be)%be,M(ae,de),F(ae,de),Ne(()=>_(o.value))},M=(Q,ae)=>{if(!S.value[Q].value[ae])switch(Q){case"hours":t.emit("change",e.spinnerDate.hour(ae).minute(p.value).second(v.value));break;case"minutes":t.emit("change",e.spinnerDate.hour(f.value).minute(ae).second(v.value));break;case"seconds":t.emit("change",e.spinnerDate.hour(f.value).minute(p.value).second(ae));break}},E=(Q,{value:ae,disabled:de})=>{de||(M(Q,ae),_(Q),F(Q,ae))},K=Q=>{r=!0,a(Q);const ae=Math.min(Math.round((j(c[Q].$el).scrollTop-(W(Q)*.5-10)/N(Q)+3)/N(Q)),Q==="hours"?23:59);M(Q,ae)},W=Q=>c[Q].$el.offsetHeight,Y=()=>{const Q=ae=>{c[ae]&&c[ae].$el&&(j(c[ae].$el).onscroll=()=>{K(ae)})};Q("hours"),Q("minutes"),Q("seconds")};et(()=>{Ne(()=>{!e.arrowControl&&Y(),L(),e.role==="start"&&_("hours")})});const q=(Q,ae)=>{c[ae]=Q};t.emit("set-option",[`${e.role}_scrollDown`,B]),t.emit("set-option",[`${e.role}_emitSelectRange`,_]);const{getHoursList:J,getMinutesList:ne,getSecondsList:oe}=MM(e.disabledHours,e.disabledMinutes,e.disabledSeconds);return 
ce(()=>e.spinnerDate,()=>{r||L()}),{ns:n,setRef:q,spinnerItems:d,currentScrollbar:o,hours:f,minutes:p,seconds:v,hoursList:y,minutesList:b,arrowHourList:w,arrowMinuteList:k,arrowSecondList:$,getAmPmFlag:T,emitSelectRange:_,adjustCurrentSpinner:I,typeItemHeight:N,listHoursRef:i,listMinutesRef:l,listSecondsRef:s,onIncreaseClick:D,onDecreaseClick:z,handleClick:E,secondsList:C,timePartsMap:m,arrowListMap:O,listMap:S}}}),Fte=["onClick"],Bte=["onMouseenter"];function Vte(e,t,n,r,a,o){const i=we("el-scrollbar"),l=we("arrow-up"),s=we("el-icon"),c=we("arrow-down"),d=pa("repeat-click");return R(),X("div",{class:U([e.ns.b("spinner"),{"has-seconds":e.showSeconds}])},[e.arrowControl?se("v-if",!0):(R(!0),X(Fe,{key:0},Rt(e.spinnerItems,f=>(R(),fe(i,{key:f,ref_for:!0,ref:p=>e.setRef(p,f),class:U(e.ns.be("spinner","wrapper")),"wrap-style":"max-height: inherit;","view-class":e.ns.be("spinner","list"),noresize:"",tag:"ul",onMouseenter:p=>e.emitSelectRange(f),onMousemove:p=>e.adjustCurrentSpinner(f)},{default:re(()=>[(R(!0),X(Fe,null,Rt(e.listMap[f].value,(p,v)=>(R(),X("li",{key:v,class:U([e.ns.be("spinner","item"),e.ns.is("active",v===e.timePartsMap[f].value),e.ns.is("disabled",p)]),onClick:m=>e.handleClick(f,{value:v,disabled:p})},[f==="hours"?(R(),X(Fe,{key:0},[yt(Me(("0"+(e.amPmMode?v%12||12:v)).slice(-2))+Me(e.getAmPmFlag(v)),1)],2112)):(R(),X(Fe,{key:1},[yt(Me(("0"+v).slice(-2)),1)],2112))],10,Fte))),128))]),_:2},1032,["class","view-class","onMouseenter","onMousemove"]))),128)),e.arrowControl?(R(!0),X(Fe,{key:1},Rt(e.spinnerItems,f=>(R(),X("div",{key:f,class:U([e.ns.be("spinner","wrapper"),e.ns.is("arrow")]),onMouseenter:p=>e.emitSelectRange(f)},[at((R(),fe(s,{class:U(["arrow-up",e.ns.be("spinner","arrow")])},{default:re(()=>[g(l)]),_:1},8,["class"])),[[d,e.onDecreaseClick]]),at((R(),fe(s,{class:U(["arrow-down",e.ns.be("spinner","arrow")])},{default:re(()=>[g(c)]),_:1},8,["class"])),[[d,e.onIncreaseClick]]),Z("ul",{class:U(e.ns.be("spinner","list"))},[(R(!0),X(Fe,null,Rt(e.arrowLi
stMap[f].value,(p,v)=>(R(),X("li",{key:v,class:U([e.ns.be("spinner","item"),e.ns.is("active",p===e.timePartsMap[f].value),e.ns.is("disabled",e.listMap[f].value[p])])},[typeof p=="number"?(R(),X(Fe,{key:0},[f==="hours"?(R(),X(Fe,{key:0},[yt(Me(("0"+(e.amPmMode?p%12||12:p)).slice(-2))+Me(e.getAmPmFlag(p)),1)],2112)):(R(),X(Fe,{key:1},[yt(Me(("0"+p).slice(-2)),1)],2112))],2112)):se("v-if",!0)],2))),128))],2)],42,Bte))),128)):se("v-if",!0)],2)}var AM=Ae(Lte,[["render",Vte],["__file","/home/runner/work/element-plus/element-plus/packages/components/time-picker/src/time-picker-com/basic-time-spinner.vue"]]);const zte=G({components:{TimeSpinner:AM},props:{visible:Boolean,actualVisible:{type:Boolean,default:void 0},datetimeRole:{type:String},parsedValue:{type:[Object,String]},format:{type:String,default:""}},emits:["pick","select-range","set-picker-option"],setup(e,t){const n=De("time"),{t:r,lang:a}=ln(),o=H([0,2]),i=NM(e),l=x(()=>sa(e.actualVisible)?`${n.namespace.value}-zoom-in-top`:""),s=x(()=>e.format.includes("ss")),c=x(()=>e.format.includes("A")?"A":e.format.includes("a")?"a":""),d=B=>{const M=mt(B).locale(a.value),E=C(M);return M.isSame(E)},f=()=>{t.emit("pick",i.value,!1)},p=(B=!1,M=!1)=>{M||t.emit("pick",e.parsedValue,B)},v=B=>{if(!e.visible)return;const M=C(B).millisecond(0);t.emit("pick",M,!0)},m=(B,M)=>{t.emit("select-range",B,M),o.value=[B,M]},y=B=>{const M=[0,3].concat(s.value?[6]:[]),E=["hours","minutes"].concat(s.value?["seconds"]:[]),W=(M.indexOf(o.value[0])+B+M.length)%M.length;$.start_emitSelectRange(E[W])},b=B=>{const M=B.code;if(M===Ge.left||M===Ge.right){const E=M===Ge.left?-1:1;y(E),B.preventDefault();return}if(M===Ge.up||M===Ge.down){const E=M===Ge.up?-1:1;$.start_scrollDown(E),B.preventDefault();return}},C=B=>{const M={hour:N,minute:D,second:z};let E=B;return["hour","minute","second"].forEach(K=>{if(M[K]){let W;const 
Y=M[K];K==="minute"?W=Y(E.hour(),e.datetimeRole):K==="second"?W=Y(E.hour(),E.minute(),e.datetimeRole):W=Y(e.datetimeRole),W&&W.length&&!W.includes(E[K]())&&(E=E[K](W[0]))}}),E},S=B=>B?mt(B,e.format).locale(a.value):null,w=B=>B?B.format(e.format):null,k=()=>mt(F).locale(a.value);t.emit("set-picker-option",["isValidValue",d]),t.emit("set-picker-option",["formatToString",w]),t.emit("set-picker-option",["parseUserInput",S]),t.emit("set-picker-option",["handleKeydown",b]),t.emit("set-picker-option",["getRangeAvailableTime",C]),t.emit("set-picker-option",["getDefaultValue",k]);const $={},O=B=>{$[B[0]]=B[1]},T=ve("EP_PICKER_BASE"),{arrowControl:_,disabledHours:I,disabledMinutes:L,disabledSeconds:j,defaultValue:F}=T.props,{getAvailableHours:N,getAvailableMinutes:D,getAvailableSeconds:z}=IM(I,L,j);return{ns:n,transitionName:l,arrowControl:_,onSetOption:O,t:r,handleConfirm:p,handleChange:v,setSelectionRange:m,amPmMode:c,showSeconds:s,handleCancel:f,disabledHours:I,disabledMinutes:L,disabledSeconds:j}}});function Hte(e,t,n,r,a,o){const i=we("time-spinner");return 
R(),fe(Vn,{name:e.transitionName},{default:re(()=>[e.actualVisible||e.visible?(R(),X("div",{key:0,class:U(e.ns.b("panel"))},[Z("div",{class:U([e.ns.be("panel","content"),{"has-seconds":e.showSeconds}])},[g(i,{ref:"spinner",role:e.datetimeRole||"start","arrow-control":e.arrowControl,"show-seconds":e.showSeconds,"am-pm-mode":e.amPmMode,"spinner-date":e.parsedValue,"disabled-hours":e.disabledHours,"disabled-minutes":e.disabledMinutes,"disabled-seconds":e.disabledSeconds,onChange:e.handleChange,onSetOption:e.onSetOption,onSelectRange:e.setSelectionRange},null,8,["role","arrow-control","show-seconds","am-pm-mode","spinner-date","disabled-hours","disabled-minutes","disabled-seconds","onChange","onSetOption","onSelectRange"])],2),Z("div",{class:U(e.ns.be("panel","footer"))},[Z("button",{type:"button",class:U([e.ns.be("panel","btn"),"cancel"]),onClick:t[0]||(t[0]=(...l)=>e.handleCancel&&e.handleCancel(...l))},Me(e.t("el.datepicker.cancel")),3),Z("button",{type:"button",class:U([e.ns.be("panel","btn"),"confirm"]),onClick:t[1]||(t[1]=l=>e.handleConfirm())},Me(e.t("el.datepicker.confirm")),3)],2)],2)):se("v-if",!0)]),_:1},8,["name"])}var rw=Ae(zte,[["render",Hte],["__file","/home/runner/work/element-plus/element-plus/packages/components/time-picker/src/time-picker-com/panel-time-pick.vue"]]);const Fs=(e,t)=>{const n=[];for(let r=e;r<=t;r++)n.push(r);return n},jte=G({components:{TimeSpinner:AM},props:{visible:Boolean,actualVisible:Boolean,parsedValue:{type:[Array]},format:{type:String,default:""}},emits:["pick","select-range","set-picker-option"],setup(e,t){const{t:n,lang:r}=ln(),a=De("time"),o=De("picker"),i=x(()=>e.parsedValue[0]),l=x(()=>e.parsedValue[1]),s=NM(e),c=()=>{t.emit("pick",s.value,!1)},d=x(()=>e.format.includes("ss")),f=x(()=>e.format.includes("A")?"A":e.format.includes("a")?"a":""),p=H([]),v=H([]),m=(be=!1)=>{t.emit("pick",[i.value,l.value],be)},y=be=>{S(be.millisecond(0),l.value)},b=be=>{S(i.value,be.millisecond(0))},C=be=>{const 
Ee=be.map(Be=>mt(Be).locale(r.value)),Pe=N(Ee);return Ee[0].isSame(Pe[0])&&Ee[1].isSame(Pe[1])},S=(be,Ee)=>{t.emit("pick",[be,Ee],!0)},w=x(()=>i.value>l.value),k=H([0,2]),$=(be,Ee)=>{t.emit("select-range",be,Ee,"min"),k.value=[be,Ee]},O=x(()=>d.value?11:8),T=(be,Ee)=>{t.emit("select-range",be,Ee,"max"),k.value=[be+O.value,Ee+O.value]},_=be=>{const Ee=d.value?[0,3,6,11,14,17]:[0,3,8,11],Pe=["hours","minutes"].concat(d.value?["seconds"]:[]),te=(Ee.indexOf(k.value[0])+be+Ee.length)%Ee.length,ie=Ee.length/2;te<ie?Y.start_emitSelectRange(Pe[te]):Y.end_emitSelectRange(Pe[te-ie])},I=be=>{const Ee=be.code;if(Ee===Ge.left||Ee===Ge.right){const Pe=Ee===Ge.left?-1:1;_(Pe),be.preventDefault();return}if(Ee===Ge.up||Ee===Ge.down){const Pe=Ee===Ge.up?-1:1,Be=k.value[0]<O.value?"start":"end";Y[`${Be}_scrollDown`](Pe),be.preventDefault();return}},L=(be,Ee)=>{const Pe=oe?oe(be):[],Be=be==="start",ie=(Ee||(Be?l.value:i.value)).hour(),ge=Be?Fs(ie+1,23):Fs(0,ie-1);return ny(Pe,ge)},j=(be,Ee,Pe)=>{const Be=Q?Q(be,Ee):[],te=Ee==="start",ie=Pe||(te?l.value:i.value),ge=ie.hour();if(be!==ge)return Be;const ke=ie.minute(),xe=te?Fs(ke+1,59):Fs(0,ke-1);return ny(Be,xe)},F=(be,Ee,Pe,Be)=>{const te=ae?ae(be,Ee,Pe):[],ie=Pe==="start",ge=Be||(ie?l.value:i.value),ke=ge.hour(),xe=ge.minute();if(be!==ke||Ee!==xe)return te;const Ie=ge.second(),ye=ie?Fs(Ie+1,59):Fs(0,Ie-1);return ny(te,ye)},N=be=>be.map((Ee,Pe)=>M(be[0],be[1],Pe===0?"start":"end")),{getAvailableHours:D,getAvailableMinutes:z,getAvailableSeconds:B}=IM(L,j,F),M=(be,Ee,Pe)=>{const Be={hour:D,minute:z,second:B},te=Pe==="start";let ie=te?be:Ee;const ge=te?Ee:be;return["hour","minute","second"].forEach(ke=>{if(Be[ke]){let xe;const Ie=Be[ke];if(ke==="minute"?xe=Ie(ie.hour(),Pe,ge):ke==="second"?xe=Ie(ie.hour(),ie.minute(),Pe,ge):xe=Ie(Pe,ge),xe&&xe.length&&!xe.includes(ie[ke]())){const 
ye=te?0:xe.length-1;ie=ie[ke](xe[ye])}}}),ie},E=be=>be?Array.isArray(be)?be.map(Ee=>mt(Ee,e.format).locale(r.value)):mt(be,e.format).locale(r.value):null,K=be=>be?Array.isArray(be)?be.map(Ee=>Ee.format(e.format)):be.format(e.format):null,W=()=>{if(Array.isArray(de))return de.map(Ee=>mt(Ee).locale(r.value));const be=mt(de).locale(r.value);return[be,be.add(60,"m")]};t.emit("set-picker-option",["formatToString",K]),t.emit("set-picker-option",["parseUserInput",E]),t.emit("set-picker-option",["isValidValue",C]),t.emit("set-picker-option",["handleKeydown",I]),t.emit("set-picker-option",["getDefaultValue",W]),t.emit("set-picker-option",["getRangeAvailableTime",N]);const Y={},q=be=>{Y[be[0]]=be[1]},J=ve("EP_PICKER_BASE"),{arrowControl:ne,disabledHours:oe,disabledMinutes:Q,disabledSeconds:ae,defaultValue:de}=J.props;return{nsTime:a,nsPicker:o,arrowControl:ne,onSetOption:q,setMaxSelectionRange:T,setMinSelectionRange:$,btnConfirmDisabled:w,handleCancel:c,handleConfirm:m,t:n,showSeconds:d,minDate:i,maxDate:l,amPmMode:f,handleMinChange:y,handleMaxChange:b,minSelectableRange:p,maxSelectableRange:v,disabledHours_:L,disabledMinutes_:j,disabledSeconds_:F}}}),Kte=["disabled"];function Wte(e,t,n,r,a,o){const i=we("time-spinner");return 
e.actualVisible?(R(),X("div",{key:0,class:U([e.nsTime.b("range-picker"),e.nsPicker.b("panel")])},[Z("div",{class:U(e.nsTime.be("range-picker","content"))},[Z("div",{class:U(e.nsTime.be("range-picker","cell"))},[Z("div",{class:U(e.nsTime.be("range-picker","header"))},Me(e.t("el.datepicker.startTime")),3),Z("div",{class:U([e.nsTime.be("range-picker","body"),e.nsTime.be("panel","content"),e.nsTime.is("arrow",e.arrowControl),{"has-seconds":e.showSeconds}])},[g(i,{ref:"minSpinner",role:"start","show-seconds":e.showSeconds,"am-pm-mode":e.amPmMode,"arrow-control":e.arrowControl,"spinner-date":e.minDate,"disabled-hours":e.disabledHours_,"disabled-minutes":e.disabledMinutes_,"disabled-seconds":e.disabledSeconds_,onChange:e.handleMinChange,onSetOption:e.onSetOption,onSelectRange:e.setMinSelectionRange},null,8,["show-seconds","am-pm-mode","arrow-control","spinner-date","disabled-hours","disabled-minutes","disabled-seconds","onChange","onSetOption","onSelectRange"])],2)],2),Z("div",{class:U(e.nsTime.be("range-picker","cell"))},[Z("div",{class:U(e.nsTime.be("range-picker","header"))},Me(e.t("el.datepicker.endTime")),3),Z("div",{class:U([e.nsTime.be("range-picker","body"),e.nsTime.be("panel","content"),e.nsTime.is("arrow",e.arrowControl),{"has-seconds":e.showSeconds}])},[g(i,{ref:"maxSpinner",role:"end","show-seconds":e.showSeconds,"am-pm-mode":e.amPmMode,"arrow-control":e.arrowControl,"spinner-date":e.maxDate,"disabled-hours":e.disabledHours_,"disabled-minutes":e.disabledMinutes_,"disabled-seconds":e.disabledSeconds_,onChange:e.handleMaxChange,onSetOption:e.onSetOption,onSelectRange:e.setMaxSelectionRange},null,8,["show-seconds","am-pm-mode","arrow-control","spinner-date","disabled-hours","disabled-minutes","disabled-seconds","onChange","onSetOption","onSelectRange"])],2)],2)],2),Z("div",{class:U(e.nsTime.be("panel","footer"))},[Z("button",{type:"button",class:U([e.nsTime.be("panel","btn"),"cancel"]),onClick:t[0]||(t[0]=l=>e.handleCancel())},Me(e.t("el.datepicker.cancel")),3),Z(
"button",{type:"button",class:U([e.nsTime.be("panel","btn"),"confirm"]),disabled:e.btnConfirmDisabled,onClick:t[1]||(t[1]=l=>e.handleConfirm())},Me(e.t("el.datepicker.confirm")),11,Kte)],2)],2)):se("v-if",!0)}var Ute=Ae(jte,[["render",Wte],["__file","/home/runner/work/element-plus/element-plus/packages/components/time-picker/src/time-picker-com/panel-time-range.vue"]]);mt.extend(tw);var Yte=G({name:"ElTimePicker",install:null,props:Ke(Te({},nw),{isRange:{type:Boolean,default:!1}}),emits:["update:modelValue"],setup(e,t){const n=H(null),r=e.isRange?"timerange":"time",a=e.isRange?Ute:rw,o=Ke(Te({},e),{focus:()=>{var i;(i=n.value)==null||i.handleFocus()},blur:()=>{var i;(i=n.value)==null||i.handleBlur()}});return ot("ElPopperOptions",e.popperOptions),t.expose(o),()=>{var i;const l=(i=e.format)!=null?i:w0;return qe(wM,Ke(Te({},e),{format:l,type:r,ref:n,"onUpdate:modelValue":s=>t.emit("update:modelValue",s)}),{default:s=>qe(a,s)})}}});const tl=e=>Array.from(Array.from({length:e}).keys()),DM=e=>e.replace(/\W?m{1,2}|\W?ZZ/g,"").replace(/\W?h{1,2}|\W?s{1,3}|\W?a/gi,"").trim(),RM=e=>e.replace(/\W?D{1,2}|\W?Do|\W?d{1,4}|\W?M{1,4}|\W?Y{2,4}/g,"").trim(),vp=Yte;vp.install=e=>{e.component(vp.name,vp)};const qte=vp,Gte=(e,t)=>{const n=e.subtract(1,"month").endOf("month").date();return tl(t).map((r,a)=>n-(t-a-1))},Xte=e=>{const t=e.daysInMonth();return tl(t).map((n,r)=>r+1)},Zte=e=>tl(e.length/7).map(t=>{const n=t*7;return e.slice(n,n+7)}),Jte=Ze({selectedDay:{type:Le(Object)},range:{type:Le(Array)},date:{type:Le(Object),required:!0},hideHeader:{type:Boolean}}),Qte={pick:e=>zt(e)},ene={key:0},tne=["onClick"],nne={name:"DateTable"},rne=G(Ke(Te({},nne),{props:Jte,emits:Qte,setup(e,{expose:t,emit:n}){const r=e;mt.extend(bM);const{t:a,lang:o}=ln(),i=De("calendar-table"),l=De("calendar-day"),s=mt().locale(o.value),c=s.$locale().weekStart||0,d=x(()=>!!r.range&&!!r.range.length),f=x(()=>{let 
C=[];if(d.value){const[S,w]=r.range,k=tl(w.date()-S.date()+1).map(T=>({text:S.date()+T,type:"current"}));let $=k.length%7;$=$===0?0:7-$;const O=tl($).map((T,_)=>({text:_+1,type:"next"}));C=k.concat(O)}else{const S=r.date.startOf("month").day()||7,w=Gte(r.date,S-c).map(O=>({text:O,type:"prev"})),k=Xte(r.date).map(O=>({text:O,type:"current"}));C=[...w,...k];const $=tl(42-C.length).map((O,T)=>({text:T+1,type:"next"}));C=C.concat($)}return Zte(C)}),p=x(()=>{const C=c;return C===0?ay.map(S=>a(`el.datepicker.weeks.${S}`)):ay.slice(C).concat(ay.slice(0,C)).map(S=>a(`el.datepicker.weeks.${S}`))}),v=(C,S)=>{switch(S){case"prev":return r.date.startOf("month").subtract(1,"month").date(C);case"next":return r.date.startOf("month").add(1,"month").date(C);case"current":return r.date.date(C)}},m=({text:C,type:S})=>{const w=[S];if(S==="current"){const k=v(C,S);k.isSame(r.selectedDay,"day")&&w.push(l.is("selected")),k.isSame(s,"day")&&w.push(l.is("today"))}return w},y=({text:C,type:S})=>{const w=v(C,S);n("pick",w)},b=({text:C,type:S})=>{const w=v(C,S);return{isSelected:w.isSame(r.selectedDay),type:`${S}-month`,day:w.format("YYYY-MM-DD"),date:w.toDate()}};return t({getFormattedDate:v}),(C,S)=>(R(),X("table",{class:U([A(i).b(),A(i).is("range",A(d))]),cellspacing:"0",cellpadding:"0"},[C.hideHeader?se("v-if",!0):(R(),X("thead",ene,[(R(!0),X(Fe,null,Rt(A(p),w=>(R(),X("th",{key:w},Me(w),1))),128))])),Z("tbody",null,[(R(!0),X(Fe,null,Rt(A(f),(w,k)=>(R(),X("tr",{key:k,class:U({[A(i).e("row")]:!0,[A(i).em("row","hide-border")]:k===0&&C.hideHeader})},[(R(!0),X(Fe,null,Rt(w,($,O)=>(R(),X("td",{key:O,class:U(m($)),onClick:T=>y($)},[Z("div",{class:U(A(l).b())},[Oe(C.$slots,"dateCell",{data:b($)},()=>[Z("span",null,Me($.text),1)])],2)],10,tne))),128))],2))),128))])],2))}}));var A4=Ae(rne,[["__file","/home/runner/work/element-plus/element-plus/packages/components/calendar/src/date-table.vue"]]);const 
ane=Ze({modelValue:{type:Date},range:{type:Le(Array),validator:e=>Array.isArray(e)&&e.length===2&&e.every(t=>t instanceof Date)}}),one={[Pt]:e=>e instanceof Date,input:e=>e instanceof Date},ine={name:"ElCalendar"},lne=G(Ke(Te({},ine),{props:ane,emits:one,setup(e,{expose:t,emit:n}){const r=e,a=De("calendar"),{t:o,lang:i}=ln(),l=H(),s=mt().locale(i.value),c=x(()=>y.value.subtract(1,"month").date(1)),d=x(()=>y.value.add(1,"month").date(1)),f=x(()=>y.value.subtract(1,"year").date(1)),p=x(()=>y.value.add(1,"year").date(1)),v=x(()=>{const k=`el.datepicker.month${y.value.format("M")}`;return`${y.value.year()} ${o("el.datepicker.year")} ${o(k)}`}),m=x({get(){return r.modelValue?y.value:l.value},set(k){if(!k)return;l.value=k;const $=k.toDate();n("input",$),n("update:modelValue",$)}}),y=x(()=>r.modelValue?mt(r.modelValue).locale(i.value):m.value?m.value:C.value.length?C.value[0][0]:s),b=(k,$)=>{const O=k.startOf("week"),T=$.endOf("week"),_=O.get("month"),I=T.get("month");if(_===I)return[[O,T]];if(_+1===I){const L=O.endOf("month"),j=T.startOf("month"),N=L.isSame(j,"week")?j.add(1,"week"):j;return[[O,L],[N.startOf("week"),T]]}else if(_+2===I){const L=O.endOf("month"),j=O.add(1,"month").startOf("month"),F=L.isSame(j,"week")?j.add(1,"week"):j,N=F.endOf("month"),D=T.startOf("month"),z=N.isSame(D,"week")?D.add(1,"week"):D;return[[O,L],[F.startOf("week"),N],[z.startOf("week"),T]]}else return[]},C=x(()=>{if(!r.range)return[];const k=r.range.map(T=>mt(T).locale(i.value)),[$,O]=k;return $.isAfter(O)?[]:$.isSame(O,"month")?b($,O):$.add(1,"month").month()!==O.month()?[]:b($,O)}),S=k=>{m.value=k},w=k=>{let $;k==="prev-month"?$=c.value:k==="next-month"?$=d.value:k==="prev-year"?$=f.value:k==="next-year"?$=p.value:$=s,!$.isSame(y.value,"day")&&S($)};return 
t({selectedDay:m,pickDay:S,selectDate:w,calculateValidatedDateRange:b}),(k,$)=>(R(),X("div",{class:U(A(a).b())},[Z("div",{class:U(A(a).e("header"))},[Oe(k.$slots,"header",{date:A(v)},()=>[Z("div",{class:U(A(a).e("title"))},Me(A(v)),3),A(C).length===0?(R(),X("div",{key:0,class:U(A(a).e("button-group"))},[g(A(mM),null,{default:re(()=>[g(A(xa),{size:"small",onClick:$[0]||($[0]=O=>w("prev-month"))},{default:re(()=>[yt(Me(A(o)("el.datepicker.prevMonth")),1)]),_:1}),g(A(xa),{size:"small",onClick:$[1]||($[1]=O=>w("today"))},{default:re(()=>[yt(Me(A(o)("el.datepicker.today")),1)]),_:1}),g(A(xa),{size:"small",onClick:$[2]||($[2]=O=>w("next-month"))},{default:re(()=>[yt(Me(A(o)("el.datepicker.nextMonth")),1)]),_:1})]),_:1})],2)):se("v-if",!0)])],2),A(C).length===0?(R(),X("div",{key:0,class:U(A(a).e("body"))},[g(A4,{date:A(y),"selected-day":A(m),onPick:S},sl({_:2},[k.$slots.dateCell?{name:"dateCell",fn:re(O=>[Oe(k.$slots,"dateCell",Za(ll(O)))])}:void 0]),1032,["date","selected-day"])],2)):(R(),X("div",{key:1,class:U(A(a).e("body"))},[(R(!0),X(Fe,null,Rt(A(C),(O,T)=>(R(),fe(A4,{key:T,date:O[0],"selected-day":A(m),range:O,"hide-header":T!==0,onPick:S},sl({_:2},[k.$slots.dateCell?{name:"dateCell",fn:re(_=>[Oe(k.$slots,"dateCell",Za(ll(_)))])}:void 0]),1032,["date","selected-day","range","hide-header"]))),128))],2))],2))}}));var sne=Ae(lne,[["__file","/home/runner/work/element-plus/element-plus/packages/components/calendar/src/calendar.vue"]]);const une=xt(sne),cne=Ze({header:{type:String,default:""},bodyStyle:{type:Le([String,Object,Array]),default:""},shadow:{type:String,default:"always"}}),dne={name:"ElCard"},fne=G(Ke(Te({},dne),{props:cne,setup(e){const 
t=De("card");return(n,r)=>(R(),X("div",{class:U([A(t).b(),A(t).is(`${n.shadow}-shadow`)])},[n.$slots.header||n.header?(R(),X("div",{key:0,class:U(A(t).e("header"))},[Oe(n.$slots,"header",{},()=>[yt(Me(n.header),1)])],2)):se("v-if",!0),Z("div",{class:U(A(t).e("body")),style:Xe(n.bodyStyle)},[Oe(n.$slots,"default")],6)],2))}}));var hne=Ae(fne,[["__file","/home/runner/work/element-plus/element-plus/packages/components/card/src/card.vue"]]);const pne=xt(hne),vne=Ze({initialIndex:{type:Number,default:0},height:{type:String,default:""},trigger:{type:String,default:"hover"},autoplay:{type:Boolean,default:!0},interval:{type:Number,default:3e3},indicatorPosition:{type:String,default:""},indicator:{type:Boolean,default:!0},arrow:{type:String,default:"hover"},type:{type:String,default:""},loop:{type:Boolean,default:!0},direction:{type:String,default:"horizontal",validator(e){return["horizontal","vertical"].includes(e)}},pauseOnHover:{type:Boolean,default:!0}}),mne={change:(e,t)=>[e,t].every(Yt)},gne=["onMouseenter","onMouseleave"],yne=["onMouseenter","onClick"],bne={key:0},Cne={name:"ElCarousel"},wne=G(Ke(Te({},Cne),{props:vne,emits:mne,setup(e,{expose:t,emit:n}){const r=e,a=De("carousel"),o=300,i=H(-1),l=H(null),s=H(!1),c=H(),d=H([]),f=x(()=>r.arrow!=="never"&&!A(b)),p=x(()=>d.value.some(W=>W.props.label.toString().length>0)),v=x(()=>{const W=[a.b(),a.m(r.direction)];return A(y)&&W.push(a.m("card")),W}),m=x(()=>{const W=[a.e("indicators"),a.em("indicators",r.direction)];return p.value&&W.push(a.em("indicators","labels")),(r.indicatorPosition==="outside"||A(y))&&W.push(a.em("indicators","outside")),W}),y=x(()=>r.type==="card"),b=x(()=>r.direction==="vertical"),C=Qi(W=>{O(W)},o,{trailing:!0}),S=Qi(W=>{B(W)},o);function w(){l.value&&(clearInterval(l.value),l.value=null)}function k(){r.interval<=0||!r.autoplay||l.value||(l.value=setInterval(()=>$(),r.interval))}const $=()=>{i.value<d.value.length-1?i.value=i.value+1:r.loop&&(i.value=0)};function O(W){if(wt(W)){const 
J=d.value.filter(ne=>ne.props.name===W);J.length>0&&(W=d.value.indexOf(J[0]))}if(W=Number(W),Number.isNaN(W)||W!==Math.floor(W))return;const Y=d.value.length,q=i.value;W<0?i.value=r.loop?Y-1:0:W>=Y?i.value=r.loop?0:Y-1:i.value=W,q===i.value&&T(q)}function T(W){d.value.forEach((Y,q)=>{Y.translateItem(q,i.value,W)})}function _(W){d.value.push(W)}function I(W){const Y=d.value.findIndex(q=>q.uid===W);Y!==-1&&(d.value.splice(Y,1),i.value===Y&&E())}function L(W,Y){var q,J,ne,oe;const Q=A(d),ae=Q.length;if(ae===0||!W.states.inStage)return!1;const de=Y+1,be=Y-1,Ee=ae-1,Pe=Q[Ee].states.active,Be=Q[0].states.active,te=(J=(q=Q[de])==null?void 0:q.states)==null?void 0:J.active,ie=(oe=(ne=Q[be])==null?void 0:ne.states)==null?void 0:oe.active;return Y===Ee&&Be||te?"left":Y===0&&Pe||ie?"right":!1}function j(){s.value=!0,r.pauseOnHover&&w()}function F(){s.value=!1,k()}function N(W){A(b)||d.value.forEach((Y,q)=>{W===L(Y,q)&&(Y.states.hover=!0)})}function D(){A(b)||d.value.forEach(W=>{W.states.hover=!1})}function z(W){i.value=W}function B(W){r.trigger==="hover"&&W!==i.value&&(i.value=W)}function M(){O(i.value-1)}function E(){O(i.value+1)}ce(()=>i.value,(W,Y)=>{T(Y),Y>-1&&n("change",W,Y)}),ce(()=>r.autoplay,W=>{W?k():w()}),ce(()=>r.loop,()=>{O(i.value)});const K=Qn();return et(async()=>{await 
Ne(),K.value=Cc(c.value,()=>{T()}),r.initialIndex<d.value.length&&r.initialIndex>=0&&(i.value=r.initialIndex),k()}),Lt(()=>{w(),c.value&&K.value&&K.value.stop()}),ot(SE,{root:c,isCardType:y,isVertical:b,items:d,loop:r.loop,addItem:_,removeItem:I,setActiveItem:O}),t({setActiveItem:O,prev:M,next:E}),(W,Y)=>(R(),X("div",{ref_key:"root",ref:c,class:U(A(v)),onMouseenter:dt(j,["stop"]),onMouseleave:dt(F,["stop"])},[Z("div",{class:U(A(a).e("container")),style:Xe({height:W.height})},[A(f)?(R(),fe(Vn,{key:0,name:"carousel-arrow-left"},{default:re(()=>[at(Z("button",{type:"button",class:U([A(a).e("arrow"),A(a).em("arrow","left")]),onMouseenter:Y[0]||(Y[0]=q=>N("left")),onMouseleave:D,onClick:Y[1]||(Y[1]=dt(q=>A(C)(i.value-1),["stop"]))},[g(A(ft),null,{default:re(()=>[g(A(Es))]),_:1})],34),[[_t,(W.arrow==="always"||s.value)&&(r.loop||i.value>0)]])]),_:1})):se("v-if",!0),A(f)?(R(),fe(Vn,{key:1,name:"carousel-arrow-right"},{default:re(()=>[at(Z("button",{type:"button",class:U([A(a).e("arrow"),A(a).em("arrow","right")]),onMouseenter:Y[2]||(Y[2]=q=>N("right")),onMouseleave:D,onClick:Y[3]||(Y[3]=dt(q=>A(C)(i.value+1),["stop"]))},[g(A(ft),null,{default:re(()=>[g(A(Da))]),_:1})],34),[[_t,(W.arrow==="always"||s.value)&&(r.loop||i.value<d.value.length-1)]])]),_:1})):se("v-if",!0),Oe(W.$slots,"default")],6),W.indicatorPosition!=="none"?(R(),X("ul",{key:0,class:U(A(m))},[(R(!0),X(Fe,null,Rt(d.value,(q,J)=>(R(),X("li",{key:J,class:U([A(a).e("indicator"),A(a).em("indicator",W.direction),A(a).is("active",J===i.value)]),onMouseenter:ne=>A(S)(J),onClick:dt(ne=>z(J),["stop"])},[Z("button",{class:U(A(a).e("button"))},[A(p)?(R(),X("span",bne,Me(q.props.label),1)):se("v-if",!0)],2)],42,yne))),128))],2)):se("v-if",!0)],42,gne))}}));var Sne=Ae(wne,[["__file","/home/runner/work/element-plus/element-plus/packages/components/carousel/src/carousel.vue"]]);const 
kne=Ze({name:{type:String,default:""},label:{type:[String,Number],default:""}}),$ne={name:"ElCarouselItem"},One=G(Ke(Te({},$ne),{props:kne,setup(e){const t=e,n=De("carousel"),r=ve(SE),a=$t(),o=.83,i=H(!1),l=H(0),s=H(1),c=H(!1),d=H(!1),f=H(!1),p=H(!1),{isCardType:v,isVertical:m}=r,y=x(()=>{const O=`${`translate${A(m)?"Y":"X"}`}(${A(l)}px)`,T=`scale(${A(s)})`;return{transform:[O,T].join(" ")}});function b($,O,T){const _=T-1,I=O-1,L=O+1,j=T/2;return O===0&&$===_?-1:O===_&&$===0?T:$<I&&O-$>=j?T+1:$>L&&$-O>=j?-2:$}function C($,O){var T;const _=((T=r.root.value)==null?void 0:T.offsetWidth)||0;return f.value?_*((2-o)*($-O)+1)/4:$<O?-(1+o)*_/4:(3+o)*_/4}function S($,O,T){const _=r.root.value;return _?((T?_.offsetHeight:_.offsetWidth)||0)*($-O):0}const w=($,O,T)=>{var _;const I=A(v),L=(_=r.items.value.length)!=null?_:Number.NaN,j=$===O;!I&&!sa(T)&&(p.value=j||$===T),!j&&L>2&&r.loop&&($=b($,O,L));const F=A(m);c.value=j,I?(f.value=Math.round(Math.abs($-O))<=1,l.value=C($,O),s.value=A(c)?1:o):l.value=S($,O,F),d.value=!0};function k(){if(r&&A(v)){const $=r.items.value.findIndex(({uid:O})=>O===a.uid);r.setActiveItem($)}}return et(()=>{r.addItem({props:t,states:bt({hover:i,translate:l,scale:s,active:c,ready:d,inStage:f,animating:p}),uid:a.uid,translateItem:w})}),Wr(()=>{r.removeItem(a.uid)}),($,O)=>at((R(),X("div",{class:U([A(n).e("item"),A(n).is("active",c.value),A(n).is("in-stage",f.value),A(n).is("hover",i.value),A(n).is("animating",p.value),{[A(n).em("item","card")]:A(v)}]),style:Xe(A(y)),onClick:k},[A(v)?at((R(),X("div",{key:0,class:U(A(n).e("mask"))},null,2)),[[_t,!c.value]]):se("v-if",!0),Oe($.$slots,"default")],6)),[[_t,d.value]])}}));var LM=Ae(One,[["__file","/home/runner/work/element-plus/element-plus/packages/components/carousel/src/carousel-item.vue"]]);const 
Pne=xt(Sne,{CarouselItem:LM}),Tne=En(LM),xne={modelValue:{type:[Boolean,Number,String],default:()=>{}},label:{type:[String,Boolean,Number,Object]},indeterminate:Boolean,disabled:Boolean,checked:Boolean,name:{type:String,default:void 0},trueLabel:{type:[String,Number],default:void 0},falseLabel:{type:[String,Number],default:void 0},tabindex:[String,Number],size:String},Pc=()=>{const e=ve(ga,{}),t=ve(Ia,{}),n=ve("CheckboxGroup",{}),r=x(()=>n&&(n==null?void 0:n.name)==="ElCheckboxGroup"),a=x(()=>t.size);return{isGroup:r,checkboxGroup:n,elForm:e,elFormItemSize:a,elFormItem:t}},_ne=e=>{const t=H(!1),{emit:n}=$t(),{isGroup:r,checkboxGroup:a}=Pc(),o=H(!1);return{model:x({get(){var l,s;return r.value?(l=a.modelValue)==null?void 0:l.value:(s=e.modelValue)!=null?s:t.value},set(l){var s;r.value&&Array.isArray(l)?(o.value=a.max!==void 0&&l.length>a.max.value,o.value===!1&&((s=a==null?void 0:a.changeEvent)==null||s.call(a,l))):(n(Pt,l),t.value=l)}}),isLimitExceeded:o}},Ene=(e,{model:t})=>{const{isGroup:n,checkboxGroup:r}=Pc(),a=H(!1),o=Gn(r==null?void 0:r.checkboxGroupSize,{prop:!0}),i=x(()=>{const s=t.value;return bf(s)==="[object Boolean]"?s:Array.isArray(s)?s.includes(e.label):s!=null?s===e.trueLabel:!!s}),l=Gn(x(()=>{var s;return n.value?(s=r==null?void 0:r.checkboxGroupSize)==null?void 0:s.value:void 0}));return{isChecked:i,focus:a,size:o,checkboxSize:l}},Mne=(e,{model:t,isChecked:n})=>{const{elForm:r,isGroup:a,checkboxGroup:o}=Pc(),i=x(()=>{var s,c;const d=(s=o.max)==null?void 0:s.value,f=(c=o.min)==null?void 0:c.value;return!!(d||f)&&t.value.length>=d&&!n.value||t.value.length<=f&&n.value});return{isDisabled:x(()=>{var s,c;const d=e.disabled||r.disabled;return(c=a.value?((s=o.disabled)==null?void 0:s.value)||d||i.value:e.disabled||r.disabled)!=null?c:!1}),isLimitDisabled:i}},Ine=(e,{model:t})=>{function 
n(){Array.isArray(t.value)&&!t.value.includes(e.label)?t.value.push(e.label):t.value=e.trueLabel||!0}e.checked&&n()},Nne=(e,{isLimitExceeded:t})=>{const{elFormItem:n}=Pc(),{emit:r}=$t();function a(o){var i,l;if(t.value)return;const c=o.target.checked?(i=e.trueLabel)!=null?i:!0:(l=e.falseLabel)!=null?l:!1;r("change",c,o)}return ce(()=>e.modelValue,()=>{var o;(o=n.validate)==null||o.call(n,"change").catch(i=>void 0)}),{handleChange:a}},FM=e=>{const{model:t,isLimitExceeded:n}=_ne(e),{focus:r,size:a,isChecked:o,checkboxSize:i}=Ene(e,{model:t}),{isDisabled:l}=Mne(e,{model:t,isChecked:o}),{handleChange:s}=Nne(e,{isLimitExceeded:n});return Ine(e,{model:t}),{isChecked:o,isDisabled:l,checkboxSize:i,model:t,handleChange:s,focus:r,size:a}},Ane=G({name:"ElCheckbox",props:{modelValue:{type:[Number,String,Boolean],default:()=>{}},label:{type:[String,Boolean,Number,Object]},indeterminate:Boolean,disabled:Boolean,checked:Boolean,name:{type:String,default:void 0},trueLabel:{type:[String,Number],default:void 0},falseLabel:{type:[String,Number],default:void 0},id:{type:String,default:void 0},controls:{type:String,default:void 0},border:Boolean,size:{type:String,validator:va},tabindex:[String,Number]},emits:[Pt,"change"],setup(e){const t=De("checkbox");return Te({ns:t},FM(e))}}),Dne=["id","aria-controls"],Rne=["tabindex","role","aria-checked"],Lne=["aria-hidden","name","tabindex","disabled","true-value","false-value"],Fne=["aria-hidden","disabled","value","name","tabindex"];function Bne(e,t,n,r,a,o){return R(),X("label",{id:e.id,class:U([e.ns.b(),e.ns.m(e.checkboxSize),e.ns.is("disabled",e.isDisabled),e.ns.is("bordered",e.border),e.ns.is("checked",e.isChecked)]),"aria-controls":e.indeterminate?e.controls:null},[Z("span",{class:U([e.ns.e("input"),e.ns.is("disabled",e.isDisabled),e.ns.is("checked",e.isChecked),e.ns.is("indeterminate",e.indeterminate),e.ns.is("focus",e.focus)]),tabindex:e.indeterminate?0:void 0,role:e.indeterminate?"checkbox":void 
0,"aria-checked":e.indeterminate?"mixed":!1},[Z("span",{class:U(e.ns.e("inner"))},null,2),e.trueLabel||e.falseLabel?at((R(),X("input",{key:0,"onUpdate:modelValue":t[0]||(t[0]=i=>e.model=i),class:U(e.ns.e("original")),type:"checkbox","aria-hidden":e.indeterminate?"true":"false",name:e.name,tabindex:e.tabindex,disabled:e.isDisabled,"true-value":e.trueLabel,"false-value":e.falseLabel,onChange:t[1]||(t[1]=(...i)=>e.handleChange&&e.handleChange(...i)),onFocus:t[2]||(t[2]=i=>e.focus=!0),onBlur:t[3]||(t[3]=i=>e.focus=!1)},null,42,Lne)),[[dv,e.model]]):at((R(),X("input",{key:1,"onUpdate:modelValue":t[4]||(t[4]=i=>e.model=i),class:U(e.ns.e("original")),type:"checkbox","aria-hidden":e.indeterminate?"true":"false",disabled:e.isDisabled,value:e.label,name:e.name,tabindex:e.tabindex,onChange:t[5]||(t[5]=(...i)=>e.handleChange&&e.handleChange(...i)),onFocus:t[6]||(t[6]=i=>e.focus=!0),onBlur:t[7]||(t[7]=i=>e.focus=!1)},null,42,Fne)),[[dv,e.model]])],10,Rne),e.$slots.default||e.label?(R(),X("span",{key:0,class:U(e.ns.e("label"))},[Oe(e.$slots,"default"),e.$slots.default?se("v-if",!0):(R(),X(Fe,{key:0},[yt(Me(e.label),1)],2112))],2)):se("v-if",!0)],10,Dne)}var Vne=Ae(Ane,[["render",Bne],["__file","/home/runner/work/element-plus/element-plus/packages/components/checkbox/src/checkbox.vue"]]);const zne=G({name:"ElCheckboxButton",props:xne,emits:[Pt,"change"],setup(e){const{focus:t,isChecked:n,isDisabled:r,size:a,model:o,handleChange:i}=FM(e),{checkboxGroup:l}=Pc(),s=De("checkbox"),c=x(()=>{var d,f,p,v;const m=(f=(d=l==null?void 0:l.fill)==null?void 0:d.value)!=null?f:"";return{backgroundColor:m,borderColor:m,color:(v=(p=l==null?void 0:l.textColor)==null?void 0:p.value)!=null?v:"",boxShadow:m?`-1px 0 0 0 ${m}`:null}});return{focus:t,isChecked:n,isDisabled:r,model:o,handleChange:i,activeStyle:c,size:a,ns:s}}}),Hne=["aria-checked","aria-disabled"],jne=["name","tabindex","disabled","true-value","false-value"],Kne=["name","tabindex","disabled","value"];function Wne(e,t,n,r,a,o){return 
R(),X("label",{class:U([e.ns.b("button"),e.ns.bm("button",e.size),e.ns.is("disabled",e.isDisabled),e.ns.is("checked",e.isChecked),e.ns.is("focus",e.focus)]),role:"checkbox","aria-checked":e.isChecked,"aria-disabled":e.isDisabled},[e.trueLabel||e.falseLabel?at((R(),X("input",{key:0,"onUpdate:modelValue":t[0]||(t[0]=i=>e.model=i),class:U(e.ns.be("button","original")),type:"checkbox",name:e.name,tabindex:e.tabindex,disabled:e.isDisabled,"true-value":e.trueLabel,"false-value":e.falseLabel,onChange:t[1]||(t[1]=(...i)=>e.handleChange&&e.handleChange(...i)),onFocus:t[2]||(t[2]=i=>e.focus=!0),onBlur:t[3]||(t[3]=i=>e.focus=!1)},null,42,jne)),[[dv,e.model]]):at((R(),X("input",{key:1,"onUpdate:modelValue":t[4]||(t[4]=i=>e.model=i),class:U(e.ns.be("button","original")),type:"checkbox",name:e.name,tabindex:e.tabindex,disabled:e.isDisabled,value:e.label,onChange:t[5]||(t[5]=(...i)=>e.handleChange&&e.handleChange(...i)),onFocus:t[6]||(t[6]=i=>e.focus=!0),onBlur:t[7]||(t[7]=i=>e.focus=!1)},null,42,Kne)),[[dv,e.model]]),e.$slots.default||e.label?(R(),X("span",{key:2,class:U(e.ns.be("button","inner")),style:Xe(e.isChecked?e.activeStyle:null)},[Oe(e.$slots,"default",{},()=>[yt(Me(e.label),1)])],6)):se("v-if",!0)],10,Hne)}var BM=Ae(zne,[["render",Wne],["__file","/home/runner/work/element-plus/element-plus/packages/components/checkbox/src/checkbox-button.vue"]]);const Une=G({name:"ElCheckboxGroup",props:{modelValue:{type:Array,default:()=>[]},disabled:Boolean,min:{type:Number,default:void 0},max:{type:Number,default:void 0},size:{type:String,validator:va},fill:{type:String,default:void 0},textColor:{type:String,default:void 0},tag:{type:String,default:"div"}},emits:[Pt,"change"],setup(e,{emit:t,slots:n}){const{elFormItem:r}=Pc(),a=Gn(),o=De("checkbox"),i=s=>{t(Pt,s),Ne(()=>{t("change",s)})},l=x({get(){return e.modelValue},set(s){i(s)}});return ot("CheckboxGroup",Ke(Te({name:"ElCheckboxGroup",modelValue:l},or(e)),{checkboxGroupSize:a,changeEvent:i})),ce(()=>e.modelValue,()=>{var 
s;(s=r.validate)==null||s.call(r,"change").catch(c=>void 0)}),()=>qe(e.tag,{class:o.b("group"),role:"group","aria-label":"checkbox-group"},[Oe(n,"default")])}});var VM=Ae(Une,[["__file","/home/runner/work/element-plus/element-plus/packages/components/checkbox/src/checkbox-group.vue"]]);const io=xt(Vne,{CheckboxButton:BM,CheckboxGroup:VM}),Yne=En(BM),zM=En(VM),HM=Ze({size:Km,disabled:Boolean,label:{type:[String,Number,Boolean],default:""}}),qne=Ze(Ke(Te({},HM),{modelValue:{type:[String,Number,Boolean],default:""},name:{type:String,default:""},border:Boolean})),jM={[Pt]:e=>wt(e)||Yt(e)||yr(e),change:e=>wt(e)||Yt(e)||yr(e)},KM=(e,t)=>{const n=H(),r=ve(TE,void 0),a=x(()=>!!r),o=x({get(){return a.value?r.modelValue:e.modelValue},set(d){a.value?r.changeEvent(d):t(Pt,d),n.value.checked=e.modelValue===e.label}}),i=Gn(x(()=>r==null?void 0:r.size)),l=Ms(x(()=>r==null?void 0:r.disabled)),s=H(!1),c=x(()=>l.value||a.value&&o.value!==e.label?-1:0);return{radioRef:n,isGroup:a,radioGroup:r,focus:s,size:i,disabled:l,tabIndex:c,modelValue:o}},Gne=G({name:"ElRadio",props:qne,emits:jM,setup(e,{emit:t}){const n=De("radio"),{radioRef:r,isGroup:a,focus:o,size:i,disabled:l,tabIndex:s,modelValue:c}=KM(e,t);function d(){Ne(()=>t("change",c.value))}return{ns:n,focus:o,isGroup:a,modelValue:c,tabIndex:s,size:i,disabled:l,radioRef:r,handleChange:d}}}),Xne=["aria-checked","aria-disabled","tabindex"],Zne=["value","name","disabled"];function Jne(e,t,n,r,a,o){return 
R(),X("label",{class:U([e.ns.b(),e.ns.is("disabled",e.disabled),e.ns.is("focus",e.focus),e.ns.is("bordered",e.border),e.ns.is("checked",e.modelValue===e.label),e.ns.m(e.size)]),role:"radio","aria-checked":e.modelValue===e.label,"aria-disabled":e.disabled,tabindex:e.tabIndex,onKeydown:t[5]||(t[5]=It(dt(i=>e.modelValue=e.disabled?e.modelValue:e.label,["stop","prevent"]),["space"]))},[Z("span",{class:U([e.ns.e("input"),e.ns.is("disabled",e.disabled),e.ns.is("checked",e.modelValue===e.label)])},[Z("span",{class:U(e.ns.e("inner"))},null,2),at(Z("input",{ref:"radioRef","onUpdate:modelValue":t[0]||(t[0]=i=>e.modelValue=i),class:U(e.ns.e("original")),value:e.label,type:"radio","aria-hidden":"true",name:e.name,disabled:e.disabled,tabindex:"-1",onFocus:t[1]||(t[1]=i=>e.focus=!0),onBlur:t[2]||(t[2]=i=>e.focus=!1),onChange:t[3]||(t[3]=(...i)=>e.handleChange&&e.handleChange(...i))},null,42,Zne),[[t8,e.modelValue]])],2),Z("span",{class:U(e.ns.e("label")),onKeydown:t[4]||(t[4]=dt(()=>{},["stop"]))},[Oe(e.$slots,"default",{},()=>[yt(Me(e.label),1)])],34)],42,Xne)}var Qne=Ae(Gne,[["render",Jne],["__file","/home/runner/work/element-plus/element-plus/packages/components/radio/src/radio.vue"]]);const ere=Ze(Ke(Te({},HM),{name:{type:String,default:""}})),tre=G({name:"ElRadioButton",props:ere,setup(e,{emit:t}){const n=De("radio"),{radioRef:r,isGroup:a,focus:o,size:i,disabled:l,tabIndex:s,modelValue:c,radioGroup:d}=KM(e,t),f=x(()=>({backgroundColor:(d==null?void 0:d.fill)||"",borderColor:(d==null?void 0:d.fill)||"",boxShadow:d!=null&&d.fill?`-1px 0 0 0 ${d.fill}`:"",color:(d==null?void 0:d.textColor)||""}));return{ns:n,isGroup:a,size:i,disabled:l,tabIndex:s,modelValue:c,focus:o,activeStyle:f,radioRef:r}}}),nre=["aria-checked","aria-disabled","tabindex"],rre=["value","name","disabled"];function are(e,t,n,r,a,o){return 
R(),X("label",{class:U([e.ns.b("button"),e.ns.is("active",e.modelValue===e.label),e.ns.is("disabled",e.disabled),e.ns.is("focus",e.focus),e.ns.bm("button",e.size)]),role:"radio","aria-checked":e.modelValue===e.label,"aria-disabled":e.disabled,tabindex:e.tabIndex,onKeydown:t[4]||(t[4]=It(dt(i=>e.modelValue=e.disabled?e.modelValue:e.label,["stop","prevent"]),["space"]))},[at(Z("input",{ref:"radioRef","onUpdate:modelValue":t[0]||(t[0]=i=>e.modelValue=i),class:U(e.ns.be("button","original-radio")),value:e.label,type:"radio",name:e.name,disabled:e.disabled,tabindex:"-1",onFocus:t[1]||(t[1]=i=>e.focus=!0),onBlur:t[2]||(t[2]=i=>e.focus=!1)},null,42,rre),[[t8,e.modelValue]]),Z("span",{class:U(e.ns.be("button","inner")),style:Xe(e.modelValue===e.label?e.activeStyle:{}),onKeydown:t[3]||(t[3]=dt(()=>{},["stop"]))},[Oe(e.$slots,"default",{},()=>[yt(Me(e.label),1)])],38)],42,nre)}var WM=Ae(tre,[["render",are],["__file","/home/runner/work/element-plus/element-plus/packages/components/radio/src/radio-button.vue"]]);const ore=Ze({size:Km,disabled:Boolean,modelValue:{type:[String,Number,Boolean],default:""},fill:{type:String,default:""},textColor:{type:String,default:""}}),ire=jM,lre=G({name:"ElRadioGroup",props:ore,emits:ire,setup(e,t){const n=De("radio"),r=H(),{formItem:a}=$c(),o=l=>{t.emit(Pt,l),Ne(()=>t.emit("change",l))},i=l=>{if(!r.value)return;const s=l.target,c=s.nodeName==="INPUT"?"[type=radio]":"[role=radio]",d=r.value.querySelectorAll(c),f=d.length,p=Array.from(d).indexOf(s),v=r.value.querySelectorAll("[role=radio]");let m=null;switch(l.code){case Ge.left:case Ge.up:l.stopPropagation(),l.preventDefault(),m=p===0?f-1:p-1;break;case Ge.right:case Ge.down:l.stopPropagation(),l.preventDefault(),m=p===f-1?0:p+1;break}m!==null&&(v[m].click(),v[m].focus())};return et(()=>{const l=r.value.querySelectorAll("[type=radio]"),s=l[0];!Array.from(l).some(c=>c.checked)&&s&&(s.tabIndex=0)}),ot(TE,bt(Ke(Te({},or(e)),{changeEvent:o}))),ce(()=>e.modelValue,()=>a==null?void 
0:a.validate("change").catch(l=>void 0)),{ns:n,radioGroupRef:r,handleKeydown:i}}});function sre(e,t,n,r,a,o){return R(),X("div",{ref:"radioGroupRef",class:U(e.ns.b("group")),role:"radiogroup",onKeydown:t[0]||(t[0]=(...i)=>e.handleKeydown&&e.handleKeydown(...i))},[Oe(e.$slots,"default")],34)}var UM=Ae(lre,[["render",sre],["__file","/home/runner/work/element-plus/element-plus/packages/components/radio/src/radio-group.vue"]]);const YM=xt(Qne,{RadioButton:WM,RadioGroup:UM}),ure=En(UM),cre=En(WM);var dre=G({name:"NodeContent",setup(){return{ns:De("cascader-node")}},render(){const{ns:e}=this,{node:t,panel:n}=this.$parent,{data:r,label:a}=t,{renderLabelFn:o}=n;return qe("span",{class:e.e("label")},o?o({node:t,data:r}):a)}});const aw=Symbol(),fre=G({name:"ElCascaderNode",components:{ElCheckbox:io,ElRadio:YM,NodeContent:dre,ElIcon:ft,Check:Wu,Loading:gl,ArrowRight:Da},props:{node:{type:Object,required:!0},menuId:String},emits:["expand"],setup(e,{emit:t}){const n=ve(aw),r=De("cascader-node"),a=x(()=>n.isHoverMenu),o=x(()=>n.config.multiple),i=x(()=>n.config.checkStrictly),l=x(()=>{var O;return(O=n.checkedNodes[0])==null?void 0:O.uid}),s=x(()=>e.node.isDisabled),c=x(()=>e.node.isLeaf),d=x(()=>i.value&&!c.value||!s.value),f=x(()=>v(n.expandingNode)),p=x(()=>i.value&&n.checkedNodes.some(v)),v=O=>{var T;const{level:_,uid:I}=e.node;return((T=O==null?void 0:O.pathNodes[_-1])==null?void 
0:T.uid)===I},m=()=>{f.value||n.expandNode(e.node)},y=O=>{const{node:T}=e;O!==T.checked&&n.handleCheckChange(T,O)},b=()=>{n.lazyLoad(e.node,()=>{c.value||m()})},C=O=>{!a.value||(S(),!c.value&&t("expand",O))},S=()=>{const{node:O}=e;!d.value||O.loading||(O.loaded?m():b())},w=()=>{a.value&&!c.value||(c.value&&!s.value&&!i.value&&!o.value?$(!0):S())},k=O=>{i.value?(y(O),e.node.loaded&&m()):$(O)},$=O=>{e.node.loaded?(y(O),!i.value&&m()):b()};return{panel:n,isHoverMenu:a,multiple:o,checkStrictly:i,checkedNodeId:l,isDisabled:s,isLeaf:c,expandable:d,inExpandingPath:f,inCheckedPath:p,ns:r,handleHoverExpand:C,handleExpand:S,handleClick:w,handleCheck:$,handleSelectCheck:k}}}),hre=["id","aria-haspopup","aria-owns","aria-expanded","tabindex"],pre=Z("span",null,null,-1);function vre(e,t,n,r,a,o){const i=we("el-checkbox"),l=we("el-radio"),s=we("check"),c=we("el-icon"),d=we("node-content"),f=we("loading"),p=we("arrow-right");return R(),X("li",{id:`${e.menuId}-${e.node.uid}`,role:"menuitem","aria-haspopup":!e.isLeaf,"aria-owns":e.isLeaf?null:e.menuId,"aria-expanded":e.inExpandingPath,tabindex:e.expandable?-1:void 0,class:U([e.ns.b(),e.ns.is("selectable",e.checkStrictly),e.ns.is("active",e.node.checked),e.ns.is("disabled",!e.expandable),e.inExpandingPath&&"in-active-path",e.inCheckedPath&&"in-checked-path"]),onMouseenter:t[2]||(t[2]=(...v)=>e.handleHoverExpand&&e.handleHoverExpand(...v)),onFocus:t[3]||(t[3]=(...v)=>e.handleHoverExpand&&e.handleHoverExpand(...v)),onClick:t[4]||(t[4]=(...v)=>e.handleClick&&e.handleClick(...v))},[se(" prefix 
"),e.multiple?(R(),fe(i,{key:0,"model-value":e.node.checked,indeterminate:e.node.indeterminate,disabled:e.isDisabled,onClick:t[0]||(t[0]=dt(()=>{},["stop"])),"onUpdate:modelValue":e.handleSelectCheck},null,8,["model-value","indeterminate","disabled","onUpdate:modelValue"])):e.checkStrictly?(R(),fe(l,{key:1,"model-value":e.checkedNodeId,label:e.node.uid,disabled:e.isDisabled,"onUpdate:modelValue":e.handleSelectCheck,onClick:t[1]||(t[1]=dt(()=>{},["stop"]))},{default:re(()=>[se(`
        Add an empty element to avoid render label,
        do not use empty fragment here for https://github.com/vuejs/vue-next/pull/2485
      `),pre]),_:1},8,["model-value","label","disabled","onUpdate:modelValue"])):e.isLeaf&&e.node.checked?(R(),fe(c,{key:2,class:U(e.ns.e("prefix"))},{default:re(()=>[g(s)]),_:1},8,["class"])):se("v-if",!0),se(" content "),g(d),se(" postfix "),e.isLeaf?se("v-if",!0):(R(),X(Fe,{key:3},[e.node.loading?(R(),fe(c,{key:0,class:U([e.ns.is("loading"),e.ns.e("postfix")])},{default:re(()=>[g(f)]),_:1},8,["class"])):(R(),fe(c,{key:1,class:U(["arrow-right",e.ns.e("postfix")])},{default:re(()=>[g(p)]),_:1},8,["class"]))],2112))],42,hre)}var mre=Ae(fre,[["render",vre],["__file","/home/runner/work/element-plus/element-plus/packages/components/cascader-panel/src/node.vue"]]);const gre=G({name:"ElCascaderMenu",components:{Loading:gl,ElIcon:ft,ElScrollbar:xi,ElCascaderNode:mre},props:{nodes:{type:Array,required:!0},index:{type:Number,required:!0}},setup(e){const t=$t(),n=De("cascader-menu"),{t:r}=ln(),a=Pf();let o=null,i=null;const l=ve(aw),s=H(null),c=x(()=>!e.nodes.length),d=x(()=>!l.initialLoaded),f=x(()=>`cascader-menu-${a}-${e.index}`),p=b=>{o=b.target},v=b=>{if(!(!l.isHoverMenu||!o||!s.value))if(o.contains(b.target)){m();const C=t.vnode.el,{left:S}=C.getBoundingClientRect(),{offsetWidth:w,offsetHeight:k}=C,$=b.clientX-S,O=o.offsetTop,T=O+o.offsetHeight;s.value.innerHTML=`
          <path style="pointer-events: auto;" fill="transparent" d="M${$} ${O} L${w} 0 V${O} Z" />
          <path style="pointer-events: auto;" fill="transparent" d="M${$} ${T} L${w} ${k} V${T} Z" />
        `}else i||(i=window.setTimeout(y,l.config.hoverThreshold))},m=()=>{!i||(clearTimeout(i),i=null)},y=()=>{!s.value||(s.value.innerHTML="",m())};return{ns:n,panel:l,hoverZone:s,isEmpty:c,isLoading:d,menuId:f,t:r,handleExpand:p,handleMouseMove:v,clearHoverZone:y}}});function yre(e,t,n,r,a,o){const i=we("el-cascader-node"),l=we("loading"),s=we("el-icon"),c=we("el-scrollbar");return R(),fe(c,{key:e.menuId,tag:"ul",role:"menu",class:U(e.ns.b()),"wrap-class":e.ns.e("wrap"),"view-class":[e.ns.e("list"),e.ns.is("empty",e.isEmpty)],onMousemove:e.handleMouseMove,onMouseleave:e.clearHoverZone},{default:re(()=>{var d;return[(R(!0),X(Fe,null,Rt(e.nodes,f=>(R(),fe(i,{key:f.uid,node:f,"menu-id":e.menuId,onExpand:e.handleExpand},null,8,["node","menu-id","onExpand"]))),128)),e.isLoading?(R(),X("div",{key:0,class:U(e.ns.e("empty-text"))},[g(s,{size:"14",class:U(e.ns.is("loading"))},{default:re(()=>[g(l)]),_:1},8,["class"]),yt(" "+Me(e.t("el.cascader.loading")),1)],2)):e.isEmpty?(R(),X("div",{key:1,class:U(e.ns.e("empty-text"))},Me(e.t("el.cascader.noData")),3)):(d=e.panel)!=null&&d.isHoverMenu?(R(),X("svg",{key:2,ref:"hoverZone",class:U(e.ns.e("hover-zone"))},null,2)):se("v-if",!0)]}),_:1},8,["class","wrap-class","view-class","onMousemove","onMouseleave"])}var bre=Ae(gre,[["render",yre],["__file","/home/runner/work/element-plus/element-plus/packages/components/cascader-panel/src/menu.vue"]]),ow=(e=>(e.CLICK="click",e.HOVER="hover",e))(ow||{});let Cre=0;const wre=e=>{const t=[e];let{parent:n}=e;for(;n;)t.unshift(n),n=n.parent;return t};class Ju{constructor(t,n,r,a=!1){this.data=t,this.config=n,this.parent=r,this.root=a,this.uid=Cre++,this.checked=!1,this.indeterminate=!1,this.loading=!1;const{value:o,label:i,children:l}=n,s=t[l],c=wre(this);this.level=a?0:r?r.level+1:1,this.value=t[o],this.label=t[i],this.pathNodes=c,this.pathValues=c.map(d=>d.value),this.pathLabels=c.map(d=>d.label),this.childrenData=s,this.children=(s||[]).map(d=>new 
Ju(d,n,this)),this.loaded=!n.lazy||this.isLeaf||!Kd(s)}
/*
 * True when the configured `disabled` field (or function) marks this node as disabled, or,
 * unless checkStrictly is set, when the parent node is disabled.
 */
get isDisabled(){const{data:t,parent:n,config:r}=this,{disabled:a,checkStrictly:o}=r;return(Ct(a)?a(t,this):!!t[a])||!o&&(n==null?void 0:n.isDisabled)}
/*
 * Leaf detection. The configured `leaf` field (or function) wins when it yields a value that
 * `sa` does not flag (sa is an external helper, presumably an "is undefined" check - confirm).
 * Otherwise a lazy node that has not loaded yet is not a leaf, and any other node is a leaf
 * when it has no non-empty children array.
 */
get isLeaf(){const{data:t,config:n,childrenData:r,loaded:a}=this,{lazy:o,leaf:i}=n,l=Ct(i)?i(t,this):t[i];return sa(l)?o&&!a?!1:!(Array.isArray(r)&&r.length):!!l}
/* Value reported for this node: the full path of values when config.emitPath is set, else the node's own value. */
get valueByOption(){return this.config.emitPath?this.pathValues:this.value}
/* Creates a child node from raw data `t`, records the data in childrenData (creating the array if needed), and returns the new node. */
appendChild(t){const{childrenData:n,children:r}=this,a=new Ju(t,this.config,this);return Array.isArray(n)?n.push(t):this.childrenData=[t],r.push(a),a}
/* Computes the display text: path labels joined by `n` when `t` is truthy, else the node's label. Caches it on this.text and returns it. */
calcText(t,n){const r=t?this.pathLabels.join(n):this.label;return this.text=r,r}
/*
 * Sends event `t` down the tree: each child first broadcasts to its own children, then its
 * `onParent<Event>` handler is invoked if it exists. `ia` builds the handler suffix -
 * presumably it capitalises the event name, e.g. "check" -> onParentCheck (confirm).
 */
broadcast(t,...n){const r=`onParent${ia(t)}`;this.children.forEach(a=>{a&&(a.broadcast(t,...n),a[r]&&a[r](...n))})}
/* Sends event `t` up the tree: calls the parent's `onChild<Event>` handler if present, then lets the parent emit further up. */
emit(t,...n){const{parent:r}=this,a=`onChild${ia(t)}`;r&&(r[a]&&r[a](...n),r.emit(t,...n))}
/* Handler for a check broadcast from an ancestor; disabled nodes ignore it. */
onParentCheck(t){this.isDisabled||this.setCheckState(t)}
/* Handler for a check emitted by a descendant: the requested state is "all enabled children are checked" (false when there are no enabled children). */
onChildCheck(){const{children:t}=this,n=t.filter(a=>!a.isDisabled),r=n.length?n.every(a=>a.checked):!1;this.setCheckState(r)}
/*
 * Recomputes checked / indeterminate from the children.
 * Each child counts 1 when checked, 0.5 when indeterminate and 0 otherwise. The node is checked
 * only when it is loaded, every child is loaded and checked, and `t` is truthy. It is
 * indeterminate when it is loaded and the children's total is above 0 but below the child count.
 */
setCheckState(t){const n=this.children.length,r=this.children.reduce((a,o)=>{const i=o.checked?1:o.indeterminate?.5:0;return a+i},0);this.checked=this.loaded&&this.children.every(a=>a.loaded&&a.checked)&&t,this.indeterminate=this.loaded&&r!==n&&r>0}
/*
 * Sets the checked state to `t`; does nothing when the state is already `t`.
 * With checkStrictly, or without multiple, only this node changes. Otherwise the change is
 * broadcast to descendants, this node's state is recomputed, and ancestors are notified.
 */
doCheck(t){if(this.checked===t)return;const{checkStrictly:n,multiple:r}=this.config;n||!r?this.checked=t:(this.broadcast("check",t),this.setCheckState(t),this.emit("check"))}}
/* Flattens a node tree depth-first into an array. With `t` truthy only leaf nodes are collected; otherwise every node is. */
const M0=(e,t)=>e.reduce((n,r)=>(r.isLeaf?n.push(r):(!t&&n.push(r),n=n.concat(M0(r.children,t))),n),[]);
/*
 * Node store: builds Ju nodes from raw option data and keeps three views of them -
 * top-level nodes, all nodes flattened, and leaf nodes only.
 */
class D4{constructor(t,n){this.config=n;const r=(t||[]).map(a=>new Ju(a,this.config));this.nodes=r,this.allNodes=M0(r,!1),this.leafNodes=M0(r,!0)}
/* Returns the top-level nodes. */
getNodes(){return this.nodes}
/* Returns the flattened node list: leaf nodes only when `t` is truthy, all nodes otherwise. */
getFlattedNodes(t){return t?this.leafNodes:this.allNodes}
/* Adds raw data `t` as a node under parent `n`, or as a top-level node when `n` is falsy (the statement continues on the next line). */
appendNode(t,n){const r=n?n.appendChild(t):new 
Ju(t,this.config);n||this.nodes.push(r),this.allNodes.push(r),r.isLeaf&&this.leafNodes.push(r)}appendNodes(t,n){t.forEach(r=>this.appendNode(r,n))}getNodeByValue(t,n=!1){return!t&&t!==0?null:this.getFlattedNodes(n).find(a=>er(a.value,t)||er(a.pathValues,t))||null}getSameNode(t){return t&&this.getFlattedNodes(!1).find(({value:r,level:a})=>er(t.value,r)&&t.level===a)||null}}const qM={modelValue:[Number,String,Array],options:{type:Array,default:()=>[]},props:{type:Object,default:()=>({})}},Sre={expandTrigger:ow.CLICK,multiple:!1,checkStrictly:!1,emitPath:!0,lazy:!1,lazyLoad:Qt,value:"value",label:"label",children:"children",leaf:"leaf",disabled:"disabled",hoverThreshold:500},kre=e=>x(()=>Te(Te({},Sre),e.props)),R4=e=>{if(!e)return 0;const t=e.id.split("-");return Number(t[t.length-2])},$re=e=>{if(!e)return;const t=e.querySelector("input");t?t.click():nE(e)&&e.click()},Ore=(e,t)=>{const n=t.slice(0),r=n.map(o=>o.uid),a=e.reduce((o,i)=>{const l=r.indexOf(i.uid);return l>-1&&(o.push(i),n.splice(l,1),r.splice(l,1)),o},[]);return a.push(...n),a},Pre=G({name:"ElCascaderPanel",components:{ElCascaderMenu:bre},props:Ke(Te({},qM),{border:{type:Boolean,default:!0},renderLabel:Function}),emits:[Pt,ir,"close","expand-change"],setup(e,{emit:t,slots:n}){let r=!1;const a=De("cascader"),o=kre(e);let i=null;const l=H(!0),s=H([]),c=H(null),d=H([]),f=H(null),p=H([]),v=x(()=>o.value.expandTrigger===ow.HOVER),m=x(()=>e.renderLabel||n.default),y=()=>{const{options:F}=e,N=o.value;r=!1,i=new D4(F,N),d.value=[i.getNodes()],N.lazy&&Kd(e.options)?(l.value=!1,b(void 0,D=>{D&&(i=new D4(D,N),d.value=[i.getNodes()]),l.value=!0,_(!1,!0)})):_(!1,!0)},b=(F,N)=>{const D=o.value;F=F||new Ju({},D,void 0,!0),F.loading=!0;const z=B=>{const M=F,E=M.root?null:M;B&&(i==null||i.appendNodes(B,E)),M.loading=!1,M.loaded=!0,M.childrenData=M.childrenData||[],N&&N(B)};D.lazyLoad(F,z)},C=(F,N)=>{var D;const{level:z}=F,B=d.value.slice(0,z);let 
M;F.isLeaf?M=F.pathNodes[z-2]:(M=F,B.push(F.children)),((D=f.value)==null?void 0:D.uid)!==(M==null?void 0:M.uid)&&(f.value=F,d.value=B,!N&&t("expand-change",(F==null?void 0:F.pathValues)||[]))},S=(F,N,D=!0)=>{const{checkStrictly:z,multiple:B}=o.value,M=p.value[0];r=!0,!B&&(M==null||M.doCheck(!1)),F.doCheck(N),T(),D&&!B&&!z&&t("close"),!D&&!B&&!z&&w(F)},w=F=>{!F||(F=F.parent,w(F),F&&C(F))},k=F=>i==null?void 0:i.getFlattedNodes(F),$=F=>{var N;return(N=k(F))==null?void 0:N.filter(D=>D.checked!==!1)},O=()=>{p.value.forEach(F=>F.doCheck(!1)),T()},T=()=>{var F;const{checkStrictly:N,multiple:D}=o.value,z=p.value,B=$(!N),M=Ore(z,B),E=M.map(K=>K.valueByOption);p.value=M,c.value=D?E:(F=E[0])!=null?F:null},_=(F=!1,N=!1)=>{const{modelValue:D}=e,{lazy:z,multiple:B,checkStrictly:M}=o.value,E=!M;if(!(!l.value||r||!N&&er(D,c.value)))if(z&&!F){const W=J$(iW(rs(D))).map(Y=>i==null?void 0:i.getNodeByValue(Y)).filter(Y=>!!Y&&!Y.loaded&&!Y.loading);W.length?W.forEach(Y=>{b(Y,()=>_(!1,N))}):_(!0,N)}else{const K=B?rs(D):[D],W=J$(K.map(Y=>i==null?void 0:i.getNodeByValue(Y,E)));I(W,!1),c.value=D}},I=(F,N=!0)=>{const{checkStrictly:D}=o.value,z=p.value,B=F.filter(K=>!!K&&(D||K.isLeaf)),M=i==null?void 0:i.getSameNode(f.value),E=N&&M||B[0];E?E.pathNodes.forEach(K=>C(K,!0)):f.value=null,z.forEach(K=>K.doCheck(!1)),B.forEach(K=>K.doCheck(!0)),p.value=B,Ne(L)},L=()=>{!Bt||s.value.forEach(F=>{const N=F==null?void 0:F.$el;if(N){const D=N.querySelector(`.${a.namespace.value}-scrollbar__wrap`),z=N.querySelector(`.${a.b("node")}.${a.is("active")}`)||N.querySelector(`.${a.b("node")}.in-active-path`);lE(D,z)}})},j=F=>{const N=F.target,{code:D}=F;switch(D){case Ge.up:case Ge.down:{F.preventDefault();const z=D===Ge.up?-1:1;up(rE(N,z,`.${a.b("node")}[tabindex="-1"]`));break}case Ge.left:{F.preventDefault();const z=s.value[R4(N)-1],B=z==null?void 0:z.$el.querySelector(`.${a.b("node")}[aria-expanded="true"]`);up(B);break}case Ge.right:{F.preventDefault();const z=s.value[R4(N)+1],B=z==null?void 
0:z.$el.querySelector(`.${a.b("node")}[tabindex="-1"]`);up(B);break}case Ge.enter:$re(N);break;case Ge.esc:case Ge.tab:t("close");break}};return ot(aw,bt({config:o,expandingNode:f,checkedNodes:p,isHoverMenu:v,initialLoaded:l,renderLabelFn:m,lazyLoad:b,expandNode:C,handleCheckChange:S})),ce([o,()=>e.options],y,{deep:!0,immediate:!0}),ce(()=>e.modelValue,()=>{r=!1,_()}),ce(c,F=>{er(F,e.modelValue)||(t(Pt,F),t(ir,F))}),wm(()=>s.value=[]),et(()=>!Kd(e.modelValue)&&_()),{ns:a,menuList:s,menus:d,checkedNodes:p,handleKeyDown:j,handleCheckChange:S,getFlattedNodes:k,getCheckedNodes:$,clearCheckedNodes:O,calculateCheckedValue:T,scrollToExpandingNode:L}}});function Tre(e,t,n,r,a,o){const i=we("el-cascader-menu");return R(),X("div",{class:U([e.ns.b("panel"),e.ns.is("bordered",e.border)]),onKeydown:t[0]||(t[0]=(...l)=>e.handleKeyDown&&e.handleKeyDown(...l))},[(R(!0),X(Fe,null,Rt(e.menus,(l,s)=>(R(),fe(i,{key:s,ref_for:!0,ref:c=>e.menuList[s]=c,index:s,nodes:[...l]},null,8,["index","nodes"]))),128))],34)}var mp=Ae(Pre,[["render",Tre],["__file","/home/runner/work/element-plus/element-plus/packages/components/cascader-panel/src/index.vue"]]);mp.install=e=>{e.component(mp.name,mp)};const GM=mp,xre=GM,iw=Ze({closable:Boolean,type:{type:String,values:["success","info","warning","danger",""],default:""},hit:Boolean,disableTransitions:Boolean,color:{type:String,default:""},size:{type:String,values:Bo,default:""},effect:{type:String,values:["dark","light","plain"],default:"light"},round:Boolean}),_re={close:e=>e instanceof MouseEvent,click:e=>e instanceof MouseEvent},Ere={name:"ElTag"},Mre=G(Ke(Te({},Ere),{props:iw,emits:_re,setup(e,{emit:t}){const 
n=e,r=Gn(),a=De("tag"),o=x(()=>{const{type:s,hit:c,effect:d,closable:f,round:p}=n;return[a.b(),a.is("closable",f),a.m(s),a.m(r.value),a.m(d),a.is("hit",c),a.is("round",p)]}),i=s=>{s.stopPropagation(),t("close",s)},l=s=>{t("click",s)};return(s,c)=>s.disableTransitions?(R(),fe(Vn,{key:1,name:`${A(a).namespace.value}-zoom-in-center`},{default:re(()=>[Z("span",{class:U(A(o)),style:Xe({backgroundColor:s.color}),onClick:l},[Z("span",{class:U(A(a).e("content"))},[Oe(s.$slots,"default")],2),s.closable?(R(),fe(A(ft),{key:0,class:U(A(a).e("close")),onClick:i},{default:re(()=>[g(A(Ma))]),_:1},8,["class"])):se("v-if",!0)],6)]),_:3},8,["name"])):(R(),X("span",{key:0,class:U(A(o)),style:Xe({backgroundColor:s.color}),onClick:l},[Z("span",{class:U(A(a).e("content"))},[Oe(s.$slots,"default")],2),s.closable?(R(),fe(A(ft),{key:0,class:U(A(a).e("close")),onClick:i},{default:re(()=>[g(A(Ma))]),_:1},8,["class"])):se("v-if",!0)],6))}}));var Ire=Ae(Mre,[["__file","/home/runner/work/element-plus/element-plus/packages/components/tag/src/tag.vue"]]);const Gm=xt(Ire),Nre=40,Are={large:36,default:32,small:28},Dre={modifiers:[{name:"arrowPosition",enabled:!0,phase:"main",fn:({state:e})=>{const{modifiersData:t,placement:n}=e;["right","left","bottom","top"].includes(n)||(t.arrow.x=35)},requires:["arrow"]}]},L4="ElCascader",Rre=G({name:L4,components:{ElCascaderPanel:GM,ElInput:Ra,ElTooltip:Ur,ElScrollbar:xi,ElTag:Gm,ElIcon:ft,CircleClose:gi,Check:Wu,ArrowDown:_s},directives:{Clickoutside:Is},props:Ke(Te({},qM),{size:{type:String,validator:va},placeholder:{type:String},disabled:Boolean,clearable:Boolean,filterable:Boolean,filterMethod:{type:Function,default:(e,t)=>e.text.includes(t)},separator:{type:String,default:" / "},showAllLevels:{type:Boolean,default:!0},collapseTags:Boolean,collapseTagsTooltip:{type:Boolean,default:!1},debounce:{type:Number,default:300},beforeFilter:{type:Function,default:()=>!0},popperClass:{type:String,default:""},popperAppendToBody:{type:Boolean,default:void 
0},teleported:mr.teleported,tagType:Ke(Te({},iw.type),{default:"info"})}),emits:[Pt,ir,"focus","blur","visible-change","expand-change","remove-tag"],setup(e,{emit:t}){let n=0,r=0;const{compatTeleported:a}=Oc(L4,"popperAppendToBody"),o=De("cascader"),i=De("input"),{t:l}=ln(),s=ve(ga,{}),c=ve(Ia,{}),d=H(null),f=H(null),p=H(null),v=H(null),m=H(null),y=H(!1),b=H(!1),C=H(!1),S=H(""),w=H(""),k=H([]),$=H([]),O=H([]),T=H(!1),_=x(()=>e.disabled||s.disabled),I=x(()=>e.placeholder||l("el.cascader.placeholder")),L=Gn(),j=x(()=>["small"].includes(L.value)?"small":"default"),F=x(()=>!!e.props.multiple),N=x(()=>!e.filterable||F.value),D=x(()=>F.value?w.value:S.value),z=x(()=>{var ye;return((ye=v.value)==null?void 0:ye.checkedNodes)||[]}),B=x(()=>!e.clearable||_.value||C.value||!b.value?!1:!!z.value.length),M=x(()=>{const{showAllLevels:ye,separator:pe}=e,ue=z.value;return ue.length?F.value?" ":ue[0].calcText(ye,pe):""}),E=x({get(){return e.modelValue},set(ye){var pe;t(Pt,ye),t(ir,ye),(pe=c.validate)==null||pe.call(c,"change").catch(ue=>void 0)}}),K=x(()=>{var ye,pe;return(pe=(ye=d.value)==null?void 0:ye.popperRef)==null?void 0:pe.contentRef}),W=ye=>{var pe,ue,Ce;if(!_.value&&(ye=ye!=null?ye:!y.value,ye!==y.value)){if(y.value=ye,(ue=(pe=f.value)==null?void 0:pe.input)==null||ue.setAttribute("aria-expanded",`${ye}`),ye)Y(),Ne((Ce=v.value)==null?void 0:Ce.scrollToExpandingNode);else if(e.filterable){const{value:je}=M;S.value=je,w.value=je}t("visible-change",ye)}},Y=()=>{Ne(()=>{var ye;(ye=d.value)==null||ye.updatePopper()})},q=()=>{C.value=!1},J=ye=>{const{showAllLevels:pe,separator:ue}=e;return{node:ye,key:ye.uid,text:ye.calcText(pe,ue),hitState:!1,closable:!_.value&&!ye.isDisabled,isCollapseTag:!1}},ne=ye=>{var pe;const ue=ye.node;ue.doCheck(!1),(pe=v.value)==null||pe.calculateCheckedValue(),t("remove-tag",ue.valueByOption)},oe=()=>{if(!F.value)return;const 
ye=z.value,pe=[],ue=[];if(ye.forEach(Ce=>ue.push(J(Ce))),$.value=ue,ye.length){const[Ce,...je]=ye,ee=je.length;pe.push(J(Ce)),ee&&(e.collapseTags?pe.push({key:-1,text:`+ ${ee}`,closable:!1,isCollapseTag:!0}):je.forEach(me=>pe.push(J(me))))}k.value=pe},Q=()=>{var ye,pe;const{filterMethod:ue,showAllLevels:Ce,separator:je}=e,ee=(pe=(ye=v.value)==null?void 0:ye.getFlattedNodes(!e.props.checkStrictly))==null?void 0:pe.filter(me=>me.isDisabled?!1:(me.calcText(Ce,je),ue(me,D.value)));F.value&&(k.value.forEach(me=>{me.hitState=!1}),$.value.forEach(me=>{me.hitState=!1})),C.value=!0,O.value=ee,Y()},ae=()=>{var ye;let pe;C.value&&m.value?pe=m.value.$el.querySelector(`.${o.e("suggestion-item")}`):pe=(ye=v.value)==null?void 0:ye.$el.querySelector(`.${o.b("node")}[tabindex="-1"]`),pe&&(pe.focus(),!C.value&&pe.click())},de=()=>{var ye,pe;const ue=(ye=f.value)==null?void 0:ye.input,Ce=p.value,je=(pe=m.value)==null?void 0:pe.$el;if(!(!Bt||!ue)){if(je){const ee=je.querySelector(`.${o.e("suggestion-list")}`);ee.style.minWidth=`${ue.offsetWidth}px`}if(Ce){const{offsetHeight:ee}=Ce,me=k.value.length>0?`${Math.max(ee+6,n)}px`:`${n}px`;ue.style.height=me,Y()}}},be=ye=>{var pe;return(pe=v.value)==null?void 0:pe.getCheckedNodes(ye)},Ee=ye=>{Y(),t("expand-change",ye)},Pe=ye=>{var pe;const ue=(pe=ye.target)==null?void 0:pe.value;if(ye.type==="compositionend")T.value=!1,Ne(()=>Ie(ue));else{const Ce=ue[ue.length-1]||"";T.value=!zm(Ce)}},Be=ye=>{if(!T.value)switch(ye.code){case Ge.enter:W();break;case Ge.down:W(!0),Ne(ae),ye.preventDefault();break;case Ge.esc:case Ge.tab:W(!1);break}},te=()=>{var ye;(ye=v.value)==null||ye.clearCheckedNodes(),W(!1)},ie=ye=>{var pe,ue;const{checked:Ce}=ye;F.value?(pe=v.value)==null||pe.handleCheckChange(ye,!Ce,!1):(!Ce&&((ue=v.value)==null||ue.handleCheckChange(ye,!0,!1)),W(!1))},ge=ye=>{const pe=ye.target,{code:ue}=ye;switch(ue){case Ge.up:case Ge.down:{const Ce=ue===Ge.up?-1:1;up(rE(pe,Ce,`.${o.e("suggestion-item")}[tabindex="-1"]`));break}case 
Ge.enter:pe.click();break;case Ge.esc:case Ge.tab:W(!1);break}},ke=()=>{const ye=k.value,pe=ye[ye.length-1];r=w.value?0:r+1,!(!pe||!r)&&(pe.hitState?ne(pe):pe.hitState=!0)},xe=Yn(()=>{const{value:ye}=D;if(!ye)return;const pe=e.beforeFilter(ye);hs(pe)?pe.then(Q).catch(()=>{}):pe!==!1?Q():q()},e.debounce),Ie=(ye,pe)=>{!y.value&&W(!0),!(pe!=null&&pe.isComposing)&&(ye?xe():q())};return ce(C,Y),ce([z,_],oe),ce(k,()=>{Ne(()=>de())}),ce(M,ye=>S.value=ye,{immediate:!0}),et(()=>{var ye;const pe=(ye=f.value)==null?void 0:ye.$el;n=(pe==null?void 0:pe.offsetHeight)||Are[L.value]||Nre,wc(pe,de)}),Lt(()=>{var ye;Sc((ye=f.value)==null?void 0:ye.$el,de)}),{popperOptions:Dre,tooltipRef:d,popperPaneRef:K,input:f,tagWrapper:p,panel:v,suggestionPanel:m,popperVisible:y,inputHover:b,inputPlaceholder:I,filtering:C,presentText:M,checkedValue:E,inputValue:S,searchInputValue:w,presentTags:k,allPresentTags:$,suggestions:O,isDisabled:_,isOnComposition:T,realSize:L,tagSize:j,multiple:F,readonly:N,clearBtnVisible:B,compatTeleported:a,nsCascader:o,nsInput:i,t:l,togglePopperVisible:W,hideSuggestionPanel:q,deleteTag:ne,focusFirstNode:ae,getCheckedNodes:be,handleExpandChange:Ee,handleKeyDown:Be,handleComposition:Pe,handleClear:te,handleSuggestionClick:ie,handleSuggestionKeyDown:ge,handleDelete:ke,handleInput:Ie}}}),Lre={key:0},Fre={class:"el-cascader__collapse-tags"},Bre=["placeholder"],Vre=["onClick"];function zre(e,t,n,r,a,o){const i=we("circle-close"),l=we("el-icon"),s=we("arrow-down"),c=we("el-input"),d=we("el-tag"),f=we("el-tooltip"),p=we("el-cascader-panel"),v=we("check"),m=we("el-scrollbar"),y=pa("clickoutside");return 
R(),fe(f,{ref:"tooltipRef",visible:e.popperVisible,"onUpdate:visible":t[17]||(t[17]=b=>e.popperVisible=b),teleported:e.compatTeleported,"popper-class":[e.nsCascader.e("dropdown"),e.popperClass],"popper-options":e.popperOptions,"fallback-placements":["bottom-start","bottom","top-start","top","right","left"],"stop-popper-mouse-event":!1,"gpu-acceleration":!1,placement:"bottom-start",transition:`${e.nsCascader.namespace.value}-zoom-in-top`,effect:"light",pure:"",persistent:"",onHide:e.hideSuggestionPanel},{default:re(()=>[at((R(),X("div",{class:U([e.nsCascader.b(),e.nsCascader.m(e.realSize),e.nsCascader.is("disabled",e.isDisabled),e.$attrs.class]),style:Xe(e.$attrs.style),onClick:t[11]||(t[11]=()=>e.togglePopperVisible(e.readonly?void 0:!0)),onKeydown:t[12]||(t[12]=(...b)=>e.handleKeyDown&&e.handleKeyDown(...b)),onMouseenter:t[13]||(t[13]=b=>e.inputHover=!0),onMouseleave:t[14]||(t[14]=b=>e.inputHover=!1)},[g(c,{ref:"input",modelValue:e.inputValue,"onUpdate:modelValue":t[1]||(t[1]=b=>e.inputValue=b),placeholder:e.inputPlaceholder,readonly:e.readonly,disabled:e.isDisabled,"validate-event":!1,size:e.realSize,class:U(e.nsCascader.is("focus",e.popperVisible)),onCompositionstart:e.handleComposition,onCompositionupdate:e.handleComposition,onCompositionend:e.handleComposition,onFocus:t[2]||(t[2]=b=>e.$emit("focus",b)),onBlur:t[3]||(t[3]=b=>e.$emit("blur",b)),onInput:e.handleInput},{suffix:re(()=>[e.clearBtnVisible?(R(),fe(l,{key:"clear",class:U([e.nsInput.e("icon"),"icon-circle-close"]),onClick:dt(e.handleClear,["stop"])},{default:re(()=>[g(i)]),_:1},8,["class","onClick"])):(R(),fe(l,{key:"arrow-down",class:U([e.nsInput.e("icon"),"icon-arrow-down",e.nsCascader.is("reverse",e.popperVisible)]),onClick:t[0]||(t[0]=dt(b=>e.togglePopperVisible(),["stop"]))},{default:re(()=>[g(s)]),_:1},8,["class"]))]),_:1},8,["modelValue","placeholder","readonly","disabled","size","class","onCompositionstart","onCompositionupdate","onCompositionend","onInput"]),e.multiple?(R(),X("div",{key:0,ref:"t
agWrapper",class:U(e.nsCascader.e("tags"))},[(R(!0),X(Fe,null,Rt(e.presentTags,b=>(R(),fe(d,{key:b.key,type:e.tagType,size:e.tagSize,hit:b.hitState,closable:b.closable,"disable-transitions":"",onClose:C=>e.deleteTag(b)},{default:re(()=>[b.isCollapseTag===!1?(R(),X("span",Lre,Me(b.text),1)):(R(),fe(f,{key:1,teleported:!1,disabled:e.popperVisible||!e.collapseTagsTooltip,"fallback-placements":["bottom","top","right","left"],placement:"bottom",effect:"light"},{default:re(()=>[Z("span",null,Me(b.text),1)]),content:re(()=>[Z("div",Fre,[(R(!0),X(Fe,null,Rt(e.allPresentTags,(C,S)=>(R(),X("div",{key:S,class:"el-cascader__collapse-tag"},[(R(),fe(d,{key:C.key,class:"in-tooltip",type:e.tagType,size:e.tagSize,hit:C.hitState,closable:C.closable,"disable-transitions":"",onClose:w=>e.deleteTag(C)},{default:re(()=>[Z("span",null,Me(C.text),1)]),_:2},1032,["type","size","hit","closable","onClose"]))]))),128))])]),_:2},1032,["disabled"]))]),_:2},1032,["type","size","hit","closable","onClose"]))),128)),e.filterable&&!e.isDisabled?at((R(),X("input",{key:0,"onUpdate:modelValue":t[4]||(t[4]=b=>e.searchInputValue=b),type:"text",class:U(e.nsCascader.e("search-input")),placeholder:e.presentText?"":e.inputPlaceholder,onInput:t[5]||(t[5]=b=>e.handleInput(e.searchInputValue,b)),onClick:t[6]||(t[6]=dt(b=>e.togglePopperVisible(!0),["stop"])),onKeydown:t[7]||(t[7]=It((...b)=>e.handleDelete&&e.handleDelete(...b),["delete"])),onCompositionstart:t[8]||(t[8]=(...b)=>e.handleComposition&&e.handleComposition(...b)),onCompositionupdate:t[9]||(t[9]=(...b)=>e.handleComposition&&e.handleComposition(...b)),onCompositionend:t[10]||(t[10]=(...b)=>e.handleComposition&&e.handleComposition(...b))},null,42,Bre)),[[fC,e.searchInputValue]]):se("v-if",!0)],2)):se("v-if",!0)],38)),[[y,()=>e.togglePopperVisible(!1),e.popperPaneRef]])]),content:re(()=>[at(g(p,{ref:"panel",modelValue:e.checkedValue,"onUpdate:modelValue":t[15]||(t[15]=b=>e.checkedValue=b),options:e.options,props:e.props,border:!1,"render-label":e.$slots.d
efault,onExpandChange:e.handleExpandChange,onClose:t[16]||(t[16]=b=>e.$nextTick(()=>e.togglePopperVisible(!1)))},null,8,["modelValue","options","props","render-label","onExpandChange"]),[[_t,!e.filtering]]),e.filterable?at((R(),fe(m,{key:0,ref:"suggestionPanel",tag:"ul",class:U(e.nsCascader.e("suggestion-panel")),"view-class":e.nsCascader.e("suggestion-list"),onKeydown:e.handleSuggestionKeyDown},{default:re(()=>[e.suggestions.length?(R(!0),X(Fe,{key:0},Rt(e.suggestions,b=>(R(),X("li",{key:b.uid,class:U([e.nsCascader.e("suggestion-item"),e.nsCascader.is("checked",b.checked)]),tabindex:-1,onClick:C=>e.handleSuggestionClick(b)},[Z("span",null,Me(b.text),1),b.checked?(R(),fe(l,{key:0},{default:re(()=>[g(v)]),_:1})):se("v-if",!0)],10,Vre))),128)):Oe(e.$slots,"empty",{key:1},()=>[Z("li",{class:U(e.nsCascader.e("empty-text"))},Me(e.t("el.cascader.noMatch")),3)])]),_:3},8,["class","view-class","onKeydown"])),[[_t,e.filtering]]):se("v-if",!0)]),_:3},8,["visible","teleported","popper-class","popper-options","transition","onHide"])}var gp=Ae(Rre,[["render",zre],["__file","/home/runner/work/element-plus/element-plus/packages/components/cascader/src/index.vue"]]);gp.install=e=>{e.component(gp.name,gp)};const Hre=gp,jre=Hre,Kre=Ze({checked:{type:Boolean,default:!1}}),Wre={"update:checked":e=>yr(e),change:e=>yr(e)},Ure={name:"ElCheckTag"},Yre=G(Ke(Te({},Ure),{props:Kre,emits:Wre,setup(e,{emit:t}){const n=e,r=De("check-tag"),a=()=>{const o=!n.checked;t("change",o),t("update:checked",o)};return(o,i)=>(R(),X("span",{class:U([A(r).b(),A(r).is("checked",o.checked)]),onClick:a},[Oe(o.$slots,"default")],2))}}));var qre=Ae(Yre,[["__file","/home/runner/work/element-plus/element-plus/packages/components/check-tag/src/check-tag.vue"]]);const 
Gre=xt(qre),Xre=Ze({tag:{type:String,default:"div"},span:{type:Number,default:24},offset:{type:Number,default:0},pull:{type:Number,default:0},push:{type:Number,default:0},xs:{type:Le([Number,Object]),default:()=>xn({})},sm:{type:Le([Number,Object]),default:()=>xn({})},md:{type:Le([Number,Object]),default:()=>xn({})},lg:{type:Le([Number,Object]),default:()=>xn({})},xl:{type:Le([Number,Object]),default:()=>xn({})}});var Zre=G({name:"ElCol",props:Xre,setup(e,{slots:t}){const{gutter:n}=ve(xE,{gutter:x(()=>0)}),r=De("col"),a=x(()=>n.value?{paddingLeft:`${n.value/2}px`,paddingRight:`${n.value/2}px`}:{}),o=x(()=>{const i=[];return["span","offset","pull","push"].forEach(c=>{const d=e[c];typeof d=="number"&&(c==="span"?i.push(r.b(`${e[c]}`)):d>0&&i.push(r.b(`${c}-${e[c]}`)))}),["xs","sm","md","lg","xl"].forEach(c=>{if(typeof e[c]=="number")i.push(r.b(`${c}-${e[c]}`));else if(typeof e[c]=="object"){const d=e[c];Object.keys(d).forEach(f=>{i.push(f!=="span"?r.b(`${c}-${f}-${d[f]}`):r.b(`${c}-${d[f]}`))})}}),n.value&&i.push(r.is("guttered")),i});return()=>g(e.tag,{class:[r.b(),o.value],style:a.value},t)}});const Jre=xt(Zre),F4=e=>typeof Yt(e),Qre=Ze({accordion:Boolean,modelValue:{type:Le([Array,String,Number]),default:()=>xn([])}}),eae={[Pt]:F4,[ir]:F4},tae={name:"ElCollapse"},nae=G(Ke(Te({},tae),{props:Qre,emits:eae,setup(e,{expose:t,emit:n}){const r=e,a=De("collapse"),o=H(Hd(r.modelValue)),i=s=>{o.value=s;const c=r.accordion?o.value[0]:o.value;n(Pt,c),n(ir,c)},l=s=>{if(r.accordion)i([(o.value[0]||o.value[0]===0)&&o.value[0]===s?"":s]);else{const c=[...o.value],d=c.indexOf(s);d>-1?c.splice(d,1):c.push(s),i(c)}};return ce(()=>r.modelValue,()=>o.value=Hd(r.modelValue),{deep:!0}),ot(kE,{activeNames:o,handleItemClick:l}),t({activeNames:o,setActiveNames:i}),(s,c)=>(R(),X("div",{class:U(A(a).b()),role:"tablist","aria-multiselectable":"true"},[Oe(s.$slots,"default")],2))}}));var 
rae=Ae(nae,[["__file","/home/runner/work/element-plus/element-plus/packages/components/collapse/src/collapse.vue"]]);const aae=G({name:"ElCollapseTransition",setup(){return{ns:De("collapse-transition"),on:{beforeEnter(t){t.dataset||(t.dataset={}),t.dataset.oldPaddingTop=t.style.paddingTop,t.dataset.oldPaddingBottom=t.style.paddingBottom,t.style.maxHeight=0,t.style.paddingTop=0,t.style.paddingBottom=0},enter(t){t.dataset.oldOverflow=t.style.overflow,t.scrollHeight!==0?(t.style.maxHeight=`${t.scrollHeight}px`,t.style.paddingTop=t.dataset.oldPaddingTop,t.style.paddingBottom=t.dataset.oldPaddingBottom):(t.style.maxHeight=0,t.style.paddingTop=t.dataset.oldPaddingTop,t.style.paddingBottom=t.dataset.oldPaddingBottom),t.style.overflow="hidden"},afterEnter(t){t.style.maxHeight="",t.style.overflow=t.dataset.oldOverflow},beforeLeave(t){t.dataset||(t.dataset={}),t.dataset.oldPaddingTop=t.style.paddingTop,t.dataset.oldPaddingBottom=t.style.paddingBottom,t.dataset.oldOverflow=t.style.overflow,t.style.maxHeight=`${t.scrollHeight}px`,t.style.overflow="hidden"},leave(t){t.scrollHeight!==0&&(t.style.maxHeight=0,t.style.paddingTop=0,t.style.paddingBottom=0)},afterLeave(t){t.style.maxHeight="",t.style.overflow=t.dataset.oldOverflow,t.style.paddingTop=t.dataset.oldPaddingTop,t.style.paddingBottom=t.dataset.oldPaddingBottom}}}}});function oae(e,t,n,r,a,o){return R(),fe(Vn,hn({name:e.ns.b()},aF(e.on)),{default:re(()=>[Oe(e.$slots,"default")]),_:3},16,["name"])}var yp=Ae(aae,[["render",oae],["__file","/home/runner/work/element-plus/element-plus/packages/components/collapse-transition/src/collapse-transition.vue"]]);yp.install=e=>{e.component(yp.name,yp)};const 
Xm=yp,iae=Xm,lae=Ze({title:{type:String,default:""},name:{type:Le([String,Number]),default:()=>Pf()},disabled:Boolean}),sae=["aria-expanded","aria-controls","aria-describedby"],uae=["id","tabindex","onKeypress"],cae=["id","aria-hidden","aria-labelledby"],dae={name:"ElCollapseItem"},fae=G(Ke(Te({},dae),{props:lae,setup(e,{expose:t}){const n=e,r=ve(kE),a=De("collapse"),o=H(!1),i=H(!1),l=H(Pf()),s=x(()=>r==null?void 0:r.activeNames.value.includes(n.name)),c=()=>{setTimeout(()=>{i.value?i.value=!1:o.value=!0},50)},d=()=>{n.disabled||(r==null||r.handleItemClick(n.name),o.value=!1,i.value=!0)},f=()=>{r==null||r.handleItemClick(n.name)};return t({isActive:s}),(p,v)=>(R(),X("div",{class:U([A(a).b("item"),A(a).is("active",A(s)),A(a).is("disabled",p.disabled)])},[Z("div",{role:"tab","aria-expanded":A(s),"aria-controls":A(a).b(`content-${l.value}`),"aria-describedby":A(a).b(`content-${l.value}`)},[Z("div",{id:A(a).b(`head-${l.value}`),class:U([A(a).be("item","header"),A(a).is("active",A(s)),{focusing:o.value}]),role:"button",tabindex:p.disabled?-1:0,onClick:d,onKeypress:It(dt(f,["stop","prevent"]),["space","enter"]),onFocus:c,onBlur:v[0]||(v[0]=m=>o.value=!1)},[Oe(p.$slots,"title",{},()=>[yt(Me(p.title),1)]),g(A(ft),{class:U([A(a).be("item","arrow"),A(a).is("active",A(s))])},{default:re(()=>[g(A(Da))]),_:1},8,["class"])],42,uae)],8,sae),g(A(Xm),null,{default:re(()=>[at(Z("div",{id:A(a).b(`content-${l.value}`),class:U(A(a).be("item","wrap")),role:"tabpanel","aria-hidden":!A(s),"aria-labelledby":A(a).b(`head-${l.value}`)},[Z("div",{class:U(A(a).be("item","content"))},[Oe(p.$slots,"default")],2)],10,cae),[[_t,A(s)]])]),_:3})],2))}}));var XM=Ae(fae,[["__file","/home/runner/work/element-plus/element-plus/packages/components/collapse/src/collapse-item.vue"]]);const hae=xt(rae,{CollapseItem:XM}),pae=En(XM);let fy=!1;function Gd(e,t){if(!Bt)return;const n=function(o){var i;(i=t.drag)==null||i.call(t,o)},r=function(o){var 
i;Bn(document,"mousemove",n),Bn(document,"mouseup",r),Bn(document,"touchmove",n),Bn(document,"touchend",r),document.onselectstart=null,document.ondragstart=null,fy=!1,(i=t.end)==null||i.call(t,o)},a=function(o){var i;fy||(o.preventDefault(),document.onselectstart=()=>!1,document.ondragstart=()=>!1,gn(document,"mousemove",n),gn(document,"mouseup",r),gn(document,"touchmove",n),gn(document,"touchend",r),fy=!0,(i=t.start)==null||i.call(t,o))};gn(e,"mousedown",a),gn(e,"touchstart",a)}const vae=G({name:"ElColorAlphaSlider",props:{color:{type:Object,required:!0},vertical:{type:Boolean,default:!1}},setup(e){const t=$t(),n=Qn(null),r=Qn(null),a=H(0),o=H(0),i=H(null);ce(()=>e.color.get("alpha"),()=>{p()}),ce(()=>e.color.value,()=>{p()});function l(){if(e.vertical)return 0;const v=t.vnode.el,m=e.color.get("alpha");return v?Math.round(m*(v.offsetWidth-n.value.offsetWidth/2)/100):0}function s(){const v=t.vnode.el;if(!e.vertical)return 0;const m=e.color.get("alpha");return v?Math.round(m*(v.offsetHeight-n.value.offsetHeight/2)/100):0}function c(){if(e.color&&e.color.value){const{r:v,g:m,b:y}=e.color.toRgb();return`linear-gradient(to right, rgba(${v}, ${m}, ${y}, 0) 0%, rgba(${v}, ${m}, ${y}, 1) 100%)`}return null}function d(v){v.target!==n.value&&f(v)}function f(v){const y=t.vnode.el.getBoundingClientRect(),{clientX:b,clientY:C}=AC(v);if(e.vertical){let S=C-y.top;S=Math.max(n.value.offsetHeight/2,S),S=Math.min(S,y.height-n.value.offsetHeight/2),e.color.set("alpha",Math.round((S-n.value.offsetHeight/2)/(y.height-n.value.offsetHeight)*100))}else{let S=b-y.left;S=Math.max(n.value.offsetWidth/2,S),S=Math.min(S,y.width-n.value.offsetWidth/2),e.color.set("alpha",Math.round((S-n.value.offsetWidth/2)/(y.width-n.value.offsetWidth)*100))}}function p(){a.value=l(),o.value=s(),i.value=c()}return et(()=>{const v={drag:m=>{f(m)},end:m=>{f(m)}};Gd(r.value,v),Gd(n.value,v),p()}),{thumb:n,bar:r,thumbLeft:a,thumbTop:o,background:i,handleClick:d,update:p}}});function mae(e,t,n,r,a,o){return 
R(),X("div",{class:U(["el-color-alpha-slider",{"is-vertical":e.vertical}])},[Z("div",{ref:"bar",class:"el-color-alpha-slider__bar",style:Xe({background:e.background}),onClick:t[0]||(t[0]=(...i)=>e.handleClick&&e.handleClick(...i))},null,4),Z("div",{ref:"thumb",class:"el-color-alpha-slider__thumb",style:Xe({left:e.thumbLeft+"px",top:e.thumbTop+"px"})},null,4)],2)}var gae=Ae(vae,[["render",mae],["__file","/home/runner/work/element-plus/element-plus/packages/components/color-picker/src/components/alpha-slider.vue"]]);const yae=G({name:"ElColorHueSlider",props:{color:{type:Object,required:!0},vertical:Boolean},setup(e){const t=$t(),n=H(null),r=H(null),a=H(0),o=H(0),i=x(()=>e.color.get("hue"));ce(()=>i.value,()=>{f()});function l(p){p.target!==n.value&&s(p)}function s(p){const m=t.vnode.el.getBoundingClientRect(),{clientX:y,clientY:b}=AC(p);let C;if(e.vertical){let S=b-m.top;S=Math.min(S,m.height-n.value.offsetHeight/2),S=Math.max(n.value.offsetHeight/2,S),C=Math.round((S-n.value.offsetHeight/2)/(m.height-n.value.offsetHeight)*360)}else{let S=y-m.left;S=Math.min(S,m.width-n.value.offsetWidth/2),S=Math.max(n.value.offsetWidth/2,S),C=Math.round((S-n.value.offsetWidth/2)/(m.width-n.value.offsetWidth)*360)}e.color.set("hue",C)}function c(){const p=t.vnode.el;if(e.vertical)return 0;const v=e.color.get("hue");return p?Math.round(v*(p.offsetWidth-n.value.offsetWidth/2)/360):0}function d(){const p=t.vnode.el;if(!e.vertical)return 0;const v=e.color.get("hue");return p?Math.round(v*(p.offsetHeight-n.value.offsetHeight/2)/360):0}function f(){a.value=c(),o.value=d()}return et(()=>{const p={drag:v=>{s(v)},end:v=>{s(v)}};Gd(r.value,p),Gd(n.value,p),f()}),{bar:r,thumb:n,thumbLeft:a,thumbTop:o,hueValue:i,handleClick:l,update:f}}});function bae(e,t,n,r,a,o){return 
R(),X("div",{class:U(["el-color-hue-slider",{"is-vertical":e.vertical}])},[Z("div",{ref:"bar",class:"el-color-hue-slider__bar",onClick:t[0]||(t[0]=(...i)=>e.handleClick&&e.handleClick(...i))},null,512),Z("div",{ref:"thumb",class:"el-color-hue-slider__thumb",style:Xe({left:e.thumbLeft+"px",top:e.thumbTop+"px"})},null,4)],2)}var Cae=Ae(yae,[["render",bae],["__file","/home/runner/work/element-plus/element-plus/packages/components/color-picker/src/components/hue-slider.vue"]]);const ZM=Symbol(),wae=()=>ve(ZM),B4=function(e,t,n){return[e,t*n/((e=(2-t)*n)<1?e:2-e)||0,e/2]},Sae=function(e){return typeof e=="string"&&e.includes(".")&&Number.parseFloat(e)===1},kae=function(e){return typeof e=="string"&&e.includes("%")},xu=function(e,t){Sae(e)&&(e="100%");const n=kae(e);return e=Math.min(t,Math.max(0,Number.parseFloat(`${e}`))),n&&(e=Number.parseInt(`${e*t}`,10)/100),Math.abs(e-t)<1e-6?1:e%t/Number.parseFloat(t)},V4={10:"A",11:"B",12:"C",13:"D",14:"E",15:"F"},bp=function(e){e=Math.min(Math.round(e),255);const t=Math.floor(e/16),n=e%16;return`${V4[t]||t}${V4[n]||n}`},z4=function({r:e,g:t,b:n}){return Number.isNaN(+e)||Number.isNaN(+t)||Number.isNaN(+n)?"":`#${bp(e)}${bp(t)}${bp(n)}`},hy={A:10,B:11,C:12,D:13,E:14,F:15},$l=function(e){return e.length===2?(hy[e[0].toUpperCase()]||+e[0])*16+(hy[e[1].toUpperCase()]||+e[1]):hy[e[1].toUpperCase()]||+e[1]},$ae=function(e,t,n){t=t/100,n=n/100;let r=t;const a=Math.max(n,.01);n*=2,t*=n<=1?n:2-n,r*=a<=1?a:2-a;const o=(n+t)/2,i=n===0?2*r/(a+r):2*t/(n+t);return{h:e,s:i*100,v:o*100}},H4=function(e,t,n){e=xu(e,255),t=xu(t,255),n=xu(n,255);const r=Math.max(e,t,n),a=Math.min(e,t,n);let o;const i=r,l=r-a,s=r===0?0:l/r;if(r===a)o=0;else{switch(r){case e:{o=(t-n)/l+(t<n?6:0);break}case t:{o=(n-e)/l+2;break}case n:{o=(e-t)/l+4;break}}o/=6}return{h:o*360,s:s*100,v:i*100}},Vc=function(e,t,n){e=xu(e,360)*6,t=xu(t,100),n=xu(n,100);const 
r=Math.floor(e),a=e-r,o=n*(1-t),i=n*(1-a*t),l=n*(1-(1-a)*t),s=r%6,c=[n,i,o,o,l,n][s],d=[l,n,n,i,o,o][s],f=[o,o,l,n,n,i][s];return{r:Math.round(c*255),g:Math.round(d*255),b:Math.round(f*255)}};class yd{constructor(t){this._hue=0,this._saturation=100,this._value=100,this._alpha=100,this.enableAlpha=!1,this.format="hex",this.value="",t=t||{};for(const n in t)Mt(t,n)&&(this[n]=t[n]);t.value?this.fromString(t.value):this.doOnChange()}set(t,n){if(arguments.length===1&&typeof t=="object"){for(const r in t)Mt(t,r)&&this.set(r,t[r]);return}this[`_${t}`]=n,this.doOnChange()}get(t){return t==="alpha"?Math.floor(this[`_${t}`]):this[`_${t}`]}toRgb(){return Vc(this._hue,this._saturation,this._value)}fromString(t){if(!t){this._hue=0,this._saturation=100,this._value=100,this.doOnChange();return}const n=(r,a,o)=>{this._hue=Math.max(0,Math.min(360,r)),this._saturation=Math.max(0,Math.min(100,a)),this._value=Math.max(0,Math.min(100,o)),this.doOnChange()};if(t.includes("hsl")){const r=t.replace(/hsla|hsl|\(|\)/gm,"").split(/\s|,/g).filter(a=>a!=="").map((a,o)=>o>2?Number.parseFloat(a):Number.parseInt(a,10));if(r.length===4?this._alpha=Number.parseFloat(r[3])*100:r.length===3&&(this._alpha=100),r.length>=3){const{h:a,s:o,v:i}=$ae(r[0],r[1],r[2]);n(a,o,i)}}else if(t.includes("hsv")){const r=t.replace(/hsva|hsv|\(|\)/gm,"").split(/\s|,/g).filter(a=>a!=="").map((a,o)=>o>2?Number.parseFloat(a):Number.parseInt(a,10));r.length===4?this._alpha=Number.parseFloat(r[3])*100:r.length===3&&(this._alpha=100),r.length>=3&&n(r[0],r[1],r[2])}else if(t.includes("rgb")){const r=t.replace(/rgba|rgb|\(|\)/gm,"").split(/\s|,/g).filter(a=>a!=="").map((a,o)=>o>2?Number.parseFloat(a):Number.parseInt(a,10));if(r.length===4?this._alpha=Number.parseFloat(r[3])*100:r.length===3&&(this._alpha=100),r.length>=3){const{h:a,s:o,v:i}=H4(r[0],r[1],r[2]);n(a,o,i)}}else if(t.includes("#")){const r=t.replace("#","").trim();if(!/^[0-9a-fA-F]{3}$|^[0-9a-fA-F]{6}$|^[0-9a-fA-F]{8}$/.test(r))return;let 
a,o,i;r.length===3?(a=$l(r[0]+r[0]),o=$l(r[1]+r[1]),i=$l(r[2]+r[2])):(r.length===6||r.length===8)&&(a=$l(r.slice(0,2)),o=$l(r.slice(2,4)),i=$l(r.slice(4,6))),r.length===8?this._alpha=$l(r.slice(6))/255*100:(r.length===3||r.length===6)&&(this._alpha=100);const{h:l,s,v:c}=H4(a,o,i);n(l,s,c)}}compare(t){return Math.abs(t._hue-this._hue)<2&&Math.abs(t._saturation-this._saturation)<1&&Math.abs(t._value-this._value)<1&&Math.abs(t._alpha-this._alpha)<1}doOnChange(){const{_hue:t,_saturation:n,_value:r,_alpha:a,format:o}=this;if(this.enableAlpha)switch(o){case"hsl":{const i=B4(t,n/100,r/100);this.value=`hsla(${t}, ${Math.round(i[1]*100)}%, ${Math.round(i[2]*100)}%, ${this.get("alpha")/100})`;break}case"hsv":{this.value=`hsva(${t}, ${Math.round(n)}%, ${Math.round(r)}%, ${this.get("alpha")/100})`;break}case"hex":{this.value=`${z4(Vc(t,n,r))}${bp(a*255/100)}`;break}default:{const{r:i,g:l,b:s}=Vc(t,n,r);this.value=`rgba(${i}, ${l}, ${s}, ${this.get("alpha")/100})`}}else switch(o){case"hsl":{const i=B4(t,n/100,r/100);this.value=`hsl(${t}, ${Math.round(i[1]*100)}%, ${Math.round(i[2]*100)}%)`;break}case"hsv":{this.value=`hsv(${t}, ${Math.round(n)}%, ${Math.round(r)}%)`;break}case"rgb":{const{r:i,g:l,b:s}=Vc(t,n,r);this.value=`rgb(${i}, ${l}, ${s})`;break}default:this.value=z4(Vc(t,n,r))}}}const Oae=G({props:{colors:{type:Array,required:!0},color:{type:Object,required:!0}},setup(e){const{currentColor:t}=wae(),n=H(a(e.colors,e.color));ce(()=>t.value,o=>{const i=new yd;i.fromString(o),n.value.forEach(l=>{l.selected=i.compare(l)})}),Wn(()=>{n.value=a(e.colors,e.color)});function r(o){e.color.fromString(e.colors[o])}function a(o,i){return o.map(l=>{const s=new yd;return s.enableAlpha=!0,s.format="rgba",s.fromString(l),s.selected=s.value===i.value,s})}return{rgbaColors:n,handleSelect:r}}}),Pae={class:"el-color-predefine"},Tae={class:"el-color-predefine__colors"},xae=["onClick"];function _ae(e,t,n,r,a,o){return 
R(),X("div",Pae,[Z("div",Tae,[(R(!0),X(Fe,null,Rt(e.rgbaColors,(i,l)=>(R(),X("div",{key:e.colors[l],class:U(["el-color-predefine__color-selector",{selected:i.selected,"is-alpha":i._alpha<100}]),onClick:s=>e.handleSelect(l)},[Z("div",{style:Xe({backgroundColor:i.value})},null,4)],10,xae))),128))])])}var Eae=Ae(Oae,[["render",_ae],["__file","/home/runner/work/element-plus/element-plus/packages/components/color-picker/src/components/predefine.vue"]]);const Mae=G({name:"ElSlPanel",props:{color:{type:Object,required:!0}},setup(e){const t=$t(),n=H(0),r=H(0),a=H("hsl(0, 100%, 50%)"),o=x(()=>{const s=e.color.get("hue"),c=e.color.get("value");return{hue:s,value:c}});function i(){const s=e.color.get("saturation"),c=e.color.get("value"),d=t.vnode.el,{clientWidth:f,clientHeight:p}=d;r.value=s*f/100,n.value=(100-c)*p/100,a.value=`hsl(${e.color.get("hue")}, 100%, 50%)`}function l(s){const d=t.vnode.el.getBoundingClientRect(),{clientX:f,clientY:p}=AC(s);let v=f-d.left,m=p-d.top;v=Math.max(0,v),v=Math.min(v,d.width),m=Math.max(0,m),m=Math.min(m,d.height),r.value=v,n.value=m,e.color.set({saturation:v/d.width*100,value:100-m/d.height*100})}return ce(()=>o.value,()=>{i()}),et(()=>{Gd(t.vnode.el,{drag:s=>{l(s)},end:s=>{l(s)}}),i()}),{cursorTop:n,cursorLeft:r,background:a,colorValue:o,handleDrag:l,update:i}}}),Iae=Z("div",{class:"el-color-svpanel__white"},null,-1),Nae=Z("div",{class:"el-color-svpanel__black"},null,-1),Aae=Z("div",null,null,-1),Dae=[Aae];function Rae(e,t,n,r,a,o){return R(),X("div",{class:"el-color-svpanel",style:Xe({backgroundColor:e.background})},[Iae,Nae,Z("div",{class:"el-color-svpanel__cursor",style:Xe({top:e.cursorTop+"px",left:e.cursorLeft+"px"})},Dae,4)],4)}var Lae=Ae(Mae,[["render",Rae],["__file","/home/runner/work/element-plus/element-plus/packages/components/color-picker/src/components/sv-panel.vue"]]);const 
Fae=G({name:"ElColorPicker",components:{ElButton:xa,ElTooltip:Ur,ElInput:Ra,ElIcon:ft,Close:Ma,ArrowDown:_s,SvPanel:Lae,HueSlider:Cae,AlphaSlider:gae,Predefine:Eae},directives:{ClickOutside:Is},props:{modelValue:String,showAlpha:Boolean,colorFormat:String,disabled:Boolean,size:{type:String,validator:va},popperClass:String,predefine:Array},emits:["change","active-change",Pt],setup(e,{emit:t}){const{t:n}=ln(),r=De("color"),a=ve(ga,{}),o=ve(Ia,{}),i=H(null),l=H(null),s=H(null),c=H(null),d=bt(new yd({enableAlpha:e.showAlpha,format:e.colorFormat,value:e.modelValue})),f=H(!1),p=H(!1),v=H(""),m=x(()=>!e.modelValue&&!p.value?"transparent":S(d,e.showAlpha)),y=Gn(),b=x(()=>!!(e.disabled||a.disabled)),C=x(()=>!e.modelValue&&!p.value?"":d.value);ce(()=>e.modelValue,j=>{j?j&&j!==d.value&&d.fromString(j):p.value=!1}),ce(()=>C.value,j=>{v.value=j,t("active-change",j)}),ce(()=>d.value,()=>{!e.modelValue&&!p.value&&(p.value=!0)});function S(j,F){if(!(j instanceof yd))throw new TypeError("color should be instance of _color Class");const{r:N,g:D,b:z}=j.toRgb();return F?`rgba(${N}, ${D}, ${z}, ${j.get("alpha")/100})`:`rgb(${N}, ${D}, ${z})`}function w(j){f.value=j}const k=Yn(w,100);function $(){k(!1),O()}function O(){Ne(()=>{e.modelValue?d.fromString(e.modelValue):p.value=!1})}function T(){b.value||k(!f.value)}function _(){d.fromString(v.value)}function I(){var j;const F=d.value;t(Pt,F),t("change",F),(j=o.validate)==null||j.call(o,"change").catch(N=>void 0),k(!1),Ne(()=>{const N=new yd({enableAlpha:e.showAlpha,format:e.colorFormat,value:e.modelValue});d.compare(N)||O()})}function L(){var j;k(!1),t(Pt,null),t("change",null),e.modelValue!==null&&((j=o.validate)==null||j.call(o,"change").catch(F=>void 0)),O()}return et(()=>{e.modelValue&&(v.value=C.value)}),ce(()=>f.value,()=>{Ne(()=>{var 
j,F,N;(j=i.value)==null||j.update(),(F=l.value)==null||F.update(),(N=s.value)==null||N.update()})}),ot(ZM,{currentColor:C}),{color:d,colorDisabled:b,colorSize:y,displayedColor:m,showPanelColor:p,showPicker:f,customInput:v,handleConfirm:_,hide:$,handleTrigger:T,clear:L,confirmValue:I,t:n,ns:r,hue:i,svPanel:l,alpha:s,popper:c}}});function Bae(e,t,n,r,a,o){const i=we("hue-slider"),l=we("sv-panel"),s=we("alpha-slider"),c=we("predefine"),d=we("el-input"),f=we("el-button"),p=we("arrow-down"),v=we("el-icon"),m=we("close"),y=we("el-tooltip"),b=pa("click-outside");return R(),fe(y,{ref:"popper",visible:e.showPicker,"onUpdate:visible":t[2]||(t[2]=C=>e.showPicker=C),"show-arrow":!1,"fallback-placements":["bottom","top","right","left"],offset:0,"gpu-acceleration":!1,"popper-class":[e.ns.be("picker","panel"),e.ns.b("dropdown"),e.popperClass],"stop-popper-mouse-event":!1,effect:"light",trigger:"click",transition:"el-zoom-in-top",persistent:""},{content:re(()=>[at((R(),X("div",null,[Z("div",{class:U(e.ns.be("dropdown","main-wrapper"))},[g(i,{ref:"hue",class:"hue-slider",color:e.color,vertical:""},null,8,["color"]),g(l,{ref:"svPanel",color:e.color},null,8,["color"])],2),e.showAlpha?(R(),fe(s,{key:0,ref:"alpha",color:e.color},null,8,["color"])):se("v-if",!0),e.predefine?(R(),fe(c,{key:1,ref:"predefine",color:e.color,colors:e.predefine},null,8,["color","colors"])):se("v-if",!0),Z("div",{class:U(e.ns.be("dropdown","btns"))},[Z("span",{class:U(e.ns.be("dropdown","value"))},[g(d,{modelValue:e.customInput,"onUpdate:modelValue":t[0]||(t[0]=C=>e.customInput=C),"validate-event":!1,size:"small",onKeyup:It(e.handleConfirm,["enter"]),onBlur:e.handleConfirm},null,8,["modelValue","onKeyup","onBlur"])],2),g(f,{size:"small",type:"text",class:U(e.ns.be("dropdown","link-btn")),onClick:e.clear},{default:re(()=>[yt(Me(e.t("el.colorpicker.clear")),1)]),_:1},8,["class","onClick"]),g(f,{plain:"",size:"small",class:U(e.ns.be("dropdown","btn")),onClick:e.confirmValue},{default:re(()=>[yt(Me(e.t("el.colorpic
ker.confirm")),1)]),_:1},8,["class","onClick"])],2)])),[[b,e.hide]])]),default:re(()=>[Z("div",{class:U([e.ns.b("picker"),e.ns.is("disabled",e.colorDisabled),e.ns.bm("picker",e.colorSize)])},[e.colorDisabled?(R(),X("div",{key:0,class:U(e.ns.be("picker","mask"))},null,2)):se("v-if",!0),Z("div",{class:U(e.ns.be("picker","trigger")),onClick:t[1]||(t[1]=(...C)=>e.handleTrigger&&e.handleTrigger(...C))},[Z("span",{class:U([e.ns.be("picker","color"),e.ns.is("alpha",e.showAlpha)])},[Z("span",{class:U(e.ns.be("picker","color-inner")),style:Xe({backgroundColor:e.displayedColor})},[at(g(v,{class:U([e.ns.be("picker","icon"),e.ns.is("icon-arrow-down")])},{default:re(()=>[g(p)]),_:1},8,["class"]),[[_t,e.modelValue||e.showPanelColor]]),!e.modelValue&&!e.showPanelColor?(R(),fe(v,{key:0,class:U([e.ns.be("picker","empty"),e.ns.is("icon-close")])},{default:re(()=>[g(m)]),_:1},8,["class"])):se("v-if",!0)],6)],2)],2)],2)]),_:1},8,["visible","popper-class"])}var Cp=Ae(Fae,[["render",Bae],["__file","/home/runner/work/element-plus/element-plus/packages/components/color-picker/src/index.vue"]]);Cp.install=e=>{e.component(Cp.name,Cp)};const Vae=Cp,zae=Vae,I0={},Hae=Ze({a11y:{type:Boolean,default:!0},locale:{type:Le(Object)},size:{type:String,values:Bo,default:""},button:{type:Le(Object)},experimentalFeatures:{type:Le(Object)},keyboardNavigation:{type:Boolean,default:!0},message:{type:Le(Object)},zIndex:{type:Number},namespace:{type:String,default:"el"}});var jae=G({name:"ElConfigProvider",props:Hae,setup(e,{slots:t}){ce(()=>e.message,r=>{Object.assign(I0,r!=null?r:{})},{immediate:!0,deep:!0});const n=AE(e);return()=>Oe(t,"default",{config:n==null?void 0:n.value})}});const Kae=xt(jae),Wae=G({name:"ElContainer",props:{direction:{type:String,default:""}},setup(e,{slots:t}){const n=De("container");return{isVertical:x(()=>e.direction==="vertical"?!0:e.direction==="horizontal"?!1:t&&t.default?t.default().some(o=>{const i=o.type.name;return i==="ElHeader"||i==="ElFooter"}):!1),ns:n}}});function 
Uae(e,t,n,r,a,o){return R(),X("section",{class:U([e.ns.b(),e.ns.is("vertical",e.isVertical)])},[Oe(e.$slots,"default")],2)}var Yae=Ae(Wae,[["render",Uae],["__file","/home/runner/work/element-plus/element-plus/packages/components/container/src/container.vue"]]);const qae=G({name:"ElAside",props:{width:{type:String,default:null}},setup(e){const t=De("aside");return{style:x(()=>e.width?{"--el-aside-width":e.width}:{}),ns:t}}});function Gae(e,t,n,r,a,o){return R(),X("aside",{class:U(e.ns.b()),style:Xe(e.style)},[Oe(e.$slots,"default")],6)}var JM=Ae(qae,[["render",Gae],["__file","/home/runner/work/element-plus/element-plus/packages/components/container/src/aside.vue"]]);const Xae=G({name:"ElFooter",props:{height:{type:String,default:null}},setup(e){const t=De("footer");return{style:x(()=>e.height?{"--el-footer-height":e.height}:{}),ns:t}}});function Zae(e,t,n,r,a,o){return R(),X("footer",{class:U(e.ns.b()),style:Xe(e.style)},[Oe(e.$slots,"default")],6)}var QM=Ae(Xae,[["render",Zae],["__file","/home/runner/work/element-plus/element-plus/packages/components/container/src/footer.vue"]]);const Jae=G({name:"ElHeader",props:{height:{type:String,default:null}},setup(e){const t=De("header");return{style:x(()=>e.height?{"--el-header-height":e.height}:{}),ns:t}}});function Qae(e,t,n,r,a,o){return R(),X("header",{class:U(e.ns.b()),style:Xe(e.style)},[Oe(e.$slots,"default")],6)}var e5=Ae(Jae,[["render",Qae],["__file","/home/runner/work/element-plus/element-plus/packages/components/container/src/header.vue"]]);const eoe=G({name:"ElMain",setup(){return{ns:De("main")}}});function toe(e,t,n,r,a,o){return R(),X("main",{class:U(e.ns.b())},[Oe(e.$slots,"default")],2)}var t5=Ae(eoe,[["render",toe],["__file","/home/runner/work/element-plus/element-plus/packages/components/container/src/main.vue"]]);const noe=xt(Yae,{Aside:JM,Footer:QM,Header:e5,Main:t5}),roe=En(JM),aoe=En(QM),ooe=En(e5),ioe=En(t5);var n5={exports:{}};(function(e,t){(function(n,r){e.exports=r()})(vo,function(){return 
function(n,r,a){var o=r.prototype,i=o.format;a.en.ordinal=function(l){var s=["th","st","nd","rd"],c=l%100;return"["+l+(s[(c-20)%10]||s[c]||s[0])+"]"},o.format=function(l){var s=this,c=this.$locale();if(!this.isValid())return i.bind(this)(l);var d=this.$utils(),f=(l||"YYYY-MM-DDTHH:mm:ssZ").replace(/\[([^\]]+)]|Q|wo|ww|w|WW|W|zzz|z|gggg|GGGG|Do|X|x|k{1,2}|S/g,function(p){switch(p){case"Q":return Math.ceil((s.$M+1)/3);case"Do":return c.ordinal(s.$D);case"gggg":return s.weekYear();case"GGGG":return s.isoWeekYear();case"wo":return c.ordinal(s.week(),"W");case"w":case"ww":return d.s(s.week(),p==="w"?1:2,"0");case"W":case"WW":return d.s(s.isoWeek(),p==="W"?1:2,"0");case"k":case"kk":return d.s(String(s.$H===0?24:s.$H),p==="k"?1:2,"0");case"X":return Math.floor(s.$d.getTime()/1e3);case"x":return s.$d.getTime();case"z":return"["+s.offsetName()+"]";case"zzz":return"["+s.offsetName("long")+"]";default:return p}});return i.bind(this)(f)}}})})(n5);var loe=n5.exports,r5={exports:{}};(function(e,t){(function(n,r){e.exports=r()})(vo,function(){var n="week",r="year";return function(a,o,i){var l=o.prototype;l.week=function(s){if(s===void 0&&(s=null),s!==null)return this.add(7*(s-this.week()),"day");var c=this.$locale().yearStart||1;if(this.month()===11&&this.date()>25){var d=i(this).startOf(r).add(1,r).date(c),f=i(this).endOf(n);if(d.isBefore(f))return 1}var p=i(this).startOf(r).date(c).startOf(n).subtract(1,"millisecond"),v=this.diff(p,n,!0);return v<0?i(this).startOf("week").week():Math.ceil(v)},l.weeks=function(s){return s===void 0&&(s=null),this.week(s)}}})})(r5);var soe=r5.exports,a5={exports:{}};(function(e,t){(function(n,r){e.exports=r()})(vo,function(){return function(n,r){r.prototype.weekYear=function(){var a=this.month(),o=this.week(),i=this.year();return o===1&&a===11?i+1:a===0&&o>=52?i-1:i}}})})(a5);var uoe=a5.exports,o5={exports:{}};(function(e,t){(function(n,r){e.exports=r()})(vo,function(){return function(n,r,a){r.prototype.dayOfYear=function(o){var 
i=Math.round((a(this).startOf("day")-a(this).startOf("year"))/864e5)+1;return o==null?i:this.add(o-i,"day")}}})})(o5);/* End of the dayOfYear plugin: with a nullish argument it returns the 1-based day number computed above, otherwise it moves the date by the difference. */var coe=o5.exports,i5={exports:{}};/* Plugin: isSameOrAfter(a,o) is isSame(a,o) || isAfter(a,o). */(function(e,t){(function(n,r){e.exports=r()})(vo,function(){return function(n,r){r.prototype.isSameOrAfter=function(a,o){return this.isSame(a,o)||this.isAfter(a,o)}}})})(i5);var doe=i5.exports,l5={exports:{}};/* Plugin: isSameOrBefore(a,o) is isSame(a,o) || isBefore(a,o). */(function(e,t){(function(n,r){e.exports=r()})(vo,function(){return function(n,r){r.prototype.isSameOrBefore=function(a,o){return this.isSame(a,o)||this.isBefore(a,o)}}})})(l5);var foe=l5.exports;/* Key that ElDatePickerCell passes to ve() to find the object whose ctx.slots.default supplies custom cell content. */const s5=Symbol();/* ElDatePickerCell: returns the default-slot vnodes for the cell (minus entries with patchFlag -2 or comment type) when any remain, otherwise a div.el-date-table-cell wrapping a span with cell.text. */var hoe=G({name:"ElDatePickerCell",props:Ze({cell:{type:Le(Object)}}),setup(e){const t=ve(s5);return()=>{const n=e.cell;if(t!=null&&t.ctx.slots.default){const r=t.ctx.slots.default(n).filter(a=>a.patchFlag!==-2&&a.type.toString()!=="Symbol(Comment)");if(r.length)return r}return qe("div",{class:"el-date-table-cell"},[qe("span",{class:"el-date-table-cell__text"},[n==null?void 0:n.text])])}}});/* Day-grid table component: setup() keeps a six-row cell matrix in i and refreshes every cell descriptor inside the computed p. */const poe=G({components:{ElDatePickerCell:hoe},props:{date:{type:Object},minDate:{type:Object},maxDate:{type:Object},parsedValue:{type:[Object,Array]},selectionMode:{type:String,default:"day"},showWeekNumber:{type:Boolean,default:!1},disabledDate:{type:Function},cellClassName:{type:Function},rangeState:{type:Object,default:()=>({endDate:null,selecting:!1})}},emits:["changerange","pick","select"],setup(e,t){const{t:n,lang:r}=ln(),/* a/o: row and column of the last hovered cell */a=H(null),o=H(null),i=H([[],[],[],[],[],[]]),/* l: locale weekStart, or 7 when it is 0 or missing */l=e.date.$locale().weekStart||7,/* s: lower-cased short English weekday names */s=e.date.locale("en").localeData().weekdaysShort().map(k=>k.toLowerCase()),/* c: day offset derived from the week start (7-l when l>3, otherwise -l) */c=x(()=>l>3?7-l:-l),/* d: start of the month moved back by its weekday number (7 when day() is 0) */d=x(()=>{const k=e.date.startOf("month");return k.subtract(k.day()||7,"day")}),/* f: seven weekday names starting at index l of the doubled list */f=x(()=>s.concat(s).slice(l,l+7)),/* p: computed matrix of cell descriptors; j counts the day number to print next */p=x(()=>{var k;const $=e.date.startOf("month"),O=$.day()||7,T=$.daysInMonth(),_=$.subtract(1,"month").daysInMonth(),I=c.value,L=i.value;let j=1;const F=e.selectionMode==="dates"?rs(e.parsedValue):[],N=mt().locale(r.value).startOf("day");for(let D=0;D<6;D++){const 
z=L[D];/* With week numbers enabled, slot 0 of the row holds a "week" cell that is created only once. */e.showWeekNumber&&(z[0]||(z[0]={type:"week",text:d.value.add(D*7+1,"day").week()}));for(let B=0;B<7;B++){/* Reuse the cached descriptor for this row/column or create a fresh one. */let M=z[e.showWeekNumber?B+1:B];M||(M={row:D,column:B,type:"normal",inRange:!1,start:!1,end:!1});const E=D*7+B,K=d.value.add(E-I,"day");M.dayjs=K,M.date=K.toDate(),M.timestamp=K.valueOf(),M.type="normal";/* W: range end used for highlighting - the hovered end date, else maxDate, else minDate while a selection is in progress. */const W=e.rangeState.endDate||e.maxDate||e.rangeState.selecting&&e.minDate;if(M.inRange=e.minDate&&K.isSameOrAfter(e.minDate,"day")&&W&&K.isSameOrBefore(W,"day")||e.minDate&&K.isSameOrBefore(e.minDate,"day")&&W&&K.isSameOrAfter(W,"day"),(k=e.minDate)!=null&&k.isSameOrAfter(W)?(M.start=W&&K.isSame(W,"day"),M.end=e.minDate&&K.isSame(e.minDate,"day")):(M.start=e.minDate&&K.isSame(e.minDate,"day"),M.end=W&&K.isSame(W,"day")),K.isSame(N,"day")&&(M.type="today"),D>=0&&D<=1){/* Rows 0 and 1 may still show trailing days of the previous month. */const J=O+I<0?7+O+I:O+I;B+D*7>=J?M.text=j++:(M.text=_-(J-B%7)+1+D*7,M.type="prev-month")}else j<=T?M.text=j++:(M.text=j++-T,M.type="next-month");/* Selection, disabled and custom-class flags are derived from the cell date. */const q=K.toDate();M.selected=F.find(J=>J.valueOf()===K.valueOf()),M.isSelected=!!M.selected,M.isCurrent=v(M),M.disabled=e.disabledDate&&e.disabledDate(q),M.customClass=e.cellClassName&&e.cellClassName(q),z[e.showWeekNumber?B+1:B]=M}/* Week mode: the first and last day cells of the row take their range flags from isWeekActive of the second day cell. */if(e.selectionMode==="week"){const B=e.showWeekNumber?1:0,M=e.showWeekNumber?7:6,E=w(z[B+1]);z[B].inRange=E,z[B].start=E,z[M].inRange=E,z[M].end=E}}return L}),/* v: true in "day" mode when a normal/today cell matches parsedValue. */v=k=>e.selectionMode==="day"&&(k.type==="normal"||k.type==="today")&&m(k,e.parsedValue),/* m: compares $ at day granularity with the panel date set to the cell's day number; false when $ is falsy. */m=(k,$)=>$?mt($).locale(r.value).isSame(e.date.date(Number(k.text)),"day"):!1,/* y (exposed as getCellClasses): builds the space-separated CSS class string for a cell. */y=k=>{const $=[];return(k.type==="normal"||k.type==="today")&&!k.disabled?($.push("available"),k.type==="today"&&$.push("today")):$.push(k.type),v(k)&&$.push("current"),k.inRange&&(k.type==="normal"||k.type==="today"||e.selectionMode==="week")&&($.push("in-range"),k.start&&$.push("start-date"),k.end&&$.push("end-date")),k.disabled&&$.push("disabled"),k.selected&&$.push("selected"),k.customClass&&$.push(k.customClass),$.join(" ")},/* b: converts a row/column position (column shifted by one when week numbers are shown) into a date relative to d. */b=(k,$)=>{const O=k*7+($-(e.showWeekNumber?1:0))-c.value;return 
d.value.add(O,"day")},/* C (exposed as handleMouseMove): while a range selection is in progress, resolves the hovered TD and emits "changerange" with its date when the pointer reaches a different, enabled cell. */C=k=>{if(!e.rangeState.selecting)return;let $=k.target;if($.tagName==="SPAN"&&($=$.parentNode.parentNode),$.tagName==="DIV"&&($=$.parentNode),$.tagName!=="TD")return;const O=$.parentNode.rowIndex-1,T=$.cellIndex;p.value[O][T].disabled||(O!==a.value||T!==o.value)&&(a.value=O,o.value=T,t.emit("changerange",{selecting:!0,endDate:b(O,T)}))},/* S (exposed as handleClick): walks up to the enclosing TD, ignores disabled and week-number cells, then emits "pick" (and "select" in range mode) according to selectionMode. */S=k=>{let $=k.target;for(;$&&$.tagName!=="TD";)$=$.parentNode;if(!$||$.tagName!=="TD")return;const O=$.parentNode.rowIndex-1,T=$.cellIndex,_=p.value[O][T];if(_.disabled||_.type==="week")return;const I=b(O,T);/* Range mode: the second click orders the two dates before emitting; the first click starts a selection. */if(e.selectionMode==="range")e.rangeState.selecting?(I>=e.minDate?t.emit("pick",{minDate:e.minDate,maxDate:I}):t.emit("pick",{minDate:I,maxDate:e.minDate}),t.emit("select",!1)):(t.emit("pick",{minDate:I,maxDate:null}),t.emit("select",!0));else if(e.selectionMode==="day")t.emit("pick",I);else if(e.selectionMode==="week"){const L=I.week(),j=`${I.year()}w${L}`;t.emit("pick",{year:I.year(),week:L,value:j,date:I.startOf("week")})}else if(e.selectionMode==="dates"){/* Dates mode toggles the clicked date in the parsedValue list. */const L=_.selected?rs(e.parsedValue).filter(j=>j.valueOf()!==I.valueOf()):rs(e.parsedValue).concat([I]);t.emit("pick",L)}},/* w (exposed as isWeekActive): in week mode, rebuilds the cell's date from its type and text and compares it with parsedValue moved back by (day - l + 7) % 7 - 1 days; false in every other case. */w=k=>{if(e.selectionMode!=="week")return!1;let $=e.date.startOf("day");if(k.type==="prev-month"&&($=$.subtract(1,"month")),k.type==="next-month"&&($=$.add(1,"month")),$=$.date(Number.parseInt(k.text,10)),e.parsedValue&&!Array.isArray(e.parsedValue)){const O=(e.parsedValue.day()-l+7)%7-1;return e.parsedValue.subtract(O,"day").isSame($,"day")}return!1};return{handleMouseMove:C,t:n,rows:p,isWeekActive:w,getCellClasses:y,WEEKS:f,handleClick:S}}}),voe={key:0};/* Render function of the date table; the vnode tree it returns starts on the next line. */function moe(e,t,n,r,a,o){const i=we("el-date-picker-cell");return 
/* Table markup: a header row with weekday labels (plus a week column when showWeekNumber is set) followed by one TR per entry of rows, each cell rendered through el-date-picker-cell. */R(),X("table",{cellspacing:"0",cellpadding:"0",class:U(["el-date-table",{"is-week-mode":e.selectionMode==="week"}]),onClick:t[0]||(t[0]=(...l)=>e.handleClick&&e.handleClick(...l)),onMousemove:t[1]||(t[1]=(...l)=>e.handleMouseMove&&e.handleMouseMove(...l))},[Z("tbody",null,[Z("tr",null,[e.showWeekNumber?(R(),X("th",voe,Me(e.t("el.datepicker.week")),1)):se("v-if",!0),(R(!0),X(Fe,null,Rt(e.WEEKS,(l,s)=>(R(),X("th",{key:s},Me(e.t("el.datepicker.weeks."+l)),1))),128))]),(R(!0),X(Fe,null,Rt(e.rows,(l,s)=>(R(),X("tr",{key:s,class:U(["el-date-table__row",{current:e.isWeekActive(l[1])}])},[(R(!0),X(Fe,null,Rt(l,(c,d)=>(R(),X("td",{key:d,class:U(e.getCellClasses(c))},[g(i,{cell:c},null,8,["cell"])],2))),128))],2))),128))])],34)}var u5=Ae(poe,[["render",moe],["__file","/home/runner/work/element-plus/element-plus/packages/components/date-picker/src/date-picker-com/basic-date-table.vue"]]);/* goe(e,t,n): Date objects for month t of year e in locale n, produced by adding every value of tl(daysInMonth) as a day offset to the month start. */const goe=(e,t,n)=>{const r=mt().locale(n).startOf("month").month(t).year(e),a=r.daysInMonth();return tl(a).map(o=>r.add(o,"day").toDate())},/* Month table component: setup() keeps a 3x4 matrix of month cells in o and refreshes it inside the computed s. */yoe=G({props:{disabledDate:{type:Function},selectionMode:{type:String,default:"month"},minDate:{type:Object},maxDate:{type:Object},date:{type:Object},parsedValue:{type:Object},rangeState:{type:Object,default:()=>({endDate:null,selecting:!1})}},emits:["changerange","pick","select"],setup(e,t){const{t:n,lang:r}=ln(),/* a: lower-cased short English month names, used as translation key suffixes by the render function */a=H(e.date.locale("en").localeData().monthsShort().map(p=>p.toLowerCase())),o=H([[],[],[]]),/* i/l: row and column of the last hovered cell */i=H(null),l=H(null),s=x(()=>{var p;const v=o.value,m=mt().locale(r.value).startOf("month");for(let y=0;y<3;y++){const b=v[y];for(let C=0;C<4;C++){let S=b[C];S||(S={row:y,column:C,type:"normal",inRange:!1,start:!1,end:!1}),S.type="normal";const 
/* w: month index of this cell; k: the panel year at that month; $: effective range end (hovered end date, else maxDate, else minDate while selecting). */w=y*4+C,k=e.date.startOf("year").month(w),$=e.rangeState.endDate||e.maxDate||e.rangeState.selecting&&e.minDate;S.inRange=e.minDate&&k.isSameOrAfter(e.minDate,"month")&&$&&k.isSameOrBefore($,"month")||e.minDate&&k.isSameOrBefore(e.minDate,"month")&&$&&k.isSameOrAfter($,"month"),(p=e.minDate)!=null&&p.isSameOrAfter($)?(S.start=$&&k.isSame($,"month"),S.end=e.minDate&&k.isSame(e.minDate,"month")):(S.start=e.minDate&&k.isSame(e.minDate,"month"),S.end=$&&k.isSame($,"month")),m.isSame(k)&&(S.type="today"),S.text=w;const T=k.toDate();S.disabled=e.disabledDate&&e.disabledDate(T),b[C]=S}}return v});return{/* handleMouseMove: while a range selection is in progress, emits "changerange" with the hovered month when the pointer reaches a different, enabled cell. */handleMouseMove:p=>{if(!e.rangeState.selecting)return;let v=p.target;if(v.tagName==="A"&&(v=v.parentNode.parentNode),v.tagName==="DIV"&&(v=v.parentNode),v.tagName!=="TD")return;const m=v.parentNode.rowIndex,y=v.cellIndex;s.value[m][y].disabled||(m!==i.value||y!==l.value)&&(i.value=m,l.value=y,t.emit("changerange",{selecting:!0,endDate:e.date.startOf("year").month(m*4+y)}))},/* handleMonthTableClick: resolves the clicked TD, skips cells carrying the "disabled" class, and emits either a range object (range mode) or the plain month index. */handleMonthTableClick:p=>{let v=p.target;if(v.tagName==="A"&&(v=v.parentNode.parentNode),v.tagName==="DIV"&&(v=v.parentNode),v.tagName!=="TD"||to(v,"disabled"))return;const m=v.cellIndex,b=v.parentNode.rowIndex*4+m,C=e.date.startOf("year").month(b);e.selectionMode==="range"?e.rangeState.selecting?(C>=e.minDate?t.emit("pick",{minDate:e.minDate,maxDate:C}):t.emit("pick",{minDate:C,maxDate:e.minDate}),t.emit("select",!1)):(t.emit("pick",{minDate:C,maxDate:null}),t.emit("select",!0)):t.emit("pick",b)},rows:s,/* getCellStyle: class flags for a month cell; "disabled" is set only when disabledDate rejects every day of that month. */getCellStyle:p=>{const v={},m=e.date.year(),y=new Date,b=p.text;return v.disabled=e.disabledDate?goe(m,b,r.value).every(e.disabledDate):!1,v.current=rs(e.parsedValue).findIndex(C=>C.year()===m&&C.month()===b)>=0,v.today=y.getFullYear()===m&&y.getMonth()===b,p.inRange&&(v["in-range"]=!0,p.start&&(v["start-date"]=!0),p.end&&(v["end-date"]=!0)),v},t:n,months:a}}}),boe={class:"cell"};/* Render function of the month table; the vnode tree it returns starts on the next line. */function Coe(e,t,n,r,a,o){return 
/* Month table markup: one TR per entry of rows and one TD per cell, labelled with the translation of el.datepicker.months.<short name>. */R(),X("table",{class:"el-month-table",onClick:t[0]||(t[0]=(...i)=>e.handleMonthTableClick&&e.handleMonthTableClick(...i)),onMousemove:t[1]||(t[1]=(...i)=>e.handleMouseMove&&e.handleMouseMove(...i))},[Z("tbody",null,[(R(!0),X(Fe,null,Rt(e.rows,(i,l)=>(R(),X("tr",{key:l},[(R(!0),X(Fe,null,Rt(i,(s,c)=>(R(),X("td",{key:c,class:U(e.getCellStyle(s))},[Z("div",null,[Z("a",boe,Me(e.t("el.datepicker.months."+e.months[s.text])),1)])],2))),128))]))),128))])],32)}var c5=Ae(yoe,[["render",Coe],["__file","/home/runner/work/element-plus/element-plus/packages/components/date-picker/src/date-picker-com/basic-month-table.vue"]]);/* woe(e,t): Date objects for year e in locale t, produced by adding every value of tl(dayOfYear of the year's end) as a day offset to the year start. */const woe=(e,t)=>{const n=mt(String(e)).locale(t).startOf("year"),a=n.endOf("year").dayOfYear();return tl(a).map(o=>n.add(o,"day").toDate())},/* Year table component: exposes the first year of the decade plus per-cell class flags and a click handler. */Soe=G({props:{disabledDate:{type:Function},parsedValue:{type:Object},date:{type:Object}},emits:["pick"],setup(e,t){const{lang:n}=ln();return{startYear:x(()=>Math.floor(e.date.year()/10)*10),/* getCellStyle: "disabled" only when disabledDate rejects every day of year i; "current" when parsedValue contains that year; "today" for the present year. */getCellStyle:i=>{const l={},s=mt().locale(n.value);return l.disabled=e.disabledDate?woe(i,n.value).every(e.disabledDate):!1,l.current=rs(e.parsedValue).findIndex(c=>c.year()===i)>=0,l.today=s.year()===i,l},/* handleYearTableClick: reacts only to clicks on the A element, skips parents carrying the "disabled" class, and emits the link text as a number. */handleYearTableClick:i=>{const l=i.target;if(l.tagName==="A"){if(to(l.parentNode,"disabled"))return;const s=l.textContent||l.innerText;t.emit("pick",Number(s))}}}}}),/* Hoisted attribute objects (one per year link) and two empty TD vnodes used by the year table render function. */koe={class:"cell"},$oe={class:"cell"},Ooe={class:"cell"},Poe={class:"cell"},Toe={class:"cell"},xoe={class:"cell"},_oe={class:"cell"},Eoe={class:"cell"},Moe={class:"cell"},Ioe={class:"cell"},Noe=Z("td",null,null,-1),Aoe=Z("td",null,null,-1);/* Render function of the year table; the vnode tree it returns starts on the next line. */function Doe(e,t,n,r,a,o){return 
/* Year table markup: three rows covering startYear+0 through startYear+9, with the last row padded by the two hoisted empty cells. */R(),X("table",{class:"el-year-table",onClick:t[0]||(t[0]=(...i)=>e.handleYearTableClick&&e.handleYearTableClick(...i))},[Z("tbody",null,[Z("tr",null,[Z("td",{class:U(["available",e.getCellStyle(e.startYear+0)])},[Z("a",koe,Me(e.startYear),1)],2),Z("td",{class:U(["available",e.getCellStyle(e.startYear+1)])},[Z("a",$oe,Me(e.startYear+1),1)],2),Z("td",{class:U(["available",e.getCellStyle(e.startYear+2)])},[Z("a",Ooe,Me(e.startYear+2),1)],2),Z("td",{class:U(["available",e.getCellStyle(e.startYear+3)])},[Z("a",Poe,Me(e.startYear+3),1)],2)]),Z("tr",null,[Z("td",{class:U(["available",e.getCellStyle(e.startYear+4)])},[Z("a",Toe,Me(e.startYear+4),1)],2),Z("td",{class:U(["available",e.getCellStyle(e.startYear+5)])},[Z("a",xoe,Me(e.startYear+5),1)],2),Z("td",{class:U(["available",e.getCellStyle(e.startYear+6)])},[Z("a",_oe,Me(e.startYear+6),1)],2),Z("td",{class:U(["available",e.getCellStyle(e.startYear+7)])},[Z("a",Eoe,Me(e.startYear+7),1)],2)]),Z("tr",null,[Z("td",{class:U(["available",e.getCellStyle(e.startYear+8)])},[Z("a",Moe,Me(e.startYear+8),1)],2),Z("td",{class:U(["available",e.getCellStyle(e.startYear+9)])},[Z("a",Ioe,Me(e.startYear+9),1)],2),Noe,Aoe])])])}/* Pair the year table options with the render function above. */var Roe=Ae(Soe,[["render",Doe],["__file","/home/runner/work/element-plus/element-plus/packages/components/date-picker/src/date-picker-com/basic-year-table.vue"]]);const 
/* Loe: range-check stub that ignores its arguments and always returns true. */Loe=(e,t,n)=>!0,/* Single-date picker panel. It reads shared options from the object found under "EP_PICKER_BASE" and reports selections through "pick". */Foe=G({components:{DateTable:u5,ElInput:Ra,ElButton:xa,ElIcon:ft,TimePickPanel:rw,MonthTable:c5,YearTable:Roe,DArrowLeft:Fm,ArrowLeft:Es,DArrowRight:Bm,ArrowRight:Da},directives:{clickoutside:Is},props:{visible:{type:Boolean,default:!1},parsedValue:{type:[Object,Array]},format:{type:String,default:""},type:{type:String,required:!0,validator:mE}},emits:["pick","set-picker-option","panel-change"],setup(e,t){const{t:n,lang:r}=ln(),a=ve("EP_PICKER_BASE"),o=ve(qm),{shortcuts:i,disabledDate:l,cellClassName:s,defaultTime:c,arrowControl:d}=a.props,f=yn(a.props,"defaultValue"),/* p (exposed as innerDate): the date the panel currently displays */p=H(mt().locale(r.value)),v=x(()=>mt(c).locale(r.value)),m=x(()=>p.value.month()),y=x(()=>p.value.year()),b=H([]),/* C/S: raw text typed into the date and time inputs, null when nothing is pending */C=H(null),S=H(null),/* w: consults Loe only when b holds ranges; b is never filled in the visible code, so this returns true */w=ue=>b.value.length>0?Loe(ue,b.value,e.format||"HH:mm:ss"):!0,/* k: normalises a value before it is emitted - copies its date onto the default time when one is configured and no time text is shown, otherwise zeroes milliseconds (datetime types) or truncates to the start of the day */k=ue=>c&&!Q.value?v.value.year(ue.year()).month(ue.month()).date(ue.date()):W.value?ue.millisecond(0):ue.startOf("day"),/* $: emits "pick" with the normalised value (each element for arrays, unchanged when falsy) and clears both pending input texts */$=(ue,...Ce)=>{if(!ue)t.emit("pick",ue,...Ce);else if(Array.isArray(ue)){const je=ue.map(k);t.emit("pick",je,...Ce)}else t.emit("pick",k(ue),...Ce);C.value=null,S.value=null},/* O (exposed as handleDatePick): handles a "pick" from the date table according to the selection mode */O=ue=>{if(D.value==="day"){let Ce=e.parsedValue?e.parsedValue.year(ue.year()).month(ue.month()).date(ue.date()):ue;w(Ce)||(Ce=b.value[0][0].year(ue.year()).month(ue.month()).date(ue.date())),p.value=Ce,$(Ce,W.value)}else D.value==="week"?$(ue.date):D.value==="dates"&&$(ue,!0)},/* T/_: previous/next month; I/L: previous/next year, moving ten years at a time in the year view. Each emits "panel-change" through pe. */T=()=>{p.value=p.value.subtract(1,"month"),pe("month")},_=()=>{p.value=p.value.add(1,"month"),pe("month")},I=()=>{j.value==="year"?p.value=p.value.subtract(10,"year"):p.value=p.value.subtract(1,"year"),pe("year")},L=()=>{j.value==="year"?p.value=p.value.add(10,"year"):p.value=p.value.add(1,"year"),pe("year")},/* j (exposed as currentView): "date", "month" or "year" */j=H("date"),/* F (exposed as yearLabel): the decade range in the year view, otherwise the displayed year followed by the translated unit */F=x(()=>{const ue=n("el.datepicker.year");if(j.value==="year"){const Ce=Math.floor(y.value/10)*10;return ue?`${Ce} ${ue} - ${Ce+9} ${ue}`:`${Ce} - ${Ce+9}`}return`${y.value} ${ue}`}),/* N (exposed as handleShortcutClick): its body continues on the next line */N=ue=>{const Ce=typeof 
ue.value=="function"?ue.value():ue.value;/* A shortcut with a value is emitted directly; otherwise its onClick callback receives the setup context. */if(Ce){$(mt(Ce).locale(r.value));return}ue.onClick&&ue.onClick(t)},/* D (exposed as selectionMode): the type itself for week/month/year/dates, "day" for everything else */D=x(()=>["week","month","year","dates"].includes(e.type)?e.type:"day");/* Keep the current view in line with the selection mode and refresh the popper whenever the view changes. */ce(()=>D.value,ue=>{if(["month","year"].includes(ue)){j.value=ue;return}j.value="date"},{immediate:!0}),ce(()=>j.value,()=>{o==null||o.updatePopper()});const z=x(()=>!!i.length),/* B (exposed as handleMonthPick): applies the picked month, then emits it in month mode or returns to the date view */B=ue=>{p.value=p.value.startOf("month").month(ue),D.value==="month"?$(p.value):j.value="date",pe("month")},/* M (exposed as handleYearPick): applies the picked year, then emits it in year mode or moves on to the month view */M=ue=>{D.value==="year"?(p.value=p.value.startOf("year").year(ue),$(p.value)):(p.value=p.value.year(ue),j.value="month"),pe("year")},E=()=>{j.value="month"},K=()=>{j.value="year"},/* W (exposed as showTime): true for the datetime and datetimerange types; Y (exposed as footerVisible): W or dates mode */W=x(()=>e.type==="datetime"||e.type==="datetimerange"),Y=x(()=>W.value||D.value==="dates"),/* q (exposed as onConfirm): emits parsedValue, or when it is empty the default time placed on the date returned by xe */q=()=>{if(D.value==="dates")$(e.parsedValue);else{let ue=e.parsedValue;if(!ue){const Ce=mt(c).locale(r.value),je=xe();ue=Ce.year(je.year()).month(je.month()).date(je.date())}p.value=ue,$(ue)}},/* J (exposed as changeToNow): selects the current moment unless disabledDate rejects it */J=()=>{const Ce=mt().locale(r.value).toDate();(!l||!l(Ce))&&w(Ce)&&(p.value=mt().locale(r.value),$(p.value))},/* ne/oe: formats derived from props.format by RM and DM; ne is exposed as timeFormat and drives the time text, oe drives the date text */ne=x(()=>RM(e.format)),oe=x(()=>DM(e.format)),/* Q/ae (exposed as visibleTime/visibleDate): pending input text if any, otherwise the formatted value; undefined when neither parsedValue nor a default value exists */Q=x(()=>{if(S.value)return S.value;if(!(!e.parsedValue&&!f.value))return(e.parsedValue||p.value).format(ne.value)}),ae=x(()=>{if(C.value)return C.value;if(!(!e.parsedValue&&!f.value))return(e.parsedValue||p.value).format(oe.value)}),/* de (exposed as timePickerVisible) with its open/close helpers be and Ee */de=H(!1),be=()=>{de.value=!0},Ee=()=>{de.value=!1},/* Pe (exposed as handleTimePick): copies hour/minute/second from the picked time onto parsedValue when present */Pe=(ue,Ce,je)=>{const ee=e.parsedValue?e.parsedValue.hour(ue.hour()).minute(ue.minute()).second(ue.second()):ue;p.value=ee,$(p.value,!0),je||(de.value=Ce)},/* Be (exposed as handleVisibleTimeChange): parses typed time text and keeps the displayed year, month and date */Be=ue=>{const Ce=mt(ue,ne.value).locale(r.value);Ce.isValid()&&w(Ce)&&(p.value=Ce.year(p.value.year()).month(p.value.month()).date(p.value.date()),S.value=null,de.value=!1,$(p.value,!0))},/* te (exposed as handleVisibleDateChange): its body continues on the next line */te=ue=>{const 
Ce=mt(ue,oe.value).locale(r.value);if(Ce.isValid()){if(l&&l(Ce.toDate()))return;p.value=Ce.hour(p.value.hour()).minute(p.value.minute()).second(p.value.second()),C.value=null,$(p.value,!0)}},ie=ue=>mt.isDayjs(ue)&&ue.isValid()&&(l?!l(ue.toDate()):!0),ge=ue=>D.value==="dates"?ue.map(Ce=>Ce.format(e.format)):ue.format(e.format),ke=ue=>mt(ue,e.format).locale(r.value),xe=()=>{const ue=mt(f.value).locale(r.value);if(!f.value){const Ce=v.value;return mt().hour(Ce.hour()).minute(Ce.minute()).second(Ce.second()).locale(r.value)}return ue},Ie=ue=>{const{code:Ce,keyCode:je}=ue,ee=[Ge.up,Ge.down,Ge.left,Ge.right];e.visible&&!de.value&&(ee.includes(Ce)&&(ye(je),ue.stopPropagation(),ue.preventDefault()),Ce===Ge.enter&&C.value===null&&S.value===null&&$(p,!1))},ye=ue=>{const Ce={year:{38:-4,40:4,37:-1,39:1,offset:(ee,me)=>ee.setFullYear(ee.getFullYear()+me)},month:{38:-4,40:4,37:-1,39:1,offset:(ee,me)=>ee.setMonth(ee.getMonth()+me)},week:{38:-1,40:1,37:-1,39:1,offset:(ee,me)=>ee.setDate(ee.getDate()+me*7)},day:{38:-7,40:7,37:-1,39:1,offset:(ee,me)=>ee.setDate(ee.getDate()+me)}},je=p.value.toDate();for(;Math.abs(p.value.diff(je,"year",!0))<1;){const ee=Ce[D.value];if(ee.offset(je,ee[ue]),l&&l(je))continue;const me=mt(je).locale(r.value);p.value=me,t.emit("pick",me,!0);break}},pe=ue=>{t.emit("panel-change",p.value.toDate(),ue,j.value)};return t.emit("set-picker-option",["isValidValue",ie]),t.emit("set-picker-option",["formatToString",ge]),t.emit("set-picker-option",["parseUserInput",ke]),t.emit("set-picker-option",["handleKeydown",Ie]),ce(()=>f.value,ue=>{ue&&(p.value=xe())},{immediate:!0}),ce(()=>e.parsedValue,ue=>{if(ue){if(D.value==="dates"||Array.isArray(ue))return;p.value=ue}else 
p.value=xe()},{immediate:!0}),/* Bindings exposed to the render function; the short names are the setup locals defined on the preceding lines. */{handleTimePick:Pe,handleTimePickClose:Ee,onTimePickerInputFocus:be,timePickerVisible:de,visibleTime:Q,visibleDate:ae,showTime:W,changeToNow:J,onConfirm:q,footerVisible:Y,handleYearPick:M,showMonthPicker:E,showYearPicker:K,handleMonthPick:B,hasShortcuts:z,shortcuts:i,arrowControl:d,disabledDate:l,cellClassName:s,selectionMode:D,handleShortcutClick:N,prevYear_:I,nextYear_:L,prevMonth_:T,nextMonth_:_,innerDate:p,t:n,yearLabel:F,currentView:j,month:m,handleDatePick:O,handleVisibleTimeChange:Be,handleVisibleDateChange:te,timeFormat:ne,userInputTime:S,userInputDate:C}}}),/* Hoisted attribute objects and dynamic-prop name lists referenced by the render function below. */Boe={class:"el-picker-panel__body-wrapper"},Voe={key:0,class:"el-picker-panel__sidebar"},zoe=["onClick"],Hoe={class:"el-picker-panel__body"},joe={key:0,class:"el-date-picker__time-header"},Koe={class:"el-date-picker__editor-wrap"},Woe={class:"el-date-picker__editor-wrap"},Uoe=["aria-label"],Yoe=["aria-label"],qoe=["aria-label"],Goe=["aria-label"],Xoe={class:"el-picker-panel__content"},Zoe={class:"el-picker-panel__footer"};/* Render function of the single-date panel: resolves the child components and the clickoutside directive by name, then returns the panel vnode tree that continues on the next lines. */function Joe(e,t,n,r,a,o){const i=we("el-input"),l=we("time-pick-panel"),s=we("d-arrow-left"),c=we("el-icon"),d=we("arrow-left"),f=we("d-arrow-right"),p=we("arrow-right"),v=we("date-table"),m=we("year-table"),y=we("month-table"),b=we("el-button"),C=pa("clickoutside");return R(),X("div",{class:U(["el-picker-panel 
el-date-picker",[{"has-sidebar":e.$slots.sidebar||e.hasShortcuts,"has-time":e.showTime}]])},[Z("div",Boe,[/* Sidebar slot, followed by one button per shortcut when shortcuts are configured. */Oe(e.$slots,"sidebar",{class:"el-picker-panel__sidebar"}),e.hasShortcuts?(R(),X("div",Voe,[(R(!0),X(Fe,null,Rt(e.shortcuts,(S,w)=>(R(),X("button",{key:w,type:"button",class:"el-picker-panel__shortcut",onClick:k=>e.handleShortcutClick(S)},Me(S.text),9,zoe))),128))])):se("v-if",!0),Z("div",Hoe,[/* Time header (only when showTime is set): a date input and a time input whose typed text is stored in userInputDate / userInputTime; the time input also hosts the time-pick-panel, closed through the clickoutside directive. */e.showTime?(R(),X("div",joe,[Z("span",Koe,[g(i,{placeholder:e.t("el.datepicker.selectDate"),"model-value":e.visibleDate,size:"small",onInput:t[0]||(t[0]=S=>e.userInputDate=S),onChange:e.handleVisibleDateChange},null,8,["placeholder","model-value","onChange"])]),at((R(),X("span",Woe,[g(i,{placeholder:e.t("el.datepicker.selectTime"),"model-value":e.visibleTime,size:"small",onFocus:e.onTimePickerInputFocus,onInput:t[1]||(t[1]=S=>e.userInputTime=S),onChange:e.handleVisibleTimeChange},null,8,["placeholder","model-value","onFocus","onChange"]),g(l,{visible:e.timePickerVisible,format:e.timeFormat,"time-arrow-control":e.arrowControl,"parsed-value":e.innerDate,onPick:e.handleTimePick},null,8,["visible","format","time-arrow-control","parsed-value","onPick"])])),[[C,e.handleTimePickClose]])])):se("v-if",!0),/* Navigation header: gets the bordered modifier in the year and month views; the previous-year button comes first. */at(Z("div",{class:U(["el-date-picker__header",{"el-date-picker__header--bordered":e.currentView==="year"||e.currentView==="month"}])},[Z("button",{type:"button","aria-label":e.t("el.datepicker.prevYear"),class:"el-picker-panel__icon-btn el-date-picker__prev-btn d-arrow-left",onClick:t[2]||(t[2]=(...S)=>e.prevYear_&&e.prevYear_(...S))},[g(c,null,{default:re(()=>[g(s)]),_:1})],8,Uoe),at(Z("button",{type:"button","aria-label":e.t("el.datepicker.prevMonth"),class:"el-picker-panel__icon-btn el-date-picker__prev-btn 
arrow-left",onClick:t[3]||(t[3]=(...S)=>e.prevMonth_&&e.prevMonth_(...S))},[g(c,null,{default:re(()=>[g(d)]),_:1})],8,Yoe),[[_t,e.currentView==="date"]]),Z("span",{role:"button",class:"el-date-picker__header-label",onClick:t[4]||(t[4]=(...S)=>e.showYearPicker&&e.showYearPicker(...S))},Me(e.yearLabel),1),at(Z("span",{role:"button",class:U(["el-date-picker__header-label",{active:e.currentView==="month"}]),onClick:t[5]||(t[5]=(...S)=>e.showMonthPicker&&e.showMonthPicker(...S))},Me(e.t(`el.datepicker.month${e.month+1}`)),3),[[_t,e.currentView==="date"]]),Z("button",{type:"button","aria-label":e.t("el.datepicker.nextYear"),class:"el-picker-panel__icon-btn el-date-picker__next-btn d-arrow-right",onClick:t[6]||(t[6]=(...S)=>e.nextYear_&&e.nextYear_(...S))},[g(c,null,{default:re(()=>[g(f)]),_:1})],8,qoe),at(Z("button",{type:"button","aria-label":e.t("el.datepicker.nextMonth"),class:"el-picker-panel__icon-btn el-date-picker__next-btn arrow-right",onClick:t[7]||(t[7]=(...S)=>e.nextMonth_&&e.nextMonth_(...S))},[g(c,null,{default:re(()=>[g(p)]),_:1})],8,Goe),[[_t,e.currentView==="date"]])],2),[[_t,e.currentView!=="time"]]),Z("div",Xoe,[e.currentView==="date"?(R(),fe(v,{key:0,"selection-mode":e.selectionMode,date:e.innerDate,"parsed-value":e.parsedValue,"disabled-date":e.disabledDate,onPick:e.handleDatePick},null,8,["selection-mode","date","parsed-value","disabled-date","onPick"])):se("v-if",!0),e.currentView==="year"?(R(),fe(m,{key:1,date:e.innerDate,"disabled-date":e.disabledDate,"parsed-value":e.parsedValue,onPick:e.handleYearPick},null,8,["date","disabled-date","parsed-value","onPick"])):se("v-if",!0),e.currentView==="month"?(R(),fe(y,{key:2,date:e.innerDate,"parsed-value":e.parsedValue,"disabled-date":e.disabledDate,onPick:e.handleMonthPick},null,8,["date","parsed-value","disabled-date","onPick"])):se("v-if",!0)])])]),at(Z("div",Zoe,[at(g(b,{size:"small",type:"text",class:"el-picker-panel__link-btn",onClick:e.changeToNow},{default:re(()=>[yt(Me(e.t("el.datepicker.now")),1)]
),_:1},8,["onClick"]),[[_t,e.selectionMode!=="dates"]]),g(b,{plain:"",size:"small",class:"el-picker-panel__link-btn",onClick:e.onConfirm},{default:re(()=>[yt(Me(e.t("el.datepicker.confirm")),1)]),_:1},8,["onClick"])],512),[[_t,e.footerVisible&&e.currentView==="date"]])],2)}/* Pair the single-date panel options with the render function that ends above. */var Qoe=Ae(Foe,[["render",Joe],["__file","/home/runner/work/element-plus/element-plus/packages/components/date-picker/src/date-picker-com/panel-date-pick.vue"]]);/* Date-range picker panel with a left and a right calendar. When unlinkPanels is false the right calendar is kept one month after the left one. */const eie=G({directives:{clickoutside:Is},components:{TimePickPanel:rw,DateTable:u5,ElInput:Ra,ElButton:xa,ElIcon:ft,DArrowLeft:Fm,ArrowLeft:Es,DArrowRight:Bm,ArrowRight:Da},props:{unlinkPanels:Boolean,parsedValue:{type:Array},type:{type:String,required:!0,validator:mE}},emits:["pick","set-picker-option","calendar-change","panel-change"],setup(e,t){const{t:n,lang:r}=ln(),/* a/o (exposed as leftDate/rightDate): months shown by the two calendars */a=H(mt().locale(r.value)),o=H(mt().locale(r.value).add(1,"month")),/* i/l (exposed as minDate/maxDate): the selected range ends */i=H(null),l=H(null),/* s/c: pending text typed into the date (s) and time (c) inputs, keyed by min/max */s=H({min:null,max:null}),c=H({min:null,max:null}),/* d/f (exposed as leftLabel/rightLabel): "<year> <year unit> <month name>" headers */d=x(()=>`${a.value.year()} ${n("el.datepicker.year")} ${n(`el.datepicker.month${a.value.month()+1}`)}`),f=x(()=>`${o.value.year()} ${n("el.datepicker.year")} 
${n(`el.datepicker.month${o.value.month()+1}`)}`),p=x(()=>a.value.year()),v=x(()=>a.value.month()),m=x(()=>o.value.year()),y=x(()=>o.value.month()),b=x(()=>!!je.length),/* C/S/w/k (exposed as minVisibleDate/maxVisibleDate/minVisibleTime/maxVisibleTime): pending input text when present, otherwise the formatted range end; the max variants fall back to minDate when maxDate is unset. */C=x(()=>s.value.min!==null?s.value.min:i.value?i.value.format(O.value):""),S=x(()=>s.value.max!==null?s.value.max:l.value||i.value?(l.value||i.value).format(O.value):""),w=x(()=>c.value.min!==null?c.value.min:i.value?i.value.format($.value):""),k=x(()=>c.value.max!==null?c.value.max:l.value||i.value?(l.value||i.value).format($.value):""),/* $/O: formats derived from the shared format by RM and DM; $ is exposed as timeFormat, O is used for the date text */$=x(()=>RM(He)),O=x(()=>DM(He)),/* T/_ (leftPrevYear/leftPrevMonth): move the left calendar back and, unless the panels are unlinked, re-derive the right one. */T=()=>{a.value=a.value.subtract(1,"year"),e.unlinkPanels||(o.value=a.value.add(1,"month")),z("year")},_=()=>{a.value=a.value.subtract(1,"month"),e.unlinkPanels||(o.value=a.value.add(1,"month")),z("month")},/* I/L (rightNextYear/rightNextMonth): unlinked panels advance only the right calendar; linked panels advance both. */I=()=>{e.unlinkPanels?o.value=o.value.add(1,"year"):(a.value=a.value.add(1,"year"),o.value=a.value.add(1,"month")),z("year")},L=()=>{e.unlinkPanels?o.value=o.value.add(1,"month"):(a.value=a.value.add(1,"month"),o.value=a.value.add(1,"month")),z("month")},/* j/F (leftNextYear/leftNextMonth) and N/D (rightPrevYear/rightPrevMonth): single-calendar moves; the render function only creates their buttons when unlinkPanels is set. */j=()=>{a.value=a.value.add(1,"year"),z("year")},F=()=>{a.value=a.value.add(1,"month"),z("month")},N=()=>{o.value=o.value.subtract(1,"year"),z("year")},D=()=>{o.value=o.value.subtract(1,"month"),z("month")},/* z: emits "panel-change" with both displayed dates and the changed unit */z=$e=>{t.emit("panel-change",[a.value.toDate(),o.value.toDate()],$e)},/* B (enableMonthArrow): unlinked and the month after the left calendar is still before the right calendar; M (enableYearArrow): unlinked and the calendars are at least twelve months apart beyond the adjacent month. */B=x(()=>{const $e=(v.value+1)%12,Ve=v.value+1>=12?1:0;return e.unlinkPanels&&new Date(p.value+Ve,$e)<new Date(m.value,y.value)}),M=x(()=>e.unlinkPanels&&m.value*12+y.value-(p.value*12+v.value+1)>=12),/* E (registered as "isValidValue"): a two-element array whose first value is not after the second */E=$e=>Array.isArray($e)&&$e[0]&&$e[1]&&$e[0].valueOf()<=$e[1].valueOf(),K=H({endDate:null,selecting:!1}),/* W (btnDisabled): true unless both ends are set, no selection is in progress and the pair is valid */W=x(()=>!(i.value&&l.value&&!K.value.selecting&&E([i.value,l.value]))),Y=$e=>{K.value=$e},q=$e=>{K.value.selecting=$e,$e||(K.value.endDate=null)},J=x(()=>e.type==="datetime"||e.type==="datetimerange"),/* ne (handleConfirm): emits "pick" with the range when it is valid */ne=($e=!1)=>{E([i.value,l.value])&&t.emit("pick",[i.value,l.value],$e)},/* oe: when a default time is configured, returns that time (indexed by Ve when it is an array) carrying the picked year, month and date; otherwise the picked value itself */oe=($e,Ve)=>{if(!!$e)return lt?mt(lt[Ve]||lt).locale(r.value).year($e.year()).month($e.month()).date($e.date()):$e},/* Q (handleRangePick): its body continues on the next line */Q=($e,Ve=!0)=>{const 
st=$e.minDate,Dt=$e.maxDate,Ut=oe(st,0),Ft=oe(Dt,1);/* Nothing happens when both ends are unchanged; otherwise "calendar-change" is emitted, the ends are stored, and the range is confirmed immediately unless Ve is false or time inputs are shown. */l.value===Ft&&i.value===Ut||(t.emit("calendar-change",[st.toDate(),Dt&&Dt.toDate()]),l.value=Ft,i.value=Ut,!(!Ve||J.value)&&ne())},/* ae (handleShortcutClick): emits the shortcut's two-element value, or calls its onClick with the setup context */ae=$e=>{const Ve=typeof $e.value=="function"?$e.value():$e.value;if(Ve){t.emit("pick",[mt(Ve[0]).locale(r.value),mt(Ve[1]).locale(r.value)]);return}$e.onClick&&$e.onClick(t)},/* de/be (minTimePickerVisible/maxTimePickerVisible) with their close helpers Ee and Pe */de=H(!1),be=H(!1),Ee=()=>{de.value=!1},Pe=()=>{be.value=!1},/* Be (handleDateInput): stores the typed text, and when it parses to a date that disabledDate accepts, moves the matching calendar and range end; linked panels shift the other side by one month. */Be=($e,Ve)=>{s.value[Ve]=$e;const st=mt($e,O.value).locale(r.value);if(st.isValid()){if(ee&&ee(st.toDate()))return;Ve==="min"?(a.value=st,i.value=(i.value||a.value).year(st.year()).month(st.month()).date(st.date()),e.unlinkPanels||(o.value=st.add(1,"month"),l.value=i.value.add(1,"month"))):(o.value=st,l.value=(l.value||o.value).year(st.year()).month(st.month()).date(st.date()),e.unlinkPanels||(a.value=st.subtract(1,"month"),i.value=l.value.subtract(1,"month")))}},/* te (handleDateChange): clears the pending date text */te=($e,Ve)=>{s.value[Ve]=null},/* ie (handleTimeInput): stores the typed text, applies a valid time to the matching range end and pulls the other end along so the maximum never precedes the minimum. */ie=($e,Ve)=>{c.value[Ve]=$e;const st=mt($e,$.value).locale(r.value);st.isValid()&&(Ve==="min"?(de.value=!0,i.value=(i.value||a.value).hour(st.hour()).minute(st.minute()).second(st.second()),(!l.value||l.value.isBefore(i.value))&&(l.value=i.value)):(be.value=!0,l.value=(l.value||o.value).hour(st.hour()).minute(st.minute()).second(st.second()),o.value=l.value,l.value&&l.value.isBefore(i.value)&&(i.value=l.value)))},/* ge (handleTimeChange): clears the pending time text, syncs the calendar to the range end and closes the time picker */ge=($e,Ve)=>{c.value[Ve]=null,Ve==="min"?(a.value=i.value,de.value=!1):(o.value=l.value,be.value=!1)},/* ke/xe (handleMinTimePick/handleMaxTimePick): apply a time chosen in the time panel unless text is pending for that side */ke=($e,Ve,st)=>{c.value.min||($e&&(a.value=$e,i.value=(i.value||a.value).hour($e.hour()).minute($e.minute()).second($e.second())),st||(de.value=Ve),(!l.value||l.value.isBefore(i.value))&&(l.value=i.value,o.value=$e))},xe=($e,Ve,st)=>{c.value.max||($e&&(o.value=$e,l.value=(l.value||o.value).hour($e.hour()).minute($e.minute()).second($e.second())),st||(be.value=Ve),l.value&&l.value.isBefore(i.value)&&(i.value=l.value))},/* Ie (handleClear): resets both calendars from the default value and emits "pick" with null */Ie=()=>{a.value=ue()[0],o.value=a.value.add(1,"month"),t.emit("pick",null)},/* ye (registered as "formatToString"): formats a value or each element of an array with the shared format */ye=$e=>Array.isArray($e)?$e.map(Ve=>Ve.format(He)):$e.format(He),p
e=$e=>Array.isArray($e)?$e.map(Ve=>mt(Ve,He).locale(r.value)):mt($e,He).locale(r.value),/* ue: initial [left, right] calendar dates. For an array default value the right date is one month after the left unless the panels are unlinked; otherwise they are the single default value (or the current moment) and the month after it. */ue=()=>{let $e;if(Array.isArray(_e.value)){const Ve=mt(_e.value[0]);let st=mt(_e.value[1]);return e.unlinkPanels||(st=Ve.add(1,"month")),[Ve,st]}else _e.value?$e=mt(_e.value):$e=mt();return $e=$e.locale(r.value),[$e,$e.add(1,"month")]};/* Register the helpers with the parent through "set-picker-option". */t.emit("set-picker-option",["isValidValue",E]),t.emit("set-picker-option",["parseUserInput",pe]),t.emit("set-picker-option",["formatToString",ye]),t.emit("set-picker-option",["handleClear",Ie]);/* Shared options read from the object found under "EP_PICKER_BASE"; the closures above reference these names but only run after this point. */const Ce=ve("EP_PICKER_BASE"),{shortcuts:je,disabledDate:ee,cellClassName:me,format:He,defaultTime:lt,arrowControl:Ye,clearable:he}=Ce.props,_e=yn(Ce.props,"defaultValue");/* A new default value clears the selection and resets both calendars. */return ce(()=>_e.value,$e=>{if($e){const Ve=ue();i.value=null,l.value=null,a.value=Ve[0],o.value=Ve[1]}},{immediate:!0}),/* A two-element parsedValue becomes the selected range: unlinked panels show the end month on the right (one month later when both ends share a month); linked panels show the month after the start and copy the end's time onto it. */ce(()=>e.parsedValue,$e=>{if($e&&$e.length===2)if(i.value=$e[0],l.value=$e[1],a.value=i.value,e.unlinkPanels&&l.value){const Ve=i.value.year(),st=i.value.month(),Dt=l.value.year(),Ut=l.value.month();o.value=Ve===Dt&&st===Ut?l.value.add(1,"month"):l.value}else o.value=a.value.add(1,"month"),l.value&&(o.value=o.value.hour(l.value.hour()).minute(l.value.minute()).second(l.value.second()));else{const 
Ve=ue();i.value=null,l.value=null,a.value=Ve[0],o.value=Ve[1]}},{immediate:!0}),/* Bindings exposed to the render function; the short names are the setup locals defined on the preceding lines. */{shortcuts:je,disabledDate:ee,cellClassName:me,minTimePickerVisible:de,maxTimePickerVisible:be,handleMinTimeClose:Ee,handleMaxTimeClose:Pe,handleShortcutClick:ae,rangeState:K,minDate:i,maxDate:l,handleRangePick:Q,onSelect:q,handleChangeRange:Y,btnDisabled:W,enableYearArrow:M,enableMonthArrow:B,rightPrevMonth:D,rightPrevYear:N,rightNextMonth:L,rightNextYear:I,leftPrevMonth:_,leftPrevYear:T,leftNextMonth:F,leftNextYear:j,hasShortcuts:b,leftLabel:d,rightLabel:f,leftDate:a,rightDate:o,showTime:J,t:n,minVisibleDate:C,maxVisibleDate:S,minVisibleTime:w,maxVisibleTime:k,arrowControl:Ye,handleDateInput:Be,handleDateChange:te,handleTimeInput:ie,handleTimeChange:ge,handleMinTimePick:ke,handleMaxTimePick:xe,handleClear:Ie,handleConfirm:ne,timeFormat:$,clearable:he}}}),/* Hoisted attribute objects and dynamic-prop name lists referenced by the range panel render function. */tie={class:"el-picker-panel__body-wrapper"},nie={key:0,class:"el-picker-panel__sidebar"},rie=["onClick"],aie={class:"el-picker-panel__body"},oie={key:0,class:"el-date-range-picker__time-header"},iie={class:"el-date-range-picker__editors-wrap"},lie={class:"el-date-range-picker__time-picker-wrap"},sie={class:"el-date-range-picker__time-picker-wrap"},uie={class:"el-date-range-picker__editors-wrap is-right"},cie={class:"el-date-range-picker__time-picker-wrap"},die={class:"el-date-range-picker__time-picker-wrap"},fie={class:"el-picker-panel__content el-date-range-picker__content is-left"},hie={class:"el-date-range-picker__header"},pie=["disabled"],vie=["disabled"],mie={class:"el-picker-panel__content el-date-range-picker__content is-right"},gie={class:"el-date-range-picker__header"},yie=["disabled"],bie=["disabled"],Cie={key:0,class:"el-picker-panel__footer"};/* Render function of the range panel: resolves the child components and the clickoutside directive by name; the vnode tree it returns continues on the next lines. */function wie(e,t,n,r,a,o){const i=we("el-input"),l=we("time-pick-panel"),s=we("arrow-right"),c=we("el-icon"),d=we("d-arrow-left"),f=we("arrow-left"),p=we("d-arrow-right"),v=we("date-table"),m=we("el-button"),y=pa("clickoutside");return R(),X("div",{class:U(["el-picker-panel 
el-date-range-picker",[{"has-sidebar":e.$slots.sidebar||e.hasShortcuts,"has-time":e.showTime}]])},[Z("div",tie,[Oe(e.$slots,"sidebar",{class:"el-picker-panel__sidebar"}),e.hasShortcuts?(R(),X("div",nie,[(R(!0),X(Fe,null,Rt(e.shortcuts,(b,C)=>(R(),X("button",{key:C,type:"button",class:"el-picker-panel__shortcut",onClick:S=>e.handleShortcutClick(b)},Me(b.text),9,rie))),128))])):se("v-if",!0),Z("div",aie,[e.showTime?(R(),X("div",oie,[Z("span",iie,[Z("span",lie,[g(i,{size:"small",disabled:e.rangeState.selecting,placeholder:e.t("el.datepicker.startDate"),class:"el-date-range-picker__editor","model-value":e.minVisibleDate,onInput:t[0]||(t[0]=b=>e.handleDateInput(b,"min")),onChange:t[1]||(t[1]=b=>e.handleDateChange(b,"min"))},null,8,["disabled","placeholder","model-value"])]),at((R(),X("span",sie,[g(i,{size:"small",class:"el-date-range-picker__editor",disabled:e.rangeState.selecting,placeholder:e.t("el.datepicker.startTime"),"model-value":e.minVisibleTime,onFocus:t[2]||(t[2]=b=>e.minTimePickerVisible=!0),onInput:t[3]||(t[3]=b=>e.handleTimeInput(b,"min")),onChange:t[4]||(t[4]=b=>e.handleTimeChange(b,"min"))},null,8,["disabled","placeholder","model-value"]),g(l,{visible:e.minTimePickerVisible,format:e.timeFormat,"datetime-role":"start","time-arrow-control":e.arrowControl,"parsed-value":e.leftDate,onPick:e.handleMinTimePick},null,8,["visible","format","time-arrow-control","parsed-value","onPick"])])),[[y,e.handleMinTimeClose]])]),Z("span",null,[g(c,null,{default:re(()=>[g(s)]),_:1})]),Z("span",uie,[Z("span",cie,[g(i,{size:"small",class:"el-date-range-picker__editor",disabled:e.rangeState.selecting,placeholder:e.t("el.datepicker.endDate"),"model-value":e.maxVisibleDate,readonly:!e.minDate,onInput:t[5]||(t[5]=b=>e.handleDateInput(b,"max")),onChange:t[6]||(t[6]=b=>e.handleDateChange(b,"max"))},null,8,["disabled","placeholder","model-value","readonly"])]),at((R(),X("span",die,[g(i,{size:"small",class:"el-date-range-picker__editor",disabled:e.rangeState.selecting,placeholder:e.t("
el.datepicker.endTime"),"model-value":e.maxVisibleTime,readonly:!e.minDate,onFocus:t[7]||(t[7]=b=>e.minDate&&(e.maxTimePickerVisible=!0)),onInput:t[8]||(t[8]=b=>e.handleTimeInput(b,"max")),onChange:t[9]||(t[9]=b=>e.handleTimeChange(b,"max"))},null,8,["disabled","placeholder","model-value","readonly"]),g(l,{"datetime-role":"end",visible:e.maxTimePickerVisible,format:e.timeFormat,"time-arrow-control":e.arrowControl,"parsed-value":e.rightDate,onPick:e.handleMaxTimePick},null,8,["visible","format","time-arrow-control","parsed-value","onPick"])])),[[y,e.handleMaxTimeClose]])])])):se("v-if",!0),Z("div",fie,[Z("div",hie,[Z("button",{type:"button",class:"el-picker-panel__icon-btn d-arrow-left",onClick:t[10]||(t[10]=(...b)=>e.leftPrevYear&&e.leftPrevYear(...b))},[g(c,null,{default:re(()=>[g(d)]),_:1})]),Z("button",{type:"button",class:"el-picker-panel__icon-btn arrow-left",onClick:t[11]||(t[11]=(...b)=>e.leftPrevMonth&&e.leftPrevMonth(...b))},[g(c,null,{default:re(()=>[g(f)]),_:1})]),e.unlinkPanels?(R(),X("button",{key:0,type:"button",disabled:!e.enableYearArrow,class:U([{"is-disabled":!e.enableYearArrow},"el-picker-panel__icon-btn d-arrow-right"]),onClick:t[12]||(t[12]=(...b)=>e.leftNextYear&&e.leftNextYear(...b))},[g(c,null,{default:re(()=>[g(p)]),_:1})],10,pie)):se("v-if",!0),e.unlinkPanels?(R(),X("button",{key:1,type:"button",disabled:!e.enableMonthArrow,class:U([{"is-disabled":!e.enableMonthArrow},"el-picker-panel__icon-btn 
arrow-right"]),onClick:t[13]||(t[13]=(...b)=>e.leftNextMonth&&e.leftNextMonth(...b))},[g(c,null,{default:re(()=>[g(s)]),_:1})],10,vie)):se("v-if",!0),Z("div",null,Me(e.leftLabel),1)]),g(v,{"selection-mode":"range",date:e.leftDate,"min-date":e.minDate,"max-date":e.maxDate,"range-state":e.rangeState,"disabled-date":e.disabledDate,"cell-class-name":e.cellClassName,onChangerange:e.handleChangeRange,onPick:e.handleRangePick,onSelect:e.onSelect},null,8,["date","min-date","max-date","range-state","disabled-date","cell-class-name","onChangerange","onPick","onSelect"])]),Z("div",mie,[Z("div",gie,[e.unlinkPanels?(R(),X("button",{key:0,type:"button",disabled:!e.enableYearArrow,class:U([{"is-disabled":!e.enableYearArrow},"el-picker-panel__icon-btn d-arrow-left"]),onClick:t[14]||(t[14]=(...b)=>e.rightPrevYear&&e.rightPrevYear(...b))},[g(c,null,{default:re(()=>[g(d)]),_:1})],10,yie)):se("v-if",!0),e.unlinkPanels?(R(),X("button",{key:1,type:"button",disabled:!e.enableMonthArrow,class:U([{"is-disabled":!e.enableMonthArrow},"el-picker-panel__icon-btn arrow-left"]),onClick:t[15]||(t[15]=(...b)=>e.rightPrevMonth&&e.rightPrevMonth(...b))},[g(c,null,{default:re(()=>[g(f)]),_:1})],10,bie)):se("v-if",!0),Z("button",{type:"button",class:"el-picker-panel__icon-btn d-arrow-right",onClick:t[16]||(t[16]=(...b)=>e.rightNextYear&&e.rightNextYear(...b))},[g(c,null,{default:re(()=>[g(p)]),_:1})]),Z("button",{type:"button",class:"el-picker-panel__icon-btn 
arrow-right",onClick:t[17]||(t[17]=(...b)=>e.rightNextMonth&&e.rightNextMonth(...b))},[g(c,null,{default:re(()=>[g(s)]),_:1})]),Z("div",null,Me(e.rightLabel),1)]),g(v,{"selection-mode":"range",date:e.rightDate,"min-date":e.minDate,"max-date":e.maxDate,"range-state":e.rangeState,"disabled-date":e.disabledDate,"cell-class-name":e.cellClassName,onChangerange:e.handleChangeRange,onPick:e.handleRangePick,onSelect:e.onSelect},null,8,["date","min-date","max-date","range-state","disabled-date","cell-class-name","onChangerange","onPick","onSelect"])])])]),e.showTime?(R(),X("div",Cie,[e.clearable?(R(),fe(m,{key:0,size:"small",type:"text",class:"el-picker-panel__link-btn",onClick:e.handleClear},{default:re(()=>[yt(Me(e.t("el.datepicker.clear")),1)]),_:1},8,["onClick"])):se("v-if",!0),g(m,{plain:"",size:"small",class:"el-picker-panel__link-btn",disabled:e.btnDisabled,onClick:t[18]||(t[18]=b=>e.handleConfirm(!1))},{default:re(()=>[yt(Me(e.t("el.datepicker.confirm")),1)]),_:1},8,["disabled"])])):se("v-if",!0)],2)}var Sie=Ae(eie,[["render",wie],["__file","/home/runner/work/element-plus/element-plus/packages/components/date-picker/src/date-picker-com/panel-date-range.vue"]]);const kie=G({components:{MonthTable:c5,ElIcon:ft,DArrowLeft:Fm,DArrowRight:Bm},props:{unlinkPanels:Boolean,parsedValue:{type:Array}},emits:["pick","set-picker-option"],setup(e,t){const{t:n,lang:r}=ln(),a=H(mt().locale(r.value)),o=H(mt().locale(r.value).add(1,"year")),i=x(()=>!!F.length),l=B=>{const M=typeof B.value=="function"?B.value():B.value;if(M){t.emit("pick",[mt(M[0]).locale(r.value),mt(M[1]).locale(r.value)]);return}B.onClick&&B.onClick(t)},s=()=>{a.value=a.value.subtract(1,"year"),e.unlinkPanels||(o.value=o.value.subtract(1,"year"))},c=()=>{e.unlinkPanels||(a.value=a.value.add(1,"year")),o.value=o.value.add(1,"year")},d=()=>{a.value=a.value.add(1,"year")},f=()=>{o.value=o.value.subtract(1,"year")},p=x(()=>`${a.value.year()} ${n("el.datepicker.year")}`),v=x(()=>`${o.value.year()} 
${n("el.datepicker.year")}`),m=x(()=>a.value.year()),y=x(()=>o.value.year()===a.value.year()?a.value.year()+1:o.value.year()),b=x(()=>e.unlinkPanels&&y.value>m.value+1),C=H(null),S=H(null),w=H({endDate:null,selecting:!1}),k=B=>{w.value=B},$=(B,M=!0)=>{const E=B.minDate,K=B.maxDate;S.value===K&&C.value===E||(S.value=K,C.value=E,M&&T())},O=B=>Array.isArray(B)&&B&&B[0]&&B[1]&&B[0].valueOf()<=B[1].valueOf(),T=(B=!1)=>{O([C.value,S.value])&&t.emit("pick",[C.value,S.value],B)},_=B=>{w.value.selecting=B,B||(w.value.endDate=null)},I=B=>B.map(M=>M.format(D)),L=()=>{let B;if(Array.isArray(z.value)){const M=mt(z.value[0]);let E=mt(z.value[1]);return e.unlinkPanels||(E=M.add(1,"year")),[M,E]}else z.value?B=mt(z.value):B=mt();return B=B.locale(r.value),[B,B.add(1,"year")]};t.emit("set-picker-option",["formatToString",I]);const j=ve("EP_PICKER_BASE"),{shortcuts:F,disabledDate:N,format:D}=j.props,z=yn(j.props,"defaultValue");return ce(()=>z.value,B=>{if(B){const M=L();a.value=M[0],o.value=M[1]}},{immediate:!0}),ce(()=>e.parsedValue,B=>{if(B&&B.length===2)if(C.value=B[0],S.value=B[1],a.value=C.value,e.unlinkPanels&&S.value){const M=C.value.year(),E=S.value.year();o.value=M===E?S.value.add(1,"year"):S.value}else o.value=a.value.add(1,"year");else{const M=L();C.value=null,S.value=null,a.value=M[0],o.value=M[1]}},{immediate:!0}),{shortcuts:F,disabledDate:N,onSelect:_,handleRangePick:$,rangeState:w,handleChangeRange:k,minDate:C,maxDate:S,enableYearArrow:b,leftLabel:p,rightLabel:v,leftNextYear:d,leftPrevYear:s,rightNextYear:c,rightPrevYear:f,t:n,leftDate:a,rightDate:o,hasShortcuts:i,handleShortcutClick:l}}}),$ie={class:"el-picker-panel__body-wrapper"},Oie={key:0,class:"el-picker-panel__sidebar"},Pie=["onClick"],Tie={class:"el-picker-panel__body"},xie={class:"el-picker-panel__content el-date-range-picker__content is-left"},_ie={class:"el-date-range-picker__header"},Eie=["disabled"],Mie={class:"el-picker-panel__content el-date-range-picker__content 
is-right"},Iie={class:"el-date-range-picker__header"},Nie=["disabled"];function Aie(e,t,n,r,a,o){const i=we("d-arrow-left"),l=we("el-icon"),s=we("d-arrow-right"),c=we("month-table");return R(),X("div",{class:U(["el-picker-panel el-date-range-picker",[{"has-sidebar":e.$slots.sidebar||e.hasShortcuts}]])},[Z("div",$ie,[Oe(e.$slots,"sidebar",{class:"el-picker-panel__sidebar"}),e.hasShortcuts?(R(),X("div",Oie,[(R(!0),X(Fe,null,Rt(e.shortcuts,(d,f)=>(R(),X("button",{key:f,type:"button",class:"el-picker-panel__shortcut",onClick:p=>e.handleShortcutClick(d)},Me(d.text),9,Pie))),128))])):se("v-if",!0),Z("div",Tie,[Z("div",xie,[Z("div",_ie,[Z("button",{type:"button",class:"el-picker-panel__icon-btn d-arrow-left",onClick:t[0]||(t[0]=(...d)=>e.leftPrevYear&&e.leftPrevYear(...d))},[g(l,null,{default:re(()=>[g(i)]),_:1})]),e.unlinkPanels?(R(),X("button",{key:0,type:"button",disabled:!e.enableYearArrow,class:U([{"is-disabled":!e.enableYearArrow},"el-picker-panel__icon-btn d-arrow-right"]),onClick:t[1]||(t[1]=(...d)=>e.leftNextYear&&e.leftNextYear(...d))},[g(l,null,{default:re(()=>[g(s)]),_:1})],10,Eie)):se("v-if",!0),Z("div",null,Me(e.leftLabel),1)]),g(c,{"selection-mode":"range",date:e.leftDate,"min-date":e.minDate,"max-date":e.maxDate,"range-state":e.rangeState,"disabled-date":e.disabledDate,onChangerange:e.handleChangeRange,onPick:e.handleRangePick,onSelect:e.onSelect},null,8,["date","min-date","max-date","range-state","disabled-date","onChangerange","onPick","onSelect"])]),Z("div",Mie,[Z("div",Iie,[e.unlinkPanels?(R(),X("button",{key:0,type:"button",disabled:!e.enableYearArrow,class:U([{"is-disabled":!e.enableYearArrow},"el-picker-panel__icon-btn d-arrow-left"]),onClick:t[2]||(t[2]=(...d)=>e.rightPrevYear&&e.rightPrevYear(...d))},[g(l,null,{default:re(()=>[g(i)]),_:1})],10,Nie)):se("v-if",!0),Z("button",{type:"button",class:"el-picker-panel__icon-btn 
d-arrow-right",onClick:t[3]||(t[3]=(...d)=>e.rightNextYear&&e.rightNextYear(...d))},[g(l,null,{default:re(()=>[g(s)]),_:1})]),Z("div",null,Me(e.rightLabel),1)]),g(c,{"selection-mode":"range",date:e.rightDate,"min-date":e.minDate,"max-date":e.maxDate,"range-state":e.rangeState,"disabled-date":e.disabledDate,onChangerange:e.handleChangeRange,onPick:e.handleRangePick,onSelect:e.onSelect},null,8,["date","min-date","max-date","range-state","disabled-date","onChangerange","onPick","onSelect"])])])])],2)}var Die=Ae(kie,[["render",Aie],["__file","/home/runner/work/element-plus/element-plus/packages/components/date-picker/src/date-picker-com/panel-month-range.vue"]]);mt.extend(bM);mt.extend(loe);mt.extend(tw);mt.extend(soe);mt.extend(uoe);mt.extend(coe);mt.extend(doe);mt.extend(foe);const Rie=function(e){return e==="daterange"||e==="datetimerange"?Sie:e==="monthrange"?Die:Qoe};var Lie=G({name:"ElDatePicker",install:null,props:Ke(Te({},nw),{type:{type:String,default:"date"}}),emits:["update:modelValue"],setup(e,t){ot("ElPopperOptions",e.popperOptions),ot(s5,{ctx:t});const n=H(null),r=Ke(Te({},e),{focus:(a=!0)=>{var o;(o=n.value)==null||o.focus(a)}});return t.expose(r),()=>{var a;const o=(a=e.format)!=null?a:$te[e.type]||td;return qe(wM,Ke(Te({},e),{format:o,type:e.type,ref:n,"onUpdate:modelValue":i=>t.emit("update:modelValue",i)}),{default:i=>qe(Rie(e.type),i),"range-separator":()=>Oe(t.slots,"range-separator")})}}});const wp=Lie;wp.install=e=>{e.component(wp.name,wp)};const Fie=wp,lw="elDescriptions";var j4=G({name:"ElDescriptionsCell",props:{cell:{type:Object},tag:{type:String},type:{type:String}},setup(){return{descriptions:ve(lw,{})}},render(){var e,t,n,r,a,o;const i=XX(this.cell),{border:l,direction:s}=this.descriptions,c=s==="vertical",d=((n=(t=(e=this.cell)==null?void 0:e.children)==null?void 0:t.label)==null?void 0:n.call(t))||i.label,f=(o=(a=(r=this.cell)==null?void 0:r.children)==null?void 0:a.default)==null?void 
0:o.call(a),p=i.span,v=i.align?`is-${i.align}`:"",m=i.labelAlign?`is-${i.labelAlign}`:v,y=i.className,b=i.labelClassName,C={width:oo(i.width),minWidth:oo(i.minWidth)},S=De("descriptions");switch(this.type){case"label":return qe(this.tag,{style:C,class:[S.e("cell"),S.e("label"),S.is("bordered-label",l),S.is("vertical-label",c),m,b],colSpan:c?p:1},d);case"content":return qe(this.tag,{style:C,class:[S.e("cell"),S.e("content"),S.is("bordered-content",l),S.is("vertical-content",c),v,y],colSpan:c?p:p*2-1},f);default:return qe("td",{style:C,class:[S.e("cell"),v],colSpan:p},[qe("span",{class:[S.e("label"),b]},d),qe("span",{class:[S.e("content"),y]},f)])}}});const Bie=G({name:"ElDescriptionsRow",components:{[j4.name]:j4},props:{row:{type:Array}},setup(){return{descriptions:ve(lw,{})}}}),Vie={key:1};function zie(e,t,n,r,a,o){const i=we("el-descriptions-cell");return e.descriptions.direction==="vertical"?(R(),X(Fe,{key:0},[Z("tr",null,[(R(!0),X(Fe,null,Rt(e.row,(l,s)=>(R(),fe(i,{key:`tr1-${s}`,cell:l,tag:"th",type:"label"},null,8,["cell"]))),128))]),Z("tr",null,[(R(!0),X(Fe,null,Rt(e.row,(l,s)=>(R(),fe(i,{key:`tr2-${s}`,cell:l,tag:"td",type:"content"},null,8,["cell"]))),128))])],64)):(R(),X("tr",Vie,[(R(!0),X(Fe,null,Rt(e.row,(l,s)=>(R(),X(Fe,{key:`tr3-${s}`},[e.descriptions.border?(R(),X(Fe,{key:0},[g(i,{cell:l,tag:"td",type:"label"},null,8,["cell"]),g(i,{cell:l,tag:"td",type:"content"},null,8,["cell"])],64)):(R(),fe(i,{key:1,cell:l,tag:"td",type:"both"},null,8,["cell"]))],64))),128))]))}var K4=Ae(Bie,[["render",zie],["__file","/home/runner/work/element-plus/element-plus/packages/components/descriptions/src/descriptions-row.vue"]]);const Hie=G({name:"ElDescriptions",components:{[K4.name]:K4},props:{border:{type:Boolean,default:!1},column:{type:Number,default:3},direction:{type:String,default:"horizontal"},size:{type:String,validator:va},title:{type:String,default:""},extra:{type:String,default:""}},setup(e,{slots:t}){ot(lw,e);const 
n=Gn(),r=De("descriptions"),a=x(()=>[r.b(),r.is(r.m(n.value),!!n.value)]),o=s=>{const c=Array.isArray(s)?s:[s],d=[];return c.forEach(f=>{Array.isArray(f.children)?d.push(...o(f.children)):d.push(f)}),d},i=(s,c,d,f=!1)=>(s.props||(s.props={}),c>d&&(s.props.span=d),f&&(s.props.span=c),s);return{descriptionKls:a,getRows:()=>{var s;const c=o((s=t.default)==null?void 0:s.call(t)).filter(m=>{var y;return((y=m==null?void 0:m.type)==null?void 0:y.name)==="ElDescriptionsItem"}),d=[];let f=[],p=e.column,v=0;return c.forEach((m,y)=>{var b;const C=((b=m.props)==null?void 0:b.span)||1;if(y<c.length-1&&(v+=C>p?p:C),y===c.length-1){const S=e.column-v%e.column;f.push(i(m,S,p,!0)),d.push(f);return}C<p?(p-=C,f.push(m)):(f.push(i(m,C,p)),d.push(f),p=e.column,f=[])}),d},ns:r}}});function jie(e,t,n,r,a,o){const i=we("el-descriptions-row");return R(),X("div",{class:U(e.descriptionKls)},[e.title||e.extra||e.$slots.title||e.$slots.extra?(R(),X("div",{key:0,class:U(e.ns.e("header"))},[Z("div",{class:U(e.ns.e("title"))},[Oe(e.$slots,"title",{},()=>[yt(Me(e.title),1)])],2),Z("div",{class:U(e.ns.e("extra"))},[Oe(e.$slots,"extra",{},()=>[yt(Me(e.extra),1)])],2)],2)):se("v-if",!0),Z("div",{class:U(e.ns.e("body"))},[Z("table",{class:U([e.ns.e("table"),e.ns.is("bordered",e.border)])},[Z("tbody",null,[(R(!0),X(Fe,null,Rt(e.getRows(),(l,s)=>(R(),fe(i,{key:s,row:l},null,8,["row"]))),128))])],2)],2)],2)}var Kie=Ae(Hie,[["render",jie],["__file","/home/runner/work/element-plus/element-plus/packages/components/descriptions/src/index.vue"]]),d5=G({name:"ElDescriptionsItem",props:{label:{type:String,default:""},span:{type:Number,default:1},width:{type:[String,Number],default:""},minWidth:{type:[String,Number],default:""},align:{type:String,default:"left"},labelAlign:{type:String,default:""},className:{type:String,default:""},labelClassName:{type:String,default:""}}});const 
Wie=xt(Kie,{DescriptionsItem:d5}),Uie=En(d5),Yie=Ze({mask:{type:Boolean,default:!0},customMaskEvent:{type:Boolean,default:!1},overlayClass:{type:Le([String,Array,Object])},zIndex:{type:Le([String,Number])}}),qie={click:e=>e instanceof MouseEvent};var Gie=G({name:"ElOverlay",props:Yie,emits:qie,setup(e,{slots:t,emit:n}){const r=De("overlay"),a=s=>{n("click",s)},{onClick:o,onMousedown:i,onMouseup:l}=HC(e.customMaskEvent?void 0:a);return()=>e.mask?g("div",{class:[r.b(),e.overlayClass],style:{zIndex:e.zIndex},onClick:o,onMousedown:i,onMouseup:l},[Oe(t,"default")],ka.STYLE|ka.CLASS|ka.PROPS,["onClick","onMouseup","onMousedown"]):qe("div",{class:e.overlayClass,style:{zIndex:e.zIndex,position:"fixed",top:"0px",right:"0px",bottom:"0px",left:"0px"}},[Oe(t,"default")])}});const sw=Gie,f5=Ze({center:{type:Boolean,default:!1},closeIcon:{type:wr,default:""},customClass:{type:String,default:""},draggable:{type:Boolean,default:!1},fullscreen:{type:Boolean,default:!1},showClose:{type:Boolean,default:!0},title:{type:String,default:""}}),Xie={close:()=>!0},Zie=["aria-label"],Jie={name:"ElDialogContent"},Qie=G(Ke(Te({},Jie),{props:f5,emits:Xie,setup(e){const{Close:t}=KX,{dialogRef:n,headerRef:r,ns:a,style:o}=ve(OE);return(i,l)=>(R(),X("div",{ref_key:"dialogRef",ref:n,class:U([A(a).b(),A(a).is("fullscreen",i.fullscreen),A(a).is("draggable",i.draggable),{[A(a).m("center")]:i.center},i.customClass]),"aria-modal":"true",role:"dialog","aria-label":i.title||"dialog",style:Xe(A(o)),onClick:l[1]||(l[1]=dt(()=>{},["stop"]))},[Z("div",{ref_key:"headerRef",ref:r,class:U(A(a).e("header"))},[Oe(i.$slots,"title",{},()=>[Z("span",{class:U(A(a).e("title"))},Me(i.title),3)])],2),Z("div",{class:U(A(a).e("body"))},[Oe(i.$slots,"default")],2),i.$slots.footer?(R(),X("div",{key:0,class:U(A(a).e("footer"))},[Oe(i.$slots,"footer")],2)):se("v-if",!0),i.showClose?(R(),X("button",{key:1,"aria-label":"close",class:U(A(a).e("headerbtn")),type:"button",onClick:l[0]||(l[0]=s=>i.$emit("close"))},[g(A(ft),{class:U(A(
a).e("close"))},{default:re(()=>[(R(),fe(Kt(i.closeIcon||A(t))))]),_:1},8,["class"])],2)):se("v-if",!0)],14,Zie))}}));var ele=Ae(Qie,[["__file","/home/runner/work/element-plus/element-plus/packages/components/dialog/src/dialog-content.vue"]]);const h5=Ze(Ke(Te({},f5),{appendToBody:{type:Boolean,default:!1},beforeClose:{type:Le(Function)},destroyOnClose:{type:Boolean,default:!1},closeOnClickModal:{type:Boolean,default:!0},closeOnPressEscape:{type:Boolean,default:!0},lockScroll:{type:Boolean,default:!0},modal:{type:Boolean,default:!0},openDelay:{type:Number,default:0},closeDelay:{type:Number,default:0},top:{type:String},modelValue:{type:Boolean,required:!0},modalClass:String,width:{type:[String,Number]},zIndex:{type:Number},trapFocus:{type:Boolean,default:!1}})),p5={open:()=>!0,opened:()=>!0,close:()=>!0,closed:()=>!0,[Pt]:e=>yr(e),openAutoFocus:()=>!0,closeAutoFocus:()=>!0},v5=(e,t)=>{const r=$t().emit,{nextZIndex:a}=Pi();let o="";const i=H(!1),l=H(!1),s=H(!1),c=H(e.zIndex||a());let d,f;const p=x(()=>Yt(e.width)?`${e.width}px`:e.width),v=x(()=>{const T={},_="--el-dialog";return e.fullscreen||(e.top&&(T[`${_}-margin-top`]=e.top),e.width&&(T[`${_}-width`]=p.value)),T});function m(){r("opened")}function y(){r("closed"),r(Pt,!1),e.destroyOnClose&&(s.value=!1)}function b(){r("close")}function C(){f==null||f(),d==null||d(),e.openDelay&&e.openDelay>0?{stop:d}=gs(()=>$(),e.openDelay):$()}function S(){d==null||d(),f==null||f(),e.closeDelay&&e.closeDelay>0?{stop:f}=gs(()=>O(),e.closeDelay):O()}function w(){function T(_){_||(l.value=!0,i.value=!1)}e.beforeClose?e.beforeClose(T):S()}function k(){e.closeOnClickModal&&w()}function $(){!Bt||(i.value=!0)}function O(){i.value=!1}return 
e.lockScroll&&RE(i),e.closeOnPressEscape&&LE({handleClose:w},i),FE(i),ce(()=>e.modelValue,T=>{T?(l.value=!1,C(),s.value=!0,r("open"),c.value=e.zIndex?c.value++:a(),Ne(()=>{t.value&&(t.value.scrollTop=0)})):i.value&&S()}),ce(()=>e.fullscreen,T=>{!t.value||(T?(o=t.value.style.transform,t.value.style.transform=""):t.value.style.transform=o)}),et(()=>{e.modelValue&&(i.value=!0,s.value=!0,C())}),{afterEnter:m,afterLeave:y,beforeLeave:b,handleClose:w,onModalClick:k,close:S,doClose:O,closed:l,style:v,rendered:s,visible:i,zIndex:c}},tle={name:"ElDialog"},nle=G(Ke(Te({},tle),{props:h5,emits:p5,setup(e,{expose:t}){const n=e,r=De("dialog"),a=H(),o=H(),{visible:i,style:l,rendered:s,zIndex:c,afterEnter:d,afterLeave:f,beforeLeave:p,handleClose:v,onModalClick:m}=v5(n,a);ot(OE,{dialogRef:a,headerRef:o,ns:r,rendered:s,style:l});const y=HC(m),b=x(()=>n.draggable&&!n.fullscreen);return DE(a,o,b),t({visible:i}),(C,S)=>(R(),fe(Ps,{to:"body",disabled:!C.appendToBody},[g(Vn,{name:"dialog-fade",onAfterEnter:A(d),onAfterLeave:A(f),onBeforeLeave:A(p)},{default:re(()=>[at(g(A(sw),{"custom-mask-event":"",mask:C.modal,"overlay-class":C.modalClass,"z-index":A(c)},{default:re(()=>[Z("div",{class:U(`${A(r).namespace.value}-overlay-dialog`),onClick:S[0]||(S[0]=(...w)=>A(y).onClick&&A(y).onClick(...w)),onMousedown:S[1]||(S[1]=(...w)=>A(y).onMousedown&&A(y).onMousedown(...w)),onMouseup:S[2]||(S[2]=(...w)=>A(y).onMouseup&&A(y).onMouseup(...w))},[A(s)?(R(),fe(ele,{key:0,"custom-class":C.customClass,center:C.center,"close-icon":C.closeIcon,draggable:A(b),fullscreen:C.fullscreen,"show-close":C.showClose,style:Xe(A(l)),title:C.title,onClose:A(v)},sl({title:re(()=>[Oe(C.$slots,"title")]),default:re(()=>[Oe(C.$slots,"default")]),_:2},[C.$slots.footer?{name:"footer",fn:re(()=>[Oe(C.$slots,"footer")])}:void 
0]),1032,["custom-class","center","close-icon","draggable","fullscreen","show-close","style","title","onClose"])):se("v-if",!0)],34)]),_:3},8,["mask","overlay-class","z-index"]),[[_t,A(i)]])]),_:3},8,["onAfterEnter","onAfterLeave","onBeforeLeave"])],8,["disabled"]))}}));var rle=Ae(nle,[["__file","/home/runner/work/element-plus/element-plus/packages/components/dialog/src/dialog.vue"]]);const ale=xt(rle),ole=Ze({direction:{type:String,values:["horizontal","vertical"],default:"horizontal"},contentPosition:{type:String,values:["left","center","right"],default:"center"},borderStyle:{type:Le(String),default:"solid"}}),ile={name:"ElDivider"},lle=G(Ke(Te({},ile),{props:ole,setup(e){const t=e,n=De("divider"),r=x(()=>({"--el-border-style":t.borderStyle}));return(a,o)=>(R(),X("div",{class:U([A(n).b(),A(n).m(a.direction)]),style:Xe(A(r))},[a.$slots.default&&a.direction!=="vertical"?(R(),X("div",{key:0,class:U([A(n).e("text"),A(n).is(a.contentPosition)])},[Oe(a.$slots,"default")],2)):se("v-if",!0)],6))}}));var sle=Ae(lle,[["__file","/home/runner/work/element-plus/element-plus/packages/components/divider/src/divider.vue"]]);const ule=xt(sle),cle=Ze(Ke(Te({},h5),{direction:{type:String,default:"rtl",values:["ltr","rtl","ttb","btt"]},size:{type:[String,Number],default:"30%"},withHeader:{type:Boolean,default:!0},modalFade:{type:Boolean,default:!0}})),dle=p5,fle=G({name:"ElDrawer",components:{ElOverlay:sw,ElIcon:ft,Close:Ma},directives:{TrapFocus:kM},props:cle,emits:dle,setup(e,t){const n=H(),r=De("drawer"),a=x(()=>e.direction==="rtl"||e.direction==="ltr"),o=x(()=>typeof e.size=="number"?`${e.size}px`:e.size);return Ke(Te({},v5(e,t,n)),{drawerRef:n,isHorizontal:a,drawerSize:o,ns:r})}}),hle=["aria-labelledby","aria-label"],ple=["id"],vle=["title"],mle=["aria-label"];function gle(e,t,n,r,a,o){const i=we("close"),l=we("el-icon"),s=we("el-overlay"),c=pa("trap-focus");return 
R(),fe(Ps,{to:"body",disabled:!e.appendToBody},[g(Vn,{name:e.ns.b("fade"),onAfterEnter:e.afterEnter,onAfterLeave:e.afterLeave,onBeforeLeave:e.beforeLeave},{default:re(()=>[at(g(s,{mask:e.modal,"overlay-class":e.modalClass,"z-index":e.zIndex,onClick:e.onModalClick},{default:re(()=>[at((R(),X("div",{ref:"drawerRef","aria-modal":"true","aria-labelledby":e.ns.e("title"),"aria-label":e.title,class:U([e.ns.b(),e.direction,e.visible&&"open",e.customClass]),style:Xe(e.isHorizontal?"width: "+e.drawerSize:"height: "+e.drawerSize),role:"dialog",onClick:t[1]||(t[1]=dt(()=>{},["stop"]))},[e.withHeader?(R(),X("header",{key:0,id:e.ns.e("title"),class:U(e.ns.e("header"))},[Oe(e.$slots,"title",{},()=>[Z("span",{role:"heading",title:e.title},Me(e.title),9,vle)]),e.showClose?(R(),X("button",{key:0,"aria-label":"close "+(e.title||"drawer"),class:U(e.ns.e("close-btn")),type:"button",onClick:t[0]||(t[0]=(...d)=>e.handleClose&&e.handleClose(...d))},[g(l,{class:U(e.ns.e("close"))},{default:re(()=>[g(i)]),_:1},8,["class"])],10,mle)):se("v-if",!0)],10,ple)):se("v-if",!0),e.rendered?(R(),X("section",{key:1,class:U(e.ns.e("body"))},[Oe(e.$slots,"default")],2)):se("v-if",!0),e.$slots.footer?(R(),X("div",{key:2,class:U(e.ns.e("footer"))},[Oe(e.$slots,"footer")],2)):se("v-if",!0)],14,hle)),[[c]])]),_:3},8,["mask","overlay-class","z-index","onClick"]),[[_t,e.visible]])]),_:3},8,["name","onAfterEnter","onAfterLeave","onBeforeLeave"])],8,["disabled"])}var yle=Ae(fle,[["render",gle],["__file","/home/runner/work/element-plus/element-plus/packages/components/drawer/src/drawer.vue"]]);const ble=xt(yle),m5=e=>{const t=[],n=document.createTreeWalker(e,NodeFilter.SHOW_ELEMENT,{acceptNode:r=>{const a=r.tagName==="INPUT"&&r.type==="hidden";return r.disabled||r.hidden||a?NodeFilter.FILTER_SKIP:r.tabIndex>=0?NodeFilter.FILTER_ACCEPT:NodeFilter.FILTER_SKIP}});for(;n.nextNode();)t.push(n.currentNode);return t},W4=(e,t)=>{for(const n of e)if(!Cle(n,t))return 
n},Cle=(e,t)=>{if(getComputedStyle(e).visibility==="hidden")return!0;for(;e;){if(t&&e===t)return!1;if(getComputedStyle(e).display==="none")return!0;e=e.parentElement}return!1},wle=e=>{const t=m5(e),n=W4(t,e),r=W4(t.reverse(),e);return[n,r]},Sle=e=>e instanceof HTMLInputElement&&"select"in e,Al=(e,t)=>{if(e&&e.focus){const n=document.activeElement;e.focus({preventScroll:!0}),e!==n&&Sle(e)&&t&&e.select()}};function U4(e,t){const n=[...e],r=e.indexOf(t);return r!==-1&&n.splice(r,1),n}const kle=()=>{let e=[];return{push:r=>{const a=e[0];a&&r!==a&&a.pause(),e=U4(e,r),e.unshift(r)},remove:r=>{var a,o;e=U4(e,r),(o=(a=e[0])==null?void 0:a.resume)==null||o.call(a)}}},$le=(e,t=!1)=>{const n=document.activeElement;for(const r of e)if(Al(r,t),document.activeElement!==n)return},Y4=kle(),py="focus-trap.focus-on-mount",vy="focus-trap.focus-on-unmount",q4={cancelable:!0,bubbles:!1},G4="mountOnFocus",X4="unmountOnFocus",g5=Symbol("elFocusTrap"),Ole=G({name:"ElFocusTrap",inheritAttrs:!1,props:{loop:Boolean,trapped:Boolean},emits:[G4,X4],setup(e,{emit:t}){const n=H(),r=H(null);let a,o;const i={paused:!1,pause(){this.paused=!0},resume(){this.paused=!1}},l=v=>{if(!e.loop&&!e.trapped||i.paused)return;const{key:m,altKey:y,ctrlKey:b,metaKey:C,currentTarget:S,shiftKey:w}=v,{loop:k}=e,$=m===Ge.tab&&!y&&!b&&!C,O=document.activeElement;if($&&O){const T=S,[_,I]=wle(T);_&&I?!w&&O===I?(v.preventDefault(),k&&Al(_,!0)):w&&O===_&&(v.preventDefault(),k&&Al(I,!0)):O===T&&v.preventDefault()}};ot(g5,{focusTrapRef:r,onKeydown:l});const s=v=>{t(G4,v)},c=v=>t(X4,v),d=v=>{const m=A(r);if(i.paused||!m)return;const y=v.target;y&&m.contains(y)?o=y:Al(o,!0)},f=v=>{const m=A(r);i.paused||!m||m.contains(v.relatedTarget)||Al(o,!0)},p=()=>{document.removeEventListener("focusin",d),document.removeEventListener("focusout",f)};return et(()=>{const v=A(r);if(v){Y4.push(i);const m=document.activeElement;if(a=m,!v.contains(m)){const b=new 
Event(py,q4);v.addEventListener(py,s),v.dispatchEvent(b),b.defaultPrevented||Ne(()=>{$le(m5(v),!0),document.activeElement===m&&Al(v)})}}ce(()=>e.trapped,m=>{m?(document.addEventListener("focusin",d),document.addEventListener("focusout",f)):p()},{immediate:!0})}),Lt(()=>{p();const v=A(r);if(v){v.removeEventListener(py,s);const m=new Event(vy,q4);v.addEventListener(vy,c),v.dispatchEvent(m),m.defaultPrevented||Al(a!=null?a:document.body,!0),v.removeEventListener(vy,s),Y4.remove(i)}}),{focusTrapRef:n,forwardRef:r,onKeydown:l}}});function Ple(e,t,n,r,a,o){return Oe(e.$slots,"default")}var Tle=Ae(Ole,[["render",Ple],["__file","/home/runner/work/element-plus/element-plus/packages/components/focus-trap/src/focus-trap.vue"]]);const xle=G({inheritAttrs:!1});function _le(e,t,n,r,a,o){return Oe(e.$slots,"default")}var Ele=Ae(xle,[["render",_le],["__file","/home/runner/work/element-plus/element-plus/packages/components/collection/src/collection.vue"]]);const Mle=G({name:"ElCollectionItem",inheritAttrs:!1});function Ile(e,t,n,r,a,o){return Oe(e.$slots,"default")}var Nle=Ae(Mle,[["render",Ile],["__file","/home/runner/work/element-plus/element-plus/packages/components/collection/src/collection-item.vue"]]);const y5="data-el-collection-item",b5=e=>{const t=`El${e}Collection`,n=`${t}Item`,r=Symbol(t),a=Symbol(n),o=Ke(Te({},Ele),{name:t,setup(){const l=H(null),s=new Map;ot(r,{itemMap:s,getItems:()=>{const d=A(l);if(!d)return[];const f=Array.from(d.querySelectorAll(`[${y5}]`));return[...s.values()].sort((m,y)=>f.indexOf(m.ref)-f.indexOf(y.ref))},collectionRef:l})}}),i=Ke(Te({},Nle),{name:n,setup(l,{attrs:s}){const c=H(null),d=ve(r,void 0);ot(a,{collectionItemRef:c}),et(()=>{const f=A(c);f&&d.itemMap.set(f,Te({ref:f},s))}),Lt(()=>{const 
f=A(c);d.itemMap.delete(f)})}});return{COLLECTION_INJECTION_KEY:r,COLLECTION_ITEM_INJECTION_KEY:a,ElCollection:o,ElCollectionItem:i}},Ale=Ze({style:{type:Le([String,Array,Object])},currentTabId:{type:Le(String)},defaultCurrentTabId:String,loop:Boolean,dir:{type:String,values:["ltr","rtl"],default:"ltr"},orientation:{type:Le(String)},onBlur:Function,onFocus:Function,onMousedown:Function}),{ElCollection:Dle,ElCollectionItem:Rle,COLLECTION_INJECTION_KEY:uw,COLLECTION_ITEM_INJECTION_KEY:Lle}=b5("RovingFocusGroup"),cw=Symbol("elRovingFocusGroup"),C5=Symbol("elRovingFocusGroupItem"),Fle={ArrowLeft:"prev",ArrowUp:"prev",ArrowRight:"next",ArrowDown:"next",PageUp:"first",Home:"first",PageDown:"last",End:"last"},Ble=(e,t)=>{if(t!=="rtl")return e;switch(e){case Ge.right:return Ge.left;case Ge.left:return Ge.right;default:return e}},Vle=(e,t,n)=>{const r=Ble(e.key,n);if(!(t==="vertical"&&[Ge.left,Ge.right].includes(r))&&!(t==="horizontal"&&[Ge.up,Ge.down].includes(r)))return Fle[r]},zle=(e,t)=>e.map((n,r)=>e[(r+t)%e.length]),dw=e=>{const{activeElement:t}=document;for(const n of e)if(n===t||(n.focus(),t!==document.activeElement))return},Z4="currentTabIdChange",my="rovingFocusGroup.entryFocus",Hle={bubbles:!1,cancelable:!0},jle=G({name:"ElRovingFocusGroupImpl",inheritAttrs:!1,props:Ale,emits:[Z4,"entryFocus"],setup(e,{emit:t}){var n;const r=H((n=e.currentTabId||e.defaultCurrentTabId)!=null?n:null),a=H(!1),o=H(!1),i=H(null),{getItems:l}=ve(uw,void 0),s=x(()=>[{outline:"none"},e.style]),c=y=>{t(Z4,y)},d=()=>{a.value=!0},f=Tn(y=>{var b;(b=e.onMousedown)==null||b.call(e,y)},()=>{o.value=!0}),p=Tn(y=>{var b;(b=e.onFocus)==null||b.call(e,y)},y=>{const b=!A(o),{target:C,currentTarget:S}=y;if(C===S&&b&&!A(a)){const w=new Event(my,Hle);if(S==null||S.dispatchEvent(w),!w.defaultPrevented){const k=l().filter(I=>I.focusable),$=k.find(I=>I.active),O=k.find(I=>I.id===A(r)),_=[$,O,...k].filter(Boolean).map(I=>I.ref);dw(_)}}o.value=!1}),v=Tn(y=>{var 
b;(b=e.onBlur)==null||b.call(e,y)},()=>{a.value=!1}),m=(...y)=>{t("entryFocus",...y)};ot(cw,{currentTabbedId:Cf(r),loop:yn(e,"loop"),tabIndex:x(()=>A(a)?-1:0),rovingFocusGroupRef:i,rovingFocusGroupRootStyle:s,orientation:yn(e,"orientation"),dir:yn(e,"dir"),onItemFocus:c,onItemShiftTab:d,onBlur:v,onFocus:p,onMousedown:f}),ce(()=>e.currentTabId,y=>{r.value=y!=null?y:null}),et(()=>{const y=A(i);gn(y,my,m)}),Lt(()=>{const y=A(i);Bn(y,my,m)})}});function Kle(e,t,n,r,a,o){return Oe(e.$slots,"default")}var Wle=Ae(jle,[["render",Kle],["__file","/home/runner/work/element-plus/element-plus/packages/components/roving-focus-group/src/roving-focus-group-impl.vue"]]);const Ule=G({name:"ElRovingFocusGroup",components:{ElFocusGroupCollection:Dle,ElRovingFocusGroupImpl:Wle}});function Yle(e,t,n,r,a,o){const i=we("el-roving-focus-group-impl"),l=we("el-focus-group-collection");return R(),fe(l,null,{default:re(()=>[g(i,Za(ll(e.$attrs)),{default:re(()=>[Oe(e.$slots,"default")]),_:3},16)]),_:3})}var qle=Ae(Ule,[["render",Yle],["__file","/home/runner/work/element-plus/element-plus/packages/components/roving-focus-group/src/roving-focus-group.vue"]]);const Gle=G({components:{ElRovingFocusCollectionItem:Rle},props:{focusable:{type:Boolean,default:!0},active:{type:Boolean,default:!1}},emits:["mousedown","focus","keydown"],setup(e,{emit:t}){const{currentTabbedId:n,loop:r,onItemFocus:a,onItemShiftTab:o}=ve(cw,void 0),{getItems:i}=ve(uw,void 0),l=jC(),s=H(null),c=Tn(v=>{t("mousedown",v)},v=>{e.focusable?a(A(l)):v.preventDefault()}),d=Tn(v=>{t("focus",v)},()=>{a(A(l))}),f=Tn(v=>{t("keydown",v)},v=>{const{key:m,shiftKey:y,target:b,currentTarget:C}=v;if(m===Ge.tab&&y){o();return}if(b!==C)return;const S=Vle(v);if(S){v.preventDefault();let k=i().filter($=>$.focusable).map($=>$.ref);switch(S){case"last":{k.reverse();break}case"prev":case"next":{S==="prev"&&k.reverse();const $=k.indexOf(C);k=r.value?zle(k,$+1):k.slice($+1);break}}Ne(()=>{dw(k)})}}),p=x(()=>n.value===A(l));return 
ot(C5,{rovingFocusGroupItemRef:s,tabIndex:x(()=>A(p)?0:-1),handleMousedown:c,handleFocus:d,handleKeydown:f}),{id:l,handleKeydown:f,handleFocus:d,handleMousedown:c}}});function Xle(e,t,n,r,a,o){const i=we("el-roving-focus-collection-item");return R(),fe(i,{id:e.id,focusable:e.focusable,active:e.active},{default:re(()=>[Oe(e.$slots,"default")]),_:3},8,["id","focusable","active"])}var Zle=Ae(Gle,[["render",Xle],["__file","/home/runner/work/element-plus/element-plus/packages/components/roving-focus-group/src/roving-focus-item.vue"]]);const Sp=Ze({trigger:qd.trigger,effect:Ke(Te({},mr.effect),{default:"light"}),type:{type:Le(String)},placement:{type:Le(String),default:"bottom"},popperOptions:{type:Le(Object),default:()=>({})},size:{type:String,default:""},splitButton:Boolean,hideOnClick:{type:Boolean,default:!0},loop:{type:Boolean},showTimeout:{type:Number,default:150},hideTimeout:{type:Number,default:150},tabindex:{type:Le([Number,String]),default:0},maxHeight:{type:Le([Number,String]),default:""},popperClass:{type:String,default:""},disabled:{type:Boolean,default:!1},buttonProps:{type:Le(Object)}}),w5=Ze({command:{type:[Object,String,Number],default:()=>({})},disabled:Boolean,divided:Boolean,textValue:String,icon:{type:wr}}),Jle=Ze({onKeydown:{type:Le(Function)}}),Qle=[Ge.down,Ge.pageDown,Ge.home],S5=[Ge.up,Ge.pageUp,Ge.end],ese=[...Qle,...S5],{ElCollection:tse,ElCollectionItem:nse,COLLECTION_INJECTION_KEY:rse,COLLECTION_ITEM_INJECTION_KEY:ase}=b5("Dropdown"),fw=Symbol("elDropdown"),{ButtonGroup:ose}=xa,ise=G({name:"ElDropdown",components:{ElButton:xa,ElFocusTrap:Tle,ElButtonGroup:ose,ElScrollbar:xi,ElDropdownCollection:tse,ElTooltip:Ur,ElRovingFocusGroup:qle,ElIcon:ft,ArrowDown:_s},props:Sp,emits:["visible-change","click","command"],setup(e,{emit:t}){const n=$t(),r=De("dropdown"),a=H(),o=H(),i=H(null),l=H(null),s=H(null),c=H(null),d=H(!1),f=x(()=>({maxHeight:oo(e.maxHeight)})),p=x(()=>[r.m(b.value)]);function v(){m()}function m(){var 
_;(_=i.value)==null||_.onClose()}function y(){var _;(_=i.value)==null||_.onOpen()}const b=Gn();function C(..._){t("command",..._)}function S(){}function w(){const _=A(l);_==null||_.focus(),c.value=null}function k(_){c.value=_}function $(_){d.value||(_.preventDefault(),_.stopImmediatePropagation())}return ot(fw,{contentRef:l,isUsingKeyboard:d,onItemEnter:S,onItemLeave:w}),ot("elDropdown",{instance:n,dropdownSize:b,handleClick:v,commandHandler:C,trigger:yn(e,"trigger"),hideOnClick:yn(e,"hideOnClick")}),{ns:r,scrollbar:s,wrapStyle:f,dropdownTriggerKls:p,dropdownSize:b,currentTabId:c,handleCurrentTabIdChange:k,handlerMainButtonClick:_=>{t("click",_)},handleEntryFocus:$,handleClose:m,handleOpen:y,onMountOnFocus:_=>{var I,L;_.preventDefault(),(L=(I=l.value)==null?void 0:I.focus)==null||L.call(I,{preventScroll:!0})},popperRef:i,triggeringElementRef:a,referenceElementRef:o}}});function lse(e,t,n,r,a,o){var i;const l=we("el-dropdown-collection"),s=we("el-roving-focus-group"),c=we("el-focus-trap"),d=we("el-scrollbar"),f=we("el-tooltip"),p=we("el-button"),v=we("arrow-down"),m=we("el-icon"),y=we("el-button-group");return R(),X("div",{class:U([e.ns.b(),e.ns.is("disabled",e.disabled)])},[g(f,{ref:"popperRef",effect:e.effect,"fallback-placements":["bottom","top"],"popper-options":e.popperOptions,"gpu-acceleration":!1,"hide-after":e.trigger==="hover"?e.hideTimeout:0,"manual-mode":!0,placement:e.placement,"popper-class":[e.ns.e("popper"),e.popperClass],"reference-element":(i=e.referenceElementRef)==null?void 
0:i.$el,trigger:e.trigger,"show-after":e.trigger==="hover"?e.showTimeout:0,"stop-popper-mouse-event":!1,"virtual-ref":e.triggeringElementRef,"virtual-triggering":e.splitButton,disabled:e.disabled,"append-to-body":"",pure:"",transition:`${e.ns.namespace.value}-zoom-in-top`,persistent:"",onShow:t[0]||(t[0]=b=>e.$emit("visible-change",!0)),onHide:t[1]||(t[1]=b=>e.$emit("visible-change",!1))},sl({content:re(()=>[g(d,{ref:"scrollbar","wrap-style":e.wrapStyle,tag:"div","view-class":e.ns.e("list")},{default:re(()=>[g(c,{trapped:"",onMountOnFocus:e.onMountOnFocus},{default:re(()=>[g(s,{loop:e.loop,"current-tab-id":e.currentTabId,orientation:"horizontal",onCurrentTabIdChange:e.handleCurrentTabIdChange,onEntryFocus:e.handleEntryFocus},{default:re(()=>[g(l,null,{default:re(()=>[Oe(e.$slots,"dropdown")]),_:3})]),_:3},8,["loop","current-tab-id","onCurrentTabIdChange","onEntryFocus"])]),_:3},8,["onMountOnFocus"])]),_:3},8,["wrap-style","view-class"])]),_:2},[e.splitButton?void 0:{name:"default",fn:re(()=>[Z("div",{class:U(e.dropdownTriggerKls)},[Oe(e.$slots,"default")],2)])}]),1032,["effect","popper-options","hide-after","placement","popper-class","reference-element","trigger","show-after","virtual-ref","virtual-triggering","disabled","transition"]),e.splitButton?(R(),fe(y,{key:0},{default:re(()=>[g(p,hn({ref:"referenceElementRef"},e.buttonProps,{size:e.dropdownSize,type:e.type,disabled:e.disabled,onClick:e.handlerMainButtonClick}),{default:re(()=>[Oe(e.$slots,"default")]),_:3},16,["size","type","disabled","onClick"]),g(p,hn({ref:"triggeringElementRef"},e.buttonProps,{size:e.dropdownSize,type:e.type,class:e.ns.e("caret-button"),disabled:e.disabled}),{default:re(()=>[g(m,{class:U(e.ns.e("icon"))},{default:re(()=>[g(v)]),_:1},8,["class"])]),_:1},16,["size","type","class","disabled"])]),_:3})):se("v-if",!0)],2)}var sse=Ae(ise,[["render",lse],["__file","/home/runner/work/element-plus/element-plus/packages/components/dropdown/src/dropdown.vue"]]);const 
use=G({name:"DropdownItemImpl",components:{ElIcon:ft},props:w5,emits:["pointermove","pointerleave","click","clickimpl"],setup(e,{emit:t}){const n=De("dropdown"),{collectionItemRef:r}=ve(ase,void 0),{collectionItemRef:a}=ve(Lle,void 0),{rovingFocusGroupItemRef:o,tabIndex:i,handleFocus:l,handleKeydown:s,handleMousedown:c}=ve(C5,void 0),d=FC(r,a,o),f=Tn(p=>{const{code:v}=p;if(v===Ge.enter||v===Ge.space)return p.preventDefault(),p.stopImmediatePropagation(),t("clickimpl",p),!0},s);return{ns:n,itemRef:d,dataset:{[y5]:""},tabIndex:i,handleFocus:l,handleKeydown:f,handleMousedown:c}}}),cse=["aria-disabled","tabindex"];function dse(e,t,n,r,a,o){const i=we("el-icon");return R(),X(Fe,null,[e.divided?(R(),X("li",hn({key:0,class:e.ns.bem("menu","item","divided")},e.$attrs),null,16)):se("v-if",!0),Z("li",hn({ref:e.itemRef},Te(Te({},e.dataset),e.$attrs),{"aria-disabled":e.disabled,class:[e.ns.be("menu","item"),e.ns.is("disabled",e.disabled)],tabindex:e.tabIndex,role:"menuitem",onClick:t[0]||(t[0]=l=>e.$emit("clickimpl",l)),onFocus:t[1]||(t[1]=(...l)=>e.handleFocus&&e.handleFocus(...l)),onKeydown:t[2]||(t[2]=(...l)=>e.handleKeydown&&e.handleKeydown(...l)),onMousedown:t[3]||(t[3]=(...l)=>e.handleMousedown&&e.handleMousedown(...l)),onPointermove:t[4]||(t[4]=l=>e.$emit("pointermove",l)),onPointerleave:t[5]||(t[5]=l=>e.$emit("pointerleave",l))}),[e.icon?(R(),fe(i,{key:0},{default:re(()=>[(R(),fe(Kt(e.icon)))]),_:1})):se("v-if",!0),Oe(e.$slots,"default")],16,cse)],64)}var fse=Ae(use,[["render",dse],["__file","/home/runner/work/element-plus/element-plus/packages/components/dropdown/src/dropdown-item-impl.vue"]]);const k5=()=>{const e=ve("elDropdown",{}),t=x(()=>e==null?void 
0:e.dropdownSize);return{elDropdown:e,_elDropdownSize:t}},hse=G({name:"ElDropdownItem",components:{ElDropdownCollectionItem:nse,ElRovingFocusItem:Zle,ElDropdownItemImpl:fse},inheritAttrs:!1,props:w5,emits:["pointermove","pointerleave","click"],setup(e,{emit:t,attrs:n}){const{elDropdown:r}=k5(),a=$t(),o=H(null),i=x(()=>{var v,m;return(m=(v=A(o))==null?void 0:v.textContent)!=null?m:""}),{onItemEnter:l,onItemLeave:s}=ve(fw,void 0),c=Tn(v=>(t("pointermove",v),v.defaultPrevented),z$(v=>{var m;e.disabled?s(v):(l(v),v.defaultPrevented||(m=v.currentTarget)==null||m.focus())})),d=Tn(v=>(t("pointerleave",v),v.defaultPrevented),z$(v=>{s(v)})),f=Tn(v=>(t("click",v),v.defaultPrevented),v=>{var m,y,b;if(e.disabled){v.stopImmediatePropagation();return}(m=r==null?void 0:r.hideOnClick)!=null&&m.value&&((y=r.handleClick)==null||y.call(r)),(b=r.commandHandler)==null||b.call(r,e.command,a,v)}),p=x(()=>Te(Te({},e),n));return{handleClick:f,handlePointerMove:c,handlePointerLeave:d,textContent:i,propsAndAttrs:p}}});function pse(e,t,n,r,a,o){var i;const l=we("el-dropdown-item-impl"),s=we("el-roving-focus-item"),c=we("el-dropdown-collection-item");return R(),fe(c,{disabled:e.disabled,"text-value":(i=e.textValue)!=null?i:e.textContent},{default:re(()=>[g(s,{focusable:!e.disabled},{default:re(()=>[g(l,hn(e.propsAndAttrs,{onPointerleave:e.handlePointerLeave,onPointermove:e.handlePointerMove,onClickimpl:e.handleClick}),{default:re(()=>[Oe(e.$slots,"default")]),_:3},16,["onPointerleave","onPointermove","onClickimpl"])]),_:3},8,["focusable"])]),_:3},8,["disabled","text-value"])}var $5=Ae(hse,[["render",pse],["__file","/home/runner/work/element-plus/element-plus/packages/components/dropdown/src/dropdown-item.vue"]]);const vse=G({name:"ElDropdownMenu",props:Jle,setup(e){const t=De("dropdown"),{_elDropdownSize:n}=k5(),r=n.value,{focusTrapRef:a,onKeydown:o}=ve(g5,void 0),{contentRef:i}=ve(fw,void 0),{collectionRef:l,getItems:s}=ve(rse,void 
0),{rovingFocusGroupRef:c,rovingFocusGroupRootStyle:d,tabIndex:f,onBlur:p,onFocus:v,onMousedown:m}=ve(cw,void 0),{collectionRef:y}=ve(uw,void 0),b=x(()=>[t.b("menu"),t.bm("menu",r==null?void 0:r.value)]),C=FC(i,l,a,c,y),S=Tn(k=>{var $;($=e.onKeydown)==null||$.call(e,k)},k=>{const{currentTarget:$,code:O,target:T}=k;if($.contains(T),Ge.tab===O&&k.stopImmediatePropagation(),k.preventDefault(),T!==A(i)||!ese.includes(O))return;const I=s().filter(L=>!L.disabled).map(L=>L.ref);S5.includes(O)&&I.reverse(),dw(I)});return{size:r,rovingFocusGroupRootStyle:d,tabIndex:f,dropdownKls:b,dropdownListWrapperRef:C,handleKeydown:k=>{S(k),o(k)},onBlur:p,onFocus:v,onMousedown:m}}});function mse(e,t,n,r,a,o){return R(),X("ul",{ref:e.dropdownListWrapperRef,class:U(e.dropdownKls),style:Xe(e.rovingFocusGroupRootStyle),tabindex:-1,role:"menu",onBlur:t[0]||(t[0]=(...i)=>e.onBlur&&e.onBlur(...i)),onFocus:t[1]||(t[1]=(...i)=>e.onFocus&&e.onFocus(...i)),onKeydown:t[2]||(t[2]=(...i)=>e.handleKeydown&&e.handleKeydown(...i)),onMousedown:t[3]||(t[3]=(...i)=>e.onMousedown&&e.onMousedown(...i))},[Oe(e.$slots,"default")],38)}var O5=Ae(vse,[["render",mse],["__file","/home/runner/work/element-plus/element-plus/packages/components/dropdown/src/dropdown-menu.vue"]]);const gse=xt(sse,{DropdownItem:$5,DropdownMenu:O5}),yse=En($5),bse=En(O5);let Cse=0;const wse=G({name:"ImgEmpty",setup(){return{id:++Cse}}}),Sse={viewBox:"0 0 79 
86",version:"1.1",xmlns:"http://www.w3.org/2000/svg","xmlns:xlink":"http://www.w3.org/1999/xlink"},kse=["id"],$se=Z("stop",{"stop-color":"var(--el-empty-fill-color-1)",offset:"0%"},null,-1),Ose=Z("stop",{"stop-color":"var(--el-empty-fill-color-4)",offset:"100%"},null,-1),Pse=[$se,Ose],Tse=["id"],xse=Z("stop",{"stop-color":"var(--el-empty-fill-color-1)",offset:"0%"},null,-1),_se=Z("stop",{"stop-color":"var(--el-empty-fill-color-6)",offset:"100%"},null,-1),Ese=[xse,_se],Mse=["id"],Ise={id:"Illustrations",stroke:"none","stroke-width":"1",fill:"none","fill-rule":"evenodd"},Nse={id:"B-type",transform:"translate(-1268.000000, -535.000000)"},Ase={id:"Group-2",transform:"translate(1268.000000, 535.000000)"},Dse=Z("path",{id:"Oval-Copy-2",d:"M39.5,86 C61.3152476,86 79,83.9106622 79,81.3333333 C79,78.7560045 57.3152476,78 35.5,78 C13.6847524,78 0,78.7560045 0,81.3333333 C0,83.9106622 17.6847524,86 39.5,86 Z",fill:"var(--el-empty-fill-color-3)"},null,-1),Rse=Z("polygon",{id:"Rectangle-Copy-14",fill:"var(--el-empty-fill-color-7)",transform:"translate(27.500000, 51.500000) scale(1, -1) translate(-27.500000, -51.500000) ",points:"13 58 53 58 42 45 2 45"},null,-1),Lse={id:"Group-Copy",transform:"translate(34.500000, 31.500000) scale(-1, 1) rotate(-25.000000) translate(-34.500000, -31.500000) translate(7.000000, 10.000000)"},Fse=Z("polygon",{id:"Rectangle-Copy-10",fill:"var(--el-empty-fill-color-7)",transform:"translate(11.500000, 5.000000) scale(1, -1) translate(-11.500000, -5.000000) ",points:"2.84078316e-14 3 18 3 23 7 5 7"},null,-1),Bse=Z("polygon",{id:"Rectangle-Copy-11",fill:"var(--el-empty-fill-color-5)",points:"-3.69149156e-15 7 38 7 38 43 -3.69149156e-15 43"},null,-1),Vse=["fill"],zse=Z("polygon",{id:"Rectangle-Copy-13",fill:"var(--el-empty-fill-color-2)",transform:"translate(39.500000, 3.500000) scale(-1, 1) translate(-39.500000, -3.500000) ",points:"24 7 41 7 55 -3.63806207e-12 38 
-3.63806207e-12"},null,-1),Hse=["fill"],jse={id:"Rectangle-Copy-17",transform:"translate(53.000000, 45.000000)"},Kse=["id"],Wse=["xlink:href"],Use=["xlink:href"],Yse=["mask"],qse=Z("polygon",{id:"Rectangle-Copy-18",fill:"var(--el-empty-fill-color-2)",transform:"translate(66.000000, 51.500000) scale(-1, 1) translate(-66.000000, -51.500000) ",points:"62 45 79 45 70 58 53 58"},null,-1);function Gse(e,t,n,r,a,o){return R(),X("svg",Sse,[Z("defs",null,[Z("linearGradient",{id:`linearGradient-1-${e.id}`,x1:"38.8503086%",y1:"0%",x2:"61.1496914%",y2:"100%"},Pse,8,kse),Z("linearGradient",{id:`linearGradient-2-${e.id}`,x1:"0%",y1:"9.5%",x2:"100%",y2:"90.5%"},Ese,8,Tse),Z("rect",{id:`path-3-${e.id}`,x:"0",y:"0",width:"17",height:"36"},null,8,Mse)]),Z("g",Ise,[Z("g",Nse,[Z("g",Ase,[Dse,Rse,Z("g",Lse,[Fse,Bse,Z("rect",{id:"Rectangle-Copy-12",fill:`url(#linearGradient-1-${e.id})`,transform:"translate(46.500000, 25.000000) scale(-1, 1) translate(-46.500000, -25.000000) ",x:"38",y:"7",width:"17",height:"36"},null,8,Vse),zse]),Z("rect",{id:"Rectangle-Copy-15",fill:`url(#linearGradient-2-${e.id})`,x:"13",y:"45",width:"40",height:"36"},null,8,Hse),Z("g",jse,[Z("mask",{id:`mask-4-${e.id}`,fill:"var(--el-empty-fill-color-0)"},[Z("use",{"xlink:href":`#path-3-${e.id}`},null,8,Wse)],8,Kse),Z("use",{id:"Mask",fill:"var(--el-empty-fill-color-8)",transform:"translate(8.500000, 18.000000) scale(-1, 1) translate(-8.500000, -18.000000) ","xlink:href":`#path-3-${e.id}`},null,8,Use),Z("polygon",{id:"Rectangle-Copy",fill:"var(--el-empty-fill-color-9)",mask:`url(#mask-4-${e.id})`,transform:"translate(12.000000, 9.000000) scale(-1, 1) translate(-12.000000, -9.000000) ",points:"7 0 24 0 20 18 -1.70530257e-13 16"},null,8,Yse)]),qse])])])])}var Xse=Ae(wse,[["render",Gse],["__file","/home/runner/work/element-plus/element-plus/packages/components/empty/src/img-empty.vue"]]);const 
Zse={image:{type:String,default:""},imageSize:Number,description:{type:String,default:""}},Jse=["src"],Qse={key:1},eue={name:"ElEmpty"},tue=G(Ke(Te({},eue),{props:Zse,setup(e){const t=e,{t:n}=ln(),r=De("empty"),a=x(()=>t.description||n("el.table.emptyText")),o=x(()=>({width:t.imageSize?`${t.imageSize}px`:""}));return(i,l)=>(R(),X("div",{class:U(A(r).b())},[Z("div",{class:U(A(r).e("image")),style:Xe(A(o))},[i.image?(R(),X("img",{key:0,src:i.image,ondragstart:"return false"},null,8,Jse)):Oe(i.$slots,"image",{key:1},()=>[g(Xse)])],6),Z("div",{class:U(A(r).e("description"))},[i.$slots.description?Oe(i.$slots,"description",{key:0}):(R(),X("p",Qse,Me(A(a)),1))],2),i.$slots.default?(R(),X("div",{key:0,class:U(A(r).e("bottom"))},[Oe(i.$slots,"default")],2)):se("v-if",!0)],2))}}));var nue=Ae(tue,[["__file","/home/runner/work/element-plus/element-plus/packages/components/empty/src/empty.vue"]]);const rue=xt(nue),aue=Ze({model:Object,rules:{type:Le(Object)},labelPosition:String,labelWidth:{type:[String,Number],default:""},labelSuffix:{type:String,default:""},inline:Boolean,inlineMessage:Boolean,statusIcon:Boolean,showMessage:{type:Boolean,default:!0},size:{type:String,values:Bo},disabled:Boolean,validateOnRuleChange:{type:Boolean,default:!0},hideRequiredAsterisk:{type:Boolean,default:!1},scrollToError:Boolean}),oue={validate:(e,t,n)=>(pt(e)||wt(e))&&yr(t)&&wt(n)};function iue(){const e=H([]),t=x(()=>{if(!e.value.length)return"0";const o=Math.max(...e.value);return o?`${o}px`:""});function n(o){return e.value.indexOf(o)}function r(o,i){if(o&&i){const l=n(i);e.value.splice(l,1,o)}else o&&e.value.push(o)}function a(o){const i=n(o);i>-1&&e.value.splice(i,1)}return{autoLabelWidth:t,registerLabelWidth:r,deregisterLabelWidth:a}}const ch=(e,t)=>{const n=Hd(t);return n.length>0?e.filter(r=>r.prop&&n.includes(r.prop)):e},lue={name:"ElForm"},sue=G(Ke(Te({},lue),{props:aue,emits:oue,setup(e,{expose:t,emit:n}){const 
r=e,a=[],o=Gn(),i=De("form"),l=x(()=>{const{labelPosition:S,inline:w}=r;return[i.b(),i.m(o.value||"default"),{[i.m(`label-${S}`)]:S,[i.m("inline")]:w}]}),s=S=>{a.push(S)},c=S=>{S.prop&&a.splice(a.indexOf(S),1)},d=(S=[])=>{!r.model||ch(a,S).forEach(w=>w.resetField())},f=(S=[])=>{ch(a,S).forEach(w=>w.clearValidate())},p=x(()=>!!r.model),v=S=>{if(a.length===0)return[];const w=ch(a,S);return w.length?w:[]},m=async S=>b(void 0,S),y=async(S=[])=>{if(!p.value)return!1;const w=v(S);if(w.length===0)return!0;let k={};for(const $ of w)try{await $.validate("")}catch(O){k=Te(Te({},k),O)}return Object.keys(k).length===0?!0:Promise.reject(k)},b=async(S=[],w)=>{const k=!Ct(w);try{const $=await y(S);return $===!0&&(w==null||w($)),$}catch($){const O=$;return r.scrollToError&&C(Object.keys(O)[0]),w==null||w(!1,O),k&&Promise.reject(O)}},C=S=>{var w;const k=ch(a,S)[0];k&&((w=k.$el)==null||w.scrollIntoView())};return ce(()=>r.rules,()=>{r.validateOnRuleChange&&m()},{deep:!0}),ot(ga,bt(Te(Ke(Te({},or(r)),{emit:n,resetFields:d,clearValidate:f,validateField:b,addField:s,removeField:c}),iue()))),t({validate:m,validateField:b,resetFields:d,clearValidate:f,scrollToField:C}),(S,w)=>(R(),X("form",{class:U(A(l))},[Oe(S.$slots,"default")],2))}}));var uue=Ae(sue,[["__file","/home/runner/work/element-plus/element-plus/packages/components/form/src/form.vue"]]);function ql(){return ql=Object.assign||function(e){for(var t=1;t<arguments.length;t++){var n=arguments[t];for(var r in n)Object.prototype.hasOwnProperty.call(n,r)&&(e[r]=n[r])}return e},ql.apply(this,arguments)}function cue(e,t){e.prototype=Object.create(t.prototype),e.prototype.constructor=e,Xd(e,t)}function N0(e){return N0=Object.setPrototypeOf?Object.getPrototypeOf:function(n){return n.__proto__||Object.getPrototypeOf(n)},N0(e)}function Xd(e,t){return Xd=Object.setPrototypeOf||function(r,a){return r.__proto__=a,r},Xd(e,t)}function due(){if(typeof Reflect=="undefined"||!Reflect.construct||Reflect.construct.sham)return!1;if(typeof 
Proxy=="function")return!0;try{return Boolean.prototype.valueOf.call(Reflect.construct(Boolean,[],function(){})),!0}catch{return!1}}function kp(e,t,n){return due()?kp=Reflect.construct:kp=function(a,o,i){var l=[null];l.push.apply(l,o);var s=Function.bind.apply(a,l),c=new s;return i&&Xd(c,i.prototype),c},kp.apply(null,arguments)}function fue(e){return Function.toString.call(e).indexOf("[native code]")!==-1}function A0(e){var t=typeof Map=="function"?new Map:void 0;return A0=function(r){if(r===null||!fue(r))return r;if(typeof r!="function")throw new TypeError("Super expression must either be null or a function");if(typeof t!="undefined"){if(t.has(r))return t.get(r);t.set(r,a)}function a(){return kp(r,arguments,N0(this).constructor)}return a.prototype=Object.create(r.prototype,{constructor:{value:a,enumerable:!1,writable:!0,configurable:!0}}),Xd(a,r)},A0(e)}var hue=/%[sdj%]/g,pue=function(){};typeof process!="undefined"&&process.env;function D0(e){if(!e||!e.length)return null;var t={};return e.forEach(function(n){var r=n.field;t[r]=t[r]||[],t[r].push(n)}),t}function ca(e){for(var t=arguments.length,n=new Array(t>1?t-1:0),r=1;r<t;r++)n[r-1]=arguments[r];var a=0,o=n.length;if(typeof e=="function")return e.apply(null,n);if(typeof e=="string"){var i=e.replace(hue,function(l){if(l==="%%")return"%";if(a>=o)return l;switch(l){case"%s":return String(n[a++]);case"%d":return Number(n[a++]);case"%j":try{return JSON.stringify(n[a++])}catch{return"[Circular]"}break;default:return l}});return i}return e}function vue(e){return e==="string"||e==="url"||e==="hex"||e==="email"||e==="date"||e==="pattern"}function lr(e,t){return!!(e==null||t==="array"&&Array.isArray(e)&&!e.length||vue(t)&&typeof e=="string"&&!e)}function mue(e,t,n){var r=[],a=0,o=e.length;function i(l){r.push.apply(r,l||[]),a++,a===o&&n(r)}e.forEach(function(l){t(l,i)})}function J4(e,t,n){var r=0,a=e.length;function o(i){if(i&&i.length){n(i);return}var l=r;r=r+1,l<a?t(e[l],o):n([])}o([])}function gue(e){var t=[];return 
Object.keys(e).forEach(function(n){t.push.apply(t,e[n]||[])}),t}
// Q4: error class built on A0(Error) through the cue inheritance helper. Instances are
// created with the message "Async Validation Error" and carry `errors` (first
// constructor argument) and `fields` (second constructor argument).
var Q4=function(e){cue(t,e);function t(n,r){var a;return a=e.call(this,"Async Validation Error")||this,a.errors=n,a.fields=r,a}return t}(A0(Error));
// yue(e, t, n, r, a): runs the per-rule function `n` over `e`, an object mapping a key
// to a list of rule descriptors, and returns a promise.
//  - t.first: all lists are flattened with gue and run one at a time through J4, so the
//    run stops at the first rule that reports errors.
//  - otherwise every key is processed; keys listed in t.firstFields (all keys when it is
//    exactly true) run through J4, the rest through mue. Errors are accumulated in `d`
//    and the run finishes once all `s` keys have reported (immediately when there are none).
// On completion `r(errors)` is called; the promise rejects with a Q4 carrying the errors
// and their D0 grouping when there are any, otherwise it resolves with `a`.
// A pass-through catch handler is attached to the promise before the original promise
// (not the derived one) is returned.
function yue(e,t,n,r,a){if(t.first){var o=new Promise(function(p,v){var m=function(C){return r(C),C.length?v(new Q4(C,D0(C))):p(a)},y=gue(e);J4(y,n,m)});return o.catch(function(p){return p}),o}var i=t.firstFields===!0?Object.keys(e):t.firstFields||[],l=Object.keys(e),s=l.length,c=0,d=[],f=new Promise(function(p,v){var m=function(b){if(d.push.apply(d,b),c++,c===s)return r(d),d.length?v(new Q4(d,D0(d))):p(a)};l.length||(r(d),p(a)),l.forEach(function(y){var b=e[y];i.indexOf(y)!==-1?J4(b,n,m):mue(b,n,m)})});return f.catch(function(p){return p}),f}
// bue: true when `e` is truthy and has a `message` property that is not undefined.
function bue(e){return!!(e&&e.message!==void 0)}
// Cue(e, t): follows the key path `t` through `e`; returns the intermediate value as
// soon as it is null/undefined instead of dereferencing it.
function Cue(e,t){for(var n=e,r=0;r<t.length;r++){if(n==null)return n;n=n[t[r]]}return n}
// eO(rule, source): returns a mapper that turns a reported error `n` into an error object.
// The field value is read through rule.fullFields (via Cue) when present, else from
// source[n.field || rule.fullField]. Objects that already satisfy bue() are annotated in
// place with `field` and `fieldValue`; anything else becomes { message, fieldValue, field },
// where a function `n` is called to obtain the message.
function eO(e,t){return function(n){var r;return e.fullFields?r=Cue(t,e.fullFields):r=t[n.field||e.fullField],bue(n)?(n.field=n.field||e.fullField,n.fieldValue=r,n):{message:typeof n=="function"?n():n,fieldValue:r,field:n.field||e.fullField}}}
// tO(e, t): copies the own enumerable properties of `t` onto `e` (mutating and returning
// `e`). When both the existing and the new value have typeof "object" the two are merged
// one level deep with ql; otherwise the new value replaces the old one.
function tO(e,t){if(t){for(var n in t)if(t.hasOwnProperty(n)){var r=t[n];typeof r=="object"&&typeof e[n]=="object"?e[n]=ql({},e[n],r):e[n]=r}}return e}
// P5 (rule, value, source, errors, options, type): "required" rule. Pushes the formatted
// `options.messages.required` message when the rule is required and either the source
// object has no own property named rule.field or the value is empty according to
// lr(value, type || rule.type).
var P5=function(t,n,r,a,o,i){t.required&&(!r.hasOwnProperty(t.field)||lr(n,i||t.type))&&a.push(ca(o.messages.required,t.fullField))},
// wue: "whitespace" rule. Pushes `options.messages.whitespace` when the value consists
// only of whitespace characters or is the empty string.
wue=function(t,n,r,a,o){(/^\s+$/.test(n)||n==="")&&a.push(ca(o.messages.whitespace,t.fullField))},
// gy: regular expressions used by the email / url / hex type checks (the table
// continues on the next line).
gy={email:/^(([^<>()\[\]\\.,;:\s@"]+(\.[^<>()\[\]\\.,;:\s@"]+)*)|(".+"))@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}])|(([a-zA-Z\-0-9\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]+\.)+[a-zA-Z\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]{2,}))$/,url:new 
RegExp("^(?!mailto:)(?:(?:http|https|ftp)://|//)(?:\\S+(?::\\S*)?@)?(?:(?:(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}(?:\\.(?:[0-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))|(?:(?:[a-z\\u00a1-\\uffff0-9]+-*)*[a-z\\u00a1-\\uffff0-9]+)(?:\\.(?:[a-z\\u00a1-\\uffff0-9]+-*)*[a-z\\u00a1-\\uffff0-9]+)*(?:\\.(?:[a-z\\u00a1-\\uffff]{2,})))|localhost)(?::\\d{2,5})?(?:(/|\\?|#)[^\\s]*)?$","i"),hex:/^#?([a-f0-9]{6}|[a-f0-9]{3})$/i},nd={integer:function(t){return nd.number(t)&&parseInt(t,10)===t},float:function(t){return nd.number(t)&&!nd.integer(t)},array:function(t){return Array.isArray(t)},regexp:function(t){if(t instanceof RegExp)return!0;try{return!!new RegExp(t)}catch{return!1}},date:function(t){return typeof t.getTime=="function"&&typeof t.getMonth=="function"&&typeof t.getYear=="function"&&!isNaN(t.getTime())},number:function(t){return isNaN(t)?!1:typeof t=="number"},object:function(t){return typeof t=="object"&&!nd.array(t)},method:function(t){return typeof t=="function"},email:function(t){return typeof t=="string"&&t.length<=320&&!!t.match(gy.email)},url:function(t){return typeof t=="string"&&t.length<=2048&&!!t.match(gy.url)},hex:function(t){return typeof t=="string"&&!!t.match(gy.hex)}},Sue=function(t,n,r,a,o){if(t.required&&n===void 0){P5(t,n,r,a,o);return}var i=["integer","float","array","regexp","object","method","email","number","date","url","hex"],l=t.type;i.indexOf(l)>-1?nd[l](n)||a.push(ca(o.messages.types[l],t.fullField,t.type)):l&&typeof n!==t.type&&a.push(ca(o.messages.types[l],t.fullField,t.type))},kue=function(t,n,r,a,o){var i=typeof t.len=="number",l=typeof t.min=="number",s=typeof t.max=="number",c=/[\uD800-\uDBFF][\uDC00-\uDFFF]/g,d=n,f=null,p=typeof n=="number",v=typeof 
n=="string",m=Array.isArray(n);if(p?f="number":v?f="string":m&&(f="array"),!f)return!1;m&&(d=n.length),v&&(d=n.replace(c,"_").length),i?d!==t.len&&a.push(ca(o.messages[f].len,t.fullField,t.len)):l&&!s&&d<t.min?a.push(ca(o.messages[f].min,t.fullField,t.min)):s&&!l&&d>t.max?a.push(ca(o.messages[f].max,t.fullField,t.max)):l&&s&&(d<t.min||d>t.max)&&a.push(ca(o.messages[f].range,t.fullField,t.min,t.max))},Bs="enum",$ue=function(t,n,r,a,o){t[Bs]=Array.isArray(t[Bs])?t[Bs]:[],t[Bs].indexOf(n)===-1&&a.push(ca(o.messages[Bs],t.fullField,t[Bs].join(", ")))},Oue=function(t,n,r,a,o){if(t.pattern){if(t.pattern instanceof RegExp)t.pattern.lastIndex=0,t.pattern.test(n)||a.push(ca(o.messages.pattern.mismatch,t.fullField,n,t.pattern));else if(typeof t.pattern=="string"){var i=new RegExp(t.pattern);i.test(n)||a.push(ca(o.messages.pattern.mismatch,t.fullField,n,t.pattern))}}},tn={required:P5,whitespace:wue,type:Sue,range:kue,enum:$ue,pattern:Oue},Pue=function(t,n,r,a,o){var i=[],l=t.required||!t.required&&a.hasOwnProperty(t.field);if(l){if(lr(n,"string")&&!t.required)return r();tn.required(t,n,a,i,o,"string"),lr(n,"string")||(tn.type(t,n,a,i,o),tn.range(t,n,a,i,o),tn.pattern(t,n,a,i,o),t.whitespace===!0&&tn.whitespace(t,n,a,i,o))}r(i)},Tue=function(t,n,r,a,o){var i=[],l=t.required||!t.required&&a.hasOwnProperty(t.field);if(l){if(lr(n)&&!t.required)return r();tn.required(t,n,a,i,o),n!==void 0&&tn.type(t,n,a,i,o)}r(i)},xue=function(t,n,r,a,o){var i=[],l=t.required||!t.required&&a.hasOwnProperty(t.field);if(l){if(n===""&&(n=void 0),lr(n)&&!t.required)return r();tn.required(t,n,a,i,o),n!==void 0&&(tn.type(t,n,a,i,o),tn.range(t,n,a,i,o))}r(i)},_ue=function(t,n,r,a,o){var i=[],l=t.required||!t.required&&a.hasOwnProperty(t.field);if(l){if(lr(n)&&!t.required)return r();tn.required(t,n,a,i,o),n!==void 0&&tn.type(t,n,a,i,o)}r(i)},Eue=function(t,n,r,a,o){var i=[],l=t.required||!t.required&&a.hasOwnProperty(t.field);if(l){if(lr(n)&&!t.required)return 
r();tn.required(t,n,a,i,o),lr(n)||tn.type(t,n,a,i,o)}r(i)},Mue=function(t,n,r,a,o){var i=[],l=t.required||!t.required&&a.hasOwnProperty(t.field);if(l){if(lr(n)&&!t.required)return r();tn.required(t,n,a,i,o),n!==void 0&&(tn.type(t,n,a,i,o),tn.range(t,n,a,i,o))}r(i)},Iue=function(t,n,r,a,o){var i=[],l=t.required||!t.required&&a.hasOwnProperty(t.field);if(l){if(lr(n)&&!t.required)return r();tn.required(t,n,a,i,o),n!==void 0&&(tn.type(t,n,a,i,o),tn.range(t,n,a,i,o))}r(i)},Nue=function(t,n,r,a,o){var i=[],l=t.required||!t.required&&a.hasOwnProperty(t.field);if(l){if(n==null&&!t.required)return r();tn.required(t,n,a,i,o,"array"),n!=null&&(tn.type(t,n,a,i,o),tn.range(t,n,a,i,o))}r(i)},Aue=function(t,n,r,a,o){var i=[],l=t.required||!t.required&&a.hasOwnProperty(t.field);if(l){if(lr(n)&&!t.required)return r();tn.required(t,n,a,i,o),n!==void 0&&tn.type(t,n,a,i,o)}r(i)},Due="enum",Rue=function(t,n,r,a,o){var i=[],l=t.required||!t.required&&a.hasOwnProperty(t.field);if(l){if(lr(n)&&!t.required)return r();tn.required(t,n,a,i,o),n!==void 0&&tn[Due](t,n,a,i,o)}r(i)},Lue=function(t,n,r,a,o){var i=[],l=t.required||!t.required&&a.hasOwnProperty(t.field);if(l){if(lr(n,"string")&&!t.required)return r();tn.required(t,n,a,i,o),lr(n,"string")||tn.pattern(t,n,a,i,o)}r(i)},Fue=function(t,n,r,a,o){var i=[],l=t.required||!t.required&&a.hasOwnProperty(t.field);if(l){if(lr(n,"date")&&!t.required)return r();if(tn.required(t,n,a,i,o),!lr(n,"date")){var s;n instanceof Date?s=n:s=new Date(n),tn.type(t,s,a,i,o),s&&tn.range(t,s.getTime(),a,i,o)}}r(i)},Bue=function(t,n,r,a,o){var i=[],l=Array.isArray(n)?"array":typeof n;tn.required(t,n,a,i,o,l),r(i)},yy=function(t,n,r,a,o){var i=t.type,l=[],s=t.required||!t.required&&a.hasOwnProperty(t.field);if(s){if(lr(n,i)&&!t.required)return r();tn.required(t,n,a,l,o,i),lr(n,i)||tn.type(t,n,a,l,o)}r(l)},Vue=function(t,n,r,a,o){var i=[],l=t.required||!t.required&&a.hasOwnProperty(t.field);if(l){if(lr(n)&&!t.required)return 
r();tn.required(t,n,a,i,o)}r(i)},
// bd: registry mapping a rule `type` to its validator function. url, hex and email
// share the generic type-driven validator `yy`.
bd={string:Pue,method:Tue,number:xue,boolean:_ue,regexp:Eue,integer:Mue,float:Iue,array:Nue,object:Aue,enum:Rue,pattern:Lue,date:Fue,url:yy,hex:yy,email:yy,required:Bue,any:Vue};
// R0(): returns a fresh message catalogue. The "%s" placeholders are filled in by ca().
// `clone()` deep-copies the catalogue through a JSON round trip and re-attaches `clone`
// (a function would not survive JSON serialisation).
function R0(){return{default:"Validation error on field %s",required:"%s is required",enum:"%s must be one of %s",whitespace:"%s cannot be empty",date:{format:"%s date %s is invalid for format %s",parse:"%s date could not be parsed, %s is invalid ",invalid:"%s date %s is invalid"},types:{string:"%s is not a %s",method:"%s is not a %s (function)",array:"%s is not an %s",object:"%s is not an %s",number:"%s is not a %s",date:"%s is not a %s",boolean:"%s is not a %s",integer:"%s is not an %s",float:"%s is not a %s",regexp:"%s is not a valid %s",email:"%s is not a valid %s",url:"%s is not a valid %s",hex:"%s is not a valid %s"},string:{len:"%s must be exactly %s characters",min:"%s must be at least %s characters",max:"%s cannot be longer than %s characters",range:"%s must be between %s and %s characters"},number:{len:"%s must equal %s",min:"%s cannot be less than %s",max:"%s cannot be greater than %s",range:"%s must be between %s and %s"},array:{len:"%s must be exactly %s in length",min:"%s cannot be less than %s in length",max:"%s cannot be greater than %s in length",range:"%s must be between %s and %s in length"},pattern:{mismatch:"%s value %s does not match pattern %s"},clone:function(){var t=JSON.parse(JSON.stringify(this));return t.clone=this.clone,t}}}
// L0: the shared default catalogue instance.
// Mf: the schema class. The constructor stores the default messages and passes the
// rule descriptor to define().
var L0=R0(),Mf=function(){function e(n){this.rules=null,this._messages=L0,this.define(n)}var t=e.prototype;
// define(rules): requires a non-array object (throws otherwise) and stores every entry
// as an array of rules, wrapping single rules.
return t.define=function(r){var a=this;if(!r)throw new Error("Cannot configure a schema with no rules");if(typeof r!="object"||Array.isArray(r))throw new Error("Rules must be an object");this.rules={},Object.keys(r).forEach(function(o){var i=r[o];a.rules[o]=Array.isArray(i)?i:[i]})},
// messages(overrides): when overrides are given, replaces the instance catalogue with a
// fresh default catalogue merged with them (tO); always returns the current catalogue.
t.messages=function(r){return r&&(this._messages=tO(R0(),r)),this._messages},
// validate(source, options, callback): body continues on the following lines.
t.validate=function(r,a,o){var i=this;a===void 
0&&(a={}),o===void 0&&(o=function(){});
// l: source object (may be replaced by a shallow copy below), s: options, c: callback.
var l=r,s=a,c=o;
// Support validate(source, callback); with no rules configured, report success at once.
if(typeof s=="function"&&(c=s,s={}),!this.rules||Object.keys(this.rules).length===0)return c&&c(null,l),Promise.resolve(l);
// d(results): flattens the collected results (array entries are concatenated, other
// entries appended) and calls the callback with (errors, fieldsMap) or (null, source).
function d(y){var b=[],C={};function S(k){if(Array.isArray(k)){var $;b=($=b).concat.apply($,k)}else b.push(k)}for(var w=0;w<y.length;w++)S(y[w]);b.length?(C=D0(b),c(b,C)):c(null,l)}
// Resolve the message catalogue: custom option messages are merged into the instance
// catalogue, using a fresh copy first when that catalogue is the shared default L0.
if(s.messages){var f=this.messages();f===L0&&(f=R0()),tO(f,s.messages),s.messages=f}else s.messages=this.messages();
// p: field -> list of {rule, value, source, field} work items; v: fields to validate.
var p={},v=s.keys||Object.keys(this.rules);
// Build the work items. A rule `transform` rewrites the value on a shallow copy of the
// source (copied once, on first use); function rules become {validator: fn}; object
// rules are shallow-copied. Rules for which no validator can be resolved are skipped.
v.forEach(function(y){var b=i.rules[y],C=l[y];b.forEach(function(S){var w=S;typeof w.transform=="function"&&(l===r&&(l=ql({},l)),C=l[y]=w.transform(C)),typeof w=="function"?w={validator:w}:w=ql({},w),w.validator=i.getValidationMethod(w),w.validator&&(w.field=y,w.fullField=w.fullField||y,w.type=i.getType(w),p[y]=p[y]||[],p[y].push({rule:w,value:C,source:l,field:y}))})});
// m: written below with the fields that produced an error while options.first is set.
var m={};
// Run every work item through yue. Inside the per-item function: C is the rule and S is
// truthy when the rule is an object/array rule with `fields` or `defaultField` whose
// nested rules must also run (the rule is required, or the value is truthy).
// w(key, rule) derives a nested rule with extended fullField / fullFields.
// k(errors) is the completion callback handed to the validator; the nested-schema part
// of k continues on the next line.
return yue(p,s,function(y,b){var C=y.rule,S=(C.type==="object"||C.type==="array")&&(typeof C.fields=="object"||typeof C.defaultField=="object");S=S&&(C.required||!C.required&&y.value),C.field=y.field;function w(O,T){return ql({},T,{fullField:C.fullField+"."+O,fullFields:C.fullFields?[].concat(C.fullFields,[O]):[O]})}function k(O){O===void 0&&(O=[]);var T=Array.isArray(O)?O:[O];!s.suppressWarning&&T.length&&e.warning("async-validator:",T),T.length&&C.message!==void 0&&(T=[].concat(C.message));var _=T.map(eO(C,l));if(s.first&&_.length)return m[C.field]=1,b(_);if(!S)b(_);else{if(C.required&&!y.value)return C.message!==void 0?_=[].concat(C.message).map(eO(C,l)):s.error&&(_=[s.error(C,ca(s.messages.required,C.field))]),b(_);var I={};C.defaultField&&Object.keys(y.value).map(function(F){I[F]=C.defaultField}),I=ql({},I,y.rule.fields);var L={};Object.keys(I).forEach(function(F){var N=I[F],D=Array.isArray(N)?N:[N];L[F]=D.map(w.bind(null,F))});var j=new 
e(L);j.messages(s.messages),y.rule.options&&(y.rule.options.messages=s.messages,y.rule.options.error=s.error),j.validate(y.value,y.rule.options||s,function(F){var N=[];_&&_.length&&N.push.apply(N,_),F&&F.length&&N.push.apply(N,F),b(N.length?N:null)})}}
// Invoke the rule. `asyncValidator` takes precedence over `validator`. A synchronous
// validator may answer through its return value: true -> success, false -> the rule
// message (called when it is a function) or "<field> fails", an array -> those errors,
// an Error -> its message. A thenable result completes k on settle (rejection value is
// passed to k as the error).
var $;C.asyncValidator?$=C.asyncValidator(C,y.value,k,y.source,s):C.validator&&($=C.validator(C,y.value,k,y.source,s),$===!0?k():$===!1?k(typeof C.message=="function"?C.message(C.fullField||C.field):C.message||(C.fullField||C.field)+" fails"):$ instanceof Array?k($):$ instanceof Error&&k($.message)),$&&$.then&&$.then(function(){return k()},function(O){return k(O)})},function(y){d(y)},l)},
// getType(rule): defaults `type` to "pattern" when only a RegExp pattern is given,
// throws for an unknown type unless the rule brings its own validator function, and
// falls back to "string".
t.getType=function(r){if(r.type===void 0&&r.pattern instanceof RegExp&&(r.type="pattern"),typeof r.validator!="function"&&r.type&&!bd.hasOwnProperty(r.type))throw new Error(ca("Unknown rule type %s",r.type));return r.type||"string"},
// getValidationMethod(rule): the rule's own validator function when present; the
// required-only validator when `required` is the only key besides `message`; otherwise
// the registered validator for the rule type (undefined when there is none).
t.getValidationMethod=function(r){if(typeof r.validator=="function")return r.validator;var a=Object.keys(r),o=a.indexOf("message");return o!==-1&&a.splice(o,1),a.length===1&&a[0]==="required"?bd.required:bd[this.getType(r)]||void 0},e}();
// Static API: register(type, fn) adds or replaces a validator in bd and throws when
// `fn` is not a function.
Mf.register=function(t,n){if(typeof n!="function")throw new Error("Cannot register a validator by type, validator is not a function");bd[t]=n};
// Static hooks: warning is the no-op pue; messages is the shared default catalogue;
// validators exposes the registry.
Mf.warning=pue;Mf.messages=L0;Mf.validators=bd;
// zue: accepted validateStatus values. Hue: prop definitions of ElFormItem.
// nO: component name of the label wrapper.
const zue=["","error","validating","success"],Hue=Ze({label:String,labelWidth:{type:[String,Number],default:""},prop:{type:Le([String,Array])},required:{type:Boolean,default:void 0},rules:{type:Le([Object,Array])},error:String,validateStatus:{type:String,values:zue},for:String,inlineMessage:{type:[String,Boolean],default:""},showMessage:{type:Boolean,default:!0},size:{type:String,values:Bo}}),nO="ElLabelWrap";
// jue: label wrapper component. It reads two injected contexts (keys ga and Ia) and
// calls qn with a usage message when either is missing; setup continues on the next line.
var jue=G({name:nO,props:{isAutoWidth:Boolean,updateAll:Boolean},setup(e,{slots:t}){const n=ve(ga),r=ve(Ia);(!n||!r)&&qn(nO,"usage: <el-form><el-form-item><label-wrap /></el-form-item></el-form>");const a=De("form"),o=H(),i=H(0),l=()=>{var 
d;if((d=o.value)!=null&&d.firstElementChild){const f=window.getComputedStyle(o.value.firstElementChild).width;return Math.ceil(Number.parseFloat(f))}else return 0},s=(d="update")=>{Ne(()=>{t.default&&e.isAutoWidth&&(d==="update"?i.value=l():d==="remove"&&n.deregisterLabelWidth(i.value))})},c=()=>s("update");return et(()=>{c()}),Lt(()=>{s("remove")}),ur(()=>c()),ce(i,(d,f)=>{e.updateAll&&n.registerLabelWidth(d,f)}),Cc(x(()=>{var d,f;return(f=(d=o.value)==null?void 0:d.firstElementChild)!=null?f:null}),c),()=>{var d,f;if(!t)return null;const{isAutoWidth:p}=e;if(p){const v=n.autoLabelWidth,m={};if(v&&v!=="auto"){const y=Math.max(0,Number.parseInt(v,10)-i.value),b=n.labelPosition==="left"?"marginRight":"marginLeft";y&&(m[b]=`${y}px`)}return g("div",{ref:o,class:[a.be("item","label-wrap")],style:m},[(d=t.default)==null?void 0:d.call(t)])}else return g(Fe,{ref:o},[(f=t.default)==null?void 0:f.call(t)])}}});const Kue=["for"],Wue={name:"ElFormItem"},Uue=G(Ke(Te({},Wue),{props:Hue,setup(e,{expose:t}){const n=e,r="ElFormItem",a=wf(),o=ve(ga);o||qn(r,"usage: <el-form><el-form-item /></el-form>");const i=ve(Ia,void 0),l=Gn(void 0,{formItem:!1}),s=De("form-item"),c=H(""),d=gU(c,100),f=H(""),p=H();let v,m=!1;const y=x(()=>{if(o.labelPosition==="top")return{};const q=oo(n.labelWidth||o.labelWidth||"");return q?{width:q}:{}}),b=x(()=>{if(o.labelPosition==="top"||o.inline)return{};if(!n.label&&!n.labelWidth&&O)return{};const q=oo(n.labelWidth||o.labelWidth||"");return!n.label&&!a.label?{marginLeft:q}:{}}),C=x(()=>[s.b(),s.m(l.value),s.is("error",c.value==="error"),s.is("validating",c.value==="validating"),s.is("success",c.value==="success"),s.is("required",j.value||n.required),s.is("no-asterisk",o.hideRequiredAsterisk),{[s.m("feedback")]:o.statusIcon}]),S=x(()=>yr(n.inlineMessage)?n.inlineMessage:o.inlineMessage||!1),w=x(()=>[s.e("error"),{[s.em("error","inline")]:S.value}]),k=x(()=>n.prop?wt(n.prop)?n.prop:n.prop.join("."):""),$=x(()=>n.for||k.value),O=!!i,T=x(()=>{const 
q=o.model;if(!(!q||!n.prop))return cp(q,n.prop).value}),_=x(()=>{const q=n.rules?Hd(n.rules):[],J=o.rules;if(J&&n.prop){const ne=cp(J,n.prop).value;ne&&q.push(...Hd(ne))}return n.required!==void 0&&q.push({required:!!n.required}),q}),I=x(()=>_.value.length>0),L=q=>_.value.filter(ne=>!ne.trigger||!q?!0:Array.isArray(ne.trigger)?ne.trigger.includes(q):ne.trigger===q).map(Q=>{var ae=Q,{trigger:ne}=ae,oe=$k(ae,["trigger"]);return oe}),j=x(()=>_.value.some(q=>q.required===!0)),F=x(()=>d.value==="error"&&n.showMessage&&o.showMessage),N=x(()=>`${n.label||""}${o.labelSuffix||""}`),D=q=>{c.value=q},z=q=>{var J,ne;const{errors:oe,fields:Q}=q;(!oe||!Q)&&console.error(q),D("error"),f.value=oe?(ne=(J=oe==null?void 0:oe[0])==null?void 0:J.message)!=null?ne:`${n.prop} is required`:"",o.emit("validate",n.prop,!1,f.value)},B=()=>{D("success"),o.emit("validate",n.prop,!0,"")},M=async q=>{const J=k.value;return new Mf({[J]:q}).validate({[J]:T.value},{firstFields:!0}).then(()=>(B(),!0)).catch(oe=>(z(oe),Promise.reject(oe)))},E=async(q,J)=>{if(m)return m=!1,!1;const ne=Ct(J);if(!I.value)return J==null||J(!1),!1;const oe=L(q);return oe.length===0?(J==null||J(!0),!0):(D("validating"),M(oe).then(()=>(J==null||J(!0),!0)).catch(Q=>{const{fields:ae}=Q;return J==null||J(!1,ae),ne?!1:Promise.reject(ae)}))},K=()=>{D(""),f.value=""},W=async()=>{const q=o.model;if(!q||!n.prop)return;const J=cp(q,n.prop);er(J.value,v)||(m=!0),J.value=v,await Ne(),K()};ce(()=>n.error,q=>{f.value=q||"",D(q?"error":"")},{immediate:!0}),ce(()=>n.validateStatus,q=>D(q||""));const Y=bt(Ke(Te({},or(n)),{$el:p,size:l,validateState:c,resetField:W,clearValidate:K,validate:E}));return 
ot(Ia,Y),et(()=>{n.prop&&(o.addField(Y),v=Xj(T.value))}),Lt(()=>{o.removeField(Y)}),t({size:l,validateMessage:f,validateState:c,validate:E,clearValidate:K,resetField:W}),(q,J)=>(R(),X("div",{ref_key:"formItemRef",ref:p,class:U(A(C))},[g(A(jue),{"is-auto-width":A(y).width==="auto","update-all":A(o).labelWidth==="auto"},{default:re(()=>[q.label||q.$slots.label?(R(),X("label",{key:0,for:A($),class:U(A(s).e("label")),style:Xe(A(y))},[Oe(q.$slots,"label",{label:A(N)},()=>[yt(Me(A(N)),1)])],14,Kue)):se("v-if",!0)]),_:3},8,["is-auto-width","update-all"]),Z("div",{class:U(A(s).e("content")),style:Xe(A(b))},[Oe(q.$slots,"default"),g(Vn,{name:`${A(s).namespace.value}-zoom-in-top`},{default:re(()=>[A(F)?Oe(q.$slots,"error",{key:0,error:f.value},()=>[Z("div",{class:U(A(w))},Me(f.value),3)]):se("v-if",!0)]),_:3},8,["name"])],6)],2))}}));var T5=Ae(Uue,[["__file","/home/runner/work/element-plus/element-plus/packages/components/form/src/form-item.vue"]]);const Yue=xt(uue,{FormItem:T5}),que=En(T5),Gue=Ze({urlList:{type:Le(Array),default:()=>xn([])},zIndex:{type:Number},initialIndex:{type:Number,default:0},infinite:{type:Boolean,default:!0},hideOnClickModal:{type:Boolean,default:!1},teleported:{type:Boolean,default:!1}}),Xue={close:()=>!0,switch:e=>typeof e=="number"},Zue=["src"],Jue={name:"ElImageViewer"},Que=G(Ke(Te({},Jue),{props:Gue,emits:Xue,setup(e,{emit:t}){const n=e,r={CONTAIN:{name:"contain",icon:ps(Lq)},ORIGINAL:{name:"original",icon:ps(QG)}},a=BC()?"DOMMouseScroll":"mousewheel",{t:o}=ln(),i=De("image-viewer"),{nextZIndex:l}=Pi(),s=H(),c=H([]),d=N9(),f=H(!0),p=H(n.initialIndex),v=H(r.CONTAIN),m=H({scale:1,deg:0,offsetX:0,offsetY:0,enableTransition:!1}),y=x(()=>{const{urlList:B}=n;return B.length<=1}),b=x(()=>p.value===0),C=x(()=>p.value===n.urlList.length-1),S=x(()=>n.urlList[p.value]),w=x(()=>{const{scale:B,deg:M,offsetX:E,offsetY:K,enableTransition:W}=m.value;let Y=E/B,q=K/B;switch(M%360){case 90:case-270:[Y,q]=[q,-Y];break;case 180:case-180:[Y,q]=[-Y,-q];break;case 
270:case-90:[Y,q]=[-q,Y];break}const J={transform:`scale(${B}) rotate(${M}deg) translate(${Y}px, ${q}px)`,transition:W?"transform .3s":""};return v.value.name===r.CONTAIN.name&&(J.maxWidth=J.maxHeight="100%"),J}),k=x(()=>Yt(n.zIndex)?n.zIndex:l());function $(){T(),t("close")}function O(){const B=Qi(E=>{switch(E.code){case Ge.esc:$();break;case Ge.space:F();break;case Ge.left:N();break;case Ge.up:z("zoomIn");break;case Ge.right:D();break;case Ge.down:z("zoomOut");break}}),M=Qi(E=>{(E.wheelDelta?E.wheelDelta:-E.detail)>0?z("zoomIn",{zoomRate:1.2,enableTransition:!1}):z("zoomOut",{zoomRate:1.2,enableTransition:!1})});d.run(()=>{Hn(document,"keydown",B),Hn(document,a,M)})}function T(){d.stop()}function _(){f.value=!1}function I(B){f.value=!1,B.target.alt=o("el.image.error")}function L(B){if(f.value||B.button!==0||!s.value)return;m.value.enableTransition=!1;const{offsetX:M,offsetY:E}=m.value,K=B.pageX,W=B.pageY,Y=Qi(J=>{m.value=Ke(Te({},m.value),{offsetX:M+J.pageX-K,offsetY:E+J.pageY-W})}),q=Hn(document,"mousemove",Y);Hn(document,"mouseup",()=>{q()}),B.preventDefault()}function j(){m.value={scale:1,deg:0,offsetX:0,offsetY:0,enableTransition:!1}}function F(){if(f.value)return;const B=Object.keys(r),M=Object.values(r),E=v.value.name,W=(M.findIndex(Y=>Y.name===E)+1)%B.length;v.value=r[B[W]],j()}function N(){if(b.value&&!n.infinite)return;const B=n.urlList.length;p.value=(p.value-1+B)%B}function D(){if(C.value&&!n.infinite)return;const B=n.urlList.length;p.value=(p.value+1)%B}function z(B,M={}){if(f.value)return;const{zoomRate:E,rotateDeg:K,enableTransition:W}=Te({zoomRate:1.4,rotateDeg:90,enableTransition:!0},M);switch(B){case"zoomOut":m.value.scale>.2&&(m.value.scale=Number.parseFloat((m.value.scale/E).toFixed(3)));break;case"zoomIn":m.value.scale<7&&(m.value.scale=Number.parseFloat((m.value.scale*E).toFixed(3)));break;case"clockwise":m.value.deg+=K;break;case"anticlockwise":m.value.deg-=K;break}m.value.enableTransition=W}return ce(S,()=>{Ne(()=>{const 
B=c.value[0];B!=null&&B.complete||(f.value=!0)})}),ce(p,B=>{j(),t("switch",B)}),et(()=>{var B,M;O(),(M=(B=s.value)==null?void 0:B.focus)==null||M.call(B)}),(B,M)=>(R(),fe(Ps,{to:"body",disabled:!B.teleported},[g(Vn,{name:"viewer-fade",appear:""},{default:re(()=>[Z("div",{ref_key:"wrapper",ref:s,tabindex:-1,class:U(A(i).e("wrapper")),style:Xe({zIndex:A(k)})},[Z("div",{class:U(A(i).e("mask")),onClick:M[0]||(M[0]=dt(E=>B.hideOnClickModal&&$(),["self"]))},null,2),se(" CLOSE "),Z("span",{class:U([A(i).e("btn"),A(i).e("close")]),onClick:$},[g(A(ft),null,{default:re(()=>[g(A(Ma))]),_:1})],2),se(" ARROW "),A(y)?se("v-if",!0):(R(),X(Fe,{key:0},[Z("span",{class:U([A(i).e("btn"),A(i).e("prev"),A(i).is("disabled",!B.infinite&&A(b))]),onClick:N},[g(A(ft),null,{default:re(()=>[g(A(Es))]),_:1})],2),Z("span",{class:U([A(i).e("btn"),A(i).e("next"),A(i).is("disabled",!B.infinite&&A(C))]),onClick:D},[g(A(ft),null,{default:re(()=>[g(A(Da))]),_:1})],2)],64)),se(" ACTIONS "),Z("div",{class:U([A(i).e("btn"),A(i).e("actions")])},[Z("div",{class:U(A(i).e("actions__inner"))},[g(A(ft),{onClick:M[1]||(M[1]=E=>z("zoomOut"))},{default:re(()=>[g(A(jX))]),_:1}),g(A(ft),{onClick:M[2]||(M[2]=E=>z("zoomIn"))},{default:re(()=>[g(A(fE))]),_:1}),Z("i",{class:U(A(i).e("actions__divider"))},null,2),g(A(ft),{onClick:F},{default:re(()=>[(R(),fe(Kt(v.value.icon)))]),_:1}),Z("i",{class:U(A(i).e("actions__divider"))},null,2),g(A(ft),{onClick:M[3]||(M[3]=E=>z("anticlockwise"))},{default:re(()=>[g(A(zG))]),_:1}),g(A(ft),{onClick:M[4]||(M[4]=E=>z("clockwise"))},{default:re(()=>[g(A(YG))]),_:1})],2)],2),se(" CANVAS "),Z("div",{class:U(A(i).e("canvas"))},[(R(!0),X(Fe,null,Rt(B.urlList,(E,K)=>at((R(),X("img",{ref_for:!0,ref:W=>c.value[K]=W,key:E,src:E,style:Xe(A(w)),class:U(A(i).e("img")),onLoad:_,onError:I,onMousedown:L},null,46,Zue)),[[_t,K===p.value]])),128))],2),Oe(B.$slots,"default")],6)]),_:3})],8,["disabled"]))}}));var 
ece=Ae(Que,[["__file","/home/runner/work/element-plus/element-plus/packages/components/image-viewer/src/image-viewer.vue"]]);const x5=xt(ece),tce=Ze({appendToBody:{type:Boolean,default:void 0},hideOnClickModal:{type:Boolean,default:!1},src:{type:String,default:""},fit:{type:String,values:["","contain","cover","fill","none","scale-down"],default:""},lazy:{type:Boolean,default:!1},scrollContainer:{type:Le([String,Object])},previewSrcList:{type:Le(Array),default:()=>xn([])},previewTeleported:{type:Boolean,default:!1},zIndex:{type:Number},initialIndex:{type:Number,default:0},infinite:{type:Boolean,default:!0}}),nce={error:e=>e instanceof Event,switch:e=>Yt(e),close:()=>!0},rce=["src"],ace={key:0},oce={name:"ElImage"},ice=G(Ke(Te({},oce),{props:tce,emits:nce,setup(e,{emit:t}){const n=e;let r="";Tf({scope:"el-image",from:"append-to-body",replacement:"preview-teleported",version:"2.2.0",ref:"https://element-plus.org/en-US/component/image.html#image-attributess"},x(()=>yr(n.appendToBody)));const{t:a}=ln(),o=De("image"),i=VC(),l=H(!1),s=H(!0),c=H(0),d=H(0),f=H(!1),p=H(),v=H();let m,y;const b=x(()=>i.value.style),C=x(()=>{const{fit:B}=n;return Bt&&B?{objectFit:B}:{}}),S=x(()=>{const{previewSrcList:B}=n;return Array.isArray(B)&&B.length>0}),w=x(()=>n.appendToBody||n.previewTeleported),k=x(()=>{const{previewSrcList:B,initialIndex:M}=n;let E=M;return M>B.length-1&&(E=0),E}),$=()=>{if(!Bt)return;s.value=!0,l.value=!1;const B=new Image,M=n.src;B.addEventListener("load",E=>{M===n.src&&O(E,B)}),B.addEventListener("error",E=>{M===n.src&&T(E)}),Object.entries(i.value).forEach(([E,K])=>{E.toLowerCase()!=="onload"&&B.setAttribute(E,K)}),B.src=M};function O(B,M){c.value=M.width,d.value=M.height,s.value=!1,l.value=!1}function T(B){s.value=!1,l.value=!0,t("error",B)}function _(){PU(p.value,v.value)&&($(),j())}const I=oE(_,200);async function L(){var B;if(!Bt)return;await Ne();const{scrollContainer:M}=n;ys(M)?v.value=M:wt(M)&&M!==""?v.value=(B=document.querySelector(M))!=null?B:void 
0:p.value&&(v.value=DC(p.value)),v.value&&(m=Hn(v,"scroll",I),setTimeout(()=>_(),100))}function j(){!Bt||!v.value||!I||(m(),v.value=void 0)}function F(B){if(!!B.ctrlKey){if(B.deltaY<0)return B.preventDefault(),!1;if(B.deltaY>0)return B.preventDefault(),!1}}function N(){!S.value||(y=Hn("wheel",F,{passive:!1}),r=document.body.style.overflow,document.body.style.overflow="hidden",f.value=!0)}function D(){y==null||y(),document.body.style.overflow=r,f.value=!1,t("close")}function z(B){t("switch",B)}return ce(()=>n.src,()=>{n.lazy?(s.value=!0,l.value=!1,j(),L()):$()}),et(()=>{n.lazy?L():$()}),(B,M)=>(R(),X("div",{ref_key:"container",ref:p,class:U([A(o).b(),B.$attrs.class]),style:Xe(A(b))},[s.value?Oe(B.$slots,"placeholder",{key:0},()=>[Z("div",{class:U(A(o).e("placeholder"))},null,2)]):l.value?Oe(B.$slots,"error",{key:1},()=>[Z("div",{class:U(A(o).e("error"))},Me(A(a)("el.image.error")),3)]):(R(),X("img",hn({key:2},A(i),{src:B.src,style:A(C),class:[A(o).e("inner"),A(S)?A(o).e("preview"):""],onClick:N}),null,16,rce)),A(S)?(R(),X(Fe,{key:3},[f.value?(R(),fe(A(x5),{key:0,"z-index":B.zIndex,"initial-index":A(k),infinite:B.infinite,"url-list":B.previewSrcList,"hide-on-click-modal":B.hideOnClickModal,teleported:A(w),onClose:D,onSwitch:z},{default:re(()=>[B.$slots.viewer?(R(),X("div",ace,[Oe(B.$slots,"viewer")])):se("v-if",!0)]),_:3},8,["z-index","initial-index","infinite","url-list","hide-on-click-modal","teleported"])):se("v-if",!0)],2112)):se("v-if",!0)],6))}}));var lce=Ae(ice,[["__file","/home/runner/work/element-plus/element-plus/packages/components/image/src/image.vue"]]);const 
sce=xt(lce),uce=Ze({step:{type:Number,default:1},stepStrictly:{type:Boolean,default:!1},max:{type:Number,default:Number.POSITIVE_INFINITY},min:{type:Number,default:Number.NEGATIVE_INFINITY},modelValue:{type:Number},disabled:{type:Boolean,default:!1},size:{type:String,values:Bo},controls:{type:Boolean,default:!0},controlsPosition:{type:String,default:"",values:["","right"]},name:String,label:String,placeholder:String,precision:{type:Number,validator:e=>e>=0&&e===Number.parseInt(`${e}`,10)}}),cce={change:(e,t)=>e!==t,blur:e=>e instanceof FocusEvent,focus:e=>e instanceof FocusEvent,input:e=>Yt(e),"update:modelValue":e=>Yt(e)||e===void 0},dce=G({name:"ElInputNumber",components:{ElInput:Ra,ElIcon:ft,ArrowUp:Of,ArrowDown:_s,Plus:cE,Minus:iG},directives:{RepeatClick:SM},props:uce,emits:cce,setup(e,{emit:t}){const n=H(),r=bt({currentValue:e.modelValue,userInput:null}),{formItem:a}=$c(),o=De("input-number"),i=x(()=>y(e.modelValue,-1)<e.min),l=x(()=>y(e.modelValue)>e.max),s=x(()=>{const L=m(e.step);return sa(e.precision)?Math.max(m(e.modelValue),L):(L>e.precision,e.precision)}),c=x(()=>e.controls&&e.controlsPosition==="right"),d=Gn(),f=Ms(),p=x(()=>{if(r.userInput!==null)return r.userInput;let L=r.currentValue;if(Yt(L)){if(Number.isNaN(L))return"";sa(e.precision)||(L=L.toFixed(e.precision))}return L}),v=(L,j)=>(sa(j)&&(j=s.value),Number.parseFloat(`${Math.round(L*10**j)/10**j}`)),m=L=>{if(sa(L))return 0;const j=L.toString(),F=j.indexOf(".");let N=0;return F!==-1&&(N=j.length-F-1),N},y=(L,j=1)=>Yt(L)?(L=Yt(L)?L:Number.NaN,v(L+e.step*j)):r.currentValue,b=()=>{if(f.value||l.value)return;const L=e.modelValue||0,j=y(L);w(j)},C=()=>{if(f.value||i.value)return;const L=e.modelValue||0,j=y(L,-1);w(j)},S=(L,j)=>{const{max:F,min:N,step:D,precision:z,stepStrictly:B}=e;let M=Number(L);return L===null&&(M=Number.NaN),Number.isNaN(M)||(B&&(M=Math.round(M/D)*D),sa(z)||(M=v(M,z)),(M>F||M<N)&&(M=M>F?F:N,j&&t("update:modelValue",M))),M},w=L=>{var j;const F=r.currentValue;let 
N=S(L);F!==N&&(Number.isNaN(N)&&(N=void 0),r.userInput=null,t("update:modelValue",N),t("input",N),t("change",N,F),(j=a==null?void 0:a.validate)==null||j.call(a,"change").catch(D=>void 0),r.currentValue=N)},k=L=>r.userInput=L,$=L=>{const j=L!==""?Number(L):"";(Yt(j)&&!Number.isNaN(j)||L==="")&&w(j),r.userInput=null},O=()=>{var L,j;(j=(L=n.value)==null?void 0:L.focus)==null||j.call(L)},T=()=>{var L,j;(j=(L=n.value)==null?void 0:L.blur)==null||j.call(L)},_=L=>{t("focus",L)},I=L=>{var j;t("blur",L),(j=a==null?void 0:a.validate)==null||j.call(a,"blur").catch(F=>void 0)};return ce(()=>e.modelValue,L=>{const j=S(L,!0);r.currentValue=j,r.userInput=null},{immediate:!0}),et(()=>{var L;const j=(L=n.value)==null?void 0:L.input;if(j.setAttribute("role","spinbutton"),j.setAttribute("aria-valuemax",String(e.max)),j.setAttribute("aria-valuemin",String(e.min)),j.setAttribute("aria-valuenow",String(r.currentValue)),j.setAttribute("aria-disabled",String(f.value)),!Yt(e.modelValue)){let F=Number(e.modelValue);Number.isNaN(F)&&(F=void 0),t("update:modelValue",F)}}),ur(()=>{var L;const j=(L=n.value)==null?void 0:L.input;j==null||j.setAttribute("aria-valuenow",r.currentValue)}),{input:n,displayValue:p,handleInput:k,handleInputChange:$,controlsAtRight:c,decrease:C,increase:b,inputNumberSize:d,inputNumberDisabled:f,maxDisabled:l,minDisabled:i,focus:O,blur:T,handleFocus:_,handleBlur:I,ns:o}}});function fce(e,t,n,r,a,o){const i=we("arrow-down"),l=we("minus"),s=we("el-icon"),c=we("arrow-up"),d=we("plus"),f=we("el-input"),p=pa("repeat-click");return 
R(),X("div",{class:U([e.ns.b(),e.ns.m(e.inputNumberSize),e.ns.is("disabled",e.inputNumberDisabled),e.ns.is("without-controls",!e.controls),e.ns.is("controls-right",e.controlsAtRight)]),onDragstart:t[2]||(t[2]=dt(()=>{},["prevent"]))},[e.controls?at((R(),X("span",{key:0,role:"button",class:U([e.ns.e("decrease"),e.ns.is("disabled",e.minDisabled)]),onKeydown:t[0]||(t[0]=It((...v)=>e.decrease&&e.decrease(...v),["enter"]))},[g(s,null,{default:re(()=>[e.controlsAtRight?(R(),fe(i,{key:0})):(R(),fe(l,{key:1}))]),_:1})],34)),[[p,e.decrease]]):se("v-if",!0),e.controls?at((R(),X("span",{key:1,role:"button",class:U([e.ns.e("increase"),e.ns.is("disabled",e.maxDisabled)]),onKeydown:t[1]||(t[1]=It((...v)=>e.increase&&e.increase(...v),["enter"]))},[g(s,null,{default:re(()=>[e.controlsAtRight?(R(),fe(c,{key:0})):(R(),fe(d,{key:1}))]),_:1})],34)),[[p,e.increase]]):se("v-if",!0),g(f,{ref:"input",type:"number",step:e.step,"model-value":e.displayValue,placeholder:e.placeholder,disabled:e.inputNumberDisabled,size:e.inputNumberSize,max:e.max,min:e.min,name:e.name,label:e.label,"validate-event":!1,onKeydown:[It(dt(e.increase,["prevent"]),["up"]),It(dt(e.decrease,["prevent"]),["down"])],onBlur:e.handleBlur,onFocus:e.handleFocus,onInput:e.handleInput,onChange:e.handleInputChange},null,8,["step","model-value","placeholder","disabled","size","max","min","name","label","onKeydown","onBlur","onFocus","onInput","onChange"])],34)}var hce=Ae(dce,[["render",fce],["__file","/home/runner/work/element-plus/element-plus/packages/components/input-number/src/input-number.vue"]]);const _5=xt(hce),pce=Ze({type:{type:String,values:["primary","success","warning","info","danger","default"],default:"default"},underline:{type:Boolean,default:!0},disabled:{type:Boolean,default:!1},href:{type:String,default:""},icon:{type:wr,default:""}}),vce={click:e=>e instanceof MouseEvent},mce=["href"],gce={name:"ElLink"},yce=G(Ke(Te({},gce),{props:pce,emits:vce,setup(e,{emit:t}){const n=e,r=De("link");function 
a(o){n.disabled||t("click",o)}return(o,i)=>(R(),X("a",{class:U([A(r).b(),A(r).m(o.type),A(r).is("disabled",o.disabled),A(r).is("underline",o.underline&&!o.disabled)]),href:o.disabled||!o.href?void 0:o.href,onClick:a},[o.icon?(R(),fe(A(ft),{key:0},{default:re(()=>[(R(),fe(Kt(o.icon)))]),_:1})):se("v-if",!0),o.$slots.default?(R(),X("span",{key:1,class:U(A(r).e("inner"))},[Oe(o.$slots,"default")],2)):se("v-if",!0),o.$slots.icon?Oe(o.$slots,"icon",{key:2}):se("v-if",!0)],10,mce))}}));var bce=Ae(yce,[["__file","/home/runner/work/element-plus/element-plus/packages/components/link/src/link.vue"]]);const Cce=xt(bce);class wce{constructor(t,n){this.parent=t,this.domNode=n,this.subIndex=0,this.subIndex=0,this.init()}init(){this.subMenuItems=this.domNode.querySelectorAll("li"),this.addListeners()}gotoSubIndex(t){t===this.subMenuItems.length?t=0:t<0&&(t=this.subMenuItems.length-1),this.subMenuItems[t].focus(),this.subIndex=t}addListeners(){const t=this.parent.domNode;Array.prototype.forEach.call(this.subMenuItems,n=>{n.addEventListener("keydown",r=>{let a=!1;switch(r.code){case Ge.down:{this.gotoSubIndex(this.subIndex+1),a=!0;break}case Ge.up:{this.gotoSubIndex(this.subIndex-1),a=!0;break}case Ge.tab:{sp(t,"mouseleave");break}case Ge.enter:case Ge.space:{a=!0,r.currentTarget.click();break}}return a&&(r.preventDefault(),r.stopPropagation()),!1})})}}class Sce{constructor(t){this.domNode=t,this.submenu=null,this.submenu=null,this.init()}init(){this.domNode.setAttribute("tabindex","0");const t=this.domNode.querySelector(".el-menu");t&&(this.submenu=new wce(this,t)),this.addListeners()}addListeners(){this.domNode.addEventListener("keydown",t=>{let n=!1;switch(t.code){case Ge.down:{sp(t.currentTarget,"mouseenter"),this.submenu&&this.submenu.gotoSubIndex(0),n=!0;break}case Ge.up:{sp(t.currentTarget,"mouseenter"),this.submenu&&this.submenu.gotoSubIndex(this.submenu.subMenuItems.length-1),n=!0;break}case Ge.tab:{sp(t.currentTarget,"mouseleave");break}case Ge.enter:case 
Ge.space:{n=!0,t.currentTarget.click();break}}n&&t.preventDefault()})}}class kce{constructor(t){this.domNode=t,this.init()}init(){const t=this.domNode.childNodes;Array.from(t).forEach(n=>{n.nodeType===1&&new Sce(n)})}}const $ce=G({name:"ElMenuCollapseTransition",setup(){return{listeners:{onBeforeEnter:t=>t.style.opacity="0.2",onEnter(t,n){xo(t,"el-opacity-transition"),t.style.opacity="1",n()},onAfterEnter(t){Br(t,"el-opacity-transition"),t.style.opacity=""},onBeforeLeave(t){t.dataset||(t.dataset={}),to(t,"el-menu--collapse")?(Br(t,"el-menu--collapse"),t.dataset.oldOverflow=t.style.overflow,t.dataset.scrollWidth=t.clientWidth.toString(),xo(t,"el-menu--collapse")):(xo(t,"el-menu--collapse"),t.dataset.oldOverflow=t.style.overflow,t.dataset.scrollWidth=t.clientWidth.toString(),Br(t,"el-menu--collapse")),t.style.width=`${t.scrollWidth}px`,t.style.overflow="hidden"},onLeave(t){xo(t,"horizontal-collapse-transition"),t.style.width=`${t.dataset.scrollWidth}px`}}}}});function Oce(e,t,n,r,a,o){return R(),fe(Vn,hn({mode:"out-in"},e.listeners),{default:re(()=>[Oe(e.$slots,"default")]),_:3},16)}var Pce=Ae($ce,[["render",Oce],["__file","/home/runner/work/element-plus/element-plus/packages/components/menu/src/menu-collapse-transition.vue"]]);function E5(e,t){const n=ve("rootMenu");n||qn("useMenu","can not inject root menu");const r=x(()=>{let i=e.parent;const l=[t.value];for(;i.type.name!=="ElMenu";)i.props.index&&l.unshift(i.props.index),i=i.parent;return l}),a=x(()=>{let i=e.parent;for(;i&&!["ElMenu","ElSubMenu"].includes(i.type.name);)i=i.parent;return i}),o=x(()=>{let i=e.parent;if(n.props.mode!=="vertical")return{};let l=20;if(n.props.collapse)l=20;else for(;i&&i.type.name!=="ElMenu";)i.type.name==="ElSubMenu"&&(l+=20),i=i.parent;return{paddingLeft:`${l}px`}});return{parentMenu:a,paddingStyle:o,indexPath:r}}function Tce(e){return x(()=>{const n=e.backgroundColor;return n?new pM(n).shade(20).toString():""})}const 
M5=e=>x(()=>({"--el-menu-text-color":e.textColor||"","--el-menu-hover-text-color":e.textColor||"","--el-menu-bg-color":e.backgroundColor||"","--el-menu-hover-bg-color":Tce(e).value||"","--el-menu-active-color":e.activeTextColor||""})),xce=Ze({index:{type:String,required:!0},showTimeout:{type:Number,default:300},hideTimeout:{type:Number,default:300},popperClass:String,disabled:Boolean,popperAppendToBody:{type:Boolean,default:void 0},popperOffset:{type:Number,default:6}}),by="ElSubMenu";var hw=G({name:by,props:xce,setup(e,{slots:t,expose:n}){const r=$t(),{paddingStyle:a,indexPath:o,parentMenu:i}=E5(r,x(()=>e.index)),l=ve("rootMenu");l||qn(by,"can not inject root menu");const s=ve(`subMenu:${i.value.uid}`);s||qn(by,"can not inject sub menu");const c=H({}),d=H({});let f;const p=H(!1),v=H(),m=H(null),y=x(()=>L.value==="horizontal"&&C.value?"bottom-start":"right-start"),b=x(()=>L.value==="horizontal"&&C.value||L.value==="vertical"&&!l.props.collapse?_s:Da),C=x(()=>{let E=!0,K=r.parent;for(;K&&K.type.name!=="ElMenu";)if(["ElSubMenu","ElMenuItemGroup"].includes(K.type.name)){E=!1;break}else K=K.parent;return E}),S=x(()=>e.popperAppendToBody===void 0?C.value:Boolean(e.popperAppendToBody)),w=x(()=>l.props.collapse?"el-zoom-in-left":"el-zoom-in-top"),k=x(()=>L.value==="horizontal"&&C.value?["bottom-start","bottom-end","top-start","top-end","right-start","left-start"]:["right-start","left-start","bottom-start","bottom-end","top-start","top-end"]),$=x(()=>l.openedMenus.includes(e.index)),O=x(()=>{let E=!1;return Object.values(c.value).forEach(K=>{K.active&&(E=!0)}),Object.values(d.value).forEach(K=>{K.active&&(E=!0)}),E}),T=x(()=>l.props.backgroundColor||""),_=x(()=>l.props.activeTextColor||""),I=x(()=>l.props.textColor||""),L=x(()=>l.props.mode),j=bt({index:e.index,indexPath:o,active:O}),F=x(()=>L.value!=="horizontal"?{color:I.value}:{borderBottomColor:O.value?l.props.activeTextColor?_.value:"":"transparent",color:O.value?_.value:I.value}),N=()=>{var 
E,K,W;return(W=(K=(E=m.value)==null?void 0:E.popperRef)==null?void 0:K.popperInstanceRef)==null?void 0:W.destroy()},D=E=>{E||N()},z=()=>{l.props.menuTrigger==="hover"&&l.props.mode==="horizontal"||l.props.collapse&&l.props.mode==="vertical"||e.disabled||l.handleSubMenuClick({index:e.index,indexPath:o.value,active:O.value})},B=(E,K=e.showTimeout)=>{var W;E.type==="focus"&&!E.relatedTarget||l.props.menuTrigger==="click"&&l.props.mode==="horizontal"||!l.props.collapse&&l.props.mode==="vertical"||e.disabled||(s.mouseInChild.value=!0,f==null||f(),{stop:f}=gs(()=>{l.openMenu(e.index,o.value)},K),S.value&&((W=i.value.vnode.el)==null||W.dispatchEvent(new MouseEvent("mouseenter"))))},M=(E=!1)=>{var K,W;l.props.menuTrigger==="click"&&l.props.mode==="horizontal"||!l.props.collapse&&l.props.mode==="vertical"||(f==null||f(),s.mouseInChild.value=!1,{stop:f}=gs(()=>!p.value&&l.closeMenu(e.index,o.value),e.hideTimeout),S.value&&E&&((K=r.parent)==null?void 0:K.type.name)==="ElSubMenu"&&((W=s.handleMouseleave)==null||W.call(s,!0)))};ce(()=>l.props.collapse,E=>D(Boolean(E)));{const E=W=>{d.value[W.index]=W},K=W=>{delete d.value[W.index]};ot(`subMenu:${r.uid}`,{addSubMenu:E,removeSubMenu:K,handleMouseleave:M,mouseInChild:p})}return n({opened:$}),et(()=>{l.addSubMenu(j),s.addSubMenu(j)}),Lt(()=>{s.removeSubMenu(j),l.removeSubMenu(j)}),()=>{var E;const K=[(E=t.title)==null?void 0:E.call(t),qe(ft,{class:["el-sub-menu__icon-arrow"]},{default:()=>qe(b.value)})],W=M5(l.props),Y=l.isMenuPopup?qe(Ur,{ref:m,visible:$.value,effect:"light",pure:!0,offset:e.popperOffset,showArrow:!1,persistent:!0,popperClass:e.popperClass,placement:y.value,teleported:S.value,fallbackPlacements:k.value,transition:w.value,gpuAcceleration:!1},{content:()=>{var q;return qe("div",{class:[`el-menu--${L.value}`,e.popperClass],onMouseenter:J=>B(J,100),onMouseleave:()=>M(!0),onFocus:J=>B(J,100)},[qe("ul",{class:["el-menu el-menu--popup",`el-menu--popup-${y.value}`],style:W.value},[(q=t.default)==null?void 
0:q.call(t)])])},default:()=>qe("div",{class:"el-sub-menu__title",style:[a.value,F.value,{backgroundColor:T.value}],onClick:z},K)}):qe(Fe,{},[qe("div",{class:"el-sub-menu__title",style:[a.value,F.value,{backgroundColor:T.value}],ref:v,onClick:z},K),qe(Xm,{},{default:()=>{var q;return at(qe("ul",{role:"menu",class:"el-menu el-menu--inline",style:W.value},[(q=t.default)==null?void 0:q.call(t)]),[[_t,$.value]])}})]);return qe("li",{class:["el-sub-menu",{"is-active":O.value,"is-opened":$.value,"is-disabled":e.disabled}],role:"menuitem",ariaHaspopup:!0,ariaExpanded:$.value,onMouseenter:B,onMouseleave:()=>M(!0),onFocus:B},[Y])}}});const _ce=Ze({mode:{type:String,values:["horizontal","vertical"],default:"vertical"},defaultActive:{type:String,default:""},defaultOpeneds:{type:Le(Array),default:()=>xn([])},uniqueOpened:Boolean,router:Boolean,menuTrigger:{type:String,values:["hover","click"],default:"hover"},collapse:Boolean,backgroundColor:String,textColor:String,activeTextColor:String,collapseTransition:{type:Boolean,default:!0},ellipsis:{type:Boolean,default:!0}}),Cy=e=>Array.isArray(e)&&e.every(t=>wt(t)),Ece={close:(e,t)=>wt(e)&&Cy(t),open:(e,t)=>wt(e)&&Cy(t),select:(e,t,n,r)=>wt(e)&&Cy(t)&&zt(n)&&(r===void 0||r instanceof Promise)};var Mce=G({name:"ElMenu",props:_ce,emits:Ece,setup(e,{emit:t,slots:n,expose:r}){const a=$t(),o=a.appContext.config.globalProperties.$router,i=H(),l=H(e.defaultOpeneds&&!e.collapse?e.defaultOpeneds.slice(0):[]),s=H(e.defaultActive),c=H({}),d=H({}),f=x(()=>e.mode==="horizontal"||e.mode==="vertical"&&e.collapse),p=()=>{const $=s.value&&c.value[s.value];if(!$||e.mode==="horizontal"||e.collapse)return;$.indexPath.forEach(T=>{const _=d.value[T];_&&v(T,_.indexPath)})},v=($,O)=>{l.value.includes($)||(e.uniqueOpened&&(l.value=l.value.filter(T=>O.includes(T))),l.value.push($),t("open",$,O))},m=($,O)=>{const 
T=l.value.indexOf($);T!==-1&&l.value.splice(T,1),t("close",$,O)},y=({index:$,indexPath:O})=>{l.value.includes($)?m($,O):v($,O)},b=$=>{(e.mode==="horizontal"||e.collapse)&&(l.value=[]);const{index:O,indexPath:T}=$;if(!(O===void 0||T===void 0))if(e.router&&o){const _=$.route||O,I=o.push(_).then(L=>(L||(s.value=O),L));t("select",O,T,{index:O,indexPath:T,route:_},I)}else s.value=O,t("select",O,T,{index:O,indexPath:T})},C=$=>{const O=c.value,T=O[$]||s.value&&O[s.value]||O[e.defaultActive];T?(s.value=T.index,p()):s.value=$},S=()=>{Ne(()=>a.proxy.$forceUpdate())};ce(()=>e.defaultActive,$=>{c.value[$]||(s.value=""),C($)}),ce(c.value,()=>p()),ce(()=>e.collapse,$=>{$&&(l.value=[])});{const $=I=>{d.value[I.index]=I},O=I=>{delete d.value[I.index]};ot("rootMenu",bt({props:e,openedMenus:l,items:c,subMenus:d,activeIndex:s,isMenuPopup:f,addMenuItem:I=>{c.value[I.index]=I},removeMenuItem:I=>{delete c.value[I.index]},addSubMenu:$,removeSubMenu:O,openMenu:v,closeMenu:m,handleMenuItemClick:b,handleSubMenuClick:y})),ot(`subMenu:${a.uid}`,{addSubMenu:$,removeSubMenu:O,mouseInChild:H(!1)})}et(()=>{p(),e.mode==="horizontal"&&new kce(a.vnode.el)}),r({open:O=>{const{indexPath:T}=d.value[O];T.forEach(_=>v(_,T))},close:m,handleResize:S});const w=$=>{const O=Array.isArray($)?$:[$],T=[];return O.forEach(_=>{Array.isArray(_.children)?T.push(...w(_.children)):T.push(_)}),T},k=$=>e.mode==="horizontal"?at($,[[Rte,S]]):$;return()=>{var $,O,T,_;let I=(O=($=n.default)==null?void 0:$.call(n))!=null?O:[];const L=[];if(e.mode==="horizontal"&&i.value){const D=Array.from((_=(T=i.value)==null?void 0:T.childNodes)!=null?_:[]).filter(ne=>ne.nodeName!=="#text"||ne.nodeValue),z=w(I),B=64,M=Number.parseInt(getComputedStyle(i.value).paddingLeft,10),E=Number.parseInt(getComputedStyle(i.value).paddingRight,10),K=i.value.clientWidth-M-E;let W=0,Y=0;D.forEach((ne,oe)=>{W+=ne.offsetWidth||0,W<=K-B&&(Y=oe+1)});const q=z.slice(0,Y),J=z.slice(Y);(J==null?void 
0:J.length)&&e.ellipsis&&(I=q,L.push(qe(hw,{index:"sub-menu-more",class:"el-sub-menu__hide-arrow"},{title:()=>qe(ft,{class:["el-sub-menu__icon-more"]},{default:()=>qe(yG)}),default:()=>J})))}const j=M5(e),N=(D=>e.ellipsis?k(D):D)(qe("ul",{key:String(e.collapse),role:"menubar",ref:i,style:j.value,class:{"el-menu":!0,"el-menu--horizontal":e.mode==="horizontal","el-menu--collapse":e.collapse}},[...I,...L]));return e.collapseTransition&&e.mode==="vertical"?qe(Pce,()=>N):N}}});const Ice=Ze({index:{type:Le([String,null]),default:null},route:{type:Le([String,Object])},disabled:Boolean}),Nce={click:e=>wt(e.index)&&Array.isArray(e.indexPath)},wy="ElMenuItem",Ace=G({name:wy,components:{ElTooltip:Ur},props:Ice,emits:Nce,setup(e,{emit:t}){const n=$t(),r=ve("rootMenu");r||qn(wy,"can not inject root menu");const{parentMenu:a,paddingStyle:o,indexPath:i}=E5(n,yn(e,"index")),l=ve(`subMenu:${a.value.uid}`);l||qn(wy,"can not inject sub menu");const s=x(()=>e.index===r.activeIndex),c=bt({index:e.index,indexPath:i,active:s}),d=()=>{e.disabled||(r.handleMenuItemClick({index:e.index,indexPath:i.value,route:e.route}),t("click",c))};return et(()=>{l.addSubMenu(c),r.addMenuItem(c)}),Lt(()=>{l.removeSubMenu(c),r.removeMenuItem(c)}),{Effect:tee,parentMenu:a,rootMenu:r,paddingStyle:o,active:s,handleClick:d}}}),Dce={class:"el-menu-tooltip__trigger"};function Rce(e,t,n,r,a,o){const i=we("el-tooltip");return R(),X("li",{class:U(["el-menu-item",{"is-active":e.active,"is-disabled":e.disabled}]),role:"menuitem",tabindex:"-1",style:Xe(e.paddingStyle),onClick:t[0]||(t[0]=(...l)=>e.handleClick&&e.handleClick(...l))},[e.parentMenu.type.name==="ElMenu"&&e.rootMenu.props.collapse&&e.$slots.title?(R(),fe(i,{key:0,effect:e.Effect.DARK,placement:"right","fallback-placements":["left"],persistent:""},{content:re(()=>[Oe(e.$slots,"title")]),default:re(()=>[Z("div",Dce,[Oe(e.$slots,"default")])]),_:3},8,["effect"])):(R(),X(Fe,{key:1},[Oe(e.$slots,"default"),Oe(e.$slots,"title")],64))],6)}var 
I5=Ae(Ace,[["render",Rce],["__file","/home/runner/work/element-plus/element-plus/packages/components/menu/src/menu-item.vue"]]);const Lce={title:String},rO="ElMenuItemGroup",Fce=G({name:rO,props:Lce,setup(){const e=$t(),t=ve("rootMenu");return t||qn(rO,"can not inject root menu"),{levelPadding:x(()=>{if(t.props.collapse)return 20;let r=20,a=e.parent;for(;a&&a.type.name!=="ElMenu";)a.type.name==="ElSubMenu"&&(r+=20),a=a.parent;return r})}}}),Bce={class:"el-menu-item-group"};function Vce(e,t,n,r,a,o){return R(),X("li",Bce,[Z("div",{class:"el-menu-item-group__title",style:Xe({paddingLeft:`${e.levelPadding}px`})},[e.$slots.title?Oe(e.$slots,"title",{key:1}):(R(),X(Fe,{key:0},[yt(Me(e.title),1)],2112))],4),Z("ul",null,[Oe(e.$slots,"default")])])}var N5=Ae(Fce,[["render",Vce],["__file","/home/runner/work/element-plus/element-plus/packages/components/menu/src/menu-item-group.vue"]]);const zce=xt(Mce,{MenuItem:I5,MenuItemGroup:N5,SubMenu:hw}),Hce=En(I5),jce=En(N5);En(hw);const Kce=Ze({icon:{type:wr,default:()=>iY},title:String,content:{type:String,default:""}}),Wce={back:()=>!0},Uce={name:"ElPageHeader"},Yce=G(Ke(Te({},Uce),{props:Kce,emits:Wce,setup(e,{emit:t}){const{t:n}=ln(),r=De("page-header");function a(){t("back")}return(o,i)=>(R(),X("div",{class:U(A(r).b())},[Z("div",{class:U(A(r).e("left")),onClick:a},[o.icon||o.$slots.icon?(R(),X("div",{key:0,class:U(A(r).e("icon"))},[Oe(o.$slots,"icon",{},()=>[o.icon?(R(),fe(A(ft),{key:0},{default:re(()=>[(R(),fe(Kt(o.icon)))]),_:1})):se("v-if",!0)])],2)):se("v-if",!0),Z("div",{class:U(A(r).e("title"))},[Oe(o.$slots,"title",{},()=>[yt(Me(o.title||A(n)("el.pageHeader.title")),1)])],2)],2),Z("div",{class:U(A(r).e("content"))},[Oe(o.$slots,"content",{},()=>[yt(Me(o.content),1)])],2)],2))}}));var qce=Ae(Yce,[["__file","/home/runner/work/element-plus/element-plus/packages/components/page-header/src/page-header.vue"]]);const 
/*
 * Contents of this line, in order:
 *  - Gce: xt(qce) wrapper for the page header component declared on the previous line.
 *  - Xce / Zce / ede / tde: ElPaginationPrev button; internalDisabled is true when
 *    `disabled` is set or currentPage <= 1.
 *  - nde / rde / ide / lde: ElPaginationNext button; internalDisabled is true when
 *    `disabled` is set, currentPage === pageCount, or pageCount === 0.
 *    NOTE(review): the comparison is strict equality, so currentPage > pageCount is not
 *    treated as disabled here.
 *  - A5 / Zm: the strings "ElSelectGroup" and "ElSelect", passed as keys to ve() below.
 *  - sde(e, t): start of the option helper (continues on the next line). n is the value
 *    injected under Zm, r the value injected under A5 (default {disabled:false});
 *    a tests whether e.value is a plain object; o is the "is selected" computed.
 */
Gce=xt(qce),Xce={disabled:Boolean,currentPage:{type:Number,default:1},prevText:{type:String,default:""}},Zce=G({name:"ElPaginationPrev",components:{ElIcon:ft,ArrowLeft:Es},props:Xce,emits:["click"],setup(e){return{internalDisabled:x(()=>e.disabled||e.currentPage<=1)}}}),Jce=["disabled","aria-disabled"],Qce={key:0};function ede(e,t,n,r,a,o){const i=we("arrow-left"),l=we("el-icon");return R(),X("button",{type:"button",class:"btn-prev",disabled:e.internalDisabled,"aria-disabled":e.internalDisabled,onClick:t[0]||(t[0]=s=>e.$emit("click",s))},[e.prevText?(R(),X("span",Qce,Me(e.prevText),1)):(R(),fe(l,{key:1},{default:re(()=>[g(i)]),_:1}))],8,Jce)}var tde=Ae(Zce,[["render",ede],["__file","/home/runner/work/element-plus/element-plus/packages/components/pagination/src/components/prev.vue"]]);const nde={disabled:Boolean,currentPage:{type:Number,default:1},pageCount:{type:Number,default:50},nextText:{type:String,default:""}},rde=G({name:"ElPaginationNext",components:{ElIcon:ft,ArrowRight:Da},props:nde,emits:["click"],setup(e){return{internalDisabled:x(()=>e.disabled||e.currentPage===e.pageCount||e.pageCount===0)}}}),ade=["disabled","aria-disabled"],ode={key:0};function ide(e,t,n,r,a,o){const i=we("arrow-right"),l=we("el-icon");return R(),X("button",{type:"button",class:"btn-next",disabled:e.internalDisabled,"aria-disabled":e.internalDisabled,onClick:t[0]||(t[0]=s=>e.$emit("click",s))},[e.nextText?(R(),X("span",ode,Me(e.nextText),1)):(R(),fe(l,{key:1},{default:re(()=>[g(i)]),_:1}))],8,ade)}var lde=Ae(rde,[["render",ide],["__file","/home/runner/work/element-plus/element-plus/packages/components/pagination/src/components/next.vue"]]);const A5="ElSelectGroup",Zm="ElSelect";function sde(e,t){const n=ve(Zm),r=ve(A5,{disabled:!1}),a=x(()=>Object.prototype.toString.call(e.value).toLowerCase()==="[object object]"),o=x(()=>n.props.multiple?f(n.props.modelValue,e.value):p(e.value,n.props.modelValue)),i=x(()=>{if(n.props.multiple){const 
/*
 * Continuation of sde(e, t), then the ElOption component. In order:
 *  - i: true when multiple selection is on, this option is not selected, multipleLimit > 0
 *    and the model already holds at least multipleLimit values.
 *  - l (currentLabel): e.label, else e.value unless the value is an object (then "").
 *  - s (currentValue): e.value || e.label || "".
 *  - c (isDisabled): e.disabled || t.groupDisabled || i.
 *  - f / p: membership and equality tests; object values are compared through
 *    vn(x, valueKey) (vn is defined outside this chunk).
 *  - v (hoverItem): points the select's hoverIndex at this option unless the option or
 *    its group is disabled.
 *  - watchers: a label or value change calls n.setSelected() unless the option is
 *    `created` or the select is remote (a value change is also ignored when old and new
 *    values are objects sharing the same valueKey entry); the group's disabled flag is
 *    mirrored into t.groupDisabled; a queryChange update re-tests the label against a
 *    case-insensitive RegExp and decrements n.filteredOptionsCount when the option
 *    ends up hidden.
 *  - ude: ElOption. It registers itself with onOptionCreate during setup and, inside
 *    Lt(), drops its cachedOptions entry (deferred through Ne) when that entry is still
 *    this instance and the value is not currently selected, then calls onOptionDestroy.
 *  - cde: render function header; its return expression is on the next line.
 */
y=n.props.modelValue||[];return!o.value&&y.length>=n.props.multipleLimit&&n.props.multipleLimit>0}else return!1}),l=x(()=>e.label||(a.value?"":e.value)),s=x(()=>e.value||e.label||""),c=x(()=>e.disabled||t.groupDisabled||i.value),d=$t(),f=(y=[],b)=>{if(a.value){const C=n.props.valueKey;return y&&y.some(S=>vn(S,C)===vn(b,C))}else return y&&y.includes(b)},p=(y,b)=>{if(a.value){const{valueKey:C}=n.props;return vn(y,C)===vn(b,C)}else return y===b},v=()=>{!e.disabled&&!r.disabled&&(n.hoverIndex=n.optionsArray.indexOf(d.proxy))};ce(()=>l.value,()=>{!e.created&&!n.props.remote&&n.setSelected()}),ce(()=>e.value,(y,b)=>{const{remote:C,valueKey:S}=n.props;if(!e.created&&!C){if(S&&typeof y=="object"&&typeof b=="object"&&y[S]===b[S])return;n.setSelected()}}),ce(()=>r.disabled,()=>{t.groupDisabled=r.disabled},{immediate:!0});const{queryChange:m}=Gt(n);return ce(m,y=>{const{query:b}=A(y),C=new RegExp(EU(b),"i");t.visible=C.test(l.value)||e.created,t.visible||n.filteredOptionsCount--}),{select:n,currentLabel:l,currentValue:s,itemSelected:o,isDisabled:c,hoverItem:v}}const ude=G({name:"ElOption",componentName:"ElOption",props:{value:{required:!0,type:[String,Number,Boolean,Object]},label:[String,Number],created:Boolean,disabled:{type:Boolean,default:!1}},setup(e){const t=De("select"),n=bt({index:-1,groupDisabled:!1,visible:!0,hitState:!1,hover:!1}),{currentLabel:r,itemSelected:a,isDisabled:o,select:i,hoverItem:l}=sde(e,n),{visible:s,hover:c}=or(n),d=$t().proxy,f=d.value;i.onOptionCreate(d),Lt(()=>{const{selected:v}=i,y=(i.props.multiple?v:[v]).some(b=>b.value===d.value);i.cachedOptions.get(f)===d&&!y&&Ne(()=>{i.cachedOptions.delete(f)}),i.onOptionDestroy(f,d)});function p(){e.disabled!==!0&&n.groupDisabled!==!0&&i.handleOptionSelect(d,!0)}return{ns:t,currentLabel:r,itemSelected:a,isDisabled:o,select:i,hoverItem:l,visible:s,hover:c,selectOptionClick:p,states:n}}});function cde(e,t,n,r,a,o){return 
/*
 * Contents of this line, in order:
 *  - body of cde: the <li> for one option; mouseenter calls hoverItem, click calls
 *    selectOptionClick with the "stop" modifier, and the _t directive is bound to
 *    e.visible.
 *  - pw: ElOption (ude) bound to cde.
 *  - dde / fde / hde: ElSelectDropdown. It stores the pixel width of the injected
 *    select's selectWrapper in minWidth, registers the measuring function through
 *    wc(...) in et() and unregisters it with Sc(...) in Lt() (wc/Sc look like a resize
 *    listener pair -- confirm); the render applies the value as `width` when
 *    fitInputWidth is set, otherwise as `minWidth`.
 *  - pde(e): builds the state object of a select (wrapped by bt); `selected` starts as
 *    [] for multiple selects and {} otherwise.
 *  - the trailing `const` opens the declaration of vde on the next line.
 */
at((R(),X("li",{class:U([e.ns.be("dropdown","item"),e.ns.is("disabled",e.isDisabled),{selected:e.itemSelected,hover:e.hover}]),onMouseenter:t[0]||(t[0]=(...i)=>e.hoverItem&&e.hoverItem(...i)),onClick:t[1]||(t[1]=dt((...i)=>e.selectOptionClick&&e.selectOptionClick(...i),["stop"]))},[Oe(e.$slots,"default",{},()=>[Z("span",null,Me(e.currentLabel),1)])],34)),[[_t,e.visible]])}var pw=Ae(ude,[["render",cde],["__file","/home/runner/work/element-plus/element-plus/packages/components/select/src/option.vue"]]);const dde=G({name:"ElSelectDropdown",componentName:"ElSelectDropdown",setup(){const e=ve(Zm),t=De("select"),n=x(()=>e.props.popperClass),r=x(()=>e.props.multiple),a=x(()=>e.props.fitInputWidth),o=H("");function i(){var l;o.value=`${(l=e.selectWrapper)==null?void 0:l.getBoundingClientRect().width}px`}return et(()=>{i(),wc(e.selectWrapper,i)}),Lt(()=>{Sc(e.selectWrapper,i)}),{ns:t,minWidth:o,popperClass:n,isMultiple:r,isFitInputWidth:a}}});function fde(e,t,n,r,a,o){return R(),X("div",{class:U([e.ns.b("dropdown"),e.ns.is("multiple",e.isMultiple),e.popperClass]),style:Xe({[e.isFitInputWidth?"width":"minWidth"]:e.minWidth})},[Oe(e.$slots,"default")],6)}var hde=Ae(dde,[["render",fde],["__file","/home/runner/work/element-plus/element-plus/packages/components/select/src/select-dropdown.vue"]]);function pde(e){const{t}=ln();return bt({options:new Map,cachedOptions:new Map,createdLabel:null,createdSelected:!1,selected:e.multiple?[]:{},inputLength:20,inputWidth:0,initialInputHeight:0,optionsCount:0,filteredOptionsCount:0,visible:!1,softFocus:!1,selectedLabel:"",hoverIndex:-1,query:"",previousQuery:null,inputHovering:!1,cachedPlaceHolder:"",currentPlaceholder:t("el.select.placeholder"),menuVisibleOnFocus:!1,isOnComposition:!1,isSilentBlur:!1,prefixWidth:11,tagInMultiLine:!1})}const 
/*
 * vde(e, t, n): start of the select logic; e = props, t = state object from pde,
 * n = setup context (only n.emit is used). This line declares, in order:
 *  - o, i, l, s, c, d: refs later returned as reference, input, tooltipRef, tags,
 *    selectWrapper and scrollbar; f holds the hovered option; p and v are later
 *    returned as queryChange and groupQueryChange.
 *  - b readonly, C selectDisabled, S showClose, w iconComponent, k iconReverse,
 *    $ debounce delay (300 when e.remote, else 0), O emptyText, T / _ arrays built from
 *    the options / cachedOptions maps, I showNewOption, L selectSize, j collapseTagSize,
 *    F dropMenuVisible.
 *  - watchers on C, e.placeholder and e.modelValue, and the start of the t.visible
 *    watcher (which continues on the next line).
 */
vde=(e,t,n)=>{const{t:r}=ln(),a=De("select"),o=H(null),i=H(null),l=H(null),s=H(null),c=H(null),d=H(null),f=H(-1),p=Qn({query:""}),v=Qn(""),m=ve(ga,{}),y=ve(Ia,{}),b=x(()=>!e.filterable||e.multiple||!t.visible),C=x(()=>e.disabled||m.disabled),S=x(()=>{const he=e.multiple?Array.isArray(e.modelValue)&&e.modelValue.length>0:e.modelValue!==void 0&&e.modelValue!==null&&e.modelValue!=="";return e.clearable&&!C.value&&t.inputHovering&&he}),w=x(()=>e.remote&&e.filterable?"":e.suffixIcon),k=x(()=>a.is("reverse",w.value&&t.visible)),$=x(()=>e.remote?300:0),O=x(()=>e.loading?e.loadingText||r("el.select.loading"):e.remote&&t.query===""&&t.options.size===0?!1:e.filterable&&t.query&&t.options.size>0&&t.filteredOptionsCount===0?e.noMatchText||r("el.select.noMatch"):t.options.size===0?e.noDataText||r("el.select.noData"):null),T=x(()=>Array.from(t.options.values())),_=x(()=>Array.from(t.cachedOptions.values())),I=x(()=>{const he=T.value.filter(_e=>!_e.created).some(_e=>_e.currentLabel===t.query);return e.filterable&&e.allowCreate&&t.query!==""&&!he}),L=Gn(),j=x(()=>["small"].includes(L.value)?"small":"default"),F=x({get(){return t.visible&&O.value!==!1},set(he){t.visible=he}});ce(()=>C.value,()=>{Ne(()=>{N()})}),ce(()=>e.placeholder,he=>{t.cachedPlaceHolder=t.currentPlaceholder=he}),ce(()=>e.modelValue,(he,_e)=>{var $e;e.multiple&&(N(),he&&he.length>0||i.value&&t.query!==""?t.currentPlaceholder="":t.currentPlaceholder=t.cachedPlaceHolder,e.filterable&&!e.reserveKeyword&&(t.query="",D(t.query))),M(),e.filterable&&!e.multiple&&(t.inputLength=20),er(he,_e)||($e=y.validate)==null||$e.call(y,"change").catch(Ve=>void 0)},{flush:"post",deep:!0}),ce(()=>t.visible,he=>{var _e,$e,Ve;he?(($e=(_e=l.value)==null?void 
/*
 * Continuation of vde. In order:
 *  - rest of the t.visible watcher: on open it refreshes the popper and, for filterable
 *    selects, resets the filter state; on close it blurs the input, clears the query and
 *    restores selectedLabel / placeholder; in both cases it emits "visible-change".
 *  - watcher on t.options.entries(): returns early when Bt is falsy, otherwise updates
 *    the popper, re-syncs the selection unless an <input> inside selectWrapper has focus,
 *    and re-evaluates the default first option.
 *  - watcher on t.hoverIndex: stores the hovered option in f and sets every option's
 *    `hover` flag accordingly.
 *  - N (returned as resetInputHeight): skipped when collapseTags && !filterable;
 *    otherwise, inside an Ne callback, it resizes the inner INPUT element to fit the
 *    tags container and records tagInMultiLine.
 *  - D (query-change handler): begins here and continues on the next line.
 */
0:_e.updatePopper)==null||$e.call(_e),e.filterable&&(t.filteredOptionsCount=t.optionsCount,t.query=e.remote?"":t.selectedLabel,e.multiple?(Ve=i.value)==null||Ve.focus():t.selectedLabel&&(t.currentPlaceholder=`${t.selectedLabel}`,t.selectedLabel=""),D(t.query),!e.multiple&&!e.remote&&(p.value.query="",Dc(p),Dc(v)))):(i.value&&i.value.blur(),t.query="",t.previousQuery=null,t.selectedLabel="",t.inputLength=20,t.menuVisibleOnFocus=!1,K(),Ne(()=>{i.value&&i.value.value===""&&t.selected.length===0&&(t.currentPlaceholder=t.cachedPlaceHolder)}),e.multiple||(t.selected&&(e.filterable&&e.allowCreate&&t.createdSelected&&t.createdLabel?t.selectedLabel=t.createdLabel:t.selectedLabel=t.selected.currentLabel,e.filterable&&(t.query=t.selectedLabel)),e.filterable&&(t.currentPlaceholder=t.cachedPlaceHolder))),n.emit("visible-change",he)}),ce(()=>t.options.entries(),()=>{var he,_e,$e;if(!Bt)return;(_e=(he=l.value)==null?void 0:he.updatePopper)==null||_e.call(he),e.multiple&&N();const Ve=(($e=c.value)==null?void 0:$e.querySelectorAll("input"))||[];Array.from(Ve).includes(document.activeElement)||M(),e.defaultFirstOption&&(e.filterable||e.remote)&&t.filteredOptionsCount&&B()},{flush:"post"}),ce(()=>t.hoverIndex,he=>{typeof he=="number"&&he>-1&&(f.value=T.value[he]||{}),T.value.forEach(_e=>{_e.hover=f.value===_e})});const N=()=>{e.collapseTags&&!e.filterable||Ne(()=>{var he,_e;if(!o.value)return;const $e=o.value.$el.childNodes,Ve=Array.from($e).find(Ut=>Ut.tagName==="INPUT"),st=s.value,Dt=t.initialInputHeight||vE(L.value||m.size);Ve.style.height=t.selected.length===0?`${Dt}px`:`${Math.max(st?st.clientHeight+(st.clientHeight>Dt?6:0):0,Dt)}px`,t.tagInMultiLine=Number.parseFloat(Ve.style.height)>=Dt,t.visible&&O.value!==!1&&((_e=(he=l.value)==null?void 0:he.updatePopper)==null||_e.call(he))})},D=he=>{if(!(t.previousQuery===he||t.isOnComposition)){if(t.previousQuery===null&&(typeof e.filterMethod=="function"||typeof 
/*
 * Continuation of vde. In order:
 *  - rest of D(query): repeated queries and composition input are ignored; the very
 *    first query is only recorded when a filterMethod/remoteMethod is supplied; after
 *    that the query is dispatched to e.remoteMethod, e.filterMethod, or the built-in
 *    filter (published through p), and the first option is re-hovered via B when
 *    defaultFirstOption applies.
 *  - z (managePlaceholder).
 *  - B: hovers the first visible option that is neither disabled nor in a disabled
 *    group, preferring one flagged `created`.
 *  - M (setSelected): derives t.selected / t.selectedLabel from e.modelValue.
 *  - E: looks a value up in cachedOptions (newest first), falling back to a synthetic
 *    {value, currentLabel} entry.
 *  - K: recomputes hoverIndex from the current selection after a 300 ms timeout.
 *  - W (handleResize): begins here and continues on the next line.
 */
e.remoteMethod=="function")){t.previousQuery=he;return}t.previousQuery=he,Ne(()=>{var _e,$e;t.visible&&(($e=(_e=l.value)==null?void 0:_e.updatePopper)==null||$e.call(_e))}),t.hoverIndex=-1,e.multiple&&e.filterable&&Ne(()=>{const _e=i.value.value.length*15+20;t.inputLength=e.collapseTags?Math.min(50,_e):_e,z(),N()}),e.remote&&typeof e.remoteMethod=="function"?(t.hoverIndex=-1,e.remoteMethod(he)):typeof e.filterMethod=="function"?(e.filterMethod(he),Dc(v)):(t.filteredOptionsCount=t.optionsCount,p.value.query=he,Dc(p),Dc(v)),e.defaultFirstOption&&(e.filterable||e.remote)&&t.filteredOptionsCount&&B()}},z=()=>{t.currentPlaceholder!==""&&(t.currentPlaceholder=i.value.value?"":t.cachedPlaceHolder)},B=()=>{const he=T.value.filter(Ve=>Ve.visible&&!Ve.disabled&&!Ve.states.groupDisabled),_e=he.find(Ve=>Ve.created),$e=he[0];t.hoverIndex=Ee(T.value,_e||$e)},M=()=>{var he;if(e.multiple)t.selectedLabel="";else{const $e=E(e.modelValue);(he=$e.props)!=null&&he.created?(t.createdLabel=$e.props.value,t.createdSelected=!0):t.createdSelected=!1,t.selectedLabel=$e.currentLabel,t.selected=$e,e.filterable&&(t.query=t.selectedLabel);return}const _e=[];Array.isArray(e.modelValue)&&e.modelValue.forEach($e=>{_e.push(E($e))}),t.selected=_e,Ne(()=>{N()})},E=he=>{let _e;const $e=np(he).toLowerCase()==="object",Ve=np(he).toLowerCase()==="null",st=np(he).toLowerCase()==="undefined";for(let Ft=t.cachedOptions.size-1;Ft>=0;Ft--){const Jt=_.value[Ft];if($e?vn(Jt,e.valueKey)===vn(he,e.valueKey):Jt.value===he){_e={value:he,currentLabel:Jt.currentLabel,isDisabled:Jt.isDisabled};break}}if(_e)return _e;const Dt=$e?he.label:!Ve&&!st?he:"",Ut={value:he,currentLabel:Dt};return e.multiple&&(Ut.hitState=!1),Ut},K=()=>{setTimeout(()=>{const he=e.valueKey;e.multiple?t.selected.length>0?t.hoverIndex=Math.min.apply(null,t.selected.map(_e=>T.value.findIndex($e=>vn($e,he)===vn(_e,he)))):t.hoverIndex=-1:t.hoverIndex=T.value.findIndex(_e=>He(_e)===He(t.selected))},300)},W=()=>{var he,_e;Y(),(_e=(he=l.value)==null?void 
/*
 * Continuation of vde. In order:
 *  - rest of W (handleResize) and Y (stores the reference element's width in
 *    t.inputWidth).
 *  - q / J / ne: input-change handlers; J and ne are wrapped by Yn with delay $.value.
 *  - oe: emits the `ir` event when the new value differs from e.modelValue (per er()).
 *  - Q (deletePrevTag): on an empty input, toggles the last tag's hit state through ke
 *    and removes the last model entry once ke() reports falsy.
 *  - ae (deleteTag) and de (deleteSelected: clears the value but keeps entries flagged
 *    isDisabled in multiple mode, closes the menu and emits "clear").
 *  - be (handleOptionSelect): toggles the value in multiple mode (respecting
 *    multipleLimit) or sets it and closes the menu in single mode.
 *  - Ee: index lookup that compares objects through valueKey.
 *  - Pe: sets softFocus and focuses the input (or the reference when no input exists).
 *  - Be (scrollToOption): begins here and continues on the next line.
 */
0:he.updatePopper)==null||_e.call(he),e.multiple&&!e.filterable&&N()},Y=()=>{var he;t.inputWidth=(he=o.value)==null?void 0:he.$el.getBoundingClientRect().width},q=()=>{e.filterable&&t.query!==t.selectedLabel&&(t.query=t.selectedLabel,D(t.query))},J=Yn(()=>{q()},$.value),ne=Yn(he=>{D(he.target.value)},$.value),oe=he=>{er(e.modelValue,he)||n.emit(ir,he)},Q=he=>{if(he.target.value.length<=0&&!ke()){const _e=e.modelValue.slice();_e.pop(),n.emit(Pt,_e),oe(_e)}he.target.value.length===1&&e.modelValue.length===0&&(t.currentPlaceholder=t.cachedPlaceHolder)},ae=(he,_e)=>{const $e=t.selected.indexOf(_e);if($e>-1&&!C.value){const Ve=e.modelValue.slice();Ve.splice($e,1),n.emit(Pt,Ve),oe(Ve),n.emit("remove-tag",_e.value)}he.stopPropagation()},de=he=>{he.stopPropagation();const _e=e.multiple?[]:"";if(typeof _e!="string")for(const $e of t.selected)$e.isDisabled&&_e.push($e.value);n.emit(Pt,_e),oe(_e),t.visible=!1,n.emit("clear")},be=(he,_e)=>{var $e;if(e.multiple){const Ve=(e.modelValue||[]).slice(),st=Ee(Ve,he.value);st>-1?Ve.splice(st,1):(e.multipleLimit<=0||Ve.length<e.multipleLimit)&&Ve.push(he.value),n.emit(Pt,Ve),oe(Ve),he.created&&(t.query="",D(""),t.inputLength=20),e.filterable&&(($e=i.value)==null||$e.focus())}else n.emit(Pt,he.value),oe(he.value),t.visible=!1;t.isSilentBlur=_e,Pe(),!t.visible&&Ne(()=>{Be(he)})},Ee=(he=[],_e)=>{if(!zt(_e))return he.indexOf(_e);const $e=e.valueKey;let Ve=-1;return he.some((st,Dt)=>vn(st,$e)===vn(_e,$e)?(Ve=Dt,!0):!1),Ve},Pe=()=>{t.softFocus=!0;const he=i.value||o.value;he&&(he==null||he.focus())},Be=he=>{var _e,$e,Ve,st,Dt;const Ut=Array.isArray(he)?he[0]:he;let Ft=null;if(Ut!=null&&Ut.value){const Jt=T.value.filter(Mn=>Mn.value===Ut.value);Jt.length>0&&(Ft=Jt[0].$el)}if(l.value&&Ft){const Jt=(st=(Ve=($e=(_e=l.value)==null?void 0:_e.popperRef)==null?void 0:$e.contentRef)==null?void 0:Ve.querySelector)==null?void 
0:st.call(Ve,`.${a.be("dropdown","wrap")}`);Jt&&lE(Jt,Ft)}(Dt=d.value)==null||Dt.handleScroll()},te=he=>{t.optionsCount++,t.filteredOptionsCount++,t.options.set(he.value,he),t.cachedOptions.set(he.value,he)},ie=(he,_e)=>{t.options.get(he)===_e&&(t.optionsCount--,t.filteredOptionsCount--,t.options.delete(he))},ge=he=>{he.code!==Ge.backspace&&ke(!1),t.inputLength=i.value.value.length*15+20,N()},ke=he=>{if(!Array.isArray(t.selected))return;const _e=t.selected[t.selected.length-1];if(!!_e)return he===!0||he===!1?(_e.hitState=he,he):(_e.hitState=!_e.hitState,_e.hitState)},xe=he=>{const _e=he.target.value;if(he.type==="compositionend")t.isOnComposition=!1,Ne(()=>D(_e));else{const $e=_e[_e.length-1]||"";t.isOnComposition=!zm($e)}},Ie=()=>{Ne(()=>Be(t.selected))},ye=he=>{t.softFocus?t.softFocus=!1:((e.automaticDropdown||e.filterable)&&(e.filterable&&!t.visible&&(t.menuVisibleOnFocus=!0),t.visible=!0),n.emit("focus",he))},pe=()=>{var he;t.visible=!1,(he=o.value)==null||he.blur()},ue=he=>{Ne(()=>{t.isSilentBlur?t.isSilentBlur=!1:n.emit("blur",he)}),t.softFocus=!1},Ce=he=>{de(he)},je=()=>{t.visible=!1},ee=()=>{var he;e.automaticDropdown||C.value||(t.menuVisibleOnFocus?t.menuVisibleOnFocus=!1:t.visible=!t.visible,t.visible&&((he=i.value||o.value)==null||he.focus()))},me=()=>{t.visible?T.value[t.hoverIndex]&&be(T.value[t.hoverIndex],void 0):ee()},He=he=>zt(he.value)?vn(he.value,e.valueKey):he.value,lt=x(()=>T.value.filter(he=>he.visible).every(he=>he.disabled)),Ye=he=>{if(!t.visible){t.visible=!0;return}if(!(t.options.size===0||t.filteredOptionsCount===0)&&!t.isOnComposition&&!lt.value){he==="next"?(t.hoverIndex++,t.hoverIndex===t.options.size&&(t.hoverIndex=0)):he==="prev"&&(t.hoverIndex--,t.hoverIndex<0&&(t.hoverIndex=t.options.size-1));const 
/*
 * In order:
 *  - end of Ye (navigateOptions): when the newly hovered option is disabled, in a
 *    disabled group, or hidden, Ye calls itself again with the same direction; it then
 *    scrolls to the hovered option inside an Ne callback.
 *  - the object returned by vde, mapping the minified locals to their public names.
 *  - Sy = "ElSelect" and the start of mde, the ElSelect component definition
 *    (registered child components, the ClickOutside directive and the props table);
 *    the props table continues on the next line.
 */
_e=T.value[t.hoverIndex];(_e.disabled===!0||_e.states.groupDisabled===!0||!_e.visible)&&Ye(he),Ne(()=>Be(f.value))}};return{optionsArray:T,selectSize:L,handleResize:W,debouncedOnInputChange:J,debouncedQueryChange:ne,deletePrevTag:Q,deleteTag:ae,deleteSelected:de,handleOptionSelect:be,scrollToOption:Be,readonly:b,resetInputHeight:N,showClose:S,iconComponent:w,iconReverse:k,showNewOption:I,collapseTagSize:j,setSelected:M,managePlaceholder:z,selectDisabled:C,emptyText:O,toggleLastOptionHitState:ke,resetInputState:ge,handleComposition:xe,onOptionCreate:te,onOptionDestroy:ie,handleMenuEnter:Ie,handleFocus:ye,blur:pe,handleBlur:ue,handleClearClick:Ce,handleClose:je,toggleMenu:ee,selectOption:me,getValueKey:He,navigateOptions:Ye,dropMenuVisible:F,queryChange:p,groupQueryChange:v,reference:o,input:i,tooltipRef:l,tags:s,selectWrapper:c,scrollbar:d}},Sy="ElSelect",mde=G({name:Sy,componentName:Sy,components:{ElInput:Ra,ElSelectMenu:hde,ElOption:pw,ElTag:Gm,ElScrollbar:xi,ElTooltip:Ur,ElIcon:ft},directives:{ClickOutside:Is},props:{name:String,id:String,modelValue:{type:[Array,String,Number,Boolean,Object],default:void 0},autocomplete:{type:String,default:"off"},automaticDropdown:Boolean,size:{type:String,validator:va},effect:{type:String,default:"light"},disabled:Boolean,clearable:Boolean,filterable:Boolean,allowCreate:Boolean,loading:Boolean,popperClass:{type:String,default:""},remote:Boolean,loadingText:String,noMatchText:String,noDataText:String,remoteMethod:Function,filterMethod:Function,multiple:Boolean,multipleLimit:{type:Number,default:0},placeholder:{type:String},defaultFirstOption:Boolean,reserveKeyword:{type:Boolean,default:!0},valueKey:{type:String,default:"value"},collapseTags:Boolean,collapseTagsTooltip:{type:Boolean,default:!1},popperAppendToBody:{type:Boolean,default:void 
/*
 * Continuation of mde (ElSelect). In order:
 *  - remaining props (teleported, persistent, clearIcon, fitInputWidth, suffixIcon,
 *    tagType) and the emits list.
 *  - start of setup(e, t): creates the state with pde(e), pulls the handlers out of
 *    vde(e, o, t), takes `focus` from nZ(reference) and destructures the state through
 *    or(o).
 *  - Ut (returned as wrapperKls) begins here and continues on the next line.
 */
0},teleported:mr.teleported,persistent:{type:Boolean,default:!0},clearIcon:{type:[String,Object],default:gi},fitInputWidth:{type:Boolean,default:!1},suffixIcon:{type:[String,Object],default:Of},tagType:Ke(Te({},iw.type),{default:"info"})},emits:[Pt,ir,"remove-tag","clear","visible-change","focus","blur"],setup(e,t){const n=De("select"),r=De("input"),{t:a}=ln(),o=pde(e),{optionsArray:i,selectSize:l,readonly:s,handleResize:c,collapseTagSize:d,debouncedOnInputChange:f,debouncedQueryChange:p,deletePrevTag:v,deleteTag:m,deleteSelected:y,handleOptionSelect:b,scrollToOption:C,setSelected:S,resetInputHeight:w,managePlaceholder:k,showClose:$,selectDisabled:O,iconComponent:T,iconReverse:_,showNewOption:I,emptyText:L,toggleLastOptionHitState:j,resetInputState:F,handleComposition:N,onOptionCreate:D,onOptionDestroy:z,handleMenuEnter:B,handleFocus:M,blur:E,handleBlur:K,handleClearClick:W,handleClose:Y,toggleMenu:q,selectOption:J,getValueKey:ne,navigateOptions:oe,dropMenuVisible:Q,reference:ae,input:de,tooltipRef:be,tags:Ee,selectWrapper:Pe,scrollbar:Be,queryChange:te,groupQueryChange:ie}=vde(e,o,t),{focus:ge}=nZ(ae),{inputWidth:ke,selected:xe,inputLength:Ie,filteredOptionsCount:ye,visible:pe,softFocus:ue,selectedLabel:Ce,hoverIndex:je,query:ee,inputHovering:me,currentPlaceholder:He,menuVisibleOnFocus:lt,isOnComposition:Ye,isSilentBlur:he,options:_e,cachedOptions:$e,optionsCount:Ve,prefixWidth:st,tagInMultiLine:Dt}=or(o),Ut=x(()=>{const tr=[n.b()],nr=A(l);return 
/*
 * Continuation of ElSelect's setup(). In order:
 *  - rest of Ut (wrapperKls: base class plus the size and "disabled" modifiers) and
 *    Ft (selectTagsStyle: maxWidth is inputWidth - 32 px).
 *  - ot(Zm, ...): publishes props, the option maps/counters and the option callbacks
 *    under the "ElSelect" key.
 *  - et(...) callback: initialises the placeholder, registers handleResize on
 *    selectWrapper via wc, measures the input height and width, pads the INPUT element
 *    when a prefix slot is present, then calls setSelected.
 *  - Lt(...) callback: unregisters handleResize via Sc.
 *  - value normalisation: emits Pt with [] when `multiple` is set but modelValue is not
 *    an array, and with "" when `multiple` is unset but modelValue is an array.
 *  - Jt (returned as popperPaneRef) begins here and continues on the next line.
 */
nr&&tr.push(n.m(nr)),e.disabled&&tr.push(n.m("disabled")),tr}),Ft=x(()=>({maxWidth:`${A(ke)-32}px`,width:"100%"}));ot(Zm,bt({props:e,options:_e,optionsArray:i,cachedOptions:$e,optionsCount:Ve,filteredOptionsCount:ye,hoverIndex:je,handleOptionSelect:b,onOptionCreate:D,onOptionDestroy:z,selectWrapper:Pe,selected:xe,setSelected:S,queryChange:te,groupQueryChange:ie})),et(()=>{if(o.cachedPlaceHolder=He.value=e.placeholder||a("el.select.placeholder"),e.multiple&&Array.isArray(e.modelValue)&&e.modelValue.length>0&&(He.value=""),wc(Pe.value,c),ae.value&&ae.value.$el){const tr=ae.value.input;o.initialInputHeight=tr.getBoundingClientRect().height||vE(l.value)}e.remote&&e.multiple&&w(),Ne(()=>{if(!!ae.value&&(ae.value.$el&&(ke.value=ae.value.$el.getBoundingClientRect().width),t.slots.prefix)){const tr=ae.value.$el.childNodes,nr=Array.from(tr).find(Wo=>Wo.tagName==="INPUT"),Va=ae.value.$el.querySelector(`.${r.e("prefix")}`);st.value=Math.max(Va.getBoundingClientRect().width+5,30),o.prefixWidth&&(nr.style.paddingLeft=`${Math.max(o.prefixWidth,30)}px`)}}),S()}),Lt(()=>{Sc(Pe.value,c)}),e.multiple&&!Array.isArray(e.modelValue)&&t.emit(Pt,[]),!e.multiple&&Array.isArray(e.modelValue)&&t.emit(Pt,"");const Jt=x(()=>{var tr,nr;return(nr=(tr=be.value)==null?void 0:tr.popperRef)==null?void 
/*
 * In order:
 *  - rest of Jt (popperPaneRef: tooltipRef.popperRef.contentRef, null-safe).
 *  - Mn: compatTeleported obtained from Oc(Sy, "popperAppendToBody").
 *  - the object returned from ElSelect's setup().
 *  - gde / yde / bde: hoisted static props used by the render function.
 *  - Cde: render function header; it resolves the child components and the
 *    "click-outside" directive by name. Its return expression starts on the next line.
 */
0:nr.contentRef}),{compatTeleported:Mn}=Oc(Sy,"popperAppendToBody");return{tagInMultiLine:Dt,prefixWidth:st,selectSize:l,readonly:s,handleResize:c,collapseTagSize:d,debouncedOnInputChange:f,debouncedQueryChange:p,deletePrevTag:v,deleteTag:m,deleteSelected:y,handleOptionSelect:b,scrollToOption:C,inputWidth:ke,selected:xe,inputLength:Ie,filteredOptionsCount:ye,visible:pe,softFocus:ue,selectedLabel:Ce,hoverIndex:je,query:ee,inputHovering:me,currentPlaceholder:He,menuVisibleOnFocus:lt,isOnComposition:Ye,isSilentBlur:he,options:_e,resetInputHeight:w,managePlaceholder:k,showClose:$,selectDisabled:O,iconComponent:T,iconReverse:_,showNewOption:I,emptyText:L,toggleLastOptionHitState:j,resetInputState:F,handleComposition:N,handleMenuEnter:B,handleFocus:M,blur:E,handleBlur:K,handleClearClick:W,handleClose:Y,toggleMenu:q,selectOption:J,getValueKey:ne,navigateOptions:oe,dropMenuVisible:Q,focus:ge,reference:ae,input:de,tooltipRef:be,popperPaneRef:Jt,tags:Ee,selectWrapper:Pe,scrollbar:Be,wrapperKls:Ut,selectTagsStyle:Ft,compatTeleported:Mn,nsSelect:n}}}),gde={class:"select-trigger"},yde=["disabled","autocomplete"],bde={style:{height:"100%",display:"flex","justify-content":"center","align-items":"center"}};function Cde(e,t,n,r,a,o){const i=we("el-tag"),l=we("el-tooltip"),s=we("el-icon"),c=we("el-input"),d=we("el-option"),f=we("el-scrollbar"),p=we("el-select-menu"),v=pa("click-outside");return 
/*
 * Body of Cde, the compiled ElSelect template (first part). The root <div> toggles the
 * menu on click and hosts a tooltip whose `visible` is bound to dropMenuVisible. In
 * multiple mode it renders the tag list: with collapseTags, the first selected item plus
 * a "+ N" tag showing selected.length - 1 (optionally with a tooltip listing every
 * selected item); without collapseTags, one tag per selected item. When filterable it
 * adds the inline <input> with its keyboard handlers (down/up navigate, esc and tab
 * close the menu, enter selects, delete removes the previous tag). The element that
 * follows is the reference input component.
 * These three lines are documented together because the breaks between them fall
 * inside string literals, so nothing can be inserted there.
 */
at((R(),X("div",{ref:"selectWrapper",class:U(e.wrapperKls),onClick:t[24]||(t[24]=dt((...m)=>e.toggleMenu&&e.toggleMenu(...m),["stop"]))},[g(l,{ref:"tooltipRef",visible:e.dropMenuVisible,"onUpdate:visible":t[23]||(t[23]=m=>e.dropMenuVisible=m),placement:"bottom-start",teleported:e.compatTeleported,"popper-class":[e.nsSelect.e("popper"),e.popperClass],"fallback-placements":["bottom-start","top-start","right","left"],effect:e.effect,pure:"",trigger:"click",transition:`${e.nsSelect.namespace.value}-zoom-in-top`,"stop-popper-mouse-event":!1,"gpu-acceleration":!1,persistent:e.persistent,onShow:e.handleMenuEnter},{default:re(()=>[Z("div",gde,[e.multiple?(R(),X("div",{key:0,ref:"tags",class:U(e.nsSelect.e("tags")),style:Xe(e.selectTagsStyle)},[e.collapseTags&&e.selected.length?(R(),X("span",{key:0,class:U([e.nsSelect.b("tags-wrapper"),{"has-prefix":e.prefixWidth&&e.selected.length}])},[g(i,{closable:!e.selectDisabled&&!e.selected[0].isDisabled,size:e.collapseTagSize,hit:e.selected[0].hitState,type:e.tagType,"disable-transitions":"",onClose:t[0]||(t[0]=m=>e.deleteTag(m,e.selected[0]))},{default:re(()=>[Z("span",{class:U(e.nsSelect.e("tags-text")),style:Xe({maxWidth:e.inputWidth-123+"px"})},Me(e.selected[0].currentLabel),7)]),_:1},8,["closable","size","hit","type"]),e.selected.length>1?(R(),fe(i,{key:0,closable:!1,size:e.collapseTagSize,type:e.tagType,"disable-transitions":""},{default:re(()=>[e.collapseTagsTooltip?(R(),fe(l,{key:0,disabled:e.dropMenuVisible,"fallback-placements":["bottom","top","right","left"],effect:e.effect,placement:"bottom",teleported:!1},{default:re(()=>[Z("span",{class:U(e.nsSelect.e("tags-text"))},"+ 
"+Me(e.selected.length-1),3)]),content:re(()=>[Z("div",{class:U(e.nsSelect.e("collapse-tags"))},[(R(!0),X(Fe,null,Rt(e.selected,(m,y)=>(R(),X("div",{key:y,class:U(e.nsSelect.e("collapse-tag"))},[(R(),fe(i,{key:e.getValueKey(m),class:"in-tooltip",closable:!e.selectDisabled&&!m.isDisabled,size:e.collapseTagSize,hit:m.hitState,type:e.tagType,"disable-transitions":"",style:{margin:"2px"},onClose:b=>e.deleteTag(b,m)},{default:re(()=>[Z("span",{class:U(e.nsSelect.e("tags-text")),style:Xe({maxWidth:e.inputWidth-75+"px"})},Me(m.currentLabel),7)]),_:2},1032,["closable","size","hit","type","onClose"]))],2))),128))],2)]),_:1},8,["disabled","effect"])):(R(),X("span",{key:1,class:U(e.nsSelect.e("tags-text"))},"+ "+Me(e.selected.length-1),3))]),_:1},8,["size","type"])):se("v-if",!0)],2)):se("v-if",!0),se(" <div> "),e.collapseTags?se("v-if",!0):(R(),fe(Vn,{key:1,onAfterLeave:e.resetInputHeight},{default:re(()=>[Z("span",{class:U([e.nsSelect.b("tags-wrapper"),{"has-prefix":e.prefixWidth&&e.selected.length}])},[(R(!0),X(Fe,null,Rt(e.selected,m=>(R(),fe(i,{key:e.getValueKey(m),closable:!e.selectDisabled&&!m.isDisabled,size:e.collapseTagSize,hit:m.hitState,type:e.tagType,"disable-transitions":"",onClose:y=>e.deleteTag(y,m)},{default:re(()=>[Z("span",{class:U(e.nsSelect.e("tags-text")),style:Xe({maxWidth:e.inputWidth-75+"px"})},Me(m.currentLabel),7)]),_:2},1032,["closable","size","hit","type","onClose"]))),128))],2)]),_:1},8,["onAfterLeave"])),se(" </div> 
"),e.filterable?at((R(),X("input",{key:2,ref:"input","onUpdate:modelValue":t[1]||(t[1]=m=>e.query=m),type:"text",class:U([e.nsSelect.e("input"),e.nsSelect.is(e.selectSize)]),disabled:e.selectDisabled,autocomplete:e.autocomplete,style:Xe({marginLeft:e.prefixWidth&&!e.selected.length||e.tagInMultiLine?`${e.prefixWidth}px`:"",flexGrow:1,width:`${e.inputLength/(e.inputWidth-32)}%`,maxWidth:`${e.inputWidth-42}px`}),onFocus:t[2]||(t[2]=(...m)=>e.handleFocus&&e.handleFocus(...m)),onBlur:t[3]||(t[3]=(...m)=>e.handleBlur&&e.handleBlur(...m)),onKeyup:t[4]||(t[4]=(...m)=>e.managePlaceholder&&e.managePlaceholder(...m)),onKeydown:[t[5]||(t[5]=(...m)=>e.resetInputState&&e.resetInputState(...m)),t[6]||(t[6]=It(dt(m=>e.navigateOptions("next"),["prevent"]),["down"])),t[7]||(t[7]=It(dt(m=>e.navigateOptions("prev"),["prevent"]),["up"])),t[8]||(t[8]=It(dt(m=>e.visible=!1,["stop","prevent"]),["esc"])),t[9]||(t[9]=It(dt((...m)=>e.selectOption&&e.selectOption(...m),["stop","prevent"]),["enter"])),t[10]||(t[10]=It((...m)=>e.deletePrevTag&&e.deletePrevTag(...m),["delete"])),t[11]||(t[11]=It(m=>e.visible=!1,["tab"]))],onCompositionstart:t[12]||(t[12]=(...m)=>e.handleComposition&&e.handleComposition(...m)),onCompositionupdate:t[13]||(t[13]=(...m)=>e.handleComposition&&e.handleComposition(...m)),onCompositionend:t[14]||(t[14]=(...m)=>e.handleComposition&&e.handleComposition(...m)),onInput:t[15]||(t[15]=(...m)=>e.debouncedQueryChange&&e.debouncedQueryChange(...m))},null,46,yde)),[[fC,e.query]]):se("v-if",!0)],6)):se("v-if",!0),g(c,{id:e.id,ref:"reference",modelValue:e.selectedLabel,"onUpdate:modelValue":t[16]||(t[16]=m=>e.selectedLabel=m),type:"text",placeholder:e.currentPlaceholder,name:e.name,autocomplete:e.autocomplete,size:e.selectSize,disabled:e.selectDisabled,readonly:e.readonly,"validate-event":!1,class:U([e.nsSelect.is("focus",e.visible)]),tabindex:e.multiple&&e.filterable?-1:void 
/*
 * Continuation of Cde: remaining props of the reference input (focus, blur, input,
 * paste and composition handlers, plus keydown bindings for down, up, enter, esc and
 * tab; mouseenter/mouseleave drive inputHovering). The suffix slot shows the caret icon,
 * bound through the _t directive to !showClose, and the clear icon when
 * showClose && clearIcon. The prefix slot is forwarded only when $slots.prefix exists.
 */
0,onFocus:e.handleFocus,onBlur:e.handleBlur,onInput:e.debouncedOnInputChange,onPaste:e.debouncedOnInputChange,onCompositionstart:e.handleComposition,onCompositionupdate:e.handleComposition,onCompositionend:e.handleComposition,onKeydown:[t[17]||(t[17]=It(dt(m=>e.navigateOptions("next"),["stop","prevent"]),["down"])),t[18]||(t[18]=It(dt(m=>e.navigateOptions("prev"),["stop","prevent"]),["up"])),It(dt(e.selectOption,["stop","prevent"]),["enter"]),t[19]||(t[19]=It(dt(m=>e.visible=!1,["stop","prevent"]),["esc"])),t[20]||(t[20]=It(m=>e.visible=!1,["tab"]))],onMouseenter:t[21]||(t[21]=m=>e.inputHovering=!0),onMouseleave:t[22]||(t[22]=m=>e.inputHovering=!1)},sl({suffix:re(()=>[e.iconComponent?at((R(),fe(s,{key:0,class:U([e.nsSelect.e("caret"),e.nsSelect.e("icon"),e.iconReverse])},{default:re(()=>[(R(),fe(Kt(e.iconComponent)))]),_:1},8,["class"])),[[_t,!e.showClose]]):se("v-if",!0),e.showClose&&e.clearIcon?(R(),fe(s,{key:1,class:U([e.nsSelect.e("caret"),e.nsSelect.e("icon")]),onClick:e.handleClearClick},{default:re(()=>[(R(),fe(Kt(e.clearIcon)))]),_:1},8,["class","onClick"])):se("v-if",!0)]),_:2},[e.$slots.prefix?{name:"prefix",fn:re(()=>[Z("div",bde,[Oe(e.$slots,"prefix")])])}:void 
/*
 * In order:
 *  - end of Cde: the tooltip's content slot renders the dropdown (a scrollbar list with
 *    the optional "new option" entry followed by the default slot) and the empty-text
 *    fallback; the root element is wrapped with the click-outside directive bound to
 *    handleClose and popperPaneRef.
 *  - wde: ElSelect (mde) bound to Cde.
 *  - Sde: ElOptionGroup. It publishes its props under A5, collects descendant ElOption
 *    proxies from its subtree inside et(), and recomputes `visible` (true when any
 *    collected option is visible) whenever groupQueryChange of the injected select
 *    changes.
 *  - kde: render function header; its return expression is on the next line.
 */
0]),1032,["id","modelValue","placeholder","name","autocomplete","size","disabled","readonly","class","tabindex","onFocus","onBlur","onInput","onPaste","onCompositionstart","onCompositionupdate","onCompositionend","onKeydown"])])]),content:re(()=>[g(p,null,{default:re(()=>[at(g(f,{ref:"scrollbar",tag:"ul","wrap-class":e.nsSelect.be("dropdown","wrap"),"view-class":e.nsSelect.be("dropdown","list"),class:U([e.nsSelect.is("empty",!e.allowCreate&&Boolean(e.query)&&e.filteredOptionsCount===0)])},{default:re(()=>[e.showNewOption?(R(),fe(d,{key:0,value:e.query,created:!0},null,8,["value"])):se("v-if",!0),Oe(e.$slots,"default")]),_:3},8,["wrap-class","view-class","class"]),[[_t,e.options.size>0&&!e.loading]]),e.emptyText&&(!e.allowCreate||e.loading||e.allowCreate&&e.options.size===0)?(R(),X(Fe,{key:0},[e.$slots.empty?Oe(e.$slots,"empty",{key:0}):(R(),X("p",{key:1,class:U(e.nsSelect.be("dropdown","empty"))},Me(e.emptyText),3))],2112)):se("v-if",!0)]),_:3})]),_:3},8,["visible","teleported","popper-class","effect","transition","persistent","onShow"])],2)),[[v,e.handleClose,e.popperPaneRef]])}var wde=Ae(mde,[["render",Cde],["__file","/home/runner/work/element-plus/element-plus/packages/components/select/src/select.vue"]]);const Sde=G({name:"ElOptionGroup",componentName:"ElOptionGroup",props:{label:String,disabled:{type:Boolean,default:!1}},setup(e){const t=De("select"),n=H(!0),r=$t(),a=H([]);ot(A5,bt(Te({},or(e))));const o=ve(Zm);et(()=>{a.value=i(r.subTree)});const i=s=>{const c=[];return Array.isArray(s.children)&&s.children.forEach(d=>{var f;d.type&&d.type.name==="ElOption"&&d.component&&d.component.proxy?c.push(d.component.proxy):(f=d.children)!=null&&f.length&&c.push(...i(d))}),c},{groupQueryChange:l}=Gt(o);return ce(l,()=>{n.value=a.value.some(s=>s.visible===!0)}),{visible:n,ns:t}}});function kde(e,t,n,r,a,o){return 
/*
 * In order:
 *  - body of kde and D5 (ElOptionGroup bound to it).
 *  - bs / Cv / $de: xt/En wrappers for ElSelect, ElOption and ElOptionGroup.
 *  - vw(): returns the value injected under PE, defaulting to {}.
 *  - Ode / Pde / Tde / xde: ElPaginationSizes. A change of pageSizes emits
 *    "page-size-change" with the current pageSize when it is still listed, otherwise
 *    with the first entry; handleChange stores the new size and forwards it (as a
 *    Number) to handleSizeChange from vw() when that function exists.
 *  - _de: start of ElPaginationJumper (continues on the next line).
 */
at((R(),X("ul",{class:U(e.ns.be("group","wrap"))},[Z("li",{class:U(e.ns.be("group","title"))},Me(e.label),3),Z("li",null,[Z("ul",{class:U(e.ns.b("group"))},[Oe(e.$slots,"default")],2)])],2)),[[_t,e.visible]])}var D5=Ae(Sde,[["render",kde],["__file","/home/runner/work/element-plus/element-plus/packages/components/select/src/option-group.vue"]]);const bs=xt(wde,{Option:pw,OptionGroup:D5}),Cv=En(pw),$de=En(D5),vw=()=>ve(PE,{}),Ode=Ze({pageSize:{type:Number,required:!0},pageSizes:{type:Le(Array),default:()=>xn([10,20,30,40,50,100])},popperClass:{type:String,default:""},disabled:Boolean,size:{type:String,default:"default"}}),Pde=G({name:"ElPaginationSizes",components:{ElSelect:bs,ElOption:Cv},props:Ode,emits:["page-size-change"],setup(e,{emit:t}){const{t:n}=ln(),r=De("pagination"),a=vw(),o=H(e.pageSize);ce(()=>e.pageSizes,(s,c)=>{if(!er(s,c)&&Array.isArray(s)){const d=s.includes(e.pageSize)?e.pageSize:e.pageSizes[0];t("page-size-change",d)}}),ce(()=>e.pageSize,s=>{o.value=s});const i=x(()=>e.pageSizes);function l(s){var c;s!==o.value&&(o.value=s,(c=a.handleSizeChange)==null||c.call(a,Number(s)))}return{ns:r,innerPagesizes:i,innerPageSize:o,t:n,handleChange:l}}});function Tde(e,t,n,r,a,o){const i=we("el-option"),l=we("el-select");return R(),X("span",{class:U(e.ns.e("sizes"))},[g(l,{"model-value":e.innerPageSize,disabled:e.disabled,"popper-class":e.popperClass,size:e.size,onChange:e.handleChange},{default:re(()=>[(R(!0),X(Fe,null,Rt(e.innerPagesizes,s=>(R(),fe(i,{key:s,value:s,label:s+e.t("el.pagination.pagesize")},null,8,["value","label"]))),128))]),_:1},8,["model-value","disabled","popper-class","size","onChange"])],2)}var xde=Ae(Pde,[["render",Tde],["__file","/home/runner/work/element-plus/element-plus/packages/components/pagination/src/components/sizes.vue"]]);const _de=G({name:"ElPaginationJumper",components:{ElInput:Ra},setup(){const{t:e}=ln(),t=De("pagination"),{pageCount:n,disabled:r,currentPage:a,changeEvent:o}=vw(),i=H(),l=x(()=>{var 
/*
 * In order:
 *  - rest of _de (ElPaginationJumper): innerValue is the typed value when one exists,
 *    otherwise currentPage; handleInput stores the typed value as a number;
 *    handleChange truncates the input to an integer, passes it to changeEvent (when
 *    defined) and clears the typed value.
 *  - Ede / Mde / Ide: its render function and bound component.
 *  - Nde / Ade / Dde / Rde / Lde: ElPaginationTotal (prop `total`, default 1000).
 *  - Fde / Bde: start of ElPaginationPager. `s` (returned as pagers) computes the page
 *    numbers listed between the first and the last page; it continues on the next line.
 */
d;return(d=i.value)!=null?d:a==null?void 0:a.value});function s(d){i.value=+d}function c(d){d=Math.trunc(+d),o==null||o(+d),i.value=void 0}return{ns:t,pageCount:n,disabled:r,innerValue:l,t:e,handleInput:s,handleChange:c}}}),Ede=["disabled"];function Mde(e,t,n,r,a,o){const i=we("el-input");return R(),X("span",{class:U(e.ns.e("jump")),disabled:e.disabled},[yt(Me(e.t("el.pagination.goto"))+" ",1),g(i,{size:"small",class:U([e.ns.e("editor"),e.ns.is("in-pagination")]),min:1,max:e.pageCount,disabled:e.disabled,"model-value":e.innerValue,type:"number","onUpdate:modelValue":e.handleInput,onChange:e.handleChange},null,8,["class","max","disabled","model-value","onUpdate:modelValue","onChange"]),yt(" "+Me(e.t("el.pagination.pageClassifier")),1)],10,Ede)}var Ide=Ae(_de,[["render",Mde],["__file","/home/runner/work/element-plus/element-plus/packages/components/pagination/src/components/jumper.vue"]]);const Nde={total:{type:Number,default:1e3}},Ade=G({name:"ElPaginationTotal",props:Nde,setup(){const{t:e}=ln(),t=De("pagination"),{disabled:n}=vw();return{t:e,ns:t,disabled:n}}}),Dde=["disabled"];function Rde(e,t,n,r,a,o){return R(),X("span",{class:U(e.ns.e("total")),disabled:e.disabled},Me(e.t("el.pagination.total",{total:e.total})),11,Dde)}var Lde=Ae(Ade,[["render",Rde],["__file","/home/runner/work/element-plus/element-plus/packages/components/pagination/src/components/total.vue"]]);const Fde={currentPage:{type:Number,default:1},pageCount:{type:Number,required:!0},pagerCount:{type:Number,default:7},disabled:Boolean},Bde=G({name:"ElPaginationPager",components:{DArrowLeft:Fm,DArrowRight:Bm,MoreFilled:fG},props:Fde,emits:["change"],setup(e,{emit:t}){const n=De("pager"),r=De("icon"),a=H(!1),o=H(!1),i=H(!1),l=H(!1),s=x(()=>{const p=e.pagerCount,v=(p-1)/2,m=Number(e.currentPage),y=Number(e.pageCount);let b=!1,C=!1;y>p&&(m>p-v&&(b=!0),m<y-v&&(C=!0));const S=[];if(b&&!C){const w=y-(p-2);for(let k=w;k<y;k++)S.push(k)}else if(!b&&C)for(let w=2;w<p;w++)S.push(w);else if(b&&C){const 
w=Math.floor(p/2)-1;for(let k=m-w;k<=m+w;k++)S.push(k)}else for(let w=2;w<y;w++)S.push(w);return S});Wn(()=>{const p=(e.pagerCount-1)/2;a.value=!1,o.value=!1,e.pageCount>e.pagerCount&&(e.currentPage>e.pagerCount-p&&(a.value=!0),e.currentPage<e.pageCount-p&&(o.value=!0))});function c(p){e.disabled||(p==="left"?i.value=!0:l.value=!0)}function d(p){const v=p.target;if(v.tagName.toLowerCase()==="li"&&Array.from(v.classList).includes("number")){const m=Number(v.textContent);m!==e.currentPage&&t("change",m)}}function f(p){const v=p.target;if(v.tagName.toLowerCase()==="ul"||e.disabled)return;let m=Number(v.textContent);const y=e.pageCount,b=e.currentPage,C=e.pagerCount-2;v.className.includes("more")&&(v.className.includes("quickprev")?m=b-C:v.className.includes("quicknext")&&(m=b+C)),Number.isNaN(+m)||(m<1&&(m=1),m>y&&(m=y)),m!==b&&t("change",m)}return{showPrevMore:a,showNextMore:o,quickPrevHover:i,quickNextHover:l,pagers:s,nsPager:n,nsIcon:r,onMouseenter:c,onPagerClick:f,onEnter:d}}}),Vde=["aria-current"],zde=["aria-current"],Hde=["aria-current"];function jde(e,t,n,r,a,o){const i=we("d-arrow-left"),l=we("more-filled"),s=we("d-arrow-right");return R(),X("ul",{class:U(e.nsPager.b()),onClick:t[4]||(t[4]=(...c)=>e.onPagerClick&&e.onPagerClick(...c)),onKeyup:t[5]||(t[5]=It((...c)=>e.onEnter&&e.onEnter(...c),["enter"]))},[e.pageCount>0?(R(),X("li",{key:0,class:U([[e.nsPager.is("active",e.currentPage===1),e.nsPager.is("disabled",e.disabled)],"number"]),"aria-current":e.currentPage===1,tabindex:"0"}," 1 
",10,Vde)):se("v-if",!0),e.showPrevMore?(R(),X("li",{key:1,class:U(["more","btn-quickprev",e.nsIcon.b(),e.nsPager.is("disabled",e.disabled)]),onMouseenter:t[0]||(t[0]=c=>e.onMouseenter("left")),onMouseleave:t[1]||(t[1]=c=>e.quickPrevHover=!1)},[e.quickPrevHover?(R(),fe(i,{key:0})):(R(),fe(l,{key:1}))],34)):se("v-if",!0),(R(!0),X(Fe,null,Rt(e.pagers,c=>(R(),X("li",{key:c,class:U([[e.nsPager.is("active",e.currentPage===c),e.nsPager.is("disabled",e.disabled)],"number"]),"aria-current":e.currentPage===c,tabindex:"0"},Me(c),11,zde))),128)),e.showNextMore?(R(),X("li",{key:2,class:U(["more","btn-quicknext",e.nsIcon.b(),e.nsPager.is("disabled",e.disabled)]),onMouseenter:t[2]||(t[2]=c=>e.onMouseenter("right")),onMouseleave:t[3]||(t[3]=c=>e.quickNextHover=!1)},[e.quickNextHover?(R(),fe(s,{key:0})):(R(),fe(l,{key:1}))],34)):se("v-if",!0),e.pageCount>1?(R(),X("li",{key:3,class:U([[e.nsPager.is("active",e.currentPage===e.pageCount),e.nsPager.is("disabled",e.disabled)],"number"]),"aria-current":e.currentPage===e.pageCount,tabindex:"0"},Me(e.pageCount),11,Hde)):se("v-if",!0)],34)}var Kde=Ae(Bde,[["render",jde],["__file","/home/runner/work/element-plus/element-plus/packages/components/pagination/src/components/pager.vue"]]);const Tr=e=>typeof e!="number",Wde=Ze({total:Number,pageSize:Number,defaultPageSize:Number,currentPage:Number,defaultCurrentPage:Number,pageCount:Number,pagerCount:{type:Number,validator:e=>typeof e=="number"&&Math.trunc(e)===e&&e>4&&e<22&&e%2===1,default:7},layout:{type:String,default:["prev","pager","next","jumper","->","total"].join(", ")},pageSizes:{type:Le(Array),default:()=>xn([10,20,30,40,50,100])},popperClass:{type:String,default:""},prevText:{type:String,default:""},nextText:{type:String,default:""},small:Boolean,background:Boolean,disabled:Boolean,hideOnSinglePage:Boolean}),Ude={"update:current-page":e=>typeof e=="number","update:page-size":e=>typeof e=="number","size-change":e=>typeof e=="number","current-change":e=>typeof 
e=="number","prev-click":e=>typeof e=="number","next-click":e=>typeof e=="number"},aO="ElPagination";var Yde=G({name:aO,props:Wde,emits:Ude,setup(e,{emit:t,slots:n}){const{t:r}=ln(),a=De("pagination"),o=$t().vnode.props||{},i="onUpdate:currentPage"in o||"onUpdate:current-page"in o||"onCurrentChange"in o,l="onUpdate:pageSize"in o||"onUpdate:page-size"in o||"onSizeChange"in o,s=x(()=>{if(Tr(e.total)&&Tr(e.pageCount)||!Tr(e.currentPage)&&!i)return!1;if(e.layout.includes("sizes")){if(Tr(e.pageCount)){if(!Tr(e.total)&&!Tr(e.pageSize)&&!l)return!1}else if(!l)return!1}return!0}),c=H(Tr(e.defaultPageSize)?10:e.defaultPageSize),d=H(Tr(e.defaultCurrentPage)?1:e.defaultCurrentPage),f=x({get(){return Tr(e.pageSize)?c.value:e.pageSize},set(w){Tr(e.pageSize)&&(c.value=w),l&&(t("update:page-size",w),t("size-change",w))}}),p=x(()=>{let w=0;return Tr(e.pageCount)?Tr(e.total)||(w=Math.max(1,Math.ceil(e.total/f.value))):w=e.pageCount,w}),v=x({get(){return Tr(e.currentPage)?d.value:e.currentPage},set(w){let k=w;w<1?k=1:w>p.value&&(k=p.value),Tr(e.currentPage)&&(d.value=k),i&&(t("update:current-page",k),t("current-change",k))}});ce(p,w=>{v.value>w&&(v.value=w)});function m(w){v.value=w}function y(w){f.value=w;const k=p.value;v.value>k&&(v.value=k)}function b(){e.disabled||(v.value-=1,t("prev-click",v.value))}function C(){e.disabled||(v.value+=1,t("next-click",v.value))}function S(w,k){w&&(w.props||(w.props={}),w.props.class=[w.props.class,k].join(" "))}return ot(PE,{pageCount:p,disabled:x(()=>e.disabled),currentPage:v,changeEvent:m,handleSizeChange:y}),()=>{var w,k;if(!s.value)return r("el.pagination.deprecationWarning"),null;if(!e.layout||e.hideOnSinglePage&&p.value<=1)return null;const 
$=[],O=[],T=qe("div",{class:a.e("rightwrapper")},O),_={prev:qe(tde,{disabled:e.disabled,currentPage:v.value,prevText:e.prevText,onClick:b}),jumper:qe(Ide),pager:qe(Kde,{currentPage:v.value,pageCount:p.value,pagerCount:e.pagerCount,onChange:m,disabled:e.disabled}),next:qe(lde,{disabled:e.disabled,currentPage:v.value,pageCount:p.value,nextText:e.nextText,onClick:C}),sizes:qe(xde,{pageSize:f.value,pageSizes:e.pageSizes,popperClass:e.popperClass,disabled:e.disabled,size:e.small?"small":"default"}),slot:(k=(w=n==null?void 0:n.default)==null?void 0:w.call(n))!=null?k:null,total:qe(Lde,{total:Tr(e.total)?0:e.total})},I=e.layout.split(",").map(j=>j.trim());let L=!1;return I.forEach(j=>{if(j==="->"){L=!0;return}L?O.push(_[j]):$.push(_[j])}),S($[0],a.is("first")),S($[$.length-1],a.is("last")),L&&O.length>0&&(S(O[0],a.is("first")),S(O[O.length-1],a.is("last")),$.push(T)),qe("div",{role:"pagination","aria-label":"pagination",class:[a.b(),a.is("background",e.background),{[a.m("small")]:e.small}]},$)}}});const qde=xt(Yde),Gde=Ze({title:{type:String},confirmButtonText:{type:String},cancelButtonText:{type:String},confirmButtonType:{type:String,values:m0,default:"primary"},cancelButtonType:{type:String,values:m0,default:"text"},icon:{type:wr,default:DG},iconColor:{type:String,default:"#f90"},hideIcon:{type:Boolean,default:!1},hideAfter:{type:Number,default:200},onConfirm:{type:Le(Function)},onCancel:{type:Le(Function)},teleported:mr.teleported,persistent:mr.persistent}),oO="ElPopconfirm",Xde=G({name:oO,components:{ElButton:xa,ElTooltip:Ur,ElIcon:ft},props:Gde,setup(e){const{compatTeleported:t}=Oc(oO,"appendToBody"),{t:n}=ln(),r=De("popconfirm"),a=H(),o=()=>{var f,p;(p=(f=A(a))==null?void 0:f.onClose)==null||p.call(f)},i=()=>{o()},l=f=>{var p;(p=e.onConfirm)==null||p.call(e,f),i()},s=f=>{var 
p;(p=e.onCancel)==null||p.call(e,f),i()},c=x(()=>e.confirmButtonText||n("el.popconfirm.confirmButtonText")),d=x(()=>e.cancelButtonText||n("el.popconfirm.cancelButtonText"));return{finalConfirmButtonText:c,finalCancelButtonText:d,tooltipRef:a,ns:r,compatTeleported:t,confirm:l,cancel:s}}});function Zde(e,t,n,r,a,o){const i=we("el-icon"),l=we("el-button"),s=we("el-tooltip");return R(),fe(s,hn({ref:"tooltipRef"},e.$attrs,{trigger:"click",effect:"light","popper-class":`${e.ns.namespace.value}-popover`,teleported:e.compatTeleported,"fallback-placements":["bottom","top","right","left"],"hide-after":e.hideAfter,persistent:e.persistent}),{content:re(()=>[Z("div",{class:U(e.ns.b())},[Z("div",{class:U(e.ns.e("main"))},[!e.hideIcon&&e.icon?(R(),fe(i,{key:0,class:U(e.ns.e("icon")),style:Xe({color:e.iconColor})},{default:re(()=>[(R(),fe(Kt(e.icon)))]),_:1},8,["class","style"])):se("v-if",!0),yt(" "+Me(e.title),1)],2),Z("div",{class:U(e.ns.e("action"))},[g(l,{size:"small",type:e.cancelButtonType,onClick:e.cancel},{default:re(()=>[yt(Me(e.finalCancelButtonText),1)]),_:1},8,["type","onClick"]),g(l,{size:"small",type:e.confirmButtonType,onClick:e.confirm},{default:re(()=>[yt(Me(e.finalConfirmButtonText),1)]),_:1},8,["type","onClick"])],2)],2)]),default:re(()=>[e.$slots.reference?Oe(e.$slots,"reference",{key:0}):se("v-if",!0)]),_:3},16,["popper-class","teleported","hide-after","persistent"])}var Jde=Ae(Xde,[["render",Zde],["__file","/home/runner/work/element-plus/element-plus/packages/components/popconfirm/src/popconfirm.vue"]]);const Qde=xt(Jde),efe=Ze({trigger:qd.trigger,placement:Sp.placement,disabled:qd.disabled,visible:mr.visible,transition:mr.transition,popperOptions:Sp.popperOptions,tabindex:Sp.tabindex,appendToBody:{type:Boolean,default:void 
0},content:mr.content,popperStyle:mr.popperStyle,popperClass:mr.popperClass,enterable:Ke(Te({},mr.enterable),{default:!0}),effect:Ke(Te({},mr.effect),{default:"light"}),teleported:mr.teleported,title:String,width:{type:[String,Number],default:150},offset:{type:Number,default:void 0},showAfter:{type:Number,default:0},hideAfter:{type:Number,default:200},autoClose:{type:Number,default:0},showArrow:{type:Boolean,default:!0},persistent:{type:Boolean,default:!0}}),tfe=["update:visible","before-enter","before-leave","after-enter","after-leave"],iO="ElPopover",nfe=G({name:iO,components:{ElTooltip:Ur},props:efe,emits:tfe,setup(e,{emit:t}){const n=De("popover"),r=H(null),a=x(()=>{var y;return(y=A(r))==null?void 0:y.popperRef}),o=x(()=>wt(e.width)?e.width:`${e.width}px`),i=x(()=>[{width:o.value},e.popperStyle]),l=x(()=>[n.b(),e.popperClass,{[n.m("plain")]:!!e.content}]),s=x(()=>e.transition==="el-fade-in-linear"),{compatTeleported:c}=Oc(iO,"appendToBody");return{compatTeleported:c,ns:n,kls:l,gpuAcceleration:s,style:i,tooltipRef:r,popperRef:a,hide:()=>{var y;(y=r.value)==null||y.hide()},beforeEnter:()=>{t("before-enter")},beforeLeave:()=>{t("before-leave")},afterEnter:()=>{t("after-enter")},afterLeave:()=>{t("update:visible",!1),t("after-leave")}}}});function rfe(e,t,n,r,a,o){const i=we("el-tooltip");return 
R(),fe(i,hn({ref:"tooltipRef"},e.$attrs,{trigger:e.trigger,placement:e.placement,disabled:e.disabled,visible:e.visible,transition:e.transition,"popper-options":e.popperOptions,tabindex:e.tabindex,"append-to-body":e.appendToBody,content:e.content,offset:e.offset,"show-after":e.showAfter,"hide-after":e.hideAfter,"auto-close":e.autoClose,"show-arrow":e.showArrow,"aria-label":e.title,effect:e.effect,enterable:e.enterable,"popper-class":e.kls,"popper-style":e.style,teleported:e.compatTeleported,persistent:e.persistent,"gpu-acceleration":e.gpuAcceleration,onBeforeShow:e.beforeEnter,onBeforeHide:e.beforeLeave,onShow:e.afterEnter,onHide:e.afterLeave}),{content:re(()=>[e.title?(R(),X("div",{key:0,class:U(e.ns.e("title")),role:"title"},Me(e.title),3)):se("v-if",!0),Oe(e.$slots,"default",{},()=>[yt(Me(e.content),1)])]),default:re(()=>[e.$slots.reference?Oe(e.$slots,"reference",{key:0}):se("v-if",!0)]),_:3},16,["trigger","placement","disabled","visible","transition","popper-options","tabindex","append-to-body","content","offset","show-after","hide-after","auto-close","show-arrow","aria-label","effect","enterable","popper-class","popper-style","teleported","persistent","gpu-acceleration","onBeforeShow","onBeforeHide","onShow","onHide"])}var Cd=Ae(nfe,[["render",rfe],["__file","/home/runner/work/element-plus/element-plus/packages/components/popover/src/index.vue"]]);const lO=(e,t)=>{const n=t.arg||t.value,r=n==null?void 0:n.popperRef;r&&(r.triggerRef=e)};var F0={mounted(e,t){lO(e,t)},updated(e,t){lO(e,t)}};const afe="popover";Cd.install=e=>{e.component(Cd.name,Cd)};F0.install=e=>{e.directive(afe,F0)};const R5=F0;Cd.directive=R5;const 
ofe=Cd,ife=ofe,lfe=R5,sfe=Ze({type:{type:String,default:"line",values:["line","circle","dashboard"]},percentage:{type:Number,default:0,validator:e=>e>=0&&e<=100},status:{type:String,default:"",values:["","success","exception","warning"]},indeterminate:{type:Boolean,default:!1},duration:{type:Number,default:3},strokeWidth:{type:Number,default:6},strokeLinecap:{type:Le(String),default:"round"},textInside:{type:Boolean,default:!1},width:{type:Number,default:126},showText:{type:Boolean,default:!0},color:{type:Le([String,Array,Function]),default:""},format:{type:Le(Function),default:e=>`${e}%`}}),ufe=G({name:"ElProgress",components:{ElIcon:ft,CircleCheck:mv,CircleClose:gi,Check:Wu,Close:Ma,WarningFilled:Wd},props:sfe,setup(e){const t=De("progress"),n=x(()=>({width:`${e.percentage}%`,animationDuration:`${e.duration}s`,backgroundColor:y(e.percentage)})),r=x(()=>(e.strokeWidth/e.width*100).toFixed(1)),a=x(()=>e.type==="circle"||e.type==="dashboard"?Number.parseInt(`${50-Number.parseFloat(r.value)/2}`,10):0),o=x(()=>{const C=a.value,S=e.type==="dashboard";return`
          M 50 50
          m 0 ${S?"":"-"}${C}
          a ${C} ${C} 0 1 1 0 ${S?"-":""}${C*2}
          a ${C} ${C} 0 1 1 0 ${S?"":"-"}${C*2}
          `}),i=x(()=>2*Math.PI*a.value),l=x(()=>e.type==="dashboard"?.75:1),s=x(()=>`${-1*i.value*(1-l.value)/2}px`),c=x(()=>({strokeDasharray:`${i.value*l.value}px, ${i.value}px`,strokeDashoffset:s.value})),d=x(()=>({strokeDasharray:`${i.value*l.value*(e.percentage/100)}px, ${i.value}px`,strokeDashoffset:s.value,transition:"stroke-dasharray 0.6s ease 0s, stroke 0.6s ease"})),f=x(()=>{let C;if(e.color)C=y(e.percentage);else switch(e.status){case"success":C="#13ce66";break;case"exception":C="#ff4949";break;case"warning":C="#e6a23c";break;default:C="#20a0ff"}return C}),p=x(()=>e.status==="warning"?Wd:e.type==="line"?e.status==="success"?mv:gi:e.status==="success"?Wu:Ma),v=x(()=>e.type==="line"?12+e.strokeWidth*.4:e.width*.111111+2),m=x(()=>e.format(e.percentage)),y=C=>{var S;const{color:w}=e;if(typeof w=="function")return w(C);if(typeof w=="string")return w;{const k=100/w.length,O=w.map((T,_)=>typeof T=="string"?{color:T,percentage:(_+1)*k}:T).sort((T,_)=>T.percentage-_.percentage);for(const T of O)if(T.percentage>C)return T.color;return(S=O[O.length-1])==null?void 0:S.color}},b=x(()=>({percentage:e.percentage}));return{ns:t,barStyle:n,relativeStrokeWidth:r,radius:a,trackPath:o,perimeter:i,rate:l,strokeDashoffset:s,trailPathStyle:c,circlePathStyle:d,stroke:f,statusIcon:p,progressTextSize:v,content:m,slotData:b}}}),cfe=["aria-valuenow"],dfe={viewBox:"0 0 100 100"},ffe=["d","stroke-width"],hfe=["d","stroke","stroke-linecap","stroke-width"],pfe={key:0};function vfe(e,t,n,r,a,o){const i=we("el-icon");return 
R(),X("div",{class:U([e.ns.b(),e.ns.m(e.type),e.ns.is(e.status),{[e.ns.m("without-text")]:!e.showText,[e.ns.m("text-inside")]:e.textInside}]),role:"progressbar","aria-valuenow":e.percentage,"aria-valuemin":"0","aria-valuemax":"100"},[e.type==="line"?(R(),X("div",{key:0,class:U(e.ns.b("bar"))},[Z("div",{class:U(e.ns.be("bar","outer")),style:Xe({height:`${e.strokeWidth}px`})},[Z("div",{class:U([e.ns.be("bar","inner"),{[e.ns.bem("bar","inner","indeterminate")]:e.indeterminate}]),style:Xe(e.barStyle)},[(e.showText||e.$slots.default)&&e.textInside?(R(),X("div",{key:0,class:U(e.ns.be("bar","innerText"))},[Oe(e.$slots,"default",Za(ll(e.slotData)),()=>[Z("span",null,Me(e.content),1)])],2)):se("v-if",!0)],6)],6)],2)):(R(),X("div",{key:1,class:U(e.ns.b("circle")),style:Xe({height:`${e.width}px`,width:`${e.width}px`})},[(R(),X("svg",dfe,[Z("path",{class:U(e.ns.be("circle","track")),d:e.trackPath,stroke:"var(--el-fill-color-light, #e5e9f2)","stroke-width":e.relativeStrokeWidth,fill:"none",style:Xe(e.trailPathStyle)},null,14,ffe),Z("path",{class:U(e.ns.be("circle","path")),d:e.trackPath,stroke:e.stroke,fill:"none","stroke-linecap":e.strokeLinecap,"stroke-width":e.percentage?e.relativeStrokeWidth:0,style:Xe(e.circlePathStyle)},null,14,hfe)]))],6)),(e.showText||e.$slots.default)&&!e.textInside?(R(),X("div",{key:2,class:U(e.ns.e("text")),style:Xe({fontSize:`${e.progressTextSize}px`})},[Oe(e.$slots,"default",Za(ll(e.slotData)),()=>[e.status?(R(),fe(i,{key:1},{default:re(()=>[(R(),fe(Kt(e.statusIcon)))]),_:1})):(R(),X("span",pfe,Me(e.content),1))])],6)):se("v-if",!0)],10,cfe)}var mfe=Ae(ufe,[["render",vfe],["__file","/home/runner/work/element-plus/element-plus/packages/components/progress/src/progress.vue"]]);const 
L5=xt(mfe),gfe=Ze({modelValue:{type:Number,default:0},lowThreshold:{type:Number,default:2},highThreshold:{type:Number,default:4},max:{type:Number,default:5},colors:{type:Le([Array,Object]),default:()=>xn(["","",""])},voidColor:{type:String,default:""},disabledVoidColor:{type:String,default:""},icons:{type:Le([Array,Object]),default:()=>[oh,oh,oh]},voidIcon:{type:wr,default:()=>mX},disabledVoidIcon:{type:wr,default:()=>oh},disabled:{type:Boolean,default:!1},allowHalf:{type:Boolean,default:!1},showText:{type:Boolean,default:!1},showScore:{type:Boolean,default:!1},textColor:{type:String,default:""},texts:{type:Le(Array),default:()=>xn(["Extremely bad","Disappointed","Fair","Satisfied","Surprise"])},scoreTemplate:{type:String,default:"{value}"},size:{type:String,validator:va}}),yfe={change:e=>typeof e=="number",[Pt]:e=>typeof e=="number"},bfe=["aria-valuenow","aria-valuetext","aria-valuemax"],Cfe=["onMousemove","onClick"],wfe={name:"ElRate"},Sfe=G(Ke(Te({},wfe),{props:gfe,emits:yfe,setup(e,{expose:t,emit:n}){const r=e;function a(N,D){const z=E=>zt(E),B=Object.keys(D).map(E=>+E).filter(E=>{const K=D[E];return(z(K)?K.excluded:!1)?N<E:N<=E}).sort((E,K)=>E-K),M=D[B[0]];return z(M)&&M.value||M}const o=ve(ga,void 0),i=Gn(),l=De("rate"),s=H(r.modelValue),c=H(-1),d=H(!0),f=x(()=>[l.b(),l.m(i.value)]),p=x(()=>r.disabled||(o==null?void 0:o.disabled)),v=x(()=>({"--el-rate-void-color":r.voidColor,"--el-rate-disabled-void-color":r.disabledVoidColor,"--el-rate-fill-color":C.value})),m=x(()=>{let N="";return r.showScore?N=r.scoreTemplate.replace(/\{\s*value\s*\}/,p.value?`${r.modelValue}`:`${s.value}`):r.showText&&(N=r.texts[Math.ceil(s.value)-1]),N}),y=x(()=>r.modelValue*100-Math.floor(r.modelValue)*100),b=x(()=>pt(r.colors)?{[r.lowThreshold]:r.colors[0],[r.highThreshold]:{value:r.colors[1],excluded:!0},[r.max]:r.colors[2]}:r.colors),C=x(()=>{const N=a(s.value,b.value);return zt(N)?"":N}),S=x(()=>{let N="";return 
p.value?N=`${y.value}%`:r.allowHalf&&(N="50%"),{color:C.value,width:N}}),w=x(()=>pt(r.icons)?{[r.lowThreshold]:r.icons[0],[r.highThreshold]:{value:r.icons[1],excluded:!0},[r.max]:r.icons[2]}:r.icons),k=x(()=>a(r.modelValue,w.value)),$=x(()=>p.value?r.disabledVoidIcon:r.voidIcon),O=x(()=>a(s.value,w.value)),T=x(()=>{const N=Array.from({length:r.max}),D=s.value;return N.fill(O.value,0,D),N.fill($.value,D,r.max),N});function _(N){const D=p.value&&y.value>0&&N-1<r.modelValue&&N>r.modelValue,z=r.allowHalf&&d.value&&N-.5<=s.value&&N>s.value;return D||z}function I(N){p.value||(r.allowHalf&&d.value?(n(Pt,s.value),r.modelValue!==s.value&&n("change",s.value)):(n(Pt,N),r.modelValue!==N&&n("change",N)))}function L(N){if(p.value)return;let D=s.value;const z=N.code;return z===Ge.up||z===Ge.right?(r.allowHalf?D+=.5:D+=1,N.stopPropagation(),N.preventDefault()):(z===Ge.left||z===Ge.down)&&(r.allowHalf?D-=.5:D-=1,N.stopPropagation(),N.preventDefault()),D=D<0?0:D,D=D>r.max?r.max:D,n(Pt,D),n("change",D),D}function j(N,D){if(!p.value){if(r.allowHalf){let z=D.target;to(z,l.e("item"))&&(z=z.querySelector(`.${l.e("icon")}`)),(z.clientWidth===0||to(z,l.e("decimal")))&&(z=z.parentNode),d.value=D.offsetX*2<=z.clientWidth,s.value=d.value?N-.5:N}else s.value=N;c.value=N}}function F(){p.value||(r.allowHalf&&(d.value=r.modelValue!==Math.floor(r.modelValue)),s.value=r.modelValue,c.value=-1)}return 
ce(()=>r.modelValue,N=>{s.value=N,d.value=r.modelValue!==Math.floor(r.modelValue)}),r.modelValue||n(Pt,0),t({setCurrentValue:j,resetCurrentValue:F}),(N,D)=>(R(),X("div",{class:U([A(f),A(l).is("disabled",A(p))]),role:"slider","aria-valuenow":s.value,"aria-valuetext":A(m),"aria-valuemin":"0","aria-valuemax":N.max,tabindex:"0",style:Xe(A(v)),onKeydown:L},[(R(!0),X(Fe,null,Rt(N.max,(z,B)=>(R(),X("span",{key:B,class:U(A(l).e("item")),onMousemove:M=>j(z,M),onMouseleave:F,onClick:M=>I(z)},[g(A(ft),{class:U([A(l).e("icon"),{hover:c.value===z},A(l).is("active",z<=s.value)])},{default:re(()=>[_(z)?se("v-if",!0):(R(),fe(Kt(A(T)[z-1]),{key:0})),_(z)?(R(),fe(A(ft),{key:1,style:Xe(A(S)),class:U([A(l).e("icon"),A(l).e("decimal")])},{default:re(()=>[(R(),fe(Kt(A(k))))]),_:1},8,["style","class"])):se("v-if",!0)]),_:2},1032,["class"])],42,Cfe))),128)),N.showText||N.showScore?(R(),X("span",{key:0,class:U(A(l).e("text"))},Me(A(m)),3)):se("v-if",!0)],46,bfe))}}));var kfe=Ae(Sfe,[["__file","/home/runner/work/element-plus/element-plus/packages/components/rate/src/rate.vue"]]);const $fe=xt(kfe),vu={success:"icon-success",warning:"icon-warning",error:"icon-error",info:"icon-info"},sO={[vu.success]:AY,[vu.warning]:Wd,[vu.error]:RC,[vu.info]:LC},Ofe=Ze({title:{type:String,default:""},subTitle:{type:String,default:""},icon:{values:["success","warning","info","error"],default:"info"}}),uO="ElResult",Pfe=G({name:uO,props:Ofe,setup(e,{slots:t}){const n=De("result");Tf({scope:uO,type:"Slot",from:"subTitle",replacement:"sub-title",version:"2.1.3",ref:"https://github.com/element-plus/element-plus/pull/6636/"},x(()=>!!t.subTitle));const r=x(()=>{const a=e.icon,o=a&&vu[a]?vu[a]:"icon-info",i=sO[o]||sO["icon-info"];return{class:o,component:i}});return{ns:n,resultIcon:r}}});function Tfe(e,t,n,r,a,o){return 
R(),X("div",{class:U(e.ns.b())},[Z("div",{class:U(e.ns.e("icon"))},[Oe(e.$slots,"icon",{},()=>[e.resultIcon.component?(R(),fe(Kt(e.resultIcon.component),{key:0,class:U(e.resultIcon.class)},null,8,["class"])):se("v-if",!0)])],2),e.title||e.$slots.title?(R(),X("div",{key:0,class:U(e.ns.e("title"))},[Oe(e.$slots,"title",{},()=>[Z("p",null,Me(e.title),1)])],2)):se("v-if",!0),e.subTitle||e.$slots["sub-title"]?(R(),X("div",{key:1,class:U(e.ns.e("subtitle"))},[Oe(e.$slots,"sub-title",{},()=>[Z("p",null,Me(e.subTitle),1)])],2)):se("v-if",!0),e.$slots.extra?(R(),X("div",{key:2,class:U(e.ns.e("extra"))},[Oe(e.$slots,"extra")],2)):se("v-if",!0)],2)}var xfe=Ae(Pfe,[["render",Tfe],["__file","/home/runner/work/element-plus/element-plus/packages/components/result/src/result.vue"]]);const _fe=xt(xfe),Efe=Ze({tag:{type:String,default:"div"},gutter:{type:Number,default:0},justify:{type:String,values:["start","center","end","space-around","space-between","space-evenly"],default:"start"},align:{type:String,values:["top","middle","bottom"],default:"top"}}),Mfe=G({name:"ElRow",props:Efe,setup(e,{slots:t}){const n=De("row"),r=x(()=>e.gutter);ot(xE,{gutter:r});const a=x(()=>{const o={marginLeft:"",marginRight:""};return e.gutter&&(o.marginLeft=`-${e.gutter/2}px`,o.marginRight=o.marginLeft),o});return()=>g(e.tag,{class:[n.b(),n.is(`justify-${e.justify}`,e.justify!=="start"),n.is(`align-${e.align}`,e.align!=="top")],style:a.value},{default:()=>{var o;return[(o=t.default)==null?void 0:o.call(t)]}})}}),Ife=xt(Mfe);var cO=Number.isNaN||function(t){return typeof t=="number"&&t!==t};function Nfe(e,t){return!!(e===t||cO(e)&&cO(t))}function Afe(e,t){if(e.length!==t.length)return!1;for(var n=0;n<e.length;n++)if(!Nfe(e[n],t[n]))return!1;return!0}function Dfe(e,t){t===void 0&&(t=Afe);var n=null;function r(){for(var a=[],o=0;o<arguments.length;o++)a[o]=arguments[o];if(n&&n.lastThis===this&&t(a,n.lastArgs))return n.lastResult;var i=e.apply(this,a);return 
n={lastResult:i,lastArgs:a,lastThis:this},i}return r.clear=function(){n=null},r}const Rfe=()=>{const t=$t().proxy.$props;return x(()=>{const n=(r,a,o)=>({});return t.perfMode?Im(n):Dfe(n)})},Lfe=50,dO="item-rendered",fO="scroll",F5="forward",B5="backward",Zd="auto",V5="smart",z5="start",wv="center",H5="end",Qu="horizontal",mw="vertical",Ffe="ltr",$p="rtl",j5="negative",Bfe="positive-ascending",K5="positive-descending",Vfe={[Qu]:"left",[mw]:"top"},zfe=20,Hfe={[Qu]:"deltaX",[mw]:"deltaY"},jfe=({atEndEdge:e,atStartEdge:t,layout:n},r)=>{let a,o=0;const i=s=>s<0&&t.value||s>0&&e.value;return{hasReachedEdge:i,onWheel:s=>{bE(a);const c=s[Hfe[n.value]];i(o)&&i(o+c)||(o+=c,BC()||s.preventDefault(),a=yE(()=>{r(o),o=0}))}}},B0=ho({type:Le([Number,Function]),required:!0}),V0=ho({type:Number}),z0=ho({type:Number,default:2}),Kfe=ho({type:String,values:["ltr","rtl"],default:"ltr"}),H0=ho({type:Number,default:0}),Sv=ho({type:Number,required:!0}),W5=ho({type:String,values:["horizontal","vertical"],default:mw}),U5=Ze({className:{type:String,default:""},containerElement:{type:Le([String,Object]),default:"div"},data:{type:Le(Array),default:()=>xn([])},direction:Kfe,height:{type:[String,Number],required:!0},innerElement:{type:[String,Object],default:"div"},style:{type:Le([Object,String,Array])},useIsScrolling:{type:Boolean,default:!1},width:{type:[Number,String],required:!1},perfMode:{type:Boolean,default:!0},scrollbarAlwaysOn:{type:Boolean,default:!1}}),Wfe=Ze(Te({cache:z0,estimatedItemSize:V0,layout:W5,initScrollOffset:H0,total:Sv,itemSize:B0},U5));Ze(Te({columnCache:z0,columnWidth:B0,estimatedColumnWidth:V0,estimatedRowHeight:V0,initScrollLeft:H0,initScrollTop:H0,rowCache:z0,rowHeight:B0,totalColumn:Sv,totalRow:Sv},U5));const Ufe=Ze({layout:W5,total:Sv,ratio:{type:Number,required:!0},clientSize:{type:Number,required:!0},scrollFrom:{type:Number,required:!0},visible:Boolean}),ky=(e,t)=>e<t?F5:B5,Jd=e=>e===Ffe||e===$p||e===Qu;let Vs=null;function hO(e=!1){if(Vs===null||e){const 
t=document.createElement("div"),n=t.style;n.width="50px",n.height="50px",n.overflow="scroll",n.direction="rtl";const r=document.createElement("div"),a=r.style;return a.width="100px",a.height="100px",t.appendChild(r),document.body.appendChild(t),t.scrollLeft>0?Vs=K5:(t.scrollLeft=1,t.scrollLeft===0?Vs=j5:Vs=Bfe),document.body.removeChild(t),Vs}return Vs}function Yfe({move:e,size:t,bar:n},r){const a={},o=`translate${n.axis}(${e}px)`;return a[n.size]=t,a.transform=o,a.msTransform=o,a.webkitTransform=o,r==="horizontal"?a.height="100%":a.width="100%",a}const qfe=G({name:"ElVirtualScrollBar",props:Ufe,emits:["scroll","start-move","stop-move"],setup(e,{emit:t}){const r=H(),a=H();let o=null,i=null;const l=bt({isDragging:!1,traveled:0}),s=x(()=>XE[e.layout]),c=x(()=>e.clientSize-4),d=x(()=>({position:"absolute",width:Qu===e.layout?`${c.value}px`:"6px",height:Qu===e.layout?"6px":`${c.value}px`,[Vfe[e.layout]]:"2px",right:"2px",bottom:"2px",borderRadius:"4px"})),f=x(()=>{const $=e.ratio,O=e.clientSize;if($>=100)return Number.POSITIVE_INFINITY;if($>=50)return $*O/100;const T=O/3;return Math.floor(Math.min(Math.max($*O,zfe),T))}),p=x(()=>{if(!Number.isFinite(f.value))return{display:"none"};const $=`${f.value}px`;return Yfe({bar:s.value,size:$,move:l.traveled},e.layout)}),v=x(()=>Math.floor(e.clientSize-f.value-4)),m=()=>{gn(window,"mousemove",S),gn(window,"mouseup",C);const $=A(a);!$||(i=document.onselectstart,document.onselectstart=()=>!1,gn($,"touchmove",S),gn($,"touchend",C))},y=()=>{Bn(window,"mousemove",S),Bn(window,"mouseup",C),document.onselectstart=i,i=null;const 
$=A(a);!$||(Bn($,"touchmove",S),Bn($,"touchend",C))},b=$=>{$.stopImmediatePropagation(),!($.ctrlKey||[1,2].includes($.button))&&(l.isDragging=!0,l[s.value.axis]=$.currentTarget[s.value.offset]-($[s.value.client]-$.currentTarget.getBoundingClientRect()[s.value.direction]),t("start-move"),m())},C=()=>{l.isDragging=!1,l[s.value.axis]=0,t("stop-move"),y()},S=$=>{const{isDragging:O}=l;if(!O||!a.value||!r.value)return;const T=l[s.value.axis];if(!T)return;bE(o);const _=(r.value.getBoundingClientRect()[s.value.direction]-$[s.value.client])*-1,I=a.value[s.value.offset]-T,L=_-I;o=yE(()=>{l.traveled=Math.max(0,Math.min(L,v.value)),t("scroll",L,v.value)})},w=$=>{const O=Math.abs($.target.getBoundingClientRect()[s.value.direction]-$[s.value.client]),T=a.value[s.value.offset]/2,_=O-T;l.traveled=Math.max(0,Math.min(_,v.value)),t("scroll",_,v.value)},k=$=>$.preventDefault();return ce(()=>e.scrollFrom,$=>{l.isDragging||(l.traveled=Math.ceil($*v.value))}),et(()=>{!Bt||(gn(r.value,"touchstart",k),gn(a.value,"touchstart",b))}),Lt(()=>{Bn(r.value,"touchstart",k),y()}),()=>qe("div",{role:"presentation",ref:r,class:"el-virtual-scrollbar",style:d.value,onMousedown:dt(w,["stop","prevent"])},qe("div",{ref:a,class:"el-scrollbar__thumb",style:p.value,onMousedown:b},[]))}}),Y5=({name:e,getOffset:t,getItemSize:n,getItemOffset:r,getEstimatedTotalSize:a,getStartIndexForOffset:o,getStopIndexForStartIndex:i,initCache:l,clearCache:s,validateProps:c})=>G({name:e!=null?e:"ElVirtualList",props:Wfe,emits:[dO,fO],setup(d,{emit:f,expose:p}){c(d);const v=$t(),m=H(l(d,v)),y=Rfe(),b=H(),C=H(),S=H(),w=H({isScrolling:!1,scrollDir:"forward",scrollOffset:Yt(d.initScrollOffset)?d.initScrollOffset:0,updateRequested:!1,isScrollbarDragging:!1,scrollbarAlwaysOn:d.scrollbarAlwaysOn}),k=x(()=>{const{total:q,cache:J}=d,{isScrolling:ne,scrollDir:oe,scrollOffset:Q}=A(w);if(q===0)return[0,0,0,0];const 
ae=o(d,Q,A(m)),de=i(d,ae,Q,A(m)),be=!ne||oe===B5?Math.max(1,J):1,Ee=!ne||oe===F5?Math.max(1,J):1;return[Math.max(0,ae-be),Math.max(0,Math.min(q-1,de+Ee)),ae,de]}),$=x(()=>a(d,A(m))),O=x(()=>Jd(d.layout)),T=x(()=>[{position:"relative",[`overflow-${O.value?"x":"y"}`]:"scroll",WebkitOverflowScrolling:"touch",willChange:"transform"},{direction:d.direction,height:Yt(d.height)?`${d.height}px`:d.height,width:Yt(d.width)?`${d.width}px`:d.width},d.style]),_=x(()=>{const q=A($),J=A(O);return{height:J?"100%":`${q}px`,pointerEvents:A(w).isScrolling?"none":void 0,width:J?`${q}px`:"100%"}}),I=x(()=>O.value?d.width:d.height),{onWheel:L}=jfe({atStartEdge:x(()=>w.value.scrollOffset<=0),atEndEdge:x(()=>w.value.scrollOffset>=$.value),layout:x(()=>d.layout)},q=>{var J,ne;(ne=(J=S.value).onMouseUp)==null||ne.call(J),B(Math.min(w.value.scrollOffset+q,$.value-I.value))}),j=()=>{const{total:q}=d;if(q>0){const[Q,ae,de,be]=A(k);f(dO,Q,ae,de,be)}const{scrollDir:J,scrollOffset:ne,updateRequested:oe}=A(w);f(fO,J,ne,oe)},F=q=>{const{clientHeight:J,scrollHeight:ne,scrollTop:oe}=q.currentTarget,Q=A(w);if(Q.scrollOffset===oe)return;const ae=Math.max(0,Math.min(oe,ne-J));w.value=Ke(Te({},Q),{isScrolling:!0,scrollDir:ky(Q.scrollOffset,ae),scrollOffset:ae,updateRequested:!1}),Ne(K)},N=q=>{const{clientWidth:J,scrollLeft:ne,scrollWidth:oe}=q.currentTarget,Q=A(w);if(Q.scrollOffset===ne)return;const{direction:ae}=d;let de=ne;if(ae===$p)switch(hO()){case j5:{de=-ne;break}case K5:{de=oe-J-ne;break}}de=Math.max(0,Math.min(de,oe-J)),w.value=Ke(Te({},Q),{isScrolling:!0,scrollDir:ky(Q.scrollOffset,de),scrollOffset:de,updateRequested:!1}),Ne(K)},D=q=>{A(O)?N(q):F(q),j()},z=(q,J)=>{const 
ne=($.value-I.value)/J*q;B(Math.min($.value-I.value,ne))},B=q=>{q=Math.max(q,0),q!==A(w).scrollOffset&&(w.value=Ke(Te({},A(w)),{scrollOffset:q,scrollDir:ky(A(w).scrollOffset,q),updateRequested:!0}),Ne(K))},M=(q,J=Zd)=>{const{scrollOffset:ne}=A(w);q=Math.max(0,Math.min(q,d.total-1)),B(t(d,q,J,ne,A(m)))},E=q=>{const{direction:J,itemSize:ne,layout:oe}=d,Q=y.value(s&&ne,s&&oe,s&&J);let ae;if(Mt(Q,String(q)))ae=Q[q];else{const de=r(d,q,A(m)),be=n(d,q,A(m)),Ee=A(O),Pe=J===$p,Be=Ee?de:0;Q[q]=ae={position:"absolute",left:Pe?void 0:`${Be}px`,right:Pe?`${Be}px`:void 0,top:Ee?0:`${de}px`,height:Ee?"100%":`${be}px`,width:Ee?`${be}px`:"100%"}}return ae},K=()=>{w.value.isScrolling=!1,Ne(()=>{y.value(-1,null,null)})},W=()=>{const q=b.value;q&&(q.scrollTop=0)};et(()=>{if(!Bt)return;const{initScrollOffset:q}=d,J=A(b);Yt(q)&&J&&(A(O)?J.scrollLeft=q:J.scrollTop=q),j()}),ur(()=>{const{direction:q,layout:J}=d,{scrollOffset:ne,updateRequested:oe}=A(w),Q=A(b);if(oe&&Q)if(J===Qu)if(q===$p)switch(hO()){case"negative":{Q.scrollLeft=-ne;break}case"positive-ascending":{Q.scrollLeft=ne;break}default:{const{clientWidth:ae,scrollWidth:de}=Q;Q.scrollLeft=de-ae-ne;break}}else Q.scrollLeft=ne;else Q.scrollTop=ne});const Y={clientSize:I,estimatedTotalSize:$,windowStyle:T,windowRef:b,innerRef:C,innerStyle:_,itemsToRender:k,scrollbarRef:S,states:w,getItemStyle:E,onScroll:D,onScrollbarScroll:z,onWheel:L,scrollTo:B,scrollToItem:M,resetScrollTop:W};return p({windowRef:b,innerRef:C,getItemStyleCache:y,scrollTo:B,scrollToItem:M,resetScrollTop:W,states:w}),Y},render(d){var f;const{$slots:p,className:v,clientSize:m,containerElement:y,data:b,getItemStyle:C,innerElement:S,itemsToRender:w,innerStyle:k,layout:$,total:O,onScroll:T,onScrollbarScroll:_,onWheel:I,states:L,useIsScrolling:j,windowStyle:F}=d,[N,D]=w,z=Kt(y),B=Kt(S),M=[];if(O>0)for(let Y=N;Y<=D;Y++)M.push((f=p.default)==null?void 0:f.call(p,{data:b,key:Y,index:Y,isScrolling:j?L.isScrolling:void 0,style:C(Y)}));const 
E=[qe(B,{style:k,ref:"innerRef"},wt(B)?M:{default:()=>M})],K=qe(qfe,{ref:"scrollbarRef",clientSize:m,layout:$,onScroll:_,ratio:m*100/this.estimatedTotalSize,scrollFrom:L.scrollOffset/(this.estimatedTotalSize-m),total:O}),W=qe(z,{class:["el-vl__window",v],style:F,onScroll:T,onWheel:I,ref:"windowRef",key:0},wt(z)?[E]:{default:()=>[E]});return qe("div",{key:0,class:["el-vl__wrapper",L.scrollbarAlwaysOn?"always-on":""]},[W,K])}}),q5=Y5({name:"ElFixedSizeList",getItemOffset:({itemSize:e},t)=>t*e,getItemSize:({itemSize:e})=>e,getEstimatedTotalSize:({total:e,itemSize:t})=>t*e,getOffset:({height:e,total:t,itemSize:n,layout:r,width:a},o,i,l)=>{const s=Jd(r)?a:e,c=Math.max(0,t*n-s),d=Math.min(c,o*n),f=Math.max(0,(o+1)*n-s);switch(i===V5&&(l>=f-s&&l<=d+s?i=Zd:i=wv),i){case z5:return d;case H5:return f;case wv:{const p=Math.round(f+(d-f)/2);return p<Math.ceil(s/2)?0:p>c+Math.floor(s/2)?c:p}case Zd:default:return l>=f&&l<=d?l:l<f?f:d}},getStartIndexForOffset:({total:e,itemSize:t},n)=>Math.max(0,Math.min(e-1,Math.floor(n/t))),getStopIndexForStartIndex:({height:e,total:t,itemSize:n,layout:r,width:a},o,i)=>{const l=o*n,s=Jd(r)?a:e,c=Math.ceil((s+i-l)/n);return Math.max(0,Math.min(t-1,o+c-1))},initCache(){},clearCache:!0,validateProps(){}}),mu=(e,t,n)=>{const{itemSize:r}=e,{items:a,lastVisitedIndex:o}=n;if(t>o){let i=0;if(o>=0){const l=a[o];i=l.offset+l.size}for(let l=o+1;l<=t;l++){const s=r(l);a[l]={offset:i,size:s},i+=s}n.lastVisitedIndex=t}return a[t]},Gfe=(e,t,n)=>{const{items:r,lastVisitedIndex:a}=t;return(a>0?r[a].offset:0)>=n?G5(e,t,0,a,n):Xfe(e,t,Math.max(0,a),n)},G5=(e,t,n,r,a)=>{for(;n<=r;){const o=n+Math.floor((r-n)/2),i=mu(e,o,t).offset;if(i===a)return o;i<a?n=o+1:i>a&&(r=o-1)}return Math.max(0,n-1)},Xfe=(e,t,n,r)=>{const{total:a}=e;let o=1;for(;n<a&&mu(e,n,t).offset<r;)n+=o,o*=2;return G5(e,t,Math.floor(n/2),Math.min(n,a-1),r)},pO=({total:e},{items:t,estimatedItemSize:n,lastVisitedIndex:r})=>{let a=0;if(r>=e&&(r=e-1),r>=0){const l=t[r];a=l.offset+l.size}const 
i=(e-r-1)*n;return a+i},Zfe=Y5({name:"ElDynamicSizeList",getItemOffset:(e,t,n)=>mu(e,t,n).offset,getItemSize:(e,t,{items:n})=>n[t].size,getEstimatedTotalSize:pO,getOffset:(e,t,n,r,a)=>{const{height:o,layout:i,width:l}=e,s=Jd(i)?l:o,c=mu(e,t,a),d=pO(e,a),f=Math.max(0,Math.min(d-s,c.offset)),p=Math.max(0,c.offset-s+c.size);switch(n===V5&&(r>=p-s&&r<=f+s?n=Zd:n=wv),n){case z5:return f;case H5:return p;case wv:return Math.round(p+(f-p)/2);case Zd:default:return r>=p&&r<=f?r:r<p?p:f}},getStartIndexForOffset:(e,t,n)=>Gfe(e,n,t),getStopIndexForStartIndex:(e,t,n,r)=>{const{height:a,total:o,layout:i,width:l}=e,s=Jd(i)?l:a,c=mu(e,t,r),d=n+s;let f=c.offset+c.size,p=t;for(;p<o-1&&f<d;)p++,f+=mu(e,p,r).size;return p},initCache({estimatedItemSize:e=Lfe},t){const n={items:{},estimatedItemSize:e,lastVisitedIndex:-1};return n.clearCacheAfterIndex=(r,a=!0)=>{var o,i;n.lastVisitedIndex=Math.min(n.lastVisitedIndex,r-1),(o=t.exposed)==null||o.getItemStyleCache(-1),a&&((i=t.proxy)==null||i.$forceUpdate())},n},clearCache:!1,validateProps:({itemSize:e})=>{}}),Jfe=G({props:{item:{type:Object,required:!0},style:Object,height:Number},setup(){return{ns:De("select")}}});function Qfe(e,t,n,r,a,o){return e.item.isTitle?(R(),X("div",{key:0,class:U(e.ns.be("group","title")),style:Xe([e.style,{lineHeight:`${e.height}px`}])},Me(e.item.label),7)):(R(),X("div",{key:1,class:U(e.ns.be("group","split")),style:Xe(e.style)},[Z("span",{class:U(e.ns.be("group","split-dash")),style:Xe({top:`${e.height/2}px`})},null,6)],6))}var ehe=Ae(Jfe,[["render",Qfe],["__file","/home/runner/work/element-plus/element-plus/packages/components/select-v2/src/group-item.vue"]]);function the(e,{emit:t}){return{hoverItem:()=>{e.disabled||t("hover",e.index)},selectOptionClick:()=>{e.disabled||t("select",e.item,e.index)}}}const 
nhe={allowCreate:Boolean,autocomplete:{type:String,default:"none"},automaticDropdown:Boolean,clearable:Boolean,clearIcon:{type:[String,Object],default:gi},effect:{type:String,default:"light"},collapseTags:Boolean,collapseTagsTooltip:{type:Boolean,default:!1},defaultFirstOption:Boolean,disabled:Boolean,estimatedOptionHeight:{type:Number,default:void 0},filterable:Boolean,filterMethod:Function,height:{type:Number,default:170},itemHeight:{type:Number,default:34},id:String,loading:Boolean,loadingText:String,label:String,modelValue:[Array,String,Number,Boolean,Object],multiple:Boolean,multipleLimit:{type:Number,default:0},name:String,noDataText:String,noMatchText:String,remoteMethod:Function,reserveKeyword:{type:Boolean,default:!0},options:{type:Array,required:!0},placeholder:{type:String},popperAppendToBody:{type:Boolean,default:void 0},teleported:mr.teleported,persistent:{type:Boolean,default:!0},popperClass:{type:String,default:""},popperOptions:{type:Object,default:()=>({})},remote:Boolean,size:{type:String,validator:va},valueKey:{type:String,default:"value"},scrollbarAlwaysOn:{type:Boolean,default:!1}},rhe={data:Array,disabled:Boolean,hovering:Boolean,item:Object,index:Number,style:Object,selected:Boolean,created:Boolean},ahe=G({props:rhe,emits:["select","hover"],setup(e,{emit:t}){const n=De("select"),{hoverItem:r,selectOptionClick:a}=the(e,{emit:t});return{ns:n,hoverItem:r,selectOptionClick:a}}}),ohe=["aria-selected"];function ihe(e,t,n,r,a,o){return R(),X("li",{"aria-selected":e.selected,style:Xe(e.style),class:U([e.ns.be("dropdown","option-item"),e.ns.is("selected",e.selected),e.ns.is("disabled",e.disabled),e.ns.is("created",e.created),{hover:e.hovering}]),onMouseenter:t[0]||(t[0]=(...i)=>e.hoverItem&&e.hoverItem(...i)),onClick:t[1]||(t[1]=dt((...i)=>e.selectOptionClick&&e.selectOptionClick(...i),["stop"]))},[Oe(e.$slots,"default",{item:e.item,index:e.index,disabled:e.disabled},()=>[Z("span",null,Me(e.item.label),1)])],46,ohe)}var 
lhe=Ae(ahe,[["render",ihe],["__file","/home/runner/work/element-plus/element-plus/packages/components/select-v2/src/option-item.vue"]]);const X5="ElSelectV2Injection",she=G({name:"ElSelectDropdown",props:{data:Array,hoveringIndex:Number,width:Number},setup(e){const t=ve(X5),n=De("select"),r=H([]),a=H(null),o=x(()=>sa(t.props.estimatedOptionHeight)),i=x(()=>o.value?{itemSize:t.props.itemHeight}:{estimatedSize:t.props.estimatedOptionHeight,itemSize:m=>r.value[m]}),l=(m=[],y)=>{const{props:{valueKey:b}}=t;return zt(y)?m&&m.some(C=>vn(C,b)===vn(y,b)):m.includes(y)},s=(m,y)=>{if(zt(y)){const{valueKey:b}=t.props;return vn(m,b)===vn(y,b)}else return m===y};return{ns:n,select:t,listProps:i,listRef:a,isSized:o,isItemDisabled:(m,y)=>{const{disabled:b,multiple:C,multipleLimit:S}=t.props;return b||!y&&(C?S>0&&m.length>=S:!1)},isItemHovering:m=>e.hoveringIndex===m,isItemSelected:(m,y)=>{const{valueKey:b}=t.props;return t.props.multiple?l(m,vn(y,b)):s(m,vn(y,b))},scrollToItem:m=>{const y=a.value;y&&y.scrollToItem(m)},resetScrollTop:()=>{const m=a.value;m&&m.resetScrollTop()}}},render(e,t){var n;const{$slots:r,data:a,listProps:o,select:i,isSized:l,width:s,ns:c,isItemDisabled:d,isItemHovering:f,isItemSelected:p}=e,v=l?q5:Zfe,{props:m,onSelect:y,onHover:b,onKeyboardNavigate:C,onKeyboardSelect:S}=i,{height:w,modelValue:k,multiple:$}=m;if(a.length===0)return qe("div",{class:c.b("dropdown"),style:{width:`${s}px`}},(n=r.empty)==null?void 0:n.call(r));const O=re(_=>{const{index:I,data:L}=_,j=L[I];if(L[I].type==="Group")return qe(ehe,{item:j,style:_.style,height:l?o.itemSize:o.estimatedSize});const F=p(k,j),N=d(k,F);return 
qe(lhe,Ke(Te({},_),{selected:F,disabled:j.disabled||N,created:!!j.created,hovering:f(I),item:j,onSelect:y,onHover:b}),{default:re(D=>Oe(r,"default",D,()=>[qe("span",j.label)]))})}),T=qe(v,Te({ref:"listRef",className:c.be("dropdown","list"),data:a,height:w,width:s,total:a.length,scrollbarAlwaysOn:m.scrollbarAlwaysOn,onKeydown:[t[1]||(t[1]=It(dt(()=>C("forward"),["stop","prevent"]),["down"])),t[2]||(t[2]=It(dt(()=>C("backward"),["stop","prevent"]),["up"])),t[3]||(t[3]=It(dt(S,["stop","prevent"]),["enter"])),t[4]||(t[4]=It(dt(()=>i.expanded=!1,["stop","prevent"]),["esc"])),t[5]||(t[5]=It(()=>i.expanded=!1,["tab"]))]},o),{default:O});return qe("div",{class:[c.b("dropdown"),c.is("multiple",$)]},[T])}});var uhe=Ae(she,[["__file","/home/runner/work/element-plus/element-plus/packages/components/select-v2/src/select-dropdown.vue"]]);function che(e,t){const n=H(0),r=H(null),a=x(()=>e.allowCreate&&e.filterable);function o(d){const f=p=>p.value===d;return e.options&&e.options.some(f)||t.createdOptions.some(f)}function i(d){!a.value||(e.multiple&&d.created?n.value++:r.value=d)}function l(d){if(a.value)if(d&&d.length>0&&!o(d)){const f={value:d,label:d,created:!0,disabled:!1};t.createdOptions.length>=n.value?t.createdOptions[n.value]=f:t.createdOptions.push(f)}else if(e.multiple)t.createdOptions.length=n.value;else{const f=r.value;t.createdOptions.length=0,f&&f.created&&t.createdOptions.push(f)}}function s(d){if(!a.value||!d||!d.created||d.created&&e.reserveKeyword&&t.inputValue===d.label)return;const f=t.createdOptions.findIndex(p=>p.value===d.value);~f&&(t.createdOptions.splice(f,1),n.value--)}function c(){a.value&&(t.createdOptions.length=0,n.value=0)}return{createNewOption:l,removeNewOption:s,selectNewOption:i,clearAllNewOption:c}}const dhe=e=>{const t=[];return e.forEach(n=>{pt(n.options)?(t.push({label:n.label,isTitle:!0,type:"Group"}),n.options.forEach(r=>{t.push(r)}),t.push({type:"Group"})):t.push(n)}),t};function fhe(e){const 
t=H(!1);return{handleCompositionStart:()=>{t.value=!0},handleCompositionUpdate:o=>{const i=o.target.value,l=i[i.length-1]||"";t.value=!zm(l)},handleCompositionEnd:o=>{t.value&&(t.value=!1,Ct(e)&&e(o))}}}const vO="",mO=11,hhe={larget:51,default:42,small:33},phe="ElSelectV2",vhe=(e,t)=>{const{t:n}=ln(),r=De("select-v2"),a=De("input"),{form:o,formItem:i}=$c(),{compatTeleported:l}=Oc(phe,"popperAppendToBody"),s=bt({inputValue:vO,displayInputValue:vO,calculatedWidth:0,cachedPlaceholder:"",cachedOptions:[],createdOptions:[],createdLabel:"",createdSelected:!1,currentPlaceholder:"",hoveringIndex:-1,comboBoxHovering:!1,isOnComposition:!1,isSilentBlur:!1,isComposing:!1,inputLength:20,selectWidth:200,initialInputHeight:0,previousQuery:null,previousValue:"",query:"",selectedLabel:"",softFocus:!1,tagInMultiLine:!1}),c=H(-1),d=H(-1),f=H(null),p=H(null),v=H(null),m=H(null),y=H(null),b=H(null),C=H(null),S=H(!1),w=x(()=>e.disabled||(o==null?void 0:o.disabled)),k=x(()=>{const Re=N.value.length*34;return Re>e.height?e.height:Re}),$=x(()=>e.modelValue!==void 0&&e.modelValue!==null&&e.modelValue!==""),O=x(()=>{const Re=e.multiple?Array.isArray(e.modelValue)&&e.modelValue.length>0:$.value;return e.clearable&&!w.value&&s.comboBoxHovering&&Re}),T=x(()=>e.remote&&e.filterable?"":Of),_=x(()=>T.value&&r.is("reverse",S.value)),I=x(()=>(i==null?void 0:i.validateState)||""),L=x(()=>hE[I.value]),j=x(()=>e.remote?300:0),F=x(()=>{const Re=N.value;return e.loading?e.loadingText||n("el.select.loading"):e.remote&&s.inputValue===""&&Re.length===0?!1:e.filterable&&s.inputValue&&Re.length>0?e.noMatchText||n("el.select.noMatch"):Re.length===0?e.noDataText||n("el.select.noData"):null}),N=x(()=>{const Re=ut=>{const Vt=s.inputValue;return Vt?ut.label.includes(Vt):!0};return e.loading?[]:dhe(e.options.concat(s.createdOptions).map(ut=>{if(pt(ut.options)){const Vt=ut.options.filter(Re);if(Vt.length>0)return Ke(Te({},ut),{options:Vt})}else if(e.remote||Re(ut))return ut;return 
null}).filter(ut=>ut!==null))}),D=x(()=>N.value.every(Re=>Re.disabled)),z=Gn(),B=x(()=>z.value==="small"?"small":"default"),M=x(()=>{const Re=b.value,ut=B.value||"default",Vt=Re?Number.parseInt(getComputedStyle(Re).paddingLeft):0,it=Re?Number.parseInt(getComputedStyle(Re).paddingRight):0;return s.selectWidth-it-Vt-hhe[ut]}),E=()=>{var Re,ut,Vt;d.value=((Vt=(ut=(Re=y.value)==null?void 0:Re.getBoundingClientRect)==null?void 0:ut.call(Re))==null?void 0:Vt.width)||200},K=x(()=>({width:`${s.calculatedWidth===0?mO:Math.ceil(s.calculatedWidth)+mO}px`})),W=x(()=>pt(e.modelValue)?e.modelValue.length===0&&!s.displayInputValue:e.filterable?s.displayInputValue.length===0:!0),Y=x(()=>{const Re=e.placeholder||n("el.select.placeholder");return e.multiple?Re:s.selectedLabel||Re}),q=x(()=>{var Re,ut;return(ut=(Re=m.value)==null?void 0:Re.popperRef)==null?void 0:ut.contentRef}),J=x(()=>{if(e.multiple){const Re=e.modelValue.length;if(e.modelValue.length>0)return N.value.findIndex(ut=>ut.value===e.modelValue[Re-1])}else if(e.modelValue)return N.value.findIndex(Re=>Re.value===e.modelValue);return-1}),ne=x(()=>S.value&&F.value!==!1),{createNewOption:oe,removeNewOption:Q,selectNewOption:ae,clearAllNewOption:de}=che(e,s),{handleCompositionStart:be,handleCompositionUpdate:Ee,handleCompositionEnd:Pe}=fhe(Re=>Mn(Re)),Be=()=>{var Re,ut,Vt;(ut=(Re=p.value).focus)==null||ut.call(Re),(Vt=m.value)==null||Vt.updatePopper()},te=()=>{if(!e.automaticDropdown&&!w.value)return s.isComposing&&(s.softFocus=!0),Ne(()=>{var Re,ut;S.value=!S.value,(ut=(Re=p.value)==null?void 
0:Re.focus)==null||ut.call(Re)})},ie=()=>(e.filterable&&s.inputValue!==s.selectedLabel&&(s.query=s.selectedLabel),ke(s.inputValue),Ne(()=>{oe(s.inputValue)})),ge=Yn(ie,j.value),ke=Re=>{s.previousQuery!==Re&&(s.previousQuery=Re,e.filterable&&Ct(e.filterMethod)?e.filterMethod(Re):e.filterable&&e.remote&&Ct(e.remoteMethod)&&e.remoteMethod(Re))},xe=Re=>{er(e.modelValue,Re)||t(ir,Re)},Ie=Re=>{t(Pt,Re),xe(Re),s.previousValue=Re.toString()},ye=(Re=[],ut)=>{if(!zt(ut))return Re.indexOf(ut);const Vt=e.valueKey;let it=-1;return Re.some((ct,Nt)=>vn(ct,Vt)===vn(ut,Vt)?(it=Nt,!0):!1),it},pe=Re=>zt(Re)?vn(Re,e.valueKey):Re,ue=Re=>zt(Re)?Re.label:Re,Ce=()=>{if(!(e.collapseTags&&!e.filterable))return Ne(()=>{var Re,ut;if(!p.value)return;const Vt=b.value;y.value.height=Vt.offsetHeight,S.value&&F.value!==!1&&((ut=(Re=m.value)==null?void 0:Re.updatePopper)==null||ut.call(Re))})},je=()=>{var Re,ut;if(ee(),E(),(ut=(Re=m.value)==null?void 0:Re.updatePopper)==null||ut.call(Re),e.multiple)return Ce()},ee=()=>{const Re=b.value;Re&&(s.selectWidth=Re.getBoundingClientRect().width)},me=(Re,ut,Vt=!0)=>{var it,ct;if(e.multiple){let Nt=e.modelValue.slice();const sn=ye(Nt,pe(Re));sn>-1?(Nt=[...Nt.slice(0,sn),...Nt.slice(sn+1)],s.cachedOptions.splice(sn,1),Q(Re)):(e.multipleLimit<=0||Nt.length<e.multipleLimit)&&(Nt=[...Nt,pe(Re)],s.cachedOptions.push(Re),ae(Re),Ut(ut)),Ie(Nt),Re.created&&(s.query="",ke(""),s.inputLength=20),e.filterable&&!e.reserveKeyword&&((ct=(it=p.value).focus)==null||ct.call(it),Ve("")),e.filterable&&(s.calculatedWidth=C.value.getBoundingClientRect().width),Ce(),Jt()}else c.value=ut,s.selectedLabel=Re.label,Ie(pe(Re)),S.value=!1,s.isComposing=!1,s.isSilentBlur=Vt,ae(Re),Re.created||de(),Ut(ut)},He=(Re,ut)=>{const{valueKey:Vt}=e,it=e.modelValue.indexOf(vn(ut,Vt));if(it>-1&&!w.value){const ct=[...e.modelValue.slice(0,it),...e.modelValue.slice(it+1)];return 
s.cachedOptions.splice(it,1),Ie(ct),t("remove-tag",vn(ut,Vt)),s.softFocus=!0,Q(ut),Ne(Be)}Re.stopPropagation()},lt=Re=>{const ut=s.isComposing;s.isComposing=!0,s.softFocus?s.softFocus=!1:ut||t("focus",Re)},Ye=()=>(s.softFocus=!1,Ne(()=>{var Re,ut;(ut=(Re=p.value)==null?void 0:Re.blur)==null||ut.call(Re),C.value&&(s.calculatedWidth=C.value.getBoundingClientRect().width),s.isSilentBlur?s.isSilentBlur=!1:s.isComposing&&t("blur"),s.isComposing=!1})),he=()=>{s.displayInputValue.length>0?Ve(""):S.value=!1},_e=Re=>{if(s.displayInputValue.length===0){Re.preventDefault();const ut=e.modelValue.slice();ut.pop(),Q(s.cachedOptions.pop()),Ie(ut)}},$e=()=>{let Re;return pt(e.modelValue)?Re=[]:Re="",s.softFocus=!0,e.multiple?s.cachedOptions=[]:s.selectedLabel="",S.value=!1,Ie(Re),t("clear"),de(),Ne(Be)},Ve=Re=>{s.displayInputValue=Re,s.inputValue=Re},st=(Re,ut=void 0)=>{const Vt=N.value;if(!["forward","backward"].includes(Re)||w.value||Vt.length<=0||D.value)return;if(!S.value)return te();ut===void 0&&(ut=s.hoveringIndex);let it=-1;Re==="forward"?(it=ut+1,it>=Vt.length&&(it=0)):Re==="backward"&&(it=ut-1,it<0&&(it=Vt.length-1));const ct=Vt[it];if(ct.disabled||ct.type==="Group")return st(Re,it);Ut(it),Va(it)},Dt=()=>{if(S.value)~s.hoveringIndex&&N.value[s.hoveringIndex]&&me(N.value[s.hoveringIndex],s.hoveringIndex,!1);else return te()},Ut=Re=>{s.hoveringIndex=Re},Ft=()=>{s.hoveringIndex=-1},Jt=()=>{var Re;const ut=p.value;ut&&((Re=ut.focus)==null||Re.call(ut))},Mn=Re=>{const ut=Re.target.value;if(Ve(ut),s.displayInputValue.length>0&&!S.value&&(S.value=!0),s.calculatedWidth=C.value.getBoundingClientRect().width,e.multiple&&Ce(),e.remote)ge();else return ie()},tr=()=>(S.value=!1,Ye()),nr=()=>(s.inputValue=s.displayInputValue,Ne(()=>{~J.value&&(Ut(J.value),Va(s.hoveringIndex))})),Va=Re=>{v.value.scrollToItem(Re)},Wo=()=>{if(Ft(),e.multiple)if(e.modelValue.length>0){let Re=!1;s.cachedOptions.length=0,s.previousValue=e.modelValue.toString(),e.modelValue.forEach(ut=>{const 
Vt=N.value.findIndex(it=>pe(it)===ut);~Vt&&(s.cachedOptions.push(N.value[Vt]),Re||Ut(Vt),Re=!0)})}else s.cachedOptions=[],s.previousValue="";else if($.value){s.previousValue=e.modelValue;const Re=N.value,ut=Re.findIndex(Vt=>pe(Vt)===pe(e.modelValue));~ut?(s.selectedLabel=Re[ut].label,Ut(ut)):s.selectedLabel=`${e.modelValue}`}else s.selectedLabel="",s.previousValue="";E()};return ce(S,Re=>{var ut,Vt;t("visible-change",Re),Re?(Vt=(ut=m.value).update)==null||Vt.call(ut):(s.displayInputValue="",oe(""))}),ce(()=>e.modelValue,(Re,ut)=>{var Vt;(!Re||Re.toString()!==s.previousValue)&&Wo(),er(Re,ut)||(Vt=i==null?void 0:i.validate)==null||Vt.call(i,"change").catch(it=>void 0)},{deep:!0}),ce(()=>e.options,()=>{const Re=p.value;(!Re||Re&&document.activeElement!==Re)&&Wo()},{deep:!0}),ce(N,()=>Ne(v.value.resetScrollTop)),et(()=>{Wo(),wc(y.value,je)}),hc(()=>{Sc(y.value,je)}),{collapseTagSize:B,currentPlaceholder:Y,expanded:S,emptyText:F,popupHeight:k,debounce:j,filteredOptions:N,iconComponent:T,iconReverse:_,inputWrapperStyle:K,popperSize:d,dropdownMenuVisible:ne,hasModelValue:$,shouldShowPlaceholder:W,selectDisabled:w,selectSize:z,showClearBtn:O,states:s,tagMaxWidth:M,nsSelectV2:r,nsInput:a,calculatorRef:C,controlRef:f,inputRef:p,menuRef:v,popper:m,selectRef:y,selectionRef:b,popperRef:q,validateState:I,validateIcon:L,compatTeleported:l,debouncedOnInputChange:ge,deleteTag:He,getLabel:ue,getValueKey:pe,handleBlur:Ye,handleClear:$e,handleClickOutside:tr,handleDel:_e,handleEsc:he,handleFocus:lt,handleMenuEnter:nr,handleResize:je,toggleMenu:te,scrollTo:Va,onInput:Mn,onKeyboardNavigate:st,onKeyboardSelect:Dt,onSelect:me,onHover:Ut,onUpdateInputValue:Ve,handleCompositionStart:be,handleCompositionEnd:Pe,handleCompositionUpdate:Ee}},mhe=G({name:"ElSelectV2",components:{ElSelectMenu:uhe,ElTag:Gm,ElTooltip:Ur,ElIcon:ft},directives:{ClickOutside:Is,ModelText:fC},props:nhe,emits:[Pt,ir,"remove-tag","clear","visible-change","focus","blur"],setup(e,{emit:t}){const n=vhe(e,t);return 
ot(X5,{props:bt(Ke(Te({},or(e)),{height:n.popupHeight})),onSelect:n.onSelect,onHover:n.onHover,onKeyboardNavigate:n.onKeyboardNavigate,onKeyboardSelect:n.onKeyboardSelect}),n}}),ghe={key:0},yhe=["id","autocomplete","aria-expanded","aria-labelledby","disabled","readonly","name","unselectable"],bhe=["textContent"],Che=["id","aria-labelledby","aria-expanded","autocomplete","disabled","name","readonly","unselectable"],whe=["textContent"];function She(e,t,n,r,a,o){const i=we("el-tag"),l=we("el-tooltip"),s=we("el-icon"),c=we("el-select-menu"),d=pa("model-text"),f=pa("click-outside");return at((R(),X("div",{ref:"selectRef",class:U([e.nsSelectV2.b(),e.nsSelectV2.m(e.selectSize)]),onClick:t[24]||(t[24]=dt((...p)=>e.toggleMenu&&e.toggleMenu(...p),["stop"])),onMouseenter:t[25]||(t[25]=p=>e.states.comboBoxHovering=!0),onMouseleave:t[26]||(t[26]=p=>e.states.comboBoxHovering=!1)},[g(l,{ref:"popper",visible:e.dropdownMenuVisible,"onUpdate:visible":t[22]||(t[22]=p=>e.dropdownMenuVisible=p),teleported:e.compatTeleported,"popper-class":[e.nsSelectV2.e("popper"),e.popperClass],"gpu-acceleration":!1,"stop-popper-mouse-event":!1,"popper-options":e.popperOptions,"fallback-placements":["bottom-start","top-start","right","left"],effect:e.effect,placement:"bottom-start",pure:"",transition:`${e.nsSelectV2.namespace.value}-zoom-in-top`,trigger:"click",persistent:e.persistent,onBeforeShow:e.handleMenuEnter,onHide:t[23]||(t[23]=p=>e.states.inputValue=e.states.displayInputValue)},{default:re(()=>{var 
p;return[Z("div",{ref:"selectionRef",class:U([e.nsSelectV2.e("wrapper"),e.nsSelectV2.is("focused",e.states.isComposing),e.nsSelectV2.is("hovering",e.states.comboBoxHovering),e.nsSelectV2.is("filterable",e.filterable),e.nsSelectV2.is("disabled",e.selectDisabled)])},[e.$slots.prefix?(R(),X("div",ghe,[Oe(e.$slots,"prefix")])):se("v-if",!0),e.multiple?(R(),X("div",{key:1,class:U(e.nsSelectV2.e("selection"))},[e.collapseTags&&e.modelValue.length>0?(R(),X("div",{key:0,class:U(e.nsSelectV2.e("selected-item"))},[g(i,{closable:!e.selectDisabled&&!((p=e.states.cachedOptions[0])!=null&&p.disable),size:e.collapseTagSize,type:"info","disable-transitions":"",onClose:t[0]||(t[0]=v=>e.deleteTag(v,e.states.cachedOptions[0]))},{default:re(()=>{var v;return[Z("span",{class:U(e.nsSelectV2.e("tags-text")),style:Xe({maxWidth:`${e.tagMaxWidth}px`})},Me((v=e.states.cachedOptions[0])==null?void 0:v.label),7)]}),_:1},8,["closable","size"]),e.modelValue.length>1?(R(),fe(i,{key:0,closable:!1,size:e.collapseTagSize,type:"info","disable-transitions":""},{default:re(()=>[e.collapseTagsTooltip?(R(),fe(l,{key:0,disabled:e.dropdownMenuVisible,"fallback-placements":["bottom","top","right","left"],effect:e.effect,placement:"bottom",teleported:!1},{default:re(()=>[Z("span",{class:U(e.nsSelectV2.e("tags-text")),style:Xe({maxWidth:`${e.tagMaxWidth}px`})},"+ 
"+Me(e.modelValue.length-1),7)]),content:re(()=>[Z("div",{class:U(e.nsSelectV2.e("selection"))},[(R(!0),X(Fe,null,Rt(e.states.cachedOptions,(v,m)=>(R(),X("div",{key:m,class:U(e.nsSelectV2.e("selected-item"))},[(R(),fe(i,{key:e.getValueKey(v),closable:!e.selectDisabled&&!v.disabled,size:e.collapseTagSize,class:"in-tooltip",type:"info","disable-transitions":"",onClose:y=>e.deleteTag(y,v)},{default:re(()=>[Z("span",{class:U(e.nsSelectV2.e("tags-text")),style:Xe({maxWidth:`${e.tagMaxWidth}px`})},Me(e.getLabel(v)),7)]),_:2},1032,["closable","size","onClose"]))],2))),128))],2)]),_:1},8,["disabled","effect"])):(R(),X("span",{key:1,class:U(e.nsSelectV2.e("tags-text")),style:Xe({maxWidth:`${e.tagMaxWidth}px`})},"+ "+Me(e.modelValue.length-1),7))]),_:1},8,["size"])):se("v-if",!0)],2)):(R(!0),X(Fe,{key:1},Rt(e.states.cachedOptions,(v,m)=>(R(),X("div",{key:m,class:U(e.nsSelectV2.e("selected-item"))},[(R(),fe(i,{key:e.getValueKey(v),closable:!e.selectDisabled&&!v.disabled,size:e.collapseTagSize,type:"info","disable-transitions":"",onClose:y=>e.deleteTag(y,v)},{default:re(()=>[Z("span",{class:U(e.nsSelectV2.e("tags-text")),style:Xe({maxWidth:`${e.tagMaxWidth}px`})},Me(e.getLabel(v)),7)]),_:2},1032,["closable","size","onClose"]))],2))),128)),Z("div",{class:U([e.nsSelectV2.e("selected-item"),e.nsSelectV2.e("input-wrapper")]),style:Xe(e.inputWrapperStyle)},[at(Z("input",{id:e.id,ref:"inputRef",autocomplete:e.autocomplete,"aria-autocomplete":"list","aria-haspopup":"listbox",autocapitalize:"off","aria-expanded":e.expanded,"aria-labelledby":e.label,class:U([e.nsSelectV2.is(e.selectSize),e.nsSelectV2.e("combobox-input")]),disabled:e.disabled,role:"combobox",readonly:!e.filterable,spellcheck:"false",type:"text",name:e.name,unselectable:e.expanded?"on":void 
0,"onUpdate:modelValue":t[1]||(t[1]=(...v)=>e.onUpdateInputValue&&e.onUpdateInputValue(...v)),onFocus:t[2]||(t[2]=(...v)=>e.handleFocus&&e.handleFocus(...v)),onInput:t[3]||(t[3]=(...v)=>e.onInput&&e.onInput(...v)),onCompositionstart:t[4]||(t[4]=(...v)=>e.handleCompositionStart&&e.handleCompositionStart(...v)),onCompositionupdate:t[5]||(t[5]=(...v)=>e.handleCompositionUpdate&&e.handleCompositionUpdate(...v)),onCompositionend:t[6]||(t[6]=(...v)=>e.handleCompositionEnd&&e.handleCompositionEnd(...v)),onKeydown:[t[7]||(t[7]=It(dt(v=>e.onKeyboardNavigate("backward"),["stop","prevent"]),["up"])),t[8]||(t[8]=It(dt(v=>e.onKeyboardNavigate("forward"),["stop","prevent"]),["down"])),t[9]||(t[9]=It(dt((...v)=>e.onKeyboardSelect&&e.onKeyboardSelect(...v),["stop","prevent"]),["enter"])),t[10]||(t[10]=It(dt((...v)=>e.handleEsc&&e.handleEsc(...v),["stop","prevent"]),["esc"])),t[11]||(t[11]=It(dt((...v)=>e.handleDel&&e.handleDel(...v),["stop"]),["delete"]))]},null,42,yhe),[[d,e.states.displayInputValue]]),e.filterable?(R(),X("span",{key:0,ref:"calculatorRef","aria-hidden":"true",class:U(e.nsSelectV2.e("input-calculator")),textContent:Me(e.states.displayInputValue)},null,10,bhe)):se("v-if",!0)],6)],2)):(R(),X(Fe,{key:2},[Z("div",{class:U([e.nsSelectV2.e("selected-item"),e.nsSelectV2.e("input-wrapper")])},[at(Z("input",{id:e.id,ref:"inputRef","aria-autocomplete":"list","aria-haspopup":"listbox","aria-labelledby":e.label,"aria-expanded":e.expanded,autocapitalize:"off",autocomplete:e.autocomplete,class:U(e.nsSelectV2.e("combobox-input")),disabled:e.disabled,name:e.name,role:"combobox",readonly:!e.filterable,spellcheck:"false",type:"text",unselectable:e.expanded?"on":void 
0,onCompositionstart:t[12]||(t[12]=(...v)=>e.handleCompositionStart&&e.handleCompositionStart(...v)),onCompositionupdate:t[13]||(t[13]=(...v)=>e.handleCompositionUpdate&&e.handleCompositionUpdate(...v)),onCompositionend:t[14]||(t[14]=(...v)=>e.handleCompositionEnd&&e.handleCompositionEnd(...v)),onFocus:t[15]||(t[15]=(...v)=>e.handleFocus&&e.handleFocus(...v)),onInput:t[16]||(t[16]=(...v)=>e.onInput&&e.onInput(...v)),onKeydown:[t[17]||(t[17]=It(dt(v=>e.onKeyboardNavigate("backward"),["stop","prevent"]),["up"])),t[18]||(t[18]=It(dt(v=>e.onKeyboardNavigate("forward"),["stop","prevent"]),["down"])),t[19]||(t[19]=It(dt((...v)=>e.onKeyboardSelect&&e.onKeyboardSelect(...v),["stop","prevent"]),["enter"])),t[20]||(t[20]=It(dt((...v)=>e.handleEsc&&e.handleEsc(...v),["stop","prevent"]),["esc"]))],"onUpdate:modelValue":t[21]||(t[21]=(...v)=>e.onUpdateInputValue&&e.onUpdateInputValue(...v))},null,42,Che),[[d,e.states.displayInputValue]])],2),e.filterable?(R(),X("span",{key:0,ref:"calculatorRef","aria-hidden":"true",class:U([e.nsSelectV2.e("selected-item"),e.nsSelectV2.e("input-calculator")]),textContent:Me(e.states.displayInputValue)},null,10,whe)):se("v-if",!0)],64)),e.shouldShowPlaceholder?(R(),X("span",{key:3,class:U([e.nsSelectV2.e("placeholder"),e.nsSelectV2.is("transparent",e.states.isComposing||(e.placeholder&&e.multiple?e.modelValue.length===0:!e.hasModelValue))])},Me(e.currentPlaceholder),3)):se("v-if",!0),Z("span",{class:U(e.nsSelectV2.e("suffix"))},[e.iconComponent?at((R(),fe(s,{key:0,class:U([e.nsSelectV2.e("caret"),e.nsInput.e("icon"),e.iconReverse])},{default:re(()=>[(R(),fe(Kt(e.iconComponent)))]),_:1},8,["class"])),[[_t,!e.showClearBtn]]):se("v-if",!0),e.showClearBtn&&e.clearIcon?(R(),fe(s,{key:1,class:U([e.nsSelectV2.e("caret"),e.nsInput.e("icon")]),onClick:dt(e.handleClear,["prevent","stop"])},{default:re(()=>[(R(),fe(Kt(e.clearIcon)))]),_:1},8,["class","onClick"])):se("v-if",!0),e.validateState&&e.validateIcon?(R(),fe(s,{key:2,class:U([e.nsInput.e("icon"),e.ns
Input.e("validateIcon")])},{default:re(()=>[(R(),fe(Kt(e.validateIcon)))]),_:1},8,["class"])):se("v-if",!0)],2)],2)]}),content:re(()=>[g(c,{ref:"menuRef",data:e.filteredOptions,width:e.popperSize,"hovering-index":e.states.hoveringIndex,"scrollbar-always-on":e.scrollbarAlwaysOn},{default:re(p=>[Oe(e.$slots,"default",Za(ll(p)))]),empty:re(()=>[Oe(e.$slots,"empty",{},()=>[Z("p",{class:U(e.nsSelectV2.e("empty"))},Me(e.emptyText?e.emptyText:""),3)])]),_:3},8,["data","width","hovering-index","scrollbar-always-on"])]),_:3},8,["visible","teleported","popper-class","popper-options","effect","transition","persistent","onBeforeShow"])],34)),[[f,e.handleClickOutside,e.popperRef]])}var Op=Ae(mhe,[["render",She],["__file","/home/runner/work/element-plus/element-plus/packages/components/select-v2/src/select.vue"]]);Op.install=e=>{e.component(Op.name,Op)};const khe=Op,$he=khe,Ohe=Ze({animated:{type:Boolean,default:!1},count:{type:Number,default:1},rows:{type:Number,default:3},loading:{type:Boolean,default:!0},throttle:{type:Number}}),Phe=Ze({variant:{type:String,values:["circle","rect","h1","h3","text","caption","p","image","button"],default:"text"}}),The={name:"ElSkeletonItem"},xhe=G(Ke(Te({},The),{props:Phe,setup(e){const t=De("skeleton");return(n,r)=>(R(),X("div",{class:U([A(t).e("item"),A(t).e(n.variant)])},[n.variant==="image"?(R(),fe(A($G),{key:0})):se("v-if",!0)],2))}}));var kv=Ae(xhe,[["__file","/home/runner/work/element-plus/element-plus/packages/components/skeleton/src/skeleton-item.vue"]]);const _he={name:"ElSkeleton"},Ehe=G(Ke(Te({},_he),{props:Ohe,setup(e,{expose:t}){const n=e,r=De("skeleton"),a=fZ(yn(n,"loading"),n.throttle);return 
t({uiLoading:a}),(o,i)=>A(a)?(R(),X("div",hn({key:0,class:[A(r).b(),A(r).is("animated",o.animated)]},o.$attrs),[(R(!0),X(Fe,null,Rt(o.count,l=>(R(),X(Fe,{key:l},[o.loading?Oe(o.$slots,"template",{key:l},()=>[g(kv,{class:U(A(r).is("first")),variant:"p"},null,8,["class"]),(R(!0),X(Fe,null,Rt(o.rows,s=>(R(),fe(kv,{key:s,class:U([A(r).e("paragraph"),A(r).is("last",s===o.rows&&o.rows>1)]),variant:"p"},null,8,["class"]))),128))]):se("v-if",!0)],64))),128))],16)):Oe(o.$slots,"default",Za(hn({key:1},o.$attrs)))}}));var Mhe=Ae(Ehe,[["__file","/home/runner/work/element-plus/element-plus/packages/components/skeleton/src/skeleton.vue"]]);const Ihe=xt(Mhe,{SkeletonItem:kv}),Nhe=En(kv),Ahe=(e,t,n)=>{const r=H(null),a=H(!1),o=x(()=>t.value instanceof Function),i=x(()=>o.value&&t.value(e.modelValue)||e.modelValue),l=Yn(()=>{n.value&&(a.value=!0)},50),s=Yn(()=>{n.value&&(a.value=!1)},50);return{tooltip:r,tooltipVisible:a,formatValue:i,displayTooltip:l,hideTooltip:s}},Dhe=(e,t,n)=>{const{disabled:r,min:a,max:o,step:i,showTooltip:l,precision:s,sliderSize:c,formatTooltip:d,emitChange:f,resetSize:p,updateDragging:v}=ve("SliderProvider"),{tooltip:m,tooltipVisible:y,formatValue:b,displayTooltip:C,hideTooltip:S}=Ahe(e,d,l),w=x(()=>`${(e.modelValue-a.value)/(o.value-a.value)*100}%`),k=x(()=>e.vertical?{bottom:w.value}:{left:w.value}),$=()=>{t.hovering=!0,C()},O=()=>{t.hovering=!1,t.dragging||S()},T=z=>{r.value||(z.preventDefault(),j(z),gn(window,"mousemove",F),gn(window,"touchmove",F),gn(window,"mouseup",N),gn(window,"touchend",N),gn(window,"contextmenu",N))},_=()=>{r.value||(t.newPosition=Number.parseFloat(w.value)-i.value/(o.value-a.value)*100,D(t.newPosition),f())},I=()=>{r.value||(t.newPosition=Number.parseFloat(w.value)+i.value/(o.value-a.value)*100,D(t.newPosition),f())},L=z=>{let B,M;return 
z.type.startsWith("touch")?(M=z.touches[0].clientY,B=z.touches[0].clientX):(M=z.clientY,B=z.clientX),{clientX:B,clientY:M}},j=z=>{t.dragging=!0,t.isClick=!0;const{clientX:B,clientY:M}=L(z);e.vertical?t.startY=M:t.startX=B,t.startPosition=Number.parseFloat(w.value),t.newPosition=t.startPosition},F=z=>{if(t.dragging){t.isClick=!1,C(),p();let B;const{clientX:M,clientY:E}=L(z);e.vertical?(t.currentY=E,B=(t.startY-t.currentY)/c.value*100):(t.currentX=M,B=(t.currentX-t.startX)/c.value*100),t.newPosition=t.startPosition+B,D(t.newPosition)}},N=()=>{t.dragging&&(setTimeout(()=>{t.dragging=!1,t.hovering||S(),t.isClick||(D(t.newPosition),f())},0),Bn(window,"mousemove",F),Bn(window,"touchmove",F),Bn(window,"mouseup",N),Bn(window,"touchend",N),Bn(window,"contextmenu",N))},D=async z=>{if(z===null||Number.isNaN(+z))return;z<0?z=0:z>100&&(z=100);const B=100/((o.value-a.value)/i.value);let E=Math.round(z/B)*B*(o.value-a.value)*.01+a.value;E=Number.parseFloat(E.toFixed(s.value)),n(Pt,E),!t.dragging&&e.modelValue!==t.oldValue&&(t.oldValue=e.modelValue),await Ne(),t.dragging&&C(),m.value.updatePopper()};return ce(()=>t.dragging,z=>{v(z)}),{tooltip:m,tooltipVisible:y,showTooltip:l,wrapperStyle:k,formatValue:b,handleMouseEnter:$,handleMouseLeave:O,onButtonDown:T,onLeftKeyDown:_,onRightKeyDown:I,setPosition:D}},Rhe=G({name:"ElSliderButton",components:{ElTooltip:Ur},props:{modelValue:{type:Number,default:0},vertical:{type:Boolean,default:!1},tooltipClass:{type:String,default:""}},emits:[Pt],setup(e,{emit:t}){const 
n=De("slider"),r=bt({hovering:!1,dragging:!1,isClick:!1,startX:0,currentX:0,startY:0,currentY:0,startPosition:0,newPosition:0,oldValue:e.modelValue}),{tooltip:a,showTooltip:o,tooltipVisible:i,wrapperStyle:l,formatValue:s,handleMouseEnter:c,handleMouseLeave:d,onButtonDown:f,onLeftKeyDown:p,onRightKeyDown:v,setPosition:m}=Dhe(e,r,t),{hovering:y,dragging:b}=or(r);return{ns:n,tooltip:a,tooltipVisible:i,showTooltip:o,wrapperStyle:l,formatValue:s,handleMouseEnter:c,handleMouseLeave:d,onButtonDown:f,onLeftKeyDown:p,onRightKeyDown:v,setPosition:m,hovering:y,dragging:b}}});function Lhe(e,t,n,r,a,o){const i=we("el-tooltip");return R(),X("div",{ref:"button",class:U([e.ns.e("button-wrapper"),{hover:e.hovering,dragging:e.dragging}]),style:Xe(e.wrapperStyle),tabindex:"0",onMouseenter:t[1]||(t[1]=(...l)=>e.handleMouseEnter&&e.handleMouseEnter(...l)),onMouseleave:t[2]||(t[2]=(...l)=>e.handleMouseLeave&&e.handleMouseLeave(...l)),onMousedown:t[3]||(t[3]=(...l)=>e.onButtonDown&&e.onButtonDown(...l)),onTouchstart:t[4]||(t[4]=(...l)=>e.onButtonDown&&e.onButtonDown(...l)),onFocus:t[5]||(t[5]=(...l)=>e.handleMouseEnter&&e.handleMouseEnter(...l)),onBlur:t[6]||(t[6]=(...l)=>e.handleMouseLeave&&e.handleMouseLeave(...l)),onKeydown:[t[7]||(t[7]=It((...l)=>e.onLeftKeyDown&&e.onLeftKeyDown(...l),["left"])),t[8]||(t[8]=It((...l)=>e.onRightKeyDown&&e.onRightKeyDown(...l),["right"])),t[9]||(t[9]=It(dt((...l)=>e.onLeftKeyDown&&e.onLeftKeyDown(...l),["prevent"]),["down"])),t[10]||(t[10]=It(dt((...l)=>e.onRightKeyDown&&e.onRightKeyDown(...l),["prevent"]),["up"]))]},[g(i,{ref:"tooltip",visible:e.tooltipVisible,"onUpdate:visible":t[0]||(t[0]=l=>e.tooltipVisible=l),placement:"top","stop-popper-mouse-event":!1,"popper-class":e.tooltipClass,disabled:!e.showTooltip,persistent:""},{content:re(()=>[Z("span",null,Me(e.formatValue),1)]),default:re(()=>[Z("div",{class:U([e.ns.e("button"),{hover:e.hovering,dragging:e.dragging}])},null,2)]),_:1},8,["visible","popper-class","disabled"])],38)}var 
Fhe=Ae(Rhe,[["render",Lhe],["__file","/home/runner/work/element-plus/element-plus/packages/components/slider/src/button.vue"]]);const Bhe=G({name:"ElMarker",props:{mark:{type:[String,Object],default:()=>{}}},setup(e){const t=De("slider"),n=x(()=>typeof e.mark=="string"?e.mark:e.mark.label);return{ns:t,label:n}},render(){var e;return qe("div",{class:this.ns.e("marks-text"),style:(e=this.mark)==null?void 0:e.style},this.label)}});var Vhe=Ae(Bhe,[["__file","/home/runner/work/element-plus/element-plus/packages/components/slider/src/marker.vue"]]);const zhe=e=>x(()=>e.marks?Object.keys(e.marks).map(parseFloat).sort((n,r)=>n-r).filter(n=>n<=e.max&&n>=e.min).map(n=>({point:n,position:(n-e.min)*100/(e.max-e.min),mark:e.marks[n]})):[]),Hhe=(e,t,n)=>{const r=ve(ga,{}),a=ve(Ia,{}),o=Qn(null),i=H(null),l=H(null),s={firstButton:i,secondButton:l},c=x(()=>e.disabled||r.disabled||!1),d=x(()=>Math.min(t.firstValue,t.secondValue)),f=x(()=>Math.max(t.firstValue,t.secondValue)),p=x(()=>e.range?`${100*(f.value-d.value)/(e.max-e.min)}%`:`${100*(t.firstValue-e.min)/(e.max-e.min)}%`),v=x(()=>e.range?`${100*(d.value-e.min)/(e.max-e.min)}%`:"0%"),m=x(()=>e.vertical?{height:e.height}:{}),y=x(()=>e.vertical?{height:p.value,bottom:v.value}:{width:p.value,left:v.value}),b=()=>{o.value&&(t.sliderSize=o.value[`client${e.vertical?"Height":"Width"}`])},C=T=>{const _=e.min+T*(e.max-e.min)/100;if(!e.range){i.value.setPosition(T);return}let I;Math.abs(d.value-_)<Math.abs(f.value-_)?I=t.firstValue<t.secondValue?"firstButton":"secondButton":I=t.firstValue>t.secondValue?"firstButton":"secondButton",s[I].value.setPosition(T)},S=T=>{t.firstValue=T,k(e.range?[d.value,f.value]:T)},w=T=>{t.secondValue=T,e.range&&k([d.value,f.value])},k=T=>{n(Pt,T),n(cl,T)},$=async()=>{await 
Ne(),n(ir,e.range?[d.value,f.value]:e.modelValue)};return{elFormItem:a,slider:o,firstButton:i,secondButton:l,sliderDisabled:c,minValue:d,maxValue:f,runwayStyle:m,barStyle:y,resetSize:b,setPosition:C,emitChange:$,onSliderClick:T=>{if(!(c.value||t.dragging)){if(b(),e.vertical){const _=o.value.getBoundingClientRect().bottom;C((_-T.clientY)/t.sliderSize*100)}else{const _=o.value.getBoundingClientRect().left;C((T.clientX-_)/t.sliderSize*100)}$()}},setFirstValue:S,setSecondValue:w}},jhe=(e,t,n,r)=>({stops:x(()=>{if(!e.showStops||e.min>e.max)return[];if(e.step===0)return[];const i=(e.max-e.min)/e.step,l=100*e.step/(e.max-e.min),s=Array.from({length:i-1}).map((c,d)=>(d+1)*l);return e.range?s.filter(c=>c<100*(n.value-e.min)/(e.max-e.min)||c>100*(r.value-e.min)/(e.max-e.min)):s.filter(c=>c>100*(t.firstValue-e.min)/(e.max-e.min))}),getStopStyle:i=>e.vertical?{bottom:`${i}%`}:{left:`${i}%`}}),Khe=G({name:"ElSlider",components:{ElInputNumber:_5,SliderButton:Fhe,SliderMarker:Vhe},props:{modelValue:{type:[Number,Array],default:0},min:{type:Number,default:0},max:{type:Number,default:100},step:{type:Number,default:1},showInput:{type:Boolean,default:!1},showInputControls:{type:Boolean,default:!0},size:{type:String,validator:va},inputSize:{type:String,validator:va},showStops:{type:Boolean,default:!1},showTooltip:{type:Boolean,default:!0},formatTooltip:{type:Function,default:void 0},disabled:{type:Boolean,default:!1},range:{type:Boolean,default:!1},vertical:{type:Boolean,default:!1},height:{type:String,default:""},debounce:{type:Number,default:300},label:{type:String,default:void 0},tooltipClass:{type:String,default:void 0},marks:Object},emits:[Pt,ir,cl],setup(e,{emit:t}){const 
n=De("slider"),r=bt({firstValue:0,secondValue:0,oldValue:0,dragging:!1,sliderSize:1}),{elFormItem:a,slider:o,firstButton:i,secondButton:l,sliderDisabled:s,minValue:c,maxValue:d,runwayStyle:f,barStyle:p,resetSize:v,emitChange:m,onSliderClick:y,setFirstValue:b,setSecondValue:C}=Hhe(e,r,t),{stops:S,getStopStyle:w}=jhe(e,r,c,d),k=Gn(),$=x(()=>e.inputSize||k.value),O=x(()=>[n.b(),n.m(k.value),n.is("vertical",e.vertical),{[n.m("with-input")]:e.showInput}]),T=zhe(e);Whe(e,r,c,d,t,a);const _=x(()=>{const B=[e.min,e.max,e.step].map(M=>{const E=`${M}`.split(".")[1];return E?E.length:0});return Math.max.apply(null,B)}),{sliderWrapper:I}=Uhe(e,r,v),{firstValue:L,secondValue:j,oldValue:F,dragging:N,sliderSize:D}=or(r),z=B=>{r.dragging=B};return ot("SliderProvider",Ke(Te({},or(e)),{sliderSize:D,disabled:s,precision:_,emitChange:m,resetSize:v,updateDragging:z})),{ns:n,firstValue:L,secondValue:j,oldValue:F,dragging:N,sliderSize:D,slider:o,firstButton:i,secondButton:l,sliderDisabled:s,runwayStyle:f,barStyle:p,emitChange:m,onSliderClick:y,getStopStyle:w,setFirstValue:b,setSecondValue:C,stops:S,markList:T,sliderWrapper:I,sliderWrapperSize:k,sliderInputSize:$,sliderKls:O}}}),Whe=(e,t,n,r,a,o)=>{const i=c=>{a(Pt,c),a(cl,c)},l=()=>e.range?![n.value,r.value].every((c,d)=>c===t.oldValue[d]):e.modelValue!==t.oldValue,s=()=>{var c,d;if(e.min>e.max){qn("Slider","min should not be greater than max.");return}const f=e.modelValue;e.range&&Array.isArray(f)?f[1]<e.min?i([e.min,e.min]):f[0]>e.max?i([e.max,e.max]):f[0]<e.min?i([e.min,f[1]]):f[1]>e.max?i([f[0],e.max]):(t.firstValue=f[0],t.secondValue=f[1],l()&&((c=o.validate)==null||c.call(o,"change").catch(p=>void 0),t.oldValue=f.slice())):!e.range&&typeof f=="number"&&!Number.isNaN(f)&&(f<e.min?i(e.min):f>e.max?i(e.max):(t.firstValue=f,l()&&((d=o.validate)==null||d.call(o,"change").catch(p=>void 
0),t.oldValue=f)))};s(),ce(()=>t.dragging,c=>{c||s()}),ce(()=>e.modelValue,(c,d)=>{t.dragging||Array.isArray(c)&&Array.isArray(d)&&c.every((f,p)=>f===d[p])&&t.firstValue===c[0]&&t.secondValue===c[1]||s()},{deep:!0}),ce(()=>[e.min,e.max],()=>{s()})},Uhe=(e,t,n)=>{const r=H(null);return et(async()=>{let a;e.range?(Array.isArray(e.modelValue)?(t.firstValue=Math.max(e.min,e.modelValue[0]),t.secondValue=Math.min(e.max,e.modelValue[1])):(t.firstValue=e.min,t.secondValue=e.max),t.oldValue=[t.firstValue,t.secondValue],a=`${t.firstValue}-${t.secondValue}`):(typeof e.modelValue!="number"||Number.isNaN(e.modelValue)?t.firstValue=e.min:t.firstValue=Math.min(e.max,Math.max(e.min,e.modelValue)),t.oldValue=t.firstValue,a=t.firstValue),r.value.setAttribute("aria-valuetext",a),r.value.setAttribute("aria-label",e.label?e.label:`slider between ${e.min} and ${e.max}`),gn(window,"resize",n),await Ne(),n()}),Lt(()=>{Bn(window,"resize",n)}),{sliderWrapper:r}},Yhe=["aria-valuemin","aria-valuemax","aria-orientation","aria-disabled"],qhe={key:1};function Ghe(e,t,n,r,a,o){const i=we("slider-button"),l=we("slider-marker"),s=we("el-input-number");return 
R(),X("div",{ref:"sliderWrapper",class:U(e.sliderKls),role:"slider","aria-valuemin":e.min,"aria-valuemax":e.max,"aria-orientation":e.vertical?"vertical":"horizontal","aria-disabled":e.sliderDisabled},[Z("div",{ref:"slider",class:U([e.ns.e("runway"),{"show-input":e.showInput&&!e.range},e.ns.is("disabled",e.sliderDisabled)]),style:Xe(e.runwayStyle),onClick:t[0]||(t[0]=(...c)=>e.onSliderClick&&e.onSliderClick(...c))},[Z("div",{class:U(e.ns.e("bar")),style:Xe(e.barStyle)},null,6),g(i,{ref:"firstButton","model-value":e.firstValue,vertical:e.vertical,"tooltip-class":e.tooltipClass,"onUpdate:modelValue":e.setFirstValue},null,8,["model-value","vertical","tooltip-class","onUpdate:modelValue"]),e.range?(R(),fe(i,{key:0,ref:"secondButton","model-value":e.secondValue,vertical:e.vertical,"tooltip-class":e.tooltipClass,"onUpdate:modelValue":e.setSecondValue},null,8,["model-value","vertical","tooltip-class","onUpdate:modelValue"])):se("v-if",!0),e.showStops?(R(),X("div",qhe,[(R(!0),X(Fe,null,Rt(e.stops,(c,d)=>(R(),X("div",{key:d,class:U(e.ns.e("stop")),style:Xe(e.getStopStyle(c))},null,6))),128))])):se("v-if",!0),e.markList.length>0?(R(),X(Fe,{key:2},[Z("div",null,[(R(!0),X(Fe,null,Rt(e.markList,(c,d)=>(R(),X("div",{key:d,style:Xe(e.getStopStyle(c.position)),class:U([e.ns.e("stop"),e.ns.e("marks-stop")])},null,6))),128))]),Z("div",{class:U(e.ns.e("marks"))},[(R(!0),X(Fe,null,Rt(e.markList,(c,d)=>(R(),fe(l,{key:d,mark:c.mark,style:Xe(e.getStopStyle(c.position))},null,8,["mark","style"]))),128))],2)],64)):se("v-if",!0)],6),e.showInput&&!e.range?(R(),fe(s,{key:0,ref:"input","model-value":e.firstValue,class:U(e.ns.e("input")),step:e.step,disabled:e.sliderDisabled,controls:e.showInputControls,min:e.min,max:e.max,debounce:e.debounce,size:e.sliderInputSize,"onUpdate:modelValue":e.setFirstValue,onChange:e.emitChange},null,8,["model-value","class","step","disabled","controls","min","max","debounce","size","onUpdate:modelValue","onChange"])):se("v-if",!0)],10,Yhe)}var 
Pp=Ae(Khe,[["render",Ghe],["__file","/home/runner/work/element-plus/element-plus/packages/components/slider/src/index.vue"]]);
/* Plugin hook for the slider component: registers it on the app under its own `name`. */
Pp.install=e=>{e.component(Pp.name,Pp)};
/* Xhe / Zhe are plain aliases of the slider component. */
const Xhe=Pp,Zhe=Xhe,
/* Space item wrapper: a single prop `prefixCls`; the computed class is `${prefixCls || <space block class>}__item`. */
Jhe=Ze({prefixCls:{type:String,default:""}}),Qhe=G({props:Jhe,setup(e){const t=De("space");return{classes:x(()=>`${e.prefixCls||t.b()}__item`)}}});
/* Render function of the item wrapper: one <div> carrying the computed class around the default slot. */
function epe(e,t,n,r,a,o){return R(),X("div",{class:U(e.classes)},[Oe(e.$slots,"default")],2)}
var gO=Ae(Qhe,[["render",epe],["__file","/home/runner/work/element-plus/element-plus/packages/components/space/src/item.vue"]]);
/* Named gap sizes used by tpe below (the values are emitted there with a `px` suffix). */
const yO={small:8,default:12,large:16};
/*
 * tpe(props): derives the class list and the container/item styles of ElSpace.
 *   r = horizontal gap (becomes marginRight), a = vertical gap (becomes paddingBottom and,
 *   when wrap/fill is on, a negative marginBottom on the container).
 * The Wn callback (presumably watchEffect - confirm) recomputes both gaps from `size`:
 *   - array size: [horizontal = 0, vertical = 0]
 *   - otherwise a value accepted by Yt (presumably isNumber - confirm) is used directly, else the
 *     name is looked up in yO with yO.small as fallback; then
 *       (wrap || fill) && horizontal -> both gaps
 *       horizontal                   -> horizontal gap only
 *       anything else                -> vertical gap only
 * Returns { classes, containerStyle, itemStyle }.
 */
function tpe(e){const t=De("space"),n=x(()=>[t.b(),t.m(e.direction),e.class]),r=H(0),a=H(0),o=x(()=>{const l=e.wrap||e.fill?{flexWrap:"wrap",marginBottom:`-${a.value}px`}:{},s={alignItems:e.alignment};return[l,s,e.style]}),i=x(()=>{const l={paddingBottom:`${a.value}px`,marginRight:`${r.value}px`},s=e.fill?{flexGrow:1,minWidth:`${e.fillRatio}%`}:{};return[l,s]});return Wn(()=>{const{size:l="small",wrap:s,direction:c,fill:d}=e;if(Array.isArray(l)){const[f=0,p=0]=l;r.value=f,a.value=p}else{let f;Yt(l)?f=l:f=yO[l]||yO.small,(s||d)&&c==="horizontal"?r.value=a.value=f:c==="horizontal"?(r.value=f,a.value=0):(a.value=f,r.value=0)}}),{classes:n,containerStyle:o,itemStyle:i}}
/* ElSpace props. `size` validates as a value accepted by Yt, or a 2-element array whose entries all pass Yt. */
const npe=Ze({direction:{type:String,values:["horizontal","vertical"],default:"horizontal"},class:{type:Le([String,Object,Array]),default:""},style:{type:Le([String,Array,Object]),default:""},alignment:{type:Le(String),default:"center"},prefixCls:{type:String},spacer:{type:Le([Object,String,Number,Array]),default:null,validator:e=>rn(e)||Yt(e)||wt(e)},wrap:{type:Boolean,default:!1},fill:{type:Boolean,default:!1},fillRatio:{type:Number,default:100},size:{type:[String,Array,Number],values:Bo,validator:e=>Yt(e)||pt(e)&&e.length===2&&e.every(t=>Yt(t))}});
/*
 * ElSpace. setup() returns a render function that:
 *   1. renders the default slot and returns null when it produced no children;
 *   2. if the children are an array (pt), wraps every child in the item component gO; children
 *      accepted by gE (presumably isFragment - confirm) are flattened one level, each nested
 *      child being wrapped on its own;
 *   3. if `spacer` is set, inserts a <span> holding the spacer between consecutive items
 *      (never after the last one);
 *   4. returns the wrapping <div> with the computed class and style.
 * NOTE(review): nested keys are `nested-${m}` where m is the index inside one fragment, so two
 * fragments in the same slot produce the same keys - confirm whether this matters to callers.
 */
var rpe=G({name:"ElSpace",props:npe,setup(e,{slots:t}){const{classes:n,containerStyle:r,itemStyle:a}=tpe(e);return()=>{var o;const{spacer:i,prefixCls:l,direction:s}=e,c=Oe(t,"default",{key:0},()=>[]);if(((o=c.children)!=null?o:[]).length===0)return null;if(pt(c.children)){let d=[];if(c.children.forEach((f,p)=>{gE(f)?pt(f.children)&&f.children.forEach((v,m)=>{d.push(g(gO,{style:a.value,prefixCls:l,key:`nested-${m}`},{default:()=>[v]},ka.PROPS|ka.STYLE,["style","prefixCls"]))}):GX(f)&&d.push(g(gO,{style:a.value,prefixCls:l,key:`LoopKey${p}`},{default:()=>[f]},ka.PROPS|ka.STYLE,["style","prefixCls"]))}),i){const f=d.length-1;d=d.reduce((p,v,m)=>{const y=[...p,v];return m!==f&&y.push(g("span",{style:[a.value,s==="vertical"?"width: 100%":null],key:m},[rn(i)?i:yt(i,ka.TEXT)],ka.STYLE)),y},[])}return g("div",{class:n.value,style:r.value},d,ka.STYLE|ka.CLASS)}return c.children}}});
const ape=xt(rpe),
/*
 * ElSteps container. setup():
 *   - keeps the list of registered steps in `r` and calls setIndex(position) on every entry
 *     whenever that list is replaced;
 *   - exposes { props, steps } to descendants through ot("ElSteps", ...) (presumably provide - confirm);
 *   - emits the `ir` event with (newActive, oldActive) whenever `active` changes.
 */
ope=G({name:"ElSteps",props:{space:{type:[Number,String],default:""},active:{type:Number,default:0},direction:{type:String,default:"horizontal",validator:e=>["horizontal","vertical"].includes(e)},alignCenter:{type:Boolean,default:!1},simple:{type:Boolean,default:!1},finishStatus:{type:String,default:"finish",validator:e=>["wait","process","finish","error","success"].includes(e)},processStatus:{type:String,default:"process",validator:e=>["wait","process","finish","error","success"].includes(e)}},emits:[ir],setup(e,{emit:t}){const n=De("steps"),r=H([]);return ce(r,()=>{r.value.forEach((a,o)=>{a.setIndex(o)})}),ot("ElSteps",{props:e,steps:r}),ce(()=>e.active,(a,o)=>{t(ir,a,o)}),{steps:r,ns:n}}});
/* ElSteps render function: a <div> whose modifier class is "simple" when `simple` is set, otherwise the `direction`. */
function ipe(e,t,n,r,a,o){return R(),X("div",{class:U([e.ns.b(),e.ns.m(e.simple?"simple":e.direction)])},[Oe(e.$slots,"default")],2)}
var lpe=Ae(ope,[["render",ipe],["__file","/home/runner/work/element-plus/element-plus/packages/components/steps/src/index.vue"]]);const 
spe=G({name:"ElStep",components:{ElIcon:ft,Close:Ma,Check:Wu},props:{title:{type:String,default:""},icon:{type:[String,Object],default:""},description:{type:String,default:""},status:{type:String,default:"",validator:e=>["","wait","process","finish","error","success"].includes(e)}},setup(e){const t=De("step"),n=H(-1),r=H({}),a=H(""),o=ve("ElSteps"),i=$t();et(()=>{ce([()=>o.props.active,()=>o.props.processStatus,()=>o.props.finishStatus],([k])=>{S(k)},{immediate:!0})}),Lt(()=>{o.steps.value=o.steps.value.filter(k=>k.uid!==i.uid)});const l=x(()=>e.status||a.value),s=x(()=>{const k=o.steps.value[n.value-1];return k?k.currentStatus:"wait"}),c=x(()=>o.props.alignCenter),d=x(()=>o.props.direction==="vertical"),f=x(()=>o.props.simple),p=x(()=>o.steps.value.length),v=x(()=>{var k;return((k=o.steps.value[p.value-1])==null?void 0:k.uid)===i.uid}),m=x(()=>f.value?"":o.props.space),y=x(()=>{const k={flexBasis:typeof m.value=="number"?`${m.value}px`:m.value?m.value:`${100/(p.value-(c.value?0:1))}%`};return d.value||v.value&&(k.maxWidth=`${100/p.value}%`),k}),b=k=>{n.value=k},C=k=>{let $=100;const O={};O.transitionDelay=`${150*n.value}ms`,k===o.props.processStatus?$=0:k==="wait"&&($=0,O.transitionDelay=`${-150*n.value}ms`),O.borderWidth=$&&!f.value?"1px":0,O[o.props.direction==="vertical"?"height":"width"]=`${$}%`,r.value=O},S=k=>{k>n.value?a.value=o.props.finishStatus:k===n.value&&s.value!=="error"?a.value=o.props.processStatus:a.value="wait";const $=o.steps.value[p.value-1];$&&$.calcProgress(a.value)},w=bt({uid:x(()=>i.uid),currentStatus:l,setIndex:b,calcProgress:C});return o.steps.value=[...o.steps.value,w],{ns:t,index:n,lineStyle:r,currentStatus:l,isCenter:c,isVertical:d,isSimple:f,isLast:v,space:m,style:y,parent:o,setIndex:b,calcProgress:C,updateStatus:S}}});function upe(e,t,n,r,a,o){const i=we("el-icon"),l=we("check"),s=we("close");return 
R(),X("div",{style:Xe(e.style),class:U([e.ns.b(),e.ns.is(e.isSimple?"simple":e.parent.props.direction),e.ns.is("flex",e.isLast&&!e.space&&!e.isCenter),e.ns.is("center",e.isCenter&&!e.isVertical&&!e.isSimple)])},[se(" icon & line "),Z("div",{class:U([e.ns.e("head"),e.ns.is(e.currentStatus)])},[e.isSimple?se("v-if",!0):(R(),X("div",{key:0,class:U(e.ns.e("line"))},[Z("i",{class:U(e.ns.e("line-inner")),style:Xe(e.lineStyle)},null,6)],2)),Z("div",{class:U([e.ns.e("icon"),e.ns.is(e.icon?"icon":"text")])},[e.currentStatus!=="success"&&e.currentStatus!=="error"?Oe(e.$slots,"icon",{key:0},()=>[e.icon?(R(),fe(i,{key:0,class:U(e.ns.e("icon-inner"))},{default:re(()=>[(R(),fe(Kt(e.icon)))]),_:1},8,["class"])):se("v-if",!0),!e.icon&&!e.isSimple?(R(),X("div",{key:1,class:U(e.ns.e("icon-inner"))},Me(e.index+1),3)):se("v-if",!0)]):(R(),fe(i,{key:1,class:U([e.ns.e("icon-inner"),e.ns.is("status")])},{default:re(()=>[e.currentStatus==="success"?(R(),fe(l,{key:0})):(R(),fe(s,{key:1}))]),_:1},8,["class"]))],2)],2),se(" title & description "),Z("div",{class:U(e.ns.e("main"))},[Z("div",{class:U([e.ns.e("title"),e.ns.is(e.currentStatus)])},[Oe(e.$slots,"title",{},()=>[yt(Me(e.title),1)])],2),e.isSimple?(R(),X("div",{key:0,class:U(e.ns.e("arrow"))},null,2)):(R(),X("div",{key:1,class:U([e.ns.e("description"),e.ns.is(e.currentStatus)])},[Oe(e.$slots,"description",{},()=>[yt(Me(e.description),1)])],2))],2)],6)}var Z5=Ae(spe,[["render",upe],["__file","/home/runner/work/element-plus/element-plus/packages/components/steps/src/item.vue"]]);const 
cpe=xt(lpe,{Step:Z5}),dpe=En(Z5),fpe=Ze({modelValue:{type:[Boolean,String,Number],default:!1},value:{type:[Boolean,String,Number],default:!1},disabled:{type:Boolean,default:!1},width:{type:Number,default:40},inlinePrompt:{type:Boolean,default:!1},activeIcon:{type:wr,default:""},inactiveIcon:{type:wr,default:""},activeText:{type:String,default:""},inactiveText:{type:String,default:""},activeColor:{type:String,default:""},inactiveColor:{type:String,default:""},borderColor:{type:String,default:""},activeValue:{type:[Boolean,String,Number],default:!0},inactiveValue:{type:[Boolean,String,Number],default:!1},name:{type:String,default:""},validateEvent:{type:Boolean,default:!0},id:String,loading:{type:Boolean,default:!1},beforeChange:{type:Le(Function)},size:{type:String,validator:va}}),hpe={[Pt]:e=>yr(e)||wt(e)||Yt(e),[ir]:e=>yr(e)||wt(e)||Yt(e),[cl]:e=>yr(e)||wt(e)||Yt(e)},bO="ElSwitch",ppe=G({name:bO,components:{ElIcon:ft,Loading:gl},props:fpe,emits:hpe,setup(e,{emit:t}){const{formItem:n}=$c(),r=Ms(x(()=>e.loading)),a=De("switch"),o=Gn(),i=H(e.modelValue!==!1),l=H(),s=H(),c=x(()=>[a.b(),a.m(o.value),a.is("disabled",r.value),a.is("checked",f.value)]);ce(()=>e.modelValue,()=>{i.value=!0}),ce(()=>e.value,()=>{i.value=!1});const d=x(()=>i.value?e.modelValue:e.value),f=x(()=>d.value===e.activeValue);[e.activeValue,e.inactiveValue].includes(d.value)||(t(Pt,e.inactiveValue),t(ir,e.inactiveValue),t(cl,e.inactiveValue)),ce(f,()=>{var b;l.value.checked=f.value,(e.activeColor||e.inactiveColor)&&m(),e.validateEvent&&((b=n==null?void 0:n.validate)==null||b.call(n,"change").catch(C=>void 0))});const p=()=>{const b=f.value?e.inactiveValue:e.activeValue;t(Pt,b),t(ir,b),t(cl,b),Ne(()=>{l.value.checked=f.value})},v=()=>{if(r.value)return;const{beforeChange:b}=e;if(!b){p();return}const C=b();[hs(C),yr(C)].some(w=>w)||qn(bO,"beforeChange must return type `Promise<boolean>` or `boolean`"),hs(C)?C.then(w=>{w&&p()}).catch(w=>{}):C&&p()},m=()=>{const 
b=f.value?e.activeColor:e.inactiveColor,C=s.value;e.borderColor?C.style.borderColor=e.borderColor:e.borderColor||(C.style.borderColor=b),C.style.backgroundColor=b,C.children[0].style.color=b},y=()=>{var b,C;(C=(b=l.value)==null?void 0:b.focus)==null||C.call(b)};return et(()=>{(e.activeColor||e.inactiveColor||e.borderColor)&&m(),l.value.checked=f.value}),{ns:a,input:l,core:s,switchDisabled:r,checked:f,switchKls:c,handleChange:p,switchValue:v,focus:y}}}),vpe=["aria-checked","aria-disabled"],mpe=["id","name","true-value","false-value","disabled"],gpe=["aria-hidden"],ype=["aria-hidden"],bpe=["aria-hidden"],Cpe=["aria-hidden"];function wpe(e,t,n,r,a,o){const i=we("el-icon"),l=we("loading");return R(),X("div",{class:U(e.switchKls),role:"switch","aria-checked":e.checked,"aria-disabled":e.switchDisabled,onClick:t[2]||(t[2]=dt((...s)=>e.switchValue&&e.switchValue(...s),["prevent"]))},[Z("input",{id:e.id,ref:"input",class:U(e.ns.e("input")),type:"checkbox",name:e.name,"true-value":e.activeValue,"false-value":e.inactiveValue,disabled:e.switchDisabled,onChange:t[0]||(t[0]=(...s)=>e.handleChange&&e.handleChange(...s)),onKeydown:t[1]||(t[1]=It((...s)=>e.switchValue&&e.switchValue(...s),["enter"]))},null,42,mpe),!e.inlinePrompt&&(e.inactiveIcon||e.inactiveText)?(R(),X("span",{key:0,class:U([e.ns.e("label"),e.ns.em("label","left"),e.ns.is("active",!e.checked)])},[e.inactiveIcon?(R(),fe(i,{key:0},{default:re(()=>[(R(),fe(Kt(e.inactiveIcon)))]),_:1})):se("v-if",!0),!e.inactiveIcon&&e.inactiveText?(R(),X("span",{key:1,"aria-hidden":e.checked},Me(e.inactiveText),9,gpe)):se("v-if",!0)],2)):se("v-if",!0),Z("span",{ref:"core",class:U(e.ns.e("core")),style:Xe({width:(e.width||40)+"px"})},[e.inlinePrompt?(R(),X("div",{key:0,class:U(e.ns.e("inner"))},[e.activeIcon||e.inactiveIcon?(R(),X(Fe,{key:0},[e.activeIcon?(R(),fe(i,{key:0,class:U([e.ns.is("icon"),e.checked?e.ns.is("show"):e.ns.is("hide")])},{default:re(()=>[(R(),fe(Kt(e.activeIcon)))]),_:1},8,["class"])):se("v-if",!0),e.inactiveIcon?(R
(),fe(i,{key:1,class:U([e.ns.is("icon"),e.checked?e.ns.is("hide"):e.ns.is("show")])},{default:re(()=>[(R(),fe(Kt(e.inactiveIcon)))]),_:1},8,["class"])):se("v-if",!0)],64)):e.activeText||e.inactiveIcon?(R(),X(Fe,{key:1},[e.activeText?(R(),X("span",{key:0,class:U([e.ns.is("text"),e.checked?e.ns.is("show"):e.ns.is("hide")]),"aria-hidden":!e.checked},Me(e.activeText.substring(0,3)),11,ype)):se("v-if",!0),e.inactiveText?(R(),X("span",{key:1,class:U([e.ns.is("text"),e.checked?e.ns.is("hide"):e.ns.is("show")]),"aria-hidden":e.checked},Me(e.inactiveText.substring(0,3)),11,bpe)):se("v-if",!0)],64)):se("v-if",!0)],2)):se("v-if",!0),Z("div",{class:U(e.ns.e("action"))},[e.loading?(R(),fe(i,{key:0,class:U(e.ns.is("loading"))},{default:re(()=>[g(l)]),_:1},8,["class"])):se("v-if",!0)],2)],6),!e.inlinePrompt&&(e.activeIcon||e.activeText)?(R(),X("span",{key:1,class:U([e.ns.e("label"),e.ns.em("label","right"),e.ns.is("active",e.checked)])},[e.activeIcon?(R(),fe(i,{key:0},{default:re(()=>[(R(),fe(Kt(e.activeIcon)))]),_:1})):se("v-if",!0),!e.activeIcon&&e.activeText?(R(),X("span",{key:1,"aria-hidden":!e.checked},Me(e.activeText),9,Cpe)):se("v-if",!0)],2)):se("v-if",!0)],10,vpe)}var Spe=Ae(ppe,[["render",wpe],["__file","/home/runner/work/element-plus/element-plus/packages/components/switch/src/switch.vue"]]);const kpe=xt(Spe);/*!
 * escape-html
 * Copyright(c) 2012-2013 TJ Holowaychuk
 * Copyright(c) 2015 Andreas Lubbe
 * Copyright(c) 2015 Tiancheng "Timothy" Gu
 * MIT Licensed
 */var $pe=/["'&<>]/,Ope=Ppe;function Ppe(e){var t=""+e,n=$pe.exec(t);if(!n)return t;var r,a="",o=0,i=0;for(o=n.index;o<t.length;o++){switch(t.charCodeAt(o)){case 34:r="&quot;";break;case 38:r="&amp;";break;case 39:r="&#39;";break;case 60:r="&lt;";break;case 62:r="&gt;";break;default:continue}i!==o&&(a+=t.substring(i,o)),i=o+1,a+=r}return i!==o?a+t.substring(i,o):a}const $y=function(e){let t=e.target;for(;t&&t.tagName.toUpperCase()!=="HTML";){if(t.tagName.toUpperCase()==="TD")return t;t=t.parentNode}return null},CO=function(e){return e!==null&&typeof e=="object"},Tpe=function(e,t,n,r,a){if(!t&&!r&&(!a||Array.isArray(a)&&!a.length))return e;typeof n=="string"?n=n==="descending"?-1:1:n=n&&n<0?-1:1;const o=r?null:function(l,s){return a?(Array.isArray(a)||(a=[a]),a.map(c=>typeof c=="string"?vn(l,c):c(l,s,e))):(t!=="$key"&&CO(l)&&"$value"in l&&(l=l.$value),[CO(l)?vn(l,t):l])},i=function(l,s){if(r)return r(l.value,s.value);for(let c=0,d=l.key.length;c<d;c++){if(l.key[c]<s.key[c])return-1;if(l.key[c]>s.key[c])return 1}return 0};return e.map((l,s)=>({value:l,index:s,key:o?o(l,s):null})).sort((l,s)=>{let c=i(l,s);return c||(c=l.index-s.index),c*+n}).map(l=>l.value)},J5=function(e,t){let n=null;return e.columns.forEach(r=>{r.id===t&&(n=r)}),n},xpe=function(e,t){let n=null;for(let r=0;r<e.columns.length;r++){const a=e.columns[r];if(a.columnKey===t){n=a;break}}return n},wO=function(e,t,n){const r=(t.className||"").match(new RegExp(`${n}-table_[^\\s]+`,"gm"));return r?J5(e,r[0]):null},gr=(e,t)=>{if(!e)throw new Error("Row is required when get row identity");if(typeof t=="string"){if(!t.includes("."))return`${e[t]}`;const n=t.split(".");let r=e;for(const a of n)r=r[a];return`${r}`}else if(typeof t=="function")return t.call(null,e)},Gl=function(e,t){const n={};return(e||[]).forEach((r,a)=>{n[gr(r,t)]={row:r,index:a}}),n};function _pe(e,t){const n={};let r;for(r in e)n[r]=e[r];for(r in t)if(Mt(t,r)){const a=t[r];typeof a!="undefined"&&(n[r]=a)}return n}function gw(e){return 
e===""||e!==void 0&&(e=Number.parseInt(e,10),Number.isNaN(e)&&(e="")),e}function Q5(e){return e===""||e!==void 0&&(e=gw(e),Number.isNaN(e)&&(e=80)),e}function j0(e){return typeof e=="number"?e:typeof e=="string"?/^\d+(?:px)?$/.test(e)?Number.parseInt(e,10):e:null}function Epe(...e){return e.length===0?t=>t:e.length===1?e[0]:e.reduce((t,n)=>(...r)=>t(n(...r)))}function Tp(e,t,n){let r=!1;const a=e.indexOf(t),o=a!==-1,i=()=>{e.push(t),r=!0},l=()=>{e.splice(a,1),r=!0};return typeof n=="boolean"?n&&!o?i():!n&&o&&l():o?l():i(),r}function Mpe(e,t,n="children",r="hasChildren"){const a=i=>!(Array.isArray(i)&&i.length);function o(i,l,s){t(i,l,s),l.forEach(c=>{if(c[r]){t(c,null,s+1);return}const d=c[n];a(d)||o(c,d,s+1)})}e.forEach(i=>{if(i[r]){t(i,null,0);return}const l=i[n];a(l)||o(i,l,0)})}let $v;function Ipe(e,t,n,r){const{nextZIndex:a}=Pi();function o(){const f=r==="light",p=document.createElement("div");return p.className=`el-popper ${f?"is-light":"is-dark"}`,t=Ope(t),p.innerHTML=t,p.style.zIndex=String(a()),document.body.appendChild(p),p}function i(){const f=document.createElement("div");return f.className="el-popper__arrow",f}function l(){s&&s.update()}$v=function f(){try{s&&s.destroy(),c&&document.body.removeChild(c),Bn(e,"mouseenter",l),Bn(e,"mouseleave",f)}catch{}};let s=null;const c=o(),d=i();return c.appendChild(d),s=lM(e,c,Te({modifiers:[{name:"offset",options:{offset:[0,8]}},{name:"arrow",options:{element:d,padding:10}}]},n)),gn(e,"mouseenter",l),gn(e,"mouseleave",$v),s}const eI=(e,t,n,r)=>{let a=0,o=e;if(r){if(r[e].colSpan>1)return{};for(let s=0;s<e;s++)a+=r[s].colSpan;o=a+r[e].colSpan-1}else a=e;let i;const l=n.states.columns;switch(t){case"left":o<n.states.fixedLeafColumnsLength.value&&(i="left");break;case"right":a>=l.value.length-n.states.rightFixedLeafColumnsLength.value&&(i="right");break;default:o<n.states.fixedLeafColumnsLength.value?i="left":a>=l.value.length-n.states.rightFixedLeafColumnsLength.value&&(i="right")}return 
i?{direction:i,start:a,after:o}:{}},yw=(e,t,n,r,a)=>{const o=[],{direction:i,start:l}=eI(t,n,r,a);if(i){const s=i==="left";o.push(`${e}-fixed-column--${i}`),s&&l===r.states.fixedLeafColumnsLength.value-1?o.push("is-last-column"):!s&&l===r.states.columns.value.length-r.states.rightFixedLeafColumnsLength.value&&o.push("is-first-column")}return o};function SO(e,t){return e+(t.realWidth===null||Number.isNaN(t.realWidth)?Number(t.width):t.realWidth)}const bw=(e,t,n,r)=>{const{direction:a,start:o=0}=eI(e,t,n,r);if(!a)return;const i={},l=a==="left",s=n.states.columns.value;return l?i.left=s.slice(0,e).reduce(SO,0):i.right=s.slice(o+1).reverse().reduce(SO,0),i},ec=(e,t)=>{!e||Number.isNaN(e[t])||(e[t]=`${e[t]}px`)};function Npe(e){const t=$t(),n=H(!1),r=H([]);return{updateExpandRows:()=>{const s=e.data.value||[],c=e.rowKey.value;if(n.value)r.value=s.slice();else if(c){const d=Gl(r.value,c);r.value=s.reduce((f,p)=>{const v=gr(p,c);return d[v]&&f.push(p),f},[])}else r.value=[]},toggleRowExpansion:(s,c)=>{Tp(r.value,s,c)&&t.emit("expand-change",s,r.value.slice())},setExpandRowKeys:s=>{t.store.assertRowKey();const c=e.data.value||[],d=e.rowKey.value,f=Gl(c,d);r.value=s.reduce((p,v)=>{const m=f[v];return m&&p.push(m.row),p},[])},isRowExpanded:s=>{const c=e.rowKey.value;return c?!!Gl(r.value,c)[gr(s,c)]:r.value.includes(s)},states:{expandRows:r,defaultExpandAll:n}}}function Ape(e){const t=$t(),n=H(null),r=H(null),a=c=>{t.store.assertRowKey(),n.value=c,i(c)},o=()=>{n.value=null},i=c=>{const{data:d,rowKey:f}=e;let p=null;f.value&&(p=(A(d)||[]).find(v=>gr(v,f.value)===c)),r.value=p,t.emit("current-change",r.value,null)};return{setCurrentRowKey:a,restoreCurrentRowKey:o,setCurrentRowByKey:i,updateCurrentRow:c=>{const d=r.value;if(c&&c!==d){r.value=c,t.emit("current-change",r.value,d);return}!c&&d&&(r.value=null,t.emit("current-change",null,d))},updateCurrentRowData:()=>{const c=e.rowKey.value,d=e.data.value||[],f=r.value;if(!d.includes(f)&&f){if(c){const p=gr(f,c);i(p)}else 
r.value=null;r.value===null&&t.emit("current-change",null,f)}else n.value&&(i(n.value),o())},states:{_currentRowKey:n,currentRow:r}}}function Dpe(e){const t=H([]),n=H({}),r=H(16),a=H(!1),o=H({}),i=H("hasChildren"),l=H("children"),s=$t(),c=x(()=>{if(!e.rowKey.value)return{};const C=e.data.value||[];return f(C)}),d=x(()=>{const C=e.rowKey.value,S=Object.keys(o.value),w={};return S.length&&S.forEach(k=>{if(o.value[k].length){const $={children:[]};o.value[k].forEach(O=>{const T=gr(O,C);$.children.push(T),O[i.value]&&!w[T]&&(w[T]={children:[]})}),w[k]=$}}),w}),f=C=>{const S=e.rowKey.value,w={};return Mpe(C,(k,$,O)=>{const T=gr(k,S);Array.isArray($)?w[T]={children:$.map(_=>gr(_,S)),level:O}:a.value&&(w[T]={children:[],lazy:!0,level:O})},l.value,i.value),w},p=(C=!1,S=(w=>(w=s.store)==null?void 0:w.states.defaultExpandAll.value)())=>{var w;const k=c.value,$=d.value,O=Object.keys(k),T={};if(O.length){const _=A(n),I=[],L=(F,N)=>{if(C)return t.value?S||t.value.includes(N):!!(S||(F==null?void 0:F.expanded));{const D=S||t.value&&t.value.includes(N);return!!((F==null?void 0:F.expanded)||D)}};O.forEach(F=>{const N=_[F],D=Te({},k[F]);if(D.expanded=L(N,F),D.lazy){const{loaded:z=!1,loading:B=!1}=N||{};D.loaded=!!z,D.loading=!!B,I.push(F)}T[F]=D});const j=Object.keys($);a.value&&j.length&&I.length&&j.forEach(F=>{const N=_[F],D=$[F].children;if(I.includes(F)){if(T[F].children.length!==0)throw new Error("[ElTable]children must be an empty array.");T[F].children=D}else{const{loaded:z=!1,loading:B=!1}=N||{};T[F]={lazy:!0,loaded:!!z,loading:!!B,expanded:L(N,F),children:D,level:""}}})}n.value=T,(w=s.store)==null||w.updateTableScrollY()};ce(()=>t.value,()=>{p(!0)}),ce(()=>c.value,()=>{p()}),ce(()=>d.value,()=>{p()});const v=C=>{t.value=C,p()},m=(C,S)=>{s.store.assertRowKey();const w=e.rowKey.value,k=gr(C,w),$=k&&n.value[k];if(k&&$&&"expanded"in $){const O=$.expanded;S=typeof 
S=="undefined"?!$.expanded:S,n.value[k].expanded=S,O!==S&&s.emit("expand-change",C,S),s.store.updateTableScrollY()}},y=C=>{s.store.assertRowKey();const S=e.rowKey.value,w=gr(C,S),k=n.value[w];a.value&&k&&"loaded"in k&&!k.loaded?b(C,w,k):m(C,void 0)},b=(C,S,w)=>{const{load:k}=s.props;k&&!n.value[S].loaded&&(n.value[S].loading=!0,k(C,w,$=>{if(!Array.isArray($))throw new TypeError("[ElTable] data must be an array");n.value[S].loading=!1,n.value[S].loaded=!0,n.value[S].expanded=!0,$.length&&(o.value[S]=$),s.emit("expand-change",C,!0)}))};return{loadData:b,loadOrToggle:y,toggleTreeExpansion:m,updateTreeExpandKeys:v,updateTreeData:p,normalize:f,states:{expandRowKeys:t,treeData:n,indent:r,lazy:a,lazyTreeNodeMap:o,lazyColumnIdentifier:i,childrenColumnName:l}}}const Rpe=(e,t)=>{const n=t.sortingColumn;return!n||typeof n.sortable=="string"?e:Tpe(e,t.sortProp,t.sortOrder,n.sortMethod,n.sortBy)},xp=e=>{const t=[];return e.forEach(n=>{n.children?t.push.apply(t,xp(n.children)):t.push(n)}),t};function Lpe(){var e;const t=$t(),{size:n}=or((e=t.proxy)==null?void 0:e.$props),r=H(null),a=H([]),o=H([]),i=H(!1),l=H([]),s=H([]),c=H([]),d=H([]),f=H([]),p=H([]),v=H([]),m=H([]),y=H(0),b=H(0),C=H(0),S=H(!1),w=H([]),k=H(!1),$=H(!1),O=H(null),T=H({}),_=H(null),I=H(null),L=H(null),j=H(null),F=H(null);ce(a,()=>t.state&&z(!1),{deep:!0});const N=()=>{if(!r.value)throw new Error("[ElTable] prop row-key is required")},D=()=>{d.value=l.value.filter(Ve=>Ve.fixed===!0||Ve.fixed==="left"),f.value=l.value.filter(Ve=>Ve.fixed==="right"),d.value.length>0&&l.value[0]&&l.value[0].type==="selection"&&!l.value[0].fixed&&(l.value[0].fixed=!0,d.value.unshift(l.value[0]));const Ye=l.value.filter(Ve=>!Ve.fixed);s.value=[].concat(d.value).concat(Ye).concat(f.value);const 
he=xp(Ye),_e=xp(d.value),$e=xp(f.value);y.value=he.length,b.value=_e.length,C.value=$e.length,c.value=[].concat(_e).concat(he).concat($e),i.value=d.value.length>0||f.value.length>0},z=(Ye,he=!1)=>{Ye&&D(),he?t.state.doLayout():t.state.debouncedUpdateLayout()},B=Ye=>w.value.includes(Ye),M=()=>{S.value=!1,w.value.length&&(w.value=[],t.emit("selection-change",[]))},E=()=>{let Ye;if(r.value){Ye=[];const he=Gl(w.value,r.value),_e=Gl(a.value,r.value);for(const $e in he)Mt(he,$e)&&!_e[$e]&&Ye.push(he[$e].row)}else Ye=w.value.filter(he=>!a.value.includes(he));if(Ye.length){const he=w.value.filter(_e=>!Ye.includes(_e));w.value=he,t.emit("selection-change",he.slice())}},K=()=>(w.value||[]).slice(),W=(Ye,he=void 0,_e=!0)=>{if(Tp(w.value,Ye,he)){const Ve=(w.value||[]).slice();_e&&t.emit("select",Ve,Ye),t.emit("selection-change",Ve)}},Y=()=>{var Ye,he;const _e=$.value?!S.value:!(S.value||w.value.length);S.value=_e;let $e=!1,Ve=0;const st=(he=(Ye=t==null?void 0:t.store)==null?void 0:Ye.states)==null?void 0:he.rowKey.value;a.value.forEach((Dt,Ut)=>{const Ft=Ut+Ve;O.value?O.value.call(null,Dt,Ft)&&Tp(w.value,Dt,_e)&&($e=!0):Tp(w.value,Dt,_e)&&($e=!0),Ve+=ne(gr(Dt,st))}),$e&&t.emit("selection-change",w.value?w.value.slice():[]),t.emit("select-all",w.value)},q=()=>{const Ye=Gl(w.value,r.value);a.value.forEach(he=>{const _e=gr(he,r.value),$e=Ye[_e];$e&&(w.value[$e.index]=he)})},J=()=>{var Ye,he,_e;if(((Ye=a.value)==null?void 0:Ye.length)===0){S.value=!1;return}let $e;r.value&&($e=Gl(w.value,r.value));const Ve=function(Ft){return $e?!!$e[gr(Ft,r.value)]:w.value.includes(Ft)};let st=!0,Dt=0,Ut=0;for(let Ft=0,Jt=(a.value||[]).length;Ft<Jt;Ft++){const Mn=(_e=(he=t==null?void 0:t.store)==null?void 0:he.states)==null?void 0:_e.rowKey.value,tr=Ft+Ut,nr=a.value[Ft],Va=O.value&&O.value.call(null,nr,tr);if(Ve(nr))Dt++;else if(!O.value||Va){st=!1;break}Ut+=ne(gr(nr,Mn))}Dt===0&&(st=!1),S.value=st},ne=Ye=>{var he;if(!t||!t.store)return 0;const{treeData:_e}=t.store.states;let $e=0;const 
/* ne continued: add the direct children and recurse into each child key */Ve=(he=_e.value[Ye])==null?void 0:he.children;return Ve&&($e+=Ve.length,Ve.forEach(st=>{$e+=ne(st)})),$e},/* updateFilters (oe): records the values for one or more columns in the filters map and returns them keyed by columnKey or id */oe=(Ye,he)=>{Array.isArray(Ye)||(Ye=[Ye]);const _e={};return Ye.forEach($e=>{T.value[$e.id]=he,_e[$e.columnKey||$e.id]=he}),_e},/* updateSort (Q): clears the order on the previous sorting column when it differs, then stores column, prop and order */Q=(Ye,he,_e)=>{I.value&&I.value!==Ye&&(I.value.order=null),I.value=Ye,L.value=he,j.value=_e},/* execFilter (ae): starts from the unfiltered _data and keeps rows for which at least one active filter value passes the column's filterMethod */ae=()=>{let Ye=A(o);Object.keys(T.value).forEach(he=>{const _e=T.value[he];if(!_e||_e.length===0)return;const $e=J5({columns:c.value},he);$e&&$e.filterMethod&&(Ye=Ye.filter(Ve=>_e.some(st=>$e.filterMethod.call(null,st,Ve,$e))))}),_.value=Ye},/* execSort (de): writes the sorted filtered rows into data */de=()=>{a.value=Rpe(_.value,{sortingColumn:I.value,sortProp:L.value,sortOrder:j.value})},/* execQuery (be): re-filters unless the argument carries a truthy filter property, then always sorts */be=(Ye=void 0)=>{Ye&&Ye.filter||ae(),de()},/* clearFilter (Ee): needs the header ref and at least one registered filter panel; accepts a column key or an array of keys */Ee=Ye=>{const{tableHeaderRef:he}=t.refs;if(!he)return;const _e=Object.assign({},he.filterPanels),$e=Object.keys(_e);if(!!$e.length)if(typeof Ye=="string"&&(Ye=[Ye]),Array.isArray(Ye)){const Ve=Ye.map(st=>xpe({columns:c.value},st));$e.forEach(st=>{const Dt=Ve.find(Ut=>Ut.id===st);Dt&&(Dt.filteredValue=[])}),t.store.commit("filterChange",{column:Ve,values:[],silent:!0,multi:!0})}else $e.forEach(Ve=>{const 
/* clearFilter continued: no keys were given, so every column that owns a filter panel is reset and the filters map is emptied */st=c.value.find(Dt=>Dt.id===Ve);st&&(st.filteredValue=[])}),T.value={},t.store.commit("filterChange",{column:{},values:[],silent:!0})},/* clearSort (Pe): does nothing when there is no sorting column */Pe=()=>{!I.value||(Q(null,null,null),t.store.commit("changeSortCondition",{silent:!0}))},/* expand, tree and current-row helpers are built from the same data and rowKey refs; their state objects are merged into states below */{setExpandRowKeys:Be,toggleRowExpansion:te,updateExpandRows:ie,states:ge,isRowExpanded:ke}=Npe({data:a,rowKey:r}),{updateTreeExpandKeys:xe,toggleTreeExpansion:Ie,updateTreeData:ye,loadOrToggle:pe,states:ue}=Dpe({data:a,rowKey:r}),{updateCurrentRowData:Ce,updateCurrentRow:je,setCurrentRowKey:ee,states:me}=Ape({data:a,rowKey:r});/* public store API; toggleAllSelection is left null here and assigned later by Bpe */return{assertRowKey:N,updateColumns:D,scheduleLayout:z,isSelected:B,clearSelection:M,cleanSelection:E,getSelectionRows:K,toggleRowSelection:W,_toggleAllSelection:Y,toggleAllSelection:null,updateSelectionByRowKey:q,updateAllSelected:J,updateFilters:oe,updateCurrentRow:je,updateSort:Q,execFilter:ae,execSort:de,execQuery:be,clearFilter:Ee,clearSort:Pe,toggleRowExpansion:te,setExpandRowKeysAdapter:Ye=>{Be(Ye),xe(Ye)},setCurrentRowKey:ee,/* uses row expansion when an expand-type column exists, tree expansion otherwise */toggleRowExpansionAdapter:(Ye,he)=>{c.value.some(({type:$e})=>$e==="expand")?te(Ye,he):Ie(Ye,he)},isRowExpanded:ke,updateExpandRows:ie,updateCurrentRowData:Ce,loadOrToggle:pe,updateTreeData:ye,states:Te(Te(Te({tableSize:n,rowKey:r,data:a,_data:o,isComplex:i,_columns:l,originColumns:s,columns:c,fixedColumns:d,rightFixedColumns:f,leafColumns:p,fixedLeafColumns:v,rightFixedLeafColumns:m,leafColumnsLength:y,fixedLeafColumnsLength:b,rightFixedLeafColumnsLength:C,isAllSelected:S,selection:w,reserveSelection:k,selectOnIndeterminate:$,selectable:O,filters:T,filteredData:_,sortingColumn:I,sortProp:L,sortOrder:j,hoverRow:F},ge),ue),me)}}/* K0: maps a column list, swapping in t where the id matches and otherwise recursing into (and reassigning) children */function K0(e,t){return e.map(n=>{var r;return n.id===t.id?t:((r=n.children)!=null&&r.length&&(n.children=K0(n.children,t)),n)})}/* tI: stores getColumnIndex() on every column as .no, recurses into children, then sorts the list in place by .no */function tI(e){e.forEach(t=>{var n,r;t.no=(n=t.getColumnIndex)==null?void 0:n.call(t),(r=t.children)!=null&&r.length&&tI(t.children)}),e.sort((t,n)=>t.no-n.no)}/* Fpe: combines the Lpe store with a mutations table, commit() and updateTableScrollY() */function Fpe(){const e=$t(),t=Lpe(),n=De("table"),r={/* setData: the first statement (continued on the next line) records whether the data reference changed */setData(i,l){const 
/* setData continued: s is true when the incoming array is a different reference; with reserveSelection the selection is re-matched by row key, otherwise a new reference clears it and the same reference only cleans stale rows */s=A(i._data)!==l;i.data.value=l,i._data.value=l,e.store.execQuery(),e.store.updateCurrentRowData(),e.store.updateExpandRows(),e.store.updateTreeData(e.store.states.defaultExpandAll.value),A(i.reserveSelection)?(e.store.assertRowKey(),e.store.updateSelectionByRowKey()):s?e.store.clearSelection():e.store.cleanSelection(),e.store.updateAllSelected(),e.$ready&&e.store.scheduleLayout()},/* insertColumn: appends to the parent's children (creating the array if needed) or to the top level, re-sorts with tI, and copies selectable/reserveSelection from a selection column */insertColumn(i,l,s){const c=A(i._columns);let d=[];s?(s&&!s.children&&(s.children=[]),s.children.push(l),d=K0(c,s)):(c.push(l),d=c),tI(d),i._columns.value=d,l.type==="selection"&&(i.selectable.value=l.selectable,i.reserveSelection.value=l.reserveSelection),e.$ready&&(e.store.updateColumns(),e.store.scheduleLayout())},/* removeColumn: removes from the parent's children (deleting an emptied children array) or from the top level */removeColumn(i,l,s){const c=A(i._columns)||[];if(s)s.children.splice(s.children.findIndex(d=>d.id===l.id),1),s.children.length===0&&delete s.children,i._columns.value=K0(c,s);else{const d=c.indexOf(l);d>-1&&(c.splice(d,1),i._columns.value=c)}e.$ready&&(e.store.updateColumns(),e.store.scheduleLayout())},/* sort: looks the column up by property; does nothing without a prop or a matching column */sort(i,l){const{prop:s,order:c,init:d}=l;if(s){const f=A(i.columns).find(p=>p.property===s);f&&(f.order=c,e.store.updateSort(f,s,c),e.store.commit("changeSortCondition",{init:d}))}},/* changeSortCondition: re-sorts without re-filtering and emits sort-change unless the payload is silent or init */changeSortCondition(i,l){const{sortingColumn:s,sortProp:c,sortOrder:d}=i;A(d)===null&&(i.sortingColumn.value=null,i.sortProp.value=null);const f={filter:!0};e.store.execQuery(f),(!l||!(l.silent||l.init))&&e.emit("sort-change",{column:A(s),prop:A(c),order:A(d)}),e.store.updateTableScrollY()},/* filterChange: records the values, re-runs the query and emits filter-change unless silent */filterChange(i,l){const{column:s,values:c,silent:d}=l,f=e.store.updateFilters(s,c);e.store.execQuery(),d||e.emit("filter-change",f),e.store.updateTableScrollY()},toggleAllSelection(){e.store.toggleAllSelection()},rowSelectedChanged(i,l){e.store.toggleRowSelection(l),e.store.updateAllSelected()},setHoverRow(i,l){i.hoverRow.value=l},setCurrentRow(i,l){e.store.updateCurrentRow(l)}},/* commit (a): applies the named mutation with the store states as first argument; an unknown name throws */a=function(i,...l){const s=e.store.mutations;if(s[i])s[i].apply(e,[e.store.states].concat(l));else throw new Error(`Action not found: 
${i}`)},/* updateTableScrollY (o): defers layout.updateScrollY through Ne */o=function(){Ne(()=>e.layout.updateScrollY.apply(e.layout))};return Ke(Te({ns:n},t),{mutations:r,commit:a,updateTableScrollY:o})}/* wd: maps table prop names (including two dotted treeProps paths) to store state keys; object entries also carry a default value */const wd={rowKey:"rowKey",defaultExpandAll:"defaultExpandAll",selectOnIndeterminate:"selectOnIndeterminate",indent:"indent",lazy:"lazy",data:"data",["treeProps.hasChildren"]:{key:"lazyColumnIdentifier",default:"hasChildren"},["treeProps.children"]:{key:"childrenColumnName",default:"children"}};/* Bpe: creates the store, installs toggleAllSelection as Yn(_toggleAllSelection, 10), copies the mapped props into it once and then keeps them in sync through Vpe */function Bpe(e,t){if(!e)throw new Error("Table is required.");const n=Fpe();return n.toggleAllSelection=Yn(n._toggleAllSelection,10),Object.keys(wd).forEach(r=>{nI(rI(t,r),r,n)}),Vpe(n,t),n}/* Vpe: watches every mapped prop and forwards new values to nI */function Vpe(e,t){Object.keys(wd).forEach(n=>{ce(()=>rI(t,n),r=>{nI(r,n,e)})})}/* nI: writes a prop value into the matching state ref, falling back to the mapping's default when the value is falsy */function nI(e,t,n){let r=e,a=wd[t];typeof wd[t]=="object"&&(a=a.key,r=r||wd[t].default),n.states[a].value=r}/* rI: reads a plain or dotted property path from the props object; an undefined intermediate segment will throw */function rI(e,t){if(t.includes(".")){const n=t.split(".");let r=e;return n.forEach(a=>{r=r[a]}),r}else return e[t]}/* zpe: table layout state (heights, widths, scroll flags) plus an observer list; the constructor copies own options onto the instance, writing to .value when the target passes the _n check, and requires both table and store */class zpe{constructor(t){this.observers=[],this.table=null,this.store=null,this.columns=[],this.fit=!0,this.showHeader=!0,this.height=H(null),this.scrollX=H(!1),this.scrollY=H(!1),this.bodyWidth=H(null),this.fixedWidth=H(null),this.rightFixedWidth=H(null),this.tableHeight=H(null),this.headerHeight=H(44),this.appendHeight=H(0),this.footerHeight=H(44),this.viewportHeight=H(null),this.bodyHeight=H(null),this.bodyScrollHeight=H(0),this.fixedBodyHeight=H(null),this.gutterWidth=0;for(const n in t)Mt(t,n)&&(_n(this[n])?this[n].value=t[n]:this[n]=t[n]);if(!this.table)throw new Error("Table is required for Table Layout");if(!this.store)throw new Error("Store is required for Table Layout")}/* updateScrollY: recomputes the scrollY flag from the body wrapper's scrollHeight and returns whether it changed */updateScrollY(){if(this.height.value===null)return!1;const n=this.table.refs.bodyWrapper;if(this.table.vnode.el&&n){let r=!0;const a=this.scrollY.value;return this.bodyHeight.value===null?r=!1:r=n.scrollHeight>this.bodyHeight.value,this.scrollY.value=r,a!==r}return!1}/* setHeight: returns immediately when Bt is falsy; the remainder continues on the next line */setHeight(t,n="height"){if(!Bt)return;const 
r=this.table.vnode.el;if(t=j0(t),this.height.value=Number(t),!r&&(t||t===0))return Ne(()=>this.setHeight(t,n));typeof t=="number"?(r.style[n]=`${t}px`,this.updateElsHeight()):typeof t=="string"&&(r.style[n]=t,this.updateElsHeight())}setMaxHeight(t){this.setHeight(t,"max-height")}getFlattenColumns(){const t=[];return this.table.store.states.columns.value.forEach(r=>{r.isColumnGroup?t.push.apply(t,r.columns):t.push(r)}),t}updateElsHeight(){var t,n;if(!this.table.$ready)return Ne(()=>this.updateElsHeight());const{tableWrapper:r,headerWrapper:a,appendWrapper:o,footerWrapper:i,tableHeader:l,tableBody:s}=this.table.refs;if(r&&r.style.display==="none")return;const{tableLayout:c}=this.table.props;if(this.appendHeight.value=o?o.offsetHeight:0,this.showHeader&&!a&&c==="fixed")return;const d=l||null,f=this.headerDisplayNone(d),p=(a==null?void 0:a.offsetHeight)||0,v=this.headerHeight.value=this.showHeader?p:0;if(this.showHeader&&!f&&p>0&&(this.table.store.states.columns.value||[]).length>0&&v<2)return Ne(()=>this.updateElsHeight());const m=this.tableHeight.value=(n=(t=this.table)==null?void 0:t.vnode.el)==null?void 0:n.clientHeight,y=this.footerHeight.value=i?i.offsetHeight:0;this.height.value!==null&&(this.bodyHeight.value===null&&requestAnimationFrame(()=>this.updateElsHeight()),this.bodyHeight.value=m-v-y+(i?1:0),this.bodyScrollHeight.value=s==null?void 0:s.scrollHeight),this.fixedBodyHeight.value=this.scrollX.value?this.bodyHeight.value-this.gutterWidth:this.bodyHeight.value,this.viewportHeight.value=this.scrollX.value?m-this.gutterWidth:m,this.updateScrollY(),this.notifyObservers("scrollable")}headerDisplayNone(t){if(!t)return!0;let n=t;for(;n.tagName!=="DIV";){if(getComputedStyle(n).display==="none")return!0;n=n.parentElement}return!1}updateColumnsWidth(){if(!Bt)return;const t=this.fit,n=this.table.vnode.el.clientWidth;let r=0;const a=this.getFlattenColumns(),o=a.filter(s=>typeof s.width!="number");if(a.forEach(s=>{typeof 
/* updateColumnsWidth continued: columns with a numeric width drop any cached realWidth; when fit is on and flexible columns exist, spare width is shared in proportion to minWidth (default 80) with the first flexible column absorbing the rounding remainder */s.width=="number"&&s.realWidth&&(s.realWidth=null)}),o.length>0&&t){if(a.forEach(s=>{r+=Number(s.width||s.minWidth||80)}),r<=n){this.scrollX.value=!1;const s=n-r;if(o.length===1)o[0].realWidth=Number(o[0].minWidth||80)+s;else{const c=o.reduce((p,v)=>p+Number(v.minWidth||80),0),d=s/c;let f=0;o.forEach((p,v)=>{if(v===0)return;const m=Math.floor(Number(p.minWidth||80)*d);f+=m,p.realWidth=Number(p.minWidth||80)+m}),o[0].realWidth=Number(o[0].minWidth||80)+s-f}}else this.scrollX.value=!0,o.forEach(s=>{s.realWidth=Number(s.minWidth)});this.bodyWidth.value=Math.max(r,n),this.table.state.resizeState.value.width=this.bodyWidth.value}else a.forEach(s=>{!s.width&&!s.minWidth?s.realWidth=80:s.realWidth=Number(s.width||s.minWidth),r+=s.realWidth}),this.scrollX.value=r>n,this.bodyWidth.value=r;/* total widths of the left-fixed and right-fixed columns */const i=this.store.states.fixedColumns.value;if(i.length>0){let s=0;i.forEach(c=>{s+=Number(c.realWidth||c.width)}),this.fixedWidth.value=s}const l=this.store.states.rightFixedColumns.value;if(l.length>0){let s=0;l.forEach(c=>{s+=Number(c.realWidth||c.width)}),this.rightFixedWidth.value=s}this.notifyObservers("columns")}addObserver(t){this.observers.push(t)}removeObserver(t){const n=this.observers.indexOf(t);n!==-1&&this.observers.splice(n,1)}/* notifyObservers: forwards "columns" / "scrollable" to each observer's state handler; any other event name throws */notifyObservers(t){this.observers.forEach(r=>{var a,o;switch(t){case"columns":(a=r.state)==null||a.onColumnsChange(this);break;case"scrollable":(o=r.state)==null||o.onScrollableChange(this);break;default:throw new Error(`Table Layout don't have event ${t}.`)}})}}/* jpe: the ElTableFilterPanel component definition; setup registers this instance in the parent's filterPanels map under the column id */const{CheckboxGroup:Hpe}=io,jpe=G({name:"ElTableFilterPanel",components:{ElCheckbox:io,ElCheckboxGroup:Hpe,ElScrollbar:xi,ElTooltip:Ur,ElIcon:ft,ArrowDown:_s,ArrowUp:Of},directives:{ClickOutside:Is},props:{placement:{type:String,default:"bottom-start"},store:{type:Object},column:{type:Object},upDataColumn:{type:Function}},setup(e){const t=$t(),{t:n}=ln(),r=De("table-filter"),a=t==null?void 0:t.parent;a.filterPanels.value[e.column.id]||(a.filterPanels.value[e.column.id]=t);const 
/* filter panel setup continued: o is the tooltip-visible flag, s is the single-select value (first entry of filteredValue), c is the full filteredValue array written back through upDataColumn */o=H(!1),i=H(null),l=x(()=>e.column&&e.column.filters),s=x({get:()=>{var k;return(((k=e.column)==null?void 0:k.filteredValue)||[])[0]},set:k=>{c.value&&(typeof k!="undefined"&&k!==null?c.value.splice(0,1,k):c.value.splice(0,1))}}),c=x({get(){return e.column?e.column.filteredValue||[]:[]},set(k){e.column&&e.upDataColumn("filteredValue",k)}}),d=x(()=>e.column?e.column.filterMultiple:!0),f=k=>k.value===s.value,p=()=>{o.value=!1},v=k=>{k.stopPropagation(),o.value=!o.value},m=()=>{o.value=!1},/* handleConfirm (y), handleReset (b) and handleSelect (C) all commit through S and then close the panel */y=()=>{S(c.value),p()},b=()=>{c.value=[],S(c.value),p()},C=k=>{s.value=k,S(typeof k!="undefined"&&k!==null?c.value:[]),p()},/* S: commits filterChange for this column and refreshes the all-selected flag */S=k=>{e.store.commit("filterChange",{column:e.column,values:k}),e.store.updateAllSelected()};/* mirror the open state onto the column as filterOpened, including the initial value */ce(o,k=>{e.column&&e.upDataColumn("filterOpened",k)},{immediate:!0});const w=x(()=>{var k,$;return($=(k=i.value)==null?void 0:k.popperRef)==null?void 0:$.contentRef});return{tooltipVisible:o,multiple:d,filteredValue:c,filterValue:s,filters:l,handleConfirm:y,handleReset:b,handleSelect:C,isActive:f,t:n,ns:r,showFilterPanel:v,hideFilterPanel:m,popperPaneRef:w,tooltip:i}}}),/* hoisted static props / dynamic-prop name lists used by the render function below */Kpe={key:0},Wpe=["disabled"],Upe=["label","onClick"];/* Ype: compiled render function for the filter panel; resolves its child components and the click-outside directive, then returns the tooltip vnode built on the next line */function Ype(e,t,n,r,a,o){const i=we("el-checkbox"),l=we("el-checkbox-group"),s=we("el-scrollbar"),c=we("arrow-up"),d=we("arrow-down"),f=we("el-icon"),p=we("el-tooltip"),v=pa("click-outside");return 
/* render continued: a tooltip whose content slot shows either a checkbox group with confirm/reset buttons (multiple) or a single-select list with a clear entry, and whose default slot is the arrow trigger wrapped in the click-outside directive */R(),fe(p,{ref:"tooltip",visible:e.tooltipVisible,"onUpdate:visible":t[5]||(t[5]=m=>e.tooltipVisible=m),offset:0,placement:e.placement,"show-arrow":!1,"stop-popper-mouse-event":!1,"append-to-body":"",effect:"light",pure:"","popper-class":e.ns.b(),persistent:""},{content:re(()=>[e.multiple?(R(),X("div",Kpe,[Z("div",{class:U(e.ns.e("content"))},[g(s,{"wrap-class":e.ns.e("wrap")},{default:re(()=>[g(l,{modelValue:e.filteredValue,"onUpdate:modelValue":t[0]||(t[0]=m=>e.filteredValue=m),class:U(e.ns.e("checkbox-group"))},{default:re(()=>[(R(!0),X(Fe,null,Rt(e.filters,m=>(R(),fe(i,{key:m.value,label:m.value},{default:re(()=>[yt(Me(m.text),1)]),_:2},1032,["label"]))),128))]),_:1},8,["modelValue","class"])]),_:1},8,["wrap-class"])],2),/* confirm is disabled while nothing is checked */Z("div",{class:U(e.ns.e("bottom"))},[Z("button",{class:U({[e.ns.is("disabled")]:e.filteredValue.length===0}),disabled:e.filteredValue.length===0,type:"button",onClick:t[1]||(t[1]=(...m)=>e.handleConfirm&&e.handleConfirm(...m))},Me(e.t("el.table.confirmFilter")),11,Wpe),Z("button",{type:"button",onClick:t[2]||(t[2]=(...m)=>e.handleReset&&e.handleReset(...m))},Me(e.t("el.table.resetFilter")),1)],2)])):/* single-select branch: the first item clears the filter by selecting null */(R(),X("ul",{key:1,class:U(e.ns.e("list"))},[Z("li",{class:U([e.ns.e("list-item"),{[e.ns.is("active")]:e.filterValue===void 0||e.filterValue===null}]),onClick:t[3]||(t[3]=m=>e.handleSelect(null))},Me(e.t("el.table.clearFilter")),3),(R(!0),X(Fe,null,Rt(e.filters,m=>(R(),X("li",{key:m.value,class:U([e.ns.e("list-item"),e.ns.is("active",e.isActive(m))]),label:m.value,onClick:y=>e.handleSelect(m.value)},Me(m.text),11,Upe))),128))],2))]),default:re(()=>[at((R(),X("span",{class:U([`${e.ns.namespace.value}-table__column-filter-trigger`,`${e.ns.namespace.value}-none-outline`]),onClick:t[4]||(t[4]=(...m)=>e.showFilterPanel&&e.showFilterPanel(...m))},[g(f,null,{default:re(()=>[e.column.filterOpened?(R(),fe(c,{key:0})):(R(),fe(d,{key:1}))]),_:1})],2)),[[v,e.hideFilterPanel,e.popperPaneRef]])]),_:1},8,["visible","placement","popper-class"])}var 
/* qpe: the filter panel component with its render function and source file path attached */qpe=Ae(jpe,[["render",Ype],["__file","/home/runner/work/element-plus/element-plus/packages/components/table/src/filter-panel.vue"]]);/* aI: registers the current instance as a layout observer for its lifetime and returns the two handlers that sync col widths (r) and gutter cells (a) into the DOM; both also run from the et and ur hooks */function aI(e){const t=$t();hc(()=>{n.value.addObserver(t)}),et(()=>{r(n.value),a(n.value)}),ur(()=>{r(n.value),a(n.value)}),Wr(()=>{n.value.removeObserver(t)});const n=x(()=>{const o=e.layout;if(!o)throw new Error("Can not find table layout.");return o}),/* onColumnsChange (r): sets the width attribute of each named col element from the matching column's realWidth or width */r=o=>{var i;const l=((i=e.vnode.el)==null?void 0:i.querySelectorAll("colgroup > col"))||[];if(!l.length)return;const s=o.getFlattenColumns(),c={};s.forEach(d=>{c[d.id]=d});for(let d=0,f=l.length;d<f;d++){const p=l[d],v=p.getAttribute("name"),m=c[v];m&&p.setAttribute("width",m.realWidth||m.width)}},/* onScrollableChange (a): sizes or hides gutter col and th elements according to scrollY */a=o=>{var i,l;const s=((i=e.vnode.el)==null?void 0:i.querySelectorAll("colgroup > col[name=gutter]"))||[];for(let d=0,f=s.length;d<f;d++)s[d].setAttribute("width",o.scrollY.value?o.gutterWidth:"0");const c=((l=e.vnode.el)==null?void 0:l.querySelectorAll("th.gutter"))||[];for(let d=0,f=c.length;d<f;d++){const p=c[d];p.style.width=o.scrollY.value?`${o.gutterWidth}px`:"0",p.style.display=o.scrollY.value?"":"none"}};return{tableLayout:n.value,onColumnsChange:r,onScrollableChange:a}}/* Vo: injection key under which child components look up the table instance */const Vo=Symbol("ElTable");/* Gpe: header event handlers (click, context menu, drag-resize, hover cursor, sort click) */function Gpe(e,t){const n=$t(),r=ve(Vo),a=y=>{y.stopPropagation()},/* handleHeaderClick (o): sorts when the column is sortable and has no filters; always emits header-click */o=(y,b)=>{!b.filters&&b.sortable?m(y,b,!1):b.filterable&&!b.sortable&&a(y),r==null||r.emit("header-click",b,y)},i=(y,b)=>{r==null||r.emit("header-contextmenu",b,y)},l=H(null),s=H(!1),c=H({}),/* handleMouseDown (d): starts a column drag-resize only for leaf columns, when a resize target was set by the mousemove handler and the border prop is on */d=(y,b)=>{if(!!Bt&&!(b.children&&b.children.length>0)&&l.value&&e.border){s.value=!0;const C=r;t("set-drag-visible",!0);const w=(C==null?void 0:C.vnode.el).getBoundingClientRect().left,k=n.vnode.el.querySelector(`th.${b.id}`),$=k.getBoundingClientRect(),O=$.left-w+30;xo(k,"noclick"),c.value={startMouseLeft:y.clientX,startLeft:$.right-w,startColumnLeft:$.left-w,tableLeft:w};const T=C==null?void 
/* drag-resize continued: positions the resize proxy, blocks text selection and native drag for the duration, and installs document-level mousemove/mouseup handlers */0:C.refs.resizeProxy;T.style.left=`${c.value.startLeft}px`,document.onselectstart=function(){return!1},document.ondragstart=function(){return!1};/* mousemove: the proxy cannot move left of the column's left edge + 30px */const _=L=>{const j=L.clientX-c.value.startMouseLeft,F=c.value.startLeft+j;T.style.left=`${Math.max(O,F)}px`},/* mouseup: applies the new width, emits header-dragend, schedules an immediate layout and restores the document handlers; the noclick class is removed on a zero-delay timer */I=()=>{if(s.value){const{startColumnLeft:L,startLeft:j}=c.value,N=Number.parseInt(T.style.left,10)-L;b.width=b.realWidth=N,C==null||C.emit("header-dragend",b.width,j-L,b,y),requestAnimationFrame(()=>{e.store.scheduleLayout(!1,!0)}),document.body.style.cursor="",s.value=!1,l.value=null,c.value={},t("set-drag-visible",!1)}document.removeEventListener("mousemove",_),document.removeEventListener("mouseup",I),document.onselectstart=null,document.ondragstart=null,setTimeout(()=>{Br(k,"noclick")},0)};document.addEventListener("mousemove",_),document.addEventListener("mouseup",I)}},/* handleMouseMove (f): shows the col-resize cursor and records the column as the resize target when the pointer is within 8px of the right edge of a th wider than 12px */f=(y,b)=>{if(b.children&&b.children.length>0)return;let C=y.target;for(;C&&C.tagName!=="TH";)C=C.parentNode;if(!(!b||!b.resizable)&&!s.value&&e.border){const S=C.getBoundingClientRect(),w=document.body.style;S.width>12&&S.right-y.pageX<8?(w.cursor="col-resize",to(C,"is-sortable")&&(C.style.cursor="col-resize"),l.value=b):s.value||(w.cursor="",to(C,"is-sortable")&&(C.style.cursor="pointer"),l.value=null)}},/* handleMouseOut (p) */p=()=>{!Bt||(document.body.style.cursor="")},/* v: next entry in sortOrders after the current order, wrapping to the first; an empty-string order yields the first entry */v=({order:y,sortOrders:b})=>{if(y==="")return b[0];const C=b.indexOf(y||null);return b[C>b.length-2?0:C+1]},/* handleSortClick (m): clicking the active order clears it; a th carrying the noclick class swallows the click */m=(y,b,C)=>{y.stopPropagation();const S=b.order===C?null:C||v(b);let w=y.target;for(;w&&w.tagName!=="TH";)w=w.parentNode;if(w&&w.tagName==="TH"&&to(w,"noclick")){Br(w,"noclick");return}if(!b.sortable)return;const k=e.store.states;let $=k.sortProp.value,O;const 
/* handleSortClick continued: switches the sorting column when needed, stores prop and order, then commits changeSortCondition */T=k.sortingColumn.value;(T!==b||T===b&&T.order===null)&&(T&&(T.order=null),k.sortingColumn.value=b,$=b.property),S?O=b.order=S:O=b.order=null,k.sortProp.value=$,k.sortOrder.value=O,r==null||r.store.commit("changeSortCondition")};return{handleHeaderClick:o,handleHeaderContextMenu:i,handleMouseDown:d,handleMouseMove:f,handleMouseOut:p,handleSortClick:m,handleFilterClick:a}}/* Xpe: header row/cell style and class resolvers; each table prop may be a plain value or a function of the row/column context */function Xpe(e){const t=ve(Vo),n=De("table");return{getHeaderRowStyle:l=>{const s=t==null?void 0:t.props.headerRowStyle;return typeof s=="function"?s.call(null,{rowIndex:l}):s},getHeaderRowClass:l=>{const s=[],c=t==null?void 0:t.props.headerRowClassName;return typeof c=="string"?s.push(c):typeof c=="function"&&s.push(c.call(null,{rowIndex:l})),s.join(" ")},/* sub-columns get no fixed-position style */getHeaderCellStyle:(l,s,c,d)=>{var f;let p=(f=t==null?void 0:t.props.headerCellStyle)!=null?f:{};typeof p=="function"&&(p=p.call(null,{rowIndex:l,columnIndex:s,row:c,column:d}));const v=d.isSubColumn?null:bw(s,d.fixed,e.store,c);return ec(v,"left"),ec(v,"right"),Object.assign({},p,v)},getHeaderCellClass:(l,s,c,d)=>{const f=d.isSubColumn?[]:yw(n.b(),s,d.fixed,e.store,c),p=[d.id,d.order,d.headerAlign,d.className,d.labelClassName,...f];d.children||p.push("is-leaf"),d.sortable&&p.push("is-sortable");const v=t==null?void 0:t.props.headerCellClassName;return typeof v=="string"?p.push(v):typeof v=="function"&&p.push(v.call(null,{rowIndex:l,columnIndex:s,row:c,column:d})),p.push(n.e("cell")),p.filter(m=>Boolean(m)).join(" ")}}}/* oI: flattens a column tree in pre-order, keeping group columns as well as leaves */const oI=e=>{const t=[];return e.forEach(n=>{n.children?(t.push(n),t.push.apply(t,oI(n.children))):t.push(n)}),t},/* Zpe: assigns level, colSpan and rowSpan to every column and returns one array of columns per header row; children of a group are flagged isSubColumn */Zpe=e=>{let t=1;const n=(o,i)=>{if(i&&(o.level=i.level+1,t<o.level&&(t=o.level)),o.children){let l=0;o.children.forEach(s=>{n(s,o),l+=s.colSpan}),o.colSpan=l}else o.colSpan=1};e.forEach(o=>{o.level=1,n(o,void 0)});const r=[];for(let o=0;o<t;o++)r.push([]);return oI(e).forEach(o=>{o.children?(o.rowSpan=1,o.children.forEach(i=>i.isSubColumn=!0)):o.rowSpan=t-o.level+1,r[o.level-1].push(o)}),r};/* Jpe: header row model (computed from originColumns) plus the isGroup flag and the select-all click handler; continues on the next line */function Jpe(e){const 
/* Jpe continued: isGroup is true when there is more than one header row; evaluating it also sets the table's isGroup state to true */t=ve(Vo),n=x(()=>Zpe(e.store.states.originColumns.value));return{isGroup:x(()=>{const o=n.value.length>1;return o&&t&&(t.state.isGroup.value=!0),o}),toggleAllSelection:o=>{o.stopPropagation(),t==null||t.store.commit("toggleAllSelection")},columnRows:n}}/* Qpe: the ElTableHeader component; setup wires the layout observer, applies defaultSort after two Ne ticks with init:true, and exposes the handlers produced by Gpe, Xpe and Jpe */var Qpe=G({name:"ElTableHeader",components:{ElCheckbox:io},props:{fixed:{type:String,default:""},store:{required:!0,type:Object},border:Boolean,defaultSort:{type:Object,default:()=>({prop:"",order:""})}},setup(e,{emit:t}){const n=$t(),r=ve(Vo),a=De("table"),o=H({}),{onColumnsChange:i,onScrollableChange:l}=aI(r);et(async()=>{await Ne(),await Ne();const{prop:O,order:T}=e.defaultSort;r==null||r.store.commit("sort",{prop:O,order:T,init:!0})});const{handleHeaderClick:s,handleHeaderContextMenu:c,handleMouseDown:d,handleMouseMove:f,handleMouseOut:p,handleSortClick:v,handleFilterClick:m}=Gpe(e,t),{getHeaderRowStyle:y,getHeaderRowClass:b,getHeaderCellStyle:C,getHeaderCellClass:S}=Xpe(e),{isGroup:w,toggleAllSelection:k,columnRows:$}=Jpe(e);/* the layout notifies observers through instance.state; filterPanels is the map the filter panels register themselves in */return n.state={onColumnsChange:i,onScrollableChange:l},n.filterPanels=o,{ns:a,filterPanels:o,onColumnsChange:i,onScrollableChange:l,columnRows:$,getHeaderRowClass:b,getHeaderRowStyle:y,getHeaderCellClass:S,getHeaderCellStyle:C,handleHeaderClick:s,handleHeaderContextMenu:c,handleMouseDown:d,handleMouseMove:f,handleMouseOut:p,handleSortClick:v,handleFilterClick:m,isGroup:w,toggleAllSelection:k}},/* render: the thead vnode is built on the next line; y tracks the largest rowSpan seen */render(){const{ns:e,isGroup:t,columnRows:n,getHeaderCellStyle:r,getHeaderCellClass:a,getHeaderRowClass:o,getHeaderRowStyle:i,handleHeaderClick:l,handleHeaderContextMenu:s,handleMouseDown:c,handleMouseMove:d,handleSortClick:f,handleMouseOut:p,store:v,$parent:m}=this;let y=1;return 
/* header render continued: one tr per header row and one th per column; each cell shows the custom renderHeader output or the label, sort carets when sortable, and a filter panel when filterable */qe("thead",{class:{[e.is("group")]:t}},n.map((b,C)=>qe("tr",{class:o(C),key:C,style:i(C)},b.map((S,w)=>(S.rowSpan>y&&(y=S.rowSpan),qe("th",{class:a(C,w,b,S),colspan:S.colSpan,key:`${S.id}-thead`,rowspan:S.rowSpan,style:r(C,w,b,S),onClick:k=>l(k,S),onContextmenu:k=>s(k,S),onMousedown:k=>c(k,S),onMousemove:k=>d(k,S),onMouseout:p},[qe("div",{class:["cell",S.filteredValue&&S.filteredValue.length>0?"highlight":"",S.labelClassName]},[S.renderHeader?S.renderHeader({column:S,$index:w,store:v,_self:m}):S.label,S.sortable&&qe("span",{onClick:k=>f(k,S),class:"caret-wrapper"},[qe("i",{onClick:k=>f(k,S,"ascending"),class:"sort-caret ascending"}),qe("i",{onClick:k=>f(k,S,"descending"),class:"sort-caret descending"})]),S.filterable&&qe(qpe,{store:v,placement:S.filterPlacement||"bottom-start",column:S,upDataColumn:(k,$)=>{S[k]=$}})])]))))))}});/* eve: body event handlers; a emits a cell-<name> event when the event target resolves to a cell and column, and always emits row-<name> */function eve(e){const t=ve(Vo),n=H(""),r=H(qe("div")),a=(p,v,m)=>{var y;const b=t,C=$y(p);let S;const w=(y=b==null?void 0:b.vnode.el)==null?void 0:y.dataset.prefix;C&&(S=wO({columns:e.store.states.columns.value},C,w),S&&(b==null||b.emit(`cell-${m}`,v,S,C,p))),b==null||b.emit(`row-${m}`,v,S,p)},o=(p,v)=>{a(p,v,"dblclick")},/* a click also makes the row the current row */i=(p,v)=>{e.store.commit("setCurrentRow",v),a(p,v,"click")},l=(p,v)=>{a(p,v,"contextmenu")},/* hover-row updates are wrapped with Yn(..., 30) */s=Yn(p=>{e.store.commit("setHoverRow",p)},30),c=Yn(()=>{e.store.commit("setHoverRow",null)},30);return{handleDoubleClick:o,handleClick:i,handleContextMenu:l,handleMouseEnter:s,handleMouseLeave:c,/* handleCellMouseEnter: records hoverState on the table, emits cell-mouse-enter, then measures the cell text to decide whether the overflow tooltip is needed */handleCellMouseEnter:(p,v)=>{var m;const y=t,b=$y(p),C=(m=y==null?void 0:y.vnode.el)==null?void 0:m.dataset.prefix;if(b){const O=wO({columns:e.store.states.columns.value},b,C),T=y.hoverState={cell:b,column:O,row:v};y==null||y.emit("cell-mouse-enter",T.row,T.column,T.cell,p)}const S=p.target.querySelector(".cell");if(!(to(S,`${C}-tooltip`)&&S.childNodes.length))return;const w=document.createRange();w.setStart(S,0),w.setEnd(S,S.childNodes.length);const 
/* handleCellMouseEnter continued: the tooltip (Ipe) is shown when the text range plus horizontal padding, or the scroll width, exceeds the cell's offsetWidth */k=w.getBoundingClientRect().width,$=(Number.parseInt($o(S,"paddingLeft"),10)||0)+(Number.parseInt($o(S,"paddingRight"),10)||0);(k+$>S.offsetWidth||S.scrollWidth>S.offsetWidth)&&Ipe(b,b.innerText||b.textContent,{placement:"top",strategy:"fixed"},v.tooltipEffect)},handleCellMouseLeave:p=>{if(!$y(p))return;const m=t==null?void 0:t.hoverState;t==null||t.emit("cell-mouse-leave",m==null?void 0:m.row,m==null?void 0:m.column,m==null?void 0:m.cell,p)},tooltipContent:n,tooltipTrigger:r}}/* tve: body row/cell style, class and span resolvers; each table prop may be a plain value or a function of the row/column context */function tve(e){const t=ve(Vo),n=De("table");return{getRowStyle:(c,d)=>{const f=t==null?void 0:t.props.rowStyle;return typeof f=="function"?f.call(null,{row:c,rowIndex:d}):f||null},/* getRowClass returns an array (not a joined string); odd rows get the striped modifier when stripe is on */getRowClass:(c,d)=>{const f=[n.e("row")];(t==null?void 0:t.props.highlightCurrentRow)&&c===e.store.states.currentRow.value&&f.push("current-row"),e.stripe&&d%2===1&&f.push(n.em("row","striped"));const p=t==null?void 0:t.props.rowClassName;return typeof p=="string"?f.push(p):typeof p=="function"&&f.push(p.call(null,{row:c,rowIndex:d})),f},getCellStyle:(c,d,f,p)=>{const v=t==null?void 0:t.props.cellStyle;let m=v!=null?v:{};typeof v=="function"&&(m=v.call(null,{rowIndex:c,columnIndex:d,row:f,column:p}));const y=p.isSubColumn?null:bw(d,e==null?void 0:e.fixed,e.store);return ec(y,"left"),ec(y,"right"),Object.assign({},m,y)},getCellClass:(c,d,f,p)=>{const v=p.isSubColumn?[]:yw(n.b(),d,e==null?void 0:e.fixed,e.store),m=[p.id,p.align,p.className,...v],y=t==null?void 0:t.props.cellClassName;return typeof y=="string"?m.push(y):typeof y=="function"&&m.push(y.call(null,{rowIndex:c,columnIndex:d,row:f,column:p})),m.push(n.e("cell")),m.filter(b=>Boolean(b)).join(" ")},/* getSpan: spanMethod may return [rowspan, colspan] or {rowspan, colspan}; both default to 1 */getSpan:(c,d,f,p)=>{let v=1,m=1;const y=t==null?void 0:t.props.spanMethod;if(typeof y=="function"){const b=y({row:c,column:d,rowIndex:f,columnIndex:p});Array.isArray(b)?(v=b[0],m=b[1]):typeof b=="object"&&(v=b.rowspan,m=b.colspan)}return{rowspan:v,colspan:m}},/* getColspanRealWidth: for a colspan below 1 returns the column's own realWidth; otherwise sums the spanned widths (continued on the next line) */getColspanRealWidth:(c,d,f)=>{if(d<1)return c[f].realWidth;const 
/* getColspanRealWidth continued: note the reduce starts from -1, so the result is the summed width minus one */p=c.map(({realWidth:v,width:m})=>v||m).slice(f,f+d);return Number(p.reduce((v,m)=>Number(v)+Number(m),-1))}}}/* nve: builds the row render functions for the table body from the event handlers (eve) and style helpers (tve) */function nve(e){const t=ve(Vo),{handleDoubleClick:n,handleClick:r,handleContextMenu:a,handleMouseEnter:o,handleMouseLeave:i,handleCellMouseEnter:l,handleCellMouseLeave:s,tooltipContent:c,tooltipTrigger:d}=eve(e),{getRowStyle:f,getRowClass:p,getCellStyle:v,getCellClass:m,getSpan:y,getColspanRealWidth:b}=tve(e),/* C: index of the first column whose type is "default"; tree controls are attached to that cell */C=x(()=>e.store.states.columns.value.findIndex(({type:O})=>O==="default")),/* S: row vnode key, taken from the row-key prop when set and from the row index otherwise */S=(O,T)=>{const _=t.props.rowKey;return _?gr(O,_):T},/* w: renders one tr; the optional third argument carries tree info (level, display, expanded, loading) and the fourth the expanded flag passed to cells */w=(O,T,_,I=!1)=>{const{tooltipEffect:L,store:j}=e,{indent:F,columns:N}=j.states,D=p(O,T);let z=!0;return _&&(D.push(`el-table__row--level-${_.level}`),z=_.display),qe("tr",{style:[z?null:{display:"none"},f(O,T)],class:D,key:S(O,T),onDblclick:M=>n(M,O),onClick:M=>r(M,O),onContextmenu:M=>a(M,O),onMouseenter:()=>o(T),onMouseleave:i},N.value.map((M,E)=>{const{rowspan:K,colspan:W}=y(O,M,T,E);/* a zero rowspan or colspan suppresses the cell */if(!K||!W)return null;const Y=Te({},M);Y.realWidth=b(N.value,W,E);const q={store:e.store,_self:e.context||t,column:Y,row:O,$index:T,cellIndex:E,expanded:I};E===C.value&&_&&(q.treeNode={indent:_.level*F.value,level:_.level},typeof _.expanded=="boolean"&&(q.treeNode.expanded=_.expanded,"loading"in _&&(q.treeNode.loading=_.loading),"noLazyChildren"in _&&(q.treeNode.noLazyChildren=_.noLazyChildren)));const J=`${T},${E}`,ne=Y.columnKey||Y.rawColumnKey||"",oe=k(E,M,q);return qe("td",{style:v(T,E,O,M),class:m(T,E,O,M),key:`${ne}${J}`,rowspan:K,colspan:W,onMouseenter:Q=>l(Q,Ke(Te({},O),{tooltipEffect:L})),onMouseleave:s},[oe])}))},k=(O,T,_)=>T.renderCell(_);/* wrappedRowRender: three modes - an expand-type column (row plus optional expanded row), tree data (row plus visible descendants), or a plain row */return{wrappedRowRender:(O,T)=>{const _=e.store,{isRowExpanded:I,assertRowKey:L}=_,{treeData:j,lazyTreeNodeMap:F,childrenColumnName:N,rowKey:D}=_.states,z=_.states.columns.value;if(z.some(({type:M})=>M==="expand")){const M=I(O),E=w(O,T,void 0,M),K=t.renderExpanded;return M?K?[[E,qe("tr",{key:`expanded-row__${E.key}`},[qe("td",{colspan:z.length,class:"el-table__cell 
el-table__expanded-cell"},[K({row:O,$index:T,store:_,expanded:M})])])]]:(console.error("[Element Error]renderExpanded is required."),E):[[E]]}else if(Object.keys(j.value).length){L();const M=gr(O,D.value);let E=j.value[M],K=null;E&&(K={expanded:E.expanded,level:E.level,display:!0},typeof E.lazy=="boolean"&&(typeof E.loaded=="boolean"&&E.loaded&&(K.noLazyChildren=!(E.children&&E.children.length)),K.loading=E.loading));const W=[w(O,T,K)];if(E){let Y=0;const q=(ne,oe)=>{!(ne&&ne.length&&oe)||ne.forEach(Q=>{const ae={display:oe.display&&oe.expanded,level:oe.level+1,expanded:!1,noLazyChildren:!1,loading:!1},de=gr(Q,D.value);if(de==null)throw new Error("For nested data item, row-key is required.");if(E=Te({},j.value[de]),E&&(ae.expanded=E.expanded,E.level=E.level||ae.level,E.display=!!(E.expanded&&ae.display),typeof E.lazy=="boolean"&&(typeof E.loaded=="boolean"&&E.loaded&&(ae.noLazyChildren=!(E.children&&E.children.length)),ae.loading=E.loading)),Y++,W.push(w(Q,T+Y,ae)),E){const be=F.value[de]||Q[N.value];q(be,E)}})};E.display=!0;const J=F.value[M]||O[N.value];q(J,E)}return W}else return w(O,T,void 0)},tooltipContent:c,tooltipTrigger:d}}const rve={store:{required:!0,type:Object},stripe:Boolean,tooltipEffect:String,context:{default:()=>({}),type:Object},rowClassName:[String,Function],rowStyle:[Object,Function],fixed:{type:String,default:""},highlight:Boolean};var ave=G({name:"ElTableBody",props:rve,setup(e){const t=$t(),n=ve(Vo),r=De("table"),{wrappedRowRender:a,tooltipContent:o,tooltipTrigger:i}=nve(e),{onColumnsChange:l,onScrollableChange:s}=aI(n);return ce(e.store.states.hoverRow,(c,d)=>{if(!e.store.states.isComplex.value||!Bt)return;let f=window.requestAnimationFrame;f||(f=p=>window.setTimeout(p,16)),f(()=>{var p;const v=(p=t==null?void 0:t.vnode.el)==null?void 0:p.querySelectorAll(`.${r.e("row")}`),m=v[d],y=v[c];m&&Br(m,"hover-row"),y&&xo(y,"hover-row")})}),Wr(()=>{var c;(c=$v)==null||c()}),ur(()=>{var 
/* ur hook continued, then the setup return value */c;(c=$v)==null||c()}),{ns:r,onColumnsChange:l,onScrollableChange:s,wrappedRowRender:a,tooltipContent:o,tooltipTrigger:i}},/* render: each row may expand to several tr vnodes; the running length of the accumulator is passed as the row index */render(){const{wrappedRowRender:e,store:t}=this,n=t.states.data.value||[];return qe("tbody",{},[n.reduce((r,a)=>r.concat(e(a,r.length)),[])])}});/* Cw: functional colgroup; with tableLayout "auto" widths go into inline styles (and no col is rendered when no column has a width), otherwise each col only carries the column id as its name */function Cw(e){const t=e.tableLayout==="auto";let n=e.columns||[];t&&n.every(a=>a.width===void 0)&&(n=[]);const r=a=>{const o={key:`${e.tableLayout}_${a.id}`,style:{},name:void 0};return t?o.style={width:`${a.width}px`}:o.name=a.id,o};return qe("colgroup",{},n.map(a=>qe("col",r(a))))}Cw.props=["columns","tableLayout"];/* ove: computed column counts read from the injected table's store; note rightFixedLeafCount is derived from rightFixedColumns.length, the same source as rightFixedCount */function ove(){const e=ve(Vo),t=e==null?void 0:e.store,n=x(()=>t.states.fixedLeafColumnsLength.value),r=x(()=>t.states.rightFixedColumns.value.length),a=x(()=>t.states.columns.value.length),o=x(()=>t.states.fixedColumns.value.length),i=x(()=>t.states.rightFixedColumns.value.length);return{leftFixedLeafCount:n,rightFixedLeafCount:r,columnsCount:a,leftFixedCount:o,rightFixedCount:i,columns:t.states.columns}}/* ive: footer cell class and style helpers */function ive(e){const{columns:t}=ove(),n=De("table");return{getCellClasses:(o,i)=>{const l=o[i],s=[n.e("cell"),l.id,l.align,l.labelClassName,...yw(n.b(),i,l.fixed,e.store)];return l.className&&s.push(l.className),l.children||s.push(n.is("leaf")),s},getCellStyles:(o,i)=>{const l=bw(i,o.fixed,e.store);return ec(l,"left"),ec(l,"right"),l},columns:t}}/* lve: the ElTableFooter component; render uses summaryMethod when provided, otherwise puts sumText in the first column and a numeric sum in the others */var lve=G({name:"ElTableFooter",props:{fixed:{type:String,default:""},store:{required:!0,type:Object},summaryMethod:Function,sumText:String,border:Boolean,defaultSort:{type:Object,default:()=>({prop:"",order:""})}},setup(e){const{getCellClasses:t,getCellStyles:n,columns:r}=ive(e);return{ns:De("table"),getCellClasses:t,getCellStyles:n,columns:r}},render(){const{columns:e,getCellStyles:t,getCellClasses:n,summaryMethod:r,sumText:a,ns:o}=this,i=this.store.states.data.value;let l=[];return r?l=r({columns:e,data:i}):e.forEach((s,c)=>{if(c===0){l[c]=a;return}const d=i.map(m=>Number(m[s.property])),f=[];let 
/* default summary continued: p stays true when no value in the column is numeric (the cell is then left empty); f collects the decimal-place count of each numeric value and the sum is rounded to the largest count, capped at 20 */p=!0;d.forEach(m=>{if(!Number.isNaN(+m)){p=!1;const y=`${m}`.split(".")[1];f.push(y?y.length:0)}});const v=Math.max.apply(null,f);p?l[c]="":l[c]=d.reduce((m,y)=>{const b=Number(y);return Number.isNaN(+b)?m:Number.parseFloat((m+y).toFixed(Math.min(v,20)))},0)}),qe("table",{class:o.e("footer"),cellspacing:"0",cellpadding:"0",border:"0"},[Cw({columns:e}),qe("tbody",[qe("tr",{},[...e.map((s,c)=>qe("td",{key:c,colspan:s.colSpan,rowspan:s.rowSpan,class:n(e,c),style:t(s,c)},[qe("div",{class:["cell",s.labelClassName]},[l[c]])]))])])])}});/* sve: thin public wrappers over the store; toggleRowSelection passes false as the third argument (so no select event is emitted) and then refreshes the all-selected flag */function sve(e){return{setCurrentRow:d=>{e.commit("setCurrentRow",d)},getSelectionRows:()=>e.getSelectionRows(),toggleRowSelection:(d,f)=>{e.toggleRowSelection(d,f,!1),e.updateAllSelected()},clearSelection:()=>{e.clearSelection()},clearFilter:d=>{e.clearFilter(d)},toggleAllSelection:()=>{e.commit("toggleAllSelection")},toggleRowExpansion:(d,f)=>{e.toggleRowExpansionAdapter(d,f)},clearSort:()=>{e.clearSort()},sort:(d,f)=>{e.commit("sort",{prop:d,order:f})}}}/* uve: table-level sizing and scroll-sync logic; arguments are the props (e), the layout (t), the store (n) and the table instance (r) */function uve(e,t,n,r){const a=H(!1),o=H(null),i=H(!1),l=E=>{i.value=E},s=H({width:null,height:null}),c=H(!1),d={display:"inline-block",verticalAlign:"middle"},f=H();/* keep the layout height and max-height in sync with the props */Wn(()=>{t.setHeight(e.height)}),Wn(()=>{t.setMaxHeight(e.maxHeight)}),/* the current row key is only applied once a row key is configured; it is passed on as a string */ce(()=>[e.currentRowKey,n.states.rowKey],([E,K])=>{!A(K)||n.setCurrentRowKey(`${E}`)},{immediate:!0}),/* deep, immediate watch that pushes data into the store */ce(()=>e.data,E=>{r.store.commit("setData",E)},{immediate:!0,deep:!0}),Wn(()=>{e.expandRowKeys&&n.setExpandRowKeysAdapter(e.expandRowKeys)});const p=()=>{r.store.commit("setHoverRow",null),r.hoverState&&(r.hoverState=null)},/* header/footer wheel: only predominantly horizontal deltas scroll the body */v=(E,K)=>{const{pixelX:W,pixelY:Y}=K;Math.abs(W)>=Math.abs(Y)&&(r.refs.bodyWrapper.scrollLeft+=K.pixelX/5)},/* m: truthy when the table has a height, a max-height or any fixed column */m=x(()=>e.height||e.maxHeight||n.states.fixedColumns.value.length>0||n.states.rightFixedColumns.value.length>0),y=x(()=>({width:t.bodyWidth.value?`${t.bodyWidth.value}px`:""})),/* doLayout (b) */b=()=>{m.value&&t.updateElsHeight(),t.updateColumnsWidth(),requestAnimationFrame(k)};et(async()=>{await 
Ne(),n.updateColumns(),$(),requestAnimationFrame(b),s.value={width:f.value=r.vnode.el.offsetWidth,height:r.vnode.el.offsetHeight},n.states.columns.value.forEach(E=>{E.filteredValue&&E.filteredValue.length&&r.store.commit("filterChange",{column:E,values:E.filteredValue,silent:!0})}),r.$ready=!0});const C=(E,K)=>{if(!E)return;const W=Array.from(E.classList).filter(Y=>!Y.startsWith("is-scrolling-"));W.push(t.scrollX.value?K:"is-scrolling-none"),E.className=W.join(" ")},S=E=>{const{tableWrapper:K}=r.refs;C(K,E)},w=E=>{const{tableWrapper:K}=r.refs;return!!(K&&K.classList.contains(E))},k=function(){if(!r.refs.scrollBarRef)return;if(!t.scrollX.value){const oe="is-scrolling-none";w(oe)||S(oe);return}const E=r.refs.scrollBarRef.wrap$;if(!E)return;const{scrollLeft:K,offsetWidth:W,scrollWidth:Y}=E,{headerWrapper:q,footerWrapper:J}=r.refs;q&&(q.scrollLeft=K),J&&(J.scrollLeft=K);const ne=Y-W-1;K>=ne?S("is-scrolling-right"):S(K===0?"is-scrolling-left":"is-scrolling-middle")},$=()=>{var E;!r.refs.scrollBarRef||((E=r.refs.scrollBarRef.wrap$)==null||E.addEventListener("scroll",k,{passive:!0}),e.fit?wc(r.vnode.el,T):gn(window,"resize",b))};Lt(()=>{O()});const O=()=>{var E;(E=r.refs.scrollBarRef.wrap$)==null||E.removeEventListener("scroll",k,!0),e.fit?Sc(r.vnode.el,T):Bn(window,"resize",b)},T=()=>{if(!r.$ready)return;let E=!1;const K=r.vnode.el,{width:W,height:Y}=s.value,q=f.value=K.offsetWidth;W!==q&&(E=!0);const J=K.offsetHeight;(e.height||m.value)&&Y!==J&&(E=!0),E&&(s.value={width:q,height:J},b())},_=Gn(),I=x(()=>{const{bodyWidth:E,scrollY:K,gutterWidth:W}=t;return E.value?`${E.value-(K.value?W:0)}px`:""}),L=x(()=>e.maxHeight?"fixed":e.tableLayout);function j(E,K,W){const Y=j0(E),q=e.showHeader?W:0;if(Y!==null)return wt(Y)?`calc(${Y} - ${K}px - ${q}px)`:Y-K-q}const F=x(()=>{const E=t.headerHeight.value||0,K=t.bodyHeight.value,W=t.footerHeight.value||0;if(e.height)return K||void 0;if(e.maxHeight)return j(e.maxHeight,W,E)}),N=x(()=>{const 
E=t.headerHeight.value||0,K=t.bodyHeight.value,W=t.footerHeight.value||0;if(e.height)return{height:K?`${K}px`:""};if(e.maxHeight){const Y=j(e.maxHeight,W,E);if(Y!==null)return{"max-height":`${Y}${Yt(Y)?"px":""}`}}return{}}),D=x(()=>{if(e.data&&e.data.length)return null;let E="100%";return t.appendHeight.value&&(E=`calc(100% - ${t.appendHeight.value}px)`),{width:f.value?`${f.value}px`:"",height:E}}),z=(E,K)=>{const W=r.refs.bodyWrapper;if(Math.abs(K.spinY)>0){const Y=W.scrollTop;K.pixelY<0&&Y!==0&&E.preventDefault(),K.pixelY>0&&W.scrollHeight-W.clientHeight>Y&&E.preventDefault(),W.scrollTop+=Math.ceil(K.pixelY/5)}else W.scrollLeft+=Math.ceil(K.pixelX/5)},B=x(()=>e.maxHeight?e.showSummary?{bottom:0}:{bottom:t.scrollX.value&&e.data.length?`${t.gutterWidth}px`:""}:e.showSummary?{height:t.tableHeight.value?`${t.tableHeight.value}px`:""}:{height:t.viewportHeight.value?`${t.viewportHeight.value}px`:""}),M=x(()=>{if(e.height)return{height:t.fixedBodyHeight.value?`${t.fixedBodyHeight.value}px`:""};if(e.maxHeight){let E=j0(e.maxHeight);if(typeof E=="number")return E=t.scrollX.value?E-t.gutterWidth:E,e.showHeader&&(E-=t.headerHeight.value),E-=t.footerHeight.value,{"max-height":`${E}px`}}return{}});return{isHidden:a,renderExpanded:o,setDragVisible:l,isGroup:c,handleMouseLeave:p,handleHeaderFooterMousewheel:v,tableSize:_,bodyHeight:N,height:F,emptyBlockStyle:D,handleFixedMousewheel:z,fixedHeight:B,fixedBodyHeight:M,resizeProxyVisible:i,bodyWidth:I,resizeState:s,doLayout:b,tableBodyStyles:y,tableLayout:L,scrollbarViewStyle:d}}var 
cve={data:{type:Array,default:()=>[]},size:String,width:[String,Number],height:[String,Number],maxHeight:[String,Number],fit:{type:Boolean,default:!0},stripe:Boolean,border:Boolean,rowKey:[String,Function],showHeader:{type:Boolean,default:!0},showSummary:Boolean,sumText:String,summaryMethod:Function,rowClassName:[String,Function],rowStyle:[Object,Function],cellClassName:[String,Function],cellStyle:[Object,Function],headerRowClassName:[String,Function],headerRowStyle:[Object,Function],headerCellClassName:[String,Function],headerCellStyle:[Object,Function],highlightCurrentRow:Boolean,currentRowKey:[String,Number],emptyText:String,expandRowKeys:Array,defaultExpandAll:Boolean,defaultSort:Object,tooltipEffect:String,spanMethod:Function,selectOnIndeterminate:{type:Boolean,default:!0},indent:{type:Number,default:16},treeProps:{type:Object,default:()=>({hasChildren:"hasChildren",children:"children"})},lazy:Boolean,load:Function,style:{type:Object,default:()=>({})},className:{type:String,default:""},tableLayout:{type:String,default:"fixed"},scrollbarAlwaysOn:{type:Boolean,default:!1}};const dve=()=>{const e=H(),t=(o,i)=>{const l=e.value;l&&l.scrollTo(o,i)},n=(o,i)=>{const l=e.value;l&&Yt(i)&&["Top","Left"].includes(o)&&l[`setScroll${o}`](i)};return{scrollBarRef:e,scrollTo:t,setScrollTop:o=>n("Top",o),setScrollLeft:o=>n("Left",o)}};let fve=1;const hve=G({name:"ElTable",directives:{Mousewheel:Dte},components:{TableHeader:Qpe,TableBody:ave,TableFooter:lve,ElScrollbar:xi,hColgroup:Cw},props:cve,emits:["select","select-all","selection-change","cell-mouse-enter","cell-mouse-leave","cell-contextmenu","cell-click","cell-dblclick","row-click","row-contextmenu","row-dblclick","header-click","header-contextmenu","sort-change","filter-change","current-change","header-dragend","expand-change"],setup(e){const{t}=ln(),n=De("table"),r=$t();ot(Vo,r);const a=Bpe(r,e);r.store=a;const o=new zpe({store:r.store,table:r,fit:e.fit,showHeader:e.showHeader});r.layout=o;const 
i=x(()=>(a.states.data.value||[]).length===0),{setCurrentRow:l,getSelectionRows:s,toggleRowSelection:c,clearSelection:d,clearFilter:f,toggleAllSelection:p,toggleRowExpansion:v,clearSort:m,sort:y}=sve(a),{isHidden:b,renderExpanded:C,setDragVisible:S,isGroup:w,handleMouseLeave:k,handleHeaderFooterMousewheel:$,tableSize:O,bodyHeight:T,height:_,emptyBlockStyle:I,handleFixedMousewheel:L,fixedHeight:j,fixedBodyHeight:F,resizeProxyVisible:N,bodyWidth:D,resizeState:z,doLayout:B,tableBodyStyles:M,tableLayout:E,scrollbarViewStyle:K}=uve(e,o,a,r),{scrollBarRef:W,scrollTo:Y,setScrollLeft:q,setScrollTop:J}=dve(),ne=Yn(B,50),oe=`el-table_${fve++}`;r.tableId=oe,r.state={isGroup:w,resizeState:z,doLayout:B,debouncedUpdateLayout:ne};const Q=x(()=>e.sumText||t("el.table.sumText")),ae=x(()=>e.emptyText||t("el.table.emptyText"));return{ns:n,layout:o,store:a,handleHeaderFooterMousewheel:$,handleMouseLeave:k,tableId:oe,tableSize:O,isHidden:b,isEmpty:i,renderExpanded:C,resizeProxyVisible:N,resizeState:z,isGroup:w,bodyWidth:D,bodyHeight:T,height:_,tableBodyStyles:M,emptyBlockStyle:I,debouncedUpdateLayout:ne,handleFixedMousewheel:L,fixedHeight:j,fixedBodyHeight:F,setCurrentRow:l,getSelectionRows:s,toggleRowSelection:c,clearSelection:d,clearFilter:f,toggleAllSelection:p,toggleRowExpansion:v,clearSort:m,doLayout:B,sort:y,t,setDragVisible:S,context:r,computedSumText:Q,computedEmptyText:ae,tableLayout:E,scrollbarViewStyle:K,scrollBarRef:W,scrollTo:Y,setScrollLeft:q,setScrollTop:J}}}),pve=["data-prefix"],vve={ref:"hiddenColumns",class:"hidden-columns"};function mve(e,t,n,r,a,o){const i=we("hColgroup"),l=we("table-header"),s=we("table-body"),c=we("el-scrollbar"),d=we("table-footer"),f=pa("mousewheel");return 
R(),X("div",{ref:"tableWrapper",class:U([{[e.ns.m("fit")]:e.fit,[e.ns.m("striped")]:e.stripe,[e.ns.m("border")]:e.border||e.isGroup,[e.ns.m("hidden")]:e.isHidden,[e.ns.m("group")]:e.isGroup,[e.ns.m("fluid-height")]:e.maxHeight,[e.ns.m("scrollable-x")]:e.layout.scrollX.value,[e.ns.m("scrollable-y")]:e.layout.scrollY.value,[e.ns.m("enable-row-hover")]:!e.store.states.isComplex.value,[e.ns.m("enable-row-transition")]:(e.store.states.data.value||[]).length!==0&&(e.store.states.data.value||[]).length<100,"has-footer":e.showSummary},e.ns.m(e.tableSize),e.className,e.ns.b(),e.ns.m(`layout-${e.tableLayout}`)]),style:Xe(e.style),"data-prefix":e.ns.namespace.value,onMouseleave:t[0]||(t[0]=p=>e.handleMouseLeave())},[Z("div",{class:U(e.ns.e("inner-wrapper"))},[Z("div",vve,[Oe(e.$slots,"default")],512),e.showHeader&&e.tableLayout==="fixed"?at((R(),X("div",{key:0,ref:"headerWrapper",class:U(e.ns.e("header-wrapper"))},[Z("table",{ref:"tableHeader",class:U(e.ns.e("header")),style:Xe(e.tableBodyStyles),border:"0",cellpadding:"0",cellspacing:"0"},[g(i,{columns:e.store.states.columns.value,"table-layout":e.tableLayout},null,8,["columns","table-layout"]),g(l,{ref:"tableHeaderRef",border:e.border,"default-sort":e.defaultSort,store:e.store,onSetDragVisible:e.setDragVisible},null,8,["border","default-sort","store","onSetDragVisible"])],6)],2)),[[f,e.handleHeaderFooterMousewheel]]):se("v-if",!0),Z("div",{ref:"bodyWrapper",style:Xe(e.bodyHeight),class:U(e.ns.e("body-wrapper"))},[g(c,{ref:"scrollBarRef",height:e.maxHeight?void 0:e.height,"max-height":e.maxHeight?e.height:void 
0,"view-style":e.scrollbarViewStyle,always:e.scrollbarAlwaysOn},{default:re(()=>[Z("table",{ref:"tableBody",class:U(e.ns.e("body")),cellspacing:"0",cellpadding:"0",border:"0",style:Xe({width:e.bodyWidth,tableLayout:e.tableLayout})},[g(i,{columns:e.store.states.columns.value,"table-layout":e.tableLayout},null,8,["columns","table-layout"]),e.showHeader&&e.tableLayout==="auto"?(R(),fe(l,{key:0,border:e.border,"default-sort":e.defaultSort,store:e.store,onSetDragVisible:e.setDragVisible},null,8,["border","default-sort","store","onSetDragVisible"])):se("v-if",!0),g(s,{context:e.context,highlight:e.highlightCurrentRow,"row-class-name":e.rowClassName,"tooltip-effect":e.tooltipEffect,"row-style":e.rowStyle,store:e.store,stripe:e.stripe},null,8,["context","highlight","row-class-name","tooltip-effect","row-style","store","stripe"])],6),e.isEmpty?(R(),X("div",{key:0,ref:"emptyBlock",style:Xe(e.emptyBlockStyle),class:U(e.ns.e("empty-block"))},[Z("span",{class:U(e.ns.e("empty-text"))},[Oe(e.$slots,"empty",{},()=>[yt(Me(e.computedEmptyText),1)])],2)],6)):se("v-if",!0),e.$slots.append?(R(),X("div",{key:1,ref:"appendWrapper",class:U(e.ns.e("append-wrapper"))},[Oe(e.$slots,"append")],2)):se("v-if",!0)]),_:3},8,["height","max-height","view-style","always"])],6),e.border||e.isGroup?(R(),X("div",{key:1,class:U(e.ns.e("border-left-patch"))},null,2)):se("v-if",!0)],2),e.showSummary?at((R(),X("div",{key:0,ref:"footerWrapper",class:U(e.ns.e("footer-wrapper"))},[g(d,{border:e.border,"default-sort":e.defaultSort,store:e.store,style:Xe(e.tableBodyStyles),"sum-text":e.computedSumText,"summary-method":e.summaryMethod},null,8,["border","default-sort","store","style","sum-text","summary-method"])],2)),[[_t,!e.isEmpty],[f,e.handleHeaderFooterMousewheel]]):se("v-if",!0),at(Z("div",{ref:"resizeProxy",class:U(e.ns.e("column-resize-proxy"))},null,2),[[_t,e.resizeProxyVisible]])],46,pve)}var 
gve=Ae(hve,[["render",mve],["__file","/home/runner/work/element-plus/element-plus/packages/components/table/src/table.vue"]]);const yve={selection:"table-column--selection",expand:"table__expand-column"},bve={default:{order:""},selection:{width:48,minWidth:48,realWidth:48,order:""},expand:{width:48,minWidth:48,realWidth:48,order:""},index:{width:48,minWidth:48,realWidth:48,order:""}},Cve=e=>yve[e]||"",wve={selection:{renderHeader({store:e}){function t(){return e.states.data.value&&e.states.data.value.length===0}return qe(io,{disabled:t(),size:e.states.tableSize.value,indeterminate:e.states.selection.value.length>0&&!e.states.isAllSelected.value,"onUpdate:modelValue":e.toggleAllSelection,modelValue:e.states.isAllSelected.value})},renderCell({row:e,column:t,store:n,$index:r}){return qe(io,{disabled:t.selectable?!t.selectable.call(null,e,r):!1,size:n.states.tableSize.value,onChange:()=>{n.commit("rowSelectedChanged",e)},onClick:a=>a.stopPropagation(),modelValue:n.isSelected(e)})},sortable:!1,resizable:!1},index:{renderHeader({column:e}){return e.label||"#"},renderCell({column:e,$index:t}){let n=t+1;const r=e.index;return typeof r=="number"?n=t+r:typeof r=="function"&&(n=r(t)),qe("div",{},[n])},sortable:!1},expand:{renderHeader({column:e}){return e.label||""},renderCell({row:e,store:t,expanded:n}){const{ns:r}=t,a=[r.e("expand-icon")];return n&&a.push(r.em("expand-icon","expanded")),qe("div",{class:a,onClick:function(i){i.stopPropagation(),t.toggleRowExpansion(e)}},{default:()=>[qe(ft,null,{default:()=>[qe(Da)]})]})},sortable:!1,resizable:!1}};function Sve({row:e,column:t,$index:n}){var r;const a=t.property,o=a&&cp(e,a).value;return t&&t.formatter?t.formatter(e,t,o,n):((r=o==null?void 0:o.toString)==null?void 0:r.call(o))||""}function kve({row:e,treeNode:t,store:n},r=!1){const{ns:a}=n;if(!t)return r?[qe("span",{class:a.e("placeholder")})]:null;const 
o=[],i=function(l){l.stopPropagation(),n.loadOrToggle(e)};if(t.indent&&o.push(qe("span",{class:a.e("indent"),style:{"padding-left":`${t.indent}px`}})),typeof t.expanded=="boolean"&&!t.noLazyChildren){const l=[a.e("expand-icon"),t.expanded?a.em("expand-icon","expanded"):""];let s=Da;t.loading&&(s=gl),o.push(qe("div",{class:l,onClick:i},{default:()=>[qe(ft,{class:{[a.is("loading")]:t.loading}},{default:()=>[qe(s)]})]}))}else o.push(qe("span",{class:a.e("placeholder")}));return o}function kO(e,t){return e.reduce((n,r)=>(n[r]=r,n),t)}function $ve(e,t){const n=$t();return{registerComplexWatchers:()=>{const o=["fixed"],i={realWidth:"width",realMinWidth:"minWidth"},l=kO(o,i);Object.keys(l).forEach(s=>{const c=i[s];Mt(t,c)&&ce(()=>t[c],d=>{let f=d;c==="width"&&s==="realWidth"&&(f=gw(d)),c==="minWidth"&&s==="realMinWidth"&&(f=Q5(d)),n.columnConfig.value[c]=f,n.columnConfig.value[s]=f;const p=c==="fixed";e.value.store.scheduleLayout(p)})})},registerNormalWatchers:()=>{const o=["label","filters","filterMultiple","sortable","index","formatter","className","labelClassName","showOverflowTooltip"],i={property:"prop",align:"realAlign",headerAlign:"realHeaderAlign"},l=kO(o,i);Object.keys(l).forEach(s=>{const c=i[s];Mt(t,c)&&ce(()=>t[c],d=>{n.columnConfig.value[s]=d})})}}}function Ove(e,t,n){const r=$t(),a=H(""),o=H(!1),i=H(),l=H(),s=De("table");Wn(()=>{i.value=e.align?`is-${e.align}`:null,i.value}),Wn(()=>{l.value=e.headerAlign?`is-${e.headerAlign}`:i.value,l.value});const c=x(()=>{let w=r.vnode.vParent||r.parent;for(;w&&!w.tableId&&!w.columnId;)w=w.vnode.vParent||w.parent;return w}),d=x(()=>{const{store:w}=r.parent;if(!w)return!1;const{treeData:k}=w.states,$=k.value;return $&&Object.keys($).length>0}),f=H(gw(e.width)),p=H(Q5(e.minWidth)),v=w=>(f.value&&(w.width=f.value),p.value&&(w.minWidth=p.value),w.minWidth||(w.minWidth=80),w.realWidth=Number(w.width===void 0?w.minWidth:w.width),w),m=w=>{const k=w.type,$=wve[k]||{};Object.keys($).forEach(T=>{const 
_=$[T];T!=="className"&&_!==void 0&&(w[T]=_)});const O=Cve(k);if(O){const T=`${A(s.namespace)}-${O}`;w.className=w.className?`${w.className} ${T}`:T}return w},y=w=>{Array.isArray(w)?w.forEach($=>k($)):k(w);function k($){var O;((O=$==null?void 0:$.type)==null?void 0:O.name)==="ElTableColumn"&&($.vParent=r)}};return{columnId:a,realAlign:i,isSubColumn:o,realHeaderAlign:l,columnOrTableParent:c,setColumnWidth:v,setColumnForcedProps:m,setColumnRenders:w=>{e.renderHeader||w.type!=="selection"&&(w.renderHeader=O=>{r.columnConfig.value.label;const T=t.header;return T?T(O):w.label});let k=w.renderCell;const $=d.value;return w.type==="expand"?(w.renderCell=O=>qe("div",{class:"cell"},[k(O)]),n.value.renderExpanded=O=>t.default?t.default(O):t.default):(k=k||Sve,w.renderCell=O=>{let T=null;if(t.default){const j=t.default(O);T=j.some(F=>F.type!==Ir)?j:k(O)}else T=k(O);const _=$&&O.cellIndex===0,I=kve(O,_),L={class:"cell",style:{}};return w.showOverflowTooltip&&(L.class=`${L.class} ${A(s.namespace)}-tooltip`,L.style={width:`${(O.column.realWidth||Number(O.column.width))-1}px`}),y(T),qe("div",L,[I,T])}),w},getPropsData:(...w)=>w.reduce((k,$)=>(Array.isArray($)&&$.forEach(O=>{k[O]=e[O]}),k),{}),getColumnElIndex:(w,k)=>Array.prototype.indexOf.call(w,k)}}var 
Pve={type:{type:String,default:"default"},label:String,className:String,labelClassName:String,property:String,prop:String,width:{type:[String,Number],default:""},minWidth:{type:[String,Number],default:""},renderHeader:Function,sortable:{type:[Boolean,String],default:!1},sortMethod:Function,sortBy:[String,Function,Array],resizable:{type:Boolean,default:!0},columnKey:String,align:String,headerAlign:String,showTooltipWhenOverflow:Boolean,showOverflowTooltip:Boolean,fixed:[Boolean,String],formatter:Function,selectable:Function,reserveSelection:Boolean,filterMethod:Function,filteredValue:Array,filters:Array,filterPlacement:String,filterMultiple:{type:Boolean,default:!0},index:[Number,Function],sortOrders:{type:Array,default:()=>["ascending","descending",null],validator:e=>e.every(t=>["ascending","descending",null].includes(t))}};let Tve=1;var iI=G({name:"ElTableColumn",components:{ElCheckbox:io},props:Pve,setup(e,{slots:t}){const n=$t(),r=H({}),a=x(()=>{let S=n.parent;for(;S&&!S.tableId;)S=S.parent;return S}),{registerNormalWatchers:o,registerComplexWatchers:i}=$ve(a,e),{columnId:l,isSubColumn:s,realHeaderAlign:c,columnOrTableParent:d,setColumnWidth:f,setColumnForcedProps:p,setColumnRenders:v,getPropsData:m,getColumnElIndex:y,realAlign:b}=Ove(e,t,a),C=d.value;l.value=`${C.tableId||C.columnId}_column_${Tve++}`,hc(()=>{s.value=a.value!==C;const S=e.type||"default",w=e.sortable===""?!0:e.sortable,k=Ke(Te({},bve[S]),{id:l.value,type:S,property:e.prop||e.property,align:b,headerAlign:c,showOverflowTooltip:e.showOverflowTooltip||e.showTooltipWhenOverflow,filterable:e.filters||e.filterMethod,filteredValue:[],filterPlacement:"",isColumnGroup:!1,isSubColumn:!1,filterOpened:!1,sortable:w,index:e.index,rawColumnKey:n.vnode.key});let 
I=m(["columnKey","label","className","labelClassName","type","renderHeader","formatter","fixed","resizable"],["sortMethod","sortBy","sortOrders"],["selectable","reserveSelection"],["filterMethod","filters","filterMultiple","filterOpened","filteredValue","filterPlacement"]);I=_pe(k,I),I=Epe(v,f,p)(I),r.value=I,o(),i()}),et(()=>{var S;const w=d.value,k=s.value?w.vnode.el.children:(S=w.refs.hiddenColumns)==null?void 0:S.children,$=()=>y(k||[],n.vnode.el);r.value.getColumnIndex=$,$()>-1&&a.value.store.commit("insertColumn",r.value,s.value?w.columnConfig.value:null)}),Lt(()=>{a.value.store.commit("removeColumn",r.value,s.value?C.columnConfig.value:null)}),n.columnId=l.value,n.columnConfig=r},render(){var e,t,n;try{const r=(t=(e=this.$slots).default)==null?void 0:t.call(e,{row:{},column:{},$index:-1}),a=[];if(Array.isArray(r))for(const i of r)((n=i.type)==null?void 0:n.name)==="ElTableColumn"||i.shapeFlag&2?a.push(i):i.type===Fe&&Array.isArray(i.children)&&i.children.forEach(l=>{(l==null?void 0:l.patchFlag)!==1024&&!wt(l==null?void 0:l.children)&&a.push(l)});return qe("div",a)}catch{return qe("div",[])}}});const xve=xt(gve,{TableColumn:iI}),_ve=En(iI),Eve=Ze({tabs:{type:Le(Array),default:()=>xn([])}}),Mve={name:"ElTabBar"},Ive=G(Ke(Te({},Mve),{props:Eve,setup(e,{expose:t}){const n=e,r="ElTabBar",a=$t(),o=ve(Hm);o||qn(r,"<el-tabs><el-tab-bar /></el-tabs>");const i=De("tabs"),l=H(),s=H(),c=()=>{let f=0,p=0;const v=["top","bottom"].includes(o.props.tabPosition)?"width":"height",m=v==="width"?"x":"y";return n.tabs.every(y=>{var b,C,S,w;const k=(C=(b=a.parent)==null?void 0:b.refs)==null?void 0:C[`tab-${y.paneName}`];if(!k)return!1;if(!y.active)return!0;p=k[`client${ia(v)}`];const $=m==="x"?"left":"top";f=k.getBoundingClientRect()[$]-((w=(S=k.parentElement)==null?void 0:S.getBoundingClientRect()[$])!=null?w:0);const O=window.getComputedStyle(k);return 
v==="width"&&(n.tabs.length>1&&(p-=Number.parseFloat(O.paddingLeft)+Number.parseFloat(O.paddingRight)),f+=Number.parseFloat(O.paddingLeft)),!1}),{[v]:`${p}px`,transform:`translate${ia(m)}(${f}px)`}},d=()=>s.value=c();return ce(()=>n.tabs,async()=>{await Ne(),d()},{immediate:!0}),Cc(l,()=>d()),t({ref:l,update:d}),(f,p)=>(R(),X("div",{ref_key:"barRef",ref:l,class:U([A(i).e("active-bar"),A(i).is(A(o).props.tabPosition)]),style:Xe(s.value)},null,6))}}));var Nve=Ae(Ive,[["__file","/home/runner/work/element-plus/element-plus/packages/components/tabs/src/tab-bar.vue"]]);const Ave=Ze({panes:{type:Le(Array),default:()=>xn([])},currentName:{type:[String,Number],default:""},editable:Boolean,onTabClick:{type:Le(Function),default:Qt},onTabRemove:{type:Le(Function),default:Qt},type:{type:String,values:["card","border-card",""],default:""},stretch:Boolean}),$O="ElTabNav",Dve=G({name:$O,props:Ave,setup(e,{expose:t}){const n=ve(Hm);n||qn($O,"<el-tabs><tab-nav /></el-tabs>");const r=De("tabs"),a=CU(),o=$U(),i=H(),l=H(),s=H(),c=H(!1),d=H(0),f=H(!1),p=H(!0),v=x(()=>["top","bottom"].includes(n.props.tabPosition)?"width":"height"),m=x(()=>({transform:`translate${v.value==="width"?"X":"Y"}(-${d.value}px)`})),y=()=>{if(!i.value)return;const O=i.value[`offset${ia(v.value)}`],T=d.value;if(!T)return;const _=T>O?T-O:0;d.value=_},b=()=>{if(!i.value||!l.value)return;const O=l.value[`offset${ia(v.value)}`],T=i.value[`offset${ia(v.value)}`],_=d.value;if(O-_<=T)return;const I=O-_>T*2?_+T:O-T;d.value=I},C=()=>{const O=l.value;if(!c.value||!s.value||!i.value||!O)return;const T=s.value.querySelector(".is-active");if(!T)return;const _=i.value,I=["top","bottom"].includes(n.props.tabPosition),L=T.getBoundingClientRect(),j=_.getBoundingClientRect(),F=I?O.offsetWidth-j.width:O.offsetHeight-j.height,N=d.value;let 
D=N;I?(L.left<j.left&&(D=N-(j.left-L.left)),L.right>j.right&&(D=N+L.right-j.right)):(L.top<j.top&&(D=N-(j.top-L.top)),L.bottom>j.bottom&&(D=N+(L.bottom-j.bottom))),D=Math.max(D,0),d.value=Math.min(D,F)},S=()=>{if(!l.value||!i.value)return;const O=l.value[`offset${ia(v.value)}`],T=i.value[`offset${ia(v.value)}`],_=d.value;if(T<O){const I=d.value;c.value=c.value||{},c.value.prev=I,c.value.next=I+T<O,O-I<T&&(d.value=O-T)}else c.value=!1,_>0&&(d.value=0)},w=O=>{const T=O.code,{up:_,down:I,left:L,right:j}=Ge;if(![_,I,L,j].includes(T))return;const F=Array.from(O.currentTarget.querySelectorAll("[role=tab]")),N=F.indexOf(O.target);let D;T===L||T===_?N===0?D=F.length-1:D=N-1:N<F.length-1?D=N+1:D=0,F[D].focus(),F[D].click(),k()},k=()=>{p.value&&(f.value=!0)},$=()=>f.value=!1;return ce(a,O=>{O==="hidden"?p.value=!1:O==="visible"&&setTimeout(()=>p.value=!0,50)}),ce(o,O=>{O?setTimeout(()=>p.value=!0,50):p.value=!1}),Cc(s,S),et(()=>setTimeout(()=>C(),0)),ur(()=>S()),t({scrollToActiveTab:C,removeFocus:$}),()=>{const O=c.value?[g("span",{class:[r.e("nav-prev"),r.is("disabled",!c.value.prev)],onClick:y},[g(ft,null,{default:()=>[g(Es,null,null)]})]),g("span",{class:[r.e("nav-next"),r.is("disabled",!c.value.next)],onClick:b},[g(ft,null,{default:()=>[g(Da,null,null)]})])]:null,T=e.panes.map((_,I)=>{var L,j;const F=_.props.name||_.index||`${I}`,N=_.isClosable||e.editable;_.index=`${I}`;const D=N?g(ft,{class:"is-icon-close",onClick:M=>e.onTabRemove(_,M)},{default:()=>[g(Ma,null,null)]}):null,z=((j=(L=_.instance.slots).label)==null?void 0:j.call(L))||_.props.label,B=_.active?0:-1;return 
g("div",{ref:`tab-${F}`,class:[r.e("item"),r.is(n.props.tabPosition),r.is("active",_.active),r.is("disabled",_.props.disabled),r.is("closable",N),r.is("focus",f.value)],id:`tab-${F}`,key:`tab-${F}`,"aria-controls":`pane-${F}`,role:"tab","aria-selected":_.active,tabindex:B,onFocus:()=>k(),onBlur:()=>$(),onClick:M=>{$(),e.onTabClick(_,F,M)},onKeydown:M=>{N&&(M.code===Ge.delete||M.code===Ge.backspace)&&e.onTabRemove(_,M)}},[z,D])});return g("div",{ref:s,class:[r.e("nav-wrap"),r.is("scrollable",!!c.value),r.is(n.props.tabPosition)]},[O,g("div",{class:r.e("nav-scroll"),ref:i},[g("div",{class:[r.e("nav"),r.is(n.props.tabPosition),r.is("stretch",e.stretch&&["top","bottom"].includes(n.props.tabPosition))],ref:l,style:m.value,role:"tablist",onKeydown:w},[e.type?null:g(Nve,{tabs:[...e.panes]},null),T])])])}}}),Rve=Ze({type:{type:String,values:["card","border-card",""],default:""},activeName:{type:[String,Number],default:""},closable:Boolean,addable:Boolean,modelValue:{type:[String,Number],default:""},editable:Boolean,tabPosition:{type:String,values:["top","right","bottom","left"],default:"top"},beforeLeave:{type:Le(Function),default:()=>!0},stretch:Boolean}),dh=e=>wt(e)||Yt(e),Lve={[Pt]:e=>dh(e),[cl]:e=>dh(e),"tab-click":(e,t)=>t instanceof Event,"tab-change":e=>dh(e),edit:(e,t)=>["remove","add"].includes(t),"tab-remove":e=>dh(e),"tab-add":()=>!0},lI=(e,t=[])=>{const n=e.children||[];return Array.from(n).forEach(r=>{let a=r.type;a=a.name||a,a==="ElTabPane"&&r.component?t.push(r.component):(a===Fe||a==="template")&&lI(r,t)}),t};var Fve=G({name:"ElTabs",props:Rve,emits:Lve,setup(e,{emit:t,slots:n,expose:r}){const a=$t();Tf({scope:"el-tabs",type:"Event",from:"input",replacement:"tab-change",version:"2.5.0",ref:"https://element-plus.org/en-US/component/tabs.html#tabs-events"},x(()=>{var b;return Ct((b=a.vnode.props)==null?void 0:b.onInput)}));const o=De("tabs"),i=H(),l=H([]),s=H(e.modelValue||e.activeName||"0"),c={},d=(b=!1)=>{if(n.default){const 
C=a.subTree.children,S=Array.from(C).find(({props:$})=>($==null?void 0:$.class)===o.e("content"));if(!S)return;const w=lI(S).map($=>c[$.uid]),k=!(w.length===l.value.length&&w.every(($,O)=>$.uid===l.value[O].uid));(b||k)&&(l.value=w)}else l.value.length!==0&&(l.value=[])},f=b=>{s.value=b,t(cl,b),t(Pt,b),t("tab-change",b)},p=b=>{var C;if(s.value===b)return;const S=(C=e.beforeLeave)==null?void 0:C.call(e,b,s.value);hs(S)?S.then(()=>{var w,k;f(b),(k=(w=i.value)==null?void 0:w.removeFocus)==null||k.call(w)},Qt):S!==!1&&f(b)},v=(b,C,S)=>{b.props.disabled||(p(C),t("tab-click",b,S))},m=(b,C)=>{b.props.disabled||(C.stopPropagation(),t("edit",b.props.name,"remove"),t("tab-remove",b.props.name))},y=()=>{t("edit",void 0,"add"),t("tab-add")};return ur(()=>d()),et(()=>d()),ce(()=>e.activeName,b=>p(b)),ce(()=>e.modelValue,b=>p(b)),ce(s,async()=>{var b,C;d(!0),await Ne(),await((b=i.value)==null?void 0:b.$nextTick()),(C=i.value)==null||C.scrollToActiveTab()}),ot(Hm,{props:e,currentName:s,updatePaneState:b=>c[b.uid]=b}),r({currentName:s}),()=>{const b=e.editable||e.addable?g("span",{class:o.e("new-tab"),tabindex:"0",onClick:y,onKeydown:w=>{w.code===Ge.enter&&y()}},[g(ft,{class:o.is("icon-plus")},{default:()=>[g(cE,null,null)]})]):null,C=g("div",{class:[o.e("header"),o.is(e.tabPosition)]},[b,g(Dve,{ref:i,currentName:s.value,editable:e.editable,type:e.type,panes:l.value,stretch:e.stretch,onTabClick:v,onTabRemove:m},null)]),S=g("div",{class:o.e("content")},[Oe(n,"default")]);return g("div",{class:[o.b(),o.m(e.tabPosition),{[o.m("card")]:e.type==="card",[o.m("border-card")]:e.type==="border-card"}]},[...e.tabPosition!=="bottom"?[C,S]:[S,C]])}}});const Bve=Ze({label:{type:String,default:""},name:{type:[String,Number],default:""},closable:Boolean,disabled:Boolean,lazy:Boolean}),Vve=["id","aria-hidden","aria-labelledby"],zve={name:"ElTabPane"},Hve=G(Ke(Te({},zve),{props:Bve,setup(e){const t=e,n="ElTabPane",r=$t(),a=ve(Hm);a||qn(n,"usage: <el-tabs><el-tab-pane /></el-tabs/>");const 
o=De("tab-pane"),i=H(),l=x(()=>t.closable||a.props.closable),s=K$(()=>a.currentName.value===(t.name||i.value)),c=H(s.value),d=x(()=>t.name||i.value),f=K$(()=>!t.lazy||c.value||s.value);return ce(s,p=>{p&&(c.value=!0)}),a.updatePaneState(bt({uid:r.uid,instance:ps(r),props:t,paneName:d,active:s,index:i,isClosable:l})),(p,v)=>A(f)?at((R(),X("div",{key:0,id:`pane-${A(d)}`,class:U(A(o).b()),role:"tabpanel","aria-hidden":!A(s),"aria-labelledby":`tab-${A(d)}`},[Oe(p.$slots,"default")],10,Vve)),[[_t,A(s)]]):se("v-if",!0)}}));var sI=Ae(Hve,[["__file","/home/runner/work/element-plus/element-plus/packages/components/tabs/src/tab-pane.vue"]]);const jve=xt(Fve,{TabPane:sI}),Kve=En(sI);mt.extend(tw);const{Option:Wve}=bs,ai=e=>{const t=(e||"").split(":");if(t.length>=2){let n=Number.parseInt(t[0],10);const r=Number.parseInt(t[1],10),a=e.toUpperCase();return a.includes("AM")&&n===12?n=0:a.includes("PM")&&n!==12&&(n+=12),{hours:n,minutes:r}}return null},Oy=(e,t)=>{const n=ai(e),r=ai(t),a=n.minutes+n.hours*60,o=r.minutes+r.hours*60;return a===o?0:a>o?1:-1},OO=e=>`${e}`.padStart(2,"0"),lu=e=>`${OO(e.hours)}:${OO(e.minutes)}`,Uve=(e,t)=>{const n=ai(e),r=ai(t),a={hours:n.hours,minutes:n.minutes};return 
a.minutes+=r.minutes,a.hours+=r.hours,a.hours+=Math.floor(a.minutes/60),a.minutes=a.minutes%60,lu(a)},Yve=G({name:"ElTimeSelect",components:{ElSelect:bs,ElOption:Wve,ElIcon:ft},model:{prop:"value",event:"change"},props:{format:{type:String,default:"HH:mm"},modelValue:String,disabled:{type:Boolean,default:!1},editable:{type:Boolean,default:!0},effect:{type:String,default:"light"},clearable:{type:Boolean,default:!0},size:{type:String,values:Bo,default:""},placeholder:{type:String,default:""},start:{type:String,default:"09:00"},end:{type:String,default:"18:00"},step:{type:String,default:"00:30"},minTime:{type:String,default:""},maxTime:{type:String,default:""},name:{type:String,default:""},prefixIcon:{type:[String,Object],default:uE},clearIcon:{type:[String,Object],default:gi}},emits:["change","blur","focus","update:modelValue"],setup(e){const t=H(null),n=x(()=>e.modelValue),r=x(()=>{const f=ai(e.start);return lu(f)}),a=x(()=>{const f=ai(e.end);return lu(f)}),o=x(()=>{const f=ai(e.step);return lu(f)}),i=x(()=>{const f=ai(e.minTime);return f?lu(f):null}),l=x(()=>{const f=ai(e.maxTime);return f?lu(f):null}),s=x(()=>{const f=[];if(e.start&&e.end&&e.step){let p=r.value,v;for(;Oy(p,a.value)<=0;)v=mt(p,"HH:mm").format(e.format),f.push({value:v,disabled:Oy(p,i.value||"-1:-1")<=0||Oy(p,l.value||"100:100")>=0}),p=Uve(p,o.value)}return f});return{select:t,value:n,items:s,blur:()=>{var f,p;(p=(f=t.value)==null?void 0:f.blur)==null||p.call(f)},focus:()=>{var f,p;(p=(f=t.value)==null?void 0:f.focus)==null||p.call(f)}}}});function qve(e,t,n,r,a,o){const i=we("el-option"),l=we("el-icon"),s=we("el-select");return 
R(),fe(s,{ref:"select","model-value":e.value,disabled:e.disabled,clearable:e.clearable,"clear-icon":e.clearIcon,size:e.size,effect:e.effect,placeholder:e.placeholder,"default-first-option":"",filterable:e.editable,"onUpdate:modelValue":t[0]||(t[0]=c=>e.$emit("update:modelValue",c)),onChange:t[1]||(t[1]=c=>e.$emit("change",c)),onBlur:t[2]||(t[2]=c=>e.$emit("blur",c)),onFocus:t[3]||(t[3]=c=>e.$emit("focus",c))},{prefix:re(()=>[e.prefixIcon?(R(),fe(l,{key:0,class:"el-input__prefix-icon"},{default:re(()=>[(R(),fe(Kt(e.prefixIcon)))]),_:1})):se("v-if",!0)]),default:re(()=>[(R(!0),X(Fe,null,Rt(e.items,c=>(R(),fe(i,{key:c.value,label:c.value,value:c.value,disabled:c.disabled},null,8,["label","value","disabled"]))),128))]),_:1},8,["model-value","disabled","clearable","clear-icon","size","effect","placeholder","filterable"])}var _p=Ae(Yve,[["render",qve],["__file","/home/runner/work/element-plus/element-plus/packages/components/time-select/src/time-select.vue"]]);_p.install=e=>{e.component(_p.name,_p)};const Gve=_p,Xve=Gve,Zve=G({name:"ElTimeline",setup(e,{slots:t}){const n=De("timeline");return ot("timeline",t),()=>qe("ul",{class:[n.b()]},[Oe(t,"default")])}}),Jve=Ze({timestamp:{type:String,default:""},hideTimestamp:{type:Boolean,default:!1},center:{type:Boolean,default:!1},placement:{type:String,default:"bottom"},type:{type:String,default:""},color:{type:String,default:""},size:{type:String,default:"normal"},icon:{type:wr,default:""},hollow:{type:Boolean,default:!1}}),Qve=G({name:"ElTimelineItem",components:{ElIcon:ft},props:Jve,setup(){return{ns:De("timeline-item")}}});function eme(e,t,n,r,a,o){const i=we("el-icon");return 
R(),X("li",{class:U([e.ns.b(),{[e.ns.e("center")]:e.center}])},[Z("div",{class:U(e.ns.e("tail"))},null,2),e.$slots.dot?se("v-if",!0):(R(),X("div",{key:0,class:U([e.ns.e("node"),e.ns.em("node",e.size||""),e.ns.em("node",e.type||""),e.ns.is("hollow",e.hollow)]),style:Xe({backgroundColor:e.color})},[e.icon?(R(),fe(i,{key:0,class:U(e.ns.e("icon"))},{default:re(()=>[(R(),fe(Kt(e.icon)))]),_:1},8,["class"])):se("v-if",!0)],6)),e.$slots.dot?(R(),X("div",{key:1,class:U(e.ns.e("dot"))},[Oe(e.$slots,"dot")],2)):se("v-if",!0),Z("div",{class:U(e.ns.e("wrapper"))},[!e.hideTimestamp&&e.placement==="top"?(R(),X("div",{key:0,class:U([e.ns.e("timestamp"),e.ns.is("top")])},Me(e.timestamp),3)):se("v-if",!0),Z("div",{class:U(e.ns.e("content"))},[Oe(e.$slots,"default")],2),!e.hideTimestamp&&e.placement==="bottom"?(R(),X("div",{key:1,class:U([e.ns.e("timestamp"),e.ns.is("bottom")])},Me(e.timestamp),3)):se("v-if",!0)],2)],2)}var uI=Ae(Qve,[["render",eme],["__file","/home/runner/work/element-plus/element-plus/packages/components/timeline/src/timeline-item.vue"]]);const tme=xt(Zve,{TimelineItem:uI}),nme=En(uI),cI=Ze({nowrap:Boolean});var dI=(e=>(e.top="top",e.bottom="bottom",e.left="left",e.right="right",e))(dI||{});const 
rme=Object.values(dI),ww=Ze({width:{type:Number,default:10},height:{type:Number,default:10},style:{type:Le(Object),default:null}}),ame=Ze({side:{type:Le(String),values:rme,required:!0}}),ome=["absolute","fixed"],ime=["top-start","top-end","top","bottom-start","bottom-end","bottom","left-start","left-end","left","right-start","right-end","right"],Sw=Ze({ariaLabel:String,arrowPadding:{type:Le(Number),default:5},effect:{type:String,default:""},contentClass:String,placement:{type:Le(String),values:ime,default:"bottom"},reference:{type:Le(Object),default:null},offset:{type:Number,default:8},strategy:{type:Le(String),values:ome,default:"absolute"},showArrow:{type:Boolean,default:!1}}),kw=Ze({delayDuration:{type:Number,default:300},defaultOpen:Boolean,open:{type:Boolean,default:void 0},onOpenChange:{type:Le(Function)},"onUpdate:open":{type:Le(Function)}}),zs={type:Le(Function)},$w=Ze({onBlur:zs,onClick:zs,onFocus:zs,onMouseDown:zs,onMouseEnter:zs,onMouseLeave:zs}),lme=Ze(Ke(Te(Te(Te(Te({},kw),ww),$w),Sw),{alwaysOn:Boolean,fullTransition:Boolean,transitionProps:{type:Le(Object),default:null},teleported:Boolean,to:{type:Le(String),default:"body"}})),sme={name:"ElTooltipV2Root"},ume=G(Ke(Te({},sme),{props:kw,setup(e,{expose:t}){const n=e,r=H(n.defaultOpen),a=H(null),o=x({get:()=>_U(n.open)?r.value:n.open,set:b=>{var C;r.value=b,(C=n["onUpdate:open"])==null||C.call(n,b)}}),i=x(()=>Yt(n.delayDuration)&&n.delayDuration>0),{start:l,stop:s}=gs(()=>{o.value=!0},x(()=>n.delayDuration),{immediate:!1}),c=De("tooltip-v2"),d=jC(),f=()=>{s(),o.value=!0},p=()=>{A(i)?l():f()},v=f,m=()=>{s(),o.value=!1};return ce(o,b=>{var C;b&&(document.dispatchEvent(new CustomEvent(oy)),v()),(C=n.onOpenChange)==null||C.call(n,b)}),et(()=>{document.addEventListener(oy,m)}),Lt(()=>{s(),document.removeEventListener(oy,m)}),ot(jm,{contentId:d,triggerRef:a,ns:c,onClose:m,onDelayOpen:p,onOpen:v}),t({onOpen:v,onClose:m}),(b,C)=>Oe(b.$slots,"default",{open:A(o)})}}));var 
cme=Ae(ume,[["__file","/home/runner/work/element-plus/element-plus/packages/components/tooltip-v2/src/root.vue"]]);const dme={name:"ElTooltipV2Arrow"},fme=G(Ke(Te({},dme),{props:Te(Te({},ww),ame),setup(e){const t=e,{ns:n}=ve(jm),{arrowRef:r}=ve(IE),a=x(()=>{const{style:o,width:i,height:l}=t,s=n.namespace.value;return Te({[`--${s}-tooltip-v2-arrow-width`]:`${i}px`,[`--${s}-tooltip-v2-arrow-height`]:`${l}px`,[`--${s}-tooltip-v2-arrow-border-width`]:`${i/2}px`,[`--${s}-tooltip-v2-arrow-cover-width`]:i/2-1},o||{})});return(o,i)=>(R(),X("span",{ref_key:"arrowRef",ref:r,style:Xe(A(a)),class:U(A(n).e("arrow"))},null,6))}}));var PO=Ae(fme,[["__file","/home/runner/work/element-plus/element-plus/packages/components/tooltip-v2/src/arrow.vue"]]);const hme=["data-side"],pme={name:"ElTooltipV2Content"},vme=G(Ke(Te({},pme),{props:Te(Te({},Sw),cI),setup(e){const t=e,{triggerRef:n,contentId:r}=ve(jm),a=H(t.placement),o=H(t.strategy),i=H(null),{referenceRef:l,contentRef:s,middlewareData:c,x:d,y:f,update:p}=BZ({placement:a,strategy:o,middleware:x(()=>{const w=[IZ(t.offset)];return t.showArrow&&w.push(VZ({arrowRef:i})),w})}),v=Pi().nextZIndex(),m=De("tooltip-v2"),y=x(()=>a.value.split("-")[0]),b=x(()=>({position:A(o),top:`${A(f)||0}px`,left:`${A(d)||0}px`,zIndex:v})),C=x(()=>{if(!t.showArrow)return{};const{arrow:w}=A(c);return{[`--${m.namespace.value}-tooltip-v2-arrow-x`]:`${w==null?void 0:w.x}px`||"",[`--${m.namespace.value}-tooltip-v2-arrow-y`]:`${w==null?void 0:w.y}px`||""}}),S=x(()=>[m.e("content"),m.is("dark",t.effect==="dark"),m.is(A(o)),t.contentClass]);return ce(i,()=>p()),ce(()=>t.placement,w=>a.value=w),et(()=>{ce(()=>t.reference||n.value,w=>{l.value=w||void 
0},{immediate:!0})}),ot(IE,{arrowRef:i}),(w,k)=>(R(),X("div",{ref_key:"contentRef",ref:s,style:Xe(A(b)),"data-tooltip-v2-root":""},[w.nowrap?se("v-if",!0):(R(),X("div",{key:0,"data-side":A(y),class:U(A(S))},[Oe(w.$slots,"default",{contentStyle:A(b),contentClass:A(S)}),g(A(cM),{id:A(r),role:"tooltip"},{default:re(()=>[w.ariaLabel?(R(),X(Fe,{key:0},[yt(Me(w.ariaLabel),1)],2112)):Oe(w.$slots,"default",{key:1})]),_:3},8,["id"]),Oe(w.$slots,"arrow",{style:Xe(A(C)),side:A(y)})],10,hme))],4))}}));var TO=Ae(vme,[["__file","/home/runner/work/element-plus/element-plus/packages/components/tooltip-v2/src/content.vue"]]);const mme=Ze({setRef:{type:Le(Function),required:!0},onlyChild:Boolean});var gme=G({props:mme,setup(e,{slots:t}){const n=H(),r=FC(n,a=>{a?e.setRef(a.nextElementSibling):e.setRef(null)});return()=>{var a;const[o]=((a=t.default)==null?void 0:a.call(t))||[],i=e.onlyChild?ZX(o.children):o.children;return g(Fe,{ref:r},[i])}}});const yme={name:"ElTooltipV2Trigger"},bme=G(Ke(Te({},yme),{props:Te(Te({},cI),$w),setup(e){const t=e,{onClose:n,onOpen:r,onDelayOpen:a,triggerRef:o,contentId:i}=ve(jm);let l=!1;const s=S=>{o.value=S},c=()=>{l=!1},d=Tn(t.onMouseEnter,a),f=Tn(t.onMouseLeave,n),p=Tn(t.onMouseDown,()=>{n(),l=!0,document.addEventListener("mouseup",c,{once:!0})}),v=Tn(t.onFocus,()=>{l||r()}),m=Tn(t.onBlur,n),y=Tn(t.onClick,S=>{S.detail===0&&n()}),b={blur:m,click:y,focus:v,mousedown:p,mouseenter:d,mouseleave:f},C=(S,w,k)=>{S&&Object.entries(w).forEach(([$,O])=>{S[k]($,O)})};return ce(o,(S,w)=>{C(S,b,"addEventListener"),C(w,b,"removeEventListener"),S&&S.setAttribute("aria-describedby",i.value)}),Lt(()=>{C(o.value,b,"removeEventListener"),document.removeEventListener("mouseup",c)}),(S,w)=>S.nowrap?(R(),fe(A(gme),{key:0,"set-ref":s,"only-child":""},{default:re(()=>[Oe(S.$slots,"default")]),_:3})):(R(),X("button",hn({key:1,ref_key:"triggerRef",ref:o},S.$attrs),[Oe(S.$slots,"default")],16))}}));var 
Cme=Ae(bme,[["__file","/home/runner/work/element-plus/element-plus/packages/components/tooltip-v2/src/trigger.vue"]]);const wme={name:"ElTooltipV2"},Sme=G(Ke(Te({},wme),{props:lme,setup(e){const n=or(e),r=bt(Yi(n,Object.keys(ww))),a=bt(Yi(n,Object.keys(Sw))),o=bt(Yi(n,Object.keys(kw))),i=bt(Yi(n,Object.keys($w)));return(l,s)=>(R(),fe(cme,Za(ll(A(o))),{default:re(({open:c})=>[g(Cme,hn(A(i),{nowrap:""}),{default:re(()=>[Oe(l.$slots,"trigger")]),_:3},16),(R(),fe(Ps,{to:l.to,disabled:!l.teleported},[l.fullTransition?(R(),fe(Vn,Za(hn({key:0},l.transitionProps)),{default:re(()=>[l.alwaysOn||c?(R(),fe(TO,Za(hn({key:0},A(a))),{arrow:re(({style:d,side:f})=>[l.showArrow?(R(),fe(PO,hn({key:0},A(r),{style:d,side:f}),null,16,["style","side"])):se("v-if",!0)]),default:re(()=>[Oe(l.$slots,"default")]),_:3},16)):se("v-if",!0)]),_:2},1040)):(R(),X(Fe,{key:1},[l.alwaysOn||c?(R(),fe(TO,Za(hn({key:0},A(a))),{arrow:re(({style:d,side:f})=>[l.showArrow?(R(),fe(PO,hn({key:0},A(r),{style:d,side:f}),null,16,["style","side"])):se("v-if",!0)]),default:re(()=>[Oe(l.$slots,"default")]),_:3},16)):se("v-if",!0)],2112))],8,["to","disabled"]))]),_:3},16))}}));var kme=Ae(Sme,[["__file","/home/runner/work/element-plus/element-plus/packages/components/tooltip-v2/src/tooltip.vue"]]);const $me=xt(kme),W0="checked-change",Ome={data:{type:Array,default(){return[]}},optionRender:Function,placeholder:String,title:String,filterable:Boolean,format:Object,filterMethod:Function,defaultChecked:Array,props:Object},Pme=(e,t)=>{const{emit:n}=$t(),r=x(()=>e.props.label||"label"),a=x(()=>e.props.key||"key"),o=x(()=>e.props.disabled||"disabled"),i=x(()=>e.data.filter(p=>typeof e.filterMethod=="function"?e.filterMethod(t.query,p):(p[r.value]||p[a.value].toString()).toLowerCase().includes(t.query.toLowerCase()))),l=x(()=>i.value.filter(p=>!p[o.value])),s=x(()=>{const p=t.checked.length,v=e.data.length,{noChecked:m,hasChecked:y}=e.format;return 
m&&y?p>0?y.replace(/\${checked}/g,p.toString()).replace(/\${total}/g,v.toString()):m.replace(/\${total}/g,v.toString()):`${p}/${v}`}),c=x(()=>{const p=t.checked.length;return p>0&&p<l.value.length}),d=()=>{const p=l.value.map(v=>v[a.value]);t.allChecked=p.length>0&&p.every(v=>t.checked.includes(v))},f=p=>{t.checked=p?l.value.map(v=>v[a.value]):[]};return ce(()=>t.checked,(p,v)=>{if(d(),t.checkChangeByUser){const m=p.concat(v).filter(y=>!p.includes(y)||!v.includes(y));n(W0,p,m)}else n(W0,p),t.checkChangeByUser=!0}),ce(l,()=>{d()}),ce(()=>e.data,()=>{const p=[],v=i.value.map(m=>m[a.value]);t.checked.forEach(m=>{v.includes(m)&&p.push(m)}),t.checkChangeByUser=!1,t.checked=p}),ce(()=>e.defaultChecked,(p,v)=>{if(v&&p.length===v.length&&p.every(b=>v.includes(b)))return;const m=[],y=l.value.map(b=>b[a.value]);p.forEach(b=>{y.includes(b)&&m.push(b)}),t.checkChangeByUser=!1,t.checked=m},{immediate:!0}),{labelProp:r,keyProp:a,disabledProp:o,filteredData:i,checkableData:l,checkedSummary:s,isIndeterminate:c,updateAllChecked:d,handleAllCheckedChange:f}},Tme=G({name:"ElTransferPanel",components:{ElCheckboxGroup:zM,ElCheckbox:io,ElInput:Ra,OptionContent:({option:e})=>e},props:Ome,emits:[W0],setup(e,{slots:t}){const{t:n}=ln(),r=De("transfer"),a=bt({checked:[],allChecked:!1,query:"",inputHover:!1,checkChangeByUser:!0}),{labelProp:o,keyProp:i,disabledProp:l,filteredData:s,checkedSummary:c,isIndeterminate:d,handleAllCheckedChange:f}=Pme(e,a),p=x(()=>a.query.length>0&&s.value.length===0),v=x(()=>!!t.default()[0].children.length),{checked:m,allChecked:y,query:b,inputHover:C,checkChangeByUser:S}=or(a);return{ns:r,labelProp:o,keyProp:i,disabledProp:l,filteredData:s,checkedSummary:c,isIndeterminate:d,handleAllCheckedChange:f,checked:m,allChecked:y,query:b,inputHover:C,checkChangeByUser:S,hasNoMatch:p,SearchIcon:oX,hasFooter:v,t:n}}});function xme(e,t,n,r,a,o){const i=we("el-checkbox"),l=we("el-input"),s=we("option-content"),c=we("el-checkbox-group");return 
R(),X("div",{class:U(e.ns.b("panel"))},[Z("p",{class:U(e.ns.be("panel","header"))},[g(i,{modelValue:e.allChecked,"onUpdate:modelValue":t[0]||(t[0]=d=>e.allChecked=d),indeterminate:e.isIndeterminate,onChange:e.handleAllCheckedChange},{default:re(()=>[yt(Me(e.title)+" ",1),Z("span",null,Me(e.checkedSummary),1)]),_:1},8,["modelValue","indeterminate","onChange"])],2),Z("div",{class:U([e.ns.be("panel","body"),e.ns.is("with-footer",e.hasFooter)])},[e.filterable?(R(),fe(l,{key:0,modelValue:e.query,"onUpdate:modelValue":t[1]||(t[1]=d=>e.query=d),class:U(e.ns.be("panel","filter")),size:"default",placeholder:e.placeholder,"prefix-icon":e.SearchIcon,clearable:"",onMouseenter:t[2]||(t[2]=d=>e.inputHover=!0),onMouseleave:t[3]||(t[3]=d=>e.inputHover=!1)},null,8,["modelValue","class","placeholder","prefix-icon"])):se("v-if",!0),at(g(c,{modelValue:e.checked,"onUpdate:modelValue":t[4]||(t[4]=d=>e.checked=d),class:U([e.ns.is("filterable",e.filterable),e.ns.be("panel","list")])},{default:re(()=>[(R(!0),X(Fe,null,Rt(e.filteredData,d=>(R(),fe(i,{key:d[e.keyProp],class:U(e.ns.be("panel","item")),label:d[e.keyProp],disabled:d[e.disabledProp]},{default:re(()=>[g(s,{option:e.optionRender(d)},null,8,["option"])]),_:2},1032,["class","label","disabled"]))),128))]),_:1},8,["modelValue","class"]),[[_t,!e.hasNoMatch&&e.data.length>0]]),at(Z("p",{class:U(e.ns.be("panel","empty"))},Me(e.hasNoMatch?e.t("el.transfer.noMatch"):e.t("el.transfer.noData")),3),[[_t,e.hasNoMatch||e.data.length===0]])],2),e.hasFooter?(R(),X("p",{key:0,class:U(e.ns.be("panel","footer"))},[Oe(e.$slots,"default")],2)):se("v-if",!0)],2)}var _me=Ae(Tme,[["render",xme],["__file","/home/runner/work/element-plus/element-plus/packages/components/transfer/src/transfer-panel.vue"]]);const Eme=e=>{const 
t=x(()=>e.props.key),n=x(()=>e.data.reduce((o,i)=>(o[i[t.value]]=i)&&o,{})),r=x(()=>e.data.filter(o=>!e.modelValue.includes(o[t.value]))),a=x(()=>e.targetOrder==="original"?e.data.filter(o=>e.modelValue.includes(o[t.value])):e.modelValue.reduce((o,i)=>{const l=n.value[i];return l&&o.push(l),o},[]));return{propsKey:t,sourceData:r,targetData:a}},fI="left-check-change",hI="right-check-change",Mme=(e,t)=>({onSourceCheckedChange:(a,o)=>{e.leftChecked=a,o!==void 0&&t(fI,a,o)},onTargetCheckedChange:(a,o)=>{e.rightChecked=a,o!==void 0&&t(hI,a,o)}}),Ime=(e,t,n,r)=>{const a=(l,s,c)=>{r(Pt,l),r(ir,l,s,c)};return{addToLeft:()=>{const l=e.modelValue.slice();t.rightChecked.forEach(s=>{const c=l.indexOf(s);c>-1&&l.splice(c,1)}),a(l,"left",t.rightChecked)},addToRight:()=>{let l=e.modelValue.slice();const s=e.data.filter(c=>{const d=c[n.value];return t.leftChecked.includes(d)&&!e.modelValue.includes(d)}).map(c=>c[n.value]);l=e.targetOrder==="unshift"?s.concat(l):l.concat(s),e.targetOrder==="original"&&(l=e.data.filter(c=>l.includes(c[n.value])).map(c=>c[n.value])),a(l,"right",t.leftChecked)}}},Nme=G({name:"ElTransfer",components:{TransferPanel:_me,ElButton:xa,ElIcon:ft,ArrowLeft:Es,ArrowRight:Da},props:{data:{type:Array,default:()=>[]},titles:{type:Array,default:()=>[]},buttonTexts:{type:Array,default:()=>[]},filterPlaceholder:{type:String,default:""},filterMethod:Function,leftDefaultChecked:{type:Array,default:()=>[]},rightDefaultChecked:{type:Array,default:()=>[]},renderContent:Function,modelValue:{type:Array,default:()=>[]},format:{type:Object,default:()=>({})},filterable:{type:Boolean,default:!1},props:{type:Object,default:()=>({label:"label",key:"key",disabled:"disabled"})},targetOrder:{type:String,default:"original",validator:e=>["original","push","unshift"].includes(e)}},emits:[Pt,ir,fI,hI],setup(e,{emit:t,slots:n}){const{t:r}=ln(),a=De("transfer"),o=ve(Ia,{}),i=bt({leftChecked:[],rightChecked:[]}),{propsKey:l,sourceData:s,targetData:c}=Eme(e),{onSourceCheckedChange:d,onTarge
tCheckedChange:f}=Mme(i,t),{addToLeft:p,addToRight:v}=Ime(e,i,l,t),m=H(),y=H(),b=O=>{switch(O){case"left":m.value.query="";break;case"right":y.value.query="";break}},C=x(()=>e.buttonTexts.length===2),S=x(()=>e.titles[0]||r("el.transfer.titles.0")),w=x(()=>e.titles[1]||r("el.transfer.titles.1")),k=x(()=>e.filterPlaceholder||r("el.transfer.filterPlaceholder"));ce(()=>e.modelValue,()=>{var O;(O=o.validate)==null||O.call(o,"change").catch(T=>void 0)});const $=x(()=>O=>e.renderContent?e.renderContent(qe,O):n.default?n.default({option:O}):qe("span",O[e.props.label]||O[e.props.key]));return Ke(Te({ns:a,sourceData:s,targetData:c,onSourceCheckedChange:d,onTargetCheckedChange:f,addToLeft:p,addToRight:v},or(i)),{hasButtonTexts:C,leftPanelTitle:S,rightPanelTitle:w,panelFilterPlaceholder:k,clearQuery:b,leftPanel:m,rightPanel:y,optionRender:$})}}),Ame={key:0},Dme={key:0};function Rme(e,t,n,r,a,o){const i=we("transfer-panel"),l=we("arrow-left"),s=we("el-icon"),c=we("el-button"),d=we("arrow-right");return R(),X("div",{class:U(e.ns.b())},[g(i,{ref:"leftPanel",data:e.sourceData,"option-render":e.optionRender,placeholder:e.panelFilterPlaceholder,title:e.leftPanelTitle,filterable:e.filterable,format:e.format,"filter-method":e.filterMethod,"default-checked":e.leftDefaultChecked,props:e.props,onCheckedChange:e.onSourceCheckedChange},{default:re(()=>[Oe(e.$slots,"left-footer")]),_:3},8,["data","option-render","placeholder","title","filterable","format","filter-method","default-checked","props","onCheckedChange"]),Z("div",{class:U(e.ns.e("buttons"))},[g(c,{type:"primary",class:U([e.ns.e("button"),e.ns.is("with-texts",e.hasButtonTexts)]),disabled:e.rightChecked.length===0,onClick:e.addToLeft},{default:re(()=>[g(s,null,{default:re(()=>[g(l)]),_:1}),e.buttonTexts[0]!==void 
0?(R(),X("span",Ame,Me(e.buttonTexts[0]),1)):se("v-if",!0)]),_:1},8,["class","disabled","onClick"]),g(c,{type:"primary",class:U([e.ns.e("button"),e.ns.is("with-texts",e.hasButtonTexts)]),disabled:e.leftChecked.length===0,onClick:e.addToRight},{default:re(()=>[e.buttonTexts[1]!==void 0?(R(),X("span",Dme,Me(e.buttonTexts[1]),1)):se("v-if",!0),g(s,null,{default:re(()=>[g(d)]),_:1})]),_:1},8,["class","disabled","onClick"])],2),g(i,{ref:"rightPanel",data:e.targetData,"option-render":e.optionRender,placeholder:e.panelFilterPlaceholder,filterable:e.filterable,format:e.format,"filter-method":e.filterMethod,title:e.rightPanelTitle,"default-checked":e.rightDefaultChecked,props:e.props,onCheckedChange:e.onTargetCheckedChange},{default:re(()=>[Oe(e.$slots,"right-footer")]),_:3},8,["data","option-render","placeholder","filterable","format","filter-method","title","default-checked","props","onCheckedChange"])],2)}var Ep=Ae(Nme,[["render",Rme],["__file","/home/runner/work/element-plus/element-plus/packages/components/transfer/src/index.vue"]]);Ep.install=e=>{e.component(Ep.name,Ep)};const Lme=Ep,Fme=Lme,_u="$treeNodeId",xO=function(e,t){!t||t[_u]||Object.defineProperty(t,_u,{value:e.id,enumerable:!1,configurable:!1,writable:!1})},Ow=function(e,t){return e?t[e]:t[_u]},U0=e=>{let t=!0,n=!0,r=!0;for(let a=0,o=e.length;a<o;a++){const i=e[a];(i.checked!==!0||i.indeterminate)&&(t=!1,i.disabled||(r=!1)),(i.checked!==!1||i.indeterminate)&&(n=!1)}return{all:t,none:n,allWithoutDisable:r,half:!t&&!n}},Mp=function(e){if(e.childNodes.length===0)return;const{all:t,none:n,half:r}=U0(e.childNodes);t?(e.checked=!0,e.indeterminate=!1):r?(e.checked=!1,e.indeterminate=!0):n&&(e.checked=!1,e.indeterminate=!1);const a=e.parent;!a||a.level===0||e.store.checkStrictly||Mp(a)},fh=function(e,t){const n=e.store.props,r=e.data||{},a=n[t];if(typeof a=="function")return a(r,e);if(typeof a=="string")return r[a];if(typeof a=="undefined"){const o=r[t];return o===void 0?"":o}};let Bme=0;class 
is{constructor(t){this.id=Bme++,this.text=null,this.checked=!1,this.indeterminate=!1,this.data=null,this.expanded=!1,this.parent=null,this.visible=!0,this.isCurrent=!1,this.canFocus=!1;for(const n in t)Mt(t,n)&&(this[n]=t[n]);this.level=0,this.loaded=!1,this.childNodes=[],this.loading=!1,this.parent&&(this.level=this.parent.level+1)}initialize(){const t=this.store;if(!t)throw new Error("[Node]store is required!");t.registerNode(this);const n=t.props;if(n&&typeof n.isLeaf!="undefined"){const o=fh(this,"isLeaf");typeof o=="boolean"&&(this.isLeafByUser=o)}if(t.lazy!==!0&&this.data?(this.setData(this.data),t.defaultExpandAll&&(this.expanded=!0,this.canFocus=!0)):this.level>0&&t.lazy&&t.defaultExpandAll&&this.expand(),Array.isArray(this.data)||xO(this,this.data),!this.data)return;const r=t.defaultExpandedKeys,a=t.key;a&&r&&r.includes(this.key)&&this.expand(null,t.autoExpandParent),a&&t.currentNodeKey!==void 0&&this.key===t.currentNodeKey&&(t.currentNode=this,t.currentNode.isCurrent=!0),t.lazy&&t._initDefaultCheckedNode(this),this.updateLeafState(),this.parent&&(this.level===1||this.parent.expanded===!0)&&(this.canFocus=!0)}setData(t){Array.isArray(t)||xO(this,t),this.data=t,this.childNodes=[];let n;this.level===0&&Array.isArray(this.data)?n=this.data:n=fh(this,"children")||[];for(let r=0,a=n.length;r<a;r++)this.insertChild({data:n[r]})}get label(){return fh(this,"label")}get key(){const t=this.store.key;return this.data?this.data[t]:null}get disabled(){return fh(this,"disabled")}get nextSibling(){const t=this.parent;if(t){const n=t.childNodes.indexOf(this);if(n>-1)return t.childNodes[n+1]}return null}get previousSibling(){const t=this.parent;if(t){const n=t.childNodes.indexOf(this);if(n>-1)return n>0?t.childNodes[n-1]:null}return null}contains(t,n=!0){return(this.childNodes||[]).some(r=>r===t||n&&r.contains(t))}remove(){const t=this.parent;t&&t.removeChild(this)}insertChild(t,n,r){if(!t)throw new Error("InsertChild error: child is required.");if(!(t instanceof 
is)){if(!r){const a=this.getChildren(!0);a.includes(t.data)||(typeof n=="undefined"||n<0?a.push(t.data):a.splice(n,0,t.data))}Object.assign(t,{parent:this,store:this.store}),t=bt(new is(t)),t instanceof is&&t.initialize()}t.level=this.level+1,typeof n=="undefined"||n<0?this.childNodes.push(t):this.childNodes.splice(n,0,t),this.updateLeafState()}insertBefore(t,n){let r;n&&(r=this.childNodes.indexOf(n)),this.insertChild(t,r)}insertAfter(t,n){let r;n&&(r=this.childNodes.indexOf(n),r!==-1&&(r+=1)),this.insertChild(t,r)}removeChild(t){const n=this.getChildren()||[],r=n.indexOf(t.data);r>-1&&n.splice(r,1);const a=this.childNodes.indexOf(t);a>-1&&(this.store&&this.store.deregisterNode(t),t.parent=null,this.childNodes.splice(a,1)),this.updateLeafState()}removeChildByData(t){let n=null;for(let r=0;r<this.childNodes.length;r++)if(this.childNodes[r].data===t){n=this.childNodes[r];break}n&&this.removeChild(n)}expand(t,n){const r=()=>{if(n){let a=this.parent;for(;a.level>0;)a.expanded=!0,a=a.parent}this.expanded=!0,t&&t(),this.childNodes.forEach(a=>{a.canFocus=!0})};this.shouldLoadData()?this.loadData(a=>{Array.isArray(a)&&(this.checked?this.setChecked(!0,!0):this.store.checkStrictly||Mp(this),r())}):r()}doCreateChildren(t,n={}){t.forEach(r=>{this.insertChild(Object.assign({data:r},n),void 0,!0)})}collapse(){this.expanded=!1,this.childNodes.forEach(t=>{t.canFocus=!1})}shouldLoadData(){return this.store.lazy===!0&&this.store.load&&!this.loaded}updateLeafState(){if(this.store.lazy===!0&&this.loaded!==!0&&typeof this.isLeafByUser!="undefined"){this.isLeaf=this.isLeafByUser;return}const t=this.childNodes;if(!this.store.lazy||this.store.lazy===!0&&this.loaded===!0){this.isLeaf=!t||t.length===0;return}this.isLeaf=!1}setChecked(t,n,r,a){if(this.indeterminate=t==="half",this.checked=t===!0,this.store.checkStrictly)return;if(!(this.shouldLoadData()&&!this.store.checkDescendants)){const{all:i,allWithoutDisable:l}=U0(this.childNodes);!this.isLeaf&&!i&&l&&(this.checked=!1,t=!1);const 
s=()=>{if(n){const c=this.childNodes;for(let p=0,v=c.length;p<v;p++){const m=c[p];a=a||t!==!1;const y=m.disabled?m.checked:a;m.setChecked(y,n,!0,a)}const{half:d,all:f}=U0(c);f||(this.checked=f,this.indeterminate=d)}};if(this.shouldLoadData()){this.loadData(()=>{s(),Mp(this)},{checked:t!==!1});return}else s()}const o=this.parent;!o||o.level===0||r||Mp(o)}getChildren(t=!1){if(this.level===0)return this.data;const n=this.data;if(!n)return null;const r=this.store.props;let a="children";return r&&(a=r.children||"children"),n[a]===void 0&&(n[a]=null),t&&!n[a]&&(n[a]=[]),n[a]}updateChildren(){const t=this.getChildren()||[],n=this.childNodes.map(o=>o.data),r={},a=[];t.forEach((o,i)=>{const l=o[_u];!!l&&n.findIndex(c=>c[_u]===l)>=0?r[l]={index:i,data:o}:a.push({index:i,data:o})}),this.store.lazy||n.forEach(o=>{r[o[_u]]||this.removeChildByData(o)}),a.forEach(({index:o,data:i})=>{this.insertChild({data:i},o)}),this.updateLeafState()}loadData(t,n={}){if(this.store.lazy===!0&&this.store.load&&!this.loaded&&(!this.loading||Object.keys(n).length)){this.loading=!0;const r=a=>{this.loaded=!0,this.loading=!1,this.childNodes=[],this.doCreateChildren(a,n),this.updateLeafState(),t&&t.call(this,a)};this.store.load(this,r)}else t&&t.call(this)}}class Vme{constructor(t){this.currentNode=null,this.currentNodeKey=null;for(const n in t)Mt(t,n)&&(this[n]=t[n]);this.nodesMap={}}initialize(){this.root=new is({data:this.data,store:this}),this.root.initialize(),this.lazy&&this.load?this.load(this.root,n=>{this.root.doCreateChildren(n),this._initDefaultCheckedNodes()}):this._initDefaultCheckedNodes()}filter(t){const n=this.filterNodeMethod,r=this.lazy,a=function(o){const i=o.root?o.root.childNodes:o.childNodes;if(i.forEach(l=>{l.visible=n.call(l,t,l.data,l),a(l)}),!o.visible&&i.length){let 
l=!0;l=!i.some(s=>s.visible),o.root?o.root.visible=l===!1:o.visible=l===!1}!t||o.visible&&!o.isLeaf&&!r&&o.expand()};a(this)}setData(t){t!==this.root.data?(this.root.setData(t),this._initDefaultCheckedNodes()):this.root.updateChildren()}getNode(t){if(t instanceof is)return t;const n=typeof t!="object"?t:Ow(this.key,t);return this.nodesMap[n]||null}insertBefore(t,n){const r=this.getNode(n);r.parent.insertBefore({data:t},r)}insertAfter(t,n){const r=this.getNode(n);r.parent.insertAfter({data:t},r)}remove(t){const n=this.getNode(t);n&&n.parent&&(n===this.currentNode&&(this.currentNode=null),n.parent.removeChild(n))}append(t,n){const r=n?this.getNode(n):this.root;r&&r.insertChild({data:t})}_initDefaultCheckedNodes(){const t=this.defaultCheckedKeys||[],n=this.nodesMap;t.forEach(r=>{const a=n[r];a&&a.setChecked(!0,!this.checkStrictly)})}_initDefaultCheckedNode(t){(this.defaultCheckedKeys||[]).includes(t.key)&&t.setChecked(!0,!this.checkStrictly)}setDefaultCheckedKey(t){t!==this.defaultCheckedKeys&&(this.defaultCheckedKeys=t,this._initDefaultCheckedNodes())}registerNode(t){const n=this.key;!t||!t.data||(n?t.key!==void 0&&(this.nodesMap[t.key]=t):this.nodesMap[t.id]=t)}deregisterNode(t){!this.key||!t||!t.data||(t.childNodes.forEach(r=>{this.deregisterNode(r)}),delete this.nodesMap[t.key])}getCheckedNodes(t=!1,n=!1){const r=[],a=function(o){(o.root?o.root.childNodes:o.childNodes).forEach(l=>{(l.checked||n&&l.indeterminate)&&(!t||t&&l.isLeaf)&&r.push(l.data),a(l)})};return a(this),r}getCheckedKeys(t=!1){return this.getCheckedNodes(t).map(n=>(n||{})[this.key])}getHalfCheckedNodes(){const t=[],n=function(r){(r.root?r.root.childNodes:r.childNodes).forEach(o=>{o.indeterminate&&t.push(o.data),n(o)})};return n(this),t}getHalfCheckedKeys(){return this.getHalfCheckedNodes().map(t=>(t||{})[this.key])}_getAllNodes(){const t=[],n=this.nodesMap;for(const r in n)Mt(n,r)&&t.push(n[r]);return t}updateChildren(t,n){const r=this.nodesMap[t];if(!r)return;const a=r.childNodes;for(let 
o=a.length-1;o>=0;o--){const i=a[o];this.remove(i.data)}for(let o=0,i=n.length;o<i;o++){const l=n[o];this.append(l,r.data)}}_setCheckedKeys(t,n=!1,r){const a=this._getAllNodes().sort((l,s)=>s.level-l.level),o=Object.create(null),i=Object.keys(r);a.forEach(l=>l.setChecked(!1,!1));for(let l=0,s=a.length;l<s;l++){const c=a[l],d=c.data[t].toString();if(!i.includes(d)){c.checked&&!o[d]&&c.setChecked(!1,!1);continue}let p=c.parent;for(;p&&p.level>0;)o[p.data[t]]=!0,p=p.parent;if(c.isLeaf||this.checkStrictly){c.setChecked(!0,!1);continue}if(c.setChecked(!0,!0),n){c.setChecked(!1,!1);const v=function(m){m.childNodes.forEach(b=>{b.isLeaf||b.setChecked(!1,!1),v(b)})};v(c)}}}setCheckedNodes(t,n=!1){const r=this.key,a={};t.forEach(o=>{a[(o||{})[r]]=!0}),this._setCheckedKeys(r,n,a)}setCheckedKeys(t,n=!1){this.defaultCheckedKeys=t;const r=this.key,a={};t.forEach(o=>{a[o]=!0}),this._setCheckedKeys(r,n,a)}setDefaultExpandedKeys(t){t=t||[],this.defaultExpandedKeys=t,t.forEach(n=>{const r=this.getNode(n);r&&r.expand(null,this.autoExpandParent)})}setChecked(t,n,r){const a=this.getNode(t);a&&a.setChecked(!!n,r)}getCurrentNode(){return this.currentNode}setCurrentNode(t){const n=this.currentNode;n&&(n.isCurrent=!1),this.currentNode=t,this.currentNode.isCurrent=!0}setUserCurrentNode(t,n=!0){const r=t[this.key],a=this.nodesMap[r];this.setCurrentNode(a),n&&this.currentNode.level>1&&this.currentNode.parent.expand(null,!0)}setCurrentNodeKey(t,n=!0){if(t==null){this.currentNode&&(this.currentNode.isCurrent=!1),this.currentNode=null;return}const r=this.getNode(t);r&&(this.setCurrentNode(r),n&&this.currentNode.level>1&&this.currentNode.parent.expand(null,!0))}}const zme=G({name:"ElTreeNodeContent",props:{node:{type:Object,required:!0},renderContent:Function},setup(e){const t=De("tree"),n=ve("NodeInstance"),r=ve("RootTree");return()=>{const a=e.node,{data:o,store:i}=a;return 
e.renderContent?e.renderContent(qe,{_self:n,node:a,data:o,store:i}):r.ctx.slots.default?r.ctx.slots.default({node:a,data:o}):qe("span",{class:t.be("node","label")},[a.label])}}});var Hme=Ae(zme,[["__file","/home/runner/work/element-plus/element-plus/packages/components/tree/src/tree-node-content.vue"]]);function pI(e){const t=ve("TreeNodeMap",null),n={treeNodeExpand:r=>{e.node!==r&&e.node.collapse()},children:[]};return t&&t.children.push(n),ot("TreeNodeMap",n),{broadcastExpanded:r=>{if(!!e.accordion)for(const a of n.children)a.treeNodeExpand(r)}}}const vI=Symbol("dragEvents");function jme({props:e,ctx:t,el$:n,dropIndicator$:r,store:a}){const o=De("tree"),i=H({showDropIndicator:!1,draggingNode:null,dropNode:null,allowDrop:!0,dropType:null});return ot(vI,{treeNodeDragStart:({event:d,treeNode:f})=>{if(typeof e.allowDrag=="function"&&!e.allowDrag(f.node))return d.preventDefault(),!1;d.dataTransfer.effectAllowed="move";try{d.dataTransfer.setData("text/plain","")}catch{}i.value.draggingNode=f,t.emit("node-drag-start",f.node,d)},treeNodeDragOver:({event:d,treeNode:f})=>{const p=f,v=i.value.dropNode;v&&v!==p&&Br(v.$el,o.is("drop-inner"));const m=i.value.draggingNode;if(!m||!p)return;let y=!0,b=!0,C=!0,S=!0;typeof e.allowDrop=="function"&&(y=e.allowDrop(m.node,p.node,"prev"),S=b=e.allowDrop(m.node,p.node,"inner"),C=e.allowDrop(m.node,p.node,"next")),d.dataTransfer.dropEffect=b||y||C?"move":"none",(y||b||C)&&v!==p&&(v&&t.emit("node-drag-leave",m.node,v.node,d),t.emit("node-drag-enter",m.node,p.node,d)),(y||b||C)&&(i.value.dropNode=p),p.node.nextSibling===m.node&&(C=!1),p.node.previousSibling===m.node&&(y=!1),p.node.contains(m.node,!1)&&(b=!1),(m.node===p.node||m.node.contains(p.node))&&(y=!1,b=!1,C=!1);const w=p.$el.getBoundingClientRect(),k=n.value.getBoundingClientRect();let $;const O=y?b?.25:C?.45:1:-1,T=C?b?.75:y?.55:0:1;let _=-9999;const I=d.clientY-w.top;I<w.height*O?$="before":I>w.height*T?$="after":b?$="inner":$="none";const 
L=p.$el.querySelector(`.${o.be("node","expand-icon")}`).getBoundingClientRect(),j=r.value;$==="before"?_=L.top-k.top:$==="after"&&(_=L.bottom-k.top),j.style.top=`${_}px`,j.style.left=`${L.right-k.left}px`,$==="inner"?xo(p.$el,o.is("drop-inner")):Br(p.$el,o.is("drop-inner")),i.value.showDropIndicator=$==="before"||$==="after",i.value.allowDrop=i.value.showDropIndicator||S,i.value.dropType=$,t.emit("node-drag-over",m.node,p.node,d)},treeNodeDragEnd:d=>{const{draggingNode:f,dropType:p,dropNode:v}=i.value;if(d.preventDefault(),d.dataTransfer.dropEffect="move",f&&v){const m={data:f.node.data};p!=="none"&&f.node.remove(),p==="before"?v.node.parent.insertBefore(m,v.node):p==="after"?v.node.parent.insertAfter(m,v.node):p==="inner"&&v.node.insertChild(m),p!=="none"&&a.value.registerNode(m),Br(v.$el,o.is("drop-inner")),t.emit("node-drag-end",f.node,v.node,p,d),p!=="none"&&t.emit("node-drop",f.node,v.node,p,d)}f&&!v&&t.emit("node-drag-end",f.node,null,p,d),i.value.showDropIndicator=!1,i.value.draggingNode=null,i.value.dropNode=null,i.value.allowDrop=!0}}),{dragState:i}}const Kme=G({name:"ElTreeNode",components:{ElCollapseTransition:Xm,ElCheckbox:io,NodeContent:Hme,ElIcon:ft,Loading:gl},props:{node:{type:is,default:()=>({})},props:{type:Object,default:()=>({})},accordion:Boolean,renderContent:Function,renderAfterExpand:Boolean,showCheckbox:{type:Boolean,default:!1}},emits:["node-expand"],setup(e,t){const n=De("tree"),{broadcastExpanded:r}=pI(e),a=ve("RootTree"),o=H(!1),i=H(!1),l=H(null),s=H(null),c=H(null),d=ve(vI),f=$t();ot("NodeInstance",f),e.node.expanded&&(o.value=!0,i.value=!0);const p=a.props.children||"children";ce(()=>{const I=e.node.data[p];return I&&[...I]},()=>{e.node.updateChildren()}),ce(()=>e.node.indeterminate,I=>{y(e.node.checked,I)}),ce(()=>e.node.checked,I=>{y(I,e.node.indeterminate)}),ce(()=>e.node.expanded,I=>{Ne(()=>o.value=I),I&&(i.value=!0)});const v=I=>Ow(a.props.nodeKey,I.data),m=I=>{const L=e.props.class;if(!L)return{};let 
j;if(Ct(L)){const{data:F}=I;j=L(F,I)}else j=L;return wt(j)?{[j]:!0}:j},y=(I,L)=>{(l.value!==I||s.value!==L)&&a.ctx.emit("check-change",e.node.data,I,L),l.value=I,s.value=L},b=I=>{const L=a.store.value;L.setCurrentNode(e.node),a.ctx.emit("current-change",L.currentNode?L.currentNode.data:null,L.currentNode),a.currentNode.value=e.node,a.props.expandOnClickNode&&S(),a.props.checkOnClickNode&&!e.node.disabled&&w(null,{target:{checked:!e.node.checked}}),a.ctx.emit("node-click",e.node.data,e.node,f,I)},C=I=>{a.instance.vnode.props.onNodeContextmenu&&(I.stopPropagation(),I.preventDefault()),a.ctx.emit("node-contextmenu",I,e.node.data,e.node,f)},S=()=>{e.node.isLeaf||(o.value?(a.ctx.emit("node-collapse",e.node.data,e.node,f),e.node.collapse()):(e.node.expand(),t.emit("node-expand",e.node.data,e.node,f)))},w=(I,L)=>{e.node.setChecked(L.target.checked,!a.props.checkStrictly),Ne(()=>{const j=a.store.value;a.ctx.emit("check",e.node.data,{checkedNodes:j.getCheckedNodes(),checkedKeys:j.getCheckedKeys(),halfCheckedNodes:j.getHalfCheckedNodes(),halfCheckedKeys:j.getHalfCheckedKeys()})})};return{ns:n,node$:c,tree:a,expanded:o,childNodeRendered:i,oldChecked:l,oldIndeterminate:s,getNodeKey:v,getNodeClass:m,handleSelectChange:y,handleClick:b,handleContextMenu:C,handleExpandIconClick:S,handleCheckChange:w,handleChildNodeExpand:(I,L,j)=>{r(L),a.ctx.emit("node-expand",I,L,j)},handleDragStart:I=>{!a.props.draggable||d.treeNodeDragStart({event:I,treeNode:e})},handleDragOver:I=>{I.preventDefault(),a.props.draggable&&d.treeNodeDragOver({event:I,treeNode:{$el:c.value,node:e.node}})},handleDrop:I=>{I.preventDefault()},handleDragEnd:I=>{!a.props.draggable||d.treeNodeDragEnd(I)},CaretRight:sE}}}),Wme=["aria-expanded","aria-disabled","aria-checked","draggable","data-key"],Ume=["aria-expanded"];function Yme(e,t,n,r,a,o){const i=we("el-icon"),l=we("el-checkbox"),s=we("loading"),c=we("node-content"),d=we("el-tree-node"),f=we("el-collapse-transition");return 
at((R(),X("div",{ref:"node$",class:U([e.ns.b("node"),e.ns.is("expanded",e.expanded),e.ns.is("current",e.node.isCurrent),e.ns.is("hidden",!e.node.visible),e.ns.is("focusable",!e.node.disabled),e.ns.is("checked",!e.node.disabled&&e.node.checked),e.getNodeClass(e.node)]),role:"treeitem",tabindex:"-1","aria-expanded":e.expanded,"aria-disabled":e.node.disabled,"aria-checked":e.node.checked,draggable:e.tree.props.draggable,"data-key":e.getNodeKey(e.node),onClick:t[1]||(t[1]=dt((...p)=>e.handleClick&&e.handleClick(...p),["stop"])),onContextmenu:t[2]||(t[2]=(...p)=>e.handleContextMenu&&e.handleContextMenu(...p)),onDragstart:t[3]||(t[3]=dt((...p)=>e.handleDragStart&&e.handleDragStart(...p),["stop"])),onDragover:t[4]||(t[4]=dt((...p)=>e.handleDragOver&&e.handleDragOver(...p),["stop"])),onDragend:t[5]||(t[5]=dt((...p)=>e.handleDragEnd&&e.handleDragEnd(...p),["stop"])),onDrop:t[6]||(t[6]=dt((...p)=>e.handleDrop&&e.handleDrop(...p),["stop"]))},[Z("div",{class:U(e.ns.be("node","content")),style:Xe({paddingLeft:(e.node.level-1)*e.tree.props.indent+"px"})},[e.tree.props.icon||e.CaretRight?(R(),fe(i,{key:0,class:U([e.ns.be("node","expand-icon"),e.ns.is("leaf",e.node.isLeaf),{expanded:!e.node.isLeaf&&e.expanded}]),onClick:dt(e.handleExpandIconClick,["stop"])},{default:re(()=>[(R(),fe(Kt(e.tree.props.icon||e.CaretRight)))]),_:1},8,["class","onClick"])):se("v-if",!0),e.showCheckbox?(R(),fe(l,{key:1,"model-value":e.node.checked,indeterminate:e.node.indeterminate,disabled:!!e.node.disabled,onClick:t[0]||(t[0]=dt(()=>{},["stop"])),onChange:e.handleCheckChange},null,8,["model-value","indeterminate","disabled","onChange"])):se("v-if",!0),e.node.loading?(R(),fe(i,{key:2,class:U([e.ns.be("node","loading-icon"),e.ns.is("loading")])},{default:re(()=>[g(s)]),_:1},8,["class"])):se("v-if",!0),g(c,{node:e.node,"render-content":e.renderContent},null,8,["node","render-content"])],6),g(f,null,{default:re(()=>[!e.renderAfterExpand||e.childNodeRendered?at((R(),X("div",{key:0,class:U(e.ns.be("node","chil
dren")),role:"group","aria-expanded":e.expanded},[(R(!0),X(Fe,null,Rt(e.node.childNodes,p=>(R(),fe(d,{key:e.getNodeKey(p),"render-content":e.renderContent,"render-after-expand":e.renderAfterExpand,"show-checkbox":e.showCheckbox,node:p,accordion:e.accordion,props:e.props,onNodeExpand:e.handleChildNodeExpand},null,8,["render-content","render-after-expand","show-checkbox","node","accordion","props","onNodeExpand"]))),128))],10,Ume)),[[_t,e.expanded]]):se("v-if",!0)]),_:1})],42,Wme)),[[_t,e.node.visible]])}var qme=Ae(Kme,[["render",Yme],["__file","/home/runner/work/element-plus/element-plus/packages/components/tree/src/tree-node.vue"]]);function Gme({el$:e},t){const n=De("tree"),r=Qn([]),a=Qn([]);et(()=>{i(),gn(e.value,"keydown",o)}),Lt(()=>{Bn(e.value,"keydown",o)}),ur(()=>{r.value=Array.from(e.value.querySelectorAll("[role=treeitem]")),a.value=Array.from(e.value.querySelectorAll("input[type=checkbox]"))}),ce(a,l=>{l.forEach(s=>{s.setAttribute("tabindex","-1")})});const o=l=>{const s=l.target;if(!s.className.includes(n.b("node")))return;const c=l.code;r.value=Array.from(e.value.querySelectorAll(`.${n.is("focusable")}[role=treeitem]`));const d=r.value.indexOf(s);let f;if([Ge.up,Ge.down].includes(c)){if(l.preventDefault(),c===Ge.up){f=d===-1?0:d!==0?d-1:r.value.length-1;const v=f;for(;!t.value.getNode(r.value[f].dataset.key).canFocus;){if(f--,f===v){f=-1;break}f<0&&(f=r.value.length-1)}}else{f=d===-1?0:d<r.value.length-1?d+1:0;const v=f;for(;!t.value.getNode(r.value[f].dataset.key).canFocus;){if(f++,f===v){f=-1;break}f>=r.value.length&&(f=0)}}f!==-1&&r.value[f].focus()}[Ge.left,Ge.right].includes(c)&&(l.preventDefault(),s.click());const p=s.querySelector('[type="checkbox"]');[Ge.enter,Ge.space].includes(c)&&p&&(l.preventDefault(),p.click())},i=()=>{var l;r.value=Array.from(e.value.querySelectorAll(`.${n.is("focusable")}[role=treeitem]`)),a.value=Array.from(e.value.querySelectorAll("input[type=checkbox]"));const 
s=e.value.querySelectorAll(`.${n.is("checked")}[role=treeitem]`);if(s.length){s[0].setAttribute("tabindex","0");return}(l=r.value[0])==null||l.setAttribute("tabindex","0")}}const Xme=G({name:"ElTree",components:{ElTreeNode:qme},props:{data:{type:Array,default:()=>[]},emptyText:{type:String},renderAfterExpand:{type:Boolean,default:!0},nodeKey:String,checkStrictly:Boolean,defaultExpandAll:Boolean,expandOnClickNode:{type:Boolean,default:!0},checkOnClickNode:Boolean,checkDescendants:{type:Boolean,default:!1},autoExpandParent:{type:Boolean,default:!0},defaultCheckedKeys:Array,defaultExpandedKeys:Array,currentNodeKey:[String,Number],renderContent:Function,showCheckbox:{type:Boolean,default:!1},draggable:{type:Boolean,default:!1},allowDrag:Function,allowDrop:Function,props:{type:Object,default:()=>({children:"children",label:"label",disabled:"disabled"})},lazy:{type:Boolean,default:!1},highlightCurrent:Boolean,load:Function,filterNodeMethod:Function,accordion:Boolean,indent:{type:Number,default:18},icon:[String,Object]},emits:["check-change","current-change","node-click","node-contextmenu","node-collapse","node-expand","check","node-drag-start","node-drag-end","node-drop","node-drag-leave","node-drag-enter","node-drag-over"],setup(e,t){const{t:n}=ln(),r=De("tree"),a=H(new Vme({key:e.nodeKey,data:e.data,lazy:e.lazy,props:e.props,load:e.load,currentNodeKey:e.currentNodeKey,checkStrictly:e.checkStrictly,checkDescendants:e.checkDescendants,defaultCheckedKeys:e.defaultCheckedKeys,defaultExpandedKeys:e.defaultExpandedKeys,autoExpandParent:e.autoExpandParent,defaultExpandAll:e.defaultExpandAll,filterNodeMethod:e.filterNodeMethod}));a.value.initialize();const o=H(a.value.root),i=H(null),l=H(null),s=H(null),{broadcastExpanded:c}=pI(e),{dragState:d}=jme({props:e,ctx:t,el$:l,dropIndicator$:s,store:a});Gme({el$:l},a);const 
f=x(()=>{const{childNodes:M}=o.value;return!M||M.length===0||M.every(({visible:E})=>!E)});ce(()=>e.defaultCheckedKeys,M=>{a.value.setDefaultCheckedKey(M)}),ce(()=>e.defaultExpandedKeys,M=>{a.value.setDefaultExpandedKeys(M)}),ce(()=>e.data,M=>{a.value.setData(M)},{deep:!0}),ce(()=>e.checkStrictly,M=>{a.value.checkStrictly=M});const p=M=>{if(!e.filterNodeMethod)throw new Error("[Tree] filterNodeMethod is required when filter");a.value.filter(M)},v=M=>Ow(e.nodeKey,M.data),m=M=>{if(!e.nodeKey)throw new Error("[Tree] nodeKey is required in getNodePath");const E=a.value.getNode(M);if(!E)return[];const K=[E.data];let W=E.parent;for(;W&&W!==o.value;)K.push(W.data),W=W.parent;return K.reverse()},y=(M,E)=>a.value.getCheckedNodes(M,E),b=M=>a.value.getCheckedKeys(M),C=()=>{const M=a.value.getCurrentNode();return M?M.data:null},S=()=>{if(!e.nodeKey)throw new Error("[Tree] nodeKey is required in getCurrentKey");const M=C();return M?M[e.nodeKey]:null},w=(M,E)=>{if(!e.nodeKey)throw new Error("[Tree] nodeKey is required in setCheckedNodes");a.value.setCheckedNodes(M,E)},k=(M,E)=>{if(!e.nodeKey)throw new Error("[Tree] nodeKey is required in setCheckedKeys");a.value.setCheckedKeys(M,E)},$=(M,E,K)=>{a.value.setChecked(M,E,K)},O=()=>a.value.getHalfCheckedNodes(),T=()=>a.value.getHalfCheckedKeys(),_=(M,E=!0)=>{if(!e.nodeKey)throw new Error("[Tree] nodeKey is required in setCurrentNode");a.value.setUserCurrentNode(M,E)},I=(M,E=!0)=>{if(!e.nodeKey)throw new Error("[Tree] nodeKey is required in setCurrentKey");a.value.setCurrentNodeKey(M,E)},L=M=>a.value.getNode(M),j=M=>{a.value.remove(M)},F=(M,E)=>{a.value.append(M,E)},N=(M,E)=>{a.value.insertBefore(M,E)},D=(M,E)=>{a.value.insertAfter(M,E)},z=(M,E,K)=>{c(E),t.emit("node-expand",M,E,K)},B=(M,E)=>{if(!e.nodeKey)throw new Error("[Tree] nodeKey is required in updateKeyChild");a.value.updateChildren(M,E)};return 
ot("RootTree",{ctx:t,props:e,store:a,root:o,currentNode:i,instance:$t()}),{ns:r,store:a,root:o,currentNode:i,dragState:d,el$:l,dropIndicator$:s,isEmpty:f,filter:p,getNodeKey:v,getNodePath:m,getCheckedNodes:y,getCheckedKeys:b,getCurrentNode:C,getCurrentKey:S,setCheckedNodes:w,setCheckedKeys:k,setChecked:$,getHalfCheckedNodes:O,getHalfCheckedKeys:T,setCurrentNode:_,setCurrentKey:I,t:n,getNode:L,remove:j,append:F,insertBefore:N,insertAfter:D,handleNodeExpand:z,updateKeyChildren:B}}});function Zme(e,t,n,r,a,o){var i;const l=we("el-tree-node");return R(),X("div",{ref:"el$",class:U([e.ns.b(),e.ns.is("dragging",!!e.dragState.draggingNode),e.ns.is("drop-not-allow",!e.dragState.allowDrop),e.ns.is("drop-inner",e.dragState.dropType==="inner"),{[e.ns.m("highlight-current")]:e.highlightCurrent}]),role:"tree"},[(R(!0),X(Fe,null,Rt(e.root.childNodes,s=>(R(),fe(l,{key:e.getNodeKey(s),node:s,props:e.props,accordion:e.accordion,"render-after-expand":e.renderAfterExpand,"show-checkbox":e.showCheckbox,"render-content":e.renderContent,onNodeExpand:e.handleNodeExpand},null,8,["node","props","accordion","render-after-expand","show-checkbox","render-content","onNodeExpand"]))),128)),e.isEmpty?(R(),X("div",{key:0,class:U(e.ns.e("empty-block"))},[Z("span",{class:U(e.ns.e("empty-text"))},Me((i=e.emptyText)!=null?i:e.t("el.tree.emptyText")),3)],2)):se("v-if",!0),at(Z("div",{ref:"dropIndicator$",class:U(e.ns.e("drop-indicator"))},null,2),[[_t,e.dragState.showDropIndicator]])],2)}var Ip=Ae(Xme,[["render",Zme],["__file","/home/runner/work/element-plus/element-plus/packages/components/tree/src/tree.vue"]]);Ip.install=e=>{e.component(Ip.name,Ip)};const Ov=Ip,Jme=Ov,Qme=(e,{attrs:t},{tree:n,key:r})=>{const a=De("tree-select"),o=Ke(Te(Te({},Yi(or(e),Object.keys(bs.props))),t),{valueKey:r,popperClass:x(()=>{const i=[a.e("popper")];return e.popperClass&&i.push(e.popperClass),i.join(" ")}),filterMethod:(i="")=>{e.filterMethod&&e.filterMethod(i),Ne(()=>{var 
l;(l=n.value)==null||l.filter(i)})},onVisibleChange:i=>{var l;(l=t.onVisibleChange)==null||l.call(t,i),e.filterable&&i&&o.filterMethod()}});return o},ege=G({extends:Cv,setup(e,t){const n=Cv.setup(e,t);return delete n.selectOptionClick,n},methods:{selectOptionClick(){this.$el.parentElement.click()}}}),tge=(e,{attrs:t,slots:n,emit:r},{select:a,tree:o,key:i})=>{ce(()=>e.modelValue,()=>{e.showCheckbox&&Ne(()=>{const c=o.value;c&&!er(c.getCheckedKeys(),Py(e.modelValue))&&c.setCheckedKeys(Py(e.modelValue))})},{immediate:!0,deep:!0});const l=x(()=>Te({value:i.value},e.props)),s=(c,d)=>{var f;const p=l.value[c];return Ct(p)?p(d,(f=o.value)==null?void 0:f.getNode(s("value",d))):d[p]};return Ke(Te(Te({},Yi(or(e),Object.keys(Ov.props))),t),{nodeKey:i,defaultExpandedKeys:x(()=>e.defaultExpandedKeys?e.defaultExpandedKeys.concat(e.modelValue):Py(e.modelValue)),renderContent:(c,{node:d,data:f,store:p})=>c(ege,{value:s("value",f),label:s("label",f),disabled:s("disabled",f)},e.renderContent?()=>e.renderContent(c,{node:d,data:f,store:p}):n.default?()=>n.default({node:d,data:f,store:p}):void 0),filterNodeMethod:(c,d,f)=>{var p;return e.filterNodeMethod?e.filterNodeMethod(c,d,f):c?(p=s("label",d))==null?void 0:p.includes(c):!0},onNodeClick:(c,d,f)=>{var p,v,m;if((p=t.onNodeClick)==null||p.call(t,c,d,f),e.checkStrictly||d.isLeaf){if(!s("disabled",c)){const y=(v=a.value)==null?void 0:v.options.get(s("value",c));(m=a.value)==null||m.handleOptionSelect(y,!0)}}else f.ctx.handleExpandIconClick()},onCheck:(c,d)=>{var f,p;(f=t.onCheck)==null||f.call(t,c,d);const v=e.checkStrictly?d.checkedKeys:(p=o.value)==null?void 0:p.getCheckedKeys(!0),m=s("value",c);r(Pt,e.multiple?v:v.includes(m)?m:void 0)}})};function Py(e){return Array.isArray(e)?e:e||e===0?[e]:[]}const nge=G({name:"ElTreeSelect",props:Te(Te({},bs.props),Ov.props),setup(e,t){const{slots:n,expose:r}=t,a=H(),o=H(),i=x(()=>e.valueKey||e.nodeKey||"value"),l=Qme(e,t,{select:a,tree:o,key:i}),s=tge(e,t,{select:a,tree:o,key:i}),c=bt({});return 
r(c),et(()=>{Object.assign(c,Te(Te({},Yi(o.value,["filter","updateKeyChildren","getCheckedNodes","setCheckedNodes","getCheckedKeys","setCheckedKeys","setChecked","getHalfCheckedNodes","getHalfCheckedKeys","getCurrentKey","getCurrentNode","setCurrentKey","setCurrentNode","getNode","remove","append","insertBefore","insertAfter"])),Yi(a.value,["focus","blur"])))}),()=>qe(bs,bt(Ke(Te({},l),{ref:d=>a.value=d})),Ke(Te({},n),{default:()=>qe(Ov,bt(Ke(Te({},s),{ref:d=>o.value=d})))}))}});var Np=Ae(nge,[["__file","/home/runner/work/element-plus/element-plus/packages/components/tree-select/src/tree-select.vue"]]);Np.install=e=>{e.component(Np.name,Np)};const rge=Np,age=rge,Pw=Symbol(),oge={key:-1,level:-1,data:{}};var rd=(e=>(e.KEY="id",e.LABEL="label",e.CHILDREN="children",e.DISABLED="disabled",e))(rd||{}),Y0=(e=>(e.ADD="add",e.DELETE="delete",e))(Y0||{});const ige=Ze({data:{type:Le(Array),default:()=>xn([])},emptyText:{type:String},height:{type:Number,default:200},props:{type:Le(Object),default:()=>xn({children:"children",label:"label",disabled:"disabled",value:"id"})},highlightCurrent:{type:Boolean,default:!1},showCheckbox:{type:Boolean,default:!1},defaultCheckedKeys:{type:Le(Array),default:()=>xn([])},checkStrictly:{type:Boolean,default:!1},defaultExpandedKeys:{type:Le(Array),default:()=>xn([])},indent:{type:Number,default:16},icon:{type:String},expandOnClickNode:{type:Boolean,default:!0},checkOnClickNode:{type:Boolean,default:!1},currentNodeKey:{type:Le([String,Number])},accordion:{type:Boolean,default:!1},filterMethod:{type:Le(Function)},perfMode:{type:Boolean,default:!0}}),lge=Ze({node:{type:Le(Object),default:()=>xn(oge)},expanded:{type:Boolean,default:!1},checked:{type:Boolean,default:!1},indeterminate:{type:Boolean,default:!1},showCheckbox:{type:Boolean,default:!1},disabled:{type:Boolean,default:!1},current:{type:Boolean,default:!1},hiddenExpandIcon:{type:Boolean,default:!1}}),sge=Ze({node:{type:Le(Object),required:!0}}),mI="node-click",gI="node-expand",yI="node-coll
apse",bI="current-change",CI="check",wI="check-change",SI="node-contextmenu",uge={[mI]:(e,t,n)=>e&&t&&n,[gI]:(e,t)=>e&&t,[yI]:(e,t)=>e&&t,[bI]:(e,t)=>e&&t,[CI]:(e,t)=>e&&t,[wI]:(e,t)=>e&&typeof t=="boolean",[SI]:(e,t,n)=>e&&t&&n},cge={click:(e,t)=>!!(e&&t),toggle:e=>!!e,check:(e,t)=>e&&typeof t=="boolean"};function dge(e,t){const n=H(new Set),r=H(new Set),{emit:a}=$t();ce(()=>t.value,()=>Ne(()=>{S(e.defaultCheckedKeys)}),{immediate:!0});const o=()=>{if(!t.value||!e.showCheckbox||e.checkStrictly)return;const{levelTreeNodeMap:w,maxLevel:k}=t.value,$=n.value,O=new Set;for(let T=k-1;T>=1;--T){const _=w.get(T);!_||_.forEach(I=>{const L=I.children;if(L){let j=!0,F=!1;for(const N of L){const D=N.key;if($.has(D))F=!0;else if(O.has(D)){j=!1,F=!0;break}else j=!1}j?$.add(I.key):F?(O.add(I.key),$.delete(I.key)):($.delete(I.key),O.delete(I.key))}})}r.value=O},i=w=>n.value.has(w.key),l=w=>r.value.has(w.key),s=(w,k,$=!0)=>{const O=n.value,T=(_,I)=>{O[I?Y0.ADD:Y0.DELETE](_.key);const L=_.children;!e.checkStrictly&&L&&L.forEach(j=>{j.disabled||T(j,I)})};T(w,k),o(),$&&c(w,k)},c=(w,k)=>{const{checkedNodes:$,checkedKeys:O}=m(),{halfCheckedNodes:T,halfCheckedKeys:_}=y();a(CI,w.data,{checkedKeys:O,checkedNodes:$,halfCheckedKeys:_,halfCheckedNodes:T}),a(wI,w.data,k)};function d(w=!1){return m(w).checkedKeys}function f(w=!1){return m(w).checkedNodes}function p(){return y().halfCheckedKeys}function v(){return y().halfCheckedNodes}function m(w=!1){const k=[],$=[];if((t==null?void 0:t.value)&&e.showCheckbox){const{treeNodeMap:O}=t.value;n.value.forEach(T=>{const _=O.get(T);_&&(!w||w&&_.isLeaf)&&($.push(T),k.push(_.data))})}return{checkedKeys:$,checkedNodes:k}}function y(){const w=[],k=[];if((t==null?void 0:t.value)&&e.showCheckbox){const{treeNodeMap:$}=t.value;r.value.forEach(O=>{const T=$.get(O);T&&(k.push(O),w.push(T.data))})}return{halfCheckedNodes:w,halfCheckedKeys:k}}function b(w){n.value.clear(),r.value.clear(),S(w)}function C(w,k){if((t==null?void 0:t.value)&&e.showCheckbox){const 
$=t.value.treeNodeMap.get(w);$&&s($,k,!1)}}function S(w){if(t!=null&&t.value){const{treeNodeMap:k}=t.value;if(e.showCheckbox&&k&&w)for(const $ of w){const O=k.get($);O&&!i(O)&&s(O,!0,!1)}}}return{updateCheckedKeys:o,toggleCheckbox:s,isChecked:i,isIndeterminate:l,getCheckedKeys:d,getCheckedNodes:f,getHalfCheckedKeys:p,getHalfCheckedNodes:v,setChecked:C,setCheckedKeys:b}}function fge(e,t){const n=H(new Set([])),r=H(new Set([])),a=x(()=>Ct(e.filterMethod));function o(l){var s;if(!a.value)return;const c=new Set,d=r.value,f=n.value,p=[],v=((s=t.value)==null?void 0:s.treeNodes)||[],m=e.filterMethod;f.clear();function y(b){b.forEach(C=>{p.push(C),m!=null&&m(l,C.data)?p.forEach(w=>{c.add(w.key)}):C.isLeaf&&f.add(C.key);const S=C.children;if(S&&y(S),!C.isLeaf){if(!c.has(C.key))f.add(C.key);else if(S){let w=!0;for(const k of S)if(!f.has(k.key)){w=!1;break}w?d.add(C.key):d.delete(C.key)}}p.pop()})}return y(v),c}function i(l){return r.value.has(l.key)}return{hiddenExpandIconKeySet:r,hiddenNodeKeySet:n,doFilter:o,isForceHiddenExpandIcon:i}}function hge(e,t){const n=H(new Set(e.defaultExpandedKeys)),r=H(),a=Qn();ce(()=>e.currentNodeKey,Q=>{r.value=Q},{immediate:!0}),ce(()=>e.data,Q=>{oe(Q)},{immediate:!0});const{isIndeterminate:o,isChecked:i,toggleCheckbox:l,getCheckedKeys:s,getCheckedNodes:c,getHalfCheckedKeys:d,getHalfCheckedNodes:f,setChecked:p,setCheckedKeys:v}=dge(e,a),{doFilter:m,hiddenNodeKeySet:y,isForceHiddenExpandIcon:b}=fge(e,a),C=x(()=>{var Q;return((Q=e.props)==null?void 0:Q.value)||rd.KEY}),S=x(()=>{var Q;return((Q=e.props)==null?void 0:Q.children)||rd.CHILDREN}),w=x(()=>{var Q;return((Q=e.props)==null?void 0:Q.disabled)||rd.DISABLED}),k=x(()=>{var Q;return((Q=e.props)==null?void 0:Q.label)||rd.LABEL}),$=x(()=>{const Q=n.value,ae=y.value,de=[],be=a.value&&a.value.treeNodes||[];function Ee(){const Pe=[];for(let Be=be.length-1;Be>=0;--Be)Pe.push(be[Be]);for(;Pe.length;){const Be=Pe.pop();if(!!Be&&(ae.has(Be.key)||de.push(Be),Q.has(Be.key))){const 
te=Be.children;if(te){const ie=te.length;for(let ge=ie-1;ge>=0;--ge)Pe.push(te[ge])}}}}return Ee(),de}),O=x(()=>$.value.length>0);function T(Q){const ae=new Map,de=new Map;let be=1;function Ee(Be,te=1,ie=void 0){var ge;const ke=[];for(const xe of Be){const Ie=L(xe),ye={level:te,key:Ie,data:xe};ye.label=F(xe),ye.parent=ie;const pe=I(xe);ye.disabled=j(xe),ye.isLeaf=!pe||pe.length===0,pe&&pe.length&&(ye.children=Ee(pe,te+1,ye)),ke.push(ye),ae.set(Ie,ye),de.has(te)||de.set(te,[]),(ge=de.get(te))==null||ge.push(ye)}return te>be&&(be=te),ke}const Pe=Ee(Q);return{treeNodeMap:ae,levelTreeNodeMap:de,maxLevel:be,treeNodes:Pe}}function _(Q){const ae=m(Q);ae&&(n.value=ae)}function I(Q){return Q[S.value]}function L(Q){return Q?Q[C.value]:""}function j(Q){return Q[w.value]}function F(Q){return Q[k.value]}function N(Q){n.value.has(Q.key)?E(Q):M(Q)}function D(Q,ae){t(mI,Q.data,Q,ae),z(Q),e.expandOnClickNode&&N(Q),e.showCheckbox&&e.checkOnClickNode&&!Q.disabled&&l(Q,!i(Q),!0)}function z(Q){Y(Q)||(r.value=Q.key,t(bI,Q.data,Q))}function B(Q,ae){l(Q,ae)}function M(Q){const ae=n.value;if((a==null?void 0:a.value)&&e.accordion){const{treeNodeMap:de}=a.value;ae.forEach(be=>{const Ee=de.get(be);Ee&&Ee.level===Ee.level&&ae.delete(be)})}ae.add(Q.key),t(gI,Q.data,Q)}function E(Q){n.value.delete(Q.key),t(yI,Q.data,Q)}function K(Q){return n.value.has(Q.key)}function W(Q){return!!Q.disabled}function Y(Q){const ae=r.value;return!!ae&&ae===Q.key}function q(){var Q,ae;if(!!r.value)return(ae=(Q=a==null?void 0:a.value)==null?void 0:Q.treeNodeMap.get(r.value))==null?void 0:ae.data}function J(){return r.value}function ne(Q){r.value=Q}function 
oe(Q){Ne(()=>a.value=T(Q))}return{tree:a,flattenTree:$,isNotEmpty:O,getKey:L,getChildren:I,toggleExpand:N,toggleCheckbox:l,isExpanded:K,isChecked:i,isIndeterminate:o,isDisabled:W,isCurrent:Y,isForceHiddenExpandIcon:b,handleNodeClick:D,handleNodeCheck:B,getCurrentNode:q,getCurrentKey:J,setCurrentKey:ne,getCheckedKeys:s,getCheckedNodes:c,getHalfCheckedKeys:d,getHalfCheckedNodes:f,setChecked:p,setCheckedKeys:v,filter:_,setData:oe}}var pge=G({name:"ElTreeNodeContent",props:sge,setup(e){const t=ve(Pw),n=De("tree");return()=>{const r=e.node,{data:a}=r;return t!=null&&t.ctx.slots.default?t.ctx.slots.default({node:r,data:a}):qe("span",{class:n.be("node","label")},[r==null?void 0:r.label])}}});const vge="caret-right",mge=G({name:"ElTreeNode",components:{ElIcon:ft,CaretRight:sE,ElCheckbox:io,ElNodeContent:pge},props:lge,emits:cge,setup(e,{emit:t}){const n=ve(Pw),r=De("tree"),a=x(()=>{var d;return(d=n==null?void 0:n.props.indent)!=null?d:16}),o=x(()=>{var d;return(d=n==null?void 0:n.props.icon)!=null?d:vge});return{ns:r,indent:a,icon:o,handleClick:d=>{t("click",e.node,d)},handleExpandIconClick:()=>{t("toggle",e.node)},handleCheckChange:d=>{t("check",e.node,d)},handleContextMenu:d=>{var f,p,v,m;(v=(p=(f=n==null?void 0:n.instance)==null?void 0:f.vnode)==null?void 0:p.props)!=null&&v.onNodeContextmenu&&(d.stopPropagation(),d.preventDefault()),n==null||n.ctx.emit(SI,d,(m=e.node)==null?void 0:m.data,e.node)}}}}),gge=["aria-expanded","aria-disabled","aria-checked","data-key"];function yge(e,t,n,r,a,o){var i,l,s;const c=we("el-icon"),d=we("el-checkbox"),f=we("el-node-content");return R(),X("div",{ref:"node$",class:U([e.ns.b("node"),e.ns.is("expanded",e.expanded),e.ns.is("current",e.current),e.ns.is("focusable",!e.disabled),e.ns.is("checked",!e.disabled&&e.checked)]),role:"treeitem",tabindex:"-1","aria-expanded":e.expanded,"aria-disabled":e.disabled,"aria-checked":e.checked,"data-key":(i=e.node)==null?void 
0:i.key,onClick:t[1]||(t[1]=dt((...p)=>e.handleClick&&e.handleClick(...p),["stop"])),onContextmenu:t[2]||(t[2]=(...p)=>e.handleContextMenu&&e.handleContextMenu(...p))},[Z("div",{class:U(e.ns.be("node","content")),style:Xe({paddingLeft:`${(e.node.level-1)*e.indent}px`})},[e.icon?(R(),fe(c,{key:0,class:U([e.ns.is("leaf",!!((l=e.node)!=null&&l.isLeaf)),e.ns.is("hidden",e.hiddenExpandIcon),{expanded:!((s=e.node)!=null&&s.isLeaf)&&e.expanded},e.ns.be("node","expand-icon")]),onClick:dt(e.handleExpandIconClick,["stop"])},{default:re(()=>[(R(),fe(Kt(e.icon)))]),_:1},8,["class","onClick"])):se("v-if",!0),e.showCheckbox?(R(),fe(d,{key:1,"model-value":e.checked,indeterminate:e.indeterminate,disabled:e.disabled,onChange:e.handleCheckChange,onClick:t[0]||(t[0]=dt(()=>{},["stop"]))},null,8,["model-value","indeterminate","disabled","onChange"])):se("v-if",!0),g(f,{node:e.node},null,8,["node"])],6)],42,gge)}var bge=Ae(mge,[["render",yge],["__file","/home/runner/work/element-plus/element-plus/packages/components/tree-v2/src/tree-node.vue"]]);const Cge=G({name:"ElTreeV2",components:{ElTreeNode:bge,FixedSizeList:q5},props:ige,emits:uge,setup(e,t){ot(Pw,{ctx:t,props:e,instance:$t()});const{t:n}=ln(),r=De("tree"),{flattenTree:a,isNotEmpty:o,toggleExpand:i,isExpanded:l,isIndeterminate:s,isChecked:c,isDisabled:d,isCurrent:f,isForceHiddenExpandIcon:p,toggleCheckbox:v,handleNodeClick:m,handleNodeCheck:y,getCurrentNode:b,getCurrentKey:C,setCurrentKey:S,getCheckedKeys:w,getCheckedNodes:k,getHalfCheckedKeys:$,getHalfCheckedNodes:O,setChecked:T,setCheckedKeys:_,filter:I,setData:L}=hge(e,t.emit);return 
t.expose({getCurrentNode:b,getCurrentKey:C,setCurrentKey:S,getCheckedKeys:w,getCheckedNodes:k,getHalfCheckedKeys:$,getHalfCheckedNodes:O,setChecked:T,setCheckedKeys:_,filter:I,setData:L}),{t:n,ns:r,flattenTree:a,itemSize:26,isNotEmpty:o,toggleExpand:i,toggleCheckbox:v,isExpanded:l,isIndeterminate:s,isChecked:c,isDisabled:d,isCurrent:f,isForceHiddenExpandIcon:p,handleNodeClick:m,handleNodeCheck:y}}});function wge(e,t,n,r,a,o){var i;const l=we("el-tree-node"),s=we("fixed-size-list");return R(),X("div",{class:U([e.ns.b(),{[e.ns.m("highlight-current")]:e.highlightCurrent}]),role:"tree"},[e.isNotEmpty?(R(),fe(s,{key:0,"class-name":e.ns.b("virtual-list"),data:e.flattenTree,total:e.flattenTree.length,height:e.height,"item-size":e.itemSize,"perf-mode":e.perfMode},{default:re(({data:c,index:d,style:f})=>[(R(),fe(l,{key:c[d].key,style:Xe(f),node:c[d],expanded:e.isExpanded(c[d]),"show-checkbox":e.showCheckbox,checked:e.isChecked(c[d]),indeterminate:e.isIndeterminate(c[d]),disabled:e.isDisabled(c[d]),current:e.isCurrent(c[d]),"hidden-expand-icon":e.isForceHiddenExpandIcon(c[d]),onClick:e.handleNodeClick,onToggle:e.toggleExpand,onCheck:e.handleNodeCheck},null,8,["style","node","expanded","show-checkbox","checked","indeterminate","disabled","current","hidden-expand-icon","onClick","onToggle","onCheck"]))]),_:1},8,["class-name","data","total","height","item-size","perf-mode"])):(R(),X("div",{key:1,class:U(e.ns.e("empty-block"))},[Z("span",{class:U(e.ns.e("empty-text"))},Me((i=e.emptyText)!=null?i:e.t("el.tree.emptyText")),3)],2))],2)}var Sge=Ae(Cge,[["render",wge],["__file","/home/runner/work/element-plus/element-plus/packages/components/tree-v2/src/tree.vue"]]);const kge=xt(Sge),$ge="ElUpload";class Oge extends Error{constructor(t,n,r,a){super(t),this.name="UploadAjaxError",this.status=n,this.method=r,this.url=a}}function _O(e,t,n){let r;return n.response?r=`${n.response.error||n.response}`:n.responseText?r=`${n.responseText}`:r=`fail to ${t.method} ${e} ${n.status}`,new 
Oge(r,n.status,t.method,e)}function Pge(e){const t=e.responseText||e.response;if(!t)return t;try{return JSON.parse(t)}catch{return t}}const Tge=e=>{typeof XMLHttpRequest=="undefined"&&qn($ge,"XMLHttpRequest is undefined");const t=new XMLHttpRequest,n=e.action;t.upload&&t.upload.addEventListener("progress",o=>{const i=o;i.percent=o.total>0?o.loaded/o.total*100:0,e.onProgress(i)});const r=new FormData;if(e.data)for(const[o,i]of Object.entries(e.data))Array.isArray(i)?r.append(o,...i):r.append(o,i);r.append(e.filename,e.file,e.file.name),t.addEventListener("error",()=>{e.onError(_O(n,e,t))}),t.addEventListener("load",()=>{if(t.status<200||t.status>=300)return e.onError(_O(n,e,t));e.onSuccess(Pge(t))}),t.open(e.method,n,!0),e.withCredentials&&"withCredentials"in t&&(t.withCredentials=!0);const a=e.headers||{};if(a instanceof Headers)a.forEach((o,i)=>t.setRequestHeader(i,o));else for(const[o,i]of Object.entries(a))ul(i)||t.setRequestHeader(o,String(i));return t.send(r),t},kI=["text","picture","picture-card"];let xge=1;const 
$I=()=>Date.now()+xge++,OI=Ze({action:{type:String,required:!0},headers:{type:Le(Object)},method:{type:String,default:"post"},data:{type:Object,default:()=>xn({})},multiple:{type:Boolean,default:!1},name:{type:String,default:"file"},drag:{type:Boolean,default:!1},withCredentials:Boolean,showFileList:{type:Boolean,default:!0},accept:{type:String,default:""},type:{type:String,default:"select"},fileList:{type:Le(Array),default:()=>xn([])},autoUpload:{type:Boolean,default:!0},listType:{type:String,values:kI,default:"text"},httpRequest:{type:Le(Function),default:Tge},disabled:Boolean,limit:Number}),_ge=Ze(Ke(Te({},OI),{beforeUpload:{type:Le(Function),default:Qt},beforeRemove:{type:Le(Function)},onRemove:{type:Le(Function),default:Qt},onChange:{type:Le(Function),default:Qt},onPreview:{type:Le(Function),default:Qt},onSuccess:{type:Le(Function),default:Qt},onProgress:{type:Le(Function),default:Qt},onError:{type:Le(Function),default:Qt},onExceed:{type:Le(Function),default:Qt}})),Ege=Ze({files:{type:Le(Array),default:()=>xn([])},disabled:{type:Boolean,default:!1},handlePreview:{type:Le(Function),default:Qt},listType:{type:String,values:kI,default:"text"}}),Mge={remove:e=>!!e},Ige=["onKeydown"],Nge=["src"],Age=["onClick"],Dge=["onClick"],Rge=["onClick"],Lge={name:"ElUploadList"},Fge=G(Ke(Te({},Lge),{props:Ege,emits:Mge,setup(e,{emit:t}){const 
n=e,{t:r}=ln(),a=De("upload"),o=De("icon"),i=De("list"),l=H(!1),s=f=>{n.handlePreview(f)},c=f=>{f.target.focus()},d=f=>{t("remove",f)};return(f,p)=>(R(),fe(e8,{tag:"ul",class:U([A(a).b("list"),A(a).bm("list",f.listType),A(a).is("disabled",f.disabled)]),name:A(i).b()},{default:re(()=>[(R(!0),X(Fe,null,Rt(f.files,v=>(R(),X("li",{key:v.uid||v.name,class:U([A(a).be("list","item"),A(a).is(v.status),{focusing:l.value}]),tabindex:"0",onKeydown:It(m=>!f.disabled&&d(v),["delete"]),onFocus:p[0]||(p[0]=m=>l.value=!0),onBlur:p[1]||(p[1]=m=>l.value=!1),onClick:c},[Oe(f.$slots,"default",{file:v},()=>[f.listType==="picture"||v.status!=="uploading"&&f.listType==="picture-card"?(R(),X("img",{key:0,class:U(A(a).be("list","item-thumbnail")),src:v.url,alt:""},null,10,Nge)):se("v-if",!0),f.listType!=="picture"&&(v.status==="uploading"||f.listType!=="picture-card")?(R(),X("div",{key:1,class:U(A(a).be("list","item-info"))},[Z("a",{class:U(A(a).be("list","item-name")),onClick:m=>s(v)},[g(A(ft),{class:U(A(o).m("document"))},{default:re(()=>[g(A(Mq))]),_:1},8,["class"]),yt(" "+Me(v.name),1)],10,Age),v.status==="uploading"?(R(),fe(A(L5),{key:0,type:f.listType==="picture-card"?"circle":"line","stroke-width":f.listType==="picture-card"?6:2,percentage:Number(v.percentage),style:Xe(f.listType==="picture-card"?"":"margin-top: 0.5rem")},null,8,["type","stroke-width","percentage","style"])):se("v-if",!0)],2)):se("v-if",!0),Z("label",{class:U(A(a).be("list","item-status-label"))},[f.listType==="text"?(R(),fe(A(ft),{key:0,class:U([A(o).m("upload-success"),A(o).m("circle-check")])},{default:re(()=>[g(A(mv))]),_:1},8,["class"])):["picture-card","picture"].includes(f.listType)?(R(),fe(A(ft),{key:1,class:U([A(o).m("upload-success"),A(o).m("check")])},{default:re(()=>[g(A(Wu))]),_:1},8,["class"])):se("v-if",!0)],2),f.disabled?se("v-if",!0):(R(),fe(A(ft),{key:2,class:U(A(o).m("close")),onClick:m=>d(v)},{default:re(()=>[g(A(Ma))]),_:2},1032,["class","onClick"])),se(" Due to close btn only appears when li 
gets focused disappears after li gets blurred, thus keyboard navigation can never reach close btn"),se(" This is a bug which needs to be fixed "),se(" TODO: Fix the incorrect navigation interaction "),f.disabled?se("v-if",!0):(R(),X("i",{key:3,class:U(A(o).m("close-tip"))},Me(A(r)("el.upload.deleteTip")),3)),f.listType==="picture-card"?(R(),X("span",{key:4,class:U(A(a).be("list","item-actions"))},[Z("span",{class:U(A(a).be("list","item-preview")),onClick:m=>f.handlePreview(v)},[g(A(ft),{class:U(A(o).m("zoom-in"))},{default:re(()=>[g(A(fE))]),_:1},8,["class"])],10,Dge),f.disabled?se("v-if",!0):(R(),X("span",{key:0,class:U(A(a).be("list","item-delete")),onClick:m=>d(v)},[g(A(ft),{class:U(A(o).m("delete"))},{default:re(()=>[g(A(Oq))]),_:1},8,["class"])],10,Rge))],2)):se("v-if",!0)])],42,Ige))),128)),Oe(f.$slots,"append")]),_:3},8,["class","name"]))}}));var EO=Ae(Fge,[["__file","/home/runner/work/element-plus/element-plus/packages/components/upload/src/upload-list.vue"]]);const Bge=Ze({disabled:{type:Boolean,default:!1}}),Vge={file:e=>pt(e)},zge=["onDrop","onDragover"],Hge={name:"ElUploadDrag"},jge=G(Ke(Te({},Hge),{props:Bge,emits:Vge,setup(e,{emit:t}){const n=e,r="ElUploadDrag",a=ve(EE);a||qn(r,"usage: <el-upload><el-upload-dragger /></el-upload>");const o=De("upload"),i=H(!1),l=c=>{if(n.disabled)return;i.value=!1;const d=Array.from(c.dataTransfer.files),f=a.accept.value;if(!f){t("file",d);return}const p=d.filter(v=>{const{type:m,name:y}=v,b=y.includes(".")?`.${y.split(".").pop()}`:"",C=m.replace(/\/.*$/,"");return f.split(",").map(S=>S.trim()).filter(S=>S).some(S=>S.startsWith(".")?b===S:/\/\*$/.test(S)?C===S.replace(/\/\*$/,""):/^[^/]+\/[^/]+$/.test(S)?m===S:!1)});t("file",p)},s=()=>{n.disabled||(i.value=!0)};return(c,d)=>(R(),X("div",{class:U([A(o).b("dragger"),A(o).is("dragover",i.value)]),onDrop:dt(l,["prevent"]),onDragover:dt(s,["prevent"]),onDragleave:d[0]||(d[0]=dt(f=>i.value=!1,["prevent"]))},[Oe(c.$slots,"default")],42,zge))}}));var 
Kge=Ae(jge,[["__file","/home/runner/work/element-plus/element-plus/packages/components/upload/src/upload-dragger.vue"]]);const Wge=Ze(Ke(Te({},OI),{fileList:{type:Le(Array),default:()=>xn([])},beforeUpload:{type:Le(Function),default:Qt},onRemove:{type:Le(Function),default:Qt},onStart:{type:Le(Function),default:Qt},onSuccess:{type:Le(Function),default:Qt},onProgress:{type:Le(Function),default:Qt},onError:{type:Le(Function),default:Qt},onExceed:{type:Le(Function),default:Qt}})),Uge=["onKeydown"],Yge=["name","multiple","accept"],qge={name:"ElUploadContent",inheritAttrs:!1},Gge=G(Ke(Te({},qge),{props:Wge,setup(e,{expose:t}){const n=e,r=De("upload"),a=Qn({}),o=Qn(),i=v=>{if(v.length===0)return;const{autoUpload:m,limit:y,fileList:b,multiple:C,onStart:S,onExceed:w}=n;if(y&&b.length+v.length>y){w(v,b);return}C||(v=v.slice(0,1));for(const k of v){const $=k;$.uid=$I(),S($),m&&l($)}},l=async v=>{if(o.value.value="",!n.beforeUpload)return s(v);let m;try{m=await n.beforeUpload(v)}catch{m=!1}if(m===!1){n.onRemove(v);return}let y=v;if(m instanceof Blob){m instanceof File?y=m:y=new File([m],v.name,{type:v.type});for(const b of Object.keys(v))y[b]=v[b]}s(v)},s=v=>{const{headers:m,data:y,method:b,withCredentials:C,name:S,action:w,onProgress:k,onSuccess:$,onError:O,httpRequest:T}=n,{uid:_}=v,I={headers:m||{},withCredentials:C,file:v,data:y,method:b,filename:S,action:w,onProgress:j=>{k(j,v)},onSuccess:j=>{$(j,v),delete a.value[_]},onError:j=>{O(j,v),delete a.value[_]}},L=T(I);a.value[_]=L,L instanceof Promise&&L.then(I.onSuccess,I.onError)},c=v=>{const m=v.target.files;!m||i(Array.from(m))},d=()=>{n.disabled||(o.value.value="",o.value.click())},f=()=>{d()};return t({abort:v=>{MU(a.value).filter(v?([y])=>String(v.uid)===y:()=>!0).forEach(([y,b])=>{b instanceof XMLHttpRequest&&b.abort(),delete 
a.value[y]})},upload:l}),(v,m)=>(R(),X("div",{class:U([A(r).b(),A(r).m(v.listType)]),tabindex:"0",onClick:d,onKeydown:It(dt(f,["self"]),["enter","space"])},[v.drag?(R(),fe(Kge,{key:0,disabled:v.disabled,onFile:i},{default:re(()=>[Oe(v.$slots,"default")]),_:3},8,["disabled"])):Oe(v.$slots,"default",{key:1}),Z("input",{ref_key:"inputRef",ref:o,class:U(A(r).e("input")),name:v.name,multiple:v.multiple,accept:v.accept,type:"file",onChange:c},null,42,Yge)],42,Uge))}}));var MO=Ae(Gge,[["__file","/home/runner/work/element-plus/element-plus/packages/components/upload/src/upload-content.vue"]]);const Ty="ElUpload",Xge=e=>{var t;(t=e.url)!=null&&t.startsWith("blob:")&&URL.revokeObjectURL(e.url)},Zge=(e,t)=>{const n=H([]),r=p=>n.value.find(v=>v.uid===p.uid);function a(p){var v;(v=t.value)==null||v.abort(p)}function o(p=["ready","uploading","success","fail"]){n.value=n.value.filter(v=>!p.includes(v.status))}const i=(p,v)=>{const m=r(v);!m||(m.status="fail",n.value.splice(n.value.indexOf(m),1),e.onError(p,m,n.value),e.onChange(m,n.value))},l=(p,v)=>{const m=r(v);!m||(e.onProgress(p,m,n.value),m.status="uploading",m.percentage=Math.round(p.percent))},s=(p,v)=>{const m=r(v);!m||(m.status="success",m.response=p,e.onSuccess(p,m,n.value),e.onChange(m,n.value))},c=p=>{const v={name:p.name,percentage:0,status:"ready",size:p.size,raw:p,uid:p.uid};if(e.listType==="picture-card"||e.listType==="picture")try{v.url=URL.createObjectURL(p)}catch(m){m.message,e.onError(m,v,n.value)}n.value.push(v),e.onChange(v,n.value)},d=async(p,v)=>{v&&Tf({scope:Ty,from:"handleRemove second argument",version:"2.2",replacement:"first argument `file`",ref:"https://element-plus.org/en-US/component/upload.html#methods"},!0);const m=v||p,y=m instanceof File?r(m):m;y||qn(Ty,"file to be removed not found");const b=C=>{a(C);const S=n.value;S.splice(S.indexOf(C),1),e.onRemove(C,S),Xge(C)};e.beforeRemove?await e.beforeRemove(y,n.value)!==!1&&b(y):b(y)};function 
f(){n.value.filter(({status:p})=>p==="ready").forEach(({raw:p})=>{var v;return p&&((v=t.value)==null?void 0:v.upload(p))})}return ce(()=>e.listType,p=>{p!=="picture-card"&&p!=="picture"||(n.value=n.value.map(v=>{const{raw:m,url:y}=v;if(!y&&m)try{v.url=URL.createObjectURL(m)}catch(b){e.onError(b,v,n.value)}return v}))}),ce(()=>e.fileList,p=>{for(const v of p)v.uid||(v.uid=$I()),v.status||(v.status="success");n.value=p},{immediate:!0,deep:!0}),{abort:a,clearFiles:o,handleError:i,handleProgress:l,handleStart:c,handleSuccess:s,handleRemove:d,submit:f,uploadFiles:n}},Jge={name:"ElUpload"},Qge=G(Ke(Te({},Jge),{props:_ge,setup(e,{expose:t}){const n=e,r=wf(),a=Ms(),o=Qn(),{abort:i,submit:l,clearFiles:s,uploadFiles:c,handleStart:d,handleError:f,handleRemove:p,handleSuccess:v,handleProgress:m}=Zge(n,o),y=x(()=>n.listType==="picture-card"),b=x(()=>Ke(Te({},n),{onStart:d,onProgress:m,onSuccess:v,onError:f,onRemove:p}));return Lt(()=>{c.value.forEach(({url:C})=>{C!=null&&C.startsWith("blob:")&&URL.revokeObjectURL(C)})}),ot(EE,{accept:yn(n,"accept")}),t({abort:i,submit:l,clearFiles:s,handleStart:d,handleRemove:p}),(C,S)=>(R(),X("div",null,[A(y)&&C.showFileList?(R(),fe(EO,{key:0,disabled:A(a),"list-type":C.listType,files:A(c),"handle-preview":C.onPreview,onRemove:A(p)},sl({append:re(()=>[C.listType==="picture-card"?(R(),fe(MO,hn({key:0,ref_key:"uploadRef",ref:o},A(b)),{default:re(()=>[A(r).trigger?Oe(C.$slots,"trigger",{key:0}):se("v-if",!0),!A(r).trigger&&A(r).default?Oe(C.$slots,"default",{key:1}):se("v-if",!0)]),_:3},16)):se("v-if",!0)]),_:2},[C.$slots.file?{name:"default",fn:re(({file:w})=>[Oe(C.$slots,"file",{file:w})])}:void 
0]),1032,["disabled","list-type","files","handle-preview","onRemove"])):se("v-if",!0),C.listType!=="picture-card"?(R(),fe(MO,hn({key:1,ref_key:"uploadRef",ref:o},A(b)),{default:re(()=>[A(r).trigger?Oe(C.$slots,"trigger",{key:0}):se("v-if",!0),!A(r).trigger&&A(r).default?Oe(C.$slots,"default",{key:1}):se("v-if",!0)]),_:3},16)):se("v-if",!0),C.$slots.trigger?Oe(C.$slots,"default",{key:2}):se("v-if",!0),Oe(C.$slots,"tip"),!A(y)&&C.showFileList?(R(),fe(EO,{key:3,disabled:A(a),"list-type":C.listType,files:A(c),"handle-preview":C.onPreview,onRemove:A(p)},sl({_:2},[C.$slots.file?{name:"default",fn:re(({file:w})=>[Oe(C.$slots,"file",{file:w})])}:void 0]),1032,["disabled","list-type","files","handle-preview","onRemove"])):se("v-if",!0)]))}}));var eye=Ae(Qge,[["__file","/home/runner/work/element-plus/element-plus/packages/components/upload/src/upload.vue"]]);const tye=xt(eye);var nye=[qZ,oJ,xee,Dee,Hee,dM,tte,nte,xa,mM,une,pne,Pne,Tne,jre,xre,Gre,io,Yne,zM,Jre,hae,pae,iae,zae,Kae,noe,roe,aoe,ooe,ioe,Fie,Wie,Uie,ale,ule,ble,gse,yse,bse,rue,Yue,que,ft,sce,x5,Ra,_5,Cce,zce,Hce,jce,Gce,qde,Qde,ife,uM,L5,YM,cre,ure,$fe,_fe,Ife,xi,bs,Cv,$de,$he,Ihe,Nhe,Zhe,ape,cpe,dpe,kpe,xve,_ve,jve,Kve,Gm,qte,Xve,tme,nme,Ur,$me,Fme,Jme,age,kge,tye];const Ga="ElInfiniteScroll",rye=50,aye=200,oye=0,iye={delay:{type:Number,default:aye},distance:{type:Number,default:oye},disabled:{type:Boolean,default:!1},immediate:{type:Boolean,default:!0}},Tw=(e,t)=>Object.entries(iye).reduce((n,[r,a])=>{var o,i;const{type:l,default:s}=a,c=e.getAttribute(`infinite-scroll-${r}`);let d=(i=(o=t[c])!=null?o:c)!=null?i:s;return d=d==="false"?!1:d,d=l(d),n[r]=Number.isNaN(d)?s:d,n},{}),PI=e=>{const{observer:t}=e[Ga];t&&(t.disconnect(),delete e[Ga].observer)},lye=(e,t)=>{const{container:n,containerEl:r,instance:a,observer:o,lastScrollTop:i}=e[Ga],{disabled:l,distance:s}=Tw(e,a),{clientHeight:c,scrollHeight:d,scrollTop:f}=r,p=f-i;if(e[Ga].lastScrollTop=f,o||l||p<0)return;let 
v=!1;if(n===e)v=d-(c+f)<=s;else{const{clientTop:m,scrollHeight:y}=e,b=TU(e,r);v=f+c>=b+m+y-s}v&&t.call(a)};function xy(e,t){const{containerEl:n,instance:r}=e[Ga],{disabled:a}=Tw(e,r);a||n.clientHeight===0||(n.scrollHeight<=n.clientHeight?t.call(r):PI(e))}const sye={async mounted(e,t){const{instance:n,value:r}=t;Ct(r)||qn(Ga,"'v-infinite-scroll' binding value must be a function"),await Ne();const{delay:a,immediate:o}=Tw(e,n),i=DC(e,!0),l=i===window?document.documentElement:i,s=Qi(lye.bind(null,e,r),a);if(!!i){if(e[Ga]={instance:n,container:i,containerEl:l,delay:a,cb:r,onScroll:s,lastScrollTop:l.scrollTop},o){const c=new MutationObserver(Qi(xy.bind(null,e,r),rye));e[Ga].observer=c,c.observe(e,{childList:!0,subtree:!0}),xy(e,r)}i.addEventListener("scroll",s)}},unmounted(e){const{container:t,onScroll:n}=e[Ga];t==null||t.removeEventListener("scroll",n),PI(e)},async updated(e){e[Ga]||await Ne();const{containerEl:t,cb:n,observer:r}=e[Ga];t.clientHeight&&r&&xy(e,n)}},q0=sye;q0.install=e=>{e.directive("InfiniteScroll",q0)};const uye=q0;function cye(e){let t;const n=H(!1),r=bt(Ke(Te({},e),{originalPosition:"",originalOverflow:"",visible:!1}));function a(f){r.text=f}function o(){const f=r.parent;if(!f.vLoadingAddClassList){let p=f.getAttribute("loading-number");p=Number.parseInt(p)-1,p?f.setAttribute("loading-number",p.toString()):(Br(f,"el-loading-parent--relative"),f.removeAttribute("loading-number")),Br(f,"el-loading-parent--hidden")}i()}function i(){var f,p;(p=(f=d.$el)==null?void 0:f.parentNode)==null||p.removeChild(d.$el)}function l(){var f;if(e.beforeClose&&!e.beforeClose())return;const p=r.parent;p.vLoadingAddClassList=void 0,n.value=!0,clearTimeout(t),t=window.setTimeout(()=>{n.value&&(n.value=!1,o())},400),r.visible=!1,(f=e.closed)==null||f.call(e)}function s(){!n.value||(n.value=!1,o())}const d=$m({name:"ElLoading",setup(){return()=>{const f=r.spinner||r.svg,p=qe("svg",Te({class:"circular",viewBox:r.svgViewBox?r.svgViewBox:"25 25 50 
50"},f?{innerHTML:f}:{}),[qe("circle",{class:"path",cx:"50",cy:"50",r:"20",fill:"none"})]),v=r.text?qe("p",{class:"el-loading-text"},[r.text]):void 0;return qe(Vn,{name:"el-loading-fade",onAfterLeave:s},{default:re(()=>[at(g("div",{style:{backgroundColor:r.background||""},class:["el-loading-mask",r.customClass,r.fullscreen?"is-fullscreen":""]},[qe("div",{class:"el-loading-spinner"},[p,v])]),[[_t,r.visible]])])})}}}).mount(document.createElement("div"));return Ke(Te({},or(r)),{setText:a,remvoeElLoadingChild:i,close:l,handleAfterLeave:s,vm:d,get $el(){return d.$el}})}let zc;const G0=function(e={}){if(!Bt)return;const t=dye(e);t.fullscreen&&zc&&(zc.remvoeElLoadingChild(),zc.close());const n=cye(Ke(Te({},t),{closed:()=>{var a;(a=t.closed)==null||a.call(t),t.fullscreen&&(zc=void 0)}}));fye(t,t.parent,n),IO(t,t.parent,n),t.parent.vLoadingAddClassList=()=>IO(t,t.parent,n);let r=t.parent.getAttribute("loading-number");return r?r=`${Number.parseInt(r)+1}`:r="1",t.parent.setAttribute("loading-number",r),t.parent.appendChild(n.$el),Ne(()=>n.visible.value=t.visible),t.fullscreen&&(zc=n),n},dye=e=>{var t,n,r,a;let o;return wt(e.target)?o=(t=document.querySelector(e.target))!=null?t:document.body:o=e.target||document.body,{parent:o===document.body||e.body?document.body:o,background:e.background||"",svg:e.svg||"",svgViewBox:e.svgViewBox||"",spinner:e.spinner||!1,text:e.text||"",fullscreen:o===document.body&&((n=e.fullscreen)!=null?n:!0),lock:(r=e.lock)!=null?r:!1,customClass:e.customClass||"",visible:(a=e.visible)!=null?a:!0,target:o}},fye=async(e,t,n)=>{const{nextZIndex:r}=Pi(),a={};if(e.fullscreen)n.originalPosition.value=$o(document.body,"position"),n.originalOverflow.value=$o(document.body,"overflow"),a.zIndex=r();else if(e.parent===document.body){n.originalPosition.value=$o(document.body,"position"),await Ne();for(const o of["top","left"]){const 
i=o==="top"?"scrollTop":"scrollLeft";a[o]=`${e.target.getBoundingClientRect()[o]+document.body[i]+document.documentElement[i]-Number.parseInt($o(document.body,`margin-${o}`),10)}px`}for(const o of["height","width"])a[o]=`${e.target.getBoundingClientRect()[o]}px`}else n.originalPosition.value=$o(t,"position");for(const[o,i]of Object.entries(a))n.$el.style[o]=i},IO=(e,t,n)=>{n.originalPosition.value!=="absolute"&&n.originalPosition.value!=="fixed"?xo(t,"el-loading-parent--relative"):Br(t,"el-loading-parent--relative"),e.fullscreen&&e.lock?xo(t,"el-loading-parent--hidden"):Br(t,"el-loading-parent--hidden")},X0=Symbol("ElLoading"),NO=(e,t)=>{var n,r,a,o;const i=t.instance,l=p=>zt(t.value)?t.value[p]:void 0,s=p=>{const v=wt(p)&&(i==null?void 0:i[p])||p;return v&&H(v)},c=p=>s(l(p)||e.getAttribute(`element-loading-${vl(p)}`)),d=(n=l("fullscreen"))!=null?n:t.modifiers.fullscreen,f={text:c("text"),svg:c("svg"),svgViewBox:c("svgViewBox"),spinner:c("spinner"),background:c("background"),customClass:c("customClass"),fullscreen:d,target:(r=l("target"))!=null?r:d?void 0:e,body:(a=l("body"))!=null?a:t.modifiers.body,lock:(o=l("lock"))!=null?o:t.modifiers.lock};e[X0]={options:f,instance:G0(f)}},hye=(e,t)=>{for(const n of Object.keys(t))_n(t[n])&&(t[n].value=e[n])},AO={mounted(e,t){t.value&&NO(e,t)},updated(e,t){const n=e[X0];t.oldValue!==t.value&&(t.value&&!t.oldValue?NO(e,t):t.value&&t.oldValue?zt(t.value)&&hye(t.value,n.options):n==null||n.instance.close())},unmounted(e){var 
t;(t=e[X0])==null||t.instance.close()}},pye={install(e){e.directive("loading",AO),e.config.globalProperties.$loading=G0},directive:AO,service:G0},TI=["success","info","warning","error"],vye=Ze({customClass:{type:String,default:""},center:{type:Boolean,default:!1},dangerouslyUseHTMLString:{type:Boolean,default:!1},duration:{type:Number,default:3e3},icon:{type:wr,default:""},id:{type:String,default:""},message:{type:Le([String,Object,Function]),default:""},onClose:{type:Le(Function),required:!1},showClose:{type:Boolean,default:!1},type:{type:String,values:TI,default:"info"},offset:{type:Number,default:20},zIndex:{type:Number,default:0},grouping:{type:Boolean,default:!1},repeatNum:{type:Number,default:1}}),mye={destroy:()=>!0},gye=G({name:"ElMessage",components:Te({ElBadge:dM,ElIcon:ft},Vm),props:vye,emits:mye,setup(e){const t=De("message"),n=H(!1),r=H(e.type?e.type==="error"?"danger":e.type:"info");let a;const o=x(()=>{const p=e.type;return{[t.bm("icon",p)]:p&&yi[p]}}),i=x(()=>e.icon||yi[e.type]||""),l=x(()=>({top:`${e.offset}px`,zIndex:e.zIndex}));function s(){e.duration>0&&({stop:a}=gs(()=>{n.value&&d()},e.duration))}function c(){a==null||a()}function d(){n.value=!1}function f({code:p}){p===Ge.esc?n.value&&d():s()}return et(()=>{s(),n.value=!0}),ce(()=>e.repeatNum,()=>{c(),s()}),Hn(document,"keydown",f),{ns:t,typeClass:o,iconComponent:i,customStyle:l,visible:n,badgeType:r,close:d,clearTimer:c,startTimer:s}}}),yye=["id"],bye=["innerHTML"];function Cye(e,t,n,r,a,o){const i=we("el-badge"),l=we("el-icon"),s=we("close");return 
R(),fe(Vn,{name:e.ns.b("fade"),onBeforeLeave:e.onClose,onAfterLeave:t[2]||(t[2]=c=>e.$emit("destroy"))},{default:re(()=>[at(Z("div",{id:e.id,class:U([e.ns.b(),{[e.ns.m(e.type)]:e.type&&!e.icon},e.ns.is("center",e.center),e.ns.is("closable",e.showClose),e.customClass]),style:Xe(e.customStyle),role:"alert",onMouseenter:t[0]||(t[0]=(...c)=>e.clearTimer&&e.clearTimer(...c)),onMouseleave:t[1]||(t[1]=(...c)=>e.startTimer&&e.startTimer(...c))},[e.repeatNum>1?(R(),fe(i,{key:0,value:e.repeatNum,type:e.badgeType,class:U(e.ns.e("badge"))},null,8,["value","type","class"])):se("v-if",!0),e.iconComponent?(R(),fe(l,{key:1,class:U([e.ns.e("icon"),e.typeClass])},{default:re(()=>[(R(),fe(Kt(e.iconComponent)))]),_:1},8,["class"])):se("v-if",!0),Oe(e.$slots,"default",{},()=>[e.dangerouslyUseHTMLString?(R(),X(Fe,{key:1},[se(" Caution here, message could've been compromised, never use user's input as message "),Z("p",{class:U(e.ns.e("content")),innerHTML:e.message},null,10,bye)],2112)):(R(),X("p",{key:0,class:U(e.ns.e("content"))},Me(e.message),3))]),e.showClose?(R(),fe(l,{key:2,class:U(e.ns.e("closeBtn")),onClick:dt(e.close,["stop"])},{default:re(()=>[g(s)]),_:1},8,["class","onClick"])):se("v-if",!0)],46,yye),[[_t,e.visible]])]),_:3},8,["name","onBeforeLeave"])}var wye=Ae(gye,[["render",Cye],["__file","/home/runner/work/element-plus/element-plus/packages/components/message/src/message.vue"]]);const ra=[];let Sye=1;const tc=function(e={},t){if(!Bt)return{close:()=>{}};if(Yt(I0.max)&&ra.length>=I0.max)return{close:()=>{}};if(!rn(e)&&zt(e)&&e.grouping&&!rn(e.message)&&ra.length){const f=ra.find(p=>{var v,m,y;return`${(m=(v=p.vm.props)==null?void 0:v.message)!=null?m:""}`==`${(y=e.message)!=null?y:""}`});if(f)return f.vm.component.props.repeatNum+=1,f.vm.component.props.type=(e==null?void 0:e.type)||"info",{close:()=>d.component.proxy.visible=!1}}(wt(e)||rn(e))&&(e={message:e});let n=e.offset||20;ra.forEach(({vm:f})=>{var p;n+=(((p=f.el)==null?void 
0:p.offsetHeight)||0)+16}),n+=16;const{nextZIndex:r}=Pi(),a=`message_${Sye++}`,o=e.onClose,i=Ke(Te({zIndex:r()},e),{offset:n,id:a,onClose:()=>{kye(a,o)}});let l=document.body;ys(e.appendTo)?l=e.appendTo:wt(e.appendTo)&&(l=document.querySelector(e.appendTo)),ys(l)||(l=document.body);const s=document.createElement("div");s.className=`container_${a}`;const c=i.message,d=g(wye,i,Ct(c)?{default:c}:rn(c)?{default:()=>c}:null);return d.appContext=t||tc._context,d.props.onDestroy=()=>{vs(null,s)},vs(d,s),ra.push({vm:d}),l.appendChild(s.firstElementChild),{close:()=>d.component.proxy.visible=!1}};TI.forEach(e=>{tc[e]=(t={},n)=>((wt(t)||rn(t))&&(t={message:t}),tc(Ke(Te({},t),{type:e}),n))});function kye(e,t){const n=ra.findIndex(({vm:i})=>e===i.component.props.id);if(n===-1)return;const{vm:r}=ra[n];if(!r)return;t==null||t(r);const a=r.el.offsetHeight;ra.splice(n,1);const o=ra.length;if(!(o<1))for(let i=n;i<o;i++){const l=Number.parseInt(ra[i].vm.el.style.top,10)-a-16;ra[i].vm.component.props.offset=l}}function $ye(){var e;for(let t=ra.length-1;t>=0;t--){const n=ra[t].vm.component;(e=n==null?void 0:n.proxy)==null||e.close()}}tc.closeAll=$ye;tc._context=null;const 
Oye=pE(tc,"$message"),Pye=G({name:"ElMessageBox",directives:{TrapFocus:kM},components:Te({ElButton:xa,ElInput:Ra,ElOverlay:sw,ElIcon:ft},Vm),inheritAttrs:!1,props:{buttonSize:{type:String,validator:va},modal:{type:Boolean,default:!0},lockScroll:{type:Boolean,default:!0},showClose:{type:Boolean,default:!0},closeOnClickModal:{type:Boolean,default:!0},closeOnPressEscape:{type:Boolean,default:!0},closeOnHashChange:{type:Boolean,default:!0},center:Boolean,draggable:Boolean,roundButton:{default:!1,type:Boolean},container:{type:String,default:"body"},boxType:{type:String,default:""}},emits:["vanish","action"],setup(e,{emit:t}){const{t:n}=ln(),r=H(!1),{nextZIndex:a}=Pi(),o=bt({beforeClose:null,callback:null,cancelButtonText:"",cancelButtonClass:"",confirmButtonText:"",confirmButtonClass:"",customClass:"",customStyle:{},dangerouslyUseHTMLString:!1,distinguishCancelAndClose:!1,icon:"",inputPattern:null,inputPlaceholder:"",inputType:"text",inputValue:null,inputValidator:null,inputErrorMessage:"",message:null,modalFade:!0,modalClass:"",showCancelButton:!1,showConfirmButton:!0,type:"",title:void 0,showInput:!1,action:"",confirmButtonLoading:!1,cancelButtonLoading:!1,confirmButtonDisabled:!1,editorErrorMessage:"",validateError:!1,zIndex:a()}),i=x(()=>{const _=o.type;return _&&yi[_]?`el-message-box-icon--${_}`:""}),l=Gn(x(()=>e.buttonSize),{prop:!0,form:!0,formItem:!0}),s=x(()=>o.icon||yi[o.type]||""),c=x(()=>!!o.message),d=H(),f=H(),p=H(),v=H(),m=x(()=>o.confirmButtonClass);ce(()=>o.inputValue,async _=>{await Ne(),e.boxType==="prompt"&&_!==null&&$()},{immediate:!0}),ce(()=>r.value,_=>{_&&((e.boxType==="alert"||e.boxType==="confirm")&&Ne().then(()=>{var I,L,j;(j=(L=(I=v.value)==null?void 0:I.$el)==null?void 0:L.focus)==null||j.call(L)}),o.zIndex=a()),e.boxType==="prompt"&&(_?Ne().then(()=>{p.value&&p.value.$el&&O().focus()}):(o.editorErrorMessage="",o.validateError=!1))});const y=x(()=>e.draggable);DE(d,f,y),et(async()=>{await 
Ne(),e.closeOnHashChange&&gn(window,"hashchange",b)}),Lt(()=>{e.closeOnHashChange&&Bn(window,"hashchange",b)});function b(){!r.value||(r.value=!1,Ne(()=>{o.action&&t("action",o.action)}))}const C=()=>{e.closeOnClickModal&&k(o.distinguishCancelAndClose?"close":"cancel")},S=HC(C),w=_=>{if(o.inputType!=="textarea")return _.preventDefault(),k("confirm")},k=_=>{var I;e.boxType==="prompt"&&_==="confirm"&&!$()||(o.action=_,o.beforeClose?(I=o.beforeClose)==null||I.call(o,_,o,b):b())},$=()=>{if(e.boxType==="prompt"){const _=o.inputPattern;if(_&&!_.test(o.inputValue||""))return o.editorErrorMessage=o.inputErrorMessage||n("el.messagebox.error"),o.validateError=!0,!1;const I=o.inputValidator;if(typeof I=="function"){const L=I(o.inputValue);if(L===!1)return o.editorErrorMessage=o.inputErrorMessage||n("el.messagebox.error"),o.validateError=!0,!1;if(typeof L=="string")return o.editorErrorMessage=L,o.validateError=!0,!1}}return o.editorErrorMessage="",o.validateError=!1,!0},O=()=>{const _=p.value.$refs;return _.input||_.textarea},T=()=>{k("close")};return e.closeOnPressEscape?LE({handleClose:T},r):dZ(r,"keydown",_=>_.code===Ge.esc),e.lockScroll&&RE(r),FE(r),Ke(Te({},or(o)),{overlayEvent:S,visible:r,hasMessage:c,typeClass:i,btnSize:l,iconComponent:s,confirmButtonClasses:m,rootRef:d,headerRef:f,inputRef:p,confirmRef:v,doClose:b,handleClose:T,handleWrapperClick:C,handleInputEnter:w,handleAction:k,t:n})}}),Tye=["aria-label"],xye={key:0,ref:"headerRef",class:"el-message-box__header"},_ye={class:"el-message-box__title"},Eye={class:"el-message-box__content"},Mye={class:"el-message-box__container"},Iye={key:1,class:"el-message-box__message"},Nye={key:0},Aye=["innerHTML"],Dye={class:"el-message-box__input"},Rye={class:"el-message-box__btns"};function Lye(e,t,n,r,a,o){const i=we("el-icon"),l=we("close"),s=we("el-input"),c=we("el-button"),d=we("el-overlay"),f=pa("trap-focus");return 
R(),fe(Vn,{name:"fade-in-linear",onAfterLeave:t[11]||(t[11]=p=>e.$emit("vanish"))},{default:re(()=>[at(g(d,{"z-index":e.zIndex,"overlay-class":["is-message-box",e.modalClass],mask:e.modal},{default:re(()=>[Z("div",{class:"el-overlay-message-box",onClick:t[8]||(t[8]=(...p)=>e.overlayEvent.onClick&&e.overlayEvent.onClick(...p)),onMousedown:t[9]||(t[9]=(...p)=>e.overlayEvent.onMousedown&&e.overlayEvent.onMousedown(...p)),onMouseup:t[10]||(t[10]=(...p)=>e.overlayEvent.onMouseup&&e.overlayEvent.onMouseup(...p))},[at((R(),X("div",{ref:"rootRef",role:"dialog","aria-label":e.title||"dialog","aria-modal":"true",class:U(["el-message-box",e.customClass,{"el-message-box--center":e.center,"is-draggable":e.draggable}]),style:Xe(e.customStyle),onClick:t[7]||(t[7]=dt(()=>{},["stop"]))},[e.title!==null&&e.title!==void 0?(R(),X("div",xye,[Z("div",_ye,[e.iconComponent&&e.center?(R(),fe(i,{key:0,class:U(["el-message-box__status",e.typeClass])},{default:re(()=>[(R(),fe(Kt(e.iconComponent)))]),_:1},8,["class"])):se("v-if",!0),Z("span",null,Me(e.title),1)]),e.showClose?(R(),X("button",{key:0,type:"button",class:"el-message-box__headerbtn","aria-label":"Close",onClick:t[0]||(t[0]=p=>e.handleAction(e.distinguishCancelAndClose?"close":"cancel")),onKeydown:t[1]||(t[1]=It(dt(p=>e.handleAction(e.distinguishCancelAndClose?"close":"cancel"),["prevent"]),["enter"]))},[g(i,{class:"el-message-box__close"},{default:re(()=>[g(l)]),_:1})],32)):se("v-if",!0)],512)):se("v-if",!0),Z("div",Eye,[Z("div",Mye,[e.iconComponent&&!e.center&&e.hasMessage?(R(),fe(i,{key:0,class:U(["el-message-box__status",e.typeClass])},{default:re(()=>[(R(),fe(Kt(e.iconComponent)))]),_:1},8,["class"])):se("v-if",!0),e.hasMessage?(R(),X("div",Iye,[Oe(e.$slots,"default",{},()=>[e.dangerouslyUseHTMLString?(R(),X("p",{key:1,innerHTML:e.message},null,8,Aye)):(R(),X("p",Nye,Me(e.message),1))])])):se("v-if",!0)]),at(Z("div",Dye,[g(s,{ref:"inputRef",modelValue:e.inputValue,"onUpdate:modelValue":t[2]||(t[2]=p=>e.inputValue=p),type:e.input
Type,placeholder:e.inputPlaceholder,class:U({invalid:e.validateError}),onKeydown:It(e.handleInputEnter,["enter"])},null,8,["modelValue","type","placeholder","class","onKeydown"]),Z("div",{class:"el-message-box__errormsg",style:Xe({visibility:e.editorErrorMessage?"visible":"hidden"})},Me(e.editorErrorMessage),5)],512),[[_t,e.showInput]])]),Z("div",Rye,[e.showCancelButton?(R(),fe(c,{key:0,loading:e.cancelButtonLoading,class:U([e.cancelButtonClass]),round:e.roundButton,size:e.btnSize,onClick:t[3]||(t[3]=p=>e.handleAction("cancel")),onKeydown:t[4]||(t[4]=It(dt(p=>e.handleAction("cancel"),["prevent"]),["enter"]))},{default:re(()=>[yt(Me(e.cancelButtonText||e.t("el.messagebox.cancel")),1)]),_:1},8,["loading","class","round","size"])):se("v-if",!0),at(g(c,{ref:"confirmRef",type:"primary",loading:e.confirmButtonLoading,class:U([e.confirmButtonClasses]),round:e.roundButton,disabled:e.confirmButtonDisabled,size:e.btnSize,onClick:t[5]||(t[5]=p=>e.handleAction("confirm")),onKeydown:t[6]||(t[6]=It(dt(p=>e.handleAction("confirm"),["prevent"]),["enter"]))},{default:re(()=>[yt(Me(e.confirmButtonText||e.t("el.messagebox.confirm")),1)]),_:1},8,["loading","class","round","disabled","size"]),[[_t,e.showConfirmButton]])])],14,Tye)),[[f]])],32)]),_:3},8,["z-index","overlay-class","mask"]),[[_t,e.visible]])]),_:3})}var Fye=Ae(Pye,[["render",Lye],["__file","/home/runner/work/element-plus/element-plus/packages/components/message-box/src/index.vue"]]);const Qd=new Map,Bye=(e,t,n=null)=>{const r=qe(Fye,e);return r.appContext=n,vs(r,t),document.body.appendChild(t.firstElementChild),r.component},Vye=()=>document.createElement("div"),zye=(e,t)=>{const n=Vye();e.onVanish=()=>{vs(null,n),Qd.delete(a)},e.onAction=o=>{const i=Qd.get(a);let l;e.showInput?l={value:a.inputValue,action:o}:l=o,e.callback?e.callback(l,r.proxy):o==="cancel"||o==="close"?e.distinguishCancelAndClose&&o!=="cancel"?i.reject("close"):i.reject("cancel"):i.resolve(l)};const r=Bye(e,n,t),a=r.proxy;for(const o in 
e)Mt(e,o)&&!Mt(a.$props,o)&&(a[o]=e[o]);return ce(()=>a.message,(o,i)=>{rn(o)?r.slots.default=()=>[o]:rn(i)&&!rn(o)&&delete r.slots.default},{immediate:!0}),a.visible=!0,a};function Tc(e,t=null){if(!Bt)return Promise.reject();let n;return wt(e)||rn(e)?e={message:e}:n=e.callback,new Promise((r,a)=>{const o=zye(e,t!=null?t:Tc._context);Qd.set(o,{options:e,callback:n,resolve:r,reject:a})})}const Hye=["alert","confirm","prompt"],jye={alert:{closeOnPressEscape:!1,closeOnClickModal:!1},confirm:{showCancelButton:!0},prompt:{showCancelButton:!0,showInput:!0}};Hye.forEach(e=>{Tc[e]=Kye(e)});function Kye(e){return(t,n,r,a)=>{let o;return zt(n)?(r=n,o=""):sa(n)?o="":o=n,Tc(Object.assign(Te({title:o,message:t,type:""},jye[e]),r,{boxType:e}),a)}}Tc.close=()=>{Qd.forEach((e,t)=>{t.doClose()}),Qd.clear()};Tc._context=null;const zi=Tc;zi.install=e=>{zi._context=e._context,e.config.globalProperties.$msgbox=zi,e.config.globalProperties.$messageBox=zi,e.config.globalProperties.$alert=zi.alert,e.config.globalProperties.$confirm=zi.confirm,e.config.globalProperties.$prompt=zi.prompt};const Wye=zi,xI=["success","info","warning","error"],Uye=Ze({customClass:{type:String,default:""},dangerouslyUseHTMLString:{type:Boolean,default:!1},duration:{type:Number,default:4500},icon:{type:Le([String,Object]),default:""},id:{type:String,default:""},message:{type:Le([String,Object]),default:""},offset:{type:Number,default:0},onClick:{type:Le(Function),default:()=>{}},onClose:{type:Le(Function),required:!0},position:{type:String,values:["top-right","top-left","bottom-right","bottom-left"],default:"top-right"},showClose:{type:Boolean,default:!0},title:{type:String,default:""},type:{type:String,values:[...xI,""],default:""},zIndex:{type:Number,default:0}}),Yye={destroy:()=>!0},qye=G({name:"ElNotification",components:Te({ElIcon:ft},Vm),props:Uye,emits:Yye,setup(e){const t=De("notification"),n=H(!1);let r;const a=x(()=>{const v=e.type;return 
v&&yi[e.type]?t.m(v):""}),o=x(()=>yi[e.type]||e.icon||""),i=x(()=>e.position.endsWith("right")?"right":"left"),l=x(()=>e.position.startsWith("top")?"top":"bottom"),s=x(()=>({[l.value]:`${e.offset}px`,zIndex:e.zIndex}));function c(){e.duration>0&&({stop:r}=gs(()=>{n.value&&f()},e.duration))}function d(){r==null||r()}function f(){n.value=!1}function p({code:v}){v===Ge.delete||v===Ge.backspace?d():v===Ge.esc?n.value&&f():c()}return et(()=>{c(),n.value=!0}),Hn(document,"keydown",p),{ns:t,horizontalClass:i,typeClass:a,iconComponent:o,positionStyle:s,visible:n,close:f,clearTimer:d,startTimer:c}}}),Gye=["id"],Xye=["textContent"],Zye={key:0},Jye=["innerHTML"];function Qye(e,t,n,r,a,o){const i=we("el-icon"),l=we("close");return R(),fe(Vn,{name:e.ns.b("fade"),onBeforeLeave:e.onClose,onAfterLeave:t[3]||(t[3]=s=>e.$emit("destroy"))},{default:re(()=>[at(Z("div",{id:e.id,class:U([e.ns.b(),e.customClass,e.horizontalClass]),style:Xe(e.positionStyle),role:"alert",onMouseenter:t[0]||(t[0]=(...s)=>e.clearTimer&&e.clearTimer(...s)),onMouseleave:t[1]||(t[1]=(...s)=>e.startTimer&&e.startTimer(...s)),onClick:t[2]||(t[2]=(...s)=>e.onClick&&e.onClick(...s))},[e.iconComponent?(R(),fe(i,{key:0,class:U([e.ns.e("icon"),e.typeClass])},{default:re(()=>[(R(),fe(Kt(e.iconComponent)))]),_:1},8,["class"])):se("v-if",!0),Z("div",{class:U(e.ns.e("group"))},[Z("h2",{class:U(e.ns.e("title")),textContent:Me(e.title)},null,10,Xye),at(Z("div",{class:U(e.ns.e("content")),style:Xe(e.title?void 0:{margin:0})},[Oe(e.$slots,"default",{},()=>[e.dangerouslyUseHTMLString?(R(),X(Fe,{key:1},[se(" Caution here, message could've been compromized, nerver use user's input as message "),se(" eslint-disable-next-line 
"),Z("p",{innerHTML:e.message},null,8,Jye)],2112)):(R(),X("p",Zye,Me(e.message),1))])],6),[[_t,e.message]]),e.showClose?(R(),fe(i,{key:0,class:U(e.ns.e("closeBtn")),onClick:dt(e.close,["stop"])},{default:re(()=>[g(l)]),_:1},8,["class","onClick"])):se("v-if",!0)],2)],46,Gye),[[_t,e.visible]])]),_:3},8,["name","onBeforeLeave"])}var ebe=Ae(qye,[["render",Qye],["__file","/home/runner/work/element-plus/element-plus/packages/components/notification/src/notification.vue"]]);const Pv={"top-left":[],"top-right":[],"bottom-left":[],"bottom-right":[]},Z0=16;let tbe=1;const nc=function(e={},t=null){if(!Bt)return{close:()=>{}};(typeof e=="string"||rn(e))&&(e={message:e});const n=e.position||"top-right";let r=e.offset||0;Pv[n].forEach(({vm:f})=>{var p;r+=(((p=f.el)==null?void 0:p.offsetHeight)||0)+Z0}),r+=Z0;const{nextZIndex:a}=Pi(),o=`notification_${tbe++}`,i=e.onClose,l=Ke(Te({zIndex:a(),offset:r},e),{id:o,onClose:()=>{nbe(o,n,i)}});let s=document.body;ys(e.appendTo)?s=e.appendTo:wt(e.appendTo)&&(s=document.querySelector(e.appendTo)),ys(s)||(s=document.body);const c=document.createElement("div"),d=g(ebe,l,rn(l.message)?{default:()=>l.message}:null);return d.appContext=t!=null?t:nc._context,d.props.onDestroy=()=>{vs(null,c)},vs(d,c),Pv[n].push({vm:d}),s.appendChild(c.firstElementChild),{close:()=>{d.component.proxy.visible=!1}}};xI.forEach(e=>{nc[e]=(t={})=>((typeof t=="string"||rn(t))&&(t={message:t}),nc(Ke(Te({},t),{type:e})))});function nbe(e,t,n){const r=Pv[t],a=r.findIndex(({vm:c})=>{var d;return((d=c.component)==null?void 0:d.props.id)===e});if(a===-1)return;const{vm:o}=r[a];if(!o)return;n==null||n(o);const i=o.el.offsetHeight,l=t.split("-")[0];r.splice(a,1);const s=r.length;if(!(s<1))for(let c=a;c<s;c++){const{el:d,component:f}=r[c].vm,p=Number.parseInt(d.style[l],10)-i-Z0;f.props.offset=p}}function rbe(){for(const e of Object.values(Pv))e.forEach(({vm:t})=>{t.component.proxy.visible=!1})}nc.closeAll=rbe;nc._context=null;const abe=pE(nc,"$notify");var 
obe=[uye,pye,Oye,Wye,abe,lfe],ibe=HZ([...nye,...obe]);
/* obe collects the directive/service plugins defined above (infinite scroll, loading, message,
   message box, notification, plus lfe); ibe is the result of passing the component list nye and
   obe to HZ. NOTE(review): presumably the library installer; HZ is defined elsewhere, confirm. */

/* V(obj, key, value): sets obj[key] = value and returns obj. When the key already exists on obj
   (including via the prototype chain) it is redefined as an enumerable, configurable, writable
   data property instead of being assigned. */
function V(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}
/* DO(obj, enumerableOnly): own string keys of obj followed by its own symbols; when the second
   argument is truthy only enumerable symbols are included. */
function DO(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter(function(a){return Object.getOwnPropertyDescriptor(e,a).enumerable})),n.push.apply(n,r)}return n}
/* le(target, ...sources): merges the sources into target and returns it. Sources at odd argument
   positions are copied by value through V; sources at even positions are copied with their
   property descriptors. null/undefined sources are treated as {}. */
function le(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?arguments[t]:{};t%2?DO(Object(n),!0).forEach(function(r){V(e,r,n[r])}):Object.getOwnPropertyDescriptors?Object.defineProperties(e,Object.getOwnPropertyDescriptors(n)):DO(Object(n)).forEach(function(r){Object.defineProperty(e,r,Object.getOwnPropertyDescriptor(n,r))})}return e}
/* P(target, ...sources): Object.assign, with a manual own-property copy as fallback. The function
   rebinds itself to the chosen implementation on first call. */
function P(){return P=Object.assign||function(e){for(var t=1;t<arguments.length;t++){var n=arguments[t];for(var r in n)Object.prototype.hasOwnProperty.call(n,r)&&(e[r]=n[r])}return e},P.apply(this,arguments)}
/* RO(target, descriptors): defines each descriptor on target under descriptor.key; enumerable
   defaults to false, configurable is forced to true, and writable is set when a value is present. */
function RO(e,t){for(var n=0;n<t.length;n++){var r=t[n];r.enumerable=r.enumerable||!1,r.configurable=!0,"value"in r&&(r.writable=!0),Object.defineProperty(e,r.key,r)}}
/* _I(Ctor, protoProps, staticProps): applies RO to Ctor.prototype and to Ctor, then returns Ctor. */
function _I(e,t,n){return t&&RO(e.prototype,t),n&&RO(e,n),e}
/* Ap(target, ...sources): second copy of the Object.assign-with-fallback helper (same logic as P). */
function Ap(){return(Ap=Object.assign||function(e){for(var t=1;t<arguments.length;t++){var n=arguments[t];for(var r in n)Object.prototype.hasOwnProperty.call(n,r)&&(e[r]=n[r])}return e}).apply(this,arguments)}
/* EI(Sub, Super): prototype-based inheritance; Sub.prototype is created from Super.prototype,
   its constructor is reset to Sub, and Sub.__proto__ is pointed at Super. */
function EI(e,t){e.prototype=Object.create(t.prototype),e.prototype.constructor=e,e.__proto__=t}
/* MI(obj, excludedKeys): shallow copy of obj's own enumerable string keys, omitting those listed
   in excludedKeys; returns {} for null/undefined. */
function MI(e,t){if(e==null)return{};var n,r,a={},o=Object.keys(e);for(r=0;r<o.length;r++)t.indexOf(n=o[r])>=0||(a[n]=e[n]);return a}
/* LO(value): true when value is a non-null, non-array object whose Object.prototype.toString
   tag is "[object Object]". */
function LO(e){return((t=e)!=null&&typeof t=="object"&&Array.isArray(t)===!1)==1&&Object.prototype.toString.call(e)==="[object Object]";var t}
/* Cached Object.prototype members, and AI: a regex capturing the name in "function Name" source text. */
var II=Object.prototype,NI=II.toString,lbe=II.hasOwnProperty,AI=/^\s*function (\w+)/;
/* FO(propOrCtor): takes `.type` when it is neither null nor undefined, otherwise the argument
   itself, and returns the function name matched by AI in its toString() output; returns ""
   when the value is falsy or nothing matches. */
function FO(e){var t,n=(t=e==null?void 0:e.type)!==null&&t!==void 0?t:e;if(n){var r=n.toString().match(AI);return r?r[1]:""}return""}
var /* declaration continues in the text that follows this block */
Cs=function(e){var t,n;return LO(e)!==!1&&typeof(t=e.constructor)=="function"&&LO(n=t.prototype)!==!1&&n.hasOwnProperty("isPrototypeOf")!==!1},sbe=function(e){return e},Er=sbe,ef=function(e,t){return lbe.call(e,t)},ube=Number.isInteger||function(e){return typeof e=="number"&&isFinite(e)&&Math.floor(e)===e},rc=Array.isArray||function(e){return NI.call(e)==="[object Array]"},ac=function(e){return NI.call(e)==="[object Function]"},Tv=function(e){return Cs(e)&&ef(e,"_vueTypes_name")},DI=function(e){return Cs(e)&&(ef(e,"type")||["_vueTypes_name","validator","default","required"].some(function(t){return ef(e,t)}))};function xw(e,t){return Object.defineProperty(e.bind(t),"__original",{value:e})}function Ns(e,t,n){var r;n===void 0&&(n=!1);var a=!0,o="";r=Cs(e)?e:{type:e};var i=Tv(r)?r._vueTypes_name+" - ":"";if(DI(r)&&r.type!==null){if(r.type===void 0||r.type===!0||!r.required&&t===void 0)return a;rc(r.type)?(a=r.type.some(function(f){return Ns(f,t,!0)===!0}),o=r.type.map(function(f){return FO(f)}).join(" or ")):a=(o=FO(r))==="Array"?rc(t):o==="Object"?Cs(t):o==="String"||o==="Number"||o==="Boolean"||o==="Function"?function(f){if(f==null)return"";var p=f.constructor.toString().match(AI);return p?p[1]:""}(t)===o:t instanceof r.type}if(!a){var l=i+'value "'+t+'" should be of type "'+o+'"';return n===!1?(Er(l),!1):l}if(ef(r,"validator")&&ac(r.validator)){var s=Er,c=[];if(Er=function(f){c.push(f)},a=r.validator(t),Er=s,!a){var d=(c.length>1?"* ":"")+c.join(`
* `);return c.length=0,n===!1?(Er(d),a):d}}return a}function ma(e,t){var n=Object.defineProperties(t,{_vueTypes_name:{value:e,writable:!0},isRequired:{get:function(){return this.required=!0,this}},def:{value:function(a){return a!==void 0||this.default?ac(a)||Ns(this,a,!0)===!0?(this.default=rc(a)?function(){return[].concat(a)}:Cs(a)?function(){return Object.assign({},a)}:a,this):(Er(this._vueTypes_name+' - invalid default value: "'+a+'"'),this):this}}}),r=n.validator;return ac(r)&&(n.validator=xw(r,n)),n}function Do(e,t){var n=ma(e,t);return Object.defineProperty(n,"validate",{value:function(r){return ac(this.validator)&&Er(this._vueTypes_name+` - calling .validate() will overwrite the current custom validator function. Validator info:
`+JSON.stringify(this)),this.validator=xw(r,this),this}})}function BO(e,t,n){var r,a,o=(r=t,a={},Object.getOwnPropertyNames(r).forEach(function(f){a[f]=Object.getOwnPropertyDescriptor(r,f)}),Object.defineProperties({},a));if(o._vueTypes_name=e,!Cs(n))return o;var i,l,s=n.validator,c=MI(n,["validator"]);if(ac(s)){var d=o.validator;d&&(d=(l=(i=d).__original)!==null&&l!==void 0?l:i),o.validator=xw(d?function(f){return d.call(this,f)&&s.call(this,f)}:s,o)}return Object.assign(o,c)}function Jm(e){return e.replace(/^(?!\s*$)/gm,"  ")}var cbe=function(){return Do("any",{})},dbe=function(){return Do("function",{type:Function})},fbe=function(){return Do("boolean",{type:Boolean})},hbe=function(){return Do("string",{type:String})},pbe=function(){return Do("number",{type:Number})},vbe=function(){return Do("array",{type:Array})},mbe=function(){return Do("object",{type:Object})},gbe=function(){return ma("integer",{type:Number,validator:function(e){return ube(e)}})},ybe=function(){return ma("symbol",{validator:function(e){return typeof e=="symbol"}})};function bbe(e,t){if(t===void 0&&(t="custom validation failed"),typeof e!="function")throw new TypeError("[VueTypes error]: You must provide a function as argument");return ma(e.name||"<<anonymous function>>",{validator:function(n){var r=e(n);return r||Er(this._vueTypes_name+" - "+t),r}})}function Cbe(e){if(!rc(e))throw new TypeError("[VueTypes error]: You must provide an array as argument.");var t='oneOf - value should be one of "'+e.join('", "')+'".',n=e.reduce(function(r,a){if(a!=null){var o=a.constructor;r.indexOf(o)===-1&&r.push(o)}return r},[]);return ma("oneOf",{type:n.length>0?n:void 0,validator:function(r){var a=e.indexOf(r)!==-1;return a||Er(t),a}})}function wbe(e){if(!rc(e))throw new TypeError("[VueTypes error]: You must provide an array as argument");for(var t=!1,n=[],r=0;r<e.length;r+=1){var 
a=e[r];if(DI(a)){if(Tv(a)&&a._vueTypes_name==="oneOf"){n=n.concat(a.type);continue}if(ac(a.validator)&&(t=!0),a.type!==!0&&a.type){n=n.concat(a.type);continue}}n.push(a)}return n=n.filter(function(o,i){return n.indexOf(o)===i}),ma("oneOfType",t?{type:n,validator:function(o){var i=[],l=e.some(function(s){var c=Ns(Tv(s)&&s._vueTypes_name==="oneOf"?s.type||null:s,o,!0);return typeof c=="string"&&i.push(c),c===!0});return l||Er("oneOfType - provided value does not match any of the "+i.length+` passed-in validators:
`+Jm(i.join(`
`))),l}}:{type:n})}function Sbe(e){return ma("arrayOf",{type:Array,validator:function(t){var n,r=t.every(function(a){return(n=Ns(e,a,!0))===!0});return r||Er(`arrayOf - value validation error:
`+Jm(n)),r}})}function kbe(e){return ma("instanceOf",{type:e})}function $be(e){return ma("objectOf",{type:Object,validator:function(t){var n,r=Object.keys(t).every(function(a){return(n=Ns(e,t[a],!0))===!0});return r||Er(`objectOf - value validation error:
`+Jm(n)),r}})}function Obe(e){var t=Object.keys(e),n=t.filter(function(a){var o;return!!(!((o=e[a])===null||o===void 0)&&o.required)}),r=ma("shape",{type:Object,validator:function(a){var o=this;if(!Cs(a))return!1;var i=Object.keys(a);if(n.length>0&&n.some(function(s){return i.indexOf(s)===-1})){var l=n.filter(function(s){return i.indexOf(s)===-1});return Er(l.length===1?'shape - required property "'+l[0]+'" is not defined.':'shape - required properties "'+l.join('", "')+'" are not defined.'),!1}return i.every(function(s){if(t.indexOf(s)===-1)return o._vueTypes_isLoose===!0||(Er('shape - shape definition does not include a "'+s+'" property. Allowed keys: "'+t.join('", "')+'".'),!1);var c=Ns(e[s],a[s],!0);return typeof c=="string"&&Er('shape - "'+s+`" property validation error:
 `+Jm(c)),c===!0})}});return Object.defineProperty(r,"_vueTypes_isLoose",{writable:!0,value:!1}),Object.defineProperty(r,"loose",{get:function(){return this._vueTypes_isLoose=!0,this}}),r}var wo=function(){function e(){}return e.extend=function(t){var n=this;if(rc(t))return t.forEach(function(f){return n.extend(f)}),this;var r=t.name,a=t.validate,o=a!==void 0&&a,i=t.getter,l=i!==void 0&&i,s=MI(t,["name","validate","getter"]);if(ef(this,r))throw new TypeError('[VueTypes error]: Type "'+r+'" already defined');var c,d=s.type;return Tv(d)?(delete s.type,Object.defineProperty(this,r,l?{get:function(){return BO(r,d,s)}}:{value:function(){var f,p=BO(r,d,s);return p.validator&&(p.validator=(f=p.validator).bind.apply(f,[p].concat([].slice.call(arguments)))),p}})):(c=l?{get:function(){var f=Object.assign({},s);return o?Do(r,f):ma(r,f)},enumerable:!0}:{value:function(){var f,p,v=Object.assign({},s);return f=o?Do(r,v):ma(r,v),v.validator&&(f.validator=(p=v.validator).bind.apply(p,[f].concat([].slice.call(arguments)))),f},enumerable:!0},Object.defineProperty(this,r,c))},_I(e,null,[{key:"any",get:function(){return cbe()}},{key:"func",get:function(){return dbe().def(this.defaults.func)}},{key:"bool",get:function(){return fbe().def(this.defaults.bool)}},{key:"string",get:function(){return hbe().def(this.defaults.string)}},{key:"number",get:function(){return pbe().def(this.defaults.number)}},{key:"array",get:function(){return vbe().def(this.defaults.array)}},{key:"object",get:function(){return mbe().def(this.defaults.object)}},{key:"integer",get:function(){return gbe().def(this.defaults.integer)}},{key:"symbol",get:function(){return ybe()}}]),e}();function RI(e){var t;return e===void 0&&(e={func:function(){},bool:!0,string:"",number:0,array:function(){return[]},object:function(){return{}},integer:0}),(t=function(n){function r(){return n.apply(this,arguments)||this}return EI(r,n),_I(r,null,[{key:"sensibleDefaults",get:function(){return 
Ap({},this.defaults)},set:function(a){this.defaults=a!==!1?Ap({},a!==!0?a:e):{}}}]),r}(wo)).defaults=Ap({},e),t}wo.defaults={},wo.custom=bbe,wo.oneOf=Cbe,wo.instanceOf=kbe,wo.oneOfType=wbe,wo.arrayOf=Sbe,wo.objectOf=$be,wo.shape=Obe,wo.utils={validate:function(e,t){return Ns(t,e,!0)===!0},toType:function(e,t,n){return n===void 0&&(n=!1),n?Do(e,t):ma(e,t)}};(function(e){function t(){return e.apply(this,arguments)||this}return EI(t,e),t})(RI());var LI=RI({func:void 0,bool:void 0,string:void 0,number:void 0,array:void 0,object:void 0,integer:void 0});LI.extend([{name:"looseBool",getter:!0,type:Boolean,default:void 0},{name:"style",getter:!0,type:[String,Object],default:void 0},{name:"VNodeChild",getter:!0,type:null}]);function an(e){return e.default=void 0,e}var u=LI;function kt(e){return kt=typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?function(t){return typeof t}:function(t){return t&&typeof Symbol=="function"&&t.constructor===Symbol&&t!==Symbol.prototype?"symbol":typeof t},kt(e)}var Pbe=function(t){return typeof t=="function"},Tbe=Array.isArray,xbe=function(t){return typeof t=="string"},_be=function(t){return t!==null&&kt(t)==="object"},Ebe=/^on[^a-z]/,Mbe=function(t){return Ebe.test(t)},FI=function(t){var n=Object.create(null);return function(r){var a=n[r];return a||(n[r]=t(r))}},Ibe=/-(\w)/g,Qm=FI(function(e){return e.replace(Ibe,function(t,n){return n?n.toUpperCase():""})}),Nbe=/\B([A-Z])/g,Abe=FI(function(e){return e.replace(Nbe,"-$1").toLowerCase()}),Dbe=Object.prototype.hasOwnProperty,VO=function(t,n){return Dbe.call(t,n)};function BI(e,t,n,r){var a=e[n];if(a!=null){var o=VO(a,"default");if(o&&r===void 0){var i=a.default;r=a.type!==Function&&Pbe(i)?i():i}a.type===Boolean&&(!VO(t,n)&&!o?r=!1:r===""&&(r=!0))}return r}function eg(e){return Object.keys(e).reduce(function(t,n){return(n.substr(0,5)==="data-"||n.substr(0,5)==="aria-")&&(t[n]=e[n]),t},{})}function Se(){for(var e=[],t=0;t<arguments.length;t++){var n=t<0||arguments.length<=t?void 
0:arguments[t];if(!!n){if(xbe(n))e.push(n);else if(Tbe(n))for(var r=0;r<n.length;r++){var a=Se(n[r]);a&&e.push(a)}else if(_be(n))for(var o in n)n[o]&&e.push(o)}}return e.join(" ")}function bn(e,t){for(var n=Object.assign({},e),r=0;r<t.length;r+=1){var a=t[r];delete n[a]}return n}var VI=function(){if(typeof Map!="undefined")return Map;function e(t,n){var r=-1;return t.some(function(a,o){return a[0]===n?(r=o,!0):!1}),r}return function(){function t(){this.__entries__=[]}return Object.defineProperty(t.prototype,"size",{get:function(){return this.__entries__.length},enumerable:!0,configurable:!0}),t.prototype.get=function(n){var r=e(this.__entries__,n),a=this.__entries__[r];return a&&a[1]},t.prototype.set=function(n,r){var a=e(this.__entries__,n);~a?this.__entries__[a][1]=r:this.__entries__.push([n,r])},t.prototype.delete=function(n){var r=this.__entries__,a=e(r,n);~a&&r.splice(a,1)},t.prototype.has=function(n){return!!~e(this.__entries__,n)},t.prototype.clear=function(){this.__entries__.splice(0)},t.prototype.forEach=function(n,r){r===void 0&&(r=null);for(var a=0,o=this.__entries__;a<o.length;a++){var i=o[a];n.call(r,i[1],i[0])}},t}()}(),J0=typeof window!="undefined"&&typeof document!="undefined"&&window.document===document,xv=function(){return typeof global!="undefined"&&global.Math===Math?global:typeof self!="undefined"&&self.Math===Math?self:typeof window!="undefined"&&window.Math===Math?window:Function("return this")()}(),Rbe=function(){return typeof requestAnimationFrame=="function"?requestAnimationFrame.bind(xv):function(e){return setTimeout(function(){return e(Date.now())},1e3/60)}}(),Lbe=2;function Fbe(e,t){var n=!1,r=!1,a=0;function o(){n&&(n=!1,e()),r&&l()}function i(){Rbe(o)}function l(){var s=Date.now();if(n){if(s-a<Lbe)return;r=!0}else n=!0,r=!1,setTimeout(i,t);a=s}return l}var Bbe=20,Vbe=["top","right","bottom","left","width","height","size","weight"],zbe=typeof MutationObserver!="undefined",Hbe=function(){function 
e(){this.connected_=!1,this.mutationEventsAdded_=!1,this.mutationsObserver_=null,this.observers_=[],this.onTransitionEnd_=this.onTransitionEnd_.bind(this),this.refresh=Fbe(this.refresh.bind(this),Bbe)}return e.prototype.addObserver=function(t){~this.observers_.indexOf(t)||this.observers_.push(t),this.connected_||this.connect_()},e.prototype.removeObserver=function(t){var n=this.observers_,r=n.indexOf(t);~r&&n.splice(r,1),!n.length&&this.connected_&&this.disconnect_()},e.prototype.refresh=function(){var t=this.updateObservers_();t&&this.refresh()},e.prototype.updateObservers_=function(){var t=this.observers_.filter(function(n){return n.gatherActive(),n.hasActive()});return t.forEach(function(n){return n.broadcastActive()}),t.length>0},e.prototype.connect_=function(){!J0||this.connected_||(document.addEventListener("transitionend",this.onTransitionEnd_),window.addEventListener("resize",this.refresh),zbe?(this.mutationsObserver_=new MutationObserver(this.refresh),this.mutationsObserver_.observe(document,{attributes:!0,childList:!0,characterData:!0,subtree:!0})):(document.addEventListener("DOMSubtreeModified",this.refresh),this.mutationEventsAdded_=!0),this.connected_=!0)},e.prototype.disconnect_=function(){!J0||!this.connected_||(document.removeEventListener("transitionend",this.onTransitionEnd_),window.removeEventListener("resize",this.refresh),this.mutationsObserver_&&this.mutationsObserver_.disconnect(),this.mutationEventsAdded_&&document.removeEventListener("DOMSubtreeModified",this.refresh),this.mutationsObserver_=null,this.mutationEventsAdded_=!1,this.connected_=!1)},e.prototype.onTransitionEnd_=function(t){var n=t.propertyName,r=n===void 0?"":n,a=Vbe.some(function(o){return!!~r.indexOf(o)});a&&this.refresh()},e.getInstance=function(){return this.instance_||(this.instance_=new e),this.instance_},e.instance_=null,e}(),zI=function(e,t){for(var n=0,r=Object.keys(t);n<r.length;n++){var 
a=r[n];Object.defineProperty(e,a,{value:t[a],enumerable:!1,writable:!1,configurable:!0})}return e},oc=function(e){var t=e&&e.ownerDocument&&e.ownerDocument.defaultView;return t||xv},HI=tg(0,0,0,0);function _v(e){return parseFloat(e)||0}function zO(e){for(var t=[],n=1;n<arguments.length;n++)t[n-1]=arguments[n];return t.reduce(function(r,a){var o=e["border-"+a+"-width"];return r+_v(o)},0)}function jbe(e){for(var t=["top","right","bottom","left"],n={},r=0,a=t;r<a.length;r++){var o=a[r],i=e["padding-"+o];n[o]=_v(i)}return n}function Kbe(e){var t=e.getBBox();return tg(0,0,t.width,t.height)}function Wbe(e){var t=e.clientWidth,n=e.clientHeight;if(!t&&!n)return HI;var r=oc(e).getComputedStyle(e),a=jbe(r),o=a.left+a.right,i=a.top+a.bottom,l=_v(r.width),s=_v(r.height);if(r.boxSizing==="border-box"&&(Math.round(l+o)!==t&&(l-=zO(r,"left","right")+o),Math.round(s+i)!==n&&(s-=zO(r,"top","bottom")+i)),!Ybe(e)){var c=Math.round(l+o)-t,d=Math.round(s+i)-n;Math.abs(c)!==1&&(l-=c),Math.abs(d)!==1&&(s-=d)}return tg(a.left,a.top,l,s)}var Ube=function(){return typeof SVGGraphicsElement!="undefined"?function(e){return e instanceof oc(e).SVGGraphicsElement}:function(e){return e instanceof oc(e).SVGElement&&typeof e.getBBox=="function"}}();function Ybe(e){return e===oc(e).document.documentElement}function qbe(e){return J0?Ube(e)?Kbe(e):Wbe(e):HI}function Gbe(e){var t=e.x,n=e.y,r=e.width,a=e.height,o=typeof DOMRectReadOnly!="undefined"?DOMRectReadOnly:Object,i=Object.create(o.prototype);return zI(i,{x:t,y:n,width:r,height:a,top:n,right:t+r,bottom:a+n,left:t}),i}function tg(e,t,n,r){return{x:e,y:t,width:n,height:r}}var Xbe=function(){function e(t){this.broadcastWidth=0,this.broadcastHeight=0,this.contentRect_=tg(0,0,0,0),this.target=t}return e.prototype.isActive=function(){var t=qbe(this.target);return this.contentRect_=t,t.width!==this.broadcastWidth||t.height!==this.broadcastHeight},e.prototype.broadcastRect=function(){var t=this.contentRect_;return 
this.broadcastWidth=t.width,this.broadcastHeight=t.height,t},e}(),Zbe=function(){function e(t,n){var r=Gbe(n);zI(this,{target:t,contentRect:r})}return e}(),Jbe=function(){function e(t,n,r){if(this.activeObservations_=[],this.observations_=new VI,typeof t!="function")throw new TypeError("The callback provided as parameter 1 is not a function.");this.callback_=t,this.controller_=n,this.callbackCtx_=r}return e.prototype.observe=function(t){if(!arguments.length)throw new TypeError("1 argument required, but only 0 present.");if(!(typeof Element=="undefined"||!(Element instanceof Object))){if(!(t instanceof oc(t).Element))throw new TypeError('parameter 1 is not of type "Element".');var n=this.observations_;n.has(t)||(n.set(t,new Xbe(t)),this.controller_.addObserver(this),this.controller_.refresh())}},e.prototype.unobserve=function(t){if(!arguments.length)throw new TypeError("1 argument required, but only 0 present.");if(!(typeof Element=="undefined"||!(Element instanceof Object))){if(!(t instanceof oc(t).Element))throw new TypeError('parameter 1 is not of type "Element".');var n=this.observations_;!n.has(t)||(n.delete(t),n.size||this.controller_.removeObserver(this))}},e.prototype.disconnect=function(){this.clearActive(),this.observations_.clear(),this.controller_.removeObserver(this)},e.prototype.gatherActive=function(){var t=this;this.clearActive(),this.observations_.forEach(function(n){n.isActive()&&t.activeObservations_.push(n)})},e.prototype.broadcastActive=function(){if(!!this.hasActive()){var t=this.callbackCtx_,n=this.activeObservations_.map(function(r){return new Zbe(r.target,r.broadcastRect())});this.callback_.call(t,n,t),this.clearActive()}},e.prototype.clearActive=function(){this.activeObservations_.splice(0)},e.prototype.hasActive=function(){return this.activeObservations_.length>0},e}(),jI=typeof WeakMap!="undefined"?new WeakMap:new VI,KI=function(){function e(t){if(!(this instanceof e))throw new TypeError("Cannot call a class as a 
function.");if(!arguments.length)throw new TypeError("1 argument required, but only 0 present.");var n=Hbe.getInstance(),r=new Jbe(t,n,this);jI.set(this,r)}return e}();["observe","unobserve","disconnect"].forEach(function(e){KI.prototype[e]=function(){var t;return(t=jI.get(this))[e].apply(t,arguments)}});var ng=function(){return typeof xv.ResizeObserver!="undefined"?xv.ResizeObserver:KI}();function WI(e){if(Array.isArray(e))return e}function Qbe(e,t){var n=e==null?null:typeof Symbol!="undefined"&&e[Symbol.iterator]||e["@@iterator"];if(n!=null){var r=[],a=!0,o=!1,i,l;try{for(n=n.call(e);!(a=(i=n.next()).done)&&(r.push(i.value),!(t&&r.length===t));a=!0);}catch(s){o=!0,l=s}finally{try{!a&&n.return!=null&&n.return()}finally{if(o)throw l}}return r}}function Q0(e,t){(t==null||t>e.length)&&(t=e.length);for(var n=0,r=new Array(t);n<t;n++)r[n]=e[n];return r}function rg(e,t){if(!!e){if(typeof e=="string")return Q0(e,t);var n=Object.prototype.toString.call(e).slice(8,-1);if(n==="Object"&&e.constructor&&(n=e.constructor.name),n==="Map"||n==="Set")return Array.from(e);if(n==="Arguments"||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n))return Q0(e,t)}}function UI(){throw new TypeError(`Invalid attempt to destructure non-iterable instance.
In order to be iterable, non-array objects must have a [Symbol.iterator]() method.`)}function fn(e,t){return WI(e)||Qbe(e,t)||rg(e,t)||UI()}function e0e(e){if(Array.isArray(e))return Q0(e)}function YI(e){if(typeof Symbol!="undefined"&&e[Symbol.iterator]!=null||e["@@iterator"]!=null)return Array.from(e)}function t0e(){throw new TypeError(`Invalid attempt to spread non-iterable instance.
In order to be iterable, non-array objects must have a [Symbol.iterator]() method.`)}function Je(e){return e0e(e)||YI(e)||rg(e)||t0e()}var n0e=function(t){return t!=null&&t!==""},tf=n0e,If=function(t){for(var n=Object.keys(t),r={},a={},o={},i=0,l=n.length;i<l;i++){var s=n[i];Mbe(s)?(r[s[2].toLowerCase()+s.slice(3)]=t[s],a[s]=t[s]):o[s]=t[s]}return{onEvents:a,events:r,extraAttrs:o}},r0e=function(){var t=arguments.length>0&&arguments[0]!==void 0?arguments[0]:"",n=arguments.length>1?arguments[1]:void 0,r={},a=/;(?![^(]*\))/g,o=/:(.+)/;return t.split(a).forEach(function(i){if(i){var l=i.split(o);if(l.length>1){var s=n?Qm(l[0].trim()):l[0].trim();r[s]=l[1].trim()}}}),r},vt=function(t,n){return n in Qe(t)},Un=function e(){var t=arguments.length>0&&arguments[0]!==void 0?arguments[0]:[],n=arguments.length>1&&arguments[1]!==void 0?arguments[1]:!0,r=Array.isArray(t)?t:[t],a=[];return r.forEach(function(o){Array.isArray(o)?a.push.apply(a,Je(e(o,n))):o&&o.type===Fe?a.push.apply(a,Je(e(o.children,n))):o&&rn(o)?n&&!As(o)?a.push(o):n||a.push(o):tf(o)&&a.push(o)}),a},ht=function(t){var n=arguments.length>1&&arguments[1]!==void 0?arguments[1]:"default",r=arguments.length>2&&arguments[2]!==void 0?arguments[2]:{};if(rn(t))return t.type===Fe?n==="default"?Un(t.children):[]:t.children&&t.children[n]?Un(t.children[n](r)):[];var a=t.$slots[n]&&t.$slots[n](r);return Un(a)},Sn=function(t){for(var n,r=((n=t==null?void 0:t.vnode)===null||n===void 0?void 0:n.el)||t&&(t.$el||t);r&&!r.tagName;)r=r.nextSibling;return r},Qe=function(t){var n={};if(t.$&&t.$.vnode){var r=t.$.vnode.props||{};Object.keys(t.$props).forEach(function(l){var s=t.$props[l],c=Abe(l);(s!==void 0||c in r)&&(n[l]=s)})}else if(rn(t)&&kt(t.type)==="object"){var a=t.props||{},o={};Object.keys(a).forEach(function(l){o[Qm(l)]=a[l]});var i=t.type.props||{};Object.keys(i).forEach(function(l){var s=BI(i,o,l,o[l]);(s!==void 0||l in o)&&(n[l]=s)})}return n},We=function(t){var n=arguments.length>1&&arguments[1]!==void 
0?arguments[1]:"default",r=arguments.length>2&&arguments[2]!==void 0?arguments[2]:t,a=arguments.length>3&&arguments[3]!==void 0?arguments[3]:!0,o=void 0;if(t.$){var i=t[n];if(i!==void 0)return typeof i=="function"&&a?i(r):i;o=t.$slots[n],o=a&&o?o(r):o}else if(rn(t)){var l=t.props&&t.props[n];if(l!==void 0&&t.props!==null)return typeof l=="function"&&a?l(r):l;t.type===Fe?o=t.children:t.children&&t.children[n]&&(o=t.children[n],o=a&&o?o(r):o)}return Array.isArray(o)&&(o=Un(o),o=o.length===1?o[0]:o,o=o.length===0?void 0:o),o},rr=function(t){var n=t.$?t.$:t,r={},a=n.props||{},o={};Object.keys(a).forEach(function(l){o[Qm(l)]=a[l]});var i=bc(n.type)?n.type.props:{};return i&&Object.keys(i).forEach(function(l){var s=BI(i,o,l,o[l]);l in o&&(r[l]=s)}),P(P({},o),r)},a0e=function(t){var n=t.key;return n};function nf(){var e=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{},t=arguments.length>1&&arguments[1]!==void 0?arguments[1]:!0,n={};return e.$?n=P(P({},n),e.$attrs):n=P(P({},n),e.props),If(n)[t?"onEvents":"events"]}function o0e(e){var t=(rn(e)?e.props:e.$attrs)||{},n=t.class||{},r={};return typeof n=="string"?n.split(" ").forEach(function(a){r[a.trim()]=!0}):Array.isArray(n)?Se(n).split(" ").forEach(function(a){r[a.trim()]=!0}):r=P(P({},r),n),r}function qI(e,t){var n=(rn(e)?e.props:e.$attrs)||{},r=n.style||{};if(typeof r=="string")r=r0e(r,t);else if(t&&r){var a={};return Object.keys(r).forEach(function(o){return a[Qm(o)]=r[o]}),a}return r}function i0e(e){return e==null||e===""||Array.isArray(e)&&e.length===0}function As(e){return e&&(e.type===Ir||e.type===Fe&&e.children.length===0||e.type===Fo&&e.children.trim()==="")}function l0e(e){return!e||e().every(As)}function s0e(e){return e&&e.type===Fo}function La(){var e=arguments.length>0&&arguments[0]!==void 0?arguments[0]:[],t=[];return e.forEach(function(n){Array.isArray(n)?t.push.apply(t,Je(n)):n.type===Fe?t.push.apply(t,Je(n.children)):t.push(n)}),t.filter(function(n){return!As(n)})}var An=function(t,n){return 
Object.keys(n).forEach(function(r){if(t[r])t[r].def&&(t[r]=t[r].def(n[r]));else throw new Error("not have ".concat(r," prop"))}),t};function HO(){var e=[].slice.call(arguments,0),t={};return e.forEach(function(){for(var n=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{},r=0,a=Object.entries(n);r<a.length;r++){var o=fn(a[r],2),i=o[0],l=o[1];t[i]=t[i]||{},bc(l)?P(t[i],l):t[i]=l}}),t}function zn(e){return e&&e.__v_isVNode&&kt(e.type)!=="symbol"}function jn(e,t){var n=arguments.length>2&&arguments[2]!==void 0?arguments[2]:"default",r,a;return(r=t[n])!==null&&r!==void 0?r:(a=e[n])===null||a===void 0?void 0:a.call(e)}var rf=vt,zo=G({name:"ResizeObserver",props:{disabled:Boolean,onResize:Function},emits:["resize"],setup:function(t,n){var r=n.slots,a=bt({width:0,height:0,offsetHeight:0,offsetWidth:0}),o=null,i=null,l=function(){i&&(i.disconnect(),i=null)},s=function(p){var v=t.onResize,m=p[0].target,y=m.getBoundingClientRect(),b=y.width,C=y.height,S=m.offsetWidth,w=m.offsetHeight,k=Math.floor(b),$=Math.floor(C);if(a.width!==k||a.height!==$||a.offsetWidth!==S||a.offsetHeight!==w){var O={width:k,height:$,offsetWidth:S,offsetHeight:w};P(a,O),v&&Promise.resolve().then(function(){v(P(P({},O),{offsetWidth:S,offsetHeight:w}),m)})}},c=$t(),d=function(){var p=t.disabled;if(p){l();return}var v=Sn(c),m=v!==o;m&&(l(),o=v),!i&&v&&(i=new ng(s),i.observe(v))};return et(function(){d()}),ur(function(){d()}),Wr(function(){l()}),ce(function(){return t.disabled},function(){d()},{flush:"post"}),function(){var f;return(f=r.default)===null||f===void 0?void 0:f.call(r)[0]}}});function e1(e){var t,n=function(o){return function(){t=null,e.apply(void 0,Je(o))}},r=function(){if(t==null){for(var o=arguments.length,i=new Array(o),l=0;l<o;l++)i[l]=arguments[l];t=requestAnimationFrame(n(i))}};return r.cancel=function(){return cancelAnimationFrame(t)},r}var rt=function(){for(var t=arguments.length,n=new Array(t),r=0;r<t;r++)n[r]=arguments[r];return n},u0e=function(){for(var 
t=arguments.length,n=new Array(t),r=0;r<t;r++)n[r]=arguments[r];return n},kn=function(t){var n=t;return n.install=function(r){r.component(n.displayName||n.name,t)},t},GI=!1;try{var jO=Object.defineProperty({},"passive",{get:function(){GI=!0}});window.addEventListener("testPassive",null,jO),window.removeEventListener("testPassive",null,jO)}catch{}var mn=GI;function Kn(e,t,n,r){if(e.addEventListener){var a=r;a===void 0&&mn&&(t==="touchstart"||t==="touchmove"||t==="wheel")&&(a={passive:!1}),e.addEventListener(t,n,a)}return{remove:function(){e.removeEventListener&&e.removeEventListener(t,n)}}}function hh(e){return e!==window?e.getBoundingClientRect():{top:0,bottom:window.innerHeight}}function KO(e,t,n){if(n!==void 0&&t.top>e.top-n)return"".concat(n+t.top,"px")}function WO(e,t,n){if(n!==void 0&&t.bottom<e.bottom+n){var r=window.innerHeight-t.bottom;return"".concat(n+r,"px")}}var XI=["resize","scroll","touchstart","touchmove","touchend","pageshow","load"],Sd=[];function UO(e,t){if(!!e){var n=Sd.find(function(r){return r.target===e});n?n.affixList.push(t):(n={target:e,affixList:[t],eventHandlers:{}},Sd.push(n),XI.forEach(function(r){n.eventHandlers[r]=Kn(e,r,function(){n.affixList.forEach(function(a){var o=a.exposed.lazyUpdatePosition;o()},(r==="touchstart"||r==="touchmove")&&mn?{passive:!0}:!1)})}))}}function YO(e){var t=Sd.find(function(n){var r=n.affixList.some(function(a){return a===e});return r&&(n.affixList=n.affixList.filter(function(a){return a!==e})),r});t&&t.affixList.length===0&&(Sd=Sd.filter(function(n){return n!==t}),XI.forEach(function(n){var r=t.eventHandlers[n];r&&r.remove&&r.remove()}))}var ZI={items_per_page:"/ page",jump_to:"Go to",jump_to_confirm:"confirm",page:"",prev_page:"Previous Page",next_page:"Next Page",prev_5:"Previous 5 Pages",next_5:"Next 5 Pages",prev_3:"Previous 3 Pages",next_3:"Next 3 Pages"},Nf={today:"Today",now:"Now",backToToday:"Back to today",ok:"Ok",clear:"Clear",month:"Month",year:"Year",timeSelect:"select time",dateSelect:"select 
date",weekSelect:"Choose a week",monthSelect:"Choose a month",yearSelect:"Choose a year",decadeSelect:"Choose a decade",yearFormat:"YYYY",dateFormat:"M/D/YYYY",dayFormat:"D",dateTimeFormat:"M/D/YYYY HH:mm:ss",monthBeforeYear:!0,previousMonth:"Previous month (PageUp)",nextMonth:"Next month (PageDown)",previousYear:"Last year (Control + left)",nextYear:"Next year (Control + right)",previousDecade:"Last decade",nextDecade:"Next decade",previousCentury:"Last century",nextCentury:"Next century"},c0e={placeholder:"Select time"},_w=c0e,d0e={lang:P({placeholder:"Select date",rangePlaceholder:["Start date","End date"]},Nf),timePickerLocale:P({},_w)},Ev=d0e,Gr="${label} is not a valid ${type}",lo={locale:"en",Pagination:ZI,DatePicker:Ev,TimePicker:_w,Calendar:Ev,global:{placeholder:"Please select"},Table:{filterTitle:"Filter menu",filterConfirm:"OK",filterReset:"Reset",filterEmptyText:"No filters",emptyText:"No data",selectAll:"Select current page",selectInvert:"Invert current page",selectNone:"Clear all data",selectionAll:"Select all data",sortTitle:"Sort",expand:"Expand row",collapse:"Collapse row",triggerDesc:"Click to sort descending",triggerAsc:"Click to sort ascending",cancelSort:"Click to cancel sorting"},Modal:{okText:"OK",cancelText:"Cancel",justOkText:"OK"},Popconfirm:{okText:"OK",cancelText:"Cancel"},Transfer:{titles:["",""],searchPlaceholder:"Search here",itemUnit:"item",itemsUnit:"items",remove:"Remove",selectCurrent:"Select current page",removeCurrent:"Remove current page",selectAll:"Select all data",removeAll:"Remove all data",selectInvert:"Invert current page"},Upload:{uploading:"Uploading...",removeFile:"Remove file",uploadError:"Upload error",previewFile:"Preview file",downloadFile:"Download file"},Empty:{description:"No Data"},Icon:{icon:"icon"},Text:{edit:"Edit",copy:"Copy",copied:"Copied",expand:"Expand"},PageHeader:{back:"Back"},Form:{optional:"(optional)",defaultValidateMessages:{default:"Field validation error for ${label}",required:"Please enter 
${label}",enum:"${label} must be one of [${enum}]",whitespace:"${label} cannot be a blank character",date:{format:"${label} date format is invalid",parse:"${label} cannot be converted to a date",invalid:"${label} is an invalid date"},types:{string:Gr,method:Gr,array:Gr,object:Gr,number:Gr,date:Gr,boolean:Gr,integer:Gr,float:Gr,regexp:Gr,email:Gr,url:Gr,hex:Gr},string:{len:"${label} must be ${len} characters",min:"${label} must be at least ${min} characters",max:"${label} must be up to ${max} characters",range:"${label} must be between ${min}-${max} characters"},number:{len:"${label} must be equal to ${len}",min:"${label} must be minimum ${min}",max:"${label} must be maximum ${max}",range:"${label} must be between ${min}-${max}"},array:{len:"Must be ${len} ${label}",min:"At least ${min} ${label}",max:"At most ${max} ${label}",range:"The amount of ${label} must be between ${min}-${max}"},pattern:{mismatch:"${label} does not match the pattern ${pattern}"}}},Image:{preview:"Preview"}},Kr=G({name:"LocaleReceiver",props:{componentName:u.string,defaultLocale:{type:[Object,Function]},children:{type:Function}},setup:function(t,n){var r=n.slots,a=ve("localeData",{}),o=x(function(){var l=t.componentName,s=l===void 0?"global":l,c=t.defaultLocale,d=c||lo[s||"global"],f=a.antLocale,p=s&&f?f[s]:{};return P(P({},typeof d=="function"?d():d),p||{})}),i=x(function(){var l=a.antLocale,s=l&&l.locale;return l&&l.exist&&!s?lo.locale:s});return function(){var l=t.children||r.default,s=a.antLocale;return l==null?void 0:l(o.value,i.value,s)}}});function f0e(e,t){var n=ve("localeData",{}),r=x(function(){var a=n.antLocale,o=t||lo[e||"global"],i=e&&a?a[e]:{};return P(P({},typeof o=="function"?o():o),i||{})});return[r]}var JI=function(){var t=ve("configProvider",St),n=t.getPrefixCls,r=n("empty-img-default");return g("svg",{class:r,width:"184",height:"152",viewBox:"0 0 184 152"},[g("g",{fill:"none","fill-rule":"evenodd"},[g("g",{transform:"translate(24 
31.67)"},[g("ellipse",{class:"".concat(r,"-ellipse"),cx:"67.797",cy:"106.89",rx:"67.797",ry:"12.668"},null),g("path",{class:"".concat(r,"-path-1"),d:"M122.034 69.674L98.109 40.229c-1.148-1.386-2.826-2.225-4.593-2.225h-51.44c-1.766 0-3.444.839-4.592 2.225L13.56 69.674v15.383h108.475V69.674z"},null),g("path",{class:"".concat(r,"-path-2"),d:"M101.537 86.214L80.63 61.102c-1.001-1.207-2.507-1.867-4.048-1.867H31.724c-1.54 0-3.047.66-4.048 1.867L6.769 86.214v13.792h94.768V86.214z",transform:"translate(13.56)"},null),g("path",{class:"".concat(r,"-path-3"),d:"M33.83 0h67.933a4 4 0 0 1 4 4v93.344a4 4 0 0 1-4 4H33.83a4 4 0 0 1-4-4V4a4 4 0 0 1 4-4z"},null),g("path",{class:"".concat(r,"-path-4"),d:"M42.678 9.953h50.237a2 2 0 0 1 2 2V36.91a2 2 0 0 1-2 2H42.678a2 2 0 0 1-2-2V11.953a2 2 0 0 1 2-2zM42.94 49.767h49.713a2.262 2.262 0 1 1 0 4.524H42.94a2.262 2.262 0 0 1 0-4.524zM42.94 61.53h49.713a2.262 2.262 0 1 1 0 4.525H42.94a2.262 2.262 0 0 1 0-4.525zM121.813 105.032c-.775 3.071-3.497 5.36-6.735 5.36H20.515c-3.238 0-5.96-2.29-6.734-5.36a7.309 7.309 0 0 1-.222-1.79V69.675h26.318c2.907 0 5.25 2.448 5.25 5.42v.04c0 2.971 2.37 5.37 5.277 5.37h34.785c2.907 0 5.277-2.421 5.277-5.393V75.1c0-2.972 2.343-5.426 5.25-5.426h26.318v33.569c0 .617-.077 1.216-.221 1.789z"},null)]),g("path",{class:"".concat(r,"-path-5"),d:"M149.121 33.292l-6.83 2.65a1 1 0 0 1-1.317-1.23l1.937-6.207c-2.589-2.944-4.109-6.534-4.109-10.408C138.802 8.102 148.92 0 161.402 0 173.881 0 184 8.102 184 18.097c0 9.995-10.118 18.097-22.599 18.097-4.528 0-8.744-1.066-12.28-2.902z"},null),g("g",{class:"".concat(r,"-g"),transform:"translate(149.65 15.383)"},[g("ellipse",{cx:"20.654",cy:"3.167",rx:"2.849",ry:"2.815"},null),g("path",{d:"M5.698 5.63H0L2.898.704zM9.259.704h4.985V5.63H9.259z"},null)])])])};JI.PRESENTED_IMAGE_DEFAULT=!0;var h0e=JI,QI=function(){var t=ve("configProvider",St),n=t.getPrefixCls,r=n("empty-img-simple");return g("svg",{class:r,width:"64",height:"41",viewBox:"0 0 64 41"},[g("g",{transform:"translate(0 
1)",fill:"none","fill-rule":"evenodd"},[g("ellipse",{class:"".concat(r,"-ellipse"),fill:"#F5F5F5",cx:"32",cy:"33",rx:"32",ry:"7"},null),g("g",{class:"".concat(r,"-g"),"fill-rule":"nonzero",stroke:"#D9D9D9"},[g("path",{d:"M55 12.76L44.854 1.258C44.367.474 43.656 0 42.907 0H21.093c-.749 0-1.46.474-1.947 1.257L9 12.761V22h46v-9.24z"},null),g("path",{d:"M41.613 15.931c0-1.605.994-2.93 2.227-2.931H55v18.137C55 33.26 53.68 35 52.05 35h-40.1C10.32 35 9 33.259 9 31.137V13h11.16c1.233 0 2.227 1.323 2.227 2.928v.022c0 1.605 1.005 2.901 2.237 2.901h14.752c1.232 0 2.237-1.308 2.237-2.913v-.007z",fill:"#FAFAFA",class:"".concat(r,"-path")},null)])])])};QI.PRESENTED_IMAGE_SIMPLE=!0;var p0e=QI,v0e=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},eN=g(h0e,null,null),tN=g(p0e,null,null),xc=function(t,n){var r=n.slots,a=r===void 0?{}:r,o=n.attrs,i,l=ve("configProvider",St),s=l.getPrefixCls,c=l.direction,d=P(P({},t),o),f=d.prefixCls,p=d.image,v=p===void 0?eN:p,m=d.description,y=m===void 0?((i=a.description)===null||i===void 0?void 0:i.call(a))||void 0:m,b=d.imageStyle,C=d.class,S=C===void 0?"":C,w=v0e(d,["prefixCls","image","description","imageStyle","class"]);return g(Kr,{componentName:"Empty",children:function($){var O,T=s("empty",f),_=typeof y!="undefined"?y:$.description,I=typeof _=="string"?_:"empty",L=null;return typeof 
v=="string"?L=g("img",{alt:I,src:v},null):L=v,g("div",le({class:Se(T,S,(O={},V(O,"".concat(T,"-normal"),v===tN),V(O,"".concat(T,"-rtl"),c==="rtl"),O))},w),[g("div",{class:"".concat(T,"-image"),style:b},[L]),_&&g("p",{class:"".concat(T,"-description")},[_]),a.default&&g("div",{class:"".concat(T,"-footer")},[La(a.default())])])}},null)};xc.displayName="AEmpty";xc.PRESENTED_IMAGE_DEFAULT=eN;xc.PRESENTED_IMAGE_SIMPLE=tN;xc.inheritAttrs=!1;xc.props={prefixCls:u.string,image:u.any,description:u.any,imageStyle:u.object};var su=kn(xc),m0e=function(t){var n=ve("configProvider",St),r=function(o){var i=n.getPrefixCls,l=i("empty");switch(o){case"Table":case"List":return g(su,{image:su.PRESENTED_IMAGE_SIMPLE},null);case"Select":case"TreeSelect":case"Cascader":case"Transfer":case"Mentions":return g(su,{image:su.PRESENTED_IMAGE_SIMPLE,class:"".concat(l,"-small")},null);default:return g(su,null,null)}};return r(t.componentName)};function nN(e){return g(m0e,{componentName:e},null)}//! moment.js
//! version : 2.29.3
//! authors : Tim Wood, Iskren Chernev, Moment.js contributors
//! license : MIT
//! momentjs.com
var rN;function Ue(){return rN.apply(null,arguments)}function g0e(e){rN=e}function so(e){return e instanceof Array||Object.prototype.toString.call(e)==="[object Array]"}function ls(e){return e!=null&&Object.prototype.toString.call(e)==="[object Object]"}function cn(e,t){return Object.prototype.hasOwnProperty.call(e,t)}function Ew(e){if(Object.getOwnPropertyNames)return Object.getOwnPropertyNames(e).length===0;var t;for(t in e)if(cn(e,t))return!1;return!0}function Lr(e){return e===void 0}function Ci(e){return typeof e=="number"||Object.prototype.toString.call(e)==="[object Number]"}function Af(e){return e instanceof Date||Object.prototype.toString.call(e)==="[object Date]"}function aN(e,t){var n=[],r,a=e.length;for(r=0;r<a;++r)n.push(t(e[r],r));return n}function Gi(e,t){for(var n in t)cn(t,n)&&(e[n]=t[n]);return cn(t,"toString")&&(e.toString=t.toString),cn(t,"valueOf")&&(e.valueOf=t.valueOf),e}function Ho(e,t,n,r){return xN(e,t,n,r,!0).utc()}function y0e(){return{empty:!1,unusedTokens:[],unusedInput:[],overflow:-2,charsLeftOver:0,nullInput:!1,invalidEra:null,invalidMonth:null,invalidFormat:!1,userInvalidated:!1,iso:!1,parsedDateParts:[],era:null,meridiem:null,rfc2822:!1,weekdayMismatch:!1}}function qt(e){return e._pf==null&&(e._pf=y0e()),e._pf}var t1;Array.prototype.some?t1=Array.prototype.some:t1=function(e){var t=Object(this),n=t.length>>>0,r;for(r=0;r<n;r++)if(r in t&&e.call(this,t[r],r,t))return!0;return!1};function Mw(e){if(e._isValid==null){var t=qt(e),n=t1.call(t.parsedDateParts,function(a){return a!=null}),r=!isNaN(e._d.getTime())&&t.overflow<0&&!t.empty&&!t.invalidEra&&!t.invalidMonth&&!t.invalidWeekday&&!t.weekdayMismatch&&!t.nullInput&&!t.invalidFormat&&!t.userInvalidated&&(!t.meridiem||t.meridiem&&n);if(e._strict&&(r=r&&t.charsLeftOver===0&&t.unusedTokens.length===0&&t.bigHour===void 0),Object.isFrozen==null||!Object.isFrozen(e))e._isValid=r;else return r}return e._isValid}function ag(e){var t=Ho(NaN);return 
e!=null?Gi(qt(t),e):qt(t).userInvalidated=!0,t}var qO=Ue.momentProperties=[],_y=!1;function Iw(e,t){var n,r,a,o=qO.length;if(Lr(t._isAMomentObject)||(e._isAMomentObject=t._isAMomentObject),Lr(t._i)||(e._i=t._i),Lr(t._f)||(e._f=t._f),Lr(t._l)||(e._l=t._l),Lr(t._strict)||(e._strict=t._strict),Lr(t._tzm)||(e._tzm=t._tzm),Lr(t._isUTC)||(e._isUTC=t._isUTC),Lr(t._offset)||(e._offset=t._offset),Lr(t._pf)||(e._pf=qt(t)),Lr(t._locale)||(e._locale=t._locale),o>0)for(n=0;n<o;n++)r=qO[n],a=t[r],Lr(a)||(e[r]=a);return e}function Df(e){Iw(this,e),this._d=new Date(e._d!=null?e._d.getTime():NaN),this.isValid()||(this._d=new Date(NaN)),_y===!1&&(_y=!0,Ue.updateOffset(this),_y=!1)}function uo(e){return e instanceof Df||e!=null&&e._isAMomentObject!=null}function oN(e){Ue.suppressDeprecationWarnings===!1&&typeof console!="undefined"&&console.warn&&console.warn("Deprecation warning: "+e)}function Fa(e,t){var n=!0;return Gi(function(){if(Ue.deprecationHandler!=null&&Ue.deprecationHandler(null,e),n){var r=[],a,o,i,l=arguments.length;for(o=0;o<l;o++){if(a="",typeof arguments[o]=="object"){a+=`
[`+o+"] ";for(i in arguments[0])cn(arguments[0],i)&&(a+=i+": "+arguments[0][i]+", ");a=a.slice(0,-2)}else a=arguments[o];r.push(a)}oN(e+`
Arguments: `+Array.prototype.slice.call(r).join("")+`
`+new Error().stack),n=!1}return t.apply(this,arguments)},t)}var GO={};function iN(e,t){Ue.deprecationHandler!=null&&Ue.deprecationHandler(e,t),GO[e]||(oN(t),GO[e]=!0)}Ue.suppressDeprecationWarnings=!1;Ue.deprecationHandler=null;function jo(e){return typeof Function!="undefined"&&e instanceof Function||Object.prototype.toString.call(e)==="[object Function]"}function b0e(e){var t,n;for(n in e)cn(e,n)&&(t=e[n],jo(t)?this[n]=t:this["_"+n]=t);this._config=e,this._dayOfMonthOrdinalParseLenient=new RegExp((this._dayOfMonthOrdinalParse.source||this._ordinalParse.source)+"|"+/\d{1,2}/.source)}function n1(e,t){var n=Gi({},e),r;for(r in t)cn(t,r)&&(ls(e[r])&&ls(t[r])?(n[r]={},Gi(n[r],e[r]),Gi(n[r],t[r])):t[r]!=null?n[r]=t[r]:delete n[r]);for(r in e)cn(e,r)&&!cn(t,r)&&ls(e[r])&&(n[r]=Gi({},n[r]));return n}function Nw(e){e!=null&&this.set(e)}var r1;Object.keys?r1=Object.keys:r1=function(e){var t,n=[];for(t in e)cn(e,t)&&n.push(t);return n};var C0e={sameDay:"[Today at] LT",nextDay:"[Tomorrow at] LT",nextWeek:"dddd [at] LT",lastDay:"[Yesterday at] LT",lastWeek:"[Last] dddd [at] LT",sameElse:"L"};function w0e(e,t,n){var r=this._calendar[e]||this._calendar.sameElse;return jo(r)?r.call(t,n):r}function Ro(e,t,n){var r=""+Math.abs(e),a=t-r.length,o=e>=0;return(o?n?"+":"":"-")+Math.pow(10,Math.max(0,a)).toString().substr(1)+r}var Aw=/(\[[^\[]*\])|(\\)?([Hh]mm(ss)?|Mo|MM?M?M?|Do|DDDo|DD?D?D?|ddd?d?|do?|w[o|w]?|W[o|W]?|Qo?|N{1,5}|YYYYYY|YYYYY|YYYY|YY|y{2,4}|yo?|gg(ggg?)?|GG(GGG?)?|e|E|a|A|hh?|HH?|kk?|mm?|ss?|S{1,9}|x|X|zz?|ZZ?|.)/g,ph=/(\[[^\[]*\])|(\\)?(LTS|LT|LL?L?L?|l{1,4})/g,Ey={},Eu={};function Tt(e,t,n,r){var a=r;typeof r=="string"&&(a=function(){return this[r]()}),e&&(Eu[e]=a),t&&(Eu[t[0]]=function(){return Ro(a.apply(this,arguments),t[1],t[2])}),n&&(Eu[n]=function(){return this.localeData().ordinal(a.apply(this,arguments),e)})}function S0e(e){return e.match(/\[[\s\S]/)?e.replace(/^\[|\]$/g,""):e.replace(/\\/g,"")}function k0e(e){var 
t=e.match(Aw),n,r;for(n=0,r=t.length;n<r;n++)Eu[t[n]]?t[n]=Eu[t[n]]:t[n]=S0e(t[n]);return function(a){var o="",i;for(i=0;i<r;i++)o+=jo(t[i])?t[i].call(a,e):t[i];return o}}function Dp(e,t){return e.isValid()?(t=lN(t,e.localeData()),Ey[t]=Ey[t]||k0e(t),Ey[t](e)):e.localeData().invalidDate()}function lN(e,t){var n=5;function r(a){return t.longDateFormat(a)||a}for(ph.lastIndex=0;n>=0&&ph.test(e);)e=e.replace(ph,r),ph.lastIndex=0,n-=1;return e}var $0e={LTS:"h:mm:ss A",LT:"h:mm A",L:"MM/DD/YYYY",LL:"MMMM D, YYYY",LLL:"MMMM D, YYYY h:mm A",LLLL:"dddd, MMMM D, YYYY h:mm A"};function O0e(e){var t=this._longDateFormat[e],n=this._longDateFormat[e.toUpperCase()];return t||!n?t:(this._longDateFormat[e]=n.match(Aw).map(function(r){return r==="MMMM"||r==="MM"||r==="DD"||r==="dddd"?r.slice(1):r}).join(""),this._longDateFormat[e])}var P0e="Invalid date";function T0e(){return this._invalidDate}var x0e="%d",_0e=/\d{1,2}/;function E0e(e){return this._ordinal.replace("%d",e)}var M0e={future:"in %s",past:"%s ago",s:"a few seconds",ss:"%d seconds",m:"a minute",mm:"%d minutes",h:"an hour",hh:"%d hours",d:"a day",dd:"%d days",w:"a week",ww:"%d weeks",M:"a month",MM:"%d months",y:"a year",yy:"%d years"};function I0e(e,t,n,r){var a=this._relativeTime[n];return jo(a)?a(e,t,n,r):a.replace(/%d/i,e)}function N0e(e,t){var n=this._relativeTime[e>0?"future":"past"];return jo(n)?n(t):n.replace(/%s/i,t)}var kd={};function Or(e,t){var n=e.toLowerCase();kd[n]=kd[n+"s"]=kd[t]=e}function Ba(e){return typeof e=="string"?kd[e]||kd[e.toLowerCase()]:void 0}function Dw(e){var t={},n,r;for(r in e)cn(e,r)&&(n=Ba(r),n&&(t[n]=e[r]));return t}var sN={};function Pr(e,t){sN[e]=t}function A0e(e){var t=[],n;for(n in e)cn(e,n)&&t.push({unit:n,priority:sN[n]});return t.sort(function(r,a){return r.priority-a.priority}),t}function og(e){return e%4===0&&e%100!==0||e%400===0}function Oa(e){return e<0?Math.ceil(e)||0:Math.floor(e)}function Zt(e){var t=+e,n=0;return t!==0&&isFinite(t)&&(n=Oa(t)),n}function _c(e,t){return 
function(n){return n!=null?(uN(this,e,n),Ue.updateOffset(this,t),this):Mv(this,e)}}function Mv(e,t){return e.isValid()?e._d["get"+(e._isUTC?"UTC":"")+t]():NaN}function uN(e,t,n){e.isValid()&&!isNaN(n)&&(t==="FullYear"&&og(e.year())&&e.month()===1&&e.date()===29?(n=Zt(n),e._d["set"+(e._isUTC?"UTC":"")+t](n,e.month(),dg(n,e.month()))):e._d["set"+(e._isUTC?"UTC":"")+t](n))}function D0e(e){return e=Ba(e),jo(this[e])?this[e]():this}function R0e(e,t){if(typeof e=="object"){e=Dw(e);var n=A0e(e),r,a=n.length;for(r=0;r<a;r++)this[n[r].unit](e[n[r].unit])}else if(e=Ba(e),jo(this[e]))return this[e](t);return this}var cN=/\d/,ya=/\d\d/,dN=/\d{3}/,Rw=/\d{4}/,ig=/[+-]?\d{6}/,Dn=/\d\d?/,fN=/\d\d\d\d?/,hN=/\d\d\d\d\d\d?/,lg=/\d{1,3}/,Lw=/\d{1,4}/,sg=/[+-]?\d{1,6}/,Ec=/\d+/,ug=/[+-]?\d+/,L0e=/Z|[+-]\d\d:?\d\d/gi,cg=/Z|[+-]\d\d(?::?\d\d)?/gi,F0e=/[+-]?\d+(\.\d{1,3})?/,Rf=/[0-9]{0,256}['a-z\u00A0-\u05FF\u0700-\uD7FF\uF900-\uFDCF\uFDF0-\uFF07\uFF10-\uFFEF]{1,256}|[\u0600-\u06FF\/]{1,256}(\s*?[\u0600-\u06FF]{1,256}){1,2}/i,Iv;Iv={};function gt(e,t,n){Iv[e]=jo(t)?t:function(r,a){return r&&n?n:t}}function B0e(e,t){return cn(Iv,e)?Iv[e](t._strict,t._locale):new RegExp(V0e(e))}function V0e(e){return aa(e.replace("\\","").replace(/\\(\[)|\\(\])|\[([^\]\[]*)\]|\\(.)/g,function(t,n,r,a,o){return n||r||a||o}))}function aa(e){return e.replace(/[-\/\\^$*+?.()|[\]{}]/g,"\\$&")}var a1={};function Cn(e,t){var n,r=t,a;for(typeof e=="string"&&(e=[e]),Ci(t)&&(r=function(o,i){i[t]=Zt(o)}),a=e.length,n=0;n<a;n++)a1[e[n]]=r}function Lf(e,t){Cn(e,function(n,r,a,o){a._w=a._w||{},t(n,a._w,a,o)})}function z0e(e,t,n){t!=null&&cn(a1,e)&&a1[e](t,n._a,n,e)}var kr=0,ii=1,Oo=2,ar=3,Ja=4,li=5,Xl=6,H0e=7,j0e=8;function K0e(e,t){return(e%t+t)%t}var Zn;Array.prototype.indexOf?Zn=Array.prototype.indexOf:Zn=function(e){var t;for(t=0;t<this.length;++t)if(this[t]===e)return t;return-1};function dg(e,t){if(isNaN(e)||isNaN(t))return NaN;var n=K0e(t,12);return 
e+=(t-n)/12,n===1?og(e)?29:28:31-n%7%2}Tt("M",["MM",2],"Mo",function(){return this.month()+1});Tt("MMM",0,0,function(e){return this.localeData().monthsShort(this,e)});Tt("MMMM",0,0,function(e){return this.localeData().months(this,e)});Or("month","M");Pr("month",8);gt("M",Dn);gt("MM",Dn,ya);gt("MMM",function(e,t){return t.monthsShortRegex(e)});gt("MMMM",function(e,t){return t.monthsRegex(e)});Cn(["M","MM"],function(e,t){t[ii]=Zt(e)-1});Cn(["MMM","MMMM"],function(e,t,n,r){var a=n._locale.monthsParse(e,r,n._strict);a!=null?t[ii]=a:qt(n).invalidMonth=e});var W0e="January_February_March_April_May_June_July_August_September_October_November_December".split("_"),pN="Jan_Feb_Mar_Apr_May_Jun_Jul_Aug_Sep_Oct_Nov_Dec".split("_"),vN=/D[oD]?(\[[^\[\]]*\]|\s)+MMMM?/,U0e=Rf,Y0e=Rf;function q0e(e,t){return e?so(this._months)?this._months[e.month()]:this._months[(this._months.isFormat||vN).test(t)?"format":"standalone"][e.month()]:so(this._months)?this._months:this._months.standalone}function G0e(e,t){return e?so(this._monthsShort)?this._monthsShort[e.month()]:this._monthsShort[vN.test(t)?"format":"standalone"][e.month()]:so(this._monthsShort)?this._monthsShort:this._monthsShort.standalone}function X0e(e,t,n){var r,a,o,i=e.toLocaleLowerCase();if(!this._monthsParse)for(this._monthsParse=[],this._longMonthsParse=[],this._shortMonthsParse=[],r=0;r<12;++r)o=Ho([2e3,r]),this._shortMonthsParse[r]=this.monthsShort(o,"").toLocaleLowerCase(),this._longMonthsParse[r]=this.months(o,"").toLocaleLowerCase();return n?t==="MMM"?(a=Zn.call(this._shortMonthsParse,i),a!==-1?a:null):(a=Zn.call(this._longMonthsParse,i),a!==-1?a:null):t==="MMM"?(a=Zn.call(this._shortMonthsParse,i),a!==-1?a:(a=Zn.call(this._longMonthsParse,i),a!==-1?a:null)):(a=Zn.call(this._longMonthsParse,i),a!==-1?a:(a=Zn.call(this._shortMonthsParse,i),a!==-1?a:null))}function Z0e(e,t,n){var r,a,o;if(this._monthsParseExact)return 
X0e.call(this,e,t,n);for(this._monthsParse||(this._monthsParse=[],this._longMonthsParse=[],this._shortMonthsParse=[]),r=0;r<12;r++){if(a=Ho([2e3,r]),n&&!this._longMonthsParse[r]&&(this._longMonthsParse[r]=new RegExp("^"+this.months(a,"").replace(".","")+"$","i"),this._shortMonthsParse[r]=new RegExp("^"+this.monthsShort(a,"").replace(".","")+"$","i")),!n&&!this._monthsParse[r]&&(o="^"+this.months(a,"")+"|^"+this.monthsShort(a,""),this._monthsParse[r]=new RegExp(o.replace(".",""),"i")),n&&t==="MMMM"&&this._longMonthsParse[r].test(e))return r;if(n&&t==="MMM"&&this._shortMonthsParse[r].test(e))return r;if(!n&&this._monthsParse[r].test(e))return r}}function mN(e,t){var n;if(!e.isValid())return e;if(typeof t=="string"){if(/^\d+$/.test(t))t=Zt(t);else if(t=e.localeData().monthsParse(t),!Ci(t))return e}return n=Math.min(e.date(),dg(e.year(),t)),e._d["set"+(e._isUTC?"UTC":"")+"Month"](t,n),e}function gN(e){return e!=null?(mN(this,e),Ue.updateOffset(this,!0),this):Mv(this,"Month")}function J0e(){return dg(this.year(),this.month())}function Q0e(e){return this._monthsParseExact?(cn(this,"_monthsRegex")||yN.call(this),e?this._monthsShortStrictRegex:this._monthsShortRegex):(cn(this,"_monthsShortRegex")||(this._monthsShortRegex=U0e),this._monthsShortStrictRegex&&e?this._monthsShortStrictRegex:this._monthsShortRegex)}function e1e(e){return this._monthsParseExact?(cn(this,"_monthsRegex")||yN.call(this),e?this._monthsStrictRegex:this._monthsRegex):(cn(this,"_monthsRegex")||(this._monthsRegex=Y0e),this._monthsStrictRegex&&e?this._monthsStrictRegex:this._monthsRegex)}function yN(){function e(i,l){return l.length-i.length}var t=[],n=[],r=[],a,o;for(a=0;a<12;a++)o=Ho([2e3,a]),t.push(this.monthsShort(o,"")),n.push(this.months(o,"")),r.push(this.months(o,"")),r.push(this.monthsShort(o,""));for(t.sort(e),n.sort(e),r.sort(e),a=0;a<12;a++)t[a]=aa(t[a]),n[a]=aa(n[a]);for(a=0;a<24;a++)r[a]=aa(r[a]);this._monthsRegex=new 
RegExp("^("+r.join("|")+")","i"),this._monthsShortRegex=this._monthsRegex,this._monthsStrictRegex=new RegExp("^("+n.join("|")+")","i"),this._monthsShortStrictRegex=new RegExp("^("+t.join("|")+")","i")}Tt("Y",0,0,function(){var e=this.year();return e<=9999?Ro(e,4):"+"+e});Tt(0,["YY",2],0,function(){return this.year()%100});Tt(0,["YYYY",4],0,"year");Tt(0,["YYYYY",5],0,"year");Tt(0,["YYYYYY",6,!0],0,"year");Or("year","y");Pr("year",1);gt("Y",ug);gt("YY",Dn,ya);gt("YYYY",Lw,Rw);gt("YYYYY",sg,ig);gt("YYYYYY",sg,ig);Cn(["YYYYY","YYYYYY"],kr);Cn("YYYY",function(e,t){t[kr]=e.length===2?Ue.parseTwoDigitYear(e):Zt(e)});Cn("YY",function(e,t){t[kr]=Ue.parseTwoDigitYear(e)});Cn("Y",function(e,t){t[kr]=parseInt(e,10)});function $d(e){return og(e)?366:365}Ue.parseTwoDigitYear=function(e){return Zt(e)+(Zt(e)>68?1900:2e3)};var bN=_c("FullYear",!0);function t1e(){return og(this.year())}function n1e(e,t,n,r,a,o,i){var l;return e<100&&e>=0?(l=new Date(e+400,t,n,r,a,o,i),isFinite(l.getFullYear())&&l.setFullYear(e)):l=new Date(e,t,n,r,a,o,i),l}function af(e){var t,n;return e<100&&e>=0?(n=Array.prototype.slice.call(arguments),n[0]=e+400,t=new Date(Date.UTC.apply(null,n)),isFinite(t.getUTCFullYear())&&t.setUTCFullYear(e)):t=new Date(Date.UTC.apply(null,arguments)),t}function Nv(e,t,n){var r=7+t-n,a=(7+af(e,0,r).getUTCDay()-t)%7;return-a+r-1}function CN(e,t,n,r,a){var o=(7+n-r)%7,i=Nv(e,r,a),l=1+7*(t-1)+o+i,s,c;return l<=0?(s=e-1,c=$d(s)+l):l>$d(e)?(s=e+1,c=l-$d(e)):(s=e,c=l),{year:s,dayOfYear:c}}function of(e,t,n){var r=Nv(e.year(),t,n),a=Math.floor((e.dayOfYear()-r-1)/7)+1,o,i;return a<1?(i=e.year()-1,o=a+fi(i,t,n)):a>fi(e.year(),t,n)?(o=a-fi(e.year(),t,n),i=e.year()+1):(i=e.year(),o=a),{week:o,year:i}}function fi(e,t,n){var 
r=Nv(e,t,n),a=Nv(e+1,t,n);return($d(e)-r+a)/7}Tt("w",["ww",2],"wo","week");Tt("W",["WW",2],"Wo","isoWeek");Or("week","w");Or("isoWeek","W");Pr("week",5);Pr("isoWeek",5);gt("w",Dn);gt("ww",Dn,ya);gt("W",Dn);gt("WW",Dn,ya);Lf(["w","ww","W","WW"],function(e,t,n,r){t[r.substr(0,1)]=Zt(e)});function r1e(e){return of(e,this._week.dow,this._week.doy).week}var a1e={dow:0,doy:6};function o1e(){return this._week.dow}function i1e(){return this._week.doy}function l1e(e){var t=this.localeData().week(this);return e==null?t:this.add((e-t)*7,"d")}function s1e(e){var t=of(this,1,4).week;return e==null?t:this.add((e-t)*7,"d")}Tt("d",0,"do","day");Tt("dd",0,0,function(e){return this.localeData().weekdaysMin(this,e)});Tt("ddd",0,0,function(e){return this.localeData().weekdaysShort(this,e)});Tt("dddd",0,0,function(e){return this.localeData().weekdays(this,e)});Tt("e",0,0,"weekday");Tt("E",0,0,"isoWeekday");Or("day","d");Or("weekday","e");Or("isoWeekday","E");Pr("day",11);Pr("weekday",11);Pr("isoWeekday",11);gt("d",Dn);gt("e",Dn);gt("E",Dn);gt("dd",function(e,t){return t.weekdaysMinRegex(e)});gt("ddd",function(e,t){return t.weekdaysShortRegex(e)});gt("dddd",function(e,t){return t.weekdaysRegex(e)});Lf(["dd","ddd","dddd"],function(e,t,n,r){var a=n._locale.weekdaysParse(e,r,n._strict);a!=null?t.d=a:qt(n).invalidWeekday=e});Lf(["d","e","E"],function(e,t,n,r){t[r]=Zt(e)});function u1e(e,t){return typeof e!="string"?e:isNaN(e)?(e=t.weekdaysParse(e),typeof e=="number"?e:null):parseInt(e,10)}function c1e(e,t){return typeof e=="string"?t.weekdaysParse(e)%7||7:isNaN(e)?null:e}function Fw(e,t){return e.slice(t,7).concat(e.slice(0,t))}var d1e="Sunday_Monday_Tuesday_Wednesday_Thursday_Friday_Saturday".split("_"),wN="Sun_Mon_Tue_Wed_Thu_Fri_Sat".split("_"),f1e="Su_Mo_Tu_We_Th_Fr_Sa".split("_"),h1e=Rf,p1e=Rf,v1e=Rf;function m1e(e,t){var n=so(this._weekdays)?this._weekdays:this._weekdays[e&&e!==!0&&this._weekdays.isFormat.test(t)?"format":"standalone"];return 
e===!0?Fw(n,this._week.dow):e?n[e.day()]:n}function g1e(e){return e===!0?Fw(this._weekdaysShort,this._week.dow):e?this._weekdaysShort[e.day()]:this._weekdaysShort}function y1e(e){return e===!0?Fw(this._weekdaysMin,this._week.dow):e?this._weekdaysMin[e.day()]:this._weekdaysMin}function b1e(e,t,n){var r,a,o,i=e.toLocaleLowerCase();if(!this._weekdaysParse)for(this._weekdaysParse=[],this._shortWeekdaysParse=[],this._minWeekdaysParse=[],r=0;r<7;++r)o=Ho([2e3,1]).day(r),this._minWeekdaysParse[r]=this.weekdaysMin(o,"").toLocaleLowerCase(),this._shortWeekdaysParse[r]=this.weekdaysShort(o,"").toLocaleLowerCase(),this._weekdaysParse[r]=this.weekdays(o,"").toLocaleLowerCase();return n?t==="dddd"?(a=Zn.call(this._weekdaysParse,i),a!==-1?a:null):t==="ddd"?(a=Zn.call(this._shortWeekdaysParse,i),a!==-1?a:null):(a=Zn.call(this._minWeekdaysParse,i),a!==-1?a:null):t==="dddd"?(a=Zn.call(this._weekdaysParse,i),a!==-1||(a=Zn.call(this._shortWeekdaysParse,i),a!==-1)?a:(a=Zn.call(this._minWeekdaysParse,i),a!==-1?a:null)):t==="ddd"?(a=Zn.call(this._shortWeekdaysParse,i),a!==-1||(a=Zn.call(this._weekdaysParse,i),a!==-1)?a:(a=Zn.call(this._minWeekdaysParse,i),a!==-1?a:null)):(a=Zn.call(this._minWeekdaysParse,i),a!==-1||(a=Zn.call(this._weekdaysParse,i),a!==-1)?a:(a=Zn.call(this._shortWeekdaysParse,i),a!==-1?a:null))}function C1e(e,t,n){var r,a,o;if(this._weekdaysParseExact)return b1e.call(this,e,t,n);for(this._weekdaysParse||(this._weekdaysParse=[],this._minWeekdaysParse=[],this._shortWeekdaysParse=[],this._fullWeekdaysParse=[]),r=0;r<7;r++){if(a=Ho([2e3,1]).day(r),n&&!this._fullWeekdaysParse[r]&&(this._fullWeekdaysParse[r]=new RegExp("^"+this.weekdays(a,"").replace(".","\\.?")+"$","i"),this._shortWeekdaysParse[r]=new RegExp("^"+this.weekdaysShort(a,"").replace(".","\\.?")+"$","i"),this._minWeekdaysParse[r]=new 
RegExp("^"+this.weekdaysMin(a,"").replace(".","\\.?")+"$","i")),this._weekdaysParse[r]||(o="^"+this.weekdays(a,"")+"|^"+this.weekdaysShort(a,"")+"|^"+this.weekdaysMin(a,""),this._weekdaysParse[r]=new RegExp(o.replace(".",""),"i")),n&&t==="dddd"&&this._fullWeekdaysParse[r].test(e))return r;if(n&&t==="ddd"&&this._shortWeekdaysParse[r].test(e))return r;if(n&&t==="dd"&&this._minWeekdaysParse[r].test(e))return r;if(!n&&this._weekdaysParse[r].test(e))return r}}function w1e(e){if(!this.isValid())return e!=null?this:NaN;var t=this._isUTC?this._d.getUTCDay():this._d.getDay();return e!=null?(e=u1e(e,this.localeData()),this.add(e-t,"d")):t}function S1e(e){if(!this.isValid())return e!=null?this:NaN;var t=(this.day()+7-this.localeData()._week.dow)%7;return e==null?t:this.add(e-t,"d")}function k1e(e){if(!this.isValid())return e!=null?this:NaN;if(e!=null){var t=c1e(e,this.localeData());return this.day(this.day()%7?t:t-7)}else return this.day()||7}function $1e(e){return this._weekdaysParseExact?(cn(this,"_weekdaysRegex")||Bw.call(this),e?this._weekdaysStrictRegex:this._weekdaysRegex):(cn(this,"_weekdaysRegex")||(this._weekdaysRegex=h1e),this._weekdaysStrictRegex&&e?this._weekdaysStrictRegex:this._weekdaysRegex)}function O1e(e){return this._weekdaysParseExact?(cn(this,"_weekdaysRegex")||Bw.call(this),e?this._weekdaysShortStrictRegex:this._weekdaysShortRegex):(cn(this,"_weekdaysShortRegex")||(this._weekdaysShortRegex=p1e),this._weekdaysShortStrictRegex&&e?this._weekdaysShortStrictRegex:this._weekdaysShortRegex)}function P1e(e){return this._weekdaysParseExact?(cn(this,"_weekdaysRegex")||Bw.call(this),e?this._weekdaysMinStrictRegex:this._weekdaysMinRegex):(cn(this,"_weekdaysMinRegex")||(this._weekdaysMinRegex=v1e),this._weekdaysMinStrictRegex&&e?this._weekdaysMinStrictRegex:this._weekdaysMinRegex)}function Bw(){function e(d,f){return f.length-d.length}var 
t=[],n=[],r=[],a=[],o,i,l,s,c;for(o=0;o<7;o++)i=Ho([2e3,1]).day(o),l=aa(this.weekdaysMin(i,"")),s=aa(this.weekdaysShort(i,"")),c=aa(this.weekdays(i,"")),t.push(l),n.push(s),r.push(c),a.push(l),a.push(s),a.push(c);t.sort(e),n.sort(e),r.sort(e),a.sort(e),this._weekdaysRegex=new RegExp("^("+a.join("|")+")","i"),this._weekdaysShortRegex=this._weekdaysRegex,this._weekdaysMinRegex=this._weekdaysRegex,this._weekdaysStrictRegex=new RegExp("^("+r.join("|")+")","i"),this._weekdaysShortStrictRegex=new RegExp("^("+n.join("|")+")","i"),this._weekdaysMinStrictRegex=new RegExp("^("+t.join("|")+")","i")}function Vw(){return this.hours()%12||12}function T1e(){return this.hours()||24}Tt("H",["HH",2],0,"hour");Tt("h",["hh",2],0,Vw);Tt("k",["kk",2],0,T1e);Tt("hmm",0,0,function(){return""+Vw.apply(this)+Ro(this.minutes(),2)});Tt("hmmss",0,0,function(){return""+Vw.apply(this)+Ro(this.minutes(),2)+Ro(this.seconds(),2)});Tt("Hmm",0,0,function(){return""+this.hours()+Ro(this.minutes(),2)});Tt("Hmmss",0,0,function(){return""+this.hours()+Ro(this.minutes(),2)+Ro(this.seconds(),2)});function SN(e,t){Tt(e,0,0,function(){return this.localeData().meridiem(this.hours(),this.minutes(),t)})}SN("a",!0);SN("A",!1);Or("hour","h");Pr("hour",13);function kN(e,t){return t._meridiemParse}gt("a",kN);gt("A",kN);gt("H",Dn);gt("h",Dn);gt("k",Dn);gt("HH",Dn,ya);gt("hh",Dn,ya);gt("kk",Dn,ya);gt("hmm",fN);gt("hmmss",hN);gt("Hmm",fN);gt("Hmmss",hN);Cn(["H","HH"],ar);Cn(["k","kk"],function(e,t,n){var r=Zt(e);t[ar]=r===24?0:r});Cn(["a","A"],function(e,t,n){n._isPm=n._locale.isPM(e),n._meridiem=e});Cn(["h","hh"],function(e,t,n){t[ar]=Zt(e),qt(n).bigHour=!0});Cn("hmm",function(e,t,n){var r=e.length-2;t[ar]=Zt(e.substr(0,r)),t[Ja]=Zt(e.substr(r)),qt(n).bigHour=!0});Cn("hmmss",function(e,t,n){var r=e.length-4,a=e.length-2;t[ar]=Zt(e.substr(0,r)),t[Ja]=Zt(e.substr(r,2)),t[li]=Zt(e.substr(a)),qt(n).bigHour=!0});Cn("Hmm",function(e,t,n){var 
r=e.length-2;t[ar]=Zt(e.substr(0,r)),t[Ja]=Zt(e.substr(r))});Cn("Hmmss",function(e,t,n){var r=e.length-4,a=e.length-2;t[ar]=Zt(e.substr(0,r)),t[Ja]=Zt(e.substr(r,2)),t[li]=Zt(e.substr(a))});function x1e(e){return(e+"").toLowerCase().charAt(0)==="p"}var _1e=/[ap]\.?m?\.?/i,E1e=_c("Hours",!0);function M1e(e,t,n){return e>11?n?"pm":"PM":n?"am":"AM"}var $N={calendar:C0e,longDateFormat:$0e,invalidDate:P0e,ordinal:x0e,dayOfMonthOrdinalParse:_0e,relativeTime:M0e,months:W0e,monthsShort:pN,week:a1e,weekdays:d1e,weekdaysMin:f1e,weekdaysShort:wN,meridiemParse:_1e},Fn={},Hc={},lf;function I1e(e,t){var n,r=Math.min(e.length,t.length);for(n=0;n<r;n+=1)if(e[n]!==t[n])return n;return r}function XO(e){return e&&e.toLowerCase().replace("_","-")}function N1e(e){for(var t=0,n,r,a,o;t<e.length;){for(o=XO(e[t]).split("-"),n=o.length,r=XO(e[t+1]),r=r?r.split("-"):null;n>0;){if(a=fg(o.slice(0,n).join("-")),a)return a;if(r&&r.length>=n&&I1e(o,r)>=n-1)break;n--}t++}return lf}function A1e(e){return e.match("^[^/\\\\]*$")!=null}function fg(e){var t=null,n;if(Fn[e]===void 0&&typeof Sr!="undefined"&&Sr&&Sr.exports&&A1e(e))try{t=lf._abbr,n=require,n("./locale/"+e),nl(t)}catch{Fn[e]=null}return Fn[e]}function nl(e,t){var n;return e&&(Lr(t)?n=_i(e):n=zw(e,t),n?lf=n:typeof console!="undefined"&&console.warn&&console.warn("Locale "+e+" not found. Did you forget to load it?")),lf._abbr}function zw(e,t){if(t!==null){var n,r=$N;if(t.abbr=e,Fn[e]!=null)iN("defineLocaleOverride","use moment.updateLocale(localeName, config) to change an existing locale. 
moment.defineLocale(localeName, config) should only be used for creating a new locale See http://momentjs.com/guides/#/warnings/define-locale/ for more info."),r=Fn[e]._config;else if(t.parentLocale!=null)if(Fn[t.parentLocale]!=null)r=Fn[t.parentLocale]._config;else if(n=fg(t.parentLocale),n!=null)r=n._config;else return Hc[t.parentLocale]||(Hc[t.parentLocale]=[]),Hc[t.parentLocale].push({name:e,config:t}),null;return Fn[e]=new Nw(n1(r,t)),Hc[e]&&Hc[e].forEach(function(a){zw(a.name,a.config)}),nl(e),Fn[e]}else return delete Fn[e],null}function D1e(e,t){if(t!=null){var n,r,a=$N;Fn[e]!=null&&Fn[e].parentLocale!=null?Fn[e].set(n1(Fn[e]._config,t)):(r=fg(e),r!=null&&(a=r._config),t=n1(a,t),r==null&&(t.abbr=e),n=new Nw(t),n.parentLocale=Fn[e],Fn[e]=n),nl(e)}else Fn[e]!=null&&(Fn[e].parentLocale!=null?(Fn[e]=Fn[e].parentLocale,e===nl()&&nl(e)):Fn[e]!=null&&delete Fn[e]);return Fn[e]}function _i(e){var t;if(e&&e._locale&&e._locale._abbr&&(e=e._locale._abbr),!e)return lf;if(!so(e)){if(t=fg(e),t)return t;e=[e]}return N1e(e)}function R1e(){return r1(Fn)}function Hw(e){var t,n=e._a;return n&&qt(e).overflow===-2&&(t=n[ii]<0||n[ii]>11?ii:n[Oo]<1||n[Oo]>dg(n[kr],n[ii])?Oo:n[ar]<0||n[ar]>24||n[ar]===24&&(n[Ja]!==0||n[li]!==0||n[Xl]!==0)?ar:n[Ja]<0||n[Ja]>59?Ja:n[li]<0||n[li]>59?li:n[Xl]<0||n[Xl]>999?Xl:-1,qt(e)._overflowDayOfYear&&(t<kr||t>Oo)&&(t=Oo),qt(e)._overflowWeeks&&t===-1&&(t=H0e),qt(e)._overflowWeekday&&t===-1&&(t=j0e),qt(e).overflow=t),e}var L1e=/^\s*((?:[+-]\d{6}|\d{4})-(?:\d\d-\d\d|W\d\d-\d|W\d\d|\d\d\d|\d\d))(?:(T| )(\d\d(?::\d\d(?::\d\d(?:[.,]\d+)?)?)?)([+-]\d\d(?::?\d\d)?|\s*Z)?)?$/,F1e=/^\s*((?:[+-]\d{6}|\d{4})(?:\d\d\d\d|W\d\d\d|W\d\d|\d\d\d|\d\d|))(?:(T| 
)(\d\d(?:\d\d(?:\d\d(?:[.,]\d+)?)?)?)([+-]\d\d(?::?\d\d)?|\s*Z)?)?$/,B1e=/Z|[+-]\d\d(?::?\d\d)?/,vh=[["YYYYYY-MM-DD",/[+-]\d{6}-\d\d-\d\d/],["YYYY-MM-DD",/\d{4}-\d\d-\d\d/],["GGGG-[W]WW-E",/\d{4}-W\d\d-\d/],["GGGG-[W]WW",/\d{4}-W\d\d/,!1],["YYYY-DDD",/\d{4}-\d{3}/],["YYYY-MM",/\d{4}-\d\d/,!1],["YYYYYYMMDD",/[+-]\d{10}/],["YYYYMMDD",/\d{8}/],["GGGG[W]WWE",/\d{4}W\d{3}/],["GGGG[W]WW",/\d{4}W\d{2}/,!1],["YYYYDDD",/\d{7}/],["YYYYMM",/\d{6}/,!1],["YYYY",/\d{4}/,!1]],My=[["HH:mm:ss.SSSS",/\d\d:\d\d:\d\d\.\d+/],["HH:mm:ss,SSSS",/\d\d:\d\d:\d\d,\d+/],["HH:mm:ss",/\d\d:\d\d:\d\d/],["HH:mm",/\d\d:\d\d/],["HHmmss.SSSS",/\d\d\d\d\d\d\.\d+/],["HHmmss,SSSS",/\d\d\d\d\d\d,\d+/],["HHmmss",/\d\d\d\d\d\d/],["HHmm",/\d\d\d\d/],["HH",/\d\d/]],V1e=/^\/?Date\((-?\d+)/i,z1e=/^(?:(Mon|Tue|Wed|Thu|Fri|Sat|Sun),?\s)?(\d{1,2})\s(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s(\d{2,4})\s(\d\d):(\d\d)(?::(\d\d))?\s(?:(UT|GMT|[ECMP][SD]T)|([Zz])|([+-]\d{4}))$/,H1e={UT:0,GMT:0,EDT:-4*60,EST:-5*60,CDT:-5*60,CST:-6*60,MDT:-6*60,MST:-7*60,PDT:-7*60,PST:-8*60};function ON(e){var t,n,r=e._i,a=L1e.exec(r)||F1e.exec(r),o,i,l,s,c=vh.length,d=My.length;if(a){for(qt(e).iso=!0,t=0,n=c;t<n;t++)if(vh[t][1].exec(a[1])){i=vh[t][0],o=vh[t][2]!==!1;break}if(i==null){e._isValid=!1;return}if(a[3]){for(t=0,n=d;t<n;t++)if(My[t][1].exec(a[3])){l=(a[2]||" ")+My[t][0];break}if(l==null){e._isValid=!1;return}}if(!o&&l!=null){e._isValid=!1;return}if(a[4])if(B1e.exec(a[4]))s="Z";else{e._isValid=!1;return}e._f=i+(l||"")+(s||""),Kw(e)}else e._isValid=!1}function j1e(e,t,n,r,a,o){var i=[K1e(e),pN.indexOf(t),parseInt(n,10),parseInt(r,10),parseInt(a,10)];return o&&i.push(parseInt(o,10)),i}function K1e(e){var t=parseInt(e,10);return t<=49?2e3+t:t<=999?1900+t:t}function W1e(e){return e.replace(/\([^)]*\)|[\n\t]/g," ").replace(/(\s\s+)/g," ").replace(/^\s\s*/,"").replace(/\s\s*$/,"")}function U1e(e,t,n){if(e){var r=wN.indexOf(e),a=new Date(t[0],t[1],t[2]).getDay();if(r!==a)return 
qt(n).weekdayMismatch=!0,n._isValid=!1,!1}return!0}function Y1e(e,t,n){if(e)return H1e[e];if(t)return 0;var r=parseInt(n,10),a=r%100,o=(r-a)/100;return o*60+a}function PN(e){var t=z1e.exec(W1e(e._i)),n;if(t){if(n=j1e(t[4],t[3],t[2],t[5],t[6],t[7]),!U1e(t[1],n,e))return;e._a=n,e._tzm=Y1e(t[8],t[9],t[10]),e._d=af.apply(null,e._a),e._d.setUTCMinutes(e._d.getUTCMinutes()-e._tzm),qt(e).rfc2822=!0}else e._isValid=!1}function q1e(e){var t=V1e.exec(e._i);if(t!==null){e._d=new Date(+t[1]);return}if(ON(e),e._isValid===!1)delete e._isValid;else return;if(PN(e),e._isValid===!1)delete e._isValid;else return;e._strict?e._isValid=!1:Ue.createFromInputFallback(e)}Ue.createFromInputFallback=Fa("value provided is not in a recognized RFC2822 or ISO format. moment construction falls back to js Date(), which is not reliable across all browsers and versions. Non RFC2822/ISO date formats are discouraged. Please refer to http://momentjs.com/guides/#/warnings/js-date/ for more info.",function(e){e._d=new Date(e._i+(e._useUTC?" 
UTC":""))});function uu(e,t,n){return e!=null?e:t!=null?t:n}function G1e(e){var t=new Date(Ue.now());return e._useUTC?[t.getUTCFullYear(),t.getUTCMonth(),t.getUTCDate()]:[t.getFullYear(),t.getMonth(),t.getDate()]}function jw(e){var t,n,r=[],a,o,i;if(!e._d){for(a=G1e(e),e._w&&e._a[Oo]==null&&e._a[ii]==null&&X1e(e),e._dayOfYear!=null&&(i=uu(e._a[kr],a[kr]),(e._dayOfYear>$d(i)||e._dayOfYear===0)&&(qt(e)._overflowDayOfYear=!0),n=af(i,0,e._dayOfYear),e._a[ii]=n.getUTCMonth(),e._a[Oo]=n.getUTCDate()),t=0;t<3&&e._a[t]==null;++t)e._a[t]=r[t]=a[t];for(;t<7;t++)e._a[t]=r[t]=e._a[t]==null?t===2?1:0:e._a[t];e._a[ar]===24&&e._a[Ja]===0&&e._a[li]===0&&e._a[Xl]===0&&(e._nextDay=!0,e._a[ar]=0),e._d=(e._useUTC?af:n1e).apply(null,r),o=e._useUTC?e._d.getUTCDay():e._d.getDay(),e._tzm!=null&&e._d.setUTCMinutes(e._d.getUTCMinutes()-e._tzm),e._nextDay&&(e._a[ar]=24),e._w&&typeof e._w.d!="undefined"&&e._w.d!==o&&(qt(e).weekdayMismatch=!0)}}function X1e(e){var t,n,r,a,o,i,l,s,c;t=e._w,t.GG!=null||t.W!=null||t.E!=null?(o=1,i=4,n=uu(t.GG,e._a[kr],of(Nn(),1,4).year),r=uu(t.W,1),a=uu(t.E,1),(a<1||a>7)&&(s=!0)):(o=e._locale._week.dow,i=e._locale._week.doy,c=of(Nn(),o,i),n=uu(t.gg,e._a[kr],c.year),r=uu(t.w,c.week),t.d!=null?(a=t.d,(a<0||a>6)&&(s=!0)):t.e!=null?(a=t.e+o,(t.e<0||t.e>6)&&(s=!0)):a=o),r<1||r>fi(n,o,i)?qt(e)._overflowWeeks=!0:s!=null?qt(e)._overflowWeekday=!0:(l=CN(n,r,a,o,i),e._a[kr]=l.year,e._dayOfYear=l.dayOfYear)}Ue.ISO_8601=function(){};Ue.RFC_2822=function(){};function Kw(e){if(e._f===Ue.ISO_8601){ON(e);return}if(e._f===Ue.RFC_2822){PN(e);return}e._a=[],qt(e).empty=!0;var 
t=""+e._i,n,r,a,o,i,l=t.length,s=0,c,d;for(a=lN(e._f,e._locale).match(Aw)||[],d=a.length,n=0;n<d;n++)o=a[n],r=(t.match(B0e(o,e))||[])[0],r&&(i=t.substr(0,t.indexOf(r)),i.length>0&&qt(e).unusedInput.push(i),t=t.slice(t.indexOf(r)+r.length),s+=r.length),Eu[o]?(r?qt(e).empty=!1:qt(e).unusedTokens.push(o),z0e(o,r,e)):e._strict&&!r&&qt(e).unusedTokens.push(o);qt(e).charsLeftOver=l-s,t.length>0&&qt(e).unusedInput.push(t),e._a[ar]<=12&&qt(e).bigHour===!0&&e._a[ar]>0&&(qt(e).bigHour=void 0),qt(e).parsedDateParts=e._a.slice(0),qt(e).meridiem=e._meridiem,e._a[ar]=Z1e(e._locale,e._a[ar],e._meridiem),c=qt(e).era,c!==null&&(e._a[kr]=e._locale.erasConvertYear(c,e._a[kr])),jw(e),Hw(e)}function Z1e(e,t,n){var r;return n==null?t:e.meridiemHour!=null?e.meridiemHour(t,n):(e.isPM!=null&&(r=e.isPM(n),r&&t<12&&(t+=12),!r&&t===12&&(t=0)),t)}function J1e(e){var t,n,r,a,o,i,l=!1,s=e._f.length;if(s===0){qt(e).invalidFormat=!0,e._d=new Date(NaN);return}for(a=0;a<s;a++)o=0,i=!1,t=Iw({},e),e._useUTC!=null&&(t._useUTC=e._useUTC),t._f=e._f[a],Kw(t),Mw(t)&&(i=!0),o+=qt(t).charsLeftOver,o+=qt(t).unusedTokens.length*10,qt(t).score=o,l?o<r&&(r=o,n=t):(r==null||o<r||i)&&(r=o,n=t,i&&(l=!0));Gi(e,n||t)}function Q1e(e){if(!e._d){var t=Dw(e._i),n=t.day===void 0?t.date:t.day;e._a=aN([t.year,t.month,n,t.hour,t.minute,t.second,t.millisecond],function(r){return r&&parseInt(r,10)}),jw(e)}}function eCe(e){var t=new Df(Hw(TN(e)));return t._nextDay&&(t.add(1,"d"),t._nextDay=void 0),t}function TN(e){var t=e._i,n=e._f;return e._locale=e._locale||_i(e._l),t===null||n===void 0&&t===""?ag({nullInput:!0}):(typeof t=="string"&&(e._i=t=e._locale.preparse(t)),uo(t)?new Df(Hw(t)):(Af(t)?e._d=t:so(n)?J1e(e):n?Kw(e):tCe(e),Mw(e)||(e._d=null),e))}function tCe(e){var t=e._i;Lr(t)?e._d=new Date(Ue.now()):Af(t)?e._d=new Date(t.valueOf()):typeof t=="string"?q1e(e):so(t)?(e._a=aN(t.slice(0),function(n){return parseInt(n,10)}),jw(e)):ls(t)?Q1e(e):Ci(t)?e._d=new Date(t):Ue.createFromInputFallback(e)}function xN(e,t,n,r,a){var 
o={};return(t===!0||t===!1)&&(r=t,t=void 0),(n===!0||n===!1)&&(r=n,n=void 0),(ls(e)&&Ew(e)||so(e)&&e.length===0)&&(e=void 0),o._isAMomentObject=!0,o._useUTC=o._isUTC=a,o._l=n,o._i=e,o._f=t,o._strict=r,eCe(o)}
/* Nn(input, format, locale, strict): forwards to xN with the UTC flag fixed to false. */
function Nn(e,t,n,r){return xN(e,t,n,r,!1)}
/* nCe / rCe: instance-level min/max wrapped by Fa together with a deprecation message.
   Both build a moment from their arguments via Nn and return ag() when either side is invalid.
   Note the inversion spelled out in the messages: nCe ("moment().min") returns the LATER of the two
   (e<this?this:e) and rCe ("moment().max") returns the EARLIER one. */
var nCe=Fa("moment().min is deprecated, use moment.max instead. http://momentjs.com/guides/#/warnings/min-max/",function(){var e=Nn.apply(null,arguments);return this.isValid()&&e.isValid()?e<this?this:e:ag()}),rCe=Fa("moment().max is deprecated, use moment.min instead. http://momentjs.com/guides/#/warnings/min-max/",function(){var e=Nn.apply(null,arguments);return this.isValid()&&e.isValid()?e>this?this:e:ag()});
/* _N(method, list): picks one entry from list. A single array argument is unwrapped first; an empty
   list yields Nn(). Starting from list[0], the pick is replaced by any later entry that is invalid
   or for which entry[method](pick) is truthy. */
function _N(e,t){var n,r;if(t.length===1&&so(t[0])&&(t=t[0]),!t.length)return Nn();for(n=t[0],r=1;r<t.length;++r)(!t[r].isValid()||t[r][e](n))&&(n=t[r]);return n}
/* aCe / oCe: variadic front-ends for _N using "isBefore" / "isAfter"; exported below as Ue.min / Ue.max. */
function aCe(){var e=[].slice.call(arguments,0);return _N("isBefore",e)}function oCe(){var e=[].slice.call(arguments,0);return _N("isAfter",e)}
/* iCe: current time in milliseconds, falling back to +new Date when Date.now is missing.
   jc: the unit keys that lCe accepts, ordered from largest to smallest. */
var iCe=function(){return Date.now?Date.now():+new Date},jc=["year","quarter","month","week","day","hour","minute","second","millisecond"];
/* lCe(obj): validity check for a duration input object.
   1) every own key must be listed in jc and hold null/undefined or something that is not NaN
      (Zn is presumably Array.prototype.indexOf - confirm at its definition);
   2) walking jc in order, once a truthy unit has a value with parseFloat(v)!==Zt(v), any further
      truthy unit makes the input invalid (Zt presumably converts to an integer - confirm). */
function lCe(e){var t,n=!1,r,a=jc.length;for(t in e)if(cn(e,t)&&!(Zn.call(jc,t)!==-1&&(e[t]==null||!isNaN(e[t]))))return!1;for(r=0;r<a;++r)if(e[jc[r]]){if(n)return!1;parseFloat(e[jc[r]])!==Zt(e[jc[r]])&&(n=!0)}return!0}
/* sCe: returns the cached _isValid flag. uCe: builds a duration from NaN via mo. */
function sCe(){return this._isValid}function uCe(){return mo(NaN)}
/* hg(input): duration constructor. Reads the unit fields of Dw(input) (missing ones default to 0) and
   folds them into three counters:
     _milliseconds = ms + s*1e3 + min*6e4 + h*36e5
     _days         = days + weeks*7   (week falls back to isoWeek)
     _months       = months + quarters*3 + years*12
   then stores validity from lCe, an empty _data, the locale from _i(), and calls this._bubble(). */
function hg(e){var t=Dw(e),n=t.year||0,r=t.quarter||0,a=t.month||0,o=t.week||t.isoWeek||0,i=t.day||0,l=t.hour||0,s=t.minute||0,c=t.second||0,d=t.millisecond||0;this._isValid=lCe(t),this._milliseconds=+d+c*1e3+s*6e4+l*1e3*60*60,this._days=+i+o*7,this._months=+a+r*3+n*12,this._data={},this._locale=_i(),this._bubble()}
/* Rp: true when the value is an hg instance. */
function Rp(e){return e instanceof hg}
/* o1: Math.round that mirrors negatives (rounds |e| and re-applies the sign), so -2.5 -> -3 rather than -2. */
function o1(e){return e<0?Math.round(-1*e)*-1:Math.round(e)}
/* cCe(a, b, strict): number of differing positions in the common prefix plus the length difference.
   With strict truthy the elements are compared with !==, otherwise after passing both through Zt. */
function cCe(e,t,n){var r=Math.min(e.length,t.length),a=Math.abs(e.length-t.length),o=0,i;for(i=0;i<r;i++)(n&&e[i]!==t[i]||!n&&Zt(e[i])!==Zt(t[i]))&&o++;return o+a}function 
EN(e,t){Tt(e,0,0,function(){var n=this.utcOffset(),r="+";return n<0&&(n=-n,r="-"),r+Ro(~~(n/60),2)+t+Ro(~~n%60,2)})}EN("Z",":");EN("ZZ","");gt("Z",cg);gt("ZZ",cg);Cn(["Z","ZZ"],function(e,t,n){n._useUTC=!0,n._tzm=Ww(cg,e)});var dCe=/([\+\-]|\d\d)/gi;function Ww(e,t){var n=(t||"").match(e),r,a,o;return n===null?null:(r=n[n.length-1]||[],a=(r+"").match(dCe)||["-",0,0],o=+(a[1]*60)+Zt(a[2]),o===0?0:a[0]==="+"?o:-o)}function Uw(e,t){var n,r;return t._isUTC?(n=t.clone(),r=(uo(e)||Af(e)?e.valueOf():Nn(e).valueOf())-n.valueOf(),n._d.setTime(n._d.valueOf()+r),Ue.updateOffset(n,!1),n):Nn(e).local()}function i1(e){return-Math.round(e._d.getTimezoneOffset())}Ue.updateOffset=function(){};function fCe(e,t,n){var r=this._offset||0,a;if(!this.isValid())return e!=null?this:NaN;if(e!=null){if(typeof e=="string"){if(e=Ww(cg,e),e===null)return this}else Math.abs(e)<16&&!n&&(e=e*60);return!this._isUTC&&t&&(a=i1(this)),this._offset=e,this._isUTC=!0,a!=null&&this.add(a,"m"),r!==e&&(!t||this._changeInProgress?NN(this,mo(e-r,"m"),1,!1):this._changeInProgress||(this._changeInProgress=!0,Ue.updateOffset(this,!0),this._changeInProgress=null)),this}else return this._isUTC?r:i1(this)}function hCe(e,t){return e!=null?(typeof e!="string"&&(e=-e),this.utcOffset(e,t),this):-this.utcOffset()}function pCe(e){return this.utcOffset(0,e)}function vCe(e){return this._isUTC&&(this.utcOffset(0,e),this._isUTC=!1,e&&this.subtract(i1(this),"m")),this}function mCe(){if(this._tzm!=null)this.utcOffset(this._tzm,!1,!0);else if(typeof this._i=="string"){var e=Ww(L0e,this._i);e!=null?this.utcOffset(e):this.utcOffset(0,!0)}return this}function gCe(e){return this.isValid()?(e=e?Nn(e).utcOffset():0,(this.utcOffset()-e)%60===0):!1}function yCe(){return this.utcOffset()>this.clone().month(0).utcOffset()||this.utcOffset()>this.clone().month(5).utcOffset()}function bCe(){if(!Lr(this._isDSTShifted))return this._isDSTShifted;var e={},t;return 
Iw(e,this),e=TN(e),e._a?(t=e._isUTC?Ho(e._a):Nn(e._a),this._isDSTShifted=this.isValid()&&cCe(e._a,t.toArray())>0):this._isDSTShifted=!1,this._isDSTShifted}function CCe(){return this.isValid()?!this._isUTC:!1}function wCe(){return this.isValid()?this._isUTC:!1}function MN(){return this.isValid()?this._isUTC&&this._offset===0:!1}var SCe=/^(-|\+)?(?:(\d*)[. ])?(\d+):(\d+)(?::(\d+)(\.\d*)?)?$/,kCe=/^(-|\+)?P(?:([-+]?[0-9,.]*)Y)?(?:([-+]?[0-9,.]*)M)?(?:([-+]?[0-9,.]*)W)?(?:([-+]?[0-9,.]*)D)?(?:T(?:([-+]?[0-9,.]*)H)?(?:([-+]?[0-9,.]*)M)?(?:([-+]?[0-9,.]*)S)?)?$/;function mo(e,t){var n=e,r=null,a,o,i;return Rp(e)?n={ms:e._milliseconds,d:e._days,M:e._months}:Ci(e)||!isNaN(+e)?(n={},t?n[t]=+e:n.milliseconds=+e):(r=SCe.exec(e))?(a=r[1]==="-"?-1:1,n={y:0,d:Zt(r[Oo])*a,h:Zt(r[ar])*a,m:Zt(r[Ja])*a,s:Zt(r[li])*a,ms:Zt(o1(r[Xl]*1e3))*a}):(r=kCe.exec(e))?(a=r[1]==="-"?-1:1,n={y:Ol(r[2],a),M:Ol(r[3],a),w:Ol(r[4],a),d:Ol(r[5],a),h:Ol(r[6],a),m:Ol(r[7],a),s:Ol(r[8],a)}):n==null?n={}:typeof n=="object"&&("from"in n||"to"in n)&&(i=$Ce(Nn(n.from),Nn(n.to)),n={},n.ms=i.milliseconds,n.M=i.months),o=new hg(n),Rp(e)&&cn(e,"_locale")&&(o._locale=e._locale),Rp(e)&&cn(e,"_isValid")&&(o._isValid=e._isValid),o}mo.fn=hg.prototype;mo.invalid=uCe;function Ol(e,t){var n=e&&parseFloat(e.replace(",","."));return(isNaN(n)?0:n)*t}function ZO(e,t){var n={};return n.months=t.month()-e.month()+(t.year()-e.year())*12,e.clone().add(n.months,"M").isAfter(t)&&--n.months,n.milliseconds=+t-+e.clone().add(n.months,"M"),n}function $Ce(e,t){var n;return e.isValid()&&t.isValid()?(t=Uw(t,e),e.isBefore(t)?n=ZO(e,t):(n=ZO(t,e),n.milliseconds=-n.milliseconds,n.months=-n.months),n):{milliseconds:0,months:0}}function IN(e,t){return function(n,r){var a,o;return r!==null&&!isNaN(+r)&&(iN(t,"moment()."+t+"(period, number) is deprecated. Please use moment()."+t+"(number, period). 
See http://momentjs.com/guides/#/warnings/add-inverted-param/ for more info."),o=n,n=r,r=o),a=mo(n,r),NN(this,a,e),this}}function NN(e,t,n,r){var a=t._milliseconds,o=o1(t._days),i=o1(t._months);!e.isValid()||(r=r==null?!0:r,i&&mN(e,Mv(e,"Month")+i*n),o&&uN(e,"Date",Mv(e,"Date")+o*n),a&&e._d.setTime(e._d.valueOf()+a*n),r&&Ue.updateOffset(e,o||i))}var OCe=IN(1,"add"),PCe=IN(-1,"subtract");function AN(e){return typeof e=="string"||e instanceof String}function TCe(e){return uo(e)||Af(e)||AN(e)||Ci(e)||_Ce(e)||xCe(e)||e===null||e===void 0}function xCe(e){var t=ls(e)&&!Ew(e),n=!1,r=["years","year","y","months","month","M","days","day","d","dates","date","D","hours","hour","h","minutes","minute","m","seconds","second","s","milliseconds","millisecond","ms"],a,o,i=r.length;for(a=0;a<i;a+=1)o=r[a],n=n||cn(e,o);return t&&n}function _Ce(e){var t=so(e),n=!1;return t&&(n=e.filter(function(r){return!Ci(r)&&AN(e)}).length===0),t&&n}function ECe(e){var t=ls(e)&&!Ew(e),n=!1,r=["sameDay","nextDay","lastDay","nextWeek","lastWeek","sameElse"],a,o;for(a=0;a<r.length;a+=1)o=r[a],n=n||cn(e,o);return t&&n}function MCe(e,t){var n=e.diff(t,"days",!0);return n<-6?"sameElse":n<-1?"lastWeek":n<0?"lastDay":n<1?"sameDay":n<2?"nextDay":n<7?"nextWeek":"sameElse"}function ICe(e,t){arguments.length===1&&(arguments[0]?TCe(arguments[0])?(e=arguments[0],t=void 0):ECe(arguments[0])&&(t=arguments[0],e=void 0):(e=void 0,t=void 0));var n=e||Nn(),r=Uw(n,this).startOf("day"),a=Ue.calendarFormat(this,r)||"sameElse",o=t&&(jo(t[a])?t[a].call(this,n):t[a]);return this.format(o||this.localeData().calendar(a,this,Nn(n)))}function NCe(){return new Df(this)}function ACe(e,t){var n=uo(e)?e:Nn(e);return this.isValid()&&n.isValid()?(t=Ba(t)||"millisecond",t==="millisecond"?this.valueOf()>n.valueOf():n.valueOf()<this.clone().startOf(t).valueOf()):!1}function DCe(e,t){var n=uo(e)?e:Nn(e);return 
this.isValid()&&n.isValid()?(t=Ba(t)||"millisecond",t==="millisecond"?this.valueOf()<n.valueOf():this.clone().endOf(t).valueOf()<n.valueOf()):!1}function RCe(e,t,n,r){var a=uo(e)?e:Nn(e),o=uo(t)?t:Nn(t);return this.isValid()&&a.isValid()&&o.isValid()?(r=r||"()",(r[0]==="("?this.isAfter(a,n):!this.isBefore(a,n))&&(r[1]===")"?this.isBefore(o,n):!this.isAfter(o,n))):!1}function LCe(e,t){var n=uo(e)?e:Nn(e),r;return this.isValid()&&n.isValid()?(t=Ba(t)||"millisecond",t==="millisecond"?this.valueOf()===n.valueOf():(r=n.valueOf(),this.clone().startOf(t).valueOf()<=r&&r<=this.clone().endOf(t).valueOf())):!1}function FCe(e,t){return this.isSame(e,t)||this.isAfter(e,t)}function BCe(e,t){return this.isSame(e,t)||this.isBefore(e,t)}function VCe(e,t,n){var r,a,o;if(!this.isValid())return NaN;if(r=Uw(e,this),!r.isValid())return NaN;switch(a=(r.utcOffset()-this.utcOffset())*6e4,t=Ba(t),t){case"year":o=Lp(this,r)/12;break;case"month":o=Lp(this,r);break;case"quarter":o=Lp(this,r)/3;break;case"second":o=(this-r)/1e3;break;case"minute":o=(this-r)/6e4;break;case"hour":o=(this-r)/36e5;break;case"day":o=(this-r-a)/864e5;break;case"week":o=(this-r-a)/6048e5;break;default:o=this-r}return n?o:Oa(o)}function Lp(e,t){if(e.date()<t.date())return-Lp(t,e);var n=(t.year()-e.year())*12+(t.month()-e.month()),r=e.clone().add(n,"months"),a,o;return t-r<0?(a=e.clone().add(n-1,"months"),o=(t-r)/(r-a)):(a=e.clone().add(n+1,"months"),o=(t-r)/(a-r)),-(n+o)||0}Ue.defaultFormat="YYYY-MM-DDTHH:mm:ssZ";Ue.defaultFormatUtc="YYYY-MM-DDTHH:mm:ss[Z]";function zCe(){return this.clone().locale("en").format("ddd MMM DD YYYY HH:mm:ss [GMT]ZZ")}function HCe(e){if(!this.isValid())return null;var t=e!==!0,n=t?this.clone().utc():this;return n.year()<0||n.year()>9999?Dp(n,t?"YYYYYY-MM-DD[T]HH:mm:ss.SSS[Z]":"YYYYYY-MM-DD[T]HH:mm:ss.SSSZ"):jo(Date.prototype.toISOString)?t?this.toDate().toISOString():new 
Date(this.valueOf()+this.utcOffset()*60*1e3).toISOString().replace("Z",Dp(n,"Z")):Dp(n,t?"YYYY-MM-DD[T]HH:mm:ss.SSS[Z]":"YYYY-MM-DD[T]HH:mm:ss.SSSZ")}function jCe(){if(!this.isValid())return"moment.invalid(/* "+this._i+" */)";var e="moment",t="",n,r,a,o;return this.isLocal()||(e=this.utcOffset()===0?"moment.utc":"moment.parseZone",t="Z"),n="["+e+'("]',r=0<=this.year()&&this.year()<=9999?"YYYY":"YYYYYY",a="-MM-DD[T]HH:mm:ss.SSS",o=t+'[")]',this.format(n+r+a+o)}function KCe(e){e||(e=this.isUtc()?Ue.defaultFormatUtc:Ue.defaultFormat);var t=Dp(this,e);return this.localeData().postformat(t)}function WCe(e,t){return this.isValid()&&(uo(e)&&e.isValid()||Nn(e).isValid())?mo({to:this,from:e}).locale(this.locale()).humanize(!t):this.localeData().invalidDate()}function UCe(e){return this.from(Nn(),e)}function YCe(e,t){return this.isValid()&&(uo(e)&&e.isValid()||Nn(e).isValid())?mo({from:this,to:e}).locale(this.locale()).humanize(!t):this.localeData().invalidDate()}function qCe(e){return this.to(Nn(),e)}function DN(e){var t;return e===void 0?this._locale._abbr:(t=_i(e),t!=null&&(this._locale=t),this)}var RN=Fa("moment().lang() is deprecated. Instead, use moment().localeData() to get the language configuration. 
Use moment().locale() to change languages.",function(e){return e===void 0?this.localeData():this.locale(e)});
/* LN: returns the locale object stored on the instance (_locale). */
function LN(){return this._locale}
/* Millisecond constants: Av = 1 second, Mu = 1 minute, Dv = 1 hour,
   FN = (365*400+97) days expressed in milliseconds, i.e. the length of a 400-year span. */
var Av=1e3,Mu=60*Av,Dv=60*Mu,FN=(365*400+97)*24*Dv;
/* Iu: modulo whose result takes the sign of the divisor (never negative for a positive divisor). */
function Iu(e,t){return(e%t+t)%t}
/* BN / VN: timestamp of (year, month, day) at midnight in local time / in UTC.
   For years 0..99 the date is built 400 years later and FN is subtracted again, which sidesteps the
   special handling Date applies to two-digit years. */
function BN(e,t,n){return e<100&&e>=0?new Date(e+400,t,n)-FN:new Date(e,t,n).valueOf()}function VN(e,t,n){return e<100&&e>=0?Date.UTC(e+400,t,n)-FN:Date.UTC(e,t,n)}
/* GCe(units): startOf - moves `this` in place to the first millisecond of the given unit and returns it.
   No-op for undefined / "millisecond" units and for invalid moments. Year..day use BN or VN depending
   on _isUTC; hour/minute/second subtract the remainder of the raw timestamp (for hours the local UTC
   offset is added before taking the remainder). Finishes with Ue.updateOffset(this, true). */
function GCe(e){var t,n;if(e=Ba(e),e===void 0||e==="millisecond"||!this.isValid())return this;switch(n=this._isUTC?VN:BN,e){case"year":t=n(this.year(),0,1);break;case"quarter":t=n(this.year(),this.month()-this.month()%3,1);break;case"month":t=n(this.year(),this.month(),1);break;case"week":t=n(this.year(),this.month(),this.date()-this.weekday());break;case"isoWeek":t=n(this.year(),this.month(),this.date()-(this.isoWeekday()-1));break;case"day":case"date":t=n(this.year(),this.month(),this.date());break;case"hour":t=this._d.valueOf(),t-=Iu(t+(this._isUTC?0:this.utcOffset()*Mu),Dv);break;case"minute":t=this._d.valueOf(),t-=Iu(t,Mu);break;case"second":t=this._d.valueOf(),t-=Iu(t,Av);break}return this._d.setTime(t),Ue.updateOffset(this,!0),this}
/* XCe(units): endOf - mirror image of GCe: computes the start of the NEXT unit and subtracts one
   millisecond, so `this` lands on the last millisecond of the current unit. */
function XCe(e){var t,n;if(e=Ba(e),e===void 0||e==="millisecond"||!this.isValid())return this;switch(n=this._isUTC?VN:BN,e){case"year":t=n(this.year()+1,0,1)-1;break;case"quarter":t=n(this.year(),this.month()-this.month()%3+3,1)-1;break;case"month":t=n(this.year(),this.month()+1,1)-1;break;case"week":t=n(this.year(),this.month(),this.date()-this.weekday()+7)-1;break;case"isoWeek":t=n(this.year(),this.month(),this.date()-(this.isoWeekday()-1)+7)-1;break;case"day":case"date":t=n(this.year(),this.month(),this.date()+1)-1;break;case"hour":t=this._d.valueOf(),t+=Dv-Iu(t+(this._isUTC?0:this.utcOffset()*Mu),Dv)-1;break;case"minute":t=this._d.valueOf(),t+=Mu-Iu(t,Mu)-1;break;case"second":t=this._d.valueOf(),t+=Av-Iu(t,Av)-1;break}return this._d.setTime(t),Ue.updateOffset(this,!0),this}
/* ZCe: valueOf - timestamp of the wrapped Date minus the stored offset (_offset minutes, converted to ms). */
function ZCe(){return this._d.valueOf()-(this._offset||0)*6e4}function 
JCe(){return Math.floor(this.valueOf()/1e3)}function QCe(){return new Date(this.valueOf())}function ewe(){var e=this;return[e.year(),e.month(),e.date(),e.hour(),e.minute(),e.second(),e.millisecond()]}function twe(){var e=this;return{years:e.year(),months:e.month(),date:e.date(),hours:e.hours(),minutes:e.minutes(),seconds:e.seconds(),milliseconds:e.milliseconds()}}function nwe(){return this.isValid()?this.toISOString():null}function rwe(){return Mw(this)}function awe(){return Gi({},qt(this))}function owe(){return qt(this).overflow}function iwe(){return{input:this._i,format:this._f,locale:this._locale,isUTC:this._isUTC,strict:this._strict}}Tt("N",0,0,"eraAbbr");Tt("NN",0,0,"eraAbbr");Tt("NNN",0,0,"eraAbbr");Tt("NNNN",0,0,"eraName");Tt("NNNNN",0,0,"eraNarrow");Tt("y",["y",1],"yo","eraYear");Tt("y",["yy",2],0,"eraYear");Tt("y",["yyy",3],0,"eraYear");Tt("y",["yyyy",4],0,"eraYear");gt("N",Yw);gt("NN",Yw);gt("NNN",Yw);gt("NNNN",gwe);gt("NNNNN",ywe);Cn(["N","NN","NNN","NNNN","NNNNN"],function(e,t,n,r){var a=n._locale.erasParse(e,r,n._strict);a?qt(n).era=a:qt(n).invalidEra=e});gt("y",Ec);gt("yy",Ec);gt("yyy",Ec);gt("yyyy",Ec);gt("yo",bwe);Cn(["y","yy","yyy","yyyy"],kr);Cn(["yo"],function(e,t,n,r){var a;n._locale._eraYearOrdinalRegex&&(a=e.match(n._locale._eraYearOrdinalRegex)),n._locale.eraYearOrdinalParse?t[kr]=n._locale.eraYearOrdinalParse(e,a):t[kr]=parseInt(e,10)});function lwe(e,t){var n,r,a,o=this._eras||_i("en")._eras;for(n=0,r=o.length;n<r;++n){switch(typeof o[n].since){case"string":a=Ue(o[n].since).startOf("day"),o[n].since=a.valueOf();break}switch(typeof o[n].until){case"undefined":o[n].until=1/0;break;case"string":a=Ue(o[n].until).startOf("day").valueOf(),o[n].until=a.valueOf();break}}return o}function swe(e,t,n){var r,a,o=this.eras(),i,l,s;for(e=e.toUpperCase(),r=0,a=o.length;r<a;++r)if(i=o[r].name.toUpperCase(),l=o[r].abbr.toUpperCase(),s=o[r].narrow.toUpperCase(),n)switch(t){case"N":case"NN":case"NNN":if(l===e)return o[r];break;case"NNNN":if(i===e)return 
o[r];break;case"NNNNN":if(s===e)return o[r];break}else if([i,l,s].indexOf(e)>=0)return o[r]}function uwe(e,t){var n=e.since<=e.until?1:-1;return t===void 0?Ue(e.since).year():Ue(e.since).year()+(t-e.offset)*n}function cwe(){var e,t,n,r=this.localeData().eras();for(e=0,t=r.length;e<t;++e)if(n=this.clone().startOf("day").valueOf(),r[e].since<=n&&n<=r[e].until||r[e].until<=n&&n<=r[e].since)return r[e].name;return""}function dwe(){var e,t,n,r=this.localeData().eras();for(e=0,t=r.length;e<t;++e)if(n=this.clone().startOf("day").valueOf(),r[e].since<=n&&n<=r[e].until||r[e].until<=n&&n<=r[e].since)return r[e].narrow;return""}function fwe(){var e,t,n,r=this.localeData().eras();for(e=0,t=r.length;e<t;++e)if(n=this.clone().startOf("day").valueOf(),r[e].since<=n&&n<=r[e].until||r[e].until<=n&&n<=r[e].since)return r[e].abbr;return""}function hwe(){var e,t,n,r,a=this.localeData().eras();for(e=0,t=a.length;e<t;++e)if(n=a[e].since<=a[e].until?1:-1,r=this.clone().startOf("day").valueOf(),a[e].since<=r&&r<=a[e].until||a[e].until<=r&&r<=a[e].since)return(this.year()-Ue(a[e].since).year())*n+a[e].offset;return this.year()}function pwe(e){return cn(this,"_erasNameRegex")||qw.call(this),e?this._erasNameRegex:this._erasRegex}function vwe(e){return cn(this,"_erasAbbrRegex")||qw.call(this),e?this._erasAbbrRegex:this._erasRegex}function mwe(e){return cn(this,"_erasNarrowRegex")||qw.call(this),e?this._erasNarrowRegex:this._erasRegex}function Yw(e,t){return t.erasAbbrRegex(e)}function gwe(e,t){return t.erasNameRegex(e)}function ywe(e,t){return t.erasNarrowRegex(e)}function bwe(e,t){return t._eraYearOrdinalRegex||Ec}function qw(){var e=[],t=[],n=[],r=[],a,o,i=this.eras();for(a=0,o=i.length;a<o;++a)t.push(aa(i[a].name)),e.push(aa(i[a].abbr)),n.push(aa(i[a].narrow)),r.push(aa(i[a].name)),r.push(aa(i[a].abbr)),r.push(aa(i[a].narrow));this._erasRegex=new RegExp("^("+r.join("|")+")","i"),this._erasNameRegex=new RegExp("^("+t.join("|")+")","i"),this._erasAbbrRegex=new 
RegExp("^("+e.join("|")+")","i"),this._erasNarrowRegex=new RegExp("^("+n.join("|")+")","i")}Tt(0,["gg",2],0,function(){return this.weekYear()%100});Tt(0,["GG",2],0,function(){return this.isoWeekYear()%100});function pg(e,t){Tt(0,[e,e.length],0,t)}pg("gggg","weekYear");pg("ggggg","weekYear");pg("GGGG","isoWeekYear");pg("GGGGG","isoWeekYear");Or("weekYear","gg");Or("isoWeekYear","GG");Pr("weekYear",1);Pr("isoWeekYear",1);gt("G",ug);gt("g",ug);gt("GG",Dn,ya);gt("gg",Dn,ya);gt("GGGG",Lw,Rw);gt("gggg",Lw,Rw);gt("GGGGG",sg,ig);gt("ggggg",sg,ig);Lf(["gggg","ggggg","GGGG","GGGGG"],function(e,t,n,r){t[r.substr(0,2)]=Zt(e)});Lf(["gg","GG"],function(e,t,n,r){t[r]=Ue.parseTwoDigitYear(e)});function Cwe(e){return zN.call(this,e,this.week(),this.weekday(),this.localeData()._week.dow,this.localeData()._week.doy)}function wwe(e){return zN.call(this,e,this.isoWeek(),this.isoWeekday(),1,4)}function Swe(){return fi(this.year(),1,4)}function kwe(){return fi(this.isoWeekYear(),1,4)}function $we(){var e=this.localeData()._week;return fi(this.year(),e.dow,e.doy)}function Owe(){var e=this.localeData()._week;return fi(this.weekYear(),e.dow,e.doy)}function zN(e,t,n,r,a){var o;return e==null?of(this,r,a).year:(o=fi(e,r,a),t>o&&(t=o),Pwe.call(this,e,t,n,r,a))}function Pwe(e,t,n,r,a){var o=CN(e,t,n,r,a),i=af(o.year,0,o.dayOfYear);return this.year(i.getUTCFullYear()),this.month(i.getUTCMonth()),this.date(i.getUTCDate()),this}Tt("Q",0,"Qo","quarter");Or("quarter","Q");Pr("quarter",7);gt("Q",cN);Cn("Q",function(e,t){t[ii]=(Zt(e)-1)*3});function Twe(e){return e==null?Math.ceil((this.month()+1)/3):this.month((e-1)*3+this.month()%3)}Tt("D",["DD",2],"Do","date");Or("date","D");Pr("date",9);gt("D",Dn);gt("DD",Dn,ya);gt("Do",function(e,t){return e?t._dayOfMonthOrdinalParse||t._ordinalParse:t._dayOfMonthOrdinalParseLenient});Cn(["D","DD"],Oo);Cn("Do",function(e,t){t[Oo]=Zt(e.match(Dn)[0])});var 
HN=_c("Date",!0);Tt("DDD",["DDDD",3],"DDDo","dayOfYear");Or("dayOfYear","DDD");Pr("dayOfYear",4);gt("DDD",lg);gt("DDDD",dN);Cn(["DDD","DDDD"],function(e,t,n){n._dayOfYear=Zt(e)});function xwe(e){var t=Math.round((this.clone().startOf("day")-this.clone().startOf("year"))/864e5)+1;return e==null?t:this.add(e-t,"d")}Tt("m",["mm",2],0,"minute");Or("minute","m");Pr("minute",14);gt("m",Dn);gt("mm",Dn,ya);Cn(["m","mm"],Ja);var _we=_c("Minutes",!1);Tt("s",["ss",2],0,"second");Or("second","s");Pr("second",15);gt("s",Dn);gt("ss",Dn,ya);Cn(["s","ss"],li);var Ewe=_c("Seconds",!1);Tt("S",0,0,function(){return~~(this.millisecond()/100)});Tt(0,["SS",2],0,function(){return~~(this.millisecond()/10)});Tt(0,["SSS",3],0,"millisecond");Tt(0,["SSSS",4],0,function(){return this.millisecond()*10});Tt(0,["SSSSS",5],0,function(){return this.millisecond()*100});Tt(0,["SSSSSS",6],0,function(){return this.millisecond()*1e3});Tt(0,["SSSSSSS",7],0,function(){return this.millisecond()*1e4});Tt(0,["SSSSSSSS",8],0,function(){return this.millisecond()*1e5});Tt(0,["SSSSSSSSS",9],0,function(){return this.millisecond()*1e6});Or("millisecond","ms");Pr("millisecond",16);gt("S",lg,cN);gt("SS",lg,ya);gt("SSS",lg,dN);var Xi,jN;for(Xi="SSSS";Xi.length<=9;Xi+="S")gt(Xi,Ec);function Mwe(e,t){t[Xl]=Zt(("0."+e)*1e3)}for(Xi="S";Xi.length<=9;Xi+="S")Cn(Xi,Mwe);jN=_c("Milliseconds",!1);Tt("z",0,0,"zoneAbbr");Tt("zz",0,0,"zoneName");function Iwe(){return this._isUTC?"UTC":""}function Nwe(){return this._isUTC?"Coordinated Universal Time":""}var 
tt=Df.prototype;tt.add=OCe;tt.calendar=ICe;tt.clone=NCe;tt.diff=VCe;tt.endOf=XCe;tt.format=KCe;tt.from=WCe;tt.fromNow=UCe;tt.to=YCe;tt.toNow=qCe;tt.get=D0e;tt.invalidAt=owe;tt.isAfter=ACe;tt.isBefore=DCe;tt.isBetween=RCe;tt.isSame=LCe;tt.isSameOrAfter=FCe;tt.isSameOrBefore=BCe;tt.isValid=rwe;tt.lang=RN;tt.locale=DN;tt.localeData=LN;tt.max=rCe;tt.min=nCe;tt.parsingFlags=awe;tt.set=R0e;tt.startOf=GCe;tt.subtract=PCe;tt.toArray=ewe;tt.toObject=twe;tt.toDate=QCe;tt.toISOString=HCe;tt.inspect=jCe;typeof Symbol!="undefined"&&Symbol.for!=null&&(tt[Symbol.for("nodejs.util.inspect.custom")]=function(){return"Moment<"+this.format()+">"});tt.toJSON=nwe;tt.toString=zCe;tt.unix=JCe;tt.valueOf=ZCe;tt.creationData=iwe;tt.eraName=cwe;tt.eraNarrow=dwe;tt.eraAbbr=fwe;tt.eraYear=hwe;tt.year=bN;tt.isLeapYear=t1e;tt.weekYear=Cwe;tt.isoWeekYear=wwe;tt.quarter=tt.quarters=Twe;tt.month=gN;tt.daysInMonth=J0e;tt.week=tt.weeks=l1e;tt.isoWeek=tt.isoWeeks=s1e;tt.weeksInYear=$we;tt.weeksInWeekYear=Owe;tt.isoWeeksInYear=Swe;tt.isoWeeksInISOWeekYear=kwe;tt.date=HN;tt.day=tt.days=w1e;tt.weekday=S1e;tt.isoWeekday=k1e;tt.dayOfYear=xwe;tt.hour=tt.hours=E1e;tt.minute=tt.minutes=_we;tt.second=tt.seconds=Ewe;tt.millisecond=tt.milliseconds=jN;tt.utcOffset=fCe;tt.utc=pCe;tt.local=vCe;tt.parseZone=mCe;tt.hasAlignedHourOffset=gCe;tt.isDST=yCe;tt.isLocal=CCe;tt.isUtcOffset=wCe;tt.isUtc=MN;tt.isUTC=MN;tt.zoneAbbr=Iwe;tt.zoneName=Nwe;tt.dates=Fa("dates accessor is deprecated. Use date instead.",HN);tt.months=Fa("months accessor is deprecated. Use month instead",gN);tt.years=Fa("years accessor is deprecated. Use year instead",bN);tt.zone=Fa("moment().zone is deprecated, use moment().utcOffset instead. http://momentjs.com/guides/#/warnings/zone/",hCe);tt.isDSTShifted=Fa("isDSTShifted is deprecated. 
See http://momentjs.com/guides/#/warnings/dst-shifted/ for more information",bCe);function Awe(e){return Nn(e*1e3)}function Dwe(){return Nn.apply(null,arguments).parseZone()}function KN(e){return e}var dn=Nw.prototype;dn.calendar=w0e;dn.longDateFormat=O0e;dn.invalidDate=T0e;dn.ordinal=E0e;dn.preparse=KN;dn.postformat=KN;dn.relativeTime=I0e;dn.pastFuture=N0e;dn.set=b0e;dn.eras=lwe;dn.erasParse=swe;dn.erasConvertYear=uwe;dn.erasAbbrRegex=vwe;dn.erasNameRegex=pwe;dn.erasNarrowRegex=mwe;dn.months=q0e;dn.monthsShort=G0e;dn.monthsParse=Z0e;dn.monthsRegex=e1e;dn.monthsShortRegex=Q0e;dn.week=r1e;dn.firstDayOfYear=i1e;dn.firstDayOfWeek=o1e;dn.weekdays=m1e;dn.weekdaysMin=y1e;dn.weekdaysShort=g1e;dn.weekdaysParse=C1e;dn.weekdaysRegex=$1e;dn.weekdaysShortRegex=O1e;dn.weekdaysMinRegex=P1e;dn.isPM=x1e;dn.meridiem=M1e;function Rv(e,t,n,r){var a=_i(),o=Ho().set(r,t);return a[n](o,e)}function WN(e,t,n){if(Ci(e)&&(t=e,e=void 0),e=e||"",t!=null)return Rv(e,t,n,"month");var r,a=[];for(r=0;r<12;r++)a[r]=Rv(e,r,n,"month");return a}function Gw(e,t,n,r){typeof e=="boolean"?(Ci(t)&&(n=t,t=void 0),t=t||""):(t=e,n=t,e=!1,Ci(t)&&(n=t,t=void 0),t=t||"");var a=_i(),o=e?a._week.dow:0,i,l=[];if(n!=null)return Rv(t,(n+o)%7,r,"day");for(i=0;i<7;i++)l[i]=Rv(t,(i+o)%7,r,"day");return l}function Rwe(e,t){return WN(e,t,"months")}function Lwe(e,t){return WN(e,t,"monthsShort")}function Fwe(e,t,n){return Gw(e,t,n,"weekdays")}function Bwe(e,t,n){return Gw(e,t,n,"weekdaysShort")}function Vwe(e,t,n){return Gw(e,t,n,"weekdaysMin")}nl("en",{eras:[{since:"0001-01-01",until:1/0,offset:1,name:"Anno Domini",narrow:"AD",abbr:"AD"},{since:"0000-12-31",until:-1/0,offset:1,name:"Before Christ",narrow:"BC",abbr:"BC"}],dayOfMonthOrdinalParse:/\d{1,2}(th|st|nd|rd)/,ordinal:function(e){var t=e%10,n=Zt(e%100/10)===1?"th":t===1?"st":t===2?"nd":t===3?"rd":"th";return e+n}});Ue.lang=Fa("moment.lang is deprecated. Use moment.locale instead.",nl);Ue.langData=Fa("moment.langData is deprecated. 
Use moment.localeData instead.",_i);var qo=Math.abs;function zwe(){var e=this._data;return this._milliseconds=qo(this._milliseconds),this._days=qo(this._days),this._months=qo(this._months),e.milliseconds=qo(e.milliseconds),e.seconds=qo(e.seconds),e.minutes=qo(e.minutes),e.hours=qo(e.hours),e.months=qo(e.months),e.years=qo(e.years),this}function UN(e,t,n,r){var a=mo(t,n);return e._milliseconds+=r*a._milliseconds,e._days+=r*a._days,e._months+=r*a._months,e._bubble()}function Hwe(e,t){return UN(this,e,t,1)}function jwe(e,t){return UN(this,e,t,-1)}function JO(e){return e<0?Math.floor(e):Math.ceil(e)}function Kwe(){var e=this._milliseconds,t=this._days,n=this._months,r=this._data,a,o,i,l,s;return e>=0&&t>=0&&n>=0||e<=0&&t<=0&&n<=0||(e+=JO(l1(n)+t)*864e5,t=0,n=0),r.milliseconds=e%1e3,a=Oa(e/1e3),r.seconds=a%60,o=Oa(a/60),r.minutes=o%60,i=Oa(o/60),r.hours=i%24,t+=Oa(i/24),s=Oa(YN(t)),n+=s,t-=JO(l1(s)),l=Oa(n/12),n%=12,r.days=t,r.months=n,r.years=l,this}function YN(e){return e*4800/146097}function l1(e){return e*146097/4800}function Wwe(e){if(!this.isValid())return NaN;var t,n,r=this._milliseconds;if(e=Ba(e),e==="month"||e==="quarter"||e==="year")switch(t=this._days+r/864e5,n=this._months+YN(t),e){case"month":return n;case"quarter":return n/3;case"year":return n/12}else switch(t=this._days+Math.round(l1(this._months)),e){case"week":return t/7+r/6048e5;case"day":return t+r/864e5;case"hour":return t*24+r/36e5;case"minute":return t*1440+r/6e4;case"second":return t*86400+r/1e3;case"millisecond":return Math.floor(t*864e5)+r;default:throw new Error("Unknown unit "+e)}}function Uwe(){return this.isValid()?this._milliseconds+this._days*864e5+this._months%12*2592e6+Zt(this._months/12)*31536e6:NaN}function Ei(e){return function(){return this.as(e)}}var Ywe=Ei("ms"),qwe=Ei("s"),Gwe=Ei("m"),Xwe=Ei("h"),Zwe=Ei("d"),Jwe=Ei("w"),Qwe=Ei("M"),e2e=Ei("Q"),t2e=Ei("y");function n2e(){return mo(this)}function r2e(e){return e=Ba(e),this.isValid()?this[e+"s"]():NaN}function Ds(e){return 
function(){return this.isValid()?this._data[e]:NaN}}
/* Broken-down field getters for durations, each reading one key of _data (NaN when invalid). */
var a2e=Ds("milliseconds"),o2e=Ds("seconds"),i2e=Ds("minutes"),l2e=Ds("hours"),s2e=Ds("days"),u2e=Ds("months"),c2e=Ds("years");
/* d2e: weeks - this.days()/7 passed through Oa. */
function d2e(){return Oa(this.days()/7)}
/* ti: rounding function used by the relative-time logic (replaceable through p2e).
   gu: default thresholds at which humanize switches to the next larger unit; w is null, which
   disables the week step unless a threshold is supplied. */
var ti=Math.round,gu={ss:44,s:45,m:45,h:22,d:26,w:null,M:11};
/* f2e: hands (count||1, withoutSuffix, key, isFuture) to the locale's relativeTime(). */
function f2e(e,t,n,r,a){return a.relativeTime(t||1,!!n,e,r)}
/* h2e(input, withoutSuffix, thresholds, locale): picks the relative-time key for a duration.
   The absolute duration is rounded (ti) in seconds, minutes, hours, days, months, weeks and years,
   and the first matching rule wins: s / ss / m / mm / h / hh / d / dd, then w / ww only when
   thresholds.w is set, then M / MM / y / yy. The chosen [key, count] pair is extended with
   withoutSuffix, "input is positive" and the locale, and applied to f2e. */
function h2e(e,t,n,r){var a=mo(e).abs(),o=ti(a.as("s")),i=ti(a.as("m")),l=ti(a.as("h")),s=ti(a.as("d")),c=ti(a.as("M")),d=ti(a.as("w")),f=ti(a.as("y")),p=o<=n.ss&&["s",o]||o<n.s&&["ss",o]||i<=1&&["m"]||i<n.m&&["mm",i]||l<=1&&["h"]||l<n.h&&["hh",l]||s<=1&&["d"]||s<n.d&&["dd",s];return n.w!=null&&(p=p||d<=1&&["w"]||d<n.w&&["ww",d]),p=p||c<=1&&["M"]||c<n.M&&["MM",c]||f<=1&&["y"]||["yy",f],p[2]=t,p[3]=+e>0,p[4]=r,f2e.apply(null,p)}
/* p2e(fn): without an argument returns the current rounding function; with a function installs it
   and returns true; anything else returns false. Exported as Ue.relativeTimeRounding. */
function p2e(e){return e===void 0?ti:typeof e=="function"?(ti=e,!0):!1}
/* v2e(key, limit): getter/setter for one entry of gu. Returns false for keys whose entry is undefined;
   setting "s" also sets ss to limit-1. Exported as Ue.relativeTimeThreshold. */
function v2e(e,t){return gu[e]===void 0?!1:t===void 0?gu[e]:(gu[e]=t,e==="s"&&(gu.ss=t-1),!0)}
/* m2e(withSuffix, thresholds): humanize. Accepts (bool), (thresholds) or (bool, thresholds); custom
   thresholds are merged over gu (and ss derived from s when only s is given). The phrase from h2e is
   passed through pastFuture when a suffix is requested and always through postformat. */
function m2e(e,t){if(!this.isValid())return this.localeData().invalidDate();var n=!1,r=gu,a,o;return typeof e=="object"&&(t=e,e=!1),typeof e=="boolean"&&(n=e),typeof t=="object"&&(r=Object.assign({},gu,t),t.s!=null&&t.ss==null&&(r.ss=t.s-1)),a=this.localeData(),o=h2e(this,!n,r,a),n&&(o=a.pastFuture(+this,o)),a.postformat(o)}
var Iy=Math.abs;
/* Hs: sign of a number as -1 / 0 / 1; falls through to +e when the comparison result is 0 (covers 0 and NaN). */
function Hs(e){return(e>0)-(e<0)||+e}
/* vg: serialises the duration as an ISO-8601 style string ("P..Y..M..DT..H..M..S").
   Years/months come from _months, days from _days, and hours/minutes/seconds from _milliseconds
   (seconds keep up to three decimals with trailing zeros stripped). A leading "-" marks a negative
   total; a component whose sign differs from the total gets its own "-". A zero total yields "P0D". */
function vg(){if(!this.isValid())return this.localeData().invalidDate();var e=Iy(this._milliseconds)/1e3,t=Iy(this._days),n=Iy(this._months),r,a,o,i,l=this.asSeconds(),s,c,d,f;return l?(r=Oa(e/60),a=Oa(r/60),e%=60,r%=60,o=Oa(n/12),n%=12,i=e?e.toFixed(3).replace(/\.?0+$/,""):"",s=l<0?"-":"",c=Hs(this._months)!==Hs(l)?"-":"",d=Hs(this._days)!==Hs(l)?"-":"",f=Hs(this._milliseconds)!==Hs(l)?"-":"",s+"P"+(o?c+o+"Y":"")+(n?c+n+"M":"")+(t?d+t+"D":"")+(a||r||e?"T":"")+(a?f+a+"H":"")+(r?f+r+"M":"")+(e?f+i+"S":"")):"P0D"}var 
un=hg.prototype;un.isValid=sCe;un.abs=zwe;un.add=Hwe;un.subtract=jwe;un.as=Wwe;un.asMilliseconds=Ywe;un.asSeconds=qwe;un.asMinutes=Gwe;un.asHours=Xwe;un.asDays=Zwe;un.asWeeks=Jwe;un.asMonths=Qwe;un.asQuarters=e2e;un.asYears=t2e;un.valueOf=Uwe;un._bubble=Kwe;un.clone=n2e;un.get=r2e;un.milliseconds=a2e;un.seconds=o2e;un.minutes=i2e;un.hours=l2e;un.days=s2e;un.weeks=d2e;un.months=u2e;un.years=c2e;un.humanize=m2e;un.toISOString=vg;un.toString=vg;un.toJSON=vg;un.locale=DN;un.localeData=LN;un.toIsoString=Fa("toIsoString() is deprecated. Please use toISOString() instead (notice the capitals)",vg);un.lang=RN;Tt("X",0,0,"unix");Tt("x",0,0,"valueOf");gt("x",ug);gt("X",F0e);Cn("X",function(e,t,n){n._d=new Date(parseFloat(e)*1e3)});Cn("x",function(e,t,n){n._d=new Date(Zt(e))});//! moment.js
Ue.version="2.29.3";g0e(Nn);Ue.fn=tt;Ue.min=aCe;Ue.max=oCe;Ue.now=iCe;Ue.utc=Ho;Ue.unix=Awe;Ue.months=Rwe;Ue.isDate=Af;Ue.locale=nl;Ue.invalid=ag;Ue.duration=mo;Ue.isMoment=uo;Ue.weekdays=Fwe;Ue.parseZone=Dwe;Ue.localeData=_i;Ue.isDuration=Rp;Ue.monthsShort=Lwe;Ue.weekdaysMin=Vwe;Ue.defineLocale=zw;Ue.updateLocale=D1e;Ue.locales=R1e;Ue.weekdaysShort=Bwe;Ue.normalizeUnits=Ba;Ue.relativeTimeRounding=p2e;Ue.relativeTimeThreshold=v2e;Ue.calendarFormat=MCe;Ue.prototype=tt;Ue.HTML5_FMT={DATETIME_LOCAL:"YYYY-MM-DDTHH:mm",DATETIME_LOCAL_SECONDS:"YYYY-MM-DDTHH:mm:ss",DATETIME_LOCAL_MS:"YYYY-MM-DDTHH:mm:ss.SSS",DATE:"YYYY-MM-DD",TIME:"HH:mm",TIME_SECONDS:"HH:mm:ss",TIME_MS:"HH:mm:ss.SSS",WEEK:"GGGG-[W]WW",MONTH:"YYYY-MM"};
/* The statements above attach the public static API of the bundled moment build (version 2.29.3)
   to the Ue entry point; everything below belongs to the UI component library. */
/* Hr: unwraps a module namespace - returns e.default when present, otherwise e itself. */
function Hr(e){return e.default||e}
/* Fp: current Modal locale strings, seeded from lo.Modal (P is presumably an object-merge helper - confirm).
   QO(overrides): merges overrides into Fp, or resets Fp to the defaults when called without a value.
   qN(): returns the current Fp. */
var Fp=P({},lo.Modal);function QO(e){e?Fp=P(P({},Fp),e):Fp=P({},lo.Modal)}function qN(){return Fp}
/* Warn-once plumbing. eP remembers messages already reported. GN(sink, valid, message) calls
   sink(false, message) the first time `valid` is falsy for a given message. The two sinks g2e / y2e
   have empty bodies in this build, so Ua / tP only record the message and print nothing. */
var eP={};function g2e(e,t){}function y2e(e,t){}function GN(e,t,n){!t&&!eP[n]&&(e(!1,n),eP[n]=!0)}function Ua(e,t){GN(g2e,e,t)}function tP(e,t){GN(y2e,e,t)}
/* on(valid, component, message): Ua with the message formatted as "[antdv: <component>] <message>".
   s1: marker value that LocaleProvider compares against its ANT_MARK__ prop. */
var on=function(e,t){var n=arguments.length>2&&arguments[2]!==void 0?arguments[2]:"";Ua(e,"[antdv: ".concat(t,"] ").concat(n))},s1="internalMark";
/* b2e(localeObj): switches the global moment locale to localeObj.locale, or to "en" when none is given. */
function b2e(e){e&&e.locale?Hr(Ue).locale(e.locale):Hr(Ue).locale("en")}
/* Bp: the ALocaleProvider component definition (continues past this line). */
var Bp=G({name:"ALocaleProvider",props:{locale:{type:Object},ANT_MARK__:u.string},setup:function(t,n){var r=n.slots;on(t.ANT_MARK__===s1,"LocaleProvider","`LocaleProvider` is deprecated. 
Please use `locale` with `ConfigProvider` instead");var a=bt({antLocale:P(P({},t.locale),{exist:!0}),ANT_MARK__:s1});return ot("localeData",a),ce(function(){return t.locale},function(o){a.antLocale=P(P({},o),{exist:!0}),b2e(o),QO(o&&o.Modal)},{immediate:!0}),Wr(function(){QO()}),function(){var o;return(o=r.default)===null||o===void 0?void 0:o.call(r)}}});Bp.install=function(e){return e.component(Bp.name,Bp),e};var XN=kn(Bp),C2e={getTargetContainer:{type:Function},getPopupContainer:{type:Function},prefixCls:String,getPrefixCls:{type:Function},renderEmpty:{type:Function},transformCellText:{type:Function},csp:{type:Object},autoInsertSpaceInButton:u.looseBool,locale:{type:Object},pageHeader:{type:Object},componentSize:{type:String},direction:{type:String},space:{type:Object},virtual:u.looseBool,dropdownMatchSelectWidth:u.looseBool,form:{type:Object}},w2e=G({name:"AConfigProvider",props:C2e,setup:function(t,n){var r=n.slots,a=function(d,f){var p=t.prefixCls,v=p===void 0?"ant":p;return f||(d?"".concat(v,"-").concat(d):v)},o=function(d){var f=t.renderEmpty||r.renderEmpty||nN;return f(d)},i=function(d,f){var p=t.prefixCls;if(f)return f;var v=p||a("");return d?"".concat(v,"-").concat(d):v},l=bt(P(P({},t),{getPrefixCls:i,renderEmpty:o}));Object.keys(t).forEach(function(c){ce(function(){return t[c]},function(){l[c]=t[c]})}),ot("configProvider",l);var s=function(d){var f;return g(XN,{locale:t.locale||d,ANT_MARK__:s1},{default:function(){return[(f=r.default)===null||f===void 0?void 0:f.call(r)]}})};return function(){return g(Kr,{children:function(d,f,p){return s(p)}},null)}}}),St=bt({getPrefixCls:function(t,n){return n||(t?"ant-".concat(t):"ant")},renderEmpty:nN,direction:"ltr"}),S2e=kn(w2e),Wt=function(e,t){var n=ve("configProvider",St),r=x(function(){return n.getPrefixCls(e,t.prefixCls)}),a=x(function(){return n.direction}),o=x(function(){return n.autoInsertSpaceInButton}),i=x(function(){return n.renderEmpty}),l=x(function(){return n.space}),s=x(function(){return 
n.pageHeader}),c=x(function(){return n.form}),d=x(function(){return t.size||n.componentSize}),f=x(function(){return t.getTargetContainer});return{configProvider:n,prefixCls:r,direction:a,size:d,getTargetContainer:f,space:l,pageHeader:s,form:c,autoInsertSpaceInButton:o,renderEmpty:i}};function k2e(){return typeof window!="undefined"?window:null}var yu;(function(e){e[e.None=0]="None",e[e.Prepare=1]="Prepare"})(yu||(yu={}));var $2e={offsetTop:u.number,offset:u.number,offsetBottom:u.number,target:u.func.def(k2e),prefixCls:u.string,onChange:u.func,onTestUpdatePosition:u.func},O2e=G({name:"AAffix",props:$2e,emits:["change","testUpdatePosition"],setup:function(t,n){var r=n.slots,a=n.emit,o=n.expose,i=H(),l=H(),s=bt({affixStyle:void 0,placeholderStyle:void 0,status:yu.None,lastAffix:!1,prevTarget:null,timeout:null}),c=$t(),d=x(function(){return t.offsetBottom===void 0&&t.offsetTop===void 0?0:t.offsetTop}),f=x(function(){return t.offsetBottom}),p=function(){var w=s.status,k=s.lastAffix,$=t.target;if(!(w!==yu.Prepare||!l.value||!i.value||!$)){var O=$();if(!!O){var T={status:yu.None},_=hh(O),I=hh(i.value),L=KO(I,_,d.value),j=WO(I,_,f.value);L!==void 0?(T.affixStyle={position:"fixed",top:L,width:I.width+"px",height:I.height+"px"},T.placeholderStyle={width:I.width+"px",height:I.height+"px"}):j!==void 0&&(T.affixStyle={position:"fixed",bottom:j,width:I.width+"px",height:I.height+"px"},T.placeholderStyle={width:I.width+"px",height:I.height+"px"}),T.lastAffix=!!T.affixStyle,k!==T.lastAffix&&a("change",T.lastAffix),P(s,T)}}},v=function(){P(s,{status:yu.Prepare,affixStyle:void 0,placeholderStyle:void 0}),c.update()},m=e1(function(){v()}),y=e1(function(){var S=t.target,w=s.affixStyle;if(S&&w){var k=S();if(k&&i.value){var $=hh(k),O=hh(i.value),T=KO(O,$,d.value),_=WO(O,$,f.value);if(T!==void 0&&w.top===T||_!==void 0&&w.bottom===_)return}}v()});o({updatePosition:m,lazyUpdatePosition:y}),ce(function(){return t.target},function(S){var 
w=null;S&&(w=S()||null),s.prevTarget!==w&&(YO(c),w&&(UO(w,c),m()),s.prevTarget=w)}),ce(function(){return[t.offsetTop,t.offsetBottom]},m),et(function(){var S=t.target;S&&(s.timeout=setTimeout(function(){UO(S(),c),m()}))}),ur(function(){p()}),Wr(function(){clearTimeout(s.timeout),YO(c),m.cancel(),y.cancel()});var b=Wt("affix",t),C=b.prefixCls;return function(){var S,w=s.affixStyle,k=s.placeholderStyle,$=Se(V({},C.value,w)),O=bn(t,["prefixCls","offsetTop","offsetBottom","target"]);return g(zo,{onResize:m},{default:function(){return[g("div",le(le({},O),{},{style:k,ref:i}),[g("div",{class:$,ref:l,style:w},[(S=r.default)===null||S===void 0?void 0:S.call(r)])])]}})}}}),ZN=kn(O2e),P2e=0,Nu={};function en(e){var t=arguments.length>1&&arguments[1]!==void 0?arguments[1]:1,n=P2e++,r=t;function a(){r-=1,r<=0?(e(),delete Nu[n]):Nu[n]=requestAnimationFrame(a)}return Nu[n]=requestAnimationFrame(a),n}en.cancel=function(t){t!==void 0&&(cancelAnimationFrame(Nu[t]),delete Nu[t])};en.ids=Nu;function u1(e){return e!=null&&e===e.window}function Xw(e,t){var n;if(typeof window=="undefined")return 0;var r=t?"scrollTop":"scrollLeft",a=0;return u1(e)?a=e[t?"pageYOffset":"pageXOffset"]:e instanceof Document?a=e.documentElement[r]:e&&(a=e[r]),e&&!u1(e)&&typeof a!="number"&&(a=(n=(e.ownerDocument||e).documentElement)===null||n===void 0?void 0:n[r]),a}function T2e(e,t,n,r){var a=n-t;return e/=r/2,e<1?a/2*e*e*e+t:a/2*((e-=2)*e*e+2)+t}function Zw(e){var t=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{},n=t.getContainer,r=n===void 0?function(){return window}:n,a=t.callback,o=t.duration,i=o===void 0?450:o,l=r(),s=Xw(l,!0),c=Date.now(),d=function f(){var p=Date.now(),v=p-c,m=T2e(v>i?i:v,s,e,i);u1(l)?l.scrollTo(window.pageXOffset,m):l instanceof HTMLDocument||l.constructor.name==="HTMLDocument"?l.documentElement.scrollTop=m:l.scrollTop=m,v<i?en(f):typeof a=="function"&&a()};en(d)}function mh(){}var JN=Symbol("anchorContextKey"),x2e=function(t){ot(JN,t)},_2e=function(){return 
ve(JN,{registerLink:mh,unregisterLink:mh,scrollTo:mh,activeLink:x(function(){return""}),handleClick:mh})};function QN(){return window}function nP(e,t){if(!e.getClientRects().length)return 0;var n=e.getBoundingClientRect();return n.width||n.height?t===window?(t=e.ownerDocument.documentElement,n.top-t.clientTop):n.top-t.getBoundingClientRect().top:n.top}var rP=/#(\S+)$/,E2e={prefixCls:u.string,offsetTop:u.number,bounds:u.number,affix:u.looseBool.def(!0),showInkInFixed:u.looseBool.def(!1),getContainer:u.func.def(QN),wrapperClass:u.string,wrapperStyle:u.style,getCurrentAnchor:u.func,targetOffset:u.number,onChange:u.func,onClick:u.func},Bl=G({name:"AAnchor",inheritAttrs:!1,props:E2e,emits:["change","click"],setup:function(t,n){var r=n.emit,a=n.attrs,o=n.slots,i=n.expose,l=Wt("anchor",t),s=l.prefixCls,c=l.getTargetContainer,d=l.direction,f=H(),p=H(),v=bt({links:[],scrollContainer:null,scrollEvent:null,animating:!1}),m=H(null),y=x(function(){var $=t.getContainer;return $||c.value||QN}),b=function(){var O=arguments.length>0&&arguments[0]!==void 0?arguments[0]:0,T=arguments.length>1&&arguments[1]!==void 0?arguments[1]:5,_=[],I=y.value();if(v.links.forEach(function(j){var F=rP.exec(j.toString());if(!!F){var N=document.getElementById(F[1]);if(N){var D=nP(N,I);D<O+T&&_.push({link:j,top:D})}}}),_.length){var L=_.reduce(function(j,F){return F.top>j.top?F:j});return L.link}return""},C=function(O){var T=t.getCurrentAnchor;m.value!==O&&(m.value=typeof T=="function"?T():O,r("change",O))},S=function(O){var T=t.offsetTop,_=t.getContainer,I=t.targetOffset;C(O);var L=_(),j=Xw(L,!0),F=rP.exec(O);if(!!F){var N=document.getElementById(F[1]);if(!!N){var D=nP(N,L),z=j+D;z-=I!==void 0?I:T||0,v.animating=!0,Zw(z,{callback:function(){v.animating=!1},getContainer:_})}}};i({scrollTo:S});var w=function(){if(!v.animating){var O=t.offsetTop,T=t.bounds,_=t.targetOffset,I=b(_!==void 0?_:O||0,T);C(I)}},k=function(){var 
O=p.value.getElementsByClassName("".concat(s.value,"-link-title-active"))[0];O&&(f.value.style.top="".concat(O.offsetTop+O.clientHeight/2-4.5,"px"))};return x2e({registerLink:function(O){v.links.includes(O)||v.links.push(O)},unregisterLink:function(O){var T=v.links.indexOf(O);T!==-1&&v.links.splice(T,1)},activeLink:m,scrollTo:S,handleClick:function(O,T){r("click",O,T)}}),et(function(){Ne(function(){var $=y.value();v.scrollContainer=$,v.scrollEvent=Kn(v.scrollContainer,"scroll",w),w()})}),Lt(function(){v.scrollEvent&&v.scrollEvent.remove()}),ur(function(){if(v.scrollEvent){var $=y.value();v.scrollContainer!==$&&(v.scrollContainer=$,v.scrollEvent.remove(),v.scrollEvent=Kn(v.scrollContainer,"scroll",w),w())}k()}),function(){var $,O=t.offsetTop,T=t.affix,_=t.showInkInFixed,I=s.value,L=Se("".concat(I,"-ink-ball"),{visible:m.value}),j=Se(t.wrapperClass,"".concat(I,"-wrapper"),V({},"".concat(I,"-rtl"),d.value==="rtl")),F=Se(I,{fixed:!T&&!_}),N=P({maxHeight:O?"calc(100vh - ".concat(O,"px)"):"100vh"},t.wrapperStyle),D=g("div",{class:j,style:N,ref:p},[g("div",{class:F},[g("div",{class:"".concat(I,"-ink")},[g("span",{class:L,ref:f},null)]),($=o.default)===null||$===void 0?void 0:$.call(o)])]);return T?g(ZN,le(le({},a),{},{offsetTop:O,target:y.value}),{default:function(){return[D]}}):D}}}),M2e={prefixCls:u.string,href:u.string.def("#"),title:u.VNodeChild,target:u.string},eA=G({name:"AAnchorLink",props:M2e,slots:["title"],setup:function(t,n){var r=n.slots,a=null,o=_2e(),i=o.handleClick,l=o.scrollTo,s=o.unregisterLink,c=o.registerLink,d=o.activeLink,f=Wt("anchor",t),p=f.prefixCls,v=function(y){var b=t.href;i(y,{title:a,href:b}),l(b)};return ce(function(){return t.href},function(m,y){Ne(function(){s(y),c(m)})}),et(function(){c(t.href)}),Lt(function(){s(t.href)}),function(){var m,y=t.href,b=t.target,C=p.value,S=jn(r,t,"title");a=S;var 
w=d.value===y,k=Se("".concat(C,"-link"),V({},"".concat(C,"-link-active"),w)),$=Se("".concat(C,"-link-title"),V({},"".concat(C,"-link-title-active"),w));return g("div",{class:k},[g("a",{class:$,href:y,title:typeof S=="string"?S:"",target:b,onClick:v},[S]),(m=r.default)===null||m===void 0?void 0:m.call(r)])}}});Bl.Link=eA;Bl.install=function(e){return e.component(Bl.name,Bl),e.component(Bl.Link.name,Bl.Link),e};var mg=function(t,n){var r=n.slots,a,o=t.class,i=t.customizeIcon,l=t.customizeIconProps,s=t.onMousedown,c=t.onClick,d;return typeof i=="function"?d=i(l):d=i,g("span",{class:o,onMousedown:function(p){p.preventDefault(),s&&s(p)},style:{userSelect:"none",WebkitUserSelect:"none"},unselectable:"on",onClick:c,"aria-hidden":!0},[d!==void 0?d:g("span",{class:o.split(/\s+/).map(function(f){return"".concat(f,"-icon")})},[(a=r.default)===null||a===void 0?void 0:a.call(r)])])};mg.inheritAttrs=!1;mg.displayName="TransBtn";mg.props={class:u.string,customizeIcon:u.any,customizeIconProps:u.any,onMousedown:u.func,onClick:u.func};var 
Lv=mg,At={MAC_ENTER:3,BACKSPACE:8,TAB:9,NUM_CENTER:12,ENTER:13,SHIFT:16,CTRL:17,ALT:18,PAUSE:19,CAPS_LOCK:20,ESC:27,SPACE:32,PAGE_UP:33,PAGE_DOWN:34,END:35,HOME:36,LEFT:37,UP:38,RIGHT:39,DOWN:40,PRINT_SCREEN:44,INSERT:45,DELETE:46,ZERO:48,ONE:49,TWO:50,THREE:51,FOUR:52,FIVE:53,SIX:54,SEVEN:55,EIGHT:56,NINE:57,QUESTION_MARK:63,A:65,B:66,C:67,D:68,E:69,F:70,G:71,H:72,I:73,J:74,K:75,L:76,M:77,N:78,O:79,P:80,Q:81,R:82,S:83,T:84,U:85,V:86,W:87,X:88,Y:89,Z:90,META:91,WIN_KEY_RIGHT:92,CONTEXT_MENU:93,NUM_ZERO:96,NUM_ONE:97,NUM_TWO:98,NUM_THREE:99,NUM_FOUR:100,NUM_FIVE:101,NUM_SIX:102,NUM_SEVEN:103,NUM_EIGHT:104,NUM_NINE:105,NUM_MULTIPLY:106,NUM_PLUS:107,NUM_MINUS:109,NUM_PERIOD:110,NUM_DIVISION:111,F1:112,F2:113,F3:114,F4:115,F5:116,F6:117,F7:118,F8:119,F9:120,F10:121,F11:122,F12:123,NUMLOCK:144,SEMICOLON:186,DASH:189,EQUALS:187,COMMA:188,PERIOD:190,SLASH:191,APOSTROPHE:192,SINGLE_QUOTE:222,OPEN_SQUARE_BRACKET:219,BACKSLASH:220,CLOSE_SQUARE_BRACKET:221,WIN_KEY:224,MAC_FF_META:224,WIN_IME:229,isTextModifyingKeyEvent:function(t){var n=t.keyCode;if(t.altKey&&!t.ctrlKey||t.metaKey||n>=At.F1&&n<=At.F12)return!1;switch(n){case At.ALT:case At.CAPS_LOCK:case At.CONTEXT_MENU:case At.CTRL:case At.DOWN:case At.END:case At.ESC:case At.HOME:case At.INSERT:case At.LEFT:case At.MAC_FF_META:case At.META:case At.NUMLOCK:case At.NUM_CENTER:case At.PAGE_DOWN:case At.PAGE_UP:case At.PAUSE:case At.PRINT_SCREEN:case At.RIGHT:case At.SHIFT:case At.UP:case At.WIN_KEY:case At.WIN_KEY_RIGHT:return!1;default:return!0}},isCharacterKey:function(t){if(t>=At.ZERO&&t<=At.NINE||t>=At.NUM_ZERO&&t<=At.NUM_MULTIPLY||t>=At.A&&t<=At.Z||window.navigator.userAgent.indexOf("WebKit")!==-1&&t===0)return!0;switch(t){case At.SPACE:case At.QUESTION_MARK:case At.NUM_PLUS:case At.NUM_MINUS:case At.NUM_PERIOD:case At.NUM_DIVISION:case At.SEMICOLON:case At.DASH:case At.EQUALS:case At.COMMA:case At.PERIOD:case At.SLASH:case At.APOSTROPHE:case At.SINGLE_QUOTE:case At.OPEN_SQUARE_BRACKET:case At.BACKSLASH:case 
At.CLOSE_SQUARE_BRACKET:return!0;default:return!1}}},ze=At,I2e=`accept acceptcharset accesskey action allowfullscreen allowtransparency
alt async autocomplete autofocus autoplay capture cellpadding cellspacing challenge
charset checked classid classname colspan cols content contenteditable contextmenu
controls coords crossorigin data datetime default defer dir disabled download draggable
enctype form formaction formenctype formmethod formnovalidate formtarget frameborder
headers height hidden high href hreflang htmlfor httpequiv icon id inputmode integrity
is keyparams keytype kind label lang list loop low manifest marginheight marginwidth max maxlength media
mediagroup method min minlength multiple muted name novalidate nonce open
optimum pattern placeholder poster preload radiogroup readonly rel required
reversed role rowspan rows sandbox scope scoped scrolling seamless selected
shape size sizes span spellcheck src srcdoc srclang srcset start step style
summary tabindex target title type usemap value width wmode wrap`,N2e=`onCopy onCut onPaste onCompositionend onCompositionstart onCompositionupdate onKeydown
    onKeypress onKeyup onFocus onBlur onChange onInput onSubmit onClick onContextmenu onDoubleclick onDblclick
    onDrag onDragend onDragenter onDragexit onDragleave onDragover onDragstart onDrop onMousedown
    onMouseenter onMouseleave onMousemove onMouseout onMouseover onMouseup onSelect onTouchcancel
    onTouchend onTouchmove onTouchstart onTouchstartPassive onTouchmovePassive onScroll onWheel onAbort onCanplay onCanplaythrough
    onDurationchange onEmptied onEncrypted onEnded onError onLoadeddata onLoadedmetadata
    onLoadstart onPause onPlay onPlaying onProgress onRatechange onSeeked onSeeking onStalled onSuspend onTimeupdate onVolumechange onWaiting onLoad onError`,aP="".concat(I2e," ").concat(N2e).split(/[\s\n]+/),A2e="aria-",D2e="data-";function oP(e,t){return e.indexOf(t)===0}function Jw(e){var t=arguments.length>1&&arguments[1]!==void 0?arguments[1]:!1,n;t===!1?n={aria:!0,data:!0,attr:!0}:t===!0?n={aria:!0}:n=P({},t);var r={};return Object.keys(e).forEach(function(a){(n.aria&&(a==="role"||oP(a,A2e))||n.data&&oP(a,D2e)||n.attr&&(aP.includes(a)||aP.includes(a.toLowerCase())))&&(r[a]=e[a])}),r}function ic(){var e=function t(n){t.current=n};return e}var gg=function(t,n){var r=t.height,a=t.offset,o=t.prefixCls,i=t.onInnerResize,l=n.slots,s,c={},d={display:"flex",flexDirection:"column"};return a!==void 0&&(c={height:"".concat(r,"px"),position:"relative",overflow:"hidden"},d=P(P({},d),{transform:"translateY(".concat(a,"px)"),position:"absolute",left:0,right:0,top:0})),g("div",{style:c},[g(zo,{onResize:function(p){var v=p.offsetHeight;v&&i&&i()}},{default:function(){return[g("div",{style:d,class:Se(V({},"".concat(o,"-holder-inner"),o))},[(s=l.default)===null||s===void 0?void 0:s.call(l)])]}})])};gg.displayName="Filter";gg.inheritAttrs=!1;gg.props={prefixCls:String,height:Number,offset:Number,onInnerResize:Function};var R2e=gg,tA=function(t,n){var r=t.setRef,a=n.slots,o,i=(o=a.default)===null||o===void 0?void 0:o.call(a);return i&&i.length?hr(i[0],{ref:r}):i};tA.props={setRef:{type:Function,default:function(){}}};var L2e=tA,F2e=20;function iP(e){return"touches"in e?e.touches[0].pageY:e.pageY}var 
/* ScrollBar component: an absolutely positioned track with a draggable thumb. Thumb height/top come from the height, count, scrollHeight and scrollTop props; delayHidden() shows it and hides it again after 2000 ms. */B2e=G({name:"ScrollBar",inheritAttrs:!1,props:{prefixCls:u.string,scrollTop:u.number,scrollHeight:u.number,height:u.number,count:u.number,onScroll:{type:Function},onStartMove:{type:Function},onStopMove:{type:Function}},setup:function(){return{moveRaf:null,scrollbarRef:ic(),thumbRef:ic(),visibleTimeout:null,state:bt({dragging:!1,pageY:null,startTop:null,visible:!1})}},watch:{scrollTop:{handler:function(){this.delayHidden()},flush:"post"}},mounted:function(){this.scrollbarRef.current.addEventListener("touchstart",this.onScrollbarTouchStart,mn?{passive:!1}:!1),this.thumbRef.current.addEventListener("touchstart",this.onMouseDown,mn?{passive:!1}:!1)},beforeUnmount:function(){this.removeEvents(),clearTimeout(this.visibleTimeout)},methods:{delayHidden:function(){var t=this;clearTimeout(this.visibleTimeout),this.state.visible=!0,this.visibleTimeout=setTimeout(function(){t.state.visible=!1},2e3)},onScrollbarTouchStart:function(t){t.preventDefault()},onContainerMouseDown:function(t){t.stopPropagation(),t.preventDefault()},patchEvents:function(){window.addEventListener("mousemove",this.onMouseMove),window.addEventListener("mouseup",this.onMouseUp),this.thumbRef.current.addEventListener("touchmove",this.onMouseMove,mn?{passive:!1}:!1),this.thumbRef.current.addEventListener("touchend",this.onMouseUp)},removeEvents:function(){window.removeEventListener("mousemove",this.onMouseMove),window.removeEventListener("mouseup",this.onMouseUp),this.scrollbarRef.current.removeEventListener("touchstart",this.onScrollbarTouchStart,mn?{passive:!1}:!1),this.thumbRef.current.removeEventListener("touchstart",this.onMouseDown,mn?{passive:!1}:!1),this.thumbRef.current.removeEventListener("touchmove",this.onMouseMove,mn?{passive:!1}:!1),this.thumbRef.current.removeEventListener("touchend",this.onMouseUp),en.cancel(this.moveRaf)},onMouseDown:function(t){var 
n=this.$props.onStartMove;P(this.state,{dragging:!0,pageY:iP(t),startTop:this.getTop()}),n(),this.patchEvents(),t.stopPropagation(),t.preventDefault()},onMouseMove:function(t){var n=this.state,r=n.dragging,a=n.pageY,o=n.startTop,i=this.$props.onScroll;if(en.cancel(this.moveRaf),r){var l=iP(t)-a,s=o+l,c=this.getEnableScrollRange(),d=this.getEnableHeightRange(),f=d?s/d:0,p=Math.ceil(f*c);this.moveRaf=en(function(){i(p)})}},onMouseUp:function(){var t=this.$props.onStopMove;this.state.dragging=!1,t(),this.removeEvents()},getSpinHeight:function(){var t=this.$props,n=t.height,r=t.count,a=n/r*10;return a=Math.max(a,F2e),a=Math.min(a,n/2),Math.floor(a)},getEnableScrollRange:function(){var t=this.$props,n=t.scrollHeight,r=t.height;return n-r||0},getEnableHeightRange:function(){var t=this.$props.height,n=this.getSpinHeight();return t-n||0},getTop:function(){var t=this.$props.scrollTop,n=this.getEnableScrollRange(),r=this.getEnableHeightRange();if(t===0||n===0)return 0;var a=t/n;return a*r},showScroll:function(){var t=this.$props,n=t.height,r=t.scrollHeight;return r>n}},render:function(){var t=this.state,n=t.dragging,r=t.visible,a=this.$props.prefixCls,o=this.getSpinHeight()+"px",i=this.getTop()+"px",l=this.showScroll(),s=l&&r;return g("div",{ref:this.scrollbarRef,class:Se("".concat(a,"-scrollbar"),V({},"".concat(a,"-scrollbar-show"),l)),style:{width:"8px",top:0,bottom:0,right:0,position:"absolute",display:s?void 0:"none"},onMousedown:this.onContainerMouseDown,onMousemove:this.delayHidden},[g("div",{ref:this.thumbRef,class:Se("".concat(a,"-scrollbar-thumb"),V({},"".concat(a,"-scrollbar-thumb-moving"),n)),style:{width:"100%",height:o,top:i,left:0,position:"absolute",background:"rgba(0, 0, 0, 0.5)",borderRadius:"99px",cursor:"pointer",userSelect:"none"},onMousedown:this.onMouseDown},null)])}});/* V2e(getKey, onAdd, onRemove): keeps a key -> element Map (r) and a key -> offsetHeight record (a). Returns [setElement(item, el), collectHeights, heights]. collectHeights measures in a microtask, only for the latest request and only elements that have an offsetParent. onAdd/onRemove fire when an item gains/loses its element. */function V2e(e,t,n){var r=new Map,a=bt({}),o=0;function i(){o+=1;var s=o;Promise.resolve().then(function(){s===o&&r.forEach(function(c,d){if(c&&c.offsetParent){var 
f=c.offsetHeight;a[d]!==f&&(a[d]=c.offsetHeight)}})})}function l(s,c){var d=e(s),f=r.get(d);c?(r.set(d,c),i()):r.delete(d),!f!=!c&&(c?t==null||t(s):n==null||n(s))}return[l,i,a]}/* z2e(container, data, heights, props, getKey, collectHeight, syncScrollTop, onNullArg): builds scrollTo(arg). null/undefined -> calls onNullArg; number -> syncScrollTop(arg); object with index or key (plus optional align, offset) -> sums cached heights (falling back to props.itemHeight) to find the target, applies it, and repeats on following en() frames while the counter started by b(3) is >= 0. */function z2e(e,t,n,r,a,o,i,l){var s=null;return function(c){if(c==null){l();return}en.cancel(s);var d=t.value,f=r.itemHeight;if(typeof c=="number")i(c);else if(c&&kt(c)==="object"){var p,v=c.align;"index"in c?p=c.index:p=d.findIndex(function(C){return a(C)===c.key});var m=c.offset,y=m===void 0?0:m,b=function C(S,w){if(!(S<0||!e.value)){var k=e.value.clientHeight,$=!1,O=w;if(k){for(var T=w||v,_=0,I=0,L=0,j=Math.min(d.length,p),F=0;F<=j;F+=1){var N=a(d[F]);I=_;var D=n[N];L=I+(D===void 0?f:D),_=L,F===p&&D===void 0&&($=!0)}var z=null;switch(T){case"top":z=I-y;break;case"bottom":z=L-k+y;break;default:{var B=e.value.scrollTop,M=B+k;I<B?O="top":L>M&&(O="bottom")}}z!==null&&z!==e.value.scrollTop&&i(z)}s=en(function(){$&&o(),C(S-1,O)})}};b(3)}}}/* H2e / j2e: true when navigator is an object and its userAgent matches the Firefox pattern. */var H2e=(typeof navigator=="undefined"?"undefined":kt(navigator))==="object"&&/Firefox/i.test(navigator.userAgent),j2e=H2e,/* nA(atTop, atBottom): returns check(delta, flag=false), truthy when delta pushes past an edge (delta<0 with atTop.value, delta>0 with atBottom.value) and the 50 ms lock is not active; a truthy flag at an edge clears the lock, otherwise the lock is re-armed when not at an edge or already locked. */nA=function(e,t){var n=!1,r=null;function a(){clearTimeout(r),n=!0,r=setTimeout(function(){n=!1},50)}return function(o){var i=arguments.length>1&&arguments[1]!==void 0?arguments[1]:!1,l=o<0&&e.value||o>0&&t.value;return i&&l?(clearTimeout(r),n=!1):(!l||n)&&a(),!n&&l}};/* K2e(enabled, atTop, atBottom, onDelta): wheel handling. Returns [onWheel, onDetail]. onWheel accumulates deltaY and, unless the nA check reports an edge hit, calls preventDefault (skipped when j2e is true) and flushes the sum to onDelta on the next en() frame, multiplied by 10 when l is set. onDetail sets l when event.detail equals the last deltaY. */function K2e(e,t,n,r){var a=0,o=null,i=null,l=!1,s=nA(t,n);function c(f){if(!!e.value){en.cancel(o);var p=f.deltaY;a+=p,i=p,!s(p)&&(j2e||f.preventDefault(),o=en(function(){var v=l?10:1;r(a*v),a=0}))}}function d(f){!e.value||(l=f.detail===i)}return[c,d]}/* W2e: factor applied to the remaining touch delta on every 16 ms tick inside U2e. */var W2e=14/15;/* U2e(enabled, listRef, onMove): touch scrolling. When the watched `enabled` source is truthy a touchstart listener is attached to listRef.value. Each touchmove passes the pageY difference to onMove (preventDefault when it returns truthy), then an interval keeps calling onMove(delta, true) with a decaying delta until onMove returns falsy or |delta| <= 0.1. */function U2e(e,t,n){var r=!1,a=0,o=null,i=null,l=function(){o&&(o.removeEventListener("touchmove",s,mn?{passive:!1}:!1),o.removeEventListener("touchend",c))},s=function(p){if(r){var 
v=Math.ceil(p.touches[0].pageY),m=a-v;a=v,n(m)&&p.preventDefault(),clearInterval(i),i=setInterval(function(){m*=W2e,(!n(m,!0)||Math.abs(m)<=.1)&&clearInterval(i)},16)}},c=function(){r=!1,l()},d=function(p){l(),p.touches.length===1&&!r&&(r=!0,a=Math.ceil(p.touches[0].pageY),o=p.target,o.addEventListener("touchmove",s,mn?{passive:!1}:!1),o.addEventListener("touchend",c))};et(function(){ce(e,function(f){t.value.removeEventListener("touchstart",d,mn?{passive:!1}:!1),l(),clearInterval(i),f&&t.value.addEventListener("touchstart",d,mn?{passive:!1}:!1)},{immediate:!0})})}/* Y2e: globalThis.__rest when present, otherwise a local helper returning a copy of e without the keys listed in t (own string keys, plus enumerable own symbol keys). */var Y2e=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},/* q2e: shared empty array used when the List has no data prop. G2e: base overflow style merged into the List holder style. */q2e=[],G2e={overflowY:"auto",overflowAnchor:"none"};/* X2e(list, start, end, setRef, render, cfg): renders list[start..end] inclusive. Each node from render(item, index, {}) is wrapped in L2e keyed by cfg.getKey(item), with a ref callback forwarding to setRef(item, el). */function X2e(e,t,n,r,a,o){var i=o.getKey;return e.slice(t,n+1).map(function(l,s){var c=t+s,d=a(l,c,{}),f=i(l);return g(L2e,{key:f,setRef:function(v){return r(l,v)}},{default:function(){return[d]}})})}/* List component: virtual scrolling is enabled when virtual !== false and both height and itemHeight are set. calRes holds {scrollHeight, start, end, offset} for the window to render; the custom ScrollBar is rendered only in virtual mode. */var Z2e=G({name:"List",inheritAttrs:!1,props:{prefixCls:u.string,data:u.array,height:u.number,itemHeight:u.number,fullHeight:u.looseBool,itemKey:{type:[String,Number,Function],required:!0},component:{type:[String,Object]},virtual:u.looseBool,children:u.func,onScroll:u.func,onMousedown:u.func,onMouseenter:u.func},setup:function(t){var n=x(function(){var M=t.height,E=t.itemHeight,K=t.virtual;return!!(K!==!1&&M&&E)}),r=x(function(){var M=t.height,E=t.itemHeight,K=t.data;return n.value&&K&&E*K.length>M}),a=bt({scrollTop:0,scrollMoving:!1}),o=x(function(){return t.data||q2e}),i=H(),l=H(),s=H(),c=function(E){return typeof t.itemKey=="function"?t.itemKey(E):E==null?void 0:E[t.itemKey]},d={getKey:c};function f(M){var E;typeof M=="function"?E=M(a.scrollTop):E=M;var 
K=w(E);i.value&&(i.value.scrollTop=K),a.scrollTop=K}var p=V2e(c,null,null),v=fn(p,3),m=v[0],y=v[1],b=v[2],C=H({});ce([r,n,function(){return a.scrollTop},o,b,function(){return t.height}],function(){Ne(function(){var M;if(!n.value){C.value={scrollHeight:void 0,start:0,end:o.value.length-1,offset:void 0};return}if(!r.value){C.value={scrollHeight:((M=l.value)===null||M===void 0?void 0:M.offsetHeight)||0,start:0,end:o.value.length-1,offset:void 0};return}for(var E=0,K,W,Y,q=o.value.length,J=o.value,ne=0;ne<q;ne+=1){var oe=J[ne],Q=c(oe),ae=b[Q],de=E+(ae===void 0?t.itemHeight:ae);de>=a.scrollTop&&K===void 0&&(K=ne,W=E),de>a.scrollTop+t.height&&Y===void 0&&(Y=ne),E=de}K===void 0&&(K=0,W=0),Y===void 0&&(Y=q-1),Y=Math.min(Y+1,q),C.value={scrollHeight:E,start:K,end:Y,offset:W}})},{immediate:!0,flush:"post"});var S=x(function(){return C.value.scrollHeight-t.height});function w(M){var E=M;return Number.isNaN(S.value)||(E=Math.min(E,S.value)),E=Math.max(E,0),E}var k=x(function(){return a.scrollTop<=0}),$=x(function(){return a.scrollTop>=S.value}),O=nA(k,$);function T(M){var E=M;f(E)}function _(M){var E,K=M.currentTarget.scrollTop;Math.abs(K-a.scrollTop)>=1&&f(K),(E=t.onScroll)===null||E===void 0||E.call(t,M)}var I=K2e(n,k,$,function(M){f(function(E){var K=E+M;return K})}),L=fn(I,2),j=L[0],F=L[1];U2e(n,i,function(M,E){return O(M,E)?!1:(j({preventDefault:function(){},deltaY:M}),!0)});function N(M){n.value&&M.preventDefault()}var D=function(){i.value&&(i.value.removeEventListener("wheel",j,mn?{passive:!1}:!1),i.value.removeEventListener("DOMMouseScroll",F),i.value.removeEventListener("MozMousePixelScroll",N))};Wn(function(){Ne(function(){i.value&&(D(),i.value.addEventListener("wheel",j,mn?{passive:!1}:!1),i.value.addEventListener("DOMMouseScroll",F),i.value.addEventListener("MozMousePixelScroll",N))})}),Lt(function(){D()});var z=z2e(i,o,b,t,c,y,f,function(){var M;(M=s.value)===null||M===void 0||M.delayHidden()}),B=x(function(){var M=null;return 
t.height&&(M=P(V({},t.fullHeight?"height":"maxHeight",t.height+"px"),G2e),n.value&&(M.overflowY="hidden",a.scrollMoving&&(M.pointerEvents="none"))),M});return{state:a,mergedData:o,componentStyle:B,scrollTo:z,onFallbackScroll:_,onScrollBar:T,componentRef:i,useVirtual:n,calRes:C,collectHeight:y,setInstance:m,sharedConfig:d,scrollBarRef:s,fillerInnerRef:l}},render:function(){var t=this,n=P(P({},this.$props),this.$attrs),r=n.prefixCls,a=r===void 0?"rc-virtual-list":r,o=n.height;n.itemHeight,n.fullHeight,n.data,n.itemKey,n.virtual;var i=n.component,l=i===void 0?"div":i;n.onScroll;var s=n.children,c=n.style,d=n.class,f=Y2e(n,["prefixCls","height","itemHeight","fullHeight","data","itemKey","virtual","component","onScroll","children","style","class"]),p=Se(a,d),v=this.state.scrollTop,m=this.calRes,y=m.scrollHeight,b=m.offset,C=m.start,S=m.end,w=this.componentStyle,k=this.onFallbackScroll,$=this.onScrollBar,O=this.useVirtual,T=this.collectHeight,_=this.sharedConfig,I=this.setInstance,L=this.mergedData,j=X2e(L,C,S,I,s,_);return g("div",le({style:P(P({},c),{position:"relative"}),class:p},f),[g(l,{class:"".concat(a,"-holder"),style:w,ref:"componentRef",onScroll:k},{default:function(){return[g(R2e,{prefixCls:a,height:y,offset:b,onInnerResize:T,ref:"fillerInnerRef"},{default:function(){return[j]}})]}}),O&&g(B2e,{ref:"scrollBarRef",prefixCls:a,scrollTop:v,height:o,scrollHeight:y,count:L.length,onScroll:$,onStartMove:function(){t.state.scrollMoving=!0},onStopMove:function(){t.state.scrollMoving=!1}},null)])}}),/* J2e: alias of the List component. */J2e=Z2e;/* Q2e(getValue, sources, shouldUpdate): holder created with H(getValue()); when the watched sources change it is recomputed. If shouldUpdate is supplied, the recompute happens only when shouldUpdate(newValues, oldValues) is truthy. */function Q2e(e,t,n){var r=H(e());return ce(t,function(a,o){n?n(a,o)&&(r.value=e()):r.value=e()}),r}/* eSe: __rest-style helper; returns a copy of e without the keys listed in t. */var eSe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return 
n},/* tSe: prop definitions later assigned to OptionList (rA.props). */tSe={prefixCls:u.string,id:u.string,options:u.array,flattenOptions:u.array,height:u.number,itemHeight:u.number,values:u.any,multiple:u.looseBool,open:u.looseBool,defaultActiveFirstOption:u.looseBool,notFoundContent:u.any,menuItemSelectedIcon:u.any,childrenAsData:u.looseBool,searchValue:u.string,virtual:u.looseBool,onSelect:u.func,onToggleOpen:{type:Function},onActiveValue:u.func,onScroll:u.func,onMouseenter:u.func},/* OptionList component: renders the flattened options through the List (J2e), tracks state.activeIndex, and exposes onKeydown handling UP/DOWN (move to next enabled option), ENTER (select) and ESC (close). A zero-size listbox with the previous/current/next items is rendered alongside the visible list. */rA=G({name:"OptionList",inheritAttrs:!1,slots:["option"],setup:function(t){var n=x(function(){return"".concat(t.prefixCls,"-item")}),r=Q2e(function(){return t.flattenOptions},[function(){return t.open},function(){return t.flattenOptions}],function(p){return p[0]}),a=ic(),o=function(v){v.preventDefault()},i=function(v){a.current&&a.current.scrollTo({index:v})},l=function(v){for(var m=arguments.length>1&&arguments[1]!==void 0?arguments[1]:1,y=r.value.length,b=0;b<y;b+=1){var C=(v+b*m+y)%y,S=r.value[C],w=S.group,k=S.data;if(!w&&!k.disabled)return C}return-1},s=bt({activeIndex:l(0)}),c=function(v){var m=arguments.length>1&&arguments[1]!==void 0?arguments[1]:!1;s.activeIndex=v;var y={source:m?"keyboard":"mouse"},b=r.value[v];if(!b){t.onActiveValue(null,-1,y);return}t.onActiveValue(b.data.value,v,y)};ce([function(){return r.value.length},function(){return t.searchValue}],function(){c(t.defaultActiveFirstOption!==!1?l(0):-1)},{immediate:!0}),ce(function(){return t.open},function(){if(!t.multiple&&t.open&&t.values.size===1){var p=Array.from(t.values)[0],v=r.value.findIndex(function(m){var y=m.data;return y.value===p});c(v),Ne(function(){i(v)})}t.open&&Ne(function(){var m;(m=a.current)===null||m===void 0||m.scrollTo(void 0)})},{immediate:!0,flush:"post"});var d=function(v){v!==void 0&&t.onSelect(v,{selected:!t.values.has(v)}),t.multiple||t.onToggleOpen(!1)};function f(p){var v=r.value[p];if(!v)return null;var m=v.data||{},y=m.value,b=m.label,C=m.children,S=Jw(m,!0),w=t.childrenAsData?C:b;return v?g("div",le(le({"aria-label":typeof w=="string"?w:void 
0},S),{},{key:p,role:"option",id:"".concat(t.id,"_list_").concat(p),"aria-selected":t.values.has(y)}),[y]):null}return{memoFlattenOptions:r,renderItem:f,listRef:a,state:s,onListMouseDown:o,itemPrefixCls:n,setActive:c,onSelectValue:d,onKeydown:function(v){var m=v.which;switch(m){case ze.UP:case ze.DOWN:{var y=0;if(m===ze.UP?y=-1:m===ze.DOWN&&(y=1),y!==0){var b=l(s.activeIndex+y,y);i(b),c(b,!0)}break}case ze.ENTER:{var C=r.value[s.activeIndex];C&&!C.data.disabled?d(C.data.value):d(void 0),t.open&&v.preventDefault();break}case ze.ESC:t.onToggleOpen(!1),t.open&&v.stopPropagation()}},onKeyup:function(){},scrollTo:function(v){i(v)}}},render:function(){var t=this.renderItem,n=this.listRef,r=this.onListMouseDown,a=this.itemPrefixCls,o=this.setActive,i=this.onSelectValue,l=this.memoFlattenOptions,s=this.$slots,c=this.$props,d=c.id,f=c.childrenAsData,p=c.values,v=c.height,m=c.itemHeight,y=c.menuItemSelectedIcon,b=c.notFoundContent,C=c.virtual,S=c.onScroll,w=c.onMouseenter,k=s.option,$=this.state.activeIndex;return l.length===0?g("div",{role:"listbox",id:"".concat(d,"_list"),class:"".concat(a,"-empty"),onMousedown:r},[b]):g(Fe,null,[g("div",{role:"listbox",id:"".concat(d,"_list"),style:{height:0,width:0,overflow:"hidden"}},[t($-1),t($),t($+1)]),g(J2e,{itemKey:"key",ref:n,data:l,height:v,itemHeight:m,fullHeight:!1,onMousedown:r,onScroll:S,virtual:C,onMouseenter:w,children:function(T,_){var I,L=T.group,j=T.groupOption,F=T.data,N=F.label,D=F.key;if(L)return g("div",{class:Se(a,"".concat(a,"-group"))},[k?k(F):N!==void 0?N:D]);var z=F.disabled,B=F.value,M=F.title,E=F.children,K=F.style,W=F.class,Y=F.className,q=eSe(F,["disabled","value","title","children","style","class","className"]),J=p.has(B),ne="".concat(a,"-option"),oe=Se(a,ne,W,Y,(I={},V(I,"".concat(ne,"-grouped"),j),V(I,"".concat(ne,"-active"),$===_&&!z),V(I,"".concat(ne,"-disabled"),z),V(I,"".concat(ne,"-selected"),J),I)),Q=f?E:N,ae=!y||typeof y=="function"||J,de=Q||B,be=typeof de=="string"||typeof 
de=="number"?de.toString():void 0;return M!==void 0&&(be=M),g("div",le(le({},q),{},{"aria-selected":J,class:oe,title:be,onMousemove:function(Pe){q.onMousemove&&q.onMousemove(Pe),!($===_||z)&&o(_)},onClick:function(Pe){z||i(B),q.onClick&&q.onClick(Pe)},style:K}),[g("div",{class:"".concat(ne,"-content")},[k?k(F):de]),zn(y)||J,ae&&g(Lv,{class:"".concat(a,"-option-state"),customizeIcon:y,customizeIconProps:{isSelected:J}},{default:function(){return[J?"\u2713":null]}})])}},null)])}});rA.props=tSe;var nSe=rA,/* Qw / e2: components that render nothing and only carry marker flags (isSelectOption / isSelectOptGroup); t2 reads type.isSelectOptGroup to tell groups from options. */Qw=function(){return null};Qw.isSelectOption=!0;Qw.displayName="ASelectOption";var aA=Qw,e2=function(){return null};e2.isSelectOptGroup=!0;e2.displayName="ASelectOptGroup";var oA=e2,rSe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n};/* aSe(vnode): converts an option vnode into option data: {key, value (key when value is undefined), children (default slot result), disabled (true also for the empty-string attribute form), ...remaining props}. */function aSe(e){var t=e,n=t.key,r=t.children,a=t.props,o=a.value,i=a.disabled,l=rSe(a,["value","disabled"]),s=r&&r.default?r.default():void 0;return P({key:n,value:o!==void 0?o:n,children:s,disabled:i||i===""},l)}/* t2(nodes, optionOnly=false): converts child vnodes into option data. Nodes failing zn() or lacking a type are dropped. Group vnodes (unless optionOnly) become {key: "__RC_SELECT_GRP__<key or index>__", ...props, label, options} with their default-slot children converted recursively. */function t2(e){var t=arguments.length>1&&arguments[1]!==void 0?arguments[1]:!1,n=Un(e).map(function(r,a){var o;if(!zn(r)||!r.type)return null;var i=r.type.isSelectOptGroup,l=r.key,s=r.children,c=r.props;if(t||!i)return aSe(r);var d=s&&s.default?s.default():void 0,f=(c==null?void 0:c.label)||((o=s.label)===null||o===void 0?void 0:o.call(s))||l;return P(P({key:"__RC_SELECT_GRP__".concat(l===null?a:String(l),"__")},c),{label:f,options:t2(d||[])})}).filter(function(r){return r});return n}/* oSe: first truthy result of WI/YI/rg, else UI() (all defined elsewhere) - presumably Babel's toArray helper; TODO confirm. */function oSe(e){return WI(e)||YI(e)||rg(e)||UI()}/* n2(v): v itself when it is an array, [] for undefined, otherwise [v]. */function n2(e){return Array.isArray(e)?e:e!==void 0?[e]:[]}/* iSe(value, {labelInValue, combobox}): normalises the outer value to [rawValues, valueMap]. undefined (or "" in combobox mode) gives [[], map]. With labelInValue, null entries are dropped and each object is mapped to its value (or key when value is undefined) and stored in the map. */function iSe(e,t){var n=t.labelInValue,r=t.combobox,a=new Map;if(e===void 0||e===""&&r)return[[],a];var 
o=Array.isArray(e)?e:[e],i=o;return n&&(i=o.filter(function(l){return l!==null}).map(function(l){var s=l.key,c=l.value,d=c!==void 0?c:s;return a.set(d,l),d})),[i,a]}/* lSe(values, opts): with opts.labelInValue each value is mapped through opts.getLabeledValue; otherwise the values are returned unchanged. */function lSe(e,t){var n=t.optionLabelProp,r=t.labelInValue,a=t.prevValueMap,o=t.options,i=t.getLabeledValue,l=e;return r&&(l=l.map(function(s){return i(s,{options:o,prevValueMap:a,labelInValue:r,optionLabelProp:n})})),l}/* sSe(measureValues, values): finds the last entry of measureValues that is not disabled and removes the value at that index from a copy of values; returns {values, removedValue} (removedValue is null when none was found). */function sSe(e,t){var n=Je(t),r;for(r=e.length-1;r>=0&&e[r].disabled;r-=1);var a=null;return r!==-1&&(a=n[r],n.splice(r,1)),{values:n,removedValue:a}}/* uSe / cSe: truthy when window.document.documentElement is available. lP: counter consumed by dSe. */var uSe=typeof window!="undefined"&&window.document&&window.document.documentElement,cSe=uSe,lP=0;/* dSe(): returns the next counter value when cSe is truthy, otherwise the string "TEST_OR_SSR". */function dSe(){var e;return cSe?(e=lP,lP+=1):e="TEST_OR_SSR",e}/* sP(data, index): key for a flattened option - data.key when not null/undefined, else data.value when defined, else "rc-index-key-<index>". */function sP(e,t){var n=e.key,r;return"value"in e&&(r=e.value),n!=null?n:r!==void 0?r:"rc-index-key-".concat(t)}/* fSe(options): flattens nested options into a list of {key, data, groupOption} entries; an entry with "options" becomes {key, group: true, data} followed by its children flagged groupOption=true. */function fSe(e){var t=[];function n(r,a){r.forEach(function(o){a||!("options"in o)?t.push({key:sP(o,t.length),groupOption:a,data:o}):(t.push({key:sP(o,t.length),group:!0,data:o}),n(o.options,!0))})}return n(e,!1),t}/* iA(option): shallow copy which, unless it already has "props", gets a props getter returning the copy itself. */function iA(e){var t=P({},e);return"props"in t||Object.defineProperty(t,"props",{get:function(){return t}}),t}/* r2(values, flatOptions, {prevValueOptions=[]}): maps each value to its option data among the non-group entries; when missing, falls back to a copy of the prevValueOptions entry whose _INTERNAL_OPTION_VALUE_ equals the value. Results pass through iA. */function r2(e,t){var n=arguments.length>2&&arguments[2]!==void 0?arguments[2]:{},r=n.prevValueOptions,a=r===void 0?[]:r,o=new Map;return t.forEach(function(i){if(!i.group){var l=i.data;o.set(l.value,l)}}),e.map(function(i){var l=o.get(i);return l||(l=P({},a.find(function(s){return s._INTERNAL_OPTION_VALUE_===i}))),iA(l)})}/* hSe(value, {options, prevValueMap, labelInValue, optionLabelProp}): builds {value, label, key}. Label precedence: the previous labelInValue entry's label, then the option's optionLabelProp field, then the value itself (in which case isCacheable is set). */var hSe=function(t,n){var r=n.options,a=n.prevValueMap,o=n.labelInValue,i=n.optionLabelProp,l=r2([t],r)[0],s={value:t},c=o?a.get(t):void 0;return c&&kt(c)==="object"&&"label"in c?(s.label=c.label,l&&typeof c.label=="string"&&typeof l[i]=="string"&&(c.label.trim(),l[i].trim())):l&&i in l?Array.isArray(l[i])?s.label=rn(l[i][0])?hr(l[i][0]):l[i]:s.label=l[i]:(s.label=t,s.isCacheable=!0),s.key=s.value,s};/* uP(v): joins n2(v) into one string; entries for which rn() is truthy contribute el.innerText or el.wholeText. */function uP(e){return n2(e).map(function(t){var n,r;return rn(t)?((n=t==null?void 0:t.el)===null||n===void 0?void 
0:n.innerText)||((r=t==null?void 0:t.el)===null||r===void 0?void 0:r.wholeText):t}).join("")}/* pSe(prop): default option filter - case-insensitive substring match of the search text against option[prop], or against label for entries that have "options". */function pSe(e){return function(t,n){var r=t.toLowerCase();if("options"in n)return uP(n.label).toLowerCase().includes(r);var a=n[e],o=uP(a).toLowerCase();return o.includes(r)}}/* vSe(search, options, {optionFilterProp, filterOption}): filterOption === false returns a copy of all options. Otherwise filters with the supplied function or pSe; a matching group is kept whole, a non-matching group is kept with only its matching children (dropped when none match). */function vSe(e,t,n){var r=n.optionFilterProp,a=n.filterOption,o=[],i;return a===!1?Je(t):(typeof a=="function"?i=a:i=pSe(r),t.forEach(function(l){if("options"in l){var s=i(e,l);if(s)o.push(l);else{var c=l.options.filter(function(d){return i(e,d)});c.length&&o.push(P(P({},l),{options:c}))}return}i(e,iA(l))&&o.push(l)}),o)}/* mSe(text, separators): recursively splits text by each separator and drops empty pieces; returns null when separators are missing/empty or when no split actually occurred. */function mSe(e,t){if(!t||!t.length)return null;var n=!1;function r(o,i){var l=oSe(i),s=l[0],c=l.slice(1);if(!s)return[o];var d=o.split(s);return n=n||d.length>1,d.reduce(function(f,p){return[].concat(Je(f),Je(r(p,c)))},[]).filter(function(f){return f})}var a=r(e,t);return n?a:null}/* gSe(value, flatOptions): the disabled field of the option data found for value via r2. */function gSe(e,t){var n=r2([e],t)[0];return n.disabled}/* ySe(options, value, labelProp, labelInValue): returns a copy of options with one extra entry appended for every value (taken from a sorted copy) not already present among the options or group children. */function ySe(e,t,n,r){var a=n2(t).slice().sort(),o=Je(e),i=new Set;return e.forEach(function(l){l.options?l.options.forEach(function(s){i.add(s.value)}):i.add(l.value)}),a.forEach(function(l){var s=r?l.value:l;if(!i.has(s)){var c;o.push(r?(c={},V(c,n,l.label),V(c,"value",s),c):{value:s})}}),o}/* Ot(vnode, props={}, override=true, mergeRef=false): clones a vnode through hr; for an array the first entry of La(vnode) is used; returns null when there is nothing to clone. With override the clone's props are merged with props. */function Ot(e){var t=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{},n=arguments.length>2&&arguments[2]!==void 0?arguments[2]:!0,r=arguments.length>3&&arguments[3]!==void 0?arguments[3]:!1,a=e;if(Array.isArray(e)&&(a=La(e)[0]),!a)return null;var o=hr(a,t,r);return o.props=n?P(P({},o.props),t):o.props,on(kt(o.props.class)!=="object","class must be string"),o}/* bSe / cP / CSe: composition tracking for inputs. bSe flags target.composing; cP clears the flag (only if set) and dispatches a synthetic "input" event through CSe, which creates and dispatches a bubbling, cancelable HTMLEvents event. */function bSe(e){e.target.composing=!0}function cP(e){!e.target.composing||(e.target.composing=!1,CSe(e.target,"input"))}function CSe(e,t){var n=document.createEvent("HTMLEvents");n.initEvent(t,!0,!0),e.dispatchEvent(n)}/* Ny(el, type, handler, options): thin wrapper around el.addEventListener. */function Ny(e,t,n,r){e.addEventListener(t,n,r)}var 
wSe={created:function(t,n){(!n.modifiers||!n.modifiers.lazy)&&(Ny(t,"compositionstart",bSe),Ny(t,"compositionend",cP),Ny(t,"change",cP))}},Mi=wSe,SSe=G({name:"Input",inheritAttrs:!1,props:{inputRef:u.any,prefixCls:u.string,id:u.string,inputElement:u.any,disabled:u.looseBool,autofocus:u.looseBool,autocomplete:u.string,editable:u.looseBool,accessibilityIndex:u.number,value:u.string,open:u.looseBool,tabindex:u.oneOfType([u.number,u.string]),attrs:u.object,onKeydown:u.func,onMousedown:u.func,onChange:u.func,onPaste:u.func,onCompositionstart:u.func,onCompositionend:u.func,onFocus:u.func,onBlur:u.func},setup:function(t){return{blurTimeout:null,VCSelectContainerEvent:ve("VCSelectContainerEvent")}},render:function(){var t=this,n,r=this.$props,a=r.prefixCls,o=r.id,i=r.inputElement,l=r.disabled,s=r.tabindex,c=r.autofocus,d=r.autocomplete,f=r.editable,p=r.accessibilityIndex,v=r.value,m=r.onKeydown,y=r.onMousedown,b=r.onChange,C=r.onPaste,S=r.onCompositionstart,w=r.onCompositionend,k=r.onFocus,$=r.onBlur,O=r.open,T=r.inputRef,_=r.attrs,I=i||at(g("input",null,null),[[Mi]]),L=I.props||{},j=L.onKeydown,F=L.onInput,N=L.onFocus,D=L.onBlur,z=L.onMousedown,B=L.onCompositionstart,M=L.onCompositionend,E=L.style;return I=Ot(I,P(P(P({id:o,ref:T,disabled:l,tabindex:s,autocomplete:d||"off",autofocus:c,class:Se("".concat(a,"-selection-search-input"),(n=I==null?void 0:I.props)===null||n===void 0?void 0:n.className),style:P(P({},E),{opacity:f?null:0}),role:"combobox","aria-expanded":O,"aria-haspopup":"listbox","aria-owns":"".concat(o,"_list"),"aria-autocomplete":"list","aria-controls":"".concat(o,"_list"),"aria-activedescendant":"".concat(o,"_list_").concat(p)},_),{value:f?v:"",readonly:!f,unselectable:f?null:"on",onKeydown:function(W){m(W),j&&j(W)},onMousedown:function(W){y(W),z&&z(W)},onInput:function(W){b(W),F&&F(W)},onCompositionstart:function(W){S(W),B&&B(W)},onCompositionend:function(W){w(W),M&&M(W)},onPaste:C,onFocus:function(){var 
W;clearTimeout(t.blurTimeout),N&&N(arguments.length<=0?void 0:arguments[0]),k&&k(arguments.length<=0?void 0:arguments[0]),(W=t.VCSelectContainerEvent)===null||W===void 0||W.focus(arguments.length<=0?void 0:arguments[0])},onBlur:function(){for(var W=arguments.length,Y=new Array(W),q=0;q<W;q++)Y[q]=arguments[q];t.blurTimeout=setTimeout(function(){var J;D&&D(Y[0]),$&&$(Y[0]),(J=t.VCSelectContainerEvent)===null||J===void 0||J.blur(Y[0])},200)}}),I.type==="textarea"?{}:{type:"search"}),!0,!0),I}}),lA=SSe,sA=Symbol("OverflowContextProviderKey"),c1=G({name:"OverflowContextProvider",inheritAttrs:!1,props:{value:{type:Object}},setup:function(t,n){var r=n.slots;return ot(sA,x(function(){return t.value})),function(){var a;return(a=r.default)===null||a===void 0?void 0:a.call(r)}}}),kSe=function(){return ve(sA,x(function(){return null}))},$Se=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},js=void 0,Vp=G({name:"Item",props:{prefixCls:String,item:u.any,renderItem:Function,responsive:Boolean,itemKey:{type:[String,Number]},registerSize:Function,display:Boolean,order:Number,component:u.any,invalidate:Boolean},setup:function(t,n){var r=n.slots,a=n.expose,o=x(function(){return t.responsive&&!t.display}),i=H();a({itemNodeRef:i});function l(s){t.registerSize(t.itemKey,s)}return Wr(function(){l(null)}),function(){var s,c=t.prefixCls,d=t.invalidate,f=t.item,p=t.renderItem,v=t.responsive;t.registerSize,t.itemKey,t.display;var m=t.order,y=t.component,b=y===void 0?"div":y,C=$Se(t,["prefixCls","invalidate","item","renderItem","responsive","registerSize","itemKey","display","order","component"]),S=(s=r.default)===null||s===void 0?void 
0:s.call(r),w=p&&f!==js?p(f):S,k;d||(k={opacity:o.value?0:1,height:o.value?0:js,overflowY:o.value?"hidden":js,order:v?m:js,pointerEvents:o.value?"none":js,position:o.value?"absolute":js});var $={};o.value&&($["aria-hidden"]=!0);var O=g(b,le(le(le({class:Se(!d&&c),style:k},$),C),{},{ref:i}),{default:function(){return[w]}});return g(zo,{disabled:!v,onResize:function(_){var I=_.offsetWidth;l(I)}},{default:function(){return[O]}})}}}),Ay=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},OSe=G({name:"RawItem",inheritAttrs:!1,props:{component:u.any,title:u.any},setup:function(t,n){var r=n.slots,a=n.attrs,o=kSe();return function(){var i,l;if(!o.value){var s=t.component,c=s===void 0?"div":s,d=Ay(t,["component"]);return g(c,le(le({},d),a),{default:function(){return[(i=r.default)===null||i===void 0?void 0:i.call(r)]}})}var f=o.value,p=f.className,v=Ay(f,["className"]),m=a.class,y=Ay(a,["class"]);return g(c1,{value:null},{default:function(){return[g(Vp,le(le(le({class:Se(p,m)},v),y),t),{default:function(){return[(l=r.default)===null||l===void 0?void 0:l.call(r)]}})]}})}}}),PSe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},uA="responsive",cA="invalidate";function TSe(e){return"+ ".concat(e.length," ...")}var 
yg=G({name:"Overflow",inheritAttrs:!1,props:{prefixCls:String,data:Array,itemKey:[String,Number,Function],itemWidth:{type:Number,default:10},renderItem:Function,renderRawItem:Function,maxCount:[Number,String],renderRest:Function,renderRawRest:Function,suffix:u.any,component:String,itemComponent:u.any,onVisibleChange:Function,ssr:String},emits:["visibleChange"],setup:function(t,n){var r=n.attrs,a=n.emit,o=x(function(){return t.ssr==="full"}),i=H(null),l=x(function(){return i.value||0}),s=H(new Map),c=H(0),d=H(0),f=H(0),p=H(null),v=H(null),m=x(function(){return v.value===null&&o.value?Number.MAX_SAFE_INTEGER:v.value||0}),y=H(!1),b=x(function(){return"".concat(t.prefixCls,"-item")}),C=x(function(){return Math.max(c.value,d.value)}),S=x(function(){return!!(t.data.length&&t.maxCount===uA)}),w=x(function(){return t.maxCount===cA}),k=x(function(){return S.value||typeof t.maxCount=="number"&&t.data.length>t.maxCount}),$=x(function(){var z=t.data;return S.value?i.value===null&&o.value?z=t.data:z=t.data.slice(0,Math.min(t.data.length,l.value/t.itemWidth)):typeof t.maxCount=="number"&&(z=t.data.slice(0,t.maxCount)),z}),O=x(function(){return S.value?t.data.slice(m.value+1):t.data.slice($.value.length)}),T=function(B,M){var E,K;return typeof t.itemKey=="function"?t.itemKey(B):(K=t.itemKey&&((E=B)===null||E===void 0?void 0:E[t.itemKey]))!==null&&K!==void 0?K:M},_=x(function(){return t.renderItem||function(z){return z}}),I=function(B,M){v.value=B,M||(y.value=B<t.data.length-1,a("visibleChange",B))},L=function(B,M){i.value=M.clientWidth},j=function(B,M){var E=new Map(s.value);M===null?E.delete(B):E.set(B,M),s.value=E},F=function(B,M){c.value=d.value,d.value=M},N=function(B,M){f.value=M},D=function(B){return s.value.get(T($.value[B],B))};return ce([l,s,d,f,function(){return t.itemKey},$],function(){if(l.value&&C.value&&$.value){var z=f.value,B=$.value.length,M=B-1;if(!B){I(0),p.value=null;return}for(var E=0;E<B;E+=1){var K=D(E);if(K===void 
0){I(E-1,!0);break}if(z+=K,M===0&&z<=l.value||E===M-1&&z+D(M)<=l.value){I(M),p.value=null;break}else if(z+C.value>l.value){I(E-1),p.value=z-K-f.value+d.value;break}}t.suffix&&D(0)+f.value>l.value&&(p.value=null)}}),function(){var z=y.value&&!!O.value.length,B=t.itemComponent,M=t.renderRawItem,E=t.renderRawRest,K=t.renderRest,W=t.prefixCls,Y=W===void 0?"rc-overflow":W,q=t.suffix,J=t.component,ne=J===void 0?"div":J,oe=r.class,Q=r.style,ae=PSe(r,["class","style"]),de={};p.value!==null&&S.value&&(de={position:"absolute",left:"".concat(p.value,"px"),top:0});var be={prefixCls:b.value,responsive:S.value,component:B,invalidate:w.value},Ee=M?function(ge,ke){var xe=T(ge,ke);return g(c1,{key:xe,value:P(P({},be),{order:ke,item:ge,itemKey:xe,registerSize:j,display:ke<=m.value})},{default:function(){return[M(ge,ke)]}})}:function(ge,ke){var xe=T(ge,ke);return g(Vp,le(le({},be),{},{order:ke,key:xe,item:ge,renderItem:_.value,itemKey:xe,registerSize:j,display:ke<=m.value}),null)},Pe,Be={order:z?m.value:Number.MAX_SAFE_INTEGER,className:"".concat(b.value,"-rest"),registerSize:F,display:z};if(E)E&&(Pe=g(c1,{value:P(P({},be),Be)},{default:function(){return[E(O.value)]}}));else{var te=K||TSe;Pe=g(Vp,le(le({},be),Be),{default:function(){return[typeof te=="function"?te(O.value):te]}})}var ie=g(ne,le({class:Se(!w.value&&Y,oe),style:Q},ae),{default:function(){return[$.value.map(Ee),k.value?Pe:null,q&&g(Vp,le(le({},be),{},{order:m.value,class:"".concat(b.value,"-suffix"),registerSize:N,display:!0,style:de}),{default:function(){return[q]}})]}});return g(zo,{disabled:!S.value,onResize:L},{default:function(){return[ie]}})}}});yg.Item=OSe;yg.RESPONSIVE=uA;yg.INVALIDATE=cA;var 
Zl=yg,xSe={id:u.string,prefixCls:u.string,values:u.array,open:u.looseBool,searchValue:u.string,inputRef:u.any,placeholder:u.any,disabled:u.looseBool,mode:u.string,showSearch:u.looseBool,autofocus:u.looseBool,autocomplete:u.string,accessibilityIndex:u.number,tabindex:u.oneOfType([u.number,u.string]),removeIcon:u.VNodeChild,choiceTransitionName:u.string,maxTagCount:u.oneOfType([u.number,u.string]),maxTagTextLength:u.number,maxTagPlaceholder:u.any.def(function(){return function(e){return"+ ".concat(e.length," ...")}}),tagRender:u.func,onToggleOpen:{type:Function},onSelect:u.func,onInputChange:u.func,onInputPaste:u.func,onInputKeyDown:u.func,onInputMouseDown:u.func,onInputCompositionStart:u.func,onInputCompositionEnd:u.func},dP=function(t){t.preventDefault(),t.stopPropagation()},_Se=G({name:"MultipleSelectSelector",inheritAttrs:!1,props:xSe,setup:function(t){var n=H(),r=H(0),a=H(!1),o=x(function(){return"".concat(t.prefixCls,"-selection")}),i=x(function(){return t.open||t.mode==="tags"?t.searchValue:""}),l=x(function(){return t.mode==="tags"||t.showSearch&&(t.open||a.value)});et(function(){ce(i,function(){r.value=n.value.scrollWidth},{flush:"post",immediate:!0})});function s(p,v,m,y){return g("span",{class:Se("".concat(o.value,"-item"),V({},"".concat(o.value,"-item-disabled"),v))},[g("span",{class:"".concat(o.value,"-item-content")},[p]),m&&g(Lv,{class:"".concat(o.value,"-item-remove"),onMousedown:dP,onClick:y,customizeIcon:t.removeIcon},{default:function(){return[yt("\xD7")]}})])}function c(p,v,m,y,b){var C=function(w){dP(w),t.onToggleOpen(!open)};return g("span",{onMousedown:C},[t.tagRender({label:v,value:p,disabled:m,closable:y,onClose:b})])}function d(p){var v=p.disabled,m=p.label,y=p.value,b=!t.disabled&&!v,C=m;if(typeof t.maxTagTextLength=="number"&&(typeof m=="string"||typeof m=="number")){var S=String(C);S.length>t.maxTagTextLength&&(C="".concat(S.slice(0,t.maxTagTextLength),"..."))}var w=function($){$&&$.stopPropagation(),t.onSelect(y,{selected:!1})};return 
typeof t.tagRender=="function"?c(y,C,v,b,w):s(C,v,b,w)}function f(p){var v=t.maxTagPlaceholder,m=v===void 0?function(b){return"+ ".concat(b.length," ...")}:v,y=typeof m=="function"?m(p):m;return s(y,!1)}return function(){var p=t.id,v=t.prefixCls,m=t.values,y=t.open,b=t.inputRef,C=t.placeholder,S=t.disabled,w=t.autofocus,k=t.autocomplete,$=t.accessibilityIndex,O=t.tabindex,T=t.onInputChange,_=t.onInputPaste,I=t.onInputKeyDown,L=t.onInputMouseDown,j=t.onInputCompositionStart,F=t.onInputCompositionEnd,N=g("div",{class:"".concat(o.value,"-search"),style:{width:r.value+"px"},key:"input"},[g(lA,{inputRef:b,open:y,prefixCls:v,id:p,inputElement:null,disabled:S,autofocus:w,autocomplete:k,editable:l.value,accessibilityIndex:$,value:i.value,onKeydown:I,onMousedown:L,onChange:T,onPaste:_,onCompositionstart:j,onCompositionend:F,tabindex:O,attrs:Jw(t,!0),onFocus:function(){return a.value=!0},onBlur:function(){return a.value=!1}},null),g("span",{ref:n,class:"".concat(o.value,"-search-mirror"),"aria-hidden":!0},[i.value,yt("\xA0")])]),D=g(Zl,{prefixCls:"".concat(o.value,"-overflow"),data:m,renderItem:d,renderRest:f,suffix:N,itemKey:"key",maxCount:t.maxTagCount,key:"overflow"},null);return g(Fe,null,[D,!m.length&&!i.value&&g("span",{class:"".concat(o.value,"-placeholder")},[C])])}}}),ESe=_Se,MSe={inputElement:u.any,id:u.string,prefixCls:u.string,values:u.array,open:u.looseBool,searchValue:u.string,inputRef:u.any,placeholder:u.any,disabled:u.looseBool,mode:u.string,showSearch:u.looseBool,autofocus:u.looseBool,autocomplete:u.string,accessibilityIndex:u.number,tabindex:u.oneOfType([u.number,u.string]),activeValue:u.string,backfill:u.looseBool,onInputChange:u.func,onInputPaste:u.func,onInputKeyDown:u.func,onInputMouseDown:u.func,onInputCompositionStart:u.func,onInputCompositionEnd:u.func},a2=G({name:"SingleSelector",setup:function(t){var n=H(!1),r=x(function(){return t.mode==="combobox"}),a=x(function(){return r.value||t.showSearch}),o=x(function(){var s=t.searchValue||"";return 
r.value&&t.activeValue&&!n.value&&(s=t.activeValue),s});ce([r,function(){return t.activeValue}],function(){r.value&&(n.value=!1)},{immediate:!0});var i=x(function(){return t.mode!=="combobox"&&!t.open?!1:!!o.value}),l=x(function(){var s=t.values[0];return s&&(typeof s.label=="string"||typeof s.label=="number")?s.label.toString():void 0});return function(){var s=t.inputElement,c=t.prefixCls,d=t.id,f=t.values,p=t.inputRef,v=t.disabled,m=t.autofocus,y=t.autocomplete,b=t.accessibilityIndex,C=t.open,S=t.placeholder,w=t.tabindex,k=t.onInputKeyDown,$=t.onInputMouseDown,O=t.onInputChange,T=t.onInputPaste,_=t.onInputCompositionStart,I=t.onInputCompositionEnd,L=f[0];return g(Fe,null,[g("span",{class:"".concat(c,"-selection-search")},[g(lA,{inputRef:p,prefixCls:c,id:d,open:C,inputElement:s,disabled:v,autofocus:m,autocomplete:y,editable:a.value,accessibilityIndex:b,value:o.value,onKeydown:k,onMousedown:$,onChange:function(F){n.value=!0,O(F)},onPaste:T,onCompositionstart:_,onCompositionend:I,tabindex:w,attrs:Jw(t,!0)},null)]),!r.value&&L&&!i.value&&g("span",{class:"".concat(c,"-selection-item"),title:l.value},[g(Fe,{key:L.key||L.value},[L.label])]),!L&&!i.value&&g("span",{class:"".concat(c,"-selection-placeholder")},[S])])}}});a2.props=MSe;a2.inheritAttrs=!1;var ISe=a2;function dA(){var e=arguments.length>0&&arguments[0]!==void 0?arguments[0]:250,t=null,n;wm(function(){window.clearTimeout(n)});function r(a){(a||t===null)&&(t=a),window.clearTimeout(n),n=window.setTimeout(function(){t=null},e)}return[function(){return t},r]}var 
NSe=G({name:"Selector",inheritAttrs:!1,props:{id:u.string,prefixCls:u.string,showSearch:u.looseBool,open:u.looseBool,values:u.array,multiple:u.looseBool,mode:u.string,searchValue:u.string,activeValue:u.string,inputElement:u.any,autofocus:u.looseBool,accessibilityIndex:u.number,tabindex:u.oneOfType([u.number,u.string]),disabled:u.looseBool,placeholder:u.any,removeIcon:u.any,maxTagCount:u.oneOfType([u.number,u.string]),maxTagTextLength:u.number,maxTagPlaceholder:u.any,tagRender:u.func,tokenWithEnter:u.looseBool,choiceTransitionName:u.string,onToggleOpen:{type:Function},onSearch:u.func,onSearchSubmit:u.func,onSelect:u.func,onInputKeyDown:{type:Function},domRef:u.func},setup:function(t){var n=ic(),r=!1,a=dA(0),o=fn(a,2),i=o[0],l=o[1],s=function(w){var k=w.which;(k===ze.UP||k===ze.DOWN)&&w.preventDefault(),t.onInputKeyDown&&t.onInputKeyDown(w),k===ze.ENTER&&t.mode==="tags"&&!r&&!t.open&&t.onSearchSubmit(w.target.value),[ze.SHIFT,ze.TAB,ze.BACKSPACE,ze.ESC].includes(k)||t.onToggleOpen(!0)},c=function(){l(!0)},d=null,f=function(w){t.onSearch(w,!0,r)!==!1&&t.onToggleOpen(!0)},p=function(){r=!0},v=function(w){r=!1,t.mode!=="combobox"&&f(w.target.value)},m=function(w){var k=w.target.value;if(t.tokenWithEnter&&d&&/[\r\n]/.test(d)){var $=d.replace(/[\r\n]+$/,"").replace(/\r\n/g," ").replace(/[\r\n]/g," ");k=k.replace($,d)}d=null,f(k)},y=function(w){var k=w.clipboardData,$=k.getData("text");d=$},b=function(w){var k=w.target;if(k!==n.current){var $=document.body.style.msTouchAction!==void 0;$?setTimeout(function(){n.current.focus()}):n.current.focus()}},C=function(w){var 
k=i();w.target!==n.current&&!k&&w.preventDefault(),(t.mode!=="combobox"&&(!t.showSearch||!k)||!t.open)&&(t.open&&t.onSearch("",!0,!1),t.onToggleOpen())};return{focus:function(){n.current.focus()},blur:function(){n.current.blur()},onMousedown:C,onClick:b,onInputPaste:y,inputRef:n,onInternalInputKeyDown:s,onInternalInputMouseDown:c,onInputChange:m,onInputCompositionEnd:v,onInputCompositionStart:p}},render:function(){var t=this.$props,n=t.prefixCls,r=t.domRef,a=t.multiple,o=this.onMousedown,i=this.onClick,l=this.inputRef,s=this.onInputPaste,c=this.onInternalInputKeyDown,d=this.onInternalInputMouseDown,f=this.onInputChange,p=this.onInputCompositionStart,v=this.onInputCompositionEnd,m={inputRef:l,onInputKeyDown:c,onInputMouseDown:d,onInputChange:f,onInputPaste:s,onInputCompositionStart:p,onInputCompositionEnd:v},y=a?g(ESe,le(le({},this.$props),m),null):g(ISe,le(le({},this.$props),m),null);return g("div",{ref:r,class:"".concat(n,"-selector"),onClick:i,onMousedown:o},[y])}}),ASe=NSe;function bu(e,t){return e?e.contains(t):!1}var fA=["moz","ms","webkit"];function DSe(){var e=0;return function(t){var n=new Date().getTime(),r=Math.max(0,16-(n-e)),a=window.setTimeout(function(){t(n+r)},r);return e=n+r,a}}function RSe(){if(typeof window=="undefined")return function(){};if(window.requestAnimationFrame)return window.requestAnimationFrame.bind(window);var e=fA.filter(function(t){return"".concat(t,"RequestAnimationFrame")in window})[0];return e?window["".concat(e,"RequestAnimationFrame")]:DSe()}function LSe(e){if(typeof window=="undefined")return null;if(window.cancelAnimationFrame)return window.cancelAnimationFrame(e);var t=fA.filter(function(n){return"".concat(n,"CancelAnimationFrame")in window||"".concat(n,"CancelRequestAnimationFrame")in window})[0];return t?(window["".concat(t,"CancelAnimationFrame")]||window["".concat(t,"CancelRequestAnimationFrame")]).call(this,e):clearTimeout(e)}var fP=RSe(),o2=function(t){return LSe(t.id)},Fv=function(t,n){var r=Date.now();function 
a(){Date.now()-r>=n?t.call():o.id=fP(a)}var o={id:fP(a)};return o};function hP(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter(function(a){return Object.getOwnPropertyDescriptor(e,a).enumerable})),n.push.apply(n,r)}return n}function pP(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?arguments[t]:{};t%2?hP(Object(n),!0).forEach(function(r){FSe(e,r,n[r])}):Object.getOwnPropertyDescriptors?Object.defineProperties(e,Object.getOwnPropertyDescriptors(n)):hP(Object(n)).forEach(function(r){Object.defineProperty(e,r,Object.getOwnPropertyDescriptor(n,r))})}return e}function zp(e){return typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?zp=function(t){return typeof t}:zp=function(t){return t&&typeof Symbol=="function"&&t.constructor===Symbol&&t!==Symbol.prototype?"symbol":typeof t},zp(e)}function FSe(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var Kc,BSe={Webkit:"-webkit-",Moz:"-moz-",ms:"-ms-",O:"-o-"};function Bv(){if(Kc!==void 0)return Kc;Kc="";var e=document.createElement("p").style,t="Transform";for(var n in BSe)n+t in e&&(Kc=n);return Kc}function hA(){return Bv()?"".concat(Bv(),"TransitionProperty"):"transitionProperty"}function bg(){return Bv()?"".concat(Bv(),"Transform"):"transform"}function vP(e,t){var n=hA();n&&(e.style[n]=t,n!=="transitionProperty"&&(e.style.transitionProperty=t))}function Dy(e,t){var n=bg();n&&(e.style[n]=t,n!=="transform"&&(e.style.transform=t))}function VSe(e){return e.style.transitionProperty||e.style[hA()]}function zSe(e){var t=window.getComputedStyle(e,null),n=t.getPropertyValue("transform")||t.getPropertyValue(bg());if(n&&n!=="none"){var r=n.replace(/[^0-9\-.,]/g,"").split(",");return{x:parseFloat(r[12]||r[4],0),y:parseFloat(r[13]||r[5],0)}}return{x:0,y:0}}var HSe=/matrix\((.*)\)/,jSe=/matrix3d\((.*)\)/;function KSe(e,t){var 
n=window.getComputedStyle(e,null),r=n.getPropertyValue("transform")||n.getPropertyValue(bg());if(r&&r!=="none"){var a,o=r.match(HSe);if(o)o=o[1],a=o.split(",").map(function(l){return parseFloat(l,10)}),a[4]=t.x,a[5]=t.y,Dy(e,"matrix(".concat(a.join(","),")"));else{var i=r.match(jSe)[1];a=i.split(",").map(function(l){return parseFloat(l,10)}),a[12]=t.x,a[13]=t.y,Dy(e,"matrix3d(".concat(a.join(","),")"))}}else Dy(e,"translateX(".concat(t.x,"px) translateY(").concat(t.y,"px) translateZ(0)"))}var WSe=/[\-+]?(?:\d*\.|)\d+(?:[eE][\-+]?\d+|)/.source,Ff;function mP(e){var t=e.style.display;e.style.display="none",e.offsetHeight,e.style.display=t}function Au(e,t,n){var r=n;if(zp(t)==="object"){for(var a in t)t.hasOwnProperty(a)&&Au(e,a,t[a]);return}if(typeof r!="undefined"){typeof r=="number"&&(r="".concat(r,"px")),e.style[t]=r;return}return Ff(e,t)}function USe(e){var t,n,r,a=e.ownerDocument,o=a.body,i=a&&a.documentElement;return t=e.getBoundingClientRect(),n=Math.floor(t.left),r=Math.floor(t.top),n-=i.clientLeft||o.clientLeft||0,r-=i.clientTop||o.clientTop||0,{left:n,top:r}}function pA(e,t){var n=e["page".concat(t?"Y":"X","Offset")],r="scroll".concat(t?"Top":"Left");if(typeof n!="number"){var a=e.document;n=a.documentElement[r],typeof n!="number"&&(n=a.body[r])}return n}function vA(e){return pA(e)}function mA(e){return pA(e,!0)}function sf(e){var t=USe(e),n=e.ownerDocument,r=n.defaultView||n.parentWindow;return t.left+=vA(r),t.top+=mA(r),t}function i2(e){return e!=null&&e==e.window}function gA(e){return i2(e)?e.document:e.nodeType===9?e:e.ownerDocument}function YSe(e,t,n){var r=n,a="",o=gA(e);return r=r||o.defaultView.getComputedStyle(e,null),r&&(a=r.getPropertyValue(t)||r[t]),a}var qSe=new RegExp("^(".concat(WSe,")(?!px)[a-z%]+$"),"i"),GSe=/^(top|right|bottom|left)$/,Ry="currentStyle",Ly="runtimeStyle",Pl="left",XSe="px";function ZSe(e,t){var n=e[Ry]&&e[Ry][t];if(qSe.test(n)&&!GSe.test(t)){var 
r=e.style,a=r[Pl],o=e[Ly][Pl];e[Ly][Pl]=e[Ry][Pl],r[Pl]=t==="fontSize"?"1em":n||0,n=r.pixelLeft+XSe,r[Pl]=a,e[Ly][Pl]=o}return n===""?"auto":n}typeof window!="undefined"&&(Ff=window.getComputedStyle?YSe:ZSe);function gh(e,t){return e==="left"?t.useCssRight?"right":e:t.useCssBottom?"bottom":e}function gP(e){if(e==="left")return"right";if(e==="right")return"left";if(e==="top")return"bottom";if(e==="bottom")return"top"}function yP(e,t,n){Au(e,"position")==="static"&&(e.style.position="relative");var r=-999,a=-999,o=gh("left",n),i=gh("top",n),l=gP(o),s=gP(i);o!=="left"&&(r=999),i!=="top"&&(a=999);var c="",d=sf(e);("left"in t||"top"in t)&&(c=VSe(e)||"",vP(e,"none")),"left"in t&&(e.style[l]="",e.style[o]="".concat(r,"px")),"top"in t&&(e.style[s]="",e.style[i]="".concat(a,"px")),mP(e);var f=sf(e),p={};for(var v in t)if(t.hasOwnProperty(v)){var m=gh(v,n),y=v==="left"?r:a,b=d[v]-f[v];m===v?p[m]=y+b:p[m]=y-b}Au(e,p),mP(e),("left"in t||"top"in t)&&vP(e,c);var C={};for(var S in t)if(t.hasOwnProperty(S)){var w=gh(S,n),k=t[S]-d[S];S===w?C[w]=p[w]+k:C[w]=p[w]-k}Au(e,C)}function JSe(e,t){var n=sf(e),r=zSe(e),a={x:r.x,y:r.y};"left"in t&&(a.x=r.x+t.left-n.left),"top"in t&&(a.y=r.y+t.top-n.top),KSe(e,a)}function QSe(e,t,n){if(n.ignoreShake){var r=sf(e),a=r.left.toFixed(0),o=r.top.toFixed(0),i=t.left.toFixed(0),l=t.top.toFixed(0);if(a===i&&o===l)return}n.useCssRight||n.useCssBottom?yP(e,t,n):n.useCssTransform&&bg()in document.body.style?JSe(e,t):yP(e,t,n)}function l2(e,t){for(var n=0;n<e.length;n++)t(e[n])}function yA(e){return Ff(e,"boxSizing")==="border-box"}var eke=["margin","border","padding"],d1=-1,tke=2,f1=1,nke=0;function rke(e,t,n){var r={},a=e.style,o;for(o in t)t.hasOwnProperty(o)&&(r[o]=a[o],a[o]=t[o]);n.call(e);for(o in t)t.hasOwnProperty(o)&&(a[o]=r[o])}function ad(e,t,n){var r=0,a,o,i;for(o=0;o<t.length;o++)if(a=t[o],a)for(i=0;i<n.length;i++){var l=void 0;a==="border"?l="".concat(a).concat(n[i],"Width"):l=a+n[i],r+=parseFloat(Ff(e,l))||0}return r}var 
_o={getParent:function(t){var n=t;do n.nodeType===11&&n.host?n=n.host:n=n.parentNode;while(n&&n.nodeType!==1&&n.nodeType!==9);return n}};l2(["Width","Height"],function(e){_o["doc".concat(e)]=function(t){var n=t.document;return Math.max(n.documentElement["scroll".concat(e)],n.body["scroll".concat(e)],_o["viewport".concat(e)](n))},_o["viewport".concat(e)]=function(t){var n="client".concat(e),r=t.document,a=r.body,o=r.documentElement,i=o[n];return r.compatMode==="CSS1Compat"&&i||a&&a[n]||i}});function bP(e,t,n){var r=n;if(i2(e))return t==="width"?_o.viewportWidth(e):_o.viewportHeight(e);if(e.nodeType===9)return t==="width"?_o.docWidth(e):_o.docHeight(e);var a=t==="width"?["Left","Right"]:["Top","Bottom"],o=Math.floor(t==="width"?e.getBoundingClientRect().width:e.getBoundingClientRect().height),i=yA(e),l=0;(o==null||o<=0)&&(o=void 0,l=Ff(e,t),(l==null||Number(l)<0)&&(l=e.style[t]||0),l=parseFloat(l)||0),r===void 0&&(r=i?f1:d1);var s=o!==void 0||i,c=o||l;return r===d1?s?c-ad(e,["border","padding"],a):l:s?r===f1?c:c+(r===tke?-ad(e,["border"],a):ad(e,["margin"],a)):l+ad(e,eke.slice(r),a)}var ake={position:"absolute",visibility:"hidden",display:"block"};function CP(){for(var e=arguments.length,t=new Array(e),n=0;n<e;n++)t[n]=arguments[n];var r,a=t[0];return a.offsetWidth!==0?r=bP.apply(void 0,t):rke(a,ake,function(){r=bP.apply(void 0,t)}),r}l2(["width","height"],function(e){var t=e.charAt(0).toUpperCase()+e.slice(1);_o["outer".concat(t)]=function(r,a){return r&&CP(r,e,a?nke:f1)};var n=e==="width"?["Left","Right"]:["Top","Bottom"];_o[e]=function(r,a){var o=a;if(o!==void 0){if(r){var i=yA(r);return i&&(o+=ad(r,["padding","border"],n)),Au(r,e,o)}return}return r&&CP(r,e,d1)}});function bA(e,t){for(var n in t)t.hasOwnProperty(n)&&(e[n]=t[n]);return e}var jt={getWindow:function(t){if(t&&t.document&&t.setTimeout)return t;var n=t.ownerDocument||t;return n.defaultView||n.parentWindow},getDocument:gA,offset:function(t,n,r){if(typeof n!="undefined")QSe(t,n,r||{});else return 
sf(t)},isWindow:i2,each:l2,css:Au,clone:function(t){var n,r={};for(n in t)t.hasOwnProperty(n)&&(r[n]=t[n]);var a=t.overflow;if(a)for(n in t)t.hasOwnProperty(n)&&(r.overflow[n]=t.overflow[n]);return r},mix:bA,getWindowScrollLeft:function(t){return vA(t)},getWindowScrollTop:function(t){return mA(t)},merge:function(){for(var t={},n=0;n<arguments.length;n++)jt.mix(t,n<0||arguments.length<=n?void 0:arguments[n]);return t},viewportWidth:0,viewportHeight:0};bA(jt,_o);var Fy=jt.getParent;function h1(e){if(jt.isWindow(e)||e.nodeType===9)return null;var t=jt.getDocument(e),n=t.body,r,a=jt.css(e,"position"),o=a==="fixed"||a==="absolute";if(!o)return e.nodeName.toLowerCase()==="html"?null:Fy(e);for(r=Fy(e);r&&r!==n&&r.nodeType!==9;r=Fy(r))if(a=jt.css(r,"position"),a!=="static")return r;return null}var wP=jt.getParent;function oke(e){if(jt.isWindow(e)||e.nodeType===9)return!1;var t=jt.getDocument(e),n=t.body,r=null;for(r=wP(e);r&&r!==n&&r!==t;r=wP(r)){var a=jt.css(r,"position");if(a==="fixed")return!0}return!1}function s2(e,t){for(var n={left:0,right:1/0,top:0,bottom:1/0},r=h1(e),a=jt.getDocument(e),o=a.defaultView||a.parentWindow,i=a.body,l=a.documentElement;r;){if((navigator.userAgent.indexOf("MSIE")===-1||r.clientWidth!==0)&&r!==i&&r!==l&&jt.css(r,"overflow")!=="visible"){var s=jt.offset(r);s.left+=r.clientLeft,s.top+=r.clientTop,n.top=Math.max(n.top,s.top),n.right=Math.min(n.right,s.left+r.clientWidth),n.bottom=Math.min(n.bottom,s.top+r.clientHeight),n.left=Math.max(n.left,s.left)}else if(r===i||r===l)break;r=h1(r)}var c=null;if(!jt.isWindow(e)&&e.nodeType!==9){c=e.style.position;var d=jt.css(e,"position");d==="absolute"&&(e.style.position="fixed")}var 
f=jt.getWindowScrollLeft(o),p=jt.getWindowScrollTop(o),v=jt.viewportWidth(o),m=jt.viewportHeight(o),y=l.scrollWidth,b=l.scrollHeight,C=window.getComputedStyle(i);if(C.overflowX==="hidden"&&(y=o.innerWidth),C.overflowY==="hidden"&&(b=o.innerHeight),e.style&&(e.style.position=c),t||oke(e))n.left=Math.max(n.left,f),n.top=Math.max(n.top,p),n.right=Math.min(n.right,f+v),n.bottom=Math.min(n.bottom,p+m);else{var S=Math.max(y,f+v);n.right=Math.min(n.right,S);var w=Math.max(b,p+m);n.bottom=Math.min(n.bottom,w)}return n.top>=0&&n.left>=0&&n.bottom>n.top&&n.right>n.left?n:null}function ike(e,t,n,r){var a=jt.clone(e),o={width:t.width,height:t.height};return r.adjustX&&a.left<n.left&&(a.left=n.left),r.resizeWidth&&a.left>=n.left&&a.left+o.width>n.right&&(o.width-=a.left+o.width-n.right),r.adjustX&&a.left+o.width>n.right&&(a.left=Math.max(n.right-o.width,n.left)),r.adjustY&&a.top<n.top&&(a.top=n.top),r.resizeHeight&&a.top>=n.top&&a.top+o.height>n.bottom&&(o.height-=a.top+o.height-n.bottom),r.adjustY&&a.top+o.height>n.bottom&&(a.top=Math.max(n.bottom-o.height,n.top)),jt.mix(a,o)}function u2(e){var t,n,r;if(!jt.isWindow(e)&&e.nodeType!==9)t=jt.offset(e),n=jt.outerWidth(e),r=jt.outerHeight(e);else{var a=jt.getWindow(e);t={left:jt.getWindowScrollLeft(a),top:jt.getWindowScrollTop(a)},n=jt.viewportWidth(a),r=jt.viewportHeight(a)}return t.width=n,t.height=r,t}function SP(e,t){var n=t.charAt(0),r=t.charAt(1),a=e.width,o=e.height,i=e.left,l=e.top;return n==="c"?l+=o/2:n==="b"&&(l+=o),r==="c"?i+=a/2:r==="r"&&(i+=a),{left:i,top:l}}function yh(e,t,n,r,a){var o=SP(t,n[1]),i=SP(e,n[0]),l=[i.left-o.left,i.top-o.top];return{left:Math.round(e.left-l[0]+r[0]-a[0]),top:Math.round(e.top-l[1]+r[1]-a[1])}}function kP(e,t,n){return e.left<n.left||e.left+t.width>n.right}function $P(e,t,n){return e.top<n.top||e.top+t.height>n.bottom}function lke(e,t,n){return e.left>n.right||e.left+t.width<n.left}function ske(e,t,n){return e.top>n.bottom||e.top+t.height<n.top}function bh(e,t,n){var r=[];return 
jt.each(e,function(a){r.push(a.replace(t,function(o){return n[o]}))}),r}function Ch(e,t){return e[t]=-e[t],e}function OP(e,t){var n;return/%$/.test(e)?n=parseInt(e.substring(0,e.length-1),10)/100*t:n=parseInt(e,10),n||0}function PP(e,t){e[0]=OP(e[0],t.width),e[1]=OP(e[1],t.height)}function CA(e,t,n,r){var a=n.points,o=n.offset||[0,0],i=n.targetOffset||[0,0],l=n.overflow,s=n.source||e;o=[].concat(o),i=[].concat(i),l=l||{};var c={},d=0,f=!!(l&&l.alwaysByViewport),p=s2(s,f),v=u2(s);PP(o,v),PP(i,t);var m=yh(v,t,a,o,i),y=jt.merge(v,m);if(p&&(l.adjustX||l.adjustY)&&r){if(l.adjustX&&kP(m,v,p)){var b=bh(a,/[lr]/gi,{l:"r",r:"l"}),C=Ch(o,0),S=Ch(i,0),w=yh(v,t,b,C,S);lke(w,v,p)||(d=1,a=b,o=C,i=S)}if(l.adjustY&&$P(m,v,p)){var k=bh(a,/[tb]/gi,{t:"b",b:"t"}),$=Ch(o,1),O=Ch(i,1),T=yh(v,t,k,$,O);ske(T,v,p)||(d=1,a=k,o=$,i=O)}d&&(m=yh(v,t,a,o,i),jt.mix(y,m));var _=kP(m,v,p),I=$P(m,v,p);if(_||I){var L=a;_&&(L=bh(a,/[lr]/gi,{l:"r",r:"l"})),I&&(L=bh(a,/[tb]/gi,{t:"b",b:"t"})),a=L,o=n.offset||[0,0],i=n.targetOffset||[0,0]}c.adjustX=l.adjustX&&_,c.adjustY=l.adjustY&&I,(c.adjustX||c.adjustY)&&(y=ike(m,v,p,c))}return y.width!==v.width&&jt.css(s,"width",jt.width(s)+y.width-v.width),y.height!==v.height&&jt.css(s,"height",jt.height(s)+y.height-v.height),jt.offset(s,{left:y.left,top:y.top},{useCssRight:n.useCssRight,useCssBottom:n.useCssBottom,useCssTransform:n.useCssTransform,ignoreShake:n.ignoreShake}),{points:a,offset:o,targetOffset:i,overflow:c}}function uke(e,t){var n=s2(e,t),r=u2(e);return!n||r.left+r.width<=n.left||r.top+r.height<=n.top||r.left>=n.right||r.top>=n.bottom}function c2(e,t,n){var r=n.target||t,a=u2(r),o=!uke(r,n.overflow&&n.overflow.alwaysByViewport);return CA(e,a,n,o)}c2.__getOffsetParent=h1;c2.__getVisibleRectForElement=s2;function cke(e,t,n){var r,a,o=jt.getDocument(e),i=o.defaultView||o.parentWindow,l=jt.getWindowScrollLeft(i),s=jt.getWindowScrollTop(i),c=jt.viewportWidth(i),d=jt.viewportHeight(i);"pageX"in t?r=t.pageX:r=l+t.clientX,"pageY"in 
t?a=t.pageY:a=s+t.clientY;var f={left:r,top:a,width:0,height:0},p=r>=0&&r<=l+c&&a>=0&&a<=s+d,v=[n.points[0],"cc"];return CA(e,f,pP(pP({},n),{},{points:v}),p)}var dke=function(e){if(!e)return!1;if(e.offsetParent)return!0;if(e.getBBox){var t=e.getBBox();if(t.width||t.height)return!0}if(e.getBoundingClientRect){var n=e.getBoundingClientRect();if(n.width||n.height)return!0}return!1};function fke(e,t){return e===t?!0:!e||!t?!1:"pageX"in t&&"pageY"in t?e.pageX===t.pageX&&e.pageY===t.pageY:"clientX"in t&&"clientY"in t?e.clientX===t.clientX&&e.clientY===t.clientY:!1}function hke(e,t){e!==document.activeElement&&bu(t,e)&&typeof e.focus=="function"&&e.focus()}function TP(e,t){var n=null,r=null;function a(i){var l=fn(i,1),s=l[0].target;if(!!document.documentElement.contains(s)){var c=s.getBoundingClientRect(),d=c.width,f=c.height,p=Math.floor(d),v=Math.floor(f);(n!==p||r!==v)&&Promise.resolve().then(function(){t({width:p,height:v})}),n=p,r=v}}var o=new ng(a);return e&&o.observe(e),function(){o.disconnect()}}var pke=function(e,t){var n=!1,r=null;function a(){window.clearTimeout(r)}function o(i){if(!n||i===!0){if(e()===!1)return;n=!0,a(),r=window.setTimeout(function(){n=!1},t.value)}else a(),r=window.setTimeout(function(){n=!1,o()},t.value)}return[o,function(){n=!1,a()}]},vke={align:Object,target:[Object,Function],onAlign:Function,monitorBufferTime:Number,monitorWindowResize:Boolean,disabled:Boolean};function xP(e){return typeof e!="function"?null:e()}function _P(e){return kt(e)!=="object"||!e?null:e}var EP=G({name:"Align",props:vke,emits:["align"],setup:function(t,n){var r=n.expose,a=n.slots,o=H({}),i=H(),l=x(function(){return{disabled:t.disabled,target:t.target,onAlign:t.onAlign}}),s=pke(function(){var b=l.value,C=b.disabled,S=b.target,w=b.onAlign;if(!C&&S&&i.value&&i.value.$el){var k=i.value.$el,$,O=xP(S),T=_P(S);o.value.element=O,o.value.point=T;var _=document,I=_.activeElement;return 
O&&dke(O)?$=c2(k,O,t.align):T&&($=cke(k,T,t.align)),hke(I,k),w&&$&&w(k,$),!0}return!1},x(function(){return t.monitorBufferTime})),c=fn(s,2),d=c[0],f=c[1],p=H({cancel:function(){}}),v=H({cancel:function(){}}),m=function(){var C=t.target,S=xP(C),w=_P(C);i.value&&i.value.$el!==v.value.element&&(v.value.cancel(),v.value.element=i.value.$el,v.value.cancel=TP(i.value.$el,d)),(o.value.element!==S||!fke(o.value.point,w))&&(d(),p.value.element!==S&&(p.value.cancel(),p.value.element=S,p.value.cancel=TP(S,d)))};et(function(){m()}),ur(function(){m()}),ce(function(){return t.disabled},function(b){b?f():d()},{flush:"post"});var y=H(null);return ce(function(){return t.monitorWindowResize},function(b){b?y.value||(y.value=Kn(window,"resize",d)):y.value&&(y.value.remove(),y.value=null)},{flush:"post"}),Wr(function(){p.value.cancel(),v.value.cancel(),y.value&&y.value.remove(),f()}),r({forceAlign:function(){return d(!0)}}),function(){var b=a==null?void 0:a.default();return b?Ot(b[0],{ref:i},!0,!0):b&&b[0]}}}),wA={name:"LazyRenderBox",props:{visible:u.looseBool,hiddenClassName:u.string},render:function(){var t=this.$props.hiddenClassName,n=ht(this);return t||n&&n.length>1||n&&n[0]&&n[0].type===Fo?g("div",null,[n]):n&&n[0]}},MP={props:{hiddenClassName:u.string.def(""),prefixCls:u.string,visible:u.looseBool},render:function(){var t=this,n,r,a=this.$props,o=a.prefixCls,i=a.visible,l=a.hiddenClassName;return g("div",{class:i?"":l},[g(wA,{class:"".concat(o,"-content"),visible:i},{default:function(){return[(r=(n=t.$slots).default)===null||r===void 0?void 0:r.call(n)]}})])}},nt={methods:{setState:function(){var t=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{},n=arguments.length>1?arguments[1]:void 0,r=typeof t=="function"?t(this.$data,this.$props):t;if(this.getDerivedStateFromProps){var 
a=this.getDerivedStateFromProps(Qe(this),P(P({},this.$data),r));if(a===null)return;r=P(P({},r),a||{})}P(this.$data,r),this._.isMounted&&this.$forceUpdate(),Ne(function(){n&&n()})},__emit:function(){var t=[].slice.call(arguments,0),n=t[0];n="on".concat(n[0].toUpperCase()).concat(n.substring(1));var r=this.$props[n]||this.$attrs[n];if(t.length&&r)if(Array.isArray(r))for(var a=0,o=r.length;a<o;a++)r[a].apply(r,Je(t.slice(1)));else r.apply(void 0,Je(t.slice(1)))}}};function mke(e,t,n){return n?e[0]===t[0]:e[0]===t[0]&&e[1]===t[1]}function gke(e,t,n){var r=e[t]||{};return P(P({},r),n)}function yke(e,t,n,r){var a=n.points;for(var o in e)if(e.hasOwnProperty(o)&&mke(e[o].points,a,r))return"".concat(t,"-placement-").concat(o);return""}function wh(){}function IP(e,t){this[e]=t}globalThis&&globalThis.__rest;var Lo=function(t){var n=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{},r=P(t?{appear:!0,appearToClass:"".concat(t,"-appear ").concat(t,"-appear-active"),enterFromClass:"".concat(t,"-enter ").concat(t,"-enter-prepare"),enterToClass:"".concat(t,"-enter ").concat(t,"-enter-active"),leaveFromClass:" ".concat(t,"-leave"),leaveActiveClass:"".concat(t,"-leave ").concat(t,"-leave-active"),leaveToClass:"".concat(t,"-leave ").concat(t,"-leave-active")}:{css:!1},n);return r},d2=function(t){var n=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{},r=P(t?{appear:!0,appearActiveClass:"".concat(t),appearToClass:"".concat(t,"-appear ").concat(t,"-appear-active"),enterFromClass:"".concat(t,"-appear ").concat(t,"-enter ").concat(t,"-appear-prepare ").concat(t,"-enter-prepare"),enterActiveClass:"".concat(t),enterToClass:"".concat(t,"-enter ").concat(t,"-appear ").concat(t,"-appear-active ").concat(t,"-enter-active"),leaveActiveClass:"".concat(t," ").concat(t,"-leave"),leaveToClass:"".concat(t,"-leave-active")}:{css:!1},n);return 
r},no=Vn,f2=e8,NP=function(){return{height:0,opacity:0}},bke=function(t){return{height:"".concat(t.scrollHeight,"px"),opacity:1}},Cke=function(t){return{height:"".concat(t.offsetHeight,"px")}},wke=function(t,n){return{name:"ant-motion-collapse",appear:!0,css:!0,onBeforeEnter:function(a){n.value="ant-motion-collapse",t.value=NP()},onEnter:function(a){Ne(function(){t.value=bke(a)})},onAfterEnter:function(){n.value="",t.value={}},onBeforeLeave:function(a){n.value="ant-motion-collapse",t.value=Cke(a)},onLeave:function(a){window.setTimeout(function(){t.value=NP()})},onAfterLeave:function(){n.value="",t.value={}}}},Cg=no,Ske={name:"VCTriggerPopup",mixins:[nt],inheritAttrs:!1,props:{visible:u.looseBool,getClassNameFromAlign:u.func,getRootDomNode:u.func,align:u.any,destroyPopupOnHide:u.looseBool,prefixCls:u.string,getContainer:u.func,transitionName:u.string,animation:u.any,maskAnimation:u.string,maskTransitionName:u.string,mask:u.looseBool,zIndex:u.number,popupClassName:u.any,popupStyle:u.object.def(function(){return{}}),stretch:u.string,point:u.shape({pageX:u.number,pageY:u.number}).loose},data:function(){return this.domEl=null,this.currentAlignClassName=void 0,this.transitionProps={},this.savePopupRef=IP.bind(this,"popupInstance"),this.saveAlignRef=IP.bind(this,"alignInstance"),{stretchChecked:!1,targetWidth:void 0,targetHeight:void 0}},mounted:function(){var t=this;this.$nextTick(function(){t.rootNode=t.getPopupDomNode(),t.setStretchSize()})},updated:function(){var t=this;this.$nextTick(function(){t.setStretchSize()})},methods:{onAlign:function(t,n){var r=this.$props,a=r.getClassNameFromAlign(n);this.currentAlignClassName!==a&&(this.currentAlignClassName=a,t.className=this.getClassName(a,t.className));var o=this.$attrs.onaAlign;o&&o(t,n)},setStretchSize:function(){var t=this.$props,n=t.stretch,r=t.getRootDomNode,a=t.visible,o=this.$data,i=o.stretchChecked,l=o.targetHeight,s=o.targetWidth;if(!n||!a){i&&this.setState({stretchChecked:!1});return}var c=r();if(!!c){var 
d=c.offsetHeight,f=c.offsetWidth;(l!==d||s!==f||!i)&&this.setState({stretchChecked:!0,targetHeight:d,targetWidth:f})}},getPopupDomNode:function(){return Sn(this.popupInstance)},getTargetElement:function(){return this.$props.getRootDomNode()},getAlignTarget:function(){var t=this.$props.point;return t||this.getTargetElement},getMaskTransitionName:function(){var t=this.$props,n=t.maskTransitionName,r=t.maskAnimation;return!n&&r&&(n="".concat(t.prefixCls,"-").concat(r)),n},getTransitionName:function(){var t=this.$props,n=t.transitionName,r=t.animation;return n||(typeof r=="string"?n="".concat(r):r&&r.props&&r.props.name&&(n=r.props.name)),n},getClassName:function(t){var n=this,r=arguments.length>1&&arguments[1]!==void 0?arguments[1]:"",a=[];this.transitionProps&&Object.keys(this.transitionProps).forEach(function(i){typeof n.transitionProps[i]=="string"&&a.push.apply(a,Je(n.transitionProps[i].split(" ")))});var o=r.split(" ").filter(function(i){return a.indexOf(i)!==-1}).join(" ");return"".concat(this.$props.prefixCls," ").concat(this.$attrs.class||""," ").concat(this.$props.popupClassName," ").concat(t," ").concat(o)},getPopupElement:function(){var t=this,n,r,a=this.savePopupRef,o=this.$props,i=this.$attrs,l=this.$slots,s=this.getTransitionName,c=this.$data,d=c.stretchChecked,f=c.targetHeight,p=c.targetWidth,v=i.style,m=v===void 0?{}:v,y=If(i).onEvents,b=o.align,C=o.visible,S=o.prefixCls,w=o.animation,k=o.popupStyle,$=o.getClassNameFromAlign,O=o.destroyPopupOnHide,T=o.stretch,_=this.getClassName(this.currentAlignClassName||$(b));C||(this.currentAlignClassName=null);var I={};T&&(T.indexOf("height")!==-1?I.height=typeof f=="number"?"".concat(f,"px"):f:T.indexOf("minHeight")!==-1&&(I.minHeight=typeof f=="number"?"".concat(f,"px"):f),T.indexOf("width")!==-1?I.width=typeof p=="number"?"".concat(p,"px"):p:T.indexOf("minWidth")!==-1&&(I.minWidth=typeof p=="number"?"".concat(p,"px"):p),d||setTimeout(function(){t.alignInstance&&t.alignInstance.forceAlign()},0));var 
L=P(P({prefixCls:S,visible:C,class:_},y),{ref:a,style:P(P(P(P({},I),k),m),this.getZIndexStyle())}),j=s(),F=!!j,N=Lo(j);return kt(w)==="object"&&(F=!0,N=P(P({},N),w)),F||(N={}),this.transitionProps=N,O?g(no,N,{default:function(){return[C?g(EP,{target:t.getAlignTarget(),key:"popup",ref:t.saveAlignRef,monitorWindowResize:!0,align:b,onAlign:t.onAlign},{default:function(){return[g(MP,L,{default:function(){return[(n=l.default)===null||n===void 0?void 0:n.call(l)]}})]}}):null]}}):g(no,N,{default:function(){return[at(g(EP,{target:t.getAlignTarget(),key:"popup",ref:t.saveAlignRef,monitorWindowResize:!0,disabled:!C,align:b,onAlign:t.onAlign},{default:function(){return[g(MP,L,{default:function(){return[(r=l.default)===null||r===void 0?void 0:r.call(l)]}})]}}),[[_t,C]])]}})},getZIndexStyle:function(){var t={},n=this.$props;return n.zIndex!==void 0&&(t.zIndex=n.zIndex),t},getMaskElement:function(){var t=this.$props,n=null;if(t.mask){var r=this.getMaskTransitionName();if(n=at(g(wA,{style:this.getZIndexStyle(),key:"mask",class:"".concat(t.prefixCls,"-mask"),visible:t.visible},null),[[_t,t.visible]]),r){var a=function(){return n}();n=g(no,{appear:!0,name:r},{default:function(){return[a]}})}}return n}},render:function(){var t=this.getMaskElement,n=this.getPopupElement;return g("div",null,[t(),n()])}},SA=G({name:"Portal",props:{getContainer:u.func.isRequired,children:u.any.isRequired,didUpdate:u.func},data:function(){return this._container=null,{}},mounted:function(){this.createContainer()},updated:function(){var t=this,n=this.$props.didUpdate;n&&Ne(function(){n(t.$props)})},beforeUnmount:function(){this.removeContainer()},methods:{createContainer:function(){this._container=this.$props.getContainer(),this.$forceUpdate()},removeContainer:function(){this._container&&this._container.parentNode&&this._container.parentNode.removeChild(this._container)}},render:function(){var t=this;return 
this._container?g(Ps,{to:this._container},{default:function(){return[t.$props.children]}}):null}});function kke(){return""}function $ke(){return window.document}var Oke=["onClick","onMousedown","onTouchstart","onMouseenter","onMouseleave","onFocus","onBlur","onContextmenu"],Ii=G({name:"Trigger",mixins:[nt],inheritAttrs:!1,props:{action:u.oneOfType([u.string,u.arrayOf(u.string)]).def([]),showAction:u.any.def([]),hideAction:u.any.def([]),getPopupClassNameFromAlign:u.any.def(kke),onPopupVisibleChange:u.func.def(wh),afterPopupVisibleChange:u.func.def(wh),popup:u.any,popupStyle:u.object.def(function(){return{}}),prefixCls:u.string.def("rc-trigger-popup"),popupClassName:u.string.def(""),popupPlacement:u.string,builtinPlacements:u.object,popupTransitionName:u.oneOfType([u.string,u.object]),popupAnimation:u.any,mouseEnterDelay:u.number.def(0),mouseLeaveDelay:u.number.def(.1),zIndex:u.number,focusDelay:u.number.def(0),blurDelay:u.number.def(.15),getPopupContainer:u.func,getDocument:u.func.def($ke),forceRender:u.looseBool,destroyPopupOnHide:u.looseBool.def(!1),mask:u.looseBool.def(!1),maskClosable:u.looseBool.def(!0),popupAlign:u.object.def(function(){return{}}),popupVisible:u.looseBool,defaultPopupVisible:u.looseBool.def(!1),maskTransitionName:u.oneOfType([u.string,u.object]),maskAnimation:u.string,stretch:u.string,alignPoint:u.looseBool},setup:function(){return{vcTriggerContext:ve("vcTriggerContext",{}),savePopupRef:ve("savePopupRef",wh),dialogContext:ve("dialogContext",null)}},data:function(){var t=this,n=this.$props,r;return vt(this,"popupVisible")?r=!!n.popupVisible:r=!!n.defaultPopupVisible,Oke.forEach(function(a){t["fire".concat(a)]=function(o){t.fireEvents(a,o)}}),this._component=null,this.focusTime=null,this.clickOutsideHandler=null,this.contextmenuOutsideHandler1=null,this.contextmenuOutsideHandler2=null,this.touchOutsideHandler=null,{prevPopupVisible:r,sPopupVisible:r,point:null}},watch:{popupVisible:function(t){t!==void 
0&&(this.prevPopupVisible=this.sPopupVisible,this.sPopupVisible=t)}},/* created: registers this instance under the key "vcTriggerContext" via `ot` (presumably Vue's provide; setup() reads the same key through `ve` - confirm). */created:function(){ot("vcTriggerContext",this)},/* deactivated: hides the popup. */deactivated:function(){this.setPopupVisible(!1)},/* mounted / updated: re-evaluate the outside listeners on the next tick. */mounted:function(){var t=this;this.$nextTick(function(){t.updatedCal()})},updated:function(){var t=this;this.$nextTick(function(){t.updatedCal()})},/* beforeUnmount: cancels the pending show/hide timer, removes every outside listener and clears the popup mousedown timeout. */beforeUnmount:function(){this.clearDelayTimer(),this.clearOutsideHandler(),clearTimeout(this.mouseDownTimeout)},methods:{/* updatedCal(): while sPopupVisible is true, attaches each outside listener at most once (the handle returned by `Kn` is stored and later released with .remove() in clearOutsideHandler): "mousedown" on the document when click-to-hide or contextmenu-to-show applies, "touchstart" on the document always, "scroll" on the document and "blur" on window when contextmenu-to-show applies. When the popup is not visible, all of them are removed. `mn` selects the {passive:false} listener option (presumably a passive-support flag - confirm). */updatedCal:function(){var t=this.$props,n=this.$data;if(n.sPopupVisible){var r;!this.clickOutsideHandler&&(this.isClickToHide()||this.isContextmenuToShow())&&(r=t.getDocument(),this.clickOutsideHandler=Kn(r,"mousedown",this.onDocumentClick)),this.touchOutsideHandler||(r=r||t.getDocument(),this.touchOutsideHandler=Kn(r,"touchstart",this.onDocumentClick,mn?{passive:!1}:!1)),!this.contextmenuOutsideHandler1&&this.isContextmenuToShow()&&(r=r||t.getDocument(),this.contextmenuOutsideHandler1=Kn(r,"scroll",this.onContextmenuClose)),!this.contextmenuOutsideHandler2&&this.isContextmenuToShow()&&(this.contextmenuOutsideHandler2=Kn(window,"blur",this.onContextmenuClose))}else this.clearOutsideHandler()},/* onMouseenter(t): mouseenter handler for the trigger child; its body continues on the following line. */onMouseenter:function(t){var 
n=this.$props.mouseEnterDelay;this.fireEvents("onMouseenter",t),this.delaySetPopupVisible(!0,n,n?null:t)},onMouseMove:function(t){this.fireEvents("onMousemove",t),this.setPoint(t)},onMouseleave:function(t){this.fireEvents("onMouseleave",t),this.delaySetPopupVisible(!1,this.$props.mouseLeaveDelay)},onPopupMouseenter:function(){this.clearDelayTimer()},onPopupMouseleave:function(t){t&&t.relatedTarget&&!t.relatedTarget.setTimeout&&this._component&&this._component.getPopupDomNode&&bu(this._component.getPopupDomNode(),t.relatedTarget)||this.delaySetPopupVisible(!1,this.$props.mouseLeaveDelay)},onFocus:function(t){this.fireEvents("onFocus",t),this.clearDelayTimer(),this.isFocusToShow()&&(this.focusTime=Date.now(),this.delaySetPopupVisible(!0,this.$props.focusDelay))},onMousedown:function(t){this.fireEvents("onMousedown",t),this.preClickTime=Date.now()},onTouchstart:function(t){this.fireEvents("onTouchstart",t),this.preTouchTime=Date.now()},onBlur:function(t){bu(t.target,t.relatedTarget||document.activeElement)||(this.fireEvents("onBlur",t),this.clearDelayTimer(),this.isBlurToHide()&&this.delaySetPopupVisible(!1,this.$props.blurDelay))},onContextmenu:function(t){t.preventDefault(),this.fireEvents("onContextmenu",t),this.setPopupVisible(!0,t)},onContextmenuClose:function(){this.isContextmenuToShow()&&this.close()},onClick:function(t){if(this.fireEvents("onClick",t),this.focusTime){var n;if(this.preClickTime&&this.preTouchTime?n=Math.min(this.preClickTime,this.preTouchTime):this.preClickTime?n=this.preClickTime:this.preTouchTime&&(n=this.preTouchTime),Math.abs(n-this.focusTime)<20)return;this.focusTime=0}this.preClickTime=0,this.preTouchTime=0,this.isClickToShow()&&(this.isClickToHide()||this.isBlurToHide())&&t&&t.preventDefault&&t.preventDefault(),t&&t.domEvent&&t.domEvent.preventDefault();var r=!this.$data.sPopupVisible;(this.isClickToHide()&&!r||r&&this.isClickToShow())&&this.setPopupVisible(!this.$data.sPopupVisible,t)},onPopupMouseDown:function(){var 
t=this,n=this.vcTriggerContext,r=n===void 0?{}:n;this.hasPopupMouseDown=!0,clearTimeout(this.mouseDownTimeout),this.mouseDownTimeout=setTimeout(function(){t.hasPopupMouseDown=!1},0),r.onPopupMouseDown&&r.onPopupMouseDown.apply(r,arguments)},onDocumentClick:function(t){if(!(this.$props.mask&&!this.$props.maskClosable)){var n=t.target,r=Sn(this);!bu(r,n)&&!this.hasPopupMouseDown&&this.close()}},getPopupDomNode:function(){return this._component&&this._component.getPopupDomNode?this._component.getPopupDomNode():null},getRootDomNode:function(){return Sn(this)},handleGetPopupClassFromAlign:function(t){var n=[],r=this.$props,a=r.popupPlacement,o=r.builtinPlacements,i=r.prefixCls,l=r.alignPoint,s=r.getPopupClassNameFromAlign;return a&&o&&n.push(yke(o,i,t,l)),s&&n.push(s(t)),n.join(" ")},getPopupAlign:function(){var t=this.$props,n=t.popupPlacement,r=t.popupAlign,a=t.builtinPlacements;return n&&a?gke(a,n,r):r},savePopup:function(t){this._component=t,this.savePopupRef(t)},getComponent:function(){var t=this,n={};this.isMouseEnterToShow()&&(n.onMouseenter=t.onPopupMouseenter),this.isMouseLeaveToHide()&&(n.onMouseleave=t.onPopupMouseleave),n.onMousedown=this.onPopupMouseDown,n[mn?"onTouchstartPassive":"onTouchstart"]=this.onPopupMouseDown;var r=t.handleGetPopupClassFromAlign,a=t.getRootDomNode,o=t.getContainer,i=t.$attrs,l=t.$props,s=l.prefixCls,c=l.destroyPopupOnHide,d=l.popupClassName,f=l.action,p=l.popupAnimation,v=l.popupTransitionName,m=l.popupStyle,y=l.mask,b=l.maskAnimation,C=l.maskTransitionName,S=l.zIndex,w=l.stretch,k=l.alignPoint,$=this.$data,O=$.sPopupVisible,T=$.point,_=this.getPopupAlign(),I=P(P({prefixCls:s,destroyPopupOnHide:c,visible:O,point:k?T:null,action:f,align:_,animation:p,getClassNameFromAlign:r,stretch:w,getRootDomNode:a,mask:y,zIndex:S,transitionName:v,maskAnimation:b,maskTransitionName:C,getContainer:o,popupClassName:d,popupStyle:m,onAlign:i.onPopupAlign||wh},n),{ref:this.savePopup});return 
g(Ske,I,{default:function(){return[We(t,"popup")]}})},getContainer:function(){var t=this.$props,n=this.dialogContext,r=document.createElement("div");r.style.position="absolute",r.style.top="0",r.style.left="0",r.style.width="100%";var a=t.getPopupContainer?t.getPopupContainer(Sn(this),n):t.getDocument().body;return a.appendChild(r),this.popupContainer=r,r},setPopupVisible:function(t,n){var r=this.alignPoint,a=this.sPopupVisible,o=this.onPopupVisibleChange;this.clearDelayTimer(),a!==t&&(vt(this,"popupVisible")||this.setState({sPopupVisible:t,prevPopupVisible:a}),o&&o(t)),r&&n&&this.setPoint(n)},setPoint:function(t){var n=this.$props.alignPoint;!n||!t||this.setState({point:{pageX:t.pageX,pageY:t.pageY}})},handlePortalUpdate:function(){this.prevPopupVisible!==this.sPopupVisible&&this.afterPopupVisibleChange(this.sPopupVisible)},delaySetPopupVisible:function(t,n,r){var a=this,o=n*1e3;if(this.clearDelayTimer(),o){var i=r?{pageX:r.pageX,pageY:r.pageY}:null;this.delayTimer=Fv(function(){a.setPopupVisible(t,i),a.clearDelayTimer()},o)}else this.setPopupVisible(t,r)},clearDelayTimer:function(){this.delayTimer&&(o2(this.delayTimer),this.delayTimer=null)},clearOutsideHandler:function(){this.clickOutsideHandler&&(this.clickOutsideHandler.remove(),this.clickOutsideHandler=null),this.contextmenuOutsideHandler1&&(this.contextmenuOutsideHandler1.remove(),this.contextmenuOutsideHandler1=null),this.contextmenuOutsideHandler2&&(this.contextmenuOutsideHandler2.remove(),this.contextmenuOutsideHandler2=null),this.touchOutsideHandler&&(this.touchOutsideHandler.remove(),this.touchOutsideHandler=null)},createTwoChains:function(t){var n=function(){},r=nf(this);return this.childOriginEvents[t]&&r[t]?this["fire".concat(t)]:(n=this.childOriginEvents[t]||r[t]||n,n)},isClickToShow:function(){var t=this.$props,n=t.action,r=t.showAction;return n.indexOf("click")!==-1||r.indexOf("click")!==-1},isContextmenuToShow:function(){var t=this.$props,n=t.action,r=t.showAction;return 
n.indexOf("contextmenu")!==-1||r.indexOf("contextmenu")!==-1},/* isClickToHide(): true when `action` or `hideAction` contains "click". */isClickToHide:function(){var t=this.$props,n=t.action,r=t.hideAction;return n.indexOf("click")!==-1||r.indexOf("click")!==-1},/* isMouseEnterToShow(): true when `action` contains "hover" or `showAction` contains "mouseenter". */isMouseEnterToShow:function(){var t=this.$props,n=t.action,r=t.showAction;return n.indexOf("hover")!==-1||r.indexOf("mouseenter")!==-1},/* isMouseLeaveToHide(): true when `action` contains "hover" or `hideAction` contains "mouseleave". */isMouseLeaveToHide:function(){var t=this.$props,n=t.action,r=t.hideAction;return n.indexOf("hover")!==-1||r.indexOf("mouseleave")!==-1},/* isFocusToShow(): true when `action` or `showAction` contains "focus". */isFocusToShow:function(){var t=this.$props,n=t.action,r=t.showAction;return n.indexOf("focus")!==-1||r.indexOf("focus")!==-1},/* isBlurToHide(): true when `action` contains "focus" or `hideAction` contains "blur". */isBlurToHide:function(){var t=this.$props,n=t.action,r=t.hideAction;return n.indexOf("focus")!==-1||r.indexOf("blur")!==-1},/* forcePopupAlign(): re-runs alignment, but only while the popup is visible and the popup component exposes an alignInstance. */forcePopupAlign:function(){this.$data.sPopupVisible&&this._component&&this._component.alignInstance&&this._component.alignInstance.forceAlign()},/* fireEvents(t, n): invokes the child's own handler stored under key `t` (if any), then the handler found under the same key in $props or $attrs, each with event `n`. */fireEvents:function(t,n){this.childOriginEvents[t]&&this.childOriginEvents[t](n);var r=this.$props[t]||this.$attrs[t];r&&r(n)},/* close(): hides the popup. */close:function(){this.setPopupVisible(!1)}},/* render(): reads the slot children through `La(ht(this))`, warns via `on` when there is more than one, and records the first child's event handlers (via `nf`) in childOriginEvents; the rest of the body continues on the following line. */render:function(){var t=this,n=this.sPopupVisible,r=this.$attrs,a=La(ht(this)),o=this.$props,i=o.forceRender,l=o.alignPoint;a.length>1&&on(!1,"Trigger children just support only one default",!0);var s=a[0];this.childOriginEvents=nf(s);var 
c={key:"trigger"};this.isContextmenuToShow()?c.onContextmenu=this.onContextmenu:c.onContextmenu=this.createTwoChains("onContextmenu"),this.isClickToHide()||this.isClickToShow()?(c.onClick=this.onClick,c.onMousedown=this.onMousedown,c[mn?"onTouchstartPassive":"onTouchstart"]=this.onTouchstart):(c.onClick=this.createTwoChains("onClick"),c.onMousedown=this.createTwoChains("onMousedown"),c[mn?"onTouchstartPassive":"onTouchstart"]=this.createTwoChains("onTouchstart")),this.isMouseEnterToShow()?(c.onMouseenter=this.onMouseenter,l&&(c.onMousemove=this.onMouseMove)):c.onMouseenter=this.createTwoChains("onMouseenter"),this.isMouseLeaveToHide()?c.onMouseleave=this.onMouseleave:c.onMouseleave=this.createTwoChains("onMouseleave"),this.isFocusToShow()||this.isBlurToHide()?(c.onFocus=this.onFocus,c.onBlur=this.onBlur):(c.onFocus=this.createTwoChains("onFocus"),c.onBlur=function(v){v&&(!v.relatedTarget||!bu(v.target,v.relatedTarget))&&t.createTwoChains("onBlur")(v)});var d=Se(s&&s.props&&s.props.class,r.class);d&&(c.class=d);var f=Ot(s,c),p;return(n||this._component||i)&&(p=g(SA,{key:"portal",children:this.getComponent(),getContainer:this.getContainer,didUpdate:this.handlePortalUpdate},null)),[p,f]}}),Pke=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},Tke=function(t){var n=typeof 
t!="number"?0:1;return{bottomLeft:{points:["tl","bl"],offset:[0,4],overflow:{adjustX:n,adjustY:1}},bottomRight:{points:["tr","br"],offset:[0,4],overflow:{adjustX:n,adjustY:1}},topLeft:{points:["bl","tl"],offset:[0,-4],overflow:{adjustX:n,adjustY:1}},topRight:{points:["br","tr"],offset:[0,-4],overflow:{adjustX:n,adjustY:1}}}},kA=G({name:"SelectTrigger",inheritAttrs:!1,created:function(){this.popupRef=ic()},methods:{getPopupElement:function(){return this.popupRef.current}},render:function(){var t=this,n=P(P({},this.$props),this.$attrs),r=n.empty,a=r===void 0?!1:r,o=Pke(n,["empty"]),i=o.visible,l=o.dropdownAlign,s=o.prefixCls,c=o.popupElement,d=o.dropdownClassName,f=o.dropdownStyle,p=o.dropdownMatchSelectWidth,v=o.containerWidth,m=o.dropdownRender,y=o.animation,b=o.transitionName,C=o.direction,S=o.getPopupContainer,w="".concat(s,"-dropdown"),k=c;m&&(k=m({menuNode:c,props:o}));var $=Tke(p),O=y?"".concat(w,"-").concat(y):b,T=P({minWidth:"".concat(v,"px")},f);return typeof p=="number"?T.width="".concat(p,"px"):p&&(T.width="".concat(v,"px")),g(Ii,le(le({},o),{},{showAction:[],hideAction:[],popupPlacement:C==="rtl"?"bottomRight":"bottomLeft",builtinPlacements:$,prefixCls:w,popupTransitionName:O,popup:g("div",{ref:this.popupRef},[k]),popupAlign:l,popupVisible:i,getPopupContainer:S,popupClassName:Se(d,V({},"".concat(w,"-empty"),a)),popupStyle:T}),{default:function(){return[ht(t)[0]]}})}});kA.props={dropdownAlign:u.object,visible:u.looseBool,disabled:u.looseBool,dropdownClassName:u.string,dropdownStyle:u.object,empty:u.looseBool,prefixCls:u.string,popupClassName:u.string,animation:u.string,transitionName:u.string,getPopupContainer:u.func,dropdownRender:u.func,containerWidth:u.number,dropdownMatchSelectWidth:u.oneOfType([Number,Boolean]).def(!0),popupElement:u.any,direction:u.string,getTriggerDOMNode:u.func};var xke=kA,_ke="RC_SELECT_INTERNAL_PROPS_MARK";function Eke(){var e=arguments.length>0&&arguments[0]!==void 
0?arguments[0]:10,t=H(!1),n,r=function(){window.clearTimeout(n)};et(function(){r()});var a=function(i,l){r(),n=window.setTimeout(function(){t.value=i,l&&l()},e)};return[t,a,r]}function Mke(e,t,n){function r(a){var o,i,l,s=a.target;s.shadowRoot&&a.composed&&(s=a.composedPath()[0]||s);var c=[(o=e[0])===null||o===void 0?void 0:o.value,(l=(i=e[1])===null||i===void 0?void 0:i.value)===null||l===void 0?void 0:l.getPopupElement()];t.value&&c.every(function(d){return d&&!d.contains(s)&&d!==s})&&n(!1)}et(function(){window.addEventListener("mousedown",r)}),Lt(function(){window.removeEventListener("mousedown",r)})}function Ike(e){var t=Je(e.value),n=x(function(){var r=new Map;t.forEach(function(o){var i=o.value,l=o.label;i!==l&&r.set(i,l)});var a=e.value.map(function(o){var i=r.get(o.value);return o.isCacheable&&i?P(P({},o),{label:i}):o});return t=a,a});return n}function Nke(e){var t=x(function(){var r=new Map;return e.value.forEach(function(a){var o=a.data.value;r.set(o,a)}),r}),n=function(a){return a.map(function(o){return t.value.get(o)}).filter(Boolean)};return n}var Ake=function(t,n){var r=P({},t);return Object.keys(n).forEach(function(a){var o=r[a];if(o)o.default=n[a];else throw new Error("not have ".concat(a," prop"))}),r},Rn=Ake,Dke=function(){if(typeof navigator=="undefined"||typeof window=="undefined")return!1;var e=navigator.userAgent||navigator.vendor||window.opera;return!!(/(android|bb\d+|meego).+mobile|avantgo|bada\/|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|mobile.+firefox|netfront|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\/|plucker|pocket|psp|series(4|6)0|symbian|treo|up\.(browser|link)|vodafone|wap|windows ce|xda|xiino|android|ipad|playbook|silk/i.test(e)||/1207|6310|6590|3gso|4thp|50[1-6]i|770s|802s|a wa|abac|ac(er|oo|s-)|ai(ko|rn)|al(av|ca|co)|amoi|an(ex|ny|yw)|aptu|ar(ch|go)|as(te|us)|attw|au(di|-m|r |s 
)|avan|be(ck|ll|nq)|bi(lb|rd)|bl(ac|az)|br(e|v)w|bumb|bw-(n|u)|c55\/|capi|ccwa|cdm-|cell|chtm|cldc|cmd-|co(mp|nd)|craw|da(it|ll|ng)|dbte|dc-s|devi|dica|dmob|do(c|p)o|ds(12|-d)|el(49|ai)|em(l2|ul)|er(ic|k0)|esl8|ez([4-7]0|os|wa|ze)|fetc|fly(-|_)|g1 u|g560|gene|gf-5|g-mo|go(\.w|od)|gr(ad|un)|haie|hcit|hd-(m|p|t)|hei-|hi(pt|ta)|hp( i|ip)|hs-c|ht(c(-| |_|a|g|p|s|t)|tp)|hu(aw|tc)|i-(20|go|ma)|i230|iac( |-|\/)|ibro|idea|ig01|ikom|im1k|inno|ipaq|iris|ja(t|v)a|jbro|jemu|jigs|kddi|keji|kgt( |\/)|klon|kpt |kwc-|kyo(c|k)|le(no|xi)|lg( g|\/(k|l|u)|50|54|-[a-w])|libw|lynx|m1-w|m3ga|m50\/|ma(te|ui|xo)|mc(01|21|ca)|m-cr|me(rc|ri)|mi(o8|oa|ts)|mmef|mo(01|02|bi|de|do|t(-| |o|v)|zz)|mt(50|p1|v )|mwbp|mywa|n10[0-2]|n20[2-3]|n30(0|2)|n50(0|2|5)|n7(0(0|1)|10)|ne((c|m)-|on|tf|wf|wg|wt)|nok(6|i)|nzph|o2im|op(ti|wv)|oran|owg1|p800|pan(a|d|t)|pdxg|pg(13|-([1-8]|c))|phil|pire|pl(ay|uc)|pn-2|po(ck|rt|se)|prox|psio|pt-g|qa-a|qc(07|12|21|32|60|-[2-7]|i-)|qtek|r380|r600|raks|rim9|ro(ve|zo)|s55\/|sa(ge|ma|mm|ms|ny|va)|sc(01|h-|oo|p-)|sdk\/|se(c(-|0|1)|47|mc|nd|ri)|sgh-|shar|sie(-|m)|sk-0|sl(45|id)|sm(al|ar|b3|it|t5)|so(ft|ny)|sp(01|h-|v-|v )|sy(01|mb)|t2(18|50)|t6(00|10|18)|ta(gt|lk)|tcl-|tdg-|tel(i|m)|tim-|t-mo|to(pl|sh)|ts(70|m-|m3|m5)|tx-9|up(\.b|g1|si)|utst|v400|v750|veri|vi(rg|te)|vk(40|5[0-3]|-v)|vm40|voda|vulc|vx(52|53|60|61|70|80|81|83|85|98)|w3c(-| )|webc|whit|wi(g |nc|nw)|wmlb|wonu|x700|yas-|your|zeto|zte-/i.test(e==null?void 0:e.substr(0,4)))},Rke=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return 
n},Lke=["children","removeIcon","placeholder","autofocus","maxTagCount","maxTagTextLength","maxTagPlaceholder","choiceTransitionName","onInputKeyDown","tabindex"],$A=function(){return{prefixCls:u.string,id:u.string,class:u.string,style:u.any,options:u.array,mode:u.string,value:u.any,defaultValue:u.any,labelInValue:u.looseBool,inputValue:u.string,searchValue:u.string,optionFilterProp:u.string,filterOption:u.any,filterSort:u.func,showSearch:u.looseBool,autoClearSearchValue:u.looseBool,onSearch:u.func,onClear:u.func,allowClear:u.looseBool,clearIcon:u.VNodeChild,showArrow:u.looseBool,inputIcon:u.VNodeChild,removeIcon:u.VNodeChild,menuItemSelectedIcon:u.VNodeChild,open:u.looseBool,defaultOpen:u.looseBool,listHeight:u.number,listItemHeight:u.number,dropdownStyle:u.object,dropdownClassName:u.string,dropdownMatchSelectWidth:an(u.oneOfType([Boolean,Number])),virtual:u.looseBool,dropdownRender:u.func,dropdownAlign:u.any,animation:u.string,transitionName:u.string,getPopupContainer:u.func,direction:u.string,disabled:u.looseBool,loading:u.looseBool,autofocus:u.looseBool,defaultActiveFirstOption:u.looseBool,notFoundContent:u.VNodeChild,placeholder:u.VNodeChild,backfill:u.looseBool,getInputElement:u.func,optionLabelProp:u.string,maxTagTextLength:u.number,maxTagCount:u.oneOfType([u.number,u.string]),maxTagPlaceholder:u.any,tokenSeparators:u.arrayOf(u.string),tagRender:u.func,showAction:u.array,tabindex:u.oneOfType([u.number,u.string]),onKeyup:u.func,onKeydown:u.func,onPopupScroll:u.func,onDropdownVisibleChange:u.func,onSelect:u.func,onDeselect:u.func,onInputKeyDown:{type:Function},onClick:u.func,onChange:u.func,onBlur:u.func,onFocus:u.func,onMousedown:u.func,onMouseenter:u.func,onMouseleave:u.func,choiceTransitionName:u.string,internalProps:u.object,children:u.array}};function Fke(e){var t=e.prefixCls,n=e.components.optionList,r=e.convertChildrenToData,a=e.flattenOptions,o=e.getLabeledValue,i=e.filterOptions,l=e.isValueDisabled,s=e.findValueOption;e.warningProps;var 
c=e.fillOptionsWithMissingValue,d=e.omitDOMProps,f=G({name:"Select",slots:["option"],props:Rn($A(),{}),setup:function(v){var m,y=x(function(){return v.internalProps&&v.internalProps.mark===_ke});on(v.optionFilterProp!=="children","Select","optionFilterProp not support children, please use label instead");var b=H(null),C=H(null),S=H(null),w=H(null),k=x(function(){return(v.tokenSeparators||[]).some(function(it){return[`
`,`\r
`].includes(it)})}),$=Eke(),O=fn($,3),T=O[0],_=O[1],I=O[2],L=x(function(){return v.id||"rc_select_".concat(dSe())}),j=x(function(){var it=v.optionLabelProp;return it===void 0&&(it=v.options?"label":"children"),it}),F=x(function(){return v.mode==="combobox"?!1:v.labelInValue}),N=x(function(){return v.mode==="tags"||v.mode==="multiple"}),D=x(function(){return v.showSearch!==void 0?v.showSearch:N.value||v.mode==="combobox"}),z=H(!1);et(function(){z.value=Dke()});var B=ic(),M=H(""),E=function(ct){M.value=ct},K=H(v.value!==void 0?v.value:v.defaultValue);ce(function(){return v.value},function(){K.value=v.value,M.value=""});var W=x(function(){return iSe(K.value,{labelInValue:F.value,combobox:v.mode==="combobox"})}),Y=x(function(){return W.value[0]}),q=x(function(){return W.value[1]}),J=x(function(){return new Set(Y.value)}),ne=H(null),oe=function(ct){ne.value=ct},Q=x(function(){var it=M.value;return v.mode==="combobox"&&K.value!==void 0?it=K.value:v.searchValue!==void 0?it=v.searchValue:v.inputValue&&(it=v.inputValue),it}),ae=x(function(){var it=v.options;return it===void 0&&(it=r(v.children)),v.mode==="tags"&&c&&(it=c(it,K.value,j.value,v.labelInValue)),it||[]}),de=x(function(){return a(ae.value,v)}),be=Nke(de),Ee=x(function(){if(!Q.value||!D.value)return Je(ae.value);var it=v.optionFilterProp,ct=it===void 0?"value":it,Nt=v.mode,sn=v.filterOption,pn=i(Q.value,ae.value,{optionFilterProp:ct,filterOption:Nt==="combobox"&&sn===void 0?function(){return!0}:sn});return Nt==="tags"&&pn.every(function($n){return $n[ct]!==Q.value})&&pn.unshift({value:Q.value,label:Q.value,key:"__RC_SELECT_TAG_PLACEHOLDER__"}),v.filterSort&&Array.isArray(pn)?Je(pn).sort(v.filterSort):pn}),Pe=x(function(){return a(Ee.value,v)});et(function(){ce(Q,function(){w.value&&w.value.scrollTo&&w.value.scrollTo(0)},{flush:"post",immediate:!0})});var Be=x(function(){var it=Y.value.map(function(ct){var Nt=be([ct]),sn=o(ct,{options:Nt,prevValueMap:q.value,labelInValue:F.value,optionLabelProp:j.value});return 
P(P({},sn),{disabled:l(ct,Nt)})});return!v.mode&&it.length===1&&it[0].value===null&&it[0].label===null?[]:it});Be=Ike(Be);var te=function(ct,Nt,sn){var pn=be([ct]),$n=s([ct],pn)[0],za=v.internalProps,Xn=za===void 0?{}:za;if(!Xn.skipTriggerSelect){var yo=F.value?o(ct,{options:pn,prevValueMap:q.value,labelInValue:F.value,optionLabelProp:j.value}):ct;Nt&&v.onSelect?v.onSelect(yo,$n):!Nt&&v.onDeselect&&v.onDeselect(yo,$n)}y.value&&(Nt&&Xn.onRawSelect?Xn.onRawSelect(ct,$n,sn):!Nt&&Xn.onRawDeselect&&Xn.onRawDeselect(ct,$n,sn))},ie=H([]),ge=function(ct){ie.value=ct},ke=function(ct){if(!(y.value&&v.internalProps&&v.internalProps.skipTriggerChange)){var Nt=be(ct),sn=lSe(Array.from(ct),{labelInValue:F.value,options:Nt,getLabeledValue:o,prevValueMap:q.value,optionLabelProp:j.value}),pn=N.value?sn:sn[0];if(v.onChange&&(Y.value.length!==0||sn.length!==0)){var $n=s(ct,Nt,{prevValueOptions:ie.value});ge($n.map(function(za,Xn){var yo=P({},za);return Object.defineProperty(yo,"_INTERNAL_OPTION_VALUE_",{get:function(){return ct[Xn]}}),yo})),v.onChange(pn,N.value?$n:$n[0])}K.value=pn}},xe=function(ct,Nt){var sn=Nt.selected,pn=Nt.source,$n=v.autoClearSearchValue,za=$n===void 0?!0:$n;if(!v.disabled){var Xn;N.value?(Xn=new Set(Y.value),sn?Xn.add(ct):Xn.delete(ct)):(Xn=new Set,Xn.add(ct)),(N.value||!N.value&&Array.from(Y.value)[0]!==ct)&&ke(Array.from(Xn)),te(ct,!N.value||sn,pn),v.mode==="combobox"?(E(String(ct)),oe("")):(!N.value||za)&&(E(""),oe(""))}},Ie=function(ct,Nt){xe(ct,P(P({},Nt),{source:"option"}))},ye=function(ct,Nt){xe(ct,P(P({},Nt),{source:"selection"}))},pe=v.open!==void 0?v.open:v.defaultOpen,ue=H(pe),Ce=H(pe),je=function(ct){ue.value=v.open!==void 0?v.open:ct,Ce.value=ue.value};ce(function(){return v.open},function(){je(v.open)});var ee=x(function(){return!v.notFoundContent&&!Ee.value.length});Wn(function(){Ce.value=ue.value,(v.disabled||ee.value&&Ce.value&&v.mode==="combobox")&&(Ce.value=!1)});var me=x(function(){return ee.value?!1:Ce.value}),He=function(ct){var 
Nt=ct!==void 0?ct:!Ce.value;ue.value!==Nt&&!v.disabled&&(je(Nt),v.onDropdownVisibleChange&&v.onDropdownVisibleChange(Nt))};Mke([b,C],me,He);var lt=function(ct,Nt,sn){var pn=!0,$n=ct,za=Q.value;oe(null);var Xn=sn?null:mSe(ct,v.tokenSeparators),yo=Xn;if(v.mode==="combobox")Nt&&ke([$n]);else if(Xn){$n="",v.mode!=="tags"&&(yo=Xn.map(function(Ac){var Ck=de.value.find(function(p9){var v9=p9.data;return v9[j.value]===Ac});return Ck?Ck.data.value:null}).filter(function(Ac){return Ac!==null}));var Uo=Array.from(new Set([].concat(Je(Y.value),Je(yo))));ke(Uo),Uo.forEach(function(Ac){te(Ac,!0,"input")}),He(!1),pn=!1}return E($n),v.onSearch&&za!==$n&&v.onSearch($n),pn},Ye=function(ct){if(!(!ct||!ct.trim())){var Nt=Array.from(new Set([].concat(Je(Y.value),[ct])));ke(Nt),Nt.forEach(function(sn){te(sn,!0,"input")}),E("")}};ce(function(){return v.disabled},function(){ue.value&&!!v.disabled&&je(!1)},{immediate:!0}),ce(Ce,function(){!Ce.value&&!N.value&&v.mode!=="combobox"&&lt("",!1,!1)},{immediate:!0});var he=dA(),_e=fn(he,2),$e=_e[0],Ve=_e[1],st=function(ct){var Nt=$e(),sn=ct.which;if(sn===ze.ENTER&&(v.mode!=="combobox"&&ct.preventDefault(),Ce.value||He(!0)),Ve(!!Q.value),sn===ze.BACKSPACE&&!Nt&&N.value&&!Q.value&&Y.value.length){var pn=sSe(Be.value,Y.value);pn.removedValue!==null&&(ke(pn.values),te(pn.removedValue,!1,"input"))}Ce.value&&w.value&&w.value.onKeydown(ct),v.onKeydown&&v.onKeydown(ct)},Dt=function(ct){Ce.value&&w.value&&w.value.onKeyup(ct),v.onKeyup&&v.onKeyup(ct)},Ut=H(!1),Ft=function(){_(!0),v.disabled||(v.onFocus&&!Ut.value&&v.onFocus(arguments.length<=0?void 0:arguments[0]),v.showAction&&v.showAction.includes("focus")&&He(!0)),Ut.value=!0},Jt=function(){if(_(!1,function(){Ut.value=!1,He(!1)}),!v.disabled){var ct=Q.value;ct&&(v.mode==="tags"?(lt("",!1,!1),ke(Array.from(new Set([].concat(Je(Y.value),[ct]))))):v.mode==="multiple"&&E("")),v.onBlur&&v.onBlur(arguments.length<=0?void 0:arguments[0])}};ot("VCSelectContainerEvent",{focus:Ft,blur:Jt});var 
Mn=[];et(function(){Mn.forEach(function(it){return window.clearTimeout(it)}),Mn.splice(0,Mn.length)}),Lt(function(){Mn.forEach(function(it){return window.clearTimeout(it)}),Mn.splice(0,Mn.length)});var tr=function(ct){var Nt=ct.target,sn=C.value&&C.value.getPopupElement();if(sn&&sn.contains(Nt)){var pn=window.setTimeout(function(){var $n=Mn.indexOf(pn);$n!==-1&&Mn.splice($n,1),I(),!z.value&&!sn.contains(document.activeElement)&&S.value.focus()});Mn.push(pn)}v.onMousedown&&v.onMousedown(ct)},nr=H(0),Va=x(function(){return v.defaultActiveFirstOption!==void 0?v.defaultActiveFirstOption:v.mode!=="combobox"}),Wo=function(ct,Nt){var sn=arguments.length>2&&arguments[2]!==void 0?arguments[2]:{},pn=sn.source,$n=pn===void 0?"keyboard":pn;nr.value=Nt,v.backfill&&v.mode==="combobox"&&ct!==null&&$n==="keyboard"&&oe(String(ct))},Re=H(null);et(function(){ce(me,function(){if(me.value){var it=Math.ceil(b.value.offsetWidth);Re.value!==it&&(Re.value=it)}},{immediate:!0})});var ut=function(){S.value.focus()},Vt=function(){S.value.blur()};return{focus:ut,blur:Vt,scrollTo:(m=w.value)===null||m===void 0?void 0:m.scrollTo,tokenWithEnter:k,mockFocused:T,mergedId:L,containerWidth:Re,onActiveValue:Wo,accessibilityIndex:nr,mergedDefaultActiveFirstOption:Va,onInternalMouseDown:tr,onContainerFocus:Ft,onContainerBlur:Jt,onInternalKeyDown:st,isMultiple:N,mergedOpen:Ce,displayOptions:Ee,displayFlattenOptions:Pe,rawValues:J,onInternalOptionSelect:Ie,onToggleOpen:He,mergedSearchValue:Q,useInternalProps:y,triggerChange:ke,triggerSearch:lt,mergedRawValue:Y,mergedShowSearch:D,onInternalKeyUp:Dt,triggerOpen:me,mergedOptions:ae,onInternalSelectionSelect:ye,selectorDomRef:B,displayValues:Be,activeValue:ne,onSearchSubmit:Ye,containerRef:b,listRef:w,triggerRef:C,selectorRef:S}},methods:{onPopupMouseEnter:function(){this.$forceUpdate()}},render:function(){var 
v,m=this,y=this.tokenWithEnter,b=this.mockFocused,C=this.mergedId,S=this.containerWidth,w=this.onActiveValue,k=this.accessibilityIndex,$=this.mergedDefaultActiveFirstOption,O=this.onInternalMouseDown,T=this.onInternalKeyDown,_=this.isMultiple,I=this.mergedOpen,L=this.displayOptions,j=this.displayFlattenOptions,F=this.rawValues,N=this.onInternalOptionSelect,D=this.onToggleOpen,z=this.mergedSearchValue,B=this.onPopupMouseEnter,M=this.useInternalProps,E=this.triggerChange,K=this.triggerSearch,W=this.mergedRawValue,Y=this.mergedShowSearch,q=this.onInternalKeyUp,J=this.triggerOpen,ne=this.mergedOptions,oe=this.onInternalSelectionSelect,Q=this.selectorDomRef,ae=this.displayValues,de=this.activeValue,be=this.onSearchSubmit,Ee=this.$slots,Pe=this.$props,Be=Pe.prefixCls,te=Be===void 0?t:Be,ie=Pe.class;Pe.id,Pe.open,Pe.defaultOpen;var ge=Pe.options;Pe.children;var ke=Pe.mode;Pe.value,Pe.defaultValue,Pe.labelInValue,Pe.showSearch,Pe.inputValue,Pe.searchValue,Pe.filterOption,Pe.optionFilterProp,Pe.autoClearSearchValue,Pe.onSearch;var xe=Pe.allowClear,Ie=Pe.clearIcon,ye=Pe.showArrow,pe=Pe.inputIcon,ue=Pe.menuItemSelectedIcon,Ce=Pe.disabled,je=Pe.loading;Pe.defaultActiveFirstOption;var ee=Pe.notFoundContent,me=ee===void 0?"Not Found":ee;Pe.optionLabelProp,Pe.backfill;var He=Pe.getInputElement,lt=Pe.getPopupContainer,Ye=Pe.listHeight,he=Ye===void 0?200:Ye,_e=Pe.listItemHeight,$e=_e===void 0?20:_e,Ve=Pe.animation,st=Pe.transitionName,Dt=Pe.virtual,Ut=Pe.dropdownStyle,Ft=Pe.dropdownClassName,Jt=Pe.dropdownMatchSelectWidth,Mn=Pe.dropdownRender,tr=Pe.dropdownAlign;Pe.showAction;var nr=Pe.direction;Pe.tokenSeparators;var Va=Pe.tagRender,Wo=Pe.onPopupScroll;Pe.onDropdownVisibleChange,Pe.onFocus,Pe.onBlur,Pe.onKeyup,Pe.onKeydown,Pe.onMousedown,Pe.onChange,Pe.onSelect,Pe.onDeselect;var Re=Pe.onClear,ut=Pe.internalProps,Vt=ut===void 
0?{}:ut,it=Rke(Pe,["prefixCls","class","id","open","defaultOpen","options","children","mode","value","defaultValue","labelInValue","showSearch","inputValue","searchValue","filterOption","optionFilterProp","autoClearSearchValue","onSearch","allowClear","clearIcon","showArrow","inputIcon","menuItemSelectedIcon","disabled","loading","defaultActiveFirstOption","notFoundContent","optionLabelProp","backfill","getInputElement","getPopupContainer","listHeight","listItemHeight","animation","transitionName","virtual","dropdownStyle","dropdownClassName","dropdownMatchSelectWidth","dropdownRender","dropdownAlign","showAction","direction","tokenSeparators","tagRender","onPopupScroll","onDropdownVisibleChange","onFocus","onBlur","onKeyup","onKeydown","onMousedown","onChange","onSelect","onDeselect","onClear","internalProps"]),ct=ke==="combobox"&&He&&He()||null,Nt=d?d(it):it;Lke.forEach(function(Uo){delete Nt[Uo]});var sn=g(n,{ref:"listRef",prefixCls:te,id:C,open:I,childrenAsData:!ge,options:L,flattenOptions:j,multiple:_,values:F,height:he,itemHeight:$e,onSelect:N,onToggleOpen:D,onActiveValue:w,defaultActiveFirstOption:$,notFoundContent:me,onScroll:Wo,searchValue:z,menuItemSelectedIcon:ue,virtual:Dt!==!1&&Jt!==!1,onMouseenter:B},{option:Ee.option}),pn,$n=function(){M&&Vt.onClear&&Vt.onClear(),Re&&Re(),E([]),K("",!1,!1)};!Ce&&xe&&(W.length||z)&&(pn=g(Lv,{class:"".concat(te,"-clear"),onMousedown:$n,customizeIcon:Ie},{default:function(){return[yt("\xD7")]}}));var za=ye!==void 0?ye:je||!_&&ke!=="combobox",Xn;za&&(Xn=g(Lv,{class:Se("".concat(te,"-arrow"),V({},"".concat(te,"-arrow-loading"),je)),customizeIcon:pe,customizeIconProps:{loading:je,searchValue:z,open:I,focused:b,showSearch:Y}},null));var 
yo=Se(te,ie,(v={},V(v,"".concat(te,"-focused"),b),V(v,"".concat(te,"-multiple"),_),V(v,"".concat(te,"-single"),!_),V(v,"".concat(te,"-allow-clear"),xe),V(v,"".concat(te,"-show-arrow"),za),V(v,"".concat(te,"-disabled"),Ce),V(v,"".concat(te,"-loading"),je),V(v,"".concat(te,"-open"),I),V(v,"".concat(te,"-customize-input"),ct),V(v,"".concat(te,"-show-search"),Y),v));return g("div",le(le({class:yo},Nt),{},{ref:"containerRef",onMousedown:O,onKeydown:T,onKeyup:q}),[b&&!I&&g("span",{style:{width:0,height:0,display:"flex",overflow:"hidden",opacity:0},"aria-live":"polite"},["".concat(W.join(", "))]),g(xke,{ref:"triggerRef",disabled:Ce,prefixCls:te,visible:J,popupElement:sn,containerWidth:S,animation:Ve,transitionName:st,dropdownStyle:Ut,dropdownClassName:Ft,direction:nr,dropdownMatchSelectWidth:Jt,dropdownRender:Mn,dropdownAlign:tr,getPopupContainer:lt,empty:!ne.length,getTriggerDOMNode:function(){return Q.current}},{default:function(){return[g(ASe,le(le({},m.$props),{},{domRef:Q,prefixCls:te,inputElement:ct,ref:"selectorRef",id:C,showSearch:Y,mode:ke,accessibilityIndex:k,multiple:_,tagRender:Va,values:ae,open:I,onToggleOpen:D,searchValue:z,activeValue:de,onSearch:K,onSearchSubmit:be,onSelect:oe,tokenWithEnter:y}),null)]}}),Xn,pn])}});return f}function Bke(e){var t=e.mode,n=e.options,r=e.children,a=e.backfill,o=e.allowClear,i=e.placeholder,l=e.getInputElement,s=e.showSearch,c=e.onSearch,d=e.defaultOpen,f=e.autofocus,p=e.labelInValue,v=e.value,m=e.inputValue,y=e.optionLabelProp,b=t==="multiple"||t==="tags",C=s!==void 0?s:b||t==="combobox",S=n||t2(r);if(Ua(t!=="tags"||S.every(function($){return!$.disabled}),"Please avoid setting option to disabled in tags mode since user can always type text as tag."),Ua(t!=="combobox"||!y,"`combobox` mode not support `optionLabelProp`. 
Please set `value` on Option directly."),Ua(t==="combobox"||!a,"`backfill` only works with `combobox` mode."),Ua(t==="combobox"||!l,"`getInputElement` only work with `combobox` mode."),tP(t!=="combobox"||!l||!o||!i,"Customize `getInputElement` should customize clear and placeholder logic instead of configuring `allowClear` and `placeholder`."),c&&!C&&t!=="combobox"&&t!=="tags"&&Ua(!1,"`onSearch` should work with `showSearch` instead of use alone."),tP(!d||f,"`defaultOpen` makes Select open without focus which means it will not close by click outside. You can set `autofocus` if needed."),v!=null){var w=n2(v);Ua(!p||w.every(function($){return kt($)==="object"&&("key"in $||"value"in $)}),"`value` should in shape of `{ value: string | number, label?: any }` when you set `labelInValue` to `true`"),Ua(!b||Array.isArray(v),"`value` should be array when `mode` is `multiple` or `tags`")}if(r){var k=null;r.some(function($){var O;if(!zn($)||!$.type)return!1;var T=$.type;if(T.isSelectOption)return!1;if(T.isSelectOptGroup){var _=((O=$.children)===null||O===void 0?void 0:O.default())||[],I=_.every(function(L){return!zn(L)||!$.type||L.type.isSelectOption?!0:(k=L.type,!1)});return!I}return k=T,!0}),k&&Ua(!1,"`children` should be `Select.Option` or `Select.OptGroup` instead of `".concat(k.displayName||k.name||k,"`.")),Ua(m===void 0,"`inputValue` is deprecated, please use `searchValue` instead.")}}var OA=Fke({prefixCls:"rc-select",components:{optionList:nSe},convertChildrenToData:t2,flattenOptions:fSe,getLabeledValue:hSe,filterOptions:vSe,isValueDisabled:gSe,findValueOption:r2,warningProps:Bke,fillOptionsWithMissingValue:ySe}),Bf=G({setup:function(t,n){var r=n.attrs,a=n.expose,o=n.slots,i=H(null);return a({focus:function(){var s;(s=i.value)===null||s===void 0||s.focus()},blur:function(){var s;(s=i.value)===null||s===void 0||s.blur()}}),function(){var l;return g(OA,le(le(le({ref:i},t),r),{},{children:((l=o.default)===null||l===void 0?void 
0:l.call(o))||[]}),o)}}});Bf.inheritAttrs=!1;Bf.props=MC(OA.props,["children"]);Bf.Option=aA;Bf.OptGroup=oA;var Vke=Bf,zke={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M884 256h-75c-5.1 0-9.9 2.5-12.9 6.6L512 654.2 227.9 262.6c-3-4.1-7.8-6.6-12.9-6.6h-75c-6.5 0-10.3 7.4-6.5 12.7l352.6 486.1c12.8 17.6 39 17.6 51.7 0l352.6-486.1c3.9-5.3.1-12.7-6.4-12.7z"}}]},name:"down",theme:"outlined"},Hke=zke,Sh=2,AP=.16,jke=.05,Kke=.05,Wke=.15,PA=5,TA=4,Uke=[{index:7,opacity:.15},{index:6,opacity:.25},{index:5,opacity:.3},{index:5,opacity:.45},{index:5,opacity:.65},{index:5,opacity:.85},{index:4,opacity:.9},{index:3,opacity:.95},{index:2,opacity:.97},{index:1,opacity:.98}];function DP(e){var t=e.r,n=e.g,r=e.b,a=y0(t,n,r);return{h:a.h*360,s:a.s,v:a.v}}function kh(e){var t=e.r,n=e.g,r=e.b;return"#".concat(b0(t,n,r,!1))}function Yke(e,t,n){var r=n/100,a={r:(t.r-e.r)*r+e.r,g:(t.g-e.g)*r+e.g,b:(t.b-e.b)*r+e.b};return a}function RP(e,t,n){var r;return Math.round(e.h)>=60&&Math.round(e.h)<=240?r=n?Math.round(e.h)-Sh*t:Math.round(e.h)+Sh*t:r=n?Math.round(e.h)+Sh*t:Math.round(e.h)-Sh*t,r<0?r+=360:r>=360&&(r-=360),r}function LP(e,t,n){if(e.h===0&&e.s===0)return e.s;var r;return n?r=e.s-AP*t:t===TA?r=e.s+AP:r=e.s+jke*t,r>1&&(r=1),n&&t===PA&&r>.1&&(r=.1),r<.06&&(r=.06),Number(r.toFixed(2))}function FP(e,t,n){var r;return n?r=e.v+Kke*t:r=e.v-Wke*t,r>1&&(r=1),Number(r.toFixed(2))}function p1(e){for(var t=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{},n=[],r=iu(e),a=PA;a>0;a-=1){var o=DP(r),i=kh(iu({h:RP(o,a,!0),s:LP(o,a,!0),v:FP(o,a,!0)}));n.push(i)}n.push(kh(r));for(var l=1;l<=TA;l+=1){var s=DP(r),c=kh(iu({h:RP(s,l),s:LP(s,l),v:FP(s,l)}));n.push(c)}return t.theme==="dark"?Uke.map(function(d){var f=d.index,p=d.opacity,v=kh(Yke(iu(t.backgroundColor||"#141414"),iu(n[f]),p*100));return v}):n}var 
By={red:"#F5222D",volcano:"#FA541C",orange:"#FA8C16",gold:"#FAAD14",yellow:"#FADB14",lime:"#A0D911",green:"#52C41A",cyan:"#13C2C2",blue:"#1890FF",geekblue:"#2F54EB",purple:"#722ED1",magenta:"#EB2F96",grey:"#666666"},$r={},Vy={};Object.keys(By).forEach(function(e){$r[e]=p1(By[e]),$r[e].primary=$r[e][5],Vy[e]=p1(By[e],{theme:"dark",backgroundColor:"#141414"}),Vy[e].primary=Vy[e][5]});$r.red;$r.volcano;$r.gold;$r.orange;$r.yellow;$r.lime;$r.green;$r.cyan;$r.blue;$r.geekblue;$r.purple;$r.magenta;$r.grey;var BP=[],Wc=[],qke="insert-css: You need to provide a CSS string. Usage: insertCss(cssString[, options]).";function Gke(){var e=document.createElement("style");return e.setAttribute("type","text/css"),e}function Xke(e,t){if(t=t||{},e===void 0)throw new Error(qke);var n=t.prepend===!0?"prepend":"append",r=t.container!==void 0?t.container:document.querySelector("head"),a=BP.indexOf(r);a===-1&&(a=BP.push(r)-1,Wc[a]={});var o;return Wc[a]!==void 0&&Wc[a][n]!==void 0?o=Wc[a][n]:(o=Wc[a][n]=Gke(),n==="prepend"?r.insertBefore(o,r.childNodes[0]):r.appendChild(o)),e.charCodeAt(0)===65279&&(e=e.substr(1,e.length)),o.styleSheet?o.styleSheet.cssText+=e:o.textContent+=e,o}function VP(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){Zke(e,a,n[a])})}return e}function Zke(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function zP(e){return typeof e=="object"&&typeof e.name=="string"&&typeof e.theme=="string"&&(typeof e.icon=="object"||typeof e.icon=="function")}function v1(e,t,n){return n?qe(e.tag,VP({key:t},n,e.attrs),(e.children||[]).map(function(r,a){return 
v1(r,"".concat(t,"-").concat(e.tag,"-").concat(a))})):qe(e.tag,VP({key:t},e.attrs),(e.children||[]).map(function(r,a){return v1(r,"".concat(t,"-").concat(e.tag,"-").concat(a))}))}function xA(e){return p1(e)[0]}function _A(e){return e?Array.isArray(e)?e:[e]:[]}var Jke=`
.anticon {
  display: inline-block;
  color: inherit;
  font-style: normal;
  line-height: 0;
  text-align: center;
  text-transform: none;
  vertical-align: -0.125em;
  text-rendering: optimizeLegibility;
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
}

.anticon > * {
  line-height: 1;
}

.anticon svg {
  display: inline-block;
}

.anticon::before {
  display: none;
}

.anticon .anticon-icon {
  display: block;
}

.anticon[tabindex] {
  cursor: pointer;
}

.anticon-spin::before,
.anticon-spin {
  display: inline-block;
  -webkit-animation: loadingCircle 1s infinite linear;
  animation: loadingCircle 1s infinite linear;
}

@-webkit-keyframes loadingCircle {
  100% {
    -webkit-transform: rotate(360deg);
    transform: rotate(360deg);
  }
}

@keyframes loadingCircle {
  100% {
    -webkit-transform: rotate(360deg);
    transform: rotate(360deg);
  }
}
`,HP=!1,Qke=function(){var t=arguments.length>0&&arguments[0]!==void 0?arguments[0]:Jke;Ne(function(){HP||(typeof window!="undefined"&&window.document&&window.document.documentElement&&Xke(t,{prepend:!0}),HP=!0)})},e$e=["icon","primaryColor","secondaryColor"];function t$e(e,t){if(e==null)return{};var n=n$e(e,t),r,a;if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a<o.length;a++)r=o[a],!(t.indexOf(r)>=0)&&(!Object.prototype.propertyIsEnumerable.call(e,r)||(n[r]=e[r]))}return n}function n$e(e,t){if(e==null)return{};var n={},r=Object.keys(e),a,o;for(o=0;o<r.length;o++)a=r[o],!(t.indexOf(a)>=0)&&(n[a]=e[a]);return n}function Hp(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){r$e(e,a,n[a])})}return e}function r$e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var Od={primaryColor:"#333",secondaryColor:"#E6E6E6",calculated:!1};function a$e(e){var t=e.primaryColor,n=e.secondaryColor;Od.primaryColor=t,Od.secondaryColor=n||xA(t),Od.calculated=!!n}function o$e(){return Hp({},Od)}var Mc=function(t,n){var r=Hp({},t,n.attrs),a=r.icon,o=r.primaryColor,i=r.secondaryColor,l=t$e(r,e$e),s=Od;if(o&&(s={primaryColor:o,secondaryColor:i||xA(o)}),Qke(),zP(a),!zP(a))return null;var c=a;return c&&typeof c.icon=="function"&&(c=Hp({},c,{icon:c.icon(s.primaryColor,s.secondaryColor)})),v1(c.icon,"svg-".concat(c.name),Hp({},l,{"data-icon":c.name,width:"1em",height:"1em",fill:"currentColor","aria-hidden":"true"}))};Mc.props={icon:Object,primaryColor:String,secondaryColor:String,focusable:String};Mc.inheritAttrs=!1;Mc.displayName="IconBase";Mc.getTwoToneColors=o$e;Mc.setTwoToneColors=a$e;var h2=Mc;function i$e(e,t){return 
c$e(e)||u$e(e,t)||s$e(e,t)||l$e()}function l$e(){throw new TypeError(`Invalid attempt to destructure non-iterable instance.
In order to be iterable, non-array objects must have a [Symbol.iterator]() method.`)}function s$e(e,t){if(!!e){if(typeof e=="string")return jP(e,t);var n=Object.prototype.toString.call(e).slice(8,-1);if(n==="Object"&&e.constructor&&(n=e.constructor.name),n==="Map"||n==="Set")return Array.from(e);if(n==="Arguments"||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n))return jP(e,t)}}function jP(e,t){(t==null||t>e.length)&&(t=e.length);for(var n=0,r=new Array(t);n<t;n++)r[n]=e[n];return r}function u$e(e,t){var n=e==null?null:typeof Symbol!="undefined"&&e[Symbol.iterator]||e["@@iterator"];if(n!=null){var r=[],a=!0,o=!1,i,l;try{for(n=n.call(e);!(a=(i=n.next()).done)&&(r.push(i.value),!(t&&r.length===t));a=!0);}catch(s){o=!0,l=s}finally{try{!a&&n.return!=null&&n.return()}finally{if(o)throw l}}return r}}function c$e(e){if(Array.isArray(e))return e}function EA(e){var t=_A(e),n=i$e(t,2),r=n[0],a=n[1];return h2.setTwoToneColors({primaryColor:r,secondaryColor:a})}function d$e(){var e=h2.getTwoToneColors();return e.calculated?[e.primaryColor,e.secondaryColor]:e.primaryColor}var f$e=["class","icon","spin","rotate","tabindex","twoToneColor","onClick"];function h$e(e,t){return g$e(e)||m$e(e,t)||v$e(e,t)||p$e()}function p$e(){throw new TypeError(`Invalid attempt to destructure non-iterable instance.
In order to be iterable, non-array objects must have a [Symbol.iterator]() method.`)}function v$e(e,t){if(!!e){if(typeof e=="string")return KP(e,t);var n=Object.prototype.toString.call(e).slice(8,-1);if(n==="Object"&&e.constructor&&(n=e.constructor.name),n==="Map"||n==="Set")return Array.from(e);if(n==="Arguments"||/^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n))return KP(e,t)}}function KP(e,t){(t==null||t>e.length)&&(t=e.length);for(var n=0,r=new Array(t);n<t;n++)r[n]=e[n];return r}function m$e(e,t){var n=e==null?null:typeof Symbol!="undefined"&&e[Symbol.iterator]||e["@@iterator"];if(n!=null){var r=[],a=!0,o=!1,i,l;try{for(n=n.call(e);!(a=(i=n.next()).done)&&(r.push(i.value),!(t&&r.length===t));a=!0);}catch(s){o=!0,l=s}finally{try{!a&&n.return!=null&&n.return()}finally{if(o)throw l}}return r}}function g$e(e){if(Array.isArray(e))return e}function WP(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){m1(e,a,n[a])})}return e}function m1(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function y$e(e,t){if(e==null)return{};var n=b$e(e,t),r,a;if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a<o.length;a++)r=o[a],!(t.indexOf(r)>=0)&&(!Object.prototype.propertyIsEnumerable.call(e,r)||(n[r]=e[r]))}return n}function b$e(e,t){if(e==null)return{};var n={},r=Object.keys(e),a,o;for(o=0;o<r.length;o++)a=r[o],!(t.indexOf(a)>=0)&&(n[a]=e[a]);return n}EA("#1890ff");var Ic=function(t,n){var r,a=WP({},t,n.attrs),o=a.class,i=a.icon,l=a.spin,s=a.rotate,c=a.tabindex,d=a.twoToneColor,f=a.onClick,p=y$e(a,f$e),v=(r={anticon:!0},m1(r,"anticon-".concat(i.name),Boolean(i.name)),m1(r,o,o),r),m=l===""||!!l||i.name==="loading"?"anticon-spin":"",y=c;y===void 
0&&f&&(y=-1,p.tabindex=y);var b=s?{msTransform:"rotate(".concat(s,"deg)"),transform:"rotate(".concat(s,"deg)")}:void 0,C=_A(d),S=h$e(C,2),w=S[0],k=S[1];return g("span",WP({role:"img","aria-label":i.name},p,{onClick:f,class:v}),[g(h2,{class:m,icon:i,primaryColor:w,secondaryColor:k,style:b},null)])};Ic.props={spin:Boolean,rotate:Number,icon:Object,twoToneColor:String};Ic.displayName="AntdIcon";Ic.inheritAttrs=!1;Ic.getTwoToneColor=d$e;Ic.setTwoToneColor=EA;var Et=Ic;function UP(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){C$e(e,a,n[a])})}return e}function C$e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var p2=function(t,n){var r=UP({},t,n.attrs);return g(Et,UP({},r,{icon:Hke}),null)};p2.displayName="DownOutlined";p2.inheritAttrs=!1;var Rs=p2,w$e={icon:{tag:"svg",attrs:{viewBox:"0 0 1024 1024",focusable:"false"},children:[{tag:"path",attrs:{d:"M988 548c-19.9 0-36-16.1-36-36 0-59.4-11.6-117-34.6-171.3a440.45 440.45 0 00-94.3-139.9 437.71 437.71 0 00-139.9-94.3C629 83.6 571.4 72 512 72c-19.9 0-36-16.1-36-36s16.1-36 36-36c69.1 0 136.2 13.5 199.3 40.3C772.3 66 827 103 874 150c47 47 83.9 101.8 109.7 162.7 26.7 63.1 40.2 130.2 40.2 199.3.1 19.9-16 36-35.9 36z"}}]},name:"loading",theme:"outlined"},S$e=w$e;function YP(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){k$e(e,a,n[a])})}return e}function k$e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var 
v2=function(t,n){var r=YP({},t,n.attrs);return g(Et,YP({},r,{icon:S$e}),null)};v2.displayName="LoadingOutlined";v2.inheritAttrs=!1;var co=v2,$$e={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M912 190h-69.9c-9.8 0-19.1 4.5-25.1 12.2L404.7 724.5 207 474a32 32 0 00-25.1-12.2H112c-6.7 0-10.4 7.7-6.3 12.9l273.9 347c12.8 16.2 37.4 16.2 50.3 0l488.4-618.9c4.1-5.1.4-12.8-6.3-12.8z"}}]},name:"check",theme:"outlined"},O$e=$$e;function qP(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){P$e(e,a,n[a])})}return e}function P$e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var m2=function(t,n){var r=qP({},t,n.attrs);return g(Et,qP({},r,{icon:O$e}),null)};m2.displayName="CheckOutlined";m2.inheritAttrs=!1;var wg=m2,T$e={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M563.8 512l262.5-312.9c4.4-5.2.7-13.1-6.1-13.1h-79.8c-4.7 0-9.2 2.1-12.3 5.7L511.6 449.8 295.1 191.7c-3-3.6-7.5-5.7-12.3-5.7H203c-6.8 0-10.5 7.9-6.1 13.1L459.4 512 196.9 824.9A7.95 7.95 0 00203 838h79.8c4.7 0 9.2-2.1 12.3-5.7l216.5-258.1 216.5 258.1c3 3.6 7.5 5.7 12.3 5.7h79.8c6.8 0 10.5-7.9 6.1-13.1L563.8 512z"}}]},name:"close",theme:"outlined"},x$e=T$e;function GP(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){_$e(e,a,n[a])})}return e}function _$e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var 
g2=function(t,n){var r=GP({},t,n.attrs);return g(Et,GP({},r,{icon:x$e}),null)};g2.displayName="CloseOutlined";g2.inheritAttrs=!1;var go=g2,E$e={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M512 64C264.6 64 64 264.6 64 512s200.6 448 448 448 448-200.6 448-448S759.4 64 512 64zm165.4 618.2l-66-.3L512 563.4l-99.3 118.4-66.1.3c-4.4 0-8-3.5-8-8 0-1.9.7-3.7 1.9-5.2l130.1-155L340.5 359a8.32 8.32 0 01-1.9-5.2c0-4.4 3.6-8 8-8l66.1.3L512 464.6l99.3-118.4 66-.3c4.4 0 8 3.5 8 8 0 1.9-.7 3.7-1.9 5.2L553.5 514l130 155c1.2 1.5 1.9 3.3 1.9 5.2 0 4.4-3.6 8-8 8z"}}]},name:"close-circle",theme:"filled"},M$e=E$e;function XP(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){I$e(e,a,n[a])})}return e}function I$e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var y2=function(t,n){var r=XP({},t,n.attrs);return g(Et,XP({},r,{icon:M$e}),null)};y2.displayName="CloseCircleFilled";y2.inheritAttrs=!1;var Yr=y2,N$e={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M909.6 854.5L649.9 594.8C690.2 542.7 712 479 712 412c0-80.2-31.3-155.4-87.9-212.1-56.6-56.7-132-87.9-212.1-87.9s-155.5 31.3-212.1 87.9C143.2 256.5 112 331.8 112 412c0 80.1 31.3 155.5 87.9 212.1C256.5 680.8 331.8 712 412 712c67 0 130.6-21.8 182.7-62l259.7 259.6a8.2 8.2 0 0011.6 0l43.6-43.5a8.2 8.2 0 000-11.6zM570.4 570.4C528 612.7 471.8 636 412 636s-116-23.3-158.4-65.6C211.3 528 188 471.8 188 412s23.3-116.1 65.6-158.4C296 211.3 352.2 188 412 188s116.1 23.2 158.4 65.6S636 352.2 636 412s-23.3 116.1-65.6 158.4z"}}]},name:"search",theme:"outlined"},A$e=N$e;function ZP(e){for(var t=1;t<arguments.length;t++){var 
n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){D$e(e,a,n[a])})}return e}function D$e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var b2=function(t,n){var r=ZP({},t,n.attrs);return g(Et,ZP({},r,{icon:A$e}),null)};b2.displayName="SearchOutlined";b2.inheritAttrs=!1;var Vv=b2;function R$e(e){var t=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{},n=e.loading,r=e.multiple,a=e.prefixCls,o=e.suffixIcon||t.suffixIcon&&t.suffixIcon(),i=e.clearIcon||t.clearIcon&&t.clearIcon(),l=e.menuItemSelectedIcon||t.menuItemSelectedIcon&&t.menuItemSelectedIcon(),s=e.removeIcon||t.removeIcon&&t.removeIcon(),c=i;i||(c=g(Yr,null,null));var d=null;if(o!==void 0)d=o;else if(n)d=g(co,{spin:!0},null);else{var f="".concat(a,"-suffix");d=function(y){var b=y.open,C=y.showSearch;return b&&C?g(Vv,{class:f},null):g(Rs,{class:f},null)}}var p=null;l!==void 0?p=l:r?p=g(wg,null,null):p=null;var v=null;return s!==void 0?v=s:v=g(go,null,null),{clearIcon:c,suffixIcon:d,itemIcon:p,removeIcon:v}}var Sg=function(){return 
P(P({},bn($A(),["inputIcon","mode","getInputElement","backfill","class","style"])),{value:{type:[Array,Object,String,Number]},defaultValue:{type:[Array,Object,String,Number]},notFoundContent:u.VNodeChild,suffixIcon:u.VNodeChild,itemIcon:u.VNodeChild,size:u.oneOf(rt("small","middle","large","default")),mode:u.oneOf(rt("multiple","tags","SECRET_COMBOBOX_MODE_DO_NOT_USE")),bordered:u.looseBool.def(!0),transitionName:u.string.def("slide-up"),choiceTransitionName:u.string.def("")})},qa=G({name:"ASelect",Option:aA,OptGroup:oA,inheritAttrs:!1,props:Sg(),SECRET_COMBOBOX_MODE_DO_NOT_USE:"SECRET_COMBOBOX_MODE_DO_NOT_USE",emits:["change","update:value"],slots:["notFoundContent","suffixIcon","itemIcon","removeIcon","clearIcon","dropdownRender","option"],setup:function(t,n){var r=n.attrs,a=n.emit,o=n.slots,i=n.expose,l=H(null),s=function(){l.value&&l.value.focus()},c=function(){l.value&&l.value.blur()},d=x(function(){var C=t.mode;if(C!=="combobox")return C===qa.SECRET_COMBOBOX_MODE_DO_NOT_USE?"combobox":C}),f=Wt("select",t),p=f.prefixCls,v=f.direction,m=f.configProvider,y=x(function(){var C;return Se((C={},V(C,"".concat(p.value,"-lg"),t.size==="large"),V(C,"".concat(p.value,"-sm"),t.size==="small"),V(C,"".concat(p.value,"-rtl"),v.value==="rtl"),V(C,"".concat(p.value,"-borderless"),!t.bordered),C))}),b=function(){for(var S=arguments.length,w=new Array(S),k=0;k<S;k++)w[k]=arguments[k];a("update:value",w[0]),a.apply(void 0,["change"].concat(w))};return i({blur:c,focus:s}),function(){var C,S=t.notFoundContent,w=t.listHeight,k=w===void 0?256:w,$=t.listItemHeight,O=$===void 0?24:$,T=t.getPopupContainer,_=t.dropdownClassName,I=t.virtual,L=t.dropdownMatchSelectWidth,j=m.renderEmpty,F=m.getPopupContainer,N=d.value==="multiple"||d.value==="tags",D;S!==void 0?D=S:o.notFoundContent?D=o.notFoundContent():d.value==="combobox"?D=null:D=j("Select");var 
z=R$e(P(P({},t),{multiple:N,prefixCls:p.value}),o),B=z.suffixIcon,M=z.itemIcon,E=z.removeIcon,K=z.clearIcon,W=bn(t,["prefixCls","suffixIcon","itemIcon","removeIcon","clearIcon","size","bordered"]),Y=Se(_,V({},"".concat(p.value,"-dropdown-").concat(v.value),v.value==="rtl"));return g(Vke,le(le(le({ref:l,virtual:I,dropdownMatchSelectWidth:L},W),r),{},{listHeight:k,listItemHeight:O,mode:d.value,prefixCls:p.value,direction:v.value,inputIcon:B,menuItemSelectedIcon:M,removeIcon:E,clearIcon:K,notFoundContent:D,class:[y.value,r.class],getPopupContainer:T||F,dropdownClassName:Y,onChange:b,dropdownRender:W.dropdownRender||o.dropdownRender}),{default:function(){return[(C=o.default)===null||C===void 0?void 0:C.call(o)]},option:o.option})}}});qa.install=function(e){return e.component(qa.name,qa),e.component(qa.Option.displayName,qa.Option),e.component(qa.OptGroup.displayName,qa.OptGroup),e};var L$e=qa.Option,F$e=qa.OptGroup,dl=qa,Vf={prefixCls:u.string,inputPrefixCls:u.string,defaultValue:u.oneOfType([u.string,u.number]),value:u.oneOfType([u.string,u.number]),placeholder:{type:[String,Number]},type:u.string.def("text"),name:u.string,size:{type:String},disabled:u.looseBool,readonly:u.looseBool,addonBefore:u.VNodeChild,addonAfter:u.VNodeChild,prefix:u.VNodeChild,suffix:u.VNodeChild,autofocus:u.looseBool,allowClear:u.looseBool,lazy:u.looseBool.def(!0),maxlength:u.number,loading:u.looseBool,onPressEnter:u.func,onKeydown:u.func,onKeyup:u.func,onFocus:u.func,onBlur:u.func,onChange:u.func,onInput:u.func,"onUpdate:value":u.func};function B$e(e){return!!(We(e,"prefix")||We(e,"suffix")||e.$props.allowClear)}var 
JP=["text","input"],V$e=G({name:"ClearableLabeledInput",inheritAttrs:!1,props:{prefixCls:u.string,inputType:u.oneOf(rt("text","input")),value:u.any,defaultValue:u.any,allowClear:u.looseBool,element:u.VNodeChild,handleReset:u.func,disabled:u.looseBool,size:u.oneOf(rt("small","large","default")),suffix:u.VNodeChild,prefix:u.VNodeChild,addonBefore:u.VNodeChild,addonAfter:u.VNodeChild,readonly:u.looseBool,isFocused:u.looseBool},methods:{renderClearIcon:function(t){var n=this.$props,r=n.allowClear,a=n.value,o=n.disabled,i=n.readonly,l=n.inputType,s=n.handleReset;if(!r)return null;var c=!o&&!i&&a!==void 0&&a!==null&&a!=="",d=l===JP[0]?"".concat(t,"-textarea-clear-icon"):"".concat(t,"-clear-icon");return g(Yr,{onClick:s,class:Se(d,V({},"".concat(d,"-hidden"),!c)),role:"button"},null)},renderSuffix:function(t){var n=this.$props,r=n.suffix,a=n.allowClear;return r||a?g("span",{class:"".concat(t,"-suffix")},[this.renderClearIcon(t),r]):null},renderLabeledIcon:function(t,n){var r,a,o=this.$props,i=this.$attrs.style,l=this.renderSuffix(t);if(!B$e(this))return Ot(n,{value:o.value});var s=o.prefix?g("span",{class:"".concat(t,"-prefix")},[o.prefix]):null,c=Se((a=this.$attrs)===null||a===void 0?void 0:a.class,"".concat(t,"-affix-wrapper"),(r={},V(r,"".concat(t,"-affix-wrapper-focused"),o.isFocused),V(r,"".concat(t,"-affix-wrapper-disabled"),o.disabled),V(r,"".concat(t,"-affix-wrapper-sm"),o.size==="small"),V(r,"".concat(t,"-affix-wrapper-lg"),o.size==="large"),V(r,"".concat(t,"-affix-wrapper-input-with-clear-btn"),o.suffix&&o.allowClear&&this.$props.value),r));return g("span",{class:c,style:i},[s,Ot(n,{style:null,value:o.value,class:NA(t,o.size,o.disabled)}),l])},renderInputWithLabel:function(t,n){var r,a=this.$props,o=a.addonBefore,i=a.addonAfter,l=a.size,s=this.$attrs,c=s.style,d=s.class;if(!o&&!i)return n;var 
f="".concat(t,"-group"),p="".concat(f,"-addon"),v=o?g("span",{class:p},[o]):null,m=i?g("span",{class:p},[i]):null,y=Se("".concat(t,"-wrapper"),V({},f,o||i)),b=Se(d,"".concat(t,"-group-wrapper"),(r={},V(r,"".concat(t,"-group-wrapper-sm"),l==="small"),V(r,"".concat(t,"-group-wrapper-lg"),l==="large"),r));return g("span",{class:b,style:c},[g("span",{class:y},[v,Ot(n,{style:null}),m])])},renderTextAreaWithClearIcon:function(t,n){var r=this.$props,a=r.value,o=r.allowClear,i=this.$attrs,l=i.style,s=i.class;if(!o)return Ot(n,{value:a});var c=Se(s,"".concat(t,"-affix-wrapper"),"".concat(t,"-affix-wrapper-textarea-with-clear-btn"));return g("span",{class:c,style:l},[Ot(n,{style:null,value:a}),this.renderClearIcon(t)])},renderClearableLabeledInput:function(){var t=this.$props,n=t.prefixCls,r=t.inputType,a=t.element;return r===JP[0]?this.renderTextAreaWithClearIcon(n,a):this.renderInputWithLabel(n,this.renderLabeledIcon(n,a))}},render:function(){return this.renderClearableLabeledInput()}}),MA=V$e;function IA(e){return typeof e=="undefined"||e===null?"":e}function zv(e,t,n){if(n){var r=t;if(t.type==="click"){Object.defineProperty(r,"target",{writable:!0}),Object.defineProperty(r,"currentTarget",{writable:!0}),r.target=e,r.currentTarget=e;var a=e.value;e.value="",n(r),e.value=a;return}n(r)}}function NA(e,t,n){var r;return Se(e,(r={},V(r,"".concat(e,"-sm"),t==="small"),V(r,"".concat(e,"-lg"),t==="large"),V(r,"".concat(e,"-disabled"),n),r))}var Jn=G({name:"AInput",inheritAttrs:!1,props:P({},Vf),setup:function(){return{configProvider:ve("configProvider",St),removePasswordTimeout:void 0,input:null,clearableInput:null}},data:function(){var t=this.$props,n=typeof t.value=="undefined"?t.defaultValue:t.value;return{stateValue:typeof n=="undefined"?"":n,isFocused:!1}},watch:{value:function(t){this.stateValue=t}},mounted:function(){var 
t=this;Ne(function(){t.clearPasswordValueAttribute()})},beforeUnmount:function(){this.removePasswordTimeout&&clearTimeout(this.removePasswordTimeout)},methods:{handleInputFocus:function(t){this.isFocused=!0,this.onFocus&&this.onFocus(t)},handleInputBlur:function(t){this.isFocused=!1,this.onBlur&&this.onBlur(t)},focus:function(){this.input.focus()},blur:function(){this.input.blur()},select:function(){this.input.select()},saveClearableInput:function(t){this.clearableInput=t},saveInput:function(t){this.input=t},setValue:function(t,n){this.stateValue!==t&&(vt(this,"value")?this.$forceUpdate():this.stateValue=t,Ne(function(){n&&n()}))},triggerChange:function(t){this.$emit("update:value",t.target.value),this.$emit("change",t),this.$emit("input",t)},handleReset:function(t){var n=this;this.setValue("",function(){n.focus()}),zv(this.input,t,this.triggerChange)},renderInput:function(t,n){var r=n.addonBefore,a=n.addonAfter,o=bn(this.$props,["prefixCls","onPressEnter","addonBefore","addonAfter","prefix","suffix","allowClear","defaultValue","lazy","size","inputPrefixCls","loading"]),i=this.handleKeyDown,l=this.handleChange,s=this.handleInputFocus,c=this.handleInputBlur,d=this.size,f=this.disabled,p=this.$attrs,v=P(P(P({},o),p),{onKeydown:i,class:Se(NA(t,d,f),V({},p.class,p.class&&!r&&!a)),ref:this.saveInput,key:"ant-input",onInput:l,onChange:l,onFocus:s,onBlur:c});v.autofocus||delete v.autofocus;var m=g("input",v,null);return at(m,[[Mi]])},clearPasswordValueAttribute:function(){var t=this;this.removePasswordTimeout=setTimeout(function(){t.input&&t.input.getAttribute&&t.input.getAttribute("type")==="password"&&t.input.hasAttribute("value")&&t.input.removeAttribute("value")})},handleChange:function(t){var 
n=t.target,r=n.value,a=n.composing,o=n.isComposing;(o||a)&&this.lazy||this.stateValue===r||(this.setValue(r,this.clearPasswordValueAttribute),zv(this.input,t,this.triggerChange))},handleKeyDown:function(t){t.keyCode===13&&this.$emit("pressEnter",t),this.$emit("keydown",t)}},render:function(){var t=this.$props.prefixCls,n=this.$data,r=n.stateValue,a=n.isFocused,o=this.configProvider.getPrefixCls,i=o("input",t),l=We(this,"addonAfter"),s=We(this,"addonBefore"),c=We(this,"suffix"),d=We(this,"prefix"),f=P(P(P({},this.$attrs),Qe(this)),{prefixCls:i,inputType:"input",value:IA(r),element:this.renderInput(i,{addonAfter:l,addonBefore:s}),handleReset:this.handleReset,addonAfter:l,addonBefore:s,suffix:c,prefix:d,isFocused:a});return g(MA,le(le({},f),{},{ref:this.saveClearableInput}),null)}}),AA=G({name:"AInputGroup",props:{prefixCls:u.string,size:u.oneOf(rt("small","large","default")),compact:u.looseBool},setup:function(){return{configProvider:ve("configProvider",St)}},computed:{classes:function(){var t,n=this.prefixCls,r=this.size,a=this.compact,o=a===void 0?!1:a,i=this.configProvider,l=i.getPrefixCls,s=l("input-group",n);return t={},V(t,"".concat(s),!0),V(t,"".concat(s,"-lg"),r==="large"),V(t,"".concat(s,"-sm"),r==="small"),V(t,"".concat(s,"-compact"),o),t}},render:function(){return g("span",{class:this.classes},[ht(this)])}}),zy=/iPhone/i,QP=/iPod/i,e3=/iPad/i,Hy=/\bAndroid(?:.+)Mobile\b/i,t3=/Android/i,Ks=/\bAndroid(?:.+)SD4930UR\b/i,$h=/\bAndroid(?:.+)(?:KF[A-Z]{2,4})\b/i,Go=/Windows Phone/i,n3=/\bWindows(?:.+)ARM\b/i,r3=/BlackBerry/i,a3=/BB10/i,o3=/Opera Mini/i,i3=/\b(CriOS|Chrome)(?:.+)Mobile/i,l3=/Mobile(?:.+)Firefox\b/i;function Ht(e,t){return e.test(t)}function s3(e){var t=e||(typeof navigator!="undefined"?navigator.userAgent:""),n=t.split("[FBAN");if(typeof n[1]!="undefined"){var r=n,a=fn(r,1);t=a[0]}if(n=t.split("Twitter"),typeof n[1]!="undefined"){var o=n,i=fn(o,1);t=i[0]}var 
l={apple:{phone:Ht(zy,t)&&!Ht(Go,t),ipod:Ht(QP,t),tablet:!Ht(zy,t)&&Ht(e3,t)&&!Ht(Go,t),device:(Ht(zy,t)||Ht(QP,t)||Ht(e3,t))&&!Ht(Go,t)},amazon:{phone:Ht(Ks,t),tablet:!Ht(Ks,t)&&Ht($h,t),device:Ht(Ks,t)||Ht($h,t)},android:{phone:!Ht(Go,t)&&Ht(Ks,t)||!Ht(Go,t)&&Ht(Hy,t),tablet:!Ht(Go,t)&&!Ht(Ks,t)&&!Ht(Hy,t)&&(Ht($h,t)||Ht(t3,t)),device:!Ht(Go,t)&&(Ht(Ks,t)||Ht($h,t)||Ht(Hy,t)||Ht(t3,t))||Ht(/\bokhttp\b/i,t)},windows:{phone:Ht(Go,t),tablet:Ht(n3,t),device:Ht(Go,t)||Ht(n3,t)},other:{blackberry:Ht(r3,t),blackberry10:Ht(a3,t),opera:Ht(o3,t),firefox:Ht(l3,t),chrome:Ht(i3,t),device:Ht(r3,t)||Ht(a3,t)||Ht(o3,t)||Ht(l3,t)||Ht(i3,t)},any:null,phone:null,tablet:null};return l.any=l.apple.device||l.android.device||l.windows.device||l.other.device,l.phone=l.apple.phone||l.android.phone||l.windows.phone,l.tablet=l.apple.tablet||l.android.tablet||l.windows.tablet,l}var z$e=P(P({},s3()),{isMobile:s3}),H$e=z$e,jy={transitionstart:{transition:"transitionstart",WebkitTransition:"webkitTransitionStart",MozTransition:"mozTransitionStart",OTransition:"oTransitionStart",msTransition:"MSTransitionStart"},animationstart:{animation:"animationstart",WebkitAnimation:"webkitAnimationStart",MozAnimation:"mozAnimationStart",OAnimation:"oAnimationStart",msAnimation:"MSAnimationStart"}},Ky={transitionend:{transition:"transitionend",WebkitTransition:"webkitTransitionEnd",MozTransition:"mozTransitionEnd",OTransition:"oTransitionEnd",msTransition:"MSTransitionEnd"},animationend:{animation:"animationend",WebkitAnimation:"webkitAnimationEnd",MozAnimation:"mozAnimationEnd",OAnimation:"oAnimationEnd",msAnimation:"MSAnimationEnd"}},cu=[],du=[];function j$e(){var e=document.createElement("div"),t=e.style;"AnimationEvent"in window||(delete jy.animationstart.animation,delete Ky.animationend.animation),"TransitionEvent"in window||(delete jy.transitionstart.transition,delete Ky.transitionend.transition);function n(r,a){for(var o in r)if(r.hasOwnProperty(o)){var i=r[o];for(var l in i)if(l in 
t){a.push(i[l]);break}}}n(jy,cu),n(Ky,du)}typeof window!="undefined"&&typeof document!="undefined"&&j$e();function u3(e,t,n){e.addEventListener(t,n,!1)}function c3(e,t,n){e.removeEventListener(t,n,!1)}var K$e={startEvents:cu,addStartEventListener:function(t,n){if(cu.length===0){window.setTimeout(n,0);return}cu.forEach(function(r){u3(t,r,n)})},removeStartEventListener:function(t,n){cu.length!==0&&cu.forEach(function(r){c3(t,r,n)})},endEvents:du,addEndEventListener:function(t,n){if(du.length===0){window.setTimeout(n,0);return}du.forEach(function(r){u3(t,r,n)})},removeEndEventListener:function(t,n){du.length!==0&&du.forEach(function(r){c3(t,r,n)})}},si=K$e,Li;function d3(e){return!e||e.offsetParent===null}function W$e(e){var t=(e||"").match(/rgba?\((\d*), (\d*), (\d*)(, [\.\d]*)?\)/);return t&&t[1]&&t[2]&&t[3]?!(t[1]===t[2]&&t[2]===t[3]):!0}var C2=G({name:"Wave",props:["insertExtraNode"],setup:function(){var t=ve("configProvider",St);return{configProvider:t}},mounted:function(){var t=this;Ne(function(){var n=Sn(t);n.nodeType===1&&(t.instance=t.bindAnimationEvent(n))})},beforeUnmount:function(){this.instance&&this.instance.cancel(),this.clickWaveTimeoutId&&clearTimeout(this.clickWaveTimeoutId)},methods:{onClick:function(t,n){if(!(!t||d3(t)||t.className.indexOf("-leave")>=0)){var r=this.$props.insertExtraNode;this.extraNode=document.createElement("div");var a=this.extraNode;a.className="ant-click-animating-node";var o=this.getAttributeName();t.removeAttribute(o),t.setAttribute(o,"true"),Li=Li||document.createElement("style"),n&&n!=="#ffffff"&&n!=="rgb(255, 255, 255)"&&W$e(n)&&!/rgba\(\d*, \d*, \d*, 0\)/.test(n)&&n!=="transparent"&&(this.csp&&this.csp.nonce&&(Li.nonce=this.csp.nonce),a.style.borderColor=n,Li.innerHTML=`
        [ant-click-animating-without-extra-node='true']::after, .ant-click-animating-node {
          --antd-wave-shadow-color: `.concat(n,`;
        }`),document.body.contains(Li)||document.body.appendChild(Li)),r&&t.appendChild(a),si.addStartEventListener(t,this.onTransitionStart),si.addEndEventListener(t,this.onTransitionEnd)}},onTransitionStart:function(t){if(!this._.isUnmounted){var n=Sn(this);!t||t.target!==n||this.animationStart||this.resetEffect(n)}},onTransitionEnd:function(t){!t||t.animationName!=="fadeEffect"||this.resetEffect(t.target)},getAttributeName:function(){var t=this.$props.insertExtraNode;return t?"ant-click-animating":"ant-click-animating-without-extra-node"},bindAnimationEvent:function(t){var n=this;if(!(!t||!t.getAttribute||t.getAttribute("disabled")||t.className.indexOf("disabled")>=0)){var r=function(o){if(!(o.target.tagName==="INPUT"||d3(o.target))){n.resetEffect(t);var i=getComputedStyle(t).getPropertyValue("border-top-color")||getComputedStyle(t).getPropertyValue("border-color")||getComputedStyle(t).getPropertyValue("background-color");n.clickWaveTimeoutId=window.setTimeout(function(){return n.onClick(t,i)},0),en.cancel(n.animationStartId),n.animationStart=!0,n.animationStartId=en(function(){n.animationStart=!1},10)}};return t.addEventListener("click",r,!0),{cancel:function(){t.removeEventListener("click",r,!0)}}}},resetEffect:function(t){if(!(!t||t===this.extraNode||!(t instanceof Element))){var n=this.$props.insertExtraNode,r=this.getAttributeName();t.setAttribute(r,"false"),Li&&(Li.innerHTML=""),n&&this.extraNode&&t.contains(this.extraNode)&&t.removeChild(this.extraNode),si.removeStartEventListener(t,this.onTransitionStart),si.removeEndEventListener(t,this.onTransitionEnd)}}},render:function(){var t,n,r=this.configProvider.csp;return r&&(this.csp=r),(n=(t=this.$slots).default)===null||n===void 0?void 0:n.call(t)[0]}}),U$e=rt("default","primary","ghost","dashed","link","text"),Y$e=rt("circle","round"),q$e=rt("submit","button","reset");function w2(e){return e==="danger"?{danger:!0}:{type:e}}var 
G$e=function(){return{prefixCls:u.string,type:u.oneOf(U$e),htmlType:u.oneOf(q$e).def("button"),shape:u.oneOf(Y$e),size:{type:String},loading:{type:[Boolean,Object],default:function(){return!1}},disabled:u.looseBool,ghost:u.looseBool,block:u.looseBool,danger:u.looseBool,icon:u.VNodeChild,href:u.string,target:u.string,title:u.string,onClick:{type:Function}}},Hv=G$e,uf=function(e,t,n){Ua(e,"[ant-design-vue: ".concat(t,"] ").concat(n))},f3=/^[\u4e00-\u9fa5]{2}$/,h3=f3.test.bind(f3),X$e=Hv();function Oh(e){return e==="text"||e==="link"}var Cr=G({name:"AButton",inheritAttrs:!1,__ANT_BUTTON:!0,props:X$e,slots:["icon"],emits:["click"],setup:function(t,n){var r=n.slots,a=n.attrs,o=n.emit,i=Wt("btn",t),l=i.prefixCls,s=i.autoInsertSpaceInButton,c=i.direction,d=H(null),f=H(void 0),p=!1,v=H(!1),m=H(!1),y=x(function(){return s.value!==!1}),b=x(function(){return kt(t.loading)==="object"&&t.loading.delay?t.loading.delay||!0:!!t.loading});ce(b,function($){clearTimeout(f.value),typeof b.value=="number"?f.value=window.setTimeout(function(){v.value=$},b.value):v.value=$},{immediate:!0});var C=x(function(){var $,O=t.type,T=t.shape,_=t.size,I=t.ghost,L=t.block,j=t.danger,F=l.value,N="";switch(_){case"large":N="lg";break;case"small":N="sm";break}return $={},V($,"".concat(F),!0),V($,"".concat(F,"-").concat(O),O),V($,"".concat(F,"-").concat(T),T),V($,"".concat(F,"-").concat(N),N),V($,"".concat(F,"-loading"),v.value),V($,"".concat(F,"-background-ghost"),I&&!Oh(O)),V($,"".concat(F,"-two-chinese-chars"),m.value&&y.value),V($,"".concat(F,"-block"),L),V($,"".concat(F,"-dangerous"),!!j),V($,"".concat(F,"-rtl"),c.value==="rtl"),$}),S=function(){var O=d.value;if(!(!O||s.value===!1)){var T=O.textContent;p&&h3(T)?m.value||(m.value=!0):m.value&&(m.value=!1)}},w=function(O){if(v.value||t.disabled){O.preventDefault();return}o("click",O)},k=function(O,T){var _=T?" 
":"";if(O.type===Fo){var I=O.children.trim();return h3(I)&&(I=I.split("").join(_)),g("span",null,[I])}return O};return Wn(function(){uf(!(t.ghost&&Oh(t.type)),"Button","`link` or `text` button can't be a `ghost` button.")}),et(S),ur(S),Lt(function(){f.value&&clearTimeout(f.value)}),function(){var $=Un(jn(r,t)),O=jn(r,t,"icon");p=$.length===1&&!O&&!Oh(t.type);var T=t.type,_=t.htmlType,I=t.disabled,L=t.href,j=t.title,F=t.target,N=v.value?"loading":O,D=P(P({},a),{title:j,disabled:I,class:[C.value,a.class,V({},"".concat(l.value,"-icon-only"),$.length===0&&!!N)],onClick:w}),z=v.value?g(co,null,null):O,B=$.map(function(E){return k(E,p&&y.value)});if(L!==void 0)return g("a",le(le({},D),{},{href:L,target:F,ref:d}),[z,B]);var M=g("button",le(le({},D),{},{ref:d,type:_}),[z,B]);return Oh(T)?M:g(C2,{ref:"wave"},{default:function(){return[M]}})}}}),Z$e={prefixCls:u.string,size:{type:String}},jv=G({name:"AButtonGroup",props:Z$e,setup:function(t,n){var r=n.slots,a=Wt("btn-group",t),o=a.prefixCls,i=a.direction,l=x(function(){var s,c=t.size,d="";switch(c){case"large":d="lg";break;case"small":d="sm";break}return s={},V(s,"".concat(o.value),!0),V(s,"".concat(o.value,"-").concat(d),d),V(s,"".concat(o.value,"-rtl"),i.value==="rtl"),s});return function(){var s;return g("div",{class:l.value},[Un((s=r.default)===null||s===void 0?void 0:s.call(r))])}}});Cr.Group=jv;Cr.install=function(e){return e.component(Cr.name,Cr),e.component(jv.name,jv),e};var J$e=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return 
n},/* AInputSearch (DA): search box built on the Jn input that adds an optional enter button and a search event. Props are Vf plus enterButton and onSearch. */DA=G({name:"AInputSearch",inheritAttrs:!1,props:P(P({},Vf),{enterButton:u.VNodeChild,onSearch:u.func}),setup:function(){return{configProvider:ve("configProvider",St),input:null}},methods:{saveInput:function(t){this.input=t},/* Re-emits the inner input change as update:value and change; additionally emits search when the event type is click. */handleChange:function(t){this.$emit("update:value",t.target.value),t&&t.target&&t.type==="click"&&this.$emit("search",t.target.value,t),this.$emit("change",t)},/* Does nothing while loading or disabled; otherwise emits search with the inner input stateValue and refocuses the input unless H$e.tablet is truthy. */handleSearch:function(t){this.loading||this.disabled||(this.$emit("search",this.input.stateValue,t),H$e.tablet||this.input.focus())},focus:function(){this.input.focus()},blur:function(){this.input.blur()},/* Loading indicator: a primary Cr button wrapping co when enterButton is set (an empty string counts as set), otherwise a bare co carrying the -icon class. */renderLoading:function(t){var n=this.$props.size,r=We(this,"enterButton");return r=r||r==="",r?g(Cr,{class:"".concat(t,"-button"),type:"primary",size:n,key:"enterButton"},{default:function(){return[g(co,null,null)]}}):g(co,{class:"".concat(t,"-icon"),key:"loadingIcon"},null)},/* Suffix content: suffix plus loading indicator when loading without an enter button; only the suffix when an enter button exists; otherwise the suffix (if any) followed by a Vv icon whose click runs handleSearch. */renderSuffix:function(t){var n=this.loading,r=We(this,"suffix"),a=We(this,"enterButton");if(a=a||a==="",n&&!a)return[r,this.renderLoading(t)];if(a)return r;var o=g(Vv,{class:"".concat(t,"-icon"),key:"searchIcon",onClick:this.handleSearch},null);return r?[r,o]:o},/* Addon after the input: the loading indicator plus addonAfter while loading with an enter button; only addonAfter when there is no enterButton; a button-like enterButton vnode (native button tag or a component flagged __ANT_BUTTON) is passed through Ot with the search click handler, anything else becomes the content of a new primary Cr button (the Vv icon when enterButton is true or an empty string). An existing addonAfter is appended after the button. */renderAddonAfter:function(t){var n=this.size,r=this.disabled,a=this.loading,o="".concat(t,"-button"),i=We(this,"enterButton");i=i||i==="";var l=We(this,"addonAfter");if(a&&i)return[this.renderLoading(t),l];if(!i)return l;var s=Array.isArray(i)?i[0]:i,c,d=s.type&&bc(s.type)&&s.type.__ANT_BUTTON;return s.tagName==="button"||d?c=Ot(s,P(P({key:"enterButton",class:d?o:""},d?{size:n}:{}),{onClick:this.handleSearch})):c=g(Cr,{class:o,type:"primary",size:n,disabled:r,key:"enterButton",onClick:this.handleSearch},{default:function(){return[i===!0||i===""?g(Vv,null,null):i]}}),l?[c,l]:c}},/* Merges props and attrs, removes the search-specific keys from what is forwarded, and renders the inner Jn input (the function continues on the next line). */render:function(){var t=P(P({},Qe(this)),this.$attrs),n=t.prefixCls,r=t.inputPrefixCls,a=t.size,o=t.class,i=J$e(t,["prefixCls","inputPrefixCls","size","class"]);delete i.onSearch,delete i.loading,delete i.enterButton,delete i.addonBefore,delete i["onUpdate:value"];var 
l=this.configProvider.getPrefixCls,s=l("input-search",n),c=l("input",r),d=We(this,"enterButton"),f=We(this,"addonBefore");d=d||d==="";var p;if(d){var v;p=Se(s,o,(v={},V(v,"".concat(s,"-enter-button"),!!d),V(v,"".concat(s,"-").concat(a),!!a),v))}else p=Se(s,o);/* Props handed to the inner Jn input: the remaining attrs plus the computed prefix class, suffix, addons and the search handlers. */var m=P(P({},i),{prefixCls:c,size:a,suffix:this.renderSuffix(s),prefix:We(this,"prefix"),addonAfter:this.renderAddonAfter(s),addonBefore:f,class:p,onPressEnter:this.handleSearch,onChange:this.handleChange});return g(Jn,le(le({},m),{},{ref:this.saveInput}),null)}}),/* Q$e: inline CSS applied to the hidden measuring textarea in n4e. */Q$e=`
  min-height:0 !important;
  max-height:none !important;
  height:0 !important;
  visibility:hidden !important;
  overflow:hidden !important;
  position:absolute !important;
  z-index:-1000 !important;
  top:0 !important;
  right:0 !important
`,e4e=["letter-spacing","line-height","padding-top","padding-bottom","font-family","font-weight","font-size","font-variant","text-rendering","text-transform","width","text-indent","padding-left","padding-right","border-width","box-sizing"],Wy={},Co;function t4e(e){var t=arguments.length>1&&arguments[1]!==void 0?arguments[1]:!1,n=e.getAttribute("id")||e.getAttribute("data-reactid")||e.getAttribute("name");if(t&&Wy[n])return Wy[n];var r=window.getComputedStyle(e),a=r.getPropertyValue("box-sizing")||r.getPropertyValue("-moz-box-sizing")||r.getPropertyValue("-webkit-box-sizing"),o=parseFloat(r.getPropertyValue("padding-bottom"))+parseFloat(r.getPropertyValue("padding-top")),i=parseFloat(r.getPropertyValue("border-bottom-width"))+parseFloat(r.getPropertyValue("border-top-width")),l=e4e.map(function(c){return"".concat(c,":").concat(r.getPropertyValue(c))}).join(";"),s={sizingStyle:l,paddingSize:o,borderSize:i,boxSizing:a};return t&&n&&(Wy[n]=s),s}function n4e(e){var t=arguments.length>1&&arguments[1]!==void 0?arguments[1]:!1,n=arguments.length>2&&arguments[2]!==void 0?arguments[2]:null,r=arguments.length>3&&arguments[3]!==void 0?arguments[3]:null;Co||(Co=document.createElement("textarea"),document.body.appendChild(Co)),e.getAttribute("wrap")?Co.setAttribute("wrap",e.getAttribute("wrap")):Co.removeAttribute("wrap");var a=t4e(e,t),o=a.paddingSize,i=a.borderSize,l=a.boxSizing,s=a.sizingStyle;Co.setAttribute("style","".concat(s,";").concat(Q$e)),Co.value=e.value||e.placeholder||"";var c=Number.MIN_SAFE_INTEGER,d=Number.MAX_SAFE_INTEGER,f=Co.scrollHeight,p;if(l==="border-box"?f+=i:l==="content-box"&&(f-=o),n!==null||r!==null){Co.value=" ";var v=Co.scrollHeight-o;n!==null&&(c=v*n,l==="border-box"&&(c=c+o+i),f=Math.max(c,f)),r!==null&&(d=v*r,l==="border-box"&&(d=d+o+i),p=f>d?"":"hidden",f=Math.min(d,f))}return{height:"".concat(f,"px"),minHeight:"".concat(c,"px"),maxHeight:"".concat(d,"px"),overflowY:p,resize:"none"}}var 
/* resizeStatus values for ResizableTextArea: Uy is the idle state (the only state in which resize events are re-emitted), p3 is set while a freshly measured height is applied (overflow is forced hidden while it lasts), r4e is the transitional state set by the next en-scheduled callback before returning to Uy. */Uy=0,p3=1,r4e=2,/* Props: Vf plus autoSize, its deprecated alias autosize, and onResize. */a4e=P(P({},Vf),{autosize:{type:[Boolean,Object],default:void 0},autoSize:{type:[Boolean,Object],default:void 0},onResize:u.func}),/* ResizableTextArea (o4e): textarea whose height is recomputed through n4e on mount and after every value change when autoSize (or autosize) is set. Pending en callbacks are cancelled before unmount. */o4e=G({name:"ResizableTextArea",mixins:[nt],inheritAttrs:!1,props:a4e,setup:function(){return{nextFrameActionId:void 0,textArea:null,resizeFrameId:void 0}},data:function(){return{textareaStyles:{},resizeStatus:Uy}},watch:{value:function(){var t=this;Ne(function(){t.resizeTextarea()})}},mounted:function(){this.resizeTextarea()},beforeUnmount:function(){en.cancel(this.nextFrameActionId),en.cancel(this.resizeFrameId)},methods:{saveTextArea:function(t){this.textArea=t},/* Forwards a resize notification as a resize event, but only in the idle state Uy (NOTE(review): presumably to ignore height changes caused by the component itself). */handleResize:function(t){var n=this.$data.resizeStatus;n===Uy&&this.$emit("resize",t)},/* Cancels any pending scheduled resize and schedules resizeTextarea through en. */resizeOnNextFrame:function(){en.cancel(this.nextFrameActionId),this.nextFrameActionId=en(this.resizeTextarea)},/* No-op without autoSize/autosize or before the textarea ref is set. Otherwise measures the textarea with n4e using minRows and maxRows, applies the resulting styles with status p3, then steps the status to r4e and back to Uy in two nested en callbacks, finishing with fixFirefoxAutoScroll. */resizeTextarea:function(){var t=this,n=this.$props.autoSize||this.$props.autosize;if(!(!n||!this.textArea)){var r=n.minRows,a=n.maxRows,o=n4e(this.textArea,!1,r,a);this.setState({textareaStyles:o,resizeStatus:p3},function(){en.cancel(t.resizeFrameId),t.resizeFrameId=en(function(){t.setState({resizeStatus:r4e},function(){t.resizeFrameId=en(function(){t.setState({resizeStatus:Uy}),t.fixFirefoxAutoScroll()})})})})}},/* Re-applies the current selection range when the textarea is the active element; any exception is deliberately swallowed (best effort). NOTE(review): the name suggests a Firefox scroll workaround; the reason is not visible in this section. */fixFirefoxAutoScroll:function(){try{if(document.activeElement===this.textArea){var t=this.textArea.selectionStart,n=this.textArea.selectionEnd;this.textArea.setSelectionRange(t,n)}}catch{}},/* Builds the native textarea props: warns when the deprecated autosize is used, drops the listed component-only keys, falls back to an empty string for a falsy value, merges the measured styles over the style attr and hides overflow while the status is p3 (the function continues on the next line). */renderTextArea:function(){var t=this,n=P(P({},Qe(this)),this.$attrs),r=n.prefixCls,a=n.autoSize,o=n.autosize,i=n.disabled,l=n.class,s=this.$data,c=s.textareaStyles,d=s.resizeStatus;on(o===void 0,"Input.TextArea","autosize is deprecated, please use autoSize instead.");var f=bn(n,["prefixCls","onPressEnter","autoSize","autosize","defaultValue","allowClear","type","lazy"]),p=Se(r,l,V({},"".concat(r,"-disabled"),i));"value"in f&&(f.value=f.value||"");var v=P(P(P({},n.style),c),d===p3?{overflowX:"hidden",overflowY:"hidden"}:null),m=P(P({},f),{style:v,class:p});return 
m.autofocus||delete m.autofocus,g(zo,{onResize:this.handleResize,disabled:!(a||o)},{default:function(){return[at(g("textarea",le(le({},m),{},{ref:t.saveTextArea}),null),[[Mi]])]}})}},render:function(){return this.renderTextArea()}}),i4e=o4e,l4e=P(P({},Vf),{autosize:an(u.oneOfType([Object,Boolean])),autoSize:an(u.oneOfType([Object,Boolean])),showCount:u.looseBool,onCompositionstart:u.func,onCompositionend:u.func}),S2=G({name:"ATextarea",inheritAttrs:!1,props:P({},l4e),setup:function(){return{configProvider:ve("configProvider",St),resizableTextArea:null,clearableInput:null}},data:function(){var t=typeof this.value=="undefined"?this.defaultValue:this.value;return{stateValue:typeof t=="undefined"?"":t}},watch:{value:function(t){this.stateValue=t}},mounted:function(){Ne(function(){})},methods:{setValue:function(t,n){vt(this,"value")?this.$forceUpdate():this.stateValue=t,Ne(function(){n&&n()})},handleKeyDown:function(t){t.keyCode===13&&this.$emit("pressEnter",t),this.$emit("keydown",t)},triggerChange:function(t){this.$emit("update:value",t.target.value),this.$emit("change",t),this.$emit("input",t)},handleChange:function(t){var n=this,r=t.target,a=r.value,o=r.composing,i=r.isComposing;(i||o)&&this.lazy||this.stateValue===a||(this.setValue(t.target.value,function(){var l;(l=n.resizableTextArea)===null||l===void 0||l.resizeTextarea()}),zv(this.resizableTextArea.textArea,t,this.triggerChange))},focus:function(){this.resizableTextArea.textArea.focus()},blur:function(){this.resizableTextArea.textArea.blur()},saveTextArea:function(t){this.resizableTextArea=t},saveClearableInput:function(t){this.clearableInput=t},handleReset:function(t){var n=this;this.setValue("",function(){n.resizableTextArea.renderTextArea(),n.focus()}),zv(this.resizableTextArea.textArea,t,this.triggerChange)},renderTextArea:function(t){var 
n=Qe(this),r=this.$attrs,a=r.style,o=r.class,i=P(P(P({},n),this.$attrs),{style:!n.showCount&&a,class:!n.showCount&&o,showCount:null,prefixCls:t,onInput:this.handleChange,onChange:this.handleChange,onKeydown:this.handleKeyDown});return g(i4e,le(le({},i),{},{ref:this.saveTextArea}),null)}},render:function(){var t=this.stateValue,n=this.prefixCls,r=this.maxlength,a=this.showCount,o=this.$attrs,i=o.style,l=o.class,s=this.configProvider.getPrefixCls,c=s("input",n),d=IA(t),f=Number(r)>0;d=f?d.slice(0,r):d;var p=P(P(P({},Qe(this)),this.$attrs),{prefixCls:c,inputType:"text",element:this.renderTextArea(c),handleReset:this.handleReset}),v=g(MA,le(le({},p),{},{value:d,ref:this.saveClearableInput}),null);if(a){var m=Je(d).length,y="".concat(m).concat(f?" / ".concat(r):"");v=g("div",{class:Se("".concat(c,"-textarea"),"".concat(c,"-textarea-show-count"),l),style:i,"data-count":y},[v])}return v}}),s4e={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M942.2 486.2C847.4 286.5 704.1 186 512 186c-192.2 0-335.4 100.5-430.2 300.3a60.3 60.3 0 000 51.5C176.6 737.5 319.9 838 512 838c192.2 0 335.4-100.5 430.2-300.3 7.7-16.2 7.7-35 0-51.5zM512 766c-161.3 0-279.4-81.8-362.7-254C232.6 339.8 350.7 258 512 258c161.3 0 279.4 81.8 362.7 254C791.5 684.2 673.4 766 512 766zm-4-430c-97.2 0-176 78.8-176 176s78.8 176 176 176 176-78.8 176-176-78.8-176-176-176zm0 288c-61.9 0-112-50.1-112-112s50.1-112 112-112 112 50.1 112 112-50.1 112-112 112z"}}]},name:"eye",theme:"outlined"},u4e=s4e;function v3(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){c4e(e,a,n[a])})}return e}function c4e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var 
k2=function(t,n){var r=v3({},t,n.attrs);return g(Et,v3({},r,{icon:u4e}),null)};k2.displayName="EyeOutlined";k2.inheritAttrs=!1;var RA=k2,d4e={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M942.2 486.2Q889.47 375.11 816.7 305l-50.88 50.88C807.31 395.53 843.45 447.4 874.7 512 791.5 684.2 673.4 766 512 766q-72.67 0-133.87-22.38L323 798.75Q408 838 512 838q288.3 0 430.2-300.3a60.29 60.29 0 000-51.5zm-63.57-320.64L836 122.88a8 8 0 00-11.32 0L715.31 232.2Q624.86 186 512 186q-288.3 0-430.2 300.3a60.3 60.3 0 000 51.5q56.69 119.4 136.5 191.41L112.48 835a8 8 0 000 11.31L155.17 889a8 8 0 0011.31 0l712.15-712.12a8 8 0 000-11.32zM149.3 512C232.6 339.8 350.7 258 512 258c54.54 0 104.13 9.36 149.12 28.39l-70.3 70.3a176 176 0 00-238.13 238.13l-83.42 83.42C223.1 637.49 183.3 582.28 149.3 512zm246.7 0a112.11 112.11 0 01146.2-106.69L401.31 546.2A112 112 0 01396 512z"}},{tag:"path",attrs:{d:"M508 624c-3.46 0-6.87-.16-10.25-.47l-52.82 52.82a176.09 176.09 0 00227.42-227.42l-52.82 52.82c.31 3.38.47 6.79.47 10.25a111.94 111.94 0 01-112 112z"}}]},name:"eye-invisible",theme:"outlined"},f4e=d4e;function m3(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){h4e(e,a,n[a])})}return e}function h4e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var $2=function(t,n){var r=m3({},t,n.attrs);return g(Et,m3({},r,{icon:f4e}),null)};$2.displayName="EyeInvisibleOutlined";$2.inheritAttrs=!1;var p4e=$2,v4e=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var 
a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},m4e={click:"onClick",hover:"onMouseover"},LA=G({name:"AInputPassword",mixins:[nt],inheritAttrs:!1,props:P(P({},Vf),{prefixCls:u.string,inputPrefixCls:u.string,action:u.string.def("click"),visibilityToggle:u.looseBool.def(!0),iconRender:u.func.def(function(e){return g(e?RA:p4e,null,null)})}),setup:function(){return{input:null,configProvider:ve("configProvider",St)}},data:function(){return{visible:!1}},methods:{saveInput:function(t){this.input=t},focus:function(){this.input.focus()},blur:function(){this.input.blur()},onVisibleChange:function(){this.disabled||this.setState({visible:!this.visible})},getIcon:function(t){var n,r=this.$props.action,a=m4e[r]||"",o=this.$slots.iconRender||this.$props.iconRender,i=o(this.visible),l=(n={},V(n,a,this.onVisibleChange),V(n,"onMousedown",function(c){c.preventDefault()}),V(n,"onMouseup",function(c){c.preventDefault()}),V(n,"class","".concat(t,"-icon")),V(n,"key","passwordIcon"),n);return Ot(i,l)}},render:function(){var t=Qe(this),n=t.prefixCls,r=t.inputPrefixCls,a=t.size;t.suffix,t.action;var o=t.visibilityToggle;t.iconRender;var i=v4e(t,["prefixCls","inputPrefixCls","size","suffix","action","visibilityToggle","iconRender"]),l=this.$attrs.class,s=this.configProvider.getPrefixCls,c=s("input",r),d=s("input-password",n),f=o&&this.getIcon(d),p=Se(d,l,V({},"".concat(d,"-").concat(a),!!a)),v=P(P(P(P({},i),{prefixCls:c,size:a,suffix:f,prefix:We(this,"prefix"),addonAfter:We(this,"addonAfter"),addonBefore:We(this,"addonBefore")}),this.$attrs),{type:this.visible?"text":"password",class:p,ref:"input"});return g(Jn,le(le({},v),{},{ref:this.saveInput}),null)}});Jn.Group=AA;Jn.Search=DA;Jn.TextArea=S2;Jn.Password=LA;Jn.install=function(e){return 
e.component(Jn.name,Jn),e.component(Jn.Group.name,Jn.Group),e.component(Jn.Search.name,Jn.Search),e.component(Jn.TextArea.name,Jn.TextArea),e.component(Jn.Password.name,Jn.Password),e};var O2=function(){return null};O2.isSelectOption=!0;O2.displayName="AAutoCompleteOption";var jp=O2,P2=function(){return null};P2.isSelectOptGroup=!0;P2.displayName="AAutoCompleteOptGroup";var FA=P2;function g4e(e){var t,n;return((t=e==null?void 0:e.type)===null||t===void 0?void 0:t.isSelectOption)||((n=e==null?void 0:e.type)===null||n===void 0?void 0:n.isSelectOptGroup)}var y4e=P(P({},Sg()),{dataSource:u.array,dropdownMenuStyle:u.style,optionLabelProp:u.string,dropdownMatchSelectWidth:u.looseBool}),b4e=jp,C4e=FA,Hi=G({name:"AAutoComplete",inheritAttrs:!1,props:P(P({},y4e),{prefixCls:u.string,showSearch:u.looseBool,transitionName:u.string.def("slide-up"),choiceTransitionName:u.string.def("zoom"),autofocus:u.looseBool,backfill:u.looseBool,optionLabelProp:u.string.def("children"),filterOption:u.oneOfType([u.looseBool,u.func]).def(!1),defaultActiveFirstOption:u.looseBool.def(!0)}),emits:["change","select","focus","blur"],Option:jp,OptGroup:FA,setup:function(t,n){var r=n.slots;return on(!(t.dataSource!==void 0||"dataSource"in r),"AutoComplete","`dataSource` is deprecated, please use `options` instead."),{configProvider:ve("configProvider",St),popupRef:null,select:null}},created:function(){ot("savePopupRef",this.savePopupRef)},methods:{savePopupRef:function(t){this.popupRef=t},saveSelect:function(t){this.select=t},getInputElement:function(){var t=ht(this),n=t.length?t[0]:g(Jn,{lazy:!1},null);return n},focus:function(){this.select&&this.select.focus()},blur:function(){this.select&&this.select.blur()}},render:function(){var 
t,n=this.size,r=this.prefixCls,a=this.dataSource,o,i=this.configProvider.getPrefixCls,l=i("select",r),s=this.$attrs.class,c=(t={},V(t,s,!!s),V(t,"".concat(l,"-lg"),n==="large"),V(t,"".concat(l,"-sm"),n==="small"),V(t,"".concat(l,"-show-search"),!0),V(t,"".concat(l,"-auto-complete"),!0),t),d=ht(this,"dataSource");"options"in this.$slots&&(d=ht(this,"options")),d.length&&g4e(d[0])?o=d:o=a?a.map(function(p){if(zn(p))return p;switch(kt(p)){case"string":return g(jp,{key:p,value:p},{default:function(){return[p]}});case"object":return g(jp,{key:p.value,value:p.value},{default:function(){return[p.text]}});default:throw new Error("AutoComplete[dataSource] only supports type `string[] | Object[]`.")}}):[];var f=P(P(P({},bn(Qe(this),["dataSource","optionLabelProp"])),this.$attrs),{mode:dl.SECRET_COMBOBOX_MODE_DO_NOT_USE,getInputElement:this.getInputElement,notFoundContent:We(this,"notFoundContent"),class:c,ref:this.saveSelect});return g(dl,f,{default:function(){return[o]}})}});Hi.install=function(e){return e.component(Hi.name,Hi),e.component(Hi.Option.displayName,Hi.Option),e.component(Hi.OptGroup.displayName,Hi.OptGroup),e};var w4e=Hi,S4e={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M699 353h-46.9c-10.2 0-19.9 4.9-25.9 13.3L469 584.3l-71.2-98.8c-6-8.3-15.6-13.3-25.9-13.3H325c-6.5 0-10.3 7.4-6.5 12.7l124.6 172.8a31.8 31.8 0 0051.7 0l210.6-292c3.9-5.3.1-12.7-6.4-12.7z"}},{tag:"path",attrs:{d:"M512 64C264.6 64 64 264.6 64 512s200.6 448 448 448 448-200.6 448-448S759.4 64 512 64zm0 820c-205.4 0-372-166.6-372-372s166.6-372 372-372 372 166.6 372 372-166.6 372-372 372z"}}]},name:"check-circle",theme:"outlined"},k4e=S4e;function g3(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return 
Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){$4e(e,a,n[a])})}return e}function $4e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var T2=function(t,n){var r=g3({},t,n.attrs);return g(Et,g3({},r,{icon:k4e}),null)};T2.displayName="CheckCircleOutlined";T2.inheritAttrs=!1;var x2=T2,O4e={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M512 64C264.6 64 64 264.6 64 512s200.6 448 448 448 448-200.6 448-448S759.4 64 512 64zm0 820c-205.4 0-372-166.6-372-372s166.6-372 372-372 372 166.6 372 372-166.6 372-372 372z"}},{tag:"path",attrs:{d:"M464 688a48 48 0 1096 0 48 48 0 10-96 0zm24-112h48c4.4 0 8-3.6 8-8V296c0-4.4-3.6-8-8-8h-48c-4.4 0-8 3.6-8 8v272c0 4.4 3.6 8 8 8z"}}]},name:"exclamation-circle",theme:"outlined"},P4e=O4e;function y3(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){T4e(e,a,n[a])})}return e}function T4e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var _2=function(t,n){var r=y3({},t,n.attrs);return g(Et,y3({},r,{icon:P4e}),null)};_2.displayName="ExclamationCircleOutlined";_2.inheritAttrs=!1;var E2=_2,x4e={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M512 64C264.6 64 64 264.6 64 512s200.6 448 448 448 448-200.6 448-448S759.4 64 512 64zm0 820c-205.4 0-372-166.6-372-372s166.6-372 372-372 372 166.6 372 372-166.6 372-372 372z"}},{tag:"path",attrs:{d:"M464 336a48 48 0 1096 0 48 48 0 10-96 0zm72 112h-48c-4.4 0-8 3.6-8 8v272c0 4.4 3.6 8 8 8h48c4.4 0 8-3.6 8-8V456c0-4.4-3.6-8-8-8z"}}]},name:"info-circle",theme:"outlined"},_4e=x4e;function b3(e){for(var 
t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){E4e(e,a,n[a])})}return e}function E4e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var M2=function(t,n){var r=b3({},t,n.attrs);return g(Et,b3({},r,{icon:_4e}),null)};M2.displayName="InfoCircleOutlined";M2.inheritAttrs=!1;var I2=M2,M4e={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M685.4 354.8c0-4.4-3.6-8-8-8l-66 .3L512 465.6l-99.3-118.4-66.1-.3c-4.4 0-8 3.5-8 8 0 1.9.7 3.7 1.9 5.2l130.1 155L340.5 670a8.32 8.32 0 00-1.9 5.2c0 4.4 3.6 8 8 8l66.1-.3L512 564.4l99.3 118.4 66 .3c4.4 0 8-3.5 8-8 0-1.9-.7-3.7-1.9-5.2L553.5 515l130.1-155c1.2-1.4 1.8-3.3 1.8-5.2z"}},{tag:"path",attrs:{d:"M512 65C264.6 65 64 265.6 64 513s200.6 448 448 448 448-200.6 448-448S759.4 65 512 65zm0 820c-205.4 0-372-166.6-372-372s166.6-372 372-372 372 166.6 372 372-166.6 372-372 372z"}}]},name:"close-circle",theme:"outlined"},I4e=M4e;function C3(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){N4e(e,a,n[a])})}return e}function N4e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var N2=function(t,n){var r=C3({},t,n.attrs);return g(Et,C3({},r,{icon:I4e}),null)};N2.displayName="CloseCircleOutlined";N2.inheritAttrs=!1;var A2=N2,A4e={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M512 64C264.6 64 64 264.6 64 512s200.6 448 448 448 448-200.6 448-448S759.4 
64 512 64zm193.5 301.7l-210.6 292a31.8 31.8 0 01-51.7 0L318.5 484.9c-3.8-5.3 0-12.7 6.5-12.7h46.9c10.2 0 19.9 4.9 25.9 13.3l71.2 98.8 157.2-218c6-8.3 15.6-13.3 25.9-13.3H699c6.5 0 10.3 7.4 6.5 12.7z"}}]},name:"check-circle",theme:"filled"},D4e=A4e;function w3(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){R4e(e,a,n[a])})}return e}function R4e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var D2=function(t,n){var r=w3({},t,n.attrs);return g(Et,w3({},r,{icon:D4e}),null)};D2.displayName="CheckCircleFilled";D2.inheritAttrs=!1;var zf=D2,L4e={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M512 64C264.6 64 64 264.6 64 512s200.6 448 448 448 448-200.6 448-448S759.4 64 512 64zm-32 232c0-4.4 3.6-8 8-8h48c4.4 0 8 3.6 8 8v272c0 4.4-3.6 8-8 8h-48c-4.4 0-8-3.6-8-8V296zm32 440a48.01 48.01 0 010-96 48.01 48.01 0 010 96z"}}]},name:"exclamation-circle",theme:"filled"},F4e=L4e;function S3(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){B4e(e,a,n[a])})}return e}function B4e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var R2=function(t,n){var r=S3({},t,n.attrs);return g(Et,S3({},r,{icon:F4e}),null)};R2.displayName="ExclamationCircleFilled";R2.inheritAttrs=!1;var Hf=R2,V4e={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M512 64C264.6 64 64 264.6 64 512s200.6 448 448 448 
448-200.6 448-448S759.4 64 512 64zm32 664c0 4.4-3.6 8-8 8h-48c-4.4 0-8-3.6-8-8V456c0-4.4 3.6-8 8-8h48c4.4 0 8 3.6 8 8v272zm-32-344a48.01 48.01 0 010-96 48.01 48.01 0 010 96z"}}]},name:"info-circle",theme:"filled"},z4e=V4e;function k3(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){H4e(e,a,n[a])})}return e}function H4e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var L2=function(t,n){var r=k3({},t,n.attrs);return g(Et,k3({},r,{icon:z4e}),null)};L2.displayName="InfoCircleFilled";L2.inheritAttrs=!1;var BA=L2;function j4e(){}var K4e={success:zf,info:BA,error:Yr,warning:Hf},W4e={success:x2,info:I2,error:A2,warning:E2},U4e=rt("success","info","warning","error"),Y4e={type:u.oneOf(U4e),closable:u.looseBool,closeText:u.VNodeChild,message:u.VNodeChild,description:u.VNodeChild,afterClose:u.func.def(j4e),showIcon:u.looseBool,prefixCls:u.string,banner:u.looseBool,icon:u.VNodeChild,onClose:u.VNodeChild},q4e=G({name:"AAlert",inheritAttrs:!1,props:Y4e,emits:["close"],setup:function(t,n){var r=n.slots,a=n.emit,o=n.attrs,i=n.expose,l=ve("configProvider",St),s=H(!1),c=H(!1),d=H(),f=function(m){m.preventDefault();var y=d.value;y.style.height="".concat(y.offsetHeight,"px"),y.style.height="".concat(y.offsetHeight,"px"),s.value=!0,a("close",m)},p=function(){var m;s.value=!1,c.value=!0,(m=t.afterClose)===null||m===void 0||m.call(t)};return i({animationEnd:p}),function(){var v,m=t.prefixCls,y=t.banner,b=l.getPrefixCls,C=b("alert",m),S=t.closable,w=t.type,k=t.showIcon,$=jn(r,t,"closeText"),O=jn(r,t,"description"),T=jn(r,t,"message"),_=jn(r,t,"icon");k=y&&k===void 0?!0:k,w=y&&w===void 0?"warning":w||"info";var I=(O?W4e:K4e)[w]||null;$&&(S=!0);var 
L=Se(C,(v={},V(v,"".concat(C,"-").concat(w),!0),V(v,"".concat(C,"-closing"),s.value),V(v,"".concat(C,"-with-description"),!!O),V(v,"".concat(C,"-no-icon"),!k),V(v,"".concat(C,"-banner"),!!y),V(v,"".concat(C,"-closable"),S),v)),/* Close button: only when closable; shows closeText when provided, otherwise the go icon. */j=S?g("button",{type:"button",onClick:f,class:"".concat(C,"-close-icon"),tabindex:0},[$?g("span",{class:"".concat(C,"-close-text")},[$]):g(go,null,null)]):null,/* Icon: a custom icon (passed through hr with the -icon class when zn accepts it, otherwise wrapped in a span) or, failing that, the default icon component I chosen for the alert type. */F=_&&(zn(_)?hr(_,{class:"".concat(C,"-icon")}):g("span",{class:"".concat(C,"-icon")},[_]))||g(I,{class:"".concat(C,"-icon")},null),N=Lo("".concat(C,"-slide-up"),{appear:!1,onAfterLeave:p});return c.value?null:g(no,N,{default:function(){return[at(g("div",le(le({},o),{},{class:[o.class,L],"data-show":!s.value,ref:d}),[k?F:null,g("div",{class:"".concat(C,"-content")},[g("div",{class:"".concat(C,"-message")},[T]),g("div",{class:"".concat(C,"-description")},[O])]),j]),[[_t,!s.value]])]}})}}}),/* G4e: the AAlert component q4e passed through kn (NOTE(review): presumably the plugin-install wrapper; confirm at the definition of kn). */G4e=kn(q4e),/* Breakpoint names, ordered from the widest (xxl) to the narrowest (xs). */ws=["xxl","xl","lg","md","sm","xs"],/* Media query for each breakpoint. */Ph={xs:"(max-width: 575px)",sm:"(min-width: 576px)",md:"(min-width: 768px)",lg:"(min-width: 992px)",xl:"(min-width: 1200px)",xxl:"(min-width: 1600px)"},/* Observer state: Tl maps subscription token to callback, Yy is the last token handed out, Th is the latest breakpoint-to-matches map. */Tl=new Map,Yy=-1,Th={},/* Responsive observer, exported below as lc. */X4e={matchHandlers:{},/* Stores the new breakpoint map, notifies every subscriber and reports whether at least one subscriber exists. */dispatch:function(t){return Th=t,Tl.forEach(function(n){return n(Th)}),Tl.size>=1},/* Registers the media-query listeners when there is no subscriber yet, stores the callback under a new token, calls it immediately with the current map and returns the token. */subscribe:function(t){return Tl.size||this.register(),Yy+=1,Tl.set(Yy,t),t(Th),Yy},/* Removes the callback stored under the token; tears the listeners down once no subscriber is left. */unsubscribe:function(t){Tl.delete(t),Tl.size||this.unregister()},/* Detaches every stored media-query listener and clears all subscriptions. */unregister:function(){var t=this;Object.keys(Ph).forEach(function(n){var r=Ph[n],a=t.matchHandlers[r];a==null||a.mql.removeListener(a==null?void 0:a.listener)}),Tl.clear()},/* Creates one MediaQueryList per breakpoint, attaches a listener that dispatches the updated map, remembers the pair in matchHandlers and runs the listener once with the list itself for the initial state. */register:function(){var t=this;Object.keys(Ph).forEach(function(n){var r=Ph[n],a=function(l){var s=l.matches;t.dispatch(P(P({},Th),V({},n,s)))},o=window.matchMedia(r);o.addListener(a),t.matchHandlers[r]={mql:o,listener:a},a(o)})}},lc=X4e;/* F2: returns a ref holding the current breakpoint map; subscribes to lc inside the et callback and unsubscribes inside the Wr callback (NOTE(review): presumably the mounted and unmount lifecycle hooks; confirm). */function F2(){var e=H({}),t=null;return et(function(){t=lc.subscribe(function(n){e.value=n})}),Wr(function(){lc.unsubscribe(t)}),e}/* VA: key under which Z4e publishes the resolved size via ot(VA,r) in the continuation of Z4e. */var VA=Symbol("SizeProvider"),Z4e=function(t){var n=ve("configProvider",St),r=x(function(){return 
t.size||n.componentSize});return ot(VA,r),r},zA=function(t){var n=t?x(function(){return t.size}):ve(VA,x(function(){return"default"}));return n},J4e=Z4e,HA={prefixCls:u.string,shape:u.oneOf(rt("circle","square")).def("circle"),size:{type:[Number,String,Object],default:function(){return"default"}},src:u.string,srcset:u.string,icon:u.VNodeChild,alt:u.string,gap:u.number,draggable:u.bool,loadError:{type:Function}},Q4e=G({name:"AAvatar",inheritAttrs:!1,props:HA,slots:["icon"],setup:function(t,n){var r=n.slots,a=n.attrs,o=H(!0),i=H(!1),l=H(1),s=H(null),c=H(null),d=Wt("avatar",t),f=d.prefixCls,p=zA(),v=F2(),m=x(function(){if(kt(t.size)==="object"){var S=ws.find(function(k){return v.value[k]}),w=t.size[S];return w}}),y=function(w){return m.value?{width:"".concat(m.value,"px"),height:"".concat(m.value,"px"),lineHeight:"".concat(m.value,"px"),fontSize:"".concat(w?m.value/2:18,"px")}:{}},b=function(){if(!(!s.value||!c.value)){var w=s.value.offsetWidth,k=c.value.offsetWidth;if(w!==0&&k!==0){var $=t.gap,O=$===void 0?4:$;O*2<k&&(l.value=k-O*2<w?(k-O*2)/w:1)}}},C=function(){var w=t.loadError,k=w==null?void 0:w();k!==!1&&(o.value=!1)};return ce(function(){return t.src},function(){Ne(function(){o.value=!0,l.value=1})}),ce(function(){return t.gap},function(){Ne(function(){b()})}),et(function(){Ne(function(){b(),i.value=!0})}),function(){var S,w,k=t.shape,$=t.size,O=t.src,T=t.alt,_=t.srcset,I=t.draggable,L=jn(r,t,"icon"),j=f.value,F=$==="default"?p.value:$,N=(S={},V(S,"".concat(a.class),!!a.class),V(S,j,!0),V(S,"".concat(j,"-lg"),F==="large"),V(S,"".concat(j,"-sm"),F==="small"),V(S,"".concat(j,"-").concat(k),k),V(S,"".concat(j,"-image"),O&&o.value),V(S,"".concat(j,"-icon"),L),S),D=typeof F=="number"?{width:"".concat(F,"px"),height:"".concat(F,"px"),lineHeight:"".concat(F,"px"),fontSize:L?"".concat(F/2,"px"):"18px"}:{},z=(w=r.default)===null||w===void 0?void 0:w.call(r),B;if(O&&o.value)B=g("img",{draggable:I,src:O,srcset:_,onError:C,alt:T},null);else if(L)B=L;else 
if(i.value||l.value!==1){var M="scale(".concat(l.value,") translateX(-50%)"),E={msTransform:M,WebkitTransform:M,transform:M},K=typeof F=="number"?{lineHeight:"".concat(F,"px")}:{};B=g(zo,{onResize:b},{default:function(){return[g("span",{class:"".concat(j,"-string"),ref:s,style:P(P({},K),E)},[z])]}})}else B=g("span",{class:"".concat(j,"-string"),ref:s,style:{opacity:0}},[z]);return g("span",le(le({},a),{},{ref:c,class:N,style:P(P(P({},D),y(!!L)),a.style)}),[B])}}}),ss=Q4e,Ca={adjustX:1,adjustY:1},wa=[0,0],jA={left:{points:["cr","cl"],overflow:Ca,offset:[-4,0],targetOffset:wa},right:{points:["cl","cr"],overflow:Ca,offset:[4,0],targetOffset:wa},top:{points:["bc","tc"],overflow:Ca,offset:[0,-4],targetOffset:wa},bottom:{points:["tc","bc"],overflow:Ca,offset:[0,4],targetOffset:wa},topLeft:{points:["bl","tl"],overflow:Ca,offset:[0,-4],targetOffset:wa},leftTop:{points:["tr","tl"],overflow:Ca,offset:[-4,0],targetOffset:wa},topRight:{points:["br","tr"],overflow:Ca,offset:[0,-4],targetOffset:wa},rightTop:{points:["tl","tr"],overflow:Ca,offset:[4,0],targetOffset:wa},bottomRight:{points:["tr","br"],overflow:Ca,offset:[0,4],targetOffset:wa},rightBottom:{points:["bl","br"],overflow:Ca,offset:[4,0],targetOffset:wa},bottomLeft:{points:["tl","bl"],overflow:Ca,offset:[0,4],targetOffset:wa},leftBottom:{points:["br","bl"],overflow:Ca,offset:[-4,0],targetOffset:wa}},eOe={name:"Content",props:{prefixCls:u.string,overlay:u.any,trigger:u.any,overlayInnerStyle:u.any},updated:function(){var t=this.trigger;t&&t.forcePopupAlign()},render:function(){var t=this.overlay,n=this.prefixCls,r=this.overlayInnerStyle;return g("div",{class:"".concat(n,"-inner"),role:"tooltip",style:r},[typeof t=="function"?t():t])}},tOe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var 
a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n};function $3(){}var nOe=G({name:"Tooltip",inheritAttrs:!1,props:{trigger:u.any.def(["hover"]),defaultVisible:u.looseBool,visible:u.looseBool,placement:u.string.def("right"),transitionName:u.oneOfType([u.string,u.object]),animation:u.any,afterVisibleChange:u.func.def(function(){}),overlay:u.any,overlayStyle:u.object,overlayClassName:u.string,prefixCls:u.string.def("rc-tooltip"),mouseEnterDelay:u.number.def(0),mouseLeaveDelay:u.number.def(.1),getTooltipContainer:u.func,destroyTooltipOnHide:u.looseBool.def(!1),align:u.object.def(function(){return{}}),arrowContent:u.any.def(null),tipId:u.string,builtinPlacements:u.object,overlayInnerStyle:u.style},methods:{getPopupElement:function(){var t=this.$props,n=t.prefixCls,r=t.tipId,a=t.overlayInnerStyle;return[g("div",{class:"".concat(n,"-arrow"),key:"arrow"},[We(this,"arrowContent")]),g(eOe,{key:"content",trigger:this.$refs.trigger,prefixCls:n,id:r,overlay:We(this,"overlay"),overlayInnerStyle:a},null)]},getPopupDomNode:function(){return this.$refs.trigger.getPopupDomNode()}},render:function(t){var n=this,r=Qe(this),a=r.overlayClassName,o=r.trigger,i=r.mouseEnterDelay,l=r.mouseLeaveDelay,s=r.overlayStyle,c=r.prefixCls,d=r.afterVisibleChange,f=r.transitionName,p=r.animation,v=r.placement,m=r.align,y=r.destroyTooltipOnHide,b=r.defaultVisible,C=r.getTooltipContainer,S=tOe(r,["overlayClassName","trigger","mouseEnterDelay","mouseLeaveDelay","overlayStyle","prefixCls","afterVisibleChange","transitionName","animation","placement","align","destroyTooltipOnHide","defaultVisible","getTooltipContainer"]),w=P({},S);vt(this,"visible")&&(w.popupVisible=this.$props.visible);var 
k=this.$attrs,$=P(P(P({popupClassName:a,prefixCls:c,action:o,builtinPlacements:jA,popupPlacement:v,popupAlign:m,getPopupContainer:C,afterPopupVisibleChange:d,popupTransitionName:f,popupAnimation:p,defaultPopupVisible:b,destroyPopupOnHide:y,mouseLeaveDelay:l,popupStyle:s,mouseEnterDelay:i},w),k),{onPopupVisibleChange:k.onVisibleChange||$3,onPopupAlign:k.onPopupAlign||$3,ref:"trigger",popup:this.getPopupElement()});return g(Ii,$,{default:function(){return[ht(n)[0]]}})}}),rOe={adjustX:1,adjustY:1},O3={adjustX:0,adjustY:0},aOe=[0,0];function P3(e){return typeof e=="boolean"?e?rOe:O3:P(P({},O3),e)}function oOe(e){var t=e.arrowWidth,n=t===void 0?5:t,r=e.horizontalArrowShift,a=r===void 0?16:r,o=e.verticalArrowShift,i=o===void 0?12:o,l=e.autoAdjustOverflow,s=l===void 0?!0:l,c={left:{points:["cr","cl"],offset:[-4,0]},right:{points:["cl","cr"],offset:[4,0]},top:{points:["bc","tc"],offset:[0,-4]},bottom:{points:["tc","bc"],offset:[0,4]},topLeft:{points:["bl","tc"],offset:[-(a+n),-4]},leftTop:{points:["tr","cl"],offset:[-4,-(i+n)]},topRight:{points:["br","tc"],offset:[a+n,-4]},rightTop:{points:["tl","cr"],offset:[4,-(i+n)]},bottomRight:{points:["tr","bc"],offset:[a+n,4]},rightBottom:{points:["bl","cr"],offset:[4,i+n]},bottomLeft:{points:["tl","bc"],offset:[-(a+n),4]},leftBottom:{points:["br","cl"],offset:[-4,i+n]}};return Object.keys(c).forEach(function(d){c[d]=e.arrowPointAtCenter?P(P({},c[d]),{overflow:P3(s),targetOffset:aOe}):P(P({},jA[d]),{overflow:P3(s)}),c[d].ignoreShake=!0}),c}var 
iOe=rt("success","processing","error","default","warning"),B2=rt("pink","red","yellow","orange","cyan","green","blue","purple","geekblue","magenta","volcano","gold","lime"),T3=u.oneOf(rt("hover","focus","click","contextmenu")),kg=function(){return{trigger:u.oneOfType([T3,u.arrayOf(T3)]).def("hover"),visible:u.looseBool,defaultVisible:u.looseBool,placement:u.oneOf(rt("top","left","right","bottom","topLeft","topRight","bottomLeft","bottomRight","leftTop","leftBottom","rightTop","rightBottom")).def("top"),color:u.string,transitionName:u.string.def("zoom-big-fast"),overlayStyle:u.object.def(function(){return{}}),overlayClassName:u.string,openClassName:u.string,prefixCls:u.string,mouseEnterDelay:u.number.def(.1),mouseLeaveDelay:u.number.def(.1),getPopupContainer:u.func,arrowPointAtCenter:u.looseBool.def(!1),autoAdjustOverflow:u.oneOfType([u.looseBool,u.object]).def(!0),destroyTooltipOnHide:u.looseBool.def(!1),align:u.object.def(function(){return{}}),builtinPlacements:u.object,children:u.array,onVisibleChange:u.func,"onUpdate:visible":u.func}},lOe=function(t,n){var r={},a=P({},t);return n.forEach(function(o){t&&o in t&&(r[o]=t[o],delete a[o])}),{picked:r,omitted:a}},sOe=kg(),x3=new RegExp("^(".concat(B2.join("|"),")(-inverse)?$")),uOe=P(P({},sOe),{title:u.VNodeChild}),cOe=G({name:"ATooltip",inheritAttrs:!1,props:uOe,emits:["update:visible","visibleChange"],setup:function(){return{configProvider:ve("configProvider",St)}},data:function(){return{sVisible:!!this.$props.visible||!!this.$props.defaultVisible}},watch:{visible:function(t){this.sVisible=t}},methods:{handleVisibleChange:function(t){vt(this,"visible")||(this.sVisible=this.isNoTitle()?!1:t),this.isNoTitle()||(this.$emit("update:visible",t),this.$emit("visibleChange",t))},getPopupDomNode:function(){return this.$refs.tooltip.getPopupDomNode()},getPlacements:function(){var t=this.$props,n=t.builtinPlacements,r=t.arrowPointAtCenter,a=t.autoAdjustOverflow;return 
n||oOe({arrowPointAtCenter:r,verticalArrowShift:8,autoAdjustOverflow:a})},getDisabledCompatibleChildren:function(t){if((kt(t.type)==="object"&&(t.type.__ANT_BUTTON===!0||t.type.__ANT_SWITCH===!0||t.type.__ANT_CHECKBOX===!0)||t.type==="button")&&t.props&&(t.props.disabled||t.props.disabled==="")){var n=lOe(qI(t),["position","left","right","top","bottom","float","display","zIndex"]),r=n.picked,a=n.omitted,o=P(P({display:"inline-block"},r),{cursor:"not-allowed",width:t.props&&t.props.block?"100%":null}),i=P(P({},a),{pointerEvents:"none"}),l=Ot(t,{style:i},!0);return g("span",{style:o},[l])}return t},isNoTitle:function(){var t=We(this,"title");return!t&&t!==0},getOverlay:function(){var t=We(this,"title");return t===0?t:t||""},onPopupAlign:function(t,n){var r=this.getPlacements(),a=Object.keys(r).filter(function(l){return r[l].points[0]===n.points[0]&&r[l].points[1]===n.points[1]})[0];if(!!a){var o=t.getBoundingClientRect(),i={top:"50%",left:"50%"};a.indexOf("top")>=0||a.indexOf("Bottom")>=0?i.top="".concat(o.height-n.offset[1],"px"):(a.indexOf("Top")>=0||a.indexOf("bottom")>=0)&&(i.top="".concat(-n.offset[1],"px")),a.indexOf("left")>=0||a.indexOf("Right")>=0?i.left="".concat(o.width-n.offset[0],"px"):(a.indexOf("right")>=0||a.indexOf("Left")>=0)&&(i.left="".concat(-n.offset[0],"px")),t.style.transformOrigin="".concat(i.left," ").concat(i.top)}}},render:function(){var t,n=this.$props,r=this.$data,a=this.$attrs,o=n.prefixCls,i=n.openClassName,l=n.getPopupContainer,s=n.color,c=n.overlayClassName,d=this.configProvider.getPopupContainer,f=this.configProvider.getPrefixCls,p=f("tooltip",o),v=this.children||La(ht(this));v=v.length===1?v[0]:v;var m=r.sVisible;if(!vt(this,"visible")&&this.isNoTitle()&&(m=!1),!v)return null;var 
y=this.getDisabledCompatibleChildren(zn(v)?v:g("span",null,[v])),b=Se((t={},V(t,i||"".concat(p,"-open"),m),V(t,y.props&&y.props.class,y.props&&y.props.class),t)),C=Se(c,V({},"".concat(p,"-").concat(s),s&&x3.test(s))),S,w;s&&!x3.test(s)&&(S={backgroundColor:s},w={backgroundColor:s});var k=P(P(P({},a),n),{prefixCls:p,getTooltipContainer:l||d,builtinPlacements:this.getPlacements(),overlay:this.getOverlay(),visible:m,ref:"tooltip",overlayClassName:C,overlayInnerStyle:S,arrowContent:g("span",{class:"".concat(p,"-arrow-content"),style:w},null),onVisibleChange:this.handleVisibleChange,onPopupAlign:this.onPopupAlign});return g(nOe,k,{default:function(){return[m?Ot(y,{class:b}):y]}})}}),Io=kn(cOe),dOe=kg(),fOe=G({name:"APopover",props:P(P({},dOe),{prefixCls:u.string,transitionName:u.string.def("zoom-big"),content:u.any,title:u.any}),setup:function(){return{configProvider:ve("configProvider",St)}},methods:{getPopupDomNode:function(){return this.$refs.tooltip.getPopupDomNode()}},render:function(){var t=this,n=this.title,r=this.prefixCls,a=this.$slots,o=this.configProvider.getPrefixCls,i=o("popover",r),l=Qe(this);delete l.title,delete l.content;var s=P(P({},l),{prefixCls:i,ref:"tooltip",title:g("div",null,[(n||a.title)&&g("div",{class:"".concat(i,"-title")},[We(this,"title")]),g("div",{class:"".concat(i,"-inner-content")},[We(this,"content")])])});return g(Io,s,{default:function(){return[ht(t)]}})}}),KA=kn(fOe),hOe={prefixCls:u.string,maxCount:u.number,maxStyle:{type:Object,default:function(){return{}}},maxPopoverPlacement:u.oneOf(rt("top","bottom")).def("top"),size:HA.size},pOe=G({name:"AAvatarGroup",inheritAttrs:!1,props:hOe,setup:function(t,n){var r=n.slots,a=n.attrs,o=Wt("avatar-group",t),i=o.prefixCls,l=o.direction;return J4e(t),function(){var s,c=t.maxPopoverPlacement,d=c===void 0?"top":c,f=t.maxCount,p=t.maxStyle,v=(s={},V(s,i.value,!0),V(s,"".concat(i.value,"-rtl"),l.value==="rtl"),V(s,"".concat(a.class),!!a.class),s),m=jn(r,t),y=Un(m).map(function(w,k){return 
Ot(w,{key:"avatar-key-".concat(k)})}),b=y.length;if(f&&f<b){var C=y.slice(0,f),S=y.slice(f,b);return C.push(g(KA,{key:"avatar-popover-key",content:S,trigger:"hover",placement:d,overlayClassName:"".concat(i.value,"-popover")},{default:function(){return[g(ss,{style:p},{default:function(){return["+".concat(b-f)]}})]}})),g("div",le(le({},a),{},{class:v,style:a.style}),[C])}return g("div",le(le({},a),{},{class:v,style:a.style}),[y])}}}),Kv=pOe;ss.Group=Kv;ss.install=function(e){return e.component(ss.name,ss),e.component(Kv.name,Kv),e};var vOe={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M859.9 168H164.1c-4.5 0-8.1 3.6-8.1 8v60c0 4.4 3.6 8 8.1 8h695.8c4.5 0 8.1-3.6 8.1-8v-60c0-4.4-3.6-8-8.1-8zM518.3 355a8 8 0 00-12.6 0l-112 141.7a7.98 7.98 0 006.3 12.9h73.9V848c0 4.4 3.6 8 8 8h60c4.4 0 8-3.6 8-8V509.7H624c6.7 0 10.4-7.7 6.3-12.9L518.3 355z"}}]},name:"vertical-align-top",theme:"outlined"},mOe=vOe;function _3(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){gOe(e,a,n[a])})}return e}function gOe(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var V2=function(t,n){var r=_3({},t,n.attrs);return g(Et,_3({},r,{icon:mOe}),null)};V2.displayName="VerticalAlignTopOutlined";V2.inheritAttrs=!1;var yOe=V2,bOe={visibilityHeight:u.number.def(400),duration:u.number.def(450),target:Function,prefixCls:u.string,onClick:u.func},COe=G({name:"ABackTop",inheritAttrs:!1,props:bOe,emits:["click"],setup:function(t,n){var r=n.slots,a=n.attrs,o=n.emit,i=ve("configProvider",St),l=H(),s=bt({visible:!1,scrollEvent:null}),c=function(){return l.value&&l.value.ownerDocument?l.value.ownerDocument:window},d=function(b){var C=t.target,S=C===void 
0?c:C,w=t.duration;Zw(0,{getContainer:S,duration:w}),o("click",b)},f=e1(function(y){var b=t.visibilityHeight,C=Xw(y.target,!0);s.visible=C>b}),p=function(){var b=t.target,C=b||c,S=C();s.scrollEvent=Kn(S,"scroll",function(w){f(w)}),f({target:S})},v=function(){s.scrollEvent&&s.scrollEvent.remove(),f.cancel()};ce(function(){return t.target},function(){v(),Ne(function(){p()})}),et(function(){Ne(function(){p()})}),O6(function(){Ne(function(){p()})}),P6(function(){v()}),Lt(function(){v()});var m=x(function(){return i.getPrefixCls("back-top",t.prefixCls)});return function(){var y,b,C=g("div",{class:"".concat(m.value,"-content")},[g("div",{class:"".concat(m.value,"-icon")},[g(yOe,null,null)])]),S=P(P({},a),{onClick:d,class:(y={},V(y,"".concat(m.value),!0),V(y,"".concat(a.class),a.class),V(y,"".concat(m.value,"-rtl"),i.direction==="rtl"),y)}),w=s.visible?g("div",le(le({},S),{},{ref:l}),[((b=r.default)===null||b===void 0?void 0:b.call(r))||C]):null,k=Lo("fade");return g(no,k,{default:function(){return[w]}})}}}),wOe=kn(COe);function E3(e){var t=e.prefixCls,n=e.value,r=e.current,a=e.offset,o=a===void 0?0:a,i;return o&&(i={position:"absolute",top:"".concat(o,"00%"),left:0}),g("p",{style:i,class:Se("".concat(t,"-only-unit"),{current:r})},[n])}function SOe(e,t,n){for(var r=e,a=0;(r+10)%10!==t;)r+=n,a+=n;return a}var kOe=G({name:"SingleNumber",props:{prefixCls:String,value:String,count:Number},setup:function(t){var n=x(function(){return Number(t.value)}),r=x(function(){return Math.abs(t.count)}),a=bt({prevValue:n.value,prevCount:r.value}),o=function(){a.prevValue=n.value,a.prevCount=r.value},i=H();return ce(n,function(){clearTimeout(i.value),i.value=setTimeout(function(){o()},1e3)},{flush:"post"}),Wr(function(){clearTimeout(i.value)}),function(){var l,s={},c=n.value;if(a.prevValue===c||Number.isNaN(c)||Number.isNaN(a.prevValue))l=[E3(P(P({},t),{current:!0}))],s={transition:"none"};else{l=[];for(var d=c+10,f=[],p=c;p<=d;p+=1)f.push(p);var v=f.findIndex(function(y){return 
y%10===a.prevValue});l=f.map(function(y,b){var C=y%10;return E3(P(P({},t),{value:C,offset:b-v,current:b===v}))});var m=a.prevCount<r.value?1:-1;s={transform:"translateY(".concat(-SOe(a.prevValue,c,m),"00%)")}}return g("span",{class:"".concat(t.prefixCls,"-only"),style:s,onTransitionend:function(){return o()}},[l])}}}),$Oe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},OOe={prefixCls:u.string,count:u.any,component:u.string,title:u.oneOfType([u.number,u.string,null]),show:Boolean},POe=G({name:"ScrollNumber",inheritAttrs:!1,props:OOe,setup:function(t,n){var r=n.attrs,a=n.slots,o=Wt("scroll-number",t),i=o.prefixCls;return function(){var l,s=P(P({},t),r);s.prefixCls;var c=s.count,d=s.title;s.show;var f=s.component,p=f===void 0?"sup":f,v=s.class,m=s.style,y=$Oe(s,["prefixCls","count","title","show","component","class","style"]),b=P(P({},y),{style:m,"data-show":t.show,class:Se(i.value,v),title:d}),C=c;if(c&&Number(c)%1===0){var S=String(c).split("");C=S.map(function(k,$){return g(kOe,{prefixCls:i.value,count:Number(c),value:k,key:S.length-$},null)})}m&&m.borderColor&&(b.style=P(P({},m),{boxShadow:"0 0 0 1px ".concat(m.borderColor," inset")}));var w=La((l=a.default)===null||l===void 0?void 0:l.call(a));return w&&w.length?Ot(w,{class:Se("".concat(i.value,"-custom-component"))},!1):g(p,b,{default:function(){return[C]}})}}});function od(e){return B2.indexOf(e)!==-1}var TOe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var 
a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},xOe={prefix:u.string,color:{type:String},text:u.any,placement:u.oneOf(rt("start","end")).def("end")},Wv=G({name:"ABadgeRibbon",inheritAttrs:!1,props:xOe,slots:["text"],setup:function(t,n){var r=n.attrs,a=n.slots,o=Wt("ribbon",t),i=o.prefixCls,l=o.direction,s=x(function(){return od(t.color)}),c=x(function(){var d;return[i.value,"".concat(i.value,"-placement-").concat(t.placement),(d={},V(d,"".concat(i.value,"-rtl"),l.value==="rtl"),V(d,"".concat(i.value,"-color-").concat(t.color),s.value),d)]});return function(){var d,f,p=r.class,v=r.style,m=TOe(r,["class","style"]),y={},b={};return t.color&&!s.value&&(y.background=t.color,b.color=t.color),g("div",le({class:"".concat(i.value,"-wrapper")},m),[(d=a.default)===null||d===void 0?void 0:d.call(a),g("div",{class:[c.value,p],style:P(P({},y),v)},[g("span",{class:"".concat(i.value,"-text")},[t.text||((f=a.text)===null||f===void 0?void 0:f.call(a))]),g("div",{class:"".concat(i.value,"-corner"),style:b},null)])])}}}),_Oe=function(t){return!isNaN(parseFloat(t))&&isFinite(t)},WA=_Oe,EOe={count:u.any,showZero:u.looseBool,overflowCount:u.number.def(99),dot:u.looseBool,prefixCls:u.string,scrollNumberPrefixCls:u.string,status:u.oneOf(rt("success","processing","default","error","warning")),size:u.oneOf(rt("default","small")).def("default"),color:u.string,text:u.VNodeChild,offset:u.arrayOf(u.oneOfType([String,Number])),numberStyle:u.style,title:u.string},Kp=G({name:"ABadge",Ribbon:Wv,inheritAttrs:!1,props:EOe,slots:["text","count"],setup:function(t,n){var r=n.slots,a=n.attrs,o=Wt("badge",t),i=o.prefixCls,l=o.direction,s=x(function(){return t.count>t.overflowCount?"".concat(t.overflowCount,"+"):t.count}),c=x(function(){return t.status!==null&&t.status!==void 0||t.color!==null&&t.color!==void 0}),d=x(function(){return s.value==="0"||s.value===0}),f=x(function(){return 
t.dot&&!d.value||c.value}),p=x(function(){return f.value?"":s.value}),v=x(function(){var k=p.value===null||p.value===void 0||p.value==="";return(k||d.value&&!t.showZero)&&!f.value}),m=H(t.count),y=H(p.value),b=H(f.value);ce([function(){return t.count},p,f],function(){v.value||(m.value=t.count,y.value=p.value,b.value=f.value)},{immediate:!0});var C=x(function(){var k;return k={},V(k,"".concat(i.value,"-status-dot"),c.value),V(k,"".concat(i.value,"-status-").concat(t.status),!!t.status),V(k,"".concat(i.value,"-status-").concat(t.color),od(t.color)),k}),S=x(function(){return t.color&&!od(t.color)?{background:t.color}:{}}),w=x(function(){var k;return k={},V(k,"".concat(i.value,"-dot"),b.value),V(k,"".concat(i.value,"-count"),!b.value),V(k,"".concat(i.value,"-count-sm"),t.size==="small"),V(k,"".concat(i.value,"-multiple-words"),!b.value&&y.value&&y.value.toString().length>1),V(k,"".concat(i.value,"-status-").concat(t.status),!!t.status),V(k,"".concat(i.value,"-status-").concat(t.color),od(t.color)),k});return function(){var k,$,O,T=t.offset,_=t.title,I=t.color,L=a.style,j=jn(r,t,"text"),F=i.value,N=m.value,D=Un(($=r.default)===null||$===void 0?void 0:$.call(r));D=D.length?D:null;var z=!!(!v.value||r.count),B=function(){if(!T)return P({},L);var ne={marginTop:WA(T[1])?"".concat(T[1],"px"):T[1]};return l.value==="rtl"?ne.left="".concat(parseInt(T[0],10),"px"):ne.right="".concat(-parseInt(T[0],10),"px"),P(P({},ne),L)}(),M=_!=null?_:typeof N=="string"||typeof N=="number"?N:void 0,E=z||!j?null:g("span",{class:"".concat(F,"-status-text")},[j]),K=kt(N)==="object"||N===void 0&&r.count?Ot(N!=null?N:(O=r.count)===null||O===void 0?void 0:O.call(r),{style:B},!1):null,W=Se(F,(k={},V(k,"".concat(F,"-status"),c.value),V(k,"".concat(F,"-not-a-wrapper"),!D),V(k,"".concat(F,"-rtl"),l.value==="rtl"),k),a.class);if(!D&&c.value){var Y=B.color;return 
g("span",le(le({},a),{},{class:W,style:B}),[g("span",{class:C.value,style:S.value},null),g("span",{style:{color:Y},class:"".concat(F,"-status-text")},[j])])}var q=Lo(D?"".concat(F,"-zoom"):"",{appear:!1}),J=P(P({},B),t.numberStyle);return I&&!od(I)&&(J=J||{},J.background=I),g("span",le(le({},a),{},{class:W}),[D,g(no,q,{default:function(){return[at(g(POe,{prefixCls:t.scrollNumberPrefixCls,show:z,class:w.value,count:y.value,title:M,style:J,key:"scrollNumber"},{default:function(){return[K]}}),[[_t,z]])]}}),E])}}});Kp.install=function(e){return e.component(Kp.name,Kp),e.component(Wv.name,Wv),e};var Ws={adjustX:1,adjustY:1},Us=[0,0],MOe={topLeft:{points:["bl","tl"],overflow:Ws,offset:[0,-4],targetOffset:Us},topCenter:{points:["bc","tc"],overflow:Ws,offset:[0,-4],targetOffset:Us},topRight:{points:["br","tr"],overflow:Ws,offset:[0,-4],targetOffset:Us},bottomLeft:{points:["tl","bl"],overflow:Ws,offset:[0,4],targetOffset:Us},bottomCenter:{points:["tc","bc"],overflow:Ws,offset:[0,4],targetOffset:Us},bottomRight:{points:["tr","br"],overflow:Ws,offset:[0,4],targetOffset:Us}},IOe=MOe,NOe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return 
n},AOe=G({mixins:[nt],props:{minOverlayWidthMatchTrigger:u.looseBool,prefixCls:u.string.def("rc-dropdown"),transitionName:u.string,overlayClassName:u.string.def(""),openClassName:u.string,animation:u.any,align:u.object,overlayStyle:u.object.def(function(){return{}}),placement:u.string.def("bottomLeft"),overlay:u.any,trigger:u.oneOfType([u.string,u.arrayOf(u.string)]).def("hover"),alignPoint:u.looseBool,showAction:u.array,hideAction:u.array,getPopupContainer:u.func,visible:u.looseBool,defaultVisible:u.looseBool.def(!1),mouseEnterDelay:u.number.def(.15),mouseLeaveDelay:u.number.def(.1)},data:function(){var t=this.defaultVisible;return vt(this,"visible")&&(t=this.visible),{sVisible:t}},watch:{visible:function(t){t!==void 0&&this.setState({sVisible:t})}},methods:{onClick:function(t){var n=this.getOverlayElement().props;vt(this,"visible")||this.setState({sVisible:!1}),this.__emit("overlayClick",t),n.onClick&&n.onClick(t)},onVisibleChange:function(t){vt(this,"visible")||this.setState({sVisible:t}),this.__emit("update:visible",t),this.__emit("visibleChange",t)},getMinOverlayWidthMatchTrigger:function(){var t=Qe(this),n=t.minOverlayWidthMatchTrigger,r=t.alignPoint;return"minOverlayWidthMatchTrigger"in t?n:!r},getOverlayElement:function(){var t=We(this,"overlay");return Array.isArray(t)?t[0]:t},getMenuElement:function(){var t=this,n=this.onClick,r=this.prefixCls,a=this.getOverlayElement(),o={prefixCls:"".concat(r,"-menu"),getPopupContainer:function(){return t.getPopupDomNode()},onClick:n};return a&&a.type===Fo&&delete o.prefixCls,Ot(a,o)},getMenuElementOrLambda:function(){var t=this.overlay||this.$slots.overlay;return typeof t=="function"?this.getMenuElement:this.getMenuElement()},getPopupDomNode:function(){return this.triggerRef.getPopupDomNode()},getOpenClassName:function(){var t=this.$props,n=t.openClassName,r=t.prefixCls;return n!==void 0?n:"".concat(r,"-open")},afterVisibleChange:function(t){if(t&&this.getMinOverlayWidthMatchTrigger()){var 
n=this.getPopupDomNode(),r=Sn(this);r&&n&&r.offsetWidth>n.offsetWidth&&(n.style.minWidth="".concat(r.offsetWidth,"px"),this.triggerRef&&this.triggerRef._component&&this.triggerRef._component.alignInstance&&this.triggerRef._component.alignInstance.forceAlign())}},renderChildren:function(){var t=ht(this),n=this.sVisible;return n&&t?Ot(t[0],{class:this.getOpenClassName()},!1):t},saveTrigger:function(t){this.triggerRef=t}},render:function(){var t=this,n=this.$props,r=n.prefixCls,a=n.transitionName,o=n.animation,i=n.align,l=n.placement,s=n.getPopupContainer,c=n.showAction,d=n.hideAction,f=n.overlayClassName,p=n.overlayStyle,v=n.trigger,m=NOe(n,["prefixCls","transitionName","animation","align","placement","getPopupContainer","showAction","hideAction","overlayClassName","overlayStyle","trigger"]),y=d;!y&&v.indexOf("contextmenu")!==-1&&(y=["click"]);var b=P(P({},m),{prefixCls:r,popupClassName:f,popupStyle:p,builtinPlacements:IOe,action:v,showAction:c,hideAction:y||[],popupPlacement:l,popupAlign:i,popupTransitionName:a,popupAnimation:o,popupVisible:this.sVisible,afterPopupVisibleChange:this.afterVisibleChange,getPopupContainer:s,onPopupVisibleChange:this.onVisibleChange,popup:this.getMenuElementOrLambda(),ref:this.saveTrigger});return g(Ii,b,{default:function(){return[t.renderChildren()]}})}}),UA=function(){return{trigger:{type:[Array,String],default:"hover"},overlay:u.any,visible:u.looseBool,disabled:u.looseBool,align:u.object,getPopupContainer:u.func,prefixCls:u.string,transitionName:u.string,placement:u.oneOf(rt("topLeft","topCenter","topRight","bottomLeft","bottomCenter","bottomRight")),overlayClassName:u.string,overlayStyle:u.style,forceRender:u.looseBool,mouseEnterDelay:u.number,mouseLeaveDelay:u.number,openClassName:u.string,minOverlayWidthMatchTrigger:u.looseBool}},DOe={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M176 511a56 56 0 10112 0 56 56 0 10-112 0zm280 0a56 56 0 10112 0 56 56 0 10-112 0zm280 0a56 56 0 
10112 0 56 56 0 10-112 0z"}}]},name:"ellipsis",theme:"outlined"},ROe=DOe;function M3(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){LOe(e,a,n[a])})}return e}function LOe(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var z2=function(t,n){var r=M3({},t,n.attrs);return g(Et,M3({},r,{icon:ROe}),null)};z2.displayName="EllipsisOutlined";z2.inheritAttrs=!1;var YA=z2,FOe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},BOe=Hv(),I3=UA(),VOe=Cr.Group,zOe=P(P({},I3),{type:u.oneOf(rt("primary","ghost","dashed","danger","default")).def("default"),size:u.oneOf(rt("small","large","default")).def("default"),htmlType:BOe.htmlType,href:u.string,disabled:u.looseBool,prefixCls:u.string,placement:I3.placement.def("bottomRight"),icon:u.any,title:u.string,onClick:u.func,onVisibleChange:u.func,"onUpdate:visible":u.func}),cf=G({name:"ADropdownButton",inheritAttrs:!1,props:zOe,emits:["click","visibleChange","update:visible"],setup:function(){return{configProvider:ve("configProvider",St),popupRef:null}},created:function(){ot("savePopupRef",this.savePopupRef)},methods:{savePopupRef:function(t){this.popupRef=t},handleClick:function(t){this.$emit("click",t)},handleVisibleChange:function(t){this.$emit("update:visible",t),this.$emit("visibleChange",t)}},render:function(){var t=this,n=P(P({},this.$props),this.$attrs),r=n.type,a=n.disabled;n.onClick;var 
o=n.htmlType,i=n.class,l=n.prefixCls;n.overlay;var s=n.trigger,c=n.align,d=n.visible;n.onVisibleChange;var f=n.placement,p=n.getPopupContainer,v=n.href,m=n.title,y=FOe(n,["type","disabled","onClick","htmlType","class","prefixCls","overlay","trigger","align","visible","onVisibleChange","placement","getPopupContainer","href","title"]),b=We(this,"icon")||g(YA,null,null),C=this.configProvider.getPopupContainer,S=this.configProvider.getPrefixCls,w=S("dropdown-button",l),k={align:c,disabled:a,trigger:a?[]:s,placement:f,getPopupContainer:p||C,onVisibleChange:this.handleVisibleChange};vt(this,"visible")&&(k.visible=d);var $=P(P({},y),{class:Se(w,i)});return g(VOe,$,{default:function(){return[g(Cr,{type:r,disabled:a,onClick:t.handleClick,htmlType:o,href:v,title:m},{default:function(){return[ht(t)]}}),g(hi,le(le({},k),{},{overlay:We(t,"overlay")}),{default:function(){return[g(Cr,{type:r},{default:function(){return[b]}})]}})]}})}}),HOe={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M765.7 486.8L314.9 134.7A7.97 7.97 0 00302 141v77.3c0 4.9 2.3 9.6 6.1 12.6l360 281.1-360 281.1c-3.9 3-6.1 7.7-6.1 12.6V883c0 6.7 7.7 10.4 12.9 6.3l450.8-352.1a31.96 31.96 0 000-50.4z"}}]},name:"right",theme:"outlined"},jOe=HOe;function N3(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){KOe(e,a,n[a])})}return e}function KOe(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var H2=function(t,n){var r=N3({},t,n.attrs);return g(Et,N3({},r,{icon:jOe}),null)};H2.displayName="RightOutlined";H2.inheritAttrs=!1;var 
wi=H2,A3=UA(),qA=G({name:"ADropdown",inheritAttrs:!1,props:P(P({},A3),{prefixCls:u.string,mouseEnterDelay:u.number.def(.15),mouseLeaveDelay:u.number.def(.1),placement:A3.placement.def("bottomLeft"),onVisibleChange:u.func,"onUpdate:visible":u.func}),emits:["visibleChange","update:visible"],setup:function(){return{configProvider:ve("configProvider",St),popupRef:null}},created:function(){ot("savePopupRef",this.savePopupRef)},methods:{savePopupRef:function(t){this.popupRef=t},getTransitionName:function(){var t=this.$props,n=t.placement,r=n===void 0?"":n,a=t.transitionName;return a!==void 0?a:r.indexOf("top")>=0?"slide-down":"slide-up"},renderOverlay:function(t){var n=We(this,"overlay"),r=Array.isArray(n)?n[0]:n,a=r&&rr(r),o=a||{},i=o.selectable,l=i===void 0?!1:i,s=o.focusable,c=s===void 0?!0:s,d=function(){return g("span",{class:"".concat(t,"-menu-submenu-arrow")},[g(wi,{class:"".concat(t,"-menu-submenu-arrow-icon")},null)])},f=zn(r)?hr(r,{mode:"vertical",selectable:l,focusable:c,expandIcon:d}):n;return f},handleVisibleChange:function(t){this.$emit("update:visible",t),this.$emit("visibleChange",t)}},render:function(){var t,n=Qe(this),r=n.prefixCls,a=n.trigger,o=n.disabled,i=n.getPopupContainer,l=this.configProvider.getPopupContainer,s=this.configProvider.getPrefixCls,c=s("dropdown",r),d=ht(this)[0],f=Ot(d,{class:Se((t=d==null?void 0:d.props)===null||t===void 0?void 0:t.class,"".concat(c,"-trigger")),disabled:o}),p=o?[]:typeof a=="string"?[a]:a,v;p&&p.indexOf("contextmenu")!==-1&&(v=!0);var m=P(P(P({alignPoint:v},n),this.$attrs),{prefixCls:c,getPopupContainer:i||l,transitionName:this.getTransitionName(),trigger:p,overlay:this.renderOverlay(c),onVisibleChange:this.handleVisibleChange});return g(AOe,m,{default:function(){return[f]}})}});qA.Button=cf;var hi=qA,WOe={prefixCls:u.string,href:u.string,separator:u.any,overlay:u.any},df=G({name:"ABreadcrumbItem",__ANT_BREADCRUMB_ITEM:!0,props:WOe,slots:["separator","overlay"],setup:function(t,n){var 
r=n.slots,a=Wt("breadcrumb",t),o=a.prefixCls,i=function(s,c){var d=jn(r,t,"overlay");return d?g(hi,{overlay:d,placement:"bottomCenter"},{default:function(){return[g("span",{class:"".concat(c,"-overlay-link")},[s,g(Rs,null,null)])]}}):s};return function(){var l,s=(l=jn(r,t,"separator"))!==null&&l!==void 0?l:"/",c=jn(r,t),d;return t.href!==void 0?d=g("a",{class:"".concat(o.value,"-link")},[c]):d=g("span",{class:"".concat(o.value,"-link")},[c]),d=i(d,o.value),c?g("span",null,[d,s&&g("span",{class:"".concat(o.value,"-separator")},[s])]):null}}});function UOe(e,t,n,r){var a=n?n.call(r,e,t):void 0;if(a!==void 0)return!!a;if(e===t)return!0;if(kt(e)!=="object"||!e||kt(t)!=="object"||!t)return!1;var o=Object.keys(e),i=Object.keys(t);if(o.length!==i.length)return!1;for(var l=Object.prototype.hasOwnProperty.bind(t),s=0;s<o.length;s++){var c=o[s];if(!l(c))return!1;var d=e[c],f=t[c];if(a=n?n.call(r,d,f,c):void 0,a===!1||a===void 0&&d!==f)return!1}return!0}function Mr(e,t,n,r){return UOe(Gt(e),Gt(t),n,r)}var GA=Symbol("menuContextKey"),XA=function(t){ot(GA,t)},Ni=function(){return ve(GA)},ZA=Symbol("menuFirstLevelContextKey"),JA=function(t){ot(ZA,t)},YOe=function(){return ve(ZA,!0)},Uv=G({name:"MenuContextProvider",inheritAttrs:!1,props:{mode:{type:String,default:void 0},overflowDisabled:{type:Boolean,default:void 0},isRootMenu:{type:Boolean,default:void 0}},setup:function(t,n){var r=n.slots,a=Ni(),o=P({},a);return t.mode!==void 0&&(o.mode=yn(t,"mode")),t.isRootMenu!==void 0&&(o.isRootMenu=yn(t,"isRootMenu")),t.overflowDisabled!==void 0&&(o.overflowDisabled=yn(t,"overflowDisabled")),XA(o),function(){var i;return(i=r.default)===null||i===void 0?void 0:i.call(r)}}}),qOe=XA,QA=Symbol("siderCollapsed"),eD=Symbol("siderHookProvider"),tD=Symbol("KeyPathContext"),j2=function(){return ve(tD,{parentEventKeys:x(function(){return[]}),parentKeys:x(function(){return[]}),parentInfo:{}})},GOe=function(t,n,r){var 
a=j2(),o=a.parentEventKeys,i=a.parentKeys,l=x(function(){return[].concat(Je(o.value),[t])}),s=x(function(){return[].concat(Je(i.value),[n])});return ot(tD,{parentEventKeys:l,parentKeys:s,parentInfo:r}),s},XOe=GOe;function nD(e){var t=Ni(),n=t.mode,r=t.rtl,a=t.inlineIndent;return x(function(){return n.value!=="inline"?null:r.value?{paddingRight:"".concat(e.value*a.value,"px")}:{paddingLeft:"".concat(e.value*a.value,"px")}})}var ZOe=0,JOe={role:String,disabled:Boolean,danger:Boolean,title:{type:[String,Boolean],default:void 0},icon:u.VNodeChild},fl=G({name:"AMenuItem",inheritAttrs:!1,props:JOe,emits:["mouseenter","mouseleave","click","keydown","focus"],slots:["icon","title"],setup:function(t,n){var r=n.slots,a=n.emit,o=n.attrs,i=$t(),l=kt(i.vnode.key)==="symbol"?String(i.vnode.key):i.vnode.key;uf(kt(i.vnode.key)!=="symbol","MenuItem",'MenuItem `:key="'.concat(String(l),'"` not support Symbol type'));var s="menu_item_".concat(++ZOe,"_$$_").concat(l),c=j2(),d=c.parentEventKeys,f=c.parentKeys,p=Ni(),v=p.prefixCls,m=p.activeKeys,y=p.disabled,b=p.changeActiveKeys,C=p.rtl,S=p.inlineCollapsed,w=p.siderCollapsed,k=p.onItemClick,$=p.selectedKeys,O=p.registerMenuInfo,T=p.unRegisterMenuInfo,_=YOe(),I=H(!1),L=x(function(){return[].concat(Je(f.value),[l])}),j={eventKey:s,key:l,parentEventKeys:d,parentKeys:f,isLeaf:!0};O(s,j),Lt(function(){T(s)}),ce(m,function(){I.value=!!m.value.find(function(J){return J===l})},{immediate:!0});var F=x(function(){return y.value||t.disabled}),N=x(function(){return $.value.includes(l)}),D=x(function(){var J,ne="".concat(v.value,"-item");return J={},V(J,"".concat(ne),!0),V(J,"".concat(ne,"-danger"),t.danger),V(J,"".concat(ne,"-active"),I.value),V(J,"".concat(ne,"-selected"),N.value),V(J,"".concat(ne,"-disabled"),F.value),J}),z=function(ne){return{key:l,eventKey:s,keyPath:L.value,eventKeyPath:[].concat(Je(d.value),[s]),domEvent:ne,item:P(P({},t),o)}},B=function(ne){if(!F.value){var 
oe=z(ne);a("click",ne),k(oe)}},M=function(ne){F.value||(b(L.value),a("mouseenter",ne))},E=function(ne){F.value||(b([]),a("mouseleave",ne))},K=function(ne){if(a("keydown",ne),ne.which===ze.ENTER){var oe=z(ne);a("click",ne),k(oe)}},W=function(ne){b(L.value),a("focus",ne)},Y=function(ne,oe){var Q=g("span",{class:"".concat(v.value,"-title-content")},[oe]);return(!ne||zn(oe)&&oe.type==="span")&&oe&&S.value&&_&&typeof oe=="string"?g("div",{class:"".concat(v.value,"-inline-collapsed-noicon")},[oe.charAt(0)]):Q},q=nD(x(function(){return L.value.length}));return function(){var J,ne,oe,Q,ae=(ne=t.title)!==null&&ne!==void 0?ne:(oe=r.title)===null||oe===void 0?void 0:oe.call(r),de=Un((Q=r.default)===null||Q===void 0?void 0:Q.call(r)),be=de.length,Ee=ae;typeof ae=="undefined"?Ee=_&&be?de:"":ae===!1&&(Ee="");var Pe={title:Ee};!w.value&&!S.value&&(Pe.title=null,Pe.visible=!1);var Be={};t.role==="option"&&(Be["aria-selected"]=N.value);var te=jn(r,t,"icon");return g(Io,le(le({},Pe),{},{placement:C.value?"left":"right",overlayClassName:"".concat(v.value,"-inline-collapsed-tooltip")}),{default:function(){return[g(Zl.Item,le(le(le({component:"li"},o),{},{style:P(P({},o.style||{}),q.value),class:[D.value,(J={},V(J,"".concat(o.class),!!o.class),V(J,"".concat(v.value,"-item-only-child"),(te?be+1:be)===1),J)],role:t.role||"menuitem",tabindex:t.disabled?null:-1,"data-menu-id":l,"aria-disabled":t.disabled},Be),{},{onMouseenter:M,onMouseleave:E,onClick:B,onKeydown:K,onFocus:W,title:typeof ae=="string"?ae:void 
0}),{default:function(){return[Ot(te,{class:"".concat(v.value,"-item-icon")}),Y(te,de)]}})]}})}}}),Zi={adjustX:1,adjustY:1},QOe={topLeft:{points:["bl","tl"],overflow:Zi,offset:[0,-7]},bottomLeft:{points:["tl","bl"],overflow:Zi,offset:[0,7]},leftTop:{points:["tr","tl"],overflow:Zi,offset:[-4,0]},rightTop:{points:["tl","tr"],overflow:Zi,offset:[4,0]}},ePe={topLeft:{points:["bl","tl"],overflow:Zi,offset:[0,-7]},bottomLeft:{points:["tl","bl"],overflow:Zi,offset:[0,7]},rightTop:{points:["tr","tl"],overflow:Zi,offset:[-4,0]},leftTop:{points:["tl","tr"],overflow:Zi,offset:[4,0]}},tPe={horizontal:"bottomLeft",vertical:"rightTop","vertical-left":"rightTop","vertical-right":"leftTop"},D3=G({name:"PopupTrigger",inheritAttrs:!1,props:{prefixCls:String,mode:String,visible:Boolean,popupClassName:String,popupOffset:Array,disabled:Boolean,onVisibleChange:Function},slots:["popup"],emits:["visibleChange"],setup:function(t,n){var r=n.slots,a=n.emit,o=H(!1),i=Ni(),l=i.getPopupContainer,s=i.rtl,c=i.subMenuOpenDelay,d=i.subMenuCloseDelay,f=i.builtinPlacements,p=i.triggerSubMenuAction,v=i.isRootMenu,m=x(function(){return s.value?P(P({},ePe),f.value):P(P({},QOe),f.value)}),y=x(function(){return tPe[t.mode]}),b=H();ce(function(){return t.visible},function(S){en.cancel(b.value),b.value=en(function(){o.value=S})},{immediate:!0}),Lt(function(){en.cancel(b.value)});var C=function(w){a("visibleChange",w)};return function(){var S=t.prefixCls,w=t.popupClassName,k=t.mode,$=t.popupOffset,O=t.disabled;return g(Ii,{prefixCls:S,popupClassName:Se("".concat(S,"-popup"),V({},"".concat(S,"-rtl"),s.value),w),stretch:k==="horizontal"?"minWidth":null,getPopupContainer:v.value?l.value:function(T){return T.parentNode},builtinPlacements:m.value,popupPlacement:y.value,popupVisible:o.value,popupAlign:$&&{offset:$},action:O?[]:[p.value],mouseEnterDelay:c.value,mouseLeaveDelay:d.value,onPopupVisibleChange:C,forceRender:!0},{popup:function(){var _;return(_=r.popup)===null||_===void 0?void 
0:_.call(r,{visible:o.value})},default:r.default})}}}),rD=function(t,n){var r=n.slots,a=n.attrs,o,i=Ni(),l=i.prefixCls,s=i.mode;return g("ul",le(le({},a),{},{class:Se(l.value,"".concat(l.value,"-sub"),"".concat(l.value,"-").concat(s.value==="inline"?"inline":"vertical")),"data-menu-list":!0}),[(o=r.default)===null||o===void 0?void 0:o.call(r)])};rD.displayName="SubMenuList";var aD=rD,nPe=G({name:"InlineSubMenuList",inheritAttrs:!1,props:{id:String,open:Boolean,keyPath:Array},setup:function(t,n){var r=n.slots,a=x(function(){return"inline"}),o=Ni(),i=o.motion,l=o.mode,s=o.defaultMotions,c=x(function(){return l.value===a.value}),d=H(!c.value),f=x(function(){return c.value?t.open:!1});ce(l,function(){c.value&&(d.value=!1)},{flush:"post"});var p=H({}),v=H(""),m=x(function(){var y,b,C=i.value||((y=s.value)===null||y===void 0?void 0:y[a.value])||((b=s.value)===null||b===void 0?void 0:b.other),S=typeof C=="function"?C(p,v):C;return P(P({},S),{appear:t.keyPath.length<=1})});return function(){var y;return d.value?null:g(Uv,{mode:a.value},{default:function(){return[g(Cg,m.value,{default:function(){return[at(g(aD,{id:t.id,style:p.value,class:v.value},{default:function(){return[(y=r.default)===null||y===void 0?void 0:y.call(r)]}}),[[_t,f.value]])]}})]}})}}}),R3=0,rPe={icon:u.VNodeChild,title:u.VNodeChild,disabled:Boolean,level:Number,popupClassName:String,popupOffset:Array,internalPopupClose:Boolean,eventKey:String,expandIcon:Function},sc=G({name:"ASubMenu",inheritAttrs:!1,props:rPe,slots:["icon","title","expandIcon"],emits:["titleClick","mouseenter","mouseleave"],setup:function(t,n){var r=n.slots,a=n.attrs,o=n.emit,i,l;JA(!1);var s=$t(),c=kt(s.vnode.key)==="symbol"?String(s.vnode.key):s.vnode.key;uf(kt(s.vnode.key)!=="symbol","SubMenu",'SubMenu `:key="'.concat(String(c),'"` not support Symbol type'));var d=tf(c)?c:"sub_menu_".concat(++R3,"_$$_not_set_key"),f=(i=t.eventKey)!==null&&i!==void 
0?i:tf(c)?"sub_menu_".concat(++R3,"_$$_").concat(c):d,p=j2(),v=p.parentEventKeys,m=p.parentInfo,y=p.parentKeys,b=x(function(){return[].concat(Je(y.value),[d])}),C=x(function(){return[].concat(Je(v.value),[f])}),S=H([]),w={eventKey:f,key:d,parentEventKeys:v,childrenEventKeys:S,parentKeys:y};(l=m.childrenEventKeys)===null||l===void 0||l.value.push(f),Lt(function(){var ee;m.childrenEventKeys&&(m.childrenEventKeys.value=(ee=m.childrenEventKeys)===null||ee===void 0?void 0:ee.value.filter(function(me){return me!=f}))}),XOe(f,d,w);var k=Ni(),$=k.prefixCls,O=k.activeKeys,T=k.disabled,_=k.changeActiveKeys,I=k.mode,L=k.inlineCollapsed,j=k.antdMenuTheme,F=k.openKeys,N=k.overflowDisabled,D=k.onOpenChange,z=k.registerMenuInfo,B=k.unRegisterMenuInfo,M=k.selectedSubMenuEventKeys,E=k.motion,K=k.defaultMotions,W=k.expandIcon;z(f,w),Lt(function(){B(f)});var Y=x(function(){return"".concat($.value,"-submenu")}),q=x(function(){return T.value||t.disabled}),J=H(),ne=H(),oe=x(function(){return F.value.includes(d)}),Q=x(function(){return!N.value&&oe.value}),ae=x(function(){return M.value.includes(f)}),de=H(!1);ce(O,function(){de.value=!!O.value.find(function(ee){return ee===d})},{immediate:!0});var be=function(me){q.value||(o("titleClick",me,d),I.value==="inline"&&D(f,!oe.value))},Ee=function(me){q.value||(_(b.value),o("mouseenter",me))},Pe=function(me){q.value||(_([]),o("mouseleave",me))},Be=nD(x(function(){return C.value.length})),te=function(me){I.value!=="inline"&&D(f,me)},ie=function(){_(b.value)},ge=f&&"".concat(f,"-popup"),ke=x(function(){return Se($.value,"".concat($.value,"-").concat(j.value),t.popupClassName)}),xe=function(me,He){if(!He)return L.value&&!v.value.length&&me&&typeof me=="string"?g("div",{class:"".concat($.value,"-inline-collapsed-noicon")},[me.charAt(0)]):g("span",{class:"".concat($.value,"-title-content")},[me]);var lt=zn(me)&&me.type==="span";return 
g(Fe,null,[Ot(He,{class:"".concat($.value,"-item-icon")},!1),lt?me:g("span",{class:"".concat($.value,"-title-content")},[me])])},Ie=x(function(){return I.value!=="inline"&&C.value.length>1?"vertical":I.value}),ye=x(function(){return I.value==="horizontal"?"vertical":I.value}),pe=H({}),ue=H(""),Ce=x(function(){var ee,me,He=E.value||((ee=K.value)===null||ee===void 0?void 0:ee[I.value])||((me=K.value)===null||me===void 0?void 0:me.other),lt=typeof He=="function"?He(pe,ue):He;return lt?Lo(lt.name):void 0}),je=x(function(){return Ie.value==="horizontal"?"vertical":Ie.value});return function(){var ee,me,He=jn(r,t,"icon"),lt=xe(jn(r,t,"title"),He),Ye=Y.value,he=t.expandIcon||r.expandIcon||W,_e=g("div",{style:Be.value,class:"".concat(Ye,"-title"),tabindex:q.value?null:-1,ref:J,title:typeof lt=="string"?lt:null,"data-menu-id":d,"aria-expanded":Q.value,"aria-haspopup":!0,"aria-controls":ge,"aria-disabled":q.value,onClick:be,onFocus:ie},[lt,I.value!=="horizontal"&&he?he(P(P({},t),{isOpen:Q.value})):g("i",{class:"".concat(Ye,"-arrow")},null)]);if(!N.value&&I.value!=="inline"){var $e=Ie.value,Ve=function(){return _e}();_e=g(D3,{mode:$e,prefixCls:Ye,visible:!t.internalPopupClose&&Q.value,popupClassName:ke.value,popupOffset:t.popupOffset,disabled:q.value,onVisibleChange:te},{default:function(){return[Ve]},popup:function(Ut){var Ft=Ut.visible,Jt;return g(Uv,{mode:je.value,isRootMenu:!1},{default:function(){return[g(Cg,Ce.value,{default:function(){return[at(g(aD,{id:ge,ref:ne},{default:function(){return[(Jt=r.default)===null||Jt===void 0?void 0:Jt.call(r)]}}),[[_t,Ft]])]}})]}})}})}else{var st=function(){return _e}();_e=g(D3,null,{default:function(){return[st]}})}return 
g(Uv,{mode:ye.value},{default:function(){return[g(Zl.Item,le(le({component:"li"},a),{},{role:"none",class:Se(Ye,"".concat(Ye,"-").concat(I.value),a.class,(ee={},V(ee,"".concat(Ye,"-open"),Q.value),V(ee,"".concat(Ye,"-active"),de.value),V(ee,"".concat(Ye,"-selected"),ae.value),V(ee,"".concat(Ye,"-disabled"),q.value),ee)),onMouseenter:Ee,onMouseleave:Pe,"data-submenu-id":d}),{default:function(){return[_e,!N.value&&g(nPe,{id:ge,open:Q.value,keyPath:b.value},{default:function(){return[(me=r.default)===null||me===void 0?void 0:me.call(r)]}})]}})]}})}}}),aPe={prefixCls:String,disabled:Boolean,inlineCollapsed:Boolean,disabledOverflow:Boolean,openKeys:Array,selectedKeys:Array,activeKey:String,selectable:{type:Boolean,default:!0},multiple:{type:Boolean,default:!1},motion:Object,theme:{type:String,default:"light"},mode:{type:String,default:"vertical"},inlineIndent:{type:Number,default:24},subMenuOpenDelay:{type:Number,default:.1},subMenuCloseDelay:{type:Number,default:.1},builtinPlacements:{type:Object},triggerSubMenuAction:{type:String,default:"hover"},getPopupContainer:Function,expandIcon:Function},L3=[],jr=G({name:"AMenu",props:aPe,emits:["update:openKeys","openChange","select","deselect","update:selectedKeys","click","update:activeKey"],slots:["expandIcon","overflowedIndicator"],setup:function(t,n){var r=n.slots,a=n.emit,o=Wt("menu",t),i=o.prefixCls,l=o.direction,s=H({}),c=ve(QA,H(void 0)),d=x(function(){return c.value!==void 0?c.value:t.inlineCollapsed}),f=H(!1);et(function(){f.value=!0}),Wn(function(){uf(!(t.inlineCollapsed===!0&&t.mode!=="inline"),"Menu","`inlineCollapsed` should only be used when `mode` is inline."),uf(!(c.value!==void 0&&t.inlineCollapsed===!0),"Menu","`inlineCollapsed` not control Menu under Sider. 
Should set `collapsed` on Sider instead.")});var p=H([]),v=H([]),m=H({});ce(s,function(){for(var W={},Y=0,q=Object.values(s.value);Y<q.length;Y++){var J=q[Y];W[J.key]=J}m.value=W},{flush:"post"}),Wn(function(){if(t.activeKey!==void 0){var W=[],Y=t.activeKey?m.value[t.activeKey]:void 0;Y&&t.activeKey!==void 0?W=[].concat(Je(Y.parentKeys),[t.activeKey]):W=[],Mr(p.value,W)||(p.value=W)}}),ce(function(){return t.selectedKeys},function(W){v.value=W||v.value},{immediate:!0});var y=H([]);ce([m,v],function(){var W=[];v.value.forEach(function(Y){var q=m.value[Y];if(q){var J;(J=W).push.apply(J,Je(A(q.parentEventKeys)))}}),W=eU(W),Mr(y.value,W)||(y.value=W)},{immediate:!0});var b=function(Y){if(!!t.selectable){var q=Y.key,J=v.value.includes(q),ne;t.multiple?J?ne=v.value.filter(function(Q){return Q!==q}):ne=[].concat(Je(v.value),[q]):ne=[q];var oe=P(P({},Y),{selectedKeys:ne});Mr(ne,v.value)||(t.selectedKeys===void 0&&(v.value=ne),a("update:selectedKeys",ne),J&&t.multiple?a("deselect",oe):a("select",oe)),O.value!=="inline"&&!t.multiple&&C.value.length&&I(L3)}},C=H([]);ce(function(){return t.openKeys},function(){var W=arguments.length>0&&arguments[0]!==void 0?arguments[0]:C.value;Mr(C.value,W)||(C.value=W)},{immediate:!0});var S,w=function(Y){window.clearTimeout(S),S=window.setTimeout(function(){t.activeKey===void 0&&(p.value=Y),a("update:activeKey",Y[Y.length-1])})},k=x(function(){return!!t.disabled}),$=x(function(){return l.value==="rtl"}),O=H("vertical"),T=H(!1);Wn(function(){(t.mode==="inline"||t.mode==="vertical")&&d.value?(O.value="vertical",T.value=d.value):(O.value=t.mode,T.value=!1)});var _=x(function(){return O.value==="inline"}),I=function(Y){C.value=Y,a("update:openKeys",Y),a("openChange",Y)},L=H(C.value),j=H(!1);ce(C,function(){_.value&&(L.value=C.value)},{immediate:!0}),ce(_,function(){if(!j.value){j.value=!0;return}_.value?C.value=L.value:I(L3)},{immediate:!0});var F=x(function(){var W;return 
W={},V(W,"".concat(i.value),!0),V(W,"".concat(i.value,"-root"),!0),V(W,"".concat(i.value,"-").concat(O.value),!0),V(W,"".concat(i.value,"-inline-collapsed"),T.value),V(W,"".concat(i.value,"-rtl"),$.value),V(W,"".concat(i.value,"-").concat(t.theme),!0),W}),N={horizontal:{name:"ant-slide-up"},inline:wke,other:{name:"ant-zoom-big"}};JA(!0);var D=function W(){var Y=arguments.length>0&&arguments[0]!==void 0?arguments[0]:[],q=[],J=s.value;return Y.forEach(function(ne){var oe=J[ne],Q=oe.key,ae=oe.childrenEventKeys;q.push.apply(q,[Q].concat(Je(W(ae))))}),q},z=function(Y){a("click",Y),b(Y)},B=function(Y,q){var J=s.value[Y],ne=J.key,oe=J.childrenEventKeys,Q=C.value.filter(function(de){return de!==ne});if(q)Q.push(ne);else if(O.value!=="inline"){var ae=D(oe);Q=Q.filter(function(de){return!ae.includes(de)})}Mr(C,Q)||I(Q)},M=function(Y,q){s.value=P(P({},s.value),V({},Y,q))},E=function(Y){delete s.value[Y],s.value=P({},s.value)},K=H(0);return qOe({store:s,prefixCls:i,activeKeys:p,openKeys:C,selectedKeys:v,changeActiveKeys:w,disabled:k,rtl:$,mode:O,inlineIndent:x(function(){return t.inlineIndent}),subMenuCloseDelay:x(function(){return t.subMenuCloseDelay}),subMenuOpenDelay:x(function(){return t.subMenuOpenDelay}),builtinPlacements:x(function(){return t.builtinPlacements}),triggerSubMenuAction:x(function(){return t.triggerSubMenuAction}),getPopupContainer:x(function(){return t.getPopupContainer}),inlineCollapsed:T,antdMenuTheme:x(function(){return t.theme}),siderCollapsed:c,defaultMotions:x(function(){return f.value?N:null}),motion:x(function(){return f.value?t.motion:null}),overflowDisabled:H(void 0),onOpenChange:B,onItemClick:z,registerMenuInfo:M,unRegisterMenuInfo:E,selectedSubMenuEventKeys:y,isRootMenu:H(!0),expandIcon:t.expandIcon||r.expandIcon}),function(){var W,Y,q=Un((W=r.default)===null||W===void 0?void 0:W.call(r)),J=K.value>=q.length-1||O.value!=="horizontal"||t.disabledOverflow,ne=O.value!=="horizontal"||t.disabledOverflow?q:q.map(function(Q,ae){return 
g(Uv,{key:Q.key,overflowDisabled:ae>K.value},{default:function(){return[Q]}})}),oe=((Y=r.overflowedIndicator)===null||Y===void 0?void 0:Y.call(r))||g(YA,null,null);return g(Zl,{prefixCls:"".concat(i.value,"-overflow"),component:"ul",itemComponent:fl,class:F.value,role:"menu",data:ne,renderRawItem:function(ae){return ae},renderRawRest:function(ae){var de=ae.length,be=de?q.slice(-de):null;return g(sc,{eventKey:Zl.OVERFLOW_KEY,title:oe,disabled:J,internalPopupClose:de===0},{default:function(){return[be]}})},maxCount:O.value!=="horizontal"||t.disabledOverflow?Zl.INVALIDATE:Zl.RESPONSIVE,ssr:"full","data-menu-list":!0,onVisibleChange:function(ae){K.value=ae}},null)}}}),oPe={title:u.VNodeChild},Yv=G({name:"AMenuItemGroup",inheritAttrs:!1,props:oPe,slots:["title"],setup:function(t,n){var r=n.slots,a=n.attrs,o=Ni(),i=o.prefixCls,l=x(function(){return"".concat(i.value,"-item-group")});return function(){var s;return g("li",le(le({},a),{},{onClick:function(d){return d.stopPropagation()},class:l.value}),[g("div",{title:typeof t.title=="string"?t.title:void 0,class:"".concat(l.value,"-title")},[jn(r,t,"title")]),g("ul",{class:"".concat(l.value,"-list")},[(s=r.default)===null||s===void 0?void 0:s.call(r)])])}}}),qv=G({name:"AMenuDivider",setup:function(){var t=Ni(),n=t.prefixCls;return function(){return g("li",{class:"".concat(n.value,"-item-divider")},null)}}});jr.install=function(e){return e.component(jr.name,jr),e.component(fl.name,fl),e.component(sc.name,sc),e.component(qv.name,qv),e.component(Yv.name,Yv),e};jr.Item=fl;jr.Divider=qv;jr.SubMenu=sc;jr.ItemGroup=Yv;var iPe={prefixCls:u.string,routes:{type:Array},params:u.any,separator:u.any,itemRender:{type:Function}};function lPe(e,t){if(!e.breadcrumbName)return null;var n=Object.keys(t).join("|"),r=e.breadcrumbName.replace(new RegExp(":(".concat(n,")"),"g"),function(a,o){return t[o]||a});return r}function F3(e){var t=e.route,n=e.params,r=e.routes,a=e.paths,o=r.indexOf(t)===r.length-1,i=lPe(t,n);return 
o?g("span",null,[i]):g("a",{href:"#/".concat(a.join("/"))},[i])}var us=G({name:"ABreadcrumb",props:iPe,slots:["separator","itemRender"],setup:function(t,n){var r=n.slots,a=Wt("breadcrumb",t),o=a.prefixCls,i=a.direction,l=function(f,p){return f=(f||"").replace(/^\//,""),Object.keys(p).forEach(function(v){f=f.replace(":".concat(v),p[v])}),f},s=function(f){var p=arguments.length>1&&arguments[1]!==void 0?arguments[1]:"",v=arguments.length>2?arguments[2]:void 0,m=Je(f),y=l(p,v);return y&&m.push(y),m},c=function(f){var p=f.routes,v=p===void 0?[]:p,m=f.params,y=m===void 0?{}:m,b=f.separator,C=f.itemRender,S=C===void 0?F3:C,w=[];return v.map(function(k){var $=l(k.path,y);$&&w.push($);var O=[].concat(w),T=null;return k.children&&k.children.length&&(T=g(jr,null,{default:function(){return[k.children.map(function(I){return g(jr.Item,{key:I.path||I.breadcrumbName},{default:function(){return[S({route:I,params:y,routes:v,paths:s(O,I.path,y)})]}})})]}})),g(df,{overlay:T,separator:b,key:$||k.breadcrumbName},{default:function(){return[S({route:k,params:y,routes:v,paths:O})]}})})};return function(){var d,f,p,v=t.routes,m=t.params,y=m===void 0?{}:m,b=Un(jn(r,t)),C=(f=jn(r,t,"separator"))!==null&&f!==void 0?f:"/",S=t.itemRender||r.itemRender||F3;v&&v.length>0?p=c({routes:v,params:y,separator:C,itemRender:S}):b.length&&(p=b.map(function(k,$){return on(kt(k.type)==="object"&&(k.type.__ANT_BREADCRUMB_ITEM||k.type.__ANT_BREADCRUMB_SEPARATOR),"Breadcrumb","Only accepts Breadcrumb.Item and Breadcrumb.Separator as it's children"),hr(k,{separator:C,key:$})}));var w=(d={},V(d,o.value,!0),V(d,"".concat(o.value,"-rtl"),i.value==="rtl"),d);return g("div",{class:w},[p])}}}),sPe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var 
a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},uPe={prefixCls:u.string},Gv=G({name:"ABreadcrumbSeparator",__ANT_BREADCRUMB_SEPARATOR:!0,inheritAttrs:!1,props:uPe,setup:function(t,n){var r=n.slots,a=n.attrs,o=Wt("breadcrumb",t),i=o.prefixCls;return function(){var l;a.separator;var s=a.class,c=sPe(a,["separator","class"]),d=Un((l=r.default)===null||l===void 0?void 0:l.call(r));return g("span",le({class:["".concat(i.value,"-separator"),s]},c),[d.length>0?d:"/"])}}});us.Item=df;us.Separator=Gv;us.install=function(e){return e.component(us.name,us),e.component(df.name,df),e.component(Gv.name,Gv),e};var Vl={DATE_ROW_COUNT:6,DATE_COL_COUNT:7},oD=function(t,n){for(var r=n.attrs,a=r.value,o=a.localeData(),i=r.prefixCls,l=[],s=[],c=o.firstDayOfWeek(),d,f=Ue(),p=0;p<Vl.DATE_COL_COUNT;p++){var v=(c+p)%Vl.DATE_COL_COUNT;f.day(v),l[p]=o.weekdaysMin(f),s[p]=o.weekdaysShort(f)}r.showWeekNumber&&(d=g("th",{role:"columnheader",class:"".concat(i,"-column-header ").concat(i,"-week-number-header")},[g("span",{class:"".concat(i,"-column-header-inner")},[yt("x")])]));var m=s.map(function(y,b){return g("th",{key:b,role:"columnheader",title:y,class:"".concat(i,"-column-header")},[g("span",{class:"".concat(i,"-column-header-inner")},[l[b]])])});return g("thead",null,[g("tr",{role:"row"},[d,m])])};oD.inheritAttrs=!1;var cPe=oD,dPe={disabledHours:function(){return[]},disabledMinutes:function(){return[]},disabledSeconds:function(){return[]}};function hl(e){var t=Ue();return t.locale(e.locale()).utcOffset(e.utcOffset()),t}function iD(e){return e.format("LL")}function fPe(e){var t=hl(e);return iD(t)}function lD(e){var t=e.locale(),n=e.localeData();return n[t==="zh-cn"?"months":"monthsShort"](e)}function Sa(e,t){!Ue.isMoment(e)||!Ue.isMoment(t)||(t.hour(e.hour()),t.minute(e.minute()),t.second(e.second()),t.millisecond(e.millisecond()))}function K2(e,t){var n=t?t(e):{};return 
n=P(P({},dPe),n),n}function hPe(e,t){var n=!1;if(e){var r=e.hour(),a=e.minute(),o=e.second(),i=t.disabledHours();if(i.indexOf(r)===-1){var l=t.disabledMinutes(r);if(l.indexOf(a)===-1){var s=t.disabledSeconds(r,a);n=s.indexOf(o)!==-1}else n=!0}else n=!0}return!n}function pPe(e,t){var n=K2(e,t);return hPe(e,n)}function Xv(e,t,n){return!(t&&t(e)||n&&!pPe(e,n))}function qy(e,t){if(!e)return"";if(Array.isArray(t)&&(t=t[0]),typeof t=="function"){var n=t(e);if(typeof n=="string")return n;throw new Error("The function of format does not return a string")}return e.format(t)}function xh(){}function Uc(e,t){return e&&t&&e.isSame(t,"day")}function vPe(e,t){return e.year()<t.year()?1:e.year()===t.year()&&e.month()<t.month()}function mPe(e,t){return e.year()>t.year()?1:e.year()===t.year()&&e.month()>t.month()}function gPe(e){return"rc-calendar-".concat(e.year(),"-").concat(e.month(),"-").concat(e.date())}var yPe={name:"DateTBody",inheritAttrs:!1,props:{contentRender:u.func,dateRender:u.func,disabledDate:u.func,prefixCls:u.string,selectedValue:u.any,value:u.object,hoverValue:u.any.def([]),showWeekNumber:u.looseBool},render:function(){var t=Qe(this),n=t.contentRender,r=t.prefixCls,a=t.selectedValue,o=t.value,i=t.showWeekNumber,l=t.dateRender,s=t.disabledDate,c=t.hoverValue,d=this.$attrs,f=d.onSelect,p=f===void 0?xh:f,v=d.onDayHover,m=v===void 0?xh:v,y,b,C,S=[],w=hl(o),k="".concat(r,"-cell"),$="".concat(r,"-week-number-cell"),O="".concat(r,"-date"),T="".concat(r,"-today"),_="".concat(r,"-selected-day"),I="".concat(r,"-selected-date"),L="".concat(r,"-selected-start-date"),j="".concat(r,"-selected-end-date"),F="".concat(r,"-in-range-cell"),N="".concat(r,"-last-month-cell"),D="".concat(r,"-next-month-btn-day"),z="".concat(r,"-disabled-cell"),B="".concat(r,"-disabled-cell-first-of-row"),M="".concat(r,"-disabled-cell-last-of-row"),E="".concat(r,"-last-day-of-month"),K=o.clone();K.date(1);var W=K.day(),Y=(W+7-o.localeData().firstDayOfWeek())%7,q=K.clone();q.add(0-Y,"days");var 
J=0;for(y=0;y<Vl.DATE_ROW_COUNT;y++)for(b=0;b<Vl.DATE_COL_COUNT;b++)C=q,J&&(C=C.clone(),C.add(J,"days")),S.push(C),J++;var ne=[];for(J=0,y=0;y<Vl.DATE_ROW_COUNT;y++){var oe,Q=void 0,ae=void 0,de=!1,be=[];for(i&&(ae=g("td",{key:"week-".concat(S[J].week()),role:"gridcell",class:$},[S[J].week()])),b=0;b<Vl.DATE_COL_COUNT;b++){var Ee=null,Pe=null;C=S[J],b<Vl.DATE_COL_COUNT-1&&(Ee=S[J+1]),b>0&&(Pe=S[J-1]);var Be=k,te=!1,ie=!1;Uc(C,w)&&(Be+=" ".concat(T),Q=!0);var ge=vPe(C,o),ke=mPe(C,o);if(a&&Array.isArray(a)){var xe=c.length?c:a;if(!ge&&!ke){var Ie=xe[0],ye=xe[1];Ie&&Uc(C,Ie)&&(ie=!0,de=!0,Be+=" ".concat(L)),(Ie||ye)&&(Uc(C,ye)?(ie=!0,de=!0,Be+=" ".concat(j)):(Ie==null&&C.isBefore(ye,"day")||ye==null&&C.isAfter(Ie,"day")||C.isAfter(Ie,"day")&&C.isBefore(ye,"day"))&&(Be+=" ".concat(F)))}}else Uc(C,o)&&(ie=!0,de=!0);Uc(C,a)&&(Be+=" ".concat(I)),ge&&(Be+=" ".concat(N)),ke&&(Be+=" ".concat(D)),C.clone().endOf("month").date()===C.date()&&(Be+=" ".concat(E)),s&&s(C,o)&&(te=!0,(!Pe||!s(Pe,o))&&(Be+=" ".concat(B)),(!Ee||!s(Ee,o))&&(Be+=" ".concat(M))),ie&&(Be+=" ".concat(_)),te&&(Be+=" ".concat(z));var pe=void 0;if(l)pe=l({current:C,today:o});else{var ue=n?n({current:C,today:o}):C.date();pe=g("div",{key:gPe(C),class:O,"aria-selected":ie,"aria-disabled":te},[ue])}be.push(g("td",{key:J,onClick:te?xh:p.bind(null,C),onMouseenter:te?xh:m.bind(null,C),role:"gridcell",title:iD(C),class:Be},[pe])),J++}ne.push(g("tr",{key:y,role:"row",class:Se((oe={},V(oe,"".concat(r,"-current-week"),Q),V(oe,"".concat(r,"-active-week"),de),oe))},[ae,be]))}return g("tbody",{class:"".concat(r,"-tbody")},[ne])}},bPe=yPe,sD=function(t,n){var r=n.attrs,a=r.prefixCls;return g("table",{class:"".concat(a,"-table"),cellspacing:"0",role:"grid"},[g(cPe,r,null),g(bPe,r,null)])};sD.inheritAttrs=!1;var W2=sD,CPe=4,wPe=3;function SPe(){}var 
kPe={name:"MonthTable",inheritAttrs:!1,mixins:[nt],props:{cellRender:u.func,prefixCls:u.string,value:u.object,locale:u.any,contentRender:u.any,disabledDate:u.func},data:function(){return{sValue:this.value}},watch:{value:function(t){this.setState({sValue:t})}},methods:{setAndSelectValue:function(t){this.setState({sValue:t}),this.__emit("select",t)},chooseMonth:function(t){var n=this.sValue.clone();n.month(t),this.setAndSelectValue(n)},months:function(){for(var t=this.sValue,n=t.clone(),r=[],a=0,o=0;o<CPe;o++){r[o]=[];for(var i=0;i<wPe;i++){n.month(a);var l=lD(n);r[o][i]={value:a,content:l,title:l},a++}}return r}},render:function(){var t=this,n=this.$props,r=this.sValue,a=hl(r),o=this.months(),i=r.month(),l=n.prefixCls,s=n.locale,c=n.contentRender,d=n.cellRender,f=n.disabledDate,p=o.map(function(v,m){var y=v.map(function(b){var C,S=!1;if(f){var w=r.clone();w.month(b.value),S=f(w)}var k=(C={},V(C,"".concat(l,"-cell"),1),V(C,"".concat(l,"-cell-disabled"),S),V(C,"".concat(l,"-selected-cell"),b.value===i),V(C,"".concat(l,"-current-cell"),a.year()===r.year()&&b.value===a.month()),C),$;if(d){var O=r.clone();O.month(b.value),$=d({current:O,locale:s})}else{var T;if(c){var _=r.clone();_.month(b.value),T=c({current:_,locale:s})}else T=b.content;$=g("a",{class:"".concat(l,"-month")},[T])}return g("td",{role:"gridcell",key:b.value,onClick:S?SPe:function(){return t.chooseMonth(b.value)},title:b.title,class:k},[$])});return g("tr",{key:m,role:"row"},[y])});return g("table",{class:"".concat(l,"-table"),cellspacing:"0",role:"grid"},[g("tbody",{class:"".concat(l,"-tbody")},[p])])}},uD=kPe;function _h(){}function Zv(e){var t;return e?t=hl(e):t=Ue(),t}function $Pe(e){return Array.isArray(e)?e.length===0||e.findIndex(function(t){return t===void 0||Ue.isMoment(t)})!==-1:e===void 0||Ue.isMoment(e)}var B3=u.custom($Pe),OPe={mixins:[nt],inheritAttrs:!1,name:"CalendarMixinWrapper",props:{value:B3,defaultValue:B3},data:function(){this.onKeyDown===void 0&&(this.onKeyDown=_h),this.onBlur===void 
0&&(this.onBlur=_h);var t=this.$props,n=t.value||t.defaultValue||Zv();return{sValue:n,sSelectedValue:t.selectedValue||t.defaultSelectedValue}},watch:{value:function(t){var n=t||this.defaultValue||Zv(this.sValue);this.setState({sValue:n})},selectedValue:function(t){this.setState({sSelectedValue:t})}},methods:{onSelect:function(t,n){t&&this.setValue(t),this.setSelectedValue(t,n)},renderRoot:function(t){var n,r=P(P({},this.$props),this.$attrs),a=r.prefixCls,o=(n={},V(n,a,1),V(n,"".concat(a,"-hidden"),!r.visible),V(n,r.class,!!r.class),V(n,t.class,!!t.class),n);return g("div",{ref:this.saveRoot,class:o,tabindex:"0",onKeydown:this.onKeyDown||_h,onBlur:this.onBlur||_h},[t.children])},setSelectedValue:function(t,n){vt(this,"selectedValue")||this.setState({sSelectedValue:t}),this.__emit("select",t,n)},setValue:function(t){var n=this.sValue;vt(this,"value")||this.setState({sValue:t}),(n&&t&&!n.isSame(t)||!n&&t||n&&!t)&&this.__emit("change",t)},isAllowedDate:function(t){var n=this.disabledDate,r=this.disabledTime;return Xv(t,n,r)}}},U2=OPe,$g={methods:{getFormat:function(){var t=this.format,n=this.locale,r=this.timePicker;return t||(r?t=n.dateTimeFormat:t=n.dateFormat),t},focus:function(){this.focusElement?this.focusElement.focus():this.rootInstance&&this.rootInstance.focus()},saveFocusElement:function(t){this.focusElement=t},saveRoot:function(t){this.rootInstance=t}}},PPe={name:"CalendarHeader",inheritAttrs:!1,mixins:[nt],props:{value:u.object,locale:u.object,yearSelectOffset:u.number.def(10),yearSelectTotal:u.number.def(20),Select:u.object,prefixCls:u.string,type:u.string,showTypeSwitch:u.looseBool,headerComponents:u.array},methods:{onYearChange:function(t){var n=this.value.clone();n.year(parseInt(t,10)),this.__emit("valueChange",n)},onMonthChange:function(t){var n=this.value.clone();n.month(parseInt(t,10)),this.__emit("valueChange",n)},yearSelectElement:function(t){for(var 
n=this.yearSelectOffset,r=this.yearSelectTotal,a=this.prefixCls,o=this.Select,i=t-n,l=i+r,s=[],c=function(p){s.push(g(o.Option,{key:"".concat(p)},{default:function(){return[function(){return p}()]}}))},d=i;d<l;d++)c(d);return g(o,{class:"".concat(a,"-header-year-select"),onChange:this.onYearChange,dropdownStyle:{zIndex:2e3},dropdownMenuStyle:{maxHeight:"250px",overflow:"auto",fontSize:"12px"},optionLabelProp:"children",value:String(t),showSearch:!1},{default:function(){return[s]}})},monthSelectElement:function(t){for(var n=this.value,r=this.Select,a=this.prefixCls,o=n.clone(),i=[],l=0;l<12;l++)o.month(l),i.push(g(r.Option,{key:"".concat(l)},{default:function(){return[function(){return lD(o)}()]}}));return g(r,{class:"".concat(a,"-header-month-select"),dropdownStyle:{zIndex:2e3},dropdownMenuStyle:{maxHeight:"250px",overflow:"auto",overflowX:"hidden",fontSize:"12px"},optionLabelProp:"children",value:String(t),showSearch:!1,onChange:this.onMonthChange},{default:function(){return[i]}})},changeTypeToDate:function(){this.__emit("typeChange","date")},changeTypeToMonth:function(){this.__emit("typeChange","month")}},render:function(){var t=this.value,n=this.locale,r=this.prefixCls,a=this.type,o=this.showTypeSwitch,i=this.headerComponents,l=t.year(),s=t.month(),c=this.yearSelectElement(l),d=a==="month"?null:this.monthSelectElement(s),f="".concat(r,"-header-switcher"),p=o?g("span",{class:f},[a==="date"?g("span",{class:"".concat(f,"-focus")},[n.month]):g("span",{onClick:this.changeTypeToDate,class:"".concat(f,"-normal")},[n.month]),a==="month"?g("span",{class:"".concat(f,"-focus")},[n.year]):g("span",{onClick:this.changeTypeToMonth,class:"".concat(f,"-normal")},[n.year])]):null;return 
g("div",{class:"".concat(r,"-header")},[p,d,c,i])}},TPe=PPe,xPe=G({name:"FullCalendar",mixins:[nt,$g,U2],inheritAttrs:!1,props:{locale:u.object.def(Nf),format:u.oneOfType([u.string,u.array,u.func]),visible:u.looseBool.def(!0),prefixCls:u.string.def("rc-calendar"),defaultType:u.string.def("date"),type:u.string,fullscreen:u.looseBool.def(!1),monthCellRender:u.func,dateCellRender:u.func,showTypeSwitch:u.looseBool.def(!0),Select:u.object.isRequired,headerComponents:u.array,headerComponent:u.object,headerRender:u.func,showHeader:u.looseBool.def(!0),disabledDate:u.func,value:u.object,defaultValue:u.object,selectedValue:u.object,defaultSelectedValue:u.object,renderFooter:u.func.def(function(){return null}),renderSidebar:u.func.def(function(){return null})},data:function(){var t;vt(this,"type")?t=this.type:t=this.defaultType;var n=this.$props;return{sType:t,sValue:n.value||n.defaultValue||Ue(),sSelectedValue:n.selectedValue||n.defaultSelectedValue}},watch:{type:function(t){this.setState({sType:t})},value:function(t){var n=t||this.defaultValue||Zv(this.sValue);this.setState({sValue:n})},selectedValue:function(t){this.setState({sSelectedValue:t})}},methods:{onMonthSelect:function(t){this.onSelect(t,{target:"month"})},setType:function(t){vt(this,"type")||this.setState({sType:t}),this.__emit("typeChange",t)}},render:function(){var t=Qe(this),n=t.locale,r=t.prefixCls,a=t.fullscreen,o=t.showHeader,i=t.headerComponent,l=t.headerRender,s=t.disabledDate,c=this.sValue,d=this.sType,f=null;if(o)if(l)f=l(c,d,n);else{var p=i||TPe,v=P(P(P({},t),this.$attrs),{prefixCls:"".concat(r,"-full"),type:d,value:c,onTypeChange:this.setType,onValueChange:this.setValue,key:"calendar-header"});f=g(p,v,null)}var 
m=d==="date"?g(W2,{dateRender:t.dateCellRender,contentRender:t.dateCellContentRender,locale:n,prefixCls:r,onSelect:this.onSelect,value:c,disabledDate:s},null):g(uD,{cellRender:t.monthCellRender,contentRender:t.monthCellContentRender,locale:n,onSelect:this.onMonthSelect,prefixCls:"".concat(r,"-month-panel"),value:c,disabledDate:s},null),y=[f,g("div",{key:"calendar-body",class:"".concat(r,"-calendar-body")},[m])],b=["".concat(r,"-full")];return a&&b.push("".concat(r,"-fullscreen")),this.renderRoot({children:y,class:b.join(" ")})}}),_Pe=xPe,EPe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},cD=G({name:"Checkbox",mixins:[nt],inheritAttrs:!1,props:An({prefixCls:u.string,name:u.string,id:u.string,type:u.string,defaultChecked:an(u.oneOfType([u.number,u.looseBool])),checked:an(u.oneOfType([u.number,u.looseBool])),disabled:u.looseBool,tabindex:u.oneOfType([u.string,u.number]),readonly:u.looseBool,autofocus:u.looseBool,value:u.any},{prefixCls:"rc-checkbox",type:"checkbox",defaultChecked:!1}),data:function(){var t=vt(this,"checked")?this.checked:this.defaultChecked;return{sChecked:t}},watch:{checked:function(t){this.sChecked=t}},mounted:function(){Ne(function(){})},methods:{focus:function(){this.$refs.input.focus()},blur:function(){this.$refs.input.blur()},handleChange:function(t){var n=Qe(this);if(!n.disabled){"checked"in n||(this.sChecked=t.target.checked),t.shiftKey=this.eventShiftKey;var r={target:P(P({},n),{checked:t.target.checked}),stopPropagation:function(){t.stopPropagation()},preventDefault:function(){t.preventDefault()},nativeEvent:t};"checked"in 
n&&(this.$refs.input.checked=n.checked),this.__emit("change",r),this.eventShiftKey=!1}},onClick:function(t){this.__emit("click",t),this.eventShiftKey=t.shiftKey}},render:function(){var t,n=Qe(this),r=n.prefixCls,a=n.name,o=n.id,i=n.type,l=n.disabled,s=n.readonly,c=n.tabindex,d=n.autofocus,f=n.value,p=EPe(n,["prefixCls","name","id","type","disabled","readonly","tabindex","autofocus","value"]),v=this.$attrs,m=v.class,y=v.onFocus,b=v.onBlur,C=Object.keys(P(P({},p),this.$attrs)).reduce(function($,O){return(O.substr(0,5)==="aria-"||O.substr(0,5)==="data-"||O==="role")&&($[O]=p[O]),$},{}),S=this.sChecked,w=Se(r,m,(t={},V(t,"".concat(r,"-checked"),S),V(t,"".concat(r,"-disabled"),l),t)),k=P(P({name:a,id:o,type:i,readonly:s,disabled:l,tabindex:c,class:"".concat(r,"-input"),checked:!!S,autofocus:d,value:f},C),{onChange:this.handleChange,onClick:this.onClick,onFocus:y,onBlur:b});return g("span",{class:w},[g("input",le({ref:"input"},k),null),g("span",{class:"".concat(r,"-inner")},null)])}}),MPe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},dD={prefixCls:u.string,defaultChecked:u.looseBool,checked:u.looseBool,disabled:u.looseBool,isGroup:u.looseBool,value:u.any,name:u.string,id:u.string,autofocus:u.looseBool,type:u.string.def("radio"),onChange:u.func,onFocus:u.func,onBlur:u.func},_r=G({name:"ARadio",props:dD,emits:["update:checked","update:value","change","blur","focus"],setup:function(){return{configProvider:ve("configProvider",St),radioGroupContext:ve("radioGroupContext",null)}},methods:{focus:function(){this.$refs.vcCheckbox.focus()},blur:function(){this.$refs.vcCheckbox.blur()},handleChange:function(t){var 
n=t.target.checked;this.$emit("update:checked",n),this.$emit("update:value",n),this.$emit("change",t)},onChange2:function(t){this.$emit("change",t),this.radioGroupContext&&this.radioGroupContext.onRadioChange&&this.radioGroupContext.onRadioChange(t)}},render:function(){var t,n=this.$slots,r=this.radioGroupContext,a=Qe(this),o=a.prefixCls,i=MPe(a,["prefixCls"]),l=this.configProvider.getPrefixCls,s=l("radio",o),c=P({prefixCls:s},i);r?(c.name=r.name,c.onChange=this.onChange2,c.checked=a.value===r.stateValue,c.disabled=a.disabled||r.disabled):c.onChange=this.handleChange;var d=Se((t={},V(t,"".concat(s,"-wrapper"),!0),V(t,"".concat(s,"-wrapper-checked"),c.checked),V(t,"".concat(s,"-wrapper-disabled"),c.disabled),t));return g("label",{class:d},[g(cD,le(le({},c),{},{ref:"vcCheckbox"}),null),n.default&&g("span",null,[n.default()])])}}),Y2=G({name:"ARadioGroup",props:{prefixCls:u.string,defaultValue:u.any,value:u.any,size:u.oneOf(rt("large","default","small")).def("default"),options:u.array,disabled:u.looseBool,name:u.string,buttonStyle:u.string.def("outline"),onChange:u.func},emits:["update:value","change"],setup:function(){return{updatingValue:!1,configProvider:ve("configProvider",St),radioGroupContext:null}},data:function(){var t=this.value,n=this.defaultValue;return{stateValue:t===void 0?n:t}},watch:{value:function(t){this.updatingValue=!1,this.stateValue=t}},created:function(){this.radioGroupContext=ot("radioGroupContext",this)},methods:{onRadioChange:function(t){var n=this,r=this.stateValue,a=t.target.value;vt(this,"value")||(this.stateValue=a),!this.updatingValue&&a!==r&&(this.updatingValue=!0,this.$emit("update:value",a),this.$emit("change",t)),Ne(function(){n.updatingValue=!1})}},render:function(){var t=this,n=Qe(this),r=n.prefixCls,a=n.options,o=n.buttonStyle,i=this.configProvider.getPrefixCls,l=i("radio",r),s="".concat(l,"-group"),c=Se(s,"".concat(s,"-").concat(o),V({},"".concat(s,"-").concat(n.size),n.size)),d=La(ht(this));return 
a&&a.length>0&&(d=a.map(function(f){return typeof f=="string"?g(_r,{key:f,prefixCls:l,disabled:n.disabled,value:f,checked:t.stateValue===f},{default:function(){return[f]}}):g(_r,{key:"radio-group-value-options-".concat(f.value),prefixCls:l,disabled:f.disabled||n.disabled,value:f.value,checked:t.stateValue===f.value},{default:function(){return[f.label]}})})),g("div",{class:c},[d])}}),IPe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},Jv=G({name:"ARadioButton",props:P({},dD),setup:function(){return{configProvider:ve("configProvider",St),radioGroupContext:ve("radioGroupContext",{})}},render:function(){var t=this,n=Qe(this),r=n.prefixCls,a=IPe(n,["prefixCls"]),o=this.configProvider.getPrefixCls,i=o("radio-button",r),l=P({prefixCls:i},a);return this.radioGroupContext&&(l.onChange=this.radioGroupContext.onRadioChange,l.checked=n.value===this.radioGroupContext.stateValue,l.disabled=n.disabled||this.radioGroupContext.disabled),g(_r,l,{default:function(){return[ht(t)]}})}});_r.Group=Y2;_r.Button=Jv;_r.install=function(e){return e.component(_r.name,_r),e.component(_r.Group.name,_r.Group),e.component(_r.Button.name,_r.Button),e};function NPe(e){for(var t=e.clone(),n=e.localeData(),r=[],a=0;a<12;a++)t.month(a),r.push(n.monthsShort(t));return r}var 
APe={prefixCls:u.string,locale:u.any,fullscreen:u.looseBool,yearSelectOffset:u.number,yearSelectTotal:u.number,type:u.string,value:{type:Object},validRange:{type:Array},headerRender:u.func,onValueChange:u.func,onTypeChange:u.func},DPe=G({name:"CalendarHeader",inheritAttrs:!1,props:P(P({},APe),{yearSelectOffset:u.number.def(10),yearSelectTotal:u.number.def(20)}),setup:function(){return{configProvider:ve("configProvider",St),calendarHeaderNode:void 0}},methods:{getYearSelectElement:function(t,n){var r=this,a=this.yearSelectOffset,o=this.yearSelectTotal,i=this.locale,l=i===void 0?{}:i,s=this.fullscreen,c=this.validRange,d=n-a,f=d+o;c&&(d=c[0].get("year"),f=c[1].get("year")+1);for(var p=l&&l.year==="\u5E74"?"\u5E74":"",v=[],m=d;m<f;m++)v.push({label:"".concat(m).concat(p),value:m});return g(dl,{size:s?void 0:"small",class:"".concat(t,"-year-select"),onChange:this.onYearChange,value:n,options:v,getPopupContainer:function(){return r.calendarHeaderNode}},null)},getMonthSelectElement:function(t,n,r){var a=this,o=this.fullscreen,i=this.validRange,l=this.value,s=0,c=11;if(i){var d=fn(i,2),f=d[0],p=d[1],v=l.get("year");p.get("year")===v&&(c=p.get("month")+1),f.get("year")===v&&(s=f.get("month"))}for(var m=[],y=s;y<=c;y+=1)m.push({label:r[y],value:y});return g(dl,{size:o?void 0:"small",class:"".concat(t,"-month-select"),value:n,options:m,onChange:this.onMonthChange,getPopupContainer:function(){return a.calendarHeaderNode}},null)},onYearChange:function(t){var n=this.value,r=this.validRange,a=n.clone();if(a.year(parseInt(t,10)),r){var o=fn(r,2),i=o[0],l=o[1],s=a.get("year"),c=a.get("month");s===l.get("year")&&c>l.get("month")&&a.month(l.get("month")),s===i.get("year")&&c<i.get("month")&&a.month(i.get("month"))}this.$emit("valueChange",a)},onMonthChange:function(t){var 
n=this.value.clone();n.month(parseInt(t,10)),this.$emit("valueChange",n)},onInternalTypeChange:function(t){this.triggerTypeChange(t.target.value)},triggerTypeChange:function(t){this.$emit("typeChange",t)},getMonthYearSelections:function(t){var n=this.$props,r=n.prefixCls,a=n.type,o=n.value,i=t("fullcalendar",r),l=this.getYearSelectElement(i,o.year()),s=a==="month"?this.getMonthSelectElement(i,o.month(),NPe(o)):null;return{yearReactNode:l,monthReactNode:s}},getTypeSwitch:function(){var t=this.$props,n=t.locale,r=n===void 0?{}:n,a=t.type,o=t.fullscreen,i=o?"default":"small";return g(Y2,{onChange:this.onInternalTypeChange,value:a,size:i},{default:function(){return[g(Jv,{value:"month"},{default:function(){return[r.month]}}),g(Jv,{value:"year"},{default:function(){return[r.year]}})]}})},triggerValueChange:function(){for(var t=arguments.length,n=new Array(t),r=0;r<t;r++)n[r]=arguments[r];this.$emit.apply(this,["valueChange"].concat(n))},saveCalendarHeaderNode:function(t){this.calendarHeaderNode=t},headerRenderCustom:function(t){var n=this.$props,r=n.type,a=n.value;return t({value:a,type:r||"month",onChange:this.triggerValueChange,onTypeChange:this.triggerTypeChange})}},render:function(){var t=this.prefixCls,n=this.headerRender,r=this.configProvider.getPrefixCls,a=r("fullcalendar",t),o=this.getTypeSwitch(),i=this.getMonthYearSelections(r),l=i.yearReactNode,s=i.monthReactNode;return n?this.headerRenderCustom(n):g("div",{class:"".concat(a,"-header"),ref:this.saveCalendarHeaderNode},[l,s,o])}}),V3={validator:function(t){return typeof t=="string"||ul(t)||Ue.isMoment(t)}},z3={validator:function(t){return Array.isArray(t)?t.length===0||t.findIndex(function(n){return typeof n!="string"})===-1||t.findIndex(function(n){return!ul(n)&&!Ue.isMoment(n)})===-1:typeof t=="string"||ul(t)||Ue.isMoment(t)}};function pi(e,t,n,r){var a=Array.isArray(t)?t:[t];a.forEach(function(o){!o||(r&&on(Hr(Ue)(o,r).isValid(),e,"When set `valueFormat`, `".concat(n,"` should provides invalidate string 
time. ")),!r&&on(Hr(Ue).isMoment(o)&&o.isValid(),e,"`".concat(n,"` provides invalidate moment time. If you want to set empty value, use `null` instead.")))})}var rl=function(t,n){return Array.isArray(t)?t.map(function(r){return typeof r=="string"&&r?Hr(Ue)(r,n):r||null}):typeof t=="string"&&t?Hr(Ue)(t,n):t||null},Du=function(t,n){return Array.isArray(t)?t.map(function(r){return Hr(Ue).isMoment(r)?r.format(n):r}):Hr(Ue).isMoment(t)?t.format(n):t};function H3(){return null}function RPe(e){return e<10?"0".concat(e):"".concat(e)}var LPe=rt("month","year"),FPe={monthCellRender:u.func,dateCellRender:u.func,monthFullCellRender:u.func,dateFullCellRender:u.func,prefixCls:u.string,value:V3,defaultValue:V3,mode:u.oneOf(LPe),fullscreen:u.looseBool.def(!0),locale:u.object.def({}),disabledDate:u.func,validRange:{type:Array},headerRender:u.func,valueFormat:u.string,onPanelChange:u.func,onSelect:u.func,onChange:u.func,"onUpdate:value":u.func},BPe=G({name:"ACalendar",mixins:[nt],inheritAttrs:!1,props:FPe,setup:function(){return{configProvider:ve("configProvider",St),sPrefixCls:void 0}},data:function(){var t=this.value,n=this.defaultValue,r=this.valueFormat,a=t||n||Hr(Ue)();return pi("Calendar",n,"defaultValue",r),pi("Calendar",t,"value",r),{sValue:rl(a,r),sMode:this.mode||"month"}},watch:{value:function(t){pi("Calendar",t,"value",this.valueFormat),this.setState({sValue:rl(t,this.valueFormat)})},mode:function(t){this.setState({sMode:t})}},methods:{onHeaderValueChange:function(t){this.setValue(t,"changePanel")},onHeaderTypeChange:function(t){this.sMode=t,this.triggerPanelChange(this.sValue,t)},triggerPanelChange:function(t,n){var r=this.valueFormat?Du(t,this.valueFormat):t;t!==this.sValue&&(this.$emit("update:value",r),this.$emit("change",r)),this.$emit("panelChange",r,n)},triggerSelect:function(t){this.setValue(t,"select")},setValue:function(t,n){var 
r=this.value?rl(this.value,this.valueFormat):this.sValue,a=this.sMode,o=this.valueFormat;if(vt(this,"value")||this.setState({sValue:t}),n==="select"){var i=o?Du(t,o):t;r&&r.month()!==t.month()?this.triggerPanelChange(t,a):this.$emit("update:value",i),this.$emit("select",i)}else n==="changePanel"&&this.triggerPanelChange(t,a)},getDateRange:function(t,n){return function(r){if(!r)return!1;var a=fn(t,2),o=a[0],i=a[1],l=!r.isBetween(o,i,"days","[]");return n&&n(r)||l}},getDefaultLocale:function(){var t=P(P({},Ev),this.$props.locale);return t.lang=P(P({},t.lang),(this.$props.locale||{}).lang),t},monthCellRender2:function(t){var n=t.current,r=this.sPrefixCls,a=this.$slots,o=this.monthCellRender||a.monthCellRender||H3;return g("div",{class:"".concat(r,"-month")},[g("div",{class:"".concat(r,"-value")},[n.localeData().monthsShort(n)]),g("div",{class:"".concat(r,"-content")},[o({current:n})])])},dateCellRender2:function(t){var n=t.current,r=this.sPrefixCls,a=this.$slots,o=this.dateCellRender||a.dateCellRender||H3;return g("div",{class:"".concat(r,"-date")},[g("div",{class:"".concat(r,"-value")},[RPe(n.date())]),g("div",{class:"".concat(r,"-content")},[o({current:n})])])},renderCalendar:function(t,n){var r=P(P({},Qe(this)),this.$attrs),a=this.sValue,o=this.sMode,i=this.$slots;a&&n&&a.locale(n);var l=r.prefixCls,s=r.fullscreen,c=r.dateFullCellRender,d=r.monthFullCellRender,f=r.class,p=r.style,v=this.headerRender||i.headerRender,m=this.configProvider.getPrefixCls,y=m("fullcalendar",l);this.sPrefixCls=y;var b=f||"";s&&(b+=" ".concat(y,"-fullscreen"));var C=d||i.monthFullCellRender||this.monthCellRender2,S=c||i.dateFullCellRender||this.dateCellRender2,w=r.disabledDate;r.validRange&&(w=this.getDateRange(r.validRange,w));var k=P(P(P({},r),this.$attrs),{Select:{},locale:t.lang,type:o==="year"?"month":"date",prefixCls:y,showHeader:!1,value:a,monthCellRender:C,dateCellRender:S,disabledDate:w,onSelect:this.triggerSelect});return 
g("div",{class:b,style:p},[g(DPe,{fullscreen:s,type:o,headerRender:v,value:a,locale:t.lang,prefixCls:y,onTypeChange:this.onHeaderTypeChange,onValueChange:this.onHeaderValueChange,validRange:r.validRange},null),g(_Pe,k,null)])}},render:function(){return g(Kr,{componentName:"Calendar",defaultLocale:this.getDefaultLocale,children:this.renderCalendar},null)}}),VPe=kn(BPe),zPe={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"defs",attrs:{},children:[{tag:"style",attrs:{}}]},{tag:"path",attrs:{d:"M482 152h60q8 0 8 8v704q0 8-8 8h-60q-8 0-8-8V160q0-8 8-8z"}},{tag:"path",attrs:{d:"M176 474h672q8 0 8 8v60q0 8-8 8H176q-8 0-8-8v-60q0-8 8-8z"}}]},name:"plus",theme:"outlined"},HPe=zPe;function j3(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){jPe(e,a,n[a])})}return e}function jPe(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var q2=function(t,n){var r=j3({},t,n.attrs);return g(Et,j3({},r,{icon:HPe}),null)};q2.displayName="PlusOutlined";q2.inheritAttrs=!1;var KPe=q2,Eh={LEFT:37,UP:38,RIGHT:39,DOWN:40},WPe={width:0,height:0,overflow:"hidden",position:"absolute"},Qv={name:"Sentinel",props:{setRef:u.func,prevElement:u.any,nextElement:u.any},methods:{onKeyDown:function(t){var n=t.target,r=t.which,a=t.shiftKey,o=this.$props,i=o.nextElement,l=o.prevElement;r!==ze.TAB||document.activeElement!==n||(!a&&i&&i.focus(),a&&l&&l.focus())}},render:function(){var t=this.$props.setRef;return g("div",{tabindex:0,ref:t,style:WPe,onKeydown:this.onKeyDown,role:"presentation"},[ht(this)])}};function UPe(e){var t=[];return e.forEach(function(n){rn(n)&&t.push(n)}),t}function fD(e,t){for(var n=UPe(e),r=0;r<n.length;r++)if(n[r].key===t)return r;return-1}function 
Wp(e,t){e.transform=t,e.webkitTransform=t,e.mozTransform=t}function hD(e){return("transform"in e||"webkitTransform"in e||"MozTransform"in e)&&window.atob}function YPe(e){return{transform:e,WebkitTransform:e,MozTransform:e}}function em(e){return e==="left"||e==="right"}function qPe(e,t){var n=arguments.length>2&&arguments[2]!==void 0?arguments[2]:"ltr",r=em(t)?"translateY":"translateX";return!em(t)&&n==="rtl"?"".concat(r,"(").concat(e*100,"%) translateZ(0)"):"".concat(r,"(").concat(-e*100,"%) translateZ(0)")}function GPe(e,t){var n=em(t)?"marginTop":"marginLeft";return V({},n,"".concat(-e*100,"%"))}function pD(e,t){return+window.getComputedStyle(e).getPropertyValue(t).replace("px","")}function vD(e){return Object.keys(e).reduce(function(t,n){return(n.substr(0,5)==="aria-"||n.substr(0,5)==="data-"||n==="role")&&(t[n]=e[n]),t},{})}function Yc(e,t){return+e.getPropertyValue(t).replace("px","")}function mD(e,t,n,r,a){var o=pD(a,"padding-".concat(e));if(!r||!r.parentNode)return o;var i=r.parentNode.childNodes;return Array.prototype.some.call(i,function(l){if(!l.tagName)return!1;var s=window.getComputedStyle(l);return l!==r?(o+=Yc(s,"margin-".concat(e)),o+=l[t],o+=Yc(s,"margin-".concat(n)),s.boxSizing==="content-box"&&(o+=Yc(s,"border-".concat(e,"-width"))+Yc(s,"border-".concat(n,"-width"))),!1):(o+=Yc(s,"margin-".concat(e)),!0)}),o}function XPe(e,t){return mD("left","offsetWidth","right",e,t)}function ZPe(e,t){return mD("top","offsetHeight","bottom",e,t)}var JPe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n};function K3(e){var t,n=e.children;return n.forEach(function(r){r&&!tf(t)&&!r.disabled&&(t=r.key)}),t}function QPe(e,t){var 
n=e.children,r=n.map(function(a){return a&&a.key});return r.indexOf(t)>=0}var e3e=G({name:"Tabs",mixins:[nt],inheritAttrs:!1,props:{destroyInactiveTabPane:u.looseBool,renderTabBar:u.func.isRequired,renderTabContent:u.func.isRequired,navWrapper:u.func.def(function(e){return e}),children:u.any.def([]),prefixCls:u.string.def("ant-tabs"),tabBarPosition:u.string.def("top"),activeKey:u.oneOfType([u.string,u.number]),defaultActiveKey:u.oneOfType([u.string,u.number]),direction:u.string.def("ltr"),tabBarGutter:u.number},setup:function(t){var n;t.activeKey!==void 0?n=t.activeKey:t.defaultActiveKey!==void 0?n=t.defaultActiveKey:n=K3(t);var r=bt({_activeKey:n});return Wn(function(){t.activeKey!==void 0?r._activeKey=t.activeKey:QPe(t,r._activeKey)||(r._activeKey=K3(t))},{flush:"sync"}),{state:r}},created:function(){this.panelSentinelStart=void 0,this.panelSentinelEnd=void 0,this.sentinelStart=void 0,this.sentinelEnd=void 0,ot("sentinelContext",this)},beforeUnmount:function(){this.destroy=!0,cancelAnimationFrame(this.sentinelId)},methods:{onTabClick:function(t,n){this.tabBar.props&&this.tabBar.props.onTabClick&&this.tabBar.props.onTabClick(t,n),this.setActiveKey(t)},onNavKeyDown:function(t){var n=t.keyCode;if(n===Eh.RIGHT||n===Eh.DOWN){t.preventDefault();var r=this.getNextActiveKey(!0);this.onTabClick(r)}else if(n===Eh.LEFT||n===Eh.UP){t.preventDefault();var a=this.getNextActiveKey(!1);this.onTabClick(a)}},onScroll:function(t){var n=t.target,r=t.currentTarget;n===r&&n.scrollLeft>0&&(n.scrollLeft=0)},setSentinelStart:function(t){this.sentinelStart=t},setSentinelEnd:function(t){this.sentinelEnd=t},setPanelSentinelStart:function(t){t!==this.panelSentinelStart&&this.updateSentinelContext(),this.panelSentinelStart=t},setPanelSentinelEnd:function(t){t!==this.panelSentinelEnd&&this.updateSentinelContext(),this.panelSentinelEnd=t},setActiveKey:function(t){if(this.state._activeKey!==t){var n=this.$props;n.activeKey===void 
0&&(this.state._activeKey=t),this.__emit("update:activeKey",t),this.__emit("change",t)}},getNextActiveKey:function(t){var n=this.state._activeKey,r=[];this.$props.children.forEach(function(i){var l,s;i&&!(!((l=i.props)===null||l===void 0)&&l.disabled)&&((s=i.props)===null||s===void 0?void 0:s.disabled)!==""&&(t?r.push(i):r.unshift(i))});var a=r.length,o=a&&r[0].key;return r.forEach(function(i,l){i.key===n&&(l===a-1?o=r[0].key:o=r[l+1].key)}),o},updateSentinelContext:function(){var t=this;this.destroy||(cancelAnimationFrame(this.sentinelId),this.sentinelId=requestAnimationFrame(function(){t.destroy||t.$forceUpdate()}))}},render:function(){var t,n=this.$props,r=n.prefixCls,a=n.navWrapper,o=n.tabBarPosition,i=n.renderTabContent,l=n.renderTabBar,s=n.destroyInactiveTabPane,c=n.direction,d=n.tabBarGutter,f=this.$attrs,p=f.class;f.onChange;var v=f.style,m=JPe(f,["class","onChange","style"]),y=(t={},V(t,p,p),V(t,r,1),V(t,"".concat(r,"-").concat(o),1),V(t,"".concat(r,"-rtl"),c==="rtl"),t);this.tabBar=l();var b=Ot(this.tabBar,{prefixCls:r,navWrapper:a,tabBarPosition:o,panels:n.children,activeKey:this.state._activeKey,direction:c,tabBarGutter:d,onKeydown:this.onNavKeyDown,onTabClick:this.onTabClick,key:"tabBar"}),C=Ot(i(),{prefixCls:r,tabBarPosition:o,activeKey:this.state._activeKey,destroyInactiveTabPane:s,direction:c,onChange:this.setActiveKey,children:n.children,key:"tabContent"}),S=g(Qv,{key:"sentinelStart",setRef:this.setSentinelStart,nextElement:this.panelSentinelStart},null),w=g(Qv,{key:"sentinelEnd",setRef:this.setSentinelEnd,prevElement:this.panelSentinelEnd},null),k=[];o==="bottom"?k.push(S,C,w,b):k.push(b,S,C,w);var $=P(P({},vD(m)),{style:v,onScroll:this.onScroll,class:y});return g("div",$,[k])}}),G2=G({name:"TabPane",props:{active:u.looseBool,destroyInactiveTabPane:u.looseBool,forceRender:u.looseBool,placeholder:u.any,rootPrefixCls:u.string,tab:u.any,closable:u.looseBool,disabled:u.looseBool},setup:function(){return{isActived:void 
0,sentinelContext:ve("sentinelContext",{})}},render:function(){var t,n=this.$props,r=n.destroyInactiveTabPane,a=n.active,o=n.forceRender,i=n.rootPrefixCls,l=ht(this),s=We(this,"placeholder");this.isActived=this.isActived||a;var c="".concat(i,"-tabpane"),d=(t={},V(t,c,1),V(t,"".concat(c,"-inactive"),!a),V(t,"".concat(c,"-active"),a),t),f=r?a:this.isActived,p=f||o,v=this.sentinelContext,m=v.sentinelStart,y=v.sentinelEnd,b=v.setPanelSentinelStart,C=v.setPanelSentinelEnd,S,w;return a&&p&&(S=g(Qv,{setRef:b,prevElement:m},null),w=g(Qv,{setRef:C,nextElement:y},null)),g("div",{class:d,role:"tabpanel","aria-hidden":a?"false":"true"},[S,p?l:s,w])}}),X2=G({name:"TabContent",inheritAttrs:!1,props:{animated:u.looseBool.def(!0),animatedWithMargin:u.looseBool.def(!0),prefixCls:u.string.def("ant-tabs"),activeKey:u.oneOfType([u.string,u.number]),tabBarPosition:u.string,direction:u.string,destroyInactiveTabPane:u.looseBool,children:u.any},computed:{classes:function(){var t,n=this.animated,r=this.prefixCls,a=this.$attrs.class;return t={},V(t,a,!!a),V(t,"".concat(r,"-content"),!0),V(t,n?"".concat(r,"-content-animated"):"".concat(r,"-content-no-animated"),!0),t}},methods:{getTabPanes:function(t){var n=this.$props,r=n.activeKey,a=[];return t.forEach(function(o){if(!!o){var i=o.key,l=r===i;a.push(Ot(o,{active:l,destroyInactiveTabPane:n.destroyInactiveTabPane,rootPrefixCls:n.prefixCls}))}}),a}},render:function(){var t=this.activeKey,n=this.tabBarPosition,r=this.animated,a=this.animatedWithMargin,o=this.direction,i=this.classes,l=this.children,s={};if(r&&l){var c=fD(l,t);if(c!==-1){var d=a?GPe(c,n):YPe(qPe(c,n,o));s=P(P({},this.$attrs.style),d)}else s=P(P({},this.$attrs.style),{display:"none"})}return g("div",{class:i,style:s},[this.getTabPanes(l||[])])}}),t3e={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M890.5 755.3L537.9 269.2c-12.8-17.6-39-17.6-51.7 0L133.5 755.3A8 8 0 00140 768h75c5.1 0 9.9-2.5 12.9-6.6L512 369.8l284.1 391.6c3 4.1 
7.8 6.6 12.9 6.6h75c6.5 0 10.3-7.4 6.5-12.7z"}}]},name:"up",theme:"outlined"},n3e=t3e;function W3(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){r3e(e,a,n[a])})}return e}function r3e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var Z2=function(t,n){var r=W3({},t,n.attrs);return g(Et,W3({},r,{icon:n3e}),null)};Z2.displayName="UpOutlined";Z2.inheritAttrs=!1;var gD=Z2,a3e={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M724 218.3V141c0-6.7-7.7-10.4-12.9-6.3L260.3 486.8a31.86 31.86 0 000 50.3l450.8 352.1c5.3 4.1 12.9.4 12.9-6.3v-77.3c0-4.9-2.3-9.6-6.1-12.6l-360-281 360-281.1c3.8-3 6.1-7.7 6.1-12.6z"}}]},name:"left",theme:"outlined"},o3e=a3e;function U3(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){i3e(e,a,n[a])})}return e}function i3e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var J2=function(t,n){var r=U3({},t,n.attrs);return g(Et,U3({},r,{icon:o3e}),null)};J2.displayName="LeftOutlined";J2.inheritAttrs=!1;var uc=J2;function Y3(e,t){var n=e.$props,r=n.styles,a=r===void 0?{}:r,o=n.panels,i=n.activeKey,l=n.direction,s=e.getRef("root"),c=e.getRef("nav")||s,d=e.getRef("inkBar"),f=e.getRef("activeTab"),p=d.style,v=e.$props.tabBarPosition,m=fD(o,i);if(t&&(p.display="none"),f){var y=f,b=hD(p);if(Wp(p,""),p.width="",p.height="",p.left="",p.top="",p.bottom="",p.right="",v==="top"||v==="bottom"){var 
C=XPe(y,c),S=y.offsetWidth;S===s.offsetWidth?S=0:a.inkBar&&a.inkBar.width!==void 0&&(S=parseFloat(a.inkBar.width,10),S&&(C+=(y.offsetWidth-S)/2)),l==="rtl"&&(C=pD(y,"margin-left")-C),b?Wp(p,"translate3d(".concat(C,"px,0,0)")):p.left="".concat(C,"px"),p.width="".concat(S,"px")}else{var w=ZPe(y,c),k=y.offsetHeight;a.inkBar&&a.inkBar.height!==void 0&&(k=parseFloat(a.inkBar.height,10),k&&(w+=(y.offsetHeight-k)/2)),b?(Wp(p,"translate3d(0,".concat(w,"px,0)")),p.top="0"):p.top="".concat(w,"px"),p.height="".concat(k,"px")}}p.display=m!==-1?"block":"none"}var l3e={name:"InkTabBarNode",mixins:[nt],inheritAttrs:!1,props:{inkBarAnimated:{type:Boolean,default:!0},direction:u.string,prefixCls:String,styles:Object,tabBarPosition:String,saveRef:u.func.def(function(){}),getRef:u.func.def(function(){}),panels:u.array,activeKey:u.oneOfType([u.string,u.number])},updated:function(){var t=this;this.$nextTick(function(){Y3(t)})},mounted:function(){var t=this;this.$nextTick(function(){Y3(t,!0)})},render:function(){var t,n=this.prefixCls,r=this.styles,a=r===void 0?{}:r,o=this.inkBarAnimated,i="".concat(n,"-ink-bar"),l=(t={},V(t,i,!0),V(t,o?"".concat(i,"-animated"):"".concat(i,"-no-animated"),!0),t);return g("div",{style:a.inkBar,class:l,key:"inkBar",ref:this.saveRef("inkBar")},null)}},s3e=function(){},cc=s3e;function Gy(){}var u3e={name:"TabBarTabsNode",mixins:[nt],inheritAttrs:!1,props:{activeKey:u.oneOfType([u.string,u.number]),panels:u.any.def([]),prefixCls:u.string.def(""),tabBarGutter:u.any.def(null),onTabClick:u.func,saveRef:u.func.def(Gy),getRef:u.func.def(Gy),renderTabBarNode:u.func,tabBarPosition:u.string,direction:u.string},render:function(){var t=this,n=this.$props,r=n.panels,a=n.activeKey,o=n.prefixCls,i=n.tabBarGutter,l=n.saveRef,s=n.tabBarPosition,c=n.direction,d=[],f=this.renderTabBarNode||this.$slots.renderTabBarNode;return r.forEach(function(p,v){if(!!p){var m=rr(p),y=p.key,b=a===y?"".concat(o,"-tab-active"):"";b+=" ".concat(o,"-tab");var C={},S=m.disabled;S?b+=" 
".concat(o,"-tab-disabled"):C.onClick=function(){t.__emit("tabClick",y)};var w=We(p,"tab"),k=i&&v===r.length-1?0:i;k=typeof k=="number"?"".concat(k,"px"):k;var $=c==="rtl"?"marginLeft":"marginRight",O=V({},em(s)?"marginBottom":$,k),T=g("div",le(le({role:"tab","aria-disabled":S?"true":"false","aria-selected":a===y?"true":"false"},C),{},{class:b.trim(),key:y,style:O,ref:a===y?l("activeTab"):Gy}),[w]);f&&(T=f(T)),d.push(T)}}),g("div",{ref:this.saveRef("navTabsContainer")},[d])}},c3e=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n};function q3(){}var d3e={name:"TabBarRootNode",mixins:[nt],inheritAttrs:!1,props:{saveRef:u.func.def(q3),getRef:u.func.def(q3),prefixCls:u.string.def(""),tabBarPosition:u.string.def("top"),extraContent:u.any},methods:{onKeyDown:function(t){this.__emit("keydown",t)}},render:function(){var t,n=this.prefixCls,r=this.onKeyDown,a=this.tabBarPosition,o=this.extraContent,i=this.$attrs,l=i.class,s=i.style;i.onKeydown;var c=c3e(i,["class","style","onKeydown"]),d=(t={},V(t,"".concat(n,"-bar"),!0),V(t,l,!!l),t),f=a==="top"||a==="bottom",p=f?{float:"right"}:{},v=ht(this),m=v;return o&&(m=[Ot(o,{key:"extra",style:P({},p)}),Ot(v,{key:"content"})],m=f?m:m.reverse()),g("div",le({role:"tablist",class:d,tabindex:"0",onKeydown:r,style:s,ref:this.saveRef("root")},vD(c)),[m])}},f3e={name:"ScrollableTabBarNode",mixins:[nt],inheritAttrs:!1,props:{activeKey:u.any,getRef:u.func.def(function(){}),saveRef:u.func.def(function(){}),tabBarPosition:u.oneOf(["left","right","top","bottom"]).def("left"),prefixCls:u.string.def(""),scrollAnimated:u.looseBool.def(!0),navWrapper:u.func.def(function(e){return 
e}),prevIcon:u.any,nextIcon:u.any,direction:u.string},data:function(){return this.offset=0,this.prevProps=P({},this.$props),{next:!1,prev:!1}},watch:{tabBarPosition:function(){var t=this;this.tabBarPositionChange=!0,this.$nextTick(function(){t.setOffset(0)})}},mounted:function(){var t=this;this.$nextTick(function(){t.updatedCal(),t.debouncedResize=Yn(function(){t.setNextPrev(),t.scrollToActiveTab()},200),t.resizeObserver=new ng(t.debouncedResize),t.resizeObserver.observe(t.$props.getRef("container"))})},updated:function(){var t=this;this.$nextTick(function(){t.updatedCal(t.prevProps),t.prevProps=P({},t.$props)})},beforeUnmount:function(){this.resizeObserver&&this.resizeObserver.disconnect(),this.debouncedResize&&this.debouncedResize.cancel&&this.debouncedResize.cancel()},methods:{updatedCal:function(t){var n=this,r=this.$props;if(t&&t.tabBarPosition!==r.tabBarPosition){this.setOffset(0);return}this.isNextPrevShown(this.$data)!==this.isNextPrevShown(this.setNextPrev())?(this.$forceUpdate(),this.$nextTick(function(){n.scrollToActiveTab()})):(!t||r.activeKey!==t.activeKey)&&this.scrollToActiveTab()},setNextPrev:function(){var t=this.$props.getRef("nav"),n=this.$props.getRef("navTabsContainer"),r=this.getScrollWH(n||t),a=this.getOffsetWH(this.$props.getRef("container"))+1,o=this.getOffsetWH(this.$props.getRef("navWrap")),i=this.offset,l=a-r,s=this.next,c=this.prev;if(l>=0)s=!1,this.setOffset(0,!1),i=0;else if(l<i)s=!0;else{s=!1;var d=o-r;this.setOffset(d,!1),i=d}return i<0?c=!0:c=!1,this.setNext(s),this.setPrev(c),{next:s,prev:c}},getOffsetWH:function(t){var n=this.$props.tabBarPosition,r="offsetWidth";return(n==="left"||n==="right")&&(r="offsetHeight"),t[r]},getScrollWH:function(t){var n=this.tabBarPosition,r="scrollWidth";return(n==="left"||n==="right")&&(r="scrollHeight"),t[r]},getOffsetLT:function(t){var n=this.$props.tabBarPosition,r="left";return(n==="left"||n==="right")&&(r="top"),t.getBoundingClientRect()[r]},setOffset:function(t){var 
n=arguments.length>1&&arguments[1]!==void 0?arguments[1]:!0,r=Math.min(0,t);if(this.offset!==r){this.offset=r;var a={},o=this.$props.tabBarPosition,i=this.$props.getRef("nav").style,l=hD(i);o==="left"||o==="right"?l?a={value:"translate3d(0,".concat(r,"px,0)")}:a={name:"top",value:"".concat(r,"px")}:l?(this.$props.direction==="rtl"&&(r=-r),a={value:"translate3d(".concat(r,"px,0,0)")}):a={name:"left",value:"".concat(r,"px")},l?Wp(i,a.value):i[a.name]=a.value,n&&this.setNextPrev()}},setPrev:function(t){this.prev!==t&&(this.prev=t)},setNext:function(t){this.next!==t&&(this.next=t)},isNextPrevShown:function(t){return t?t.next||t.prev:this.next||this.prev},prevTransitionEnd:function(t){if(t.propertyName==="opacity"){var n=this.$props.getRef("container");this.scrollToActiveTab({target:n,currentTarget:n})}},scrollToActiveTab:function(t){var n=this.$props.getRef("activeTab"),r=this.$props.getRef("navWrap");if(!(t&&t.target!==t.currentTarget||!n)){var a=this.isNextPrevShown()&&this.lastNextPrevShown;if(this.lastNextPrevShown=this.isNextPrevShown(),!!a){var o=this.getScrollWH(n),i=this.getOffsetWH(r),l=this.offset,s=this.getOffsetLT(r),c=this.getOffsetLT(n);s>c?(l+=s-c,this.setOffset(l)):s+i<c+o&&(l-=c+o-(s+i),this.setOffset(l))}}},prevClick:function(t){this.__emit("prevClick",t);var n=this.$props.getRef("navWrap"),r=this.getOffsetWH(n),a=this.offset;this.setOffset(a+r)},nextClick:function(t){this.__emit("nextClick",t);var n=this.$props.getRef("navWrap"),r=this.getOffsetWH(n),a=this.offset;this.setOffset(a-r)}},render:function(){var 
t,n,r,a,o=this.next,i=this.prev,l=this.$props,s=l.prefixCls,c=l.scrollAnimated,d=l.navWrapper,f=We(this,"prevIcon"),p=We(this,"nextIcon"),v=i||o,m=g("span",{onClick:i&&this.prevClick,unselectable:"unselectable",class:(t={},V(t,"".concat(s,"-tab-prev"),1),V(t,"".concat(s,"-tab-btn-disabled"),!i),V(t,"".concat(s,"-tab-arrow-show"),v),t),onTransitionend:this.prevTransitionEnd},[f||g("span",{class:"".concat(s,"-tab-prev-icon")},null)]),y=g("span",{onClick:o&&this.nextClick,unselectable:"unselectable",class:(n={},V(n,"".concat(s,"-tab-next"),1),V(n,"".concat(s,"-tab-btn-disabled"),!o),V(n,"".concat(s,"-tab-arrow-show"),v),n)},[p||g("span",{class:"".concat(s,"-tab-next-icon")},null)]),b="".concat(s,"-nav"),C=(r={},V(r,b,!0),V(r,c?"".concat(b,"-animated"):"".concat(b,"-no-animated"),!0),r);return g("div",{class:(a={},V(a,"".concat(s,"-nav-container"),1),V(a,"".concat(s,"-nav-container-scrolling"),v),a),key:"container",ref:this.saveRef("container")},[m,y,g("div",{class:"".concat(s,"-nav-wrap"),ref:this.saveRef("navWrap")},[g("div",{class:"".concat(s,"-nav-scroll")},[g("div",{class:C,ref:this.saveRef("nav")},[d(ht(this))])])])])}},h3e={props:{children:u.func.def(function(){return null})},methods:{getRef:function(t){return this[t]},saveRef:function(t){var n=this;return function(r){r&&(n[t]=r)}}},render:function(){var t=this,n=function(o){return t.saveRef(o)},r=function(o){return t.getRef(o)};return this.children(n,r)}},G3=G({name:"ScrollableInkTabBar",inheritAttrs:!1,render:function(){var t=this,n=this.$attrs.children;return g(h3e,{children:function(a,o){return 
g(d3e,le({saveRef:a},t.$attrs),{default:function(){return[g(f3e,le({saveRef:a,getRef:o},t.$attrs),{default:function(){return[g(u3e,le({saveRef:a},P(P({},t.$attrs),{renderTabBarNode:n})),null),g(l3e,le({saveRef:a,getRef:o},t.$attrs),null)]}})]}})}},null)}}),p3e=G({name:"TabBar",inheritAttrs:!1,props:{prefixCls:u.string,centered:u.looseBool.def(!1),tabBarStyle:u.style,tabBarExtraContent:u.VNodeChild,type:u.oneOf(rt("line","card","editable-card")),tabPosition:u.oneOf(rt("top","right","bottom","left")).def("top"),tabBarPosition:u.oneOf(rt("top","right","bottom","left")),size:u.oneOf(rt("default","small","large")),animated:{type:[Boolean,Object],default:void 0},renderTabBar:u.func,panels:u.array.def([]),activeKey:u.oneOfType([u.string,u.number]),tabBarGutter:u.number},render:function(){var t,n=this.$props,r=n.centered,a=n.tabBarStyle,o=n.animated,i=o===void 0?!0:o,l=n.renderTabBar,s=n.tabBarExtraContent,c=n.tabPosition,d=n.prefixCls,f=n.type,p=f===void 0?"line":f,v=n.size,m=kt(i)==="object"?i.inkBar:i,y=c==="left"||c==="right",b=g("span",{class:"".concat(d,"-tab-prev-icon")},[y?g(gD,{class:"".concat(d,"-tab-prev-icon-target")},null):g(uc,{class:"".concat(d,"-tab-prev-icon-target")},null)]),C=g("span",{class:"".concat(d,"-tab-next-icon")},[y?g(Rs,{class:"".concat(d,"-tab-next-icon-target")},null):g(wi,{class:"".concat(d,"-tab-next-icon-target")},null)]),S=(t={},V(t,this.$attrs.class,this.$attrs.class),V(t,"".concat(d,"-centered-bar"),r),V(t,"".concat(d,"-").concat(c,"-bar"),!0),V(t,"".concat(d,"-").concat(v,"-bar"),!!v),V(t,"".concat(d,"-card-bar"),p&&p.indexOf("card")>=0),t),w=P(P(P({},this.$props),this.$attrs),{children:null,inkBarAnimated:m,extraContent:s,prevIcon:b,nextIcon:C,style:a,class:S});return l?l(P(P({},w),{DefaultTabBar:G3})):g(G3,w,null)}}),v3e=p3e,m3e=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var 
a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},$a=G({TabPane:G2,name:"ATabs",inheritAttrs:!1,props:{prefixCls:u.string,activeKey:u.oneOfType([u.string,u.number]),defaultActiveKey:u.oneOfType([u.string,u.number]),hideAdd:u.looseBool.def(!1),centered:u.looseBool.def(!1),tabBarStyle:u.object,tabBarExtraContent:u.any,destroyInactiveTabPane:u.looseBool.def(!1),type:u.oneOf(rt("line","card","editable-card")),tabPosition:u.oneOf(["top","right","bottom","left"]).def("top"),size:u.oneOf(["default","small","large"]),animated:an(u.oneOfType([u.looseBool,u.object])),tabBarGutter:u.number,renderTabBar:u.func,onChange:{type:Function},onTabClick:u.func,onPrevClick:{type:Function},onNextClick:{type:Function},onEdit:{type:Function}},emits:["update:activeKey","edit","change"],setup:function(){return{configProvider:ve("configProvider",St)}},methods:{removeTab:function(t,n){n.stopPropagation(),tf(t)&&this.$emit("edit",t,"remove")},handleChange:function(t){this.$emit("update:activeKey",t),this.$emit("change",t)},createNewTab:function(t){this.$emit("edit",t,"add")}},render:function(){var t,n=this,r,a=Qe(this),o=a.prefixCls,i=a.size,l=a.type,s=l===void 0?"line":l,c=a.tabPosition,d=a.animated,f=d===void 0?!0:d,p=a.hideAdd,v=a.renderTabBar,m=this.$attrs,y=m.class,b=m3e(m,["class"]),C=this.configProvider.getPrefixCls,S=C("tabs",o),w=La(ht(this)),k=We(this,"tabBarExtraContent"),$=kt(f)==="object"?f.tabPane:f;s!=="line"&&($="animated"in a?$:!1);var O=(t={},V(t,y,y),V(t,"".concat(S,"-vertical"),c==="left"||c==="right"),V(t,"".concat(S,"-").concat(i),!!i),V(t,"".concat(S,"-card"),s.indexOf("card")>=0),V(t,"".concat(S,"-").concat(s),!0),V(t,"".concat(S,"-no-animation"),!$),t),T=[];s==="editable-card"&&(T=[],w.forEach(function(F,N){var D=rr(F),z=D.closable;z=typeof z=="undefined"?!0:z;var B=z?g(go,{class:"".concat(S,"-close-x"),onClick:function(E){return 
n.removeTab(F.key,E)}},null):null;T.push(Ot(F,{tab:g("div",{class:z?void 0:"".concat(S,"-tab-unclosable")},[We(F,"tab"),B]),key:F.key||N}))}),p||(k=g("span",null,[g(KPe,{class:"".concat(S,"-new-tab"),onClick:this.createNewTab},null),k]))),k=k?g("div",{class:"".concat(S,"-extra-content")},[k]):null;var _=v||this.$slots.renderTabBar,I=P(P(P(P({},a),{prefixCls:S,tabBarExtraContent:k,renderTabBar:_}),b),{children:w}),L=(r={},V(r,"".concat(S,"-").concat(c,"-content"),!0),V(r,"".concat(S,"-card-content"),s.indexOf("card")>=0),r),j=P(P(P(P({},a),{prefixCls:S,tabBarPosition:c,renderTabBar:function(){return g(v3e,le({key:"tabBar"},I),null)},renderTabContent:function(){return g(X2,{class:L,animated:$,animatedWithMargin:!0},null)},children:T.length>0?T:w}),b),{onChange:this.handleChange,class:O});return g(e3e,j,null)}});$a.TabPane=P(P({},G2),{name:"ATabPane",__ANT_TAB_PANE:!0});$a.TabContent=P(P({},X2),{name:"ATabContent"});$a.install=function(e){return e.component($a.name,$a),e.component($a.TabPane.name,$a.TabPane),e.component($a.TabContent.name,$a.TabContent),e};function g3e(){return!!(typeof window!="undefined"&&window.document&&window.document.createElement)}var yD=function(){return g3e()&&window.document.documentElement},bD=function(t){if(yD()){var n=Array.isArray(t)?t:[t],r=window.document.documentElement;return n.some(function(a){return a in r.style})}return!1},Mh,y3e=function(){if(!yD())return!1;if(Mh!==void 0)return Mh;var t=document.createElement("div");return t.style.display="flex",t.style.flexDirection="column",t.style.rowGap="1px",t.appendChild(document.createElement("div")),t.appendChild(document.createElement("div")),document.body.appendChild(t),Mh=t.scrollHeight===1,document.body.removeChild(t),Mh},CD=function(){var e=H(!1);return et(function(){e.value=y3e()}),e},wD=Symbol("rowContextKey"),b3e=function(t){ot(wD,t)},C3e=function(){return 
ve(wD,{gutter:x(function(){}),wrap:x(function(){}),supportFlexGap:x(function(){})})},w3e=rt("top","middle","bottom","stretch"),S3e=rt("start","end","center","space-around","space-between"),k3e={type:u.oneOf(["flex"]),align:u.oneOf(w3e),justify:u.oneOf(S3e),prefixCls:u.string,gutter:u.oneOfType([u.object,u.number,u.array]).def(0),wrap:u.looseBool},$3e=G({name:"ARow",props:k3e,setup:function(t,n){var r=n.slots,a=Wt("row",t),o=a.prefixCls,i=a.direction,l,s=H({xs:!0,sm:!0,md:!0,lg:!0,xl:!0,xxl:!0}),c=CD();et(function(){l=lc.subscribe(function(v){var m=t.gutter||0;(!Array.isArray(m)&&kt(m)==="object"||Array.isArray(m)&&(kt(m[0])==="object"||kt(m[1])==="object"))&&(s.value=v)})}),Lt(function(){lc.unsubscribe(l)});var d=x(function(){var v=[0,0],m=t.gutter,y=m===void 0?0:m,b=Array.isArray(y)?y:[y,0];return b.forEach(function(C,S){if(kt(C)==="object")for(var w=0;w<ws.length;w++){var k=ws[w];if(s.value[k]&&C[k]!==void 0){v[S]=C[k];break}}else v[S]=C||0}),v});b3e({gutter:d,supportFlexGap:c,wrap:x(function(){return t.wrap})});var f=x(function(){var v;return Se(o.value,(v={},V(v,"".concat(o.value,"-no-wrap"),t.wrap===!1),V(v,"".concat(o.value,"-").concat(t.justify),t.justify),V(v,"".concat(o.value,"-").concat(t.align),t.align),V(v,"".concat(o.value,"-rtl"),i.value==="rtl"),v))}),p=x(function(){var v=d.value,m={},y=v[0]>0?"".concat(v[0]/-2,"px"):void 0,b=v[1]>0?"".concat(v[1]/-2,"px"):void 0;return y&&(m.marginLeft=y,m.marginRight=y),c.value?m.rowGap="".concat(v[1],"px"):b&&(m.marginTop=b,m.marginBottom=b),m});return function(){var v;return g("div",{class:f.value,style:p.value},[(v=r.default)===null||v===void 0?void 0:v.call(r)])}}}),Q2=$3e;function O3e(e){return typeof e=="number"?"".concat(e," ").concat(e," auto"):/^\d+(\.\d+)?(px|em|rem|%)$/.test(e)?"0 0 ".concat(e):e}var 
Xa=u.oneOfType([u.string,u.number]),P3e=u.shape({span:Xa,order:Xa,offset:Xa,push:Xa,pull:Xa}).loose,Ys=u.oneOfType([u.string,u.number,P3e]),T3e={span:Xa,order:Xa,offset:Xa,push:Xa,pull:Xa,xs:Ys,sm:Ys,md:Ys,lg:Ys,xl:Ys,xxl:Ys,prefixCls:u.string,flex:Xa},Og=G({name:"ACol",props:T3e,setup:function(t,n){var r=n.slots,a=C3e(),o=a.gutter,i=a.supportFlexGap,l=a.wrap,s=Wt("col",t),c=s.prefixCls,d=s.direction,f=x(function(){var v,m=t.span,y=t.order,b=t.offset,C=t.push,S=t.pull,w=c.value,k={};return["xs","sm","md","lg","xl","xxl"].forEach(function($){var O,T={},_=t[$];typeof _=="number"?T.span=_:kt(_)==="object"&&(T=_||{}),k=P(P({},k),(O={},V(O,"".concat(w,"-").concat($,"-").concat(T.span),T.span!==void 0),V(O,"".concat(w,"-").concat($,"-order-").concat(T.order),T.order||T.order===0),V(O,"".concat(w,"-").concat($,"-offset-").concat(T.offset),T.offset||T.offset===0),V(O,"".concat(w,"-").concat($,"-push-").concat(T.push),T.push||T.push===0),V(O,"".concat(w,"-").concat($,"-pull-").concat(T.pull),T.pull||T.pull===0),V(O,"".concat(w,"-rtl"),d.value==="rtl"),O))}),Se(w,(v={},V(v,"".concat(w,"-").concat(m),m!==void 0),V(v,"".concat(w,"-order-").concat(y),y),V(v,"".concat(w,"-offset-").concat(b),b),V(v,"".concat(w,"-push-").concat(C),C),V(v,"".concat(w,"-pull-").concat(S),S),v),k)}),p=x(function(){var v=t.flex,m=o.value,y={};if(m&&m[0]>0){var b="".concat(m[0]/2,"px");y.paddingLeft=b,y.paddingRight=b}if(m&&m[1]>0&&!i.value){var C="".concat(m[1]/2,"px");y.paddingTop=C,y.paddingBottom=C}return v&&(y.flex=O3e(v),v==="auto"&&l.value===!1&&!y.minWidth&&(y.minWidth=0)),y});return function(){var v;return g("div",{class:f.value,style:p.value},[(v=r.default)===null||v===void 0?void 
0:v.call(r)])}}}),x3e={useBreakpoint:F2},fu=kn(Q2),Ka=kn(Og),_3e=$a.TabPane,E3e={prefixCls:u.string,title:u.VNodeChild,extra:u.VNodeChild,bordered:u.looseBool.def(!0),bodyStyle:u.style,headStyle:u.style,loading:u.looseBool.def(!1),hoverable:u.looseBool.def(!1),type:u.string,size:u.oneOf(rt("default","small")),actions:u.VNodeChild,tabList:{type:Array},tabBarExtraContent:u.VNodeChild,activeTabKey:u.string,defaultActiveTabKey:u.string,cover:u.VNodeChild,onTabChange:{type:Function}},M3e=G({name:"ACard",mixins:[nt],props:E3e,setup:function(){return{configProvider:ve("configProvider",St)}},data:function(){return{widerPadding:!1}},methods:{getAction:function(t){var n=t.map(function(r,a){return rn(r)&&!As(r)||!rn(r)?g("li",{style:{width:"".concat(100/t.length,"%")},key:"action-".concat(a)},[g("span",null,[r])]):null});return n},triggerTabChange:function(t){this.$emit("tabChange",t)},isContainGrid:function(){var t=arguments.length>0&&arguments[0]!==void 0?arguments[0]:[],n;return t.forEach(function(r){r&&bc(r.type)&&r.type.__ANT_CARD_GRID&&(n=!0)}),n}},render:function(){var t,n,r=this.$props,a=r.prefixCls,o=r.headStyle,i=o===void 0?{}:o,l=r.bodyStyle,s=l===void 0?{}:l,c=r.loading,d=r.bordered,f=d===void 0?!0:d,p=r.size,v=p===void 0?"default":p,m=r.type,y=r.tabList,b=r.hoverable,C=r.activeTabKey,S=r.defaultActiveTabKey,w=this.$slots,k=ht(this),$=this.configProvider.getPrefixCls,O=$("card",a),T=We(this,"tabBarExtraContent"),_=(t={},V(t,"".concat(O),!0),V(t,"".concat(O,"-loading"),c),V(t,"".concat(O,"-bordered"),f),V(t,"".concat(O,"-hoverable"),!!b),V(t,"".concat(O,"-contain-grid"),this.isContainGrid(k)),V(t,"".concat(O,"-contain-tabs"),y&&y.length),V(t,"".concat(O,"-").concat(v),v!=="default"),V(t,"".concat(O,"-type-").concat(m),!!m),t),I=s.padding===0||s.padding==="0px"?{padding:24}:void 
0,L=g("div",{class:"".concat(O,"-loading-content"),style:I},[g(fu,{gutter:8},{default:function(){return[g(Ka,{span:22},{default:function(){return[g("div",{class:"".concat(O,"-loading-block")},null)]}})]}}),g(fu,{gutter:8},{default:function(){return[g(Ka,{span:8},{default:function(){return[g("div",{class:"".concat(O,"-loading-block")},null)]}}),g(Ka,{span:15},{default:function(){return[g("div",{class:"".concat(O,"-loading-block")},null)]}})]}}),g(fu,{gutter:8},{default:function(){return[g(Ka,{span:6},{default:function(){return[g("div",{class:"".concat(O,"-loading-block")},null)]}}),g(Ka,{span:18},{default:function(){return[g("div",{class:"".concat(O,"-loading-block")},null)]}})]}}),g(fu,{gutter:8},{default:function(){return[g(Ka,{span:13},{default:function(){return[g("div",{class:"".concat(O,"-loading-block")},null)]}}),g(Ka,{span:9},{default:function(){return[g("div",{class:"".concat(O,"-loading-block")},null)]}})]}}),g(fu,{gutter:8},{default:function(){return[g(Ka,{span:4},{default:function(){return[g("div",{class:"".concat(O,"-loading-block")},null)]}}),g(Ka,{span:3},{default:function(){return[g("div",{class:"".concat(O,"-loading-block")},null)]}}),g(Ka,{span:16},{default:function(){return[g("div",{class:"".concat(O,"-loading-block")},null)]}})]}})]),j=C!==void 0,F=(n={size:"large"},V(n,j?"activeKey":"defaultActiveKey",j?C:S),V(n,"tabBarExtraContent",T),V(n,"onChange",this.triggerTabChange),V(n,"class","".concat(O,"-head-tabs")),n),N,D=y&&y.length?g($a,F,{default:function(){return[y.map(function(J){var ne=J.tab,oe=J.slots,Q=oe==null?void 0:oe.tab,ae=ne!==void 0?ne:w[Q]?w[Q](J):null;return g(_3e,{tab:ae,key:J.key,disabled:J.disabled},null)})]}}):null,z=We(this,"title"),B=We(this,"extra");(z||B||D)&&(N=g("div",{class:"".concat(O,"-head"),style:i},[g("div",{class:"".concat(O,"-head-wrapper")},[z&&g("div",{class:"".concat(O,"-head-title")},[z]),B&&g("div",{class:"".concat(O,"-extra")},[B])]),D]));var 
M=We(this,"cover"),E=M?g("div",{class:"".concat(O,"-cover")},[M]):null,K=g("div",{class:"".concat(O,"-body"),style:s},[c?L:k]),W=We(this,"actions"),Y=W&&W.length?g("ul",{class:"".concat(O,"-actions")},[this.getAction(W)]):null;return g("div",{class:_,ref:"cardContainerRef"},[N,E,k?K:null,Y])}}),Ru=M3e,tm=G({name:"ACardMeta",props:{prefixCls:u.string,title:u.VNodeChild,description:u.VNodeChild,avatar:u.VNodeChild},setup:function(){return{configProvider:ve("configProvider",St)}},render:function(){var t=this.$props.prefixCls,n=this.configProvider.getPrefixCls,r=n("card",t),a=V({},"".concat(r,"-meta"),!0),o=We(this,"avatar"),i=We(this,"title"),l=We(this,"description"),s=o?g("div",{class:"".concat(r,"-meta-avatar")},[o]):null,c=i?g("div",{class:"".concat(r,"-meta-title")},[i]):null,d=l?g("div",{class:"".concat(r,"-meta-description")},[l]):null,f=c||d?g("div",{class:"".concat(r,"-meta-detail")},[c,d]):null;return g("div",{class:a},[s,f])}}),nm=G({name:"ACardGrid",__ANT_CARD_GRID:!0,props:{prefixCls:u.string,hoverable:u.looseBool},setup:function(){return{configProvider:ve("configProvider",St)}},render:function(){var t,n=this.$props,r=n.prefixCls,a=n.hoverable,o=a===void 0?!0:a,i=this.configProvider.getPrefixCls,l=i("card",r),s=(t={},V(t,"".concat(l,"-grid"),!0),V(t,"".concat(l,"-grid-hoverable"),o),t);return g("div",{class:s},[ht(this)])}});Ru.Meta=tm;Ru.Grid=nm;Ru.install=function(e){return e.component(Ru.name,Ru),e.component(tm.name,tm),e.component(nm.name,nm),e};function I3e(e,t){if(!(e instanceof t))throw new TypeError("Cannot call a class as a function")}function X3(e,t){for(var n=0;n<t.length;n++){var r=t[n];r.enumerable=r.enumerable||!1,r.configurable=!0,"value"in r&&(r.writable=!0),Object.defineProperty(e,r.key,r)}}function N3e(e,t,n){return t&&X3(e.prototype,t),n&&X3(e,n),Object.defineProperty(e,"prototype",{writable:!1}),e}var A3e=/\s+/,D3e=function(){function e(t){if(I3e(this,e),!t||!t.nodeType)throw new Error("A DOM element reference is 
required");this.el=t,this.list=t.classList}return N3e(e,[{key:"array",value:function(){var n=this.el.getAttribute("class")||"",r=n.replace(/^\s+|\s+$/g,""),a=r.split(A3e);return a[0]===""&&a.shift(),a}},{key:"add",value:function(n){if(this.list)return this.list.add(n),this;var r=this.array(),a=ty(r,n);return~a||r.push(n),this.el.className=r.join(" "),this}},{key:"remove",value:function(n){if(toString.call(n)==="[object RegExp]")return this._removeMatching(n);if(this.list)return this.list.remove(n),this;var r=this.array(),a=ty(r,n);return~a&&r.splice(a,1),this.el.className=r.join(" "),this}},{key:"_removeMatching",value:function(n){for(var r=this.array(),a=0;a<r.length;a++)n.test(r[a])&&this.remove(r[a]);return this}},{key:"toggle",value:function(n,r){return this.list?(typeof r!="undefined"?r!==this.list.toggle(n,r)&&this.list.toggle(n):this.list.toggle(n),this):(typeof r!="undefined"?r?this.add(n):this.remove(n):this.has(n)?this.remove(n):this.add(n),this)}},{key:"has",value:function(n){return this.list?this.list.contains(n):!!~ty(this.array(),n)}},{key:"contains",value:function(n){return this.has(n)}}]),e}();function g1(e){return new D3e(e)}var SD=si.endEvents.length!==0,R3e=["Webkit","Moz","O","ms"],Z3=["-webkit-","-moz-","-o-","ms-",""];function Ih(e,t){for(var n=window.getComputedStyle(e,null),r="",a=0;a<Z3.length&&(r=n.getPropertyValue(Z3[a]+t),!r);a++);return r}function kD(e){if(SD){var t=parseFloat(Ih(e,"transition-delay"))||0,n=parseFloat(Ih(e,"transition-duration"))||0,r=parseFloat(Ih(e,"animation-delay"))||0,a=parseFloat(Ih(e,"animation-duration"))||0,o=Math.max(n+t,a+r);e.rcEndAnimTimeout=setTimeout(function(){e.rcEndAnimTimeout=null,e.rcEndListener&&e.rcEndListener()},o*1e3+200)}}function $D(e){e.rcEndAnimTimeout&&(clearTimeout(e.rcEndAnimTimeout),e.rcEndAnimTimeout=null)}var Pg=function(t,n,r){var a=kt(n)==="object",o=a?n.name:n,i=a?n.active:"".concat(n,"-active"),l=r,s,c,d=g1(t);return r&&Object.prototype.toString.call(r)==="[object 
Object]"&&(l=r.end,s=r.start,c=r.active),t.rcEndListener&&t.rcEndListener(),t.rcEndListener=function(f){f&&f.target!==t||(t.rcAnimTimeout&&(o2(t.rcAnimTimeout),t.rcAnimTimeout=null),$D(t),d.remove(o),d.remove(i),si.removeEndEventListener(t,t.rcEndListener),t.rcEndListener=null,l&&l())},si.addEndEventListener(t,t.rcEndListener),s&&s(),d.add(o),t.rcAnimTimeout=Fv(function(){t.rcAnimTimeout=null,d.add(o),d.add(i),c&&Fv(c,0),kD(t)},30),{stop:function(){t.rcEndListener&&t.rcEndListener()}}};Pg.style=function(e,t,n){e.rcEndListener&&e.rcEndListener(),e.rcEndListener=function(r){r&&r.target!==e||(e.rcAnimTimeout&&(o2(e.rcAnimTimeout),e.rcAnimTimeout=null),$D(e),si.removeEndEventListener(e,e.rcEndListener),e.rcEndListener=null,n&&n())},si.addEndEventListener(e,e.rcEndListener),e.rcAnimTimeout=Fv(function(){for(var r in t)t.hasOwnProperty(r)&&(e.style[r]=t[r]);e.rcAnimTimeout=null,kD(e)},0)};Pg.setTransition=function(e,t,n){var r=t,a=n;n===void 0&&(a=r,r=""),r=r||"",R3e.forEach(function(o){e.style["".concat(o,"Transition").concat(r)]=a})};Pg.isCssAnimationSupported=SD;var OD=Pg;function J3(e,t,n){var r,a,o;return OD(e,"ant-motion-collapse-legacy",{start:function(){o&&cancelAnimationFrame(o),t?(r=e.offsetHeight,r===0?o=requestAnimationFrame(function(){r=e.offsetHeight,e.style.height="0px",e.style.opacity="0"}):(e.style.height="0px",e.style.opacity="0")):(e.style.height="".concat(e.offsetHeight,"px"),e.style.opacity="1")},active:function(){a&&cancelAnimationFrame(a),a=requestAnimationFrame(function(){e.style.height="".concat(t?r:0,"px"),e.style.opacity=t?"1":"0"})},end:function(){o&&cancelAnimationFrame(o),a&&cancelAnimationFrame(a),e.style.height="",e.style.opacity="",n&&n()}})}var L3e={onEnter:function(t,n){Ne(function(){J3(t,!0,n)})},onLeave:function(t,n){return J3(t,!1,n)}},PD=L3e,F3e=G({name:"PanelContent",props:{prefixCls:u.string,isActive:u.looseBool,destroyInactivePanel:u.looseBool,forceRender:u.looseBool,role:u.any},data:function(){return{_isActive:void 
0}},render:function(){var t;if(this._isActive=this.forceRender||this._isActive||this.isActive,!this._isActive)return null;var n=this.$props,r=n.prefixCls,a=n.isActive,o=n.destroyInactivePanel,i=n.forceRender,l=n.role,s=(t={},V(t,"".concat(r,"-content"),!0),V(t,"".concat(r,"-content-active"),a),t),c=!i&&!a&&o?null:g("div",{class:"".concat(r,"-content-box")},[ht(this)]);return g("div",{class:s,role:l},[c])}}),B3e=function(){return{prefixCls:u.string,activeKey:u.oneOfType([u.string,u.number,u.arrayOf(u.oneOfType([u.string,u.number]))]),defaultActiveKey:u.oneOfType([u.string,u.number,u.arrayOf(u.oneOfType([u.string,u.number]))]),accordion:u.looseBool,destroyInactivePanel:u.looseBool,bordered:u.looseBool,expandIcon:u.func,openAnimation:u.object,expandIconPosition:u.oneOf(["left","right"]),onChange:u.func}},V3e=function(){return{openAnimation:u.object,prefixCls:u.string,header:u.any,headerClass:u.string,showArrow:u.looseBool,isActive:u.looseBool,destroyInactivePanel:u.looseBool,disabled:u.looseBool,accordion:u.looseBool,forceRender:u.looseBool,expandIcon:u.func,extra:u.any,panelKey:u.any}},z3e=G({name:"Panel",mixins:[nt],props:An(V3e(),{showArrow:!0,isActive:!1,destroyInactivePanel:!1,headerClass:"",forceRender:!1}),methods:{handleItemClick:function(){this.__emit("itemClick",this.panelKey)},handleKeyPress:function(t){(t.key==="Enter"||t.keyCode===13||t.which===13)&&this.handleItemClick()}},render:function(){var t,n,r=this,a=this.$props,o=a.prefixCls,i=a.headerClass,l=a.isActive,s=a.showArrow,c=a.destroyInactivePanel,d=a.disabled,f=a.openAnimation,p=a.accordion,v=a.forceRender,m=a.expandIcon,y=a.extra,b=P({appear:!0,css:!1},f),C=(t={},V(t,"".concat(o,"-header"),!0),V(t,i,i),t),S=We(this,"header"),w=(n={},V(n,"".concat(o,"-item"),!0),V(n,"".concat(o,"-item-active"),l),V(n,"".concat(o,"-item-disabled"),d),n),k=g("i",{class:"arrow"},null);s&&typeof m=="function"&&(k=m(this.$props));var 
$=at(g(F3e,{prefixCls:o,isActive:l,destroyInactivePanel:c,forceRender:v,role:p?"tabpanel":null},{default:function(){return[ht(r)]}}),[[_t,l]]);return g("div",{class:w,role:"tablist"},[g("div",{class:C,onClick:this.handleItemClick,onKeypress:this.handleKeyPress,role:p?"tab":"button",tabindex:d?-1:0,"aria-expanded":l},[s&&k,S,y&&g("div",{class:"".concat(o,"-extra")},[y])]),g(Cg,b,{default:function(){return[$]}})])}});function Q3(e,t,n,r){var a;return OD(e,n,{start:function(){t?(a=e.offsetHeight,e.style.height=0):e.style.height="".concat(e.offsetHeight,"px")},active:function(){e.style.height="".concat(t?a:0,"px")},end:function(){e.style.height="",r()}})}function H3e(e){return{onEnter:function(n,r){return Q3(n,!0,"".concat(e,"-anim"),r)},onLeave:function(n,r){return Q3(n,!1,"".concat(e,"-anim"),r)}}}function eT(e){var t=e;if(!Array.isArray(t)){var n=kt(t);t=n==="number"||n==="string"?[t]:[]}return t.map(function(r){return String(r)})}var eS=G({name:"Collapse",mixins:[nt],inheritAttrs:!1,props:An(B3e(),{prefixCls:"rc-collapse",accordion:!1,destroyInactivePanel:!1}),data:function(){var t=this.$props,n=t.activeKey,r=t.defaultActiveKey,a=t.openAnimation,o=t.prefixCls,i=r;vt(this,"activeKey")&&(i=n);var l=a||H3e(o);return{currentOpenAnimations:l,stateActiveKey:eT(i)}},watch:{activeKey:function(t){this.setState({stateActiveKey:eT(t)})},openAnimation:function(t){this.setState({currentOpenAnimations:t})}},methods:{onClickItem:function(t){var n=this.stateActiveKey;if(this.accordion)n=n[0]===t?[]:[t];else{n=Je(n);var r=n.indexOf(t),a=r>-1;a?n.splice(r,1):n.push(t)}this.setActiveKey(n)},getNewChild:function(t,n){var r;if(!As(t)){var a=this.stateActiveKey,o=this.$props,i=o.prefixCls,l=o.accordion,s=o.destroyInactivePanel,c=o.expandIcon,d=String((r=t.key)!==null&&r!==void 0?r:n),f=rr(t),p=f.header,v=f.headerClass,m=f.disabled,y=!1;l?y=a[0]===d:y=a.indexOf(d)>-1;var b={};!m&&m!==""&&(b={onItemClick:this.onClickItem});var 
C=P({key:d,panelKey:d,header:p,headerClass:v,isActive:y,prefixCls:i,destroyInactivePanel:s,openAnimation:this.currentOpenAnimations,accordion:l,expandIcon:c},b);return Ot(t,C)}},getItems:function(){var t=this,n=[],r=ht(this);return r&&r.forEach(function(a,o){n.push(t.getNewChild(a,o))}),n},setActiveKey:function(t){vt(this,"activeKey")||this.setState({stateActiveKey:t}),this.__emit("change",this.accordion?t[0]:t)}},render:function(){var t,n=this.$props,r=n.prefixCls,a=n.accordion,o=this.$attrs,i=o.class,l=o.style,s=(t={},V(t,r,!0),V(t,i,i),t);return g("div",le(le({class:s},eg(this.$attrs)),{},{style:l,role:a?"tablist":null}),[this.getItems()])}});eS.Panel=z3e;var j3e=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},K3e={prefixCls:u.string,activeKey:{type:[Array,Number,String]},defaultActiveKey:{type:[Array,Number,String]},accordion:u.looseBool,destroyInactivePanel:u.looseBool,bordered:u.looseBool.def(!0),expandIcon:u.func,openAnimation:u.object.def(PD),expandIconPosition:u.oneOf(rt("left","right")).def("left"),"onUpdate:activeKey":u.func,onChange:u.func},Pd=G({name:"ACollapse",inheritAttrs:!1,props:K3e,setup:function(){return{configProvider:ve("configProvider",St)}},methods:{renderExpandIcon:function(){var t=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{},n=arguments.length>1?arguments[1]:void 0,r=We(this,"expandIcon",t),a=r||g(wi,{rotate:t.isActive?90:void 0},null);return zn(Array.isArray(r)?a[0]:a)?Ot(a,{class:"".concat(n,"-arrow")}):a},handleChange:function(t){this.$emit("update:activeKey",t),this.$emit("change",t)}},render:function(){var 
t,n=this,r=this.prefixCls,a=this.bordered,o=this.expandIconPosition,i=this.configProvider.getPrefixCls,l=i("collapse",r),s=this.$attrs,c=s.class,d=j3e(s,["class"]),f=(t={},V(t,c,c),V(t,"".concat(l,"-borderless"),!a),V(t,"".concat(l,"-icon-position-").concat(o),!0),t),p=P(P(P(P({},Qe(this)),{prefixCls:l,expandIcon:function(m){return n.renderExpandIcon(m,l)},class:f}),d),{onChange:this.handleChange});return g(eS,p,{default:function(){return[ht(n)]}})}}),W3e=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},U3e={openAnimation:u.object,prefixCls:u.string,header:u.VNodeChild,headerClass:u.string,showArrow:u.looseBool,isActive:u.looseBool,destroyInactivePanel:u.looseBool,disabled:u.looseBool,accordion:u.looseBool,forceRender:u.looseBool,expandIcon:u.func,extra:u.VNodeChild,panelKey:u.VNodeChild},rm=G({name:"ACollapsePanel",inheritAttrs:!1,props:U3e,setup:function(){return{configProvider:ve("configProvider",St)}},render:function(){var t,n=this,r=this.prefixCls,a=this.showArrow,o=a===void 0?!0:a,i=this.configProvider.getPrefixCls,l=i("collapse",r),s=this.$attrs,c=s.class,d=W3e(s,["class"]),f=(t={},V(t,c,c),V(t,"".concat(l,"-no-arrow"),!o),t),p=P(P(P({},Qe(this)),{header:We(this,"header"),prefixCls:l,extra:We(this,"extra"),class:f}),d);return g(eS.Panel,p,{default:function(){return[ht(n)]}})}});Pd.Panel=rm;Pd.install=function(e){return e.component(Pd.name,Pd),e.component(rm.name,rm),e};var Y3e=function(t){return t.replace(/[A-Z]/g,function(n){return"-"+n.toLowerCase()}).toLowerCase()},q3e=function(t){var n=/[height|width]$/;return n.test(t)},tT=function(t){var n="",r=Object.keys(t);return r.forEach(function(a,o){var i=t[a];a=Y3e(a),q3e(a)&&typeof 
i=="number"&&(i=i+"px"),i===!0?n+=a:i===!1?n+="not "+a:n+="("+a+": "+i+")",o<r.length-1&&(n+=" and ")}),n};function Xy(e){var t="";return typeof e=="string"?e:e instanceof Array?(e.forEach(function(n,r){t+=tT(n),r<e.length-1&&(t+=", ")}),t):tT(e)}var G3e={accessibility:u.looseBool.def(!0),adaptiveHeight:u.looseBool.def(!1),afterChange:u.any.def(null),arrows:u.looseBool.def(!0),autoplay:u.looseBool.def(!1),autoplaySpeed:u.number.def(3e3),beforeChange:u.any.def(null),centerMode:u.looseBool.def(!1),centerPadding:u.string.def("50px"),cssEase:u.string.def("ease"),dots:u.looseBool.def(!1),dotsClass:u.string.def("slick-dots"),draggable:u.looseBool.def(!0),unslick:u.looseBool.def(!1),easing:u.string.def("linear"),edgeFriction:u.number.def(.35),fade:u.looseBool.def(!1),focusOnSelect:u.looseBool.def(!1),infinite:u.looseBool.def(!0),initialSlide:u.number.def(0),lazyLoad:u.any.def(null),verticalSwiping:u.looseBool.def(!1),asNavFor:u.any.def(null),pauseOnDotsHover:u.looseBool.def(!1),pauseOnFocus:u.looseBool.def(!1),pauseOnHover:u.looseBool.def(!0),responsive:u.array,rows:u.number.def(1),rtl:u.looseBool.def(!1),slide:u.string.def("div"),slidesPerRow:u.number.def(1),slidesToScroll:u.number.def(1),slidesToShow:u.number.def(1),speed:u.number.def(500),swipe:u.looseBool.def(!0),swipeEvent:u.any.def(null),swipeToSlide:u.looseBool.def(!1),touchMove:u.looseBool.def(!0),touchThreshold:u.number.def(5),useCSS:u.looseBool.def(!0),useTransform:u.looseBool.def(!0),variableWidth:u.looseBool.def(!1),vertical:u.looseBool.def(!1),waitForAnimate:u.looseBool.def(!0),children:u.array,__propsSymbol__:u.any},TD=G3e,X3e={animating:!1,autoplaying:null,currentDirection:0,currentLeft:null,currentSlide:0,direction:1,dragging:!1,edgeDragged:!1,initialized:!1,lazyLoadedList:[],listHeight:null,listWidth:null,scrolling:!1,slideCount:null,slideHeight:null,slideWidth:null,swipeLeft:null,swiped:!1,swiping:!1,touchObject:{startX:0,startY:0,curX:0,curY:0},trackStyle:{},trackWidth:0},Z3e=X3e,am=function(t){for(var 
n=[],r=xD(t),a=_D(t),o=r;o<a;o++)t.lazyLoadedList.indexOf(o)<0&&n.push(o);return n},/* xD: lower bound of the index window scanned by the loop above: currentSlide minus J3e(t). */xD=function(t){return t.currentSlide-J3e(t)},/* _D: upper (exclusive) bound of that window: currentSlide plus Q3e(t). */_D=function(t){return t.currentSlide+Q3e(t)},/* J3e: slides counted before currentSlide. 0 unless centerMode is truthy; otherwise floor(slidesToShow/2), plus 1 when parseInt(centerPadding) is positive. */J3e=function(t){return t.centerMode?Math.floor(t.slidesToShow/2)+(parseInt(t.centerPadding)>0?1:0):0},/* Q3e: slides counted from currentSlide onwards. slidesToShow unless centerMode is truthy; otherwise floor((slidesToShow-1)/2)+1, plus 1 when parseInt(centerPadding) is positive. */Q3e=function(t){return t.centerMode?Math.floor((t.slidesToShow-1)/2)+1+(parseInt(t.centerPadding)>0?1:0):t.slidesToShow},/* y1: offsetWidth of t, or 0 when t (or its offsetWidth) is falsy. */y1=function(t){return t&&t.offsetWidth||0},/* tS: offsetHeight of t, or 0 when t (or its offsetHeight) is falsy. */tS=function(t){return t&&t.offsetHeight||0},/* ED: classifies a swipe from t.startX/startY/curX/curY. The atan2 angle is converted to whole degrees and normalised into 0..360; 0..45 or 315..360 gives "left", 135..225 gives "right". Otherwise, when the optional second argument is exactly true, 35..135 gives "up" and anything else "down"; without it the result is "vertical". */ED=function(t){var n=arguments.length>1&&arguments[1]!==void 0?arguments[1]:!1,r,a=t.startX-t.curX,o=t.startY-t.curY,i=Math.atan2(o,a);return r=Math.round(i*180/Math.PI),r<0&&(r=360-Math.abs(r)),r<=45&&r>=0||r<=360&&r>=315?"left":r>=135&&r<=225?"right":n===!0?r>=35&&r<=135?"up":"down":"vertical"},/* Tg: returns true when t.infinite is truthy; otherwise false if (centerMode and currentSlide >= slideCount-1), or slideCount <= slidesToShow, or currentSlide >= slideCount-slidesToShow; true in the remaining cases. */Tg=function(t){var n=!0;return t.infinite||(t.centerMode&&t.currentSlide>=t.slideCount-1||t.slideCount<=t.slidesToShow||t.currentSlide>=t.slideCount-t.slidesToShow)&&(n=!1),n},/* Zy: returns a new object holding t[key] for every key listed in the array n. */Zy=function(t,n){var r={};return n.forEach(function(a){return r[a]=t[a]}),r},/* eTe: builds a state object from spec t. slideCount is t.children.length; listWidth/trackWidth are the rounded-up offsetWidth of t.listRef/t.trackRef; slideWidth is listWidth when t.vertical, otherwise ceil((listWidth - centre padding) / slidesToShow) where a centerPadding string ending in "%" is scaled by listWidth/100; slideHeight is the offsetHeight of the element matching [data-index="0"] inside listRef; listHeight is slideHeight * slidesToShow; currentSlide falls back to initialSlide (mirrored as count-1-initialSlide when t.rtl) if t.currentSlide is undefined. autoplaying is set to "playing" only when t.autoplaying is null and t.autoplay is truthy. */eTe=function(t){var n=t.children.length,r=Math.ceil(y1(t.listRef)),a=Math.ceil(y1(t.trackRef)),o;if(t.vertical)o=r;else{var i=t.centerMode&&parseInt(t.centerPadding)*2;typeof t.centerPadding=="string"&&t.centerPadding.slice(-1)==="%"&&(i*=r/100),o=Math.ceil((r-i)/t.slidesToShow)}var l=t.listRef&&tS(t.listRef.querySelector('[data-index="0"]')),s=l*t.slidesToShow,c=t.currentSlide===void 0?t.initialSlide:t.currentSlide;t.rtl&&t.currentSlide===void 0&&(c=n-1-t.initialSlide);var d=t.lazyLoadedList||[],f=am({currentSlide:c,lazyLoadedList:d});/* NOTE(review): the value returned by concat below is discarded, so the lazyLoadedList placed in the returned state is always the incoming list. am is also called with only currentSlide and lazyLoadedList, so Q3e reads an undefined slidesToShow, the window bound is NaN and f is always empty. This looks unintended - confirm against the caller's own lazy-load handling before changing it. */d.concat(f);var p={slideCount:n,slideWidth:o,listWidth:r,trackWidth:a,currentSlide:c,slideHeight:l,listHeight:s,lazyLoadedList:d};return t.autoplaying===null&&t.autoplay&&(p.autoplaying="playing"),p},tTe=function(t){var 
n=t.waitForAnimate,r=t.animating,a=t.fade,o=t.infinite,i=t.index,l=t.slideCount,s=t.lazyLoadedList,c=t.lazyLoad,d=t.currentSlide,f=t.centerMode,p=t.slidesToScroll,v=t.slidesToShow,m=t.useCSS;if(n&&r)return{};var y=i,b,C,S,w={},k={};if(a){if(!o&&(i<0||i>=l))return{};i<0?y=i+l:i>=l&&(y=i-l),c&&s.indexOf(y)<0&&s.push(y),w={animating:!0,currentSlide:y,lazyLoadedList:s},k={animating:!1}}else b=y,y<0?(b=y+l,o?l%p!==0&&(b=l-l%p):b=0):!Tg(t)&&y>d?y=b=d:f&&y>=l?(y=o?l:l-1,b=o?0:l-1):y>=l&&(b=y-l,o?l%p!==0&&(b=0):b=l-v),C=hf(P(P({},t),{slideIndex:y})),S=hf(P(P({},t),{slideIndex:b})),o||(C===S&&(y=b),C=S),c&&s.concat(am(P(P({},t),{currentSlide:y}))),m?(w={animating:!0,currentSlide:b,trackStyle:MD(P(P({},t),{left:C})),lazyLoadedList:s},k={animating:!1,currentSlide:b,trackStyle:ff(P(P({},t),{left:S})),swipeLeft:null}):w={currentSlide:b,trackStyle:ff(P(P({},t),{left:S})),lazyLoadedList:s};return{state:w,nextState:k}},nTe=function(t,n){var r,a,o,i=t.slidesToScroll,l=t.slidesToShow,s=t.slideCount,c=t.currentSlide,d=t.lazyLoad,f=t.infinite,p=s%i!==0,v=p?0:(s-c)%i;if(n.message==="previous")a=v===0?i:l-v,o=c-a,d&&!f&&(r=c-a,o=r===-1?s-1:r);else if(n.message==="next")a=v===0?i:v,o=c+a,d&&!f&&(o=(c+i)%s+v);else if(n.message==="dots"){if(o=n.index*n.slidesToScroll,o===n.currentSlide)return null}else if(n.message==="children"){if(o=n.index,o===n.currentSlide)return null;if(f){var m=uTe(P(P({},t),{targetSlide:o}));o>n.currentSlide&&m==="left"?o=o-s:o<n.currentSlide&&m==="right"&&(o=o+s)}}else if(n.message==="index"&&(o=Number(n.index),o===n.currentSlide))return null;return o},rTe=function(t,n,r){return t.target.tagName.match("TEXTAREA|INPUT|SELECT")||!n?"":t.keyCode===37?r?"next":"previous":t.keyCode===39?r?"previous":"next":""},aTe=function(t,n,r){return 
t.target.tagName==="IMG"&&t.preventDefault(),!n||!r&&t.type.indexOf("mouse")!==-1?"":{dragging:!0,touchObject:{startX:t.touches?t.touches[0].pageX:t.clientX,startY:t.touches?t.touches[0].pageY:t.clientY,curX:t.touches?t.touches[0].pageX:t.clientX,curY:t.touches?t.touches[0].pageY:t.clientY}}},oTe=function(t,n){var r=n.scrolling,a=n.animating,o=n.vertical,i=n.swipeToSlide,l=n.verticalSwiping,s=n.rtl,c=n.currentSlide,d=n.edgeFriction,f=n.edgeDragged,p=n.onEdge,v=n.swiped,m=n.swiping,y=n.slideCount,b=n.slidesToScroll,C=n.infinite,S=n.touchObject,w=n.swipeEvent,k=n.listHeight,$=n.listWidth;if(!r){if(a)return t.preventDefault();o&&i&&l&&t.preventDefault();var O,T={},_=hf(n);S.curX=t.touches?t.touches[0].pageX:t.clientX,S.curY=t.touches?t.touches[0].pageY:t.clientY,S.swipeLength=Math.round(Math.sqrt(Math.pow(S.curX-S.startX,2)));var I=Math.round(Math.sqrt(Math.pow(S.curY-S.startY,2)));if(!l&&!m&&I>10)return{scrolling:!0};l&&(S.swipeLength=I);var L=(s?-1:1)*(S.curX>S.startX?1:-1);l&&(L=S.curY>S.startY?1:-1);var j=Math.ceil(y/b),F=ED(n.touchObject,l),N=S.swipeLength;return C||(c===0&&F==="right"||c+1>=j&&F==="left"||!Tg(n)&&F==="left")&&(N=S.swipeLength*d,f===!1&&p&&(p(F),T.edgeDragged=!0)),!v&&w&&(w(F),T.swiped=!0),o?O=_+N*(k/$)*L:s?O=_-N*L:O=_+N*L,l&&(O=_+N*L),T=P(P({},T),{touchObject:S,swipeLeft:O,trackStyle:ff(P(P({},n),{left:O}))}),Math.abs(S.curX-S.startX)<Math.abs(S.curY-S.startY)*.8||S.swipeLength>10&&(T.swiping=!0,t.preventDefault()),T}},iTe=function(t,n){var r=n.dragging,a=n.swipe,o=n.touchObject,i=n.listWidth,l=n.touchThreshold,s=n.verticalSwiping,c=n.listHeight,d=n.currentSlide,f=n.swipeToSlide,p=n.scrolling,v=n.onSwipe;if(!r)return a&&t.preventDefault(),{};var m=s?c/l:i/l,y=ED(o,s),b={dragging:!1,edgeDragged:!1,scrolling:!1,swiping:!1,swiped:!1,swipeLeft:null,touchObject:{}};if(p||!o.swipeLength)return b;if(o.swipeLength>m){t.preventDefault(),v&&v(y);var 
C,S;switch(y){case"left":case"up":S=d+rT(n),C=f?nT(n,S):S,b.currentDirection=0;break;case"right":case"down":S=d-rT(n),C=f?nT(n,S):S,b.currentDirection=1;break;default:C=d}b.triggerSlideHandler=C}else{var w=hf(n);b.trackStyle=MD(P(P({},n),{left:w}))}return b},lTe=function(t){for(var n=t.infinite?t.slideCount*2:t.slideCount,r=t.infinite?t.slidesToShow*-1:0,a=t.infinite?t.slidesToShow*-1:0,o=[];r<n;)o.push(r),r=a+t.slidesToScroll,a+=Math.min(t.slidesToScroll,t.slidesToShow);return o},nT=function(t,n){var r=lTe(t),a=0;if(n>r[r.length-1])n=r[r.length-1];else for(var o in r){if(n<r[o]){n=a;break}a=r[o]}return n},rT=function(t){var n=t.centerMode?t.slideWidth*Math.floor(t.slidesToShow/2):0;if(t.swipeToSlide){var r,a=t.listRef,o=a.querySelectorAll(".slick-slide");if(Array.from(o).every(function(s){if(t.vertical){if(s.offsetTop+tS(s)/2>t.swipeLeft*-1)return r=s,!1}else if(s.offsetLeft-n+y1(s)/2>t.swipeLeft*-1)return r=s,!1;return!0}),!r)return 0;var i=t.rtl===!0?t.slideCount-t.currentSlide:t.currentSlide,l=Math.abs(r.dataset.index-i)||1;return l}else return t.slidesToScroll},nS=function(t,n){return n.reduce(function(r,a){return r&&t.hasOwnProperty(a)},!0)?null:console.error("Keys Missing:",t)},ff=function(t){nS(t,["left","variableWidth","slideCount","slidesToShow","slideWidth"]);var n,r,a=t.slideCount+2*t.slidesToShow;t.vertical?r=a*t.slideHeight:n=sTe(t)*t.slideWidth;var o={opacity:1,transition:"",WebkitTransition:""};if(t.useTransform){var i=t.vertical?"translate3d(0px, "+t.left+"px, 0px)":"translate3d("+t.left+"px, 0px, 0px)",l=t.vertical?"translate3d(0px, "+t.left+"px, 0px)":"translate3d("+t.left+"px, 0px, 0px)",s=t.vertical?"translateY("+t.left+"px)":"translateX("+t.left+"px)";o=P(P({},o),{WebkitTransform:i,transform:l,msTransform:s})}else t.vertical?o.top=t.left:o.left=t.left;return 
t.fade&&(o={opacity:1}),n&&(o.width=n+"px"),r&&(o.height=r+"px"),window&&!window.addEventListener&&window.attachEvent&&(t.vertical?o.marginTop=t.left+"px":o.marginLeft=t.left+"px"),o},MD=function(t){nS(t,["left","variableWidth","slideCount","slidesToShow","slideWidth","speed","cssEase"]);var n=ff(t);return t.useTransform?(n.WebkitTransition="-webkit-transform "+t.speed+"ms "+t.cssEase,n.transition="transform "+t.speed+"ms "+t.cssEase):t.vertical?n.transition="top "+t.speed+"ms "+t.cssEase:n.transition="left "+t.speed+"ms "+t.cssEase,n},hf=function(t){if(t.unslick)return 0;nS(t,["slideIndex","trackRef","infinite","centerMode","slideCount","slidesToShow","slidesToScroll","slideWidth","listWidth","variableWidth","slideHeight"]);var n=t.slideIndex,r=t.trackRef,a=t.infinite,o=t.centerMode,i=t.slideCount,l=t.slidesToShow,s=t.slidesToScroll,c=t.slideWidth,d=t.listWidth,f=t.variableWidth,p=t.slideHeight,v=t.fade,m=t.vertical,y=0,b,C,S=0;if(v||t.slideCount===1)return 0;var w=0;if(a?(w=-ui(t),i%s!==0&&n+s>i&&(w=-(n>i?l-(n-i):i%s)),o&&(w+=parseInt(l/2))):(i%s!==0&&n+s>i&&(w=l-i%s),o&&(w=parseInt(l/2))),y=w*c,S=w*p,m?b=n*p*-1+S:b=n*c*-1+y,f===!0){var k,$=r;if(k=n+ui(t),C=$&&$.childNodes[k],b=C?C.offsetLeft*-1:0,o===!0){k=a?n+ui(t):n,C=$&&$.children[k],b=0;for(var O=0;O<k;O++)b-=$&&$.children[O]&&$.children[O].offsetWidth;b-=parseInt(t.centerPadding),b+=C&&(d-C.offsetWidth)/2}}return b},ui=function(t){return t.unslick||!t.infinite?0:t.variableWidth?t.slideCount:t.slidesToShow+(t.centerMode?1:0)},Up=function(t){return t.unslick||!t.infinite?0:t.slideCount},sTe=function(t){return t.slideCount===1?1:ui(t)+t.slideCount+Up(t)},uTe=function(t){return t.targetSlide>t.currentSlide?t.targetSlide>t.currentSlide+cTe(t)?"left":"right":t.targetSlide<t.currentSlide-dTe(t)?"right":"left"},cTe=function(t){var n=t.slidesToShow,r=t.centerMode,a=t.rtl,o=t.centerPadding;if(r){var i=(n-1)/2+1;return parseInt(o)>0&&(i+=1),a&&n%2===0&&(i+=1),i}return a?0:n-1},dTe=function(t){var 
n=t.slidesToShow,r=t.centerMode,a=t.rtl,o=t.centerPadding;if(r){var i=(n-1)/2+1;return parseInt(o)>0&&(i+=1),!a&&n%2===0&&(i+=1),i}return a?n-1:0},aT=function(){return!!(typeof window!="undefined"&&window.document&&window.document.createElement)},Jy=function(t){var n,r,a,o;t.rtl?o=t.slideCount-1-t.index:o=t.index;var i=o<0||o>=t.slideCount;t.centerMode?(a=Math.floor(t.slidesToShow/2),r=(o-t.currentSlide)%t.slideCount===0,o>t.currentSlide-a-1&&o<=t.currentSlide+a&&(n=!0)):n=t.currentSlide<=o&&o<t.currentSlide+t.slidesToShow;var l=o===t.currentSlide;return{"slick-slide":!0,"slick-active":n,"slick-center":r,"slick-cloned":i,"slick-current":l}},fTe=function(t){var n={};return(t.variableWidth===void 0||t.variableWidth===!1)&&(n.width=t.slideWidth+(typeof t.slideWidth=="number"?"px":"")),t.fade&&(n.position="relative",t.vertical?n.top=-t.index*parseInt(t.slideHeight)+"px":n.left=-t.index*parseInt(t.slideWidth)+"px",n.opacity=t.currentSlide===t.index?1:0,n.transition="opacity "+t.speed+"ms "+t.cssEase+", visibility "+t.speed+"ms "+t.cssEase,n.WebkitTransition="opacity "+t.speed+"ms "+t.cssEase+", visibility "+t.speed+"ms "+t.cssEase),n},Qy=function(t,n){return t.key||t.key===0&&"0"||n},hTe=function(t,n){var r,a=[],o=[],i=[],l=n.length,s=xD(t),c=_D(t);return n.forEach(function(d,f){var p,v={message:"children",index:f,slidesToScroll:t.slidesToScroll,currentSlide:t.currentSlide};!t.lazyLoad||t.lazyLoad&&t.lazyLoadedList.indexOf(f)>=0?p=d:p=g("div");var m=fTe(P(P({},t),{index:f})),y=p.props.class||"",b=Jy(P(P({},t),{index:f}));if(a.push(Ot(p,{key:"original"+Qy(p,f),tabindex:"-1","data-index":f,"aria-hidden":!b["slick-active"],class:Se(b,y),style:P(P({outline:"none"},p.props.style||{}),m),onClick:function(){t.focusOnSelect&&t.focusOnSelect(v)}})),t.infinite&&t.fade===!1){var 
C=l-f;C<=ui(t)&&l!==t.slidesToShow&&(r=-C,r>=s&&(p=d),b=Jy(P(P({},t),{index:r})),o.push(Ot(p,{key:"precloned"+Qy(p,r),class:Se(b,y),tabindex:"-1","data-index":r,"aria-hidden":!b["slick-active"],style:P(P({},p.props.style||{}),m),onClick:function(){t.focusOnSelect&&t.focusOnSelect(v)}}))),l!==t.slidesToShow&&(r=l+f,r<c&&(p=d),b=Jy(P(P({},t),{index:r})),i.push(Ot(p,{key:"postcloned"+Qy(p,r),tabindex:"-1","data-index":r,"aria-hidden":!b["slick-active"],class:Se(b,y),style:P(P({},p.props.style||{}),m),onClick:function(){t.focusOnSelect&&t.focusOnSelect(v)}})))}}),t.rtl?o.concat(a,i).reverse():o.concat(a,i)},ID=function(t,n){var r=n.attrs,a=n.slots,o=hTe(r,Un(a==null?void 0:a.default())),i=r.onMouseenter,l=r.onMouseover,s=r.onMouseleave,c={onMouseenter:i,onMouseover:l,onMouseleave:s},d=P({class:"slick-track",style:r.trackStyle},c);return g("div",d,[o])};ID.inheritAttrs=!1;var pTe=ID,vTe=function(t){var n;return t.infinite?n=Math.ceil(t.slideCount/t.slidesToScroll):n=Math.ceil((t.slideCount-t.slidesToShow)/t.slidesToScroll)+1,n},ND=function(t,n){var r=n.attrs,a=r.slideCount,o=r.slidesToScroll,i=r.slidesToShow,l=r.infinite,s=r.currentSlide,c=r.appendDots,d=r.customPaging,f=r.clickHandler,p=r.dotsClass,v=r.onMouseenter,m=r.onMouseover,y=r.onMouseleave,b=vTe({slideCount:a,slidesToScroll:o,slidesToShow:i,infinite:l}),C={onMouseenter:v,onMouseover:m,onMouseleave:y},S=Array.apply(null,Array(b+1).join("0").split("")).map(function(w,k){var $=k*o,O=k*o+(o-1),T=Se({"slick-active":s>=$&&s<=O}),_={message:"dots",index:k,slidesToScroll:o,currentSlide:s};function I(L){L&&L.preventDefault(),f(_)}return g("li",{key:k,class:T},[Ot(d({i:k}),{onClick:I})])});return Ot(c({dots:S}),P({class:p},C))};ND.inheritAttrs=!1;var mTe=ND;function AD(){}function DD(e,t,n){n&&n.preventDefault(),t(e,n)}var RD=function(t,n){var 
r=n.attrs,a=r.clickHandler,o=r.infinite,i=r.currentSlide,l=r.slideCount,s=r.slidesToShow,c={"slick-arrow":!0,"slick-prev":!0},d=function(y){DD({message:"previous"},a,y)};!o&&(i===0||l<=s)&&(c["slick-disabled"]=!0,d=AD);var f={key:"0","data-role":"none",class:c,style:{display:"block"},onClick:d},p={currentSlide:i,slideCount:l},v;return r.prevArrow?v=Ot(r.prevArrow(P(P({},f),p)),{key:"0",class:c,style:{display:"block"},onClick:d},!1):v=g("button",le({key:"0",type:"button"},f),[" ",yt("Previous")]),v};RD.inheritAttrs=!1;var LD=function(t,n){var r=n.attrs,a=r.clickHandler,o=r.currentSlide,i=r.slideCount,l={"slick-arrow":!0,"slick-next":!0},s=function(v){DD({message:"next"},a,v)};Tg(r)||(l["slick-disabled"]=!0,s=AD);var c={key:"1","data-role":"none",class:Se(l),style:{display:"block"},onClick:s},d={currentSlide:o,slideCount:i},f;return r.nextArrow?f=Ot(r.nextArrow(P(P({},c),d)),{key:"1",class:Se(l),style:{display:"block"},onClick:s},!1):f=g("button",le({key:"1",type:"button"},c),[" ",yt("Next")]),f};LD.inheritAttrs=!1;var gTe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n};function Xr(){}var yTe={name:"InnerSlider",inheritAttrs:!1,props:P({},TD),mixins:[nt],data:function(){return this.preProps=P({},this.$props),this.list=null,this.track=null,this.callbackTimers=[],this.clickable=!0,this.debouncedResize=null,P(P({},Z3e),{currentSlide:this.initialSlide,slideCount:this.children.length})},methods:{listRefHandler:function(t){this.list=t},trackRefHandler:function(t){this.track=t},adaptHeight:function(){if(this.adaptiveHeight&&this.list){var 
t=this.list.querySelector('[data-index="'.concat(this.currentSlide,'"]'));this.list.style.height=tS(t)+"px"}},onWindowResized:function(t){var n=this;this.debouncedResize&&this.debouncedResize.cancel(),this.debouncedResize=Yn(function(){return n.resizeWindow(t)},50),this.debouncedResize()},resizeWindow:function(){var t=this,n=arguments.length>0&&arguments[0]!==void 0?arguments[0]:!0;if(!!this.track){var r=P(P({listRef:this.list,trackRef:this.track,children:this.children},this.$props),this.$data);this.updateState(r,n,function(){t.autoplay?t.handleAutoPlay("update"):t.pause("paused")}),this.setState({animating:!1}),clearTimeout(this.animationEndCallback),delete this.animationEndCallback}},updateState:function(t,n,r){var a=eTe(t);t=P(P(P({},t),a),{slideIndex:a.currentSlide});var o=hf(t);t=P(P({},t),{left:o});var i=ff(t);(n||this.children.length!==t.children.length)&&(a.trackStyle=i),this.setState(a,r)},ssrInit:function(){var t=this.children;if(this.variableWidth){var n=0,r=0,a=[],o=ui(P(P(P({},this.$props),this.$data),{slideCount:t.length})),i=Up(P(P(P({},this.$props),this.$data),{slideCount:t.length}));t.forEach(function(w){var k,$,O=(($=(k=w.props.style)===null||k===void 0?void 0:k.width)===null||$===void 0?void 0:$.split("px")[0])||0;a.push(O),n+=O});for(var l=0;l<o;l++)r+=a[a.length-1-l],n+=a[a.length-1-l];for(var s=0;s<i;s++)n+=a[s];for(var c=0;c<this.currentSlide;c++)r+=a[c];var d={width:n+"px",left:-r+"px"};if(this.centerMode){var f="".concat(a[this.currentSlide],"px");d.left="calc(".concat(d.left," + (100% - ").concat(f,") / 2 ) ")}this.setState({trackStyle:d});return}var p=t.length,v=P(P(P({},this.$props),this.$data),{slideCount:p}),m=ui(v)+Up(v)+p,y=100/this.slidesToShow*m,b=100/m,C=-b*(ui(v)+this.currentSlide)*y/100;this.centerMode&&(C+=(100-b*y/100)/2);var S={width:y+"%",left:C+"%"};this.setState({slideWidth:b+"%",trackStyle:S})},checkImagesLoad:function(){var t=this,n=document.querySelectorAll(".slick-slide 
img"),r=n.length,a=0;Array.prototype.forEach.call(n,function(o){var i=function(){return++a&&a>=r&&t.onWindowResized()};if(!o.onclick)o.onclick=function(){return o.parentNode.focus()};else{var l=o.onclick;o.onclick=function(){l(),o.parentNode.focus()}}o.onload||(t.$props.lazyLoad?o.onload=function(){t.adaptHeight(),t.callbackTimers.push(setTimeout(t.onWindowResized,t.speed))}:(o.onload=i,o.onerror=function(){i(),t.__emit("lazyLoadError")}))})},progressiveLazyLoad:function(){for(var t=[],n=P(P({},this.$props),this.$data),r=this.currentSlide;r<this.slideCount+Up(n);r++)if(this.lazyLoadedList.indexOf(r)<0){t.push(r);break}for(var a=this.currentSlide-1;a>=-ui(n);a--)if(this.lazyLoadedList.indexOf(a)<0){t.push(a);break}t.length>0?(this.setState(function(o){return{lazyLoadedList:o.lazyLoadedList.concat(t)}}),this.__emit("lazyLoad",t)):this.lazyLoadTimer&&(clearInterval(this.lazyLoadTimer),delete this.lazyLoadTimer)},slideHandler:function(t){var n=this,r=arguments.length>1&&arguments[1]!==void 0?arguments[1]:!1,a=this.$props,o=a.asNavFor,i=a.currentSlide,l=a.beforeChange,s=a.speed,c=a.afterChange,d=tTe(P(P(P({index:t},this.$props),this.$data),{trackRef:this.track,useCSS:this.useCSS&&!r})),f=d.state,p=d.nextState;if(!!f){l&&l(i,f.currentSlide);var v=f.lazyLoadedList.filter(function(m){return n.lazyLoadedList.indexOf(m)<0});this.$attrs.onLazyLoad&&v.length>0&&this.__emit("lazyLoad",v),this.setState(f,function(){o&&o.innerSlider.currentSlide!==i&&o.innerSlider.slideHandler(t),p&&(n.animationEndCallback=setTimeout(function(){var m=p.animating,y=gTe(p,["animating"]);n.setState(y,function(){n.callbackTimers.push(setTimeout(function(){return n.setState({animating:m})},10)),c&&c(f.currentSlide),delete n.animationEndCallback})},s))})}},changeSlide:function(t){var n=arguments.length>1&&arguments[1]!==void 
0?arguments[1]:!1,r=P(P({},this.$props),this.$data),a=nTe(r,t);a!==0&&!a||(n===!0?this.slideHandler(a,n):this.slideHandler(a))},clickHandler:function(t){this.clickable===!1&&(t.stopPropagation(),t.preventDefault()),this.clickable=!0},keyHandler:function(t){var n=rTe(t,this.accessibility,this.rtl);n!==""&&this.changeSlide({message:n})},selectHandler:function(t){this.changeSlide(t)},disableBodyScroll:function(){var t=function(r){r=r||window.event,r.preventDefault&&r.preventDefault(),r.returnValue=!1};window.ontouchmove=t},enableBodyScroll:function(){window.ontouchmove=null},swipeStart:function(t){this.verticalSwiping&&this.disableBodyScroll();var n=aTe(t,this.swipe,this.draggable);n!==""&&this.setState(n)},swipeMove:function(t){var n=oTe(t,P(P(P({},this.$props),this.$data),{trackRef:this.track,listRef:this.list,slideIndex:this.currentSlide}));!n||(n.swiping&&(this.clickable=!1),this.setState(n))},swipeEnd:function(t){var n=iTe(t,P(P(P({},this.$props),this.$data),{trackRef:this.track,listRef:this.list,slideIndex:this.currentSlide}));if(!!n){var r=n.triggerSlideHandler;delete n.triggerSlideHandler,this.setState(n),r!==void 0&&(this.slideHandler(r),this.$props.verticalSwiping&&this.enableBodyScroll())}},slickPrev:function(){var t=this;this.callbackTimers.push(setTimeout(function(){return t.changeSlide({message:"previous"})},0))},slickNext:function(){var t=this;this.callbackTimers.push(setTimeout(function(){return t.changeSlide({message:"next"})},0))},slickGoTo:function(t){var n=this,r=arguments.length>1&&arguments[1]!==void 0?arguments[1]:!1;if(t=Number(t),isNaN(t))return"";this.callbackTimers.push(setTimeout(function(){return n.changeSlide({message:"index",index:t,currentSlide:n.currentSlide},r)},0))},play:function(){var t;if(this.rtl)t=this.currentSlide-this.slidesToScroll;else if(Tg(P(P({},this.$props),this.$data)))t=this.currentSlide+this.slidesToScroll;else 
return!1;this.slideHandler(t)},handleAutoPlay:function(t){this.autoplayTimer&&clearInterval(this.autoplayTimer);var n=this.autoplaying;if(t==="update"){if(n==="hovered"||n==="focused"||n==="paused")return}else if(t==="leave"){if(n==="paused"||n==="focused")return}else if(t==="blur"&&(n==="paused"||n==="hovered"))return;this.autoplayTimer=setInterval(this.play,this.autoplaySpeed+50),this.setState({autoplaying:"playing"})},pause:function(t){this.autoplayTimer&&(clearInterval(this.autoplayTimer),this.autoplayTimer=null);var n=this.autoplaying;t==="paused"?this.setState({autoplaying:"paused"}):t==="focused"?(n==="hovered"||n==="playing")&&this.setState({autoplaying:"focused"}):n==="playing"&&this.setState({autoplaying:"hovered"})},onDotsOver:function(){this.autoplay&&this.pause("hovered")},onDotsLeave:function(){this.autoplay&&this.autoplaying==="hovered"&&this.handleAutoPlay("leave")},onTrackOver:function(){this.autoplay&&this.pause("hovered")},onTrackLeave:function(){this.autoplay&&this.autoplaying==="hovered"&&this.handleAutoPlay("leave")},onSlideFocus:function(){this.autoplay&&this.pause("focused")},onSlideBlur:function(){this.autoplay&&this.autoplaying==="focused"&&this.handleAutoPlay("blur")},customPaging:function(t){var n=t.i;return g("button",null,[n+1])},appendDots:function(t){var n=t.dots;return g("ul",{style:{display:"block"}},[n])}},beforeMount:function(){if(this.ssrInit(),this.__emit("init"),this.lazyLoad){var t=am(P(P({},this.$props),this.$data));t.length>0&&(this.setState(function(n){return{lazyLoadedList:n.lazyLoadedList.concat(t)}}),this.__emit("lazyLoad",t))}},mounted:function(){var t=this;this.$nextTick(function(){var n=P({listRef:t.list,trackRef:t.track,children:t.children},t.$props);t.updateState(n,!0,function(){t.adaptHeight(),t.autoplay&&t.handleAutoPlay("update")}),t.lazyLoad==="progressive"&&(t.lazyLoadTimer=setInterval(t.progressiveLazyLoad,1e3)),t.ro=new 
ng(function(){t.animating?(t.onWindowResized(!1),t.callbackTimers.push(setTimeout(function(){return t.onWindowResized()},t.speed))):t.onWindowResized()}),t.ro.observe(t.list),Array.prototype.forEach.call(document.querySelectorAll(".slick-slide"),function(r){r.onfocus=t.$props.pauseOnFocus?t.onSlideFocus:null,r.onblur=t.$props.pauseOnFocus?t.onSlideBlur:null}),window&&(window.addEventListener?window.addEventListener("resize",t.onWindowResized):window.attachEvent("onresize",t.onWindowResized))})},beforeUnmount:function(){this.animationEndCallback&&clearTimeout(this.animationEndCallback),this.lazyLoadTimer&&clearInterval(this.lazyLoadTimer),this.callbackTimers.length&&(this.callbackTimers.forEach(function(t){return clearTimeout(t)}),this.callbackTimers=[]),window.addEventListener?window.removeEventListener("resize",this.onWindowResized):window.detachEvent("onresize",this.onWindowResized),this.autoplayTimer&&clearInterval(this.autoplayTimer)},updated:function(){if(this.checkImagesLoad(),this.__emit("reInit"),this.lazyLoad){var t=am(P(P({},this.$props),this.$data));t.length>0&&(this.setState(function(n){return{lazyLoadedList:n.lazyLoadedList.concat(t)}}),this.__emit("lazyLoad"))}this.adaptHeight()},watch:{__propsSymbol__:function(){for(var t=this,n=this.$props,r=P(P({listRef:this.list,trackRef:this.track},n),this.$data),a=!1,o=0,i=Object.keys(this.preProps);o<i.length;o++){var l=i[o];if(!n.hasOwnProperty(l)){a=!0;break}if(!(kt(n[l])==="object"||typeof n[l]=="function"||kt(n[l])==="symbol")&&n[l]!==this.preProps[l]){a=!0;break}}this.updateState(r,a,function(){t.currentSlide>=n.children.length&&t.changeSlide({message:"index",index:n.children.length-n.slidesToShow,currentSlide:t.currentSlide}),n.autoplay?t.handleAutoPlay("update"):t.pause("paused")}),this.preProps=P({},n)}},render:function(){var 
t,n=this,r=Se("slick-slider",this.$attrs.class,{"slick-vertical":this.vertical,"slick-initialized":!0}),a=P(P({},this.$props),this.$data),o=Zy(a,["fade","cssEase","speed","infinite","centerMode","focusOnSelect","currentSlide","lazyLoad","lazyLoadedList","rtl","slideWidth","slideHeight","listHeight","vertical","slidesToShow","slidesToScroll","slideCount","trackStyle","variableWidth","unslick","centerPadding"]),i=this.$props.pauseOnHover;o=P(P({},o),{focusOnSelect:this.focusOnSelect?this.selectHandler:null,ref:this.trackRefHandler,onMouseleave:i?this.onTrackLeave:Xr,onMouseover:i?this.onTrackOver:Xr});var l;if(this.dots===!0&&this.slideCount>=this.slidesToShow){var s=Zy(a,["dotsClass","slideCount","slidesToShow","currentSlide","slidesToScroll","clickHandler","children","infinite","appendDots"]);s.customPaging=this.customPaging,s.appendDots=this.appendDots;var c=this.$slots,d=c.customPaging,f=c.appendDots;d&&(s.customPaging=d),f&&(s.appendDots=f);var p=this.$props.pauseOnDotsHover;s=P(P({},s),{clickHandler:this.changeSlide,onMouseover:p?this.onDotsOver:Xr,onMouseleave:p?this.onDotsLeave:Xr}),l=g(mTe,s,null)}var v,m,y=Zy(a,["infinite","centerMode","currentSlide","slideCount","slidesToShow"]);y.clickHandler=this.changeSlide;var b=this.$slots,C=b.prevArrow,S=b.nextArrow;C&&(y.prevArrow=C),S&&(y.nextArrow=S),this.arrows&&(v=g(RD,y,null),m=g(LD,y,null));var w=null;this.vertical&&(w={height:typeof this.listHeight=="number"?"".concat(this.listHeight,"px"):this.listHeight});var k=null;this.vertical===!1?this.centerMode===!0&&(k={padding:"0px "+this.centerPadding}):this.centerMode===!0&&(k={padding:this.centerPadding+" 0px"});var 
$=P(P({},w),k),O=this.touchMove,T=(t={ref:this.listRefHandler,class:"slick-list",style:$,onClick:this.clickHandler,onMousedown:O?this.swipeStart:Xr,onMousemove:this.dragging&&O?this.swipeMove:Xr,onMouseup:O?this.swipeEnd:Xr,onMouseleave:this.dragging&&O?this.swipeEnd:Xr},V(t,mn?"onTouchstartPassive":"onTouchstart",O?this.swipeStart:Xr),V(t,mn?"onTouchmovePassive":"onTouchmove",this.dragging&&O?this.swipeMove:Xr),V(t,"onTouchend",O?this.swipeEnd:Xr),V(t,"onTouchcancel",this.dragging&&O?this.swipeEnd:Xr),V(t,"onKeydown",this.accessibility?this.keyHandler:Xr),t),_={class:r};return this.unslick&&(T={class:"slick-list",ref:this.listRefHandler},_={class:r}),g("div",_,[this.unslick?"":v,g("div",T,[g(pTe,o,{default:function(){return[n.children]}})]),this.unslick?"":m,this.unslick?"":l])}},bTe=G({name:"Slider",mixins:[nt],inheritAttrs:!1,props:P({},TD),data:function(){return this._responsiveMediaHandlers=[],{breakpoint:null}},beforeMount:function(){var t=this;if(this.responsive){var n=this.responsive.map(function(a){return a.breakpoint});n.sort(function(a,o){return a-o}),n.forEach(function(a,o){var i;o===0?i=Xy({minWidth:0,maxWidth:a}):i=Xy({minWidth:n[o-1]+1,maxWidth:a}),aT()&&t.media(i,function(){t.setState({breakpoint:a})})});var r=Xy({minWidth:n.slice(-1)[0]});aT()&&this.media(r,function(){t.setState({breakpoint:null})})}},beforeUnmount:function(){this._responsiveMediaHandlers.forEach(function(t){t.mql.removeListener(t.listener)})},methods:{innerSliderRefHandler:function(t){this.innerSlider=t},media:function(t,n){var r=window.matchMedia(t),a=function(i){var l=i.matches;l&&n()};r.addListener(a),a(r),this._responsiveMediaHandlers.push({mql:r,query:t,listener:a})},slickPrev:function(){this.innerSlider.slickPrev()},slickNext:function(){this.innerSlider.slickNext()},slickGoTo:function(t){var n=arguments.length>1&&arguments[1]!==void 
0?arguments[1]:!1;this.innerSlider.slickGoTo(t,n)},slickPause:function(){this.innerSlider.pause("paused")},slickPlay:function(){this.innerSlider.handleAutoPlay("play")}},render:function(){var t=this,n,r,a;this.breakpoint?(a=this.responsive.filter(function(y){return y.breakpoint===t.breakpoint}),r=a[0].settings==="unslick"?"unslick":P(P({},this.$props),a[0].settings)):r=P({},this.$props),r.centerMode&&(r.slidesToScroll>1,r.slidesToScroll=1),r.fade&&(r.slidesToShow>1,r.slidesToScroll>1,r.slidesToShow=1,r.slidesToScroll=1);var o=ht(this)||[];o=o.filter(function(y){return typeof y=="string"?!!y.trim():!!y}),r.variableWidth&&(r.rows>1||r.slidesPerRow>1)&&(console.warn("variableWidth is not supported in case of rows > 1 or slidesPerRow > 1"),r.variableWidth=!1);for(var i=[],l=null,s=0;s<o.length;s+=r.rows*r.slidesPerRow){for(var c=[],d=s;d<s+r.rows*r.slidesPerRow;d+=r.slidesPerRow){for(var f=[],p=d;p<d+r.slidesPerRow&&(r.variableWidth&&((n=o[p].props)===null||n===void 0?void 0:n.style)&&(l=o[p].props.style.width),!(p>=o.length));p+=1)f.push(Ot(o[p],{key:100*s+10*d+p,tabindex:-1,style:{width:"".concat(100/r.slidesPerRow,"%"),display:"inline-block"}}));c.push(g("div",{key:10*s+d},[f]))}r.variableWidth?i.push(g("div",{key:s,style:{width:l}},[c])):i.push(g("div",{key:s},[c]))}if(r==="unslick"){var v="regular slider "+(this.className||"");return g("div",{class:v},[i])}else i.length<=r.slidesToShow&&(r.unslick=!0);var m=P(P(P({},this.$attrs),r),{children:i,ref:this.innerSliderRefHandler});return g(yTe,le(le({},m),{},{__propsSymbol__:[]}),this.$slots)}}),CTe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return 
n},wTe={effect:u.oneOf(rt("scrollx","fade")),dots:u.looseBool.def(!0),vertical:u.looseBool,autoplay:u.looseBool,easing:u.string,beforeChange:u.func,afterChange:u.func,prefixCls:u.string,accessibility:u.looseBool,nextArrow:u.VNodeChild,prevArrow:u.VNodeChild,pauseOnHover:u.looseBool,adaptiveHeight:u.looseBool,arrows:u.looseBool.def(!1),autoplaySpeed:u.number,centerMode:u.looseBool,centerPadding:u.string,cssEase:u.string,dotsClass:u.string,draggable:u.looseBool.def(!1),fade:u.looseBool,focusOnSelect:u.looseBool,infinite:u.looseBool,initialSlide:u.number,lazyLoad:u.looseBool,rtl:u.looseBool,slide:u.string,slidesToShow:u.number,slidesToScroll:u.number,speed:u.number,swipe:u.looseBool,swipeToSlide:u.looseBool,touchMove:u.looseBool,touchThreshold:u.number,variableWidth:u.looseBool,useCSS:u.looseBool,slickGoTo:u.number,responsive:u.array,dotPosition:u.oneOf(rt("top","bottom","left","right")),verticalSwiping:u.looseBool.def(!1)},STe=G({name:"ACarousel",inheritAttrs:!1,props:wTe,setup:function(){return{configProvider:ve("configProvider",St),slick:void 0,innerSlider:void 0}},beforeMount:function(){this.onWindowResized=Yn(this.onWindowResized,500,{leading:!1})},mounted:function(){rf(this,"vertical")&&on(!this.vertical,"Carousel","`vertical` is deprecated, please use `dotPosition` instead.");var t=this.autoplay;t&&window.addEventListener("resize",this.onWindowResized),this.innerSlider=this.slick&&this.slick.innerSlider},beforeUnmount:function(){var t=this.autoplay;t&&(window.removeEventListener("resize",this.onWindowResized),this.onWindowResized.cancel())},methods:{getDotPosition:function(){return this.dotPosition?this.dotPosition:rf(this,"vertical")&&this.vertical?"right":"bottom"},saveSlick:function(t){this.slick=t},onWindowResized:function(){var t=this.autoplay;t&&this.slick&&this.slick.innerSlider&&this.slick.innerSlider.autoPlay&&this.slick.innerSlider.autoPlay()},next:function(){this.slick.slickNext()},prev:function(){this.slick.slickPrev()},goTo:function(t){var 
n=arguments.length>1&&arguments[1]!==void 0?arguments[1]:!1;this.slick.slickGoTo(t,n)}},render:function(){var t,n=P({},this.$props),r=this.$slots;n.effect==="fade"&&(n.fade=!0);var a=this.$attrs,o=a.class,i=a.style,l=CTe(a,["class","style"]),s=this.configProvider.getPrefixCls,c=s("carousel",n.prefixCls),d="slick-dots",f=this.getDotPosition();n.vertical=f==="left"||f==="right",n.dotsClass=Se("".concat(d),"".concat(d,"-").concat(f||"bottom"),V({},"".concat(n.dotsClass),!!n.dotsClass)),c=Se((t={},V(t,o,!!o),V(t,c,!!c),V(t,"".concat(c,"-vertical"),n.vertical),t));var p=P(P(P({},n),l),{nextArrow:We(this,"nextArrow"),prevArrow:We(this,"prevArrow")});return g("div",{class:c,style:i},[g(bTe,le({ref:this.saveSlick},p),r)])}}),kTe=kn(STe),FD={exports:{}};(function(e,t){(function(n,r){e.exports=r()})(vo,function(){function n(r,a,o){o=o||{},o.childrenKeyName=o.childrenKeyName||"children";var i=r||[],l=[],s=0;do{var c=i.filter(function(d){return a(d,s)})[0];if(!c)break;l.push(c),i=c[o.childrenKeyName]||[],s+=1}while(i.length>0);return l}return n})})(FD);var om=FD.exports,$Te={name:"CascaderMenus",mixins:[nt],inheritAttrs:!1,props:{value:u.array.def([]),activeValue:u.array.def([]),options:u.array,prefixCls:u.string.def("rc-cascader-menus"),expandTrigger:u.string.def("click"),visible:u.looseBool.def(!1),dropdownMenuColumnStyle:u.object,defaultFieldNames:u.object,fieldNames:u.object,expandIcon:u.any,loadingIcon:u.any},data:function(){return this.menuItems={},{}},watch:{visible:function(t){var n=this;t&&this.$nextTick(function(){n.scrollActiveItemToView()})}},mounted:function(){var t=this;this.$nextTick(function(){t.scrollActiveItemToView()})},methods:{getFieldName:function(t){var n=this.$props,r=n.fieldNames,a=n.defaultFieldNames;return r[t]||a[t]},getOption:function(t,n){var 
r=this,a=this.prefixCls,o=this.expandTrigger,i=We(this,"loadingIcon"),l=We(this,"expandIcon"),s=function(S){r.__emit("select",t,n,S)},c=function(S){r.__emit("itemDoubleClick",t,n,S)},d=t[this.getFieldName("value")],f={onClick:s,onDblclick:c},p="".concat(a,"-menu-item"),v=null,m=t[this.getFieldName("children")]&&t[this.getFieldName("children")].length>0;(m||t.isLeaf===!1)&&(p+=" ".concat(a,"-menu-item-expand"),t.loading||(v=g("span",{class:"".concat(a,"-menu-item-expand-icon")},[l]))),o==="hover"&&(m||t.isLeaf===!1)&&(f={onMouseenter:this.delayOnSelect.bind(this,s),onMouseleave:this.delayOnSelect.bind(this),onClick:s}),this.isActiveOption(t,n)&&(p+=" ".concat(a,"-menu-item-active"),f.ref=this.saveMenuItem(n)),t.disabled&&(p+=" ".concat(a,"-menu-item-disabled"));var y=null;t.loading&&(p+=" ".concat(a,"-menu-item-loading"),y=i||null);var b="";return t.title?b=t.title:typeof t[this.getFieldName("label")]=="string"&&(b=t[this.getFieldName("label")]),g("li",le(le({key:Array.isArray(d)?d.join("__ant__"):d,class:p,title:b},f),{},{role:"menuitem",onMousedown:function(S){return S.preventDefault()}}),[t[this.getFieldName("label")],v,y])},getActiveOptions:function(t){var n=this,r=t||this.activeValue,a=this.options;return om(a,function(o,i){return er(o[n.getFieldName("value")],r[i])},{childrenKeyName:this.getFieldName("children")})},getShowOptions:function(){var t=this,n=this.options,r=this.getActiveOptions().map(function(a){return a[t.getFieldName("children")]}).filter(function(a){return!!a});return r.unshift(n),r},delayOnSelect:function(t){for(var n=this,r=arguments.length,a=new Array(r>1?r-1:0),o=1;o<r;o++)a[o-1]=arguments[o];this.delayTimer&&(clearTimeout(this.delayTimer),this.delayTimer=null),typeof t=="function"&&(this.delayTimer=setTimeout(function(){t(a),n.delayTimer=null},150))},scrollActiveItemToView:function(){for(var t=this.getShowOptions().length,n=0;n<t;n++){var r=this.menuItems[n];if(r){var 
a=Sn(r);a.parentNode.scrollTop=a.offsetTop}}},isActiveOption:function(t,n){var r=this.activeValue,a=r===void 0?[]:r;return er(a[n],t[this.getFieldName("value")])},saveMenuItem:function(t){var n=this;return function(r){n.menuItems[t]=r}}},render:function(){var t=this,n=this.prefixCls,r=this.dropdownMenuColumnStyle;return g("div",null,[this.getShowOptions().map(function(a,o){return g("ul",{class:"".concat(n,"-menu"),key:o,style:r},[a.map(function(i){return t.getOption(i,o)})])})])}};function OTe(e,t){if(e===t)return!0;if(!e||!t)return!1;var n=e.length;if(t.length!==n)return!1;for(var r=0;r<n;r++)if(e[r]!==t[r])return!1;return!0}var PTe=OTe,TTe=PTe,xTe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return 
n},_Te={bottomLeft:{points:["tl","bl"],offset:[0,4],overflow:{adjustX:1,adjustY:1}},topLeft:{points:["bl","tl"],offset:[0,-4],overflow:{adjustX:1,adjustY:1}},bottomRight:{points:["tr","br"],offset:[0,4],overflow:{adjustX:1,adjustY:1}},topRight:{points:["br","tr"],offset:[0,-4],overflow:{adjustX:1,adjustY:1}}},ETe=G({name:"Cascader",mixins:[nt],inheritAttrs:!1,props:{value:u.array,defaultValue:u.array,options:u.array,popupVisible:u.looseBool,disabled:u.looseBool.def(!1),transitionName:u.string.def(""),popupClassName:u.string.def(""),popupStyle:u.object.def(function(){return{}}),popupPlacement:u.string.def("bottomLeft"),prefixCls:u.string.def("rc-cascader"),dropdownMenuColumnStyle:u.object,builtinPlacements:u.object.def(_Te),loadData:u.func,changeOnSelect:u.looseBool,expandTrigger:u.string.def("click"),fieldNames:u.object.def(function(){return{label:"label",value:"value",children:"children"}}),expandIcon:u.any,loadingIcon:u.any,getPopupContainer:u.func},data:function(){var t=[],n=this.value,r=this.defaultValue,a=this.popupVisible;return vt(this,"value")?t=n||[]:vt(this,"defaultValue")&&(t=r||[]),this.children=void 0,this.defaultFieldNames={label:"label",value:"value",children:"children"},{sPopupVisible:a,sActiveValue:t,sValue:t}},watch:{value:function(t,n){if(!TTe(t,n)){var r={sValue:t||[]};vt(this,"loadData")||(r.sActiveValue=t||[]),this.setState(r)}},popupVisible:function(t){this.setState({sPopupVisible:t})}},methods:{getPopupDOMNode:function(){return this.trigger.getPopupDomNode()},getFieldName:function(t){var n=this.defaultFieldNames,r=this.fieldNames;return r[t]||n[t]},getFieldNames:function(){return this.fieldNames},getCurrentLevelOptions:function(){var t=this,n=this.options,r=n===void 0?[]:n,a=this.sActiveValue,o=a===void 0?[]:a,i=om(r,function(l,s){return er(l[t.getFieldName("value")],o[s])},{childrenKeyName:this.getFieldName("children")});return 
i[i.length-2]?i[i.length-2][this.getFieldName("children")]:Je(r).filter(function(l){return!l.disabled})},getActiveOptions:function(t){var n=this;return om(this.options||[],function(r,a){return er(r[n.getFieldName("value")],t[a])},{childrenKeyName:this.getFieldName("children")})},setPopupVisible:function(t){vt(this,"popupVisible")||this.setState({sPopupVisible:t}),t&&!this.sPopupVisible&&this.setState({sActiveValue:this.sValue}),this.__emit("popupVisibleChange",t)},handleChange:function(t,n,r){var a=this;if(r.type!=="keydown"||r.keyCode===ze.ENTER){var o=t.map(function(i){return i[a.getFieldName("value")]});this.__emit("change",o,t),this.setPopupVisible(n.visible)}},handlePopupVisibleChange:function(t){this.setPopupVisible(t)},handleMenuSelect:function(t,n,r){var a=this.trigger.getRootDomNode();a&&a.focus&&a.focus();var o=this.changeOnSelect,i=this.loadData,l=this.expandTrigger;if(!(!t||t.disabled)){var s=this.sActiveValue;s=s.slice(0,n+1),s[n]=t[this.getFieldName("value")];var c=this.getActiveOptions(s);if(t.isLeaf===!1&&!t[this.getFieldName("children")]&&i){o&&this.handleChange(c,{visible:!0},r),this.setState({sActiveValue:s}),i(c);return}var d={};!t[this.getFieldName("children")]||!t[this.getFieldName("children")].length?(this.handleChange(c,{visible:!1},r),d.sValue=s):o&&(r.type==="click"||r.type==="keydown")&&(l==="hover"?this.handleChange(c,{visible:!1},r):this.handleChange(c,{visible:!0},r),d.sValue=s),d.sActiveValue=s,(vt(this,"value")||r.type==="keydown"&&r.keyCode!==ze.ENTER)&&delete d.sValue,this.setState(d)}},handleItemDoubleClick:function(){var t=this.$props.changeOnSelect;t&&this.setPopupVisible(!1)},handleKeyDown:function(t){var n=this,r=this.children;if(r){var a=nf(r).onKeydown;if(a){a(t);return}}var o=Je(this.sActiveValue),i=o.length-1<0?0:o.length-1,l=this.getCurrentLevelOptions(),s=l.map(function(p){return p[n.getFieldName("value")]}).findIndex(function(p){return 
er(o[i],p)});if(!(t.keyCode!==ze.DOWN&&t.keyCode!==ze.UP&&t.keyCode!==ze.LEFT&&t.keyCode!==ze.RIGHT&&t.keyCode!==ze.ENTER&&t.keyCode!==ze.SPACE&&t.keyCode!==ze.BACKSPACE&&t.keyCode!==ze.ESC&&t.keyCode!==ze.TAB)){if(!this.sPopupVisible&&t.keyCode!==ze.BACKSPACE&&t.keyCode!==ze.LEFT&&t.keyCode!==ze.RIGHT&&t.keyCode!==ze.ESC&&t.keyCode!==ze.TAB){this.setPopupVisible(!0);return}if(t.keyCode===ze.DOWN||t.keyCode===ze.UP){t.preventDefault();var c=s;c!==-1?t.keyCode===ze.DOWN?(c+=1,c=c>=l.length?0:c):(c-=1,c=c<0?l.length-1:c):c=0,o[i]=l[c][this.getFieldName("value")]}else if(t.keyCode===ze.LEFT||t.keyCode===ze.BACKSPACE)t.preventDefault(),o.splice(o.length-1,1);else if(t.keyCode===ze.RIGHT)t.preventDefault(),l[s]&&l[s][this.getFieldName("children")]&&o.push(l[s][this.getFieldName("children")][0][this.getFieldName("value")]);else if(t.keyCode===ze.ESC||t.keyCode===ze.TAB){this.setPopupVisible(!1);return}(!o||o.length===0)&&this.setPopupVisible(!1);var d=this.getActiveOptions(o),f=d[d.length-1];this.handleMenuSelect(f,d.length-1,t),this.__emit("keydown",t)}},saveTrigger:function(t){this.trigger=t}},render:function(){var t=this.$props,n=this.sActiveValue,r=this.handleMenuSelect,a=this.sPopupVisible,o=this.handlePopupVisibleChange,i=this.handleKeyDown,l=t.prefixCls,s=t.transitionName,c=t.popupClassName,d=t.options,f=d===void 0?[]:d,p=t.disabled,v=t.builtinPlacements,m=t.popupPlacement,y=xTe(t,["prefixCls","transitionName","popupClassName","options","disabled","builtinPlacements","popupPlacement"]),b=g("div",null,null),C="";if(f&&f.length>0){var S=We(this,"loadingIcon"),w=We(this,"expandIcon")||">",k=P(P(P({},t),this.$attrs),{fieldNames:this.getFieldNames(),defaultFieldNames:this.defaultFieldNames,activeValue:n,visible:a,loadingIcon:S,expandIcon:w,onSelect:r,onItemDoubleClick:this.handleItemDoubleClick});b=g($Te,k,null)}else C=" ".concat(l,"-menus-empty");var 
$=P(P(P({},y),this.$attrs),{disabled:p,popupPlacement:m,builtinPlacements:v,popupTransitionName:s,action:p?[]:["click"],popupVisible:p?!1:a,prefixCls:"".concat(l,"-menus"),popupClassName:c+C,popup:b,onPopupVisibleChange:o,ref:this.saveTrigger}),O=ht(this);return this.children=O,g(Ii,$,{default:function(){return[O&&Ot(O[0],{onKeydown:i,tabindex:p?void 0:0})]}})}}),MTe={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M758.2 839.1C851.8 765.9 912 651.9 912 523.9 912 303 733.5 124.3 512.6 124 291.4 123.7 112 302.8 112 523.9c0 125.2 57.5 236.9 147.6 310.2 3.5 2.8 8.6 2.2 11.4-1.3l39.4-50.5c2.7-3.4 2.1-8.3-1.2-11.1-8.1-6.6-15.9-13.7-23.4-21.2a318.64 318.64 0 01-68.6-101.7C200.4 609 192 567.1 192 523.9s8.4-85.1 25.1-124.5c16.1-38.1 39.2-72.3 68.6-101.7 29.4-29.4 63.6-52.5 101.7-68.6C426.9 212.4 468.8 204 512 204s85.1 8.4 124.5 25.1c38.1 16.1 72.3 39.2 101.7 68.6 29.4 29.4 52.5 63.6 68.6 101.7 16.7 39.4 25.1 81.3 25.1 124.5s-8.4 85.1-25.1 124.5a318.64 318.64 0 01-68.6 101.7c-9.3 9.3-19.1 18-29.3 26L668.2 724a8 8 0 00-14.1 3l-39.6 162.2c-1.2 5 2.6 9.9 7.7 9.9l167 .8c6.7 0 10.5-7.7 6.3-12.9l-37.3-47.9z"}}]},name:"redo",theme:"outlined"},ITe=MTe;function oT(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){NTe(e,a,n[a])})}return e}function NTe(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var rS=function(t,n){var r=oT({},t,n.attrs);return g(Et,oT({},r,{icon:ITe}),null)};rS.displayName="RedoOutlined";rS.inheritAttrs=!1;var ATe=rS,iT=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof 
Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n};function lT(){}var DTe={options:{type:Array,default:[]},defaultValue:u.array,value:u.array,displayRender:u.func,transitionName:u.string.def("slide-up"),popupStyle:u.object.def(function(){return{}}),popupClassName:u.string,popupPlacement:u.oneOf(rt("bottomLeft","bottomRight","topLeft","topRight")).def("bottomLeft"),placeholder:u.string.def("Please select"),size:u.oneOf(rt("large","default","small")),disabled:u.looseBool.def(!1),allowClear:u.looseBool.def(!0),showSearch:{type:[Boolean,Object],default:void 0},notFoundContent:u.VNodeChild,loadData:u.func,expandTrigger:u.oneOf(rt("click","hover")),changeOnSelect:u.looseBool,prefixCls:u.string,inputPrefixCls:u.string,getPopupContainer:u.func,popupVisible:u.looseBool,fieldNames:{type:Object},autofocus:u.looseBool,suffixIcon:u.VNodeChild,showSearchRender:u.any,onChange:u.func,onPopupVisibleChange:u.func,onFocus:u.func,onBlur:u.func,onSearch:u.func,"onUpdate:value":u.func},RTe=50;function LTe(e,t,n){return t.some(function(r){return r[n.label].indexOf(e)>-1})}function FTe(e,t,n,r){function a(o){return o[r.label].indexOf(n)>-1}return e.findIndex(a)-t.findIndex(a)}function Yp(e){var t=e.fieldNames||{},n={children:t.children||"children",label:t.label||"label",value:t.value||"value"};return n}function b1(e,t){var n=arguments.length>2&&arguments[2]!==void 0?arguments[2]:[],r=Yp(t),a=[],o=r.children;return e.forEach(function(i){var l=n.concat(i);(t.changeOnSelect||!i[o]||!i[o].length)&&a.push(l),i[o]&&(a=a.concat(b1(i[o],t,l)))}),a}var BTe=function(t){var n=t.labels;return n.join(" / ")},VTe=G({name:"ACascader",mixins:[nt],inheritAttrs:!1,props:DTe,setup:function(){return{configProvider:ve("configProvider",St),localeData:ve("localeData",{}),cachedOptions:[],popupRef:void 0,input:void 0}},data:function(){var 
t=this.$props,n=t.value,r=t.defaultValue,a=t.popupVisible,o=t.showSearch,i=t.options;return{sValue:n||r||[],inputValue:"",inputFocused:!1,sPopupVisible:a,flattenOptions:o?b1(i,this.$props):void 0}},watch:{value:function(t){this.setState({sValue:t||[]})},popupVisible:function(t){this.setState({sPopupVisible:t})},options:function(t){this.showSearch&&this.setState({flattenOptions:b1(t,this.$props)})}},created:function(){ot("savePopupRef",this.savePopupRef)},methods:{savePopupRef:function(t){this.popupRef=t},highlightKeyword:function(t,n,r){return t.split(n).map(function(a,o){return o===0?a:[g("span",{class:"".concat(r,"-menu-item-keyword")},[n]),a]})},defaultRenderFilteredOption:function(t){var n=this,r=t.inputValue,a=t.path,o=t.prefixCls,i=t.names;return a.map(function(l,s){var c=l[i.label],d=c.indexOf(r)>-1?n.highlightKeyword(c,r,o):c;return s===0?d:[" / ",d]})},saveInput:function(t){this.input=t},handleChange:function(t,n){if(this.setState({inputValue:""}),n[0].__IS_FILTERED_OPTION){var r=t[0],a=n[0].path;this.setValue(r,a);return}this.setValue(t,n)},handlePopupVisibleChange:function(t){vt(this,"popupVisible")||this.setState(function(n){return{sPopupVisible:t,inputFocused:t,inputValue:t?n.inputValue:""}}),this.$emit("popupVisibleChange",t)},handleInputFocus:function(t){this.$emit("focus",t)},handleInputBlur:function(t){this.setState({inputFocused:!1}),this.$emit("blur",t)},handleInputClick:function(t){var n=this.inputFocused,r=this.sPopupVisible;(n||r)&&(t.stopPropagation(),t.nativeEvent&&t.nativeEvent.stopImmediatePropagation&&t.nativeEvent.stopImmediatePropagation())},handleKeyDown:function(t){(t.keyCode===ze.BACKSPACE||t.keyCode===ze.SPACE)&&t.stopPropagation()},handleInputChange:function(t){var n=t.target.value;this.setState({inputValue:n}),this.$emit("search",n)},setValue:function(t){var n=arguments.length>1&&arguments[1]!==void 
0?arguments[1]:[];vt(this,"value")||this.setState({sValue:t}),this.$emit("update:value",t),this.$emit("change",t,n)},getLabel:function(){var t=this.options,n=Yp(this.$props),r=We(this,"displayRender",{},!1)||BTe,a=this.sValue,o=Array.isArray(a[0])?a[0]:a,i=om(t,function(s,c){return s[n.value]===o[c]},{childrenKeyName:n.children}),l=i.map(function(s){return s[n.label]});return r({labels:l,selectedOptions:i})},clearSelection:function(t){t.preventDefault(),t.stopPropagation(),this.inputValue?this.setState({inputValue:""}):(this.setValue([]),this.handlePopupVisibleChange(!1))},generateFilteredOptions:function(t,n){var r,a=this.showSearch,o=this.notFoundContent,i=Yp(this.$props),l=a.filter,s=l===void 0?LTe:l,c=a.sort,d=c===void 0?FTe:c,f=a.limit,p=f===void 0?RTe:f,v=a.render||We(this,"showSearchRender")||this.defaultRenderFilteredOption,m=this.$data,y=m.flattenOptions,b=y===void 0?[]:y,C=m.inputValue,S;if(p>0){S=[];var w=0;b.some(function(k){var $=s(C,k,i);return $&&(S.push(k),w+=1),w>=p})}else on(typeof p!="number","Cascader","'limit' of showSearch in Cascader should be positive number or false."),S=b.filter(function(k){return s(C,k,i)});return S.sort(function(k,$){return d(k,$,C,i)}),S.length>0?S.map(function(k){var $;return $={__IS_FILTERED_OPTION:!0,path:k},V($,i.label,v({inputValue:C,path:k,prefixCls:t,names:i})),V($,i.value,k.map(function(O){return O[i.value]})),V($,"disabled",k.some(function(O){return!!O.disabled})),$}):[(r={},V(r,i.label,o||n("Cascader")),V(r,i.value,"ANT_CASCADER_NOT_FOUND"),V(r,"disabled",!0),r)]},focus:function(){this.input&&this.input.focus()},blur:function(){this.input&&this.input.blur()}},render:function(){var t,n,r,a=this.sPopupVisible,o=this.inputValue,i=this.configProvider,l=this.localeData,s=this.$data,c=s.sValue,d=s.inputFocused,f=Qe(this),p=We(this,"suffixIcon");p=Array.isArray(p)?p[0]:p;var v=i.getPopupContainer,m=f,y=m.prefixCls,b=m.inputPrefixCls,C=m.placeholder,S=C===void 
0?l.placeholder:C,w=m.size,k=m.disabled,$=m.allowClear,O=m.showSearch,T=O===void 0?!1:O,_=m.notFoundContent,I=iT(m,["prefixCls","inputPrefixCls","placeholder","size","disabled","allowClear","showSearch","notFoundContent"]),L=If(this.$attrs),j=L.onEvents,F=L.extraAttrs,N=F.class,D=F.style,z=iT(F,["class","style"]),B=this.configProvider.getPrefixCls,M=this.configProvider.renderEmpty,E=B("cascader",y),K=B("input",b),W=Se((t={},V(t,"".concat(K,"-lg"),w==="large"),V(t,"".concat(K,"-sm"),w==="small"),t)),Y=$&&!k&&c.length>0||o?g(Yr,{class:"".concat(E,"-picker-clear"),onClick:this.clearSelection,key:"clear-icon"},null):null,q=Se((n={},V(n,"".concat(E,"-picker-arrow"),!0),V(n,"".concat(E,"-picker-arrow-expand"),a),n)),J=Se(N,"".concat(E,"-picker"),(r={},V(r,"".concat(E,"-picker-with-value"),o),V(r,"".concat(E,"-picker-disabled"),k),V(r,"".concat(E,"-picker-").concat(w),!!w),V(r,"".concat(E,"-picker-show-search"),!!T),V(r,"".concat(E,"-picker-focused"),d),r)),ne=bn(I,["popupStyle","options","popupPlacement","transitionName","displayRender","changeOnSelect","expandTrigger","popupVisible","getPopupContainer","loadData","popupClassName","filterOption","renderFilteredOption","sortFilteredOption","notFoundContent","defaultValue","fieldNames","onChange","onPopupVisibleChange","onFocus","onBlur","onSearch","onUpdate:value"]),oe=f.options,Q=Yp(this.$props);if(oe&&oe.length>0)o&&(oe=this.generateFilteredOptions(E,M));else{var ae;oe=[(ae={},V(ae,Q.label,_||M("Cascader")),V(ae,Q.value,"ANT_CASCADER_NOT_FOUND"),V(ae,"disabled",!0),ae)]}a?this.cachedOptions=oe:oe=this.cachedOptions;var de={},be=(oe||[]).length===1&&oe[0].value==="ANT_CASCADER_NOT_FOUND";be&&(de.height="auto");var Ee=T.matchInputWidth!==!1;Ee&&(o||be)&&this.input&&(de.width=Sn(this.input.input).offsetWidth+"px");var Pe=P(P(P({},z),ne),{prefixCls:K,placeholder:c&&c.length>0?void 0:S,value:o,disabled:k,readonly:!T,autocomplete:"off",class:"".concat(E,"-input 
").concat(W),onFocus:this.handleInputFocus,onClick:T?this.handleInputClick:lT,onBlur:T?this.handleInputBlur:f.onBlur,onKeydown:this.handleKeyDown,onChange:T?this.handleInputChange:lT}),Be=ht(this),te=p&&(zn(p)?Ot(p,{class:"".concat(E,"-picker-arrow")}):g("span",{class:"".concat(E,"-picker-arrow")},[p]))||g(Rs,{class:q},null),ie=Be.length?Be:g("span",{class:J,style:D},[g("span",{class:"".concat(E,"-picker-label")},[this.getLabel()]),g(Jn,le(le({},Pe),{},{ref:this.saveInput}),null),Y,te]),ge=g(wi,null,null),ke=g("span",{class:"".concat(E,"-menu-item-loading-icon")},[g(ATe,{spin:!0},null)]),xe=f.getPopupContainer||v,Ie=P(P(P(P({},f),{getPopupContainer:xe,options:oe,prefixCls:E,value:c,popupVisible:a,dropdownMenuColumnStyle:de,expandIcon:ge,loadingIcon:ke}),j),{onPopupVisibleChange:this.handlePopupVisibleChange,onChange:this.handleChange});return g(ETe,Ie,{default:function(){return[ie]}})}}),zTe=kn(VTe),sT=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n};function uT(){}var ro=G({name:"ACheckbox",inheritAttrs:!1,__ANT_CHECKBOX:!0,props:{prefixCls:u.string,defaultChecked:u.looseBool,checked:u.looseBool,disabled:u.looseBool,isGroup:u.looseBool,value:u.any,name:u.string,id:u.string,indeterminate:u.looseBool,type:u.string.def("checkbox"),autofocus:u.looseBool,onChange:u.func,"onUpdate:checked":u.func},emits:["change","update:checked"],setup:function(){return{configProvider:ve("configProvider",St),checkboxGroupContext:ve("checkboxGroupContext",void 0)}},watch:{value:function(t,n){var r=this;Ne(function(){var a=r.checkboxGroupContext,o=a===void 0?{}:a;o.registerValue&&o.cancelValue&&(o.cancelValue(n),o.registerValue(t))})}},mounted:function(){var 
t=this.value,n=this.checkboxGroupContext,r=n===void 0?{}:n;r.registerValue&&r.registerValue(t),on(rf(this,"checked")||this.checkboxGroupContext||!rf(this,"value"),"Checkbox","`value` is not validate prop, do you mean `checked`?")},beforeUnmount:function(){var t=this.value,n=this.checkboxGroupContext,r=n===void 0?{}:n;r.cancelValue&&r.cancelValue(t)},methods:{handleChange:function(t){var n=t.target.checked;this.$emit("update:checked",n),this.$emit("change",t)},focus:function(){this.$refs.vcCheckbox.focus()},blur:function(){this.$refs.vcCheckbox.blur()}},render:function(){var t=this,n,r=Qe(this),a=this.checkboxGroupContext,o=this.$attrs,i=ht(this),l=r.indeterminate,s=r.prefixCls,c=sT(r,["indeterminate","prefixCls"]),d=this.configProvider.getPrefixCls,f=d("checkbox",s),p=o.onMouseenter,v=p===void 0?uT:p,m=o.onMouseleave,y=m===void 0?uT:m;o.onInput;var b=o.class,C=o.style,S=sT(o,["onMouseenter","onMouseleave","onInput","class","style"]),w=P(P(P({},c),{prefixCls:f}),S);a?(w.onChange=function(){for(var O=arguments.length,T=new Array(O),_=0;_<O;_++)T[_]=arguments[_];t.$emit.apply(t,["change"].concat(T)),a.toggleOption({label:i,value:r.value})},w.name=a.name,w.checked=a.sValue.indexOf(r.value)!==-1,w.disabled=r.disabled||a.disabled,w.indeterminate=l):w.onChange=this.handleChange;var k=Se((n={},V(n,"".concat(f,"-wrapper"),!0),V(n,"".concat(f,"-wrapper-checked"),w.checked),V(n,"".concat(f,"-wrapper-disabled"),w.disabled),n),b),$=Se(V({},"".concat(f,"-indeterminate"),l));return g("label",{class:k,style:C,onMouseenter:v,onMouseleave:y},[g(cD,le(le({},w),{},{class:$,ref:"vcCheckbox"}),null),i.length?g("span",null,[i]):null])}});function HTe(){}var im=G({name:"ACheckboxGroup",props:{name:u.string,prefixCls:u.string,defaultValue:{type:Array},value:{type:Array},options:{type:Array},disabled:u.looseBool,onChange:u.func},emits:["change","update:value"],setup:function(){return{configProvider:ve("configProvider",St)}},data:function(){var 
t=this.value,n=this.defaultValue;return{sValue:t||n||[],registeredValues:[]}},watch:{value:function(t){this.sValue=t||[]}},created:function(){ot("checkboxGroupContext",this)},methods:{getOptions:function(){var t=this.options,n=t===void 0?[]:t,r=this.$slots;return n.map(function(a){if(typeof a=="string")return{label:a,value:a};var o=a.label;return o===void 0&&r.label&&(o=r.label(a)),P(P({},a),{label:o})})},cancelValue:function(t){this.registeredValues=this.registeredValues.filter(function(n){return n!==t})},registerValue:function(t){this.registeredValues=[].concat(Je(this.registeredValues),[t])},toggleOption:function(t){var n=this.registeredValues,r=this.sValue.indexOf(t.value),a=Je(this.sValue);r===-1?a.push(t.value):a.splice(r,1),rf(this,"value")||(this.sValue=a);var o=this.getOptions(),i=a.filter(function(l){return n.indexOf(l)!==-1}).sort(function(l,s){var c=o.findIndex(function(f){return f.value===l}),d=o.findIndex(function(f){return f.value===s});return c-d});this.$emit("update:value",i),this.$emit("change",i)}},render:function(){var t=this.$props,n=this.$data,r=t.prefixCls,a=t.options,o=this.configProvider.getPrefixCls,i=o("checkbox",r),l=ht(this),s="".concat(i,"-group");return a&&a.length>0&&(l=this.getOptions().map(function(c){return g(ro,{prefixCls:i,key:c.value.toString(),disabled:"disabled"in c?c.disabled:t.disabled,indeterminate:c.indeterminate,value:c.value,checked:n.sValue.indexOf(c.value)!==-1,onChange:c.onChange||HTe,class:"".concat(s,"-item")},{default:function(){return[c.label]}})})),g("div",{class:s},[l])}});ro.Group=im;ro.install=function(e){return e.component(ro.name,ro),e.component(im.name,im),e};var jTe={actions:u.array,author:u.VNodeChild,avatar:u.VNodeChild,content:u.VNodeChild,prefixCls:u.string,datetime:u.VNodeChild},KTe=G({name:"AComment",props:jTe,slots:["actions","author","avatar","content","datetime"],setup:function(t,n){var r=n.slots,a=Wt("comment",t),o=a.prefixCls,i=a.direction,l=function(d,f){return 
g("div",{class:"".concat(d,"-nested")},[f])},s=function(d){if(!d||!d.length)return null;var f=d.map(function(p,v){return g("li",{key:"action-".concat(v)},[p])});return f};return function(){var c,d,f,p,v,m,y,b,C,S,w,k=o.value,$=(c=t.actions)!==null&&c!==void 0?c:(d=r.actions)===null||d===void 0?void 0:d.call(r),O=(f=t.author)!==null&&f!==void 0?f:(p=r.author)===null||p===void 0?void 0:p.call(r),T=(v=t.avatar)!==null&&v!==void 0?v:(m=r.avatar)===null||m===void 0?void 0:m.call(r),_=(y=t.content)!==null&&y!==void 0?y:(b=r.content)===null||b===void 0?void 0:b.call(r),I=(C=t.datetime)!==null&&C!==void 0?C:(S=r.datetime)===null||S===void 0?void 0:S.call(r),L=g("div",{class:"".concat(k,"-avatar")},[typeof T=="string"?g("img",{src:T,alt:"comment-avatar"},null):T]),j=$?g("ul",{class:"".concat(k,"-actions")},[s(Array.isArray($)?$:[$])]):null,F=g("div",{class:"".concat(k,"-content-author")},[O&&g("span",{class:"".concat(k,"-content-author-name")},[O]),I&&g("span",{class:"".concat(k,"-content-author-time")},[I])]),N=g("div",{class:"".concat(k,"-content")},[F,g("div",{class:"".concat(k,"-content-detail")},[_]),j]),D=g("div",{class:"".concat(k,"-inner")},[L,N]),z=Un((w=r.default)===null||w===void 0?void 0:w.call(r));return g("div",{class:[k,V({},"".concat(k,"-rtl"),i.value==="rtl")]},[D,z&&z.length?l(k,z):null])}}}),WTe=kn(KTe);function cT(e){this.changeYear(e)}function dT(){}var UTe={name:"MonthPanel",inheritAttrs:!1,mixins:[nt],props:{value:u.any,defaultValue:u.any,cellRender:u.any,contentRender:u.any,locale:u.any,rootPrefixCls:u.string,disabledDate:u.func,renderFooter:u.func,changeYear:u.func.def(dT)},data:function(){var t=this.value,n=this.defaultValue;return this.nextYear=cT.bind(this,1),this.previousYear=cT.bind(this,-1),{sValue:t||n}},watch:{value:function(t){this.setState({sValue:t})}},methods:{setAndSelectValue:function(t){this.setValue(t),this.__emit("select",t)},setValue:function(t){vt(this,"value")&&this.setState({sValue:t})}},render:function(){var 
t=this.sValue,n=this.cellRender,r=this.contentRender,a=this.locale,o=this.rootPrefixCls,i=this.disabledDate,l=this.renderFooter,s=t.year(),c="".concat(o,"-month-panel"),d=l&&l("month");return g("div",{class:c},[g("div",null,[g("div",{class:"".concat(c,"-header")},[g("a",{class:"".concat(c,"-prev-year-btn"),role:"button",onClick:this.previousYear,title:a.previousYear},null),g("a",{class:"".concat(c,"-year-select"),role:"button",onClick:this.$attrs.onYearPanelShow||dT,title:a.yearSelect},[g("span",{class:"".concat(c,"-year-select-content")},[s]),g("span",{class:"".concat(c,"-year-select-arrow")},[yt("x")])]),g("a",{class:"".concat(c,"-next-year-btn"),role:"button",onClick:this.nextYear,title:a.nextYear},null)]),g("div",{class:"".concat(c,"-body")},[g(uD,{disabledDate:i,onSelect:this.setAndSelectValue,locale:a,value:t,cellRender:n,contentRender:r,prefixCls:c},null)]),d&&g("div",{class:"".concat(c,"-footer")},[d])])])}},YTe=UTe,qTe=4,GTe=3;function fT(){}function hT(e){var t=this.sValue.clone();t.add(e,"year"),this.setState({sValue:t})}function XTe(e){var t=this.sValue.clone();t.year(e),t.month(this.sValue.month()),this.sValue=t,this.__emit("select",t)}var ZTe={name:"YearPanel",mixins:[nt],inheritAttrs:!1,props:{rootPrefixCls:u.string,value:u.object,defaultValue:u.object,locale:u.object,renderFooter:u.func},data:function(){return this.nextDecade=hT.bind(this,10),this.previousDecade=hT.bind(this,-10),{sValue:this.value||this.defaultValue}},watch:{value:function(t){this.sValue=t}},methods:{years:function(){for(var t=this.sValue,n=t.year(),r=parseInt(n/10,10)*10,a=r-1,o=[],i=0,l=0;l<qTe;l++){o[l]=[];for(var s=0;s<GTe;s++){var c=a+i,d=String(c);o[l][s]={content:d,year:c,title:d},i++}}return o}},render:function(){var t=this,n=this.sValue,r=this.locale,a=this.renderFooter,o=this.$attrs.onDecadePanelShow||fT,i=this.years(),l=n.year(),s=parseInt(l/10,10)*10,c=s+9,d="".concat(this.rootPrefixCls,"-year-panel"),f=i.map(function(v,m){var y=v.map(function(b){var 
C,S=(C={},V(C,"".concat(d,"-cell"),1),V(C,"".concat(d,"-selected-cell"),b.year===l),V(C,"".concat(d,"-last-decade-cell"),b.year<s),V(C,"".concat(d,"-next-decade-cell"),b.year>c),C),w=fT;return b.year<s?w=t.previousDecade:b.year>c?w=t.nextDecade:w=XTe.bind(t,b.year),g("td",{role:"gridcell",title:b.title,key:b.content,onClick:w,class:S},[g("a",{class:"".concat(d,"-year")},[b.content])])});return g("tr",{key:m,role:"row"},[y])}),p=a&&a("year");return g("div",{class:d},[g("div",null,[g("div",{class:"".concat(d,"-header")},[g("a",{class:"".concat(d,"-prev-decade-btn"),role:"button",onClick:this.previousDecade,title:r.previousDecade},null),g("a",{class:"".concat(d,"-decade-select"),role:"button",onClick:o,title:r.decadeSelect},[g("span",{class:"".concat(d,"-decade-select-content")},[s,yt("-"),c]),g("span",{class:"".concat(d,"-decade-select-arrow")},[yt("x")])]),g("a",{class:"".concat(d,"-next-decade-btn"),role:"button",onClick:this.nextDecade,title:r.nextDecade},null)]),g("div",{class:"".concat(d,"-body")},[g("table",{class:"".concat(d,"-table"),cellspacing:"0",role:"grid"},[g("tbody",{class:"".concat(d,"-tbody")},[f])])]),p&&g("div",{class:"".concat(d,"-footer")},[p])])])}},JTe=4,QTe=3;function exe(){}function pT(e){var t=this.sValue.clone();t.add(e,"years"),this.setState({sValue:t})}function txe(e,t){var n=this.sValue.clone();n.year(e),n.month(this.sValue.month()),this.__emit("select",n),t.preventDefault()}var nxe={name:"DecadePanel",mixins:[nt],inheritAttrs:!1,props:{locale:u.object,value:u.object,defaultValue:u.object,rootPrefixCls:u.string,renderFooter:u.func},data:function(){return this.nextCentury=pT.bind(this,100),this.previousCentury=pT.bind(this,-100),{sValue:this.value||this.defaultValue}},watch:{value:function(t){this.sValue=t}},render:function(){for(var t=this,n=this.sValue,r=this.$props,a=r.locale,o=r.renderFooter,i=n.year(),l=parseInt(i/100,10)*100,s=l-10,c=l+99,d=[],f=0,p="".concat(this.rootPrefixCls,"-decade-panel"),v=0;v<JTe;v++){d[v]=[];for(var 
m=0;m<QTe;m++){var y=s+f*10,b=s+f*10+9;d[v][m]={startDecade:y,endDecade:b},f++}}var C=o&&o("decade"),S=d.map(function(w,k){var $=w.map(function(O){var T,_=O.startDecade,I=O.endDecade,L=_<l,j=I>c,F=(T={},V(T,"".concat(p,"-cell"),1),V(T,"".concat(p,"-selected-cell"),_<=i&&i<=I),V(T,"".concat(p,"-last-century-cell"),L),V(T,"".concat(p,"-next-century-cell"),j),T),N="".concat(_,"-").concat(I),D=exe;return L?D=t.previousCentury:j?D=t.nextCentury:D=txe.bind(t,_),g("td",{key:_,onClick:D,role:"gridcell",class:F},[g("a",{class:"".concat(p,"-decade")},[N])])});return g("tr",{key:k,role:"row"},[$])});return g("div",{class:p},[g("div",{class:"".concat(p,"-header")},[g("a",{class:"".concat(p,"-prev-century-btn"),role:"button",onClick:this.previousCentury,title:a.previousCentury},null),g("div",{class:"".concat(p,"-century")},[l,yt("-"),c]),g("a",{class:"".concat(p,"-next-century-btn"),role:"button",onClick:this.nextCentury,title:a.nextCentury},null)]),g("div",{class:"".concat(p,"-body")},[g("table",{class:"".concat(p,"-table"),cellspacing:"0",role:"grid"},[g("tbody",{class:"".concat(p,"-tbody")},[S])])]),C&&g("div",{class:"".concat(p,"-footer")},[C])])}};function vT(){}function mT(e){var t=this.value.clone();t.add(e,"months"),this.__emit("valueChange",t)}function gT(e){var t=this.value.clone();t.add(e,"years"),this.__emit("valueChange",t)}function Nh(e,t){return e?t:null}var rxe={name:"CalendarHeader",inheritAttrs:!1,mixins:[nt],props:{prefixCls:u.string,value:u.object,showTimePicker:u.looseBool,locale:u.object,enablePrev:u.any.def(1),enableNext:u.any.def(1),disabledMonth:u.func,mode:u.any,monthCellRender:u.func,monthCellContentRender:u.func,renderFooter:u.func},data:function(){return 
this.nextMonth=mT.bind(this,1),this.previousMonth=mT.bind(this,-1),this.nextYear=gT.bind(this,1),this.previousYear=gT.bind(this,-1),{yearPanelReferer:null}},methods:{onMonthSelect:function(t){this.__emit("panelChange",t,"date"),this.$attrs.onMonthSelect?this.__emit("monthSelect",t):this.__emit("valueChange",t)},onYearSelect:function(t){var n=this.yearPanelReferer;this.setState({yearPanelReferer:null}),this.__emit("panelChange",t,n),this.__emit("valueChange",t)},onDecadeSelect:function(t){this.__emit("panelChange",t,"year"),this.__emit("valueChange",t)},changeYear:function(t){t>0?this.nextYear():this.previousYear()},monthYearElement:function(t){var n=this,r=this.$props,a=r.prefixCls,o=r.locale,i=r.value,l=i.localeData(),s=o.monthBeforeYear,c="".concat(a,"-").concat(s?"my-select":"ym-select"),d=t?" ".concat(a,"-time-status"):"",f=g("a",{class:"".concat(a,"-year-select").concat(d),role:"button",onClick:t?vT:function(){return n.showYearPanel("date")},title:t?null:o.yearSelect},[i.format(o.yearFormat)]),p=g("a",{class:"".concat(a,"-month-select").concat(d),role:"button",onClick:t?vT:this.showMonthPanel,title:t?null:o.monthSelect},[o.monthFormat?i.format(o.monthFormat):l.monthsShort(i)]),v;t&&(v=g("a",{class:"".concat(a,"-day-select").concat(d),role:"button"},[i.format(o.dayFormat)]));var m=[];return s?m=[p,v,f]:m=[f,p,v],g("span",{class:c},[m])},showMonthPanel:function(){this.__emit("panelChange",null,"month")},showYearPanel:function(t){this.setState({yearPanelReferer:t}),this.__emit("panelChange",null,"year")},showDecadePanel:function(){this.__emit("panelChange",null,"decade")}},render:function(){var t=this,n=Qe(this),r=n.prefixCls,a=n.locale,o=n.mode,i=n.value,l=n.showTimePicker,s=n.enableNext,c=n.enablePrev,d=n.disabledMonth,f=n.renderFooter,p=null;return o==="month"&&(p=g(YTe,{locale:a,value:i,rootPrefixCls:r,onSelect:this.onMonthSelect,onYearPanelShow:function(){return 
t.showYearPanel("month")},disabledDate:d,cellRender:n.monthCellRender,contentRender:n.monthCellContentRender,renderFooter:f,changeYear:this.changeYear},null)),o==="year"&&(p=g(ZTe,{locale:a,value:i,rootPrefixCls:r,onSelect:this.onYearSelect,onDecadePanelShow:this.showDecadePanel,renderFooter:f},null)),o==="decade"&&(p=g(nxe,{locale:a,value:i,rootPrefixCls:r,onSelect:this.onDecadeSelect,renderFooter:f},null)),g("div",{class:"".concat(r,"-header")},[g("div",{style:{position:"relative"}},[Nh(c&&!l,g("a",{class:"".concat(r,"-prev-year-btn"),role:"button",onClick:this.previousYear,title:a.previousYear},null)),Nh(c&&!l,g("a",{class:"".concat(r,"-prev-month-btn"),role:"button",onClick:this.previousMonth,title:a.previousMonth},null)),this.monthYearElement(l),Nh(s&&!l,g("a",{class:"".concat(r,"-next-month-btn"),onClick:this.nextMonth,title:a.nextMonth},null)),Nh(s&&!l,g("a",{class:"".concat(r,"-next-year-btn"),onClick:this.nextYear,title:a.nextYear},null))]),p])}},aS=rxe;function axe(){}var BD=function(t,n){var r=n.attrs,a=r.prefixCls,o=r.locale,i=r.value,l=r.timePicker,s=r.disabled,c=r.disabledDate,d=r.onToday,f=r.text,p=(!f&&l?o.now:f)||o.today,v=c&&!Xv(hl(i),c),m=v||s,y=m?"".concat(a,"-today-btn-disabled"):"";return g("a",{class:"".concat(a,"-today-btn ").concat(y),role:"button",onClick:m?axe:d,title:fPe(i)},[p])};BD.inheritAttrs=!1;var VD=BD;function oxe(){}var zD=function(t,n){var r=n.attrs,a=r.prefixCls,o=r.locale,i=r.okDisabled,l=r.onOk,s="".concat(a,"-ok-btn");return i&&(s+=" ".concat(a,"-ok-btn-disabled")),g("a",{class:s,role:"button",onClick:i?oxe:l},[o.ok])};zD.inheritAttrs=!1;var HD=zD;function eb(){}var jD=function(t,n){var r,a=n.attrs,o=a.prefixCls,i=a.locale,l=a.showTimePicker,s=a.timePickerDisabled,c=a.onCloseTimePicker,d=c===void 0?eb:c,f=a.onOpenTimePicker,p=f===void 0?eb:f,v=(r={},V(r,"".concat(o,"-time-picker-btn"),!0),V(r,"".concat(o,"-time-picker-btn-disabled"),s),r),m=eb;return 
s||(m=l?d:p),g("a",{class:v,role:"button",onClick:m},[l?i.dateSelect:i.timeSelect])};jD.inheritAttrs=!1;var KD=jD,ixe={name:"CalendarFooter",inheritAttrs:!1,mixins:[nt],props:{prefixCls:u.string,showDateInput:u.looseBool,disabledTime:u.any,timePicker:u.any,selectedValue:u.any,showOk:u.looseBool,value:u.object,renderFooter:u.func,defaultValue:u.object,locale:u.object,showToday:u.looseBool,disabledDate:u.func,showTimePicker:u.looseBool,okDisabled:u.looseBool,mode:u.string},methods:{onSelect:function(t){this.__emit("select",t)},getRootDOMNode:function(){return Sn(this)}},render:function(){var t=Qe(this),n=t.value,r=t.prefixCls,a=t.showOk,o=t.timePicker,i=t.renderFooter,l=t.showToday,s=t.mode,c=null,d=i&&i(s);if(l||o||d){var f,p=P(P(P({},t),this.$attrs),{value:n}),v=null;l&&(v=g(VD,le({key:"todayButton"},p),null)),delete p.value;var m=null;(a===!0||a!==!1&&!!o)&&(m=g(HD,le({key:"okButton"},p),null));var y=null;o&&(y=g(KD,le({key:"timePickerButton"},p),null));var b;(v||y||m||d)&&(b=g("span",{class:"".concat(r,"-footer-btn")},[d,v,y,m]));var C=(f={},V(f,"".concat(r,"-footer"),!0),V(f,"".concat(r,"-footer-show-ok"),!!m),f);c=g("div",{class:C},[b])}return c}},WD=ixe,tb,nb,Xo,lxe={name:"DateInput",inheritAttrs:!1,mixins:[nt],props:{prefixCls:u.string,timePicker:u.object,value:u.object,disabledTime:u.any,format:u.oneOfType([u.string,u.arrayOf(u.string),u.func]),locale:u.object,disabledDate:u.func,placeholder:u.string,selectedValue:u.object,clearIcon:u.any,inputMode:u.string,inputReadOnly:u.looseBool,disabled:u.looseBool,showClear:u.looseBool},data:function(){var t=this.selectedValue;return{str:qy(t,this.format),invalid:!1,hasFocus:!1}},watch:{selectedValue:function(){this.setState()},format:function(){this.setState()}},updated:function(){var t=this;this.$nextTick(function(){Xo&&t.$data.hasFocus&&!t.invalid&&!(tb===0&&nb===0)&&Xo.setSelectionRange(tb,nb)})},getInstance:function(){return Xo},methods:{getDerivedStateFromProps:function(t,n){var 
r={};Xo&&(tb=Xo.selectionStart,nb=Xo.selectionEnd);var a=t.selectedValue;return n.hasFocus||(r={str:qy(a,this.format),invalid:!1}),r},onClear:function(){this.setState({str:""}),this.__emit("clear",null)},onInputChange:function(t){var n=t.target,r=n.value,a=n.composing,o=this.str,i=o===void 0?"":o;if(!(t.isComposing||a||i===r)){var l=this.$props,s=l.disabledDate,c=l.format,d=l.selectedValue;if(!r){this.__emit("change",null),this.setState({invalid:!1,str:r});return}var f=Ue(r,c,!0);if(!f.isValid()){this.setState({invalid:!0,str:r});return}var p=this.value.clone();if(p.year(f.year()).month(f.month()).date(f.date()).hour(f.hour()).minute(f.minute()).second(f.second()),!p||s&&s(p)){this.setState({invalid:!0,str:r});return}(d!==p||d&&p&&!d.isSame(p))&&(this.setState({invalid:!1,str:r}),this.__emit("change",p))}},onFocus:function(){this.setState({hasFocus:!0})},onBlur:function(){this.setState(function(t,n){return{hasFocus:!1,str:qy(n.value,n.format)}})},onKeyDown:function(t){var n=t.keyCode,r=this.$props,a=r.value,o=r.disabledDate;if(n===ze.ENTER){var i=!o||!o(a);i&&this.__emit("select",a.clone()),t.preventDefault()}},getRootDOMNode:function(){return Sn(this)},focus:function(){Xo&&Xo.focus()},saveDateInput:function(t){Xo=t}},render:function(){var t=this.invalid,n=this.str,r=this.locale,a=this.prefixCls,o=this.placeholder,i=this.disabled,l=this.showClear,s=this.inputMode,c=this.inputReadOnly,d=We(this,"clearIcon"),f=t?"".concat(a,"-input-invalid"):"";return g("div",{class:"".concat(a,"-input-wrap")},[g("div",{class:"".concat(a,"-date-input-wrap")},[at(g("input",{ref:this.saveDateInput,class:"".concat(a,"-input ").concat(f),value:n,disabled:i,placeholder:o,onInput:this.onInputChange,onChange:this.onInputChange,onKeydown:this.onKeyDown,onFocus:this.onFocus,onBlur:this.onBlur,inputMode:s,readonly:c},null),[[Mi]])]),l?g("a",{role:"button",title:r.clear,onClick:this.onClear},[d||g("span",{class:"".concat(a,"-clear-btn")},null)]):null])}},qp=lxe;function UD(e){return 
/* (tail of UD, whose header sits on the previous wrapped line: the value returned is e.clone().startOf("month")) */e.clone().startOf("month")}/* YD: returns a clone of e after endOf("month") has been called on it; the work is done on the clone, not on e. */function YD(e){return e.clone().endOf("month")}/* ei: returns e.clone().add(t,n); t is the amount and n the unit handed to add(). Calendar.goTime calls it with units such as "weeks", "days", "years" and "month". */function ei(e,t,n){return e.clone().add(t,n)}/* sxe: true when at least one element r of the list e satisfies r.isSame(t,n). e falls back to [] when omitted or undefined; t and n fall back to undefined. */function sxe(){var e=arguments.length>0&&arguments[0]!==void 0?arguments[0]:[],t=arguments.length>1?arguments[1]:void 0,n=arguments.length>2?arguments[2]:void 0;return e.some(function(r){return r.isSame(t,n)})}/* Ah: hands t back when Ue.isMoment(t) and t.isValid() are both truthy, otherwise yields false so callers can chain it with ||. */var Ah=function(t){return Ue.isMoment(t)&&t.isValid()?t:!1},/* uxe: options for the "Calendar" component, passed through G(). NOTE(review): G, u, nt, $g, U2 and Nf are defined outside this chunk - presumably a component wrapper, a prop-type helper, mixins and the default locale; confirm. */uxe=G({name:"Calendar",mixins:[nt,$g,U2],inheritAttrs:!1,props:{locale:u.object.def(Nf),format:u.oneOfType([u.string,u.arrayOf(u.string),u.func]),visible:u.looseBool.def(!0),prefixCls:u.string.def("rc-calendar"),defaultValue:u.object,value:u.object,selectedValue:u.object,defaultSelectedValue:u.object,mode:u.oneOf(["time","date","month","year","decade"]),showDateInput:u.looseBool.def(!0),showWeekNumber:u.looseBool,showToday:u.looseBool.def(!0),showOk:u.looseBool,timePicker:u.any,dateInputPlaceholder:u.any,disabledDate:u.func,disabledTime:u.any,dateRender:u.func,renderFooter:u.func.def(function(){return null}),renderSidebar:u.func.def(function(){return null}),clearIcon:u.any,focusablePanel:u.looseBool.def(!0),inputMode:u.string,inputReadOnly:u.looseBool,monthCellRender:u.func,monthCellContentRender:u.func},/* data: sMode starts from the mode prop or "date"; sValue is the first of value / defaultValue that passes Ah, else Ue(); sSelectedValue is selectedValue or defaultSelectedValue. */data:function(){var t=this.$props;return{sMode:this.mode||"date",sValue:Ah(t.value)||Ah(t.defaultValue)||Ue(),sSelectedValue:t.selectedValue||t.defaultSelectedValue}},/* watch: mirrors the mode, value and selectedValue props into state. For value the fallbacks are defaultValue and then Zv(this.sValue) (Zv is defined outside this chunk). */watch:{mode:function(t){this.setState({sMode:t})},value:function(t){this.setState({sValue:Ah(t)||Ah(this.defaultValue)||Zv(this.sValue)})},selectedValue:function(t){this.setState({sSelectedValue:t})}},/* mounted: on the next tick, passes qp.getInstance() to saveFocusElement (not defined in this component literal - presumably supplied by a mixin; confirm). */mounted:function(){var t=this;this.$nextTick(function(){t.saveFocusElement(qp.getInstance())})},methods:{/* onPanelChange: updates sMode only when vt(this,"mode") is falsy (presumably "mode prop not supplied" - confirm), and always emits "panelChange" with t, or the current sValue when t is falsy, plus the mode n. */onPanelChange:function(t,n){var r=this.sValue;vt(this,"mode")||this.setState({sMode:n}),this.__emit("panelChange",t||r,n)},/* onKeyDown: ignores key events whose target element is an <input>; r is true when ctrl or meta is held. The switch over keyCode continues on the following wrapped lines. */onKeyDown:function(t){if(t.target.nodeName.toLowerCase()!=="input"){var n=t.keyCode,r=t.ctrlKey||t.metaKey,a=this.disabledDate,o=this.sValue;switch(n){case ze.DOWN:return 
this.goTime(1,"weeks"),t.preventDefault(),1;case ze.UP:return this.goTime(-1,"weeks"),t.preventDefault(),1;case ze.LEFT:return r?this.goTime(-1,"years"):this.goTime(-1,"days"),t.preventDefault(),1;case ze.RIGHT:return r?this.goTime(1,"years"):this.goTime(1,"days"),t.preventDefault(),1;case ze.HOME:return this.setValue(UD(o)),t.preventDefault(),1;case ze.END:return this.setValue(YD(o)),t.preventDefault(),1;case ze.PAGE_DOWN:return this.goTime(1,"month"),t.preventDefault(),1;case ze.PAGE_UP:return this.goTime(-1,"month"),t.preventDefault(),1;case ze.ENTER:return(!a||!a(o))&&this.onSelect(o,{source:"keyboard"}),t.preventDefault(),1;default:return this.__emit("keydown",t),1}}},onClear:function(){this.onSelect(null),this.__emit("clear")},onOk:function(){var t=this.sSelectedValue;this.isAllowedDate(t)&&this.__emit("ok",t)},onDateInputChange:function(t){this.onSelect(t,{source:"dateInput"})},onDateInputSelect:function(t){this.onSelect(t,{source:"dateInputSelect"})},onDateTableSelect:function(t){var n=this.timePicker,r=this.sSelectedValue;if(!r&&n){var a=Qe(n),o=a.defaultValue;o&&Sa(o,t)}this.onSelect(t)},onToday:function(){var t=this.sValue,n=hl(t);this.onSelect(n,{source:"todayButton"})},onBlur:function(t){var n=this;setTimeout(function(){var r=qp.getInstance(),a=n.rootInstance;!a||a.contains(document.activeElement)||r&&r.contains(document.activeElement)||n.__emit("blur",t)},0)},getRootDOMNode:function(){return Sn(this)},openTimePicker:function(){this.onPanelChange(null,"time")},closeTimePicker:function(){this.onPanelChange(null,"date")},goTime:function(t,n){this.setValue(ei(this.sValue,t,n))}},render:function(){var 
t=this.locale,n=this.prefixCls,r=this.disabledDate,a=this.dateInputPlaceholder,o=this.timePicker,i=this.disabledTime,l=this.showDateInput,s=this.sValue,c=this.sSelectedValue,d=this.sMode,f=this.renderFooter,p=this.inputMode,v=this.inputReadOnly,m=this.monthCellRender,y=this.monthCellContentRender,b=this.$props,C=We(this,"clearIcon"),S=d==="time",w=S&&i&&o?K2(c,i):null,k=null;if(o&&S){var $=Qe(o),O=P(P(P({showHour:!0,showSecond:!0,showMinute:!0},$),w),{value:c,disabledTime:i,onChange:this.onDateInputChange});$.defaultValue!==void 0&&(O.defaultOpenValue=$.defaultValue),k=Ot(o,O)}var T=l?g(qp,{format:this.getFormat(),key:"date-input",value:s,locale:t,placeholder:a,showClear:!0,disabledTime:i,disabledDate:r,onClear:this.onClear,prefixCls:n,selectedValue:c,onChange:this.onDateInputChange,clearIcon:C,onSelect:this.onDateInputSelect,inputMode:p,inputReadOnly:v},null):null,_=[];return b.renderSidebar&&_.push(b.renderSidebar()),_.push(g("div",{class:"".concat(n,"-panel"),key:"panel"},[T,g("div",{tabindex:b.focusablePanel?0:void 
0,class:"".concat(n,"-date-panel")},[g(aS,{locale:t,mode:d,value:s,onValueChange:this.setValue,onPanelChange:this.onPanelChange,renderFooter:f,showTimePicker:S,prefixCls:n,monthCellRender:m,monthCellContentRender:y},null),o&&S?g("div",{class:"".concat(n,"-time-picker")},[g("div",{class:"".concat(n,"-time-picker-panel")},[k])]):null,g("div",{class:"".concat(n,"-body")},[g(W2,{locale:t,value:s,selectedValue:c,prefixCls:n,dateRender:b.dateRender,onSelect:this.onDateTableSelect,disabledDate:r,showWeekNumber:b.showWeekNumber},null)]),g(WD,{showOk:b.showOk,mode:d,renderFooter:b.renderFooter,locale:t,prefixCls:n,showToday:b.showToday,disabledTime:i,showTimePicker:S,showDateInput:b.showDateInput,timePicker:o,selectedValue:c,timePickerDisabled:!c,value:s,disabledDate:r,okDisabled:b.showOk!==!1&&(!c||!this.isAllowedDate(c)),onOk:this.onOk,onSelect:this.onSelect,onToday:this.onToday,onOpenTimePicker:this.openTimePicker,onCloseTimePicker:this.closeTimePicker},null)])])),this.renderRoot({children:_,class:b.showWeekNumber?"".concat(n,"-week-number"):""})}}),qD=uxe,cxe=G({name:"MonthCalendar",mixins:[nt,$g,U2],inheritAttrs:!1,props:{locale:u.object.def(Nf),format:u.string,visible:u.looseBool.def(!0),prefixCls:u.string.def("rc-calendar"),monthCellRender:u.func,value:u.object,defaultValue:u.object,selectedValue:u.object,defaultSelectedValue:u.object,disabledDate:u.func,monthCellContentRender:u.func,renderFooter:u.func.def(function(){return null}),renderSidebar:u.func.def(function(){return null})},data:function(){var t=this.$props;return{mode:"month",sValue:t.value||t.defaultValue||Ue(),sSelectedValue:t.selectedValue||t.defaultSelectedValue}},methods:{onKeyDown:function(t){var n=t.keyCode,r=t.ctrlKey||t.metaKey,a=this.sValue,o=this.disabledDate,i=a;switch(n){case ze.DOWN:i=a.clone(),i.add(3,"months");break;case ze.UP:i=a.clone(),i.add(-3,"months");break;case ze.LEFT:i=a.clone(),r?i.add(-1,"years"):i.add(-1,"months");break;case 
/* (continues the keyCode switch of MonthCalendar.onKeyDown from the previous wrapped line) RIGHT: works on a clone of sValue and adds one year when ctrl/meta is held, otherwise one month. */ze.RIGHT:i=a.clone(),r?i.add(1,"years"):i.add(1,"months");break;/* ENTER: selects the current value unless disabledDate(a) is truthy, prevents the default action and returns 1. Any other key returns undefined. */case ze.ENTER:return(!o||!o(a))&&this.onSelect(a),t.preventDefault(),1;default:return}/* After the switch: if a new value object was produced, store it, prevent the default action and return 1. */if(i!==a)return this.setValue(i),t.preventDefault(),1},/* handlePanelChange: stores the requested panel mode n in state unless it is "date". */handlePanelChange:function(t,n){n!=="date"&&this.setState({mode:n})}},/* render: builds the "-month-calendar-content" wrapper holding the header component aS and the footer component WD, then hands it to this.renderRoot. Cell and footer renderers are read from this first and fall back to the same-named slots. */render:function(){var t=this.mode,n=this.sValue,r=this.$props,a=this.$slots,o=r.prefixCls,i=r.locale,l=r.disabledDate,s=this.monthCellRender||a.monthCellRender,c=this.monthCellContentRender||a.monthCellContentRender,d=this.renderFooter||a.renderFooter,f=g("div",{class:"".concat(o,"-month-calendar-content")},[g("div",{class:"".concat(o,"-month-header-wrap")},[g(aS,{prefixCls:o,mode:t,value:n,locale:i,disabledMonth:l,monthCellRender:s,monthCellContentRender:c,onMonthSelect:this.onSelect,onValueChange:this.setValue,onPanelChange:this.handlePanelChange},null)]),g(WD,{prefixCls:o,renderFooter:d},null)]);return this.renderRoot({class:"".concat(r.prefixCls,"-month-calendar"),children:f})}}),GD=cxe;/* id: combines handlers. Called with exactly one argument it returns that argument unchanged; otherwise it returns a function that calls, in order, every truthy argument exposing .apply, forwarding the caller's this and arguments. Return values of the handlers are discarded. */function id(){var e=[].slice.call(arguments,0);return e.length===1?e[0]:function(){for(var n=0;n<e.length;n++)e[n]&&e[n].apply&&e[n].apply(this,arguments)}}/* Dh / Rh / dxe (aliased as fxe): placement table keyed by bottomLeft, bottomRight, topRight and topLeft; every entry carries points, overflow, offset and targetOffset. Picker.render passes fxe as builtinPlacements to the popup component. */var Dh={adjustX:1,adjustY:1},Rh=[0,0],dxe={bottomLeft:{points:["tl","tl"],overflow:Dh,offset:[0,-3],targetOffset:Rh},bottomRight:{points:["tr","tr"],overflow:Dh,offset:[0,-3],targetOffset:Rh},topRight:{points:["br","br"],overflow:Dh,offset:[0,3],targetOffset:Rh},topLeft:{points:["bl","bl"],overflow:Dh,offset:[0,3],targetOffset:Rh}},fxe=dxe,/* yT: prop definition consisting of a validator only. An array passes when it is empty or when no element fails both ul(n) and Ue.isMoment(n); any other value passes when ul(t) or Ue.isMoment(t) is truthy. NOTE(review): ul is defined outside this chunk - presumably a string check; confirm. */yT={validator:function(t){return Array.isArray(t)?t.length===0||t.findIndex(function(n){return!ul(n)&&!Ue.isMoment(n)})===-1:ul(t)||Ue.isMoment(t)}};/* hxe: stores t on this under the key e. Picker binds it as hxe.bind(this,"calendarInstance") and uses the bound function as a ref callback. */function hxe(e,t){this[e]=t}var 
pxe=G({name:"Picker",mixins:[nt],inheritAttrs:!1,props:{animation:u.oneOfType([u.func,u.string]),disabled:u.looseBool,transitionName:u.string,format:u.oneOfType([u.string,u.array,u.func]),getCalendarContainer:u.func,calendar:u.any,open:u.looseBool,defaultOpen:u.looseBool.def(!1),prefixCls:u.string.def("rc-calendar-picker"),placement:u.any.def("bottomLeft"),value:yT,defaultValue:yT,align:u.object.def(function(){return{}}),dropdownClassName:u.string,dateRender:u.func,children:u.func},data:function(){var t=this.$props;this.calendarElement=null,this.saveCalendarRef=hxe.bind(this,"calendarInstance");var n;vt(this,"open")?n=t.open:n=t.defaultOpen;var r=t.value||t.defaultValue;return{sOpen:n,sValue:r}},watch:{value:function(t){this.setState({sValue:t})},open:function(t){this.setState({sOpen:t})}},mounted:function(){this.preSOpen=this.sOpen},updated:function(){!this.preSOpen&&this.sOpen&&(this.focusTimeout=setTimeout(this.focusCalendar,100)),this.preSOpen=this.sOpen},beforeUnmount:function(){clearTimeout(this.focusTimeout)},methods:{onCalendarKeyDown:function(t){t.keyCode===ze.ESC&&(t.stopPropagation(),this.closeCalendar(this.focus))},onCalendarSelect:function(t){var n=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{},r=this.$props;vt(this,"value")||this.setState({sValue:t});var a=Qe(r.calendar);(n.source==="keyboard"||n.source==="dateInputSelect"||!a.timePicker&&n.source!=="dateInput"||n.source==="todayButton")&&this.closeCalendar(this.focus),this.__emit("change",t)},onKeyDown:function(t){!this.sOpen&&(t.keyCode===ze.DOWN||t.keyCode===ze.ENTER)&&(this.openCalendar(),t.preventDefault())},onCalendarOk:function(){this.closeCalendar(this.focus)},onCalendarClear:function(){this.closeCalendar(this.focus)},onCalendarBlur:function(){this.setOpen(!1)},onVisibleChange:function(t){this.setOpen(t)},getCalendarElement:function(){var 
t=this.$props,n=Qe(t.calendar),r=nf(t.calendar),a=this.sValue,o=a,i={ref:this.saveCalendarRef,defaultValue:o||n.defaultValue,selectedValue:a,onKeydown:this.onCalendarKeyDown,onOk:id(r.onOk,this.onCalendarOk),onSelect:id(r.onSelect,this.onCalendarSelect),onClear:id(r.onClear,this.onCalendarClear),onBlur:id(r.onBlur,this.onCalendarBlur)};return Ot(t.calendar,i)},setOpen:function(t,n){this.sOpen!==t&&(vt(this,"open")||this.setState({sOpen:t},n),this.__emit("openChange",t))},openCalendar:function(t){this.setOpen(!0,t)},closeCalendar:function(t){this.setOpen(!1,t)},focus:function(){this.sOpen||Sn(this).focus()},focusCalendar:function(){this.sOpen&&!!this.calendarInstance&&this.calendarInstance.focus()}},render:function(){var t=this,n=Qe(this),r=n.prefixCls,a=n.placement,o=n.getCalendarContainer,i=n.align,l=n.animation,s=n.disabled,c=n.dropdownClassName,d=n.transitionName,f=this.sValue,p=this.sOpen,v={value:f,open:p},m=this.$slots.default(v);return(this.sOpen||!this.calendarElement)&&(this.calendarElement=this.getCalendarElement()),g(Ii,{popupAlign:i,builtinPlacements:fxe,popupPlacement:a,action:s&&!p?[]:["click"],destroyPopupOnHide:!0,getPopupContainer:o,popupStyle:this.$attrs.style||{},popupAnimation:l,popupTransitionName:d,popupVisible:p,onPopupVisibleChange:this.onVisibleChange,prefixCls:r,popupClassName:c,popup:this.calendarElement},{default:function(){return[Ot(m,{onKeydown:t.onKeyDown})]}})}}),oS=pxe,vxe={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M880 184H712v-64c0-4.4-3.6-8-8-8h-56c-4.4 0-8 3.6-8 8v64H384v-64c0-4.4-3.6-8-8-8h-56c-4.4 0-8 3.6-8 8v64H144c-17.7 0-32 14.3-32 32v664c0 17.7 14.3 32 32 32h736c17.7 0 32-14.3 32-32V216c0-17.7-14.3-32-32-32zm-40 656H184V460h656v380zM184 392V256h128v48c0 4.4 3.6 8 8 8h56c4.4 0 8-3.6 8-8v-48h256v48c0 4.4 3.6 8 8 8h56c4.4 0 8-3.6 8-8v-48h128v136H184z"}}]},name:"calendar",theme:"outlined"},mxe=vxe;function bT(e){for(var t=1;t<arguments.length;t++){var 
/* (continuation of bT, whose loop header is on the previous wrapped line) For each argument after the first: take Object(argument), or {} when it is null/undefined, collect its own string keys plus its enumerable own symbol keys when getOwnPropertySymbols exists, and copy every one onto e through gxe. Returns e. */n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){gxe(e,a,n[a])})}return e}/* gxe: writes n to e[t] and returns e. When t is already "in" e the write goes through Object.defineProperty with enumerable, configurable and writable all true; otherwise it is a plain assignment. */function gxe(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}/* iS (also exposed as XD, displayName "CalendarOutlined"): function component that merges its props t with n.attrs and renders Et with the icon set to mxe. inheritAttrs is switched off. */var iS=function(t,n){var r=bT({},t,n.attrs);return g(Et,bT({},r,{icon:mxe}),null)};iS.displayName="CalendarOutlined";iS.inheritAttrs=!1;var XD=iS;/* Cu: formats the value e with the format t. A falsy e gives "". An array format is reduced to its first entry; a function format is called with e; anything else is passed to e.format. */function Cu(e,t){return e?(Array.isArray(t)&&(t=t[0]),typeof t=="function"?t(e):e.format(t)):""}/* ZD(e,t,n): builds, via G(), a picker component named n. e is the calendar component that render instantiates (g(e,j,i) further down); t supplies the base props, which are extended with allowClear and showToday, both defaulting to true. */function ZD(e,t,n){return G({name:n,mixins:[nt],inheritAttrs:!1,props:P(P({},t),{allowClear:u.looseBool.def(!0),showToday:u.looseBool.def(!0)}),setup:function(){return{configProvider:ve("configProvider",St),input:void 0,sPrefixCls:void 0}},/* data: sValue and showDate both start from value or defaultValue; sOpen is the boolean form of the open prop. */data:function(){var a=this.value||this.defaultValue;return{sValue:a,showDate:a,sOpen:!!this.open}},/* watch.open: mirrors the prop into sOpen; when closing, and Qe(this) has a "value" key that differs from showDate, showDate is reset to it. watch.value: mirrors into sValue, and into showDate when it differs from the current sValue. watch.sOpen: on a truthy-to-falsy change, when vt(this,"open") is falsy, focus() is called inside Ne (Ne is defined outside this chunk - presumably a next-tick helper; confirm). */watch:{open:function(a){var o=Qe(this),i={};i.sOpen=a,"value"in o&&!a&&o.value!==this.showDate&&(i.showDate=o.value),this.setState(i)},value:function(a){var o={};o.sValue=a,a!==this.sValue&&(o.showDate=a),this.setState(o)},sOpen:function(a,o){var i=this;Ne(function(){!vt(i,"open")&&o&&!a&&i.focus()})}},methods:{/* saveInput: ref callback that remembers the input element used by focus() and blur(). */saveInput:function(a){this.input=a},/* clearSelection: swallows the event (preventDefault + stopPropagation) and reports a null value through handleChange. */clearSelection:function(a){a.preventDefault(),a.stopPropagation(),this.handleChange(null)},/* handleChange: updates sValue and showDate only when vt(this,"value") is falsy; always emits "change" with the value and its Cu-formatted string. */handleChange:function(a){vt(this,"value")||this.setState({sValue:a,showDate:a}),this.$emit("change",a,Cu(a,this.format))},handleCalendarChange:function(a){this.setState({showDate:a})},/* handleOpenChange: updates sOpen only when Qe(this) has no "open" key; always emits "openChange". */handleOpenChange:function(a){var o=Qe(this);"open"in o||this.setState({sOpen:a}),this.$emit("openChange",a)},/* focus / blur: forward to the saved input element when one has been stored; otherwise they do nothing. */focus:function(){var a;(a=this.input)===null||a===void 0||a.focus()},blur:function(){var a;(a=this.input)===null||a===void 0||a.blur()},/* renderFooter: reads renderExtraFooter from this first, then from the slots; the returned expression continues on the next wrapped line. */renderFooter:function(){var a=this.$slots,o=this.sPrefixCls,i=this.renderExtraFooter||a.renderExtraFooter;return 
i?g("div",{class:"".concat(o,"-footer-extra")},[typeof i=="function"?i.apply(void 0,arguments):i]):null},onMouseEnter:function(a){this.$emit("mouseenter",a)},onMouseLeave:function(a){this.$emit("mouseleave",a)}},render:function(){var a,o=this,i=this.$slots,l=this.$data,s=l.sValue,c=l.showDate,d=l.sOpen,f=We(this,"suffixIcon");f=Array.isArray(f)?f[0]:f;var p=MC(P(P({},Qe(this)),this.$attrs),["onChange"]),v=p.prefixCls,m=p.locale,y=p.localeCode,b=p.inputReadOnly,C=this.configProvider.getPrefixCls,S=C("calendar",v);this.sPrefixCls=S;var w=p.dateRender||i.dateRender,k=p.monthCellContentRender||i.monthCellContentRender,$="placeholder"in p?p.placeholder:m.lang.placeholder,O=p.showTime?p.disabledTime:null,T=Se((a={},V(a,"".concat(S,"-time"),p.showTime),V(a,"".concat(S,"-month"),GD===e),a));s&&y&&s.locale(y);var _={},I={},L={};p.showTime?(I.onSelect=this.handleChange,L.minWidth="195px"):_.onChange=this.handleChange,"mode"in p&&(I.mode=p.mode);var j=P(P({},I),{disabledDate:p.disabledDate,disabledTime:O,locale:m.lang,timePicker:p.timePicker,defaultValue:p.defaultPickerValue||Hr(Ue)(),dateInputPlaceholder:$,prefixCls:S,dateRender:w,format:p.format,showToday:p.showToday,monthCellContentRender:k,renderFooter:this.renderFooter,value:c,inputReadOnly:b,onOk:p.onOk,onPanelChange:p.onPanelChange,onChange:this.handleCalendarChange,class:T}),F=g(e,j,i),N=!p.disabled&&p.allowClear&&s?g(Yr,{class:"".concat(S,"-picker-clear"),onClick:this.clearSelection},null):null,D=f&&(zn(f)?Ot(f,{class:"".concat(S,"-picker-icon")}):g("span",{class:"".concat(S,"-picker-icon")},[f]))||g(XD,{class:"".concat(S,"-picker-icon")},null),z=function(E){var K=E.value;return 
g("div",null,[g("input",{ref:o.saveInput,disabled:p.disabled,onFocus:p.onFocus,onBlur:p.onBlur,readonly:!0,value:Cu(K,o.format),placeholder:$,class:p.pickerInputClass,tabindex:p.tabindex,name:o.name},null),N,D])},B=P(P(P({},p),_),{calendar:F,value:s,prefixCls:"".concat(S,"-picker-container"),open:d,onOpenChange:this.handleOpenChange,style:p.popupStyle});return g("span",le(le({id:p.id,class:Se(p.class,p.pickerClass),style:P(P({},L),p.style)},eg(this.$attrs)),{},{onMouseenter:this.onMouseEnter,onMouseleave:this.onMouseLeave}),[g(oS,B,P(P({},i),{default:l0e(i.default)?z:i.default}))])}})}var yxe={inheritAttrs:!1,name:"Header",mixins:[nt],props:{format:u.string,prefixCls:u.string,disabledDate:u.func,placeholder:u.string,clearText:u.string,value:u.object,inputReadOnly:u.looseBool.def(!1),hourOptions:u.array,minuteOptions:u.array,secondOptions:u.array,disabledHours:u.func,disabledMinutes:u.func,disabledSeconds:u.func,allowEmpty:u.looseBool,defaultOpenValue:u.object,currentSelectPanel:u.string,focusOnOpen:u.looseBool,clearIcon:u.any},data:function(){var t=this.value,n=this.format;return{str:t&&t.format(n)||"",invalid:!1}},mounted:function(){var t=this;if(this.focusOnOpen){var n=window.requestAnimationFrame||window.setTimeout;n(function(){t.refInput.focus(),t.refInput.select()})}},watch:{value:function(t){var n=this;this.$nextTick(function(){n.setState({str:t&&t.format(n.format)||"",invalid:!1})})}},methods:{onInputChange:function(t){var n=t.target,r=n.value,a=n.composing,o=this.str,i=o===void 0?"":o;if(!(t.isComposing||a||i===r)){this.setState({str:r});var l=this.format,s=this.hourOptions,c=this.minuteOptions,d=this.secondOptions,f=this.disabledHours,p=this.disabledMinutes,v=this.disabledSeconds,m=this.value;if(r){var 
y=this.getProtoValue().clone(),b=Ue(r,l,!0);if(!b.isValid()){this.setState({invalid:!0});return}if(y.hour(b.hour()).minute(b.minute()).second(b.second()),s.indexOf(y.hour())<0||c.indexOf(y.minute())<0||d.indexOf(y.second())<0){this.setState({invalid:!0});return}var C=f(),S=p(y.hour()),w=v(y.hour(),y.minute());if(C&&C.indexOf(y.hour())>=0||S&&S.indexOf(y.minute())>=0||w&&w.indexOf(y.second())>=0){this.setState({invalid:!0});return}if(m){if(m.hour()!==y.hour()||m.minute()!==y.minute()||m.second()!==y.second()){var k=m.clone();k.hour(y.hour()),k.minute(y.minute()),k.second(y.second()),this.__emit("change",k)}}else m!==y&&this.__emit("change",y)}else this.__emit("change",null);this.setState({invalid:!1})}},onKeyDown:function(t){t.keyCode===27&&this.__emit("esc"),this.__emit("keydown",t)},getProtoValue:function(){return this.value||this.defaultOpenValue},getInput:function(){var t=this,n=this.prefixCls,r=this.placeholder,a=this.inputReadOnly,o=this.invalid,i=this.str,l=o?"".concat(n,"-input-invalid"):"";return at(g("input",{class:"".concat(n,"-input ").concat(l),ref:function(c){t.refInput=c},onKeydown:this.onKeyDown,value:i,placeholder:r,onInput:this.onInputChange,onChange:this.onInputChange,readonly:!!a},null),[[Mi]])}},render:function(){var t=this.prefixCls;return g("div",{class:"".concat(t,"-input-wrap")},[this.getInput()])}},bxe=yxe;function Cxe(){}var wxe=function e(t,n,r){if(r<=0){requestAnimationFrame(function(){t.scrollTop=n});return}var a=n-t.scrollTop,o=a/r*10;requestAnimationFrame(function(){t.scrollTop+=o,t.scrollTop!==n&&e(t,n,r-10)})},Sxe={name:"Select",mixins:[nt],inheritAttrs:!1,props:{prefixCls:u.string,options:u.array,selectedIndex:u.number,type:u.string},data:function(){return{active:!1}},mounted:function(){var t=this;this.$nextTick(function(){t.scrollToSelected(0)})},watch:{selectedIndex:function(){var t=this;this.$nextTick(function(){t.scrollToSelected(120)})}},methods:{onSelect:function(t){var 
/* (continuation of Select.onSelect, whose header is on the previous wrapped line: emits "select" with this.type and the chosen value) */n=this.type;this.__emit("select",n,t)},onEsc:function(t){this.__emit("esc",t)},/* getOptions: renders one <li> per entry of options. The selected index and o.disabled drive the "-select-option-selected" / "-select-option-disabled" classes. Clicking, or pressing keyCode 13, selects o.value; for a disabled option the click handler is the no-op Cxe instead. keyCode 27 calls onEsc without an argument. */getOptions:function(){var t=this,n=this.options,r=this.selectedIndex,a=this.prefixCls;return n.map(function(o,i){var l,s=Se((l={},V(l,"".concat(a,"-select-option-selected"),r===i),V(l,"".concat(a,"-select-option-disabled"),o.disabled),l)),c=o.disabled?Cxe:function(){t.onSelect(o.value)},d=function(p){p.keyCode===13?c():p.keyCode===27&&t.onEsc()};return g("li",{role:"button",onClick:c,class:s,key:i,disabled:o.disabled,tabindex:"0",onKeydown:d},[o.value])})},/* handleMouseEnter / handleMouseLeave: toggle the active flag; only the enter handler re-emits the event ("mouseenter"). */handleMouseEnter:function(t){this.setState({active:!0}),this.__emit("mouseenter",t)},handleMouseLeave:function(){this.setState({active:!1})},/* scrollToSelected: does nothing when the "list" ref is missing. Otherwise it takes the child at selectedIndex (a negative index is treated as 0) and passes its offsetTop, together with the duration t, to wxe for the node returned by Sn(this) (Sn is defined outside this chunk - presumably the component's root DOM node; confirm). */scrollToSelected:function(t){var n=Sn(this),r=this.$refs.list;if(!!r){var a=this.selectedIndex;a<0&&(a=0);var o=r.children[a],i=o.offsetTop;wxe(n,i,t)}}},/* render: returns null for an empty options list; otherwise a <div> (classes "-select" and, while active, "-select-active") wrapping the <ul ref="list"> of options. */render:function(){var t,n=this.prefixCls,r=this.options,a=this.active;if(r.length===0)return null;var o=(t={},V(t,"".concat(n,"-select"),1),V(t,"".concat(n,"-select-active"),a),t);return g("div",{class:o,onMouseenter:this.handleMouseEnter,onMouseleave:this.handleMouseLeave},[g("ul",{ref:"list"},[this.getOptions()])])}},Lh=Sxe,/* rb: builds one option descriptor {value, disabled}. value is t as a string, prefixed with "0" when t<10; disabled is true only when the list n is given and contains t. */rb=function(t,n){var r="".concat(t);t<10&&(r="0".concat(t));var a=!1;return n&&n.indexOf(t)>=0&&(a=!0),{value:r,disabled:a}},/* kxe: options object for the "Combobox" component, which renders the hour / minute / second / am-pm Select columns (see its get*Select methods further down). */kxe={inheritAttrs:!1,mixins:[nt],name:"Combobox",props:{format:u.string,defaultOpenValue:u.object,prefixCls:u.string,value:u.object,showHour:u.looseBool,showMinute:u.looseBool,showSecond:u.looseBool,hourOptions:u.array,minuteOptions:u.array,secondOptions:u.array,disabledHours:u.func,disabledMinutes:u.func,disabledSeconds:u.func,use12Hours:u.looseBool,isAM:u.looseBool},methods:{/* onItemChange(t,n): t is the column type and n the picked option value. Works on a clone of value, or of defaultOpenValue when value is falsy. For "hour" in 12-hour mode the numeric value is taken modulo 12, plus 12 when isAM is falsy; otherwise it is used as given. The "ampm" branch continues on the next wrapped line. */onItemChange:function(t,n){var r=this.defaultOpenValue,a=this.use12Hours,o=this.value,i=this.isAM,l=(o||r).clone();if(t==="hour")a?i?l.hour(+n%12):l.hour(+n%12+12):l.hour(+n);else if(t==="minute")l.minute(+n);else if(t==="ampm"){var 
s=n.toUpperCase();a&&(s==="PM"&&l.hour()<12&&l.hour(l.hour()%12+12),s==="AM"&&l.hour()>=12&&l.hour(l.hour()-12)),this.__emit("amPmChange",s)}else l.second(+n);this.__emit("change",l)},onEnterSelectPanel:function(t){this.__emit("currentSelectPanelChange",t)},onEsc:function(t){this.__emit("esc",t)},getHourSelect:function(t){var n=this,r=this.prefixCls,a=this.hourOptions,o=this.disabledHours,i=this.showHour,l=this.use12Hours;if(!i)return null;var s=o(),c,d;return l?(c=[12].concat(a.filter(function(f){return f<12&&f>0})),d=t%12||12):(c=a,d=t),g(Lh,{prefixCls:r,options:c.map(function(f){return rb(f,s)}),selectedIndex:c.indexOf(d),type:"hour",onSelect:this.onItemChange,onMouseenter:function(){return n.onEnterSelectPanel("hour")},onEsc:this.onEsc},null)},getMinuteSelect:function(t){var n=this,r=this.prefixCls,a=this.minuteOptions,o=this.disabledMinutes,i=this.defaultOpenValue,l=this.showMinute,s=this.value;if(!l)return null;var c=s||i,d=o(c.hour());return g(Lh,{prefixCls:r,options:a.map(function(f){return rb(f,d)}),selectedIndex:a.indexOf(t),type:"minute",onSelect:this.onItemChange,onMouseenter:function(){return n.onEnterSelectPanel("minute")},onEsc:this.onEsc},null)},getSecondSelect:function(t){var n=this,r=this.prefixCls,a=this.secondOptions,o=this.disabledSeconds,i=this.showSecond,l=this.defaultOpenValue,s=this.value;if(!i)return null;var c=s||l,d=o(c.hour(),c.minute());return g(Lh,{prefixCls:r,options:a.map(function(f){return rb(f,d)}),selectedIndex:a.indexOf(t),type:"second",onSelect:this.onItemChange,onMouseenter:function(){return n.onEnterSelectPanel("second")},onEsc:this.onEsc},null)},getAMPMSelect:function(){var t=this,n=this.prefixCls,r=this.use12Hours,a=this.format,o=this.isAM;if(!r)return null;var i=["am","pm"].map(function(s){return a.match(/\sA/)?s.toUpperCase():s}).map(function(s){return{value:s}}),l=o?0:1;return g(Lh,{prefixCls:n,options:i,selectedIndex:l,type:"ampm",onSelect:this.onItemChange,onMouseenter:function(){return 
t.onEnterSelectPanel("ampm")},onEsc:this.onEsc},null)}},render:function(){var t=this.prefixCls,n=this.defaultOpenValue,r=this.value,a=r||n;return g("div",{class:"".concat(t,"-combobox")},[this.getHourSelect(a.hour()),this.getMinuteSelect(a.minute()),this.getSecondSelect(a.second()),this.getAMPMSelect(a.hour())])}},$xe=kxe;function qs(){}function ab(e,t,n){for(var r=arguments.length>3&&arguments[3]!==void 0?arguments[3]:1,a=[],o=0;o<e;o+=r)(!t||t.indexOf(o)<0||!n)&&a.push(o);return a}function Oxe(e,t,n,r){var a=t.slice().sort(function(l,s){return Math.abs(e.hour()-l)-Math.abs(e.hour()-s)})[0],o=n.slice().sort(function(l,s){return Math.abs(e.minute()-l)-Math.abs(e.minute()-s)})[0],i=r.slice().sort(function(l,s){return Math.abs(e.second()-l)-Math.abs(e.second()-s)})[0];return Ue("".concat(a,":").concat(o,":").concat(i),"HH:mm:ss")}var Pxe=G({name:"Panel",mixins:[nt],inheritAttrs:!1,props:{clearText:u.string,prefixCls:u.string.def("rc-time-picker-panel"),defaultOpenValue:{type:Object,default:function(){return Ue()}},value:u.any,defaultValue:u.any,placeholder:u.string,format:u.string,inputReadOnly:u.looseBool.def(!1),disabledHours:u.func.def(qs),disabledMinutes:u.func.def(qs),disabledSeconds:u.func.def(qs),hideDisabledOptions:u.looseBool,allowEmpty:u.looseBool,showHour:u.looseBool,showMinute:u.looseBool,showSecond:u.looseBool,use12Hours:u.looseBool.def(!1),hourStep:u.number,minuteStep:u.number,secondStep:u.number,addon:u.func.def(qs),focusOnOpen:u.looseBool,clearIcon:u.any},data:function(){return{sValue:this.value,selectionRange:[],currentSelectPanel:""}},watch:{value:function(t){this.setState({sValue:t})}},methods:{onChange:function(t){this.setState({sValue:t}),this.__emit("change",t)},onAmPmChange:function(t){this.__emit("amPmChange",t)},onCurrentSelectPanelChange:function(t){this.setState({currentSelectPanel:t})},close:function(){this.__emit("esc")},onEsc:function(t){this.__emit("esc",t)},disabledHours2:function(){var 
t=this.use12Hours,n=this.disabledHours,r=n();return t&&Array.isArray(r)&&(this.isAM()?r=r.filter(function(a){return a<12}).map(function(a){return a===0?12:a}):r=r.map(function(a){return a===12?12:a-12})),r},isAM:function(){var t=this.sValue||this.defaultOpenValue;return t.hour()>=0&&t.hour()<12}},render:function(){var t=this.prefixCls,n=this.placeholder,r=this.disabledMinutes,a=this.addon,o=this.disabledSeconds,i=this.hideDisabledOptions,l=this.showHour,s=this.showMinute,c=this.showSecond,d=this.format,f=this.defaultOpenValue,p=this.clearText,v=this.use12Hours,m=this.focusOnOpen,y=this.hourStep,b=this.minuteStep,C=this.secondStep,S=this.inputReadOnly,w=this.sValue,k=this.currentSelectPanel,$=this.$attrs,O=$.class,T=$.onEsc,_=T===void 0?qs:T,I=$.onKeydown,L=I===void 0?qs:I,j=We(this,"clearIcon"),F=this.disabledHours2(),N=r(w?w.hour():null),D=o(w?w.hour():null,w?w.minute():null),z=ab(24,F,i,y),B=ab(60,N,i,b),M=ab(60,D,i,C),E=Oxe(f,z,B,M);return g("div",{className:Se(O,"".concat(t,"-inner"))},[g(bxe,{clearText:p,prefixCls:t,defaultOpenValue:E,value:w,currentSelectPanel:k,onEsc:_,format:d,placeholder:n,hourOptions:z,minuteOptions:B,secondOptions:M,disabledHours:this.disabledHours2,disabledMinutes:r,disabledSeconds:o,onChange:this.onChange,focusOnOpen:m,onKeydown:L,inputReadOnly:S,clearIcon:j},null),g($xe,{prefixCls:t,value:w,defaultOpenValue:E,format:d,onChange:this.onChange,onAmPmChange:this.onAmPmChange,showHour:l,showMinute:s,showSecond:c,hourOptions:z,minuteOptions:B,secondOptions:M,disabledHours:this.disabledHours2,disabledMinutes:r,disabledSeconds:o,onCurrentSelectPanelChange:this.onCurrentSelectPanelChange,use12Hours:v,onEsc:this.onEsc,isAM:this.isAM()},null),a(this)])}}),JD=Pxe,Fh={adjustX:1,adjustY:1},Bh=[0,0],Txe={bottomLeft:{points:["tl","tl"],overflow:Fh,offset:[0,-3],targetOffset:Bh},bottomRight:{points:["tr","tr"],overflow:Fh,offset:[0,-3],targetOffset:Bh},topRight:{points:["br","br"],overflow:Fh,offset:[0,3],targetOffset:Bh},topLeft:{points:["bl","bl"],ov
erflow:Fh,offset:[0,3],targetOffset:Bh}},xxe=Txe;/* No-op; used as the default for the disabledHours/disabledMinutes/disabledSeconds props of VcTimePicker. */function ob(){}/* Stores t on `this` under key e; bound in data() to capture the "picker" and "panelInstance" refs. */function CT(e,t){this[e]=t}var /* VcTimePicker: a text input wrapped in the popup trigger Ii, whose popup content is the time panel JD. */_xe=G({name:"VcTimePicker",mixins:[nt],inheritAttrs:!1,props:An({prefixCls:u.string,clearText:u.string,value:u.any,defaultOpenValue:{type:Object,default:function(){return Ue()}},pickerInputClass:String,inputReadOnly:u.looseBool,disabled:u.looseBool,allowEmpty:u.looseBool,defaultValue:u.any,open:u.looseBool,defaultOpen:u.looseBool,align:u.object,placement:u.any,transitionName:u.string,getPopupContainer:u.func,placeholder:u.string,format:u.string,showHour:u.looseBool,showMinute:u.looseBool,showSecond:u.looseBool,popupClassName:u.string,popupStyle:u.object,disabledHours:u.func,disabledMinutes:u.func,disabledSeconds:u.func,hideDisabledOptions:u.looseBool,name:u.string,autocomplete:u.string,use12Hours:u.looseBool,hourStep:u.number,minuteStep:u.number,secondStep:u.number,focusOnOpen:u.looseBool,autofocus:u.looseBool,id:u.string,inputIcon:u.any,clearIcon:u.any,addon:u.func},{clearText:"clear",prefixCls:"rc-time-picker",defaultOpen:!1,inputReadOnly:!1,popupClassName:"",popupStyle:{},align:{},allowEmpty:!0,showHour:!0,showMinute:!0,showSecond:!0,disabledHours:ob,disabledMinutes:ob,disabledSeconds:ob,hideDisabledOptions:!1,placement:"bottomLeft",use12Hours:!1,focusOnOpen:!1}),/* Initial state: sOpen/sValue fall back to defaultOpen/defaultValue when open/value are undefined. */data:function(){this.saveInputRef=CT.bind(this,"picker"),this.savePanelRef=CT.bind(this,"panelInstance");var t=this.defaultOpen,n=this.defaultValue,r=this.open,a=r===void 0?t:r,o=this.value,i=o===void 0?n:o;return{sOpen:a,sValue:i}},watch:{value:function(t){this.setState({sValue:t})},open:function(t){t!==void 0&&this.setState({sOpen:t})}},mounted:function(){var 
t=this;this.$nextTick(function(){t.autofocus&&t.focus()})},methods:{onPanelChange:function(t){this.setValue(t)},onAmPmChange:function(t){this.__emit("amPmChange",t)},onClear:function(t){t.stopPropagation(),this.setValue(null),this.setOpen(!1)},onVisibleChange:function(t){this.setOpen(t)},onEsc:function(){this.setOpen(!1),this.focus()},/* A keydown with keyCode 40 opens the popup. */onKeyDown:function(t){t.keyCode===40&&this.setOpen(!0)},onKeyDown2:function(t){this.__emit("keydown",t)},/* sValue is written locally only when vt(this,"value") is falsy (presumably: no value prop supplied - confirm); "change" is always emitted. */setValue:function(t){vt(this,"value")||this.setState({sValue:t}),this.__emit("change",t)},/* Returns the explicit format when set; otherwise joins the enabled parts with ":" using "h" plus a trailing " a" for use12Hours, or "HH" otherwise. */getFormat:function(){var t=this.format,n=this.showHour,r=this.showMinute,a=this.showSecond,o=this.use12Hours;if(t)return t;if(o){var i=[n?"h":"",r?"mm":"",a?"ss":""].filter(function(l){return!!l}).join(":");return i.concat(" a")}return[n?"HH":"",r?"mm":"",a?"ss":""].filter(function(l){return!!l}).join(":")},getPanelElement:function(){var t=this.prefixCls,n=this.placeholder,r=this.disabledHours,a=this.addon,o=this.disabledMinutes,i=this.disabledSeconds,l=this.hideDisabledOptions,s=this.inputReadOnly,c=this.showHour,d=this.showMinute,f=this.showSecond,p=this.defaultOpenValue,v=this.clearText,m=this.use12Hours,y=this.focusOnOpen,b=this.onKeyDown2,C=this.hourStep,S=this.minuteStep,w=this.secondStep,k=this.sValue,$=We(this,"clearIcon");return g(JD,{clearText:v,prefixCls:"".concat(t,"-panel"),ref:this.savePanelRef,value:k,inputReadOnly:s,onChange:this.onPanelChange,onAmPmChange:this.onAmPmChange,defaultOpenValue:p,showHour:c,showMinute:d,showSecond:f,onEsc:this.onEsc,format:this.getFormat(),placeholder:n,disabledHours:r,disabledMinutes:o,disabledSeconds:i,hideDisabledOptions:l,use12Hours:m,hourStep:C,minuteStep:S,secondStep:w,focusOnOpen:y,onKeydown:b,clearIcon:$,addon:a},null)},getPopupClassName:function(){var t=this.showHour,n=this.showMinute,r=this.showSecond,a=this.use12Hours,o=this.prefixCls,i=this.popupClassName,l=0;return 
t&&(l+=1),n&&(l+=1),r&&(l+=1),a&&(l+=1),Se(i,V({},"".concat(o,"-panel-narrow"),(!t||!n||!r)&&!a),"".concat(o,"-panel-column-").concat(l))},/* Does nothing when sOpen already equals t; otherwise emits "open" or "close" with {open:t}. */setOpen:function(t){this.sOpen!==t&&(vt(this,"open")||this.setState({sOpen:t}),t?this.__emit("open",{open:t}):this.__emit("close",{open:t}))},focus:function(){this.picker.focus()},blur:function(){this.picker.blur()},onFocus:function(t){this.__emit("focus",t)},onBlur:function(t){this.__emit("blur",t)},/* Returns null when allowEmpty is false, there is no current value, or the picker is disabled. */renderClearButton:function(){var t=this,n=this.sValue,r=this.$props,a=r.prefixCls,o=r.allowEmpty,i=r.clearText,l=r.disabled;if(!o||!n||l)return null;var s=We(this,"clearIcon");if(zn(s)){var c=nf(s)||{},d=c.onClick;return Ot(s,{onClick:function(){d&&d.apply(void 0,arguments),t.onClear.apply(t,arguments)}})}return g("a",{role:"button",class:"".concat(a,"-clear"),title:i,onClick:this.onClear,tabindex:0},[s||g("i",{class:"".concat(a,"-clear-icon")},null)])}},render:function(){var t=this,n=this.prefixCls,r=this.placeholder,a=this.placement,o=this.align,i=this.id,l=this.disabled,s=this.transitionName,c=this.getPopupContainer,d=this.name,f=this.autocomplete,p=this.autofocus,v=this.sOpen,m=this.sValue,y=this.onFocus,b=this.onBlur,C=this.popupStyle,S=this.pickerInputClass,w=this.$attrs,k=w.class,$=w.style,O=this.getPopupClassName(),T=We(this,"inputIcon");return g(Ii,{prefixCls:"".concat(n,"-panel"),popupClassName:O,popupStyle:C,popupAlign:o,builtinPlacements:xxe,popupPlacement:a,action:l?[]:["click"],destroyPopupOnHide:!0,getPopupContainer:c,popupTransitionName:s,popupVisible:v,onPopupVisibleChange:this.onVisibleChange,popup:this.getPanelElement()},{default:function(){return[g("span",{class:Se(n,k),style:$},[g("input",{class:S,ref:t.saveInputRef,type:"text",placeholder:r,name:d,onKeydown:t.onKeyDown,disabled:l,value:m&&m.format(t.getFormat())||"",autocomplete:f,onFocus:y,onBlur:b,autofocus:p,/* NOTE(review): this trigger input is always read-only; inputReadOnly is only forwarded to the panel in getPanelElement. */readonly:!0,id:i},null),T||g("span",{class:"".concat(n,"-icon")},null),t.renderClearButton()])]}})}}),Exe={icon:{tag:"svg",attrs:{viewBox:"64 64 896 
896",focusable:"false"},children:[{tag:"path",attrs:{d:"M512 64C264.6 64 64 264.6 64 512s200.6 448 448 448 448-200.6 448-448S759.4 64 512 64zm0 820c-205.4 0-372-166.6-372-372s166.6-372 372-372 372 166.6 372 372-166.6 372-372 372z"}},{tag:"path",attrs:{d:"M686.7 638.6L544.1 535.5V288c0-4.4-3.6-8-8-8H488c-4.4 0-8 3.6-8 8v275.4c0 2.6 1.2 5 3.3 6.5l165.4 120.6c3.6 2.6 8.6 1.8 11.2-1.7l28.6-39c2.6-3.7 1.8-8.7-1.8-11.2z"}}]},name:"clock-circle",theme:"outlined"},Mxe=Exe;/* Copies the own enumerable string keys and enumerable own symbols of every later argument onto e (null/undefined sources count as {}) and returns e. */function wT(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){Ixe(e,a,n[a])})}return e}/* Sets e[t]=n and returns e; goes through Object.defineProperty when t is already in e. */function Ixe(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var /* Functional component: renders Et with the merged props/attrs and the clock-circle icon definition Mxe. */lS=function(t,n){var r=wT({},t,n.attrs);return g(Et,wT({},r,{icon:Mxe}),null)};lS.displayName="ClockCircleOutlined";lS.inheritAttrs=!1;var Nxe=lS;/* Derives showHour/showMinute/showSecond from which tokens ("H", "h" or "k"; "m"; "s") occur in e, which must support indexOf. */function QD(e){return{showHour:e.indexOf("H")>-1||e.indexOf("h")>-1||e.indexOf("k")>-1,showMinute:e.indexOf("m")>-1,showSecond:e.indexOf("s")>-1}}var 
/* Factory returning the prop definitions used by ATimePicker. */Axe=function(){return{size:u.oneOf(rt("large","default","small")),value:z3,defaultValue:z3,open:u.looseBool,format:u.string,disabled:u.looseBool,placeholder:u.string,prefixCls:u.string,hideDisabledOptions:u.looseBool,disabledHours:u.func,disabledMinutes:u.func,disabledSeconds:u.func,getPopupContainer:u.func,use12Hours:u.looseBool,focusOnOpen:u.looseBool,hourStep:u.number,minuteStep:u.number,secondStep:u.number,allowEmpty:u.looseBool,allowClear:u.looseBool,inputReadOnly:u.looseBool,clearText:u.string,defaultOpenValue:u.object,popupClassName:u.string,popupStyle:u.style,suffixIcon:u.any,align:u.object,placement:u.any,transitionName:u.string,autofocus:u.looseBool,addon:u.any,clearIcon:u.any,locale:u.object,valueFormat:u.string,onChange:u.func,onAmPmChange:u.func,onOpen:u.func,onClose:u.func,onFocus:u.func,onBlur:u.func,onKeydown:u.func,onOpenChange:u.func}},/* ATimePicker: renders the VcTimePicker component (_xe) through Kr, adding prefix classes, icons and valueFormat conversion. */Dxe=G({name:"ATimePicker",mixins:[nt],inheritAttrs:!1,props:Rn(Axe(),{align:{offset:[0,-2]},disabled:!1,disabledHours:void 0,disabledMinutes:void 0,disabledSeconds:void 0,hideDisabledOptions:!1,placement:"bottomLeft",transitionName:"slide-up",focusOnOpen:!0,allowClear:!0}),emits:["update:value","update:open","change","openChange","focus","blur","keydown"],setup:function(){return{popupRef:null,timePickerRef:null,configProvider:ve("configProvider",St)}},data:function(){var t=this.value,n=this.defaultValue,r=this.valueFormat;return pi("TimePicker",n,"defaultValue",r),pi("TimePicker",t,"value",r),on(!vt(this,"allowEmpty"),"TimePicker","`allowEmpty` is deprecated. 
Please use `allowClear` instead."),{sValue:rl(t||n,r)}},watch:{value:function(t){pi("TimePicker",t,"value",this.valueFormat),this.setState({sValue:rl(t,this.valueFormat)})}},created:function(){ot("savePopupRef",this.savePopupRef)},methods:{/* The explicit format, else "h:mm:ss a" for use12Hours, else "HH:mm:ss". */getDefaultFormat:function(){var t=this.format,n=this.use12Hours;return t||(n?"h:mm:ss a":"HH:mm:ss")},/* Returns allowClear when vt(this,"allowClear") is truthy, otherwise the deprecated allowEmpty. */getAllowClear:function(){var t=this.$props,n=t.allowClear,r=t.allowEmpty;return vt(this,"allowClear")?n:r},getDefaultLocale:function(){var t=P(P({},_w),this.$props.locale);return t},savePopupRef:function(t){this.popupRef=t},saveTimePicker:function(t){this.timePickerRef=t},/* Emits "update:value" and "change" with t passed through Du when valueFormat is set; the second "change" argument is t formatted with format (default "HH:mm:ss"), or "" when t is falsy. */handleChange:function(t){vt(this,"value")||this.setState({sValue:t});var n=this.format,r=n===void 0?"HH:mm:ss":n,a=this.valueFormat?Du(t,this.valueFormat):t;this.$emit("update:value",a),this.$emit("change",a,t&&t.format(r)||"")},/* Shared handler for the child's open and close events; re-emits the open flag. */handleOpenClose:function(t){var n=t.open;this.$emit("update:open",n),this.$emit("openChange",n)},focus:function(){this.timePickerRef.focus()},blur:function(){this.timePickerRef.blur()},renderInputIcon:function(t){var n=We(this,"suffixIcon");n=Array.isArray(n)?n[0]:n;var r=n&&zn(n)&&Ot(n,{class:"".concat(t,"-clock-icon")})||g(Nxe,{class:"".concat(t,"-clock-icon")},null);return g("span",{class:"".concat(t,"-icon")},[r])},renderClearIcon:function(t){var n=We(this,"clearIcon"),r="".concat(t,"-clear");return n&&zn(n)?Ot(n,{class:r}):g(Yr,{class:r},null)},renderTimePicker:function(t){var n,r=Qe(this);r=bn(r,["defaultValue","suffixIcon","allowEmpty","allowClear"]);var a=this.$attrs.class,o=r,i=o.prefixCls,l=o.getPopupContainer,s=o.placeholder,c=o.size,d=this.configProvider.getPrefixCls,f=d("time-picker",i),p=d("input"),v=Se("".concat(f,"-input"),p),m=this.getDefaultFormat(),y=(n={},V(n,a,a),V(n,"".concat(f,"-").concat(c),!!c),n),b=We(this,"addon",{},!1),C=function(T){return b?g("div",{class:"".concat(f,"-panel-addon")},[typeof 
b=="function"?b(T):b]):null},S=this.renderInputIcon(f),w=this.renderClearIcon(f),k=this.configProvider.getPopupContainer,$=P(P(P(P({},QD(m)),r),this.$attrs),{allowEmpty:this.getAllowClear(),prefixCls:f,pickerInputClass:v,getPopupContainer:l||k,format:m,value:this.sValue,placeholder:s===void 0?t.placeholder:s,addon:C,inputIcon:S,clearIcon:w,class:y,ref:this.saveTimePicker,onChange:this.handleChange,onOpen:this.handleOpenClose,onClose:this.handleOpenClose});return g(_xe,$,null)}},render:function(){return g(Kr,{componentName:"TimePicker",defaultLocale:this.getDefaultLocale(),children:this.renderTimePicker},null)}}),Rxe=kn(Dxe),/* Fallback display format per picker type; consulted after the explicit format and the locale entry named in Fxe. */Lxe={date:"YYYY-MM-DD",dateTime:"YYYY-MM-DD HH:mm:ss",week:"gggg-wo",month:"YYYY-MM"},/* Locale key that holds the display format for each picker type. */Fxe={date:"dateFormat",dateTime:"dateTimeFormat",week:"weekFormat",month:"monthFormat"};/* Counts how many of showHour/showMinute/showSecond/use12Hours are truthy. */function Bxe(e){var t=e.showHour,n=e.showMinute,r=e.showSecond,a=e.use12Hours,o=0;return t&&(o+=1),n&&(o+=1),r&&(o+=1),a&&(o+=1),o}/* Builds a picker wrapper component that renders component e with props t; n is the picker type used to look up the default format. */function xg(e,t,n){return G({name:e.name,inheritAttrs:!1,props:P(P({},t),{transitionName:u.string.def("slide-up"),popupStyle:u.style,locale:u.any.def({})}),emits:["update:value","openChange","focus","blur","mouseenter","mouseleave","change","ok","calendarChange"],setup:function(){return{configProvider:ve("configProvider",St),picker:void 0,popupRef:void 0}},watch:{value:function(a){pi("DatePicker",a,"value",this.valueFormat)}},created:function(){ot("savePopupRef",this.savePopupRef)},mounted:function(){var a=this,o=this.$props,i=o.autofocus,l=o.disabled,s=o.value,c=o.defaultValue,d=o.valueFormat;pi("DatePicker",c,"defaultValue",d),pi("DatePicker",s,"value",d),i&&!l&&Ne(function(){a.focus()})},methods:{savePicker:function(a){this.picker=a},getDefaultLocale:function(){var a=P(P({},Ev),this.locale);return 
a.lang=P(P({},a.lang),(this.locale||{}).lang),a},savePopupRef:function(a){this.popupRef=a},handleOpenChange:function(a){this.$emit("openChange",a)},handleFocus:function(a){this.$emit("focus",a)},handleBlur:function(a){this.$emit("blur",a)},handleMouseEnter:function(a){this.$emit("mouseenter",a)},handleMouseLeave:function(a){this.$emit("mouseleave",a)},/* Passes a through Du when valueFormat is set, then emits "update:value" and "change" (the latter also receives o). */handleChange:function(a,o){var i=this.valueFormat?Du(a,this.valueFormat):a;this.$emit("update:value",i),this.$emit("change",i,o)},handleOk:function(a){this.$emit("ok",this.valueFormat?Du(a,this.valueFormat):a)},handleCalendarChange:function(a,o){this.$emit("calendarChange",this.valueFormat?Du(a,this.valueFormat):a,o)},focus:function(){this.picker.focus()},blur:function(){this.picker.blur()},/* Mutates a in place: value, defaultValue and defaultPickerValue (when present as keys) are passed through rl with valueFormat. */transformValue:function(a){"value"in a&&(a.value=rl(a.value,this.valueFormat)),"defaultValue"in a&&(a.defaultValue=rl(a.defaultValue,this.valueFormat)),"defaultPickerValue"in a&&(a.defaultPickerValue=rl(a.defaultPickerValue,this.valueFormat))},renderPicker:function(a,o){var i,l=P(P({},Qe(this)),this.$attrs);this.transformValue(l);var 
s=l.prefixCls,c=l.inputPrefixCls,d=l.getCalendarContainer,f=l.size,p=l.showTime,v=l.disabled,m=l.format,y=p?"".concat(n,"Time"):n,b=m||a[Fxe[y]]||Lxe[y],C=this.configProvider,S=C.getPrefixCls,w=C.getPopupContainer,k=d||w,$=S("calendar",s),O=S("input",c),T=Se("".concat($,"-picker"),V({},"".concat($,"-picker-").concat(f),!!f)),_=Se("".concat($,"-picker-input"),O,(i={},V(i,"".concat(O,"-lg"),f==="large"),V(i,"".concat(O,"-sm"),f==="small"),V(i,"".concat(O,"-disabled"),v),i)),/* Time-panel format: showTime.format when provided, else "HH:mm:ss". */I=p&&p.format||"HH:mm:ss",L=P(P({},QD(I)),{format:I,use12Hours:p&&p.use12Hours}),j=Bxe(L),F="".concat($,"-time-picker-column-").concat(j),N=P(P(P({},L),p),{prefixCls:"".concat($,"-time-picker"),placeholder:a.timePickerLocale.placeholder,transitionName:"slide-up",class:F,onEsc:function(){}}),/* The time panel vnode is only created when showTime is truthy. */D=p?g(JD,N,null):null,z=P(P({},l),{getCalendarContainer:k,format:b,pickerClass:T,pickerInputClass:_,locale:a,localeCode:o,timePicker:D,onOpenChange:this.handleOpenChange,onFocus:this.handleFocus,onBlur:this.handleBlur,onMouseenter:this.handleMouseEnter,onMouseleave:this.handleMouseLeave,onChange:this.handleChange,onOk:this.handleOk,onCalendarChange:this.handleCalendarChange,ref:this.savePicker});return g(e,z,this.$slots)}},render:function(){return g(Kr,{componentName:"DatePicker",defaultLocale:this.getDefaultLocale,children:this.renderPicker},null)}})}/* No-op; default for the CalendarPart event handlers read from $attrs. */function Gs(){}var /* CalendarPart: one side of the range calendar, selected by the direction prop ("left" or anything else). */Vxe={name:"CalendarPart",inheritAttrs:!1,mixins:[nt],props:{prefixCls:u.string,value:u.any,hoverValue:u.any,selectedValue:u.any,direction:u.any,locale:u.any,showDateInput:u.looseBool,showTimePicker:u.looseBool,showWeekNumber:u.looseBool,format:u.any,placeholder:u.any,disabledDate:u.any,timePicker:u.any,disabledTime:u.any,disabledMonth:u.any,mode:u.any,timePickerDisabledTime:u.object,enableNext:u.any,enablePrev:u.any,clearIcon:u.any,dateRender:u.func,inputMode:u.string,inputReadOnly:u.looseBool},render:function(){var 
t=this.$props,n=t.prefixCls,r=t.value,a=t.hoverValue,o=t.selectedValue,i=t.mode,l=t.direction,s=t.locale,c=t.format,d=t.placeholder,f=t.disabledDate,p=t.timePicker,v=t.disabledTime,m=t.timePickerDisabledTime,y=t.showTimePicker,b=t.enablePrev,C=t.enableNext,S=t.disabledMonth,w=t.showDateInput,k=t.dateRender,$=t.showWeekNumber,/* NOTE(review): showClear is not declared in this component's props, so this read presumably yields undefined and showClear below is always false - confirm. */O=t.showClear,T=t.inputMode,_=t.inputReadOnly,I=We(this,"clearIcon"),L=this.$attrs,j=L.onInputChange,F=j===void 0?Gs:j,N=L.onInputSelect,D=N===void 0?Gs:N,z=L.onValueChange,B=z===void 0?Gs:z,M=L.onPanelChange,E=M===void 0?Gs:M,K=L.onSelect,W=K===void 0?Gs:K,Y=L.onDayHover,q=Y===void 0?Gs:Y,J=y&&p,ne=J&&v?K2(o,v):null,oe="".concat(n,"-range"),Q={locale:s,value:r,prefixCls:n,showTimePicker:y},/* Index into selectedValue: 0 for the "left" part, 1 otherwise. */ae=l==="left"?0:1,de=null;if(J){var be=Qe(p);de=Ot(p,P(P(P(P({showHour:!0,showMinute:!0,showSecond:!0},be),ne),m),{defaultOpenValue:r,value:o[ae],onChange:F}))}var Ee=w&&g(qp,{format:c,locale:s,prefixCls:n,timePicker:p,disabledDate:f,placeholder:d,disabledTime:v,value:r,showClear:O||!1,selectedValue:o[ae],onChange:F,onSelect:D,clearIcon:I,inputMode:T,inputReadOnly:_},null),Pe=P(P({},Q),{mode:i,enableNext:C,enablePrev:b,disabledMonth:S,onValueChange:B,onPanelChange:E}),Be=P(P({},Q),{hoverValue:a,selectedValue:o,dateRender:k,disabledDate:f,showWeekNumber:$,onSelect:W,onDayHover:q});return g("div",{class:"".concat(oe,"-part ").concat(oe,"-").concat(l)},[Ee,g("div",{style:{outline:"none"}},[g(aS,Pe,null),y?g("div",{class:"".concat(n,"-time-picker")},[g("div",{class:"".concat(n,"-time-picker-panel")},[de])]):null,g("div",{class:"".concat(n,"-body")},[g(W2,Be,null)])])])}},ST=Vxe;/* No-op. */function Vh(){}/* True when e is an array that is empty or contains only falsy entries. */function zxe(e){return Array.isArray(e)&&(e.length===0||e.every(function(t){return!t}))}/* Element-wise strict equality of two array-likes; identical references are equal, and a null/undefined operand or a length mismatch yields false. */function kT(e,t){if(e===t)return!0;if(e===null||typeof e=="undefined"||t===null||typeof t=="undefined"||e.length!==t.length)return!1;for(var n=0;n<e.length;++n)if(e[n]!==t[n])return!1;return!0}/* Completes a [start,end] pair: a missing start becomes end minus one month, a missing end becomes start plus one month. */function C1(e){var t=fn(e,2),n=t[0],r=t[1];return 
r&&n==null&&(n=r.clone().subtract(1,"month")),n&&r==null&&(r=n.clone().add(1,"month")),[n,r]}function $T(e,t){var n=e.selectedValue||t&&e.defaultSelectedValue,r=e.value||t&&e.defaultValue,a=C1(r||n);return zxe(a)?t&&[Ue(),Ue().add(1,"months")]:a}function ib(e,t){for(var n=t?t().concat():[],r=0;r<e;r++)n.indexOf(r)===-1&&n.push(r);return n}function zh(e,t,n){if(!!t){var r=this.sSelectedValue,a=r.concat(),o=e==="left"?0:1;a[o]=t,a[0]&&this.compare(a[0],a[1])>0&&(a[1-o]=this.sShowTimePicker?a[o]:void 0),this.__emit("inputSelect",a),this.fireSelectValueChange(a,null,n||{source:"dateInput"})}}var Hxe=G({name:"RangeCalendar",mixins:[nt,$g],inheritAttrs:!1,props:{locale:u.object.def(Nf),visible:u.looseBool.def(!0),prefixCls:u.string.def("rc-calendar"),dateInputPlaceholder:u.any,seperator:u.string.def("~"),defaultValue:u.any,value:u.any,hoverValue:u.any,mode:u.arrayOf(u.oneOf(["time","date","month","year","decade"])),showDateInput:u.looseBool.def(!0),timePicker:u.any,showOk:u.looseBool,showToday:u.looseBool.def(!0),defaultSelectedValue:u.array.def([]),selectedValue:u.array,showClear:u.looseBool,showWeekNumber:u.looseBool,format:u.oneOfType([u.string,u.arrayOf(u.string),u.func]),type:u.any.def("both"),disabledDate:u.func,disabledTime:u.func.def(Vh),renderFooter:u.func.def(function(){return null}),renderSidebar:u.func.def(function(){return null}),dateRender:u.func,clearIcon:u.any,inputReadOnly:u.looseBool},data:function(){var t=this.$props,n=t.selectedValue||t.defaultSelectedValue,r=$T(t,1);return{sSelectedValue:n,prevSelectedValue:n,firstSelectedValue:null,sHoverValue:t.hoverValue||[],sValue:r,sShowTimePicker:!1,sMode:t.mode||["date","date"],sPanelTriggerSource:""}},watch:{value:function(){var t={};t.sValue=$T(this.$props,0),this.setState(t)},hoverValue:function(t){kT(this.sHoverValue,t)||this.setState({sHoverValue:t})},selectedValue:function(t){var 
n={};n.sSelectedValue=t,n.prevSelectedValue=t,this.setState(n)},mode:function(t){kT(this.sMode,t)||this.setState({sMode:t})}},methods:{onDatePanelEnter:function(){this.hasSelectedValue()&&this.fireHoverValueChange(this.sSelectedValue.concat())},onDatePanelLeave:function(){this.hasSelectedValue()&&this.fireHoverValueChange([])},/* Builds the next selection from the picked date t according to type ("both", "start", anything else is treated as end) and hands it to fireSelectValueChange. */onSelect:function(t){var n=this.type,r=this.sSelectedValue,a=this.prevSelectedValue,o=this.firstSelectedValue,i;if(n==="both")o?this.compare(o,t)<0?(Sa(a[1],t),i=[o,t]):(Sa(a[0],t),Sa(a[1],o),i=[t,o]):(Sa(a[0],t),i=[t]);else if(n==="start"){Sa(a[0],t);var l=r[1];i=l&&this.compare(l,t)>0?[t,l]:[t]}else{var s=r[0];s&&this.compare(s,t)<=0?(Sa(a[1],t),i=[s,t]):(Sa(a[0],t),i=[t])}this.fireSelectValueChange(i)},/* Keyboard navigation; does nothing when the event target is an input element. Keys without a case below are re-emitted as "keydown". */onKeyDown:function(t){var n=this;if(t.target.nodeName.toLowerCase()!=="input"){var r=t.keyCode,a=t.ctrlKey||t.metaKey,o=this.$data,i=o.sSelectedValue,l=o.sHoverValue,s=o.firstSelectedValue,c=o.sValue,d=this.$props.disabledDate,/* f applies the date-shifting callback m to the current anchor date, updates the hover range and the displayed months, calls preventDefault and returns the shifted date. */f=function(m){var y,b,C;if(s?l.length===1?(y=l[0].clone(),b=m(y),C=n.onDayHover(b)):(y=l[0].isSame(s,"day")?l[1]:l[0],b=m(y),C=n.onDayHover(b)):(y=l[0]||i[0]||c[0]||Ue(),b=m(y),C=[b],n.fireHoverValueChange(C)),C.length>=2){var S=C.some(function(O){return!sxe(c,O,"month")});if(S){var w=C.slice().sort(function(O,T){return O.valueOf()-T.valueOf()});w[0].isSame(w[1],"month")&&(w[1]=w[0].clone().add(1,"month")),n.fireValueChange(w)}}else if(C.length===1){var k=c.findIndex(function(O){return O.isSame(y,"month")});if(k===-1&&(k=0),c.every(function(O){return!O.isSame(b,"month")})){var $=c.slice();$[k]=b.clone(),n.fireValueChange($)}}return t.preventDefault(),b};switch(r){case ze.DOWN:f(function(v){return ei(v,1,"weeks")});return;case ze.UP:f(function(v){return ei(v,-1,"weeks")});return;case ze.LEFT:f(a?function(v){return ei(v,-1,"years")}:function(v){return ei(v,-1,"days")});return;case ze.RIGHT:f(a?function(v){return ei(v,1,"years")}:function(v){return ei(v,1,"days")});return;case ze.HOME:f(function(v){return 
UD(v)});return;case ze.END:f(function(v){return YD(v)});return;case ze.PAGE_DOWN:f(function(v){return ei(v,1,"month")});return;case ze.PAGE_UP:f(function(v){return ei(v,-1,"month")});return;case ze.ENTER:{var p;l.length===0?p=f(function(v){return v}):l.length===1?p=l[0]:p=l[0].isSame(s,"day")?l[1]:l[0],p&&(!d||!d(p))&&this.onSelect(p),t.preventDefault();return}default:this.__emit("keydown",t)}}},/* Computes the hover range for date t, publishes it through fireHoverValueChange and returns it; with no first selection (and no usable type-specific anchor) it clears sHoverValue and returns []. */onDayHover:function(t){var n=[],r=this.sSelectedValue,a=this.firstSelectedValue,o=this.type;if(o==="start"&&r[1])n=this.compare(t,r[1])<0?[t,r[1]]:[t];else if(o==="end"&&r[0])n=this.compare(t,r[0])>0?[r[0],t]:[];else{if(!a)return this.sHoverValue.length&&this.setState({sHoverValue:[]}),n;n=this.compare(t,a)<0?[t,a]:[a,t]}return this.fireHoverValueChange(n),n},onToday:function(){var t=hl(this.sValue[0]),n=t.clone().add(1,"months");this.setState({sValue:[t,n]})},onOpenTimePicker:function(){this.setState({sShowTimePicker:!0})},onCloseTimePicker:function(){this.setState({sShowTimePicker:!1})},/* Emits "ok" only when the current selection passes isAllowedDateAndTime. */onOk:function(){var t=this.sSelectedValue;this.isAllowedDateAndTime(t)&&this.__emit("ok",t)},onStartInputChange:function(){for(var t=arguments.length,n=new Array(t),r=0;r<t;r++)n[r]=arguments[r];var a=["left"].concat(n);return zh.apply(this,a)},onEndInputChange:function(){for(var t=arguments.length,n=new Array(t),r=0;r<t;r++)n[r]=arguments[r];var a=["right"].concat(n);return zh.apply(this,a)},onStartInputSelect:function(t){var n=["left",t,{source:"dateInputSelect"}];return zh.apply(this,n)},onEndInputSelect:function(t){var n=["right",t,{source:"dateInputSelect"}];return zh.apply(this,n)},onStartValueChange:function(t){var n=Je(this.sValue);return n[0]=t,this.fireValueChange(n)},onEndValueChange:function(t){var n=Je(this.sValue);return n[1]=t,this.fireValueChange(n)},onStartPanelChange:function(t,n){var r=this.sMode,a=this.sValue,o=[n,r[1]],i=[t||a[0],a[1]];this.__emit("panelChange",i,o);var 
l={sPanelTriggerSource:"start"};vt(this,"mode")||(l.sMode=o),this.setState(l)},onEndPanelChange:function(t,n){var r=this.sMode,a=this.sValue,o=[r[0],n],i=[a[0],t||a[1]];this.__emit("panelChange",i,o);var l={sPanelTriggerSource:"end"};vt(this,"mode")||(l.sMode=o),this.setState(l)},/* Value shown by the left panel; when the right panel triggered the last change and both panels are in "date" mode, a month equal to the right one is moved back by one month. */getStartValue:function(){var t=this.$data,n=t.sSelectedValue,r=t.sShowTimePicker,a=t.sValue,o=t.sMode,i=t.sPanelTriggerSource,l=a[0];return n[0]&&this.$props.timePicker&&(l=l.clone(),Sa(n[0],l)),r&&n[0]&&(l=n[0]),i==="end"&&o[0]==="date"&&o[1]==="date"&&l.isSame(a[1],"month")&&(l=l.clone().subtract(1,"month")),l},getEndValue:function(){var t=this.$data,n=t.sSelectedValue,r=t.sShowTimePicker,a=t.sValue,o=t.sMode,i=t.sPanelTriggerSource,l=a[1]?a[1].clone():a[0].clone().add(1,"month");return n[1]&&this.$props.timePicker&&Sa(n[1],l),r&&(l=n[1]?n[1]:this.getStartValue()),!r&&i!=="end"&&o[0]==="date"&&o[1]==="date"&&l.isSame(a[0],"month")&&(l=l.clone().add(1,"month")),l},/* When the end date is unset or falls on the same day as the start, extends the user's disabled lists (via ib) with every hour/minute/second below the start time; otherwise returns the user's config unchanged. */getEndDisableTime:function(){var t=this.sSelectedValue,n=this.sValue,r=this.disabledTime,a=r(t,"end")||{},o=t&&t[0]||n[0].clone();if(!t[1]||o.isSame(t[1],"day")){var i=o.hour(),l=o.minute(),s=o.second(),c=a.disabledHours,d=a.disabledMinutes,f=a.disabledSeconds,p=d?d():[],v=f?f():[];return c=ib(i,c),d=ib(l,d),f=ib(s,f),{disabledHours:function(){return c},disabledMinutes:function(y){return y===i?d:p},disabledSeconds:function(y,b){return y===i&&b===l?f:v}}}return a},isAllowedDateAndTime:function(t){return Xv(t[0],this.disabledDate,this.disabledStartTime)&&Xv(t[1],this.disabledDate,this.disabledEndTime)},isMonthYearPanelShow:function(t){return["month","year","decade"].indexOf(t)>-1},/* True only when both ends of the selection are set. */hasSelectedValue:function(){var t=this.sSelectedValue;return!!t[1]&&!!t[0]},/* Compares with full precision when a timePicker is configured, otherwise in whole days. */compare:function(t,n){return this.timePicker?t.diff(n):t.diff(n,"days")},/* Central selection update: emits "change" for every call and "select" when n is truthy or both ends are set. */fireSelectValueChange:function(t,n,r){var a=this.timePicker,o=this.prevSelectedValue;if(a){var i=Qe(a);if(i.defaultValue){var 
l=i.defaultValue;!o[0]&&t[0]&&Sa(l[0],t[0]),!o[1]&&t[1]&&Sa(l[1],t[1])}}if(!this.sSelectedValue[0]||!this.sSelectedValue[1]){var s=t[0]||Ue(),c=t[1]||s.clone().add(1,"months");this.setState({sSelectedValue:t,sValue:t&&t.length===2?C1([s,c]):this.sValue})}t[0]&&!t[1]&&(this.setState({firstSelectedValue:t[0]}),this.fireHoverValueChange(t.concat())),this.__emit("change",t),(n||t[0]&&t[1])&&(this.setState({prevSelectedValue:t,firstSelectedValue:null}),this.fireHoverValueChange([]),this.__emit("select",t,r)),vt(this,"selectedValue")||this.setState({sSelectedValue:t})},fireValueChange:function(t){vt(this,"value")||this.setState({sValue:t}),this.__emit("valueChange",t)},fireHoverValueChange:function(t){vt(this,"hoverValue")||this.setState({sHoverValue:t}),this.__emit("hoverChange",t)},/* Resets the selection to [] (the truthy second argument forces the "select" emit) and then emits "clear". */clear:function(){this.fireSelectValueChange([],!0),this.__emit("clear")},disabledStartTime:function(t){return this.disabledTime(t,"start")},disabledEndTime:function(t){return this.disabledTime(t,"end")},disabledStartMonth:function(t){var n=this.sValue;return t.isAfter(n[1],"month")},disabledEndMonth:function(t){var n=this.sValue;return t.isBefore(n[0],"month")}},render:function(){var t,n,r=Qe(this),a=r.prefixCls,o=r.dateInputPlaceholder,i=r.timePicker,l=r.showOk,s=r.locale,c=r.showClear,d=r.showToday,f=r.type,p=r.seperator,v=We(this,"clearIcon"),m=this.sHoverValue,y=this.sSelectedValue,b=this.sMode,C=this.sShowTimePicker,S=this.sValue,w=(t={},V(t,this.$attrs.class,!!this.$attrs.class),V(t,a,1),V(t,"".concat(a,"-hidden"),!r.visible),V(t,"".concat(a,"-range"),1),V(t,"".concat(a,"-show-time-picker"),C),V(t,"".concat(a,"-week-number"),r.showWeekNumber),t),k=P(P({},r),this.$attrs),$={selectedValue:y,onSelect:this.onSelect,onDayHover:f==="start"&&y[1]||f==="end"&&y[0]||!!m.length?this.onDayHover:Vh},O,T;/* dateInputPlaceholder may be a [start,end] array or a single value used for both inputs. */if(o)if(Array.isArray(o)){var _=fn(o,2);O=_[0],T=_[1]}else O=T=o;var 
/* I: show the OK button when showOk is true, or when it is not false and a timePicker is configured. */I=l===!0||l!==!1&&!!i,L=(n={},V(n,"".concat(a,"-footer"),!0),V(n,"".concat(a,"-range-bottom"),!0),V(n,"".concat(a,"-footer-show-ok"),I),n),j=this.getStartValue(),F=this.getEndValue(),N=hl(j),D=N.month(),z=N.year(),B=j.year()===z&&j.month()===D||F.year()===z&&F.month()===D,M=j.clone().add(1,"months"),/* E: the right panel shows exactly the month after the left panel. */E=M.year()===F.year()&&M.month()===F.month(),K=P(P(P({},k),$),{hoverValue:m,direction:"left",disabledTime:this.disabledStartTime,disabledMonth:this.disabledStartMonth,format:this.getFormat(),value:j,mode:b[0],placeholder:O,showDateInput:this.showDateInput,timePicker:i,showTimePicker:C||b[0]==="time",enablePrev:!0,enableNext:!E||this.isMonthYearPanelShow(b[1]),clearIcon:v,onInputChange:this.onStartInputChange,onInputSelect:this.onStartInputSelect,onValueChange:this.onStartValueChange,onPanelChange:this.onStartPanelChange}),W=P(P(P({},k),$),{hoverValue:m,direction:"right",format:this.getFormat(),timePickerDisabledTime:this.getEndDisableTime(),placeholder:T,value:F,mode:b[1],showDateInput:this.showDateInput,timePicker:i,showTimePicker:C||b[1]==="time",disabledTime:this.disabledEndTime,disabledMonth:this.disabledEndMonth,enablePrev:!E||this.isMonthYearPanelShow(b[0]),enableNext:!0,clearIcon:v,onInputChange:this.onEndInputChange,onInputSelect:this.onEndInputSelect,onValueChange:this.onEndValueChange,onPanelChange:this.onEndPanelChange}),Y=null;if(d){var q=P(P({},k),{disabled:B,value:S[0],text:s.backToToday,onToday:this.onToday});Y=g(VD,le({key:"todayButton"},q),null)}var J=null;if(r.timePicker){var ne=P(P({},k),{showTimePicker:C||b[0]==="time"&&b[1]==="time",timePickerDisabled:!this.hasSelectedValue()||m.length,onOpenTimePicker:this.onOpenTimePicker,onCloseTimePicker:this.onCloseTimePicker});J=g(KD,le({key:"timePickerButton"},ne),null)}var oe=null;if(I){var Q=P(P({},k),{okDisabled:!this.isAllowedDateAndTime(y)||!this.hasSelectedValue()||m.length,onOk:this.onOk});oe=g(HD,le({key:"okButtonNode"},Q),null)}var ae=this.renderFooter(b);return 
g("div",{ref:"rootInstance",class:w,tabindex:"0",onKeydown:this.onKeyDown},[r.renderSidebar(),g("div",{class:"".concat(a,"-panel")},[c&&y[0]&&y[1]?g("a",{role:"button",title:s.clear,onClick:this.clear},[v||g("span",{class:"".concat(a,"-clear-btn")},null)]):null,g("div",{class:"".concat(a,"-date-panel"),onMouseleave:f!=="both"?this.onDatePanelLeave:Vh,onMouseenter:f!=="both"?this.onDatePanelEnter:Vh},[g(ST,K,null),g("span",{class:"".concat(a,"-range-middle")},[p]),g(ST,W,null)]),g("div",{class:L},[d||r.timePicker||I||ae?g("div",{class:"".concat(a,"-footer-btn")},[ae,Y,J,oe]):null])])])}}),jxe=Hxe,/* ACheckableTag: a span whose click emits "update:checked" and "change" with the negated checked prop, followed by "click" with the event. */Kxe=G({name:"ACheckableTag",props:{prefixCls:u.string,checked:u.looseBool,onChange:{type:Function},onClick:{type:Function}},emits:["update:checked","change","click"],setup:function(t,n){var r=n.slots,a=n.emit,o=Wt("tag",t),i=o.prefixCls,l=function(d){var f=t.checked;a("update:checked",!f),a("change",!f),a("click",d)},s=x(function(){var c;return Se(i.value,(c={},V(c,"".concat(i.value,"-checkable"),!0),V(c,"".concat(i.value,"-checkable-checked"),t.checked),c))});return function(){var c;return g("span",{class:s.value,onClick:l},[(c=r.default)===null||c===void 0?void 0:c.call(r)])}}}),lm=Kxe,/* Wxe matches a name from B2 with an optional "-inverse" suffix; Uxe matches a name from iOe. Both are used by ATag to decide whether color becomes a class name. */Wxe=new RegExp("^(".concat(B2.join("|"),")(-inverse)?$")),Uxe=new RegExp("^(".concat(iOe.join("|"),")$")),Yxe={prefixCls:u.string,color:{type:String},closable:u.looseBool.def(!1),closeIcon:u.VNodeChild,visible:u.looseBool,onClose:{type:Function},icon:u.VNodeChild},/* ATag: c holds the visibility flag; it starts as H(true) and is overwritten with t.visible inside Wn whenever visible is defined. */Td=G({name:"ATag",props:Yxe,emits:["update:visible","close"],slots:["closeIcon","icon"],setup:function(t,n){var r=n.slots,a=n.emit,o=n.attrs,i=Wt("tag",t),l=i.prefixCls,s=i.direction,c=H(!0);Wn(function(){t.visible!==void 0&&(c.value=t.visible)});var /* Close handler: always emits "update:visible" and "close"; hides the tag itself only when the event was not default-prevented and visible is undefined. */d=function(m){m.stopPropagation(),a("update:visible",!1),a("close",m),!m.defaultPrevented&&t.visible===void 0&&(c.value=!1)},f=x(function(){var v=t.color;return v?Wxe.test(v)||Uxe.test(v):!1}),p=x(function(){var v;return 
Se(l.value,(v={},V(v,"".concat(l.value,"-").concat(t.color),f.value),V(v,"".concat(l.value,"-has-color"),t.color&&!f.value),V(v,"".concat(l.value,"-hidden"),!c.value),V(v,"".concat(l.value,"-rtl"),s.value==="rtl"),v))});return function(){var v,m,y,b=t.icon,C=b===void 0?(v=r.icon)===null||v===void 0?void 0:v.call(r):b,S=t.color,w=t.closeIcon,k=w===void 0?(m=r.closeIcon)===null||m===void 0?void 0:m.call(r):w,$=t.closable,O=$===void 0?!1:$,T=function(){return O?k?g("div",{class:"".concat(l.value,"-close-icon"),onClick:d},[k]):g(go,{class:"".concat(l.value,"-close-icon"),onClick:d},null):null},/* A color that matched neither regex (f.value false) is applied as an inline background color. */_={backgroundColor:S&&!f.value?S:void 0},I=C||null,L=(y=r.default)===null||y===void 0?void 0:y.call(r),j=I?g(Fe,null,[I,g("span",null,[L])]):L,/* When an onClick attr is present the tag vnode is wrapped in C2. */F="onClick"in o,N=g("span",{class:p.value,style:_},[j,T()]);return F?g(C2,null,{default:function(){return[N]}}):N}}});Td.CheckableTag=lm;/* Registers both ATag and ACheckableTag on the given app instance and returns it. */Td.install=function(e){return e.component(Td.name,Td),e.component(lm.name,lm),e};var e7=Td,/* Prop definitions spread into every picker prop table below (OT, PT, t7, n7). */_g={name:u.string,transitionName:u.string,prefixCls:u.string,inputPrefixCls:u.string,format:u.oneOfType([u.string,u.array,u.func]),disabled:u.looseBool,allowClear:u.looseBool,suffixIcon:u.any,popupStyle:u.object,dropdownClassName:u.string,locale:u.any,localeCode:u.string,size:u.oneOf(rt("large","small","default")),getCalendarContainer:u.func,open:u.looseBool,disabledDate:u.func,showToday:u.looseBool,dateRender:u.any,pickerClass:u.string,pickerInputClass:u.string,timePicker:u.any,autofocus:u.looseBool,tagPrefixCls:u.string,tabindex:u.oneOfType([u.string,u.number]),align:u.object.def(function(){return{}}),inputReadOnly:u.looseBool,valueFormat:u.string,onOpenChange:u.func,onFocus:u.func,onBlur:u.func,onMouseenter:u.func,onMouseleave:u.func},/* Single-value props (value as String or Object) spread into OT, PT and n7 but not into the range table t7. */sS={value:{type:[String,Object]},defaultValue:{type:[String,Object]},defaultPickerValue:{type:[String,Object]},renderExtraFooter:u.any,placeholder:u.string,onChange:u.func},OT=P(P(P({},_g),sS),{showTime:an(u.oneOfType([u.object,u.looseBool])),open:u.looseBool,disabledTime:u.func,mode
:u.oneOf(rt("time","date","month","year","decade")),onOpenChange:u.func,onPanelChange:u.func,onOk:u.func}),PT=P(P(P({},_g),sS),{placeholder:u.string,monthCellContentRender:u.func}),/* Prop table for the range picker: value/defaultValue/defaultPickerValue are arrays and placeholder is an array of strings. */t7=P(P({},_g),{tagPrefixCls:u.string,value:{type:Array},defaultValue:{type:Array},defaultPickerValue:{type:Array},timePicker:u.any,showTime:an(u.oneOfType([u.object,u.looseBool])),ranges:u.object,placeholder:u.arrayOf(String),mode:u.oneOfType([u.string,u.arrayOf(String)]),separator:u.any,disabledTime:u.func,showToday:u.looseBool,renderExtraFooter:u.any,onChange:u.func,onCalendarChange:u.func,onOk:u.func,onPanelChange:u.func,onMouseenter:u.func,onMouseleave:u.func}),n7=P(P(P({},_g),sS),{placeholder:u.string}),/* Suffix icon renderer: a vnode suffixIcon (per zn) is cloned with the picker-icon class; anything else is wrapped in a span. NOTE(review): g(...) presumably always returns a truthy vnode, which would make the XD fallback unreachable - confirm. */r7=function(t,n){var r,a=n.attrs,o,i,l=a.suffixIcon,s=a.prefixCls;return(l&&zn(l)?Ot(l,{class:Se((r={},V(r,(o=l.props)===null||o===void 0?void 0:o.class,(i=l.props)===null||i===void 0?void 0:i.class),V(r,"".concat(s,"-picker-icon"),!0),r))}):g("span",{class:"".concat(s,"-picker-icon")},[l]))||g(XD,{class:"".concat(s,"-picker-icon")},null)};r7.inheritAttrs=!1;var a7=r7;/* Normalizes the displayed [start,end] pair: returns undefined when both are missing, returns the pair untouched when t[0] is "month", and otherwise moves an end that is in the same month as the start one month later. */function lb(e,t){var n=fn(e,2),r=n[0],a=n[1];if(!(!r&&!a)){if(t&&t[0]==="month")return[r,a];var o=a&&a.isSame(r,"month")?a.clone().add(1,"month"):a;return[r,o]}}/* Arrays pass through; a single truthy value becomes [e, e plus one month]; falsy input yields undefined. */function qxe(e){if(!!e)return Array.isArray(e)?e:[e,e.clone().add(1,"month")]}/* True for an array that is empty or all-falsy; false for anything that is not an array. */function Gxe(e){return Array.isArray(e)?e.length===0||e.every(function(t){return!t}):!1}/* Calls .locale(t) on each defined entry of pair e; does nothing when t is falsy or e is empty. */function TT(e,t){if(!!t&&!(!e||e.length===0)){var n=fn(e,2),r=n[0],a=n[1];r&&r.locale(t),a&&a.locale(t)}}var Xxe=G({name:"ARangePicker",mixins:[nt],inheritAttrs:!1,props:Rn(t7,{allowClear:!0,showToday:!1,separator:"~"}),setup:function(){return{configProvider:ve("configProvider",St),picker:null,sTagPrefixCls:void 0,sPrefixCls:""}},data:function(){var t=this.value||this.defaultValue||[],n=fn(t,2),r=n[0],a=n[1];if(r&&!Hr(Ue).isMoment(r)||a&&!Hr(Ue).isMoment(a))throw new Error("The value/defaultValue of RangePicker must be a moment object array after `antd@2.0`, see: 
https://u.ant.design/date-picker-value");var o=!t||Gxe(t)?this.defaultPickerValue:t;return{sValue:t,sShowDate:qxe(o||Hr(Ue)()),sOpen:this.open,sHoverValue:[]}},watch:{value:function(t){var n=t||[],r={sValue:n};Mr(t,this.sValue)||(r=P(P({},r),{sShowDate:lb(n,this.mode)||this.sShowDate})),this.setState(r)},open:function(t){var n={sOpen:t};this.setState(n)},sOpen:function(t,n){var r=this;Ne(function(){!vt(r,"open")&&n&&!t&&r.focus()})}},methods:{setValue:function(t,n){this.handleChange(t),(n||!this.showTime)&&!vt(this,"open")&&this.setState({sOpen:!1})},savePicker:function(t){this.picker=t},clearSelection:function(t){t.preventDefault(),t.stopPropagation(),this.setState({sValue:[]}),this.handleChange([])},clearHoverValue:function(){this.setState({sHoverValue:[]})},handleChange:function(t){vt(this,"value")||this.setState(function(o){var i=o.sShowDate;return{sValue:t,sShowDate:lb(t)||i}}),t[0]&&t[1]&&t[0].diff(t[1])>0&&(t[1]=void 0);var n=fn(t,2),r=n[0],a=n[1];this.$emit("change",t,[Cu(r,this.format),Cu(a,this.format)])},handleOpenChange:function(t){vt(this,"open")||this.setState({sOpen:t}),t===!1&&this.clearHoverValue(),this.$emit("openChange",t)},handleShowDateChange:function(t){this.setState({sShowDate:t})},handleHoverChange:function(t){this.setState({sHoverValue:t})},handleRangeMouseLeave:function(){this.sOpen&&this.clearHoverValue()},handleCalendarInputSelect:function(t){var n=fn(t,1),r=n[0];!r||this.setState(function(a){var o=a.sShowDate;return{sValue:t,sShowDate:lb(t)||o}})},handleRangeClick:function(t){typeof t=="function"&&(t=t()),this.setValue(t,!0),this.$emit("ok",t),this.$emit("openChange",!1)},onMouseEnter:function(t){this.$emit("mouseenter",t)},onMouseLeave:function(t){this.$emit("mouseleave",t)},focus:function(){this.picker.focus()},blur:function(){this.picker.blur()},renderFooter:function(){var t=this,n=this.ranges,r=this.$slots,a=this.sPrefixCls,o=this.sTagPrefixCls,i=this.renderExtraFooter||r.renderExtraFooter;if(!n&&!i)return null;var 
l=i?g("div",{class:"".concat(a,"-footer-extra"),key:"extra"},[typeof i=="function"?i():i]):null,s=n&&Object.keys(n).map(function(d){var f=n[d],p=typeof f=="function"?f.call(t):f;return g(e7,{key:d,prefixCls:o,color:"blue",onClick:function(){return t.handleRangeClick(f)},onMouseenter:function(){return t.setState({sHoverValue:p})},onMouseleave:t.handleRangeMouseLeave},{default:function(){return[d]}})}),c=s&&s.length>0?g("div",{class:"".concat(a,"-footer-extra ").concat(a,"-range-quick-selector"),key:"range"},[s]):null;return[c,l]}},render:function(){var t,n=this,r=P(P({},Qe(this)),this.$attrs),a=We(this,"suffixIcon");a=Array.isArray(a)?a[0]:a;var o=this.sValue,i=this.sShowDate,l=this.sHoverValue,s=this.sOpen,c=this.$slots,d=r.prefixCls,f=r.tagPrefixCls,p=r.popupStyle,v=r.disabledDate,m=r.disabledTime,y=r.showTime,b=r.showToday,C=r.ranges,S=r.locale,w=r.localeCode,k=r.format,$=r.separator,O=r.inputReadOnly,T=r.style,_=r.onCalendarChange,I=r.onOk,L=r.onBlur,j=r.onFocus,F=r.onPanelChange,N=this.configProvider.getPrefixCls,D=N("calendar",d),z=N("tag",f);this.sPrefixCls=D,this.sTagPrefixCls=z;var B=r.dateRender||c.dateRender;TT(o,w),TT(i,w);var M=Se((t={},V(t,"".concat(D,"-time"),y),V(t,"".concat(D,"-range-with-ranges"),C),t)),E={onChange:this.handleChange},K={onOk:this.handleChange};r.timePicker?E.onChange=function(Be){return n.handleChange(Be)}:K={},"mode"in r&&(K.mode=r.mode);var 
W=Array.isArray(r.placeholder)?r.placeholder[0]:S.lang.rangePlaceholder[0],Y=Array.isArray(r.placeholder)?r.placeholder[1]:S.lang.rangePlaceholder[1],q=P(P({},K),{separator:$,format:k,prefixCls:D,renderFooter:this.renderFooter,timePicker:r.timePicker,disabledDate:v,disabledTime:m,dateInputPlaceholder:[W,Y],locale:S.lang,dateRender:B,value:i,hoverValue:l,showToday:b,inputReadOnly:O,onChange:_,onOk:I,onValueChange:this.handleShowDateChange,onHoverChange:this.handleHoverChange,onPanelChange:F,onInputSelect:this.handleCalendarInputSelect,class:M}),J=g(jxe,q,c),ne={};r.showTime&&(ne.width="350px");var oe=fn(o,2),Q=oe[0],ae=oe[1],de=!r.disabled&&r.allowClear&&o&&(Q||ae)?g(Yr,{class:"".concat(D,"-picker-clear"),onClick:this.clearSelection},null):null,be=g(a7,{suffixIcon:a,prefixCls:D},null),Ee=function(te){var ie=te.value,ge=fn(ie,2),ke=ge[0],xe=ge[1];return g("span",{class:r.pickerInputClass},[g("input",{disabled:r.disabled,readonly:!0,value:Cu(ke,r.format),placeholder:W,class:"".concat(D,"-range-picker-input"),tabindex:-1},null),g("span",{class:"".concat(D,"-range-picker-separator")},[yt(" "),$,yt(" ")]),g("input",{disabled:r.disabled,readonly:!0,value:Cu(xe,r.format),placeholder:Y,class:"".concat(D,"-range-picker-input"),tabindex:-1},null),de,be])},Pe=P(P(P({},r),E),{calendar:J,value:o,open:s,prefixCls:"".concat(D,"-picker-container"),onOpenChange:this.handleOpenChange,style:p});return g("span",le({ref:this.savePicker,id:r.id,class:Se(r.class,r.pickerClass),style:P(P({},ne),T),tabindex:r.disabled?-1:0,onFocus:j,onBlur:L,onMouseenter:this.onMouseEnter,onMouseleave:this.onMouseLeave},eg(r)),[g(oS,Pe,P({default:Ee},c))])}});function Zxe(e,t){return e&&e.format(t)||""}function xT(){}var Jxe=G({name:"AWeekPicker",mixins:[nt],inheritAttrs:!1,props:Rn(n7,{allowClear:!0}),setup:function(){return{configProvider:ve("configProvider",St),prevState:{},input:void 0,sPrefixCls:void 0}},data:function(){var t=this.value||this.defaultValue;if(t&&!Hr(Ue).isMoment(t))throw new Error("The 
value/defaultValue of WeekPicker or MonthPicker must be a moment object");return{_value:t,_open:this.open}},watch:{value:function(t){var n={_value:t};this.setState(n),this.prevState=P(P({},this.$data),n)},open:function(t){var n={_open:t};this.setState(n),this.prevState=P(P({},this.$data),n)},_open:function(t,n){var r=this;Ne(function(){!vt(r,"open")&&n&&!t&&r.focus()})}},mounted:function(){this.prevState=P({},this.$data)},updated:function(){var t=this;Ne(function(){!vt(t,"open")&&t.prevState._open&&!t._open&&t.focus()})},methods:{saveInput:function(t){this.input=t},weekDateRender:function(t){var n=t.current,r=this.$data._value,a=this.sPrefixCls,o=this.$slots,i=this.dateRender||o.dateRender,l=i?i({current:n}):n.date();return r&&n.year()===r.year()&&n.week()===r.week()?g("div",{class:"".concat(a,"-selected-day")},[g("div",{class:"".concat(a,"-date")},[l])]):g("div",{class:"".concat(a,"-date")},[l])},handleChange:function(t){vt(this,"value")||this.setState({_value:t}),this.$emit("change",t,Zxe(t,this.format))},handleOpenChange:function(t){vt(this,"open")||this.setState({_open:t}),this.$emit("openChange",t)},clearSelection:function(t){t.preventDefault(),t.stopPropagation(),this.handleChange(null)},focus:function(){this.input.focus()},blur:function(){this.input.blur()},renderFooter:function(){var t=this.sPrefixCls,n=this.$slots,r=this.renderExtraFooter||n.renderExtraFooter;return r?g("div",{class:"".concat(t,"-footer-extra")},[r.apply(void 0,arguments)]):null}},render:function(){var t=this,n=P(P({},Qe(this)),this.$attrs),r=We(this,"suffixIcon");r=Array.isArray(r)?r[0]:r;var a=this.prefixCls,o=this.disabled,i=this.pickerClass,l=this.popupStyle,s=this.pickerInputClass,c=this.format,d=this.allowClear,f=this.locale,p=this.localeCode,v=this.disabledDate,m=this.defaultPickerValue,y=this.$data,b=this.$slots,C=this.configProvider.getPrefixCls,S=C("calendar",a);this.sPrefixCls=S;var w=y._value,k=y._open,$=n.class,O=n.style,T=n.id,_=n.onFocus,I=_===void 
0?xT:_,L=n.onBlur,j=L===void 0?xT:L;w&&p&&w.locale(p);var F=vt(this,"placeholder")?this.placeholder:f.lang.placeholder,N=this.dateRender||b.dateRender||this.weekDateRender,D=g(qD,{showWeekNumber:!0,dateRender:N,prefixCls:S,format:c,locale:f.lang,showDateInput:!1,showToday:!1,disabledDate:v,renderFooter:this.renderFooter,defaultValue:m},null),z=!o&&d&&y._value?g(Yr,{class:"".concat(S,"-picker-clear"),onClick:this.clearSelection},null):null,B=g(a7,{suffixIcon:r,prefixCls:S},null),M=function(W){var Y=W.value;return g("span",{style:{display:"inline-block",width:"100%"}},[g("input",{ref:t.saveInput,disabled:o,readonly:!0,value:Y&&Y.format(c)||"",placeholder:F,class:s,onFocus:I,onBlur:j},null),z,B])},E=P(P({},n),{calendar:D,prefixCls:"".concat(S,"-picker-container"),value:w,open:k,onChange:this.handleChange,onOpenChange:this.handleOpenChange,style:l});return g("span",le({class:Se($,i),style:O,id:T},eg(n)),[g(oS,E,P({default:M},b))])}}),o7=xg(Xxe,t7,"date"),i7=xg(Jxe,n7,"week"),Ya=xg(ZD(qD,OT,"ADatePicker"),OT,"date"),l7=xg(ZD(GD,PT,"AMonthPicker"),PT,"month");P(Ya,{RangePicker:o7,MonthPicker:l7,WeekPicker:i7});Ya.install=function(e){return e.component(Ya.name,Ya),e.component(Ya.RangePicker.name,Ya.RangePicker),e.component(Ya.MonthPicker.name,Ya.MonthPicker),e.component(Ya.WeekPicker.name,Ya.WeekPicker),e};var Qxe=Ya;function Hh(e){return e!=null}var e_e=function(t){var n=t.itemPrefixCls,r=t.component,a=t.span,o=t.labelStyle,i=t.contentStyle,l=t.bordered,s=t.label,c=t.content,d=t.colon,f=r;if(l){var p;return g(f,{class:[(p={},V(p,"".concat(n,"-item-label"),Hh(s)),V(p,"".concat(n,"-item-content"),Hh(c)),p)],colSpan:a},{default:function(){return[Hh(s)&&g("span",{style:o},[s]),Hh(c)&&g("span",{style:i},[c])]}})}return 
g(f,{class:["".concat(n,"-item")],colSpan:a},{default:function(){return[g("div",{class:"".concat(n,"-item-container")},[s&&g("span",{class:["".concat(n,"-item-label"),V({},"".concat(n,"-item-no-colon"),!d)],style:o},[s]),c&&g("span",{class:"".concat(n,"-item-content"),style:i},[c])])]}})},sb=e_e,t_e=function(t){var n=function(p,v,m){var y=v.colon,b=v.prefixCls,C=v.bordered,S=m.component,w=m.type,k=m.showLabel,$=m.showContent,O=m.labelStyle,T=m.contentStyle;return p.map(function(_,I){var L,j,F=_.props||{},N=F.prefixCls,D=N===void 0?b:N,z=F.span,B=z===void 0?1:z,M=F.labelStyle,E=F.contentStyle,K=F.label,W=K===void 0?(j=(L=_.children)===null||L===void 0?void 0:L.label)===null||j===void 0?void 0:j.call(L):K,Y=ht(_),q=o0e(_),J=qI(_),ne=_.key;return typeof S=="string"?g(sb,{key:"".concat(w,"-").concat(String(ne)||I),class:q,style:J,labelStyle:P(P({},O.value),M),contentStyle:P(P({},T.value),E),span:B,colon:y,component:S,itemPrefixCls:D,bordered:C,label:k?W:null,content:$?Y:null},null):[g(sb,{key:"label-".concat(String(ne)||I),class:q,style:P(P(P({},O.value),J),M),span:1,colon:y,component:S[0],itemPrefixCls:D,bordered:C,label:W},null),g(sb,{key:"content-".concat(String(ne)||I),class:q,style:P(P(P({},T.value),J),E),span:B*2-1,component:S[1],itemPrefixCls:D,bordered:C,content:Y},null)]})},r=t.prefixCls,a=t.vertical,o=t.row,i=t.index,l=t.bordered,s=ve(c7,{labelStyle:H({}),contentStyle:H({})}),c=s.labelStyle,d=s.contentStyle;return a?g(Fe,null,[g("tr",{key:"label-".concat(i),class:"".concat(r,"-row")},[n(o,t,{component:"th",type:"label",showLabel:!0,labelStyle:c,contentStyle:d})]),g("tr",{key:"content-".concat(i),class:"".concat(r,"-row")},[n(o,t,{component:"td",type:"content",showContent:!0,labelStyle:c,contentStyle:d})])]):g("tr",{key:i,class:"".concat(r,"-row")},[n(o,t,{component:l?["th","td"]:"td",type:"item",showLabel:!0,showContent:!0,labelStyle:c,contentStyle:d})])},n_e=t_e;u.string,u.any,u.number;var 
/* Prop table for ADescriptionsItem; `span` defaults to 1. */r_e={prefixCls:u.string,label:u.VNodeChild,labelStyle:u.style,contentStyle:u.style,span:u.number.def(1)},/* ADescriptionsItem: its render function only returns the result of calling the default slot (undefined when there is no default slot). */s7=G({name:"ADescriptionsItem",props:r_e,slots:["label"],setup:function(t,n){var r=n.slots;return function(){var a;return(a=r.default)===null||a===void 0?void 0:a.call(r)}}}),/* Column count per breakpoint key; returned by the default() of the `column` prop below and used as the per-key fallback inside a_e. */u7={xxl:3,xl:3,lg:3,md:3,sm:2,xs:1};/* a_e(e,t): resolve the column count. A number is returned as is. When kt(e) reports "object", walk the keys listed in ws and, for the first key r where t[r] is truthy and e[r] is defined, return e[r] or (if that is falsy) u7[r]. Falls back to 3. NOTE(review): kt and ws are defined outside this chunk; ws is presumably the ordered list of breakpoint names - confirm. */function a_e(e,t){if(typeof e=="number")return e;if(kt(e)==="object")for(var n=0;n<ws.length;n++){var r=ws[n];if(t[r]&&e[r]!==void 0)return e[r]||u7[r]}return 3}/* _T(e,t,n): when the span t is undefined or larger than n, return Ot(e,{span:n}) and call on(...) with the span-mismatch message; otherwise return e unchanged. NOTE(review): Ot presumably clones the vnode with overridden props and on presumably emits a warning when its first argument is false - both are defined elsewhere, confirm. */function _T(e,t,n){var r=e;return(t===void 0||t>n)&&(r=Ot(e,{span:n}),on(t===void 0,"Descriptions","Sum of column `span` in a line not match `column` of Descriptions.")),r}/* o_e(e,t): split the items returned by Un(e) into rows for t columns and return the array of rows. o is the number of columns still free in the current row a. The last item always closes the current row (its raw span is passed to _T together with o). Any other item whose span d (default 1) is smaller than o is appended and o shrinks by d; otherwise the item goes through _T with the remaining o, the row is pushed, and a fresh row with t free columns is started. */function o_e(e,t){var n=Un(e),r=[],a=[],o=t;return n.forEach(function(i,l){var s,c=(s=i.props)===null||s===void 0?void 0:s.span,d=c||1;if(l===n.length-1){a.push(_T(i,c,o)),r.push(a);return}d<o?(o-=d,a.push(i)):(a.push(_T(i,d,o)),r.push(a),o=t,a=[])}),r}/* Prop table for ADescriptions; `size` defaults to "default" and `column` defaults to u7. */var i_e={prefixCls:u.string,bordered:u.looseBool,size:u.oneOf(rt("default","middle","small")).def("default"),title:u.VNodeChild,extra:u.VNodeChild,column:{type:[Number,Object],default:function(){return u7}},layout:u.oneOf(rt("horizontal","vertical")),colon:u.looseBool,labelStyle:u.style,contentStyle:u.style},/* Key under which the component below hands labelStyle/contentStyle to ot(...). */c7=Symbol("descriptionsContext"),/* ADescriptions: subscribes to lc inside the et callback and unsubscribes inside the Lt callback (NOTE(review): presumably mount/unmount hooks - confirm); the subscription value is stored in s only while `column` is an object, and c is the column count computed by a_e from t.column and s.value. */hu=G({name:"ADescriptions",props:i_e,slots:["title","extra"],Item:s7,setup:function(t,n){var r=n.slots,a=Wt("descriptions",t),o=a.prefixCls,i=a.direction,l,s=H({});et(function(){l=lc.subscribe(function(d){kt(t.column)==="object"&&(s.value=d)})}),Lt(function(){lc.unsubscribe(l)}),ot(c7,{labelStyle:yn(t,"labelStyle"),contentStyle:yn(t,"contentStyle")});var c=x(function(){return a_e(t.column,s.value)});return function(){var d,f,p,v,m=t.size,y=t.bordered,b=y===void 0?!1:y,C=t.layout,S=C===void 0?"horizontal":C,w=t.colon,k=w===void 0?!0:w,$=t.title,O=$===void 0?(f=r.title)===null||f===void 0?void 0:f.call(r):$,T=t.extra,_=T===void 0?(p=r.extra)===null||p===void 0?void 0:p.call(r):T,I=(v=r.default)===null||v===void 0?void 
0:v.call(r),L=o_e(I,c.value);return g("div",{class:[o.value,(d={},V(d,"".concat(o.value,"-").concat(m),m!=="default"),V(d,"".concat(o.value,"-bordered"),!!b),V(d,"".concat(o.value,"-rtl"),i.value==="rtl"),d)]},[(O||_)&&g("div",{class:"".concat(o.value,"-header")},[O&&g("div",{class:"".concat(o.value,"-title")},[O]),_&&g("div",{class:"".concat(o.value,"-extra")},[_])]),g("div",{class:"".concat(o.value,"-view")},[g("table",null,[g("tbody",null,[L.map(function(j,F){return g(n_e,{key:F,index:F,colon:k,prefixCls:o.value,vertical:S==="vertical",bordered:b,row:j},null)})])])])])}}});hu.install=function(e){return e.component(hu.name,hu),e.component(hu.Item.name,hu.Item),e};var l_e=hu,s_e={prefixCls:String,type:{type:String,default:"horizontal"},dashed:{type:Boolean,default:!1},orientation:{type:String,default:"center"},plain:{type:Boolean,default:!1}},u_e=G({name:"ADivider",props:s_e,setup:function(t,n){var r=n.slots,a=ve("configProvider",St),o=x(function(){return a.getPrefixCls("divider",t.prefixCls)}),i=x(function(){var s,c=t.type,d=t.dashed,f=t.plain,p=o.value;return s={},V(s,p,!0),V(s,"".concat(p,"-").concat(c),!0),V(s,"".concat(p,"-dashed"),!!d),V(s,"".concat(p,"-plain"),!!f),V(s,"".concat(p,"-rtl"),a.direction==="rtl"),s}),l=x(function(){return t.orientation.length>0?"-"+t.orientation:t.orientation});return function(){var s,c=Un((s=r.default)===null||s===void 0?void 0:s.call(r));return g("div",{class:[i.value,c.length?"".concat(o.value,"-with-text ").concat(o.value,"-with-text").concat(l.value):""],role:"separator"},[c.length?g("span",{class:"".concat(o.value,"-inner-text")},[c]):null])}}}),c_e=kn(u_e);hi.Button=cf;hi.install=function(e){return e.component(hi.name,hi),e.component(cf.name,cf),e};var ub;function d7(e){if(e||ub===void 0){var t=document.createElement("div");t.style.width="100%",t.style.height="200px";var 
n=document.createElement("div"),r=n.style;r.position="absolute",r.top=0,r.left=0,r.pointerEvents="none",r.visibility="hidden",r.width="200px",r.height="150px",r.overflow="hidden",n.appendChild(t),document.body.appendChild(n);var a=t.offsetWidth;n.style.overflow="scroll";var o=t.offsetWidth;a===o&&(o=n.clientWidth),document.body.removeChild(n),ub=a-o}return ub}var f7={width:u.any,height:u.any,defaultOpen:u.looseBool,firstEnter:u.looseBool,open:u.looseBool,prefixCls:u.string,placement:u.string,level:u.oneOfType([u.string,u.array]),levelMove:u.oneOfType([u.number,u.func,u.array]),ease:u.string,duration:u.string,handler:u.any,showMask:u.looseBool,maskStyle:u.object,className:u.string,wrapStyle:u.object,maskClosable:u.looseBool,afterVisibleChange:u.func,keyboard:u.looseBool},d_e=P(P({},f7),{wrapperClassName:u.string,forceRender:u.looseBool,getContainer:u.oneOfType([u.string,u.func,u.object,u.looseBool])});P(P({},f7),{getContainer:u.func,getOpenCount:u.func,switchScrollingEffect:u.func});function f_e(e){return Array.isArray(e)?e:[e]}var h7={transition:"transitionend",WebkitTransition:"webkitTransitionEnd",MozTransition:"transitionend",OTransition:"oTransitionEnd otransitionend"},p7=Object.keys(h7).filter(function(e){if(typeof document=="undefined")return!1;var t=document.getElementsByTagName("html")[0];return e in(t?t.style:{})})[0],ET=h7[p7];function MT(e,t,n,r){e.addEventListener?e.addEventListener(t,n,r):e.attachEvent&&e.attachEvent("on".concat(t),n)}function IT(e,t,n,r){e.removeEventListener?e.removeEventListener(t,n,r):e.attachEvent&&e.detachEvent("on".concat(t),n)}function h_e(e,t){var n;return typeof e=="function"?n=e(t):n=e,Array.isArray(n)?n.length===2?n:[n[0],n[1]]:[n]}var NT=function(t){return!isNaN(parseFloat(t))&&isFinite(t)},p_e=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var 
a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n};function jh(){}var qc={},Kh=!(typeof window!="undefined"&&window.document&&window.document.createElement),v_e=G({name:"Drawer",mixins:[nt],inheritAttrs:!1,props:An(d_e,{prefixCls:"drawer",placement:"left",getContainer:"body",level:"all",duration:".3s",ease:"cubic-bezier(0.78, 0.14, 0.15, 0.86)",firstEnter:!1,showMask:!0,handler:!0,maskStyle:{},wrapperClassName:""}),data:function(){this.levelDom=[],this.contentDom=null,this.maskDom=null,this.handlerdom=null,this.mousePos=null,this.sFirstEnter=this.firstEnter,this.timeout=null,this.children=null,this.dom=null,this.drawerId=Number((Date.now()+Math.random()).toString().replace(".",Math.round(Math.random()*9))).toString(16);var t=this.open!==void 0?this.open:!!this.defaultOpen;return qc[this.drawerId]=t,this.orignalOpen=this.open,this.preProps=P({},this.$props),{sOpen:t,isOpenChange:void 0,passive:void 0,container:void 0}},watch:{open:function(e){function t(n){return e.apply(this,arguments)}return t.toString=function(){return e.toString()},t}(function(e){var t=this;e!==void 0&&e!==this.preProps.open&&(this.isOpenChange=!0,this.container||this.getDefault(this.$props),this.setState({sOpen:open})),this.preProps.open=e,e&&setTimeout(function(){t.domFocus()})}),placement:function(t){t!==this.preProps.placement&&(this.contentDom=null),this.preProps.placement=t},level:function(t){this.preProps.level!==t&&this.getParentAndLevelDom(this.$props),this.preProps.level=t}},mounted:function(){var t=this;Ne(function(){Kh||(t.passive=mn?{passive:!1}:!1);var n=t.getOpen();(t.handler||n||t.sFirstEnter)&&(t.getDefault(t.$props),n&&(t.isOpenChange=!0,Ne(function(){t.domFocus()})),t.$forceUpdate())})},updated:function(){var t=this;Ne(function(){!t.sFirstEnter&&t.container&&(t.$forceUpdate(),t.sFirstEnter=!0)})},beforeUnmount:function(){delete qc[this.drawerId],delete 
this.isOpenChange,this.container&&(this.sOpen&&this.setLevelDomTransform(!1,!0),document.body.style.overflow=""),this.sFirstEnter=!1,clearTimeout(this.timeout)},methods:{domFocus:function(){this.dom&&this.dom.focus()},onKeyDown:function(t){t.keyCode===ze.ESC&&(t.stopPropagation(),this.__emit("close",t))},onMaskTouchEnd:function(t){this.__emit("close",t),this.onTouchEnd(t,!0)},onIconTouchEnd:function(t){this.__emit("handleClick",t),this.onTouchEnd(t)},onTouchEnd:function(t,n){if(this.open===void 0){var r=n||this.sOpen;this.isOpenChange=!0,this.setState({sOpen:!r})}},onWrapperTransitionEnd:function(t){if(t.target===this.contentWrapper&&t.propertyName.match(/transform$/)){var n=this.getOpen();this.dom.style.transition="",!n&&this.getCurrentDrawerSome()&&(document.body.style.overflowX="",this.maskDom&&(this.maskDom.style.left="",this.maskDom.style.width="")),this.afterVisibleChange&&this.afterVisibleChange(!!n)}},getDefault:function(t){this.getParentAndLevelDom(t),(t.getContainer||t.parent)&&(this.container=this.defaultGetContainer())},getCurrentDrawerSome:function(){return!Object.keys(qc).some(function(t){return qc[t]})},getSelfContainer:function(){return this.container},getParentAndLevelDom:function(t){var n=this;if(!Kh){var r=t.level,a=t.getContainer;if(this.levelDom=[],a){if(typeof a=="string"){var o=document.querySelectorAll(a)[0];this.parent=o}typeof a=="function"&&(this.parent=a()),kt(a)==="object"&&a instanceof window.HTMLElement&&(this.parent=a)}if(!a&&this.container&&(this.parent=this.container.parentNode),r==="all"){var i=Array.prototype.slice.call(this.parent.children);i.forEach(function(l){l.nodeName!=="SCRIPT"&&l.nodeName!=="STYLE"&&l.nodeName!=="LINK"&&l!==n.container&&n.levelDom.push(l)})}else r&&f_e(r).forEach(function(l){document.querySelectorAll(l).forEach(function(s){n.levelDom.push(s)})})}},setLevelDomTransform:function(t,n,r,a){var 
o=this,i=this.$props,l=i.placement,s=i.levelMove,c=i.duration,d=i.ease,f=i.getContainer;if(!Kh&&(this.levelDom.forEach(function(w){if(w&&(o.isOpenChange||n)){w.style.transition="transform ".concat(c," ").concat(d),MT(w,ET,o.trnasitionEnd);var k=t?a:0;if(s){var $=h_e(s,{target:w,open:t});k=t?$[0]:$[1]||0}var O=typeof k=="number"?"".concat(k,"px"):k,T=l==="left"||l==="top"?O:"-".concat(O);w.style.transform=k?"".concat(r,"(").concat(T,")"):"",w.style.msTransform=k?"".concat(r,"(").concat(T,")"):""}}),f==="body")){var p=["touchstart"],v=[document.body,this.maskDom,this.handlerdom,this.contentDom],m=document.body.scrollHeight>(window.innerHeight||document.documentElement.clientHeight)&&window.innerWidth>document.body.offsetWidth?d7(1):0,y="width ".concat(c," ").concat(d),b="transform ".concat(c," ").concat(d);if(t&&document.body.style.overflow!=="hidden"){if(document.body.style.overflow="hidden",m&&(document.body.style.position="relative",document.body.style.width="calc(100% - ".concat(m,"px)"),clearTimeout(this.timeout),this.dom)){switch(this.dom.style.transition="none",l){case"right":this.dom.style.transform="translateX(-".concat(m,"px)"),this.dom.style.msTransform="translateX(-".concat(m,"px)");break;case"top":case"bottom":this.dom.style.width="calc(100% - ".concat(m,"px)"),this.dom.style.transform="translateZ(0)";break}this.timeout=setTimeout(function(){o.dom.style.transition="".concat(b,",").concat(y),o.dom.style.width="",o.dom.style.transform="",o.dom.style.msTransform=""})}v.forEach(function(w,k){!w||MT(w,p[k]||"touchmove",k?o.removeMoveHandler:o.removeStartHandler,o.passive)})}else if(this.getCurrentDrawerSome()){if(document.body.style.overflow="",(this.isOpenChange||n)&&m&&(document.body.style.position="",document.body.style.width="",p7&&(document.body.style.overflowX="hidden"),l==="right"&&this.maskDom&&(this.maskDom.style.left="-".concat(m,"px"),this.maskDom.style.width="calc(100% + 
".concat(m,"px)")),clearTimeout(this.timeout),this.dom)){this.dom.style.transition="none";var C;switch(l){case"right":{this.dom.style.transform="translateX(".concat(m,"px)"),this.dom.style.msTransform="translateX(".concat(m,"px)"),this.dom.style.width="100%",y="width 0s ".concat(d," ").concat(c);break}case"top":case"bottom":{this.dom.style.width="calc(100% + ".concat(m,"px)"),this.dom.style.height="100%",this.dom.style.transform="translateZ(0)",C="height 0s ".concat(d," ").concat(c);break}}this.timeout=setTimeout(function(){o.dom.style.transition="".concat(b,",").concat(C?"".concat(C,","):"").concat(y),o.dom.style.transform="",o.dom.style.msTransform="",o.dom.style.width="",o.dom.style.height=""})}v.forEach(function(w,k){!w||IT(w,p[k]||"touchmove",k?o.removeMoveHandler:o.removeStartHandler,o.passive)})}}var S=this.$attrs.onChange;S&&this.isOpenChange&&this.sFirstEnter&&(S(t),this.isOpenChange=!1)},getChildToRender:function(t){var n,r=this,a,o=this.$props,i=o.prefixCls,l=o.placement,s=o.handler,c=o.showMask,d=o.maskStyle,f=o.width,p=o.height,v=o.wrapStyle,m=o.keyboard,y=o.maskClosable,b=this.$attrs,C=b.class,S=b.style,w=p_e(b,["class","style"]),k=ht(this),$=Se(i,(n={},V(n,"".concat(i,"-").concat(l),!0),V(n,"".concat(i,"-open"),t),V(n,"no-mask",!c),V(n,C,C),n)),O=this.isOpenChange,T=l==="left"||l==="right",_="translate".concat(T?"X":"Y"),I=l==="left"||l==="top"?"-100%":"100%",L=t?"":"".concat(_,"(").concat(I,")");if(O===void 0||O){var j=this.contentDom?this.contentDom.getBoundingClientRect()[T?"width":"height"]:0,F=(T?f:p)||j;this.setLevelDomTransform(t,!1,_,F)}var N;if(s!==!1){var D=g("div",{class:"drawer-handle",onClick:function(){}},[g("i",{class:"drawer-handle-icon"},null)]),z=this.handler,B=z||D,M=B.props&&B.props.onClick;N=Ot(B,{onClick:function(Y){M&&M(Y),r.onIconTouchEnd(Y)},ref:function(Y){r.handlerdom=Y}})}var 
E=P(P({},w),{class:$,onTransitionend:this.onWrapperTransitionEnd,onKeydown:t&&m?this.onKeyDown:jh,style:P(P({},v),S)}),K=(a={},V(a,mn?"onTouchstartPassive":"onTouchstart",t?this.removeStartHandler:jh),V(a,mn?"onTouchmovePassive":"onTouchmove",t?this.removeMoveHandler:jh),a);return g("div",le(le({ref:function(Y){r.dom=Y}},E),{},{tabindex:-1}),[c&&g("div",{key:t,class:"".concat(i,"-mask"),onClick:y?this.onMaskTouchEnd:jh,style:d,ref:function(Y){r.maskDom=Y}},null),g("div",{class:"".concat(i,"-content-wrapper"),style:{transform:L,msTransform:L,width:NT(f)?"".concat(f,"px"):f,height:NT(p)?"".concat(p,"px"):p},ref:function(Y){r.contentWrapper=Y}},[g("div",le({class:"".concat(i,"-content"),ref:function(Y){r.contentDom=Y}},K),[k]),N])])},getOpen:function(){return this.open!==void 0?this.open:this.sOpen},getTouchParentScroll:function(t,n,r,a){if(!n||n===document)return!1;if(n===t.parentNode)return!0;var o=Math.max(Math.abs(r),Math.abs(a))===Math.abs(a),i=Math.max(Math.abs(r),Math.abs(a))===Math.abs(r),l=n.scrollHeight-n.clientHeight,s=n.scrollWidth-n.clientWidth,c=n.scrollTop,d=n.scrollLeft;n.scrollTo&&n.scrollTo(n.scrollLeft+1,n.scrollTop+1);var f=n.scrollTop,p=n.scrollLeft;return n.scrollTo&&n.scrollTo(n.scrollLeft-1,n.scrollTop-1),o&&(!l||!(f-c)||l&&(n.scrollTop>=l&&a<0||n.scrollTop<=0&&a>0))||i&&(!s||!(p-d)||s&&(n.scrollLeft>=s&&r<0||n.scrollLeft<=0&&r>0))?this.getTouchParentScroll(t,n.parentNode,r,a):!1},removeStartHandler:function(t){t.touches.length>1||(this.startPos={x:t.touches[0].clientX,y:t.touches[0].clientY})},removeMoveHandler:function(t){if(!(t.changedTouches.length>1)){var n=t.currentTarget,r=t.changedTouches[0].clientX-this.startPos.x,a=t.changedTouches[0].clientY-this.startPos.y;(n===this.maskDom||n===this.handlerdom||n===this.contentDom&&this.getTouchParentScroll(n,t.target,r,a))&&t.preventDefault()}},trnasitionEnd:function(t){IT(t.target,ET,this.trnasitionEnd),t.target.style.transition=""},defaultGetContainer:function(){if(Kh)return null;var 
t=document.createElement("div");return this.parent.appendChild(t),this.wrapperClassName&&(t.className=this.wrapperClassName),t}},render:function(){var t=this,n=this.$props,r=n.getContainer,a=n.wrapperClassName,o=n.handler,i=n.forceRender,l=this.getOpen(),s=null;qc[this.drawerId]=l&&this.container;var c=this.getChildToRender(this.sFirstEnter?l:!1);if(!r)return g("div",{class:a,ref:function(p){t.container=p}},[c]);if(!this.container||!l&&!this.sFirstEnter)return null;var d=!!o||i;return(d||l||this.dom)&&(s=g(Ps,{to:this.getSelfContainer()},{default:function(){return[c]}})),s}}),m_e=v_e,g_e=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},y_e=rt("top","right","bottom","left"),b_e=G({name:"ADrawer",mixins:[nt],inheritAttrs:!1,props:{closable:u.looseBool.def(!0),destroyOnClose:u.looseBool,getContainer:u.any,maskClosable:u.looseBool.def(!0),mask:u.looseBool.def(!0),maskStyle:u.object,wrapStyle:u.object,bodyStyle:u.object,headerStyle:u.object,drawerStyle:u.object,title:u.VNodeChild,visible:u.looseBool,width:u.oneOfType([u.string,u.number]).def(256),height:u.oneOfType([u.string,u.number]).def(256),zIndex:u.number,prefixCls:u.string,placement:u.oneOf(y_e).def("right"),level:u.any.def(null),wrapClassName:u.string,handle:u.VNodeChild,afterVisibleChange:u.func,keyboard:u.looseBool.def(!0),onClose:u.func,"onUpdate:visible":u.func},setup:function(t){var n=ve("configProvider",St);return{configProvider:n,destroyClose:!1,preVisible:t.visible,parentDrawer:ve("parentDrawer",null)}},data:function(){return{sPush:!1}},beforeCreate:function(){ot("parentDrawer",this)},mounted:function(){var t=this.visible;t&&this.parentDrawer&&this.parentDrawer.push()},updated:function(){var 
t=this;Ne(function(){t.preVisible!==t.visible&&t.parentDrawer&&(t.visible?t.parentDrawer.push():t.parentDrawer.pull()),t.preVisible=t.visible})},beforeUnmount:function(){this.parentDrawer&&this.parentDrawer.pull()},methods:{domFocus:function(){this.$refs.vcDrawer&&this.$refs.vcDrawer.domFocus()},close:function(t){this.$emit("update:visible",!1),this.$emit("close",t)},push:function(){this.setState({sPush:!0})},pull:function(){var t=this;this.setState({sPush:!1},function(){t.domFocus()})},onDestroyTransitionEnd:function(){var t=this.getDestroyOnClose();!t||this.visible||(this.destroyClose=!0,this.$forceUpdate())},getDestroyOnClose:function(){return this.destroyOnClose&&!this.visible},getPushTransform:function(t){if(t==="left"||t==="right")return"translateX(".concat(t==="left"?180:-180,"px)");if(t==="top"||t==="bottom")return"translateY(".concat(t==="top"?180:-180,"px)")},getRcDrawerStyle:function(){var t=this.$props,n=t.zIndex,r=t.placement,a=t.wrapStyle,o=this.$data.sPush;return P({zIndex:n,transform:o?this.getPushTransform(r):void 0},a)},renderHeader:function(t){var n=this.$props,r=n.closable,a=n.headerStyle,o=We(this,"title");if(!o&&!r)return null;var i=o?"".concat(t,"-header"):"".concat(t,"-header-no-title");return g("div",{class:i,style:a},[o&&g("div",{class:"".concat(t,"-title")},[o]),r?this.renderCloseIcon(t):null])},renderCloseIcon:function(t){var n=this.closable;return n&&g("button",{key:"closer",onClick:this.close,"aria-label":"Close",class:"".concat(t,"-close")},[g(go,null,null)])},renderBody:function(t){var n,r;if(this.destroyClose&&!this.visible)return null;this.destroyClose=!1;var a=this.$props,o=a.bodyStyle,i=a.drawerStyle,l={},s=this.getDestroyOnClose();return s&&(l.opacity=0,l.transition="opacity .3s"),g("div",{class:"".concat(t,"-wrapper-body"),style:P(P({},l),i),onTransitionend:this.onDestroyTransitionEnd},[this.renderHeader(t),g("div",{key:"body",class:"".concat(t,"-body"),style:o},[(r=(n=this.$slots).default)===null||r===void 0?void 
0:r.call(n)])])}},render:function(){var t,n=this,r=Qe(this),a=r.prefixCls,o=r.width,i=r.height,l=r.visible,s=r.placement,c=r.wrapClassName,d=r.mask,f=g_e(r,["prefixCls","width","height","visible","placement","wrapClassName","mask"]),p=d?"":"no-mask",v={};s==="left"||s==="right"?v.width=typeof o=="number"?"".concat(o,"px"):o:v.height=typeof i=="number"?"".concat(i,"px"):i;var m=We(this,"handle")||!1,y=this.configProvider.getPrefixCls,b=y("drawer",a),C=this.$attrs.class,S=P(P(P(P(P({},this.$attrs),bn(f,["closable","destroyOnClose","drawerStyle","headerStyle","bodyStyle","title","push","visible","getPopupContainer","rootPrefixCls","getPrefixCls","renderEmpty","csp","pageHeader","autoInsertSpaceInButton"])),{onClose:this.close,handler:m}),v),{prefixCls:b,open:l,showMask:d,placement:s,class:Se((t={},V(t,C,!!C),V(t,c,!!c),V(t,p,!!p),t)),wrapStyle:this.getRcDrawerStyle(),ref:"vcDrawer"});return g(m_e,S,{default:function(){return[n.renderBody(b)]}})}}),C_e=kn(b_e),v7={exports:{}};(function(e){var t=function(n){var r=Object.prototype,a=r.hasOwnProperty,o,i=typeof Symbol=="function"?Symbol:{},l=i.iterator||"@@iterator",s=i.asyncIterator||"@@asyncIterator",c=i.toStringTag||"@@toStringTag";function d(E,K,W){return Object.defineProperty(E,K,{value:W,enumerable:!0,configurable:!0,writable:!0}),E[K]}try{d({},"")}catch{d=function(K,W,Y){return K[W]=Y}}function f(E,K,W,Y){var q=K&&K.prototype instanceof S?K:S,J=Object.create(q.prototype),ne=new z(Y||[]);return J._invoke=j(E,W,ne),J}n.wrap=f;function p(E,K,W){try{return{type:"normal",arg:E.call(K,W)}}catch(Y){return{type:"throw",arg:Y}}}var v="suspendedStart",m="suspendedYield",y="executing",b="completed",C={};function S(){}function w(){}function k(){}var $={};d($,l,function(){return this});var O=Object.getPrototypeOf,T=O&&O(O(B([])));T&&T!==r&&a.call(T,l)&&($=T);var _=k.prototype=S.prototype=Object.create($);w.prototype=k,d(_,"constructor",k),d(k,"constructor",w),w.displayName=d(k,c,"GeneratorFunction");function 
I(E){["next","throw","return"].forEach(function(K){d(E,K,function(W){return this._invoke(K,W)})})}n.isGeneratorFunction=function(E){var K=typeof E=="function"&&E.constructor;return K?K===w||(K.displayName||K.name)==="GeneratorFunction":!1},n.mark=function(E){return Object.setPrototypeOf?Object.setPrototypeOf(E,k):(E.__proto__=k,d(E,c,"GeneratorFunction")),E.prototype=Object.create(_),E},n.awrap=function(E){return{__await:E}};function L(E,K){function W(J,ne,oe,Q){var ae=p(E[J],E,ne);if(ae.type==="throw")Q(ae.arg);else{var de=ae.arg,be=de.value;return be&&typeof be=="object"&&a.call(be,"__await")?K.resolve(be.__await).then(function(Ee){W("next",Ee,oe,Q)},function(Ee){W("throw",Ee,oe,Q)}):K.resolve(be).then(function(Ee){de.value=Ee,oe(de)},function(Ee){return W("throw",Ee,oe,Q)})}}var Y;function q(J,ne){function oe(){return new K(function(Q,ae){W(J,ne,Q,ae)})}return Y=Y?Y.then(oe,oe):oe()}this._invoke=q}I(L.prototype),d(L.prototype,s,function(){return this}),n.AsyncIterator=L,n.async=function(E,K,W,Y,q){q===void 0&&(q=Promise);var J=new L(f(E,K,W,Y),q);return n.isGeneratorFunction(K)?J:J.next().then(function(ne){return ne.done?ne.value:J.next()})};function j(E,K,W){var Y=v;return function(J,ne){if(Y===y)throw new Error("Generator is already running");if(Y===b){if(J==="throw")throw ne;return M()}for(W.method=J,W.arg=ne;;){var oe=W.delegate;if(oe){var Q=F(oe,W);if(Q){if(Q===C)continue;return Q}}if(W.method==="next")W.sent=W._sent=W.arg;else if(W.method==="throw"){if(Y===v)throw Y=b,W.arg;W.dispatchException(W.arg)}else W.method==="return"&&W.abrupt("return",W.arg);Y=y;var ae=p(E,K,W);if(ae.type==="normal"){if(Y=W.done?b:m,ae.arg===C)continue;return{value:ae.arg,done:W.done}}else ae.type==="throw"&&(Y=b,W.method="throw",W.arg=ae.arg)}}}function F(E,K){var W=E.iterator[K.method];if(W===o){if(K.delegate=null,K.method==="throw"){if(E.iterator.return&&(K.method="return",K.arg=o,F(E,K),K.method==="throw"))return C;K.method="throw",K.arg=new TypeError("The iterator does not 
provide a 'throw' method")}return C}var Y=p(W,E.iterator,K.arg);if(Y.type==="throw")return K.method="throw",K.arg=Y.arg,K.delegate=null,C;var q=Y.arg;if(!q)return K.method="throw",K.arg=new TypeError("iterator result is not an object"),K.delegate=null,C;if(q.done)K[E.resultName]=q.value,K.next=E.nextLoc,K.method!=="return"&&(K.method="next",K.arg=o);else return q;return K.delegate=null,C}I(_),d(_,c,"Generator"),d(_,l,function(){return this}),d(_,"toString",function(){return"[object Generator]"});function N(E){var K={tryLoc:E[0]};1 in E&&(K.catchLoc=E[1]),2 in E&&(K.finallyLoc=E[2],K.afterLoc=E[3]),this.tryEntries.push(K)}function D(E){var K=E.completion||{};K.type="normal",delete K.arg,E.completion=K}function z(E){this.tryEntries=[{tryLoc:"root"}],E.forEach(N,this),this.reset(!0)}n.keys=function(E){var K=[];for(var W in E)K.push(W);return K.reverse(),function Y(){for(;K.length;){var q=K.pop();if(q in E)return Y.value=q,Y.done=!1,Y}return Y.done=!0,Y}};function B(E){if(E){var K=E[l];if(K)return K.call(E);if(typeof E.next=="function")return E;if(!isNaN(E.length)){var W=-1,Y=function q(){for(;++W<E.length;)if(a.call(E,W))return q.value=E[W],q.done=!1,q;return q.value=o,q.done=!0,q};return Y.next=Y}}return{next:M}}n.values=B;function M(){return{value:o,done:!0}}return z.prototype={constructor:z,reset:function(E){if(this.prev=0,this.next=0,this.sent=this._sent=o,this.done=!1,this.delegate=null,this.method="next",this.arg=o,this.tryEntries.forEach(D),!E)for(var K in this)K.charAt(0)==="t"&&a.call(this,K)&&!isNaN(+K.slice(1))&&(this[K]=o)},stop:function(){this.done=!0;var E=this.tryEntries[0],K=E.completion;if(K.type==="throw")throw K.arg;return this.rval},dispatchException:function(E){if(this.done)throw E;var K=this;function W(Q,ae){return J.type="throw",J.arg=E,K.next=Q,ae&&(K.method="next",K.arg=o),!!ae}for(var Y=this.tryEntries.length-1;Y>=0;--Y){var q=this.tryEntries[Y],J=q.completion;if(q.tryLoc==="root")return W("end");if(q.tryLoc<=this.prev){var 
ne=a.call(q,"catchLoc"),oe=a.call(q,"finallyLoc");if(ne&&oe){if(this.prev<q.catchLoc)return W(q.catchLoc,!0);if(this.prev<q.finallyLoc)return W(q.finallyLoc)}else if(ne){if(this.prev<q.catchLoc)return W(q.catchLoc,!0)}else if(oe){if(this.prev<q.finallyLoc)return W(q.finallyLoc)}else throw new Error("try statement without catch or finally")}}},abrupt:function(E,K){for(var W=this.tryEntries.length-1;W>=0;--W){var Y=this.tryEntries[W];if(Y.tryLoc<=this.prev&&a.call(Y,"finallyLoc")&&this.prev<Y.finallyLoc){var q=Y;break}}q&&(E==="break"||E==="continue")&&q.tryLoc<=K&&K<=q.finallyLoc&&(q=null);var J=q?q.completion:{};return J.type=E,J.arg=K,q?(this.method="next",this.next=q.finallyLoc,C):this.complete(J)},complete:function(E,K){if(E.type==="throw")throw E.arg;return E.type==="break"||E.type==="continue"?this.next=E.arg:E.type==="return"?(this.rval=this.arg=E.arg,this.method="return",this.next="end"):E.type==="normal"&&K&&(this.next=K),C},finish:function(E){for(var K=this.tryEntries.length-1;K>=0;--K){var W=this.tryEntries[K];if(W.finallyLoc===E)return this.complete(W.completion,W.afterLoc),D(W),C}},catch:function(E){for(var K=this.tryEntries.length-1;K>=0;--K){var W=this.tryEntries[K];if(W.tryLoc===E){var Y=W.completion;if(Y.type==="throw"){var q=Y.arg;D(W)}return q}}throw new Error("illegal catch attempt")},delegateYield:function(E,K,W){return this.delegate={iterator:B(E),resultName:K,nextLoc:W},this.method==="next"&&(this.arg=o),C}},n}(e.exports);try{regeneratorRuntime=t}catch{typeof globalThis=="object"?globalThis.regeneratorRuntime=t:Function("r","regeneratorRuntime = r")(t)}})(v7);var pl=v7.exports;function Jl(){return Jl=Object.assign||function(e){for(var t=1;t<arguments.length;t++){var n=arguments[t];for(var r in n)Object.prototype.hasOwnProperty.call(n,r)&&(e[r]=n[r])}return e},Jl.apply(this,arguments)}function w_e(e,t){e.prototype=Object.create(t.prototype),e.prototype.constructor=e,pf(e,t)}function w1(e){return 
w1=Object.setPrototypeOf?Object.getPrototypeOf:function(n){return n.__proto__||Object.getPrototypeOf(n)},w1(e)}function pf(e,t){return pf=Object.setPrototypeOf||function(r,a){return r.__proto__=a,r},pf(e,t)}function S_e(){if(typeof Reflect=="undefined"||!Reflect.construct||Reflect.construct.sham)return!1;if(typeof Proxy=="function")return!0;try{return Boolean.prototype.valueOf.call(Reflect.construct(Boolean,[],function(){})),!0}catch{return!1}}function Gp(e,t,n){return S_e()?Gp=Reflect.construct:Gp=function(a,o,i){var l=[null];l.push.apply(l,o);var s=Function.bind.apply(a,l),c=new s;return i&&pf(c,i.prototype),c},Gp.apply(null,arguments)}function k_e(e){return Function.toString.call(e).indexOf("[native code]")!==-1}function S1(e){var t=typeof Map=="function"?new Map:void 0;return S1=function(r){if(r===null||!k_e(r))return r;if(typeof r!="function")throw new TypeError("Super expression must either be null or a function");if(typeof t!="undefined"){if(t.has(r))return t.get(r);t.set(r,a)}function a(){return Gp(r,arguments,w1(this).constructor)}return a.prototype=Object.create(r.prototype,{constructor:{value:a,enumerable:!1,writable:!0,configurable:!0}}),pf(a,r)},S1(e)}var $_e=/%[sdj%]/g,O_e=function(){};typeof process!="undefined"&&process.env;function k1(e){if(!e||!e.length)return null;var t={};return e.forEach(function(n){var r=n.field;t[r]=t[r]||[],t[r].push(n)}),t}function da(){for(var e=arguments.length,t=new Array(e),n=0;n<e;n++)t[n]=arguments[n];var r=1,a=t[0],o=t.length;if(typeof a=="function")return a.apply(null,t.slice(1));if(typeof a=="string"){var i=String(a).replace($_e,function(l){if(l==="%%")return"%";if(r>=o)return l;switch(l){case"%s":return String(t[r++]);case"%d":return Number(t[r++]);case"%j":try{return JSON.stringify(t[r++])}catch{return"[Circular]"}break;default:return l}});return i}return a}function P_e(e){return e==="string"||e==="url"||e==="hex"||e==="email"||e==="date"||e==="pattern"}function 
sr(e,t){return!!(e==null||t==="array"&&Array.isArray(e)&&!e.length||P_e(t)&&typeof e=="string"&&!e)}function T_e(e,t,n){var r=[],a=0,o=e.length;function i(l){r.push.apply(r,l),a++,a===o&&n(r)}e.forEach(function(l){t(l,i)})}function AT(e,t,n){var r=0,a=e.length;function o(i){if(i&&i.length){n(i);return}var l=r;r=r+1,l<a?t(e[l],o):n([])}o([])}function x_e(e){var t=[];return Object.keys(e).forEach(function(n){t.push.apply(t,e[n])}),t}var DT=function(e){w_e(t,e);function t(n,r){var a;return a=e.call(this,"Async Validation Error")||this,a.errors=n,a.fields=r,a}return t}(S1(Error));function __e(e,t,n,r){if(t.first){var a=new Promise(function(f,p){var v=function(b){return r(b),b.length?p(new DT(b,k1(b))):f()},m=x_e(e);AT(m,n,v)});return a.catch(function(f){return f}),a}var o=t.firstFields||[];o===!0&&(o=Object.keys(e));var i=Object.keys(e),l=i.length,s=0,c=[],d=new Promise(function(f,p){var v=function(y){if(c.push.apply(c,y),s++,s===l)return r(c),c.length?p(new DT(c,k1(c))):f()};i.length||(r(c),f()),i.forEach(function(m){var y=e[m];o.indexOf(m)!==-1?AT(y,n,v):T_e(y,n,v)})});return d.catch(function(f){return f}),d}function RT(e){return function(t){return t&&t.message?(t.field=t.field||e.fullField,t):{message:typeof t=="function"?t():t,field:t.field||e.fullField}}}function LT(e,t){if(t){for(var n in t)if(t.hasOwnProperty(n)){var r=t[n];typeof r=="object"&&typeof e[n]=="object"?e[n]=Jl({},e[n],r):e[n]=r}}return e}function m7(e,t,n,r,a,o){e.required&&(!n.hasOwnProperty(e.field)||sr(t,o||e.type))&&r.push(da(a.messages.required,e.fullField))}function E_e(e,t,n,r,a){(/^\s+$/.test(t)||t==="")&&r.push(da(a.messages.whitespace,e.fullField))}var cb={email:/^(([^<>()\[\]\\.,;:\s@"]+(\.[^<>()\[\]\\.,;:\s@"]+)*)|(".+"))@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}])|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,}))$/,url:new 
RegExp("^(?!mailto:)(?:(?:http|https|ftp)://|//)(?:\\S+(?::\\S*)?@)?(?:(?:(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}(?:\\.(?:[0-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))|(?:(?:[a-z\\u00a1-\\uffff0-9]+-*)*[a-z\\u00a1-\\uffff0-9]+)(?:\\.(?:[a-z\\u00a1-\\uffff0-9]+-*)*[a-z\\u00a1-\\uffff0-9]+)*(?:\\.(?:[a-z\\u00a1-\\uffff]{2,})))|localhost)(?::\\d{2,5})?(?:(/|\\?|#)[^\\s]*)?$","i"),hex:/^#?([a-f0-9]{6}|[a-f0-9]{3})$/i},ld={integer:function(t){return ld.number(t)&&parseInt(t,10)===t},float:function(t){return ld.number(t)&&!ld.integer(t)},array:function(t){return Array.isArray(t)},regexp:function(t){if(t instanceof RegExp)return!0;try{return!!new RegExp(t)}catch{return!1}},date:function(t){return typeof t.getTime=="function"&&typeof t.getMonth=="function"&&typeof t.getYear=="function"&&!isNaN(t.getTime())},number:function(t){return isNaN(t)?!1:typeof t=="number"},object:function(t){return typeof t=="object"&&!ld.array(t)},method:function(t){return typeof t=="function"},email:function(t){return typeof t=="string"&&!!t.match(cb.email)&&t.length<255},url:function(t){return typeof t=="string"&&!!t.match(cb.url)},hex:function(t){return typeof t=="string"&&!!t.match(cb.hex)}};function M_e(e,t,n,r,a){if(e.required&&t===void 0){m7(e,t,n,r,a);return}var o=["integer","float","array","regexp","object","method","email","number","date","url","hex"],i=e.type;o.indexOf(i)>-1?ld[i](t)||r.push(da(a.messages.types[i],e.fullField,e.type)):i&&typeof t!==e.type&&r.push(da(a.messages.types[i],e.fullField,e.type))}function I_e(e,t,n,r,a){var o=typeof e.len=="number",i=typeof e.min=="number",l=typeof e.max=="number",s=/[\uD800-\uDBFF][\uDC00-\uDFFF]/g,c=t,d=null,f=typeof t=="number",p=typeof 
t=="string",v=Array.isArray(t);if(f?d="number":p?d="string":v&&(d="array"),!d)return!1;v&&(c=t.length),p&&(c=t.replace(s,"_").length),o?c!==e.len&&r.push(da(a.messages[d].len,e.fullField,e.len)):i&&!l&&c<e.min?r.push(da(a.messages[d].min,e.fullField,e.min)):l&&!i&&c>e.max?r.push(da(a.messages[d].max,e.fullField,e.max)):i&&l&&(c<e.min||c>e.max)&&r.push(da(a.messages[d].range,e.fullField,e.min,e.max))}var Xs="enum";function N_e(e,t,n,r,a){e[Xs]=Array.isArray(e[Xs])?e[Xs]:[],e[Xs].indexOf(t)===-1&&r.push(da(a.messages[Xs],e.fullField,e[Xs].join(", ")))}function A_e(e,t,n,r,a){if(e.pattern){if(e.pattern instanceof RegExp)e.pattern.lastIndex=0,e.pattern.test(t)||r.push(da(a.messages.pattern.mismatch,e.fullField,t,e.pattern));else if(typeof e.pattern=="string"){var o=new RegExp(e.pattern);o.test(t)||r.push(da(a.messages.pattern.mismatch,e.fullField,t,e.pattern))}}}var nn={required:m7,whitespace:E_e,type:M_e,range:I_e,enum:N_e,pattern:A_e};function D_e(e,t,n,r,a){var o=[],i=e.required||!e.required&&r.hasOwnProperty(e.field);if(i){if(sr(t,"string")&&!e.required)return n();nn.required(e,t,r,o,a,"string"),sr(t,"string")||(nn.type(e,t,r,o,a),nn.range(e,t,r,o,a),nn.pattern(e,t,r,o,a),e.whitespace===!0&&nn.whitespace(e,t,r,o,a))}n(o)}function R_e(e,t,n,r,a){var o=[],i=e.required||!e.required&&r.hasOwnProperty(e.field);if(i){if(sr(t)&&!e.required)return n();nn.required(e,t,r,o,a),t!==void 0&&nn.type(e,t,r,o,a)}n(o)}function L_e(e,t,n,r,a){var o=[],i=e.required||!e.required&&r.hasOwnProperty(e.field);if(i){if(t===""&&(t=void 0),sr(t)&&!e.required)return n();nn.required(e,t,r,o,a),t!==void 0&&(nn.type(e,t,r,o,a),nn.range(e,t,r,o,a))}n(o)}function F_e(e,t,n,r,a){var o=[],i=e.required||!e.required&&r.hasOwnProperty(e.field);if(i){if(sr(t)&&!e.required)return n();nn.required(e,t,r,o,a),t!==void 0&&nn.type(e,t,r,o,a)}n(o)}function B_e(e,t,n,r,a){var o=[],i=e.required||!e.required&&r.hasOwnProperty(e.field);if(i){if(sr(t)&&!e.required)return 
n();nn.required(e,t,r,o,a),sr(t)||nn.type(e,t,r,o,a)}n(o)}function V_e(e,t,n,r,a){var o=[],i=e.required||!e.required&&r.hasOwnProperty(e.field);if(i){if(sr(t)&&!e.required)return n();nn.required(e,t,r,o,a),t!==void 0&&(nn.type(e,t,r,o,a),nn.range(e,t,r,o,a))}n(o)}function z_e(e,t,n,r,a){var o=[],i=e.required||!e.required&&r.hasOwnProperty(e.field);if(i){if(sr(t)&&!e.required)return n();nn.required(e,t,r,o,a),t!==void 0&&(nn.type(e,t,r,o,a),nn.range(e,t,r,o,a))}n(o)}function H_e(e,t,n,r,a){var o=[],i=e.required||!e.required&&r.hasOwnProperty(e.field);if(i){if(t==null&&!e.required)return n();nn.required(e,t,r,o,a,"array"),t!=null&&(nn.type(e,t,r,o,a),nn.range(e,t,r,o,a))}n(o)}function j_e(e,t,n,r,a){var o=[],i=e.required||!e.required&&r.hasOwnProperty(e.field);if(i){if(sr(t)&&!e.required)return n();nn.required(e,t,r,o,a),t!==void 0&&nn.type(e,t,r,o,a)}n(o)}var K_e="enum";function W_e(e,t,n,r,a){var o=[],i=e.required||!e.required&&r.hasOwnProperty(e.field);if(i){if(sr(t)&&!e.required)return n();nn.required(e,t,r,o,a),t!==void 0&&nn[K_e](e,t,r,o,a)}n(o)}function U_e(e,t,n,r,a){var o=[],i=e.required||!e.required&&r.hasOwnProperty(e.field);if(i){if(sr(t,"string")&&!e.required)return n();nn.required(e,t,r,o,a),sr(t,"string")||nn.pattern(e,t,r,o,a)}n(o)}function Y_e(e,t,n,r,a){var o=[],i=e.required||!e.required&&r.hasOwnProperty(e.field);if(i){if(sr(t,"date")&&!e.required)return n();if(nn.required(e,t,r,o,a),!sr(t,"date")){var l;t instanceof Date?l=t:l=new Date(t),nn.type(e,l,r,o,a),l&&nn.range(e,l.getTime(),r,o,a)}}n(o)}function q_e(e,t,n,r,a){var o=[],i=Array.isArray(t)?"array":typeof t;nn.required(e,t,r,o,a,i),n(o)}function db(e,t,n,r,a){var o=e.type,i=[],l=e.required||!e.required&&r.hasOwnProperty(e.field);if(l){if(sr(t,o)&&!e.required)return n();nn.required(e,t,r,i,a,o),sr(t,o)||nn.type(e,t,r,i,a)}n(i)}function G_e(e,t,n,r,a){var o=[],i=e.required||!e.required&&r.hasOwnProperty(e.field);if(i){if(sr(t)&&!e.required)return n();nn.required(e,t,r,o,a)}n(o)}var 
xd={string:D_e,method:R_e,number:L_e,boolean:F_e,regexp:B_e,integer:V_e,float:z_e,array:H_e,object:j_e,enum:W_e,pattern:U_e,date:Y_e,url:db,hex:db,email:db,required:q_e,any:G_e};function $1(){return{default:"Validation error on field %s",required:"%s is required",enum:"%s must be one of %s",whitespace:"%s cannot be empty",date:{format:"%s date %s is invalid for format %s",parse:"%s date could not be parsed, %s is invalid ",invalid:"%s date %s is invalid"},types:{string:"%s is not a %s",method:"%s is not a %s (function)",array:"%s is not an %s",object:"%s is not an %s",number:"%s is not a %s",date:"%s is not a %s",boolean:"%s is not a %s",integer:"%s is not an %s",float:"%s is not a %s",regexp:"%s is not a valid %s",email:"%s is not a valid %s",url:"%s is not a valid %s",hex:"%s is not a valid %s"},string:{len:"%s must be exactly %s characters",min:"%s must be at least %s characters",max:"%s cannot be longer than %s characters",range:"%s must be between %s and %s characters"},number:{len:"%s must equal %s",min:"%s cannot be less than %s",max:"%s cannot be greater than %s",range:"%s must be between %s and %s"},array:{len:"%s must be exactly %s in length",min:"%s cannot be less than %s in length",max:"%s cannot be greater than %s in length",range:"%s must be between %s and %s in length"},pattern:{mismatch:"%s value %s does not match pattern %s"},clone:function(){var t=JSON.parse(JSON.stringify(this));return t.clone=this.clone,t}}}var uS=$1();function al(e){this.rules=null,this._messages=uS,this.define(e)}al.prototype={messages:function(t){return t&&(this._messages=LT($1(),t)),this._messages},define:function(t){if(!t)throw new Error("Cannot configure a schema with no rules");if(typeof t!="object"||Array.isArray(t))throw new Error("Rules must be an object");this.rules={};var n,r;for(n in t)t.hasOwnProperty(n)&&(r=t[n],this.rules[n]=Array.isArray(r)?r:[r])},validate:function(t,n,r){var a=this;n===void 0&&(n={}),r===void 0&&(r=function(){});var o=t,i=n,l=r;if(typeof 
i=="function"&&(l=i,i={}),!this.rules||Object.keys(this.rules).length===0)return l&&l(),Promise.resolve();function s(y){var b,C=[],S={};function w(k){if(Array.isArray(k)){var $;C=($=C).concat.apply($,k)}else C.push(k)}for(b=0;b<y.length;b++)w(y[b]);C.length?S=k1(C):(C=null,S=null),l(C,S)}if(i.messages){var c=this.messages();c===uS&&(c=$1()),LT(c,i.messages),i.messages=c}else i.messages=this.messages();var d,f,p={},v=i.keys||Object.keys(this.rules);v.forEach(function(y){d=a.rules[y],f=o[y],d.forEach(function(b){var C=b;typeof C.transform=="function"&&(o===t&&(o=Jl({},o)),f=o[y]=C.transform(f)),typeof C=="function"?C={validator:C}:C=Jl({},C),C.validator=a.getValidationMethod(C),C.field=y,C.fullField=C.fullField||y,C.type=a.getType(C),C.validator&&(p[y]=p[y]||[],p[y].push({rule:C,value:f,source:o,field:y}))})});var m={};return __e(p,i,function(y,b){var C=y.rule,S=(C.type==="object"||C.type==="array")&&(typeof C.fields=="object"||typeof C.defaultField=="object");S=S&&(C.required||!C.required&&y.value),C.field=y.field;function w(O,T){return Jl({},T,{fullField:C.fullField+"."+O})}function k(O){O===void 0&&(O=[]);var T=O;if(Array.isArray(T)||(T=[T]),!i.suppressWarning&&T.length&&al.warning("async-validator:",T),T.length&&C.message!==void 0&&(T=[].concat(C.message)),T=T.map(RT(C)),i.first&&T.length)return m[C.field]=1,b(T);if(!S)b(T);else{if(C.required&&!y.value)return C.message!==void 0?T=[].concat(C.message).map(RT(C)):i.error&&(T=[i.error(C,da(i.messages.required,C.field))]),b(T);var _={};if(C.defaultField)for(var I in y.value)y.value.hasOwnProperty(I)&&(_[I]=C.defaultField);_=Jl({},_,y.rule.fields);for(var L in _)if(_.hasOwnProperty(L)){var j=Array.isArray(_[L])?_[L]:[_[L]];_[L]=j.map(w.bind(null,L))}var F=new al(_);F.messages(i.messages),y.rule.options&&(y.rule.options.messages=i.messages,y.rule.options.error=i.error),F.validate(y.value,y.rule.options||i,function(N){var D=[];T&&T.length&&D.push.apply(D,T),N&&N.length&&D.push.apply(D,N),b(D.length?D:null)})}}var 
$;C.asyncValidator?$=C.asyncValidator(C,y.value,k,y.source,i):C.validator&&($=C.validator(C,y.value,k,y.source,i),$===!0?k():$===!1?k(C.message||C.field+" fails"):$ instanceof Array?k($):$ instanceof Error&&k($.message)),$&&$.then&&$.then(function(){return k()},function(O){return k(O)})},function(y){s(y)})},getType:function(t){if(t.type===void 0&&t.pattern instanceof RegExp&&(t.type="pattern"),typeof t.validator!="function"&&t.type&&!xd.hasOwnProperty(t.type))throw new Error(da("Unknown rule type %s",t.type));return t.type||"string"},getValidationMethod:function(t){if(typeof t.validator=="function")return t.validator;var n=Object.keys(t),r=n.indexOf("message");return r!==-1&&n.splice(r,1),n.length===1&&n[0]==="required"?xd.required:xd[this.getType(t)]||!1}};al.register=function(t,n){if(typeof n!="function")throw new Error("Cannot register a validator by type, validator is not a function");xd[t]=n};al.warning=O_e;al.messages=uS;al.validators=xd;function vi(e){return e==null?[]:Array.isArray(e)?e:[e]}function O1(e){return vi(e)}function X_e(e,t){return e&&e.some(function(n){return J_e(n,t)})}function FT(e){return kt(e)==="object"&&e!==null&&Object.getPrototypeOf(e)===Object.prototype}function g7(e,t){var n=Array.isArray(e)?Je(e):P({},e);return t&&Object.keys(t).forEach(function(r){var a=n[r],o=t[r],i=FT(a)&&FT(o);n[r]=i?g7(a,o||{}):o}),n}function Z_e(e){for(var t=arguments.length,n=new Array(t>1?t-1:0),r=1;r<t;r++)n[r-1]=arguments[r];return n.reduce(function(a,o){return g7(a,o)},e)}function J_e(e,t){return!e||!t||e.length!==t.length?!1:e.every(function(n,r){return t[r]===n})}var Zr="'${name}' is not a valid ${type}",cS={default:"Validation error on field '${name}'",required:"'${name}' is required",enum:"'${name}' must be one of [${enum}]",whitespace:"'${name}' cannot be empty",date:{format:"'${name}' is invalid for format date",parse:"'${name}' could not be parsed as date",invalid:"'${name}' is invalid 
date"},types:{string:Zr,method:Zr,array:Zr,object:Zr,number:Zr,date:Zr,boolean:Zr,integer:Zr,float:Zr,regexp:Zr,email:Zr,url:Zr,hex:Zr},string:{len:"'${name}' must be exactly ${len} characters",min:"'${name}' must be at least ${min} characters",max:"'${name}' cannot be longer than ${max} characters",range:"'${name}' must be between ${min} and ${max} characters"},number:{len:"'${name}' must equal ${len}",min:"'${name}' cannot be less than ${min}",max:"'${name}' cannot be greater than ${max}",range:"'${name}' must be between ${min} and ${max}"},array:{len:"'${name}' must be exactly ${len} in length",min:"'${name}' cannot be less than ${min} in length",max:"'${name}' cannot be greater than ${max} in length",range:"'${name}' must be between ${min} and ${max} in length"},pattern:{mismatch:"'${name}' does not match pattern ${pattern}"}},Eg=globalThis&&globalThis.__awaiter||function(e,t,n,r){function a(o){return o instanceof n?o:new n(function(i){i(o)})}return new(n||(n=Promise))(function(o,i){function l(d){try{c(r.next(d))}catch(f){i(f)}}function s(d){try{c(r.throw(d))}catch(f){i(f)}}function c(d){d.done?o(d.value):a(d.value).then(l,s)}c((r=r.apply(e,t||[])).next())})},Q_e=al;function e6e(e,t){return e.replace(/\$\{\w+\}/g,function(n){var r=n.slice(2,-1);return t[r]})}function P1(e,t,n,r,a){return Eg(this,void 0,void 0,pl.mark(function o(){var i,l,s,c,d,f,p,v;return pl.wrap(function(y){for(;;)switch(y.prev=y.next){case 0:return i=P({},n),delete i.ruleIndex,l=null,i&&i.type==="array"&&i.defaultField&&(l=i.defaultField,delete i.defaultField),s=new Q_e(V({},e,[i])),c=Z_e({},cS,r.validateMessages),s.messages(c),d=[],y.prev=8,y.next=11,Promise.resolve(s.validate(V({},e,t),P({},r)));case 11:y.next=16;break;case 13:y.prev=13,y.t0=y.catch(8),y.t0.errors?d=y.t0.errors.map(function(b,C){var S=b.message;return zn(S)?hr(S,{key:"error_".concat(C)}):S}):(console.error(y.t0),d=[c.default()]);case 16:if(!(!d.length&&l)){y.next=21;break}return 
y.next=19,Promise.all(t.map(function(b,C){return P1("".concat(e,".").concat(C),b,l,r,a)}));case 19:return f=y.sent,y.abrupt("return",f.reduce(function(b,C){return[].concat(Je(b),Je(C))},[]));case 21:return p=P(P(P({},n),{name:e,enum:(n.enum||[]).join(", ")}),a),v=d.map(function(b){return typeof b=="string"?e6e(b,p):b}),y.abrupt("return",v);case 24:case"end":return y.stop()}},o,null,[[8,13]])}))}function y7(e,t,n,r,a,o){var i=this,l=e.join("."),s=n.map(function(f,p){var v=f.validator,m=P(P({},f),{ruleIndex:p});return v&&(m.validator=function(y,b,C){var S=!1,w=function(){for(var O=arguments.length,T=new Array(O),_=0;_<O;_++)T[_]=arguments[_];Promise.resolve().then(function(){S||C.apply(void 0,T)})},k=v(y,b,w);S=k&&typeof k.then=="function"&&typeof k.catch=="function",S&&k.then(function(){C()}).catch(function($){C($||" ")})}),m}).sort(function(f,p){var v=f.warningOnly,m=f.ruleIndex,y=p.warningOnly,b=p.ruleIndex;return!!v==!!y?m-b:v?1:-1}),c;if(a===!0)c=new Promise(function(f,p){return Eg(i,void 0,void 0,pl.mark(function v(){var m,y,b;return pl.wrap(function(S){for(;;)switch(S.prev=S.next){case 0:m=0;case 1:if(!(m<s.length)){S.next=12;break}return y=s[m],S.next=5,P1(l,t,y,r,o);case 5:if(b=S.sent,!b.length){S.next=9;break}return p([{errors:b,rule:y}]),S.abrupt("return");case 9:m+=1,S.next=1;break;case 12:f([]);case 13:case"end":return S.stop()}},v)}))});else{var d=s.map(function(f){return P1(l,t,f,r,o).then(function(p){return{errors:p,rule:f}})});c=(a?n6e(d):t6e(d)).then(function(f){return Promise.reject(f)})}return c.catch(function(f){return f}),c}function t6e(e){return Eg(this,void 0,void 0,pl.mark(function t(){return pl.wrap(function(r){for(;;)switch(r.prev=r.next){case 0:return r.abrupt("return",Promise.all(e).then(function(a){var o,i=(o=[]).concat.apply(o,Je(a));return i}));case 1:case"end":return r.stop()}},t)}))}function n6e(e){return Eg(this,void 0,void 0,pl.mark(function t(){var n;return pl.wrap(function(a){for(;;)switch(a.prev=a.next){case 0:return 
n=0,a.abrupt("return",new Promise(function(o){e.forEach(function(i){i.then(function(l){l.errors.length&&o([l]),n+=1,n===e.length&&o([])})})}));case 2:case"end":return a.stop()}},t)}))}var b7=Symbol("formContextKey"),C7=function(t){ot(b7,t)},dS=function(){return ve(b7,{labelAlign:x(function(){return"right"}),vertical:x(function(){return!1}),addField:function(n,r){},removeField:function(n){},model:x(function(){}),rules:x(function(){}),requiredMark:x(function(){return!1})})},w7=Symbol("formItemPrefixContextKey"),r6e=function(t){ot(w7,t)},a6e=function(){return ve(w7,{prefixCls:x(function(){return""})})},fS=function(t,n){var r,a=n.slots,o=n.emit,i=n.attrs,l,s,c,d,f,p=P(P({},t),i),v=p.prefixCls,m=p.htmlFor,y=p.labelCol,b=p.labelAlign,C=p.colon,S=p.required,w=p.requiredMark,k=f0e("Form"),$=fn(k,1),O=$[0],T=(l=t.label)!==null&&l!==void 0?l:(s=a.label)===null||s===void 0?void 0:s.call(a);if(!T)return null;var _=dS(),I=_.vertical,L=_.labelAlign,j=_.labelCol,F=_.colon,N=y||(j==null?void 0:j.value)||{},D=b||(L==null?void 0:L.value),z="".concat(v,"-item-label"),B=Se(z,D==="left"&&"".concat(z,"-left"),N.class),M=T,E=C===!0||(F==null?void 0:F.value)!==!1&&C!==!1,K=E&&!I.value;K&&typeof T=="string"&&T.trim()!==""&&(M=T.replace(/[:|：]\s*$/,"")),M=g(Fe,null,[M,(c=a.tooltip)===null||c===void 0?void 0:c.call(a,{class:"".concat(v,"-item-tooltip")})]),w==="optional"&&!S&&(M=g(Fe,null,[M,g("span",{class:"".concat(v,"-item-optional")},[((d=O.value)===null||d===void 0?void 0:d.optional)||((f=lo.Form)===null||f===void 0?void 0:f.optional)])]));var W=Se((r={},V(r,"".concat(v,"-item-required"),S),V(r,"".concat(v,"-item-required-mark-optional"),w==="optional"),V(r,"".concat(v,"-item-no-colon"),!E),r));return g(Og,le(le({},N),{},{class:B}),{default:function(){return[g("label",{"html-for":m,class:W,title:typeof T=="string"?T:"",onClick:function(J){return o("click",J)}},[M])]}})};fS.displayName="FormItemLabel";fS.inheritAttrs=!1;var 
o6e=fS,i6e=G({name:"ErrorList",props:["errors","help","onDomErrorVisibleChange"],setup:function(t){var n=Wt("",t),r=n.prefixCls,a=a6e(),o=a.prefixCls,i=a.status,l=H(!!(t.errors&&t.errors.length)),s=H(i.value),c=H(),d=H(Je(t.errors));return ce([function(){return Je(t.errors)},function(){return t.help}],function(f){window.clearTimeout(c.value),t.help?(l.value=!!(t.errors&&t.errors.length),l.value&&(d.value=f[0])):c.value=window.setTimeout(function(){l.value=!!(t.errors&&t.errors.length),l.value&&(d.value=f[0])})}),Lt(function(){window.clearTimeout(c.value)}),ce([l,i],function(){l.value&&i.value&&(s.value=i.value)}),ce(l,function(){var f;l.value&&((f=t.onDomErrorVisibleChange)===null||f===void 0||f.call(t,!0))},{immediate:!0,flush:"post"}),function(){var f,p="".concat(o.value,"-item-explain"),v=Lo("".concat(r.value,"-show-help"),{onAfterLeave:function(){var y;(y=t.onDomErrorVisibleChange)===null||y===void 0||y.call(t,!1)}});return g(Cg,v,{default:function(){return[l.value?g("div",{class:Se(p,V({},"".concat(p,"-").concat(s.value),s.value)),key:"help"},[(f=d.value)===null||f===void 0?void 0:f.map(function(y,b){return g("div",{key:b,role:"alert"},[y])})]):null]}})}}}),l6e={success:zf,warning:Hf,error:Yr,validating:co},s6e=G({slots:["help","extra","errors"],inheritAttrs:!1,props:["prefixCls","errors","hasFeedback","validateStatus","onDomErrorVisibleChange","wrapperCol","help","extra","status"],setup:function(t,n){var r=n.slots,a=dS(),o=a.wrapperCol,i=P({},a);return delete i.labelCol,delete i.wrapperCol,C7(i),r6e({prefixCls:x(function(){return t.prefixCls}),status:x(function(){return t.status})}),Wr(function(){t.onDomErrorVisibleChange(!1)}),function(){var l,s,c,d,f=t.prefixCls,p=t.wrapperCol,v=t.help,m=v===void 0?(l=r.help)===null||l===void 0?void 0:l.call(r):v,y=t.errors,b=y===void 0?(s=r.errors)===null||s===void 0?void 0:s.call(r):y,C=t.onDomErrorVisibleChange,S=t.hasFeedback,w=t.validateStatus,k=t.extra,$=k===void 0?(c=r.extra)===null||c===void 0?void 
0:c.call(r):k,O="".concat(f,"-item"),T=p||(o==null?void 0:o.value)||{},_=Se("".concat(O,"-control"),T.class),I=w&&l6e[w],L=S&&I?g("span",{class:"".concat(O,"-children-icon")},[g(I,null,null)]):null,j=g("div",{class:"".concat(O,"-control-input")},[g("div",{class:"".concat(O,"-control-input-content")},[(d=r.default)===null||d===void 0?void 0:d.call(r)]),L]),F=g(i6e,{errors:b,help:m,onDomErrorVisibleChange:C},null),N=$?g("div",{class:"".concat(O,"-extra")},[$]):null;return g(Og,le(le({},T),{},{class:_}),{default:function(){return[j,F,N]}})}}}),u6e=s6e;rt("success","warning","error","validating","");function fb(e,t,n){var r=e,a=t,o=0;try{for(var i=a.length;o<i-1&&!(!r&&!n);++o){var l=a[o];if(l in r)r=r[l];else{if(n)throw Error("please transfer a valid name path to form item!");break}}if(n&&!r)throw Error("please transfer a valid name path to form item!")}catch{console.error("please transfer a valid name path to form item!")}return{o:r,k:a[o],v:r?r[a[o]]:void 0}}var c6e={id:u.string,htmlFor:u.string,prefixCls:u.string,label:u.VNodeChild,help:u.VNodeChild,extra:u.VNodeChild,labelCol:{type:Object},wrapperCol:{type:Object},hasFeedback:u.looseBool.def(!1),colon:u.looseBool,labelAlign:u.oneOf(rt("left","right")),prop:{type:[String,Number,Array]},name:{type:[String,Number,Array]},rules:u.oneOfType([Array,Object]),autoLink:u.looseBool.def(!0),required:u.looseBool,validateFirst:u.looseBool,validateStatus:u.oneOf(rt("","success","warning","error","validating")),validateTrigger:{type:[String,Array]},messageVariables:{type:Object},hidden:Boolean},d6e=0,S7=G({name:"AFormItem",mixins:[nt],inheritAttrs:!1,__ANT_NEW_FORM_ITEM:!0,props:c6e,slots:["help","label","extra"],setup:function(t,n){var r=n.slots,a=n.attrs,o=n.expose;t.prop;var i="form-item-".concat(++d6e),l=Wt("form",t),s=l.prefixCls,c=dS(),d=x(function(){return t.name||t.prop}),f=H([]),p=H(!1),v=H(!1),m=H(),y=x(function(){var z=d.value;return O1(z)}),b=x(function(){var z=t.id;if(z)return z;if(y.value.length){var 
B=c.name.value,M=y.value.join("_");return B?"".concat(B,"_").concat(M):M}else return}),C=x(function(){var z=c.model.value;if(!(!z||!d.value))return fb(z,y.value,!0).v}),S=H(lp(C.value)),w=x(function(){var z=t.validateTrigger!==void 0?t.validateTrigger:c.validateTrigger.value;return z=z===void 0?"change":z,vi(z)}),k=x(function(){var z=c.rules.value,B=t.rules,M=t.required!==void 0?{required:!!t.required,trigger:w.value}:[],E=fb(z,y.value);z=z?E.o[E.k]||E.v:[];var K=[].concat(B||z||[]);return aW(K,function(W){return W.required})?K:K.concat(M)}),$=x(function(){var z=k.value,B=!1;return z&&z.length&&z.every(function(M){return M.required?(B=!0,!1):!0}),B||t.required}),O=H();Wn(function(){O.value=t.validateStatus});var T=function(B){var M=t.validateFirst,E=M===void 0?!1:M,K=t.messageVariables,W=B||{},Y=W.triggerName,q=k.value;if(Y&&(q=q.filter(function(ne){var oe=ne.trigger;if(!oe&&!w.value.length)return!0;var Q=vi(oe||w.value);return Q.includes(Y)})),!q.length)return Promise.resolve();var J=y7(y.value,C.value,q,B,E,K);return O.value="validating",f.value=[],J.catch(function(ne){return ne}).then(function(){var ne=arguments.length>0&&arguments[0]!==void 0?arguments[0]:[];if(O.value==="validating"){var oe=ne.filter(function(Q){return Q&&Q.errors.length});O.value=oe.length?"error":"success",f.value=oe.map(function(Q){return Q.errors})}}),J},_=function(){T({triggerName:"blur"})},I=function(){if(p.value){p.value=!1;return}T({triggerName:"change"})},L=function(){O.value="",p.value=!1,f.value=[]},j=function(){O.value="",p.value=!0,f.value=[];var B=c.model.value||{},M=C.value,E=fb(B,y.value,!0);Array.isArray(M)?E.o[E.k]=[].concat(S.value):E.o[E.k]=S.value,Ne(function(){p.value=!1})},F=function(){var B=b.value;if(!(!B||!m.value)){var M=m.value.$el.querySelector('[id="'.concat(B,'"]'));M&&M.focus&&M.focus()}};o({onFieldBlur:_,onFieldChange:I,clearValidate:L,resetField:j});var 
N=!1;ce(d,function(z){z?N||(N=!0,c.addField(i,{fieldValue:C,fieldId:b,fieldName:d,resetField:j,clearValidate:L,namePath:y,validateRules:T,rules:k})):(N=!1,c.removeField(i))},{immediate:!0}),Lt(function(){c.removeField(i)});var D=x(function(){var z;return z={},V(z,"".concat(s.value,"-item"),!0),V(z,"".concat(s.value,"-item-has-feedback"),O.value&&t.hasFeedback),V(z,"".concat(s.value,"-item-has-success"),O.value==="success"),V(z,"".concat(s.value,"-item-has-warning"),O.value==="warning"),V(z,"".concat(s.value,"-item-has-error"),O.value==="error"),V(z,"".concat(s.value,"-item-is-validating"),O.value==="validating"),V(z,"".concat(s.value,"-item-hidden"),t.hidden),z});return function(){var z,B,M,E,K,W,Y=(z=t.help)!==null&&z!==void 0?z:r.help?La(r.help()):null,q=Un((B=r.default)===null||B===void 0?void 0:B.call(r)),J=q[0];if(d.value&&t.autoLink&&zn(J)){var ne=J.props||{},oe=ne.onBlur,Q=ne.onChange;J=Ot(J,P(P({},b.value?{id:b.value}:void 0),{onBlur:function(){if(Array.isArray(Q))for(var de=0,be=Q.length;de<be;de++)oe[de].apply(oe,arguments);else oe&&oe.apply(void 0,arguments);_()},onChange:function(){if(Array.isArray(Q))for(var de=0,be=Q.length;de<be;de++)Q[de].apply(Q,arguments);else Q&&Q.apply(void 0,arguments);I()}}))}return g(Q2,le(le({},a),{},{class:[D.value,v.value||!!Y?"".concat(s.value,"-item-with-help"):"",a.class],key:"row"}),{default:function(){return[g(o6e,le(le({},t),{},{htmlFor:b.value,required:$.value,requiredMark:c.requiredMark.value,prefixCls:s.value,onClick:F,label:(M=t.label)!==null&&M!==void 0?M:(E=r.label)===null||E===void 0?void 0:E.call(r)}),null),g(u6e,le(le({},t),{},{errors:Y!=null?vi(Y):f.value,prefixCls:s.value,status:O.value,onDomErrorVisibleChange:function(be){return v.value=be},validateStatus:O.value,ref:m,help:Y,extra:(K=t.extra)!==null&&K!==void 0?K:(W=r.extra)===null||W===void 0?void 0:W.call(r)}),{default:function(){return[[J,q.slice(1)]]}})]}})}}});function k7(e){var t=!1,n=e.length,r=[];return e.length?new 
Promise(function(a,o){/* Each input is caught so it always fulfils; t records whether any input rejected, n counts the inputs still pending, r collects results by index. When the last one settles, reject with r if anything failed (the following resolve call then has no effect), otherwise resolve with r. */e.forEach(function(i,l){i.catch(function(s){return t=!0,s}).then(function(s){n-=1,r[l]=s,!(n>0)&&(t&&o(r),a(r))})})}):Promise.resolve([])}/* BT: true when e is a non-null object whose nodeType is 1. */function BT(e){return typeof e=="object"&&e!=null&&e.nodeType===1}/* VT: true when the overflow value e is neither "visible" nor "clip"; "hidden" is also rejected when t is truthy. */function VT(e,t){return(!t||e!=="hidden")&&e!=="visible"&&e!=="clip"}/* hb: whether element e is treated as scrollable - its content must exceed its client box, and then either overflowY/overflowX passes VT or the frame element hosting e's document is smaller than e's scroll size. t is forwarded to VT. */function hb(e,t){if(e.clientHeight<e.scrollHeight||e.clientWidth<e.scrollWidth){var n=getComputedStyle(e,null);return VT(n.overflowY,t)||VT(n.overflowX,t)||function(r){var a=function(o){if(!o.ownerDocument||!o.ownerDocument.defaultView)return null;try{return o.ownerDocument.defaultView.frameElement}catch{return null}}(r);return!!a&&(a.clientHeight<r.scrollHeight||a.clientWidth<r.scrollWidth)}(e)}return!1}/* Wh: one-axis scroll delta used by zT for "nearest" alignment. As called from zT: e/t are the start/end edges of the scrolling box, n its size, r/a its start/end border (plus scrollbar) widths, o/i the start/end edges of the target and l the target size. Returns 0 when no scrolling is needed on that axis. */function Wh(e,t,n,r,a,o,i,l){return o<e&&i>t||o>e&&i<t?0:o<=e&&l<=n||i>=t&&l>=n?o-e-r:i>t&&l<n||o<e&&l>n?i-t+a:0}/* zT: compute the list of {el, top, left} scroll actions that bring target e into view according to options t (scrollMode, block, inline, boundary, skipOverflowHiddenElements). Throws TypeError("Invalid target") when e fails the BT check. */function zT(e,t){var n=window,r=t.scrollMode,a=t.block,o=t.inline,i=t.boundary,l=t.skipOverflowHiddenElements,s=typeof i=="function"?i:function(Ee){return Ee!==i};if(!BT(e))throw new TypeError("Invalid target");/* Walk up the parent chain collecting scrollable ancestors in d; stop at the boundary or once the document scrolling element c has been added. */for(var c=document.scrollingElement||document.documentElement,d=[],f=e;BT(f)&&s(f);){if((f=f.parentElement)===c){d.push(f);break}f!=null&&f===document.body&&hb(f)&&!hb(document.documentElement)||f!=null&&hb(f,l)&&d.push(f)}/* p/v: viewport width/height, m/y: current page scroll offsets, T/_: the target's block/inline reference coordinates derived from its bounding rect and the block/inline options. */for(var p=n.visualViewport?n.visualViewport.width:innerWidth,v=n.visualViewport?n.visualViewport.height:innerHeight,m=window.scrollX||pageXOffset,y=window.scrollY||pageYOffset,b=e.getBoundingClientRect(),C=b.height,S=b.width,w=b.top,k=b.right,$=b.bottom,O=b.left,T=a==="start"||a==="nearest"?w:a==="end"?$:w+C/2,_=o==="center"?O+S/2:o==="end"?k:O,I=[],L=0;L<d.length;L++){var j=d[L],F=j.getBoundingClientRect(),N=F.height,D=F.width,z=F.top,B=F.right,M=F.bottom,E=F.left;/* In "if-needed" mode, stop as soon as the target lies fully inside both the viewport and the current frame. */if(r==="if-needed"&&w>=0&&O>=0&&$<=v&&k<=p&&w>=z&&$<=M&&O>=E&&k<=B)return I;var K=getComputedStyle(j),W=parseInt(K.borderLeftWidth,10),Y=parseInt(K.borderTopWidth,10),q=parseInt(K.borderRightWidth,10),J=parseInt(K.borderBottomWidth,10),ne=0,oe=0,Q="offsetWidth"in 
j?j.offsetWidth-j.clientWidth-W-q:0,ae="offsetHeight"in j?j.offsetHeight-j.clientHeight-Y-J:0;if(c===j)ne=a==="start"?T:a==="end"?T-v:a==="nearest"?Wh(y,y+v,v,Y,J,y+T,y+T+C,C):T-v/2,oe=o==="start"?_:o==="center"?_-p/2:o==="end"?_-p:Wh(m,m+p,p,W,q,m+_,m+_+S,S),ne=Math.max(0,ne+y),oe=Math.max(0,oe+m);else{ne=a==="start"?T-z-Y:a==="end"?T-M+J+ae:a==="nearest"?Wh(z,M,N,Y,J+ae,T,T+C,C):T-(z+N/2)+ae/2,oe=o==="start"?_-E-W:o==="center"?_-(E+D/2)+Q/2:o==="end"?_-B+q+Q:Wh(E,B,D,W,q+Q,_,_+S,S);var de=j.scrollLeft,be=j.scrollTop;T+=be-(ne=Math.max(0,Math.min(be+ne,j.scrollHeight-N+ae))),_+=de-(oe=Math.max(0,Math.min(de+oe,j.scrollWidth-D+Q)))}I.push({el:j,top:ne,left:oe})}return I}function $7(e){return e===Object(e)&&Object.keys(e).length!==0}function f6e(e,t){t===void 0&&(t="auto");var n="scrollBehavior"in document.body.style;e.forEach(function(r){var a=r.el,o=r.top,i=r.left;a.scroll&&n?a.scroll({top:o,left:i,behavior:t}):(a.scrollTop=o,a.scrollLeft=i)})}function h6e(e){return e===!1?{block:"end",inline:"nearest"}:$7(e)?e:{block:"start",inline:"nearest"}}function p6e(e,t){var n=e.isConnected||e.ownerDocument.documentElement.contains(e);if($7(t)&&typeof t.behavior=="function")return t.behavior(n?zT(e,t):[]);if(!!n){var r=h6e(t);return f6e(zT(e,r),r.behavior)}}function HT(e){var t=!1;return e&&e.length&&e.every(function(n){return n.required?(t=!0,!1):!0}),t}function jT(e){return e==null?[]:Array.isArray(e)?e:[e]}function pb(e,t,n){var r=e;t=t.replace(/\[(\w+)\]/g,".$1"),t=t.replace(/^\./,"");for(var a=t.split("."),o=0,i=a.length;o<i-1&&!(!r&&!n);++o){var l=a[o];if(l in r)r=r[l];else{if(n)throw new Error("please transfer a valid name path to validate!");break}}return{o:r,k:a[o],v:r?r[a[o]]:null,isValid:r&&a[o]in r}}function v6e(e,t,n){var r=lp(A(e)),a=bt({}),o=x(function(){return t?Object.keys(A(t)):[]});ce(o,function(){var S={};o.value.forEach(function(k){S[k]=a[k]||{autoLink:!1,required:HT(A(t)[k])},delete a[k]});for(var w in 
a)Object.prototype.hasOwnProperty.call(a,w)&&delete a[w];P(a,S)},{immediate:!0});var i=function(w){P(A(e),P(P({},lp(r)),w)),Ne(function(){Object.keys(a).forEach(function(k){a[k]={autoLink:!1,required:HT(A(t)[k])}})})},l=function(){var w=arguments.length>0&&arguments[0]!==void 0?arguments[0]:[],k=arguments.length>1?arguments[1]:void 0;return k.length?w.filter(function($){var O=jT($.trigger||"change");return fW(O,k).length}):w},s=null,c=function(w){for(var k=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{},$=arguments.length>2?arguments[2]:void 0,O=[],T={},_=function(D){var z=w[D],B=pb(A(e),z,$);if(!B.isValid)return"continue";T[z]=B.v;var M=l(A(t)[z],jT(k&&k.trigger));M.length&&O.push(d(z,B.v,M,k||{}).then(function(){return{name:z,errors:[],warnings:[]}}).catch(function(E){var K=[],W=[];return E.forEach(function(Y){var q=Y.rule.warningOnly,J=Y.errors;q?W.push.apply(W,Je(J)):K.push.apply(K,Je(J))}),K.length?Promise.reject({name:z,errors:K,warnings:W}):{name:z,errors:K,warnings:W}}))},I=0;I<w.length;I++)var L=_(I);var j=k7(O);s=j;var F=j.then(function(){return s===j?Promise.resolve(T):Promise.reject([])}).catch(function(N){var D=N.filter(function(z){return z&&z.errors.length});return Promise.reject({values:T,errorFields:D,outOfDate:s!==j})});return F.catch(function(N){return N}),F},d=function(w,k,$){var O=arguments.length>3&&arguments[3]!==void 0?arguments[3]:{},T=y7([w],k,$,P({validateMessages:cS},O),!!O.validateFirst);return a[w]?(a[w].validateStatus="validating",T.catch(function(_){return _}).then(function(){var _=arguments.length>0&&arguments[0]!==void 0?arguments[0]:[];if(a[w].validateStatus==="validating"){var I=_.filter(function(L){return L&&L.errors.length});a[w].validateStatus=I.length?"error":"success",a[w].help=I.length?I.map(function(L){return L.errors}):""}}),T):T.catch(function(_){return _})},f=function(w,k){var $=[],O=!0;w?Array.isArray(w)?$=w:$=[w]:(O=!1,$=o.value);var T=c($,k||{},O);return T.catch(function(_){return _}),T},p=function(w){var 
k=[];w?Array.isArray(w)?k=w:k=[w]:k=o.value,k.forEach(function($){a[$]&&P(a[$],{validateStatus:"",help:""})})},v=function(w){for(var k={autoLink:!1},$=[],O=Array.isArray(w)?w:[w],T=0;T<O.length;T++){var _=O[T];(_==null?void 0:_.validateStatus)==="error"&&(k.validateStatus="error",_.help&&$.push(_.help)),k.required=k.required||(_==null?void 0:_.required)}return k.help=$,k},m=r,y=!0,b=function(w){var k=[];o.value.forEach(function($){var O=pb(w,$,!1),T=pb(m,$,!1),_=y&&(n==null?void 0:n.immediate)&&O.isValid;(_||!er(O.v,T.v))&&k.push($)}),f(k,{trigger:"change"}),y=!1,m=lp(w)},C=n==null?void 0:n.debounce;return ce(e,C&&C.wait?Yn(b,C.wait,MC(C,["wait"])):b,{immediate:n&&!!n.immediate,deep:!0}),ce(t,function(){n&&n.validateOnRuleChange&&f()},{deep:!0}),{modelRef:e,rulesRef:t,initialModel:r,validateInfos:a,resetFields:i,validate:f,validateField:d,mergeValidateInfo:v,clearValidate:p}}var m6e={layout:u.oneOf(rt("horizontal","inline","vertical")),labelCol:{type:Object},wrapperCol:{type:Object},colon:u.looseBool,labelAlign:u.oneOf(rt("left","right")),prefixCls:u.string,requiredMark:{type:[String,Boolean],default:void 0},hideRequiredMark:u.looseBool,model:u.object,rules:{type:Object},validateMessages:u.object,validateOnRuleChange:u.looseBool,scrollToFirstError:{type:[Boolean,Object]},onSubmit:u.func,onFinish:u.func,onFinishFailed:u.func,name:u.string,validateTrigger:{type:[String,Array]},size:{type:String}};function g6e(e,t){return er(vi(e),vi(t))}var y6e=G({name:"AForm",inheritAttrs:!1,props:Rn(m6e,{layout:"horizontal",hideRequiredMark:!1,colon:!0}),Item:S7,useForm:v6e,emits:["finishFailed","submit","finish"],setup:function(t,n){var r=n.emit,a=n.slots,o=n.expose,i=n.attrs,l=zA(t),s=Wt("form",t),c=s.prefixCls,d=s.direction,f=s.form,p=x(function(){return t.requiredMark===""||t.requiredMark}),v=x(function(){var N;return p.value!==void 0?p.value:f&&((N=f.value)===null||N===void 0?void 0:N.requiredMark)!==void 0?f.value.requiredMark:!t.hideRequiredMark}),m=x(function(){var N;return 
Se(c.value,(N={},V(N,"".concat(c.value,"-").concat(t.layout),!0),V(N,"".concat(c.value,"-hide-required-mark"),v.value===!1),V(N,"".concat(c.value,"-rtl"),d.value==="rtl"),V(N,"".concat(c.value,"-").concat(l.value),l.value),N))}),y=H(),b={},C=function(D,z){b[D]=z},S=function(D){delete b[D]},w=function(D){var z=!!D,B=z?vi(D).map(O1):[];return z?Object.values(b).filter(function(M){return B.findIndex(function(E){return g6e(E,M.fieldName.value)})>-1}):Object.values(b)},k=function(D){if(!t.model){on(!1,"Form","model is required for resetFields to work.");return}w(D).forEach(function(z){z.resetField()})},$=function(D){w(D).forEach(function(z){z.clearValidate()})},O=function(D){var z=t.scrollToFirstError;if(r("finishFailed",D),z&&D.errorFields.length){var B={};kt(z)==="object"&&(B=z),_(D.errorFields[0].name,B)}},T=function(){return j.apply(void 0,arguments)},_=function(D){var z=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{},B=w(D);if(B.length){var M=B[0].fieldId.value,E=M?document.getElementById(M):null;E&&p6e(E,P({scrollMode:"if-needed",block:"nearest"},z))}},I=function(){var D=arguments.length>0&&arguments[0]!==void 0?arguments[0]:!0,z={};if(Object.values(b).forEach(function(M){var E=M.fieldName,K=M.fieldValue;z[E.value]=K.value}),D===!0)return z;var B={};return vi(D).forEach(function(M){return B[M]=z[M]}),B},L=function(D,z){if(on(!(D instanceof Function),"Form","validateFields/validateField/validate not support callback, please use promise instead"),!t.model)return on(!1,"Form","model is required for validateFields to work."),Promise.reject("Form `model` is required for validateFields to work.");var B=!!D,M=B?vi(D).map(O1):[],E=[];Object.values(b).forEach(function(Y){var q;if(B||M.push(Y.namePath.value),!!(!((q=Y.rules)===null||q===void 0)&&q.value.length)){var J=Y.namePath.value;if(!B||X_e(M,J)){var 
ne=Y.validateRules(P({validateMessages:P(P({},cS),t.validateMessages)},z));E.push(ne.then(function(){return{name:J,errors:[],warnings:[]}}).catch(function(oe){var Q=[],ae=[];return oe.forEach(function(de){var be=de.rule.warningOnly,Ee=de.errors;be?ae.push.apply(ae,Je(Ee)):Q.push.apply(Q,Je(Ee))}),Q.length?Promise.reject({name:J,errors:Q,warnings:ae}):{name:J,errors:Q,warnings:ae}}))}}});var K=k7(E);y.value=K;var W=K.then(function(){return y.value===K?Promise.resolve(I(M)):Promise.reject([])}).catch(function(Y){var q=Y.filter(function(J){return J&&J.errors.length});return Promise.reject({values:I(M),errorFields:q,outOfDate:y.value!==K})});return W.catch(function(Y){return Y}),W},j=function(){return L.apply(void 0,arguments)},F=function(D){if(D.preventDefault(),D.stopPropagation(),r("submit",D),t.model){var z=L();z.then(function(B){r("finish",B)}).catch(function(B){O(B)})}};return o({resetFields:k,clearValidate:$,validateFields:L,getFieldsValue:I,validate:T,scrollToField:_}),C7({model:x(function(){return t.model}),name:x(function(){return t.name}),labelAlign:x(function(){return t.labelAlign}),labelCol:x(function(){return t.labelCol}),wrapperCol:x(function(){return t.wrapperCol}),vertical:x(function(){return t.layout==="vertical"}),colon:x(function(){return t.colon}),requiredMark:v,validateTrigger:x(function(){return t.validateTrigger}),rules:x(function(){return t.rules}),addField:C,removeField:S}),ce(function(){return t.rules},function(){t.validateOnRuleChange&&L()}),function(){var N;return g("form",le(le({},i),{},{onSubmit:F,class:[m.value,i.class]}),[(N=a.default)===null||N===void 0?void 0:N.call(a)])}}}),pu=y6e;pu.install=function(e){return e.component(pu.name,pu),e.component(pu.Item.name,pu.Item),e};function b6e(){var e=document.documentElement.clientWidth,t=window.innerHeight||document.documentElement.clientHeight;return{width:e,height:t}}function O7(e){var 
t=e.getBoundingClientRect(),n=document.documentElement;return{left:t.left+(window.pageXOffset||n.scrollLeft)-(n.clientLeft||document.body.clientLeft||0),top:t.top+(window.pageYOffset||n.scrollTop)-(n.clientTop||document.body.clientTop||0)}}var C6e={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"defs",attrs:{},children:[{tag:"style",attrs:{}}]},{tag:"path",attrs:{d:"M672 418H144c-17.7 0-32 14.3-32 32v414c0 17.7 14.3 32 32 32h528c17.7 0 32-14.3 32-32V450c0-17.7-14.3-32-32-32zm-44 402H188V494h440v326z"}},{tag:"path",attrs:{d:"M819.3 328.5c-78.8-100.7-196-153.6-314.6-154.2l-.2-64c0-6.5-7.6-10.1-12.6-6.1l-128 101c-4 3.1-3.9 9.1 0 12.3L492 318.6c5.1 4 12.7.4 12.6-6.1v-63.9c12.9.1 25.9.9 38.8 2.5 42.1 5.2 82.1 18.2 119 38.7 38.1 21.2 71.2 49.7 98.4 84.3 27.1 34.7 46.7 73.7 58.1 115.8a325.95 325.95 0 016.5 140.9h74.9c14.8-103.6-11.3-213-81-302.3z"}}]},name:"rotate-left",theme:"outlined"},w6e=C6e;function KT(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){S6e(e,a,n[a])})}return e}function S6e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var hS=function(t,n){var r=KT({},t,n.attrs);return g(Et,KT({},r,{icon:w6e}),null)};hS.displayName="RotateLeftOutlined";hS.inheritAttrs=!1;var k6e=hS,$6e={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"defs",attrs:{},children:[{tag:"style",attrs:{}}]},{tag:"path",attrs:{d:"M480.5 251.2c13-1.6 25.9-2.4 38.8-2.5v63.9c0 6.5 7.5 10.1 12.6 6.1L660 217.6c4-3.2 4-9.2 0-12.3l-128-101c-5.1-4-12.6-.4-12.6 6.1l-.2 64c-118.6.5-235.8 53.4-314.6 154.2A399.75 399.75 0 00123.5 631h74.9c-.9-5.3-1.7-10.7-2.4-16.1-5.1-42.1-2.1-84.1 8.9-124.8 11.4-42.2 31-81.1 58.1-115.8 27.2-34.7 
60.3-63.2 98.4-84.3 37-20.6 76.9-33.6 119.1-38.8z"}},{tag:"path",attrs:{d:"M880 418H352c-17.7 0-32 14.3-32 32v414c0 17.7 14.3 32 32 32h528c17.7 0 32-14.3 32-32V450c0-17.7-14.3-32-32-32zm-44 402H396V494h440v326z"}}]},name:"rotate-right",theme:"outlined"},O6e=$6e;function WT(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){P6e(e,a,n[a])})}return e}function P6e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var pS=function(t,n){var r=WT({},t,n.attrs);return g(Et,WT({},r,{icon:O6e}),null)};pS.displayName="RotateRightOutlined";pS.inheritAttrs=!1;var T6e=pS,x6e={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M637 443H519V309c0-4.4-3.6-8-8-8h-60c-4.4 0-8 3.6-8 8v134H325c-4.4 0-8 3.6-8 8v60c0 4.4 3.6 8 8 8h118v134c0 4.4 3.6 8 8 8h60c4.4 0 8-3.6 8-8V519h118c4.4 0 8-3.6 8-8v-60c0-4.4-3.6-8-8-8zm284 424L775 721c122.1-148.9 113.6-369.5-26-509-148-148.1-388.4-148.1-537 0-148.1 148.6-148.1 389 0 537 139.5 139.6 360.1 148.1 509 26l146 146c3.2 2.8 8.3 2.8 11 0l43-43c2.8-2.7 2.8-7.8 0-11zM696 696c-118.8 118.7-311.2 118.7-430 0-118.7-118.8-118.7-311.2 0-430 118.8-118.7 311.2-118.7 430 0 118.7 118.8 118.7 311.2 0 430z"}}]},name:"zoom-in",theme:"outlined"},_6e=x6e;function UT(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){E6e(e,a,n[a])})}return e}function E6e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var 
vS=function(t,n){var r=UT({},t,n.attrs);return g(Et,UT({},r,{icon:_6e}),null)};vS.displayName="ZoomInOutlined";vS.inheritAttrs=!1;var M6e=vS,I6e={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M637 443H325c-4.4 0-8 3.6-8 8v60c0 4.4 3.6 8 8 8h312c4.4 0 8-3.6 8-8v-60c0-4.4-3.6-8-8-8zm284 424L775 721c122.1-148.9 113.6-369.5-26-509-148-148.1-388.4-148.1-537 0-148.1 148.6-148.1 389 0 537 139.5 139.6 360.1 148.1 509 26l146 146c3.2 2.8 8.3 2.8 11 0l43-43c2.8-2.7 2.8-7.8 0-11zM696 696c-118.8 118.7-311.2 118.7-430 0-118.7-118.8-118.7-311.2 0-430 118.8-118.7 311.2-118.7 430 0 118.7 118.8 118.7 311.2 0 430z"}}]},name:"zoom-out",theme:"outlined"},N6e=I6e;function YT(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){A6e(e,a,n[a])})}return e}function A6e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var mS=function(t,n){var r=YT({},t,n.attrs);return g(Et,YT({},r,{icon:N6e}),null)};mS.displayName="ZoomOutOutlined";mS.inheritAttrs=!1;var D6e=mS,R6e={visible:u.looseBool,hiddenClassName:u.string,forceRender:u.looseBool},qT={props:R6e,render:function(){return g("div",null,[ht(this)])}},sm=function(e){var t=document.body.scrollHeight>(window.innerHeight||document.documentElement.clientHeight)&&window.innerWidth>document.body.offsetWidth;if(!!t){if(e){document.body.style.position="",document.body.style.width="";return}var n=d7();n&&(document.body.style.position="relative",document.body.style.width="calc(100% - ".concat(n,"px)"))}};function 
gS(){return{keyboard:u.looseBool,mask:u.looseBool,afterClose:u.func,closable:u.looseBool,maskClosable:u.looseBool,visible:u.looseBool,destroyOnClose:u.looseBool,mousePosition:u.shape({x:u.number,y:u.number}).loose,title:u.any,footer:u.any,transitionName:u.string,maskTransitionName:u.string,animation:u.any,maskAnimation:u.any,wrapStyle:u.object,bodyStyle:u.object,maskStyle:u.object,prefixCls:u.string,wrapClassName:u.string,width:u.oneOfType([u.string,u.number]),height:u.oneOfType([u.string,u.number]),zIndex:u.number,bodyProps:u.any,maskProps:u.any,wrapProps:u.any,getContainer:u.any,dialogStyle:u.object,dialogClass:u.string,closeIcon:u.any,forceRender:u.looseBool,getOpenCount:u.func,focusTriggerAfterClose:u.looseBool,onClose:u.func}}var L6e=gS(),F6e=0;function vb(){}function GT(e,t){var n=e["page".concat(t?"Y":"X","Offset")],r="scroll".concat(t?"Top":"Left");if(typeof n!="number"){var a=e.document;n=a.documentElement[r],typeof n!="number"&&(n=a.body[r])}return n}function XT(e,t){var n=e.style;["Webkit","Moz","Ms","ms"].forEach(function(r){n["".concat(r,"TransformOrigin")]=t}),n.transformOrigin=t}function B6e(e){var t=e.getBoundingClientRect(),n={left:t.left,top:t.top},r=e.ownerDocument,a=r.defaultView||r.parentWindow;return n.left+=GT(a),n.top+=GT(a,!0),n}var Zo={},ZT=G({name:"VcDialog",mixins:[nt],inheritAttrs:!1,props:An(L6e,{mask:!0,visible:!1,keyboard:!0,closable:!0,maskClosable:!0,destroyOnClose:!1,prefixCls:"rc-dialog",getOpenCount:function(){return null},focusTriggerAfterClose:!0}),data:function(){return on(!this.dialogClass,"Modal","dialogClass is deprecated, please use class instead."),on(!this.dialogStyle,"Modal","dialogStyle is deprecated, please use style instead."),{inTransition:!1,titleId:"rcDialogTitle".concat(F6e++),dialogMouseDown:void 0}},watch:{visible:function(t){var n=this;this.$nextTick(function(){n.updatedCallback(!t)})}},created:function(){ot("dialogContext",this)},mounted:function(){var 
t=this;this.$nextTick(function(){t.updatedCallback(!1),(t.forceRender||t.getContainer===!1&&!t.visible)&&t.$refs.wrap&&(t.$refs.wrap.style.display="none")})},beforeUnmount:function(){var t=this.visible,n=this.getOpenCount;(t||this.inTransition)&&!n()&&this.switchScrollingEffect(),clearTimeout(this.timeoutId)},methods:{getDialogWrap:function(){return this.$refs.wrap},updatedCallback:function(t){var n=this.mousePosition,r=this.mask,a=this.focusTriggerAfterClose;if(this.visible){if(!t){this.openTime=Date.now(),this.switchScrollingEffect(),this.tryFocus();var o=Sn(this.$refs.dialog);if(n){var i=B6e(o);XT(o,"".concat(n.x-i.left,"px ").concat(n.y-i.top,"px"))}else XT(o,"")}}else if(t&&(this.inTransition=!0,r&&this.lastOutSideFocusNode&&a)){try{this.lastOutSideFocusNode.focus()}catch{this.lastOutSideFocusNode=null}this.lastOutSideFocusNode=null}},tryFocus:function(){bu(this.$refs.wrap,document.activeElement)||(this.lastOutSideFocusNode=document.activeElement,this.$refs.sentinelStart.focus())},onAnimateLeave:function(){var t=this.afterClose;this.$refs.wrap&&(this.$refs.wrap.style.display="none"),this.inTransition=!1,this.switchScrollingEffect(),t&&t()},onDialogMouseDown:function(){this.dialogMouseDown=!0},onMaskMouseUp:function(){var t=this;this.dialogMouseDown&&(this.timeoutId=setTimeout(function(){t.dialogMouseDown=!1},0))},onMaskClick:function(t){Date.now()-this.openTime<300||t.target===t.currentTarget&&!this.dialogMouseDown&&this.close(t)},onKeydown:function(t){var n=this.$props;if(n.keyboard&&t.keyCode===ze.ESC){t.stopPropagation(),this.close(t);return}if(n.visible&&t.keyCode===ze.TAB){var r=document.activeElement,a=this.$refs.sentinelStart;t.shiftKey?r===a&&this.$refs.sentinelEnd.focus():r===this.$refs.sentinelEnd&&a.focus()}},getDialogElement:function(){var t=this,n=this.closable,r=this.prefixCls,a=this.width,o=this.height,i=this.title,l=this.footer,s=this.bodyStyle,c=this.visible,d=this.bodyProps,f=this.forceRender,p=this.closeIcon,v=this.dialogStyle,m=v===void 
0?{}:v,y=this.dialogClass,b=y===void 0?"":y,C=P({},m);a!==void 0&&(C.width=typeof a=="number"?"".concat(a,"px"):a),o!==void 0&&(C.height=typeof o=="number"?"".concat(o,"px"):o);var S;l&&(S=g("div",{key:"footer",class:"".concat(r,"-footer"),ref:"footer"},[l]));var w;i&&(w=g("div",{key:"header",class:"".concat(r,"-header"),ref:"header"},[g("div",{class:"".concat(r,"-title"),id:this.titleId},[i])]));var k;n&&(k=g("button",{type:"button",key:"close",onClick:this.close||vb,"aria-label":"Close",class:"".concat(r,"-close")},[p||g("span",{class:"".concat(r,"-close-x")},null)]));var $=this.$attrs,O=$.style,T=$.class,_=P(P({},O),C),I={width:0,height:0,overflow:"hidden"},L=[r,T,b],j=this.getTransitionName(),F=at(g(qT,{key:"dialog-element",role:"document",ref:"dialog",style:_,class:L,forceRender:f,onMousedown:this.onDialogMouseDown},{default:function(){return[g("div",{tabindex:0,ref:"sentinelStart",style:I,"aria-hidden":"true"},null),g("div",{class:"".concat(r,"-content")},[k,w,g("div",le({key:"body",class:"".concat(r,"-body"),style:s,ref:"body"},d),[ht(t)]),S]),g("div",{tabindex:0,ref:"sentinelEnd",style:I,"aria-hidden":"true"},null)]}}),[[_t,c]]),N=Lo(j,{onAfterLeave:this.onAnimateLeave});return g(no,le({key:"dialog"},N),{default:function(){return[c||!t.destroyOnClose?F:null]}})},getZIndexStyle:function(){var t={},n=this.$props;return n.zIndex!==void 0&&(t.zIndex=n.zIndex),t},getWrapStyle:function(){return P(P({},this.getZIndexStyle()),this.wrapStyle)},getMaskStyle:function(){return P(P({},this.getZIndexStyle()),this.maskStyle)},getMaskElement:function(){var t=this.$props,n;if(t.mask){var r=this.getMaskTransitionName(),a=at(g(qT,le({style:this.getMaskStyle(),key:"mask",class:"".concat(t.prefixCls,"-mask")},t.maskProps||{}),null),[[_t,t.visible]]);if(r){var o=Lo(r);n=g(no,le({key:"mask"},o),{default:function(){return[a]}})}else n=a}return n},getMaskTransitionName:function(){var 
t=this.$props,n=t.maskTransitionName,r=t.maskAnimation;return!n&&r&&(n="".concat(t.prefixCls,"-").concat(r)),n},getTransitionName:function(){var t=this.$props,n=t.transitionName,r=t.animation;return!n&&r&&(n="".concat(t.prefixCls,"-").concat(r)),n},switchScrollingEffect:function(){var t=this.getOpenCount,n=t();if(n===1){if(Zo.hasOwnProperty("overflowX"))return;Zo={overflowX:document.body.style.overflowX,overflowY:document.body.style.overflowY,overflow:document.body.style.overflow},sm(),document.body.style.overflow="hidden"}else n||(Zo.overflow!==void 0&&(document.body.style.overflow=Zo.overflow),Zo.overflowX!==void 0&&(document.body.style.overflowX=Zo.overflowX),Zo.overflowY!==void 0&&(document.body.style.overflowY=Zo.overflowY),Zo={},sm(!0))},close:function(t){this.__emit("close",t)}},render:function(){var t=this.prefixCls,n=this.maskClosable,r=this.visible,a=this.wrapClassName,o=this.title,i=this.wrapProps,l=this.getWrapStyle();return r&&(l.display=null),g("div",{class:"".concat(t,"-root")},[this.getMaskElement(),g("div",le({tabindex:-1,onKeydown:this.onKeydown,class:"".concat(t,"-wrap ").concat(a||""),ref:"wrap",onClick:n?this.onMaskClick:vb,onMouseup:n?this.onMaskMouseUp:vb,role:"dialog","aria-labelledby":o?this.titleId:null,style:l},i),[this.getDialogElement()])])}});function JT(e){var t=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{},n=t.element,r=n===void 0?document.body:n,a={},o=Object.keys(e);return o.forEach(function(i){a[i]=r.style[i]}),o.forEach(function(i){r.style[i]=e[i]}),a}var Jr=0,V6e=!(typeof window!="undefined"&&window.document&&window.document.createElement),Uh={},z6e=G({name:"PortalWrapper",props:{wrapperClassName:u.string,forceRender:u.looseBool,getContainer:u.any,children:u.func,visible:u.looseBool},data:function(){this._component=null;var t=this.$props.visible;return Jr=t?Jr+1:Jr,{}},watch:{visible:function(t){Jr=t?Jr+1:Jr-1},getContainer:function(t,n){var r=typeof t=="function"&&typeof 
n=="function";/* When getContainer changes (functions are compared by their source text), drop the current container. */(r?t.toString()!==n.toString():t!==n)&&this.removeCurrentContainer(!1)}},updated:function(){this.setWrapperClassName()},/* On unmount: decrement the shared open counter Jr if this portal was visible (never below its current value when Jr is already 0) and drop the container references. */beforeUnmount:function(){var t=this.$props.visible;Jr=t&&Jr?Jr-1:Jr,this.removeCurrentContainer(t)},methods:{/* getParent: resolve the mount parent from the getContainer prop - a selector string (first match), a function (its return value) or an HTMLElement; falls back to document.body. */getParent:function(){var t=this.$props.getContainer;if(t){if(typeof t=="string")return document.querySelectorAll(t)[0];if(typeof t=="function")return t();if(kt(t)==="object"&&t instanceof window.HTMLElement)return t}return document.body},/* getDomContainer: lazily create a div, append it to the resolved parent and return it; returns null when V6e is truthy. */getDomContainer:function(){if(V6e)return null;if(!this.container){this.container=document.createElement("div");var t=this.getParent();t&&t.appendChild(this.container)}return this.setWrapperClassName(),this.container},/* setWrapperClassName: copy the wrapperClassName prop onto the container when it differs. */setWrapperClassName:function(){var t=this.$props.wrapperClassName;this.container&&t&&t!==this.container.className&&(this.container.className=t)},savePortal:function(t){this._component=t},/* removeCurrentContainer: only clears the references held by this component; it does not detach the element itself, and any argument passed by callers is ignored. */removeCurrentContainer:function(){this.container=null,this._component=null},/* switchScrollingEffect: when exactly one portal is open and nothing is saved in Uh, call sm() and force the overflow styles to hidden through JT, remembering the values JT returns in Uh; when the counter is 0, pass the saved values back to JT, clear Uh and call sm(true). */switchScrollingEffect:function(){Jr===1&&!Object.keys(Uh).length?(sm(),Uh=JT({overflow:"hidden",overflowX:"hidden",overflowY:"hidden"})):Jr||(JT(Uh),Uh={},sm(!0))}},/* render: produce the SA portal only when forceRender or visible is set, or a portal instance was saved earlier; the children prop is a function that receives the helper object i. */render:function(){var t=this.$props,n=t.children,r=t.forceRender,a=t.visible,o=null,i={getOpenCount:function(){return Jr},getContainer:this.getDomContainer,switchScrollingEffect:this.switchScrollingEffect};return(r||a||this._component)&&(o=g(SA,{getContainer:this.getDomContainer,children:n(i),ref:this.savePortal},null)),o}}),QT=gS(),/* H6e: dialog wrapper component. With getContainer === false it renders ZT in place with getOpenCount fixed at 2; otherwise it renders ZT through the z6e portal wrapper and merges the helper object s supplied by the wrapper into the dialog props. */H6e=G({inheritAttrs:!1,props:P(P({},QT),{visible:QT.visible.def(!1)}),render:function(){var t=this,n=this.$props,r=n.visible,a=n.getContainer,o=n.forceRender,i=P(P(P({},this.$props),this.$attrs),{ref:"_component",key:"dialog"});return a===!1?g(ZT,le(le({},i),{},{getOpenCount:function(){return 2}}),{default:function(){return[ht(t)]}}):g(z6e,{visible:r,forceRender:o,getContainer:a,children:function(s){return i=P(P({},i),s),g(ZT,i,{default:function(){return[ht(t)]}})}},null)}}),P7=H6e;function j6e(e){var 
t=H(null),n=bt(P({},e)),r=H([]),a=function(i){t.value===null&&(r.value=[],t.value=en(function(){var l;r.value.forEach(function(s){l=P(P({},l),s)}),P(n,l),t.value=null})),r.value.push(i)};return et(function(){t.value&&en.cancel(t.value)}),[n,a]}function ex(e,t,n,r){var a=t+n,o=(n-r)/2;if(n>r){if(t>0)return V({},e,o);if(t<0&&a<r)return V({},e,-o)}else if(t<0||a>r)return V({},e,t<0?o:-o);return{}}function K6e(e,t,n,r){var a=b6e(),o=a.width,i=a.height,l=null;return e<=o&&t<=i?l={x:0,y:0}:(e>o||t>i)&&(l=P(P({},ex("x",n,e,o)),ex("y",r,t,i))),l}var tx=Symbol("previewGroupContext"),yS={provide:function(t){ot(tx,t)},inject:function(){return ve(tx,{isPreviewGroup:H(!1),previewUrls:bt({}),setPreviewUrls:function(){},current:H(null),setCurrent:function(){},setShowPreview:function(){},setMousePosition:function(){},registerImage:null})}},W6e=G({name:"PreviewGroup",inheritAttrs:!1,props:{previewPrefixCls:String},setup:function(t,n){var r=n.slots,a=bt({}),o=H(),i=H(!1),l=H(null),s=function(y){P(a,y)},c=function(y){o.value=y},d=function(y){l.value=y},f=function(y){i.value=y},p=function(y,b){return a[y]=b,function(){delete a[y]}},v=function(y){y==null||y.stopPropagation(),i.value=!1,l.value=null};return yS.provide({isPreviewGroup:H(!0),previewUrls:a,setPreviewUrls:s,current:o,setCurrent:c,setShowPreview:f,setMousePosition:d,registerImage:p}),function(){return g(Fe,null,[r.default&&r.default(),g(x7,{"ria-hidden":!i.value,visible:i.value,prefixCls:t.previewPrefixCls,onClose:v,mousePosition:l.value,src:a[o.value]},null)])}}}),T7=W6e,U6e=gS(),Yh={x:0,y:0},Y6e=P({src:u.string,alt:u.string},U6e),q6e=G({name:"Preview",inheritAttrs:!1,props:Y6e,emits:["close","afterClose"],setup:function(t,n){var r=n.emit,a=n.attrs,o=H(1),i=H(0),l=j6e(Yh),s=fn(l,2),c=s[0],d=s[1],f=function(){return r("close")},p=H(),v=bt({originX:0,originY:0,deltaX:0,deltaY:0}),m=H(!1),y=yS.inject(),b=y.previewUrls,C=y.current,S=y.isPreviewGroup,w=y.setCurrent,k=x(function(){return 
Object.keys(b).length}),$=x(function(){return Object.keys(b)}),O=x(function(){return $.value.indexOf(String(C.value))}),T=x(function(){return S.value?b[C.value]:t.src}),_=x(function(){return S.value&&k.value>1}),I=function(){o.value=1,i.value=0,d(Yh)},L=function(){o.value++,d(Yh)},j=function(){o.value>1&&o.value--,d(Yh)},F=function(){i.value+=90},N=function(){i.value-=90},D=function(oe){oe.preventDefault(),oe.stopPropagation(),O.value>0&&w($.value[String(O.value-1)])},z=function(oe){oe.preventDefault(),oe.stopPropagation(),O.value<k.value-1&&w($.value[String(O.value+1)])},B=Se(V({},"".concat(t.prefixCls,"-moving"),m.value)),M="".concat(t.prefixCls,"-operations-operation"),E="".concat(t.prefixCls,"-operations-icon"),K=[{icon:go,onClick:f,type:"close"},{icon:M6e,onClick:L,type:"zoomIn"},{icon:D6e,onClick:j,type:"zoomOut",disabled:x(function(){return o.value===1})},{icon:T6e,onClick:F,type:"rotateRight"},{icon:k6e,onClick:N,type:"rotateLeft"}],W=function(){if(t.visible&&m.value){var oe=p.value.offsetWidth*o.value,Q=p.value.offsetHeight*o.value,ae=O7(p.value),de=ae.left,be=ae.top,Ee=i.value%180!==0;m.value=!1;var Pe=K6e(Ee?Q:oe,Ee?oe:Q,de,be);Pe&&d(P({},Pe))}},Y=function(oe){oe.preventDefault(),oe.stopPropagation(),v.deltaX=oe.pageX-c.x,v.deltaY=oe.pageY-c.y,v.originX=c.x,v.originY=c.y,m.value=!0},q=function(oe){t.visible&&m.value&&d({x:oe.pageX-v.deltaX,y:oe.pageY-v.deltaY})},J=function(){};return et(function(){ce([function(){return t.visible},m],function(){J();var ne,oe,Q=Kn(window,"mouseup",W,!1),ae=Kn(window,"mousemove",q,!1);try{window.top!==window.self&&(ne=Kn(window.top,"mouseup",W,!1),oe=Kn(window.top,"mousemove",q,!1))}catch{}J=function(){Q.remove(),ae.remove(),ne&&ne.remove(),oe&&oe.remove()}},{flush:"post",immediate:!0})}),Wr(function(){J()}),function(){return 
g(P7,le(le({},a),{},{transitionName:"zoom",maskTransitionName:"fade",closable:!1,keyboard:!0,prefixCls:t.prefixCls,onClose:f,afterClose:I,visible:t.visible,wrapClassName:B,getContainer:t.getContainer}),{default:function(){return[g("ul",{class:"".concat(t.prefixCls,"-operations")},[K.map(function(oe){var Q=oe.icon,ae=oe.onClick,de=oe.type,be=oe.disabled;return g("li",{class:Se(M,V({},"".concat(t.prefixCls,"-operations-operation-disabled"),be&&(be==null?void 0:be.value))),onClick:ae,key:de},[g(Q,{class:E},null)])})]),g("div",{class:"".concat(t.prefixCls,"-img-wrapper"),style:{transform:"translate3d(".concat(c.x,"px, ").concat(c.y,"px, 0)")}},[g("img",{onMousedown:Y,ref:p,class:"".concat(t.prefixCls,"-img"),src:T.value,alt:t.alt,style:{transform:"scale3d(".concat(o.value,", ").concat(o.value,", 1) rotate(").concat(i.value,"deg)")}},null)]),_.value&&g("div",{class:Se("".concat(t.prefixCls,"-switch-left"),V({},"".concat(t.prefixCls,"-switch-left-disabled"),O.value<=0)),onClick:D},[g(uc,null,null)]),_.value&&g("div",{class:Se("".concat(t.prefixCls,"-switch-right"),V({},"".concat(t.prefixCls,"-switch-right-disabled"),O.value>=k.value-1)),onClick:z},[g(wi,null,null)])]}})}}}),x7=q6e,_7={src:u.string,wrapperClassName:u.string,wrapperStyle:u.style,prefixCls:u.string,previewPrefixCls:u.string,placeholder:u.VNodeChild,fallback:u.string,preview:u.oneOfType([u.looseBool,u.shape({visible:u.bool,onVisibleChange:u.func,getContainer:u.oneOfType([u.func,u.looseBool,u.string])}).loose]).def(!0)},G6e=function(t,n){var r=P({},t);return Object.keys(n).forEach(function(a){t[a]===void 0&&(r[a]=n[a])}),r},X6e=0,E7=G({name:"Image",mixins:[nt],inheritAttrs:!1,props:_7,emits:["click"],setup:function(t,n){var r=n.attrs,a=n.slots,o=n.emit,i=x(function(){return t.prefixCls}),l=x(function(){return"".concat(i.value,"-preview")}),s=x(function(){var M={visible:void 0,onVisibleChange:function(){},getContainer:void 0};return kt(t.preview)==="object"?G6e(t.preview,M):M}),c=x(function(){return 
t.placeholder&&t.placeholder!==!0||a.placeholder}),d=x(function(){return s.value.visible}),f=x(function(){return s.value.onVisibleChange}),p=x(function(){return s.value.getContainer}),v=x(function(){return d.value!==void 0}),m=H(!!d.value);ce(d,function(){m.value=!!d.value}),ce(m,function(M,E){f.value(M,E)});var y=H(c.value?"loading":"normal");ce(function(){return t.src},function(){y.value=c.value?"loading":"normal"});var b=H(null),C=x(function(){return y.value==="error"}),S=yS.inject(),w=S.isPreviewGroup,k=S.setCurrent,$=S.setShowPreview,O=S.setMousePosition,T=S.registerImage,_=H(X6e++),I=x(function(){return t.preview&&!C.value}),L=function(){y.value="normal"},j=function(){y.value="error"},F=function(E){if(!v.value){var K=O7(E.target),W=K.left,Y=K.top;w.value?(k(_.value),O({x:W,y:Y})):b.value={x:W,y:Y}}w.value?$(!0):m.value=!0,o("click",E)},N=function(){m.value=!1,v.value||(b.value=null)},D=H(null);ce(function(){return D},function(){y.value==="loading"&&D.value.complete&&(D.value.naturalWidth||D.value.naturalHeight)&&L()});var z=function(){};et(function(){ce([function(){return t.src},I],function(){if(z(),!w.value)return function(){};z=T(_.value,t.src),I.value||z()},{flush:"post",immediate:!0})});var B=function(E){return vW(E)?E+"px":E};return function(){var M=t.prefixCls,E=t.wrapperClassName,K=t.fallback,W=t.src,Y=t.preview,q=t.placeholder,J=t.wrapperStyle,ne=r.width,oe=r.height,Q=r.crossorigin,ae=r.decoding,de=r.alt,be=r.sizes,Ee=r.srcset,Pe=r.usemap,Be=r.class,te=r.style,ie=Se(M,E,V({},"".concat(M,"-error"),C.value)),ge=C.value&&K?K:W,ke=a.previewMask&&a.previewMask(),xe={crossorigin:Q,decoding:ae,alt:de,sizes:be,srcset:Ee,usemap:Pe,class:Se("".concat(M,"-img"),V({},"".concat(M,"-img-placeholder"),q===!0),Be),style:P({height:oe},te)};return 
g(Fe,null,[g("div",{class:ie,onClick:Y&&!C.value?F:function(Ie){o("click",Ie)},style:P({width:B(ne),height:B(oe)},J)},[g("img",le(le(le({},xe),C.value&&K?{src:K}:{onLoad:L,onError:j,src:W}),{},{ref:D}),null),y.value==="loading"&&g("div",{"aria-hidden":"true",class:"".concat(M,"-placeholder")},[q||a.placeholder&&a.placeholder()]),ke&&I.value&&g("div",{class:"".concat(M,"-mask")},[ke])]),!w.value&&I.value&&g(x7,{"aria-hidden":!m.value,visible:m.value,prefixCls:l.value,onClose:N,mousePosition:b.value,src:ge,alt:de,getContainer:p.value},null)])}}});E7.PreviewGroup=T7;var Z6e=E7,J6e=G({name:"AImagePreviewGroup",inheritAttrs:!1,props:{previewPrefixCls:u.string},setup:function(t,n){var r=n.attrs,a=n.slots,o=ve("configProvider",St),i=x(function(){return o.getPrefixCls("image-preview",t.previewPrefixCls)});return function(){return g(T7,le(le({},P(P({},r),t)),{},{previewPrefixCls:i.value}),a)}}}),M7=J6e,zl=G({name:"AImage",inheritAttrs:!1,props:_7,setup:function(t,n){var r=n.slots,a=n.attrs,o=Wt("image",t),i=o.prefixCls;return function(){return g(Z6e,P(P(P({},a),t),{prefixCls:i.value}),r)}}});zl.PreviewGroup=M7;zl.install=function(e){return e.component(zl.name,zl),e.component(zl.PreviewGroup.name,zl.PreviewGroup),e};var Q6e=zl,e8e={disabled:u.looseBool,activeClassName:u.string,activeStyle:u.any},t8e=G({name:"TouchFeedback",mixins:[nt],inheritAttrs:!1,props:An(e8e,{disabled:!1}),data:function(){return this.child=null,{active:!1}},mounted:function(){var t=this;this.$nextTick(function(){t.disabled&&t.active&&t.setState({active:!1})})},methods:{triggerEvent:function(t,n,r){var 
a="on".concat(t),o=this.child;o.props[a]&&o.props[a](r),n!==this.active&&this.setState({active:n})},/* Touch and mouse handlers: each one forwards its event to triggerEvent together with the event-name suffix and the active flag to apply (true for Touchstart/Mousedown, false for every other event). */onTouchStart:function(t){this.triggerEvent("Touchstart",!0,t)},onTouchMove:function(t){this.triggerEvent("Touchmove",!1,t)},onTouchEnd:function(t){this.triggerEvent("Touchend",!1,t)},onTouchCancel:function(t){this.triggerEvent("Touchcancel",!1,t)},onMouseDown:function(t){this.triggerEvent("Mousedown",!0,t)},onMouseUp:function(t){this.triggerEvent("Mouseup",!1,t)},onMouseLeave:function(t){this.triggerEvent("Mouseleave",!1,t)}},/* render: takes the children from ht(this) and requires exactly one; otherwise it reports through on(false, message) and returns null (the message is Chinese for "the m-feedback component can only contain one child element"). The child is remembered in this.child and returned through Ot(child, props). The listener props are omitted when disabled; the start/move touch listeners use the "...Passive" key when mn is truthy. While active and not disabled, activeStyle is merged into the child style and activeClassName into its class, unless activeStyle is exactly false. ht, on, Ot and mn are defined elsewhere in the bundle. */render:function(){var t,n=this.$props,r=n.disabled,a=n.activeClassName,o=a===void 0?"":a,i=n.activeStyle,l=i===void 0?{}:i,s=ht(this);if(s.length!==1)return on(!1,"m-feedback\u7EC4\u4EF6\u53EA\u80FD\u5305\u542B\u4E00\u4E2A\u5B50\u5143\u7D20"),null;var c=r?void 0:(t={},V(t,mn?"onTouchstartPassive":"onTouchstart",this.onTouchStart),V(t,mn?"onTouchmovePassive":"onTouchmove",this.onTouchMove),V(t,"onTouchend",this.onTouchEnd),V(t,"onTouchcancel",this.onTouchCancel),V(t,"onMousedown",this.onMouseDown),V(t,"onMouseup",this.onMouseUp),V(t,"onMouseleave",this.onMouseLeave),t);if(s=s[0],this.child=s,!r&&this.active){var d=s.props,f=d.style,p=d.class;return l!==!1&&(l&&(f=P(P({},f),l)),p=Se(p,o)),Ot(s,P({class:p,style:f},c))}return Ot(s,c)}}),/* InputHandler: renders a span that carries this.$attrs and the component children, wrapped in the t8e touch-feedback component with the disabled prop and an activeClassName of "<prefixCls>-handler-active". */n8e={name:"InputHandler",inheritAttrs:!1,props:{prefixCls:u.string,disabled:u.looseBool},render:function(){var t=this,n=this.$props,r=n.prefixCls,a=n.disabled,o={disabled:a,activeClassName:"".concat(r,"-handler-active")};return g(t8e,o,{default:function(){return[g("span",t.$attrs,[ht(t)])]}})}},nx=n8e;/* rx: calls preventDefault() on the given event. */function rx(e){e.preventDefault()}/* r8e: removes every run of characters that are not word characters, "." or "-"; it is the default value of the VCInputNumber parser prop. */function r8e(e){return e.replace(/[^\w\.-]+/g,"")}/* a8e and o8e are the millisecond delays stepFn passes to setTimeout (a8e when its fourth argument is truthy, o8e otherwise). i8e is Number.MAX_SAFE_INTEGER with a 2^53-1 fallback. Gc(t) is true when t is neither null nor undefined. */var a8e=200,o8e=600,i8e=Number.MAX_SAFE_INTEGER||Math.pow(2,53)-1,Gc=function(t){return t!=null},mb=function(t,n){return n===t||typeof n=="number"&&typeof 
t=="number"&&isNaN(n)&&isNaN(t)},l8e={value:u.oneOfType([u.number,u.string]),defaultValue:u.oneOfType([u.number,u.string]),focusOnUpDown:u.looseBool,autofocus:u.looseBool,prefixCls:u.string,tabindex:u.oneOfType([u.string,u.number]),placeholder:u.string,disabled:u.looseBool,readonly:u.looseBool,max:u.number,min:u.number,step:u.oneOfType([u.number,u.string]),upHandler:u.any,downHandler:u.any,useTouch:u.looseBool,formatter:u.func,parser:u.func,precision:u.number,required:u.looseBool,pattern:u.string,decimalSeparator:u.string,autocomplete:u.string,title:u.string,name:u.string,id:u.string,type:u.string,maxlength:u.any},s8e=G({name:"VCInputNumber",mixins:[nt],inheritAttrs:!1,props:An(l8e,{focusOnUpDown:!0,useTouch:!1,prefixCls:"rc-input-number",min:-i8e,step:1,parser:r8e,required:!1,autocomplete:"off"}),data:function(){var t=Qe(this);this.prevProps=P({},t);var n;"value"in t?n=this.value:n=this.defaultValue;var r=this.getValidValue(this.toNumber(n));return{inputValue:this.toPrecisionAsStep(r),sValue:r,focused:this.autofocus}},mounted:function(){var t=this;this.$nextTick(function(){t.updatedFunc()})},updated:function(){var t=this,n=this.$props,r=n.value,a=n.max,o=n.min,i=this.$data.focused,l=this.prevProps,s=Qe(this);if(l){if(!mb(l.value,r)||!mb(l.max,a)||!mb(l.min,o)){var c=i?r:this.getValidValue(r),d;this.pressingUpOrDown?d=c:this.inputting?d=this.rawInput:d=this.toPrecisionAsStep(c),this.setState({sValue:c,inputValue:d})}var f="value"in s?r:this.$data.sValue;"max"in s&&l.max!==a&&typeof f=="number"&&f>a&&(this.__emit("update:value",a),this.__emit("change",a)),"min"in s&&l.min!==o&&typeof f=="number"&&f<o&&(this.__emit("update:value",o),this.__emit("change",o))}this.prevProps=P({},s),this.$nextTick(function(){t.updatedFunc()})},beforeUnmount:function(){this.stop()},methods:{updatedFunc:function(){var t=this.inputRef;try{if(this.cursorStart!==void 0&&this.$data.focused){if(!this.partRestoreByAfter(this.cursorAfter)&&this.$data.sValue!==this.value){var 
n=this.cursorStart+1;this.cursorAfter?this.lastKeyCode===ze.BACKSPACE?n=this.cursorStart-1:this.lastKeyCode===ze.DELETE&&(n=this.cursorStart):n=t.value.length,this.fixCaret(n,n)}else if(this.currentValue===t.value)switch(this.lastKeyCode){case ze.BACKSPACE:this.fixCaret(this.cursorStart-1,this.cursorStart-1);break;case ze.DELETE:this.fixCaret(this.cursorStart+1,this.cursorStart+1);break;default:}}}catch{}this.lastKeyCode=null,this.pressingUpOrDown&&(this.focusOnUpDown&&this.$data.focused&&document.activeElement!==t&&this.focus(),this.pressingUpOrDown=!1)},onKeyDown:function(t){if(t.keyCode===ze.UP){var n=this.getRatio(t);this.up(t,n),this.stop()}else if(t.keyCode===ze.DOWN){var r=this.getRatio(t);this.down(t,r),this.stop()}else t.keyCode===ze.ENTER&&this.__emit("pressEnter",t);this.recordCursorPosition(),this.lastKeyCode=t.keyCode;for(var a=arguments.length,o=new Array(a>1?a-1:0),i=1;i<a;i++)o[i-1]=arguments[i];this.__emit.apply(this,["keydown",t].concat(o))},onKeyUp:function(t){this.stop(),this.recordCursorPosition();for(var n=arguments.length,r=new Array(n>1?n-1:0),a=1;a<n;a++)r[a-1]=arguments[a];this.__emit.apply(this,["keyup",t].concat(r))},onTrigger:function(t){if(t.target.composing)return!1;this.onChange(t)},onChange:function(t){this.$data.focused&&(this.inputting=!0),this.rawInput=this.parser(this.getValueFromEvent(t)),this.setState({inputValue:this.rawInput});var n=this.toNumber(this.rawInput);this.__emit("update:value",n),this.__emit("change",n)},onFocus:function(){this.setState({focused:!0});for(var t=arguments.length,n=new Array(t),r=0;r<t;r++)n[r]=arguments[r];this.__emit.apply(this,["focus"].concat(n))},onBlur:function(){this.inputting=!1,this.setState({focused:!1});var t=this.getCurrentValidValue(this.$data.inputValue),n=this.setValue(t);if(this.$attrs.onBlur&&this.inputRef){var r=this.inputRef.value,a=this.getInputDisplayValue({focused:!1,sValue:n});this.inputRef.value=a;for(var o=arguments.length,i=new 
Array(o),l=0;l<o;l++)i[l]=arguments[l];this.__emit.apply(this,["blur"].concat(i)),this.inputRef.value=r}},/* getCurrentValidValue(t): an empty string stays empty; a value for which isNotCompleteNumber(parseFloat(t)) holds is replaced by the stored $data.sValue; anything else is clamped by getValidValue. The result is passed through toNumber. */getCurrentValidValue:function(t){var n=t;return n===""?n="":this.isNotCompleteNumber(parseFloat(n,10))?n=this.$data.sValue:n=this.getValidValue(n),this.toNumber(n)},/* getRatio(t): step multiplier derived from the modifier keys of event t: 0.1 with meta or ctrl, 10 with shift, otherwise 1. */getRatio:function(t){var n=1;return t.metaKey||t.ctrlKey?n=.1:t.shiftKey&&(n=10),n},/* getValueFromEvent(t): trimmed text of the event target with every full-width ideographic full stop replaced by "."; when decimalSeparator is set, that separator is also replaced by "." via a single replace call (assuming the separator is a string, only its first occurrence is affected). */getValueFromEvent:function(t){var n=t.target.value.trim().replace(/。/g,".");return Gc(this.decimalSeparator)&&(n=n.replace(this.decimalSeparator,".")),n},/* getValidValue(t, min, max): parses t with parseFloat and clamps it to [min, max]; the optional second and third arguments default to this.min and this.max. Input that does not parse to a number is returned unchanged. */getValidValue:function(t){var n=arguments.length>1&&arguments[1]!==void 0?arguments[1]:this.min,r=arguments.length>2&&arguments[2]!==void 0?arguments[2]:this.max,a=parseFloat(t,10);return isNaN(a)?t:(a<n&&(a=n),a>r&&(a=r),a)},/* setValue(t, n): normalises t to a number, or to null when it is not a complete number, and returns that value. When vt(this,"value") is truthy (presumably: the value prop was supplied - confirm) only inputValue is re-derived from the existing sValue; otherwise sValue and inputValue are both updated. n is forwarded to setState as its second argument. "update:value" and "change" are emitted when the numeric value differs from the stored sValue or its toFixed(precision) text differs from the stored inputValue. */setValue:function(t,n){var r=this.$props.precision,a=this.isNotCompleteNumber(parseFloat(t,10))?null:parseFloat(t,10),o=this.$data,i=o.sValue,l=i===void 0?null:i,s=o.inputValue,c=s===void 0?null:s,d=typeof a=="number"?a.toFixed(r):"".concat(a),f=a!==l||d!=="".concat(c);return vt(this,"value")?this.setState({inputValue:this.toPrecisionAsStep(this.$data.sValue)},n):this.setState({sValue:a,inputValue:this.toPrecisionAsStep(t)},n),f&&(this.__emit("update:value",a),this.__emit("change",a)),a},/* getPrecision(t): the precision prop when it is set; otherwise the number of decimal places of t, taken from the exponent for "e-" notation or from the digits after "." (0 when there is no "."). */getPrecision:function(t){if(Gc(this.precision))return this.precision;var n=t.toString();if(n.indexOf("e-")>=0)return parseInt(n.slice(n.indexOf("e-")+2),10);var r=0;return n.indexOf(".")>=0&&(r=n.length-n.indexOf(".")-1),r},/* getMaxPrecision(t, ratio=1): the precision prop when it is set; otherwise precision(ratio)+precision(step), or the larger of that sum and precision(t) when t is truthy. */getMaxPrecision:function(t){var n=arguments.length>1&&arguments[1]!==void 0?arguments[1]:1;if(Gc(this.precision))return this.precision;var r=this.step,a=this.getPrecision(n),o=this.getPrecision(r),i=this.getPrecision(t);return t?Math.max(i,a+o):a+o},/* getPrecisionFactor(t, ratio=1): 10 raised to getMaxPrecision(t, ratio). */getPrecisionFactor:function(t){var n=arguments.length>1&&arguments[1]!==void 0?arguments[1]:1,r=this.getMaxPrecision(t,n);return Math.pow(10,r)},/* getInputDisplayValue(t): reads focused, inputValue and sValue from t or, when t is falsy, from this.$data; uses the raw inputValue while focused and toPrecisionAsStep(sValue) otherwise, substituting "" for null or undefined. */getInputDisplayValue:function(t){var n=t||this.$data,r=n.focused,a=n.inputValue,o=n.sValue,i;r?i=a:i=this.toPrecisionAsStep(o),i==null&&(i="");var 
l=this.formatWrapper(i);return Gc(this.$props.decimalSeparator)&&(l=l.toString().replace(".",this.$props.decimalSeparator)),l},recordCursorPosition:function(){try{var t=this.inputRef;this.cursorStart=t.selectionStart,this.cursorEnd=t.selectionEnd,this.currentValue=t.value,this.cursorBefore=t.value.substring(0,this.cursorStart),this.cursorAfter=t.value.substring(this.cursorEnd)}catch{}},fixCaret:function(t,n){if(!(t===void 0||n===void 0||!this.inputRef||!this.inputRef.value))try{var r=this.inputRef,a=r.selectionStart,o=r.selectionEnd;(t!==a||n!==o)&&r.setSelectionRange(t,n)}catch{}},restoreByAfter:function(t){if(t===void 0)return!1;var n=this.inputRef.value,r=n.lastIndexOf(t);if(r===-1)return!1;var a=this.cursorBefore.length;return this.lastKeyCode===ze.DELETE&&this.cursorBefore.charAt(a-1)===t[0]?(this.fixCaret(a,a),!0):r+t.length===n.length?(this.fixCaret(r,r),!0):!1},partRestoreByAfter:function(t){var n=this;return t===void 0?!1:Array.prototype.some.call(t,function(r,a){var o=t.substring(a);return n.restoreByAfter(o)})},focus:function(){this.inputRef.focus(),this.recordCursorPosition()},blur:function(){this.inputRef.blur()},formatWrapper:function(t){return this.formatter?this.formatter(t):t},toPrecisionAsStep:function(t){if(this.isNotCompleteNumber(t)||t==="")return t;var n=Math.abs(this.getMaxPrecision(t));return isNaN(n)?t.toString():Number(t).toFixed(n)},isNotCompleteNumber:function(t){return isNaN(t)||t===""||t===null||t&&t.toString().indexOf(".")===t.toString().length-1},toNumber:function(t){var n=this.$props,r=n.precision,a=n.autofocus,o=this.$data.focused,i=o===void 0?a:o,l=t&&t.length>16&&i;return this.isNotCompleteNumber(t)||l?t:Gc(r)?Math.round(t*Math.pow(10,r))/Math.pow(10,r):Number(t)},upStep:function(t,n){var r=this.step,a=this.getPrecisionFactor(t,n),o=Math.abs(this.getMaxPrecision(t,n)),i=((a*t+a*r*n)/a).toFixed(o);return this.toNumber(i)},downStep:function(t,n){var 
r=this.step,a=this.getPrecisionFactor(t,n),o=Math.abs(this.getMaxPrecision(t,n)),i=((a*t-a*r*n)/a).toFixed(o);return this.toNumber(i)},stepFn:function(t,n){var r=this,a=arguments.length>2&&arguments[2]!==void 0?arguments[2]:1,o=arguments.length>3?arguments[3]:void 0;if(this.stop(),n&&n.preventDefault(),!this.disabled){var i=this.max,l=this.min,s=this.getCurrentValidValue(this.$data.inputValue)||0;if(!this.isNotCompleteNumber(s)){var c=this["".concat(t,"Step")](s,a),d=c>i||c<l;c>i?c=i:c<l&&(c=l),this.setValue(c),this.setState({focused:!0}),!d&&(this.autoStepTimer=setTimeout(function(){r[t](n,a,!0)},o?a8e:o8e))}}},stop:function(){this.autoStepTimer&&clearTimeout(this.autoStepTimer)},down:function(t,n,r){this.pressingUpOrDown=!0,this.stepFn("down",t,n,r)},up:function(t,n,r){this.pressingUpOrDown=!0,this.stepFn("up",t,n,r)},handleInputClick:function(){this.__emit("click")},saveUp:function(t){this.upHandlerRef=t},saveDown:function(t){this.downHandlerRef=t},saveInput:function(t){this.inputRef=t},onCompositionstart:function(t){t.target.composing=!0},onCompositionend:function(t){this.onChange(t),t.target.composing=!1}},render:function(){var t,n=P(P({},this.$props),this.$attrs),r=n.prefixCls,a=n.disabled,o=n.readonly,i=n.useTouch,l=n.autocomplete,s=n.upHandler,c=n.downHandler,d=n.class,f=Se((t={},V(t,d,d),V(t,r,!0),V(t,"".concat(r,"-disabled"),a),V(t,"".concat(r,"-focused"),this.$data.focused),t)),p="",v="",m=this.$data.sValue;if(m||m===0)if(isNaN(m))p="".concat(r,"-handler-up-disabled"),v="".concat(r,"-handler-down-disabled");else{var y=Number(m);y>=this.max&&(p="".concat(r,"-handler-up-disabled")),y<=this.min&&(v="".concat(r,"-handler-down-disabled"))}var b={};for(var C in n)n.hasOwnProperty(C)&&(C.substr(0,5)==="data-"||C.substr(0,5)==="aria-"||C==="role")&&(b[C]=n[C]);var S=!this.readonly&&!this.disabled,w=this.getInputDisplayValue(),k,$;if(i){var 
O,T;k=(O={},V(O,mn?"onTouchstartPassive":"onTouchstart",S&&!p&&this.up),V(O,"onTouchend",this.stop),O),$=(T={},V(T,mn?"onTouchstartPassive":"onTouchstart",S&&!v&&this.down),V(T,"onTouchend",this.stop),T)}else k={onMousedown:S&&!p&&this.up,onMouseup:this.stop,onMouseleave:this.stop},$={onMousedown:S&&!v&&this.down,onMouseup:this.stop,onMouseleave:this.stop};var _=!!p||a||o,I=!!v||a||o,L=P(P({disabled:_,prefixCls:r,unselectable:"unselectable",role:"button","aria-label":"Increase Value","aria-disabled":!!_,class:"".concat(r,"-handler ").concat(r,"-handler-up ").concat(p)},k),{ref:this.saveUp}),j=P(P({disabled:I,prefixCls:r,unselectable:"unselectable",role:"button","aria-label":"Decrease Value","aria-disabled":!!I,class:"".concat(r,"-handler ").concat(r,"-handler-down ").concat(v)},$),{ref:this.saveDown});return g("div",{class:f,style:n.style,title:n.title,onMouseenter:n.onMouseenter,onMouseleave:n.onMouseleave,onMouseover:n.onMouseover,onMouseout:n.onMouseout},[g("div",{class:"".concat(r,"-handler-wrap")},[g("span",null,[g(nx,le(le({},L),{},{key:"upHandler"}),{default:function(){return[s||g("span",{unselectable:"unselectable",class:"".concat(r,"-handler-up-inner"),onClick:rx},null)]}})]),g(nx,le(le({},j),{},{key:"downHandler"}),{default:function(){return[c||g("span",{unselectable:"unselectable",class:"".concat(r,"-handler-down-inner"),onClick:rx},null)]}})]),g("div",{class:"".concat(r,"-input-wrap")},[g("input",le({role:"spinbutton","aria-valuemin":this.min,"aria-valuemax":this.max,"aria-valuenow":m,required:this.required,type:n.type,placeholder:this.placeholder,onClick:this.handleInputClick,class:"".concat(r,"-input"),tabindex:this.tabindex,autocomplete:l,onFocus:this.onFocus,onBlur:this.onBlur,onKeydown:S&&this.onKeyDown,onKeyup:S&&this.onKeyUp,autofocus:this.autofocus,maxlength:this.maxlength,readonly:this.readonly,disabled:this.disabled,max:this.max,min:this.min,step:this.step,name:this.name,title:this.title,id:this.id,onInput:this.onTrigger,onCompositionstart:this
.onCompositionstart,onCompositionend:this.onCompositionend,ref:this.saveInput,value:w,pattern:this.pattern},b),null)])])}}),u8e=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},c8e={prefixCls:u.string,min:u.number,max:u.number,value:u.oneOfType([u.number,u.string]),step:u.oneOfType([u.number,u.string]).def(1),defaultValue:u.oneOfType([u.number,u.string]),tabindex:u.oneOfType([u.number,u.string]),disabled:u.looseBool,size:u.oneOf(rt("large","small","default")),formatter:u.func,parser:u.func,decimalSeparator:u.string,placeholder:u.string,name:u.string,id:u.string,precision:u.number,autofocus:u.looseBool,onPressEnter:{type:Function},onChange:Function},d8e=G({name:"AInputNumber",inheritAttrs:!1,props:c8e,setup:function(t){var n=H(null),r=function(){n.value.focus()},a=function(){n.value.blur()};return et(function(){Ne(function(){})}),{configProvider:ve("configProvider",St),inputNumberRef:n,focus:r,blur:a}},render:function(){var t,n=P(P({},Qe(this)),this.$attrs),r=n.prefixCls,a=n.size,o=n.class,i=u8e(n,["prefixCls","size","class"]),l=this.configProvider.getPrefixCls,s=l("input-number",r),c=Se((t={},V(t,"".concat(s,"-lg"),a==="large"),V(t,"".concat(s,"-sm"),a==="small"),t),o),d=g(gD,{class:"".concat(s,"-handler-up-inner")},null),f=g(Rs,{class:"".concat(s,"-handler-down-inner")},null),p=P(P({prefixCls:s,upHandler:d,downHandler:f},i),{class:c});return g(s8e,le(le({},p),{},{ref:"inputNumberRef"}),null)}}),f8e=kn(d8e),bS={prefixCls:u.string,hasSider:u.looseBool,tagName:u.string};function Mg(e){var t=e.suffixCls,n=e.tagName,r=e.name;return function(a){var o=G({name:r,props:bS,setup:function(l,s){var c=s.slots,d=Wt(t,l),f=d.prefixCls;return function(){var 
p,v=P(P({},l),{prefixCls:f.value,tagName:n});return g(a,v,{default:function(){return[(p=c.default)===null||p===void 0?void 0:p.call(c)]}})}}});return o}}/* CS: basic layout part. Renders an element of type tagName with class prefixCls around the default slot (nothing is passed as children when the slot is absent). */var CS=G({props:bS,setup:function(t,n){var r=n.slots;return function(){var a;return g(t.tagName,{class:t.prefixCls},(a=r.default)===null||a===void 0?void 0:a.call(r))}}}),/* h8e: root layout. Keeps a list of registered siders in i and publishes addSider/removeSider under the key eD through ot. The class "<prefixCls>-has-sider" follows the hasSider prop when it is a boolean and otherwise is set when at least one sider is registered; "<prefixCls>-rtl" is set when the direction read from Wt is "rtl". */h8e=G({props:bS,setup:function(t,n){var r=n.slots,a=Wt("",t),o=a.direction,i=H([]),l={addSider:function(c){i.value=[].concat(Je(i.value),[c])},removeSider:function(c){i.value=i.value.filter(function(d){return d!==c})}};return ot(eD,l),function(){var s,c,d=t.prefixCls,f=t.hasSider,p=t.tagName,v=Se(d,(s={},V(s,"".concat(d,"-has-sider"),typeof f=="boolean"?f:i.value.length>0),V(s,"".concat(d,"-rtl"),o.value==="rtl"),s));return g(p,{class:v},(c=r.default)===null||c===void 0?void 0:c.call(r))}}}),/* Concrete components built with the Mg factory: ALayout (section, wraps h8e) and ALayoutHeader (header), ALayoutFooter (footer), ALayoutContent (main), which all wrap CS. The three parts are attached to Ig as Header, Footer and Content. */Ig=Mg({suffixCls:"layout",tagName:"section",name:"ALayout"})(h8e),p8e=Mg({suffixCls:"layout-header",tagName:"header",name:"ALayoutHeader"})(CS),v8e=Mg({suffixCls:"layout-footer",tagName:"footer",name:"ALayoutFooter"})(CS),m8e=Mg({suffixCls:"layout-content",tagName:"main",name:"ALayoutContent"})(CS);Ig.Header=p8e;Ig.Footer=v8e;Ig.Content=m8e;var pr=Ig,/* g8e: icon definition data for the outlined "bars" icon (an svg node with a single path). */g8e={icon:{tag:"svg",attrs:{viewBox:"0 0 1024 1024",focusable:"false"},children:[{tag:"path",attrs:{d:"M912 192H328c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h584c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 284H328c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h584c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zm0 284H328c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h584c4.4 0 8-3.6 8-8v-56c0-4.4-3.6-8-8-8zM104 228a56 56 0 10112 0 56 56 0 10-112 0zm0 284a56 56 0 10112 0 56 56 0 10-112 0zm0 284a56 56 0 10112 0 56 56 0 10-112 0z"}}]},name:"bars",theme:"outlined"},y8e=g8e;function ax(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return 
Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){b8e(e,a,n[a])})}return e}function b8e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var wS=function(t,n){var r=ax({},t,n.attrs);return g(Et,ax({},r,{icon:y8e}),null)};wS.displayName="BarsOutlined";wS.inheritAttrs=!1;var C8e=wS,ox={xs:"479.98px",sm:"575.98px",md:"767.98px",lg:"991.98px",xl:"1199.98px",xxl:"1599.98px"},w8e={prefixCls:u.string,collapsible:u.looseBool,collapsed:u.looseBool,defaultCollapsed:u.looseBool,reverseArrow:u.looseBool,zeroWidthTriggerStyle:u.style,trigger:u.VNodeChild,width:u.oneOfType([u.number,u.string]),collapsedWidth:u.oneOfType([u.number,u.string]),breakpoint:u.oneOf(rt("xs","sm","md","lg","xl","xxl")),theme:u.oneOf(rt("light","dark")).def("dark"),onBreakpoint:Function,onCollapse:Function},S8e=function(){var e=0;return function(){var t=arguments.length>0&&arguments[0]!==void 0?arguments[0]:"";return e+=1,"".concat(t).concat(e)}}(),k8e=G({name:"ALayoutSider",inheritAttrs:!1,props:Rn(w8e,{collapsible:!1,defaultCollapsed:!1,reverseArrow:!1,width:200,collapsedWidth:80}),emits:["breakpoint","update:collapsed","collapse"],setup:function(t,n){var r=n.emit,a=n.attrs,o=n.slots,i=Wt("layout-sider",t),l=i.prefixCls,s=ve(eD,void 0),c=H(!!(t.collapsed!==void 0?t.collapsed:t.defaultCollapsed)),d=H(!1);ce(function(){return t.collapsed},function(){c.value=!!t.collapsed}),ot(QA,c);var f=function(S,w){t.collapsed===void 0&&(c.value=S),r("update:collapsed",S),r("collapse",S,w)},p=H(function(C){d.value=C.matches,r("breakpoint",C.matches),c.value!==C.matches&&f(C.matches,"responsive")}),v;function m(C){return p.value(C)}var y=S8e("ant-sider-");et(function(){if(typeof window!="undefined"){var C=window,S=C.matchMedia;if(S&&t.breakpoint&&t.breakpoint in ox){v=S("(max-width: 
".concat(ox[t.breakpoint],")"));try{v.addEventListener("change",m)}catch{v.addListener(m)}m(v)}}s&&s.addSider(y)}),Lt(function(){try{v==null||v.removeEventListener("change",m)}catch{v==null||v.removeListener(m)}s&&s.removeSider(y)});var b=function(){f(!c.value,"clickTrigger")};return function(){var C,S,w=l.value,k=t.collapsedWidth,$=t.width,O=t.reverseArrow,T=t.zeroWidthTriggerStyle,_=t.trigger,I=t.collapsible,L=t.theme,j=c.value?k:$,F=WA(j)?"".concat(j,"px"):String(j),N=parseFloat(String(k||0))===0?g("span",{onClick:b,class:Se("".concat(w,"-zero-width-trigger"),"".concat(w,"-zero-width-trigger-").concat(O?"right":"left")),style:T},[_||g(C8e,null,null)]):null,D={expanded:g(O?wi:uc,null,null),collapsed:g(O?uc:wi,null,null)},z=c.value?"collapsed":"expanded",B=D[z],M=_!==null?N||g("div",{class:"".concat(w,"-trigger"),onClick:b,style:{width:F}},[_||B]):null,E=P(P({},a.style),{flex:"0 0 ".concat(F),maxWidth:F,minWidth:F,width:F}),K=Se(w,"".concat(w,"-").concat(L),(C={},V(C,"".concat(w,"-collapsed"),!!c.value),V(C,"".concat(w,"-has-trigger"),I&&_!==null&&!N),V(C,"".concat(w,"-below"),!!d.value),V(C,"".concat(w,"-zero-width"),parseFloat(F)===0),C),a.class);return g("aside",le(le({},a),{},{class:K,style:E,ref:H}),[g("div",{class:"".concat(w,"-children")},[(S=o.default)===null||S===void 0?void 0:S.call(o)]),I||d.value&&N?M:null])}}});pr.Sider=k8e;pr.install=function(e){return e.component(pr.name,pr),e.component(pr.Header.name,pr.Header),e.component(pr.Footer.name,pr.Footer),e.component(pr.Sider.name,pr.Sider),e.component(pr.Content.name,pr.Content),e};var $8e=pr.Header,O8e=pr.Footer,P8e=pr.Sider,T8e=pr.Content,x8e=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return 
n},/* _8e: validator for the allowed size values. I7(): returns the ASpin prop definitions. Xp: module-level default indicator renderer, null until M8e sets it. */_8e=u.oneOf(rt("small","default","large")),I7=function(){return{prefixCls:u.string,spinning:u.looseBool,size:_8e,wrapperClassName:u.string,tip:u.string,delay:u.number,indicator:u.any}},Xp=null;/* E8e(e, t): true when e (spinning) is truthy and t (delay) is truthy and converts to a number; data() uses it so that sSpinning starts false when a delay applies. */function E8e(e,t){return!!e&&!!t&&!isNaN(Number(t))}/* M8e(e): stores e.indicator as the default indicator renderer; a function is kept as it is, any other value is wrapped in a function that renders it with g(t,null,null). Exposed as ol.setDefaultIndicator. */function M8e(e){var t=e.indicator;Xp=typeof t=="function"?t:function(){return g(t,null,null)}}/* ASpin component. sSpinning is the displayed state and is synchronised with the spinning prop by updateSpinning. created() remembers the original updateSpinning and then calls debouncifyUpdateSpinning; mounted() and updated() call updateSpinning (updated re-wraps it first, inside Ne); beforeUnmount() cancels a pending call. */var ol=G({name:"ASpin",mixins:[nt],inheritAttrs:!1,props:Rn(I7(),{size:"default",spinning:!0,wrapperClassName:""}),setup:function(){return{originalUpdateSpinning:null,configProvider:ve("configProvider",St)}},data:function(){var t=this.spinning,n=this.delay,r=E8e(t,n);return{sSpinning:t&&!r}},created:function(){this.originalUpdateSpinning=this.updateSpinning,this.debouncifyUpdateSpinning(this.$props)},mounted:function(){this.updateSpinning()},updated:function(){var t=this;Ne(function(){t.debouncifyUpdateSpinning(),t.updateSpinning()})},beforeUnmount:function(){this.cancelExistingSpin()},methods:{/* debouncifyUpdateSpinning(t): when a delay is configured (read from t or from $props), cancels any pending call and replaces updateSpinning with Yn(originalUpdateSpinning, delay). Yn is defined elsewhere (presumably a debounce helper - confirm). */debouncifyUpdateSpinning:function(t){var n=t||this.$props,r=n.delay;r&&(this.cancelExistingSpin(),this.updateSpinning=Yn(this.originalUpdateSpinning,r))},/* updateSpinning(): copies the spinning prop into sSpinning when the two differ. */updateSpinning:function(){var t=this.spinning,n=this.sSpinning;n!==t&&this.setState({sSpinning:t})},/* cancelExistingSpin(): calls cancel() on the current updateSpinning when it has one. */cancelExistingSpin:function(){var t=this.updateSpinning;t&&t.cancel&&t.cancel()},/* renderIndicator(t): t is the class prefix. Returns null when the indicator read through We is exactly null. A single-element array is unwrapped. If rn(indicator) is truthy the indicator is returned through hr with class "<t>-dot"; otherwise the same is tried with the default indicator Xp(); failing both, a span with four "<t>-dot-item" children is rendered. We, rn and hr are defined elsewhere in the bundle. */renderIndicator:function(t){var n="".concat(t,"-dot"),r=We(this,"indicator");return r===null?null:(Array.isArray(r)&&(r=r.length===1?r[0]:r),rn(r)?hr(r,{class:n}):Xp&&rn(Xp())?hr(Xp(),{class:n}):g("span",{class:"".concat(n," ").concat(t,"-dot-spin")},[g("i",{class:"".concat(t,"-dot-item")},null),g("i",{class:"".concat(t,"-dot-item")},null),g("i",{class:"".concat(t,"-dot-item")},null),g("i",{class:"".concat(t,"-dot-item")},null)]))}},render:function(){var 
t,n=this.$props,r=n.size,a=n.prefixCls,o=n.tip,i=n.wrapperClassName,l=this.$attrs,s=l.class,c=l.style,d=x8e(l,["class","style"]),f=this.configProvider,p=f.getPrefixCls,v=f.direction,m=p("spin",a),y=this.sSpinning,b=(t={},V(t,m,!0),V(t,"".concat(m,"-sm"),r==="small"),V(t,"".concat(m,"-lg"),r==="large"),V(t,"".concat(m,"-spinning"),y),V(t,"".concat(m,"-show-text"),!!o),V(t,"".concat(m,"-rtl"),v==="rtl"),V(t,s,!!s),t),C=g("div",le(le({},d),{},{style:c,class:b}),[this.renderIndicator(m),o?g("div",{class:"".concat(m,"-text")},[o]):null]),S=ht(this);if(S&&S.length){var w,k=(w={},V(w,"".concat(m,"-container"),!0),V(w,"".concat(m,"-blur"),y),w);return g("div",{class:["".concat(m,"-nested-loading"),i]},[y&&g("div",{key:"loading"},[C]),g("div",{class:k,key:"container"},[S])])}return C}});ol.setDefaultIndicator=M8e;ol.install=function(e){return e.component(ol.name,ol),e};var I8e={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M272.9 512l265.4-339.1c4.1-5.2.4-12.9-6.3-12.9h-77.3c-4.9 0-9.6 2.3-12.6 6.1L186.8 492.3a31.99 31.99 0 000 39.5l255.3 326.1c3 3.9 7.7 6.1 12.6 6.1H532c6.7 0 10.4-7.7 6.3-12.9L272.9 512zm304 0l265.4-339.1c4.1-5.2.4-12.9-6.3-12.9h-77.3c-4.9 0-9.6 2.3-12.6 6.1L490.8 492.3a31.99 31.99 0 000 39.5l255.3 326.1c3 3.9 7.7 6.1 12.6 6.1H836c6.7 0 10.4-7.7 6.3-12.9L576.9 512z"}}]},name:"double-left",theme:"outlined"},N8e=I8e;function ix(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){A8e(e,a,n[a])})}return e}function A8e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var SS=function(t,n){var r=ix({},t,n.attrs);return g(Et,ix({},r,{icon:N8e}),null)};SS.displayName="DoubleLeftOutlined";SS.inheritAttrs=!1;var 
/* D8e: alias of SS (the DoubleLeftOutlined icon component). R8e/L8e: SVG definition of the "double-right" outlined icon. */D8e=SS,R8e={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M533.2 492.3L277.9 166.1c-3-3.9-7.7-6.1-12.6-6.1H188c-6.7 0-10.4 7.7-6.3 12.9L447.1 512 181.7 851.1A7.98 7.98 0 00188 864h77.3c4.9 0 9.6-2.3 12.6-6.1l255.3-326.1c9.1-11.7 9.1-27.9 0-39.5zm304 0L581.9 166.1c-3-3.9-7.7-6.1-12.6-6.1H492c-6.7 0-10.4 7.7-6.3 12.9L751.1 512 485.7 851.1A7.98 7.98 0 00492 864h77.3c4.9 0 9.6-2.3 12.6-6.1l255.3-326.1c9.1-11.7 9.1-27.9 0-39.5z"}}]},name:"double-right",theme:"outlined"},L8e=R8e;/* lx(target, ...sources): copies the own enumerable string-keyed and symbol-keyed properties of every later argument onto the first argument (through F8e) and returns it; null/undefined sources are treated as {}. */function lx(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){F8e(e,a,n[a])})}return e}/* F8e(obj, key, value): sets obj[key]=value and returns obj; when the key is already reachable via "in" it is redefined with Object.defineProperty as an enumerable, configurable, writable data property. */function F8e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}/* kS: functional component that merges props (t) with n.attrs and renders Et with icon set to the double-right definition; exported below as B8e. */var kS=function(t,n){var r=lx({},t,n.attrs);return g(Et,lx({},r,{icon:L8e}),null)};kS.displayName="DoubleRightOutlined";kS.inheritAttrs=!1;var B8e=kS,/* V8e: wrapper component that renders dl with size:"small" merged over Qe(this) and then this.$attrs merged on top (assumes P is an Object.assign-style merge - confirm); ht(this) is passed as the default slot content. */V8e=G({inheritAttrs:!1,props:Sg(),Option:dl.Option,render:function(){var t=this,n=Qe(this),r=P(P(P({},n),{size:"small"}),this.$attrs);return g(dl,r,{default:function(){return[ht(t)]}})}}),/* xl ("Pager"): one page item rendered as an <li>. Emits "click" with the page number and "keypress" with (event, handleClick, page). The item gets the "-active" class when active is set and "-disabled" when page is falsy; content comes from itemRender({page, type:"page", originalElement}). */xl={name:"Pager",mixins:[nt],inheritAttrs:!1,props:{rootPrefixCls:u.string,page:u.number,active:u.looseBool,last:u.looseBool,locale:u.object,showTitle:u.looseBool,itemRender:{type:Function,default:function(){}}},methods:{handleClick:function(){this.__emit("click",this.page)},handleKeyPress:function(t){this.__emit("keypress",t,this.handleClick,this.page)}},render:function(){var t,n=this.$attrs,r=n.class,a=n.style,o=this.$props,i="".concat(o.rootPrefixCls,"-item"),l=Se(i,"".concat(i,"-").concat(o.page),(t={},V(t,"".concat(i,"-active"),o.active),V(t,"".concat(i,"-disabled"),!o.page),t),r);return 
g("li",{onClick:this.handleClick,onKeypress:this.handleKeyPress,title:this.showTitle?this.page:null,tabindex:"0",class:l,style:a},[this.itemRender({page:this.page,type:"page",originalElement:g("a",null,[this.page])})])}},/* Dl: numeric keyCode constants used by the pagination inputs. */Dl={ZERO:48,NINE:57,NUMPAD_ZERO:96,NUMPAD_NINE:105,BACKSPACE:8,DELETE:46,ENTER:13,ARROW_UP:38,ARROW_DOWN:40},/* z8e: options area of the pagination (<li class="<rootPrefixCls>-options">): an optional page-size selector (needs changeSize and selectComponentClass) and an optional quick-jumper input (needs quickGo). render() returns null when neither changeSize nor quickGo is provided. */z8e={mixins:[nt],props:{disabled:u.looseBool,changeSize:u.func,quickGo:u.func,selectComponentClass:u.any,current:u.number,pageSizeOptions:u.array.def(["10","20","30","40"]),pageSize:u.number,buildOptionText:u.func,locale:u.object,rootPrefixCls:u.string,selectPrefixCls:u.string,goButton:u.any},data:function(){return{goInputText:""}},methods:{/* Returns Number(goInputText), or the current page when the input is empty or not numeric. */getValidValue:function(){var t=this.goInputText,n=this.current;return!t||isNaN(t)?n:Number(t)},/* Default option label: "<value> <locale.items_per_page>". */defaultBuildOptionText:function(t){return"".concat(t.value," ").concat(this.locale.items_per_page)},/* Stores the typed text in goInputText; skipped while an IME composition is in progress or when the text is unchanged. */handleChange:function(t){var n=t.target,r=n.value,a=n.composing;t.isComposing||a||this.goInputText===r||this.setState({goInputText:r})},/* On blur, calls quickGo with the typed page unless a goButton is configured or focus moved to an element whose className contains "<rootPrefixCls>-prev" or "<rootPrefixCls>-next". */handleBlur:function(t){var n=this.$props,r=n.goButton,a=n.quickGo,o=n.rootPrefixCls;r||t.relatedTarget&&(t.relatedTarget.className.indexOf("".concat(o,"-prev"))>=0||t.relatedTarget.className.indexOf("".concat(o,"-next"))>=0)||a(this.getValidValue())},/* Jumps (quickGo) on Enter keyCode or on a click event when the input is non-empty, then clears the input. */go:function(t){var n=this.goInputText;n!==""&&(t.keyCode===Dl.ENTER||t.type==="click")&&(this.quickGo(this.getValidValue()),this.setState({goInputText:""}))}},render:function(){var t=this,n=this.rootPrefixCls,r=this.locale,a=this.changeSize,o=this.quickGo,i=this.goButton,l=this.selectComponentClass,s=this.defaultBuildOptionText,c=this.selectPrefixCls,d=this.pageSize,f=this.pageSizeOptions,p=this.goInputText,v=this.disabled,m="".concat(n,"-options"),y=null,b=null,C=null;if(!a&&!o)return null;if(a&&l){var S=this.buildOptionText||s,w=f.map(function(k,$){return 
g(l.Option,{key:$,value:k},{default:function(){return[S({value:k})]}})});/* y: page-size selector; its value is pageSize (or the first option) as a string, and a selection is passed to changeSize as a Number. */y=g(l,{disabled:v,prefixCls:c,showSearch:!1,class:"".concat(m,"-size-changer"),optionLabelProp:"children",value:(d||f[0]).toString(),onChange:function($){return t.changeSize(Number($))},getPopupContainer:function($){return $.parentNode}},{default:function(){return[w]}})}/* b: quick jumper; C: go button - a <button> when goButton is a boolean, otherwise a <span> wrapping the supplied goButton content. */return o&&(i&&(C=typeof i=="boolean"?g("button",{type:"button",onClick:this.go,onKeyup:this.go,disabled:v},[r.jump_to_confirm]):g("span",{onClick:this.go,onKeyup:this.go},[i])),b=g("div",{class:"".concat(m,"-quick-jumper")},[r.jump_to,at(g("input",{disabled:v,type:"text",value:p,onInput:this.handleChange,onChange:this.handleChange,onKeyup:this.go,onBlur:this.handleBlur},null),[[Mi]]),r.page,C])),g("li",{class:"".concat(m)},[y,b])}},/* H8e: default locale strings (Chinese text written as \u escapes); used as the default of the Pagination "locale" prop. */H8e={items_per_page:"\u6761/\u9875",jump_to:"\u8DF3\u81F3",jump_to_confirm:"\u786E\u5B9A",page:"\u9875",prev_page:"\u4E0A\u4E00\u9875",next_page:"\u4E0B\u4E00\u9875",prev_5:"\u5411\u524D 5 \u9875",next_5:"\u5411\u540E 5 \u9875",prev_3:"\u5411\u524D 3 \u9875",next_3:"\u5411\u540E 3 \u9875"},/* j8e(obj, excluded): object-rest helper; reuses globalThis.__rest when present, otherwise copies own string keys and enumerable symbol keys of obj that are not listed in excluded. */j8e=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n};/* K8e: no-op; Pagination's data() compares this.onChange against it. */function K8e(){}/* W8e(v): true when v is a finite number with no fractional part. */function W8e(e){return typeof e=="number"&&isFinite(e)&&Math.floor(e)===e}/* U8e: default itemRender - returns the originalElement it was given. */function U8e(e){var t=e.originalElement;return t}/* Jo(pageSize, state, props): page count = floor((props.total-1)/pageSize)+1; when pageSize is undefined it falls back to state.statePageSize. */function Jo(e,t,n){var r=e;return typeof r=="undefined"&&(r=t.statePageSize),Math.floor((n.total-1)/r)+1}var 
/* Y8e ("Pagination"): core pagination component. Internal state is stateCurrent (current page), stateCurrentInputValue (text of the simple-mode input) and statePageSize. */Y8e=G({name:"Pagination",mixins:[nt],inheritAttrs:!1,props:{disabled:u.looseBool,prefixCls:u.string.def("rc-pagination"),selectPrefixCls:u.string.def("rc-select"),current:u.number,defaultCurrent:u.number.def(1),total:u.number.def(0),pageSize:u.number,defaultPageSize:u.number.def(10),hideOnSinglePage:u.looseBool.def(!1),showSizeChanger:u.looseBool.def(!1),showLessItems:u.looseBool.def(!1),selectComponentClass:u.any,showPrevNextJumpers:u.looseBool.def(!0),showQuickJumper:u.oneOfType([u.looseBool,u.object]).def(!1),showTitle:u.looseBool.def(!0),pageSizeOptions:u.arrayOf(u.string),buildOptionText:u.func,showTotal:u.func,simple:u.looseBool,locale:u.object.def(H8e),itemRender:u.func,prevIcon:u.any,nextIcon:u.any,jumpPrevIcon:u.any,jumpNextIcon:u.any},/* Initial state: "current"/"pageSize" found in Qe(this) win over defaultCurrent/defaultPageSize, and the starting page is clamped to the page count. Warns when "current" is present but this.onChange is the K8e no-op. */data:function(){var t=Qe(this),n=this.onChange!==K8e,r="current"in t;r&&!n&&console.warn("Warning: You provided a `current` prop to a Pagination component without an `onChange` handler. This will render a read-only component.");var a=this.defaultCurrent;"current"in t&&(a=this.current);var o=this.defaultPageSize;return"pageSize"in t&&(o=this.pageSize),a=Math.min(a,Jo(o,void 0,t)),{stateCurrent:a,stateCurrentInputValue:a,statePageSize:o}},watch:{/* Mirror the current prop into stateCurrent and stateCurrentInputValue. */current:function(t){this.setState({stateCurrent:t,stateCurrentInputValue:t})},/* New pageSize prop: clamp the current page to the new page count; stateCurrent is only written when vt(this,"current") is falsy (presumably: no "current" prop supplied - confirm vt). */pageSize:function(t){var n={},r=this.stateCurrent,a=Jo(t,this.$data,this.$props);r=r>a?a:r,vt(this,"current")||(n.stateCurrent=r,n.stateCurrentInputValue=r),n.statePageSize=t,this.setState(n)},/* After the page changes, on the next tick blur the item of the previous page (n) if it still holds focus. */stateCurrent:function(t,n){var r=this;this.$nextTick(function(){if(r.$refs.paginationNode){var a=r.$refs.paginationNode.querySelector(".".concat(r.prefixCls,"-item-").concat(n));a&&document.activeElement===a&&a.blur()}})},/* total changed: re-clamp the current page to the new page count; in the branch without a "current" prop a page of 0 becomes 1 once pages exist. */total:function(){var t={},n=Jo(this.pageSize,this.$data,this.$props);if(vt(this,"current")){var r=Math.min(this.current,n);t.stateCurrent=r,t.stateCurrentInputValue=r}else{var 
a=this.stateCurrent;a===0&&n>0?a=1:a=Math.min(this.stateCurrent,n),t.stateCurrent=a}this.setState(t)}},methods:{/* Target of the "jump back" item: 3 pages back with showLessItems, otherwise 5, never below 1. */getJumpPrevPage:function(){return Math.max(1,this.stateCurrent-(this.showLessItems?3:5))},/* Target of the "jump forward" item: 3 or 5 pages ahead, never past the page count. */getJumpNextPage:function(){return Math.min(Jo(void 0,this.$data,this.$props),this.stateCurrent+(this.showLessItems?3:5))},/* Returns We(this, name, $props) when truthy, otherwise a default <a class="<prefixCls>-item-link">. */getItemIcon:function(t){var n=this.$props.prefixCls,r=We(this,t,this.$props)||g("a",{class:"".concat(n,"-item-link")},null);return r},/* Normalises the simple-mode input: "" stays "", non-numeric text falls back to stateCurrentInputValue, values >= page count become the page count, everything else is Number(value). */getValidValue:function(t){var n=t.target.value,r=Jo(void 0,this.$data,this.$props),a=this.$data.stateCurrentInputValue,o;return n===""?o=n:isNaN(Number(n))?o=a:n>=r?o=r:o=Number(n),o},/* A page is valid when it is an integer (W8e) and differs from stateCurrent. */isValid:function(t){return W8e(t)&&t!==this.stateCurrent},/* false when total <= the pageSize prop, otherwise the showQuickJumper prop. */shouldDisplayQuickJumper:function(){var t=this.$props,n=t.showQuickJumper,r=t.pageSize,a=t.total;return a<=r?!1:n},/* Prevent the default action for ArrowUp/ArrowDown in the simple-mode input. */handleKeyDown:function(t){(t.keyCode===Dl.ARROW_UP||t.keyCode===Dl.ARROW_DOWN)&&t.preventDefault()},/* Ignored while composing. Syncs stateCurrentInputValue, then Enter -> handleChange(n), ArrowUp -> handleChange(n-1), ArrowDown -> handleChange(n+1). */handleKeyUp:function(t){if(!(t.isComposing||t.target.composing)){var n=this.getValidValue(t),r=this.stateCurrentInputValue;n!==r&&this.setState({stateCurrentInputValue:n}),t.keyCode===Dl.ENTER?this.handleChange(n):t.keyCode===Dl.ARROW_UP?this.handleChange(n-1):t.keyCode===Dl.ARROW_DOWN&&this.handleChange(n+1)}},/* Clamps the current page to the page count for the new size (left as-is when that count is 0), updates internal state for the values vt(...) does not flag, then emits update:pageSize, update:current (only when the page changed) and showSizeChange(page, size). */changePageSize:function(t){var n=this.stateCurrent,r=n,a=Jo(t,this.$data,this.$props);n=n>a?a:n,a===0&&(n=this.stateCurrent),typeof t=="number"&&(vt(this,"pageSize")||this.setState({statePageSize:t}),vt(this,"current")||this.setState({stateCurrent:n,stateCurrentInputValue:n})),this.__emit("update:pageSize",t),n!==r&&this.__emit("update:current",n),this.__emit("showSizeChange",n,t)},/* Goes to page t when it is valid and the component is not disabled: clamps to [1, page count], updates state unless vt(this,"current") is truthy, emits update:current and change(page, statePageSize) and returns the page; otherwise returns stateCurrent. */handleChange:function(t){var n=this.$props.disabled,r=t;if(this.isValid(r)&&!n){var a=Jo(void 0,this.$data,this.$props);return r>a?r=a:r<1&&(r=1),vt(this,"current")||this.setState({stateCurrent:r,stateCurrentInputValue:r}),this.__emit("update:current",r),this.__emit("change",r,this.statePageSize),r}return 
this.stateCurrent},prev:function(){this.hasPrev()&&this.handleChange(this.stateCurrent-1)},next:function(){this.hasNext()&&this.handleChange(this.stateCurrent+1)},jumpPrev:function(){this.handleChange(this.getJumpPrevPage())},jumpNext:function(){this.handleChange(this.getJumpNextPage())},hasPrev:function(){return this.stateCurrent>1},hasNext:function(){return this.stateCurrent<Jo(void 0,this.$data,this.$props)},runIfEnter:function(t,n){if(t.key==="Enter"||t.charCode===13){for(var r=arguments.length,a=new Array(r>2?r-2:0),o=2;o<r;o++)a[o-2]=arguments[o];n.apply(void 0,a)}},runIfEnterPrev:function(t){this.runIfEnter(t,this.prev)},runIfEnterNext:function(t){this.runIfEnter(t,this.next)},runIfEnterJumpPrev:function(t){this.runIfEnter(t,this.jumpPrev)},runIfEnterJumpNext:function(t){this.runIfEnter(t,this.jumpNext)},handleGoTO:function(t){(t.keyCode===Dl.ENTER||t.type==="click")&&this.handleChange(this.stateCurrentInputValue)}},render:function(){var t,n=this.$props,r=n.prefixCls,a=n.disabled,o=If(this.$attrs).extraAttrs,i=o.class,l=j8e(o,["class"]);if(this.hideOnSinglePage===!0&&this.total<=this.statePageSize)return null;var s=this.itemRender||U8e,c=this.$props,d=this.locale,f=Jo(void 0,this.$data,this.$props),p=[],v=null,m=null,y=null,b=null,C=null,S=this.showQuickJumper&&this.showQuickJumper.goButton,w=this.showLessItems?1:2,k=this.stateCurrent,$=this.statePageSize,O=k-1>0?k-1:0,T=k+1<f?k+1:f;if(this.simple){S&&(typeof S=="boolean"?C=g("button",{type:"button",onClick:this.handleGoTO,onKeyup:this.handleGoTO},[d.jump_to_confirm]):C=g("span",{onClick:this.handleGoTO,onKeyup:this.handleGoTO},[S]),C=g("li",{title:this.showTitle?"".concat(d.jump_to).concat(this.stateCurrent,"/").concat(f):null,class:"".concat(r,"-simple-pager")},[C]));var _=this.hasPrev(),I=this.hasNext();return g("ul",le({class:Se("".concat(r," 
").concat(r,"-simple"),i)},l),[g("li",{title:this.showTitle?d.prev_page:null,onClick:this.prev,tabindex:_?0:null,onKeypress:this.runIfEnterPrev,class:"".concat(_?"":"".concat(r,"-disabled")," ").concat(r,"-prev"),"aria-disabled":!this.hasPrev()},[s({page:O,type:"prev",originalElement:this.getItemIcon("prevIcon")})]),g("li",{title:this.showTitle?"".concat(k,"/").concat(f):null,class:"".concat(r,"-simple-pager")},[at(g("input",{type:"text",value:this.stateCurrentInputValue,onKeydown:this.handleKeyDown,onKeyup:this.handleKeyUp,onInput:this.handleKeyUp,onChange:this.handleKeyUp,size:"3"},null),[[Mi]]),g("span",{class:"".concat(r,"-slash")},[yt("\uFF0F")]),f]),g("li",{title:this.showTitle?d.next_page:null,onClick:this.next,tabindex:this.hasNext?0:null,onKeypress:this.runIfEnterNext,class:"".concat(I?"":"".concat(r,"-disabled")," ").concat(r,"-next"),"aria-disabled":!this.hasNext()},[s({page:T,type:"next",originalElement:this.getItemIcon("nextIcon")})]),C])}if(f<=5+w*2){var L={locale:d,rootPrefixCls:r,showTitle:c.showTitle,itemRender:s,onClick:this.handleChange,onKeypress:this.runIfEnter};f||p.push(g(xl,le(le({},L),{},{key:"noPager",page:f,class:"".concat(r,"-disabled")}),null));for(var j=1;j<=f;j++){var F=k===j;p.push(g(xl,le(le({},L),{},{key:j,page:j,active:F}),null))}}else{var N=this.showLessItems?d.prev_3:d.prev_5,D=this.showLessItems?d.next_3:d.next_5;if(this.showPrevNextJumpers){var z="".concat(r,"-jump-prev");c.jumpPrevIcon&&(z+=" ".concat(r,"-jump-prev-custom-icon")),v=g("li",{title:this.showTitle?N:null,key:"prev",onClick:this.jumpPrev,tabindex:"0",onKeypress:this.runIfEnterJumpPrev,class:z},[s({page:this.getJumpPrevPage(),type:"jump-prev",originalElement:this.getItemIcon("jumpPrevIcon")})]);var B="".concat(r,"-jump-next");c.jumpNextIcon&&(B+=" 
".concat(r,"-jump-next-custom-icon")),m=g("li",{title:this.showTitle?D:null,key:"next",tabindex:"0",onClick:this.jumpNext,onKeypress:this.runIfEnterJumpNext,class:B},[s({page:this.getJumpNextPage(),type:"jump-next",originalElement:this.getItemIcon("jumpNextIcon")})])}b=g(xl,{locale:d,last:!0,rootPrefixCls:r,onClick:this.handleChange,onKeypress:this.runIfEnter,key:f,page:f,active:!1,showTitle:this.showTitle,itemRender:s},null),y=g(xl,{locale:d,rootPrefixCls:r,onClick:this.handleChange,onKeypress:this.runIfEnter,key:1,page:1,active:!1,showTitle:this.showTitle,itemRender:s},null);var M=Math.max(1,k-w),E=Math.min(k+w,f);k-1<=w&&(E=1+w*2),f-k<=w&&(M=f-w*2);for(var K=M;K<=E;K++){var W=k===K;p.push(g(xl,{locale:d,rootPrefixCls:r,onClick:this.handleChange,onKeypress:this.runIfEnter,key:K,page:K,active:W,showTitle:this.showTitle,itemRender:s},null))}k-1>=w*2&&k!==1+2&&(p[0]=g(xl,{locale:d,rootPrefixCls:r,onClick:this.handleChange,onKeypress:this.runIfEnter,key:M,page:M,class:"".concat(r,"-item-after-jump-prev"),active:!1,showTitle:this.showTitle,itemRender:s},null),p.unshift(v)),f-k>=w*2&&k!==f-2&&(p[p.length-1]=g(xl,{locale:d,rootPrefixCls:r,onClick:this.handleChange,onKeypress:this.runIfEnter,key:E,page:E,class:"".concat(r,"-item-before-jump-next"),active:!1,showTitle:this.showTitle,itemRender:s},null),p.push(m)),M!==1&&p.unshift(y),E!==f&&p.push(b)}var Y=null;this.showTotal&&(Y=g("li",{class:"".concat(r,"-total-text")},[this.showTotal(this.total,[this.total===0?0:(k-1)*$+1,k*$>this.total?this.total:k*$])]));var q=!this.hasPrev()||!f,J=!this.hasNext()||!f,ne=this.buildOptionText||this.$slots.buildOptionText;return g("ul",le(le({unselectable:"unselectable",ref:"paginationNode"},l),{},{class:Se((t={},V(t,"".concat(r),!0),V(t,"".concat(r,"-disabled"),a),t),i)}),[Y,g("li",{title:this.showTitle?d.prev_page:null,onClick:this.prev,tabindex:q?null:0,onKeypress:this.runIfEnterPrev,class:"".concat(q?"".concat(r,"-disabled"):""," 
").concat(r,"-prev"),"aria-disabled":q},[s({page:O,type:"prev",originalElement:this.getItemIcon("prevIcon")})]),p,g("li",{title:this.showTitle?d.next_page:null,onClick:this.next,tabindex:J?null:0,onKeypress:this.runIfEnterNext,class:"".concat(J?"".concat(r,"-disabled"):""," ").concat(r,"-next"),"aria-disabled":J},[s({page:T,type:"next",originalElement:this.getItemIcon("nextIcon")})]),g(z8e,{disabled:a,locale:d,rootPrefixCls:r,selectComponentClass:this.selectComponentClass,selectPrefixCls:this.selectPrefixCls,changeSize:this.showSizeChanger?this.changePageSize:null,current:k,pageSize:$,pageSizeOptions:this.pageSizeOptions,buildOptionText:ne||null,quickGo:this.shouldDisplayQuickJumper()?this.handleChange:null,goButton:S},null)])}}),q8e=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},$S=function(){return{total:u.number,defaultCurrent:u.number,disabled:u.looseBool,current:u.number,defaultPageSize:u.number,pageSize:u.number,hideOnSinglePage:u.looseBool,showSizeChanger:u.looseBool,pageSizeOptions:u.arrayOf(u.oneOfType([u.number,u.string])),buildOptionText:u.func,showSizeChange:u.func,showQuickJumper:an(u.oneOfType([u.looseBool,u.object])),showTotal:u.any,size:u.string,simple:u.looseBool,locale:u.object,prefixCls:u.string,selectPrefixCls:u.string,itemRender:u.func,role:u.string,showLessItems:u.looseBool,onChange:u.func,onShowSizeChange:u.func,"onUpdate:current":u.func,"onUpdate:pageSize":u.func}},N7=function(){return 
P(P({},$S()),{position:u.oneOf(rt("top","bottom","both"))})},G8e=G({name:"APagination",inheritAttrs:!1,props:P({},$S()),emits:["change","showSizeChange","update:current","update:pageSize"],setup:function(){return{configProvider:ve("configProvider",St)}},methods:{getIconsProps:function(t){var n=g("a",{class:"".concat(t,"-item-link")},[g(uc,null,null)]),r=g("a",{class:"".concat(t,"-item-link")},[g(wi,null,null)]),a=g("a",{class:"".concat(t,"-item-link")},[g("div",{class:"".concat(t,"-item-container")},[g(D8e,{class:"".concat(t,"-item-link-icon")},null),g("span",{class:"".concat(t,"-item-ellipsis")},[yt("\u2022\u2022\u2022")])])]),o=g("a",{class:"".concat(t,"-item-link")},[g("div",{class:"".concat(t,"-item-container")},[g(B8e,{class:"".concat(t,"-item-link-icon")},null),g("span",{class:"".concat(t,"-item-ellipsis")},[yt("\u2022\u2022\u2022")])])]);return{prevIcon:n,nextIcon:r,jumpPrevIcon:a,jumpNextIcon:o}},renderPagination:function(t){var n=Qe(this),r=n.prefixCls,a=n.selectPrefixCls,o=n.buildOptionText,i=n.size,l=n.locale,s=q8e(n,["prefixCls","selectPrefixCls","buildOptionText","size","locale"]),c=this.configProvider.getPrefixCls,d=c("pagination",r),f=c("select",a),p=i==="small",v=P(P(P(P(P({prefixCls:d,selectPrefixCls:f},s),this.getIconsProps(d)),{selectComponentClass:p?V8e:dl,locale:P(P({},t),l),buildOptionText:o||this.$slots.buildOptionText}),this.$attrs),{class:Se({mini:p},this.$attrs.class),itemRender:this.itemRender||this.$slots.itemRender});return g(Y8e,v,null)}},render:function(){return g(Kr,{componentName:"Pagination",defaultLocale:ZI,children:this.renderPagination},null)}}),OS=kn(G8e),X8e={avatar:u.any,description:u.any,prefixCls:u.string,title:u.any},A7=G({name:"AListItemMeta",props:X8e,displayName:"AListItemMeta",__ANT_LIST_ITEM_META:!0,slots:["avatar","description","title"],setup:function(t,n){var r=n.slots,a=Wt("list",t),o=a.prefixCls;return function(){var i,l,s,c,d,f,p="".concat(o.value,"-item-meta"),v=(i=t.title)!==null&&i!==void 
0?i:(l=r.title)===null||l===void 0?void 0:l.call(r),m=(s=t.description)!==null&&s!==void 0?s:(c=r.description)===null||c===void 0?void 0:c.call(r),y=(d=t.avatar)!==null&&d!==void 0?d:(f=r.avatar)===null||f===void 0?void 0:f.call(r),b=g("div",{class:"".concat(o.value,"-item-meta-content")},[v&&g("h4",{class:"".concat(o.value,"-item-meta-title")},[v]),m&&g("div",{class:"".concat(o.value,"-item-meta-description")},[m])]);return g("div",{class:p},[y&&g("div",{class:"".concat(o.value,"-item-meta-avatar")},[y]),(v||m)&&b])}}}),D7=Symbol("ListContextKey"),Z8e=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},J8e={prefixCls:u.string,extra:u.any,actions:u.array,grid:u.any,colStyle:u.style},R7=G({name:"AListItem",inheritAttrs:!1,Meta:A7,props:J8e,slots:["actions","extra"],setup:function(t,n){var r=n.slots,a=n.attrs,o=ve(D7,{grid:H(),itemLayout:H()}),i=o.itemLayout,l=o.grid,s=Wt("list",t),c=s.prefixCls,d=function(){var v,m=((v=r.default)===null||v===void 0?void 0:v.call(r))||[],y;return m.forEach(function(b){s0e(b)&&!As(b)&&(y=!0)}),y&&m.length>1},f=function(){var v,m,y=(v=t.extra)!==null&&v!==void 0?v:(m=r.extra)===null||m===void 0?void 0:m.call(r);return i.value==="vertical"?!!y:!d()};return function(){var p,v,m,y,b,C=a.class,S=Z8e(a,["class"]),w=c.value,k=(p=t.extra)!==null&&p!==void 0?p:(v=r.extra)===null||v===void 0?void 0:v.call(r),$=(m=r.default)===null||m===void 0?void 0:m.call(r),O=(y=t.actions)!==null&&y!==void 0?y:Un((b=r.actions)===null||b===void 0?void 0:b.call(r));O=O&&!Array.isArray(O)?[O]:O;var T=O&&O.length>0&&g("ul",{class:"".concat(w,"-item-action"),key:"actions"},[O.map(function(L,j){return 
g("li",{key:"".concat(w,"-item-action-").concat(j)},[L,j!==O.length-1&&g("em",{class:"".concat(w,"-item-action-split")},null)])})]),_=l.value?"div":"li",I=g(_,le(le({},S),{},{class:Se("".concat(w,"-item"),V({},"".concat(w,"-item-no-flex"),!f()),C)}),{default:function(){return[i.value==="vertical"&&k?[g("div",{class:"".concat(w,"-item-main"),key:"content"},[$,T]),g("div",{class:"".concat(w,"-item-extra"),key:"extra"},[k])]:[$,T,Ot(k,{key:"extra"})]]}});return l.value?g(Og,{flex:1,style:t.colStyle},{default:function(){return[I]}}):I}}}),Q8e={gutter:u.oneOfType([u.number,u.arrayOf(Number)]),column:u.number,xs:u.number,sm:u.number,md:u.number,lg:u.number,xl:u.number,xxl:u.number},eEe=rt("small","default","large"),tEe={bordered:u.looseBool,dataSource:u.array,extra:u.any,grid:u.shape(Q8e).loose,itemLayout:u.oneOf(rt("horizontal","vertical")),loading:an(u.oneOfType([u.looseBool,u.object])),loadMore:u.any,pagination:an(u.oneOfType([u.shape(N7()).loose,u.looseBool])),prefixCls:u.string,rowKey:u.any,renderItem:u.any,size:u.oneOf(eEe),split:u.looseBool,header:u.any,footer:u.any,locale:{type:Object}},ji=G({name:"AList",Item:R7,props:Rn(tEe,{dataSource:[],bordered:!1,split:!0,loading:!1,pagination:!1}),slots:["extra","loadMore","renderItem","header","footer"],setup:function(t,n){var r=n.slots,a,o;ot(D7,{grid:yn(t,"grid"),itemLayout:yn(t,"itemLayout")});var i={current:1,total:0},l=Wt("list",t),s=l.prefixCls,c=l.direction,d=l.renderEmpty,f=x(function(){return t.pagination&&kt(t.pagination)==="object"?t.pagination:{}}),p=H((a=f.value.defaultCurrent)!==null&&a!==void 0?a:1),v=H((o=f.value.defaultPageSize)!==null&&o!==void 0?o:10);ce(f,function(){"current"in f.value&&(p.value=f.value.current),"pageSize"in f.value&&(v.value=f.value.pageSize)});var m=function(N){return function(D,z){p.value=D,v.value=z,f.value[N]&&f.value[N](D,z)}},y=m("onChange"),b=m("onShowSizeChange"),C=function(N){var D;return g("div",{class:"".concat(s.value,"-empty-text")},[((D=t.locale)===null||D===void 0?void 
0:D.emptyText)||N("List")])},S=x(function(){return typeof t.loading=="boolean"?{spinning:t.loading}:t.loading}),w=x(function(){return S.value&&S.value.spinning}),k=x(function(){var F="";switch(t.size){case"large":F="lg";break;case"small":F="sm";break}return F}),$=x(function(){var F;return F={},V(F,"".concat(s.value),!0),V(F,"".concat(s.value,"-vertical"),t.itemLayout==="vertical"),V(F,"".concat(s.value,"-").concat(k.value),k.value),V(F,"".concat(s.value,"-split"),t.split),V(F,"".concat(s.value,"-bordered"),t.bordered),V(F,"".concat(s.value,"-loading"),w.value),V(F,"".concat(s.value,"-grid"),!!t.grid),V(F,"".concat(s.value,"-rtl"),c.value==="rtl"),F}),O=x(function(){var F=P(P(P({},i),{total:t.dataSource.length,current:p.value,pageSize:v.value}),t.pagination||{}),N=Math.ceil(F.total/F.pageSize);return F.current>N&&(F.current=N),F}),T=x(function(){var F=Je(t.dataSource);return t.pagination&&t.dataSource.length>(O.value.current-1)*O.value.pageSize&&(F=Je(t.dataSource).splice((O.value.current-1)*O.value.pageSize,O.value.pageSize)),F}),_=F2(),I=x(function(){for(var F=0;F<ws.length;F+=1){var N=ws[F];if(_.value[N])return N}}),L=x(function(){if(!!t.grid){var F=I.value&&t.grid[I.value]?t.grid[I.value]:t.grid.column;if(F)return{width:"".concat(100/F,"%"),maxWidth:"".concat(100/F,"%")}}}),j=function(N,D,z){var B,M=(B=t.renderItem)!==null&&B!==void 0?B:r.renderItem;if(!M)return null;var E;return typeof t.rowKey=="function"?E=t.rowKey(D):typeof t.rowKey=="string"?E=D[t.rowKey]:E=D.key,E||(E="list-item-".concat(z)),N[z]=E,M({item:D,index:z})};return function(){var F,N,D,z,B,M,E,K=(F=t.loadMore)!==null&&F!==void 0?F:(N=r.loadMore)===null||N===void 0?void 0:N.call(r),W=(D=t.footer)!==null&&D!==void 0?D:(z=r.footer)===null||z===void 0?void 0:z.call(r),Y=(B=t.header)!==null&&B!==void 0?B:(M=r.header)===null||M===void 0?void 0:M.call(r),q=Un((E=r.default)===null||E===void 0?void 
0:E.call(r)),J=[],ne=!!(K||t.pagination||W),oe=P(P({},$.value),V({},"".concat(s.value,"-something-after-last-item"),ne)),Q=t.pagination?g("div",{class:"".concat(s.value,"-pagination")},[g(OS,le(le({},O.value),{},{onChange:y,onShowSizeChange:b}),null)]):null,ae=w.value&&g("div",{style:{minHeight:"53px"}},null);if(T.value.length>0){var de=T.value.map(function(Pe,Be){return j(J,Pe,Be)}),be=de.map(function(Pe,Be){return g("div",{key:J[Be],style:L.value},[Pe])});ae=t.grid?g(Q2,{gutter:t.grid.gutter},{default:function(){return[be]}}):g("ul",{class:"".concat(s.value,"-items")},[de])}else!q.length&&!w.value&&(ae=C(d.value));var Ee=O.value.position||"bottom";return g("div",{class:oe},[(Ee==="top"||Ee==="both")&&Q,Y&&g("div",{class:"".concat(s.value,"-header")},[Y]),g(ol,S.value,{default:function(){return[ae,q]}}),W&&g("div",{class:"".concat(s.value,"-footer")},[W]),K||(Ee==="bottom"||Ee==="both")&&Q])}}});ji.install=function(e){return e.component(ji.name,ji),e.component(ji.Item.name,ji.Item),e.component(ji.Item.Meta.name,ji.Item.Meta),e};var nEe=ji,rEe={mixins:[nt],props:{duration:u.number.def(1.5),closable:u.looseBool,prefixCls:u.string,update:u.looseBool,closeIcon:u.any,onClose:u.func},watch:{duration:function(){this.restartCloseTimer()}},mounted:function(){this.startCloseTimer()},updated:function(){this.update&&this.restartCloseTimer()},beforeUnmount:function(){this.clearCloseTimer(),this.willDestroy=!0},methods:{close:function(t){t&&t.stopPropagation(),this.clearCloseTimer(),this.__emit("close")},startCloseTimer:function(){var t=this;this.clearCloseTimer(),!this.willDestroy&&this.duration&&(this.closeTimer=setTimeout(function(){t.close()},this.duration*1e3))},clearCloseTimer:function(){this.closeTimer&&(clearTimeout(this.closeTimer),this.closeTimer=null)},restartCloseTimer:function(){this.clearCloseTimer(),this.startCloseTimer()}},render:function(){var 
t,n=this.prefixCls,r=this.closable,a=this.clearCloseTimer,o=this.startCloseTimer,i=this.close,l=this.$attrs,s="".concat(n,"-notice"),c=(t={},V(t,"".concat(s),1),V(t,"".concat(s,"-closable"),r),t),d=We(this,"closeIcon");return g("div",{class:c,style:l.style||{right:"50%"},onMouseenter:a,onMouseleave:o},[g("div",{class:"".concat(s,"-content")},[ht(this)]),r?g("a",{tabindex:"0",onClick:i,class:"".concat(s,"-close")},[d||g("span",{class:"".concat(s,"-close-x")},null)]):null])}},aEe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n};function oEe(){}var iEe=0,lEe=Date.now();function sEe(){return"rcNotification_".concat(lEe,"_").concat(iEe++)}var T1=G({mixins:[nt],props:{prefixCls:u.string.def("rc-notification"),transitionName:u.string,animation:u.oneOfType([u.string,u.object]).def("fade"),maxCount:u.number,closeIcon:u.any},data:function(){return{notices:[]}},methods:{getTransitionName:function(){var t=this.$props,n=t.transitionName;return!n&&t.animation&&(n="".concat(t.prefixCls,"-").concat(t.animation)),n},add:function(t){var n=t.key=t.key||sEe(),r=this.$props.maxCount;this.setState(function(a){var o=a.notices,i=o.map(function(s){return s.key}).indexOf(n),l=o.concat();return i!==-1?l.splice(i,1,t):(r&&o.length>=r&&(t.updateKey=l[0].updateKey||l[0].key,l.shift()),l.push(t)),{notices:l}})},remove:function(t){this.setState(function(n){return{notices:n.notices.filter(function(r){return r.key!==t})}})}},render:function(){var t=this,n=this.prefixCls,r=this.notices,a=this.remove,o=this.getTransitionName,i=this.$attrs,l=d2(o()),s=r.map(function(d,f){var 
p=Boolean(f===r.length-1&&d.updateKey),v=d.updateKey?d.updateKey:d.key,m=d.content,y=d.duration,b=d.closable,C=d.onClose,S=d.style,w=d.class,k=id(a.bind(t,d.key),C),$={prefixCls:n,duration:y,closable:b,update:p,closeIcon:We(t,"closeIcon"),onClose:k,onClick:d.onClick||oEe,style:S,class:w,key:v};return g(rEe,$,{default:function(){return[typeof m=="function"?m():m]}})}),c=V({},n,1);return g("div",{class:c,style:i.style||{top:"65px",left:"50%"}},[g(f2,le({tag:"span"},l),{default:function(){return[s]}})])}});T1.newInstance=function(t,n){var r=t||{},a=r.getContainer,o=r.style,i=r.class,l=aEe(r,["getContainer","style","class"]),s=document.createElement("div");if(a){var c=a();c.appendChild(s)}else document.body.appendChild(s);var d=$m({mounted:function(){var p=this;this.$nextTick(function(){n({notice:function(m){p.$refs.notification.add(m)},removeNotice:function(m){p.$refs.notification.remove(m)},component:p,destroy:function(){d.unmount(s),s.parentNode&&s.parentNode.removeChild(s)}})})},render:function(){var p=P(P({},l),{ref:"notification",style:o,class:i});return g(T1,p,null)}});d.mount(s)};var L7=T1,F7=3,B7,oa,uEe=1,um="ant-message",V7="move-up",z7=function(){return document.body},H7;function cEe(e){if(oa){e(oa);return}L7.newInstance({prefixCls:um,transitionName:V7,style:{top:B7},getContainer:z7,maxCount:H7},function(t){if(oa){e(oa);return}oa=t,e(t)})}var dEe={info:BA,success:zf,error:Yr,warning:Hf,loading:co};function fEe(e){var t=e.duration!==void 0?e.duration:F7,n=dEe[e.type],r=n?g(n,null,null):"",a=e.key||uEe++,o=new Promise(function(l){var s=function(){return typeof e.onClose=="function"&&e.onClose(),l(!0)};cEe(function(c){c.notice({key:a,duration:t,style:e.style||{},class:e.class,content:function(){return g("div",{class:"".concat(um,"-custom-content").concat(e.type?" 
".concat(um,"-").concat(e.type):"")},[e.icon||r,g("span",null,[e.content])])},onClose:s})})}),i=function(){oa&&oa.removeNotice(a)};return i.then=function(l,s){return o.then(l,s)},i.promise=o,i}function hEe(e){return Object.prototype.toString.call(e)==="[object Object]"&&!!e.content}var Lu={open:fEe,config:function(t){t.top!==void 0&&(B7=t.top,oa=null),t.duration!==void 0&&(F7=t.duration),t.prefixCls!==void 0&&(um=t.prefixCls),t.getContainer!==void 0&&(z7=t.getContainer),t.transitionName!==void 0&&(V7=t.transitionName,oa=null),t.maxCount!==void 0&&(H7=t.maxCount,oa=null)},destroy:function(){oa&&(oa.destroy(),oa=null)}};["success","info","warning","error","loading"].forEach(function(e){Lu[e]=function(t,n,r){return hEe(t)?Lu.open(P(P({},t),{type:e})):(typeof n=="function"&&(r=n,n=void 0),Lu.open({content:t,duration:n,type:e,onClose:r}))}});Lu.warn=Lu.warning;var j7=Lu;function pEe(e){var t=e.selectionStart;return e.value.slice(0,t)}function sx(e){return(e||"").toLowerCase()}function vEe(e){var t=arguments.length>1&&arguments[1]!==void 0?arguments[1]:"",n=Array.isArray(t)?t:[t];return n.reduce(function(r,a){var o=e.lastIndexOf(a);return o>r.location?{location:o,prefix:a}:r},{location:-1,prefix:""})}function mEe(e,t,n){var r=e[0];if(!r||r===n)return e;for(var a=e,o=t.length,i=0;i<o;i+=1)if(sx(a[i])!==sx(t[i])){a=a.slice(i);break}else i===o-1&&(a=a.slice(o));return a}function gEe(e,t){var n=t.measureLocation,r=t.prefix,a=t.targetText,o=t.selectionStart,i=t.split,l=e.slice(0,n);l[l.length-i.length]===i&&(l=l.slice(0,l.length-i.length)),l&&(l="".concat(l).concat(i));var s=mEe(e.slice(o),a.slice(o-n-r.length),i);s.slice(0,i.length)===i&&(s=s.slice(i.length));var c="".concat(l).concat(r).concat(a).concat(i);return{text:"".concat(c).concat(s),selectionLocation:c.length}}function yEe(e,t){e.setSelectionRange(t,t),e.blur(),e.focus()}function bEe(){var e=arguments.length>0&&arguments[0]!==void 0?arguments[0]:"",t=arguments.length>1&&arguments[1]!==void 
0?arguments[1]:{},n=t.split;return!n||e.indexOf(n)===-1}function CEe(){var e=arguments.length>0&&arguments[0]!==void 0?arguments[0]:"",t=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{},n=t.value,r=n===void 0?"":n,a=e.toLowerCase();return r.toLowerCase().indexOf(a)!==-1}var PS={value:u.string,disabled:u.looseBool,children:u.any},wEe=G({name:"Option",props:PS,render:function(){return null}});function ux(){}var SEe={name:"DropdownMenu",props:{prefixCls:u.string,options:u.arrayOf(PS)},setup:function(){return{mentionsContext:ve("mentionsContext")}},render:function(){var t=this.mentionsContext,n=t.notFoundContent,r=t.activeIndex,a=t.setActiveIndex,o=t.selectOption,i=t.onFocus,l=i===void 0?ux:i,s=t.onBlur,c=s===void 0?ux:s,d=this.$props,f=d.prefixCls,p=d.options,v=p[r]||{};return g(jr,{prefixCls:"".concat(f,"-menu"),activeKey:v.value,onSelect:function(y){var b=y.key,C=p.find(function(S){var w=S.value;return w===b});o(C)},onBlur:c,onFocus:l},{default:function(){return[[].concat(Je(p.map(function(y,b){var C=y.value,S=y.disabled,w=y.children;return g(fl,{key:C,disabled:S,onMouseenter:function(){a(b)}},{default:function(){return[w]}})})),[!p.length&&g(fl,{key:"notFoundContent",disabled:!0},{default:function(){return[n]}})]).filter(Boolean)]}})}},K7=rt("top","bottom"),kEe={bottomRight:{points:["tl","br"],offset:[0,4],overflow:{adjustX:0,adjustY:1}},topRight:{points:["bl","tr"],offset:[0,-4],overflow:{adjustX:0,adjustY:1}}},$Ee={name:"KeywordTrigger",props:{loading:u.looseBool,options:u.arrayOf(PS),prefixCls:u.string,placement:u.oneOf(K7),visible:u.looseBool,transitionName:u.string,getPopupContainer:u.func},methods:{getDropdownPrefix:function(){return"".concat(this.$props.prefixCls,"-dropdown")},getDropdownElement:function(){var t=this.$props.options;return g(SEe,{prefixCls:this.getDropdownPrefix(),options:t},null)}},render:function(){var t,n=this.$props,r=n.visible,a=n.placement,o=n.transitionName,i=n.getPopupContainer,l=this.$slots,s=(t=l.default)===null||t===void 
0?void 0:t.call(l),c=this.getDropdownElement();return g(Ii,{prefixCls:this.getDropdownPrefix(),popupVisible:r,popup:c,popupPlacement:a==="top"?"topRight":"bottomRight",popupTransitionName:o,builtinPlacements:kEe,getPopupContainer:i},{default:function(){return[s]}})}},W7={autofocus:u.looseBool,prefix:u.oneOfType([u.string,u.array]),prefixCls:u.string,value:u.string,defaultValue:u.string,disabled:u.looseBool,notFoundContent:u.VNodeChild,split:u.string,transitionName:u.string,placement:u.oneOf(K7),character:u.any,characterRender:u.func,filterOption:{type:[Boolean,Function]},validateSearch:u.func,getPopupContainer:{type:Function}},U7=P(P({},W7),{children:u.any}),Y7={prefix:"@",split:" ",validateSearch:bEe,filterOption:CEe};An(U7,Y7);var cx=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n};function dx(){}var OEe={name:"Mentions",mixins:[nt],inheritAttrs:!1,props:An(U7,Y7),created:function(){this.mentionsContext=ot("mentionsContext",this)},data:function(){var t=this.$props,n=t.value,r=n===void 0?"":n,a=t.defaultValue,o=a===void 0?"":a;return cc(this.$props.children),{_value:vt(this,"value")?r:o,measuring:!1,measureLocation:0,measureText:null,measurePrefix:"",activeIndex:0,isFocus:!1}},watch:{value:function(t){this.$data._value=t}},updated:function(){var t=this;this.$nextTick(function(){var n=t.$data.measuring;n&&(t.$refs.measure.scrollTop=t.$refs.textarea.scrollTop)})},methods:{triggerChange:function(t){var n=Qe(this);"value"in n?this.$forceUpdate():this.setState({_value:t}),this.__emit("change",t)},onChange:function(t){var n=t.target,r=n.value,a=n.composing,o=t.isComposing;o||a||this.triggerChange(r)},onKeyDown:function(t){var 
n=t.which,r=this.$data,a=r.activeIndex,o=r.measuring;if(!!o){if(n===ze.UP||n===ze.DOWN){var i=this.getOptions().length,l=n===ze.UP?-1:1,s=(a+l+i)%i;this.setState({activeIndex:s}),t.preventDefault()}else if(n===ze.ESC)this.stopMeasure();else if(n===ze.ENTER){t.preventDefault();var c=this.getOptions();if(!c.length){this.stopMeasure();return}var d=c[a];this.selectOption(d)}}},onKeyUp:function(t){var n=t.key,r=t.which,a=this.$data,o=a.measureText,i=a.measuring,l=this.$props,s=l.prefix,c=s===void 0?"":s,d=l.validateSearch,f=t.target;if(!f.composing){var p=pEe(f),v=vEe(p,c),m=v.location,y=v.prefix;if([ze.ESC,ze.UP,ze.DOWN,ze.ENTER].indexOf(r)===-1)if(m!==-1){var b=p.slice(m+y.length),C=d(b,this.$props),S=!!this.getOptions(b).length;C?(n===y||i||b!==o&&S)&&this.startMeasure(b,y,m):i&&this.stopMeasure(),C&&this.__emit("search",b,y)}else i&&this.stopMeasure()}},onInputFocus:function(t){this.onFocus(t)},onInputBlur:function(t){this.onBlur(t)},onDropdownFocus:function(){this.onFocus()},onDropdownBlur:function(){this.onBlur()},onFocus:function(t){window.clearTimeout(this.focusId);var n=this.$data.isFocus;!n&&t&&this.__emit("focus",t),this.setState({isFocus:!0})},onBlur:function(t){var n=this;this.focusId=window.setTimeout(function(){n.setState({isFocus:!1}),n.stopMeasure(),n.__emit("blur",t)},100)},selectOption:function(t){var n=this,r=this.$data,a=r._value,o=r.measureLocation,i=r.measurePrefix,l=this.$props.split,s=t.value,c=s===void 0?"":s,d=gEe(a,{measureLocation:o,targetText:c,prefix:i,selectionStart:this.$refs.textarea.selectionStart,split:l}),f=d.text,p=d.selectionLocation;this.triggerChange(f),this.stopMeasure(function(){yEe(n.$refs.textarea,p)}),this.__emit("select",t,i)},setActiveIndex:function(t){this.setState({activeIndex:t})},getOptions:function(t){var n=t||this.$data.measureText||"",r=this.$props,a=r.filterOption,o=r.children,i=o===void 0?[]:o,l=(Array.isArray(i)?i:[i]).map(function(s){var c,d;return 
P(P({},Qe(s)),{children:(d=(c=s.children).default)===null||d===void 0?void 0:d.call(c)})}).filter(function(s){return a===!1?!0:a(n,s)});return l},startMeasure:function(t,n,r){this.setState({measuring:!0,measureText:t,measurePrefix:n,measureLocation:r,activeIndex:0})},stopMeasure:function(t){this.setState({measuring:!1,measureLocation:0,measureText:null},t)},focus:function(){this.$refs.textarea.focus()},blur:function(){this.$refs.textarea.blur()}},render:function(){var t=this.$data,n=t._value,r=t.measureLocation,a=t.measurePrefix,o=t.measuring,i=Qe(this),l=i.prefixCls,s=i.placement,c=i.transitionName;i.notFoundContent;var d=i.getPopupContainer,f=cx(i,["prefixCls","placement","transitionName","notFoundContent","getPopupContainer"]),p=this.$attrs,v=p.class,m=p.style,y=cx(p,["class","style"]),b=bn(f,["value","defaultValue","prefix","split","children","validateSearch","filterOption"]),C=o?this.getOptions():[],S=P(P(P({},b),y),{onChange:dx,onSelect:dx,value:n,onInput:this.onChange,onBlur:this.onInputBlur,onKeydown:this.onKeyDown,onKeyup:this.onKeyUp,onFocus:this.onInputFocus});return g("div",{class:Se(l,v),style:m},[at(g("textarea",le({ref:"textarea"},S),null),[[Mi]]),o&&g("div",{ref:"measure",class:"".concat(l,"-measure")},[n.slice(0,r),g($Ee,{prefixCls:l,transitionName:c,placement:s,options:C,visible:!0,getPopupContainer:d},{default:function(){return[g("span",null,[a])]}}),n.slice(r+a.length)])])}},TS=G(OEe);TS.Option=wEe;var fx=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},hx=TS.Option;function PEe(){return!0}function TEe(){var e=arguments.length>0&&arguments[0]!==void 0?arguments[0]:"",t=arguments.length>1?arguments[1]:void 0,n=t||{},r=n.prefix,a=r===void 
0?"@":r,o=n.split,i=o===void 0?" ":o,l=Array.isArray(a)?a:[a];/* Split the text on the separator; a segment is kept only if it starts with one of the prefixes and has a non-empty remainder. */return e.split(i).map(function(){var s=arguments.length>0&&arguments[0]!==void 0?arguments[0]:"",c=null;return l.some(function(d){var f=s.slice(0,d.length);return f===d?(c=d,!0):!1}),c!==null?{prefix:c,value:s.slice(c.length)}:null}).filter(function(s){return!!s&&!!s.value})}/* xEe: props for AMentions - everything in W7 plus `loading` and the focus/blur/select/change handler props. */var xEe=P(P({},W7),{loading:u.looseBool,onFocus:{type:Function},onBlur:{type:Function},onSelect:{type:Function},onChange:{type:Function}}),/* Hl: the AMentions component. It keeps a `focused` flag in its data and re-emits focus/blur/select/change; TEe is exposed as the static `getMentions`. */Hl=G({name:"AMentions",mixins:[nt],inheritAttrs:!1,Option:P(P({},hx),{name:"AMentionsOption"}),getMentions:TEe,props:xEe,emits:["update:value","change","focus","blur","select"],setup:function(){return{configProvider:ve("configProvider",St)}},data:function(){return{focused:!1}},mounted:function(){Ne(function(){})},methods:{handleFocus:function(t){this.$emit("focus",t),this.setState({focused:!0})},handleBlur:function(t){this.$emit("blur",t),this.setState({focused:!1})},handleSelect:function(){for(var t=arguments.length,n=new Array(t),r=0;r<t;r++)n[r]=arguments[r];this.$emit.apply(this,["select"].concat(n)),this.setState({focused:!0})},/* Every change is emitted twice: as "update:value" and as "change". */handleChange:function(t){this.$emit("update:value",t),this.$emit("change",t)},/* Uses the component's own notFoundContent when it is defined, otherwise asks the supplied renderer t for the "Select" placeholder. */getNotFoundContent:function(t){var n=We(this,"notFoundContent");return n!==void 0?n:t("Select")},/* While `loading` is set, a single disabled placeholder option (value "ANTD_SEARCHING") is returned instead of ht(this). */getOptions:function(){var t=this.$props.loading,n=ht(this);return t?g(hx,{value:"ANTD_SEARCHING",disabled:!0},{default:function(){return[g(ol,{size:"small"},null)]}}):n},/* While `loading` is set, PEe (which always returns true) replaces the configured filterOption. */getFilterOption:function(){var t=this.$props,n=t.filterOption,r=t.loading;return r?PEe:n},focus:function(){this.$refs.vcMentions.focus()},blur:function(){this.$refs.vcMentions.blur()}},render:function(){var 
t,n=this.$data.focused,r=this.configProvider,a=r.getPrefixCls,o=r.renderEmpty,i=Qe(this),l=i.prefixCls,s=i.disabled,c=i.getPopupContainer,d=fx(i,["prefixCls","disabled","getPopupContainer"]),f=this.$attrs,p=f.class,v=fx(f,["class"]),m=a("mentions",l),y=bn(d,["loading","onUpdate:value"]),b=Se(p,(t={},V(t,"".concat(m,"-disabled"),s),V(t,"".concat(m,"-focused"),n),t)),C=P(P(P(P({prefixCls:m,notFoundContent:this.getNotFoundContent(o)},y),{disabled:s,filterOption:this.getFilterOption(),getPopupContainer:c,children:this.getOptions(),class:b,rows:1}),v),{onChange:this.handleChange,onSelect:this.handleSelect,onFocus:this.handleFocus,onBlur:this.handleBlur,ref:"vcMentions"});return g(TS,C,null)}});Hl.install=function(e){return e.component(Hl.name,Hl),e.component(Hl.Option.name,Hl.Option),e};var _Ee=Hl.Option,EEe=Hl,x1=null,MEe=function(t){x1={x:t.pageX,y:t.pageY},setTimeout(function(){return x1=null},100)};typeof window!="undefined"&&window.document&&window.document.documentElement&&Kn(document.documentElement,"click",MEe,!0);function IEe(){}var 
NEe={prefixCls:u.string,visible:u.looseBool,confirmLoading:u.looseBool,title:u.any,closable:u.looseBool,closeIcon:u.any,onOk:{type:Function},onCancel:{type:Function},afterClose:u.func.def(IEe),centered:u.looseBool,width:u.oneOfType([u.string,u.number]),footer:u.any,okText:u.any,okType:{type:String},cancelText:u.any,icon:u.any,maskClosable:u.looseBool,forceRender:u.looseBool,okButtonProps:u.shape(Hv).loose,cancelButtonProps:u.shape(Hv).loose,destroyOnClose:u.looseBool,wrapClassName:u.string,maskTransitionName:u.string,transitionName:u.string,getContainer:u.any,zIndex:u.number,bodyStyle:u.style,maskStyle:u.style,mask:u.looseBool,keyboard:u.looseBool,wrapProps:u.object,focusTriggerAfterClose:u.looseBool},wu=[],cr=G({name:"AModal",inheritAttrs:!1,props:Rn(NEe,{width:520,transitionName:"zoom",maskTransitionName:"fade",confirmLoading:!1,visible:!1,okType:"primary"}),emits:["update:visible","cancel","change","ok"],setup:function(){return{configProvider:ve("configProvider",St)}},data:function(){return{sVisible:!!this.visible}},watch:{visible:function(t){this.sVisible=t}},methods:{handleCancel:function(t){this.$emit("update:visible",!1),this.$emit("cancel",t),this.$emit("change",!1)},handleOk:function(t){this.$emit("ok",t)},renderFooter:function(t){var n=this,r=this.okType,a=this.confirmLoading,o=P({onClick:this.handleCancel},this.cancelButtonProps||{}),i=P(P(P({onClick:this.handleOk},w2(r)),{loading:a}),this.okButtonProps||{});return g("div",null,[g(Cr,o,{default:function(){return[We(n,"cancelText")||t.cancelText]}}),g(Cr,i,{default:function(){return[We(n,"okText")||t.okText]}})])}},render:function(){var 
t=this.prefixCls,n=this.sVisible,r=this.wrapClassName,a=this.centered,o=this.getContainer,i=this.$attrs,l=ht(this),s=this.configProvider,c=s.getPrefixCls,d=s.getPopupContainer,f=c("modal",t),p=g(Kr,{componentName:"Modal",defaultLocale:qN(),children:this.renderFooter},null),v=We(this,"closeIcon"),m=g("span",{class:"".concat(f,"-close-x")},[v||g(go,{class:"".concat(f,"-close-icon")},null)]),y=We(this,"footer"),b=We(this,"title"),C=P(P(P({},this.$props),i),{getContainer:o===void 0?d:o,prefixCls:f,wrapClassName:Se(V({},"".concat(f,"-centered"),!!a),r),title:b,footer:y===void 0?p:y,visible:n,mousePosition:x1,closeIcon:m,onClose:this.handleCancel});return g(P7,C,{default:function(){return[l]}})}}),AEe={type:{type:String},actionFn:u.func,closeModal:u.func,autofocus:u.looseBool,buttonProps:u.object},px=G({mixins:[nt],props:AEe,setup:function(){return{timeoutId:void 0}},data:function(){return{loading:!1}},mounted:function(){var t=this;this.autofocus&&(this.timeoutId=setTimeout(function(){return Sn(t).focus()}))},beforeUnmount:function(){clearTimeout(this.timeoutId)},methods:{onClick:function(){var t=this,n=this.actionFn,r=this.closeModal;if(n){var a;n.length?a=n(r):(a=n(),a||r()),a&&a.then&&(this.setState({loading:!0}),a.then(function(){r.apply(void 0,arguments)},function(o){console.error(o),t.setState({loading:!1})}))}else r()}},render:function(){var t=this,n=this.type,r=this.loading,a=this.buttonProps,o=P(P(P({},w2(n)),{onClick:this.onClick,loading:r}),a);return g(Cr,o,{default:function(){return[ht(t)]}})}});function Xc(e,t){return typeof t=="function"?t():t}var q7=function(t){var n=t.icon,r=t.onCancel,a=t.onOk,o=t.close,i=t.closable,l=i===void 0?!1:i,s=t.zIndex,c=t.afterClose,d=t.visible,f=t.keyboard,p=t.centered,v=t.getContainer,m=t.maskStyle,y=t.okButtonProps,b=t.cancelButtonProps,C=t.okType||"primary",S=t.prefixCls||"ant-modal",w="".concat(S,"-confirm"),k="okCancel"in t?t.okCancel:!0,$=t.width||416,O=t.style||{},T=t.mask===void 0?!0:t.mask,_=t.maskClosable===void 
0?!1:t.maskClosable,I=qN(),L=Xc("okText",t.okText)||(k?I.okText:I.justOkText),j=Xc("cancelText",t.cancelText)||I.cancelText,F=t.autoFocusButton===null?!1:t.autoFocusButton||"ok",N=t.transitionName||"zoom",D=t.maskTransitionName||"fade",z=Se(w,"".concat(w,"-").concat(t.type),"".concat(S,"-").concat(t.type),t.class),B=k&&g(px,{actionFn:r,closeModal:o,autofocus:F==="cancel",buttonProps:b},{default:function(){return[j]}});return g(cr,{prefixCls:S,class:z,wrapClassName:Se(V({},"".concat(w,"-centered"),!!p)),onCancel:function(E){return o({triggerCancel:!0},E)},visible:d,title:"",transitionName:N,footer:"",maskTransitionName:D,mask:T,maskClosable:_,maskStyle:m,style:O,width:$,zIndex:s,afterClose:c,keyboard:f,centered:p,getContainer:v,closable:l},{default:function(){return[g("div",{class:"".concat(w,"-body-wrapper")},[g("div",{class:"".concat(w,"-body")},[Xc("icon",n),t.title===void 0?null:g("span",{class:"".concat(w,"-title")},[Xc("title",t.title)]),g("div",{class:"".concat(w,"-content")},[Xc("content",t.content)])]),g("div",{class:"".concat(w,"-btns")},[B,g(px,{type:C,actionFn:a,closeModal:o,autofocus:F==="ok",buttonProps:y},{default:function(){return[L]}})])])]}})};q7.inheritAttrs=!1;var DEe=q7,REe=function(t){var n=document.createElement("div");document.body.appendChild(n);var r=P(P({},bn(t,["parentContext","appContext"])),{close:o,visible:!0}),a=null;function o(){for(var d=arguments.length,f=new Array(d),p=0;p<d;p++)f[p]=arguments[p];r=P(P({},r),{visible:!1,afterClose:l.bind.apply(l,[this].concat(f))}),i(r)}function i(d){r=P(P({},r),d),a&&(P(a.component.props,r),a.component.update())}function l(){a&&n.parentNode&&(P(a.component.props,{vIf:!1}),a.component.update(),a=null,n.parentNode.removeChild(n));for(var d=arguments.length,f=new Array(d),p=0;p<d;p++)f[p]=arguments[p];var v=f.some(function(b){return b&&b.triggerCancel});t.onCancel&&v&&t.onCancel.apply(t,f);for(var m=0;m<wu.length;m++){var y=wu[m];if(y===o){wu.splice(m,1);break}}}var s=function(f){return 
f.vIf?g(DEe,f,null):null};function c(d){var f=g(s,P(P({},d),{vIf:!0}));return f.appContext=t.parentContext||t.appContext||f.appContext,vs(f,n),f}return a=c(r),wu.push(o),{destroy:o,update:i}},jf=REe,LEe=function(t){var n=P({type:"info",icon:function(){return g(I2,null,null)},okCancel:!1},t);return jf(n)},FEe=function(t){var n=P({type:"success",icon:function(){return g(x2,null,null)},okCancel:!1},t);return jf(n)},BEe=function(t){var n=P({type:"error",icon:function(){return g(A2,null,null)},okCancel:!1},t);return jf(n)},G7=function(t){var n=P({type:"warning",icon:function(){return g(E2,null,null)},okCancel:!1},t);return jf(n)},VEe=G7,zEe=function(t){var n=P({type:"confirm",okCancel:!0},t);return jf(n)};cr.info=LEe;cr.success=FEe;cr.error=BEe;cr.warning=G7;cr.warn=VEe;cr.confirm=zEe;cr.destroyAll=function(){for(;wu.length;){var t=wu.pop();t&&t()}};cr.install=function(e){return e.component(cr.name,cr),e};var X7=function(t){var n=t.value,r=t.formatter,a=t.precision,o=t.decimalSeparator,i=t.groupSeparator,l=i===void 0?"":i,s=t.prefixCls,c;if(typeof r=="function")c=r({value:n});else{var d=String(n),f=d.match(/^(-?)(\d*)(\.(\d+))?$/);if(!f)c=d;else{var p=f[1],v=f[2]||"0",m=f[4]||"";v=v.replace(/\B(?=(\d{3})+(?!\d))/g,l),typeof a=="number"&&(m=zW(m,a,"0").slice(0,a)),m&&(m="".concat(o).concat(m)),c=[g("span",{key:"int",class:"".concat(s,"-content-value-int")},[p,v]),m&&g("span",{key:"decimal",class:"".concat(s,"-content-value-decimal")},[m])]}}return g("span",{class:"".concat(s,"-content-value")},[c])};X7.displayName="StatisticNumber";var HEe=X7,Ng=function(){return{prefixCls:u.string,size:u.oneOfType([u.oneOf(rt("large","small","default")),u.number]),shape:u.oneOf(rt("circle","square","round")),active:u.looseBool}},Z7=function(t){var 
n,r,a=t.prefixCls,o=t.size,i=t.shape,l=Se((n={},V(n,"".concat(a,"-lg"),o==="large"),V(n,"".concat(a,"-sm"),o==="small"),n)),s=Se((r={},V(r,"".concat(a,"-circle"),i==="circle"),V(r,"".concat(a,"-square"),i==="square"),V(r,"".concat(a,"-round"),i==="round"),r)),c=typeof o=="number"?{width:"".concat(o,"px"),height:"".concat(o,"px"),lineHeight:"".concat(o,"px")}:{};return g("span",{class:Se(a,l,s),style:c},null)};Z7.displayName="SkeletonElement";var Ag=Z7,J7=Rn(P(P({},Ng()),{shape:u.oneOf(rt("circle","square"))}),{size:"large"}),jEe=G({name:"ASkeletonAvatar",props:J7,setup:function(t){var n=Wt("skeleton",t),r=n.prefixCls,a=x(function(){return Se(r.value,"".concat(r.value,"-element"),V({},"".concat(r.value,"-active"),t.active))});return function(){return g("div",{class:a.value},[g(Ag,le(le({},t),{},{prefixCls:"".concat(r.value,"-avatar")}),null)])}}}),xS=jEe,Q7={prefixCls:u.string,width:u.oneOfType([u.number,u.string])},KEe=G({name:"SkeletonTitle",props:Q7,setup:function(t){return function(){var n=t.prefixCls,r=t.width,a=typeof r=="number"?"".concat(r,"px"):r;return g("h3",{class:n,style:{width:a}},null)}}}),WEe=KEe,vx=u.oneOfType([u.number,u.string]),eR={prefixCls:u.string,width:u.oneOfType([vx,u.arrayOf(vx)]),rows:u.number},UEe=G({name:"SkeletonParagraph",props:eR,setup:function(t){var n=function(a){var o=t.width,i=t.rows,l=i===void 0?2:i;if(Array.isArray(o))return o[a];if(l-1===a)return o};return function(){var r=t.prefixCls,a=t.rows,o=Je(Array(a)).map(function(i,l){var s=n(l);return g("li",{key:l,style:{width:typeof s=="number"?"".concat(s,"px"):s}},null)});return g("ul",{class:r},[o])}}}),YEe=UEe,qEe={active:u.looseBool,loading:u.looseBool,prefixCls:u.string,avatar:an(u.oneOfType([u.string,u.shape(bn(J7,["active"])).loose,u.looseBool])),title:an(u.oneOfType([u.looseBool,u.string,u.shape(Q7).loose])),paragraph:an(u.oneOfType([u.looseBool,u.string,u.shape(eR).loose]))};function gb(e){return e&&kt(e)==="object"?e:{}}function GEe(e,t){return 
e&&!t?{size:"large",shape:"square"}:{size:"large",shape:"circle"}}/* XEe(e,t): default title props - width "38%" when e is falsy and t is truthy, "50%" when both are truthy, otherwise no width. Called below as XEe(hasAvatar,hasParagraph). */function XEe(e,t){return!e&&t?{width:"38%"}:e&&t?{width:"50%"}:{}}/* ZEe(e,t): default paragraph props - width "61%" unless both arguments are truthy; rows is 3 when e is falsy and t is truthy, otherwise 2. Called below as ZEe(hasAvatar,hasTitle). */function ZEe(e,t){var n={};return(!e||!t)&&(n.width="61%"),!e&&t?n.rows=3:n.rows=2,n}/* JEe: the ASkeleton component. While `loading` is truthy or undefined it renders the avatar/title/paragraph placeholders; otherwise it renders the default slot. */var JEe=G({name:"ASkeleton",props:An(qEe,{avatar:!1,title:!0,paragraph:!0}),setup:function(t,n){var r=n.slots,a=Wt("skeleton",t),o=a.prefixCls,i=a.direction;return function(){var l,s=t.loading,c=t.avatar,d=t.title,f=t.paragraph,p=t.active,/* NOTE(review): `round` is not among the keys of qEe, so this presumably stays undefined - confirm. */v=t.round,m=o.value;if(s||t.loading===void 0){var y,/* An empty-string prop value also enables the corresponding part. */b=!!c||c==="",C=!!d||d==="",S=!!f||f==="",w;/* For each part, computed defaults come first and are overridden by the prop value when it is an object (gb returns {} for anything else). */if(b){var k=P(P({prefixCls:"".concat(m,"-avatar")},GEe(C,S)),gb(c));w=g("div",{class:"".concat(m,"-header")},[g(Ag,k,null)])}var $;if(C||S){var O;if(C){var T=P(P({prefixCls:"".concat(m,"-title")},XEe(b,S)),gb(d));O=g(WEe,T,null)}var _;if(S){var I=P(P({prefixCls:"".concat(m,"-paragraph")},ZEe(b,C)),gb(f));_=g(YEe,I,null)}$=g("div",{class:"".concat(m,"-content")},[O,_])}var L=Se(m,(y={},V(y,"".concat(m,"-with-avatar"),b),V(y,"".concat(m,"-active"),p),V(y,"".concat(m,"-rtl"),i.value==="rtl"),V(y,"".concat(m,"-round"),v),y));return g("div",{class:L},[w,$])}return(l=r.default)===null||l===void 0?void 0:l.call(r)}}}),/* na: alias of the skeleton component; AStatistic below renders it around its content with its `loading` prop. */na=JEe,/* tR: prop definitions for AStatistic. */tR={prefixCls:u.string,decimalSeparator:u.string,groupSeparator:u.string,format:u.string,value:{type:[String,Number,Object]},valueStyle:u.style,valueRender:u.any,formatter:u.any,precision:u.number,prefix:u.VNodeChild,suffix:u.VNodeChild,title:u.VNodeChild,onFinish:u.func,loading:u.looseBool},/* oi: the AStatistic component. title, prefix and suffix are taken from props first and fall back to the slot of the same name; a missing value is treated as 0. */oi=G({name:"AStatistic",props:Rn(tR,{decimalSeparator:".",groupSeparator:",",loading:!1}),slots:["title","prefix","suffix","formatter"],setup:function(t,n){var r=n.slots,a=Wt("statistic",t),o=a.prefixCls,i=a.direction;return function(){var l,s,c,d,f,p,v,m=t.value,y=m===void 0?0:m,b=t.valueStyle,C=t.valueRender,S=o.value,w=(l=t.title)!==null&&l!==void 0?l:(s=r.title)===null||s===void 0?void 0:s.call(r),k=(c=t.prefix)!==null&&c!==void 0?c:(d=r.prefix)===null||d===void 0?void 0:d.call(r),$=(f=t.suffix)!==null&&f!==void 
0?f:(p=r.suffix)===null||p===void 0?void 0:p.call(r),O=(v=t.formatter)!==null&&v!==void 0?v:r.formatter,T=g(HEe,le({"data-for-update":Date.now()},P(P({},t),{prefixCls:S,value:y,formatter:O})),null);return C&&(T=C(T)),g("div",{class:[S,V({},"".concat(S,"-rtl"),i.value==="rtl")]},[w&&g("div",{class:"".concat(S,"-title")},[w]),g(na,{paragraph:!1,loading:t.loading},{default:function(){return[g("div",{style:b,class:"".concat(S,"-content")},[k&&g("span",{class:"".concat(S,"-content-prefix")},[k]),T,$&&g("span",{class:"".concat(S,"-content-suffix")},[$])])]}})])}}}),QEe=[["Y",1e3*60*60*24*365],["M",1e3*60*60*24*30],["D",1e3*60*60*24],["H",1e3*60*60],["m",1e3*60],["s",1e3],["S",1]];function eMe(e,t){var n=e,r=/\[[^\]]*]/g,a=(t.match(r)||[]).map(function(s){return s.slice(1,-1)}),o=t.replace(r,"[]"),i=QEe.reduce(function(s,c){var d=fn(c,2),f=d[0],p=d[1];if(s.indexOf(f)!==-1){var v=Math.floor(n/p);return n-=v*p,s.replace(new RegExp("".concat(f,"+"),"g"),function(m){var y=m.length;return HW(v.toString(),y,"0")})}return s},o),l=0;return i.replace(r,function(){var s=a[l];return l+=1,s})}function tMe(e,t){var n=t.format,r=n===void 0?"":n,a=new Date(e).getTime(),o=Date.now(),i=Math.max(a-o,0);return eMe(i,r)}var nMe=1e3/30;function yb(e){return new Date(e).getTime()}var rMe=G({name:"AStatisticCountdown",props:Rn(tR,{format:"HH:mm:ss"}),emits:["finish","change"],setup:function(t,n){var r=n.emit,a=H(),o=H(),i=function(){var p=t.value,v=yb(p);v>=Date.now()?l():s()},l=function(){if(!a.value){var p=yb(t.value);a.value=window.setInterval(function(){o.value.$forceUpdate(),p>Date.now()&&r("change",p-Date.now()),i()},nMe)}},s=function(){var p=t.value;if(a.value){clearInterval(a.value),a.value=void 0;var v=yb(p);v<Date.now()&&r("finish")}},c=function(p){var v=p.value,m=p.config,y=t.format;return tMe(v,P(P({},m),{format:y}))},d=function(p){return p};return et(function(){i()}),ur(function(){i()}),Lt(function(){s()}),function(){return 
g(oi,le({ref:o},P(P({},t),{valueRender:d,formatter:c})),null)}}});/* Attach the countdown component as a static member of AStatistic and register both on install. */oi.Countdown=rMe;oi.install=function(e){return e.component(oi.name,oi),e.component(oi.Countdown.name,oi.Countdown),e};/* Notification module state: Ui caches notification instances by key; nR, rR, aR, oR, iR and lR hold the current default duration, top offset, bottom offset, placement, container getter and close icon. */var aMe=oi.Countdown,Ui={},nR=4.5,rR="24px",aR="24px",oR="topRight",iR=function(){return document.body},lR=null;/* oMe(e): overwrites the module-level defaults with whichever of duration/placement/bottom/top/getContainer/closeIcon are not undefined; numeric top and bottom are converted to "<n>px". */function oMe(e){var t=e.duration,n=e.placement,r=e.bottom,a=e.top,o=e.getContainer,i=e.closeIcon;t!==void 0&&(nR=t),n!==void 0&&(oR=n),r!==void 0&&(aR=typeof r=="number"?"".concat(r,"px"):r),a!==void 0&&(rR=typeof a=="number"?"".concat(a,"px"):a),o!==void 0&&(iR=o),i!==void 0&&(lR=i)}/* iMe(placement, top, bottom): returns the positioning style for a placement; top and bottom default to rR and aR, and any placement other than topLeft/topRight/bottomLeft is positioned bottom-right. */function iMe(e){var t=arguments.length>1&&arguments[1]!==void 0?arguments[1]:rR,n=arguments.length>2&&arguments[2]!==void 0?arguments[2]:aR,r;switch(e){case"topLeft":r={left:"0px",top:t,bottom:"auto"};break;case"topRight":r={right:"0px",top:t,bottom:"auto"};break;case"bottomLeft":r={left:"0px",top:"auto",bottom:n};break;default:r={right:"0px",top:"auto",bottom:n};break}return r}/* lMe(e,t): hands the notification instance for "<prefixCls>-<placement>" to callback t, creating it through L7.newInstance on first use and caching it in Ui. The cache key does not include top, bottom or getContainer, so those are only applied by the call that creates the instance. */function lMe(e,t){var n=e.prefixCls,r=e.placement,a=r===void 0?oR:r,o=e.getContainer,i=o===void 0?iR:o,l=e.top,s=e.bottom,c=e.closeIcon,d=c===void 0?lR:c,f="".concat(n,"-").concat(a);if(Ui[f]){t(Ui[f]);return}L7.newInstance({prefixCls:n,class:"".concat(n,"-").concat(a),style:iMe(a,l,s),getContainer:i,closeIcon:function(){var v=g("span",{class:"".concat(n,"-close-x")},[d||g(go,{class:"".concat(n,"-close-icon")},null)]);return v}},function(p){Ui[f]=p,t(p)})}/* sMe: maps a notice type to the icon component rendered for it. */var sMe={success:x2,info:I2,error:A2,warning:E2};/* uMe(e): opens one notice. An explicit e.icon takes precedence over the icon derived from e.type; duration falls back to the module default nR only when e.duration is undefined. */function uMe(e){var t=e.icon,n=e.type,r=e.description,a=e.message,o=e.btn,i=e.prefixCls||"ant-notification",l="".concat(i,"-notice"),s=e.duration===void 0?nR:e.duration,c=null;if(t)c=function(){return g("span",{class:"".concat(l,"-icon")},[t])};else if(n){var d=sMe[n];c=function(){return g(d,{class:"".concat(l,"-icon ").concat(l,"-icon-").concat(n)},null)}}var 
f=e.placement,p=e.top,v=e.bottom,m=e.getContainer,y=e.closeIcon;lMe({prefixCls:i,placement:f,top:p,bottom:v,getContainer:m,closeIcon:y},function(b){b.notice({content:function(){return g("div",{class:c?"".concat(l,"-with-icon"):""},[c&&c(),g("div",{class:"".concat(l,"-message")},[!r&&c?g("span",{class:"".concat(l,"-message-single-line-auto-margin")},null):null,a]),g("div",{class:"".concat(l,"-description")},[r]),o?g("span",{class:"".concat(l,"-btn")},[o]):null])},duration:s,closable:!0,onClose:e.onClose,onClick:e.onClick,key:e.key,style:e.style||{},class:e.class})})}var cMe={open:uMe,close:function(t){Object.keys(Ui).forEach(function(n){return Ui[n].removeNotice(t)})},config:oMe,destroy:function(){Object.keys(Ui).forEach(function(t){Ui[t].destroy(),delete Ui[t]})}},vf=cMe,dMe=["success","info","warning","error"];dMe.forEach(function(e){vf[e]=function(t){return vf.open(P(P({},t),{type:e}))}});vf.warn=vf.warning;var sR=vf,fMe={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M872 474H286.9l350.2-304c5.6-4.9 2.2-14-5.2-14h-88.5c-3.9 0-7.6 1.4-10.5 3.9L155 487.8a31.96 31.96 0 000 48.3L535.1 866c1.5 1.3 3.3 2 5.2 2h91.5c7.4 0 10.8-9.2 5.2-14L286.9 550H872c4.4 0 8-3.6 8-8v-60c0-4.4-3.6-8-8-8z"}}]},name:"arrow-left",theme:"outlined"},hMe=fMe;function mx(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){pMe(e,a,n[a])})}return e}function pMe(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var _S=function(t,n){var r=mx({},t,n.attrs);return g(Et,mx({},r,{icon:hMe}),null)};_S.displayName="ArrowLeftOutlined";_S.inheritAttrs=!1;var vMe=_S,mMe={icon:{tag:"svg",attrs:{viewBox:"64 64 896 
896",focusable:"false"},children:[{tag:"path",attrs:{d:"M869 487.8L491.2 159.9c-2.9-2.5-6.6-3.9-10.5-3.9h-88.5c-7.4 0-10.8 9.2-5.2 14l350.2 304H152c-4.4 0-8 3.6-8 8v60c0 4.4 3.6 8 8 8h585.1L386.9 854c-5.6 4.9-2.2 14 5.2 14h91.5c1.9 0 3.8-.7 5.2-2L869 536.2a32.07 32.07 0 000-48.4z"}}]},name:"arrow-right",theme:"outlined"},gMe=mMe;function gx(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){yMe(e,a,n[a])})}return e}function yMe(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var ES=function(t,n){var r=gx({},t,n.attrs);return g(Et,gx({},r,{icon:gMe}),null)};ES.displayName="ArrowRightOutlined";ES.inheritAttrs=!1;var bMe=ES,CMe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},wMe={border:0,background:"transparent",padding:0,lineHeight:"inherit",display:"inline-block"},SMe=G({name:"TransButton",inheritAttrs:!1,props:{noStyle:u.looseBool,onClick:u.func,disabled:u.looseBool,autofocus:u.looseBool},setup:function(t,n){var r=n.slots,a=n.emit,o=n.attrs,i=n.expose,l=H(),s=function(m){var y=m.keyCode;y===ze.ENTER&&m.preventDefault()},c=function(m){var y=m.keyCode;y===ze.ENTER&&a("click",m)},d=function(m){a("click",m)},f=function(){l.value&&l.value.focus()},p=function(){l.value&&l.value.blur()};return et(function(){t.autofocus&&f()}),i({focus:f,blur:p}),function(){var v,m=t.noStyle,y=t.disabled,b=CMe(t,["noStyle","disabled"]),C={};return 
m||(C=P({},wMe)),y&&(C.pointerEvents="none"),g("div",le(le(le({role:"button",tabindex:0,ref:l},b),o),{},{onClick:d,onKeydown:s,onKeyup:c,style:P(P({},C),o.style||{})}),[(v=r.default)===null||v===void 0?void 0:v.call(r)])}}}),cm=SMe,kMe={backIcon:u.VNodeChild,prefixCls:u.string,title:u.VNodeChild,subTitle:u.VNodeChild,breadcrumb:u.object,tags:u.any,footer:u.VNodeChild,extra:u.VNodeChild,avatar:u.object,ghost:u.looseBool,onBack:u.func},$Me=G({name:"APageHeader",props:kMe,emits:["back"],slots:["backIcon","avatar","breadcrumb","title","subTitle","tags","extra","footer"],setup:function(t,n){var r=n.emit,a=n.slots,o=Wt("page-header",t),i=o.prefixCls,l=o.direction,s=o.pageHeader,c=H(!1),d=function(w){var k=w.width;c.value=k<768},f=x(function(){var S,w,k;return(k=(S=t.ghost)!==null&&S!==void 0?S:(w=s.value)===null||w===void 0?void 0:w.ghost)!==null&&k!==void 0?k:!0}),p=function(){var w,k,$;return($=(w=t.backIcon)!==null&&w!==void 0?w:(k=a.backIcon)===null||k===void 0?void 0:k.call(a))!==null&&$!==void 0?$:l.value==="rtl"?g(bMe,null,null):g(vMe,null,null)},v=function(w){return!w||!t.onBack?null:g(Kr,{componentName:"PageHeader",children:function($){var O=$.back;return g("div",{class:"".concat(i.value,"-back")},[g(cm,{onClick:function(_){r("back",_)},class:"".concat(i.value,"-back-button"),"aria-label":O},{default:function(){return[w]}})])}},null)},m=function(){var w;return t.breadcrumb?g(us,t.breadcrumb,null):(w=a.breadcrumb)===null||w===void 0?void 0:w.call(a)},y=function(){var w,k,$,O,T,_,I,L,j,F=t.avatar,N=(w=t.title)!==null&&w!==void 0?w:(k=a.title)===null||k===void 0?void 0:k.call(a),D=($=t.subTitle)!==null&&$!==void 0?$:(O=a.subTitle)===null||O===void 0?void 0:O.call(a),z=(T=t.tags)!==null&&T!==void 0?T:(_=a.tags)===null||_===void 0?void 0:_.call(a),B=(I=t.extra)!==null&&I!==void 0?I:(L=a.extra)===null||L===void 0?void 0:L.call(a),M="".concat(i.value,"-heading"),E=N||D||z||B;if(!E)return null;var K=p(),W=v(K),Y=W||F||E;return 
g("div",{class:M},[Y&&g("div",{class:"".concat(M,"-left")},[W,F?g(ss,F,null):(j=a.avatar)===null||j===void 0?void 0:j.call(a),N&&g("span",{class:"".concat(M,"-title"),title:typeof N=="string"?N:void 0},[N]),D&&g("span",{class:"".concat(M,"-sub-title"),title:typeof D=="string"?D:void 0},[D]),z&&g("span",{class:"".concat(M,"-tags")},[z])]),B&&g("span",{class:"".concat(M,"-extra")},[B])])},b=function(){var w,k,$=(w=t.footer)!==null&&w!==void 0?w:La((k=a.footer)===null||k===void 0?void 0:k.call(a));return i0e($)?null:g("div",{class:"".concat(i.value,"-footer")},[$])},C=function(w){return g("div",{class:"".concat(i.value,"-content")},[w])};return function(){var S,w,k,$=((w=t.breadcrumb)===null||w===void 0?void 0:w.routes)||a.breadcrumb,O=t.footer||a.footer,T=Un((k=a.default)===null||k===void 0?void 0:k.call(a)),_=Se(i.value,(S={"has-breadcrumb":$,"has-footer":O},V(S,"".concat(i.value,"-ghost"),f.value),V(S,"".concat(i.value,"-rtl"),l.value==="rtl"),V(S,"".concat(i.value,"-compact"),c.value),S));return g(zo,{onResize:d},{default:function(){return[g("div",{class:_},[m(),y(),T.length?C(T):null,b()])]}})}}}),OMe=kn($Me),yx=kg(),PMe=G({name:"APopconfirm",mixins:[nt],props:P(P({},yx),{prefixCls:u.string,transitionName:u.string.def("zoom-big"),content:u.any,title:u.any,trigger:yx.trigger.def("click"),okType:{type:String,default:"primary"},disabled:u.looseBool.def(!1),okText:u.any,cancelText:u.any,icon:u.any,okButtonProps:u.object,cancelButtonProps:u.object,onConfirm:u.func,onCancel:u.func,onVisibleChange:u.func}),emits:["update:visible","confirm","cancel","visibleChange"],setup:function(){return{configProvider:ve("configProvider",St)}},data:function(){var t=Qe(this),n={sVisible:!1};return"visible"in t&&(n.sVisible=t.visible),"defaultVisible"in 
t&&(n.sVisible=t.defaultVisible),n},watch:{visible:function(t){this.sVisible=t}},methods:{onConfirmHandle:function(t){this.setVisible(!1,t),this.$emit("confirm",t)},onCancelHandle:function(t){this.setVisible(!1,t),this.$emit("cancel",t)},onVisibleChangeHandle:function(t){var n=this.$props.disabled;n||this.setVisible(t)},setVisible:function(t,n){vt(this,"visible")||this.setState({sVisible:t}),this.$emit("update:visible",t),this.$emit("visibleChange",t,n)},getPopupDomNode:function(){return this.$refs.tooltip.getPopupDomNode()},renderOverlay:function(t,n){var r=this,a=this.okType,o=this.okButtonProps,i=this.cancelButtonProps,l=We(this,"icon")||g(Hf,null,null),s=HO(P({size:"small",onClick:this.onCancelHandle},i)),c=HO(P(P(P({},w2(a)),{size:"small",onClick:this.onConfirmHandle}),o));return g("div",{class:"".concat(t,"-inner-content")},[g("div",{class:"".concat(t,"-message")},[l,g("div",{class:"".concat(t,"-message-title")},[We(this,"title")])]),g("div",{class:"".concat(t,"-buttons")},[g(Cr,s,{default:function(){return[We(r,"cancelText")||n.cancelText]}}),g(Cr,c,{default:function(){return[We(r,"okText")||n.okText]}})])])}},render:function(){var t=this,n,r=Qe(this),a=r.prefixCls,o=this.configProvider.getPrefixCls,i=o("popover",a),l=bn(r,["title","content","cancelText","okText","onUpdate:visible"]),s=g(Kr,{componentName:"Popconfirm",defaultLocale:lo.Popconfirm,children:function(f){return t.renderOverlay(i,f)}},null),c=P(P({},l),{title:s,prefixCls:i,visible:this.sVisible,ref:"tooltip",onVisibleChange:this.onVisibleChangeHandle});return g(Io,c,{default:function(){return[(n=t.$slots)===null||n===void 0?void 0:n.default()]}})}}),TMe=kn(PMe);function cs(e){return!e||e<0?0:e>100?100:e}var xMe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var 
a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},_Me=function(t){for(var n=[],r=0,a=Object.entries(t);r<a.length;r++){var o=fn(a[r],2),i=o[0],l=o[1],s=parseFloat(i.replace(/%/g,""));if(isNaN(s))return{};n.push({key:s,value:l})}return n=n.sort(function(c,d){return c.key-d.key}),n.map(function(c){var d=c.key,f=c.value;return"".concat(f," ").concat(d,"%")}).join(", ")},EMe=function(t){var n=t.from,r=n===void 0?"#1890ff":n,a=t.to,o=a===void 0?"#1890ff":a,i=t.direction,l=i===void 0?"to right":i,s=xMe(t,["from","to","direction"]);if(Object.keys(s).length!==0){var c=_Me(s);return{backgroundImage:"linear-gradient(".concat(l,", ").concat(c,")")}}return{backgroundImage:"linear-gradient(".concat(l,", ").concat(r,", ").concat(o,")")}},MMe=function(t,n){var r=n.attrs,a=n.slots,o=r.prefixCls,i=r.percent,l=r.successPercent,s=r.strokeWidth,c=r.size,d=r.strokeColor,f=r.strokeLinecap,p=r.trailColor,v;d&&typeof d!="string"?v=EMe(d):v={background:d};var m=p?{style:{backgroundColor:p}}:void 0,y=P({width:"".concat(cs(i),"%"),height:"".concat(s||(c==="small"?6:8),"px"),background:d,borderRadius:f==="square"?0:"100px"},v),b={width:"".concat(cs(l),"%"),height:"".concat(s||(c==="small"?6:8),"px"),borderRadius:f==="square"?0:""},C=l!==void 0?g("div",{class:"".concat(o,"-success-bg"),style:b},null):null;return g("div",null,[g("div",{class:"".concat(o,"-outer")},[g("div",le({class:"".concat(o,"-inner")},m),[g("div",{class:"".concat(o,"-bg"),style:y},null),C])]),a==null?void 0:a.default()])},IMe=MMe;function NMe(e){return P(P({},e),{updated:function(){var n=this,r=Date.now(),a=!1;Object.keys(this.paths).forEach(function(o){var i=n.paths[o];if(!!i){a=!0;var l=i.style;l.transitionDuration=".3s, .3s, .3s, .06s",n.prevTimeStamp&&r-n.prevTimeStamp<100&&(l.transitionDuration="0s, 0s")}}),a&&(this.prevTimeStamp=Date.now())}})}var 
AMe={percent:0,prefixCls:"rc-progress",strokeColor:"#2db7f5",strokeLinecap:"round",strokeWidth:1,trailColor:"#D9D9D9",trailWidth:1},qh=u.oneOfType([u.number,u.string]),DMe={percent:u.oneOfType([qh,u.arrayOf(qh)]),prefixCls:u.string,strokeColor:u.oneOfType([u.string,u.arrayOf(u.oneOfType([u.string,u.object])),u.object]),strokeLinecap:u.oneOf(["butt","round","square"]),strokeWidth:qh,trailColor:u.string,trailWidth:qh},RMe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},LMe=P(P({},DMe),{gapPosition:u.oneOf(["top","bottom","left","right"]),gapDegree:an(u.oneOfType([u.number,u.string,u.looseBool]))}),FMe=P(P({},AMe),{gapPosition:"top"}),
/* bx: running counter; each Circle instance copies it into this.gradientId in created() and then increments it. */
bx=0;
/* Cx(e): numeric value of a percentage string such as "50%" (removes the first "%", then applies unary plus). */
function Cx(e){return+e.replace("%","")}
/* bb(e): returns e unchanged when it is an array, otherwise wraps it in a one-element array. */
function bb(e){return Array.isArray(e)?e:[e]}
/*
 * wx(e, t, n, r[, gapDegree = 0[, gapPosition]]): path data and style for one circle segment.
 *   e - offset already consumed by earlier segments (callers pass the running sum of percents)
 *   t - percent covered by this segment
 *   n - stroke colour written into the style
 *   r - stroke width; the radius is 50 - r/2 inside the 100x100 viewBox
 * The optional 5th/6th arguments are read from `arguments`. gapPosition "left", "right" or
 * "bottom" moves the start point of the two arcs; any other value keeps the top start.
 * Returns {pathString, pathStyle}. The line breaks inside the path template are part of
 * the emitted string.
 */
function wx(e,t,n,r){var a=arguments.length>4&&arguments[4]!==void 0?arguments[4]:0,o=arguments.length>5?arguments[5]:void 0,i=50-r/2,l=0,s=-i,c=0,d=-2*i;switch(o){case"left":l=-i,s=0,c=2*i,d=0;break;case"right":l=i,s=0,c=-2*i,d=0;break;case"bottom":s=i,d=2*i;break}var f="M 50,50 m ".concat(l,",").concat(s,`
   a `).concat(i,",").concat(i," 0 1 1 ").concat(c,",").concat(-d,`
   a `).concat(i,",").concat(i," 0 1 1 ").concat(-c,",").concat(d),p=Math.PI*2*i,v={stroke:n,strokeDasharray:"".concat(t/100*(p-a),"px ").concat(p,"px"),strokeDashoffset:"-".concat(a/2+e/100*(p-a),"px"),transition:"stroke-dashoffset .3s ease 0s, stroke-dasharray .3s ease 0s, stroke .3s, stroke-width .06s ease .3s, opacity .3s ease 0s"};return{pathString:f,pathStyle:v}}
/*
 * BMe: the "Circle" SVG component.
 * getStokeList() normalises percent and strokeColor to arrays and returns one <path> per
 * percent entry, reusing the last colour when there are fewer colours than percents; an
 * object colour is referenced as url(#<prefix>-gradient-<gradientId>). Each path element
 * is stored in this.paths by index.
 * render() draws the trail path with wx(0, 100, ...), emits a <linearGradient> for the
 * first object-valued strokeColor (stops sorted by their numeric percentage key), and
 * appends the stroke paths in reverse order.
 */
var BMe=G({name:"Circle",props:An(LMe,FMe),created:function(){this.paths={},this.gradientId=bx,bx+=1},methods:{getStokeList:function(){var t=this,n=this.$props,r=n.prefixCls,a=n.percent,o=n.strokeColor,i=n.strokeWidth,l=n.strokeLinecap,s=n.gapDegree,c=n.gapPosition,d=bb(a),f=bb(o),p=0;return d.map(function(v,m){var y=f[m]||f[f.length-1],b=Object.prototype.toString.call(y)==="[object Object]"?"url(#".concat(r,"-gradient-").concat(t.gradientId,")"):"",C=wx(p,v,y,i,s,c),S=C.pathString,w=C.pathStyle;p+=v;var k={key:m,d:S,stroke:b,"stroke-linecap":l,"stroke-width":i,opacity:v===0?0:1,"fill-opacity":"0",class:"".concat(r,"-circle-path"),style:w};return g("path",le({ref:function(O){return t.paths[m]=O}},k),null)})}},render:function(){var t=this.$props,n=t.prefixCls,r=t.strokeWidth,a=t.trailWidth,o=t.gapDegree,i=t.gapPosition,l=t.trailColor,s=t.strokeLinecap,c=t.strokeColor,d=RMe(t,["prefixCls","strokeWidth","trailWidth","gapDegree","gapPosition","trailColor","strokeLinecap","strokeColor"]),f=wx(0,100,l,r,o,i),p=f.pathString,v=f.pathStyle;delete d.percent;var m=bb(c),y=m.find(function(C){return Object.prototype.toString.call(C)==="[object Object]"}),b={d:p,stroke:l,"stroke-linecap":s,"stroke-width":a||r,"fill-opacity":"0",class:"".concat(n,"-circle-trail"),style:v};return g("svg",le({class:"".concat(n,"-circle"),viewBox:"0 0 100 100"},d),[y&&g("defs",null,[g("linearGradient",{id:"".concat(n,"-gradient-").concat(this.gradientId),x1:"100%",y1:"0%",x2:"0%",y2:"0%"},[Object.keys(y).sort(function(C,S){return Cx(C)-Cx(S)}).map(function(C,S){return 
g("stop",{key:S,offset:C,"stop-color":y[C]},null)})])]),g("path",b,null),this.getStokeList().reverse()])}}),VMe=NMe(BMe),uR=rt("normal","exception","active","success"),zMe=u.oneOf(rt("line","circle","dashboard")),HMe=u.oneOf(rt("default","small")),cR={prefixCls:u.string,type:zMe,percent:u.number,successPercent:u.number,format:u.func,status:u.oneOf(uR),showInfo:u.looseBool,strokeWidth:u.number,strokeLinecap:u.oneOf(["butt","round","square"]),strokeColor:u.oneOfType([u.string,u.object]),trailColor:u.string,width:u.number,gapDegree:u.number,gapPosition:u.oneOf(rt("top","bottom","left","right")),size:HMe},jMe=P(P({},cR),{progressStatus:u.string}),Sx={normal:"#108ee9",exception:"#ff5500",success:"#87d068"};function KMe(e){var t=e.percent,n=e.successPercent,r=cs(t);if(!n)return r;var a=cs(n);return[n,cs(r-a)]}function WMe(e){var t=e.progressStatus,n=e.successPercent,r=e.strokeColor,a=r||Sx[t];return n?[Sx.success,a]:a}var UMe=G({props:jMe,setup:function(t,n){var r=n.slots;return function(){var a,o=t.prefixCls,i=t.width,l=t.strokeWidth,s=t.trailColor,c=t.strokeLinecap,d=t.gapPosition,f=t.gapDegree,p=t.type,v=i||120,m={width:typeof v=="number"?"".concat(v,"px"):v,height:typeof v=="number"?"".concat(v,"px"):v,fontSize:"".concat(v*.15+6,"px")},y=l||6,b=d||p==="dashboard"&&"bottom"||"top",C=f||p==="dashboard"&&75,S=WMe(t),w=Object.prototype.toString.call(S)==="[object Object]",k=(a={},V(a,"".concat(o,"-inner"),!0),V(a,"".concat(o,"-circle-gradient"),w),a);return g("div",{class:k,style:m},[g(VMe,{percent:KMe(t),strokeWidth:y,trailWidth:y,strokeColor:S,strokeLinecap:c,trailColor:s,prefixCls:o,gapDegree:C,gapPosition:b},null),r==null?void 0:r.default()])}}}),YMe=UMe,qMe=G({name:"AProgress",props:Rn(cR,{type:"line",percent:0,showInfo:!0,trailColor:null,size:"default",gapDegree:0,strokeLinecap:"round"}),setup:function(){return{configProvider:ve("configProvider",St)}},methods:{getPercentNumber:function(){var t=this.$props,n=t.successPercent,r=t.percent,a=r===void 0?0:r;return 
parseInt(n!==void 0?n.toString():a.toString(),10)},
/* getProgressStatus(): "success" when the status prop is not one of uR's values and getPercentNumber() >= 100; otherwise the status prop, or "normal" when it is falsy. */
getProgressStatus:function(){var t=this.$props.status;return uR.indexOf(t)<0&&this.getPercentNumber()>=100?"success":t||"normal"},
/*
 * renderProcessInfo(t, n): builds the <t>-text span for prefix class t and status n.
 * Returns null when showInfo is false. The content is the formatter result (format prop,
 * then format slot, then the default "<percent>%") whenever a custom formatter exists or
 * the status is neither "exception" nor "success"; otherwise it is a status icon, chosen
 * by whether type is "line". The title attribute is set only for string content.
 */
renderProcessInfo:function(t,n){var r=this.$props,a=r.showInfo,o=r.format,i=r.type,l=r.percent,s=r.successPercent;if(!a)return null;var c,d=o||this.$slots.format||function(p){return"".concat(p,"%")},f=i==="line";return o||this.$slots.format||n!=="exception"&&n!=="success"?c=d(cs(l),cs(s)):n==="exception"?c=g(f?Yr:go,null,null):n==="success"&&(c=g(f?zf:wg,null,null)),g("span",{class:"".concat(t,"-text"),title:typeof c=="string"?c:void 0},[c])}},
/* render(): picks the line renderer (IMe) or the circle/dashboard renderer (YMe) by type, passes the info node as default slot, and wraps the result in a div carrying the type/status/show-info/size classes. Type "dashboard" uses the "circle" class name. */
render:function(){var t,n=Qe(this),r=n.prefixCls,a=n.size,o=n.type,i=n.showInfo,l=this.configProvider.getPrefixCls,s=l("progress",r),c=this.getProgressStatus(),d=this.renderProcessInfo(s,c),f;if(o==="line"){var p=P(P({},n),{prefixCls:s});f=g(IMe,p,{default:function(){return[d]}})}else if(o==="circle"||o==="dashboard"){var v=P(P({},n),{prefixCls:s,progressStatus:c});f=g(YMe,v,{default:function(){return[d]}})}var m=Se(s,(t={},V(t,"".concat(s,"-").concat(o==="dashboard"&&"circle"||o),!0),V(t,"".concat(s,"-status-").concat(c),!0),V(t,"".concat(s,"-show-info"),i),V(t,"".concat(s,"-").concat(a),a),t)),y={class:m};return g("div",y,[f])}}),dR=kn(qMe);
/* GMe(e): horizontal scroll offset of window e: pageXOffset when it is a number, else documentElement.scrollLeft, else body.scrollLeft. */
function GMe(e){var t=e.pageXOffset,n="scrollLeft";if(typeof t!="number"){var r=e.document;t=r.documentElement[n],typeof t!="number"&&(t=r.body[n])}return t}
/* XMe(e): {left, top} of element e from getBoundingClientRect(), reduced by the clientLeft/clientTop of the document element (or of body when that is 0). */
function XMe(e){var t,n,r=e.ownerDocument,a=r.body,o=r&&r.documentElement,i=e.getBoundingClientRect();return t=i.left,n=i.top,t-=o.clientLeft||a.clientLeft||0,n-=o.clientTop||a.clientTop||0,{left:t,top:n}}
/* ZMe(e): left offset of element e including the owning window's horizontal scroll (XMe(e).left + GMe(window)). */
function ZMe(e){var t=XMe(e),n=e.ownerDocument,r=n.defaultView||n.parentWindow;return t.left+=GMe(r),t.left}
/* JMe: icon definition object for the filled "star" icon (svg tag tree with a single path). */
var JMe={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M908.1 353.1l-253.9-36.9L540.7 86.1c-3.1-6.3-8.2-11.4-14.5-14.5-15.8-7.8-35-1.3-42.9 14.5L369.8 316.2l-253.9 36.9c-7 1-13.4 4.3-18.3 9.3a32.05 
32.05 0 00.6 45.3l183.7 179.1-43.4 252.9a31.95 31.95 0 0046.4 33.7L512 754l227.1 119.4c6.2 3.3 13.4 4.4 20.3 3.2 17.4-3 29.1-19.5 26.1-36.9l-43.4-252.9 183.7-179.1c5-4.9 8.3-11.3 9.3-18.3 2.7-17.5-9.5-33.7-27-36.3z"}}]},name:"star",theme:"filled"},QMe=JMe;function kx(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){e5e(e,a,n[a])})}return e}function e5e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var MS=function(t,n){var r=kx({},t,n.attrs);return g(Et,kx({},r,{icon:QMe}),null)};MS.displayName="StarFilled";MS.inheritAttrs=!1;var t5e=MS,n5e={value:u.number,index:u.number,prefixCls:u.string,allowHalf:u.looseBool,disabled:u.looseBool,character:u.any,characterRender:u.func,focused:u.looseBool,count:u.number,onClick:u.func,onHover:u.func},r5e=G({name:"Star",inheritAttrs:!1,props:n5e,emits:["hover","click"],setup:function(t,n){var r=n.slots,a=n.emit,o=function(d){var f=t.index;a("hover",d,f)},i=function(d){var f=t.index;a("click",d,f)},l=function(d){var f=t.index;d.keyCode===13&&a("click",d,f)},s=x(function(){var c=t.prefixCls,d=t.index,f=t.value,p=t.allowHalf,v=t.focused,m=d+1,y=c;return f===0&&d===0&&v?y+=" ".concat(c,"-focused"):p&&f+.5>=m&&f<m?(y+=" ".concat(c,"-half ").concat(c,"-active"),v&&(y+=" ".concat(c,"-focused"))):(y+=m<=f?" 
".concat(c,"-full"):" ".concat(c,"-zero"),m===f&&v&&(y+=" ".concat(c,"-focused"))),y});return function(){var c=t.disabled,d=t.prefixCls,f=t.characterRender,p=t.index,v=t.count,m=t.value,y=jn(r,t,"character"),b=g("li",{class:s.value},[g("div",{onClick:c?null:i,onKeydown:c?null:l,onMousemove:c?null:o,role:"radio","aria-checked":m>p?"true":"false","aria-posinset":p+1,"aria-setsize":v,tabindex:c?-1:0},[g("div",{class:"".concat(d,"-first")},[y]),g("div",{class:"".concat(d,"-second")},[y])])]);return f&&(b=f(b,t)),b}}}),a5e=function(){var t=H({}),n=function(a,o){t.value[o]=a};return wm(function(){t.value={}}),[n,t]},o5e={prefixCls:u.string,count:u.number,value:u.number,allowHalf:u.looseBool,allowClear:u.looseBool,tooltips:u.arrayOf(u.string),disabled:u.looseBool,character:u.any,autofocus:u.looseBool,tabindex:u.oneOfType([u.number,u.string]),direction:u.string},i5e=G({name:"ARate",inheritAttrs:!1,props:An(o5e,{value:0,count:5,allowHalf:!1,allowClear:!0,prefixCls:"ant-rate",tabindex:0,direction:"ltr"}),emits:["hoverChange","update:value","change","focus","blur","keydown"],setup:function(t,n){var r=n.slots,a=n.attrs,o=n.emit,i=n.expose,l=Wt("rate",t),s=l.prefixCls,c=l.direction,d=H(),f=a5e(),p=fn(f,2),v=p[0],m=p[1],y=bt({value:t.value,focused:!1,cleanedValue:null,hoverValue:void 0});ce(function(){return t.value},function(){y.value=t.value});var b=function(D){return Sn(m.value[D])},C=function(D,z){var B=c.value==="rtl",M=D+1;if(t.allowHalf){var E=b(D),K=ZMe(E),W=E.clientWidth;(B&&z-K>W/2||!B&&z-K<W/2)&&(M-=.5)}return M},S=function(D){t.value===void 0&&(y.value=D),o("update:value",D),o("change",D)},w=function(D,z){var B=C(z,D.pageX);B!==y.cleanedValue&&(y.hoverValue=B,y.cleanedValue=null),o("hoverChange",B)},k=function(){y.hoverValue=void 0,y.cleanedValue=null,o("hoverChange",void 0)},$=function(D,z){var 
B=t.allowClear,M=C(z,D.pageX),E=!1;B&&(E=M===y.value),k(),S(E?0:M),y.cleanedValue=E?M:null},
/* O / T: focus and blur handlers; toggle y.focused and re-emit "focus" / "blur". */
O=function(){y.focused=!0,o("focus")},T=function(){y.focused=!1,o("blur")},
/*
 * _: keydown handler. Steps y.value by 0.5 when allowHalf is set, else by 1, bounded by
 * 0 and count. ze.RIGHT increases and ze.LEFT decreases; the two are swapped when the
 * direction is "rtl". Each handled key reports the new value through S and calls
 * preventDefault(). "keydown" is always re-emitted.
 * NOTE(review): ze.LEFT / ze.RIGHT are presumably arrow-key codes; ze is defined outside this chunk.
 */
_=function(D){var z=D.keyCode,B=t.count,M=t.allowHalf,E=c.value==="rtl";z===ze.RIGHT&&y.value<B&&!E?(M?y.value+=.5:y.value+=1,S(y.value),D.preventDefault()):z===ze.LEFT&&y.value>0&&!E||z===ze.RIGHT&&y.value>0&&E?(M?y.value-=.5:y.value-=1,S(y.value),D.preventDefault()):z===ze.LEFT&&y.value<B&&E&&(M?y.value+=.5:y.value+=1,S(y.value),D.preventDefault()),o("keydown",D)},
/* I / L: exposed focus() and blur(); both do nothing while the disabled prop is set, otherwise they forward to the element held in the ref d. */
I=function(){t.disabled||d.value.focus()},L=function(){t.disabled||d.value.blur()};i({focus:I,blur:L}),et(function(){var N=t.autofocus,D=t.disabled;N&&!D&&I()});
/* j: characterRender callback given to every star; wraps the star node in Io with title tooltips[index] when the tooltips prop is set, otherwise returns the node unchanged. */
var j=function(D,z){var B=z.index,M=t.tooltips;return M?g(Io,{title:M[B]},{default:function(){return[D]}}):D},
/* F: star character: the "character" slot/prop when provided, else the filled star icon. Evaluated once, during setup. */
F=jn(r,t,"character")||g(t5e,null,null);
/* Render function: one r5e star per index in 0..count-1 showing hoverValue when defined, else the current value, inside a <ul role="radiogroup">. All event handlers are null and tabindex is -1 while disabled. */
return function(){for(var N=t.count,D=t.allowHalf,z=t.disabled,B=t.tabindex,M=a.class,E=a.style,K=[],W=z?"".concat(s.value,"-disabled"):"",Y=function(oe){K.push(g(r5e,{ref:function(ae){return v(ae,oe)},key:oe,index:oe,count:N,disabled:z,prefixCls:"".concat(s.value,"-star"),allowHalf:D,value:y.hoverValue===void 0?y.value:y.hoverValue,onClick:$,onHover:w,character:F,characterRender:j,focused:y.focused},null))},q=0;q<N;q++)Y(q);var J=Se(s.value,W,M,V({},"".concat(s.value,"-rtl"),c.value==="rtl"));return g("ul",le(le({},a),{},{class:J,style:E,onMouseleave:z?null:k,tabindex:z?-1:B,onFocus:z?null:O,onBlur:z?null:T,onKeydown:z?null:_,ref:d,role:"radiogroup"}),[K])}}}),l5e=kn(i5e),
/* s5e: icon definition object for the filled "warning" icon. */
s5e={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M955.7 856l-416-720c-6.2-10.7-16.9-16-27.7-16s-21.6 5.3-27.7 16l-416 720C56 877.4 71.4 904 96 904h832c24.6 0 40-26.6 27.7-48zM480 416c0-4.4 3.6-8 8-8h48c4.4 0 8 3.6 8 8v184c0 4.4-3.6 8-8 8h-48c-4.4 0-8-3.6-8-8V416zm32 352a48.01 48.01 0 010-96 48.01 48.01 0 010 96z"}}]},name:"warning",theme:"filled"},u5e=s5e;function $x(e){for(var t=1;t<arguments.length;t++){var 
n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){c5e(e,a,n[a])})}return e}function c5e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var IS=function(t,n){var r=$x({},t,n.attrs);return g(Et,$x({},r,{icon:u5e}),null)};IS.displayName="WarningFilled";IS.inheritAttrs=!1;var d5e=IS,f5e=function(){return g("svg",{width:"252",height:"294"},[g("defs",null,[g("path",{d:"M0 .387h251.772v251.772H0z"},null)]),g("g",{fill:"none","fill-rule":"evenodd"},[g("g",{transform:"translate(0 .012)"},[g("mask",{fill:"#fff"},null),g("path",{d:"M0 127.32v-2.095C0 56.279 55.892.387 124.838.387h2.096c68.946 0 124.838 55.892 124.838 124.838v2.096c0 68.946-55.892 124.838-124.838 124.838h-2.096C55.892 252.16 0 196.267 0 127.321",fill:"#E4EBF7",mask:"url(#b)"},null)]),g("path",{d:"M39.755 130.84a8.276 8.276 0 1 1-16.468-1.66 8.276 8.276 0 0 1 16.468 1.66",fill:"#FFF"},null),g("path",{d:"M36.975 134.297l10.482 5.943M48.373 146.508l-12.648 10.788",stroke:"#FFF","stroke-width":"2"},null),g("path",{d:"M39.875 159.352a5.667 5.667 0 1 1-11.277-1.136 5.667 5.667 0 0 1 11.277 1.136M57.588 143.247a5.708 5.708 0 1 1-11.358-1.145 5.708 5.708 0 0 1 11.358 1.145M99.018 26.875l29.82-.014a4.587 4.587 0 1 0-.003-9.175l-29.82.013a4.587 4.587 0 1 0 .003 9.176M110.424 45.211l29.82-.013a4.588 4.588 0 0 0-.004-9.175l-29.82.013a4.587 4.587 0 1 0 .004 9.175",fill:"#FFF"},null),g("path",{d:"M112.798 26.861v-.002l15.784-.006a4.588 4.588 0 1 0 .003 9.175l-15.783.007v-.002a4.586 4.586 0 0 0-.004-9.172M184.523 135.668c-.553 5.485-5.447 9.483-10.931 8.93-5.485-.553-9.483-5.448-8.93-10.932.552-5.485 5.447-9.483 10.932-8.93 5.485.553 9.483 5.447 8.93 10.932",fill:"#FFF"},null),g("path",{d:"M179.26 141.75l12.64 7.167M193.006 156.477l-15.255 
13.011",stroke:"#FFF","stroke-width":"2"},null),g("path",{d:"M184.668 170.057a6.835 6.835 0 1 1-13.6-1.372 6.835 6.835 0 0 1 13.6 1.372M203.34 153.325a6.885 6.885 0 1 1-13.7-1.382 6.885 6.885 0 0 1 13.7 1.382",fill:"#FFF"},null),g("path",{d:"M151.931 192.324a2.222 2.222 0 1 1-4.444 0 2.222 2.222 0 0 1 4.444 0zM225.27 116.056a2.222 2.222 0 1 1-4.445 0 2.222 2.222 0 0 1 4.444 0zM216.38 151.08a2.223 2.223 0 1 1-4.446-.001 2.223 2.223 0 0 1 4.446 0zM176.917 107.636a2.223 2.223 0 1 1-4.445 0 2.223 2.223 0 0 1 4.445 0zM195.291 92.165a2.223 2.223 0 1 1-4.445 0 2.223 2.223 0 0 1 4.445 0zM202.058 180.711a2.223 2.223 0 1 1-4.446 0 2.223 2.223 0 0 1 4.446 0z",stroke:"#FFF","stroke-width":"2"},null),g("path",{stroke:"#FFF","stroke-width":"2",d:"M214.404 153.302l-1.912 20.184-10.928 5.99M173.661 174.792l-6.356 9.814h-11.36l-4.508 6.484M174.941 125.168v-15.804M220.824 117.25l-12.84 7.901-15.31-7.902V94.39"},null),g("path",{d:"M166.588 65.936h-3.951a4.756 4.756 0 0 1-4.743-4.742 4.756 4.756 0 0 1 4.743-4.743h3.951a4.756 4.756 0 0 1 4.743 4.743 4.756 4.756 0 0 1-4.743 4.742",fill:"#FFF"},null),g("path",{d:"M174.823 30.03c0-16.281 13.198-29.48 29.48-29.48 16.28 0 29.48 13.199 29.48 29.48 0 16.28-13.2 29.48-29.48 29.48-16.282 0-29.48-13.2-29.48-29.48",fill:"#1890FF"},null),g("path",{d:"M205.952 38.387c.5.5.785 1.142.785 1.928s-.286 1.465-.785 1.964c-.572.5-1.214.75-2 .75-.785 0-1.429-.285-1.929-.785-.572-.5-.82-1.143-.82-1.929s.248-1.428.82-1.928c.5-.5 1.144-.75 1.93-.75.785 0 1.462.25 1.999.75m4.285-19.463c1.428 1.249 2.143 2.963 2.143 5.142 0 1.712-.427 3.13-1.219 4.25-.067.096-.137.18-.218.265-.416.429-1.41 1.346-2.956 2.699a5.07 5.07 0 0 0-1.428 1.75 5.207 5.207 0 0 0-.536 2.357v.5h-4.107v-.5c0-1.357.215-2.536.714-3.5.464-.964 1.857-2.464 4.178-4.536l.43-.5c.643-.785.964-1.643.964-2.535 0-1.18-.358-2.108-1-2.785-.678-.68-1.643-1.001-2.858-1.001-1.536 0-2.642.464-3.357 1.43-.37.5-.621 1.135-.76 1.904a1.999 1.999 0 0 1-1.971 1.63h-.004c-1.277 0-2.257-1.183-1.98-2.43.337-1.518 
1.02-2.78 2.073-3.784 1.536-1.5 3.607-2.25 6.25-2.25 2.32 0 4.214.607 5.642 1.894",fill:"#FFF"},null),g("path",{d:"M52.04 76.131s21.81 5.36 27.307 15.945c5.575 10.74-6.352 9.26-15.73 4.935-10.86-5.008-24.7-11.822-11.577-20.88",fill:"#FFB594"},null),g("path",{d:"M90.483 67.504l-.449 2.893c-.753.49-4.748-2.663-4.748-2.663l-1.645.748-1.346-5.684s6.815-4.589 8.917-5.018c2.452-.501 9.884.94 10.7 2.278 0 0 1.32.486-2.227.69-3.548.203-5.043.447-6.79 3.132-1.747 2.686-2.412 3.624-2.412 3.624",fill:"#FFC6A0"},null),g("path",{d:"M128.055 111.367c-2.627-7.724-6.15-13.18-8.917-15.478-3.5-2.906-9.34-2.225-11.366-4.187-1.27-1.231-3.215-1.197-3.215-1.197s-14.98-3.158-16.828-3.479c-2.37-.41-2.124-.714-6.054-1.405-1.57-1.907-2.917-1.122-2.917-1.122l-7.11-1.383c-.853-1.472-2.423-1.023-2.423-1.023l-2.468-.897c-1.645 9.976-7.74 13.796-7.74 13.796 1.795 1.122 15.703 8.3 15.703 8.3l5.107 37.11s-3.321 5.694 1.346 9.109c0 0 19.883-3.743 34.921-.329 0 0 3.047-2.546.972-8.806.523-3.01 1.394-8.263 1.736-11.622.385.772 2.019 1.918 3.14 3.477 0 0 9.407-7.365 11.052-14.012-.832-.723-1.598-1.585-2.267-2.453-.567-.736-.358-2.056-.765-2.717-.669-1.084-1.804-1.378-1.907-1.682",fill:"#FFF"},null),g("path",{d:"M101.09 289.998s4.295 2.041 7.354 1.021c2.821-.94 4.53.668 7.08 1.178 2.55.51 6.874 1.1 11.686-1.26-.103-5.51-6.889-3.98-11.96-6.713-2.563-1.38-3.784-4.722-3.598-8.799h-9.402s-1.392 10.52-1.16 14.573",fill:"#CBD1D1"},null),g("path",{d:"M101.067 289.826s2.428 1.271 6.759.653c3.058-.437 3.712.481 7.423 1.031 3.712.55 10.724-.069 11.823-.894.413 1.1-.343 2.063-.343 2.063s-1.512.603-4.812.824c-2.03.136-5.8.291-7.607-.503-1.787-1.375-5.247-1.903-5.728-.241-3.918.95-7.355-.286-7.355-.286l-.16-2.647z",fill:"#2B0849"},null),g("path",{d:"M108.341 276.044h3.094s-.103 6.702 4.536 8.558c-4.64.618-8.558-2.303-7.63-8.558",fill:"#A4AABA"},null),g("path",{d:"M57.542 272.401s-2.107 7.416-4.485 12.306c-1.798 3.695-4.225 7.492 5.465 7.492 6.648 0 8.953-.48 
7.423-6.599-1.53-6.12.266-13.199.266-13.199h-8.669z",fill:"#CBD1D1"},null),g("path",{d:"M51.476 289.793s2.097 1.169 6.633 1.169c6.083 0 8.249-1.65 8.249-1.65s.602 1.114-.619 2.165c-.993.855-3.597 1.591-7.39 1.546-4.145-.048-5.832-.566-6.736-1.168-.825-.55-.687-1.58-.137-2.062",fill:"#2B0849"},null),g("path",{d:"M58.419 274.304s.033 1.519-.314 2.93c-.349 1.42-1.078 3.104-1.13 4.139-.058 1.151 4.537 1.58 5.155.034.62-1.547 1.294-6.427 1.913-7.252.619-.825-4.903-2.119-5.624.15",fill:"#A4AABA"},null),g("path",{d:"M99.66 278.514l13.378.092s1.298-54.52 1.853-64.403c.554-9.882 3.776-43.364 1.002-63.128l-12.547-.644-22.849.78s-.434 3.966-1.195 9.976c-.063.496-.682.843-.749 1.365-.075.585.423 1.354.32 1.966-2.364 14.08-6.377 33.104-8.744 46.677-.116.666-1.234 1.009-1.458 2.691-.04.302.211 1.525.112 1.795-6.873 18.744-10.949 47.842-14.277 61.885l14.607-.014s2.197-8.57 4.03-16.97c2.811-12.886 23.111-85.01 23.111-85.01l3.016-.521 1.043 46.35s-.224 1.234.337 2.02c.56.785-.56 1.123-.392 2.244l.392 1.794s-.449 7.178-.898 11.89c-.448 4.71-.092 39.165-.092 39.165",fill:"#7BB2F9"},null),g("path",{d:"M76.085 221.626c1.153.094 4.038-2.019 6.955-4.935M106.36 225.142s2.774-1.11 6.103-3.883",stroke:"#648BD8","stroke-width":"1.051","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M107.275 222.1s2.773-1.11 6.102-3.884",stroke:"#648BD8","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M74.74 224.767s2.622-.591 6.505-3.365M86.03 151.634c-.27 3.106.3 8.525-4.336 9.123M103.625 149.88s.11 14.012-1.293 15.065c-2.219 1.664-2.99 1.944-2.99 1.944M99.79 150.438s.035 12.88-1.196 24.377M93.673 175.911s7.212-1.664 9.431-1.664M74.31 205.861a212.013 212.013 0 0 1-.979 4.56s-1.458 1.832-1.009 3.776c.449 1.944-.947 2.045-4.985 15.355-1.696 5.59-4.49 18.591-6.348 27.597l-.231 1.12M75.689 197.807a320.934 320.934 0 0 1-.882 4.754M82.591 152.233L81.395 162.7s-1.097.15-.5 2.244c.113 1.346-2.674 15.775-5.18 30.43M56.12 
274.418h13.31",stroke:"#648BD8","stroke-width":"1.051","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M116.241 148.22s-17.047-3.104-35.893.2c.158 2.514-.003 4.15-.003 4.15s14.687-2.818 35.67-.312c.252-2.355.226-4.038.226-4.038",fill:"#192064"},null),g("path",{d:"M106.322 151.165l.003-4.911a.81.81 0 0 0-.778-.815c-2.44-.091-5.066-.108-7.836-.014a.818.818 0 0 0-.789.815l-.003 4.906a.81.81 0 0 0 .831.813c2.385-.06 4.973-.064 7.73.017a.815.815 0 0 0 .842-.81",fill:"#FFF"},null),g("path",{d:"M105.207 150.233l.002-3.076a.642.642 0 0 0-.619-.646 94.321 94.321 0 0 0-5.866-.01.65.65 0 0 0-.63.647v3.072a.64.64 0 0 0 .654.644 121.12 121.12 0 0 1 5.794.011c.362.01.665-.28.665-.642",fill:"#192064"},null),g("path",{d:"M100.263 275.415h12.338M101.436 270.53c.006 3.387.042 5.79.111 6.506M101.451 264.548a915.75 915.75 0 0 0-.015 4.337M100.986 174.965l.898 44.642s.673 1.57-.225 2.692c-.897 1.122 2.468.673.898 2.243-1.57 1.57.897 1.122 0 3.365-.596 1.489-.994 21.1-1.096 35.146",stroke:"#648BD8","stroke-width":"1.051","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M46.876 83.427s-.516 6.045 7.223 5.552c11.2-.712 9.218-9.345 31.54-21.655-.786-2.708-2.447-4.744-2.447-4.744s-11.068 3.11-22.584 8.046c-6.766 2.9-13.395 6.352-13.732 12.801M104.46 91.057l.941-5.372-8.884-11.43-5.037 5.372-1.74 7.834a.321.321 0 0 0 .108.32c.965.8 6.5 5.013 14.347 3.544a.332.332 0 0 0 .264-.268",fill:"#FFC6A0"},null),g("path",{d:"M93.942 79.387s-4.533-2.853-2.432-6.855c1.623-3.09 4.513 1.133 4.513 1.133s.52-3.642 3.121-3.642c.52-1.04 1.561-4.162 1.561-4.162s11.445 2.601 13.526 3.121c0 5.203-2.304 19.424-7.84 19.861-8.892.703-12.449-9.456-12.449-9.456",fill:"#FFC6A0"},null),g("path",{d:"M113.874 73.446c2.601-2.081 3.47-9.722 3.47-9.722s-2.479-.49-6.64-2.05c-4.683-2.081-12.798-4.747-17.48.976-9.668 3.223-2.05 19.823-2.05 19.823l2.713-3.021s-3.935-3.287-2.08-6.243c2.17-3.462 3.92 1.073 3.92 1.073s.637-2.387 3.581-3.342c.355-.71 1.036-2.674 1.432-3.85a1.073 1.073 0 0 
1 1.263-.704c2.4.558 8.677 2.019 11.356 2.662.522.125.871.615.82 1.15l-.305 3.248z",fill:"#520038"},null),g("path",{d:"M104.977 76.064c-.103.61-.582 1.038-1.07.956-.489-.083-.801-.644-.698-1.254.103-.61.582-1.038 1.07-.956.488.082.8.644.698 1.254M112.132 77.694c-.103.61-.582 1.038-1.07.956-.488-.083-.8-.644-.698-1.254.103-.61.582-1.038 1.07-.956.488.082.8.643.698 1.254",fill:"#552950"},null),g("path",{stroke:"#DB836E","stroke-width":"1.118","stroke-linecap":"round","stroke-linejoin":"round",d:"M110.13 74.84l-.896 1.61-.298 4.357h-2.228"},null),g("path",{d:"M110.846 74.481s1.79-.716 2.506.537",stroke:"#5C2552","stroke-width":"1.118","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M92.386 74.282s.477-1.114 1.113-.716c.637.398 1.274 1.433.558 1.99-.717.556.159 1.67.159 1.67",stroke:"#DB836E","stroke-width":"1.118","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M103.287 72.93s1.83 1.113 4.137.954",stroke:"#5C2552","stroke-width":"1.118","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M103.685 81.762s2.227 1.193 4.376 1.193M104.64 84.308s.954.398 1.511.318M94.693 81.205s2.308 7.4 10.424 7.639",stroke:"#DB836E","stroke-width":"1.118","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M81.45 89.384s.45 5.647-4.935 12.787M69 82.654s-.726 9.282-8.204 14.206",stroke:"#E4EBF7","stroke-width":"1.101","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M129.405 122.865s-5.272 7.403-9.422 10.768",stroke:"#E4EBF7","stroke-width":"1.051","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M119.306 107.329s.452 4.366-2.127 32.062",stroke:"#E4EBF7","stroke-width":"1.101","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M150.028 151.232h-49.837a1.01 1.01 0 0 1-1.01-1.01v-31.688c0-.557.452-1.01 1.01-1.01h49.837c.558 0 1.01.453 1.01 1.01v31.688a1.01 1.01 0 0 1-1.01 1.01",fill:"#F2D7AD"},null),g("path",{d:"M150.29 
151.232h-19.863v-33.707h20.784v32.786a.92.92 0 0 1-.92.92",fill:"#F4D19D"},null),g("path",{d:"M123.554 127.896H92.917a.518.518 0 0 1-.425-.816l6.38-9.113c.193-.277.51-.442.85-.442h31.092l-7.26 10.371z",fill:"#F2D7AD"},null),g("path",{fill:"#CC9B6E",d:"M123.689 128.447H99.25v-.519h24.169l7.183-10.26.424.298z"},null),g("path",{d:"M158.298 127.896h-18.669a2.073 2.073 0 0 1-1.659-.83l-7.156-9.541h19.965c.49 0 .95.23 1.244.622l6.69 8.92a.519.519 0 0 1-.415.83",fill:"#F4D19D"},null),g("path",{fill:"#CC9B6E",d:"M157.847 128.479h-19.384l-7.857-10.475.415-.31 7.7 10.266h19.126zM130.554 150.685l-.032-8.177.519-.002.032 8.177z"},null),g("path",{fill:"#CC9B6E",d:"M130.511 139.783l-.08-21.414.519-.002.08 21.414zM111.876 140.932l-.498-.143 1.479-5.167.498.143zM108.437 141.06l-2.679-2.935 2.665-3.434.41.318-2.397 3.089 2.384 2.612zM116.607 141.06l-.383-.35 2.383-2.612-2.397-3.089.41-.318 2.665 3.434z"},null),g("path",{d:"M154.316 131.892l-3.114-1.96.038 3.514-1.043.092c-1.682.115-3.634.23-4.789.23-1.902 0-2.693 2.258 2.23 2.648l-2.645-.596s-2.168 1.317.504 2.3c0 0-1.58 1.217.561 2.58-.584 3.504 5.247 4.058 7.122 3.59 1.876-.47 4.233-2.359 4.487-5.16.28-3.085-.89-5.432-3.35-7.238",fill:"#FFC6A0"},null),g("path",{d:"M153.686 133.577s-6.522.47-8.36.372c-1.836-.098-1.904 2.19 2.359 2.264 3.739.15 5.451-.044 5.451-.044",stroke:"#DB836E","stroke-width":"1.051","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M145.16 135.877c-1.85 1.346.561 2.355.561 2.355s3.478.898 6.73.617",stroke:"#DB836E","stroke-width":"1.051","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M151.89 141.71s-6.28.111-6.73-2.132c-.223-1.346.45-1.402.45-1.402M146.114 140.868s-1.103 3.16 5.44 3.533M151.202 129.932v3.477M52.838 89.286c3.533-.337 8.423-1.248 13.582-7.754",stroke:"#DB836E","stroke-width":"1.051","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M168.567 248.318a6.647 6.647 0 0 1-6.647-6.647v-66.466a6.647 6.647 0 1 1 13.294 0v66.466a6.647 
6.647 0 0 1-6.647 6.647",fill:"#5BA02E"},null),g("path",{d:"M176.543 247.653a6.647 6.647 0 0 1-6.646-6.647v-33.232a6.647 6.647 0 1 1 13.293 0v33.232a6.647 6.647 0 0 1-6.647 6.647",fill:"#92C110"},null),g("path",{d:"M186.443 293.613H158.92a3.187 3.187 0 0 1-3.187-3.187v-46.134a3.187 3.187 0 0 1 3.187-3.187h27.524a3.187 3.187 0 0 1 3.187 3.187v46.134a3.187 3.187 0 0 1-3.187 3.187",fill:"#F2D7AD"},null),g("path",{d:"M88.979 89.48s7.776 5.384 16.6 2.842",stroke:"#E4EBF7","stroke-width":"1.101","stroke-linecap":"round","stroke-linejoin":"round"},null)])])},h5e=f5e,p5e=function(){return g("svg",{width:"254",height:"294"},[g("defs",null,[g("path",{d:"M0 .335h253.49v253.49H0z"},null),g("path",{d:"M0 293.665h253.49V.401H0z"},null)]),g("g",{fill:"none","fill-rule":"evenodd"},[g("g",{transform:"translate(0 .067)"},[g("mask",{fill:"#fff"},null),g("path",{d:"M0 128.134v-2.11C0 56.608 56.273.334 125.69.334h2.11c69.416 0 125.69 56.274 125.69 125.69v2.11c0 69.417-56.274 125.69-125.69 125.69h-2.11C56.273 253.824 0 197.551 0 128.134",fill:"#E4EBF7",mask:"url(#b)"},null)]),g("path",{d:"M39.989 132.108a8.332 8.332 0 1 1-16.581-1.671 8.332 8.332 0 0 1 16.58 1.671",fill:"#FFF"},null),g("path",{d:"M37.19 135.59l10.553 5.983M48.665 147.884l-12.734 10.861",stroke:"#FFF","stroke-width":"2"},null),g("path",{d:"M40.11 160.816a5.706 5.706 0 1 1-11.354-1.145 5.706 5.706 0 0 1 11.354 1.145M57.943 144.6a5.747 5.747 0 1 1-11.436-1.152 5.747 5.747 0 0 1 11.436 1.153M99.656 27.434l30.024-.013a4.619 4.619 0 1 0-.004-9.238l-30.024.013a4.62 4.62 0 0 0 .004 9.238M111.14 45.896l30.023-.013a4.62 4.62 0 1 0-.004-9.238l-30.024.013a4.619 4.619 0 1 0 .004 9.238",fill:"#FFF"},null),g("path",{d:"M113.53 27.421v-.002l15.89-.007a4.619 4.619 0 1 0 .005 9.238l-15.892.007v-.002a4.618 4.618 0 0 0-.004-9.234M150.167 70.091h-3.979a4.789 4.789 0 0 1-4.774-4.775 4.788 4.788 0 0 1 4.774-4.774h3.979a4.789 4.789 0 0 1 4.775 4.774 4.789 4.789 0 0 1-4.775 4.775",fill:"#FFF"},null),g("path",{d:"M171.687 30.234c0-16.392 
13.289-29.68 29.681-29.68 16.392 0 29.68 13.288 29.68 29.68 0 16.393-13.288 29.681-29.68 29.681s-29.68-13.288-29.68-29.68",fill:"#FF603B"},null),g("path",{d:"M203.557 19.435l-.676 15.035a1.514 1.514 0 0 1-3.026 0l-.675-15.035a2.19 2.19 0 1 1 4.377 0m-.264 19.378c.513.477.77 1.1.77 1.87s-.257 1.393-.77 1.907c-.55.476-1.21.733-1.943.733a2.545 2.545 0 0 1-1.87-.77c-.55-.514-.806-1.136-.806-1.87 0-.77.256-1.393.806-1.87.513-.513 1.137-.733 1.87-.733.77 0 1.43.22 1.943.733",fill:"#FFF"},null),g("path",{d:"M119.3 133.275c4.426-.598 3.612-1.204 4.079-4.778.675-5.18-3.108-16.935-8.262-25.118-1.088-10.72-12.598-11.24-12.598-11.24s4.312 4.895 4.196 16.199c1.398 5.243.804 14.45.804 14.45s5.255 11.369 11.78 10.487",fill:"#FFB594"},null),g("path",{d:"M100.944 91.61s1.463-.583 3.211.582c8.08 1.398 10.368 6.706 11.3 11.368 1.864 1.282 1.864 2.33 1.864 3.496.365.777 1.515 3.03 1.515 3.03s-7.225 1.748-10.954 6.758c-1.399-6.41-6.936-25.235-6.936-25.235",fill:"#FFF"},null),g("path",{d:"M94.008 90.5l1.019-5.815-9.23-11.874-5.233 5.581-2.593 9.863s8.39 5.128 16.037 2.246",fill:"#FFB594"},null),g("path",{d:"M82.931 78.216s-4.557-2.868-2.445-6.892c1.632-3.107 4.537 1.139 4.537 1.139s.524-3.662 3.139-3.662c.523-1.046 1.569-4.184 1.569-4.184s11.507 2.615 13.6 3.138c-.001 5.23-2.317 19.529-7.884 19.969-8.94.706-12.516-9.508-12.516-9.508",fill:"#FFC6A0"},null),g("path",{d:"M102.971 72.243c2.616-2.093 3.489-9.775 3.489-9.775s-2.492-.492-6.676-2.062c-4.708-2.092-12.867-4.771-17.575.982-9.54 4.41-2.062 19.93-2.062 19.93l2.729-3.037s-3.956-3.304-2.092-6.277c2.183-3.48 3.943 1.08 3.943 1.08s.64-2.4 3.6-3.36c.356-.714 1.04-2.69 1.44-3.872a1.08 1.08 0 0 1 1.27-.707c2.41.56 8.723 2.03 11.417 2.676.524.126.876.619.825 1.156l-.308 3.266z",fill:"#520038"},null),g("path",{d:"M101.22 76.514c-.104.613-.585 1.044-1.076.96-.49-.082-.805-.646-.702-1.26.104-.613.585-1.044 1.076-.961.491.083.805.647.702 1.26M94.26 75.074c-.104.613-.585 1.044-1.076.96-.49-.082-.805-.646-.702-1.26.104-.613.585-1.044 
1.076-.96.491.082.805.646.702 1.26",fill:"#552950"},null),g("path",{stroke:"#DB836E","stroke-width":"1.063","stroke-linecap":"round","stroke-linejoin":"round",d:"M99.206 73.644l-.9 1.62-.3 4.38h-2.24"},null),g("path",{d:"M99.926 73.284s1.8-.72 2.52.54",stroke:"#5C2552","stroke-width":"1.117","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M81.367 73.084s.48-1.12 1.12-.72c.64.4 1.28 1.44.56 2s.16 1.68.16 1.68",stroke:"#DB836E","stroke-width":"1.117","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M92.326 71.724s1.84 1.12 4.16.96",stroke:"#5C2552","stroke-width":"1.117","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M92.726 80.604s2.24 1.2 4.4 1.2M93.686 83.164s.96.4 1.52.32M83.687 80.044s1.786 6.547 9.262 7.954",stroke:"#DB836E","stroke-width":"1.063","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M95.548 91.663s-1.068 2.821-8.298 2.105c-7.23-.717-10.29-5.044-10.29-5.044",stroke:"#E4EBF7","stroke-width":"1.136","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M78.126 87.478s6.526 4.972 16.47 2.486c0 0 9.577 1.02 11.536 5.322 5.36 11.77.543 36.835 0 39.962 3.496 4.055-.466 8.483-.466 8.483-15.624-3.548-35.81-.6-35.81-.6-4.849-3.546-1.223-9.044-1.223-9.044L62.38 110.32c-2.485-15.227.833-19.803 3.549-20.743 3.03-1.049 8.04-1.282 8.04-1.282.496-.058 1.08-.076 1.37-.233 2.36-1.282 2.787-.583 2.787-.583",fill:"#FFF"},null),g("path",{d:"M65.828 89.81s-6.875.465-7.59 8.156c-.466 8.857 3.03 10.954 3.03 10.954s6.075 22.102 16.796 22.957c8.39-2.176 4.758-6.702 4.661-11.42-.233-11.304-7.108-16.897-7.108-16.897s-4.212-13.75-9.789-13.75",fill:"#FFC6A0"},null),g("path",{d:"M71.716 124.225s.855 11.264 9.828 6.486c4.765-2.536 7.581-13.828 9.789-22.568 1.456-5.768 2.58-12.197 2.58-12.197l-4.973-1.709s-2.408 5.516-7.769 12.275c-4.335 5.467-9.144 11.11-9.455 17.713",fill:"#FFC6A0"},null),g("path",{d:"M108.463 105.191s1.747 2.724-2.331 30.535c2.376 2.216 1.053 6.012-.233 
7.51",stroke:"#E4EBF7","stroke-width":"1.085","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M123.262 131.527s-.427 2.732-11.77 1.981c-15.187-1.006-25.326-3.25-25.326-3.25l.933-5.8s.723.215 9.71-.068c11.887-.373 18.714-6.07 24.964-1.022 4.039 3.263 1.489 8.16 1.489 8.16",fill:"#FFC6A0"},null),g("path",{d:"M70.24 90.974s-5.593-4.739-11.054 2.68c-3.318 7.223.517 15.284 2.664 19.578-.31 3.729 2.33 4.311 2.33 4.311s.108.895 1.516 2.68c4.078-7.03 6.72-9.166 13.711-12.546-.328-.656-1.877-3.265-1.825-3.767.175-1.69-1.282-2.623-1.282-2.623s-.286-.156-1.165-2.738c-.788-2.313-2.036-5.177-4.895-7.575",fill:"#FFF"},null),g("path",{d:"M90.232 288.027s4.855 2.308 8.313 1.155c3.188-1.063 5.12.755 8.002 1.331 2.881.577 7.769 1.243 13.207-1.424-.117-6.228-7.786-4.499-13.518-7.588-2.895-1.56-4.276-5.336-4.066-9.944H91.544s-1.573 11.89-1.312 16.47",fill:"#CBD1D1"},null),g("path",{d:"M90.207 287.833s2.745 1.437 7.639.738c3.456-.494 3.223.66 7.418 1.282 4.195.621 13.092-.194 14.334-1.126.466 1.242-.388 2.33-.388 2.33s-1.709.682-5.438.932c-2.295.154-8.098.276-10.14-.621-2.02-1.554-4.894-1.515-6.06-.234-4.427 1.075-7.184-.31-7.184-.31l-.181-2.991z",fill:"#2B0849"},null),g("path",{d:"M98.429 272.257h3.496s-.117 7.574 5.127 9.671c-5.244.7-9.672-2.602-8.623-9.671",fill:"#A4AABA"},null),g("path",{d:"M44.425 272.046s-2.208 7.774-4.702 12.899c-1.884 3.874-4.428 7.854 5.729 7.854 6.97 0 9.385-.503 7.782-6.917-1.604-6.415.279-13.836.279-13.836h-9.088z",fill:"#CBD1D1"},null),g("path",{d:"M38.066 290.277s2.198 1.225 6.954 1.225c6.376 0 8.646-1.73 8.646-1.73s.63 1.168-.649 2.27c-1.04.897-3.77 1.668-7.745 1.621-4.347-.05-6.115-.593-7.062-1.224-.864-.577-.72-1.657-.144-2.162",fill:"#2B0849"},null),g("path",{d:"M45.344 274.041s.035 1.592-.329 3.07c-.365 1.49-1.13 3.255-1.184 4.34-.061 1.206 4.755 1.657 5.403.036.65-1.622 1.357-6.737 2.006-7.602.648-.865-5.14-2.222-5.896.156",fill:"#A4AABA"},null),g("path",{d:"M89.476 277.57l13.899.095s1.349-56.643 1.925-66.909c.576-10.267 
3.923-45.052 1.042-65.585l-13.037-.669-23.737.81s-.452 4.12-1.243 10.365c-.065.515-.708.874-.777 1.417-.078.608.439 1.407.332 2.044-2.455 14.627-5.797 32.736-8.256 46.837-.121.693-1.282 1.048-1.515 2.796-.042.314.22 1.584.116 1.865-7.14 19.473-12.202 52.601-15.66 67.19l15.176-.015s2.282-10.145 4.185-18.871c2.922-13.389 24.012-88.32 24.012-88.32l3.133-.954-.158 48.568s-.233 1.282.35 2.098c.583.815-.581 1.167-.408 2.331l.408 1.864s-.466 7.458-.932 12.352c-.467 4.895 1.145 40.69 1.145 40.69",fill:"#7BB2F9"},null),g("path",{d:"M64.57 218.881c1.197.099 4.195-2.097 7.225-5.127M96.024 222.534s2.881-1.152 6.34-4.034",stroke:"#648BD8","stroke-width":"1.085","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M96.973 219.373s2.882-1.153 6.34-4.034",stroke:"#648BD8","stroke-width":"1.032","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M63.172 222.144s2.724-.614 6.759-3.496M74.903 146.166c-.281 3.226.31 8.856-4.506 9.478M93.182 144.344s.115 14.557-1.344 15.65c-2.305 1.73-3.107 2.02-3.107 2.02M89.197 144.923s.269 13.144-1.01 25.088M83.525 170.71s6.81-1.051 9.116-1.051M46.026 270.045l-.892 4.538M46.937 263.289l-.815 4.157M62.725 202.503c-.33 1.618-.102 1.904-.449 3.438 0 0-2.756 1.903-2.29 3.923.466 2.02-.31 3.424-4.505 17.252-1.762 5.807-4.233 18.922-6.165 28.278-.03.144-.521 2.646-1.14 5.8M64.158 194.136c-.295 1.658-.6 3.31-.917 4.938M71.33 146.787l-1.244 10.877s-1.14.155-.519 2.33c.117 1.399-2.778 16.39-5.382 31.615M44.242 273.727H58.07",stroke:"#648BD8","stroke-width":"1.085","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M106.18 142.117c-3.028-.489-18.825-2.744-36.219.2a.625.625 0 0 0-.518.644c.063 1.307.044 2.343.015 2.995a.617.617 0 0 0 .716.636c3.303-.534 17.037-2.412 35.664-.266.347.04.66-.214.692-.56.124-1.347.16-2.425.17-3.029a.616.616 0 0 0-.52-.62",fill:"#192064"},null),g("path",{d:"M96.398 145.264l.003-5.102a.843.843 0 0 0-.809-.847 114.104 114.104 0 0 0-8.141-.014.85.85 0 0 0-.82.847l-.003 5.097c0 
.476.388.857.864.845 2.478-.064 5.166-.067 8.03.017a.848.848 0 0 0 .876-.843",fill:"#FFF"},null),g("path",{d:"M95.239 144.296l.002-3.195a.667.667 0 0 0-.643-.672c-1.9-.061-3.941-.073-6.094-.01a.675.675 0 0 0-.654.672l-.002 3.192c0 .376.305.677.68.669 1.859-.042 3.874-.043 6.02.012.376.01.69-.291.691-.668",fill:"#192064"},null),g("path",{d:"M90.102 273.522h12.819M91.216 269.761c.006 3.519-.072 5.55 0 6.292M90.923 263.474c-.009 1.599-.016 2.558-.016 4.505M90.44 170.404l.932 46.38s.7 1.631-.233 2.796c-.932 1.166 2.564.7.932 2.33-1.63 1.633.933 1.166 0 3.497-.618 1.546-1.031 21.921-1.138 36.513",stroke:"#648BD8","stroke-width":"1.085","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M73.736 98.665l2.214 4.312s2.098.816 1.865 2.68l.816 2.214M64.297 116.611c.233-.932 2.176-7.147 12.585-10.488M77.598 90.042s7.691 6.137 16.547 2.72",stroke:"#E4EBF7","stroke-width":"1.085","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M91.974 86.954s5.476-.816 7.574-4.545c1.297-.345.72 2.212-.33 3.671-.7.971-1.01 1.554-1.01 1.554s.194.31.155.816c-.053.697-.175.653-.272 1.048-.081.335.108.657 0 1.049-.046.17-.198.5-.382.878-.12.249-.072.687-.2.948-.231.469-1.562 1.87-2.622 2.855-3.826 3.554-5.018 1.644-6.001-.408-.894-1.865-.661-5.127-.874-6.875-.35-2.914-2.622-3.03-1.923-4.429.343-.685 2.87.69 3.263 1.748.757 2.04 2.952 1.807 2.622 1.69",fill:"#FFC6A0"},null),g("path",{d:"M99.8 82.429c-.465.077-.35.272-.97 1.243-.622.971-4.817 2.932-6.39 3.224-2.589.48-2.278-1.56-4.254-2.855-1.69-1.107-3.562-.638-1.398 1.398.99.932.932 1.107 1.398 3.205.335 1.506-.64 3.67.7 5.593",stroke:"#DB836E","stroke-width":".774","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M79.543 108.673c-2.1 2.926-4.266 6.175-5.557 8.762",stroke:"#E59788","stroke-width":".774","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M87.72 124.768s-2.098-1.942-5.127-2.719c-3.03-.777-3.574-.155-5.516.078-1.942.233-3.885-.932-3.652.7.233 1.63 5.05 
1.01 5.206 2.097.155 1.087-6.37 2.796-8.313 2.175-.777.777.466 1.864 2.02 2.175.233 1.554 2.253 1.554 2.253 1.554s.699 1.01 2.641 1.088c2.486 1.32 8.934-.7 10.954-1.554 2.02-.855-.466-5.594-.466-5.594",fill:"#FFC6A0"},null),g("path",{d:"M73.425 122.826s.66 1.127 3.167 1.418c2.315.27 2.563.583 2.563.583s-2.545 2.894-9.07 2.272M72.416 129.274s3.826.097 4.933-.718M74.98 130.75s1.961.136 3.36-.505M77.232 131.916s1.748.019 2.914-.505M73.328 122.321s-.595-1.032 1.262-.427c1.671.544 2.833.055 5.128.155 1.389.061 3.067-.297 3.982.15 1.606.784 3.632 2.181 3.632 2.181s10.526 1.204 19.033-1.127M78.864 108.104s-8.39 2.758-13.168 12.12",stroke:"#E59788","stroke-width":".774","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M109.278 112.533s3.38-3.613 7.575-4.662",stroke:"#E4EBF7","stroke-width":"1.085","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M107.375 123.006s9.697-2.745 11.445-.88",stroke:"#E59788","stroke-width":".774","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M194.605 83.656l3.971-3.886M187.166 90.933l3.736-3.655M191.752 84.207l-4.462-4.56M198.453 91.057l-4.133-4.225M129.256 163.074l3.718-3.718M122.291 170.039l3.498-3.498M126.561 163.626l-4.27-4.27M132.975 170.039l-3.955-3.955",stroke:"#BFCDDD","stroke-width":"2","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M190.156 211.779h-1.604a4.023 4.023 0 0 1-4.011-4.011V175.68a4.023 4.023 0 0 1 4.01-4.01h1.605a4.023 4.023 0 0 1 4.011 4.01v32.088a4.023 4.023 0 0 1-4.01 4.01",fill:"#A3B4C6"},null),g("path",{d:"M237.824 212.977a4.813 4.813 0 0 1-4.813 4.813h-86.636a4.813 4.813 0 0 1 0-9.626h86.636a4.813 4.813 0 0 1 4.813 4.813",fill:"#A3B4C6"},null),g("mask",{fill:"#fff"},null),g("path",{fill:"#A3B4C6",mask:"url(#d)",d:"M154.098 190.096h70.513v-84.617h-70.513z"},null),g("path",{d:"M224.928 190.096H153.78a3.219 3.219 0 0 1-3.208-3.209V167.92a3.219 3.219 0 0 1 3.208-3.21h71.148a3.219 3.219 0 0 1 3.209 3.21v18.967a3.219 3.219 0 0 
1-3.21 3.209M224.928 130.832H153.78a3.218 3.218 0 0 1-3.208-3.208v-18.968a3.219 3.219 0 0 1 3.208-3.209h71.148a3.219 3.219 0 0 1 3.209 3.21v18.967a3.218 3.218 0 0 1-3.21 3.208",fill:"#BFCDDD",mask:"url(#d)"},null),g("path",{d:"M159.563 120.546a2.407 2.407 0 1 1 0-4.813 2.407 2.407 0 0 1 0 4.813M166.98 120.546a2.407 2.407 0 1 1 0-4.813 2.407 2.407 0 0 1 0 4.813M174.397 120.546a2.407 2.407 0 1 1 0-4.813 2.407 2.407 0 0 1 0 4.813M222.539 120.546h-22.461a.802.802 0 0 1-.802-.802v-3.208c0-.443.359-.803.802-.803h22.46c.444 0 .803.36.803.803v3.208c0 .443-.36.802-.802.802",fill:"#FFF",mask:"url(#d)"},null),g("path",{d:"M224.928 160.464H153.78a3.218 3.218 0 0 1-3.208-3.209v-18.967a3.219 3.219 0 0 1 3.208-3.209h71.148a3.219 3.219 0 0 1 3.209 3.209v18.967a3.218 3.218 0 0 1-3.21 3.209",fill:"#BFCDDD",mask:"url(#d)"},null),g("path",{d:"M173.455 130.832h49.301M164.984 130.832h6.089M155.952 130.832h6.75M173.837 160.613h49.3M165.365 160.613h6.089M155.57 160.613h6.751",stroke:"#7C90A5","stroke-width":"1.124","stroke-linecap":"round","stroke-linejoin":"round",mask:"url(#d)"},null),g("path",{d:"M159.563 151.038a2.407 2.407 0 1 1 0-4.814 2.407 2.407 0 0 1 0 4.814M166.98 151.038a2.407 2.407 0 1 1 0-4.814 2.407 2.407 0 0 1 0 4.814M174.397 151.038a2.407 2.407 0 1 1 .001-4.814 2.407 2.407 0 0 1 0 4.814M222.539 151.038h-22.461a.802.802 0 0 1-.802-.802v-3.209c0-.443.359-.802.802-.802h22.46c.444 0 .803.36.803.802v3.209c0 .443-.36.802-.802.802M159.563 179.987a2.407 2.407 0 1 1 0-4.813 2.407 2.407 0 0 1 0 4.813M166.98 179.987a2.407 2.407 0 1 1 0-4.813 2.407 2.407 0 0 1 0 4.813M174.397 179.987a2.407 2.407 0 1 1 0-4.813 2.407 2.407 0 0 1 0 4.813M222.539 179.987h-22.461a.802.802 0 0 1-.802-.802v-3.209c0-.443.359-.802.802-.802h22.46c.444 0 .803.36.803.802v3.209c0 .443-.36.802-.802.802",fill:"#FFF",mask:"url(#d)"},null),g("path",{d:"M203.04 221.108h-27.372a2.413 2.413 0 0 1-2.406-2.407v-11.448a2.414 2.414 0 0 1 2.406-2.407h27.372a2.414 2.414 0 0 1 2.407 2.407V218.7a2.413 2.413 0 0 1-2.407 
2.407",fill:"#BFCDDD",mask:"url(#d)"},null),g("path",{d:"M177.259 207.217v11.52M201.05 207.217v11.52",stroke:"#A3B4C6","stroke-width":"1.124","stroke-linecap":"round","stroke-linejoin":"round",mask:"url(#d)"},null),g("path",{d:"M162.873 267.894a9.422 9.422 0 0 1-9.422-9.422v-14.82a9.423 9.423 0 0 1 18.845 0v14.82a9.423 9.423 0 0 1-9.423 9.422",fill:"#5BA02E",mask:"url(#d)"},null),g("path",{d:"M171.22 267.83a9.422 9.422 0 0 1-9.422-9.423v-3.438a9.423 9.423 0 0 1 18.845 0v3.438a9.423 9.423 0 0 1-9.422 9.423",fill:"#92C110",mask:"url(#d)"},null),g("path",{d:"M181.31 293.666h-27.712a3.209 3.209 0 0 1-3.209-3.21V269.79a3.209 3.209 0 0 1 3.209-3.21h27.711a3.209 3.209 0 0 1 3.209 3.21v20.668a3.209 3.209 0 0 1-3.209 3.209",fill:"#F2D7AD",mask:"url(#d)"},null)])])},v5e=p5e,m5e=function(){return g("svg",{width:"251",height:"294"},[g("g",{fill:"none","fill-rule":"evenodd"},[g("path",{d:"M0 129.023v-2.084C0 58.364 55.591 2.774 124.165 2.774h2.085c68.574 0 124.165 55.59 124.165 124.165v2.084c0 68.575-55.59 124.166-124.165 124.166h-2.085C55.591 253.189 0 197.598 0 129.023",fill:"#E4EBF7"},null),g("path",{d:"M41.417 132.92a8.231 8.231 0 1 1-16.38-1.65 8.231 8.231 0 0 1 16.38 1.65",fill:"#FFF"},null),g("path",{d:"M38.652 136.36l10.425 5.91M49.989 148.505l-12.58 10.73",stroke:"#FFF","stroke-width":"2"},null),g("path",{d:"M41.536 161.28a5.636 5.636 0 1 1-11.216-1.13 5.636 5.636 0 0 1 11.216 1.13M59.154 145.261a5.677 5.677 0 1 1-11.297-1.138 5.677 5.677 0 0 1 11.297 1.138M100.36 29.516l29.66-.013a4.562 4.562 0 1 0-.004-9.126l-29.66.013a4.563 4.563 0 0 0 .005 9.126M111.705 47.754l29.659-.013a4.563 4.563 0 1 0-.004-9.126l-29.66.013a4.563 4.563 0 1 0 .005 9.126",fill:"#FFF"},null),g("path",{d:"M114.066 29.503V29.5l15.698-.007a4.563 4.563 0 1 0 .004 9.126l-15.698.007v-.002a4.562 4.562 0 0 0-.004-9.122M185.405 137.723c-.55 5.455-5.418 9.432-10.873 8.882-5.456-.55-9.432-5.418-8.882-10.873.55-5.455 5.418-9.432 10.873-8.882 5.455.55 9.432 5.418 8.882 
10.873",fill:"#FFF"},null),g("path",{d:"M180.17 143.772l12.572 7.129M193.841 158.42L178.67 171.36",stroke:"#FFF","stroke-width":"2"},null),g("path",{d:"M185.55 171.926a6.798 6.798 0 1 1-13.528-1.363 6.798 6.798 0 0 1 13.527 1.363M204.12 155.285a6.848 6.848 0 1 1-13.627-1.375 6.848 6.848 0 0 1 13.626 1.375",fill:"#FFF"},null),g("path",{d:"M152.988 194.074a2.21 2.21 0 1 1-4.42 0 2.21 2.21 0 0 1 4.42 0zM225.931 118.217a2.21 2.21 0 1 1-4.421 0 2.21 2.21 0 0 1 4.421 0zM217.09 153.051a2.21 2.21 0 1 1-4.421 0 2.21 2.21 0 0 1 4.42 0zM177.84 109.842a2.21 2.21 0 1 1-4.422 0 2.21 2.21 0 0 1 4.421 0zM196.114 94.454a2.21 2.21 0 1 1-4.421 0 2.21 2.21 0 0 1 4.421 0zM202.844 182.523a2.21 2.21 0 1 1-4.42 0 2.21 2.21 0 0 1 4.42 0z",stroke:"#FFF","stroke-width":"2"},null),g("path",{stroke:"#FFF","stroke-width":"2",d:"M215.125 155.262l-1.902 20.075-10.87 5.958M174.601 176.636l-6.322 9.761H156.98l-4.484 6.449M175.874 127.28V111.56M221.51 119.404l-12.77 7.859-15.228-7.86V96.668"},null),g("path",{d:"M180.68 29.32C180.68 13.128 193.806 0 210 0c16.193 0 29.32 13.127 29.32 29.32 0 16.194-13.127 29.322-29.32 29.322-16.193 0-29.32-13.128-29.32-29.321",fill:"#A26EF4"},null),g("path",{d:"M221.45 41.706l-21.563-.125a1.744 1.744 0 0 1-1.734-1.754l.071-12.23a1.744 1.744 0 0 1 1.754-1.734l21.562.125c.964.006 1.74.791 1.735 1.755l-.071 12.229a1.744 1.744 0 0 1-1.754 1.734",fill:"#FFF"},null),g("path",{d:"M215.106 29.192c-.015 2.577-2.049 4.654-4.543 4.64-2.494-.014-4.504-2.115-4.489-4.693l.04-6.925c.016-2.577 2.05-4.654 4.543-4.64 2.494.015 4.504 2.116 4.49 4.693l-.04 6.925zm-4.53-14.074a6.877 6.877 0 0 0-6.916 6.837l-.043 7.368a6.877 6.877 0 0 0 13.754.08l.042-7.368a6.878 6.878 0 0 0-6.837-6.917zM167.566 68.367h-3.93a4.73 4.73 0 0 1-4.717-4.717 4.73 4.73 0 0 1 4.717-4.717h3.93a4.73 4.73 0 0 1 4.717 4.717 4.73 4.73 0 0 1-4.717 4.717",fill:"#FFF"},null),g("path",{d:"M168.214 248.838a6.611 6.611 0 0 1-6.61-6.611v-66.108a6.611 6.611 0 0 1 13.221 0v66.108a6.611 6.611 0 0 1-6.61 
6.61",fill:"#5BA02E"},null),g("path",{d:"M176.147 248.176a6.611 6.611 0 0 1-6.61-6.61v-33.054a6.611 6.611 0 1 1 13.221 0v33.053a6.611 6.611 0 0 1-6.61 6.611",fill:"#92C110"},null),g("path",{d:"M185.994 293.89h-27.376a3.17 3.17 0 0 1-3.17-3.17v-45.887a3.17 3.17 0 0 1 3.17-3.17h27.376a3.17 3.17 0 0 1 3.17 3.17v45.886a3.17 3.17 0 0 1-3.17 3.17",fill:"#F2D7AD"},null),g("path",{d:"M81.972 147.673s6.377-.927 17.566-1.28c11.729-.371 17.57 1.086 17.57 1.086s3.697-3.855.968-8.424c1.278-12.077 5.982-32.827.335-48.273-1.116-1.339-3.743-1.512-7.536-.62-1.337.315-7.147-.149-7.983-.1l-15.311-.347s-3.487-.17-8.035-.508c-1.512-.113-4.227-1.683-5.458-.338-.406.443-2.425 5.669-1.97 16.077l8.635 35.642s-3.141 3.61 1.219 7.085",fill:"#FFF"},null),g("path",{d:"M75.768 73.325l-.9-6.397 11.982-6.52s7.302-.118 8.038 1.205c.737 1.324-5.616.993-5.616.993s-1.836 1.388-2.615 2.5c-1.654 2.363-.986 6.471-8.318 5.986-1.708.284-2.57 2.233-2.57 2.233",fill:"#FFC6A0"},null),g("path",{d:"M52.44 77.672s14.217 9.406 24.973 14.444c1.061.497-2.094 16.183-11.892 11.811-7.436-3.318-20.162-8.44-21.482-14.496-.71-3.258 2.543-7.643 8.401-11.76M141.862 80.113s-6.693 2.999-13.844 6.876c-3.894 2.11-10.137 4.704-12.33 7.988-6.224 9.314 3.536 11.22 12.947 7.503 6.71-2.651 28.999-12.127 13.227-22.367",fill:"#FFB594"},null),g("path",{d:"M76.166 66.36l3.06 3.881s-2.783 2.67-6.31 5.747c-7.103 6.195-12.803 14.296-15.995 16.44-3.966 2.662-9.754 3.314-12.177-.118-3.553-5.032.464-14.628 31.422-25.95",fill:"#FFC6A0"},null),g("path",{d:"M64.674 85.116s-2.34 8.413-8.912 14.447c.652.548 18.586 10.51 22.144 10.056 5.238-.669 6.417-18.968 1.145-20.531-.702-.208-5.901-1.286-8.853-2.167-.87-.26-1.611-1.71-3.545-.936l-1.98-.869zM128.362 85.826s5.318 1.956 7.325 13.734c-.546.274-17.55 12.35-21.829 7.805-6.534-6.94-.766-17.393 4.275-18.61 4.646-1.121 5.03-1.37 10.23-2.929",fill:"#FFF"},null),g("path",{d:"M78.18 94.656s.911 7.41-4.914 
13.078",stroke:"#E4EBF7","stroke-width":"1.051","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M87.397 94.68s3.124 2.572 10.263 2.572c7.14 0 9.074-3.437 9.074-3.437",stroke:"#E4EBF7","stroke-width":".932","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M117.184 68.639l-6.781-6.177s-5.355-4.314-9.223-.893c-3.867 3.422 4.463 2.083 5.653 4.165 1.19 2.082.848 1.143-2.083.446-5.603-1.331-2.082.893 2.975 5.355 2.091 1.845 6.992.955 6.992.955l2.467-3.851z",fill:"#FFC6A0"},null),g("path",{d:"M105.282 91.315l-.297-10.937-15.918-.027-.53 10.45c-.026.403.17.788.515.999 2.049 1.251 9.387 5.093 15.799.424.287-.21.443-.554.431-.91",fill:"#FFB594"},null),g("path",{d:"M107.573 74.24c.817-1.147.982-9.118 1.015-11.928a1.046 1.046 0 0 0-.965-1.055l-4.62-.365c-7.71-1.044-17.071.624-18.253 6.346-5.482 5.813-.421 13.244-.421 13.244s1.963 3.566 4.305 6.791c.756 1.041.398-3.731 3.04-5.929 5.524-4.594 15.899-7.103 15.899-7.103",fill:"#5C2552"},null),g("path",{d:"M88.426 83.206s2.685 6.202 11.602 6.522c7.82.28 8.973-7.008 7.434-17.505l-.909-5.483c-6.118-2.897-15.478.54-15.478.54s-.576 2.044-.19 5.504c-2.276 2.066-1.824 5.618-1.824 5.618s-.905-1.922-1.98-2.321c-.86-.32-1.897.089-2.322 1.98-1.04 4.632 3.667 5.145 3.667 5.145",fill:"#FFC6A0"},null),g("path",{stroke:"#DB836E","stroke-width":"1.145","stroke-linecap":"round","stroke-linejoin":"round",d:"M100.843 77.099l1.701-.928-1.015-4.324.674-1.406"},null),g("path",{d:"M105.546 74.092c-.022.713-.452 1.279-.96 1.263-.51-.016-.904-.607-.882-1.32.021-.713.452-1.278.96-1.263.51.016.904.607.882 1.32M97.592 74.349c-.022.713-.452 1.278-.961 1.263-.509-.016-.904-.607-.882-1.32.022-.713.452-1.279.961-1.263.51.016.904.606.882 1.32",fill:"#552950"},null),g("path",{d:"M91.132 86.786s5.269 4.957 12.679 2.327",stroke:"#DB836E","stroke-width":"1.145","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M99.776 81.903s-3.592.232-1.44-2.79c1.59-1.496 4.897-.46 4.897-.46s1.156 3.906-3.457 
3.25",fill:"#DB836E"},null),g("path",{d:"M102.88 70.6s2.483.84 3.402.715M93.883 71.975s2.492-1.144 4.778-1.073",stroke:"#5C2552","stroke-width":"1.526","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M86.32 77.374s.961.879 1.458 2.106c-.377.48-1.033 1.152-.236 1.809M99.337 83.719s1.911.151 2.509-.254",stroke:"#DB836E","stroke-width":"1.145","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M87.782 115.821l15.73-3.012M100.165 115.821l10.04-2.008",stroke:"#E4EBF7","stroke-width":"1.051","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M66.508 86.763s-1.598 8.83-6.697 14.078",stroke:"#E4EBF7","stroke-width":"1.114","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M128.31 87.934s3.013 4.121 4.06 11.785",stroke:"#E4EBF7","stroke-width":"1.051","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M64.09 84.816s-6.03 9.912-13.607 9.903",stroke:"#DB836E","stroke-width":".795","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M112.366 65.909l-.142 5.32s5.993 4.472 11.945 9.202c4.482 3.562 8.888 7.455 10.985 8.662 4.804 2.766 8.9 3.355 11.076 1.808 4.071-2.894 4.373-9.878-8.136-15.263-4.271-1.838-16.144-6.36-25.728-9.73",fill:"#FFC6A0"},null),g("path",{d:"M130.532 85.488s4.588 5.757 11.619 6.214",stroke:"#DB836E","stroke-width":".75","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M121.708 105.73s-.393 8.564-1.34 13.612",stroke:"#E4EBF7","stroke-width":"1.051","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M115.784 161.512s-3.57-1.488-2.678-7.14",stroke:"#648BD8","stroke-width":"1.051","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M101.52 290.246s4.326 2.057 7.408 1.03c2.842-.948 4.564.673 7.132 1.186 2.57.514 6.925 1.108 11.772-1.269-.104-5.551-6.939-4.01-12.048-6.763-2.582-1.39-3.812-4.757-3.625-8.863h-9.471s-1.402 10.596-1.169 14.68",fill:"#CBD1D1"},null),g("path",{d:"M101.496 
290.073s2.447 1.281 6.809.658c3.081-.44 3.74.485 7.479 1.039 3.739.554 10.802-.07 11.91-.9.415 1.108-.347 2.077-.347 2.077s-1.523.608-4.847.831c-2.045.137-5.843.293-7.663-.507-1.8-1.385-5.286-1.917-5.77-.243-3.947.958-7.41-.288-7.41-.288l-.16-2.667z",fill:"#2B0849"},null),g("path",{d:"M108.824 276.19h3.116s-.103 6.751 4.57 8.62c-4.673.624-8.62-2.32-7.686-8.62",fill:"#A4AABA"},null),g("path",{d:"M57.65 272.52s-2.122 7.47-4.518 12.396c-1.811 3.724-4.255 7.548 5.505 7.548 6.698 0 9.02-.483 7.479-6.648-1.541-6.164.268-13.296.268-13.296H57.65z",fill:"#CBD1D1"},null),g("path",{d:"M51.54 290.04s2.111 1.178 6.682 1.178c6.128 0 8.31-1.662 8.31-1.662s.605 1.122-.624 2.18c-1 .862-3.624 1.603-7.444 1.559-4.177-.049-5.876-.57-6.786-1.177-.831-.554-.692-1.593-.138-2.078",fill:"#2B0849"},null),g("path",{d:"M58.533 274.438s.034 1.529-.315 2.95c-.352 1.431-1.087 3.127-1.139 4.17-.058 1.16 4.57 1.592 5.194.035.623-1.559 1.303-6.475 1.927-7.306.622-.831-4.94-2.135-5.667.15",fill:"#A4AABA"},null),g("path",{d:"M100.885 277.015l13.306.092s1.291-54.228 1.843-64.056c.552-9.828 3.756-43.13.997-62.788l-12.48-.64-22.725.776s-.433 3.944-1.19 9.921c-.062.493-.677.838-.744 1.358-.075.582.42 1.347.318 1.956-2.35 14.003-6.343 32.926-8.697 46.425-.116.663-1.227 1.004-1.45 2.677-.04.3.21 1.516.112 1.785-6.836 18.643-10.89 47.584-14.2 61.551l14.528-.014s2.185-8.524 4.008-16.878c2.796-12.817 22.987-84.553 22.987-84.553l3-.517 1.037 46.1s-.223 1.228.334 2.008c.558.782-.556 1.117-.39 2.233l.39 1.784s-.446 7.14-.892 11.826c-.446 4.685-.092 38.954-.092 38.954",fill:"#7BB2F9"},null),g("path",{d:"M77.438 220.434c1.146.094 4.016-2.008 6.916-4.91M107.55 223.931s2.758-1.103 6.069-3.862",stroke:"#648BD8","stroke-width":"1.051","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M108.459 220.905s2.759-1.104 6.07-3.863",stroke:"#648BD8","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M76.099 223.557s2.608-.587 6.47-3.346M87.33 150.82c-.27 3.088.297 8.478-4.315 
9.073M104.829 149.075s.11 13.936-1.286 14.983c-2.207 1.655-2.975 1.934-2.975 1.934M101.014 149.63s.035 12.81-1.19 24.245M94.93 174.965s7.174-1.655 9.38-1.655M75.671 204.754c-.316 1.55-.64 3.067-.973 4.535 0 0-1.45 1.822-1.003 3.756.446 1.934-.943 2.034-4.96 15.273-1.686 5.559-4.464 18.49-6.313 27.447-.078.38-4.018 18.06-4.093 18.423M77.043 196.743a313.269 313.269 0 0 1-.877 4.729M83.908 151.414l-1.19 10.413s-1.091.148-.496 2.23c.111 1.34-2.66 15.692-5.153 30.267M57.58 272.94h13.238",stroke:"#648BD8","stroke-width":"1.051","stroke-linecap":"round","stroke-linejoin":"round"},null),g("path",{d:"M117.377 147.423s-16.955-3.087-35.7.199c.157 2.501-.002 4.128-.002 4.128s14.607-2.802 35.476-.31c.251-2.342.226-4.017.226-4.017",fill:"#192064"},null),g("path",{d:"M107.511 150.353l.004-4.885a.807.807 0 0 0-.774-.81c-2.428-.092-5.04-.108-7.795-.014a.814.814 0 0 0-.784.81l-.003 4.88c0 .456.371.82.827.808a140.76 140.76 0 0 1 7.688.017.81.81 0 0 0 .837-.806",fill:"#FFF"},null),g("path",{d:"M106.402 149.426l.002-3.06a.64.64 0 0 0-.616-.643 94.135 94.135 0 0 0-5.834-.009.647.647 0 0 0-.626.643l-.001 3.056c0 .36.291.648.651.64 1.78-.04 3.708-.041 5.762.012.36.009.662-.279.662-.64",fill:"#192064"},null),g("path",{d:"M101.485 273.933h12.272M102.652 269.075c.006 3.368.04 5.759.11 6.47M102.667 263.125c-.009 1.53-.015 2.98-.016 4.313M102.204 174.024l.893 44.402s.669 1.561-.224 2.677c-.892 1.116 2.455.67.893 2.231-1.562 1.562.893 1.116 0 3.347-.592 1.48-.988 20.987-1.09 34.956",stroke:"#648BD8","stroke-width":"1.051","stroke-linecap":"round","stroke-linejoin":"round"},null)])])},g5e=m5e,y5e={success:zf,error:Yr,info:Hf,warning:d5e},Kf={"404":h5e,"500":v5e,"403":g5e},b5e=Object.keys(Kf),C5e={prefixCls:u.string,icon:u.any,status:u.oneOf(rt("success","error","info","warning","404","403","500")).def("info"),title:u.any,subTitle:u.any,extra:u.any},w5e=function(t,n){var r=n.status,a=n.icon;if(b5e.includes("".concat(r))){var o=Kf[r];return g("div",{class:"".concat(t,"-icon 
").concat(t,"-image")},[g(o,null,null)])}var i=y5e[r],l=a||g(i,null,null);return g("div",{class:"".concat(t,"-icon")},[l])},S5e=function(t,n){return n&&g("div",{class:"".concat(t,"-extra")},[n])},ds=G({name:"AResult",props:C5e,slots:["title","subTitle","icon","extra"],setup:function(t,n){var r=n.slots,a=Wt("result",t),o=a.prefixCls,i=a.direction,l=x(function(){return Se(o.value,"".concat(o.value,"-").concat(t.status),V({},"".concat(o.value,"-rtl"),i.value==="rtl"))});return function(){var s,c,d,f,p,v,m,y,b=(s=t.title)!==null&&s!==void 0?s:(c=r.title)===null||c===void 0?void 0:c.call(r),C=(d=t.subTitle)!==null&&d!==void 0?d:(f=r.subTitle)===null||f===void 0?void 0:f.call(r),S=(p=t.icon)!==null&&p!==void 0?p:(v=r.icon)===null||v===void 0?void 0:v.call(r),w=(m=t.extra)!==null&&m!==void 0?m:(y=r.extra)===null||y===void 0?void 0:y.call(r),k=o.value;return g("div",{class:l.value},[w5e(k,{status:t.status,icon:S}),g("div",{class:"".concat(k,"-title")},[b]),C&&g("div",{class:"".concat(k,"-subtitle")},[C]),S5e(k,w),r.default&&g("div",{class:"".concat(k,"-content")},[r.default()])])}}});ds.PRESENTED_IMAGE_403=Kf[403];ds.PRESENTED_IMAGE_404=Kf[404];ds.PRESENTED_IMAGE_500=Kf[500];ds.install=function(e){return e.component(ds.name,ds),e};var k5e=ds,$5e=G({name:"ASkeletonButton",props:P(P({},Ng()),{size:u.oneOf(rt("large","small","default"))}),setup:function(t){var n=Wt("skeleton",t),r=n.prefixCls,a=x(function(){return Se(r.value,"".concat(r.value,"-element"),V({},"".concat(r.value,"-active"),t.active))});return function(){return g("div",{class:a.value},[g(Ag,le(le({},t),{},{prefixCls:"".concat(r.value,"-button")}),null)])}}}),NS=$5e,O5e=G({name:"ASkeletonInput",props:P(P({},bn(Ng(),"shape")),{size:u.oneOf(rt("large","small","default"))}),setup:function(t){var n=Wt("skeleton",t),r=n.prefixCls,a=x(function(){return Se(r.value,"".concat(r.value,"-element"),V({},"".concat(r.value,"-active"),t.active))});return function(){return 
/* Tail of the ASkeletonInput render function: an outer div wrapping Ag, which receives the props with prefixCls suffixed by -input. */g("div",{class:a.value},[g(Ag,le(le({},t),{},{prefixCls:"".concat(r.value,"-input")}),null)])}}}),AS=O5e,/* P5e: SVG path data drawn by the skeleton image placeholder defined next. */P5e="M365.714286 329.142857q0 45.714286-32.036571 77.677714t-77.677714 32.036571-77.677714-32.036571-32.036571-77.677714 32.036571-77.677714 77.677714-32.036571 77.677714 32.036571 32.036571 77.677714zM950.857143 548.571429l0 256-804.571429 0 0-109.714286 182.857143-182.857143 91.428571 91.428571 292.571429-292.571429zM1005.714286 146.285714l-914.285714 0q-7.460571 0-12.873143 5.412571t-5.412571 12.873143l0 694.857143q0 7.460571 5.412571 12.873143t12.873143 5.412571l914.285714 0q7.460571 0 12.873143-5.412571t5.412571-12.873143l0-694.857143q0-7.460571-5.412571-12.873143t-12.873143-5.412571zM1097.142857 164.571429l0 694.857143q0 37.741714-26.843429 64.585143t-64.585143 26.843429l-914.285714 0q-37.741714 0-64.585143-26.843429t-26.843429-64.585143l0-694.857143q0-37.741714 26.843429-64.585143t64.585143-26.843429l914.285714 0q37.741714 0 64.585143 26.843429t26.843429 64.585143z",/* T5e (ASkeletonImage): placeholder element whose wrapper class is only the -element class (no -active toggle is applied here); it renders one svg with viewBox 0 0 1098 1024 containing the single P5e path. */T5e=G({name:"ASkeletonImage",props:Ng(),setup:function(t){var n=Wt("skeleton",t),r=n.prefixCls,a=x(function(){return Se(r.value,"".concat(r.value,"-element"))});return function(){return g("div",{class:a.value},[g("div",{class:"".concat(r.value,"-image")},[g("svg",{viewBox:"0 0 1098 1024",xmlns:"http://www.w3.org/2000/svg",class:"".concat(r.value,"-image-svg")},[g("path",{d:P5e,class:"".concat(r.value,"-image-path")},null)])])])}}}),DS=T5e;/* Attach the element variants to na (defined outside this span) as static members. */na.Button=NS;na.Avatar=xS;na.Input=AS;na.Image=DS;/* install(e): registers na and each attached variant under its own name, then returns e. */na.install=function(e){return e.component(na.name,na),e.component(na.Button.name,NS),e.component(na.Avatar.name,xS),e.component(na.Input.name,AS),e.component(na.Image.name,DS),e};/* fR(props, ctx): functional component that reads everything from ctx.attrs; its body continues after this line. */var fR=function(t,n){var 
/* Body of fR: builds a positioning style from attrs (vertical anchors top or bottom and sets height, horizontal anchors left or right and sets width, all as percentages, with reverse flipping the anchored edge), merges it over attrs.style, and renders a div only when attrs.included is truthy; otherwise returns null. */r,a,o=n.attrs,i=o.included,l=o.vertical,s=o.offset,c=o.length,d=o.reverse,f=o.style,p=o.class,v=l?(r={},V(r,d?"top":"bottom","".concat(s,"%")),V(r,d?"bottom":"top","auto"),V(r,"height","".concat(c,"%")),r):(a={},V(a,d?"right":"left","".concat(s,"%")),V(a,d?"left":"right","auto"),V(a,"width","".concat(c,"%")),a),m=P(P({},f),v);return i?g("div",{class:p,style:m},null):null};fR.inheritAttrs=!1;var hR=fR,/* x5e(vertical, marks, dots, step, min, max): returns the mark keys parsed as floats and sorted ascending; when dots and step are both truthy it then appends every value from min to max in step increments that is not already present. The first argument is never read. Appended values are pushed after the sort, so the result is not re-sorted. */x5e=function(t,n,r,a,o,i){on(r?a>0:!0,"Slider","`Slider[step]` should be a positive number in order to make Slider[dots] work.");var l=Object.keys(n).map(parseFloat).sort(function(c,d){return c-d});if(r&&a)for(var s=o;s<=i;s+=a)l.indexOf(s)===-1&&l.push(s);return l},/* pR(props, ctx): renders one span per point returned by x5e inside a -step container. A dot is active when it equals upperBound (included falsy) or lies within lowerBound..upperBound (included truthy); active dots additionally receive activeDotStyle. */pR=function(t,n){var r=n.attrs,a=r.prefixCls,o=r.vertical,i=r.reverse,l=r.marks,s=r.dots,c=r.step,d=r.included,f=r.lowerBound,p=r.upperBound,v=r.max,m=r.min,y=r.dotStyle,b=r.activeDotStyle,C=v-m,S=x5e(o,l,s,c,m,v).map(function(w){var k,$="".concat(Math.abs(w-m)/C*100,"%"),O=!d&&w===p||d&&w<=p&&w>=f,T=o?P(P({},y),V({},i?"top":"bottom",$)):P(P({},y),V({},i?"right":"left",$));O&&(T=P(P({},T),b));var _=Se((k={},V(k,"".concat(a,"-dot"),!0),V(k,"".concat(a,"-dot-active"),O),V(k,"".concat(a,"-dot-reverse"),i),k));return g("span",{class:_,style:T,key:w},null)});return g("div",{class:"".concat(a,"-step")},[S])};pR.inheritAttrs=!1;var _5e=pR,/* vR(props, ctx): renders the mark labels in ascending key order. A mark entry may be a plain value, a function (called with h), or an object for which zn is false, in which case its label and style fields are used (presumably zn detects an element node; confirm). Entries whose label is falsy and not 0 are skipped. */vR=function(t,n){var r=n.attrs,a=r.class,o=r.vertical,i=r.reverse,l=r.marks,s=r.included,c=r.upperBound,d=r.lowerBound,f=r.max,p=r.min,v=r.onClickLabel,m=Object.keys(l),y=f-p,b=m.map(parseFloat).sort(function(C,S){return C-S}).map(function(C){var S,w=typeof l[C]=="function"?l[C](h):l[C],k=kt(w)==="object"&&!zn(w),$=k?w.label:w;if(!$&&$!==0)return null;var 
/* O: whether this mark is active, using the same bounds rule as the dots in pR. */O=!s&&C===c||s&&C<=c&&C>=d,T=Se((S={},V(S,"".concat(a,"-text"),!0),V(S,"".concat(a,"-text-active"),O),S)),/* The next two values are the vertical and the horizontal placement styles. NOTE(review): the reversed horizontal offset is computed from (C-p/4), dividing only min by 4, while every other offset in this file uses (C-p); confirm against upstream before relying on reverse with a nonzero min. */_=V({marginBottom:"-50%"},i?"top":"bottom","".concat((C-p)/y*100,"%")),I=V({transform:"translateX(-50%)",msTransform:"translateX(-50%)"},i?"right":"left",i?"".concat((C-p/4)/y*100,"%"):"".concat((C-p)/y*100,"%")),L=o?_:I,j=k?P(P({},L),w.style):L,/* The touch listener key is chosen by the mn flag; touch start and mouse down both forward the event and the mark value to onClickLabel. */F=V({},mn?"onTouchstartPassive":"onTouchstart",function(N){return v(N,C)});return g("span",le({class:T,style:j,key:C,onMousedown:function(D){return v(D,C)}},F),[$])});return g("div",{class:a},[b])};vR.inheritAttrs=!1;var E5e=vR,/* mR (Handle): slider handle component. clickFocused is set when the handle is the active element at a document mouseup or through clickFocus, and cleared on blur and on keydown. The document mouseup listener added in mounted is removed in beforeUnmount. */mR=G({name:"Handle",mixins:[nt],inheritAttrs:!1,props:{prefixCls:u.string,vertical:u.looseBool,offset:u.number,disabled:u.looseBool,min:u.number,max:u.number,value:u.number,tabindex:u.oneOfType([u.number,u.string]),reverse:u.looseBool},data:function(){return{clickFocused:!1}},mounted:function(){this.onMouseUpListener=Kn(document,"mouseup",this.handleMouseUp)},beforeUnmount:function(){this.onMouseUpListener&&this.onMouseUpListener.remove()},methods:{setHandleRef:function(t){this.handle=t},setClickFocus:function(t){this.setState({clickFocused:t})},handleMouseUp:function(){document.activeElement===this.handle&&this.setClickFocus(!0)},handleBlur:function(t){this.setClickFocus(!1),this.__emit("blur",t)},handleKeyDown:function(){this.setClickFocus(!1)},clickFocus:function(){this.setClickFocus(!0),this.focus()},focus:function(){this.handle.focus()},blur:function(){this.handle.blur()},handleMousedown:function(t){this.focus(),this.__emit("mousedown",t)}},render:function(){var 
/* Handle render body: positions the handle by a percentage offset from top or bottom (vertical) or left or right (horizontal), with reverse flipping the anchored edge, and applies a 50 percent translate along the same axis. */t,n,r=Qe(this),a=r.prefixCls,o=r.vertical,i=r.reverse,l=r.offset,s=r.disabled,c=r.min,d=r.max,f=r.value,p=r.tabindex,v=Se(this.$attrs.class,V({},"".concat(a,"-handle-click-focused"),this.clickFocused)),m=o?(t={},V(t,i?"top":"bottom","".concat(l,"%")),V(t,i?"bottom":"top","auto"),V(t,"transform","translateY(+50%)"),t):(n={},V(n,i?"right":"left","".concat(l,"%")),V(n,i?"left":"right","auto"),V(n,"transform","translateX(".concat(i?"+":"-","50%)")),n),y={"aria-valuemin":c,"aria-valuemax":d,"aria-valuenow":f,"aria-disabled":!!s},b=P(P({},this.$attrs.style),m),/* tabindex defaults to 0 and becomes null when the handle is disabled or when tabindex is explicitly null. */C=p||0;(s||p===null)&&(C=null);var S=P(P(P(P({},this.$attrs),{role:"slider",tabindex:C}),y),{class:v,onBlur:this.handleBlur,onKeydown:this.handleKeyDown,onMousedown:this.handleMousedown,ref:this.setHandleRef,style:b});return g("div",S,null)}});/* Gh(event, refs): true when event.target is one of the values in refs or the result of Sn applied to one; any error thrown during the check yields false. */function Gh(e,t){try{return Object.keys(t).some(function(n){return e.target===Sn(t[n])||e.target===t[n]})}catch{return!1}}/* gR(value, props): true when value lies outside props.min..props.max. */function gR(e,t){var n=t.min,r=t.max;return e<n||e>r}/* Ox(event): true for a multi-touch event, or for a touchend event that still has touches remaining. */function Ox(e){return e.touches.length>1||e.type.toLowerCase()==="touchend"&&e.touches.length>0}/* Px(value, props): returns whichever candidate is nearest to value. Candidates are the mark keys plus, when step is not null, the nearest step multiple counted from min, capped at the number of whole steps that fit between min and max. */function Px(e,t){var n=t.marks,r=t.step,a=t.min,o=t.max,i=Object.keys(n).map(parseFloat);if(r!==null){var l=Math.pow(10,yR(r)),s=Math.floor((o*l-a*l)/(r*l)),c=Math.min((e-a)/r,s),d=Math.round(c)*r+a;i.push(d)}var f=i.map(function(p){return Math.abs(e-p)});return i[f.indexOf(Math.min.apply(Math,Je(f)))]}/* yR(step): number of characters after the decimal point in step.toString(), or 0 when the string has no point. NOTE(review): a step whose string form is exponential (for example 1e-7) contains no point and therefore reports 0. */function yR(e){var t=e.toString(),n=0;return t.indexOf(".")>=0&&(n=t.length-t.indexOf(".")-1),n}/* Tx(vertical, mouseEvent): pointer coordinate along the slider axis (clientY when vertical, pageX otherwise). When window.visualViewport exists the coordinate is divided by the ratio of the visual viewport width to the body width, rounded to two decimals. */function Tx(e,t){var n=1;return window.visualViewport&&(n=+(window.visualViewport.width/document.body.getBoundingClientRect().width).toFixed(2)),(e?t.clientY:t.pageX)/n}/* xx(vertical, touchEvent): same computation as Tx, reading the first touch point. */function xx(e,t){var n=1;return window.visualViewport&&(n=+(window.visualViewport.width/document.body.getBoundingClientRect().width).toFixed(2)),(e?t.touches[0].clientY:t.touches[0].pageX)/n}/* Cb(vertical, element): centre of the element along the slider axis. The horizontal case adds window.pageXOffset; the vertical case uses the bounding rect top as is. */function Cb(e,t){var n=t.getBoundingClientRect();return e?n.top+n.height*.5:window.pageXOffset+n.left+n.width*.5}/* RS(value, props): clamps value into props.min..props.max. */function RS(e,t){var n=t.max,r=t.min;return 
e<=r?r:e>=n?n:e}/* bR(value, props): the nearest point from Px (0 when that result is not finite), rounded to the decimal precision of step unless step is null. */function bR(e,t){var n=t.step,r=isFinite(Px(e,t))?Px(e,t):0;return n===null?r:parseFloat(r.toFixed(yR(n)))}/* fs(event): stops propagation and prevents the default action. */function fs(e){e.stopPropagation(),e.preventDefault()}/* M5e(op, value, props): op is increase or decrease. With a truthy step the value moves by one step; otherwise it moves to the neighbouring entry of props.marks when that entry is truthy, and stays unchanged if not. NOTE(review): the marks branch returns props.marks[key], that is the mark entry itself and not its numeric key; confirm callers expect that. */function M5e(e,t,n){var r={increase:function(l,s){return l+s},decrease:function(l,s){return l-s}},a=r[e](Object.keys(n.marks).indexOf(JSON.stringify(t)),1),o=Object.keys(n.marks)[a];return n.step?r[e](t,n.step):!!Object.keys(n.marks).length&&!!n.marks[o]?n.marks[o]:t}/* CR(keyEvent, vertical, reverse): maps a key code to a function of (value, props) that yields the next value, or returns undefined for keys it does not handle. Arrow keys go through M5e with the direction flipped according to vertical and reverse; END and HOME return max and min; PAGE_UP and PAGE_DOWN move by two steps. */function CR(e,t,n){var r="increase",a="decrease",o=r;switch(e.keyCode){case ze.UP:o=t&&n?a:r;break;case ze.RIGHT:o=!t&&n?a:r;break;case ze.DOWN:o=t&&n?r:a;break;case ze.LEFT:o=!t&&n?r:a;break;case ze.END:return function(i,l){return l.max};case ze.HOME:return function(i,l){return l.min};case ze.PAGE_UP:return function(i,l){return i+l.step*2};case ze.PAGE_DOWN:return function(i,l){return i-l.step*2};default:return}return function(i,l){return M5e(o,i,l)}}/* I5e: object rest helper. Uses globalThis.__rest when present; the fallback copies the own enumerable string keys and own enumerable symbol keys of e that are not listed in t. */var I5e=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n};/* _l: no-op function. */function _l(){}/* wR(mixin): returns the CreateSlider component definition built around the given mixin; t lists the prop types shared by every slider variant. */function wR(e){var t={min:u.number,max:u.number,step:u.number,marks:u.object,included:u.looseBool,prefixCls:u.string,disabled:u.looseBool,handle:u.func,dots:u.looseBool,vertical:u.looseBool,reverse:u.looseBool,minimumTrackStyle:u.object,maximumTrackStyle:u.object,handleStyle:u.oneOfType([u.object,u.arrayOf(u.object)]),trackStyle:u.oneOfType([u.object,u.arrayOf(u.object)]),railStyle:u.object,dotStyle:u.object,activeDotStyle:u.object,autofocus:u.looseBool};return 
G({name:"CreateSlider",mixins:[nt,e],inheritAttrs:!1,props:An(t,{prefixCls:"rc-slider",min:0,max:100,step:1,marks:{},included:!0,disabled:!1,dots:!1,vertical:!1,reverse:!1,trackStyle:[{}],handleStyle:[{}],railStyle:{},dotStyle:{},activeDotStyle:{}}),data:function(){var r=this.step,a=this.max,o=this.min,i=isFinite(a-o)?(a-o)%r===0:!0;return on(r&&Math.floor(r)===r?i:!0,"Slider","Slider[max] - Slider[min] (%s) should be a multiple of Slider[step] (%s)",a-o,r),this.handlesRefs={},{}},mounted:function(){var r=this;this.$nextTick(function(){r.document=r.sliderRef&&r.sliderRef.ownerDocument;var a=r.autofocus,o=r.disabled;a&&!o&&r.focus()})},beforeUnmount:function(){var r=this;this.$nextTick(function(){r.removeDocumentEvents()})},methods:{defaultHandle:function(r){var a=r.index;r.directives;var o=r.className,i=r.style,l=I5e(r,["index","directives","className","style"]);if(delete l.dragging,l.value===null)return null;var s=P(P({},l),{class:o,style:i,key:a});return g(mR,s,null)},onMouseDown:function(r){if(r.button===0){var a=this.vertical,o=Tx(a,r);if(!Gh(r,this.handlesRefs))this.dragOffset=0;else{var i=Cb(a,r.target);this.dragOffset=o-i,o=i}this.removeDocumentEvents(),this.onStart(o),this.addDocumentMouseEvents(),fs(r)}},onTouchStart:function(r){if(!Ox(r)){var a=this.vertical,o=xx(a,r);if(!Gh(r,this.handlesRefs))this.dragOffset=0;else{var i=Cb(a,r.target);this.dragOffset=o-i,o=i}this.onStart(o),this.addDocumentTouchEvents(),fs(r)}},onFocus:function(r){var a=this.vertical;if(Gh(r,this.handlesRefs)){var o=Cb(a,r.target);this.dragOffset=0,this.onStart(o),fs(r),this.__emit("focus",r)}},onBlur:function(r){this.onEnd(),this.__emit("blur",r)},onMouseUp:function(){this.handlesRefs[this.prevMovedHandleIndex]&&this.handlesRefs[this.prevMovedHandleIndex].clickFocus()},onMouseMove:function(r){if(!this.sliderRef){this.onEnd();return}var a=Tx(this.vertical,r);this.onMove(r,a-this.dragOffset)},onTouchMove:function(r){if(Ox(r)||!this.sliderRef){this.onEnd();return}var 
a=xx(this.vertical,r);this.onMove(r,a-this.dragOffset)},onKeyDown:function(r){this.sliderRef&&Gh(r,this.handlesRefs)&&this.onKeyboard(r)},onClickMarkLabel:function(r,a){var o=this;r.stopPropagation(),this.onChange({sValue:a}),this.setState({sValue:a},function(){return o.onEnd(!0)})},getSliderStart:function(){var r=this.sliderRef,a=this.vertical,o=this.reverse,i=r.getBoundingClientRect();return a?o?i.bottom:i.top:window.pageXOffset+(o?i.right:i.left)},getSliderLength:function(){var r=this.sliderRef;if(!r)return 0;var a=r.getBoundingClientRect();return this.vertical?a.height:a.width},addDocumentTouchEvents:function(){this.onTouchMoveListener=Kn(this.document,"touchmove",this.onTouchMove),this.onTouchUpListener=Kn(this.document,"touchend",this.onEnd)},addDocumentMouseEvents:function(){this.onMouseMoveListener=Kn(this.document,"mousemove",this.onMouseMove),this.onMouseUpListener=Kn(this.document,"mouseup",this.onEnd)},removeDocumentEvents:function(){this.onTouchMoveListener&&this.onTouchMoveListener.remove(),this.onTouchUpListener&&this.onTouchUpListener.remove(),this.onMouseMoveListener&&this.onMouseMoveListener.remove(),this.onMouseUpListener&&this.onMouseUpListener.remove()},focus:function(){this.disabled||this.handlesRefs[0].focus()},blur:function(){var r=this;this.disabled||Object.keys(this.handlesRefs).forEach(function(a){r.handlesRefs[a]&&r.handlesRefs[a].blur&&r.handlesRefs[a].blur()})},calcValue:function(r){var a=this.vertical,o=this.min,i=this.max,l=Math.abs(Math.max(r,0)/this.getSliderLength()),s=a?(1-l)*(i-o)+o:l*(i-o)+o;return s},calcValueByPos:function(r){var a=this.reverse?-1:1,o=a*(r-this.getSliderStart()),i=this.trimAlignValue(this.calcValue(o));return i},calcOffset:function(r){var a=this.min,o=this.max,i=(r-a)/(o-a);return i*100},saveSlider:function(r){this.sliderRef=r},saveHandle:function(r,a){this.handlesRefs[r]=a}},render:function(){var 
r,a=this.prefixCls,o=this.marks,i=this.dots,l=this.step,s=this.included,c=this.disabled,d=this.vertical,f=this.reverse,p=this.min,v=this.max,m=this.maximumTrackStyle,y=this.railStyle,b=this.dotStyle,C=this.activeDotStyle,S=this.$attrs,w=S.class,k=S.style,$=S.id,O=this.renderSlider(),T=O.tracks,_=O.handles,I=Se(a,w,(r={},V(r,"".concat(a,"-with-marks"),Object.keys(o).length),V(r,"".concat(a,"-disabled"),c),V(r,"".concat(a,"-vertical"),d),r)),L={vertical:d,marks:o,included:s,lowerBound:this.getLowerBound(),upperBound:this.getUpperBound(),max:v,min:p,reverse:f,class:"".concat(a,"-mark"),onClickLabel:c?_l:this.onClickMarkLabel},j=V({},mn?"onTouchstartPassive":"onTouchstart",c?_l:this.onTouchStart);return g("div",le(le({id:$,ref:this.saveSlider,tabindex:"-1",class:I},j),{},{onMousedown:c?_l:this.onMouseDown,onMouseup:c?_l:this.onMouseUp,onKeydown:c?_l:this.onKeyDown,onFocus:c?_l:this.onFocus,onBlur:c?_l:this.onBlur,style:k}),[g("div",{class:"".concat(a,"-rail"),style:P(P({},m),y)},null),T,g(_5e,{prefixCls:a,vertical:d,reverse:f,marks:o,dots:i,step:l,included:s,lowerBound:this.getLowerBound(),upperBound:this.getUpperBound(),max:v,min:p,dotStyle:b,activeDotStyle:C},null),_,g(E5e,L,null),ht(this)])}})}var N5e=G({name:"Slider",mixins:[nt],inheritAttrs:!1,props:{defaultValue:u.number,value:u.number,disabled:u.looseBool,autofocus:u.looseBool,tabindex:u.oneOfType([u.number,u.string]),reverse:u.looseBool,min:u.number,max:u.number},data:function(){var t=this.defaultValue!==void 0?this.defaultValue:this.min,n=this.value!==void 0?this.value:t;return{sValue:this.trimAlignValue(n),dragging:!1}},watch:{value:{handler:function(t){this.setChangeValue(t)},deep:!0},min:function(){var t=this.sValue;this.setChangeValue(t)},max:function(){var t=this.sValue;this.setChangeValue(t)}},methods:{setChangeValue:function(t){var n=t!==void 
0?t:this.sValue,r=this.trimAlignValue(n,this.$props);r!==this.sValue&&(this.setState({sValue:r}),gR(n,this.$props)&&this.__emit("change",r))},onChange:function(t){var n=!vt(this,"value"),r=t.sValue>this.max?P(P({},t),{sValue:this.max}):t;n&&this.setState(r);var a=r.sValue;this.__emit("change",a)},onStart:function(t){this.setState({dragging:!0});var n=this.sValue;this.__emit("beforeChange",n);var r=this.calcValueByPos(t);this.startValue=r,this.startPosition=t,r!==n&&(this.prevMovedHandleIndex=0,this.onChange({sValue:r}))},onEnd:function(t){var n=this.dragging;this.removeDocumentEvents(),(n||t)&&this.__emit("afterChange",this.sValue),this.setState({dragging:!1})},onMove:function(t,n){fs(t);var r=this.sValue,a=this.calcValueByPos(n);a!==r&&this.onChange({sValue:a})},onKeyboard:function(t){var n=this.$props,r=n.reverse,a=n.vertical,o=CR(t,a,r);if(o){fs(t);var i=this.sValue,l=o(i,this.$props),s=this.trimAlignValue(l);if(s===i)return;this.onChange({sValue:s}),this.__emit("afterChange",s),this.onEnd()}},getLowerBound:function(){return this.min},getUpperBound:function(){return this.sValue},trimAlignValue:function(t){var n=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{};if(t===null)return null;var r=P(P({},this.$props),n),a=RS(t,r);return bR(a,r)},getTrack:function(t){var n=t.prefixCls,r=t.reverse,a=t.vertical,o=t.included,i=t.offset,l=t.minimumTrackStyle,s=t._trackStyle;return g(hR,{class:"".concat(n,"-track"),vertical:a,included:o,offset:0,reverse:r,length:i,style:P(P({},l),s)},null)},renderSlider:function(){var t=this,n=this.prefixCls,r=this.vertical,a=this.included,o=this.disabled,i=this.minimumTrackStyle,l=this.trackStyle,s=this.handleStyle,c=this.tabindex,d=this.min,f=this.max,p=this.reverse,v=this.handle,m=this.defaultHandle,y=v||m,b=this.sValue,C=this.dragging,S=this.calcOffset(b),w=y({class:"".concat(n,"-handle"),prefixCls:n,vertical:r,offset:S,value:b,dragging:C,disabled:o,min:d,max:f,reverse:p,index:0,tabindex:c,style:s[0]||s,ref:function(O){return 
t.saveHandle(0,O)},onFocus:this.onFocus,onBlur:this.onBlur}),k=l[0]||l;return{tracks:this.getTrack({prefixCls:n,reverse:p,vertical:r,included:a,offset:S,minimumTrackStyle:i,_trackStyle:k}),handles:w}}}}),A5e=wR(N5e),Xh=function(t){var n=t.value,r=t.handle,a=t.bounds,o=t.props,i=o.allowCross,l=o.pushable,s=Number(l),c=RS(n,o),d=c;return!i&&r!=null&&a!==void 0&&(r>0&&c<=a[r-1]+s&&(d=a[r-1]+s),r<a.length-1&&c>=a[r+1]-s&&(d=a[r+1]-s)),bR(d,o)},D5e={defaultValue:u.arrayOf(u.number),value:u.arrayOf(u.number),count:u.number,pushable:an(u.oneOfType([u.looseBool,u.number])),allowCross:u.looseBool,disabled:u.looseBool,reverse:u.looseBool,tabindex:u.arrayOf(u.number),prefixCls:u.string,min:u.number,max:u.number,autofocus:u.looseBool},R5e={name:"Range",inheritAttrs:!1,displayName:"Range",mixins:[nt],props:An(D5e,{count:1,allowCross:!0,pushable:!1,tabindex:[]}),data:function(){var t=this,n=this.count,r=this.min,a=this.max,o=Array.apply(void 0,Je(Array(n+1))).map(function(){return r}),i=vt(this,"defaultValue")?this.defaultValue:o,l=this.value;l===void 0&&(l=i);var s=l.map(function(d,f){return Xh({value:d,handle:f,props:t.$props})}),c=s[0]===a?0:s.length-1;return{sHandle:null,recent:c,bounds:s}},watch:{value:{handler:function(t){var n=this.bounds;this.setChangeValue(t||n)},deep:!0},min:function(){var t=this.value;this.setChangeValue(t||this.bounds)},max:function(){var t=this.value;this.setChangeValue(t||this.bounds)}},methods:{setChangeValue:function(t){var n=this,r=this.bounds,a=t.map(function(i,l){return Xh({value:i,handle:l,bounds:r,props:n.$props})});if(!(a.length===r.length&&a.every(function(i,l){return i===r[l]}))&&(this.setState({bounds:a}),t.some(function(i){return gR(i,n.$props)}))){var o=t.map(function(i){return RS(i,n.$props)});this.__emit("change",o)}},onChange:function(t){var n=!vt(this,"value");if(n)this.setState(t);else{var r={};["sHandle","recent"].forEach(function(i){t[i]!==void 0&&(r[i]=t[i])}),Object.keys(r).length&&this.setState(r)}var 
a=P(P({},this.$data),t),o=a.bounds;this.__emit("change",o)},onStart:function(t){var n=this.bounds;this.__emit("beforeChange",n);var r=this.calcValueByPos(t);this.startValue=r,this.startPosition=t;var a=this.getClosestBound(r);this.prevMovedHandleIndex=this.getBoundNeedMoving(r,a),this.setState({sHandle:this.prevMovedHandleIndex,recent:this.prevMovedHandleIndex});var o=n[this.prevMovedHandleIndex];if(r!==o){var i=Je(n);i[this.prevMovedHandleIndex]=r,this.onChange({bounds:i})}},onEnd:function(t){var n=this.sHandle;this.removeDocumentEvents(),(n!==null||t)&&this.__emit("afterChange",this.bounds),this.setState({sHandle:null})},onMove:function(t,n){fs(t);var r=this.bounds,a=this.sHandle,o=this.calcValueByPos(n),i=r[a];o!==i&&this.moveTo(o)},onKeyboard:function(t){var n=this.$props,r=n.reverse,a=n.vertical,o=CR(t,a,r);if(o){fs(t);var i=this.bounds,l=this.sHandle,s=i[l===null?this.recent:l],c=o(s,this.$props),d=Xh({value:c,handle:l,bounds:i,props:this.$props});if(d===s)return;var f=!0;this.moveTo(d,f)}},getClosestBound:function(t){for(var n=this.bounds,r=0,a=1;a<n.length-1;++a)t>n[a]&&(r=a);return Math.abs(n[r+1]-t)<Math.abs(n[r]-t)&&(r+=1),r},getBoundNeedMoving:function(t,n){var r=this.bounds,a=this.recent,o=n,i=r[n+1]===r[n];return i&&r[a]===r[n]&&(o=a),i&&t!==r[n+1]&&(o=t<r[n+1]?n:n+1),o},getLowerBound:function(){return this.bounds[0]},getUpperBound:function(){var t=this.bounds;return t[t.length-1]},getPoints:function(){var t=this.marks,n=this.step,r=this.min,a=this.max,o=this._getPointsCache;if(!o||o.marks!==t||o.step!==n){var i=P({},t);if(n!==null)for(var l=r;l<=a;l+=n)i[l]=l;var s=Object.keys(i).map(parseFloat);s.sort(function(c,d){return c-d}),this._getPointsCache={marks:t,step:n,points:s}}return this._getPointsCache.points},moveTo:function(t,n){var r=this,a=Je(this.bounds),o=this.sHandle,i=this.recent,l=o===null?i:o;a[l]=t;var s=l;this.$props.pushable!==!1?this.pushSurroundingHandles(a,s):this.$props.allowCross&&(a.sort(function(c,d){return 
c-d}),s=a.indexOf(t)),this.onChange({recent:s,sHandle:s,bounds:a}),n&&(this.__emit("afterChange",a),this.setState({},function(){r.handlesRefs[s].focus()}),this.onEnd())},pushSurroundingHandles:function(t,n){var r=t[n],a=this.pushable;a=Number(a);var o=0;if(t[n+1]-r<a&&(o=1),r-t[n-1]<a&&(o=-1),o!==0){var i=n+o,l=o*(t[i]-r);this.pushHandle(t,i,o,a-l)||(t[n]=t[i]-o*a)}},pushHandle:function(t,n,r,a){for(var o=t[n],i=t[n];r*(i-o)<a;){if(!this.pushHandleOnePoint(t,n,r))return t[n]=o,!1;i=t[n]}return!0},pushHandleOnePoint:function(t,n,r){var a=this.getPoints(),o=a.indexOf(t[n]),i=o+r;if(i>=a.length||i<0)return!1;var l=n+r,s=a[i],c=this.pushable,d=r*(t[l]-s);return this.pushHandle(t,l,r,c-d)?(t[n]=s,!0):!1},trimAlignValue:function(t){var n=this.sHandle,r=this.bounds;return Xh({value:t,handle:n,bounds:r,props:this.$props})},ensureValueNotConflict:function(t,n,r){var a=r.allowCross,o=r.pushable,i=this.$data||{},l=i.bounds;if(t=t===void 0?i.sHandle:t,o=Number(o),!a&&t!=null&&l!==void 0){if(t>0&&n<=l[t-1]+o)return l[t-1]+o;if(t<l.length-1&&n>=l[t+1]-o)return l[t+1]-o}return n},getTrack:function(t){var n=t.bounds,r=t.prefixCls,a=t.reverse,o=t.vertical,i=t.included,l=t.offsets,s=t.trackStyle;return n.slice(0,-1).map(function(c,d){var f,p=d+1,v=Se((f={},V(f,"".concat(r,"-track"),!0),V(f,"".concat(r,"-track-").concat(p),!0),f));return g(hR,{class:v,vertical:o,reverse:a,included:i,offset:l[p-1],length:l[p]-l[p-1],style:s[d],key:p},null)})},renderSlider:function(){var t=this,n=this.sHandle,r=this.bounds,a=this.prefixCls,o=this.vertical,i=this.included,l=this.disabled,s=this.min,c=this.max,d=this.reverse,f=this.handle,p=this.defaultHandle,v=this.trackStyle,m=this.handleStyle,y=this.tabindex,b=f||p,C=r.map(function(k){return t.calcOffset(k)}),S="".concat(a,"-handle"),w=r.map(function(k,$){var 
O,T=y[$]||0;return(l||y[$]===null)&&(T=null),b({class:Se((O={},V(O,S,!0),V(O,"".concat(S,"-").concat($+1),!0),O)),prefixCls:a,vertical:o,offset:C[$],value:k,dragging:n===$,index:$,tabindex:T,min:s,max:c,reverse:d,disabled:l,style:m[$],ref:function(I){return t.saveHandle($,I)},onFocus:t.onFocus,onBlur:t.onBlur})});return{tracks:this.getTrack({bounds:r,prefixCls:a,reverse:d,vertical:o,included:i,offsets:C,trackStyle:v}),handles:w}}}},L5e=wR(R5e),_x=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},F5e=kg(),B5e=function(){return{prefixCls:u.string,tooltipPrefixCls:u.string,range:u.looseBool,reverse:u.looseBool,min:u.number,max:u.number,step:u.any,marks:u.object,dots:u.looseBool,value:u.oneOfType([u.number,u.arrayOf(u.number)]),defaultValue:u.oneOfType([u.number,u.arrayOf(u.number)]),included:u.looseBool,disabled:u.looseBool,vertical:u.looseBool,tipFormatter:u.oneOfType([u.func,u.object]),tooltipVisible:u.looseBool,tooltipPlacement:F5e.placement,getTooltipPopupContainer:u.func,onChange:u.func,onAfterChange:u.func}},V5e=function(t){return t.toString()},z5e=G({name:"ASlider",mixins:[nt],inheritAttrs:!1,props:P({},B5e()),emits:["update:value","change"],setup:function(){return{vcSlider:null,configProvider:ve("configProvider",St)}},data:function(){return{visibles:{}}},methods:{toggleTooltipVisible:function(t,n){this.setState(function(r){var a=r.visibles;return{visibles:P(P({},a),V({},t,n))}})},handleWithTooltip:function(t,n,r){var a=this,o=r.value,i=r.dragging,l=r.index,s=_x(r,["value","dragging","index"]),c=this.$props,d=c.tipFormatter,f=d===void 0?V5e:d,p=c.tooltipVisible,v=c.tooltipPlacement,m=c.getTooltipPopupContainer,y=this.visibles,b=f?y[l]||i:!1,C=p||p===void 
0&&b,S={prefixCls:t,title:f?f(o):"",visible:C,placement:v||"top",transitionName:"zoom-down",overlayClassName:"".concat(n,"-tooltip"),getPopupContainer:m||function(){return document.body},key:l},w=P(P({value:o},s),{onMouseenter:function(){return a.toggleTooltipVisible(l,!0)},onMouseleave:function(){return a.toggleTooltipVisible(l,!1)}});return g(Io,S,{default:function(){return[g(mR,w,null)]}})},saveSlider:function(t){this.vcSlider=t},focus:function(){this.vcSlider.focus()},blur:function(){this.vcSlider.blur()},handleChange:function(t){this.$emit("update:value",t),this.$emit("change",t)}},render:function(){var t=this,n=P(P({},Qe(this)),this.$attrs),r=n.range,a=n.prefixCls,o=n.tooltipPrefixCls,i=_x(n,["range","prefixCls","tooltipPrefixCls"]),l=this.configProvider.getPrefixCls,s=l("slider",a),c=l("tooltip",o);if(r){var d=P(P({},i),{prefixCls:s,tooltipPrefixCls:c,handle:function(v){return t.handleWithTooltip(c,s,v)},ref:this.saveSlider,onChange:this.handleChange});return g(L5e,d,null)}var f=P(P({},i),{prefixCls:s,tooltipPrefixCls:c,handle:function(v){return t.handleWithTooltip(c,s,v)},ref:this.saveSlider,onChange:this.handleChange});return g(A5e,f,null)}}),H5e=kn(z5e),j5e={small:8,middle:16,large:24},K5e={prefixCls:u.string,size:{type:[String,Number,Array]},direction:u.oneOf(rt("horizontal","vertical")).def("horizontal"),align:u.oneOf(rt("start","end","center","baseline")),wrap:u.looseBool};function W5e(e){return typeof e=="string"?j5e[e]:e||0}var U5e=G({name:"ASpace",props:K5e,slots:["split"],setup:function(t,n){var r=n.slots,a=Wt("space",t),o=a.prefixCls,i=a.space,l=a.direction,s=CD(),c=x(function(){var b;return t.size||((b=i.value)===null||b===void 0?void 0:b.size)||"small"}),d=H(),f=H();ce(c,function(){var b=(Array.isArray(c.value)?c.value:[c.value,c.value]).map(function(S){return W5e(S)}),C=fn(b,2);d.value=C[0],f.value=C[1]},{immediate:!0});var p=x(function(){return t.align===void 0&&t.direction==="horizontal"?"center":t.align}),v=x(function(){var b;return 
Se(o.value,"".concat(o.value,"-").concat(t.direction),(b={},V(b,"".concat(o.value,"-rtl"),l.value==="rtl"),V(b,"".concat(o.value,"-align-").concat(p.value),p.value),b))}),m=x(function(){return l.value==="rtl"?"marginLeft":"marginRight"}),y=x(function(){var b={};return s.value&&(b.columnGap="".concat(d.value,"px"),b.rowGap="".concat(f.value,"px")),P(P({},b),t.wrap&&{flexWrap:"wrap",marginBottom:"".concat(-f.value,"px")})});return function(){var b,C,S=t.wrap,w=t.direction,k=w===void 0?"horizontal":w,$=La((b=r.default)===null||b===void 0?void 0:b.call(r)),O=$.length;if(O===0)return null;var T=(C=r.split)===null||C===void 0?void 0:C.call(r),_="".concat(o.value,"-item"),I=d.value,L=O-1;return g("div",{class:v.value,style:y.value},[$.map(function(j,F){var N={};return s.value||(k==="vertical"?F<L&&(N={marginBottom:"".concat(I/(T?2:1),"px")}):N=P(P({},F<L&&V({},m.value,"".concat(I/(T?2:1),"px"))),S&&{paddingBottom:"".concat(f.value,"px")})),g(Fe,null,[g("div",{class:_,style:N},[j]),F<L&&T&&g("span",{class:"".concat(_,"-split"),style:N},[T])])})])}}}),Y5e=kn(U5e);function Ex(){if(typeof window!="undefined"&&window.document&&window.document.documentElement){var e=window.document.documentElement;return"flex"in e.style||"webkitFlex"in e.style||"Flex"in e.style||"msFlex"in e.style}return!1}var _1=G({name:"Steps",mixins:[nt],props:{type:u.string.def("default"),prefixCls:u.string.def("rc-steps"),iconPrefix:u.string.def("rc"),direction:u.string.def("horizontal"),labelPlacement:u.string.def("horizontal"),status:u.string.def("process"),size:u.string.def(""),progressDot:an(u.oneOfType([u.looseBool,u.func])),initial:u.number.def(0),current:u.number.def(0),icons:u.shape({finish:u.any,error:u.any}).loose,canClick:u.looseBool},data:function(){return this.calcStepOffsetWidth=Yn(this.calcStepOffsetWidth,150),{flexSupported:!0,lastStepOffsetWidth:0}},mounted:function(){var t=this;this.$nextTick(function(){t.calcStepOffsetWidth(),Ex()||t.setState({flexSupported:!1})})},updated:function(){var 
t=this;this.$nextTick(function(){t.calcStepOffsetWidth()})},beforeUnmount:function(){this.calcTimeout&&clearTimeout(this.calcTimeout),this.calcStepOffsetWidth&&this.calcStepOffsetWidth.cancel&&this.calcStepOffsetWidth.cancel()},methods:{onStepClick:function(t){var n=this.$props.current;n!==t&&this.__emit("change",t)},calcStepOffsetWidth:function(){var t=this;if(!Ex()){var n=this.$data.lastStepOffsetWidth,r=this.$refs.vcStepsRef;r.children.length>0&&(this.calcTimeout&&clearTimeout(this.calcTimeout),this.calcTimeout=setTimeout(function(){var a=(r.lastChild.offsetWidth||0)+1;n===a||Math.abs(n-a)<=3||t.setState({lastStepOffsetWidth:a})}))}}},render:function(){var t,n=this,r=this.prefixCls,a=this.direction,o=this.type,i=this.labelPlacement,l=this.iconPrefix,s=this.status,c=this.size,d=this.current,f=this.progressDot,p=this.initial,v=this.icons,m=this.canClick,y=o==="navigation",b=this.lastStepOffsetWidth,C=this.flexSupported,S=ht(this),w=S.length-1,k=f?"vertical":i,$=(t={},V(t,r,!0),V(t,"".concat(r,"-").concat(a),!0),V(t,"".concat(r,"-").concat(c),c),V(t,"".concat(r,"-label-").concat(k),a==="horizontal"),V(t,"".concat(r,"-dot"),!!f),V(t,"".concat(r,"-navigation"),y),V(t,"".concat(r,"-flex-not-supported"),!C),t),O={class:$,ref:"vcStepsRef"};return g("div",O,[S.map(function(T,_){var I=rr(T),L=p+_,j=P({stepNumber:"".concat(L+1),stepIndex:L,prefixCls:r,iconPrefix:l,progressDot:f,icons:v},I);return m&&(j.onStepClick=n.onStepClick),!C&&a!=="vertical"&&(y?(j.itemWidth="".concat(100/(w+1),"%"),j.adjustMarginRight=0):_!==w&&(j.itemWidth="".concat(100/w,"%"),j.adjustMarginRight="".concat(-Math.round(b/w+1),"px"))),s==="error"&&_===d-1&&(j.class="".concat(r,"-next-error")),I.status||(L===d?j.status=s:L<d?j.status="finish":j.status="wait"),j.active=L===d,Ot(T,j)})])}});function Mx(e){return typeof e=="string"}function q5e(){}var 
G5e=G({name:"Step",mixins:[nt],props:{prefixCls:u.string,wrapperStyle:u.object,itemWidth:u.string,active:u.looseBool,disabled:u.looseBool,status:u.string,iconPrefix:u.string,icon:u.any,adjustMarginRight:u.string,stepNumber:u.string,stepIndex:u.number,description:u.any,title:u.any,subTitle:u.any,progressDot:an(u.oneOfType([u.looseBool,u.func])),tailContent:u.any,icons:u.shape({finish:u.any,error:u.any}).loose,onClick:u.func,onStepClick:u.func},methods:{onItemClick:function(){for(var t=arguments.length,n=new Array(t),r=0;r<t;r++)n[r]=arguments[r];this.__emit.apply(this,["click"].concat(n)),this.__emit("stepClick",this.stepIndex)},renderIconNode:function(){var t,n=Qe(this),r=n.prefixCls,a=n.stepNumber,o=n.status,i=n.iconPrefix,l=n.icons,s=n.progressDot,c=We(this,"icon"),d=We(this,"title"),f=We(this,"description"),p,v=(t={},V(t,"".concat(r,"-icon"),!0),V(t,"".concat(i,"icon"),!0),V(t,"".concat(i,"icon-").concat(c),c&&Mx(c)),V(t,"".concat(i,"icon-check"),!c&&o==="finish"&&l&&!l.finish),V(t,"".concat(i,"icon-close"),!c&&o==="error"&&l&&!l.error),t),m=g("span",{class:"".concat(r,"-icon-dot")},null);return s?typeof s=="function"?p=g("span",{class:"".concat(r,"-icon")},[s({index:a-1,status:o,title:d,description:f,prefixCls:r})]):p=g("span",{class:"".concat(r,"-icon")},[m]):c&&!Mx(c)?p=g("span",{class:"".concat(r,"-icon")},[c]):l&&l.finish&&o==="finish"?p=g("span",{class:"".concat(r,"-icon")},[l.finish]):l&&l.error&&o==="error"?p=g("span",{class:"".concat(r,"-icon")},[l.error]):c||o==="finish"||o==="error"?p=g("span",{class:v},null):p=g("span",{class:"".concat(r,"-icon")},[a]),p}},render:function(){var t,n=Qe(this),r=n.prefixCls,a=n.itemWidth,o=n.active,i=n.status,l=i===void 
0?"wait":i,s=n.tailContent,c=n.adjustMarginRight,d=n.disabled,f=n.onClick,p=n.onStepClick,v=We(this,"title"),m=We(this,"subTitle"),y=We(this,"description"),b=(t={},V(t,"".concat(r,"-item"),!0),V(t,"".concat(r,"-item-").concat(l),!0),V(t,"".concat(r,"-item-custom"),We(this,"icon")),V(t,"".concat(r,"-item-active"),o),V(t,"".concat(r,"-item-disabled"),d===!0),t),C={class:b},S={};a&&(S.width=a),c&&(S.marginRight=c);var w={onClick:f||q5e};return p&&!d&&(w.role="button",w.tabindex=0,w.onClick=this.onItemClick),g("div",le(le({},C),{},{style:S}),[g("div",le(le({},w),{},{class:"".concat(r,"-item-container")}),[g("div",{class:"".concat(r,"-item-tail")},[s]),g("div",{class:"".concat(r,"-item-icon")},[this.renderIconNode()]),g("div",{class:"".concat(r,"-item-content")},[g("div",{class:"".concat(r,"-item-title")},[v,m&&g("div",{title:m,class:"".concat(r,"-item-subtitle")},[m])]),y&&g("div",{class:"".concat(r,"-item-description")},[y])])])])}});_1.Step=G5e;var X5e={prefixCls:u.string,iconPrefix:u.string,current:u.number,initial:u.number,labelPlacement:u.oneOf(rt("horizontal","vertical")).def("horizontal"),status:u.oneOf(rt("wait","process","finish","error")),size:u.oneOf(rt("default","small")),direction:u.oneOf(rt("horizontal","vertical")),progressDot:an(u.oneOfType([u.looseBool,u.func])),type:u.oneOf(rt("default","navigation")),onChange:u.func,"onUpdate:current":u.func},jl=G({name:"ASteps",inheritAttrs:!1,props:Rn(X5e,{current:0}),emits:["update:current","change"],setup:function(){return{configProvider:ve("configProvider",St)}},Step:P(P({},_1.Step),{name:"AStep"}),methods:{handleChange:function(t){this.$emit("update:current",t),this.$emit("change",t)}},render:function(){var 
t=this,n=P(P({},Qe(this)),this.$attrs),r=n.prefixCls,a=n.iconPrefix,o=this.configProvider.getPrefixCls,i=o("steps",r),l=o("",a),s=We(this,"progressDot",this,!1),c={finish:g(wg,{class:"".concat(i,"-finish-icon")},null),error:g(go,{class:"".concat(i,"-error-icon")},null)},d=P(P({icons:c,iconPrefix:l,prefixCls:i,progressDot:s},n),{canClick:!!(this.onChange||this["onUpdate:current"]),onChange:this.handleChange});return g(_1,d,{default:function(){return[ht(t)]}})}});jl.install=function(e){return e.component(jl.name,jl),e.component(jl.Step.name,jl.Step),e};var Z5e=jl.Step,J5e=jl,Q5e=rt("small","default"),eIe={prefixCls:u.string,size:u.oneOf(Q5e),disabled:u.looseBool,checkedChildren:u.VNodeChild,unCheckedChildren:u.VNodeChild,tabindex:u.oneOfType([u.string,u.number]),autofocus:u.looseBool,loading:u.looseBool,checked:u.oneOfType([u.string,u.number,u.looseBool]),checkedValue:u.oneOfType([u.string,u.number,u.looseBool]).def(!0),unCheckedValue:u.oneOfType([u.string,u.number,u.looseBool]).def(!1),onChange:{type:Function},onClick:{type:Function},onKeydown:{type:Function},onMouseup:{type:Function},"onUpdate:checked":{type:Function}},tIe=G({name:"ASwitch",__ANT_SWITCH:!0,inheritAttrs:!1,props:eIe,slots:["checkedChildren","unCheckedChildren"],emits:["update:checked","mouseup","change","click","keydown"],setup:function(t,n){var r=n.attrs,a=n.slots,o=n.expose,i=n.emit;hc(function(){on(!("defaultChecked"in r),"Switch","'defaultChecked' is deprecated, please use 'v-model:checked'"),on(!("value"in r),"Switch","`value` is not validate prop, do you mean `checked`?")});var l=H(t.checked!==void 0?t.checked:r.defaultChecked),s=x(function(){return l.value===t.checkedValue});ce(function(){return t.checked},function(){l.value=t.checked});var c=Wt("switch",t),d=c.prefixCls,f=H(),p=function(){var k;(k=f.value)===null||k===void 0||k.focus()},v=function(){var k;(k=f.value)===null||k===void 
0||k.blur()};o({focus:p,blur:v}),et(function(){Ne(function(){t.autofocus&&!t.disabled&&f.value.focus()})});var m=function(k,$){t.disabled||(i("update:checked",k),i("change",k,$))},y=function(k){p();var $=s.value?t.unCheckedValue:t.checkedValue;m($,k),i("click",$,k)},b=function(k){k.keyCode===ze.LEFT?m(t.unCheckedValue,k):k.keyCode===ze.RIGHT&&m(t.checkedValue,k),i("keydown",k)},C=function(k){var $;($=f.value)===null||$===void 0||$.blur(),i("mouseup",k)},S=x(function(){var w;return w={},V(w,"".concat(d.value,"-small"),t.size==="small"),V(w,"".concat(d.value,"-loading"),t.loading),V(w,"".concat(d.value,"-checked"),s.value),V(w,"".concat(d.value,"-disabled"),t.disabled),V(w,d.value,!0),w});return function(){return g(C2,{insertExtraNode:!0},{default:function(){return[g("button",le(le(le({},bn(t,["prefixCls","checkedChildren","unCheckedChildren","checked","autofocus","defaultChecked","checkedValue","unCheckedValue"])),r),{},{onKeydown:b,onClick:y,onMouseup:C,type:"button",role:"switch","aria-checked":l.value,disabled:t.disabled||t.loading,class:[r.class,S.value],ref:f}),[t.loading?g(co,{class:"".concat(d.value,"-loading-icon")},null):null,g("span",{class:"".concat(d.value,"-inner")},[s.value?jn(a,t,"checkedChildren"):jn(a,t,"unCheckedChildren")])])]}})}}}),nIe=kn(tIe),rIe={icon:{tag:"svg",attrs:{viewBox:"0 0 1024 1024",focusable:"false"},children:[{tag:"path",attrs:{d:"M858.9 689L530.5 308.2c-9.4-10.9-27.5-10.9-37 0L165.1 689c-12.2 14.2-1.2 35 18.5 35h656.8c19.7 0 30.7-20.8 18.5-35z"}}]},name:"caret-up",theme:"filled"},aIe=rIe;function Ix(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){oIe(e,a,n[a])})}return e}function oIe(e,t,n){return t in 
e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var LS=function(t,n){var r=Ix({},t,n.attrs);return g(Et,Ix({},r,{icon:aIe}),null)};LS.displayName="CaretUpFilled";LS.inheritAttrs=!1;var iIe=LS,lIe={icon:{tag:"svg",attrs:{viewBox:"0 0 1024 1024",focusable:"false"},children:[{tag:"path",attrs:{d:"M840.4 300H183.6c-19.7 0-30.7 20.8-18.5 35l328.4 380.8c9.4 10.9 27.5 10.9 37 0L858.9 335c12.2-14.2 1.2-35-18.5-35z"}}]},name:"caret-down",theme:"filled"},sIe=lIe;function Nx(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){uIe(e,a,n[a])})}return e}function uIe(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var FS=function(t,n){var r=Nx({},t,n.attrs);return g(Et,Nx({},r,{icon:sIe}),null)};FS.displayName="CaretDownFilled";FS.inheritAttrs=!1;var SR=FS,wb,Sb,Ax={position:"absolute",top:"-9999px",width:"50px",height:"50px"},kR="RC_TABLE_INTERNAL_COL_DEFINE";function Dx(e){var t=e.direction,n=t===void 0?"vertical":t,r=e.prefixCls;if(typeof document=="undefined"||typeof window=="undefined")return 0;var a=n==="vertical";if(a&&wb)return wb;if(!a&&Sb)return Sb;var o=document.createElement("div");Object.keys(Ax).forEach(function(l){o.style[l]=Ax[l]}),o.className="".concat(r,"-hide-scrollbar scroll-div-append-to-body"),a?o.style.overflowY="scroll":o.style.overflowX="scroll",document.body.appendChild(o);var i=0;return a?(i=o.offsetWidth-o.clientWidth,wb=i):(i=o.offsetHeight-o.clientHeight,Sb=i),document.body.removeChild(o),i}function cIe(e,t,n){var r;function a(){for(var o=arguments.length,i=new Array(o),l=0;l<o;l++)i[l]=arguments[l];var s=this;i[0]&&i[0].persist&&i[0].persist();var 
c=function(){r=null,n||e.apply(s,i)},d=n&&!r;clearTimeout(r),r=setTimeout(c,t),d&&e.apply(s,i)}return a.cancel=function(){r&&(clearTimeout(r),r=null)},a}function dIe(e,t){var n=e.indexOf(t),r=e.slice(0,n),a=e.slice(n+1,e.length);return r.concat(a)}function fIe(e){return Object.keys(e).reduce(function(t,n){return(n.substr(0,5)==="data-"||n.substr(0,5)==="aria-")&&(t[n]=e[n]),t},{})}function hIe(e){var t=[];return e.forEach(function(n){var r=n||{},a=r.key,o=r.dataIndex;t.push(a!==void 0?a:o)}),t}function pIe(e){return e!=null}var vIe={name:"ColGroup",inheritAttrs:!1,props:{fixed:u.string,columns:u.array},setup:function(){return{table:ve("table",{}),store:ve("table-store",function(){return{}})}},render:function(){var t=this.fixed,n=this.table,r=n.prefixCls,a=n.expandIconAsCell,o=n.onColumnResize,i=[];a&&t!=="right"&&i.push(g("col",{class:"".concat(r,"-expand-icon-col"),key:"rc-table-expand-icon-col"},null));var l,s=this.store.columnManager;return t==="left"?l=s.leftLeafColumns:t==="right"?l=s.rightLeafColumns:l=s.leafColumns,i=i.concat(l.map(function(c){var d=c.key,f=c.dataIndex,p=c.width,v=c[kR],m=d!==void 0?d:f,y=typeof p=="number"?"".concat(p,"px"):p;return g(zo,{onResize:function(C){var S=C.offsetWidth;o(m,S)}},{default:function(){return[g("col",le({"data-key":m,key:m,style:{width:y,minWidth:y}},v),null)]}})})),g("colgroup",null,[i])}};function $R(e,t,n,r,a){var o=n[e]||{},i=n[t]||{},l,s;o.fixed==="left"?l=r.left[e]:i.fixed==="right"&&(s=r.right[t]);var c=!1,d=!1,f=!1,p=!1,v=n[t+1],m=n[e-1];if(a==="rtl"){if(l!==void 0){var y=m&&m.fixed==="left";p=!y}else if(s!==void 0){var b=v&&v.fixed==="right";f=!b}}else if(l!==void 0){var C=v&&v.fixed==="left";c=!C}else if(s!==void 0){var S=m&&m.fixed==="right";d=!S}return{fixLeft:l,fixRight:s,lastFixLeft:c,firstFixRight:d,lastFixRight:f,firstFixLeft:p,isSticky:r.isSticky}}var mIe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in 
e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},gIe={name:"TableHeaderRow",inheritAttrs:!1,props:{index:u.number,fixed:u.string,columns:u.array,rows:u.array,row:u.array,components:u.object,customHeaderRow:u.func,prefixCls:u.prefixCls},setup:function(t){var n=ve("table-store",function(){return{}});return{height:x(function(){var r=n.fixedColumnsHeadRowsHeight,a=t.columns,o=t.rows,i=t.fixed,l=r[0];return i&&l&&a?l==="auto"?"auto":"".concat(l/o.length,"px"):null}),store:n}},render:function(){var t=this.row,n=this.index,r=this.height,a=this.components,o=this.customHeaderRow,i=this.prefixCls,l=a.header.row,s=a.header.cell,c=o(t.map(function(y){return y.column}),n),d=c?c.style:{},f=P({height:r},d);f.height===null&&delete f.height;var p=this.store,v=p.stickyOffsets,m=p.columnManager;return g(l,le(le({},c),{},{style:f}),{default:function(){return[t.map(function(b,C){var S,w=b.column,k=b.isLast,$=b.children;b.className;var O=mIe(b,["column","isLast","children","className"]),T=$R(b.colStart,b.colEnd,m.leafColumns,v),_=w.customHeaderCell?w.customHeaderCell(w):{},I=P(P(P({},O),_),{key:w.key||w.dataIndex||C});if(I.colSpan===0)return null;w.align&&(I.style=P(P({},_.style),{textAlign:w.align}));var L=T.fixLeft,j=T.fixRight,F=T.firstFixLeft,N=T.lastFixLeft,D=T.firstFixRight,z=T.lastFixRight,B={},M=typeof L=="number",E=typeof j=="number";return 
M&&(B.position="sticky",B.left="".concat(L,"px")),E&&(B.position="sticky",B.right="".concat(j,"px")),I.class=Se(_.class,_.className,w.class,w.className,(S={},V(S,"".concat(i,"-align-").concat(w.align),!!w.align),V(S,"".concat(i,"-row-cell-ellipsis"),!!w.ellipsis),V(S,"".concat(i,"-row-cell-break-word"),!!w.width),V(S,"".concat(i,"-row-cell-last"),k),V(S,"".concat(i,"-cell-fix-left"),M),V(S,"".concat(i,"-cell-fix-left-first"),F),V(S,"".concat(i,"-cell-fix-left-last"),N),V(S,"".concat(i,"-cell-fix-right"),E),V(S,"".concat(i,"-cell-fix-right-first"),D),V(S,"".concat(i,"-cell-fix-right-last"),z),S)),I.style=P(P({},I.style||{}),B),typeof s=="function"?s(I,$):g(s,I,{default:function(){return[$]}})})]}})}},yIe=gIe;function bIe(e){var t=[];function n(i,l){var s=arguments.length>2&&arguments[2]!==void 0?arguments[2]:0;t[s]=t[s]||[];var c=l,d=i.filter(Boolean).map(function(f){var p={key:f.key,className:f.className||f.class||"",children:f.title,column:f,colStart:c},v=1,m=f.children;return m&&m.length>0&&(v=n(m,c,s+1).reduce(function(y,b){return y+b},0),p.hasSubColumns=!0),"colSpan"in f&&(v=f.colSpan),"rowSpan"in f&&(p.rowSpan=f.rowSpan),p.colSpan=v,p.colEnd=p.colStart+v-1,t[s].push(p),c+=v,v});return d}n(e,0);for(var r=t.length,a=function(l){t[l].forEach(function(s){!("rowSpan"in s)&&!s.hasSubColumns&&(s.rowSpan=r-l)})},o=0;o<r;o+=1)a(o);return t}var CIe={name:"TableHeader",inheritAttrs:!1,props:{fixed:u.string,columns:u.array.isRequired,expander:u.object.isRequired},setup:function(){return{table:ve("table",{})}},render:function(){var t=this.table,n=t.sComponents,r=t.prefixCls,a=t.showHeader,o=t.customHeaderRow,i=this.expander,l=this.columns,s=this.fixed;if(!a)return null;var c=bIe(this.columns);i.renderExpandIndentCell(c,s);var d=n.header.wrapper;return g(d,{class:"".concat(r,"-thead")},{default:function(){return[c.map(function(p,v){return g(yIe,{prefixCls:r,key:v,index:v,fixed:s,columns:l,rows:c,row:p,components:n,customHeaderRow:o},null)})]}})}};function Rx(e){return 
e&&!zn(e)&&Object.prototype.toString.call(e)==="[object Object]"}var wIe={name:"TableCell",inheritAttrs:!1,props:{record:u.object,prefixCls:u.string,index:u.number,indent:u.number,indentSize:u.number,column:u.object,expandIcon:u.any,component:u.any,colIndex:u.number},setup:function(){return{table:ve("table",{}),store:ve("table-store",{})}},methods:{handleClick:function(t){var n=this.record,r=this.column.onCellClick;r&&r(n,t)}},render:function(){var t,n=this.record,r=this.indentSize,a=this.prefixCls,o=this.indent,i=this.index,l=this.expandIcon,s=this.column,c=this.component,d=this.store.fixedInfoList||[],f=d[this.colIndex]||{},p=f.fixLeft,v=f.fixRight,m=f.firstFixLeft,y=f.lastFixLeft,b=f.firstFixRight,C=f.lastFixRight,S={},w=typeof p=="number",k=typeof v=="number";w&&(S.position="sticky",S.left="".concat(p,"px")),k&&(S.position="sticky",S.right="".concat(v,"px"));var $=s.dataIndex,O=s.customRender,T=s.className,_=T===void 0?"":T,I=this.table,L=I.transformCellText,j=I.prefixCls,F;typeof $=="number"?F=vn(n,$):!$||$.length===0?F=n:F=vn(n,$);var N={onClick:this.handleClick},D,z;if(O&&(F=O({text:F,record:n,index:i,column:s}),Rx(F))){N=F.props||F.attrs||N;var B=N;D=B.colSpan,z=B.rowSpan,F=F.children}s.customCell&&(N=P(P({},N),s.customCell(n,i))),Rx(F)&&(F=null),L&&(F=L({text:F,column:s,record:n,index:i}));var M=l?g("span",{style:{paddingLeft:"".concat(r*o,"px")},class:"".concat(a,"-indent indent-level-").concat(o)},null):null;if(z===0||D===0)return null;s.align&&(N.style=P({textAlign:s.align},N.style));var E=Se(_,s.class,(t={},V(t,"".concat(a,"-cell-ellipsis"),!!s.ellipsis),V(t,"".concat(a,"-cell-break-word"),!!s.width),V(t,"".concat(j,"-cell-fix-left"),w),V(t,"".concat(j,"-cell-fix-left-first"),m),V(t,"".concat(j,"-cell-fix-left-last"),y),V(t,"".concat(j,"-cell-fix-right"),k),V(t,"".concat(j,"-cell-fix-right-first"),b),V(t,"".concat(j,"-cell-fix-right-last"),C),t));return s.ellipsis&&typeof 
F=="string"&&(N.title=F),g(c,le(le({class:E},N),{},{style:P(P({},N.style||{}),S)}),{default:function(){return[M,l,Gt(F)]}})}},SIe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n};function Zs(){}var kIe={name:"TableRow",inheritAttrs:!1,mixins:[nt],props:An({customRow:u.func,record:u.object,prefixCls:u.string,columns:u.array,index:u.number,rowKey:u.oneOfType([u.string,u.number]).isRequired,className:u.string,indent:u.number,indentSize:u.number,hasExpandIcon:u.func,fixed:an(u.oneOfType([u.string,u.looseBool])),renderExpandIcon:u.func,renderExpandIconCell:u.func,components:u.any,expandedRow:u.looseBool,isAnyColumnsFixed:u.looseBool,ancestorKeys:u.array.isRequired,expandIconColumnIndex:u.number,expandRowByClick:u.looseBool},{hasExpandIcon:function(){},renderExpandIcon:function(){},renderExpandIconCell:function(){}}),setup:function(t){var n=ve("table-store",function(){return{}}),r=x(function(){var i=n.expandedRowKeys,l=t.ancestorKeys;return!!(l.length===0||l.every(function(s){return i.includes(s)}))}),a=x(function(){var i=n.expandedRowsHeight,l=n.fixedColumnsBodyRowsHeight,s=t.fixed,c=t.rowKey;return s?i[c]?i[c]:l[c]?l[c]:null:null}),o=x(function(){var i=n.currentHoverKey,l=t.rowKey;return i===l});return{store:n,visible:r,hovered:o,height:a}},data:function(){return this.rowRef=null,{shouldRender:this.visible}},mounted:function(){var t=this;this.shouldRender&&this.$nextTick(function(){t.saveRowRef()})},watch:{visible:function(t){t&&(this.shouldRender=!0)}},updated:function(){var t=this;this.shouldRender&&!this.rowRef&&this.$nextTick(function(){t.saveRowRef()})},methods:{onRowClick:function(t){var n=arguments.length>1&&arguments[1]!==void 
0?arguments[1]:Zs,r=this.record,a=this.index;this.__emit("rowClick",r,a,t),n(t)},onRowDoubleClick:function(t){var n=arguments.length>1&&arguments[1]!==void 0?arguments[1]:Zs,r=this.record,a=this.index;this.__emit("rowDoubleClick",r,a,t),n(t)},onContextMenu:function(t){var n=arguments.length>1&&arguments[1]!==void 0?arguments[1]:Zs,r=this.record,a=this.index;this.__emit("rowContextmenu",r,a,t),n(t)},onMouseEnter:function(t){var n=arguments.length>1&&arguments[1]!==void 0?arguments[1]:Zs,r=this.record,a=this.index,o=this.rowKey;this.__emit("hover",!0,o),this.__emit("rowMouseenter",r,a,t),n(t)},onMouseLeave:function(t){var n=arguments.length>1&&arguments[1]!==void 0?arguments[1]:Zs,r=this.record,a=this.index,o=this.rowKey;this.__emit("hover",!1,o),this.__emit("rowMouseleave",r,a,t),n(t)},setExpandedRowHeight:function(){var t=this.store,n=this.rowKey,r=t.expandedRowsHeight,a=this.rowRef.getBoundingClientRect().height;r=P(P({},r),V({},n,a)),t.expandedRowsHeight=r},setRowHeight:function(){var t=this.store,n=this.rowKey,r=t.fixedColumnsBodyRowsHeight,a=this.rowRef.getBoundingClientRect().height;t.fixedColumnsBodyRowsHeight=P(P({},r),V({},n,a))},getStyle:function(){var t=this.height,n=this.visible,r=this.$attrs.style||{};return t&&(r=P(P({},r),{height:t})),!n&&!r.display&&(r=P(P({},r),{display:"none"})),r},saveRowRef:function(){this.rowRef=Sn(this);var t=this.isAnyColumnsFixed,n=this.fixed,r=this.expandedRow,a=this.ancestorKeys;!t||(!n&&r&&this.setExpandedRowHeight(),!n&&a.length>=0&&this.setRowHeight())}},render:function(){var t=this;if(!this.shouldRender)return null;var n=this.prefixCls,r=this.columns,a=this.record,o=this.rowKey,i=this.index,l=this.customRow,s=l===void 0?Zs:l,c=this.indent,d=this.indentSize,f=this.hovered,p=this.height,v=this.visible,m=this.components,y=this.hasExpandIcon,b=this.renderExpandIcon,C=this.renderExpandIconCell,S=m.body.row,w=m.body.cell,k=this.$attrs.class||"";f&&(k+=" ".concat(n,"-hover"));var $=[];C($);for(var O=0;O<r.length;O+=1){var 
T=r[O];on(T.onCellClick===void 0,"column[onCellClick] is deprecated, please use column[customCell] instead."),$.push(g(wIe,{prefixCls:n,record:a,indentSize:d,indent:c,index:i,colIndex:O,column:T,key:T.key||T.dataIndex,expandIcon:y(O)&&b(),component:w},null))}var _=s(a,i)||{},I=_.class,L=_.className,j=_.style,F=SIe(_,["class","className","style"]),N={height:typeof p=="number"?"".concat(p,"px"):p};v||(N.display="none"),N=P(P({},N),j);var D=Se(n,k,"".concat(n,"-level-").concat(c),L,I),z=P(P({},F),{style:N,onClick:function(M){t.onRowClick(M,F.onClick)},onDblclick:function(M){t.onRowDoubleClick(M,F.onDblclick)},onMouseenter:function(M){t.onMouseEnter(M,F.onMouseenter)},onMouseleave:function(M){t.onMouseLeave(M,F.onMouseleave)},onContextmenu:function(M){t.onContextMenu(M,F.onContextmenu)},class:D,"data-row-key":o});return g(S,z,{default:function(){return[$]}})}},OR=kIe,$Ie={name:"ExpandIcon",mixins:[nt],inheritAttrs:!1,props:{record:u.object,prefixCls:u.string,expandable:u.any,expanded:u.looseBool,needIndentSpaced:u.looseBool},methods:{onExpand:function(t){this.__emit("expand",this.record,t)}},render:function(){var t=this.expandable,n=this.prefixCls,r=this.onExpand,a=this.needIndentSpaced,o=this.expanded;if(t){var i=o?"expanded":"collapsed";return g("span",{class:"".concat(n,"-expand-icon ").concat(n,"-").concat(i),onClick:r},null)}return a?g("span",{class:"".concat(n,"-expand-icon ").concat(n,"-spaced")},null):null}},OIe={mixins:[nt],name:"ExpandableRow",inheritAttrs:!1,props:{prefixCls:u.string.isRequired,rowKey:u.oneOfType([u.string,u.number]).isRequired,fixed:an(u.oneOfType([u.string,u.looseBool])),record:u.oneOfType([u.object,u.array]).isRequired,indentSize:u.number,needIndentSpaced:u.looseBool.isRequired,expandRowByClick:u.looseBool,expandIconAsCell:u.looseBool,expandIconColumnIndex:u.number,childrenColumnName:u.string,expandedRowRender:u.func,expandIcon:u.func},setup:function(t){var n=ve("table-store",function(){return{}});return{expanded:x(function(){return 
n.expandedRowKeys.includes(t.rowKey)})}},beforeUnmount:function(){this.handleDestroy()},methods:{hasExpandIcon:function(t){var n=this.$props,r=n.expandRowByClick,a=n.expandIcon;return this.tempExpandIconAsCell||t!==this.tempExpandIconColumnIndex?!1:!!a||!r},handleExpandChange:function(t,n){var r=this.expanded,a=this.rowKey;this.__emit("expandedChange",!r,t,n,a)},handleDestroy:function(){var t=this.rowKey,n=this.record;this.__emit("expandedChange",!1,n,null,t,!0)},handleRowClick:function(t,n,r){var a=this.expandRowByClick;a&&this.handleExpandChange(t,r),this.__emit("rowClick",t,n,r)},renderExpandIcon:function(){var t=this.prefixCls,n=this.expanded,r=this.record,a=this.needIndentSpaced,o=this.expandIcon;return o?o({prefixCls:t,expanded:n,record:r,needIndentSpaced:a,expandable:this.expandable,onExpand:this.handleExpandChange}):g($Ie,{expandable:this.expandable,prefixCls:t,onExpand:this.handleExpandChange,needIndentSpaced:a,expanded:n,record:r},null)},renderExpandIconCell:function(t){if(!!this.tempExpandIconAsCell){var n=this.prefixCls;t.push(g("td",{class:"".concat(n,"-expand-icon-cell"),key:"rc-table-expand-icon-cell"},[this.renderExpandIcon()]))}}},render:function(){var t=this.childrenColumnName,n=this.expandedRowRender,r=this.indentSize,a=this.record,o=this.fixed,i=this.expanded;this.tempExpandIconAsCell=o!=="right"?this.expandIconAsCell:!1,this.tempExpandIconColumnIndex=o!=="right"?this.expandIconColumnIndex:-1;var l=a[t];this.expandable=!!(l||n);var s={indentSize:r,expanded:i,hasExpandIcon:this.hasExpandIcon,renderExpandIcon:this.renderExpandIcon,renderExpandIconCell:this.renderExpandIconCell,onRowClick:this.handleRowClick};return ht(this,"default",s)}},PIe=OIe;function Js(){}var 
TIe={name:"BaseTable",inheritAttrs:!1,props:{fixed:an(u.oneOfType([u.string,u.looseBool])),columns:u.array.isRequired,tableClassName:u.string.isRequired,hasHead:u.looseBool.isRequired,hasBody:u.looseBool.isRequired,expander:u.object.isRequired,getRowKey:u.func,isAnyColumnsFixed:u.looseBool},setup:function(){return{table:ve("table",function(){return{}}),store:ve("table-store",function(){return{}})}},methods:{getColumns:function(t){var n=this.$props.columns,r=n===void 0?[]:n;return(t||r).map(function(a){return P(P({},a),{className:Se(a.className,a.class)})})},handleRowHover:function(t,n){this.store.currentHoverKey=t?n:null},renderRows:function(t,n){for(var r=this,a=arguments.length>2&&arguments[2]!==void 0?arguments[2]:[],o=P(P(P({},this.table.$attrs),this.table.$props),this.table.$data),i=o.sComponents,l=o.prefixCls,s=o.childrenColumnName,c=o.rowClassName,d=o.customRow,f=d===void 0?Js:d,p=o.onRowClick,v=p===void 0?Js:p,m=o.onRowDoubleClick,y=m===void 0?Js:m,b=o.onRowContextMenu,C=b===void 0?Js:b,S=o.onRowMouseEnter,w=S===void 0?Js:S,k=o.onRowMouseLeave,$=k===void 0?Js:k,O=o.rowRef,T=this.store.columnManager,_=this.getRowKey,I=this.fixed,L=this.expander,j=this.isAnyColumnsFixed,F=[],N=function(B){var M=t[B],E=_(M,B),K=typeof c=="string"?c:c(M,B,n),W={};T.isAnyColumnsFixed&&(W.onHover=r.handleRowHover);var Y=void 0;I==="left"?Y=T.leftLeafColumns:I==="right"?Y=T.rightLeafColumns:Y=r.getColumns(T.leafColumns);var q="".concat(l,"-row"),J=P(P({},L.props),{fixed:I,index:B,prefixCls:q,record:M,rowKey:E,needIndentSpaced:L.needIndentSpaced,key:E,onRowClick:v,onExpandedChange:L.handleExpandChange}),ne=g(PIe,J,{default:function(Q){var ae=P(P(P({fixed:I,indent:n,record:M,index:B,prefixCls:q,childrenColumnName:s,columns:Y,rowKey:E,ancestorKeys:a,components:i,isAnyColumnsFixed:j,customRow:f,onRowDoubleClick:y,onRowContextMenu:C,onRowMouseEnter:w,onRowMouseLeave:$},W),{class:K,ref:O(M,B,n)}),Q);return 
g(OR,ae,null)}});F.push(ne),L.renderRows(r.renderRows,F,M,B,n,I,E,a)},D=0;D<t.length;D+=1)N(D);return F}},render:function(){var t=this,n=this.table,r=n.sComponents,a=n.prefixCls,o=n.scroll,i=n.data,l=this.$props,s=l.expander,c=l.tableClassName,d=l.hasHead,f=l.hasBody,p=l.fixed,v=this.getColumns(),m={};if(!p&&o.x&&(m.width=o.x===!0?"auto":o.x,m.width=typeof m.width=="number"?"".concat(m.width,"px"):m.width),p){var y=v.reduce(function(w,k){var $=k.width;return w+parseFloat($,10)},0);y>0&&(m.width=y+"px")}var b=f?r.table:"table",C=r.body.wrapper,S;return f&&(S=g(C,{class:"".concat(a,"-tbody")},{default:function(){return[t.renderRows(i,0)]}})),g(b,{class:c,style:m,key:"table"},{default:function(){return[g(vIe,{columns:v,fixed:p},null),d&&g(CIe,{expander:s,columns:v,fixed:p},null),S]}})}},PR=TIe,xIe={name:"HeadTable",inheritAttrs:!1,props:{fixed:an(u.oneOfType([u.string,u.looseBool])),columns:u.array.isRequired,tableClassName:u.string.isRequired,handleBodyScrollLeft:u.func.isRequired,expander:u.object.isRequired},setup:function(){return{table:ve("table",{})}},render:function(){var t=this.columns,n=this.fixed,r=this.tableClassName,a=this.handleBodyScrollLeft,o=this.expander,i=this.table,l=i.prefixCls,s=i.scroll,c=i.showHeader,d=i.saveRef,f=i.useFixedHeader,p={},v=Dx({direction:"vertical"});if(s.y){f=!0;var 
m=Dx({direction:"horizontal",prefixCls:l});m>0&&!n&&(p.marginBottom="-".concat(m,"px"),p.paddingBottom="0px",p.minWidth="".concat(v,"px"),p.overflowX="scroll",p.overflowY=v===0?"hidden":"scroll")}return!f||!c?null:g("div",{key:"headTable",ref:n?function(){}:d("headTable"),class:Se("".concat(l,"-header"),V({},"".concat(l,"-hide-scrollbar"),v>0)),style:p,onScroll:a},[g(PR,{tableClassName:r,hasHead:!0,hasBody:!1,fixed:n,columns:t,expander:o},null)])}},
/* BodyTable: renders the "<prefixCls>-body" wrapper div around a PR table
   with hasBody:true. The wrapper style starts as a copy of table.bodyStyle.
   With scroll.y set it gets maxHeight (numbers become "<n>px") and an
   overflowY default of "scroll", and the inner table drops its own header
   (hasHead is the negation of useFixedHeader, which is forced to true here).
   With scroll.x set it gets an overflowX default of "auto" and, when scroll.y
   is not set, overflowY "hidden". The wrapper is registered through
   table.saveRef("bodyTable") and forwards wheel and scroll events to the
   handleWheel / handleBodyScroll props. */
_Ie={name:"BodyTable",inheritAttrs:!1,props:{columns:u.array.isRequired,tableClassName:u.string.isRequired,handleBodyScroll:u.func.isRequired,handleWheel:u.func.isRequired,getRowKey:u.func.isRequired,expander:u.object.isRequired,isAnyColumnsFixed:u.looseBool},setup:function(){return{table:ve("table",{})}},render:function(){var t=this.table,n=t.prefixCls,r=t.scroll,a=this.columns,o=this.tableClassName,i=this.getRowKey,l=this.handleBodyScroll,s=this.handleWheel,c=this.expander,d=this.isAnyColumnsFixed,f=this.table,p=f.useFixedHeader,v=f.saveRef,m=P({},this.table.bodyStyle);if(r.y){var y=m.maxHeight||r.y;y=typeof y=="number"?"".concat(y,"px"):y,m.maxHeight=y,m.overflowY=m.overflowY||"scroll",p=!0}
/* NOTE(review): the WebkitTransform value assigned below has a space between
   "translate3d" and "(". CSS function notation does not permit that, so
   browsers presumably discard the declaration - confirm whether the no-op is
   relied upon before correcting the string. */
r.x&&(m.overflowX=m.overflowX||"auto",m.WebkitTransform="translate3d (0, 0, 0)",r.y||(m.overflowY="hidden"));var b=g(PR,{tableClassName:o,hasHead:!p,hasBody:!0,columns:a,expander:c,getRowKey:i,isAnyColumnsFixed:d},null),C=r&&(r.x||r.y);return g("div",{tabindex:C?-1:void 
0,key:"bodyTable",class:"".concat(n,"-body"),style:m,ref:v("bodyTable"),onWheel:s,onScroll:l},[b])}},EIe=function(){return{expandIconAsCell:u.looseBool,expandRowByClick:u.looseBool,expandedRowKeys:u.array,expandedRowClassName:u.func,defaultExpandAllRows:u.looseBool,defaultExpandedRowKeys:u.array,expandIconColumnIndex:u.number,expandedRowRender:u.func,expandIcon:u.func,childrenColumnName:u.string,indentSize:u.number,columnManager:u.object.isRequired,prefixCls:u.string.isRequired,data:u.array,getRowKey:u.func}},MIe={name:"ExpandableTable",inheritAttrs:!1,mixins:[nt],props:An(EIe(),{expandIconAsCell:!1,expandedRowClassName:function(){return""},expandIconColumnIndex:0,defaultExpandAllRows:!1,defaultExpandedRowKeys:[],childrenColumnName:"children",indentSize:15}),setup:function(t){var n=ve("table-store",function(){return{}}),r=t.data,a=t.childrenColumnName,o=t.defaultExpandAllRows,i=t.expandedRowKeys,l=t.defaultExpandedRowKeys,s=t.getRowKey,c=[],d=Je(r);if(o)for(var f=0;f<d.length;f+=1){var p=d[f];c.push(s(p,f)),d=d.concat(p[a]||[])}else c=i||l;return P(n,{expandedRowsHeight:{},expandedRowKeys:c}),{store:n}},mounted:function(){this.handleUpdated()},updated:function(){this.handleUpdated()},watch:{expandedRowKeys:function(t){var n=this;this.$nextTick(function(){n.store.expandedRowKeys=t})}},methods:{handleUpdated:function(){this.latestExpandedRows=null},handleExpandChange:function(t,n,r,a){var o=arguments.length>4&&arguments[4]!==void 0?arguments[4]:!1;r&&(r.preventDefault(),r.stopPropagation());var i=this.store.expandedRowKeys;if(t)i=[].concat(Je(i),[a]);else{var l=i.indexOf(a);l!==-1&&(i=dIe(i,a))}this.expandedRowKeys||(this.store.expandedRowKeys=i),(!this.latestExpandedRows||!Mr(this.latestExpandedRows,i))&&(this.latestExpandedRows=i,this.__emit("expandedRowsChange",i)),o||this.__emit("expand",t,n)},renderExpandIndentCell:function(t,n){var r=this.prefixCls,a=this.expandIconAsCell;if(!(!a||n==="right"||!t.length)){var 
o={key:"rc-table-expand-icon-cell",className:"".concat(r,"-expand-icon-th"),title:"",rowSpan:t.length};t[0].unshift(P(P({},o),{column:o}))}},renderExpandedRow:function(t,n,r,a,o,i,l){var s=this,c=this.prefixCls,d=this.expandIconAsCell,f=this.indentSize,p=o[o.length-1],v="".concat(p,"-extra-row"),m={body:{row:"tr",cell:"td"}},y;l==="left"?y=this.columnManager.leftLeafColumns.value.length:l==="right"?y=this.columnManager.rightLeafColumns.value.length:y=this.columnManager.leafColumns.value.length;var b=[{key:"extra-row",customRender:function(){var S=s.store.expandedRowKeys,w=S.includes(p);return{props:{colSpan:y},children:l!=="right"?r({record:t,index:n,indent:i,expanded:w}):"&nbsp;"}}}];return d&&l!=="right"&&b.unshift({key:"expand-icon-placeholder",customRender:function(){return null}}),g(OR,{key:v,columns:b,class:a,rowKey:v,ancestorKeys:o,prefixCls:"".concat(c,"-expanded-row"),indentSize:f,indent:i,fixed:l,components:m,expandedRow:!0,hasExpandIcon:function(){}},null)},renderRows:function(t,n,r,a,o,i,l,s){var c=this.expandedRowClassName,d=this.expandedRowRender,f=this.childrenColumnName,p=r[f],v=[].concat(Je(s),[l]),m=o+1;d&&n.push(this.renderExpandedRow(r,a,d,c(r,a,o),v,m,i)),p&&n.push.apply(n,Je(t(p,m,v)))}},render:function(){var t=this.data,n=this.childrenColumnName,r=Qe(this),a=t.some(function(o){return o[n]});return ht(this,"default",{props:P(P({},r),this.$attrs),needIndentSpaced:a,renderRows:this.renderRows,handleExpandChange:this.handleExpandChange,renderExpandIndentCell:this.renderExpandIndentCell})}},IIe=MIe;function NIe(e){var t=H(e),n=t.value,r=[],a=H();function o(i){en.cancel(a.value),r.push(i),a.value=en(function(){var l=r;r=[],l.forEach(function(s){n=s(n)}),t.value=n})}return Lt(function(){en.cancel(a.value)}),[t,o]}function AIe(e){var t=function f(p){var v=arguments.length>1&&arguments[1]!==void 0?arguments[1]:!1,m=[];return p.forEach(function(y){y.fixed=v||y.fixed,y.children?m.push.apply(m,Je(f(y.children,y.fixed))):m.push(y)}),m},n=x(function(){var 
f=function p(v){var m=arguments.length>1&&arguments[1]!==void 0?arguments[1]:0,y=arguments.length>2&&arguments[2]!==void 0?arguments[2]:{},b=arguments.length>3&&arguments[3]!==void 0?arguments[3]:[],C=arguments.length>4&&arguments[4]!==void 0?arguments[4]:!1;b[m]=b[m]||[];var S=[],w=function($){var O=b.length-m;$&&!$.children&&O>1&&(!$.rowSpan||$.rowSpan<O)&&($.rowSpan=O)};return v.forEach(function(k,$){var O=P({},k);O.fixed=C||k.fixed,b[m].push(O),y.colSpan=y.colSpan||0,O.children&&O.children.length>0?(O.children=p(O.children,m+1,O,b,O.fixed),y.colSpan+=O.colSpan):y.colSpan+=1;for(var T=0;T<b[m].length-1;T+=1)w(b[m][T]);$+1===v.length&&w(O),S.push(O)}),S};return f(e.value)}),
/* The remaining entries are derived from the column list e.value:
   r / a / o  - whether any top-level column has fixed set at all, fixed
                "left" (or true), or fixed "right";
   i / l      - grouped columns (n.value) filtered to fixed left / right;
   s / c / d  - leaf columns of all / left / right columns, flattened by t.
   NOTE(review): x is presumably Vue's computed(); confirm at its definition. */
r=x(function(){return e.value.some(function(f){return!!f.fixed})}),a=x(function(){return e.value.some(function(f){return f.fixed==="left"||f.fixed===!0})}),o=x(function(){return e.value.some(function(f){return f.fixed==="right"})}),i=x(function(){return n.value.filter(function(f){return f.fixed==="left"||f.fixed===!0})}),l=x(function(){return n.value.filter(function(f){return f.fixed==="right"})}),s=x(function(){return t(e.value)}),c=x(function(){return t(i.value)}),d=x(function(){return t(l.value)});return{groupedColumns:n,isAnyColumnsFixed:r,isAnyColumnsLeftFixed:a,isAnyColumnsRightFixed:o,leftColumns:i,rightColumns:l,leafColumns:s,leftLeafColumns:c,rightLeafColumns:d}}
/* DIe(e, t): build the sticky offsets for the columns.
   e - holder whose .value is indexed per column and read as a width
       (missing or falsy entries count as 0);
   t - holder whose .value.length gives the column count, mirrored into r.
   The callback walks the columns once: left[s] is the running total of the
   widths before column s, right[c] (c counting down from the last column) is
   the running total of the widths after column c. The result is stored in
   n.value as {left, right} and n is returned.
   NOTE(review): H and ce are presumably Vue's ref() and watch(). Only the
   first ce call passes {immediate:!0}; if ce is watch(), n keeps its initial
   empty arrays until e or r changes - confirm. */
function DIe(e,t){var n=H({left:[],right:[]}),r=H();return ce(t,function(){r.value=t.value.length},{immediate:!0}),ce([e,r],function(){for(var a=[],o=[],i=0,l=0,s=0;s<r.value;s+=1){a[s]=i,i+=e.value[s]||0;var c=r.value-s-1;o[c]=l,l+=e.value[c]||0}n.value={left:a,right:o}}),n}var 
RIe=G({name:"Table",mixins:[nt],inheritAttrs:!1,props:An({data:u.array,useFixedHeader:u.looseBool,columns:u.array,prefixCls:u.string,bodyStyle:u.object,rowKey:u.oneOfType([u.string,u.func]),rowClassName:u.oneOfType([u.string,u.func]),customRow:u.func,customHeaderRow:u.func,showHeader:u.looseBool,title:u.func,id:u.string,footer:u.func,emptyText:u.any,scroll:u.object,rowRef:u.func,components:u.shape({table:u.any,header:u.shape({wrapper:u.any,row:u.any,cell:u.any}).loose,body:u.shape({wrapper:u.any,row:u.any,cell:u.any}).loose}).loose,expandIconAsCell:u.looseBool,expandedRowKeys:u.array,expandedRowClassName:u.func,defaultExpandAllRows:u.looseBool,defaultExpandedRowKeys:u.array,expandIconColumnIndex:u.number,expandedRowRender:u.func,childrenColumnName:u.string,indentSize:u.number,expandRowByClick:u.looseBool,expandIcon:u.func,tableLayout:u.string,transformCellText:u.func},{data:[],useFixedHeader:!1,rowKey:"key",rowClassName:function(){return""},prefixCls:"rc-table",bodyStyle:{},showHeader:!0,scroll:{},rowRef:function(){return null},emptyText:function(){return"No Data"},customHeaderRow:function(){}}),setup:function(t){var n=AIe(yn(t,"columns")),r=x(function(){return hIe(n.leafColumns.value)}),a=NIe(new Map),o=fn(a,2),i=o[0],l=o[1],s=x(function(){return r.value.map(function(w){return i.value.get(w)})}),c=DIe(s,n.leafColumns),d=function(k,$){l(function(O){if(O.get(k)!==$){var T=new Map(O);return T.set(k,$),T}return O})},f=x(function(){return n.leafColumns.value.map(function(w,k){return $R(k,k,n.leafColumns.value,c.value)})}),p=bt({currentHoverKey:null,fixedColumnsHeadRowsHeight:[],fixedColumnsBodyRowsHeight:{},expandedRowsHeight:{},expandedRowKeys:[],columnManager:n,fixedInfoList:f,stickyOffsets:c});ot("table-store",p);var v=H(),m=H(!1),y=H(!1),b=x(function(){return t.scroll&&pIe(t.scroll.x)}),C=function(k){var 
$=k.scrollWidth,O=k.clientWidth,T=k.scrollLeft;m.value=T>0,y.value=T<$-O};ur(function(){Ne(function(){b.value&&C(v.value.$el)})}),et(function(){Ne(function(){b.value&&C(v.value.$el)})});var S=function(){b.value&&C(v.value.$el)};return{bodyRef:v,store:p,onColumnResize:d,columnManager:n,onScroll:C,pingedLeft:m,pingedRight:y,onFullTableResize:S}},data:function(){return this.preData=Je(this.data),{sComponents:ps(L$({table:"table",header:{wrapper:"thead",row:"tr",cell:"th"},body:{wrapper:"tbody",row:"tr",cell:"td"}},this.components))}},computed:{dataLen:function(){return this.$props.data.length}},watch:{components:function(){this._components=L$({table:"table",header:{wrapper:"thead",row:"tr",cell:"th"},body:{wrapper:"tbody",row:"tr",cell:"td"}},this.components)},dataLen:function(t,n){var r=this;(t===0||n===0)&&this.hasScrollX()&&Ne(function(){r.resetScrollX()})}},created:function(){ot("table",this),this.setScrollPosition("left"),this.debouncedWindowResize=cIe(this.handleWindowResize,150)},mounted:function(){var t=this;this.$nextTick(function(){t.columnManager.isAnyColumnsFixed.value&&(t.handleWindowResize(),t.resizeEvent=Kn(window,"resize",t.debouncedWindowResize)),t.ref_headTable&&(t.ref_headTable.scrollLeft=0),t.ref_bodyTable&&(t.ref_bodyTable.scrollLeft=0)})},updated:function(){var t=this;this.$nextTick(function(){t.columnManager.isAnyColumnsFixed.value&&(t.handleWindowResize(),t.resizeEvent||(t.resizeEvent=Kn(window,"resize",t.debouncedWindowResize)))})},beforeUnmount:function(){this.resizeEvent&&this.resizeEvent.remove(),this.debouncedWindowResize&&this.debouncedWindowResize.cancel()},methods:{getRowKey:function(t,n){var r=this.rowKey,a=typeof r=="function"?r(t,n):t[r];return on(a!==void 0,"Each record in table should have a unique `key` prop,or set `rowKey` to an unique primary key."),a===void 0?n:a},setScrollPosition:function(t){if(this.scrollPosition=t,this.tableNode){var n=this.prefixCls;t==="both"?g1(this.tableNode).remove(new 
RegExp("^".concat(n,"-scroll-position-.+$"))).add("".concat(n,"-scroll-position-left")).add("".concat(n,"-scroll-position-right")):g1(this.tableNode).remove(new RegExp("^".concat(n,"-scroll-position-.+$"))).add("".concat(n,"-scroll-position-").concat(t))}},setScrollPositionClassName:function(){var t=this.ref_bodyTable,n=t.scrollLeft===0,r=t.scrollLeft+1>=t.children[0].getBoundingClientRect().width-t.getBoundingClientRect().width;n&&r?this.setScrollPosition("both"):n?this.setScrollPosition("left"):r?this.setScrollPosition("right"):this.scrollPosition!=="middle"&&this.setScrollPosition("middle")},isTableLayoutFixed:function(){var t=this.$props,n=t.tableLayout,r=t.columns,a=r===void 0?[]:r,o=t.useFixedHeader,i=t.scroll,l=i===void 0?{}:i;return typeof n!="undefined"?n==="fixed":!!(a.some(function(s){var c=s.ellipsis;return!!c})||o||l.y||l.x&&l.x!==!0&&l.x!=="max-content")},handleWindowResize:function(){this.syncFixedTableRowHeight(),this.setScrollPositionClassName()},syncFixedTableRowHeight:function(){var t=this.tableNode.getBoundingClientRect();if(!(t.height!==void 0&&t.height<=0)){var n=this.prefixCls,r=this.ref_headTable?this.ref_headTable.querySelectorAll("thead"):this.ref_bodyTable.querySelectorAll("thead"),a=this.ref_bodyTable.querySelectorAll(".".concat(n,"-row"))||[],o=[].map.call(r,function(s){return s.getBoundingClientRect().height?s.getBoundingClientRect().height-.5:"auto"}),i=this.store,l=[].reduce.call(a,function(s,c){var d=c.getAttribute("data-row-key"),f=c.getBoundingClientRect().height||i.fixedColumnsBodyRowsHeight[d]||"auto";return s[d]=f,s},{});Mr(i.fixedColumnsHeadRowsHeight,o)&&Mr(i.fixedColumnsBodyRowsHeight,l)||(this.store.fixedColumnsHeadRowsHeight=o,this.store.fixedColumnsBodyRowsHeight=l)}},resetScrollX:function(){this.ref_headTable&&(this.ref_headTable.scrollLeft=0),this.ref_bodyTable&&(this.ref_bodyTable.scrollLeft=0)},hasScrollX:function(){var t=this.scroll,n=t===void 0?{}:t;return"x"in n},handleBodyScrollLeft:function(t){var 
n=t.target,r=this.scroll,a=r===void 0?{}:r,o=this.ref_headTable,i=this.ref_bodyTable;n.scrollLeft!==this.lastScrollLeft&&a.x&&(n===i&&o?o.scrollLeft=n.scrollLeft:n===o&&i&&(i.scrollLeft=n.scrollLeft),this.setScrollPositionClassName()),this.lastScrollLeft=n.scrollLeft},
/* Vertical scroll sync. Ignores events whose target is not the element the
   listener is attached to (currentTarget). When scrollTop changed, scroll.y
   is set and the source is not the head table, the new scrollTop is copied
   to the left/right fixed-body refs and the body ref, skipping whichever one
   is the source. lastScrollTop is always updated afterwards. */
handleBodyScrollTop:function(t){var n=t.target;if(t.currentTarget===n){var r=this.scroll,a=r===void 0?{}:r,o=this.ref_headTable,i=this.ref_bodyTable,l=this.ref_fixedColumnsBodyLeft,s=this.ref_fixedColumnsBodyRight;if(n.scrollTop!==this.lastScrollTop&&a.y&&n!==o){var c=n.scrollTop;l&&n!==l&&(l.scrollTop=c),s&&n!==s&&(s.scrollTop=c),i&&n!==i&&(i.scrollTop=c)}this.lastScrollTop=n.scrollTop}},
/* Scroll entry point handed to BodyTable: updates the ping state through
   onScroll, then runs the horizontal and the vertical sync. */
handleBodyScroll:function(t){this.onScroll(t.target),this.handleBodyScrollLeft(t),this.handleBodyScrollTop(t)},
/* Wheel handler. Only acts when the user agent string matches the Trident 7
   pattern and scroll.y is set: it cancels the default wheel behaviour and
   sets scrollTop (lastScrollTop + deltaY, or deltaY alone when lastScrollTop
   is falsy) on every body ref other than the event target. */
handleWheel:function(t){var n=this.$props.scroll,r=n===void 0?{}:n;if(window.navigator.userAgent.match(/Trident\/7\./)&&r.y){t.preventDefault();var a=t.deltaY,o=t.target,i=this.ref_bodyTable,l=this.ref_fixedColumnsBodyLeft,s=this.ref_fixedColumnsBodyRight,c=0;this.lastScrollTop?c=this.lastScrollTop+a:c=a,l&&o!==l&&(l.scrollTop=c),s&&o!==s&&(s.scrollTop=c),i&&o!==i&&(i.scrollTop=c)}},
/* saveRef(name) returns a setter that stores its argument on this instance
   as "ref_" + name; saveTableNodeRef stores the root node as tableNode. */
saveRef:function(t){var n=this;return function(r){n["ref_".concat(t)]=r}},saveTableNodeRef:function(t){this.tableNode=t},
/* Builds [table parts, empty-text placeholder, footer]. When any column is
   fixed or scroll.x / scroll.y is set, the list is wrapped in a
   "<prefixCls>-scroll" div inside zo with onResize bound to
   onFullTableResize; otherwise the bare list is returned. */
renderMainTable:function(){var t=this.scroll,n=this.prefixCls,r=this.columnManager.isAnyColumnsFixed.value,a=r||t.x||t.y,o=[this.renderTable({columns:this.columnManager.groupedColumns.value,isAnyColumnsFixed:r}),this.renderEmptyText(),this.renderFooter()];return a?g(zo,{onResize:this.onFullTableResize},{default:function(){return[g("div",{class:"".concat(n,"-scroll")},[o])]}}):o},renderTable:function(t){var n=t.columns,r=t.isAnyColumnsFixed,a=this.prefixCls,o=this.scroll,i=o===void 
0?{}:o,l=i.x?"".concat(a,"-fixed"):"",s=g(xIe,{key:"head",columns:n,tableClassName:l,handleBodyScrollLeft:this.handleBodyScrollLeft,expander:this.expander},null),c=g(_Ie,{key:"body",columns:n,tableClassName:l,getRowKey:this.getRowKey,handleWheel:this.handleWheel,handleBodyScroll:this.handleBodyScroll,expander:this.expander,isAnyColumnsFixed:r,ref:"bodyRef"},null);return[s,c]},renderTitle:function(){var t=this.title,n=this.prefixCls,r=this.data;return t?g("div",{class:"".concat(n,"-title"),key:"title"},[t(r)]):null},renderFooter:function(){var t=this.footer,n=this.prefixCls,r=this.data;return t?g("div",{class:"".concat(n,"-footer"),key:"footer"},[t(r)]):null},renderEmptyText:function(){var t=this.emptyText,n=this.prefixCls,r=this.data;if(r.length)return null;var a="".concat(n,"-placeholder");return g("div",{class:a,key:"emptyText"},[typeof t=="function"?t():t])}},render:function(){var t,n=this,r=P(P({},Qe(this)),this.$attrs),a=this.columnManager,o=this.getRowKey,i=r.prefixCls,l=Se(r.prefixCls,r.class,(t={},V(t,"".concat(i,"-fixed-header"),r.useFixedHeader||r.scroll&&r.scroll.y),V(t,"".concat(i,"-scroll-position-left ").concat(i,"-scroll-position-right"),this.scrollPosition==="both"),V(t,"".concat(i,"-scroll-position-").concat(this.scrollPosition),this.scrollPosition!=="both"),V(t,"".concat(i,"-layout-fixed"),this.isTableLayoutFixed()),V(t,"".concat(i,"-ping-left"),this.pingedLeft),V(t,"".concat(i,"-ping-right"),this.pingedRight),t)),s=fIe(r),c=P(P({},r),{columnManager:a,getRowKey:o});return g(IIe,c,{default:function(f){return n.expander=f,g("div",le({ref:n.saveTableNodeRef,class:l,style:r.style,id:r.id},s),[n.renderTitle(),g("div",{class:"".concat(i,"-content")},[n.renderMainTable()])])}})}}),LIe={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M349 838c0 17.7 14.2 32 31.8 32h262.4c17.6 0 31.8-14.3 31.8-32V642H349v196zm531.1-684H143.9c-24.5 0-39.8 26.7-27.5 48l221.3 
376h348.8l221.3-376c12.1-21.3-3.2-48-27.7-48z"}}]},name:"filter",theme:"filled"},FIe=LIe;function Lx(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){BIe(e,a,n[a])})}return e}function BIe(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var BS=function(t,n){var r=Lx({},t,n.attrs);return g(Et,Lx({},r,{icon:FIe}),null)};BS.displayName="FilterFilled";BS.inheritAttrs=!1;var VIe=BS,TR=function(t,n){var r=n.slots,a;return g("div",{class:t.class,onClick:function(i){return i.stopPropagation()}},[(a=r.default)===null||a===void 0?void 0:a.call(r)])};TR.inheritAttrs=!1;var Fx=TR;$S();var zIe=u.shape({text:u.string,value:u.string,children:u.array}).loose,HIe={title:u.VNodeChild,key:u.oneOfType([u.string,u.number]),dataIndex:u.string,customRender:u.func,customCell:u.func,customHeaderCell:u.func,align:u.oneOf(rt("left","right","center")),ellipsis:u.looseBool,filters:u.arrayOf(zIe),onFilter:{type:Function},filterMultiple:u.looseBool,filterDropdown:u.any,filterDropdownVisible:u.looseBool,onFilterDropdownVisibleChange:{type:Function},sorter:u.oneOfType([u.looseBool,u.func]),defaultSortOrder:u.oneOf(rt("ascend","descend")),colSpan:u.number,width:u.oneOfType([u.string,u.number]),className:u.string,fixed:an(u.oneOfType([u.looseBool,u.oneOf(rt("left","right"))])),filterIcon:u.any,filteredValue:u.array,filtered:u.looseBool,defaultFilteredValue:u.array,sortOrder:an(u.oneOfType([u.looseBool,u.oneOf(rt("ascend","descend"))])),sortDirections:u.array},xR=u.shape({filterTitle:u.string,filterConfirm:u.any,filterReset:u.any,emptyText:u.any,selectAll:u.any,selectInvert:u.any,sortTitle:u.string,expand:u.string,collapse:u.string}).loose,_R=u.oneOf(rt("checkbox","radio")),jIe={type:_R,selectedRow
Keys:u.array,getCheckboxProps:u.func,selections:an(u.oneOfType([u.array,u.looseBool])),hideDefaultSelections:u.looseBool,fixed:u.looseBool,columnWidth:u.oneOfType([u.string,u.number]),selectWay:u.oneOf(rt("onSelect","onSelectMultiple","onSelectAll","onSelectInvert")),columnTitle:u.any},KIe=N7(),WIe={prefixCls:u.string,dropdownPrefixCls:u.string,rowSelection:u.oneOfType([u.shape(jIe).loose,Object]),pagination:an(u.oneOfType([u.shape(KIe).loose,u.looseBool])),size:u.oneOf(rt("default","middle","small","large")),dataSource:u.array,components:u.object,columns:{type:Array},rowKey:u.oneOfType([u.string,u.func]),rowClassName:u.func,expandedRowRender:u.any,defaultExpandAllRows:u.looseBool,defaultExpandedRowKeys:u.array,expandedRowKeys:u.array,expandIconAsCell:u.looseBool,expandIconColumnIndex:u.number,expandRowByClick:u.looseBool,loading:u.oneOfType([u.shape(I7()).loose,u.looseBool]),locale:xR,indentSize:u.number,customRow:u.func,customHeaderRow:u.func,useFixedHeader:u.looseBool,bordered:u.looseBool,showHeader:u.looseBool,footer:u.func,title:u.func,scroll:{type:Object},childrenColumnName:u.oneOfType([u.array,u.string]),bodyStyle:u.style,sortDirections:{type:Array},tableLayout:u.string,getPopupContainer:u.func,expandIcon:u.func,transformCellText:u.func,onExpandedRowsChange:u.func,onExpand:u.func,onChange:u.func,onRowClick:u.func},UIe={propsSymbol:u.any,store:u.any,locale:u.any,disabled:u.looseBool,getCheckboxPropsByItem:u.func,getRecordKey:u.func,data:u.array,prefixCls:u.string,hideDefaultSelections:u.looseBool,selections:u.oneOfType([u.array,u.looseBool]),getPopupContainer:u.func,onSelect:u.func},YIe={store:u.any,type:_R,defaultSelection:u.array,rowIndex:u.oneOfType([u.string,u.number]),name:u.string,disabled:u.looseBool,id:u.string},qIe={locale:xR,selectedKeys:u.array,column:u.object,confirmFilter:u.func,prefixCls:u.string,dropdownPrefixCls:u.string,getPopupContainer:u.func,handleFilter:u.func},GIe={methods:{setState:function(){var 
t=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{},n=arguments.length>1?arguments[1]:void 0,r=typeof t=="function"?t(this,this.$props):t;if(this.getDerivedStateFromProps){var a=this.getDerivedStateFromProps(Qe(this),P(P({},this),r));if(a===null)return;r=P(P({},r),a||{})}P(this,r),this._.isMounted&&this.$forceUpdate(),Ne(function(){n&&n()})},__emit:function(){var t=[].slice.call(arguments,0),n=t[0];n="on".concat(n[0].toUpperCase()).concat(n.substring(1));var r=this.$props[n]||this.$attrs[n];if(t.length&&r)if(Array.isArray(r))for(var a=0,o=r.length;a<o;a++)r[a].apply(r,Je(t.slice(1)));else r.apply(void 0,Je(t.slice(1)))}}};function Bx(){var e=arguments.length>0&&arguments[0]!==void 0?arguments[0]:[],t=arguments.length>1&&arguments[1]!==void 0?arguments[1]:"children",n=[],r=function a(o){o.forEach(function(i){if(i[t]){var l=P({},i);delete l[t],n.push(l),i[t].length>0&&a(i[t])}else n.push(i)})};return r(e),n}function Zp(e,t){var n=arguments.length>2&&arguments[2]!==void 0?arguments[2]:"children";return e.map(function(r,a){var o={};return r[n]&&(o[n]=Zp(r[n],t,n)),P(P({},t(r,a)),o)})}function _d(e,t){return e.reduce(function(n,r){if(t(r)&&n.push(r),r.children){var a=_d(r.children,t);n.push.apply(n,Je(a))}return n},[])}function ER(e){var t=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{};return(e||[]).forEach(function(n){var r=n.value,a=n.children;t[r.toString()]=r,ER(a,t)}),t}function kb(e){e.stopPropagation()}var XIe=G({name:"FilterMenu",mixins:[GIe],inheritAttrs:!1,props:Rn(qIe,{column:{}}),setup:function(t){var n=x(function(){return t.selectedKeys}),r=x(function(){return"filterDropdownVisible"in t.column?t.column.filterDropdownVisible:!1}),a=x(function(){return ER(t.column.filters)}),o=bt({neverShown:!1,sSelectedKeys:n.value,sKeyPathOfSelectedItem:{},sVisible:r.value,sValueKeys:a.value});return 
ce(n,function(){o.sSelectedKeys=n.value}),ce(r,function(){o.sVisible=r.value}),ce(a,function(){o.sValueKeys=a.value}),o},methods:{getDropdownVisible:function(){return!!this.sVisible},setSelectedKeys:function(t){var n=t.selectedKeys;this.setState({sSelectedKeys:n})},setVisible:function(t){var n=this.column;"filterDropdownVisible"in n||this.setState({sVisible:t}),n.onFilterDropdownVisibleChange&&n.onFilterDropdownVisibleChange(t)},handleClearFilters:function(){this.setState({sSelectedKeys:[]},this.handleConfirm)},handleConfirm:function(){this.setVisible(!1),this.$forceUpdate(),Ne(this.confirmFilter2)},onVisibleChange:function(t){this.setVisible(t);var n=this.$props.column;!t&&!(n.filterDropdown instanceof Function)&&this.confirmFilter2()},handleMenuItemClick:function(t){var n=this.sSelectedKeys;if(!(!t.keyPath||t.keyPath.length<=1)){var r=this.sKeyPathOfSelectedItem;n&&n.indexOf(t.key)>=0?delete r[t.key]:r[t.key]=t.keyPath,this.setState({sKeyPathOfSelectedItem:r})}},hasSubMenu:function(){var t=this.column.filters,n=t===void 0?[]:t;return n.some(function(r){return!!(r.children&&r.children.length>0)})},confirmFilter2:function(){var t=this.$props,n=t.column,r=t.selectedKeys,a=t.confirmFilter,o=this.sSelectedKeys,i=this.sValueKeys,l=n.filterDropdown;Mr(o,r)||a(n,l?o:o.map(function(s){return i[s]}).filter(function(s){return s!==void 0}))},renderMenus:function(t){var n=this,r=this.$props,a=r.dropdownPrefixCls,o=r.prefixCls;return t.map(function(i){if(i.children&&i.children.length>0){var l=n.sKeyPathOfSelectedItem,s=Object.keys(l).some(function(d){return l[d].indexOf(i.value)>=0}),c=Se("".concat(o,"-dropdown-submenu"),V({},"".concat(a,"-submenu-contain-selected"),s));return g(sc,{title:i.text,popupClassName:c,key:i.value},{default:function(){return[n.renderMenus(i.children)]}})}return n.renderMenuItem(i)})},renderFilterIcon:function(){var t,n,r,a=this.column,o=this.locale,i=this.prefixCls,l=this.selectedKeys,s=l&&l.length>0,c=a.filterIcon;typeof 
c=="function"&&(c=c({filtered:s,column:a}));var d=Se((t={},V(t,"".concat(i,"-selected"),"filtered"in a?a.filtered:s),V(t,"".concat(i,"-open"),this.getDropdownVisible()),t));return c?c.length===1&&zn(c[0])?Ot(c[0],{title:((n=c.props)===null||n===void 0?void 0:n.title)||o.filterTitle,onClick:kb,class:Se("".concat(i,"-icon"),d,(r=c.props)===null||r===void 0?void 0:r.class)}):g("span",{class:Se("".concat(i,"-icon"),d),onClick:kb},[c]):g(VIe,{title:o.filterTitle,class:d,onClick:kb},null)},renderMenuItem:function(t){var n=this.column,r=this.sSelectedKeys,a="filterMultiple"in n?n.filterMultiple:!0,o=a?g(ro,{checked:r&&r.indexOf(t.value)>=0},null):g(_r,{checked:r&&r.indexOf(t.value)>=0},null);return g(fl,{key:t.value},{default:function(){return[o,g("span",null,[t.text])]}})}},render:function(){var t=this,n=this.sSelectedKeys,r=this.column,a=this.locale,o=this.prefixCls,i=this.dropdownPrefixCls,l=this.getPopupContainer,s="filterMultiple"in r?r.filterMultiple:!0,c=Se(V({},"".concat(i,"-menu-without-submenu"),!this.hasSubMenu())),d=r.filterDropdown;d instanceof Function&&(d=d({prefixCls:"".concat(i,"-custom"),setSelectedKeys:function(v){return t.setSelectedKeys({selectedKeys:v})},selectedKeys:n,confirm:this.handleConfirm,clearFilters:this.handleClearFilters,filters:r.filters,visible:this.getDropdownVisible(),column:r}));var f=d?g(Fx,{class:"".concat(o,"-dropdown")},{default:function(){return[d]}}):g(Fx,{class:"".concat(o,"-dropdown")},{default:function(){return[g(jr,{multiple:s,onClick:t.handleMenuItemClick,prefixCls:"".concat(i,"-menu"),class:c,onSelect:t.setSelectedKeys,onDeselect:t.setSelectedKeys,selectedKeys:n,getPopupContainer:l},{default:function(){return[t.renderMenus(r.filters)]}}),g("div",{class:"".concat(o,"-dropdown-btns")},[g("a",{class:"".concat(o,"-dropdown-link confirm"),onClick:t.handleConfirm},[a.filterConfirm]),g("a",{class:"".concat(o,"-dropdown-link clear"),onClick:t.handleClearFilters},[a.filterReset])])]}});return 
g(hi,{trigger:["click"],placement:"bottomRight",visible:this.getDropdownVisible(),onVisibleChange:this.onVisibleChange,getPopupContainer:l,forceRender:!0,overlay:f},{default:function(){return[t.renderFilterIcon()]}})}}),ZIe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},JIe=G({name:"SelectionBox",mixins:[nt],inheritAttrs:!1,props:YIe,setup:function(t){return{checked:x(function(){var n=t.store,r=t.defaultSelection,a=t.rowIndex,o=!1;return n.selectionDirty?o=n.selectedRowKeys.indexOf(a)>=0:o=n.selectedRowKeys.indexOf(a)>=0||r.indexOf(a)>=0,o})}},render:function(){var t=P(P({},Qe(this)),this.$attrs),n=t.type,r=t.rowIndex,a=ZIe(t,["type","rowIndex"]),o=this.checked,i=P({checked:o},a);return n==="radio"?(i.value=r,g(_r,i,null)):g(ro,i,null)}});function Ql(e){var t=e.store,n=e.getCheckboxPropsByItem,r=e.getRecordKey,a=e.data,o=e.type,i=e.byDefaultChecked;return i?a[o](function(l,s){return n(l,s).defaultChecked}):a[o](function(l,s){return t.selectedRowKeys.indexOf(r(l,s))>=0})}function QIe(e){var t=e.store,n=e.data;if(!n.length)return!1;var r=Ql(P(P({},e),{data:n,type:"some",byDefaultChecked:!1}))&&!Ql(P(P({},e),{data:n,type:"every",byDefaultChecked:!1})),a=Ql(P(P({},e),{data:n,type:"some",byDefaultChecked:!0}))&&!Ql(P(P({},e),{data:n,type:"every",byDefaultChecked:!0}));return t.selectionDirty?r:r||a}function eNe(e){var t=e.store,n=e.data;return n.length?t.selectionDirty?Ql(P(P({},e),{data:n,type:"every",byDefaultChecked:!1})):Ql(P(P({},e),{data:n,type:"every",byDefaultChecked:!1}))||Ql(P(P({},e),{data:n,type:"every",byDefaultChecked:!0})):!1}var 
tNe=G({name:"SelectionCheckboxAll",mixins:[nt],inheritAttrs:!1,props:UIe,setup:function(t){return{defaultSelections:[],checked:x(function(){return eNe(t)}),indeterminate:x(function(){return QIe(t)})}},created:function(){var t=this.$props;this.defaultSelections=t.hideDefaultSelections?[]:[{key:"all",text:t.locale.selectAll},{key:"invert",text:t.locale.selectInvert}]},methods:{handleSelectAllChange:function(t){var n=t.target.checked;this.$emit("select",n?"all":"removeAll",0,null)},renderMenus:function(t){var n=this;return t.map(function(r,a){return g(jr.Item,{key:r.key||a},{default:function(){return[g("div",{onClick:function(){n.$emit("select",r.key,a,r.onSelect)}},[r.text])]}})})}},render:function(){var t=this,n=this.disabled,r=this.prefixCls,a=this.selections,o=this.getPopupContainer,i=this.checked,l=this.indeterminate,s="".concat(r,"-selection"),c=null;if(a){var d=Array.isArray(a)?this.defaultSelections.concat(a):this.defaultSelections,f=g(jr,{class:"".concat(s,"-menu"),selectedKeys:[]},{default:function(){return[t.renderMenus(d)]}});c=d.length>0?g(hi,{getPopupContainer:o,overlay:f},{default:function(){return[g("div",{class:"".concat(s,"-down")},[g(Rs,null,null)])]}}):null}return g("div",{class:s},[g(ro,{class:Se(V({},"".concat(s,"-select-all-custom"),c)),checked:i,indeterminate:l,disabled:n,onChange:this.handleSelectAllChange},null),c])}}),nNe=G({name:"ATableColumn",props:HIe,render:function(){return null}}),rNe=G({name:"ATableColumnGroup",props:{fixed:an(u.oneOfType([u.looseBool,u.oneOf(rt("left","right"))])),title:u.any},__ANT_TABLE_COLUMN_GROUP:!0,render:function(){return null}}),aNe={store:u.object,rowKey:u.oneOfType([u.string,u.number]),prefixCls:u.string};function oNe(){var e=arguments.length>0&&arguments[0]!==void 0?arguments[0]:"tr",t=G({name:"BodyRow",inheritAttrs:!1,props:aNe,setup:function(r){return{selected:x(function(){var a;return((a=r.store)===null||a===void 0?void 0:a.selectedRowKeys.indexOf(r.rowKey))>=0})}},render:function(){var 
r,a=this,o=bn(P(P({},this.$props),this.$attrs),["prefixCls","rowKey","store","class"]),i=(r={},V(r,"".concat(this.prefixCls,"-row-selected"),this.selected),V(r,this.$attrs.class,!!this.$attrs.class),r);return g(e,le({class:i},o),{default:function(){return[ht(a)]}})}});return t}var Vx=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n};function zx(){}function iNe(e){e.stopPropagation()}function Zh(e){return e.rowSelection||{}}function Wa(e,t){return e.key||e.dataIndex||t}function Hx(e,t){return e&&t&&e.key&&e.key===t.key?!0:e===t||Mr(e,t,function(n,r){if(typeof n=="function"&&typeof r=="function")return n===r||n.toString()===r.toString();if(Array.isArray(n)&&Array.isArray(r))return n===r||Mr(n,r)})}var jx={onChange:zx,onShowSizeChange:zx},lNe={},Kx=function(){var t=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{},n=t&&t.body&&t.body.row;return P(P({},t),{body:P(P({},t.body),{row:oNe(n)})})};function sNe(){var e=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{},t=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{};return e===t||["table","header","body"].every(function(n){return Mr(e[n],t[n])})}function E1(e,t){return _d(t||(e||{}).columns||[],function(n){return typeof n.filteredValue!="undefined"})}function Wx(e,t){var n={};return E1(e,t).forEach(function(r){var a=Wa(r);n[a]=r.filteredValue}),n}function uNe(e,t){return Object.keys(t).length!==Object.keys(e.filters).length?!0:Object.keys(t).some(function(n){return t[n]!==e.filters[n]})}var 
MR=Rn(WIe,{dataSource:[],useFixedHeader:!1,size:"default",loading:!1,bordered:!1,indentSize:20,locale:{},rowKey:"key",showHeader:!0,sortDirections:["ascend","descend"],childrenColumnName:"children"}),$b=G({name:"Table",mixins:[nt],inheritAttrs:!1,Column:nNe,ColumnGroup:rNe,props:MR,setup:function(t){var n=bt({selectedRowKeys:Zh(t).selectedRowKeys||[],selectionDirty:!1});return{vcTable:null,checkboxPropsCache:{},store:n,configProvider:ve("configProvider",St)}},data:function(){var t=Qe(this);on(!t.expandedRowRender||!("scroll"in t),"`expandedRowRender` and `scroll` are not compatible. Please use one of them at one time.");var n=this.getDefaultSortOrder,r=this.getDefaultFilters,a=this.getDefaultPagination;return P(P({},n(t.columns||[])),{sFilters:r(t.columns),sPagination:a(this.$props),pivot:void 0,sComponents:ps(Kx(this.components)),filterDataCnt:0})},watch:{pagination:{handler:function(t){this.setState(function(n){var r=P(P(P({},jx),n.sPagination),t);return r.current=r.current||1,r.pageSize=r.pageSize||10,{sPagination:t!==!1?r:lNe}})},deep:!0},rowSelection:{handler:function(t,n){if(t&&"selectedRowKeys"in t){this.store.selectedRowKeys=t.selectedRowKeys||[];var r=this.rowSelection;r&&t.getCheckboxProps!==r.getCheckboxProps&&(this.checkboxPropsCache={})}else n&&!t&&(this.store.selectedRowKeys=[])},deep:!0},dataSource:function(){this.store.selectionDirty=!1,this.checkboxPropsCache={}},columns:function(t){var n=E1({columns:t},t);if(n.length>0){var r=Wx({columns:t},t),a=P({},this.sFilters);Object.keys(r).forEach(function(o){a[o]=r[o]}),uNe({filters:this.sFilters},a)&&this.setState({sFilters:a})}},components:{handler:function(t,n){if(!sNe(t,n)){var r=Kx(t);this.setState({sComponents:r})}},deep:!0}},updated:function(){var t=this.columns,n=this.sSortColumn,r=this.sSortOrder;if(this.getSortOrderColumns(t).length>0){var 
a=this.getSortStateFromColumns(t);(!Hx(a.sSortColumn,n)||a.sSortOrder!==r)&&this.setState(a)}},methods:{setTableRef:function(t){this.vcTable=t},getCheckboxPropsByItem:function(t,n){var r=Zh(this.$props);if(!r.getCheckboxProps)return{};var a=this.getRecordKey(t,n);return this.checkboxPropsCache[a]||(this.checkboxPropsCache[a]=r.getCheckboxProps(t)||{}),this.checkboxPropsCache[a]},getDefaultSelection:function(){var t=this,n=Zh(this.$props);return n.getCheckboxProps?this.getFlatData().filter(function(r,a){return t.getCheckboxPropsByItem(r,a).defaultChecked}).map(function(r,a){return t.getRecordKey(r,a)}):[]},getDefaultPagination:function(t){var n=kt(t.pagination)==="object"?t.pagination:{},r;"current"in n?r=n.current:"defaultCurrent"in n&&(r=n.defaultCurrent);var a;return"pageSize"in n?a=n.pageSize:"defaultPageSize"in n&&(a=n.defaultPageSize),this.hasPagination(t)?P(P(P({},jx),n),{current:r||1,pageSize:a||10}):{}},getSortOrderColumns:function(t){return _d(t||this.columns||[],function(n){return"sortOrder"in n})},getDefaultFilters:function(t){var n=Wx({columns:this.columns},t),r=_d(t||[],function(o){return typeof o.defaultFilteredValue!="undefined"}),a=r.reduce(function(o,i){var l=Wa(i);return o[l]=i.defaultFilteredValue,o},{});return P(P({},a),n)},getDefaultSortOrder:function(t){var n=this.getSortStateFromColumns(t),r=_d(t||[],function(a){return a.defaultSortOrder!=null})[0];return r&&!n.sortColumn?{sSortColumn:r,sSortOrder:r.defaultSortOrder}:n},getSortStateFromColumns:function(t){var n=this.getSortOrderColumns(t).filter(function(r){return r.sortOrder})[0];return n?{sSortColumn:n,sSortOrder:n.sortOrder}:{sSortColumn:null,sSortOrder:null}},getMaxCurrent:function(t){var n=this.sPagination,r=n.current,a=n.pageSize;return(r-1)*a>=t?Math.floor((t-1)/a)+1:r},getRecordKey:function(t,n){var r=this.rowKey,a=typeof r=="function"?r(t,n):t[r];return on(a!==void 0,"Table","Each record in dataSource of table should have a unique `key` prop, or set `rowKey` of Table to an unique 
primary key, "),a===void 0?n:a},getSorterFn:function(t){var n=t||this.$data,r=n.sSortOrder,a=n.sSortColumn;if(!(!r||!a||typeof a.sorter!="function"))return function(o,i){var l=a.sorter(o,i,r);return l!==0?r==="descend"?-l:l:0}},getCurrentPageData:function(){var t=this.getLocalData();this.filterDataCnt=t.length;var n,r,a=this.sPagination;return this.hasPagination()?(r=a.pageSize,n=this.getMaxCurrent(a.total||t.length)):(r=Number.MAX_VALUE,n=1),(t.length>r||r===Number.MAX_VALUE)&&(t=t.slice((n-1)*r,n*r)),t},getFlatData:function(){var t=this.$props.childrenColumnName;return Bx(this.getLocalData(null,!1),t)},getFlatCurrentPageData:function(){var t=this.$props.childrenColumnName;return Bx(this.getCurrentPageData(),t)},getLocalData:function(t){var n=this,r=arguments.length>1&&arguments[1]!==void 0?arguments[1]:!0,a=t||this.$data,o=a.sFilters,i=this.$props.dataSource,l=i||[];l=l.slice(0);var s=this.getSorterFn(a);return s&&(l=this.recursiveSort(Je(l),s)),r&&o&&Object.keys(o).forEach(function(c){var d=n.findColumn(c);if(!!d){var f=o[c]||[];if(f.length!==0){var p=d.onFilter;l=p?l.filter(function(v){return f.some(function(m){return p(m,v)})}):l}}}),l},onRow:function(t,n,r){var a=this.customRow,o=a?a(n,r):{};return P(P({},o),{prefixCls:t,store:this.store,rowKey:this.getRecordKey(n,r)})},setSelectedRowKeys:function(t,n){var r=this,a=n.selectWay,o=n.record,i=n.checked,l=n.changeRowKeys,s=n.nativeEvent,c=Zh(this.$props);c&&!("selectedRowKeys"in c)&&(this.store.selectedRowKeys=t);var d=this.getFlatData();if(!(!c.onChange&&!c[a])){var f=d.filter(function(m,y){return t.indexOf(r.getRecordKey(m,y))>=0});if(c.onChange&&c.onChange(t,f),a==="onSelect"&&c.onSelect)c.onSelect(o,i,f,s);else if(a==="onSelectMultiple"&&c.onSelectMultiple){var p=d.filter(function(m,y){return l.indexOf(r.getRecordKey(m,y))>=0});c.onSelectMultiple(i,f,p)}else if(a==="onSelectAll"&&c.onSelectAll){var v=d.filter(function(m,y){return l.indexOf(r.getRecordKey(m,y))>=0});c.onSelectAll(i,f,v)}else 
a==="onSelectInvert"&&c.onSelectInvert&&c.onSelectInvert(t)}},generatePopupContainerFunc:function(t){var n=this.$props.scroll,r=this.vcTable;return t||(n&&r?function(){return r.tableNode}:void 0)},scrollToFirstRow:function(){var t=this,n=this.$props.scroll;n&&n.scrollToFirstRowOnChange!==!1&&Zw(0,{getContainer:function(){return t.vcTable.ref_bodyTable}})},isSameColumn:function(t,n){return t&&n&&t.key&&t.key===n.key?!0:t===n||Mr(t,n,function(r,a){if(typeof r=="function"&&typeof a=="function")return r===a||r.toString()===a.toString()})},handleFilter:function(t,n){var r=this,a=this.$props,o=P({},this.sPagination),i=P(P({},this.sFilters),V({},Wa(t),n)),l=[];Zp(this.columns,function(d){d.children||l.push(Wa(d))}),Object.keys(i).forEach(function(d){l.indexOf(d)<0&&delete i[d]}),a.pagination&&(o.current=1,o.onChange(o.current));var s={sPagination:o,sFilters:{}},c=P({},i);E1({columns:a.columns}).forEach(function(d){var f=Wa(d);f&&delete c[f]}),Object.keys(c).length>0&&(s.sFilters=c),kt(a.pagination)==="object"&&"current"in a.pagination&&(s.sPagination=P(P({},o),{current:this.sPagination.current})),this.setState(s,function(){r.scrollToFirstRow(),r.store.selectionDirty=!1,r.$emit.apply(r,["change"].concat(Je(r.prepareParamsArguments(P(P({},r.$data),{sSelectionDirty:!1,sFilters:i,sPagination:o})))))})},handleSelect:function(t,n,r){var a=this,o=r.target.checked,i=r.nativeEvent,l=this.store.selectionDirty?[]:this.getDefaultSelection(),s=this.store.selectedRowKeys.concat(l),c=this.getRecordKey(t,n),d=this.$data.pivot,f=this.getFlatCurrentPageData(),p=n;if(this.$props.expandedRowRender&&(p=f.findIndex(function(S){return a.getRecordKey(S,n)===c})),i.shiftKey&&d!==void 0&&p!==d){for(var v=[],m=Math.sign(d-p),y=Math.abs(d-p),b=0,C=function(){var w=p+b*m;b+=1;var k=f[w],$=a.getRecordKey(k,w),O=a.getCheckboxPropsByItem(k,w);O.disabled||(s.includes($)?o||(s=s.filter(function(T){return 
$!==T}),v.push($)):o&&(s.push($),v.push($)))};b<=y;)C();this.setState({pivot:p}),this.store.selectionDirty=!0,this.setSelectedRowKeys(s,{selectWay:"onSelectMultiple",record:t,checked:o,changeRowKeys:v,nativeEvent:i})}else o?s.push(this.getRecordKey(t,p)):s=s.filter(function(S){return c!==S}),this.setState({pivot:p}),this.store.selectionDirty=!0,this.setSelectedRowKeys(s,{selectWay:"onSelect",record:t,checked:o,changeRowKeys:void 0,nativeEvent:i})},handleRadioSelect:function(t,n,r){var a=r.target.checked,o=r.nativeEvent,i=this.getRecordKey(t,n),l=[i];this.store.selectionDirty=!0,this.setSelectedRowKeys(l,{selectWay:"onSelect",record:t,checked:a,changeRowKeys:void 0,nativeEvent:o})},handleSelectRow:function(t,n,r){var a=this,o=this.getFlatCurrentPageData(),i=this.store.selectionDirty?[]:this.getDefaultSelection(),l=this.store.selectedRowKeys.concat(i),s=o.filter(function(m,y){return!a.getCheckboxPropsByItem(m,y).disabled}).map(function(m,y){return a.getRecordKey(m,y)}),c=[],d="onSelectAll",f;switch(t){case"all":s.forEach(function(m){l.indexOf(m)<0&&(l.push(m),c.push(m))}),d="onSelectAll",f=!0;break;case"removeAll":s.forEach(function(m){l.indexOf(m)>=0&&(l.splice(l.indexOf(m),1),c.push(m))}),d="onSelectAll",f=!1;break;case"invert":s.forEach(function(m){l.indexOf(m)<0?l.push(m):l.splice(l.indexOf(m),1),c.push(m),d="onSelectInvert"});break}this.store.selectionDirty=!0;var p=this.rowSelection,v=2;if(p&&p.hideDefaultSelections&&(v=0),n>=v&&typeof r=="function")return r(s);this.setSelectedRowKeys(l,{selectWay:d,checked:f,changeRowKeys:c})},handlePageChange:function(t){var n=this.$props,r=P({},this.sPagination);t?r.current=t:r.current=r.current||1;for(var a=arguments.length,o=new Array(a>1?a-1:0),i=1;i<a;i++)o[i-1]=arguments[i];r.onChange.apply(r,[r.current].concat(o));var l={sPagination:r};n.pagination&&kt(n.pagination)==="object"&&"current"in 
n.pagination&&(l.sPagination=P(P({},r),{current:this.sPagination.current})),this.setState(l,this.scrollToFirstRow),this.store.selectionDirty=!1,this.$emit.apply(this,["change"].concat(Je(this.prepareParamsArguments(P(P({},this.$data),{sSelectionDirty:!1,sPagination:r})))))},handleShowSizeChange:function(t,n){var r=this.sPagination;r.onShowSizeChange(t,n);var a=P(P({},r),{pageSize:n,current:t});this.setState({sPagination:a},this.scrollToFirstRow),this.$emit.apply(this,["change"].concat(Je(this.prepareParamsArguments(P(P({},this.$data),{sPagination:a})))))},toggleSortOrder:function(t){var n=t.sortDirections||this.sortDirections,r=this.sSortOrder,a=this.sSortColumn,o;if(Hx(a,t)&&r!==void 0){var i=n.indexOf(r)+1;o=i===n.length?void 0:n[i]}else o=n[0];var l={sSortOrder:o,sSortColumn:o?t:null};this.getSortOrderColumns().length===0&&this.setState(l,this.scrollToFirstRow),this.$emit.apply(this,["change"].concat(Je(this.prepareParamsArguments(P(P({},this.$data),l),t))))},hasPagination:function(t){return(t||this.$props).pagination!==!1},isSortColumn:function(t){var n=this.sSortColumn;return!t||!n?!1:Wa(n)===Wa(t)},prepareParamsArguments:function(t,n){var r=P({},t.sPagination);delete r.onChange,delete r.onShowSizeChange;var a=t.sFilters,o={},i=n;t.sSortColumn&&t.sSortOrder&&(i=t.sSortColumn,o.column=t.sSortColumn,o.order=t.sSortOrder),i&&(o.field=i.dataIndex,o.columnKey=Wa(i));var l={currentDataSource:this.getLocalData(t)};return[r,a,o,l]},findColumn:function(t){var n;return Zp(this.columns,function(r){Wa(r)===t&&(n=r)}),n},recursiveSort:function(t,n){var r=this,a=this.childrenColumnName,o=a===void 0?"children":a;return t.sort(n).map(function(i){return i[o]?P(P({},i),V({},o,r.recursiveSort(Je(i[o]),n))):i})},renderExpandIcon:function(t){return this.expandIcon?this.expandIcon:function(n){var r=n.expandable,a=n.expanded,o=n.needIndentSpaced,i=n.record,l=n.onExpand;return r?g(Kr,{componentName:"Table",defaultLocale:lo.Table,children:function(c){var d;return 
g(cm,{class:Se("".concat(t,"-row-expand-icon"),(d={},V(d,"".concat(t,"-row-collapsed"),!a),V(d,"".concat(t,"-row-expanded"),a),d)),onClick:function(p){l(i,p)},"aria-label":a?c.collapse:c.expand,noStyle:!0},null)}},null):o?g("span",{class:"".concat(t,"-row-expand-icon ").concat(t,"-row-spaced")},null):null}},renderPagination:function(t,n){if(!this.hasPagination())return null;var r="default",a=this.sPagination;a.size?r=a.size:(this.size==="middle"||this.size==="small")&&(r="small");var o=a.position||"bottom",i=a.total||this.filterDataCnt,l=a.class,s=a.style;a.onChange,a.onShowSizeChange;var c=Vx(a,["class","style","onChange","onShowSizeChange"]),d=P(P({key:"pagination-".concat(n),class:Se(l,"".concat(t,"-pagination"))},c),{total:i,size:r,current:this.getMaxCurrent(i),style:s,onChange:this.handlePageChange,onShowSizeChange:this.handleShowSizeChange});return i>0&&(o===n||o==="both")?g(OS,d,null):null},renderSelectionBox:function(t){var n=this;return function(r){var a=r.record,o=r.index,i=n.getRecordKey(a,o),l=n.getCheckboxPropsByItem(a,o),s=function(f){t==="radio"?n.handleRadioSelect(a,o,f):n.handleSelect(a,o,f)},c=P({type:t,store:n.store,rowIndex:i,defaultSelection:n.getDefaultSelection(),onChange:s},l);return g("span",{onClick:iNe},[g(JIe,c,null)])}},renderRowSelection:function(t){var n=this,r=t.prefixCls,a=t.locale,o=t.getPopupContainer,i=this.rowSelection,l=this.columns.concat();if(i){var s=this.getFlatCurrentPageData().filter(function(p,v){return i.getCheckboxProps?!n.getCheckboxPropsByItem(p,v).disabled:!0}),c=Se("".concat(r,"-selection-column"),V({},"".concat(r,"-selection-column-custom"),i.selections)),d=V({key:"selection-column",customRender:this.renderSelectionBox(i.type),className:c,fixed:i.fixed,width:i.columnWidth,title:i.columnTitle},kR,{class:"".concat(r,"-selection-col")});if(i.type!=="radio"){var f=s.every(function(p,v){return 
n.getCheckboxPropsByItem(p,v).disabled});d.title=d.title||g(tNe,{store:this.store,locale:a,data:s,getCheckboxPropsByItem:this.getCheckboxPropsByItem,getRecordKey:this.getRecordKey,disabled:f,prefixCls:r,onSelect:this.handleSelectRow,selections:i.selections,hideDefaultSelections:i.hideDefaultSelections,getPopupContainer:this.generatePopupContainerFunc(o),propsSymbol:Symbol()},null)}"fixed"in i?d.fixed=i.fixed:l.some(function(p){return p.fixed==="left"||p.fixed===!0})&&(d.fixed="left"),l[0]&&l[0].key==="selection-column"?l[0]=d:l.unshift(d)}return l},renderColumnsDropdown:function(t){var n=this,r=t.prefixCls,a=t.dropdownPrefixCls,o=t.columns,i=t.locale,l=t.getPopupContainer,s=this.sSortOrder,c=this.sFilters;return Zp(o,function(d,f){var p,v=Wa(d,f),m,y,b=d.customHeaderCell,C=n.isSortColumn(d);if(d.filters&&d.filters.length>0||d.filterDropdown){var S=v in c?c[v]:[];m=g(XIe,{locale:i,column:d,selectedKeys:S,confirmFilter:n.handleFilter,prefixCls:"".concat(r,"-filter"),dropdownPrefixCls:a||"ant-dropdown",getPopupContainer:n.generatePopupContainerFunc(l),key:"filter-dropdown"},null)}if(d.sorter){var w=d.sortDirections||n.sortDirections,k=C&&s==="ascend",$=C&&s==="descend",O=w.indexOf("ascend")!==-1&&g(iIe,{class:"".concat(r,"-column-sorter-up ").concat(k?"on":"off"),key:"caret-up"},null),T=w.indexOf("descend")!==-1&&g(SR,{class:"".concat(r,"-column-sorter-down ").concat($?"on":"off"),key:"caret-down"},null);y=g("div",{title:i.sortTitle,class:Se("".concat(r,"-column-sorter-inner"),O&&T&&"".concat(r,"-column-sorter-inner-full")),key:"sorter"},[O,T]),b=function(I){var L={};d.customHeaderCell&&(L=P({},d.customHeaderCell(I)));var j=L.onClick;return L.onClick=function(){n.toggleSortOrder(d),j&&j.apply(void 0,arguments)},L}}return 
P(P({},d),{className:Se(d.className,(p={},V(p,"".concat(r,"-column-has-actions"),y||m),V(p,"".concat(r,"-column-has-filters"),m),V(p,"".concat(r,"-column-has-sorters"),y),V(p,"".concat(r,"-column-sort"),C&&s),p)),title:[g("span",{key:"title",class:"".concat(r,"-header-column")},[g("div",{class:y?"".concat(r,"-column-sorters"):void 0},[g("span",{class:"".concat(r,"-column-title")},[n.renderColumnTitle(d.title)]),g("span",{class:"".concat(r,"-column-sorter")},[y])])]),m],customHeaderCell:b})})},/* renderColumnTitle(title): when title is a function, calls it with the current {filters, sortOrder, sortColumn}; otherwise returns title unchanged. */renderColumnTitle:function(t){var n=this.$data,r=n.sFilters,a=n.sSortOrder,o=n.sSortColumn;return t instanceof Function?t({filters:r,sortOrder:a,sortColumn:o}):t},/* renderTable(ctx): merges props with $attrs, builds the locale (emptyText falls back to renderEmpty("Table")) and wrapper classes, adds the selection column and sorter/filter decorations, then renders the inner table component. expandIconColumnIndex defaults to 1 when the first column is the selection column, else 0, unless passed explicitly. */renderTable:function(t){var n,r=this,a=t.prefixCls,o=t.renderEmpty,i=t.dropdownPrefixCls,l=t.contextLocale,s=t.getPopupContainer,c=t.transformCellText,d=P(P({},Qe(this)),this.$attrs),f=d.showHeader,p=d.locale,v=d.getPopupContainer;d.style;var m=Vx(d,["showHeader","locale","getPopupContainer","style"]),y=this.getCurrentPageData(),b=this.expandedRowRender&&this.expandIconAsCell!==!1,C=v||s,S=P(P({},l),p);(!p||!p.emptyText)&&(S.emptyText=o("Table"));var w=Se((n={},V(n,"".concat(a,"-").concat(this.size),!0),V(n,"".concat(a,"-bordered"),this.bordered),V(n,"".concat(a,"-empty"),!y.length),V(n,"".concat(a,"-without-column-header"),!f),n)),k=this.renderRowSelection({prefixCls:a,locale:S,getPopupContainer:C}),$=this.renderColumnsDropdown({columns:k,prefixCls:a,dropdownPrefixCls:i,locale:S,getPopupContainer:C}).map(function(_,I){var L=P({},_);return L.key=Wa(L,I),L}),O=$[0]&&$[0].key==="selection-column"?1:0;"expandIconColumnIndex"in m&&(O=m.expandIconColumnIndex);var T=P(P({key:"table",expandIcon:this.renderExpandIcon(a)},m),{customRow:function(I,L){return r.onRow(a,I,L)},components:this.sComponents,prefixCls:a,data:y,columns:$,showHeader:f,expandIconColumnIndex:O,expandIconAsCell:b,emptyText:S.emptyText,transformCellText:c,class:w,ref:this.setTableRef});return g(RIe,T,null)}},/* render(): renders a wrapper div holding top pagination, the table (through renderTable) and bottom pagination; component-level getPopupContainer / transformCellText take precedence over the configProvider values. */render:function(){var 
t=this,n=this.prefixCls,r=this.dropdownPrefixCls,a=this.transformCellText,o=this.getCurrentPageData(),i=this.configProvider,l=i.getPopupContainer,s=i.transformCellText,c=this.getPopupContainer||l,d=a||s,f=this.loading;/* a boolean loading prop is normalised to {spinning: boolean} */typeof f=="boolean"&&(f={spinning:f});var p=this.configProvider.getPrefixCls,v=this.configProvider.renderEmpty,m=p("table",n),y=p("dropdown",r),b=g(Kr,{componentName:"Table",defaultLocale:lo.Table,children:function(T){return t.renderTable({prefixCls:m,renderEmpty:v,dropdownPrefixCls:y,contextLocale:T,getPopupContainer:c,transformCellText:d})}},null),C=this.hasPagination()&&o&&o.length!==0?"".concat(m,"-with-pagination"):"".concat(m,"-without-pagination"),S=P(P({},f),{class:f&&f.spinning?"".concat(C," ").concat(m,"-spin-holder"):""}),w=this.$attrs,k=w.class,$=w.style;return g("div",{class:Se("".concat(m,"-wrapper"),k),style:$},[g(ol,S,{default:function(){return[t.renderPagination(m,"top"),b,t.renderPagination(m,"bottom")]}})])}}),/* Ux(obj, excluded): uses globalThis.__rest when defined; otherwise shallow-copies obj's own string-keyed properties, plus own enumerable symbol-keyed ones, that are not listed in excluded. */Ux=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},/* ATable: public wrapper component; its render() takes columns from the `columns` prop (through updateColumns) or, when that is absent, from the child vnodes (through normalize). */So=G({name:"ATable",Column:$b.Column,ColumnGroup:$b.ColumnGroup,inheritAttrs:!1,props:MR,methods:{/* normalize(children = []): converts column vnodes into column config objects (slots other than default, props, style, class, key). Column groups recurse into their default slot; plain columns use the default slot as customRender when none was given. Falsy entries are skipped. */normalize:function(){var t=this,n=arguments.length>0&&arguments[0]!==void 0?arguments[0]:[],r=Un(n),a=[];return r.forEach(function(o){var i,l,s,c;if(!!o){var d=a0e(o),f=((i=o.props)===null||i===void 0?void 0:i.style)||{},p=((l=o.props)===null||l===void 0?void 0:l.class)||"",v=rr(o),m=o.children||{},y=m.default,b=Ux(m,["default"]),C=P(P(P({},b),v),{style:f,class:p});if(d&&(C.key=d),!((s=o.type)===null||s===void 0)&&s.__ANT_TABLE_COLUMN_GROUP)C.children=t.normalize(typeof y=="function"?y():y);else{var S=(c=o.children)===null||c===void 0?void 
0:c.default;C.customRender=C.customRender||S}a.push(C)}}),a},/* updateColumns(columns = []): returns copies of the column configs without their "slots" field; for each slots entry {field: slotName}, a still-undefined column field is filled from the component slot of that name. Recurses into children. */updateColumns:function(){var t=this,n=arguments.length>0&&arguments[0]!==void 0?arguments[0]:[],r=[],a=this.$slots;return n.forEach(function(o){var i=o.slots,l=i===void 0?{}:i,s=Ux(o,["slots"]),c=P({},s);Object.keys(l).forEach(function(d){var f=l[d];c[d]===void 0&&a[f]&&(c[d]=a[f])}),o.children&&(c.children=t.updateColumns(c.children)),r.push(c)}),r}},/* render(): title and footer props win over the same-named slots; for expandedRowRender the slot wins over the prop; expandIcon prefers the prop. */render:function(){var t=this.normalize,n=this.$slots,r=P(P({},Qe(this)),this.$attrs),a=r.columns?this.updateColumns(r.columns):t(ht(this)),o=r.title,i=r.footer,l=n.title,s=n.footer,c=n.expandedRowRender,d=c===void 0?r.expandedRowRender:c,f=n.expandIcon;o=o||l,i=i||s;var p=P(P({},r),{columns:a,title:o,footer:i,expandedRowRender:d,expandIcon:this.$props.expandIcon||f});return g($b,le(le({},p),{},{ref:"table"}),null)}});/* install(app): registers ATable together with its Column and ColumnGroup components and returns app. */So.install=function(e){return e.component(So.name,So),e.component(So.Column.name,So.Column),e.component(So.ColumnGroup.name,So.ColumnGroup),e};var cNe=So.Column,dNe=So.ColumnGroup,fNe=So,hNe={prefixCls:u.string,placeholder:u.string,value:u.any,handleClear:u.func,disabled:u.looseBool,onChange:u.func},/* Search: an input followed by an action element; when value is non-empty the action is a link whose click calls the handleClear prop (unless disabled), otherwise it is a plain span. */pNe=G({name:"Search",inheritAttrs:!1,props:Rn(hNe,{placeholder:""}),methods:{handleChange:function(t){this.$emit("change",t)},handleClear2:function(t){t.preventDefault();var n=this.$props,r=n.handleClear,a=n.disabled;!a&&r&&r(t)}},render:function(){var t=Qe(this),n=t.placeholder,r=t.value,a=t.prefixCls,o=t.disabled,i=r&&r.length>0?g("a",{href:"#",class:"".concat(a,"-action"),onClick:this.handleClear2},[g(Yr,null,null)]):g("span",{class:"".concat(a,"-action")},[g(Vv,null,null)]);return g(Fe,null,[g(Jn,{placeholder:n,class:a,value:r,onChange:this.handleChange,disabled:o},null),i])}}),/* Ob(el, prop): computed style value of prop; falls back to el.style[prop] when getComputedStyle is not defined. */Ob=function(t,n){var r="";return typeof getComputedStyle!="undefined"?r=window.getComputedStyle(t,null).getPropertyValue(n):r=t.style[n],r},/* vNe(el): concatenation of the overflow, overflow-y and overflow-x style values. */vNe=function(t){return Ob(t,"overflow")+Ob(t,"overflow-y")+Ob(t,"overflow-x")},/* mNe(el): walks from el up through parentNode, stopping at body / documentElement / a detached node, and returns the first node whose overflow styles mention "scroll" or "auto"; returns window if none is found or el is not an HTMLElement. */mNe=function(t){if(!(t instanceof 
window.HTMLElement))return window;for(var n=t;n&&!(n===document.body||n===document.documentElement||!n.parentNode);){if(/(scroll|auto)/.test(vNe(n)))return n;n=n.parentNode}return window},gNe=mNe;/* Yx(el): {top, left} of el's bounding rect shifted by the page scroll offsets. */function Yx(e){var t=e.getBoundingClientRect();return{top:t.top+window.pageYOffset,left:t.left+window.pageXOffset}}/* yNe(el): true when el.offsetParent is null. */var yNe=function(t){return t.offsetParent===null};/* bNe(el, container, offset): false when yNe(el) is true; otherwise true when el's box, extended by offset.{top,bottom,left,right}, intersects the container box. The window viewport is used when container is undefined or window. */function bNe(e,t,n){if(yNe(e))return!1;var r,a,o,i;if(typeof t=="undefined"||t===window)r=window.pageYOffset,o=window.pageXOffset,a=r+window.innerHeight,i=o+window.innerWidth;else{var l=Yx(t);r=l.top,o=l.left,a=r+t.offsetHeight,i=o+t.offsetWidth}var s=Yx(e);return r<=s.top+e.offsetHeight+n.top&&a>=s.top-n.bottom&&o<=s.left+e.offsetWidth+n.left&&i>=s.left-n.right}var CNe={debounce:u.looseBool,elementType:u.string,height:u.oneOfType([u.string,u.number]),offset:u.number,offsetBottom:u.number,offsetHorizontal:u.number,offsetLeft:u.number,offsetRight:u.number,offsetTop:u.number,offsetVertical:u.number,threshold:u.number,throttle:u.number,width:u.oneOfType([u.string,u.number])},/* LazyLoad: renders its single child only once the element is found visible (see bNe) relative to its scroll parent. In data(), when throttle > 0, lazyLoadHandler is wrapped with Yn (if debounce is set) or Qi - presumably debounce / throttle helpers, TODO confirm. */wNe=G({name:"LazyLoad",mixins:[nt],inheritAttrs:!1,props:An(CNe,{elementType:"div",debounce:!0,offset:0,offsetBottom:0,offsetHorizontal:0,offsetLeft:0,offsetRight:0,offsetTop:0,offsetVertical:0,throttle:250}),data:function(){return this.throttle>0&&(this.debounce?this.lazyLoadHandler=Yn(this.lazyLoadHandler,this.throttle):this.lazyLoadHandler=Qi(this.lazyLoadHandler,this.throttle)),{visible:!1}},/* mounted(): on next tick, schedules an initial visibility check, flushes the wrapped handler if it exposes flush(), and subscribes to window "resize" and the scroll parent's "scroll". */mounted:function(){var t=this;this.$nextTick(function(){Wn(function(){t.visible||t.lazyLoadHandler(t.$props)});var n=t.getEventNode();t.lazyLoadHandler.flush&&t.lazyLoadHandler.flush(),t.resizeHander=Kn(window,"resize",t.lazyLoadHandler),t.scrollHander=Kn(n,"scroll",t.lazyLoadHandler)})},beforeUnmount:function(){this.lazyLoadHandler.cancel&&this.lazyLoadHandler.cancel(),this.detachListeners()},methods:{getEventNode:function(){return gNe(Sn(this))},/* getOffset(): resolves {top,bottom,left,right}; a specific side offset wins over offsetVertical / offsetHorizontal, which win over threshold, which wins over offset (all via `||`, so 0 counts as unset). */getOffset:function(){var 
t=this.$props,n=t.offset,r=t.offsetVertical,a=t.offsetHorizontal,o=t.offsetTop,i=t.offsetBottom,l=t.offsetLeft,s=t.offsetRight,c=t.threshold,d=c||n,f=r||d,p=a||d;return{top:o||f,bottom:i||f,left:l||p,right:s||p}},/* lazyLoadHandler(): does nothing until the instance is mounted; once the element is in view it sets visible=true, emits "contentVisible" from the setState callback, and detaches the resize/scroll listeners. */lazyLoadHandler:function(){var t=this;if(!!this._.isMounted){var n=this.getOffset(),r=Sn(this),a=this.getEventNode();bNe(r,a,n)&&(this.setState({visible:!0},function(){t.__emit("contentVisible")}),this.detachListeners())}},detachListeners:function(){this.resizeHander&&this.resizeHander.remove(),this.scrollHander&&this.scrollHander.remove()}},/* render(): expects exactly one child; otherwise calls on(false, message) - presumably a warning helper - and renders nothing. Numeric height/width get a "px" suffix. The child is emitted only while visible. NOTE(review): the $attrs.class value is used as an object key, so a non-string class would be stringified - confirm callers pass strings. */render:function(){var t=ht(this);if(t.length!==1)return on(!1,"lazyLoad\u7EC4\u4EF6\u53EA\u80FD\u5305\u542B\u4E00\u4E2A\u5B50\u5143\u7D20"),null;var n=this.$props,r=n.height,a=n.width,o=n.elementType,i=this.visible,l=this.$attrs.class,s={height:typeof r=="number"?r+"px":r,width:typeof a=="number"?a+"px":a},c=V({LazyLoad:!0,"is-visible":i},l,l);return g(o,{class:c,style:s},{default:function(){return[i?t[0]:null]}})}});/* SNe: no-op click handler used for disabled list items. */function SNe(){}/* ListItem: one <li> row of a transfer list. A click emits "click" with the item unless the row or the item is disabled. The title attribute is set only when renderedText is a string or number. When the lazy prop is truthy the row is wrapped in the LazyLoad component with defaults {height:32, offset:500, throttle:0, debounce:false} overridden by lazy. */var kNe=G({name:"ListItem",inheritAttrs:!1,props:{renderedText:u.any,renderedEl:u.any,item:u.any,lazy:an(u.oneOfType([u.looseBool,u.object])),checked:u.looseBool,prefixCls:u.string,disabled:u.looseBool,onClick:u.func},render:function(){var t,n=this,r=this.$props,a=r.renderedText,o=r.renderedEl,i=r.item,l=r.lazy,s=r.checked,c=r.disabled,d=r.prefixCls,f=Se((t={},V(t,"".concat(d,"-content-item"),!0),V(t,"".concat(d,"-content-item-disabled"),c||i.disabled),t)),p;(typeof a=="string"||typeof a=="number")&&(p=String(a));var v=g("li",{class:f,title:p,onClick:c||i.disabled?SNe:function(){n.$emit("click",i)}},[g(ro,{checked:s,disabled:c||i.disabled},null),g("span",{class:"".concat(d,"-content-item-text")},[o])]),m=null;if(l){var y=P({height:32,offset:500,throttle:0,debounce:!1},l);m=g(wNe,y,{default:function(){return[v]}})}else m=v;return 
m}}),/* ListBody: renders filteredRenderItems as ListItem rows. When itemsLength changes and lazy is not false, it dispatches a synthetic bubbling "scroll" event on the root element (presumably to re-run lazy-load visibility checks - confirm). Pending scheduled callbacks are cancelled in beforeUnmount. */$Ne=G({name:"ListBody",inheritAttrs:!1,props:{prefixCls:u.string,filteredRenderItems:u.array.def([]),lazy:an(u.oneOfType([u.looseBool,u.object])),selectedKeys:u.array,disabled:u.looseBool,onItemSelect:u.func,onItemSelectAll:u.func,onScroll:u.func},setup:function(){return{mountId:null,lazyId:null}},data:function(){return{mounted:!1}},computed:{itemsLength:function(){return this.filteredRenderItems?this.filteredRenderItems.length:0}},watch:{itemsLength:function(){var t=this;Ne(function(){var n=t.$props.lazy;if(n!==!1){var r=Sn(t);en.cancel(t.lazyId),t.lazyId=en(function(){if(r){var a=new Event("scroll",{bubbles:!0});r.dispatchEvent(a)}})}})}},mounted:function(){var t=this;this.mountId=en(function(){t.mounted=!0})},beforeUnmount:function(){en.cancel(this.mountId),en.cancel(this.lazyId)},methods:{/* handleItemSelect(item): emits "itemSelect" with the item key and the toggled checked state. */handleItemSelect:function(t){var n=this.$props.selectedKeys,r=n.indexOf(t.key)>=0;this.$emit("itemSelect",t.key,!r)},handleScroll:function(t){this.$emit("scroll",t)}},render:function(){var t=this,n=this.$data.mounted,r=this.$props,a=r.prefixCls,o=r.filteredRenderItems,i=r.lazy,l=r.selectedKeys,s=r.disabled,c=o.map(function(f){var p=f.renderedEl,v=f.renderedText,m=f.item,y=m.disabled,b=l.indexOf(m.key)>=0;return g(kNe,{disabled:s||y,key:m.key,item:m,lazy:i,renderedText:v,renderedEl:p,checked:b,prefixCls:a,onClick:t.handleItemSelect},null)}),/* the highlight transition name is passed only after the mounted flag has been set */d=d2(n?"".concat(a,"-content-item-highlight"):"",{tag:"ul",class:"".concat(a,"-content"),onScroll:this.handleScroll});return g(f2,d,{default:function(){return[c]}})}}),ONe=function(e){return g($Ne,e,null)};/* PNe(el, type): when document.createEvent exists, dispatches a non-bubbling, cancelable "HTMLEvents" event of the given type on el. */function PNe(e,t){if("createEvent"in document){var n=document.createEvent("HTMLEvents");n.initEvent(t,!1,!0),e.dispatchEvent(n)}}/* TNe: default renderItem, returns null. */var TNe=function(){return null},xNe={key:u.string,title:u.string,description:u.string,disabled:u.looseBool};/* _Ne(v): truthy result when v is truthy, zn(v) is false, and Object.prototype.toString reports "[object Object]". */function _Ne(e){return e&&!zn(e)&&Object.prototype.toString.call(e)==="[object Object]"}var 
ENe={prefixCls:u.string,titleText:u.string,dataSource:u.arrayOf(u.shape(xNe).loose),filter:u.string,filterOption:u.func,checkedKeys:u.arrayOf(u.string),handleFilter:u.func,handleSelect:u.func,handleSelectAll:u.func,handleClear:u.func,renderItem:u.func,showSearch:u.looseBool,searchPlaceholder:u.string,notFoundContent:u.any,itemUnit:u.string,itemsUnit:u.string,body:u.any,renderList:u.any,footer:u.any,lazy:an(u.oneOfType([u.looseBool,u.object])),disabled:u.looseBool,direction:u.string,showSelectAll:u.looseBool,onItemSelect:u.func,onItemSelectAll:u.func,onScroll:u.func};/* MNe(renderList, props): calls renderList(props) when given; the result counts as a custom body only when La(result) is non-empty, otherwise the default ListBody is rendered. Returns {customize, bodyContent}. */function MNe(e,t){var n=e?e(t):null,r=!!n&&La(n).length>0;return r||(n=ONe(t)),{customize:r,bodyContent:n}}/* TransferList: one side of the transfer - header (select-all control, counts, title), optional search box, body, optional footer. The filter text is local state (filterValue). */var qx=G({name:"TransferList",mixins:[nt],inheritAttrs:!1,props:Rn(ENe,{dataSource:[],titleText:"",showSearch:!1,lazy:{}}),setup:function(){return{timer:null,triggerScrollTimer:null,scrollEvent:null}},data:function(){return{filterValue:""}},beforeUnmount:function(){clearTimeout(this.triggerScrollTimer)},updated:function(){var t=this;Ne(function(){t.scrollEvent&&t.scrollEvent.remove()})},methods:{handleScroll:function(t){this.$emit("scroll",t)},/* getCheckStatus(items): "none" when checkedKeys is empty; "all" when every item is checked or disabled; otherwise "part". Note that an empty items array with non-empty checkedKeys yields "all" because every() is vacuously true. */getCheckStatus:function(t){var n=this.$props.checkedKeys;return n.length===0?"none":t.every(function(r){return n.indexOf(r.key)>=0||!!r.disabled})?"all":"part"},/* getFilteredItems(dataSource, filter): renders every item via renderItemHtml and, when filter is non-blank, drops items failing matchFilter. Returns parallel arrays {filteredItems, filteredRenderItems}. */getFilteredItems:function(t,n){var r=this,a=[],o=[];return t.forEach(function(i){var l=r.renderItemHtml(i),s=l.renderedText;if(n&&n.trim()&&!r.matchFilter(s,i))return null;a.push(i),o.push(l)}),{filteredItems:a,filteredRenderItems:o}},/* getListBody(prefixCls, searchPlaceholder, filterValue, filteredItems, notFoundContent, bodyDom, filteredRenderItems, checkedKeys, renderList, showSearch, disabled): returns bodyDom when provided; otherwise builds the search wrapper plus either the custom list, the default list, or the not-found placeholder. */getListBody:function(t,n,r,a,o,i,l,s,c,d,f){var p=d?g("div",{class:"".concat(t,"-body-search-wrapper")},[g(pNe,{prefixCls:"".concat(t,"-search"),onChange:this._handleFilter,handleClear:this._handleClear,placeholder:n,value:r,disabled:f},null)]):null,v=i;if(!v){var 
m,y=If(this.$attrs),b=y.onEvents,C=MNe(c,P(P(P({},this.$props),{filteredItems:a,filteredRenderItems:l,selectedKeys:s}),b)),S=C.bodyContent,w=C.customize;w?m=g("div",{class:"".concat(t,"-body-customize-wrapper")},[S]):m=a.length?S:g("div",{class:"".concat(t,"-body-not-found")},[o]),v=g("div",{class:Se(d?"".concat(t,"-body ").concat(t,"-body-with-search"):"".concat(t,"-body"))},[p,m])}return v},/* getCheckBox(items, showSelectAll, disabled): the select-all control, or false when showSelectAll === false. Toggling emits "itemSelectAll" with the keys of the non-disabled items and the new checked state (the negation of the current "all" status). */getCheckBox:function(t,n,r){var a=this,o=this.getCheckStatus(t),i=o==="all",l=n!==!1&&g(ro,{disabled:r,checked:i,indeterminate:o==="part",onChange:function(){a.$emit("itemSelectAll",t.filter(function(c){return!c.disabled}).map(function(c){var d=c.key;return d}),!i)}},null);return l},/* _handleSelect(item): calls this.handleSelect with the item and its toggled checked state. */_handleSelect:function(t){var n=this.$props.checkedKeys,r=n.some(function(a){return a===t.key});this.handleSelect(t,!r)},/* _handleFilter(e): stores e.target.value as filterValue, forwards the event to the handleFilter prop (called unconditionally) and, for non-empty text, schedules with setTimeout(0) a synthetic "scroll" on the first ".ant-transfer-list-content" element inside this component. */_handleFilter:function(t){var n=this,r=this.$props.handleFilter,a=t.target.value;this.setState({filterValue:a}),r(t),a&&(this.triggerScrollTimer=setTimeout(function(){var o=Sn(n),i=o.querySelectorAll(".ant-transfer-list-content")[0];i&&PNe(i,"scroll")},0))},_handleClear:function(t){this.setState({filterValue:""}),this.handleClear(t)},/* matchFilter(text, item): delegates to the filterOption prop as filterOption(filterValue, item) when given; otherwise tests whether text contains filterValue. */matchFilter:function(t,n){var r=this.$data.filterValue,a=this.$props.filterOption;return a?a(r,n):t.indexOf(r)>=0},/* renderItemHtml(item): calls renderItem (default returns null). A plain-object result supplies renderedText from .value and renderedEl from .label; any other result is used for both. */renderItemHtml:function(t){var n=this.$props.renderItem,r=n===void 0?TNe:n,a=r(t),o=_Ne(a);return{renderedText:o?a.value:a,renderedEl:o?a.label:a,item:t}},filterNull:function(t){return t.filter(function(n){return n!==null})}},render:function(){var 
t=this.$data.filterValue,n=this.$props,r=n.prefixCls,a=n.dataSource,o=n.titleText,i=n.checkedKeys,l=n.disabled,s=n.body,c=n.footer,d=n.showSearch,f=n.searchPlaceholder,p=n.notFoundContent,v=n.itemUnit,m=n.itemsUnit,y=n.renderList,b=n.showSelectAll,/* footer and body are render functions called with a copy of the props */C=c&&c(P({},this.$props)),S=s&&s(P({},this.$props)),w=Se(r,V({},"".concat(r,"-with-footer"),!!C)),k=this.getFilteredItems(a,t),$=k.filteredItems,O=k.filteredRenderItems,/* the unit label is chosen from the unfiltered dataSource length, while the count shown in the header is the filtered length */T=a.length>1?m:v,_=this.getListBody(r,f,t,$,p,S,O,i,y,d,l),I=C?g("div",{class:"".concat(r,"-footer")},[C]):null,L=this.getCheckBox($,b,l);return g("div",{class:w,style:this.$attrs.style},[g("div",{class:"".concat(r,"-header")},[L,g("span",{class:"".concat(r,"-header-selected")},[g("span",null,[(i.length>0?"".concat(i.length,"/"):"")+$.length," ",T]),g("span",{class:"".concat(r,"-header-title")},[o])])]),_,I])}});/* Gx: no-op default for the move handlers below. */function Gx(){}/* IR: functional component rendering the two operation buttons. The first button calls moveToRight and is disabled unless rightActive; the second calls moveToLeft and is disabled unless leftActive; both are disabled when `disabled` is set. Arrow texts default to "". */var IR=function(t){var n=t.disabled,r=t.moveToLeft,a=r===void 0?Gx:r,o=t.moveToRight,i=o===void 0?Gx:o,l=t.leftArrowText,s=l===void 0?"":l,c=t.rightArrowText,d=c===void 0?"":c,f=t.leftActive,p=t.rightActive,v=t.class,m=t.style;return g("div",{class:v,style:m},[g(Cr,{type:"primary",size:"small",disabled:n||!p,onClick:i,icon:g(wi,null,null)},{default:function(){return[d]}}),g(Cr,{type:"primary",size:"small",disabled:n||!f,onClick:a,icon:g(uc,null,null)},{default:function(){return[s]}})])};IR.inheritAttrs=!1;var 
INe=IR,NNe={key:u.string,title:u.string,description:u.string,disabled:u.looseBool},ANe={prefixCls:u.string,dataSource:u.arrayOf(u.shape(NNe).loose),disabled:u.looseBool,targetKeys:u.arrayOf(u.string),selectedKeys:u.arrayOf(u.string),render:u.func,listStyle:u.oneOfType([u.func,u.object]),operationStyle:u.object,titles:u.arrayOf(u.string),operations:u.arrayOf(u.string),showSearch:u.looseBool,filterOption:u.func,searchPlaceholder:u.string,notFoundContent:u.any,locale:u.object,rowKey:u.func,lazy:u.oneOfType([u.object,u.looseBool]),showSelectAll:u.looseBool,children:u.any,onChange:u.func,onSelectChange:u.func,onSearchChange:u.func,onSearch:u.func,onScroll:u.func},/* ATransfer: two TransferList panels with the operation buttons between them. Selection state lives in sourceSelectedKeys / targetSelectedKeys, initialised in data() by splitting selectedKeys according to membership in targetKeys. */DNe=G({name:"ATransfer",mixins:[nt],inheritAttrs:!1,props:Rn(ANe,{dataSource:[],locale:{},showSearch:!1,listStyle:function(){}}),setup:function(){return{separatedDataSource:null,configProvider:ve("configProvider",St)}},data:function(){var t=this.selectedKeys,n=t===void 0?[]:t,r=this.targetKeys,a=r===void 0?[]:r;return{leftFilter:"",rightFilter:"",sourceSelectedKeys:n.filter(function(o){return a.indexOf(o)===-1}),targetSelectedKeys:n.filter(function(o){return a.indexOf(o)>-1})}},/* watchers: targetKeys and dataSource changes call updateState(); targetKeys and selectedKeys changes re-split selectedKeys (when it is provided) into the two internal selections. */watch:{targetKeys:function(){if(this.updateState(),this.selectedKeys){var t=this.targetKeys||[];this.setState({sourceSelectedKeys:this.selectedKeys.filter(function(n){return!t.includes(n)}),targetSelectedKeys:this.selectedKeys.filter(function(n){return t.includes(n)})})}},dataSource:function(){this.updateState()},selectedKeys:function(){if(this.selectedKeys){var t=this.targetKeys||[];this.setState({sourceSelectedKeys:this.selectedKeys.filter(function(n){return!t.includes(n)}),targetSelectedKeys:this.selectedKeys.filter(function(n){return t.includes(n)})})}}},mounted:function(){},methods:{getSelectedKeysName:function(t){return t==="left"?"sourceSelectedKeys":"targetSelectedKeys"},/* getTitles(locale): the titles prop wins; otherwise locale.titles, else two empty strings. */getTitles:function(t){return this.titles?this.titles:t.titles||["",""]},/* getLocale(contextLocale, renderEmpty): merges, in increasing priority, the context locale, the computed notFoundContent / searchPlaceholder overrides, and the locale prop. */getLocale:function(t,n){var 
r={notFoundContent:n("Transfer")},a=We(this,"notFoundContent");return a&&(r.notFoundContent=a),vt(this,"searchPlaceholder")&&(r.searchPlaceholder=this.$props.searchPlaceholder),P(P(P({},t),r),this.$props.locale)},/* updateState(): resets separatedDataSource; when selectedKeys is not provided, prunes the internal selections to keys that still exist in dataSource and still sit on the matching side of targetKeys. */updateState:function(){var t=this.sourceSelectedKeys,n=this.targetSelectedKeys;if(this.separatedDataSource=null,!this.selectedKeys){var r=this.dataSource,a=this.targetKeys,o=a===void 0?[]:a,i=[],l=[];r.forEach(function(s){var c=s.key;t.includes(c)&&!o.includes(c)&&i.push(c),n.includes(c)&&o.includes(c)&&l.push(c)}),this.setState({sourceSelectedKeys:i,targetSelectedKeys:l})}},/* moveTo(direction): takes the selection of the side being moved from (source for "right", target for "left"), drops keys whose item is disabled, and computes the new targetKeys (moved keys are prepended when moving right, removed when moving left). It then clears that side's selection, reports it through handleSelectChange, and emits "change" with (newTargetKeys, direction, movedKeys). */moveTo:function(t){var n=this.$props,r=n.targetKeys,a=r===void 0?[]:r,o=n.dataSource,i=o===void 0?[]:o,l=this.sourceSelectedKeys,s=this.targetSelectedKeys,c=t==="right"?l:s,d=c.filter(function(v){return!i.some(function(m){return!!(v===m.key&&m.disabled)})}),f=t==="right"?d.concat(a):a.filter(function(v){return d.indexOf(v)===-1}),p=t==="right"?"left":"right";this.setState(V({},this.getSelectedKeysName(p),[])),this.handleSelectChange(p,[]),this.$emit("change",f,t,d)},moveToLeft:function(){this.moveTo("left")},moveToRight:function(){this.moveTo("right")},/* onItemSelectAll(side, keys, checkAll): when checkAll is truthy, unions keys into that side's selection (deduplicated through a Set); otherwise removes them. Always reports via handleSelectChange; local state is updated only when the selectedKeys prop is not provided. */onItemSelectAll:function(t,n,r){var a=this.$data[this.getSelectedKeysName(t)]||[],o=[];r?o=Array.from(new Set([].concat(Je(a),Je(n)))):o=a.filter(function(i){return n.indexOf(i)===-1}),this.handleSelectChange(t,o),this.$props.selectedKeys||this.setState(V({},this.getSelectedKeysName(t),o))},/* handleSelectAll(side, items, checkAll): legacy-style entry point taking items rather than keys; maps items to keys and forwards the negated flag to onItemSelectAll. */handleSelectAll:function(t,n,r){this.onItemSelectAll(t,n.map(function(a){var o=a.key;return o}),!r)},handleLeftSelectAll:function(t,n){return this.handleSelectAll("left",t,!n)},handleRightSelectAll:function(t,n){return this.handleSelectAll("right",t,!n)},onLeftItemSelectAll:function(t,n){return this.onItemSelectAll("left",t,n)},onRightItemSelectAll:function(t,n){return this.onItemSelectAll("right",t,n)},/* handleFilter(side, e): emits "search" with the side and the input's current value. */handleFilter:function(t,n){var 
r=n.target.value;this.$emit("search",t,r)},handleLeftFilter:function(t){this.handleFilter("left",t)},handleRightFilter:function(t){this.handleFilter("right",t)},/* handleClear(side): emits "search" with an empty string for that side. */handleClear:function(t){this.$emit("search",t,"")},handleLeftClear:function(){this.handleClear("left")},handleRightClear:function(){this.handleClear("right")},/* onItemSelect(side, key, checked): works on a copy (via Je) of the side's selection - removes key if present, appends it again when checked - then reports via handleSelectChange and updates local state only when the selectedKeys prop is not provided. */onItemSelect:function(t,n,r){var a=this.sourceSelectedKeys,o=this.targetSelectedKeys,i=Je(t==="left"?a:o),l=i.indexOf(n);l>-1&&i.splice(l,1),r&&i.push(n),this.handleSelectChange(t,i),this.selectedKeys||this.setState(V({},this.getSelectedKeysName(t),i))},onLeftItemSelect:function(t,n){return this.onItemSelect("left",t,n)},onRightItemSelect:function(t,n){return this.onItemSelect("right",t,n)},handleScroll:function(t,n){this.$emit("scroll",t,n)},handleLeftScroll:function(t){this.handleScroll("left",t)},handleRightScroll:function(t){this.handleScroll("right",t)},/* handleSelectChange(side, keys): emits "selectChange" as (sourceKeys, targetKeys), substituting keys for the given side and the current state for the other. */handleSelectChange:function(t,n){var r=this.sourceSelectedKeys,a=this.targetSelectedKeys;t==="left"?this.$emit("selectChange",n,a):this.$emit("selectChange",r,n)},/* handleListStyle(listStyle, direction): calls listStyle({direction}) when it is a function, otherwise returns it as is. */handleListStyle:function(t,n){return typeof t=="function"?t({direction:n}):t},/* separateDataSource(): splits dataSource into leftDataSource (keys not in targetKeys, in source order) and rightDataSource (placed at the index of their key in targetKeys). When rowKey is given, its result is written onto each item as item.key (the input items are mutated). The right array is pre-sized with new Array(targetKeys.length), so target keys missing from dataSource leave holes. */separateDataSource:function(){var t=this.$props,n=t.dataSource,r=t.rowKey,a=t.targetKeys,o=a===void 0?[]:a,i=[],l=new Array(o.length);return n.forEach(function(s){r&&(s.key=r(s));var c=o.indexOf(s.key);c!==-1?l[c]=s:i.push(s)}),{leftDataSource:i,rightDataSource:l}},/* renderTransfer(contextLocale): builds the root div containing the left list, the operation buttons and the right list. */renderTransfer:function(t){var n,r=Qe(this),a=r.prefixCls,o=r.disabled,i=r.operations,l=i===void 
0?[]:i,s=r.showSearch,c=r.listStyle,d=r.operationStyle,f=r.filterOption,p=r.lazy,v=r.showSelectAll,m=this.$attrs,y=m.class,b=m.style,C=We(this,"children",{},!1),S=this.configProvider.getPrefixCls,w=S("transfer",a),k=this.configProvider.renderEmpty,$=this.getLocale(t,k),O=this.sourceSelectedKeys,T=this.targetSelectedKeys,_=this.$slots,I=_.body,L=_.footer,/* item renderer: the render prop wins over the "render" slot */j=r.render||this.$slots.render,F=this.separateDataSource(),N=F.leftDataSource,D=F.rightDataSource,/* z is passed as leftActive (something selected on the target side); B as rightActive (something selected on the source side) */z=T.length>0,B=O.length>0,M=Se(w,y,(n={},V(n,"".concat(w,"-disabled"),o),V(n,"".concat(w,"-customize-list"),!!C),n)),E=this.getTitles($);return g("div",{class:M,style:b},[/* left (source) list */g(qx,{key:"leftList",prefixCls:"".concat(w,"-list"),titleText:E[0],dataSource:N,filterOption:f,style:this.handleListStyle(c,"left"),checkedKeys:O,handleFilter:this.handleLeftFilter,handleClear:this.handleLeftClear,handleSelectAll:this.handleLeftSelectAll,onItemSelect:this.onLeftItemSelect,onItemSelectAll:this.onLeftItemSelectAll,renderItem:j,showSearch:s,body:I,renderList:C,footer:L,lazy:p,onScroll:this.handleLeftScroll,disabled:o,direction:"left",showSelectAll:v,itemUnit:$.itemUnit,itemsUnit:$.itemsUnit,notFoundContent:$.notFoundContent,searchPlaceholder:$.searchPlaceholder},null),/* operation buttons: operations[0] labels the move-right button, operations[1] the move-left button */g(INe,{key:"operation",class:"".concat(w,"-operation"),rightActive:B,rightArrowText:l[0],moveToRight:this.moveToRight,leftActive:z,leftArrowText:l[1],moveToLeft:this.moveToLeft,style:d,disabled:o},null),/* right (target) list */g(qx,{key:"rightList",prefixCls:"".concat(w,"-list"),titleText:E[1],dataSource:D,filterOption:f,style:this.handleListStyle(c,"right"),checkedKeys:T,handleFilter:this.handleRightFilter,handleClear:this.handleRightClear,handleSelectAll:this.handleRightSelectAll,onItemSelect:this.onRightItemSelect,onItemSelectAll:this.onRightItemSelectAll,renderItem:j,showSearch:s,body:I,renderList:C,footer:L,lazy:p,onScroll:this.handleRightScroll,disabled:o,direction:"right",showSelectAll:v,itemUnit:$.itemUnit,itemsUnit:$.itemsUnit,notFoundContent:$.notFoundContent,searchPlaceholder:$.searchPlac
eholder},null)])}},/* render(): delegates to renderTransfer through the Kr component, configured with componentName "Transfer" and the default Transfer locale. */render:function(){return g(Kr,{componentName:"Transfer",defaultLocale:lo.Transfer,children:this.renderTransfer},null)}}),/* RNe: ATransfer passed through kn - presumably the plugin/install wrapper, TODO confirm. */RNe=kn(DNe),/* LNe / FNe: SVG definition of the outlined "file" icon. */LNe={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M854.6 288.6L639.4 73.4c-6-6-14.1-9.4-22.6-9.4H192c-17.7 0-32 14.3-32 32v832c0 17.7 14.3 32 32 32h640c17.7 0 32-14.3 32-32V311.3c0-8.5-3.4-16.7-9.4-22.7zM790.2 326H602V137.8L790.2 326zm1.8 562H232V136h302v216a42 42 0 0042 42h216v494z"}}]},name:"file",theme:"outlined"},FNe=LNe;/* Xx(target, ...sources): copies the own string keys and own enumerable symbol keys of each non-null source onto target via BNe, and returns target. */function Xx(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){BNe(e,a,n[a])})}return e}/* BNe(obj, key, value): when key is already in obj, redefines it as an enumerable, configurable, writable data property; otherwise assigns it. Returns obj. */function BNe(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}/* FileOutlined: functional icon component; merges props and attrs and renders Et with the "file" icon definition. */var VS=function(t,n){var r=Xx({},t,n.attrs);return g(Et,Xx({},r,{icon:FNe}),null)};VS.displayName="FileOutlined";VS.inheritAttrs=!1;var zS=VS,/* VNe / zNe: SVG definition of the outlined "minus-square" icon. */VNe={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M328 544h368c4.4 0 8-3.6 8-8v-48c0-4.4-3.6-8-8-8H328c-4.4 0-8 3.6-8 8v48c0 4.4 3.6 8 8 8z"}},{tag:"path",attrs:{d:"M880 112H144c-17.7 0-32 14.3-32 32v736c0 17.7 14.3 32 32 32h736c17.7 0 32-14.3 32-32V144c0-17.7-14.3-32-32-32zm-40 728H184V184h656v656z"}}]},name:"minus-square",theme:"outlined"},zNe=VNe;/* Zx / HNe: same logic as Xx / BNe, duplicated for this icon module. */function Zx(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){HNe(e,a,n[a])})}return e}function HNe(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var 
HS=function(t,n){var r=Zx({},t,n.attrs);return g(Et,Zx({},r,{icon:zNe}),null)};HS.displayName="MinusSquareOutlined";HS.inheritAttrs=!1;var jNe=HS,/* KNe / WNe: SVG definition of the outlined "plus-square" icon. */KNe={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M328 544h152v152c0 4.4 3.6 8 8 8h48c4.4 0 8-3.6 8-8V544h152c4.4 0 8-3.6 8-8v-48c0-4.4-3.6-8-8-8H544V328c0-4.4-3.6-8-8-8h-48c-4.4 0-8 3.6-8 8v152H328c-4.4 0-8 3.6-8 8v48c0 4.4 3.6 8 8 8z"}},{tag:"path",attrs:{d:"M880 112H144c-17.7 0-32 14.3-32 32v736c0 17.7 14.3 32 32 32h736c17.7 0 32-14.3 32-32V144c0-17.7-14.3-32-32-32zm-40 728H184V184h656v656z"}}]},name:"plus-square",theme:"outlined"},WNe=KNe;/* Jx / UNe: own-property copy helper pair for this icon module (Jx copies each source's keys onto the target through UNe). */function Jx(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){UNe(e,a,n[a])})}return e}function UNe(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}/* PlusSquareOutlined: functional icon component rendering Et with the "plus-square" definition. */var jS=function(t,n){var r=Jx({},t,n.attrs);return g(Et,Jx({},r,{icon:WNe}),null)};jS.displayName="PlusSquareOutlined";jS.inheritAttrs=!1;var YNe=jS;/* M1(iterable, allowArrayLike): returns an {s, n, e, f} driver (start / next / error / finish). Uses the object's Symbol.iterator (or "@@iterator") when present; otherwise index-walks arrays, whatever rg(e) converts, or - when allowArrayLike is set - objects with a numeric length; otherwise throws TypeError. With a real iterator, f() calls iterator.return() if iteration did not finish and rethrows an error recorded by e(). */function M1(e,t){var n=typeof Symbol!="undefined"&&e[Symbol.iterator]||e["@@iterator"];if(!n){if(Array.isArray(e)||(n=rg(e))||t&&e&&typeof e.length=="number"){n&&(e=n);var r=0,a=function(){};return{s:a,n:function(){return r>=e.length?{done:!0}:{done:!1,value:e[r++]}},e:function(c){throw c},f:a}}throw new TypeError(`Invalid attempt to iterate non-iterable instance.
In order to be iterable, non-array objects must have a [Symbol.iterator]() method.`)}var o=!0,i=!1,l;return{s:function(){n=n.call(e)},n:function(){var c=n.next();return o=c.done,c},e:function(c){i=!0,l=c},f:function(){try{!o&&n.return!=null&&n.return()}finally{if(i)throw l}}}}/* Qs: no-op handler, used in place of the drag handlers when the tree is not draggable. */function Qs(){}/* Qx / e_: node state names used in class suffixes; qNe: placeholder title rendered when a node has no title. */var Qx="open",e_="close",qNe="---",/* TreeNode: a single tree row. User interaction is delegated to handlers read from the injected parent tree (vcTree); the node provides itself to descendants as "vcTreeNode" and registers / unregisters itself through vcTree.registerTreeNode on mount / unmount. */NR=G({name:"TreeNode",mixins:[nt],inheritAttrs:!1,__ANT_TREE_NODE:!0,props:An({eventKey:u.oneOfType([u.string,u.number]),prefixCls:u.string,root:u.object,expanded:u.looseBool,selected:u.looseBool,checked:u.looseBool,loaded:u.looseBool,loading:u.looseBool,halfChecked:u.looseBool,title:u.any,pos:u.string,dragOver:u.looseBool,dragOverGapTop:u.looseBool,dragOverGapBottom:u.looseBool,isLeaf:u.looseBool,checkable:u.looseBool,selectable:u.looseBool,disabled:u.looseBool,disableCheckbox:u.looseBool,icon:u.any,dataRef:u.object,switcherIcon:u.any,label:u.any,value:u.any},{}),setup:function(){return{vcTree:ve("vcTree",{}),vcTreeNode:ve("vcTreeNode",{})}},data:function(){return this.children=null,{dragNodeHighlight:!1}},created:function(){ot("vcTreeNode",this)},mounted:function(){var t=this.eventKey,n=this.vcTree.registerTreeNode;this.syncLoadData(this.$props),n&&n(t,this)},updated:function(){this.syncLoadData(this.$props)},beforeUnmount:function(){var t=this.eventKey,n=this.vcTree.registerTreeNode;n&&n(t,null)},methods:{/* onSelectorClick(e): notifies vcTree.onNodeClick, then selects the node when it is selectable, otherwise toggles its checkbox. */onSelectorClick:function(t){var n=this.vcTree.onNodeClick;n(t,this),this.isSelectable()?this.onSelect(t):this.onCheck(t)},onSelectorDoubleClick:function(t){var n=this.vcTree.onNodeDoubleClick;n(t,this)},/* onSelect(e): ignored when the node is disabled; otherwise prevents the default action and notifies vcTree.onNodeSelect. */onSelect:function(t){if(!this.isDisabled()){var n=this.vcTree.onNodeSelect;t.preventDefault(),n(t,this)}},/* onCheck(e): ignored when the node is disabled, not checkable, or has disableCheckbox; otherwise reports the inverted checked state to vcTree.onNodeCheck. */onCheck:function(t){if(!this.isDisabled()){var n=this.disableCheckbox,r=this.checked,a=this.vcTree.onNodeCheck;if(!(!this.isCheckable()||n)){t.preventDefault();var o=!r;a(t,this,o)}}},onMouseEnter:function(t){var n=this.vcTree.onNodeMouseEnter;n(t,this)},onMouseLeave:function(t){var 
n=this.vcTree.onNodeMouseLeave;n(t,this)},onContextMenu:function(t){var n=this.vcTree.onNodeContextMenu;n(t,this)},/* onDragStart(e): stops propagation, highlights the node and notifies the tree. The empty setData call is wrapped in try/catch and its failure ignored - presumably a browser compatibility measure, TODO confirm. */onDragStart:function(t){var n=this.vcTree.onNodeDragStart;t.stopPropagation(),this.setState({dragNodeHighlight:!0}),n(t,this);try{t.dataTransfer.setData("text/plain","")}catch{}},/* the remaining drag handlers stop propagation (enter / over / drop also prevent default) and forward to the matching vcTree handler; dragend and drop also clear the highlight */onDragEnter:function(t){var n=this.vcTree.onNodeDragEnter;t.preventDefault(),t.stopPropagation(),n(t,this)},onDragOver:function(t){var n=this.vcTree.onNodeDragOver;t.preventDefault(),t.stopPropagation(),n(t,this)},onDragLeave:function(t){var n=this.vcTree.onNodeDragLeave;t.stopPropagation(),n(t,this)},onDragEnd:function(t){var n=this.vcTree.onNodeDragEnd;t.stopPropagation(),this.setState({dragNodeHighlight:!1}),n(t,this)},onDrop:function(t){var n=this.vcTree.onNodeDrop;t.preventDefault(),t.stopPropagation(),this.setState({dragNodeHighlight:!1}),n(t,this)},onExpand:function(t){var n=this.vcTree.onNodeExpand;n(t,this)},setSelectHandle:function(t){this.selectHandle=t},/* getNodeChildren(): this.children filtered to tree-node vnodes by KS; the bare `t.length,n.length` expressions have no effect. */getNodeChildren:function(){var t=this.children,n=KS(t);return t.length,n.length,n},/* getNodeState(): null for leaves, otherwise "open" or "close" according to expanded. */getNodeState:function(){var t=this.expanded;return this.isLeaf2()?null:t?Qx:e_},/* isLeaf2(): an explicit isLeaf === false wins; otherwise truthy when isLeaf is truthy, or when there are no tree-node children and either the tree has no loadData or this node is already loaded. */isLeaf2:function(){var t=this.isLeaf,n=this.loaded,r=this.vcTree.loadData,a=this.getNodeChildren().length!==0;return t===!1?!1:t||!r&&!a||r&&n&&!a},/* isDisabled(): an explicit disabled === false on the node overrides a disabled tree. */isDisabled:function(){var t=this.disabled,n=this.vcTree.disabled;return t===!1?!1:!!(n||t)},/* isCheckable(): false when the tree is not checkable or the node sets checkable === false; otherwise the tree's checkable value, which is returned as is and need not be a boolean. */isCheckable:function(){var t=this.$props.checkable,n=this.vcTree.checkable;return!n||t===!1?!1:n},/* syncLoadData(props): when the node is expanded, not loading, not a leaf, the tree has loadData, and the node has no children and is not yet loaded, asks the tree to load it through onNodeLoad. */syncLoadData:function(t){var n=t.expanded,r=t.loading,a=t.loaded,o=this.vcTree,i=o.loadData,l=o.onNodeLoad;if(!r&&i&&n&&!this.isLeaf2()){var s=this.getNodeChildren().length!==0;!s&&!a&&l(this)}},/* isSelectable(): a boolean selectable prop on the node wins; otherwise the tree's selectable value. */isSelectable:function(){var t=this.selectable,n=this.vcTree.selectable;return typeof t=="boolean"?t:n},/* renderSwitcher(): the expand/collapse toggle. Leaves get a "-switcher-noop" span without a click handler; other nodes get a span bound to onExpand. A function switcherIcon (from the node, else from the tree) is called with the props merged with dataRef and an isLeaf flag. */renderSwitcher:function(){var t=this.expanded,n=this.vcTree.prefixCls,r=We(this,"switcherIcon",{},!1)||We(this.vcTree,"switcherIcon",{},!1);if(this.isLeaf2())return 
g("span",{key:"switcher",class:Se("".concat(n,"-switcher"),"".concat(n,"-switcher-noop"))},[typeof r=="function"?r(P(P(P({},this.$props),this.$props.dataRef),{isLeaf:!0})):r]);var a=Se("".concat(n,"-switcher"),"".concat(n,"-switcher_").concat(t?Qx:e_));return g("span",{key:"switcher",onClick:this.onExpand,class:a},[typeof r=="function"?r(P(P(P({},this.$props),this.$props.dataRef),{isLeaf:!1})):r])},/* renderCheckbox(): null when the node is not checkable; otherwise a span whose classes reflect checked, half-checked (only when not checked) and disabled states. When the checkable value is not a boolean it is rendered as the span's content. */renderCheckbox:function(){var t=this.checked,n=this.halfChecked,r=this.disableCheckbox,a=this.vcTree.prefixCls,o=this.isDisabled(),i=this.isCheckable();if(!i)return null;var l=typeof i!="boolean"?i:null;return g("span",{key:"checkbox",class:Se("".concat(a,"-checkbox"),t&&"".concat(a,"-checkbox-checked"),!t&&n&&"".concat(a,"-checkbox-indeterminate"),(o||r)&&"".concat(a,"-checkbox-disabled")),onClick:this.onCheck},[l])},/* renderIcon(): the built-in state icon span; the class suffix is the node state ("open" / "close") or "docu" for leaves, plus a loading class while loading. */renderIcon:function(){var t=this.loading,n=this.vcTree.prefixCls;return g("span",{key:"icon",class:Se("".concat(n,"-iconEle"),"".concat(n,"-icon__").concat(this.getNodeState()||"docu"),t&&"".concat(n,"-icon_loading"))},null)},/* renderSelector(): the clickable title wrapper. Icon: when the tree has showIcon, a custom icon (node icon, else tree icon) or the built-in one; without showIcon the built-in icon appears only while loading with loadData set. Title: the title value (a function is called with props merged with dataRef) or the "---" placeholder. The span is draggable only when the tree is draggable and the node is not disabled. */renderSelector:function(){var t=this.selected,n=this.loading,r=this.dragNodeHighlight,a=We(this,"icon",{},!1),o=this.vcTree,i=o.prefixCls,l=o.showIcon,s=o.icon,c=o.draggable,d=o.loadData,f=this.isDisabled(),p=We(this,"title",{},!1),v="".concat(i,"-node-content-wrapper"),m;if(l){var y=a||s;m=y?g("span",{class:Se("".concat(i,"-iconEle"),"".concat(i,"-icon__customize"))},[typeof y=="function"?y(P(P({},this.$props),this.$props.dataRef)):y]):this.renderIcon()}else d&&n&&(m=this.renderIcon());var b=p,C=b?g("span",{class:"".concat(i,"-title")},[typeof b=="function"?b(P(P({},this.$props),this.$props.dataRef)):b]):g("span",{class:"".concat(i,"-title")},[qNe]);return g("span",{key:"selector",ref:this.setSelectHandle,title:typeof p=="string"?p:"",class:Se("".concat(v),"".concat(v,"-").concat(this.getNodeState()||"normal"),!f&&(t||r)&&"".concat(i,"-node-selected"),!f&&c&&"draggable"),draggable:!f&&c||void 0,"aria-grabbed":!f&&c||void 
0,onMouseenter:this.onMouseEnter,onMouseleave:this.onMouseLeave,onContextmenu:this.onContextMenu,onClick:this.onSelectorClick,onDblclick:this.onSelectorDoubleClick,onDragstart:c?this.onDragStart:Qs},[m,C])},renderChildren:function(){var t=this.expanded,n=this.pos,r=this.vcTree,a=r.prefixCls,o=r.openTransitionName,i=r.openAnimation,l=r.renderTreeNode,s={};o?s=Lo(o):kt(i)==="object"&&(s=P(P(P({},i),{css:!1}),s));var c=this.getNodeChildren();if(c.length===0)return null;var d;return t&&(d=g("ul",{class:Se("".concat(a,"-child-tree"),t&&"".concat(a,"-child-tree-open")),"data-expanded":t,role:"group"},[LR(c,function(f,p){return l(f,p,n)})])),g(no,s,{default:function(){return[d]}})}},render:function(){var t;this.children=ht(this);var n=this.$props,r=n.dragOver,a=n.dragOverGapTop,o=n.dragOverGapBottom,i=n.isLeaf,l=n.expanded,s=n.selected,c=n.checked,d=n.halfChecked,f=n.loading,p=this.vcTree,v=p.prefixCls,m=p.filterTreeNode,y=p.draggable,b=this.isDisabled(),C=US(P(P({},this.$props),this.$attrs)),S=this.$attrs,w=S.class,k=S.style;return g("li",le({class:(t={},V(t,w,w),V(t,"".concat(v,"-treenode-disabled"),b),V(t,"".concat(v,"-treenode-switcher-").concat(l?"open":"close"),!i),V(t,"".concat(v,"-treenode-checkbox-checked"),c),V(t,"".concat(v,"-treenode-checkbox-indeterminate"),d),V(t,"".concat(v,"-treenode-selected"),s),V(t,"".concat(v,"-treenode-loading"),f),V(t,"drag-over",!b&&r),V(t,"drag-over-gap-top",!b&&a),V(t,"drag-over-gap-bottom",!b&&o),V(t,"filter-node",m&&m(this)),t),style:k,role:"treeitem",onDragenter:y?this.onDragEnter:Qs,onDragover:y?this.onDragOver:Qs,onDragleave:y?this.onDragLeave:Qs,onDrop:y?this.onDrop:Qs,onDragend:y?this.onDragEnd:Qs},C),[this.renderSwitcher(),this.renderCheckbox(),this.renderSelector(),this.renderChildren()])}});NR.isTreeNode=1;var AR=NR,GNe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof 
/*
 * (The first statement finishes the object-rest helper GNe started on the previous line.)
 * XNe, ZNe    - 0.25 and 2; t_ uses max(height * XNe, ZNe) as the height of the top/bottom drop bands.
 * eu(e,t)     - copy of array e with the first occurrence of t removed.
 * tu(e,t)     - copy of array e with t appended when it is not already present.
 * JNe(e)      - e.split("-").
 * DR(e,t)     - builds the position string "<e>-<t>".
 * QNe(e)      - truthy when e.type.isTreeNode is set.
 * KS(list)    - list (default []) filtered by QNe.
 * Zc(node)    - true when the node props read via Qe have disabled or disableCheckbox set, or checkable === false.
 * RR(e,t)     - recursive traversal starting at root list e; for every node calls
 *               t({node, index, pos, key, parentPos}); key falls back to pos when it is null/undefined.
 * LR(list,fn) - list.map(fn); NOTE: returns the single mapped item itself (not an array) when the result has
 *               exactly one element.
 * eAe(e,t)    - keys of every node visited by RR(e), followed by t's eventKey (or its pos).
 * t_(e,t)     - -1, 1 or 0 depending on whether e.clientY falls in the top band, the bottom band or the middle
 *               of t.selectHandle's bounding rect.
 * n_(e,t)     - undefined for falsy e; a copy of e when t.multiple; otherwise [e[0]] (or e itself when empty).
 * tAe(props)  - default processProps for WS: copy of props with class (class || className), style and key.
 * WS(e,t)     - recursively converts data items into AR vnodes (children passed as the default slot);
 *               returns [] for falsy input; t.processProps overrides tAe.
 * Dg          - starts here and continues on the next line.
 */
Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},XNe=.25,ZNe=2;function eu(e,t){var n=e.slice(),r=n.indexOf(t);return r>=0&&n.splice(r,1),n}function tu(e,t){var n=e.slice();return n.indexOf(t)===-1&&n.push(t),n}function JNe(e){return e.split("-")}function DR(e,t){return"".concat(e,"-").concat(t)}function QNe(e){return e.type&&e.type.isTreeNode}function KS(){var e=arguments.length>0&&arguments[0]!==void 0?arguments[0]:[];return e.filter(QNe)}function Zc(e){var t=Qe(e)||{},n=t.disabled,r=t.disableCheckbox,a=t.checkable;return!!(n||r)||a===!1}function RR(e,t){function n(r,a,o){var i=r?ht(r):e,l=r?DR(o.pos,a):0,s=KS(i);if(r){var c=r.key;!c&&c==null&&(c=l);var d={node:r,index:a,pos:l,key:c,parentPos:o.node?o.pos:null};t(d)}s.forEach(function(f,p){n(f,p,{node:r,pos:l})})}n(null)}function LR(){var e=arguments.length>0&&arguments[0]!==void 0?arguments[0]:[],t=arguments.length>1?arguments[1]:void 0,n=e.map(t);return n.length===1?n[0]:n}function eAe(e,t){var n=Qe(t),r=n.eventKey,a=n.pos,o=[];return RR(e,function(i){var l=i.key;o.push(l)}),o.push(r||a),o}function t_(e,t){var n=e.clientY,r=t.selectHandle.getBoundingClientRect(),a=r.top,o=r.bottom,i=r.height,l=Math.max(i*XNe,ZNe);return n<=a+l?-1:n>=o-l?1:0}function n_(e,t){if(!!e){var n=t.multiple;return n?e.slice():e.length?[e[0]]:e}}var tAe=function(){var t=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{};return P(P({},t),{class:t.class||t.className,style:t.style,key:t.key})};function WS(e,t){if(!e)return[];var n=t||{},r=n.processProps,a=r===void 0?tAe:r,o=Array.isArray(e)?e:[e];return o.map(function(i){var l=i.children,s=GNe(i,["children"]),c=WS(l,t);return g(AR,a(s),{default:function(){return[c]}})})}function Dg(e){var t=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{},n=t.initWrapper,r=t.processEntity,a=t.onProcessFinished,o=new Map,i=new 
/*
 * Dg(e,opts)  - walks e with RR and fills two Maps, posEntities (by pos) and keyEntities (by key), with
 *               {node, index, key, pos, parent, children}; optional hooks: initWrapper(wrapper),
 *               processEntity(entity, wrapper), onProcessFinished(wrapper). Returns the wrapper.
 * Pb(e)       - normalises checked keys: array -> {checkedKeys: e}; object -> taken from e.checked /
 *               e.halfChecked; falsy or any other type -> null.
 * I1(e,t,n,r) - applies check state t to the keys in e using entity Map n, seeded from r.checkedKeys /
 *               r.halfCheckedKeys; propagates down to children and up to parents while skipping nodes for
 *               which Zc is true; returns {checkedKeys, halfCheckedKeys} (half-checked excludes checked keys).
 * N1(e,t)     - the keys in e plus their ancestor keys from entity Map t; the upward walk stops at a node
 *               whose props.disabled is set.
 * US          - declared here; its body is on the next line.
 */
Map,l={posEntities:o,keyEntities:i};return n&&(l=n(l)||l),RR(e,function(s){var c=s.node,d=s.index,f=s.pos,p=s.key,v=s.parentPos,m={node:c,index:d,key:p,pos:f};o.set(f,m),i.set(p,m),m.parent=o.get(v),m.parent&&(m.parent.children=m.parent.children||[],m.parent.children.push(m)),r&&r(m,l)}),a&&a(l),l}function Pb(e){if(!e)return null;var t;if(Array.isArray(e))t={checkedKeys:e,halfCheckedKeys:void 0};else if(kt(e)==="object")t={checkedKeys:e.checked||void 0,halfCheckedKeys:e.halfChecked||void 0};else return null;return t}function I1(e,t,n){var r=arguments.length>3&&arguments[3]!==void 0?arguments[3]:{},a=new Map,o=new Map;(r.checkedKeys||[]).forEach(function($){a.set($,!0)}),(r.halfCheckedKeys||[]).forEach(function($){o.set($,!0)});function i($){if(a.get($)!==t){var O=n.get($);if(!!O){var T=O.children,_=O.parent,I=O.node;if(!Zc(I)){var L=!0,j=!1;(T||[]).filter(function(F){return!Zc(F.node)}).forEach(function(F){var N=F.key,D=a.get(N),z=o.get(N);(D||z)&&(j=!0),D||(L=!1)}),t?a.set($,L):a.set($,!1),o.set($,j),_&&i(_.key)}}}}function l($){if(a.get($)!==t){var O=n.get($);if(!!O){var T=O.children,_=O.node;Zc(_)||(a.set($,t),(T||[]).forEach(function(I){l(I.key)}))}}}function s($){var O=n.get($);if(!!O){var T=O.children,_=O.parent,I=O.node;a.set($,t),!Zc(I)&&((T||[]).filter(function(L){return!Zc(L.node)}).forEach(function(L){l(L.key)}),_&&i(_.key))}}(e||[]).forEach(function($){s($)});var c=[],d=[],f=M1(a),p;try{for(f.s();!(p=f.n()).done;){var v=fn(p.value,2),m=v[0],y=v[1];y&&c.push(m)}}catch($){f.e($)}finally{f.f()}var b=M1(o),C;try{for(b.s();!(C=b.n()).done;){var S=fn(C.value,2),w=S[0],k=S[1];!a.get(w)&&k&&d.push(w)}}catch($){b.e($)}finally{b.f()}return{checkedKeys:c,halfCheckedKeys:d}}function N1(e,t){var n=new Map;function r(a){if(!n.get(a)){var o=t.get(a);if(!!o){n.set(a,!0);var i=o.parent,l=o.node,s=Qe(l);s&&s.disabled||i&&r(i.key)}}}return(e||[]).forEach(function(a){r(a)}),Je(n.keys())}function US(e){return 
/*
 * US body    - keeps only the keys of e that start with "data-" or "aria-".
 * nAe(keys)  - builds a watch map: each watcher sets this.needSyncKeys[key] = true and uses flush:"sync".
 * rAe        - component named "Tree" (props, data, watch, methods, render); it runs through the following
 *              lines and provides itself to descendants as `vcTree`.
 *   data()   - creates the underscore-prefixed internal state and merges in getDerivedState(props, state).
 */
Object.keys(e).reduce(function(t,n){return(n.substr(0,5)==="data-"||n.substr(0,5)==="aria-")&&(t[n]=e[n]),t},{})}function nAe(){var e=arguments.length>0&&arguments[0]!==void 0?arguments[0]:[],t={};return e.forEach(function(n){t[n]={handler:function(){this.needSyncKeys[n]=!0},flush:"sync"}}),t}var rAe=G({name:"Tree",mixins:[nt],provide:function(){return{vcTree:this}},inheritAttrs:!1,props:An({prefixCls:u.string,tabindex:u.oneOfType([u.string,u.number]),children:u.any,treeData:u.array,showLine:u.looseBool,showIcon:u.looseBool,icon:u.oneOfType([u.object,u.func]),focusable:u.looseBool,selectable:u.looseBool,disabled:u.looseBool,multiple:u.looseBool,checkable:an(u.oneOfType([u.object,u.looseBool])),checkStrictly:u.looseBool,draggable:u.looseBool,defaultExpandParent:u.looseBool,autoExpandParent:u.looseBool,defaultExpandAll:u.looseBool,defaultExpandedKeys:u.array,expandedKeys:u.array,defaultCheckedKeys:u.array,checkedKeys:u.oneOfType([u.array,u.object]),defaultSelectedKeys:u.array,selectedKeys:u.array,loadData:u.func,loadedKeys:u.array,filterTreeNode:u.func,openTransitionName:u.string,openAnimation:u.oneOfType([u.string,u.object]),switcherIcon:u.any,__propsSymbol__:u.any},{prefixCls:"rc-tree",showLine:!1,showIcon:!0,selectable:!0,multiple:!1,checkable:!1,disabled:!1,checkStrictly:!1,draggable:!1,defaultExpandParent:!0,autoExpandParent:!1,defaultExpandAll:!1,defaultExpandedKeys:[],defaultCheckedKeys:[],defaultSelectedKeys:[]}),data:function(){cc(this.$props.__propsSymbol__),cc(this.$props.children),this.needSyncKeys={},this.domTreeNodes={};var t={_posEntities:new Map,_keyEntities:new Map,_expandedKeys:[],_selectedKeys:[],_checkedKeys:[],_halfCheckedKeys:[],_loadedKeys:[],_loadingKeys:[],_treeNode:[],_prevProps:null,_dragOverNodeKey:"",_dropPosition:null,_dragNodesKeys:[]};return 
/*
 * watch                 - the nAe watchers flag changed props; the __propsSymbol__ watcher re-derives state and
 *                         then clears needSyncKeys.
 * getDerivedState(t,n)  - t: current props, n: previous state. The local i(name) is true on the first run when
 *                         the prop exists in t, and afterwards when the prop was flagged in needSyncKeys.
 *                         Derives _treeNode/_keyEntities (from treeData via WS or from children), _expandedKeys
 *                         (with N1 when parents must be expanded), _selectedKeys (n_), _checkedKeys and
 *                         _halfCheckedKeys (Pb, then I1 unless checkStrictly) and _loadedKeys.
 * onNodeDragStart       - remembers the dragged node, stores the keys of its subtree (eAe), collapses it
 *                         (eu on _expandedKeys) and emits "dragstart".
 * onNodeDragEnter       - starts here; continues on the next line.
 */
P(P({},t),this.getDerivedState(Qe(this),t))},watch:P(P({},nAe(["treeData","children","expandedKeys","autoExpandParent","selectedKeys","checkedKeys","loadedKeys"])),{__propsSymbol__:function(){this.setState(this.getDerivedState(Qe(this),this.$data)),this.needSyncKeys={}}}),methods:{getDerivedState:function(t,n){var r=n._prevProps,a={_prevProps:P({},t)},o=this;function i(C){return!r&&C in t||r&&o.needSyncKeys[C]}var l=null;if(i("treeData")?l=WS(t.treeData):i("children")&&(l=t.children),l){a._treeNode=l;var s=Dg(l);a._keyEntities=s.keyEntities}var c=a._keyEntities||n._keyEntities;if(i("expandedKeys")||r&&i("autoExpandParent")?a._expandedKeys=t.autoExpandParent||!r&&t.defaultExpandParent?N1(t.expandedKeys,c):t.expandedKeys:!r&&t.defaultExpandAll?a._expandedKeys=Je(c.keys()):!r&&t.defaultExpandedKeys&&(a._expandedKeys=t.autoExpandParent||t.defaultExpandParent?N1(t.defaultExpandedKeys,c):t.defaultExpandedKeys),t.selectable&&(i("selectedKeys")?a._selectedKeys=n_(t.selectedKeys,t):!r&&t.defaultSelectedKeys&&(a._selectedKeys=n_(t.defaultSelectedKeys,t))),t.checkable){var d;if(i("checkedKeys")?d=Pb(t.checkedKeys)||{}:!r&&t.defaultCheckedKeys?d=Pb(t.defaultCheckedKeys)||{}:l&&(d=Pb(t.checkedKeys)||{checkedKeys:n._checkedKeys,halfCheckedKeys:n._halfCheckedKeys}),d){var f=d,p=f.checkedKeys,v=p===void 0?[]:p,m=f.halfCheckedKeys,y=m===void 0?[]:m;if(!t.checkStrictly){var b=I1(v,!0,c);v=b.checkedKeys,y=b.halfCheckedKeys}a._checkedKeys=v,a._halfCheckedKeys=y}}return i("loadedKeys")&&(a._loadedKeys=t.loadedKeys),a},onNodeDragStart:function(t,n){var r=this.$data._expandedKeys,a=n.eventKey,o=ht(n);this.dragNode=n,this.setState({_dragNodesKeys:eAe(typeof o=="function"?o():o,n),_expandedKeys:eu(r,a)}),this.__emit("dragstart",{event:t,node:n})},onNodeDragEnter:function(t,n){var r=this,a=this.$data._expandedKeys,o=n.pos,i=n.eventKey;if(!(!this.dragNode||!n.selectHandle)){var 
/*
 * onNodeDragEnter (cont.) - resets the drag-over state when hovering the middle of the dragged node itself;
 *                           otherwise (inside setTimeout 0) records the drag-over key and drop position, clears
 *                           every pending timer in delayedDragEnterLogic and schedules a 400 ms timer that expands
 *                           the hovered node (only when expandedKeys is not supplied, per vt) and emits "dragenter".
 * onNodeDragOver          - updates _dropPosition when it changed for the current drag-over node; emits
 *                           "dragover" (skipped when the position is unchanged).
 * onNodeDragLeave/End     - clear _dragOverNodeKey and emit "dragleave" / "dragend"; End also clears dragNode.
 * onNodeDrop              - ignores drops onto the dragged subtree; otherwise emits "drop" with dropPosition =
 *                           _dropPosition + last segment of the target pos, and dropToGap when _dropPosition != 0.
 * onNodeClick/DoubleClick - emit "click" / "dblclick".
 * onNodeSelect            - toggles the key in _selectedKeys (single key unless `multiple`), emits "select" with
 *                           the selected nodes resolved through _keyEntities.
 * onNodeCheck             - starts here; continues on the next line.
 */
l=t_(t,n);if(this.dragNode.eventKey===i&&l===0){this.setState({_dragOverNodeKey:"",_dropPosition:null});return}setTimeout(function(){r.setState({_dragOverNodeKey:i,_dropPosition:l}),r.delayedDragEnterLogic||(r.delayedDragEnterLogic={}),Object.keys(r.delayedDragEnterLogic).forEach(function(s){clearTimeout(r.delayedDragEnterLogic[s])}),r.delayedDragEnterLogic[o]=setTimeout(function(){var s=tu(a,i);vt(r,"expandedKeys")||r.setState({_expandedKeys:s}),r.__emit("dragenter",{event:t,node:n,expandedKeys:s})},400)},0)}},onNodeDragOver:function(t,n){var r=n.eventKey,a=this.$data,o=a._dragOverNodeKey,i=a._dropPosition;if(this.dragNode&&r===o&&n.selectHandle){var l=t_(t,n);if(l===i)return;this.setState({_dropPosition:l})}this.__emit("dragover",{event:t,node:n})},onNodeDragLeave:function(t,n){this.setState({_dragOverNodeKey:""}),this.__emit("dragleave",{event:t,node:n})},onNodeDragEnd:function(t,n){this.setState({_dragOverNodeKey:""}),this.__emit("dragend",{event:t,node:n}),this.dragNode=null},onNodeDrop:function(t,n){var r=this.$data,a=r._dragNodesKeys,o=a===void 0?[]:a,i=r._dropPosition,l=n.eventKey,s=n.pos;if(this.setState({_dragOverNodeKey:""}),o.indexOf(l)===-1){var c=JNe(s),d={event:t,node:n,dragNode:this.dragNode,dragNodesKeys:o.slice(),dropPosition:i+Number(c[c.length-1]),dropToGap:!1};i!==0&&(d.dropToGap=!0),this.__emit("drop",d),this.dragNode=null}},onNodeClick:function(t,n){this.__emit("click",t,n)},onNodeDoubleClick:function(t,n){this.__emit("dblclick",t,n)},onNodeSelect:function(t,n){var r=this.$data._selectedKeys,a=this.$data._keyEntities,o=this.$props.multiple,i=Qe(n),l=i.selected,s=i.eventKey,c=!l;c?o?r=tu(r,s):r=[s]:r=eu(r,s);var d=r.map(function(p){var v=a.get(p);return v?v.node:null}).filter(function(p){return p});this.setUncontrolledState({_selectedKeys:r});var f={event:"select",selected:c,node:n,selectedNodes:d,nativeEvent:t};this.__emit("select",r,f)},onNodeCheck:function(t,n,r){var 
/*
 * onNodeCheck (cont.)  - checkStrictly: toggles only this key and emits {checked, halfChecked}; otherwise runs
 *                        I1 to propagate the change and emits the checked keys plus checkedNodes,
 *                        checkedNodesPositions and halfCheckedKeys in the event object.
 * onNodeLoad(node)     - returns a Promise; inside a setState updater it calls props.loadData(node) unless the
 *                        key is already loaded/loading, marks the key as loading, and when the returned promise
 *                        resolves emits "load", records the key as loaded and resolves.
 *                        NOTE(review): the result of loadData is used with .then() directly and no rejection
 *                        handler is attached - confirm loadData always returns a promise.
 * onNodeExpand         - toggles the key in _expandedKeys, emits "expand"; when expanding with loadData set,
 *                        returns the onNodeLoad promise chained with a second state update, else null.
 * onNodeMouseEnter/Leave, onNodeContextMenu - emit "mouseenter" / "mouseleave" / "rightClick"
 *                        (context menu also calls preventDefault).
 * setUncontrolledState - starts here; continues on the next line.
 */
a=this.$data,o=a._keyEntities,i=a._checkedKeys,l=a._halfCheckedKeys,s=this.$props.checkStrictly,c=Qe(n),d=c.eventKey,f,p={event:"check",node:n,checked:r,nativeEvent:t};if(s){var v=r?tu(i,d):eu(i,d),m=eu(l,d);f={checked:v,halfChecked:m},p.checkedNodes=v.map(function(S){return o.get(S)}).filter(function(S){return S}).map(function(S){return S.node}),this.setUncontrolledState({_checkedKeys:v})}else{var y=I1([d],r,o,{checkedKeys:i,halfCheckedKeys:l}),b=y.checkedKeys,C=y.halfCheckedKeys;f=b,p.checkedNodes=[],p.checkedNodesPositions=[],p.halfCheckedKeys=C,b.forEach(function(S){var w=o.get(S);if(!!w){var k=w.node,$=w.pos;p.checkedNodes.push(k),p.checkedNodesPositions.push({node:k,pos:$})}}),this.setUncontrolledState({_checkedKeys:b,_halfCheckedKeys:C})}this.__emit("check",f,p)},onNodeLoad:function(t){var n=this;return new Promise(function(r){n.setState(function(a){var o=a._loadedKeys,i=o===void 0?[]:o,l=a._loadingKeys,s=l===void 0?[]:l,c=n.$props.loadData,d=Qe(t),f=d.eventKey;if(!c||i.indexOf(f)!==-1||s.indexOf(f)!==-1)return{};var p=c(t);return p.then(function(){var v=n.$data,m=v._loadedKeys,y=v._loadingKeys,b=tu(m,f),C=eu(y,f);n.__emit("load",b,{event:"load",node:t}),n.setUncontrolledState({_loadedKeys:b}),n.setState({_loadingKeys:C}),r()}),{_loadingKeys:tu(s,f)}})})},onNodeExpand:function(t,n){var r=this,a=this.$data._expandedKeys,o=this.$props.loadData,i=Qe(n),l=i.eventKey,s=i.expanded;a.indexOf(l);var c=!s;if(c?a=tu(a,l):a=eu(a,l),this.setUncontrolledState({_expandedKeys:a}),this.__emit("expand",a,{node:n,expanded:c,nativeEvent:t}),c&&o){var d=this.onNodeLoad(n);return d?d.then(function(){r.setUncontrolledState({_expandedKeys:a})}):null}return null},onNodeMouseEnter:function(t,n){this.__emit("mouseenter",{event:t,node:n})},onNodeMouseLeave:function(t,n){this.__emit("mouseleave",{event:t,node:n})},onNodeContextMenu:function(t,n){t.preventDefault(),this.__emit("rightClick",{event:t,node:n})},setUncontrolledState:function(t){var 
/*
 * setUncontrolledState (cont.) - applies only those state keys whose name, with the first "_" removed, is not
 *                                present in the props returned by Qe(this).
 * registerTreeNode(key,node)   - stores or deletes the entry in this.domTreeNodes.
 * isKeyChecked(key)            - membership test against _checkedKeys.
 * renderTreeNode(t,n,r)        - t: child vnode, n: index, r: parent pos (default 0). Returns null when the key
 *                                is not in _keyEntities; otherwise clones the vnode (Ot) with the per-node
 *                                state props (expanded, selected, loaded, loading, checked, pos, drag flags).
 * render                       - root <ul role="tree"> with data- and aria- attributes (US) and mapped children.
 * After the component: Wf aliases rAe, Wf.TreeNode = AR, r_ is another object-rest helper, aAe = Wf.TreeNode.
 * The trailing `function` keyword belongs to FR, defined on the next line.
 */
n=!1,r={},a=Qe(this);Object.keys(t).forEach(function(o){o.replace("_","")in a||(n=!0,r[o]=t[o])}),n&&this.setState(r)},registerTreeNode:function(t,n){n?this.domTreeNodes[t]=n:delete this.domTreeNodes[t]},isKeyChecked:function(t){var n=this.$data._checkedKeys,r=n===void 0?[]:n;return r.indexOf(t)!==-1},renderTreeNode:function(t,n){var r=arguments.length>2&&arguments[2]!==void 0?arguments[2]:0,a=this.$data,o=a._keyEntities,i=a._expandedKeys,l=i===void 0?[]:i,s=a._selectedKeys,c=s===void 0?[]:s,d=a._halfCheckedKeys,f=d===void 0?[]:d,p=a._loadedKeys,v=p===void 0?[]:p,m=a._loadingKeys,y=m===void 0?[]:m,b=a._dragOverNodeKey,C=a._dropPosition,S=DR(r,n),w=t.key;return!w&&w==null&&(w=S),o.get(w)?Ot(t,{eventKey:w,expanded:l.indexOf(w)!==-1,selected:c.indexOf(w)!==-1,loaded:v.indexOf(w)!==-1,loading:y.indexOf(w)!==-1,checked:this.isKeyChecked(w),halfChecked:f.indexOf(w)!==-1,pos:S,dragOver:b===w&&C===0,dragOverGapTop:b===w&&C===-1,dragOverGapBottom:b===w&&C===1,key:w}):null}},render:function(){var t=this,n=this.$data._treeNode,r=this.$props,a=r.prefixCls,o=r.focusable,i=r.showLine,l=r.tabindex,s=l===void 0?0:l,c=US(P(P({},this.$props),this.$attrs)),d=this.$attrs,f=d.class,p=d.style;return g("ul",le(le({},c),{},{class:Se(a,f,V({},"".concat(a,"-show-line"),i)),style:p,role:"tree",unselectable:"on",tabindex:o?s:null}),[LR(n,function(v,m){return t.renderTreeNode(v,m)})])}}),Wf=rAe;Wf.TreeNode=AR;var r_=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},aAe=Wf.TreeNode;function 
/*
 * FR()  - returns the prop-type table shared by the "ATree" and "ADirectoryTree" components.
 * Po    - component named "ATree": a wrapper that renders Wf with a prefixed class, a checkbox inner span and a
 *         switcher-icon renderer.
 *   renderSwitcherIcon(t,n,r) - t: prefix class, n: custom switcher icon, r: {isLeaf, loading, expanded}.
 *                               Loading -> co; leaf -> zS when showLine, else null; custom icon -> cloned with the
 *                               switcher class; otherwise jNe / YNe (showLine, by expanded) or SR.
 *   updateTreeData            - starts here; continues on the next line.
 */
FR(){return{showLine:u.looseBool,multiple:u.looseBool,autoExpandParent:u.looseBool,checkStrictly:u.looseBool,checkable:u.looseBool,disabled:u.looseBool,defaultExpandAll:u.looseBool,defaultExpandParent:u.looseBool,defaultExpandedKeys:u.arrayOf(u.oneOfType([u.string,u.number])),expandedKeys:u.arrayOf(u.oneOfType([u.string,u.number])),checkedKeys:u.oneOfType([u.arrayOf(u.oneOfType([u.string,u.number])),u.shape({checked:u.arrayOf(u.oneOfType([u.string,u.number])),halfChecked:u.arrayOf(u.oneOfType([u.string,u.number]))}).loose]),defaultCheckedKeys:u.arrayOf(u.oneOfType([u.string,u.number])),selectedKeys:u.arrayOf(u.oneOfType([u.string,u.number])),defaultSelectedKeys:u.arrayOf(u.oneOfType([u.string,u.number])),selectable:u.looseBool,filterAntTreeNode:u.func,loadData:u.func,loadedKeys:u.arrayOf(u.oneOfType([u.string,u.number])),draggable:u.looseBool,showIcon:u.looseBool,icon:u.func,switcherIcon:u.any,prefixCls:u.string,filterTreeNode:u.func,openAnimation:u.any,treeData:{type:Array},replaceFields:u.object,blockNode:u.looseBool,onExpand:u.func,onCheck:u.func,onSelect:u.func,onClick:u.func,onDoubleclick:u.func,onDblclick:u.func,"onUpdate:selectedKeys":u.func,"onUpdate:checkedKeys":u.func,"onUpdate:expandedKeys":u.func}}var Po=G({name:"ATree",inheritAttrs:!1,props:Rn(FR(),{checkable:!1,showIcon:!1,openAnimation:P(P({},PD),{appear:null}),blockNode:!1}),setup:function(){return{tree:null,configProvider:ve("configProvider",St)}},TreeNode:aAe,methods:{renderSwitcherIcon:function(t,n,r){var a=r.isLeaf,o=r.loading,i=r.expanded,l=this.$props.showLine;if(o)return g(co,{class:"".concat(t,"-switcher-loading-icon")},null);if(a)return l?g(zS,{class:"".concat(t,"-switcher-line-icon")},null):null;var s="".concat(t,"-switcher-icon");return n?Ot(n,{class:s}):l?i?g(jNe,{class:"".concat(t,"-switcher-line-icon")},null):g(YNe,{class:"".concat(t,"-switcher-line-icon")},null):g(SR,{class:s},null)},updateTreeData:function(t){var 
/*
 * updateTreeData (cont.) - maps each treeData item to node props: key/children/title are read through the
 *                          replaceFields mapping (defaults "key"/"children"/"title"); icon, switcherIcon and
 *                          title may come from named slots listed in item.slots; the original item is kept as
 *                          dataRef; children are converted recursively.
 * setTreeRef             - stores the inner tree instance.
 * handleCheck/Expand/Select - emit the matching "update:xxxKeys" event followed by "check"/"expand"/"select".
 * render                 - builds the props for Wf (treeData is only passed when present) and renders it with a
 *                          fresh __propsSymbol__ array on every render.
 * oAe / iAe              - SVG data for the "folder-open" outlined icon.
 * a_                     - starts here; continues on the next line.
 */
n=this,r=this.$slots,a={children:"children",title:"title",key:"key"},o=P(P({},a),this.$props.replaceFields);return t.map(function(i){var l=i[o.key],s=i[o.children],c=i.slots,d=c===void 0?{}:c,f=i.class,p=i.style,v=r_(i,["slots","class","style"]),m=P(P({},v),{icon:r[d.icon]||v.icon,switcherIcon:r[d.switcherIcon]||v.switcherIcon,title:r[d.title]||r.title||v[o.title],dataRef:i,key:l,class:f,style:p});return s?P(P({},m),{children:n.updateTreeData(s)}):m})},setTreeRef:function(t){this.tree=t},handleCheck:function(t,n){this.$emit("update:checkedKeys",t),this.$emit("check",t,n)},handleExpand:function(t,n){this.$emit("update:expandedKeys",t),this.$emit("expand",t,n)},handleSelect:function(t,n){this.$emit("update:selectedKeys",t),this.$emit("select",t,n)}},render:function(){var t=this,n,r=Qe(this),a=r.prefixCls,o=r.showIcon,i=r.treeNodes,l=r.blockNode,s=this.configProvider.getPrefixCls,c=s("tree",a),d=We(this,"switcherIcon"),f=r.checkable,p=r.treeData||i;p&&(p=this.updateTreeData(p));var v=this.$attrs,m=v.class,y=r_(v,["class"]),b=P(P(P(P({},r),{prefixCls:c,checkable:f&&g("span",{class:"".concat(c,"-checkbox-inner")},null),children:ht(this),switcherIcon:function(S){return t.renderSwitcherIcon(c,d,S)},ref:this.setTreeRef}),y),{class:Se(m,(n={},V(n,"".concat(c,"-icon-hide"),!o),V(n,"".concat(c,"-block-node"),l),n)),onCheck:this.handleCheck,onExpand:this.handleExpand,onSelect:this.handleSelect});return p&&(b.treeData=p),g(Wf,le(le({},b),{},{__propsSymbol__:[]}),null)}}),oAe={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M928 444H820V330.4c0-17.7-14.3-32-32-32H473L355.7 186.2a8.15 8.15 0 00-5.5-2.2H96c-17.7 0-32 14.3-32 32v592c0 17.7 14.3 32 32 32h698c13 0 24.8-7.9 29.7-20l134-332c1.5-3.8 2.3-7.9 2.3-12 0-17.7-14.3-32-32-32zM136 256h188.5l119.6 114.4H748V444H238c-13 0-24.8 7.9-29.7 20L136 643.2V256zm635.3 512H159l103.3-256h612.4L771.3 768z"}}]},name:"folder-open",theme:"outlined"},iAe=oAe;function a_(e){for(var 
/*
 * a_ (cont.), o_  - object-spread helpers: copy own enumerable string and symbol keys of every later argument
 *                   onto the first, using lAe / dAe.
 * lAe, dAe        - define-or-assign helpers (Object.defineProperty when the key already exists on the target).
 * YS / sAe        - functional icon component "FolderOpenOutlined" (renders Et with icon iAe).
 * uAe / cAe       - SVG data for the "folder" outlined icon.
 * qS / fAe        - functional icon component "FolderOutlined".
 * ni              - enum object {None: 0, Start: 1, End: 2} with reverse mapping; used by pAe.
 * GS(e,t)         - visits the tree nodes in e (filtered by KS) depth-first, calling t(key, node); a node's
 *                   children are skipped when the callback returns false.
 * hAe(e)          - all keys of e, obtained from Dg(e).keyEntities.
 * pAe(e,t,n,r)    - starts here: returns the keys encountered between keys n and r (inclusive) in traversal
 *                   order; [n] when n === r, [] when either is missing. Continues on the next line.
 */
t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){lAe(e,a,n[a])})}return e}function lAe(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var YS=function(t,n){var r=a_({},t,n.attrs);return g(Et,a_({},r,{icon:iAe}),null)};YS.displayName="FolderOpenOutlined";YS.inheritAttrs=!1;var sAe=YS,uAe={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M880 298.4H521L403.7 186.2a8.15 8.15 0 00-5.5-2.2H144c-17.7 0-32 14.3-32 32v592c0 17.7 14.3 32 32 32h736c17.7 0 32-14.3 32-32V330.4c0-17.7-14.3-32-32-32zM840 768H184V256h188.5l119.6 114.4H840V768z"}}]},name:"folder",theme:"outlined"},cAe=uAe;function o_(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){dAe(e,a,n[a])})}return e}function dAe(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var qS=function(t,n){var r=o_({},t,n.attrs);return g(Et,o_({},r,{icon:cAe}),null)};qS.displayName="FolderOutlined";qS.inheritAttrs=!1;var fAe=qS,ni;(function(e){e[e.None=0]="None",e[e.Start=1]="Start",e[e.End=2]="End"})(ni||(ni={}));function GS(e,t){var n=KS(e)||[];function r(a){var o=a.key,i=ht(a);t(o,a)!==!1&&GS(i,t)}n.forEach(r)}function hAe(e){var t=Dg(e),n=t.keyEntities;return Je(n.keys())}function pAe(e,t,n,r){var a=[],o=ni.None;if(n&&n===r)return[n];if(!n||!r)return[];function i(l){return l===n||l===r}return GS(e,function(l){if(o===ni.End)return!1;if(i(l)){if(a.push(l),o===ni.None)o=ni.Start;else 
if(o===ni.Start)return o=ni.End,!1}else o===ni.Start&&a.push(l);return t.indexOf(l)!==-1}),a}function i_(e,t){var n=Je(t),r=[];return GS(e,function(a,o){var i=n.indexOf(a);return i!==-1&&(r.push(o),n.splice(i,1)),!!n.length}),r}function BR(e){var t=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{},n=[],r=t.key,a=r===void 0?"key":r,o=t.children,i=o===void 0?"children":o;return(e||[]).forEach(function(l){n.push(l[a]),l[i]&&(n=[].concat(Je(n),Je(BR(l[i],t))))}),n}var l_=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n};function vAe(e){var t=e.isLeaf,n=e.expanded;return g(t?zS:n?sAe:fAe,null,null)}var dm=G({name:"ADirectoryTree",mixins:[nt],inheritAttrs:!1,props:Rn(P(P({},FR()),{expandAction:u.oneOf([!1,"click","doubleclick","dblclick"])}),{showIcon:!0,expandAction:"click"}),setup:function(){return{children:null,onDebounceExpand:null,tree:null,lastSelectedKey:"",cachedSelectedKeys:[],configProvider:ve("configProvider",St)}},data:function(){var t=Qe(this),n=t.defaultExpandAll,r=t.defaultExpandParent,a=t.expandedKeys,o=t.defaultExpandedKeys,i=ht(this),l=Dg(i),s=l.keyEntities,c={};return 
/*
 * dm.data() (cont.)    - initial _selectedKeys from selectedKeys || defaultSelectedKeys || []; initial
 *                        _expandedKeys: every key (BR on treeData, or hAe on slot children) when
 *                        defaultExpandAll, else N1(...) when defaultExpandParent, else the raw keys.
 * watch                - mirrors the expandedKeys / selectedKeys props into state.
 * created              - onDebounceExpand = Yn(expandFolderNode, 200, {leading: true}).
 * handleExpand         - stores the keys (when uncontrolled) and emits "update:expandedKeys" and "expand".
 * handleClick / handleDoubleClick - expand the folder when expandAction matches, then re-emit the event
 *                        (double click emits both "doubleclick" and "dblclick").
 * hanldeSelect         - (sic) selection with modifiers when `multiple`: ctrl/meta uses the given keys and
 *                        remembers the anchor; shift adds the range from pAe between the clicked key and the
 *                        anchor to the cached selection; otherwise selects only the clicked key.
 * setTreeRef           - stores the inner ATree instance.
 * expandFolderNode     - calls the inner tree's onNodeExpand unless the node is a leaf or a modifier key is held.
 * setUncontrolledState - drops state keys whose "_"-prefixed name matches a prop present in Qe(this) (via bn).
 * handleCheck          - emits "update:checkedKeys" and "check".
 * render               - starts here; continues on the next line.
 */
c._selectedKeys=t.selectedKeys||t.defaultSelectedKeys||[],n?t.treeData?c._expandedKeys=BR(t.treeData,t.replaceFields):c._expandedKeys=hAe(i):r?c._expandedKeys=N1(a||o,s):c._expandedKeys=a||o,P({_selectedKeys:[],_expandedKeys:[]},c)},watch:{expandedKeys:function(t){this.setState({_expandedKeys:t})},selectedKeys:function(t){this.setState({_selectedKeys:t})}},created:function(){this.onDebounceExpand=Yn(this.expandFolderNode,200,{leading:!0})},methods:{handleExpand:function(t,n){this.setUncontrolledState({_expandedKeys:t}),this.$emit("update:expandedKeys",t),this.$emit("expand",t,n)},handleClick:function(t,n){var r=this.$props.expandAction;r==="click"&&this.onDebounceExpand(t,n),this.$emit("click",t,n)},handleDoubleClick:function(t,n){var r=this.$props.expandAction;(r==="dblclick"||r==="doubleclick")&&this.onDebounceExpand(t,n),this.$emit("doubleclick",t,n),this.$emit("dblclick",t,n)},hanldeSelect:function(t,n){var r=this.$props.multiple,a=this.children||[],o=this.$data._expandedKeys,i=o===void 0?[]:o,l=n.node,s=n.nativeEvent,c=l.eventKey,d=c===void 0?"":c,f={},p=P(P({},n),{selected:!0}),v=s.ctrlKey||s.metaKey,m=s.shiftKey,y;r&&v?(y=t,this.lastSelectedKey=d,this.cachedSelectedKeys=y,p.selectedNodes=i_(a,y)):r&&m?(y=Array.from(new Set([].concat(Je(this.cachedSelectedKeys||[]),Je(pAe(a,i,d,this.lastSelectedKey))))),p.selectedNodes=i_(a,y)):(y=[d],this.lastSelectedKey=d,this.cachedSelectedKeys=y,p.selectedNodes=[n.node]),f._selectedKeys=y,this.$emit("update:selectedKeys",y),this.$emit("select",y,p),this.setUncontrolledState(f)},setTreeRef:function(t){this.tree=t},expandFolderNode:function(t,n){var r=n.isLeaf;if(!(r||t.shiftKey||t.metaKey||t.ctrlKey)&&this.tree.tree){var a=this.tree.tree;a.onNodeExpand(t,n)}},setUncontrolledState:function(t){var n=bn(t,Object.keys(Qe(this)).map(function(r){return"_".concat(r)}));Object.keys(n).length&&this.setState(n)},handleCheck:function(t,n){this.$emit("update:checkedKeys",t),this.$emit("check",t,n)}},render:function(){var 
/*
 * dm.render (cont.) - renders Po with the "-directory" class, the state-held expandedKeys / selectedKeys, vAe as
 *                     default icon, and this component's handlers; the three onUpdate:xxx listeners are removed
 *                     from the forwarded props (bn).
 * Registration      - Po.TreeNode.name = "ATreeNode", Po.DirectoryTree = dm, Po.install registers all three
 *                     components on the app; mAe aliases Po.TreeNode.
 * Jp(e)             - typeof helper that reports "symbol" for Symbol values in environments without native support.
 * gAe               - define-or-assign property helper.
 * s_(e,t)           - own keys of e including symbols (only enumerable symbols when t is truthy).
 * yAe(target,...)   - object-spread helper; odd-positioned sources are copied by value, even-positioned ones
 *                     by property descriptor.
 * bAe               - source text of a regular expression matching a signed decimal number with optional exponent.
 * CAe(e)            - starts here; continues on the next line.
 */
t=this;this.children=ht(this);var n=Qe(this),r=n.prefixCls,a=l_(n,["prefixCls"]),o=this.configProvider.getPrefixCls,i=o("tree",r),l=this.$data,s=l._expandedKeys,c=l._selectedKeys,d=this.$attrs,f=d.class,p=l_(d,["class"]),v=Se("".concat(i,"-directory"),f),m=P(P(P({icon:vAe},p),bn(a,["onUpdate:selectedKeys","onUpdate:checkedKeys","onUpdate:expandedKeys"])),{prefixCls:i,expandedKeys:s,selectedKeys:c,switcherIcon:We(this,"switcherIcon"),ref:this.setTreeRef,class:v,onSelect:this.hanldeSelect,onClick:this.handleClick,onDblclick:this.handleDoubleClick,onExpand:this.handleExpand,onCheck:this.handleCheck});return g(Po,m,le({default:function(){return[t.children]}},bn(this.$slots,["default"])))}});Po.TreeNode.name="ATreeNode";Po.DirectoryTree=dm;Po.install=function(e){return e.component(Po.name,Po),e.component(Po.TreeNode.name,Po.TreeNode),e.component(dm.name,dm),e};var mAe=Po.TreeNode;function Jp(e){return typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?Jp=function(t){return typeof t}:Jp=function(t){return t&&typeof Symbol=="function"&&t.constructor===Symbol&&t!==Symbol.prototype?"symbol":typeof t},Jp(e)}function gAe(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function s_(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter(function(a){return Object.getOwnPropertyDescriptor(e,a).enumerable})),n.push.apply(n,r)}return n}function yAe(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?arguments[t]:{};t%2?s_(n,!0).forEach(function(r){gAe(e,r,n[r])}):Object.getOwnPropertyDescriptors?Object.defineProperties(e,Object.getOwnPropertyDescriptors(n)):s_(n).forEach(function(r){Object.defineProperty(e,r,Object.getOwnPropertyDescriptor(n,r))})}return e}var bAe=/[\-+]?(?:\d*\.|)\d+(?:[eE][\-+]?\d+|)/.source;function CAe(e){var t,n,r,a=e.ownerDocument,o=a.body,i=a&&a.documentElement;return 
/*
 * CAe (cont.)   - {left, top} of e's bounding client rect minus the document element's (or body's)
 *                 clientLeft / clientTop.
 * VR(win,top)   - scroll offset of a window: pageYOffset when `top` is truthy, else pageXOffset, falling back
 *                 to documentElement / body scrollTop / scrollLeft when that is not a number.
 * A1, D1        - horizontal and vertical scroll offset (VR without / with the flag).
 * zR(e)         - CAe(e) shifted by the owning window's scroll offsets.
 * wAe(e,t,cs)   - value of style property t from getComputedStyle (cs may be a precomputed style object).
 * SAe, kAe      - regexes: a number followed by a unit other than px; the names top/right/bottom/left.
 * OAe(e,t)      - fallback style reader based on e.currentStyle / e.runtimeStyle; returns "auto" for "".
 * Ss            - assigned only when `window` exists: wAe if window.getComputedStyle is available, else OAe.
 * XS(arr,fn)    - calls fn(arr[i]) for each element.
 * HR(e)         - true when the boxSizing style of e is "border-box".
 * PAe, R1, TAe, L1, xAe - ["margin","border","padding"] and the numeric constants -1, 2, 1, 0.
 *                 NOTE(review): presumably the box-model selectors consumed by the size helper on the next
 *                 line - confirm there.
 * _Ae(e,t,fn)   - temporarily applies the styles in t to e, calls fn with this = e, then restores them.
 * sd(e,t,n)     - sums parseFloat of the computed styles <prop><side> (border uses border<side>Width) for every
 *                 prop in t and side in n; non-numeric values count as 0.
 * Qp(e)         - e != null && e == e.window.
 * Eo            - object that receives docWidth / docHeight / viewportWidth / viewportHeight here.
 * The trailing `function` keyword belongs to u_, defined on the next line.
 */
t=e.getBoundingClientRect(),n=t.left,r=t.top,n-=i.clientLeft||o.clientLeft||0,r-=i.clientTop||o.clientTop||0,{left:n,top:r}}function VR(e,t){var n=e["page".concat(t?"Y":"X","Offset")],r="scroll".concat(t?"Top":"Left");if(typeof n!="number"){var a=e.document;n=a.documentElement[r],typeof n!="number"&&(n=a.body[r])}return n}function A1(e){return VR(e)}function D1(e){return VR(e,!0)}function zR(e){var t=CAe(e),n=e.ownerDocument,r=n.defaultView||n.parentWindow;return t.left+=A1(r),t.top+=D1(r),t}function wAe(e,t,n){var r="",a=e.ownerDocument,o=n||a.defaultView.getComputedStyle(e,null);return o&&(r=o.getPropertyValue(t)||o[t]),r}var SAe=new RegExp("^(".concat(bAe,")(?!px)[a-z%]+$"),"i"),kAe=/^(top|right|bottom|left)$/,Tb="currentStyle",xb="runtimeStyle",El="left",$Ae="px";function OAe(e,t){var n=e[Tb]&&e[Tb][t];if(SAe.test(n)&&!kAe.test(t)){var r=e.style,a=r[El],o=e[xb][El];e[xb][El]=e[Tb][El],r[El]=t==="fontSize"?"1em":n||0,n=r.pixelLeft+$Ae,r[El]=a,e[xb][El]=o}return n===""?"auto":n}var Ss;typeof window!="undefined"&&(Ss=window.getComputedStyle?wAe:OAe);function XS(e,t){for(var n=0;n<e.length;n++)t(e[n])}function HR(e){return Ss(e,"boxSizing")==="border-box"}var PAe=["margin","border","padding"],R1=-1,TAe=2,L1=1,xAe=0;function _Ae(e,t,n){var r={},a=e.style,o;for(o in t)t.hasOwnProperty(o)&&(r[o]=a[o],a[o]=t[o]);n.call(e);for(o in t)t.hasOwnProperty(o)&&(a[o]=r[o])}function sd(e,t,n){var r=0,a,o,i;for(o=0;o<t.length;o++)if(a=t[o],a)for(i=0;i<n.length;i++){var l=void 0;a==="border"?l="".concat(a+n[i],"Width"):l=a+n[i],r+=parseFloat(Ss(e,l))||0}return r}function Qp(e){return e!=null&&e==e.window}var Eo={};XS(["Width","Height"],function(e){Eo["doc".concat(e)]=function(t){var n=t.document;return Math.max(n.documentElement["scroll".concat(e)],n.body["scroll".concat(e)],Eo["viewport".concat(e)](n))},Eo["viewport".concat(e)]=function(t){var n="client".concat(e),r=t.document,a=r.body,o=r.documentElement,i=o[n];return r.compatMode==="CSS1Compat"&&i||a&&a[n]||i}});function 
u_(e,t,n){if(Qp(e))return t==="width"?Eo.viewportWidth(e):Eo.viewportHeight(e);if(e.nodeType===9)return t==="width"?Eo.docWidth(e):Eo.docHeight(e);var r=t==="width"?["Left","Right"]:["Top","Bottom"],a=t==="width"?e.offsetWidth:e.offsetHeight;Ss(e);var o=HR(e),i=0;(a==null||a<=0)&&(a=void 0,i=Ss(e,t),(i==null||Number(i)<0)&&(i=e.style[t]||0),i=parseFloat(i)||0),n===void 0&&(n=o?L1:R1);var l=a!==void 0||o,s=a||i;if(n===R1)return l?s-sd(e,["border","padding"],r):i;if(l){var c=n===TAe?-sd(e,["border"],r):sd(e,["margin"],r);return s+(n===L1?0:c)}return i+sd(e,PAe.slice(n),r)}var EAe={position:"absolute",visibility:"hidden",display:"block"};function c_(e){var t,n=arguments;return e.offsetWidth!==0?t=u_.apply(void 0,n):_Ae(e,EAe,function(){t=u_.apply(void 0,n)}),t}function Fu(e,t,n){var r=n;if(Jp(t)==="object"){for(var a in t)t.hasOwnProperty(a)&&Fu(e,a,t[a]);return}if(typeof r!="undefined"){typeof r=="number"&&(r+="px"),e.style[t]=r;return}return Ss(e,t)}XS(["width","height"],function(e){var t=e.charAt(0).toUpperCase()+e.slice(1);Eo["outer".concat(t)]=function(r,a){return r&&c_(r,e,a?xAe:L1)};var n=e==="width"?["Left","Right"]:["Top","Bottom"];Eo[e]=function(r,a){if(a!==void 0){if(r){Ss(r);var o=HR(r);return o&&(a+=sd(r,["padding","border"],n)),Fu(r,e,a)}return}return r&&c_(r,e,R1)}});function MAe(e,t){Fu(e,"position")==="static"&&(e.style.position="relative");var n=zR(e),r={},a,o;for(o in t)t.hasOwnProperty(o)&&(a=parseFloat(Fu(e,o))||0,r[o]=a+t[o]-n[o]);Fu(e,r)}var On=yAe({getWindow:function(t){var n=t.ownerDocument||t;return n.defaultView||n.parentWindow},offset:function(t,n){if(typeof n!="undefined")MAe(t,n);else return zR(t)},isWindow:Qp,each:XS,css:Fu,clone:function(t){var n={};for(var r in t)t.hasOwnProperty(r)&&(n[r]=t[r]);var a=t.overflow;if(a)for(var o in t)t.hasOwnProperty(o)&&(n.overflow[o]=t.overflow[o]);return n},scrollLeft:function(t,n){if(Qp(t)){if(n===void 0)return A1(t);window.scrollTo(n,D1(t))}else{if(n===void 0)return 
/*
 * On.scrollLeft (cont.), On.scrollTop - getter when the value is undefined, setter otherwise; a window target
 *                 is read through A1 / D1 and written with window.scrollTo.
 * On.viewportWidth / viewportHeight - 0 in the literal; Eo is passed to yAe as a further source.
 * IAe(e,t,n)    - scrolls container t (a document node is replaced by its window) so that element e becomes
 *                 visible. Options read from n: allowHorizontalScroll (default true), onlyScrollIfNeeded,
 *                 alignWithTop, alignWithLeft, offsetTop, offsetLeft, offsetBottom, offsetRight (default 0).
 *                 When e overflows the container, an explicit alignWithTop / alignWithLeft picks the edge,
 *                 otherwise the overflowing edge is used; when e is already visible, scrolling only happens if
 *                 onlyScrollIfNeeded is falsy.
 * NAe(e,t)      - class test via classList.contains, with a className string fallback.
 * ZS, Rg, mf    - the strings "SHOW_ALL", "SHOW_PARENT", "SHOW_CHILD".
 * AAe(e,t)      - nearest node, starting at e and walking parentNode, that has class t; null when none.
 * jR(e)         - e when it is a string, otherwise null.
 * DAe(e)        - [] for null/undefined, e itself when it is an array, else [e].
 * Si            - starts here; continues on the next line.
 */
t.scrollLeft;t.scrollLeft=n}},scrollTop:function(t,n){if(Qp(t)){if(n===void 0)return D1(t);window.scrollTo(A1(t),n)}else{if(n===void 0)return t.scrollTop;t.scrollTop=n}},viewportWidth:0,viewportHeight:0},Eo);function IAe(e,t,n){n=n||{},t.nodeType===9&&(t=On.getWindow(t));var r=n.allowHorizontalScroll,a=n.onlyScrollIfNeeded,o=n.alignWithTop,i=n.alignWithLeft,l=n.offsetTop||0,s=n.offsetLeft||0,c=n.offsetBottom||0,d=n.offsetRight||0;r=r===void 0?!0:r;var f=On.isWindow(t),p=On.offset(e),v=On.outerHeight(e),m=On.outerWidth(e),y,b,C,S,w,k,$,O,T,_;f?($=t,_=On.height($),T=On.width($),O={left:On.scrollLeft($),top:On.scrollTop($)},w={left:p.left-O.left-s,top:p.top-O.top-l},k={left:p.left+m-(O.left+T)+d,top:p.top+v-(O.top+_)+c},S=O):(y=On.offset(t),b=t.clientHeight,C=t.clientWidth,S={left:t.scrollLeft,top:t.scrollTop},w={left:p.left-(y.left+(parseFloat(On.css(t,"borderLeftWidth"))||0))-s,top:p.top-(y.top+(parseFloat(On.css(t,"borderTopWidth"))||0))-l},k={left:p.left+m-(y.left+C+(parseFloat(On.css(t,"borderRightWidth"))||0))+d,top:p.top+v-(y.top+b+(parseFloat(On.css(t,"borderBottomWidth"))||0))+c}),w.top<0||k.top>0?o===!0?On.scrollTop(t,S.top+w.top):o===!1?On.scrollTop(t,S.top+k.top):w.top<0?On.scrollTop(t,S.top+w.top):On.scrollTop(t,S.top+k.top):a||(o=o===void 0?!0:!!o,o?On.scrollTop(t,S.top+w.top):On.scrollTop(t,S.top+k.top)),r&&(w.left<0||k.left>0?i===!0?On.scrollLeft(t,S.left+w.left):i===!1?On.scrollLeft(t,S.left+k.left):w.left<0?On.scrollLeft(t,S.left+w.left):On.scrollLeft(t,S.left+k.left):a||(i=i===void 0?!0:!!i,i?On.scrollLeft(t,S.left+w.left):On.scrollLeft(t,S.left+k.left)))}function NAe(e,t){if(e.classList)return e.classList.contains(t);var n=e.className;return" ".concat(n," ").indexOf(" ".concat(t," "))>-1}var ZS="SHOW_ALL",Rg="SHOW_PARENT",mf="SHOW_CHILD";function AAe(e,t){for(var n=e;n;){if(NAe(n,t))return n;n=n.parentNode}return null}function jR(e){return typeof e=="string"?e:null}function DAe(e){return e==null?[]:Array.isArray(e)?e:[e]}function Si(){var 
e=function(n){e.current=n};return e}/* Inline style and attribute objects spread onto selection items to prevent text selection. */var RAe={userSelect:"none",WebkitUserSelect:"none"},LAe={unselectable:"unselectable"};/* F1(entities): builds a forest from a flat list of entities addressed by a dash-separated `pos`. Every entity is shallow-copied without `children`; a copy is attached to the copy whose pos equals its own pos minus the last segment, and copies without such a parent become roots. `key` and the temporary `fields` are deleted from the copies. Returns [] for an empty list. */function F1(e){if(!e.length)return[];var t={},n={},r=e.slice().map(function(a){var o=P(P({},a),{fields:a.pos.split("-")});return delete o.children,o});return r.forEach(function(a){n[a.pos]=a}),r.sort(function(a,o){return a.fields.length-o.fields.length}),r.forEach(function(a){var o=a.fields.slice(0,-1).join("-"),i=n[o];i?(i.children=i.children||[],i.children.push(a)):t[a.pos]=a,delete a.key,delete a.fields}),Object.keys(t).map(function(a){return t[a]})}/* Module-level counter used by FAe. */var d_=0;/* FAe(prefix): returns prefix + "_" + the next counter value. */function FAe(e){return d_+=1,"".concat(e,"_").concat(d_)}/* KR(props): true when treeCheckable and treeCheckStrictly are both truthy, otherwise props.labelInValue || false. */function KR(e){var t=e.treeCheckable,n=e.treeCheckStrictly,r=e.labelInValue;return t&&n?!0:r||!1}/* BAe(list, {id, pId, rootPId}): converts flat data into a tree using the given field names. Items are shallow-copied and `key` defaults to the id field. An item is returned as a root when its pId equals rootPId, or when its parent is missing and rootPId is null. */function BAe(e,t){var n=t.id,r=t.pId,a=t.rootPId,o={},i=[],l=e.map(function(s){var c=P({},s),d=c[n];return o[d]=c,c.key=c.key||d,c});return l.forEach(function(s){var c=s[r],d=o[c];d&&(d.children=d.children||[],d.children.push(s)),(c===a||!d&&a===null)&&i.push(s)}),i}/* VAe(posA, posB): true when the two dash-separated positions agree on every segment of the shorter one. */function VAe(e,t){for(var n=e.split("-"),r=t.split("-"),a=Math.min(n.length,r.length),o=0;o<a;o+=1)if(n[o]!==r[o])return!1;return!0}/* WR(entity): recursively projects an entity to {node, pos[, children]}. */function WR(e){var t=e.node,n=e.pos,r=e.children,a={node:t,pos:n};return r&&(a.children=r.map(WR)),a}/* f_(nodes, searchValue, predicate, valueEntities, Component): returns null when searchValue is falsy. Otherwise keeps a node when predicate(searchValue, node) is truthy or when at least one of its children is kept, re-creating it as `Component` with the key looked up in valueEntities by the node's value. */function f_(e,t,n,r,a){if(!t)return null;function o(i){if(!i||As(i))return null;var l=!1;n(t,i)&&(l=!0);var s=ht(i);return s=((typeof s=="function"?s():s)||[]).map(o).filter(function(c){return c}),s.length||l?g(a,le(le({},i.props),{},{key:r[rr(i).value].key}),{default:function(){return[s]}}):null}return e.map(o).filter(function(i){return i})}/* h_(value, props): normalises a value prop into a list of {value[, label]} objects. In label-in-value mode (see KR) items that are not non-null objects become {value:"", label:""}; otherwise each item is wrapped as {value: item}. */function h_(e,t){var n=DAe(e);return KR(t)?n.map(function(r){return kt(r)!=="object"||!r?{value:"",label:""}:r}):n.map(function(r){return{value:r}})}/* _b(wrapped, entity, labelProp): label resolution - wrapped.label if truthy, else entity.node's `labelProp` (when rr(node) has any keys), else wrapped.value. */function _b(e,t,n){if(e.label)return e.label;if(t){var r=rr(t.node);if(Object.keys(r).length)return r[n]}return e.value}/* Eb(valueList, props, valueEntities): builds the {label, value} list shown in the selector. Only when treeCheckable && !treeCheckStrictly is the list reduced by showCheckedStrategy: Rg keeps the roots of the forest built by F1, mf keeps its leaves. Any other case maps the list one-to-one. */function Eb(e,t,n){var r=t.treeNodeLabelProp,a=t.treeCheckable,o=t.treeCheckStrictly,i=t.showCheckedStrategy;if(a&&!o){var 
l={};e.forEach(function(f){l[f.value]=f});var s=F1(e.map(function(f){var p=f.value;return n[p]}));if(i===Rg)return s.map(function(f){var p=f.node,v=rr(p).value;return{label:_b(l[v],n[v],r),value:v}});if(i===mf){var c=[],d=function f(p){var v=p.node,m=p.children,y=rr(v).value;if(!m||m.length===0){c.push({label:_b(l[y],n[y],r),value:y});return}m.forEach(function(b){f(b)})};return s.forEach(function(f){d(f)}),c}}return e.map(function(f){return{label:_b(f,n[f.value],r),value:f.value}})}/* zAe(props): copy of props where title falls back to label and key falls back to value when key is null/undefined. */function zAe(e){var t=e.title,n=e.label,r=e.key,a=e.value,o=P({},e);return n&&!t&&(o.title=n),!r&&r==null&&(o.key=a),o}/* p_(treeData): delegates to WS with zAe as the processProps hook. */function p_(e){return WS(e,{processProps:zAe})}/* HAe(wrapper): copy of wrapper with an empty valueEntities map added. */function HAe(e){return P(P({},e),{valueEntities:{}})}/* jAe(entity, wrapper): stores the node's value on the entity and registers the entity in wrapper.valueEntities; when the value is already registered, cc(false, message) is called first and the entry is then overwritten. */function jAe(e,t){var n=rr(e.node).value;e.value=n;var r=t.valueEntities[n];r&&cc(!1,"Conflict! value of node '".concat(e.key,"' (").concat(n,") has already used by node '").concat(r.key,"'.")),t.valueEntities[n]=e}/* KAe(nodes): delegates to Dg with HAe / jAe as initWrapper / processEntity hooks. */function KAe(e){return Dg(e,{initWrapper:HAe,processEntity:jAe})}/* v_(valueList, valueEntities): for every listed value walks up entity.parent until it meets a value that is itself listed, marking each ancestor on the way; returns the keys of the marked ancestors. */function v_(e,t){var n={};return e.forEach(function(r){var a=r.value;n[a]=!1}),e.forEach(function(r){for(var a=r.value,o=t[a];o&&o.parent;){var i=o.parent.value;if(i in n)break;n[i]=!0,o=o.parent}}),Object.keys(n).filter(function(r){return n[r]}).map(function(r){return t[r].key})}/* Jh: alias of I1. WAe: the two popup placements handed to the trigger as builtinPlacements. UAe: options object of the "SelectTrigger" component. */var Jh=I1,WAe={bottomLeft:{points:["tl","bl"],offset:[0,4],overflow:{adjustX:0,adjustY:1},ignoreShake:!0},topLeft:{points:["bl","tl"],offset:[0,-4],overflow:{adjustX:0,adjustY:1},ignoreShake:!0}},UAe={name:"SelectTrigger",inheritAttrs:!1,props:{disabled:u.looseBool,showSearch:u.looseBool,prefixCls:u.string,dropdownPopupAlign:u.object,dropdownClassName:u.string,dropdownStyle:u.object,transitionName:u.string,animation:u.string,getPopupContainer:u.func,dropdownMatchSelectWidth:u.looseBool,isMultiple:u.looseBool,dropdownPrefixCls:u.string,dropdownVisibleChange:u.func,popupElement:u.any,open:u.looseBool},created:function(){this.triggerRef=Si()},methods:{/* Explicit transitionName wins; otherwise, when `animation` is set, the name is dropdownPrefixCls + "-" + animation. */getDropdownTransitionName:function(){var 
t=this.$props,n=t.transitionName,r=t.animation,a=t.dropdownPrefixCls;return!n&&r?"".concat(a,"-").concat(r):n},forcePopupAlign:function(){var t=this.triggerRef.current;t&&t.forcePopupAlign()}},render:function(){var t,n=this,r=this.$props,a=r.disabled,o=r.isMultiple,i=r.dropdownPopupAlign,l=r.dropdownMatchSelectWidth,s=r.dropdownClassName,c=r.dropdownStyle,d=r.dropdownVisibleChange,f=r.getPopupContainer,p=r.dropdownPrefixCls,v=r.popupElement,m=r.open,y;return l!==!1&&(y=l?"width":"minWidth"),g(Ii,{ref:this.triggerRef,action:a?[]:["click"],popupPlacement:"bottomLeft",builtinPlacements:WAe,popupAlign:i,prefixCls:p,popupTransitionName:this.getDropdownTransitionName(),onPopupVisibleChange:d,popup:v,popupVisible:m,getPopupContainer:f,stretch:y,popupClassName:Se(s,(t={},V(t,"".concat(p,"--multiple"),o),V(t,"".concat(p,"--single"),!o),t)),popupStyle:c},{default:function(){return[ht(n)]}})}},YAe=UAe,JS=function(){return{prefixCls:u.string,open:u.looseBool,selectorValueList:u.array,allowClear:u.looseBool,showArrow:u.looseBool,removeSelected:u.func,choiceTransitionName:u.string,ariaId:u.string,inputIcon:u.any,clearIcon:u.any,removeIcon:u.any,placeholder:u.any,disabled:u.looseBool,focused:u.looseBool,isMultiple:u.looseBool,showSearch:u.looseBool,searchValue:u.string}};function qAe(){}function UR(){var e={name:"BaseSelector",inheritAttrs:!1,mixins:[nt],props:An(P(P({},JS()),{renderSelection:u.func.isRequired,renderPlaceholder:u.func,tabindex:u.oneOfType([u.number,u.string])}),{tabindex:0}),setup:function(){return{vcTreeSelect:ve("vcTreeSelect",{})}},created:function(){this.domRef=Si()},methods:{onFocus:function(n){var r=this.$props.focused,a=this.vcTreeSelect.onSelectorFocus;r||a(),this.__emit("focus",n)},onBlur:function(n){var r=this.vcTreeSelect.onSelectorBlur;r(),this.__emit("blur",n)},focus:function(){this.domRef.current.focus()},blur:function(){this.domRef.current.blur()},renderClear:function(){var 
n=this.$props,r=n.prefixCls,a=n.allowClear,o=n.selectorValueList,i=this.vcTreeSelect.onSelectorClear;if(!a||!o.length)return null;var l=We(this,"clearIcon");return g("span",{key:"clear",unselectable:"on","aria-hidden":"true",style:"user-select: none;",class:"".concat(r,"-clear"),onClick:i},[l])},renderArrow:function(){var n=this.$props,r=n.prefixCls,a=n.showArrow;if(!a)return null;var o=We(this,"inputIcon");return g("span",{key:"arrow",class:"".concat(r,"-arrow"),style:{outline:"none",userSelect:"none"}},[o])}},render:function(){var n,r=this.$props,a=r.prefixCls,o=r.open,i=r.focused,l=r.disabled,s=r.allowClear,c=r.ariaId,d=r.renderSelection,f=r.renderPlaceholder,p=r.tabindex,v=r.isMultiple,m=r.showArrow,y=r.showSearch,b=this.$attrs,C=b.class,S=b.style,w=b.onClick,k=w===void 0?qAe:w,$=this.vcTreeSelect.onSelectorKeyDown,O=p;l&&(O=null);var T=Se(a,C,(n={},V(n,"".concat(a,"-focused"),o||i),V(n,"".concat(a,"-multiple"),v),V(n,"".concat(a,"-single"),!v),V(n,"".concat(a,"-allow-clear"),s),V(n,"".concat(a,"-show-arrow"),m),V(n,"".concat(a,"-disabled"),l),V(n,"".concat(a,"-open"),o),V(n,"".concat(a,"-show-search"),y),n));return g("div",{style:S,onClick:k,class:T,ref:this.domRef,role:"combobox","aria-expanded":o,"aria-owns":o?c:void 0,"aria-controls":o?c:void 0,"aria-haspopup":"listbox","aria-disabled":l,tabindex:O,onFocus:this.onFocus,onBlur:this.onBlur,onKeydown:$},[g("span",{class:"".concat(a,"-selector")},[d(),f&&f()]),this.renderArrow(),this.renderClear()])}};return e}var GAe=G({props:{value:u.string.def("")},emits:["change","input"],setup:function(t,n){var r=n.emit,a=H(null),o=function(l){var s=l.target.composing;l.isComposing||s?r("input",l):(r("input",l),r("change",l))};return{inputRef:a,focus:function(){a.value&&a.value.focus()},blur:function(){a.value&&a.value.blur()},handleChange:o}},render:function(){return 
at(g("input",le(le(le({},this.$props),this.$attrs),{},{onInput:this.handleChange,onChange:this.handleChange,ref:"inputRef"}),null),[[Mi]])}}),XAe=GAe,ZAe={name:"SearchInput",inheritAttrs:!1,props:{open:u.looseBool,searchValue:u.string,prefixCls:u.string,disabled:u.looseBool,renderPlaceholder:u.func,needAlign:u.looseBool,ariaId:u.string,isMultiple:u.looseBool.def(!0),showSearch:u.looseBool},emits:["mirrorSearchValueChange"],setup:function(t,n){var r=n.emit,a=H(),o=H(0),i=H(t.searchValue);return ce(x(function(){return t.searchValue}),function(){i.value=t.searchValue}),ce(i,function(){r("mirrorSearchValueChange",i.value)},{immediate:!0}),et(function(){t.isMultiple&&ce(i,function(){o.value=a.value.scrollWidth},{flush:"post",immediate:!0})}),{measureRef:a,inputWidth:o,vcTreeSelect:ve("vcTreeSelect",{}),mirrorSearchValue:i}},created:function(){this.inputRef=Si(),this.prevProps=P({},this.$props)},mounted:function(){var t=this;this.$nextTick(function(){var n=t.$props.open;n&&t.focus(!0)})},updated:function(){var t=this,n=this.$props.open,r=this.prevProps;this.$nextTick(function(){n&&r.open!==n&&t.focus(),t.prevProps=P({},t.$props)})},methods:{focus:function(t){var n=this;this.inputRef.current&&(t?setTimeout(function(){n.inputRef.current.focus()},0):this.inputRef.current.focus())},blur:function(){this.inputRef.current&&this.inputRef.current.blur()},handleInputChange:function(t){var n=t.target,r=n.value,a=n.composing,o=this.searchValue,i=o===void 0?"":o;if(t.isComposing||a||i===r){this.mirrorSearchValue=r;return}this.vcTreeSelect.onSearchInputChange(t)}},render:function(){var t=this.$props,n=t.searchValue,r=t.prefixCls,a=t.disabled,o=t.renderPlaceholder,i=t.open,l=t.ariaId,s=t.isMultiple,c=t.showSearch,d=this.vcTreeSelect.onSearchInputKeyDown,f=this.handleInputChange,p=this.mirrorSearchValue,v=this.inputWidth;return 
g(Fe,null,[g("span",{class:"".concat(r,"-selection-search"),style:s?{width:v+"px"}:{}},[g(XAe,{type:"text",ref:this.inputRef,onChange:f,onKeydown:d,value:n,disabled:a,readonly:!c,class:"".concat(r,"-selection-search-input"),"aria-label":"filter select","aria-autocomplete":"list","aria-controls":i?l:void 0,"aria-multiline":"false"},null),s?g("span",{ref:"measureRef",class:"".concat(r,"-selection-search-mirror"),"aria-hidden":!0},[p,yt("\xA0")]):null]),o&&!p?o():null])}},B1=ZAe,JAe=UR(),QAe={name:"SingleSelector",inheritAttrs:!1,props:JS(),created:function(){this.selectorRef=Si(),this.inputRef=Si()},data:function(){return{mirrorSearchValue:this.searchValue}},watch:{searchValue:function(t){this.mirrorSearchValue=t}},methods:{onPlaceholderClick:function(){this.inputRef.current.focus()},focus:function(){this.selectorRef.current.focus()},blur:function(){this.selectorRef.current.blur()},_renderPlaceholder:function(){var t=this.$props,n=t.prefixCls,r=t.placeholder,a=t.searchPlaceholder,o=t.selectorValueList,i=r||a;if(!i)return null;var l=this.mirrorSearchValue||o.length;return g("span",{style:{display:l?"none":"block"},onClick:this.onPlaceholderClick,class:"".concat(n,"-selection-placeholder")},[i])},onMirrorSearchValueChange:function(t){this.mirrorSearchValue=t},renderSelection:function(){var t=this.$props,n=t.selectorValueList,r=t.prefixCls,a=[];if(n.length&&!this.mirrorSearchValue){var o=n[0],i=o.label,l=o.value;a.push(g("span",{key:l,title:jR(i),class:"".concat(r,"-selection-item")},[i||l]))}return a.push(g(B1,le(le(le({},this.$props),this.$attrs),{},{ref:this.inputRef,isMultiple:!1,onMirrorSearchValueChange:this.onMirrorSearchValueChange}),null)),a}},render:function(){var t=P(P(P({},Qe(this)),this.$attrs),{renderSelection:this.renderSelection,renderPlaceholder:this._renderPlaceholder,ref:this.selectorRef});return 
g(JAe,t,null)}},eDe=QAe,tDe={mixins:[nt],inheritAttrs:!1,props:{prefixCls:u.string,maxTagTextLength:u.number,label:u.any,value:u.oneOfType([u.string,u.number]),removeIcon:u.any},methods:{onRemove:function(t){var n=this.$props.value;this.__emit("remove",t,n),t.stopPropagation()}},render:function(){var t=this.$props,n=t.prefixCls,r=t.maxTagTextLength,a=t.label,o=t.value,i=a||o;r&&typeof i=="string"&&i.length>r&&(i="".concat(i.slice(0,r),"..."));var l=this.$attrs,s=l.class,c=l.style,d=l.onRemove;return g("span",le(le({style:P(P({},RAe),c)},LAe),{},{role:"menuitem",class:Se("".concat(n,"-selection-item"),s),title:jR(a)}),[g("span",{class:"".concat(n,"-selection-item-content")},[i]),d&&g("span",{class:"".concat(n,"-selection-item-remove"),onClick:this.onRemove},[We(this,"removeIcon")])])}},m_=tDe,nDe="RC_TREE_SELECT_EMPTY_VALUE_KEY",rDe=UR(),aDe={name:"MultipleSelector",mixins:[nt],inheritAttrs:!1,props:P(P(P({},JS()),B1.props),{selectorValueList:u.array,disabled:u.looseBool,labelInValue:u.looseBool,maxTagCount:u.number,maxTagPlaceholder:u.any}),setup:function(){return{vcTreeSelect:ve("vcTreeSelect",{})}},created:function(){this.inputRef=Si()},methods:{onPlaceholderClick:function(){this.inputRef.current.focus()},focus:function(){this.inputRef.current.focus()},blur:function(){this.inputRef.current.blur()},_renderPlaceholder:function(){var t=this.$props,n=t.prefixCls,r=t.placeholder,a=t.searchPlaceholder,o=t.searchValue,i=t.selectorValueList,l=r||a;if(!l)return null;var s=o||i.length;return g("span",{style:{display:s?"none":"block"},onClick:this.onPlaceholderClick,class:"".concat(n,"-selection-placeholder")},[l])},onChoiceAnimationLeave:function(){for(var t=arguments.length,n=new Array(t),r=0;r<t;r++)n[r]=arguments[r];this.__emit.apply(this,["choiceAnimationLeave"].concat(n))},renderSelection:function(){var t=this,n=this.$props,r=n.selectorValueList,a=n.labelInValue,o=n.maxTagCount,i=ht(this),l=this.vcTreeSelect.onMultipleSelectorRemove,s=r;o>=0&&(s=r.slice(0,o));var 
c=s.map(function(m){var y=m.label,b=m.value;return g(m_,le(le({},P(P({},t.$props),{label:y,value:b,onRemove:l})),{},{key:b||nDe}),{default:function(){return[i]}})});if(o>=0&&o<r.length){var d="+ ".concat(r.length-o," ..."),f=We(this,"maxTagPlaceholder",{},!1);if(typeof f=="string")d=f;else if(typeof f=="function"){var p=r.slice(o);d=f(a?p:p.map(function(m){var y=m.value;return y}))}var v=g(m_,le(le({},P(P({},this.$props),{label:d,value:null})),{},{key:"rc-tree-select-internal-max-tag-counter"}),{default:function(){return[i]}});c.push(v)}return c.push(g(B1,le(le(le({key:"SearchInput"},this.$props),this.$attrs),{},{ref:this.inputRef}),{default:function(){return[i]}})),c}},render:function(){var t=this;return g(rDe,P(P(P({},this.$props),this.$attrs),{tabindex:-1,showArrow:!1,renderSelection:this.renderSelection,renderPlaceholder:this._renderPlaceholder}),{default:function(){return[ht(t)]}})}},oDe=aDe,iDe=Wf.TreeNode;function ks(e,t){var n=t.attrs,r=t.slots;return g(iDe,n,r)}ks.isTreeNode=!0;ks.inheritAttrs=!1;ks.displayName="ATreeSelectNode";function g_(e,t){var n=t||{},r=n._prevProps,a=r===void 0?{}:r,o=n._loadedKeys,i=n._expandedKeyList,l=n._cachedExpandedKeyList,s=e.valueList,c=e.valueEntities,d=e.keyEntities,f=e.treeExpandedKeys,p=e.filteredTreeNodes,v=e.upperSearchValue,m={_prevProps:P({},e)};return s!==a.valueList&&(m._keyList=s.map(function(y){var b=y.value;return c[b]}).filter(function(y){return y}).map(function(y){var b=y.key;return b})),!f&&p&&p.length&&p!==a.filteredTreeNodes&&(m._expandedKeyList=Je(d.keys())),v&&!a.upperSearchValue?m._cachedExpandedKeyList=i:!v&&a.upperSearchValue&&!f&&(m._expandedKeyList=l||[],m._cachedExpandedKeyList=[]),a.treeExpandedKeys!==f&&(m._expandedKeyList=f),e.loadData&&(m._loadedKeys=o.filter(function(y){return d.has(y)})),m}var 
lDe={mixins:[nt],inheritAttrs:!1,name:"BasePopup",props:{prefixCls:u.string,upperSearchValue:u.string,valueList:u.array,searchHalfCheckedKeys:u.array,valueEntities:u.object,keyEntities:Map,treeIcon:u.looseBool,treeLine:u.looseBool,treeNodeFilterProp:u.string,treeCheckable:u.any,treeCheckStrictly:u.looseBool,treeDefaultExpandAll:u.looseBool,treeDefaultExpandedKeys:u.array,treeExpandedKeys:u.array,loadData:u.func,multiple:u.looseBool,searchValue:u.string,treeNodes:u.any,filteredTreeNodes:u.any,notFoundContent:u.any,ariaId:u.string,switcherIcon:u.any,renderSearch:u.func,__propsSymbol__:u.any},setup:function(){return{vcTreeSelect:ve("vcTreeSelect",{})}},watch:{__propsSymbol__:function(){var t=g_(this.$props,this.$data);this.setState(t)}},data:function(){this.treeRef=Si(),cc(this.$props.__propsSymbol__);var t=this.$props,n=t.treeDefaultExpandAll,r=t.treeDefaultExpandedKeys,a=t.keyEntities,o=r;n&&(o=Je(a.keys()));var i={_keyList:[],_expandedKeyList:o,_cachedExpandedKeyList:[],_loadedKeys:[],_prevProps:{}};return P(P({},i),g_(this.$props,i))},methods:{onTreeExpand:function(t){var n=this,r=this.$props.treeExpandedKeys;r||this.setState({_expandedKeyList:t},function(){n.__emit("treeExpanded")}),this.__emit("treeExpand",t)},onLoad:function(t){this.setState({_loadedKeys:t})},getTree:function(){return this.treeRef.current},getLoadData:function(){var t=this.$props,n=t.loadData,r=t.upperSearchValue;return r?null:n},filterTreeNode:function(t){var n=this.$props,r=n.upperSearchValue,a=n.treeNodeFilterProp,o=t[a];return typeof o=="string"?r&&o.toUpperCase().indexOf(r)!==-1:!1},renderNotFound:function(){var t=this.$props,n=t.prefixCls,r=t.notFoundContent;return g("span",{class:"".concat(n,"-not-found")},[r])}},render:function(){var 
t=this.$data,n=t._keyList,r=t._expandedKeyList,a=t._loadedKeys,o=this.$props,i=o.prefixCls,l=o.treeNodes,s=o.filteredTreeNodes,c=o.treeIcon,d=o.treeLine,f=o.treeCheckable,p=o.treeCheckStrictly,v=o.multiple,m=o.ariaId,y=o.renderSearch,b=o.switcherIcon,C=o.searchHalfCheckedKeys,S=this.vcTreeSelect,w=S.onPopupKeyDown,k=S.onTreeNodeSelect,$=S.onTreeNodeCheck,O=this.getLoadData(),T={};f?T.checkedKeys=n:T.selectedKeys=n;var _,I;s?s.length?(T.checkStrictly=!0,I=s,f&&!p&&(T.checkedKeys={checked:n,halfChecked:C})):_=this.renderNotFound():!l||!l.length?_=this.renderNotFound():I=l;var L;if(_)L=_;else{var j=P(P({prefixCls:"".concat(i,"-tree"),showIcon:c,showLine:d,selectable:!f,checkable:f,checkStrictly:p,multiple:v,loadData:O,loadedKeys:a,expandedKeys:r,filterTreeNode:this.filterTreeNode,switcherIcon:b},T),{children:I,onSelect:k,onCheck:$,onExpand:this.onTreeExpand,onLoad:this.onLoad});L=g(Wf,le(le({},j),{},{ref:this.treeRef,__propsSymbol__:[]}),null)}return g("div",{role:"listbox",id:m,onKeydown:w,tabindex:-1},[y?y():null,L])}},sDe=lDe;function uDe(){var e=arguments.length>0&&arguments[0]!==void 0?arguments[0]:[],t={};return e.forEach(function(n){t[n]=function(){this.needSyncKeys[n]=!0}}),t}var 
Nc=G({name:"Select",mixins:[nt],inheritAttrs:!1,props:An({prefixCls:u.string,prefixAria:u.string,multiple:u.looseBool,showArrow:u.looseBool,open:u.looseBool,value:u.any,autofocus:u.looseBool,defaultOpen:u.looseBool,defaultValue:u.any,showSearch:u.looseBool,placeholder:u.any,inputValue:u.string,searchValue:u.string,autoClearSearchValue:u.looseBool,searchPlaceholder:u.any,disabled:u.looseBool,children:u.any,labelInValue:u.looseBool,maxTagCount:u.number,maxTagPlaceholder:u.any,maxTagTextLength:u.number,showCheckedStrategy:u.oneOf([ZS,Rg,mf]),dropdownClassName:u.string,dropdownStyle:u.object,dropdownVisibleChange:u.func,dropdownMatchSelectWidth:u.looseBool,treeData:u.array,treeDataSimpleMode:an(u.oneOfType([u.looseBool,u.object])),treeNodeFilterProp:u.string,treeNodeLabelProp:u.string,treeCheckable:u.any,treeCheckStrictly:u.looseBool,treeIcon:u.looseBool,treeLine:u.looseBool,treeDefaultExpandAll:u.looseBool,treeDefaultExpandedKeys:u.array,treeExpandedKeys:u.array,loadData:u.func,filterTreeNode:an(u.oneOfType([u.func,u.looseBool])),notFoundContent:u.any,getPopupContainer:u.func,allowClear:u.looseBool,transitionName:u.string,animation:u.string,choiceTransitionName:u.string,inputIcon:u.any,clearIcon:u.any,removeIcon:u.any,switcherIcon:u.any,__propsSymbol__:u.any},{prefixCls:"rc-tree-select",prefixAria:"rc-tree-select",showArrow:!0,showSearch:!0,autoClearSearchValue:!0,showCheckedStrategy:mf,treeNodeFilterProp:"value",treeNodeLabelProp:"title",treeIcon:!1,notFoundContent:"Not Found",dropdownStyle:{},dropdownVisibleChange:function(){return!0}}),data:function(){cc(this.$props.__propsSymbol__);var t=this.$props,n=t.prefixAria,r=t.defaultOpen,a=t.open;this.needSyncKeys={},this.selectorRef=Si(),this.selectTriggerRef=Si(),this.ariaId=FAe("".concat(n,"-list"));var o={_open:a||r,_valueList:[],_searchHalfCheckedKeys:[],_missValueList:[],_selectorValueList:[],_valueEntities:{},_posEntities:new Map,_keyEntities:new Map,_searchValue:"",_prevProps:{},_init:!0,_focused:void 
0,_treeNodes:void 0,_filteredTreeNodes:void 0},i=this.getDerivedState(this.$props,o);return P(P({},o),i)},watch:P(P({},uDe(["treeData","defaultValue","value"])),{__propsSymbol__:function(){var t=this.getDerivedState(this.$props,this.$data);this.setState(t),this.needSyncKeys={}},_valueList:function(){var t=this;this.$nextTick(function(){t.forcePopupAlign()})},_open:function(t){var n=this;this.$nextTick(function(){!t&&!n.isSearchValueControlled()&&n.setState({_searchValue:""}),t&&!n.$data._searchValue&&n.setState({_filteredTreeNodes:null});var r=n.$props.prefixCls,a=n.$data,o=a._selectorValueList,i=a._valueEntities,l=n.isMultiple();if(!l&&o.length&&t&&n.popup){var s=o[0].value,c=n.popup.getTree(),d=c.domTreeNodes,f=i[s]||{},p=f.key,v=d[p];if(v){var m=Sn(v);requestAnimationFrame(function(){var y=Sn(n.popup),b=AAe(y,"".concat(r,"-dropdown"));m&&b&&IAe(m,b,{onlyScrollIfNeeded:!0,offsetTop:0})})}}})}}),created:function(){ot("vcTreeSelect",{onSelectorFocus:this.onSelectorFocus,onSelectorBlur:this.onSelectorBlur,onSelectorKeyDown:this.onComponentKeyDown,onSelectorClear:this.onSelectorClear,onMultipleSelectorRemove:this.onMultipleSelectorRemove,onTreeNodeSelect:this.onTreeNodeSelect,onTreeNodeCheck:this.onTreeNodeCheck,onPopupKeyDown:this.onComponentKeyDown,onSearchInputChange:this.onSearchInputChange,onSearchInputKeyDown:this.onSearchInputKeyDown})},mounted:function(){var t=this;this.$nextTick(function(){var n=t.$props,r=n.autofocus,a=n.disabled;r&&!a&&t.focus()})},methods:{getDerivedState:function(t,n){var r=n._prevProps,a=r===void 0?{}:r,o=t.treeCheckable,i=t.treeCheckStrictly,l=t.filterTreeNode,s=t.treeNodeFilterProp,c=t.treeDataSimpleMode,d={_prevProps:P({},t),_init:!1},f=this;function p(N,D){return a[N]!==t[N]||f.needSyncKeys[N]?(D(t[N],a[N]),!0):!1}var v=!1;p("open",function(N){d._open=N});var m,y=!1,b=!1;if(p("treeData",function(N){m=p_(N),y=!0}),p("treeDataSimpleMode",function(N,D){if(!!N){var z=!D||D===!0?{}:D;Mr(N,z)||(b=!0)}}),c&&(y||b)){var 
C=P({id:"id",pId:"pId",rootPId:null},c!==!0?c:{});m=p_(BAe(t.treeData,C))}if(t.treeData||(m=this.children||[]),m){var S=KAe(m);d._treeNodes=m,d._posEntities=S.posEntities,d._valueEntities=S.valueEntities,d._keyEntities=S.keyEntities,v=!0}if(n._init&&p("defaultValue",function(N){d._valueList=h_(N,t),v=!0}),p("value",function(N){d._valueList=h_(N,t),v=!0}),v){var w=[],k=[],$=[],O=d._valueList;O||(O=[].concat(Je(n._valueList),Je(n._missValueList)));var T={};if(O.forEach(function(N){var D=N.value,z=N.label,B=(d._valueEntities||n._valueEntities)[D];if(T[D]=z,B){$.push(B.key),k.push(N);return}w.push(N)}),o&&!i){var _=Jh($,!0,d._keyEntities||n._keyEntities),I=_.checkedKeys;d._valueList=I.map(function(N){var D=(d._keyEntities||n._keyEntities).get(N).value,z={value:D};return T[D]!==void 0&&(z.label=T[D]),z})}else d._valueList=k;d._missValueList=w,d._selectorValueList=Eb(d._valueList,t,d._valueEntities||n._valueEntities)}if(p("inputValue",function(N){N!==null&&(d._searchValue=N)}),p("searchValue",function(N){d._searchValue=N}),d._searchValue!==void 0||n._searchValue&&m){var L=d._searchValue!==void 0?d._searchValue:n._searchValue,j=String(L).toUpperCase(),F=l;l===!1?F=function(){return!0}:typeof F!="function"&&(F=function(D,z){var B=String(rr(z)[s]).toUpperCase();return B.indexOf(j)!==-1}),d._filteredTreeNodes=f_(d._treeNodes||n._treeNodes,L,F,d._valueEntities||n._valueEntities,ks)}return v&&o&&!i&&(d._searchValue||n._searchValue)&&(d._searchHalfCheckedKeys=v_(d._valueList,d._valueEntities||n._valueEntities)),p("showCheckedStrategy",function(){d._selectorValueList=d._selectorValueList||Eb(d._valueList||n._valueList,t,d._valueEntities||n._valueEntities)}),d},onSelectorFocus:function(){this.setState({_focused:!0})},onSelectorBlur:function(){this.setState({_focused:!1})},onComponentKeyDown:function(t){var 
n=this.$data._open,r=t.keyCode;n?ze.ESC===r?this.setOpenState(!1):[ze.UP,ze.DOWN,ze.LEFT,ze.RIGHT].indexOf(r)!==-1&&t.stopPropagation():[ze.ENTER,ze.DOWN].indexOf(r)!==-1&&this.setOpenState(!0)},onDeselect:function(t,n,r){this.__emit("deselect",t,n,r)},onSelectorClear:function(t){var n=this.$props.disabled;n||(this.triggerChange([],[]),this.isSearchValueControlled()||this.setUncontrolledState({_searchValue:"",_filteredTreeNodes:null}),t.stopPropagation())},onMultipleSelectorRemove:function(t,n){t.stopPropagation();var r=this.$data,a=r._valueList,o=r._missValueList,i=r._valueEntities,l=this.$props,s=l.treeCheckable,c=l.treeCheckStrictly,d=l.treeNodeLabelProp,f=l.disabled;if(!f){var p=i[n],v=a;p&&(s&&!c?v=a.filter(function(k){var $=k.value,O=i[$];return!VAe(O.pos,p.pos)}):v=a.filter(function(k){var $=k.value;return $!==n}));var m=p?p.node:null,y={triggerValue:n,triggerNode:m},b={node:m};if(s){var C=v.map(function(k){var $=k.value;return i[$]});b.event="check",b.checked=!1,b.checkedNodes=C.map(function(k){var $=k.node;return $}),b.checkedNodesPositions=C.map(function(k){var $=k.node,O=k.pos;return{node:$,pos:O}}),c?y.allCheckedNodes=b.checkedNodes:y.allCheckedNodes=F1(C).map(function(k){var $=k.node;return $})}else b.event="select",b.selected=!1,b.selectedNodes=v.map(function(k){var $=k.value;return(i[$]||{}).node});var S=o.filter(function(k){var $=k.value;return $!==n}),w;this.isLabelInValue()?w={label:m?rr(m)[d]:null,value:n}:w=n,this.onDeselect(w,m,b),this.triggerChange(S,v,y)}},onValueTrigger:function(t,n,r,a){var o=r.node,i=o.$props.value,l=this.$data,s=l._missValueList,c=l._valueEntities,d=l._keyEntities,f=l._searchValue,p=this.$props,v=p.disabled,m=p.inputValue,y=p.treeNodeLabelProp,b=p.treeCheckable,C=p.treeCheckStrictly,S=p.autoClearSearchValue,w=o.$props[y];if(!v){var k;this.isLabelInValue()?k={value:i,label:w}:k=i,t?this.__emit("select",k,o,r):this.__emit("deselect",k,o,r);var $=n.map(function(_){var I=rr(_);return{value:I.value,label:I[y]}});if(b&&!C){var 
O=$.map(function(_){var I=_.value;return c[I].key});t?O=Jh(O,!0,d).checkedKeys:O=Jh([c[i].key],!1,d,{checkedKeys:O}).checkedKeys,$=O.map(function(_){var I=rr(d.get(_).node);return{value:I.value,label:I[y]}})}(S||m===null)&&(this.isSearchValueControlled()||this.setUncontrolledState({_searchValue:"",_filteredTreeNodes:null}),f&&f.length&&this.__emit("search",""));var T=P(P({},a),{triggerValue:i,triggerNode:o});this.triggerChange(s,$,T)}},onTreeNodeSelect:function(t,n){var r=this.$data,a=r._valueList,o=r._valueEntities,i=this.$props,l=i.treeCheckable,s=i.multiple;if(!l){s||this.setOpenState(!1);var c=n.selected,d=n.node.$props.value,f;s?(f=a.filter(function(v){var m=v.value;return m!==d}),c&&f.push({value:d})):f=[{value:d}];var p=f.map(function(v){var m=v.value;return o[m]}).filter(function(v){return v}).map(function(v){var m=v.node;return m});this.onValueTrigger(c,p,n,{selected:c})}},onTreeNodeCheck:function(t,n){var r=this.$data,a=r._searchValue,o=r._keyEntities,i=r._valueEntities,l=r._valueList,s=this.$props.treeCheckStrictly,c=n.checkedNodes,d=n.checkedNodesPositions,f=n.checked,p={checked:f},v=c;if(a){var m=l.map(function(b){var C=b.value;return i[C]}).filter(function(b){return b}).map(function(b){var C=b.key;return C}),y;f?y=Array.from(new Set([].concat(Je(m),Je(v.map(function(b){var C=rr(b),S=C.value;return i[S].key}))))):y=Jh([rr(n.node).eventKey],!1,o,{checkedKeys:m}).checkedKeys,v=y.map(function(b){return o.get(b).node}),p.allCheckedNodes=y.map(function(b){return WR(o.get(b))})}else s?p.allCheckedNodes=n.checkedNodes:p.allCheckedNodes=F1(d);this.onValueTrigger(f,v,n,p)},onDropdownVisibleChange:function(t){var n=this.$props,r=n.multiple,a=n.treeCheckable,o=this.$data._searchValue;t&&!r&&!a&&o&&this.setUncontrolledState({_searchValue:"",_filteredTreeNodes:null}),this.setOpenState(t,!0)},onSearchInputChange:function(t){var 
n=t.target.value,r=this.$data,a=r._treeNodes,o=r._valueEntities,i=this.$props,l=i.filterTreeNode,s=i.treeNodeFilterProp;this.__emit("search",n);var c=!1;if(this.isSearchValueControlled()||(c=this.setUncontrolledState({_searchValue:n}),this.setOpenState(!0)),c){var d=String(n).toUpperCase(),f=l;l===!1?f=function(){return!0}:f||(f=function(v,m){var y=String(rr(m)[s]).toUpperCase();return y.indexOf(d)!==-1}),this.setState({_filteredTreeNodes:f_(a,n,f,o,ks)})}},onSearchInputKeyDown:function(t){var n=this.$data,r=n._searchValue,a=n._valueList,o=n._valueEntities,i=t.keyCode;if(ze.BACKSPACE===i&&this.isMultiple()&&!r&&a.length){var l=a[a.length-1].value,s=this.$props.treeCheckStrictly;s||function(){for(var c=o[l];c;)a.some(function(d){return d.value===c.value})?(l=c.value,c=c.parent):c=null}(),this.onMultipleSelectorRemove(t,l)}},onChoiceAnimationLeave:function(){var t=this;requestAnimationFrame(function(){t.forcePopupAlign()})},setPopupRef:function(t){this.popup=t},setUncontrolledState:function(t){var n=!1,r={},a=Qe(this);return Object.keys(t).forEach(function(o){o.slice(1)in a||(n=!0,r[o]=t[o])}),n&&this.setState(r),n},setOpenState:function(t){var n=arguments.length>1&&arguments[1]!==void 0?arguments[1]:!1,r=this.$props.dropdownVisibleChange;r&&r(t,{documentClickClose:!t&&n})===!1||this.setUncontrolledState({_open:t})},isMultiple:function(){var t=this.$props,n=t.multiple,r=t.treeCheckable;return!!(n||r)},isLabelInValue:function(){return KR(this.$props)},isSearchValueControlled:function(){var t=Qe(this),n=t.inputValue;return"searchValue"in t?!0:"inputValue"in t&&n!==null},forcePopupAlign:function(){var t=this.selectTriggerRef.current;t&&t.forcePopupAlign()},delayForcePopupAlign:function(){var t=this;requestAnimationFrame(function(){requestAnimationFrame(t.forcePopupAlign)})},triggerChange:function(t,n){var r=arguments.length>2&&arguments[2]!==void 
0?arguments[2]:{},a=this.$data,o=a._valueEntities,i=a._searchValue,l=a._selectorValueList,s=Qe(this),c=s.disabled,d=s.treeCheckable,f=s.treeCheckStrictly;if(!c){var p=P({preValue:l.map(function(S){var w=S.label,k=S.value;return{label:w,value:k}})},r),v=Eb(n,s,o);if(!("value"in s)){var m={_missValueList:t,_valueList:n,_selectorValueList:v};i&&d&&!f&&(m._searchHalfCheckedKeys=v_(n,o)),this.setState(m)}if(this.$attrs.onChange){var y;this.isMultiple()?y=[].concat(Je(t),Je(v)):y=v.slice(0,1);var b=null,C;this.isLabelInValue()?C=y.map(function(S){var w=S.label,k=S.value;return{label:w,value:k}}):(b=[],C=y.map(function(S){var w=S.label,k=S.value;return b.push(w),k})),this.isMultiple()||(C=C[0]),this.__emit("change",C,b,p)}}},focus:function(){this.selectorRef.current.focus()},blur:function(){this.selectorRef.current.blur()}},render:function(){var t=this.$data,n=t._valueList,r=t._missValueList,a=t._selectorValueList,o=t._searchHalfCheckedKeys,i=t._valueEntities,l=t._keyEntities,s=t._searchValue,c=t._open,d=t._focused,f=t._treeNodes,p=t._filteredTreeNodes,v=Qe(this),m=v.prefixCls,y=v.treeExpandedKeys,b=this.isMultiple(),C=P(P(P({},v),this.$attrs),{isMultiple:b,valueList:n,searchHalfCheckedKeys:o,selectorValueList:[].concat(Je(r),Je(a)),valueEntities:i,keyEntities:l,searchValue:s,upperSearchValue:(s||"").toUpperCase(),open:c,focused:d,dropdownPrefixCls:"".concat(m,"-dropdown"),ariaId:this.ariaId,onChoiceAnimationLeave:this.onChoiceAnimationLeave,vSlots:this.$slots}),S=P(P({},C),{treeNodes:f,filteredTreeNodes:p,treeExpandedKeys:y,onTreeExpanded:this.delayForcePopupAlign,ref:this.setPopupRef}),w=g(sDe,le(le({},S),{},{__propsSymbol__:[]}),null),k=b?oDe:eDe,$=g(k,le(le({},C),{},{isMultiple:b,ref:this.selectorRef}),null),O=P(P({},C),{popupElement:w,dropdownVisibleChange:this.onDropdownVisibleChange,ref:this.selectTriggerRef});return g(YAe,O,{default:function(){return[$]}})}});Nc.TreeNode=ks;Nc.SHOW_ALL=ZS;Nc.SHOW_PARENT=Rg;Nc.SHOW_CHILD=mf;Nc.name="TreeSelect";var 
cDe=Nc,dDe=ks;u.shape({key:u.string,value:u.string,label:u.VNodeChild,slots:u.object,children:u.array}).loose;var fDe=function(){return P(P({},Sg()),{autofocus:u.looseBool,dropdownStyle:u.object,filterTreeNode:an(u.oneOfType([Function,Boolean])),getPopupContainer:u.func,labelInValue:u.looseBool,loadData:u.func,maxTagCount:u.number,maxTagPlaceholder:u.VNodeChild,value:u.oneOfType([u.string,u.object,u.array,u.number]),defaultValue:u.oneOfType([u.string,u.object,u.array,u.number]),multiple:u.looseBool,notFoundContent:u.VNodeChild,searchPlaceholder:u.string,searchValue:u.string,showCheckedStrategy:u.oneOf(rt("SHOW_ALL","SHOW_PARENT","SHOW_CHILD")),suffixIcon:u.VNodeChild,treeCheckable:u.looseBool,treeCheckStrictly:u.looseBool,treeData:u.arrayOf(Object),treeDataSimpleMode:an(u.oneOfType([u.looseBool,Object])),dropdownClassName:u.string,dropdownMatchSelectWidth:u.looseBool,treeDefaultExpandAll:u.looseBool,treeExpandedKeys:u.array,treeIcon:u.looseBool,treeDefaultExpandedKeys:u.array,treeNodeFilterProp:u.string,treeNodeLabelProp:u.string,replaceFields:u.object.def({}),clearIcon:u.VNodeChild,removeIcon:u.VNodeChild,onSelect:u.func,onChange:u.func,onSearch:u.func,onTreeExpand:u.func,"onUpdate:treeExpandedKeys":u.func,"onUpdate:searchValue":u.func,"onUpdate:value":u.func})},hDe={icon:{tag:"svg",attrs:{viewBox:"0 0 1024 1024",focusable:"false"},children:[{tag:"path",attrs:{d:"M840.4 300H183.6c-19.7 0-30.7 20.8-18.5 35l328.4 380.8c9.4 10.9 27.5 10.9 37 0L858.9 335c12.2-14.2 1.2-35-18.5-35z"}}]},name:"caret-down",theme:"outlined"},pDe=hDe;function y_(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){vDe(e,a,n[a])})}return e}function vDe(e,t,n){return t in 
e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var QS=function(t,n){var r=y_({},t,n.attrs);return g(Et,y_({},r,{icon:pDe}),null)};QS.displayName="CaretDownOutlined";QS.inheritAttrs=!1;var mDe=QS,b_=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n};function YR(e){return Un(e).map(function(t){if(!zn(t)||!t.type)return null;var n=t.children||{},r=n.default,a=b_(n,["default"]),o=r?r():[],i=t.key,l=t.props,s=l.value,c=b_(l,["value"]),d=P({key:i,value:s},c);Object.keys(a).forEach(function(p){typeof a[p]=="function"&&(d[p]=g(Fe,null,[a[p]()]))});var f=YR(o);return f.length&&(d.children=f),d}).filter(function(t){return t})}var gDe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},Kl=G({TreeNode:dDe,SHOW_ALL:ZS,SHOW_PARENT:Rg,SHOW_CHILD:mf,name:"ATreeSelect",inheritAttrs:!1,props:Rn(fDe(),{transitionName:"slide-up",choiceTransitionName:""}),setup:function(){return{vcTreeSelect:null,configProvider:ve("configProvider",St)}},created:function(){on(this.multiple!==!1||!this.treeCheckable,"TreeSelect","`multiple` will alway be `true` when `treeCheckable` is true")},methods:{saveTreeSelect:function(t){this.vcTreeSelect=t},focus:function(){this.vcTreeSelect.focus()},blur:function(){this.vcTreeSelect.blur()},renderSwitcherIcon:function(t,n){var r=n.isLeaf,a=n.loading;return 
a?g(co,{class:"".concat(t,"-switcher-loading-icon")},null):r?null:g(mDe,{class:"".concat(t,"-switcher-icon")},null)},handleChange:function(){for(var t=arguments.length,n=new Array(t),r=0;r<t;r++)n[r]=arguments[r];this.$emit("update:value",n[0]),this.$emit.apply(this,["change"].concat(n))},handleTreeExpand:function(){for(var t=arguments.length,n=new Array(t),r=0;r<t;r++)n[r]=arguments[r];this.$emit("update:treeExpandedKeys",n[0]),this.$emit.apply(this,["treeExpand"].concat(n))},handleSearch:function(){for(var t=arguments.length,n=new Array(t),r=0;r<t;r++)n[r]=arguments[r];this.$emit("update:searchValue",n[0]),this.$emit.apply(this,["search"].concat(n))},updateTreeData:function(t){var n=this,r=this.$slots,a={children:"children",title:"title",key:"key",label:"label",value:"value"},o=P(P({},a),this.$props.replaceFields);return t.map(function(i){var l=i.slots,s=l===void 0?{}:l,c=i[o.label],d=i[o.title],f=i[o.value],p=i[o.key],v=i[o.children],m=typeof c=="function"?c():c,y=typeof d=="function"?d():d;!m&&s.label&&r[s.label]&&(m=g(Fe,null,[r[s.label](i)])),!y&&s.title&&r[s.title]&&(y=g(Fe,null,[r[s.title](i)]));var b=P(P({},i),{title:y||m,value:f,dataRef:i,key:p});return v?P(P({},b),{children:n.updateTreeData(v)}):b})}},render:function(){var t,n=this,r=Qe(this),a=r.prefixCls,o=r.size,i=r.dropdownStyle,l=r.dropdownClassName,s=r.getPopupContainer,c=gDe(r,["prefixCls","size","dropdownStyle","dropdownClassName","getPopupContainer"]),d=this.$attrs.class,f=this.configProvider,p=f.renderEmpty,v=f.getPrefixCls,m=v("select",a),y=We(this,"notFoundContent"),b=We(this,"removeIcon"),C=We(this,"clearIcon"),S=this.configProvider.getPopupContainer,w=bn(c,["inputIcon","removeIcon","clearIcon","switcherIcon","suffixIcon"]),k=We(this,"suffixIcon");k=Array.isArray(k)?k[0]:k;var $=r.treeData;$&&($=this.updateTreeData($));var O=(t={},V(t,"".concat(m,"-lg"),o==="large"),V(t,"".concat(m,"-sm"),o==="small"),V(t,d,d),t),T=c.showSearch;"showSearch"in c||(T=!!(c.multiple||c.treeCheckable));var 
_=We(this,"treeCheckable");_&&(_=g("span",{class:"".concat(m,"-tree-checkbox-inner")},null));var I=k||g(Rs,{class:"".concat(m,"-arrow-icon")},null),L=b||g(go,{class:"".concat(m,"-remove-icon")},null),j=C||g(Yr,{class:"".concat(m,"-clear-icon")},null),F=P(P(P(P({},this.$attrs),{switcherIcon:function(D){return n.renderSwitcherIcon(m,D)},inputIcon:I,removeIcon:L,clearIcon:j}),w),{showSearch:T,getPopupContainer:s||S,dropdownClassName:Se(l,"".concat(m,"-tree-dropdown")),prefixCls:m,dropdownStyle:P({maxHeight:"100vh",overflow:"auto"},i),treeCheckable:_,notFoundContent:y||p("Select"),class:O,onChange:this.handleChange,onSearch:this.handleSearch,onTreeExpand:this.handleTreeExpand,ref:this.saveTreeSelect,treeData:$||YR(ht(this))});return g(cDe,le(le({},F),{},{__propsSymbol__:[]}),bn(this.$slots,["default"]))}});Kl.install=function(e){return e.component(Kl.name,Kl),e.component(Kl.TreeNode.displayName,Kl.TreeNode),e};var yDe=Kl.TreeNode,bDe=Kl,CDe={prefixCls:u.string,color:u.string,dot:u.any,pending:u.looseBool,position:u.oneOf(rt("left","right","")).def("")},gf=G({name:"ATimelineItem",props:Rn(CDe,{color:"blue",pending:!1}),setup:function(){return{configProvider:ve("configProvider",St)}},render:function(){var t,n,r,a,o=Qe(this),i=o.prefixCls,l=o.color,s=l===void 0?"":l,c=o.pending,d=this.configProvider.getPrefixCls,f=d("timeline",i),p=We(this,"dot"),v=Se((t={},V(t,"".concat(f,"-item"),!0),V(t,"".concat(f,"-item-pending"),c),t)),m=Se((n={},V(n,"".concat(f,"-item-head"),!0),V(n,"".concat(f,"-item-head-custom"),p),V(n,"".concat(f,"-item-head-").concat(s),!0),n));return g("li",{class:v},[g("div",{class:"".concat(f,"-item-tail")},null),g("div",{class:m,style:{borderColor:/blue|red|green|gray/.test(s)?void 0:s}},[p]),g("div",{class:"".concat(f,"-item-content")},[(a=(r=this.$slots).default)===null||a===void 0?void 
0:a.call(r)])])}}),wDe={prefixCls:u.string,pending:u.any,pendingDot:u.string,reverse:u.looseBool,mode:u.oneOf(rt("left","alternate","right",""))},Ed=G({name:"ATimeline",props:Rn(wDe,{reverse:!1,mode:""}),setup:function(){return{configProvider:ve("configProvider",St)}},render:function(){var t,n,r,a=Qe(this),o=a.prefixCls,i=a.reverse,l=a.mode,s=this.configProvider.getPrefixCls,c=s("timeline",o),d=We(this,"pendingDot"),f=We(this,"pending"),p=typeof f=="boolean"?null:f,v=Se(c,(t={},V(t,"".concat(c,"-pending"),!!f),V(t,"".concat(c,"-reverse"),!!i),V(t,"".concat(c,"-").concat(l),!!l),t)),m=La((r=(n=this.$slots).default)===null||r===void 0?void 0:r.call(n)),y=f?g(gf,{pending:!!f,dot:d||g(co,null,null)},{default:function(){return[p]}}):null,b=i?[y].concat(Je(m.reverse())):[].concat(Je(m),[y]),C=function(T,_){var I=rr(T);return l==="alternate"?I.position==="right"?"".concat(c,"-item-right"):I.position==="left"||_%2===0?"".concat(c,"-item-left"):"".concat(c,"-item-right"):l==="left"?"".concat(c,"-item-left"):l==="right"||I.position==="right"?"".concat(c,"-item-right"):""},S=b.filter(function(O){return!!O}),w=S.length,k="".concat(c,"-item-last"),$=S.map(function(O,T){var _=T===w-2?k:"",I=T===w-1?k:"";return hr(O,{class:Se([!i&&!!f?_:I,C(O,T)])})});return g("ul",{class:v},[$])}});Ed.Item=gf;Ed.install=function(e){return e.component(Ed.name,Ed),e.component(gf.name,gf),e};var SDe={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M864 170h-60c-4.4 0-8 3.6-8 8v518H310v-73c0-6.7-7.8-10.5-13-6.3l-141.9 112a8 8 0 000 12.6l141.9 112c5.3 4.2 13 .4 13-6.3v-75h498c35.3 0 64-28.7 64-64V178c0-4.4-3.6-8-8-8z"}}]},name:"enter",theme:"outlined"},kDe=SDe;function C_(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return 
Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){$De(e,a,n[a])})}return e}function $De(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var ek=function(t,n){var r=C_({},t,n.attrs);return g(Et,C_({},r,{icon:kDe}),null)};ek.displayName="EnterOutlined";ek.inheritAttrs=!1;var ODe=ek,PDe=G({name:"Editable",props:{prefixCls:u.string,value:u.string,maxlength:u.number,autoSize:u.oneOfType([u.looseBool,u.object]),onSave:u.func,onCancel:u.func,onEnd:u.func,onChange:u.func,originContent:u.string},emits:["save","cancel","end","change"],setup:function(t,n){var r=n.emit,a=bt({current:t.value||"",lastKeyCode:void 0,inComposition:!1,cancelFlag:!1});ce(function(){return t.value},function(m){a.current=m});var o=H();et(function(){var m;if(o.value){var y=(m=o.value)===null||m===void 0?void 0:m.resizableTextArea,b=y==null?void 0:y.textArea;b.focus();var C=b.value.length;b.setSelectionRange(C,C)}});function i(m){o.value=m}function l(m){var y=m.target.value;a.current=y.replace(/[\r\n]/g,""),r("change",a.current)}function s(){a.inComposition=!0}function c(){a.inComposition=!1}function d(m){var y=m.keyCode;y===ze.ENTER&&m.preventDefault(),!a.inComposition&&(a.lastKeyCode=y)}function f(m){var y=m.keyCode,b=m.ctrlKey,C=m.altKey,S=m.metaKey,w=m.shiftKey;a.lastKeyCode===y&&!a.inComposition&&!b&&!C&&!S&&!w&&(y===ze.ENTER?(v(),r("end")):y===ze.ESC&&(a.current=t.originContent,r("cancel")))}function p(){v(),r("end")}function v(){r("save",a.current.trim())}return function(){return g("div",{class:"".concat(t.prefixCls," ").concat(t.prefixCls,"-edit-content")},[g(S2,{ref:i,maxlength:t.maxlength,value:a.current,onChange:l,onKeydown:d,onKeyup:f,onCompositionstart:s,onCompositionend:c,onBlur:p,autoSize:t.autoSize===void 0||t.autoSize},null),g(ODe,{class:"".concat(t.prefixCls,"-edit-content-confirm")},null)])}}}),TDe=PDe,xDe=3,_De=8,Ln,Mb={padding:0,margin:0,display:"inline",lineHeight:"inherit"};function 
Ib(e){if(!e)return 0;var t=e.match(/^\d*(\.\d*)?/);return t?Number(t[0]):0}function EDe(e){var t=Array.prototype.slice.apply(e);return t.map(function(n){return"".concat(n,": ").concat(e.getPropertyValue(n),";")}).join("")}var MDe=function(e,t,n,r,a){Ln||(Ln=document.createElement("div"),Ln.setAttribute("aria-hidden","true"),document.body.appendChild(Ln));var o=t.rows,i=t.suffix,l=i===void 0?"":i,s=window.getComputedStyle(e),c=EDe(s),d=Ib(s.lineHeight),f=Math.round(d*(o+1)+Ib(s.paddingTop)+Ib(s.paddingBottom));Ln.setAttribute("style",c),Ln.style.position="fixed",Ln.style.left="0",Ln.style.height="auto",Ln.style.minHeight="auto",Ln.style.maxHeight="auto",Ln.style.top="-999999px",Ln.style.zIndex="-1000",Ln.style.textOverflow="clip",Ln.style.whiteSpace="normal",Ln.style.webkitLineClamp="none";var p=$m({render:function(){return g("div",{style:Mb},[g("span",{style:Mb},[n,l]),g("span",{style:Mb},[r])])}});p.mount(Ln);function v(){return Ln.offsetHeight<f}if(v())return p.unmount(),{content:n,text:Ln.innerHTML,ellipsis:!1};var m=Array.prototype.slice.apply(Ln.childNodes[0].childNodes[0].cloneNode(!0).childNodes).filter(function(O){var T=O.nodeType,_=O.data;return T!==_De&&_!==""}),y=Array.prototype.slice.apply(Ln.childNodes[0].childNodes[1].cloneNode(!0).childNodes);p.unmount();var b=[];Ln.innerHTML="";var C=document.createElement("span");Ln.appendChild(C);var S=document.createTextNode(a+l);C.appendChild(S),y.forEach(function(O){Ln.appendChild(O)});function w(O){C.insertBefore(O,S)}function k(O,T){var _=arguments.length>2&&arguments[2]!==void 0?arguments[2]:0,I=arguments.length>3&&arguments[3]!==void 0?arguments[3]:T.length,L=arguments.length>4&&arguments[4]!==void 0?arguments[4]:0,j=Math.floor((_+I)/2),F=T.slice(0,j);if(O.textContent=F,_>=I-1)for(var N=I;N>=_;N-=1){var D=T.slice(0,N);if(O.textContent=D,v()||!D)return N===T.length?{finished:!1,vNode:T}:{finished:!0,vNode:D}}return v()?k(O,T,j,I,j):k(O,T,_,j,L)}function $(O){var T=O.nodeType;if(T===xDe){var 
_=O.textContent||"",I=document.createTextNode(_);return w(I),k(I,_)}return{finished:!1,vNode:null}}return m.some(function(O){var T=$(O),_=T.finished,I=T.vNode;return I&&b.push(I),_}),{content:b,text:Ln.innerHTML,ellipsis:!0}},IDe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},qR=G({name:"ATypography",inheritAttrs:!1,setup:function(t,n){var r=n.slots,a=n.attrs,o=Wt("typography",t),i=o.prefixCls;return function(){var l,s=P(P({},t),a);s.prefixCls,s.class;var c=s.component,d=c===void 0?"article":c,f=IDe(s,["prefixCls","class","component"]);return g(d,le({class:Se(i.value,a.class)},f),{default:function(){return[(l=r.default)===null||l===void 0?void 0:l.call(r)]}})}}});qR.props={prefixCls:u.string,component:u.string};var Fr=qR,NDe=function(){var t=document.getSelection();if(!t.rangeCount)return function(){};for(var n=document.activeElement,r=[],a=0;a<t.rangeCount;a++)r.push(t.getRangeAt(a));switch(n.tagName.toUpperCase()){case"INPUT":case"TEXTAREA":n.blur();break;default:n=null;break}return t.removeAllRanges(),function(){t.type==="Caret"&&t.removeAllRanges(),t.rangeCount||r.forEach(function(o){t.addRange(o)}),n&&n.focus()}},ADe=NDe,w_={"text/plain":"Text","text/html":"Url",default:"Text"},DDe="Copy to clipboard: #{key}, Enter";function RDe(e){var t=(/mac os x/i.test(navigator.userAgent)?"\u2318":"Ctrl")+"+C";return e.replace(/#{\s*key\s*}/g,t)}function LDe(e,t){var n,r,a,o,i,l=!1;t||(t={});var s=t.debug||!1;try{r=ADe(),a=document.createRange(),o=document.getSelection(),i=document.createElement("span"),i.textContent=e,i.style.all="unset",i.style.position="fixed",i.style.top=0,i.style.clip="rect(0, 0, 0, 
0)",i.style.whiteSpace="pre",i.style.webkitUserSelect="text",i.style.MozUserSelect="text",i.style.msUserSelect="text",i.style.userSelect="text",i.addEventListener("copy",function(d){if(d.stopPropagation(),t.format)if(d.preventDefault(),typeof d.clipboardData=="undefined"){s&&console.warn("unable to use e.clipboardData"),s&&console.warn("trying IE specific stuff"),window.clipboardData.clearData();var f=w_[t.format]||w_.default;window.clipboardData.setData(f,e)}else d.clipboardData.clearData(),d.clipboardData.setData(t.format,e);t.onCopy&&(d.preventDefault(),t.onCopy(d.clipboardData))}),document.body.appendChild(i),a.selectNodeContents(i),o.addRange(a);var c=document.execCommand("copy");if(!c)throw new Error("copy command was unsuccessful");l=!0}catch(d){s&&console.error("unable to copy using execCommand: ",d),s&&console.warn("trying IE specific stuff");try{window.clipboardData.setData(t.format||"text",e),t.onCopy&&t.onCopy(window.clipboardData),l=!0}catch(f){s&&console.error("unable to copy using clipboardData: ",f),s&&console.error("falling back to prompt"),n=RDe("message"in t?t.message:DDe),window.prompt(n,e)}}finally{o&&(typeof o.removeRange=="function"?o.removeRange(a):o.removeAllRanges()),i&&document.body.removeChild(i),r()}return l}var FDe={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M832 64H296c-4.4 0-8 3.6-8 8v56c0 4.4 3.6 8 8 8h496v688c0 4.4 3.6 8 8 8h56c4.4 0 8-3.6 8-8V96c0-17.7-14.3-32-32-32zM704 192H192c-17.7 0-32 14.3-32 32v530.7c0 8.5 3.4 16.6 9.4 22.6l173.3 173.3c2.2 2.2 4.7 4 7.4 5.5v1.9h4.2c3.5 1.3 7.2 2 11 2H704c17.7 0 32-14.3 32-32V224c0-17.7-14.3-32-32-32zM350 856.2L263.9 770H350v86.2zM664 888H414V746c0-22.1-17.9-40-40-40H232V264h432v624z"}}]},name:"copy",theme:"outlined"},BDe=FDe;function S_(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof 
Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){VDe(e,a,n[a])})}return e}function VDe(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var tk=function(t,n){var r=S_({},t,n.attrs);return g(Et,S_({},r,{icon:BDe}),null)};tk.displayName="CopyOutlined";tk.inheritAttrs=!1;var zDe=tk,HDe={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M257.7 752c2 0 4-.2 6-.5L431.9 722c2-.4 3.9-1.3 5.3-2.8l423.9-423.9a9.96 9.96 0 000-14.1L694.9 114.9c-1.9-1.9-4.4-2.9-7.1-2.9s-5.2 1-7.1 2.9L256.8 538.8c-1.5 1.5-2.4 3.3-2.8 5.3l-29.5 168.2a33.5 33.5 0 009.4 29.8c6.6 6.4 14.9 9.9 23.8 9.9zm67.4-174.4L687.8 215l73.3 73.3-362.7 362.6-88.9 15.7 15.6-89zM880 836H144c-17.7 0-32 14.3-32 32v36c0 4.4 3.6 8 8 8h784c4.4 0 8-3.6 8-8v-36c0-17.7-14.3-32-32-32z"}}]},name:"edit",theme:"outlined"},jDe=HDe;function k_(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){KDe(e,a,n[a])})}return e}function KDe(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var nk=function(t,n){var r=k_({},t,n.attrs);return g(Et,k_({},r,{icon:jDe}),null)};nk.displayName="EditOutlined";nk.inheritAttrs=!1;var WDe=nk,UDe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return 
n},YDe=bD("webkitLineClamp"),qDe=bD("textOverflow"),$_="...",GR=G({name:"Base",inheritAttrs:!1,emits:["update:content"],setup:function(t,n){var r=n.slots,a=n.attrs,o=n.emit,i=Wt("typography",t),l=i.prefixCls,s=bt({edit:!1,copied:!1,ellipsisText:"",ellipsisContent:null,isEllipsis:!1,expanded:!1,clientRendered:!1,expandStr:"",copyStr:"",copiedStr:"",editStr:"",copyId:void 0,rafId:void 0,prevProps:void 0,originContent:""}),c=H(),d=H(),f=x(function(){var D=t.ellipsis;return D?P({rows:1,expandable:!1},kt(D)==="object"?D:null):{}});et(function(){s.clientRendered=!0}),Lt(function(){window.clearTimeout(s.copyId),en.cancel(s.rafId)}),ce([function(){return f.value.rows},function(){return t.content}],function(){Ne(function(){$()})},{flush:"post",deep:!0,immediate:!0}),Wn(function(){t.content===void 0&&(on(!t.editable,"Typography","When `editable` is enabled, please use `content` instead of children"),on(!t.ellipsis,"Typography","When `ellipsis` is enabled, please use `content` instead of children"))});function p(){var D,z;return t.ellipsis||t.editable?t.content:(z=(D=c.value)===null||D===void 0?void 0:D.$el)===null||z===void 0?void 0:z.innerText}function v(D){var z=f.value.onExpand;s.expanded=!0,z==null||z(D)}function m(D){D.preventDefault(),s.originContent=t.content,k(!0)}function y(D){b(D),k(!1)}function b(D){var z=w.value.onChange;D!==t.content&&(o("update:content",D),z==null||z(D))}function C(){var D,z;(z=(D=w.value).onCancel)===null||z===void 0||z.call(D),k(!1)}function S(D){D.preventDefault();var z=t.copyable,B=P({},kt(z)==="object"?z:null);B.text===void 0&&(B.text=p()),LDe(B.text||""),s.copied=!0,Ne(function(){B.onCopy&&B.onCopy(),s.copyId=window.setTimeout(function(){s.copied=!1},3e3)})}var w=x(function(){var D=t.editable;return D?P({editing:s.edit},kt(D)==="object"?D:null):{editing:s.edit}});function k(D){var z=w.value.onStart;D&&z&&z(),s.edit=D,Ne(function(){var B;D||(B=d.value)===null||B===void 0||B.focus()})}function 
$(){en.cancel(s.rafId),s.rafId=en(function(){T()})}var O=x(function(){var D=f.value,z=D.rows,B=D.expandable,M=D.suffix,E=D.onEllipsis,K=D.tooltip;return M||K||t.editable||t.copyable||B||E?!1:z===1?qDe:YDe}),T=function(){var z,B,M=s.ellipsisText,E=s.isEllipsis,K=f.value,W=K.rows,Y=K.suffix,q=K.onEllipsis;if(!(!W||W<0||!(!((z=c.value)===null||z===void 0)&&z.$el)||s.expanded||t.content===void 0)&&!O.value){var J=MDe((B=c.value)===null||B===void 0?void 0:B.$el,{rows:W,suffix:Y},t.content,N(!0),$_),ne=J.content,oe=J.text,Q=J.ellipsis;(M!==oe||s.isEllipsis!==Q)&&(s.ellipsisText=oe,s.ellipsisContent=ne,s.isEllipsis=Q,E!==Q&&q&&q(Q))}};function _(D,z){var B=D.mark,M=D.code,E=D.underline,K=D.delete,W=D.strong,Y=D.keyboard,q=z;function J(ne,oe){if(!!ne){var Q=function(){return q}();q=g(oe,null,{default:function(){return[Q]}})}}return J(W,"strong"),J(E,"u"),J(K,"del"),J(M,"code"),J(B,"mark"),J(Y,"kbd"),q}function I(D){var z=f.value,B=z.expandable,M=z.symbol;if(!B||!D&&(s.expanded||!s.isEllipsis))return null;var E=(r.ellipsisSymbol?r.ellipsisSymbol():M)||s.expandStr;return g("a",{key:"expand",class:"".concat(l.value,"-expand"),onClick:v,"aria-label":s.expandStr},[E])}function L(){if(!!t.editable){var D=t.editable.tooltip,z=r.editableIcon?r.editableIcon():g(WDe,{role:"button"},null),B=r.editableTooltip?r.editableTooltip():s.editStr,M=typeof B=="string"?B:"";return g(Io,{key:"edit",title:D===!1?"":B},{default:function(){return[g(cm,{ref:d,class:"".concat(l.value,"-edit"),onClick:m,"aria-label":M},{default:function(){return[z]}})]}})}}function j(){if(!!t.copyable){var D=t.copyable.tooltip,z=s.copied?s.copiedStr:s.copyStr,B=r.copyableTooltip?r.copyableTooltip({copied:s.copied}):z,M=typeof B=="string"?B:"",E=s.copied?g(wg,null,null):g(zDe,null,null),K=r.copyableIcon?r.copyableIcon({copied:!!s.copied}):E;return 
g(Io,{key:"copy",title:D===!1?"":B},{default:function(){return[g(cm,{class:["".concat(l.value,"-copy"),V({},"".concat(l.value,"-copy-success"),s.copied)],onClick:S,"aria-label":M},{default:function(){return[K]}})]}})}}function F(){var D=a.class,z=a.style,B=w.value,M=B.maxlength,E=B.autoSize,K=B.onEnd;return g(TDe,{class:D,style:z,prefixCls:l.value,value:t.content,originContent:s.originContent,maxlength:M,autoSize:E,onSave:y,onChange:b,onCancel:C,onEnd:K},null)}function N(D){return[I(D),L(),j()].filter(function(z){return z})}return function(){var D,z=w.value.editing,B=t.ellipsis||t.editable?t.content!==void 0?t.content:(D=r.default)===null||D===void 0?void 0:D.call(r):r.default?r.default():t.content;return z?F():g(Kr,{componentName:"Text",children:function(E){var K,W=P(P({},t),a),Y=W.type,q=W.disabled;W.content;var J=W.class,ne=W.style,oe=UDe(W,["type","disabled","content","class","style"]),Q=f.value,ae=Q.rows,de=Q.suffix,be=Q.tooltip,Ee=E.edit,Pe=E.copy,Be=E.copied,te=E.expand;s.editStr=Ee,s.copyStr=Pe,s.copiedStr=Be,s.expandStr=te;var ie=bn(oe,["prefixCls","editable","copyable","ellipsis","mark","code","delete","underline","strong","keyboard"]),ge=O.value,ke=ae===1&&ge,xe=ae&&ae>1&&ge,Ie=B,ye;if(ae&&s.isEllipsis&&!s.expanded&&!ge){var pe=oe.title,ue=pe||"";!pe&&(typeof B=="string"||typeof B=="number")&&(ue=String(B)),ue=ue==null?void 0:ue.slice(String(s.ellipsisContent||"").length),Ie=g(Fe,null,[Gt(s.ellipsisContent),g("span",{title:ue,"aria-hidden":"true"},[$_]),de])}else Ie=g(Fe,null,[B,de]);Ie=_(t,Ie);var Ce=be&&ae&&s.isEllipsis&&!s.expanded&&!ge,je=r.ellipsisTooltip?r.ellipsisTooltip():be;return 
g(zo,{onResize:$,disabled:!ae},{default:function(){return[g(Fr,le({ref:c,class:[(K={},V(K,"".concat(l.value,"-").concat(Y),Y),V(K,"".concat(l.value,"-disabled"),q),V(K,"".concat(l.value,"-ellipsis"),ae),V(K,"".concat(l.value,"-single-line"),ae===1),V(K,"".concat(l.value,"-ellipsis-single-line"),ke),V(K,"".concat(l.value,"-ellipsis-multiple-line"),xe),K),J],style:P(P({},ne),{WebkitLineClamp:xe?ae:void 0}),"aria-label":ye},ie),{default:function(){return[Ce?g(Io,{title:be===!0?B:je},{default:function(){return[g("span",null,[Ie])]}}):Ie,N()]}})]}})}},null)}}}),Uf=function(){return{editable:u.oneOfType([u.looseBool,u.object]),copyable:u.oneOfType([u.looseBool,u.object]),prefixCls:u.string,component:u.string,type:u.oneOf(["secondary","success","danger","warning"]),disabled:u.looseBool,ellipsis:u.oneOfType([u.looseBool,u.object]),code:u.looseBool,mark:u.looseBool,underline:u.looseBool,delete:u.looseBool,strong:u.looseBool,keyboard:u.looseBool,content:u.string}};GR.props=Uf();var Yf=GR,GDe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},Lg=function(t,n){var r=n.slots,a=n.attrs,o=P(P({},t),a),i=o.ellipsis,l=o.rel,s=GDe(o,["ellipsis","rel"]);on(kt(i)!=="object","Typography.Link","`ellipsis` only supports boolean value.");var c=P(P({},s),{rel:l===void 0&&s.target==="_blank"?"noopener noreferrer":l,ellipsis:!!i,component:"a"});return delete c.navigate,g(Yf,c,r)};Lg.displayName="ATypographyLink";Lg.inheritAttrs=!1;Lg.props=bn(P(P({},Uf()),{ellipsis:u.looseBool}),["component"]);var rk=Lg,Fg=function(t,n){var r=n.slots,a=n.attrs,o=P(P(P({},t),{component:"div"}),a);return g(Yf,o,r)};Fg.displayName="ATypographyParagraph";Fg.inheritAttrs=!1;Fg.props=bn(Uf(),["component"]);var 
ak=Fg,Bg=function(t,n){var r=n.slots,a=n.attrs,o=t.ellipsis;on(kt(o)!=="object"||!o||!("expandable"in o)&&!("rows"in o),"Typography.Text","`ellipsis` do not support `expandable` or `rows` props.");var i=P(P(P({},t),{ellipsis:o&&kt(o)==="object"?bn(o,["expandable","rows"]):o,component:"span"}),a);return g(Yf,i,r)};Bg.displayName="ATypographyText";Bg.inheritAttrs=!1;Bg.props=bn(Uf(),["component"]);var ok=Bg,XDe=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},ZDe=u0e(1,2,3,4,5),Vg=function(t,n){var r=n.slots,a=n.attrs,o=t.level,i=o===void 0?1:o,l=XDe(t,["level"]),s;ZDe.indexOf(i)!==-1?s="h".concat(i):(on(!1,"Typography","Title only accept `1 | 2 | 3 | 4 | 5` as `level` value."),s="h1");var c=P(P(P({},l),{component:s}),a);return g(Yf,c,r)};Vg.displayName="ATypographyTitle";Vg.inheritAttrs=!1;Vg.props=bn(P(P({},Uf()),{level:u.number}),["component","strong"]);var ik=Vg;Fr.Text=ok;Fr.Title=ik;Fr.Paragraph=ak;Fr.Link=rk;Fr.Base=Yf;Fr.install=function(e){return e.component(Fr.name,Fr),e.component(Fr.Text.displayName,ok),e.component(Fr.Title.displayName,ik),e.component(Fr.Paragraph.displayName,ak),e.component(Fr.Link.displayName,rk),e};function JDe(e,t){var n="cannot ".concat(e.method," ").concat(e.action," ").concat(t.status,"'"),r=new Error(n);return r.status=t.status,r.method=e.method,r.url=e.action,r}function O_(e){var t=e.responseText||e.response;if(!t)return t;try{return JSON.parse(t)}catch{return t}}function QDe(e){var t=new window.XMLHttpRequest;e.onProgress&&t.upload&&(t.upload.onprogress=function(i){i.total>0&&(i.percent=i.loaded/i.total*100),e.onProgress(i)});var n=new window.FormData;e.data&&Object.keys(e.data).forEach(function(o){var 
i=e.data[o];if(Array.isArray(i)){i.forEach(function(l){n.append("".concat(o,"[]"),l)});return}n.append(o,e.data[o])}),n.append(e.filename,e.file),t.onerror=function(i){e.onError(i)},t.onload=function(){if(t.status<200||t.status>=300)return e.onError(JDe(e,t),O_(t));e.onSuccess(O_(t),t)},t.open(e.method,e.action,!0),e.withCredentials&&"withCredentials"in t&&(t.withCredentials=!0);var r=e.headers||{};r["X-Requested-With"]!==null&&t.setRequestHeader("X-Requested-With","XMLHttpRequest");for(var a in r)r.hasOwnProperty(a)&&r[a]!==null&&t.setRequestHeader(a,r[a]);return t.send(n),{abort:function(){t.abort()}}}var e7e=+new Date,t7e=0;function ev(){return"vc-upload-".concat(e7e,"-").concat(++t7e)}function n7e(e,t){return e.indexOf(t,e.length-t.length)!==-1}var P_=function(e,t){if(e&&t){var n=Array.isArray(t)?t:t.split(","),r=e.name||"",a=e.type||"",o=a.replace(/\/.*$/,"");return n.some(function(i){var l=i.trim();return l.charAt(0)==="."?n7e(r.toLowerCase(),l.toLowerCase()):/\/\*$/.test(l)?o===l.replace(/\/.*$/,""):a===l})}return!0};function r7e(e,t){var n=e.createReader(),r=[];function a(){n.readEntries(function(o){var i=Array.prototype.slice.apply(o);r=r.concat(i);var l=!i.length;l?t(r):a()})}a()}var a7e=function(t,n,r){var a=function s(c,d){d=d||"",c.isFile?c.file(function(f){r(f)&&(c.fullPath&&!f.webkitRelativePath&&(Object.defineProperties(f,{webkitRelativePath:{writable:!0}}),f.webkitRelativePath=c.fullPath.replace(/^\//,""),Object.defineProperties(f,{webkitRelativePath:{writable:!1}})),n([f]))}):c.isDirectory&&r7e(c,function(f){f.forEach(function(p){s(p,"".concat(d).concat(c.name,"/"))})})},o=M1(t),i;try{for(o.s();!(i=o.n()).done;){var 
l=i.value;a(l.webkitGetAsEntry())}}catch(s){o.e(s)}finally{o.f()}},o7e=a7e,i7e={componentTag:u.string,prefixCls:u.string,name:u.string,multiple:u.looseBool,directory:u.looseBool,disabled:u.looseBool,accept:u.string,data:u.oneOfType([u.object,u.func]),action:u.oneOfType([u.string,u.func]),headers:u.object,beforeUpload:u.func,customRequest:u.func,withCredentials:u.looseBool,openFileDialogOnClick:u.looseBool,transformFile:u.func,method:u.string},l7e={inheritAttrs:!1,name:"ajaxUploader",mixins:[nt],props:i7e,data:function(){return this.reqs={},{uid:ev()}},mounted:function(){this._isMounted=!0},beforeUnmount:function(){this._isMounted=!1,this.abort()},methods:{onChange:function(t){var n=t.target.files;this.uploadFiles(n),this.reset()},onClick:function(){var t=this.$refs.fileInputRef;!t||t.click()},onKeyDown:function(t){t.key==="Enter"&&this.onClick()},onFileDrop:function(t){var n=this,r=this.$props.multiple;if(t.preventDefault(),t.type!=="dragover")if(this.directory)o7e(t.dataTransfer.items,this.uploadFiles,function(l){return P_(l,n.accept)});else{var a=KW(Array.prototype.slice.call(t.dataTransfer.files),function(l){return P_(l,n.accept)}),o=a[0],i=a[1];r===!1&&(o=o.slice(0,1)),this.uploadFiles(o),i.length&&this.__emit("reject",i)}},uploadFiles:function(t){var n=this,r=Array.prototype.slice.call(t);r.map(function(a){return a.uid=ev(),a}).forEach(function(a){n.upload(a,r)})},upload:function(t,n){var r=this;if(!this.beforeUpload)return setTimeout(function(){return r.post(t)},0);var a=this.beforeUpload(t,n);a&&a.then?a.then(function(o){var i=Object.prototype.toString.call(o);return i==="[object File]"||i==="[object Blob]"?r.post(o):r.post(t)}).catch(function(o){console&&console.log(o)}):a!==!1&&setTimeout(function(){return r.post(t)},0)},post:function(t){var n=this;if(!!this._isMounted){var r=this.$props,a=r.data,o=r.transformFile,i=o===void 0?function(l){return l}:o;new Promise(function(l){var s=n.action;if(typeof s=="function")return l(s(t));l(s)}).then(function(l){var 
s=t.uid,c=n.customRequest||QDe,d=Promise.resolve(i(t)).catch(function(f){console.error(f)});d.then(function(f){typeof a=="function"&&(a=a(t));var p={action:l,filename:n.name,data:a,file:f,headers:n.headers,withCredentials:n.withCredentials,method:r.method||"post",onProgress:function(m){n.__emit("progress",m,t)},onSuccess:function(m,y){delete n.reqs[s],n.__emit("success",m,t,y)},onError:function(m,y){delete n.reqs[s],n.__emit("error",m,y,t)}};n.reqs[s]=c(p),n.__emit("start",t)})})}},reset:function(){this.setState({uid:ev()})},abort:function(t){var n=this.reqs;if(t){var r=t;t&&t.uid&&(r=t.uid),n[r]&&n[r].abort&&n[r].abort(),delete n[r]}else Object.keys(n).forEach(function(a){n[a]&&n[a].abort&&n[a].abort(),delete n[a]})}},render:function(){var t,n=this,r=this.$props,a=this.$attrs,o=r.componentTag,i=r.prefixCls,l=r.disabled,s=r.multiple,c=r.accept,d=r.directory,f=r.openFileDialogOnClick,p=a.class,v=a.style,m=a.id,y=Se((t={},V(t,i,!0),V(t,"".concat(i,"-disabled"),l),V(t,p,p),t)),b=l?{}:{onClick:f?this.onClick:function(){},onKeydown:f?this.onKeyDown:function(){},onDrop:this.onFileDrop,onDragover:this.onFileDrop},C=P(P({},b),{role:"button",tabindex:l?null:"0",class:y,style:v});return g(o,C,{default:function(){return[g("input",{id:m,type:"file",ref:"fileInputRef",onClick:function(k){return k.stopPropagation()},key:n.uid,style:{display:"none"},accept:c,directory:d?"directory":null,webkitdirectory:d?"webkitdirectory":null,multiple:s,onChange:n.onChange},null),ht(n)]}})}},s7e=l7e,u7e={position:"absolute",top:0,opacity:0,filter:"alpha(opacity=0)",left:0,zIndex:9999},c7e={name:"IframeUploader",mixins:[nt],props:{componentTag:u.string,disabled:u.looseBool,prefixCls:u.string,accept:u.string,multiple:u.looseBool,data:u.oneOfType([u.object,u.func]),action:u.oneOfType([u.string,u.func]),name:u.string},data:function(){return this.file={},{uploading:!1}},methods:{onLoad:function(){if(!!this.uploading){var t=this.file,n;try{var 
r=this.getIframeDocument(),a=r.getElementsByTagName("script")[0];a&&a.parentNode===r.body&&r.body.removeChild(a),n=r.body.innerHTML,this.__emit("success",n,t)}catch(o){on(!1,"cross domain error for Upload. Maybe server should return document.domain script. see Note from https://github.com/react-component/upload"),n="cross-domain",this.__emit("error",o,null,t)}this.endUpload()}},onChange:function(){var t=this,n=this.getFormInputNode(),r=this.file={uid:ev(),name:n.value&&n.value.substring(n.value.lastIndexOf("\\")+1,n.value.length)};this.startUpload();var a=this.$props;if(!a.beforeUpload)return this.post(r);var o=a.beforeUpload(r);o&&o.then?o.then(function(){t.post(r)},function(){t.endUpload()}):o!==!1?this.post(r):this.endUpload()},getIframeNode:function(){return this.$refs.iframeRef},getIframeDocument:function(){return this.getIframeNode().contentDocument},getFormNode:function(){return this.getIframeDocument().getElementById("form")},getFormInputNode:function(){return this.getIframeDocument().getElementById("input")},getFormDataNode:function(){return this.getIframeDocument().getElementById("data")},getFileForMultiple:function(t){return this.multiple?[t]:t},getIframeHTML:function(t){var n="",r="";if(t){var a="script";n="<".concat(a,'>document.domain="').concat(t,'";</').concat(a,">"),r='<input name="_documentDomain" value="'.concat(t,'" />')}return`
      <!DOCTYPE html>
      <html>
      <head>
      <meta http-equiv="X-UA-Compatible" content="IE=edge" />
      <style>
      body,html {padding:0;margin:0;border:0;overflow:hidden;}
      </style>
      `.concat(n,`
      </head>
      <body>
      <form method="post"
      encType="multipart/form-data"
      action="" id="form"
      style="display:block;height:9999px;position:relative;overflow:hidden;">
      <input id="input" type="file"
       name="`).concat(this.name,`"
       style="position:absolute;top:0;right:0;height:9999px;font-size:9999px;cursor:pointer;"/>
      `).concat(r,`
      <span id="data"></span>
      </form>
      </body>
      </html>
      `)},initIframeSrc:function(){this.domain&&(this.getIframeNode().src=`javascript:void((function(){
          var d = document;
          d.open();
          d.domain='`.concat(this.domain,`';
          d.write('');
          d.close();
        })())`))},initIframe:function(){var t=this.getIframeNode(),n=t.contentWindow,r;this.domain=this.domain||"",this.initIframeSrc();try{r=n.document}catch{this.domain=document.domain,this.initIframeSrc(),n=t.contentWindow,r=n.document}r.open("text/html","replace"),r.write(this.getIframeHTML(this.domain)),r.close(),this.getFormInputNode().onchange=this.onChange},endUpload:function(){this.uploading&&(this.file={},this.uploading=!1,this.setState({uploading:!1}),this.initIframe())},startUpload:function(){this.uploading||(this.uploading=!0,this.setState({uploading:!0}))},updateIframeWH:function(){var t=Sn(this),n=this.getIframeNode();n.style.height="".concat(t.offsetHeight,"px"),n.style.width="".concat(t.offsetWidth,"px")},abort:function(t){if(t){var n=t;t&&t.uid&&(n=t.uid),n===this.file.uid&&this.endUpload()}else this.endUpload()},post:function(t){var n=this,r=this.getFormNode(),a=this.getFormDataNode(),o=this.$props.data;typeof o=="function"&&(o=o(t));var i=document.createDocumentFragment();for(var l in o)if(o.hasOwnProperty(l)){var s=document.createElement("input");s.setAttribute("name",l),s.value=o[l],i.appendChild(s)}a.appendChild(i),new Promise(function(c){var d=n.action;if(typeof d=="function")return c(d(t));c(d)}).then(function(c){r.setAttribute("action",c),r.submit(),a.innerHTML="",n.__emit("start",t)})}},mounted:function(){var t=this;this.$nextTick(function(){t.updateIframeWH(),t.initIframe()})},updated:function(){var t=this;this.$nextTick(function(){t.updateIframeWH()})},render:function(){var t,n=this,r=this.$props,a=r.componentTag,o=r.disabled,i=r.prefixCls,l=this.$attrs,s=l.class,c=l.style,d=P(P({},u7e),{display:this.uploading||o?"none":""}),f=Se((t={},V(t,i,!0),V(t,"".concat(i,"-disabled"),o),V(t,s,s),t));return g(a,{class:f,style:P({position:"relative",zIndex:0},c)},{default:function(){return[g("iframe",{ref:"iframeRef",onLoad:n.onLoad,style:d},null),ht(n)]}})}},d7e=c7e;function Jc(){}var 
f7e={componentTag:u.string,prefixCls:u.string,action:u.oneOfType([u.string,u.func]),name:u.string,multipart:u.looseBool,directory:u.looseBool,onError:u.func,onSuccess:u.func,onProgress:u.func,onStart:u.func,data:u.oneOfType([u.object,u.func]),headers:u.object,accept:u.string,multiple:u.looseBool,disabled:u.looseBool,beforeUpload:u.func,customRequest:u.func,onReady:u.func,withCredentials:u.looseBool,supportServerRender:u.looseBool,openFileDialogOnClick:u.looseBool,method:u.string},T_=G({name:"Upload",mixins:[nt],inheritAttrs:!1,props:An(f7e,{componentTag:"span",prefixCls:"rc-upload",data:{},headers:{},name:"file",multipart:!1,onReady:Jc,onStart:Jc,onError:Jc,onSuccess:Jc,supportServerRender:!1,multiple:!1,beforeUpload:Jc,withCredentials:!1,openFileDialogOnClick:!0}),data:function(){return this.Component=null,{}},mounted:function(){var t=this;this.$nextTick(function(){t.supportServerRender&&(t.Component=t.getComponent(),t.$forceUpdate(),Ne(function(){t.__emit("ready")}))})},methods:{getComponent:function(){return typeof File!="undefined"?s7e:d7e},abort:function(t){this.$refs.uploaderRef.abort(t)}},render:function(){var t=this,n=P(P(P({},this.$props),{ref:"uploaderRef"}),this.$attrs);if(this.supportServerRender){var r=this.Component;return r?g(r,n,{default:function(){return[ht(t)]}}):null}var a=this.getComponent();return g(a,n,{default:function(){return[ht(t)]}})}});u.oneOf(rt("error","success","done","uploading","removed"));function dc(e){var t=e.uid,n=e.name;return!(!t&&t!==0||!["string","number"].includes(kt(t))||n===""||typeof n!="string")}u.custom(dc),u.arrayOf(u.custom(dc)),u.object;var 
h7e=u.shape({showRemoveIcon:u.looseBool,showPreviewIcon:u.looseBool}).loose,XR=u.shape({uploading:u.string,removeFile:u.string,downloadFile:u.string,uploadError:u.string,previewFile:u.string}).loose,ZR={type:u.oneOf(rt("drag","select")),name:u.string,defaultFileList:u.arrayOf(u.custom(dc)),fileList:u.arrayOf(u.custom(dc)),action:u.oneOfType([u.string,u.func]),directory:u.looseBool,data:u.oneOfType([u.object,u.func]),method:u.oneOf(rt("POST","PUT","PATCH","post","put","patch")),headers:u.object,showUploadList:u.oneOfType([u.looseBool,h7e]),multiple:u.looseBool,accept:u.string,beforeUpload:u.func,listType:u.oneOf(rt("text","picture","picture-card")),remove:u.func,supportServerRender:u.looseBool,disabled:u.looseBool,prefixCls:u.string,customRequest:u.func,withCredentials:u.looseBool,openFileDialogOnClick:u.looseBool,locale:XR,height:u.number,id:u.string,previewFile:u.func,transformFile:u.func,onChange:u.func,onPreview:u.func,onRemove:u.func,onDownload:u.func,"onUpdate:fileList":u.func};u.arrayOf(u.custom(dc)),u.string;var p7e={listType:u.oneOf(rt("text","picture","picture-card")),items:u.arrayOf(u.custom(dc)),progressAttr:u.object,prefixCls:u.string,showRemoveIcon:u.looseBool,showDownloadIcon:u.looseBool,showPreviewIcon:u.looseBool,locale:XR,previewFile:u.func,onPreview:u.func,onRemove:u.func,onDownload:u.func},x_=globalThis&&globalThis.__rest||function(e,t){var n={};for(var r in e)Object.prototype.hasOwnProperty.call(e,r)&&t.indexOf(r)<0&&(n[r]=e[r]);if(e!=null&&typeof Object.getOwnPropertySymbols=="function")for(var a=0,r=Object.getOwnPropertySymbols(e);a<r.length;a++)t.indexOf(r[a])<0&&Object.prototype.propertyIsEnumerable.call(e,r[a])&&(n[r[a]]=e[r[a]]);return n},yf=G({name:"AUploadDragger",inheritAttrs:!1,props:ZR,render:function(){var t=this,n=Qe(this),r=n.height,a=x_(n,["height"]),o=this.$attrs,i=o.style,l=x_(o,["style"]),s=P(P(P({},a),l),{type:"drag",style:P(P({},i),{height:r})});return g(Bu,s,{default:function(){return[ht(t)]}})}});function 
v7e(){return!0}function __(e){return P(P({},e),{lastModified:e.lastModified,lastModifiedDate:e.lastModifiedDate,name:e.name,size:e.size,type:e.type,uid:e.uid,percent:0,originFileObj:e})}function m7e(){var e=.1,t=.01,n=.98;return function(r){var a=r;return a>=n||(a+=e,e=e-t,e<.001&&(e=.001)),a}}function Nb(e,t){var n=e.uid!==void 0?"uid":"name";return t.filter(function(r){return r[n]===e[n]})[0]}function g7e(e,t){var n=e.uid!==void 0?"uid":"name",r=t.filter(function(a){return a[n]!==e[n]});return r.length===t.length?null:r}var y7e=function(){var t=arguments.length>0&&arguments[0]!==void 0?arguments[0]:"",n=t.split("/"),r=n[n.length-1],a=r.split(/#|\?/)[0];return(/\.[^./\\]*$/.exec(a)||[""])[0]},JR=function(t){return!!t&&t.indexOf("image/")===0},b7e=function(t){if(JR(t.type))return!0;var n=t.thumbUrl||t.url,r=y7e(n);return/^data:image\//.test(n)||/(webp|svg|png|gif|jpg|jpeg|jfif|bmp|dpg|ico)$/i.test(r)?!0:!(/^data:/.test(n)||r)},Fi=200;function C7e(e){return new Promise(function(t){if(!JR(e.type)){t("");return}var n=document.createElement("canvas");n.width=Fi,n.height=Fi,n.style.cssText="position: fixed; left: 0; top: 0; width: ".concat(Fi,"px; height: ").concat(Fi,"px; z-index: 9999; display: none;"),document.body.appendChild(n);var r=n.getContext("2d"),a=new Image;a.onload=function(){var o=a.width,i=a.height,l=Fi,s=Fi,c=0,d=0;o<i?(s=i*(Fi/o),d=-(s-l)/2):(l=o*(Fi/i),c=-(l-s)/2),r.drawImage(a,c,d,l,s);var f=n.toDataURL();document.body.removeChild(n),t(f)},a.src=window.URL.createObjectURL(e)})}var w7e={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M779.3 196.6c-94.2-94.2-247.6-94.2-341.7 0l-261 260.8c-1.7 1.7-2.6 4-2.6 6.4s.9 4.7 2.6 6.4l36.9 36.9a9 9 0 0012.7 0l261-260.8c32.4-32.4 75.5-50.2 121.3-50.2s88.9 17.8 121.2 50.2c32.4 32.4 50.2 75.5 50.2 121.2 0 45.8-17.8 88.8-50.2 121.2l-266 265.9-43.1 43.1c-40.3 40.3-105.8 40.3-146.1 0-19.5-19.5-30.2-45.4-30.2-73s10.7-53.5 30.2-73l263.9-263.8c6.7-6.6 15.5-10.3 
24.9-10.3h.1c9.4 0 18.1 3.7 24.7 10.3 6.7 6.7 10.3 15.5 10.3 24.9 0 9.3-3.7 18.1-10.3 24.7L372.4 653c-1.7 1.7-2.6 4-2.6 6.4s.9 4.7 2.6 6.4l36.9 36.9a9 9 0 0012.7 0l215.6-215.6c19.9-19.9 30.8-46.3 30.8-74.4s-11-54.6-30.8-74.4c-41.1-41.1-107.9-41-149 0L463 364 224.8 602.1A172.22 172.22 0 00174 724.8c0 46.3 18.1 89.8 50.8 122.5 33.9 33.8 78.3 50.7 122.7 50.7 44.4 0 88.8-16.9 122.6-50.7l309.2-309C824.8 492.7 850 432 850 367.5c.1-64.6-25.1-125.3-70.7-170.9z"}}]},name:"paper-clip",theme:"outlined"},S7e=w7e;function E_(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){k7e(e,a,n[a])})}return e}function k7e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var lk=function(t,n){var r=E_({},t,n.attrs);return g(Et,E_({},r,{icon:S7e}),null)};lk.displayName="PaperClipOutlined";lk.inheritAttrs=!1;var $7e=lk,O7e={icon:function(t,n){return{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M928 160H96c-17.7 0-32 14.3-32 32v640c0 17.7 14.3 32 32 32h832c17.7 0 32-14.3 32-32V192c0-17.7-14.3-32-32-32zm-40 632H136v-39.9l138.5-164.3 150.1 178L658.1 489 888 761.6V792zm0-129.8L664.2 396.8c-3.2-3.8-9-3.8-12.2 0L424.6 666.4l-144-170.7c-3.2-3.8-9-3.8-12.2 0L136 652.7V232h752v430.2z",fill:t}},{tag:"path",attrs:{d:"M424.6 765.8l-150.1-178L136 752.1V792h752v-30.4L658.1 489z",fill:n}},{tag:"path",attrs:{d:"M136 652.7l132.4-157c3.2-3.8 9-3.8 12.2 0l144 170.7L652 396.8c3.2-3.8 9-3.8 12.2 0L888 662.2V232H136v420.7zM304 280a88 88 0 110 176 88 88 0 010-176z",fill:n}},{tag:"path",attrs:{d:"M276 368a28 28 0 1056 0 28 28 0 10-56 0z",fill:n}},{tag:"path",attrs:{d:"M304 456a88 88 0 100-176 88 88 0 000 176zm0-116c15.5 0 28 12.5 28 28s-12.5 28-28 28-28-12.5-28-28 
12.5-28 28-28z",fill:t}}]}},name:"picture",theme:"twotone"},P7e=O7e;function M_(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){T7e(e,a,n[a])})}return e}function T7e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var sk=function(t,n){var r=M_({},t,n.attrs);return g(Et,M_({},r,{icon:P7e}),null)};sk.displayName="PictureTwoTone";sk.inheritAttrs=!1;var x7e=sk,_7e={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M360 184h-8c4.4 0 8-3.6 8-8v8h304v-8c0 4.4 3.6 8 8 8h-8v72h72v-80c0-35.3-28.7-64-64-64H352c-35.3 0-64 28.7-64 64v80h72v-72zm504 72H160c-17.7 0-32 14.3-32 32v32c0 4.4 3.6 8 8 8h60.4l24.7 523c1.6 34.1 29.8 61 63.9 61h454c34.2 0 62.3-26.8 63.9-61l24.7-523H888c4.4 0 8-3.6 8-8v-32c0-17.7-14.3-32-32-32zM731.3 840H292.7l-24.2-512h487l-24.2 512z"}}]},name:"delete",theme:"outlined"},E7e=_7e;function I_(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){M7e(e,a,n[a])})}return e}function M7e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var uk=function(t,n){var r=I_({},t,n.attrs);return g(Et,I_({},r,{icon:E7e}),null)};uk.displayName="DeleteOutlined";uk.inheritAttrs=!1;var I7e=uk,N7e={icon:{tag:"svg",attrs:{viewBox:"64 64 896 896",focusable:"false"},children:[{tag:"path",attrs:{d:"M505.7 661a8 8 0 0012.6 0l112-141.7c4.1-5.2.4-12.9-6.3-12.9h-74.1V168c0-4.4-3.6-8-8-8h-60c-4.4 0-8 3.6-8 8v338.3H400c-6.7 0-10.4 7.7-6.3 
12.9l112 141.8zM878 626h-60c-4.4 0-8 3.6-8 8v154H214V634c0-4.4-3.6-8-8-8h-60c-4.4 0-8 3.6-8 8v198c0 17.7 14.3 32 32 32h684c17.7 0 32-14.3 32-32V634c0-4.4-3.6-8-8-8z"}}]},name:"download",theme:"outlined"},A7e=N7e;function N_(e){for(var t=1;t<arguments.length;t++){var n=arguments[t]!=null?Object(arguments[t]):{},r=Object.keys(n);typeof Object.getOwnPropertySymbols=="function"&&(r=r.concat(Object.getOwnPropertySymbols(n).filter(function(a){return Object.getOwnPropertyDescriptor(n,a).enumerable}))),r.forEach(function(a){D7e(e,a,n[a])})}return e}function D7e(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}var ck=function(t,n){var r=N_({},t,n.attrs);return g(Et,N_({},r,{icon:A7e}),null)};ck.displayName="DownloadOutlined";ck.inheritAttrs=!1;var R7e=ck,L7e=G({name:"AUploadList",mixins:[nt],props:An(p7e,{listType:"text",progressAttr:{strokeWidth:2,showInfo:!1},showRemoveIcon:!0,showDownloadIcon:!1,showPreviewIcon:!0,previewFile:C7e}),setup:function(){return{configProvider:ve("configProvider",St)}},updated:function(){var t=this;Ne(function(){var n=t.$props,r=n.listType,a=n.items,o=n.previewFile;r!=="picture"&&r!=="picture-card"||(a||[]).forEach(function(i){typeof document=="undefined"||typeof window=="undefined"||!window.FileReader||!window.File||!(i.originFileObj instanceof File||i.originFileObj instanceof Blob)||i.thumbUrl!==void 0||(i.thumbUrl="",o&&o(i.originFileObj).then(function(l){i.thumbUrl=l||"",t.$forceUpdate()}))})})},methods:{handlePreview:function(t,n){var r=this.$props.onPreview;if(!!r)return n.preventDefault(),this.$emit("preview",t)},handleDownload:function(t){var n=this.$props.onDownload;typeof n=="function"?this.$emit("download",t):t.url&&window.open(t.url)},handleClose:function(t){this.$emit("remove",t)}},render:function(){var t=this,n,r=Qe(this),a=r.prefixCls,o=r.items,i=o===void 
0?[]:o,l=r.listType,s=r.showPreviewIcon,c=r.showRemoveIcon,d=r.showDownloadIcon,f=r.locale,p=r.progressAttr,v=this.configProvider.getPrefixCls,m=v("upload",a),y=i.map(function(w){var k,$,O,T=w.status==="uploading"?g(co,null,null):g($7e,null,null);if(l==="picture"||l==="picture-card")if(l==="picture-card"&&w.status==="uploading")T=g("div",{class:"".concat(m,"-list-item-uploading-text")},[f.uploading]);else if(!w.thumbUrl&&!w.url)T=g(x7e,{class:"".concat(m,"-list-item-thumbnail")},null);else{var _=b7e(w)?g("img",{src:w.thumbUrl||w.url,alt:w.name,class:"".concat(m,"-list-item-image")},null):g(zS,{class:"".concat(m,"-list-item-icon")},null);T=g("a",{class:"".concat(m,"-list-item-thumbnail"),onClick:function(ae){return t.handlePreview(w,ae)},href:w.url||w.thumbUrl,target:"_blank",rel:"noopener noreferrer"},[_])}if(w.status==="uploading"){var I=P(P({},p),{type:"line",percent:w.percent}),L="percent"in w?g(dR,I,null):null;O=g("div",{class:"".concat(m,"-list-item-progress"),key:"progress"},[L])}var j=Se((k={},V(k,"".concat(m,"-list-item"),!0),V(k,"".concat(m,"-list-item-").concat(w.status),!0),V(k,"".concat(m,"-list-item-list-type-").concat(l),!0),k)),F=typeof w.linkProps=="string"?JSON.parse(w.linkProps):w.linkProps,N=c?g(I7e,{title:f.removeFile,onClick:function(){return t.handleClose(w)}},null):null,D=d&&w.status==="done"?g(R7e,{title:f.downloadFile,onClick:function(){return t.handleDownload(w)}},null):null,z=l!=="picture-card"&&g("span",{key:"download-delete",class:"".concat(m,"-list-item-card-actions ").concat(l==="picture"?"picture":"")},[D&&g("a",{title:f.downloadFile},[D]),N&&g("a",{title:f.removeFile},[N])]),B=Se(($={},V($,"".concat(m,"-list-item-name"),!0),V($,"".concat(m,"-list-item-name-icon-count-").concat([D,N].filter(function(Q){return Q}).length),!0),$)),M=w.url?[g("a",le(le({target:"_blank",rel:"noopener noreferrer",class:B,title:w.name},F),{},{href:w.url,onClick:function(ae){return 
t.handlePreview(w,ae)}}),[w.name]),z]:[g("span",{key:"view",class:"".concat(m,"-list-item-name"),onClick:function(ae){return t.handlePreview(w,ae)},title:w.name},[w.name]),z],E=w.url||w.thumbUrl?void 0:{pointerEvents:"none",opacity:.5},K=s?g("a",{href:w.url||w.thumbUrl,target:"_blank",rel:"noopener noreferrer",style:E,onClick:function(ae){return t.handlePreview(w,ae)},title:f.previewFile},[g(RA,null,null)]):null,W=l==="picture-card"&&w.status!=="uploading"&&g("span",{class:"".concat(m,"-list-item-actions")},[K,w.status==="done"&&D,N]),Y;w.response&&typeof w.response=="string"?Y=w.response:Y=w.error&&w.error.statusText||f.uploadError;var q=g("span",null,[T,M]),J=Lo("fade"),ne=g("div",{class:j,key:w.uid},[g("div",{class:"".concat(m,"-list-item-info")},[q]),W,g(no,J,{default:function(){return[O]}})]),oe=Se(V({},"".concat(m,"-list-picture-card-container"),l==="picture-card"));return g("div",{key:w.uid,class:oe},[w.status==="error"?g(Io,{title:Y},{default:function(){return[ne]}}):g("span",null,[ne])])}),b=Se((n={},V(n,"".concat(m,"-list"),!0),V(n,"".concat(m,"-list-").concat(l),!0),n)),C=l==="picture-card"?"animate-inline":"animate",S=P(P({},d2("".concat(m,"-").concat(C))),{class:b});return g(f2,le(le({},S),{},{tag:"div"}),{default:function(){return[y]}})}}),Bu=G({name:"AUpload",mixins:[nt],inheritAttrs:!1,Dragger:yf,props:Rn(ZR,{type:"select",multiple:!1,action:"",data:{},accept:"",beforeUpload:v7e,showUploadList:!0,listType:"text",disabled:!1,supportServerRender:!0}),setup:function(){return{upload:null,progressTimer:null,configProvider:ve("configProvider",St)}},data:function(){return{sFileList:this.fileList||this.defaultFileList||[],dragState:"drop"}},watch:{fileList:function(t){this.sFileList=t||[]}},beforeUnmount:function(){this.clearProgressTimer()},methods:{onStart:function(t){var n=__(t);n.status="uploading";var r=this.sFileList.concat(),a=U8(r,function(o){var i=o.uid;return 
i===n.uid});a===-1?r.push(n):r[a]=n,this.handleChange({file:n,fileList:r}),(!window.File||(typeof process=="undefined"?"undefined":kt(process))==="object"&&{}.TEST_IE)&&this.autoUpdateProgress(0,n)},onSuccess:function(t,n,r){this.clearProgressTimer();try{typeof t=="string"&&(t=JSON.parse(t))}catch{}var a=this.sFileList,o=Nb(n,a);!o||(o.status="done",o.response=t,o.xhr=r,this.handleChange({file:P({},o),fileList:a}))},onProgress:function(t,n){var r=this.sFileList,a=Nb(n,r);!a||(a.percent=t.percent,this.handleChange({event:t,file:P({},a),fileList:this.sFileList}))},onError:function(t,n,r){this.clearProgressTimer();var a=this.sFileList,o=Nb(r,a);!o||(o.error=t,o.response=n,o.status="error",this.handleChange({file:P({},o),fileList:a}))},onReject:function(t){this.$emit("reject",t)},handleRemove:function(t){var n=this,r=this.remove,a=this.$data.sFileList;Promise.resolve(typeof r=="function"?r(t):r).then(function(o){if(o!==!1){var i=g7e(t,a);i&&(t.status="removed",n.upload&&n.upload.abort(t),n.handleChange({file:t,fileList:i}))}})},handleManualRemove:function(t){this.$refs.uploadRef&&this.$refs.uploadRef.abort(t),this.handleRemove(t)},handleChange:function(t){vt(this,"fileList")||this.setState({sFileList:t.fileList}),this.$emit("update:fileList",t.fileList),this.$emit("change",t)},onFileDrop:function(t){this.setState({dragState:t.type})},reBeforeUpload:function(t,n){var r=this.$props.beforeUpload,a=this.$data.sFileList;if(!r)return!0;var o=r(t,n);return o===!1?(this.handleChange({file:t,fileList:tU(a.concat(n.map(__)),function(i){return i.uid})}),!1):o&&o.then?o:!0},clearProgressTimer:function(){clearInterval(this.progressTimer)},autoUpdateProgress:function(t,n){var r=this,a=m7e(),o=0;this.clearProgressTimer(),this.progressTimer=setInterval(function(){o=a(o),r.onProgress({percent:o*100},n)},200)},renderUploadList:function(t){var n=Qe(this),r=n.showUploadList,a=r===void 
0?{}:r,o=n.listType,i=n.previewFile,l=n.disabled,s=n.locale,c=a.showRemoveIcon,d=a.showPreviewIcon,f=a.showDownloadIcon,p=this.$data.sFileList,v=this.$props,m=v.onDownload,y=v.onPreview,b={listType:o,items:p,previewFile:i,showRemoveIcon:!l&&c,showPreviewIcon:d,showDownloadIcon:f,locale:P(P({},t),s),onRemove:this.handleManualRemove,onDownload:m,onPreview:y};return g(L7e,b,null)}},render:function(){var t,n=Qe(this),r=n.prefixCls,a=n.showUploadList,o=n.listType,i=n.type,l=n.disabled,s=this.$data,c=s.sFileList,d=s.dragState,f=this.$attrs,p=f.class,v=f.style,m=this.configProvider.getPrefixCls,y=m("upload",r),b=P(P({},this.$props),{prefixCls:y,beforeUpload:this.reBeforeUpload,onStart:this.onStart,onError:this.onError,onProgress:this.onProgress,onSuccess:this.onSuccess,onReject:this.onReject,ref:"uploadRef"}),C=a?g(Kr,{componentName:"Upload",defaultLocale:lo.Upload,children:this.renderUploadList},null):null,S=ht(this);if(i==="drag"){var w,k=Se(y,(w={},V(w,"".concat(y,"-drag"),!0),V(w,"".concat(y,"-drag-uploading"),c.some(function(T){return T.status==="uploading"})),V(w,"".concat(y,"-drag-hover"),d==="dragover"),V(w,"".concat(y,"-disabled"),l),w));return g("span",le({class:p},US(this.$attrs)),[g("div",{class:k,onDrop:this.onFileDrop,onDragover:this.onFileDrop,onDragleave:this.onFileDrop,style:v},[g(T_,le(le({},b),{},{class:"".concat(y,"-btn")}),{default:function(){return[g("div",{class:"".concat(y,"-drag-container")},[S])]}})]),C])}var $=Se(y,(t={},V(t,"".concat(y,"-select"),!0),V(t,"".concat(y,"-select-").concat(o),!0),V(t,"".concat(y,"-disabled"),l),t));(!S.length||l)&&delete b.id;var O=g("div",{class:$,style:S.length?void 0:{display:"none"}},[g(T_,b,{default:function(){return[S]}})]);return o==="picture-card"?g("span",{class:Se("".concat(y,"-picture-card-wrapper"),p)},[C,O]):g("span",{class:p},[O,C])}});Bu.Dragger=yf;Bu.install=function(e){return e.component(Bu.name,Bu),e.component(yf.name,yf),e};var 
F7e=yf,A_=Object.freeze(Object.defineProperty({__proto__:null,Affix:ZN,Anchor:Bl,AnchorLink:eA,AutoComplete:w4e,AutoCompleteOptGroup:C4e,AutoCompleteOption:b4e,Alert:G4e,Avatar:ss,AvatarGroup:Kv,BackTop:wOe,Badge:Kp,BadgeRibbon:Wv,Breadcrumb:us,BreadcrumbItem:df,BreadcrumbSeparator:Gv,Button:Cr,ButtonGroup:jv,Calendar:VPe,Card:Ru,CardGrid:nm,CardMeta:tm,Collapse:Pd,CollapsePanel:rm,Carousel:kTe,Cascader:zTe,Checkbox:ro,CheckboxGroup:im,Col:Ka,Comment:WTe,ConfigProvider:S2e,DatePicker:Qxe,RangePicker:o7,MonthPicker:l7,WeekPicker:i7,Descriptions:l_e,DescriptionsItem:s7,Divider:c_e,Dropdown:hi,DropdownButton:cf,Drawer:C_e,Empty:su,Form:pu,FormItem:S7,Grid:x3e,Input:Jn,InputGroup:AA,InputPassword:LA,InputSearch:DA,Textarea:S2,Image:Q6e,ImagePreviewGroup:M7,InputNumber:f8e,Layout:pr,LayoutHeader:$8e,LayoutSider:P8e,LayoutFooter:O8e,LayoutContent:T8e,List:nEe,ListItem:R7,ListItemMeta:A7,message:j7,Menu:jr,MenuDivider:qv,MenuItem:fl,MenuItemGroup:Yv,SubMenu:sc,Mentions:EEe,MentionsOption:_Ee,Modal:cr,Statistic:oi,StatisticCountdown:aMe,notification:sR,PageHeader:OMe,Pagination:OS,Popconfirm:TMe,Popover:KA,Progress:dR,Radio:_r,RadioButton:Jv,RadioGroup:Y2,Rate:l5e,Result:k5e,Row:fu,Select:dl,SelectOptGroup:F$e,SelectOption:L$e,Skeleton:na,SkeletonButton:NS,SkeletonAvatar:xS,SkeletonInput:AS,SkeletonImage:DS,Slider:H5e,Space:Y5e,Spin:ol,Steps:J5e,Step:Z5e,Switch:nIe,Table:fNe,TableColumn:cNe,TableColumnGroup:dNe,Transfer:RNe,Tree:Po,TreeNode:mAe,DirectoryTree:dm,TreeSelect:bDe,TreeSelectNode:yDe,Tabs:$a,TabPane:G2,TabContent:X2,Tag:e7,CheckableTag:lm,TimePicker:Rxe,Timeline:Ed,TimelineItem:gf,Tooltip:Io,Typography:Fr,TypographyLink:rk,TypographyParagraph:ak,TypographyText:ok,TypographyTitle:ik,Upload:Bu,UploadDragger:F7e,LocaleProvider:XN},Symbol.toStringTag,{value:"Module"}));const B7e="ant-design-vue",V7e="2.2.8",z7e="Ant Design Vue",H7e="An enterprise-class UI design language and Vue-based 
implementation",j7e=["vue","vue3","ant","design","antd","vueComponent","component","components","ui","framework","frontend"],K7e="lib/index.js",W7e="es/index.js",U7e="dist/antd.min.js",Y7e="lib/index.d.ts",q7e=["dist","lib","es","scripts","vetur"],G7e={dev:"webpack serve",test:"cross-env NODE_ENV=test WORKFLOW=true jest --config .jest.js","test:dev":"cross-env NODE_ENV=test jest --config .jest.js",compile:"node antd-tools/cli/run.js compile","generator-webtypes":"tsc -p antd-tools/generator-types/tsconfig.json && node antd-tools/generator-types/index.js",pub:"node --max_old_space_size=8192 antd-tools/cli/run.js pub","pub-with-ci":"node antd-tools/cli/run.js pub-with-ci",prepublishOnly:"node antd-tools/cli/run.js guard","pre-publish":"node ./scripts/prepub && npm run generator-webtypes",prettier:"prettier -c --write '**/*'","pretty-quick":"pretty-quick",dist:"node --max_old_space_size=8192 antd-tools/cli/run.js dist",lint:"eslint -c ./.eslintrc --fix --ext .jsx,.js,.vue,.ts,.tsx ./components","lint:style":'stylelint "{site,components}/**/*.less" --syntax 
less',codecov:"codecov"},X7e={type:"git",url:"git+https://github.com/vueComponent/ant-design-vue.git"},Z7e="MIT",J7e={type:"opencollective",url:"https://opencollective.com/ant-design-vue"},Q7e={url:"https://github.com/vueComponent/ant-design-vue/issues"},eRe="https://www.antdv.com/",tRe={"@vue/compiler-sfc":">=3.1.0",vue:">=3.1.0"},nRe={"@babel/cli":"^7.8.4","@babel/core":"^7.10.5","@babel/plugin-proposal-class-properties":"^7.8.3","@babel/plugin-proposal-export-default-from":"^7.8.3","@babel/plugin-proposal-export-namespace-from":"^7.12.1","@babel/plugin-proposal-object-rest-spread":"^7.9.6","@babel/plugin-proposal-optional-chaining":"^7.10.1","@babel/plugin-syntax-dynamic-import":"^7.8.3","@babel/plugin-transform-member-expression-literals":"^7.8.3","@babel/plugin-transform-object-assign":"^7.8.3","@babel/plugin-transform-property-literals":"^7.8.3","@babel/plugin-transform-runtime":"^7.10.5","@babel/plugin-transform-template-literals":"^7.8.3","@babel/plugin-transform-typescript":"^7.12.1","@babel/polyfill":"^7.8.7","@babel/preset-env":"^7.9.6","@babel/preset-typescript":"^7.10.4","@commitlint/cli":"^12.0.0","@commitlint/config-conventional":"^12.0.0","@octokit/rest":"^18.0.0","@types/fs-extra":"^9.0.8","@types/lodash-es":"^4.17.3","@types/raf":"^3.4.0","@typescript-eslint/eslint-plugin":"^4.1.0","@typescript-eslint/parser":"^4.1.0","@vue/babel-plugin-jsx":"^1.0.0","@vue/cli-plugin-eslint":"^5.0.0-0","@vue/compiler-sfc":"^3.1.0","@vue/eslint-config-prettier":"^6.0.0","@vue/eslint-config-typescript":"^7.0.0","@vue/test-utils":"^2.0.0-0","@webpack-cli/serve":"^1.3.1",acorn:"^8.0.0",autoprefixer:"^10.2.0",axios:"^0.21.0","babel-eslint":"^10.0.1","babel-jest":"^26.1.0","babel-loader":"^8.0.0","babel-plugin-import":"^1.1.1","babel-plugin-inline-import-data-uri":"^1.0.1","babel-plugin-istanbul":"^6.0.0","case-sensitive-paths-webpack-plugin":"^2.1.2",chalk:"^4.1.0",cheerio:"^1.0.0-rc.2",codecov:"^3.0.0",colorful:"^2.1.0",commander:"^7.2.0","compare-versions":"^3.3.0","c
ross-env":"^7.0.0","css-loader":"^5.0.0","css-minimizer-webpack-plugin":"^3.0.0","deep-assign":"^3.0.0","docsearch.js":"^2.6.3","enquire-js":"^0.2.1",eslint:"^7.25.0","eslint-config-prettier":"^8.0.0","eslint-plugin-html":"^6.0.0","eslint-plugin-jest":"^24.3.6","eslint-plugin-markdown":"^2.0.0-alpha.0","eslint-plugin-prettier":"^3.1.4","eslint-plugin-vue":"^7.1.0","fetch-jsonp":"^1.1.3","fs-extra":"^10.0.0",glob:"^7.1.2",gulp:"^4.0.1","gulp-babel":"^8.0.0","gulp-strip-code":"^0.1.4","gulp-typescript":"^6.0.0-alpha.1","html-webpack-plugin":"^5.3.1",husky:"^4.0.0","ignore-emit-webpack-plugin":"^2.0.6",jest:"^26.0.0","jest-environment-jsdom-fifteen":"^1.0.2","jest-serializer-vue":"^2.0.0","jest-transform-stub":"^2.0.0","js-base64":"^3.0.0","json-templater":"^1.2.0",jsonp:"^0.2.1",less:"^4.0.0","less-loader":"^10.0.0","less-plugin-npm-import":"^2.1.0","less-vars-to-js":"^1.3.0","lint-staged":"^11.0.0",marked:"0.3.18",merge2:"^1.2.1","mini-css-extract-plugin":"^1.5.0",minimist:"^1.2.0",mkdirp:"^0.5.1",mockdate:"^2.0.2",nprogress:"^0.2.0","object-assign":"^4.1.1",postcss:"^8.2.12","postcss-loader":"^6.0.0",prettier:"^2.2.0","pretty-quick":"^3.0.0",prismjs:"^1.20.0",querystring:"^0.2.0","raw-loader":"^4.0.2",reqwest:"^2.0.5",rimraf:"^3.0.0","rucksack-css":"^1.0.2","selenium-server":"^3.0.1",semver:"^7.0.0","style-loader":"^3.0.0",stylelint:"^13.0.0","stylelint-config-prettier":"^8.0.0","stylelint-config-rational-order":"^0.1.2","stylelint-config-standard":"^22.0.0","stylelint-declaration-block-no-ignored-properties":"^2.1.0","stylelint-order":"^4.0.0","terser-webpack-plugin":"^5.1.1",through2:"^3.0.0","ts-jest":"^26.4.1","ts-loader":"^9.1.0",typescript:"~4.3.5","umi-mock-middleware":"^1.0.0","umi-request":"^1.3.5","url-loader":"^3.0.0",vue:"^3.1.0","vue-antd-md-loader":"^1.2.1-beta.1","vue-clipboard2":"0.3.1","vue-draggable-resizable":"^2.1.0","vue-eslint-parser":"^7.0.0","vue-i18n":"^9.1.7","vue-infinite-scroll":"^2.0.2","vue-jest":"^5.0.0-alpha.3","vue-loader":"^16.1.1",
"vue-request":"^1.0.2","vue-router":"^4.0.0","vue-server-renderer":"^2.6.11","vue-style-loader":"^4.1.2",vuex:"^4.0.0-beta.2",webpack:"^5.0.0","webpack-bundle-analyzer":"^4.0.0","webpack-cli":"^4.6.0","webpack-dev-server":"^4.0.0","webpack-merge":"^5.0.0",webpackbar:"^5.0.0-3","xhr-mock":"^2.5.1"},rRe={"@ant-design/icons-vue":"^6.0.0","@babel/runtime":"^7.10.5","@simonwep/pickr":"~1.8.0","array-tree-filter":"^2.1.0","async-validator":"^3.3.0","dom-align":"^1.12.1","dom-scroll-into-view":"^2.0.0",lodash:"^4.17.21","lodash-es":"^4.17.15",moment:"^2.27.0","omit.js":"^2.0.0","resize-observer-polyfill":"^1.5.1","scroll-into-view-if-needed":"^2.2.25","shallow-equal":"^1.0.0","vue-types":"^3.0.0",warning:"^4.0.0"},aRe=["site/*","components/style.ts","components/**/style/*","*.vue","*.md","dist/*","es/**/style/*","lib/**/style/*","*.less"],oRe={tags:"vetur/tags.json",attributes:"vetur/attributes.json"};var iRe={name:B7e,version:V7e,title:z7e,description:H7e,keywords:j7e,main:K7e,module:W7e,unpkg:U7e,typings:Y7e,files:q7e,scripts:G7e,repository:X7e,license:Z7e,funding:J7e,bugs:Q7e,homepage:eRe,peerDependencies:tRe,devDependencies:nRe,dependencies:rRe,sideEffects:aRe,vetur:oRe,"web-types":"vetur/web-types.json"},lRe=iRe.version,sRe=lRe,uRe=function(t){return Object.keys(A_).forEach(function(n){var r=A_[n];r.install&&t.use(r)}),t.config.globalProperties.$message=j7,t.config.globalProperties.$notification=sR,t.config.globalProperties.$info=cr.info,t.config.globalProperties.$success=cr.success,t.config.globalProperties.$error=cr.error,t.config.globalProperties.$warning=cr.warning,t.config.globalProperties.$confirm=cr.confirm,t.config.globalProperties.$destroyAll=cr.destroyAll,t},cRe={version:sRe,install:uRe},dk={exports:{}},QR=function(t,n){return function(){for(var a=new Array(arguments.length),o=0;o<a.length;o++)a[o]=arguments[o];return t.apply(n,a)}},dRe=QR,bl=Object.prototype.toString;function fk(e){return Array.isArray(e)}function V1(e){return typeof 
e=="undefined"}function fRe(e){return e!==null&&!V1(e)&&e.constructor!==null&&!V1(e.constructor)&&typeof e.constructor.isBuffer=="function"&&e.constructor.isBuffer(e)}function e9(e){return bl.call(e)==="[object ArrayBuffer]"}function hRe(e){return bl.call(e)==="[object FormData]"}function pRe(e){var t;return typeof ArrayBuffer!="undefined"&&ArrayBuffer.isView?t=ArrayBuffer.isView(e):t=e&&e.buffer&&e9(e.buffer),t}function vRe(e){return typeof e=="string"}function mRe(e){return typeof e=="number"}function t9(e){return e!==null&&typeof e=="object"}function tv(e){if(bl.call(e)!=="[object Object]")return!1;var t=Object.getPrototypeOf(e);return t===null||t===Object.prototype}function gRe(e){return bl.call(e)==="[object Date]"}function yRe(e){return bl.call(e)==="[object File]"}function bRe(e){return bl.call(e)==="[object Blob]"}function n9(e){return bl.call(e)==="[object Function]"}function CRe(e){return t9(e)&&n9(e.pipe)}function wRe(e){return bl.call(e)==="[object URLSearchParams]"}function SRe(e){return e.trim?e.trim():e.replace(/^\s+|\s+$/g,"")}function kRe(){return typeof navigator!="undefined"&&(navigator.product==="ReactNative"||navigator.product==="NativeScript"||navigator.product==="NS")?!1:typeof window!="undefined"&&typeof document!="undefined"}function hk(e,t){if(!(e===null||typeof e=="undefined"))if(typeof e!="object"&&(e=[e]),fk(e))for(var n=0,r=e.length;n<r;n++)t.call(null,e[n],n,e);else for(var a in e)Object.prototype.hasOwnProperty.call(e,a)&&t.call(null,e[a],a,e)}function z1(){var e={};function t(a,o){tv(e[o])&&tv(a)?e[o]=z1(e[o],a):tv(a)?e[o]=z1({},a):fk(a)?e[o]=a.slice():e[o]=a}for(var n=0,r=arguments.length;n<r;n++)hk(arguments[n],t);return e}function $Re(e,t,n){return hk(t,function(a,o){n&&typeof a=="function"?e[o]=dRe(a,n):e[o]=a}),e}function ORe(e){return e.charCodeAt(0)===65279&&(e=e.slice(1)),e}var 
qr={isArray:fk,isArrayBuffer:e9,isBuffer:fRe,isFormData:hRe,isArrayBufferView:pRe,isString:vRe,isNumber:mRe,isObject:t9,isPlainObject:tv,isUndefined:V1,isDate:gRe,isFile:yRe,isBlob:bRe,isFunction:n9,isStream:CRe,isURLSearchParams:wRe,isStandardBrowserEnv:kRe,forEach:hk,merge:z1,extend:$Re,trim:SRe,stripBOM:ORe},nu=qr;function D_(e){return encodeURIComponent(e).replace(/%3A/gi,":").replace(/%24/g,"$").replace(/%2C/gi,",").replace(/%20/g,"+").replace(/%5B/gi,"[").replace(/%5D/gi,"]")}var r9=function(t,n,r){if(!n)return t;var a;if(r)a=r(n);else if(nu.isURLSearchParams(n))a=n.toString();else{var o=[];nu.forEach(n,function(s,c){s===null||typeof s=="undefined"||(nu.isArray(s)?c=c+"[]":s=[s],nu.forEach(s,function(f){nu.isDate(f)?f=f.toISOString():nu.isObject(f)&&(f=JSON.stringify(f)),o.push(D_(c)+"="+D_(f))}))}),a=o.join("&")}if(a){var i=t.indexOf("#");i!==-1&&(t=t.slice(0,i)),t+=(t.indexOf("?")===-1?"?":"&")+a}return t},PRe=qr;function zg(){this.handlers=[]}zg.prototype.use=function(t,n,r){return this.handlers.push({fulfilled:t,rejected:n,synchronous:r?r.synchronous:!1,runWhen:r?r.runWhen:null}),this.handlers.length-1};zg.prototype.eject=function(t){this.handlers[t]&&(this.handlers[t]=null)};zg.prototype.forEach=function(t){PRe.forEach(this.handlers,function(r){r!==null&&t(r)})};var TRe=zg,xRe=qr,_Re=function(t,n){xRe.forEach(t,function(a,o){o!==n&&o.toUpperCase()===n.toUpperCase()&&(t[n]=a,delete t[o])})},a9=function(t,n,r,a,o){return t.config=n,r&&(t.code=r),t.request=a,t.response=o,t.isAxiosError=!0,t.toJSON=function(){return{message:this.message,name:this.name,description:this.description,number:this.number,fileName:this.fileName,lineNumber:this.lineNumber,columnNumber:this.columnNumber,stack:this.stack,config:this.config,code:this.code,status:this.response&&this.response.status?this.response.status:null}},t},o9={silentJSONParsing:!0,forcedJSONParsing:!0,clarifyTimeoutError:!1},ERe=a9,i9=function(t,n,r,a,o){var i=new Error(t);return 
ERe(i,n,r,a,o)},MRe=i9,IRe=function(t,n,r){var a=r.config.validateStatus;!r.status||!a||a(r.status)?t(r):n(MRe("Request failed with status code "+r.status,r.config,null,r.request,r))},Qh=qr,NRe=Qh.isStandardBrowserEnv()?function(){return{write:function(n,r,a,o,i,l){var s=[];s.push(n+"="+encodeURIComponent(r)),Qh.isNumber(a)&&s.push("expires="+new Date(a).toGMTString()),Qh.isString(o)&&s.push("path="+o),Qh.isString(i)&&s.push("domain="+i),l===!0&&s.push("secure"),document.cookie=s.join("; ")},read:function(n){var r=document.cookie.match(new RegExp("(^|;\\s*)("+n+")=([^;]*)"));return r?decodeURIComponent(r[3]):null},remove:function(n){this.write(n,"",Date.now()-864e5)}}}():function(){return{write:function(){},read:function(){return null},remove:function(){}}}(),ARe=function(t){return/^([a-z][a-z\d+\-.]*:)?\/\//i.test(t)},DRe=function(t,n){return n?t.replace(/\/+$/,"")+"/"+n.replace(/^\/+/,""):t},RRe=ARe,LRe=DRe,FRe=function(t,n){return t&&!RRe(n)?LRe(t,n):n},Ab=qr,BRe=["age","authorization","content-length","content-type","etag","expires","from","host","if-modified-since","if-unmodified-since","last-modified","location","max-forwards","proxy-authorization","referer","retry-after","user-agent"],VRe=function(t){var n={},r,a,o;return t&&Ab.forEach(t.split(`
`),function(l){if(o=l.indexOf(":"),r=Ab.trim(l.substr(0,o)).toLowerCase(),a=Ab.trim(l.substr(o+1)),r){if(n[r]&&BRe.indexOf(r)>=0)return;r==="set-cookie"?n[r]=(n[r]?n[r]:[]).concat([a]):n[r]=n[r]?n[r]+", "+a:a}}),n},R_=qr,zRe=R_.isStandardBrowserEnv()?function(){var t=/(msie|trident)/i.test(navigator.userAgent),n=document.createElement("a"),r;function a(o){var i=o;return t&&(n.setAttribute("href",i),i=n.href),n.setAttribute("href",i),{href:n.href,protocol:n.protocol?n.protocol.replace(/:$/,""):"",host:n.host,search:n.search?n.search.replace(/^\?/,""):"",hash:n.hash?n.hash.replace(/^#/,""):"",hostname:n.hostname,port:n.port,pathname:n.pathname.charAt(0)==="/"?n.pathname:"/"+n.pathname}}return r=a(window.location.href),function(i){var l=R_.isString(i)?a(i):i;return l.protocol===r.protocol&&l.host===r.host}}():function(){return function(){return!0}}();function pk(e){this.message=e}pk.prototype.toString=function(){return"Cancel"+(this.message?": "+this.message:"")};pk.prototype.__CANCEL__=!0;var Hg=pk,ep=qr,HRe=IRe,jRe=NRe,KRe=r9,WRe=FRe,URe=VRe,YRe=zRe,Db=i9,qRe=o9,GRe=Hg,L_=function(t){return new Promise(function(r,a){var o=t.data,i=t.headers,l=t.responseType,s;function c(){t.cancelToken&&t.cancelToken.unsubscribe(s),t.signal&&t.signal.removeEventListener("abort",s)}ep.isFormData(o)&&delete i["Content-Type"];var d=new XMLHttpRequest;if(t.auth){var f=t.auth.username||"",p=t.auth.password?unescape(encodeURIComponent(t.auth.password)):"";i.Authorization="Basic "+btoa(f+":"+p)}var v=WRe(t.baseURL,t.url);d.open(t.method.toUpperCase(),KRe(v,t.params,t.paramsSerializer),!0),d.timeout=t.timeout;function m(){if(!!d){var b="getAllResponseHeaders"in d?URe(d.getAllResponseHeaders()):null,C=!l||l==="text"||l==="json"?d.responseText:d.response,S={data:C,status:d.status,statusText:d.statusText,headers:b,config:t,request:d};HRe(function(k){r(k),c()},function(k){a(k),c()},S),d=null}}if("onloadend"in 
d?d.onloadend=m:d.onreadystatechange=function(){!d||d.readyState!==4||d.status===0&&!(d.responseURL&&d.responseURL.indexOf("file:")===0)||setTimeout(m)},d.onabort=function(){!d||(a(Db("Request aborted",t,"ECONNABORTED",d)),d=null)},d.onerror=function(){a(Db("Network Error",t,null,d)),d=null},d.ontimeout=function(){var C=t.timeout?"timeout of "+t.timeout+"ms exceeded":"timeout exceeded",S=t.transitional||qRe;t.timeoutErrorMessage&&(C=t.timeoutErrorMessage),a(Db(C,t,S.clarifyTimeoutError?"ETIMEDOUT":"ECONNABORTED",d)),d=null},ep.isStandardBrowserEnv()){var y=(t.withCredentials||YRe(v))&&t.xsrfCookieName?jRe.read(t.xsrfCookieName):void 0;y&&(i[t.xsrfHeaderName]=y)}"setRequestHeader"in d&&ep.forEach(i,function(C,S){typeof o=="undefined"&&S.toLowerCase()==="content-type"?delete i[S]:d.setRequestHeader(S,C)}),ep.isUndefined(t.withCredentials)||(d.withCredentials=!!t.withCredentials),l&&l!=="json"&&(d.responseType=t.responseType),typeof t.onDownloadProgress=="function"&&d.addEventListener("progress",t.onDownloadProgress),typeof t.onUploadProgress=="function"&&d.upload&&d.upload.addEventListener("progress",t.onUploadProgress),(t.cancelToken||t.signal)&&(s=function(b){!d||(a(!b||b&&b.type?new GRe("canceled"):b),d.abort(),d=null)},t.cancelToken&&t.cancelToken.subscribe(s),t.signal&&(t.signal.aborted?s():t.signal.addEventListener("abort",s))),o||(o=null),d.send(o)})},vr=qr,F_=_Re,XRe=a9,ZRe=o9,JRe={"Content-Type":"application/x-www-form-urlencoded"};function B_(e,t){!vr.isUndefined(e)&&vr.isUndefined(e["Content-Type"])&&(e["Content-Type"]=t)}function QRe(){var e;return(typeof XMLHttpRequest!="undefined"||typeof process!="undefined"&&Object.prototype.toString.call(process)==="[object process]")&&(e=L_),e}function e9e(e,t,n){if(vr.isString(e))try{return(t||JSON.parse)(e),vr.trim(e)}catch(r){if(r.name!=="SyntaxError")throw r}return(n||JSON.stringify)(e)}var jg={transitional:ZRe,adapter:QRe(),transformRequest:[function(t,n){return 
F_(n,"Accept"),F_(n,"Content-Type"),vr.isFormData(t)||vr.isArrayBuffer(t)||vr.isBuffer(t)||vr.isStream(t)||vr.isFile(t)||vr.isBlob(t)?t:vr.isArrayBufferView(t)?t.buffer:vr.isURLSearchParams(t)?(B_(n,"application/x-www-form-urlencoded;charset=utf-8"),t.toString()):vr.isObject(t)||n&&n["Content-Type"]==="application/json"?(B_(n,"application/json"),e9e(t)):t}],transformResponse:[function(t){var n=this.transitional||jg.transitional,r=n&&n.silentJSONParsing,a=n&&n.forcedJSONParsing,o=!r&&this.responseType==="json";if(o||a&&vr.isString(t)&&t.length)try{return JSON.parse(t)}catch(i){if(o)throw i.name==="SyntaxError"?XRe(i,this,"E_JSON_PARSE"):i}return t}],timeout:0,xsrfCookieName:"XSRF-TOKEN",xsrfHeaderName:"X-XSRF-TOKEN",maxContentLength:-1,maxBodyLength:-1,validateStatus:function(t){return t>=200&&t<300},headers:{common:{Accept:"application/json, text/plain, */*"}}};vr.forEach(["delete","get","head"],function(t){jg.headers[t]={}});vr.forEach(["post","put","patch"],function(t){jg.headers[t]=vr.merge(JRe)});var vk=jg,t9e=qr,n9e=vk,r9e=function(t,n,r){var a=this||n9e;return t9e.forEach(r,function(i){t=i.call(a,t,n)}),t},l9=function(t){return!!(t&&t.__CANCEL__)},V_=qr,Rb=r9e,a9e=l9,o9e=vk,i9e=Hg;function Lb(e){if(e.cancelToken&&e.cancelToken.throwIfRequested(),e.signal&&e.signal.aborted)throw new i9e("canceled")}var l9e=function(t){Lb(t),t.headers=t.headers||{},t.data=Rb.call(t,t.data,t.headers,t.transformRequest),t.headers=V_.merge(t.headers.common||{},t.headers[t.method]||{},t.headers),V_.forEach(["delete","get","head","post","put","patch","common"],function(a){delete t.headers[a]});var n=t.adapter||o9e.adapter;return n(t).then(function(a){return Lb(t),a.data=Rb.call(t,a.data,a.headers,t.transformResponse),a},function(a){return a9e(a)||(Lb(t),a&&a.response&&(a.response.data=Rb.call(t,a.response.data,a.response.headers,t.transformResponse))),Promise.reject(a)})},Qr=qr,s9=function(t,n){n=n||{};var r={};function a(d,f){return 
Qr.isPlainObject(d)&&Qr.isPlainObject(f)?Qr.merge(d,f):Qr.isPlainObject(f)?Qr.merge({},f):Qr.isArray(f)?f.slice():f}function o(d){if(Qr.isUndefined(n[d])){if(!Qr.isUndefined(t[d]))return a(void 0,t[d])}else return a(t[d],n[d])}function i(d){if(!Qr.isUndefined(n[d]))return a(void 0,n[d])}function l(d){if(Qr.isUndefined(n[d])){if(!Qr.isUndefined(t[d]))return a(void 0,t[d])}else return a(void 0,n[d])}function s(d){if(d in n)return a(t[d],n[d]);if(d in t)return a(void 0,t[d])}var c={url:i,method:i,data:i,baseURL:l,transformRequest:l,transformResponse:l,paramsSerializer:l,timeout:l,timeoutMessage:l,withCredentials:l,adapter:l,responseType:l,xsrfCookieName:l,xsrfHeaderName:l,onUploadProgress:l,onDownloadProgress:l,decompress:l,maxContentLength:l,maxBodyLength:l,transport:l,httpAgent:l,httpsAgent:l,cancelToken:l,socketPath:l,responseEncoding:l,validateStatus:s};return Qr.forEach(Object.keys(t).concat(Object.keys(n)),function(f){var p=c[f]||o,v=p(f);Qr.isUndefined(v)&&p!==s||(r[f]=v)}),r},u9={version:"0.26.1"},s9e=u9.version,mk={};["object","boolean","number","function","string","symbol"].forEach(function(e,t){mk[e]=function(r){return typeof r===e||"a"+(t<1?"n ":" ")+e}});var z_={};mk.transitional=function(t,n,r){function a(o,i){return"[Axios v"+s9e+"] Transitional option '"+o+"'"+i+(r?". "+r:"")}return function(o,i,l){if(t===!1)throw new Error(a(i," has been removed"+(n?" 
in "+n:"")));return n&&!z_[i]&&(z_[i]=!0,console.warn(a(i," has been deprecated since v"+n+" and will be removed in the near future"))),t?t(o,i,l):!0}};function u9e(e,t,n){if(typeof e!="object")throw new TypeError("options must be an object");for(var r=Object.keys(e),a=r.length;a-- >0;){var o=r[a],i=t[o];if(i){var l=e[o],s=l===void 0||i(l,o,e);if(s!==!0)throw new TypeError("option "+o+" must be "+s);continue}if(n!==!0)throw Error("Unknown option "+o)}}var c9e={assertOptions:u9e,validators:mk},c9=qr,d9e=r9,H_=TRe,j_=l9e,Kg=s9,d9=c9e,ru=d9.validators;function qf(e){this.defaults=e,this.interceptors={request:new H_,response:new H_}}qf.prototype.request=function(t,n){typeof t=="string"?(n=n||{},n.url=t):n=t||{},n=Kg(this.defaults,n),n.method?n.method=n.method.toLowerCase():this.defaults.method?n.method=this.defaults.method.toLowerCase():n.method="get";var r=n.transitional;r!==void 0&&d9.assertOptions(r,{silentJSONParsing:ru.transitional(ru.boolean),forcedJSONParsing:ru.transitional(ru.boolean),clarifyTimeoutError:ru.transitional(ru.boolean)},!1);var a=[],o=!0;this.interceptors.request.forEach(function(v){typeof v.runWhen=="function"&&v.runWhen(n)===!1||(o=o&&v.synchronous,a.unshift(v.fulfilled,v.rejected))});var i=[];this.interceptors.response.forEach(function(v){i.push(v.fulfilled,v.rejected)});var l;if(!o){var s=[j_,void 0];for(Array.prototype.unshift.apply(s,a),s=s.concat(i),l=Promise.resolve(n);s.length;)l=l.then(s.shift(),s.shift());return l}for(var c=n;a.length;){var d=a.shift(),f=a.shift();try{c=d(c)}catch(p){f(p);break}}try{l=j_(c)}catch(p){return Promise.reject(p)}for(;i.length;)l=l.then(i.shift(),i.shift());return l};qf.prototype.getUri=function(t){return t=Kg(this.defaults,t),d9e(t.url,t.params,t.paramsSerializer).replace(/^\?/,"")};c9.forEach(["delete","get","head","options"],function(t){qf.prototype[t]=function(n,r){return 
this.request(Kg(r||{},{method:t,url:n,data:(r||{}).data}))}});c9.forEach(["post","put","patch"],function(t){qf.prototype[t]=function(n,r,a){return this.request(Kg(a||{},{method:t,url:n,data:r}))}});var f9e=qf,h9e=Hg;function fc(e){if(typeof e!="function")throw new TypeError("executor must be a function.");var t;this.promise=new Promise(function(a){t=a});var n=this;this.promise.then(function(r){if(!!n._listeners){var a,o=n._listeners.length;for(a=0;a<o;a++)n._listeners[a](r);n._listeners=null}}),this.promise.then=function(r){var a,o=new Promise(function(i){n.subscribe(i),a=i}).then(r);return o.cancel=function(){n.unsubscribe(a)},o},e(function(a){n.reason||(n.reason=new h9e(a),t(n.reason))})}fc.prototype.throwIfRequested=function(){if(this.reason)throw this.reason};fc.prototype.subscribe=function(t){if(this.reason){t(this.reason);return}this._listeners?this._listeners.push(t):this._listeners=[t]};fc.prototype.unsubscribe=function(t){if(!!this._listeners){var n=this._listeners.indexOf(t);n!==-1&&this._listeners.splice(n,1)}};fc.source=function(){var t,n=new fc(function(a){t=a});return{token:n,cancel:t}};var p9e=fc,v9e=function(t){return function(r){return t.apply(null,r)}},m9e=qr,g9e=function(t){return m9e.isObject(t)&&t.isAxiosError===!0},K_=qr,y9e=QR,nv=f9e,b9e=s9,C9e=vk;function f9(e){var t=new nv(e),n=y9e(nv.prototype.request,t);return K_.extend(n,nv.prototype,t),K_.extend(n,t),n.create=function(a){return f9(b9e(e,a))},n}var Ko=f9(C9e);Ko.Axios=nv;Ko.Cancel=Hg;Ko.CancelToken=p9e;Ko.isCancel=l9;Ko.VERSION=u9.version;Ko.all=function(t){return Promise.all(t)};Ko.spread=v9e;Ko.isAxiosError=g9e;dk.exports=Ko;dk.exports.default=Ko;var w9e=dk.exports,h9={exports:{}};/*!
 * 
 * js-audio-recorder - js audio recorder plugin
 * 
 * @version v0.5.7
 * @homepage https://github.com/2fps/recorder
 * @author 2fps <echoweb@126.com> (https://www.zhuyuntao.cn)
 * @license MIT
 *         
 */(function(e,t){(function(n,r){e.exports=r()})(vo,function(){return function(n){var r={};function a(o){if(r[o])return r[o].exports;var i=r[o]={i:o,l:!1,exports:{}};return n[o].call(i.exports,i,i.exports,a),i.l=!0,i.exports}return a.m=n,a.c=r,a.d=function(o,i,l){a.o(o,i)||Object.defineProperty(o,i,{enumerable:!0,get:l})},a.r=function(o){typeof Symbol!="undefined"&&Symbol.toStringTag&&Object.defineProperty(o,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(o,"__esModule",{value:!0})},a.t=function(o,i){if(1&i&&(o=a(o)),8&i||4&i&&typeof o=="object"&&o&&o.__esModule)return o;var l=Object.create(null);if(a.r(l),Object.defineProperty(l,"default",{enumerable:!0,value:o}),2&i&&typeof o!="string")for(var s in o)a.d(l,s,function(c){return o[c]}.bind(null,s));return l},a.n=function(o){var i=o&&o.__esModule?function(){return o.default}:function(){return o};return a.d(i,"a",i),i},a.o=function(o,i){return Object.prototype.hasOwnProperty.call(o,i)},a.p="",a(a.s=0)}([function(n,r,a){Object.defineProperty(r,"__esModule",{value:!0});var o=function(){function l(s){s===void 0&&(s={}),this.isplaying=!1,this.lBuffer=[],this.rBuffer=[],this.tempPCM=[],this.inputSampleBits=16,this.playStamp=0,this.playTime=0,this.totalPlayTime=0,this.offset=0,this.fileSize=0;var c,d=new(window.AudioContext||window.webkitAudioContext);this.inputSampleRate=d.sampleRate,this.config={sampleBits:~[8,16].indexOf(s.sampleBits)?s.sampleBits:16,sampleRate:~[8e3,11025,16e3,22050,24e3,44100,48e3].indexOf(s.sampleRate)?s.sampleRate:this.inputSampleRate,numChannels:~[1,2].indexOf(s.numChannels)?s.numChannels:1,compiling:!!s.compiling||!1},this.outputSampleRate=this.config.sampleRate,this.oututSampleBits=this.config.sampleBits,this.littleEdian=(c=new ArrayBuffer(2),new DataView(c).setInt16(0,256,!0),new Int16Array(c)[0]===256),l.initUserMedia()}return l.prototype.initRecorder=function(){var 
s=this;this.context&&this.destroy(),this.context=new(window.AudioContext||window.webkitAudioContext),this.analyser=this.context.createAnalyser(),this.analyser.fftSize=2048;var c=this.context.createScriptProcessor||this.context.createJavaScriptNode;this.recorder=c.apply(this.context,[4096,this.config.numChannels,this.config.numChannels]),this.recorder.onaudioprocess=function(d){if(s.isrecording&&!s.ispause){var f,p=d.inputBuffer.getChannelData(0),v=null;if(s.lBuffer.push(new Float32Array(p)),s.size+=p.length,s.config.numChannels===2&&(v=d.inputBuffer.getChannelData(1),s.rBuffer.push(new Float32Array(v)),s.size+=v.length),s.config.compiling){var m=s.transformIntoPCM(p,v);s.tempPCM.push(m),s.fileSize=m.byteLength*s.tempPCM.length}else s.fileSize=Math.floor(s.size/Math.max(s.inputSampleRate/s.outputSampleRate,1))*(s.oututSampleBits/8);f=100*Math.max.apply(Math,p),s.duration+=4096/s.inputSampleRate,s.onprocess&&s.onprocess(s.duration),s.onprogress&&s.onprogress({duration:s.duration,fileSize:s.fileSize,vol:f,data:s.tempPCM})}}},l.prototype.start=function(){var s=this;if(!this.isrecording)return this.clear(),this.initRecorder(),this.isrecording=!0,navigator.mediaDevices.getUserMedia({audio:!0}).then(function(c){s.audioInput=s.context.createMediaStreamSource(c),s.stream=c}).then(function(){s.audioInput.connect(s.analyser),s.analyser.connect(s.recorder),s.recorder.connect(s.context.destination)})},l.prototype.pause=function(){this.isrecording&&!this.ispause&&(this.ispause=!0)},l.prototype.resume=function(){this.isrecording&&this.ispause&&(this.ispause=!1)},l.prototype.stop=function(){this.isrecording=!1,this.audioInput&&this.audioInput.disconnect(),this.recorder.disconnect()},l.prototype.play=function(){this.stop(),this.source&&this.source.stop(),this.isplaying=!0,this.playTime=0,this.playAudioData()},l.prototype.getPlayTime=function(){var 
s=0;return(s=this.isplaying?this.context.currentTime-this.playStamp+this.playTime:this.playTime)>=this.totalPlayTime&&(s=this.totalPlayTime),s},l.prototype.pausePlay=function(){!this.isrecording&&this.isplaying&&(this.source&&this.source.disconnect(),this.playTime+=this.context.currentTime-this.playStamp,this.isplaying=!1)},l.prototype.resumePlay=function(){this.isrecording||this.isplaying||this.playTime===0||(this.isplaying=!0,this.playAudioData())},l.prototype.stopPlay=function(){this.isrecording||(this.playTime=0,this.isplaying=!1,this.source&&this.source.stop())},l.prototype.getWholeData=function(){return this.tempPCM},l.prototype.getNextData=function(){var s=this.tempPCM.length,c=this.tempPCM.slice(this.offset);return this.offset=s,c},l.prototype.playAudioData=function(){var s=this;this.context.decodeAudioData(this.getWAV().buffer,function(c){s.source=s.context.createBufferSource(),s.source.buffer=c,s.totalPlayTime=s.source.buffer.duration,s.source.connect(s.analyser),s.analyser.connect(s.context.destination),s.source.start(0,s.playTime),s.playStamp=s.context.currentTime},function(c){l.throwError(c)})},l.prototype.getRecordAnalyseData=function(){if(this.ispause)return this.prevDomainData;var s=new Uint8Array(this.analyser.frequencyBinCount);return this.analyser.getByteTimeDomainData(s),this.prevDomainData=s},l.prototype.getPlayAnalyseData=function(){return this.getRecordAnalyseData()},l.prototype.getPCM=function(){if(this.tempPCM.length){var s=new ArrayBuffer(this.tempPCM.length*this.tempPCM[0].byteLength),c=new DataView(s),d=0;this.tempPCM.forEach(function(p){for(var v=0,m=p.byteLength;v<m;++v)c.setInt8(d,p.getInt8(v)),d++}),this.PCM=c,this.tempPCM=[]}if(this.PCM)return this.PCM;var f=this.flat();return f=l.compress(f,this.inputSampleRate,this.outputSampleRate),this.PCM=l.encodePCM(f,this.oututSampleBits,this.littleEdian)},l.prototype.getPCMBlob=function(){return this.stop(),new Blob([this.getPCM()])},l.prototype.downloadPCM=function(s){s===void 
0&&(s="recorder");var c=this.getPCMBlob();this.download(c,s,"pcm")},l.prototype.getWAV=function(){var s=this.getPCM();return l.encodeWAV(s,this.inputSampleRate,this.outputSampleRate,this.config.numChannels,this.oututSampleBits,this.littleEdian)},l.prototype.getWAVBlob=function(){return this.stop(),new Blob([this.getWAV()],{type:"audio/wav"})},l.prototype.downloadWAV=function(s){s===void 0&&(s="recorder");var c=this.getWAVBlob();this.download(c,s,"wav")},l.prototype.transformIntoPCM=function(s,c){var d=new Float32Array(s),f=new Float32Array(c),p=l.compress({left:d,right:f},this.inputSampleRate,this.outputSampleRate);return l.encodePCM(p,this.oututSampleBits,this.littleEdian)},l.prototype.destroy=function(){return this.stopStream(),this.closeAudioContext()},l.prototype.stopStream=function(){this.stream&&this.stream.getTracks&&(this.stream.getTracks().forEach(function(s){return s.stop()}),this.stream=null)},l.prototype.closeAudioContext=function(){return this.context&&this.context.close&&this.context.state!=="closed"?this.context.close():new Promise(function(s){s()})},l.prototype.download=function(s,c,d){try{var f=document.createElement("a");f.href=window.URL.createObjectURL(s),f.download=c+"."+d,f.click()}catch(p){l.throwError(p)}},l.prototype.clear=function(){this.lBuffer.length=0,this.rBuffer.length=0,this.size=0,this.fileSize=0,this.PCM=null,this.audioInput=null,this.duration=0,this.ispause=!1,this.isplaying=!1,this.playTime=0,this.totalPlayTime=0,this.source&&(this.source.stop(),this.source=null)},l.prototype.flat=function(){var s=null,c=new Float32Array(0);this.config.numChannels===1?s=new Float32Array(this.size):(s=new Float32Array(this.size/2),c=new Float32Array(this.size/2));for(var d=0,f=0;f<this.lBuffer.length;f++)s.set(this.lBuffer[f],d),d+=this.lBuffer[f].length;for(d=0,f=0;f<this.rBuffer.length;f++)c.set(this.rBuffer[f],d),d+=this.rBuffer[f].length;return{left:s,right:c}},l.playAudio=function(s){var 
c=document.createElement("audio");c.src=window.URL.createObjectURL(s),c.play()},l.compress=function(s,c,d){for(var f=c/d,p=Math.max(f,1),v=s.left,m=s.right,y=Math.floor((v.length+m.length)/f),b=new Float32Array(y),C=0,S=0;C<y;){var w=Math.floor(S);b[C]=v[w],C++,m.length&&(b[C]=m[w],C++),S+=p}return b},l.encodePCM=function(s,c,d){d===void 0&&(d=!0);var f=0,p=s.length*(c/8),v=new ArrayBuffer(p),m=new DataView(v);if(c===8)for(var y=0;y<s.length;y++,f++){var b=(C=Math.max(-1,Math.min(1,s[y])))<0?128*C:127*C;b=+b+128,m.setInt8(f,b)}else for(y=0;y<s.length;y++,f+=2){var C=Math.max(-1,Math.min(1,s[y]));m.setInt16(f,C<0?32768*C:32767*C,d)}return m},l.encodeWAV=function(s,c,d,f,p,v){v===void 0&&(v=!0);var m=d>c?c:d,y=p,b=new ArrayBuffer(44+s.byteLength),C=new DataView(b),S=f,w=0;i(C,w,"RIFF"),w+=4,C.setUint32(w,36+s.byteLength,v),i(C,w+=4,"WAVE"),i(C,w+=4,"fmt "),w+=4,C.setUint32(w,16,v),w+=4,C.setUint16(w,1,v),w+=2,C.setUint16(w,S,v),w+=2,C.setUint32(w,m,v),w+=4,C.setUint32(w,S*m*(y/8),v),w+=4,C.setUint16(w,S*(y/8),v),w+=2,C.setUint16(w,y,v),i(C,w+=2,"data"),w+=4,C.setUint32(w,s.byteLength,v),w+=4;for(var k=0;k<s.byteLength;)C.setUint8(w,s.getUint8(k)),w++,k++;return C},l.throwError=function(s){throw new Error(s)},l.initUserMedia=function(){navigator.mediaDevices===void 0&&(navigator.mediaDevices={}),navigator.mediaDevices.getUserMedia===void 0&&(navigator.mediaDevices.getUserMedia=function(s){var c=navigator.getUserMedia||navigator.webkitGetUserMedia||navigator.mozGetUserMedia;return c?new Promise(function(d,f){c.call(navigator,s,d,f)}):Promise.reject(new Error("\u6D4F\u89C8\u5668\u4E0D\u652F\u6301 getUserMedia !"))})},l.getPermission=function(){return this.initUserMedia(),navigator.mediaDevices.getUserMedia({audio:!0}).then(function(s){s.getTracks().forEach(function(c){return c.stop()})})},l}();function i(l,s,c){for(var d=0;d<c.length;d++)l.setUint8(s+d,c.charCodeAt(d))}r.default=o}]).default})})(h9);var Gf=h9.exports,gk=(e,t)=>{const n=e.__vccOpts||e;for(const[r,a]of 
t)n[r]=a;return n};new(window.AudioContext||window.webkitAudioContext)({latencyHint:"interactive",sampleRate:24e3});new Gf({sampleBits:16,sampleRate:16e3,numChannels:1,compiling:!0});new AudioContext({latencyHint:"interactive",sampleRate:24e3});const tp=new Gf({sampleBits:16,sampleRate:16e3,numChannels:1,compiling:!0}),S9e={data(){return{onReco:!1,asrResult:"",wsUrl:"127.0.0.1:8090/paddlespeech/asr/streaming",ws:""}},methods:{connectServer(){if(this.wsUrl){console.log("wsUrl",this.wsUrl),this.ws=new WebSocket("ws://"+this.wsUrl);var e=this;this.ws.addEventListener("message",function(t){var n=JSON.parse(t.data);n.result&&n.result!=e.streamAsrResult&&(e.asrResult=n.result,e.$nextTick(()=>{}))}),this.ws.addEventListener("open",function(){e.$message.success("Websocket\u5EFA\u7ACB\u8FDE\u63A5\u6210\u529F")}),this.ws.addEventListener("error",function(){e.$message.error("Websocket\u8FDE\u63A5\u5931\u8D25")})}else{this.$message.error("\u8BF7\u8F93\u5165URL\u5730\u5740");return}},startRecorder(){if(!this.ws){this.$message.error("\u8BF7\u5148\u8FDE\u63A5\u540E\u7AEF\u670D\u52A1");return}if(this.ws.readyState!=1){this.$message.error("WebSocket\u672A\u80FD\u6210\u529F\u8FDE\u63A5\uFF0C\u8BF7\u68C0\u67E5\u670D\u52A1\u662F\u5426\u6B63\u786E");return}this.onReco=!0;var e=JSON.stringify({name:"test.wav",nbest:5,signal:"start"});this.ws.send(e),tp.start().then(()=>{setInterval(()=>{let 
t=tp.getNextData();!t.length||this.uploadChunk(t)},300)},t=>{console.log("\u5F55\u97F3\u51FA\u9519")})},endRecorder(){tp.stop(),this.onReco=!1,tp.clear()},uploadChunk(e){e.forEach(t=>{this.ws.send(t)})}}},Wg=e=>(rC("data-v-1bbbdb64"),e=e(),aC(),e),k9e={class:"server_input"},$9e=yt("ws://"),O9e=yt("\u8FDE\u63A5"),P9e={class:"realTime"},T9e={class:"public_recognition_speech"},x9e={key:0},_9e=Wg(()=>Z("div",{class:"endToEndIdentification_end_recorder_img_back"},null,-1)),E9e=[_9e],M9e={key:1},I9e={class:"endToEndIdentification_prompt"},N9e={key:0},A9e={key:1},D9e=Wg(()=>Z("div",{class:"speech_text_prompt"}," \u5B9E\u65F6\u5F97\u5230\u8BC6\u522B\u7ED3\u679C ",-1)),R9e=Wg(()=>Z("div",{class:"public_recognition_point_to"},null,-1)),L9e={class:"public_recognition_result"},F9e=Wg(()=>Z("div",null,"\u8BC6\u522B\u7ED3\u679C",-1));function B9e(e,t,n,r,a,o){const i=we("el-input"),l=we("el-button");return R(),X(Fe,null,[Z("div",k9e,[g(i,{class:"serverAddress",modelValue:a.wsUrl,"onUpdate:modelValue":t[0]||(t[0]=s=>a.wsUrl=s),placeholder:"Please input"},{prepend:re(()=>[$9e]),_:1},8,["modelValue"]),g(l,{class:"serverConnect",type:"primary",onClick:t[1]||(t[1]=s=>o.connectServer())},{default:re(()=>[O9e]),_:1})]),Z("div",P9e,[Z("div",T9e,[a.onReco?(R(),X("div",x9e,[Z("div",{onClick:t[2]||(t[2]=s=>o.endRecorder()),class:"endToEndIdentification_end_recorder_img"},E9e)])):(R(),X("div",M9e,[Z("div",{onClick:t[3]||(t[3]=s=>o.startRecorder()),class:"endToEndIdentification_start_recorder_img"})])),Z("div",I9e,[a.onReco?(R(),X("div",N9e," \u7ED3\u675F\u8BC6\u522B ")):(R(),X("div",A9e," \u5F00\u59CB\u8BC6\u522B "))]),D9e]),R9e,Z("div",L9e,[F9e,Z("div",null,Me(a.asrResult),1)])])],64)}var V9e=gk(S9e,[["render",B9e],["__scopeId","data-v-1bbbdb64"]]);new Gf({sampleBits:16,sampleRate:16e3,numChannels:1,compiling:!0});const 
z9e=e=>(rC("data-v-02a5e0f4"),e=e(),aC(),e),H9e={class:"speech_recognition"},j9e={class:"speech_recognition_tabs"},K9e=z9e(()=>Z("div",{class:"frame"},null,-1)),W9e={},U9e=Object.assign(W9e,{setup(e){return(t,n)=>{const r=we("el-tab-pane"),a=we("el-tabs");return R(),X("div",H9e,[Z("div",j9e,[K9e,g(a,{class:"speech_recognition_mytabs",type:"border-card"},{default:re(()=>[g(r,{label:"\u5B9E\u65F6\u8BED\u97F3\u8BC6\u522B",key:"1"},{default:re(()=>[g(V9e)]),_:1})]),_:1})])])}}});var Y9e=gk(U9e,[["__scopeId","data-v-02a5e0f4"]]);new(window.AudioContext||window.webkitAudioContext)({latencyHint:"interactive"});new Gf({sampleBits:16,sampleRate:16e3,numChannels:1,compiling:!0});new AudioContext({latencyHint:"interactive",sampleRate:16e3});new Gf({sampleBits:16,sampleRate:16e3,numChannels:1,compiling:!0});const q9e={className:"experience"},G9e={className:"experience_wrapper"},X9e=Z("div",{className:"experience_title"}," \u529F\u80FD\u4F53\u9A8C ",-1),Z9e=Z("div",{className:"experience_describe"}," \u4F53\u9A8C\u524D\uFF0C\u8BF7\u5141\u8BB8\u6D4F\u89C8\u5668\u83B7\u53D6\u9EA6\u514B\u98CE\u6743\u9650 ",-1),J9e={className:"experience_content"},Q9e={setup(e){return(t,n)=>{const r=we("el-tab-pane"),a=we("el-tabs");return R(),X("div",q9e,[Z("div",G9e,[X9e,Z9e,Z("div",J9e,[g(a,{className:"experience_tabs",type:"border-card"},{default:re(()=>[g(r,{label:"\u8BED\u97F3\u8BC6\u522B",key:"3"},{default:re(()=>[g(Y9e)]),_:1})]),_:1})])])])}}},eLe={name:"Header"},yk=e=>(rC("data-v-e2003d16"),e=e(),aC(),e),tLe={className:"speech_header"},nLe=yk(()=>Z("div",{className:"speech_header_title"}," \u98DE\u6868-PaddleSpeech ",-1)),rLe=yk(()=>Z("div",{className:"speech_header_describe"}," PaddleSpeech \u662F\u57FA\u4E8E\u98DE\u6868 PaddlePaddle \u7684\u8BED\u97F3\u65B9\u5411\u7684\u5F00\u6E90\u6A21\u578B\u5E93\uFF0C\u7528\u4E8E\u8BED\u97F3\u548C\u97F3\u9891\u4E2D\u7684\u5404\u79CD\u5173\u952E\u4EFB\u52A1\u7684\u5F00\u53D1\uFF0C\u6B22\u8FCE\u5927\u5BB6Star\u6536\u85CF\u9F13\u52B1 
",-1)),aLe=yk(()=>Z("div",{className:"speech_header_link_box"},[Z("a",{href:"https://github.com/PaddlePaddle/PaddleSpeech",className:"speech_header_link",target:"_blank",rel:"noreferrer",key:"{index}"}," \u524D\u5F80Github ")],-1)),oLe=[nLe,rLe,aLe];function iLe(e,t,n,r,a,o){return R(),X("div",tLe,oLe)}var lLe=gk(eLe,[["render",iLe],["__scopeId","data-v-e2003d16"]]);const sLe={class:"app"},uLe={setup(e){return(t,n)=>(R(),X("div",sLe,[g(lLe),g(Q9e)]))}},bk=$m(uLe);bk.config.globalProperties.$http=w9e;bk.use(ibe).use(cRe);bk.mount("#app")});export default cLe();

</script>
    <style type="text/css">
@charset "UTF-8";:root{--el-color-white:#ffffff;--el-color-black:#000000;--el-color-primary-rgb:64,158,255;--el-color-success-rgb:103,194,58;--el-color-warning-rgb:230,162,60;--el-color-danger-rgb:245,108,108;--el-color-error-rgb:245,108,108;--el-color-info-rgb:144,147,153;--el-font-size-extra-large:20px;--el-font-size-large:18px;--el-font-size-medium:16px;--el-font-size-base:14px;--el-font-size-small:13px;--el-font-size-extra-small:12px;--el-font-family:"Helvetica Neue",Helvetica,"PingFang SC","Hiragino Sans GB","Microsoft YaHei","\5fae\8f6f\96c5\9ed1",Arial,sans-serif;--el-font-weight-primary:500;--el-font-line-height-primary:24px;--el-index-normal:1;--el-index-top:1000;--el-index-popper:2000;--el-border-radius-base:4px;--el-border-radius-small:2px;--el-border-radius-round:20px;--el-border-radius-circle:100%;--el-transition-duration:.3s;--el-transition-duration-fast:.2s;--el-transition-function-ease-in-out-bezier:cubic-bezier(.645, .045, .355, 1);--el-transition-function-fast-bezier:cubic-bezier(.23, 1, .32, 1);--el-transition-all:all var(--el-transition-duration) var(--el-transition-function-ease-in-out-bezier);--el-transition-fade:opacity var(--el-transition-duration) var(--el-transition-function-fast-bezier);--el-transition-md-fade:transform var(--el-transition-duration) var(--el-transition-function-fast-bezier),opacity var(--el-transition-duration) var(--el-transition-function-fast-bezier);--el-transition-fade-linear:opacity var(--el-transition-duration-fast) linear;--el-transition-border:border-color var(--el-transition-duration-fast) var(--el-transition-function-ease-in-out-bezier);--el-transition-box-shadow:box-shadow var(--el-transition-duration-fast) var(--el-transition-function-ease-in-out-bezier);--el-transition-color:color var(--el-transition-duration-fast) 
var(--el-transition-function-ease-in-out-bezier)}:root{color-scheme:light;--el-color-white:#ffffff;--el-color-black:#000000;--el-color-primary:#409eff;--el-color-primary-light-3:#79bbff;--el-color-primary-light-5:#a0cfff;--el-color-primary-light-7:#c6e2ff;--el-color-primary-light-8:#d9ecff;--el-color-primary-light-9:#ecf5ff;--el-color-primary-dark-2:#337ecc;--el-color-success:#67c23a;--el-color-success-light-3:#95d475;--el-color-success-light-5:#b3e19d;--el-color-success-light-7:#d1edc4;--el-color-success-light-8:#e1f3d8;--el-color-success-light-9:#f0f9eb;--el-color-success-dark-2:#529b2e;--el-color-warning:#e6a23c;--el-color-warning-light-3:#eebe77;--el-color-warning-light-5:#f3d19e;--el-color-warning-light-7:#f8e3c5;--el-color-warning-light-8:#faecd8;--el-color-warning-light-9:#fdf6ec;--el-color-warning-dark-2:#b88230;--el-color-danger:#f56c6c;--el-color-danger-light-3:#f89898;--el-color-danger-light-5:#fab6b6;--el-color-danger-light-7:#fcd3d3;--el-color-danger-light-8:#fde2e2;--el-color-danger-light-9:#fef0f0;--el-color-danger-dark-2:#c45656;--el-color-error:#f56c6c;--el-color-error-light-3:#f89898;--el-color-error-light-5:#fab6b6;--el-color-error-light-7:#fcd3d3;--el-color-error-light-8:#fde2e2;--el-color-error-light-9:#fef0f0;--el-color-error-dark-2:#c45656;--el-color-info:#909399;--el-color-info-light-3:#b1b3b8;--el-color-info-light-5:#c8c9cc;--el-color-info-light-7:#dedfe0;--el-color-info-light-8:#e9e9eb;--el-color-info-light-9:#f4f4f5;--el-color-info-dark-2:#73767a;--el-bg-color:#ffffff;--el-bg-color-page:#ffffff;--el-bg-color-overlay:#ffffff;--el-text-color-primary:#303133;--el-text-color-regular:#606266;--el-text-color-secondary:#909399;--el-text-color-placeholder:#a8abb2;--el-text-color-disabled:#c0c4cc;--el-border-color:#dcdfe6;--el-border-color-light:#e4e7ed;--el-border-color-lighter:#ebeef5;--el-border-color-extra-light:#f2f6fc;--el-border-color-dark:#d4d7de;--el-border-color-darker:#cdd0d6;--el-fill-color:#f0f2f5;--el-fill-color-light:#f5f7fa;--el-fil
l-color-lighter:#fafafa;--el-fill-color-extra-light:#fafcff;--el-fill-color-dark:#ebedf0;--el-fill-color-darker:#e6e8eb;--el-fill-color-blank:#ffffff;--el-box-shadow:0px 12px 32px 4px rgba(0, 0, 0, .04),0px 8px 20px rgba(0, 0, 0, .08);--el-box-shadow-light:0px 0px 12px rgba(0, 0, 0, .12);--el-box-shadow-lighter:0px 0px 6px rgba(0, 0, 0, .12);--el-box-shadow-dark:0px 16px 48px 16px rgba(0, 0, 0, .08),0px 12px 32px rgba(0, 0, 0, .12),0px 8px 16px -8px rgba(0, 0, 0, .16);--el-disabled-bg-color:var(--el-fill-color-light);--el-disabled-text-color:var(--el-text-color-placeholder);--el-disabled-border-color:var(--el-border-color-light);--el-overlay-color:rgba(0, 0, 0, .8);--el-overlay-color-light:rgba(0, 0, 0, .7);--el-overlay-color-lighter:rgba(0, 0, 0, .5);--el-mask-color:rgba(255, 255, 255, .9);--el-mask-color-extra-light:rgba(255, 255, 255, .3);--el-border-width:1px;--el-border-style:solid;--el-border-color-hover:var(--el-text-color-disabled);--el-border:var(--el-border-width) var(--el-border-style) var(--el-border-color);--el-svg-monochrome-grey:var(--el-border-color)}.fade-in-linear-enter-active,.fade-in-linear-leave-active{transition:var(--el-transition-fade-linear)}.fade-in-linear-enter-from,.fade-in-linear-leave-to{opacity:0}.el-fade-in-linear-enter-active,.el-fade-in-linear-leave-active{transition:var(--el-transition-fade-linear)}.el-fade-in-linear-enter-from,.el-fade-in-linear-leave-to{opacity:0}.el-fade-in-enter-active,.el-fade-in-leave-active{transition:all var(--el-transition-duration) cubic-bezier(.55,0,.1,1)}.el-fade-in-enter-from,.el-fade-in-leave-active{opacity:0}.el-zoom-in-center-enter-active,.el-zoom-in-center-leave-active{transition:all var(--el-transition-duration) cubic-bezier(.55,0,.1,1)}.el-zoom-in-center-enter-from,.el-zoom-in-center-leave-active{opacity:0;transform:scaleX(0)}.el-zoom-in-top-enter-active,.el-zoom-in-top-leave-active{opacity:1;transform:scaleY(1);transition:var(--el-transition-md-fade);transform-origin:center 
top}.el-zoom-in-top-enter-active[data-popper-placement^=top],.el-zoom-in-top-leave-active[data-popper-placement^=top]{transform-origin:center bottom}.el-zoom-in-top-enter-from,.el-zoom-in-top-leave-active{opacity:0;transform:scaleY(0)}.el-zoom-in-bottom-enter-active,.el-zoom-in-bottom-leave-active{opacity:1;transform:scaleY(1);transition:var(--el-transition-md-fade);transform-origin:center bottom}.el-zoom-in-bottom-enter-from,.el-zoom-in-bottom-leave-active{opacity:0;transform:scaleY(0)}.el-zoom-in-left-enter-active,.el-zoom-in-left-leave-active{opacity:1;transform:scale(1);transition:var(--el-transition-md-fade);transform-origin:top left}.el-zoom-in-left-enter-from,.el-zoom-in-left-leave-active{opacity:0;transform:scale(.45)}.collapse-transition{transition:var(--el-transition-duration) height ease-in-out,var(--el-transition-duration) padding-top ease-in-out,var(--el-transition-duration) padding-bottom ease-in-out}.el-collapse-transition-enter-active,.el-collapse-transition-leave-active{transition:var(--el-transition-duration) max-height ease-in-out,var(--el-transition-duration) padding-top ease-in-out,var(--el-transition-duration) padding-bottom ease-in-out}.horizontal-collapse-transition{transition:var(--el-transition-duration) width ease-in-out,var(--el-transition-duration) padding-left ease-in-out,var(--el-transition-duration) padding-right ease-in-out}.el-list-enter-active,.el-list-leave-active{transition:all 1s}.el-list-enter-from,.el-list-leave-to{opacity:0;transform:translateY(-30px)}.el-list-leave-active{position:absolute!important}.el-opacity-transition{transition:opacity var(--el-transition-duration) cubic-bezier(.55,0,.1,1)}.el-icon-loading{animation:rotating 2s linear infinite}.el-icon--right{margin-left:5px}.el-icon--left{margin-right:5px}@keyframes 
rotating{0%{transform:rotate(0)}to{transform:rotate(360deg)}}.el-icon{--color:inherit;height:1em;width:1em;line-height:1em;display:inline-flex;justify-content:center;align-items:center;position:relative;fill:currentColor;color:var(--color);font-size:inherit}.el-icon.is-loading{animation:rotating 2s linear infinite}.el-icon svg{height:1em;width:1em}.el-affix--fixed{position:fixed}.el-alert{--el-alert-padding:8px 16px;--el-alert-border-radius-base:var(--el-border-radius-base);--el-alert-title-font-size:13px;--el-alert-description-font-size:12px;--el-alert-close-font-size:12px;--el-alert-close-customed-font-size:13px;--el-alert-icon-size:16px;--el-alert-icon-large-size:28px;width:100%;padding:var(--el-alert-padding);margin:0;box-sizing:border-box;border-radius:var(--el-alert-border-radius-base);position:relative;background-color:var(--el-color-white);overflow:hidden;opacity:1;display:flex;align-items:center;transition:opacity var(--el-transition-duration-fast)}.el-alert.is-light .el-alert__close-btn{color:var(--el-text-color-placeholder)}.el-alert.is-dark .el-alert__close-btn,.el-alert.is-dark .el-alert__description{color:var(--el-color-white)}.el-alert.is-center{justify-content:center}.el-alert--success{--el-alert-bg-color:var(--el-color-success-light-9)}.el-alert--success.is-light{background-color:var(--el-alert-bg-color);color:var(--el-color-success)}.el-alert--success.is-light .el-alert__description{color:var(--el-color-success)}.el-alert--success.is-dark{background-color:var(--el-color-success);color:var(--el-color-white)}.el-alert--info{--el-alert-bg-color:var(--el-color-info-light-9)}.el-alert--info.is-light{background-color:var(--el-alert-bg-color);color:var(--el-color-info)}.el-alert--info.is-light 
.el-alert__description{color:var(--el-color-info)}.el-alert--info.is-dark{background-color:var(--el-color-info);color:var(--el-color-white)}.el-alert--warning{--el-alert-bg-color:var(--el-color-warning-light-9)}.el-alert--warning.is-light{background-color:var(--el-alert-bg-color);color:var(--el-color-warning)}.el-alert--warning.is-light .el-alert__description{color:var(--el-color-warning)}.el-alert--warning.is-dark{background-color:var(--el-color-warning);color:var(--el-color-white)}.el-alert--error{--el-alert-bg-color:var(--el-color-error-light-9)}.el-alert--error.is-light{background-color:var(--el-alert-bg-color);color:var(--el-color-error)}.el-alert--error.is-light .el-alert__description{color:var(--el-color-error)}.el-alert--error.is-dark{background-color:var(--el-color-error);color:var(--el-color-white)}.el-alert__content{display:table-cell;padding:0 8px}.el-alert .el-alert__icon{font-size:var(--el-alert-icon-size);width:var(--el-alert-icon-size)}.el-alert .el-alert__icon.is-big{font-size:var(--el-alert-icon-large-size);width:var(--el-alert-icon-large-size)}.el-alert__title{font-size:var(--el-alert-title-font-size);line-height:18px;vertical-align:text-top}.el-alert__title.is-bold{font-weight:700}.el-alert .el-alert__description{font-size:var(--el-alert-description-font-size);margin:5px 0 0}.el-alert .el-alert__close-btn{font-size:var(--el-alert-close-font-size);opacity:1;position:absolute;top:12px;right:15px;cursor:pointer}.el-alert .el-alert__close-btn.is-customed{font-style:normal;font-size:var(--el-alert-close-customed-font-size);top:9px}.el-alert-fade-enter-from,.el-alert-fade-leave-active{opacity:0}.el-aside{--el-aside-width:300px;overflow:auto;box-sizing:border-box;flex-shrink:0;width:var(--el-aside-width)}.el-autocomplete{position:relative;display:inline-block}.el-autocomplete__popper.el-popper[role=tooltip]{background:#fff;border:1px solid 
var(--el-border-color-light);box-shadow:var(--el-box-shadow-light)}.el-autocomplete__popper.el-popper[role=tooltip] .el-popper__arrow:before{border:1px solid var(--el-border-color-light)}.el-autocomplete__popper.el-popper[role=tooltip][data-popper-placement^=top] .el-popper__arrow:before{border-top-color:transparent;border-left-color:transparent}.el-autocomplete__popper.el-popper[role=tooltip][data-popper-placement^=bottom] .el-popper__arrow:before{border-bottom-color:transparent;border-right-color:transparent}.el-autocomplete__popper.el-popper[role=tooltip][data-popper-placement^=left] .el-popper__arrow:before{border-left-color:transparent;border-bottom-color:transparent}.el-autocomplete__popper.el-popper[role=tooltip][data-popper-placement^=right] .el-popper__arrow:before{border-right-color:transparent;border-top-color:transparent}.el-autocomplete-suggestion{border-radius:var(--el-border-radius-base);box-sizing:border-box}.el-autocomplete-suggestion__wrap{max-height:280px;padding:10px 0;box-sizing:border-box}.el-autocomplete-suggestion__list{margin:0;padding:0}.el-autocomplete-suggestion li{padding:0 20px;margin:0;line-height:34px;cursor:pointer;color:var(--el-text-color-regular);font-size:var(--el-font-size-base);list-style:none;text-align:left;white-space:nowrap;overflow:hidden;text-overflow:ellipsis}.el-autocomplete-suggestion li:hover,.el-autocomplete-suggestion li.highlighted{background-color:var(--el-fill-color-light)}.el-autocomplete-suggestion li.divider{margin-top:6px;border-top:1px solid var(--el-color-black)}.el-autocomplete-suggestion li.divider:last-child{margin-bottom:-6px}.el-autocomplete-suggestion.is-loading li{text-align:center;height:100px;line-height:100px;font-size:20px;color:var(--el-text-color-secondary)}.el-autocomplete-suggestion.is-loading li:after{display:inline-block;content:"";height:100%;vertical-align:middle}.el-autocomplete-suggestion.is-loading li:hover{background-color:var(--el-color-white)}.el-autocomplete-suggestion.is-loading 
.el-icon-loading{vertical-align:middle}.el-avatar{--el-avatar-text-color:var(--el-color-white);--el-avatar-bg-color:var(--el-text-color-disabled);--el-avatar-text-size:14px;--el-avatar-icon-size:18px;--el-avatar-border-radius:var(--el-border-radius-base);--el-avatar-size-large:56px;--el-avatar-size-default:40px;--el-avatar-size-small:24px;--el-avatar-size:40px;display:inline-flex;justify-content:center;align-items:center;box-sizing:border-box;text-align:center;overflow:hidden;color:var(--el-avatar-text-color);background:var(--el-avatar-bg-color);width:var(--el-avatar-size);height:var(--el-avatar-size);font-size:var(--el-avatar-text-size)}.el-avatar>img{display:block;height:100%}.el-avatar--circle{border-radius:50%}.el-avatar--square{border-radius:var(--el-avatar-border-radius)}.el-avatar--icon{font-size:var(--el-avatar-icon-size)}.el-avatar--small{--el-avatar-size:24px}.el-avatar--large{--el-avatar-size:56px}.el-backtop{--el-backtop-bg-color:var(--el-bg-color-overlay);--el-backtop-text-color:var(--el-color-primary);--el-backtop-hover-bg-color:var(--el-border-color-extra-light);position:fixed;background-color:var(--el-backtop-bg-color);width:40px;height:40px;border-radius:50%;color:var(--el-backtop-text-color);display:flex;align-items:center;justify-content:center;font-size:20px;box-shadow:var(--el-box-shadow-lighter);cursor:pointer;z-index:5}.el-backtop:hover{background-color:var(--el-backtop-hover-bg-color)}.el-backtop__icon{font-size:20px}.el-badge{--el-badge-bg-color:var(--el-color-danger);--el-badge-radius:10px;--el-badge-font-size:12px;--el-badge-padding:6px;--el-badge-size:18px;position:relative;vertical-align:middle;display:inline-block}.el-badge__content{background-color:var(--el-badge-bg-color);border-radius:var(--el-badge-radius);color:var(--el-color-white);display:inline-block;font-size:var(--el-badge-font-size);height:var(--el-badge-size);line-height:var(--el-badge-size);padding:0 var(--el-badge-padding);text-align:center;white-space:nowrap;border:1px 
solid var(--el-color-white)}.el-badge__content.is-fixed{position:absolute;top:0;right:calc(1px + var(--el-badge-size)/ 2);transform:translateY(-50%) translate(100%)}.el-badge__content.is-fixed.is-dot{right:5px}.el-badge__content.is-dot{height:8px;width:8px;padding:0;right:0;border-radius:50%}.el-badge__content--primary{background-color:var(--el-color-primary)}.el-badge__content--success{background-color:var(--el-color-success)}.el-badge__content--warning{background-color:var(--el-color-warning)}.el-badge__content--info{background-color:var(--el-color-info)}.el-badge__content--danger{background-color:var(--el-color-danger)}.el-breadcrumb{font-size:14px;line-height:1}.el-breadcrumb:after,.el-breadcrumb:before{display:table;content:""}.el-breadcrumb:after{clear:both}.el-breadcrumb__separator{margin:0 9px;font-weight:700;color:var(--el-text-color-placeholder)}.el-breadcrumb__separator.el-icon{margin:0 6px;font-weight:400}.el-breadcrumb__separator.el-icon svg{vertical-align:middle}.el-breadcrumb__item{float:left;display:flex;align-items:center}.el-breadcrumb__inner{color:var(--el-text-color-regular)}.el-breadcrumb__inner a,.el-breadcrumb__inner.is-link{font-weight:700;text-decoration:none;transition:var(--el-transition-color);color:var(--el-text-color-primary)}.el-breadcrumb__inner a:hover,.el-breadcrumb__inner.is-link:hover{color:var(--el-color-primary);cursor:pointer}.el-breadcrumb__item:last-child .el-breadcrumb__inner,.el-breadcrumb__item:last-child .el-breadcrumb__inner a,.el-breadcrumb__item:last-child .el-breadcrumb__inner a:hover,.el-breadcrumb__item:last-child .el-breadcrumb__inner:hover{font-weight:400;color:var(--el-text-color-regular);cursor:text}.el-breadcrumb__item:last-child 
.el-breadcrumb__separator{display:none}.el-button-group{display:inline-block;vertical-align:middle}.el-button-group:after,.el-button-group:before{display:table;content:""}.el-button-group:after{clear:both}.el-button-group>.el-button{float:left;position:relative}.el-button-group>.el-button+.el-button{margin-left:0}.el-button-group>.el-button:first-child{border-top-right-radius:0;border-bottom-right-radius:0}.el-button-group>.el-button:last-child{border-top-left-radius:0;border-bottom-left-radius:0}.el-button-group>.el-button:first-child:last-child{border-top-right-radius:var(--el-border-radius-base);border-bottom-right-radius:var(--el-border-radius-base);border-top-left-radius:var(--el-border-radius-base);border-bottom-left-radius:var(--el-border-radius-base)}.el-button-group>.el-button:first-child:last-child.is-round{border-radius:var(--el-border-radius-round)}.el-button-group>.el-button:first-child:last-child.is-circle{border-radius:50%}.el-button-group>.el-button:not(:first-child):not(:last-child){border-radius:0}.el-button-group>.el-button:not(:last-child){margin-right:-1px}.el-button-group>.el-button:active,.el-button-group>.el-button:focus,.el-button-group>.el-button:hover{z-index:1}.el-button-group>.el-button.is-active{z-index:1}.el-button-group>.el-dropdown>.el-button{border-top-left-radius:0;border-bottom-left-radius:0;border-left-color:var(--el-button-divide-border-color)}.el-button-group .el-button--primary:first-child{border-right-color:var(--el-button-divide-border-color)}.el-button-group .el-button--primary:last-child{border-left-color:var(--el-button-divide-border-color)}.el-button-group .el-button--primary:not(:first-child):not(:last-child){border-left-color:var(--el-button-divide-border-color);border-right-color:var(--el-button-divide-border-color)}.el-button-group .el-button--success:first-child{border-right-color:var(--el-button-divide-border-color)}.el-button-group 
.el-button--success:last-child{border-left-color:var(--el-button-divide-border-color)}.el-button-group .el-button--success:not(:first-child):not(:last-child){border-left-color:var(--el-button-divide-border-color);border-right-color:var(--el-button-divide-border-color)}.el-button-group .el-button--warning:first-child{border-right-color:var(--el-button-divide-border-color)}.el-button-group .el-button--warning:last-child{border-left-color:var(--el-button-divide-border-color)}.el-button-group .el-button--warning:not(:first-child):not(:last-child){border-left-color:var(--el-button-divide-border-color);border-right-color:var(--el-button-divide-border-color)}.el-button-group .el-button--danger:first-child{border-right-color:var(--el-button-divide-border-color)}.el-button-group .el-button--danger:last-child{border-left-color:var(--el-button-divide-border-color)}.el-button-group .el-button--danger:not(:first-child):not(:last-child){border-left-color:var(--el-button-divide-border-color);border-right-color:var(--el-button-divide-border-color)}.el-button-group .el-button--info:first-child{border-right-color:var(--el-button-divide-border-color)}.el-button-group .el-button--info:last-child{border-left-color:var(--el-button-divide-border-color)}.el-button-group .el-button--info:not(:first-child):not(:last-child){border-left-color:var(--el-button-divide-border-color);border-right-color:var(--el-button-divide-border-color)}.el-button{--el-button-font-weight:var(--el-font-weight-primary);--el-button-border-color:var(--el-border-color);--el-button-bg-color:var(--el-fill-color-blank);--el-button-text-color:var(--el-text-color-regular);--el-button-disabled-text-color:var(--el-disabled-text-color);--el-button-disabled-bg-color:var(--el-fill-color-blank);--el-button-disabled-border-color:var(--el-border-color-light);--el-button-divide-border-color:rgba(255, 255, 255, 
.5);--el-button-hover-text-color:var(--el-color-primary);--el-button-hover-bg-color:var(--el-color-primary-light-9);--el-button-hover-border-color:var(--el-color-primary-light-7);--el-button-active-text-color:var(--el-button-hover-text-color);--el-button-active-border-color:var(--el-color-primary);--el-button-active-bg-color:var(--el-button-hover-bg-color)}.el-button{display:inline-flex;justify-content:center;align-items:center;line-height:1;height:32px;white-space:nowrap;cursor:pointer;background-color:var(--el-button-bg-color);border:var(--el-border);border-color:var(--el-button-border-color);color:var(--el-button-text-color);-webkit-appearance:none;text-align:center;box-sizing:border-box;outline:0;transition:.1s;font-weight:var(--el-button-font-weight);-webkit-user-select:none;user-select:none;vertical-align:middle;padding:8px 15px;font-size:var(--el-font-size-base);border-radius:var(--el-border-radius-base)}.el-button>span{display:inline-flex;align-items:center}.el-button+.el-button{margin-left:12px}.el-button.is-round{padding:8px 15px}.el-button:focus,.el-button:hover{color:var(--el-button-hover-text-color);border-color:var(--el-button-hover-border-color);background-color:var(--el-button-hover-bg-color);outline:0}.el-button:active{color:var(--el-button-active-text-color);border-color:var(--el-button-active-border-color);background-color:var(--el-button-active-bg-color);outline:0}.el-button::-moz-focus-inner{border:0}.el-button [class*=el-icon]+span{margin-left:6px}.el-button [class*=el-icon] 
svg{vertical-align:bottom}.el-button.is-plain{--el-button-hover-text-color:var(--el-color-primary);--el-button-hover-bg-color:var(--el-fill-color-blank);--el-button-hover-border-color:var(--el-color-primary)}.el-button.is-active{color:var(--el-button-active-text-color);border-color:var(--el-button-active-border-color);background-color:var(--el-button-active-bg-color);outline:0}.el-button.is-disabled,.el-button.is-disabled:focus,.el-button.is-disabled:hover{color:var(--el-button-disabled-text-color);cursor:not-allowed;background-image:none;background-color:var(--el-button-disabled-bg-color);border-color:var(--el-button-disabled-border-color)}.el-button.is-loading{position:relative;pointer-events:none}.el-button.is-loading:before{z-index:1;pointer-events:none;content:"";position:absolute;left:-1px;top:-1px;right:-1px;bottom:-1px;border-radius:inherit;background-color:var(--el-mask-color-extra-light)}.el-button.is-round{border-radius:var(--el-border-radius-round)}.el-button.is-circle{border-radius:50%;padding:8px}.el-button__text--expand{letter-spacing:.3em;margin-right:-.3em}.el-button--primary{--el-button-text-color:var(--el-color-white);--el-button-bg-color:var(--el-color-primary);--el-button-border-color:var(--el-color-primary);--el-button-hover-text-color:var(--el-color-white);--el-button-hover-bg-color:var(--el-color-primary-light-3);--el-button-hover-border-color:var(--el-color-primary-light-3);--el-button-active-bg-color:var(--el-color-primary-dark-2);--el-button-active-border-color:var(--el-color-primary-dark-2);--el-button-disabled-text-color:var(--el-color-white);--el-button-disabled-bg-color:var(--el-color-primary-light-5);--el-button-disabled-border-color:var(--el-color-primary-light-5)}.el-button--primary.is-plain{--el-button-text-color:var(--el-color-primary);--el-button-bg-color:var(--el-color-primary-light-9);--el-button-border-color:var(--el-color-primary-light-5);--el-button-hover-text-color:var(--el-color-white);--el-button-hover-bg-color:var(--el-c
olor-primary);--el-button-hover-border-color:var(--el-color-primary);--el-button-active-text-color:var(--el-color-white)}.el-button--primary.is-plain.is-disabled,.el-button--primary.is-plain.is-disabled:active,.el-button--primary.is-plain.is-disabled:focus,.el-button--primary.is-plain.is-disabled:hover{color:var(--el-color-primary-light-5);background-color:var(--el-color-primary-light-9);border-color:var(--el-color-primary-light-8)}.el-button--success{--el-button-text-color:var(--el-color-white);--el-button-bg-color:var(--el-color-success);--el-button-border-color:var(--el-color-success);--el-button-hover-text-color:var(--el-color-white);--el-button-hover-bg-color:var(--el-color-success-light-3);--el-button-hover-border-color:var(--el-color-success-light-3);--el-button-active-bg-color:var(--el-color-success-dark-2);--el-button-active-border-color:var(--el-color-success-dark-2);--el-button-disabled-text-color:var(--el-color-white);--el-button-disabled-bg-color:var(--el-color-success-light-5);--el-button-disabled-border-color:var(--el-color-success-light-5)}.el-button--success.is-plain{--el-button-text-color:var(--el-color-success);--el-button-bg-color:var(--el-color-success-light-9);--el-button-border-color:var(--el-color-success-light-5);--el-button-hover-text-color:var(--el-color-white);--el-button-hover-bg-color:var(--el-color-success);--el-button-hover-border-color:var(--el-color-success);--el-button-active-text-color:var(--el-color-white)}.el-button--success.is-plain.is-disabled,.el-button--success.is-plain.is-disabled:active,.el-button--success.is-plain.is-disabled:focus,.el-button--success.is-plain.is-disabled:hover{color:var(--el-color-success-light-5);background-color:var(--el-color-success-light-9);border-color:var(--el-color-success-light-8)}.el-button--warning{--el-button-text-color:var(--el-color-white);--el-button-bg-color:var(--el-color-warning);--el-button-border-color:var(--el-color-warning);--el-button-hover-text-color:var(--el-color-white);--el-but
ton-hover-bg-color:var(--el-color-warning-light-3);--el-button-hover-border-color:var(--el-color-warning-light-3);--el-button-active-bg-color:var(--el-color-warning-dark-2);--el-button-active-border-color:var(--el-color-warning-dark-2);--el-button-disabled-text-color:var(--el-color-white);--el-button-disabled-bg-color:var(--el-color-warning-light-5);--el-button-disabled-border-color:var(--el-color-warning-light-5)}.el-button--warning.is-plain{--el-button-text-color:var(--el-color-warning);--el-button-bg-color:var(--el-color-warning-light-9);--el-button-border-color:var(--el-color-warning-light-5);--el-button-hover-text-color:var(--el-color-white);--el-button-hover-bg-color:var(--el-color-warning);--el-button-hover-border-color:var(--el-color-warning);--el-button-active-text-color:var(--el-color-white)}.el-button--warning.is-plain.is-disabled,.el-button--warning.is-plain.is-disabled:active,.el-button--warning.is-plain.is-disabled:focus,.el-button--warning.is-plain.is-disabled:hover{color:var(--el-color-warning-light-5);background-color:var(--el-color-warning-light-9);border-color:var(--el-color-warning-light-8)}.el-button--danger{--el-button-text-color:var(--el-color-white);--el-button-bg-color:var(--el-color-danger);--el-button-border-color:var(--el-color-danger);--el-button-hover-text-color:var(--el-color-white);--el-button-hover-bg-color:var(--el-color-danger-light-3);--el-button-hover-border-color:var(--el-color-danger-light-3);--el-button-active-bg-color:var(--el-color-danger-dark-2);--el-button-active-border-color:var(--el-color-danger-dark-2);--el-button-disabled-text-color:var(--el-color-white);--el-button-disabled-bg-color:var(--el-color-danger-light-5);--el-button-disabled-border-color:var(--el-color-danger-light-5)}.el-button--danger.is-plain{--el-button-text-color:var(--el-color-danger);--el-button-bg-color:var(--el-color-danger-light-9);--el-button-border-color:var(--el-color-danger-light-5);--el-button-hover-text-color:var(--el-color-white);--el-button-
hover-bg-color:var(--el-color-danger);--el-button-hover-border-color:var(--el-color-danger);--el-button-active-text-color:var(--el-color-white)}.el-button--danger.is-plain.is-disabled,.el-button--danger.is-plain.is-disabled:active,.el-button--danger.is-plain.is-disabled:focus,.el-button--danger.is-plain.is-disabled:hover{color:var(--el-color-danger-light-5);background-color:var(--el-color-danger-light-9);border-color:var(--el-color-danger-light-8)}.el-button--info{--el-button-text-color:var(--el-color-white);--el-button-bg-color:var(--el-color-info);--el-button-border-color:var(--el-color-info);--el-button-hover-text-color:var(--el-color-white);--el-button-hover-bg-color:var(--el-color-info-light-3);--el-button-hover-border-color:var(--el-color-info-light-3);--el-button-active-bg-color:var(--el-color-info-dark-2);--el-button-active-border-color:var(--el-color-info-dark-2);--el-button-disabled-text-color:var(--el-color-white);--el-button-disabled-bg-color:var(--el-color-info-light-5);--el-button-disabled-border-color:var(--el-color-info-light-5)}.el-button--info.is-plain{--el-button-text-color:var(--el-color-info);--el-button-bg-color:var(--el-color-info-light-9);--el-button-border-color:var(--el-color-info-light-5);--el-button-hover-text-color:var(--el-color-white);--el-button-hover-bg-color:var(--el-color-info);--el-button-hover-border-color:var(--el-color-info);--el-button-active-text-color:var(--el-color-white)}.el-button--info.is-plain.is-disabled,.el-button--info.is-plain.is-disabled:active,.el-button--info.is-plain.is-disabled:focus,.el-button--info.is-plain.is-disabled:hover{color:var(--el-color-info-light-5);background-color:var(--el-color-info-light-9);border-color:var(--el-color-info-light-8)}.el-button--large{--el-button-size:40px;height:var(--el-button-size);padding:12px 19px;font-size:var(--el-font-size-base);border-radius:var(--el-border-radius-base)}.el-button--large [class*=el-icon]+span{margin-left:8px}.el-button--large.is-round{padding:12px 
19px}.el-button--large.is-circle{width:var(--el-button-size);padding:12px}.el-button--small{--el-button-size:24px;height:var(--el-button-size);padding:5px 11px;font-size:12px;border-radius:calc(var(--el-border-radius-base) - 1px)}.el-button--small [class*=el-icon]+span{margin-left:4px}.el-button--small.is-round{padding:5px 11px}.el-button--small.is-circle{width:var(--el-button-size);padding:5px}.el-button--text{border-color:transparent;color:var(--el-color-primary);background:0 0;padding-left:0;padding-right:0}.el-button--text:focus,.el-button--text:hover{color:var(--el-color-primary-light-3);border-color:transparent;background-color:transparent}.el-button--text:active{color:var(--el-color-primary-dark-2);border-color:transparent;background-color:transparent}.el-button--text.is-disabled,.el-button--text.is-disabled:focus,.el-button--text.is-disabled:hover{border-color:transparent}.el-calendar{--el-calendar-border:var(--el-table-border, 1px solid var(--el-border-color-lighter));--el-calendar-header-border-bottom:var(--el-calendar-border);--el-calendar-selected-bg-color:var(--el-color-primary-light-9);--el-calendar-cell-width:85px;background-color:var(--el-fill-color-blank)}.el-calendar__header{display:flex;justify-content:space-between;padding:12px 20px;border-bottom:var(--el-calendar-header-border-bottom)}.el-calendar__title{color:var(--el-text-color);align-self:center}.el-calendar__body{padding:12px 20px 35px}.el-calendar-table{table-layout:fixed;width:100%}.el-calendar-table thead th{padding:12px 0;color:var(--el-text-color-regular);font-weight:400}.el-calendar-table:not(.is-range) td.next,.el-calendar-table:not(.is-range) td.prev{color:var(--el-text-color-placeholder)}.el-calendar-table td{border-bottom:var(--el-calendar-border);border-right:var(--el-calendar-border);vertical-align:top;transition:background-color var(--el-transition-duration-fast) ease}.el-calendar-table td.is-selected{background-color:var(--el-calendar-selected-bg-color)}.el-calendar-table 
td.is-today{color:var(--el-color-primary)}.el-calendar-table tr:first-child td{border-top:var(--el-calendar-border)}.el-calendar-table tr td:first-child{border-left:var(--el-calendar-border)}.el-calendar-table tr.el-calendar-table__row--hide-border td{border-top:none}.el-calendar-table .el-calendar-day{box-sizing:border-box;padding:8px;height:var(--el-calendar-cell-width)}.el-calendar-table .el-calendar-day:hover{cursor:pointer;background-color:var(--el-calendar-selected-bg-color)}.el-card{--el-card-border-color:var(--el-border-color-light);--el-card-border-radius:4px;--el-card-padding:20px;--el-card-bg-color:var(--el-fill-color-blank)}.el-card{border-radius:var(--el-card-border-radius);border:1px solid var(--el-card-border-color);background-color:var(--el-card-bg-color);overflow:hidden;color:var(--el-text-color-primary);transition:var(--el-transition-duration)}.el-card.is-always-shadow{box-shadow:var(--el-box-shadow-light)}.el-card.is-hover-shadow:focus,.el-card.is-hover-shadow:hover{box-shadow:var(--el-box-shadow-light)}.el-card__header{padding:calc(var(--el-card-padding) - 2px) var(--el-card-padding);border-bottom:1px solid var(--el-card-border-color);box-sizing:border-box}.el-card__body{padding:var(--el-card-padding)}.el-carousel__item{position:absolute;top:0;left:0;width:100%;height:100%;display:inline-block;overflow:hidden;z-index:calc(var(--el-index-normal) - 1)}.el-carousel__item.is-active{z-index:calc(var(--el-index-normal) - 1)}.el-carousel__item.is-animating{transition:transform .4s ease-in-out}.el-carousel__item--card{width:50%;transition:transform .4s ease-in-out}.el-carousel__item--card.is-in-stage{cursor:pointer;z-index:var(--el-index-normal)}.el-carousel__item--card.is-in-stage.is-hover .el-carousel__mask,.el-carousel__item--card.is-in-stage:hover .el-carousel__mask{opacity:.12}.el-carousel__item--card.is-active{z-index:calc(var(--el-index-normal) + 
1)}.el-carousel__mask{position:absolute;width:100%;height:100%;top:0;left:0;background-color:#fff;opacity:.24;transition:var(--el-transition-duration-fast)}.el-carousel{--el-carousel-arrow-font-size:12px;--el-carousel-arrow-size:36px;--el-carousel-arrow-background:rgba(31, 45, 61, .11);--el-carousel-arrow-hover-background:rgba(31, 45, 61, .23);--el-carousel-indicator-width:30px;--el-carousel-indicator-height:2px;--el-carousel-indicator-padding-horizontal:4px;--el-carousel-indicator-padding-vertical:12px;--el-carousel-indicator-out-color:var(--el-border-color-hover);position:relative}.el-carousel--horizontal{overflow-x:hidden}.el-carousel--vertical{overflow-y:hidden}.el-carousel__container{position:relative;height:300px}.el-carousel__arrow{border:none;outline:0;padding:0;margin:0;height:var(--el-carousel-arrow-size);width:var(--el-carousel-arrow-size);cursor:pointer;transition:var(--el-transition-duration);border-radius:50%;background-color:var(--el-carousel-arrow-background);color:#fff;position:absolute;top:50%;z-index:10;transform:translateY(-50%);text-align:center;font-size:var(--el-carousel-arrow-font-size);display:inline-flex;justify-content:center;align-items:center}.el-carousel__arrow--left{left:16px}.el-carousel__arrow--right{right:16px}.el-carousel__arrow:hover{background-color:var(--el-carousel-arrow-hover-background)}.el-carousel__arrow i{cursor:pointer}.el-carousel__indicators{position:absolute;list-style:none;margin:0;padding:0;z-index:calc(var(--el-index-normal) + 1)}.el-carousel__indicators--horizontal{bottom:0;left:50%;transform:translate(-50%)}.el-carousel__indicators--vertical{right:0;top:50%;transform:translateY(-50%)}.el-carousel__indicators--outside{bottom:calc(var(--el-carousel-indicator-height) + var(--el-carousel-indicator-padding-vertical) * 2);text-align:center;position:static;transform:none}.el-carousel__indicators--outside .el-carousel__indicator:hover button{opacity:.64}.el-carousel__indicators--outside 
button{background-color:var(--el-carousel-indicator-out-color);opacity:.24}.el-carousel__indicators--labels{left:0;right:0;transform:none;text-align:center}.el-carousel__indicators--labels .el-carousel__button{height:auto;width:auto;padding:2px 18px;font-size:12px}.el-carousel__indicators--labels .el-carousel__indicator{padding:6px 4px}.el-carousel__indicator{background-color:transparent;cursor:pointer}.el-carousel__indicator:hover button{opacity:.72}.el-carousel__indicator--horizontal{display:inline-block;padding:var(--el-carousel-indicator-padding-vertical) var(--el-carousel-indicator-padding-horizontal)}.el-carousel__indicator--vertical{padding:var(--el-carousel-indicator-padding-horizontal) var(--el-carousel-indicator-padding-vertical)}.el-carousel__indicator--vertical .el-carousel__button{width:var(--el-carousel-indicator-height);height:calc(var(--el-carousel-indicator-width)/ 2)}.el-carousel__indicator.is-active button{opacity:1}.el-carousel__button{display:block;opacity:.48;width:var(--el-carousel-indicator-width);height:var(--el-carousel-indicator-height);background-color:#fff;border:none;outline:0;padding:0;margin:0;cursor:pointer;transition:var(--el-transition-duration)}.carousel-arrow-left-enter-from,.carousel-arrow-left-leave-active{transform:translateY(-50%) translate(-10px);opacity:0}.carousel-arrow-right-enter-from,.carousel-arrow-right-leave-active{transform:translateY(-50%) translate(10px);opacity:0}.el-cascader-panel{--el-cascader-menu-text-color:var(--el-text-color-regular);--el-cascader-menu-selected-text-color:var(--el-color-primary);--el-cascader-menu-fill:var(--el-bg-color-overlay);--el-cascader-menu-font-size:var(--el-font-size-base);--el-cascader-menu-radius:var(--el-border-radius-base);--el-cascader-menu-border:solid 1px 
var(--el-border-color-light);--el-cascader-menu-shadow:var(--el-box-shadow-light);--el-cascader-node-background-hover:var(--el-fill-color-light);--el-cascader-node-color-disabled:var(--el-text-color-placeholder);--el-cascader-color-empty:var(--el-text-color-placeholder);--el-cascader-tag-background:var(--el-fill-color)}.el-cascader-panel{display:flex;border-radius:var(--el-cascader-menu-radius);font-size:var(--el-cascader-menu-font-size)}.el-cascader-panel.is-bordered{border:var(--el-cascader-menu-border);border-radius:var(--el-cascader-menu-radius)}.el-cascader-menu{min-width:180px;box-sizing:border-box;color:var(--el-cascader-menu-text-color);border-right:var(--el-cascader-menu-border)}.el-cascader-menu:last-child{border-right:none}.el-cascader-menu:last-child .el-cascader-node{padding-right:20px}.el-cascader-menu__wrap.el-scrollbar__wrap{height:204px}.el-cascader-menu__list{position:relative;min-height:100%;margin:0;padding:6px 0;list-style:none;box-sizing:border-box}.el-cascader-menu__hover-zone{position:absolute;top:0;left:0;width:100%;height:100%;pointer-events:none}.el-cascader-menu__empty-text{position:absolute;top:50%;left:50%;transform:translate(-50%,-50%);display:flex;align-items:center;color:var(--el-cascader-color-empty)}.el-cascader-menu__empty-text .is-loading{margin-right:2px}.el-cascader-node{position:relative;display:flex;align-items:center;padding:0 30px 0 
20px;height:34px;line-height:34px;outline:0}.el-cascader-node.is-selectable.in-active-path{color:var(--el-cascader-menu-text-color)}.el-cascader-node.in-active-path,.el-cascader-node.is-active,.el-cascader-node.is-selectable.in-checked-path{color:var(--el-cascader-menu-selected-text-color);font-weight:700}.el-cascader-node:not(.is-disabled){cursor:pointer}.el-cascader-node:not(.is-disabled):focus,.el-cascader-node:not(.is-disabled):hover{background:var(--el-cascader-node-background-hover)}.el-cascader-node.is-disabled{color:var(--el-cascader-node-color-disabled);cursor:not-allowed}.el-cascader-node__prefix{position:absolute;left:10px}.el-cascader-node__postfix{position:absolute;right:10px}.el-cascader-node__label{flex:1;text-align:left;padding:0 8px;white-space:nowrap;overflow:hidden;text-overflow:ellipsis}.el-cascader-node>.el-radio{margin-right:0}.el-cascader-node>.el-radio .el-radio__label{padding-left:0}.el-cascader{--el-cascader-menu-text-color:var(--el-text-color-regular);--el-cascader-menu-selected-text-color:var(--el-color-primary);--el-cascader-menu-fill:var(--el-bg-color-overlay);--el-cascader-menu-font-size:var(--el-font-size-base);--el-cascader-menu-radius:var(--el-border-radius-base);--el-cascader-menu-border:solid 1px var(--el-border-color-light);--el-cascader-menu-shadow:var(--el-box-shadow-light);--el-cascader-node-background-hover:var(--el-fill-color-light);--el-cascader-node-color-disabled:var(--el-text-color-placeholder);--el-cascader-color-empty:var(--el-text-color-placeholder);--el-cascader-tag-background:var(--el-fill-color);display:inline-block;position:relative;font-size:var(--el-font-size-base);line-height:32px;outline:0}.el-cascader:not(.is-disabled):hover .el-input__inner{cursor:pointer;box-shadow:0 0 0 1px var(--el-input-hover-border-color) inset}.el-cascader .el-input{cursor:pointer}.el-cascader .el-input .el-input__inner{text-overflow:ellipsis}.el-cascader .el-input .el-input__inner:focus{box-shadow:0 0 0 1px 
var(--el-input-focus-border-color,var(--el-color-primary)) inset}.el-cascader .el-input .el-input__suffix-inner .el-icon{height:calc(100% - 2px)}.el-cascader .el-input .el-input__suffix-inner .el-icon svg{vertical-align:middle}.el-cascader .el-input .icon-arrow-down{transition:transform var(--el-transition-duration);font-size:14px}.el-cascader .el-input .icon-arrow-down.is-reverse{transform:rotate(180deg)}.el-cascader .el-input .icon-circle-close:hover{color:var(--el-input-clear-hover-color,var(--el-text-color-secondary))}.el-cascader .el-input.is-focus .el-input__inner{box-shadow:0 0 0 1px var(--el-input-focus-border-color,var(--el-color-primary)) inset}.el-cascader--large{font-size:14px;line-height:40px}.el-cascader--small{font-size:12px;line-height:24px}.el-cascader.is-disabled .el-cascader__label{z-index:calc(var(--el-index-normal) + 1);color:var(--el-disabled-text-color)}.el-cascader__dropdown{--el-cascader-menu-text-color:var(--el-text-color-regular);--el-cascader-menu-selected-text-color:var(--el-color-primary);--el-cascader-menu-fill:var(--el-bg-color-overlay);--el-cascader-menu-font-size:var(--el-font-size-base);--el-cascader-menu-radius:var(--el-border-radius-base);--el-cascader-menu-border:solid 1px var(--el-border-color-light);--el-cascader-menu-shadow:var(--el-box-shadow-light);--el-cascader-node-background-hover:var(--el-fill-color-light);--el-cascader-node-color-disabled:var(--el-text-color-placeholder);--el-cascader-color-empty:var(--el-text-color-placeholder);--el-cascader-tag-background:var(--el-fill-color)}.el-cascader__dropdown{font-size:var(--el-cascader-menu-font-size);border-radius:var(--el-cascader-menu-radius)}.el-cascader__dropdown.el-popper[role=tooltip]{background:var(--el-cascader-menu-fill);border:var(--el-cascader-menu-border);box-shadow:var(--el-cascader-menu-shadow)}.el-cascader__dropdown.el-popper[role=tooltip] 
.el-popper__arrow:before{border:var(--el-cascader-menu-border)}.el-cascader__dropdown.el-popper[role=tooltip][data-popper-placement^=top] .el-popper__arrow:before{border-top-color:transparent;border-left-color:transparent}.el-cascader__dropdown.el-popper[role=tooltip][data-popper-placement^=bottom] .el-popper__arrow:before{border-bottom-color:transparent;border-right-color:transparent}.el-cascader__dropdown.el-popper[role=tooltip][data-popper-placement^=left] .el-popper__arrow:before{border-left-color:transparent;border-bottom-color:transparent}.el-cascader__dropdown.el-popper[role=tooltip][data-popper-placement^=right] .el-popper__arrow:before{border-right-color:transparent;border-top-color:transparent}.el-cascader__dropdown.el-popper{box-shadow:var(--el-cascader-menu-shadow)}.el-cascader__tags{position:absolute;left:0;right:30px;top:50%;transform:translateY(-50%);display:flex;flex-wrap:wrap;line-height:normal;text-align:left;box-sizing:border-box}.el-cascader__tags .el-tag{display:inline-flex;align-items:center;max-width:100%;margin:2px 0 2px 6px;text-overflow:ellipsis;background:var(--el-cascader-tag-background)}.el-cascader__tags .el-tag:not(.is-hit){border-color:transparent}.el-cascader__tags .el-tag>span{flex:1;overflow:hidden;text-overflow:ellipsis}.el-cascader__tags .el-tag .el-icon-close{flex:none;background-color:var(--el-text-color-placeholder);color:var(--el-color-white)}.el-cascader__tags .el-tag .el-icon-close:hover{background-color:var(--el-text-color-secondary)}.el-cascader__collapse-tags{white-space:normal;z-index:var(--el-index-normal);display:flex;align-items:center;flex-wrap:wrap}.el-cascader__collapse-tag{line-height:inherit;height:inherit;display:flex}.el-cascader__suggestion-panel{border-radius:var(--el-cascader-menu-radius)}.el-cascader__suggestion-list{max-height:204px;margin:0;padding:6px 
0;font-size:var(--el-font-size-base);color:var(--el-cascader-menu-text-color);text-align:center}.el-cascader__suggestion-item{display:flex;justify-content:space-between;align-items:center;height:34px;padding:0 15px;text-align:left;outline:0;cursor:pointer}.el-cascader__suggestion-item:focus,.el-cascader__suggestion-item:hover{background:var(--el-cascader-node-background-hover)}.el-cascader__suggestion-item.is-checked{color:var(--el-cascader-menu-selected-text-color);font-weight:700}.el-cascader__suggestion-item>span{margin-right:10px}.el-cascader__empty-text{margin:10px 0;color:var(--el-cascader-color-empty)}.el-cascader__search-input{flex:1;height:24px;min-width:60px;margin:2px 0 2px 11px;padding:0;color:var(--el-cascader-menu-text-color);border:none;outline:0;box-sizing:border-box}.el-cascader__search-input::placeholder{color:var(--el-text-color-placeholder)}.el-check-tag{background-color:var(--el-color-info-light-9);border-radius:var(--el-border-radius-base);color:var(--el-color-info);cursor:pointer;display:inline-block;font-size:var(--el-font-size-base);line-height:var(--el-font-size-base);padding:7px 
15px;transition:var(--el-transition-all);font-weight:700}.el-check-tag:hover{background-color:var(--el-color-info-light-7)}.el-check-tag.is-checked{background-color:var(--el-color-primary-light-8);color:var(--el-color-primary-light-1)}.el-check-tag.is-checked:hover{background-color:var(--el-color-primary-light-7)}.el-checkbox-button{--el-checkbox-button-checked-bg-color:var(--el-color-primary);--el-checkbox-button-checked-text-color:var(--el-color-white);--el-checkbox-button-checked-border-color:var(--el-color-primary)}.el-checkbox-button{position:relative;display:inline-block}.el-checkbox-button__inner{display:inline-block;line-height:1;font-weight:var(--el-checkbox-font-weight);white-space:nowrap;vertical-align:middle;cursor:pointer;background:var(--el-button-bg-color,var(--el-fill-color-blank));border:var(--el-border);border-left:0;color:var(--el-button-text-color,var(--el-text-color-regular));-webkit-appearance:none;text-align:center;box-sizing:border-box;outline:0;margin:0;position:relative;transition:var(--el-transition-all);-webkit-user-select:none;user-select:none;padding:8px 15px;font-size:var(--el-font-size-base);border-radius:0}.el-checkbox-button__inner.is-round{padding:8px 15px}.el-checkbox-button__inner:hover{color:var(--el-color-primary)}.el-checkbox-button__inner [class*=el-icon-]{line-height:.9}.el-checkbox-button__inner [class*=el-icon-]+span{margin-left:5px}.el-checkbox-button__original{opacity:0;outline:0;position:absolute;margin:0;z-index:-1}.el-checkbox-button.is-checked .el-checkbox-button__inner{color:var(--el-checkbox-button-checked-text-color);background-color:var(--el-checkbox-button-checked-bg-color);border-color:var(--el-checkbox-button-checked-border-color);box-shadow:-1px 0 0 0 var(--el-color-primary-light-7)}.el-checkbox-button.is-checked:first-child .el-checkbox-button__inner{border-left-color:var(--el-checkbox-button-checked-border-color)}.el-checkbox-button.is-disabled 
.el-checkbox-button__inner{color:var(--el-disabled-text-color);cursor:not-allowed;background-image:none;background-color:var(--el-button-disabled-bg-color,var(--el-fill-color-blank));border-color:var(--el-button-disabled-border-color,var(--el-border-color-light));box-shadow:none}.el-checkbox-button.is-disabled:first-child .el-checkbox-button__inner{border-left-color:var(--el-button-disabled-border-color,var(--el-border-color-light))}.el-checkbox-button:first-child .el-checkbox-button__inner{border-left:var(--el-border);border-radius:var(--el-border-radius-base) 0 0 var(--el-border-radius-base);box-shadow:none!important}.el-checkbox-button.is-focus .el-checkbox-button__inner{border-color:var(--el-checkbox-button-checked-border-color)}.el-checkbox-button:last-child .el-checkbox-button__inner{border-radius:0 var(--el-border-radius-base) var(--el-border-radius-base) 0}.el-checkbox-button--large .el-checkbox-button__inner{padding:12px 19px;font-size:var(--el-font-size-base);border-radius:0}.el-checkbox-button--large .el-checkbox-button__inner.is-round{padding:12px 19px}.el-checkbox-button--small .el-checkbox-button__inner{padding:5px 11px;font-size:12px;border-radius:0}.el-checkbox-button--small .el-checkbox-button__inner.is-round{padding:5px 
11px}.el-checkbox-group{font-size:0;line-height:0}.el-checkbox{--el-checkbox-font-size:14px;--el-checkbox-font-weight:var(--el-font-weight-primary);--el-checkbox-text-color:var(--el-text-color-regular);--el-checkbox-input-height:14px;--el-checkbox-input-width:14px;--el-checkbox-border-radius:var(--el-border-radius-small);--el-checkbox-bg-color:var(--el-fill-color-blank);--el-checkbox-input-border:var(--el-border);--el-checkbox-disabled-border-color:var(--el-border-color);--el-checkbox-disabled-input-fill:var(--el-fill-color-light);--el-checkbox-disabled-icon-color:var(--el-text-color-placeholder);--el-checkbox-disabled-checked-input-fill:var(--el-border-color-extra-light);--el-checkbox-disabled-checked-input-border-color:var(--el-border-color);--el-checkbox-disabled-checked-icon-color:var(--el-text-color-placeholder);--el-checkbox-checked-text-color:var(--el-color-primary);--el-checkbox-checked-input-border-color:var(--el-color-primary);--el-checkbox-checked-bg-color:var(--el-color-primary);--el-checkbox-checked-icon-color:var(--el-color-white);--el-checkbox-input-border-color-hover:var(--el-color-primary)}.el-checkbox{color:var(--el-checkbox-text-color);font-weight:var(--el-checkbox-font-weight);font-size:var(--el-font-size-base);position:relative;cursor:pointer;display:inline-flex;align-items:center;white-space:nowrap;-webkit-user-select:none;user-select:none;margin-right:30px;height:32px}.el-checkbox.is-bordered{padding:0 15px 0 9px;border-radius:var(--el-border-radius-base);border:var(--el-border);box-sizing:border-box}.el-checkbox.is-bordered.is-checked{border-color:var(--el-color-primary)}.el-checkbox.is-bordered.is-disabled{border-color:var(--el-border-color-lighter);cursor:not-allowed}.el-checkbox.is-bordered.el-checkbox--large{padding:0 19px 0 11px;border-radius:var(--el-border-radius-base)}.el-checkbox.is-bordered.el-checkbox--large .el-checkbox__label{font-size:var(--el-font-size-base)}.el-checkbox.is-bordered.el-checkbox--large 
.el-checkbox__inner{height:14px;width:14px}.el-checkbox.is-bordered.el-checkbox--small{padding:0 11px 0 7px;border-radius:calc(var(--el-border-radius-base) - 1px)}.el-checkbox.is-bordered.el-checkbox--small .el-checkbox__label{font-size:12px}.el-checkbox.is-bordered.el-checkbox--small .el-checkbox__inner{height:12px;width:12px}.el-checkbox.is-bordered.el-checkbox--small .el-checkbox__inner:after{height:6px;width:2px}.el-checkbox__input{white-space:nowrap;cursor:pointer;outline:0;display:inline-flex;position:relative}.el-checkbox__input.is-disabled .el-checkbox__inner{background-color:var(--el-checkbox-disabled-input-fill);border-color:var(--el-checkbox-disabled-border-color);cursor:not-allowed}.el-checkbox__input.is-disabled .el-checkbox__inner:after{cursor:not-allowed;border-color:var(--el-checkbox-disabled-icon-color)}.el-checkbox__input.is-disabled .el-checkbox__inner+.el-checkbox__label{cursor:not-allowed}.el-checkbox__input.is-disabled.is-checked .el-checkbox__inner{background-color:var(--el-checkbox-disabled-checked-input-fill);border-color:var(--el-checkbox-disabled-checked-input-border-color)}.el-checkbox__input.is-disabled.is-checked .el-checkbox__inner:after{border-color:var(--el-checkbox-disabled-checked-icon-color)}.el-checkbox__input.is-disabled.is-indeterminate .el-checkbox__inner{background-color:var(--el-checkbox-disabled-checked-input-fill);border-color:var(--el-checkbox-disabled-checked-input-border-color)}.el-checkbox__input.is-disabled.is-indeterminate .el-checkbox__inner:before{background-color:var(--el-checkbox-disabled-checked-icon-color);border-color:var(--el-checkbox-disabled-checked-icon-color)}.el-checkbox__input.is-disabled+span.el-checkbox__label{color:var(--el-disabled-text-color);cursor:not-allowed}.el-checkbox__input.is-checked .el-checkbox__inner{background-color:var(--el-checkbox-checked-bg-color);border-color:var(--el-checkbox-checked-input-border-color)}.el-checkbox__input.is-checked 
.el-checkbox__inner:after{transform:rotate(45deg) scaleY(1)}.el-checkbox__input.is-checked+.el-checkbox__label{color:var(--el-checkbox-checked-text-color)}.el-checkbox__input.is-focus .el-checkbox__inner{border-color:var(--el-checkbox-input-border-color-hover)}.el-checkbox__input.is-indeterminate .el-checkbox__inner{background-color:var(--el-checkbox-checked-bg-color);border-color:var(--el-checkbox-checked-input-border-color)}.el-checkbox__input.is-indeterminate .el-checkbox__inner:before{content:"";position:absolute;display:block;background-color:var(--el-checkbox-checked-icon-color);height:2px;transform:scale(.5);left:0;right:0;top:5px}.el-checkbox__input.is-indeterminate .el-checkbox__inner:after{display:none}.el-checkbox__inner{display:inline-block;position:relative;border:var(--el-checkbox-input-border);border-radius:var(--el-checkbox-border-radius);box-sizing:border-box;width:var(--el-checkbox-input-width);height:var(--el-checkbox-input-height);background-color:var(--el-checkbox-bg-color);z-index:var(--el-index-normal);transition:border-color .25s cubic-bezier(.71,-.46,.29,1.46),background-color .25s cubic-bezier(.71,-.46,.29,1.46)}.el-checkbox__inner:hover{border-color:var(--el-checkbox-input-border-color-hover)}.el-checkbox__inner:after{box-sizing:content-box;content:"";border:1px solid var(--el-checkbox-checked-icon-color);border-left:0;border-top:0;height:7px;left:4px;position:absolute;top:1px;transform:rotate(45deg) scaleY(0);width:3px;transition:transform .15s ease-in 50ms;transform-origin:center}.el-checkbox__original{opacity:0;outline:0;position:absolute;margin:0;width:0;height:0;z-index:-1}.el-checkbox__label{display:inline-block;padding-left:8px;line-height:1;font-size:var(--el-checkbox-font-size)}.el-checkbox.el-checkbox--large{height:40px}.el-checkbox.el-checkbox--large .el-checkbox__label{font-size:14px}.el-checkbox.el-checkbox--large 
.el-checkbox__inner{width:14px;height:14px}.el-checkbox.el-checkbox--small{height:24px}.el-checkbox.el-checkbox--small .el-checkbox__label{font-size:12px}.el-checkbox.el-checkbox--small .el-checkbox__inner{width:12px;height:12px}.el-checkbox.el-checkbox--small .el-checkbox__input.is-indeterminate .el-checkbox__inner:before{top:4px}.el-checkbox.el-checkbox--small .el-checkbox__inner:after{width:2px;height:6px}.el-checkbox:last-of-type{margin-right:0}[class*=el-col-]{float:left;box-sizing:border-box}[class*=el-col-].is-guttered{display:block;min-height:1px}.el-col-0,.el-col-0.is-guttered{display:none}.el-col-0{max-width:0%;flex:0 0 0%}.el-col-offset-0{margin-left:0}.el-col-pull-0{position:relative;right:0}.el-col-push-0{position:relative;left:0}.el-col-1{max-width:4.1666666667%;flex:0 0 4.1666666667%}.el-col-offset-1{margin-left:4.1666666667%}.el-col-pull-1{position:relative;right:4.1666666667%}.el-col-push-1{position:relative;left:4.1666666667%}.el-col-2{max-width:8.3333333333%;flex:0 0 8.3333333333%}.el-col-offset-2{margin-left:8.3333333333%}.el-col-pull-2{position:relative;right:8.3333333333%}.el-col-push-2{position:relative;left:8.3333333333%}.el-col-3{max-width:12.5%;flex:0 0 12.5%}.el-col-offset-3{margin-left:12.5%}.el-col-pull-3{position:relative;right:12.5%}.el-col-push-3{position:relative;left:12.5%}.el-col-4{max-width:16.6666666667%;flex:0 0 16.6666666667%}.el-col-offset-4{margin-left:16.6666666667%}.el-col-pull-4{position:relative;right:16.6666666667%}.el-col-push-4{position:relative;left:16.6666666667%}.el-col-5{max-width:20.8333333333%;flex:0 0 20.8333333333%}.el-col-offset-5{margin-left:20.8333333333%}.el-col-pull-5{position:relative;right:20.8333333333%}.el-col-push-5{position:relative;left:20.8333333333%}.el-col-6{max-width:25%;flex:0 0 25%}.el-col-offset-6{margin-left:25%}.el-col-pull-6{position:relative;right:25%}.el-col-push-6{position:relative;left:25%}.el-col-7{max-width:29.1666666667%;flex:0 0 
29.1666666667%}.el-col-offset-7{margin-left:29.1666666667%}.el-col-pull-7{position:relative;right:29.1666666667%}.el-col-push-7{position:relative;left:29.1666666667%}.el-col-8{max-width:33.3333333333%;flex:0 0 33.3333333333%}.el-col-offset-8{margin-left:33.3333333333%}.el-col-pull-8{position:relative;right:33.3333333333%}.el-col-push-8{position:relative;left:33.3333333333%}.el-col-9{max-width:37.5%;flex:0 0 37.5%}.el-col-offset-9{margin-left:37.5%}.el-col-pull-9{position:relative;right:37.5%}.el-col-push-9{position:relative;left:37.5%}.el-col-10{max-width:41.6666666667%;flex:0 0 41.6666666667%}.el-col-offset-10{margin-left:41.6666666667%}.el-col-pull-10{position:relative;right:41.6666666667%}.el-col-push-10{position:relative;left:41.6666666667%}.el-col-11{max-width:45.8333333333%;flex:0 0 45.8333333333%}.el-col-offset-11{margin-left:45.8333333333%}.el-col-pull-11{position:relative;right:45.8333333333%}.el-col-push-11{position:relative;left:45.8333333333%}.el-col-12{max-width:50%;flex:0 0 50%}.el-col-offset-12{margin-left:50%}.el-col-pull-12{position:relative;right:50%}.el-col-push-12{position:relative;left:50%}.el-col-13{max-width:54.1666666667%;flex:0 0 54.1666666667%}.el-col-offset-13{margin-left:54.1666666667%}.el-col-pull-13{position:relative;right:54.1666666667%}.el-col-push-13{position:relative;left:54.1666666667%}.el-col-14{max-width:58.3333333333%;flex:0 0 58.3333333333%}.el-col-offset-14{margin-left:58.3333333333%}.el-col-pull-14{position:relative;right:58.3333333333%}.el-col-push-14{position:relative;left:58.3333333333%}.el-col-15{max-width:62.5%;flex:0 0 62.5%}.el-col-offset-15{margin-left:62.5%}.el-col-pull-15{position:relative;right:62.5%}.el-col-push-15{position:relative;left:62.5%}.el-col-16{max-width:66.6666666667%;flex:0 0 66.6666666667%}.el-col-offset-16{margin-left:66.6666666667%}.el-col-pull-16{position:relative;right:66.6666666667%}.el-col-push-16{position:relative;left:66.6666666667%}.el-col-17{max-width:70.8333333333%;flex:0 0 
70.8333333333%}.el-col-offset-17{margin-left:70.8333333333%}.el-col-pull-17{position:relative;right:70.8333333333%}.el-col-push-17{position:relative;left:70.8333333333%}.el-col-18{max-width:75%;flex:0 0 75%}.el-col-offset-18{margin-left:75%}.el-col-pull-18{position:relative;right:75%}.el-col-push-18{position:relative;left:75%}.el-col-19{max-width:79.1666666667%;flex:0 0 79.1666666667%}.el-col-offset-19{margin-left:79.1666666667%}.el-col-pull-19{position:relative;right:79.1666666667%}.el-col-push-19{position:relative;left:79.1666666667%}.el-col-20{max-width:83.3333333333%;flex:0 0 83.3333333333%}.el-col-offset-20{margin-left:83.3333333333%}.el-col-pull-20{position:relative;right:83.3333333333%}.el-col-push-20{position:relative;left:83.3333333333%}.el-col-21{max-width:87.5%;flex:0 0 87.5%}.el-col-offset-21{margin-left:87.5%}.el-col-pull-21{position:relative;right:87.5%}.el-col-push-21{position:relative;left:87.5%}.el-col-22{max-width:91.6666666667%;flex:0 0 91.6666666667%}.el-col-offset-22{margin-left:91.6666666667%}.el-col-pull-22{position:relative;right:91.6666666667%}.el-col-push-22{position:relative;left:91.6666666667%}.el-col-23{max-width:95.8333333333%;flex:0 0 95.8333333333%}.el-col-offset-23{margin-left:95.8333333333%}.el-col-pull-23{position:relative;right:95.8333333333%}.el-col-push-23{position:relative;left:95.8333333333%}.el-col-24{max-width:100%;flex:0 0 100%}.el-col-offset-24{margin-left:100%}.el-col-pull-24{position:relative;right:100%}.el-col-push-24{position:relative;left:100%}@media only screen and (max-width:768px){.el-col-xs-0,.el-col-xs-0.is-guttered{display:none}.el-col-xs-0{max-width:0%;flex:0 0 0%}.el-col-xs-offset-0{margin-left:0}.el-col-xs-pull-0{position:relative;right:0}.el-col-xs-push-0{position:relative;left:0}.el-col-xs-1{display:block;max-width:4.1666666667%;flex:0 0 
4.1666666667%}.el-col-xs-offset-1{margin-left:4.1666666667%}.el-col-xs-pull-1{position:relative;right:4.1666666667%}.el-col-xs-push-1{position:relative;left:4.1666666667%}.el-col-xs-2{display:block;max-width:8.3333333333%;flex:0 0 8.3333333333%}.el-col-xs-offset-2{margin-left:8.3333333333%}.el-col-xs-pull-2{position:relative;right:8.3333333333%}.el-col-xs-push-2{position:relative;left:8.3333333333%}.el-col-xs-3{display:block;max-width:12.5%;flex:0 0 12.5%}.el-col-xs-offset-3{margin-left:12.5%}.el-col-xs-pull-3{position:relative;right:12.5%}.el-col-xs-push-3{position:relative;left:12.5%}.el-col-xs-4{display:block;max-width:16.6666666667%;flex:0 0 16.6666666667%}.el-col-xs-offset-4{margin-left:16.6666666667%}.el-col-xs-pull-4{position:relative;right:16.6666666667%}.el-col-xs-push-4{position:relative;left:16.6666666667%}.el-col-xs-5{display:block;max-width:20.8333333333%;flex:0 0 20.8333333333%}.el-col-xs-offset-5{margin-left:20.8333333333%}.el-col-xs-pull-5{position:relative;right:20.8333333333%}.el-col-xs-push-5{position:relative;left:20.8333333333%}.el-col-xs-6{display:block;max-width:25%;flex:0 0 25%}.el-col-xs-offset-6{margin-left:25%}.el-col-xs-pull-6{position:relative;right:25%}.el-col-xs-push-6{position:relative;left:25%}.el-col-xs-7{display:block;max-width:29.1666666667%;flex:0 0 29.1666666667%}.el-col-xs-offset-7{margin-left:29.1666666667%}.el-col-xs-pull-7{position:relative;right:29.1666666667%}.el-col-xs-push-7{position:relative;left:29.1666666667%}.el-col-xs-8{display:block;max-width:33.3333333333%;flex:0 0 33.3333333333%}.el-col-xs-offset-8{margin-left:33.3333333333%}.el-col-xs-pull-8{position:relative;right:33.3333333333%}.el-col-xs-push-8{position:relative;left:33.3333333333%}.el-col-xs-9{display:block;max-width:37.5%;flex:0 0 37.5%}.el-col-xs-offset-9{margin-left:37.5%}.el-col-xs-pull-9{position:relative;right:37.5%}.el-col-xs-push-9{position:relative;left:37.5%}.el-col-xs-10{display:block;max-width:41.6666666667%;flex:0 0 
41.6666666667%}.el-col-xs-offset-10{margin-left:41.6666666667%}.el-col-xs-pull-10{position:relative;right:41.6666666667%}.el-col-xs-push-10{position:relative;left:41.6666666667%}.el-col-xs-11{display:block;max-width:45.8333333333%;flex:0 0 45.8333333333%}.el-col-xs-offset-11{margin-left:45.8333333333%}.el-col-xs-pull-11{position:relative;right:45.8333333333%}.el-col-xs-push-11{position:relative;left:45.8333333333%}.el-col-xs-12{display:block;max-width:50%;flex:0 0 50%}.el-col-xs-offset-12{margin-left:50%}.el-col-xs-pull-12{position:relative;right:50%}.el-col-xs-push-12{position:relative;left:50%}.el-col-xs-13{display:block;max-width:54.1666666667%;flex:0 0 54.1666666667%}.el-col-xs-offset-13{margin-left:54.1666666667%}.el-col-xs-pull-13{position:relative;right:54.1666666667%}.el-col-xs-push-13{position:relative;left:54.1666666667%}.el-col-xs-14{display:block;max-width:58.3333333333%;flex:0 0 58.3333333333%}.el-col-xs-offset-14{margin-left:58.3333333333%}.el-col-xs-pull-14{position:relative;right:58.3333333333%}.el-col-xs-push-14{position:relative;left:58.3333333333%}.el-col-xs-15{display:block;max-width:62.5%;flex:0 0 62.5%}.el-col-xs-offset-15{margin-left:62.5%}.el-col-xs-pull-15{position:relative;right:62.5%}.el-col-xs-push-15{position:relative;left:62.5%}.el-col-xs-16{display:block;max-width:66.6666666667%;flex:0 0 66.6666666667%}.el-col-xs-offset-16{margin-left:66.6666666667%}.el-col-xs-pull-16{position:relative;right:66.6666666667%}.el-col-xs-push-16{position:relative;left:66.6666666667%}.el-col-xs-17{display:block;max-width:70.8333333333%;flex:0 0 70.8333333333%}.el-col-xs-offset-17{margin-left:70.8333333333%}.el-col-xs-pull-17{position:relative;right:70.8333333333%}.el-col-xs-push-17{position:relative;left:70.8333333333%}.el-col-xs-18{display:block;max-width:75%;flex:0 0 
75%}.el-col-xs-offset-18{margin-left:75%}.el-col-xs-pull-18{position:relative;right:75%}.el-col-xs-push-18{position:relative;left:75%}.el-col-xs-19{display:block;max-width:79.1666666667%;flex:0 0 79.1666666667%}.el-col-xs-offset-19{margin-left:79.1666666667%}.el-col-xs-pull-19{position:relative;right:79.1666666667%}.el-col-xs-push-19{position:relative;left:79.1666666667%}.el-col-xs-20{display:block;max-width:83.3333333333%;flex:0 0 83.3333333333%}.el-col-xs-offset-20{margin-left:83.3333333333%}.el-col-xs-pull-20{position:relative;right:83.3333333333%}.el-col-xs-push-20{position:relative;left:83.3333333333%}.el-col-xs-21{display:block;max-width:87.5%;flex:0 0 87.5%}.el-col-xs-offset-21{margin-left:87.5%}.el-col-xs-pull-21{position:relative;right:87.5%}.el-col-xs-push-21{position:relative;left:87.5%}.el-col-xs-22{display:block;max-width:91.6666666667%;flex:0 0 91.6666666667%}.el-col-xs-offset-22{margin-left:91.6666666667%}.el-col-xs-pull-22{position:relative;right:91.6666666667%}.el-col-xs-push-22{position:relative;left:91.6666666667%}.el-col-xs-23{display:block;max-width:95.8333333333%;flex:0 0 95.8333333333%}.el-col-xs-offset-23{margin-left:95.8333333333%}.el-col-xs-pull-23{position:relative;right:95.8333333333%}.el-col-xs-push-23{position:relative;left:95.8333333333%}.el-col-xs-24{display:block;max-width:100%;flex:0 0 100%}.el-col-xs-offset-24{margin-left:100%}.el-col-xs-pull-24{position:relative;right:100%}.el-col-xs-push-24{position:relative;left:100%}}@media only screen and (min-width:768px){.el-col-sm-0,.el-col-sm-0.is-guttered{display:none}.el-col-sm-0{max-width:0%;flex:0 0 0%}.el-col-sm-offset-0{margin-left:0}.el-col-sm-pull-0{position:relative;right:0}.el-col-sm-push-0{position:relative;left:0}.el-col-sm-1{display:block;max-width:4.1666666667%;flex:0 0 
4.1666666667%}.el-col-sm-offset-1{margin-left:4.1666666667%}.el-col-sm-pull-1{position:relative;right:4.1666666667%}.el-col-sm-push-1{position:relative;left:4.1666666667%}.el-col-sm-2{display:block;max-width:8.3333333333%;flex:0 0 8.3333333333%}.el-col-sm-offset-2{margin-left:8.3333333333%}.el-col-sm-pull-2{position:relative;right:8.3333333333%}.el-col-sm-push-2{position:relative;left:8.3333333333%}.el-col-sm-3{display:block;max-width:12.5%;flex:0 0 12.5%}.el-col-sm-offset-3{margin-left:12.5%}.el-col-sm-pull-3{position:relative;right:12.5%}.el-col-sm-push-3{position:relative;left:12.5%}.el-col-sm-4{display:block;max-width:16.6666666667%;flex:0 0 16.6666666667%}.el-col-sm-offset-4{margin-left:16.6666666667%}.el-col-sm-pull-4{position:relative;right:16.6666666667%}.el-col-sm-push-4{position:relative;left:16.6666666667%}.el-col-sm-5{display:block;max-width:20.8333333333%;flex:0 0 20.8333333333%}.el-col-sm-offset-5{margin-left:20.8333333333%}.el-col-sm-pull-5{position:relative;right:20.8333333333%}.el-col-sm-push-5{position:relative;left:20.8333333333%}.el-col-sm-6{display:block;max-width:25%;flex:0 0 25%}.el-col-sm-offset-6{margin-left:25%}.el-col-sm-pull-6{position:relative;right:25%}.el-col-sm-push-6{position:relative;left:25%}.el-col-sm-7{display:block;max-width:29.1666666667%;flex:0 0 29.1666666667%}.el-col-sm-offset-7{margin-left:29.1666666667%}.el-col-sm-pull-7{position:relative;right:29.1666666667%}.el-col-sm-push-7{position:relative;left:29.1666666667%}.el-col-sm-8{display:block;max-width:33.3333333333%;flex:0 0 33.3333333333%}.el-col-sm-offset-8{margin-left:33.3333333333%}.el-col-sm-pull-8{position:relative;right:33.3333333333%}.el-col-sm-push-8{position:relative;left:33.3333333333%}.el-col-sm-9{display:block;max-width:37.5%;flex:0 0 37.5%}.el-col-sm-offset-9{margin-left:37.5%}.el-col-sm-pull-9{position:relative;right:37.5%}.el-col-sm-push-9{position:relative;left:37.5%}.el-col-sm-10{display:block;max-width:41.6666666667%;flex:0 0 
41.6666666667%}.el-col-sm-offset-10{margin-left:41.6666666667%}.el-col-sm-pull-10{position:relative;right:41.6666666667%}.el-col-sm-push-10{position:relative;left:41.6666666667%}.el-col-sm-11{display:block;max-width:45.8333333333%;flex:0 0 45.8333333333%}.el-col-sm-offset-11{margin-left:45.8333333333%}.el-col-sm-pull-11{position:relative;right:45.8333333333%}.el-col-sm-push-11{position:relative;left:45.8333333333%}.el-col-sm-12{display:block;max-width:50%;flex:0 0 50%}.el-col-sm-offset-12{margin-left:50%}.el-col-sm-pull-12{position:relative;right:50%}.el-col-sm-push-12{position:relative;left:50%}.el-col-sm-13{display:block;max-width:54.1666666667%;flex:0 0 54.1666666667%}.el-col-sm-offset-13{margin-left:54.1666666667%}.el-col-sm-pull-13{position:relative;right:54.1666666667%}.el-col-sm-push-13{position:relative;left:54.1666666667%}.el-col-sm-14{display:block;max-width:58.3333333333%;flex:0 0 58.3333333333%}.el-col-sm-offset-14{margin-left:58.3333333333%}.el-col-sm-pull-14{position:relative;right:58.3333333333%}.el-col-sm-push-14{position:relative;left:58.3333333333%}.el-col-sm-15{display:block;max-width:62.5%;flex:0 0 62.5%}.el-col-sm-offset-15{margin-left:62.5%}.el-col-sm-pull-15{position:relative;right:62.5%}.el-col-sm-push-15{position:relative;left:62.5%}.el-col-sm-16{display:block;max-width:66.6666666667%;flex:0 0 66.6666666667%}.el-col-sm-offset-16{margin-left:66.6666666667%}.el-col-sm-pull-16{position:relative;right:66.6666666667%}.el-col-sm-push-16{position:relative;left:66.6666666667%}.el-col-sm-17{display:block;max-width:70.8333333333%;flex:0 0 70.8333333333%}.el-col-sm-offset-17{margin-left:70.8333333333%}.el-col-sm-pull-17{position:relative;right:70.8333333333%}.el-col-sm-push-17{position:relative;left:70.8333333333%}.el-col-sm-18{display:block;max-width:75%;flex:0 0 
75%}.el-col-sm-offset-18{margin-left:75%}.el-col-sm-pull-18{position:relative;right:75%}.el-col-sm-push-18{position:relative;left:75%}.el-col-sm-19{display:block;max-width:79.1666666667%;flex:0 0 79.1666666667%}.el-col-sm-offset-19{margin-left:79.1666666667%}.el-col-sm-pull-19{position:relative;right:79.1666666667%}.el-col-sm-push-19{position:relative;left:79.1666666667%}.el-col-sm-20{display:block;max-width:83.3333333333%;flex:0 0 83.3333333333%}.el-col-sm-offset-20{margin-left:83.3333333333%}.el-col-sm-pull-20{position:relative;right:83.3333333333%}.el-col-sm-push-20{position:relative;left:83.3333333333%}.el-col-sm-21{display:block;max-width:87.5%;flex:0 0 87.5%}.el-col-sm-offset-21{margin-left:87.5%}.el-col-sm-pull-21{position:relative;right:87.5%}.el-col-sm-push-21{position:relative;left:87.5%}.el-col-sm-22{display:block;max-width:91.6666666667%;flex:0 0 91.6666666667%}.el-col-sm-offset-22{margin-left:91.6666666667%}.el-col-sm-pull-22{position:relative;right:91.6666666667%}.el-col-sm-push-22{position:relative;left:91.6666666667%}.el-col-sm-23{display:block;max-width:95.8333333333%;flex:0 0 95.8333333333%}.el-col-sm-offset-23{margin-left:95.8333333333%}.el-col-sm-pull-23{position:relative;right:95.8333333333%}.el-col-sm-push-23{position:relative;left:95.8333333333%}.el-col-sm-24{display:block;max-width:100%;flex:0 0 100%}.el-col-sm-offset-24{margin-left:100%}.el-col-sm-pull-24{position:relative;right:100%}.el-col-sm-push-24{position:relative;left:100%}}@media only screen and (min-width:992px){.el-col-md-0,.el-col-md-0.is-guttered{display:none}.el-col-md-0{max-width:0%;flex:0 0 0%}.el-col-md-offset-0{margin-left:0}.el-col-md-pull-0{position:relative;right:0}.el-col-md-push-0{position:relative;left:0}.el-col-md-1{display:block;max-width:4.1666666667%;flex:0 0 
4.1666666667%}.el-col-md-offset-1{margin-left:4.1666666667%}.el-col-md-pull-1{position:relative;right:4.1666666667%}.el-col-md-push-1{position:relative;left:4.1666666667%}.el-col-md-2{display:block;max-width:8.3333333333%;flex:0 0 8.3333333333%}.el-col-md-offset-2{margin-left:8.3333333333%}.el-col-md-pull-2{position:relative;right:8.3333333333%}.el-col-md-push-2{position:relative;left:8.3333333333%}.el-col-md-3{display:block;max-width:12.5%;flex:0 0 12.5%}.el-col-md-offset-3{margin-left:12.5%}.el-col-md-pull-3{position:relative;right:12.5%}.el-col-md-push-3{position:relative;left:12.5%}.el-col-md-4{display:block;max-width:16.6666666667%;flex:0 0 16.6666666667%}.el-col-md-offset-4{margin-left:16.6666666667%}.el-col-md-pull-4{position:relative;right:16.6666666667%}.el-col-md-push-4{position:relative;left:16.6666666667%}.el-col-md-5{display:block;max-width:20.8333333333%;flex:0 0 20.8333333333%}.el-col-md-offset-5{margin-left:20.8333333333%}.el-col-md-pull-5{position:relative;right:20.8333333333%}.el-col-md-push-5{position:relative;left:20.8333333333%}.el-col-md-6{display:block;max-width:25%;flex:0 0 25%}.el-col-md-offset-6{margin-left:25%}.el-col-md-pull-6{position:relative;right:25%}.el-col-md-push-6{position:relative;left:25%}.el-col-md-7{display:block;max-width:29.1666666667%;flex:0 0 29.1666666667%}.el-col-md-offset-7{margin-left:29.1666666667%}.el-col-md-pull-7{position:relative;right:29.1666666667%}.el-col-md-push-7{position:relative;left:29.1666666667%}.el-col-md-8{display:block;max-width:33.3333333333%;flex:0 0 33.3333333333%}.el-col-md-offset-8{margin-left:33.3333333333%}.el-col-md-pull-8{position:relative;right:33.3333333333%}.el-col-md-push-8{position:relative;left:33.3333333333%}.el-col-md-9{display:block;max-width:37.5%;flex:0 0 37.5%}.el-col-md-offset-9{margin-left:37.5%}.el-col-md-pull-9{position:relative;right:37.5%}.el-col-md-push-9{position:relative;left:37.5%}.el-col-md-10{display:block;max-width:41.6666666667%;flex:0 0 
41.6666666667%}.el-col-md-offset-10{margin-left:41.6666666667%}.el-col-md-pull-10{position:relative;right:41.6666666667%}.el-col-md-push-10{position:relative;left:41.6666666667%}.el-col-md-11{display:block;max-width:45.8333333333%;flex:0 0 45.8333333333%}.el-col-md-offset-11{margin-left:45.8333333333%}.el-col-md-pull-11{position:relative;right:45.8333333333%}.el-col-md-push-11{position:relative;left:45.8333333333%}.el-col-md-12{display:block;max-width:50%;flex:0 0 50%}.el-col-md-offset-12{margin-left:50%}.el-col-md-pull-12{position:relative;right:50%}.el-col-md-push-12{position:relative;left:50%}.el-col-md-13{display:block;max-width:54.1666666667%;flex:0 0 54.1666666667%}.el-col-md-offset-13{margin-left:54.1666666667%}.el-col-md-pull-13{position:relative;right:54.1666666667%}.el-col-md-push-13{position:relative;left:54.1666666667%}.el-col-md-14{display:block;max-width:58.3333333333%;flex:0 0 58.3333333333%}.el-col-md-offset-14{margin-left:58.3333333333%}.el-col-md-pull-14{position:relative;right:58.3333333333%}.el-col-md-push-14{position:relative;left:58.3333333333%}.el-col-md-15{display:block;max-width:62.5%;flex:0 0 62.5%}.el-col-md-offset-15{margin-left:62.5%}.el-col-md-pull-15{position:relative;right:62.5%}.el-col-md-push-15{position:relative;left:62.5%}.el-col-md-16{display:block;max-width:66.6666666667%;flex:0 0 66.6666666667%}.el-col-md-offset-16{margin-left:66.6666666667%}.el-col-md-pull-16{position:relative;right:66.6666666667%}.el-col-md-push-16{position:relative;left:66.6666666667%}.el-col-md-17{display:block;max-width:70.8333333333%;flex:0 0 70.8333333333%}.el-col-md-offset-17{margin-left:70.8333333333%}.el-col-md-pull-17{position:relative;right:70.8333333333%}.el-col-md-push-17{position:relative;left:70.8333333333%}.el-col-md-18{display:block;max-width:75%;flex:0 0 
75%}.el-col-md-offset-18{margin-left:75%}.el-col-md-pull-18{position:relative;right:75%}.el-col-md-push-18{position:relative;left:75%}.el-col-md-19{display:block;max-width:79.1666666667%;flex:0 0 79.1666666667%}.el-col-md-offset-19{margin-left:79.1666666667%}.el-col-md-pull-19{position:relative;right:79.1666666667%}.el-col-md-push-19{position:relative;left:79.1666666667%}.el-col-md-20{display:block;max-width:83.3333333333%;flex:0 0 83.3333333333%}.el-col-md-offset-20{margin-left:83.3333333333%}.el-col-md-pull-20{position:relative;right:83.3333333333%}.el-col-md-push-20{position:relative;left:83.3333333333%}.el-col-md-21{display:block;max-width:87.5%;flex:0 0 87.5%}.el-col-md-offset-21{margin-left:87.5%}.el-col-md-pull-21{position:relative;right:87.5%}.el-col-md-push-21{position:relative;left:87.5%}.el-col-md-22{display:block;max-width:91.6666666667%;flex:0 0 91.6666666667%}.el-col-md-offset-22{margin-left:91.6666666667%}.el-col-md-pull-22{position:relative;right:91.6666666667%}.el-col-md-push-22{position:relative;left:91.6666666667%}.el-col-md-23{display:block;max-width:95.8333333333%;flex:0 0 95.8333333333%}.el-col-md-offset-23{margin-left:95.8333333333%}.el-col-md-pull-23{position:relative;right:95.8333333333%}.el-col-md-push-23{position:relative;left:95.8333333333%}.el-col-md-24{display:block;max-width:100%;flex:0 0 100%}.el-col-md-offset-24{margin-left:100%}.el-col-md-pull-24{position:relative;right:100%}.el-col-md-push-24{position:relative;left:100%}}@media only screen and (min-width:1200px){.el-col-lg-0,.el-col-lg-0.is-guttered{display:none}.el-col-lg-0{max-width:0%;flex:0 0 0%}.el-col-lg-offset-0{margin-left:0}.el-col-lg-pull-0{position:relative;right:0}.el-col-lg-push-0{position:relative;left:0}.el-col-lg-1{display:block;max-width:4.1666666667%;flex:0 0 
4.1666666667%}.el-col-lg-offset-1{margin-left:4.1666666667%}.el-col-lg-pull-1{position:relative;right:4.1666666667%}.el-col-lg-push-1{position:relative;left:4.1666666667%}.el-col-lg-2{display:block;max-width:8.3333333333%;flex:0 0 8.3333333333%}.el-col-lg-offset-2{margin-left:8.3333333333%}.el-col-lg-pull-2{position:relative;right:8.3333333333%}.el-col-lg-push-2{position:relative;left:8.3333333333%}.el-col-lg-3{display:block;max-width:12.5%;flex:0 0 12.5%}.el-col-lg-offset-3{margin-left:12.5%}.el-col-lg-pull-3{position:relative;right:12.5%}.el-col-lg-push-3{position:relative;left:12.5%}.el-col-lg-4{display:block;max-width:16.6666666667%;flex:0 0 16.6666666667%}.el-col-lg-offset-4{margin-left:16.6666666667%}.el-col-lg-pull-4{position:relative;right:16.6666666667%}.el-col-lg-push-4{position:relative;left:16.6666666667%}.el-col-lg-5{display:block;max-width:20.8333333333%;flex:0 0 20.8333333333%}.el-col-lg-offset-5{margin-left:20.8333333333%}.el-col-lg-pull-5{position:relative;right:20.8333333333%}.el-col-lg-push-5{position:relative;left:20.8333333333%}.el-col-lg-6{display:block;max-width:25%;flex:0 0 25%}.el-col-lg-offset-6{margin-left:25%}.el-col-lg-pull-6{position:relative;right:25%}.el-col-lg-push-6{position:relative;left:25%}.el-col-lg-7{display:block;max-width:29.1666666667%;flex:0 0 29.1666666667%}.el-col-lg-offset-7{margin-left:29.1666666667%}.el-col-lg-pull-7{position:relative;right:29.1666666667%}.el-col-lg-push-7{position:relative;left:29.1666666667%}.el-col-lg-8{display:block;max-width:33.3333333333%;flex:0 0 33.3333333333%}.el-col-lg-offset-8{margin-left:33.3333333333%}.el-col-lg-pull-8{position:relative;right:33.3333333333%}.el-col-lg-push-8{position:relative;left:33.3333333333%}.el-col-lg-9{display:block;max-width:37.5%;flex:0 0 37.5%}.el-col-lg-offset-9{margin-left:37.5%}.el-col-lg-pull-9{position:relative;right:37.5%}.el-col-lg-push-9{position:relative;left:37.5%}.el-col-lg-10{display:block;max-width:41.6666666667%;flex:0 0 
41.6666666667%}.el-col-lg-offset-10{margin-left:41.6666666667%}.el-col-lg-pull-10{position:relative;right:41.6666666667%}.el-col-lg-push-10{position:relative;left:41.6666666667%}.el-col-lg-11{display:block;max-width:45.8333333333%;flex:0 0 45.8333333333%}.el-col-lg-offset-11{margin-left:45.8333333333%}.el-col-lg-pull-11{position:relative;right:45.8333333333%}.el-col-lg-push-11{position:relative;left:45.8333333333%}.el-col-lg-12{display:block;max-width:50%;flex:0 0 50%}.el-col-lg-offset-12{margin-left:50%}.el-col-lg-pull-12{position:relative;right:50%}.el-col-lg-push-12{position:relative;left:50%}.el-col-lg-13{display:block;max-width:54.1666666667%;flex:0 0 54.1666666667%}.el-col-lg-offset-13{margin-left:54.1666666667%}.el-col-lg-pull-13{position:relative;right:54.1666666667%}.el-col-lg-push-13{position:relative;left:54.1666666667%}.el-col-lg-14{display:block;max-width:58.3333333333%;flex:0 0 58.3333333333%}.el-col-lg-offset-14{margin-left:58.3333333333%}.el-col-lg-pull-14{position:relative;right:58.3333333333%}.el-col-lg-push-14{position:relative;left:58.3333333333%}.el-col-lg-15{display:block;max-width:62.5%;flex:0 0 62.5%}.el-col-lg-offset-15{margin-left:62.5%}.el-col-lg-pull-15{position:relative;right:62.5%}.el-col-lg-push-15{position:relative;left:62.5%}.el-col-lg-16{display:block;max-width:66.6666666667%;flex:0 0 66.6666666667%}.el-col-lg-offset-16{margin-left:66.6666666667%}.el-col-lg-pull-16{position:relative;right:66.6666666667%}.el-col-lg-push-16{position:relative;left:66.6666666667%}.el-col-lg-17{display:block;max-width:70.8333333333%;flex:0 0 70.8333333333%}.el-col-lg-offset-17{margin-left:70.8333333333%}.el-col-lg-pull-17{position:relative;right:70.8333333333%}.el-col-lg-push-17{position:relative;left:70.8333333333%}.el-col-lg-18{display:block;max-width:75%;flex:0 0 
75%}.el-col-lg-offset-18{margin-left:75%}.el-col-lg-pull-18{position:relative;right:75%}.el-col-lg-push-18{position:relative;left:75%}.el-col-lg-19{display:block;max-width:79.1666666667%;flex:0 0 79.1666666667%}.el-col-lg-offset-19{margin-left:79.1666666667%}.el-col-lg-pull-19{position:relative;right:79.1666666667%}.el-col-lg-push-19{position:relative;left:79.1666666667%}.el-col-lg-20{display:block;max-width:83.3333333333%;flex:0 0 83.3333333333%}.el-col-lg-offset-20{margin-left:83.3333333333%}.el-col-lg-pull-20{position:relative;right:83.3333333333%}.el-col-lg-push-20{position:relative;left:83.3333333333%}.el-col-lg-21{display:block;max-width:87.5%;flex:0 0 87.5%}.el-col-lg-offset-21{margin-left:87.5%}.el-col-lg-pull-21{position:relative;right:87.5%}.el-col-lg-push-21{position:relative;left:87.5%}.el-col-lg-22{display:block;max-width:91.6666666667%;flex:0 0 91.6666666667%}.el-col-lg-offset-22{margin-left:91.6666666667%}.el-col-lg-pull-22{position:relative;right:91.6666666667%}.el-col-lg-push-22{position:relative;left:91.6666666667%}.el-col-lg-23{display:block;max-width:95.8333333333%;flex:0 0 95.8333333333%}.el-col-lg-offset-23{margin-left:95.8333333333%}.el-col-lg-pull-23{position:relative;right:95.8333333333%}.el-col-lg-push-23{position:relative;left:95.8333333333%}.el-col-lg-24{display:block;max-width:100%;flex:0 0 100%}.el-col-lg-offset-24{margin-left:100%}.el-col-lg-pull-24{position:relative;right:100%}.el-col-lg-push-24{position:relative;left:100%}}@media only screen and (min-width:1920px){.el-col-xl-0,.el-col-xl-0.is-guttered{display:none}.el-col-xl-0{max-width:0%;flex:0 0 0%}.el-col-xl-offset-0{margin-left:0}.el-col-xl-pull-0{position:relative;right:0}.el-col-xl-push-0{position:relative;left:0}.el-col-xl-1{display:block;max-width:4.1666666667%;flex:0 0 
4.1666666667%}.el-col-xl-offset-1{margin-left:4.1666666667%}.el-col-xl-pull-1{position:relative;right:4.1666666667%}.el-col-xl-push-1{position:relative;left:4.1666666667%}.el-col-xl-2{display:block;max-width:8.3333333333%;flex:0 0 8.3333333333%}.el-col-xl-offset-2{margin-left:8.3333333333%}.el-col-xl-pull-2{position:relative;right:8.3333333333%}.el-col-xl-push-2{position:relative;left:8.3333333333%}.el-col-xl-3{display:block;max-width:12.5%;flex:0 0 12.5%}.el-col-xl-offset-3{margin-left:12.5%}.el-col-xl-pull-3{position:relative;right:12.5%}.el-col-xl-push-3{position:relative;left:12.5%}.el-col-xl-4{display:block;max-width:16.6666666667%;flex:0 0 16.6666666667%}.el-col-xl-offset-4{margin-left:16.6666666667%}.el-col-xl-pull-4{position:relative;right:16.6666666667%}.el-col-xl-push-4{position:relative;left:16.6666666667%}.el-col-xl-5{display:block;max-width:20.8333333333%;flex:0 0 20.8333333333%}.el-col-xl-offset-5{margin-left:20.8333333333%}.el-col-xl-pull-5{position:relative;right:20.8333333333%}.el-col-xl-push-5{position:relative;left:20.8333333333%}.el-col-xl-6{display:block;max-width:25%;flex:0 0 25%}.el-col-xl-offset-6{margin-left:25%}.el-col-xl-pull-6{position:relative;right:25%}.el-col-xl-push-6{position:relative;left:25%}.el-col-xl-7{display:block;max-width:29.1666666667%;flex:0 0 29.1666666667%}.el-col-xl-offset-7{margin-left:29.1666666667%}.el-col-xl-pull-7{position:relative;right:29.1666666667%}.el-col-xl-push-7{position:relative;left:29.1666666667%}.el-col-xl-8{display:block;max-width:33.3333333333%;flex:0 0 33.3333333333%}.el-col-xl-offset-8{margin-left:33.3333333333%}.el-col-xl-pull-8{position:relative;right:33.3333333333%}.el-col-xl-push-8{position:relative;left:33.3333333333%}.el-col-xl-9{display:block;max-width:37.5%;flex:0 0 37.5%}.el-col-xl-offset-9{margin-left:37.5%}.el-col-xl-pull-9{position:relative;right:37.5%}.el-col-xl-push-9{position:relative;left:37.5%}.el-col-xl-10{display:block;max-width:41.6666666667%;flex:0 0 
41.6666666667%}.el-col-xl-offset-10{margin-left:41.6666666667%}.el-col-xl-pull-10{position:relative;right:41.6666666667%}.el-col-xl-push-10{position:relative;left:41.6666666667%}.el-col-xl-11{display:block;max-width:45.8333333333%;flex:0 0 45.8333333333%}.el-col-xl-offset-11{margin-left:45.8333333333%}.el-col-xl-pull-11{position:relative;right:45.8333333333%}.el-col-xl-push-11{position:relative;left:45.8333333333%}.el-col-xl-12{display:block;max-width:50%;flex:0 0 50%}.el-col-xl-offset-12{margin-left:50%}.el-col-xl-pull-12{position:relative;right:50%}.el-col-xl-push-12{position:relative;left:50%}.el-col-xl-13{display:block;max-width:54.1666666667%;flex:0 0 54.1666666667%}.el-col-xl-offset-13{margin-left:54.1666666667%}.el-col-xl-pull-13{position:relative;right:54.1666666667%}.el-col-xl-push-13{position:relative;left:54.1666666667%}.el-col-xl-14{display:block;max-width:58.3333333333%;flex:0 0 58.3333333333%}.el-col-xl-offset-14{margin-left:58.3333333333%}.el-col-xl-pull-14{position:relative;right:58.3333333333%}.el-col-xl-push-14{position:relative;left:58.3333333333%}.el-col-xl-15{display:block;max-width:62.5%;flex:0 0 62.5%}.el-col-xl-offset-15{margin-left:62.5%}.el-col-xl-pull-15{position:relative;right:62.5%}.el-col-xl-push-15{position:relative;left:62.5%}.el-col-xl-16{display:block;max-width:66.6666666667%;flex:0 0 66.6666666667%}.el-col-xl-offset-16{margin-left:66.6666666667%}.el-col-xl-pull-16{position:relative;right:66.6666666667%}.el-col-xl-push-16{position:relative;left:66.6666666667%}.el-col-xl-17{display:block;max-width:70.8333333333%;flex:0 0 70.8333333333%}.el-col-xl-offset-17{margin-left:70.8333333333%}.el-col-xl-pull-17{position:relative;right:70.8333333333%}.el-col-xl-push-17{position:relative;left:70.8333333333%}.el-col-xl-18{display:block;max-width:75%;flex:0 0 
75%}.el-col-xl-offset-18{margin-left:75%}.el-col-xl-pull-18{position:relative;right:75%}.el-col-xl-push-18{position:relative;left:75%}.el-col-xl-19{display:block;max-width:79.1666666667%;flex:0 0 79.1666666667%}.el-col-xl-offset-19{margin-left:79.1666666667%}.el-col-xl-pull-19{position:relative;right:79.1666666667%}.el-col-xl-push-19{position:relative;left:79.1666666667%}.el-col-xl-20{display:block;max-width:83.3333333333%;flex:0 0 83.3333333333%}.el-col-xl-offset-20{margin-left:83.3333333333%}.el-col-xl-pull-20{position:relative;right:83.3333333333%}.el-col-xl-push-20{position:relative;left:83.3333333333%}.el-col-xl-21{display:block;max-width:87.5%;flex:0 0 87.5%}.el-col-xl-offset-21{margin-left:87.5%}.el-col-xl-pull-21{position:relative;right:87.5%}.el-col-xl-push-21{position:relative;left:87.5%}.el-col-xl-22{display:block;max-width:91.6666666667%;flex:0 0 91.6666666667%}.el-col-xl-offset-22{margin-left:91.6666666667%}.el-col-xl-pull-22{position:relative;right:91.6666666667%}.el-col-xl-push-22{position:relative;left:91.6666666667%}.el-col-xl-23{display:block;max-width:95.8333333333%;flex:0 0 95.8333333333%}.el-col-xl-offset-23{margin-left:95.8333333333%}.el-col-xl-pull-23{position:relative;right:95.8333333333%}.el-col-xl-push-23{position:relative;left:95.8333333333%}.el-col-xl-24{display:block;max-width:100%;flex:0 0 100%}.el-col-xl-offset-24{margin-left:100%}.el-col-xl-pull-24{position:relative;right:100%}.el-col-xl-push-24{position:relative;left:100%}}.el-collapse{--el-collapse-border-color:var(--el-border-color-lighter);--el-collapse-header-height:48px;--el-collapse-header-bg-color:var(--el-fill-color-blank);--el-collapse-header-text-color:var(--el-text-color-primary);--el-collapse-header-font-size:13px;--el-collapse-content-bg-color:var(--el-fill-color-blank);--el-collapse-content-font-size:13px;--el-collapse-content-text-color:var(--el-text-color-primary);border-top:1px solid var(--el-collapse-border-color);border-bottom:1px solid 
var(--el-collapse-border-color)}/* collapse item: header, arrow, content wrapper. The disabled-header rule is declared before the base header rule and wins through higher selector specificity. */.el-collapse-item.is-disabled .el-collapse-item__header{color:var(--el-text-color-disabled);cursor:not-allowed}.el-collapse-item__header{display:flex;align-items:center;height:var(--el-collapse-header-height);line-height:var(--el-collapse-header-height);background-color:var(--el-collapse-header-bg-color);color:var(--el-collapse-header-text-color);cursor:pointer;border-bottom:1px solid var(--el-collapse-border-color);font-size:var(--el-collapse-header-font-size);font-weight:500;transition:border-bottom-color var(--el-transition-duration);outline:0}/* arrow is pushed to the right edge by margin-left:auto and rotates 90deg when active */.el-collapse-item__arrow{margin:0 8px 0 auto;transition:transform var(--el-transition-duration);font-weight:300}.el-collapse-item__arrow.is-active{transform:rotate(90deg)}.el-collapse-item__header.focusing:focus:not(:hover){color:var(--el-color-primary)}.el-collapse-item__header.is-active{border-bottom-color:transparent}.el-collapse-item__wrap{will-change:height;background-color:var(--el-collapse-content-bg-color);overflow:hidden;box-sizing:border-box;border-bottom:1px solid var(--el-collapse-border-color)}.el-collapse-item__content{padding-bottom:25px;font-size:var(--el-collapse-content-font-size);color:var(--el-collapse-content-text-color);line-height:1.7692307692}.el-collapse-item:last-child{margin-bottom:-1px}/* color picker, predefined colors: 280px-wide flex row of 20px swatches; every (10n+1)th swatch drops its left margin */.el-color-predefine{display:flex;font-size:12px;margin-top:8px;width:280px}.el-color-predefine__colors{display:flex;flex:1;flex-wrap:wrap}.el-color-predefine__color-selector{margin:0 0 8px 8px;width:20px;height:20px;border-radius:4px;cursor:pointer}.el-color-predefine__color-selector:nth-child(10n+1){margin-left:0}.el-color-predefine__color-selector.selected{box-shadow:0 0 3px 2px 
var(--el-color-primary)}/* color picker: predefine swatch fill, hue slider, sv panel */.el-color-predefine__color-selector>div{display:flex;height:100%;border-radius:3px}.el-color-predefine__color-selector.is-alpha{background-image:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAGUlEQVQYV2M4gwH+YwCGIasIUwhT25BVBADtzYNYrHvv4gAAAABJRU5ErkJggg==)}.el-color-hue-slider{position:relative;box-sizing:border-box;width:280px;height:12px;background-color:red;padding:0 2px;float:right}.el-color-hue-slider__bar{position:relative;background:linear-gradient(to right,red 0,#ff0 17%,#0f0 33%,#0ff 50%,#00f 67%,#f0f 83%,red 100%);height:100%}.el-color-hue-slider__thumb{position:absolute;cursor:pointer;box-sizing:border-box;left:0;top:0;width:4px;height:100%;border-radius:1px;background:#fff;border:1px solid var(--el-border-color-lighter);box-shadow:0 0 2px #0009;z-index:1}.el-color-hue-slider.is-vertical{width:12px;height:180px;padding:2px 0}/* the vertical bar is 12px x 180px with a full-width, 4px-tall thumb, so the hue spectrum has to run top to bottom */.el-color-hue-slider.is-vertical .el-color-hue-slider__bar{background:linear-gradient(to bottom,red 0,#ff0 17%,#0f0 33%,#0ff 50%,#00f 67%,#f0f 83%,red 100%)}.el-color-hue-slider.is-vertical .el-color-hue-slider__thumb{left:0;top:0;width:100%;height:4px}.el-color-svpanel{position:relative;width:280px;height:180px}.el-color-svpanel__black,.el-color-svpanel__white{position:absolute;top:0;left:0;right:0;bottom:0}.el-color-svpanel__white{background:linear-gradient(to right,#fff,rgba(255,255,255,0))}.el-color-svpanel__black{background:linear-gradient(to top,#000,rgba(0,0,0,0))}.el-color-svpanel__cursor{position:absolute}/* "head" is not a CSS cursor keyword, so that declaration was being discarded; use pointer like the slider thumbs */.el-color-svpanel__cursor>div{cursor:pointer;width:4px;height:4px;box-shadow:0 0 0 1.5px #fff,inset 0 0 1px 1px #0000004d,0 0 1px 2px 
#0006;border-radius:50%;transform:translate(-2px,-2px)}.el-color-alpha-slider{position:relative;box-sizing:border-box;width:280px;height:12px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAGUlEQVQYV2M4gwH+YwCGIasIUwhT25BVBADtzYNYrHvv4gAAAABJRU5ErkJggg==)}.el-color-alpha-slider__bar{position:relative;background:linear-gradient(to right,rgba(255,255,255,0) 0,#fff 100%);height:100%}.el-color-alpha-slider__thumb{position:absolute;cursor:pointer;box-sizing:border-box;left:0;top:0;width:4px;height:100%;border-radius:1px;background:#fff;border:1px solid var(--el-border-color-lighter);box-shadow:0 0 2px #0009;z-index:1}.el-color-alpha-slider.is-vertical{width:20px;height:180px}.el-color-alpha-slider.is-vertical .el-color-alpha-slider__bar{background:linear-gradient(to bottom,rgba(255,255,255,0) 0,#fff 100%)}.el-color-alpha-slider.is-vertical .el-color-alpha-slider__thumb{left:0;top:0;width:100%;height:4px}.el-color-dropdown{width:300px}.el-color-dropdown__main-wrapper{margin-bottom:6px}.el-color-dropdown__main-wrapper:after{content:"";display:table;clear:both}.el-color-dropdown__btns{margin-top:12px;text-align:right}.el-color-dropdown__value{float:left;line-height:26px;font-size:12px;color:#000;width:160px}.el-color-picker{display:inline-block;position:relative;line-height:normal}.el-color-picker.is-disabled .el-color-picker__trigger{cursor:not-allowed}.el-color-picker--large{height:40px}.el-color-picker--large .el-color-picker__trigger{height:40px;width:40px}.el-color-picker--large .el-color-picker__mask{height:38px;width:38px}.el-color-picker--small{height:24px}.el-color-picker--small .el-color-picker__trigger{height:24px;width:24px}.el-color-picker--small .el-color-picker__mask{height:22px;width:22px}.el-color-picker--small .el-color-picker__empty,.el-color-picker--small 
.el-color-picker__icon{transform:scale(.8)}.el-color-picker__mask{height:38px;width:38px;border-radius:4px;position:absolute;top:1px;left:1px;z-index:1;cursor:not-allowed;background-color:#ffffffb3}.el-color-picker__trigger{display:inline-flex;justify-content:center;align-items:center;box-sizing:border-box;height:32px;width:32px;padding:4px;border:1px solid var(--el-border-color);border-radius:4px;font-size:0;position:relative;cursor:pointer}.el-color-picker__color{position:relative;display:block;box-sizing:border-box;border:1px solid var(--el-text-color-secondary);border-radius:var(--el-border-radius-small);width:100%;height:100%;text-align:center}.el-color-picker__color.is-alpha{background-image:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAwAAAAMCAIAAADZF8uwAAAAGUlEQVQYV2M4gwH+YwCGIasIUwhT25BVBADtzYNYrHvv4gAAAABJRU5ErkJggg==)}.el-color-picker__color-inner{display:inline-flex;justify-content:center;align-items:center;width:100%;height:100%}.el-color-picker .el-color-picker__empty{font-size:12px;color:var(--el-text-color-secondary)}.el-color-picker .el-color-picker__icon{display:inline-flex;justify-content:center;align-items:center;color:#fff;font-size:12px}.el-color-picker__panel{position:absolute;z-index:10;padding:6px;box-sizing:content-box;background-color:#fff;border-radius:var(--el-border-radius-base);box-shadow:var(--el-box-shadow-light)}.el-color-picker__panel.el-popper{border:1px solid var(--el-border-color-lighter)}.el-container{display:flex;flex-direction:row;flex:1;flex-basis:auto;box-sizing:border-box;min-width:0}.el-container.is-vertical{flex-direction:column}.el-date-table{font-size:12px;-webkit-user-select:none;user-select:none}.el-date-table.is-week-mode .el-date-table__row:hover .el-date-table-cell{background-color:var(--el-datepicker-inrange-bg-color)}.el-date-table.is-week-mode .el-date-table__row:hover td.available:hover{color:var(--el-datepicker-text-color)}.el-date-table.is-week-mode .el-date-table__row:hover td:first-child 
.el-date-table-cell{margin-left:5px;border-top-left-radius:15px;border-bottom-left-radius:15px}.el-date-table.is-week-mode .el-date-table__row:hover td:last-child .el-date-table-cell{margin-right:5px;border-top-right-radius:15px;border-bottom-right-radius:15px}.el-date-table.is-week-mode .el-date-table__row.current .el-date-table-cell{background-color:var(--el-datepicker-inrange-bg-color)}.el-date-table td{width:32px;height:30px;padding:4px 0;box-sizing:border-box;text-align:center;cursor:pointer;position:relative}.el-date-table td .el-date-table-cell{height:30px;padding:3px 0;box-sizing:border-box}.el-date-table td .el-date-table-cell .el-date-table-cell__text{width:24px;height:24px;display:block;margin:0 auto;line-height:24px;position:absolute;left:50%;transform:translate(-50%);border-radius:50%}.el-date-table td.next-month,.el-date-table td.prev-month{color:var(--el-datepicker-off-text-color)}.el-date-table td.today{position:relative}.el-date-table td.today .el-date-table-cell__text{color:var(--el-color-primary);font-weight:700}.el-date-table td.today.end-date .el-date-table-cell__text,.el-date-table td.today.start-date .el-date-table-cell__text{color:#fff}.el-date-table td.available:hover{color:var(--el-datepicker-hover-text-color)}.el-date-table td.in-range .el-date-table-cell{background-color:var(--el-datepicker-inrange-bg-color)}.el-date-table td.in-range .el-date-table-cell:hover{background-color:var(--el-datepicker-inrange-hover-bg-color)}.el-date-table td.current:not(.disabled) .el-date-table-cell__text{color:#fff;background-color:var(--el-datepicker-active-color)}.el-date-table td.end-date .el-date-table-cell,.el-date-table td.start-date .el-date-table-cell{color:#fff}.el-date-table td.end-date .el-date-table-cell__text,.el-date-table td.start-date .el-date-table-cell__text{background-color:var(--el-datepicker-active-color)}.el-date-table td.start-date 
.el-date-table-cell{margin-left:5px;border-top-left-radius:15px;border-bottom-left-radius:15px}.el-date-table td.end-date .el-date-table-cell{margin-right:5px;border-top-right-radius:15px;border-bottom-right-radius:15px}.el-date-table td.disabled .el-date-table-cell{background-color:var(--el-fill-color-light);opacity:1;cursor:not-allowed;color:var(--el-text-color-placeholder)}.el-date-table td.selected .el-date-table-cell{margin-left:5px;margin-right:5px;background-color:var(--el-datepicker-inrange-bg-color);border-radius:15px}.el-date-table td.selected .el-date-table-cell:hover{background-color:var(--el-datepicker-inrange-hover-bg-color)}.el-date-table td.selected .el-date-table-cell__text{background-color:var(--el-datepicker-active-color);color:#fff;border-radius:15px}.el-date-table td.week{font-size:80%;color:var(--el-datepicker-header-text-color)}.el-date-table th{padding:5px;color:var(--el-datepicker-header-text-color);font-weight:400;border-bottom:solid 1px var(--el-border-color-lighter)}.el-month-table{font-size:12px;margin:-1px;border-collapse:collapse}.el-month-table td{text-align:center;padding:8px 0;cursor:pointer}.el-month-table td div{height:48px;padding:6px 0;box-sizing:border-box}.el-month-table td.today .cell{color:var(--el-color-primary);font-weight:700}.el-month-table td.today.end-date .cell,.el-month-table td.today.start-date .cell{color:#fff}.el-month-table td.disabled .cell{background-color:var(--el-fill-color-light);cursor:not-allowed;color:var(--el-text-color-placeholder)}.el-month-table td.disabled .cell:hover{color:var(--el-text-color-placeholder)}.el-month-table td .cell{width:60px;height:36px;display:block;line-height:36px;color:var(--el-datepicker-text-color);margin:0 auto;border-radius:18px}.el-month-table td .cell:hover{color:var(--el-datepicker-hover-text-color)}.el-month-table td.in-range div{background-color:var(--el-datepicker-inrange-bg-color)}.el-month-table td.in-range 
div:hover{background-color:var(--el-datepicker-inrange-hover-bg-color)}.el-month-table td.end-date div,.el-month-table td.start-date div{color:#fff}.el-month-table td.end-date .cell,.el-month-table td.start-date .cell{color:#fff;background-color:var(--el-datepicker-active-color)}.el-month-table td.start-date div{border-top-left-radius:24px;border-bottom-left-radius:24px}.el-month-table td.end-date div{border-top-right-radius:24px;border-bottom-right-radius:24px}.el-month-table td.current:not(.disabled) .cell{color:var(--el-datepicker-active-color)}.el-year-table{font-size:12px;margin:-1px;border-collapse:collapse}.el-year-table .el-icon{color:var(--el-datepicker-icon-color)}.el-year-table td{text-align:center;padding:20px 3px;cursor:pointer}.el-year-table td.today .cell{color:var(--el-color-primary);font-weight:700}.el-year-table td.disabled .cell{background-color:var(--el-fill-color-light);cursor:not-allowed;color:var(--el-text-color-placeholder)}.el-year-table td.disabled .cell:hover{color:var(--el-text-color-placeholder)}.el-year-table td .cell{width:48px;height:32px;display:block;line-height:32px;color:var(--el-datepicker-text-color);margin:0 auto}.el-year-table td .cell:hover{color:var(--el-datepicker-hover-text-color)}.el-year-table td.current:not(.disabled) .cell{color:var(--el-datepicker-active-color)}.el-time-spinner.has-seconds .el-time-spinner__wrapper{width:33.3%}.el-time-spinner__wrapper{max-height:192px;overflow:auto;display:inline-block;width:50%;vertical-align:top;position:relative}.el-time-spinner__wrapper.el-scrollbar__wrap:not(.el-scrollbar__wrap--hidden-default){padding-bottom:15px}.el-time-spinner__wrapper.is-arrow{box-sizing:border-box;text-align:center;overflow:hidden}.el-time-spinner__wrapper.is-arrow .el-time-spinner__list{transform:translateY(-32px)}.el-time-spinner__wrapper.is-arrow 
.el-time-spinner__item:hover:not(.is-disabled):not(.is-active){background:#fff;cursor:default}.el-time-spinner__arrow{font-size:12px;color:var(--el-text-color-secondary);position:absolute;left:0;width:100%;z-index:var(--el-index-normal);text-align:center;height:30px;line-height:30px;cursor:pointer}.el-time-spinner__arrow:hover{color:var(--el-color-primary)}.el-time-spinner__arrow.arrow-up{top:10px}.el-time-spinner__arrow.arrow-down{bottom:10px}.el-time-spinner__input.el-input{width:70%}.el-time-spinner__input.el-input .el-input__inner{padding:0;text-align:center}.el-time-spinner__list{padding:0;margin:0;list-style:none;text-align:center}.el-time-spinner__list:after,.el-time-spinner__list:before{content:"";display:block;width:100%;height:80px}.el-time-spinner__item{height:32px;line-height:32px;font-size:12px;color:var(--el-text-color-regular)}.el-time-spinner__item:hover:not(.is-disabled):not(.is-active){background:var(--el-fill-color-light);cursor:pointer}.el-time-spinner__item.is-active:not(.is-disabled){color:var(--el-text-color-primary);font-weight:700}.el-time-spinner__item.is-disabled{color:var(--el-text-color-placeholder);cursor:not-allowed}.el-picker__popper{--el-datepicker-border-color:var(--el-disabled-border-color)}.el-picker__popper.el-popper[role=tooltip]{background:var(--el-bg-color-overlay);border:1px solid var(--el-datepicker-border-color);box-shadow:var(--el-box-shadow-light)}.el-picker__popper.el-popper[role=tooltip] .el-popper__arrow:before{border:1px solid var(--el-datepicker-border-color)}.el-picker__popper.el-popper[role=tooltip][data-popper-placement^=top] .el-popper__arrow:before{border-top-color:transparent;border-left-color:transparent}.el-picker__popper.el-popper[role=tooltip][data-popper-placement^=bottom] .el-popper__arrow:before{border-bottom-color:transparent;border-right-color:transparent}.el-picker__popper.el-popper[role=tooltip][data-popper-placement^=left] 
.el-popper__arrow:before{border-left-color:transparent;border-bottom-color:transparent}.el-picker__popper.el-popper[role=tooltip][data-popper-placement^=right] .el-popper__arrow:before{border-right-color:transparent;border-top-color:transparent}.el-date-editor{--el-date-editor-width:220px;--el-date-editor-monthrange-width:300px;--el-date-editor-daterange-width:350px;--el-date-editor-datetimerange-width:400px;--el-input-text-color:var(--el-text-color-regular);--el-input-border:var(--el-border);--el-input-hover-border:var(--el-border-color-hover);--el-input-focus-border:var(--el-color-primary);--el-input-transparent-border:0 0 0 1px transparent inset;--el-input-border-color:var(--el-border-color);--el-input-border-radius:var(--el-border-radius-base);--el-input-bg-color:var(--el-fill-color-blank);--el-input-icon-color:var(--el-text-color-placeholder);--el-input-placeholder-color:var(--el-text-color-placeholder);--el-input-hover-border-color:var(--el-border-color-hover);--el-input-clear-hover-color:var(--el-text-color-secondary);--el-input-focus-border-color:var(--el-color-primary);position:relative;display:inline-block;text-align:left}.el-date-editor.el-input,.el-date-editor.el-input__inner{width:var(--el-date-editor-width)}.el-date-editor--monthrange.el-input,.el-date-editor--monthrange.el-input__inner{width:var(--el-date-editor-monthrange-width)}.el-date-editor--daterange.el-input,.el-date-editor--daterange.el-input__inner,.el-date-editor--timerange.el-input,.el-date-editor--timerange.el-input__inner{width:var(--el-date-editor-daterange-width)}.el-date-editor--datetimerange.el-input,.el-date-editor--datetimerange.el-input__inner{width:var(--el-date-editor-datetimerange-width)}.el-date-editor--dates .el-input__inner{text-overflow:ellipsis;white-space:nowrap}.el-date-editor .close-icon,.el-date-editor .clear-icon{cursor:pointer}.el-date-editor .clear-icon:hover{color:var(--el-text-color-secondary)}.el-date-editor 
.el-range__icon{height:inherit;font-size:14px;color:var(--el-text-color-placeholder);float:left}.el-date-editor .el-range__icon svg{vertical-align:middle}.el-date-editor .el-range-input{-webkit-appearance:none;appearance:none;border:none;outline:0;display:inline-block;height:100%;margin:0;padding:0;width:39%;text-align:center;font-size:var(--el-font-size-base);color:var(--el-text-color-regular);background-color:transparent}.el-date-editor .el-range-input::placeholder{color:var(--el-text-color-placeholder)}.el-date-editor .el-range-separator{flex:1;display:inline-flex;justify-content:center;align-items:center;height:100%;padding:0 5px;margin:0;font-size:14px;word-break:keep-all;color:var(--el-text-color-primary)}.el-date-editor .el-range__close-icon{font-size:14px;color:var(--el-text-color-placeholder);height:inherit;width:unset;cursor:pointer}.el-date-editor .el-range__close-icon:hover{color:var(--el-text-color-secondary)}.el-date-editor .el-range__close-icon svg{vertical-align:middle}.el-date-editor .el-range__close-icon--hidden{opacity:0;visibility:hidden}.el-range-editor.el-input__inner{display:inline-flex;align-items:center;padding:3px 10px}.el-range-editor .el-range-input{line-height:1}.el-range-editor.is-active,.el-range-editor.is-active:hover{box-shadow:0 0 0 1px var(--el-input-focus-border-color) inset}.el-range-editor--large{line-height:40px}.el-range-editor--large.el-input__inner{height:40px}.el-range-editor--large .el-range-separator{line-height:40px;font-size:14px}.el-range-editor--large .el-range-input{font-size:14px}.el-range-editor--small{line-height:24px}.el-range-editor--small.el-input__inner{height:24px}.el-range-editor--small .el-range-separator{line-height:24px;font-size:12px}.el-range-editor--small 
.el-range-input{font-size:12px}.el-range-editor.is-disabled{background-color:var(--el-disabled-bg-color);border-color:var(--el-disabled-border-color);color:var(--el-disabled-text-color);cursor:not-allowed}.el-range-editor.is-disabled:focus,.el-range-editor.is-disabled:hover{border-color:var(--el-disabled-border-color)}.el-range-editor.is-disabled input{background-color:var(--el-disabled-bg-color);color:var(--el-disabled-text-color);cursor:not-allowed}.el-range-editor.is-disabled input::placeholder{color:var(--el-text-color-placeholder)}.el-range-editor.is-disabled .el-range-separator{color:var(--el-disabled-text-color)}.el-picker-panel{color:var(--el-text-color-regular);background:var(--el-bg-color-overlay);border-radius:var(--el-border-radius-base);line-height:30px}.el-picker-panel .el-time-panel{margin:5px 0;border:solid 1px var(--el-datepicker-border-color);background-color:var(--el-color-white);box-shadow:var(--el-box-shadow-light)}.el-picker-panel__body-wrapper:after,.el-picker-panel__body:after{content:"";display:table;clear:both}.el-picker-panel__content{position:relative;margin:15px}.el-picker-panel__footer{border-top:1px solid var(--el-datepicker-inner-border-color);padding:4px 12px;text-align:right;background-color:var(--el-bg-color-overlay);position:relative;font-size:0}.el-picker-panel__shortcut{display:block;width:100%;border:0;background-color:transparent;line-height:28px;font-size:14px;color:var(--el-datepicker-text-color);padding-left:12px;text-align:left;outline:0;cursor:pointer}.el-picker-panel__shortcut:hover{color:var(--el-datepicker-hover-text-color)}.el-picker-panel__shortcut.active{background-color:#e6f1fe;color:var(--el-datepicker-active-color)}.el-picker-panel__btn{border:1px solid var(--el-fill-color-darker);color:var(--el-text-color-primary);line-height:24px;border-radius:2px;padding:0 
20px;cursor:pointer;background-color:transparent;outline:0;font-size:12px}.el-picker-panel__btn[disabled]{color:var(--el-text-color-disabled);cursor:not-allowed}.el-picker-panel__icon-btn{font-size:12px;color:var(--el-datepicker-icon-color);border:0;background:0 0;cursor:pointer;outline:0;margin-top:8px}.el-picker-panel__icon-btn:hover{color:var(--el-datepicker-hover-text-color)}.el-picker-panel__icon-btn.is-disabled{color:var(--el-text-color-disabled)}.el-picker-panel__icon-btn.is-disabled:hover{cursor:not-allowed}.el-picker-panel__icon-btn .el-icon{cursor:pointer;font-size:inherit}.el-picker-panel__link-btn{vertical-align:middle}.el-picker-panel [slot=sidebar],.el-picker-panel__sidebar{position:absolute;top:0;bottom:0;width:110px;border-right:1px solid var(--el-datepicker-inner-border-color);box-sizing:border-box;padding-top:6px;background-color:var(--el-bg-color-overlay);overflow:auto}.el-picker-panel [slot=sidebar]+.el-picker-panel__body,.el-picker-panel__sidebar+.el-picker-panel__body{margin-left:110px}.el-date-picker{--el-datepicker-text-color:var(--el-text-color-regular);--el-datepicker-off-text-color:var(--el-text-color-placeholder);--el-datepicker-header-text-color:var(--el-text-color-regular);--el-datepicker-icon-color:var(--el-text-color-primary);--el-datepicker-border-color:var(--el-disabled-border-color);--el-datepicker-inner-border-color:var(--el-border-color-light);--el-datepicker-inrange-bg-color:var(--el-border-color-extra-light);--el-datepicker-inrange-hover-bg-color:var(--el-border-color-extra-light);--el-datepicker-active-color:var(--el-color-primary);--el-datepicker-hover-text-color:var(--el-color-primary)}.el-date-picker{width:322px}.el-date-picker.has-sidebar.has-time{width:434px}.el-date-picker.has-sidebar{width:438px}.el-date-picker.has-time .el-picker-panel__body-wrapper{position:relative}.el-date-picker .el-picker-panel__content{width:292px}.el-date-picker 
table{table-layout:fixed;width:100%}.el-date-picker__editor-wrap{position:relative;display:table-cell;padding:0 5px}.el-date-picker__time-header{position:relative;border-bottom:1px solid var(--el-datepicker-inner-border-color);font-size:12px;padding:8px 5px 5px;display:table;width:100%;box-sizing:border-box}.el-date-picker__header{margin:12px;text-align:center}.el-date-picker__header--bordered{margin-bottom:0;padding-bottom:12px;border-bottom:solid 1px var(--el-border-color-lighter)}.el-date-picker__header--bordered+.el-picker-panel__content{margin-top:0}.el-date-picker__header-label{font-size:16px;font-weight:500;padding:0 5px;line-height:22px;text-align:center;cursor:pointer;color:var(--el-text-color-regular)}.el-date-picker__header-label:hover{color:var(--el-datepicker-hover-text-color)}.el-date-picker__header-label.active{color:var(--el-datepicker-active-color)}.el-date-picker__prev-btn{float:left}.el-date-picker__next-btn{float:right}.el-date-picker__time-wrap{padding:10px;text-align:center}.el-date-picker__time-label{float:left;cursor:pointer;line-height:30px;margin-left:10px}.el-date-picker .el-time-panel{position:absolute}.el-date-range-picker{--el-datepicker-text-color:var(--el-text-color-regular);--el-datepicker-off-text-color:var(--el-text-color-placeholder);--el-datepicker-header-text-color:var(--el-text-color-regular);--el-datepicker-icon-color:var(--el-text-color-primary);--el-datepicker-border-color:var(--el-disabled-border-color);--el-datepicker-inner-border-color:var(--el-border-color-light);--el-datepicker-inrange-bg-color:var(--el-border-color-extra-light);--el-datepicker-inrange-hover-bg-color:var(--el-border-color-extra-light);--el-datepicker-active-color:var(--el-color-primary);--el-datepicker-hover-text-color:var(--el-color-primary)}.el-date-range-picker{width:646px}.el-date-range-picker.has-sidebar{width:756px}.el-date-range-picker table{table-layout:fixed;width:100%}.el-date-range-picker 
.el-picker-panel__body{min-width:513px}.el-date-range-picker .el-picker-panel__content{margin:0}.el-date-range-picker__header{position:relative;text-align:center;height:28px}.el-date-range-picker__header [class*=arrow-left]{float:left}.el-date-range-picker__header [class*=arrow-right]{float:right}.el-date-range-picker__header div{font-size:16px;font-weight:500;margin-right:50px}.el-date-range-picker__content{float:left;width:50%;box-sizing:border-box;margin:0;padding:16px}.el-date-range-picker__content.is-left{border-right:1px solid var(--el-datepicker-inner-border-color)}.el-date-range-picker__content .el-date-range-picker__header div{margin-left:50px;margin-right:50px}.el-date-range-picker__editors-wrap{box-sizing:border-box;display:table-cell}.el-date-range-picker__editors-wrap.is-right{text-align:right}.el-date-range-picker__time-header{position:relative;border-bottom:1px solid var(--el-datepicker-inner-border-color);font-size:12px;padding:8px 5px 5px;display:table;width:100%;box-sizing:border-box}.el-date-range-picker__time-header>.el-icon-arrow-right{font-size:20px;vertical-align:middle;display:table-cell;color:var(--el-datepicker-icon-color)}.el-date-range-picker__time-picker-wrap{position:relative;display:table-cell;padding:0 5px}.el-date-range-picker__time-picker-wrap .el-picker-panel{position:absolute;top:13px;right:0;z-index:1;background:#fff}.el-date-range-picker__time-picker-wrap .el-time-panel{position:absolute}.el-time-range-picker{width:354px;overflow:visible}.el-time-range-picker__content{position:relative;text-align:center;padding:10px;z-index:1}.el-time-range-picker__cell{box-sizing:border-box;margin:0;padding:4px 7px 7px;width:50%;display:inline-block}.el-time-range-picker__header{margin-bottom:5px;text-align:center;font-size:14px}.el-time-range-picker__body{border-radius:2px;border:1px solid 
var(--el-datepicker-border-color)}.el-time-panel{border-radius:2px;position:relative;width:180px;left:0;z-index:var(--el-index-top);-webkit-user-select:none;user-select:none;box-sizing:content-box}.el-time-panel__content{font-size:0;position:relative;overflow:hidden}.el-time-panel__content:after,.el-time-panel__content:before{content:"";top:50%;position:absolute;margin-top:-16px;height:32px;z-index:-1;left:0;right:0;box-sizing:border-box;padding-top:6px;text-align:left;border-top:1px solid var(--el-border-color-light);border-bottom:1px solid var(--el-border-color-light)}.el-time-panel__content:after{left:50%;margin-left:12%;margin-right:12%}.el-time-panel__content:before{padding-left:50%;margin-right:12%;margin-left:12%}.el-time-panel__content.has-seconds:after{left:66.6666666667%}.el-time-panel__content.has-seconds:before{padding-left:33.3333333333%}.el-time-panel__footer{border-top:1px solid var(--el-timepicker-inner-border-color,var(--el-border-color-light));padding:4px;height:36px;line-height:25px;text-align:right;box-sizing:border-box}.el-time-panel__btn{border:none;line-height:28px;padding:0 5px;margin:0 5px;cursor:pointer;background-color:transparent;outline:0;font-size:12px;color:var(--el-text-color-primary)}.el-time-panel__btn.confirm{font-weight:800;color:var(--el-timepicker-active-color,var(--el-color-primary))}.el-descriptions{--el-descriptions-table-border:1px solid var(--el-border-color-lighter);--el-descriptions-item-bordered-label-background:var(--el-fill-color-light);box-sizing:border-box;font-size:var(--el-font-size-base);color:var(--el-text-color-primary)}.el-descriptions__header{display:flex;justify-content:space-between;align-items:center;margin-bottom:16px}.el-descriptions__title{color:var(--el-text-color-primary);font-size:16px;font-weight:700}.el-descriptions__body{background-color:var(--el-fill-color-blank)}.el-descriptions__body .el-descriptions__table{border-collapse:collapse;width:100%}.el-descriptions__body .el-descriptions__table 
.el-descriptions__cell{box-sizing:border-box;text-align:left;font-weight:400;line-height:23px;font-size:14px}.el-descriptions__body .el-descriptions__table .el-descriptions__cell.is-left{text-align:left}.el-descriptions__body .el-descriptions__table .el-descriptions__cell.is-center{text-align:center}.el-descriptions__body .el-descriptions__table .el-descriptions__cell.is-right{text-align:right}.el-descriptions__body .el-descriptions__table.is-bordered .el-descriptions__cell{border:var(--el-descriptions-table-border);padding:8px 11px}.el-descriptions__body .el-descriptions__table:not(.is-bordered) .el-descriptions__cell{padding-bottom:12px}.el-descriptions--large{font-size:14px}.el-descriptions--large .el-descriptions__header{margin-bottom:20px}.el-descriptions--large .el-descriptions__header .el-descriptions__title{font-size:16px}.el-descriptions--large .el-descriptions__body .el-descriptions__table .el-descriptions__cell{font-size:14px}.el-descriptions--large .el-descriptions__body .el-descriptions__table.is-bordered .el-descriptions__cell{padding:12px 15px}.el-descriptions--large .el-descriptions__body .el-descriptions__table:not(.is-bordered) .el-descriptions__cell{padding-bottom:16px}.el-descriptions--small{font-size:12px}.el-descriptions--small .el-descriptions__header{margin-bottom:12px}.el-descriptions--small .el-descriptions__header .el-descriptions__title{font-size:14px}.el-descriptions--small .el-descriptions__body .el-descriptions__table .el-descriptions__cell{font-size:12px}.el-descriptions--small .el-descriptions__body .el-descriptions__table.is-bordered .el-descriptions__cell{padding:4px 7px}.el-descriptions--small .el-descriptions__body .el-descriptions__table:not(.is-bordered) 
.el-descriptions__cell{padding-bottom:8px}.el-descriptions__label.el-descriptions__cell.is-bordered-label{font-weight:700;color:var(--el-text-color-regular);background:var(--el-descriptions-item-bordered-label-background)}.el-descriptions__label:not(.is-bordered-label){color:var(--el-text-color-primary);margin-right:16px}.el-descriptions__label.el-descriptions__cell:not(.is-bordered-label).is-vertical-label{padding-bottom:6px}.el-descriptions__content.el-descriptions__cell.is-bordered-content{color:var(--el-text-color-primary)}.el-descriptions__content:not(.is-bordered-label){color:var(--el-text-color-regular)}.el-descriptions--large .el-descriptions__label:not(.is-bordered-label){margin-right:16px}.el-descriptions--large .el-descriptions__label.el-descriptions__cell:not(.is-bordered-label).is-vertical-label{padding-bottom:8px}.el-descriptions--small .el-descriptions__label:not(.is-bordered-label){margin-right:12px}.el-descriptions--small .el-descriptions__label.el-descriptions__cell:not(.is-bordered-label).is-vertical-label{padding-bottom:4px}:root{--el-popup-modal-bg-color:var(--el-color-black);--el-popup-modal-opacity:.5}.v-modal-enter{animation:v-modal-in var(--el-transition-duration-fast) ease}.v-modal-leave{animation:v-modal-out var(--el-transition-duration-fast) ease forwards}@keyframes v-modal-in{0%{opacity:0}}@keyframes 
v-modal-out{to{opacity:0}}.v-modal{position:fixed;left:0;top:0;width:100%;height:100%;opacity:var(--el-popup-modal-opacity);background:var(--el-popup-modal-bg-color)}.el-popup-parent--hidden{overflow:hidden}.el-dialog{--el-dialog-width:50%;--el-dialog-margin-top:15vh;--el-dialog-bg-color:var(--el-bg-color);--el-dialog-box-shadow:var(--el-box-shadow);--el-dialog-title-font-size:var(--el-font-size-large);--el-dialog-content-font-size:14px;--el-dialog-font-line-height:var(--el-font-line-height-primary);--el-dialog-padding-primary:20px;--el-dialog-border-radius:var(--el-border-radius-small);position:relative;margin:var(--el-dialog-margin-top,15vh) auto 50px;background:var(--el-dialog-bg-color);border-radius:var(--el-dialog-border-radius);box-shadow:var(--el-dialog-box-shadow);box-sizing:border-box;width:var(--el-dialog-width,50%)}.el-dialog.is-fullscreen{--el-dialog-width:100%;--el-dialog-margin-top:0;margin-bottom:0;height:100%;overflow:auto}.el-dialog__wrapper{position:fixed;top:0;right:0;bottom:0;left:0;overflow:auto;margin:0}.el-dialog.is-draggable .el-dialog__header{cursor:move;-webkit-user-select:none;user-select:none}.el-dialog__header{padding:var(--el-dialog-padding-primary);padding-bottom:10px;margin-right:16px;word-break:break-all}.el-dialog__headerbtn{position:absolute;top:6px;right:0;padding:0;width:54px;height:54px;background:0 0;border:none;outline:0;cursor:pointer;font-size:var(--el-message-close-size,16px)}.el-dialog__headerbtn .el-dialog__close{color:var(--el-color-info);font-size:inherit}.el-dialog__headerbtn:focus .el-dialog__close,.el-dialog__headerbtn:hover .el-dialog__close{color:var(--el-color-primary)}.el-dialog__title{line-height:var(--el-dialog-font-line-height);font-size:var(--el-dialog-title-font-size);color:var(--el-text-color-primary)}.el-dialog__body{padding:calc(var(--el-dialog-padding-primary) + 10px) 
var(--el-dialog-padding-primary);color:var(--el-text-color-regular);font-size:var(--el-dialog-content-font-size);word-break:break-all}.el-dialog__footer{padding:var(--el-dialog-padding-primary);padding-top:10px;text-align:right;box-sizing:border-box}.el-dialog--center{text-align:center}.el-dialog--center .el-dialog__body{text-align:initial;padding:25px calc(var(--el-dialog-padding-primary) + 5px) 30px}.el-dialog--center .el-dialog__footer{text-align:inherit}.el-overlay-dialog{position:fixed;top:0;right:0;bottom:0;left:0;overflow:auto}.dialog-fade-enter-active{animation:modal-fade-in var(--el-transition-duration)}.dialog-fade-enter-active .el-overlay-dialog{animation:dialog-fade-in var(--el-transition-duration)}.dialog-fade-leave-active{animation:modal-fade-out var(--el-transition-duration)}.dialog-fade-leave-active .el-overlay-dialog{animation:dialog-fade-out var(--el-transition-duration)}@keyframes dialog-fade-in{0%{transform:translate3d(0,-20px,0);opacity:0}to{transform:translateZ(0);opacity:1}}@keyframes dialog-fade-out{0%{transform:translateZ(0);opacity:1}to{transform:translate3d(0,-20px,0);opacity:0}}@keyframes modal-fade-in{0%{opacity:0}to{opacity:1}}@keyframes modal-fade-out{0%{opacity:1}to{opacity:0}}.el-divider{position:relative}.el-divider--horizontal{display:block;height:1px;width:100%;margin:24px 0;border-top:1px var(--el-border-color) var(--el-border-style)}.el-divider--vertical{display:inline-block;width:1px;height:1em;margin:0 8px;vertical-align:middle;position:relative;border-left:1px var(--el-border-color) var(--el-border-style)}.el-divider__text{position:absolute;background-color:var(--el-bg-color);padding:0 20px;font-weight:500;color:var(--el-text-color-primary);font-size:14px}.el-divider__text.is-left{left:20px;transform:translateY(-50%)}.el-divider__text.is-center{left:50%;transform:translate(-50%) translateY(-50%)}.el-divider__text.is-right{right:20px;transform:translateY(-50%)}.el-drawer{--el-drawer-bg-color:var(--el-dialog-bg-color, 
var(--el-bg-color));--el-drawer-padding-primary:var(--el-dialog-padding-primary, 20px)}.el-drawer{position:absolute;box-sizing:border-box;background-color:var(--el-drawer-bg-color);display:flex;flex-direction:column;box-shadow:var(--el-box-shadow-dark);overflow:hidden;transition:all var(--el-transition-duration)}.el-drawer .rtl,.el-drawer .ltr,.el-drawer .ttb,.el-drawer .btt{transform:translate(0)}.el-drawer__header{align-items:center;color:#72767b;display:flex;margin-bottom:32px;padding:var(--el-drawer-padding-primary);padding-bottom:0}.el-drawer__header>:first-child{flex:1}.el-drawer__title{margin:0;flex:1;line-height:inherit;font-size:1rem}.el-drawer__footer{padding:var(--el-drawer-padding-primary);padding-top:10px;text-align:right}.el-drawer__close-btn{border:none;cursor:pointer;font-size:var(--el-font-size-extra-large);color:inherit;background-color:transparent;outline:0}.el-drawer__close-btn:hover i{color:var(--el-color-primary)}.el-drawer__close-btn .el-icon{font-size:inherit;vertical-align:text-bottom}.el-drawer__body{flex:1;padding:var(--el-drawer-padding-primary);overflow:auto}.el-drawer__body>*{box-sizing:border-box}.el-drawer.ltr,.el-drawer.rtl{height:100%;top:0;bottom:0}.el-drawer.btt,.el-drawer.ttb{width:100%;left:0;right:0}.el-drawer.ltr{left:0}.el-drawer.rtl{right:0}.el-drawer.ttb{top:0}.el-drawer.btt{bottom:0}.el-drawer-fade-enter-active,.el-drawer-fade-leave-active{transition:all var(--el-transition-duration)}.el-drawer-fade-enter-active,.el-drawer-fade-enter-from,.el-drawer-fade-enter-to,.el-drawer-fade-leave-active,.el-drawer-fade-leave-from,.el-drawer-fade-leave-to{overflow:hidden!important}.el-drawer-fade-enter-from,.el-drawer-fade-leave-to{opacity:0}.el-drawer-fade-enter-to,.el-drawer-fade-leave-from{opacity:1}.el-drawer-fade-enter-from .rtl,.el-drawer-fade-leave-to .rtl{transform:translate(100%)}.el-drawer-fade-enter-from .ltr,.el-drawer-fade-leave-to .ltr{transform:translate(-100%)}.el-drawer-fade-enter-from .ttb,.el-drawer-fade-leave-to 
.ttb{transform:translateY(-100%)}
.el-drawer-fade-enter-from .btt,.el-drawer-fade-leave-to .btt{transform:translateY(100%)}
/* ---- Dropdown: trigger element ---- */
.el-dropdown{--el-dropdown-menu-box-shadow:var(--el-box-shadow-light);--el-dropdown-menuItem-hover-fill:var(--el-color-primary-light-9);--el-dropdown-menuItem-hover-color:var(--el-color-primary-light-3);--el-dropdown-menu-index:10;display:inline-flex;position:relative;color:var(--el-text-color-regular);font-size:var(--el-font-size-base);line-height:1;vertical-align:top}
.el-dropdown.is-disabled{color:var(--el-text-color-placeholder);cursor:not-allowed}
/* ---- Dropdown: popper container (re-declares the same custom properties as .el-dropdown) ---- */
.el-dropdown__popper{--el-dropdown-menu-box-shadow:var(--el-box-shadow-light);--el-dropdown-menuItem-hover-fill:var(--el-color-primary-light-9);--el-dropdown-menuItem-hover-color:var(--el-color-primary-light-3);--el-dropdown-menu-index:10}
.el-dropdown__popper.el-popper[role=tooltip]{background:var(--el-bg-color-overlay);border:1px solid var(--el-border-color-light);box-shadow:var(--el-dropdown-menu-box-shadow)}
.el-dropdown__popper.el-popper[role=tooltip] .el-popper__arrow:before{border:1px solid var(--el-border-color-light)}
/* Arrow borders: two sides are made transparent per placement */
.el-dropdown__popper.el-popper[role=tooltip][data-popper-placement^=top] .el-popper__arrow:before{border-top-color:transparent;border-left-color:transparent}
.el-dropdown__popper.el-popper[role=tooltip][data-popper-placement^=bottom] .el-popper__arrow:before{border-bottom-color:transparent;border-right-color:transparent}
.el-dropdown__popper.el-popper[role=tooltip][data-popper-placement^=left] .el-popper__arrow:before{border-left-color:transparent;border-bottom-color:transparent}
.el-dropdown__popper.el-popper[role=tooltip][data-popper-placement^=right] .el-popper__arrow:before{border-right-color:transparent;border-top-color:transparent}
.el-dropdown__popper .el-dropdown-menu{border:none}
.el-dropdown__popper .el-dropdown__popper-selfdefine{outline:0}
.el-dropdown__popper .el-scrollbar__bar{z-index:calc(var(--el-dropdown-menu-index) + 1)}
.el-dropdown__popper .el-dropdown__list{list-style:none;padding:0;margin:0;box-sizing:border-box}
/* ---- Dropdown: split-button caret ---- */
.el-dropdown .el-dropdown__caret-button{padding-left:0;padding-right:0;display:inline-flex;justify-content:center;align-items:center;width:32px;border-left:none}
.el-dropdown .el-dropdown__caret-button>span{display:inline-flex}
.el-dropdown .el-dropdown__caret-button:before{content:"";position:absolute;display:block;width:1px;top:5px;bottom:5px;left:0;background:var(--el-overlay-color-lighter)}
.el-dropdown .el-dropdown__caret-button.el-button:before{background:var(--el-border-color);opacity:.5}
.el-dropdown .el-dropdown__caret-button:hover:before{top:0;bottom:0}
.el-dropdown .el-dropdown__caret-button .el-dropdown__icon{font-size:inherit;padding-left:0}
.el-dropdown .el-dropdown-selfdefine{outline:0}
.el-dropdown--large .el-dropdown__caret-button{width:40px}
.el-dropdown--small .el-dropdown__caret-button{width:24px}
/* ---- Dropdown menu and items ---- */
.el-dropdown-menu{position:relative;top:0;left:0;z-index:var(--el-dropdown-menu-index);padding:5px 0;margin:0;background-color:var(--el-bg-color-overlay);border:none;border-radius:var(--el-border-radius-base);box-shadow:none;list-style:none}
.el-dropdown-menu__item{display:flex;align-items:center;white-space:nowrap;list-style:none;line-height:22px;padding:5px 16px;margin:0;font-size:var(--el-font-size-base);color:var(--el-text-color-regular);cursor:pointer;outline:0}
.el-dropdown-menu__item:not(.is-disabled):focus{background-color:var(--el-dropdown-menuItem-hover-fill);color:var(--el-dropdown-menuItem-hover-color)}
.el-dropdown-menu__item i{margin-right:5px}
.el-dropdown-menu__item--divided{margin:6px 0;border-top:1px solid var(--el-border-color-lighter)}
.el-dropdown-menu__item.is-disabled{cursor:not-allowed;color:var(--el-text-color-disabled)}
/* Size variants */
.el-dropdown-menu--large{padding:7px 0}
.el-dropdown-menu--large .el-dropdown-menu__item{padding:7px 20px;line-height:22px;font-size:14px}
.el-dropdown-menu--large .el-dropdown-menu__item--divided{margin:8px 0}
.el-dropdown-menu--small{padding:3px 
0}
.el-dropdown-menu--small .el-dropdown-menu__item{padding:2px 12px;line-height:20px;font-size:12px}
.el-dropdown-menu--small .el-dropdown-menu__item--divided{margin:4px 0}
/* ---- Empty state ---- */
.el-empty{
  --el-empty-padding:40px 0;
  --el-empty-image-width:160px;
  --el-empty-description-margin-top:20px;
  --el-empty-bottom-margin-top:20px;
  --el-empty-fill-color-0:var(--el-color-white);
  --el-empty-fill-color-1:#fcfcfd;
  --el-empty-fill-color-2:#f8f9fb;
  --el-empty-fill-color-3:#f7f8fc;
  --el-empty-fill-color-4:#eeeff3;
  --el-empty-fill-color-5:#edeef2;
  --el-empty-fill-color-6:#e9ebef;
  --el-empty-fill-color-7:#e5e7e9;
  --el-empty-fill-color-8:#e0e3e9;
  --el-empty-fill-color-9:#d5d7de;
  display:flex;
  justify-content:center;
  align-items:center;
  flex-direction:column;
  text-align:center;
  box-sizing:border-box;
  padding:var(--el-empty-padding)
}
.el-empty__image{width:var(--el-empty-image-width)}
.el-empty__image img{-webkit-user-select:none;user-select:none;width:100%;height:100%;vertical-align:top;object-fit:contain}
.el-empty__image svg{color:var(--el-svg-monochrome-grey);fill:currentColor;width:100%;height:100%;vertical-align:top}
.el-empty__description{margin-top:var(--el-empty-description-margin-top)}
.el-empty__description p{margin:0;font-size:var(--el-font-size-base);color:var(--el-text-color-secondary)}
.el-empty__bottom{margin-top:var(--el-empty-bottom-margin-top)}
/* ---- Layout footer ---- */
.el-footer{--el-footer-padding:0 20px;--el-footer-height:60px;padding:var(--el-footer-padding);box-sizing:border-box;flex-shrink:0;height:var(--el-footer-height)}
/* ---- Form: label placement and inline layout ---- */
.el-form{--el-form-label-font-size:var(--el-font-size-base)}
.el-form--label-left .el-form-item__label{text-align:left}
.el-form--label-top .el-form-item{display:block}
.el-form--label-top .el-form-item .el-form-item__label{display:block;text-align:left;margin-bottom:8px;line-height:22px}
.el-form--inline .el-form-item{display:inline-flex;vertical-align:middle;margin-right:32px}
.el-form--inline.el-form--label-top{display:flex;flex-wrap:wrap}
.el-form--inline.el-form--label-top 
.el-form-item{display:block}
/* ---- Form: label-top spacing per size ---- */
.el-form--large.el-form--label-top .el-form-item .el-form-item__label{margin-bottom:12px;line-height:22px}
.el-form--default.el-form--label-top .el-form-item .el-form-item__label{margin-bottom:8px;line-height:22px}
.el-form--small.el-form--label-top .el-form-item .el-form-item__label{margin-bottom:4px;line-height:20px}
/* ---- Form item ---- */
.el-form-item{display:flex;--font-size:14px;margin-bottom:18px}
.el-form-item .el-form-item{margin-bottom:0}
.el-form-item .el-input__validateIcon{display:none}
/* Form item size variants */
.el-form-item--large{--font-size:14px;--el-form-label-font-size:var(--font-size);margin-bottom:22px}
.el-form-item--large .el-form-item__label,.el-form-item--large .el-form-item__content{line-height:40px}
.el-form-item--large .el-form-item__error{padding-top:4px}
.el-form-item--default{--font-size:14px;--el-form-label-font-size:var(--font-size);margin-bottom:18px}
.el-form-item--default .el-form-item__label,.el-form-item--default .el-form-item__content{line-height:32px}
.el-form-item--default .el-form-item__error{padding-top:2px}
.el-form-item--small{--font-size:12px;--el-form-label-font-size:var(--font-size);margin-bottom:18px}
.el-form-item--small .el-form-item__label,.el-form-item--small .el-form-item__content{line-height:24px}
.el-form-item--small .el-form-item__error{padding-top:2px}
/* Form item label / content / error message */
.el-form-item__label-wrap{display:flex}
.el-form-item__label-wrap .el-form-item__label{display:inline-block}
.el-form-item__label{flex:0 0 auto;text-align:right;font-size:var(--el-form-label-font-size);color:var(--el-text-color-regular);line-height:32px;padding:0 12px 0 0;box-sizing:border-box}
.el-form-item__content{display:flex;flex-wrap:wrap;align-items:center;flex:1;line-height:32px;position:relative;font-size:var(--font-size);min-width:0}
.el-form-item__content .el-input-group{vertical-align:top}
.el-form-item__error{color:var(--el-color-danger);font-size:12px;line-height:1;padding-top:2px;position:absolute;top:100%;left:0}
.el-form-item__error--inline{position:relative;top:auto;left:auto;display:inline-block;margin-left:10px}
/* Required marker: an asterisk is prepended to the label */
.el-form-item.is-required:not(.is-no-asterisk)>.el-form-item__label-wrap>.el-form-item__label:before,.el-form-item.is-required:not(.is-no-asterisk)>.el-form-item__label:before{content:"*";color:var(--el-color-danger);margin-right:4px}
/* Error state: the control outline is an inset box-shadow in the danger colour */
.el-form-item.is-error .el-input__inner,.el-form-item.is-error .el-input__inner:focus,.el-form-item.is-error .el-select-v2__wrapper,.el-form-item.is-error .el-select-v2__wrapper:focus,.el-form-item.is-error .el-textarea__inner,.el-form-item.is-error .el-textarea__inner:focus{box-shadow:0 0 0 1px var(--el-color-danger) inset}
.el-form-item.is-error .el-input-group__append .el-input__inner,.el-form-item.is-error .el-input-group__prepend .el-input__inner{box-shadow:0 0 0 1px transparent inset}
.el-form-item.is-error .el-input__validateIcon{color:var(--el-color-danger)}
.el-form-item--feedback .el-input__validateIcon{display:inline-flex}
/* ---- Layout header ---- */
.el-header{--el-header-padding:0 20px;--el-header-height:60px;padding:var(--el-header-padding);box-sizing:border-box;flex-shrink:0;height:var(--el-header-height)}
/* ---- Image viewer ---- */
.el-image-viewer__wrapper{position:fixed;top:0;right:0;bottom:0;left:0}
.el-image-viewer__btn{position:absolute;z-index:1;display:flex;align-items:center;justify-content:center;border-radius:50%;opacity:.8;cursor:pointer;box-sizing:border-box;-webkit-user-select:none;user-select:none}
.el-image-viewer__btn .el-icon{font-size:inherit;cursor:pointer}
.el-image-viewer__close{top:40px;right:40px;width:40px;height:40px;font-size:40px}
.el-image-viewer__canvas{width:100%;height:100%;display:flex;justify-content:center;align-items:center;-webkit-user-select:none;user-select:none}
.el-image-viewer__actions{left:50%;bottom:30px;transform:translate(-50%);width:282px;height:44px;padding:0 
23px;background-color:var(--el-text-color-regular);border-color:#fff;border-radius:22px}
/* ---- Image viewer: action bar and navigation buttons ---- */
.el-image-viewer__actions__inner{width:100%;height:100%;text-align:justify;cursor:default;font-size:23px;color:#fff;display:flex;align-items:center;justify-content:space-around}
.el-image-viewer__prev{top:50%;transform:translateY(-50%);left:40px;width:44px;height:44px;font-size:24px;color:#fff;background-color:var(--el-text-color-regular);border-color:#fff}
.el-image-viewer__next{top:50%;transform:translateY(-50%);right:40px;text-indent:2px;width:44px;height:44px;font-size:24px;color:#fff;background-color:var(--el-text-color-regular);border-color:#fff}
/* Second .el-image-viewer__close rule in this sheet; its width/height/font-size come later in the cascade than the earlier one */
.el-image-viewer__close{width:44px;height:44px;font-size:24px;color:#fff;background-color:var(--el-text-color-regular);border-color:#fff}
.el-image-viewer__mask{position:absolute;width:100%;height:100%;top:0;left:0;opacity:.5;background:#000}
/* Viewer fade transition */
.viewer-fade-enter-active{animation:viewer-fade-in var(--el-transition-duration)}
.viewer-fade-leave-active{animation:viewer-fade-out var(--el-transition-duration)}
@keyframes viewer-fade-in{0%{transform:translate3d(0,-20px,0);opacity:0}to{transform:translateZ(0);opacity:1}}
@keyframes viewer-fade-out{0%{transform:translateZ(0);opacity:1}to{transform:translate3d(0,-20px,0);opacity:0}}
/* ---- Image ---- */
.el-image__error,.el-image__inner,.el-image__placeholder{width:100%;height:100%}
.el-image{position:relative;display:inline-block;overflow:hidden}
.el-image__inner{vertical-align:top}
.el-image__placeholder{background:var(--el-fill-color-light)}
.el-image__error{display:flex;justify-content:center;align-items:center;font-size:14px;background:var(--el-fill-color-light);color:var(--el-text-color-placeholder);vertical-align:middle}
.el-image__preview{cursor:pointer}
/* ---- Input number ---- */
.el-input-number{position:relative;display:inline-block;width:150px;line-height:30px}
.el-input-number .el-input{display:block}
.el-input-number 
.el-input__inner{-webkit-appearance:none;-moz-appearance:textfield;padding-left:42px;padding-right:42px;text-align:center}
/* ---- Input number: hide the native WebKit spin buttons ---- */
.el-input-number .el-input__inner::-webkit-inner-spin-button,.el-input-number .el-input__inner::-webkit-outer-spin-button{margin:0;-webkit-appearance:none}
/* ---- Input number: increase / decrease controls ---- */
.el-input-number__decrease,.el-input-number__increase{display:flex;justify-content:center;align-items:center;height:auto;position:absolute;z-index:1;top:1px;bottom:1px;width:32px;background:var(--el-fill-color-light);color:var(--el-text-color-regular);cursor:pointer;font-size:13px;-webkit-user-select:none;user-select:none}
.el-input-number__decrease:hover,.el-input-number__increase:hover{color:var(--el-color-primary)}
/* Hovering a control highlights the sibling input's outline */
.el-input-number__decrease:hover~.el-input:not(.is-disabled) .el-input__inner,.el-input-number__increase:hover~.el-input:not(.is-disabled) .el-input__inner{box-shadow:0 0 0 1px var(--el-input-focus-border-color,var(--el-color-primary)) inset}
.el-input-number__decrease.is-disabled,.el-input-number__increase.is-disabled{color:var(--el-disabled-text-color);cursor:not-allowed}
.el-input-number__increase{right:1px;border-radius:0 var(--el-border-radius-base) var(--el-border-radius-base) 0;border-left:var(--el-border)}
.el-input-number__decrease{left:1px;border-radius:var(--el-border-radius-base) 0 0 var(--el-border-radius-base);border-right:var(--el-border)}
/* Disabled state */
.el-input-number.is-disabled .el-input-number__decrease,.el-input-number.is-disabled .el-input-number__increase{border-color:var(--el-disabled-border-color);color:var(--el-disabled-border-color)}
.el-input-number.is-disabled .el-input-number__decrease:hover,.el-input-number.is-disabled .el-input-number__increase:hover{color:var(--el-disabled-border-color);cursor:not-allowed}
/* Size variants */
.el-input-number--large{width:180px;line-height:38px}
.el-input-number--large .el-input-number__decrease,.el-input-number--large .el-input-number__increase{width:40px;font-size:14px}
.el-input-number--large .el-input__inner{padding-left:47px;padding-right:47px}
.el-input-number--small{width:120px;line-height:22px}
.el-input-number--small .el-input-number__decrease,.el-input-number--small .el-input-number__increase{width:24px;font-size:12px}
.el-input-number--small .el-input__inner{padding-left:31px;padding-right:31px}
.el-input-number--small .el-input-number__decrease [class*=el-icon],.el-input-number--small .el-input-number__increase [class*=el-icon]{transform:scale(.9)}
/* Without controls / controls stacked on the right */
.el-input-number.is-without-controls .el-input__inner{padding-left:15px;padding-right:15px}
.el-input-number.is-controls-right .el-input__inner{padding-left:15px;padding-right:42px}
.el-input-number.is-controls-right .el-input-number__decrease,.el-input-number.is-controls-right .el-input-number__increase{--el-input-number-controls-height:15px;height:var(--el-input-number-controls-height);line-height:var(--el-input-number-controls-height)}
.el-input-number.is-controls-right .el-input-number__decrease [class*=el-icon],.el-input-number.is-controls-right .el-input-number__increase [class*=el-icon]{transform:scale(.8)}
.el-input-number.is-controls-right .el-input-number__increase{bottom:auto;left:auto;border-radius:0 var(--el-border-radius-base) 0 0;border-bottom:var(--el-border)}
.el-input-number.is-controls-right .el-input-number__decrease{right:1px;top:auto;left:auto;border-right:none;border-left:var(--el-border);border-radius:0 0 var(--el-border-radius-base) 0}
.el-input-number.is-controls-right[class*=large] [class*=decrease],.el-input-number.is-controls-right[class*=large] [class*=increase]{--el-input-number-controls-height:19px}
.el-input-number.is-controls-right[class*=small] [class*=decrease],.el-input-number.is-controls-right[class*=small] 
[class*=increase]{--el-input-number-controls-height:11px}
/* ---- Textarea: custom properties and wrapper ---- */
.el-textarea{--el-input-text-color:var(--el-text-color-regular);--el-input-border:var(--el-border);--el-input-hover-border:var(--el-border-color-hover);--el-input-focus-border:var(--el-color-primary);--el-input-transparent-border:0 0 0 1px transparent inset;--el-input-border-color:var(--el-border-color);--el-input-border-radius:var(--el-border-radius-base);--el-input-bg-color:var(--el-fill-color-blank);--el-input-icon-color:var(--el-text-color-placeholder);--el-input-placeholder-color:var(--el-text-color-placeholder);--el-input-hover-border-color:var(--el-border-color-hover);--el-input-clear-hover-color:var(--el-text-color-secondary);--el-input-focus-border-color:var(--el-color-primary)}
.el-textarea{position:relative;display:inline-block;width:100%;vertical-align:bottom;font-size:var(--el-font-size-base)}
/* The visible outline is an inset box-shadow; the real border is removed (border:none) */
.el-textarea__inner{position:relative;display:block;resize:vertical;padding:5px 15px;line-height:1.5;box-sizing:border-box;width:100%;font-size:inherit;font-family:inherit;color:var(--el-input-text-color,var(--el-text-color-regular));background-color:var(--el-input-bg-color,var(--el-fill-color-blank));background-image:none;-webkit-appearance:none;box-shadow:0 0 0 1px var(--el-input-border-color,var(--el-border-color)) inset;border-radius:var(--el-input-border-radius,var(--el-border-radius-base));transition:var(--el-transition-box-shadow);border:none}
.el-textarea__inner::placeholder{color:var(--el-input-placeholder-color,var(--el-text-color-placeholder))}
.el-textarea__inner:hover{box-shadow:0 0 0 1px var(--el-input-hover-border-color) inset}
.el-textarea__inner:focus{outline:0;box-shadow:0 0 0 1px var(--el-input-focus-border-color) inset}
.el-textarea .el-input__count{color:var(--el-color-info);background:var(--el-fill-color-blank);position:absolute;font-size:12px;line-height:14px;bottom:5px;right:10px}
/*
 * Disabled / exceeded textarea.
 * Because .el-textarea__inner has border:none, border-color alone paints nothing;
 * the outline colour must be set through the inset box-shadow, as the
 * .el-input.is-disabled / .el-input.is-exceed rules in this sheet do.
 * border-color is kept so that overrides which re-enable a real border still work.
 */
.el-textarea.is-disabled .el-textarea__inner{
  background-color:var(--el-disabled-bg-color);
  border-color:var(--el-disabled-border-color);
  box-shadow:0 0 0 1px var(--el-disabled-border-color) inset;
  color:var(--el-disabled-text-color);
  cursor:not-allowed
}
.el-textarea.is-disabled .el-textarea__inner::placeholder{color:var(--el-text-color-placeholder)}
.el-textarea.is-exceed .el-textarea__inner{
  border-color:var(--el-color-danger);
  box-shadow:0 0 0 1px var(--el-color-danger) inset
}
.el-textarea.is-exceed .el-input__count{color:var(--el-color-danger)}
/* ---- Input: custom properties and wrapper ---- */
.el-input{--el-input-text-color:var(--el-text-color-regular);--el-input-border:var(--el-border);--el-input-hover-border:var(--el-border-color-hover);--el-input-focus-border:var(--el-color-primary);--el-input-transparent-border:0 0 0 1px transparent inset;--el-input-border-color:var(--el-border-color);--el-input-border-radius:var(--el-border-radius-base);--el-input-bg-color:var(--el-fill-color-blank);--el-input-icon-color:var(--el-text-color-placeholder);--el-input-placeholder-color:var(--el-text-color-placeholder);--el-input-hover-border-color:var(--el-border-color-hover);--el-input-clear-hover-color:var(--el-text-color-secondary);--el-input-focus-border-color:var(--el-color-primary);position:relative;font-size:var(--el-font-size-base);display:inline-flex;width:100%;line-height:32px}
/* WebKit scrollbar styling on the input wrapper */
.el-input::-webkit-scrollbar{z-index:11;width:6px}
.el-input::-webkit-scrollbar:horizontal{height:6px}
.el-input::-webkit-scrollbar-thumb{border-radius:5px;width:6px;background:var(--el-text-color-disabled)}
.el-input::-webkit-scrollbar-corner{background:var(--el-fill-color-blank)}
.el-input::-webkit-scrollbar-track{background:var(--el-fill-color-blank)}
.el-input::-webkit-scrollbar-track-piece{background:var(--el-fill-color-blank);width:6px}
/* Clear button and character counter */
.el-input .el-input__clear{color:var(--el-input-icon-color);font-size:14px;cursor:pointer;transition:var(--el-transition-color);margin-left:8px}
.el-input .el-input__clear:hover{color:var(--el-input-clear-hover-color)}
.el-input .el-input__count{height:100%;display:inline-flex;align-items:center;color:var(--el-color-info);font-size:12px}
.el-input 
.el-input__count .el-input__count-inner{background:var(--el-fill-color-blank);line-height:initial;display:inline-block;padding:0 5px}
/* ---- Input: inner control; the outline is an inset box-shadow, the real border is removed ---- */
.el-input__inner{position:relative;-webkit-appearance:none;background-color:var(--el-input-bg-color,var(--el-fill-color-blank));background-image:none;border-radius:var(--el-input-border-radius,var(--el-border-radius-base));box-sizing:border-box;color:var(--el-input-text-color,var(--el-text-color-regular));display:inline-block;font-size:inherit;height:32px;line-height:32px;outline:0;padding:0 11px;transition:var(--el-transition-box-shadow);width:100%;box-shadow:0 0 0 1px var(--el-input-border-color,var(--el-border-color)) inset;border:none}
.el-input__inner::placeholder{color:var(--el-input-placeholder-color,var(--el-text-color-placeholder))}
.el-input__inner:hover{box-shadow:0 0 0 1px var(--el-input-hover-border-color) inset}
.el-input__inner:focus{outline:0;box-shadow:0 0 0 1px var(--el-input-focus-border-color) inset}
.el-input__inner[type=password]::-ms-reveal{display:none}
/* ---- Input: suffix / prefix icon slots ---- */
.el-input__suffix{display:inline-flex;position:absolute;height:100%;right:12px;top:0;text-align:center;color:var(--el-input-icon-color,var(--el-text-color-placeholder));transition:all var(--el-transition-duration);pointer-events:none}
.el-input__suffix-inner{pointer-events:all;display:inline-flex}
.el-input__prefix{display:inline-flex;position:absolute;height:100%;left:12px;top:0;text-align:center;color:var(--el-input-icon-color,var(--el-text-color-placeholder));transition:all var(--el-transition-duration)}
.el-input__prefix-inner{pointer-events:all;display:inline-flex}
.el-input .el-input__icon{height:inherit;display:flex;justify-content:center;align-items:center;transition:all var(--el-transition-duration)}
.el-input__validateIcon{pointer-events:none}
/*
 * Active state. The original used an empty var() fallback
 * (var(--el-input-focus-border,)); a concrete fallback is supplied so the
 * shadow colour stays the primary colour even if the custom property is unset.
 */
.el-input.is-active .el-input__inner{
  outline:0;
  box-shadow:0 0 0 1px var(--el-input-focus-border,var(--el-color-primary)) inset
}
.el-input.is-disabled .el-input__inner{background-color:var(--el-disabled-bg-color);box-shadow:0 0 0 1px 
var(--el-disabled-border-color) inset;color:var(--el-disabled-text-color);cursor:not-allowed}
/* ---- Input: disabled / exceeded states ---- */
.el-input.is-disabled .el-input__inner::placeholder{color:var(--el-text-color-placeholder)}
.el-input.is-disabled .el-input__icon{cursor:not-allowed}
.el-input.is-exceed .el-input__inner{box-shadow:0 0 0 1px var(--el-color-danger) inset}
.el-input.is-exceed .el-input__suffix .el-input__count{color:var(--el-color-danger)}
/* ---- Input: extra padding when prefix / suffix icons are present ---- */
.el-input--suffix .el-input__inner{padding-right:31px}
.el-input--suffix--password-clear .el-input__inner{padding-right:55px}
.el-input--prefix .el-input__inner{padding-left:31px}
/* ---- Input: large size ---- */
.el-input--large{font-size:14px;line-height:38px}
.el-input--large .el-input__inner{height:40px;line-height:40px;padding:0 15px}
.el-input--large .el-input__icon{line-height:40px}
.el-input--large.el-input--prefix .el-input__inner{padding-left:35px}
.el-input--large.el-input--suffix .el-input__inner{padding-right:35px}
.el-input--large .el-input__prefix{left:16px}
.el-input--large .el-input__suffix{right:16px}
/* ---- Input: small size ---- */
.el-input--small{font-size:12px;line-height:22px}
.el-input--small .el-input__inner{height:24px;line-height:24px;padding:0 7px}
.el-input--small .el-input__icon{line-height:24px}
.el-input--small.el-input--prefix .el-input__inner{padding-left:25px}
.el-input--small.el-input--suffix .el-input__inner{padding-right:25px}
.el-input--small .el-input__prefix{left:8px}
.el-input--small .el-input__suffix{right:8px}
/* ---- Input group: table-style layout with prepend / append cells ---- */
.el-input-group{line-height:normal;display:inline-table;width:100%;border-collapse:separate;border-spacing:0}
.el-input-group>.el-input__inner{vertical-align:middle;display:table-cell}
.el-input-group__append,.el-input-group__prepend{background-color:var(--el-fill-color-light);color:var(--el-color-info);vertical-align:middle;display:table-cell;position:relative;border-radius:var(--el-input-border-radius);padding:0 20px;width:1px;white-space:nowrap}
.el-input-group__append:focus,.el-input-group__prepend:focus{outline:0}
.el-input-group__append .el-button,.el-input-group__append 
.el-select,.el-input-group__prepend .el-button,.el-input-group__prepend .el-select{display:inline-block;margin:0 -20px}
/* ---- Input group: embedded buttons / selects inherit the cell's look ---- */
.el-input-group__append button.el-button,.el-input-group__append button.el-button:hover,.el-input-group__append div.el-select .el-input__inner,.el-input-group__append div.el-select:hover .el-input__inner,.el-input-group__prepend button.el-button,.el-input-group__prepend button.el-button:hover,.el-input-group__prepend div.el-select .el-input__inner,.el-input-group__prepend div.el-select:hover .el-input__inner{border-color:transparent;background-color:transparent;color:inherit}
.el-input-group__append .el-button,.el-input-group__append .el-input,.el-input-group__prepend .el-button,.el-input-group__prepend .el-input{font-size:inherit}
/* Prepend / append cells: three-sided outline drawn with inset shadows */
.el-input-group__prepend{border-right:0;border-top-right-radius:0;border-bottom-right-radius:0;box-shadow:1px 0 0 0 var(--el-input-border-color) inset,0 1px 0 0 var(--el-input-border-color) inset,0 -1px 0 0 var(--el-input-border-color) inset}
.el-input-group__append{border-left:0;border-top-left-radius:0;border-bottom-left-radius:0;box-shadow:0 1px 0 0 var(--el-input-border-color) inset,0 -1px 0 0 var(--el-input-border-color) inset,-1px 0 0 0 var(--el-input-border-color) inset}
/* ---- Input group with a prepend cell ---- */
.el-input-group--prepend .el-input__inner{box-shadow:1px 0 0 0 var(--el-input-border-color) inset,0 1px 0 0 var(--el-input-border-color) inset,0 -1px 0 0 var(--el-input-border-color) inset}
.el-input-group--prepend>.el-input__inner{border-top-left-radius:0;border-bottom-left-radius:0;box-shadow:0 0 0 1px var(--el-input-border-color) inset}
.el-input-group--prepend>.el-input__inner:hover{box-shadow:0 0 0 1px var(--el-input-hover-border-color) inset}
.el-input-group--prepend>.el-input__inner:focus{outline:0;box-shadow:0 0 0 1px var(--el-input-focus-border-color) inset}
.el-input-group--prepend .el-input-group__prepend .el-select .el-input .el-input__inner{border-top-right-radius:0;border-bottom-right-radius:0}
.el-input-group--prepend .el-input-group__prepend .el-select .el-input .el-input__inner:focus{outline:0;z-index:2;box-shadow:1px 0 0 0 var(--el-input-focus-border-color) inset,1px 0 0 0 var(--el-input-focus-border-color),0 1px 0 0 var(--el-input-focus-border-color) inset,0 -1px 0 0 var(--el-input-focus-border-color) inset!important}
.el-input-group--prepend .el-input-group__prepend .el-select .el-input.is-focus .el-input__inner{box-shadow:1px 0 0 0 var(--el-input-focus-border-color) inset,1px 0 0 0 var(--el-input-focus-border-color),0 1px 0 0 var(--el-input-focus-border-color) inset,0 -1px 0 0 var(--el-input-focus-border-color) inset!important;z-index:2}
.el-input-group--prepend .el-input-group__prepend .el-select .el-input.is-focus .el-input__inner:focus{outline:0;z-index:2;box-shadow:1px 0 0 0 var(--el-input-focus-border-color) inset,1px 0 0 0 var(--el-input-focus-border-color),0 1px 0 0 var(--el-input-focus-border-color) inset,0 -1px 0 0 var(--el-input-focus-border-color) inset!important}
.el-input-group--prepend .el-input-group__prepend .el-select:hover .el-input__inner{z-index:1;box-shadow:1px 0 0 0 var(--el-input-hover-border-color) inset,1px 0 0 0 var(--el-input-hover-border-color),0 1px 0 0 var(--el-input-hover-border-color) inset,0 -1px 0 0 var(--el-input-hover-border-color) inset!important}
/* ---- Input group with an append cell ---- */
.el-input-group--append>.el-input__inner{border-top-right-radius:0;border-bottom-right-radius:0}
.el-input-group--append .el-input-group__append .el-select .el-input .el-input__inner{border-top-left-radius:0;border-bottom-left-radius:0;box-shadow:-1px 0 0 0 var(--el-input-border-color),-1px 0 0 0 var(--el-input-border-color) inset,0 1px 0 0 var(--el-input-border-color) inset,0 -1px 0 0 var(--el-input-border-color) inset!important}
.el-input-group--append .el-input-group__append .el-select .el-input .el-input__inner:focus{outline:0;z-index:2;box-shadow:-1px 0 0 0 var(--el-input-focus-border-color),-1px 0 0 0 var(--el-input-focus-border-color) inset,0 1px 0 0 var(--el-input-focus-border-color) inset,0 -1px 0 0 var(--el-input-focus-border-color) inset!important}
.el-input-group--append .el-input-group__append .el-select .el-input.is-focus .el-input__inner{outline:0;z-index:2;box-shadow:-1px 0 0 0 var(--el-input-focus-border-color),-1px 0 0 0 var(--el-input-focus-border-color) inset,0 1px 0 0 var(--el-input-focus-border-color) inset,0 -1px 0 0 var(--el-input-focus-border-color) inset!important}
.el-input-group--append .el-input-group__append .el-select .el-input.is-focus .el-input__inner:focus{outline:0;z-index:2;box-shadow:-1px 0 0 0 var(--el-input-focus-border-color),-1px 0 0 0 var(--el-input-focus-border-color) inset,0 1px 0 0 var(--el-input-focus-border-color) inset,0 -1px 0 0 var(--el-input-focus-border-color) inset!important}
.el-input-group--append .el-input-group__append .el-select:hover .el-input__inner{z-index:1;box-shadow:-1px 0 0 0 var(--el-input-hover-border-color),-1px 0 0 0 var(--el-input-hover-border-color) inset,0 1px 0 0 var(--el-input-hover-border-color) inset,0 -1px 0 0 var(--el-input-hover-border-color) inset!important}
/* ---- Link: custom properties followed by the base rule ---- */
.el-link{--el-link-font-size:var(--el-font-size-base);--el-link-font-weight:var(--el-font-weight-primary);--el-link-text-color:var(--el-text-color-regular);--el-link-hover-text-color:var(--el-color-primary);--el-link-disabled-text-color:var(--el-text-color-placeholder);display:inline-flex;flex-direction:row;align-items:center;justify-content:center;vertical-align:middle;position:relative;text-decoration:none;outline:0;cursor:pointer;padding:0;font-size:var(--el-link-font-size);font-weight:var(--el-link-font-weight);color:var(--el-link-text-color)}
.el-link:hover{color:var(--el-link-hover-text-color)}
.el-link.is-underline:hover:after{content:"";position:absolute;left:0;right:0;height:0;bottom:0;border-bottom:1px solid var(--el-link-hover-text-color)}
.el-link.is-disabled{color:var(--el-link-disabled-text-color);cursor:not-allowed}
.el-link 
[class*=el-icon-]+span{margin-left:5px}
.el-link.el-link--default:after{border-color:var(--el-link-hover-text-color)}
.el-link__inner{display:inline-flex;justify-content:center;align-items:center}
/* ---- Link colour variants: each overrides the three --el-link-* colour properties ---- */
.el-link.el-link--primary{--el-link-text-color:var(--el-color-primary);--el-link-hover-text-color:var(--el-color-primary-light-3);--el-link-disabled-text-color:var(--el-color-primary-light-5)}
.el-link.el-link--primary:after{border-color:var(--el-link-text-color)}
.el-link.el-link--primary.is-underline:hover:after{border-color:var(--el-link-text-color)}
.el-link.el-link--success{--el-link-text-color:var(--el-color-success);--el-link-hover-text-color:var(--el-color-success-light-3);--el-link-disabled-text-color:var(--el-color-success-light-5)}
.el-link.el-link--success:after{border-color:var(--el-link-text-color)}
.el-link.el-link--success.is-underline:hover:after{border-color:var(--el-link-text-color)}
.el-link.el-link--warning{--el-link-text-color:var(--el-color-warning);--el-link-hover-text-color:var(--el-color-warning-light-3);--el-link-disabled-text-color:var(--el-color-warning-light-5)}
.el-link.el-link--warning:after{border-color:var(--el-link-text-color)}
.el-link.el-link--warning.is-underline:hover:after{border-color:var(--el-link-text-color)}
.el-link.el-link--danger{--el-link-text-color:var(--el-color-danger);--el-link-hover-text-color:var(--el-color-danger-light-3);--el-link-disabled-text-color:var(--el-color-danger-light-5)}
.el-link.el-link--danger:after{border-color:var(--el-link-text-color)}
.el-link.el-link--danger.is-underline:hover:after{border-color:var(--el-link-text-color)}
.el-link.el-link--error{--el-link-text-color:var(--el-color-error);--el-link-hover-text-color:var(--el-color-error-light-3);--el-link-disabled-text-color:var(--el-color-error-light-5)}
.el-link.el-link--error:after{border-color:var(--el-link-text-color)}
.el-link.el-link--error.is-underline:hover:after{border-color:var(--el-link-text-color)}
/* The custom-property name --el-color-info must stay a single token inside var(); a line break inside it would invalidate the reference */
.el-link.el-link--info{--el-link-text-color:var(--el-color-info);--el-link-hover-text-color:var(--el-color-info-light-3);--el-link-disabled-text-color:var(--el-color-info-light-5)}
.el-link.el-link--info:after{border-color:var(--el-link-text-color)}
.el-link.el-link--info.is-underline:hover:after{border-color:var(--el-link-text-color)}
/* ---- Loading mask and spinner ---- */
:root{--el-loading-spinner-size:42px;--el-loading-fullscreen-spinner-size:50px}
.el-loading-parent--relative{position:relative!important}
.el-loading-parent--hidden{overflow:hidden!important}
.el-loading-mask{position:absolute;z-index:2000;background-color:var(--el-mask-color);margin:0;top:0;right:0;bottom:0;left:0;transition:opacity var(--el-transition-duration)}
.el-loading-mask.is-fullscreen{position:fixed}
/* margin-top is minus half the spinner size, which together with top:50% centres it vertically */
.el-loading-mask.is-fullscreen .el-loading-spinner{margin-top:calc((0px - var(--el-loading-fullscreen-spinner-size))/ 2)}
.el-loading-mask.is-fullscreen .el-loading-spinner .circular{height:var(--el-loading-fullscreen-spinner-size);width:var(--el-loading-fullscreen-spinner-size)}
.el-loading-spinner{top:50%;margin-top:calc((0px - var(--el-loading-spinner-size))/ 2);width:100%;text-align:center;position:absolute}
.el-loading-spinner .el-loading-text{color:var(--el-color-primary);margin:3px 0;font-size:14px}
.el-loading-spinner .circular{display:inline;height:var(--el-loading-spinner-size);width:var(--el-loading-spinner-size);animation:loading-rotate 2s linear infinite}
.el-loading-spinner .path{animation:loading-dash 1.5s ease-in-out infinite;stroke-dasharray:90,150;stroke-dashoffset:0;stroke-width:2;stroke:var(--el-color-primary);stroke-linecap:round}
.el-loading-spinner i{color:var(--el-color-primary)}
.el-loading-fade-enter-from,.el-loading-fade-leave-to{opacity:0}
@keyframes loading-rotate{to{transform:rotate(360deg)}}
@keyframes 
loading-dash{0%{stroke-dasharray:1,200;stroke-dashoffset:0}50%{stroke-dasharray:90,150;stroke-dashoffset:-40px}to{stroke-dasharray:90,150;stroke-dashoffset:-120px}}.el-main{--el-main-padding:20px;display:block;flex:1;flex-basis:auto;overflow:auto;box-sizing:border-box;padding:var(--el-main-padding)}.el-menu{--el-menu-active-color:var(--el-color-primary);--el-menu-text-color:var(--el-text-color-primary);--el-menu-hover-text-color:var(--el-text-color-primary);--el-menu-bg-color:var(--el-fill-color-blank);--el-menu-hover-bg-color:var(--el-color-primary-light-9);--el-menu-item-height:56px;--el-menu-item-font-size:var(--el-font-size-base);--el-menu-item-hover-fill:var(--el-color-primary-light-9);--el-menu-border-color:var(--el-border-color);border-right:solid 1px var(--el-menu-border-color);list-style:none;position:relative;margin:0;padding-left:0;background-color:var(--el-menu-bg-color);box-sizing:border-box}.el-menu--horizontal{display:flex;flex-wrap:nowrap;border-bottom:solid 1px var(--el-menu-border-color);border-right:none}.el-menu--horizontal>.el-menu-item{display:inline-flex;justify-content:center;align-items:center;height:100%;margin:0;border-bottom:2px solid transparent;color:var(--el-menu-text-color)}.el-menu--horizontal>.el-menu-item a,.el-menu--horizontal>.el-menu-item a:hover{color:inherit}.el-menu--horizontal>.el-menu-item:not(.is-disabled):focus,.el-menu--horizontal>.el-menu-item:not(.is-disabled):hover{background-color:#fff}.el-menu--horizontal>.el-sub-menu:focus,.el-menu--horizontal>.el-sub-menu:hover{outline:0}.el-menu--horizontal>.el-sub-menu:hover .el-sub-menu__title{color:var(--el-menu-hover-text-color)}.el-menu--horizontal>.el-sub-menu.is-active .el-sub-menu__title{border-bottom:2px solid var(--el-menu-active-color);color:var(--el-menu-active-color)}.el-menu--horizontal>.el-sub-menu .el-sub-menu__title{height:100%;border-bottom:2px solid transparent;color:var(--el-menu-text-color)}.el-menu--horizontal>.el-sub-menu 
.el-sub-menu__title:hover{background-color:var(--el-bg-color-overlay)}.el-menu--horizontal>.el-sub-menu .el-sub-menu__icon-arrow{position:static;vertical-align:middle;margin-left:8px;margin-top:-3px}.el-menu--horizontal .el-menu .el-menu-item,.el-menu--horizontal .el-menu .el-sub-menu__title{background-color:var(--el-menu-bg-color);display:flex;align-items:center;height:36px;padding:0 10px;color:var(--el-menu-text-color)}.el-menu--horizontal .el-menu .el-sub-menu__title{padding-right:40px}.el-menu--horizontal .el-menu .el-menu-item.is-active,.el-menu--horizontal .el-menu .el-sub-menu.is-active>.el-sub-menu__title{color:var(--el-menu-active-color)}.el-menu--horizontal .el-menu-item:not(.is-disabled):focus,.el-menu--horizontal .el-menu-item:not(.is-disabled):hover{outline:0;color:var(--el-menu-hover-text-color);background-color:var(--el-menu-hover-bg-color)}.el-menu--horizontal>.el-menu-item.is-active{border-bottom:2px solid var(--el-menu-active-color);color:var(--el-menu-active-color)!important}.el-menu--collapse{width:64px}.el-menu--collapse>.el-menu-item [class^=el-icon],.el-menu--collapse>.el-sub-menu>.el-sub-menu__title [class^=el-icon]{margin:0;vertical-align:middle;width:24px;text-align:center}.el-menu--collapse>.el-menu-item .el-sub-menu__icon-arrow,.el-menu--collapse>.el-sub-menu>.el-sub-menu__title .el-sub-menu__icon-arrow{display:none}.el-menu--collapse>.el-menu-item>span,.el-menu--collapse>.el-sub-menu>.el-sub-menu__title>span{height:0;width:0;overflow:hidden;visibility:hidden;display:inline-block}.el-menu--collapse>.el-menu-item.is-active i{color:inherit}.el-menu--collapse .el-menu .el-sub-menu{min-width:200px}.el-menu--collapse .el-sub-menu{position:relative}.el-menu--collapse .el-sub-menu .el-menu{position:absolute;margin-left:5px;top:0;left:100%;z-index:10;border:1px solid var(--el-border-color-light);border-radius:var(--el-border-radius-small);box-shadow:var(--el-box-shadow-light)}.el-menu--collapse .el-sub-menu.is-opened>.el-sub-menu__title 
.el-sub-menu__icon-arrow{transform:none}.el-menu--collapse .el-sub-menu.is-active .el-sub-menu__title{color:var(--el-menu-active-color)}.el-menu--popup{z-index:100;min-width:200px;border:none;padding:5px 0;border-radius:var(--el-border-radius-small);box-shadow:var(--el-box-shadow-light)}.el-menu .el-icon{flex-shrink:0}.el-menu-item{display:flex;align-items:center;height:var(--el-menu-item-height);line-height:var(--el-menu-item-height);font-size:var(--el-menu-item-font-size);color:var(--el-menu-text-color);padding:0 20px;list-style:none;cursor:pointer;position:relative;transition:border-color var(--el-transition-duration),background-color var(--el-transition-duration),color var(--el-transition-duration);box-sizing:border-box;white-space:nowrap}.el-menu-item *{vertical-align:bottom}.el-menu-item i{color:inherit}.el-menu-item:focus,.el-menu-item:hover{outline:0}.el-menu-item:hover{background-color:var(--el-menu-hover-bg-color)}.el-menu-item.is-disabled{opacity:.25;cursor:not-allowed;background:0 0!important}.el-menu-item [class^=el-icon]{margin-right:5px;width:24px;text-align:center;font-size:18px;vertical-align:middle}.el-menu-item.is-active{color:var(--el-menu-active-color)}.el-menu-item.is-active i{color:inherit}.el-menu-item .el-menu-tooltip__trigger{position:absolute;left:0;top:0;height:100%;width:100%;display:inline-flex;align-items:center;box-sizing:border-box;padding:0 20px}.el-sub-menu{list-style:none;margin:0;padding-left:0}.el-sub-menu__title{display:flex;align-items:center;height:var(--el-menu-item-height);line-height:var(--el-menu-item-height);font-size:var(--el-menu-item-font-size);color:var(--el-menu-text-color);padding:0 20px;list-style:none;cursor:pointer;position:relative;transition:border-color var(--el-transition-duration),background-color var(--el-transition-duration),color var(--el-transition-duration);box-sizing:border-box;white-space:nowrap}.el-sub-menu__title *{vertical-align:bottom}.el-sub-menu__title 
i{color:inherit}.el-sub-menu__title:focus,.el-sub-menu__title:hover{outline:0}.el-sub-menu__title.is-disabled{opacity:.25;cursor:not-allowed;background:0 0!important}.el-sub-menu__title:hover{background-color:var(--el-menu-hover-bg-color)}.el-sub-menu .el-menu{border:none}.el-sub-menu .el-menu-item{height:50px;line-height:50px;padding:0 45px;min-width:200px}.el-sub-menu__hide-arrow .el-sub-menu__icon-arrow{display:none!important}.el-sub-menu.is-active .el-sub-menu__title{border-bottom-color:var(--el-menu-active-color)}.el-sub-menu.is-opened>.el-sub-menu__title .el-sub-menu__icon-arrow{transform:rotate(180deg)}.el-sub-menu.is-disabled .el-menu-item,.el-sub-menu.is-disabled .el-sub-menu__title{opacity:.25;cursor:not-allowed;background:0 0!important}.el-sub-menu .el-icon{vertical-align:middle;margin-right:5px;width:24px;text-align:center;font-size:18px}.el-sub-menu .el-icon.el-sub-menu__icon-more{margin-right:0!important}.el-sub-menu .el-sub-menu__icon-arrow{position:absolute;top:50%;right:20px;margin-top:-7px;transition:transform var(--el-transition-duration);font-size:12px;margin-right:0;width:inherit}.el-menu-item-group>ul{padding:0}.el-menu-item-group__title{padding:7px 0 7px 20px;line-height:normal;font-size:12px;color:var(--el-text-color-secondary)}.horizontal-collapse-transition .el-sub-menu__title .el-sub-menu__icon-arrow{transition:var(--el-transition-duration-fast);opacity:0}.el-message-box{--el-messagebox-title-color:var(--el-text-color-primary);--el-messagebox-width:420px;--el-messagebox-border-radius:4px;--el-messagebox-font-size:var(--el-font-size-large);--el-messagebox-content-font-size:var(--el-font-size-base);--el-messagebox-content-color:var(--el-text-color-regular);--el-messagebox-error-font-size:12px;--el-messagebox-padding-primary:15px}.el-message-box{display:inline-block;width:var(--el-messagebox-width);padding-bottom:10px;vertical-align:middle;background-color:var(--el-bg-color);border-radius:var(--el-messagebox-border-radius);border:1px solid 
var(--el-border-color-lighter);font-size:var(--el-messagebox-font-size);box-shadow:var(--el-box-shadow-light);text-align:left;overflow:hidden;-webkit-backface-visibility:hidden;backface-visibility:hidden}.el-overlay.is-message-box .el-overlay-message-box{text-align:center;position:fixed;top:0;right:0;bottom:0;left:0;overflow:auto}.el-overlay.is-message-box .el-overlay-message-box:after{content:"";display:inline-block;height:100%;width:0;vertical-align:middle}.el-message-box.is-draggable .el-message-box__header{cursor:move;-webkit-user-select:none;user-select:none}.el-message-box__header{position:relative;padding:var(--el-messagebox-padding-primary);padding-bottom:10px}.el-message-box__title{padding-left:0;margin-bottom:0;font-size:var(--el-messagebox-font-size);line-height:1;color:var(--el-messagebox-title-color)}.el-message-box__headerbtn{position:absolute;top:var(--el-messagebox-padding-primary);right:var(--el-messagebox-padding-primary);padding:0;border:none;outline:0;background:0 0;font-size:var(--el-message-close-size,16px);cursor:pointer}.el-message-box__headerbtn .el-message-box__close{color:var(--el-color-info);font-size:inherit}.el-message-box__headerbtn:focus .el-message-box__close,.el-message-box__headerbtn:hover .el-message-box__close{color:var(--el-color-primary)}.el-message-box__content{padding:10px var(--el-messagebox-padding-primary);color:var(--el-messagebox-content-color);font-size:var(--el-messagebox-content-font-size)}.el-message-box__container{position:relative}.el-message-box__input{padding-top:15px}.el-message-box__input div.invalid>input{border-color:var(--el-color-error)}.el-message-box__input 
div.invalid>input:focus{border-color:var(--el-color-error)}.el-message-box__status{position:absolute;top:50%;transform:translateY(-50%);font-size:24px!important}.el-message-box__status:before{padding-left:1px}.el-message-box__status.el-icon{position:absolute}.el-message-box__status+.el-message-box__message{padding-left:36px;padding-right:12px;word-break:break-word}.el-message-box__status.el-message-box-icon--success{--el-messagebox-color:var(--el-color-success);color:var(--el-messagebox-color)}.el-message-box__status.el-message-box-icon--info{--el-messagebox-color:var(--el-color-info);color:var(--el-messagebox-color)}.el-message-box__status.el-message-box-icon--warning{--el-messagebox-color:var(--el-color-warning);color:var(--el-messagebox-color)}.el-message-box__status.el-message-box-icon--error{--el-messagebox-color:var(--el-color-error);color:var(--el-messagebox-color)}.el-message-box__message{margin:0}.el-message-box__message p{margin:0;line-height:24px}.el-message-box__errormsg{color:var(--el-color-error);font-size:var(--el-messagebox-error-font-size);min-height:18px;margin-top:2px}.el-message-box__btns{padding:5px 15px 0;display:flex;flex-wrap:wrap;justify-content:flex-end;align-items:center}.el-message-box__btns button:nth-child(2){margin-left:10px}.el-message-box__btns-reverse{flex-direction:row-reverse}.el-message-box--center .el-message-box__title{position:relative;display:flex;align-items:center;justify-content:center}.el-message-box--center .el-message-box__status{position:relative;top:auto;padding-right:5px;text-align:center;transform:translateY(-1px)}.el-message-box--center .el-message-box__message{margin-left:0}.el-message-box--center .el-message-box__btns{justify-content:center}.el-message-box--center .el-message-box__content{padding-left:calc(var(--el-messagebox-padding-primary) + 12px);padding-right:calc(var(--el-messagebox-padding-primary) + 12px);text-align:center}.fade-in-linear-enter-active .el-overlay-message-box{animation:msgbox-fade-in 
var(--el-transition-duration)}

/* ---------------------------------------------------------------------------
 * Message box: enter / leave animation
 * ------------------------------------------------------------------------- */

/* Leaving plays the enter keyframes in reverse. */
.fade-in-linear-leave-active .el-overlay-message-box {
  animation: msgbox-fade-in var(--el-transition-duration) reverse;
}

@keyframes msgbox-fade-in {
  0% {
    transform: translate3d(0, -20px, 0);
    opacity: 0;
  }
  to {
    transform: translateZ(0);
    opacity: 1;
  }
}

/* NOTE(review): not referenced by the rules on this line; kept because other
 * parts of the stylesheet or inline styles may use it. */
@keyframes msgbox-fade-out {
  0% {
    transform: translateZ(0);
    opacity: 1;
  }
  to {
    transform: translate3d(0, -20px, 0);
    opacity: 0;
  }
}

/* ---------------------------------------------------------------------------
 * Message
 * ------------------------------------------------------------------------- */

/* Component-level variables; the typed variants below override the colour
 * ones. */
.el-message {
  --el-message-min-width: 380px;
  --el-message-bg-color: var(--el-color-info-light-9);
  --el-message-border-color: var(--el-border-color-lighter);
  --el-message-padding: 15px 15px 15px 20px;
  --el-message-close-size: 16px;
  --el-message-close-icon-color: var(--el-text-color-placeholder);
  --el-message-close-hover-color: var(--el-text-color-secondary);
}

.el-message {
  min-width: var(--el-message-min-width);
  box-sizing: border-box;
  border-radius: var(--el-border-radius-base);
  border-width: var(--el-border-width-base);
  border-style: var(--el-border-style-base);
  border-color: var(--el-message-border-color);
  /* Horizontally centred: left:50% shifted back by half its own width. */
  position: fixed;
  left: 50%;
  top: 20px;
  transform: translate(-50%);
  background-color: var(--el-message-bg-color);
  /* Single transition declaration. An earlier hard-coded
   * "opacity .3s, transform .4s, top .4s" in this rule was always overridden
   * by this one and has been removed. */
  transition: opacity var(--el-transition-duration), transform .4s, top .4s;
  padding: var(--el-message-padding);
  display: flex;
  align-items: center;
}

.el-message.is-center {
  justify-content: center;
}

.el-message.is-closable .el-message__content {
  padding-right: 16px;
}

.el-message p {
  margin: 0;
}

/* Typed variants: rebind background, border and text colour variables. */
.el-message--success {
  --el-message-bg-color: var(--el-color-success-light-9);
  --el-message-border-color: var(--el-color-success-light-8);
  --el-message-text-color: var(--el-color-success);
}
.el-message--success .el-message__content,
.el-message .el-message-icon--success {
  color: var(--el-message-text-color);
}

.el-message--info {
  --el-message-bg-color: var(--el-color-info-light-9);
  --el-message-border-color: var(--el-color-info-light-8);
  --el-message-text-color: var(--el-color-info);
}
/* The second selector of this list is completed on the next line. */
.el-message--info .el-message__content,
.el-message
.el-message-icon--info{color:var(--el-message-text-color)}.el-message--warning{--el-message-bg-color:var(--el-color-warning-light-9);--el-message-border-color:var(--el-color-warning-light-8);--el-message-text-color:var(--el-color-warning)}.el-message--warning .el-message__content,.el-message .el-message-icon--warning{color:var(--el-message-text-color)}.el-message--error{--el-message-bg-color:var(--el-color-error-light-9);--el-message-border-color:var(--el-color-error-light-8);--el-message-text-color:var(--el-color-error)}.el-message--error .el-message__content,.el-message .el-message-icon--error{color:var(--el-message-text-color)}.el-message__icon{margin-right:10px}.el-message .el-message__badge{position:absolute;top:-8px;right:-8px}.el-message__content{padding:0;font-size:14px;line-height:1}.el-message__content:focus{outline-width:0}.el-message .el-message__closeBtn{position:absolute;top:50%;right:15px;transform:translateY(-50%);cursor:pointer;color:var(--el-message-close-icon-color);font-size:var(--el-message-close-size)}.el-message .el-message__closeBtn:focus{outline-width:0}.el-message .el-message__closeBtn:hover{color:var(--el-message-close-hover-color)}.el-message-fade-enter-from,.el-message-fade-leave-to{opacity:0;transform:translate(-50%,-100%)}.el-notification{--el-notification-width:330px;--el-notification-padding:14px 26px 14px 13px;--el-notification-radius:8px;--el-notification-shadow:var(--el-box-shadow-light);--el-notification-border-color:var(--el-border-color-lighter);--el-notification-icon-size:24px;--el-notification-close-font-size:var(--el-message-close-size, 
16px);--el-notification-group-margin-left:13px;--el-notification-group-margin-right:8px;--el-notification-content-font-size:var(--el-font-size-base);--el-notification-content-color:var(--el-text-color-regular);--el-notification-title-font-size:16px;--el-notification-title-color:var(--el-text-color-primary);--el-notification-close-color:var(--el-text-color-secondary);--el-notification-close-hover-color:var(--el-text-color-regular)}.el-notification{display:flex;width:var(--el-notification-width);padding:var(--el-notification-padding);border-radius:var(--el-notification-radius);box-sizing:border-box;border:1px solid var(--el-notification-border-color);position:fixed;background-color:var(--el-bg-color-overlay);box-shadow:var(--el-notification-shadow);transition:opacity var(--el-transition-duration),transform var(--el-transition-duration),left var(--el-transition-duration),right var(--el-transition-duration),top .4s,bottom var(--el-transition-duration);overflow-wrap:anywhere;overflow:hidden;z-index:9999}.el-notification.right{right:16px}.el-notification.left{left:16px}.el-notification__group{margin-left:var(--el-notification-group-margin-left);margin-right:var(--el-notification-group-margin-right)}.el-notification__title{font-weight:700;font-size:var(--el-notification-title-font-size);line-height:var(--el-notification-icon-size);color:var(--el-notification-title-color);margin:0}.el-notification__content{font-size:var(--el-notification-content-font-size);line-height:24px;margin:6px 0 0;color:var(--el-notification-content-color);text-align:justify}.el-notification__content p{margin:0}.el-notification .el-notification__icon{height:var(--el-notification-icon-size);width:var(--el-notification-icon-size);font-size:var(--el-notification-icon-size)}.el-notification .el-notification__closeBtn{position:absolute;top:18px;right:15px;cursor:pointer;color:var(--el-notification-close-color);font-size:var(--el-notification-close-font-size)}.el-notification 
.el-notification__closeBtn:hover {
  color: var(--el-notification-close-hover-color);
}

/* ---------------------------------------------------------------------------
 * Notification: typed colours
 * Each type sets --el-notification-icon-color and applies it as the colour.
 * ------------------------------------------------------------------------- */
.el-notification .el-notification--success {
  --el-notification-icon-color: var(--el-color-success);
  color: var(--el-notification-icon-color);
}

.el-notification .el-notification--info {
  --el-notification-icon-color: var(--el-color-info);
  color: var(--el-notification-icon-color);
}

.el-notification .el-notification--warning {
  --el-notification-icon-color: var(--el-color-warning);
  color: var(--el-notification-icon-color);
}

.el-notification .el-notification--error {
  --el-notification-icon-color: var(--el-color-error);
  color: var(--el-notification-icon-color);
}

/* Enter transition: start shifted by the full width towards the side the
 * notification is anchored to. */
.el-notification-fade-enter-from.right {
  right: 0;
  transform: translate(100%);
}

.el-notification-fade-enter-from.left {
  left: 0;
  transform: translate(-100%);
}

/* Leave transition: fade out. */
.el-notification-fade-leave-to {
  opacity: 0;
}

/* ---------------------------------------------------------------------------
 * Overlay
 * ------------------------------------------------------------------------- */
.el-overlay {
  position: fixed;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  z-index: 2000;
  height: 100%;
  background-color: var(--el-overlay-color-lighter);
  overflow: auto;
}

.el-overlay .el-overlay-root {
  height: 0;
}

/* ---------------------------------------------------------------------------
 * Page header
 * ------------------------------------------------------------------------- */
.el-page-header {
  display: flex;
  line-height: 24px;
}

.el-page-header__left {
  display: flex;
  cursor: pointer;
  margin-right: 40px;
  position: relative;
}

/* 1px vertical divider, placed 20px to the right of the left section and
 * vertically centred. */
.el-page-header__left:after {
  content: "";
  position: absolute;
  width: 1px;
  height: 16px;
  right: -20px;
  top: 50%;
  transform: translateY(-50%);
  background-color: var(--el-border-color);
}

.el-page-header__icon {
  font-size: 18px;
  margin-right: 6px;
  display: flex;
  align-items: center;
}

/* This selector is completed on the next line. */
.el-page-header__icon
.el-icon{font-size:inherit}.el-page-header__title{font-size:14px;font-weight:500}.el-page-header__content{font-size:18px;color:var(--el-text-color-primary)}.el-pagination{--el-pagination-font-size:14px;--el-pagination-bg-color:var(--el-fill-color-blank);--el-pagination-text-color:var(--el-text-color-primary);--el-pagination-border-radius:3px;--el-pagination-button-color:var(--el-text-color-primary);--el-pagination-button-width:32px;--el-pagination-button-height:32px;--el-pagination-button-disabled-color:var(--el-text-color-placeholder);--el-pagination-button-disabled-bg-color:var(--el-fill-color-blank);--el-pagination-button-bg-color:var(--el-fill-color);--el-pagination-hover-color:var(--el-color-primary);--el-pagination-height-extra-small:24px;--el-pagination-line-height-extra-small:var(--el-pagination-height-extra-small);white-space:nowrap;padding:2px 5px;color:var(--el-pagination-text-color);font-weight:400;display:flex;align-items:center}.el-pagination:after,.el-pagination:before{display:table;content:""}.el-pagination:after{clear:both}.el-pagination button,.el-pagination span:not([class*=suffix]){display:flex;justify-content:center;align-items:center;font-size:var(--el-pagination-font-size);min-width:var(--el-pagination-button-width);height:var(--el-pagination-button-height);line-height:var(--el-pagination-button-height);box-sizing:border-box}.el-pagination .el-input__inner{text-align:center;-moz-appearance:textfield;line-height:normal}.el-pagination .el-select .el-input{width:128px}.el-pagination button{border:none;padding:0 6px;background:0 0}.el-pagination button:focus{outline:0}.el-pagination button:hover{color:var(--el-pagination-hover-color)}.el-pagination button:disabled{color:var(--el-pagination-button-disabled-color);background-color:var(--el-pagination-button-disabled-bg-color);cursor:not-allowed}.el-pagination .btn-next,.el-pagination .btn-prev{background:center center 
no-repeat;background-size:16px;background-color:var(--el-pagination-bg-color);cursor:pointer;margin:0;color:var(--el-pagination-button-color)}.el-pagination .btn-next .el-icon,.el-pagination .btn-prev .el-icon{display:block;font-size:12px;font-weight:700;width:inherit}.el-pagination .el-pager li.is-disabled{color:var(--el-text-color-placeholder);cursor:not-allowed}.el-pagination--small .btn-next,.el-pagination--small .btn-prev,.el-pagination--small .el-pager li,.el-pagination--small .el-pager li.btn-quicknext,.el-pagination--small .el-pager li.btn-quickprev,.el-pagination--small .el-pager li:last-child{border-color:transparent;font-size:var(--el-font-size-extra-small);line-height:var(--el-pagination-line-height-extra-small);height:var(--el-pagination-height-extra-small);min-width:24px}.el-pagination--small .arrow.is-disabled{visibility:hidden}.el-pagination--small .more:before,.el-pagination--small li.more:before{line-height:var(--el-pagination-line-height-extra-small)}.el-pagination--small button,.el-pagination--small span:not([class*=suffix]){height:var(--el-pagination-height-extra-small);line-height:var(--el-pagination-line-height-extra-small);font-size:var(--el-font-size-extra-small)}.el-pagination--small .el-pagination__editor{height:var(--el-pagination-line-height-extra-small)}.el-pagination--small .el-pagination__editor.el-input .el-input__inner{height:var(--el-pagination-height-extra-small)}.el-pagination--small .el-input--small,.el-pagination--small .el-input__inner{height:var(--el-pagination-height-extra-small)!important;line-height:var(--el-pagination-line-height-extra-small)}.el-pagination--small .el-input__suffix,.el-pagination--small .el-input__suffix .el-input__suffix-inner,.el-pagination--small .el-input__suffix .el-input__suffix-inner i.el-select__caret{line-height:var(--el-pagination-line-height-extra-small)}.el-pagination--small .el-select .el-input{width:100px}.el-pagination__sizes{margin:0 16px 0 
0;font-weight:400;color:var(--el-text-color-regular)}.el-pagination__sizes+button.btn-prev[type=button]{margin-left:0}.el-pagination__sizes+.el-pager .number:first-child{margin-left:0}.el-pagination__sizes+.el-pager .number:last-child{margin-right:0}.el-pagination__total{margin-right:16px;font-weight:400;color:var(--el-text-color-regular)}.el-pagination__total+button.btn-prev[type=button]{margin-left:0}.el-pagination__total+.el-pager .number:first-child{margin-left:0}.el-pagination__total+.el-pager .number:last-child{margin-right:0}.el-pagination__total[disabled=true]{color:var(--el-text-color-placeholder)}.el-pagination__jump{margin-left:16px;font-weight:400;color:var(--el-text-color-regular)}.el-pagination__jump .el-input__inner{padding:0 3px}.el-pagination__jump[disabled=true]{color:var(--el-text-color-placeholder)}.el-pagination__rightwrapper{flex:1;display:flex;align-items:center;justify-content:flex-end}.el-pagination__editor{line-height:18px;margin:0 8px;height:var(--el-pagination-button-height);min-width:56px;text-align:center;box-sizing:border-box;border-radius:var(--el-pagination-border-radius)}.el-pagination__editor.el-input{width:50px}.el-pagination__editor.el-input .el-input__inner{height:var(--el-pagination-button-height)}.el-pagination__editor .el-input__inner::-webkit-inner-spin-button,.el-pagination__editor .el-input__inner::-webkit-outer-spin-button{-webkit-appearance:none;margin:0}.el-pagination.is-background .btn-next,.el-pagination.is-background .btn-prev,.el-pagination.is-background .el-pager li{margin:0 4px;background-color:var(--el-pagination-button-bg-color);color:var(--el-text-color-regular);min-width:32px;border-radius:2px}.el-pagination.is-background .btn-next.is-disabled,.el-pagination.is-background .btn-prev.is-disabled,.el-pagination.is-background .el-pager li.is-disabled{color:var(--el-text-color-placeholder);background-color:var(--el-disabled-bg-color)}.el-pagination.is-background .btn-next.is-first,.el-pagination.is-background 
.btn-prev.is-first,.el-pagination.is-background .el-pager li.is-first{margin-left:0}.el-pagination.is-background .btn-next.is-last,.el-pagination.is-background .btn-prev.is-last,.el-pagination.is-background .el-pager li.is-last{margin-right:0}.el-pagination.is-background .btn-next,.el-pagination.is-background .btn-prev{padding:0}.el-pagination.is-background .btn-next:disabled,.el-pagination.is-background .btn-prev:disabled{color:var(--el-text-color-placeholder);background-color:var(--el-disabled-bg-color)}.el-pagination.is-background .btn-next:hover:not([disabled]),.el-pagination.is-background .btn-prev:hover:not([disabled]){color:var(--el-pagination-hover-color)}.el-pagination.is-background .el-pager li:not(.is-disabled):hover{color:var(--el-pagination-hover-color)}.el-pagination.is-background .el-pager li:not(.is-disabled).is-active{background-color:var(--el-color-primary);color:var(--el-color-white);font-weight:700}.el-pagination.is-background.el-pagination--small .btn-next,.el-pagination.is-background.el-pagination--small .btn-prev,.el-pagination.is-background.el-pagination--small .el-pager li{min-width:24px}.el-pager{-webkit-user-select:none;user-select:none;list-style:none;font-size:0;padding:0;margin:0;display:flex;align-items:center}.el-pager li{padding:0 4px;background:var(--el-pagination-bg-color);display:flex;justify-content:center;align-items:center;font-size:var(--el-pagination-font-size);min-width:var(--el-pagination-button-width);height:var(--el-pagination-button-height);line-height:var(--el-pagination-button-height);box-sizing:border-box;cursor:pointer;text-align:center}.el-pager li.btn-quickprev:hover,.el-pager li.btn-quicknext:hover{cursor:pointer}.el-pager li.btn-quicknext,.el-pager li.btn-quickprev{line-height:32px;color:var(--el-pagination-button-color)}.el-pager li.btn-quicknext.is-disabled,.el-pager li.btn-quickprev.is-disabled{color:var(--el-text-color-placeholder);cursor:not-allowed}.el-pager li.btn-quicknext svg,.el-pager li.btn-quickprev 
svg{pointer-events:none}.el-pager li.is-active+li{border-left:0}.el-pager li:focus-visible{outline:1px solid var(--el-pagination-hover-color)}.el-pager li:hover{color:var(--el-pagination-hover-color)}.el-pager li.is-active{color:var(--el-pagination-hover-color);cursor:default}.el-pager+button.btn-next[type=button]{margin-right:0}.el-popconfirm__main{display:flex;align-items:center}.el-popconfirm__icon{margin-right:5px}.el-popconfirm__action{text-align:right;margin-top:8px}.el-popover{--el-popover-bg-color:var(--el-color-white);--el-popover-font-size:var(--el-font-size-base);--el-popover-border-color:var(--el-border-color-lighter);--el-popover-padding:12px;--el-popover-padding-large:18px 20px;--el-popover-title-font-size:16px;--el-popover-title-text-color:var(--el-text-color-primary);--el-popover-border-radius:4px}.el-popover.el-popper{background:var(--el-popover-bg-color);min-width:150px;border-radius:var(--el-popover-border-radius);border:1px solid var(--el-popover-border-color);padding:var(--el-popover-padding);z-index:var(--el-index-popper);color:var(--el-text-color-regular);line-height:1.4;text-align:justify;font-size:var(--el-popover-font-size);box-shadow:var(--el-box-shadow-light);word-break:break-all}.el-popover.el-popper--plain{padding:var(--el-popover-padding-large)}.el-popover__title{color:var(--el-popover-title-text-color);font-size:var(--el-popover-title-font-size);line-height:1;margin-bottom:12px}.el-popover__reference:focus:hover,.el-popover__reference:focus:not(.focusing){outline-width:0}.el-popover.el-popper:focus,.el-popover.el-popper:focus:active{outline-width:0}.el-progress{position:relative;line-height:1;display:flex;align-items:center}.el-progress__text{font-size:14px;color:var(--el-text-color-regular);margin-left:5px;min-width:50px;line-height:1}.el-progress__text i{vertical-align:middle;display:block}.el-progress--circle,.el-progress--dashboard{display:inline-block}.el-progress--circle .el-progress__text,.el-progress--dashboard 
.el-progress__text {
  /* Circle / dashboard: the label is centred over the ring. */
  position: absolute;
  top: 50%;
  left: 0;
  width: 100%;
  text-align: center;
  margin: 0;
  transform: translateY(-50%);
}

.el-progress--circle .el-progress__text i,
.el-progress--dashboard .el-progress__text i {
  vertical-align: middle;
  display: inline-block;
}

/* ---------------------------------------------------------------------------
 * Progress: text visibility / placement modifiers
 * ------------------------------------------------------------------------- */
.el-progress--without-text .el-progress__text {
  display: none;
}

.el-progress--without-text .el-progress-bar {
  padding-right: 0;
  margin-right: 0;
  display: block;
}

.el-progress--text-inside .el-progress-bar {
  padding-right: 0;
  margin-right: 0;
}

/* ---------------------------------------------------------------------------
 * Progress: status colours (bar fill and label share the same colour)
 * ------------------------------------------------------------------------- */
.el-progress.is-success .el-progress-bar__inner {
  background-color: var(--el-color-success);
}
.el-progress.is-success .el-progress__text {
  color: var(--el-color-success);
}

.el-progress.is-warning .el-progress-bar__inner {
  background-color: var(--el-color-warning);
}
.el-progress.is-warning .el-progress__text {
  color: var(--el-color-warning);
}

.el-progress.is-exception .el-progress-bar__inner {
  background-color: var(--el-color-danger);
}
.el-progress.is-exception .el-progress__text {
  color: var(--el-color-danger);
}

/* ---------------------------------------------------------------------------
 * Progress: bar
 * ------------------------------------------------------------------------- */
.el-progress-bar {
  flex-grow: 1;
  box-sizing: border-box;
}

/* Track. */
.el-progress-bar__outer {
  height: 6px;
  border-radius: 100px;
  background-color: var(--el-border-color-lighter);
  overflow: hidden;
  position: relative;
  vertical-align: middle;
}

/* Fill; width changes are animated. */
.el-progress-bar__inner {
  position: absolute;
  left: 0;
  top: 0;
  height: 100%;
  background-color: var(--el-color-primary);
  text-align: right;
  border-radius: 100px;
  line-height: 1;
  white-space: nowrap;
  transition: width .6s ease;
}

/* Full-height, empty inline-block so that the inline label can be aligned
 * with vertical-align:middle. */
.el-progress-bar__inner:after {
  display: inline-block;
  content: "";
  height: 100%;
  vertical-align: middle;
}

.el-progress-bar__inner--indeterminate {
  transform: translateZ(0);
  animation: indeterminate 3s infinite;
}

.el-progress-bar__innerText {
  display: inline-block;
  vertical-align: middle;
  color: #fff;
  font-size: 12px;
  margin: 0 5px;
}

@keyframes progress {
  0% {
    background-position: 0 0;
  }
  to {
    background-position: 32px 0;
  }
}

/* The name and body of this @keyframes rule continue on the next line. */
@keyframes
indeterminate{0%{left:-100%}to{left:100%}}.el-radio-button{--el-radio-button-checked-bg-color:var(--el-color-primary);--el-radio-button-checked-text-color:var(--el-color-white);--el-radio-button-checked-border-color:var(--el-color-primary);--el-radio-button-disabled-checked-fill:var(--el-border-color-extra-light)}.el-radio-button{position:relative;display:inline-block;outline:0}.el-radio-button__inner{display:inline-block;line-height:1;white-space:nowrap;vertical-align:middle;background:var(--el-button-bg-color,var(--el-fill-color-blank));border:var(--el-border);font-weight:var(--el-button-font-weight,var(--el-font-weight-primary));border-left:0;color:var(--el-button-text-color,var(--el-text-color-regular));-webkit-appearance:none;text-align:center;box-sizing:border-box;outline:0;margin:0;position:relative;cursor:pointer;transition:var(--el-transition-all);-webkit-user-select:none;user-select:none;padding:8px 15px;font-size:var(--el-font-size-base);border-radius:0}.el-radio-button__inner.is-round{padding:8px 15px}.el-radio-button__inner:hover{color:var(--el-color-primary)}.el-radio-button__inner [class*=el-icon-]{line-height:.9}.el-radio-button__inner [class*=el-icon-]+span{margin-left:5px}.el-radio-button:first-child .el-radio-button__inner{border-left:var(--el-border);border-radius:var(--el-border-radius-base) 0 0 var(--el-border-radius-base);box-shadow:none!important}.el-radio-button__original-radio{opacity:0;outline:0;position:absolute;z-index:-1}.el-radio-button__original-radio:checked+.el-radio-button__inner{color:var(--el-radio-button-checked-text-color,var(--el-color-white));background-color:var(--el-radio-button-checked-bg-color,var(--el-color-primary));border-color:var(--el-radio-button-checked-border-color,var(--el-color-primary));box-shadow:-1px 0 0 0 
var(--el-radio-button-checked-border-color,var(--el-color-primary))}.el-radio-button__original-radio:disabled+.el-radio-button__inner{color:var(--el-disabled-text-color);cursor:not-allowed;background-image:none;background-color:var(--el-button-disabled-bg-color,var(--el-fill-color-blank));border-color:var(--el-button-disabled-border-color,var(--el-border-color-light));box-shadow:none}.el-radio-button__original-radio:disabled:checked+.el-radio-button__inner{background-color:var(--el-radio-button-disabled-checked-fill)}.el-radio-button:last-child .el-radio-button__inner{border-radius:0 var(--el-border-radius-base) var(--el-border-radius-base) 0}.el-radio-button:first-child:last-child .el-radio-button__inner{border-radius:var(--el-border-radius-base)}.el-radio-button--large .el-radio-button__inner{padding:12px 19px;font-size:var(--el-font-size-base);border-radius:0}.el-radio-button--large .el-radio-button__inner.is-round{padding:12px 19px}.el-radio-button--small .el-radio-button__inner{padding:5px 11px;font-size:12px;border-radius:0}.el-radio-button--small .el-radio-button__inner.is-round{padding:5px 11px}.el-radio-button:focus:not(.is-focus):not(:active):not(.is-disabled){box-shadow:0 0 2px 2px 
var(--el-radio-button-checked-border-color)}.el-radio-group{display:inline-flex;align-items:center;flex-wrap:wrap;font-size:0}.el-radio{--el-radio-font-size:var(--el-font-size-base);--el-radio-text-color:var(--el-text-color-regular);--el-radio-font-weight:var(--el-font-weight-primary);--el-radio-input-height:14px;--el-radio-input-width:14px;--el-radio-input-border-radius:var(--el-border-radius-circle);--el-radio-input-bg-color:var(--el-fill-color-blank);--el-radio-input-border:var(--el-border);--el-radio-input-border-color:var(--el-border-color);--el-radio-input-border-color-hover:var(--el-color-primary)}.el-radio{color:var(--el-radio-text-color);font-weight:var(--el-radio-font-weight);position:relative;cursor:pointer;display:inline-flex;align-items:center;white-space:nowrap;outline:0;font-size:var(--el-font-size-base);-webkit-user-select:none;user-select:none;margin-right:32px;height:32px}.el-radio.el-radio--large{height:40px}.el-radio.el-radio--small{height:24px}.el-radio.is-bordered{padding:0 15px 0 9px;border-radius:var(--el-border-radius-base);border:var(--el-border);box-sizing:border-box}.el-radio.is-bordered.is-checked{border-color:var(--el-color-primary)}.el-radio.is-bordered.is-disabled{cursor:not-allowed;border-color:var(--el-border-color-lighter)}.el-radio.is-bordered.el-radio--large{padding:0 19px 0 11px;border-radius:var(--el-border-radius-base)}.el-radio.is-bordered.el-radio--large .el-radio__label{font-size:var(--el-font-size-base)}.el-radio.is-bordered.el-radio--large .el-radio__inner{height:14px;width:14px}.el-radio.is-bordered.el-radio--small{padding:0 11px 0 7px;border-radius:var(--el-border-radius-base)}.el-radio.is-bordered.el-radio--small .el-radio__label{font-size:12px}.el-radio.is-bordered.el-radio--small .el-radio__inner{height:12px;width:12px}.el-radio:last-child{margin-right:0}.el-radio__input{white-space:nowrap;cursor:pointer;outline:0;display:inline-flex;position:relative;vertical-align:middle}.el-radio__input.is-disabled 
.el-radio__inner{background-color:var(--el-disabled-bg-color);border-color:var(--el-disabled-border-color);cursor:not-allowed}.el-radio__input.is-disabled .el-radio__inner:after{cursor:not-allowed;background-color:var(--el-disabled-bg-color)}.el-radio__input.is-disabled .el-radio__inner+.el-radio__label{cursor:not-allowed}.el-radio__input.is-disabled.is-checked .el-radio__inner{background-color:var(--el-disabled-bg-color);border-color:var(--el-disabled-border-color)}.el-radio__input.is-disabled.is-checked .el-radio__inner:after{background-color:var(--el-text-color-placeholder)}.el-radio__input.is-disabled+span.el-radio__label{color:var(--el-text-color-placeholder);cursor:not-allowed}.el-radio__input.is-checked .el-radio__inner{border-color:var(--el-color-primary);background:var(--el-color-primary)}.el-radio__input.is-checked .el-radio__inner:after{transform:translate(-50%,-50%) scale(1)}.el-radio__input.is-checked+.el-radio__label{color:var(--el-color-primary)}.el-radio__input.is-focus .el-radio__inner{border-color:var(--el-radio-input-border-color-hover)}.el-radio__inner{border:var(--el-radio-input-border);border-radius:var(--el-radio-input-border-radius);width:var(--el-radio-input-width);height:var(--el-radio-input-height);background-color:var(--el-radio-input-bg-color);position:relative;cursor:pointer;display:inline-block;box-sizing:border-box}.el-radio__inner:hover{border-color:var(--el-radio-input-border-color-hover)}.el-radio__inner:after{width:4px;height:4px;border-radius:var(--el-radio-input-border-radius);background-color:var(--el-color-white);content:"";position:absolute;left:50%;top:50%;transform:translate(-50%,-50%) scale(0);transition:transform .15s ease-in}.el-radio__original{opacity:0;outline:0;position:absolute;z-index:-1;top:0;left:0;right:0;bottom:0;margin:0}.el-radio:focus:not(.is-focus):not(:active):not(.is-disabled) .el-radio__inner{box-shadow:0 0 2px 2px 
var(--el-radio-input-border-color-hover)}.el-radio__label{font-size:var(--el-radio-font-size);padding-left:8px}.el-radio.el-radio--large .el-radio__label{font-size:14px}.el-radio.el-radio--large .el-radio__inner{width:14px;height:14px}.el-radio.el-radio--small .el-radio__label{font-size:12px}.el-radio.el-radio--small .el-radio__inner{width:12px;height:12px}.el-rate{--el-rate-height:20px;--el-rate-font-size:var(--el-font-size-base);--el-rate-icon-size:18px;--el-rate-icon-margin:6px;--el-rate-void-color:var(--el-border-color-darker);--el-rate-fill-color:#f7ba2a;--el-rate-disabled-void-color:var(--el-fill-color);--el-rate-text-color:var(--el-text-color-primary)}.el-rate{display:inline-flex;align-items:center;height:32px}.el-rate:active,.el-rate:focus{outline-width:0}.el-rate__item{cursor:pointer;display:inline-block;position:relative;font-size:0;vertical-align:middle;color:var(--el-rate-void-color)}.el-rate .el-rate__icon{position:relative;display:inline-block;font-size:var(--el-rate-icon-size);margin-right:var(--el-rate-icon-margin);transition:var(--el-transition-duration)}.el-rate .el-rate__icon.hover{transform:scale(1.15)}.el-rate .el-rate__icon .path2{position:absolute;left:0;top:0}.el-rate .el-rate__icon.is-active{color:var(--el-rate-fill-color)}.el-rate__decimal{position:absolute;top:0;left:0;display:inline-block;overflow:hidden;color:var(--el-rate-fill-color)}.el-rate__text{font-size:var(--el-rate-font-size);vertical-align:middle;color:var(--el-rate-text-color)}.el-rate--large{height:40px}.el-rate--small{height:24px}.el-rate.is-disabled .el-rate__item{cursor:auto;color:var(--el-rate-disabled-void-color)}.el-result{--el-result-padding:40px 
30px;--el-result-icon-font-size:64px;--el-result-title-font-size:20px;--el-result-title-margin-top:20px;--el-result-subtitle-margin-top:10px;--el-result-extra-margin-top:30px}.el-result{display:flex;justify-content:center;align-items:center;flex-direction:column;text-align:center;box-sizing:border-box;padding:var(--el-result-padding)}.el-result__icon svg{width:var(--el-result-icon-font-size);height:var(--el-result-icon-font-size)}.el-result__title{margin-top:var(--el-result-title-margin-top)}.el-result__title p{margin:0;font-size:var(--el-result-title-font-size);color:var(--el-text-color-primary);line-height:1.3}.el-result__subtitle{margin-top:var(--el-result-subtitle-margin-top)}.el-result__subtitle p{margin:0;font-size:var(--el-font-size-base);color:var(--el-text-color-regular);line-height:1.3}.el-result__extra{margin-top:var(--el-result-extra-margin-top)}.el-result .icon-success{--el-result-color:var(--el-color-success);color:var(--el-result-color)}.el-result .icon-warning{--el-result-color:var(--el-color-warning);color:var(--el-result-color)}.el-result .icon-danger{--el-result-color:var(--el-color-danger);color:var(--el-result-color)}.el-result .icon-info{--el-result-color:var(--el-color-info);color:var(--el-result-color)}.el-result 
.icon-error{--el-result-color:var(--el-color-error);color:var(--el-result-color)}.el-row{display:flex;flex-wrap:wrap;position:relative;box-sizing:border-box}.el-row.is-justify-center{justify-content:center}.el-row.is-justify-end{justify-content:flex-end}.el-row.is-justify-space-between{justify-content:space-between}.el-row.is-justify-space-around{justify-content:space-around}.el-row.is-justify-space-evenly{justify-content:space-evenly}.el-row.is-align-middle{align-items:center}.el-row.is-align-bottom{align-items:flex-end}.el-scrollbar{--el-scrollbar-opacity:.3;--el-scrollbar-bg-color:var(--el-text-color-secondary);--el-scrollbar-hover-opacity:.5;--el-scrollbar-hover-bg-color:var(--el-text-color-secondary)}.el-scrollbar{overflow:hidden;position:relative;height:100%}.el-scrollbar__wrap{overflow:auto;height:100%}.el-scrollbar__wrap--hidden-default{scrollbar-width:none}.el-scrollbar__wrap--hidden-default::-webkit-scrollbar{display:none}.el-scrollbar__thumb{position:relative;display:block;width:0;height:0;cursor:pointer;border-radius:inherit;background-color:var(--el-scrollbar-bg-color,var(--el-text-color-secondary));transition:var(--el-transition-duration) background-color;opacity:var(--el-scrollbar-opacity,.3)}.el-scrollbar__thumb:hover{background-color:var(--el-scrollbar-hover-bg-color,var(--el-text-color-secondary));opacity:var(--el-scrollbar-hover-opacity,.5)}.el-scrollbar__bar{position:absolute;right:2px;bottom:2px;z-index:1;border-radius:4px}.el-scrollbar__bar.is-vertical{width:6px;top:2px}.el-scrollbar__bar.is-vertical>div{width:100%}.el-scrollbar__bar.is-horizontal{height:6px;left:2px}.el-scrollbar__bar.is-horizontal>div{height:100%}.el-scrollbar-fade-enter-active{transition:opacity .34s ease-out}.el-scrollbar-fade-leave-active{transition:opacity .12s ease-out}.el-scrollbar-fade-enter-from,.el-scrollbar-fade-leave-active{opacity:0}.el-select-dropdown__option-item:hover:not(.hover){background-color:transparent}.el-select-dropdown__list{list-style:none;margin:6px 
0!important;padding:0!important;box-sizing:border-box}.el-select-dropdown__option-item{font-size:var(--el-select-font-size);padding:0 32px 0 20px;position:relative;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;color:var(--el-text-color-regular);height:34px;line-height:34px;box-sizing:border-box;cursor:pointer}.el-select-dropdown__option-item.is-disabled{color:var(--el-text-color-placeholder);cursor:not-allowed}.el-select-dropdown__option-item.is-disabled:hover{background-color:var(--el-color-white)}.el-select-dropdown__option-item.is-selected{background-color:var(--el-fill-color-light);font-weight:700}.el-select-dropdown__option-item.is-selected:not(.is-multiple){color:var(--el-color-primary)}.el-select-dropdown__option-item.hover{background-color:var(--el-fill-color-light)!important}.el-select-dropdown__option-item:hover{background-color:var(--el-fill-color-light)}.el-select-dropdown.is-multiple .el-select-dropdown__option-item.is-selected{color:var(--el-color-primary);background-color:var(--el-bg-color-overlay)}.el-select-dropdown.is-multiple .el-select-dropdown__option-item.is-selected .el-icon{position:absolute;right:20px;top:0;height:inherit;font-size:12px}.el-select-dropdown.is-multiple .el-select-dropdown__option-item.is-selected .el-icon svg{height:inherit;vertical-align:middle}.el-select-group{margin:0;padding:0}.el-select-group__wrap{position:relative;list-style:none;margin:0;padding:0}.el-select-group__wrap:not(:last-of-type){padding-bottom:24px}.el-select-group__wrap:not(:last-of-type):after{content:"";position:absolute;display:block;left:20px;right:20px;bottom:12px;height:1px;background:var(--el-border-color-light)}.el-select-group__split-dash{position:absolute;left:20px;right:20px;height:1px;background:var(--el-border-color-light)}.el-select-group__title{padding-left:20px;font-size:12px;color:var(--el-color-info);line-height:30px}.el-select-group 
.el-select-dropdown__item{padding-left:20px}.el-select-v2{--el-select-border-color-hover:var(--el-border-color-hover);--el-select-disabled-border:var(--el-disabled-border-color);--el-select-font-size:var(--el-font-size-base);--el-select-close-hover-color:var(--el-text-color-secondary);--el-select-input-color:var(--el-text-color-placeholder);--el-select-multiple-input-color:var(--el-text-color-regular);--el-select-input-focus-border-color:var(--el-color-primary);--el-select-input-font-size:14px}.el-select-v2{display:inline-block;position:relative;vertical-align:middle;font-size:14px}.el-select-v2__wrapper{display:flex;align-items:center;flex-wrap:wrap;box-sizing:border-box;cursor:pointer;padding:1px 30px 1px 0;border:1px solid var(--el-border-color);border-radius:var(--el-border-radius-base);transition:border-color var(--el-transition-duration-fast) var(--el-ease-in-out-bezier-function)}.el-select-v2__wrapper:hover{border-color:var(--el-text-color-placeholder)}.el-select-v2__wrapper.is-filterable{cursor:text}.el-select-v2__wrapper.is-focused{border-color:var(--el-color-primary)}.el-select-v2__wrapper.is-hovering:not(.is-focused){border-color:var(--el-text-color-placeholder)}.el-select-v2__wrapper.is-disabled{cursor:not-allowed;background-color:var(--el-fill-color-light);color:var(--el-text-color-placeholder);border-color:var(--el-select-disabled-border)}.el-select-v2__wrapper.is-disabled:hover{border-color:var(--el-select-disabled-border)}.el-select-v2__wrapper.is-disabled.is-focus{border-color:var(--el-input-focus-border-color)}.el-select-v2__wrapper.is-disabled .is-transparent{opacity:1;-webkit-user-select:none;user-select:none}.el-select-v2__wrapper.is-disabled .el-select-v2__caret,.el-select-v2__wrapper.is-disabled .el-select-v2__combobox-input{cursor:not-allowed}.el-select-v2__wrapper .el-select-v2__input-wrapper{box-sizing:border-box;position:relative;margin-inline-start:12px;max-width:100%;overflow:hidden}.el-select-v2__wrapper,.el-select-v2__wrapper 
.el-select-v2__input-wrapper{line-height:32px}.el-select-v2__wrapper .el-select-v2__input-wrapper input{line-height:24px;height:24px;min-width:4px;width:100%;background-color:transparent;-webkit-appearance:none;appearance:none;background:0 0;border:none;margin:2px 0;outline:0;padding:0}.el-select-v2 .el-select-v2__tags-text{text-overflow:ellipsis;display:inline-flex;justify-content:center;align-items:center;overflow:hidden}.el-select-v2__empty{padding:10px 0;margin:0;text-align:center;color:var(--el-text-color-secondary);font-size:14px}.el-select-v2__popper.el-popper[role=tooltip]{background:var(--el-bg-color-overlay);border:1px solid var(--el-border-color-light);box-shadow:var(--el-box-shadow-light)}.el-select-v2__popper.el-popper[role=tooltip] .el-popper__arrow:before{border:1px solid var(--el-border-color-light)}.el-select-v2__popper.el-popper[role=tooltip][data-popper-placement^=top] .el-popper__arrow:before{border-top-color:transparent;border-left-color:transparent}.el-select-v2__popper.el-popper[role=tooltip][data-popper-placement^=bottom] .el-popper__arrow:before{border-bottom-color:transparent;border-right-color:transparent}.el-select-v2__popper.el-popper[role=tooltip][data-popper-placement^=left] .el-popper__arrow:before{border-left-color:transparent;border-bottom-color:transparent}.el-select-v2__popper.el-popper[role=tooltip][data-popper-placement^=right] .el-popper__arrow:before{border-right-color:transparent;border-top-color:transparent}.el-select-v2--large .el-select-v2__wrapper .el-select-v2__combobox-input{height:32px}.el-select-v2--large .el-select-v2__caret,.el-select-v2--large .el-select-v2__suffix{height:40px}.el-select-v2--large .el-select-v2__placeholder{font-size:14px;line-height:40px}.el-select-v2--small .el-select-v2__wrapper .el-select-v2__combobox-input{height:16px}.el-select-v2--small .el-select-v2__caret,.el-select-v2--small .el-select-v2__suffix{height:24px}.el-select-v2--small 
.el-select-v2__placeholder{font-size:12px;line-height:24px}
/* The declaration block above belongs to the `.el-select-v2--small` descendant
   selector that is opened at the end of the preceding line; it must stay first. */

/* ---- el-select-v2: selection area and combobox input ---- */
.el-select-v2 .el-select-v2__selection > span {
  display: inline-block;
}

.el-select-v2:hover .el-select-v2__combobox-input {
  border-color: var(--el-select-border-color-hover);
}

.el-select-v2 .el-select__selection-text {
  text-overflow: ellipsis;
  display: inline-block;
  overflow-x: hidden;
  vertical-align: bottom;
}

.el-select-v2 .el-select-v2__combobox-input {
  padding-right: 35px;
  display: block;
}

.el-select-v2 .el-select-v2__combobox-input:focus {
  border-color: var(--el-select-input-focus-border-color);
}

.el-select-v2__input {
  border: none;
  outline: 0;
  padding: 0;
  margin-left: 15px;
  color: var(--el-select-multiple-input-color);
  font-size: var(--el-select-font-size);
  -webkit-appearance: none;
  appearance: none;
  height: 28px;
}

.el-select-v2__input.is-small {
  height: 14px;
}

/* ---- el-select-v2: clear button, suffix slot and caret icon ---- */
.el-select-v2__close {
  cursor: pointer;
  position: absolute;
  top: 8px;
  z-index: var(--el-index-top);
  right: 25px;
  color: var(--el-select-input-color);
  line-height: 18px;
  font-size: var(--el-select-input-font-size);
}

.el-select-v2__close:hover {
  color: var(--el-select-close-hover-color);
}

.el-select-v2__suffix {
  display: inline-flex;
  position: absolute;
  right: 12px;
  height: 32px;
  top: 50%;
  transform: translateY(-50%);
  color: var(--el-input-icon-color, var(--el-text-color-placeholder));
}

.el-select-v2__caret {
  color: var(--el-select-input-color);
  font-size: var(--el-select-input-font-size);
  transition: transform var(--el-transition-duration);
  transform: rotate(180deg);
  cursor: pointer;
}

.el-select-v2__caret.is-reverse {
  transform: rotate(0);
}

.el-select-v2__caret.is-show-close {
  font-size: var(--el-select-font-size);
  text-align: center;
  transform: rotate(180deg);
  border-radius: var(--el-border-radius-circle);
  color: var(--el-select-input-color);
  transition: var(--el-transition-color);
}

/* Hover colour of the caret while it is acting as the clear icon.
   A custom property has to be dereferenced with var(); a bare `--name` is not a
   valid <color>, so the previous `color:--el-select-close-hover-color` was
   discarded by the parser and the hover state never changed colour. This now
   matches `.el-select-v2__close:hover` above. */
.el-select-v2__caret.is-show-close:hover {
  color: var(--el-select-close-hover-color);
}

.el-select-v2__caret.el-icon {
  height: inherit;
}

/* The selector below is intentionally left open: its final compound selector
   and declaration block (`svg{...}`) are on the following line. */
.el-select-v2__caret.el-icon 
svg{vertical-align:middle}.el-select-v2__selection{white-space:normal;z-index:var(--el-index-normal);display:flex;align-items:center;flex-wrap:wrap}.el-select-v2__wrapper{background-color:var(--el-fill-color-blank);border:1px solid var(--el-border-color);border-radius:var(--el-border-radius-base);position:relative;transition:all var(--el-transition-duration) var(--el-ease-in-out-bezier-function)}.el-select-v2__input-calculator{left:0;position:absolute;top:0;visibility:hidden;white-space:pre;z-index:999}.el-select-v2__selected-item{line-height:inherit;height:inherit;-webkit-user-select:none;user-select:none;display:flex}.el-select-v2__placeholder{position:absolute;top:50%;transform:translateY(-50%);margin-inline-start:12px;width:calc(100% - 52px);overflow:hidden;text-overflow:ellipsis;white-space:nowrap;color:var(--el-input-text-color,var(--el-text-color-regular))}.el-select-v2__placeholder.is-transparent{color:var(--el-text-color-placeholder)}.el-select-v2 .el-select-v2__selection .el-tag{box-sizing:border-box;border-color:transparent;margin:2px 0 2px 6px;background-color:var(--el-fill-color)}.el-select-v2 .el-select-v2__selection .el-tag .el-icon-close{background-color:var(--el-text-color-placeholder);right:-7px;color:var(--el-color-white)}.el-select-v2 .el-select-v2__selection .el-tag .el-icon-close:hover{background-color:var(--el-text-color-secondary)}.el-select-v2 .el-select-v2__selection .el-tag .el-icon-close:before{display:block;transform:translateY(.5px)}.el-select-v2.el-select-v2--small .el-select-v2__selection .el-tag{margin:1px 0 1px 6px;height:18px}.el-select-dropdown{z-index:calc(var(--el-index-top) + 1);border-radius:var(--el-border-radius-base);box-sizing:border-box}.el-select-dropdown.is-multiple .el-select-dropdown__item.selected{color:var(--el-color-primary);background-color:var(--el-bg-color-overlay)}.el-select-dropdown.is-multiple .el-select-dropdown__item.selected.hover{background-color:var(--el-fill-color-light)}.el-select-dropdown.is-multiple 
.el-select-dropdown__item.selected:after {
  /* This block completes the `.el-select-dropdown.is-multiple` descendant
     selector that is opened at the end of the preceding line.
     It draws the check mark on a selected item of a multi-select dropdown:
     a 12x12 box filled with the primary colour and clipped by an SVG mask.
     The same `-webkit-mask` value used to be declared twice; only the later
     one could take effect, so the earlier duplicate has been removed.
     The data URI must stay on a single physical line: an unescaped newline
     inside a CSS string is a parse error. */
  content: "";
  position: absolute;
  top: 50%;
  right: 20px;
  border-top: none;
  border-right: none;
  background-repeat: no-repeat;
  background-position: center;
  background-color: var(--el-color-primary);
  mask: url("data:image/svg+xml;utf8,%3Csvg class='icon' width='200' height='200' viewBox='0 0 1024 1024' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath fill='currentColor' d='M406.656 706.944L195.84 496.256a32 32 0 10-45.248 45.248l256 256 512-512a32 32 0 00-45.248-45.248L406.592 706.944z'%3E%3C/path%3E%3C/svg%3E") no-repeat;
  mask-size: 100% 100%;
  -webkit-mask: url("data:image/svg+xml;utf8,%3Csvg class='icon' width='200' height='200' viewBox='0 0 1024 1024' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath fill='currentColor' d='M406.656 706.944L195.84 496.256a32 32 0 10-45.248 45.248l256 256 512-512a32 32 0 00-45.248-45.248L406.592 706.944z'%3E%3C/path%3E%3C/svg%3E") no-repeat;
  -webkit-mask-size: 100% 100%;
  transform: translateY(-50%);
  width: 12px;
  height: 12px;
}

/* Check mark for a selected option item; same technique and same mask image
   as the rule above. The duplicated `-webkit-mask` declaration is removed here
   as well, and each data URI is kept on one line. */
.el-select-dropdown .el-select-dropdown__option-item.is-selected:after {
  content: "";
  position: absolute;
  top: 50%;
  right: 20px;
  border-top: none;
  border-right: none;
  background-repeat: no-repeat;
  background-position: center;
  background-color: var(--el-color-primary);
  mask: url("data:image/svg+xml;utf8,%3Csvg class='icon' width='200' height='200' viewBox='0 0 1024 1024' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath fill='currentColor' d='M406.656 706.944L195.84 496.256a32 32 0 10-45.248 45.248l256 256 512-512a32 32 0 00-45.248-45.248L406.592 706.944z'%3E%3C/path%3E%3C/svg%3E") no-repeat;
  mask-size: 100% 100%;
  -webkit-mask: url("data:image/svg+xml;utf8,%3Csvg class='icon' width='200' height='200' viewBox='0 0 1024 1024' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath fill='currentColor' d='M406.656 706.944L195.84 496.256a32 32 0 10-45.248 45.248l256 256 512-512a32 32 0 00-45.248-45.248L406.592 706.944z'%3E%3C/path%3E%3C/svg%3E") no-repeat;
  -webkit-mask-size: 100% 100%;
  transform: translateY(-50%);
  width: 12px;
  height: 12px;
}

/* ---- el-select dropdown: list container and empty state ---- */
.el-select-dropdown .el-scrollbar.is-empty .el-select-dropdown__list {
  padding: 0;
}

.el-select-dropdown__empty {
  padding: 10px 0;
  margin: 0;
  text-align: center;
  color: var(--el-text-color-secondary);
  font-size: var(--el-select-font-size);
}

.el-select-dropdown__wrap {
  max-height: 274px;
}

.el-select-dropdown__list {
  list-style: none;
  padding: 6px 0;
  margin: 0;
  box-sizing: border-box;
}

/* ---- el-select: component-scoped custom properties ---- */
.el-select {
  --el-select-border-color-hover: var(--el-border-color-hover);
  --el-select-disabled-border: var(--el-disabled-border-color);
  --el-select-font-size: var(--el-font-size-base);
  --el-select-close-hover-color: var(--el-text-color-secondary);
  --el-select-input-color: var(--el-text-color-placeholder);
  --el-select-multiple-input-color: var(--el-text-color-regular);
  --el-select-input-focus-border-color: var(--el-color-primary);
  --el-select-input-font-size: 14px;
}

.el-select {
  display: inline-block;
  position: relative;
  line-height: 32px;
}

/* ---- el-select: popper (dropdown surface) and its arrow ---- */
.el-select__popper.el-popper[role=tooltip] {
  background: var(--el-bg-color-overlay);
  border: 1px solid var(--el-border-color-light);
  box-shadow: var(--el-box-shadow-light);
}

.el-select__popper.el-popper[role=tooltip] .el-popper__arrow:before {
  border: 1px solid var(--el-border-color-light);
}

/* The selector below is intentionally left open: its final compound selector
   and declaration block (`.el-popper__arrow:before{...}`) are on the
   following line. */
.el-select__popper.el-popper[role=tooltip][data-popper-placement^=top] 
.el-popper__arrow:before{border-top-color:transparent;border-left-color:transparent}.el-select__popper.el-popper[role=tooltip][data-popper-placement^=bottom] .el-popper__arrow:before{border-bottom-color:transparent;border-right-color:transparent}.el-select__popper.el-popper[role=tooltip][data-popper-placement^=left] .el-popper__arrow:before{border-left-color:transparent;border-bottom-color:transparent}.el-select__popper.el-popper[role=tooltip][data-popper-placement^=right] .el-popper__arrow:before{border-right-color:transparent;border-top-color:transparent}.el-select .el-select-tags-wrapper.has-prefix{margin-left:6px}.el-select--large{line-height:40px}.el-select--large .el-select-tags-wrapper.has-prefix{margin-left:8px}.el-select--small{line-height:24px}.el-select--small .el-select-tags-wrapper.has-prefix{margin-left:4px}.el-select .el-select__tags>span{display:inline-block}.el-select:hover:not(.el-select--disabled) .el-input__inner{box-shadow:0 0 0 1px var(--el-select-border-color-hover) inset}.el-select .el-select__tags-text{text-overflow:ellipsis;display:inline-flex;justify-content:center;align-items:center;overflow:hidden}.el-select .el-input__inner{cursor:pointer;display:inline-flex}.el-select .el-input__inner:focus{box-shadow:0 0 0 1px var(--el-select-input-focus-border-color) inset!important}.el-select .el-input{display:flex}.el-select .el-input .el-select__caret{color:var(--el-select-input-color);font-size:var(--el-select-input-font-size);transition:transform var(--el-transition-duration);transform:rotate(180deg);cursor:pointer}.el-select .el-input .el-select__caret.is-reverse{transform:rotate(0)}.el-select .el-input .el-select__caret.is-show-close{font-size:var(--el-select-font-size);text-align:center;transform:rotate(180deg);border-radius:var(--el-border-radius-circle);color:var(--el-select-input-color);transition:var(--el-transition-color)}.el-select .el-input .el-select__caret.is-show-close:hover{color:var(--el-select-close-hover-color)}.el-select 
.el-input .el-select__caret.el-icon{position:relative;height:inherit;z-index:2}.el-select .el-input.is-disabled .el-input__inner{cursor:not-allowed}.el-select .el-input.is-disabled .el-input__inner:hover{box-shadow:0 0 0 1px var(--el-select-disabled-border) inset}.el-select .el-input.is-disabled .el-select__caret{cursor:not-allowed}.el-select .el-input.is-focus .el-input__inner{box-shadow:0 0 0 1px var(--el-select-input-focus-border-color) inset!important}.el-select__input{border:none;outline:0;padding:0;margin-left:15px;color:var(--el-select-multiple-input-color);font-size:var(--el-select-font-size);-webkit-appearance:none;appearance:none;height:28px;background-color:transparent}.el-select__close{cursor:pointer;position:absolute;top:8px;z-index:var(--el-index-top);right:25px;color:var(--el-select-input-color);line-height:18px;font-size:var(--el-select-input-font-size)}.el-select__close:hover{color:var(--el-select-close-hover-color)}.el-select__tags{position:absolute;line-height:normal;top:50%;transform:translateY(-50%);white-space:normal;z-index:var(--el-index-normal);display:flex;align-items:center;flex-wrap:wrap}.el-select__collapse-tags{white-space:normal;z-index:var(--el-index-normal);display:flex;align-items:center;flex-wrap:wrap}.el-select__collapse-tag{line-height:inherit;height:inherit;display:flex}.el-select .el-select__tags .el-tag{box-sizing:border-box;border-color:transparent;margin:2px 6px 2px 0}.el-select .el-select__tags .el-tag:last-child{margin-right:0}.el-select .el-select__tags .el-tag .el-icon-close{background-color:var(--el-text-color-placeholder);right:-7px;top:0;color:#fff}.el-select .el-select__tags .el-tag .el-icon-close:hover{background-color:var(--el-text-color-secondary)}.el-select .el-select__tags .el-tag .el-icon-close:before{display:block;transform:translateY(.5px)}.el-select .el-select__tags 
.el-tag--info{background-color:var(--el-fill-color)}.el-skeleton{--el-skeleton-circle-size:var(--el-avatar-size)}.el-skeleton__item{background:var(--el-skeleton-color);display:inline-block;height:16px;border-radius:var(--el-border-radius-base);width:100%}.el-skeleton__circle{border-radius:50%;width:var(--el-skeleton-circle-size);height:var(--el-skeleton-circle-size);line-height:var(--el-skeleton-circle-size)}.el-skeleton__button{height:40px;width:64px;border-radius:4px}.el-skeleton__p{width:100%}.el-skeleton__p.is-last{width:61%}.el-skeleton__p.is-first{width:33%}.el-skeleton__text{width:100%;height:var(--el-font-size-small)}.el-skeleton__caption{height:var(--el-font-size-extra-small)}.el-skeleton__h1{height:var(--el-font-size-extra-large)}.el-skeleton__h3{height:var(--el-font-size-large)}.el-skeleton__h5{height:var(--el-font-size-medium)}.el-skeleton__image{width:unset;display:flex;align-items:center;justify-content:center;border-radius:0}.el-skeleton__image svg{color:var(--el-svg-monochrome-grey);fill:currentColor;width:22%;height:22%}.el-skeleton{--el-skeleton-color:var(--el-fill-color);--el-skeleton-to-color:var(--el-fill-color-darker)}@keyframes el-skeleton-loading{0%{background-position:100% 50%}to{background-position:0 50%}}.el-skeleton{width:100%}.el-skeleton__first-line,.el-skeleton__paragraph{height:16px;margin-top:16px;background:var(--el-skeleton-color)}.el-skeleton.is-animated .el-skeleton__item{background:linear-gradient(90deg,var(--el-skeleton-color) 25%,var(--el-skeleton-to-color) 37%,var(--el-skeleton-color) 63%);background-size:400% 100%;animation:el-skeleton-loading 1.4s ease 
infinite}.el-slider{--el-slider-main-bg-color:var(--el-color-primary);--el-slider-runway-bg-color:var(--el-border-color-light);--el-slider-stop-bg-color:var(--el-color-white);--el-slider-disabled-color:var(--el-text-color-placeholder);--el-slider-border-radius:3px;--el-slider-height:6px;--el-slider-button-size:20px;--el-slider-button-wrapper-size:36px;--el-slider-button-wrapper-offset:-15px}.el-slider{width:100%;height:32px;display:flex;align-items:center}.el-slider__runway{flex:1;height:var(--el-slider-height);background-color:var(--el-slider-runway-bg-color);border-radius:var(--el-slider-border-radius);position:relative;cursor:pointer}.el-slider__runway.show-input{margin-right:30px;width:auto}.el-slider__runway.is-disabled{cursor:default}.el-slider__runway.is-disabled .el-slider__bar{background-color:var(--el-slider-disabled-color)}.el-slider__runway.is-disabled .el-slider__button{border-color:var(--el-slider-disabled-color)}.el-slider__runway.is-disabled .el-slider__button-wrapper.hover,.el-slider__runway.is-disabled .el-slider__button-wrapper:hover,.el-slider__runway.is-disabled .el-slider__button-wrapper.dragging{cursor:not-allowed}.el-slider__runway.is-disabled .el-slider__button.dragging,.el-slider__runway.is-disabled .el-slider__button.hover,.el-slider__runway.is-disabled .el-slider__button:hover{transform:scale(1)}.el-slider__runway.is-disabled .el-slider__button.hover,.el-slider__runway.is-disabled .el-slider__button:hover,.el-slider__runway.is-disabled 
.el-slider__button.dragging{cursor:not-allowed}.el-slider__input{flex-shrink:0;width:130px}.el-slider__bar{height:var(--el-slider-height);background-color:var(--el-slider-main-bg-color);border-top-left-radius:var(--el-slider-border-radius);border-bottom-left-radius:var(--el-slider-border-radius);position:absolute}.el-slider__button-wrapper{height:var(--el-slider-button-wrapper-size);width:var(--el-slider-button-wrapper-size);position:absolute;z-index:1;top:var(--el-slider-button-wrapper-offset);transform:translate(-50%);background-color:transparent;text-align:center;-webkit-user-select:none;user-select:none;line-height:normal;outline:0}.el-slider__button-wrapper:after{display:inline-block;content:"";height:100%;vertical-align:middle}.el-slider__button-wrapper.hover,.el-slider__button-wrapper:hover{cursor:grab}.el-slider__button-wrapper.dragging{cursor:grabbing}.el-slider__button{display:inline-block;width:var(--el-slider-button-size);height:var(--el-slider-button-size);vertical-align:middle;border:solid 2px var(--el-slider-main-bg-color);background-color:var(--el-color-white);border-radius:50%;box-sizing:border-box;transition:var(--el-transition-duration-fast);-webkit-user-select:none;user-select:none}.el-slider__button.dragging,.el-slider__button.hover,.el-slider__button:hover{transform:scale(1.2)}.el-slider__button.hover,.el-slider__button:hover{cursor:grab}.el-slider__button.dragging{cursor:grabbing}.el-slider__stop{position:absolute;height:var(--el-slider-height);width:var(--el-slider-height);border-radius:var(--el-border-radius-circle);background-color:var(--el-slider-stop-bg-color);transform:translate(-50%)}.el-slider__marks{top:0;left:12px;width:18px;height:100%}.el-slider__marks-text{position:absolute;transform:translate(-50%);font-size:14px;color:var(--el-color-info);margin-top:15px}.el-slider.is-vertical{position:relative;height:100%;flex:0}.el-slider.is-vertical .el-slider__runway{width:var(--el-slider-height);height:100%;margin:0 
16px}.el-slider.is-vertical .el-slider__bar{width:var(--el-slider-height);height:auto;border-radius:0 0 3px 3px}.el-slider.is-vertical .el-slider__button-wrapper{top:auto;left:var(--el-slider-button-wrapper-offset);transform:translateY(50%)}.el-slider.is-vertical .el-slider__stop{transform:translateY(50%)}.el-slider.is-vertical .el-slider__marks-text{margin-top:0;left:15px;transform:translateY(50%)}.el-slider--large{height:40px}.el-slider--small{height:24px}.el-space{display:inline-flex;vertical-align:top}.el-space__item{display:flex;flex-wrap:wrap}.el-space__item>*{flex:1}.el-space--vertical{flex-direction:column}.el-time-spinner{width:100%;white-space:nowrap}.el-spinner{display:inline-block;vertical-align:middle}.el-spinner-inner{animation:rotate 2s linear infinite;width:50px;height:50px}.el-spinner-inner .path{stroke:var(--el-border-color-lighter);stroke-linecap:round;animation:dash 1.5s ease-in-out infinite}@keyframes rotate{to{transform:rotate(360deg)}}@keyframes dash{0%{stroke-dasharray:1,150;stroke-dashoffset:0}50%{stroke-dasharray:90,150;stroke-dashoffset:-35}to{stroke-dasharray:90,150;stroke-dashoffset:-124}}.el-step{position:relative;flex-shrink:1}.el-step:last-of-type .el-step__line{display:none}.el-step:last-of-type.is-flex{flex-basis:auto!important;flex-shrink:0;flex-grow:0}.el-step:last-of-type .el-step__description,.el-step:last-of-type 
.el-step__main{padding-right:0}.el-step__head{position:relative;width:100%}.el-step__head.is-process{color:var(--el-text-color-primary);border-color:var(--el-text-color-primary)}.el-step__head.is-wait{color:var(--el-text-color-placeholder);border-color:var(--el-text-color-placeholder)}.el-step__head.is-success{color:var(--el-color-success);border-color:var(--el-color-success)}.el-step__head.is-error{color:var(--el-color-danger);border-color:var(--el-color-danger)}.el-step__head.is-finish{color:var(--el-color-primary);border-color:var(--el-color-primary)}.el-step__icon{position:relative;z-index:1;display:inline-flex;justify-content:center;align-items:center;width:24px;height:24px;font-size:14px;box-sizing:border-box;background:var(--el-bg-color-overlay);transition:.15s ease-out}.el-step__icon.is-text{border-radius:50%;border:2px solid;border-color:inherit}.el-step__icon.is-icon{width:40px}.el-step__icon-inner{display:inline-block;-webkit-user-select:none;user-select:none;text-align:center;font-weight:700;line-height:1;color:inherit}.el-step__icon-inner[class*=el-icon]:not(.is-status){font-size:25px;font-weight:400}.el-step__icon-inner.is-status{transform:translateY(1px)}.el-step__line{position:absolute;border-color:inherit;background-color:var(--el-text-color-placeholder)}.el-step__line-inner{display:block;border-width:1px;border-style:solid;border-color:inherit;transition:.15s 
ease-out;box-sizing:border-box;width:0;height:0}.el-step__main{white-space:normal;text-align:left}.el-step__title{font-size:16px;line-height:38px}.el-step__title.is-process{font-weight:700;color:var(--el-text-color-primary)}.el-step__title.is-wait{color:var(--el-text-color-placeholder)}.el-step__title.is-success{color:var(--el-color-success)}.el-step__title.is-error{color:var(--el-color-danger)}.el-step__title.is-finish{color:var(--el-color-primary)}.el-step__description{padding-right:10%;margin-top:-5px;font-size:12px;line-height:20px;font-weight:400}.el-step__description.is-process{color:var(--el-text-color-primary)}.el-step__description.is-wait{color:var(--el-text-color-placeholder)}.el-step__description.is-success{color:var(--el-color-success)}.el-step__description.is-error{color:var(--el-color-danger)}.el-step__description.is-finish{color:var(--el-color-primary)}.el-step.is-horizontal{display:inline-block}.el-step.is-horizontal .el-step__line{height:2px;top:11px;left:0;right:0}.el-step.is-vertical{display:flex}.el-step.is-vertical .el-step__head{flex-grow:0;width:24px}.el-step.is-vertical .el-step__main{padding-left:10px;flex-grow:1}.el-step.is-vertical .el-step__title{line-height:24px;padding-bottom:8px}.el-step.is-vertical .el-step__line{width:2px;top:0;bottom:0;left:11px}.el-step.is-vertical .el-step__icon.is-icon{width:24px}.el-step.is-center .el-step__head,.el-step.is-center .el-step__main{text-align:center}.el-step.is-center .el-step__description{padding-left:20%;padding-right:20%}.el-step.is-center .el-step__line{left:50%;right:-50%}.el-step.is-simple{display:flex;align-items:center}.el-step.is-simple .el-step__head{width:auto;font-size:0;padding-right:10px}.el-step.is-simple .el-step__icon{background:0 0;width:16px;height:16px;font-size:12px}.el-step.is-simple .el-step__icon-inner[class*=el-icon]:not(.is-status){font-size:18px}.el-step.is-simple .el-step__icon-inner.is-status{transform:scale(.8) translateY(1px)}.el-step.is-simple 
.el-step__main{position:relative;display:flex;align-items:stretch;flex-grow:1}.el-step.is-simple .el-step__title{font-size:16px;line-height:20px}.el-step.is-simple:not(:last-of-type) .el-step__title{max-width:50%;word-break:break-all}.el-step.is-simple .el-step__arrow{flex-grow:1;display:flex;align-items:center;justify-content:center}.el-step.is-simple .el-step__arrow:after,.el-step.is-simple .el-step__arrow:before{content:"";display:inline-block;position:absolute;height:15px;width:1px;background:var(--el-text-color-placeholder)}.el-step.is-simple .el-step__arrow:before{transform:rotate(-45deg) translateY(-4px);transform-origin:0 0}.el-step.is-simple .el-step__arrow:after{transform:rotate(45deg) translateY(4px);transform-origin:100% 100%}.el-step.is-simple:last-of-type .el-step__arrow{display:none}.el-steps{display:flex}.el-steps--simple{padding:13px 8%;border-radius:4px;background:var(--el-fill-color-light)}.el-steps--horizontal{white-space:nowrap}.el-steps--vertical{height:100%;flex-flow:column}.el-switch{--el-switch-on-color:var(--el-color-primary);--el-switch-off-color:var(--el-border-color);--el-switch-core-border-radius:10px;--el-switch-width:40px;--el-switch-height:20px;--el-switch-button-size:16px}.el-switch{display:inline-flex;align-items:center;position:relative;font-size:14px;line-height:var(--el-switch-height);height:32px;vertical-align:middle}.el-switch.is-disabled .el-switch__core,.el-switch.is-disabled .el-switch__label{cursor:not-allowed}.el-switch__label{transition:var(--el-transition-duration-fast);height:var(--el-switch-height);display:inline-block;font-size:14px;font-weight:500;cursor:pointer;vertical-align:middle;color:var(--el-text-color-primary)}.el-switch__label.is-active{color:var(--el-color-primary)}.el-switch__label--left{margin-right:10px}.el-switch__label--right{margin-left:10px}.el-switch__label *{line-height:1;font-size:14px;display:inline-block}.el-switch__label .el-icon{height:inherit}.el-switch__label .el-icon 
svg{vertical-align:middle}.el-switch__input{position:absolute;width:0;height:0;opacity:0;margin:0}.el-switch__core{margin:0;display:inline-block;position:relative;width:var(--el-switch-width);height:var(--el-switch-height);border:1px solid var(--el-switch-off-color);outline:0;border-radius:var(--el-switch-core-border-radius);box-sizing:border-box;background:var(--el-switch-off-color);cursor:pointer;transition:border-color var(--el-transition-duration),background-color var(--el-transition-duration);vertical-align:middle}
/* Inline label/icon container of the switch track: absolutely positioned, offset with left:50%. Only one `left` declaration is kept so the effective offset is unambiguous. */
.el-switch__core .el-switch__inner{position:absolute;top:1px;transition:all var(--el-transition-duration);width:var(--el-switch-button-size);height:var(--el-switch-button-size);display:flex;justify-content:center;align-items:center;left:50%;white-space:nowrap}
.el-switch__core .el-switch__inner .is-icon,.el-switch__core .el-switch__inner .is-text{color:var(--el-color-white);transition:opacity var(--el-transition-duration);position:absolute;-webkit-user-select:none;user-select:none}.el-switch__core .el-switch__action{position:absolute;top:1px;left:1px;border-radius:var(--el-border-radius-circle);transition:all var(--el-transition-duration);width:var(--el-switch-button-size);height:var(--el-switch-button-size);background-color:var(--el-color-white);display:flex;justify-content:center;align-items:center;color:var(--el-switch-off-color)}.el-switch__core .el-switch__action .is-icon,.el-switch__core .el-switch__action .is-text{transition:opacity var(--el-transition-duration);position:absolute;-webkit-user-select:none;user-select:none}.el-switch__core .is-text{font-size:12px}.el-switch__core .is-show{opacity:1}.el-switch__core .is-hide{opacity:0}.el-switch.is-checked .el-switch__core{border-color:var(--el-switch-on-color);background-color:var(--el-switch-on-color)}.el-switch.is-checked .el-switch__core .el-switch__action{left:100%;margin-left:calc(-1px - var(--el-switch-button-size));color:var(--el-switch-on-color)}.el-switch.is-checked 
.el-switch__core .el-switch__inner{left:50%;white-space:nowrap;margin-left:calc(-1px - var(--el-switch-button-size))}.el-switch.is-disabled{opacity:.6}.el-switch--wide .el-switch__label.el-switch__label--left span{left:10px}.el-switch--wide .el-switch__label.el-switch__label--right span{right:10px}.el-switch .label-fade-enter-from,.el-switch .label-fade-leave-active{opacity:0}.el-switch--large{font-size:14px;height:40px}.el-switch--large .el-switch__label,.el-switch--large .el-switch__label *{font-size:14px}.el-switch--small{font-size:12px;height:24px}.el-switch--small .el-switch__label,.el-switch--small .el-switch__label *{font-size:12px}.el-table-column--selection .cell{padding-left:14px;padding-right:14px}.el-table-filter{border:solid 1px var(--el-border-color-lighter);border-radius:2px;background-color:#fff;box-shadow:var(--el-box-shadow-light);box-sizing:border-box}.el-table-filter__list{padding:5px 0;margin:0;list-style:none;min-width:100px}.el-table-filter__list-item{line-height:36px;padding:0 10px;cursor:pointer;font-size:var(--el-font-size-base)}.el-table-filter__list-item:hover{background-color:var(--el-color-primary-light-9);color:var(--el-color-primary-light-3)}.el-table-filter__list-item.is-active{background-color:var(--el-color-primary);color:#fff}.el-table-filter__content{min-width:100px}.el-table-filter__bottom{border-top:1px solid var(--el-border-color-lighter);padding:8px}.el-table-filter__bottom button{background:0 0;border:none;color:var(--el-text-color-regular);cursor:pointer;font-size:var(--el-font-size-small);padding:0 3px}.el-table-filter__bottom button:hover{color:var(--el-color-primary)}.el-table-filter__bottom button:focus{outline:0}.el-table-filter__bottom button.is-disabled{color:var(--el-disabled-text-color);cursor:not-allowed}.el-table-filter__wrap{max-height:280px}.el-table-filter__checkbox-group{padding:10px}.el-table-filter__checkbox-group 
label.el-checkbox{display:flex;align-items:center;margin-right:5px;margin-bottom:12px;margin-left:5px;height:unset}.el-table-filter__checkbox-group .el-checkbox:last-child{margin-bottom:0}.el-table{--el-table-border-color:var(--el-border-color-lighter);--el-table-border:1px solid var(--el-table-border-color);--el-table-text-color:var(--el-text-color-regular);--el-table-header-text-color:var(--el-text-color-secondary);--el-table-row-hover-bg-color:var(--el-fill-color-light);--el-table-current-row-bg-color:var(--el-color-primary-light-9);--el-table-header-bg-color:var(--el-fill-color-blank);--el-table-fixed-box-shadow:var(--el-box-shadow-light);--el-table-bg-color:var(--el-fill-color-blank);--el-table-tr-bg-color:var(--el-fill-color-blank);--el-table-expanded-cell-bg-color:var(--el-fill-color-blank);--el-table-fixed-left-column:inset 10px 0 10px -10px rgba(0, 0, 0, .15);--el-table-fixed-right-column:inset -10px 0 10px -10px rgba(0, 0, 0, .15)}.el-table{position:relative;overflow:hidden;box-sizing:border-box;height:-moz-fit-content;height:fit-content;width:100%;max-width:100%;background-color:var(--el-table-bg-color);font-size:14px;color:var(--el-table-text-color)}.el-table__inner-wrapper{position:relative}.el-table__inner-wrapper:before{left:0;bottom:0;width:100%;height:1px;z-index:3}.el-table.has-footer .el-table__inner-wrapper:before{bottom:1px}.el-table__empty-block{position:sticky;left:0;min-height:60px;text-align:center;width:100%;display:flex;justify-content:center;align-items:center}.el-table__empty-text{line-height:60px;width:50%;color:var(--el-text-color-secondary)}.el-table__expand-column .cell{padding:0;text-align:center;-webkit-user-select:none;user-select:none}.el-table__expand-icon{position:relative;cursor:pointer;color:var(--el-text-color-regular);font-size:12px;transition:transform var(--el-transition-duration-fast) 
ease-in-out;height:20px}.el-table__expand-icon--expanded{transform:rotate(90deg)}.el-table__expand-icon>.el-icon{font-size:12px}.el-table__expanded-cell{background-color:var(--el-table-expanded-cell-bg-color)}.el-table__expanded-cell[class*=cell]{padding:20px 50px}.el-table__expanded-cell:hover{background-color:transparent!important}.el-table__placeholder{display:inline-block;width:20px}.el-table__append-wrapper{overflow:hidden}.el-table--fit{border-right:0;border-bottom:0}.el-table--fit .el-table__cell.gutter{border-right-width:1px}.el-table thead{color:var(--el-table-header-text-color);font-weight:500}.el-table thead.is-group th.el-table__cell{background:var(--el-fill-color-light)}.el-table .el-table__cell{padding:8px 0;min-width:0;box-sizing:border-box;text-overflow:ellipsis;vertical-align:middle;position:relative;text-align:left;z-index:1}.el-table .el-table__cell.is-center{text-align:center}.el-table .el-table__cell.is-right{text-align:right}.el-table .el-table__cell.gutter{width:15px;border-right-width:0;border-bottom-width:0;padding:0}.el-table .el-table__cell.is-hidden>*{visibility:hidden}.el-table .cell{box-sizing:border-box;overflow:hidden;text-overflow:ellipsis;white-space:normal;word-break:break-all;line-height:23px;padding:0 12px}.el-table .cell.el-tooltip{white-space:nowrap;min-width:50px}.el-table--large{font-size:var(--el-font-size-base)}.el-table--large .el-table__cell{padding:12px 0}.el-table--large .cell{padding:0 16px}.el-table--small{font-size:12px}.el-table--small .el-table__cell{padding:4px 0}.el-table--small .cell{padding:0 8px}.el-table tr{background-color:var(--el-table-tr-bg-color)}.el-table tr input[type=checkbox]{margin:0}.el-table td.el-table__cell,.el-table th.el-table__cell.is-leaf{border-bottom:var(--el-table-border)}.el-table th.el-table__cell.is-sortable{cursor:pointer}.el-table th.el-table__cell{-webkit-user-select:none;user-select:none;background-color:var(--el-table-header-bg-color)}.el-table 
th.el-table__cell>.cell{display:inline-block;box-sizing:border-box;position:relative;vertical-align:middle;width:100%}.el-table th.el-table__cell>.cell.highlight{color:var(--el-color-primary)}.el-table th.el-table__cell.required>div:before{display:inline-block;content:"";width:8px;height:8px;border-radius:50%;background:#ff4d51;margin-right:5px;vertical-align:middle}.el-table td.el-table__cell div{box-sizing:border-box}.el-table td.el-table__cell.gutter{width:0}.el-table--border .el-table__footer-wrapper tr:first-child td:first-child,.el-table--border .el-table__footer-wrapper tr:first-child th:first-child,.el-table--border .el-table__inner-wrapper tr:first-child td:first-child,.el-table--border .el-table__inner-wrapper tr:first-child th:first-child,.el-table--group .el-table__footer-wrapper tr:first-child td:first-child,.el-table--group .el-table__footer-wrapper tr:first-child th:first-child,.el-table--group .el-table__inner-wrapper tr:first-child td:first-child,.el-table--group .el-table__inner-wrapper tr:first-child th:first-child{border-left:var(--el-table-border)}.el-table--border .el-table__footer-wrapper,.el-table--group .el-table__footer-wrapper{border-top:var(--el-table-border)}.el-table--border .el-table__inner-wrapper:after,.el-table--border:after,.el-table--border:before,.el-table__inner-wrapper:before{content:"";position:absolute;background-color:var(--el-table-border-color);z-index:3}.el-table--border .el-table__inner-wrapper:after{left:0;top:0;width:100%;height:1px;z-index:3}.el-table--border:before{top:-1px;left:0;width:1px;height:100%;z-index:3}.el-table--border:after{top:-1px;right:0;width:1px;height:100%;z-index:3}.el-table--border .el-table__inner-wrapper{border-right:none;border-bottom:none}.el-table--border .el-table__footer-wrapper{position:relative}.el-table--border .el-table__footer-wrapper{margin-top:-2px}.el-table--border .el-table__cell{border-right:var(--el-table-border)}.el-table--border .el-table__cell:first-child 
.cell{padding-left:10px}.el-table--border th.el-table__cell.gutter:last-of-type{border-bottom:var(--el-table-border);border-bottom-width:1px}.el-table--border th.el-table__cell{border-bottom:var(--el-table-border)}.el-table--hidden{visibility:hidden}.el-table__body-wrapper,.el-table__footer-wrapper,.el-table__header-wrapper{width:100%}.el-table__body-wrapper tr td.el-table-fixed-column--left,.el-table__body-wrapper tr td.el-table-fixed-column--right,.el-table__body-wrapper tr th.el-table-fixed-column--left,.el-table__body-wrapper tr th.el-table-fixed-column--right,.el-table__footer-wrapper tr td.el-table-fixed-column--left,.el-table__footer-wrapper tr td.el-table-fixed-column--right,.el-table__footer-wrapper tr th.el-table-fixed-column--left,.el-table__footer-wrapper tr th.el-table-fixed-column--right,.el-table__header-wrapper tr td.el-table-fixed-column--left,.el-table__header-wrapper tr td.el-table-fixed-column--right,.el-table__header-wrapper tr th.el-table-fixed-column--left,.el-table__header-wrapper tr th.el-table-fixed-column--right{position:sticky!important;z-index:2;background:var(--el-bg-color)}.el-table__body-wrapper tr td.el-table-fixed-column--left.is-first-column:before,.el-table__body-wrapper tr td.el-table-fixed-column--left.is-last-column:before,.el-table__body-wrapper tr td.el-table-fixed-column--right.is-first-column:before,.el-table__body-wrapper tr td.el-table-fixed-column--right.is-last-column:before,.el-table__body-wrapper tr th.el-table-fixed-column--left.is-first-column:before,.el-table__body-wrapper tr th.el-table-fixed-column--left.is-last-column:before,.el-table__body-wrapper tr th.el-table-fixed-column--right.is-first-column:before,.el-table__body-wrapper tr th.el-table-fixed-column--right.is-last-column:before,.el-table__footer-wrapper tr td.el-table-fixed-column--left.is-first-column:before,.el-table__footer-wrapper tr td.el-table-fixed-column--left.is-last-column:before,.el-table__footer-wrapper tr 
td.el-table-fixed-column--right.is-first-column:before,.el-table__footer-wrapper tr td.el-table-fixed-column--right.is-last-column:before,.el-table__footer-wrapper tr th.el-table-fixed-column--left.is-first-column:before,.el-table__footer-wrapper tr th.el-table-fixed-column--left.is-last-column:before,.el-table__footer-wrapper tr th.el-table-fixed-column--right.is-first-column:before,.el-table__footer-wrapper tr th.el-table-fixed-column--right.is-last-column:before,.el-table__header-wrapper tr td.el-table-fixed-column--left.is-first-column:before,.el-table__header-wrapper tr td.el-table-fixed-column--left.is-last-column:before,.el-table__header-wrapper tr td.el-table-fixed-column--right.is-first-column:before,.el-table__header-wrapper tr td.el-table-fixed-column--right.is-last-column:before,.el-table__header-wrapper tr th.el-table-fixed-column--left.is-first-column:before,.el-table__header-wrapper tr th.el-table-fixed-column--left.is-last-column:before,.el-table__header-wrapper tr th.el-table-fixed-column--right.is-first-column:before,.el-table__header-wrapper tr th.el-table-fixed-column--right.is-last-column:before{content:"";position:absolute;top:0;width:10px;bottom:-1px;overflow-x:hidden;overflow-y:hidden;box-shadow:none;touch-action:none;pointer-events:none}.el-table__body-wrapper tr td.el-table-fixed-column--left.is-first-column:before,.el-table__body-wrapper tr td.el-table-fixed-column--right.is-first-column:before,.el-table__body-wrapper tr th.el-table-fixed-column--left.is-first-column:before,.el-table__body-wrapper tr th.el-table-fixed-column--right.is-first-column:before,.el-table__footer-wrapper tr td.el-table-fixed-column--left.is-first-column:before,.el-table__footer-wrapper tr td.el-table-fixed-column--right.is-first-column:before,.el-table__footer-wrapper tr th.el-table-fixed-column--left.is-first-column:before,.el-table__footer-wrapper tr th.el-table-fixed-column--right.is-first-column:before,.el-table__header-wrapper tr 
td.el-table-fixed-column--left.is-first-column:before,.el-table__header-wrapper tr td.el-table-fixed-column--right.is-first-column:before,.el-table__header-wrapper tr th.el-table-fixed-column--left.is-first-column:before,.el-table__header-wrapper tr th.el-table-fixed-column--right.is-first-column:before{left:-10px}
/* Shadow strip on the trailing edge of a fixed column: pinned 10px outside the cell, no shadow by default. */
.el-table__body-wrapper tr td.el-table-fixed-column--left.is-last-column:before,.el-table__body-wrapper tr td.el-table-fixed-column--right.is-last-column:before,.el-table__body-wrapper tr th.el-table-fixed-column--left.is-last-column:before,.el-table__body-wrapper tr th.el-table-fixed-column--right.is-last-column:before,.el-table__footer-wrapper tr td.el-table-fixed-column--left.is-last-column:before,.el-table__footer-wrapper tr td.el-table-fixed-column--right.is-last-column:before,.el-table__footer-wrapper tr th.el-table-fixed-column--left.is-last-column:before,.el-table__footer-wrapper tr th.el-table-fixed-column--right.is-last-column:before,.el-table__header-wrapper tr td.el-table-fixed-column--left.is-last-column:before,.el-table__header-wrapper tr td.el-table-fixed-column--right.is-last-column:before,.el-table__header-wrapper tr th.el-table-fixed-column--left.is-last-column:before,.el-table__header-wrapper tr th.el-table-fixed-column--right.is-last-column:before{right:-10px;box-shadow:none}
/* Sticky patch cell at the right edge. Its background follows the same --el-bg-color token the sticky fixed-column cells use, falling back to white when the token is not defined. */
.el-table__body-wrapper tr td.el-table__fixed-right-patch,.el-table__body-wrapper tr th.el-table__fixed-right-patch,.el-table__footer-wrapper tr td.el-table__fixed-right-patch,.el-table__footer-wrapper tr th.el-table__fixed-right-patch,.el-table__header-wrapper tr td.el-table__fixed-right-patch,.el-table__header-wrapper tr th.el-table__fixed-right-patch{position:sticky!important;z-index:2;background:var(--el-bg-color,#fff);right:0}
.el-table__header-wrapper tr th.el-table-fixed-column--left,.el-table__header-wrapper tr 
th.el-table-fixed-column--right{background-color:var(--el-table-header-bg-color)}.el-table__body,.el-table__footer,.el-table__header{table-layout:fixed;border-collapse:separate}.el-table__footer-wrapper,.el-table__header-wrapper{overflow:hidden}.el-table__footer-wrapper tbody td.el-table__cell,.el-table__header-wrapper tbody td.el-table__cell{background-color:var(--el-table-row-hover-bg-color);color:var(--el-table-text-color)}.el-table__body-wrapper .el-table-column--selection .el-checkbox,.el-table__header-wrapper .el-table-column--selection .el-checkbox{height:unset}.el-table.is-scrolling-left .el-table-fixed-column--right.is-first-column:before{box-shadow:var(--el-table-fixed-right-column)}.el-table.is-scrolling-left.el-table--border .el-table-fixed-column--left.is-last-column.el-table__cell{border-right:var(--el-table-border)}.el-table.is-scrolling-left th.el-table-fixed-column--left{background-color:var(--el-table-header-bg-color)}.el-table.is-scrolling-right .el-table-fixed-column--left.is-last-column:before{box-shadow:var(--el-table-fixed-left-column)}.el-table.is-scrolling-right .el-table-fixed-column--left.is-last-column.el-table__cell{border-right:none}.el-table.is-scrolling-right th.el-table-fixed-column--right{background-color:var(--el-table-header-bg-color)}.el-table.is-scrolling-middle .el-table-fixed-column--left.is-last-column.el-table__cell{border-right:none}.el-table.is-scrolling-middle .el-table-fixed-column--right.is-first-column:before{box-shadow:var(--el-table-fixed-right-column)}.el-table.is-scrolling-middle .el-table-fixed-column--left.is-last-column:before{box-shadow:var(--el-table-fixed-left-column)}.el-table.is-scrolling-none .el-table-fixed-column--left.is-first-column:before,.el-table.is-scrolling-none .el-table-fixed-column--left.is-last-column:before,.el-table.is-scrolling-none .el-table-fixed-column--right.is-first-column:before,.el-table.is-scrolling-none 
.el-table-fixed-column--right.is-last-column:before{box-shadow:none}.el-table.is-scrolling-none th.el-table-fixed-column--left,.el-table.is-scrolling-none th.el-table-fixed-column--right{background-color:var(--el-table-header-bg-color)}.el-table__body-wrapper{overflow:hidden;position:relative}.el-table__body-wrapper .el-scrollbar__bar{z-index:2}.el-table .caret-wrapper{display:inline-flex;flex-direction:column;align-items:center;height:14px;width:24px;vertical-align:middle;cursor:pointer;overflow:initial;position:relative}.el-table .sort-caret{width:0;height:0;border:solid 5px transparent;position:absolute;left:7px}.el-table .sort-caret.ascending{border-bottom-color:var(--el-text-color-placeholder);top:-5px}.el-table .sort-caret.descending{border-top-color:var(--el-text-color-placeholder);bottom:-3px}.el-table .ascending .sort-caret.ascending{border-bottom-color:var(--el-color-primary)}.el-table .descending .sort-caret.descending{border-top-color:var(--el-color-primary)}.el-table .hidden-columns{visibility:hidden;position:absolute;z-index:-1}.el-table--striped .el-table__body tr.el-table__row--striped td.el-table__cell{background:var(--el-fill-color-lighter)}.el-table--striped .el-table__body tr.el-table__row--striped.current-row td.el-table__cell{background-color:var(--el-table-current-row-bg-color)}.el-table__body tr.hover-row.current-row>td.el-table__cell,.el-table__body tr.hover-row.el-table__row--striped.current-row>td.el-table__cell,.el-table__body tr.hover-row.el-table__row--striped>td.el-table__cell,.el-table__body tr.hover-row>td.el-table__cell{background-color:var(--el-table-row-hover-bg-color)}.el-table__body tr.current-row>td.el-table__cell{background-color:var(--el-table-current-row-bg-color)}.el-table__column-resize-proxy{position:absolute;left:200px;top:0;bottom:0;width:0;border-left:var(--el-table-border);z-index:10}.el-table__column-filter-trigger{display:inline-block;cursor:pointer}.el-table__column-filter-trigger 
i{color:var(--el-color-info);font-size:14px;vertical-align:middle}.el-table__border-left-patch{top:0;left:0;width:1px;height:100%;z-index:3;position:absolute;background-color:var(--el-table-border-color)}.el-table__border-bottom-patch{left:0;height:1px;z-index:3;position:absolute;background-color:var(--el-table-border-color)}.el-table__border-right-patch{top:0;height:100%;width:1px;z-index:3;position:absolute;background-color:var(--el-table-border-color)}.el-table--enable-row-transition .el-table__body td.el-table__cell{transition:background-color .25s ease}.el-table--enable-row-hover .el-table__body tr:hover>td.el-table__cell{background-color:var(--el-table-row-hover-bg-color)}.el-table [class*=el-table__row--level] .el-table__expand-icon{display:inline-block;width:12px;line-height:12px;height:12px;text-align:center;margin-right:8px}.el-tabs__header{padding:0;position:relative;margin:0 0 15px}.el-tabs__active-bar{position:absolute;bottom:0;left:0;height:2px;background-color:var(--el-color-primary);z-index:1;transition:width var(--el-transition-duration) var(--el-transition-function-ease-in-out-bezier),transform var(--el-transition-duration) var(--el-transition-function-ease-in-out-bezier);list-style:none}.el-tabs__new-tab{display:flex;align-items:center;justify-content:center;float:right;border:1px solid var(--el-border-color);height:20px;width:20px;line-height:20px;margin:10px 0 10px 10px;border-radius:3px;text-align:center;font-size:12px;color:var(--el-text-color-primary);cursor:pointer;transition:all .15s}.el-tabs__new-tab .is-icon-plus{height:inherit;width:inherit;transform:scale(.8)}.el-tabs__new-tab .is-icon-plus 
svg{vertical-align:middle}.el-tabs__new-tab:hover{color:var(--el-color-primary)}.el-tabs__nav-wrap{overflow:hidden;margin-bottom:-1px;position:relative}.el-tabs__nav-wrap:after{content:"";position:absolute;left:0;bottom:0;width:100%;height:2px;background-color:var(--el-border-color-light);z-index:var(--el-index-normal)}.el-tabs__nav-wrap.is-scrollable{padding:0 20px;box-sizing:border-box}.el-tabs__nav-scroll{overflow:hidden}.el-tabs__nav-next,.el-tabs__nav-prev{position:absolute;cursor:pointer;line-height:44px;font-size:12px;color:var(--el-text-color-secondary)}.el-tabs__nav-next{right:0}.el-tabs__nav-prev{left:0}.el-tabs__nav{white-space:nowrap;position:relative;transition:transform var(--el-transition-duration);float:left;z-index:calc(var(--el-index-normal) + 1)}.el-tabs__nav.is-stretch{min-width:100%;display:flex}.el-tabs__nav.is-stretch>*{flex:1;text-align:center}.el-tabs__item{padding:0 20px;height:40px;box-sizing:border-box;line-height:40px;display:inline-block;list-style:none;font-size:14px;font-weight:500;color:var(--el-text-color-primary);position:relative}.el-tabs__item:focus,.el-tabs__item:focus:active{outline:0}.el-tabs__item .is-icon-close{border-radius:50%;text-align:center;transition:all var(--el-transition-duration) var(--el-transition-function-ease-in-out-bezier);margin-left:5px}.el-tabs__item .is-icon-close:before{transform:scale(.9);display:inline-block}.el-tabs__item .is-icon-close:hover{background-color:var(--el-text-color-placeholder);color:#fff}.el-tabs__item .is-icon-close svg{margin-top:1px}.el-tabs__item.is-active{color:var(--el-color-primary)}.el-tabs__item:hover{color:var(--el-color-primary);cursor:pointer}.el-tabs__item.is-disabled{color:var(--el-disabled-text-color);cursor:default}.el-tabs__content{overflow:hidden;position:relative}.el-tabs--card>.el-tabs__header{border-bottom:1px solid var(--el-border-color-light)}.el-tabs--card>.el-tabs__header .el-tabs__nav-wrap:after{content:none}.el-tabs--card>.el-tabs__header 
.el-tabs__nav{border:1px solid var(--el-border-color-light);border-bottom:none;border-radius:4px 4px 0 0;box-sizing:border-box}.el-tabs--card>.el-tabs__header .el-tabs__active-bar{display:none}.el-tabs--card>.el-tabs__header .el-tabs__item .is-icon-close{position:relative;font-size:12px;width:0;height:14px;vertical-align:middle;line-height:15px;overflow:hidden;top:-1px;right:-2px;transform-origin:100% 50%}.el-tabs--card>.el-tabs__header .el-tabs__item{border-bottom:1px solid transparent;border-left:1px solid var(--el-border-color-light);transition:color var(--el-transition-duration) var(--el-transition-function-ease-in-out-bezier),padding var(--el-transition-duration) var(--el-transition-function-ease-in-out-bezier)}.el-tabs--card>.el-tabs__header .el-tabs__item:first-child{border-left:none}.el-tabs--card>.el-tabs__header .el-tabs__item.is-closable:hover{padding-left:13px;padding-right:13px}.el-tabs--card>.el-tabs__header .el-tabs__item.is-closable:hover .is-icon-close{width:14px}
/* Active card tab: its bottom border is painted in the background token (white fallback) instead of a literal #fff, so it stays correct when --el-bg-color is overridden. */
.el-tabs--card>.el-tabs__header .el-tabs__item.is-active{border-bottom-color:var(--el-bg-color,#fff)}
.el-tabs--card>.el-tabs__header .el-tabs__item.is-active.is-closable{padding-left:20px;padding-right:20px}.el-tabs--card>.el-tabs__header .el-tabs__item.is-active.is-closable .is-icon-close{width:14px}.el-tabs--border-card{background:var(--el-bg-color-overlay);border:1px solid var(--el-border-color)}.el-tabs--border-card>.el-tabs__content{padding:15px}.el-tabs--border-card>.el-tabs__header{background-color:var(--el-fill-color-light);border-bottom:1px solid var(--el-border-color-light);margin:0}.el-tabs--border-card>.el-tabs__header .el-tabs__nav-wrap:after{content:none}.el-tabs--border-card>.el-tabs__header .el-tabs__item{transition:all var(--el-transition-duration) var(--el-transition-function-ease-in-out-bezier);border:1px solid transparent;margin-top:-1px;color:var(--el-text-color-secondary)}.el-tabs--border-card>.el-tabs__header 
.el-tabs__item:first-child{margin-left:-1px}.el-tabs--border-card>.el-tabs__header .el-tabs__item+.el-tabs__item{margin-left:-1px}.el-tabs--border-card>.el-tabs__header .el-tabs__item.is-active{color:var(--el-color-primary);background-color:var(--el-bg-color-overlay);border-right-color:var(--el-border-color);border-left-color:var(--el-border-color)}.el-tabs--border-card>.el-tabs__header .el-tabs__item:not(.is-disabled):hover{color:var(--el-color-primary)}.el-tabs--border-card>.el-tabs__header .el-tabs__item.is-disabled{color:var(--el-disabled-text-color)}.el-tabs--border-card>.el-tabs__header .is-scrollable .el-tabs__item:first-child{margin-left:0}.el-tabs--bottom .el-tabs__item.is-bottom:nth-child(2),.el-tabs--bottom .el-tabs__item.is-top:nth-child(2),.el-tabs--top .el-tabs__item.is-bottom:nth-child(2),.el-tabs--top .el-tabs__item.is-top:nth-child(2){padding-left:0}.el-tabs--bottom .el-tabs__item.is-bottom:last-child,.el-tabs--bottom .el-tabs__item.is-top:last-child,.el-tabs--top .el-tabs__item.is-bottom:last-child,.el-tabs--top .el-tabs__item.is-top:last-child{padding-right:0}.el-tabs--bottom .el-tabs--left>.el-tabs__header .el-tabs__item:nth-child(2),.el-tabs--bottom .el-tabs--right>.el-tabs__header .el-tabs__item:nth-child(2),.el-tabs--bottom.el-tabs--border-card>.el-tabs__header .el-tabs__item:nth-child(2),.el-tabs--bottom.el-tabs--card>.el-tabs__header .el-tabs__item:nth-child(2),.el-tabs--top .el-tabs--left>.el-tabs__header .el-tabs__item:nth-child(2),.el-tabs--top .el-tabs--right>.el-tabs__header .el-tabs__item:nth-child(2),.el-tabs--top.el-tabs--border-card>.el-tabs__header .el-tabs__item:nth-child(2),.el-tabs--top.el-tabs--card>.el-tabs__header .el-tabs__item:nth-child(2){padding-left:20px}.el-tabs--bottom .el-tabs--left>.el-tabs__header .el-tabs__item:last-child,.el-tabs--bottom .el-tabs--right>.el-tabs__header .el-tabs__item:last-child,.el-tabs--bottom.el-tabs--border-card>.el-tabs__header 
.el-tabs__item:last-child,.el-tabs--bottom.el-tabs--card>.el-tabs__header .el-tabs__item:last-child,.el-tabs--top .el-tabs--left>.el-tabs__header .el-tabs__item:last-child,.el-tabs--top .el-tabs--right>.el-tabs__header .el-tabs__item:last-child,.el-tabs--top.el-tabs--border-card>.el-tabs__header .el-tabs__item:last-child,.el-tabs--top.el-tabs--card>.el-tabs__header .el-tabs__item:last-child{padding-right:20px}.el-tabs--bottom .el-tabs__header.is-bottom{margin-bottom:0;margin-top:10px}.el-tabs--bottom.el-tabs--border-card .el-tabs__header.is-bottom{border-bottom:0;border-top:1px solid var(--el-border-color)}.el-tabs--bottom.el-tabs--border-card .el-tabs__nav-wrap.is-bottom{margin-top:-1px;margin-bottom:0}.el-tabs--bottom.el-tabs--border-card .el-tabs__item.is-bottom:not(.is-active){border:1px solid transparent}.el-tabs--bottom.el-tabs--border-card .el-tabs__item.is-bottom{margin:0 -1px -1px}.el-tabs--left,.el-tabs--right{overflow:hidden}.el-tabs--left .el-tabs__header.is-left,.el-tabs--left .el-tabs__header.is-right,.el-tabs--left .el-tabs__nav-scroll,.el-tabs--left .el-tabs__nav-wrap.is-left,.el-tabs--left .el-tabs__nav-wrap.is-right,.el-tabs--right .el-tabs__header.is-left,.el-tabs--right .el-tabs__header.is-right,.el-tabs--right .el-tabs__nav-scroll,.el-tabs--right .el-tabs__nav-wrap.is-left,.el-tabs--right .el-tabs__nav-wrap.is-right{height:100%}.el-tabs--left .el-tabs__active-bar.is-left,.el-tabs--left .el-tabs__active-bar.is-right,.el-tabs--right .el-tabs__active-bar.is-left,.el-tabs--right .el-tabs__active-bar.is-right{top:0;bottom:auto;width:2px;height:auto}.el-tabs--left .el-tabs__nav-wrap.is-left,.el-tabs--left .el-tabs__nav-wrap.is-right,.el-tabs--right .el-tabs__nav-wrap.is-left,.el-tabs--right .el-tabs__nav-wrap.is-right{margin-bottom:0}.el-tabs--left .el-tabs__nav-wrap.is-left>.el-tabs__nav-next,.el-tabs--left .el-tabs__nav-wrap.is-left>.el-tabs__nav-prev,.el-tabs--left .el-tabs__nav-wrap.is-right>.el-tabs__nav-next,.el-tabs--left 
.el-tabs__nav-wrap.is-right>.el-tabs__nav-prev,.el-tabs--right .el-tabs__nav-wrap.is-left>.el-tabs__nav-next,.el-tabs--right .el-tabs__nav-wrap.is-left>.el-tabs__nav-prev,.el-tabs--right .el-tabs__nav-wrap.is-right>.el-tabs__nav-next,.el-tabs--right .el-tabs__nav-wrap.is-right>.el-tabs__nav-prev{height:30px;line-height:30px;width:100%;text-align:center;cursor:pointer}.el-tabs--left .el-tabs__nav-wrap.is-left>.el-tabs__nav-next i,.el-tabs--left .el-tabs__nav-wrap.is-left>.el-tabs__nav-prev i,.el-tabs--left .el-tabs__nav-wrap.is-right>.el-tabs__nav-next i,.el-tabs--left .el-tabs__nav-wrap.is-right>.el-tabs__nav-prev i,.el-tabs--right .el-tabs__nav-wrap.is-left>.el-tabs__nav-next i,.el-tabs--right .el-tabs__nav-wrap.is-left>.el-tabs__nav-prev i,.el-tabs--right .el-tabs__nav-wrap.is-right>.el-tabs__nav-next i,.el-tabs--right .el-tabs__nav-wrap.is-right>.el-tabs__nav-prev i{transform:rotate(90deg)}.el-tabs--left .el-tabs__nav-wrap.is-left>.el-tabs__nav-prev,.el-tabs--left .el-tabs__nav-wrap.is-right>.el-tabs__nav-prev,.el-tabs--right .el-tabs__nav-wrap.is-left>.el-tabs__nav-prev,.el-tabs--right .el-tabs__nav-wrap.is-right>.el-tabs__nav-prev{left:auto;top:0}.el-tabs--left .el-tabs__nav-wrap.is-left>.el-tabs__nav-next,.el-tabs--left .el-tabs__nav-wrap.is-right>.el-tabs__nav-next,.el-tabs--right .el-tabs__nav-wrap.is-left>.el-tabs__nav-next,.el-tabs--right .el-tabs__nav-wrap.is-right>.el-tabs__nav-next{right:auto;bottom:0}.el-tabs--left .el-tabs__nav-wrap.is-left.is-scrollable,.el-tabs--left .el-tabs__nav-wrap.is-right.is-scrollable,.el-tabs--right .el-tabs__nav-wrap.is-left.is-scrollable,.el-tabs--right .el-tabs__nav-wrap.is-right.is-scrollable{padding:30px 0}.el-tabs--left .el-tabs__nav-wrap.is-left:after,.el-tabs--left .el-tabs__nav-wrap.is-right:after,.el-tabs--right .el-tabs__nav-wrap.is-left:after,.el-tabs--right .el-tabs__nav-wrap.is-right:after{height:100%;width:2px;bottom:auto;top:0}.el-tabs--left .el-tabs__nav.is-left,.el-tabs--left 
.el-tabs__nav.is-right,.el-tabs--right .el-tabs__nav.is-left,.el-tabs--right .el-tabs__nav.is-right{float:none}.el-tabs--left .el-tabs__item.is-left,.el-tabs--left .el-tabs__item.is-right,.el-tabs--right .el-tabs__item.is-left,.el-tabs--right .el-tabs__item.is-right{display:block}.el-tabs--left .el-tabs__header.is-left{float:left;margin-bottom:0;margin-right:10px}.el-tabs--left .el-tabs__nav-wrap.is-left{margin-right:-1px}.el-tabs--left .el-tabs__nav-wrap.is-left:after{left:auto;right:0}.el-tabs--left .el-tabs__active-bar.is-left{right:0;left:auto}.el-tabs--left .el-tabs__item.is-left{text-align:right}.el-tabs--left.el-tabs--card .el-tabs__active-bar.is-left{display:none}.el-tabs--left.el-tabs--card .el-tabs__item.is-left{border-left:none;border-right:1px solid var(--el-border-color-light);border-bottom:none;border-top:1px solid var(--el-border-color-light);text-align:left}.el-tabs--left.el-tabs--card .el-tabs__item.is-left:first-child{border-right:1px solid var(--el-border-color-light);border-top:none}.el-tabs--left.el-tabs--card .el-tabs__item.is-left.is-active{border:1px solid var(--el-border-color-light);border-right-color:#fff;border-left:none;border-bottom:none}.el-tabs--left.el-tabs--card .el-tabs__item.is-left.is-active:first-child{border-top:none}.el-tabs--left.el-tabs--card .el-tabs__item.is-left.is-active:last-child{border-bottom:none}.el-tabs--left.el-tabs--card .el-tabs__nav{border-radius:4px 0 0 4px;border-bottom:1px solid var(--el-border-color-light);border-right:none}.el-tabs--left.el-tabs--card .el-tabs__new-tab{float:none}.el-tabs--left.el-tabs--border-card .el-tabs__header.is-left{border-right:1px solid var(--el-border-color)}.el-tabs--left.el-tabs--border-card .el-tabs__item.is-left{border:1px solid transparent;margin:-1px 0 -1px -1px}.el-tabs--left.el-tabs--border-card .el-tabs__item.is-left.is-active{border-color:transparent;border-top-color:#d1dbe5;border-bottom-color:#d1dbe5}.el-tabs--right 
.el-tabs__header.is-right { float: right; margin-bottom: 0; margin-left: 10px; }

/* Tabs, right placement. */
.el-tabs--right .el-tabs__nav-wrap.is-right { margin-left: -1px; }
.el-tabs--right .el-tabs__nav-wrap.is-right:after { left: 0; right: auto; }
.el-tabs--right .el-tabs__active-bar.is-right { left: 0; }
.el-tabs--right.el-tabs--card .el-tabs__active-bar.is-right { display: none; }
.el-tabs--right.el-tabs--card .el-tabs__item.is-right { border-bottom: none; border-top: 1px solid var(--el-border-color-light); }
.el-tabs--right.el-tabs--card .el-tabs__item.is-right:first-child { border-left: 1px solid var(--el-border-color-light); border-top: none; }
.el-tabs--right.el-tabs--card .el-tabs__item.is-right.is-active { border: 1px solid var(--el-border-color-light); border-left-color: #fff; border-right: none; border-bottom: none; }
.el-tabs--right.el-tabs--card .el-tabs__item.is-right.is-active:first-child { border-top: none; }
.el-tabs--right.el-tabs--card .el-tabs__item.is-right.is-active:last-child { border-bottom: none; }
.el-tabs--right.el-tabs--card .el-tabs__nav { border-radius: 0 4px 4px 0; border-bottom: 1px solid var(--el-border-color-light); border-left: none; }
.el-tabs--right.el-tabs--border-card .el-tabs__header.is-right { border-left: 1px solid var(--el-border-color); }
.el-tabs--right.el-tabs--border-card .el-tabs__item.is-right { border: 1px solid transparent; margin: -1px -1px -1px 0; }
.el-tabs--right.el-tabs--border-card .el-tabs__item.is-right.is-active { border-color: transparent; border-top-color: #d1dbe5; border-bottom-color: #d1dbe5; }

/* Slide-in transition classes and the keyframes they reference. */
.slideInLeft-transition, .slideInRight-transition { display: inline-block; }
.slideInRight-enter { animation: slideInRight-enter var(--el-transition-duration); }
.slideInRight-leave { position: absolute; left: 0; right: 0; animation: slideInRight-leave var(--el-transition-duration); }
.slideInLeft-enter { animation: slideInLeft-enter var(--el-transition-duration); }
.slideInLeft-leave { position: absolute; left: 0; right: 0; animation: slideInLeft-leave var(--el-transition-duration); }
@keyframes slideInRight-enter {
  0% { opacity: 0; transform-origin: 0 0; transform: translate(100%); }
  to { opacity: 1; transform-origin: 0 0; transform: translate(0); }
}
@keyframes slideInRight-leave {
  0% { transform-origin: 0 0; transform: translate(0); opacity: 1; }
  to { transform-origin: 0 0; transform: translate(100%); opacity: 0; }
}
@keyframes slideInLeft-enter {
  0% { opacity: 0; transform-origin: 0 0; transform: translate(-100%); }
  to { opacity: 1; transform-origin: 0 0; transform: translate(0); }
}
@keyframes slideInLeft-leave {
  0% { transform-origin: 0 0; transform: translate(0); opacity: 1; }
  to { transform-origin: 0 0; transform: translate(-100%); opacity: 0; }
}

/* Tag: custom properties and base box. The type modifiers below only re-point the four --el-tag-* colour variables. */
.el-tag {
  --el-tag-font-size: 12px;
  --el-tag-border-radius: 4px;
  --el-tag-border-radius-rounded: 9999px;
  --el-tag-bg-color: var(--el-color-primary-light-9);
  --el-tag-border-color: var(--el-color-primary-light-8);
  --el-tag-text-color: var(--el-color-primary);
  --el-tag-hover-color: var(--el-color-primary);
  background-color: var(--el-tag-bg-color);
  border-color: var(--el-tag-border-color);
  color: var(--el-tag-text-color);
  display: inline-flex;
  justify-content: center;
  align-items: center;
  height: 24px;
  padding: 0 9px;
  font-size: var(--el-tag-font-size);
  line-height: 1;
  border-width: 1px;
  border-style: solid;
  border-radius: var(--el-tag-border-radius);
  box-sizing: border-box;
  white-space: nowrap;
  --el-icon-size: 14px;
}
.el-tag.el-tag--success { --el-tag-bg-color: var(--el-color-success-light-9); --el-tag-border-color: var(--el-color-success-light-8); --el-tag-text-color: var(--el-color-success); --el-tag-hover-color: var(--el-color-success); }
.el-tag.el-tag--warning { --el-tag-bg-color: var(--el-color-warning-light-9); --el-tag-border-color: var(--el-color-warning-light-8); --el-tag-text-color: var(--el-color-warning); --el-tag-hover-color: var(--el-color-warning); }
.el-tag.el-tag--danger { --el-tag-bg-color: var(--el-color-danger-light-9); --el-tag-border-color: var(--el-color-danger-light-8); --el-tag-text-color: var(--el-color-danger); --el-tag-hover-color: var(--el-color-danger); }
.el-tag.el-tag--info { --el-tag-bg-color: var(--el-color-info-light-9); --el-tag-border-color: var(--el-color-info-light-8); --el-tag-text-color: var(--el-color-info); --el-tag-hover-color: var(--el-color-info); }
.el-tag.el-tag--error { --el-tag-bg-color: var(--el-color-error-light-9); --el-tag-border-color: var(--el-color-error-light-8); --el-tag-text-color: var(--el-color-error); --el-tag-hover-color: var(--el-color-error); }
.el-tag.is-hit { border-color: var(--el-color-primary); }
.el-tag.is-round { border-radius: var(--el-tag-border-radius-rounded); }
.el-tag .el-tag__close { color: var(--el-tag-text-color); }
.el-tag .el-tag__close:hover { color: var(--el-color-white); background-color: var(--el-tag-hover-color); }
.el-tag .el-icon { border-radius: 50%; cursor: pointer; font-size: calc(var(--el-icon-size) - 2px); height: var(--el-icon-size); width: var(--el-icon-size); }
.el-tag 
.el-tag__close { margin-left: 6px; }

/* Tag, dark effect: solid background in the type colour, white text. */
.el-tag--dark { --el-tag-bg-color: var(--el-color-primary); --el-tag-border-color: var(--el-color-primary); --el-tag-text-color: var(--el-color-white); --el-tag-hover-color: var(--el-color-primary-light-3); }
.el-tag--dark.el-tag--success { --el-tag-bg-color: var(--el-color-success); --el-tag-border-color: var(--el-color-success); --el-tag-text-color: var(--el-color-white); --el-tag-hover-color: var(--el-color-success-light-3); }
.el-tag--dark.el-tag--warning { --el-tag-bg-color: var(--el-color-warning); --el-tag-border-color: var(--el-color-warning); --el-tag-text-color: var(--el-color-white); --el-tag-hover-color: var(--el-color-warning-light-3); }
.el-tag--dark.el-tag--danger { --el-tag-bg-color: var(--el-color-danger); --el-tag-border-color: var(--el-color-danger); --el-tag-text-color: var(--el-color-white); --el-tag-hover-color: var(--el-color-danger-light-3); }
.el-tag--dark.el-tag--info { --el-tag-bg-color: var(--el-color-info); --el-tag-border-color: var(--el-color-info); --el-tag-text-color: var(--el-color-white); --el-tag-hover-color: var(--el-color-info-light-3); }
.el-tag--dark.el-tag--error { --el-tag-bg-color: var(--el-color-error); --el-tag-border-color: var(--el-color-error); --el-tag-text-color: var(--el-color-white); --el-tag-hover-color: var(--el-color-error-light-3); }

/* Tag, plain effect: white background, border in the type's light-5 shade. */
.el-tag--plain { --el-tag-bg-color: var(--el-color-white); --el-tag-border-color: var(--el-color-primary-light-5); --el-tag-text-color: var(--el-color-primary); --el-tag-hover-color: var(--el-color-primary); }
.el-tag--plain.el-tag--success { --el-tag-bg-color: var(--el-color-white); --el-tag-border-color: var(--el-color-success-light-5); --el-tag-text-color: var(--el-color-success); --el-tag-hover-color: var(--el-color-success); }
.el-tag--plain.el-tag--warning { --el-tag-bg-color: var(--el-color-white); --el-tag-border-color: var(--el-color-warning-light-5); --el-tag-text-color: var(--el-color-warning); --el-tag-hover-color: var(--el-color-warning); }
/* The custom-property name inside var() must be one unbroken identifier. */
.el-tag--plain.el-tag--danger { --el-tag-bg-color: var(--el-color-white); --el-tag-border-color: var(--el-color-danger-light-5); --el-tag-text-color: var(--el-color-danger); --el-tag-hover-color: var(--el-color-danger); }
.el-tag--plain.el-tag--info { --el-tag-bg-color: var(--el-color-white); --el-tag-border-color: var(--el-color-info-light-5); --el-tag-text-color: var(--el-color-info); --el-tag-hover-color: var(--el-color-info); }
.el-tag--plain.el-tag--error { --el-tag-bg-color: var(--el-color-white); --el-tag-border-color: var(--el-color-error-light-5); --el-tag-text-color: var(--el-color-error); --el-tag-hover-color: var(--el-color-error); }

/* Tag: closable padding and the large/small sizes. */
.el-tag.is-closable { padding-right: 5px; }
.el-tag--large { padding: 0 11px; height: 32px; --el-icon-size: 16px; }
.el-tag--large .el-tag__close { margin-left: 8px; }
.el-tag--large.is-closable { padding-right: 7px; }
.el-tag--small { padding: 0 7px; height: 20px; --el-icon-size: 12px; }
.el-tag--small .el-tag__close { margin-left: 4px; }
.el-tag--small.is-closable { padding-right: 3px; }
.el-tag--small .el-icon-close { transform: scale(.8); }

/* Tag: "hit" state uses the full type colour for the border. */
.el-tag.el-tag--success.is-hit { border-color: var(--el-color-success); }
.el-tag.el-tag--warning.is-hit { border-color: var(--el-color-warning); }
.el-tag.el-tag--danger.is-hit { border-color: var(--el-color-danger); }
.el-tag.el-tag--info.is-hit { border-color: var(--el-color-info); }
.el-tag.el-tag--error.is-hit { border-color: var(--el-color-error); }

/* Time select dropdown. */
.time-select { margin: 5px 0; min-width: 0; }
.time-select .el-picker-panel__content { max-height: 200px; margin: 0; }
.time-select-item { padding: 8px 10px; font-size: 14px; line-height: 20px; }
.time-select-item.disabled { color: var(--el-datepicker-border-color); cursor: not-allowed; }
.time-select-item:hover { background-color: var(--el-fill-color-light); font-weight: 700; cursor: pointer; }
.time-select .time-select-item.selected:not(.disabled) { color: var(--el-color-primary); font-weight: 700; }

/* Timeline item: wrapper and connecting tail. */
.el-timeline-item { position: relative; padding-bottom: 20px; }
.el-timeline-item__wrapper { position: relative; padding-left: 28px; top: -3px; }
.el-timeline-item__tail { position: absolute; left: 4px; height: 100%; border-left: 2px solid var(--el-timeline-node-color); }
.el-timeline-item 
.el-timeline-item__icon { color: var(--el-color-white); font-size: var(--el-font-size-small); }

/* Timeline node: base circle, two sizes, hollow style and type colours. */
.el-timeline-item__node { position: absolute; background-color: var(--el-timeline-node-color); border-color: var(--el-timeline-node-color); border-radius: 50%; box-sizing: border-box; display: flex; justify-content: center; align-items: center; }
.el-timeline-item__node--normal { left: -1px; width: var(--el-timeline-node-size-normal); height: var(--el-timeline-node-size-normal); }
.el-timeline-item__node--large { left: -2px; width: var(--el-timeline-node-size-large); height: var(--el-timeline-node-size-large); }
.el-timeline-item__node.is-hollow { background: var(--el-color-white); border-style: solid; border-width: 2px; }
.el-timeline-item__node--primary { background-color: var(--el-color-primary); border-color: var(--el-color-primary); }
.el-timeline-item__node--success { background-color: var(--el-color-success); border-color: var(--el-color-success); }
.el-timeline-item__node--warning { background-color: var(--el-color-warning); border-color: var(--el-color-warning); }
.el-timeline-item__node--danger { background-color: var(--el-color-danger); border-color: var(--el-color-danger); }
.el-timeline-item__node--info { background-color: var(--el-color-info); border-color: var(--el-color-info); }

/* Timeline item: custom dot slot, content and timestamp. */
.el-timeline-item__dot { position: absolute; display: flex; justify-content: center; align-items: center; }
.el-timeline-item__content { color: var(--el-text-color-primary); }
.el-timeline-item__timestamp { color: var(--el-text-color-secondary); line-height: 1; font-size: var(--el-font-size-small); }
.el-timeline-item__timestamp.is-top { margin-bottom: 8px; padding-top: 4px; }
.el-timeline-item__timestamp.is-bottom { margin-top: 8px; }

/* Timeline container: custom properties consumed by the item rules, then list reset. */
.el-timeline {
  --el-timeline-node-size-normal: 12px;
  --el-timeline-node-size-large: 14px;
  --el-timeline-node-color: var(--el-border-color-light);
  margin: 0;
  font-size: var(--el-font-size-base);
  list-style: none;
}
.el-timeline .el-timeline-item:last-child .el-timeline-item__tail { display: none; }
.el-timeline .el-timeline-item__center { display: flex; align-items: center; }
.el-timeline 
.el-timeline-item__center .el-timeline-item__wrapper { width: 100%; }

/* Timeline, vertically centred items: tail geometry. */
.el-timeline .el-timeline-item__center .el-timeline-item__tail { top: 0; }
.el-timeline .el-timeline-item__center:first-child .el-timeline-item__tail { height: calc(50% + 10px); top: calc(50% - 10px); }
.el-timeline .el-timeline-item__center:last-child .el-timeline-item__tail { display: block; height: calc(50% - 10px); }

/* Tooltip v2: content box and arrow. The arrow is drawn with two zero-size pseudo-elements whose borders form triangles. */
.el-tooltip-v2__content { --el-tooltip-v2-padding: 5px 10px; --el-tooltip-v2-border-radius: 4px; --el-tooltip-v2-border-color: var(--el-border-color); border-radius: var(--el-tooltip-v2-border-radius); color: var(--el-color-black); background-color: var(--el-color-white); padding: var(--el-tooltip-v2-padding); border: 1px solid var(--el-border-color); }
.el-tooltip-v2__arrow { position: absolute; color: var(--el-color-white); width: var(--el-tooltip-v2-arrow-width); height: var(--el-tooltip-v2-arrow-height); pointer-events: none; left: var(--el-tooltip-v2-arrow-x); top: var(--el-tooltip-v2-arrow-y); }
.el-tooltip-v2__arrow:before,
.el-tooltip-v2__arrow:after { content: ""; width: 0; height: 0; border: var(--el-tooltip-v2-arrow-border-width) solid transparent; position: absolute; }

/* Arrow placement per data-side: :before is the white fill, :after the border-coloured triangle behind it. */
.el-tooltip-v2__content[data-side^=top] .el-tooltip-v2__arrow { bottom: 0; }
.el-tooltip-v2__content[data-side^=top] .el-tooltip-v2__arrow:before { border-top-color: var(--el-color-white); border-top-width: var(--el-tooltip-v2-arrow-border-width); border-bottom: 0; top: calc(100% - 1px); }
.el-tooltip-v2__content[data-side^=top] .el-tooltip-v2__arrow:after { border-top-color: var(--el-border-color); border-top-width: var(--el-tooltip-v2-arrow-border-width); border-bottom: 0; top: 100%; z-index: -1; }
.el-tooltip-v2__content[data-side^=bottom] .el-tooltip-v2__arrow { top: 0; }
.el-tooltip-v2__content[data-side^=bottom] .el-tooltip-v2__arrow:before { border-bottom-color: var(--el-color-white); border-bottom-width: var(--el-tooltip-v2-arrow-border-width); border-top: 0; bottom: calc(100% - 1px); }
.el-tooltip-v2__content[data-side^=bottom] .el-tooltip-v2__arrow:after { border-bottom-color: var(--el-border-color); border-bottom-width: var(--el-tooltip-v2-arrow-border-width); border-top: 0; bottom: 100%; z-index: -1; }
.el-tooltip-v2__content[data-side^=left] .el-tooltip-v2__arrow { right: 0; }
.el-tooltip-v2__content[data-side^=left] .el-tooltip-v2__arrow:before { border-left-color: var(--el-color-white); border-left-width: var(--el-tooltip-v2-arrow-border-width); border-right: 0; left: calc(100% - 1px); }
.el-tooltip-v2__content[data-side^=left] .el-tooltip-v2__arrow:after { border-left-color: var(--el-border-color); border-left-width: var(--el-tooltip-v2-arrow-border-width); border-right: 0; left: 100%; z-index: -1; }
.el-tooltip-v2__content[data-side^=right] .el-tooltip-v2__arrow { left: 0; }
.el-tooltip-v2__content[data-side^=right] .el-tooltip-v2__arrow:before { border-right-color: var(--el-color-white); border-right-width: var(--el-tooltip-v2-arrow-border-width); border-left: 0; right: calc(100% - 1px); }
.el-tooltip-v2__content[data-side^=right] .el-tooltip-v2__arrow:after { border-right-color: var(--el-border-color); border-right-width: var(--el-tooltip-v2-arrow-border-width); border-left: 0; right: 100%; z-index: -1; }
.el-tooltip-v2__content.is-dark { --el-tooltip-v2-border-color: transparent; background-color: var(--el-color-black); color: var(--el-color-white); border-color: transparent; }
.el-tooltip-v2__content.is-dark .el-tooltip-v2__arrow { background-color: var(--el-color-black); border-color: transparent; }

/* Transfer: custom properties, buttons column and panel body/list/items. */
.el-transfer {
  --el-transfer-border-color: var(--el-border-color-lighter);
  --el-transfer-border-radius: var(--el-border-radius-base);
  --el-transfer-panel-width: 200px;
  --el-transfer-panel-header-height: 40px;
  --el-transfer-panel-header-bg-color: var(--el-fill-color-light);
  --el-transfer-panel-footer-height: 40px;
  --el-transfer-panel-body-height: 278px;
  --el-transfer-item-height: 30px;
  --el-transfer-filter-height: 32px;
  font-size: var(--el-font-size-base);
}
.el-transfer__buttons { display: inline-block; vertical-align: middle; padding: 0 30px; }
.el-transfer__button { vertical-align: top; }
.el-transfer__button:nth-child(2) { margin: 0 0 0 10px; }
.el-transfer__button i, .el-transfer__button span { font-size: 14px; }
.el-transfer__button .el-icon+span { margin-left: 0; }
.el-transfer-panel { overflow: hidden; background: var(--el-bg-color-overlay); display: inline-block; text-align: left; vertical-align: middle; width: var(--el-transfer-panel-width); max-height: 100%; box-sizing: border-box; position: relative; }
.el-transfer-panel__body { height: var(--el-transfer-panel-body-height); border-left: 1px solid var(--el-transfer-border-color); border-right: 1px solid var(--el-transfer-border-color); border-bottom: 1px solid var(--el-transfer-border-color); border-bottom-left-radius: var(--el-transfer-border-radius); border-bottom-right-radius: var(--el-transfer-border-radius); overflow: hidden; }
.el-transfer-panel__body.is-with-footer { border-bottom: none; border-bottom-left-radius: 0; border-bottom-right-radius: 0; }
.el-transfer-panel__list { margin: 0; padding: 6px 0; list-style: none; height: var(--el-transfer-panel-body-height); overflow: auto; box-sizing: border-box; }
.el-transfer-panel__list.is-filterable { height: calc(100% - var(--el-transfer-filter-height) - 30px); padding-top: 0; }
.el-transfer-panel__item { height: var(--el-transfer-item-height); line-height: var(--el-transfer-item-height); padding-left: 15px; display: block!important; }
.el-transfer-panel__item+.el-transfer-panel__item { margin-left: 0; }
.el-transfer-panel__item.el-checkbox { color: var(--el-text-color-regular); }
.el-transfer-panel__item:hover { color: var(--el-color-primary); }
.el-transfer-panel__item.el-checkbox .el-checkbox__label { width: 100%; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; display: block; box-sizing: border-box; padding-left: 22px; line-height: var(--el-transfer-item-height); }
.el-transfer-panel__item .el-checkbox__input { position: absolute; top: 8px; }
.el-transfer-panel__filter { text-align: center; margin: 15px; box-sizing: border-box; width: auto; }
.el-transfer-panel__filter 
.el-input__inner { height: var(--el-transfer-filter-height); width: 100%; font-size: 12px; display: inline-block; box-sizing: border-box; border-radius: calc(var(--el-transfer-filter-height)/ 2); }
.el-transfer-panel__filter .el-icon-circle-close { cursor: pointer; }

/* Transfer panel header: title checkbox with the count label pinned to the right. */
.el-transfer-panel .el-transfer-panel__header { display: flex; align-items: center; height: var(--el-transfer-panel-header-height); background: var(--el-transfer-panel-header-bg-color); margin: 0; padding-left: 15px; border: 1px solid var(--el-transfer-border-color); border-top-left-radius: var(--el-transfer-border-radius); border-top-right-radius: var(--el-transfer-border-radius); box-sizing: border-box; color: var(--el-color-black); }
.el-transfer-panel .el-transfer-panel__header .el-checkbox { position: relative; display: flex; width: 100%; align-items: center; }
.el-transfer-panel .el-transfer-panel__header .el-checkbox .el-checkbox__label { font-size: 16px; color: var(--el-text-color-primary); font-weight: 400; }
.el-transfer-panel .el-transfer-panel__header .el-checkbox .el-checkbox__label span { position: absolute; right: 15px; top: 50%; transform: translate3d(0,-50%,0); color: var(--el-text-color-secondary); font-size: 12px; font-weight: 400; }

/* Transfer panel footer and empty placeholder. */
.el-transfer-panel .el-transfer-panel__footer { height: var(--el-transfer-panel-footer-height); background: var(--el-bg-color-overlay); margin: 0; padding: 0; border: 1px solid var(--el-transfer-border-color); border-bottom-left-radius: var(--el-transfer-border-radius); border-bottom-right-radius: var(--el-transfer-border-radius); }
.el-transfer-panel .el-transfer-panel__footer:after { display: inline-block; content: ""; height: 100%; vertical-align: middle; }
.el-transfer-panel .el-transfer-panel__footer .el-checkbox { padding-left: 20px; color: var(--el-text-color-regular); }
.el-transfer-panel .el-transfer-panel__empty { margin: 0; height: var(--el-transfer-item-height); line-height: var(--el-transfer-item-height); padding: 6px 15px 0; color: var(--el-text-color-secondary); text-align: center; }
.el-transfer-panel .el-checkbox__label { padding-left: 8px; }
.el-transfer-panel 
.el-checkbox__inner { height: 14px; width: 14px; border-radius: 3px; }
.el-transfer-panel .el-checkbox__inner:after { height: 6px; width: 3px; left: 4px; }

/* Tree: custom properties and container. */
.el-tree {
  --el-tree-node-hover-bg-color: var(--el-fill-color-light);
  --el-tree-text-color: var(--el-text-color-regular);
  --el-tree-expand-icon-color: var(--el-text-color-placeholder);
  position: relative;
  cursor: default;
  background: var(--el-fill-color-blank);
  color: var(--el-tree-text-color);
}
.el-tree__empty-block { position: relative; min-height: 60px; text-align: center; width: 100%; height: 100%; }
.el-tree__empty-text { position: absolute; left: 50%; top: 50%; transform: translate(-50%,-50%); color: var(--el-text-color-secondary); font-size: var(--el-font-size-base); }
.el-tree__drop-indicator { position: absolute; left: 0; right: 0; height: 1px; background-color: var(--el-color-primary); }

/* Tree node: row content, focus/hover highlight and drag states. */
.el-tree-node { white-space: nowrap; outline: 0; }
.el-tree-node:focus>.el-tree-node__content { background-color: var(--el-tree-node-hover-bg-color); }
.el-tree-node.is-drop-inner>.el-tree-node__content .el-tree-node__label { background-color: var(--el-color-primary); color: #fff; }
.el-tree-node__content { display: flex; align-items: center; height: 26px; cursor: pointer; }
.el-tree-node__content>.el-tree-node__expand-icon { padding: 6px; box-sizing: content-box; }
.el-tree-node__content>label.el-checkbox { margin-right: 8px; }
.el-tree-node__content:hover { background-color: var(--el-tree-node-hover-bg-color); }
.el-tree.is-dragging .el-tree-node__content { cursor: move; }
.el-tree.is-dragging .el-tree-node__content * { pointer-events: none; }
.el-tree.is-dragging.is-drop-not-allow .el-tree-node__content { cursor: not-allowed; }

/* Tree node: expand icon (rotated 90deg when expanded), label, loading icon, children. */
.el-tree-node__expand-icon { cursor: pointer; color: var(--el-tree-expand-icon-color); font-size: 12px; transform: rotate(0); transition: transform var(--el-transition-duration) ease-in-out; }
.el-tree-node__expand-icon.expanded { transform: rotate(90deg); }
.el-tree-node__expand-icon.is-leaf { color: transparent; cursor: default; }
.el-tree-node__expand-icon.is-hidden { visibility: hidden; }
.el-tree-node__label { font-size: var(--el-font-size-base); }
.el-tree-node__loading-icon { margin-right: 8px; font-size: var(--el-font-size-base); color: var(--el-tree-expand-icon-color); }
.el-tree-node>.el-tree-node__children { overflow: hidden; background-color: transparent; }
.el-tree-node.is-expanded>.el-tree-node__children { display: block; }
.el-tree--highlight-current .el-tree-node.is-current>.el-tree-node__content { background-color: var(--el-color-primary-light-9); }

/* Tree select: declares the same three tree custom properties and adjusts the dropdown items. */
.el-tree-select { --el-tree-node-hover-bg-color: var(--el-fill-color-light); --el-tree-text-color: var(--el-text-color-regular); --el-tree-expand-icon-color: var(--el-text-color-placeholder); }
.el-tree-select__popper .el-tree-node__expand-icon { margin-left: 8px; }
.el-tree-select__popper .el-tree-node.is-checked>.el-tree-node__content .el-select-dropdown__item.selected:after { content: none; }
.el-tree-select__popper .el-select-dropdown__item { flex: 1; background: 0 0!important; padding-left: 0; height: 20px; line-height: 20px; }

/* Upload: trigger, hidden input, tip and the picture-card trigger box. */
.el-upload { display: inline-flex; justify-content: center; align-items: center; cursor: pointer; outline: 0; }
.el-upload__input { display: none; }
.el-upload__tip { font-size: 12px; color: var(--el-text-color-regular); margin-top: 7px; }
.el-upload iframe { position: absolute; z-index: -1; top: 0; left: 0; opacity: 0; }
.el-upload--picture-card { --el-upload-picture-card-size: 148px; background-color: var(--el-fill-color-lighter); border: 1px dashed var(--el-border-color-darker); border-radius: 6px; box-sizing: border-box; width: var(--el-upload-picture-card-size); height: var(--el-upload-picture-card-size); cursor: pointer; vertical-align: top; display: inline-flex; justify-content: center; align-items: center; }
.el-upload--picture-card 
i { font-size: 28px; color: var(--el-text-color-secondary); }
.el-upload--picture-card:hover { border-color: var(--el-color-primary); color: var(--el-color-primary); }
.el-upload:focus { border-color: var(--el-color-primary); color: var(--el-color-primary); }
.el-upload:focus .el-upload-dragger { border-color: var(--el-color-primary); }

/* Upload dragger (drop zone). */
.el-upload-dragger { background-color: var(--el-fill-color-blank); border: 1px dashed var(--el-border-color); border-radius: 6px; box-sizing: border-box; width: 360px; height: 180px; text-align: center; cursor: pointer; position: relative; overflow: hidden; }
.el-upload-dragger .el-icon--upload { font-size: 67px; color: var(--el-text-color-placeholder); margin: 40px 0 16px; line-height: 50px; }
.el-upload-dragger+.el-upload__tip { text-align: center; }
.el-upload-dragger~.el-upload__files { border-top: var(--el-border); margin-top: 7px; padding-top: 5px; }
.el-upload-dragger .el-upload__text { color: var(--el-text-color-regular); font-size: 14px; text-align: center; }
.el-upload-dragger .el-upload__text em { color: var(--el-color-primary); font-style: normal; }
.el-upload-dragger:hover { border-color: var(--el-color-primary); }
.el-upload-dragger.is-dragover { background-color: var(--el-color-primary-light-9); border: 2px dashed var(--el-color-primary); }

/* Upload list and list item: progress, close icon and close-tip hint (both hidden by default). */
.el-upload-list { margin: 10px 0 0; padding: 0; list-style: none; position: relative; }
.el-upload-list__item { transition: all .5s cubic-bezier(.55,0,.1,1); font-size: 14px; color: var(--el-text-color-regular); margin-bottom: 5px; position: relative; box-sizing: border-box; border-radius: 4px; width: 100%; }
.el-upload-list__item .el-progress { position: absolute; top: 20px; width: 100%; }
.el-upload-list__item .el-progress__text { position: absolute; right: 0; top: -13px; }
.el-upload-list__item .el-progress-bar { margin-right: 0; padding-right: 0; }
.el-upload-list__item .el-icon--upload-success { color: var(--el-color-success); }
.el-upload-list__item .el-icon--close { display: none; position: absolute; right: 5px; top: 50%; cursor: pointer; opacity: .75; color: var(--el-text-color-regular); transition: opacity var(--el-transition-duration); transform: translateY(-50%); }
.el-upload-list__item .el-icon--close:hover { opacity: 1; color: var(--el-color-primary); }
.el-upload-list__item .el-icon--close-tip { display: none; position: absolute; right: 5px; font-size: 12px; cursor: pointer; opacity: 1; color: var(--el-color-primary); transform: translateY(-50%); }
.el-upload-list__item:hover { background-color: var(--el-fill-color-light); }
.el-upload-list__item:hover .el-icon--close { display: inline-flex; }
.el-upload-list__item:hover .el-progress__text { display: none; }
.el-upload-list__item .el-upload-list__item-info { display: inline-flex; justify-content: center; flex-direction: column; width: 100%; margin-left: 4px; }

/* Successful item states. */
.el-upload-list__item.is-success .el-upload-list__item-status-label { display: inline-flex; }
.el-upload-list__item.is-success .el-upload-list__item-name:focus,
.el-upload-list__item.is-success .el-upload-list__item-name:hover { color: var(--el-color-primary); cursor: pointer; }
/* Focused-but-not-hovered item reveals the close-tip hint. The hint is styled in this
   block as .el-icon--close-tip (double dash), so that class is targeted here; the
   single-dash spelling is kept so any markup still using it keeps matching. */
.el-upload-list__item.is-success:focus:not(:hover) .el-icon-close-tip,
.el-upload-list__item.is-success:focus:not(:hover) .el-icon--close-tip { display: inline-block; }
.el-upload-list__item.is-success:active,
.el-upload-list__item.is-success:not(.focusing):focus { outline-width: 0; }
/* Comes later with equal specificity, so without the .focusing class the hint stays hidden. */
.el-upload-list__item.is-success:active .el-icon--close-tip,
.el-upload-list__item.is-success:not(.focusing):focus .el-icon--close-tip { display: none; }
.el-upload-list__item.is-success:hover .el-upload-list__item-status-label { display: none; opacity: 0; }
.el-upload-list.is-disabled .el-upload-list__item-status-label,
.el-upload-list.is-disabled .el-upload-list__item:hover { display: block; }
.el-upload-list__item-name { color: var(--el-text-color-regular); display: inline-flex; text-align: center; align-items: center; padding: 0 4px; overflow: hidden; text-overflow: ellipsis; transition: color var(--el-transition-duration); white-space: nowrap; font-size: var(--el-font-size-base); }
.el-upload-list__item-name 
.el-icon { margin-right: 6px; color: var(--el-text-color-secondary); }

/* Upload list item: status label and delete control (both hidden by default). */
.el-upload-list__item-status-label { position: absolute; right: 5px; top: 0; line-height: inherit; display: none; height: 100%; justify-content: center; align-items: center; transition: opacity var(--el-transition-duration); }
.el-upload-list__item-delete { position: absolute; right: 10px; top: 0; font-size: 12px; color: var(--el-text-color-regular); display: none; }
.el-upload-list__item-delete:hover { color: var(--el-color-primary); }

/* Upload list, picture-card layout: square tiles sized by --el-upload-list-picture-card-size. */
.el-upload-list--picture-card { --el-upload-list-picture-card-size: 148px; display: inline-flex; flex-wrap: wrap; margin: 0; }
.el-upload-list--picture-card .el-upload-list__item { overflow: hidden; background-color: var(--el-fill-color-blank); border: 1px solid #c0ccda; border-radius: 6px; box-sizing: border-box; width: var(--el-upload-list-picture-card-size); height: var(--el-upload-list-picture-card-size); margin: 0 8px 8px 0; padding: 0; display: inline-flex; }
.el-upload-list--picture-card .el-upload-list__item .el-icon--check,
.el-upload-list--picture-card .el-upload-list__item .el-icon--circle-check { color: #fff; }
.el-upload-list--picture-card .el-upload-list__item .el-icon--close { display: none; }
.el-upload-list--picture-card .el-upload-list__item:hover .el-upload-list__item-status-label { opacity: 0; display: block; }
.el-upload-list--picture-card .el-upload-list__item:hover .el-progress__text { display: block; }
.el-upload-list--picture-card .el-upload-list__item .el-upload-list__item-name { display: none; }
.el-upload-list--picture-card .el-upload-list__item-thumbnail { width: 100%; height: 100%; object-fit: contain; }
/* Corner ribbon: the label is rotated 45deg and its icon rotated back by -45deg. */
.el-upload-list--picture-card .el-upload-list__item-status-label { right: -15px; top: -6px; width: 40px; height: 24px; background: var(--el-color-success); text-align: center; transform: rotate(45deg); }
.el-upload-list--picture-card .el-upload-list__item-status-label i { font-size: 12px; margin-top: 11px; transform: rotate(-45deg); }
.el-upload-list--picture-card 
.el-upload-list__item-actions{position:absolute;width:100%;height:100%;left:0;top:0;cursor:default;display:inline-flex;justify-content:center;align-items:center;color:#fff;opacity:0;font-size:20px;background-color:var(--el-overlay-color-lighter);transition:opacity var(--el-transition-duration)}.el-upload-list--picture-card .el-upload-list__item-actions span{display:none;cursor:pointer}.el-upload-list--picture-card .el-upload-list__item-actions span+span{margin-left:1rem}.el-upload-list--picture-card .el-upload-list__item-actions .el-upload-list__item-delete{position:static;font-size:inherit;color:inherit}.el-upload-list--picture-card .el-upload-list__item-actions:hover{opacity:1}.el-upload-list--picture-card .el-upload-list__item-actions:hover span{display:inline-flex}.el-upload-list--picture-card .el-progress{top:50%;left:50%;transform:translate(-50%,-50%);bottom:auto;width:126px}.el-upload-list--picture-card .el-progress .el-progress__text{top:50%}.el-upload-list--picture .el-upload-list__item{overflow:hidden;z-index:0;background-color:var(--el-fill-color-blank);border:1px solid #c0ccda;border-radius:6px;box-sizing:border-box;margin-top:10px;padding:10px}.el-upload-list--picture .el-upload-list__item .el-icon--check,.el-upload-list--picture .el-upload-list__item .el-icon--circle-check{color:#fff}.el-upload-list--picture .el-upload-list__item:hover .el-upload-list__item-status-label{opacity:0;display:block}.el-upload-list--picture .el-upload-list__item:hover .el-progress__text{display:block}.el-upload-list--picture .el-upload-list__item.is-success .el-upload-list__item-name i{display:none}.el-upload-list--picture .el-upload-list__item .el-icon--close{top:5px;transform:translateY(0)}.el-upload-list--picture .el-upload-list__item-thumbnail{display:inline-flex;justify-content:center;align-items:center;width:70px;height:70px;object-fit:contain;position:relative;z-index:1;background-color:var(--el-color-white)}.el-upload-list--picture 
.el-upload-list__item-status-label{position:absolute;right:-17px;top:-7px;width:46px;height:26px;background:var(--el-color-success);text-align:center;transform:rotate(45deg)}.el-upload-list--picture .el-upload-list__item-status-label i{font-size:12px;margin-top:12px;transform:rotate(-45deg)}.el-upload-list--picture .el-progress{position:relative;top:-7px}.el-upload-cover{position:absolute;left:0;top:0;width:100%;height:100%;overflow:hidden;z-index:10;cursor:default}.el-upload-cover:after{display:inline-block;content:"";height:100%;vertical-align:middle}.el-upload-cover img{display:block;width:100%;height:100%}.el-upload-cover__label{right:-15px;top:-6px;width:40px;height:24px;background:var(--el-color-success);text-align:center;transform:rotate(45deg)}.el-upload-cover__label i{font-size:12px;margin-top:11px;transform:rotate(-45deg);color:#fff}.el-upload-cover__progress{display:inline-block;vertical-align:middle;position:static;width:243px}.el-upload-cover__progress+.el-upload__inner{opacity:0}.el-upload-cover__content{position:absolute;top:0;left:0;width:100%;height:100%}.el-upload-cover__interact{position:absolute;bottom:0;left:0;width:100%;height:100%;background-color:var(--el-overlay-color-light);text-align:center}.el-upload-cover__interact .btn{display:inline-block;color:#fff;font-size:14px;cursor:pointer;vertical-align:middle;transition:var(--el-transition-md-fade);margin-top:60px}.el-upload-cover__interact .btn i{margin-top:0}.el-upload-cover__interact .btn span{opacity:0;transition:opacity .15s linear}.el-upload-cover__interact .btn:not(:first-child){margin-left:35px}.el-upload-cover__interact .btn:hover{transform:translateY(-13px)}.el-upload-cover__interact .btn:hover span{opacity:1}.el-upload-cover__interact .btn i{color:#fff;display:block;font-size:24px;line-height:inherit;margin:0 auto 
5px}.el-upload-cover__title{position:absolute;bottom:0;left:0;background-color:#fff;height:36px;width:100%;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;font-weight:400;text-align:left;padding:0 10px;margin:0;line-height:36px;font-size:14px;color:var(--el-text-color-primary)}.el-upload-cover+.el-upload__inner{opacity:0;position:relative;z-index:1}.el-vl__wrapper{position:relative}.el-vl__wrapper:hover .el-virtual-scrollbar,.el-vl__wrapper.always-on .el-virtual-scrollbar{opacity:1}.el-vl__window{scrollbar-width:none}.el-vl__window::-webkit-scrollbar{display:none}.el-virtual-scrollbar{opacity:0;transition:opacity .34s ease-out}.el-vg__wrapper{position:relative}.el-popper{--el-popper-border-radius:var(--el-popover-border-radius, 4px)}.el-popper{position:absolute;border-radius:var(--el-popper-border-radius);padding:5px 11px;z-index:2000;font-size:12px;line-height:20px;min-width:10px;word-wrap:break-word;visibility:visible}.el-popper.is-dark{color:var(--el-bg-color);background:var(--el-text-color-primary);border:1px solid var(--el-text-color-primary)}.el-popper.is-dark .el-popper__arrow:before{border:1px solid var(--el-text-color-primary);background:var(--el-text-color-primary);right:0}.el-popper.is-light{background:var(--el-bg-color-overlay);border:1px solid var(--el-border-color-light)}.el-popper.is-light .el-popper__arrow:before{border:1px solid var(--el-border-color-light);background:var(--el-bg-color-overlay);right:0}.el-popper.is-pure{padding:0}.el-popper__arrow{position:absolute;width:10px;height:10px;z-index:-1}.el-popper__arrow:before{position:absolute;width:10px;height:10px;z-index:-1;content:" 
";transform:rotate(45deg);background:var(--el-text-color-primary);box-sizing:border-box}.el-popper[data-popper-placement^=top]>.el-popper__arrow{bottom:-5px}.el-popper[data-popper-placement^=top]>.el-popper__arrow:before{border-bottom-right-radius:2px}.el-popper[data-popper-placement^=bottom]>.el-popper__arrow{top:-5px}.el-popper[data-popper-placement^=bottom]>.el-popper__arrow:before{border-top-left-radius:2px}.el-popper[data-popper-placement^=left]>.el-popper__arrow{right:-5px}.el-popper[data-popper-placement^=left]>.el-popper__arrow:before{border-top-right-radius:2px}.el-popper[data-popper-placement^=right]>.el-popper__arrow{left:-5px}.el-popper[data-popper-placement^=right]>.el-popper__arrow:before{border-bottom-left-radius:2px}.el-popper[data-popper-placement^=top] .el-popper__arrow:before{border-top-color:transparent!important;border-left-color:transparent!important}.el-popper[data-popper-placement^=bottom] .el-popper__arrow:before{border-bottom-color:transparent!important;border-right-color:transparent!important}.el-popper[data-popper-placement^=left] .el-popper__arrow:before{border-left-color:transparent!important;border-bottom-color:transparent!important}.el-popper[data-popper-placement^=right] .el-popper__arrow:before{border-right-color:transparent!important;border-top-color:transparent!important}.el-select-dropdown__item{font-size:var(--el-font-size-base);padding:0 32px 0 20px;position:relative;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;color:var(--el-text-color-regular);height:34px;line-height:34px;box-sizing:border-box;cursor:pointer}.el-select-dropdown__item.is-disabled{color:var(--el-text-color-placeholder);cursor:not-allowed}.el-select-dropdown__item.is-disabled:hover{background-color:var(--el-color-white)}.el-select-dropdown__item.hover,.el-select-dropdown__item:hover{background-color:var(--el-fill-color-light)}.el-select-dropdown__item.selected{color:var(--el-color-primary);font-weight:700}/*!
* 
* ant-design-vue v2.2.8
*
* Copyright 2017-present, ant-design-vue.
* All rights reserved.
*
*/[class^=ant-]::-ms-clear,[class*=ant-]::-ms-clear,[class^=ant-] input::-ms-clear,[class*=ant-] input::-ms-clear,[class^=ant-] input::-ms-reveal,[class*=ant-] input::-ms-reveal{display:none}[class^=ant-],[class*=ant-],[class^=ant-] *,[class*=ant-] *,[class^=ant-] *:before,[class*=ant-] *:before,[class^=ant-] *:after,[class*=ant-] *:after{box-sizing:border-box}html,body{width:100%;height:100%}input::-ms-clear,input::-ms-reveal{display:none}*,*:before,*:after{box-sizing:border-box}html{font-family:sans-serif;line-height:1.15;-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%;-ms-overflow-style:scrollbar;-webkit-tap-highlight-color:rgba(0,0,0,0)}@-ms-viewport{width:device-width}body{margin:0;color:#000000d9;font-size:14px;font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Helvetica Neue,Arial,Noto Sans,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol,"Noto Color Emoji";font-variant:tabular-nums;line-height:1.5715;background-color:#fff;font-feature-settings:"tnum"}[tabindex="-1"]:focus{outline:none!important}hr{box-sizing:content-box;height:0;overflow:visible}h1,h2,h3,h4,h5,h6{margin-top:0;margin-bottom:.5em;color:#000000d9;font-weight:500}p{margin-top:0;margin-bottom:1em}abbr[title],abbr[data-original-title]{text-decoration:underline;-webkit-text-decoration:underline dotted;text-decoration:underline dotted;border-bottom:0;cursor:help}address{margin-bottom:1em;font-style:normal;line-height:inherit}input[type=text],input[type=password],input[type=number],textarea{-webkit-appearance:none}ol,ul,dl{margin-top:0;margin-bottom:1em}ol ol,ul ul,ol ul,ul ol{margin-bottom:0}dt{font-weight:500}dd{margin-bottom:.5em;margin-left:0}blockquote{margin:0 0 1em}dfn{font-style:italic}b,strong{font-weight:bolder}small{font-size:80%}sub,sup{position:relative;font-size:75%;line-height:0;vertical-align:baseline}sub{bottom:-.25em}sup{top:-.5em}a{color:#1890ff;text-decoration:none;background-color:transparent;outline:none;cursor:pointer;transition:color 
.3s;-webkit-text-decoration-skip:objects}a:hover{color:#40a9ff}a:active{color:#096dd9}a:active,a:hover{text-decoration:none;outline:0}a:focus{text-decoration:none;outline:0}a[disabled]{color:#00000040;cursor:not-allowed}pre,code,kbd,samp{font-size:1em;font-family:SFMono-Regular,Consolas,Liberation Mono,Menlo,Courier,monospace}pre{margin-top:0;margin-bottom:1em;overflow:auto}figure{margin:0 0 1em}img{vertical-align:middle;border-style:none}svg:not(:root){overflow:hidden}a,area,button,[role=button],input:not([type="range"]),label,select,summary,textarea{touch-action:manipulation}table{border-collapse:collapse}caption{padding-top:.75em;padding-bottom:.3em;color:#00000073;text-align:left;caption-side:bottom}th{text-align:inherit}input,button,select,optgroup,textarea{margin:0;color:inherit;font-size:inherit;font-family:inherit;line-height:inherit}button,input{overflow:visible}button,select{text-transform:none}button,html [type=button],[type=reset],[type=submit]{-webkit-appearance:button}button::-moz-focus-inner,[type=button]::-moz-focus-inner,[type=reset]::-moz-focus-inner,[type=submit]::-moz-focus-inner{padding:0;border-style:none}input[type=radio],input[type=checkbox]{box-sizing:border-box;padding:0}input[type=date],input[type=time],input[type=datetime-local],input[type=month]{-webkit-appearance:listbox}textarea{overflow:auto;resize:vertical}fieldset{min-width:0;margin:0;padding:0;border:0}legend{display:block;width:100%;max-width:100%;margin-bottom:.5em;padding:0;color:inherit;font-size:1.5em;line-height:inherit;white-space:normal}progress{vertical-align:baseline}[type=number]::-webkit-inner-spin-button,[type=number]::-webkit-outer-spin-button{height:auto}[type=search]{outline-offset:-2px;-webkit-appearance:none}[type=search]::-webkit-search-cancel-button,[type=search]::-webkit-search-decoration{-webkit-appearance:none}::-webkit-file-upload-button{font:inherit;-webkit-appearance:button}output{display:inline-block}summary{display:list-item}template{display:none}[hidden
]{display:none!important}mark{padding:.2em;background-color:#feffe6}::-moz-selection{color:#fff;background:#1890ff}::selection{color:#fff;background:#1890ff}.clearfix:before,.clearfix:after{display:table;content:""}.clearfix:after{clear:both}.anticon{display:inline-block;color:inherit;font-style:normal;line-height:0;text-align:center;text-transform:none;vertical-align:-.125em;text-rendering:optimizeLegibility;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}.anticon>*{line-height:1}.anticon svg{display:inline-block}.anticon:before{display:none}.anticon .anticon-icon{display:block}.anticon[tabindex]{cursor:pointer}.anticon-spin:before{display:inline-block;-webkit-animation:loadingCircle 1s infinite linear;animation:loadingCircle 1s infinite linear}.anticon-spin{display:inline-block;-webkit-animation:loadingCircle 1s infinite linear;animation:loadingCircle 1s infinite linear}.ant-fade-enter,.ant-fade-appear,.ant-fade-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.ant-fade-enter.ant-fade-enter-active,.ant-fade-appear.ant-fade-appear-active{-webkit-animation-name:antFadeIn;animation-name:antFadeIn;-webkit-animation-play-state:running;animation-play-state:running}.ant-fade-leave.ant-fade-leave-active{-webkit-animation-name:antFadeOut;animation-name:antFadeOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.ant-fade-enter,.ant-fade-appear{opacity:0;-webkit-animation-timing-function:linear;animation-timing-function:linear}.ant-fade-leave{-webkit-animation-timing-function:linear;animation-timing-function:linear}.fade-enter,.fade-appear,.fade-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.fade-enter.fade-enter-active,.fade-appear.fade-appear-active{-
webkit-animation-name:antFadeIn;animation-name:antFadeIn;-webkit-animation-play-state:running;animation-play-state:running}.fade-leave.fade-leave-active{-webkit-animation-name:antFadeOut;animation-name:antFadeOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.fade-enter,.fade-appear{opacity:0;-webkit-animation-timing-function:linear;animation-timing-function:linear}.fade-leave{-webkit-animation-timing-function:linear;animation-timing-function:linear}@-webkit-keyframes antFadeIn{0%{opacity:0}to{opacity:1}}@keyframes antFadeIn{0%{opacity:0}to{opacity:1}}@-webkit-keyframes antFadeOut{0%{opacity:1}to{opacity:0}}@keyframes antFadeOut{0%{opacity:1}to{opacity:0}}.ant-move-up-enter,.ant-move-up-appear,.ant-move-up-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.ant-move-up-enter.ant-move-up-enter-active,.ant-move-up-appear.ant-move-up-appear-active{-webkit-animation-name:antMoveUpIn;animation-name:antMoveUpIn;-webkit-animation-play-state:running;animation-play-state:running}.ant-move-up-leave.ant-move-up-leave-active{-webkit-animation-name:antMoveUpOut;animation-name:antMoveUpOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.ant-move-up-enter,.ant-move-up-appear{opacity:0;-webkit-animation-timing-function:cubic-bezier(.08,.82,.17,1);animation-timing-function:cubic-bezier(.08,.82,.17,1)}.ant-move-up-leave{-webkit-animation-timing-function:cubic-bezier(.6,.04,.98,.34);animation-timing-function:cubic-bezier(.6,.04,.98,.34)}.move-up-enter,.move-up-appear,.move-up-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.move-up-enter.move-up-enter-active,.move-up-appear.move-up-appear-active{-webkit-animation-name:antMoveUpIn;animation-name:an
tMoveUpIn;-webkit-animation-play-state:running;animation-play-state:running}.move-up-leave.move-up-leave-active{-webkit-animation-name:antMoveUpOut;animation-name:antMoveUpOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.move-up-enter,.move-up-appear{opacity:0;-webkit-animation-timing-function:cubic-bezier(.08,.82,.17,1);animation-timing-function:cubic-bezier(.08,.82,.17,1)}.move-up-leave{-webkit-animation-timing-function:cubic-bezier(.6,.04,.98,.34);animation-timing-function:cubic-bezier(.6,.04,.98,.34)}.ant-move-down-enter,.ant-move-down-appear,.ant-move-down-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.ant-move-down-enter.ant-move-down-enter-active,.ant-move-down-appear.ant-move-down-appear-active{-webkit-animation-name:antMoveDownIn;animation-name:antMoveDownIn;-webkit-animation-play-state:running;animation-play-state:running}.ant-move-down-leave.ant-move-down-leave-active{-webkit-animation-name:antMoveDownOut;animation-name:antMoveDownOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.ant-move-down-enter,.ant-move-down-appear{opacity:0;-webkit-animation-timing-function:cubic-bezier(.08,.82,.17,1);animation-timing-function:cubic-bezier(.08,.82,.17,1)}.ant-move-down-leave{-webkit-animation-timing-function:cubic-bezier(.6,.04,.98,.34);animation-timing-function:cubic-bezier(.6,.04,.98,.34)}.move-down-enter,.move-down-appear,.move-down-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.move-down-enter.move-down-enter-active,.move-down-appear.move-down-appear-active{-webkit-animation-name:antMoveDownIn;animation-name:antMoveDownIn;-webkit-animation-play-state:running;animation-play-state:running}.move-down-leave.move-down
-leave-active{-webkit-animation-name:antMoveDownOut;animation-name:antMoveDownOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.move-down-enter,.move-down-appear{opacity:0;-webkit-animation-timing-function:cubic-bezier(.08,.82,.17,1);animation-timing-function:cubic-bezier(.08,.82,.17,1)}.move-down-leave{-webkit-animation-timing-function:cubic-bezier(.6,.04,.98,.34);animation-timing-function:cubic-bezier(.6,.04,.98,.34)}.ant-move-left-enter,.ant-move-left-appear,.ant-move-left-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.ant-move-left-enter.ant-move-left-enter-active,.ant-move-left-appear.ant-move-left-appear-active{-webkit-animation-name:antMoveLeftIn;animation-name:antMoveLeftIn;-webkit-animation-play-state:running;animation-play-state:running}.ant-move-left-leave.ant-move-left-leave-active{-webkit-animation-name:antMoveLeftOut;animation-name:antMoveLeftOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.ant-move-left-enter,.ant-move-left-appear{opacity:0;-webkit-animation-timing-function:cubic-bezier(.08,.82,.17,1);animation-timing-function:cubic-bezier(.08,.82,.17,1)}.ant-move-left-leave{-webkit-animation-timing-function:cubic-bezier(.6,.04,.98,.34);animation-timing-function:cubic-bezier(.6,.04,.98,.34)}.move-left-enter,.move-left-appear,.move-left-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.move-left-enter.move-left-enter-active,.move-left-appear.move-left-appear-active{-webkit-animation-name:antMoveLeftIn;animation-name:antMoveLeftIn;-webkit-animation-play-state:running;animation-play-state:running}.move-left-leave.move-left-leave-active{-webkit-animation-name:antMoveLeftOut;animation-name:antMoveLeftOut;-webki
t-animation-play-state:running;animation-play-state:running;pointer-events:none}.move-left-enter,.move-left-appear{opacity:0;-webkit-animation-timing-function:cubic-bezier(.08,.82,.17,1);animation-timing-function:cubic-bezier(.08,.82,.17,1)}.move-left-leave{-webkit-animation-timing-function:cubic-bezier(.6,.04,.98,.34);animation-timing-function:cubic-bezier(.6,.04,.98,.34)}.ant-move-right-enter,.ant-move-right-appear,.ant-move-right-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.ant-move-right-enter.ant-move-right-enter-active,.ant-move-right-appear.ant-move-right-appear-active{-webkit-animation-name:antMoveRightIn;animation-name:antMoveRightIn;-webkit-animation-play-state:running;animation-play-state:running}.ant-move-right-leave.ant-move-right-leave-active{-webkit-animation-name:antMoveRightOut;animation-name:antMoveRightOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.ant-move-right-enter,.ant-move-right-appear{opacity:0;-webkit-animation-timing-function:cubic-bezier(.08,.82,.17,1);animation-timing-function:cubic-bezier(.08,.82,.17,1)}.ant-move-right-leave{-webkit-animation-timing-function:cubic-bezier(.6,.04,.98,.34);animation-timing-function:cubic-bezier(.6,.04,.98,.34)}.move-right-enter,.move-right-appear,.move-right-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.move-right-enter.move-right-enter-active,.move-right-appear.move-right-appear-active{-webkit-animation-name:antMoveRightIn;animation-name:antMoveRightIn;-webkit-animation-play-state:running;animation-play-state:running}.move-right-leave.move-right-leave-active{-webkit-animation-name:antMoveRightOut;animation-name:antMoveRightOut;-webkit-animation-play-state:running;animation-play-state:running
;pointer-events:none}.move-right-enter,.move-right-appear{opacity:0;-webkit-animation-timing-function:cubic-bezier(.08,.82,.17,1);animation-timing-function:cubic-bezier(.08,.82,.17,1)}.move-right-leave{-webkit-animation-timing-function:cubic-bezier(.6,.04,.98,.34);animation-timing-function:cubic-bezier(.6,.04,.98,.34)}@-webkit-keyframes antMoveDownIn{0%{transform:translateY(100%);transform-origin:0 0;opacity:0}to{transform:translateY(0);transform-origin:0 0;opacity:1}}@keyframes antMoveDownIn{0%{transform:translateY(100%);transform-origin:0 0;opacity:0}to{transform:translateY(0);transform-origin:0 0;opacity:1}}@-webkit-keyframes antMoveDownOut{0%{transform:translateY(0);transform-origin:0 0;opacity:1}to{transform:translateY(100%);transform-origin:0 0;opacity:0}}@keyframes antMoveDownOut{0%{transform:translateY(0);transform-origin:0 0;opacity:1}to{transform:translateY(100%);transform-origin:0 0;opacity:0}}@-webkit-keyframes antMoveLeftIn{0%{transform:translate(-100%);transform-origin:0 0;opacity:0}to{transform:translate(0);transform-origin:0 0;opacity:1}}@keyframes antMoveLeftIn{0%{transform:translate(-100%);transform-origin:0 0;opacity:0}to{transform:translate(0);transform-origin:0 0;opacity:1}}@-webkit-keyframes antMoveLeftOut{0%{transform:translate(0);transform-origin:0 0;opacity:1}to{transform:translate(-100%);transform-origin:0 0;opacity:0}}@keyframes antMoveLeftOut{0%{transform:translate(0);transform-origin:0 0;opacity:1}to{transform:translate(-100%);transform-origin:0 0;opacity:0}}@-webkit-keyframes antMoveRightIn{0%{transform:translate(100%);transform-origin:0 0;opacity:0}to{transform:translate(0);transform-origin:0 0;opacity:1}}@keyframes antMoveRightIn{0%{transform:translate(100%);transform-origin:0 0;opacity:0}to{transform:translate(0);transform-origin:0 0;opacity:1}}@-webkit-keyframes antMoveRightOut{0%{transform:translate(0);transform-origin:0 0;opacity:1}to{transform:translate(100%);transform-origin:0 0;opacity:0}}@keyframes 
antMoveRightOut{0%{transform:translate(0);transform-origin:0 0;opacity:1}to{transform:translate(100%);transform-origin:0 0;opacity:0}}@-webkit-keyframes antMoveUpIn{0%{transform:translateY(-100%);transform-origin:0 0;opacity:0}to{transform:translateY(0);transform-origin:0 0;opacity:1}}@keyframes antMoveUpIn{0%{transform:translateY(-100%);transform-origin:0 0;opacity:0}to{transform:translateY(0);transform-origin:0 0;opacity:1}}@-webkit-keyframes antMoveUpOut{0%{transform:translateY(0);transform-origin:0 0;opacity:1}to{transform:translateY(-100%);transform-origin:0 0;opacity:0}}@keyframes antMoveUpOut{0%{transform:translateY(0);transform-origin:0 0;opacity:1}to{transform:translateY(-100%);transform-origin:0 0;opacity:0}}@-webkit-keyframes loadingCircle{to{transform:rotate(360deg)}}@keyframes loadingCircle{to{transform:rotate(360deg)}}[ant-click-animating=true],[ant-click-animating-without-extra-node=true]{position:relative}html{--antd-wave-shadow-color: #1890ff;--scroll-bar: 0}[ant-click-animating-without-extra-node=true]:after,.ant-click-animating-node{position:absolute;top:0;right:0;bottom:0;left:0;display:block;border-radius:inherit;box-shadow:0 0 #1890ff;box-shadow:0 0 0 0 var(--antd-wave-shadow-color);opacity:.2;-webkit-animation:fadeEffect 2s cubic-bezier(.08,.82,.17,1),waveEffect .4s cubic-bezier(.08,.82,.17,1);animation:fadeEffect 2s cubic-bezier(.08,.82,.17,1),waveEffect .4s cubic-bezier(.08,.82,.17,1);-webkit-animation-fill-mode:forwards;animation-fill-mode:forwards;content:"";pointer-events:none}@-webkit-keyframes waveEffect{to{box-shadow:0 0 #1890ff;box-shadow:0 0 0 6px var(--antd-wave-shadow-color)}}@keyframes waveEffect{to{box-shadow:0 0 #1890ff;box-shadow:0 0 0 6px var(--antd-wave-shadow-color)}}@-webkit-keyframes fadeEffect{to{opacity:0}}@keyframes 
fadeEffect{to{opacity:0}}.slide-up-enter,.slide-up-appear,.slide-up-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.slide-up-enter.slide-up-enter-active,.slide-up-appear.slide-up-appear-active{-webkit-animation-name:antSlideUpIn;animation-name:antSlideUpIn;-webkit-animation-play-state:running;animation-play-state:running}.slide-up-leave.slide-up-leave-active{-webkit-animation-name:antSlideUpOut;animation-name:antSlideUpOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.slide-up-enter,.slide-up-appear{opacity:0;-webkit-animation-timing-function:cubic-bezier(.23,1,.32,1);animation-timing-function:cubic-bezier(.23,1,.32,1)}.slide-up-leave{-webkit-animation-timing-function:cubic-bezier(.755,.05,.855,.06);animation-timing-function:cubic-bezier(.755,.05,.855,.06)}.slide-down-enter,.slide-down-appear,.slide-down-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.slide-down-enter.slide-down-enter-active,.slide-down-appear.slide-down-appear-active{-webkit-animation-name:antSlideDownIn;animation-name:antSlideDownIn;-webkit-animation-play-state:running;animation-play-state:running}.slide-down-leave.slide-down-leave-active{-webkit-animation-name:antSlideDownOut;animation-name:antSlideDownOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.slide-down-enter,.slide-down-appear{opacity:0;-webkit-animation-timing-function:cubic-bezier(.23,1,.32,1);animation-timing-function:cubic-bezier(.23,1,.32,1)}.slide-down-leave{-webkit-animation-timing-function:cubic-bezier(.755,.05,.855,.06);animation-timing-function:cubic-bezier(.755,.05,.855,.06)}.slide-left-enter,.slide-left-appear,.slide-left-leave{-webkit-animation-duration:.2s;animation-duration:.2
s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.slide-left-enter.slide-left-enter-active,.slide-left-appear.slide-left-appear-active{-webkit-animation-name:antSlideLeftIn;animation-name:antSlideLeftIn;-webkit-animation-play-state:running;animation-play-state:running}.slide-left-leave.slide-left-leave-active{-webkit-animation-name:antSlideLeftOut;animation-name:antSlideLeftOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.slide-left-enter,.slide-left-appear{opacity:0;-webkit-animation-timing-function:cubic-bezier(.23,1,.32,1);animation-timing-function:cubic-bezier(.23,1,.32,1)}.slide-left-leave{-webkit-animation-timing-function:cubic-bezier(.755,.05,.855,.06);animation-timing-function:cubic-bezier(.755,.05,.855,.06)}.slide-right-enter,.slide-right-appear,.slide-right-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.slide-right-enter.slide-right-enter-active,.slide-right-appear.slide-right-appear-active{-webkit-animation-name:antSlideRightIn;animation-name:antSlideRightIn;-webkit-animation-play-state:running;animation-play-state:running}.slide-right-leave.slide-right-leave-active{-webkit-animation-name:antSlideRightOut;animation-name:antSlideRightOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.slide-right-enter,.slide-right-appear{opacity:0;-webkit-animation-timing-function:cubic-bezier(.23,1,.32,1);animation-timing-function:cubic-bezier(.23,1,.32,1)}.slide-right-leave{-webkit-animation-timing-function:cubic-bezier(.755,.05,.855,.06);animation-timing-function:cubic-bezier(.755,.05,.855,.06)}.ant-slide-up-enter,.ant-slide-up-appear,.ant-slide-up-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-
play-state:paused;animation-play-state:paused}.ant-slide-up-enter.ant-slide-up-enter-active,.ant-slide-up-appear.ant-slide-up-appear-active{-webkit-animation-name:antSlideUpIn;animation-name:antSlideUpIn;-webkit-animation-play-state:running;animation-play-state:running}.ant-slide-up-leave.ant-slide-up-leave-active{-webkit-animation-name:antSlideUpOut;animation-name:antSlideUpOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.ant-slide-up-enter,.ant-slide-up-appear{opacity:0;-webkit-animation-timing-function:cubic-bezier(.23,1,.32,1);animation-timing-function:cubic-bezier(.23,1,.32,1)}.ant-slide-up-leave{-webkit-animation-timing-function:cubic-bezier(.755,.05,.855,.06);animation-timing-function:cubic-bezier(.755,.05,.855,.06)}.ant-slide-down-enter,.ant-slide-down-appear,.ant-slide-down-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.ant-slide-down-enter.ant-slide-down-enter-active,.ant-slide-down-appear.ant-slide-down-appear-active{-webkit-animation-name:antSlideDownIn;animation-name:antSlideDownIn;-webkit-animation-play-state:running;animation-play-state:running}.ant-slide-down-leave.ant-slide-down-leave-active{-webkit-animation-name:antSlideDownOut;animation-name:antSlideDownOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.ant-slide-down-enter,.ant-slide-down-appear{opacity:0;-webkit-animation-timing-function:cubic-bezier(.23,1,.32,1);animation-timing-function:cubic-bezier(.23,1,.32,1)}.ant-slide-down-leave{-webkit-animation-timing-function:cubic-bezier(.755,.05,.855,.06);animation-timing-function:cubic-bezier(.755,.05,.855,.06)}.ant-slide-left-enter,.ant-slide-left-appear,.ant-slide-left-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-pl
ay-state:paused}.ant-slide-left-enter.ant-slide-left-enter-active,.ant-slide-left-appear.ant-slide-left-appear-active{-webkit-animation-name:antSlideLeftIn;animation-name:antSlideLeftIn;-webkit-animation-play-state:running;animation-play-state:running}.ant-slide-left-leave.ant-slide-left-leave-active{-webkit-animation-name:antSlideLeftOut;animation-name:antSlideLeftOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.ant-slide-left-enter,.ant-slide-left-appear{opacity:0;-webkit-animation-timing-function:cubic-bezier(.23,1,.32,1);animation-timing-function:cubic-bezier(.23,1,.32,1)}.ant-slide-left-leave{-webkit-animation-timing-function:cubic-bezier(.755,.05,.855,.06);animation-timing-function:cubic-bezier(.755,.05,.855,.06)}.ant-slide-right-enter,.ant-slide-right-appear,.ant-slide-right-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.ant-slide-right-enter.ant-slide-right-enter-active,.ant-slide-right-appear.ant-slide-right-appear-active{-webkit-animation-name:antSlideRightIn;animation-name:antSlideRightIn;-webkit-animation-play-state:running;animation-play-state:running}.ant-slide-right-leave.ant-slide-right-leave-active{-webkit-animation-name:antSlideRightOut;animation-name:antSlideRightOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.ant-slide-right-enter,.ant-slide-right-appear{opacity:0;-webkit-animation-timing-function:cubic-bezier(.23,1,.32,1);animation-timing-function:cubic-bezier(.23,1,.32,1)}.ant-slide-right-leave{-webkit-animation-timing-function:cubic-bezier(.755,.05,.855,.06);animation-timing-function:cubic-bezier(.755,.05,.855,.06)}@-webkit-keyframes antSlideUpIn{0%{transform:scaleY(.8);transform-origin:0% 0%;opacity:0}to{transform:scaleY(1);transform-origin:0% 0%;opacity:1}}@keyframes antSlideUpIn{0%{transform:scaleY(.8);transform-origin:0% 
0%;opacity:0}to{transform:scaleY(1);transform-origin:0% 0%;opacity:1}}@-webkit-keyframes antSlideUpOut{0%{transform:scaleY(1);transform-origin:0% 0%;opacity:1}to{transform:scaleY(.8);transform-origin:0% 0%;opacity:0}}@keyframes antSlideUpOut{0%{transform:scaleY(1);transform-origin:0% 0%;opacity:1}to{transform:scaleY(.8);transform-origin:0% 0%;opacity:0}}@-webkit-keyframes antSlideDownIn{0%{transform:scaleY(.8);transform-origin:100% 100%;opacity:0}to{transform:scaleY(1);transform-origin:100% 100%;opacity:1}}@keyframes antSlideDownIn{0%{transform:scaleY(.8);transform-origin:100% 100%;opacity:0}to{transform:scaleY(1);transform-origin:100% 100%;opacity:1}}@-webkit-keyframes antSlideDownOut{0%{transform:scaleY(1);transform-origin:100% 100%;opacity:1}to{transform:scaleY(.8);transform-origin:100% 100%;opacity:0}}@keyframes antSlideDownOut{0%{transform:scaleY(1);transform-origin:100% 100%;opacity:1}to{transform:scaleY(.8);transform-origin:100% 100%;opacity:0}}@-webkit-keyframes antSlideLeftIn{0%{transform:scaleX(.8);transform-origin:0% 0%;opacity:0}to{transform:scaleX(1);transform-origin:0% 0%;opacity:1}}@keyframes antSlideLeftIn{0%{transform:scaleX(.8);transform-origin:0% 0%;opacity:0}to{transform:scaleX(1);transform-origin:0% 0%;opacity:1}}@-webkit-keyframes antSlideLeftOut{0%{transform:scaleX(1);transform-origin:0% 0%;opacity:1}to{transform:scaleX(.8);transform-origin:0% 0%;opacity:0}}@keyframes antSlideLeftOut{0%{transform:scaleX(1);transform-origin:0% 0%;opacity:1}to{transform:scaleX(.8);transform-origin:0% 0%;opacity:0}}@-webkit-keyframes antSlideRightIn{0%{transform:scaleX(.8);transform-origin:100% 0%;opacity:0}to{transform:scaleX(1);transform-origin:100% 0%;opacity:1}}@keyframes antSlideRightIn{0%{transform:scaleX(.8);transform-origin:100% 0%;opacity:0}to{transform:scaleX(1);transform-origin:100% 0%;opacity:1}}@-webkit-keyframes antSlideRightOut{0%{transform:scaleX(1);transform-origin:100% 0%;opacity:1}to{transform:scaleX(.8);transform-origin:100% 
0%;opacity:0}}@keyframes antSlideRightOut{0%{transform:scaleX(1);transform-origin:100% 0%;opacity:1}to{transform:scaleX(.8);transform-origin:100% 0%;opacity:0}}.ant-zoom-enter,.ant-zoom-appear,.ant-zoom-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.ant-zoom-enter.ant-zoom-enter-active,.ant-zoom-appear.ant-zoom-appear-active{-webkit-animation-name:antZoomIn;animation-name:antZoomIn;-webkit-animation-play-state:running;animation-play-state:running}.ant-zoom-leave.ant-zoom-leave-active{-webkit-animation-name:antZoomOut;animation-name:antZoomOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.ant-zoom-enter,.ant-zoom-appear{transform:scale(0);opacity:0;-webkit-animation-timing-function:cubic-bezier(.08,.82,.17,1);animation-timing-function:cubic-bezier(.08,.82,.17,1)}.ant-zoom-enter-prepare,.ant-zoom-appear-prepare{transform:none}.ant-zoom-leave{-webkit-animation-timing-function:cubic-bezier(.78,.14,.15,.86);animation-timing-function:cubic-bezier(.78,.14,.15,.86)}.zoom-enter,.zoom-appear,.zoom-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.zoom-enter.zoom-enter-active,.zoom-appear.zoom-appear-active{-webkit-animation-name:antZoomIn;animation-name:antZoomIn;-webkit-animation-play-state:running;animation-play-state:running}.zoom-leave.zoom-leave-active{-webkit-animation-name:antZoomOut;animation-name:antZoomOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.zoom-enter,.zoom-appear{transform:scale(0);opacity:0;-webkit-animation-timing-function:cubic-bezier(.08,.82,.17,1);animation-timing-function:cubic-bezier(.08,.82,.17,1)}.zoom-enter-prepare,.zoom-appear-prepare{transform:none}.zoom-leave{-webkit-animation-timing-function:cubi
c-bezier(.78,.14,.15,.86);animation-timing-function:cubic-bezier(.78,.14,.15,.86)}.ant-zoom-big-enter,.ant-zoom-big-appear,.ant-zoom-big-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.ant-zoom-big-enter.ant-zoom-big-enter-active,.ant-zoom-big-appear.ant-zoom-big-appear-active{-webkit-animation-name:antZoomBigIn;animation-name:antZoomBigIn;-webkit-animation-play-state:running;animation-play-state:running}.ant-zoom-big-leave.ant-zoom-big-leave-active{-webkit-animation-name:antZoomBigOut;animation-name:antZoomBigOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.ant-zoom-big-enter,.ant-zoom-big-appear{transform:scale(0);opacity:0;-webkit-animation-timing-function:cubic-bezier(.08,.82,.17,1);animation-timing-function:cubic-bezier(.08,.82,.17,1)}.ant-zoom-big-enter-prepare,.ant-zoom-big-appear-prepare{transform:none}.ant-zoom-big-leave{-webkit-animation-timing-function:cubic-bezier(.78,.14,.15,.86);animation-timing-function:cubic-bezier(.78,.14,.15,.86)}.zoom-big-enter,.zoom-big-appear,.zoom-big-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.zoom-big-enter.zoom-big-enter-active,.zoom-big-appear.zoom-big-appear-active{-webkit-animation-name:antZoomBigIn;animation-name:antZoomBigIn;-webkit-animation-play-state:running;animation-play-state:running}.zoom-big-leave.zoom-big-leave-active{-webkit-animation-name:antZoomBigOut;animation-name:antZoomBigOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.zoom-big-enter,.zoom-big-appear{transform:scale(0);opacity:0;-webkit-animation-timing-function:cubic-bezier(.08,.82,.17,1);animation-timing-function:cubic-bezier(.08,.82,.17,1)}.zoom-big-enter-prepare,.zoom-big-appear-prepare{transform:n
one}.zoom-big-leave{-webkit-animation-timing-function:cubic-bezier(.78,.14,.15,.86);animation-timing-function:cubic-bezier(.78,.14,.15,.86)}.ant-zoom-big-fast-enter,.ant-zoom-big-fast-appear,.ant-zoom-big-fast-leave{-webkit-animation-duration:.1s;animation-duration:.1s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.ant-zoom-big-fast-enter.ant-zoom-big-fast-enter-active,.ant-zoom-big-fast-appear.ant-zoom-big-fast-appear-active{-webkit-animation-name:antZoomBigIn;animation-name:antZoomBigIn;-webkit-animation-play-state:running;animation-play-state:running}.ant-zoom-big-fast-leave.ant-zoom-big-fast-leave-active{-webkit-animation-name:antZoomBigOut;animation-name:antZoomBigOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.ant-zoom-big-fast-enter,.ant-zoom-big-fast-appear{transform:scale(0);opacity:0;-webkit-animation-timing-function:cubic-bezier(.08,.82,.17,1);animation-timing-function:cubic-bezier(.08,.82,.17,1)}.ant-zoom-big-fast-enter-prepare,.ant-zoom-big-fast-appear-prepare{transform:none}.ant-zoom-big-fast-leave{-webkit-animation-timing-function:cubic-bezier(.78,.14,.15,.86);animation-timing-function:cubic-bezier(.78,.14,.15,.86)}.zoom-big-fast-enter,.zoom-big-fast-appear,.zoom-big-fast-leave{-webkit-animation-duration:.1s;animation-duration:.1s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.zoom-big-fast-enter.zoom-big-fast-enter-active,.zoom-big-fast-appear.zoom-big-fast-appear-active{-webkit-animation-name:antZoomBigIn;animation-name:antZoomBigIn;-webkit-animation-play-state:running;animation-play-state:running}.zoom-big-fast-leave.zoom-big-fast-leave-active{-webkit-animation-name:antZoomBigOut;animation-name:antZoomBigOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.zoom-big-fast-enter,.zoom-big-fast-appear{transform:scale(0);opa
city:0;-webkit-animation-timing-function:cubic-bezier(.08,.82,.17,1);animation-timing-function:cubic-bezier(.08,.82,.17,1)}.zoom-big-fast-enter-prepare,.zoom-big-fast-appear-prepare{transform:none}.zoom-big-fast-leave{-webkit-animation-timing-function:cubic-bezier(.78,.14,.15,.86);animation-timing-function:cubic-bezier(.78,.14,.15,.86)}.ant-zoom-up-enter,.ant-zoom-up-appear,.ant-zoom-up-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.ant-zoom-up-enter.ant-zoom-up-enter-active,.ant-zoom-up-appear.ant-zoom-up-appear-active{-webkit-animation-name:antZoomUpIn;animation-name:antZoomUpIn;-webkit-animation-play-state:running;animation-play-state:running}.ant-zoom-up-leave.ant-zoom-up-leave-active{-webkit-animation-name:antZoomUpOut;animation-name:antZoomUpOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.ant-zoom-up-enter,.ant-zoom-up-appear{transform:scale(0);opacity:0;-webkit-animation-timing-function:cubic-bezier(.08,.82,.17,1);animation-timing-function:cubic-bezier(.08,.82,.17,1)}.ant-zoom-up-enter-prepare,.ant-zoom-up-appear-prepare{transform:none}.ant-zoom-up-leave{-webkit-animation-timing-function:cubic-bezier(.78,.14,.15,.86);animation-timing-function:cubic-bezier(.78,.14,.15,.86)}.zoom-up-enter,.zoom-up-appear,.zoom-up-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.zoom-up-enter.zoom-up-enter-active,.zoom-up-appear.zoom-up-appear-active{-webkit-animation-name:antZoomUpIn;animation-name:antZoomUpIn;-webkit-animation-play-state:running;animation-play-state:running}.zoom-up-leave.zoom-up-leave-active{-webkit-animation-name:antZoomUpOut;animation-name:antZoomUpOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.zoom-up-ente
r,.zoom-up-appear{transform:scale(0);opacity:0;-webkit-animation-timing-function:cubic-bezier(.08,.82,.17,1);animation-timing-function:cubic-bezier(.08,.82,.17,1)}.zoom-up-enter-prepare,.zoom-up-appear-prepare{transform:none}.zoom-up-leave{-webkit-animation-timing-function:cubic-bezier(.78,.14,.15,.86);animation-timing-function:cubic-bezier(.78,.14,.15,.86)}.ant-zoom-down-enter,.ant-zoom-down-appear,.ant-zoom-down-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.ant-zoom-down-enter.ant-zoom-down-enter-active,.ant-zoom-down-appear.ant-zoom-down-appear-active{-webkit-animation-name:antZoomDownIn;animation-name:antZoomDownIn;-webkit-animation-play-state:running;animation-play-state:running}.ant-zoom-down-leave.ant-zoom-down-leave-active{-webkit-animation-name:antZoomDownOut;animation-name:antZoomDownOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.ant-zoom-down-enter,.ant-zoom-down-appear{transform:scale(0);opacity:0;-webkit-animation-timing-function:cubic-bezier(.08,.82,.17,1);animation-timing-function:cubic-bezier(.08,.82,.17,1)}.ant-zoom-down-enter-prepare,.ant-zoom-down-appear-prepare{transform:none}.ant-zoom-down-leave{-webkit-animation-timing-function:cubic-bezier(.78,.14,.15,.86);animation-timing-function:cubic-bezier(.78,.14,.15,.86)}.zoom-down-enter,.zoom-down-appear,.zoom-down-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.zoom-down-enter.zoom-down-enter-active,.zoom-down-appear.zoom-down-appear-active{-webkit-animation-name:antZoomDownIn;animation-name:antZoomDownIn;-webkit-animation-play-state:running;animation-play-state:running}.zoom-down-leave.zoom-down-leave-active{-webkit-animation-name:antZoomDownOut;animation-name:antZoomDownOut;-webkit-animati
on-play-state:running;animation-play-state:running;pointer-events:none}.zoom-down-enter,.zoom-down-appear{transform:scale(0);opacity:0;-webkit-animation-timing-function:cubic-bezier(.08,.82,.17,1);animation-timing-function:cubic-bezier(.08,.82,.17,1)}.zoom-down-enter-prepare,.zoom-down-appear-prepare{transform:none}.zoom-down-leave{-webkit-animation-timing-function:cubic-bezier(.78,.14,.15,.86);animation-timing-function:cubic-bezier(.78,.14,.15,.86)}.ant-zoom-left-enter,.ant-zoom-left-appear,.ant-zoom-left-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.ant-zoom-left-enter.ant-zoom-left-enter-active,.ant-zoom-left-appear.ant-zoom-left-appear-active{-webkit-animation-name:antZoomLeftIn;animation-name:antZoomLeftIn;-webkit-animation-play-state:running;animation-play-state:running}.ant-zoom-left-leave.ant-zoom-left-leave-active{-webkit-animation-name:antZoomLeftOut;animation-name:antZoomLeftOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.ant-zoom-left-enter,.ant-zoom-left-appear{transform:scale(0);opacity:0;-webkit-animation-timing-function:cubic-bezier(.08,.82,.17,1);animation-timing-function:cubic-bezier(.08,.82,.17,1)}.ant-zoom-left-enter-prepare,.ant-zoom-left-appear-prepare{transform:none}.ant-zoom-left-leave{-webkit-animation-timing-function:cubic-bezier(.78,.14,.15,.86);animation-timing-function:cubic-bezier(.78,.14,.15,.86)}.zoom-left-enter,.zoom-left-appear,.zoom-left-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.zoom-left-enter.zoom-left-enter-active,.zoom-left-appear.zoom-left-appear-active{-webkit-animation-name:antZoomLeftIn;animation-name:antZoomLeftIn;-webkit-animation-play-state:running;animation-play-state:running}.zoom-left-leave.zoom-left-le
ave-active{-webkit-animation-name:antZoomLeftOut;animation-name:antZoomLeftOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.zoom-left-enter,.zoom-left-appear{transform:scale(0);opacity:0;-webkit-animation-timing-function:cubic-bezier(.08,.82,.17,1);animation-timing-function:cubic-bezier(.08,.82,.17,1)}.zoom-left-enter-prepare,.zoom-left-appear-prepare{transform:none}.zoom-left-leave{-webkit-animation-timing-function:cubic-bezier(.78,.14,.15,.86);animation-timing-function:cubic-bezier(.78,.14,.15,.86)}.ant-zoom-right-enter,.ant-zoom-right-appear,.ant-zoom-right-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.ant-zoom-right-enter.ant-zoom-right-enter-active,.ant-zoom-right-appear.ant-zoom-right-appear-active{-webkit-animation-name:antZoomRightIn;animation-name:antZoomRightIn;-webkit-animation-play-state:running;animation-play-state:running}.ant-zoom-right-leave.ant-zoom-right-leave-active{-webkit-animation-name:antZoomRightOut;animation-name:antZoomRightOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.ant-zoom-right-enter,.ant-zoom-right-appear{transform:scale(0);opacity:0;-webkit-animation-timing-function:cubic-bezier(.08,.82,.17,1);animation-timing-function:cubic-bezier(.08,.82,.17,1)}.ant-zoom-right-enter-prepare,.ant-zoom-right-appear-prepare{transform:none}.ant-zoom-right-leave{-webkit-animation-timing-function:cubic-bezier(.78,.14,.15,.86);animation-timing-function:cubic-bezier(.78,.14,.15,.86)}.zoom-right-enter,.zoom-right-appear,.zoom-right-leave{-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.zoom-right-enter.zoom-right-enter-active,.zoom-right-appear.zoom-right-appear-active{-webkit-animation-name:antZoomRightIn;anim
ation-name:antZoomRightIn;-webkit-animation-play-state:running;animation-play-state:running}.zoom-right-leave.zoom-right-leave-active{-webkit-animation-name:antZoomRightOut;animation-name:antZoomRightOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.zoom-right-enter,.zoom-right-appear{transform:scale(0);opacity:0;-webkit-animation-timing-function:cubic-bezier(.08,.82,.17,1);animation-timing-function:cubic-bezier(.08,.82,.17,1)}.zoom-right-enter-prepare,.zoom-right-appear-prepare{transform:none}.zoom-right-leave{-webkit-animation-timing-function:cubic-bezier(.78,.14,.15,.86);animation-timing-function:cubic-bezier(.78,.14,.15,.86)}@-webkit-keyframes antZoomIn{0%{transform:scale(.2);opacity:0}to{transform:scale(1);opacity:1}}@keyframes antZoomIn{0%{transform:scale(.2);opacity:0}to{transform:scale(1);opacity:1}}@-webkit-keyframes antZoomOut{0%{transform:scale(1)}to{transform:scale(.2);opacity:0}}@keyframes antZoomOut{0%{transform:scale(1)}to{transform:scale(.2);opacity:0}}@-webkit-keyframes antZoomBigIn{0%{transform:none;opacity:0}5%{transform:scale(.8);opacity:0}to{transform:scale(1);opacity:1}}@keyframes antZoomBigIn{0%{transform:none;opacity:0}5%{transform:scale(.8);opacity:0}to{transform:scale(1);opacity:1}}@-webkit-keyframes antZoomBigOut{0%{transform:scale(1)}to{transform:scale(.8);opacity:0}}@keyframes antZoomBigOut{0%{transform:scale(1)}to{transform:scale(.8);opacity:0}}@-webkit-keyframes antZoomUpIn{0%{transform:scale(.8);transform-origin:50% 0%;opacity:0}to{transform:scale(1);transform-origin:50% 0%}}@keyframes antZoomUpIn{0%{transform:scale(.8);transform-origin:50% 0%;opacity:0}to{transform:scale(1);transform-origin:50% 0%}}@-webkit-keyframes antZoomUpOut{0%{transform:scale(1);transform-origin:50% 0%}to{transform:scale(.8);transform-origin:50% 0%;opacity:0}}@keyframes antZoomUpOut{0%{transform:scale(1);transform-origin:50% 0%}to{transform:scale(.8);transform-origin:50% 0%;opacity:0}}@-webkit-keyframes 
antZoomLeftIn{0%{transform:scale(.8);transform-origin:0% 50%;opacity:0}to{transform:scale(1);transform-origin:0% 50%}}@keyframes antZoomLeftIn{0%{transform:scale(.8);transform-origin:0% 50%;opacity:0}to{transform:scale(1);transform-origin:0% 50%}}@-webkit-keyframes antZoomLeftOut{0%{transform:scale(1);transform-origin:0% 50%}to{transform:scale(.8);transform-origin:0% 50%;opacity:0}}@keyframes antZoomLeftOut{0%{transform:scale(1);transform-origin:0% 50%}to{transform:scale(.8);transform-origin:0% 50%;opacity:0}}@-webkit-keyframes antZoomRightIn{0%{transform:scale(.8);transform-origin:100% 50%;opacity:0}to{transform:scale(1);transform-origin:100% 50%}}@keyframes antZoomRightIn{0%{transform:scale(.8);transform-origin:100% 50%;opacity:0}to{transform:scale(1);transform-origin:100% 50%}}@-webkit-keyframes antZoomRightOut{0%{transform:scale(1);transform-origin:100% 50%}to{transform:scale(.8);transform-origin:100% 50%;opacity:0}}@keyframes antZoomRightOut{0%{transform:scale(1);transform-origin:100% 50%}to{transform:scale(.8);transform-origin:100% 50%;opacity:0}}@-webkit-keyframes antZoomDownIn{0%{transform:scale(.8);transform-origin:50% 100%;opacity:0}to{transform:scale(1);transform-origin:50% 100%}}@keyframes antZoomDownIn{0%{transform:scale(.8);transform-origin:50% 100%;opacity:0}to{transform:scale(1);transform-origin:50% 100%}}@-webkit-keyframes antZoomDownOut{0%{transform:scale(1);transform-origin:50% 100%}to{transform:scale(.8);transform-origin:50% 100%;opacity:0}}@keyframes antZoomDownOut{0%{transform:scale(1);transform-origin:50% 100%}to{transform:scale(.8);transform-origin:50% 100%;opacity:0}}.ant-motion-collapse-legacy{overflow:hidden}.ant-motion-collapse-legacy-active{transition:height .2s cubic-bezier(.645,.045,.355,1),opacity .2s cubic-bezier(.645,.045,.355,1)!important}.ant-motion-collapse{overflow:hidden;transition:height .2s cubic-bezier(.645,.045,.355,1),opacity .2s 
cubic-bezier(.645,.045,.355,1)!important}.ant-affix{position:fixed;z-index:10}.ant-alert{box-sizing:border-box;margin:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:relative;display:flex;align-items:center;padding:8px 15px;word-wrap:break-word;border-radius:2px}.ant-alert-content{flex:1;min-width:0}.ant-alert-icon{margin-right:8px}.ant-alert-description{display:none;font-size:14px;line-height:22px}.ant-alert-success{background-color:#f6ffed;border:1px solid #b7eb8f}.ant-alert-success .ant-alert-icon{color:#52c41a}.ant-alert-info{background-color:#e6f7ff;border:1px solid #91d5ff}.ant-alert-info .ant-alert-icon{color:#1890ff}.ant-alert-warning{background-color:#fffbe6;border:1px solid #ffe58f}.ant-alert-warning .ant-alert-icon{color:#faad14}.ant-alert-error{background-color:#fff2f0;border:1px solid #ffccc7}.ant-alert-error .ant-alert-icon{color:#ff4d4f}.ant-alert-close-icon{margin-left:8px;padding:0;overflow:hidden;font-size:12px;line-height:12px;background-color:transparent;border:none;outline:none;cursor:pointer}.ant-alert-close-icon .anticon-close{color:#00000073;transition:color .3s}.ant-alert-close-icon .anticon-close:hover{color:#000000bf}.ant-alert-close-text{color:#00000073;transition:color .3s}.ant-alert-close-text:hover{color:#000000bf}.ant-alert-with-description{align-items:flex-start;padding:15px}.ant-alert-with-description.ant-alert-no-icon{padding:15px}.ant-alert-with-description .ant-alert-icon{margin-right:15px;font-size:24px}.ant-alert-with-description .ant-alert-message{display:block;margin-bottom:4px;color:#000000d9;font-size:16px}.ant-alert-message{color:#000000d9}.ant-alert-with-description .ant-alert-description{display:block}.ant-alert.ant-alert-closing{height:0!important;margin:0;padding-top:0;padding-bottom:0;transform-origin:50% 0;transition:all .3s cubic-bezier(.78,.14,.15,.86)}.ant-alert-slide-up-leave{-webkit-animation:antAlertSlideUpOut .3s 
cubic-bezier(.78,.14,.15,.86);animation:antAlertSlideUpOut .3s cubic-bezier(.78,.14,.15,.86);-webkit-animation-fill-mode:both;animation-fill-mode:both}.ant-alert-banner{margin-bottom:0;border:0;border-radius:0}@-webkit-keyframes antAlertSlideUpIn{0%{transform:scaleY(0);transform-origin:0% 0%;opacity:0}to{transform:scaleY(1);transform-origin:0% 0%;opacity:1}}@keyframes antAlertSlideUpIn{0%{transform:scaleY(0);transform-origin:0% 0%;opacity:0}to{transform:scaleY(1);transform-origin:0% 0%;opacity:1}}@-webkit-keyframes antAlertSlideUpOut{0%{transform:scaleY(1);transform-origin:0% 0%;opacity:1}to{transform:scaleY(0);transform-origin:0% 0%;opacity:0}}@keyframes antAlertSlideUpOut{0%{transform:scaleY(1);transform-origin:0% 0%;opacity:1}to{transform:scaleY(0);transform-origin:0% 0%;opacity:0}}.ant-anchor{box-sizing:border-box;margin:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:relative;padding:0 0 0 2px}.ant-anchor-wrapper{margin-left:-4px;padding-left:4px;overflow:auto;background-color:transparent}.ant-anchor-ink{position:absolute;top:0;left:0;height:100%}.ant-anchor-ink:before{position:relative;display:block;width:2px;height:100%;margin:0 auto;background-color:#f0f0f0;content:" "}.ant-anchor-ink-ball{position:absolute;left:50%;display:none;width:8px;height:8px;background-color:#fff;border:2px solid #1890ff;border-radius:8px;transform:translate(-50%);transition:top .3s ease-in-out}.ant-anchor-ink-ball.visible{display:inline-block}.ant-anchor.fixed .ant-anchor-ink .ant-anchor-ink-ball{display:none}.ant-anchor-link{padding:7px 0 7px 16px;line-height:1.143}.ant-anchor-link-title{position:relative;display:block;margin-bottom:6px;overflow:hidden;color:#000000d9;white-space:nowrap;text-overflow:ellipsis;transition:all .3s}.ant-anchor-link-title:only-child{margin-bottom:0}.ant-anchor-link-active>.ant-anchor-link-title{color:#1890ff}.ant-anchor-link 
.ant-anchor-link{padding-top:5px;padding-bottom:5px}.ant-anchor-rtl{direction:rtl}.ant-anchor-rtl.ant-anchor-wrapper{margin-right:-4px;margin-left:0;padding-right:4px;padding-left:0}.ant-anchor-rtl .ant-anchor-ink{right:0;left:auto}.ant-anchor-rtl .ant-anchor-ink-ball{right:50%;left:0;transform:translate(50%)}.ant-anchor-rtl .ant-anchor-link{padding:7px 16px 7px 0}.ant-select-auto-complete{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum"}.ant-select-auto-complete .ant-select-clear{right:13px}.ant-select-single .ant-select-selector{display:flex}.ant-select-single .ant-select-selector .ant-select-selection-search{position:absolute;top:0;right:11px;bottom:0;left:11px}.ant-select-single .ant-select-selector .ant-select-selection-search-input{width:100%}.ant-select-single .ant-select-selector .ant-select-selection-item,.ant-select-single .ant-select-selector .ant-select-selection-placeholder{padding:0;line-height:30px;transition:all .3s}@supports (-moz-appearance: meterbar){.ant-select-single .ant-select-selector .ant-select-selection-item,.ant-select-single .ant-select-selector .ant-select-selection-placeholder{line-height:30px}}.ant-select-single .ant-select-selector .ant-select-selection-item{position:relative;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.ant-select-single .ant-select-selector .ant-select-selection-placeholder{pointer-events:none}.ant-select-single .ant-select-selector:after,.ant-select-single .ant-select-selector .ant-select-selection-item:after,.ant-select-single .ant-select-selector .ant-select-selection-placeholder:after{display:inline-block;width:0;visibility:hidden;content:"\a0"}.ant-select-single.ant-select-show-arrow .ant-select-selection-search{right:25px}.ant-select-single.ant-select-show-arrow .ant-select-selection-item,.ant-select-single.ant-select-show-arrow 
.ant-select-selection-placeholder{padding-right:18px}.ant-select-single.ant-select-open .ant-select-selection-item{color:#bfbfbf}.ant-select-single:not(.ant-select-customize-input) .ant-select-selector{width:100%;height:32px;padding:0 11px}.ant-select-single:not(.ant-select-customize-input) .ant-select-selector .ant-select-selection-search-input{height:30px}.ant-select-single:not(.ant-select-customize-input) .ant-select-selector:after{line-height:30px}.ant-select-single.ant-select-customize-input .ant-select-selector:after{display:none}.ant-select-single.ant-select-customize-input .ant-select-selector .ant-select-selection-search{position:static;width:100%}.ant-select-single.ant-select-customize-input .ant-select-selector .ant-select-selection-placeholder{position:absolute;right:0;left:0;padding:0 11px}.ant-select-single.ant-select-customize-input .ant-select-selector .ant-select-selection-placeholder:after{display:none}.ant-select-single.ant-select-lg:not(.ant-select-customize-input) .ant-select-selector{height:40px}.ant-select-single.ant-select-lg:not(.ant-select-customize-input) .ant-select-selector:after,.ant-select-single.ant-select-lg:not(.ant-select-customize-input) .ant-select-selector .ant-select-selection-item,.ant-select-single.ant-select-lg:not(.ant-select-customize-input) .ant-select-selector .ant-select-selection-placeholder{line-height:38px}.ant-select-single.ant-select-lg:not(.ant-select-customize-input):not(.ant-select-customize-input) .ant-select-selection-search-input{height:38px}.ant-select-single.ant-select-sm:not(.ant-select-customize-input) .ant-select-selector{height:24px}.ant-select-single.ant-select-sm:not(.ant-select-customize-input) .ant-select-selector:after,.ant-select-single.ant-select-sm:not(.ant-select-customize-input) .ant-select-selector .ant-select-selection-item,.ant-select-single.ant-select-sm:not(.ant-select-customize-input) .ant-select-selector 
.ant-select-selection-placeholder{line-height:22px}.ant-select-single.ant-select-sm:not(.ant-select-customize-input):not(.ant-select-customize-input) .ant-select-selection-search-input{height:22px}.ant-select-single.ant-select-sm:not(.ant-select-customize-input) .ant-select-selection-search{right:7px;left:7px}.ant-select-single.ant-select-sm:not(.ant-select-customize-input) .ant-select-selector{padding:0 7px}.ant-select-single.ant-select-sm:not(.ant-select-customize-input).ant-select-show-arrow .ant-select-selection-search{right:28px}.ant-select-single.ant-select-sm:not(.ant-select-customize-input).ant-select-show-arrow .ant-select-selection-item,.ant-select-single.ant-select-sm:not(.ant-select-customize-input).ant-select-show-arrow .ant-select-selection-placeholder{padding-right:21px}.ant-select-single.ant-select-lg:not(.ant-select-customize-input) .ant-select-selector{padding:0 11px}.ant-select-selection-overflow{position:relative;display:flex;flex:auto;flex-wrap:wrap;max-width:100%}.ant-select-selection-overflow-item{flex:none;align-self:center;max-width:100%}.ant-select-multiple .ant-select-selector{display:flex;flex-wrap:wrap;align-items:center;padding:1px 4px}.ant-select-show-search.ant-select-multiple .ant-select-selector{cursor:text}.ant-select-disabled.ant-select-multiple .ant-select-selector{background:#f5f5f5;cursor:not-allowed}.ant-select-multiple .ant-select-selector:after{display:inline-block;width:0;margin:2px 0;line-height:24px;content:"\a0"}.ant-select-multiple.ant-select-show-arrow .ant-select-selector,.ant-select-multiple.ant-select-allow-clear .ant-select-selector{padding-right:24px}.ant-select-multiple .ant-select-selection-item{position:relative;display:flex;flex:none;box-sizing:border-box;max-width:100%;height:24px;margin-top:2px;margin-bottom:2px;line-height:22px;background:#f5f5f5;border:1px solid #f0f0f0;border-radius:2px;cursor:default;transition:font-size .3s,line-height .3s,height 
.3s;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;-webkit-margin-end:4px;margin-inline-end:4px;-webkit-padding-start:8px;padding-inline-start:8px;-webkit-padding-end:4px;padding-inline-end:4px}.ant-select-disabled.ant-select-multiple .ant-select-selection-item{color:#bfbfbf;border-color:#d9d9d9;cursor:not-allowed}.ant-select-multiple .ant-select-selection-item-content{display:inline-block;margin-right:4px;overflow:hidden;white-space:pre;text-overflow:ellipsis}.ant-select-multiple .ant-select-selection-item-remove{color:inherit;font-style:normal;line-height:0;text-align:center;text-transform:none;vertical-align:-.125em;text-rendering:optimizeLegibility;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;display:inline-block;color:#00000073;font-weight:700;font-size:10px;line-height:inherit;cursor:pointer}.ant-select-multiple .ant-select-selection-item-remove>*{line-height:1}.ant-select-multiple .ant-select-selection-item-remove svg{display:inline-block}.ant-select-multiple .ant-select-selection-item-remove:before{display:none}.ant-select-multiple .ant-select-selection-item-remove .ant-select-multiple .ant-select-selection-item-remove-icon{display:block}.ant-select-multiple .ant-select-selection-item-remove>.anticon{vertical-align:-.2em}.ant-select-multiple .ant-select-selection-item-remove:hover{color:#000000bf}.ant-select-multiple .ant-select-selection-overflow-item+.ant-select-selection-overflow-item .ant-select-selection-search{-webkit-margin-start:0;margin-inline-start:0}.ant-select-multiple .ant-select-selection-search{position:relative;max-width:100%;margin-top:2px;margin-bottom:2px;-webkit-margin-start:7px;margin-inline-start:7px}.ant-select-multiple .ant-select-selection-search-input,.ant-select-multiple .ant-select-selection-search-mirror{height:24px;font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Helvetica Neue,Arial,Noto Sans,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI 
Symbol,"Noto Color Emoji";line-height:24px;transition:all .3s}.ant-select-multiple .ant-select-selection-search-input{width:100%;min-width:4.1px}.ant-select-multiple .ant-select-selection-search-mirror{position:absolute;top:0;left:0;z-index:999;white-space:pre;visibility:hidden}.ant-select-multiple .ant-select-selection-placeholder{position:absolute;top:50%;right:11px;left:11px;transform:translateY(-50%);transition:all .3s}.ant-select-multiple.ant-select-lg .ant-select-selector:after{line-height:32px}.ant-select-multiple.ant-select-lg .ant-select-selection-item{height:32px;line-height:30px}.ant-select-multiple.ant-select-lg .ant-select-selection-search{height:32px;line-height:32px}.ant-select-multiple.ant-select-lg .ant-select-selection-search-input,.ant-select-multiple.ant-select-lg .ant-select-selection-search-mirror{height:32px;line-height:30px}.ant-select-multiple.ant-select-sm .ant-select-selector:after{line-height:16px}.ant-select-multiple.ant-select-sm .ant-select-selection-item{height:16px;line-height:14px}.ant-select-multiple.ant-select-sm .ant-select-selection-search{height:16px;line-height:16px}.ant-select-multiple.ant-select-sm .ant-select-selection-search-input,.ant-select-multiple.ant-select-sm .ant-select-selection-search-mirror{height:16px;line-height:14px}.ant-select-multiple.ant-select-sm .ant-select-selection-placeholder{left:7px}.ant-select-multiple.ant-select-sm .ant-select-selection-search{-webkit-margin-start:3px;margin-inline-start:3px}.ant-select-multiple.ant-select-lg .ant-select-selection-item{height:32px;line-height:32px}.ant-select-disabled .ant-select-selection-item-remove{display:none}.ant-select{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:relative;display:inline-block;cursor:pointer}.ant-select:not(.ant-select-customize-input) .ant-select-selector{position:relative;background-color:#fff;border:1px solid 
#d9d9d9;border-radius:2px;transition:all .3s cubic-bezier(.645,.045,.355,1)}.ant-select:not(.ant-select-customize-input) .ant-select-selector input{cursor:pointer}.ant-select-show-search.ant-select:not(.ant-select-customize-input) .ant-select-selector{cursor:text}.ant-select-show-search.ant-select:not(.ant-select-customize-input) .ant-select-selector input{cursor:auto}.ant-select-focused:not(.ant-select-disabled).ant-select:not(.ant-select-customize-input) .ant-select-selector{border-color:#40a9ff;border-right-width:1px!important;outline:0;box-shadow:0 0 0 2px #1890ff33}.ant-select-disabled.ant-select:not(.ant-select-customize-input) .ant-select-selector{color:#00000040;background:#f5f5f5;cursor:not-allowed}.ant-select-multiple.ant-select-disabled.ant-select:not(.ant-select-customize-input) .ant-select-selector{background:#f5f5f5}.ant-select-disabled.ant-select:not(.ant-select-customize-input) .ant-select-selector input{cursor:not-allowed}.ant-select:not(.ant-select-customize-input) .ant-select-selector .ant-select-selection-search-input{margin:0;padding:0;background:transparent;border:none;outline:none;-webkit-appearance:none;-moz-appearance:none;appearance:none}.ant-select:not(.ant-select-customize-input) .ant-select-selector .ant-select-selection-search-input::-webkit-search-cancel-button{display:none;-webkit-appearance:none}.ant-select:not(.ant-select-disabled):hover .ant-select-selector{border-color:#40a9ff;border-right-width:1px!important}.ant-select-selection-item{flex:1;overflow:hidden;white-space:nowrap;text-overflow:ellipsis}@media all and (-ms-high-contrast: none){.ant-select-selection-item *::-ms-backdrop,.ant-select-selection-item{flex:auto}}.ant-select-selection-placeholder{flex:1;overflow:hidden;color:#bfbfbf;white-space:nowrap;text-overflow:ellipsis;pointer-events:none}@media all and (-ms-high-contrast: none){.ant-select-selection-placeholder 
*::-ms-backdrop,.ant-select-selection-placeholder{flex:auto}}.ant-select-arrow{display:inline-block;color:inherit;font-style:normal;line-height:0;text-transform:none;vertical-align:-.125em;text-rendering:optimizeLegibility;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;position:absolute;top:53%;right:11px;width:12px;height:12px;margin-top:-6px;color:#00000040;font-size:12px;line-height:1;text-align:center;pointer-events:none}.ant-select-arrow>*{line-height:1}.ant-select-arrow svg{display:inline-block}.ant-select-arrow:before{display:none}.ant-select-arrow .ant-select-arrow-icon{display:block}.ant-select-arrow .anticon{vertical-align:top;transition:transform .3s}.ant-select-arrow .anticon>svg{vertical-align:top}.ant-select-arrow .anticon:not(.ant-select-suffix){pointer-events:auto}.ant-select-disabled .ant-select-arrow{cursor:not-allowed}.ant-select-clear{position:absolute;top:50%;right:11px;z-index:1;display:inline-block;width:12px;height:12px;margin-top:-6px;color:#00000040;font-size:12px;font-style:normal;line-height:1;text-align:center;text-transform:none;background:#fff;cursor:pointer;opacity:0;transition:color .3s ease,opacity .15s ease;text-rendering:auto}.ant-select-clear:before{display:block}.ant-select-clear:hover{color:#00000073}.ant-select:hover .ant-select-clear{opacity:1}.ant-select-dropdown{margin:0;color:#000000d9;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:absolute;top:-9999px;left:-9999px;z-index:1050;box-sizing:border-box;padding:4px 0;overflow:hidden;font-size:14px;font-variant:initial;background-color:#fff;border-radius:2px;outline:none;box-shadow:0 2px 8px 
#00000026}.ant-select-dropdown.ant-slide-up-enter.ant-slide-up-enter-active.ant-select-dropdown-placement-bottomLeft,.ant-select-dropdown.ant-slide-up-appear.ant-slide-up-appear-active.ant-select-dropdown-placement-bottomLeft{-webkit-animation-name:antSlideUpIn;animation-name:antSlideUpIn}.ant-select-dropdown.ant-slide-up-enter.ant-slide-up-enter-active.ant-select-dropdown-placement-topLeft,.ant-select-dropdown.ant-slide-up-appear.ant-slide-up-appear-active.ant-select-dropdown-placement-topLeft{-webkit-animation-name:antSlideDownIn;animation-name:antSlideDownIn}.ant-select-dropdown.ant-slide-up-leave.ant-slide-up-leave-active.ant-select-dropdown-placement-bottomLeft{-webkit-animation-name:antSlideUpOut;animation-name:antSlideUpOut}.ant-select-dropdown.ant-slide-up-leave.ant-slide-up-leave-active.ant-select-dropdown-placement-topLeft{-webkit-animation-name:antSlideDownOut;animation-name:antSlideDownOut}.ant-select-dropdown-hidden{display:none}.ant-select-dropdown-empty{color:#00000040}/* Empty-state row: same box metrics as .ant-select-item. Its colour is #00000040; a preceding color:#000000d9 in this same rule was overridden by it and has been removed. */.ant-select-item-empty{position:relative;display:block;min-height:32px;padding:5px 12px;font-weight:400;font-size:14px;line-height:22px;color:#00000040}.ant-select-item{position:relative;display:block;min-height:32px;padding:5px 12px;color:#000000d9;font-weight:400;font-size:14px;line-height:22px;cursor:pointer;transition:background .3s ease}.ant-select-item-group{color:#00000073;font-size:12px;cursor:default}.ant-select-item-option{display:flex}.ant-select-item-option-content{flex:auto;overflow:hidden;white-space:nowrap;text-overflow:ellipsis}.ant-select-item-option-state{flex:none}.ant-select-item-option-active:not(.ant-select-item-option-disabled){background-color:#f5f5f5}.ant-select-item-option-selected:not(.ant-select-item-option-disabled){color:#000000d9;font-weight:600;background-color:#e6f7ff}.ant-select-item-option-selected:not(.ant-select-item-option-disabled) 
.ant-select-item-option-state{color:#1890ff}.ant-select-item-option-disabled{color:#00000040;cursor:not-allowed}.ant-select-item-option-grouped{padding-left:24px}.ant-select-lg{font-size:16px}.ant-select-borderless .ant-select-selector{background-color:transparent!important;border-color:transparent!important;box-shadow:none!important}.ant-select-rtl{direction:rtl}.ant-select-rtl .ant-select-arrow,.ant-select-rtl .ant-select-clear{right:initial;left:11px}.ant-select-dropdown-rtl{direction:rtl}.ant-select-dropdown-rtl .ant-select-item-option-grouped{padding-right:24px;padding-left:12px}.ant-select-rtl.ant-select-multiple.ant-select-show-arrow .ant-select-selector,.ant-select-rtl.ant-select-multiple.ant-select-allow-clear .ant-select-selector{padding-right:4px;padding-left:24px}.ant-select-rtl.ant-select-multiple .ant-select-selection-item{text-align:right}.ant-select-rtl.ant-select-multiple .ant-select-selection-item-content{margin-right:0;margin-left:4px;text-align:right}.ant-select-rtl.ant-select-multiple .ant-select-selection-search-mirror{right:0;left:auto}.ant-select-rtl.ant-select-multiple .ant-select-selection-placeholder{right:11px;left:auto}.ant-select-rtl.ant-select-multiple.ant-select-sm .ant-select-selection-placeholder{right:7px}.ant-select-rtl.ant-select-single .ant-select-selector .ant-select-selection-item,.ant-select-rtl.ant-select-single .ant-select-selector .ant-select-selection-placeholder{right:0;left:9px;text-align:right}.ant-select-rtl.ant-select-single.ant-select-show-arrow .ant-select-selection-search{right:11px;left:25px}.ant-select-rtl.ant-select-single.ant-select-show-arrow .ant-select-selection-item,.ant-select-rtl.ant-select-single.ant-select-show-arrow .ant-select-selection-placeholder{padding-right:0;padding-left:18px}.ant-select-rtl.ant-select-single.ant-select-sm:not(.ant-select-customize-input).ant-select-show-arrow 
.ant-select-selection-search{right:6px}.ant-select-rtl.ant-select-single.ant-select-sm:not(.ant-select-customize-input).ant-select-show-arrow .ant-select-selection-item,.ant-select-rtl.ant-select-single.ant-select-sm:not(.ant-select-customize-input).ant-select-show-arrow .ant-select-selection-placeholder{padding-right:0;padding-left:21px}.ant-empty{margin:0 8px;font-size:14px;line-height:1.5715;text-align:center}.ant-empty-image{height:100px;margin-bottom:8px}.ant-empty-image img{height:100%}.ant-empty-image svg{height:100%;margin:auto}.ant-empty-footer{margin-top:16px}.ant-empty-normal{margin:32px 0;color:#00000040}.ant-empty-normal .ant-empty-image{height:40px}.ant-empty-small{margin:8px 0;color:#00000040}.ant-empty-small .ant-empty-image{height:35px}.ant-empty-img-default-ellipse{fill:#f5f5f5;fill-opacity:.8}.ant-empty-img-default-path-1{fill:#aeb8c2}.ant-empty-img-default-path-2{fill:url(#linearGradient-1)}.ant-empty-img-default-path-3{fill:#f5f5f7}.ant-empty-img-default-path-4,.ant-empty-img-default-path-5{fill:#dce0e6}.ant-empty-img-default-g{fill:#fff}.ant-empty-img-simple-ellipse{fill:#f5f5f5}.ant-empty-img-simple-g{stroke:#d9d9d9}.ant-empty-img-simple-path{fill:#fafafa}.ant-empty-rtl{direction:rtl}.ant-input-affix-wrapper{position:relative;display:inline-block;width:100%;padding:4px 11px;color:#000000d9;font-size:14px;line-height:1.5715;background-color:#fff;background-image:none;border:1px solid #d9d9d9;border-radius:2px;transition:all 
.3s;display:inline-flex}.ant-input-affix-wrapper::-moz-placeholder{color:#bfbfbf;opacity:1}.ant-input-affix-wrapper:-ms-input-placeholder{color:#bfbfbf}.ant-input-affix-wrapper::-webkit-input-placeholder{color:#bfbfbf}.ant-input-affix-wrapper:-moz-placeholder-shown{text-overflow:ellipsis}.ant-input-affix-wrapper:-ms-input-placeholder{text-overflow:ellipsis}.ant-input-affix-wrapper:placeholder-shown{text-overflow:ellipsis}.ant-input-affix-wrapper:focus{border-color:#40a9ff;border-right-width:1px!important;outline:0;box-shadow:0 0 0 2px #1890ff33}.ant-input-affix-wrapper[disabled]{color:#00000040;background-color:#f5f5f5;cursor:not-allowed;opacity:1}.ant-input-affix-wrapper[disabled]:hover{border-color:#d9d9d9;border-right-width:1px!important}textarea.ant-input-affix-wrapper{max-width:100%;height:auto;min-height:32px;line-height:1.5715;vertical-align:bottom;transition:all .3s,height 0s}.ant-input-affix-wrapper-disabled .ant-input[disabled]{background:transparent}.ant-input-affix-wrapper>input.ant-input{padding:0;border:none;outline:none}.ant-input-affix-wrapper>input.ant-input:focus{box-shadow:none}.ant-input-affix-wrapper:before{width:0;visibility:hidden;content:"\a0"}.ant-input-prefix,.ant-input-suffix{display:flex;flex:none;align-items:center}.ant-input-prefix{margin-right:4px}.ant-input-suffix{margin-left:4px}.ant-input{box-sizing:border-box;margin:0;font-variant:tabular-nums;list-style:none;font-feature-settings:"tnum";position:relative;display:inline-block;width:100%;padding:4px 11px;color:#000000d9;font-size:14px;line-height:1.5715;background-color:#fff;background-image:none;border:1px solid #d9d9d9;border-radius:2px;transition:all 
.3s}.ant-input::-moz-placeholder{color:#bfbfbf;opacity:1}.ant-input:-ms-input-placeholder{color:#bfbfbf}.ant-input::-webkit-input-placeholder{color:#bfbfbf}.ant-input:-moz-placeholder-shown{text-overflow:ellipsis}.ant-input:-ms-input-placeholder{text-overflow:ellipsis}.ant-input:placeholder-shown{text-overflow:ellipsis}.ant-input:hover{border-color:#40a9ff;border-right-width:1px!important}.ant-input:focus{border-color:#40a9ff;border-right-width:1px!important;outline:0;box-shadow:0 0 0 2px #1890ff33}.ant-input-disabled{color:#00000040;background-color:#f5f5f5;cursor:not-allowed;opacity:1}.ant-input-disabled:hover{border-color:#d9d9d9;border-right-width:1px!important}.ant-input[disabled]{color:#00000040;background-color:#f5f5f5;cursor:not-allowed;opacity:1}.ant-input[disabled]:hover{border-color:#d9d9d9;border-right-width:1px!important}textarea.ant-input{max-width:100%;height:auto;min-height:32px;line-height:1.5715;vertical-align:bottom;transition:all .3s,height 0s}.ant-input-lg{padding:6.5px 11px;font-size:16px}.ant-input-sm{padding:0 7px}.ant-input-group{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:relative;display:table;width:100%;border-collapse:separate;border-spacing:0}.ant-input-group[class*=col-]{float:none;padding-right:0;padding-left:0}.ant-input-group>[class*=col-]{padding-right:8px}.ant-input-group>[class*=col-]:last-child{padding-right:0}.ant-input-group-addon,.ant-input-group-wrap,.ant-input-group>.ant-input{display:table-cell}.ant-input-group-addon:not(:first-child):not(:last-child),.ant-input-group-wrap:not(:first-child):not(:last-child),.ant-input-group>.ant-input:not(:first-child):not(:last-child){border-radius:0}.ant-input-group-addon,.ant-input-group-wrap{width:1px;white-space:nowrap;vertical-align:middle}.ant-input-group-wrap>*{display:block!important}.ant-input-group 
.ant-input{float:left;width:100%;margin-bottom:0;text-align:inherit}.ant-input-group .ant-input:focus{z-index:1;border-right-width:1px}.ant-input-group .ant-input:hover{z-index:1;border-right-width:1px}.ant-input-group-addon{position:relative;padding:0 11px;color:#000000d9;font-weight:400;font-size:14px;text-align:center;background-color:#fafafa;border:1px solid #d9d9d9;border-radius:2px;transition:all .3s}.ant-input-group-addon .ant-select{margin:-5px -11px}.ant-input-group-addon .ant-select.ant-select-single:not(.ant-select-customize-input) .ant-select-selector{background-color:inherit;border:1px solid transparent;box-shadow:none}.ant-input-group-addon .ant-select-open .ant-select-selector,.ant-input-group-addon .ant-select-focused .ant-select-selector{color:#1890ff}.ant-input-group-addon>i:only-child:after{position:absolute;top:0;right:0;bottom:0;left:0;content:""}.ant-input-group>.ant-input:first-child,.ant-input-group-addon:first-child{border-top-right-radius:0;border-bottom-right-radius:0}.ant-input-group>.ant-input:first-child .ant-select .ant-select-selector,.ant-input-group-addon:first-child .ant-select .ant-select-selector{border-top-right-radius:0;border-bottom-right-radius:0}.ant-input-group>.ant-input-affix-wrapper:not(:first-child) .ant-input{border-top-left-radius:0;border-bottom-left-radius:0}.ant-input-group>.ant-input-affix-wrapper:not(:last-child) .ant-input{border-top-right-radius:0;border-bottom-right-radius:0}.ant-input-group-addon:first-child{border-right:0}.ant-input-group-addon:last-child{border-left:0}.ant-input-group>.ant-input:last-child,.ant-input-group-addon:last-child{border-top-left-radius:0;border-bottom-left-radius:0}.ant-input-group>.ant-input:last-child .ant-select .ant-select-selector,.ant-input-group-addon:last-child .ant-select .ant-select-selector{border-top-left-radius:0;border-bottom-left-radius:0}.ant-input-group-lg .ant-input,.ant-input-group-lg>.ant-input-group-addon{padding:6.5px 11px;font-size:16px}.ant-input-group-sm 
.ant-input,.ant-input-group-sm>.ant-input-group-addon{padding:0 7px}.ant-input-group-lg .ant-select-single .ant-select-selector{height:40px}.ant-input-group-sm .ant-select-single .ant-select-selector{height:24px}.ant-input-group.ant-input-group-compact{display:block}.ant-input-group.ant-input-group-compact:before,.ant-input-group.ant-input-group-compact:after{display:table;content:""}.ant-input-group.ant-input-group-compact:after{clear:both}.ant-input-group.ant-input-group-compact-addon:not(:first-child):not(:last-child),.ant-input-group.ant-input-group-compact-wrap:not(:first-child):not(:last-child),.ant-input-group.ant-input-group-compact>.ant-input:not(:first-child):not(:last-child){border-right-width:1px}.ant-input-group.ant-input-group-compact-addon:not(:first-child):not(:last-child):hover,.ant-input-group.ant-input-group-compact-wrap:not(:first-child):not(:last-child):hover,.ant-input-group.ant-input-group-compact>.ant-input:not(:first-child):not(:last-child):hover{z-index:1}.ant-input-group.ant-input-group-compact-addon:not(:first-child):not(:last-child):focus,.ant-input-group.ant-input-group-compact-wrap:not(:first-child):not(:last-child):focus,.ant-input-group.ant-input-group-compact>.ant-input:not(:first-child):not(:last-child):focus{z-index:1}.ant-input-group.ant-input-group-compact>*{display:inline-block;float:none;vertical-align:top;border-radius:0}.ant-input-group.ant-input-group-compact>.ant-input-affix-wrapper{display:inline-flex}.ant-input-group.ant-input-group-compact>*:not(:last-child){margin-right:-1px;border-right-width:1px}.ant-input-group.ant-input-group-compact .ant-input{float:none}.ant-input-group.ant-input-group-compact>.ant-select>.ant-select-selector,.ant-input-group.ant-input-group-compact>.ant-calendar-picker .ant-input,.ant-input-group.ant-input-group-compact>.ant-select-auto-complete .ant-input,.ant-input-group.ant-input-group-compact>.ant-cascader-picker .ant-input,.ant-input-group.ant-input-group-compact>.ant-mention-wrapper 
.ant-mention-editor,.ant-input-group.ant-input-group-compact>.ant-time-picker .ant-time-picker-input,.ant-input-group.ant-input-group-compact>.ant-input-group-wrapper .ant-input{border-right-width:1px;border-radius:0}.ant-input-group.ant-input-group-compact>.ant-select>.ant-select-selector:hover,.ant-input-group.ant-input-group-compact>.ant-calendar-picker .ant-input:hover,.ant-input-group.ant-input-group-compact>.ant-select-auto-complete .ant-input:hover,.ant-input-group.ant-input-group-compact>.ant-cascader-picker .ant-input:hover,.ant-input-group.ant-input-group-compact>.ant-mention-wrapper .ant-mention-editor:hover,.ant-input-group.ant-input-group-compact>.ant-time-picker .ant-time-picker-input:hover,.ant-input-group.ant-input-group-compact>.ant-input-group-wrapper .ant-input:hover{z-index:1}.ant-input-group.ant-input-group-compact>.ant-select>.ant-select-selector:focus,.ant-input-group.ant-input-group-compact>.ant-calendar-picker .ant-input:focus,.ant-input-group.ant-input-group-compact>.ant-select-auto-complete .ant-input:focus,.ant-input-group.ant-input-group-compact>.ant-cascader-picker .ant-input:focus,.ant-input-group.ant-input-group-compact>.ant-mention-wrapper .ant-mention-editor:focus,.ant-input-group.ant-input-group-compact>.ant-time-picker .ant-time-picker-input:focus,.ant-input-group.ant-input-group-compact>.ant-input-group-wrapper .ant-input:focus{z-index:1}.ant-input-group.ant-input-group-compact>.ant-select-focused{z-index:1}.ant-input-group.ant-input-group-compact>*:first-child,.ant-input-group.ant-input-group-compact>.ant-select:first-child>.ant-select-selector,.ant-input-group.ant-input-group-compact>.ant-calendar-picker:first-child .ant-input,.ant-input-group.ant-input-group-compact>.ant-select-auto-complete:first-child .ant-input,.ant-input-group.ant-input-group-compact>.ant-cascader-picker:first-child .ant-input,.ant-input-group.ant-input-group-compact>.ant-mention-wrapper:first-child 
.ant-mention-editor,.ant-input-group.ant-input-group-compact>.ant-time-picker:first-child .ant-time-picker-input{border-top-left-radius:2px;border-bottom-left-radius:2px}.ant-input-group.ant-input-group-compact>*:last-child,.ant-input-group.ant-input-group-compact>.ant-select:last-child>.ant-select-selector,.ant-input-group.ant-input-group-compact>.ant-calendar-picker:last-child .ant-input,.ant-input-group.ant-input-group-compact>.ant-select-auto-complete:last-child .ant-input,.ant-input-group.ant-input-group-compact>.ant-cascader-picker:last-child .ant-input,.ant-input-group.ant-input-group-compact>.ant-cascader-picker-focused:last-child .ant-input,.ant-input-group.ant-input-group-compact>.ant-mention-wrapper:last-child .ant-mention-editor,.ant-input-group.ant-input-group-compact>.ant-time-picker:last-child .ant-time-picker-input{border-right-width:1px;border-top-right-radius:2px;border-bottom-right-radius:2px}.ant-input-group.ant-input-group-compact>.ant-select-auto-complete .ant-input{vertical-align:top}.ant-input-group-wrapper{display:inline-block;width:100%;text-align:start;vertical-align:top}.ant-input-affix-wrapper{box-sizing:border-box;margin:0;font-variant:tabular-nums;list-style:none;font-feature-settings:"tnum";position:relative;display:inline-flex;border:1px solid #d9d9d9;border-radius:2px;padding:4px 11px;width:100%;text-align:start;background-color:#fff;background-image:none;color:#000000d9;font-size:14px;line-height:1.5715}.ant-input-affix-wrapper:hover{border-color:#40a9ff;border-right-width:1px!important}.ant-input-affix-wrapper-disabled{color:#00000040;background-color:#f5f5f5;cursor:not-allowed;opacity:1}.ant-input-affix-wrapper-disabled:hover{border-color:#d9d9d9;border-right-width:1px!important}.ant-input-affix-wrapper-focused{border-color:#40a9ff;border-right-width:1px!important;outline:0;box-shadow:0 0 0 2px #1890ff33}.ant-input-affix-wrapper-lg{padding:6.5px 11px;font-size:16px}.ant-input-affix-wrapper-sm{padding:0 
7px}.ant-input-affix-wrapper .ant-input{position:relative;text-align:inherit;border:none;padding:0}.ant-input-affix-wrapper .ant-input:focus{border:none;outline:none;box-shadow:none}.ant-input-affix-wrapper .ant-input-prefix,.ant-input-affix-wrapper .ant-input-suffix{display:flex;align-items:center;color:#000000d9;white-space:nowrap}.ant-input-affix-wrapper .ant-input-prefix :not(.anticon),.ant-input-affix-wrapper .ant-input-suffix :not(.anticon){line-height:1.5715}.ant-input-affix-wrapper .ant-input-disabled~.ant-input-suffix .anticon{color:#00000040;cursor:not-allowed}.ant-input-affix-wrapper .ant-input-prefix{margin-right:4px}.ant-input-affix-wrapper .ant-input-suffix{margin-left:4px}.ant-input-password-icon{color:#00000073;cursor:pointer;transition:all .3s}.ant-input-password-icon:hover{color:#000000d9}.ant-input-clear-icon{color:#00000040;font-size:12px;cursor:pointer;transition:color .3s;margin:0 4px;vertical-align:0}.ant-input-clear-icon:hover{color:#00000073}.ant-input-clear-icon:active{color:#000000d9}.ant-input-clear-icon+i{margin-left:6px}.ant-input-clear-icon-hidden,.ant-input-textarea-clear-icon-hidden{visibility:hidden}.ant-input-affix-wrapper-textarea-with-clear-btn{padding:0!important}.ant-input-affix-wrapper-textarea-with-clear-btn .ant-input{padding:4px 11px}.ant-input-textarea-clear-icon{color:#00000040;font-size:12px;cursor:pointer;transition:color .3s;position:absolute;top:0;right:0;margin:8px 8px 0 0}.ant-input-textarea-clear-icon:hover{color:#00000073}.ant-input-textarea-clear-icon:active{color:#000000d9}.ant-input-textarea-clear-icon+i{margin-left:6px}.ant-input-textarea-show-count:after{display:block;color:#00000073;text-align:right;content:attr(data-count)}.ant-input-search-icon{color:#00000073;cursor:pointer;transition:all .3s}.ant-input-search-icon:hover{color:#000000d9}.ant-input-search-enter-button input{border-right:0}.ant-input-search-enter-button+.ant-input-group-addon,.ant-input-search-enter-button 
input+.ant-input-group-addon{padding:0;border:0}.ant-input-search-enter-button+.ant-input-group-addon .ant-input-search-button,.ant-input-search-enter-button input+.ant-input-group-addon .ant-input-search-button{border-top-left-radius:0;border-bottom-left-radius:0}.ant-btn{line-height:1.5715;position:relative;display:inline-block;font-weight:400;white-space:nowrap;text-align:center;background-image:none;border:1px solid transparent;box-shadow:0 2px #00000004;cursor:pointer;transition:all .3s cubic-bezier(.645,.045,.355,1);-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;touch-action:manipulation;height:32px;padding:4px 15px;font-size:14px;border-radius:2px;color:#000000d9;background:#fff;border-color:#d9d9d9}.ant-btn>.anticon{line-height:1}.ant-btn,.ant-btn:active,.ant-btn:focus{outline:0}.ant-btn:not([disabled]):hover{text-decoration:none}.ant-btn:not([disabled]):active{outline:0;box-shadow:none}.ant-btn[disabled]{cursor:not-allowed}.ant-btn[disabled]>*{pointer-events:none}.ant-btn-lg{height:40px;padding:6.4px 15px;font-size:16px;border-radius:2px}.ant-btn-sm{height:24px;padding:0 
7px;font-size:14px;border-radius:2px}.ant-btn>a:only-child{color:currentColor}.ant-btn>a:only-child:after{position:absolute;top:0;right:0;bottom:0;left:0;background:transparent;content:""}.ant-btn:hover,.ant-btn:focus{color:#40a9ff;background:#fff;border-color:#40a9ff}.ant-btn:hover>a:only-child,.ant-btn:focus>a:only-child{color:currentColor}.ant-btn:hover>a:only-child:after,.ant-btn:focus>a:only-child:after{position:absolute;top:0;right:0;bottom:0;left:0;background:transparent;content:""}.ant-btn:active{color:#096dd9;background:#fff;border-color:#096dd9}.ant-btn:active>a:only-child{color:currentColor}.ant-btn:active>a:only-child:after{position:absolute;top:0;right:0;bottom:0;left:0;background:transparent;content:""}.ant-btn[disabled],.ant-btn[disabled]:hover,.ant-btn[disabled]:focus,.ant-btn[disabled]:active{color:#00000040;background:#f5f5f5;border-color:#d9d9d9;text-shadow:none;box-shadow:none}.ant-btn[disabled]>a:only-child,.ant-btn[disabled]:hover>a:only-child,.ant-btn[disabled]:focus>a:only-child,.ant-btn[disabled]:active>a:only-child{color:currentColor}.ant-btn[disabled]>a:only-child:after,.ant-btn[disabled]:hover>a:only-child:after,.ant-btn[disabled]:focus>a:only-child:after,.ant-btn[disabled]:active>a:only-child:after{position:absolute;top:0;right:0;bottom:0;left:0;background:transparent;content:""}.ant-btn:hover,.ant-btn:focus,.ant-btn:active{text-decoration:none;background:#fff}.ant-btn>span{display:inline-block}.ant-btn-primary{color:#fff;background:#1890ff;border-color:#1890ff;text-shadow:0 -1px 0 rgba(0,0,0,.12);box-shadow:0 2px 
#0000000b}.ant-btn-primary>a:only-child{color:currentColor}.ant-btn-primary>a:only-child:after{position:absolute;top:0;right:0;bottom:0;left:0;background:transparent;content:""}.ant-btn-primary:hover,.ant-btn-primary:focus{color:#fff;background:#40a9ff;border-color:#40a9ff}.ant-btn-primary:hover>a:only-child,.ant-btn-primary:focus>a:only-child{color:currentColor}.ant-btn-primary:hover>a:only-child:after,.ant-btn-primary:focus>a:only-child:after{position:absolute;top:0;right:0;bottom:0;left:0;background:transparent;content:""}.ant-btn-primary:active{color:#fff;background:#096dd9;border-color:#096dd9}.ant-btn-primary:active>a:only-child{color:currentColor}.ant-btn-primary:active>a:only-child:after{position:absolute;top:0;right:0;bottom:0;left:0;background:transparent;content:""}.ant-btn-primary[disabled],.ant-btn-primary[disabled]:hover,.ant-btn-primary[disabled]:focus,.ant-btn-primary[disabled]:active{color:#00000040;background:#f5f5f5;border-color:#d9d9d9;text-shadow:none;box-shadow:none}.ant-btn-primary[disabled]>a:only-child,.ant-btn-primary[disabled]:hover>a:only-child,.ant-btn-primary[disabled]:focus>a:only-child,.ant-btn-primary[disabled]:active>a:only-child{color:currentColor}.ant-btn-primary[disabled]>a:only-child:after,.ant-btn-primary[disabled]:hover>a:only-child:after,.ant-btn-primary[disabled]:focus>a:only-child:after,.ant-btn-primary[disabled]:active>a:only-child:after{position:absolute;top:0;right:0;bottom:0;left:0;background:transparent;content:""}.ant-btn-group .ant-btn-primary:not(:first-child):not(:last-child){border-right-color:#40a9ff;border-left-color:#40a9ff}.ant-btn-group .ant-btn-primary:not(:first-child):not(:last-child):disabled{border-color:#d9d9d9}.ant-btn-group .ant-btn-primary:first-child:not(:last-child){border-right-color:#40a9ff}.ant-btn-group .ant-btn-primary:first-child:not(:last-child)[disabled]{border-right-color:#d9d9d9}.ant-btn-group .ant-btn-primary:last-child:not(:first-child),.ant-btn-group 
.ant-btn-primary+.ant-btn-primary{border-left-color:#40a9ff}.ant-btn-group .ant-btn-primary:last-child:not(:first-child)[disabled],.ant-btn-group .ant-btn-primary+.ant-btn-primary[disabled]{border-left-color:#d9d9d9}.ant-btn-ghost{color:#000000d9;background:transparent;border-color:#d9d9d9}.ant-btn-ghost>a:only-child{color:currentColor}.ant-btn-ghost>a:only-child:after{position:absolute;top:0;right:0;bottom:0;left:0;background:transparent;content:""}.ant-btn-ghost:hover,.ant-btn-ghost:focus{color:#40a9ff;background:transparent;border-color:#40a9ff}.ant-btn-ghost:hover>a:only-child,.ant-btn-ghost:focus>a:only-child{color:currentColor}.ant-btn-ghost:hover>a:only-child:after,.ant-btn-ghost:focus>a:only-child:after{position:absolute;top:0;right:0;bottom:0;left:0;background:transparent;content:""}.ant-btn-ghost:active{color:#096dd9;background:transparent;border-color:#096dd9}.ant-btn-ghost:active>a:only-child{color:currentColor}.ant-btn-ghost:active>a:only-child:after{position:absolute;top:0;right:0;bottom:0;left:0;background:transparent;content:""}.ant-btn-ghost[disabled],.ant-btn-ghost[disabled]:hover,.ant-btn-ghost[disabled]:focus,.ant-btn-ghost[disabled]:active{color:#00000040;background:#f5f5f5;border-color:#d9d9d9;text-shadow:none;box-shadow:none}.ant-btn-ghost[disabled]>a:only-child,.ant-btn-ghost[disabled]:hover>a:only-child,.ant-btn-ghost[disabled]:focus>a:only-child,.ant-btn-ghost[disabled]:active>a:only-child{color:currentColor}.ant-btn-ghost[disabled]>a:only-child:after,.ant-btn-ghost[disabled]:hover>a:only-child:after,.ant-btn-ghost[disabled]:focus>a:only-child:after,.ant-btn-ghost[disabled]:active>a:only-child:after{position:absolute;top:0;right:0;bottom:0;left:0;background:transparent;content:""}.ant-btn-dashed{color:#000000d9;background:#fff;border-color:#d9d9d9;border-style:dashed}.ant-btn-dashed>a:only-child{color:currentColor}.ant-btn-dashed>a:only-child:after{position:absolute;top:0;right:0;bottom:0;left:0;background:transparent;content:""}.ant-btn-da
shed:hover,.ant-btn-dashed:focus{color:#40a9ff;background:#fff;border-color:#40a9ff}.ant-btn-dashed:hover>a:only-child,.ant-btn-dashed:focus>a:only-child{color:currentColor}.ant-btn-dashed:hover>a:only-child:after,.ant-btn-dashed:focus>a:only-child:after{position:absolute;top:0;right:0;bottom:0;left:0;background:transparent;content:""}.ant-btn-dashed:active{color:#096dd9;background:#fff;border-color:#096dd9}.ant-btn-dashed:active>a:only-child{color:currentColor}.ant-btn-dashed:active>a:only-child:after{position:absolute;top:0;right:0;bottom:0;left:0;background:transparent;content:""}.ant-btn-dashed[disabled],.ant-btn-dashed[disabled]:hover,.ant-btn-dashed[disabled]:focus,.ant-btn-dashed[disabled]:active{color:#00000040;background:#f5f5f5;border-color:#d9d9d9;text-shadow:none;box-shadow:none}.ant-btn-dashed[disabled]>a:only-child,.ant-btn-dashed[disabled]:hover>a:only-child,.ant-btn-dashed[disabled]:focus>a:only-child,.ant-btn-dashed[disabled]:active>a:only-child{color:currentColor}.ant-btn-dashed[disabled]>a:only-child:after,.ant-btn-dashed[disabled]:hover>a:only-child:after,.ant-btn-dashed[disabled]:focus>a:only-child:after,.ant-btn-dashed[disabled]:active>a:only-child:after{position:absolute;top:0;right:0;bottom:0;left:0;background:transparent;content:""}.ant-btn-danger{color:#fff;background:#ff7875;border-color:#ff7875;text-shadow:0 -1px 0 rgba(0,0,0,.12);box-shadow:0 2px 
#0000000b;
}

/* ==== Danger button: lone <a> child and its :after pseudo-element ==== */
.ant-btn-danger > a:only-child {
  color: currentColor;
}
.ant-btn-danger > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* Danger: hover / focus */
.ant-btn-danger:hover,
.ant-btn-danger:focus {
  color: #fff;
  background: #ffa39e;
  border-color: #ffa39e;
}
.ant-btn-danger:hover > a:only-child,
.ant-btn-danger:focus > a:only-child {
  color: currentColor;
}
.ant-btn-danger:hover > a:only-child:after,
.ant-btn-danger:focus > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* Danger: active */
.ant-btn-danger:active {
  color: #fff;
  background: #d9595b;
  border-color: #d9595b;
}
.ant-btn-danger:active > a:only-child {
  color: currentColor;
}
.ant-btn-danger:active > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* Danger: disabled */
.ant-btn-danger[disabled],
.ant-btn-danger[disabled]:hover,
.ant-btn-danger[disabled]:focus,
.ant-btn-danger[disabled]:active {
  color: #00000040;
  background: #f5f5f5;
  border-color: #d9d9d9;
  text-shadow: none;
  box-shadow: none;
}
.ant-btn-danger[disabled] > a:only-child,
.ant-btn-danger[disabled]:hover > a:only-child,
.ant-btn-danger[disabled]:focus > a:only-child,
.ant-btn-danger[disabled]:active > a:only-child {
  color: currentColor;
}
.ant-btn-danger[disabled] > a:only-child:after,
.ant-btn-danger[disabled]:hover > a:only-child:after,
.ant-btn-danger[disabled]:focus > a:only-child:after,
.ant-btn-danger[disabled]:active > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* ==== Link button ==== */
.ant-btn-link {
  color: #1890ff;
  background: transparent;
  border-color: transparent;
  box-shadow: none;
}
.ant-btn-link > a:only-child {
  color: currentColor;
}
.ant-btn-link > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* Link: hover / focus (the last rule continues on the following line) */
.ant-btn-link:hover,
.ant-btn-link:focus {
  color: #40a9ff;
  background: transparent;
  border-color: #40a9ff;
}
.ant-btn-link:hover > a:only-child,
.ant-btn-link:focus > a:only-child {
  color: currentColor;
}
.ant-btn-link:hover > a:only-child:after,
.ant-btn-link:focus > a:only-child:after {
  position: absolute;
  top: 0
;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* ==== Link button: active ==== */
.ant-btn-link:active {
  color: #096dd9;
  background: transparent;
  border-color: #096dd9;
}
.ant-btn-link:active > a:only-child {
  color: currentColor;
}
.ant-btn-link:active > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* Link: generic disabled look (overridden by the transparent variant below) */
.ant-btn-link[disabled],
.ant-btn-link[disabled]:hover,
.ant-btn-link[disabled]:focus,
.ant-btn-link[disabled]:active {
  color: #00000040;
  background: #f5f5f5;
  border-color: #d9d9d9;
  text-shadow: none;
  box-shadow: none;
}

/* Link: no visible background or border in any interactive state */
.ant-btn-link:hover {
  background: transparent;
}
.ant-btn-link:hover,
.ant-btn-link:focus,
.ant-btn-link:active {
  border-color: transparent;
}

/* Link: disabled, transparent variant (later in source order, so it wins) */
.ant-btn-link[disabled],
.ant-btn-link[disabled]:hover,
.ant-btn-link[disabled]:focus,
.ant-btn-link[disabled]:active {
  color: #00000040;
  background: transparent;
  border-color: transparent;
  text-shadow: none;
  box-shadow: none;
}
.ant-btn-link[disabled] > a:only-child,
.ant-btn-link[disabled]:hover > a:only-child,
.ant-btn-link[disabled]:focus > a:only-child,
.ant-btn-link[disabled]:active > a:only-child {
  color: currentColor;
}
.ant-btn-link[disabled] > a:only-child:after,
.ant-btn-link[disabled]:hover > a:only-child:after,
.ant-btn-link[disabled]:focus > a:only-child:after,
.ant-btn-link[disabled]:active > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* ==== Text button ==== */
.ant-btn-text {
  color: #000000d9;
  background: transparent;
  border-color: transparent;
  box-shadow: none;
}
.ant-btn-text > a:only-child {
  color: currentColor;
}
.ant-btn-text > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* Text: generic hover / focus (overridden further down) */
.ant-btn-text:hover,
.ant-btn-text:focus {
  color: #40a9ff;
  background: transparent;
  border-color: #40a9ff;
}
.ant-btn-text:hover > a:only-child,
.ant-btn-text:focus > a:only-child {
  color: currentColor;
}
.ant-btn-text:hover > a:only-child:after,
.ant-btn-text:focus > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* Text: generic active (overridden further down) */
.ant-btn-text:active {
  color: #096dd9;
  background: transparent;
  border-color: #096dd9;
}
.ant-btn-text:active > a:only-child {
  color: currentColor;
}
.ant-btn-text:active > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* Text: generic disabled look (overridden by the transparent variant below) */
.ant-btn-text[disabled],
.ant-btn-text[disabled]:hover,
.ant-btn-text[disabled]:focus,
.ant-btn-text[disabled]:active {
  color: #00000040;
  background: #f5f5f5;
  border-color: #d9d9d9;
  text-shadow: none;
  box-shadow: none;
}

/* Text: final hover / focus / active look, a faint black tint */
.ant-btn-text:hover,
.ant-btn-text:focus {
  color: #000000d9;
  background: rgba(0, 0, 0, .018);
  border-color: transparent;
}
.ant-btn-text:active {
  color: #000000d9;
  background: rgba(0, 0, 0, .028);
  border-color: transparent;
}

/* Text: disabled, transparent variant */
.ant-btn-text[disabled],
.ant-btn-text[disabled]:hover,
.ant-btn-text[disabled]:focus,
.ant-btn-text[disabled]:active {
  color: #00000040;
  background: transparent;
  border-color: transparent;
  text-shadow: none;
  box-shadow: none;
}
.ant-btn-text[disabled] > a:only-child,
.ant-btn-text[disabled]:hover > a:only-child,
.ant-btn-text[disabled]:focus > a:only-child,
.ant-btn-text[disabled]:active > a:only-child {
  color: currentColor;
}
.ant-btn-text[disabled] > a:only-child:after,
.ant-btn-text[disabled]:hover > a:only-child:after,
.ant-btn-text[disabled]:focus > a:only-child:after,
.ant-btn-text[disabled]:active > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* ==== Dangerous button (default type) ==== */
.ant-btn-dangerous {
  color: #ff4d4f;
  background: #fff;
  border-color: #ff4d4f;
}
.ant-btn-dangerous > a:only-child {
  color: currentColor;
}
.ant-btn-dangerous > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* Dangerous: hover / focus */
.ant-btn-dangerous:hover,
.ant-btn-dangerous:focus {
  color: #ff7875;
  background: #fff;
  border-color: #ff7875;
}
.ant-btn-dangerous:hover > a:only-child,
.ant-btn-dangerous:focus > a:only-child {
  color: currentColor;
}
.ant-btn-dangerous:hover > a:only-child:after,
.ant-btn-dangerous:focus > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* Dangerous: active */
.ant-btn-dangerous:active {
  color: #d9363e;
  background: #fff;
  border-color: #d9363e;
}
.ant-btn-dangerous:active > a:only-child {
  color: currentColor;
}
.ant-btn-dangerous:active > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* Dangerous: disabled */
.ant-btn-dangerous[disabled],
.ant-btn-dangerous[disabled]:hover,
.ant-btn-dangerous[disabled]:focus,
.ant-btn-dangerous[disabled]:active {
  color: #00000040;
  background: #f5f5f5;
  border-color: #d9d9d9;
  text-shadow: none;
  box-shadow: none;
}
.ant-btn-dangerous[disabled] > a:only-child,
.ant-btn-dangerous[disabled]:hover > a:only-child,
.ant-btn-dangerous[disabled]:focus > a:only-child,
.ant-btn-dangerous[disabled]:active > a:only-child {
  color: currentColor;
}
.ant-btn-dangerous[disabled] > a:only-child:after,
.ant-btn-dangerous[disabled]:hover > a:only-child:after,
.ant-btn-dangerous[disabled]:focus > a:only-child:after,
.ant-btn-dangerous[disabled]:active > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* ==== Dangerous + primary ==== */
.ant-btn-dangerous.ant-btn-primary {
  color: #fff;
  background: #ff7875;
  border-color: #ff7875;
  text-shadow: 0 -1px 0 rgba(0, 0, 0, .12);
  box-shadow: 0 2px #0000000b;
}
.ant-btn-dangerous.ant-btn-primary > a:only-child {
  color: currentColor;
}
.ant-btn-dangerous.ant-btn-primary > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* Dangerous + primary: hover / focus */
.ant-btn-dangerous.ant-btn-primary:hover,
.ant-btn-dangerous.ant-btn-primary:focus {
  color: #fff;
  background: #ffa39e;
  border-color: #ffa39e;
}
.ant-btn-dangerous.ant-btn-primary:hover > a:only-child,
.ant-btn-dangerous.ant-btn-primary:focus > a:only-child {
  color: currentColor;
}
.ant-btn-dangerous.ant-btn-primary:hover > a:only-child:after,
.ant-btn-dangerous.ant-btn-primary:focus > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* Dangerous + primary: active */
.ant-btn-dangerous.ant-btn-primary:active {
  color: #fff;
  background: #d9595b;
  border-color: #d9595b;
}
.ant-btn-dangerous.ant-btn-primary:active > a:only-child {
  color: currentColor;
}
.ant-btn-dangerous.ant-btn-primary:active > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* Dangerous + primary: disabled */
.ant-btn-dangerous.ant-btn-primary[disabled],
.ant-btn-dangerous.ant-btn-primary[disabled]:hover,
.ant-btn-dangerous.ant-btn-primary[disabled]:focus,
.ant-btn-dangerous.ant-btn-primary[disabled]:active {
  color: #00000040;
  background: #f5f5f5;
  border-color: #d9d9d9;
  text-shadow: none;
  box-shadow: none;
}
.ant-btn-dangerous.ant-btn-primary[disabled] > a:only-child,
.ant-btn-dangerous.ant-btn-primary[disabled]:hover > a:only-child,
.ant-btn-dangerous.ant-btn-primary[disabled]:focus > a:only-child,
.ant-btn-dangerous.ant-btn-primary[disabled]:active > a:only-child {
  color: currentColor;
}
.ant-btn-dangerous.ant-btn-primary[disabled] > a:only-child:after,
.ant-btn-dangerous.ant-btn-primary[disabled]:hover > a:only-child:after,
.ant-btn-dangerous.ant-btn-primary[disabled]:focus > a:only-child:after,
.ant-btn-dangerous.ant-btn-primary[disabled]:active > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* ==== Dangerous + link ==== */
.ant-btn-dangerous.ant-btn-link {
  color: #ff4d4f;
  background: transparent;
  border-color: transparent;
  box-shadow: none;
}
.ant-btn-dangerous.ant-btn-link > a:only-child {
  color: currentColor;
}
.ant-btn-dangerous.ant-btn-link > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* Dangerous + link: generic state colours (overridden further down) */
.ant-btn-dangerous.ant-btn-link:hover,
.ant-btn-dangerous.ant-btn-link:focus {
  color: #40a9ff;
  background: transparent;
  border-color: #40a9ff;
}
.ant-btn-dangerous.ant-btn-link:active {
  color: #096dd9;
  background: transparent;
  border-color: #096dd9;
}
.ant-btn-dangerous.ant-btn-link[disabled],
.ant-btn-dangerous.ant-btn-link[disabled]:hover,
.ant-btn-dangerous.ant-btn-link[disabled]:focus,
.ant-btn-dangerous.ant-btn-link[disabled]:active {
  color: #00000040;
  background: #f5f5f5;
  border-color: #d9d9d9;
  text-shadow: none;
  box-shadow: none;
}

/* Dangerous + link: final hover / focus */
.ant-btn-dangerous.ant-btn-link:hover,
.ant-btn-dangerous.ant-btn-link:focus {
  color: #ff7875;
  background: transparent;
  border-color: transparent;
}
.ant-btn-dangerous.ant-btn-link:hover > a:only-child,
.ant-btn-dangerous.ant-btn-link:focus > a:only-child {
  color: currentColor;
}
.ant-btn-dangerous.ant-btn-link:hover > a:only-child:after,
.ant-btn-dangerous.ant-btn-link:focus > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* Dangerous + link: final active */
.ant-btn-dangerous.ant-btn-link:active {
  color: #d9363e;
  background: transparent;
  border-color: transparent;
}
.ant-btn-dangerous.ant-btn-link:active > a:only-child {
  color: currentColor;
}
.ant-btn-dangerous.ant-btn-link:active > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* Dangerous + link: disabled, transparent variant */
.ant-btn-dangerous.ant-btn-link[disabled],
.ant-btn-dangerous.ant-btn-link[disabled]:hover,
.ant-btn-dangerous.ant-btn-link[disabled]:focus,
.ant-btn-dangerous.ant-btn-link[disabled]:active {
  color: #00000040;
  background: transparent;
  border-color: transparent;
  text-shadow: none;
  box-shadow: none;
}
.ant-btn-dangerous.ant-btn-link[disabled] > a:only-child,
.ant-btn-dangerous.ant-btn-link[disabled]:hover > a:only-child,
.ant-btn-dangerous.ant-btn-link[disabled]:focus > a:only-child,
.ant-btn-dangerous.ant-btn-link[disabled]:active > a:only-child {
  color: currentColor;
}
.ant-btn-dangerous.ant-btn-link[disabled] > a:only-child:after,
.ant-btn-dangerous.ant-btn-link[disabled]:hover > a:only-child:after,
.ant-btn-dangerous.ant-btn-link[disabled]:focus > a:only-child:after,
.ant-btn-dangerous.ant-btn-link[disabled]:active > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* ==== Dangerous + text ==== */
.ant-btn-dangerous.ant-btn-text {
  color: #ff4d4f;
  background: transparent;
  border-color: transparent;
  box-shadow: none;
}
.ant-btn-dangerous.ant-btn-text > a:only-child {
  color: currentColor;
}
.ant-btn-dangerous.ant-btn-text > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* Dangerous + text: generic state colours (overridden further down) */
.ant-btn-dangerous.ant-btn-text:hover,
.ant-btn-dangerous.ant-btn-text:focus {
  color: #40a9ff;
  background: transparent;
  border-color: #40a9ff;
}
.ant-btn-dangerous.ant-btn-text:active {
  color: #096dd9;
  background: transparent;
  border-color: #096dd9;
}
.ant-btn-dangerous.ant-btn-text[disabled],
.ant-btn-dangerous.ant-btn-text[disabled]:hover,
.ant-btn-dangerous.ant-btn-text[disabled]:focus,
.ant-btn-dangerous.ant-btn-text[disabled]:active {
  color: #00000040;
  background: #f5f5f5;
  border-color: #d9d9d9;
  text-shadow: none;
  box-shadow: none;
}

/* Dangerous + text: final hover / focus */
.ant-btn-dangerous.ant-btn-text:hover,
.ant-btn-dangerous.ant-btn-text:focus {
  color: #ff7875;
  background: rgba(0, 0, 0, .018);
  border-color: transparent;
}
.ant-btn-dangerous.ant-btn-text:hover > a:only-child,
.ant-btn-dangerous.ant-btn-text:focus > a:only-child {
  color: currentColor;
}
.ant-btn-dangerous.ant-btn-text:hover > a:only-child:after,
.ant-btn-dangerous.ant-btn-text:focus > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* Dangerous + text: final active */
.ant-btn-dangerous.ant-btn-text:active {
  color: #d9363e;
  background: rgba(0, 0, 0, .028);
  border-color: transparent;
}
.ant-btn-dangerous.ant-btn-text:active > a:only-child {
  color: currentColor;
}
.ant-btn-dangerous.ant-btn-text:active > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* Dangerous + text: disabled, transparent variant */
.ant-btn-dangerous.ant-btn-text[disabled],
.ant-btn-dangerous.ant-btn-text[disabled]:hover,
.ant-btn-dangerous.ant-btn-text[disabled]:focus,
.ant-btn-dangerous.ant-btn-text[disabled]:active {
  color: #00000040;
  background: transparent;
  border-color: transparent;
  text-shadow: none;
  box-shadow: none;
}
.ant-btn-dangerous.ant-btn-text[disabled] > a:only-child,
.ant-btn-dangerous.ant-btn-text[disabled]:hover > a:only-child,
.ant-btn-dangerous.ant-btn-text[disabled]:focus > a:only-child,
.ant-btn-dangerous.ant-btn-text[disabled]:active > a:only-child {
  color: currentColor;
}
.ant-btn-dangerous.ant-btn-text[disabled] > a:only-child:after,
.ant-btn-dangerous.ant-btn-text[disabled]:hover > a:only-child:after,
.ant-btn-dangerous.ant-btn-text[disabled]:focus > a:only-child:after,
.ant-btn-dangerous.ant-btn-text[disabled]:active > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* ==== Icon-only button (rule continues on the following line) ==== */
.ant-btn-icon-only {
  width: 32px;
  height: 32px;
  padding: 2.4px 
0;
  font-size: 16px;
  border-radius: 2px;
  vertical-align: -1px;
}

/* ==== Icon-only button: square box per size, children share the font size ==== */
.ant-btn-icon-only > * {
  font-size: 16px;
}
.ant-btn-icon-only.ant-btn-lg {
  width: 40px;
  height: 40px;
  padding: 4.9px 0;
  font-size: 18px;
  border-radius: 2px;
}
.ant-btn-icon-only.ant-btn-lg > * {
  font-size: 18px;
}
.ant-btn-icon-only.ant-btn-sm {
  width: 24px;
  height: 24px;
  padding: 0;
  font-size: 14px;
  border-radius: 2px;
}
.ant-btn-icon-only.ant-btn-sm > * {
  font-size: 14px;
}

/* ==== Round button: border radius equals the height for each size ==== */
.ant-btn-round {
  height: 32px;
  padding: 4px 16px;
  font-size: 14px;
  border-radius: 32px;
}
.ant-btn-round.ant-btn-lg {
  height: 40px;
  padding: 6.4px 20px;
  font-size: 16px;
  border-radius: 40px;
}
.ant-btn-round.ant-btn-sm {
  height: 24px;
  padding: 0 12px;
  font-size: 14px;
  border-radius: 24px;
}
.ant-btn-round.ant-btn-icon-only {
  width: auto;
}

/* ==== Circle button ==== */
.ant-btn-circle {
  min-width: 32px;
  padding-right: 0;
  padding-left: 0;
  text-align: center;
  border-radius: 50%;
}
.ant-btn-circle.ant-btn-lg {
  min-width: 40px;
  border-radius: 50%;
}
.ant-btn-circle.ant-btn-sm {
  min-width: 24px;
  border-radius: 50%;
}

/* White translucent :before layer; hidden here, shown by the loading rule below. */
.ant-btn:before {
  position: absolute;
  top: -1px;
  right: -1px;
  bottom: -1px;
  left: -1px;
  z-index: 1;
  display: none;
  background: #fff;
  border-radius: inherit;
  opacity: .35;
  transition: opacity .2s;
  content: "";
  pointer-events: none;
}

/* Icons inside a button */
.ant-btn .anticon {
  transition: margin-left .3s cubic-bezier(.645, .045, .355, 1);
}
.ant-btn .anticon.anticon-plus > svg,
.ant-btn .anticon.anticon-minus > svg {
  shape-rendering: optimizeSpeed;
}

/* ==== Loading state ==== */
.ant-btn.ant-btn-loading {
  position: relative;
}
.ant-btn.ant-btn-loading:not([disabled]) {
  pointer-events: none;
}
.ant-btn.ant-btn-loading:before {
  display: block;
}
.ant-btn.ant-btn-loading:not(.ant-btn-circle):not(.ant-btn-circle-outline):not(.ant-btn-icon-only) {
  padding-left: 29px;
}
.ant-btn.ant-btn-loading:not(.ant-btn-circle):not(.ant-btn-circle-outline):not(.ant-btn-icon-only) .anticon:not(:last-child) {
  margin-left: -14px;
}
.ant-btn-sm.ant-btn-loading:not(.ant-btn-circle):not(.ant-btn-circle-outline):not(.ant-btn-icon-only) {
  padding-left: 24px;
}
/* Selector continues on the following line. */
.ant-btn-sm.ant-btn-loading:not(.ant-btn-circle):not(.ant-btn-circle-outline):not(.ant-btn-icon-only) 
.anticon {
  margin-left: -17px;
}

/* ==== Button group ==== */
.ant-btn-group {
  position: relative;
  display: inline-flex;
}
.ant-btn-group > .ant-btn,
.ant-btn-group > span > .ant-btn {
  position: relative;
}

/* Stacking order: interacted buttons are raised, disabled ones lowered. */
.ant-btn-group > .ant-btn:hover,
.ant-btn-group > span > .ant-btn:hover,
.ant-btn-group > .ant-btn:focus,
.ant-btn-group > span > .ant-btn:focus,
.ant-btn-group > .ant-btn:active,
.ant-btn-group > span > .ant-btn:active {
  z-index: 2;
}
.ant-btn-group > .ant-btn[disabled],
.ant-btn-group > span > .ant-btn[disabled] {
  z-index: 0;
}
.ant-btn-group .ant-btn-icon-only {
  font-size: 14px;
}

/* Large group */
.ant-btn-group-lg > .ant-btn,
.ant-btn-group-lg > span > .ant-btn {
  height: 40px;
  padding: 6.4px 15px;
  font-size: 16px;
  border-radius: 0;
}
.ant-btn-group-lg .ant-btn.ant-btn-icon-only {
  width: 40px;
  height: 40px;
  padding-right: 0;
  padding-left: 0;
}

/* Small group */
.ant-btn-group-sm > .ant-btn,
.ant-btn-group-sm > span > .ant-btn {
  height: 24px;
  padding: 0 7px;
  font-size: 14px;
  border-radius: 0;
}
.ant-btn-group-sm > .ant-btn > .anticon,
.ant-btn-group-sm > span > .ant-btn > .anticon {
  font-size: 14px;
}
.ant-btn-group-sm .ant-btn.ant-btn-icon-only {
  width: 24px;
  height: 24px;
  padding-right: 0;
  padding-left: 0;
}

/* Adjacent buttons/groups are pulled left by 1px (negative left margin). */
.ant-btn-group .ant-btn + .ant-btn,
.ant-btn + .ant-btn-group,
.ant-btn-group span + .ant-btn,
.ant-btn-group .ant-btn + span,
.ant-btn-group > span + span,
.ant-btn-group + .ant-btn,
.ant-btn-group + .ant-btn-group {
  margin-left: -1px;
}
.ant-btn-group .ant-btn-primary + .ant-btn:not(.ant-btn-primary):not([disabled]) {
  border-left-color: transparent;
}
/* Selector continues on the following line. */
.ant-btn-group 
.ant-btn {
  border-radius: 0;
}

/* ==== Button group: margins and corner radii ==== */
.ant-btn-group > .ant-btn:first-child,
.ant-btn-group > span:first-child > .ant-btn {
  margin-left: 0;
}
.ant-btn-group > .ant-btn:only-child {
  border-radius: 2px;
}
.ant-btn-group > span:only-child > .ant-btn {
  border-radius: 2px;
}
.ant-btn-group > .ant-btn:first-child:not(:last-child),
.ant-btn-group > span:first-child:not(:last-child) > .ant-btn {
  border-top-left-radius: 2px;
  border-bottom-left-radius: 2px;
}
.ant-btn-group > .ant-btn:last-child:not(:first-child),
.ant-btn-group > span:last-child:not(:first-child) > .ant-btn {
  border-top-right-radius: 2px;
  border-bottom-right-radius: 2px;
}

/* Small group: same corner treatment */
.ant-btn-group-sm > .ant-btn:only-child {
  border-radius: 2px;
}
.ant-btn-group-sm > span:only-child > .ant-btn {
  border-radius: 2px;
}
.ant-btn-group-sm > .ant-btn:first-child:not(:last-child),
.ant-btn-group-sm > span:first-child:not(:last-child) > .ant-btn {
  border-top-left-radius: 2px;
  border-bottom-left-radius: 2px;
}
.ant-btn-group-sm > .ant-btn:last-child:not(:first-child),
.ant-btn-group-sm > span:last-child:not(:first-child) > .ant-btn {
  border-top-right-radius: 2px;
  border-bottom-right-radius: 2px;
}

/* Nested groups */
.ant-btn-group > .ant-btn-group {
  float: left;
}
.ant-btn-group > .ant-btn-group:not(:first-child):not(:last-child) > .ant-btn {
  border-radius: 0;
}
.ant-btn-group > .ant-btn-group:first-child:not(:last-child) > .ant-btn:last-child {
  padding-right: 8px;
  border-top-right-radius: 0;
  border-bottom-right-radius: 0;
}
.ant-btn-group > .ant-btn-group:last-child:not(:first-child) > .ant-btn:first-child {
  padding-left: 8px;
  border-top-left-radius: 0;
  border-bottom-left-radius: 0;
}

/* RTL adjacency margins (selector list continues on the following line) */
.ant-btn-rtl.ant-btn-group .ant-btn + .ant-btn,
.ant-btn-rtl.ant-btn + .ant-btn-group,
.ant-btn-rtl.ant-btn-group span + .ant-btn,
.ant-btn-rtl.ant-btn-group .ant-btn + span,
.ant-btn-rtl.ant-btn-group > span + span,
.ant-btn-rtl.ant-btn-group + .ant-btn,
.ant-btn-rtl.ant-btn-group + .ant-btn-group,
.ant-btn-group-rtl.ant-btn-group .ant-btn + .ant-btn,
.ant-btn-group-rtl.ant-btn + .ant-btn-group,
.ant-btn-group-rtl.ant-btn-group span + .ant-btn,
.ant-btn-group-rtl.ant-btn-group 
.ant-btn + span,
.ant-btn-group-rtl.ant-btn-group > span + span,
.ant-btn-group-rtl.ant-btn-group + .ant-btn,
.ant-btn-group-rtl.ant-btn-group + .ant-btn-group {
  margin-right: -1px;
  margin-left: auto;
}

/* ==== RTL button group: direction and mirrored corner radii ==== */
.ant-btn-group.ant-btn-group-rtl {
  direction: rtl;
}
.ant-btn-group-rtl.ant-btn-group > .ant-btn:first-child:not(:last-child),
.ant-btn-group-rtl.ant-btn-group > span:first-child:not(:last-child) > .ant-btn {
  border-radius: 0 2px 2px 0;
}
.ant-btn-group-rtl.ant-btn-group > .ant-btn:last-child:not(:first-child),
.ant-btn-group-rtl.ant-btn-group > span:last-child:not(:first-child) > .ant-btn {
  border-radius: 2px 0 0 2px;
}
.ant-btn-group-rtl.ant-btn-group-sm > .ant-btn:first-child:not(:last-child),
.ant-btn-group-rtl.ant-btn-group-sm > span:first-child:not(:last-child) > .ant-btn {
  border-radius: 0 2px 2px 0;
}
.ant-btn-group-rtl.ant-btn-group-sm > .ant-btn:last-child:not(:first-child),
.ant-btn-group-rtl.ant-btn-group-sm > span:last-child:not(:first-child) > .ant-btn {
  border-radius: 2px 0 0 2px;
}

/* Button content: label positioning and icon/label gap */
.ant-btn:focus > span,
.ant-btn:active > span {
  position: relative;
}
.ant-btn > .anticon + span,
.ant-btn > span + .anticon {
  margin-left: 8px;
}

/* ==== Background-ghost buttons ==== */
.ant-btn-background-ghost {
  color: #fff;
  background: transparent !important;
  border-color: #fff;
}

/* ---- Background-ghost + primary ---- */
.ant-btn-background-ghost.ant-btn-primary {
  color: #1890ff;
  background: transparent;
  border-color: #1890ff;
  text-shadow: none;
}
.ant-btn-background-ghost.ant-btn-primary > a:only-child {
  color: currentColor;
}
.ant-btn-background-ghost.ant-btn-primary > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}
.ant-btn-background-ghost.ant-btn-primary:hover,
.ant-btn-background-ghost.ant-btn-primary:focus {
  color: #40a9ff;
  background: transparent;
  border-color: #40a9ff;
}
.ant-btn-background-ghost.ant-btn-primary:hover > a:only-child,
.ant-btn-background-ghost.ant-btn-primary:focus > a:only-child {
  color: currentColor;
}
.ant-btn-background-ghost.ant-btn-primary:hover > a:only-child:after,
.ant-btn-background-ghost.ant-btn-primary:focus > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}
.ant-btn-background-ghost.ant-btn-primary:active {
  color: #096dd9;
  background: transparent;
  border-color: #096dd9;
}
.ant-btn-background-ghost.ant-btn-primary:active > a:only-child {
  color: currentColor;
}
.ant-btn-background-ghost.ant-btn-primary:active > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}
.ant-btn-background-ghost.ant-btn-primary[disabled],
.ant-btn-background-ghost.ant-btn-primary[disabled]:hover,
.ant-btn-background-ghost.ant-btn-primary[disabled]:focus,
.ant-btn-background-ghost.ant-btn-primary[disabled]:active {
  color: #00000040;
  background: #f5f5f5;
  border-color: #d9d9d9;
  text-shadow: none;
  box-shadow: none;
}
.ant-btn-background-ghost.ant-btn-primary[disabled] > a:only-child,
.ant-btn-background-ghost.ant-btn-primary[disabled]:hover > a:only-child,
.ant-btn-background-ghost.ant-btn-primary[disabled]:focus > a:only-child,
.ant-btn-background-ghost.ant-btn-primary[disabled]:active > a:only-child {
  color: currentColor;
}
.ant-btn-background-ghost.ant-btn-primary[disabled] > a:only-child:after,
.ant-btn-background-ghost.ant-btn-primary[disabled]:hover > a:only-child:after,
.ant-btn-background-ghost.ant-btn-primary[disabled]:focus > a:only-child:after,
.ant-btn-background-ghost.ant-btn-primary[disabled]:active > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* ---- Background-ghost + danger ---- */
.ant-btn-background-ghost.ant-btn-danger {
  color: #ff7875;
  background: transparent;
  border-color: #ff7875;
  text-shadow: none;
}
.ant-btn-background-ghost.ant-btn-danger > a:only-child {
  color: currentColor;
}
.ant-btn-background-ghost.ant-btn-danger > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}
.ant-btn-background-ghost.ant-btn-danger:hover,
.ant-btn-background-ghost.ant-btn-danger:focus {
  color: #ffa39e;
  background: transparent;
  border-color: #ffa39e;
}
.ant-btn-background-ghost.ant-btn-danger:hover > a:only-child,
.ant-btn-background-ghost.ant-btn-danger:focus > a:only-child {
  color: currentColor;
}
.ant-btn-background-ghost.ant-btn-danger:hover > a:only-child:after,
.ant-btn-background-ghost.ant-btn-danger:focus > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}
.ant-btn-background-ghost.ant-btn-danger:active {
  color: #d9595b;
  background: transparent;
  border-color: #d9595b;
}
.ant-btn-background-ghost.ant-btn-danger:active > a:only-child {
  color: currentColor;
}
.ant-btn-background-ghost.ant-btn-danger:active > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}
.ant-btn-background-ghost.ant-btn-danger[disabled],
.ant-btn-background-ghost.ant-btn-danger[disabled]:hover,
.ant-btn-background-ghost.ant-btn-danger[disabled]:focus,
.ant-btn-background-ghost.ant-btn-danger[disabled]:active {
  color: #00000040;
  background: #f5f5f5;
  border-color: #d9d9d9;
  text-shadow: none;
  box-shadow: none;
}
.ant-btn-background-ghost.ant-btn-danger[disabled] > a:only-child,
.ant-btn-background-ghost.ant-btn-danger[disabled]:hover > a:only-child,
.ant-btn-background-ghost.ant-btn-danger[disabled]:focus > a:only-child,
.ant-btn-background-ghost.ant-btn-danger[disabled]:active > a:only-child {
  color: currentColor;
}
.ant-btn-background-ghost.ant-btn-danger[disabled] > a:only-child:after,
.ant-btn-background-ghost.ant-btn-danger[disabled]:hover > a:only-child:after,
.ant-btn-background-ghost.ant-btn-danger[disabled]:focus > a:only-child:after,
.ant-btn-background-ghost.ant-btn-danger[disabled]:active > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* ---- Background-ghost + dangerous ---- */
.ant-btn-background-ghost.ant-btn-dangerous {
  color: #ff7875;
  background: transparent;
  border-color: #ff7875;
  text-shadow: none;
}
.ant-btn-background-ghost.ant-btn-dangerous > a:only-child {
  color: currentColor;
}
.ant-btn-background-ghost.ant-btn-dangerous > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}
.ant-btn-background-ghost.ant-btn-dangerous:hover,
.ant-btn-background-ghost.ant-btn-dangerous:focus {
  color: #ffa39e;
  background: transparent;
  border-color: #ffa39e;
}
.ant-btn-background-ghost.ant-btn-dangerous:hover > a:only-child,
.ant-btn-background-ghost.ant-btn-dangerous:focus > a:only-child {
  color: currentColor;
}
.ant-btn-background-ghost.ant-btn-dangerous:hover > a:only-child:after,
.ant-btn-background-ghost.ant-btn-dangerous:focus > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}
.ant-btn-background-ghost.ant-btn-dangerous:active {
  color: #d9595b;
  background: transparent;
  border-color: #d9595b;
}
.ant-btn-background-ghost.ant-btn-dangerous:active > a:only-child {
  color: currentColor;
}
.ant-btn-background-ghost.ant-btn-dangerous:active > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}
.ant-btn-background-ghost.ant-btn-dangerous[disabled],
.ant-btn-background-ghost.ant-btn-dangerous[disabled]:hover,
.ant-btn-background-ghost.ant-btn-dangerous[disabled]:focus,
.ant-btn-background-ghost.ant-btn-dangerous[disabled]:active {
  color: #00000040;
  background: #f5f5f5;
  border-color: #d9d9d9;
  text-shadow: none;
  box-shadow: none;
}
.ant-btn-background-ghost.ant-btn-dangerous[disabled] > a:only-child,
.ant-btn-background-ghost.ant-btn-dangerous[disabled]:hover > a:only-child,
.ant-btn-background-ghost.ant-btn-dangerous[disabled]:focus > a:only-child,
.ant-btn-background-ghost.ant-btn-dangerous[disabled]:active > a:only-child {
  color: currentColor;
}
.ant-btn-background-ghost.ant-btn-dangerous[disabled] > a:only-child:after,
.ant-btn-background-ghost.ant-btn-dangerous[disabled]:hover > a:only-child:after,
.ant-btn-background-ghost.ant-btn-dangerous[disabled]:focus > a:only-child:after,
.ant-btn-background-ghost.ant-btn-dangerous[disabled]:active > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* ---- Background-ghost + dangerous + link ---- */
.ant-btn-background-ghost.ant-btn-dangerous.ant-btn-link {
  color: #ff7875;
  background: transparent;
  border-color: transparent;
  text-shadow: none;
}
.ant-btn-background-ghost.ant-btn-dangerous.ant-btn-link > a:only-child {
  color: currentColor;
}
.ant-btn-background-ghost.ant-btn-dangerous.ant-btn-link > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}
.ant-btn-background-ghost.ant-btn-dangerous.ant-btn-link:hover,
.ant-btn-background-ghost.ant-btn-dangerous.ant-btn-link:focus {
  color: #ffa39e;
  background: transparent;
  border-color: transparent;
}
.ant-btn-background-ghost.ant-btn-dangerous.ant-btn-link:hover > a:only-child,
.ant-btn-background-ghost.ant-btn-dangerous.ant-btn-link:focus > a:only-child {
  color: currentColor;
}
.ant-btn-background-ghost.ant-btn-dangerous.ant-btn-link:hover > a:only-child:after,
.ant-btn-background-ghost.ant-btn-dangerous.ant-btn-link:focus > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}
.ant-btn-background-ghost.ant-btn-dangerous.ant-btn-link:active {
  color: #d9595b;
  background: transparent;
  border-color: transparent;
}
.ant-btn-background-ghost.ant-btn-dangerous.ant-btn-link:active > a:only-child {
  color: currentColor;
}
.ant-btn-background-ghost.ant-btn-dangerous.ant-btn-link:active > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}
.ant-btn-background-ghost.ant-btn-dangerous.ant-btn-link[disabled],
.ant-btn-background-ghost.ant-btn-dangerous.ant-btn-link[disabled]:hover,
.ant-btn-background-ghost.ant-btn-dangerous.ant-btn-link[disabled]:focus,
.ant-btn-background-ghost.ant-btn-dangerous.ant-btn-link[disabled]:active {
  color: #00000040;
  background: #f5f5f5;
  border-color: #d9d9d9;
  text-shadow: none;
  box-shadow: none;
}
.ant-btn-background-ghost.ant-btn-dangerous.ant-btn-link[disabled] > a:only-child,
.ant-btn-background-ghost.ant-btn-dangerous.ant-btn-link[disabled]:hover > a:only-child,
.ant-btn-background-ghost.ant-btn-dangerous.ant-btn-link[disabled]:focus > a:only-child,
.ant-btn-background-ghost.ant-btn-dangerous.ant-btn-link[disabled]:active > a:only-child {
  color: currentColor;
}
.ant-btn-background-ghost.ant-btn-dangerous.ant-btn-link[disabled] > a:only-child:after,
.ant-btn-background-ghost.ant-btn-dangerous.ant-btn-link[disabled]:hover > a:only-child:after,
.ant-btn-background-ghost.ant-btn-dangerous.ant-btn-link[disabled]:focus > a:only-child:after,
.ant-btn-background-ghost.ant-btn-dangerous.ant-btn-link[disabled]:active > a:only-child:after {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  content: "";
}

/* ==== Miscellaneous button rules ==== */
/* Extra letter spacing for buttons flagged with the two-chinese-chars class. */
.ant-btn-two-chinese-chars:first-letter {
  letter-spacing: .34em;
}
.ant-btn-two-chinese-chars > *:not(.anticon) {
  margin-right: -.34em;
  letter-spacing: .34em;
}
.ant-btn-block {
  width: 100%;
}
.ant-btn:empty {
  display: inline-block;
  width: 0;
  visibility: hidden;
  content: "\a0";
}

/* Anchor elements styled as buttons */
a.ant-btn {
  padding-top: .01px !important;
  line-height: 30px;
}
a.ant-btn-lg {
  line-height: 38px;
}
a.ant-btn-sm {
  line-height: 22px;
}

/* ==== RTL buttons ==== */
.ant-btn-rtl {
  direction: rtl;
}
.ant-btn-group-rtl.ant-btn-group .ant-btn-primary:last-child:not(:first-child),
.ant-btn-group-rtl.ant-btn-group .ant-btn-primary + .ant-btn-primary {
  border-right-color: #40a9ff;
  border-left-color: #d9d9d9;
}
.ant-btn-group-rtl.ant-btn-group .ant-btn-primary:last-child:not(:first-child)[disabled],
.ant-btn-group-rtl.ant-btn-group .ant-btn-primary + .ant-btn-primary[disabled] {
  border-right-color: #d9d9d9;
  border-left-color: #40a9ff;
}
.ant-btn-rtl.ant-btn > .ant-btn-loading-icon .anticon {
  padding-right: 0;
  padding-left: 8px;
}
.ant-btn > .ant-btn-loading-icon:only-child .anticon {
  padding-right: 0;
  padding-left: 0;
}
.ant-btn-rtl.ant-btn > .anticon + span,
.ant-btn-rtl.ant-btn > span + .anticon {
  margin-right: 8px;
  margin-left: 0;
}

/* ==== Avatar ====
 * color and line-height are each declared twice in this rule; the later
 * declaration of each is the one that applies. */
.ant-avatar {
  box-sizing: border-box;
  margin: 0;
  padding: 0;
  color: #000000d9;
  font-size: 14px;
  font-variant: tabular-nums;
  line-height: 1.5715;
  list-style: none;
  font-feature-settings: "tnum";
  position: relative;
  display: inline-block;
  overflow: hidden;
  color: #fff;
  white-space: nowrap;
  text-align: center;
  vertical-align: middle;
  background: #ccc;
  width: 32px;
  height: 32px;
  line-height: 32px;
  border-radius: 50%;
}
.ant-avatar-image {
  background: transparent;
}
.ant-avatar .ant-image-img {
  display: block;
}
/* Rule continues on the following line. */
.ant-avatar-string {
  position: absolute;
  left: 50%;
  transform-origin: 0 
center;
}

/* ==== Avatar: default size ==== */
.ant-avatar.ant-avatar-icon {
  font-size: 18px;
}
.ant-avatar.ant-avatar-icon > .anticon {
  margin: 0;
}

/* Avatar: large */
.ant-avatar-lg {
  width: 40px;
  height: 40px;
  line-height: 40px;
  border-radius: 50%;
}
.ant-avatar-lg-string {
  position: absolute;
  left: 50%;
  transform-origin: 0 center;
}
.ant-avatar-lg.ant-avatar-icon {
  font-size: 24px;
}
.ant-avatar-lg.ant-avatar-icon > .anticon {
  margin: 0;
}

/* Avatar: small */
.ant-avatar-sm {
  width: 24px;
  height: 24px;
  line-height: 24px;
  border-radius: 50%;
}
.ant-avatar-sm-string {
  position: absolute;
  left: 50%;
  transform-origin: 0 center;
}
.ant-avatar-sm.ant-avatar-icon {
  font-size: 14px;
}
.ant-avatar-sm.ant-avatar-icon > .anticon {
  margin: 0;
}

/* Avatar: square shape and image content */
.ant-avatar-square {
  border-radius: 2px;
}
.ant-avatar > img {
  display: block;
  width: 100%;
  height: 100%;
  -o-object-fit: cover;
  object-fit: cover;
}

/* ==== Avatar group: overlapping avatars (negative margin) ==== */
.ant-avatar-group {
  display: inline-flex;
}
.ant-avatar-group .ant-avatar {
  border: 1px solid #fff;
}
.ant-avatar-group .ant-avatar:not(:first-child) {
  margin-left: -8px;
}
.ant-avatar-group-popover .ant-avatar + .ant-avatar {
  margin-left: 3px;
}
.ant-avatar-group-rtl .ant-avatar:not(:first-child) {
  margin-right: -8px;
  margin-left: 0;
}
.ant-avatar-group-popover.ant-popover-rtl .ant-avatar + .ant-avatar {
  margin-right: 3px;
  margin-left: 0;
}

/* ==== Back-top: fixed-position control near the bottom-right corner ==== */
.ant-back-top {
  box-sizing: border-box;
  margin: 0;
  padding: 0;
  color: #000000d9;
  font-size: 14px;
  font-variant: tabular-nums;
  line-height: 1.5715;
  list-style: none;
  font-feature-settings: "tnum";
  position: fixed;
  right: 100px;
  bottom: 50px;
  z-index: 10;
  width: 40px;
  height: 40px;
  cursor: pointer;
}
.ant-back-top:empty {
  display: none;
}
.ant-back-top-rtl {
  right: auto;
  left: 100px;
  direction: rtl;
}
.ant-back-top-content {
  width: 40px;
  height: 40px;
  overflow: hidden;
  color: #fff;
  text-align: center;
  background-color: #00000073;
  border-radius: 20px;
  transition: all .3s;
}
.ant-back-top-content:hover {
  background-color: #000000d9;
  transition: all .3s;
}
.ant-back-top-icon {
  font-size: 24px;
  line-height: 40px;
}

/* Narrower viewports move the control closer to the right edge. */
@media screen and (max-width: 768px) {
  .ant-back-top {
    right: 60px;
  }
}
@media screen and (max-width: 
480px){.ant-back-top{right:20px}}.ant-badge{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:relative;display:inline-block;line-height:1}.ant-badge-count{z-index:1;min-width:20px;height:20px;padding:0 6px;color:#fff;font-weight:400;font-size:12px;line-height:20px;white-space:nowrap;text-align:center;background:#ff4d4f;border-radius:10px;box-shadow:0 0 0 1px #fff}.ant-badge-count a,.ant-badge-count a:hover{color:#fff}.ant-badge-count-sm{min-width:14px;height:14px;padding:0;font-size:12px;line-height:14px;border-radius:7px}.ant-badge-multiple-words{padding:0 8px}.ant-badge-dot{z-index:1;width:6px;min-width:6px;height:6px;background:#ff4d4f;border-radius:100%;box-shadow:0 0 0 1px #fff}.ant-badge-count,.ant-badge-dot,.ant-badge .ant-scroll-number-custom-component{position:absolute;top:0;right:0;transform:translate(50%,-50%);transform-origin:100% 0%}.ant-badge-count.anticon-spin,.ant-badge-dot.anticon-spin,.ant-badge .ant-scroll-number-custom-component.anticon-spin{-webkit-animation:antBadgeLoadingCircle 1s infinite linear;animation:antBadgeLoadingCircle 1s infinite linear}.ant-badge-status{line-height:inherit;vertical-align:baseline}.ant-badge-status-dot{position:relative;top:-1px;display:inline-block;width:6px;height:6px;vertical-align:middle;border-radius:50%}.ant-badge-status-success{background-color:#52c41a}.ant-badge-status-processing{position:relative;background-color:#1890ff}.ant-badge-status-processing:after{position:absolute;top:0;left:0;width:100%;height:100%;border:1px solid #1890ff;border-radius:50%;-webkit-animation:antStatusProcessing 1.2s infinite ease-in-out;animation:antStatusProcessing 1.2s infinite 
ease-in-out;content:""}.ant-badge-status-default{background-color:#d9d9d9}.ant-badge-status-error{background-color:#ff4d4f}.ant-badge-status-warning{background-color:#faad14}.ant-badge-status-pink,.ant-badge-status-magenta{background:#eb2f96}.ant-badge-status-red{background:#f5222d}.ant-badge-status-volcano{background:#fa541c}.ant-badge-status-orange{background:#fa8c16}.ant-badge-status-yellow{background:#fadb14}.ant-badge-status-gold{background:#faad14}.ant-badge-status-cyan{background:#13c2c2}.ant-badge-status-lime{background:#a0d911}.ant-badge-status-green{background:#52c41a}.ant-badge-status-blue{background:#1890ff}.ant-badge-status-geekblue{background:#2f54eb}.ant-badge-status-purple{background:#722ed1}.ant-badge-status-text{margin-left:8px;color:#000000d9;font-size:14px}.ant-badge-zoom-appear,.ant-badge-zoom-enter{-webkit-animation:antZoomBadgeIn .3s cubic-bezier(.12,.4,.29,1.46);animation:antZoomBadgeIn .3s cubic-bezier(.12,.4,.29,1.46);-webkit-animation-fill-mode:both;animation-fill-mode:both}.ant-badge-zoom-leave{-webkit-animation:antZoomBadgeOut .3s cubic-bezier(.71,-.46,.88,.6);animation:antZoomBadgeOut .3s cubic-bezier(.71,-.46,.88,.6);-webkit-animation-fill-mode:both;animation-fill-mode:both}.ant-badge-not-a-wrapper .ant-badge-zoom-appear,.ant-badge-not-a-wrapper .ant-badge-zoom-enter{-webkit-animation:antNoWrapperZoomBadgeIn .3s cubic-bezier(.12,.4,.29,1.46);animation:antNoWrapperZoomBadgeIn .3s cubic-bezier(.12,.4,.29,1.46)}.ant-badge-not-a-wrapper .ant-badge-zoom-leave{-webkit-animation:antNoWrapperZoomBadgeOut .3s cubic-bezier(.71,-.46,.88,.6);animation:antNoWrapperZoomBadgeOut .3s cubic-bezier(.71,-.46,.88,.6)}.ant-badge-not-a-wrapper:not(.ant-badge-status){vertical-align:middle}.ant-badge-not-a-wrapper .ant-scroll-number-custom-component{transform:none}.ant-badge-not-a-wrapper .ant-scroll-number-custom-component,.ant-badge-not-a-wrapper .ant-scroll-number{position:relative;top:auto;display:block;transform-origin:50% 50%}@-webkit-keyframes 
antStatusProcessing{0%{transform:scale(.8);opacity:.5}to{transform:scale(2.4);opacity:0}}@keyframes antStatusProcessing{0%{transform:scale(.8);opacity:.5}to{transform:scale(2.4);opacity:0}}.ant-scroll-number{overflow:hidden}.ant-scroll-number-only{position:relative;display:inline-block;height:20px;transition:all .3s cubic-bezier(.645,.045,.355,1);-webkit-transform-style:preserve-3d;-webkit-backface-visibility:hidden}.ant-scroll-number-only>p.ant-scroll-number-only-unit{height:20px;margin:0;-webkit-transform-style:preserve-3d;-webkit-backface-visibility:hidden}.ant-scroll-number-symbol{vertical-align:top}@-webkit-keyframes antZoomBadgeIn{0%{transform:scale(0) translate(50%,-50%);opacity:0}to{transform:scale(1) translate(50%,-50%)}}@keyframes antZoomBadgeIn{0%{transform:scale(0) translate(50%,-50%);opacity:0}to{transform:scale(1) translate(50%,-50%)}}@-webkit-keyframes antZoomBadgeOut{0%{transform:scale(1) translate(50%,-50%)}to{transform:scale(0) translate(50%,-50%);opacity:0}}@keyframes antZoomBadgeOut{0%{transform:scale(1) translate(50%,-50%)}to{transform:scale(0) translate(50%,-50%);opacity:0}}@-webkit-keyframes antNoWrapperZoomBadgeIn{0%{transform:scale(0);opacity:0}to{transform:scale(1)}}@keyframes antNoWrapperZoomBadgeIn{0%{transform:scale(0);opacity:0}to{transform:scale(1)}}@-webkit-keyframes antNoWrapperZoomBadgeOut{0%{transform:scale(1)}to{transform:scale(0);opacity:0}}@keyframes antNoWrapperZoomBadgeOut{0%{transform:scale(1)}to{transform:scale(0);opacity:0}}@-webkit-keyframes antBadgeLoadingCircle{0%{transform-origin:50%}to{transform:translate(50%,-50%) rotate(360deg);transform-origin:50%}}@keyframes antBadgeLoadingCircle{0%{transform-origin:50%}to{transform:translate(50%,-50%) 
rotate(360deg);transform-origin:50%}}.ant-ribbon-wrapper{position:relative}.ant-ribbon{box-sizing:border-box;margin:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:absolute;top:8px;height:22px;padding:0 8px;color:#fff;line-height:22px;white-space:nowrap;background-color:#1890ff;border-radius:2px}.ant-ribbon-text{color:#fff}.ant-ribbon-corner{position:absolute;top:100%;width:8px;height:8px;color:currentColor;border:4px solid;transform:scaleY(.75);transform-origin:top}.ant-ribbon-corner:after{position:absolute;top:-4px;left:-4px;width:inherit;height:inherit;color:#00000040;border:inherit;content:""}.ant-ribbon-color-pink,.ant-ribbon-color-magenta{color:#eb2f96;background:#eb2f96}.ant-ribbon-color-red{color:#f5222d;background:#f5222d}.ant-ribbon-color-volcano{color:#fa541c;background:#fa541c}.ant-ribbon-color-orange{color:#fa8c16;background:#fa8c16}.ant-ribbon-color-yellow{color:#fadb14;background:#fadb14}.ant-ribbon-color-gold{color:#faad14;background:#faad14}.ant-ribbon-color-cyan{color:#13c2c2;background:#13c2c2}.ant-ribbon-color-lime{color:#a0d911;background:#a0d911}.ant-ribbon-color-green{color:#52c41a;background:#52c41a}.ant-ribbon-color-blue{color:#1890ff;background:#1890ff}.ant-ribbon-color-geekblue{color:#2f54eb;background:#2f54eb}.ant-ribbon-color-purple{color:#722ed1;background:#722ed1}.ant-ribbon.ant-ribbon-placement-end{right:-8px;border-bottom-right-radius:0}.ant-ribbon.ant-ribbon-placement-end .ant-ribbon-corner{right:0;border-color:currentColor transparent transparent currentColor}.ant-ribbon.ant-ribbon-placement-start{left:-8px;border-bottom-left-radius:0}.ant-ribbon.ant-ribbon-placement-start .ant-ribbon-corner{left:0;border-color:currentColor currentColor transparent transparent}.ant-badge-rtl{direction:rtl}.ant-badge-rtl .ant-badge-count,.ant-badge-rtl .ant-badge-dot,.ant-badge-rtl .ant-badge 
.ant-scroll-number-custom-component{right:auto;left:0;direction:ltr;transform:translate(-50%,-50%);transform-origin:0% 0%}.ant-badge-rtl.ant-badge .ant-scroll-number-custom-component{right:auto;left:0;transform:translate(-50%,-50%);transform-origin:0% 0%}.ant-badge-rtl .ant-badge-status-text{margin-right:8px;margin-left:0}.ant-badge-rtl .ant-badge-zoom-appear,.ant-badge-rtl .ant-badge-zoom-enter{-webkit-animation-name:antZoomBadgeInRtl;animation-name:antZoomBadgeInRtl}.ant-badge-rtl .ant-badge-zoom-leave{-webkit-animation-name:antZoomBadgeOutRtl;animation-name:antZoomBadgeOutRtl}.ant-badge-not-a-wrapper .ant-badge-count{transform:none}.ant-ribbon-rtl{direction:rtl}.ant-ribbon-rtl.ant-ribbon-placement-end{right:unset;left:-8px;border-bottom-right-radius:2px;border-bottom-left-radius:0}.ant-ribbon-rtl.ant-ribbon-placement-end .ant-ribbon-corner{right:unset;left:0;border-color:currentColor currentColor transparent transparent}.ant-ribbon-rtl.ant-ribbon-placement-end .ant-ribbon-corner:after{border-color:currentColor currentColor transparent transparent}.ant-ribbon-rtl.ant-ribbon-placement-start{right:-8px;left:unset;border-bottom-right-radius:0;border-bottom-left-radius:2px}.ant-ribbon-rtl.ant-ribbon-placement-start .ant-ribbon-corner{right:0;left:unset;border-color:currentColor transparent transparent currentColor}.ant-ribbon-rtl.ant-ribbon-placement-start .ant-ribbon-corner:after{border-color:currentColor transparent transparent currentColor}@-webkit-keyframes antZoomBadgeInRtl{0%{transform:scale(0) translate(-50%,-50%);opacity:0}to{transform:scale(1) translate(-50%,-50%)}}@keyframes antZoomBadgeInRtl{0%{transform:scale(0) translate(-50%,-50%);opacity:0}to{transform:scale(1) translate(-50%,-50%)}}@-webkit-keyframes antZoomBadgeOutRtl{0%{transform:scale(1) translate(-50%,-50%)}to{transform:scale(0) translate(-50%,-50%);opacity:0}}@keyframes antZoomBadgeOutRtl{0%{transform:scale(1) translate(-50%,-50%)}to{transform:scale(0) 
translate(-50%,-50%);opacity:0}}.ant-breadcrumb{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";color:#00000073;font-size:14px}.ant-breadcrumb .anticon{font-size:14px}.ant-breadcrumb a{color:#00000073;transition:color .3s}.ant-breadcrumb a:hover{color:#40a9ff}.ant-breadcrumb>span:last-child{color:#000000d9}.ant-breadcrumb>span:last-child a{color:#000000d9}.ant-breadcrumb>span:last-child .ant-breadcrumb-separator{display:none}.ant-breadcrumb-separator{margin:0 8px;color:#00000073}.ant-breadcrumb-link>.anticon+span,.ant-breadcrumb-link>.anticon+a{margin-left:4px}.ant-breadcrumb-overlay-link>.anticon{margin-left:4px}.ant-breadcrumb-rtl{direction:rtl}.ant-breadcrumb-rtl:before,.ant-breadcrumb-rtl:after{display:table;content:""}.ant-breadcrumb-rtl:after{clear:both}.ant-breadcrumb-rtl>span{float:right}.ant-breadcrumb-rtl .ant-breadcrumb-link>.anticon+span,.ant-breadcrumb-rtl .ant-breadcrumb-link>.anticon+a{margin-right:4px;margin-left:0}.ant-breadcrumb-rtl .ant-breadcrumb-overlay-link>.anticon{margin-right:4px;margin-left:0}.ant-menu-item-danger.ant-menu-item,.ant-menu-item-danger.ant-menu-item:hover,.ant-menu-item-danger.ant-menu-item-active{color:#ff4d4f}.ant-menu-item-danger.ant-menu-item:active{background:#fff1f0}.ant-menu-item-danger.ant-menu-item-selected{color:#ff4d4f}.ant-menu-item-danger.ant-menu-item-selected>a,.ant-menu-item-danger.ant-menu-item-selected>a:hover{color:#ff4d4f}.ant-menu:not(.ant-menu-horizontal) .ant-menu-item-danger.ant-menu-item-selected{background-color:#fff1f0}.ant-menu-inline .ant-menu-item-danger.ant-menu-item:after{border-right-color:#ff4d4f}.ant-menu-dark .ant-menu-item-danger.ant-menu-item,.ant-menu-dark .ant-menu-item-danger.ant-menu-item:hover,.ant-menu-dark .ant-menu-item-danger.ant-menu-item>a{color:#ff4d4f}.ant-menu-dark.ant-menu-dark:not(.ant-menu-horizontal) 
.ant-menu-item-danger.ant-menu-item-selected{color:#fff;background-color:#ff4d4f}.ant-menu{box-sizing:border-box;margin:0;font-variant:tabular-nums;line-height:1.5715;font-feature-settings:"tnum";padding:0;color:#000000d9;font-size:14px;line-height:0;text-align:left;list-style:none;background:#fff;outline:none;box-shadow:0 2px 8px #00000026;transition:background .3s,width .3s cubic-bezier(.2,0,0,1) 0s}.ant-menu:before,.ant-menu:after{display:table;content:""}.ant-menu:after{clear:both}.ant-menu.ant-menu-root:focus-visible{box-shadow:0 0 0 2px #1890ff33}.ant-menu ul,.ant-menu ol{margin:0;padding:0;list-style:none}.ant-menu-overflow{display:flex}.ant-menu-overflow-item{flex:none}.ant-menu-hidden,.ant-menu-submenu-hidden{display:none}.ant-menu-item-group-title{height:1.5715;padding:8px 16px;color:#00000073;font-size:14px;line-height:1.5715;transition:all .3s}.ant-menu-horizontal .ant-menu-submenu{transition:border-color .3s cubic-bezier(.645,.045,.355,1),background .3s cubic-bezier(.645,.045,.355,1)}.ant-menu-submenu,.ant-menu-submenu-inline{transition:border-color .3s cubic-bezier(.645,.045,.355,1),background .3s cubic-bezier(.645,.045,.355,1),padding .15s cubic-bezier(.645,.045,.355,1)}.ant-menu-submenu-selected{color:#1890ff}.ant-menu-item:active,.ant-menu-submenu-title:active{background:#e6f7ff}.ant-menu-submenu .ant-menu-sub{cursor:initial;transition:background .3s cubic-bezier(.645,.045,.355,1),padding .3s cubic-bezier(.645,.045,.355,1)}.ant-menu-item a{color:#000000d9}.ant-menu-item a:hover{color:#1890ff}.ant-menu-item a:before{position:absolute;top:0;right:0;bottom:0;left:0;background-color:transparent;content:""}.ant-menu-item>.ant-badge a{color:#000000d9}.ant-menu-item>.ant-badge a:hover{color:#1890ff}.ant-menu-item-divider{height:1px;overflow:hidden;line-height:0;background-color:#f0f0f0}.ant-menu-horizontal .ant-menu-item,.ant-menu-horizontal 
.ant-menu-submenu{margin-top:-1px}.ant-menu-horizontal>.ant-menu-item:hover,.ant-menu-horizontal>.ant-menu-item-active,.ant-menu-horizontal>.ant-menu-submenu .ant-menu-submenu-title:hover{background-color:transparent}.ant-menu-item-selected,.ant-menu-item-selected a,.ant-menu-item-selected a:hover{color:#1890ff}.ant-menu:not(.ant-menu-horizontal) .ant-menu-item-selected{background-color:#e6f7ff}.ant-menu-inline,.ant-menu-vertical,.ant-menu-vertical-left{border-right:1px solid #f0f0f0}.ant-menu-vertical-right{border-left:1px solid #f0f0f0}.ant-menu-vertical.ant-menu-sub,.ant-menu-vertical-left.ant-menu-sub,.ant-menu-vertical-right.ant-menu-sub{min-width:160px;max-height:calc(100vh - 100px);padding:0;overflow:hidden;border-right:0}.ant-menu-vertical.ant-menu-sub:not([class*="-active"]),.ant-menu-vertical-left.ant-menu-sub:not([class*="-active"]),.ant-menu-vertical-right.ant-menu-sub:not([class*="-active"]){overflow-x:hidden;overflow-y:auto}.ant-menu-vertical.ant-menu-sub .ant-menu-item,.ant-menu-vertical-left.ant-menu-sub .ant-menu-item,.ant-menu-vertical-right.ant-menu-sub .ant-menu-item{left:0;margin-left:0;border-right:0}.ant-menu-vertical.ant-menu-sub .ant-menu-item:after,.ant-menu-vertical-left.ant-menu-sub .ant-menu-item:after,.ant-menu-vertical-right.ant-menu-sub .ant-menu-item:after{border-right:0}.ant-menu-vertical.ant-menu-sub>.ant-menu-item,.ant-menu-vertical-left.ant-menu-sub>.ant-menu-item,.ant-menu-vertical-right.ant-menu-sub>.ant-menu-item,.ant-menu-vertical.ant-menu-sub>.ant-menu-submenu,.ant-menu-vertical-left.ant-menu-sub>.ant-menu-submenu,.ant-menu-vertical-right.ant-menu-sub>.ant-menu-submenu{transform-origin:0 0}.ant-menu-horizontal.ant-menu-sub{min-width:114px}.ant-menu-horizontal .ant-menu-item,.ant-menu-horizontal .ant-menu-submenu-title{transition:border-color .3s,background .3s}.ant-menu-item,.ant-menu-submenu-title{position:relative;display:block;margin:0;padding:0 20px;white-space:nowrap;cursor:pointer;transition:border-color 
.3s,background .3s,padding .3s cubic-bezier(.645,.045,.355,1)}.ant-menu-item .ant-menu-item-icon,.ant-menu-submenu-title .ant-menu-item-icon,.ant-menu-item .anticon,.ant-menu-submenu-title .anticon{min-width:14px;font-size:14px;transition:font-size .15s cubic-bezier(.215,.61,.355,1),margin .3s cubic-bezier(.645,.045,.355,1),color .3s}.ant-menu-item .ant-menu-item-icon+span,.ant-menu-submenu-title .ant-menu-item-icon+span,.ant-menu-item .anticon+span,.ant-menu-submenu-title .anticon+span{margin-left:10px;opacity:1;transition:opacity .3s cubic-bezier(.645,.045,.355,1),margin .3s,color .3s}.ant-menu-item .ant-menu-item-icon.svg,.ant-menu-submenu-title .ant-menu-item-icon.svg{vertical-align:-.125em}.ant-menu-item.ant-menu-item-only-child>.anticon,.ant-menu-submenu-title.ant-menu-item-only-child>.anticon,.ant-menu-item.ant-menu-item-only-child>.ant-menu-item-icon,.ant-menu-submenu-title.ant-menu-item-only-child>.ant-menu-item-icon{margin-right:0}.ant-menu-item:focus-visible,.ant-menu-submenu-title:focus-visible{box-shadow:0 0 0 2px #1890ff33}.ant-menu>.ant-menu-item-divider{height:1px;margin:1px 0;padding:0;overflow:hidden;line-height:0;background-color:#f0f0f0}.ant-menu-submenu-popup{position:absolute;z-index:1050;background:transparent;border-radius:2px;box-shadow:none;transform-origin:0 0}.ant-menu-submenu-popup:before{position:absolute;top:-7px;right:0;bottom:0;left:0;z-index:-1;width:100%;height:100%;opacity:.0001;content:" "}.ant-menu-submenu-placement-rightTop:before{top:0;left:-7px}.ant-menu-submenu>.ant-menu{background-color:#fff;border-radius:2px}.ant-menu-submenu>.ant-menu-submenu-title:after{transition:transform .3s cubic-bezier(.645,.045,.355,1)}.ant-menu-submenu-popup>.ant-menu{background-color:#fff}.ant-menu-submenu-expand-icon,.ant-menu-submenu-arrow{position:absolute;top:50%;right:16px;width:10px;color:#000000d9;transform:translateY(-50%);transition:transform .3s 
cubic-bezier(.645,.045,.355,1)}.ant-menu-submenu-arrow:before,.ant-menu-submenu-arrow:after{position:absolute;width:6px;height:1.5px;background-color:currentColor;border-radius:2px;transition:background .3s cubic-bezier(.645,.045,.355,1),transform .3s cubic-bezier(.645,.045,.355,1),top .3s cubic-bezier(.645,.045,.355,1),color .3s cubic-bezier(.645,.045,.355,1);content:""}.ant-menu-submenu-arrow:before{transform:rotate(45deg) translateY(-2.5px)}.ant-menu-submenu-arrow:after{transform:rotate(-45deg) translateY(2.5px)}.ant-menu-submenu:hover>.ant-menu-submenu-title>.ant-menu-submenu-expand-icon,.ant-menu-submenu:hover>.ant-menu-submenu-title>.ant-menu-submenu-arrow{color:#1890ff}.ant-menu-inline-collapsed .ant-menu-submenu-arrow:before,.ant-menu-submenu-inline .ant-menu-submenu-arrow:before{transform:rotate(-45deg) translate(2.5px)}.ant-menu-inline-collapsed .ant-menu-submenu-arrow:after,.ant-menu-submenu-inline .ant-menu-submenu-arrow:after{transform:rotate(45deg) translate(-2.5px)}.ant-menu-submenu-horizontal .ant-menu-submenu-arrow{display:none}.ant-menu-submenu-open.ant-menu-submenu-inline>.ant-menu-submenu-title>.ant-menu-submenu-arrow{transform:translateY(-2px)}.ant-menu-submenu-open.ant-menu-submenu-inline>.ant-menu-submenu-title>.ant-menu-submenu-arrow:after{transform:rotate(-45deg) translate(-2.5px)}.ant-menu-submenu-open.ant-menu-submenu-inline>.ant-menu-submenu-title>.ant-menu-submenu-arrow:before{transform:rotate(45deg) translate(2.5px)}.ant-menu-vertical .ant-menu-submenu-selected,.ant-menu-vertical-left .ant-menu-submenu-selected,.ant-menu-vertical-right .ant-menu-submenu-selected{color:#1890ff}.ant-menu-horizontal{line-height:46px;border:0;border-bottom:1px solid #f0f0f0;box-shadow:none}.ant-menu-horizontal:not(.ant-menu-dark)>.ant-menu-item,.ant-menu-horizontal:not(.ant-menu-dark)>.ant-menu-submenu{margin-top:-1px;margin-bottom:0;padding:0 
20px}.ant-menu-horizontal:not(.ant-menu-dark)>.ant-menu-item:hover,.ant-menu-horizontal:not(.ant-menu-dark)>.ant-menu-submenu:hover,.ant-menu-horizontal:not(.ant-menu-dark)>.ant-menu-item-active,.ant-menu-horizontal:not(.ant-menu-dark)>.ant-menu-submenu-active,.ant-menu-horizontal:not(.ant-menu-dark)>.ant-menu-item-open,.ant-menu-horizontal:not(.ant-menu-dark)>.ant-menu-submenu-open,.ant-menu-horizontal:not(.ant-menu-dark)>.ant-menu-item-selected,.ant-menu-horizontal:not(.ant-menu-dark)>.ant-menu-submenu-selected{color:#1890ff}.ant-menu-horizontal:not(.ant-menu-dark)>.ant-menu-item:hover:after,.ant-menu-horizontal:not(.ant-menu-dark)>.ant-menu-submenu:hover:after,.ant-menu-horizontal:not(.ant-menu-dark)>.ant-menu-item-active:after,.ant-menu-horizontal:not(.ant-menu-dark)>.ant-menu-submenu-active:after,.ant-menu-horizontal:not(.ant-menu-dark)>.ant-menu-item-open:after,.ant-menu-horizontal:not(.ant-menu-dark)>.ant-menu-submenu-open:after,.ant-menu-horizontal:not(.ant-menu-dark)>.ant-menu-item-selected:after,.ant-menu-horizontal:not(.ant-menu-dark)>.ant-menu-submenu-selected:after{border-bottom:2px solid #1890ff}.ant-menu-horizontal>.ant-menu-item,.ant-menu-horizontal>.ant-menu-submenu{position:relative;top:1px;display:inline-block;vertical-align:bottom}.ant-menu-horizontal>.ant-menu-item:after,.ant-menu-horizontal>.ant-menu-submenu:after{position:absolute;right:20px;bottom:0;left:20px;border-bottom:2px solid transparent;transition:border-color .3s cubic-bezier(.645,.045,.355,1);content:""}.ant-menu-horizontal>.ant-menu-submenu>.ant-menu-submenu-title{padding:0}.ant-menu-horizontal>.ant-menu-item a{color:#000000d9}.ant-menu-horizontal>.ant-menu-item a:hover{color:#1890ff}.ant-menu-horizontal>.ant-menu-item a:before{bottom:-2px}.ant-menu-horizontal>.ant-menu-item-selected a{color:#1890ff}.ant-menu-horizontal:after{display:block;clear:both;height:0;content:" "}.ant-menu-vertical .ant-menu-item,.ant-menu-vertical-left .ant-menu-item,.ant-menu-vertical-right 
.ant-menu-item,.ant-menu-inline .ant-menu-item{position:relative}.ant-menu-vertical .ant-menu-item:after,.ant-menu-vertical-left .ant-menu-item:after,.ant-menu-vertical-right .ant-menu-item:after,.ant-menu-inline .ant-menu-item:after{position:absolute;top:0;right:0;bottom:0;border-right:3px solid #1890ff;transform:scaleY(.0001);opacity:0;transition:transform .15s cubic-bezier(.215,.61,.355,1),opacity .15s cubic-bezier(.215,.61,.355,1);content:""}.ant-menu-vertical .ant-menu-item,.ant-menu-vertical-left .ant-menu-item,.ant-menu-vertical-right .ant-menu-item,.ant-menu-inline .ant-menu-item,.ant-menu-vertical .ant-menu-submenu-title,.ant-menu-vertical-left .ant-menu-submenu-title,.ant-menu-vertical-right .ant-menu-submenu-title,.ant-menu-inline .ant-menu-submenu-title{height:40px;margin-top:4px;margin-bottom:4px;padding:0 16px;overflow:hidden;line-height:40px;text-overflow:ellipsis}.ant-menu-vertical .ant-menu-submenu,.ant-menu-vertical-left .ant-menu-submenu,.ant-menu-vertical-right .ant-menu-submenu,.ant-menu-inline .ant-menu-submenu{padding-bottom:.02px}.ant-menu-vertical .ant-menu-item:not(:last-child),.ant-menu-vertical-left .ant-menu-item:not(:last-child),.ant-menu-vertical-right .ant-menu-item:not(:last-child),.ant-menu-inline .ant-menu-item:not(:last-child){margin-bottom:8px}.ant-menu-vertical>.ant-menu-item,.ant-menu-vertical-left>.ant-menu-item,.ant-menu-vertical-right>.ant-menu-item,.ant-menu-inline>.ant-menu-item,.ant-menu-vertical>.ant-menu-submenu>.ant-menu-submenu-title,.ant-menu-vertical-left>.ant-menu-submenu>.ant-menu-submenu-title,.ant-menu-vertical-right>.ant-menu-submenu>.ant-menu-submenu-title,.ant-menu-inline>.ant-menu-submenu>.ant-menu-submenu-title{height:40px;line-height:40px}.ant-menu-vertical .ant-menu-item-group-list .ant-menu-submenu-title,.ant-menu-vertical .ant-menu-submenu-title{padding-right:34px}.ant-menu-inline{width:100%}.ant-menu-inline .ant-menu-selected:after,.ant-menu-inline 
.ant-menu-item-selected:after{transform:scaleY(1);opacity:1;transition:transform .15s cubic-bezier(.645,.045,.355,1),opacity .15s cubic-bezier(.645,.045,.355,1)}.ant-menu-inline .ant-menu-item,.ant-menu-inline .ant-menu-submenu-title{width:calc(100% + 1px)}.ant-menu-inline .ant-menu-item-group-list .ant-menu-submenu-title,.ant-menu-inline .ant-menu-submenu-title{padding-right:34px}.ant-menu-inline.ant-menu-root .ant-menu-item,.ant-menu-inline.ant-menu-root .ant-menu-submenu-title{display:flex;align-items:center;transition:border-color .3s,background .3s,padding .1s cubic-bezier(.215,.61,.355,1)}.ant-menu-inline.ant-menu-root .ant-menu-item>.ant-menu-title-content,.ant-menu-inline.ant-menu-root .ant-menu-submenu-title>.ant-menu-title-content{flex:auto;min-width:0;overflow:hidden;text-overflow:ellipsis}.ant-menu-inline.ant-menu-root .ant-menu-item>*,.ant-menu-inline.ant-menu-root .ant-menu-submenu-title>*{flex:none}.ant-menu.ant-menu-inline-collapsed{width:80px}.ant-menu.ant-menu-inline-collapsed>.ant-menu-item,.ant-menu.ant-menu-inline-collapsed>.ant-menu-item-group>.ant-menu-item-group-list>.ant-menu-item,.ant-menu.ant-menu-inline-collapsed>.ant-menu-item-group>.ant-menu-item-group-list>.ant-menu-submenu>.ant-menu-submenu-title,.ant-menu.ant-menu-inline-collapsed>.ant-menu-submenu>.ant-menu-submenu-title{left:0;padding:0 calc(50% - 8px);text-overflow:clip}.ant-menu.ant-menu-inline-collapsed>.ant-menu-item .ant-menu-submenu-arrow,.ant-menu.ant-menu-inline-collapsed>.ant-menu-item-group>.ant-menu-item-group-list>.ant-menu-item .ant-menu-submenu-arrow,.ant-menu.ant-menu-inline-collapsed>.ant-menu-item-group>.ant-menu-item-group-list>.ant-menu-submenu>.ant-menu-submenu-title .ant-menu-submenu-arrow,.ant-menu.ant-menu-inline-collapsed>.ant-menu-submenu>.ant-menu-submenu-title .ant-menu-submenu-arrow{opacity:0}.ant-menu.ant-menu-inline-collapsed>.ant-menu-item 
.ant-menu-item-icon,.ant-menu.ant-menu-inline-collapsed>.ant-menu-item-group>.ant-menu-item-group-list>.ant-menu-item .ant-menu-item-icon,.ant-menu.ant-menu-inline-collapsed>.ant-menu-item-group>.ant-menu-item-group-list>.ant-menu-submenu>.ant-menu-submenu-title .ant-menu-item-icon,.ant-menu.ant-menu-inline-collapsed>.ant-menu-submenu>.ant-menu-submenu-title .ant-menu-item-icon,.ant-menu.ant-menu-inline-collapsed>.ant-menu-item .anticon,.ant-menu.ant-menu-inline-collapsed>.ant-menu-item-group>.ant-menu-item-group-list>.ant-menu-item .anticon,.ant-menu.ant-menu-inline-collapsed>.ant-menu-item-group>.ant-menu-item-group-list>.ant-menu-submenu>.ant-menu-submenu-title .anticon,.ant-menu.ant-menu-inline-collapsed>.ant-menu-submenu>.ant-menu-submenu-title .anticon{margin:0;font-size:16px;line-height:40px}.ant-menu.ant-menu-inline-collapsed>.ant-menu-item .ant-menu-item-icon+span,.ant-menu.ant-menu-inline-collapsed>.ant-menu-item-group>.ant-menu-item-group-list>.ant-menu-item .ant-menu-item-icon+span,.ant-menu.ant-menu-inline-collapsed>.ant-menu-item-group>.ant-menu-item-group-list>.ant-menu-submenu>.ant-menu-submenu-title .ant-menu-item-icon+span,.ant-menu.ant-menu-inline-collapsed>.ant-menu-submenu>.ant-menu-submenu-title .ant-menu-item-icon+span,.ant-menu.ant-menu-inline-collapsed>.ant-menu-item .anticon+span,.ant-menu.ant-menu-inline-collapsed>.ant-menu-item-group>.ant-menu-item-group-list>.ant-menu-item .anticon+span,.ant-menu.ant-menu-inline-collapsed>.ant-menu-item-group>.ant-menu-item-group-list>.ant-menu-submenu>.ant-menu-submenu-title .anticon+span,.ant-menu.ant-menu-inline-collapsed>.ant-menu-submenu>.ant-menu-submenu-title .anticon+span{display:inline-block;opacity:0}.ant-menu.ant-menu-inline-collapsed .ant-menu-item-icon,.ant-menu.ant-menu-inline-collapsed .anticon{display:inline-block}.ant-menu.ant-menu-inline-collapsed-tooltip{pointer-events:none}.ant-menu.ant-menu-inline-collapsed-tooltip .ant-menu-item-icon,.ant-menu.ant-menu-inline-collapsed-tooltip 
.anticon{display:none}.ant-menu.ant-menu-inline-collapsed-tooltip a{color:#ffffffd9}.ant-menu.ant-menu-inline-collapsed .ant-menu-item-group-title{padding-right:4px;padding-left:4px;overflow:hidden;white-space:nowrap;text-overflow:ellipsis}.ant-menu-item-group-list{margin:0;padding:0}.ant-menu-item-group-list .ant-menu-item,.ant-menu-item-group-list .ant-menu-submenu-title{padding:0 16px 0 28px}.ant-menu-root.ant-menu-vertical,.ant-menu-root.ant-menu-vertical-left,.ant-menu-root.ant-menu-vertical-right,.ant-menu-root.ant-menu-inline{box-shadow:none}.ant-menu-root.ant-menu-inline-collapsed .ant-menu-item>.ant-menu-inline-collapsed-noicon,.ant-menu-root.ant-menu-inline-collapsed .ant-menu-submenu .ant-menu-submenu-title>.ant-menu-inline-collapsed-noicon{font-size:16px;text-align:center}.ant-menu-sub.ant-menu-inline{padding:0;background:#fafafa;border:0;border-radius:0;box-shadow:none}.ant-menu-sub.ant-menu-inline>.ant-menu-item,.ant-menu-sub.ant-menu-inline>.ant-menu-submenu>.ant-menu-submenu-title{height:40px;line-height:40px;list-style-position:inside;list-style-type:disc}.ant-menu-sub.ant-menu-inline .ant-menu-item-group-title{padding-left:32px}.ant-menu-item-disabled,.ant-menu-submenu-disabled{color:#00000040!important;background:none;cursor:not-allowed}.ant-menu-item-disabled:after,.ant-menu-submenu-disabled:after{border-color:transparent!important}.ant-menu-item-disabled a,.ant-menu-submenu-disabled a{color:#00000040!important;pointer-events:none}.ant-menu-item-disabled>.ant-menu-submenu-title,.ant-menu-submenu-disabled>.ant-menu-submenu-title{color:#00000040!important;cursor:not-allowed}.ant-menu-item-disabled>.ant-menu-submenu-title>.ant-menu-submenu-arrow:before,.ant-menu-submenu-disabled>.ant-menu-submenu-title>.ant-menu-submenu-arrow:before,.ant-menu-item-disabled>.ant-menu-submenu-title>.ant-menu-submenu-arrow:after,.ant-menu-submenu-disabled>.ant-menu-submenu-title>.ant-menu-submenu-arrow:after{background:rgba(0,0,0,.25)!important}.ant-layout-header 
.ant-menu{line-height:inherit}.ant-menu-light .ant-menu-item:hover,.ant-menu-light .ant-menu-item-active,.ant-menu-light .ant-menu:not(.ant-menu-inline) .ant-menu-submenu-open,.ant-menu-light .ant-menu-submenu-active,.ant-menu-light .ant-menu-submenu-title:hover{color:#1890ff}.ant-menu.ant-menu-dark,.ant-menu-dark .ant-menu-sub,.ant-menu.ant-menu-dark .ant-menu-sub{color:#ffffffa6;background:#001529}.ant-menu.ant-menu-dark .ant-menu-submenu-title .ant-menu-submenu-arrow,.ant-menu-dark .ant-menu-sub .ant-menu-submenu-title .ant-menu-submenu-arrow,.ant-menu.ant-menu-dark .ant-menu-sub .ant-menu-submenu-title .ant-menu-submenu-arrow{opacity:.45;transition:all .3s}.ant-menu.ant-menu-dark .ant-menu-submenu-title .ant-menu-submenu-arrow:after,.ant-menu-dark .ant-menu-sub .ant-menu-submenu-title .ant-menu-submenu-arrow:after,.ant-menu.ant-menu-dark .ant-menu-sub .ant-menu-submenu-title .ant-menu-submenu-arrow:after,.ant-menu.ant-menu-dark .ant-menu-submenu-title .ant-menu-submenu-arrow:before,.ant-menu-dark .ant-menu-sub .ant-menu-submenu-title .ant-menu-submenu-arrow:before,.ant-menu.ant-menu-dark .ant-menu-sub .ant-menu-submenu-title .ant-menu-submenu-arrow:before{background:#fff}.ant-menu-dark.ant-menu-submenu-popup{background:transparent}.ant-menu-dark .ant-menu-inline.ant-menu-sub{background:#000c17}.ant-menu-dark.ant-menu-horizontal{border-bottom:0}.ant-menu-dark.ant-menu-horizontal>.ant-menu-item,.ant-menu-dark.ant-menu-horizontal>.ant-menu-submenu{top:0;margin-top:0;padding:0 20px;border-color:#001529;border-bottom:0}.ant-menu-dark.ant-menu-horizontal>.ant-menu-item:hover{background-color:#1890ff}.ant-menu-dark.ant-menu-horizontal>.ant-menu-item>a:before{bottom:0}.ant-menu-dark .ant-menu-item,.ant-menu-dark .ant-menu-item-group-title,.ant-menu-dark .ant-menu-item>a,.ant-menu-dark 
.ant-menu-item>span>a{color:#ffffffa6}.ant-menu-dark.ant-menu-inline,.ant-menu-dark.ant-menu-vertical,.ant-menu-dark.ant-menu-vertical-left,.ant-menu-dark.ant-menu-vertical-right{border-right:0}.ant-menu-dark.ant-menu-inline .ant-menu-item,.ant-menu-dark.ant-menu-vertical .ant-menu-item,.ant-menu-dark.ant-menu-vertical-left .ant-menu-item,.ant-menu-dark.ant-menu-vertical-right .ant-menu-item{left:0;margin-left:0;border-right:0}.ant-menu-dark.ant-menu-inline .ant-menu-item:after,.ant-menu-dark.ant-menu-vertical .ant-menu-item:after,.ant-menu-dark.ant-menu-vertical-left .ant-menu-item:after,.ant-menu-dark.ant-menu-vertical-right .ant-menu-item:after{border-right:0}.ant-menu-dark.ant-menu-inline .ant-menu-item,.ant-menu-dark.ant-menu-inline .ant-menu-submenu-title{width:100%}.ant-menu-dark .ant-menu-item:hover,.ant-menu-dark .ant-menu-item-active,.ant-menu-dark .ant-menu-submenu-active,.ant-menu-dark .ant-menu-submenu-open,.ant-menu-dark .ant-menu-submenu-selected,.ant-menu-dark .ant-menu-submenu-title:hover{color:#fff;background-color:transparent}.ant-menu-dark .ant-menu-item:hover>a,.ant-menu-dark .ant-menu-item-active>a,.ant-menu-dark .ant-menu-submenu-active>a,.ant-menu-dark .ant-menu-submenu-open>a,.ant-menu-dark .ant-menu-submenu-selected>a,.ant-menu-dark .ant-menu-submenu-title:hover>a,.ant-menu-dark .ant-menu-item:hover>span>a,.ant-menu-dark .ant-menu-item-active>span>a,.ant-menu-dark .ant-menu-submenu-active>span>a,.ant-menu-dark .ant-menu-submenu-open>span>a,.ant-menu-dark .ant-menu-submenu-selected>span>a,.ant-menu-dark .ant-menu-submenu-title:hover>span>a{color:#fff}.ant-menu-dark .ant-menu-item:hover>.ant-menu-submenu-title>.ant-menu-submenu-arrow,.ant-menu-dark .ant-menu-item-active>.ant-menu-submenu-title>.ant-menu-submenu-arrow,.ant-menu-dark .ant-menu-submenu-active>.ant-menu-submenu-title>.ant-menu-submenu-arrow,.ant-menu-dark .ant-menu-submenu-open>.ant-menu-submenu-title>.ant-menu-submenu-arrow,.ant-menu-dark 
.ant-menu-submenu-selected>.ant-menu-submenu-title>.ant-menu-submenu-arrow,.ant-menu-dark .ant-menu-submenu-title:hover>.ant-menu-submenu-title>.ant-menu-submenu-arrow{opacity:1}.ant-menu-dark .ant-menu-item:hover>.ant-menu-submenu-title>.ant-menu-submenu-arrow:after,.ant-menu-dark .ant-menu-item-active>.ant-menu-submenu-title>.ant-menu-submenu-arrow:after,.ant-menu-dark .ant-menu-submenu-active>.ant-menu-submenu-title>.ant-menu-submenu-arrow:after,.ant-menu-dark .ant-menu-submenu-open>.ant-menu-submenu-title>.ant-menu-submenu-arrow:after,.ant-menu-dark .ant-menu-submenu-selected>.ant-menu-submenu-title>.ant-menu-submenu-arrow:after,.ant-menu-dark .ant-menu-submenu-title:hover>.ant-menu-submenu-title>.ant-menu-submenu-arrow:after,.ant-menu-dark .ant-menu-item:hover>.ant-menu-submenu-title>.ant-menu-submenu-arrow:before,.ant-menu-dark .ant-menu-item-active>.ant-menu-submenu-title>.ant-menu-submenu-arrow:before,.ant-menu-dark .ant-menu-submenu-active>.ant-menu-submenu-title>.ant-menu-submenu-arrow:before,.ant-menu-dark .ant-menu-submenu-open>.ant-menu-submenu-title>.ant-menu-submenu-arrow:before,.ant-menu-dark .ant-menu-submenu-selected>.ant-menu-submenu-title>.ant-menu-submenu-arrow:before,.ant-menu-dark .ant-menu-submenu-title:hover>.ant-menu-submenu-title>.ant-menu-submenu-arrow:before{background:#fff}.ant-menu-dark .ant-menu-item:hover{background-color:transparent}.ant-menu-dark.ant-menu-dark:not(.ant-menu-horizontal) .ant-menu-item-selected{background-color:#1890ff}.ant-menu-dark .ant-menu-item-selected{color:#fff;border-right:0}.ant-menu-dark .ant-menu-item-selected:after{border-right:0}.ant-menu-dark .ant-menu-item-selected>a,.ant-menu-dark .ant-menu-item-selected>span>a,.ant-menu-dark .ant-menu-item-selected>a:hover,.ant-menu-dark .ant-menu-item-selected>span>a:hover{color:#fff}.ant-menu-dark .ant-menu-item-selected .ant-menu-item-icon,.ant-menu-dark .ant-menu-item-selected .anticon{color:#fff}.ant-menu-dark .ant-menu-item-selected 
.ant-menu-item-icon+span,.ant-menu-dark .ant-menu-item-selected .anticon+span{color:#fff}.ant-menu.ant-menu-dark .ant-menu-item-selected,.ant-menu-submenu-popup.ant-menu-dark .ant-menu-item-selected{background-color:#1890ff}.ant-menu-dark .ant-menu-item-disabled,.ant-menu-dark .ant-menu-submenu-disabled,.ant-menu-dark .ant-menu-item-disabled>a,.ant-menu-dark .ant-menu-submenu-disabled>a,.ant-menu-dark .ant-menu-item-disabled>span>a,.ant-menu-dark .ant-menu-submenu-disabled>span>a{color:#ffffff59!important;opacity:.8}.ant-menu-dark .ant-menu-item-disabled>.ant-menu-submenu-title,.ant-menu-dark .ant-menu-submenu-disabled>.ant-menu-submenu-title{color:#ffffff59!important}.ant-menu-dark .ant-menu-item-disabled>.ant-menu-submenu-title>.ant-menu-submenu-arrow:before,.ant-menu-dark .ant-menu-submenu-disabled>.ant-menu-submenu-title>.ant-menu-submenu-arrow:before,.ant-menu-dark .ant-menu-item-disabled>.ant-menu-submenu-title>.ant-menu-submenu-arrow:after,.ant-menu-dark .ant-menu-submenu-disabled>.ant-menu-submenu-title>.ant-menu-submenu-arrow:after{background:rgba(255,255,255,.35)!important}.ant-menu.ant-menu-rtl{direction:rtl;text-align:right}.ant-menu-rtl .ant-menu-item-group-title{text-align:right}.ant-menu-rtl.ant-menu-inline,.ant-menu-rtl.ant-menu-vertical{border-right:none;border-left:1px solid #f0f0f0}.ant-menu-rtl.ant-menu-dark.ant-menu-inline,.ant-menu-rtl.ant-menu-dark.ant-menu-vertical{border-left:none}.ant-menu-rtl.ant-menu-vertical.ant-menu-sub>.ant-menu-item,.ant-menu-rtl.ant-menu-vertical-left.ant-menu-sub>.ant-menu-item,.ant-menu-rtl.ant-menu-vertical-right.ant-menu-sub>.ant-menu-item,.ant-menu-rtl.ant-menu-vertical.ant-menu-sub>.ant-menu-submenu,.ant-menu-rtl.ant-menu-vertical-left.ant-menu-sub>.ant-menu-submenu,.ant-menu-rtl.ant-menu-vertical-right.ant-menu-sub>.ant-menu-submenu{transform-origin:top right}.ant-menu-rtl .ant-menu-item .ant-menu-item-icon,.ant-menu-rtl .ant-menu-submenu-title .ant-menu-item-icon,.ant-menu-rtl .ant-menu-item 
.anticon,.ant-menu-rtl .ant-menu-submenu-title .anticon{margin-right:auto;margin-left:10px}.ant-menu-rtl .ant-menu-item.ant-menu-item-only-child>.ant-menu-item-icon,.ant-menu-rtl .ant-menu-submenu-title.ant-menu-item-only-child>.ant-menu-item-icon,.ant-menu-rtl .ant-menu-item.ant-menu-item-only-child>.anticon,.ant-menu-rtl .ant-menu-submenu-title.ant-menu-item-only-child>.anticon{margin-left:0}.ant-menu-submenu-rtl.ant-menu-submenu-popup{transform-origin:100% 0}.ant-menu-rtl .ant-menu-submenu-vertical>.ant-menu-submenu-title .ant-menu-submenu-arrow,.ant-menu-rtl .ant-menu-submenu-vertical-left>.ant-menu-submenu-title .ant-menu-submenu-arrow,.ant-menu-rtl .ant-menu-submenu-vertical-right>.ant-menu-submenu-title .ant-menu-submenu-arrow,.ant-menu-rtl .ant-menu-submenu-inline>.ant-menu-submenu-title .ant-menu-submenu-arrow{right:auto;left:16px}.ant-menu-rtl .ant-menu-submenu-vertical>.ant-menu-submenu-title .ant-menu-submenu-arrow:before,.ant-menu-rtl .ant-menu-submenu-vertical-left>.ant-menu-submenu-title .ant-menu-submenu-arrow:before,.ant-menu-rtl .ant-menu-submenu-vertical-right>.ant-menu-submenu-title .ant-menu-submenu-arrow:before{transform:rotate(-45deg) translateY(-2px)}.ant-menu-rtl .ant-menu-submenu-vertical>.ant-menu-submenu-title .ant-menu-submenu-arrow:after,.ant-menu-rtl .ant-menu-submenu-vertical-left>.ant-menu-submenu-title .ant-menu-submenu-arrow:after,.ant-menu-rtl .ant-menu-submenu-vertical-right>.ant-menu-submenu-title .ant-menu-submenu-arrow:after{transform:rotate(45deg) translateY(2px)}.ant-menu-rtl.ant-menu-vertical .ant-menu-item:after,.ant-menu-rtl.ant-menu-vertical-left .ant-menu-item:after,.ant-menu-rtl.ant-menu-vertical-right .ant-menu-item:after,.ant-menu-rtl.ant-menu-inline .ant-menu-item:after{right:auto;left:0}.ant-menu-rtl.ant-menu-vertical .ant-menu-item,.ant-menu-rtl.ant-menu-vertical-left .ant-menu-item,.ant-menu-rtl.ant-menu-vertical-right .ant-menu-item,.ant-menu-rtl.ant-menu-inline .ant-menu-item,.ant-menu-rtl.ant-menu-vertical 
.ant-menu-submenu-title,.ant-menu-rtl.ant-menu-vertical-left .ant-menu-submenu-title,.ant-menu-rtl.ant-menu-vertical-right .ant-menu-submenu-title,.ant-menu-rtl.ant-menu-inline .ant-menu-submenu-title{text-align:right}.ant-menu-rtl.ant-menu-inline .ant-menu-submenu-title{padding-right:0;padding-left:34px}.ant-menu-rtl.ant-menu-vertical .ant-menu-submenu-title{padding-right:16px;padding-left:34px}.ant-menu-rtl.ant-menu-inline-collapsed.ant-menu-vertical .ant-menu-submenu-title{padding:0 calc(50% - 8px)}.ant-menu-rtl .ant-menu-item-group-list .ant-menu-item,.ant-menu-rtl .ant-menu-item-group-list .ant-menu-submenu-title{padding:0 28px 0 16px}.ant-menu-sub.ant-menu-inline{border:0}.ant-menu-rtl.ant-menu-sub.ant-menu-inline .ant-menu-item-group-title{padding-right:32px;padding-left:0}.ant-tooltip{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:absolute;z-index:1060;display:block;max-width:250px;visibility:visible}.ant-tooltip-hidden{display:none}.ant-tooltip-placement-top,.ant-tooltip-placement-topLeft,.ant-tooltip-placement-topRight{padding-bottom:8px}.ant-tooltip-placement-right,.ant-tooltip-placement-rightTop,.ant-tooltip-placement-rightBottom{padding-left:8px}.ant-tooltip-placement-bottom,.ant-tooltip-placement-bottomLeft,.ant-tooltip-placement-bottomRight{padding-top:8px}.ant-tooltip-placement-left,.ant-tooltip-placement-leftTop,.ant-tooltip-placement-leftBottom{padding-right:8px}.ant-tooltip-inner{min-width:30px;min-height:32px;padding:6px 8px;color:#fff;text-align:left;text-decoration:none;word-wrap:break-word;background-color:#000000bf;border-radius:2px;box-shadow:0 2px 8px 
#00000026}.ant-tooltip-arrow{position:absolute;display:block;width:13.07106781px;height:13.07106781px;overflow:hidden;background:transparent;pointer-events:none}.ant-tooltip-arrow-content{position:absolute;top:0;right:0;bottom:0;left:0;display:block;width:5px;height:5px;margin:auto;background-color:#000000bf;content:"";pointer-events:auto}.ant-tooltip-placement-top .ant-tooltip-arrow,.ant-tooltip-placement-topLeft .ant-tooltip-arrow,.ant-tooltip-placement-topRight .ant-tooltip-arrow{bottom:-5.07106781px}.ant-tooltip-placement-top .ant-tooltip-arrow-content,.ant-tooltip-placement-topLeft .ant-tooltip-arrow-content,.ant-tooltip-placement-topRight .ant-tooltip-arrow-content{box-shadow:3px 3px 7px #00000012;transform:translateY(-6.53553391px) rotate(45deg)}.ant-tooltip-placement-top .ant-tooltip-arrow{left:50%;transform:translate(-50%)}.ant-tooltip-placement-topLeft .ant-tooltip-arrow{left:13px}.ant-tooltip-placement-topRight .ant-tooltip-arrow{right:13px}.ant-tooltip-placement-right .ant-tooltip-arrow,.ant-tooltip-placement-rightTop .ant-tooltip-arrow,.ant-tooltip-placement-rightBottom .ant-tooltip-arrow{left:-5.07106781px}.ant-tooltip-placement-right .ant-tooltip-arrow-content,.ant-tooltip-placement-rightTop .ant-tooltip-arrow-content,.ant-tooltip-placement-rightBottom .ant-tooltip-arrow-content{box-shadow:-3px 3px 7px #00000012;transform:translate(6.53553391px) rotate(45deg)}.ant-tooltip-placement-right .ant-tooltip-arrow{top:50%;transform:translateY(-50%)}.ant-tooltip-placement-rightTop .ant-tooltip-arrow{top:5px}.ant-tooltip-placement-rightBottom .ant-tooltip-arrow{bottom:5px}.ant-tooltip-placement-left .ant-tooltip-arrow,.ant-tooltip-placement-leftTop .ant-tooltip-arrow,.ant-tooltip-placement-leftBottom .ant-tooltip-arrow{right:-5.07106781px}.ant-tooltip-placement-left .ant-tooltip-arrow-content,.ant-tooltip-placement-leftTop .ant-tooltip-arrow-content,.ant-tooltip-placement-leftBottom .ant-tooltip-arrow-content{box-shadow:3px -3px 7px 
#00000012;transform:translate(-6.53553391px) rotate(45deg)}.ant-tooltip-placement-left .ant-tooltip-arrow{top:50%;transform:translateY(-50%)}.ant-tooltip-placement-leftTop .ant-tooltip-arrow{top:5px}.ant-tooltip-placement-leftBottom .ant-tooltip-arrow{bottom:5px}.ant-tooltip-placement-bottom .ant-tooltip-arrow,.ant-tooltip-placement-bottomLeft .ant-tooltip-arrow,.ant-tooltip-placement-bottomRight .ant-tooltip-arrow{top:-5.07106781px}.ant-tooltip-placement-bottom .ant-tooltip-arrow-content,.ant-tooltip-placement-bottomLeft .ant-tooltip-arrow-content,.ant-tooltip-placement-bottomRight .ant-tooltip-arrow-content{box-shadow:-3px -3px 7px #00000012;transform:translateY(6.53553391px) rotate(45deg)}.ant-tooltip-placement-bottom .ant-tooltip-arrow{left:50%;transform:translate(-50%)}.ant-tooltip-placement-bottomLeft .ant-tooltip-arrow{left:13px}.ant-tooltip-placement-bottomRight .ant-tooltip-arrow{right:13px}.ant-tooltip-pink .ant-tooltip-inner,.ant-tooltip-pink .ant-tooltip-arrow-content,.ant-tooltip-magenta .ant-tooltip-inner,.ant-tooltip-magenta .ant-tooltip-arrow-content{background-color:#eb2f96}.ant-tooltip-red .ant-tooltip-inner,.ant-tooltip-red .ant-tooltip-arrow-content{background-color:#f5222d}.ant-tooltip-volcano .ant-tooltip-inner,.ant-tooltip-volcano .ant-tooltip-arrow-content{background-color:#fa541c}.ant-tooltip-orange .ant-tooltip-inner,.ant-tooltip-orange .ant-tooltip-arrow-content{background-color:#fa8c16}.ant-tooltip-yellow .ant-tooltip-inner,.ant-tooltip-yellow .ant-tooltip-arrow-content{background-color:#fadb14}.ant-tooltip-gold .ant-tooltip-inner,.ant-tooltip-gold .ant-tooltip-arrow-content{background-color:#faad14}.ant-tooltip-cyan .ant-tooltip-inner,.ant-tooltip-cyan .ant-tooltip-arrow-content{background-color:#13c2c2}.ant-tooltip-lime .ant-tooltip-inner,.ant-tooltip-lime .ant-tooltip-arrow-content{background-color:#a0d911}.ant-tooltip-green .ant-tooltip-inner,.ant-tooltip-green .ant-tooltip-arrow-content{background-color:#52c41a}.ant-tooltip-blue 
.ant-tooltip-inner,.ant-tooltip-blue .ant-tooltip-arrow-content{background-color:#1890ff}.ant-tooltip-geekblue .ant-tooltip-inner,.ant-tooltip-geekblue .ant-tooltip-arrow-content{background-color:#2f54eb}.ant-tooltip-purple .ant-tooltip-inner,.ant-tooltip-purple .ant-tooltip-arrow-content{background-color:#722ed1}.ant-dropdown-menu-item.ant-dropdown-menu-item-danger{color:#ff4d4f}.ant-dropdown-menu-item.ant-dropdown-menu-item-danger:hover{color:#fff;background-color:#ff4d4f}.ant-dropdown{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:absolute;top:-9999px;left:-9999px;z-index:1050;display:block}.ant-dropdown:before{position:absolute;top:-4px;right:0;bottom:-4px;left:-7px;z-index:-9999;opacity:.0001;content:" "}.ant-dropdown-wrap{position:relative}.ant-dropdown-wrap .ant-btn>.anticon-down{display:inline-block;font-size:12px;font-size:10px \	;transform:scale(.83333333) rotate(0)}:root .ant-dropdown-wrap .ant-btn>.anticon-down{font-size:12px}.ant-dropdown-wrap .anticon-down:before{transition:transform .2s}.ant-dropdown-wrap-open .anticon-down:before{transform:rotate(180deg)}.ant-dropdown-hidden,.ant-dropdown-menu-hidden{display:none}.ant-dropdown-menu{position:relative;margin:0;padding:4px 0;text-align:left;list-style-type:none;background-color:#fff;background-clip:padding-box;border-radius:2px;outline:none;box-shadow:0 2px 8px #00000026;-webkit-transform:translate3d(0,0,0)}.ant-dropdown-menu-item-group-title{padding:5px 12px;color:#00000073;transition:all .3s}.ant-dropdown-menu-submenu-popup{position:absolute;z-index:1050;background:transparent;box-shadow:none;transform-origin:0 0}.ant-dropdown-menu-submenu-popup>.ant-dropdown-menu{transform-origin:0 0}.ant-dropdown-menu-submenu-popup ul,.ant-dropdown-menu-submenu-popup li{list-style:none}.ant-dropdown-menu-submenu-popup 
ul{margin-right:.3em;margin-left:.3em}.ant-dropdown-menu-item,.ant-dropdown-menu-submenu-title{clear:both;margin:0;padding:5px 12px;color:#000000d9;font-weight:400;font-size:14px;line-height:22px;white-space:nowrap;cursor:pointer;transition:all .3s}.ant-dropdown-menu-item>.anticon:first-child,.ant-dropdown-menu-submenu-title>.anticon:first-child,.ant-dropdown-menu-item>span>.anticon:first-child,.ant-dropdown-menu-submenu-title>span>.anticon:first-child{min-width:12px;margin-right:8px;font-size:12px}.ant-dropdown-menu-item>a,.ant-dropdown-menu-submenu-title>a{display:block;margin:-5px -12px;padding:5px 12px;color:#000000d9;transition:all .3s}.ant-dropdown-menu-item>a:hover,.ant-dropdown-menu-submenu-title>a:hover{color:#000000d9}.ant-dropdown-menu-item-selected,.ant-dropdown-menu-submenu-title-selected,.ant-dropdown-menu-item-selected>a,.ant-dropdown-menu-submenu-title-selected>a{color:#1890ff;background-color:#e6f7ff}.ant-dropdown-menu-item:hover,.ant-dropdown-menu-submenu-title:hover{background-color:#f5f5f5}.ant-dropdown-menu-item-disabled,.ant-dropdown-menu-submenu-title-disabled{color:#00000040;cursor:not-allowed}.ant-dropdown-menu-item-disabled:hover,.ant-dropdown-menu-submenu-title-disabled:hover{color:#00000040;background-color:#fff;cursor:not-allowed}.ant-dropdown-menu-item-divider,.ant-dropdown-menu-submenu-title-divider{height:1px;margin:4px 0;overflow:hidden;line-height:0;background-color:#f0f0f0}.ant-dropdown-menu-item .ant-dropdown-menu-submenu-arrow,.ant-dropdown-menu-submenu-title .ant-dropdown-menu-submenu-arrow{position:absolute;right:8px}.ant-dropdown-menu-item .ant-dropdown-menu-submenu-arrow-icon,.ant-dropdown-menu-submenu-title .ant-dropdown-menu-submenu-arrow-icon{color:#00000073;font-style:normal;display:inline-block;font-size:12px;font-size:10px \	;transform:scale(.83333333) rotate(0)}:root .ant-dropdown-menu-item .ant-dropdown-menu-submenu-arrow-icon,:root .ant-dropdown-menu-submenu-title 
.ant-dropdown-menu-submenu-arrow-icon{font-size:12px}.ant-dropdown-menu-item-group-list{margin:0 8px;padding:0;list-style:none}.ant-dropdown-menu-submenu-title{padding-right:26px}.ant-dropdown-menu-submenu-vertical{position:relative}.ant-dropdown-menu-submenu-vertical>.ant-dropdown-menu{position:absolute;top:0;left:100%;min-width:100%;margin-left:4px;transform-origin:0 0}.ant-dropdown-menu-submenu.ant-dropdown-menu-submenu-disabled .ant-dropdown-menu-submenu-title,.ant-dropdown-menu-submenu.ant-dropdown-menu-submenu-disabled .ant-dropdown-menu-submenu-title .ant-dropdown-menu-submenu-arrow-icon{color:#00000040;background-color:#fff;cursor:not-allowed}.ant-dropdown-menu-submenu-selected .ant-dropdown-menu-submenu-title{color:#1890ff}.ant-dropdown.slide-down-enter.slide-down-enter-active.ant-dropdown-placement-bottomLeft,.ant-dropdown.slide-down-appear.slide-down-appear-active.ant-dropdown-placement-bottomLeft,.ant-dropdown.slide-down-enter.slide-down-enter-active.ant-dropdown-placement-bottomCenter,.ant-dropdown.slide-down-appear.slide-down-appear-active.ant-dropdown-placement-bottomCenter,.ant-dropdown.slide-down-enter.slide-down-enter-active.ant-dropdown-placement-bottomRight,.ant-dropdown.slide-down-appear.slide-down-appear-active.ant-dropdown-placement-bottomRight{-webkit-animation-name:antSlideUpIn;animation-name:antSlideUpIn}.ant-dropdown.slide-up-enter.slide-up-enter-active.ant-dropdown-placement-topLeft,.ant-dropdown.slide-up-appear.slide-up-appear-active.ant-dropdown-placement-topLeft,.ant-dropdown.slide-up-enter.slide-up-enter-active.ant-dropdown-placement-topCenter,.ant-dropdown.slide-up-appear.slide-up-appear-active.ant-dropdown-placement-topCenter,.ant-dropdown.slide-up-enter.slide-up-enter-active.ant-dropdown-placement-topRight,.ant-dropdown.slide-up-appear.slide-up-appear-active.ant-dropdown-placement-topRight{-webkit-animation-name:antSlideDownIn;animation-name:antSlideDownIn}
/* Dropdown leave animations. The line is broken here, at a rule boundary, so that no class name is split: whitespace inside a compound selector is a descendant combinator and would stop the selector from matching. */
.ant-dropdown.slide-down-leave.slide-down-leave-active.ant-dropdown-placement-bottomLeft,.ant-dropdown.slide-down-leave.slide-down-leave-active.ant-dropdown-placement-bottomCenter,.ant-dropdown.slide-down-leave.slide-down-leave-active.ant-dropdown-placement-bottomRight{-webkit-animation-name:antSlideUpOut;animation-name:antSlideUpOut}.ant-dropdown.slide-up-leave.slide-up-leave-active.ant-dropdown-placement-topLeft,.ant-dropdown.slide-up-leave.slide-up-leave-active.ant-dropdown-placement-topCenter,.ant-dropdown.slide-up-leave.slide-up-leave-active.ant-dropdown-placement-topRight{-webkit-animation-name:antSlideDownOut;animation-name:antSlideDownOut}.ant-dropdown-trigger>.anticon.anticon-down,.ant-dropdown-link>.anticon.anticon-down{display:inline-block;font-size:12px;font-size:10px \	;transform:scale(.83333333) rotate(0)}:root .ant-dropdown-trigger>.anticon.anticon-down,:root .ant-dropdown-link>.anticon.anticon-down{font-size:12px}.ant-dropdown-button{white-space:nowrap}.ant-dropdown-button.ant-btn-group>.ant-btn:last-child:not(:first-child){padding-right:8px;padding-left:8px}.ant-dropdown-button .anticon.anticon-down{display:inline-block;font-size:12px;font-size:10px \	;transform:scale(.83333333) rotate(0)}:root .ant-dropdown-button .anticon.anticon-down{font-size:12px}.ant-dropdown-menu-dark,.ant-dropdown-menu-dark .ant-dropdown-menu{background:#001529}.ant-dropdown-menu-dark .ant-dropdown-menu-item,.ant-dropdown-menu-dark .ant-dropdown-menu-submenu-title,.ant-dropdown-menu-dark .ant-dropdown-menu-item>a{color:#ffffffa6}.ant-dropdown-menu-dark .ant-dropdown-menu-item .ant-dropdown-menu-submenu-arrow:after,.ant-dropdown-menu-dark .ant-dropdown-menu-submenu-title .ant-dropdown-menu-submenu-arrow:after,.ant-dropdown-menu-dark .ant-dropdown-menu-item>a .ant-dropdown-menu-submenu-arrow:after{color:#ffffffa6}.ant-dropdown-menu-dark .ant-dropdown-menu-item:hover,.ant-dropdown-menu-dark .ant-dropdown-menu-submenu-title:hover,.ant-dropdown-menu-dark .ant-dropdown-menu-item>a:hover{color:#fff;background:transparent}.ant-dropdown-menu-dark 
.ant-dropdown-menu-item-selected,.ant-dropdown-menu-dark .ant-dropdown-menu-item-selected:hover,.ant-dropdown-menu-dark .ant-dropdown-menu-item-selected>a{color:#fff;background:#1890ff}.ant-fullcalendar{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";border-top:1px solid #d9d9d9;outline:none}.ant-select.ant-fullcalendar-year-select{min-width:80px}.ant-select.ant-fullcalendar-year-select.ant-select-sm{min-width:70px}.ant-select.ant-fullcalendar-month-select{min-width:80px;margin-left:8px}.ant-select.ant-fullcalendar-month-select.ant-select-sm{min-width:70px}.ant-fullcalendar-header{padding:11px 16px 11px 0;text-align:right}.ant-fullcalendar-header .ant-select-dropdown{text-align:left}.ant-fullcalendar-header .ant-radio-group{margin-left:8px;text-align:left}.ant-fullcalendar-header label.ant-radio-button{height:22px;padding:0 10px;line-height:20px}.ant-fullcalendar-date-panel{position:relative;outline:none}.ant-fullcalendar-calendar-body{padding:8px 12px}.ant-fullcalendar table{width:100%;max-width:100%;height:256px;background-color:transparent;border-collapse:collapse}.ant-fullcalendar table,.ant-fullcalendar th,.ant-fullcalendar td{border:0}.ant-fullcalendar td{position:relative}.ant-fullcalendar-calendar-table{margin-bottom:0;border-spacing:0}.ant-fullcalendar-column-header{width:33px;padding:0;line-height:18px;text-align:center}.ant-fullcalendar-column-header .ant-fullcalendar-column-header-inner{display:block;font-weight:400}.ant-fullcalendar-week-number-header .ant-fullcalendar-column-header-inner{display:none}.ant-fullcalendar-month,.ant-fullcalendar-date{text-align:center;transition:all .3s}.ant-fullcalendar-value{display:block;width:24px;height:24px;margin:0 auto;padding:0;color:#000000d9;line-height:24px;background:transparent;border-radius:2px;transition:all 
.3s}.ant-fullcalendar-value:hover{background:#f5f5f5;cursor:pointer}.ant-fullcalendar-value:active{color:#fff;background:#1890ff}.ant-fullcalendar-month-panel-cell .ant-fullcalendar-value{width:48px}.ant-fullcalendar-today .ant-fullcalendar-value,.ant-fullcalendar-month-panel-current-cell .ant-fullcalendar-value{box-shadow:0 0 0 1px #1890ff inset}.ant-fullcalendar-selected-day .ant-fullcalendar-value,.ant-fullcalendar-month-panel-selected-cell .ant-fullcalendar-value{color:#fff;background:#1890ff}.ant-fullcalendar-disabled-cell-first-of-row .ant-fullcalendar-value{border-top-left-radius:2px;border-bottom-left-radius:2px}.ant-fullcalendar-disabled-cell-last-of-row .ant-fullcalendar-value{border-top-right-radius:2px;border-bottom-right-radius:2px}.ant-fullcalendar-last-month-cell .ant-fullcalendar-value,.ant-fullcalendar-next-month-btn-day .ant-fullcalendar-value{color:#00000040}.ant-fullcalendar-month-panel-table{width:100%;table-layout:fixed;border-collapse:separate}.ant-fullcalendar-content{position:absolute;bottom:-9px;left:0;width:100%}.ant-fullcalendar-fullscreen{border-top:0}.ant-fullcalendar-fullscreen .ant-fullcalendar-table{table-layout:fixed}.ant-fullcalendar-fullscreen .ant-fullcalendar-header .ant-radio-group{margin-left:16px}.ant-fullcalendar-fullscreen .ant-fullcalendar-header label.ant-radio-button{height:32px;line-height:30px}.ant-fullcalendar-fullscreen .ant-fullcalendar-month,.ant-fullcalendar-fullscreen .ant-fullcalendar-date{display:block;height:116px;margin:0 4px;padding:4px 8px;color:#000000d9;text-align:left;border-top:2px solid #f0f0f0;transition:background .3s}.ant-fullcalendar-fullscreen .ant-fullcalendar-month:hover,.ant-fullcalendar-fullscreen .ant-fullcalendar-date:hover{background:#f5f5f5;cursor:pointer}.ant-fullcalendar-fullscreen .ant-fullcalendar-month:active,.ant-fullcalendar-fullscreen .ant-fullcalendar-date:active{background:#bae7ff}.ant-fullcalendar-fullscreen 
.ant-fullcalendar-column-header{padding-right:12px;padding-bottom:5px;text-align:right}.ant-fullcalendar-fullscreen .ant-fullcalendar-value{width:auto;text-align:right;background:transparent}.ant-fullcalendar-fullscreen .ant-fullcalendar-today .ant-fullcalendar-value{color:#000000d9}.ant-fullcalendar-fullscreen .ant-fullcalendar-month-panel-current-cell .ant-fullcalendar-month,.ant-fullcalendar-fullscreen .ant-fullcalendar-today .ant-fullcalendar-date{background:transparent;border-top-color:#1890ff}.ant-fullcalendar-fullscreen .ant-fullcalendar-month-panel-current-cell .ant-fullcalendar-value,.ant-fullcalendar-fullscreen .ant-fullcalendar-today .ant-fullcalendar-value{box-shadow:none}.ant-fullcalendar-fullscreen .ant-fullcalendar-month-panel-selected-cell .ant-fullcalendar-month,.ant-fullcalendar-fullscreen .ant-fullcalendar-selected-day .ant-fullcalendar-date{background:#e6f7ff}.ant-fullcalendar-fullscreen .ant-fullcalendar-month-panel-selected-cell .ant-fullcalendar-value,.ant-fullcalendar-fullscreen .ant-fullcalendar-selected-day .ant-fullcalendar-value{color:#1890ff}.ant-fullcalendar-fullscreen .ant-fullcalendar-last-month-cell .ant-fullcalendar-date,.ant-fullcalendar-fullscreen .ant-fullcalendar-next-month-btn-day .ant-fullcalendar-date{color:#00000040}.ant-fullcalendar-fullscreen .ant-fullcalendar-content{position:static;width:auto;height:88px;overflow-y:auto}.ant-fullcalendar-disabled-cell .ant-fullcalendar-date,.ant-fullcalendar-disabled-cell .ant-fullcalendar-date:hover{cursor:not-allowed}.ant-fullcalendar-disabled-cell:not(.ant-fullcalendar-today) .ant-fullcalendar-date,.ant-fullcalendar-disabled-cell:not(.ant-fullcalendar-today) .ant-fullcalendar-date:hover{background:transparent}.ant-fullcalendar-disabled-cell 
.ant-fullcalendar-value{width:auto;color:#00000040;border-radius:0;cursor:not-allowed}.ant-radio-group{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";display:inline-block}.ant-radio-wrapper{box-sizing:border-box;margin:0 8px 0 0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:relative;display:inline-block;white-space:nowrap;cursor:pointer}.ant-radio{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:relative;display:inline-block;line-height:1;white-space:nowrap;vertical-align:sub;outline:none;cursor:pointer}.ant-radio-wrapper:hover .ant-radio,.ant-radio:hover .ant-radio-inner,.ant-radio-input:focus+.ant-radio-inner{border-color:#1890ff}.ant-radio-input:focus+.ant-radio-inner{box-shadow:0 0 0 3px #1890ff14}.ant-radio-checked:after{position:absolute;top:0;left:0;width:100%;height:100%;border:1px solid #1890ff;border-radius:50%;visibility:hidden;-webkit-animation:antRadioEffect .36s ease-in-out;animation:antRadioEffect .36s ease-in-out;-webkit-animation-fill-mode:both;animation-fill-mode:both;content:""}.ant-radio:hover:after,.ant-radio-wrapper:hover .ant-radio:after{visibility:visible}.ant-radio-inner{position:relative;top:0;left:0;display:block;width:16px;height:16px;background-color:#fff;border-color:#d9d9d9;border-style:solid;border-width:1px;border-radius:100px;transition:all .3s}
/* Inner dot of the radio button. The content string " " must stay on a single line: an unescaped newline inside a CSS string makes the declaration invalid and breaks parsing of the rules that follow. */
.ant-radio-inner:after{position:absolute;top:3px;left:3px;display:table;width:8px;height:8px;background-color:#1890ff;border-top:0;border-left:0;border-radius:8px;transform:scale(0);opacity:0;transition:all .3s cubic-bezier(.78,.14,.15,.86);content:" "}.ant-radio-input{position:absolute;top:0;right:0;bottom:0;left:0;z-index:1;cursor:pointer;opacity:0}.ant-radio-checked .ant-radio-inner{border-color:#1890ff}.ant-radio-checked .ant-radio-inner:after{transform:scale(1);opacity:1;transition:all .3s cubic-bezier(.78,.14,.15,.86)}.ant-radio-disabled .ant-radio-inner{background-color:#f5f5f5;border-color:#d9d9d9!important;cursor:not-allowed}.ant-radio-disabled .ant-radio-inner:after{background-color:#0003}.ant-radio-disabled .ant-radio-input{cursor:not-allowed}.ant-radio-disabled+span{color:#00000040;cursor:not-allowed}span.ant-radio+*{padding-right:8px;padding-left:8px}.ant-radio-button-wrapper{position:relative;display:inline-block;height:32px;margin:0;padding:0 15px;color:#000000d9;line-height:30px;background:#fff;border:1px solid #d9d9d9;border-top-width:1.02px;border-left:0;cursor:pointer;transition:color .3s,background .3s,border-color .3s,box-shadow .3s}.ant-radio-button-wrapper a{color:#000000d9}.ant-radio-button-wrapper>.ant-radio-button{display:block;width:0;height:0;margin-left:0}.ant-radio-group-large .ant-radio-button-wrapper{height:40px;font-size:16px;line-height:38px}.ant-radio-group-small .ant-radio-button-wrapper{height:24px;padding:0 7px;line-height:22px}.ant-radio-button-wrapper:not(:first-child):before{position:absolute;top:-1px;left:-1px;display:block;box-sizing:content-box;width:1px;height:100%;padding:1px 0;background-color:#d9d9d9;transition:background-color .3s;content:""}.ant-radio-button-wrapper:first-child{border-left:1px solid #d9d9d9;border-radius:2px 0 0 2px}.ant-radio-button-wrapper:last-child{border-radius:0 2px 2px 0}.ant-radio-button-wrapper:first-child:last-child{border-radius:2px}.ant-radio-button-wrapper:hover{position:relative;color:#1890ff}.ant-radio-button-wrapper:focus-within{box-shadow:0 0 0 3px #1890ff14}.ant-radio-button-wrapper .ant-radio-inner,.ant-radio-button-wrapper input[type=checkbox],.ant-radio-button-wrapper 
input[type=radio]{width:0;height:0;opacity:0;pointer-events:none}.ant-radio-button-wrapper-checked:not(.ant-radio-button-wrapper-disabled){z-index:1;color:#1890ff;background:#fff;border-color:#1890ff}.ant-radio-button-wrapper-checked:not(.ant-radio-button-wrapper-disabled):before{background-color:#1890ff}.ant-radio-button-wrapper-checked:not(.ant-radio-button-wrapper-disabled):first-child{border-color:#1890ff}.ant-radio-button-wrapper-checked:not(.ant-radio-button-wrapper-disabled):hover{color:#40a9ff;border-color:#40a9ff}.ant-radio-button-wrapper-checked:not(.ant-radio-button-wrapper-disabled):hover:before{background-color:#40a9ff}.ant-radio-button-wrapper-checked:not(.ant-radio-button-wrapper-disabled):active{color:#096dd9;border-color:#096dd9}.ant-radio-button-wrapper-checked:not(.ant-radio-button-wrapper-disabled):active:before{background-color:#096dd9}.ant-radio-button-wrapper-checked:not(.ant-radio-button-wrapper-disabled):focus-within{box-shadow:0 0 0 3px #1890ff14}.ant-radio-group-solid .ant-radio-button-wrapper-checked:not(.ant-radio-button-wrapper-disabled){color:#fff;background:#1890ff;border-color:#1890ff}.ant-radio-group-solid .ant-radio-button-wrapper-checked:not(.ant-radio-button-wrapper-disabled):hover{color:#fff;background:#40a9ff;border-color:#40a9ff}.ant-radio-group-solid .ant-radio-button-wrapper-checked:not(.ant-radio-button-wrapper-disabled):active{color:#fff;background:#096dd9;border-color:#096dd9}.ant-radio-group-solid .ant-radio-button-wrapper-checked:not(.ant-radio-button-wrapper-disabled):focus-within{box-shadow:0 0 0 3px 
#1890ff14}.ant-radio-button-wrapper-disabled{color:#00000040;background-color:#f5f5f5;border-color:#d9d9d9;cursor:not-allowed}.ant-radio-button-wrapper-disabled:first-child,.ant-radio-button-wrapper-disabled:hover{color:#00000040;background-color:#f5f5f5;border-color:#d9d9d9}.ant-radio-button-wrapper-disabled:first-child{border-left-color:#d9d9d9}.ant-radio-button-wrapper-disabled.ant-radio-button-wrapper-checked{color:#00000040;background-color:#e6e6e6;border-color:#d9d9d9;box-shadow:none}@-webkit-keyframes antRadioEffect{0%{transform:scale(1);opacity:.5}to{transform:scale(1.6);opacity:0}}@keyframes antRadioEffect{0%{transform:scale(1);opacity:.5}to{transform:scale(1.6);opacity:0}}@supports (-moz-appearance: meterbar) and (background-blend-mode: difference,normal){.ant-radio{vertical-align:text-bottom}}.ant-card{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:relative;background:#fff;border-radius:2px;transition:all .3s}/* transition takes a comma-separated list; without the comma the value is invalid and the declaration is ignored */.ant-card-hoverable{cursor:pointer;transition:box-shadow .3s,border-color .3s}.ant-card-hoverable:hover{border-color:#00000017;box-shadow:0 2px 8px #00000017}.ant-card-bordered{border:1px solid #f0f0f0}.ant-card-head{min-height:48px;margin-bottom:-1px;padding:0 24px;color:#000000d9;font-weight:500;font-size:16px;background:transparent;border-bottom:1px solid #f0f0f0;border-radius:2px 2px 0 0}.ant-card-head:before,.ant-card-head:after{display:table;content:""}.ant-card-head:after{clear:both}.ant-card-head-wrapper{display:flex;align-items:center}.ant-card-head-title{display:inline-block;flex:1;padding:16px 0;overflow:hidden;white-space:nowrap;text-overflow:ellipsis}.ant-card-head .ant-tabs{clear:both;margin-bottom:-17px;color:#000000d9;font-weight:400;font-size:14px}.ant-card-head .ant-tabs-bar{border-bottom:1px solid #f0f0f0}.ant-card-extra{float:right;margin-left:auto;padding:16px 
0;color:#000000d9;font-weight:400;font-size:14px}.ant-card-body{padding:24px}.ant-card-body:before,.ant-card-body:after{display:table;content:""}.ant-card-body:after{clear:both}.ant-card-contain-grid:not(.ant-card-loading) .ant-card-body{margin:-1px 0 0 -1px;padding:0}.ant-card-grid{float:left;width:33.33%;padding:24px;border:0;border-radius:0;box-shadow:1px 0 #f0f0f0,0 1px #f0f0f0,1px 1px #f0f0f0,1px 0 #f0f0f0 inset,0 1px #f0f0f0 inset;transition:all .3s}.ant-card-grid-hoverable:hover{position:relative;z-index:1;box-shadow:0 2px 8px #00000026}.ant-card-contain-tabs>.ant-card-head .ant-card-head-title{min-height:32px;padding-bottom:0}.ant-card-contain-tabs>.ant-card-head .ant-card-extra{padding-bottom:0}.ant-card-cover>*{display:block;width:100%}.ant-card-cover img{border-radius:2px 2px 0 0}.ant-card-actions{margin:0;padding:0;list-style:none;background:#fafafa;border-top:1px solid #f0f0f0}.ant-card-actions:before,.ant-card-actions:after{display:table;content:""}.ant-card-actions:after{clear:both}.ant-card-actions>li{float:left;margin:12px 0;color:#00000073;text-align:center}.ant-card-actions>li>span{position:relative;display:block;min-width:32px;font-size:14px;line-height:22px;cursor:pointer}.ant-card-actions>li>span:hover{color:#1890ff;transition:color .3s}.ant-card-actions>li>span a:not(.ant-btn),.ant-card-actions>li>span>.anticon{display:inline-block;width:100%;color:#00000073;line-height:22px;transition:color .3s}.ant-card-actions>li>span a:not(.ant-btn):hover,.ant-card-actions>li>span>.anticon:hover{color:#1890ff}.ant-card-actions>li>span>.anticon{font-size:16px;line-height:22px}.ant-card-actions>li:not(:last-child){border-right:1px solid #f0f0f0}.ant-card-type-inner .ant-card-head{padding:0 24px;background:#fafafa}.ant-card-type-inner .ant-card-head-title{padding:12px 0;font-size:14px}.ant-card-type-inner .ant-card-body{padding:16px 24px}.ant-card-type-inner .ant-card-extra{padding:13.5px 0}.ant-card-meta{margin:-4px 
0}.ant-card-meta:before,.ant-card-meta:after{display:table;content:""}.ant-card-meta:after{clear:both}.ant-card-meta-avatar{float:left;padding-right:16px}.ant-card-meta-detail{overflow:hidden}.ant-card-meta-detail>div:not(:last-child){margin-bottom:8px}.ant-card-meta-title{overflow:hidden;color:#000000d9;font-weight:500;font-size:16px;white-space:nowrap;text-overflow:ellipsis}.ant-card-meta-description{color:#00000073}.ant-card-loading{overflow:hidden}.ant-card-loading .ant-card-body{-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.ant-card-loading-content p{margin:0}.ant-card-loading-block{height:14px;margin:4px 0;background:linear-gradient(90deg,rgba(207,216,220,.2),rgba(207,216,220,.4),rgba(207,216,220,.2));background-size:600% 600%;border-radius:2px;-webkit-animation:card-loading 1.4s ease infinite;animation:card-loading 1.4s ease infinite}@-webkit-keyframes card-loading{0%,to{background-position:0 50%}50%{background-position:100% 50%}}@keyframes card-loading{0%,to{background-position:0 50%}50%{background-position:100% 50%}}.ant-card-small>.ant-card-head{min-height:36px;padding:0 12px;font-size:14px}.ant-card-small>.ant-card-head>.ant-card-head-wrapper>.ant-card-head-title{padding:8px 0}.ant-card-small>.ant-card-head>.ant-card-head-wrapper>.ant-card-extra{padding:8px 0;font-size:14px}.ant-card-small>.ant-card-body{padding:12px}.ant-tabs.ant-tabs-card .ant-tabs-card-bar .ant-tabs-nav-container{height:40px}.ant-tabs.ant-tabs-card .ant-tabs-card-bar .ant-tabs-ink-bar{visibility:hidden}.ant-tabs.ant-tabs-card .ant-tabs-card-bar .ant-tabs-tab{height:40px;margin:0 2px 0 0;padding:0 16px;line-height:38px;background:#fafafa;border:1px solid #f0f0f0;border-radius:2px 2px 0 0;transition:all .3s cubic-bezier(.645,.045,.355,1)}.ant-tabs.ant-tabs-card .ant-tabs-card-bar .ant-tabs-tab-active{height:40px;color:#1890ff;background:#fff;border-color:#f0f0f0;border-bottom:1px solid #fff}.ant-tabs.ant-tabs-card .ant-tabs-card-bar 
.ant-tabs-tab-active:before{border-top:2px solid transparent}.ant-tabs.ant-tabs-card .ant-tabs-card-bar .ant-tabs-tab-disabled{color:#1890ff;color:#00000040}.ant-tabs.ant-tabs-card .ant-tabs-card-bar .ant-tabs-tab-inactive{padding:0}.ant-tabs.ant-tabs-card .ant-tabs-card-bar .ant-tabs-nav-wrap{margin-bottom:0}.ant-tabs.ant-tabs-card .ant-tabs-card-bar .ant-tabs-tab .ant-tabs-close-x{width:16px;height:16px;height:14px;margin-right:-5px;margin-left:3px;overflow:hidden;color:#00000073;font-size:12px;vertical-align:middle;transition:all .3s}.ant-tabs.ant-tabs-card .ant-tabs-card-bar .ant-tabs-tab .ant-tabs-close-x:hover{color:#000000d9}.ant-tabs.ant-tabs-card .ant-tabs-card-content>.ant-tabs-tabpane,.ant-tabs.ant-tabs-editable-card .ant-tabs-card-content>.ant-tabs-tabpane{transition:none!important}.ant-tabs.ant-tabs-card .ant-tabs-card-content>.ant-tabs-tabpane-inactive,.ant-tabs.ant-tabs-editable-card .ant-tabs-card-content>.ant-tabs-tabpane-inactive{overflow:hidden}.ant-tabs.ant-tabs-card .ant-tabs-card-bar .ant-tabs-tab:hover .anticon-close{opacity:1}.ant-tabs-extra-content{line-height:46.001px}.ant-tabs-extra-content .ant-tabs-new-tab{position:relative;width:20px;height:20px;color:#000000d9;font-size:12px;line-height:20px;text-align:center;border:1px solid #f0f0f0;border-radius:2px;cursor:pointer;transition:all .3s}.ant-tabs-extra-content .ant-tabs-new-tab:hover{color:#1890ff;border-color:#1890ff}.ant-tabs-extra-content .ant-tabs-new-tab svg{position:absolute;top:0;right:0;bottom:0;left:0;margin:auto}.ant-tabs.ant-tabs-large .ant-tabs-extra-content{line-height:57.144px}.ant-tabs.ant-tabs-small .ant-tabs-extra-content{line-height:38.001px}.ant-tabs.ant-tabs-card .ant-tabs-extra-content{line-height:40px}.ant-tabs-vertical.ant-tabs-card .ant-tabs-card-bar.ant-tabs-left-bar .ant-tabs-nav-container,.ant-tabs-vertical.ant-tabs-card .ant-tabs-card-bar.ant-tabs-right-bar .ant-tabs-nav-container{height:100%}.ant-tabs-vertical.ant-tabs-card 
.ant-tabs-card-bar.ant-tabs-left-bar .ant-tabs-tab,.ant-tabs-vertical.ant-tabs-card .ant-tabs-card-bar.ant-tabs-right-bar .ant-tabs-tab{margin-bottom:8px;border-bottom:1px solid #f0f0f0}.ant-tabs-vertical.ant-tabs-card .ant-tabs-card-bar.ant-tabs-left-bar .ant-tabs-tab-active,.ant-tabs-vertical.ant-tabs-card .ant-tabs-card-bar.ant-tabs-right-bar .ant-tabs-tab-active{padding-bottom:4px}.ant-tabs-vertical.ant-tabs-card .ant-tabs-card-bar.ant-tabs-left-bar .ant-tabs-tab:last-child,.ant-tabs-vertical.ant-tabs-card .ant-tabs-card-bar.ant-tabs-right-bar .ant-tabs-tab:last-child{margin-bottom:8px}.ant-tabs-vertical.ant-tabs-card .ant-tabs-card-bar.ant-tabs-left-bar .ant-tabs-new-tab,.ant-tabs-vertical.ant-tabs-card .ant-tabs-card-bar.ant-tabs-right-bar .ant-tabs-new-tab{width:90%}.ant-tabs-vertical.ant-tabs-card.ant-tabs-left .ant-tabs-card-bar.ant-tabs-left-bar .ant-tabs-nav-wrap{margin-right:0}.ant-tabs-vertical.ant-tabs-card.ant-tabs-left .ant-tabs-card-bar.ant-tabs-left-bar .ant-tabs-tab{margin-right:1px;border-right:0;border-radius:2px 0 0 2px}.ant-tabs-vertical.ant-tabs-card.ant-tabs-left .ant-tabs-card-bar.ant-tabs-left-bar .ant-tabs-tab-active{margin-right:-1px;padding-right:18px}.ant-tabs-vertical.ant-tabs-card.ant-tabs-right .ant-tabs-card-bar.ant-tabs-right-bar .ant-tabs-nav-wrap{margin-left:0}.ant-tabs-vertical.ant-tabs-card.ant-tabs-right .ant-tabs-card-bar.ant-tabs-right-bar .ant-tabs-tab{margin-left:1px;border-left:0;border-radius:0 2px 2px 0}.ant-tabs-vertical.ant-tabs-card.ant-tabs-right .ant-tabs-card-bar.ant-tabs-right-bar .ant-tabs-tab-active{margin-left:-1px;padding-left:18px}.ant-tabs .ant-tabs-card-bar.ant-tabs-bottom-bar .ant-tabs-tab{height:auto;border-top:0;border-bottom:1px solid #f0f0f0;border-radius:0 0 2px 2px}.ant-tabs .ant-tabs-card-bar.ant-tabs-bottom-bar 
.ant-tabs-tab-active{padding-top:1px;padding-bottom:0;color:#1890ff}.ant-tabs{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:relative;overflow:hidden}.ant-tabs:before,.ant-tabs:after{display:table;content:""}.ant-tabs:after{clear:both}.ant-tabs-ink-bar{position:absolute;bottom:1px;left:0;z-index:1;box-sizing:border-box;width:0;height:2px;background-color:#1890ff;transform-origin:0 0}.ant-tabs-bar{margin:0 0 16px;border-bottom:1px solid #f0f0f0;outline:none;transition:padding .3s cubic-bezier(.645,.045,.355,1)}.ant-tabs-nav-container{position:relative;box-sizing:border-box;margin-bottom:-1px;overflow:hidden;font-size:14px;line-height:1.5715;white-space:nowrap;transition:padding .3s cubic-bezier(.645,.045,.355,1)}.ant-tabs-nav-container:before,.ant-tabs-nav-container:after{display:table;content:""}.ant-tabs-nav-container:after{clear:both}.ant-tabs-nav-container-scrolling{padding-right:32px;padding-left:32px}.ant-tabs-bottom .ant-tabs-bottom-bar{margin-top:16px;margin-bottom:0;border-top:1px solid #f0f0f0;border-bottom:none}.ant-tabs-bottom .ant-tabs-bottom-bar .ant-tabs-ink-bar{top:1px;bottom:auto}.ant-tabs-bottom .ant-tabs-bottom-bar .ant-tabs-nav-container{margin-top:-1px;margin-bottom:0}.ant-tabs-tab-prev,.ant-tabs-tab-next{position:absolute;z-index:2;width:0;height:100%;color:#00000073;text-align:center;background-color:transparent;border:0;cursor:pointer;opacity:0;transition:width .3s cubic-bezier(.645,.045,.355,1),opacity .3s cubic-bezier(.645,.045,.355,1),color .3s 
cubic-bezier(.645,.045,.355,1);-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;pointer-events:none}.ant-tabs-tab-prev.ant-tabs-tab-arrow-show,.ant-tabs-tab-next.ant-tabs-tab-arrow-show{width:32px;height:100%;opacity:1;pointer-events:auto}.ant-tabs-tab-prev:hover,.ant-tabs-tab-next:hover{color:#000000d9}.ant-tabs-tab-prev-icon,.ant-tabs-tab-next-icon{position:absolute;top:50%;left:50%;font-weight:700;font-style:normal;font-variant:normal;line-height:inherit;text-align:center;text-transform:none;transform:translate(-50%,-50%)}.ant-tabs-tab-prev-icon-target,.ant-tabs-tab-next-icon-target{display:block;display:inline-block;font-size:12px;font-size:10px \	;transform:scale(.83333333) rotate(0)}:root .ant-tabs-tab-prev-icon-target,:root .ant-tabs-tab-next-icon-target{font-size:12px}.ant-tabs-tab-btn-disabled{cursor:not-allowed}.ant-tabs-tab-btn-disabled,.ant-tabs-tab-btn-disabled:hover{color:#00000040}.ant-tabs-tab-next{right:2px}.ant-tabs-tab-prev{left:0}:root .ant-tabs-tab-prev{filter:none}.ant-tabs-nav-wrap{margin-bottom:-1px;overflow:hidden}.ant-tabs-nav-scroll{overflow:hidden;white-space:nowrap}.ant-tabs-nav{position:relative;display:inline-block;box-sizing:border-box;margin:0;padding-left:0;list-style:none;transition:transform .3s cubic-bezier(.645,.045,.355,1)}.ant-tabs-nav:before,.ant-tabs-nav:after{display:table;content:" "}.ant-tabs-nav:after{clear:both}.ant-tabs-nav .ant-tabs-tab{position:relative;display:inline-block;box-sizing:border-box;height:100%;margin:0 32px 0 0;padding:12px 16px;text-decoration:none;cursor:pointer;transition:color .3s cubic-bezier(.645,.045,.355,1)}.ant-tabs-nav .ant-tabs-tab:before{position:absolute;top:-1px;left:0;width:100%;border-top:2px solid transparent;border-radius:2px 2px 0 0;transition:all .3s;content:"";pointer-events:none}.ant-tabs-nav .ant-tabs-tab:last-child{margin-right:0}.ant-tabs-nav .ant-tabs-tab:hover{color:#40a9ff}.ant-tabs-nav .ant-tabs-tab:active{color:#096dd9}.ant-tabs-nav 
.ant-tabs-tab .anticon{margin-right:8px}.ant-tabs-nav .ant-tabs-tab-active{color:#1890ff;text-shadow:0 0 .25px currentColor}.ant-tabs-nav .ant-tabs-tab-disabled,.ant-tabs-nav .ant-tabs-tab-disabled:hover{color:#00000040;cursor:not-allowed}.ant-tabs .ant-tabs-large-bar .ant-tabs-nav-container{font-size:16px}.ant-tabs .ant-tabs-large-bar .ant-tabs-tab{padding:16px}.ant-tabs .ant-tabs-small-bar .ant-tabs-nav-container{font-size:14px}.ant-tabs .ant-tabs-small-bar .ant-tabs-tab{padding:8px 16px}.ant-tabs .ant-tabs-centered-bar .ant-tabs-nav-wrap{text-align:center}.ant-tabs-content:before{display:block;overflow:hidden;content:""}.ant-tabs .ant-tabs-top-content,.ant-tabs .ant-tabs-bottom-content{width:100%}.ant-tabs .ant-tabs-top-content>.ant-tabs-tabpane,.ant-tabs .ant-tabs-bottom-content>.ant-tabs-tabpane{flex-shrink:0;width:100%;-webkit-backface-visibility:hidden;opacity:1;transition:opacity .45s}.ant-tabs .ant-tabs-top-content>.ant-tabs-tabpane-inactive,.ant-tabs .ant-tabs-bottom-content>.ant-tabs-tabpane-inactive{height:0;padding:0!important;overflow:hidden;opacity:0;pointer-events:none}.ant-tabs .ant-tabs-top-content>.ant-tabs-tabpane-inactive input,.ant-tabs .ant-tabs-bottom-content>.ant-tabs-tabpane-inactive input{visibility:hidden}.ant-tabs .ant-tabs-top-content.ant-tabs-content-animated,.ant-tabs .ant-tabs-bottom-content.ant-tabs-content-animated{display:flex;flex-direction:row;transition:margin-left .3s cubic-bezier(.645,.045,.355,1);will-change:margin-left}.ant-tabs .ant-tabs-left-bar,.ant-tabs .ant-tabs-right-bar{height:100%;border-bottom:0}.ant-tabs .ant-tabs-left-bar .ant-tabs-tab-arrow-show,.ant-tabs .ant-tabs-right-bar .ant-tabs-tab-arrow-show{width:100%;height:32px}.ant-tabs .ant-tabs-left-bar .ant-tabs-tab,.ant-tabs .ant-tabs-right-bar .ant-tabs-tab{display:block;float:none;margin:0 0 16px;padding:8px 24px}.ant-tabs .ant-tabs-left-bar .ant-tabs-tab:last-child,.ant-tabs .ant-tabs-right-bar .ant-tabs-tab:last-child{margin-bottom:0}.ant-tabs 
.ant-tabs-left-bar .ant-tabs-extra-content,.ant-tabs .ant-tabs-right-bar .ant-tabs-extra-content{text-align:center}.ant-tabs .ant-tabs-left-bar .ant-tabs-nav-scroll,.ant-tabs .ant-tabs-right-bar .ant-tabs-nav-scroll{width:auto}.ant-tabs .ant-tabs-left-bar .ant-tabs-nav-container,.ant-tabs .ant-tabs-right-bar .ant-tabs-nav-container,.ant-tabs .ant-tabs-left-bar .ant-tabs-nav-wrap,.ant-tabs .ant-tabs-right-bar .ant-tabs-nav-wrap{height:100%}.ant-tabs .ant-tabs-left-bar .ant-tabs-nav-container,.ant-tabs .ant-tabs-right-bar .ant-tabs-nav-container{margin-bottom:0}.ant-tabs .ant-tabs-left-bar .ant-tabs-nav-container.ant-tabs-nav-container-scrolling,.ant-tabs .ant-tabs-right-bar .ant-tabs-nav-container.ant-tabs-nav-container-scrolling{padding:32px 0}.ant-tabs .ant-tabs-left-bar .ant-tabs-nav-wrap,.ant-tabs .ant-tabs-right-bar .ant-tabs-nav-wrap{margin-bottom:0}.ant-tabs .ant-tabs-left-bar .ant-tabs-nav,.ant-tabs .ant-tabs-right-bar .ant-tabs-nav{width:100%}.ant-tabs .ant-tabs-left-bar .ant-tabs-ink-bar,.ant-tabs .ant-tabs-right-bar .ant-tabs-ink-bar{top:0;bottom:auto;left:auto;width:2px;height:0}.ant-tabs .ant-tabs-left-bar .ant-tabs-tab-next,.ant-tabs .ant-tabs-right-bar .ant-tabs-tab-next{right:0;bottom:0;width:100%;height:32px}.ant-tabs .ant-tabs-left-bar .ant-tabs-tab-prev,.ant-tabs .ant-tabs-right-bar .ant-tabs-tab-prev{top:0;width:100%;height:32px}.ant-tabs .ant-tabs-left-content,.ant-tabs .ant-tabs-right-content{width:auto;margin-top:0!important;overflow:hidden}.ant-tabs .ant-tabs-left-bar{float:left;margin-right:-1px;margin-bottom:0;border-right:1px solid #f0f0f0}.ant-tabs .ant-tabs-left-bar .ant-tabs-tab{text-align:right}.ant-tabs .ant-tabs-left-bar .ant-tabs-nav-container,.ant-tabs .ant-tabs-left-bar .ant-tabs-nav-wrap{margin-right:-1px}.ant-tabs .ant-tabs-left-bar .ant-tabs-ink-bar{right:1px}.ant-tabs .ant-tabs-left-content{padding-left:24px;border-left:1px solid #f0f0f0}.ant-tabs .ant-tabs-right-bar{float:right;margin-bottom:0;margin-left:-1px;border-left:1px 
solid #f0f0f0}.ant-tabs .ant-tabs-right-bar .ant-tabs-nav-container,.ant-tabs .ant-tabs-right-bar .ant-tabs-nav-wrap{margin-left:-1px}.ant-tabs .ant-tabs-right-bar .ant-tabs-ink-bar{left:1px}.ant-tabs .ant-tabs-right-content{padding-right:24px;border-right:1px solid #f0f0f0}.ant-tabs-top .ant-tabs-ink-bar-animated,.ant-tabs-bottom .ant-tabs-ink-bar-animated{transition:transform .3s cubic-bezier(.645,.045,.355,1),width .2s cubic-bezier(.645,.045,.355,1),left .3s cubic-bezier(.645,.045,.355,1)}.ant-tabs-left .ant-tabs-ink-bar-animated,.ant-tabs-right .ant-tabs-ink-bar-animated{transition:transform .3s cubic-bezier(.645,.045,.355,1),height .2s cubic-bezier(.645,.045,.355,1),top .3s cubic-bezier(.645,.045,.355,1)}.no-flex>.ant-tabs-content>.ant-tabs-content-animated,.ant-tabs-no-animation>.ant-tabs-content>.ant-tabs-content-animated{margin-left:0!important;transform:none!important}.no-flex>.ant-tabs-content>.ant-tabs-tabpane-inactive,.ant-tabs-no-animation>.ant-tabs-content>.ant-tabs-tabpane-inactive{height:0;padding:0!important;overflow:hidden;opacity:0;pointer-events:none}.no-flex>.ant-tabs-content>.ant-tabs-tabpane-inactive input,.ant-tabs-no-animation>.ant-tabs-content>.ant-tabs-tabpane-inactive input{visibility:hidden}.ant-tabs-left-content>.ant-tabs-content-animated,.ant-tabs-right-content>.ant-tabs-content-animated{margin-left:0!important;transform:none!important}.ant-tabs-left-content>.ant-tabs-tabpane-inactive,.ant-tabs-right-content>.ant-tabs-tabpane-inactive{height:0;padding:0!important;overflow:hidden;opacity:0;pointer-events:none}.ant-tabs-left-content>.ant-tabs-tabpane-inactive input,.ant-tabs-right-content>.ant-tabs-tabpane-inactive input{visibility:hidden}.ant-row{display:flex;flex-flow:row 
wrap}.ant-row:before,.ant-row:after{display:flex}.ant-row-no-wrap{flex-wrap:nowrap}.ant-row-start{justify-content:flex-start}.ant-row-center{justify-content:center}.ant-row-end{justify-content:flex-end}.ant-row-space-between{justify-content:space-between}.ant-row-space-around{justify-content:space-around}.ant-row-top{align-items:flex-start}.ant-row-middle{align-items:center}.ant-row-bottom{align-items:flex-end}.ant-col{position:relative;max-width:100%;min-height:1px}.ant-col-24{display:block;flex:0 0 100%;max-width:100%}.ant-col-push-24{left:100%}.ant-col-pull-24{right:100%}.ant-col-offset-24{margin-left:100%}.ant-col-order-24{order:24}.ant-col-23{display:block;flex:0 0 95.83333333%;max-width:95.83333333%}.ant-col-push-23{left:95.83333333%}.ant-col-pull-23{right:95.83333333%}.ant-col-offset-23{margin-left:95.83333333%}.ant-col-order-23{order:23}.ant-col-22{display:block;flex:0 0 91.66666667%;max-width:91.66666667%}.ant-col-push-22{left:91.66666667%}.ant-col-pull-22{right:91.66666667%}.ant-col-offset-22{margin-left:91.66666667%}.ant-col-order-22{order:22}.ant-col-21{display:block;flex:0 0 87.5%;max-width:87.5%}.ant-col-push-21{left:87.5%}.ant-col-pull-21{right:87.5%}.ant-col-offset-21{margin-left:87.5%}.ant-col-order-21{order:21}.ant-col-20{display:block;flex:0 0 83.33333333%;max-width:83.33333333%}.ant-col-push-20{left:83.33333333%}.ant-col-pull-20{right:83.33333333%}.ant-col-offset-20{margin-left:83.33333333%}.ant-col-order-20{order:20}.ant-col-19{display:block;flex:0 0 79.16666667%;max-width:79.16666667%}.ant-col-push-19{left:79.16666667%}.ant-col-pull-19{right:79.16666667%}.ant-col-offset-19{margin-left:79.16666667%}.ant-col-order-19{order:19}.ant-col-18{display:block;flex:0 0 75%;max-width:75%}.ant-col-push-18{left:75%}.ant-col-pull-18{right:75%}.ant-col-offset-18{margin-left:75%}.ant-col-order-18{order:18}.ant-col-17{display:block;flex:0 0 
70.83333333%;max-width:70.83333333%}.ant-col-push-17{left:70.83333333%}.ant-col-pull-17{right:70.83333333%}.ant-col-offset-17{margin-left:70.83333333%}.ant-col-order-17{order:17}.ant-col-16{display:block;flex:0 0 66.66666667%;max-width:66.66666667%}.ant-col-push-16{left:66.66666667%}.ant-col-pull-16{right:66.66666667%}.ant-col-offset-16{margin-left:66.66666667%}.ant-col-order-16{order:16}.ant-col-15{display:block;flex:0 0 62.5%;max-width:62.5%}.ant-col-push-15{left:62.5%}.ant-col-pull-15{right:62.5%}.ant-col-offset-15{margin-left:62.5%}.ant-col-order-15{order:15}.ant-col-14{display:block;flex:0 0 58.33333333%;max-width:58.33333333%}.ant-col-push-14{left:58.33333333%}.ant-col-pull-14{right:58.33333333%}.ant-col-offset-14{margin-left:58.33333333%}.ant-col-order-14{order:14}.ant-col-13{display:block;flex:0 0 54.16666667%;max-width:54.16666667%}.ant-col-push-13{left:54.16666667%}.ant-col-pull-13{right:54.16666667%}.ant-col-offset-13{margin-left:54.16666667%}.ant-col-order-13{order:13}.ant-col-12{display:block;flex:0 0 50%;max-width:50%}.ant-col-push-12{left:50%}.ant-col-pull-12{right:50%}.ant-col-offset-12{margin-left:50%}.ant-col-order-12{order:12}.ant-col-11{display:block;flex:0 0 45.83333333%;max-width:45.83333333%}.ant-col-push-11{left:45.83333333%}.ant-col-pull-11{right:45.83333333%}.ant-col-offset-11{margin-left:45.83333333%}.ant-col-order-11{order:11}.ant-col-10{display:block;flex:0 0 41.66666667%;max-width:41.66666667%}.ant-col-push-10{left:41.66666667%}.ant-col-pull-10{right:41.66666667%}.ant-col-offset-10{margin-left:41.66666667%}.ant-col-order-10{order:10}.ant-col-9{display:block;flex:0 0 37.5%;max-width:37.5%}.ant-col-push-9{left:37.5%}.ant-col-pull-9{right:37.5%}.ant-col-offset-9{margin-left:37.5%}.ant-col-order-9{order:9}.ant-col-8{display:block;flex:0 0 33.33333333%;max-width:33.33333333%}.ant-col-push-8{left:33.33333333%}.ant-col-pull-8{right:33.33333333%}.ant-col-offset-8{margin-left:33.33333333%}.ant-col-order-8{order:8}.ant-col-7{display:block;flex:0 
0 29.16666667%;max-width:29.16666667%}.ant-col-push-7{left:29.16666667%}.ant-col-pull-7{right:29.16666667%}.ant-col-offset-7{margin-left:29.16666667%}.ant-col-order-7{order:7}.ant-col-6{display:block;flex:0 0 25%;max-width:25%}.ant-col-push-6{left:25%}.ant-col-pull-6{right:25%}.ant-col-offset-6{margin-left:25%}.ant-col-order-6{order:6}.ant-col-5{display:block;flex:0 0 20.83333333%;max-width:20.83333333%}.ant-col-push-5{left:20.83333333%}.ant-col-pull-5{right:20.83333333%}.ant-col-offset-5{margin-left:20.83333333%}.ant-col-order-5{order:5}.ant-col-4{display:block;flex:0 0 16.66666667%;max-width:16.66666667%}.ant-col-push-4{left:16.66666667%}.ant-col-pull-4{right:16.66666667%}.ant-col-offset-4{margin-left:16.66666667%}.ant-col-order-4{order:4}.ant-col-3{display:block;flex:0 0 12.5%;max-width:12.5%}.ant-col-push-3{left:12.5%}.ant-col-pull-3{right:12.5%}.ant-col-offset-3{margin-left:12.5%}.ant-col-order-3{order:3}.ant-col-2{display:block;flex:0 0 8.33333333%;max-width:8.33333333%}.ant-col-push-2{left:8.33333333%}.ant-col-pull-2{right:8.33333333%}.ant-col-offset-2{margin-left:8.33333333%}.ant-col-order-2{order:2}.ant-col-1{display:block;flex:0 0 
4.16666667%;max-width:4.16666667%}.ant-col-push-1{left:4.16666667%}.ant-col-pull-1{right:4.16666667%}.ant-col-offset-1{margin-left:4.16666667%}.ant-col-order-1{order:1}.ant-col-0{display:none}.ant-col-push-0{left:auto}.ant-col-pull-0{right:auto}.ant-col-offset-0{margin-left:0}.ant-col-order-0{order:0}.ant-col-push-0.ant-col-rtl{right:auto}.ant-col-pull-0.ant-col-rtl{left:auto}.ant-col-offset-0.ant-col-rtl{margin-right:0}.ant-col-push-1.ant-col-rtl{right:4.16666667%;left:auto}.ant-col-pull-1.ant-col-rtl{right:auto;left:4.16666667%}.ant-col-offset-1.ant-col-rtl{margin-right:4.16666667%;margin-left:0}.ant-col-push-2.ant-col-rtl{right:8.33333333%;left:auto}.ant-col-pull-2.ant-col-rtl{right:auto;left:8.33333333%}.ant-col-offset-2.ant-col-rtl{margin-right:8.33333333%;margin-left:0}.ant-col-push-3.ant-col-rtl{right:12.5%;left:auto}.ant-col-pull-3.ant-col-rtl{right:auto;left:12.5%}.ant-col-offset-3.ant-col-rtl{margin-right:12.5%;margin-left:0}.ant-col-push-4.ant-col-rtl{right:16.66666667%;left:auto}.ant-col-pull-4.ant-col-rtl{right:auto;left:16.66666667%}.ant-col-offset-4.ant-col-rtl{margin-right:16.66666667%;margin-left:0}.ant-col-push-5.ant-col-rtl{right:20.83333333%;left:auto}.ant-col-pull-5.ant-col-rtl{right:auto;left:20.83333333%}.ant-col-offset-5.ant-col-rtl{margin-right:20.83333333%;margin-left:0}.ant-col-push-6.ant-col-rtl{right:25%;left:auto}.ant-col-pull-6.ant-col-rtl{right:auto;left:25%}.ant-col-offset-6.ant-col-rtl{margin-right:25%;margin-left:0}.ant-col-push-7.ant-col-rtl{right:29.16666667%;left:auto}.ant-col-pull-7.ant-col-rtl{right:auto;left:29.16666667%}.ant-col-offset-7.ant-col-rtl{margin-right:29.16666667%;margin-left:0}.ant-col-push-8.ant-col-rtl{right:33.33333333%;left:auto}.ant-col-pull-8.ant-col-rtl{right:auto;left:33.33333333%}.ant-col-offset-8.ant-col-rtl{margin-right:33.33333333%;margin-left:0}.ant-col-push-9.ant-col-rtl{right:37.5%;left:auto}.ant-col-pull-9.ant-col-rtl{right:auto;left:37.5%}.ant-col-offset-9.ant-col-rtl{margin-right:37.5%;margin-le
ft:0}.ant-col-push-10.ant-col-rtl{right:41.66666667%;left:auto}.ant-col-pull-10.ant-col-rtl{right:auto;left:41.66666667%}.ant-col-offset-10.ant-col-rtl{margin-right:41.66666667%;margin-left:0}.ant-col-push-11.ant-col-rtl{right:45.83333333%;left:auto}.ant-col-pull-11.ant-col-rtl{right:auto;left:45.83333333%}.ant-col-offset-11.ant-col-rtl{margin-right:45.83333333%;margin-left:0}.ant-col-push-12.ant-col-rtl{right:50%;left:auto}.ant-col-pull-12.ant-col-rtl{right:auto;left:50%}.ant-col-offset-12.ant-col-rtl{margin-right:50%;margin-left:0}.ant-col-push-13.ant-col-rtl{right:54.16666667%;left:auto}.ant-col-pull-13.ant-col-rtl{right:auto;left:54.16666667%}.ant-col-offset-13.ant-col-rtl{margin-right:54.16666667%;margin-left:0}.ant-col-push-14.ant-col-rtl{right:58.33333333%;left:auto}.ant-col-pull-14.ant-col-rtl{right:auto;left:58.33333333%}.ant-col-offset-14.ant-col-rtl{margin-right:58.33333333%;margin-left:0}.ant-col-push-15.ant-col-rtl{right:62.5%;left:auto}.ant-col-pull-15.ant-col-rtl{right:auto;left:62.5%}.ant-col-offset-15.ant-col-rtl{margin-right:62.5%;margin-left:0}.ant-col-push-16.ant-col-rtl{right:66.66666667%;left:auto}.ant-col-pull-16.ant-col-rtl{right:auto;left:66.66666667%}.ant-col-offset-16.ant-col-rtl{margin-right:66.66666667%;margin-left:0}.ant-col-push-17.ant-col-rtl{right:70.83333333%;left:auto}.ant-col-pull-17.ant-col-rtl{right:auto;left:70.83333333%}.ant-col-offset-17.ant-col-rtl{margin-right:70.83333333%;margin-left:0}.ant-col-push-18.ant-col-rtl{right:75%;left:auto}.ant-col-pull-18.ant-col-rtl{right:auto;left:75%}.ant-col-offset-18.ant-col-rtl{margin-right:75%;margin-left:0}.ant-col-push-19.ant-col-rtl{right:79.16666667%;left:auto}.ant-col-pull-19.ant-col-rtl{right:auto;left:79.16666667%}.ant-col-offset-19.ant-col-rtl{margin-right:79.16666667%;margin-left:0}.ant-col-push-20.ant-col-rtl{right:83.33333333%;left:auto}.ant-col-pull-20.ant-col-rtl{right:auto;left:83.33333333%}.ant-col-offset-20.ant-col-rtl{margin-right:83.33333333%;margin-left:0}.ant-col-push
-21.ant-col-rtl{right:87.5%;left:auto}.ant-col-pull-21.ant-col-rtl{right:auto;left:87.5%}.ant-col-offset-21.ant-col-rtl{margin-right:87.5%;margin-left:0}.ant-col-push-22.ant-col-rtl{right:91.66666667%;left:auto}.ant-col-pull-22.ant-col-rtl{right:auto;left:91.66666667%}.ant-col-offset-22.ant-col-rtl{margin-right:91.66666667%;margin-left:0}.ant-col-push-23.ant-col-rtl{right:95.83333333%;left:auto}.ant-col-pull-23.ant-col-rtl{right:auto;left:95.83333333%}.ant-col-offset-23.ant-col-rtl{margin-right:95.83333333%;margin-left:0}.ant-col-push-24.ant-col-rtl{right:100%;left:auto}.ant-col-pull-24.ant-col-rtl{right:auto;left:100%}.ant-col-offset-24.ant-col-rtl{margin-right:100%;margin-left:0}.ant-col-xs-24{display:block;flex:0 0 100%;max-width:100%}.ant-col-xs-push-24{left:100%}.ant-col-xs-pull-24{right:100%}.ant-col-xs-offset-24{margin-left:100%}.ant-col-xs-order-24{order:24}.ant-col-xs-23{display:block;flex:0 0 95.83333333%;max-width:95.83333333%}.ant-col-xs-push-23{left:95.83333333%}.ant-col-xs-pull-23{right:95.83333333%}.ant-col-xs-offset-23{margin-left:95.83333333%}.ant-col-xs-order-23{order:23}.ant-col-xs-22{display:block;flex:0 0 91.66666667%;max-width:91.66666667%}.ant-col-xs-push-22{left:91.66666667%}.ant-col-xs-pull-22{right:91.66666667%}.ant-col-xs-offset-22{margin-left:91.66666667%}.ant-col-xs-order-22{order:22}.ant-col-xs-21{display:block;flex:0 0 87.5%;max-width:87.5%}.ant-col-xs-push-21{left:87.5%}.ant-col-xs-pull-21{right:87.5%}.ant-col-xs-offset-21{margin-left:87.5%}.ant-col-xs-order-21{order:21}.ant-col-xs-20{display:block;flex:0 0 83.33333333%;max-width:83.33333333%}.ant-col-xs-push-20{left:83.33333333%}.ant-col-xs-pull-20{right:83.33333333%}.ant-col-xs-offset-20{margin-left:83.33333333%}.ant-col-xs-order-20{order:20}.ant-col-xs-19{display:block;flex:0 0 
79.16666667%;max-width:79.16666667%}.ant-col-xs-push-19{left:79.16666667%}.ant-col-xs-pull-19{right:79.16666667%}.ant-col-xs-offset-19{margin-left:79.16666667%}.ant-col-xs-order-19{order:19}.ant-col-xs-18{display:block;flex:0 0 75%;max-width:75%}.ant-col-xs-push-18{left:75%}.ant-col-xs-pull-18{right:75%}.ant-col-xs-offset-18{margin-left:75%}.ant-col-xs-order-18{order:18}.ant-col-xs-17{display:block;flex:0 0 70.83333333%;max-width:70.83333333%}.ant-col-xs-push-17{left:70.83333333%}.ant-col-xs-pull-17{right:70.83333333%}.ant-col-xs-offset-17{margin-left:70.83333333%}.ant-col-xs-order-17{order:17}.ant-col-xs-16{display:block;flex:0 0 66.66666667%;max-width:66.66666667%}.ant-col-xs-push-16{left:66.66666667%}.ant-col-xs-pull-16{right:66.66666667%}.ant-col-xs-offset-16{margin-left:66.66666667%}.ant-col-xs-order-16{order:16}.ant-col-xs-15{display:block;flex:0 0 62.5%;max-width:62.5%}.ant-col-xs-push-15{left:62.5%}.ant-col-xs-pull-15{right:62.5%}.ant-col-xs-offset-15{margin-left:62.5%}.ant-col-xs-order-15{order:15}.ant-col-xs-14{display:block;flex:0 0 58.33333333%;max-width:58.33333333%}.ant-col-xs-push-14{left:58.33333333%}.ant-col-xs-pull-14{right:58.33333333%}.ant-col-xs-offset-14{margin-left:58.33333333%}.ant-col-xs-order-14{order:14}.ant-col-xs-13{display:block;flex:0 0 54.16666667%;max-width:54.16666667%}.ant-col-xs-push-13{left:54.16666667%}.ant-col-xs-pull-13{right:54.16666667%}.ant-col-xs-offset-13{margin-left:54.16666667%}.ant-col-xs-order-13{order:13}.ant-col-xs-12{display:block;flex:0 0 50%;max-width:50%}.ant-col-xs-push-12{left:50%}.ant-col-xs-pull-12{right:50%}.ant-col-xs-offset-12{margin-left:50%}.ant-col-xs-order-12{order:12}.ant-col-xs-11{display:block;flex:0 0 45.83333333%;max-width:45.83333333%}.ant-col-xs-push-11{left:45.83333333%}.ant-col-xs-pull-11{right:45.83333333%}.ant-col-xs-offset-11{margin-left:45.83333333%}.ant-col-xs-order-11{order:11}.ant-col-xs-10{display:block;flex:0 0 
41.66666667%;max-width:41.66666667%}.ant-col-xs-push-10{left:41.66666667%}.ant-col-xs-pull-10{right:41.66666667%}.ant-col-xs-offset-10{margin-left:41.66666667%}.ant-col-xs-order-10{order:10}.ant-col-xs-9{display:block;flex:0 0 37.5%;max-width:37.5%}.ant-col-xs-push-9{left:37.5%}.ant-col-xs-pull-9{right:37.5%}.ant-col-xs-offset-9{margin-left:37.5%}.ant-col-xs-order-9{order:9}.ant-col-xs-8{display:block;flex:0 0 33.33333333%;max-width:33.33333333%}.ant-col-xs-push-8{left:33.33333333%}.ant-col-xs-pull-8{right:33.33333333%}.ant-col-xs-offset-8{margin-left:33.33333333%}.ant-col-xs-order-8{order:8}.ant-col-xs-7{display:block;flex:0 0 29.16666667%;max-width:29.16666667%}.ant-col-xs-push-7{left:29.16666667%}.ant-col-xs-pull-7{right:29.16666667%}.ant-col-xs-offset-7{margin-left:29.16666667%}.ant-col-xs-order-7{order:7}.ant-col-xs-6{display:block;flex:0 0 25%;max-width:25%}.ant-col-xs-push-6{left:25%}.ant-col-xs-pull-6{right:25%}.ant-col-xs-offset-6{margin-left:25%}.ant-col-xs-order-6{order:6}.ant-col-xs-5{display:block;flex:0 0 20.83333333%;max-width:20.83333333%}.ant-col-xs-push-5{left:20.83333333%}.ant-col-xs-pull-5{right:20.83333333%}.ant-col-xs-offset-5{margin-left:20.83333333%}.ant-col-xs-order-5{order:5}.ant-col-xs-4{display:block;flex:0 0 16.66666667%;max-width:16.66666667%}.ant-col-xs-push-4{left:16.66666667%}.ant-col-xs-pull-4{right:16.66666667%}.ant-col-xs-offset-4{margin-left:16.66666667%}.ant-col-xs-order-4{order:4}.ant-col-xs-3{display:block;flex:0 0 12.5%;max-width:12.5%}.ant-col-xs-push-3{left:12.5%}.ant-col-xs-pull-3{right:12.5%}.ant-col-xs-offset-3{margin-left:12.5%}.ant-col-xs-order-3{order:3}.ant-col-xs-2{display:block;flex:0 0 8.33333333%;max-width:8.33333333%}.ant-col-xs-push-2{left:8.33333333%}.ant-col-xs-pull-2{right:8.33333333%}.ant-col-xs-offset-2{margin-left:8.33333333%}.ant-col-xs-order-2{order:2}.ant-col-xs-1{display:block;flex:0 0 
4.16666667%;max-width:4.16666667%}.ant-col-xs-push-1{left:4.16666667%}.ant-col-xs-pull-1{right:4.16666667%}.ant-col-xs-offset-1{margin-left:4.16666667%}.ant-col-xs-order-1{order:1}.ant-col-xs-0{display:none}.ant-col-push-0{left:auto}.ant-col-pull-0{right:auto}.ant-col-xs-push-0{left:auto}.ant-col-xs-pull-0{right:auto}.ant-col-xs-offset-0{margin-left:0}.ant-col-xs-order-0{order:0}.ant-col-push-0.ant-col-rtl{right:auto}.ant-col-pull-0.ant-col-rtl{left:auto}.ant-col-xs-push-0.ant-col-rtl{right:auto}.ant-col-xs-pull-0.ant-col-rtl{left:auto}.ant-col-xs-offset-0.ant-col-rtl{margin-right:0}.ant-col-xs-push-1.ant-col-rtl{right:4.16666667%;left:auto}.ant-col-xs-pull-1.ant-col-rtl{right:auto;left:4.16666667%}.ant-col-xs-offset-1.ant-col-rtl{margin-right:4.16666667%;margin-left:0}.ant-col-xs-push-2.ant-col-rtl{right:8.33333333%;left:auto}.ant-col-xs-pull-2.ant-col-rtl{right:auto;left:8.33333333%}.ant-col-xs-offset-2.ant-col-rtl{margin-right:8.33333333%;margin-left:0}.ant-col-xs-push-3.ant-col-rtl{right:12.5%;left:auto}.ant-col-xs-pull-3.ant-col-rtl{right:auto;left:12.5%}.ant-col-xs-offset-3.ant-col-rtl{margin-right:12.5%;margin-left:0}.ant-col-xs-push-4.ant-col-rtl{right:16.66666667%;left:auto}.ant-col-xs-pull-4.ant-col-rtl{right:auto;left:16.66666667%}.ant-col-xs-offset-4.ant-col-rtl{margin-right:16.66666667%;margin-left:0}.ant-col-xs-push-5.ant-col-rtl{right:20.83333333%;left:auto}.ant-col-xs-pull-5.ant-col-rtl{right:auto;left:20.83333333%}.ant-col-xs-offset-5.ant-col-rtl{margin-right:20.83333333%;margin-left:0}.ant-col-xs-push-6.ant-col-rtl{right:25%;left:auto}.ant-col-xs-pull-6.ant-col-rtl{right:auto;left:25%}.ant-col-xs-offset-6.ant-col-rtl{margin-right:25%;margin-left:0}.ant-col-xs-push-7.ant-col-rtl{right:29.16666667%;left:auto}.ant-col-xs-pull-7.ant-col-rtl{right:auto;left:29.16666667%}.ant-col-xs-offset-7.ant-col-rtl{margin-right:29.16666667%;margin-left:0}.ant-col-xs-push-8.ant-col-rtl{right:33.33333333%;left:auto}.ant-col-xs-pull-8.ant-col-rtl{right:auto;left:33.333
33333%}.ant-col-xs-offset-8.ant-col-rtl{margin-right:33.33333333%;margin-left:0}.ant-col-xs-push-9.ant-col-rtl{right:37.5%;left:auto}.ant-col-xs-pull-9.ant-col-rtl{right:auto;left:37.5%}.ant-col-xs-offset-9.ant-col-rtl{margin-right:37.5%;margin-left:0}.ant-col-xs-push-10.ant-col-rtl{right:41.66666667%;left:auto}.ant-col-xs-pull-10.ant-col-rtl{right:auto;left:41.66666667%}.ant-col-xs-offset-10.ant-col-rtl{margin-right:41.66666667%;margin-left:0}.ant-col-xs-push-11.ant-col-rtl{right:45.83333333%;left:auto}.ant-col-xs-pull-11.ant-col-rtl{right:auto;left:45.83333333%}.ant-col-xs-offset-11.ant-col-rtl{margin-right:45.83333333%;margin-left:0}.ant-col-xs-push-12.ant-col-rtl{right:50%;left:auto}.ant-col-xs-pull-12.ant-col-rtl{right:auto;left:50%}.ant-col-xs-offset-12.ant-col-rtl{margin-right:50%;margin-left:0}.ant-col-xs-push-13.ant-col-rtl{right:54.16666667%;left:auto}.ant-col-xs-pull-13.ant-col-rtl{right:auto;left:54.16666667%}.ant-col-xs-offset-13.ant-col-rtl{margin-right:54.16666667%;margin-left:0}.ant-col-xs-push-14.ant-col-rtl{right:58.33333333%;left:auto}.ant-col-xs-pull-14.ant-col-rtl{right:auto;left:58.33333333%}.ant-col-xs-offset-14.ant-col-rtl{margin-right:58.33333333%;margin-left:0}.ant-col-xs-push-15.ant-col-rtl{right:62.5%;left:auto}.ant-col-xs-pull-15.ant-col-rtl{right:auto;left:62.5%}.ant-col-xs-offset-15.ant-col-rtl{margin-right:62.5%;margin-left:0}.ant-col-xs-push-16.ant-col-rtl{right:66.66666667%;left:auto}.ant-col-xs-pull-16.ant-col-rtl{right:auto;left:66.66666667%}.ant-col-xs-offset-16.ant-col-rtl{margin-right:66.66666667%;margin-left:0}.ant-col-xs-push-17.ant-col-rtl{right:70.83333333%;left:auto}.ant-col-xs-pull-17.ant-col-rtl{right:auto;left:70.83333333%}.ant-col-xs-offset-17.ant-col-rtl{margin-right:70.83333333%;margin-left:0}.ant-col-xs-push-18.ant-col-rtl{right:75%;left:auto}.ant-col-xs-pull-18.ant-col-rtl{right:auto;left:75%}.ant-col-xs-offset-18.ant-col-rtl{margin-right:75%;margin-left:0}.ant-col-xs-push-19.ant-col-rtl{right:79.16666667%;left:aut
o}.ant-col-xs-pull-19.ant-col-rtl{right:auto;left:79.16666667%}.ant-col-xs-offset-19.ant-col-rtl{margin-right:79.16666667%;margin-left:0}.ant-col-xs-push-20.ant-col-rtl{right:83.33333333%;left:auto}.ant-col-xs-pull-20.ant-col-rtl{right:auto;left:83.33333333%}.ant-col-xs-offset-20.ant-col-rtl{margin-right:83.33333333%;margin-left:0}.ant-col-xs-push-21.ant-col-rtl{right:87.5%;left:auto}.ant-col-xs-pull-21.ant-col-rtl{right:auto;left:87.5%}.ant-col-xs-offset-21.ant-col-rtl{margin-right:87.5%;margin-left:0}.ant-col-xs-push-22.ant-col-rtl{right:91.66666667%;left:auto}.ant-col-xs-pull-22.ant-col-rtl{right:auto;left:91.66666667%}.ant-col-xs-offset-22.ant-col-rtl{margin-right:91.66666667%;margin-left:0}.ant-col-xs-push-23.ant-col-rtl{right:95.83333333%;left:auto}.ant-col-xs-pull-23.ant-col-rtl{right:auto;left:95.83333333%}.ant-col-xs-offset-23.ant-col-rtl{margin-right:95.83333333%;margin-left:0}.ant-col-xs-push-24.ant-col-rtl{right:100%;left:auto}.ant-col-xs-pull-24.ant-col-rtl{right:auto;left:100%}.ant-col-xs-offset-24.ant-col-rtl{margin-right:100%;margin-left:0}@media (min-width: 576px){.ant-col-sm-24{display:block;flex:0 0 100%;max-width:100%}.ant-col-sm-push-24{left:100%}.ant-col-sm-pull-24{right:100%}.ant-col-sm-offset-24{margin-left:100%}.ant-col-sm-order-24{order:24}.ant-col-sm-23{display:block;flex:0 0 95.83333333%;max-width:95.83333333%}.ant-col-sm-push-23{left:95.83333333%}.ant-col-sm-pull-23{right:95.83333333%}.ant-col-sm-offset-23{margin-left:95.83333333%}.ant-col-sm-order-23{order:23}.ant-col-sm-22{display:block;flex:0 0 91.66666667%;max-width:91.66666667%}.ant-col-sm-push-22{left:91.66666667%}.ant-col-sm-pull-22{right:91.66666667%}.ant-col-sm-offset-22{margin-left:91.66666667%}.ant-col-sm-order-22{order:22}.ant-col-sm-21{display:block;flex:0 0 87.5%;max-width:87.5%}.ant-col-sm-push-21{left:87.5%}.ant-col-sm-pull-21{right:87.5%}.ant-col-sm-offset-21{margin-left:87.5%}.ant-col-sm-order-21{order:21}.ant-col-sm-20{display:block;flex:0 0 
83.33333333%;max-width:83.33333333%}.ant-col-sm-push-20{left:83.33333333%}.ant-col-sm-pull-20{right:83.33333333%}.ant-col-sm-offset-20{margin-left:83.33333333%}.ant-col-sm-order-20{order:20}.ant-col-sm-19{display:block;flex:0 0 79.16666667%;max-width:79.16666667%}.ant-col-sm-push-19{left:79.16666667%}.ant-col-sm-pull-19{right:79.16666667%}.ant-col-sm-offset-19{margin-left:79.16666667%}.ant-col-sm-order-19{order:19}.ant-col-sm-18{display:block;flex:0 0 75%;max-width:75%}.ant-col-sm-push-18{left:75%}.ant-col-sm-pull-18{right:75%}.ant-col-sm-offset-18{margin-left:75%}.ant-col-sm-order-18{order:18}.ant-col-sm-17{display:block;flex:0 0 70.83333333%;max-width:70.83333333%}.ant-col-sm-push-17{left:70.83333333%}.ant-col-sm-pull-17{right:70.83333333%}.ant-col-sm-offset-17{margin-left:70.83333333%}.ant-col-sm-order-17{order:17}.ant-col-sm-16{display:block;flex:0 0 66.66666667%;max-width:66.66666667%}.ant-col-sm-push-16{left:66.66666667%}.ant-col-sm-pull-16{right:66.66666667%}.ant-col-sm-offset-16{margin-left:66.66666667%}.ant-col-sm-order-16{order:16}.ant-col-sm-15{display:block;flex:0 0 62.5%;max-width:62.5%}.ant-col-sm-push-15{left:62.5%}.ant-col-sm-pull-15{right:62.5%}.ant-col-sm-offset-15{margin-left:62.5%}.ant-col-sm-order-15{order:15}.ant-col-sm-14{display:block;flex:0 0 58.33333333%;max-width:58.33333333%}.ant-col-sm-push-14{left:58.33333333%}.ant-col-sm-pull-14{right:58.33333333%}.ant-col-sm-offset-14{margin-left:58.33333333%}.ant-col-sm-order-14{order:14}.ant-col-sm-13{display:block;flex:0 0 54.16666667%;max-width:54.16666667%}.ant-col-sm-push-13{left:54.16666667%}.ant-col-sm-pull-13{right:54.16666667%}.ant-col-sm-offset-13{margin-left:54.16666667%}.ant-col-sm-order-13{order:13}.ant-col-sm-12{display:block;flex:0 0 50%;max-width:50%}.ant-col-sm-push-12{left:50%}.ant-col-sm-pull-12{right:50%}.ant-col-sm-offset-12{margin-left:50%}.ant-col-sm-order-12{order:12}.ant-col-sm-11{display:block;flex:0 0 
45.83333333%;max-width:45.83333333%}.ant-col-sm-push-11{left:45.83333333%}.ant-col-sm-pull-11{right:45.83333333%}.ant-col-sm-offset-11{margin-left:45.83333333%}.ant-col-sm-order-11{order:11}.ant-col-sm-10{display:block;flex:0 0 41.66666667%;max-width:41.66666667%}.ant-col-sm-push-10{left:41.66666667%}.ant-col-sm-pull-10{right:41.66666667%}.ant-col-sm-offset-10{margin-left:41.66666667%}.ant-col-sm-order-10{order:10}.ant-col-sm-9{display:block;flex:0 0 37.5%;max-width:37.5%}.ant-col-sm-push-9{left:37.5%}.ant-col-sm-pull-9{right:37.5%}.ant-col-sm-offset-9{margin-left:37.5%}.ant-col-sm-order-9{order:9}.ant-col-sm-8{display:block;flex:0 0 33.33333333%;max-width:33.33333333%}.ant-col-sm-push-8{left:33.33333333%}.ant-col-sm-pull-8{right:33.33333333%}.ant-col-sm-offset-8{margin-left:33.33333333%}.ant-col-sm-order-8{order:8}.ant-col-sm-7{display:block;flex:0 0 29.16666667%;max-width:29.16666667%}.ant-col-sm-push-7{left:29.16666667%}.ant-col-sm-pull-7{right:29.16666667%}.ant-col-sm-offset-7{margin-left:29.16666667%}.ant-col-sm-order-7{order:7}.ant-col-sm-6{display:block;flex:0 0 25%;max-width:25%}.ant-col-sm-push-6{left:25%}.ant-col-sm-pull-6{right:25%}.ant-col-sm-offset-6{margin-left:25%}.ant-col-sm-order-6{order:6}.ant-col-sm-5{display:block;flex:0 0 20.83333333%;max-width:20.83333333%}.ant-col-sm-push-5{left:20.83333333%}.ant-col-sm-pull-5{right:20.83333333%}.ant-col-sm-offset-5{margin-left:20.83333333%}.ant-col-sm-order-5{order:5}.ant-col-sm-4{display:block;flex:0 0 16.66666667%;max-width:16.66666667%}.ant-col-sm-push-4{left:16.66666667%}.ant-col-sm-pull-4{right:16.66666667%}.ant-col-sm-offset-4{margin-left:16.66666667%}.ant-col-sm-order-4{order:4}.ant-col-sm-3{display:block;flex:0 0 12.5%;max-width:12.5%}.ant-col-sm-push-3{left:12.5%}.ant-col-sm-pull-3{right:12.5%}.ant-col-sm-offset-3{margin-left:12.5%}.ant-col-sm-order-3{order:3}.ant-col-sm-2{display:block;flex:0 0 
8.33333333%;max-width:8.33333333%}.ant-col-sm-push-2{left:8.33333333%}.ant-col-sm-pull-2{right:8.33333333%}.ant-col-sm-offset-2{margin-left:8.33333333%}.ant-col-sm-order-2{order:2}.ant-col-sm-1{display:block;flex:0 0 4.16666667%;max-width:4.16666667%}.ant-col-sm-push-1{left:4.16666667%}.ant-col-sm-pull-1{right:4.16666667%}.ant-col-sm-offset-1{margin-left:4.16666667%}.ant-col-sm-order-1{order:1}.ant-col-sm-0{display:none}.ant-col-push-0{left:auto}.ant-col-pull-0{right:auto}.ant-col-sm-push-0{left:auto}.ant-col-sm-pull-0{right:auto}.ant-col-sm-offset-0{margin-left:0}.ant-col-sm-order-0{order:0}.ant-col-push-0.ant-col-rtl{right:auto}.ant-col-pull-0.ant-col-rtl{left:auto}.ant-col-sm-push-0.ant-col-rtl{right:auto}.ant-col-sm-pull-0.ant-col-rtl{left:auto}.ant-col-sm-offset-0.ant-col-rtl{margin-right:0}.ant-col-sm-push-1.ant-col-rtl{right:4.16666667%;left:auto}.ant-col-sm-pull-1.ant-col-rtl{right:auto;left:4.16666667%}.ant-col-sm-offset-1.ant-col-rtl{margin-right:4.16666667%;margin-left:0}.ant-col-sm-push-2.ant-col-rtl{right:8.33333333%;left:auto}.ant-col-sm-pull-2.ant-col-rtl{right:auto;left:8.33333333%}.ant-col-sm-offset-2.ant-col-rtl{margin-right:8.33333333%;margin-left:0}.ant-col-sm-push-3.ant-col-rtl{right:12.5%;left:auto}.ant-col-sm-pull-3.ant-col-rtl{right:auto;left:12.5%}.ant-col-sm-offset-3.ant-col-rtl{margin-right:12.5%;margin-left:0}.ant-col-sm-push-4.ant-col-rtl{right:16.66666667%;left:auto}.ant-col-sm-pull-4.ant-col-rtl{right:auto;left:16.66666667%}.ant-col-sm-offset-4.ant-col-rtl{margin-right:16.66666667%;margin-left:0}.ant-col-sm-push-5.ant-col-rtl{right:20.83333333%;left:auto}.ant-col-sm-pull-5.ant-col-rtl{right:auto;left:20.83333333%}.ant-col-sm-offset-5.ant-col-rtl{margin-right:20.83333333%;margin-left:0}.ant-col-sm-push-6.ant-col-rtl{right:25%;left:auto}.ant-col-sm-pull-6.ant-col-rtl{right:auto;left:25%}.ant-col-sm-offset-6.ant-col-rtl{margin-right:25%;margin-left:0}.ant-col-sm-push-7.ant-col-rtl{right:29.16666667%;left:auto}.ant-col-sm-pull-7.ant-col-rt
l{right:auto;left:29.16666667%}.ant-col-sm-offset-7.ant-col-rtl{margin-right:29.16666667%;margin-left:0}.ant-col-sm-push-8.ant-col-rtl{right:33.33333333%;left:auto}.ant-col-sm-pull-8.ant-col-rtl{right:auto;left:33.33333333%}.ant-col-sm-offset-8.ant-col-rtl{margin-right:33.33333333%;margin-left:0}.ant-col-sm-push-9.ant-col-rtl{right:37.5%;left:auto}.ant-col-sm-pull-9.ant-col-rtl{right:auto;left:37.5%}.ant-col-sm-offset-9.ant-col-rtl{margin-right:37.5%;margin-left:0}.ant-col-sm-push-10.ant-col-rtl{right:41.66666667%;left:auto}.ant-col-sm-pull-10.ant-col-rtl{right:auto;left:41.66666667%}.ant-col-sm-offset-10.ant-col-rtl{margin-right:41.66666667%;margin-left:0}.ant-col-sm-push-11.ant-col-rtl{right:45.83333333%;left:auto}.ant-col-sm-pull-11.ant-col-rtl{right:auto;left:45.83333333%}.ant-col-sm-offset-11.ant-col-rtl{margin-right:45.83333333%;margin-left:0}.ant-col-sm-push-12.ant-col-rtl{right:50%;left:auto}.ant-col-sm-pull-12.ant-col-rtl{right:auto;left:50%}.ant-col-sm-offset-12.ant-col-rtl{margin-right:50%;margin-left:0}.ant-col-sm-push-13.ant-col-rtl{right:54.16666667%;left:auto}.ant-col-sm-pull-13.ant-col-rtl{right:auto;left:54.16666667%}.ant-col-sm-offset-13.ant-col-rtl{margin-right:54.16666667%;margin-left:0}.ant-col-sm-push-14.ant-col-rtl{right:58.33333333%;left:auto}.ant-col-sm-pull-14.ant-col-rtl{right:auto;left:58.33333333%}.ant-col-sm-offset-14.ant-col-rtl{margin-right:58.33333333%;margin-left:0}.ant-col-sm-push-15.ant-col-rtl{right:62.5%;left:auto}.ant-col-sm-pull-15.ant-col-rtl{right:auto;left:62.5%}.ant-col-sm-offset-15.ant-col-rtl{margin-right:62.5%;margin-left:0}.ant-col-sm-push-16.ant-col-rtl{right:66.66666667%;left:auto}.ant-col-sm-pull-16.ant-col-rtl{right:auto;left:66.66666667%}.ant-col-sm-offset-16.ant-col-rtl{margin-right:66.66666667%;margin-left:0}.ant-col-sm-push-17.ant-col-rtl{right:70.83333333%;left:auto}.ant-col-sm-pull-17.ant-col-rtl{right:auto;left:70.83333333%}.ant-col-sm-offset-17.ant-col-rtl{margin-right:70.83333333%;margin-left:0}.ant-col-sm
-push-18.ant-col-rtl{right:75%;left:auto}.ant-col-sm-pull-18.ant-col-rtl{right:auto;left:75%}.ant-col-sm-offset-18.ant-col-rtl{margin-right:75%;margin-left:0}.ant-col-sm-push-19.ant-col-rtl{right:79.16666667%;left:auto}.ant-col-sm-pull-19.ant-col-rtl{right:auto;left:79.16666667%}.ant-col-sm-offset-19.ant-col-rtl{margin-right:79.16666667%;margin-left:0}.ant-col-sm-push-20.ant-col-rtl{right:83.33333333%;left:auto}.ant-col-sm-pull-20.ant-col-rtl{right:auto;left:83.33333333%}.ant-col-sm-offset-20.ant-col-rtl{margin-right:83.33333333%;margin-left:0}.ant-col-sm-push-21.ant-col-rtl{right:87.5%;left:auto}.ant-col-sm-pull-21.ant-col-rtl{right:auto;left:87.5%}.ant-col-sm-offset-21.ant-col-rtl{margin-right:87.5%;margin-left:0}.ant-col-sm-push-22.ant-col-rtl{right:91.66666667%;left:auto}.ant-col-sm-pull-22.ant-col-rtl{right:auto;left:91.66666667%}.ant-col-sm-offset-22.ant-col-rtl{margin-right:91.66666667%;margin-left:0}.ant-col-sm-push-23.ant-col-rtl{right:95.83333333%;left:auto}.ant-col-sm-pull-23.ant-col-rtl{right:auto;left:95.83333333%}.ant-col-sm-offset-23.ant-col-rtl{margin-right:95.83333333%;margin-left:0}.ant-col-sm-push-24.ant-col-rtl{right:100%;left:auto}.ant-col-sm-pull-24.ant-col-rtl{right:auto;left:100%}.ant-col-sm-offset-24.ant-col-rtl{margin-right:100%;margin-left:0}}@media (min-width: 768px){.ant-col-md-24{display:block;flex:0 0 100%;max-width:100%}.ant-col-md-push-24{left:100%}.ant-col-md-pull-24{right:100%}.ant-col-md-offset-24{margin-left:100%}.ant-col-md-order-24{order:24}.ant-col-md-23{display:block;flex:0 0 95.83333333%;max-width:95.83333333%}.ant-col-md-push-23{left:95.83333333%}.ant-col-md-pull-23{right:95.83333333%}.ant-col-md-offset-23{margin-left:95.83333333%}.ant-col-md-order-23{order:23}.ant-col-md-22{display:block;flex:0 0 91.66666667%;max-width:91.66666667%}.ant-col-md-push-22{left:91.66666667%}.ant-col-md-pull-22{right:91.66666667%}.ant-col-md-offset-22{margin-left:91.66666667%}.ant-col-md-order-22{order:22}.ant-col-md-21{display:block;flex:0 0 
87.5%;max-width:87.5%}.ant-col-md-push-21{left:87.5%}.ant-col-md-pull-21{right:87.5%}.ant-col-md-offset-21{margin-left:87.5%}.ant-col-md-order-21{order:21}.ant-col-md-20{display:block;flex:0 0 83.33333333%;max-width:83.33333333%}.ant-col-md-push-20{left:83.33333333%}.ant-col-md-pull-20{right:83.33333333%}.ant-col-md-offset-20{margin-left:83.33333333%}.ant-col-md-order-20{order:20}.ant-col-md-19{display:block;flex:0 0 79.16666667%;max-width:79.16666667%}.ant-col-md-push-19{left:79.16666667%}.ant-col-md-pull-19{right:79.16666667%}.ant-col-md-offset-19{margin-left:79.16666667%}.ant-col-md-order-19{order:19}.ant-col-md-18{display:block;flex:0 0 75%;max-width:75%}.ant-col-md-push-18{left:75%}.ant-col-md-pull-18{right:75%}.ant-col-md-offset-18{margin-left:75%}.ant-col-md-order-18{order:18}.ant-col-md-17{display:block;flex:0 0 70.83333333%;max-width:70.83333333%}.ant-col-md-push-17{left:70.83333333%}.ant-col-md-pull-17{right:70.83333333%}.ant-col-md-offset-17{margin-left:70.83333333%}.ant-col-md-order-17{order:17}.ant-col-md-16{display:block;flex:0 0 66.66666667%;max-width:66.66666667%}.ant-col-md-push-16{left:66.66666667%}.ant-col-md-pull-16{right:66.66666667%}.ant-col-md-offset-16{margin-left:66.66666667%}.ant-col-md-order-16{order:16}.ant-col-md-15{display:block;flex:0 0 62.5%;max-width:62.5%}.ant-col-md-push-15{left:62.5%}.ant-col-md-pull-15{right:62.5%}.ant-col-md-offset-15{margin-left:62.5%}.ant-col-md-order-15{order:15}.ant-col-md-14{display:block;flex:0 0 58.33333333%;max-width:58.33333333%}.ant-col-md-push-14{left:58.33333333%}.ant-col-md-pull-14{right:58.33333333%}.ant-col-md-offset-14{margin-left:58.33333333%}.ant-col-md-order-14{order:14}.ant-col-md-13{display:block;flex:0 0 54.16666667%;max-width:54.16666667%}.ant-col-md-push-13{left:54.16666667%}.ant-col-md-pull-13{right:54.16666667%}.ant-col-md-offset-13{margin-left:54.16666667%}.ant-col-md-order-13{order:13}.ant-col-md-12{display:block;flex:0 0 
50%;max-width:50%}.ant-col-md-push-12{left:50%}.ant-col-md-pull-12{right:50%}.ant-col-md-offset-12{margin-left:50%}.ant-col-md-order-12{order:12}.ant-col-md-11{display:block;flex:0 0 45.83333333%;max-width:45.83333333%}.ant-col-md-push-11{left:45.83333333%}.ant-col-md-pull-11{right:45.83333333%}.ant-col-md-offset-11{margin-left:45.83333333%}.ant-col-md-order-11{order:11}.ant-col-md-10{display:block;flex:0 0 41.66666667%;max-width:41.66666667%}.ant-col-md-push-10{left:41.66666667%}.ant-col-md-pull-10{right:41.66666667%}.ant-col-md-offset-10{margin-left:41.66666667%}.ant-col-md-order-10{order:10}.ant-col-md-9{display:block;flex:0 0 37.5%;max-width:37.5%}.ant-col-md-push-9{left:37.5%}.ant-col-md-pull-9{right:37.5%}.ant-col-md-offset-9{margin-left:37.5%}.ant-col-md-order-9{order:9}.ant-col-md-8{display:block;flex:0 0 33.33333333%;max-width:33.33333333%}.ant-col-md-push-8{left:33.33333333%}.ant-col-md-pull-8{right:33.33333333%}.ant-col-md-offset-8{margin-left:33.33333333%}.ant-col-md-order-8{order:8}.ant-col-md-7{display:block;flex:0 0 29.16666667%;max-width:29.16666667%}.ant-col-md-push-7{left:29.16666667%}.ant-col-md-pull-7{right:29.16666667%}.ant-col-md-offset-7{margin-left:29.16666667%}.ant-col-md-order-7{order:7}.ant-col-md-6{display:block;flex:0 0 25%;max-width:25%}.ant-col-md-push-6{left:25%}.ant-col-md-pull-6{right:25%}.ant-col-md-offset-6{margin-left:25%}.ant-col-md-order-6{order:6}.ant-col-md-5{display:block;flex:0 0 20.83333333%;max-width:20.83333333%}.ant-col-md-push-5{left:20.83333333%}.ant-col-md-pull-5{right:20.83333333%}.ant-col-md-offset-5{margin-left:20.83333333%}.ant-col-md-order-5{order:5}.ant-col-md-4{display:block;flex:0 0 16.66666667%;max-width:16.66666667%}.ant-col-md-push-4{left:16.66666667%}.ant-col-md-pull-4{right:16.66666667%}.ant-col-md-offset-4{margin-left:16.66666667%}.ant-col-md-order-4{order:4}.ant-col-md-3{display:block;flex:0 0 
12.5%;max-width:12.5%}.ant-col-md-push-3{left:12.5%}.ant-col-md-pull-3{right:12.5%}.ant-col-md-offset-3{margin-left:12.5%}.ant-col-md-order-3{order:3}.ant-col-md-2{display:block;flex:0 0 8.33333333%;max-width:8.33333333%}.ant-col-md-push-2{left:8.33333333%}.ant-col-md-pull-2{right:8.33333333%}.ant-col-md-offset-2{margin-left:8.33333333%}.ant-col-md-order-2{order:2}.ant-col-md-1{display:block;flex:0 0 4.16666667%;max-width:4.16666667%}.ant-col-md-push-1{left:4.16666667%}.ant-col-md-pull-1{right:4.16666667%}.ant-col-md-offset-1{margin-left:4.16666667%}.ant-col-md-order-1{order:1}.ant-col-md-0{display:none}.ant-col-push-0{left:auto}.ant-col-pull-0{right:auto}.ant-col-md-push-0{left:auto}.ant-col-md-pull-0{right:auto}.ant-col-md-offset-0{margin-left:0}.ant-col-md-order-0{order:0}.ant-col-push-0.ant-col-rtl{right:auto}.ant-col-pull-0.ant-col-rtl{left:auto}.ant-col-md-push-0.ant-col-rtl{right:auto}.ant-col-md-pull-0.ant-col-rtl{left:auto}.ant-col-md-offset-0.ant-col-rtl{margin-right:0}.ant-col-md-push-1.ant-col-rtl{right:4.16666667%;left:auto}.ant-col-md-pull-1.ant-col-rtl{right:auto;left:4.16666667%}.ant-col-md-offset-1.ant-col-rtl{margin-right:4.16666667%;margin-left:0}.ant-col-md-push-2.ant-col-rtl{right:8.33333333%;left:auto}.ant-col-md-pull-2.ant-col-rtl{right:auto;left:8.33333333%}.ant-col-md-offset-2.ant-col-rtl{margin-right:8.33333333%;margin-left:0}.ant-col-md-push-3.ant-col-rtl{right:12.5%;left:auto}.ant-col-md-pull-3.ant-col-rtl{right:auto;left:12.5%}.ant-col-md-offset-3.ant-col-rtl{margin-right:12.5%;margin-left:0}.ant-col-md-push-4.ant-col-rtl{right:16.66666667%;left:auto}.ant-col-md-pull-4.ant-col-rtl{right:auto;left:16.66666667%}.ant-col-md-offset-4.ant-col-rtl{margin-right:16.66666667%;margin-left:0}.ant-col-md-push-5.ant-col-rtl{right:20.83333333%;left:auto}.ant-col-md-pull-5.ant-col-rtl{right:auto;left:20.83333333%}.ant-col-md-offset-5.ant-col-rtl{margin-right:20.83333333%;margin-left:0}.ant-col-md-push-6.ant-col-rtl{right:25%;left:auto}.ant-col-md-pull-
6.ant-col-rtl{right:auto;left:25%}.ant-col-md-offset-6.ant-col-rtl{margin-right:25%;margin-left:0}.ant-col-md-push-7.ant-col-rtl{right:29.16666667%;left:auto}.ant-col-md-pull-7.ant-col-rtl{right:auto;left:29.16666667%}.ant-col-md-offset-7.ant-col-rtl{margin-right:29.16666667%;margin-left:0}.ant-col-md-push-8.ant-col-rtl{right:33.33333333%;left:auto}.ant-col-md-pull-8.ant-col-rtl{right:auto;left:33.33333333%}.ant-col-md-offset-8.ant-col-rtl{margin-right:33.33333333%;margin-left:0}.ant-col-md-push-9.ant-col-rtl{right:37.5%;left:auto}.ant-col-md-pull-9.ant-col-rtl{right:auto;left:37.5%}.ant-col-md-offset-9.ant-col-rtl{margin-right:37.5%;margin-left:0}.ant-col-md-push-10.ant-col-rtl{right:41.66666667%;left:auto}.ant-col-md-pull-10.ant-col-rtl{right:auto;left:41.66666667%}.ant-col-md-offset-10.ant-col-rtl{margin-right:41.66666667%;margin-left:0}.ant-col-md-push-11.ant-col-rtl{right:45.83333333%;left:auto}.ant-col-md-pull-11.ant-col-rtl{right:auto;left:45.83333333%}.ant-col-md-offset-11.ant-col-rtl{margin-right:45.83333333%;margin-left:0}.ant-col-md-push-12.ant-col-rtl{right:50%;left:auto}.ant-col-md-pull-12.ant-col-rtl{right:auto;left:50%}.ant-col-md-offset-12.ant-col-rtl{margin-right:50%;margin-left:0}.ant-col-md-push-13.ant-col-rtl{right:54.16666667%;left:auto}.ant-col-md-pull-13.ant-col-rtl{right:auto;left:54.16666667%}.ant-col-md-offset-13.ant-col-rtl{margin-right:54.16666667%;margin-left:0}.ant-col-md-push-14.ant-col-rtl{right:58.33333333%;left:auto}.ant-col-md-pull-14.ant-col-rtl{right:auto;left:58.33333333%}.ant-col-md-offset-14.ant-col-rtl{margin-right:58.33333333%;margin-left:0}.ant-col-md-push-15.ant-col-rtl{right:62.5%;left:auto}.ant-col-md-pull-15.ant-col-rtl{right:auto;left:62.5%}.ant-col-md-offset-15.ant-col-rtl{margin-right:62.5%;margin-left:0}.ant-col-md-push-16.ant-col-rtl{right:66.66666667%;left:auto}.ant-col-md-pull-16.ant-col-rtl{right:auto;left:66.66666667%}.ant-col-md-offset-16.ant-col-rtl{margin-right:66.66666667%;margin-left:0}.ant-col-md-push-17.
ant-col-rtl{right:70.83333333%;left:auto}.ant-col-md-pull-17.ant-col-rtl{right:auto;left:70.83333333%}.ant-col-md-offset-17.ant-col-rtl{margin-right:70.83333333%;margin-left:0}.ant-col-md-push-18.ant-col-rtl{right:75%;left:auto}.ant-col-md-pull-18.ant-col-rtl{right:auto;left:75%}.ant-col-md-offset-18.ant-col-rtl{margin-right:75%;margin-left:0}.ant-col-md-push-19.ant-col-rtl{right:79.16666667%;left:auto}.ant-col-md-pull-19.ant-col-rtl{right:auto;left:79.16666667%}.ant-col-md-offset-19.ant-col-rtl{margin-right:79.16666667%;margin-left:0}.ant-col-md-push-20.ant-col-rtl{right:83.33333333%;left:auto}.ant-col-md-pull-20.ant-col-rtl{right:auto;left:83.33333333%}.ant-col-md-offset-20.ant-col-rtl{margin-right:83.33333333%;margin-left:0}.ant-col-md-push-21.ant-col-rtl{right:87.5%;left:auto}.ant-col-md-pull-21.ant-col-rtl{right:auto;left:87.5%}.ant-col-md-offset-21.ant-col-rtl{margin-right:87.5%;margin-left:0}.ant-col-md-push-22.ant-col-rtl{right:91.66666667%;left:auto}.ant-col-md-pull-22.ant-col-rtl{right:auto;left:91.66666667%}.ant-col-md-offset-22.ant-col-rtl{margin-right:91.66666667%;margin-left:0}.ant-col-md-push-23.ant-col-rtl{right:95.83333333%;left:auto}.ant-col-md-pull-23.ant-col-rtl{right:auto;left:95.83333333%}.ant-col-md-offset-23.ant-col-rtl{margin-right:95.83333333%;margin-left:0}.ant-col-md-push-24.ant-col-rtl{right:100%;left:auto}.ant-col-md-pull-24.ant-col-rtl{right:auto;left:100%}.ant-col-md-offset-24.ant-col-rtl{margin-right:100%;margin-left:0}}@media (min-width: 992px){.ant-col-lg-24{display:block;flex:0 0 100%;max-width:100%}.ant-col-lg-push-24{left:100%}.ant-col-lg-pull-24{right:100%}.ant-col-lg-offset-24{margin-left:100%}.ant-col-lg-order-24{order:24}.ant-col-lg-23{display:block;flex:0 0 95.83333333%;max-width:95.83333333%}.ant-col-lg-push-23{left:95.83333333%}.ant-col-lg-pull-23{right:95.83333333%}.ant-col-lg-offset-23{margin-left:95.83333333%}.ant-col-lg-order-23{order:23}.ant-col-lg-22{display:block;flex:0 0 
91.66666667%;max-width:91.66666667%}.ant-col-lg-push-22{left:91.66666667%}.ant-col-lg-pull-22{right:91.66666667%}.ant-col-lg-offset-22{margin-left:91.66666667%}.ant-col-lg-order-22{order:22}.ant-col-lg-21{display:block;flex:0 0 87.5%;max-width:87.5%}.ant-col-lg-push-21{left:87.5%}.ant-col-lg-pull-21{right:87.5%}.ant-col-lg-offset-21{margin-left:87.5%}.ant-col-lg-order-21{order:21}.ant-col-lg-20{display:block;flex:0 0 83.33333333%;max-width:83.33333333%}.ant-col-lg-push-20{left:83.33333333%}.ant-col-lg-pull-20{right:83.33333333%}.ant-col-lg-offset-20{margin-left:83.33333333%}.ant-col-lg-order-20{order:20}.ant-col-lg-19{display:block;flex:0 0 79.16666667%;max-width:79.16666667%}.ant-col-lg-push-19{left:79.16666667%}.ant-col-lg-pull-19{right:79.16666667%}.ant-col-lg-offset-19{margin-left:79.16666667%}.ant-col-lg-order-19{order:19}.ant-col-lg-18{display:block;flex:0 0 75%;max-width:75%}.ant-col-lg-push-18{left:75%}.ant-col-lg-pull-18{right:75%}.ant-col-lg-offset-18{margin-left:75%}.ant-col-lg-order-18{order:18}.ant-col-lg-17{display:block;flex:0 0 70.83333333%;max-width:70.83333333%}.ant-col-lg-push-17{left:70.83333333%}.ant-col-lg-pull-17{right:70.83333333%}.ant-col-lg-offset-17{margin-left:70.83333333%}.ant-col-lg-order-17{order:17}.ant-col-lg-16{display:block;flex:0 0 66.66666667%;max-width:66.66666667%}.ant-col-lg-push-16{left:66.66666667%}.ant-col-lg-pull-16{right:66.66666667%}.ant-col-lg-offset-16{margin-left:66.66666667%}.ant-col-lg-order-16{order:16}.ant-col-lg-15{display:block;flex:0 0 62.5%;max-width:62.5%}.ant-col-lg-push-15{left:62.5%}.ant-col-lg-pull-15{right:62.5%}.ant-col-lg-offset-15{margin-left:62.5%}.ant-col-lg-order-15{order:15}.ant-col-lg-14{display:block;flex:0 0 58.33333333%;max-width:58.33333333%}.ant-col-lg-push-14{left:58.33333333%}.ant-col-lg-pull-14{right:58.33333333%}.ant-col-lg-offset-14{margin-left:58.33333333%}.ant-col-lg-order-14{order:14}.ant-col-lg-13{display:block;flex:0 0 
54.16666667%;max-width:54.16666667%}.ant-col-lg-push-13{left:54.16666667%}.ant-col-lg-pull-13{right:54.16666667%}.ant-col-lg-offset-13{margin-left:54.16666667%}.ant-col-lg-order-13{order:13}.ant-col-lg-12{display:block;flex:0 0 50%;max-width:50%}.ant-col-lg-push-12{left:50%}.ant-col-lg-pull-12{right:50%}.ant-col-lg-offset-12{margin-left:50%}.ant-col-lg-order-12{order:12}.ant-col-lg-11{display:block;flex:0 0 45.83333333%;max-width:45.83333333%}.ant-col-lg-push-11{left:45.83333333%}.ant-col-lg-pull-11{right:45.83333333%}.ant-col-lg-offset-11{margin-left:45.83333333%}.ant-col-lg-order-11{order:11}.ant-col-lg-10{display:block;flex:0 0 41.66666667%;max-width:41.66666667%}.ant-col-lg-push-10{left:41.66666667%}.ant-col-lg-pull-10{right:41.66666667%}.ant-col-lg-offset-10{margin-left:41.66666667%}.ant-col-lg-order-10{order:10}.ant-col-lg-9{display:block;flex:0 0 37.5%;max-width:37.5%}.ant-col-lg-push-9{left:37.5%}.ant-col-lg-pull-9{right:37.5%}.ant-col-lg-offset-9{margin-left:37.5%}.ant-col-lg-order-9{order:9}.ant-col-lg-8{display:block;flex:0 0 33.33333333%;max-width:33.33333333%}.ant-col-lg-push-8{left:33.33333333%}.ant-col-lg-pull-8{right:33.33333333%}.ant-col-lg-offset-8{margin-left:33.33333333%}.ant-col-lg-order-8{order:8}.ant-col-lg-7{display:block;flex:0 0 29.16666667%;max-width:29.16666667%}.ant-col-lg-push-7{left:29.16666667%}.ant-col-lg-pull-7{right:29.16666667%}.ant-col-lg-offset-7{margin-left:29.16666667%}.ant-col-lg-order-7{order:7}.ant-col-lg-6{display:block;flex:0 0 25%;max-width:25%}.ant-col-lg-push-6{left:25%}.ant-col-lg-pull-6{right:25%}.ant-col-lg-offset-6{margin-left:25%}.ant-col-lg-order-6{order:6}.ant-col-lg-5{display:block;flex:0 0 20.83333333%;max-width:20.83333333%}.ant-col-lg-push-5{left:20.83333333%}.ant-col-lg-pull-5{right:20.83333333%}.ant-col-lg-offset-5{margin-left:20.83333333%}.ant-col-lg-order-5{order:5}.ant-col-lg-4{display:block;flex:0 0 
16.66666667%;max-width:16.66666667%}.ant-col-lg-push-4{left:16.66666667%}.ant-col-lg-pull-4{right:16.66666667%}.ant-col-lg-offset-4{margin-left:16.66666667%}.ant-col-lg-order-4{order:4}.ant-col-lg-3{display:block;flex:0 0 12.5%;max-width:12.5%}.ant-col-lg-push-3{left:12.5%}.ant-col-lg-pull-3{right:12.5%}.ant-col-lg-offset-3{margin-left:12.5%}.ant-col-lg-order-3{order:3}.ant-col-lg-2{display:block;flex:0 0 8.33333333%;max-width:8.33333333%}.ant-col-lg-push-2{left:8.33333333%}.ant-col-lg-pull-2{right:8.33333333%}.ant-col-lg-offset-2{margin-left:8.33333333%}.ant-col-lg-order-2{order:2}.ant-col-lg-1{display:block;flex:0 0 4.16666667%;max-width:4.16666667%}.ant-col-lg-push-1{left:4.16666667%}.ant-col-lg-pull-1{right:4.16666667%}.ant-col-lg-offset-1{margin-left:4.16666667%}.ant-col-lg-order-1{order:1}.ant-col-lg-0{display:none}.ant-col-push-0{left:auto}.ant-col-pull-0{right:auto}.ant-col-lg-push-0{left:auto}.ant-col-lg-pull-0{right:auto}.ant-col-lg-offset-0{margin-left:0}.ant-col-lg-order-0{order:0}.ant-col-push-0.ant-col-rtl{right:auto}.ant-col-pull-0.ant-col-rtl{left:auto}.ant-col-lg-push-0.ant-col-rtl{right:auto}.ant-col-lg-pull-0.ant-col-rtl{left:auto}.ant-col-lg-offset-0.ant-col-rtl{margin-right:0}.ant-col-lg-push-1.ant-col-rtl{right:4.16666667%;left:auto}.ant-col-lg-pull-1.ant-col-rtl{right:auto;left:4.16666667%}.ant-col-lg-offset-1.ant-col-rtl{margin-right:4.16666667%;margin-left:0}.ant-col-lg-push-2.ant-col-rtl{right:8.33333333%;left:auto}.ant-col-lg-pull-2.ant-col-rtl{right:auto;left:8.33333333%}.ant-col-lg-offset-2.ant-col-rtl{margin-right:8.33333333%;margin-left:0}.ant-col-lg-push-3.ant-col-rtl{right:12.5%;left:auto}.ant-col-lg-pull-3.ant-col-rtl{right:auto;left:12.5%}.ant-col-lg-offset-3.ant-col-rtl{margin-right:12.5%;margin-left:0}.ant-col-lg-push-4.ant-col-rtl{right:16.66666667%;left:auto}.ant-col-lg-pull-4.ant-col-rtl{right:auto;left:16.66666667%}.ant-col-lg-offset-4.ant-col-rtl{margin-right:16.66666667%;margin-left:0}.ant-col-lg-push-5.ant-col-rtl{right:20
.83333333%;left:auto}.ant-col-lg-pull-5.ant-col-rtl{right:auto;left:20.83333333%}.ant-col-lg-offset-5.ant-col-rtl{margin-right:20.83333333%;margin-left:0}.ant-col-lg-push-6.ant-col-rtl{right:25%;left:auto}.ant-col-lg-pull-6.ant-col-rtl{right:auto;left:25%}.ant-col-lg-offset-6.ant-col-rtl{margin-right:25%;margin-left:0}.ant-col-lg-push-7.ant-col-rtl{right:29.16666667%;left:auto}.ant-col-lg-pull-7.ant-col-rtl{right:auto;left:29.16666667%}.ant-col-lg-offset-7.ant-col-rtl{margin-right:29.16666667%;margin-left:0}.ant-col-lg-push-8.ant-col-rtl{right:33.33333333%;left:auto}.ant-col-lg-pull-8.ant-col-rtl{right:auto;left:33.33333333%}.ant-col-lg-offset-8.ant-col-rtl{margin-right:33.33333333%;margin-left:0}.ant-col-lg-push-9.ant-col-rtl{right:37.5%;left:auto}.ant-col-lg-pull-9.ant-col-rtl{right:auto;left:37.5%}.ant-col-lg-offset-9.ant-col-rtl{margin-right:37.5%;margin-left:0}.ant-col-lg-push-10.ant-col-rtl{right:41.66666667%;left:auto}.ant-col-lg-pull-10.ant-col-rtl{right:auto;left:41.66666667%}.ant-col-lg-offset-10.ant-col-rtl{margin-right:41.66666667%;margin-left:0}.ant-col-lg-push-11.ant-col-rtl{right:45.83333333%;left:auto}.ant-col-lg-pull-11.ant-col-rtl{right:auto;left:45.83333333%}.ant-col-lg-offset-11.ant-col-rtl{margin-right:45.83333333%;margin-left:0}.ant-col-lg-push-12.ant-col-rtl{right:50%;left:auto}.ant-col-lg-pull-12.ant-col-rtl{right:auto;left:50%}.ant-col-lg-offset-12.ant-col-rtl{margin-right:50%;margin-left:0}.ant-col-lg-push-13.ant-col-rtl{right:54.16666667%;left:auto}.ant-col-lg-pull-13.ant-col-rtl{right:auto;left:54.16666667%}.ant-col-lg-offset-13.ant-col-rtl{margin-right:54.16666667%;margin-left:0}.ant-col-lg-push-14.ant-col-rtl{right:58.33333333%;left:auto}.ant-col-lg-pull-14.ant-col-rtl{right:auto;left:58.33333333%}.ant-col-lg-offset-14.ant-col-rtl{margin-right:58.33333333%;margin-left:0}.ant-col-lg-push-15.ant-col-rtl{right:62.5%;left:auto}.ant-col-lg-pull-15.ant-col-rtl{right:auto;left:62.5%}.ant-col-lg-offset-15.ant-col-rtl{margin-right:62.5%;margin-l
eft:0}.ant-col-lg-push-16.ant-col-rtl{right:66.66666667%;left:auto}.ant-col-lg-pull-16.ant-col-rtl{right:auto;left:66.66666667%}.ant-col-lg-offset-16.ant-col-rtl{margin-right:66.66666667%;margin-left:0}.ant-col-lg-push-17.ant-col-rtl{right:70.83333333%;left:auto}.ant-col-lg-pull-17.ant-col-rtl{right:auto;left:70.83333333%}.ant-col-lg-offset-17.ant-col-rtl{margin-right:70.83333333%;margin-left:0}.ant-col-lg-push-18.ant-col-rtl{right:75%;left:auto}.ant-col-lg-pull-18.ant-col-rtl{right:auto;left:75%}.ant-col-lg-offset-18.ant-col-rtl{margin-right:75%;margin-left:0}.ant-col-lg-push-19.ant-col-rtl{right:79.16666667%;left:auto}.ant-col-lg-pull-19.ant-col-rtl{right:auto;left:79.16666667%}.ant-col-lg-offset-19.ant-col-rtl{margin-right:79.16666667%;margin-left:0}.ant-col-lg-push-20.ant-col-rtl{right:83.33333333%;left:auto}.ant-col-lg-pull-20.ant-col-rtl{right:auto;left:83.33333333%}.ant-col-lg-offset-20.ant-col-rtl{margin-right:83.33333333%;margin-left:0}.ant-col-lg-push-21.ant-col-rtl{right:87.5%;left:auto}.ant-col-lg-pull-21.ant-col-rtl{right:auto;left:87.5%}.ant-col-lg-offset-21.ant-col-rtl{margin-right:87.5%;margin-left:0}.ant-col-lg-push-22.ant-col-rtl{right:91.66666667%;left:auto}.ant-col-lg-pull-22.ant-col-rtl{right:auto;left:91.66666667%}.ant-col-lg-offset-22.ant-col-rtl{margin-right:91.66666667%;margin-left:0}.ant-col-lg-push-23.ant-col-rtl{right:95.83333333%;left:auto}.ant-col-lg-pull-23.ant-col-rtl{right:auto;left:95.83333333%}.ant-col-lg-offset-23.ant-col-rtl{margin-right:95.83333333%;margin-left:0}.ant-col-lg-push-24.ant-col-rtl{right:100%;left:auto}.ant-col-lg-pull-24.ant-col-rtl{right:auto;left:100%}.ant-col-lg-offset-24.ant-col-rtl{margin-right:100%;margin-left:0}}@media (min-width: 1200px){.ant-col-xl-24{display:block;flex:0 0 100%;max-width:100%}.ant-col-xl-push-24{left:100%}.ant-col-xl-pull-24{right:100%}.ant-col-xl-offset-24{margin-left:100%}.ant-col-xl-order-24{order:24}.ant-col-xl-23{display:block;flex:0 0 
95.83333333%;max-width:95.83333333%}.ant-col-xl-push-23{left:95.83333333%}.ant-col-xl-pull-23{right:95.83333333%}.ant-col-xl-offset-23{margin-left:95.83333333%}.ant-col-xl-order-23{order:23}.ant-col-xl-22{display:block;flex:0 0 91.66666667%;max-width:91.66666667%}.ant-col-xl-push-22{left:91.66666667%}.ant-col-xl-pull-22{right:91.66666667%}.ant-col-xl-offset-22{margin-left:91.66666667%}.ant-col-xl-order-22{order:22}.ant-col-xl-21{display:block;flex:0 0 87.5%;max-width:87.5%}.ant-col-xl-push-21{left:87.5%}.ant-col-xl-pull-21{right:87.5%}.ant-col-xl-offset-21{margin-left:87.5%}.ant-col-xl-order-21{order:21}.ant-col-xl-20{display:block;flex:0 0 83.33333333%;max-width:83.33333333%}.ant-col-xl-push-20{left:83.33333333%}.ant-col-xl-pull-20{right:83.33333333%}.ant-col-xl-offset-20{margin-left:83.33333333%}.ant-col-xl-order-20{order:20}.ant-col-xl-19{display:block;flex:0 0 79.16666667%;max-width:79.16666667%}.ant-col-xl-push-19{left:79.16666667%}.ant-col-xl-pull-19{right:79.16666667%}.ant-col-xl-offset-19{margin-left:79.16666667%}.ant-col-xl-order-19{order:19}.ant-col-xl-18{display:block;flex:0 0 75%;max-width:75%}.ant-col-xl-push-18{left:75%}.ant-col-xl-pull-18{right:75%}.ant-col-xl-offset-18{margin-left:75%}.ant-col-xl-order-18{order:18}.ant-col-xl-17{display:block;flex:0 0 70.83333333%;max-width:70.83333333%}.ant-col-xl-push-17{left:70.83333333%}.ant-col-xl-pull-17{right:70.83333333%}.ant-col-xl-offset-17{margin-left:70.83333333%}.ant-col-xl-order-17{order:17}.ant-col-xl-16{display:block;flex:0 0 66.66666667%;max-width:66.66666667%}.ant-col-xl-push-16{left:66.66666667%}.ant-col-xl-pull-16{right:66.66666667%}.ant-col-xl-offset-16{margin-left:66.66666667%}.ant-col-xl-order-16{order:16}.ant-col-xl-15{display:block;flex:0 0 62.5%;max-width:62.5%}.ant-col-xl-push-15{left:62.5%}.ant-col-xl-pull-15{right:62.5%}.ant-col-xl-offset-15{margin-left:62.5%}.ant-col-xl-order-15{order:15}.ant-col-xl-14{display:block;flex:0 0 
58.33333333%;max-width:58.33333333%}.ant-col-xl-push-14{left:58.33333333%}.ant-col-xl-pull-14{right:58.33333333%}.ant-col-xl-offset-14{margin-left:58.33333333%}.ant-col-xl-order-14{order:14}.ant-col-xl-13{display:block;flex:0 0 54.16666667%;max-width:54.16666667%}.ant-col-xl-push-13{left:54.16666667%}.ant-col-xl-pull-13{right:54.16666667%}.ant-col-xl-offset-13{margin-left:54.16666667%}.ant-col-xl-order-13{order:13}.ant-col-xl-12{display:block;flex:0 0 50%;max-width:50%}.ant-col-xl-push-12{left:50%}.ant-col-xl-pull-12{right:50%}.ant-col-xl-offset-12{margin-left:50%}.ant-col-xl-order-12{order:12}.ant-col-xl-11{display:block;flex:0 0 45.83333333%;max-width:45.83333333%}.ant-col-xl-push-11{left:45.83333333%}.ant-col-xl-pull-11{right:45.83333333%}.ant-col-xl-offset-11{margin-left:45.83333333%}.ant-col-xl-order-11{order:11}.ant-col-xl-10{display:block;flex:0 0 41.66666667%;max-width:41.66666667%}.ant-col-xl-push-10{left:41.66666667%}.ant-col-xl-pull-10{right:41.66666667%}.ant-col-xl-offset-10{margin-left:41.66666667%}.ant-col-xl-order-10{order:10}.ant-col-xl-9{display:block;flex:0 0 37.5%;max-width:37.5%}.ant-col-xl-push-9{left:37.5%}.ant-col-xl-pull-9{right:37.5%}.ant-col-xl-offset-9{margin-left:37.5%}.ant-col-xl-order-9{order:9}.ant-col-xl-8{display:block;flex:0 0 33.33333333%;max-width:33.33333333%}.ant-col-xl-push-8{left:33.33333333%}.ant-col-xl-pull-8{right:33.33333333%}.ant-col-xl-offset-8{margin-left:33.33333333%}.ant-col-xl-order-8{order:8}.ant-col-xl-7{display:block;flex:0 0 29.16666667%;max-width:29.16666667%}.ant-col-xl-push-7{left:29.16666667%}.ant-col-xl-pull-7{right:29.16666667%}.ant-col-xl-offset-7{margin-left:29.16666667%}.ant-col-xl-order-7{order:7}.ant-col-xl-6{display:block;flex:0 0 25%;max-width:25%}.ant-col-xl-push-6{left:25%}.ant-col-xl-pull-6{right:25%}.ant-col-xl-offset-6{margin-left:25%}.ant-col-xl-order-6{order:6}.ant-col-xl-5{display:block;flex:0 0 
20.83333333%;max-width:20.83333333%}.ant-col-xl-push-5{left:20.83333333%}.ant-col-xl-pull-5{right:20.83333333%}.ant-col-xl-offset-5{margin-left:20.83333333%}.ant-col-xl-order-5{order:5}.ant-col-xl-4{display:block;flex:0 0 16.66666667%;max-width:16.66666667%}.ant-col-xl-push-4{left:16.66666667%}.ant-col-xl-pull-4{right:16.66666667%}.ant-col-xl-offset-4{margin-left:16.66666667%}.ant-col-xl-order-4{order:4}.ant-col-xl-3{display:block;flex:0 0 12.5%;max-width:12.5%}.ant-col-xl-push-3{left:12.5%}.ant-col-xl-pull-3{right:12.5%}.ant-col-xl-offset-3{margin-left:12.5%}.ant-col-xl-order-3{order:3}.ant-col-xl-2{display:block;flex:0 0 8.33333333%;max-width:8.33333333%}.ant-col-xl-push-2{left:8.33333333%}.ant-col-xl-pull-2{right:8.33333333%}.ant-col-xl-offset-2{margin-left:8.33333333%}.ant-col-xl-order-2{order:2}.ant-col-xl-1{display:block;flex:0 0 4.16666667%;max-width:4.16666667%}.ant-col-xl-push-1{left:4.16666667%}.ant-col-xl-pull-1{right:4.16666667%}.ant-col-xl-offset-1{margin-left:4.16666667%}.ant-col-xl-order-1{order:1}.ant-col-xl-0{display:none}.ant-col-push-0{left:auto}.ant-col-pull-0{right:auto}.ant-col-xl-push-0{left:auto}.ant-col-xl-pull-0{right:auto}.ant-col-xl-offset-0{margin-left:0}.ant-col-xl-order-0{order:0}.ant-col-push-0.ant-col-rtl{right:auto}.ant-col-pull-0.ant-col-rtl{left:auto}.ant-col-xl-push-0.ant-col-rtl{right:auto}.ant-col-xl-pull-0.ant-col-rtl{left:auto}.ant-col-xl-offset-0.ant-col-rtl{margin-right:0}.ant-col-xl-push-1.ant-col-rtl{right:4.16666667%;left:auto}.ant-col-xl-pull-1.ant-col-rtl{right:auto;left:4.16666667%}.ant-col-xl-offset-1.ant-col-rtl{margin-right:4.16666667%;margin-left:0}.ant-col-xl-push-2.ant-col-rtl{right:8.33333333%;left:auto}.ant-col-xl-pull-2.ant-col-rtl{right:auto;left:8.33333333%}.ant-col-xl-offset-2.ant-col-rtl{margin-right:8.33333333%;margin-left:0}.ant-col-xl-push-3.ant-col-rtl{right:12.5%;left:auto}.ant-col-xl-pull-3.ant-col-rtl{right:auto;left:12.5%}.ant-col-xl-offset-3.ant-col-rtl{margin-right:12.5%;margin-left:0}.ant-col-x
l-push-4.ant-col-rtl{right:16.66666667%;left:auto}.ant-col-xl-pull-4.ant-col-rtl{right:auto;left:16.66666667%}.ant-col-xl-offset-4.ant-col-rtl{margin-right:16.66666667%;margin-left:0}.ant-col-xl-push-5.ant-col-rtl{right:20.83333333%;left:auto}.ant-col-xl-pull-5.ant-col-rtl{right:auto;left:20.83333333%}.ant-col-xl-offset-5.ant-col-rtl{margin-right:20.83333333%;margin-left:0}.ant-col-xl-push-6.ant-col-rtl{right:25%;left:auto}.ant-col-xl-pull-6.ant-col-rtl{right:auto;left:25%}.ant-col-xl-offset-6.ant-col-rtl{margin-right:25%;margin-left:0}.ant-col-xl-push-7.ant-col-rtl{right:29.16666667%;left:auto}.ant-col-xl-pull-7.ant-col-rtl{right:auto;left:29.16666667%}.ant-col-xl-offset-7.ant-col-rtl{margin-right:29.16666667%;margin-left:0}.ant-col-xl-push-8.ant-col-rtl{right:33.33333333%;left:auto}.ant-col-xl-pull-8.ant-col-rtl{right:auto;left:33.33333333%}.ant-col-xl-offset-8.ant-col-rtl{margin-right:33.33333333%;margin-left:0}.ant-col-xl-push-9.ant-col-rtl{right:37.5%;left:auto}.ant-col-xl-pull-9.ant-col-rtl{right:auto;left:37.5%}.ant-col-xl-offset-9.ant-col-rtl{margin-right:37.5%;margin-left:0}.ant-col-xl-push-10.ant-col-rtl{right:41.66666667%;left:auto}.ant-col-xl-pull-10.ant-col-rtl{right:auto;left:41.66666667%}.ant-col-xl-offset-10.ant-col-rtl{margin-right:41.66666667%;margin-left:0}.ant-col-xl-push-11.ant-col-rtl{right:45.83333333%;left:auto}.ant-col-xl-pull-11.ant-col-rtl{right:auto;left:45.83333333%}.ant-col-xl-offset-11.ant-col-rtl{margin-right:45.83333333%;margin-left:0}.ant-col-xl-push-12.ant-col-rtl{right:50%;left:auto}.ant-col-xl-pull-12.ant-col-rtl{right:auto;left:50%}.ant-col-xl-offset-12.ant-col-rtl{margin-right:50%;margin-left:0}.ant-col-xl-push-13.ant-col-rtl{right:54.16666667%;left:auto}.ant-col-xl-pull-13.ant-col-rtl{right:auto;left:54.16666667%}.ant-col-xl-offset-13.ant-col-rtl{margin-right:54.16666667%;margin-left:0}.ant-col-xl-push-14.ant-col-rtl{right:58.33333333%;left:auto}.ant-col-xl-pull-14.ant-col-rtl{right:auto;left:58.33333333%}.ant-col-xl-offset-14
.ant-col-rtl{margin-right:58.33333333%;margin-left:0}.ant-col-xl-push-15.ant-col-rtl{right:62.5%;left:auto}.ant-col-xl-pull-15.ant-col-rtl{right:auto;left:62.5%}.ant-col-xl-offset-15.ant-col-rtl{margin-right:62.5%;margin-left:0}.ant-col-xl-push-16.ant-col-rtl{right:66.66666667%;left:auto}.ant-col-xl-pull-16.ant-col-rtl{right:auto;left:66.66666667%}.ant-col-xl-offset-16.ant-col-rtl{margin-right:66.66666667%;margin-left:0}.ant-col-xl-push-17.ant-col-rtl{right:70.83333333%;left:auto}.ant-col-xl-pull-17.ant-col-rtl{right:auto;left:70.83333333%}.ant-col-xl-offset-17.ant-col-rtl{margin-right:70.83333333%;margin-left:0}.ant-col-xl-push-18.ant-col-rtl{right:75%;left:auto}.ant-col-xl-pull-18.ant-col-rtl{right:auto;left:75%}.ant-col-xl-offset-18.ant-col-rtl{margin-right:75%;margin-left:0}.ant-col-xl-push-19.ant-col-rtl{right:79.16666667%;left:auto}.ant-col-xl-pull-19.ant-col-rtl{right:auto;left:79.16666667%}.ant-col-xl-offset-19.ant-col-rtl{margin-right:79.16666667%;margin-left:0}.ant-col-xl-push-20.ant-col-rtl{right:83.33333333%;left:auto}.ant-col-xl-pull-20.ant-col-rtl{right:auto;left:83.33333333%}.ant-col-xl-offset-20.ant-col-rtl{margin-right:83.33333333%;margin-left:0}.ant-col-xl-push-21.ant-col-rtl{right:87.5%;left:auto}.ant-col-xl-pull-21.ant-col-rtl{right:auto;left:87.5%}.ant-col-xl-offset-21.ant-col-rtl{margin-right:87.5%;margin-left:0}.ant-col-xl-push-22.ant-col-rtl{right:91.66666667%;left:auto}.ant-col-xl-pull-22.ant-col-rtl{right:auto;left:91.66666667%}.ant-col-xl-offset-22.ant-col-rtl{margin-right:91.66666667%;margin-left:0}.ant-col-xl-push-23.ant-col-rtl{right:95.83333333%;left:auto}.ant-col-xl-pull-23.ant-col-rtl{right:auto;left:95.83333333%}.ant-col-xl-offset-23.ant-col-rtl{margin-right:95.83333333%;margin-left:0}.ant-col-xl-push-24.ant-col-rtl{right:100%;left:auto}.ant-col-xl-pull-24.ant-col-rtl{right:auto;left:100%}.ant-col-xl-offset-24.ant-col-rtl{margin-right:100%;margin-left:0}}@media (min-width: 1600px){.ant-col-xxl-24{display:block;flex:0 0 
100%;max-width:100%}.ant-col-xxl-push-24{left:100%}.ant-col-xxl-pull-24{right:100%}.ant-col-xxl-offset-24{margin-left:100%}.ant-col-xxl-order-24{order:24}.ant-col-xxl-23{display:block;flex:0 0 95.83333333%;max-width:95.83333333%}.ant-col-xxl-push-23{left:95.83333333%}.ant-col-xxl-pull-23{right:95.83333333%}.ant-col-xxl-offset-23{margin-left:95.83333333%}.ant-col-xxl-order-23{order:23}.ant-col-xxl-22{display:block;flex:0 0 91.66666667%;max-width:91.66666667%}.ant-col-xxl-push-22{left:91.66666667%}.ant-col-xxl-pull-22{right:91.66666667%}.ant-col-xxl-offset-22{margin-left:91.66666667%}.ant-col-xxl-order-22{order:22}.ant-col-xxl-21{display:block;flex:0 0 87.5%;max-width:87.5%}.ant-col-xxl-push-21{left:87.5%}.ant-col-xxl-pull-21{right:87.5%}.ant-col-xxl-offset-21{margin-left:87.5%}.ant-col-xxl-order-21{order:21}.ant-col-xxl-20{display:block;flex:0 0 83.33333333%;max-width:83.33333333%}.ant-col-xxl-push-20{left:83.33333333%}.ant-col-xxl-pull-20{right:83.33333333%}.ant-col-xxl-offset-20{margin-left:83.33333333%}.ant-col-xxl-order-20{order:20}.ant-col-xxl-19{display:block;flex:0 0 79.16666667%;max-width:79.16666667%}.ant-col-xxl-push-19{left:79.16666667%}.ant-col-xxl-pull-19{right:79.16666667%}.ant-col-xxl-offset-19{margin-left:79.16666667%}.ant-col-xxl-order-19{order:19}.ant-col-xxl-18{display:block;flex:0 0 75%;max-width:75%}.ant-col-xxl-push-18{left:75%}.ant-col-xxl-pull-18{right:75%}.ant-col-xxl-offset-18{margin-left:75%}.ant-col-xxl-order-18{order:18}.ant-col-xxl-17{display:block;flex:0 0 70.83333333%;max-width:70.83333333%}.ant-col-xxl-push-17{left:70.83333333%}.ant-col-xxl-pull-17{right:70.83333333%}.ant-col-xxl-offset-17{margin-left:70.83333333%}.ant-col-xxl-order-17{order:17}.ant-col-xxl-16{display:block;flex:0 0 66.66666667%;max-width:66.66666667%}.ant-col-xxl-push-16{left:66.66666667%}.ant-col-xxl-pull-16{right:66.66666667%}.ant-col-xxl-offset-16{margin-left:66.66666667%}.ant-col-xxl-order-16{order:16}.ant-col-xxl-15{display:block;flex:0 0 
62.5%;max-width:62.5%}.ant-col-xxl-push-15{left:62.5%}.ant-col-xxl-pull-15{right:62.5%}.ant-col-xxl-offset-15{margin-left:62.5%}.ant-col-xxl-order-15{order:15}.ant-col-xxl-14{display:block;flex:0 0 58.33333333%;max-width:58.33333333%}.ant-col-xxl-push-14{left:58.33333333%}.ant-col-xxl-pull-14{right:58.33333333%}.ant-col-xxl-offset-14{margin-left:58.33333333%}.ant-col-xxl-order-14{order:14}.ant-col-xxl-13{display:block;flex:0 0 54.16666667%;max-width:54.16666667%}.ant-col-xxl-push-13{left:54.16666667%}.ant-col-xxl-pull-13{right:54.16666667%}.ant-col-xxl-offset-13{margin-left:54.16666667%}.ant-col-xxl-order-13{order:13}.ant-col-xxl-12{display:block;flex:0 0 50%;max-width:50%}.ant-col-xxl-push-12{left:50%}.ant-col-xxl-pull-12{right:50%}.ant-col-xxl-offset-12{margin-left:50%}.ant-col-xxl-order-12{order:12}.ant-col-xxl-11{display:block;flex:0 0 45.83333333%;max-width:45.83333333%}.ant-col-xxl-push-11{left:45.83333333%}.ant-col-xxl-pull-11{right:45.83333333%}.ant-col-xxl-offset-11{margin-left:45.83333333%}.ant-col-xxl-order-11{order:11}.ant-col-xxl-10{display:block;flex:0 0 41.66666667%;max-width:41.66666667%}.ant-col-xxl-push-10{left:41.66666667%}.ant-col-xxl-pull-10{right:41.66666667%}.ant-col-xxl-offset-10{margin-left:41.66666667%}.ant-col-xxl-order-10{order:10}.ant-col-xxl-9{display:block;flex:0 0 37.5%;max-width:37.5%}.ant-col-xxl-push-9{left:37.5%}.ant-col-xxl-pull-9{right:37.5%}.ant-col-xxl-offset-9{margin-left:37.5%}.ant-col-xxl-order-9{order:9}.ant-col-xxl-8{display:block;flex:0 0 33.33333333%;max-width:33.33333333%}.ant-col-xxl-push-8{left:33.33333333%}.ant-col-xxl-pull-8{right:33.33333333%}.ant-col-xxl-offset-8{margin-left:33.33333333%}.ant-col-xxl-order-8{order:8}.ant-col-xxl-7{display:block;flex:0 0 29.16666667%;max-width:29.16666667%}.ant-col-xxl-push-7{left:29.16666667%}.ant-col-xxl-pull-7{right:29.16666667%}.ant-col-xxl-offset-7{margin-left:29.16666667%}.ant-col-xxl-order-7{order:7}.ant-col-xxl-6{display:block;flex:0 0 
25%;max-width:25%}.ant-col-xxl-push-6{left:25%}.ant-col-xxl-pull-6{right:25%}.ant-col-xxl-offset-6{margin-left:25%}.ant-col-xxl-order-6{order:6}.ant-col-xxl-5{display:block;flex:0 0 20.83333333%;max-width:20.83333333%}.ant-col-xxl-push-5{left:20.83333333%}.ant-col-xxl-pull-5{right:20.83333333%}.ant-col-xxl-offset-5{margin-left:20.83333333%}.ant-col-xxl-order-5{order:5}.ant-col-xxl-4{display:block;flex:0 0 16.66666667%;max-width:16.66666667%}.ant-col-xxl-push-4{left:16.66666667%}.ant-col-xxl-pull-4{right:16.66666667%}.ant-col-xxl-offset-4{margin-left:16.66666667%}.ant-col-xxl-order-4{order:4}.ant-col-xxl-3{display:block;flex:0 0 12.5%;max-width:12.5%}.ant-col-xxl-push-3{left:12.5%}.ant-col-xxl-pull-3{right:12.5%}.ant-col-xxl-offset-3{margin-left:12.5%}.ant-col-xxl-order-3{order:3}.ant-col-xxl-2{display:block;flex:0 0 8.33333333%;max-width:8.33333333%}.ant-col-xxl-push-2{left:8.33333333%}.ant-col-xxl-pull-2{right:8.33333333%}.ant-col-xxl-offset-2{margin-left:8.33333333%}.ant-col-xxl-order-2{order:2}.ant-col-xxl-1{display:block;flex:0 0 
4.16666667%;max-width:4.16666667%}.ant-col-xxl-push-1{left:4.16666667%}.ant-col-xxl-pull-1{right:4.16666667%}.ant-col-xxl-offset-1{margin-left:4.16666667%}.ant-col-xxl-order-1{order:1}.ant-col-xxl-0{display:none}.ant-col-push-0{left:auto}.ant-col-pull-0{right:auto}.ant-col-xxl-push-0{left:auto}.ant-col-xxl-pull-0{right:auto}.ant-col-xxl-offset-0{margin-left:0}.ant-col-xxl-order-0{order:0}.ant-col-push-0.ant-col-rtl{right:auto}.ant-col-pull-0.ant-col-rtl{left:auto}.ant-col-xxl-push-0.ant-col-rtl{right:auto}.ant-col-xxl-pull-0.ant-col-rtl{left:auto}.ant-col-xxl-offset-0.ant-col-rtl{margin-right:0}.ant-col-xxl-push-1.ant-col-rtl{right:4.16666667%;left:auto}.ant-col-xxl-pull-1.ant-col-rtl{right:auto;left:4.16666667%}.ant-col-xxl-offset-1.ant-col-rtl{margin-right:4.16666667%;margin-left:0}.ant-col-xxl-push-2.ant-col-rtl{right:8.33333333%;left:auto}.ant-col-xxl-pull-2.ant-col-rtl{right:auto;left:8.33333333%}.ant-col-xxl-offset-2.ant-col-rtl{margin-right:8.33333333%;margin-left:0}.ant-col-xxl-push-3.ant-col-rtl{right:12.5%;left:auto}.ant-col-xxl-pull-3.ant-col-rtl{right:auto;left:12.5%}.ant-col-xxl-offset-3.ant-col-rtl{margin-right:12.5%;margin-left:0}.ant-col-xxl-push-4.ant-col-rtl{right:16.66666667%;left:auto}.ant-col-xxl-pull-4.ant-col-rtl{right:auto;left:16.66666667%}.ant-col-xxl-offset-4.ant-col-rtl{margin-right:16.66666667%;margin-left:0}.ant-col-xxl-push-5.ant-col-rtl{right:20.83333333%;left:auto}.ant-col-xxl-pull-5.ant-col-rtl{right:auto;left:20.83333333%}.ant-col-xxl-offset-5.ant-col-rtl{margin-right:20.83333333%;margin-left:0}.ant-col-xxl-push-6.ant-col-rtl{right:25%;left:auto}.ant-col-xxl-pull-6.ant-col-rtl{right:auto;left:25%}.ant-col-xxl-offset-6.ant-col-rtl{margin-right:25%;margin-left:0}.ant-col-xxl-push-7.ant-col-rtl{right:29.16666667%;left:auto}.ant-col-xxl-pull-7.ant-col-rtl{right:auto;left:29.16666667%}.ant-col-xxl-offset-7.ant-col-rtl{margin-right:29.16666667%;margin-left:0}.ant-col-xxl-push-8.ant-col-rtl{right:33.33333333%;left:auto}.ant-col-xxl-pull-8
.ant-col-rtl{right:auto;left:33.33333333%}.ant-col-xxl-offset-8.ant-col-rtl{margin-right:33.33333333%;margin-left:0}.ant-col-xxl-push-9.ant-col-rtl{right:37.5%;left:auto}.ant-col-xxl-pull-9.ant-col-rtl{right:auto;left:37.5%}.ant-col-xxl-offset-9.ant-col-rtl{margin-right:37.5%;margin-left:0}.ant-col-xxl-push-10.ant-col-rtl{right:41.66666667%;left:auto}.ant-col-xxl-pull-10.ant-col-rtl{right:auto;left:41.66666667%}.ant-col-xxl-offset-10.ant-col-rtl{margin-right:41.66666667%;margin-left:0}.ant-col-xxl-push-11.ant-col-rtl{right:45.83333333%;left:auto}.ant-col-xxl-pull-11.ant-col-rtl{right:auto;left:45.83333333%}.ant-col-xxl-offset-11.ant-col-rtl{margin-right:45.83333333%;margin-left:0}.ant-col-xxl-push-12.ant-col-rtl{right:50%;left:auto}.ant-col-xxl-pull-12.ant-col-rtl{right:auto;left:50%}.ant-col-xxl-offset-12.ant-col-rtl{margin-right:50%;margin-left:0}.ant-col-xxl-push-13.ant-col-rtl{right:54.16666667%;left:auto}.ant-col-xxl-pull-13.ant-col-rtl{right:auto;left:54.16666667%}.ant-col-xxl-offset-13.ant-col-rtl{margin-right:54.16666667%;margin-left:0}.ant-col-xxl-push-14.ant-col-rtl{right:58.33333333%;left:auto}.ant-col-xxl-pull-14.ant-col-rtl{right:auto;left:58.33333333%}.ant-col-xxl-offset-14.ant-col-rtl{margin-right:58.33333333%;margin-left:0}.ant-col-xxl-push-15.ant-col-rtl{right:62.5%;left:auto}.ant-col-xxl-pull-15.ant-col-rtl{right:auto;left:62.5%}.ant-col-xxl-offset-15.ant-col-rtl{margin-right:62.5%;margin-left:0}.ant-col-xxl-push-16.ant-col-rtl{right:66.66666667%;left:auto}.ant-col-xxl-pull-16.ant-col-rtl{right:auto;left:66.66666667%}.ant-col-xxl-offset-16.ant-col-rtl{margin-right:66.66666667%;margin-left:0}.ant-col-xxl-push-17.ant-col-rtl{right:70.83333333%;left:auto}.ant-col-xxl-pull-17.ant-col-rtl{right:auto;left:70.83333333%}.ant-col-xxl-offset-17.ant-col-rtl{margin-right:70.83333333%;margin-left:0}.ant-col-xxl-push-18.ant-col-rtl{right:75%;left:auto}.ant-col-xxl-pull-18.ant-col-rtl{right:auto;left:75%}.ant-col-xxl-offset-18.ant-col-rtl{margin-right:75%;margin-
left:0}.ant-col-xxl-push-19.ant-col-rtl{right:79.16666667%;left:auto}.ant-col-xxl-pull-19.ant-col-rtl{right:auto;left:79.16666667%}.ant-col-xxl-offset-19.ant-col-rtl{margin-right:79.16666667%;margin-left:0}.ant-col-xxl-push-20.ant-col-rtl{right:83.33333333%;left:auto}.ant-col-xxl-pull-20.ant-col-rtl{right:auto;left:83.33333333%}.ant-col-xxl-offset-20.ant-col-rtl{margin-right:83.33333333%;margin-left:0}.ant-col-xxl-push-21.ant-col-rtl{right:87.5%;left:auto}.ant-col-xxl-pull-21.ant-col-rtl{right:auto;left:87.5%}.ant-col-xxl-offset-21.ant-col-rtl{margin-right:87.5%;margin-left:0}.ant-col-xxl-push-22.ant-col-rtl{right:91.66666667%;left:auto}.ant-col-xxl-pull-22.ant-col-rtl{right:auto;left:91.66666667%}.ant-col-xxl-offset-22.ant-col-rtl{margin-right:91.66666667%;margin-left:0}.ant-col-xxl-push-23.ant-col-rtl{right:95.83333333%;left:auto}.ant-col-xxl-pull-23.ant-col-rtl{right:auto;left:95.83333333%}.ant-col-xxl-offset-23.ant-col-rtl{margin-right:95.83333333%;margin-left:0}.ant-col-xxl-push-24.ant-col-rtl{right:100%;left:auto}.ant-col-xxl-pull-24.ant-col-rtl{right:auto;left:100%}.ant-col-xxl-offset-24.ant-col-rtl{margin-right:100%;margin-left:0}}.ant-row-rtl{direction:rtl}.ant-carousel{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum"}.ant-carousel .slick-slider{position:relative;display:block;box-sizing:border-box;-webkit-touch-callout:none;touch-action:pan-y;-webkit-tap-highlight-color:transparent}.ant-carousel .slick-list{position:relative;display:block;margin:0;padding:0;overflow:hidden}.ant-carousel .slick-list:focus{outline:none}.ant-carousel .slick-list.dragging{cursor:pointer}.ant-carousel .slick-list .slick-slide{pointer-events:none}.ant-carousel .slick-list .slick-slide input.ant-radio-input,.ant-carousel .slick-list .slick-slide input.ant-checkbox-input{visibility:hidden}.ant-carousel .slick-list 
.slick-slide.slick-active{pointer-events:auto}.ant-carousel .slick-list .slick-slide.slick-active input.ant-radio-input,.ant-carousel .slick-list .slick-slide.slick-active input.ant-checkbox-input{visibility:visible}.ant-carousel .slick-slider .slick-track,.ant-carousel .slick-slider .slick-list{transform:translateZ(0)}.ant-carousel .slick-track{position:relative;top:0;left:0;display:block}.ant-carousel .slick-track:before,.ant-carousel .slick-track:after{display:table;content:""}.ant-carousel .slick-track:after{clear:both}.slick-loading .ant-carousel .slick-track{visibility:hidden}.ant-carousel .slick-slide{display:none;float:left;height:100%;min-height:1px}[dir=rtl] .ant-carousel .slick-slide{float:right}.ant-carousel .slick-slide img{display:block}.ant-carousel .slick-slide.slick-loading img{display:none}.ant-carousel .slick-slide.dragging img{pointer-events:none}.ant-carousel .slick-initialized .slick-slide{display:block}.ant-carousel .slick-loading .slick-slide{visibility:hidden}.ant-carousel .slick-vertical .slick-slide{display:block;height:auto;border:1px solid transparent}.ant-carousel .slick-arrow.slick-hidden{display:none}.ant-carousel .slick-prev,.ant-carousel .slick-next{position:absolute;top:50%;display:block;width:20px;height:20px;margin-top:-10px;padding:0;color:transparent;font-size:0;line-height:0;background:transparent;border:0;outline:none;cursor:pointer}.ant-carousel .slick-prev:hover,.ant-carousel .slick-next:hover,.ant-carousel .slick-prev:focus,.ant-carousel .slick-next:focus{color:transparent;background:transparent;outline:none}.ant-carousel .slick-prev:hover:before,.ant-carousel .slick-next:hover:before,.ant-carousel .slick-prev:focus:before,.ant-carousel .slick-next:focus:before{opacity:1}.ant-carousel .slick-prev.slick-disabled:before,.ant-carousel .slick-next.slick-disabled:before{opacity:.25}.ant-carousel .slick-prev{left:-25px}.ant-carousel .slick-prev:before{content:"\2190"}.ant-carousel .slick-next{right:-25px}.ant-carousel 
.slick-next:before{content:"\2192"}.ant-carousel .slick-dots{position:absolute;display:block;width:100%;height:3px;margin:0;padding:0;text-align:center;list-style:none}.ant-carousel .slick-dots-bottom{bottom:12px}.ant-carousel .slick-dots-top{top:12px}.ant-carousel .slick-dots li{position:relative;display:inline-block;margin:0 2px;padding:0;text-align:center;vertical-align:top}.ant-carousel .slick-dots li button{display:block;width:16px;height:3px;padding:0;color:transparent;font-size:0;background:#fff;border:0;border-radius:1px;outline:none;cursor:pointer;opacity:.3;transition:all .5s}.ant-carousel .slick-dots li button:hover,.ant-carousel .slick-dots li button:focus{opacity:.75}.ant-carousel .slick-dots li.slick-active button{width:24px;background:#fff;opacity:1}.ant-carousel .slick-dots li.slick-active button:hover,.ant-carousel .slick-dots li.slick-active button:focus{opacity:1}.ant-carousel-vertical .slick-dots{top:50%;bottom:auto;width:3px;height:auto;transform:translateY(-50%)}.ant-carousel-vertical .slick-dots-left{left:12px}.ant-carousel-vertical .slick-dots-right{right:12px}.ant-carousel-vertical .slick-dots li{margin:0 2px;vertical-align:baseline}.ant-carousel-vertical .slick-dots li button{width:3px;height:16px}.ant-carousel-vertical .slick-dots li.slick-active button{width:3px;height:24px}.ant-cascader{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum"}.ant-cascader-input.ant-input{position:static;width:100%;padding-right:24px;background-color:transparent!important;cursor:pointer}.ant-cascader-picker-show-search 
.ant-cascader-input.ant-input{position:relative}.ant-cascader-picker{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:relative;display:inline-block;background-color:#fff;border-radius:2px;outline:0;cursor:pointer;transition:color .3s}.ant-cascader-picker-with-value .ant-cascader-picker-label{color:transparent}.ant-cascader-picker-disabled{color:#00000040;background:#f5f5f5;cursor:not-allowed}.ant-cascader-picker-disabled .ant-cascader-input{cursor:not-allowed}.ant-cascader-picker:focus .ant-cascader-input{border-color:#40a9ff;border-right-width:1px!important;outline:0;box-shadow:0 0 0 2px #1890ff33}.ant-cascader-picker-show-search.ant-cascader-picker-focused{color:#00000040}.ant-cascader-picker-label{position:absolute;top:50%;left:0;width:100%;height:20px;margin-top:-10px;padding:0 20px 0 12px;overflow:hidden;line-height:20px;white-space:nowrap;text-overflow:ellipsis}.ant-cascader-picker-clear{position:absolute;top:50%;right:12px;z-index:2;width:12px;height:12px;margin-top:-6px;color:#00000040;font-size:12px;line-height:12px;background:#fff;cursor:pointer;opacity:0;transition:color .3s ease,opacity .15s ease}.ant-cascader-picker-clear:hover{color:#00000073}.ant-cascader-picker:hover .ant-cascader-picker-clear{opacity:1}.ant-cascader-picker-arrow{position:absolute;top:50%;right:12px;z-index:1;width:12px;height:12px;margin-top:-6px;color:#00000040;font-size:12px;line-height:12px;transition:transform .2s}.ant-cascader-picker-arrow.ant-cascader-picker-arrow-expand{transform:rotate(180deg)}.ant-cascader-picker-label:hover+.ant-cascader-input{border-color:#40a9ff;border-right-width:1px!important}.ant-cascader-picker-small .ant-cascader-picker-clear,.ant-cascader-picker-small .ant-cascader-picker-arrow{right:8px}.ant-cascader-menus{position:absolute;z-index:1050;font-size:14px;white-space:nowrap;background:#fff;border-radius:2px;box-shadow:0 2px 8px 
#00000026}.ant-cascader-menus ul,.ant-cascader-menus ol{margin:0;list-style:none}.ant-cascader-menus-empty,.ant-cascader-menus-hidden{display:none}.ant-cascader-menus.slide-up-enter.slide-up-enter-active.ant-cascader-menus-placement-bottomLeft,.ant-cascader-menus.slide-up-appear.slide-up-appear-active.ant-cascader-menus-placement-bottomLeft{-webkit-animation-name:antSlideUpIn;animation-name:antSlideUpIn}.ant-cascader-menus.slide-up-enter.slide-up-enter-active.ant-cascader-menus-placement-topLeft,.ant-cascader-menus.slide-up-appear.slide-up-appear-active.ant-cascader-menus-placement-topLeft{-webkit-animation-name:antSlideDownIn;animation-name:antSlideDownIn}.ant-cascader-menus.slide-up-leave.slide-up-leave-active.ant-cascader-menus-placement-bottomLeft{-webkit-animation-name:antSlideUpOut;animation-name:antSlideUpOut}.ant-cascader-menus.slide-up-leave.slide-up-leave-active.ant-cascader-menus-placement-topLeft{-webkit-animation-name:antSlideDownOut;animation-name:antSlideDownOut}.ant-cascader-menu{display:inline-block;min-width:111px;height:180px;margin:0;padding:4px 0;overflow:auto;vertical-align:top;list-style:none;border-right:1px solid #f0f0f0;-ms-overflow-style:-ms-autohiding-scrollbar}.ant-cascader-menu:first-child{border-radius:2px 0 0 2px}.ant-cascader-menu:last-child{margin-right:-1px;border-right-color:transparent;border-radius:0 2px 2px 0}.ant-cascader-menu:only-child{border-radius:2px}.ant-cascader-menu-item{padding:5px 12px;line-height:22px;white-space:nowrap;cursor:pointer;transition:all .3s}.ant-cascader-menu-item:hover{background:#f5f5f5}.ant-cascader-menu-item-disabled{color:#00000040;cursor:not-allowed}.ant-cascader-menu-item-disabled:hover{background:transparent}.ant-cascader-menu-item-active:not(.ant-cascader-menu-item-disabled),.ant-cascader-menu-item-active:not(.ant-cascader-menu-item-disabled):hover{font-weight:600;background-color:#e6f7ff}.ant-cascader-menu-item-expand{position:relative;padding-right:24px}.ant-cascader-menu-item-expand 
.ant-cascader-menu-item-expand-icon,.ant-cascader-menu-item-loading-icon{display:inline-block;font-size:12px;/* old-IE-only size override; the hack marker is written as the hex escape \9 (same code point as a TAB) so whitespace-normalising tools cannot mangle it */font-size:10px \9;transform:scale(.83333333) rotate(0);position:absolute;right:12px;color:#00000073}:root .ant-cascader-menu-item-expand .ant-cascader-menu-item-expand-icon,:root .ant-cascader-menu-item-loading-icon{font-size:12px}.ant-cascader-menu-item-disabled.ant-cascader-menu-item-expand .ant-cascader-menu-item-expand-icon,.ant-cascader-menu-item-disabled.ant-cascader-menu-item-loading-icon{color:#00000040}.ant-cascader-menu-item .ant-cascader-menu-item-keyword{color:#ff4d4f}@-webkit-keyframes antCheckboxEffect{0%{transform:scale(1);opacity:.5}to{transform:scale(1.6);opacity:0}}@keyframes antCheckboxEffect{0%{transform:scale(1);opacity:.5}to{transform:scale(1.6);opacity:0}}.ant-checkbox{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:relative;top:-.09em;display:inline-block;line-height:1;white-space:nowrap;vertical-align:middle;outline:none;cursor:pointer}.ant-checkbox-wrapper:hover .ant-checkbox-inner,.ant-checkbox:hover .ant-checkbox-inner,.ant-checkbox-input:focus+.ant-checkbox-inner{border-color:#1890ff}.ant-checkbox-checked:after{position:absolute;top:0;left:0;width:100%;height:100%;border:1px solid #1890ff;border-radius:2px;visibility:hidden;-webkit-animation:antCheckboxEffect .36s ease-in-out;animation:antCheckboxEffect .36s ease-in-out;-webkit-animation-fill-mode:backwards;animation-fill-mode:backwards;content:""}.ant-checkbox:hover:after,.ant-checkbox-wrapper:hover .ant-checkbox:after{visibility:visible}.ant-checkbox-inner{position:relative;top:0;left:0;display:block;width:16px;height:16px;background-color:#fff;border:1px solid #d9d9d9;border-radius:2px;border-collapse:separate;transition:all .3s}.ant-checkbox-inner:after{position:absolute;top:50%;left:22%;display:table;width:5.71428571px;height:9.14285714px;border:2px 
solid #fff;border-top:0;border-left:0;transform:rotate(45deg) scale(0) translate(-50%,-50%);opacity:0;transition:all .1s cubic-bezier(.71,-.46,.88,.6),opacity .1s;content:" "}.ant-checkbox-input{position:absolute;top:0;right:0;bottom:0;left:0;z-index:1;width:100%;height:100%;cursor:pointer;opacity:0}.ant-checkbox-checked .ant-checkbox-inner:after{position:absolute;display:table;border:2px solid #fff;border-top:0;border-left:0;transform:rotate(45deg) scale(1) translate(-50%,-50%);opacity:1;transition:all .2s cubic-bezier(.12,.4,.29,1.46) .1s;content:" "}.ant-checkbox-checked .ant-checkbox-inner{background-color:#1890ff;border-color:#1890ff}.ant-checkbox-disabled{cursor:not-allowed}.ant-checkbox-disabled.ant-checkbox-checked .ant-checkbox-inner:after{border-color:#00000040;-webkit-animation-name:none;animation-name:none}.ant-checkbox-disabled .ant-checkbox-input{cursor:not-allowed}.ant-checkbox-disabled .ant-checkbox-inner{background-color:#f5f5f5;border-color:#d9d9d9!important}.ant-checkbox-disabled .ant-checkbox-inner:after{border-color:#f5f5f5;border-collapse:separate;-webkit-animation-name:none;animation-name:none}.ant-checkbox-disabled+span{color:#00000040;cursor:not-allowed}.ant-checkbox-disabled:hover:after,.ant-checkbox-wrapper:hover 
.ant-checkbox-disabled:after{visibility:hidden}.ant-checkbox-wrapper{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";display:inline-block;line-height:unset;cursor:pointer}.ant-checkbox-wrapper.ant-checkbox-wrapper-disabled{cursor:not-allowed}.ant-checkbox-wrapper+.ant-checkbox-wrapper{margin-left:8px}.ant-checkbox+span{padding-right:8px;padding-left:8px}.ant-checkbox-group{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";display:inline-block}.ant-checkbox-group-item{display:inline-block;margin-right:8px}.ant-checkbox-group-item:last-child{margin-right:0}.ant-checkbox-group-item+.ant-checkbox-group-item{margin-left:0}.ant-checkbox-indeterminate .ant-checkbox-inner{background-color:#fff;border-color:#d9d9d9}.ant-checkbox-indeterminate .ant-checkbox-inner:after{top:50%;left:50%;width:8px;height:8px;background-color:#1890ff;border:0;transform:translate(-50%,-50%) scale(1);opacity:1;content:" "}.ant-checkbox-indeterminate.ant-checkbox-disabled .ant-checkbox-inner:after{background-color:#00000040;border-color:#00000040}.ant-collapse{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";background-color:#fafafa;border:1px solid #d9d9d9;border-bottom:0;border-radius:2px}.ant-collapse>.ant-collapse-item{border-bottom:1px solid #d9d9d9}.ant-collapse>.ant-collapse-item:last-child,.ant-collapse>.ant-collapse-item:last-child>.ant-collapse-header{border-radius:0 0 2px 2px}.ant-collapse>.ant-collapse-item>.ant-collapse-header{position:relative;padding:12px 16px 12px 40px;color:#000000d9;line-height:22px;cursor:pointer;transition:all .3s}.ant-collapse>.ant-collapse-item>.ant-collapse-header 
.ant-collapse-arrow{color:inherit;font-style:normal;line-height:0;text-align:center;text-transform:none;vertical-align:-.125em;text-rendering:optimizeLegibility;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;position:absolute;top:50%;left:16px;display:inline-block;font-size:12px;transform:translateY(-50%)}.ant-collapse>.ant-collapse-item>.ant-collapse-header .ant-collapse-arrow>*{line-height:1}.ant-collapse>.ant-collapse-item>.ant-collapse-header .ant-collapse-arrow svg{display:inline-block}.ant-collapse>.ant-collapse-item>.ant-collapse-header .ant-collapse-arrow:before{display:none}/* "-icon" child of the arrow; the generated selector had repeated the whole parent chain after the arrow and so could never match */.ant-collapse>.ant-collapse-item>.ant-collapse-header .ant-collapse-arrow .ant-collapse-arrow-icon{display:block}.ant-collapse>.ant-collapse-item>.ant-collapse-header .ant-collapse-arrow svg{transition:transform .24s}.ant-collapse>.ant-collapse-item>.ant-collapse-header .ant-collapse-extra{float:right}.ant-collapse>.ant-collapse-item>.ant-collapse-header:focus{outline:none}.ant-collapse>.ant-collapse-item.ant-collapse-no-arrow>.ant-collapse-header{padding-left:12px}.ant-collapse-icon-position-right>.ant-collapse-item>.ant-collapse-header{padding:12px 40px 12px 16px}.ant-collapse-icon-position-right>.ant-collapse-item>.ant-collapse-header .ant-collapse-arrow{right:16px;left:auto}.ant-collapse-anim-active{transition:height .2s cubic-bezier(.215,.61,.355,1)}.ant-collapse-content{overflow:hidden;color:#000000d9;background-color:#fff;border-top:1px solid #d9d9d9}.ant-collapse-content>.ant-collapse-content-box{padding:16px}.ant-collapse-content-inactive{display:none}.ant-collapse-item:last-child>.ant-collapse-content{border-radius:0 0 2px 2px}.ant-collapse-borderless{background-color:#fafafa;border:0}.ant-collapse-borderless>.ant-collapse-item{border-bottom:1px solid #d9d9d9}.ant-collapse-borderless>.ant-collapse-item:last-child,.ant-collapse-borderless>.ant-collapse-item:last-child 
.ant-collapse-header{border-radius:0}.ant-collapse-borderless>.ant-collapse-item>.ant-collapse-content{background-color:transparent;border-top:0}.ant-collapse-borderless>.ant-collapse-item>.ant-collapse-content>.ant-collapse-content-box{padding-top:4px}.ant-collapse .ant-collapse-item-disabled>.ant-collapse-header,.ant-collapse .ant-collapse-item-disabled>.ant-collapse-header>.arrow{color:#00000040;cursor:not-allowed}.ant-color-picker{box-sizing:border-box;margin:0;padding:0;color:#000000a6;font-size:14px;font-variant:tabular-nums;line-height:1.5;list-style:none;font-feature-settings:"tnum";position:relative;display:inline-block;outline:none;cursor:pointer;transition:opacity .3s;min-width:55px}.ant-color-picker .pickr{display:inline-block}.ant-color-picker .pickr .pcr-button{width:18px;height:18px;margin-left:7px}.ant-color-picker .pickr .pcr-button:focus{box-shadow:none}.ant-color-picker.ant-color-picker-disabled{cursor:not-allowed}.ant-color-picker.ant-color-picker-disabled .ant-color-picker-selection{background:#f5f5f5;box-shadow:none;border:1px solid #d9d9d9}.ant-color-picker.ant-color-picker-disabled .ant-color-picker-selection:hover,.ant-color-picker.ant-color-picker-disabled .ant-color-picker-selection:focus,.ant-color-picker.ant-color-picker-disabled .ant-color-picker-selection:active{border:1px solid #d9d9d9;box-shadow:none}.ant-color-picker.ant-color-picker-disabled.ant-color-picker-open .ant-color-picker-icon svg{transform:none}.ant-color-picker-open .ant-color-picker-icon svg{transform:rotate(180deg)}.ant-color-picker-open .ant-color-picker-selection{border-color:#40a9ff;border-right-width:1px!important;outline:0;box-shadow:0 0 0 2px #1890ff33}.ant-color-picker-selection{display:block;box-sizing:border-box;background-color:#fff;border:1px solid #d9d9d9;border-top-width:1.02px;border-radius:2px;outline:none;transition:all .3s 
cubic-bezier(.645,.045,.355,1);-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;position:relative;height:32px;cursor:inherit}.ant-color-picker-selection:hover{border-color:#40a9ff;border-right-width:1px!important}.ant-color-picker-icon{display:inline-block;color:inherit;font-style:normal;line-height:0;text-align:center;text-transform:none;vertical-align:-.125em;text-rendering:optimizeLegibility;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;position:absolute;top:50%;right:8px;margin-top:-6px;color:#00000040;font-size:12px;line-height:1;transform-origin:50% 50%}.ant-color-picker-icon>*{line-height:1}.ant-color-picker-icon svg{display:inline-block}.ant-color-picker-icon:before{display:none}.ant-color-picker-icon .ant-color-picker-icon-icon{display:block}.ant-color-picker-icon svg{transition:transform .3s}.ant-color-picker-lg{font-size:16px}.ant-color-picker-lg .ant-color-picker-selection{line-height:28px;height:40px}.ant-color-picker-lg .ant-color-picker-icon{top:20px}.ant-color-picker-sm .ant-color-picker-selection{line-height:12px;height:24px}.ant-color-picker-sm .pickr .pcr-button{width:14px;height:14px}.ant-color-picker-sm .ant-color-picker-icon{right:10px;top:12px;font-size:10px}.ant-comment{position:relative;background-color:inherit}.ant-comment-inner{display:flex;padding:16px 0}.ant-comment-avatar{position:relative;flex-shrink:0;margin-right:12px;cursor:pointer}.ant-comment-avatar img{width:32px;height:32px;border-radius:50%}.ant-comment-content{position:relative;flex:1 1 auto;min-width:1px;font-size:14px;word-wrap:break-word}.ant-comment-content-author{display:flex;flex-wrap:wrap;justify-content:flex-start;margin-bottom:4px;font-size:14px}.ant-comment-content-author>a,.ant-comment-content-author>span{padding-right:8px;font-size:12px;line-height:18px}.ant-comment-content-author-name{color:#00000073;font-size:14px;transition:color 
.3s}.ant-comment-content-author-name>*{color:#00000073}.ant-comment-content-author-name>*:hover{color:#00000073}.ant-comment-content-author-time{color:#ccc;white-space:nowrap;cursor:auto}.ant-comment-content-detail p{margin-bottom:inherit;white-space:pre-wrap}.ant-comment-actions{margin-top:12px;margin-bottom:inherit;padding-left:0}.ant-comment-actions>li{display:inline-block;color:#00000073}.ant-comment-actions>li>span{margin-right:10px;color:#00000073;font-size:12px;cursor:pointer;transition:color .3s;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.ant-comment-actions>li>span:hover{color:#595959}.ant-comment-nested{margin-left:44px}.ant-comment-rtl{direction:rtl}.ant-comment-rtl .ant-comment-avatar{margin-right:0;margin-left:12px}.ant-comment-rtl .ant-comment-content-author>a,.ant-comment-rtl .ant-comment-content-author>span{padding-right:0;padding-left:8px}.ant-comment-rtl .ant-comment-actions{padding-right:0}.ant-comment-rtl .ant-comment-actions>li>span{margin-right:0;margin-left:10px}.ant-comment-rtl .ant-comment-nested{margin-right:44px;margin-left:0}.ant-calendar-picker-container{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:absolute;z-index:1050;/* quoted family names must stay on one line: a raw newline inside a CSS string is a parse error that drops the declaration */font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Helvetica Neue,Arial,Noto Sans,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol,"Noto Color Emoji"}
.ant-calendar-picker-container.slide-up-enter.slide-up-enter-active.ant-calendar-picker-container-placement-topLeft,.ant-calendar-picker-container.slide-up-enter.slide-up-enter-active.ant-calendar-picker-container-placement-topRight,.ant-calendar-picker-container.slide-up-appear.slide-up-appear-active.ant-calendar-picker-container-placement-topLeft,.ant-calendar-picker-container.slide-up-appear.slide-up-appear-active.ant-calendar-picker-container-placement-topRight{-webkit-animation-name:antSlideDownIn;animation-name:antSlideDownIn}.ant-calendar-picker-container.slide-up-enter.slide-up-enter-active.ant-calendar-picker-container-placement-bottomLeft,.ant-calendar-picker-container.slide-up-enter.slide-up-enter-active.ant-calendar-picker-container-placement-bottomRight,.ant-calendar-picker-container.slide-up-appear.slide-up-appear-active.ant-calendar-picker-container-placement-bottomLeft,.ant-calendar-picker-container.slide-up-appear.slide-up-appear-active.ant-calendar-picker-container-placement-bottomRight{-webkit-animation-name:antSlideUpIn;animation-name:antSlideUpIn}.ant-calendar-picker-container.slide-up-leave.slide-up-leave-active.ant-calendar-picker-container-placement-topLeft,.ant-calendar-picker-container.slide-up-leave.slide-up-leave-active.ant-calendar-picker-container-placement-topRight{-webkit-animation-name:antSlideDownOut;animation-name:antSlideDownOut}.ant-calendar-picker-container.slide-up-leave.slide-up-leave-active.ant-calendar-picker-container-placement-bottomLeft,.ant-calendar-picker-container.slide-up-leave.slide-up-leave-active.ant-calendar-picker-container-placement-bottomRight{-webkit-animation-name:antSlideUpOut;animation-name:antSlideUpOut}.ant-calendar-picker{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:relative;display:inline-block;outline:none;cursor:text;transition:opacity 
.3s}.ant-calendar-picker-input{outline:none}.ant-calendar-picker-input.ant-input{line-height:1.5715}.ant-calendar-picker-input.ant-input-sm{padding-top:0;padding-bottom:0}.ant-calendar-picker:hover .ant-calendar-picker-input:not(.ant-input-disabled){border-color:#40a9ff}.ant-calendar-picker:focus .ant-calendar-picker-input:not(.ant-input-disabled){border-color:#40a9ff;border-right-width:1px!important;outline:0;box-shadow:0 0 0 2px #1890ff33}.ant-calendar-picker-clear,.ant-calendar-picker-icon{position:absolute;top:50%;right:12px;z-index:1;width:14px;height:14px;margin-top:-7px;font-size:12px;line-height:14px;transition:all .3s;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.ant-calendar-picker-clear{z-index:2;color:#00000040;font-size:14px;background:#fff;cursor:pointer;opacity:0;pointer-events:none}.ant-calendar-picker-clear:hover{color:#00000073}.ant-calendar-picker:hover .ant-calendar-picker-clear{opacity:1;pointer-events:auto}.ant-calendar-picker-icon{display:inline-block;color:#00000040;font-size:14px;line-height:1}.ant-input-disabled+.ant-calendar-picker-icon{cursor:not-allowed}.ant-calendar-picker-small .ant-calendar-picker-clear,.ant-calendar-picker-small .ant-calendar-picker-icon{right:8px}.ant-calendar{position:relative;width:280px;font-size:14px;line-height:1.5715;text-align:left;list-style:none;background-color:#fff;background-clip:padding-box;border:1px solid #fff;border-radius:2px;outline:none;box-shadow:0 2px 8px #00000026}.ant-calendar-input-wrap{height:34px;padding:6px 10px;border-bottom:1px solid 
#f0f0f0}.ant-calendar-input{width:100%;height:22px;color:#000000d9;background:#fff;border:0;outline:0;cursor:auto}.ant-calendar-input::-moz-placeholder{color:#bfbfbf;opacity:1}.ant-calendar-input:-ms-input-placeholder{color:#bfbfbf}.ant-calendar-input::-webkit-input-placeholder{color:#bfbfbf}.ant-calendar-input:-moz-placeholder-shown{text-overflow:ellipsis}.ant-calendar-input:-ms-input-placeholder{text-overflow:ellipsis}.ant-calendar-input:placeholder-shown{text-overflow:ellipsis}.ant-calendar-week-number{width:286px}.ant-calendar-week-number-cell{text-align:center}.ant-calendar-header{height:40px;line-height:40px;text-align:center;border-bottom:1px solid #f0f0f0;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.ant-calendar-header a:hover{color:#40a9ff}.ant-calendar-header .ant-calendar-century-select,.ant-calendar-header .ant-calendar-decade-select,.ant-calendar-header .ant-calendar-year-select,.ant-calendar-header .ant-calendar-month-select{display:inline-block;padding:0 2px;color:#000000d9;font-weight:500;line-height:40px}.ant-calendar-header .ant-calendar-century-select-arrow,.ant-calendar-header .ant-calendar-decade-select-arrow,.ant-calendar-header .ant-calendar-year-select-arrow,.ant-calendar-header .ant-calendar-month-select-arrow{display:none}.ant-calendar-header .ant-calendar-prev-century-btn,.ant-calendar-header .ant-calendar-next-century-btn,.ant-calendar-header .ant-calendar-prev-decade-btn,.ant-calendar-header .ant-calendar-next-decade-btn,.ant-calendar-header .ant-calendar-prev-month-btn,.ant-calendar-header .ant-calendar-next-month-btn,.ant-calendar-header .ant-calendar-prev-year-btn,.ant-calendar-header .ant-calendar-next-year-btn{position:absolute;top:0;display:inline-block;padding:0 5px;color:#00000073;font-size:16px;font-family:Arial,Hiragino Sans GB,Microsoft Yahei,"Microsoft Sans Serif",sans-serif;line-height:40px}.ant-calendar-header .ant-calendar-prev-century-btn,.ant-calendar-header 
.ant-calendar-prev-decade-btn,.ant-calendar-header .ant-calendar-prev-year-btn{left:7px;height:100%}.ant-calendar-header .ant-calendar-prev-century-btn:before,.ant-calendar-header .ant-calendar-prev-decade-btn:before,.ant-calendar-header .ant-calendar-prev-year-btn:before,.ant-calendar-header .ant-calendar-prev-century-btn:after,.ant-calendar-header .ant-calendar-prev-decade-btn:after,.ant-calendar-header .ant-calendar-prev-year-btn:after{position:relative;top:-1px;display:inline-block;width:8px;height:8px;vertical-align:middle;border:0 solid #aaa;border-width:1.5px 0 0 1.5px;border-radius:1px;transform:rotate(-45deg) scale(.8);transition:all .3s;content:""}.ant-calendar-header .ant-calendar-prev-century-btn:hover:before,.ant-calendar-header .ant-calendar-prev-decade-btn:hover:before,.ant-calendar-header .ant-calendar-prev-year-btn:hover:before,.ant-calendar-header .ant-calendar-prev-century-btn:hover:after,.ant-calendar-header .ant-calendar-prev-decade-btn:hover:after,.ant-calendar-header .ant-calendar-prev-year-btn:hover:after{border-color:#000000d9}.ant-calendar-header .ant-calendar-prev-century-btn:after,.ant-calendar-header .ant-calendar-prev-decade-btn:after,.ant-calendar-header .ant-calendar-prev-year-btn:after{display:none}.ant-calendar-header .ant-calendar-prev-century-btn:after,.ant-calendar-header .ant-calendar-prev-decade-btn:after,.ant-calendar-header .ant-calendar-prev-year-btn:after{position:relative;left:-3px;display:inline-block}.ant-calendar-header .ant-calendar-next-century-btn,.ant-calendar-header .ant-calendar-next-decade-btn,.ant-calendar-header .ant-calendar-next-year-btn{right:7px;height:100%}.ant-calendar-header .ant-calendar-next-century-btn:before,.ant-calendar-header .ant-calendar-next-decade-btn:before,.ant-calendar-header .ant-calendar-next-year-btn:before,.ant-calendar-header .ant-calendar-next-century-btn:after,.ant-calendar-header .ant-calendar-next-decade-btn:after,.ant-calendar-header 
.ant-calendar-next-year-btn:after{position:relative;top:-1px;display:inline-block;width:8px;height:8px;vertical-align:middle;border:0 solid #aaa;border-width:1.5px 0 0 1.5px;border-radius:1px;transform:rotate(-45deg) scale(.8);transition:all .3s;content:""}.ant-calendar-header .ant-calendar-next-century-btn:hover:before,.ant-calendar-header .ant-calendar-next-decade-btn:hover:before,.ant-calendar-header .ant-calendar-next-year-btn:hover:before,.ant-calendar-header .ant-calendar-next-century-btn:hover:after,.ant-calendar-header .ant-calendar-next-decade-btn:hover:after,.ant-calendar-header .ant-calendar-next-year-btn:hover:after{border-color:#000000d9}.ant-calendar-header .ant-calendar-next-century-btn:after,.ant-calendar-header .ant-calendar-next-decade-btn:after,.ant-calendar-header .ant-calendar-next-year-btn:after{display:none}.ant-calendar-header .ant-calendar-next-century-btn:before,.ant-calendar-header .ant-calendar-next-decade-btn:before,.ant-calendar-header .ant-calendar-next-year-btn:before,.ant-calendar-header .ant-calendar-next-century-btn:after,.ant-calendar-header .ant-calendar-next-decade-btn:after,.ant-calendar-header .ant-calendar-next-year-btn:after{transform:rotate(135deg) scale(.8)}.ant-calendar-header .ant-calendar-next-century-btn:before,.ant-calendar-header .ant-calendar-next-decade-btn:before,.ant-calendar-header .ant-calendar-next-year-btn:before{position:relative;left:3px}.ant-calendar-header .ant-calendar-next-century-btn:after,.ant-calendar-header .ant-calendar-next-decade-btn:after,.ant-calendar-header .ant-calendar-next-year-btn:after{display:inline-block}.ant-calendar-header .ant-calendar-prev-month-btn{left:29px;height:100%}.ant-calendar-header .ant-calendar-prev-month-btn:before,.ant-calendar-header .ant-calendar-prev-month-btn:after{position:relative;top:-1px;display:inline-block;width:8px;height:8px;vertical-align:middle;border:0 solid #aaa;border-width:1.5px 0 0 1.5px;border-radius:1px;transform:rotate(-45deg) 
scale(.8);transition:all .3s;content:""}.ant-calendar-header .ant-calendar-prev-month-btn:hover:before,.ant-calendar-header .ant-calendar-prev-month-btn:hover:after{border-color:#000000d9}.ant-calendar-header .ant-calendar-prev-month-btn:after{display:none}.ant-calendar-header .ant-calendar-next-month-btn{right:29px;height:100%}.ant-calendar-header .ant-calendar-next-month-btn:before,.ant-calendar-header .ant-calendar-next-month-btn:after{position:relative;top:-1px;display:inline-block;width:8px;height:8px;vertical-align:middle;border:0 solid #aaa;border-width:1.5px 0 0 1.5px;border-radius:1px;transform:rotate(-45deg) scale(.8);transition:all .3s;content:""}.ant-calendar-header .ant-calendar-next-month-btn:hover:before,.ant-calendar-header .ant-calendar-next-month-btn:hover:after{border-color:#000000d9}.ant-calendar-header .ant-calendar-next-month-btn:after{display:none}.ant-calendar-header .ant-calendar-next-month-btn:before,.ant-calendar-header .ant-calendar-next-month-btn:after{transform:rotate(135deg) scale(.8)}.ant-calendar-body{padding:8px 12px}.ant-calendar table{width:100%;max-width:100%;background-color:transparent;border-collapse:collapse}.ant-calendar table,.ant-calendar th,.ant-calendar td{text-align:center;border:0}.ant-calendar-calendar-table{margin-bottom:0;border-spacing:0}.ant-calendar-column-header{width:33px;padding:6px 0;line-height:18px;text-align:center}.ant-calendar-column-header .ant-calendar-column-header-inner{display:block;font-weight:400}.ant-calendar-week-number-header .ant-calendar-column-header-inner{display:none}.ant-calendar-cell{height:30px;padding:3px 0}.ant-calendar-date{display:block;width:24px;height:24px;margin:0 auto;padding:0;color:#000000d9;line-height:22px;text-align:center;background:transparent;border:1px solid transparent;border-radius:2px;transition:background .3s 
ease}.ant-calendar-date-panel{position:relative;outline:none}.ant-calendar-date:hover{background:#f5f5f5;cursor:pointer}.ant-calendar-date:active{color:#fff;background:#40a9ff}.ant-calendar-today .ant-calendar-date{color:#1890ff;font-weight:700;border-color:#1890ff}.ant-calendar-selected-day .ant-calendar-date{background:#bae7ff}.ant-calendar-last-month-cell .ant-calendar-date,.ant-calendar-next-month-btn-day .ant-calendar-date,.ant-calendar-last-month-cell .ant-calendar-date:hover,.ant-calendar-next-month-btn-day .ant-calendar-date:hover{color:#00000040;background:transparent;border-color:transparent}.ant-calendar-disabled-cell .ant-calendar-date{position:relative;width:auto;color:#00000040;background:#f5f5f5;border:1px solid transparent;border-radius:0;cursor:not-allowed}.ant-calendar-disabled-cell .ant-calendar-date:hover{background:#f5f5f5}.ant-calendar-disabled-cell.ant-calendar-selected-day .ant-calendar-date:before{position:absolute;top:-1px;left:5px;width:24px;height:24px;background:rgba(0,0,0,.1);border-radius:2px;content:""}.ant-calendar-disabled-cell.ant-calendar-today .ant-calendar-date{position:relative;padding-right:5px;padding-left:5px}.ant-calendar-disabled-cell.ant-calendar-today .ant-calendar-date:before{position:absolute;top:-1px;left:5px;width:24px;height:24px;border:1px solid rgba(0,0,0,.25);border-radius:2px;content:" "}.ant-calendar-disabled-cell-first-of-row .ant-calendar-date{border-top-left-radius:4px;border-bottom-left-radius:4px}.ant-calendar-disabled-cell-last-of-row .ant-calendar-date{border-top-right-radius:4px;border-bottom-right-radius:4px}.ant-calendar-footer{padding:0 12px;line-height:38px;border-top:1px solid #f0f0f0}.ant-calendar-footer:empty{border-top:0}.ant-calendar-footer-btn{display:block;text-align:center}.ant-calendar-footer-extra{text-align:left}.ant-calendar .ant-calendar-today-btn,.ant-calendar .ant-calendar-clear-btn{display:inline-block;margin:0 0 0 8px;text-align:center}.ant-calendar 
.ant-calendar-today-btn-disabled,.ant-calendar .ant-calendar-clear-btn-disabled{color:#00000040;cursor:not-allowed}.ant-calendar .ant-calendar-today-btn:only-child,.ant-calendar .ant-calendar-clear-btn:only-child{margin:0}.ant-calendar .ant-calendar-clear-btn{position:absolute;top:7px;right:5px;display:none;width:20px;height:20px;margin:0;overflow:hidden;line-height:20px;text-align:center;text-indent:-76px}.ant-calendar .ant-calendar-clear-btn:after{display:inline-block;width:20px;color:#00000040;font-size:14px;line-height:1;text-indent:43px;transition:color .3s ease}.ant-calendar .ant-calendar-clear-btn:hover:after{color:#00000073}.ant-calendar .ant-calendar-ok-btn{position:relative;display:inline-block;font-weight:400;white-space:nowrap;text-align:center;background-image:none;border:1px solid transparent;box-shadow:0 2px #00000004;cursor:pointer;transition:all .3s cubic-bezier(.645,.045,.355,1);-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;touch-action:manipulation;height:32px;color:#fff;background:#1890ff;border-color:#1890ff;text-shadow:0 -1px 0 rgba(0,0,0,.12);box-shadow:0 2px #0000000b;height:24px;padding:0 7px;font-size:14px;border-radius:2px;line-height:22px}.ant-calendar .ant-calendar-ok-btn>.anticon{line-height:1}.ant-calendar .ant-calendar-ok-btn,.ant-calendar .ant-calendar-ok-btn:active,.ant-calendar .ant-calendar-ok-btn:focus{outline:0}.ant-calendar .ant-calendar-ok-btn:not([disabled]):hover{text-decoration:none}.ant-calendar .ant-calendar-ok-btn:not([disabled]):active{outline:0;box-shadow:none}.ant-calendar .ant-calendar-ok-btn[disabled]{cursor:not-allowed}.ant-calendar .ant-calendar-ok-btn[disabled]>*{pointer-events:none}.ant-calendar .ant-calendar-ok-btn-lg{height:40px;padding:6.4px 15px;font-size:16px;border-radius:2px}.ant-calendar .ant-calendar-ok-btn-sm{height:24px;padding:0 7px;font-size:14px;border-radius:2px}.ant-calendar .ant-calendar-ok-btn>a:only-child{color:currentColor}.ant-calendar 
.ant-calendar-ok-btn>a:only-child:after{position:absolute;top:0;right:0;bottom:0;left:0;background:transparent;content:""}.ant-calendar .ant-calendar-ok-btn:hover,.ant-calendar .ant-calendar-ok-btn:focus{color:#fff;background:#40a9ff;border-color:#40a9ff}.ant-calendar .ant-calendar-ok-btn:hover>a:only-child,.ant-calendar .ant-calendar-ok-btn:focus>a:only-child{color:currentColor}.ant-calendar .ant-calendar-ok-btn:hover>a:only-child:after,.ant-calendar .ant-calendar-ok-btn:focus>a:only-child:after{position:absolute;top:0;right:0;bottom:0;left:0;background:transparent;content:""}.ant-calendar .ant-calendar-ok-btn:active{color:#fff;background:#096dd9;border-color:#096dd9}.ant-calendar .ant-calendar-ok-btn:active>a:only-child{color:currentColor}.ant-calendar .ant-calendar-ok-btn:active>a:only-child:after{position:absolute;top:0;right:0;bottom:0;left:0;background:transparent;content:""}.ant-calendar .ant-calendar-ok-btn[disabled],.ant-calendar .ant-calendar-ok-btn[disabled]:hover,.ant-calendar .ant-calendar-ok-btn[disabled]:focus,.ant-calendar .ant-calendar-ok-btn[disabled]:active{color:#00000040;background:#f5f5f5;border-color:#d9d9d9;text-shadow:none;box-shadow:none}.ant-calendar .ant-calendar-ok-btn[disabled]>a:only-child,.ant-calendar .ant-calendar-ok-btn[disabled]:hover>a:only-child,.ant-calendar .ant-calendar-ok-btn[disabled]:focus>a:only-child,.ant-calendar .ant-calendar-ok-btn[disabled]:active>a:only-child{color:currentColor}.ant-calendar .ant-calendar-ok-btn[disabled]>a:only-child:after,.ant-calendar .ant-calendar-ok-btn[disabled]:hover>a:only-child:after,.ant-calendar .ant-calendar-ok-btn[disabled]:focus>a:only-child:after,.ant-calendar 
.ant-calendar-ok-btn[disabled]:active>a:only-child:after{position:absolute;top:0;right:0;bottom:0;left:0;background:transparent;content:""}.ant-calendar-range-picker-input{width:44%;height:99%;text-align:center;background-color:transparent;border:0;outline:0}.ant-calendar-range-picker-input::-moz-placeholder{color:#bfbfbf;opacity:1}.ant-calendar-range-picker-input:-ms-input-placeholder{color:#bfbfbf}.ant-calendar-range-picker-input::-webkit-input-placeholder{color:#bfbfbf}.ant-calendar-range-picker-input:-moz-placeholder-shown{text-overflow:ellipsis}.ant-calendar-range-picker-input:-ms-input-placeholder{text-overflow:ellipsis}.ant-calendar-range-picker-input:placeholder-shown{text-overflow:ellipsis}.ant-calendar-range-picker-input[disabled]{cursor:not-allowed}.ant-calendar-range-picker-separator{display:inline-block;min-width:10px;height:100%;color:#00000073;white-space:nowrap;text-align:center;vertical-align:top;pointer-events:none}.ant-input-disabled .ant-calendar-range-picker-separator{color:#00000040}.ant-calendar-range{width:552px;overflow:hidden}.ant-calendar-range .ant-calendar-date-panel:after{display:block;clear:both;height:0;visibility:hidden;content:"."}.ant-calendar-range-part{position:relative;width:50%}.ant-calendar-range-left{float:left}.ant-calendar-range-left .ant-calendar-time-picker-inner{border-right:1px solid #f0f0f0}.ant-calendar-range-right{float:right}.ant-calendar-range-right .ant-calendar-time-picker-inner{border-left:1px solid #f0f0f0}.ant-calendar-range-middle{position:absolute;left:50%;z-index:1;height:34px;margin:1px 0 0;padding:0 200px 0 0;color:#00000073;line-height:34px;text-align:center;transform:translate(-50%);pointer-events:none}.ant-calendar-range-right .ant-calendar-date-input-wrap{margin-left:-90px}.ant-calendar-range.ant-calendar-time .ant-calendar-range-middle{padding:0 10px 0 0;transform:translate(-50%)}.ant-calendar-range .ant-calendar-today :not(.ant-calendar-disabled-cell) :not(.ant-calendar-last-month-cell) 
:not(.ant-calendar-next-month-btn-day) .ant-calendar-date{color:#1890ff;background:#bae7ff;border-color:#1890ff}.ant-calendar-range .ant-calendar-selected-start-date .ant-calendar-date,.ant-calendar-range .ant-calendar-selected-end-date .ant-calendar-date{color:#fff;background:#1890ff;border:1px solid transparent}.ant-calendar-range .ant-calendar-selected-start-date .ant-calendar-date:hover,.ant-calendar-range .ant-calendar-selected-end-date .ant-calendar-date:hover{background:#1890ff}.ant-calendar-range.ant-calendar-time .ant-calendar-range-right .ant-calendar-date-input-wrap{margin-left:0}.ant-calendar-range .ant-calendar-input-wrap{position:relative;height:34px}.ant-calendar-range .ant-calendar-input,.ant-calendar-range .ant-calendar-time-picker-input{position:relative;display:inline-block;width:100%;color:#000000d9;font-size:14px;line-height:1.5715;background-color:#fff;background-image:none;border:1px solid #d9d9d9;border-radius:2px;transition:all .3s;height:24px;padding:4px 0;line-height:24px;border:0;box-shadow:none}.ant-calendar-range .ant-calendar-input::-moz-placeholder,.ant-calendar-range .ant-calendar-time-picker-input::-moz-placeholder{color:#bfbfbf;opacity:1}.ant-calendar-range .ant-calendar-input:-ms-input-placeholder,.ant-calendar-range .ant-calendar-time-picker-input:-ms-input-placeholder{color:#bfbfbf}.ant-calendar-range .ant-calendar-input::-webkit-input-placeholder,.ant-calendar-range .ant-calendar-time-picker-input::-webkit-input-placeholder{color:#bfbfbf}.ant-calendar-range .ant-calendar-input:-moz-placeholder-shown,.ant-calendar-range .ant-calendar-time-picker-input:-moz-placeholder-shown{text-overflow:ellipsis}.ant-calendar-range .ant-calendar-input:-ms-input-placeholder,.ant-calendar-range .ant-calendar-time-picker-input:-ms-input-placeholder{text-overflow:ellipsis}.ant-calendar-range .ant-calendar-input:placeholder-shown,.ant-calendar-range .ant-calendar-time-picker-input:placeholder-shown{text-overflow:ellipsis}.ant-calendar-range 
.ant-calendar-input:hover,.ant-calendar-range .ant-calendar-time-picker-input:hover{border-color:#40a9ff;border-right-width:1px!important}.ant-calendar-range .ant-calendar-input:focus,.ant-calendar-range .ant-calendar-time-picker-input:focus{border-color:#40a9ff;border-right-width:1px!important;outline:0;box-shadow:0 0 0 2px #1890ff33}.ant-calendar-range .ant-calendar-input-disabled,.ant-calendar-range .ant-calendar-time-picker-input-disabled{color:#00000040;background-color:#f5f5f5;cursor:not-allowed;opacity:1}.ant-calendar-range .ant-calendar-input-disabled:hover,.ant-calendar-range .ant-calendar-time-picker-input-disabled:hover{border-color:#d9d9d9;border-right-width:1px!important}.ant-calendar-range .ant-calendar-input[disabled],.ant-calendar-range .ant-calendar-time-picker-input[disabled]{color:#00000040;background-color:#f5f5f5;cursor:not-allowed;opacity:1}.ant-calendar-range .ant-calendar-input[disabled]:hover,.ant-calendar-range .ant-calendar-time-picker-input[disabled]:hover{border-color:#d9d9d9;border-right-width:1px!important}textarea.ant-calendar-range .ant-calendar-input,textarea.ant-calendar-range .ant-calendar-time-picker-input{max-width:100%;height:auto;min-height:32px;line-height:1.5715;vertical-align:bottom;transition:all .3s,height 0s}.ant-calendar-range .ant-calendar-input-lg,.ant-calendar-range .ant-calendar-time-picker-input-lg{padding:6.5px 11px;font-size:16px}.ant-calendar-range .ant-calendar-input-sm,.ant-calendar-range .ant-calendar-time-picker-input-sm{padding:0 7px}.ant-calendar-range .ant-calendar-input:focus,.ant-calendar-range .ant-calendar-time-picker-input:focus{box-shadow:none}.ant-calendar-range .ant-calendar-time-picker-icon{display:none}.ant-calendar-range.ant-calendar-week-number{width:574px}.ant-calendar-range.ant-calendar-week-number .ant-calendar-range-part{width:286px}.ant-calendar-range .ant-calendar-year-panel,.ant-calendar-range .ant-calendar-month-panel,.ant-calendar-range 
.ant-calendar-decade-panel{top:34px}.ant-calendar-range .ant-calendar-month-panel .ant-calendar-year-panel{top:0}.ant-calendar-range .ant-calendar-decade-panel-table,.ant-calendar-range .ant-calendar-year-panel-table,.ant-calendar-range .ant-calendar-month-panel-table{height:208px}.ant-calendar-range .ant-calendar-in-range-cell{position:relative;border-radius:0}.ant-calendar-range .ant-calendar-in-range-cell>div{position:relative;z-index:1}.ant-calendar-range .ant-calendar-in-range-cell:before{position:absolute;top:4px;right:0;bottom:4px;left:0;display:block;background:#e6f7ff;border:0;border-radius:0;content:""}.ant-calendar-range .ant-calendar-footer-extra{float:left}div.ant-calendar-range-quick-selector{text-align:left}div.ant-calendar-range-quick-selector>a{margin-right:8px}.ant-calendar-range .ant-calendar-header,.ant-calendar-range .ant-calendar-month-panel-header,.ant-calendar-range .ant-calendar-year-panel-header,.ant-calendar-range .ant-calendar-decade-panel-header{border-bottom:0}.ant-calendar-range .ant-calendar-body,.ant-calendar-range .ant-calendar-month-panel-body,.ant-calendar-range .ant-calendar-year-panel-body,.ant-calendar-range .ant-calendar-decade-panel-body{border-top:1px solid #f0f0f0}.ant-calendar-range.ant-calendar-time .ant-calendar-time-picker{top:68px;z-index:2;width:100%;height:207px}.ant-calendar-range.ant-calendar-time .ant-calendar-time-picker-panel{height:267px;margin-top:-34px}.ant-calendar-range.ant-calendar-time .ant-calendar-time-picker-inner{height:100%;padding-top:40px;background:none}.ant-calendar-range.ant-calendar-time .ant-calendar-time-picker-combobox{display:inline-block;height:100%;background-color:#fff;border-top:1px solid #f0f0f0}.ant-calendar-range.ant-calendar-time .ant-calendar-time-picker-select{height:100%}.ant-calendar-range.ant-calendar-time .ant-calendar-time-picker-select ul{max-height:100%}.ant-calendar-range.ant-calendar-time .ant-calendar-footer 
.ant-calendar-time-picker-btn{margin-right:8px}.ant-calendar-range.ant-calendar-time .ant-calendar-today-btn{height:22px;margin:8px 12px;line-height:22px}.ant-calendar-range-with-ranges.ant-calendar-time .ant-calendar-time-picker{height:233px}.ant-calendar-range.ant-calendar-show-time-picker .ant-calendar-body{border-top-color:transparent}.ant-calendar-time-picker{position:absolute;top:40px;width:100%;background-color:#fff}.ant-calendar-time-picker-panel{position:absolute;z-index:1050;width:100%}.ant-calendar-time-picker-inner{position:relative;display:inline-block;width:100%;overflow:hidden;font-size:14px;line-height:1.5;text-align:left;list-style:none;background-color:#fff;background-clip:padding-box;outline:none}.ant-calendar-time-picker-combobox,.ant-calendar-time-picker-column-1,.ant-calendar-time-picker-column-1 .ant-calendar-time-picker-select{width:100%}.ant-calendar-time-picker-column-2 .ant-calendar-time-picker-select{width:50%}.ant-calendar-time-picker-column-3 .ant-calendar-time-picker-select{width:33.33%}.ant-calendar-time-picker-column-4 .ant-calendar-time-picker-select{width:25%}.ant-calendar-time-picker-input-wrap{display:none}.ant-calendar-time-picker-select{position:relative;float:left;height:226px;overflow:hidden;font-size:14px;border-right:1px solid #f0f0f0}.ant-calendar-time-picker-select:hover{overflow-y:auto}.ant-calendar-time-picker-select:first-child{margin-left:0;border-left:0}.ant-calendar-time-picker-select:last-child{border-right:0}.ant-calendar-time-picker-select ul{width:100%;max-height:206px;margin:0;padding:0;list-style:none}.ant-calendar-time-picker-select li{width:100%;height:24px;margin:0;line-height:24px;text-align:center;list-style:none;cursor:pointer;transition:all .3s;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.ant-calendar-time-picker-select li:last-child:after{display:block;height:202px;content:""}.ant-calendar-time-picker-select 
li:hover{background:#f5f5f5}.ant-calendar-time-picker-select li:focus{color:#1890ff;font-weight:600;outline:none}li.ant-calendar-time-picker-select-option-selected{font-weight:600;background:#f5f5f5}li.ant-calendar-time-picker-select-option-disabled{color:#00000040}li.ant-calendar-time-picker-select-option-disabled:hover{background:transparent;cursor:not-allowed}.ant-calendar-time .ant-calendar-day-select{display:inline-block;padding:0 2px;color:#000000d9;font-weight:500;line-height:34px}.ant-calendar-time .ant-calendar-footer{position:relative;height:auto}.ant-calendar-time .ant-calendar-footer-btn{text-align:right}.ant-calendar-time .ant-calendar-footer .ant-calendar-today-btn{float:left;margin:0}.ant-calendar-time .ant-calendar-footer .ant-calendar-time-picker-btn{display:inline-block;margin-right:8px}.ant-calendar-time .ant-calendar-footer .ant-calendar-time-picker-btn-disabled{color:#00000040}.ant-calendar-month-panel{position:absolute;top:0;right:0;bottom:0;left:0;z-index:10;background:#fff;border-radius:2px;outline:none}.ant-calendar-month-panel>div{display:flex;flex-direction:column;height:100%}.ant-calendar-month-panel-hidden{display:none}.ant-calendar-month-panel-header{height:40px;line-height:40px;text-align:center;border-bottom:1px solid #f0f0f0;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;position:relative}.ant-calendar-month-panel-header a:hover{color:#40a9ff}.ant-calendar-month-panel-header .ant-calendar-month-panel-century-select,.ant-calendar-month-panel-header .ant-calendar-month-panel-decade-select,.ant-calendar-month-panel-header .ant-calendar-month-panel-year-select,.ant-calendar-month-panel-header .ant-calendar-month-panel-month-select{display:inline-block;padding:0 2px;color:#000000d9;font-weight:500;line-height:40px}.ant-calendar-month-panel-header .ant-calendar-month-panel-century-select-arrow,.ant-calendar-month-panel-header 
.ant-calendar-month-panel-decade-select-arrow,.ant-calendar-month-panel-header .ant-calendar-month-panel-year-select-arrow,.ant-calendar-month-panel-header .ant-calendar-month-panel-month-select-arrow{display:none}.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-century-btn,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-century-btn,.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-decade-btn,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-decade-btn,.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-month-btn,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-month-btn,.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-year-btn,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-year-btn{position:absolute;top:0;display:inline-block;padding:0 5px;color:#00000073;font-size:16px;font-family:Arial,Hiragino Sans GB,Microsoft Yahei,"Microsoft Sans Serif",sans-serif;line-height:40px}.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-century-btn,.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-decade-btn,.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-year-btn{left:7px;height:100%}.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-century-btn:before,.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-decade-btn:before,.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-year-btn:before,.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-century-btn:after,.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-decade-btn:after,.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-year-btn:after{position:relative;top:-1px;display:inline-block;width:8px;height:8px;vertical-align:middle;border:0 solid #aaa;border-width:1.5px 0 0 1.5px;border-radius:1px;transform:rotate(-45deg) scale(.8);transition:all .3s;content:""}.ant-calendar-month-panel-header 
.ant-calendar-month-panel-prev-century-btn:hover:before,.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-decade-btn:hover:before,.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-year-btn:hover:before,.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-century-btn:hover:after,.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-decade-btn:hover:after,.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-year-btn:hover:after{border-color:#000000d9}.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-century-btn:after,.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-decade-btn:after,.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-year-btn:after{display:none}.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-century-btn:after,.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-decade-btn:after,.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-year-btn:after{position:relative;left:-3px;display:inline-block}.ant-calendar-month-panel-header .ant-calendar-month-panel-next-century-btn,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-decade-btn,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-year-btn{right:7px;height:100%}.ant-calendar-month-panel-header .ant-calendar-month-panel-next-century-btn:before,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-decade-btn:before,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-year-btn:before,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-century-btn:after,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-decade-btn:after,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-year-btn:after{position:relative;top:-1px;display:inline-block;width:8px;height:8px;vertical-align:middle;border:0 solid #aaa;border-width:1.5px 0 0 1.5px;border-radius:1px;transform:rotate(-45deg) scale(.8);transition:all 
.3s;content:""}.ant-calendar-month-panel-header .ant-calendar-month-panel-next-century-btn:hover:before,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-decade-btn:hover:before,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-year-btn:hover:before,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-century-btn:hover:after,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-decade-btn:hover:after,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-year-btn:hover:after{border-color:#000000d9}.ant-calendar-month-panel-header .ant-calendar-month-panel-next-century-btn:after,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-decade-btn:after,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-year-btn:after{display:none}.ant-calendar-month-panel-header .ant-calendar-month-panel-next-century-btn:before,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-decade-btn:before,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-year-btn:before,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-century-btn:after,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-decade-btn:after,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-year-btn:after{transform:rotate(135deg) scale(.8)}.ant-calendar-month-panel-header .ant-calendar-month-panel-next-century-btn:before,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-decade-btn:before,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-year-btn:before{position:relative;left:3px}.ant-calendar-month-panel-header .ant-calendar-month-panel-next-century-btn:after,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-decade-btn:after,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-year-btn:after{display:inline-block}.ant-calendar-month-panel-header 
.ant-calendar-month-panel-prev-month-btn{left:29px;height:100%}.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-month-btn:before,.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-month-btn:after{position:relative;top:-1px;display:inline-block;width:8px;height:8px;vertical-align:middle;border:0 solid #aaa;border-width:1.5px 0 0 1.5px;border-radius:1px;transform:rotate(-45deg) scale(.8);transition:all .3s;content:""}.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-month-btn:hover:before,.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-month-btn:hover:after{border-color:#000000d9}.ant-calendar-month-panel-header .ant-calendar-month-panel-prev-month-btn:after{display:none}.ant-calendar-month-panel-header .ant-calendar-month-panel-next-month-btn{right:29px;height:100%}.ant-calendar-month-panel-header .ant-calendar-month-panel-next-month-btn:before,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-month-btn:after{position:relative;top:-1px;display:inline-block;width:8px;height:8px;vertical-align:middle;border:0 solid #aaa;border-width:1.5px 0 0 1.5px;border-radius:1px;transform:rotate(-45deg) scale(.8);transition:all .3s;content:""}.ant-calendar-month-panel-header .ant-calendar-month-panel-next-month-btn:hover:before,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-month-btn:hover:after{border-color:#000000d9}.ant-calendar-month-panel-header .ant-calendar-month-panel-next-month-btn:after{display:none}.ant-calendar-month-panel-header .ant-calendar-month-panel-next-month-btn:before,.ant-calendar-month-panel-header .ant-calendar-month-panel-next-month-btn:after{transform:rotate(135deg) scale(.8)}.ant-calendar-month-panel-body{flex:1}.ant-calendar-month-panel-footer{border-top:1px solid #f0f0f0}.ant-calendar-month-panel-footer .ant-calendar-footer-extra{padding:0 
12px}.ant-calendar-month-panel-table{width:100%;height:100%;table-layout:fixed;border-collapse:separate}.ant-calendar-month-panel-selected-cell .ant-calendar-month-panel-month,.ant-calendar-month-panel-selected-cell .ant-calendar-month-panel-month:hover{color:#fff;background:#1890ff}.ant-calendar-month-panel-cell{text-align:center}.ant-calendar-month-panel-cell-disabled .ant-calendar-month-panel-month,.ant-calendar-month-panel-cell-disabled .ant-calendar-month-panel-month:hover{color:#00000040;background:#f5f5f5;cursor:not-allowed}.ant-calendar-month-panel-month{display:inline-block;height:24px;margin:0 auto;padding:0 8px;color:#000000d9;line-height:24px;text-align:center;background:transparent;border-radius:2px;transition:background .3s ease}.ant-calendar-month-panel-month:hover{background:#f5f5f5;cursor:pointer}.ant-calendar-year-panel{position:absolute;top:0;right:0;bottom:0;left:0;z-index:10;background:#fff;border-radius:2px;outline:none}.ant-calendar-year-panel>div{display:flex;flex-direction:column;height:100%}.ant-calendar-year-panel-hidden{display:none}.ant-calendar-year-panel-header{height:40px;line-height:40px;text-align:center;border-bottom:1px solid #f0f0f0;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;position:relative}.ant-calendar-year-panel-header a:hover{color:#40a9ff}.ant-calendar-year-panel-header .ant-calendar-year-panel-century-select,.ant-calendar-year-panel-header .ant-calendar-year-panel-decade-select,.ant-calendar-year-panel-header .ant-calendar-year-panel-year-select,.ant-calendar-year-panel-header .ant-calendar-year-panel-month-select{display:inline-block;padding:0 2px;color:#000000d9;font-weight:500;line-height:40px}.ant-calendar-year-panel-header .ant-calendar-year-panel-century-select-arrow,.ant-calendar-year-panel-header .ant-calendar-year-panel-decade-select-arrow,.ant-calendar-year-panel-header .ant-calendar-year-panel-year-select-arrow,.ant-calendar-year-panel-header 
.ant-calendar-year-panel-month-select-arrow{display:none}.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-century-btn,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-century-btn,.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-decade-btn,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-decade-btn,.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-month-btn,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-month-btn,.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-year-btn,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-year-btn{position:absolute;top:0;display:inline-block;padding:0 5px;color:#00000073;font-size:16px;font-family:Arial,Hiragino Sans GB,Microsoft Yahei,"Microsoft Sans Serif",sans-serif;line-height:40px}.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-century-btn,.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-decade-btn,.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-year-btn{left:7px;height:100%}.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-century-btn:before,.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-decade-btn:before,.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-year-btn:before,.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-century-btn:after,.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-decade-btn:after,.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-year-btn:after{position:relative;top:-1px;display:inline-block;width:8px;height:8px;vertical-align:middle;border:0 solid #aaa;border-width:1.5px 0 0 1.5px;border-radius:1px;transform:rotate(-45deg) scale(.8);transition:all .3s;content:""}.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-century-btn:hover:before,.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-decade-btn:hover:before,.ant-calendar-year-panel-header 
.ant-calendar-year-panel-prev-year-btn:hover:before,.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-century-btn:hover:after,.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-decade-btn:hover:after,.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-year-btn:hover:after{border-color:#000000d9}.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-century-btn:after,.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-decade-btn:after,.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-year-btn:after{display:none}.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-century-btn:after,.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-decade-btn:after,.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-year-btn:after{position:relative;left:-3px;display:inline-block}.ant-calendar-year-panel-header .ant-calendar-year-panel-next-century-btn,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-decade-btn,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-year-btn{right:7px;height:100%}.ant-calendar-year-panel-header .ant-calendar-year-panel-next-century-btn:before,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-decade-btn:before,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-year-btn:before,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-century-btn:after,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-decade-btn:after,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-year-btn:after{position:relative;top:-1px;display:inline-block;width:8px;height:8px;vertical-align:middle;border:0 solid #aaa;border-width:1.5px 0 0 1.5px;border-radius:1px;transform:rotate(-45deg) scale(.8);transition:all .3s;content:""}.ant-calendar-year-panel-header .ant-calendar-year-panel-next-century-btn:hover:before,.ant-calendar-year-panel-header 
.ant-calendar-year-panel-next-decade-btn:hover:before,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-year-btn:hover:before,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-century-btn:hover:after,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-decade-btn:hover:after,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-year-btn:hover:after{border-color:#000000d9}.ant-calendar-year-panel-header .ant-calendar-year-panel-next-century-btn:after,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-decade-btn:after,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-year-btn:after{display:none}.ant-calendar-year-panel-header .ant-calendar-year-panel-next-century-btn:before,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-decade-btn:before,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-year-btn:before,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-century-btn:after,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-decade-btn:after,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-year-btn:after{transform:rotate(135deg) scale(.8)}.ant-calendar-year-panel-header .ant-calendar-year-panel-next-century-btn:before,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-decade-btn:before,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-year-btn:before{position:relative;left:3px}.ant-calendar-year-panel-header .ant-calendar-year-panel-next-century-btn:after,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-decade-btn:after,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-year-btn:after{display:inline-block}.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-month-btn{left:29px;height:100%}.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-month-btn:before,.ant-calendar-year-panel-header 
.ant-calendar-year-panel-prev-month-btn:after{position:relative;top:-1px;display:inline-block;width:8px;height:8px;vertical-align:middle;border:0 solid #aaa;border-width:1.5px 0 0 1.5px;border-radius:1px;transform:rotate(-45deg) scale(.8);transition:all .3s;content:""}.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-month-btn:hover:before,.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-month-btn:hover:after{border-color:#000000d9}.ant-calendar-year-panel-header .ant-calendar-year-panel-prev-month-btn:after{display:none}.ant-calendar-year-panel-header .ant-calendar-year-panel-next-month-btn{right:29px;height:100%}.ant-calendar-year-panel-header .ant-calendar-year-panel-next-month-btn:before,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-month-btn:after{position:relative;top:-1px;display:inline-block;width:8px;height:8px;vertical-align:middle;border:0 solid #aaa;border-width:1.5px 0 0 1.5px;border-radius:1px;transform:rotate(-45deg) scale(.8);transition:all .3s;content:""}.ant-calendar-year-panel-header .ant-calendar-year-panel-next-month-btn:hover:before,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-month-btn:hover:after{border-color:#000000d9}.ant-calendar-year-panel-header .ant-calendar-year-panel-next-month-btn:after{display:none}.ant-calendar-year-panel-header .ant-calendar-year-panel-next-month-btn:before,.ant-calendar-year-panel-header .ant-calendar-year-panel-next-month-btn:after{transform:rotate(135deg) scale(.8)}.ant-calendar-year-panel-body{flex:1}.ant-calendar-year-panel-footer{border-top:1px solid #f0f0f0}.ant-calendar-year-panel-footer .ant-calendar-footer-extra{padding:0 12px}.ant-calendar-year-panel-table{width:100%;height:100%;table-layout:fixed;border-collapse:separate}.ant-calendar-year-panel-cell{text-align:center}.ant-calendar-year-panel-year{display:inline-block;height:24px;margin:0 auto;padding:0 
8px;color:#000000d9;line-height:24px;text-align:center;background:transparent;border-radius:2px;transition:background .3s ease}.ant-calendar-year-panel-year:hover{background:#f5f5f5;cursor:pointer}.ant-calendar-year-panel-selected-cell .ant-calendar-year-panel-year,.ant-calendar-year-panel-selected-cell .ant-calendar-year-panel-year:hover{color:#fff;background:#1890ff}.ant-calendar-year-panel-last-decade-cell .ant-calendar-year-panel-year,.ant-calendar-year-panel-next-decade-cell .ant-calendar-year-panel-year{color:#00000040;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.ant-calendar-decade-panel{position:absolute;top:0;right:0;bottom:0;left:0;z-index:10;display:flex;flex-direction:column;background:#fff;border-radius:2px;outline:none}.ant-calendar-decade-panel-hidden{display:none}.ant-calendar-decade-panel-header{height:40px;line-height:40px;text-align:center;border-bottom:1px solid #f0f0f0;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;position:relative}.ant-calendar-decade-panel-header a:hover{color:#40a9ff}.ant-calendar-decade-panel-header .ant-calendar-decade-panel-century-select,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-decade-select,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-year-select,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-month-select{display:inline-block;padding:0 2px;color:#000000d9;font-weight:500;line-height:40px}.ant-calendar-decade-panel-header .ant-calendar-decade-panel-century-select-arrow,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-decade-select-arrow,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-year-select-arrow,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-month-select-arrow{display:none}.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-century-btn,.ant-calendar-decade-panel-header 
.ant-calendar-decade-panel-next-century-btn,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-decade-btn,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-decade-btn,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-month-btn,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-month-btn,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-year-btn,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-year-btn{position:absolute;top:0;display:inline-block;padding:0 5px;color:#00000073;font-size:16px;font-family:Arial,Hiragino Sans GB,Microsoft Yahei,"Microsoft Sans Serif",sans-serif;line-height:40px}.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-century-btn,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-decade-btn,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-year-btn{left:7px;height:100%}.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-century-btn:before,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-decade-btn:before,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-year-btn:before,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-century-btn:after,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-decade-btn:after,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-year-btn:after{position:relative;top:-1px;display:inline-block;width:8px;height:8px;vertical-align:middle;border:0 solid #aaa;border-width:1.5px 0 0 1.5px;border-radius:1px;transform:rotate(-45deg) scale(.8);transition:all .3s;content:""}.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-century-btn:hover:before,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-decade-btn:hover:before,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-year-btn:hover:before,.ant-calendar-decade-panel-header 
.ant-calendar-decade-panel-prev-century-btn:hover:after,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-decade-btn:hover:after,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-year-btn:hover:after{border-color:#000000d9}.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-century-btn:after,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-decade-btn:after,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-year-btn:after{display:none}.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-century-btn:after,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-decade-btn:after,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-year-btn:after{position:relative;left:-3px;display:inline-block}.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-century-btn,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-decade-btn,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-year-btn{right:7px;height:100%}.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-century-btn:before,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-decade-btn:before,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-year-btn:before,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-century-btn:after,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-decade-btn:after,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-year-btn:after{position:relative;top:-1px;display:inline-block;width:8px;height:8px;vertical-align:middle;border:0 solid #aaa;border-width:1.5px 0 0 1.5px;border-radius:1px;transform:rotate(-45deg) scale(.8);transition:all .3s;content:""}.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-century-btn:hover:before,.ant-calendar-decade-panel-header 
.ant-calendar-decade-panel-next-decade-btn:hover:before,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-year-btn:hover:before,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-century-btn:hover:after,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-decade-btn:hover:after,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-year-btn:hover:after{border-color:#000000d9}.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-century-btn:after,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-decade-btn:after,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-year-btn:after{display:none}.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-century-btn:before,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-decade-btn:before,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-year-btn:before,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-century-btn:after,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-decade-btn:after,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-year-btn:after{transform:rotate(135deg) scale(.8)}.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-century-btn:before,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-decade-btn:before,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-year-btn:before{position:relative;left:3px}.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-century-btn:after,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-decade-btn:after,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-year-btn:after{display:inline-block}.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-month-btn{left:29px;height:100%}.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-month-btn:before,.ant-calendar-decade-panel-header 
.ant-calendar-decade-panel-prev-month-btn:after{position:relative;top:-1px;display:inline-block;width:8px;height:8px;vertical-align:middle;border:0 solid #aaa;border-width:1.5px 0 0 1.5px;border-radius:1px;transform:rotate(-45deg) scale(.8);transition:all .3s;content:""}.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-month-btn:hover:before,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-month-btn:hover:after{border-color:#000000d9}.ant-calendar-decade-panel-header .ant-calendar-decade-panel-prev-month-btn:after{display:none}.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-month-btn{right:29px;height:100%}.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-month-btn:before,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-month-btn:after{position:relative;top:-1px;display:inline-block;width:8px;height:8px;vertical-align:middle;border:0 solid #aaa;border-width:1.5px 0 0 1.5px;border-radius:1px;transform:rotate(-45deg) scale(.8);transition:all .3s;content:""}.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-month-btn:hover:before,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-month-btn:hover:after{border-color:#000000d9}.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-month-btn:after{display:none}.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-month-btn:before,.ant-calendar-decade-panel-header .ant-calendar-decade-panel-next-month-btn:after{transform:rotate(135deg) scale(.8)}.ant-calendar-decade-panel-body{flex:1}.ant-calendar-decade-panel-footer{border-top:1px solid #f0f0f0}.ant-calendar-decade-panel-footer .ant-calendar-footer-extra{padding:0 12px}.ant-calendar-decade-panel-table{width:100%;height:100%;table-layout:fixed;border-collapse:separate}.ant-calendar-decade-panel-cell{white-space:nowrap;text-align:center}.ant-calendar-decade-panel-decade{display:inline-block;height:24px;margin:0 auto;padding:0 
6px;color:#000000d9;line-height:24px;text-align:center;background:transparent;border-radius:2px;transition:background .3s ease}.ant-calendar-decade-panel-decade:hover{background:#f5f5f5;cursor:pointer}.ant-calendar-decade-panel-selected-cell .ant-calendar-decade-panel-decade,.ant-calendar-decade-panel-selected-cell .ant-calendar-decade-panel-decade:hover{color:#fff;background:#1890ff}.ant-calendar-decade-panel-last-century-cell .ant-calendar-decade-panel-decade,.ant-calendar-decade-panel-next-century-cell .ant-calendar-decade-panel-decade{color:#00000040;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.ant-calendar-month .ant-calendar-month-header-wrap{position:relative;height:288px}.ant-calendar-month .ant-calendar-month-panel,.ant-calendar-month .ant-calendar-year-panel{top:0;height:100%}.ant-calendar-week-number-cell{opacity:.5}.ant-calendar-week-number .ant-calendar-body tr{cursor:pointer;transition:all .3s}.ant-calendar-week-number .ant-calendar-body tr:hover{background:#e6f7ff}.ant-calendar-week-number .ant-calendar-body tr.ant-calendar-active-week{font-weight:700;background:#bae7ff}.ant-calendar-week-number .ant-calendar-body tr .ant-calendar-selected-day .ant-calendar-date,.ant-calendar-week-number .ant-calendar-body tr .ant-calendar-selected-day:hover .ant-calendar-date{color:#000000d9;background:transparent}.ant-time-picker-panel{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:absolute;z-index:1050;font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Helvetica Neue,Arial,Noto Sans,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol,"Noto Color Emoji"}.ant-time-picker-panel-inner{position:relative;left:-2px;font-size:14px;text-align:left;list-style:none;background-color:#fff;background-clip:padding-box;border-radius:2px;outline:none;box-shadow:0 2px 8px 
#00000026}.ant-time-picker-panel-input{background:#fff;width:100%;max-width:154px;margin:0;padding:0;line-height:normal;border:0;outline:0;cursor:auto}.ant-time-picker-panel-input::-moz-placeholder{color:#bfbfbf;opacity:1}.ant-time-picker-panel-input:-ms-input-placeholder{color:#bfbfbf}.ant-time-picker-panel-input::-webkit-input-placeholder{color:#bfbfbf}.ant-time-picker-panel-input:-moz-placeholder-shown{text-overflow:ellipsis}.ant-time-picker-panel-input:-ms-input-placeholder{text-overflow:ellipsis}.ant-time-picker-panel-input:placeholder-shown{text-overflow:ellipsis}.ant-time-picker-panel-input-wrap{position:relative;padding:7px 2px 7px 12px;border-bottom:1px solid #f0f0f0}.ant-time-picker-panel-input-invalid{border-color:#ff4d4f}.ant-time-picker-panel-narrow .ant-time-picker-panel-input-wrap{max-width:112px}.ant-time-picker-panel-select{position:relative;float:left;width:56px;max-height:192px;overflow:hidden;font-size:14px;border-left:1px solid #f0f0f0}.ant-time-picker-panel-select:hover{overflow-y:auto}.ant-time-picker-panel-select:first-child{margin-left:0;border-left:0}.ant-time-picker-panel-select:last-child{border-right:0}.ant-time-picker-panel-select:only-child{width:100%}.ant-time-picker-panel-select ul{width:56px;margin:0;padding:0 0 160px;list-style:none}.ant-time-picker-panel-select li{width:100%;height:32px;margin:0;padding:0 0 0 12px;line-height:32px;text-align:left;list-style:none;cursor:pointer;transition:all .3s;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.ant-time-picker-panel-select li:focus{color:#1890ff;font-weight:600;outline:none}.ant-time-picker-panel-select 
li:hover{background:#f5f5f5}li.ant-time-picker-panel-select-option-selected{font-weight:600;background:#f5f5f5}li.ant-time-picker-panel-select-option-selected:hover{background:#f5f5f5}li.ant-time-picker-panel-select-option-disabled{color:#00000040}li.ant-time-picker-panel-select-option-disabled:hover{background:transparent;cursor:not-allowed}li.ant-time-picker-panel-select-option-disabled:focus{color:#00000040;font-weight:inherit}.ant-time-picker-panel-combobox:before,.ant-time-picker-panel-combobox:after{display:table;content:""}.ant-time-picker-panel-combobox:after{clear:both}.ant-time-picker-panel-addon{padding:8px;border-top:1px solid #f0f0f0}.ant-time-picker-panel.slide-up-enter.slide-up-enter-active.ant-time-picker-panel-placement-topLeft,.ant-time-picker-panel.slide-up-enter.slide-up-enter-active.ant-time-picker-panel-placement-topRight,.ant-time-picker-panel.slide-up-appear.slide-up-appear-active.ant-time-picker-panel-placement-topLeft,.ant-time-picker-panel.slide-up-appear.slide-up-appear-active.ant-time-picker-panel-placement-topRight{-webkit-animation-name:antSlideDownIn;animation-name:antSlideDownIn}.ant-time-picker-panel.slide-up-enter.slide-up-enter-active.ant-time-picker-panel-placement-bottomLeft,.ant-time-picker-panel.slide-up-enter.slide-up-enter-active.ant-time-picker-panel-placement-bottomRight,.ant-time-picker-panel.slide-up-appear.slide-up-appear-active.ant-time-picker-panel-placement-bottomLeft,.ant-time-picker-panel.slide-up-appear.slide-up-appear-active.ant-time-picker-panel-placement-bottomRight{-webkit-animation-name:antSlideUpIn;animation-name:antSlideUpIn}.ant-time-picker-panel.slide-up-leave.slide-up-leave-active.ant-time-picker-panel-placement-topLeft,.ant-time-picker-panel.slide-up-leave.slide-up-leave-active.ant-time-picker-panel-placement-topRight{-webkit-animation-name:antSlideDownOut;animation-name:antSlideDownOut}.ant-time-picker-panel.slide-up-leave.slide-up-leave-active.ant-time-picker-panel-placement-bottomLeft,.ant-time-picke
r-panel.slide-up-leave.slide-up-leave-active.ant-time-picker-panel-placement-bottomRight{-webkit-animation-name:antSlideUpOut;animation-name:antSlideUpOut}.ant-time-picker{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:relative;display:inline-block;width:128px;outline:none;cursor:text;transition:opacity .3s}.ant-time-picker-input{position:relative;display:inline-block;width:100%;padding:4px 11px;color:#000000d9;font-size:14px;line-height:1.5715;background-color:#fff;background-image:none;border:1px solid #d9d9d9;border-radius:2px;transition:all .3s}.ant-time-picker-input::-moz-placeholder{color:#bfbfbf;opacity:1}.ant-time-picker-input:-ms-input-placeholder{color:#bfbfbf}.ant-time-picker-input::-webkit-input-placeholder{color:#bfbfbf}.ant-time-picker-input:-moz-placeholder-shown{text-overflow:ellipsis}.ant-time-picker-input:-ms-input-placeholder{text-overflow:ellipsis}.ant-time-picker-input:placeholder-shown{text-overflow:ellipsis}.ant-time-picker-input:hover{border-color:#40a9ff;border-right-width:1px!important}.ant-time-picker-input:focus{border-color:#40a9ff;border-right-width:1px!important;outline:0;box-shadow:0 0 0 2px #1890ff33}.ant-time-picker-input-disabled{color:#00000040;background-color:#f5f5f5;cursor:not-allowed;opacity:1}.ant-time-picker-input-disabled:hover{border-color:#d9d9d9;border-right-width:1px!important}textarea.ant-time-picker-input{max-width:100%;height:auto;min-height:32px;line-height:1.5715;vertical-align:bottom;transition:all .3s,height 0s}.ant-time-picker-input-lg{padding:6.5px 11px;font-size:16px}.ant-time-picker-input-sm{padding:0 
7px}.ant-time-picker-input[disabled]{color:#00000040;background-color:#f5f5f5;cursor:not-allowed;opacity:1}.ant-time-picker-input[disabled]:hover{border-color:#d9d9d9;border-right-width:1px!important}.ant-time-picker-open{opacity:0}.ant-time-picker-icon,.ant-time-picker-clear{position:absolute;top:50%;right:11px;z-index:1;width:14px;height:14px;margin-top:-7px;color:#00000040;line-height:14px;transition:all .3s cubic-bezier(.645,.045,.355,1);-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.ant-time-picker-icon .ant-time-picker-clock-icon,.ant-time-picker-clear .ant-time-picker-clock-icon{display:block;color:#00000040;line-height:1}.ant-time-picker-clear{z-index:2;background:#fff;opacity:0;pointer-events:none}.ant-time-picker-clear:hover{color:#00000073}.ant-time-picker:hover .ant-time-picker-clear{opacity:1;pointer-events:auto}.ant-time-picker-large .ant-time-picker-input{padding:6.5px 11px;font-size:16px}.ant-time-picker-small .ant-time-picker-input{padding:0 7px}.ant-time-picker-small .ant-time-picker-icon,.ant-time-picker-small .ant-time-picker-clear{right:7px}@media not all and (-webkit-min-device-pixel-ratio: 0),not all and (min-resolution: .001dpcm){@supports (-webkit-appearance: none) and (stroke-color: transparent){.ant-input{line-height:1.5715}}}.ant-tag{box-sizing:border-box;margin:0 8px 0 0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";display:inline-block;height:auto;padding:0 7px;font-size:12px;line-height:20px;white-space:nowrap;background:#fafafa;border:1px solid #d9d9d9;border-radius:2px;opacity:1;transition:all .3s}.ant-tag,.ant-tag a,.ant-tag a:hover{color:#000000d9}.ant-tag>a:first-child:last-child{display:inline-block;margin:0 -8px;padding:0 8px}.ant-tag-close-icon{margin-left:3px;color:#00000073;font-size:10px;cursor:pointer;transition:all 
.3s}.ant-tag-close-icon:hover{color:#000000d9}.ant-tag-has-color{border-color:transparent}.ant-tag-has-color,.ant-tag-has-color a,.ant-tag-has-color a:hover,.ant-tag-has-color .anticon-close,.ant-tag-has-color .anticon-close:hover{color:#fff}.ant-tag-checkable{background-color:transparent;border-color:transparent;cursor:pointer}.ant-tag-checkable:not(.ant-tag-checkable-checked):hover{color:#1890ff}.ant-tag-checkable:active,.ant-tag-checkable-checked{color:#fff}.ant-tag-checkable-checked{background-color:#1890ff}.ant-tag-checkable:active{background-color:#096dd9}.ant-tag-hidden{display:none}.ant-tag-pink{color:#c41d7f;background:#fff0f6;border-color:#ffadd2}.ant-tag-pink-inverse{color:#fff;background:#eb2f96;border-color:#eb2f96}.ant-tag-magenta{color:#c41d7f;background:#fff0f6;border-color:#ffadd2}.ant-tag-magenta-inverse{color:#fff;background:#eb2f96;border-color:#eb2f96}.ant-tag-red{color:#cf1322;background:#fff1f0;border-color:#ffa39e}.ant-tag-red-inverse{color:#fff;background:#f5222d;border-color:#f5222d}.ant-tag-volcano{color:#d4380d;background:#fff2e8;border-color:#ffbb96}.ant-tag-volcano-inverse{color:#fff;background:#fa541c;border-color:#fa541c}.ant-tag-orange{color:#d46b08;background:#fff7e6;border-color:#ffd591}.ant-tag-orange-inverse{color:#fff;background:#fa8c16;border-color:#fa8c16}.ant-tag-yellow{color:#d4b106;background:#feffe6;border-color:#fffb8f}.ant-tag-yellow-inverse{color:#fff;background:#fadb14;border-color:#fadb14}.ant-tag-gold{color:#d48806;background:#fffbe6;border-color:#ffe58f}.ant-tag-gold-inverse{color:#fff;background:#faad14;border-color:#faad14}.ant-tag-cyan{color:#08979c;background:#e6fffb;border-color:#87e8de}.ant-tag-cyan-inverse{color:#fff;background:#13c2c2;border-color:#13c2c2}.ant-tag-lime{color:#7cb305;background:#fcffe6;border-color:#eaff8f}.ant-tag-lime-inverse{color:#fff;background:#a0d911;border-color:#a0d911}.ant-tag-green{color:#389e0d;background:#f6ffed;border-color:#b7eb8f}.ant-tag-green-inverse{color:#fff;background:#5
2c41a;border-color:#52c41a}.ant-tag-blue{color:#096dd9;background:#e6f7ff;border-color:#91d5ff}.ant-tag-blue-inverse{color:#fff;background:#1890ff;border-color:#1890ff}.ant-tag-geekblue{color:#1d39c4;background:#f0f5ff;border-color:#adc6ff}.ant-tag-geekblue-inverse{color:#fff;background:#2f54eb;border-color:#2f54eb}.ant-tag-purple{color:#531dab;background:#f9f0ff;border-color:#d3adf7}.ant-tag-purple-inverse{color:#fff;background:#722ed1;border-color:#722ed1}.ant-tag-success{color:#52c41a;background:#f6ffed;border-color:#b7eb8f}.ant-tag-processing{color:#1890ff;background:#e6f7ff;border-color:#91d5ff}.ant-tag-error{color:#f5222d;background:#fff1f0;border-color:#ffa39e}.ant-tag-warning{color:#fa8c16;background:#fff7e6;border-color:#ffd591}.ant-tag>.anticon+span,.ant-tag>span+.anticon{margin-left:7px}.ant-tag.ant-tag-rtl{margin-right:0;margin-left:8px;direction:rtl;text-align:right}.ant-tag-rtl .ant-tag-close-icon{margin-right:3px;margin-left:0}.ant-tag-rtl.ant-tag>.anticon+span,.ant-tag-rtl.ant-tag>span+.anticon{margin-right:7px;margin-left:0}.ant-descriptions-header{display:flex;align-items:center;margin-bottom:20px}.ant-descriptions-title{flex:auto;overflow:hidden;color:#000000d9;font-weight:700;font-size:16px;line-height:1.5715;white-space:nowrap;text-overflow:ellipsis}.ant-descriptions-extra{margin-left:auto;color:#000000d9;font-size:14px}.ant-descriptions-view{width:100%;overflow:hidden;border-radius:2px}.ant-descriptions-view table{width:100%;table-layout:fixed}.ant-descriptions-row>th,.ant-descriptions-row>td{padding-bottom:16px}.ant-descriptions-row:last-child{border-bottom:none}.ant-descriptions-item-label{color:#000000d9;font-weight:400;font-size:14px;line-height:1.5715;text-align:start}.ant-descriptions-item-label:after{content:":";position:relative;top:-.5px;margin:0 8px 0 2px}.ant-descriptions-item-label.ant-descriptions-item-no-colon:after{content:" 
"}.ant-descriptions-item-no-label:after{margin:0;content:""}.ant-descriptions-item-content{display:table-cell;flex:1;color:#000000d9;font-size:14px;line-height:1.5715;word-break:break-word;overflow-wrap:break-word}.ant-descriptions-item{padding-bottom:0;vertical-align:top}.ant-descriptions-item-container{display:flex}.ant-descriptions-item-container .ant-descriptions-item-label,.ant-descriptions-item-container .ant-descriptions-item-content{display:inline-flex;align-items:baseline}.ant-descriptions-middle .ant-descriptions-row>th,.ant-descriptions-middle .ant-descriptions-row>td{padding-bottom:12px}.ant-descriptions-small .ant-descriptions-row>th,.ant-descriptions-small .ant-descriptions-row>td{padding-bottom:8px}.ant-descriptions-bordered .ant-descriptions-view{border:1px solid #f0f0f0}.ant-descriptions-bordered .ant-descriptions-view>table{table-layout:auto}.ant-descriptions-bordered .ant-descriptions-item-label,.ant-descriptions-bordered .ant-descriptions-item-content{padding:16px 24px;border-right:1px solid #f0f0f0}.ant-descriptions-bordered .ant-descriptions-item-label:last-child,.ant-descriptions-bordered .ant-descriptions-item-content:last-child{border-right:none}.ant-descriptions-bordered .ant-descriptions-item-label{background-color:#fafafa}.ant-descriptions-bordered .ant-descriptions-item-label:after{display:none}.ant-descriptions-bordered .ant-descriptions-row{border-bottom:1px solid #f0f0f0}.ant-descriptions-bordered .ant-descriptions-row:last-child{border-bottom:none}.ant-descriptions-bordered.ant-descriptions-middle .ant-descriptions-item-label,.ant-descriptions-bordered.ant-descriptions-middle .ant-descriptions-item-content{padding:12px 24px}.ant-descriptions-bordered.ant-descriptions-small .ant-descriptions-item-label,.ant-descriptions-bordered.ant-descriptions-small .ant-descriptions-item-content{padding:8px 16px}.ant-descriptions-rtl{direction:rtl}.ant-descriptions-rtl .ant-descriptions-item-label:after{margin:0 2px 0 
8px}.ant-descriptions-rtl.ant-descriptions-bordered .ant-descriptions-item-label,.ant-descriptions-rtl.ant-descriptions-bordered .ant-descriptions-item-content{border-right:none;border-left:1px solid #f0f0f0}.ant-descriptions-rtl.ant-descriptions-bordered .ant-descriptions-item-label:last-child,.ant-descriptions-rtl.ant-descriptions-bordered .ant-descriptions-item-content:last-child{border-left:none}.ant-divider{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";border-top:1px solid rgba(0,0,0,.06)}.ant-divider-vertical{position:relative;top:-.06em;display:inline-block;height:.9em;margin:0 8px;vertical-align:middle;border-top:0;border-left:1px solid rgba(0,0,0,.06)}.ant-divider-horizontal{display:flex;clear:both;width:100%;min-width:100%;margin:24px 0}.ant-divider-horizontal.ant-divider-with-text{display:flex;margin:16px 0;color:#000000d9;font-weight:500;font-size:16px;white-space:nowrap;text-align:center;border-top:0;border-top-color:#0000000f}.ant-divider-horizontal.ant-divider-with-text:before,.ant-divider-horizontal.ant-divider-with-text:after{position:relative;top:50%;width:50%;border-top:1px solid transparent;border-top-color:inherit;border-bottom:0;transform:translateY(50%);content:""}.ant-divider-horizontal.ant-divider-with-text-left:before{top:50%;width:5%}.ant-divider-horizontal.ant-divider-with-text-left:after{top:50%;width:95%}.ant-divider-horizontal.ant-divider-with-text-right:before{top:50%;width:95%}.ant-divider-horizontal.ant-divider-with-text-right:after{top:50%;width:5%}.ant-divider-inner-text{display:inline-block;padding:0 1em}.ant-divider-dashed{background:none;border-color:#0000000f;border-style:dashed;border-width:1px 0 
0}.ant-divider-horizontal.ant-divider-with-text.ant-divider-dashed{border-top:0}.ant-divider-horizontal.ant-divider-with-text.ant-divider-dashed:before,.ant-divider-horizontal.ant-divider-with-text.ant-divider-dashed:after{border-style:dashed none none}.ant-divider-vertical.ant-divider-dashed{border-width:0 0 0 1px}.ant-divider-plain.ant-divider-with-text{color:#000000d9;font-weight:400;font-size:14px}.ant-divider-rtl{direction:rtl}.ant-divider-rtl.ant-divider-horizontal.ant-divider-with-text-left:before{width:95%}.ant-divider-rtl.ant-divider-horizontal.ant-divider-with-text-left:after{width:5%}.ant-divider-rtl.ant-divider-horizontal.ant-divider-with-text-right:before{width:5%}.ant-divider-rtl.ant-divider-horizontal.ant-divider-with-text-right:after{width:95%}.ant-drawer{position:fixed;z-index:1000;width:0%;height:100%;transition:transform .3s cubic-bezier(.7,.3,.1,1),height 0s ease .3s,width 0s ease .3s}.ant-drawer>*{transition:transform .3s cubic-bezier(.7,.3,.1,1),box-shadow .3s cubic-bezier(.7,.3,.1,1)}.ant-drawer-content-wrapper{position:absolute}.ant-drawer .ant-drawer-content{width:100%;height:100%}.ant-drawer-left,.ant-drawer-right{top:0;width:0%;height:100%}.ant-drawer-left .ant-drawer-content-wrapper,.ant-drawer-right .ant-drawer-content-wrapper{height:100%}.ant-drawer-left.ant-drawer-open,.ant-drawer-right.ant-drawer-open{width:100%;transition:transform .3s cubic-bezier(.7,.3,.1,1)}.ant-drawer-left.ant-drawer-open.no-mask,.ant-drawer-right.ant-drawer-open.no-mask{width:0%}.ant-drawer-left.ant-drawer-open .ant-drawer-content-wrapper{box-shadow:2px 0 8px #00000026}.ant-drawer-right,.ant-drawer-right .ant-drawer-content-wrapper{right:0}.ant-drawer-right.ant-drawer-open .ant-drawer-content-wrapper{box-shadow:-2px 0 8px #00000026}.ant-drawer-right.ant-drawer-open.no-mask{right:1px;transform:translate(1px)}.ant-drawer-top,.ant-drawer-bottom{left:0;width:100%;height:0%}.ant-drawer-top .ant-drawer-content-wrapper,.ant-drawer-bottom 
.ant-drawer-content-wrapper{width:100%}.ant-drawer-top.ant-drawer-open,.ant-drawer-bottom.ant-drawer-open{height:100%;transition:transform .3s cubic-bezier(.7,.3,.1,1)}.ant-drawer-top.ant-drawer-open.no-mask,.ant-drawer-bottom.ant-drawer-open.no-mask{height:0%}.ant-drawer-top{top:0}.ant-drawer-top.ant-drawer-open .ant-drawer-content-wrapper{box-shadow:0 2px 8px #00000026}.ant-drawer-bottom,.ant-drawer-bottom .ant-drawer-content-wrapper{bottom:0}.ant-drawer-bottom.ant-drawer-open .ant-drawer-content-wrapper{box-shadow:0 -2px 8px #00000026}.ant-drawer-bottom.ant-drawer-open.no-mask{bottom:1px;transform:translateY(1px)}.ant-drawer.ant-drawer-open .ant-drawer-mask{height:100%;opacity:1;transition:none;-webkit-animation:antdDrawerFadeIn .3s cubic-bezier(.7,.3,.1,1);animation:antdDrawerFadeIn .3s cubic-bezier(.7,.3,.1,1)}.ant-drawer-title{margin:0;color:#000000d9;font-weight:500;font-size:16px;line-height:22px}.ant-drawer-content{position:relative;z-index:1;overflow:auto;background-color:#fff;background-clip:padding-box;border:0}.ant-drawer-close{position:absolute;top:0;right:0;z-index:10;display:block;width:56px;height:56px;padding:0;color:#00000073;font-weight:700;font-size:16px;font-style:normal;line-height:56px;text-align:center;text-transform:none;text-decoration:none;background:transparent;border:0;outline:0;cursor:pointer;transition:color .3s;text-rendering:auto}.ant-drawer-close:focus,.ant-drawer-close:hover{color:#000000bf;text-decoration:none}.ant-drawer-header{position:relative;padding:16px 24px;color:#000000d9;background:#fff;border-bottom:1px solid #f0f0f0;border-radius:2px 2px 0 0}.ant-drawer-header-no-title{color:#000000d9;background:#fff}.ant-drawer-body{padding:24px;font-size:14px;line-height:1.5715;word-wrap:break-word}.ant-drawer-wrapper-body{height:100%;overflow:auto}.ant-drawer-mask{position:absolute;top:0;left:0;width:100%;height:0;background-color:#00000073;opacity:0;filter:alpha(opacity=45);transition:opacity .3s linear,height 0s ease 
.3s}.ant-drawer-open-content{box-shadow:0 4px 12px #00000026}@-webkit-keyframes antdDrawerFadeIn{0%{opacity:0}to{opacity:1}}@keyframes antdDrawerFadeIn{0%{opacity:0}to{opacity:1}}.ant-form-item .ant-mentions,.ant-form-item textarea.ant-input{height:auto}.ant-form-item .ant-upload{background:transparent}.ant-form-item .ant-upload.ant-upload-drag{background:#fafafa}.ant-form-item input[type=radio],.ant-form-item input[type=checkbox]{width:14px;height:14px}.ant-form-item .ant-radio-inline,.ant-form-item .ant-checkbox-inline{display:inline-block;margin-left:8px;font-weight:400;vertical-align:middle;cursor:pointer}.ant-form-item .ant-radio-inline:first-child,.ant-form-item .ant-checkbox-inline:first-child{margin-left:0}.ant-form-item .ant-checkbox-vertical,.ant-form-item .ant-radio-vertical{display:block}.ant-form-item .ant-checkbox-vertical+.ant-checkbox-vertical,.ant-form-item .ant-radio-vertical+.ant-radio-vertical{margin-left:0}.ant-form-item .ant-input-number+.ant-form-text{margin-left:8px}.ant-form-item .ant-input-number-handler-wrap{z-index:2}.ant-form-item .ant-select,.ant-form-item .ant-cascader-picker{width:100%}.ant-form-item .ant-picker-calendar-year-select,.ant-form-item .ant-picker-calendar-month-select,.ant-form-item .ant-input-group .ant-select,.ant-form-item .ant-input-group .ant-cascader-picker{width:auto}.ant-form-inline{display:flex;flex-wrap:wrap}.ant-form-inline .ant-form-item{flex:none;flex-wrap:nowrap;margin-right:16px;margin-bottom:0}.ant-form-inline .ant-form-item-with-help{margin-bottom:24px}.ant-form-inline .ant-form-item>.ant-form-item-label,.ant-form-inline .ant-form-item>.ant-form-item-control{display:inline-block;vertical-align:top}.ant-form-inline .ant-form-item>.ant-form-item-label{flex:none}.ant-form-inline .ant-form-item .ant-form-text,.ant-form-inline .ant-form-item .ant-form-item-has-feedback{display:inline-block}.ant-form-horizontal .ant-form-item-label{flex-grow:0}.ant-form-horizontal .ant-form-item-control{flex:1 1 
0}.ant-form-vertical .ant-form-item{flex-direction:column}.ant-form-vertical .ant-form-item-label>label{height:auto}.ant-form-vertical .ant-form-item-label,.ant-col-24.ant-form-item-label,.ant-col-xl-24.ant-form-item-label{padding:0 0 8px;line-height:1.5715;white-space:initial;text-align:left}.ant-form-vertical .ant-form-item-label>label,.ant-col-24.ant-form-item-label>label,.ant-col-xl-24.ant-form-item-label>label{margin:0}.ant-form-vertical .ant-form-item-label>label:after,.ant-col-24.ant-form-item-label>label:after,.ant-col-xl-24.ant-form-item-label>label:after{display:none}.ant-form-rtl.ant-form-vertical .ant-form-item-label,.ant-form-rtl.ant-col-24.ant-form-item-label,.ant-form-rtl.ant-col-xl-24.ant-form-item-label{text-align:right}@media (max-width: 575px){.ant-form-item .ant-form-item-label{padding:0 0 8px;line-height:1.5715;white-space:initial;text-align:left}.ant-form-item .ant-form-item-label>label{margin:0}.ant-form-item .ant-form-item-label>label:after{display:none}.ant-form-rtl.ant-form-item .ant-form-item-label{text-align:right}.ant-form .ant-form-item{flex-wrap:wrap}.ant-form .ant-form-item .ant-form-item-label,.ant-form .ant-form-item .ant-form-item-control{flex:0 0 100%;max-width:100%}.ant-col-xs-24.ant-form-item-label{padding:0 0 8px;line-height:1.5715;white-space:initial;text-align:left}.ant-col-xs-24.ant-form-item-label>label{margin:0}.ant-col-xs-24.ant-form-item-label>label:after{display:none}.ant-form-rtl.ant-col-xs-24.ant-form-item-label{text-align:right}}@media (max-width: 767px){.ant-col-sm-24.ant-form-item-label{padding:0 0 8px;line-height:1.5715;white-space:initial;text-align:left}.ant-col-sm-24.ant-form-item-label>label{margin:0}.ant-col-sm-24.ant-form-item-label>label:after{display:none}.ant-form-rtl.ant-col-sm-24.ant-form-item-label{text-align:right}}@media (max-width: 991px){.ant-col-md-24.ant-form-item-label{padding:0 0 
8px;line-height:1.5715;white-space:initial;text-align:left}.ant-col-md-24.ant-form-item-label>label{margin:0}.ant-col-md-24.ant-form-item-label>label:after{display:none}.ant-form-rtl.ant-col-md-24.ant-form-item-label{text-align:right}}@media (max-width: 1199px){.ant-col-lg-24.ant-form-item-label{padding:0 0 8px;line-height:1.5715;white-space:initial;text-align:left}.ant-col-lg-24.ant-form-item-label>label{margin:0}.ant-col-lg-24.ant-form-item-label>label:after{display:none}.ant-form-rtl.ant-col-lg-24.ant-form-item-label{text-align:right}}@media (max-width: 1599px){.ant-col-xl-24.ant-form-item-label{padding:0 0 8px;line-height:1.5715;white-space:initial;text-align:left}.ant-col-xl-24.ant-form-item-label>label{margin:0}.ant-col-xl-24.ant-form-item-label>label:after{display:none}.ant-form-rtl.ant-col-xl-24.ant-form-item-label{text-align:right}}.ant-form-item-explain.ant-form-item-explain-error{color:#ff4d4f}.ant-form-item-explain.ant-form-item-explain-warning{color:#faad14}.ant-form-item-has-feedback .ant-input{padding-right:24px}.ant-form-item-has-feedback .ant-input-affix-wrapper .ant-input-suffix{padding-right:18px}.ant-form-item-has-feedback .ant-input-search:not(.ant-input-search-enter-button) .ant-input-suffix{right:28px}.ant-form-item-has-feedback .ant-switch{margin:2px 0 4px}.ant-form-item-has-feedback>.ant-select .ant-select-arrow,.ant-form-item-has-feedback>.ant-select .ant-select-clear,.ant-form-item-has-feedback :not(.ant-input-group-addon)>.ant-select .ant-select-arrow,.ant-form-item-has-feedback :not(.ant-input-group-addon)>.ant-select .ant-select-clear{right:32px}.ant-form-item-has-feedback>.ant-select .ant-select-selection-selected-value,.ant-form-item-has-feedback :not(.ant-input-group-addon)>.ant-select .ant-select-selection-selected-value{padding-right:42px}.ant-form-item-has-feedback .ant-cascader-picker-arrow{margin-right:19px}.ant-form-item-has-feedback .ant-cascader-picker-clear{right:32px}.ant-form-item-has-feedback 
.ant-picker,.ant-form-item-has-feedback .ant-picker-large{padding-right:29.2px}.ant-form-item-has-feedback .ant-picker-small{padding-right:25.2px}.ant-form-item-has-feedback.ant-form-item-has-success .ant-form-item-children-icon,.ant-form-item-has-feedback.ant-form-item-has-warning .ant-form-item-children-icon,.ant-form-item-has-feedback.ant-form-item-has-error .ant-form-item-children-icon,.ant-form-item-has-feedback.ant-form-item-is-validating .ant-form-item-children-icon{position:absolute;top:50%;right:0;z-index:1;width:32px;height:20px;margin-top:-10px;font-size:14px;line-height:20px;text-align:center;visibility:visible;-webkit-animation:zoomIn .3s cubic-bezier(.12,.4,.29,1.46);animation:zoomIn .3s cubic-bezier(.12,.4,.29,1.46);pointer-events:none}.ant-form-item-has-success.ant-form-item-has-feedback .ant-form-item-children-icon{color:#52c41a;-webkit-animation-name:diffZoomIn1!important;animation-name:diffZoomIn1!important}.ant-form-item-has-warning .ant-form-item-split{color:#faad14}.ant-form-item-has-warning .ant-input,.ant-form-item-has-warning .ant-input-affix-wrapper,.ant-form-item-has-warning .ant-input:hover,.ant-form-item-has-warning .ant-input-affix-wrapper:hover{background-color:#fff;border-color:#faad14}.ant-form-item-has-warning .ant-input:focus,.ant-form-item-has-warning .ant-input-affix-wrapper:focus,.ant-form-item-has-warning .ant-input-focused,.ant-form-item-has-warning .ant-input-affix-wrapper-focused{border-color:#ffc53d;border-right-width:1px!important;outline:0;box-shadow:0 0 0 2px #faad1433}.ant-form-item-has-warning .ant-input-disabled,.ant-form-item-has-warning .ant-input-disabled:hover,.ant-form-item-has-warning .ant-input-affix-wrapper-disabled,.ant-form-item-has-warning .ant-input-affix-wrapper-disabled:hover{background-color:#f5f5f5;border-color:#d9d9d9}.ant-form-item-has-warning .ant-input-affix-wrapper-disabled input:focus,.ant-form-item-has-warning .ant-input-affix-wrapper-disabled:hover 
input:focus{box-shadow:none!important}.ant-form-item-has-warning .ant-calendar-picker-open .ant-calendar-picker-input{border-color:#ffc53d;border-right-width:1px!important;outline:0;box-shadow:0 0 0 2px #faad1433}.ant-form-item-has-warning .ant-input-prefix{color:#faad14}.ant-form-item-has-warning .ant-input-group-addon{color:#faad14;border-color:#faad14}.ant-form-item-has-warning .has-feedback{color:#faad14}.ant-form-item-has-warning.ant-form-item-has-feedback .ant-form-item-children-icon{color:#faad14;-webkit-animation-name:diffZoomIn3!important;animation-name:diffZoomIn3!important}.ant-form-item-has-warning .ant-select:not(.ant-select-disabled):not(.ant-select-customize-input) .ant-select-selector{background-color:#fff;border-color:#faad14!important}.ant-form-item-has-warning .ant-select:not(.ant-select-disabled):not(.ant-select-customize-input).ant-select-open .ant-select-selector,.ant-form-item-has-warning .ant-select:not(.ant-select-disabled):not(.ant-select-customize-input).ant-select-focused .ant-select-selector{border-color:#ffc53d;border-right-width:1px!important;outline:0;box-shadow:0 0 0 2px #faad1433}.ant-form-item-has-warning .ant-input-number,.ant-form-item-has-warning .ant-picker{background-color:#fff;border-color:#faad14}.ant-form-item-has-warning .ant-input-number-focused,.ant-form-item-has-warning .ant-picker-focused,.ant-form-item-has-warning .ant-input-number:focus,.ant-form-item-has-warning .ant-picker:focus{border-color:#ffc53d;border-right-width:1px!important;outline:0;box-shadow:0 0 0 2px #faad1433}.ant-form-item-has-warning .ant-input-number:not([disabled]):hover,.ant-form-item-has-warning .ant-picker:not([disabled]):hover{background-color:#fff;border-color:#faad14}.ant-form-item-has-warning .ant-cascader-picker:focus .ant-cascader-input{border-color:#ffc53d;border-right-width:1px!important;outline:0;box-shadow:0 0 0 2px #faad1433}.ant-form-item-has-error .ant-form-item-split{color:#ff4d4f}.ant-form-item-has-error 
.ant-input,.ant-form-item-has-error .ant-input-affix-wrapper,.ant-form-item-has-error .ant-input:hover,.ant-form-item-has-error .ant-input-affix-wrapper:hover{background-color:#fff;border-color:#ff4d4f}.ant-form-item-has-error .ant-input:focus,.ant-form-item-has-error .ant-input-affix-wrapper:focus,.ant-form-item-has-error .ant-input-focused,.ant-form-item-has-error .ant-input-affix-wrapper-focused{border-color:#ff7875;border-right-width:1px!important;outline:0;box-shadow:0 0 0 2px #ff4d4f33}.ant-form-item-has-error .ant-input-disabled,.ant-form-item-has-error .ant-input-disabled:hover,.ant-form-item-has-error .ant-input-affix-wrapper-disabled,.ant-form-item-has-error .ant-input-affix-wrapper-disabled:hover{background-color:#f5f5f5;border-color:#d9d9d9}.ant-form-item-has-error .ant-input-affix-wrapper-disabled input:focus,.ant-form-item-has-error .ant-input-affix-wrapper-disabled:hover input:focus{box-shadow:none!important}.ant-form-item-has-error .ant-calendar-picker-open .ant-calendar-picker-input{border-color:#ff7875;border-right-width:1px!important;outline:0;box-shadow:0 0 0 2px #ff4d4f33}.ant-form-item-has-error .ant-input-prefix{color:#ff4d4f}.ant-form-item-has-error .ant-input-group-addon{color:#ff4d4f;border-color:#ff4d4f}.ant-form-item-has-error .has-feedback{color:#ff4d4f}.ant-form-item-has-error.ant-form-item-has-feedback .ant-form-item-children-icon{color:#ff4d4f;-webkit-animation-name:diffZoomIn2!important;animation-name:diffZoomIn2!important}.ant-form-item-has-error .ant-select:not(.ant-select-disabled):not(.ant-select-customize-input) .ant-select-selector{background-color:#fff;border-color:#ff4d4f!important}.ant-form-item-has-error .ant-select:not(.ant-select-disabled):not(.ant-select-customize-input).ant-select-open .ant-select-selector,.ant-form-item-has-error .ant-select:not(.ant-select-disabled):not(.ant-select-customize-input).ant-select-focused .ant-select-selector{border-color:#ff7875;border-right-width:1px!important;outline:0;box-shadow:0 0 0 
2px #ff4d4f33}.ant-form-item-has-error .ant-input-group-addon .ant-select.ant-select-single:not(.ant-select-customize-input) .ant-select-selector{background-color:inherit;border:0;box-shadow:none}.ant-form-item-has-error .ant-select.ant-select-auto-complete .ant-input:focus{border-color:#ff4d4f}.ant-form-item-has-error .ant-input-number,.ant-form-item-has-error .ant-picker{background-color:#fff;border-color:#ff4d4f}.ant-form-item-has-error .ant-input-number-focused,.ant-form-item-has-error .ant-picker-focused,.ant-form-item-has-error .ant-input-number:focus,.ant-form-item-has-error .ant-picker:focus{border-color:#ff7875;border-right-width:1px!important;outline:0;box-shadow:0 0 0 2px #ff4d4f33}.ant-form-item-has-error .ant-input-number:not([disabled]):hover,.ant-form-item-has-error .ant-picker:not([disabled]):hover{background-color:#fff;border-color:#ff4d4f}.ant-form-item-has-error .ant-mention-wrapper .ant-mention-editor,.ant-form-item-has-error .ant-mention-wrapper .ant-mention-editor:not([disabled]):hover{background-color:#fff;border-color:#ff4d4f}.ant-form-item-has-error .ant-mention-wrapper.ant-mention-active:not([disabled]) .ant-mention-editor,.ant-form-item-has-error .ant-mention-wrapper .ant-mention-editor:not([disabled]):focus{border-color:#ff7875;border-right-width:1px!important;outline:0;box-shadow:0 0 0 2px #ff4d4f33}.ant-form-item-has-error .ant-cascader-picker:hover .ant-cascader-picker-label:hover+.ant-cascader-input.ant-input{border-color:#ff4d4f}.ant-form-item-has-error .ant-cascader-picker:focus .ant-cascader-input{background-color:#fff;border-color:#ff7875;border-right-width:1px!important;outline:0;box-shadow:0 0 0 2px #ff4d4f33}.ant-form-item-has-error .ant-transfer-list{border-color:#ff4d4f}.ant-form-item-has-error .ant-transfer-list-search:not([disabled]){border-color:#d9d9d9}.ant-form-item-has-error .ant-transfer-list-search:not([disabled]):hover{border-color:#40a9ff;border-right-width:1px!important}.ant-form-item-has-error 
.ant-transfer-list-search:not([disabled]):focus{border-color:#40a9ff;border-right-width:1px!important;outline:0;box-shadow:0 0 0 2px #1890ff33}/* Error state: radio button wrappers and the separator drawn by their :before pseudo-element. */.ant-form-item-has-error .ant-radio-button-wrapper{border-color:#ff4d4f!important}.ant-form-item-has-error .ant-radio-button-wrapper:not(:first-child):before{background-color:#ff4d4f}/* Validating state: the feedback icon is displayed in blue. */.ant-form-item-is-validating.ant-form-item-has-feedback .ant-form-item-children-icon{display:inline-block;color:#1890ff}/* Form root: box-model and typography reset, followed by rules for native elements (legend, label, input, select, output) inside the form. */.ant-form{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum"}.ant-form legend{display:block;width:100%;margin-bottom:20px;padding:0;color:#00000073;font-size:16px;line-height:inherit;border:0;border-bottom:1px solid #d9d9d9}.ant-form label{font-size:14px}.ant-form input[type=search]{box-sizing:border-box}.ant-form input[type=radio],.ant-form input[type=checkbox]{line-height:normal}.ant-form input[type=file]{display:block}.ant-form input[type=range]{display:block;width:100%}.ant-form select[multiple],.ant-form select[size]{height:auto}/* Two outline declarations on purpose: the second replaces the first wherever the -webkit-focus-ring-color value is accepted. */.ant-form input[type=file]:focus,.ant-form input[type=radio]:focus,.ant-form input[type=checkbox]:focus{outline:thin dotted;outline:5px auto -webkit-focus-ring-color;outline-offset:-2px}.ant-form output{display:block;padding-top:15px;color:#000000d9;font-size:14px;line-height:1.5715}.ant-form .ant-form-text{display:inline-block;padding-right:8px}/* Size variants: label height and control min-height are 24px for small forms and 40px for large forms. */.ant-form-small .ant-form-item-label>label{height:24px}.ant-form-small .ant-form-item-control-input{min-height:24px}.ant-form-large .ant-form-item-label>label{height:40px}.ant-form-large .ant-form-item-control-input{min-height:40px}/* Form item container. */.ant-form-item{box-sizing:border-box;margin:0 0 
24px;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";vertical-align:top}.ant-form-item-with-help{margin-bottom:0}.ant-form-item-hidden,.ant-form-item-hidden.ant-row{display:none}/* Label cell: right-aligned by default, single line with overflow hidden; -left variant aligns left. */.ant-form-item-label{display:inline-block;flex-grow:0;overflow:hidden;white-space:nowrap;text-align:right;vertical-align:middle}.ant-form-item-label-left{text-align:left}.ant-form-item-label>label{position:relative;display:inline-flex;align-items:center;height:32px;color:#000000d9;font-size:14px}.ant-form-item-label>label>.anticon{font-size:14px;vertical-align:top}/* Required labels get a red asterisk through :before, except with the required-mark-optional class; .ant-form-hide-required-mark hides it. */.ant-form-item-label>label.ant-form-item-required:not(.ant-form-item-required-mark-optional):before{display:inline-block;margin-right:4px;color:#ff4d4f;font-size:14px;font-family:SimSun,sans-serif;line-height:1;content:"*"}.ant-form-hide-required-mark .ant-form-item-label>label.ant-form-item-required:not(.ant-form-item-required-mark-optional):before{display:none}.ant-form-item-label>label .ant-form-item-optional{display:inline-block;margin-left:4px;color:#00000073}.ant-form-hide-required-mark .ant-form-item-label>label .ant-form-item-optional{display:none}.ant-form-item-label>label .ant-form-item-tooltip{color:#00000073;cursor:help;-ms-writing-mode:lr-tb;writing-mode:horizontal-tb;-webkit-margin-start:4px;margin-inline-start:4px}/* Labels end with a colon generated by :after; the no-colon variant replaces it with a single space. */.ant-form-item-label>label:after{content:":";position:relative;top:-.5px;margin:0 8px 0 2px}.ant-form-item-label>label.ant-form-item-no-colon:after{content:" "}/* Control cell: a growing flex column; full width when it is the first child and carries no ant-col-* class. */.ant-form-item-control{display:flex;flex-direction:column;flex-grow:1}.ant-form-item-control:first-child:not([class^="ant-col-"]):not([class*=" ant-col-"]){width:100%}.ant-form-item-control-input{position:relative;display:flex;align-items:center;min-height:32px}.ant-form-item-control-input-content{flex:auto;max-width:100%}/* Explain and extra text blocks. */.ant-form-item-explain,.ant-form-item-extra{clear:both;min-height:24px;color:#00000073;font-size:14px;line-height:1.5715;transition:color .3s 
cubic-bezier(.215,.61,.355,1)}.ant-form-item .ant-input-textarea-show-count:after{margin-bottom:-22px}/* Show-help transition classes: animations are declared paused and switch to running once the matching -active class is present. */.ant-show-help-enter,.ant-show-help-appear,.ant-show-help-leave{-webkit-animation-duration:.3s;animation-duration:.3s;-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.ant-show-help-enter.ant-show-help-enter-active,.ant-show-help-appear.ant-show-help-appear-active{-webkit-animation-name:antShowHelpIn;animation-name:antShowHelpIn;-webkit-animation-play-state:running;animation-play-state:running}.ant-show-help-leave.ant-show-help-leave-active{-webkit-animation-name:antShowHelpOut;animation-name:antShowHelpOut;-webkit-animation-play-state:running;animation-play-state:running;pointer-events:none}.ant-show-help-enter,.ant-show-help-appear{opacity:0;-webkit-animation-timing-function:cubic-bezier(.645,.045,.355,1);animation-timing-function:cubic-bezier(.645,.045,.355,1)}.ant-show-help-leave{-webkit-animation-timing-function:cubic-bezier(.645,.045,.355,1);animation-timing-function:cubic-bezier(.645,.045,.355,1)}/* Keyframes (each emitted with and without the -webkit- prefix): antShowHelpIn/Out translate by 5px on Y while changing opacity; diffZoomIn1-3 scale from 0 to 1. */@-webkit-keyframes antShowHelpIn{0%{transform:translateY(-5px);opacity:0}to{transform:translateY(0);opacity:1}}@keyframes antShowHelpIn{0%{transform:translateY(-5px);opacity:0}to{transform:translateY(0);opacity:1}}@-webkit-keyframes antShowHelpOut{to{transform:translateY(-5px);opacity:0}}@keyframes antShowHelpOut{to{transform:translateY(-5px);opacity:0}}@-webkit-keyframes diffZoomIn1{0%{transform:scale(0)}to{transform:scale(1)}}@keyframes diffZoomIn1{0%{transform:scale(0)}to{transform:scale(1)}}@-webkit-keyframes diffZoomIn2{0%{transform:scale(0)}to{transform:scale(1)}}@keyframes diffZoomIn2{0%{transform:scale(0)}to{transform:scale(1)}}@-webkit-keyframes diffZoomIn3{0%{transform:scale(0)}to{transform:scale(1)}}@keyframes diffZoomIn3{0%{transform:scale(0)}to{transform:scale(1)}}/* RTL overrides for Form start here. */.ant-form-rtl{direction:rtl}.ant-form-rtl .ant-form-item-label{text-align:left}.ant-form-rtl 
.ant-form-item-label>label.ant-form-item-required:before{margin-right:0;margin-left:4px}.ant-form-rtl .ant-form-item-label>label:after{margin:0 2px 0 8px}.ant-form-rtl .ant-form-item-label>label .ant-form-item-optional{margin-right:4px;margin-left:0}.ant-col-rtl .ant-form-item-control:first-child{width:100%}/* RTL, items with a feedback icon: the larger padding and the icon/arrow offsets are placed on the left side. */.ant-form-rtl .ant-form-item-has-feedback .ant-input{padding-right:11px;padding-left:24px}.ant-form-rtl .ant-form-item-has-feedback .ant-input-affix-wrapper .ant-input-suffix{padding-right:11px;padding-left:18px}.ant-form-rtl .ant-form-item-has-feedback .ant-input-affix-wrapper .ant-input{padding:0}.ant-form-rtl .ant-form-item-has-feedback .ant-input-search:not(.ant-input-search-enter-button) .ant-input-suffix{right:auto;left:28px}.ant-form-rtl .ant-form-item-has-feedback .ant-input-number{padding-left:18px}/* RTL, Select arrow / clear button and selected value. */.ant-form-rtl .ant-form-item-has-feedback>.ant-select .ant-select-arrow,.ant-form-rtl .ant-form-item-has-feedback>.ant-select .ant-select-clear,.ant-form-rtl .ant-form-item-has-feedback :not(.ant-input-group-addon)>.ant-select .ant-select-arrow,.ant-form-rtl .ant-form-item-has-feedback :not(.ant-input-group-addon)>.ant-select .ant-select-clear{right:auto;left:32px}.ant-form-rtl .ant-form-item-has-feedback>.ant-select .ant-select-selection-selected-value,.ant-form-rtl .ant-form-item-has-feedback :not(.ant-input-group-addon)>.ant-select .ant-select-selection-selected-value{padding-right:0;padding-left:42px}/* RTL, Cascader and Picker. */.ant-form-rtl .ant-form-item-has-feedback .ant-cascader-picker-arrow{margin-right:0;margin-left:19px}.ant-form-rtl .ant-form-item-has-feedback .ant-cascader-picker-clear{right:auto;left:32px}.ant-form-rtl .ant-form-item-has-feedback .ant-picker,.ant-form-rtl .ant-form-item-has-feedback .ant-picker-large{padding-right:11px;padding-left:29.2px}.ant-form-rtl .ant-form-item-has-feedback .ant-picker-small{padding-right:7px;padding-left:25.2px}.ant-form-rtl .ant-form-item-has-feedback.ant-form-item-has-success .ant-form-item-children-icon,.ant-form-rtl 
.ant-form-item-has-feedback.ant-form-item-has-warning .ant-form-item-children-icon,.ant-form-rtl .ant-form-item-has-feedback.ant-form-item-has-error .ant-form-item-children-icon,.ant-form-rtl .ant-form-item-has-feedback.ant-form-item-is-validating .ant-form-item-children-icon{right:auto;left:0}.ant-form-rtl.ant-form-inline .ant-form-item{margin-right:0;margin-left:16px}/* Image component: relatively positioned inline-block wrapper; the inner img fills its width. */.ant-image{position:relative;display:inline-block}.ant-image-img{width:100%;height:auto}/* Placeholder: grey background with an SVG embedded as a base64 data URI, not repeated. */.ant-image-img-placeholder{background-color:#f5f5f5;background-image:url(data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMTYiIGhlaWdodD0iMTYiIHZpZXdCb3g9IjAgMCAxNiAxNiIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cGF0aCBkPSJNMTQuNSAyLjVoLTEzQS41LjUgMCAwIDAgMSAzdjEwYS41LjUgMCAwIDAgLjUuNWgxM2EuNS41IDAgMCAwIC41LS41VjNhLjUuNSAwIDAgMC0uNS0uNXpNNS4yODEgNC43NWExIDEgMCAwIDEgMCAyIDEgMSAwIDAgMSAwLTJ6bTguMDMgNi44M2EuMTI3LjEyNyAwIDAgMS0uMDgxLjAzSDIuNzY5YS4xMjUuMTI1IDAgMCAxLS4wOTYtLjIwN2wyLjY2MS0zLjE1NmEuMTI2LjEyNiAwIDAgMSAuMTc3LS4wMTZsLjAxNi4wMTZMNy4wOCAxMC4wOWwyLjQ3LTIuOTNhLjEyNi4xMjYgMCAwIDEgLjE3Ny0uMDE2bC4wMTUuMDE2IDMuNTg4IDQuMjQ0YS4xMjcuMTI3IDAgMCAxLS4wMi4xNzV6IiBmaWxsPSIjOEM4QzhDIiBmaWxsLXJ1bGU9Im5vbnplcm8iLz48L3N2Zz4=);background-repeat:no-repeat;background-position:center 
center;background-size:30%}.ant-image-placeholder{position:absolute;top:0;right:0;bottom:0;left:0}/* Image preview root: pointer events are disabled here and re-enabled on the image itself (pointer-events:auto below). */.ant-image-preview{pointer-events:none;height:100%;text-align:center}.ant-image-preview.zoom-enter,.ant-image-preview.zoom-appear{transform:none;opacity:0;-webkit-animation-duration:.3s;animation-duration:.3s;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}/* Mask and wrap are fixed and pinned to all four edges; body is absolutely positioned the same way. */.ant-image-preview-mask{position:fixed;top:0;right:0;bottom:0;left:0;z-index:1000;height:100%;background-color:#00000073}.ant-image-preview-mask-hidden{display:none}.ant-image-preview-wrap{position:fixed;top:0;right:0;bottom:0;left:0;overflow:auto;outline:0;-webkit-overflow-scrolling:touch}.ant-image-preview-body{position:absolute;top:0;right:0;bottom:0;left:0;overflow:hidden}/* Previewed image: grab cursor, .3s transform transition, text selection disabled. */.ant-image-preview-img{max-width:100%;max-height:100%;vertical-align:middle;transform:scaleZ(1);cursor:-webkit-grab;cursor:grab;transition:transform .3s cubic-bezier(.215,.61,.355,1) 0s;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;pointer-events:auto}.ant-image-preview-img-wrapper{position:absolute;top:0;right:0;bottom:0;left:0;transition:transform .3s cubic-bezier(.215,.61,.355,1) 0s}/* Empty inline-block :before with height:50%; NOTE(review): presumably a vertical-centering helper for the image, confirm against the markup. */.ant-image-preview-img-wrapper:before{display:inline-block;width:1px;height:50%;margin-right:-1px;content:""}/* While moving: grabbing cursor. */.ant-image-preview-moving .ant-image-preview-img{cursor:-webkit-grabbing;cursor:grabbing}.ant-image-preview-moving 
.ant-image-preview-img-wrapper{transition-duration:0s}.ant-image-preview-wrap{z-index:1080}/* Preview toolbar: absolutely positioned at the top, full width, items laid out with flex row-reverse. The earlier color:#000000d9 was overridden by color:#ffffffd9 in the same rule and has been dropped; the effective colour is unchanged. */.ant-image-preview-operations{box-sizing:border-box;margin:0;padding:0;font-size:14px;font-variant:tabular-nums;line-height:1.5715;font-feature-settings:"tnum";position:absolute;top:0;right:0;z-index:1;display:flex;flex-direction:row-reverse;align-items:center;width:100%;color:#ffffffd9;list-style:none;background:rgba(0,0,0,.1);pointer-events:auto}.ant-image-preview-operations-operation{margin-left:12px;padding:12px;cursor:pointer}.ant-image-preview-operations-operation-disabled{color:#ffffff73;pointer-events:none}.ant-image-preview-operations-operation:last-of-type{margin-left:0}.ant-image-preview-operations-icon{font-size:18px}/* Previous / next switch buttons: 44px circles positioned at top:50% with margin-top:-22px. The shared rule sets right:10px for both buttons. */.ant-image-preview-switch-left,.ant-image-preview-switch-right{position:absolute;top:50%;right:10px;z-index:1;display:flex;align-items:center;justify-content:center;width:44px;height:44px;margin-top:-22px;color:#ffffffd9;background:rgba(0,0,0,.1);border-radius:50%;cursor:pointer;pointer-events:auto}.ant-image-preview-switch-left-disabled,.ant-image-preview-switch-right-disabled{color:#ffffff73;cursor:not-allowed}.ant-image-preview-switch-left-disabled>.anticon,.ant-image-preview-switch-right-disabled>.anticon{cursor:not-allowed}.ant-image-preview-switch-left>.anticon,.ant-image-preview-switch-right>.anticon{font-size:18px}/* right:auto cancels the inherited right:10px. Without it the left button has left, right and width all set (over-constrained), and in an RTL containing block `left` is the value that gets ignored, which stacks this button on top of the right one. LTR rendering is unchanged. */.ant-image-preview-switch-left{right:auto;left:10px}.ant-image-preview-switch-right{right:10px}/* InputNumber wrapper: inline-block, 90px wide. The earlier width:100% was overridden by width:90px in the same rule and has been dropped. */.ant-input-number{box-sizing:border-box;font-variant:tabular-nums;list-style:none;font-feature-settings:"tnum";position:relative;color:#000000d9;font-size:14px;line-height:1.5715;background-color:#fff;background-image:none;transition:all .3s;display:inline-block;width:90px;margin:0;padding:0;border:1px solid 
#d9d9d9;border-radius:2px}.ant-input-number::-moz-placeholder{color:#bfbfbf;opacity:1}.ant-input-number:-ms-input-placeholder{color:#bfbfbf}.ant-input-number::-webkit-input-placeholder{color:#bfbfbf}.ant-input-number:-moz-placeholder-shown{text-overflow:ellipsis}.ant-input-number:-ms-input-placeholder{text-overflow:ellipsis}.ant-input-number:placeholder-shown{text-overflow:ellipsis}.ant-input-number:focus{border-color:#40a9ff;border-right-width:1px!important;outline:0;box-shadow:0 0 0 2px #1890ff33}.ant-input-number[disabled]{color:#00000040;background-color:#f5f5f5;cursor:not-allowed;opacity:1}.ant-input-number[disabled]:hover{border-color:#d9d9d9;border-right-width:1px!important}textarea.ant-input-number{max-width:100%;height:auto;min-height:32px;line-height:1.5715;vertical-align:bottom;transition:all .3s,height 0s}.ant-input-number-lg{padding:6.5px 11px;font-size:16px}.ant-input-number-sm{padding:0 7px}.ant-input-number-handler{position:relative;display:block;width:100%;height:50%;overflow:hidden;color:#00000073;font-weight:700;line-height:0;text-align:center;transition:all .1s linear}.ant-input-number-handler:active{background:#f4f4f4}.ant-input-number-handler:hover .ant-input-number-handler-up-inner,.ant-input-number-handler:hover .ant-input-number-handler-down-inner{color:#40a9ff}.ant-input-number-handler-up-inner,.ant-input-number-handler-down-inner{display:inline-block;color:inherit;font-style:normal;line-height:0;text-align:center;text-transform:none;vertical-align:-.125em;text-rendering:optimizeLegibility;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;position:absolute;right:4px;width:12px;height:12px;color:#00000073;line-height:12px;transition:all .1s linear;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.ant-input-number-handler-up-inner>*,.ant-input-number-handler-down-inner>*{line-height:1}.ant-input-number-handler-up-inner svg,.ant-input-number-handler-down-inner 
svg{display:inline-block}.ant-input-number-handler-up-inner:before,.ant-input-number-handler-down-inner:before{display:none}.ant-input-number-handler-up-inner .ant-input-number-handler-up-inner-icon,.ant-input-number-handler-up-inner .ant-input-number-handler-down-inner-icon,.ant-input-number-handler-down-inner .ant-input-number-handler-up-inner-icon,.ant-input-number-handler-down-inner .ant-input-number-handler-down-inner-icon{display:block}.ant-input-number:hover{border-color:#40a9ff;border-right-width:1px!important}.ant-input-number-focused{border-color:#40a9ff;border-right-width:1px!important;outline:0;box-shadow:0 0 0 2px #1890ff33}.ant-input-number-disabled{color:#00000040;background-color:#f5f5f5;cursor:not-allowed;opacity:1}.ant-input-number-disabled:hover{border-color:#d9d9d9;border-right-width:1px!important}.ant-input-number-disabled .ant-input-number-input{cursor:not-allowed}.ant-input-number-disabled .ant-input-number-handler-wrap{display:none}.ant-input-number-input{width:100%;height:30px;padding:0 11px;text-align:left;background-color:transparent;border:0;border-radius:2px;outline:0;transition:all .3s linear;-moz-appearance:textfield!important}.ant-input-number-input::-moz-placeholder{color:#bfbfbf;opacity:1}.ant-input-number-input:-ms-input-placeholder{color:#bfbfbf}.ant-input-number-input::-webkit-input-placeholder{color:#bfbfbf}.ant-input-number-input:-moz-placeholder-shown{text-overflow:ellipsis}.ant-input-number-input:-ms-input-placeholder{text-overflow:ellipsis}.ant-input-number-input:placeholder-shown{text-overflow:ellipsis}.ant-input-number-input[type=number]::-webkit-inner-spin-button,.ant-input-number-input[type=number]::-webkit-outer-spin-button{margin:0;-webkit-appearance:none}.ant-input-number-lg{padding:0;font-size:16px}.ant-input-number-lg input{height:38px}.ant-input-number-sm{padding:0}.ant-input-number-sm input{height:22px;padding:0 
7px}.ant-input-number-handler-wrap{position:absolute;top:0;right:0;width:22px;height:100%;background:#fff;border-left:1px solid #d9d9d9;border-radius:0 2px 2px 0;opacity:0;transition:opacity .24s linear .1s}.ant-input-number-handler-wrap .ant-input-number-handler .ant-input-number-handler-up-inner,.ant-input-number-handler-wrap .ant-input-number-handler .ant-input-number-handler-down-inner{display:inline-block;font-size:12px;font-size:7px \	;transform:scale(.58333333) rotate(0);min-width:auto;margin-right:0}:root .ant-input-number-handler-wrap .ant-input-number-handler .ant-input-number-handler-up-inner,:root .ant-input-number-handler-wrap .ant-input-number-handler .ant-input-number-handler-down-inner{font-size:12px}.ant-input-number-handler-wrap:hover .ant-input-number-handler{height:40%}.ant-input-number:hover .ant-input-number-handler-wrap{opacity:1}.ant-input-number-handler-up{border-top-right-radius:2px;cursor:pointer}.ant-input-number-handler-up-inner{top:50%;margin-top:-5px;text-align:center}.ant-input-number-handler-up:hover{height:60%!important}.ant-input-number-handler-down{top:0;border-top:1px solid #d9d9d9;border-bottom-right-radius:2px;cursor:pointer}.ant-input-number-handler-down-inner{top:50%;margin-top:-6px;text-align:center}.ant-input-number-handler-down:hover{height:60%!important}.ant-input-number-handler-up-disabled,.ant-input-number-handler-down-disabled{cursor:not-allowed}.ant-input-number-handler-up-disabled:hover .ant-input-number-handler-up-inner,.ant-input-number-handler-down-disabled:hover .ant-input-number-handler-down-inner{color:#00000040}.ant-layout{display:flex;flex:auto;flex-direction:column;min-height:0;background:#f0f2f5}.ant-layout,.ant-layout *{box-sizing:border-box}.ant-layout.ant-layout-has-sider{flex-direction:row}.ant-layout.ant-layout-has-sider>.ant-layout,.ant-layout.ant-layout-has-sider>.ant-layout-content{width:0}.ant-layout-header,.ant-layout-footer{flex:0 0 auto}.ant-layout-header{height:64px;padding:0 
50px;color:#000000d9;line-height:64px;background:#001529}.ant-layout-footer{padding:24px 50px;color:#000000d9;font-size:14px;background:#f0f2f5}.ant-layout-content{flex:auto;min-height:0}.ant-layout-sider{position:relative;min-width:0;background:#001529;transition:all .2s}.ant-layout-sider-children{height:100%;margin-top:-.1px;padding-top:.1px}.ant-layout-sider-children .ant-menu.ant-menu-inline-collapsed{width:auto}.ant-layout-sider-has-trigger{padding-bottom:48px}.ant-layout-sider-right{order:1}.ant-layout-sider-trigger{position:fixed;bottom:0;z-index:1;height:48px;color:#fff;line-height:48px;text-align:center;background:#002140;cursor:pointer;transition:all .2s}.ant-layout-sider-zero-width>*{overflow:hidden}.ant-layout-sider-zero-width-trigger{position:absolute;top:64px;right:-36px;z-index:1;width:36px;height:42px;color:#fff;font-size:18px;line-height:42px;text-align:center;background:#001529;border-radius:0 2px 2px 0;cursor:pointer;transition:background .3s ease}.ant-layout-sider-zero-width-trigger:after{position:absolute;top:0;right:0;bottom:0;left:0;background:transparent;transition:all .3s;content:""}.ant-layout-sider-zero-width-trigger:hover:after{background:rgba(255,255,255,.1)}.ant-layout-sider-zero-width-trigger-right{left:-36px;border-radius:2px 0 0 2px}.ant-layout-sider-light{background:#fff}.ant-layout-sider-light .ant-layout-sider-trigger,.ant-layout-sider-light .ant-layout-sider-zero-width-trigger{color:#000000d9;background:#fff}.ant-layout-rtl{direction:rtl}.ant-list{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:relative}.ant-list *{outline:none}.ant-list-pagination{margin-top:24px;text-align:right}.ant-list-pagination .ant-pagination-options{text-align:left}.ant-list-more{margin-top:12px;text-align:center}.ant-list-more 
button{padding-right:32px;padding-left:32px}.ant-list-spin{min-height:40px;text-align:center}.ant-list-empty-text{padding:16px;color:#00000040;font-size:14px;text-align:center}.ant-list-items{margin:0;padding:0;list-style:none}.ant-list-item{display:flex;align-items:center;justify-content:space-between;padding:12px 0;color:#000000d9}.ant-list-item-meta{display:flex;flex:1;align-items:flex-start;max-width:100%}.ant-list-item-meta-avatar{margin-right:16px}.ant-list-item-meta-content{flex:1 0;width:0;color:#000000d9}.ant-list-item-meta-title{margin-bottom:4px;color:#000000d9;font-size:14px;line-height:1.5715}.ant-list-item-meta-title>a{color:#000000d9;transition:all .3s}.ant-list-item-meta-title>a:hover{color:#1890ff}.ant-list-item-meta-description{color:#00000073;font-size:14px;line-height:1.5715}.ant-list-item-action{flex:0 0 auto;margin-left:48px;padding:0;font-size:0;list-style:none}.ant-list-item-action>li{position:relative;display:inline-block;padding:0 8px;color:#00000073;font-size:14px;line-height:1.5715;text-align:center}.ant-list-item-action>li:first-child{padding-left:0}.ant-list-item-action-split{position:absolute;top:50%;right:0;width:1px;height:14px;margin-top:-7px;background-color:#f0f0f0}.ant-list-header,.ant-list-footer{background:transparent}.ant-list-header,.ant-list-footer{padding-top:12px;padding-bottom:12px}.ant-list-empty{padding:16px 0;color:#00000073;font-size:12px;text-align:center}.ant-list-split .ant-list-item{border-bottom:1px solid #f0f0f0}.ant-list-split .ant-list-item:last-child{border-bottom:none}.ant-list-split .ant-list-header{border-bottom:1px solid #f0f0f0}.ant-list-split.ant-list-empty .ant-list-footer{border-top:1px solid #f0f0f0}.ant-list-loading .ant-list-spin-nested-loading{min-height:32px}.ant-list-split.ant-list-something-after-last-item .ant-spin-container>.ant-list-items>.ant-list-item:last-child{border-bottom:1px solid #f0f0f0}.ant-list-lg .ant-list-item{padding:16px 24px}.ant-list-sm .ant-list-item{padding:8px 
16px}.ant-list-vertical .ant-list-item{align-items:initial}.ant-list-vertical .ant-list-item-main{display:block;flex:1}.ant-list-vertical .ant-list-item-extra{margin-left:40px}.ant-list-vertical .ant-list-item-meta{margin-bottom:16px}.ant-list-vertical .ant-list-item-meta-title{margin-bottom:12px;color:#000000d9;font-size:16px;line-height:24px}.ant-list-vertical .ant-list-item-action{margin-top:16px;margin-left:auto}.ant-list-vertical .ant-list-item-action>li{padding:0 16px}.ant-list-vertical .ant-list-item-action>li:first-child{padding-left:0}.ant-list-grid .ant-col>.ant-list-item{display:block;max-width:100%;margin-bottom:16px;padding-top:0;padding-bottom:0;border-bottom:none}.ant-list-item-no-flex{display:block}.ant-list:not(.ant-list-vertical) .ant-list-item-no-flex .ant-list-item-action{float:right}.ant-list-bordered{border:1px solid #d9d9d9;border-radius:2px}.ant-list-bordered .ant-list-header,.ant-list-bordered .ant-list-footer,.ant-list-bordered .ant-list-item{padding-right:24px;padding-left:24px}.ant-list-bordered .ant-list-pagination{margin:16px 24px}.ant-list-bordered.ant-list-sm .ant-list-item,.ant-list-bordered.ant-list-sm .ant-list-header,.ant-list-bordered.ant-list-sm .ant-list-footer{padding:8px 16px}.ant-list-bordered.ant-list-lg .ant-list-item,.ant-list-bordered.ant-list-lg .ant-list-header,.ant-list-bordered.ant-list-lg .ant-list-footer{padding:16px 24px}@media screen and (max-width: 768px){.ant-list-item-action,.ant-list-vertical .ant-list-item-extra{margin-left:24px}}@media screen and (max-width: 576px){.ant-list-item{flex-wrap:wrap}.ant-list-item-action{margin-left:12px}.ant-list-vertical .ant-list-item{flex-wrap:wrap-reverse}.ant-list-vertical .ant-list-item-main{min-width:220px}.ant-list-vertical .ant-list-item-extra{margin:auto auto 16px}}.ant-list-rtl{direction:rtl;text-align:right}.ant-list-rtl .ReactVirtualized__List .ant-list-item{direction:rtl}.ant-list-rtl .ant-list-pagination{text-align:left}.ant-list-rtl 
.ant-list-item-meta-avatar{margin-right:0;margin-left:16px}.ant-list-rtl .ant-list-item-action{margin-right:48px;margin-left:0}.ant-list.ant-list-rtl .ant-list-item-action>li:first-child{padding-right:0;padding-left:16px}.ant-list-rtl .ant-list-item-action-split{right:auto;left:0}.ant-list-rtl.ant-list-vertical .ant-list-item-extra{margin-right:40px;margin-left:0}.ant-list-rtl.ant-list-vertical .ant-list-item-action{margin-right:auto}.ant-list-rtl .ant-list-vertical .ant-list-item-action>li:first-child{padding-right:0;padding-left:16px}.ant-list-rtl .ant-list:not(.ant-list-vertical) .ant-list-item-no-flex .ant-list-item-action{float:left}@media screen and (max-width: 768px){.ant-list-rtl .ant-list-item-action,.ant-list-rtl .ant-list-vertical .ant-list-item-extra{margin-right:24px;margin-left:0}}@media screen and (max-width: 576px){.ant-list-rtl .ant-list-item-action{margin-right:22px;margin-left:0}.ant-list-rtl.ant-list-vertical .ant-list-item-extra{margin:auto auto 16px}}.ant-spin{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:absolute;display:none;color:#1890ff;text-align:center;vertical-align:middle;opacity:0;transition:transform .3s cubic-bezier(.78,.14,.15,.86)}.ant-spin-spinning{position:static;display:inline-block;opacity:1}.ant-spin-nested-loading{position:relative}.ant-spin-nested-loading>div>.ant-spin{position:absolute;top:0;left:0;z-index:4;display:block;width:100%;height:100%;max-height:400px}.ant-spin-nested-loading>div>.ant-spin .ant-spin-dot{position:absolute;top:50%;left:50%;margin:-10px}.ant-spin-nested-loading>div>.ant-spin .ant-spin-text{position:absolute;top:50%;width:100%;padding-top:5px;text-shadow:0 1px 2px #fff}.ant-spin-nested-loading>div>.ant-spin.ant-spin-show-text .ant-spin-dot{margin-top:-20px}.ant-spin-nested-loading>div>.ant-spin-sm .ant-spin-dot{margin:-7px}.ant-spin-nested-loading>div>.ant-spin-sm 
.ant-spin-text{padding-top:2px}.ant-spin-nested-loading>div>.ant-spin-sm.ant-spin-show-text .ant-spin-dot{margin-top:-17px}.ant-spin-nested-loading>div>.ant-spin-lg .ant-spin-dot{margin:-16px}.ant-spin-nested-loading>div>.ant-spin-lg .ant-spin-text{padding-top:11px}.ant-spin-nested-loading>div>.ant-spin-lg.ant-spin-show-text .ant-spin-dot{margin-top:-26px}.ant-spin-container{position:relative;transition:opacity .3s}.ant-spin-container:after{position:absolute;top:0;right:0;bottom:0;left:0;z-index:10;display:none \	;width:100%;height:100%;background:#fff;opacity:0;transition:all .3s;content:"";pointer-events:none}.ant-spin-blur{clear:both;overflow:hidden;opacity:.5;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;pointer-events:none}.ant-spin-blur:after{opacity:.4;pointer-events:auto}.ant-spin-tip{color:#00000073}.ant-spin-dot{position:relative;display:inline-block;font-size:20px;width:1em;height:1em}.ant-spin-dot-item{position:absolute;display:block;width:9px;height:9px;background-color:#1890ff;border-radius:100%;transform:scale(.75);transform-origin:50% 50%;opacity:.3;-webkit-animation:antSpinMove 1s infinite linear alternate;animation:antSpinMove 1s infinite linear alternate}.ant-spin-dot-item:nth-child(1){top:0;left:0}.ant-spin-dot-item:nth-child(2){top:0;right:0;-webkit-animation-delay:.4s;animation-delay:.4s}.ant-spin-dot-item:nth-child(3){right:0;bottom:0;-webkit-animation-delay:.8s;animation-delay:.8s}.ant-spin-dot-item:nth-child(4){bottom:0;left:0;-webkit-animation-delay:1.2s;animation-delay:1.2s}.ant-spin-dot-spin{transform:rotate(45deg);-webkit-animation:antRotate 1.2s infinite linear;animation:antRotate 1.2s infinite linear}.ant-spin-sm .ant-spin-dot{font-size:14px}.ant-spin-sm .ant-spin-dot i{width:6px;height:6px}.ant-spin-lg .ant-spin-dot{font-size:32px}.ant-spin-lg .ant-spin-dot i{width:14px;height:14px}.ant-spin.ant-spin-show-text .ant-spin-text{display:block}@media all and (-ms-high-contrast: 
none),(-ms-high-contrast: active){.ant-spin-blur{background:#fff;opacity:.5}}@-webkit-keyframes antSpinMove{to{opacity:1}}@keyframes antSpinMove{to{opacity:1}}@-webkit-keyframes antRotate{to{transform:rotate(405deg)}}@keyframes antRotate{to{transform:rotate(405deg)}}.ant-spin-rtl{direction:rtl}.ant-spin-rtl .ant-spin-dot-spin{transform:rotate(-45deg);-webkit-animation-name:antRotateRtl;animation-name:antRotateRtl}@-webkit-keyframes antRotateRtl{to{transform:rotate(-405deg)}}@keyframes antRotateRtl{to{transform:rotate(-405deg)}}.ant-pagination{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum"}.ant-pagination ul,.ant-pagination ol{margin:0;padding:0;list-style:none}.ant-pagination:after{display:block;clear:both;height:0;overflow:hidden;visibility:hidden;content:" "}.ant-pagination-total-text{display:inline-block;height:32px;margin-right:8px;line-height:30px;vertical-align:middle}.ant-pagination-item{display:inline-block;min-width:32px;height:32px;margin-right:8px;font-family:Arial;line-height:30px;text-align:center;vertical-align:middle;list-style:none;background-color:#fff;border:1px solid #d9d9d9;border-radius:2px;outline:0;cursor:pointer;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.ant-pagination-item a{display:block;padding:0 6px;color:#000000d9;transition:none}.ant-pagination-item a:hover{text-decoration:none}.ant-pagination-item:focus,.ant-pagination-item:hover{border-color:#1890ff;transition:all .3s}.ant-pagination-item:focus a,.ant-pagination-item:hover a{color:#1890ff}.ant-pagination-item-active{font-weight:500;background:#fff;border-color:#1890ff}.ant-pagination-item-active a{color:#1890ff}.ant-pagination-item-active:focus,.ant-pagination-item-active:hover{border-color:#40a9ff}.ant-pagination-item-active:focus a,.ant-pagination-item-active:hover 
a{color:#40a9ff}.ant-pagination-jump-prev,.ant-pagination-jump-next{outline:0}.ant-pagination-jump-prev .ant-pagination-item-container,.ant-pagination-jump-next .ant-pagination-item-container{position:relative}.ant-pagination-jump-prev .ant-pagination-item-container .ant-pagination-item-link-icon,.ant-pagination-jump-next .ant-pagination-item-container .ant-pagination-item-link-icon{display:inline-block;font-size:12px;font-size:12px \	;transform:scale(1) rotate(0);color:#1890ff;letter-spacing:-1px;opacity:0;transition:all .2s}:root .ant-pagination-jump-prev .ant-pagination-item-container .ant-pagination-item-link-icon,:root .ant-pagination-jump-next .ant-pagination-item-container .ant-pagination-item-link-icon{font-size:12px}.ant-pagination-jump-prev .ant-pagination-item-container .ant-pagination-item-link-icon-svg,.ant-pagination-jump-next .ant-pagination-item-container .ant-pagination-item-link-icon-svg{top:0;right:0;bottom:0;left:0;margin:auto}.ant-pagination-jump-prev .ant-pagination-item-container .ant-pagination-item-ellipsis,.ant-pagination-jump-next .ant-pagination-item-container .ant-pagination-item-ellipsis{position:absolute;top:0;right:0;bottom:0;left:0;display:block;margin:auto;color:#00000040;letter-spacing:2px;text-align:center;text-indent:.13em;opacity:1;transition:all .2s}.ant-pagination-jump-prev:focus .ant-pagination-item-link-icon,.ant-pagination-jump-next:focus .ant-pagination-item-link-icon,.ant-pagination-jump-prev:hover .ant-pagination-item-link-icon,.ant-pagination-jump-next:hover .ant-pagination-item-link-icon{opacity:1}.ant-pagination-jump-prev:focus .ant-pagination-item-ellipsis,.ant-pagination-jump-next:focus .ant-pagination-item-ellipsis,.ant-pagination-jump-prev:hover .ant-pagination-item-ellipsis,.ant-pagination-jump-next:hover 
.ant-pagination-item-ellipsis{opacity:0}.ant-pagination-prev,.ant-pagination-jump-prev,.ant-pagination-jump-next{margin-right:8px}.ant-pagination-prev,.ant-pagination-next,.ant-pagination-jump-prev,.ant-pagination-jump-next{display:inline-block;min-width:32px;height:32px;color:#000000d9;font-family:Arial;line-height:32px;text-align:center;vertical-align:middle;list-style:none;border-radius:2px;cursor:pointer;transition:all .3s}.ant-pagination-prev,.ant-pagination-next{outline:0}.ant-pagination-prev a,.ant-pagination-next a{color:#000000d9;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.ant-pagination-prev:hover a,.ant-pagination-next:hover a{border-color:#40a9ff}.ant-pagination-prev .ant-pagination-item-link,.ant-pagination-next .ant-pagination-item-link{display:block;height:100%;font-size:12px;text-align:center;background-color:#fff;border:1px solid #d9d9d9;border-radius:2px;outline:none;transition:all .3s}.ant-pagination-prev:focus .ant-pagination-item-link,.ant-pagination-next:focus .ant-pagination-item-link,.ant-pagination-prev:hover .ant-pagination-item-link,.ant-pagination-next:hover .ant-pagination-item-link{color:#1890ff;border-color:#1890ff}.ant-pagination-disabled,.ant-pagination-disabled:hover,.ant-pagination-disabled:focus{cursor:not-allowed}.ant-pagination-disabled a,.ant-pagination-disabled:hover a,.ant-pagination-disabled:focus a,.ant-pagination-disabled .ant-pagination-item-link,.ant-pagination-disabled:hover .ant-pagination-item-link,.ant-pagination-disabled:focus .ant-pagination-item-link{color:#00000040;border-color:#d9d9d9;cursor:not-allowed}.ant-pagination-slash{margin:0 10px 0 5px}.ant-pagination-options{display:inline-block;margin-left:16px;vertical-align:middle}.ant-pagination-options-size-changer.ant-select{display:inline-block;width:auto;margin-right:8px}.ant-pagination-options-quick-jumper{display:inline-block;height:32px;line-height:32px;vertical-align:top}.ant-pagination-options-quick-jumper 
input{position:relative;display:inline-block;width:100%;padding:4px 11px;color:#000000d9;font-size:14px;line-height:1.5715;background-color:#fff;background-image:none;border:1px solid #d9d9d9;border-radius:2px;transition:all .3s;width:50px;margin:0 8px}.ant-pagination-options-quick-jumper input::-moz-placeholder{color:#bfbfbf;opacity:1}.ant-pagination-options-quick-jumper input:-ms-input-placeholder{color:#bfbfbf}.ant-pagination-options-quick-jumper input::-webkit-input-placeholder{color:#bfbfbf}.ant-pagination-options-quick-jumper input:-moz-placeholder-shown{text-overflow:ellipsis}.ant-pagination-options-quick-jumper input:-ms-input-placeholder{text-overflow:ellipsis}.ant-pagination-options-quick-jumper input:placeholder-shown{text-overflow:ellipsis}.ant-pagination-options-quick-jumper input:hover{border-color:#40a9ff;border-right-width:1px!important}.ant-pagination-options-quick-jumper input:focus{border-color:#40a9ff;border-right-width:1px!important;outline:0;box-shadow:0 0 0 2px #1890ff33}.ant-pagination-options-quick-jumper input-disabled{color:#00000040;background-color:#f5f5f5;cursor:not-allowed;opacity:1}.ant-pagination-options-quick-jumper input-disabled:hover{border-color:#d9d9d9;border-right-width:1px!important}.ant-pagination-options-quick-jumper input[disabled]{color:#00000040;background-color:#f5f5f5;cursor:not-allowed;opacity:1}.ant-pagination-options-quick-jumper input[disabled]:hover{border-color:#d9d9d9;border-right-width:1px!important}textarea.ant-pagination-options-quick-jumper input{max-width:100%;height:auto;min-height:32px;line-height:1.5715;vertical-align:bottom;transition:all .3s,height 0s}.ant-pagination-options-quick-jumper input-lg{padding:6.5px 11px;font-size:16px}.ant-pagination-options-quick-jumper input-sm{padding:0 7px}.ant-pagination-simple .ant-pagination-prev,.ant-pagination-simple .ant-pagination-next{height:24px;line-height:24px;vertical-align:top}.ant-pagination-simple .ant-pagination-prev 
.ant-pagination-item-link,.ant-pagination-simple .ant-pagination-next .ant-pagination-item-link{height:24px;border:0}.ant-pagination-simple .ant-pagination-prev .ant-pagination-item-link:after,.ant-pagination-simple .ant-pagination-next .ant-pagination-item-link:after{height:24px;line-height:24px}.ant-pagination-simple .ant-pagination-simple-pager{display:inline-block;height:24px;margin-right:8px}.ant-pagination-simple .ant-pagination-simple-pager input{box-sizing:border-box;height:100%;margin-right:8px;padding:0 6px;text-align:center;background-color:#fff;border:1px solid #d9d9d9;border-radius:2px;outline:none;transition:border-color .3s}.ant-pagination-simple .ant-pagination-simple-pager input:hover{border-color:#1890ff}.ant-pagination.mini .ant-pagination-total-text,.ant-pagination.mini .ant-pagination-simple-pager{height:24px;line-height:24px}.ant-pagination.mini .ant-pagination-item{min-width:24px;height:24px;margin:0;line-height:22px}.ant-pagination.mini .ant-pagination-item:not(.ant-pagination-item-active){background:transparent;border-color:transparent}.ant-pagination.mini .ant-pagination-prev,.ant-pagination.mini .ant-pagination-next{min-width:24px;height:24px;margin:0;line-height:24px}.ant-pagination.mini .ant-pagination-prev .ant-pagination-item-link,.ant-pagination.mini .ant-pagination-next .ant-pagination-item-link{background:transparent;border-color:transparent}.ant-pagination.mini .ant-pagination-prev .ant-pagination-item-link:after,.ant-pagination.mini .ant-pagination-next .ant-pagination-item-link:after{height:24px;line-height:24px}.ant-pagination.mini .ant-pagination-jump-prev,.ant-pagination.mini .ant-pagination-jump-next{height:24px;margin-right:0;line-height:24px}.ant-pagination.mini .ant-pagination-options{margin-left:2px}.ant-pagination.mini .ant-pagination-options-quick-jumper{height:24px;line-height:24px}.ant-pagination.mini .ant-pagination-options-quick-jumper input{padding:0 
7px;width:44px}.ant-pagination.ant-pagination-disabled{cursor:not-allowed}.ant-pagination.ant-pagination-disabled .ant-pagination-item{background:#f5f5f5;border-color:#d9d9d9;cursor:not-allowed}.ant-pagination.ant-pagination-disabled .ant-pagination-item a{color:#00000040;background:transparent;border:none;cursor:not-allowed}.ant-pagination.ant-pagination-disabled .ant-pagination-item-active{background:#dbdbdb;border-color:transparent}.ant-pagination.ant-pagination-disabled .ant-pagination-item-active a{color:#fff}.ant-pagination.ant-pagination-disabled .ant-pagination-item-link,.ant-pagination.ant-pagination-disabled .ant-pagination-item-link:hover,.ant-pagination.ant-pagination-disabled .ant-pagination-item-link:focus{color:#00000073;background:#f5f5f5;border-color:#d9d9d9;cursor:not-allowed}.ant-pagination.ant-pagination-disabled .ant-pagination-jump-prev:focus .ant-pagination-item-link-icon,.ant-pagination.ant-pagination-disabled .ant-pagination-jump-next:focus .ant-pagination-item-link-icon,.ant-pagination.ant-pagination-disabled .ant-pagination-jump-prev:hover .ant-pagination-item-link-icon,.ant-pagination.ant-pagination-disabled .ant-pagination-jump-next:hover .ant-pagination-item-link-icon{opacity:0}.ant-pagination.ant-pagination-disabled .ant-pagination-jump-prev:focus .ant-pagination-item-ellipsis,.ant-pagination.ant-pagination-disabled .ant-pagination-jump-next:focus .ant-pagination-item-ellipsis,.ant-pagination.ant-pagination-disabled .ant-pagination-jump-prev:hover .ant-pagination-item-ellipsis,.ant-pagination.ant-pagination-disabled .ant-pagination-jump-next:hover .ant-pagination-item-ellipsis{opacity:1}@media only screen and (max-width: 992px){.ant-pagination-item-after-jump-prev,.ant-pagination-item-before-jump-next{display:none}}@media only screen and (max-width: 
576px){.ant-pagination-options{display:none}}.ant-mentions{box-sizing:border-box;margin:0;font-variant:tabular-nums;list-style:none;font-feature-settings:"tnum";width:100%;color:#000000d9;font-size:14px;background-color:#fff;background-image:none;border:1px solid #d9d9d9;border-radius:2px;transition:all .3s;position:relative;display:inline-block;height:auto;padding:0;overflow:hidden;line-height:1.5715;white-space:pre-wrap;vertical-align:bottom}.ant-mentions::-moz-placeholder{color:#bfbfbf;opacity:1}.ant-mentions:-ms-input-placeholder{color:#bfbfbf}.ant-mentions::-webkit-input-placeholder{color:#bfbfbf}.ant-mentions:-moz-placeholder-shown{text-overflow:ellipsis}.ant-mentions:-ms-input-placeholder{text-overflow:ellipsis}.ant-mentions:placeholder-shown{text-overflow:ellipsis}.ant-mentions:hover{border-color:#40a9ff;border-right-width:1px!important}.ant-mentions:focus{border-color:#40a9ff;border-right-width:1px!important;outline:0;box-shadow:0 0 0 2px #1890ff33}.ant-mentions-disabled{color:#00000040;background-color:#f5f5f5;cursor:not-allowed;opacity:1}.ant-mentions-disabled:hover{border-color:#d9d9d9;border-right-width:1px!important}.ant-mentions[disabled]{color:#00000040;background-color:#f5f5f5;cursor:not-allowed;opacity:1}.ant-mentions[disabled]:hover{border-color:#d9d9d9;border-right-width:1px!important}textarea.ant-mentions{max-width:100%;height:auto;min-height:32px;line-height:1.5715;vertical-align:bottom;transition:all .3s,height 0s}.ant-mentions-lg{padding:6.5px 11px;font-size:16px}.ant-mentions-sm{padding:0 7px}.ant-mentions-disabled>textarea{color:#00000040;background-color:#f5f5f5;cursor:not-allowed;opacity:1}.ant-mentions-disabled>textarea:hover{border-color:#d9d9d9;border-right-width:1px!important}.ant-mentions-focused{border-color:#40a9ff;border-right-width:1px!important;outline:0;box-shadow:0 0 0 2px #1890ff33}.ant-mentions>textarea,.ant-mentions-measure{min-height:30px;margin:0;padding:4px 
11px;overflow:inherit;overflow-x:hidden;overflow-y:auto;font-weight:inherit;font-size:inherit;font-family:inherit;font-style:inherit;font-variant:inherit;font-size-adjust:inherit;font-stretch:inherit;line-height:inherit;direction:inherit;letter-spacing:inherit;white-space:inherit;text-align:inherit;vertical-align:top;word-wrap:break-word;word-break:inherit;-moz-tab-size:inherit;-o-tab-size:inherit;tab-size:inherit}.ant-mentions>textarea{width:100%;border:none;outline:none;resize:none}.ant-mentions>textarea::-moz-placeholder{color:#bfbfbf;opacity:1}.ant-mentions>textarea:-ms-input-placeholder{color:#bfbfbf}.ant-mentions>textarea::-webkit-input-placeholder{color:#bfbfbf}.ant-mentions>textarea:-moz-placeholder-shown{text-overflow:ellipsis}.ant-mentions>textarea:-ms-input-placeholder{text-overflow:ellipsis}.ant-mentions>textarea:placeholder-shown{text-overflow:ellipsis}.ant-mentions>textarea:-moz-read-only{cursor:default}.ant-mentions>textarea:read-only{cursor:default}.ant-mentions-measure{position:absolute;top:0;right:0;bottom:0;left:0;z-index:-1;color:transparent;pointer-events:none}.ant-mentions-measure>span{display:inline-block;min-height:1em}.ant-mentions-dropdown{margin:0;padding:0;color:#000000d9;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:absolute;top:-9999px;left:-9999px;z-index:1050;box-sizing:border-box;font-size:14px;font-variant:initial;background-color:#fff;border-radius:2px;outline:none;box-shadow:0 2px 8px #00000026}.ant-mentions-dropdown-hidden{display:none}.ant-mentions-dropdown-menu{max-height:250px;margin-bottom:0;padding-left:0;overflow:auto;list-style:none;outline:none}.ant-mentions-dropdown-menu-item{position:relative;display:block;min-width:100px;padding:5px 12px;overflow:hidden;color:#000000d9;font-weight:400;line-height:22px;white-space:nowrap;text-overflow:ellipsis;cursor:pointer;transition:background .3s 
ease}.ant-mentions-dropdown-menu-item:hover{background-color:#f5f5f5}.ant-mentions-dropdown-menu-item:first-child{border-radius:2px 2px 0 0}.ant-mentions-dropdown-menu-item:last-child{border-radius:0 0 2px 2px}.ant-mentions-dropdown-menu-item-disabled{color:#00000040;cursor:not-allowed}.ant-mentions-dropdown-menu-item-disabled:hover{color:#00000040;background-color:#fff;cursor:not-allowed}.ant-mentions-dropdown-menu-item-selected{color:#000000d9;font-weight:600;background-color:#fafafa}.ant-mentions-dropdown-menu-item-active{background-color:#e6f7ff}.ant-message{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:fixed;top:16px;left:0;z-index:1010;width:100%;pointer-events:none}.ant-message-notice{padding:8px;text-align:center}.ant-message-notice:first-child{margin-top:-8px}.ant-message-notice-content{display:inline-block;padding:10px 16px;background:#fff;border-radius:2px;box-shadow:0 4px 12px #00000026;pointer-events:all}.ant-message-success .anticon{color:#52c41a}.ant-message-error .anticon{color:#ff4d4f}.ant-message-warning .anticon{color:#faad14}.ant-message-info .anticon,.ant-message-loading .anticon{color:#1890ff}.ant-message .anticon{position:relative;top:1px;margin-right:8px;font-size:16px}.ant-message-notice.move-up-leave.move-up-leave-active{overflow:hidden;-webkit-animation-name:MessageMoveOut;animation-name:MessageMoveOut;-webkit-animation-duration:.3s;animation-duration:.3s}@-webkit-keyframes MessageMoveOut{0%{max-height:150px;padding:8px;opacity:1}to{max-height:0;padding:0;opacity:0}}@keyframes MessageMoveOut{0%{max-height:150px;padding:8px;opacity:1}to{max-height:0;padding:0;opacity:0}}.ant-modal{box-sizing:border-box;padding:0 0 24px;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:relative;top:100px;width:auto;margin:0 
auto;pointer-events:none}.ant-modal-wrap{position:fixed;top:0;right:0;bottom:0;left:0;z-index:1000;overflow:auto;outline:0;-webkit-overflow-scrolling:touch}.ant-modal-title{margin:0;color:#000000d9;font-weight:500;font-size:16px;line-height:22px;word-wrap:break-word}.ant-modal-content{position:relative;background-color:#fff;background-clip:padding-box;border:0;border-radius:2px;box-shadow:0 4px 12px #00000026;pointer-events:auto}.ant-modal-close{position:absolute;top:0;right:0;z-index:10;padding:0;color:#00000073;font-weight:700;line-height:1;text-decoration:none;background:transparent;border:0;outline:0;cursor:pointer;transition:color .3s}.ant-modal-close-x{display:block;width:56px;height:56px;font-size:16px;font-style:normal;line-height:56px;text-align:center;text-transform:none;text-rendering:auto}.ant-modal-close:focus,.ant-modal-close:hover{color:#000000bf;text-decoration:none}.ant-modal-header{padding:16px 24px;color:#000000d9;background:#fff;border-bottom:1px solid #f0f0f0;border-radius:2px 2px 0 0}.ant-modal-body{padding:24px;font-size:14px;line-height:1.5715;word-wrap:break-word}.ant-modal-footer{padding:10px 16px;text-align:right;background:transparent;border-top:1px solid #f0f0f0;border-radius:0 0 2px 2px}.ant-modal-footer button+button{margin-bottom:0;margin-left:8px}.ant-modal.zoom-enter,.ant-modal.zoom-appear{transform:none;opacity:0;-webkit-animation-duration:.3s;animation-duration:.3s;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.ant-modal-mask{position:fixed;top:0;right:0;bottom:0;left:0;z-index:1000;height:100%;background-color:#00000073;filter:alpha(opacity=50)}.ant-modal-mask-hidden{display:none}.ant-modal-open{overflow:hidden}.ant-modal-centered{text-align:center}.ant-modal-centered:before{display:inline-block;width:0;height:100%;vertical-align:middle;content:""}.ant-modal-centered .ant-modal{top:0;display:inline-block;text-align:left;vertical-align:middle}@media (max-width: 
767px){.ant-modal{max-width:calc(100vw - 16px);margin:8px auto}.ant-modal-centered .ant-modal{flex:1}}.ant-modal-confirm .ant-modal-header{display:none}.ant-modal-confirm .ant-modal-body{padding:32px 32px 24px}.ant-modal-confirm-body-wrapper:before,.ant-modal-confirm-body-wrapper:after{display:table;content:""}.ant-modal-confirm-body-wrapper:after{clear:both}.ant-modal-confirm-body .ant-modal-confirm-title{display:block;overflow:hidden;color:#000000d9;font-weight:500;font-size:16px;line-height:1.4}.ant-modal-confirm-body .ant-modal-confirm-content{margin-top:8px;color:#000000d9;font-size:14px}.ant-modal-confirm-body>.anticon{float:left;margin-right:16px;font-size:22px}.ant-modal-confirm-body>.anticon+.ant-modal-confirm-title+.ant-modal-confirm-content{margin-left:38px}.ant-modal-confirm .ant-modal-confirm-btns{float:right;margin-top:24px}.ant-modal-confirm .ant-modal-confirm-btns button+button{margin-bottom:0;margin-left:8px}.ant-modal-confirm-error .ant-modal-confirm-body>.anticon{color:#ff4d4f}.ant-modal-confirm-warning .ant-modal-confirm-body>.anticon,.ant-modal-confirm-confirm .ant-modal-confirm-body>.anticon{color:#faad14}.ant-modal-confirm-info .ant-modal-confirm-body>.anticon{color:#1890ff}.ant-modal-confirm-success .ant-modal-confirm-body>.anticon{color:#52c41a}.ant-notification{box-sizing:border-box;margin:0 24px 0 0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:fixed;z-index:1010;width:384px;max-width:calc(100vw - 32px)}.ant-notification-topLeft,.ant-notification-bottomLeft{margin-right:0;margin-left:24px}.ant-notification-topLeft .ant-notification-fade-enter.ant-notification-fade-enter-active,.ant-notification-bottomLeft .ant-notification-fade-enter.ant-notification-fade-enter-active,.ant-notification-topLeft .ant-notification-fade-appear.ant-notification-fade-appear-active,.ant-notification-bottomLeft 
.ant-notification-fade-appear.ant-notification-fade-appear-active{-webkit-animation-name:NotificationLeftFadeIn;animation-name:NotificationLeftFadeIn}.ant-notification-close-icon{font-size:14px;cursor:pointer}.ant-notification-notice{position:relative;margin-bottom:16px;padding:16px 24px;overflow:hidden;line-height:1.5;background:#fff;border-radius:2px;box-shadow:0 4px 12px #00000026}.ant-notification-notice-message{display:inline-block;margin-bottom:8px;color:#000000d9;font-size:16px;line-height:24px}.ant-notification-notice-message-single-line-auto-margin{display:block;width:calc(264px - 100%);max-width:4px;background-color:transparent;pointer-events:none}.ant-notification-notice-message-single-line-auto-margin:before{display:block;content:""}.ant-notification-notice-description{font-size:14px}.ant-notification-notice-closable .ant-notification-notice-message{padding-right:24px}.ant-notification-notice-with-icon .ant-notification-notice-message{margin-bottom:4px;margin-left:48px;font-size:16px}.ant-notification-notice-with-icon .ant-notification-notice-description{margin-left:48px;font-size:14px}.ant-notification-notice-icon{position:absolute;margin-left:4px;font-size:24px;line-height:24px}.anticon.ant-notification-notice-icon-success{color:#52c41a}.anticon.ant-notification-notice-icon-info{color:#1890ff}.anticon.ant-notification-notice-icon-warning{color:#faad14}.anticon.ant-notification-notice-icon-error{color:#ff4d4f}.ant-notification-notice-close{position:absolute;top:16px;right:22px;color:#00000073;outline:none}.ant-notification-notice-close:hover{color:#000000ab}.ant-notification-notice-btn{float:right;margin-top:16px}.ant-notification 
.notification-fade-effect{-webkit-animation-duration:.24s;animation-duration:.24s;-webkit-animation-timing-function:cubic-bezier(.645,.045,.355,1);animation-timing-function:cubic-bezier(.645,.045,.355,1);-webkit-animation-fill-mode:both;animation-fill-mode:both}.ant-notification-fade-enter,.ant-notification-fade-appear{opacity:0;-webkit-animation-duration:.24s;animation-duration:.24s;-webkit-animation-timing-function:cubic-bezier(.645,.045,.355,1);animation-timing-function:cubic-bezier(.645,.045,.355,1);-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-play-state:paused;animation-play-state:paused}.ant-notification-fade-leave{-webkit-animation-duration:.24s;animation-duration:.24s;-webkit-animation-timing-function:cubic-bezier(.645,.045,.355,1);animation-timing-function:cubic-bezier(.645,.045,.355,1);-webkit-animation-fill-mode:both;animation-fill-mode:both;-webkit-animation-duration:.2s;animation-duration:.2s;-webkit-animation-play-state:paused;animation-play-state:paused}.ant-notification-fade-enter.ant-notification-fade-enter-active,.ant-notification-fade-appear.ant-notification-fade-appear-active{-webkit-animation-name:NotificationFadeIn;animation-name:NotificationFadeIn;-webkit-animation-play-state:running;animation-play-state:running}.ant-notification-fade-leave.ant-notification-fade-leave-active{-webkit-animation-name:NotificationFadeOut;animation-name:NotificationFadeOut;-webkit-animation-play-state:running;animation-play-state:running}@-webkit-keyframes NotificationFadeIn{0%{left:384px;opacity:0}to{left:0;opacity:1}}@keyframes NotificationFadeIn{0%{left:384px;opacity:0}to{left:0;opacity:1}}@-webkit-keyframes NotificationLeftFadeIn{0%{right:384px;opacity:0}to{right:0;opacity:1}}@keyframes NotificationLeftFadeIn{0%{right:384px;opacity:0}to{right:0;opacity:1}}@-webkit-keyframes NotificationFadeOut{0%{max-height:150px;margin-bottom:16px;padding-top:16px 24px;padding-bottom:16px 
24px;opacity:1}to{max-height:0;margin-bottom:0;padding-top:0;padding-bottom:0;opacity:0}}@keyframes NotificationFadeOut{0%{max-height:150px;margin-bottom:16px;padding-top:16px 24px;padding-bottom:16px 24px;opacity:1}to{max-height:0;margin-bottom:0;padding-top:0;padding-bottom:0;opacity:0}}.ant-page-header{box-sizing:border-box;margin:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:relative;padding:16px 24px;background-color:#fff}.ant-page-header-ghost{background-color:inherit}.ant-page-header.has-breadcrumb{padding-top:12px}.ant-page-header.has-footer{padding-bottom:0}.ant-page-header-back{margin-right:16px;font-size:16px;line-height:1}.ant-page-header-back-button{color:#1890ff;text-decoration:none;outline:none;transition:color .3s;color:#000;cursor:pointer}.ant-page-header-back-button:focus,.ant-page-header-back-button:hover{color:#40a9ff}.ant-page-header-back-button:active{color:#096dd9}.ant-page-header .ant-divider-vertical{height:14px;margin:0 12px;vertical-align:middle}.ant-breadcrumb+.ant-page-header-heading{margin-top:8px}.ant-page-header-heading{display:flex;justify-content:space-between}.ant-page-header-heading-left{display:flex;align-items:center;margin:4px 0;overflow:hidden}.ant-page-header-heading-title{margin-right:12px;margin-bottom:0;color:#000000d9;font-weight:600;font-size:20px;line-height:32px;overflow:hidden;white-space:nowrap;text-overflow:ellipsis}.ant-page-header-heading .ant-avatar{margin-right:12px}.ant-page-header-heading-sub-title{margin-right:12px;color:#00000073;font-size:14px;line-height:1.5715;overflow:hidden;white-space:nowrap;text-overflow:ellipsis}.ant-page-header-heading-extra{margin:4px 0;white-space:nowrap}.ant-page-header-heading-extra>*{margin-left:12px;white-space:unset}.ant-page-header-heading-extra>*:first-child{margin-left:0}.ant-page-header-content{padding-top:12px}.ant-page-header-footer{margin-top:16px}.ant-page-header-footer 
.ant-tabs>.ant-tabs-nav{margin:0}.ant-page-header-footer .ant-tabs>.ant-tabs-nav:before{border:none}.ant-page-header-footer .ant-tabs .ant-tabs-tab{padding-top:8px;padding-bottom:8px;font-size:16px}.ant-page-header-compact .ant-page-header-heading{flex-wrap:wrap}.ant-page-header-rtl{direction:rtl}.ant-page-header-rtl .ant-page-header-back{float:right;margin-right:0;margin-left:16px}.ant-page-header-rtl .ant-page-header-heading-title,.ant-page-header-rtl .ant-page-header-heading .ant-avatar{margin-right:0;margin-left:12px}.ant-page-header-rtl .ant-page-header-heading-sub-title{float:right;margin-right:0;margin-left:12px}.ant-page-header-rtl .ant-page-header-heading-tags{float:right}.ant-page-header-rtl .ant-page-header-heading-extra{float:left}.ant-page-header-rtl .ant-page-header-heading-extra>*{margin-right:12px;margin-left:0}.ant-page-header-rtl .ant-page-header-heading-extra>*:first-child{margin-right:0}.ant-page-header-rtl .ant-page-header-footer .ant-tabs-bar .ant-tabs-nav{float:right}.ant-popover{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:absolute;top:0;left:0;z-index:1030;font-weight:400;white-space:normal;text-align:left;cursor:auto;-webkit-user-select:text;-moz-user-select:text;-ms-user-select:text;user-select:text}.ant-popover:after{position:absolute;background:rgba(255,255,255,.01);content:""}.ant-popover-hidden{display:none}.ant-popover-placement-top,.ant-popover-placement-topLeft,.ant-popover-placement-topRight{padding-bottom:10px}.ant-popover-placement-right,.ant-popover-placement-rightTop,.ant-popover-placement-rightBottom{padding-left:10px}.ant-popover-placement-bottom,.ant-popover-placement-bottomLeft,.ant-popover-placement-bottomRight{padding-top:10px}.ant-popover-placement-left,.ant-popover-placement-leftTop,.ant-popover-placement-leftBottom{padding-right:10px}.ant-popover-inner{background-color:#fff;background-clip:paddi
ng-box;border-radius:2px;box-shadow:0 2px 8px #00000026;box-shadow:0 0 8px #00000026 \	}@media screen and (-ms-high-contrast: active),(-ms-high-contrast: none){.ant-popover-inner{box-shadow:0 2px 8px #00000026}}.ant-popover-title{min-width:177px;min-height:32px;margin:0;padding:5px 16px 4px;color:#000000d9;font-weight:500;border-bottom:1px solid #f0f0f0}.ant-popover-inner-content{padding:12px 16px;color:#000000d9}.ant-popover-message{position:relative;padding:4px 0 12px;color:#000000d9;font-size:14px}.ant-popover-message>.anticon{position:absolute;top:8px;color:#faad14;font-size:14px}.ant-popover-message-title{padding-left:22px}.ant-popover-buttons{margin-bottom:4px;text-align:right}.ant-popover-buttons button{margin-left:8px}.ant-popover-arrow{position:absolute;display:block;width:8.48528137px;height:8.48528137px;background:transparent;border-style:solid;border-width:4.24264069px;transform:rotate(45deg)}.ant-popover-placement-top>.ant-popover-content>.ant-popover-arrow,.ant-popover-placement-topLeft>.ant-popover-content>.ant-popover-arrow,.ant-popover-placement-topRight>.ant-popover-content>.ant-popover-arrow{bottom:6.2px;border-top-color:transparent;border-right-color:#fff;border-bottom-color:#fff;border-left-color:transparent;box-shadow:3px 3px 7px #00000012}.ant-popover-placement-top>.ant-popover-content>.ant-popover-arrow{left:50%;transform:translate(-50%) rotate(45deg)}.ant-popover-placement-topLeft>.ant-popover-content>.ant-popover-arrow{left:16px}.ant-popover-placement-topRight>.ant-popover-content>.ant-popover-arrow{right:16px}.ant-popover-placement-right>.ant-popover-content>.ant-popover-arrow,.ant-popover-placement-rightTop>.ant-popover-content>.ant-popover-arrow,.ant-popover-placement-rightBottom>.ant-popover-content>.ant-popover-arrow{left:6px;border-top-color:transparent;border-right-color:transparent;border-bottom-color:#fff;border-left-color:#fff;box-shadow:-3px 3px 7px 
#00000012}.ant-popover-placement-right>.ant-popover-content>.ant-popover-arrow{top:50%;transform:translateY(-50%) rotate(45deg)}.ant-popover-placement-rightTop>.ant-popover-content>.ant-popover-arrow{top:12px}.ant-popover-placement-rightBottom>.ant-popover-content>.ant-popover-arrow{bottom:12px}.ant-popover-placement-bottom>.ant-popover-content>.ant-popover-arrow,.ant-popover-placement-bottomLeft>.ant-popover-content>.ant-popover-arrow,.ant-popover-placement-bottomRight>.ant-popover-content>.ant-popover-arrow{top:6px;border-top-color:#fff;border-right-color:transparent;border-bottom-color:transparent;border-left-color:#fff;box-shadow:-2px -2px 5px #0000000f}.ant-popover-placement-bottom>.ant-popover-content>.ant-popover-arrow{left:50%;transform:translate(-50%) rotate(45deg)}.ant-popover-placement-bottomLeft>.ant-popover-content>.ant-popover-arrow{left:16px}.ant-popover-placement-bottomRight>.ant-popover-content>.ant-popover-arrow{right:16px}.ant-popover-placement-left>.ant-popover-content>.ant-popover-arrow,.ant-popover-placement-leftTop>.ant-popover-content>.ant-popover-arrow,.ant-popover-placement-leftBottom>.ant-popover-content>.ant-popover-arrow{right:6px;border-top-color:#fff;border-right-color:#fff;border-bottom-color:transparent;border-left-color:transparent;box-shadow:3px -3px 7px #00000012}.ant-popover-placement-left>.ant-popover-content>.ant-popover-arrow{top:50%;transform:translateY(-50%) rotate(45deg)}.ant-popover-placement-leftTop>.ant-popover-content>.ant-popover-arrow{top:12px}.ant-popover-placement-leftBottom>.ant-popover-content>.ant-popover-arrow{bottom:12px}.ant-progress{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";display:inline-block}.ant-progress-line{position:relative;width:100%;font-size:14px}.ant-progress-small.ant-progress-line,.ant-progress-small.ant-progress-line .ant-progress-text 
.anticon{font-size:12px}.ant-progress-outer{display:inline-block;width:100%;margin-right:0;padding-right:0}.ant-progress-show-info .ant-progress-outer{margin-right:calc(-2em - 8px);padding-right:calc(2em + 8px)}.ant-progress-inner{position:relative;display:inline-block;width:100%;overflow:hidden;vertical-align:middle;background-color:#f5f5f5;border-radius:100px}.ant-progress-circle-trail{stroke:#f5f5f5}.ant-progress-circle-path{-webkit-animation:ant-progress-appear .3s;animation:ant-progress-appear .3s}.ant-progress-inner:not(.ant-progress-circle-gradient) .ant-progress-circle-path{stroke:#1890ff}.ant-progress-success-bg,.ant-progress-bg{position:relative;background-color:#1890ff;border-radius:100px;transition:all .4s cubic-bezier(.08,.82,.17,1) 0s}.ant-progress-success-bg{position:absolute;top:0;left:0;background-color:#52c41a}.ant-progress-text{display:inline-block;width:2em;margin-left:8px;color:#00000073;font-size:1em;line-height:1;white-space:nowrap;text-align:left;vertical-align:middle;word-break:normal}.ant-progress-text .anticon{font-size:14px}.ant-progress-status-active .ant-progress-bg:before{position:absolute;top:0;right:0;bottom:0;left:0;background:#fff;border-radius:10px;opacity:0;-webkit-animation:ant-progress-active 2.4s cubic-bezier(.23,1,.32,1) infinite;animation:ant-progress-active 2.4s cubic-bezier(.23,1,.32,1) infinite;content:""}.ant-progress-status-exception .ant-progress-bg{background-color:#ff4d4f}.ant-progress-status-exception .ant-progress-text{color:#ff4d4f}.ant-progress-status-exception .ant-progress-inner:not(.ant-progress-circle-gradient) .ant-progress-circle-path{stroke:#ff4d4f}.ant-progress-status-success .ant-progress-bg{background-color:#52c41a}.ant-progress-status-success .ant-progress-text{color:#52c41a}.ant-progress-status-success .ant-progress-inner:not(.ant-progress-circle-gradient) .ant-progress-circle-path{stroke:#52c41a}.ant-progress-circle 
.ant-progress-inner{position:relative;line-height:1;background-color:transparent}.ant-progress-circle .ant-progress-text{position:absolute;top:50%;left:50%;width:100%;margin:0;padding:0;color:#000000d9;line-height:1;white-space:normal;text-align:center;transform:translate(-50%,-50%)}.ant-progress-circle .ant-progress-text .anticon{font-size:1.16666667em}.ant-progress-circle.ant-progress-status-exception .ant-progress-text{color:#ff4d4f}.ant-progress-circle.ant-progress-status-success .ant-progress-text{color:#52c41a}@-webkit-keyframes ant-progress-active{0%{width:0;opacity:.1}20%{width:0;opacity:.5}to{width:100%;opacity:0}}@keyframes ant-progress-active{0%{width:0;opacity:.1}20%{width:0;opacity:.5}to{width:100%;opacity:0}}.ant-rate{box-sizing:border-box;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;font-feature-settings:"tnum";display:inline-block;margin:0;padding:0;color:#fadb14;font-size:20px;line-height:unset;list-style:none;outline:none}.ant-rate-disabled .ant-rate-star{cursor:default}.ant-rate-disabled .ant-rate-star:hover{transform:scale(1)}.ant-rate-star{position:relative;display:inline-block;color:inherit;cursor:pointer}.ant-rate-star:not(:last-child){margin-right:8px}.ant-rate-star>div{transition:all .3s}.ant-rate-star>div:hover,.ant-rate-star>div:focus-visible{transform:scale(1.1)}.ant-rate-star>div:focus:not(:focus-visible){outline:0}.ant-rate-star-first,.ant-rate-star-second{color:#f0f0f0;transition:all .3s;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.ant-rate-star-first .anticon,.ant-rate-star-second .anticon{vertical-align:middle}.ant-rate-star-first{position:absolute;top:0;left:0;width:50%;height:100%;overflow:hidden;opacity:0}.ant-rate-star-half .ant-rate-star-first,.ant-rate-star-half .ant-rate-star-second{opacity:1}.ant-rate-star-half .ant-rate-star-first,.ant-rate-star-full .ant-rate-star-second{color:inherit}.ant-rate-text{display:inline-block;margin:0 
8px;font-size:14px}.ant-rate-rtl{direction:rtl}.ant-rate-rtl .ant-rate-star:not(:last-child){margin-right:0;margin-left:8px}.ant-rate-rtl .ant-rate-star-first{right:0;left:auto}.ant-result{padding:48px 32px}.ant-result-success .ant-result-icon>.anticon{color:#52c41a}.ant-result-error .ant-result-icon>.anticon{color:#ff4d4f}.ant-result-info .ant-result-icon>.anticon{color:#1890ff}.ant-result-warning .ant-result-icon>.anticon{color:#faad14}.ant-result-image{width:250px;height:295px;margin:auto}.ant-result-icon{margin-bottom:24px;text-align:center}.ant-result-icon>.anticon{font-size:72px}.ant-result-title{color:#000000d9;font-size:24px;line-height:1.8;text-align:center}.ant-result-subtitle{color:#00000073;font-size:14px;line-height:1.6;text-align:center}.ant-result-extra{margin:24px 0 0;text-align:center}.ant-result-extra>*{margin-right:8px}.ant-result-extra>*:last-child{margin-right:0}.ant-result-content{margin-top:24px;padding:24px 40px;background-color:#fafafa}.ant-result-rtl{direction:rtl}.ant-result-rtl .ant-result-extra>*{margin-right:0;margin-left:8px}.ant-result-rtl .ant-result-extra>*:last-child{margin-left:0}.ant-skeleton{display:table;width:100%}.ant-skeleton-header{display:table-cell;padding-right:16px;vertical-align:top}.ant-skeleton-header .ant-skeleton-avatar{display:inline-block;vertical-align:top;background:rgba(190,190,190,.2);width:32px;height:32px;line-height:32px}.ant-skeleton-header .ant-skeleton-avatar.ant-skeleton-avatar-circle{border-radius:50%}.ant-skeleton-header .ant-skeleton-avatar-lg{width:40px;height:40px;line-height:40px}.ant-skeleton-header .ant-skeleton-avatar-lg.ant-skeleton-avatar-circle{border-radius:50%}.ant-skeleton-header .ant-skeleton-avatar-sm{width:24px;height:24px;line-height:24px}.ant-skeleton-header .ant-skeleton-avatar-sm.ant-skeleton-avatar-circle{border-radius:50%}.ant-skeleton-content{display:table-cell;width:100%;vertical-align:top}.ant-skeleton-content 
.ant-skeleton-title{width:100%;height:16px;margin-top:16px;background:rgba(190,190,190,.2);border-radius:4px}.ant-skeleton-content .ant-skeleton-title+.ant-skeleton-paragraph{margin-top:24px}.ant-skeleton-content .ant-skeleton-paragraph{padding:0}.ant-skeleton-content .ant-skeleton-paragraph>li{width:100%;height:16px;list-style:none;background:rgba(190,190,190,.2);border-radius:4px}.ant-skeleton-content .ant-skeleton-paragraph>li:last-child:not(:first-child):not(:nth-child(2)){width:61%}.ant-skeleton-content .ant-skeleton-paragraph>li+li{margin-top:16px}.ant-skeleton-with-avatar .ant-skeleton-content .ant-skeleton-title{margin-top:12px}.ant-skeleton-with-avatar .ant-skeleton-content .ant-skeleton-title+.ant-skeleton-paragraph{margin-top:28px}.ant-skeleton-round .ant-skeleton-content .ant-skeleton-title,.ant-skeleton-round .ant-skeleton-content .ant-skeleton-paragraph>li{border-radius:100px}.ant-skeleton.ant-skeleton-active .ant-skeleton-content .ant-skeleton-title,.ant-skeleton.ant-skeleton-active .ant-skeleton-content .ant-skeleton-paragraph>li{background:linear-gradient(90deg,rgba(190,190,190,.2) 25%,rgba(129,129,129,.24) 37%,rgba(190,190,190,.2) 63%);background-size:400% 100%;-webkit-animation:ant-skeleton-loading 1.4s ease infinite;animation:ant-skeleton-loading 1.4s ease infinite}.ant-skeleton.ant-skeleton-active .ant-skeleton-avatar,.ant-skeleton.ant-skeleton-active .ant-skeleton-button,.ant-skeleton.ant-skeleton-active .ant-skeleton-input,.ant-skeleton.ant-skeleton-active .ant-skeleton-image{background:linear-gradient(90deg,rgba(190,190,190,.2) 25%,rgba(129,129,129,.24) 37%,rgba(190,190,190,.2) 63%);background-size:400% 100%;-webkit-animation:ant-skeleton-loading 1.4s ease infinite;animation:ant-skeleton-loading 1.4s ease infinite}.ant-skeleton-element{display:inline-block;width:auto}.ant-skeleton-element 
.ant-skeleton-button{display:inline-block;vertical-align:top;background:rgba(190,190,190,.2);border-radius:2px;width:64px;height:32px;line-height:32px}.ant-skeleton-element .ant-skeleton-button.ant-skeleton-button-circle{width:32px;border-radius:50%}.ant-skeleton-element .ant-skeleton-button.ant-skeleton-button-round{border-radius:32px}.ant-skeleton-element .ant-skeleton-button-lg{width:80px;height:40px;line-height:40px}.ant-skeleton-element .ant-skeleton-button-lg.ant-skeleton-button-circle{width:40px;border-radius:50%}.ant-skeleton-element .ant-skeleton-button-lg.ant-skeleton-button-round{border-radius:40px}.ant-skeleton-element .ant-skeleton-button-sm{width:48px;height:24px;line-height:24px}.ant-skeleton-element .ant-skeleton-button-sm.ant-skeleton-button-circle{width:24px;border-radius:50%}.ant-skeleton-element .ant-skeleton-button-sm.ant-skeleton-button-round{border-radius:24px}.ant-skeleton-element .ant-skeleton-avatar{display:inline-block;vertical-align:top;background:rgba(190,190,190,.2);width:32px;height:32px;line-height:32px}.ant-skeleton-element .ant-skeleton-avatar.ant-skeleton-avatar-circle{border-radius:50%}.ant-skeleton-element .ant-skeleton-avatar-lg{width:40px;height:40px;line-height:40px}.ant-skeleton-element .ant-skeleton-avatar-lg.ant-skeleton-avatar-circle{border-radius:50%}.ant-skeleton-element .ant-skeleton-avatar-sm{width:24px;height:24px;line-height:24px}.ant-skeleton-element .ant-skeleton-avatar-sm.ant-skeleton-avatar-circle{border-radius:50%}.ant-skeleton-element .ant-skeleton-input{display:inline-block;vertical-align:top;background:rgba(190,190,190,.2);width:100%;height:32px;line-height:32px}.ant-skeleton-element .ant-skeleton-input-lg{width:100%;height:40px;line-height:40px}.ant-skeleton-element .ant-skeleton-input-sm{width:100%;height:24px;line-height:24px}.ant-skeleton-element 
.ant-skeleton-image{display:flex;align-items:center;justify-content:center;vertical-align:top;background:rgba(190,190,190,.2);width:96px;height:96px;line-height:96px}.ant-skeleton-element .ant-skeleton-image.ant-skeleton-image-circle{border-radius:50%}.ant-skeleton-element .ant-skeleton-image-path{fill:#bfbfbf}.ant-skeleton-element .ant-skeleton-image-svg{width:48px;height:48px;line-height:48px;max-width:192px;max-height:192px}.ant-skeleton-element .ant-skeleton-image-svg.ant-skeleton-image-circle{border-radius:50%}@-webkit-keyframes ant-skeleton-loading{0%{background-position:100% 50%}to{background-position:0 50%}}@keyframes ant-skeleton-loading{0%{background-position:100% 50%}to{background-position:0 50%}}.ant-skeleton-rtl{direction:rtl}.ant-skeleton-rtl .ant-skeleton-header{padding-right:0;padding-left:16px}.ant-skeleton-rtl.ant-skeleton.ant-skeleton-active .ant-skeleton-content .ant-skeleton-title,.ant-skeleton-rtl.ant-skeleton.ant-skeleton-active .ant-skeleton-content .ant-skeleton-paragraph>li{-webkit-animation-name:ant-skeleton-loading-rtl;animation-name:ant-skeleton-loading-rtl}.ant-skeleton-rtl.ant-skeleton.ant-skeleton-active .ant-skeleton-avatar{-webkit-animation-name:ant-skeleton-loading-rtl;animation-name:ant-skeleton-loading-rtl}@-webkit-keyframes ant-skeleton-loading-rtl{0%{background-position:0% 50%}to{background-position:100% 50%}}@keyframes ant-skeleton-loading-rtl{0%{background-position:0% 50%}to{background-position:100% 50%}}.ant-slider{box-sizing:border-box;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:relative;height:12px;margin:14px 6px 10px;padding:4px 0;cursor:pointer;touch-action:none}.ant-slider-vertical{width:12px;height:100%;margin:6px 10px;padding:0 4px}.ant-slider-vertical .ant-slider-rail{width:4px;height:100%}.ant-slider-vertical .ant-slider-track{width:4px}.ant-slider-vertical .ant-slider-handle{margin-top:-6px;margin-left:-5px}.ant-slider-vertical 
.ant-slider-mark{top:0;left:12px;width:18px;height:100%}.ant-slider-vertical .ant-slider-mark-text{left:4px;white-space:nowrap}.ant-slider-vertical .ant-slider-step{width:4px;height:100%}.ant-slider-vertical .ant-slider-dot{top:auto;left:2px;margin-bottom:-4px}.ant-slider-tooltip .ant-tooltip-inner{min-width:unset}.ant-slider-with-marks{margin-bottom:28px}.ant-slider-rail{position:absolute;width:100%;height:4px;background-color:#f5f5f5;border-radius:2px;transition:background-color .3s}.ant-slider-track{position:absolute;height:4px;background-color:#91d5ff;border-radius:2px;transition:background-color .3s}.ant-slider-handle{position:absolute;width:14px;height:14px;margin-top:-5px;background-color:#fff;border:solid 2px #91d5ff;border-radius:50%;box-shadow:0;cursor:pointer;transition:border-color .3s,box-shadow .6s,transform .3s cubic-bezier(.18,.89,.32,1.28)}.ant-slider-handle:focus{border-color:#46a6ff;outline:none;box-shadow:0 0 0 5px #1890ff33}.ant-slider-handle.ant-tooltip-open{border-color:#1890ff}.ant-slider:hover .ant-slider-rail{background-color:#e1e1e1}.ant-slider:hover .ant-slider-track{background-color:#69c0ff}.ant-slider:hover .ant-slider-handle:not(.ant-tooltip-open){border-color:#69c0ff}.ant-slider-mark{position:absolute;top:14px;left:0;width:100%;font-size:14px}.ant-slider-mark-text{position:absolute;display:inline-block;color:#00000073;text-align:center;word-break:keep-all;cursor:pointer}.ant-slider-mark-text-active{color:#000000d9}.ant-slider-step{position:absolute;width:100%;height:4px;background:transparent}.ant-slider-dot{position:absolute;top:-2px;width:8px;height:8px;margin-left:-4px;background-color:#fff;border:2px solid #f0f0f0;border-radius:50%;cursor:pointer}.ant-slider-dot:first-child{margin-left:-4px}.ant-slider-dot:last-child{margin-left:-4px}.ant-slider-dot-active{border-color:#8cc8ff}.ant-slider-disabled{cursor:not-allowed}.ant-slider-disabled .ant-slider-track{background-color:#00000040!important}.ant-slider-disabled 
.ant-slider-handle,.ant-slider-disabled .ant-slider-dot{background-color:#fff;border-color:#00000040!important;box-shadow:none;cursor:not-allowed}.ant-slider-disabled .ant-slider-mark-text,.ant-slider-disabled .ant-slider-dot{cursor:not-allowed!important}.ant-space{display:inline-flex}.ant-space-vertical{flex-direction:column}.ant-space-align-center{align-items:center}.ant-space-align-start{align-items:flex-start}.ant-space-align-end{align-items:flex-end}.ant-space-align-baseline{align-items:baseline}.ant-space-item:empty{display:none}.ant-space-rtl{direction:rtl}.ant-statistic{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum"}.ant-statistic-title{margin-bottom:4px;color:#00000073;font-size:14px}.ant-statistic-content{color:#000000d9;font-size:24px;font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Helvetica Neue,Arial,Noto Sans,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol,"Noto Color Emoji"}.ant-statistic-content-value{display:inline-block;direction:ltr}.ant-statistic-content-prefix,.ant-statistic-content-suffix{display:inline-block}.ant-statistic-content-prefix{margin-right:4px}.ant-statistic-content-suffix{margin-left:4px}.ant-statistic-rtl{direction:rtl}.ant-statistic-rtl .ant-statistic-content-prefix{margin-right:0;margin-left:4px}.ant-statistic-rtl 
.ant-statistic-content-suffix{margin-right:4px;margin-left:0}.ant-steps{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";display:flex;width:100%;font-size:0}.ant-steps-item{position:relative;display:inline-block;flex:1;overflow:hidden;vertical-align:top}.ant-steps-item-container{outline:none}.ant-steps-item:last-child{flex:none}.ant-steps-item:last-child>.ant-steps-item-container>.ant-steps-item-tail,.ant-steps-item:last-child>.ant-steps-item-container>.ant-steps-item-content>.ant-steps-item-title:after{display:none}.ant-steps-item-icon,.ant-steps-item-content{display:inline-block;vertical-align:top}.ant-steps-item-icon{width:32px;height:32px;margin-right:8px;font-size:16px;font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Roboto,Helvetica Neue,Arial,Noto Sans,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol,"Noto Color Emoji";line-height:32px;text-align:center;border:1px solid rgba(0,0,0,.25);border-radius:32px;transition:background-color .3s,border-color .3s}.ant-steps-item-icon>.ant-steps-icon{position:relative;top:-1px;color:#1890ff;line-height:1}.ant-steps-item-tail{position:absolute;top:12px;left:0;width:100%;padding:0 10px}.ant-steps-item-tail:after{display:inline-block;width:100%;height:1px;background:#f0f0f0;border-radius:1px;transition:background .3s;content:""}.ant-steps-item-title{position:relative;display:inline-block;padding-right:16px;color:#000000d9;font-size:16px;line-height:32px}.ant-steps-item-title:after{position:absolute;top:16px;left:100%;display:block;width:9999px;height:1px;background:#f0f0f0;content:""}.ant-steps-item-subtitle{display:inline;margin-left:8px;color:#00000073;font-weight:400;font-size:14px}.ant-steps-item-description{color:#00000073;font-size:14px}.ant-steps-item-wait .ant-steps-item-icon{background-color:#fff;border-color:#00000040}.ant-steps-item-wait 
.ant-steps-item-icon>.ant-steps-icon{color:#00000040}.ant-steps-item-wait .ant-steps-item-icon>.ant-steps-icon .ant-steps-icon-dot{background:rgba(0,0,0,.25)}.ant-steps-item-wait>.ant-steps-item-container>.ant-steps-item-content>.ant-steps-item-title{color:#00000073}.ant-steps-item-wait>.ant-steps-item-container>.ant-steps-item-content>.ant-steps-item-title:after{background-color:#f0f0f0}.ant-steps-item-wait>.ant-steps-item-container>.ant-steps-item-content>.ant-steps-item-description{color:#00000073}.ant-steps-item-wait>.ant-steps-item-container>.ant-steps-item-tail:after{background-color:#f0f0f0}.ant-steps-item-process .ant-steps-item-icon{background-color:#fff;border-color:#1890ff}.ant-steps-item-process .ant-steps-item-icon>.ant-steps-icon{color:#1890ff}.ant-steps-item-process .ant-steps-item-icon>.ant-steps-icon .ant-steps-icon-dot{background:#1890ff}.ant-steps-item-process>.ant-steps-item-container>.ant-steps-item-content>.ant-steps-item-title{color:#000000d9}.ant-steps-item-process>.ant-steps-item-container>.ant-steps-item-content>.ant-steps-item-title:after{background-color:#f0f0f0}.ant-steps-item-process>.ant-steps-item-container>.ant-steps-item-content>.ant-steps-item-description{color:#000000d9}.ant-steps-item-process>.ant-steps-item-container>.ant-steps-item-tail:after{background-color:#f0f0f0}.ant-steps-item-process .ant-steps-item-icon{background:#1890ff}.ant-steps-item-process .ant-steps-item-icon>.ant-steps-icon{color:#fff}.ant-steps-item-process .ant-steps-item-title{font-weight:500}.ant-steps-item-finish .ant-steps-item-icon{background-color:#fff;border-color:#1890ff}.ant-steps-item-finish .ant-steps-item-icon>.ant-steps-icon{color:#1890ff}.ant-steps-item-finish .ant-steps-item-icon>.ant-steps-icon 
.ant-steps-icon-dot{background:#1890ff}.ant-steps-item-finish>.ant-steps-item-container>.ant-steps-item-content>.ant-steps-item-title{color:#000000d9}.ant-steps-item-finish>.ant-steps-item-container>.ant-steps-item-content>.ant-steps-item-title:after{background-color:#1890ff}.ant-steps-item-finish>.ant-steps-item-container>.ant-steps-item-content>.ant-steps-item-description{color:#00000073}.ant-steps-item-finish>.ant-steps-item-container>.ant-steps-item-tail:after{background-color:#1890ff}.ant-steps-item-error .ant-steps-item-icon{background-color:#fff;border-color:#ff4d4f}.ant-steps-item-error .ant-steps-item-icon>.ant-steps-icon{color:#ff4d4f}.ant-steps-item-error .ant-steps-item-icon>.ant-steps-icon .ant-steps-icon-dot{background:#ff4d4f}.ant-steps-item-error>.ant-steps-item-container>.ant-steps-item-content>.ant-steps-item-title{color:#ff4d4f}.ant-steps-item-error>.ant-steps-item-container>.ant-steps-item-content>.ant-steps-item-title:after{background-color:#f0f0f0}.ant-steps-item-error>.ant-steps-item-container>.ant-steps-item-content>.ant-steps-item-description{color:#ff4d4f}.ant-steps-item-error>.ant-steps-item-container>.ant-steps-item-tail:after{background-color:#f0f0f0}.ant-steps-item.ant-steps-next-error .ant-steps-item-title:after{background:#ff4d4f}.ant-steps .ant-steps-item:not(.ant-steps-item-active)>.ant-steps-item-container[role=button]{cursor:pointer}.ant-steps .ant-steps-item:not(.ant-steps-item-active)>.ant-steps-item-container[role=button] .ant-steps-item-title,.ant-steps .ant-steps-item:not(.ant-steps-item-active)>.ant-steps-item-container[role=button] .ant-steps-item-description,.ant-steps .ant-steps-item:not(.ant-steps-item-active)>.ant-steps-item-container[role=button] .ant-steps-item-icon .ant-steps-icon{transition:color .3s}.ant-steps .ant-steps-item:not(.ant-steps-item-active)>.ant-steps-item-container[role=button]:hover .ant-steps-item-title,.ant-steps 
.ant-steps-item:not(.ant-steps-item-active)>.ant-steps-item-container[role=button]:hover .ant-steps-item-subtitle,.ant-steps .ant-steps-item:not(.ant-steps-item-active)>.ant-steps-item-container[role=button]:hover .ant-steps-item-description{color:#1890ff}.ant-steps .ant-steps-item:not(.ant-steps-item-active):not(.ant-steps-item-process)>.ant-steps-item-container[role=button]:hover .ant-steps-item-icon{border-color:#1890ff}.ant-steps .ant-steps-item:not(.ant-steps-item-active):not(.ant-steps-item-process)>.ant-steps-item-container[role=button]:hover .ant-steps-item-icon .ant-steps-icon{color:#1890ff}.ant-steps-horizontal:not(.ant-steps-label-vertical) .ant-steps-item{margin-right:16px;white-space:nowrap}.ant-steps-horizontal:not(.ant-steps-label-vertical) .ant-steps-item:last-child{margin-right:0}.ant-steps-horizontal:not(.ant-steps-label-vertical) .ant-steps-item:last-child .ant-steps-item-title{padding-right:0}.ant-steps-horizontal:not(.ant-steps-label-vertical) .ant-steps-item-tail{display:none}.ant-steps-horizontal:not(.ant-steps-label-vertical) .ant-steps-item-description{max-width:140px;white-space:normal}.ant-steps-item-custom .ant-steps-item-icon{height:auto;background:none;border:0}.ant-steps-item-custom .ant-steps-item-icon>.ant-steps-icon{top:0;left:.5px;width:32px;height:32px;font-size:24px;line-height:32px}.ant-steps-item-custom.ant-steps-item-process .ant-steps-item-icon>.ant-steps-icon{color:#1890ff}.ant-steps:not(.ant-steps-vertical) .ant-steps-item-custom .ant-steps-item-icon{width:auto}.ant-steps-small.ant-steps-horizontal:not(.ant-steps-label-vertical) .ant-steps-item{margin-right:12px}.ant-steps-small.ant-steps-horizontal:not(.ant-steps-label-vertical) .ant-steps-item:last-child{margin-right:0}.ant-steps-small .ant-steps-item-icon{width:24px;height:24px;font-size:12px;line-height:24px;text-align:center;border-radius:24px}.ant-steps-small .ant-steps-item-title{padding-right:12px;font-size:14px;line-height:24px}.ant-steps-small 
.ant-steps-item-title:after{top:12px}.ant-steps-small .ant-steps-item-description{color:#00000073;font-size:14px}.ant-steps-small .ant-steps-item-tail{top:8px}.ant-steps-small .ant-steps-item-custom .ant-steps-item-icon{width:inherit;height:inherit;line-height:inherit;background:none;border:0;border-radius:0}.ant-steps-small .ant-steps-item-custom .ant-steps-item-icon>.ant-steps-icon{font-size:24px;line-height:24px;transform:none}.ant-steps-vertical{display:block}.ant-steps-vertical .ant-steps-item{display:block;overflow:visible}.ant-steps-vertical .ant-steps-item-icon{float:left;margin-right:16px}.ant-steps-vertical .ant-steps-item-content{display:block;min-height:48px;overflow:hidden}.ant-steps-vertical .ant-steps-item-title{line-height:32px}.ant-steps-vertical .ant-steps-item-description{padding-bottom:12px}.ant-steps-vertical>.ant-steps-item>.ant-steps-item-container>.ant-steps-item-tail{position:absolute;top:0;left:16px;width:1px;height:100%;padding:38px 0 6px}.ant-steps-vertical>.ant-steps-item>.ant-steps-item-container>.ant-steps-item-tail:after{width:1px;height:100%}.ant-steps-vertical>.ant-steps-item:not(:last-child)>.ant-steps-item-container>.ant-steps-item-tail{display:block}.ant-steps-vertical>.ant-steps-item>.ant-steps-item-container>.ant-steps-item-content>.ant-steps-item-title:after{display:none}.ant-steps-vertical.ant-steps-small .ant-steps-item-container .ant-steps-item-tail{position:absolute;top:0;left:12px;padding:30px 0 6px}.ant-steps-vertical.ant-steps-small .ant-steps-item-container .ant-steps-item-title{line-height:24px}@media (max-width: 480px){.ant-steps-horizontal.ant-steps-label-horizontal{display:block}.ant-steps-horizontal.ant-steps-label-horizontal .ant-steps-item{display:block;overflow:visible}.ant-steps-horizontal.ant-steps-label-horizontal .ant-steps-item-icon{float:left;margin-right:16px}.ant-steps-horizontal.ant-steps-label-horizontal 
.ant-steps-item-content{display:block;min-height:48px;overflow:hidden}.ant-steps-horizontal.ant-steps-label-horizontal .ant-steps-item-title{line-height:32px}.ant-steps-horizontal.ant-steps-label-horizontal .ant-steps-item-description{padding-bottom:12px}.ant-steps-horizontal.ant-steps-label-horizontal>.ant-steps-item>.ant-steps-item-container>.ant-steps-item-tail{position:absolute;top:0;left:16px;width:1px;height:100%;padding:38px 0 6px}.ant-steps-horizontal.ant-steps-label-horizontal>.ant-steps-item>.ant-steps-item-container>.ant-steps-item-tail:after{width:1px;height:100%}.ant-steps-horizontal.ant-steps-label-horizontal>.ant-steps-item:not(:last-child)>.ant-steps-item-container>.ant-steps-item-tail{display:block}.ant-steps-horizontal.ant-steps-label-horizontal>.ant-steps-item>.ant-steps-item-container>.ant-steps-item-content>.ant-steps-item-title:after{display:none}.ant-steps-horizontal.ant-steps-label-horizontal.ant-steps-small .ant-steps-item-container .ant-steps-item-tail{position:absolute;top:0;left:12px;padding:30px 0 6px}.ant-steps-horizontal.ant-steps-label-horizontal.ant-steps-small .ant-steps-item-container .ant-steps-item-title{line-height:24px}}.ant-steps-label-vertical .ant-steps-item{overflow:visible}.ant-steps-label-vertical .ant-steps-item-tail{margin-left:58px;padding:3.5px 24px}.ant-steps-label-vertical .ant-steps-item-content{display:block;width:116px;margin-top:8px;text-align:center}.ant-steps-label-vertical .ant-steps-item-icon{display:inline-block;margin-left:42px}.ant-steps-label-vertical .ant-steps-item-title{padding-right:0}.ant-steps-label-vertical .ant-steps-item-title:after{display:none}.ant-steps-label-vertical .ant-steps-item-subtitle{display:block;margin-bottom:4px;margin-left:0;line-height:1.5715}.ant-steps-label-vertical.ant-steps-small:not(.ant-steps-dot) .ant-steps-item-icon{margin-left:46px}.ant-steps-dot .ant-steps-item-title,.ant-steps-dot.ant-steps-small .ant-steps-item-title{line-height:1.5715}.ant-steps-dot 
.ant-steps-item-tail,.ant-steps-dot.ant-steps-small .ant-steps-item-tail{top:2px;width:100%;margin:0 0 0 70px;padding:0}.ant-steps-dot .ant-steps-item-tail:after,.ant-steps-dot.ant-steps-small .ant-steps-item-tail:after{width:calc(100% - 20px);height:3px;margin-left:12px}.ant-steps-dot .ant-steps-item:first-child .ant-steps-icon-dot,.ant-steps-dot.ant-steps-small .ant-steps-item:first-child .ant-steps-icon-dot{left:2px}.ant-steps-dot .ant-steps-item-icon,.ant-steps-dot.ant-steps-small .ant-steps-item-icon{width:8px;height:8px;margin-left:67px;padding-right:0;line-height:8px;background:transparent;border:0}.ant-steps-dot .ant-steps-item-icon .ant-steps-icon-dot,.ant-steps-dot.ant-steps-small .ant-steps-item-icon .ant-steps-icon-dot{position:relative;float:left;width:100%;height:100%;border-radius:100px;transition:all .3s}.ant-steps-dot .ant-steps-item-icon .ant-steps-icon-dot:after,.ant-steps-dot.ant-steps-small .ant-steps-item-icon .ant-steps-icon-dot:after{position:absolute;top:-12px;left:-26px;width:60px;height:32px;background:rgba(0,0,0,.001);content:""}.ant-steps-dot .ant-steps-item-content,.ant-steps-dot.ant-steps-small .ant-steps-item-content{width:140px}.ant-steps-dot .ant-steps-item-process .ant-steps-item-icon,.ant-steps-dot.ant-steps-small .ant-steps-item-process .ant-steps-item-icon{width:10px;height:10px;line-height:10px}.ant-steps-dot .ant-steps-item-process .ant-steps-item-icon .ant-steps-icon-dot,.ant-steps-dot.ant-steps-small .ant-steps-item-process .ant-steps-item-icon .ant-steps-icon-dot{top:-1px}.ant-steps-vertical.ant-steps-dot .ant-steps-item-icon{margin-top:8px;margin-left:0}.ant-steps-vertical.ant-steps-dot .ant-steps-item>.ant-steps-item-container>.ant-steps-item-tail{top:2px;left:-9px;margin:0;padding:22px 0 4px}.ant-steps-vertical.ant-steps-dot .ant-steps-item:first-child .ant-steps-icon-dot{left:0}.ant-steps-vertical.ant-steps-dot .ant-steps-item-process 
.ant-steps-icon-dot{left:-2px}.ant-steps-navigation{padding-top:12px}.ant-steps-navigation.ant-steps-small .ant-steps-item-container{margin-left:-12px}.ant-steps-navigation .ant-steps-item{overflow:visible;text-align:center}.ant-steps-navigation .ant-steps-item-container{display:inline-block;height:100%;margin-left:-16px;padding-bottom:12px;text-align:left;transition:opacity .3s}.ant-steps-navigation .ant-steps-item-container .ant-steps-item-content{max-width:auto}.ant-steps-navigation .ant-steps-item-container .ant-steps-item-title{max-width:100%;padding-right:0;overflow:hidden;white-space:nowrap;text-overflow:ellipsis}.ant-steps-navigation .ant-steps-item-container .ant-steps-item-title:after{display:none}.ant-steps-navigation .ant-steps-item:not(.ant-steps-item-active) .ant-steps-item-container[role=button]{cursor:pointer}.ant-steps-navigation .ant-steps-item:not(.ant-steps-item-active) .ant-steps-item-container[role=button]:hover{opacity:.85}.ant-steps-navigation .ant-steps-item:last-child{flex:1}.ant-steps-navigation .ant-steps-item:last-child:after{display:none}.ant-steps-navigation .ant-steps-item:after{position:absolute;top:50%;left:100%;display:inline-block;width:12px;height:12px;margin-top:-14px;margin-left:-2px;border:1px solid rgba(0,0,0,.25);border-bottom:none;border-left:none;transform:rotate(45deg);content:""}.ant-steps-navigation .ant-steps-item:before{position:absolute;bottom:0;left:50%;display:inline-block;width:0;height:3px;background-color:#1890ff;transition:width .3s,left .3s;transition-timing-function:ease-out;content:""}.ant-steps-navigation .ant-steps-item.ant-steps-item-active:before{left:0;width:100%}@media (max-width: 480px){.ant-steps-navigation>.ant-steps-item{margin-right:0!important}.ant-steps-navigation>.ant-steps-item:before{display:none}.ant-steps-navigation>.ant-steps-item.ant-steps-item-active:before{top:0;right:0;left:unset;display:block;width:3px;height:calc(100% - 
24px);
  }

  /* Still inside the small-screen media query: the arrow is re-drawn in
     normal flow, rotated 135deg. */
  .ant-steps-navigation>.ant-steps-item:after {
    position: relative;
    top: -2px;
    left: 50%;
    display: block;
    width: 8px;
    height: 8px;
    margin-bottom: 8px;
    text-align: center;
    transform: rotate(135deg);
  }

  .ant-steps-navigation>.ant-steps-item>.ant-steps-item-container>.ant-steps-item-tail {
    visibility: hidden;
  }
}

/* Rules scoped under the .ant-steps-flex-not-supported modifier class. */
.ant-steps-flex-not-supported.ant-steps-horizontal.ant-steps-label-horizontal .ant-steps-item {
  margin-left: -16px;
  padding-left: 16px;
  background: #fff;
}

.ant-steps-flex-not-supported.ant-steps-horizontal.ant-steps-label-horizontal.ant-steps-small .ant-steps-item {
  margin-left: -12px;
  padding-left: 12px;
}

.ant-steps-flex-not-supported.ant-steps-dot .ant-steps-item:last-child {
  overflow: hidden;
}

.ant-steps-flex-not-supported.ant-steps-dot .ant-steps-item:last-child .ant-steps-icon-dot:after {
  right: -200px;
  width: 200px;
}

.ant-steps-flex-not-supported.ant-steps-dot .ant-steps-item .ant-steps-icon-dot:before,
.ant-steps-flex-not-supported.ant-steps-dot .ant-steps-item .ant-steps-icon-dot:after {
  position: absolute;
  top: 0;
  left: -10px;
  width: 10px;
  height: 8px;
  background: #fff;
  content: "";
}

.ant-steps-flex-not-supported.ant-steps-dot .ant-steps-item .ant-steps-icon-dot:after {
  right: -10px;
  left: auto;
}

.ant-steps-flex-not-supported.ant-steps-dot .ant-steps-item-wait .ant-steps-item-icon>.ant-steps-icon .ant-steps-icon-dot {
  background: #ccc;
}

/* Switch: base track. Declaration order is kept as-is; the later
   line-height (20px) is the one that takes effect. */
.ant-switch {
  margin: 0;
  padding: 0;
  color: #000000d9;
  font-size: 14px;
  font-variant: tabular-nums;
  line-height: 1.5715;
  list-style: none;
  font-feature-settings: "tnum";
  position: relative;
  display: inline-block;
  box-sizing: border-box;
  min-width: 44px;
  height: 22px;
  line-height: 20px;
  vertical-align: middle;
  background-color: #00000040;
  border: 1px solid transparent;
  border-radius: 100px;
  cursor: pointer;
  transition: all
.36s;
  -webkit-user-select: none;
  -moz-user-select: none;
  -ms-user-select: none;
  user-select: none;
}

/* Text slot inside the switch track. */
.ant-switch-inner {
  display: block;
  margin-right: 6px;
  margin-left: 24px;
  color: #fff;
  font-size: 12px;
}

/* The handle (:after) and the loading icon share the same 18px box. */
.ant-switch-loading-icon,
.ant-switch:after {
  position: absolute;
  top: 1px;
  left: 1px;
  width: 18px;
  height: 18px;
  background-color: #fff;
  border-radius: 18px;
  cursor: pointer;
  transition: all .36s cubic-bezier(.78,.14,.15,.86);
  content: " ";
}

.ant-switch:after {
  box-shadow: 0 2px 4px #00230b33;
}

/* Handle widens while an enabled switch is being pressed. */
.ant-switch:not(.ant-switch-disabled):active:before,
.ant-switch:not(.ant-switch-disabled):active:after {
  width: 24px;
}

.ant-switch-loading-icon {
  z-index: 1;
  display: none;
  font-size: 12px;
  background: transparent;
}

.ant-switch-loading-icon svg {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  margin: auto;
}

.ant-switch-loading .ant-switch-loading-icon {
  display: inline-block;
  color: #000000d9;
}

.ant-switch-checked.ant-switch-loading .ant-switch-loading-icon {
  color: #1890ff;
}

.ant-switch:focus {
  outline: 0;
  box-shadow: 0 0 0 2px #1890ff33;
}

.ant-switch:focus:hover {
  box-shadow: none;
}

/* Small size variant. */
.ant-switch-small {
  min-width: 28px;
  height: 16px;
  line-height: 14px;
}

.ant-switch-small .ant-switch-inner {
  margin-right: 3px;
  margin-left: 18px;
  font-size: 12px;
}

.ant-switch-small:after {
  width: 12px;
  height: 12px;
}

.ant-switch-small:active:before,
.ant-switch-small:active:after {
  width: 16px;
}

.ant-switch-small .ant-switch-loading-icon {
  width: 12px;
  height: 12px;
}

.ant-switch-small.ant-switch-checked .ant-switch-inner {
  margin-right: 18px;
  margin-left: 3px;
}

.ant-switch-small.ant-switch-checked .ant-switch-loading-icon {
  left: 100%;
  margin-left: -13px;
}

.ant-switch-small.ant-switch-loading .ant-switch-loading-icon {
  font-weight: 700;
  transform: scale(.66667);
}

/* Checked state: coloured track, handle moved to the right edge. */
.ant-switch-checked {
  background-color: #1890ff;
}

.ant-switch-checked .ant-switch-inner {
  margin-right: 24px;
  margin-left: 6px;
}

.ant-switch-checked:after {
  left: 100%;
  margin-left: -1px;
  transform: translate(-100%);
}

.ant-switch-checked .ant-switch-loading-icon {
  left: 100%;
  margin-left: -19px;
}

/* Loading and disabled switches are dimmed and show a not-allowed cursor. */
.ant-switch-loading,
.ant-switch-disabled {
  cursor: not-allowed;
  opacity: .4;
}

.ant-switch-loading *,
.ant-switch-disabled
* {
  cursor: not-allowed;
}

.ant-switch-loading:before,
.ant-switch-disabled:before,
.ant-switch-loading:after,
.ant-switch-disabled:after {
  cursor: not-allowed;
}

/* Full-turn rotation at the reduced (.66667) scale; prefixed and unprefixed. */
@-webkit-keyframes AntSwitchSmallLoadingCircle {
  0% {
    transform: rotate(0) scale(.66667);
    transform-origin: 50% 50%;
  }
  to {
    transform: rotate(360deg) scale(.66667);
    transform-origin: 50% 50%;
  }
}

@keyframes AntSwitchSmallLoadingCircle {
  0% {
    transform: rotate(0) scale(.66667);
    transform-origin: 50% 50%;
  }
  to {
    transform: rotate(360deg) scale(.66667);
    transform-origin: 50% 50%;
  }
}

/* Table wrapper: clearfix via :before/:after. */
.ant-table-wrapper:before,
.ant-table-wrapper:after {
  display: table;
  content: "";
}

.ant-table-wrapper:after {
  clear: both;
}

/* Table: base container. */
.ant-table {
  box-sizing: border-box;
  margin: 0;
  padding: 0;
  color: #000000d9;
  font-size: 14px;
  font-variant: tabular-nums;
  line-height: 1.5715;
  list-style: none;
  font-feature-settings: "tnum";
  position: relative;
  clear: both;
  background: #fff;
}

.ant-table-body {
  transition: opacity .3s;
}

.ant-table-empty .ant-table-body {
  overflow-x: auto !important;
  overflow-y: hidden !important;
}

.ant-table table {
  width: 100%;
  text-align: left;
  border-radius: 2px 2px 0 0;
  border-collapse: separate;
  border-spacing: 0;
}

.ant-table-layout-fixed table {
  table-layout: fixed;
}

/* Header cells. */
.ant-table-thead>tr>th {
  color: #000000d9;
  font-weight: 500;
  text-align: left;
  background: #fafafa;
  border-bottom: 1px solid #f0f0f0;
  transition: background .3s ease;
}

/* Header cells spanning more than one column are centred. */
.ant-table-thead>tr>th[colspan]:not([colspan="1"]) {
  text-align: center;
}

/* Filter icon pinned to the right edge of the header cell. */
.ant-table-thead>tr>th .anticon-filter,
.ant-table-thead>tr>th .ant-table-filter-icon {
  position: absolute;
  top: 0;
  right: 0;
  width: 28px;
  height: 100%;
  color: #bfbfbf;
  font-size: 12px;
  text-align: center;
  cursor: pointer;
  transition: all .3s;
}

.ant-table-thead>tr>th .anticon-filter>svg,
.ant-table-thead>tr>th .ant-table-filter-icon>svg {
  position: absolute;
  top: 50%;
  left: 50%;
  margin-top: -5px;
  margin-left: -6px;
}

.ant-table-thead>tr>th .ant-table-filter-selected.anticon {
  color: #1890ff;
}

.ant-table-thead>tr>th .ant-table-column-sorter {
  display: table-cell;
  vertical-align: middle;
}

.ant-table-thead>tr>th .ant-table-column-sorter
.ant-table-column-sorter-inner {
  height: 1em;
  margin-top: .35em;
  margin-left: .57142857em;
  color: #bfbfbf;
  line-height: 1em;
  text-align: center;
  transition: all .3s;
}

/* Sort arrows. Declaration order (including the repeated display and
   font-size entries and the backslash-escaped font-size value) is kept
   exactly as emitted by the build. */
.ant-table-thead>tr>th .ant-table-column-sorter .ant-table-column-sorter-inner .ant-table-column-sorter-up,
.ant-table-thead>tr>th .ant-table-column-sorter .ant-table-column-sorter-inner .ant-table-column-sorter-down {
  display: inline-block;
  font-size: 12px;
  font-size: 11px \	;
  transform: scale(.91666667) rotate(0);
  display: block;
  height: 1em;
  line-height: 1em;
  transition: all .3s;
}

:root .ant-table-thead>tr>th .ant-table-column-sorter .ant-table-column-sorter-inner .ant-table-column-sorter-up,
:root .ant-table-thead>tr>th .ant-table-column-sorter .ant-table-column-sorter-inner .ant-table-column-sorter-down {
  font-size: 12px;
}

/* Active sort direction is highlighted. */
.ant-table-thead>tr>th .ant-table-column-sorter .ant-table-column-sorter-inner .ant-table-column-sorter-up.on,
.ant-table-thead>tr>th .ant-table-column-sorter .ant-table-column-sorter-inner .ant-table-column-sorter-down.on {
  color: #1890ff;
}

.ant-table-thead>tr>th .ant-table-column-sorter .ant-table-column-sorter-inner-full {
  margin-top: -.15em;
}

.ant-table-thead>tr>th .ant-table-column-sorter .ant-table-column-sorter-inner-full .ant-table-column-sorter-up,
.ant-table-thead>tr>th .ant-table-column-sorter .ant-table-column-sorter-inner-full .ant-table-column-sorter-down {
  height: .5em;
  line-height: .5em;
}

.ant-table-thead>tr>th .ant-table-column-sorter .ant-table-column-sorter-inner-full .ant-table-column-sorter-down {
  margin-top: .125em;
}

/* Header cells that carry filter/sort actions. */
.ant-table-thead>tr>th.ant-table-column-has-actions {
  position: relative;
  background-clip: padding-box;
  -webkit-background-clip: border-box;
}

.ant-table-thead>tr>th.ant-table-column-has-actions.ant-table-column-has-filters {
  padding-right: 30px !important;
}

.ant-table-thead>tr>th.ant-table-column-has-actions.ant-table-column-has-filters .anticon-filter.ant-table-filter-open,
.ant-table-thead>tr>th.ant-table-column-has-actions.ant-table-column-has-filters
.ant-table-filter-icon.ant-table-filter-open {
  color: #00000073;
  background: #e5e5e5;
}

/* Filter icon: hover and active colours. */
.ant-table-thead>tr>th.ant-table-column-has-actions.ant-table-column-has-filters:hover .anticon-filter:hover,
.ant-table-thead>tr>th.ant-table-column-has-actions.ant-table-column-has-filters:hover .ant-table-filter-icon:hover {
  color: #00000073;
  background: #e5e5e5;
}

.ant-table-thead>tr>th.ant-table-column-has-actions.ant-table-column-has-filters:hover .anticon-filter:active,
.ant-table-thead>tr>th.ant-table-column-has-actions.ant-table-column-has-filters:hover .ant-table-filter-icon:active {
  color: #000000d9;
}

/* Sortable header cells are clickable and darken on hover. */
.ant-table-thead>tr>th.ant-table-column-has-actions.ant-table-column-has-sorters {
  cursor: pointer;
}

.ant-table-thead>tr>th.ant-table-column-has-actions.ant-table-column-has-sorters:hover {
  background: #f2f2f2;
}

.ant-table-thead>tr>th.ant-table-column-has-actions.ant-table-column-has-sorters:hover .anticon-filter,
.ant-table-thead>tr>th.ant-table-column-has-actions.ant-table-column-has-sorters:hover .ant-table-filter-icon {
  background: #f2f2f2;
}

.ant-table-thead>tr>th.ant-table-column-has-actions.ant-table-column-has-sorters:active .ant-table-column-sorter-up:not(.on),
.ant-table-thead>tr>th.ant-table-column-has-actions.ant-table-column-has-sorters:active .ant-table-column-sorter-down:not(.on) {
  color: #00000073;
}

/* Header column wrapper and the title/sorter pair laid out as table cells. */
.ant-table-thead>tr>th .ant-table-header-column {
  display: inline-block;
  max-width: 100%;
  vertical-align: top;
}

.ant-table-thead>tr>th .ant-table-header-column .ant-table-column-sorters {
  display: table;
}

.ant-table-thead>tr>th .ant-table-header-column .ant-table-column-sorters>.ant-table-column-title {
  display: table-cell;
  vertical-align: middle;
}

.ant-table-thead>tr>th .ant-table-header-column .ant-table-column-sorters>*:not(.ant-table-column-sorter) {
  position: relative;
}

/* Transparent overlay covering the sorters box; tinted on hover by the
   rule that follows. */
.ant-table-thead>tr>th .ant-table-header-column .ant-table-column-sorters:before {
  position: absolute;
  top: 0;
  right: 0;
  bottom: 0;
  left: 0;
  background: transparent;
  transition: all .3s;
  content: "";
}

.ant-table-thead>tr>th .ant-table-header-column
.ant-table-column-sorters:hover:before {
  background: rgba(0,0,0,.04);
}

.ant-table-thead>tr>th.ant-table-column-has-sorters {
  -webkit-user-select: none;
  -moz-user-select: none;
  -ms-user-select: none;
  user-select: none;
}

/* Rounded outer corners on the first header row. */
.ant-table-thead>tr:first-child>th:first-child {
  border-top-left-radius: 2px;
}

.ant-table-thead>tr:first-child>th:last-child {
  border-top-right-radius: 2px;
}

.ant-table-thead>tr:not(:last-child)>th[colspan] {
  border-bottom: 0;
}

/* Body cells. */
.ant-table-tbody>tr>td {
  border-bottom: 1px solid #f0f0f0;
  transition: background .3s;
}

/* Row hover highlight (skipped for expanded and selected rows). */
.ant-table-thead>tr.ant-table-row-hover:not(.ant-table-expanded-row):not(.ant-table-row-selected)>td,
.ant-table-tbody>tr.ant-table-row-hover:not(.ant-table-expanded-row):not(.ant-table-row-selected)>td,
.ant-table-thead>tr:hover:not(.ant-table-expanded-row):not(.ant-table-row-selected)>td,
.ant-table-tbody>tr:hover:not(.ant-table-expanded-row):not(.ant-table-row-selected)>td {
  background: #e6f7ff;
}

.ant-table-thead>tr.ant-table-row-selected>td.ant-table-column-sort,
.ant-table-tbody>tr.ant-table-row-selected>td.ant-table-column-sort {
  background: #e6f7ff;
}

.ant-table-thead>tr:hover.ant-table-row-selected>td,
.ant-table-tbody>tr:hover.ant-table-row-selected>td {
  background: #dcf4ff;
}

.ant-table-thead>tr:hover.ant-table-row-selected>td.ant-table-column-sort,
.ant-table-tbody>tr:hover.ant-table-row-selected>td.ant-table-column-sort {
  background: #e6f7ff;
}

.ant-table-thead>tr:hover {
  background: none;
}

/* Footer. */
.ant-table-footer {
  position: relative;
  padding: 16px;
  color: #000000d9;
  background: #fafafa;
  border-top: 1px solid #f0f0f0;
  border-radius: 0 0 2px 2px;
}

.ant-table-footer:before {
  position: absolute;
  top: -1px;
  left: 0;
  width: 100%;
  height: 1px;
  background: #fafafa;
  content: "";
}

.ant-table.ant-table-bordered .ant-table-footer {
  border: 1px solid #f0f0f0;
}

/* Title. */
.ant-table-title {
  position: relative;
  top: 1px;
  padding: 16px 0;
  border-radius: 2px 2px 0 0;
}

.ant-table.ant-table-bordered .ant-table-title {
  padding-right: 16px;
  padding-left: 16px;
  border: 1px solid #f0f0f0;
}

.ant-table-title+.ant-table-content {
  position: relative;
  border-radius: 2px 2px 0 0;
}

.ant-table-bordered
.ant-table-title+.ant-table-content,
.ant-table-bordered .ant-table-title+.ant-table-content table,
.ant-table-bordered .ant-table-title+.ant-table-content .ant-table-thead>tr:first-child>th {
  border-radius: 0;
}

.ant-table-without-column-header .ant-table-title+.ant-table-content,
.ant-table-without-column-header table {
  border-radius: 0;
}

.ant-table-without-column-header.ant-table-bordered.ant-table-empty .ant-table-placeholder {
  border-top: 1px solid #f0f0f0;
  border-radius: 2px;
}

/* Selected rows and sorted columns. */
.ant-table-tbody>tr.ant-table-row-selected td {
  color: inherit;
  background: #e6f7ff;
}

.ant-table-thead>tr>th.ant-table-column-sort {
  background: #f5f5f5;
}

.ant-table-tbody>tr>td.ant-table-column-sort {
  background: rgba(0,0,0,.01);
}

/* Shared cell padding for header and body cells. */
.ant-table-thead>tr>th,
.ant-table-tbody>tr>td {
  padding: 16px;
  overflow-wrap: break-word;
}

.ant-table-expand-icon-th,
.ant-table-row-expand-icon-cell {
  width: 50px;
  min-width: 50px;
  text-align: center;
}

.ant-table-header {
  overflow: hidden;
  background: #fafafa;
}

.ant-table-header table {
  border-radius: 2px 2px 0 0;
}

/* Loading state: body dimmed, spinner holder centred. */
.ant-table-loading {
  position: relative;
}

.ant-table-loading .ant-table-body {
  background: #fff;
  opacity: .5;
}

.ant-table-loading .ant-table-spin-holder {
  position: absolute;
  top: 50%;
  left: 50%;
  height: 20px;
  margin-left: -30px;
  line-height: 20px;
}

.ant-table-loading .ant-table-with-pagination {
  margin-top: -20px;
}

.ant-table-loading .ant-table-without-pagination {
  margin-top: 10px;
}

/* Bordered variant: outer table borders (right/bottom are zeroed here). */
.ant-table-bordered .ant-table-header>table,
.ant-table-bordered .ant-table-body>table,
.ant-table-bordered .ant-table-fixed-left table,
.ant-table-bordered .ant-table-fixed-right table {
  border: 1px solid #f0f0f0;
  border-right: 0;
  border-bottom: 0;
}

.ant-table-bordered.ant-table-empty .ant-table-placeholder {
  border-right: 1px solid #f0f0f0;
  border-left: 1px solid #f0f0f0;
}

.ant-table-bordered.ant-table-fixed-header .ant-table-header>table {
  border-bottom: 0;
}

.ant-table-bordered.ant-table-fixed-header .ant-table-body>table {
  border-top-left-radius: 0;
  border-top-right-radius: 0;
}

.ant-table-bordered.ant-table-fixed-header
.ant-table-header+.ant-table-body>table,
.ant-table-bordered.ant-table-fixed-header .ant-table-body-inner>table {
  border-top: 0;
}

.ant-table-bordered .ant-table-thead>tr:not(:last-child)>th {
  border-bottom: 1px solid #f0f0f0;
}

.ant-table-bordered .ant-table-thead>tr>th,
.ant-table-bordered .ant-table-tbody>tr>td {
  border-right: 1px solid #f0f0f0;
}

/* Placeholder block (styled under .ant-table-empty elsewhere in this sheet). */
.ant-table-placeholder {
  position: relative;
  z-index: 1;
  margin-top: -1px;
  padding: 16px;
  color: #00000040;
  font-size: 14px;
  text-align: center;
  background: #fff;
  border-top: 1px solid #f0f0f0;
  border-bottom: 1px solid #f0f0f0;
  border-radius: 0 0 2px 2px;
}

.ant-table-pagination.ant-pagination {
  float: right;
  margin: 16px 0;
}

/* Filter dropdown panel. */
.ant-table-filter-dropdown {
  position: relative;
  min-width: 96px;
  margin-left: -8px;
  background: #fff;
  border-radius: 2px;
  box-shadow: 0 2px 8px #00000026;
}

.ant-table-filter-dropdown .ant-dropdown-menu {
  max-height: calc(100vh - 130px);
  overflow-x: hidden;
  border: 0;
  border-radius: 2px 2px 0 0;
  box-shadow: none;
}

.ant-table-filter-dropdown .ant-dropdown-menu-item>label+span {
  padding-right: 0;
}

.ant-table-filter-dropdown .ant-dropdown-menu-sub {
  border-radius: 2px;
  box-shadow: 0 2px 8px #00000026;
}

.ant-table-filter-dropdown .ant-dropdown-menu .ant-dropdown-submenu-contain-selected .ant-dropdown-menu-submenu-title:after {
  color: #1890ff;
  font-weight: 700;
  text-shadow: 0 0 2px #bae7ff;
}

.ant-table-filter-dropdown .ant-dropdown-menu-item {
  overflow: hidden;
}

.ant-table-filter-dropdown>.ant-dropdown-menu>.ant-dropdown-menu-item:last-child,
.ant-table-filter-dropdown>.ant-dropdown-menu>.ant-dropdown-menu-submenu:last-child .ant-dropdown-menu-submenu-title {
  border-radius: 0;
}

/* Button row at the bottom of the filter dropdown. */
.ant-table-filter-dropdown-btns {
  padding: 7px 8px;
  overflow: hidden;
  border-top: 1px solid
#f0f0f0}/* Filter dropdown links: the confirm link floats left and the clear link floats right. */.ant-table-filter-dropdown-link{color:#1890ff}.ant-table-filter-dropdown-link:hover{color:#40a9ff}.ant-table-filter-dropdown-link:active{color:#096dd9}.ant-table-filter-dropdown-link.confirm{float:left}.ant-table-filter-dropdown-link.clear{float:right}/* Selection column controls and the selection menu panel. */.ant-table-selection{white-space:nowrap}.ant-table-selection-select-all-custom{margin-right:4px!important}.ant-table-selection .anticon-down{color:#bfbfbf;transition:all .3s}.ant-table-selection-menu{min-width:96px;margin-top:5px;margin-left:-30px;background:#fff;border-radius:2px;box-shadow:0 2px 8px #00000026}.ant-table-selection-menu .ant-action-down{color:#bfbfbf}.ant-table-selection-down{display:inline-block;padding:0;line-height:1;cursor:pointer}.ant-table-selection-down:hover .anticon-down{color:#0009}/* Row expand icon: a 17px bordered inline-block. NOTE(review): color and transition are each declared twice in this rule; the later declarations (color:inherit, transition:all .3s) are the ones that apply. */.ant-table-row-expand-icon{color:#1890ff;text-decoration:none;cursor:pointer;transition:color .3s;display:inline-block;width:17px;height:17px;color:inherit;line-height:13px;text-align:center;background:#fff;border:1px solid #f0f0f0;border-radius:2px;outline:none;transition:all .3s;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.ant-table-row-expand-icon:focus,.ant-table-row-expand-icon:hover{color:#40a9ff}.ant-table-row-expand-icon:active{color:#096dd9}.ant-table-row-expand-icon:focus,.ant-table-row-expand-icon:hover,.ant-table-row-expand-icon:active{border-color:currentColor}/* Generated content: a minus sign when expanded, a plus sign when collapsed, and a hidden dot for the spaced variant. */.ant-table-row-expanded:after{content:"-"}.ant-table-row-collapsed:after{content:"+"}.ant-table-row-spaced{visibility:hidden}.ant-table-row-spaced:after{content:"."}/* Cell text overflow helpers: single-line ellipsis and break-word. */.ant-table-row-cell-ellipsis,.ant-table-row-cell-ellipsis .ant-table-column-title{overflow:hidden;white-space:nowrap;text-overflow:ellipsis}.ant-table-row-cell-ellipsis .ant-table-column-title{display:block}.ant-table-row-cell-break-word{word-wrap:break-word;word-break:break-word}/* Expanded rows keep the same background on hover. */tr.ant-table-expanded-row,tr.ant-table-expanded-row:hover{background:#fbfbfb}tr.ant-table-expanded-row td>.ant-table-wrapper{margin:-16px -16px 
-17px}.ant-table .ant-table-row-indent+.ant-table-row-expand-icon{margin-right:8px}/* Scroll container: overflow is auto, with horizontal overflow hidden. */.ant-table-scroll{overflow:auto;overflow-x:hidden}.ant-table-scroll table{min-width:100%}.ant-table-body-inner{height:100%}/* Fixed-header layout: the header scrolls too, and its 20px bottom padding is cancelled by a -20px bottom margin. */.ant-table-fixed-header>.ant-table-content>.ant-table-scroll>.ant-table-body{position:relative;background:#fff}.ant-table-fixed-header .ant-table-body-inner{overflow:scroll}.ant-table-fixed-header .ant-table-scroll .ant-table-header{margin-bottom:-20px;padding-bottom:20px;overflow:scroll;opacity:.9999}.ant-table-fixed-header .ant-table-scroll .ant-table-header::-webkit-scrollbar{border:1px solid #f0f0f0;border-width:0 0 1px 0}/* Makes the scrollbar transparent via scrollbar-color and the ::-webkit-scrollbar pseudo-element. */.ant-table-hide-scrollbar{scrollbar-color:transparent transparent;min-width:unset}.ant-table-hide-scrollbar::-webkit-scrollbar{min-width:inherit;background-color:transparent}.ant-table-bordered.ant-table-fixed-header .ant-table-scroll .ant-table-header::-webkit-scrollbar{border:1px solid #f0f0f0;border-width:1px 1px 1px 0}.ant-table-bordered.ant-table-fixed-header .ant-table-scroll .ant-table-header.ant-table-hide-scrollbar .ant-table-thead>tr:only-child>th:last-child{border-right-color:transparent}/* Fixed left/right column overlays: absolutely positioned at top 0 with z-index 1 and a box-shadow transition. */.ant-table-fixed-left,.ant-table-fixed-right{position:absolute;top:0;z-index:1;overflow:hidden;border-radius:0;transition:box-shadow .3s ease}.ant-table-fixed-left table,.ant-table-fixed-right table{width:auto;background:#fff}.ant-table-fixed-header .ant-table-fixed-left .ant-table-body-outer .ant-table-fixed,.ant-table-fixed-header .ant-table-fixed-right .ant-table-body-outer .ant-table-fixed{border-radius:0}/* Left overlay: pinned to left 0, shadow offset 6px to the right. */.ant-table-fixed-left{left:0;box-shadow:6px 0 6px -4px #00000026}.ant-table-fixed-left .ant-table-header{overflow-y:hidden}.ant-table-fixed-left .ant-table-body-inner{margin-right:-20px;padding-right:20px}.ant-table-fixed-header .ant-table-fixed-left .ant-table-body-inner{padding-right:0}.ant-table-fixed-left,.ant-table-fixed-left table{border-radius:2px 0 0}.ant-table-fixed-left 
.ant-table-thead>tr>th:last-child{border-top-right-radius:0}/* Right overlay: pinned to right 0, shadow offset 6px to the left. */.ant-table-fixed-right{right:0;box-shadow:-6px 0 6px -4px #00000026}.ant-table-fixed-right,.ant-table-fixed-right table{border-radius:0 2px 0 0}.ant-table-fixed-right .ant-table-expanded-row{color:transparent;pointer-events:none}.ant-table-fixed-right .ant-table-thead>tr>th:first-child{border-top-left-radius:0}/* The overlay shadow is removed when the matching scroll-position class is present on the table. */.ant-table.ant-table-scroll-position-left .ant-table-fixed-left,.ant-table.ant-table-scroll-position-right .ant-table-fixed-right{box-shadow:none}/* Selection column sizing and alignment. */.ant-table colgroup>col.ant-table-selection-col{width:60px}.ant-table-thead>tr>th.ant-table-selection-column-custom .ant-table-selection{margin-right:-15px}.ant-table-thead>tr>th.ant-table-selection-column,.ant-table-tbody>tr>td.ant-table-selection-column{text-align:center}.ant-table-thead>tr>th.ant-table-selection-column .ant-radio-wrapper,.ant-table-tbody>tr>td.ant-table-selection-column .ant-radio-wrapper{margin-right:0}.ant-table-row[class*=ant-table-row-level-0] .ant-table-selection-column>span{display:inline-block}/* Sticky fixed cells. Their :after pseudo-elements are 30px-wide strips translated by 100% outside the cell edge, with a box-shadow transition and no pointer events. */.ant-table-cell-fix-left,.ant-table-cell-fix-right{position:sticky!important;z-index:1;background:#fff}.ant-table-cell-fix-left-first:after,.ant-table-cell-fix-left-last:after{position:absolute;top:0;right:0;bottom:-1px;width:30px;transform:translate(100%);transition:box-shadow .3s;content:"";pointer-events:none}.ant-table-cell-fix-right-first:after,.ant-table-cell-fix-right-last:after{position:absolute;top:0;bottom:-1px;left:0;width:30px;transform:translate(-100%);transition:box-shadow .3s;content:"";pointer-events:none}/* Container :before and :after are 30px-wide strips at the left and right edges. */.ant-table .ant-table-container:before,.ant-table .ant-table-container:after{position:absolute;top:0;bottom:0;z-index:1;width:30px;transition:box-shadow .3s;content:"";pointer-events:none}.ant-table .ant-table-container:before{left:0}.ant-table .ant-table-container:after{right:0}/* ping-left on tables without the has-fix-left class (this rule group continues past the end of this line). */.ant-table-ping-left:not(.ant-table-has-fix-left) .ant-table-container{position:relative}.ant-table-ping-left:not(.ant-table-has-fix-left) 
.ant-table-container:before{box-shadow:inset 10px 0 8px -8px #00000026}/* ping-left: the same inset shadow (10px offset) is applied to the fixed-left cell :after strips. */.ant-table-ping-left .ant-table-cell-fix-left-first:after,.ant-table-ping-left .ant-table-cell-fix-left-last:after{box-shadow:inset 10px 0 8px -8px #00000026}.ant-table-ping-left .ant-table-cell-fix-left-last:before{background-color:transparent!important}/* ping-right mirrors ping-left with the inset shadow offset negated (-10px). */.ant-table-ping-right:not(.ant-table-has-fix-right) .ant-table-container{position:relative}.ant-table-ping-right:not(.ant-table-has-fix-right) .ant-table-container:after{box-shadow:inset -10px 0 8px -8px #00000026}.ant-table-ping-right .ant-table-cell-fix-right-first:after,.ant-table-ping-right .ant-table-cell-fix-right-last:after{box-shadow:inset -10px 0 8px -8px #00000026}.ant-table-filter-dropdown .ant-checkbox-wrapper+span,.ant-table-filter-dropdown-submenu .ant-checkbox-wrapper+span{padding-left:8px}/* Applied only where the engine reports support for -moz-appearance: meterbar. */@supports (-moz-appearance: meterbar){.ant-table-thead>tr>th.ant-table-column-has-actions{background-clip:padding-box}}/* Middle-size table: title and footer padding (the value continues on the next line). */.ant-table.ant-table-middle>.ant-table-title,.ant-table.ant-table-middle>.ant-table-content>.ant-table-footer{padding:12px 
8px}/* Middle size: every th and td reached through the header, body, scroll and fixed-left/fixed-right tables gets 12px 8px padding. */.ant-table.ant-table-middle>.ant-table-content>.ant-table-header>table>.ant-table-thead>tr>th,.ant-table.ant-table-middle>.ant-table-content>.ant-table-body>table>.ant-table-thead>tr>th,.ant-table.ant-table-middle>.ant-table-content>.ant-table-scroll>.ant-table-header>table>.ant-table-thead>tr>th,.ant-table.ant-table-middle>.ant-table-content>.ant-table-scroll>.ant-table-body>table>.ant-table-thead>tr>th,.ant-table.ant-table-middle>.ant-table-content>.ant-table-fixed-left>.ant-table-header>table>.ant-table-thead>tr>th,.ant-table.ant-table-middle>.ant-table-content>.ant-table-fixed-right>.ant-table-header>table>.ant-table-thead>tr>th,.ant-table.ant-table-middle>.ant-table-content>.ant-table-fixed-left>.ant-table-body-outer>.ant-table-body-inner>table>.ant-table-thead>tr>th,.ant-table.ant-table-middle>.ant-table-content>.ant-table-fixed-right>.ant-table-body-outer>.ant-table-body-inner>table>.ant-table-thead>tr>th,.ant-table.ant-table-middle>.ant-table-content>.ant-table-header>table>.ant-table-tbody>tr>td,.ant-table.ant-table-middle>.ant-table-content>.ant-table-body>table>.ant-table-tbody>tr>td,.ant-table.ant-table-middle>.ant-table-content>.ant-table-scroll>.ant-table-header>table>.ant-table-tbody>tr>td,.ant-table.ant-table-middle>.ant-table-content>.ant-table-scroll>.ant-table-body>table>.ant-table-tbody>tr>td,.ant-table.ant-table-middle>.ant-table-content>.ant-table-fixed-left>.ant-table-header>table>.ant-table-tbody>tr>td,.ant-table.ant-table-middle>.ant-table-content>.ant-table-fixed-right>.ant-table-header>table>.ant-table-tbody>tr>td,.ant-table.ant-table-middle>.ant-table-content>.ant-table-fixed-left>.ant-table-body-outer>.ant-table-body-inner>table>.ant-table-tbody>tr>td,.ant-table.ant-table-middle>.ant-table-content>.ant-table-fixed-right>.ant-table-body-outer>.ant-table-body-inner>table>.ant-table-tbody>tr>td{padding:12px 8px}/* A table wrapper nested in an expanded row uses negative margins (the value continues on the next line). */.ant-table.ant-table-middle tr.ant-table-expanded-row td>.ant-table-wrapper{margin:-12px -8px 
-13px}/* Small size: title and footer get 8px padding, and so does every th and td reached through the header, body, scroll and fixed-left/fixed-right tables. */.ant-table.ant-table-small>.ant-table-title,.ant-table.ant-table-small>.ant-table-content>.ant-table-footer{padding:8px}.ant-table.ant-table-small>.ant-table-content>.ant-table-header>table>.ant-table-thead>tr>th,.ant-table.ant-table-small>.ant-table-content>.ant-table-body>table>.ant-table-thead>tr>th,.ant-table.ant-table-small>.ant-table-content>.ant-table-scroll>.ant-table-header>table>.ant-table-thead>tr>th,.ant-table.ant-table-small>.ant-table-content>.ant-table-scroll>.ant-table-body>table>.ant-table-thead>tr>th,.ant-table.ant-table-small>.ant-table-content>.ant-table-fixed-left>.ant-table-header>table>.ant-table-thead>tr>th,.ant-table.ant-table-small>.ant-table-content>.ant-table-fixed-right>.ant-table-header>table>.ant-table-thead>tr>th,.ant-table.ant-table-small>.ant-table-content>.ant-table-fixed-left>.ant-table-body-outer>.ant-table-body-inner>table>.ant-table-thead>tr>th,.ant-table.ant-table-small>.ant-table-content>.ant-table-fixed-right>.ant-table-body-outer>.ant-table-body-inner>table>.ant-table-thead>tr>th,.ant-table.ant-table-small>.ant-table-content>.ant-table-header>table>.ant-table-tbody>tr>td,.ant-table.ant-table-small>.ant-table-content>.ant-table-body>table>.ant-table-tbody>tr>td,.ant-table.ant-table-small>.ant-table-content>.ant-table-scroll>.ant-table-header>table>.ant-table-tbody>tr>td,.ant-table.ant-table-small>.ant-table-content>.ant-table-scroll>.ant-table-body>table>.ant-table-tbody>tr>td,.ant-table.ant-table-small>.ant-table-content>.ant-table-fixed-left>.ant-table-header>table>.ant-table-tbody>tr>td,.ant-table.ant-table-small>.ant-table-content>.ant-table-fixed-right>.ant-table-header>table>.ant-table-tbody>tr>td,.ant-table.ant-table-small>.ant-table-content>.ant-table-fixed-left>.ant-table-body-outer>.ant-table-body-inner>table>.ant-table-tbody>tr>td,.ant-table.ant-table-small>.ant-table-content>.ant-table-fixed-right>.ant-table-body-outer>.ant-table-body-inner>table>.ant-table-tbody>tr>td{padding:8px}.ant-table.ant-table-small
 tr.ant-table-expanded-row td > .ant-table-wrapper {
  margin: -8px -8px -9px;
}

.ant-table-small .ant-table-selection-column {
  width: 46px;
  min-width: 46px;
}

/* ---------- Timeline ---------- */

/* Root list: box model and list styling reset. */
.ant-timeline {
  box-sizing: border-box;
  color: #000000d9;
  font-size: 14px;
  font-variant: tabular-nums;
  line-height: 1.5715;
  font-feature-settings: "tnum";
  margin: 0;
  padding: 0;
  list-style: none;
}

/* One entry; the 20px bottom padding separates consecutive items. */
.ant-timeline-item {
  position: relative;
  margin: 0;
  padding: 0 0 20px;
  font-size: 14px;
  list-style: none;
}

/* Vertical connector drawn as a 2px left border. */
.ant-timeline-item-tail {
  position: absolute;
  top: 10px;
  left: 4px;
  height: calc(100% - 10px);
  border-left: 2px solid #f0f0f0;
}

.ant-timeline-item-pending .ant-timeline-item-head {
  font-size: 12px;
  background-color: transparent;
}

.ant-timeline-item-pending .ant-timeline-item-tail {
  display: none;
}

/* 10px round marker; the color variants below set its border color. */
.ant-timeline-item-head {
  position: absolute;
  width: 10px;
  height: 10px;
  background-color: #fff;
  border: 2px solid transparent;
  border-radius: 100px;
}

.ant-timeline-item-head-blue {
  color: #1890ff;
  border-color: #1890ff;
}

.ant-timeline-item-head-red {
  color: #ff4d4f;
  border-color: #ff4d4f;
}

.ant-timeline-item-head-green {
  color: #52c41a;
  border-color: #52c41a;
}

.ant-timeline-item-head-gray {
  color: #00000040;
  border-color: #00000040;
}

/* Custom marker: borderless, auto-sized and centered with a -50% translate. */
.ant-timeline-item-head-custom {
  position: absolute;
  top: 5.5px;
  left: 5px;
  width: auto;
  height: auto;
  margin-top: 0;
  padding: 3px 1px;
  line-height: 1;
  text-align: center;
  border: 0;
  border-radius: 0;
  transform: translate(-50%,-50%);
}

.ant-timeline-item-content {
  position: relative;
  top: -7.001px;
  margin: 0 0 0 18px;
  word-break: break-word;
}

/* The last item has no connector and keeps a minimum content height. */
.ant-timeline-item-last > .ant-timeline-item-tail {
  display: none;
}

.ant-timeline-item-last > .ant-timeline-item-content {
  min-height: 48px;
}

/* Alternate and right modes: tail and markers move to the 50% line. */
.ant-timeline.ant-timeline-alternate .ant-timeline-item-tail,
.ant-timeline.ant-timeline-right .ant-timeline-item-tail,
.ant-timeline.ant-timeline-alternate .ant-timeline-item-head,
.ant-timeline.ant-timeline-right .ant-timeline-item-head,
.ant-timeline.ant-timeline-alternate .ant-timeline-item-head-custom,
.ant-timeline.ant-timeline-right .ant-timeline-item-head-custom {
  left: 50%;
}

.ant-timeline.ant-timeline-alternate .ant-timeline-item-head,
.ant-timeline.ant-timeline-right .ant-timeline-item-head {
  margin-left: -4px;
}

.ant-timeline.ant-timeline-alternate .ant-timeline-item-head-custom,
.ant-timeline.ant-timeline-right .ant-timeline-item-head-custom {
  margin-left: 1px;
}

.ant-timeline.ant-timeline-alternate .ant-timeline-item-left .ant-timeline-item-content,
.ant-timeline.ant-timeline-right .ant-timeline-item-left .ant-timeline-item-content {
  left: calc(50% - 4px);
  width: calc(50% - 14px);
  text-align: left;
}

.ant-timeline.ant-timeline-alternate .ant-timeline-item-right .ant-timeline-item-content,
.ant-timeline.ant-timeline-right .ant-timeline-item-right .ant-timeline-item-content {
  width: calc(50% - 12px);
  margin: 0;
  text-align: right;
}

/* Right mode, right-side items: tail and markers sit 6px in from the right edge. */
.ant-timeline.ant-timeline-right .ant-timeline-item-right .ant-timeline-item-tail,
.ant-timeline.ant-timeline-right .ant-timeline-item-right .ant-timeline-item-head,
.ant-timeline.ant-timeline-right .ant-timeline-item-right .ant-timeline-item-head-custom {
  left: calc(100% - 6px);
}

.ant-timeline.ant-timeline-right .ant-timeline-item-right .ant-timeline-item-content {
  width: calc(100% - 18px);
}

/* Pending and reverse modes switch the affected connector to a dotted border. */
.ant-timeline.ant-timeline-pending .ant-timeline-item-last .ant-timeline-item-tail {
  display: block;
  height: calc(100% - 14px);
  border-left: 2px dotted #f0f0f0;
}

.ant-timeline.ant-timeline-reverse .ant-timeline-item-last .ant-timeline-item-tail {
  display: none;
}

.ant-timeline.ant-timeline-reverse .ant-timeline-item-pending .ant-timeline-item-tail {
  top: 15px;
  display: block;
  height: calc(100% - 15px);
  border-left: 2px dotted #f0f0f0;
}

.ant-timeline.ant-timeline-reverse .ant-timeline-item-pending .ant-timeline-item-content {
  min-height: 48px;
}

/* antCheckboxEffect: scale 1 to 1.6 while opacity goes from .5 to 0 (prefixed and unprefixed copies). */
@-webkit-keyframes antCheckboxEffect {
  0% {
    transform: scale(1);
    opacity: .5;
  }
  to {
    transform: scale(1.6);
    opacity: 0;
  }
}

@keyframes antCheckboxEffect {
  0% {
    transform: scale(1);
    opacity: .5;
  }
  to {
    transform: scale(1.6);
    opacity: 0;
  }
}

/* ---------- Transfer, customize-list layout ---------- */

.ant-transfer-customize-list {
  display: flex;
}

.ant-transfer-customize-list .ant-transfer-operation {
  flex: none;
  align-self: center;
}

.ant-transfer-customize-list .ant-transfer-list {
  flex: auto;
  width: auto;
  height: auto;
  min-height: 200px;
}

.ant-transfer-customize-list 
.ant-transfer-list-body-with-search{padding-top:0}.ant-transfer-customize-list .ant-transfer-list-body-search-wrapper{position:relative;padding-bottom:0}.ant-transfer-customize-list .ant-transfer-list-body-customize-wrapper{padding:12px}/* Overrides for a small table placed inside a customize-list transfer: no outer border or radius, a header background, and a bottom border on the last row. */.ant-transfer-customize-list .ant-table-wrapper .ant-table-small{border:0;border-radius:0}.ant-transfer-customize-list .ant-table-wrapper .ant-table-small>.ant-table-content>.ant-table-body>table>.ant-table-thead>tr>th{background:#fafafa}.ant-transfer-customize-list .ant-table-wrapper .ant-table-small>.ant-table-content .ant-table-row:last-child td{border-bottom:1px solid #f0f0f0}.ant-transfer-customize-list .ant-table-wrapper .ant-table-small .ant-table-body{margin:0}.ant-transfer-customize-list .ant-table-wrapper .ant-table-pagination.ant-pagination{margin:16px 0 4px}/* Transfer base rule. */.ant-transfer{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:relative}.ant-transfer-disabled .ant-transfer-list{background:#f5f5f5}/* List panel: a 180px by 200px inline-block with a 1px border; the 40px top padding matches the absolutely positioned header below. */.ant-transfer-list{position:relative;display:inline-block;width:180px;height:200px;padding-top:40px;vertical-align:middle;border:1px solid #d9d9d9;border-radius:2px}.ant-transfer-list-with-footer{padding-bottom:34px}.ant-transfer-list-search{padding-right:24px;padding-left:8px}/* Search action: absolutely positioned at the right; the span variant ignores pointer events. */.ant-transfer-list-search-action{position:absolute;top:12px;right:12px;bottom:12px;width:28px;color:#00000040;line-height:32px;text-align:center}.ant-transfer-list-search-action .anticon{color:#00000040;transition:all .3s}.ant-transfer-list-search-action .anticon:hover{color:#00000073}span.ant-transfer-list-search-action{pointer-events:none}/* List header: absolutely positioned across the full width at the top of the list. */.ant-transfer-list-header{position:absolute;top:0;left:0;width:100%;padding:7.4995px 12px 8.4995px;overflow:hidden;color:#000000d9;background:#fff;border-bottom:1px solid #f0f0f0;border-radius:2px 2px 0 0}.ant-transfer-list-header-title{position:absolute;right:12px}.ant-transfer-list-header 
.ant-checkbox-wrapper+span{padding-left:8px}/* List body, absolutely positioned search wrapper, and the scrollable content area. */.ant-transfer-list-body{position:relative;height:100%;font-size:14px}.ant-transfer-list-body-search-wrapper{position:absolute;top:0;left:0;width:100%;padding:12px}.ant-transfer-list-body-with-search{padding-top:56px}.ant-transfer-list-content{height:100%;margin:0;padding:0;overflow:auto;list-style:none}.ant-transfer-list-content>.LazyLoad{-webkit-animation:transferHighlightIn 1s;animation:transferHighlightIn 1s}/* List items: single line with ellipsis; items without the disabled class get a hover background and pointer cursor. */.ant-transfer-list-content-item{min-height:32px;padding:6px 12px;overflow:hidden;white-space:nowrap;text-overflow:ellipsis;transition:all .3s}.ant-transfer-list-content-item>span{padding-right:0}.ant-transfer-list-content-item-text{padding-left:8px}.ant-transfer-list-content-item:not(.ant-transfer-list-content-item-disabled):hover{background-color:#f5f5f5;cursor:pointer}.ant-transfer-list-content-item-disabled{color:#00000040;cursor:not-allowed}/* Not-found message: vertically centered with top 50% and a -50% translateY. */.ant-transfer-list-body-not-found{position:absolute;top:50%;width:100%;padding-top:0;color:#00000040;text-align:center;transform:translateY(-50%)}.ant-transfer-list-body-with-search .ant-transfer-list-body-not-found{margin-top:16px}.ant-transfer-list-footer{position:absolute;bottom:0;left:0;width:100%;border-top:1px solid #f0f0f0;border-radius:0 0 2px 2px}/* Operation area: buttons are display:block, with a 4px gap below the first one. */.ant-transfer-operation{display:inline-block;margin:0 8px;overflow:hidden;vertical-align:middle}.ant-transfer-operation .ant-btn{display:block}.ant-transfer-operation .ant-btn:first-child{margin-bottom:4px}.ant-transfer-operation .ant-btn .anticon{font-size:12px}/* transferHighlightIn fades the background from #bae7ff to transparent. */@-webkit-keyframes transferHighlightIn{0%{background:#bae7ff}to{background:transparent}}@keyframes transferHighlightIn{0%{background:#bae7ff}to{background:transparent}}/* NOTE(review): antCheckboxEffect is declared several times in this stylesheet with identical frames. */@-webkit-keyframes antCheckboxEffect{0%{transform:scale(1);opacity:.5}to{transform:scale(1.6);opacity:0}}@keyframes 
antCheckboxEffect{0%{transform:scale(1);opacity:.5}to{transform:scale(1.6);opacity:0}}/* .ant-select-tree-checkbox root: inline-block, vertically centered, pointer cursor. line-height is declared twice; the later value (1) applies. */.ant-select-tree-checkbox{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:relative;top:-.09em;display:inline-block;line-height:1;white-space:nowrap;vertical-align:middle;outline:none;cursor:pointer}.ant-select-tree-checkbox-wrapper:hover .ant-select-tree-checkbox-inner,.ant-select-tree-checkbox:hover .ant-select-tree-checkbox-inner,.ant-select-tree-checkbox-input:focus+.ant-select-tree-checkbox-inner{border-color:#1890ff}/* Checked state :after is a full-size outline animated with antCheckboxEffect; it stays hidden until the hover rule makes it visible. */.ant-select-tree-checkbox-checked:after{position:absolute;top:0;left:0;width:100%;height:100%;border:1px solid #1890ff;border-radius:2px;visibility:hidden;-webkit-animation:antCheckboxEffect .36s ease-in-out;animation:antCheckboxEffect .36s ease-in-out;-webkit-animation-fill-mode:backwards;animation-fill-mode:backwards;content:""}.ant-select-tree-checkbox:hover:after,.ant-select-tree-checkbox-wrapper:hover .ant-select-tree-checkbox:after{visibility:visible}/* 16px bordered box. Its :after has only right and bottom borders and is rotated 45deg (presumably the check mark); it is scaled to 0 and transparent by default. */.ant-select-tree-checkbox-inner{position:relative;top:0;left:0;display:block;width:16px;height:16px;background-color:#fff;border:1px solid #d9d9d9;border-radius:2px;border-collapse:separate;transition:all .3s}.ant-select-tree-checkbox-inner:after{position:absolute;top:50%;left:22%;display:table;width:5.71428571px;height:9.14285714px;border:2px solid #fff;border-top:0;border-left:0;transform:rotate(45deg) scale(0) translate(-50%,-50%);opacity:0;transition:all .1s cubic-bezier(.71,-.46,.88,.6),opacity .1s;content:" "}/* .ant-select-tree-checkbox-input covers the whole box at z-index 1 and is fully transparent. */.ant-select-tree-checkbox-input{position:absolute;top:0;right:0;bottom:0;left:0;z-index:1;width:100%;height:100%;cursor:pointer;opacity:0}/* Checked: the :after mark is scaled to 1 and made opaque. */.ant-select-tree-checkbox-checked .ant-select-tree-checkbox-inner:after{position:absolute;display:table;border:2px solid #fff;border-top:0;border-left:0;transform:rotate(45deg) scale(1) translate(-50%,-50%);opacity:1;transition:all .2s 
cubic-bezier(.12,.4,.29,1.46) .1s;content:" "}.ant-select-tree-checkbox-checked .ant-select-tree-checkbox-inner{background-color:#1890ff;border-color:#1890ff}/* Disabled state: not-allowed cursor, grey fill, animation-name set to none, and the hover outline kept hidden. */.ant-select-tree-checkbox-disabled{cursor:not-allowed}.ant-select-tree-checkbox-disabled.ant-select-tree-checkbox-checked .ant-select-tree-checkbox-inner:after{border-color:#00000040;-webkit-animation-name:none;animation-name:none}.ant-select-tree-checkbox-disabled .ant-select-tree-checkbox-input{cursor:not-allowed}.ant-select-tree-checkbox-disabled .ant-select-tree-checkbox-inner{background-color:#f5f5f5;border-color:#d9d9d9!important}.ant-select-tree-checkbox-disabled .ant-select-tree-checkbox-inner:after{border-color:#f5f5f5;border-collapse:separate;-webkit-animation-name:none;animation-name:none}.ant-select-tree-checkbox-disabled+span{color:#00000040;cursor:not-allowed}.ant-select-tree-checkbox-disabled:hover:after,.ant-select-tree-checkbox-wrapper:hover .ant-select-tree-checkbox-disabled:after{visibility:hidden}/* Wrapper: inline-block with pointer cursor; adjacent wrappers are separated by an 8px left margin. line-height is declared twice; the later value (unset) applies. */.ant-select-tree-checkbox-wrapper{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";display:inline-block;line-height:unset;cursor:pointer}.ant-select-tree-checkbox-wrapper.ant-select-tree-checkbox-wrapper-disabled{cursor:not-allowed}.ant-select-tree-checkbox-wrapper+.ant-select-tree-checkbox-wrapper{margin-left:8px}.ant-select-tree-checkbox+span{padding-right:8px;padding-left:8px}/* Group: inline-block items with an 8px right margin, removed on the last child. */.ant-select-tree-checkbox-group{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";display:inline-block}.ant-select-tree-checkbox-group-item{display:inline-block;margin-right:8px}.ant-select-tree-checkbox-group-item:last-child{margin-right:0}.ant-select-tree-checkbox-group-item+.ant-select-tree-checkbox-group-item{margin-left:0}.ant-select-tree-checkbox-indeterminate 
.ant-select-tree-checkbox-inner{background-color:#fff;border-color:#d9d9d9}/* Indeterminate state: the :after becomes a centered 8px filled square with no border. */.ant-select-tree-checkbox-indeterminate .ant-select-tree-checkbox-inner:after{top:50%;left:50%;width:8px;height:8px;background-color:#1890ff;border:0;transform:translate(-50%,-50%) scale(1);opacity:1;content:" "}.ant-select-tree-checkbox-indeterminate.ant-select-tree-checkbox-disabled .ant-select-tree-checkbox-inner:after{background-color:#00000040;border-color:#00000040}/* .ant-select-tree container and its li/ul structure; nested ul elements are indented with 18px of left padding. */.ant-select-tree{box-sizing:border-box;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";margin:-4px 0 0;padding:0 4px}.ant-select-tree li{margin:8px 0;padding:0;white-space:nowrap;list-style:none;outline:0}.ant-select-tree li.filter-node>span{font-weight:500}.ant-select-tree li ul{margin:0;padding:0 0 0 18px}/* Node content wrapper: width is 100% minus 24px, or minus 46px when it follows a checkbox span; hover and selected states set the background. */.ant-select-tree li .ant-select-tree-node-content-wrapper{display:inline-block;width:calc(100% - 24px);margin:0;padding:3px 5px;color:#000000d9;text-decoration:none;border-radius:2px;cursor:pointer;transition:all .3s}.ant-select-tree li .ant-select-tree-node-content-wrapper:hover{background-color:#f5f5f5}.ant-select-tree li .ant-select-tree-node-content-wrapper.ant-select-tree-node-selected{background-color:#bae7ff}.ant-select-tree li span.ant-select-tree-checkbox{margin:0 4px 0 0}.ant-select-tree li span.ant-select-tree-checkbox+.ant-select-tree-node-content-wrapper{width:calc(100% - 46px)}/* Switcher and icon spans are 24px inline-block boxes. */.ant-select-tree li span.ant-select-tree-switcher,.ant-select-tree li span.ant-select-tree-iconEle{display:inline-block;width:24px;height:24px;margin:0;line-height:22px;text-align:center;vertical-align:middle;border:0 none;outline:none;cursor:pointer}.ant-select-tree li span.ant-select-icon_loading .ant-select-switcher-loading-icon{position:absolute;left:0;display:inline-block;color:#1890ff;font-size:14px;transform:none}.ant-select-tree li span.ant-select-icon_loading .ant-select-switcher-loading-icon 
svg{position:absolute;top:0;right:0;bottom:0;left:0;margin:auto}/* Switcher icon rules for the _open and _close states. NOTE(review): the second font-size declaration in each rule ends with a backslash escape and looks like a legacy browser hack; the :root rules that follow set font-size back to 12px. Confirm before touching. */.ant-select-tree li span.ant-select-tree-switcher{position:relative}.ant-select-tree li span.ant-select-tree-switcher.ant-select-tree-switcher-noop{cursor:auto}.ant-select-tree li span.ant-select-tree-switcher.ant-select-tree-switcher_open .ant-tree-switcher-icon,.ant-select-tree li span.ant-select-tree-switcher.ant-select-tree-switcher_open .ant-select-switcher-icon{font-size:12px;font-size:10px \	;transform:scale(.83333333) rotate(0);display:inline-block;font-weight:700}:root .ant-select-tree li span.ant-select-tree-switcher.ant-select-tree-switcher_open .ant-tree-switcher-icon,:root .ant-select-tree li span.ant-select-tree-switcher.ant-select-tree-switcher_open .ant-select-switcher-icon{font-size:12px}.ant-select-tree li span.ant-select-tree-switcher.ant-select-tree-switcher_open .ant-tree-switcher-icon svg,.ant-select-tree li span.ant-select-tree-switcher.ant-select-tree-switcher_open .ant-select-switcher-icon svg{transition:transform .3s}/* Same declarations repeated for the _close state. */.ant-select-tree li span.ant-select-tree-switcher.ant-select-tree-switcher_close .ant-tree-switcher-icon,.ant-select-tree li span.ant-select-tree-switcher.ant-select-tree-switcher_close .ant-select-switcher-icon{font-size:12px;font-size:10px \	;transform:scale(.83333333) rotate(0);display:inline-block;font-weight:700}:root .ant-select-tree li span.ant-select-tree-switcher.ant-select-tree-switcher_close .ant-tree-switcher-icon,:root .ant-select-tree li span.ant-select-tree-switcher.ant-select-tree-switcher_close .ant-select-switcher-icon{font-size:12px}.ant-select-tree li span.ant-select-tree-switcher.ant-select-tree-switcher_close .ant-tree-switcher-icon svg,.ant-select-tree li span.ant-select-tree-switcher.ant-select-tree-switcher_close .ant-select-switcher-icon svg{transition:transform .3s}/* Closed switcher: the svg is rotated -90deg. */.ant-select-tree li span.ant-select-tree-switcher.ant-select-tree-switcher_close .ant-select-switcher-icon svg{transform:rotate(-90deg)}.ant-select-tree li 
span.ant-select-tree-switcher.ant-select-tree-switcher_open .ant-select-switcher-loading-icon,.ant-select-tree li span.ant-select-tree-switcher.ant-select-tree-switcher_close .ant-select-switcher-loading-icon{position:absolute;left:0;display:inline-block;width:24px;height:24px;color:#1890ff;font-size:14px;transform:none}.ant-select-tree li span.ant-select-tree-switcher.ant-select-tree-switcher_open .ant-select-switcher-loading-icon svg,.ant-select-tree li span.ant-select-tree-switcher.ant-select-tree-switcher_close .ant-select-switcher-loading-icon svg{position:absolute;top:0;right:0;bottom:0;left:0;margin:auto}/* Child trees (and the icon of a loading tree node) are display:none; a child tree with the -open class is display:block. */.ant-select-tree .ant-select-tree-treenode-loading .ant-select-tree-iconEle,.ant-select-tree-child-tree{display:none}.ant-select-tree-child-tree-open{display:block}/* Disabled tree nodes: muted color, not-allowed cursor, transparent hover background. */li.ant-select-tree-treenode-disabled>span:not(.ant-select-tree-switcher),li.ant-select-tree-treenode-disabled>.ant-select-tree-node-content-wrapper,li.ant-select-tree-treenode-disabled>.ant-select-tree-node-content-wrapper span{color:#00000040;cursor:not-allowed}li.ant-select-tree-treenode-disabled>.ant-select-tree-node-content-wrapper:hover{background:transparent}.ant-select-tree-icon__open,.ant-select-tree-icon__close{margin-right:2px;vertical-align:top}/* Dropdown container; its search box is position:sticky at top 0 and is hidden by the --hide modifier. */.ant-select-tree-dropdown{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum"}.ant-select-tree-dropdown .ant-select-dropdown-search{position:sticky;top:0;z-index:1;display:block;padding:4px;background:#fff}.ant-select-tree-dropdown .ant-select-dropdown-search .ant-select-search__field__wrap{width:100%}.ant-select-tree-dropdown .ant-select-dropdown-search .ant-select-search__field{box-sizing:border-box;width:100%;padding:4px 7px;border:1px solid #d9d9d9;border-radius:4px;outline:none}.ant-select-tree-dropdown .ant-select-dropdown-search.ant-select-search--hide{display:none}.ant-select-tree-dropdown 
.ant-select-not-found{display:block;padding:7px 16px;color:#00000040;cursor:not-allowed}/* antCheckboxEffect: scale 1 to 1.6 while opacity goes from .5 to 0 (prefixed and unprefixed copies). */@-webkit-keyframes antCheckboxEffect{0%{transform:scale(1);opacity:.5}to{transform:scale(1.6);opacity:0}}@keyframes antCheckboxEffect{0%{transform:scale(1);opacity:.5}to{transform:scale(1.6);opacity:0}}/* Directory tree. Each rule targets both top-level li elements and li elements inside .ant-tree-child-tree. Switcher and checkbox are raised to z-index 1. */.ant-tree.ant-tree-directory{position:relative}.ant-tree.ant-tree-directory>li span.ant-tree-switcher,.ant-tree.ant-tree-directory .ant-tree-child-tree>li span.ant-tree-switcher{position:relative;z-index:1}.ant-tree.ant-tree-directory>li span.ant-tree-switcher.ant-tree-switcher-noop,.ant-tree.ant-tree-directory .ant-tree-child-tree>li span.ant-tree-switcher.ant-tree-switcher-noop{pointer-events:none}.ant-tree.ant-tree-directory>li span.ant-tree-checkbox,.ant-tree.ant-tree-directory .ant-tree-child-tree>li span.ant-tree-checkbox{position:relative;z-index:1}/* Content wrapper: text selection is disabled; the wrapper itself stays transparent on hover while its :before pseudo-element takes the hover background. */.ant-tree.ant-tree-directory>li span.ant-tree-node-content-wrapper,.ant-tree.ant-tree-directory .ant-tree-child-tree>li span.ant-tree-node-content-wrapper{border-radius:0;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}.ant-tree.ant-tree-directory>li span.ant-tree-node-content-wrapper:hover,.ant-tree.ant-tree-directory .ant-tree-child-tree>li span.ant-tree-node-content-wrapper:hover{background:transparent}.ant-tree.ant-tree-directory>li span.ant-tree-node-content-wrapper:hover:before,.ant-tree.ant-tree-directory .ant-tree-child-tree>li span.ant-tree-node-content-wrapper:hover:before{background:#f5f5f5}.ant-tree.ant-tree-directory>li span.ant-tree-node-content-wrapper.ant-tree-node-selected,.ant-tree.ant-tree-directory .ant-tree-child-tree>li span.ant-tree-node-content-wrapper.ant-tree-node-selected{color:#fff;background:transparent}/* The :before is an absolutely positioned 24px-high strip spanning left 0 to right 0. */.ant-tree.ant-tree-directory>li span.ant-tree-node-content-wrapper:before,.ant-tree.ant-tree-directory .ant-tree-child-tree>li span.ant-tree-node-content-wrapper:before{position:absolute;right:0;left:0;height:24px;transition:all 
.3s;content:""}/* The span inside the content wrapper is raised to z-index 1. */.ant-tree.ant-tree-directory>li span.ant-tree-node-content-wrapper>span,.ant-tree.ant-tree-directory .ant-tree-child-tree>li span.ant-tree-node-content-wrapper>span{position:relative;z-index:1}/* Selected directory nodes: the switcher turns white, and a checked checkbox is drawn with inverted colors (white box with a #1890ff mark border). */.ant-tree.ant-tree-directory>li.ant-tree-treenode-selected>span.ant-tree-switcher,.ant-tree.ant-tree-directory .ant-tree-child-tree>li.ant-tree-treenode-selected>span.ant-tree-switcher{color:#fff}.ant-tree.ant-tree-directory>li.ant-tree-treenode-selected>span.ant-tree-checkbox .ant-tree-checkbox-inner,.ant-tree.ant-tree-directory .ant-tree-child-tree>li.ant-tree-treenode-selected>span.ant-tree-checkbox .ant-tree-checkbox-inner{border-color:#1890ff}.ant-tree.ant-tree-directory>li.ant-tree-treenode-selected>span.ant-tree-checkbox.ant-tree-checkbox-checked:after,.ant-tree.ant-tree-directory .ant-tree-child-tree>li.ant-tree-treenode-selected>span.ant-tree-checkbox.ant-tree-checkbox-checked:after{border-color:#fff}.ant-tree.ant-tree-directory>li.ant-tree-treenode-selected>span.ant-tree-checkbox.ant-tree-checkbox-checked .ant-tree-checkbox-inner,.ant-tree.ant-tree-directory .ant-tree-child-tree>li.ant-tree-treenode-selected>span.ant-tree-checkbox.ant-tree-checkbox-checked .ant-tree-checkbox-inner{background:#fff}.ant-tree.ant-tree-directory>li.ant-tree-treenode-selected>span.ant-tree-checkbox.ant-tree-checkbox-checked .ant-tree-checkbox-inner:after,.ant-tree.ant-tree-directory .ant-tree-child-tree>li.ant-tree-treenode-selected>span.ant-tree-checkbox.ant-tree-checkbox-checked .ant-tree-checkbox-inner:after{border-color:#1890ff}/* Selected node background strip (the selector list continues on the next line). */.ant-tree.ant-tree-directory>li.ant-tree-treenode-selected>span.ant-tree-node-content-wrapper:before,.ant-tree.ant-tree-directory 
.ant-tree-child-tree>li.ant-tree-treenode-selected>span.ant-tree-node-content-wrapper:before{background:#1890ff}.ant-tree-checkbox{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";position:relative;top:-.09em;display:inline-block;line-height:1;white-space:nowrap;vertical-align:middle;outline:none;cursor:pointer}.ant-tree-checkbox-wrapper:hover .ant-tree-checkbox-inner,.ant-tree-checkbox:hover .ant-tree-checkbox-inner,.ant-tree-checkbox-input:focus+.ant-tree-checkbox-inner{border-color:#1890ff}.ant-tree-checkbox-checked:after{position:absolute;top:0;left:0;width:100%;height:100%;border:1px solid #1890ff;border-radius:2px;visibility:hidden;-webkit-animation:antCheckboxEffect .36s ease-in-out;animation:antCheckboxEffect .36s ease-in-out;-webkit-animation-fill-mode:backwards;animation-fill-mode:backwards;content:""}.ant-tree-checkbox:hover:after,.ant-tree-checkbox-wrapper:hover .ant-tree-checkbox:after{visibility:visible}.ant-tree-checkbox-inner{position:relative;top:0;left:0;display:block;width:16px;height:16px;background-color:#fff;border:1px solid #d9d9d9;border-radius:2px;border-collapse:separate;transition:all .3s}.ant-tree-checkbox-inner:after{position:absolute;top:50%;left:22%;display:table;width:5.71428571px;height:9.14285714px;border:2px solid #fff;border-top:0;border-left:0;transform:rotate(45deg) scale(0) translate(-50%,-50%);opacity:0;transition:all .1s cubic-bezier(.71,-.46,.88,.6),opacity .1s;content:" "}.ant-tree-checkbox-input{position:absolute;top:0;right:0;bottom:0;left:0;z-index:1;width:100%;height:100%;cursor:pointer;opacity:0}.ant-tree-checkbox-checked .ant-tree-checkbox-inner:after{position:absolute;display:table;border:2px solid #fff;border-top:0;border-left:0;transform:rotate(45deg) scale(1) translate(-50%,-50%);opacity:1;transition:all .2s cubic-bezier(.12,.4,.29,1.46) .1s;content:" "}.ant-tree-checkbox-checked 
.ant-tree-checkbox-inner{background-color:#1890ff;border-color:#1890ff}.ant-tree-checkbox-disabled{cursor:not-allowed}.ant-tree-checkbox-disabled.ant-tree-checkbox-checked .ant-tree-checkbox-inner:after{border-color:#00000040;-webkit-animation-name:none;animation-name:none}.ant-tree-checkbox-disabled .ant-tree-checkbox-input{cursor:not-allowed}.ant-tree-checkbox-disabled .ant-tree-checkbox-inner{background-color:#f5f5f5;border-color:#d9d9d9!important}.ant-tree-checkbox-disabled .ant-tree-checkbox-inner:after{border-color:#f5f5f5;border-collapse:separate;-webkit-animation-name:none;animation-name:none}.ant-tree-checkbox-disabled+span{color:#00000040;cursor:not-allowed}.ant-tree-checkbox-disabled:hover:after,.ant-tree-checkbox-wrapper:hover .ant-tree-checkbox-disabled:after{visibility:hidden}.ant-tree-checkbox-wrapper{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";display:inline-block;line-height:unset;cursor:pointer}.ant-tree-checkbox-wrapper.ant-tree-checkbox-wrapper-disabled{cursor:not-allowed}.ant-tree-checkbox-wrapper+.ant-tree-checkbox-wrapper{margin-left:8px}.ant-tree-checkbox+span{padding-right:8px;padding-left:8px}.ant-tree-checkbox-group{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";display:inline-block}.ant-tree-checkbox-group-item{display:inline-block;margin-right:8px}.ant-tree-checkbox-group-item:last-child{margin-right:0}.ant-tree-checkbox-group-item+.ant-tree-checkbox-group-item{margin-left:0}.ant-tree-checkbox-indeterminate .ant-tree-checkbox-inner{background-color:#fff;border-color:#d9d9d9}.ant-tree-checkbox-indeterminate .ant-tree-checkbox-inner:after{top:50%;left:50%;width:8px;height:8px;background-color:#1890ff;border:0;transform:translate(-50%,-50%) scale(1);opacity:1;content:" 
"}.ant-tree-checkbox-indeterminate.ant-tree-checkbox-disabled .ant-tree-checkbox-inner:after{background-color:#00000040;border-color:#00000040}.ant-tree{box-sizing:border-box;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";margin:0;padding:0}.ant-tree-checkbox-checked:after{position:absolute;top:16.67%;left:0;width:100%;height:66.67%}.ant-tree ol,.ant-tree ul{margin:0;padding:0;list-style:none}.ant-tree li{margin:0;padding:4px 0;white-space:nowrap;list-style:none;outline:0}.ant-tree li span[draggable],.ant-tree li span[draggable=true]{line-height:20px;border-top:2px transparent solid;border-bottom:2px transparent solid;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;-khtml-user-drag:element;-webkit-user-drag:element}.ant-tree li.drag-over>span[draggable]{color:#fff;background-color:#1890ff;opacity:.8}.ant-tree li.drag-over-gap-top>span[draggable]{border-top-color:#1890ff}.ant-tree li.drag-over-gap-bottom>span[draggable]{border-bottom-color:#1890ff}.ant-tree li.filter-node>span{color:#ff4d4f!important;font-weight:500!important}.ant-tree li.ant-tree-treenode-loading span.ant-tree-switcher.ant-tree-switcher_open .ant-tree-switcher-loading-icon,.ant-tree li.ant-tree-treenode-loading span.ant-tree-switcher.ant-tree-switcher_close .ant-tree-switcher-loading-icon{position:absolute;left:0;display:inline-block;width:24px;height:24px;color:#1890ff;font-size:14px;transform:none}.ant-tree li.ant-tree-treenode-loading span.ant-tree-switcher.ant-tree-switcher_open .ant-tree-switcher-loading-icon svg,.ant-tree li.ant-tree-treenode-loading span.ant-tree-switcher.ant-tree-switcher_close .ant-tree-switcher-loading-icon svg{position:absolute;top:0;right:0;bottom:0;left:0;margin:auto}:root .ant-tree li.ant-tree-treenode-loading span.ant-tree-switcher.ant-tree-switcher_open:after,:root .ant-tree li.ant-tree-treenode-loading 
span.ant-tree-switcher.ant-tree-switcher_close:after{opacity:0}.ant-tree li ul{margin:0;padding:0 0 0 18px}.ant-tree li .ant-tree-node-content-wrapper{display:inline-block;height:24px;margin:0;padding:0 5px;color:#000000d9;line-height:24px;text-decoration:none;vertical-align:top;border-radius:2px;cursor:pointer;transition:all .3s}.ant-tree li .ant-tree-node-content-wrapper:hover{background-color:#f5f5f5}.ant-tree li .ant-tree-node-content-wrapper.ant-tree-node-selected{background-color:#bae7ff}.ant-tree li span.ant-tree-checkbox{top:initial;height:24px;margin:0 4px 0 2px;padding:4px 0}.ant-tree li span.ant-tree-switcher,.ant-tree li span.ant-tree-iconEle{display:inline-block;width:24px;height:24px;margin:0;line-height:24px;text-align:center;vertical-align:top;border:0 none;outline:none;cursor:pointer}.ant-tree li span.ant-tree-iconEle:empty{display:none}.ant-tree li span.ant-tree-switcher{position:relative}.ant-tree li span.ant-tree-switcher.ant-tree-switcher-noop{cursor:default}.ant-tree li span.ant-tree-switcher.ant-tree-switcher_open .ant-tree-switcher-icon,.ant-tree li span.ant-tree-switcher.ant-tree-switcher_open .ant-select-switcher-icon{font-size:12px;font-size:10px \	;transform:scale(.83333333) rotate(0);display:inline-block;font-weight:700}:root .ant-tree li span.ant-tree-switcher.ant-tree-switcher_open .ant-tree-switcher-icon,:root .ant-tree li span.ant-tree-switcher.ant-tree-switcher_open .ant-select-switcher-icon{font-size:12px}.ant-tree li span.ant-tree-switcher.ant-tree-switcher_open .ant-tree-switcher-icon svg,.ant-tree li span.ant-tree-switcher.ant-tree-switcher_open .ant-select-switcher-icon svg{transition:transform .3s}.ant-tree li span.ant-tree-switcher.ant-tree-switcher_close .ant-tree-switcher-icon,.ant-tree li span.ant-tree-switcher.ant-tree-switcher_close .ant-select-switcher-icon{font-size:12px;font-size:10px \	;transform:scale(.83333333) rotate(0);display:inline-block;font-weight:700}:root .ant-tree li 
span.ant-tree-switcher.ant-tree-switcher_close .ant-tree-switcher-icon,:root .ant-tree li span.ant-tree-switcher.ant-tree-switcher_close .ant-select-switcher-icon{font-size:12px}.ant-tree li span.ant-tree-switcher.ant-tree-switcher_close .ant-tree-switcher-icon svg,.ant-tree li span.ant-tree-switcher.ant-tree-switcher_close .ant-select-switcher-icon svg{transition:transform .3s}.ant-tree li span.ant-tree-switcher.ant-tree-switcher_close .ant-tree-switcher-icon svg{transform:rotate(-90deg)}.ant-tree li:last-child>span.ant-tree-switcher:before,.ant-tree li:last-child>span.ant-tree-iconEle:before{display:none}.ant-tree>li:first-child{padding-top:7px}.ant-tree>li:last-child{padding-bottom:7px}.ant-tree-child-tree>li:first-child{padding-top:8px}.ant-tree-child-tree>li:last-child{padding-bottom:0}li.ant-tree-treenode-disabled>span:not(.ant-tree-switcher),li.ant-tree-treenode-disabled>.ant-tree-node-content-wrapper,li.ant-tree-treenode-disabled>.ant-tree-node-content-wrapper span{color:#00000040;cursor:not-allowed}li.ant-tree-treenode-disabled>.ant-tree-node-content-wrapper:hover{background:transparent}.ant-tree-icon__open,.ant-tree-icon__close{margin-right:2px;vertical-align:top}.ant-tree.ant-tree-show-line li{position:relative}.ant-tree.ant-tree-show-line li span.ant-tree-switcher{color:#00000073;background:#fff}.ant-tree.ant-tree-show-line li span.ant-tree-switcher.ant-tree-switcher-noop .ant-tree-switcher-icon,.ant-tree.ant-tree-show-line li span.ant-tree-switcher.ant-tree-switcher-noop .ant-select-switcher-icon{display:inline-block;font-weight:400;font-size:12px}.ant-tree.ant-tree-show-line li span.ant-tree-switcher.ant-tree-switcher-noop .ant-tree-switcher-icon svg,.ant-tree.ant-tree-show-line li span.ant-tree-switcher.ant-tree-switcher-noop .ant-select-switcher-icon svg{transition:transform .3s}.ant-tree.ant-tree-show-line li span.ant-tree-switcher.ant-tree-switcher_open .ant-tree-switcher-icon,.ant-tree.ant-tree-show-line li 
span.ant-tree-switcher.ant-tree-switcher_open .ant-select-switcher-icon{display:inline-block;font-weight:400;font-size:12px}.ant-tree.ant-tree-show-line li span.ant-tree-switcher.ant-tree-switcher_open .ant-tree-switcher-icon svg,.ant-tree.ant-tree-show-line li span.ant-tree-switcher.ant-tree-switcher_open .ant-select-switcher-icon svg{transition:transform .3s}.ant-tree.ant-tree-show-line li span.ant-tree-switcher.ant-tree-switcher_close .ant-tree-switcher-icon,.ant-tree.ant-tree-show-line li span.ant-tree-switcher.ant-tree-switcher_close .ant-select-switcher-icon{display:inline-block;font-weight:400;font-size:12px}.ant-tree.ant-tree-show-line li span.ant-tree-switcher.ant-tree-switcher_close .ant-tree-switcher-icon svg,.ant-tree.ant-tree-show-line li span.ant-tree-switcher.ant-tree-switcher_close .ant-select-switcher-icon svg{transition:transform .3s}.ant-tree.ant-tree-show-line li:not(:last-child):before{position:absolute;left:12px;width:1px;height:100%;height:calc(100% - 22px);margin:22px 0 0;border-left:1px solid #d9d9d9;content:" "}.ant-tree.ant-tree-icon-hide .ant-tree-treenode-loading .ant-tree-iconEle{display:none}.ant-tree.ant-tree-block-node li .ant-tree-node-content-wrapper{width:calc(100% - 24px)}.ant-tree.ant-tree-block-node li span.ant-tree-checkbox+.ant-tree-node-content-wrapper{width:calc(100% - 46px)}.ant-typography{color:#000000d9;overflow-wrap:break-word}.ant-typography.ant-typography-secondary{color:#00000073}.ant-typography.ant-typography-success{color:#52c41a}.ant-typography.ant-typography-warning{color:#faad14}.ant-typography.ant-typography-danger{color:#ff4d4f}a.ant-typography.ant-typography-danger:active,a.ant-typography.ant-typography-danger:focus,a.ant-typography.ant-typography-danger:hover{color:#ff7875}.ant-typography.ant-typography-disabled{color:#00000040;cursor:not-allowed;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none}div.ant-typography,.ant-typography 
p{margin-bottom:1em}h1.ant-typography,.ant-typography h1{margin-bottom:.5em;color:#000000d9;font-weight:600;font-size:38px;line-height:1.23}h2.ant-typography,.ant-typography h2{margin-bottom:.5em;color:#000000d9;font-weight:600;font-size:30px;line-height:1.35}h3.ant-typography,.ant-typography h3{margin-bottom:.5em;color:#000000d9;font-weight:600;font-size:24px;line-height:1.35}h4.ant-typography,.ant-typography h4{margin-bottom:.5em;color:#000000d9;font-weight:600;font-size:20px;line-height:1.4}h5.ant-typography,.ant-typography h5{margin-bottom:.5em;color:#000000d9;font-weight:600;font-size:16px;line-height:1.5}.ant-typography+h1.ant-typography,.ant-typography+h2.ant-typography,.ant-typography+h3.ant-typography,.ant-typography+h4.ant-typography,.ant-typography+h5.ant-typography{margin-top:1.2em}.ant-typography div+h1,.ant-typography ul+h1,.ant-typography li+h1,.ant-typography p+h1,.ant-typography h1+h1,.ant-typography h2+h1,.ant-typography h3+h1,.ant-typography h4+h1,.ant-typography h5+h1,.ant-typography div+h2,.ant-typography ul+h2,.ant-typography li+h2,.ant-typography p+h2,.ant-typography h1+h2,.ant-typography h2+h2,.ant-typography h3+h2,.ant-typography h4+h2,.ant-typography h5+h2,.ant-typography div+h3,.ant-typography ul+h3,.ant-typography li+h3,.ant-typography p+h3,.ant-typography h1+h3,.ant-typography h2+h3,.ant-typography h3+h3,.ant-typography h4+h3,.ant-typography h5+h3,.ant-typography div+h4,.ant-typography ul+h4,.ant-typography li+h4,.ant-typography p+h4,.ant-typography h1+h4,.ant-typography h2+h4,.ant-typography h3+h4,.ant-typography h4+h4,.ant-typography h5+h4,.ant-typography div+h5,.ant-typography ul+h5,.ant-typography li+h5,.ant-typography p+h5,.ant-typography h1+h5,.ant-typography h2+h5,.ant-typography h3+h5,.ant-typography h4+h5,.ant-typography h5+h5{margin-top:1.2em}a.ant-typography-ellipsis,span.ant-typography-ellipsis{display:inline-block}a.ant-typography,.ant-typography a{color:#1890ff;outline:none;cursor:pointer;transition:color 
.3s;text-decoration:none}a.ant-typography:focus,.ant-typography a:focus,a.ant-typography:hover,.ant-typography a:hover{color:#40a9ff}a.ant-typography:active,.ant-typography a:active{color:#096dd9}a.ant-typography:active,.ant-typography a:active,a.ant-typography:hover,.ant-typography a:hover{text-decoration:none}a.ant-typography[disabled],.ant-typography a[disabled],a.ant-typography.ant-typography-disabled,.ant-typography a.ant-typography-disabled{color:#00000040;cursor:not-allowed}a.ant-typography[disabled]:active,.ant-typography a[disabled]:active,a.ant-typography.ant-typography-disabled:active,.ant-typography a.ant-typography-disabled:active,a.ant-typography[disabled]:hover,.ant-typography a[disabled]:hover,a.ant-typography.ant-typography-disabled:hover,.ant-typography a.ant-typography-disabled:hover{color:#00000040}a.ant-typography[disabled]:active,.ant-typography a[disabled]:active,a.ant-typography.ant-typography-disabled:active,.ant-typography a.ant-typography-disabled:active{pointer-events:none}.ant-typography code{margin:0 .2em;padding:.2em .4em .1em;font-size:85%;background:rgba(150,150,150,.1);border:1px solid rgba(100,100,100,.2);border-radius:3px}.ant-typography kbd{margin:0 .2em;padding:.15em .4em .1em;font-size:90%;background:rgba(150,150,150,.06);border:1px solid rgba(100,100,100,.2);border-bottom-width:2px;border-radius:3px}.ant-typography mark{padding:0;background-color:#ffe58f}.ant-typography u,.ant-typography ins{text-decoration:underline;-webkit-text-decoration-skip:ink;text-decoration-skip-ink:auto}.ant-typography s,.ant-typography del{text-decoration:line-through}.ant-typography strong{font-weight:600}.ant-typography-expand,.ant-typography-edit,.ant-typography-copy{color:#1890ff;text-decoration:none;outline:none;cursor:pointer;transition:color 
.3s;margin-left:4px}.ant-typography-expand:focus,.ant-typography-edit:focus,.ant-typography-copy:focus,.ant-typography-expand:hover,.ant-typography-edit:hover,.ant-typography-copy:hover{color:#40a9ff}.ant-typography-expand:active,.ant-typography-edit:active,.ant-typography-copy:active{color:#096dd9}.ant-typography-copy-success,.ant-typography-copy-success:hover,.ant-typography-copy-success:focus{color:#52c41a}.ant-typography-edit-content{position:relative}div.ant-typography-edit-content{left:-12px;margin-top:-5px;margin-bottom:calc(1em - 5px)}.ant-typography-edit-content-confirm{position:absolute;right:10px;bottom:8px;color:#00000073;pointer-events:none}.ant-typography-edit-content textarea{-moz-transition:none}.ant-typography ul,.ant-typography ol{margin:0 0 1em;padding:0}.ant-typography ul li,.ant-typography ol li{margin:0 0 0 20px;padding:0 0 0 4px}.ant-typography ul{list-style-type:circle}.ant-typography ul ul{list-style-type:disc}.ant-typography ol{list-style-type:decimal}.ant-typography pre,.ant-typography blockquote{margin:1em 0}.ant-typography pre{padding:.4em .6em;white-space:pre-wrap;word-wrap:break-word;background:rgba(150,150,150,.1);border:1px solid rgba(100,100,100,.2);border-radius:3px}.ant-typography pre code{display:inline;margin:0;padding:0;font-size:inherit;font-family:inherit;background:transparent;border:0}.ant-typography blockquote{padding:0 0 0 .6em;border-left:4px solid rgba(100,100,100,.2);opacity:.85}.ant-typography-single-line{white-space:nowrap}.ant-typography-ellipsis-single-line{overflow:hidden;text-overflow:ellipsis}a.ant-typography-ellipsis-single-line,span.ant-typography-ellipsis-single-line{vertical-align:bottom}.ant-typography-ellipsis-multiple-line{display:-webkit-box;overflow:hidden;-webkit-line-clamp:3;-webkit-box-orient:vertical}.ant-typography-rtl{direction:rtl}.ant-typography-rtl .ant-typography-expand,.ant-typography-rtl .ant-typography-edit,.ant-typography-rtl 
.ant-typography-copy{margin-right:4px;margin-left:0}.ant-typography-rtl .ant-typography-expand{float:left}div.ant-typography-edit-content.ant-typography-rtl{right:-12px;left:auto}.ant-typography-rtl .ant-typography-edit-content-confirm{right:auto;left:10px}.ant-typography-rtl.ant-typography ul li,.ant-typography-rtl.ant-typography ol li{margin:0 20px 0 0;padding:0 4px 0 0}.ant-upload{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum";outline:0}.ant-upload p{margin:0}.ant-upload-btn{display:block;width:100%;outline:none}.ant-upload input[type=file]{cursor:pointer}.ant-upload.ant-upload-select{display:inline-block}.ant-upload.ant-upload-disabled{cursor:not-allowed}.ant-upload.ant-upload-select-picture-card{display:table;float:left;width:104px;height:104px;margin-right:8px;margin-bottom:8px;text-align:center;vertical-align:top;background-color:#fafafa;border:1px dashed #d9d9d9;border-radius:2px;cursor:pointer;transition:border-color .3s ease}.ant-upload.ant-upload-select-picture-card>.ant-upload{display:table-cell;width:100%;height:100%;padding:8px;text-align:center;vertical-align:middle}.ant-upload.ant-upload-select-picture-card:hover{border-color:#1890ff}.ant-upload.ant-upload-drag{position:relative;width:100%;height:100%;text-align:center;background:#fafafa;border:1px dashed #d9d9d9;border-radius:2px;cursor:pointer;transition:border-color .3s}.ant-upload.ant-upload-drag .ant-upload{padding:16px 0}.ant-upload.ant-upload-drag.ant-upload-drag-hover:not(.ant-upload-disabled){border-color:#096dd9}.ant-upload.ant-upload-drag.ant-upload-disabled{cursor:not-allowed}.ant-upload.ant-upload-drag .ant-upload-btn{display:table;height:100%}.ant-upload.ant-upload-drag .ant-upload-drag-container{display:table-cell;vertical-align:middle}.ant-upload.ant-upload-drag:not(.ant-upload-disabled):hover{border-color:#40a9ff}.ant-upload.ant-upload-drag 
p.ant-upload-drag-icon{margin-bottom:20px}.ant-upload.ant-upload-drag p.ant-upload-drag-icon .anticon{color:#40a9ff;font-size:48px}.ant-upload.ant-upload-drag p.ant-upload-text{margin:0 0 4px;color:#000000d9;font-size:16px}.ant-upload.ant-upload-drag p.ant-upload-hint{color:#00000073;font-size:14px}.ant-upload.ant-upload-drag .anticon-plus{color:#00000040;font-size:30px;transition:all .3s}.ant-upload.ant-upload-drag .anticon-plus:hover,.ant-upload.ant-upload-drag:hover .anticon-plus{color:#00000073}.ant-upload-picture-card-wrapper{display:inline-block;width:100%}.ant-upload-picture-card-wrapper:before,.ant-upload-picture-card-wrapper:after{display:table;content:""}.ant-upload-picture-card-wrapper:after{clear:both}.ant-upload-list{box-sizing:border-box;margin:0;padding:0;color:#000000d9;font-size:14px;font-variant:tabular-nums;line-height:1.5715;list-style:none;font-feature-settings:"tnum"}.ant-upload-list:before,.ant-upload-list:after{display:table;content:""}.ant-upload-list:after{clear:both}.ant-upload-list-item-list-type-text:hover .ant-upload-list-item-name-icon-count-1{padding-right:14px}.ant-upload-list-item-list-type-text:hover .ant-upload-list-item-name-icon-count-2{padding-right:28px}.ant-upload-list-item{position:relative;height:22px;margin-top:8px;font-size:14px}.ant-upload-list-item-name{display:inline-block;width:100%;padding-left:22px;overflow:hidden;white-space:nowrap;text-overflow:ellipsis}.ant-upload-list-item-name-icon-count-1{padding-right:14px}.ant-upload-list-item-card-actions{position:absolute;right:0;opacity:0}.ant-upload-list-item-card-actions.picture{top:25px;line-height:1;opacity:1}.ant-upload-list-item-card-actions .anticon{padding-right:6px;color:#00000073}.ant-upload-list-item-info{height:100%;padding:0 12px 0 4px;transition:background-color .3s}.ant-upload-list-item-info>span{display:block;width:100%;height:100%}.ant-upload-list-item-info .anticon-loading,.ant-upload-list-item-info 
.anticon-paper-clip{position:absolute;top:5px;color:#00000073;font-size:14px}.ant-upload-list-item .anticon-close{display:inline-block;font-size:12px;font-size:10px \	;transform:scale(.83333333) rotate(0);position:absolute;top:6px;right:4px;color:#00000073;line-height:0;cursor:pointer;opacity:0;transition:all .3s}:root .ant-upload-list-item .anticon-close{font-size:12px}.ant-upload-list-item .anticon-close:hover{color:#000000d9}.ant-upload-list-item:hover .ant-upload-list-item-info{background-color:#f5f5f5}.ant-upload-list-item:hover .anticon-close,.ant-upload-list-item:hover .ant-upload-list-item-card-actions{opacity:1}.ant-upload-list-item-error,.ant-upload-list-item-error .anticon-paper-clip,.ant-upload-list-item-error .ant-upload-list-item-name{color:#ff4d4f}.ant-upload-list-item-error .ant-upload-list-item-card-actions{opacity:1}.ant-upload-list-item-error .ant-upload-list-item-card-actions .anticon{color:#ff4d4f}.ant-upload-list-item-progress{position:absolute;bottom:-12px;width:100%;padding-left:26px;font-size:14px;line-height:0}.ant-upload-list-picture .ant-upload-list-item,.ant-upload-list-picture-card .ant-upload-list-item{position:relative;height:66px;padding:8px;border:1px solid #d9d9d9;border-radius:2px}.ant-upload-list-picture .ant-upload-list-item:hover,.ant-upload-list-picture-card .ant-upload-list-item:hover{background:transparent}.ant-upload-list-picture .ant-upload-list-item-error,.ant-upload-list-picture-card .ant-upload-list-item-error{border-color:#ff4d4f}.ant-upload-list-picture .ant-upload-list-item-info,.ant-upload-list-picture-card .ant-upload-list-item-info{padding:0}.ant-upload-list-picture .ant-upload-list-item:hover .ant-upload-list-item-info,.ant-upload-list-picture-card .ant-upload-list-item:hover .ant-upload-list-item-info{background:transparent}.ant-upload-list-picture .ant-upload-list-item-uploading,.ant-upload-list-picture-card .ant-upload-list-item-uploading{border-style:dashed}.ant-upload-list-picture 
.ant-upload-list-item-thumbnail,.ant-upload-list-picture-card .ant-upload-list-item-thumbnail{position:absolute;top:8px;left:8px;width:48px;height:48px;font-size:26px;line-height:54px;text-align:center;opacity:.8}.ant-upload-list-picture .ant-upload-list-item-icon,.ant-upload-list-picture-card .ant-upload-list-item-icon{position:absolute;top:50%;left:50%;font-size:26px;transform:translate(-50%,-50%)}.ant-upload-list-picture .ant-upload-list-item-image,.ant-upload-list-picture-card .ant-upload-list-item-image{max-width:100%}.ant-upload-list-picture .ant-upload-list-item-thumbnail img,.ant-upload-list-picture-card .ant-upload-list-item-thumbnail img{display:block;width:48px;height:48px;overflow:hidden}.ant-upload-list-picture .ant-upload-list-item-name,.ant-upload-list-picture-card .ant-upload-list-item-name{display:inline-block;box-sizing:border-box;max-width:100%;margin:0 0 0 8px;padding-right:8px;padding-left:48px;overflow:hidden;line-height:44px;white-space:nowrap;text-overflow:ellipsis;transition:all .3s}.ant-upload-list-picture .ant-upload-list-item-name-icon-count-1,.ant-upload-list-picture-card .ant-upload-list-item-name-icon-count-1{padding-right:18px}.ant-upload-list-picture .ant-upload-list-item-name-icon-count-2,.ant-upload-list-picture-card .ant-upload-list-item-name-icon-count-2{padding-right:36px}.ant-upload-list-picture .ant-upload-list-item-uploading .ant-upload-list-item-name,.ant-upload-list-picture-card .ant-upload-list-item-uploading .ant-upload-list-item-name{line-height:28px}.ant-upload-list-picture .ant-upload-list-item-progress,.ant-upload-list-picture-card .ant-upload-list-item-progress{bottom:14px;width:calc(100% - 24px);margin-top:0;padding-left:56px}.ant-upload-list-picture .anticon-close,.ant-upload-list-picture-card .anticon-close{position:absolute;top:8px;right:8px;line-height:1;opacity:1}.ant-upload-list-picture-card.ant-upload-list:after{display:none}.ant-upload-list-picture-card-container,.ant-upload-list-picture-card 
.ant-upload-list-item{float:left;width:104px;height:104px;margin:0 8px 8px 0}.ant-upload-list-picture-card .ant-upload-list-item-info{position:relative;height:100%;overflow:hidden}.ant-upload-list-picture-card .ant-upload-list-item-info:before{position:absolute;z-index:1;width:100%;height:100%;background-color:#00000080;opacity:0;transition:all .3s;content:" "}.ant-upload-list-picture-card .ant-upload-list-item:hover .ant-upload-list-item-info:before{opacity:1}.ant-upload-list-picture-card .ant-upload-list-item-actions{position:absolute;top:50%;left:50%;z-index:10;white-space:nowrap;transform:translate(-50%,-50%);opacity:0;transition:all .3s}.ant-upload-list-picture-card .ant-upload-list-item-actions .anticon-eye,.ant-upload-list-picture-card .ant-upload-list-item-actions .anticon-download,.ant-upload-list-picture-card .ant-upload-list-item-actions .anticon-delete{z-index:10;width:16px;margin:0 4px;color:#ffffffd9;font-size:16px;cursor:pointer;transition:all .3s}.ant-upload-list-picture-card .ant-upload-list-item-actions .anticon-eye:hover,.ant-upload-list-picture-card .ant-upload-list-item-actions .anticon-download:hover,.ant-upload-list-picture-card .ant-upload-list-item-actions .anticon-delete:hover{color:#fff}.ant-upload-list-picture-card .ant-upload-list-item-info:hover+.ant-upload-list-item-actions,.ant-upload-list-picture-card .ant-upload-list-item-actions:hover{opacity:1}.ant-upload-list-picture-card .ant-upload-list-item-thumbnail,.ant-upload-list-picture-card .ant-upload-list-item-thumbnail img{position:static;display:block;width:100%;height:100%;-o-object-fit:cover;object-fit:cover}.ant-upload-list-picture-card .ant-upload-list-item-name{display:none;margin:8px 0 0;padding:0;line-height:1.5715;text-align:center}.ant-upload-list-picture-card .anticon-picture+.ant-upload-list-item-name{position:absolute;bottom:10px;display:block}.ant-upload-list-picture-card 
.ant-upload-list-item-uploading.ant-upload-list-item {
  background-color: #fafafa;
}

/* Picture-card list: appearance of an item while its upload is in progress. */
.ant-upload-list-picture-card .ant-upload-list-item-uploading .ant-upload-list-item-info {
  height: auto;
}

/* Hide the hover overlay and the action icons until the upload has finished. */
.ant-upload-list-picture-card .ant-upload-list-item-uploading .ant-upload-list-item-info:before,
.ant-upload-list-picture-card .ant-upload-list-item-uploading .ant-upload-list-item-info .anticon-eye-o,
.ant-upload-list-picture-card .ant-upload-list-item-uploading .ant-upload-list-item-info .anticon-delete {
  display: none;
}

.ant-upload-list-picture-card .ant-upload-list-item-uploading-text {
  margin-top: 18px;
  color: #00000073;
}

.ant-upload-list-picture-card .ant-upload-list-item-progress {
  bottom: 32px;
  padding-left: 0;
}

.ant-upload-list .ant-upload-success-icon {
  color: #52c41a;
  font-weight: 700;
}

/*
 * Settings shared by all four list enter/leave animations.
 *
 * The easing curve used to be assigned to `animation-fill-mode`, which only
 * accepts none | forwards | backwards | both. Browsers therefore dropped the
 * declaration and the animations ran with the default `ease` curve. The curve
 * is a timing function, so it is declared as `animation-timing-function`.
 */
.ant-upload-list .ant-upload-animate-enter,
.ant-upload-list .ant-upload-animate-leave,
.ant-upload-list .ant-upload-animate-inline-enter,
.ant-upload-list .ant-upload-animate-inline-leave {
  -webkit-animation-duration: .3s;
  animation-duration: .3s;
  -webkit-animation-timing-function: cubic-bezier(.78, .14, .15, .86);
  animation-timing-function: cubic-bezier(.78, .14, .15, .86);
}

/* Each state class selects the keyframes of the same name. */
.ant-upload-list .ant-upload-animate-enter {
  -webkit-animation-name: uploadAnimateIn;
  animation-name: uploadAnimateIn;
}

.ant-upload-list .ant-upload-animate-leave {
  -webkit-animation-name: uploadAnimateOut;
  animation-name: uploadAnimateOut;
}

.ant-upload-list .ant-upload-animate-inline-enter {
  -webkit-animation-name: uploadAnimateInlineIn;
  animation-name: uploadAnimateInlineIn;
}

.ant-upload-list .ant-upload-animate-inline-leave {
  -webkit-animation-name: uploadAnimateInlineOut;
  animation-name: uploadAnimateInlineOut;
}

/* Enter: start from a collapsed, transparent box and animate to the natural size. */
@-webkit-keyframes uploadAnimateIn {
  0% {
    height: 0;
    margin: 0;
    padding: 0;
    opacity: 0;
  }
}

@keyframes uploadAnimateIn {
  0% {
    height: 0;
    margin: 0;
    padding: 0;
    opacity: 0;
  }
}

/* Leave: animate from the natural size to a collapsed, transparent box. */
@-webkit-keyframes uploadAnimateOut {
  to {
    height: 0;
    margin: 0;
    padding: 0;
    opacity: 0;
  }
}

@keyframes uploadAnimateOut {
  to {
    height: 0;
    margin: 0;
    padding: 0;
    opacity: 0;
  }
}

/* Inline enter: same as uploadAnimateIn, but the width is collapsed as well. */
@-webkit-keyframes uploadAnimateInlineIn {
  0% {
    width: 0;
    height: 0;
    margin: 0;
    padding: 0;
    opacity: 0;
  }
}

@keyframes
uploadAnimateInlineIn{0%{width:0;height:0;margin:0;padding:0;opacity:0}}@-webkit-keyframes uploadAnimateInlineOut{to{width:0;height:0;margin:0;padding:0;opacity:0}}@keyframes uploadAnimateInlineOut{to{width:0;height:0;margin:0;padding:0;opacity:0}}.voice_chat[data-v-1c392dd0]{width:1200px;height:410px;background:#FFFFFF;position:relative}.voice_chat .voice_chat_wrapper[data-v-1c392dd0]{top:50%;left:50%;transform:translate(-50%,-50%);position:absolute}.voice_chat .voice_chat_wrapper .voice_chat_btn[data-v-1c392dd0]{width:116px;height:116px;margin-left:54px;border-radius:50%;cursor:pointer;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAHQAAAB0CAYAAABUmhYnAAAABmJLR0QA/wD/AP+gvaeTAAAQMElEQVR4nO2de3xU5ZnHv8+ZyYUkJIGEgNwSLpKEKJgMwmqpBlArXW3tVv1Q78qnunbFC9XWlbqrtVpFilY+2lqrYFFWrVK3VUHFLO3qtkguiIaEi5gJF5UQLrmRkJnz7B+TBEJIyFzPGZjvXzNz3ssv+c175r0873uEKKagoDJeEpLHeSEPlQkqmivIKJAU0GSBZIVBQHJHlmaB/QrNIM2gTSC1qG5BdIsDqrWt+fPKyoLDVv5dwSBWC/CHCZN3jHAY5kwVnSHIdGAM4AxxNR5Et6vykYGUeFVKNleM3h3iOsKGrQ11uUrjWs2s2aahs1FmALkWSdmMaomKrEqWutVlZVPaLdJxQmxp6MTJ7gJ1cq0qNwpkWa3nGPYL/FGE5ZVl2R9aLeZYbGPoWNfnaQnqvAW4Aci3WE5/qQKWtYnn2e1l4w5aLQZsYOgE1+5Mp7bfpnA7vg5MNNKoylIx9NGqspwvrRRimaFnTNs+1Otx3IUyD0iySkeIaVbleY/X8fi2jSN3WiEg4oa6XKVxLQz5EcrDHBlOnGwcEljYkmo+WrN2TGskK46oobmumhmGytNEz29ksGwzMOZVlo9aHakKI2LohMk7Rjgc5mLgykjUZ0NeNVXmR2I8G3ZD84rclwgsAzLCXZfNOaiqc6srct4IZyVhM7S4WJ17Gmp/pnA/YISrnihDVVniONx0T7imF8Ni6Bln7xrl9XpeAc4NR/nRjsJ608GcLeuzt4e67JAbml9Yex7onxAGh7rskwpln4hctql89P+GstiQ3goLCmu/g+jqmJn9QBis6PsFhe7LQ1lsyAzNc7mvN0XfAAaEqsxTgARTeCWvyH1zqAoMiaH5Lvcdoiwl9EtZpwIOgd9OLHI/EIrCgjY0v6h2AcqT2GBeOIoRhf+cWFR7X9AFBZM5v9B9HcKyYMuJ0YUienNVWc7vAy0gYCPyityXCqwkdpsNNV5Fr6wuz1kZSOaADM0rrDlHRNZw8qyS2I1DgnwrkCGN34bmF27PRhwVRO/aZbRQb4inqLJsXK0/mfzqFLlcpXGIYwUxMyNBhqmO1woKKuP9yeSXoYfIfJTYdF4EkWlmQspDfuXob8L8KTXfxpS3/MkTIySooXJZZcXoP/cncb/M6YiH3Rib0rOMvYhO6k+8Ur9uuQ6HuThmpqVkimks6k/CE7bQ/CnuCzF5L3hNMYJFlAs2VWR/0Geavi4WFFTGmwkpG7EuYj1Gd7a2Nxw+c9u209t6S9DnLdebmHIvMTPtxOnOtPj5fSXotYV2BHZtJbYcZjdaEB3f
Wwep1xbqcJh3EzPTjiSpGr220uO20LypOzPE460BUsKlKkZQNHslLmdL2fC9x144fgv1eOYTM9POJDvN9nnHu9CjhXbsAqsB0sOtKlykpxlcODOJ4vMGkD3KSUaGg9ZWZU+dl49LW3m/pIWNn0XtJu1ODiaYZs6GDWMOHP1hD0Pzi9w/AR6LmKwQEh8v3HjtQOZen0pKct9zJv+3rpWFi/ezZZtt9+6eEBG9Z1NZTrcJh56GutyfoRRETlZoGDzYwVOLMimanND12c5dHso3tFG/z0tiosHYMU5cZyXgdPr+7NZW5T8e2sdbq5utkh0sm6rKs7t51c3Qgik7ppqmuS6ymoInOclgxbKhnD4uDoBPPm3jV08doLS85/g7LdVg7vWpXHfVQOLjBVX48b/vZfX7LZGWHRpEXVVlOeWdb7vdl9TUayOvKHgefmBwl5krXmvkmrlfH9dMgIMNJouXHOCmf93D/gMmIvDIgxmMGxsXScmhQ6WbZ12GulylcYrOibyi4Jg2JZGLZvkiYd55t4WHF+7H6z1xvvJP2pj34zra25XEBOGnd0Xnmr3CVS5Xade3scvQVjNrNpBpiaoguGVuKuBreQ89ug/V/uct39DGy682ATD93ERyJ/gVHGALBLJayLyo832Xoaahs62RFDjpaQZnuxIB+OPKJg42mH6X8fyLDXg7sl18QXTGvIkaF3e+Puo3VGdaISYYJp+ZgMPhe13y10MBlVG/z8vGT9s6you+FgqgR3lnAOQW1g5HZYJ1kgIja4ij6/UXNYGPJ2tqPQCMGB61IcYTcwtrh0OHoQY6y1o9gZGWduQG09jk/+22k4MHfXkHDozefclimMXQYaiKzrBUTYCEKlpN8aMnZVPElBnQYagI37BWToxgEWE6gFFQUBmPylirBcUIDoXxLldpnEF8ynhiG45OBpxNMnisYYrGYoZOEpxeZ66hIjFDTxJMMXMNQ2NRfScNYuQaqoywWkeMEKGabSAy0GodMUJGhoGorQ2dc3kKTy7MZGiW48SJw8SFM5N4evEQJoy3+5qpZBiovaP77r5jEBfNSuKyS607Wvf2W9OYcf4ArrrS1t99QAcaKLZW2dTsm2cdadHEuWEcmbTv1GJjEgzE3i20dodvJWTMmJ63O4/nyOv4+MBnduM7gsba2nrO6Q4b6iQx0Xe9dqenx3WbkWD75YVN1b742Ym58T1M+/LrI//go5fS/GV4Rwusq+sZuzLpjCNrpFXV9o/lNVCarBbRF+tKfUe2JyZKjwXoL9xHDJ3aEbngL06nUNgR+vn5Fz3XVM+Z5iu3sdFkU5XtDW0zEBqtVtEXpWVtXUFf50ztbtqWrYfZuctn6r98N7BO06ziAaR3rKv+z996Rj101rm+rK0rVMXGtBmIvVtoY5PJJ5/5QkQumZ2McXTQjMLKP/uCpM+alMAlF/tnalKSMH+eb8fHnjovf/uwu6FFZyUwcoTvdvzh3wMLcYkwDQYqtm6hAG+86fvOjRzh5Jvndt/h+OLLDXy9x9eEH/zZYM4s6F9cUFyc8PjDmYwa6TNsyW8P0npMp2jOFb7+Ymur8s57URGIXW+gantDV73b0hXRd/Wc7qOsQ4eUe++vx+NRBgwQXvzdUL73ne4t+VhGjXSy9NksZpzn+3K890ELK/+7+40qa4iDi2b6ogDfXt1MQwARhRFHqXdkDb9rJnCW1Vr6wuOFQekGhZMTGD3KyYaNbew4agixa7eHHbu8FH9zAAkJwqziJGYVDyApycA0FREYOtTJ2UWJ/PDGVB64b3DX2HLd+lbm37uX9mP6Qwt+OpiC/HhU4f6H9rG3vh/R21ZjyFrJc7nvFeWXVms5EelpBqvfHE5qqsH2mnauuOYrDh3qfossOiuBhb/IYPhpJ56E8HrhpVcaWfTrnpH2U12JLH02CxF4e3UL9yzosa/WpuhPHEOGz88EbL8ForVNaTmknDd9AIPSHQzLcvLB2u4dlS+/8vLK600cOGgy
ZIiTzIyeY9PGRpN317Rwz4J63lrV3CPSPmOwg+d/k0VyskFTs8ntd9fR1BwdQWSKLJEzimryvcgmq8X0BxF45skhnD/d99v33LIGnlhyoNf0pw1zkpPtZEimg8Ymk7o6L9Vb2vF4jm/QoHSDF36T1bUl4p4Fe3l7dVR0hgBQryNXOs4iaiZK4orS0wxeemEoY3N8U4Gvvt7Eo7/aT9vh4FrR2Jw4nlyYyfiOXWwvvtzIY4v3B603grQnSV2yAOS7ajZHU+R81hAHf3huKKNH+b6D1ZsP88ii/b1uIeyL+HhhzuUp3H5rOklJvqnF11Y28eAj/m18shqB6k3l2fkCkFdU84IgN1otyh/S0wwW/TKTc6cdmT36x8etvPlWMyVrD51wZSQn28m3L0rm+5clc9ow3xfDa8ITSw6wdHlDVJkJgPL7qorsH/paaKH7WoQ/WK3JXxwG/ODKgdx2SxqpqUcGnl4Tamraqd7SzoEDXhqaTOKcQupAg9OGOcjPi+/RYaqsOszDC/ezYaP/rdwOqOjV1WU5KwR8m5UM0V1WiwqUQekGV34/hSu+l9KvIcvRfFzWyorXmlhT0oIZBXMHvaCIjqgqy/myaz0qv8hdTZSf62cYkJ8bz1RXIgUT4xkx3MHYnLiuTUi7dnvYucuDe4eHik/a+Pu6VvYcZ8ksCuk6POPI11m1hCiP0TVN362z8qhlrptvSuXOf/NNwF94adifx2oJgpR0vu764VGRVdbIiREsaphd3nUZOix19CqUr62RFCNQFPYk6d73O993Gbp2rXjU4DVrZMUIFEFfLiub0rW00K1L6PDKctPQ4x4KaCdmFQ/wrYv2Iy5sYu6R9dEHFvTv2PzWVuXlVxrZscv2QWGIwfJu749NkF/k/hQ4I2KK/MTpFMo/Gtl1vFu4eOfdFu6+z/arLD2OhjveoG05Nj680eNR/vSXZv5pamBBYf3B9MK7a+w/KS+iS3t8duwH46dtTY1rj3cTxcerniIc93jVHoEa29ad3qDoM5HTFSMQRPn1sWZCbydaO52Lwd7RgKc4zR4jbsnxLhzX0OqPR9aDPhdeTTGC4JnjnTcPfTwVwlRjERAVwainGM2Gw1zc28VeDd1cMXo3orYPHjvVUOEXlevHfNXb9T43KxmtzY8JVIdeVoyAEN3iOXj4ib6S9GloZWXBYVVsP3N0qmAot/b13DPox+Mmqyqy10BsjtdqRGVFZXlOyYnS9Wt/qKlyF1AftKoYgbJXDfPu/iTsl6GbK0bvxtDr4CQ4tjL6UENlbn+e7gt+PFS9qjTnHVHptbscI0wIC/v7/G3ww1CArLRR94rykf+qYgSGrkui7n5/cvi9BpVfuD0bHOWxZ3KHnXqHw1n42foRO/zJ5PehGVUVY92I+c9A1D5fKgo4ZAiX+WsmBGAoQFX5mH8o/ACw/5J+9OFV9JrKsuwPA8kc8LE21eXZf0G5iVjPN5QoqrdUl+esDLSAoM4pqqrIXi7Iz4IpI8YRVFhQVZHzfDBlhCQwJ9/lvgPliVCVdwqiAj/fVJ79QLAFhcyA/EL3dQjPEyX7TG2EV+HW6vLskKw/h7RF5RW5LxV4FRhwwsQxANpU9erqipw3QlVgyG+RBS73dFN5E8gIddknGfWmwXc3l2aHdKIm5Ic3VpZlf+hwOAtjM0p9oHxsevTsUJsJYTAU4LP1I3ZkpY0uFngQiN5dl6FHVXkqyaibvnljzhfhqCDsvdL8QvcFwEsIQ8Ndl83Zi6HXV5XmvBPOSsJ+Xm5VRfYaDC1E9L/CXZddEZUViE4Kt5kQ4XFjXuGO88Uwn0YpOHHqk4KtKnJbddno9yJVYURPtK6uGPXXJOoKEe7k5A7kbhF4sL3h8JmRNBMsnNmZNGlblscZ9yOFO4E0q3SEmCZVXlDksc0Voy3Z/2/5VF3e1J0Zhsc7T5V5UbzG2ojwjDocj/t2HViH5YZ2Mn7a1tR4T9zNitwQNb+xQqWgyw4723+3bd3pDVbL
ARsZejQTJ7sL1Mm1mNxgu+GOsk+E10VYHuiaZTixpaGdFBer8+tG97dQYzboTCDfIimbBClRw1w1NCX7vbVrxbYL+7Y29FjyXTWniTJTxZghqt9QGAeE+oFk7QKfq8hHilniMLSkr70kdiOqDD0Wl6s0rkkGj3V4nHlqaC4iEzB1NAZDUDKAFHyGdz49qgloB5oQ6jGpw5Ba1NwsprHF6/RUp+i+7UefKhJt/D+IYnHePFNWNgAAAABJRU5ErkJggg==);background-repeat:no-repeat;background-position:center;background-size:116px 116px;margin-bottom:17px}.voice_chat .voice_chat_wrapper .voice_chat_btn[data-v-1c392dd0]:hover{width:116px;height:116px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAHQAAAB0CAYAAABUmhYnAAAABmJLR0QA/wD/AP+gvaeTAAAP8UlEQVR4nO2de3hU5Z3HP79zZjLJTC6EIFRYQO4gaOuNeqsVdLXU6urzaJddvFW3cb2gJnENSPvstKsmRAm2VNfauvIs1npF65WqxaqAt/WGV1BAUMFaEiDJTJKZOee3f5zcE0LmlpmJ83mePJPJnPO+v8n3/N7zvr/3955XyGD8fs3ZG2yaJIY5XbCniso0hLEK+YCv7ae47RUgAOxpew0INCnsAN2sGJvVtj4e5s3f4vdLKDXfKH4k1QZEQ9mS4BjDtudiyxxb9ERBJgCuBFcTAd0qKusVXetSWVtT49uZ4DqSRloLWlqq7vyS4DxR5oHMUXRaKuwQZJOKrlWbZwJ7vGvuukvCqbBjIKSloGWVjTNFzAtAfwKMTLU9Pdij8JChxqplS/PWpdqYnqSNoJWV9UURI+cyVbkYmJFqewbIRyK60mWHfrt06fB9qTYG0kDQioqGEeo2rwKuxunAZCDSCPY9tiHVt93s25VSS1JV8TU3NI0ybaMMdCHgTZUdCSYAerfLNm6pqfF+kQoDBl3Q0lJ1FxQ3X6GiN9E5nBhqNAM1hbnear9fWgaz4kEVtPyGxjnYxu1kzj0yXj5VdOHy6vw1g1XhoAjaNn6sVZUfD0Z96Yc+4LKlfDDGs0kXtHxx4EeirFQoSXZdac4+kEtrq72PJLOSpAnq96uroSX4M+DngJGsejIMBV1RmOv7j2SFF5MiaPmS4FgsvR84PhnlDwHeUMz5y6tztya64IQLWlHZfJIa9qMowxNd9pBCqFeMs5dX5b2cyGIT2hSWLw6cpWKvyYo5AJThovZzFYuD5yay2IQJWra46SKUR4C8RJX5DcCjqvdXLAqUJqrAhAhaURm8RlTuIfFTWd8ETIU7yxcF/IkoLG5ByxcFlqjobaRBXDiDEeA/KyoDNySioJgpr2y6EJGV8ZaTpQMVKF1W7ft9rAXELERZZeBMEVaTbWYTjSUiP15W5V0dy8kxCXrt4uBxhurzDJ1ZknSjWcU4PZYhTdSCVixpHq+W/TYZO3eZGQjURdQ48ldL83ZEc15UnaLSUnWrZd9HVsyko1BiiP2g36850ZwXlaD5xYFqsuG8QUPguw3Nzf8V5TkDo3xx4IcoT0ZzTpaEoAhn11b5Hh/IwQMSp2xJcIzYujEb0ksZu22DwweSrzSgJtew7dqsmCllhKFy60AOPKCHVlzf9I9qyLPx25Qlbgw9tfbm/L/0e0h/H/r9moNhrEisVVliRW3+e+FC9fR3TL+C7msOLkrV8oMsvRFkitsXKO//mP1QtiQ4Riz9hOx0WLoRtA0m76+DtF8PlYh9HVkx0xGvYcl+vbRPD71ycUOJR83PcNZZZkk/AhK2Dlm2rHB3zw/69NAczHKyYqYzPnLMhX190MtDKyvri8J4PkMYlmyrkoXPKxw20+TQGSYjSoSCfCE
cgX37lC3bLDa+b7HjczvVZsbLPjs3dMht/uK9Xf/Yay4zYuRchmammC4XnPw9N3O+7yLX0/tuUlQojBtrMOckN5s/sXj86TC7vspYYYuMVve/Ad0CDr0EVZULB82kBJKfL1x8vocJ4zvvIvV7lG2fWTQ2KW63MGqkMGG8iWnC1CkmV19h8tDqEG+9E0mh5XGg8hN6CNrtMi67PjhbDH1tUI1KAB6PcPXlHr41yhFz+w6bJ9eE2Lqtt/d5vcKck1ycdIIblwtU4d77W3lnozXYZicE2+Co2272vdX+vlunSAy9YPBNip/55+Z0iLn+lQi/+W1Ln2ICBIPKU2vC3Pn7FgIBRQTmn+th1MjMXK1hWN016/gWpaXqBuYPukVxMnmiyeGzTADeftfi0SdC2AO4LW7bbnPPva1YFrjdcNYZ7iRbmiRE/rVNO6CLoPklwXnAiJQYFQenznW6AcGgsvrxEKoDP3fbZzbrXnHun9Onmow+OCO9dGRhcfC09jcd38B5dExm4fMKkyY43vnqGxGCwSjUbOOFF8MdHv3tw8xEmjdo2MIP2n/vEFTRuakxJ3bGjTMw2r7BBx/G1qlpbNKOMen4cRnpoQAd2hkA118fGA0yNXX2xEZRQWcn/evd0XtnO3/f7Qg6fFjGCnqoo2GboJapp6TWntjwejsFbW6OXdBgs/Oam8FTEWHRk6G9ybVlTiqNiZkEpatpND2pNEXE0dAAUNETUmtOlviREwEMJ5FXJqbanCzxopNLS9VtNLY2TSa74Ggo4Mob1jjRsNXM5gwNEUzTnGYYamcFHSKo6jRDRbKCDhFEZZqBMCbVhmRJDCKMN1AKUm1IlsSgUGJAegt6/LEuLlrgoagwdYveDptpcsmFHg7+VtqHBksM0jy770fzcjh8lskxR6VuZDXvNDczZ5iccGzaj+4KDCS9PbS1xQnLDR+eGg8VgeJixzNbWtM+ROgx0PT20N11zj9x5EG9mzu7y4yZOw7ncZnOxRLpI1dsWJGQ05YPUFefCYKmOV/sdKa2xow2cPUQbe++zlyTwjjuscXFzrkNDb0F6zpH+uXO9E/5NBCaUm1Ef3y6xXHDHDeMH9v9+vv6750CTJoYW7aBacIhbamff/u6t2BTJjvlNrcoX3yZ9oK2GiiNqbaiP7ZssztSRNr/ue3s+sqmfo8j6uyjY2tzZx1q4mubV/3go95ZD1Pb6tyy1R5Q8lmKaTUgvT20pUXZ3pYictQRLqRLy6oKr/+fc+M7ZJzBkd+JTlRPDpxxuvPUmIYG5aNN3QWdcIjB8LbmeNMnGZG322BAensowOtvOKINLxamT+vupS+uC7Ov7d533jk5jBs7sG6BacKC+R5KShzB1jwXJtxjR7Pjv+tcIKGwkyKa9ih1BpL+gr69sTOj73vHdffCUAjueyCEZUFODlz+01xmH9Xdk3tSUiJc/tNcZs5wLo6N71u8/mb3Lm5hoXD4LKeut9+NxJXiMmgY1JnHnbhkLvCdVNvSH7YNPp+zLmVEicH2HXa3IUT9HqWuXpk5w8Ttcu6Lsw418eQItjpjyaIig0kTDE452c25Z3s6mtJPt1j87x+dhOuunHNWDmPHGKjCg6tDNDZmgKDKX12iukn7u5zThBdejHDsMS7y8oSzz3Sz/DcWoS77Krz1ToQ9e20W/LOH4mHC6IONtsTpvjPibRvWbYjwxDO9M+0nTTQ5+gjHO9/ZaGXEcAVARTeZx5+0ZAQZsAQiHIbWEMyYZuLzCcOKhPd75OLu3atseM1pnosKnXWhPWluUTa+Z/GHB0K8+U6kV6Z9Qb5w2aUecj1CS6uyclUrLa3J/GYJRGWFy7LsTYaR9vEFADa8GmHGVJMZ002OPtJFQ4Py1J+792QiEXhpfYSX1kcoHiYcNMKgoEBoaVEaGpWdu+xezWs7Pp9QeomHwrZ834cfDbF3XwY0tW2IYW4Sv19zGlqCATIkr8jnFa76d09HKHDDaxH+9GSoz7BdNIw8yOC
iBZ2r2F5aF+FPT2XUVtzhpnqvTwDKFzVtyqTM+cJC4crSXEa0DTm+3GXz2BN9rwc9EC6XMzz5wWk5eNoeZPrq6xEefiy6hU+pRz6urfbOcAGIynoVMkbQhgbl13e0cP78HKZOMRlzsMGVpbl8ssXijTctPvjQOuDMyEEjhCO+7WL20S6KhzkXhm3D038O89eXwxkmJoCug/Zm1tAX2pZ3ZwyBoPK7la2ccKyL0091k5cnTJlkMmWSiW0761V27lICAaW5RTFNyMsViocJY0Y799WufPGlzaOPh/hsR2b0aHuiqi9Am6CmJX+JZEa/qBu2DS9viPDWuxbHzXZx7GzH2wwDRo00GDWA7di3bLVY90qE9z6wMtArO1A15QXosjqkYlHw40x/rp+IM802eaLJP4xx4rAjRwp5uc7XrN+j1Nfb7K5Ttm232fyp1eeUWQbyYW21byZ06dmq6FqUjBZU1Wk6u05znTLHzQ9Pc4ILN9U0p8q0ZLO2/ZfOBb82z6TGlixxI53adQhalOd9BvhbSgzKEg9fN9V5n2t/0yGo3y8RhAdTY1OWWFH0D3fdJR3hsm7RIVFWKfT5UMB0YtahJtOnmv1OkbUzZnRn9/28cwa2BUooDOs2hDMhKQxBVnV/34PyRYH3gFmDZlGUmCZU/cKLmeQHlrz9rsW996d9VL6jd9tOr/itiK5SlaWDZ1N0WBa88VaEKZOSp6ja8O57GfD8P9F7ev6pl6AhT+ud7ubcxen8eNWHVmdU0DxZ7LM94V7bUvaKD63wlzSowR2DY1OWWBHhVz2flQv7eaJ1CKuWNM8G/IYTIGT1uf1Kn4LeXlVYp+jvkmtTllgR5I6+njcP/ewK4bblVmDIxsoymIBh2bX7+3C/gtbU+HYCVUkxKUvMiOqNt9yS/9X+Pu930qww17sU5OPEm5UlNnRzKOhb3t8RB9j7TEKKnfaRo28KBnr5ihXSb7TjgNPay6vznxfRbIw31Yjcd2t1wdoDHTagPAXTkjKBuvityhIju23R6wZy4IAEranx7VThQiD9o9VDD0W4dCC7+0IUm6rXVvmeFmS/3eUsyUJqBrr/NkQhKEBBbt4iYH3UNmWJCYXXmurzfh7NOVEJ6vdLRExjAUJ9dKZliRaBOjHlvK6T1wMh6uTNZTflbceSM4BAtOdmGTDNqHF27U3ez6M9MaZs3Noa76uq/AuQAZOGGYclIucvW5q3LpaTY06vXr7U94SIXkK255tIVIXLllV5V8daQFz58suq8leJ8rN4ysjSFV2yvMp3dzwlJGTpdkVl8BoVXZ6o8r6BKPDL2mqfP96CEiZAeWXThYjcTYasM00jLITLa6t8CZl/TqhHlVUGzhThASCDt7QZVFpBFtRWex9JVIEJbyIrKptPROzHFEoSXfZQQqDOEvmn26q8CQ3UJHwR4bKleevUlCPIRpT2j/C6uIxjEi0mJEFQgNqbvJ8X5npPBn4BZOYK2uSgoL9uqvOeeOuNeduSUUHSe6Vli5pOFeReYFSy60pzdiNcVFvlezqZlSR93fby6vznbYMjQP+Y7LrSFpH7bIPDky0mDPK4sWxx8/dF7duBmQc8eAig6CfAVcur858drDoHPRBQWqruguLmK1T0RtJ8A4M4CAK3hAPeqgPlACWalEV2rruucaTtMq4ArgWKUmVHgmkC/R+XLUvb0mAHnZSH6q5c3FDiUXMhwkKU4am2JzakEbijVSK33F5VmNLcq5QL2s5Cf12hu9VTisrFZM499gNEV4Y9rXet8Jc0pNoYSCNBu1JW2ThTxLwA9GLSbbgj1KvysKHGqljnLJNJWgrajt+vrqbm4Om2wTyUucCMFJnyIbAW4ZlCj/dZv1/SdmI/rQXtybU3BA42bZ2rKnMQOQF0Evt7wnHshEG2oLpe0bUum7X9rSVJNzJK0J6Ulqo7b1jjREPM6Yah09SWqRiME5uDVCjBGRa56RweNQFhoEmUOjX4OzY7VHQTKptttT5u3lu
wNdrErHTi/wG5jWw4PKXrYgAAAABJRU5ErkJggg==);background-repeat:no-repeat;background-position:center;background-size:116px 116px}.voice_chat .voice_chat_wrapper .voice_chat_btn_title[data-v-1c392dd0]{height:22px;font-family:PingFangSC-Medium;font-size:16px;color:#000;letter-spacing:0;text-align:center;line-height:22px;font-weight:500;margin-bottom:10px}.voice_chat .voice_chat_wrapper .voice_chat_btn_prompt[data-v-1c392dd0]{height:24px;font-family:PingFangSC-Regular;font-size:14px;color:#999;letter-spacing:0;text-align:center;line-height:24px;font-weight:400}.voice_chat .voice_chat_wrapper[data-v-1c392dd0]:after{content:"";display:block;clear:both;visibility:hidden}.voice_chat .voice_chat_dialog_wrapper[data-v-1c392dd0]{width:1200px;height:410px;background:#FFFFFF;position:relative}.voice_chat .voice_chat_dialog_wrapper .dialog_box[data-v-1c392dd0]{width:100%;height:410px;padding:50px 198px 82px 199px;box-sizing:border-box}.voice_chat .voice_chat_dialog_wrapper .dialog_box .dialog_content[data-v-1c392dd0]{width:100%;height:268px;padding:0;overflow:auto}.voice_chat .voice_chat_dialog_wrapper .dialog_box .dialog_content li[data-v-1c392dd0]{list-style-type:none;margin-bottom:33px;display:flex;align-items:center}.voice_chat .voice_chat_dialog_wrapper .dialog_box .dialog_content li[data-v-1c392dd0]:last-of-type(1){margin-bottom:0}.voice_chat .voice_chat_dialog_wrapper .dialog_box .dialog_content li 
.dialog_content_img_pp[data-v-1c392dd0]{width:60px;height:60px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAHgAAAB4CAYAAAA5ZDbSAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAeKADAAQAAAABAAAAeAAAAAAI4lXuAAAfOklEQVR4Ae1dC5RcVZU9r7tDYkiHAAEFJD+SdCAQDCBoYMCAP3AYHZcw/mA5o8IILpYgLlFZM3EAYUbAAT8hDooDaw0IistxqSNKAiogEuQTkHyAhJAQ/r8OhJBOv9n7vHtenbr1qruqu6q7Q+ok9e69555zz71n39+771V1Im8AWrQoHbWsW/Zp65GuXpGuBB9JZU80rbM3lc4kkXFIdzItbdIuvdKdinQnbbIRshoH7xlJZGWayIq2VFaM7pCVF38peWVbdw/at+3RFy5Mp2wVORrAzQeAh6IR0wBYB9ICIPm/gpiXhowgViiT6ycCrGU9dO6F4OIddpDFl5wl9ydJUlR8RVkjhcG2jnha8N103IuvyPFw9jEYoUfDxVNZafN0GWBgKpjIt8bFctX46oi+9Z+DzC0of3HbKPn5t85K1qvOCL5YW0dcFa+/Pm2/Y40cszWVk1HJvwe4Y7WSrDERQ2hR8n08Rt6P3lxwkPooE31NbsaUf/UokZ+N1OmcfhlRdNYl6czerfJZOPATvb2yh1bOwAjA5gDGNXcox6BqVvP0N8LejWmb/OCys5Pfx9UazvSIAfjsb6ZzelL5alsiJ2Bdbcud4kDLeYw4vgfTx1XeyQ2Fflub3I66XXDpF5NfldkbpsSwA3zmpemhmOzORfv/FiNz2OvTKBzQ0e4h0OPPkp8tSBJO58NCw+bQcy5Np7+eymUA9ziDlbtcjkAlm07NLSFdJsM8J1eW5/i+vDKZodBP5EGM6jMuPjNZrPUY4ou5c8jMLrgqHfPKC/IVTMNfhtHRHiCtBIBRbELNFBBmMO1Bs7iFqpzJjFD9a0e3yRe/cWaywao6FOGQAvylS9LjsLp+G5unaWbY4+OBUTAzvGyA58DnoFfJV8exMFBR+cOmn8jL2Fz8y9vfKt858cQEt/LNJ/NzUy0tuDwd/2qPLIKzP0pD5nQL6zVuehZuc/qJ3N2xg3z0otOTh+ute73yTQf4nMvTub09cj2G4XRWrmINdDzLY8hha5Uz/htMvxt3DJ+96AvJj9muZpH5sCnln/Of6WnYPl6KITvaNk8GloVqGIDmozGqkYINoTeqPtp1xZjxcuaCf0xeawYIkTsbY+KbV6c7PvuC/AiofUQN4JIDBRMKpudRKCBsclaT7UK/Te6TDvnwRaclj1q7GxWq/xpVGMtZsCiduHmz/BKAHWrl2uiMQ+Z7HtMOayaVvEyc7/MoHOeT52XifJ83nPqw/VRHIseed0ZyD+vRKGJ7G0YAd9KWzXITboG6vKfVqeZJWDMn5543HmQ4FesoNiGEGt0O9OGGlzva5UP/dnqypFGg0HcNoQUL0/23bJX/S3tlLwUJpTqMctA8WN64B3U719/c1i6fPP/05CeNAMb7eMDlAdxDCS4K25mFxOsoefnI1ASFGCknypC2d334phe+OPW8zydXZh4Z+HXQAGNantWzRf6IKuwaV4OFexwtXQZ2rOTSJm8sS28P+mhrb9IuJ/7b55KfWvsHEtJnA6bzF6Z74Tz5doy4SVULcWunX3OryscZ27f+ZgB07GDW5AEDfOH30p03pfIHLLSz82HK0gIgFq1IG4A2tHNBZFgcoUW3d328vfJyMkqOWnBKwleH6ib6sW7iAwN5TW4GRvMMAA1ZkiGDkGuprqsGZpSvhplnOlF+Sz/zH9zzFPw7b8EA7pNLD9bV2zVeNuMxH8AlLgQwD30ceTiKK89j
2j7UbenX5D+46c3w208u/1U6GvG6qG6Av35F+jEAc4oBRWtFceNZSDQZ50g3nqZb+rk/vC+9jxjHK55zX1yLY986iS6vmc67Kp0hW+RuTJ2d+fRZoO3zfNyLVuNTxuf5eEsfs6LIieeemtzgfdFXvOYRfBXX3S14KsSXx0E6/Ybu4XsJexveYFAi33pixgmjGImWfskX9fgPI/nK8xal+5g/+wtrBnjdFrkIYL0tB81ABAPfEFDACBqJgcYRUSDJYxxyzFPQW/rwRMkX9FON/hsPJ16bpniHswaiv/ulC69M52LpvAtnzO22htrml8oWt+mUaSMaKOMjkwCbDuUsXiYXCmjpF/sPc/Xnv/bp5LvBTVWDfnsBegq/q7EQJbTraITHdQSCoWFI0wJHMnnWG/MeGWRa+s5ng/QfRsX5eCy7O/3eF/UL8L//UD4D0A6zQmx9tZB8A84ANVmGCrizYnoWUqalXxoY9Ienqv5LZAKOiC/2skVxjK3qdPH/pBN7NskKjOBdKKhTKSKcSj2xEhU8Cjh+S785/sN59VHnfKr6tyk6PFBxfOtm+TpA2oUD0NZHynDERRjrSGWeAck4ieArDwqMk1r6DfRfr1yOZXRutW89BpdnjvfXb12T7tHTI6vxTlV2eqLDN0i4uIJHtuNZOXlenO9kcxnHa+lnHsh905//Evnglz+V/K/5zYdudfRsEYB7Nu65RtMIP1wzLa6bqcAnU/nIt7XU5PK8ln6z/fc1uLiQiEUFce3FocYaDKodNZOji0RpP9Jivgq5i5Uey1XjO1WNVpOrxt+O9THo3nv2SclvYxcUrsFJj3wBmOxoflRgTTNnglEtbrIWVpOrxjc9C6vJVeObnoXV5KrxTc/CanLV+KZnYTW5anzTs7CanOPjXpajuAJgJ5KV9t3r03GvbZZ1GKk7WflFod90VYsX6Rmvmo7nm2xR6OWqxU1vL9wtHjFXZDe8UPTSRpF7lossW4X+GVrfn76V48NqOp5P+X3eKvKOOXiXabzI8y+L/Ol+kUfXYSLECGq0/bYOmXfmx5M7fD0r1uAtW+TDBNeMM7RewND4uiaHPLunZZ6P09Bw6++5m8gJ7xEhyDvgq/gE+b3vFDlkdqluvs4+Ptj6z5ws8sH5eNaHl5lo+y0IP4T0DLz/QjvmG2/Tx+u1D9z+iTqeKgCG0Mm6WYKUN5ZXiCDig/9KtuHKZakX8nMedaxBjId8FtBsfY4eq4dWOFzeCT6pmfbnHZjZiK/kwwVKjbSPWeGEq5bgoZCjMoAv/2mKCUXm6ygNIGgvC7VRPpWZB00CpRTJjiT9XassNBxRO2VbyKw9bBbbET7aRsdjO+tpP/3DabmIyMersY33H5r08gb5O2+zDGC8hvMJNEx5OsoccAom0gw1HtYQNtpkTYYGjIdsdYzlMdT4EOm/gHWviLb0iLyCbwM1q/5s90vdRZazfQDeH887k/nKfEQt49XtP5GTvdVygJHJgm1Ks95sPdeMsQCdWsDwPPJHmv5dD2YbGtbN058fyFLNrP+dwYa3y/idy5rnP2D1voU3lh5C5AB/57p0JjL3s8oYyBYSZMbNIdrbIEy+fYzHMkzPwuHS3/CsyM9vEXn6eZGtGDUcVbcuzXbSrKfVXevHxgWyels4kPqvfEzkN7dj9/xSZpvhb24TWbU2Gwg01XD7qXT0bJbjrR35fTDAOXorGkjEMXsqqXHGXMMZt6TlU57gaoj4SNNf95TI9TexIeU0FPV/GGASUE+0S2qWfQA0H8X/gDZygLED408DlsBDpgKFi+eTx/rFIVjKD3XPZSjY0s98Qx/FfrN0Q/2HwcrySIoHH+p/7wZ993a3DNUs0yQMUF87gsabdZMJsdAr8pRaaIT+BLwJNo6/dRcKo/2t+JWL9U8HnplEney+kyxWcdMmEfwUomzFxsrr11p/3r+Owq7btx+HQfLMC+CRWCcGCPfCfTdDEn6LRLpheyNs422YMv1m+6+jQ/Y9
9SPJch3B379B9sdRF6oGsjkalWQ9FUNcrNJaMcqFfI0injurCfos8vijAsA0GOip50RuvDmrm9nvHIffPUT/tbXTZHvQGR5/UuT+lSJPPANujfUfH8qz9lt5964QefbFLGVgTcdN5ntwiBITO8Oj67N1vxtgq09rtD9Q/8Mf8IIsp+9oMJueadQ+oZZIZrwQkm035yarPNOzkExQI/RnTK4El2XTyeZ4q8uBMyvBpSy+dytT98pOlt79DoxIdG3TYb7F85BM0Nyuko2Mk22Y7KiT8iSGb5uVxePrGDxw3W+ayMfej/IgMxT+A6jzWQ8dwajcgdqtQmUVlbiWPs1uZbLG97w4z2Qs9LJFvEj/QDg5Jp4pr3nCOR9l4id/ZdbUWLIyzaPCnTAyf3ErHotiZCv5OgX7bwIwPG6M6ZG1Iq+Ge2jL23Nidgxq6aKQs8phB2S2b707kiiwH0mUkl7WuJ7H+hNTkI5gANyV39ciM7RPeyWF2DvzD9PQ0rTleV6D9fd+C76XWnAaxamW9czrgTrtOzUbmWD3S7vvInIMfmTC6/s2soADZuBNQ4z8mO4PDyq8/f0hWyuxE3Km8fq5T8Ekn8T6WOjrlssGGS3HMAn6WHKnLVqacqJCQW34yQVsCEjaESgceoQaYZyUW86SeaeBvCqGQCvQIP23FYzeTVjT9NYjNMb2DQ88LPLXR7N6sv48DuzEceRkdBI6dcc3ZfW26xRM2VP2EHkMa3Nc/1FoE6fVmLiOv4B7afrFt3/xnzMe5ZnXAf2d0TG5LHRNyZYIXxYfdvCp0ivYhFn9ma9lQnew/kdB7clamd723zemuyKxq/UK3s/iv1aSIcl6Tx4nDx9OOZrHdIg3Un+3CSJ8GhTTXx/Jdqixfcpx54qfI9ZDDTwZk+ewEeLjwZ/dLPIkNmUxzbGRFNV/32l4VwlTfkz5zIEMb5/AqG3Y5+5+M2w/+azI7fdhKbglAOkK455gv32a6z9g09W2laMXZKAyYoAyVMACz8fLdJqkT+fHhFeJslHqbfp4lfrT4bfe5dbcUPBbsHZyrYWatpsRArf/9CDgAt4WsZPQL6Rcx8fBjP3H58B34DlwTNMwuivKKtC38ur1PyrY1Ybt9Ew2iMSCWIjFLbRKWJqh9V6LW16j9MdjauUUGhOP/14HWAOxvxHTIafYmHbDekyy9k/D7Y7ec2fs/Go7ZzLqtf/YE5WjmLdgfKrVLP/jj4vMbMN7tRO1YQFkq7wPLZ+h/5iM5TNNsgpbaPlel3GTtfyMk+nPxgiyzmJ8HhY8iOnZyulL3/IYWvkMufuOyTuZeXMKNky8fyVIZpshyYC2thrPh2b/5QLbowGw5VOHZGVZaPkM/cdkLV+VnT7W9oncZHVahiozESqvQSiUBwl0uG4CAo/5TBs1Sn80b08mWamlcM16nAphFOaNRFa99scUrKtWCMvlSdQuBbt2buCYP5j28344pnrrX499jNlObvY6DVAaN4Px6MnBhYwBywaTQqDxRujPmoJdJ7teRA/Y6AXfGkqRWu1TsGjTxlFt7S263eFJ1CPrMkO0xU+99jtxzMrjVk9canhHYLaZ1wj/sRyWif+dDMsAJiM3mAnlaSRLceaFfO/hweqPwu6S97MxbXgGtyd43GZ1U9t12j8Yp0hc9zy99jrOqcPUuQvetCjqAMvXZDtjmBuQ/XaMIr70ZwPC7D+FR5gss5H+8/7HF0zH8Y9JddKI9pw8goagUnq+i5CZ5lDKUYxp5nu+ZmhBA9efjqm5aCrj6NU61Wm/HTMBgeOumIcmMa1Yk4HGaheNXp50Uca3s9b285SBG7iD0LGKXt9Zsbq83Eb4z6aA4CtM0UCZ9285oKx9IN+onMdIkMnz4Z0i/Z0wJX3giOLTICsvDllmTHxQzntK0mH7i0zfO4uXXQv0mM8eXVQm83jcSPBIeiCyZxb310cez3btLOM4tCWeZlW2im2O3GrE9vBlBPObheZb6uX+dYWoqWAv
z6/if7S9E31MeswBqgAGe7Mny/c8xmmHspYf68+eVryWUrce4ukUyx47pvTKaT36RbLs1LfdUwKPy4JN/ybPXftDqzPbkzD6d8XBSyNoEzrWbfdm9vryn9ky/1rawv78D70eAtxdVgAsUrGEXimaoYk8rRWFkOfiygj6nGZ5TDdY4lEe711ZRwWhj1FRqy2+cPeHv+A1HhxcsK18SFE0Kzy+Ibt3JfA8dWoE8XaLx5pc+2m8mv8G638tG9hyDc62GASK5B3I1pMImsXjkPnkRfpdU+qbmllMES1fg6JRNu9VuT4Plvj6zl8eKt1usbyuycUzjY3e3XcWmTjI0csZg4c0y3C7xSPU3J+sQIH/yFY+Q7Q/l6/R/0FdAe5WcLwiCiwjgG4bKuWzE0CGYoWGkWFfEykrp48E18B4iuRtxKNYA3nOzac/XDPrJR5t8laE6zhngpcxgkLv1vrzadHMyZWl8mUCvnJLt+yONzqKDkgqtRwHPngdtllnvvBH25s5akFsT3/+U0G7DMD/qppwBLdLt2CnqGBZgQVg50BCRmUho2K8gBHrL8G5rwowI6JYli+gH3tEJITkKoDLNyFpgrdJ/JBi/Qr7mVihfbZD9RGyXB5LFu3al69GfgCCDzf4MRqoffOht28+qiiTxlhBZoBMl/Fa9VF/AIw/lsyCWJYnHbFgWGGWR3480garX/SQnsCueiyzWta4BtvnoUpML+JxIHe4vjOYTDPaP1j/9aHf3YE1d70u9HAcwSSxYQo4Q3zKpueQTznySQrAAPX5JGcynsnGxGNJe6jQLPv8QhqXhphWrAmdeAjaT9uD8V9f+hiI6ztwVrmCz0+5uVKgEVXc2LgQz+/RKEbAETLPpjBEB6zPzRjL90RAuSFptv2imYNr5lqslzTebPuN8F8+KgvwQ96Kjj3Hyqr1r2Aw4i9/6mhF29Tf1rqsrWVguqxshAd06tXnSQ/XwJj41qN9b4h5tOed0Qj7vKctehWIb4oU2Wu0fR1EbBeMxfEG+n9F27x5ySaMyrXaKFw4Qq33IpanbdjqdBL4JstwIPr8cnT20hAtlchGLzlmo9H2i0Yv74+5NAxV+61tA/Vff/r4M/Qrss6SIKKxzMm5YuARVOYbiNqbmQBp5UJ+ximBYmUW6bOMovtavmus3wh0ZRbpa2UGaH88jlD5MntMfEeKZ89D0f7B1L9G/7/y4XfJ+gzCVJazsXRk/gmtV+cGMBknMTBglREuuS7LcTx1GNJeny+7cYMVE3fOWk7IYLxIfzD2uyaV6mL2eRDBc+e8DU20T98Mpv416q/kb2fxqJKj805dB3Dx6wEbaztljkaL0wDlWEkvz4RP96VfNHp5jGcvxpmjzWaj7POBf9FTpcdxwsVdO+2SmmVfy8ZlsP7rT789kTtpSwEe0yFLNmH94bBj+/jxQCGppG3XSyZDZkjmAkzz05c+p0e+cxXTwzaC+tGnntm1UMtCgml+qtlnx7KlQ3UoC2H/Gm5f+maYMiQLLcH0SNDHNL6YddIp+v1HJhvQ6OXaa8lFDa0H5yGFIW0yOsVkovl0Y3n96fObBTHxrQmeE6vDmmSf59lTCh4J8liSDzVqrT/rOJj2N1sf4KZ4r/sW+jhbgxFB4xbTMBdwDRnP+FkYJI2HrMwhxkeG5vWjz4fvRY/dVmP3ypHUTPuTAW7Rrt3PHM20rz4Lfhuo/2rxP8p+4LgjEz3Y1SmaxoDOYoB8mqKUpd08pxJZd7C5z8vQqqV93GSNB/CLHstx55p/z8hkGRbo5zyzRxmv4+NOn6Oz6J6bO3Z7qJD1UBYMarB9LZODwerkbfg6+7jJGq9GfYjr9EwTOcBjR8steBDNM6128xl+qzL3HYWVb8bIAFXlMS/S5wN7vmge01o8d9WHCqHsZth/65uLd+02en2dmmGf5Vf1VZFPKR/5r1Z9LKW/s/awTyi9+7DkOfTy37Gn21prW3nylB9CS3seCzF+NX2OXsp44lsTnJ5J
/enH9anHPg9VYuK6+zTWX6tTM+37upodz2PdjF/Nf7W0H7rPT9lFbrK25gCrgTa5mkY0ngVZPOKZjMoRdERYKUtrmCVzHmX4vjFfV/GftU9kz0n70/dl1mufb1Lyu0DeLuN6LFlj/Qdj3+pLH2g5FkGCeUw2sP0/nj07CU+es7LVKC9Ll6Zjn35NnsRcgrMeEC1zXgiB1UunCjIdI+eRb9TSH3L/YZM479jDS79XWTaCDzkkeRXTwE8UOIBj+DFi00NZHECqjMvX3komPppHsF1+Wbyln/nI+Wcw/sMssMqDS9eXAUwGRuw1BiaNceogUDqVIFIWqkIGflbTjNHSL/lpSP2XyDUBkjyoAPh9h8stAPEhA5WStj5onBmgHPwAuoFqsi39zEf0lflE483yX5u8jnOcH9KGpwqAeUCNefxCqxSBJDHtR2/GLTXCZBga+Ba3vJZ+yYf0Ccn8y7j3uY9bXp/+S+VHx8xLwv0INTIK8FkyC5ekaUfPnbISJ0tTKRD2WZppaVO0zZXJ+Pw4jwX4fKZjGZ8f57X0q/pva9IhM9/99uRR+shTxQhm5vwk6YGj+bcKlRjaFGy9yNZc8hm3fMZVz0Jmu/yWfsk/dG4j/AefXlsErpbPSxHt0Sk/gvH1OSAQYpzEQAEN3SOwlU8ZzaMc4vZRvSDIoKUP3zTAf/Bvil/h+wb9W0SFI5iCvFnGS+EXKEBI54fwBM0q5gDUfPADhuWjtqXfPP+JXHfUwQm+q1FMhkdhLt7Ea1uyVO7CQnlQoUA1Jku1RbmaTF/8ln5N/oObuvF0bNYRByU4Dyym/GFDUTZ21L2/vyv9XG+b/AkbrsrOYLugQuUiZsRr6ZemvMg1Bd6OJSjyr32BS4VK0CqKEbnlrnQRJE/hqFRMqOXBCXF9nos8DV3pxm/pN85/cPP9Rx4sB2MQ8l2cqlR1DfYaO4yVr2AtfpbrrG6OCDA++frMOFiWZxso5pOM39Iv+WIw/sM9corvlJ3WH7jqe0Wgn8u82cnzAOkMipWBpowAdABciwqAM56DrRkt/Ub4DzPiwiPnJrcFl/YZhDHWp0ye+cd70v9C4Z/JGYjYTM1pWHulzwxxkynIaulnbsveWHUDw/vK+w8d5L69Jsg7pk5N8MCzf6ppirZi2ibKGajDMuuF5HOEahhukXxellOSsTwLVa+lX7P/MDXj24JyQq3gsuDgXrVR0+X2+9NZeEl8Kb6otqPX1o0UStDexlJDt1M+0pZPHR83o8Zr6YdlrMB/APjj8w5MrjWf1RLWDTAL/dO96Ul4eetqxg1LjYcE6xYTRy1BJJXpIG3yOrKRsLQKh8t2r98m3583JznV+6SWOH09ILpjWXoB/nrXV6nsASM6CkbgM98AMzkzGvMp29Kv9B8c/Lvx7fIB/yqO+qqGi/m6BtFKkTuXpVfipblPM8fAU6kIOT/6ckEoeB0fj3vE9qwPv9w9YQeZP2tWgt8dqJ/o1wETjjLb/7xMfgo8P6iFOJRiUDQLgmX82HJLP5/u1E94L3CHMXL4QTOyl9hjd9WSHhTANLB6dTrmmVflJkzXf8N0DGCcpgzJ86vFYzlVDJdqOp6/LeujHRtGd8jhB85KVvt21xsfNMA0uPSRdKfe1+QXmFoV5Hor0ZKv8MC6DvyRyYNmJ3+tyKmT0RCAaZMj+blX5bp8ukZEF1mrkKUtjPlM+zwf93nV+F4mjvv0CNfHyF2O38t+7wHTE3zXcvDUMIDVh1iT735IrgBQnzFw7f4231GZRXO0hdYWpklBbjvTv3PHRD6w774Jvm/RGDJ3N6a0UMo9y9PzcRjyNSY9fhqHxRy0KvlaTAB6e9GHW37dO0E+csieyava/gZdmgIw6waQPwF0rwBA+OJIMXnwiiX65r4R9HHUy/7+H/gTeufW8nSob49U5jYNYJq679G0q3eLXI8d9hzubkk6eiOrxovDTKNcx2Qsz5dpeRaajE/7eJxveRbG+d6W5Xme6VloMj7t
41iGnsVXpk6e05X82mQbHUaubnTx2ebrpdflMpR8Cku3BlqoFtGF89EY1YhypLiDbOv6aNIf3oS/V9nVVfkusza4QZfInQ0qtaCYZavSE3Hq9W0gubuCCctl4AWEjWdFaAV5QX4QycJtVB9T8ma08SL80a/zMCXjSL+5pP5rrolS6biVmrCxRy4AUP8MlPRRpQeNkgFL20SrspeJ833eSNdHo36Ln448fb99klXasCG4DCnA1p6HHk4PxotEC7E2v92QVOCAFqdiHcUOScsz9FWGhRm6iJrMCNVfj6qeOWdGcoP5YKhC+mVYiK/kPrhaPo3nyucC0EkeVAPQYZyDbkASUV/5kaiPKaq7N5HvtHXKN2bvnoQ/3jO07vY+GlrLwRqAHoU/uvFJJL+CzwyyFSxGHOUjkzxF2WWGKGVIw62PKuA33uWycR3y7cmTE/xliOGjYQfYms4R/dAaORF/x/irmM4OID/G0dJlYFsBBaHJW5alm6afyFOwccnEcbJw92EasdZWC9nmEUfLH0sPx/p8EoD+BwA+IZ+L3ZrLShtgfTbA6/h44/RfR4f5JX4R55p9J8svsTPOfx+jz3oNUeaIBNjavipNR29dK8cD5JPAOxb44DvOgQgWyaNscYQWtY1Yns60sg3aYPTx5hJuea4Z2y7X7b13wil5RBLbvU3QqufS8elGORLr69EA7WjcZM1BHAd9qL5Hz+JxaK30fPJq1U9kDaQXw9aSsfjRuEmTqn8fiMWOFGJzt0laty7d9dUeeRccfigw6gJQXWjIPviURnlRywzgojzwMN2m+Pc4yl2BJH+/895RvbJkypRkdRWVEc3eZgEu8io2ah34Ue+peJI1Cw3bEyB1YpTzr6uOA2idWCc7EbYDtG7wN4Lfjds0/tUZ/u2oZ3rbZeXYVFZiysVPpL0x6P8B5j6YDKHy8CEAAAAASUVORK5CYII=);background-repeat:no-repeat;background-position:center;background-size:60px 60px;margin-right:20px}.voice_chat .voice_chat_dialog_wrapper .dialog_box .dialog_content li 
.dialog_content_img_user[data-v-1c392dd0]{width:60px;height:60px;transform:scaleX(-1);background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAHgAAAB4CAYAAAA5ZDbSAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAeKADAAQAAAABAAAAeAAAAAAI4lXuAAAcv0lEQVR4Ae1dCZhcVZU+r7rTnR1CIIGEDolJiAkgS0QSECEBBhACM/OBwjgug9FvWGYBg1FxHD4/EZBFBZEZRBxxxmHx0yFRYVCCghEQFBjA2MSQkJAQAwkhnbWXevP/p+55feulqrqWV11VbZ+vu+69555z7znnf3ept1UgA4C+87Fw6J9EpqcDmRGmZUbAVGQcXBuFdFSKaVpGSYB/UigdkpKOQKQD9R3gdKB+U1Mg7WFK2lOhtI8XWfl3/xHsVvkG/oCPjUfXLwxndqdlvqRlHhyYHQYyCaClzBmARhAFgEapZcnHAaDVFPMpS18Ex4ushfhvcTA82pySZYvvDFb48o2QN5/q2tYbPxnu39Mt54ahzEfA58PYA4mfEZ2Iysw4r8w5qzM5S0vVx0ywMS2yDCN9WVOzPLDojuBNa6NeU4tB3dl3yz+Erbt2yQKA+hEYdwbSIXEjdSQ69Px8XC5f2dfx8/nkfT7ku3AgPYQA3j1smCz9x1uDPX59veTrDuAbPxEei1HycQzJD6ZD2dcCFQEQH34mwNSri+RdfVT2ZHxVzXt1kXwR+lDbGqTkXozwby/6VvD0Xu3WkEGX6oJu+PvwZGx0rsJIPVXnW1iGvK6XvfMvTDUQzHKbki2lNy5fC30cGD8H2Ndc+W/BL2hKrcnCVDM7brw4PNMBe4IBk2WMD1xWBQrxuniZ8rl41k68Ll6uQB9ALyfQi24PHrTuapHWDOAbLg2Pk275BkbZuzkdkqIRxzz+feOs7MvspQOhqK360X9GmuWyK28LnqK9/U1+DPul75svD/cLd8l1wGIhOuztnzmi6BKtwIcCmmFnfRrgEbOO9bE209w7g2HymSu+GmyJbO6HjMaxH/oBUGHw1UvlIkB6fTotY61jA8ph2zsCwTDMLKWdHKE+6A2lH8jmVEoWX36r3BUEgblc1fBbnKvayW1XhG17dst/AbET2ZFNs0xJEWgOvEI81jW6Psbz461D5UOX3hysoz/VpKoDjO+zZ/WE8l1gOVYdsSHnvKIBPmAGtlaj0uo1Q+YA0Ydfm+HTRy+/NfiJ+lqlD8avKvTo1WHzc1vk2iAtn8KpRNv72DKrQCmYsd4VUPAsNUDNUDfoB4Q+zq2GOB9601H7yWfnXR10x0KRSNHilkhj1sjtl4cT96TlfoAzl7wILBPw09iI1KoYb8Drp+SJ1pScf/FXg/V+aJLIJw7wbf8czuwR+V9Mu225kM2ags2DfAjm4A9Ufbi6rknk9Eu/luwFjUQB/uYV4Rystz8GuJn1Nh+AOYCLDoZCdfnaI9/0LDVZv854hWQK1VVZH/vqzU0pOfuSm4MnratKU7qTCN36qfBMGPgDgDtcg22t2nRrKfi2qTIRphHPk9N6K1vqy6pA5mOg6COGO/FV6rxLbkrmDFgiAH9jUXgBwvw9rLnNGm6AoXig9SjwDox4mey9eH/u+oJzfIF8+LIbg3tc2MpOKgb4m4v1XPISgBSBG22ZnVne4LNNsdYQWI52GsGsEjKD+ohEIN04l33OJddXNpIrAvibnw7noIFHANRwB49hpKkBZymZlmcaFZj3yMD2ZXPxBro+vkbthN+nXPKV8tdkjbMX26Kz3/p0OLMrkMexZoyNgo/WdLplKy7vslE5vgs2EFVuUH/v+OH05pBQTvzEV8rbXTO+JdPtV4UTcQLjCcyrbQqogYmUDfq8v
QB1sjYN+7JUHtTPET98hZImmXvxNaV/T8aFjtKIZ6i4W4ZWGzWx41NUmBIcEtYOLWjqylpPAPnv6ik7qJ+JVcH4BdKWwokjxp4xK4UY6pJo5R6cfhSZw1sYIwCR19HneFrHvCvbaPXlB/VLjt/clV1ybUlgZWApXuXOL4RnhT2yFGsuIR2kfo4Agh4GTbJg4ReLv0BRNFB3fSlsS++WZ3HhYGzvd5o8HrJV23nFRQrVmWwhmUJ1fwb6mA03p1rl6Is+X9ylxqKmaF6sT3fiem4KF+oRYJt6/Wk54rMegeZ0bHJRnfEow569tiKZQf2C8UPlWGJBTOx4LpQWBfBd/yoXAYATrUUFD63qxsC1EAFmwBE/B6ACCf6gfgaKiuMHLIhJIWCtzmJu5b3S+64O99su8jIq9AICZ14q2dcbU4iXfT7zBJs0qJ9M/BDKzSNHy6EfuKLwPV5u/Gnsc37sCuQ6gKNTMwW4+yXC9vWGPIKno9nVkWfDlfyoDuxB/UxsKo0fMdnRIdcx1IWIkOSlu78cHodd8xN4wkAHoI4+aOhodVo+jyw2aKPUiURlX9Y69nlJ6Y/A8xBjDxTZD48IjsE/01acTG3Gwy/qiBmWYMqYvLVJ5LEf4jEHPLFk/rELi4fvq9X7PMqWEj/4wl313I98Lv8tuQW/OOOG9G/AgIBHmw+qlc0gphyZZqxmyHSkdWzIyq49FiNehfq4iU2mHCEy4xiAygdH+5l44PBAmn0Knk67L7vzavmvIQVG6O3Y7B57SxbfXo7L/ed14ZnpHvkpgfUPKyooj2ynTRH/MCU/UosyVHBNqUIy+qP3E5k1B+AelhmhNKWWtOk13M5yd//5T19TTfL+v/1M7qtOeUcwpubPGyCaOtCInO6Y2bIDD6cu0YsDj6BThvUO0Gro8yCadZzIkbgRF49y1g3t2Zkxpdr+ay8u/nz0B+UHcwVB7YhXfO/a8GTwHo3zrUwltp35QOq34jpldT6qVH/MAThXelZmnc3XR634K58Teeqhwr1X6n8e/Xkf/uzeD7zlPPaxZlylABawUzHVj5hQLl5MhMVy9SfNEDlhgU5LOVqtPWvPLvhWRAzK9d88jOujzFH8C6u3FBNrNt1zY3gson+qfhmHFo3Vf07BzLvUpl11xmSQRnyTtzorV6A/eRbAPad+wWUkd2OK9uOkeS9u1YofMVPssuHkyplNGLkLDSRNDRCImbHk63dbV2ff6Vjv55PU5ybq+LNd+9km11WpEyO4VvHDWrwwHowsgH96S9gKgD+g4EFSwaK9kFKeG43c9hNMkh2RBix5SetPnI4198zePtlHvRJHcNL+Fxt/vCHoA8TQj00WwNt7ZAHA21fBcyAagFRiXglpNFrJINhO3g4EK1eqP3SEyHv+ItOf9l3nH114U0fN4ieyLzH0Q2SQKQ+j9yMKFkoGlA5UfLCsoCHlCPYBZFn/nQwbS0qf4A7FWahGod2cokFJ+V9q/BXDjAkZOyy/9N/D/QHaGQoUnqFQoNG6Tg8eoFngst7VmXyS+lMOF5k41Sys/5QngLp2A9waxg94nKFYunBFIxivLDoXWA1RoFBpR2A0clFZKE+gI7AT0OfJiyOOr39QfQs5PdtZvprFDxgSS7MrAripSeYrSOAoUJAw0IxPpaw8ZV0L5lDWmutAz9Ihz+mobB79SYdiasb620ik34EZE/hIUr8tBuZzkf5Xoo9ZdL4agI/oRAdml/kacDCz7hVAhRpMo/mPso1y8lk0fpL6049mw41FBJhLVM3jl+oFWI+rH94WzgRYB0ZAIq7ME08bZVllx/eBVjlVyICSJe+O6qi9PvT3n4DtIE5HNhoR4KyYsEjfS/SfflcSP4y6A4kp29ERzCGNZ3rNtsyIZS0R8ckA1GGbqaAhJG9gV6zfhum5EclOU1pMahU/9u+m6RU6ggHOfE4tNIhHm6ZWjvNY7Y5Ik3MNJqbPi/WNSJ3YQddD/PTACmQeY6gjGEYdw92fo
g2mGsmUEt7QJJDkUVbzKkChDC8Jfe6eeY23EUkBhuG1jB/j5vqfzXzq0e+EuBdCJukumCPTUKIgAXQgsj5XXmVQlZQ+b7HxbUDTDUPRCHYW1yJ+XuwmEdvmnZ0yHeDgJecg/ciAZQOXRkYjliIoK+6OTzWCm5R+I26uGAMS3gWmZPHw48Q4slzt+NEA138Kp8WnN6eH4BX42GFZ52ohPlg24vpsANqIZZ3xKZuUPm+Oa1TyR3Ct4mexY/+4WXJGMzIz7HsbZ2E3iHV6trMyVFIwUalAkuEoyxHwKtVvyboWYr00RtptFxpgrs18Znl/xc+PP/oGwECZ87aBaYYQSYLnG6qjl3wCjVQbs7xruVJ97cOi0kAp49cJgHV6ZCxqFL+s+OsIbpLxtsba8NOFmka6IR0t3M5wjbsD2rbeCgzrQZXo407OhiSCawdnJf5XGj9fH/dMj+cUnfmpGTcizUiNMnhR6gCPDgbW8d+ByrzibzpUtLzJuZlC+yDP+E6W+t1VeaEfO6guZd3JYV2V4X+S8Udbo5oxAkfqba8wyhpXEDl6QRFozliTydTi05xg1uUr0ecVmUYkm55peyX+J6ofyMjMCDZg0DqznGLUSPYGhqvOgA2Wgs46UK58JfqNCjCvA1usKvE/4fiPasaiPEp30UCKjRtgto7oqCQTFM/zINCB7hQr0d8Hzy7yltixuNDQiDT+EJH5B4ls2yzy6go8+rcRXiA+8ZhpgFFFfpLx074yXSqOxA8zM6boJqzB+B0bO+qiXRijDCPUEM8g8ggqMeeOkWRTkhrsDNeKIvUn4K6NmXi6xtpT3Qb7YJz4FY9Xwvj/x+dFXnmxf+KXL/64CW9Usx5hMI6kqDN1ZfJU2QGlYHtHpS9H2XL0h2OL1+jg0vc4TTsSoxm/zrD59Uw8GatqxM/vd6/4o88UhnEHRw4rOXojcnnlc6R6ZcroaHO8SvR5M3sjj9woXjkyXHKqHT/DIhd+xLYZFR0Q0qf3KeyPSj3iyAQplvhgame+fJ4KUU6ZmVIx+iP2Mc2Bl45GVC0efqySjB8B8UIe9cdoYsB28GtShy3QZCooXGAd+crGy1qnY3Kl6vPy4EAlxs4A9n1MMn7WruHkxx8HUkczXkDdgZPSmV2Tk+ZwV9ChxTU4M2ytsrfOjh49HvyDogT9rW+IjBygo3jbWw5gRt/i4/IKPPJRldUz3CXEr5A+lr6OFFGmEBuN/llWJjP6l6nLkc+qtzZK0N+4Blc9GvT0JNwsSOteRnWOmGmcHT+rvoz4FdIntpwtNmV1SJusI1TqBorG8N/jM5/VOOudu75cX/o73sYrfJ51M4XTHwjJ6pdE3sa7OvrynzHTeJUZv4L6wLaZP2uOr8EZ8jrhjEFl+45FAfvKZHU6yllhVKb+pnUi2wH05HeKcNPVOgyB4dMBDUKcgfAD1joIOC2/thIvZMHSw/j0R/yiMMXijxC2N/M36/1NVhZ4KEQgUpmVIGR1SWFKMh0/r3ol6O/CXn7F05m2+LjK1CO06Yb4eBtnr178dcZUiwVHrsbVglTl+Fk31r8eXAQYrxZq59GnRxpsio46aKgwU2ZINBIJi5zbmbF8kvqNdj66q7M3NvUQP8UC+OAbSnvqoOGyEmCl9YgjaCDNx9JoLeE6DDn+E23jJ6lvLzKhLY1Au3e4mNVJ/BQT7F0V2ynzAl4HWaujk8DFAIzAhJBurJhSzslGAHg8q4/aRB2pWP1deHdiNGtkVOv6c8c2Otcbk0r9T0h/LbHNnGZokt/itsrJ2FZnCPOuAo0Sp2AdqaizvAqhoF/YDTzKJqSPn5+VXRgVw0dqTyV93HAxbp1xdzeWpAjhFtxAfOXtpWqJ7MQB6ceCgapl/BQoYEpPuBXABSW8Mgng2JFj067y3Ij261WOmp6On09CfydPoJZBvOxYLpWjy/2LfzcHY5OE/2ynkvgrpmhCR/CQFlnW2ZUdFu1Ah2wvn7xo6qQBrj6L78Qr1ecZr
v1xfbVU4sta3ijzJx7fAd1SiV+HCGgUF9dApf5Xqh8OkWU0RUfw7NOCFTByo3/E6AGED103mZJBcnkWyYvWVRvRSJXv1zsdqher/xYumJdzf9ZR7+u1Sfsr8oN+HHVSkcKe2JvuYIrilJD/WfEtMX5YLjbOBaY0k7AoAZRlXEcMNBqs/6hVvitHecqaM8ybnPEr1OfksAXXUkulAybihaDzStXK6PBCfSnEpwl5zdcO6DqKn45e+gIYMoRbLJfpUWhlIgaKgPbKEd/jmZwqUS9e55WL1S93qj3tQhFeZy6WKEudUunNDRkN871e4odQ5wT4ARjYxfXENgmWRtOqG7HGj0a7A49ySerz69IW3ttUIvE054VXiLz7lMwBmk+dgFCGsqWeGuVO/U9r3QCokv8W51Lij1dxdAUt8oD5bKZp+elHwiVgLNhrw4BaTpkU9lMqxXkEPUn95ha8jGVu+W+U5Szw3GMievIfpxRJ3C1zM8b1mlN6ObTqhcwLwKvtP23bqw/wiAMpXodlYunsUwK88DFDWZfbcW34bnwHXcAjx5AkWGyExNQAtLzxKWdqSer3YHe/flXmjkv2VSoRwHKm30L98CoRd/nqJwQ1yFXyv9T4Y1a627edmEQ06mBZiiG+VRkOVV1LkVdnHI955VPQk+OUZ077fOYr0edax6DWA/EG93Ur+9f/YuOHZWYrMfTjlAXw9OkBzJf7CJSBFYHpQCKw/Dc+slHeGq6G/mps+nntuJbUjdlk1fP4+oaLC/3tv4FcKP4Y7fc6DKMwZQFM7pAhcqc1Eo1CAk4kQQRP+QayK6vDlHOyKu/pVKrPJYP3GfMUZi2I13xXo399yJt+9bP/xcSvpUW+HY+NgyCb/fzj4c8Q0FMR07KJDVdDn7+ccgheENSf93HxcuCrfyhtBqmW//kAwQHw8yNPDE6L12dtsqwSg/CadAovBSdC+Sz1+bny4FVDn+d+X8EO9qAp5e+Azc9iUt5pshbgcnq2UaR6uXxmhfGr5H9ks/Xj+sTFomuiOi9DsZz0wvJwOfA9PhqG1qClpmVlS8m3vKX5ePn4pmdpHrlReBvPxHdgWcEjI0kTr2jxNCS/6yqZLZZah1a2lHzLW5qPl49vepbmk3N8DKTlR7w3eC+LccJgzU1YU7/EI1b/nZSuvxA3XlaZzVAespFelfW3Y7/f/jtcXNiYlp6eShaE3hjgRx/l7S1peRkX2/gTOearumdxMD/9cg38t/hj95xz9DqTep2L5158Mnwar4l/N52Mk34/y8GnnF/n5/028vHL0R+6T48M3adbRgxPybBhTdLSEgCYPMb5Rnj5rq607N6dlh07emTHlmbZvQ2nw0D57MzHj+vkk8vHL1k/kGcOnxMcS71clHMNNsGgWS7DG3iewDSdud8Og4Rx41jhiXWSGWrjh2yTURF8MNX6KulrB+hjx04AhH/239qa0v9m4JSCsU24fZQ7fU69adzp38MUo76zE8Du4QyABhzpseHs1lkJ/Grab/Fi94yVlZkv1D/kQgzAyyiWj7SNfJXkr/hNeAfi8Qnm1XF4SmfVCPW6N8/GHEsNVTmf5wSS1ucIbh2F3VdCtGdbs+zBCO4v+y2WpcYP7n5r1nuCTxZy260i+UVGjpDPYABsVnApBissz5SjgqSpK2u9k/PXZBWskr62neRHP9tfavxg3uZRwKYvl/sEuO2wYAsAW0wD+E/wDDQzSgFFT1rn6lXWtU5+NfX7crLUerPXNjFWrjP/FxObvnxD6Psm/pz4y7+TX2K9PTH33Ou1kWOOZifK9ue8iIlK34oy9FtG9UjLyOSm6E48dNm5A4u3szcytUr2l+o/7Hn80GPkJGwkLVoeANnZgpssE2VDf3wh/BCu7OApIjxLbIBYSkFz3ud5ec1aOZ5Wqp+2BtlQ5YQ9ZWYZ8prVrJXjKbvsP/83Nw2RDxUDLs0yU5nvk1Y9G56FWC7FSC5Jr8+Gcwjwx5b5z2d9tmMi6sAZJ
T7xwNOGPNnP3bDRxOmhnLEQzITo4buGyLr23tWLUzNPpvA0KVOeJuVJln3G4M304/B7xQeg46pHRJfAEGesFkw9OvhJsa4WNYKtMTb8x+fDm3CwLlJ/zCkw7AA2WU1ZzwqfwNOvVuR5+vy9oXXtIhtfxdmjdaXd27xxdQDwAwQ/3pnfcXH57s5ANrzSCy61eDDx/it9ZT/KvBbsE++nHt+G/0NwnvzQzK+Na30J/hcTP+wFbpp6VPHg0gYLsW9vwTzW4+ZVL8pjeDPPXBX0nbDWYK0ZbKn2ZHyng+9wshbXVnmVZsMa6HijsqAROSqPP7dbZh7vfZnNIVMM6w9PNsnyH5V03Gc1y43ZhMki/M2ntmkIMJZyDYv5DGlu2qIDP55naxYnJ6b6KXli6uHyPkzNJW02VJdtlkLr2sOJuI/6CQCC47bXYIJJihxwrdvJENYxz0tvq/5P5KWncHJiG7mVU+sIkfOv7MSjp2ZF6W127grk/htahM8aJUEjRmV+xHrau3DLEaZ3UjwW5Fm8zHIr2/ADf11ri8xtmxGsp3wpVBbA7GDNy+HMsEsex0mQsfFG1FB80FBzyHgE9tnH8DOsCQXRd3bCtFBOv6gLN9Bpb35Vn/l0TyAPf2eIrF8Z96ZP1T4F+PtP78KlgEOPREwwwi0mVKSl8R4tVgo0vu+mWuTEyYdm7nPus7OYQLztWHXh4urfh3Ng4SMweDitZGM0Pso7dfL5vqinHsYVGtx+U02aNCstJ1/QXdJ6zPX7l/c2y6svZa+9SdvJ+67nnIFN2Xi07CPrx8zLQ2onYnnKlFnBk+XaUhHA7HRNe3hmkJYlGMm6cEWjFnXmw3OPizz/K8egUpVp9P4I5Nnd0jaz7zV53R+a5MkfN8m2NyoORVFeMT6HY/dy9MluQEDLYsYGLI9DrTtsknMmzwgeJL9cSsSrte3hBUDzewZyZijj9prtIo/9CDvjteWaV5ne2ImhTDkiLROmpWXkvqHw5wK4E96+FTvllSlZ80IK13wTCUHJhh54iMj7/hJPUGKdtilbZz+0BJC78f/hSTOCe0puOKaQmHfYeJ0JA3+AR0iHs1G+jPNn/50BOdbnYNFFYBjW5lM+mHm3pQUFa/RO/J/XNq2ykRu1Z5kk0tdWhnMA8o83vCJjH7kvc1IiiXYHchu8sf9UgMxbkDAweFHn7IOnl7/mxmOV2Ai2hl/6dXj0g9+X3+zYmlmTjT+Y5o8A3/Y37zx5A9+dT5owubzdcr7WE982HnZ88OxRx8psPFGAk4uDVEwE8FtR2/CmntOTBpd9Jz6CzaGVPw1bf/VbWb62XWYbbzDdOwKHvFOeOeEYee/09+tDB3sLVMipGsBm1/dvCL+yZoUswkWCqvdlfTZCircqhJiSr73wiuCqatrbL0Ff+t3wPRva5SF8XcL1l0Ead7C8NeEQWXDuxcHyakejXwCmE7xp4L6vy7dfeUk+ipeWJL72VztQSbSPVzSmpx4m3z3vn+TjxV7PrbTffgPYDP2fW8JZmzbJktfXyFTj/TmkB02WVeMnyFnnXhLgomj/Ub8DbK4tuSP82PrV8nW8iHS08QZiium448CDZdFfXRbcUQv/agawOXv/7eG/rG+XxXj+F+d1Bg7tN062T5wu1/31xcE1tfSq5gDT+fDqMHX/GPn0lj/JIjwLNLaWAam073ETZcuYcXLzBZ+qLbDmR10AbMYwffie8PwNq+Rq/M9slK9W+L2kcMI0+T12xl887W8CnKStH6o7gC00990SHoDbgr6w9U05H+v0ODxaUle24lUX4f4Hyxu48e6HY/aVq85Y2Pc9yuZbf6Z1FbR8jv9ySThl83r53OYNcvqbr8tEvMKoJl+zcHIifUCbvDZugjy070Fy3UnnBKvz2Vwv/IYA2A8W1+ulE2UBfgzygm1b5QSs2wfhMVK92cCXSyI/eox04bbY10eOlicxDd87eqMsmXd1aTe9JWFHJW00HMC5nH3ga+H4cKicj
Jv5jtvTKYd37pQpeJfHGNw/3Yon85vxzstm5FP4V3+bcJoQ7yJJ4ypOD26G6wZ4u3Ft9q2hw2V1yxB5Cde0n9mzXR69cHFQ5RuMcnmTLO//ARuOpjxgjP1GAAAAAElFTkSuQmCC);background-repeat:no-repeat;background-position:center;background-size:60px 60px;margin-left:20px}.voice_chat .voice_chat_dialog_wrapper .dialog_box .dialog_content li .dialog_content_dialogue_pp[data-v-1c392dd0]{height:50px;background:#F5F5F5;border-radius:25px;font-family:PingFangSC-Regular;font-size:14px;color:#000;line-height:50px;font-weight:400;padding:0 16px;box-sizing:border-box}.voice_chat .voice_chat_dialog_wrapper .dialog_box .dialog_content li .dialog_content_dialogue_user[data-v-1c392dd0]{height:50px;background:rgba(41,50,225,.9);border-radius:25px;font-family:PingFangSC-Regular;font-size:14px;color:#fff;line-height:50px;font-weight:400;padding:0 16px;box-sizing:border-box}.voice_chat .voice_chat_dialog_wrapper .dialog_box .move_dialogue[data-v-1c392dd0]{justify-content:flex-end}.voice_chat .voice_chat_dialog_wrapper .btn_end_dialog[data-v-1c392dd0]{width:124px;height:42px;line-height:42px;background:#FFFFFF;box-shadow:0 4px 16px #00000017;border-radius:21px;padding:0 24px;box-sizing:border-box;position:absolute;left:50%;bottom:40px;transform:translate(-50%);display:flex;justify-content:space-between;align-items:center;cursor:pointer}.voice_chat .voice_chat_dialog_wrapper .btn_end_dialog span[data-v-1c392dd0]{display:inline-block}.voice_chat .voice_chat_dialog_wrapper .btn_end_dialog 
span[data-v-1c392dd0]:nth-of-type(1){width:16px;height:16px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABmJLR0QA/wD/AP+gvaeTAAAByElEQVQ4jZWRMWhTURSGv3NeHDR9NoPZBKuCgw6irYv0XR5dupWKZBFxcBREBHFyEEFaKc52LlYoxU4iCA7pLZhBIrpata6CoMT34pC+exw0xaZJaf/tcP7/47/nCgNkaVrKQ1gv5/kZaTbbg3z6/9BybqqVJI//jSWDkV+VykGAzLmZLEkmBwKyJHkksKgiH/o2gvcmspQ5N7sDkDl3x0SuBTg/5P1CP0Ds/VIB5wyuZs7d2gL8npg4ZvDA4PKw9+uD3gpQ8X5DRS4ZzLTT9CiAFp3ODYHXh71/s1u4q/Lq6lvgRSiKmwBqqpMGizuc1WoHaMRFkfeuDJ6ayDRACbOTCh97TbK8XAAX+7WQovhMFJ0wEAUsqHb2Ur+rIorawCaAIvKFEE7tByBmp4GvAqYSwitEavsCiNTM7CWAborMYzaVp+nYXsLZ+PhZgZpF0RMArXi/YWb3QwgrP507vlv4R5qOmOqKiTwcrtc/AUh32XJuTuC6wd04zxek2dw6rI2OHsjL5SsGc8CzIe9vC9g2AEArSaZFdRazqkAD+GZwhL/f+R2ze/Ha2vNt9+itaSCtJLmgqmNmFotZW6OocahefycQev1/AL5Esl64D4xbAAAAAElFTkSuQmCC);background-repeat:no-repeat;background-position:center;background-size:16px 16px}.voice_chat .voice_chat_dialog_wrapper .btn_end_dialog span[data-v-1c392dd0]:nth-of-type(2){height:20px;font-family:PingFangSC-Regular;font-size:14px;color:#f33e3e;text-align:center;font-weight:400;line-height:20px;margin-left:4px}.audioFileIdentification[data-v-4bd99869]{width:1106px;height:270px;padding-top:40px;box-sizing:border-box;display:flex}.audioFileIdentification .public_recognition_speech[data-v-4bd99869]{width:295px;height:230px;padding-top:32px;box-sizing:border-box}.audioFileIdentification .public_recognition_speech .upload_img[data-v-4bd99869]{width:116px;height:116px;background:#2932E1;border-radius:50%;margin-left:98px;cursor:pointer;margin-bottom:20px;display:flex;justify-content:center;align-items:center}.audioFileIdentification .public_recognition_speech .upload_img 
.upload_img_back[data-v-4bd99869]{width:34.38px;height:30.82px;background:#2932E1;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADIAAAAyCAYAAAAeP4ixAAAABmJLR0QA/wD/AP+gvaeTAAABnElEQVRoge2Yz0pbQRSHf0dCNnbnVh/BRRdaxS6isXXRtZaCCxfiU7i0+AjFN/APon2AbvoKfYPqzoJKigURPxdOYHK5iUmdm3uN54PAOTeHmd8HkwxcyXEcx3Gc4gE+AW/LzvEsgBXgH3AJzJad578APgaJNi9PBviQkWhzBbwrO19fAMvATY5ELDNXds6eAEsZid9R/Seqr4H5svPmArwHWlHYA2Az6reA06j/CzTKzt0BsJCROAJqwEb0bAOoAycZmcUUGcZSLCKpIelNqA8lfTGzu+yQmd1K+izpe3g0LinJ76WWYhEz+wrcSZqRtJ4nEc3eAquS9iX9MrPdFBmSAoxl+o6j1Wv2uSRdzMzui5jth6QiZeIiVcNFqkbPewSYkrQd2h9mdlh8pI791yQ1Q7tjZmfdZp+6ECckbYX6Ro+39jCZj/b/JqmryOs4Wgk4lvQz1BdFblSoiJm1JLWK3KPNyBwtF6kaLlI1RkZkkL/fJrBXWJJ8+n5lNIjIdPhUkpE5WtbrS6AuaXJIWZ7iPLxOchzHcRynGw8cd1AmwUMUzAAAAABJRU5ErkJggg==);background-repeat:no-repeat;background-position:center;background-size:34.38px 30.82px;cursor:pointer}.audioFileIdentification .public_recognition_speech .upload_img[data-v-4bd99869]:hover{opacity:.9}.audioFileIdentification .public_recognition_speech .speech_text[data-v-4bd99869]{height:22px;font-family:PingFangSC-Medium;font-size:16px;color:#000;font-weight:500;margin-left:124px;margin-bottom:10px}.audioFileIdentification .public_recognition_speech .speech_text_prompt[data-v-4bd99869]{height:20px;font-family:PingFangSC-Regular;font-size:14px;color:#999;font-weight:400;margin-left:84px}.audioFileIdentification .on_the_cross_speech[data-v-4bd99869]{width:295px;height:230px;padding-top:32px;box-sizing:border-box}.audioFileIdentification .on_the_cross_speech .on_the_upload_img[data-v-4bd99869]{width:116px;height:116px;background:#7278F5;border-radius:50%;margin-left:98px;cursor:pointer;margin-bottom:20px;display:flex;justify-content:center;align-items:center}.audioFileIdentification .on_the_cross_speech .on_the_upload_img 
.on_the_upload_img_back[data-v-4bd99869]{width:34.38px;height:30.82px;background:#7278F5;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADIAAAAyCAYAAAAeP4ixAAAABmJLR0QA/wD/AP+gvaeTAAABnElEQVRoge2Yz0pbQRSHf0dCNnbnVh/BRRdaxS6isXXRtZaCCxfiU7i0+AjFN/APon2AbvoKfYPqzoJKigURPxdOYHK5iUmdm3uN54PAOTeHmd8HkwxcyXEcx3Gc4gE+AW/LzvEsgBXgH3AJzJad578APgaJNi9PBviQkWhzBbwrO19fAMvATY5ELDNXds6eAEsZid9R/Seqr4H5svPmArwHWlHYA2Az6reA06j/CzTKzt0BsJCROAJqwEb0bAOoAycZmcUUGcZSLCKpIelNqA8lfTGzu+yQmd1K+izpe3g0LinJ76WWYhEz+wrcSZqRtJ4nEc3eAquS9iX9MrPdFBmSAoxl+o6j1Wv2uSRdzMzui5jth6QiZeIiVcNFqkbPewSYkrQd2h9mdlh8pI791yQ1Q7tjZmfdZp+6ECckbYX6Ro+39jCZj/b/JqmryOs4Wgk4lvQz1BdFblSoiJm1JLWK3KPNyBwtF6kaLlI1RkZkkL/fJrBXWJJ8+n5lNIjIdPhUkpE5WtbrS6AuaXJIWZ7iPLxOchzHcRynGw8cd1AmwUMUzAAAAABJRU5ErkJggg==);background-repeat:no-repeat;background-position:center;background-size:34.38px 30.82px;cursor:pointer}.audioFileIdentification .on_the_cross_speech .on_the_speech_text[data-v-4bd99869]{height:22px;font-family:PingFangSC-Medium;font-size:16px;color:#000;font-weight:500;margin-left:124px;margin-bottom:10px;display:flex;align-items:center}.audioFileIdentification .on_the_cross_speech .on_the_speech_text .on_the_speech_loading[data-v-4bd99869]{display:inline-block;width:16px;height:16px;background:#7278F5;margin-right:8px}.audioFileIdentification .public_recognition_speech_start[data-v-4bd99869]{width:295px;height:230px;padding-top:32px;box-sizing:border-box;position:relative}.audioFileIdentification .public_recognition_speech_start .public_recognition_speech_content[data-v-4bd99869]{width:100%;position:absolute;top:40px;left:50%;transform:translate(-50%);display:flex;justify-content:center;align-items:center}.audioFileIdentification .public_recognition_speech_start .public_recognition_speech_content .public_recognition_speech_title[data-v-4bd99869]{height:22px;font-family:PingFangSC-Regular;font-size:16px;color:#000;font-weight:400}.audioFileIdentification .public_recognition_speech_start 
.public_recognition_speech_content .public_recognition_speech_again[data-v-4bd99869]{height:22px;font-family:PingFangSC-Regular;font-size:16px;color:#2932e1;font-weight:400;margin-left:30px;cursor:pointer}.audioFileIdentification .public_recognition_speech_start .public_recognition_speech_content .public_recognition_speech_play[data-v-4bd99869]{height:22px;font-family:PingFangSC-Regular;font-size:16px;color:#2932e1;font-weight:400;margin-left:20px;cursor:pointer}.audioFileIdentification .public_recognition_speech_start .speech_promp[data-v-4bd99869]{position:absolute;top:112px;left:50%;transform:translate(-50%);width:142px;height:44px;background:#2932E1;border-radius:22px;font-family:PingFangSC-Medium;font-size:14px;color:#fff;text-align:center;line-height:44px;font-weight:500;cursor:pointer}.audioFileIdentification .public_recognition_speech_identify[data-v-4bd99869]{width:295px;height:230px;padding-top:32px;box-sizing:border-box;position:relative}.audioFileIdentification .public_recognition_speech_identify .public_recognition_speech_identify_box[data-v-4bd99869]{width:143px;height:44px;background:#7278F5;border-radius:22px;position:absolute;top:50%;left:50%;transform:translate(-50%,-50%);display:flex;justify-content:center;align-items:center;cursor:pointer}.audioFileIdentification .public_recognition_speech_identify .public_recognition_speech_identify_box .public_recognition_speech_identify_back_img[data-v-4bd99869]{width:16px;height:16px}.audioFileIdentification .public_recognition_speech_identify .public_recognition_speech_identify_box .public_recognition__identify_the_promp[data-v-4bd99869]{height:20px;font-family:PingFangSC-Medium;font-size:14px;color:#fff;font-weight:500;margin-left:12px}.audioFileIdentification .public_recognition_speech_identify_ahain[data-v-4bd99869]{width:295px;height:230px;padding-top:32px;box-sizing:border-box;position:relative;cursor:pointer}.audioFileIdentification .public_recognition_speech_identify_ahain 
.public_recognition_speech_identify_box_btn[data-v-4bd99869]{width:143px;height:44px;background:#2932E1;border-radius:22px;position:absolute;top:50%;left:50%;transform:translate(-50%,-50%);display:flex;justify-content:center;align-items:center;cursor:pointer}.audioFileIdentification .public_recognition_speech_identify_ahain .public_recognition_speech_identify_box_btn .public_recognition__identify_the_btn[data-v-4bd99869]{height:20px;font-family:PingFangSC-Medium;font-size:14px;color:#fff;font-weight:500}.audioFileIdentification .public_recognition_point_to[data-v-4bd99869]{width:47px;height:67px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAFIAAABwCAYAAACNSCemAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAUqADAAQAAAABAAAAcAAAAAAkHfOMAAAU9UlEQVR4Ae2dW4xkx1nH+5zTvbMXe1nbO9Mzu2vSD3YUZRMUgmISiIQhJBIPIeIh4iIHlJjbQ0SkiBekXAwCxAMWiiwURAARhUSJBIag+AGwklUU2RFyjLETOSEImyyemZ7ddfbqvcx0N7//v6pO95mM7ZnZ6Znu6and6TqnLudU/c+/vu+rr+p013q9Xl7bCzeNQH7q1KkcMBt7gN4klgBYPPfcc/uXlpZu+Vavt+8mLzex1SvD+tj58wfPnTt3eI+dG+eDhnbWarV6qnrx4sXs+vXr+2Dn9OnTpw9s/HKTWyPT0Kb7jTNnztQbjUb92rVrjSKEBqBenZubezHLsu7kQrS+npuRzz//fFn60KFDPYDLALQGnofPnz//w/Pz8wfLAnsHayJgRgJkAwDrV65cqe/fv79RJ9y4caMOkPU8z+udTkcsfXF6eroNxhYDa15tghMrygbQMmHBcO8BaE3MTNiQNIMiet0eOxMi1bgCZLfbNdsEoIa2AB0sDksPkPWGs2fPHierBHmwzKQeV4A8cuRITTJSYIiRCoA3CJgwlmZvoZzetKfZDZE/JCPrHBUA00gyUkp7eXlZclFysgF2heSk5OXKyorkZgP2Uq+ed7s3/vv48eOn+5eczCNp7RrKxqwTIxVMOzGPPw1vQHV+iBslUnneLShycmFh4R7NjsqMCTzI77333lIOyiAXcClMcaChvW9fmjk2MI1WMlhZQ7FbhsJYmUsziIJ3oIxOTCCG7nJFRh4+fNg2JMBkg1obMF24EckoEBU6najVi6LWY7gz/N/abrffBv4JeZebhA8b5KmjGN8+FCOT1oaNZmzEstTkYiVk7AFeBqI1TY86/FG1xVB/N4roeLruJMQa2mU/B2XkICNVII1usZVTwK1blpbsVCGBSmC4H6xPTb0Ldv7kE0880Reqytylwcpm0GmhfqK9a4mRq/sttpIGYCtmp5ipIEamAEEFqv6fPHHixHtxgsymvN0aV6eIOC2mrl6V86KBXIRs9QaaujJVRPMUeTSFGNZ1CFrHyY6J1K13Oc4wi0jDPMowk3oNvB11biLt/uTMzMzXiQXzrgsVZSPjh45mYqRCb2qql2SkE0hCZVtri4cqaxmJdIyyUkObNJUmjU9uAIM99fzxxcUz70N+Tit3twUrm8GhraHL7MYgMb7L/sqGhKg+
t1xkSMdh7jSZQbFwDyUe6oMkhjvgdoklDjpNsL9fmv2BBx6oPMTyRmN64KFN29f0R8Zh3dBsZmUlxzMUPEEa0prdaKZD3ULHyxrS3a5mQXXNepSmPOypBjcJwz4Mc9aHEAFZ7wWu//Cdd9754phiV2m2Gfly/kgN3cS6pLVdW3ZkHdFIkC0JsEFjw0oNZ9gJW/MeMjMLQxuGiplmJwVIRHa2YPaHFs6evafSojE9MSPX448U+wDM824YJzb6WAwEz8hEMa3PyqSEmKrDyF5iLNYnLM20ckndGnP2LPvOVKPxhTvuuOPimOIobvQD4EhZWPattiOVjoz0zMcyUoqHYCVDXog7VslmKKyUrOSayE4EpGWkrlLL8nDXnu6uG+a1/OSNGysfwdf5ZpcYw48KkDDNCkMArWVHItPcxWQ76iQpGccMbdmTqm870qX1kTOFtPb29VE8TtNnCCTktYN5Xvzm4uLir4+j87gC5Hr8kanrxJafZqKop4DAtMkDCwWmdh4Ebd2NBrtnRaFsDQlKvk5UJpJU6xhvy+v1PwLQN8SCYxGpL9a8m/FHdruWk2hojHFr7DqGd9e+SnAMhnpRkwxVRiwDbZGPmeQjMSg5n3PkpvAOspNj0rNHL1++8Lm77777+qij6SniZv2RwQkUp4ga1pGY0tpipGCBad5cBNCIw0IWZWRfV3lW344Z+l3ErmuQEI57P3fwllseZIp598gDuRX+yNTJoHAwgTCHZP6Eod3VojgKBqVDgkALMlIWESdBdirRACMMXCBHckh4FFl+rNPt/ily830wOz6qdMfRiU2G1Jyb8UdG+xERhwfIjJQVKZmIToaVZmTVjgy3Jc15AhqpGdR4Hhjp+i7GUfEryM2H5ue//5rU3lGKbZCnBm2FP9LXMiPhD0GD1LSzGRTZaPZJISkfoMVKmUecZYKdKmYk9RniPAYnIzIzhvj1T72wuPiLZMlyGpmQD8kfKbsoyMlVXR20IaXahZtg479lpBDVqWUk+QDKdWSLhoCMkKL64OLi0p+jIOdS+k7HVjatgU1UatAW+iN/wCCXXEReBqKKrPyhgIynQVMDjCXD3Hk905XjwEDJUJCEpm9eXul+nuH+HlXZ6SDzpyiniMPzR9pvKRMJFso/6Rg4mS5qelnIFCLdplADr1s4zmoNELN5xEhuMO9iStnjD1OJmEeCy0mmVPdrnc6hj584cfjcTgFKX/phiP7IeBM0uY+Io9Fe3h3GheFMCqRzECMlRqWGrN0lMEXIEAdmOvuni+KlR+bb7XeFitv/aWUzOLRhyBD9kbIlCUKTMR7MIGloEAMogyyoOAh2pA6jjExAu7IuAq8j4Eha4M5uA+BPzi8sPIjsvFUltjPkKBs13R1M5o/AnMI7roYg752n0ZgaFufayso0x5YBLvMnhuBW4wTSYTsGDoKSjsM1AgBR0fjE99CRqFcyUlqdnKiA4v0jevKZpDv2GasrvHd5pfNljPi3p+ztiM3IHfJHilMGGgPI5k9uewigwNLmDw9KQzuaRAE3QxugEbJ6TIphZ+Alh1Q7vtLpPjy/uPjH27UDJDHSLZMdicbuP2h1dCBouUFsrWnlkLUbZUFIl7H3B1r2px6c8D9MDTVuSxMmWTiMR5DTEOcfxE2ST9Z7aIMxFT6cKz+EGDPkOZedCXAhTXGfp4L0t6f2H/was6Ifi3WHFoVxFy+PAnAHBNY2+yMlL7l3MLJBzTal6CagmGCasW6mS7kcbFaKgC6HuXKV6ISM7oDra7O8OLWw0P4Y5eSgGUqoAAl73Aqe8E74I4VYnK5o3g27ojj00xWplQZ6lIK9FE35YqIZrWQTlSpSRu6OgJYX/iOLi+3H2Nv5umEgWQFyRPyRYBUZSY+BbbDfnCT0SJYyUgBUy0joRwE9DKW7otjqMkrOsregiJ5kVvQ7/fSQe7OfAjJrtVpuUJKR3MR7f3RxtLeXGHSspQaFtNSghS/sZTl041JD8JAH7w+EwG+h
4Re7zhWk2xmmfnyodJhvrQ6bwtCOjKRCYB/FjQoJwfwJEKm+WswFfClGN2cKlpHOihirhU6PHzT/ALz9xGJ76VF2z93p3C348BRxdP2R4BcBUV8TOwFD7IoPRDmcWhnBTDNQjygSUYIgHqcU1aDcz8DOZ1hj/1Wd32zIox3p62x2f2RqxDD8kRE8UDOvLQEtI6GjDHIpI4HnoS1AzdVgXKR2BXANHvhbOTlGJfxQp9v79GK7/fClS5duageIR0a6YTLI9TgHtfZ69kdq0NoEGo4/EgpGH2VQJm5yYJjBS4yEnPFccewd3YlDP3IyMTSeQvpfuHzlyjdxgPx8wmKjsQ3yVGnU/ZGxnWFgM2Q9vJUIUGYkcJmdJBHLSHeIjGSIR1amfMoEhqpYNoOS/yKK6K83M8XU0NZVHLZwf6ToqUbGZ57ugJwLLLF8s/DyOR9SOBquGsKc6lgX0NDmMykT+kumR7PyhU2Qi8KIgvqvsr6z7MjVQRkCWbELuXAEmDTu+wFmRc8A5k+trvtK51Y2rVbLDZWMVOHx8EfSeTBPaNIBt10Y0QWdlnYkgKl//kggKkHHCilNx67c670GML+y0G4/SN66XjIQI3tJa3fZQ66g3Wi+KC3QZnyf8KEpIjaQT6MJFOViuXzo9kFGQthp4b5yBhNDq8U2rdNoXZu+inta1emPQhVWfQIx828fqrIVj0/50H8u4HoBPCGisvqQtoHIZl+4gBKVW/nsg6iq1ClB5VTc/jBm0jdg56vuAElN9i3G1x8Z4ekP14AoQs9PS1qdHgooPRfFCuTpIx47wWnOj2U4fj2a/euw86Mc910JrtX/sLJpDQxtCo+pPzJ0iuZzIOkbAEpd1VmZRp7KqajSfaz8eKxyZVkREx8i7PyDdnvpMdj5Wor9QMg1tEn1XZP5Q8Ux9keqLyZg7KxBNboCzIE4gJVYSpWYp/RUTrGeR6ysycE9yM6nFhaWPkieMYs3qZmRu80fSeckI2MfdehOC+CQJnQIKpOYlwBUNR+Tn9JUOpRzvQMIiofaS0v/OviCVmKkrltLc22f8KEWpGPFY+ePHGw83TG71KXYrQSUupk6GjEuy6RLBNCNifOo8rOaYrJOdJ/KVPxzq/2RLCnERxguJ6eFpoHS2HK50RDvj9TyqtJ9zlMnsUYhLRFS0RtXWNkqm+j9kW47qs77WdDH4XJhuCR/pLSuPeT2R8pu5HL80W0uxvCVqiSZTupcjXQukUGDRu407VJDwkdsRgBGdajpqjrkWkQCmjo6cnDlmJfSVZ9whAqfWWif+YmK1t7F/khQAcfQeQE/AJQBcZrTQUdgRaAMpCD1+Rr1eUZfOHRg6mMVIHe7PzKxyegkcAKTDV4AMOSm434dMbfPUo7ngfs9c7Ozv4SSPisgs1ar5ceSZCTo72Z/ZGCcOg4wwiaxT3E6d1rErUynjo7zLP+bfY3662dnZ/+ZJAdPEdPMZnCuzU1e9X3t4e+PZN4Th5Nau5X+SBAJCOgT9AJwAbkAcJC6To8lYd1zRZ69c3Z25v7bb7/9Qv8CtC3akU6bNH+ksDTb6L3BI9a5/+Kx0lGmLGNmD9VqM29sNpuPGqxVHxWtLYOcTfjUDf7IpLXlj5Rm1jQbheyX3nUdKWqzUhsEWCvgpUWWDOL+SL9lo9m0PIladvASaxhLPFqtS/n5a94t/V/Yw6OukYEWlyYmBDUq3Rz8kaiMOOBCfZ26kObyQoYqLkDsAydZYdPiWFV5OiNKxEzM0y3L4OPs21z//uZs87EyfY0DG+QpfZL9kYMABlCzFZ7Mn8w2p98EC18RROGnoa3YYVBGDnrIlZne/DJLTJtXfF97bP2REFij76kir91zbHb29+jvul4EsLJptYLW3vNH1q6j0D7anJl+C69E/4fZtc4PK5uktSfZHwlej/MSy4+ikf8QFrInZ2NBdmQZJtQf+RKK7MOzzZm3Hzt69NkSjA0eWNkMDm0E7cT4I8Hqy0U29cbZ2ek/
g4XRSb9BBGNxD22OrfMnxR9JZy8UefFbc7PNdzSbR/5nc9BVa5mRk+SPxMr8UqNenGw2p/+yCsXNnSVG+ipprp0uqWGejhWPuT/yLF91cB/K5N1Hjx59YbBfW3FcUTbYTx7iAnC1HambDfF97aHtjzRIWfYPrN3+CCz87FaAttY1KlPE1f5IbUFBCJf1YKSnitpDrimjgsqIuIrx3fYKpoqupB1qlAnLbszftPmsq2kgWwAQ62EfWSI8Cbn8hThkNdPzi59UkPjn9mqB9qDzrhh3YnDqhvbohkzaqGSqqW6aNlOq221n9eJ3j01PP6K2DjNUGLmb/JHA/fn9U/veOrcNIOoBCcgsmT9JRopi+gYBhXHbH8k6xv8hoX4ZWfih1a4ud2hIH54ippnN4Fyb4TJe/khtuuh1/67bWXkn07uvDgmvl71sRWuPsT/yf5GH9/Gd6Q8cO3bspZft7RAzKspm3PyR2Bh4R/PPNGen/0IDaIg4veqlbZCnUmPmj/xuvV78GrLwEzsNovDT0FbsMCgjV9uRo+KPxLRawUj6q2Zz5v0Y1t9Obd/p2Momae1R90diNT6LTvmNubmZv4WFG3Z1DRNs2bYj/742EwW9Y/e5uZmZfwJAmekjFyrKRv5ILX1pxy7TwfD9kcwa9N5MDOH7IznPGetsAo1bVnoshGnjKNMgp2l6oe+P9MyGaYjMVc1i9L62Zh96X5sBymTHkxRdXO/RaKeJpjFxRqOYDanf5McjPsnvRCyo2KgGK5vBoS1jfHjfHxlf1hSukExTRc0J9FInESATjLei3jVmmJ9iJ8PHRx1ENbuOslHTxYNaMn8Ept7X1tzaLCNv9fvacnAoaI6teTffHxkuwrXEYKXpfe2eN88DC0DhieaA2wXGaYXVgIq/ur+S2cWgafN/Lq+sfHruxIkd+2oFtWcjoX4q/MKSfqPB9Yh7/BaDX+pk15mGnoAuVxFdSIvZQqnTwQ5mjOpbI7VuzXdKxvVtneo7lDKcFr6Avz9SD0yYAaDWtYUzT4GYslrczrPLgPj3LH8+7vuM0UceGekmp7l2an8CMZ0P0x+JKH7y8sWLvz+OIAqfirLRcFWiAJQdmXZaKE1hSPsjLyEk/hEAnw53Gc/PCpDb7o/Mat/gYX2J79m9Op7w9VtdAVIzGykPZKR/z0bKRu/ZkJZqmLHxxPIT8LNcm4KkgtkMpDeP+R5EkZrRGkSfTD/eA0OZSEDig816F5aXbzwCgN9NFx73WH0VmJv6PZvNfH9knu97ambm9q+g8MMvaYw7grH92+aP5IGd58vhP9ts3vEvuw1EYSlGllPEq1evok8aBWZPnWHtX1hChjUOHDjgX1bqdHJGbUc/t1IHFH0LtL+qi2vEb33WNz7XWbZZVqxvgA5fx1UUz+Dy188LjNT8eCsHQ0VGJoOcDvt97aS1N7s/Eivg+8jQrzJHXtrKRo/itYbij0Q1dQHxSaZ2D7PPeteDqAdrZcOaTZ0ZTfmTpxraAKHhrCFb/nCa0hjOTpOiYUewjv2TArgs/IXtlLnAsP/322677fwoMmdYbdoyfyQGD6O4+zQLT/82aSAmRpbK5somvz+SCdJFTJqnYWf4/ZZhPfYRvm5F2WzUH8lcUix8Fo/1aUC0c2OE+zrUpm3aH4mv7EU2ETzO8uf3Jh1EPSHbkcTr/j0b7Ev0Sf69SdHG66VxgRtNE2lWDvblzKlzHLr8pFenYL6tc+UZbMxrTaAvAuB/3XrrrZfWe4NJKVeZ2bBWs+bva4uFzG4Wt3Mvzbg9gIqyYciu5Y+8hBw8gxwsV8DGrZPb0d4KkGhga15A00piB7v83E7tpdmOzm/lPSpAJn8kM5qX8BVeAk+tR+2FdSDgKSLl7I+8cOFCjoPi2smTJ3eVr3AdONx0EWltLYoWuNA6d91111WmeHuycDOwyh/Jn9fmN1N/r05EABAH12H2cNlDYGcR+H83Z8q9vSzjfQAAAABJRU5ErkJggg==) 
no-repeat;background-position:center;background-size:47px 67px;margin-top:91px;margin-right:67px}.audioFileIdentification .public_recognition_result[data-v-4bd99869]{width:680px;height:230px;background:#FAFAFA;padding:40px 50px 0}.audioFileIdentification .public_recognition_result div[data-v-4bd99869]:nth-of-type(1){height:26px;font-family:PingFangSC-Medium;font-size:16px;color:#666;line-height:26px;font-weight:500;margin-bottom:20px}.audioFileIdentification .public_recognition_result div[data-v-4bd99869]:nth-of-type(2){height:26px;font-family:PingFangSC-Medium;font-size:16px;color:#666;line-height:26px;font-weight:500}.server_input[data-v-1bbbdb64]{width:600px}.server_input .serverAddress[data-v-1bbbdb64]{width:400px}.server_input .serverConnect[data-v-1bbbdb64]{margin-left:20px}.realTime[data-v-1bbbdb64]{width:1106px;height:270px;padding-top:40px;box-sizing:border-box;display:flex}.realTime .public_recognition_speech[data-v-1bbbdb64]{width:295px;height:230px;padding-top:0;box-sizing:border-box}.realTime .public_recognition_speech 
.endToEndIdentification_start_recorder_img[data-v-1bbbdb64]{width:116px;height:116px;background:#2932E1;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAHQAAAB0CAYAAABUmhYnAAAABmJLR0QA/wD/AP+gvaeTAAAQMElEQVR4nO2de3xU5ZnHv8+ZyYUkJIGEgNwSLpKEKJgMwmqpBlArXW3tVv1Q78qnunbFC9XWlbqrtVpFilY+2lqrYFFWrVK3VUHFLO3qtkguiIaEi5gJF5UQLrmRkJnz7B+TBEJIyFzPGZjvXzNz3ssv+c175r0873uEKKagoDJeEpLHeSEPlQkqmivIKJAU0GSBZIVBQHJHlmaB/QrNIM2gTSC1qG5BdIsDqrWt+fPKyoLDVv5dwSBWC/CHCZN3jHAY5kwVnSHIdGAM4AxxNR5Et6vykYGUeFVKNleM3h3iOsKGrQ11uUrjWs2s2aahs1FmALkWSdmMaomKrEqWutVlZVPaLdJxQmxp6MTJ7gJ1cq0qNwpkWa3nGPYL/FGE5ZVl2R9aLeZYbGPoWNfnaQnqvAW4Aci3WE5/qQKWtYnn2e1l4w5aLQZsYOgE1+5Mp7bfpnA7vg5MNNKoylIx9NGqspwvrRRimaFnTNs+1Otx3IUyD0iySkeIaVbleY/X8fi2jSN3WiEg4oa6XKVxLQz5EcrDHBlOnGwcEljYkmo+WrN2TGskK46oobmumhmGytNEz29ksGwzMOZVlo9aHakKI2LohMk7Rjgc5mLgykjUZ0NeNVXmR2I8G3ZD84rclwgsAzLCXZfNOaiqc6srct4IZyVhM7S4WJ17Gmp/pnA/YISrnihDVVniONx0T7imF8Ni6Bln7xrl9XpeAc4NR/nRjsJ608GcLeuzt4e67JAbml9Yex7onxAGh7rskwpln4hctql89P+GstiQ3goLCmu/g+jqmJn9QBis6PsFhe7LQ1lsyAzNc7mvN0XfAAaEqsxTgARTeCWvyH1zqAoMiaH5Lvcdoiwl9EtZpwIOgd9OLHI/EIrCgjY0v6h2AcqT2GBeOIoRhf+cWFR7X9AFBZM5v9B9HcKyYMuJ0YUienNVWc7vAy0gYCPyityXCqwkdpsNNV5Fr6wuz1kZSOaADM0rrDlHRNZw8qyS2I1DgnwrkCGN34bmF27PRhwVRO/aZbRQb4inqLJsXK0/mfzqFLlcpXGIYwUxMyNBhqmO1woKKuP9yeSXoYfIfJTYdF4EkWlmQspDfuXob8L8KTXfxpS3/MkTIySooXJZZcXoP/cncb/M6YiH3Rib0rOMvYhO6k+8Ur9uuQ6HuThmpqVkimks6k/CE7bQ/CnuCzF5L3hNMYJFlAs2VWR/0Geavi4WFFTGmwkpG7EuYj1Gd7a2Nxw+c9u209t6S9DnLdebmHIvMTPtxOnOtPj5fSXotYV2BHZtJbYcZjdaEB3fWwep1xbqcJh3EzPTjiSpGr220uO20LypOzPE460BUsKlKkZQNHslLmdL2fC9x144fgv1eOYTM9POJDvN9nnHu9CjhXbsAqsB0sOtKlykpxlcODOJ4vMGkD3KSUaGg9ZWZU+dl49LW3m/pIWNn0XtJu1ODiaYZs6GDWMOHP1hD0Pzi9w/AR6LmKwQEh8v3HjtQOZen0pKct9zJv+3rpWFi/ezZZtt9+6eEBG9Z1NZTrcJh56GutyfoRRETlZoGDzYwVOLMimanND12c5dHso3tFG/z0tiosHYMU5cZyXgdPr+7NZW5T8e2sdbq5utkh0sm6rKs7t51c3Qgik7ppqmuS6ymoInOclgxbKhnD4uDoBPPm3jV08doLS85/g7LdVg7vWpXHfVQOLjBVX48b/vZfX7LZGWHRpEXVVlOeWdb7vdl9TUayOvKHgefmBwl5krXmvkmrlfH9dMgIMNJouXHOCmf93D/gMmIvDIgxmMGxs
XScmhQ6WbZ12GulylcYrOibyi4Jg2JZGLZvkiYd55t4WHF+7H6z1xvvJP2pj34zra25XEBOGnd0Xnmr3CVS5Xade3scvQVjNrNpBpiaoguGVuKuBreQ89ug/V/uct39DGy682ATD93ERyJ/gVHGALBLJayLyo832Xoaahs62RFDjpaQZnuxIB+OPKJg42mH6X8fyLDXg7sl18QXTGvIkaF3e+Puo3VGdaISYYJp+ZgMPhe13y10MBlVG/z8vGT9s6you+FgqgR3lnAOQW1g5HZYJ1kgIja4ij6/UXNYGPJ2tqPQCMGB61IcYTcwtrh0OHoQY6y1o9gZGWduQG09jk/+22k4MHfXkHDozefclimMXQYaiKzrBUTYCEKlpN8aMnZVPElBnQYagI37BWToxgEWE6gFFQUBmPylirBcUIDoXxLldpnEF8ynhiG45OBpxNMnisYYrGYoZOEpxeZ66hIjFDTxJMMXMNQ2NRfScNYuQaqoywWkeMEKGabSAy0GodMUJGhoGorQ2dc3kKTy7MZGiW48SJw8SFM5N4evEQJoy3+5qpZBiovaP77r5jEBfNSuKyS607Wvf2W9OYcf4ArrrS1t99QAcaKLZW2dTsm2cdadHEuWEcmbTv1GJjEgzE3i20dodvJWTMmJ63O4/nyOv4+MBnduM7gsba2nrO6Q4b6iQx0Xe9dqenx3WbkWD75YVN1b742Ym58T1M+/LrI//go5fS/GV4Rwusq+sZuzLpjCNrpFXV9o/lNVCarBbRF+tKfUe2JyZKjwXoL9xHDJ3aEbngL06nUNgR+vn5Fz3XVM+Z5iu3sdFkU5XtDW0zEBqtVtEXpWVtXUFf50ztbtqWrYfZuctn6r98N7BO06ziAaR3rKv+z996Rj101rm+rK0rVMXGtBmIvVtoY5PJJ5/5QkQumZ2McXTQjMLKP/uCpM+alMAlF/tnalKSMH+eb8fHnjovf/uwu6FFZyUwcoTvdvzh3wMLcYkwDQYqtm6hAG+86fvOjRzh5Jvndt/h+OLLDXy9x9eEH/zZYM4s6F9cUFyc8PjDmYwa6TNsyW8P0npMp2jOFb7+Ymur8s57URGIXW+gantDV73b0hXRd/Wc7qOsQ4eUe++vx+NRBgwQXvzdUL73ne4t+VhGjXSy9NksZpzn+3K890ELK/+7+40qa4iDi2b6ogDfXt1MQwARhRFHqXdkDb9rJnCW1Vr6wuOFQekGhZMTGD3KyYaNbew4agixa7eHHbu8FH9zAAkJwqziJGYVDyApycA0FREYOtTJ2UWJ/PDGVB64b3DX2HLd+lbm37uX9mP6Qwt+OpiC/HhU4f6H9rG3vh/R21ZjyFrJc7nvFeWXVms5EelpBqvfHE5qqsH2mnauuOYrDh3qfossOiuBhb/IYPhpJ56E8HrhpVcaWfTrnpH2U12JLH02CxF4e3UL9yzosa/WpuhPHEOGz88EbL8ForVNaTmknDd9AIPSHQzLcvLB2u4dlS+/8vLK600cOGgyZIiTzIyeY9PGRpN317Rwz4J63lrV3CPSPmOwg+d/k0VyskFTs8ntd9fR1BwdQWSKLJEzimryvcgmq8X0BxF45skhnD/d99v33LIGnlhyoNf0pw1zkpPtZEimg8Ymk7o6L9Vb2vF4jm/QoHSDF36T1bUl4p4Fe3l7dVR0hgBQryNXOs4iaiZK4orS0wxeemEoY3N8U4Gvvt7Eo7/aT9vh4FrR2Jw4nlyYyfiOXWwvvtzIY4v3B603grQnSV2yAOS7ajZHU+R81hAHf3huKKNH+b6D1ZsP88ii/b1uIeyL+HhhzuUp3H5rOklJvqnF11Y28eAj/m18shqB6k3l2fkCkFdU84IgN1otyh/S0wwW/TKTc6cdmT36x8etvPlWMyVrD51wZSQn28m3L0rm+5clc9ow3xfDa8ITSw6wdHlDVJkJgPL7qorsH/paaKH7WoQ/WK3JXxwG/ODKgdx2SxqpqUcGnl4Tamraqd7SzoEDXhq
aTOKcQupAg9OGOcjPi+/RYaqsOszDC/ezYaP/rdwOqOjV1WU5KwR8m5UM0V1WiwqUQekGV34/hSu+l9KvIcvRfFzWyorXmlhT0oIZBXMHvaCIjqgqy/myaz0qv8hdTZSf62cYkJ8bz1RXIgUT4xkx3MHYnLiuTUi7dnvYucuDe4eHik/a+Pu6VvYcZ8ksCuk6POPI11m1hCiP0TVN362z8qhlrptvSuXOf/NNwF94adifx2oJgpR0vu764VGRVdbIiREsaphd3nUZOix19CqUr62RFCNQFPYk6d73O993Gbp2rXjU4DVrZMUIFEFfLiub0rW00K1L6PDKctPQ4x4KaCdmFQ/wrYv2Iy5sYu6R9dEHFvTv2PzWVuXlVxrZscv2QWGIwfJu749NkF/k/hQ4I2KK/MTpFMo/Gtl1vFu4eOfdFu6+z/arLD2OhjveoG05Nj680eNR/vSXZv5pamBBYf3B9MK7a+w/KS+iS3t8duwH46dtTY1rj3cTxcerniIc93jVHoEa29ad3qDoM5HTFSMQRPn1sWZCbydaO52Lwd7RgKc4zR4jbsnxLhzX0OqPR9aDPhdeTTGC4JnjnTcPfTwVwlRjERAVwainGM2Gw1zc28VeDd1cMXo3orYPHjvVUOEXlevHfNXb9T43KxmtzY8JVIdeVoyAEN3iOXj4ib6S9GloZWXBYVVsP3N0qmAot/b13DPox+Mmqyqy10BsjtdqRGVFZXlOyYnS9Wt/qKlyF1AftKoYgbJXDfPu/iTsl6GbK0bvxtDr4CQ4tjL6UENlbn+e7gt+PFS9qjTnHVHptbscI0wIC/v7/G3ww1CArLRR94rykf+qYgSGrkui7n5/cvi9BpVfuD0bHOWxZ3KHnXqHw1n42foRO/zJ5PehGVUVY92I+c9A1D5fKgo4ZAiX+WsmBGAoQFX5mH8o/ACw/5J+9OFV9JrKsuwPA8kc8LE21eXZf0G5iVjPN5QoqrdUl+esDLSAoM4pqqrIXi7Iz4IpI8YRVFhQVZHzfDBlhCQwJ9/lvgPliVCVdwqiAj/fVJ79QLAFhcyA/EL3dQjPEyX7TG2EV+HW6vLskKw/h7RF5RW5LxV4FRhwwsQxANpU9erqipw3QlVgyG+RBS73dFN5E8gIddknGfWmwXc3l2aHdKIm5Ic3VpZlf+hwOAtjM0p9oHxsevTsUJsJYTAU4LP1I3ZkpY0uFngQiN5dl6FHVXkqyaibvnljzhfhqCDsvdL8QvcFwEsIQ8Ndl83Zi6HXV5XmvBPOSsJ+Xm5VRfYaDC1E9L/CXZddEZUViE4Kt5kQ4XFjXuGO88Uwn0YpOHHqk4KtKnJbddno9yJVYURPtK6uGPXXJOoKEe7k5A7kbhF4sL3h8JmRNBMsnNmZNGlblscZ9yOFO4E0q3SEmCZVXlDksc0Voy3Z/2/5VF3e1J0Zhsc7T5V5UbzG2ojwjDocj/t2HViH5YZ2Mn7a1tR4T9zNitwQNb+xQqWgyw4723+3bd3pDVbLARsZejQTJ7sL1Mm1mNxgu+GOsk+E10VYHuiaZTixpaGdFBer8+tG97dQYzboTCDfIimbBClRw1w1NCX7vbVrxbYL+7Y29FjyXTWniTJTxZghqt9QGAeE+oFk7QKfq8hHilniMLSkr70kdiOqDD0Wl6s0rkkGj3V4nHlqaC4iEzB1NAZDUDKAFHyGdz49qgloB5oQ6jGpw5Ba1NwsprHF6/RUp+i+7UefKhJt/D+IYnHePFNWNgAAAABJRU5ErkJggg==);background-repeat:no-repeat;background-position:center;background-size:116px 116px;margin-left:98px;cursor:pointer;margin-bottom:20px}.realTime .public_recognition_speech 
.endToEndIdentification_start_recorder_img[data-v-1bbbdb64]:hover{background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAHQAAAB0CAYAAABUmhYnAAAABmJLR0QA/wD/AP+gvaeTAAAP8UlEQVR4nO2de3hU5Z3HP79zZjLJTC6EIFRYQO4gaOuNeqsVdLXU6urzaJddvFW3cb2gJnENSPvstKsmRAm2VNfauvIs1npF65WqxaqAt/WGV1BAUMFaEiDJTJKZOee3f5zcE0LmlpmJ83mePJPJnPO+v8n3/N7zvr/3955XyGD8fs3ZG2yaJIY5XbCniso0hLEK+YCv7ae47RUgAOxpew0INCnsAN2sGJvVtj4e5s3f4vdLKDXfKH4k1QZEQ9mS4BjDtudiyxxb9ERBJgCuBFcTAd0qKusVXetSWVtT49uZ4DqSRloLWlqq7vyS4DxR5oHMUXRaKuwQZJOKrlWbZwJ7vGvuukvCqbBjIKSloGWVjTNFzAtAfwKMTLU9Pdij8JChxqplS/PWpdqYnqSNoJWV9UURI+cyVbkYmJFqewbIRyK60mWHfrt06fB9qTYG0kDQioqGEeo2rwKuxunAZCDSCPY9tiHVt93s25VSS1JV8TU3NI0ybaMMdCHgTZUdCSYAerfLNm6pqfF+kQoDBl3Q0lJ1FxQ3X6GiN9E5nBhqNAM1hbnear9fWgaz4kEVtPyGxjnYxu1kzj0yXj5VdOHy6vw1g1XhoAjaNn6sVZUfD0Z96Yc+4LKlfDDGs0kXtHxx4EeirFQoSXZdac4+kEtrq72PJLOSpAnq96uroSX4M+DngJGsejIMBV1RmOv7j2SFF5MiaPmS4FgsvR84PhnlDwHeUMz5y6tztya64IQLWlHZfJIa9qMowxNd9pBCqFeMs5dX5b2cyGIT2hSWLw6cpWKvyYo5AJThovZzFYuD5yay2IQJWra46SKUR4C8RJX5DcCjqvdXLAqUJqrAhAhaURm8RlTuIfFTWd8ETIU7yxcF/IkoLG5ByxcFlqjobaRBXDiDEeA/KyoDNySioJgpr2y6EJGV8ZaTpQMVKF1W7ft9rAXELERZZeBMEVaTbWYTjSUiP15W5V0dy8kxCXrt4uBxhurzDJ1ZknSjWcU4PZYhTdSCVixpHq+W/TYZO3eZGQjURdQ48ldL83ZEc15UnaLSUnWrZd9HVsyko1BiiP2g36850ZwXlaD5xYFqsuG8QUPguw3Nzf8V5TkDo3xx4IcoT0ZzTpaEoAhn11b5Hh/IwQMSp2xJcIzYujEb0ksZu22DwweSrzSgJtew7dqsmCllhKFy60AOPKCHVlzf9I9qyLPx25Qlbgw9tfbm/L/0e0h/H/r9moNhrEisVVliRW3+e+FC9fR3TL+C7msOLkrV8oMsvRFkitsXKO//mP1QtiQ4Riz9hOx0WLoRtA0m76+DtF8PlYh9HVkx0xGvYcl+vbRPD71ycUOJR83PcNZZZkk/AhK2Dlm2rHB3zw/69NAczHKyYqYzPnLMhX190MtDKyvri8J4PkMYlmyrkoXPKxw20+TQGSYjSoSCfCEcgX37lC3bLDa+b7HjczvVZsbLPjs3dMht/uK9Xf/Yay4zYuRchmammC4XnPw9N3O+7yLX0/tuUlQojBtrMOckN5s/sXj86TC7vspYYYuMVve/Ad0CDr0EVZULB82kBJKfL1x8vocJ4zvvIvV7lG2fWTQ2KW63MGqkMGG8iWnC1CkmV19h8tDqEG+9E0mh5XGg8hN6CNrtMi67PjhbDH1tUI1KAB6PcPXlHr41yhFz+w6bJ9eE2Lqtt/d5vcKck1ycdIIblwtU4d77W3lnozXYZicE2+Co2272vdX+vlunSAy9YPBNip/55+Z0iLn+lQi/+W1Ln2ICBIPKU2vC3Pn7FgIBRQTmn+th1MjMXK1hWN016/gWpaXqBuYPukVxMnmiyeGzTADef
tfi0SdC2AO4LW7bbnPPva1YFrjdcNYZ7iRbmiRE/rVNO6CLoPklwXnAiJQYFQenznW6AcGgsvrxEKoDP3fbZzbrXnHun9Onmow+OCO9dGRhcfC09jcd38B5dExm4fMKkyY43vnqGxGCwSjUbOOFF8MdHv3tw8xEmjdo2MIP2n/vEFTRuakxJ3bGjTMw2r7BBx/G1qlpbNKOMen4cRnpoQAd2hkA118fGA0yNXX2xEZRQWcn/evd0XtnO3/f7Qg6fFjGCnqoo2GboJapp6TWntjwejsFbW6OXdBgs/Oam8FTEWHRk6G9ybVlTiqNiZkEpatpND2pNEXE0dAAUNETUmtOlviREwEMJ5FXJqbanCzxopNLS9VtNLY2TSa74Ggo4Mob1jjRsNXM5gwNEUzTnGYYamcFHSKo6jRDRbKCDhFEZZqBMCbVhmRJDCKMN1AKUm1IlsSgUGJAegt6/LEuLlrgoagwdYveDptpcsmFHg7+VtqHBksM0jy770fzcjh8lskxR6VuZDXvNDczZ5iccGzaj+4KDCS9PbS1xQnLDR+eGg8VgeJixzNbWtM+ROgx0PT20N11zj9x5EG9mzu7y4yZOw7ncZnOxRLpI1dsWJGQ05YPUFefCYKmOV/sdKa2xow2cPUQbe++zlyTwjjuscXFzrkNDb0F6zpH+uXO9E/5NBCaUm1Ef3y6xXHDHDeMH9v9+vv6750CTJoYW7aBacIhbamff/u6t2BTJjvlNrcoX3yZ9oK2GiiNqbaiP7ZssztSRNr/ue3s+sqmfo8j6uyjY2tzZx1q4mubV/3go95ZD1Pb6tyy1R5Q8lmKaTUgvT20pUXZ3pYictQRLqRLy6oKr/+fc+M7ZJzBkd+JTlRPDpxxuvPUmIYG5aNN3QWdcIjB8LbmeNMnGZG322BAensowOtvOKINLxamT+vupS+uC7Ov7d533jk5jBs7sG6BacKC+R5KShzB1jwXJtxjR7Pjv+tcIKGwkyKa9ih1BpL+gr69sTOj73vHdffCUAjueyCEZUFODlz+01xmH9Xdk3tSUiJc/tNcZs5wLo6N71u8/mb3Lm5hoXD4LKeut9+NxJXiMmgY1JnHnbhkLvCdVNvSH7YNPp+zLmVEicH2HXa3IUT9HqWuXpk5w8Ttcu6Lsw418eQItjpjyaIig0kTDE452c25Z3s6mtJPt1j87x+dhOuunHNWDmPHGKjCg6tDNDZmgKDKX12iukn7u5zThBdejHDsMS7y8oSzz3Sz/DcWoS77Krz1ToQ9e20W/LOH4mHC6IONtsTpvjPibRvWbYjwxDO9M+0nTTQ5+gjHO9/ZaGXEcAVARTeZx5+0ZAQZsAQiHIbWEMyYZuLzCcOKhPd75OLu3atseM1pnosKnXWhPWluUTa+Z/GHB0K8+U6kV6Z9Qb5w2aUecj1CS6uyclUrLa3J/GYJRGWFy7LsTYaR9vEFADa8GmHGVJMZ002OPtJFQ4Py1J+792QiEXhpfYSX1kcoHiYcNMKgoEBoaVEaGpWdu+xezWs7Pp9QeomHwrZ834cfDbF3XwY0tW2IYW4Sv19zGlqCATIkr8jnFa76d09HKHDDaxH+9GSoz7BdNIw8yOCiBZ2r2F5aF+FPT2XUVtzhpnqvTwDKFzVtyqTM+cJC4crSXEa0DTm+3GXz2BN9rwc9EC6XMzz5wWk5eNoeZPrq6xEefiy6hU+pRz6urfbOcAGIynoVMkbQhgbl13e0cP78HKZOMRlzsMGVpbl8ssXijTctPvjQOuDMyEEjhCO+7WL20S6KhzkXhm3D038O89eXwxkmJoCug/Zm1tAX2pZ3ZwyBoPK7la2ccKyL0091k5cnTJlkMmWSiW0761V27lICAaW5RTFNyMsViocJY0Y799WufPGlzaOPh/hsR2b0aHuiqi9Am6CmJX+JZEa/qBu2DS9viPDWuxbHzXZx7GzH2wwDRo00GDWA7di3bLVY90qE9z6wMtArO1A15QXosjqkYlHw40x/r
p+IM802eaLJP4xx4rAjRwp5uc7XrN+j1Nfb7K5Ttm232fyp1eeUWQbyYW21byZ06dmq6FqUjBZU1Wk6u05znTLHzQ9Pc4ILN9U0p8q0ZLO2/ZfOBb82z6TGlixxI53adQhalOd9BvhbSgzKEg9fN9V5n2t/0yGo3y8RhAdTY1OWWFH0D3fdJR3hsm7RIVFWKfT5UMB0YtahJtOnmv1OkbUzZnRn9/28cwa2BUooDOs2hDMhKQxBVnV/34PyRYH3gFmDZlGUmCZU/cKLmeQHlrz9rsW996d9VL6jd9tOr/itiK5SlaWDZ1N0WBa88VaEKZOSp6ja8O57GfD8P9F7ev6pl6AhT+ud7ubcxen8eNWHVmdU0DxZ7LM94V7bUvaKD63wlzSowR2DY1OWWBHhVz2flQv7eaJ1CKuWNM8G/IYTIGT1uf1Kn4LeXlVYp+jvkmtTllgR5I6+njcP/ewK4bblVmDIxsoymIBh2bX7+3C/gtbU+HYCVUkxKUvMiOqNt9yS/9X+Pu930qww17sU5OPEm5UlNnRzKOhb3t8RB9j7TEKKnfaRo28KBnr5ihXSb7TjgNPay6vznxfRbIw31Yjcd2t1wdoDHTagPAXTkjKBuvityhIju23R6wZy4IAEranx7VThQiD9o9VDD0W4dCC7+0IUm6rXVvmeFmS/3eUsyUJqBrr/NkQhKEBBbt4iYH3UNmWJCYXXmurzfh7NOVEJ6vdLRExjAUJ9dKZliRaBOjHlvK6T1wMh6uTNZTflbceSM4BAtOdmGTDNqHF27U3ez6M9MaZs3Noa76uq/AuQAZOGGYclIucvW5q3LpaTY06vXr7U94SIXkK255tIVIXLllV5V8daQFz58suq8leJ8rN4ysjSFV2yvMp3dzwlJGTpdkVl8BoVXZ6o8r6BKPDL2mqfP96CEiZAeWXThYjcTYasM00jLITLa6t8CZl/TqhHlVUGzhThASCDt7QZVFpBFtRWex9JVIEJbyIrKptPROzHFEoSXfZQQqDOEvmn26q8CQ3UJHwR4bKleevUlCPIRpT2j/C6uIxjEi0mJEFQgNqbvJ8X5npPBn4BZOYK2uSgoL9uqvOeeOuNeduSUUHSe6Vli5pOFeReYFSy60pzdiNcVFvlezqZlSR93fby6vznbYMjQP+Y7LrSFpH7bIPDky0mDPK4sWxx8/dF7duBmQc8eAig6CfAVcur858drDoHPRBQWqruguLmK1T0RtJ8A4M4CAK3hAPeqgPlACWalEV2rruucaTtMq4ArgWKUmVHgmkC/R+XLUvb0mAHnZSH6q5c3FDiUXMhwkKU4am2JzakEbijVSK33F5VmNLcq5QL2s5Cf12hu9VTisrFZM499gNEV4Y9rXet8Jc0pNoYSCNBu1JW2ThTxLwA9GLSbbgj1KvysKHGqljnLJNJWgrajt+vrqbm4Om2wTyUucCMFJnyIbAW4ZlCj/dZv1/SdmI/rQXtybU3BA42bZ2rKnMQOQF0Evt7wnHshEG2oLpe0bUum7X9rSVJNzJK0J6Ulqo7b1jjREPM6Yah09SWqRiME5uDVCjBGRa56RweNQFhoEmUOjX4OzY7VHQTKptttT5u3luwNdrErHTi/wG5jWw4PKXrYgAAAABJRU5ErkJggg==)}.realTime .public_recognition_speech .endToEndIdentification_end_recorder_img[data-v-1bbbdb64]{width:116px;height:116px;background:#2932E1;border-radius:50%;display:flex;justify-content:center;align-items:center;margin-left:98px;margin-bottom:20px;cursor:pointer}.realTime .public_recognition_speech .endToEndIdentification_end_recorder_img 
.endToEndIdentification_end_recorder_img_back[data-v-1bbbdb64]{width:50px;height:50px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADIAAAAyCAYAAAAeP4ixAAAABmJLR0QA/wD/AP+gvaeTAAABU0lEQVRoge3ZPUoEQRCG4bcWfxDNDIzUW4jiegUTMfUICuaeQIz2BOZi4gEEg0XY0NBIcyMR/8DPYCdapnF2u53ugXqSnqAo6puZbgYGnHOuKUk9SXuSVnLPEkXSicauc83QS9RnvVo3EvWbWqog2XmQGJK2JJ1LWkvVcy5VoyldAH3gAzhL0TDXq7U0sUbzPVKa2iCSViUNJV22PdCsQk9kG9gBjiQttzjPzEJBeg1qWiNpX9KxpOAsuY7fxiQtAlfAPPAA3NbVZb/bDSwwDgEQ/LruQpBGPEhpPEhpPEhpPEhpPEiLvoDv6vo1VFT8R6OZfUo6ADaBu1BdKMhP4DoLM7v5qyYU5B4YAo9m9pZ0qn9SG8TMXoDdlmeJ0oXN3ogHifQ+sUbLdfyeAofAIFXDLEHMbASMUvb0PVKaVEGeq/UpUb88qr+6/c7/1XXOudb8AikhXZ/+ExptAAAAAElFTkSuQmCC);background-repeat:no-repeat;background-position:center;background-size:50px 50px}.realTime .public_recognition_speech .endToEndIdentification_end_recorder_img .endToEndIdentification_end_recorder_img_back[data-v-1bbbdb64]:hover{opacity:.9}.realTime .public_recognition_speech .endToEndIdentification_prompt[data-v-1bbbdb64]{height:22px;font-family:PingFangSC-Medium;font-size:16px;color:#000;font-weight:500;margin-left:124px;margin-bottom:10px}.realTime .public_recognition_speech .speech_text_prompt[data-v-1bbbdb64]{height:20px;font-family:PingFangSC-Regular;font-size:14px;color:#999;font-weight:400;margin-left:105px}.realTime 
.public_recognition_point_to[data-v-1bbbdb64]{width:47px;height:67px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAFIAAABwCAYAAACNSCemAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAUqADAAQAAAABAAAAcAAAAAAkHfOMAAAU9UlEQVR4Ae2dW4xkx1nH+5zTvbMXe1nbO9Mzu2vSD3YUZRMUgmISiIQhJBIPIeIh4iIHlJjbQ0SkiBekXAwCxAMWiiwURAARhUSJBIag+AGwklUU2RFyjLETOSEImyyemZ7ddfbqvcx0N7//v6pO95mM7ZnZ6Znu6and6TqnLudU/c+/vu+rr+p013q9Xl7bCzeNQH7q1KkcMBt7gN4klgBYPPfcc/uXlpZu+Vavt+8mLzex1SvD+tj58wfPnTt3eI+dG+eDhnbWarV6qnrx4sXs+vXr+2Dn9OnTpw9s/HKTWyPT0Kb7jTNnztQbjUb92rVrjSKEBqBenZubezHLsu7kQrS+npuRzz//fFn60KFDPYDLALQGnofPnz//w/Pz8wfLAnsHayJgRgJkAwDrV65cqe/fv79RJ9y4caMOkPU8z+udTkcsfXF6eroNxhYDa15tghMrygbQMmHBcO8BaE3MTNiQNIMiet0eOxMi1bgCZLfbNdsEoIa2AB0sDksPkPWGs2fPHierBHmwzKQeV4A8cuRITTJSYIiRCoA3CJgwlmZvoZzetKfZDZE/JCPrHBUA00gyUkp7eXlZclFysgF2heSk5OXKyorkZgP2Uq+ed7s3/vv48eOn+5eczCNp7RrKxqwTIxVMOzGPPw1vQHV+iBslUnneLShycmFh4R7NjsqMCTzI77333lIOyiAXcClMcaChvW9fmjk2MI1WMlhZQ7FbhsJYmUsziIJ3oIxOTCCG7nJFRh4+fNg2JMBkg1obMF24EckoEBU6najVi6LWY7gz/N/abrffBv4JeZebhA8b5KmjGN8+FCOT1oaNZmzEstTkYiVk7AFeBqI1TY86/FG1xVB/N4roeLruJMQa2mU/B2XkICNVII1usZVTwK1blpbsVCGBSmC4H6xPTb0Ldv7kE0880Reqytylwcpm0GmhfqK9a4mRq/sttpIGYCtmp5ipIEamAEEFqv6fPHHixHtxgsymvN0aV6eIOC2mrl6V86KBXIRs9QaaujJVRPMUeTSFGNZ1CFrHyY6J1K13Oc4wi0jDPMowk3oNvB11biLt/uTMzMzXiQXzrgsVZSPjh45mYqRCb2qql2SkE0hCZVtri4cqaxmJdIyyUkObNJUmjU9uAIM99fzxxcUz70N+Tit3twUrm8GhraHL7MYgMb7L/sqGhKg+t1xkSMdh7jSZQbFwDyUe6oMkhjvgdoklDjpNsL9fmv2BBx6oPMTyRmN64KFN29f0R8Zh3dBsZmUlxzMUPEEa0prdaKZD3ULHyxrS3a5mQXXNepSmPOypBjcJwz4Mc9aHEAFZ7wWu//Cdd9754phiV2m2Gfly/kgN3cS6pLVdW3ZkHdFIkC0JsEFjw0oNZ9gJW/MeMjMLQxuGiplmJwVIRHa2YPaHFs6evafSojE9MSPX448U+wDM824YJzb6WAwEz8hEMa3PyqSEmKrDyF5iLNYnLM20ckndGnP2LPvOVKPxhTvuuOPimOIobvQD4EhZWPattiOVjoz0zMcyUoqHYCVDXog7VslmKKyUrOSayE4EpGWkrlLL8nDXnu6uG+a1/OSNGysfwdf5ZpcYw48KkDDNCkMArWVHItPcxWQ76iQpGccMbdmTqm870qX1kTOFtPb29VE8TtNnCCTktYN5Xvzm4uLir4+j87gC5Hr8kanrxJafZqKop4DAtMkDCwWmdh4Ebd2NBrtnRaFsDQlKvk
5UJpJU6xhvy+v1PwLQN8SCYxGpL9a8m/FHdruWk2hojHFr7DqGd9e+SnAMhnpRkwxVRiwDbZGPmeQjMSg5n3PkpvAOspNj0rNHL1++8Lm77777+qij6SniZv2RwQkUp4ga1pGY0tpipGCBad5cBNCIw0IWZWRfV3lW344Z+l3ErmuQEI57P3fwllseZIp598gDuRX+yNTJoHAwgTCHZP6Eod3VojgKBqVDgkALMlIWESdBdirRACMMXCBHckh4FFl+rNPt/ily830wOz6qdMfRiU2G1Jyb8UdG+xERhwfIjJQVKZmIToaVZmTVjgy3Jc15AhqpGdR4Hhjp+i7GUfEryM2H5ue//5rU3lGKbZCnBm2FP9LXMiPhD0GD1LSzGRTZaPZJISkfoMVKmUecZYKdKmYk9RniPAYnIzIzhvj1T72wuPiLZMlyGpmQD8kfKbsoyMlVXR20IaXahZtg479lpBDVqWUk+QDKdWSLhoCMkKL64OLi0p+jIOdS+k7HVjatgU1UatAW+iN/wCCXXEReBqKKrPyhgIynQVMDjCXD3Hk905XjwEDJUJCEpm9eXul+nuH+HlXZ6SDzpyiniMPzR9pvKRMJFso/6Rg4mS5qelnIFCLdplADr1s4zmoNELN5xEhuMO9iStnjD1OJmEeCy0mmVPdrnc6hj584cfjcTgFKX/phiP7IeBM0uY+Io9Fe3h3GheFMCqRzECMlRqWGrN0lMEXIEAdmOvuni+KlR+bb7XeFitv/aWUzOLRhyBD9kbIlCUKTMR7MIGloEAMogyyoOAh2pA6jjExAu7IuAq8j4Eha4M5uA+BPzi8sPIjsvFUltjPkKBs13R1M5o/AnMI7roYg752n0ZgaFufayso0x5YBLvMnhuBW4wTSYTsGDoKSjsM1AgBR0fjE99CRqFcyUlqdnKiA4v0jevKZpDv2GasrvHd5pfNljPi3p+ztiM3IHfJHilMGGgPI5k9uewigwNLmDw9KQzuaRAE3QxugEbJ6TIphZ+Alh1Q7vtLpPjy/uPjH27UDJDHSLZMdicbuP2h1dCBouUFsrWnlkLUbZUFIl7H3B1r2px6c8D9MDTVuSxMmWTiMR5DTEOcfxE2ST9Z7aIMxFT6cKz+EGDPkOZedCXAhTXGfp4L0t6f2H/was6Ifi3WHFoVxFy+PAnAHBNY2+yMlL7l3MLJBzTal6CagmGCasW6mS7kcbFaKgC6HuXKV6ISM7oDra7O8OLWw0P4Y5eSgGUqoAAl73Aqe8E74I4VYnK5o3g27ojj00xWplQZ6lIK9FE35YqIZrWQTlSpSRu6OgJYX/iOLi+3H2Nv5umEgWQFyRPyRYBUZSY+BbbDfnCT0SJYyUgBUy0joRwE9DKW7otjqMkrOsregiJ5kVvQ7/fSQe7OfAjJrtVpuUJKR3MR7f3RxtLeXGHSspQaFtNSghS/sZTl041JD8JAH7w+EwG+h4Re7zhWk2xmmfnyodJhvrQ6bwtCOjKRCYB/FjQoJwfwJEKm+WswFfClGN2cKlpHOihirhU6PHzT/ALz9xGJ76VF2z93p3C348BRxdP2R4BcBUV8TOwFD7IoPRDmcWhnBTDNQjygSUYIgHqcU1aDcz8DOZ1hj/1Wd32zIox3p62x2f2RqxDD8kRE8UDOvLQEtI6GjDHIpI4HnoS1AzdVgXKR2BXANHvhbOTlGJfxQp9v79GK7/fClS5duageIR0a6YTLI9TgHtfZ69kdq0NoEGo4/EgpGH2VQJm5yYJjBS4yEnPFccewd3YlDP3IyMTSeQvpfuHzlyjdxgPx8wmKjsQ3yVGnU/ZGxnWFgM2Q9vJUIUGYkcJmdJBHLSHeIjGSIR1amfMoEhqpYNoOS/yKK6K83M8XU0NZVHLZwf6ToqUbGZ57ugJwLLLF8s/DyOR9SOBquGsKc6lgX0NDmMykT+kumR7PyhU2Qi8KIgvqvsr6z7MjVQRkCWbELuXAEmDTu+wFmRc8A5k+trvtK51Y2rV
bLDZWMVOHx8EfSeTBPaNIBt10Y0QWdlnYkgKl//kggKkHHCilNx67c670GML+y0G4/SN66XjIQI3tJa3fZQ66g3Wi+KC3QZnyf8KEpIjaQT6MJFOViuXzo9kFGQthp4b5yBhNDq8U2rdNoXZu+inta1emPQhVWfQIx828fqrIVj0/50H8u4HoBPCGisvqQtoHIZl+4gBKVW/nsg6iq1ClB5VTc/jBm0jdg56vuAElN9i3G1x8Z4ekP14AoQs9PS1qdHgooPRfFCuTpIx47wWnOj2U4fj2a/euw86Mc910JrtX/sLJpDQxtCo+pPzJ0iuZzIOkbAEpd1VmZRp7KqajSfaz8eKxyZVkREx8i7PyDdnvpMdj5Wor9QMg1tEn1XZP5Q8Ux9keqLyZg7KxBNboCzIE4gJVYSpWYp/RUTrGeR6ysycE9yM6nFhaWPkieMYs3qZmRu80fSeckI2MfdehOC+CQJnQIKpOYlwBUNR+Tn9JUOpRzvQMIiofaS0v/OviCVmKkrltLc22f8KEWpGPFY+ePHGw83TG71KXYrQSUupk6GjEuy6RLBNCNifOo8rOaYrJOdJ/KVPxzq/2RLCnERxguJ6eFpoHS2HK50RDvj9TyqtJ9zlMnsUYhLRFS0RtXWNkqm+j9kW47qs77WdDH4XJhuCR/pLSuPeT2R8pu5HL80W0uxvCVqiSZTupcjXQukUGDRu407VJDwkdsRgBGdajpqjrkWkQCmjo6cnDlmJfSVZ9whAqfWWif+YmK1t7F/khQAcfQeQE/AJQBcZrTQUdgRaAMpCD1+Rr1eUZfOHRg6mMVIHe7PzKxyegkcAKTDV4AMOSm434dMbfPUo7ngfs9c7Ozv4SSPisgs1ar5ceSZCTo72Z/ZGCcOg4wwiaxT3E6d1rErUynjo7zLP+bfY3662dnZ/+ZJAdPEdPMZnCuzU1e9X3t4e+PZN4Th5Nau5X+SBAJCOgT9AJwAbkAcJC6To8lYd1zRZ69c3Z25v7bb7/9Qv8CtC3akU6bNH+ksDTb6L3BI9a5/+Kx0lGmLGNmD9VqM29sNpuPGqxVHxWtLYOcTfjUDf7IpLXlj5Rm1jQbheyX3nUdKWqzUhsEWCvgpUWWDOL+SL9lo9m0PIladvASaxhLPFqtS/n5a94t/V/Yw6OukYEWlyYmBDUq3Rz8kaiMOOBCfZ26kObyQoYqLkDsAydZYdPiWFV5OiNKxEzM0y3L4OPs21z//uZs87EyfY0DG+QpfZL9kYMABlCzFZ7Mn8w2p98EC18RROGnoa3YYVBGDnrIlZne/DJLTJtXfF97bP2REFij76kir91zbHb29+jvul4EsLJptYLW3vNH1q6j0D7anJl+C69E/4fZtc4PK5uktSfZHwlej/MSy4+ikf8QFrInZ2NBdmQZJtQf+RKK7MOzzZm3Hzt69NkSjA0eWNkMDm0E7cT4I8Hqy0U29cbZ2ek/g4XRSb9BBGNxD22OrfMnxR9JZy8UefFbc7PNdzSbR/5nc9BVa5mRk+SPxMr8UqNenGw2p/+yCsXNnSVG+ipprp0uqWGejhWPuT/yLF91cB/K5N1Hjx59YbBfW3FcUTbYTx7iAnC1HambDfF97aHtjzRIWfYPrN3+CCz87FaAttY1KlPE1f5IbUFBCJf1YKSnitpDrimjgsqIuIrx3fYKpoqupB1qlAnLbszftPmsq2kgWwAQ62EfWSI8Cbn8hThkNdPzi59UkPjn9mqB9qDzrhh3YnDqhvbohkzaqGSqqW6aNlOq221n9eJ3j01PP6K2DjNUGLmb/JHA/fn9U/veOrcNIOoBCcgsmT9JRopi+gYBhXHbH8k6xv8hoX4ZWfih1a4ud2hIH54ippnN4Fyb4TJe/khtuuh1/67bWXkn07uvDgmvl71sRWuPsT/yf5GH9/Gd6Q8cO3bspZft7RAzKspm3PyR2Bh4R/PPNGen/0IDaIg4veqlbZCnUmPmj/xuvV78GrLwEzsNovDT0FbsMC
gjV9uRo+KPxLRawUj6q2Zz5v0Y1t9Obd/p2Momae1R90diNT6LTvmNubmZv4WFG3Z1DRNs2bYj/742EwW9Y/e5uZmZfwJAmekjFyrKRv5ILX1pxy7TwfD9kcwa9N5MDOH7IznPGetsAo1bVnoshGnjKNMgp2l6oe+P9MyGaYjMVc1i9L62Zh96X5sBymTHkxRdXO/RaKeJpjFxRqOYDanf5McjPsnvRCyo2KgGK5vBoS1jfHjfHxlf1hSukExTRc0J9FInESATjLei3jVmmJ9iJ8PHRx1ENbuOslHTxYNaMn8Ept7X1tzaLCNv9fvacnAoaI6teTffHxkuwrXEYKXpfe2eN88DC0DhieaA2wXGaYXVgIq/ur+S2cWgafN/Lq+sfHruxIkd+2oFtWcjoX4q/MKSfqPB9Yh7/BaDX+pk15mGnoAuVxFdSIvZQqnTwQ5mjOpbI7VuzXdKxvVtneo7lDKcFr6Avz9SD0yYAaDWtYUzT4GYslrczrPLgPj3LH8+7vuM0UceGekmp7l2an8CMZ0P0x+JKH7y8sWLvz+OIAqfirLRcFWiAJQdmXZaKE1hSPsjLyEk/hEAnw53Gc/PCpDb7o/Mat/gYX2J79m9Op7w9VtdAVIzGykPZKR/z0bKRu/ZkJZqmLHxxPIT8LNcm4KkgtkMpDeP+R5EkZrRGkSfTD/eA0OZSEDig816F5aXbzwCgN9NFx73WH0VmJv6PZvNfH9knu97ambm9q+g8MMvaYw7grH92+aP5IGd58vhP9ts3vEvuw1EYSlGllPEq1evok8aBWZPnWHtX1hChjUOHDjgX1bqdHJGbUc/t1IHFH0LtL+qi2vEb33WNz7XWbZZVqxvgA5fx1UUz+Dy188LjNT8eCsHQ0VGJoOcDvt97aS1N7s/Eivg+8jQrzJHXtrKRo/itYbij0Q1dQHxSaZ2D7PPeteDqAdrZcOaTZ0ZTfmTpxraAKHhrCFb/nCa0hjOTpOiYUewjv2TArgs/IXtlLnAsP/322677fwoMmdYbdoyfyQGD6O4+zQLT/82aSAmRpbK5somvz+SCdJFTJqnYWf4/ZZhPfYRvm5F2WzUH8lcUix8Fo/1aUC0c2OE+zrUpm3aH4mv7EU2ETzO8uf3Jh1EPSHbkcTr/j0b7Ev0Sf69SdHG66VxgRtNE2lWDvblzKlzHLr8pFenYL6tc+UZbMxrTaAvAuB/3XrrrZfWe4NJKVeZ2bBWs+bva4uFzG4Wt3Mvzbg9gIqyYciu5Y+8hBw8gxwsV8DGrZPb0d4KkGhga15A00piB7v83E7tpdmOzm/lPSpAJn8kM5qX8BVeAk+tR+2FdSDgKSLl7I+8cOFCjoPi2smTJ3eVr3AdONx0EWltLYoWuNA6d91111WmeHuycDOwyh/Jn9fmN1N/r05EABAH12H2cNlDYGcR+H83Z8q9vSzjfQAAAABJRU5ErkJggg==) no-repeat;background-position:center;background-size:47px 67px;margin-top:91px;margin-right:67px}.realTime .public_recognition_result[data-v-1bbbdb64]{width:680px;height:230px;background:#FAFAFA;padding:40px 50px 0}.realTime .public_recognition_result div[data-v-1bbbdb64]:nth-of-type(1){height:26px;font-family:PingFangSC-Medium;font-size:16px;color:#666;line-height:26px;font-weight:500;margin-bottom:20px}.realTime .public_recognition_result 
div[data-v-1bbbdb64]:nth-of-type(2){height:26px;font-family:PingFangSC-Medium;font-size:16px;color:#666;line-height:26px;font-weight:500}.endToEndIdentification[data-v-0190c31b]{width:1106px;height:270px;padding-top:40px;box-sizing:border-box;display:flex}.endToEndIdentification .public_recognition_speech[data-v-0190c31b]{width:295px;height:230px;padding-top:32px;box-sizing:border-box}.endToEndIdentification .public_recognition_speech .endToEndIdentification_start_recorder_img[data-v-0190c31b]{width:116px;height:116px;background:#2932E1;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAHQAAAB0CAYAAABUmhYnAAAABmJLR0QA/wD/AP+gvaeTAAAQMElEQVR4nO2de3xU5ZnHv8+ZyYUkJIGEgNwSLpKEKJgMwmqpBlArXW3tVv1Q78qnunbFC9XWlbqrtVpFilY+2lqrYFFWrVK3VUHFLO3qtkguiIaEi5gJF5UQLrmRkJnz7B+TBEJIyFzPGZjvXzNz3ssv+c175r0873uEKKagoDJeEpLHeSEPlQkqmivIKJAU0GSBZIVBQHJHlmaB/QrNIM2gTSC1qG5BdIsDqrWt+fPKyoLDVv5dwSBWC/CHCZN3jHAY5kwVnSHIdGAM4AxxNR5Et6vykYGUeFVKNleM3h3iOsKGrQ11uUrjWs2s2aahs1FmALkWSdmMaomKrEqWutVlZVPaLdJxQmxp6MTJ7gJ1cq0qNwpkWa3nGPYL/FGE5ZVl2R9aLeZYbGPoWNfnaQnqvAW4Aci3WE5/qQKWtYnn2e1l4w5aLQZsYOgE1+5Mp7bfpnA7vg5MNNKoylIx9NGqspwvrRRimaFnTNs+1Otx3IUyD0iySkeIaVbleY/X8fi2jSN3WiEg4oa6XKVxLQz5EcrDHBlOnGwcEljYkmo+WrN2TGskK46oobmumhmGytNEz29ksGwzMOZVlo9aHakKI2LohMk7Rjgc5mLgykjUZ0NeNVXmR2I8G3ZD84rclwgsAzLCXZfNOaiqc6srct4IZyVhM7S4WJ17Gmp/pnA/YISrnihDVVniONx0T7imF8Ni6Bln7xrl9XpeAc4NR/nRjsJ608GcLeuzt4e67JAbml9Yex7onxAGh7rskwpln4hctql89P+GstiQ3goLCmu/g+jqmJn9QBis6PsFhe7LQ1lsyAzNc7mvN0XfAAaEqsxTgARTeCWvyH1zqAoMiaH5Lvcdoiwl9EtZpwIOgd9OLHI/EIrCgjY0v6h2AcqT2GBeOIoRhf+cWFR7X9AFBZM5v9B9HcKyYMuJ0YUienNVWc7vAy0gYCPyityXCqwkdpsNNV5Fr6wuz1kZSOaADM0rrDlHRNZw8qyS2I1DgnwrkCGN34bmF27PRhwVRO/aZbRQb4inqLJsXK0/mfzqFLlcpXGIYwUxMyNBhqmO1woKKuP9yeSXoYfIfJTYdF4EkWlmQspDfuXob8L8KTXfxpS3/MkTIySooXJZZcXoP/cncb/M6YiH3Rib0rOMvYhO6k+8Ur9uuQ6HuThmpqVkimks6k/CE7bQ/CnuCzF5L3hNMYJFlAs2VWR/0Geavi4WFFTGmwkpG7EuYj1Gd7a2Nxw+c9u209t6S9DnLdebmHIvMTPtxOnOtPj5fSXotYV2BHZtJbYcZjdaEB3fWwep1xbqcJh3EzPTjiSpGr220uO20LypOzPE460BUsKlKkZQNHsl
LmdL2fC9x144fgv1eOYTM9POJDvN9nnHu9CjhXbsAqsB0sOtKlykpxlcODOJ4vMGkD3KSUaGg9ZWZU+dl49LW3m/pIWNn0XtJu1ODiaYZs6GDWMOHP1hD0Pzi9w/AR6LmKwQEh8v3HjtQOZen0pKct9zJv+3rpWFi/ezZZtt9+6eEBG9Z1NZTrcJh56GutyfoRRETlZoGDzYwVOLMimanND12c5dHso3tFG/z0tiosHYMU5cZyXgdPr+7NZW5T8e2sdbq5utkh0sm6rKs7t51c3Qgik7ppqmuS6ymoInOclgxbKhnD4uDoBPPm3jV08doLS85/g7LdVg7vWpXHfVQOLjBVX48b/vZfX7LZGWHRpEXVVlOeWdb7vdl9TUayOvKHgefmBwl5krXmvkmrlfH9dMgIMNJouXHOCmf93D/gMmIvDIgxmMGxsXScmhQ6WbZ12GulylcYrOibyi4Jg2JZGLZvkiYd55t4WHF+7H6z1xvvJP2pj34zra25XEBOGnd0Xnmr3CVS5Xade3scvQVjNrNpBpiaoguGVuKuBreQ89ug/V/uct39DGy682ATD93ERyJ/gVHGALBLJayLyo832Xoaahs62RFDjpaQZnuxIB+OPKJg42mH6X8fyLDXg7sl18QXTGvIkaF3e+Puo3VGdaISYYJp+ZgMPhe13y10MBlVG/z8vGT9s6you+FgqgR3lnAOQW1g5HZYJ1kgIja4ij6/UXNYGPJ2tqPQCMGB61IcYTcwtrh0OHoQY6y1o9gZGWduQG09jk/+22k4MHfXkHDozefclimMXQYaiKzrBUTYCEKlpN8aMnZVPElBnQYagI37BWToxgEWE6gFFQUBmPylirBcUIDoXxLldpnEF8ynhiG45OBpxNMnisYYrGYoZOEpxeZ66hIjFDTxJMMXMNQ2NRfScNYuQaqoywWkeMEKGabSAy0GodMUJGhoGorQ2dc3kKTy7MZGiW48SJw8SFM5N4evEQJoy3+5qpZBiovaP77r5jEBfNSuKyS607Wvf2W9OYcf4ArrrS1t99QAcaKLZW2dTsm2cdadHEuWEcmbTv1GJjEgzE3i20dodvJWTMmJ63O4/nyOv4+MBnduM7gsba2nrO6Q4b6iQx0Xe9dqenx3WbkWD75YVN1b742Ym58T1M+/LrI//go5fS/GV4Rwusq+sZuzLpjCNrpFXV9o/lNVCarBbRF+tKfUe2JyZKjwXoL9xHDJ3aEbngL06nUNgR+vn5Fz3XVM+Z5iu3sdFkU5XtDW0zEBqtVtEXpWVtXUFf50ztbtqWrYfZuctn6r98N7BO06ziAaR3rKv+z996Rj101rm+rK0rVMXGtBmIvVtoY5PJJ5/5QkQumZ2McXTQjMLKP/uCpM+alMAlF/tnalKSMH+eb8fHnjovf/uwu6FFZyUwcoTvdvzh3wMLcYkwDQYqtm6hAG+86fvOjRzh5Jvndt/h+OLLDXy9x9eEH/zZYM4s6F9cUFyc8PjDmYwa6TNsyW8P0npMp2jOFb7+Ymur8s57URGIXW+gantDV73b0hXRd/Wc7qOsQ4eUe++vx+NRBgwQXvzdUL73ne4t+VhGjXSy9NksZpzn+3K890ELK/+7+40qa4iDi2b6ogDfXt1MQwARhRFHqXdkDb9rJnCW1Vr6wuOFQekGhZMTGD3KyYaNbew4agixa7eHHbu8FH9zAAkJwqziJGYVDyApycA0FREYOtTJ2UWJ/PDGVB64b3DX2HLd+lbm37uX9mP6Qwt+OpiC/HhU4f6H9rG3vh/R21ZjyFrJc7nvFeWXVms5EelpBqvfHE5qqsH2mnauuOYrDh3qfossOiuBhb/IYPhpJ56E8HrhpVcaWfTrnpH2U12JLH02CxF4e3UL9yzosa/WpuhPHEOGz88EbL8ForVNaTmknDd9AIPSHQzLcvLB2u4dlS+/8vLK600cOGgyZIiTzIyeY9PGRpN317Rwz4J63lrV3CPSPmOwg+d/k0VyskFTs8nt
d9fR1BwdQWSKLJEzimryvcgmq8X0BxF45skhnD/d99v33LIGnlhyoNf0pw1zkpPtZEimg8Ymk7o6L9Vb2vF4jm/QoHSDF36T1bUl4p4Fe3l7dVR0hgBQryNXOs4iaiZK4orS0wxeemEoY3N8U4Gvvt7Eo7/aT9vh4FrR2Jw4nlyYyfiOXWwvvtzIY4v3B603grQnSV2yAOS7ajZHU+R81hAHf3huKKNH+b6D1ZsP88ii/b1uIeyL+HhhzuUp3H5rOklJvqnF11Y28eAj/m18shqB6k3l2fkCkFdU84IgN1otyh/S0wwW/TKTc6cdmT36x8etvPlWMyVrD51wZSQn28m3L0rm+5clc9ow3xfDa8ITSw6wdHlDVJkJgPL7qorsH/paaKH7WoQ/WK3JXxwG/ODKgdx2SxqpqUcGnl4Tamraqd7SzoEDXhqaTOKcQupAg9OGOcjPi+/RYaqsOszDC/ezYaP/rdwOqOjV1WU5KwR8m5UM0V1WiwqUQekGV34/hSu+l9KvIcvRfFzWyorXmlhT0oIZBXMHvaCIjqgqy/myaz0qv8hdTZSf62cYkJ8bz1RXIgUT4xkx3MHYnLiuTUi7dnvYucuDe4eHik/a+Pu6VvYcZ8ksCuk6POPI11m1hCiP0TVN362z8qhlrptvSuXOf/NNwF94adifx2oJgpR0vu764VGRVdbIiREsaphd3nUZOix19CqUr62RFCNQFPYk6d73O993Gbp2rXjU4DVrZMUIFEFfLiub0rW00K1L6PDKctPQ4x4KaCdmFQ/wrYv2Iy5sYu6R9dEHFvTv2PzWVuXlVxrZscv2QWGIwfJu749NkF/k/hQ4I2KK/MTpFMo/Gtl1vFu4eOfdFu6+z/arLD2OhjveoG05Nj680eNR/vSXZv5pamBBYf3B9MK7a+w/KS+iS3t8duwH46dtTY1rj3cTxcerniIc93jVHoEa29ad3qDoM5HTFSMQRPn1sWZCbydaO52Lwd7RgKc4zR4jbsnxLhzX0OqPR9aDPhdeTTGC4JnjnTcPfTwVwlRjERAVwainGM2Gw1zc28VeDd1cMXo3orYPHjvVUOEXlevHfNXb9T43KxmtzY8JVIdeVoyAEN3iOXj4ib6S9GloZWXBYVVsP3N0qmAot/b13DPox+Mmqyqy10BsjtdqRGVFZXlOyYnS9Wt/qKlyF1AftKoYgbJXDfPu/iTsl6GbK0bvxtDr4CQ4tjL6UENlbn+e7gt+PFS9qjTnHVHptbscI0wIC/v7/G3ww1CArLRR94rykf+qYgSGrkui7n5/cvi9BpVfuD0bHOWxZ3KHnXqHw1n42foRO/zJ5PehGVUVY92I+c9A1D5fKgo4ZAiX+WsmBGAoQFX5mH8o/ACw/5J+9OFV9JrKsuwPA8kc8LE21eXZf0G5iVjPN5QoqrdUl+esDLSAoM4pqqrIXi7Iz4IpI8YRVFhQVZHzfDBlhCQwJ9/lvgPliVCVdwqiAj/fVJ79QLAFhcyA/EL3dQjPEyX7TG2EV+HW6vLskKw/h7RF5RW5LxV4FRhwwsQxANpU9erqipw3QlVgyG+RBS73dFN5E8gIddknGfWmwXc3l2aHdKIm5Ic3VpZlf+hwOAtjM0p9oHxsevTsUJsJYTAU4LP1I3ZkpY0uFngQiN5dl6FHVXkqyaibvnljzhfhqCDsvdL8QvcFwEsIQ8Ndl83Zi6HXV5XmvBPOSsJ+Xm5VRfYaDC1E9L/CXZddEZUViE4Kt5kQ4XFjXuGO88Uwn0YpOHHqk4KtKnJbddno9yJVYURPtK6uGPXXJOoKEe7k5A7kbhF4sL3h8JmRNBMsnNmZNGlblscZ9yOFO4E0q3SEmCZVXlDksc0Voy3Z/2/5VF3e1J0Zhsc7T5V5UbzG2ojwjDocj/t2HViH5YZ2Mn7a1tR4T9zNitwQNb+xQqWgyw4723+3bd3pDVbLARsZejQTJ7sL1Mm1mNxgu+GOsk+E10VYHuiaZTixpaGdFBer8+tG
97dQYzboTCDfIimbBClRw1w1NCX7vbVrxbYL+7Y29FjyXTWniTJTxZghqt9QGAeE+oFk7QKfq8hHilniMLSkr70kdiOqDD0Wl6s0rkkGj3V4nHlqaC4iEzB1NAZDUDKAFHyGdz49qgloB5oQ6jGpw5Ba1NwsprHF6/RUp+i+7UefKhJt/D+IYnHePFNWNgAAAABJRU5ErkJggg==);background-repeat:no-repeat;background-position:center;background-size:116px 116px;margin-left:98px;cursor:pointer;margin-bottom:20px}.endToEndIdentification .public_recognition_speech .endToEndIdentification_start_recorder_img[data-v-0190c31b]:hover{background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAHQAAAB0CAYAAABUmhYnAAAABmJLR0QA/wD/AP+gvaeTAAAP8UlEQVR4nO2de3hU5Z3HP79zZjLJTC6EIFRYQO4gaOuNeqsVdLXU6urzaJddvFW3cb2gJnENSPvstKsmRAm2VNfauvIs1npF65WqxaqAt/WGV1BAUMFaEiDJTJKZOee3f5zcE0LmlpmJ83mePJPJnPO+v8n3/N7zvr/3955XyGD8fs3ZG2yaJIY5XbCniso0hLEK+YCv7ae47RUgAOxpew0INCnsAN2sGJvVtj4e5s3f4vdLKDXfKH4k1QZEQ9mS4BjDtudiyxxb9ERBJgCuBFcTAd0qKusVXetSWVtT49uZ4DqSRloLWlqq7vyS4DxR5oHMUXRaKuwQZJOKrlWbZwJ7vGvuukvCqbBjIKSloGWVjTNFzAtAfwKMTLU9Pdij8JChxqplS/PWpdqYnqSNoJWV9UURI+cyVbkYmJFqewbIRyK60mWHfrt06fB9qTYG0kDQioqGEeo2rwKuxunAZCDSCPY9tiHVt93s25VSS1JV8TU3NI0ybaMMdCHgTZUdCSYAerfLNm6pqfF+kQoDBl3Q0lJ1FxQ3X6GiN9E5nBhqNAM1hbnear9fWgaz4kEVtPyGxjnYxu1kzj0yXj5VdOHy6vw1g1XhoAjaNn6sVZUfD0Z96Yc+4LKlfDDGs0kXtHxx4EeirFQoSXZdac4+kEtrq72PJLOSpAnq96uroSX4M+DngJGsejIMBV1RmOv7j2SFF5MiaPmS4FgsvR84PhnlDwHeUMz5y6tztya64IQLWlHZfJIa9qMowxNd9pBCqFeMs5dX5b2cyGIT2hSWLw6cpWKvyYo5AJThovZzFYuD5yay2IQJWra46SKUR4C8RJX5DcCjqvdXLAqUJqrAhAhaURm8RlTuIfFTWd8ETIU7yxcF/IkoLG5ByxcFlqjobaRBXDiDEeA/KyoDNySioJgpr2y6EJGV8ZaTpQMVKF1W7ft9rAXELERZZeBMEVaTbWYTjSUiP15W5V0dy8kxCXrt4uBxhurzDJ1ZknSjWcU4PZYhTdSCVixpHq+W/TYZO3eZGQjURdQ48ldL83ZEc15UnaLSUnWrZd9HVsyko1BiiP2g36850ZwXlaD5xYFqsuG8QUPguw3Nzf8V5TkDo3xx4IcoT0ZzTpaEoAhn11b5Hh/IwQMSp2xJcIzYujEb0ksZu22DwweSrzSgJtew7dqsmCllhKFy60AOPKCHVlzf9I9qyLPx25Qlbgw9tfbm/L/0e0h/H/r9moNhrEisVVliRW3+e+FC9fR3TL+C7msOLkrV8oMsvRFkitsXKO//mP1QtiQ4Riz9hOx0WLoRtA0m76+DtF8PlYh9HVkx0xGvYcl+vbRPD71ycUOJR83PcNZZZkk/AhK2Dlm2rHB3zw/69NAczHKyYqYzPnLMhX190MtDKyvri8J4PkMYlmyrkoXPKxw20+TQGSYjSoSCfC
EcgX37lC3bLDa+b7HjczvVZsbLPjs3dMht/uK9Xf/Yay4zYuRchmammC4XnPw9N3O+7yLX0/tuUlQojBtrMOckN5s/sXj86TC7vspYYYuMVve/Ad0CDr0EVZULB82kBJKfL1x8vocJ4zvvIvV7lG2fWTQ2KW63MGqkMGG8iWnC1CkmV19h8tDqEG+9E0mh5XGg8hN6CNrtMi67PjhbDH1tUI1KAB6PcPXlHr41yhFz+w6bJ9eE2Lqtt/d5vcKck1ycdIIblwtU4d77W3lnozXYZicE2+Co2272vdX+vlunSAy9YPBNip/55+Z0iLn+lQi/+W1Ln2ICBIPKU2vC3Pn7FgIBRQTmn+th1MjMXK1hWN016/gWpaXqBuYPukVxMnmiyeGzTADeftfi0SdC2AO4LW7bbnPPva1YFrjdcNYZ7iRbmiRE/rVNO6CLoPklwXnAiJQYFQenznW6AcGgsvrxEKoDP3fbZzbrXnHun9Onmow+OCO9dGRhcfC09jcd38B5dExm4fMKkyY43vnqGxGCwSjUbOOFF8MdHv3tw8xEmjdo2MIP2n/vEFTRuakxJ3bGjTMw2r7BBx/G1qlpbNKOMen4cRnpoQAd2hkA118fGA0yNXX2xEZRQWcn/evd0XtnO3/f7Qg6fFjGCnqoo2GboJapp6TWntjwejsFbW6OXdBgs/Oam8FTEWHRk6G9ybVlTiqNiZkEpatpND2pNEXE0dAAUNETUmtOlviREwEMJ5FXJqbanCzxopNLS9VtNLY2TSa74Ggo4Mob1jjRsNXM5gwNEUzTnGYYamcFHSKo6jRDRbKCDhFEZZqBMCbVhmRJDCKMN1AKUm1IlsSgUGJAegt6/LEuLlrgoagwdYveDptpcsmFHg7+VtqHBksM0jy770fzcjh8lskxR6VuZDXvNDczZ5iccGzaj+4KDCS9PbS1xQnLDR+eGg8VgeJixzNbWtM+ROgx0PT20N11zj9x5EG9mzu7y4yZOw7ncZnOxRLpI1dsWJGQ05YPUFefCYKmOV/sdKa2xow2cPUQbe++zlyTwjjuscXFzrkNDb0F6zpH+uXO9E/5NBCaUm1Ef3y6xXHDHDeMH9v9+vv6750CTJoYW7aBacIhbamff/u6t2BTJjvlNrcoX3yZ9oK2GiiNqbaiP7ZssztSRNr/ue3s+sqmfo8j6uyjY2tzZx1q4mubV/3go95ZD1Pb6tyy1R5Q8lmKaTUgvT20pUXZ3pYictQRLqRLy6oKr/+fc+M7ZJzBkd+JTlRPDpxxuvPUmIYG5aNN3QWdcIjB8LbmeNMnGZG322BAensowOtvOKINLxamT+vupS+uC7Ov7d533jk5jBs7sG6BacKC+R5KShzB1jwXJtxjR7Pjv+tcIKGwkyKa9ih1BpL+gr69sTOj73vHdffCUAjueyCEZUFODlz+01xmH9Xdk3tSUiJc/tNcZs5wLo6N71u8/mb3Lm5hoXD4LKeut9+NxJXiMmgY1JnHnbhkLvCdVNvSH7YNPp+zLmVEicH2HXa3IUT9HqWuXpk5w8Ttcu6Lsw418eQItjpjyaIig0kTDE452c25Z3s6mtJPt1j87x+dhOuunHNWDmPHGKjCg6tDNDZmgKDKX12iukn7u5zThBdejHDsMS7y8oSzz3Sz/DcWoS77Krz1ToQ9e20W/LOH4mHC6IONtsTpvjPibRvWbYjwxDO9M+0nTTQ5+gjHO9/ZaGXEcAVARTeZx5+0ZAQZsAQiHIbWEMyYZuLzCcOKhPd75OLu3atseM1pnosKnXWhPWluUTa+Z/GHB0K8+U6kV6Z9Qb5w2aUecj1CS6uyclUrLa3J/GYJRGWFy7LsTYaR9vEFADa8GmHGVJMZ002OPtJFQ4Py1J+792QiEXhpfYSX1kcoHiYcNMKgoEBoaVEaGpWdu+xezWs7Pp9QeomHwrZ834cfDbF3XwY0tW2IYW4Sv19zGlqCATIkr8jnFa76d09HKHDDaxH+9GSoz7BdNIw8yO
CiBZ2r2F5aF+FPT2XUVtzhpnqvTwDKFzVtyqTM+cJC4crSXEa0DTm+3GXz2BN9rwc9EC6XMzz5wWk5eNoeZPrq6xEefiy6hU+pRz6urfbOcAGIynoVMkbQhgbl13e0cP78HKZOMRlzsMGVpbl8ssXijTctPvjQOuDMyEEjhCO+7WL20S6KhzkXhm3D038O89eXwxkmJoCug/Zm1tAX2pZ3ZwyBoPK7la2ccKyL0091k5cnTJlkMmWSiW0761V27lICAaW5RTFNyMsViocJY0Y799WufPGlzaOPh/hsR2b0aHuiqi9Am6CmJX+JZEa/qBu2DS9viPDWuxbHzXZx7GzH2wwDRo00GDWA7di3bLVY90qE9z6wMtArO1A15QXosjqkYlHw40x/rp+IM802eaLJP4xx4rAjRwp5uc7XrN+j1Nfb7K5Ttm232fyp1eeUWQbyYW21byZ06dmq6FqUjBZU1Wk6u05znTLHzQ9Pc4ILN9U0p8q0ZLO2/ZfOBb82z6TGlixxI53adQhalOd9BvhbSgzKEg9fN9V5n2t/0yGo3y8RhAdTY1OWWFH0D3fdJR3hsm7RIVFWKfT5UMB0YtahJtOnmv1OkbUzZnRn9/28cwa2BUooDOs2hDMhKQxBVnV/34PyRYH3gFmDZlGUmCZU/cKLmeQHlrz9rsW996d9VL6jd9tOr/itiK5SlaWDZ1N0WBa88VaEKZOSp6ja8O57GfD8P9F7ev6pl6AhT+ud7ubcxen8eNWHVmdU0DxZ7LM94V7bUvaKD63wlzSowR2DY1OWWBHhVz2flQv7eaJ1CKuWNM8G/IYTIGT1uf1Kn4LeXlVYp+jvkmtTllgR5I6+njcP/ewK4bblVmDIxsoymIBh2bX7+3C/gtbU+HYCVUkxKUvMiOqNt9yS/9X+Pu930qww17sU5OPEm5UlNnRzKOhb3t8RB9j7TEKKnfaRo28KBnr5ihXSb7TjgNPay6vznxfRbIw31Yjcd2t1wdoDHTagPAXTkjKBuvityhIju23R6wZy4IAEranx7VThQiD9o9VDD0W4dCC7+0IUm6rXVvmeFmS/3eUsyUJqBrr/NkQhKEBBbt4iYH3UNmWJCYXXmurzfh7NOVEJ6vdLRExjAUJ9dKZliRaBOjHlvK6T1wMh6uTNZTflbceSM4BAtOdmGTDNqHF27U3ez6M9MaZs3Noa76uq/AuQAZOGGYclIucvW5q3LpaTY06vXr7U94SIXkK255tIVIXLllV5V8daQFz58suq8leJ8rN4ysjSFV2yvMp3dzwlJGTpdkVl8BoVXZ6o8r6BKPDL2mqfP96CEiZAeWXThYjcTYasM00jLITLa6t8CZl/TqhHlVUGzhThASCDt7QZVFpBFtRWex9JVIEJbyIrKptPROzHFEoSXfZQQqDOEvmn26q8CQ3UJHwR4bKleevUlCPIRpT2j/C6uIxjEi0mJEFQgNqbvJ8X5npPBn4BZOYK2uSgoL9uqvOeeOuNeduSUUHSe6Vli5pOFeReYFSy60pzdiNcVFvlezqZlSR93fby6vznbYMjQP+Y7LrSFpH7bIPDky0mDPK4sWxx8/dF7duBmQc8eAig6CfAVcur858drDoHPRBQWqruguLmK1T0RtJ8A4M4CAK3hAPeqgPlACWalEV2rruucaTtMq4ArgWKUmVHgmkC/R+XLUvb0mAHnZSH6q5c3FDiUXMhwkKU4am2JzakEbijVSK33F5VmNLcq5QL2s5Cf12hu9VTisrFZM499gNEV4Y9rXet8Jc0pNoYSCNBu1JW2ThTxLwA9GLSbbgj1KvysKHGqljnLJNJWgrajt+vrqbm4Om2wTyUucCMFJnyIbAW4ZlCj/dZv1/SdmI/rQXtybU3BA42bZ2rKnMQOQF0Evt7wnHshEG2oLpe0bUum7X9rSVJNzJK0J6Ulqo7b1jjREPM6Yah09SWqRiME5uDVCjBGRa56RweNQFhoEmUOjX4OzY7VHQTKptttT5u3l
uwNdrErHTi/wG5jWw4PKXrYgAAAABJRU5ErkJggg==)}.endToEndIdentification .public_recognition_speech .endToEndIdentification_end_recorder_img[data-v-0190c31b]{width:116px;height:116px;background:#2932E1;border-radius:50%;display:flex;justify-content:center;align-items:center;margin-left:98px;margin-bottom:20px;cursor:pointer}.endToEndIdentification .public_recognition_speech .endToEndIdentification_end_recorder_img .endToEndIdentification_end_recorder_img_back[data-v-0190c31b]{width:50px;height:50px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADIAAAAyCAYAAAAeP4ixAAAABmJLR0QA/wD/AP+gvaeTAAABU0lEQVRoge3ZPUoEQRCG4bcWfxDNDIzUW4jiegUTMfUICuaeQIz2BOZi4gEEg0XY0NBIcyMR/8DPYCdapnF2u53ugXqSnqAo6puZbgYGnHOuKUk9SXuSVnLPEkXSicauc83QS9RnvVo3EvWbWqog2XmQGJK2JJ1LWkvVcy5VoyldAH3gAzhL0TDXq7U0sUbzPVKa2iCSViUNJV22PdCsQk9kG9gBjiQttzjPzEJBeg1qWiNpX9KxpOAsuY7fxiQtAlfAPPAA3NbVZb/bDSwwDgEQ/LruQpBGPEhpPEhpPEhpPEhpPEiLvoDv6vo1VFT8R6OZfUo6ADaBu1BdKMhP4DoLM7v5qyYU5B4YAo9m9pZ0qn9SG8TMXoDdlmeJ0oXN3ogHifQ+sUbLdfyeAofAIFXDLEHMbASMUvb0PVKaVEGeq/UpUb88qr+6/c7/1XXOudb8AikhXZ/+ExptAAAAAElFTkSuQmCC);background-repeat:no-repeat;background-position:center;background-size:50px 50px}.endToEndIdentification .public_recognition_speech .endToEndIdentification_end_recorder_img .endToEndIdentification_end_recorder_img_back[data-v-0190c31b]:hover{opacity:.9}.endToEndIdentification .public_recognition_speech .endToEndIdentification_prompt[data-v-0190c31b]{height:22px;font-family:PingFangSC-Medium;font-size:16px;color:#000;font-weight:500;margin-left:124px;margin-bottom:10px}.endToEndIdentification .public_recognition_speech .speech_text_prompt[data-v-0190c31b]{height:20px;font-family:PingFangSC-Regular;font-size:14px;color:#999;font-weight:400;margin-left:90px}.endToEndIdentification 
.public_recognition_point_to[data-v-0190c31b]{width:47px;height:67px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAFIAAABwCAYAAACNSCemAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAUqADAAQAAAABAAAAcAAAAAAkHfOMAAAU9UlEQVR4Ae2dW4xkx1nH+5zTvbMXe1nbO9Mzu2vSD3YUZRMUgmISiIQhJBIPIeIh4iIHlJjbQ0SkiBekXAwCxAMWiiwURAARhUSJBIag+AGwklUU2RFyjLETOSEImyyemZ7ddfbqvcx0N7//v6pO95mM7ZnZ6Znu6and6TqnLudU/c+/vu+rr+p013q9Xl7bCzeNQH7q1KkcMBt7gN4klgBYPPfcc/uXlpZu+Vavt+8mLzex1SvD+tj58wfPnTt3eI+dG+eDhnbWarV6qnrx4sXs+vXr+2Dn9OnTpw9s/HKTWyPT0Kb7jTNnztQbjUb92rVrjSKEBqBenZubezHLsu7kQrS+npuRzz//fFn60KFDPYDLALQGnofPnz//w/Pz8wfLAnsHayJgRgJkAwDrV65cqe/fv79RJ9y4caMOkPU8z+udTkcsfXF6eroNxhYDa15tghMrygbQMmHBcO8BaE3MTNiQNIMiet0eOxMi1bgCZLfbNdsEoIa2AB0sDksPkPWGs2fPHierBHmwzKQeV4A8cuRITTJSYIiRCoA3CJgwlmZvoZzetKfZDZE/JCPrHBUA00gyUkp7eXlZclFysgF2heSk5OXKyorkZgP2Uq+ed7s3/vv48eOn+5eczCNp7RrKxqwTIxVMOzGPPw1vQHV+iBslUnneLShycmFh4R7NjsqMCTzI77333lIOyiAXcClMcaChvW9fmjk2MI1WMlhZQ7FbhsJYmUsziIJ3oIxOTCCG7nJFRh4+fNg2JMBkg1obMF24EckoEBU6najVi6LWY7gz/N/abrffBv4JeZebhA8b5KmjGN8+FCOT1oaNZmzEstTkYiVk7AFeBqI1TY86/FG1xVB/N4roeLruJMQa2mU/B2XkICNVII1usZVTwK1blpbsVCGBSmC4H6xPTb0Ldv7kE0880Reqytylwcpm0GmhfqK9a4mRq/sttpIGYCtmp5ipIEamAEEFqv6fPHHixHtxgsymvN0aV6eIOC2mrl6V86KBXIRs9QaaujJVRPMUeTSFGNZ1CFrHyY6J1K13Oc4wi0jDPMowk3oNvB11biLt/uTMzMzXiQXzrgsVZSPjh45mYqRCb2qql2SkE0hCZVtri4cqaxmJdIyyUkObNJUmjU9uAIM99fzxxcUz70N+Tit3twUrm8GhraHL7MYgMb7L/sqGhKg+t1xkSMdh7jSZQbFwDyUe6oMkhjvgdoklDjpNsL9fmv2BBx6oPMTyRmN64KFN29f0R8Zh3dBsZmUlxzMUPEEa0prdaKZD3ULHyxrS3a5mQXXNepSmPOypBjcJwz4Mc9aHEAFZ7wWu//Cdd9754phiV2m2Gfly/kgN3cS6pLVdW3ZkHdFIkC0JsEFjw0oNZ9gJW/MeMjMLQxuGiplmJwVIRHa2YPaHFs6evafSojE9MSPX448U+wDM824YJzb6WAwEz8hEMa3PyqSEmKrDyF5iLNYnLM20ckndGnP2LPvOVKPxhTvuuOPimOIobvQD4EhZWPattiOVjoz0zMcyUoqHYCVDXog7VslmKKyUrOSayE4EpGWkrlLL8nDXnu6uG+a1/OSNGysfwdf5ZpcYw48KkDDNCkMArWVHItPcxWQ76iQpGccMbdmTqm870qX1kTOFtPb29VE8TtNnCCTktYN5Xvzm4uLir4+j87gC5Hr8kanrxJafZqKop4DAtMkDCwWmdh4Ebd2NBrtnRaFsDQlKvk
5UJpJU6xhvy+v1PwLQN8SCYxGpL9a8m/FHdruWk2hojHFr7DqGd9e+SnAMhnpRkwxVRiwDbZGPmeQjMSg5n3PkpvAOspNj0rNHL1++8Lm77777+qij6SniZv2RwQkUp4ga1pGY0tpipGCBad5cBNCIw0IWZWRfV3lW344Z+l3ErmuQEI57P3fwllseZIp598gDuRX+yNTJoHAwgTCHZP6Eod3VojgKBqVDgkALMlIWESdBdirRACMMXCBHckh4FFl+rNPt/ily830wOz6qdMfRiU2G1Jyb8UdG+xERhwfIjJQVKZmIToaVZmTVjgy3Jc15AhqpGdR4Hhjp+i7GUfEryM2H5ue//5rU3lGKbZCnBm2FP9LXMiPhD0GD1LSzGRTZaPZJISkfoMVKmUecZYKdKmYk9RniPAYnIzIzhvj1T72wuPiLZMlyGpmQD8kfKbsoyMlVXR20IaXahZtg479lpBDVqWUk+QDKdWSLhoCMkKL64OLi0p+jIOdS+k7HVjatgU1UatAW+iN/wCCXXEReBqKKrPyhgIynQVMDjCXD3Hk905XjwEDJUJCEpm9eXul+nuH+HlXZ6SDzpyiniMPzR9pvKRMJFso/6Rg4mS5qelnIFCLdplADr1s4zmoNELN5xEhuMO9iStnjD1OJmEeCy0mmVPdrnc6hj584cfjcTgFKX/phiP7IeBM0uY+Io9Fe3h3GheFMCqRzECMlRqWGrN0lMEXIEAdmOvuni+KlR+bb7XeFitv/aWUzOLRhyBD9kbIlCUKTMR7MIGloEAMogyyoOAh2pA6jjExAu7IuAq8j4Eha4M5uA+BPzi8sPIjsvFUltjPkKBs13R1M5o/AnMI7roYg752n0ZgaFufayso0x5YBLvMnhuBW4wTSYTsGDoKSjsM1AgBR0fjE99CRqFcyUlqdnKiA4v0jevKZpDv2GasrvHd5pfNljPi3p+ztiM3IHfJHilMGGgPI5k9uewigwNLmDw9KQzuaRAE3QxugEbJ6TIphZ+Alh1Q7vtLpPjy/uPjH27UDJDHSLZMdicbuP2h1dCBouUFsrWnlkLUbZUFIl7H3B1r2px6c8D9MDTVuSxMmWTiMR5DTEOcfxE2ST9Z7aIMxFT6cKz+EGDPkOZedCXAhTXGfp4L0t6f2H/was6Ifi3WHFoVxFy+PAnAHBNY2+yMlL7l3MLJBzTal6CagmGCasW6mS7kcbFaKgC6HuXKV6ISM7oDra7O8OLWw0P4Y5eSgGUqoAAl73Aqe8E74I4VYnK5o3g27ojj00xWplQZ6lIK9FE35YqIZrWQTlSpSRu6OgJYX/iOLi+3H2Nv5umEgWQFyRPyRYBUZSY+BbbDfnCT0SJYyUgBUy0joRwE9DKW7otjqMkrOsregiJ5kVvQ7/fSQe7OfAjJrtVpuUJKR3MR7f3RxtLeXGHSspQaFtNSghS/sZTl041JD8JAH7w+EwG+h4Re7zhWk2xmmfnyodJhvrQ6bwtCOjKRCYB/FjQoJwfwJEKm+WswFfClGN2cKlpHOihirhU6PHzT/ALz9xGJ76VF2z93p3C348BRxdP2R4BcBUV8TOwFD7IoPRDmcWhnBTDNQjygSUYIgHqcU1aDcz8DOZ1hj/1Wd32zIox3p62x2f2RqxDD8kRE8UDOvLQEtI6GjDHIpI4HnoS1AzdVgXKR2BXANHvhbOTlGJfxQp9v79GK7/fClS5duageIR0a6YTLI9TgHtfZ69kdq0NoEGo4/EgpGH2VQJm5yYJjBS4yEnPFccewd3YlDP3IyMTSeQvpfuHzlyjdxgPx8wmKjsQ3yVGnU/ZGxnWFgM2Q9vJUIUGYkcJmdJBHLSHeIjGSIR1amfMoEhqpYNoOS/yKK6K83M8XU0NZVHLZwf6ToqUbGZ57ugJwLLLF8s/DyOR9SOBquGsKc6lgX0NDmMykT+kumR7PyhU2Qi8KIgvqvsr6z7MjVQRkCWbELuXAEmDTu+wFmRc8A5k+trvtK51Y2rV
bLDZWMVOHx8EfSeTBPaNIBt10Y0QWdlnYkgKl//kggKkHHCilNx67c670GML+y0G4/SN66XjIQI3tJa3fZQ66g3Wi+KC3QZnyf8KEpIjaQT6MJFOViuXzo9kFGQthp4b5yBhNDq8U2rdNoXZu+inta1emPQhVWfQIx828fqrIVj0/50H8u4HoBPCGisvqQtoHIZl+4gBKVW/nsg6iq1ClB5VTc/jBm0jdg56vuAElN9i3G1x8Z4ekP14AoQs9PS1qdHgooPRfFCuTpIx47wWnOj2U4fj2a/euw86Mc910JrtX/sLJpDQxtCo+pPzJ0iuZzIOkbAEpd1VmZRp7KqajSfaz8eKxyZVkREx8i7PyDdnvpMdj5Wor9QMg1tEn1XZP5Q8Ux9keqLyZg7KxBNboCzIE4gJVYSpWYp/RUTrGeR6ysycE9yM6nFhaWPkieMYs3qZmRu80fSeckI2MfdehOC+CQJnQIKpOYlwBUNR+Tn9JUOpRzvQMIiofaS0v/OviCVmKkrltLc22f8KEWpGPFY+ePHGw83TG71KXYrQSUupk6GjEuy6RLBNCNifOo8rOaYrJOdJ/KVPxzq/2RLCnERxguJ6eFpoHS2HK50RDvj9TyqtJ9zlMnsUYhLRFS0RtXWNkqm+j9kW47qs77WdDH4XJhuCR/pLSuPeT2R8pu5HL80W0uxvCVqiSZTupcjXQukUGDRu407VJDwkdsRgBGdajpqjrkWkQCmjo6cnDlmJfSVZ9whAqfWWif+YmK1t7F/khQAcfQeQE/AJQBcZrTQUdgRaAMpCD1+Rr1eUZfOHRg6mMVIHe7PzKxyegkcAKTDV4AMOSm434dMbfPUo7ngfs9c7Ozv4SSPisgs1ar5ceSZCTo72Z/ZGCcOg4wwiaxT3E6d1rErUynjo7zLP+bfY3662dnZ/+ZJAdPEdPMZnCuzU1e9X3t4e+PZN4Th5Nau5X+SBAJCOgT9AJwAbkAcJC6To8lYd1zRZ69c3Z25v7bb7/9Qv8CtC3akU6bNH+ksDTb6L3BI9a5/+Kx0lGmLGNmD9VqM29sNpuPGqxVHxWtLYOcTfjUDf7IpLXlj5Rm1jQbheyX3nUdKWqzUhsEWCvgpUWWDOL+SL9lo9m0PIladvASaxhLPFqtS/n5a94t/V/Yw6OukYEWlyYmBDUq3Rz8kaiMOOBCfZ26kObyQoYqLkDsAydZYdPiWFV5OiNKxEzM0y3L4OPs21z//uZs87EyfY0DG+QpfZL9kYMABlCzFZ7Mn8w2p98EC18RROGnoa3YYVBGDnrIlZne/DJLTJtXfF97bP2REFij76kir91zbHb29+jvul4EsLJptYLW3vNH1q6j0D7anJl+C69E/4fZtc4PK5uktSfZHwlej/MSy4+ikf8QFrInZ2NBdmQZJtQf+RKK7MOzzZm3Hzt69NkSjA0eWNkMDm0E7cT4I8Hqy0U29cbZ2ek/g4XRSb9BBGNxD22OrfMnxR9JZy8UefFbc7PNdzSbR/5nc9BVa5mRk+SPxMr8UqNenGw2p/+yCsXNnSVG+ipprp0uqWGejhWPuT/yLF91cB/K5N1Hjx59YbBfW3FcUTbYTx7iAnC1HambDfF97aHtjzRIWfYPrN3+CCz87FaAttY1KlPE1f5IbUFBCJf1YKSnitpDrimjgsqIuIrx3fYKpoqupB1qlAnLbszftPmsq2kgWwAQ62EfWSI8Cbn8hThkNdPzi59UkPjn9mqB9qDzrhh3YnDqhvbohkzaqGSqqW6aNlOq221n9eJ3j01PP6K2DjNUGLmb/JHA/fn9U/veOrcNIOoBCcgsmT9JRopi+gYBhXHbH8k6xv8hoX4ZWfih1a4ud2hIH54ippnN4Fyb4TJe/khtuuh1/67bWXkn07uvDgmvl71sRWuPsT/yf5GH9/Gd6Q8cO3bspZft7RAzKspm3PyR2Bh4R/PPNGen/0IDaIg4veqlbZCnUmPmj/xuvV78GrLwEzsNovDT0FbsMC
gjV9uRo+KPxLRawUj6q2Zz5v0Y1t9Obd/p2Momae1R90diNT6LTvmNubmZv4WFG3Z1DRNs2bYj/742EwW9Y/e5uZmZfwJAmekjFyrKRv5ILX1pxy7TwfD9kcwa9N5MDOH7IznPGetsAo1bVnoshGnjKNMgp2l6oe+P9MyGaYjMVc1i9L62Zh96X5sBymTHkxRdXO/RaKeJpjFxRqOYDanf5McjPsnvRCyo2KgGK5vBoS1jfHjfHxlf1hSukExTRc0J9FInESATjLei3jVmmJ9iJ8PHRx1ENbuOslHTxYNaMn8Ept7X1tzaLCNv9fvacnAoaI6teTffHxkuwrXEYKXpfe2eN88DC0DhieaA2wXGaYXVgIq/ur+S2cWgafN/Lq+sfHruxIkd+2oFtWcjoX4q/MKSfqPB9Yh7/BaDX+pk15mGnoAuVxFdSIvZQqnTwQ5mjOpbI7VuzXdKxvVtneo7lDKcFr6Avz9SD0yYAaDWtYUzT4GYslrczrPLgPj3LH8+7vuM0UceGekmp7l2an8CMZ0P0x+JKH7y8sWLvz+OIAqfirLRcFWiAJQdmXZaKE1hSPsjLyEk/hEAnw53Gc/PCpDb7o/Mat/gYX2J79m9Op7w9VtdAVIzGykPZKR/z0bKRu/ZkJZqmLHxxPIT8LNcm4KkgtkMpDeP+R5EkZrRGkSfTD/eA0OZSEDig816F5aXbzwCgN9NFx73WH0VmJv6PZvNfH9knu97ambm9q+g8MMvaYw7grH92+aP5IGd58vhP9ts3vEvuw1EYSlGllPEq1evok8aBWZPnWHtX1hChjUOHDjgX1bqdHJGbUc/t1IHFH0LtL+qi2vEb33WNz7XWbZZVqxvgA5fx1UUz+Dy188LjNT8eCsHQ0VGJoOcDvt97aS1N7s/Eivg+8jQrzJHXtrKRo/itYbij0Q1dQHxSaZ2D7PPeteDqAdrZcOaTZ0ZTfmTpxraAKHhrCFb/nCa0hjOTpOiYUewjv2TArgs/IXtlLnAsP/322677fwoMmdYbdoyfyQGD6O4+zQLT/82aSAmRpbK5somvz+SCdJFTJqnYWf4/ZZhPfYRvm5F2WzUH8lcUix8Fo/1aUC0c2OE+zrUpm3aH4mv7EU2ETzO8uf3Jh1EPSHbkcTr/j0b7Ev0Sf69SdHG66VxgRtNE2lWDvblzKlzHLr8pFenYL6tc+UZbMxrTaAvAuB/3XrrrZfWe4NJKVeZ2bBWs+bva4uFzG4Wt3Mvzbg9gIqyYciu5Y+8hBw8gxwsV8DGrZPb0d4KkGhga15A00piB7v83E7tpdmOzm/lPSpAJn8kM5qX8BVeAk+tR+2FdSDgKSLl7I+8cOFCjoPi2smTJ3eVr3AdONx0EWltLYoWuNA6d91111WmeHuycDOwyh/Jn9fmN1N/r05EABAH12H2cNlDYGcR+H83Z8q9vSzjfQAAAABJRU5ErkJggg==) no-repeat;background-position:center;background-size:47px 67px;margin-top:91px;margin-right:67px}.endToEndIdentification .public_recognition_result[data-v-0190c31b]{width:680px;height:230px;background:#FAFAFA;padding:40px 50px 0}.endToEndIdentification .public_recognition_result div[data-v-0190c31b]:nth-of-type(1){height:26px;font-family:PingFangSC-Medium;font-size:16px;color:#666;line-height:26px;font-weight:500;margin-bottom:20px}.endToEndIdentification .public_recognition_result 
div[data-v-0190c31b]:nth-of-type(2){height:26px;font-family:PingFangSC-Medium;font-size:16px;color:#666;line-height:26px;font-weight:500}.speech_recognition[data-v-02a5e0f4]{width:1200px;height:410px;background:#FFFFFF;padding:40px 50px 50px 44px;position:relative}.speech_recognition .frame[data-v-02a5e0f4]{width:605px;height:50px;border:1px solid #eeeeee;border-radius:25px;position:absolute}.speech_recognition .speech_recognition_mytabs .ant-tabs-tab[data-v-02a5e0f4]{position:relative;display:inline-flex;align-items:center;font-size:14px;background:transparent;border:0;outline:none;cursor:pointer;padding:12px 26px;box-sizing:border-box}.speech_recognition .speech_recognition_mytabs .ant-tabs-tab-active[data-v-02a5e0f4]{height:50px;background:#EEEFFD;border-radius:25px;padding:12px 26px;box-sizing:border-box}.speech_recognition .speech_recognition_mytabs .speech_recognition .speech_recognition_mytabs .ant-tabs-ink-bar[data-v-02a5e0f4],.speech_recognition .speech_recognition_mytabs .ant-tabs-ink-bar[data-v-02a5e0f4]{position:absolute;background:transparent!important;pointer-events:none}.speech_recognition .speech_recognition_mytabs .experience .experience_wrapper .experience_content .experience_tabs .ant-tabs-nav[data-v-02a5e0f4]:before{position:absolute;right:0;left:0;border-bottom:1px solid transparent!important;content:""}.speech_recognition .speech_recognition_mytabs .ant-tabs-top>.ant-tabs-nav[data-v-02a5e0f4]:before,.speech_recognition .speech_recognition_mytabs .ant-tabs-bottom>.ant-tabs-nav[data-v-02a5e0f4]:before,.speech_recognition .speech_recognition_mytabs .ant-tabs-top>div>.ant-tabs-nav[data-v-02a5e0f4]:before,.speech_recognition .speech_recognition_mytabs .ant-tabs-bottom>div>.ant-tabs-nav[data-v-02a5e0f4]:before{position:absolute;right:0;left:0;border-bottom:1px solid transparent!important;content:""}.speech_recognition .speech_recognition_mytabs .ant-tabs-nav[data-v-02a5e0f4]:before{position:absolute;right:0;left:0;border-bottom:1px solid 
transparent!important;content:""}.speech_recognition[data-v-1ec3a672]{width:1200px;height:410px;background:#FFFFFF;padding:40px 0 50px 50px;box-sizing:border-box;display:flex}.speech_recognition .recognition_text[data-v-1ec3a672]{width:589px;height:320px}.speech_recognition .recognition_text .recognition_text_header[data-v-1ec3a672]{margin-bottom:30px;display:flex;justify-content:space-between;align-items:center}.speech_recognition .recognition_text .recognition_text_header .recognition_text_title[data-v-1ec3a672]{height:26px;font-family:PingFangSC-Medium;font-size:16px;color:#000;letter-spacing:0;line-height:26px;font-weight:500}.speech_recognition .recognition_text .recognition_text_header .recognition_text_random[data-v-1ec3a672]{display:flex;align-items:center;cursor:pointer}.speech_recognition .recognition_text .recognition_text_header .recognition_text_random span[data-v-1ec3a672]{display:inline-block}.speech_recognition .recognition_text .recognition_text_header .recognition_text_random span[data-v-1ec3a672]:nth-of-type(1){width:20px;height:20px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAABmJLR0QA/wD/AP+gvaeTAAABpklEQVQ4jZWSvWuTURSHn/PmzRuVxo+hiGATKy+0aVohSRcRIYPi9yCijv4DriJS/AMEoUOXgq4FoZvQvYsItWmEIE2sH/EVLOigHdSSJvfnYqLSpI1nuvzOfR7OuVzooyYmvh3q1fN2g4tF+c34xvNMvn61W9+6hWFufTBO4wJYaMYmgNAdJztbK6eWe04QhmuJsVz0MG6NGsYlcA2kAcEV4KBnehrm1gf/Zvz2IZt9FbhEsCDY2FIw8qZ85AvAWP7DDdAtYMGwB+18m8DtSd6X9L26MnQdzP2ZUWtS7Hy1NFTp+QbHC28PJOS/d7LxWjn1qdvFXuUBBC52Bnj2v3BH4HkMm6zWLzSaj84VCsvxjkDip3D7+oGz2c8DhuaTyYI6AlAJ41Q/AgU/TgMvFxet2RGsrhxbAvMzk/WLu+Ae2JRMs+3k9wTmnOk2zh5lJqPxXvBoLpqWqVktpZ+003++ciYf3QQ3gzHT8mzu9Yv0u6MnP+7dv9kqyuyeQTO2xbVKJf21qwBg5ER92Hy7a3AZOAy0gCXE49UwNce8tXZec9vOO9cvuPyUtrJ2lqQAAAAASUVORK5CYII=) no-repeat;background-position:center;background-size:20px 20px;margin-right:5px}.speech_recognition .recognition_text .recognition_text_header .recognition_text_random 
span[data-v-1ec3a672]:nth-of-type(2){height:20px;font-family:PingFangSC-Regular;font-size:14px;color:#2932e1;letter-spacing:0;font-weight:400}.speech_recognition .recognition_text .recognition_text_field[data-v-1ec3a672]{width:589px;height:264px;background:#FAFAFA}.speech_recognition .recognition_text .recognition_text_field .textToSpeech_content_show_text[data-v-1ec3a672]{width:100%;height:264px;padding:0 30px 30px 0;box-sizing:border-box}.speech_recognition .recognition_text .recognition_text_field .textToSpeech_content_show_text .ant-input[data-v-1ec3a672]{height:208px;resize:none;padding:21px 20px}.speech_recognition .recognition_point_to[data-v-1ec3a672]{width:47px;height:63px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAFIAAABwCAYAAACNSCemAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAUqADAAQAAAABAAAAcAAAAAAkHfOMAAAU9UlEQVR4Ae2dW4xkx1nH+5zTvbMXe1nbO9Mzu2vSD3YUZRMUgmISiIQhJBIPIeIh4iIHlJjbQ0SkiBekXAwCxAMWiiwURAARhUSJBIag+AGwklUU2RFyjLETOSEImyyemZ7ddfbqvcx0N7//v6pO95mM7ZnZ6Znu6and6TqnLudU/c+/vu+rr+p013q9Xl7bCzeNQH7q1KkcMBt7gN4klgBYPPfcc/uXlpZu+Vavt+8mLzex1SvD+tj58wfPnTt3eI+dG+eDhnbWarV6qnrx4sXs+vXr+2Dn9OnTpw9s/HKTWyPT0Kb7jTNnztQbjUb92rVrjSKEBqBenZubezHLsu7kQrS+npuRzz//fFn60KFDPYDLALQGnofPnz//w/Pz8wfLAnsHayJgRgJkAwDrV65cqe/fv79RJ9y4caMOkPU8z+udTkcsfXF6eroNxhYDa15tghMrygbQMmHBcO8BaE3MTNiQNIMiet0eOxMi1bgCZLfbNdsEoIa2AB0sDksPkPWGs2fPHierBHmwzKQeV4A8cuRITTJSYIiRCoA3CJgwlmZvoZzetKfZDZE/JCPrHBUA00gyUkp7eXlZclFysgF2heSk5OXKyorkZgP2Uq+ed7s3/vv48eOn+5eczCNp7RrKxqwTIxVMOzGPPw1vQHV+iBslUnneLShycmFh4R7NjsqMCTzI77333lIOyiAXcClMcaChvW9fmjk2MI1WMlhZQ7FbhsJYmUsziIJ3oIxOTCCG7nJFRh4+fNg2JMBkg1obMF24EckoEBU6najVi6LWY7gz/N/abrffBv4JeZebhA8b5KmjGN8+FCOT1oaNZmzEstTkYiVk7AFeBqI1TY86/FG1xVB/N4roeLruJMQa2mU/B2XkICNVII1usZVTwK1blpbsVCGBSmC4H6xPTb0Ldv7kE0880Reqytylwcpm0GmhfqK9a4mRq/sttpIGYCtmp5ipIEamAEEFqv6fPHHixHtxgsymvN0aV6eIOC2mrl6V86KBXIRs9QaaujJVRPMUeTSFGNZ1CFrHyY6J1K13Oc4wi0jDPMowk3oNvB11biLt/uTMzMzXiQXzrgsVZSPjh45mYqRC
b2qql2SkE0hCZVtri4cqaxmJdIyyUkObNJUmjU9uAIM99fzxxcUz70N+Tit3twUrm8GhraHL7MYgMb7L/sqGhKg+t1xkSMdh7jSZQbFwDyUe6oMkhjvgdoklDjpNsL9fmv2BBx6oPMTyRmN64KFN29f0R8Zh3dBsZmUlxzMUPEEa0prdaKZD3ULHyxrS3a5mQXXNepSmPOypBjcJwz4Mc9aHEAFZ7wWu//Cdd9754phiV2m2Gfly/kgN3cS6pLVdW3ZkHdFIkC0JsEFjw0oNZ9gJW/MeMjMLQxuGiplmJwVIRHa2YPaHFs6evafSojE9MSPX448U+wDM824YJzb6WAwEz8hEMa3PyqSEmKrDyF5iLNYnLM20ckndGnP2LPvOVKPxhTvuuOPimOIobvQD4EhZWPattiOVjoz0zMcyUoqHYCVDXog7VslmKKyUrOSayE4EpGWkrlLL8nDXnu6uG+a1/OSNGysfwdf5ZpcYw48KkDDNCkMArWVHItPcxWQ76iQpGccMbdmTqm870qX1kTOFtPb29VE8TtNnCCTktYN5Xvzm4uLir4+j87gC5Hr8kanrxJafZqKop4DAtMkDCwWmdh4Ebd2NBrtnRaFsDQlKvk5UJpJU6xhvy+v1PwLQN8SCYxGpL9a8m/FHdruWk2hojHFr7DqGd9e+SnAMhnpRkwxVRiwDbZGPmeQjMSg5n3PkpvAOspNj0rNHL1++8Lm77777+qij6SniZv2RwQkUp4ga1pGY0tpipGCBad5cBNCIw0IWZWRfV3lW344Z+l3ErmuQEI57P3fwllseZIp598gDuRX+yNTJoHAwgTCHZP6Eod3VojgKBqVDgkALMlIWESdBdirRACMMXCBHckh4FFl+rNPt/ily830wOz6qdMfRiU2G1Jyb8UdG+xERhwfIjJQVKZmIToaVZmTVjgy3Jc15AhqpGdR4Hhjp+i7GUfEryM2H5ue//5rU3lGKbZCnBm2FP9LXMiPhD0GD1LSzGRTZaPZJISkfoMVKmUecZYKdKmYk9RniPAYnIzIzhvj1T72wuPiLZMlyGpmQD8kfKbsoyMlVXR20IaXahZtg479lpBDVqWUk+QDKdWSLhoCMkKL64OLi0p+jIOdS+k7HVjatgU1UatAW+iN/wCCXXEReBqKKrPyhgIynQVMDjCXD3Hk905XjwEDJUJCEpm9eXul+nuH+HlXZ6SDzpyiniMPzR9pvKRMJFso/6Rg4mS5qelnIFCLdplADr1s4zmoNELN5xEhuMO9iStnjD1OJmEeCy0mmVPdrnc6hj584cfjcTgFKX/phiP7IeBM0uY+Io9Fe3h3GheFMCqRzECMlRqWGrN0lMEXIEAdmOvuni+KlR+bb7XeFitv/aWUzOLRhyBD9kbIlCUKTMR7MIGloEAMogyyoOAh2pA6jjExAu7IuAq8j4Eha4M5uA+BPzi8sPIjsvFUltjPkKBs13R1M5o/AnMI7roYg752n0ZgaFufayso0x5YBLvMnhuBW4wTSYTsGDoKSjsM1AgBR0fjE99CRqFcyUlqdnKiA4v0jevKZpDv2GasrvHd5pfNljPi3p+ztiM3IHfJHilMGGgPI5k9uewigwNLmDw9KQzuaRAE3QxugEbJ6TIphZ+Alh1Q7vtLpPjy/uPjH27UDJDHSLZMdicbuP2h1dCBouUFsrWnlkLUbZUFIl7H3B1r2px6c8D9MDTVuSxMmWTiMR5DTEOcfxE2ST9Z7aIMxFT6cKz+EGDPkOZedCXAhTXGfp4L0t6f2H/was6Ifi3WHFoVxFy+PAnAHBNY2+yMlL7l3MLJBzTal6CagmGCasW6mS7kcbFaKgC6HuXKV6ISM7oDra7O8OLWw0P4Y5eSgGUqoAAl73Aqe8E74I4VYnK5o3g27ojj00xWplQZ6lIK9FE35YqIZrWQTlSpSRu6OgJYX/iOLi+3H2Nv5umEgWQFyRPyRYBUZSY+BbbDfnCT0SJYyUgBUy0joRwE9DKW7
otjqMkrOsregiJ5kVvQ7/fSQe7OfAjJrtVpuUJKR3MR7f3RxtLeXGHSspQaFtNSghS/sZTl041JD8JAH7w+EwG+h4Re7zhWk2xmmfnyodJhvrQ6bwtCOjKRCYB/FjQoJwfwJEKm+WswFfClGN2cKlpHOihirhU6PHzT/ALz9xGJ76VF2z93p3C348BRxdP2R4BcBUV8TOwFD7IoPRDmcWhnBTDNQjygSUYIgHqcU1aDcz8DOZ1hj/1Wd32zIox3p62x2f2RqxDD8kRE8UDOvLQEtI6GjDHIpI4HnoS1AzdVgXKR2BXANHvhbOTlGJfxQp9v79GK7/fClS5duageIR0a6YTLI9TgHtfZ69kdq0NoEGo4/EgpGH2VQJm5yYJjBS4yEnPFccewd3YlDP3IyMTSeQvpfuHzlyjdxgPx8wmKjsQ3yVGnU/ZGxnWFgM2Q9vJUIUGYkcJmdJBHLSHeIjGSIR1amfMoEhqpYNoOS/yKK6K83M8XU0NZVHLZwf6ToqUbGZ57ugJwLLLF8s/DyOR9SOBquGsKc6lgX0NDmMykT+kumR7PyhU2Qi8KIgvqvsr6z7MjVQRkCWbELuXAEmDTu+wFmRc8A5k+trvtK51Y2rVbLDZWMVOHx8EfSeTBPaNIBt10Y0QWdlnYkgKl//kggKkHHCilNx67c670GML+y0G4/SN66XjIQI3tJa3fZQ66g3Wi+KC3QZnyf8KEpIjaQT6MJFOViuXzo9kFGQthp4b5yBhNDq8U2rdNoXZu+inta1emPQhVWfQIx828fqrIVj0/50H8u4HoBPCGisvqQtoHIZl+4gBKVW/nsg6iq1ClB5VTc/jBm0jdg56vuAElN9i3G1x8Z4ekP14AoQs9PS1qdHgooPRfFCuTpIx47wWnOj2U4fj2a/euw86Mc910JrtX/sLJpDQxtCo+pPzJ0iuZzIOkbAEpd1VmZRp7KqajSfaz8eKxyZVkREx8i7PyDdnvpMdj5Wor9QMg1tEn1XZP5Q8Ux9keqLyZg7KxBNboCzIE4gJVYSpWYp/RUTrGeR6ysycE9yM6nFhaWPkieMYs3qZmRu80fSeckI2MfdehOC+CQJnQIKpOYlwBUNR+Tn9JUOpRzvQMIiofaS0v/OviCVmKkrltLc22f8KEWpGPFY+ePHGw83TG71KXYrQSUupk6GjEuy6RLBNCNifOo8rOaYrJOdJ/KVPxzq/2RLCnERxguJ6eFpoHS2HK50RDvj9TyqtJ9zlMnsUYhLRFS0RtXWNkqm+j9kW47qs77WdDH4XJhuCR/pLSuPeT2R8pu5HL80W0uxvCVqiSZTupcjXQukUGDRu407VJDwkdsRgBGdajpqjrkWkQCmjo6cnDlmJfSVZ9whAqfWWif+YmK1t7F/khQAcfQeQE/AJQBcZrTQUdgRaAMpCD1+Rr1eUZfOHRg6mMVIHe7PzKxyegkcAKTDV4AMOSm434dMbfPUo7ngfs9c7Ozv4SSPisgs1ar5ceSZCTo72Z/ZGCcOg4wwiaxT3E6d1rErUynjo7zLP+bfY3662dnZ/+ZJAdPEdPMZnCuzU1e9X3t4e+PZN4Th5Nau5X+SBAJCOgT9AJwAbkAcJC6To8lYd1zRZ69c3Z25v7bb7/9Qv8CtC3akU6bNH+ksDTb6L3BI9a5/+Kx0lGmLGNmD9VqM29sNpuPGqxVHxWtLYOcTfjUDf7IpLXlj5Rm1jQbheyX3nUdKWqzUhsEWCvgpUWWDOL+SL9lo9m0PIladvASaxhLPFqtS/n5a94t/V/Yw6OukYEWlyYmBDUq3Rz8kaiMOOBCfZ26kObyQoYqLkDsAydZYdPiWFV5OiNKxEzM0y3L4OPs21z//uZs87EyfY0DG+QpfZL9kYMABlCzFZ7Mn8w2p98EC18RROGnoa3YYVBGDnrIlZne/DJLTJtXfF97bP2REFij76kir91zbHb29+jvul4EsLJptYLW3vNH1q6j0D7anJl+C69E/4fZtc4PK5uk
tSfZHwlej/MSy4+ikf8QFrInZ2NBdmQZJtQf+RKK7MOzzZm3Hzt69NkSjA0eWNkMDm0E7cT4I8Hqy0U29cbZ2ek/g4XRSb9BBGNxD22OrfMnxR9JZy8UefFbc7PNdzSbR/5nc9BVa5mRk+SPxMr8UqNenGw2p/+yCsXNnSVG+ipprp0uqWGejhWPuT/yLF91cB/K5N1Hjx59YbBfW3FcUTbYTx7iAnC1HambDfF97aHtjzRIWfYPrN3+CCz87FaAttY1KlPE1f5IbUFBCJf1YKSnitpDrimjgsqIuIrx3fYKpoqupB1qlAnLbszftPmsq2kgWwAQ62EfWSI8Cbn8hThkNdPzi59UkPjn9mqB9qDzrhh3YnDqhvbohkzaqGSqqW6aNlOq221n9eJ3j01PP6K2DjNUGLmb/JHA/fn9U/veOrcNIOoBCcgsmT9JRopi+gYBhXHbH8k6xv8hoX4ZWfih1a4ud2hIH54ippnN4Fyb4TJe/khtuuh1/67bWXkn07uvDgmvl71sRWuPsT/yf5GH9/Gd6Q8cO3bspZft7RAzKspm3PyR2Bh4R/PPNGen/0IDaIg4veqlbZCnUmPmj/xuvV78GrLwEzsNovDT0FbsMCgjV9uRo+KPxLRawUj6q2Zz5v0Y1t9Obd/p2Momae1R90diNT6LTvmNubmZv4WFG3Z1DRNs2bYj/742EwW9Y/e5uZmZfwJAmekjFyrKRv5ILX1pxy7TwfD9kcwa9N5MDOH7IznPGetsAo1bVnoshGnjKNMgp2l6oe+P9MyGaYjMVc1i9L62Zh96X5sBymTHkxRdXO/RaKeJpjFxRqOYDanf5McjPsnvRCyo2KgGK5vBoS1jfHjfHxlf1hSukExTRc0J9FInESATjLei3jVmmJ9iJ8PHRx1ENbuOslHTxYNaMn8Ept7X1tzaLCNv9fvacnAoaI6teTffHxkuwrXEYKXpfe2eN88DC0DhieaA2wXGaYXVgIq/ur+S2cWgafN/Lq+sfHruxIkd+2oFtWcjoX4q/MKSfqPB9Yh7/BaDX+pk15mGnoAuVxFdSIvZQqnTwQ5mjOpbI7VuzXdKxvVtneo7lDKcFr6Avz9SD0yYAaDWtYUzT4GYslrczrPLgPj3LH8+7vuM0UceGekmp7l2an8CMZ0P0x+JKH7y8sWLvz+OIAqfirLRcFWiAJQdmXZaKE1hSPsjLyEk/hEAnw53Gc/PCpDb7o/Mat/gYX2J79m9Op7w9VtdAVIzGykPZKR/z0bKRu/ZkJZqmLHxxPIT8LNcm4KkgtkMpDeP+R5EkZrRGkSfTD/eA0OZSEDig816F5aXbzwCgN9NFx73WH0VmJv6PZvNfH9knu97ambm9q+g8MMvaYw7grH92+aP5IGd58vhP9ts3vEvuw1EYSlGllPEq1evok8aBWZPnWHtX1hChjUOHDjgX1bqdHJGbUc/t1IHFH0LtL+qi2vEb33WNz7XWbZZVqxvgA5fx1UUz+Dy188LjNT8eCsHQ0VGJoOcDvt97aS1N7s/Eivg+8jQrzJHXtrKRo/itYbij0Q1dQHxSaZ2D7PPeteDqAdrZcOaTZ0ZTfmTpxraAKHhrCFb/nCa0hjOTpOiYUewjv2TArgs/IXtlLnAsP/322677fwoMmdYbdoyfyQGD6O4+zQLT/82aSAmRpbK5somvz+SCdJFTJqnYWf4/ZZhPfYRvm5F2WzUH8lcUix8Fo/1aUC0c2OE+zrUpm3aH4mv7EU2ETzO8uf3Jh1EPSHbkcTr/j0b7Ev0Sf69SdHG66VxgRtNE2lWDvblzKlzHLr8pFenYL6tc+UZbMxrTaAvAuB/3XrrrZfWe4NJKVeZ2bBWs+bva4uFzG4Wt3Mvzbg9gIqyYciu5Y+8hBw8gxwsV8DGrZPb0d4KkGhga15A00piB7v83E7tpdmOzm/lPSpAJn8kM5qX8BVeAk+tR+2FdSDgKSLl7I+8cOFCjoPi2smTJ3eVr3AdONx0EWltLYoW
uNA6d91111WmeHuycDOwyh/Jn9fmN1N/r05EABAH12H2cNlDYGcR+H83Z8q9vSzjfQAAAABJRU5ErkJggg==) no-repeat;background-position:center;background-size:47px 63px;margin-right:101px;margin-left:100px;margin-top:164px}.speech_recognition .speech_recognition_new .speech_recognition_title[data-v-1ec3a672]{height:26px;font-family:PingFangSC-Medium;font-size:16px;color:#000;line-height:26px;font-weight:500;margin-left:32px;margin-bottom:96px}.speech_recognition .speech_recognition_new .speech_recognition_streaming[data-v-1ec3a672]{width:136px;height:44px;background:#2932E1;border-radius:22px;font-family:PingFangSC-Medium;font-size:14px;color:#fff;font-weight:500;text-align:center;line-height:44px;margin-bottom:40px;cursor:pointer}.speech_recognition .speech_recognition_new .speech_recognition_streaming[data-v-1ec3a672]:hover{opacity:.9}.speech_recognition .speech_recognition_new .streaming_ing_box[data-v-1ec3a672]{display:flex;align-items:center;height:44px;margin-bottom:40px}.speech_recognition .speech_recognition_new .streaming_ing_box .streaming_ing[data-v-1ec3a672]{width:136px;height:44px;background:#7278F5;border-radius:22px;display:flex;justify-content:center;align-items:center;cursor:pointer}.speech_recognition .speech_recognition_new .streaming_ing_box .streaming_ing .streaming_ing_img[data-v-1ec3a672]{width:16px;height:16px}.speech_recognition .speech_recognition_new .streaming_ing_box .streaming_ing .streaming_ing_text[data-v-1ec3a672]{height:20px;font-family:PingFangSC-Medium;font-size:14px;color:#fff;font-weight:500;margin-left:12px}.speech_recognition .speech_recognition_new .streaming_ing_box .streaming_time[data-v-1ec3a672]{height:20px;font-family:PingFangSC-Medium;font-size:14px;color:#000;font-weight:500;margin-left:12px}.speech_recognition .speech_recognition_new .streaming_suspended_box[data-v-1ec3a672]{display:flex;align-items:center;height:44px;margin-bottom:40px}.speech_recognition .speech_recognition_new .streaming_suspended_box 
.streaming_suspended[data-v-1ec3a672]{width:136px;height:44px;background:#2932E1;border-radius:22px;display:flex;justify-content:center;align-items:center;cursor:pointer}.speech_recognition .speech_recognition_new .streaming_suspended_box .streaming_suspended .streaming_suspended_img[data-v-1ec3a672]{width:16px;height:16px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAMCAYAAABbayygAAAABmJLR0QA/wD/AP+gvaeTAAAAJElEQVQYlWP8////fwYGBgZGRkZGBiwAJs+ETRIbGFU4WBUCAN6ICBTe98fbAAAAAElFTkSuQmCC);background-repeat:no-repeat;background-position:center;background-size:16px 16px;margin-right:12px}.speech_recognition .speech_recognition_new .streaming_suspended_box .streaming_suspended .streaming_suspended_text[data-v-1ec3a672]{height:20px;font-family:PingFangSC-Medium;font-size:14px;color:#fff;font-weight:500;margin-left:12px}.speech_recognition .speech_recognition_new .streaming_suspended_box .suspended_time[data-v-1ec3a672]{height:20px;font-family:PingFangSC-Medium;font-size:14px;color:#000;font-weight:500;margin-left:12px}.speech_recognition .speech_recognition_new .streaming_continue[data-v-1ec3a672]{width:136px;height:44px;background:#2932E1;border-radius:22px;display:flex;justify-content:center;align-items:center;cursor:pointer;margin-bottom:40px}.speech_recognition .speech_recognition_new .streaming_continue .streaming_continue_img[data-v-1ec3a672]{width:16px;height:16px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAMCAYAAABbayygAAAABmJLR0QA/wD/AP+gvaeTAAAAdklEQVQYlY3QsQkCYRAF4SfmRqaCGJvbglXYgqEFWMV1IWhmE0aWYGYoiCCfwXFgIP868bDsvOCMZSr0vNBhWokDd2wxrsSBK9b/iANHLJJkBEXGM8m+uggnzFviDZvWj2/9VJNWzAWr1o4P7Pza70s8YFaU5wOtLPdya6UrCgAAAABJRU5ErkJggg==);background-repeat:no-repeat;background-position:center;background-size:16px 16px;margin-right:12px}.speech_recognition .speech_recognition_new .streaming_continue 
.streaming_continue_text[data-v-1ec3a672]{height:20px;font-family:PingFangSC-Medium;font-size:14px;color:#fff;font-weight:500}.speech_recognition .speech_recognition_new .speech_recognition_end_to_end[data-v-1ec3a672]{width:136px;height:44px;background:#2932E1;border-radius:22px;font-family:PingFangSC-Medium;font-size:14px;color:#fff;font-weight:500;text-align:center;line-height:44px;cursor:pointer}.speech_recognition .speech_recognition_new .speech_recognition_end_to_end[data-v-1ec3a672]:hover{opacity:.9}.speech_recognition .speech_recognition_new .end_to_end_ing_box[data-v-1ec3a672]{display:flex;align-items:center;height:44px}.speech_recognition .speech_recognition_new .end_to_end_ing_box .end_to_end_ing[data-v-1ec3a672]{width:136px;height:44px;background:#7278F5;border-radius:22px;display:flex;justify-content:center;align-items:center;cursor:pointer}.speech_recognition .speech_recognition_new .end_to_end_ing_box .end_to_end_ing .end_to_end_ing_img[data-v-1ec3a672]{width:16px;height:16px}.speech_recognition .speech_recognition_new .end_to_end_ing_box .end_to_end_ing .end_to_end_ing_text[data-v-1ec3a672]{height:20px;font-family:PingFangSC-Medium;font-size:14px;color:#fff;font-weight:500;margin-left:12px}.speech_recognition .speech_recognition_new .end_to_end_ing_box .end_to_end_ing_time[data-v-1ec3a672]{height:20px;font-family:PingFangSC-Medium;font-size:14px;color:#000;font-weight:500;margin-left:12px}.speech_recognition .speech_recognition_new .end_to_end_suspended_box[data-v-1ec3a672]{display:flex;align-items:center;height:44px}.speech_recognition .speech_recognition_new .end_to_end_suspended_box .end_to_end_suspended[data-v-1ec3a672]{width:136px;height:44px;background:#2932E1;border-radius:22px;display:flex;justify-content:center;align-items:center;cursor:pointer}.speech_recognition .speech_recognition_new .end_to_end_suspended_box .end_to_end_suspended 
.end_to_end_suspended_img[data-v-1ec3a672]{width:16px;height:16px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAMCAYAAABbayygAAAABmJLR0QA/wD/AP+gvaeTAAAAJElEQVQYlWP8////fwYGBgZGRkZGBiwAJs+ETRIbGFU4WBUCAN6ICBTe98fbAAAAAElFTkSuQmCC);background-repeat:no-repeat;background-position:center;background-size:16px 16px;margin-right:12px}.speech_recognition .speech_recognition_new .end_to_end_suspended_box .end_to_end_suspended .end_to_end_suspended_text[data-v-1ec3a672]{height:20px;font-family:PingFangSC-Medium;font-size:14px;color:#fff;font-weight:500}.speech_recognition .speech_recognition_new .end_to_end_suspended_box .end_to_end_ing_suspended_time[data-v-1ec3a672]{height:20px;font-family:PingFangSC-Medium;font-size:14px;color:#000;font-weight:500;margin-left:12px}.speech_recognition .speech_recognition_new .end_to_end_continue[data-v-1ec3a672]{width:136px;height:44px;background:#2932E1;border-radius:22px;display:flex;justify-content:center;align-items:center;cursor:pointer}.speech_recognition .speech_recognition_new .end_to_end_continue .end_to_end_continue_img[data-v-1ec3a672]{width:16px;height:16px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAMCAYAAABbayygAAAABmJLR0QA/wD/AP+gvaeTAAAAdklEQVQYlY3QsQkCYRAF4SfmRqaCGJvbglXYgqEFWMV1IWhmE0aWYGYoiCCfwXFgIP868bDsvOCMZSr0vNBhWokDd2wxrsSBK9b/iANHLJJkBEXGM8m+uggnzFviDZvWj2/9VJNWzAWr1o4P7Pza70s8YFaU5wOtLPdya6UrCgAAAABJRU5ErkJggg==);background-repeat:no-repeat;background-position:center;background-size:16px 16px;margin-right:12px}.speech_recognition .speech_recognition_new .end_to_end_continue .end_to_end_continue_text[data-v-1ec3a672]{height:20px;font-family:PingFangSC-Medium;font-size:14px;color:#fff;font-weight:500}.voiceprint[data-v-8ecbfa80]{width:1200px;height:410px;background:#FFFFFF;padding:41px 80px 56px;box-sizing:border-box;display:flex}.voiceprint .voiceprint_recording[data-v-8ecbfa80]{width:423px;height:354px;margin-right:66px}.voiceprint .voiceprint_recording 
.recording_title[data-v-8ecbfa80]{display:flex;align-items:center;margin-bottom:20px}.voiceprint .voiceprint_recording .recording_title div[data-v-8ecbfa80]:nth-of-type(1){width:24px;height:24px;background:rgba(41,50,225,.7);font-family:PingFangSC-Regular;font-size:16px;color:#fff;letter-spacing:0;text-align:center;line-height:24px;font-weight:400;margin-right:16px;border-radius:50%}.voiceprint .voiceprint_recording .recording_title div[data-v-8ecbfa80]:nth-of-type(2){height:26px;font-family:PingFangSC-Regular;font-size:16px;color:#000;line-height:26px;font-weight:400}.voiceprint .voiceprint_recording .recording_btn[data-v-8ecbfa80]{width:143px;height:44px;cursor:pointer;background:#2932E1;padding:0 24px 0 21px;box-sizing:border-box;border-radius:22px;display:flex;align-items:center;margin-bottom:20px;margin-top:10px}.voiceprint .voiceprint_recording .recording_btn[data-v-8ecbfa80]:hover{background:#7278F5}.voiceprint .voiceprint_recording .recording_btn:hover .recording_img[data-v-8ecbfa80]{width:20px;height:20px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAYAAACNiR0NAAAABmJLR0QA/wD/AP+gvaeTAAABcklEQVQ4jbXUsW9PYRTG8ef9pYOQDiJVkXa01ECQYJPoIDGwkyaSbhIdRIiwMBj8BRYsmGwdDCaDpKGxdGp1oIuhWIziY+j7E657b9OKM517nud+c+59z3mTnsAo7uMDvmERF/ve6YPtqoDnOIZ9OI9V3N0O8DbmURr1SXzG1FaBb3CmQ3uI623aoIc5luR9h7aSZO9WgaVH0yX0AbcV/xeIm81T3SwwgZlWYJKrSSaG3iQjPY38qPmpJOe6gG+THK/5WpKDHcBDST7W/ESSxa725/Cs5pfwDjsanqP4iv0YwRoOdwH34MtwC/AUSxV+GreqfqHqs3jd8RW/oFewUHd5gJm6ggt4hCPVdwDrw+c+4ACP6+r99Q9RcBafMNvUW0ekjs5ckhtJlpMsJVnPxgScTPI9ybVSyove7lrAOzGNe/UgLter7N8WAlPouij+iNbBxZ0k47+VdicZw4OG9VUp5cmmwGwM6nij9rLFt9os/AR4hDDbLupsHwAAAABJRU5ErkJggg==);background-repeat:no-repeat;background-position:center;background-size:20px 20px;margin-right:8.26px}.voiceprint .voiceprint_recording .recording_btn 
.recording_img[data-v-8ecbfa80]{width:20px;height:20px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAYAAACNiR0NAAAABmJLR0QA/wD/AP+gvaeTAAABcklEQVQ4jbXUsW9PYRTG8ef9pYOQDiJVkXa01ECQYJPoIDGwkyaSbhIdRIiwMBj8BRYsmGwdDCaDpKGxdGp1oIuhWIziY+j7E657b9OKM517nud+c+59z3mTnsAo7uMDvmERF/ve6YPtqoDnOIZ9OI9V3N0O8DbmURr1SXzG1FaBb3CmQ3uI623aoIc5luR9h7aSZO9WgaVH0yX0AbcV/xeIm81T3SwwgZlWYJKrSSaG3iQjPY38qPmpJOe6gG+THK/5WpKDHcBDST7W/ESSxa725/Cs5pfwDjsanqP4iv0YwRoOdwH34MtwC/AUSxV+GreqfqHqs3jd8RW/oFewUHd5gJm6ggt4hCPVdwDrw+c+4ACP6+r99Q9RcBafMNvUW0ekjs5ckhtJlpMsJVnPxgScTPI9ybVSyove7lrAOzGNe/UgLter7N8WAlPouij+iNbBxZ0k47+VdicZw4OG9VUp5cmmwGwM6nij9rLFt9os/AR4hDDbLupsHwAAAABJRU5ErkJggg==);background-repeat:no-repeat;background-position:center;background-size:20px 20px;margin-right:8.26px}.voiceprint .voiceprint_recording .recording_btn .recording_prompt[data-v-8ecbfa80]{height:20px;font-family:PingFangSC-Regular;font-size:12px;color:#fff;font-weight:400}.voiceprint .voiceprint_recording .recording_btn_the_recording[data-v-8ecbfa80]{width:143px;height:44px;cursor:pointer;background:#7278F5;padding:0 24px 0 21px;box-sizing:border-box;border-radius:22px;display:flex;align-items:center;justify-content:center;margin-bottom:40px}.voiceprint .voiceprint_recording .recording_btn_the_recording .recording_img_the_recording[data-v-8ecbfa80]{width:20px;height:20px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAYAAACNiR0NAAAABmJLR0QA/wD/AP+gvaeTAAAAU0lEQVQ4jWNgGNLg////t6ht4H9S9TCRYQn5rsbmQkKuZkJSSPvwIkEM0zEUGggXIzlSCIGRY+BtDBFsMUVOOsQLKErYJABM7xELaFE4kO+aQQsAMeZRmWvp2dEAAAAASUVORK5CYII=);background-repeat:no-repeat;background-position:center;background-size:20px 20px;margin-right:8.26px}.voiceprint .voiceprint_recording .recording_btn_the_recording .recording_prompt[data-v-8ecbfa80]{height:20px;font-family:PingFangSC-Regular;font-size:12px;color:#fff;font-weight:400}.voiceprint .voiceprint_recording 
.complete_the_recording_btn[data-v-8ecbfa80]{width:143px;height:44px;cursor:pointer;background:#2932E1;padding:0 24px 0 21px;box-sizing:border-box;border-radius:22px;display:flex;align-items:center;margin-bottom:40px}.voiceprint .voiceprint_recording .complete_the_recording_btn[data-v-8ecbfa80]:hover{background:#7278F5}.voiceprint .voiceprint_recording .complete_the_recording_btn:hover .complete_the_recording_img[data-v-8ecbfa80]{width:20px;height:20px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAYAAACNiR0NAAAABmJLR0QA/wD/AP+gvaeTAAAAU0lEQVQ4jWNgGNLg////t6ht4H9S9TCRYQn5rsbmQkKuZkJSSPvwIkEM0zEUGggXIzlSCIGRY+BtDBFsMUVOOsQLKErYJABM7xELaFE4kO+aQQsAMeZRmWvp2dEAAAAASUVORK5CYII=);background-repeat:no-repeat;background-position:center;background-size:20px 20px;margin-right:8.26px}.voiceprint .voiceprint_recording .complete_the_recording_btn .complete_the_recording_img[data-v-8ecbfa80]{width:20px;height:20px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAYAAACNiR0NAAAABmJLR0QA/wD/AP+gvaeTAAAAU0lEQVQ4jWNgGNLg////t6ht4H9S9TCRYQn5rsbmQkKuZkJSSPvwIkEM0zEUGggXIzlSCIGRY+BtDBFsMUVOOsQLKErYJABM7xELaFE4kO+aQQsAMeZRmWvp2dEAAAAASUVORK5CYII=);background-repeat:no-repeat;background-position:center;background-size:20px 20px;margin-right:8.26px}.voiceprint .voiceprint_recording .complete_the_recording_btn .complete_the_recording_prompt[data-v-8ecbfa80]{height:20px;font-family:PingFangSC-Regular;font-size:12px;color:#fff;font-weight:400}.voiceprint .voiceprint_recording .recording_table[data-v-8ecbfa80]{width:322px}.voiceprint .voiceprint_recording .recording_table .recording_table_box .ant-table-thead>tr>th[data-v-8ecbfa80]{color:#000000d9;font-weight:500;text-align:left;background:rgba(40,50,225,.08);border-bottom:none;transition:background .3s ease;height:22px;font-family:PingFangSC-Regular;font-size:16px;color:#333;font-weight:400}.voiceprint .voiceprint_recording .recording_table .recording_table_box 
.ant-table-thead>tr>th[data-v-8ecbfa80]:nth-of-type(2){border-left:2px solid white}.voiceprint .voiceprint_recording .recording_table .recording_table_box .ant-table-tbody>tr>td[data-v-8ecbfa80]{border-bottom:1px solid #f0f0f0;transition:background .3s;height:22px;font-family:PingFangSC-Regular;font-size:16px;color:#333;font-weight:400}.voiceprint .voiceprint_recording .recording_input[data-v-8ecbfa80]{width:322px;margin-bottom:20px}.voiceprint .recording_point_to[data-v-8ecbfa80]{width:63px;height:47px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAFIAAABwCAYAAACNSCemAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAUqADAAQAAAABAAAAcAAAAAAkHfOMAAAU9UlEQVR4Ae2dW4xkx1nH+5zTvbMXe1nbO9Mzu2vSD3YUZRMUgmISiIQhJBIPIeIh4iIHlJjbQ0SkiBekXAwCxAMWiiwURAARhUSJBIag+AGwklUU2RFyjLETOSEImyyemZ7ddfbqvcx0N7//v6pO95mM7ZnZ6Znu6and6TqnLudU/c+/vu+rr+p013q9Xl7bCzeNQH7q1KkcMBt7gN4klgBYPPfcc/uXlpZu+Vavt+8mLzex1SvD+tj58wfPnTt3eI+dG+eDhnbWarV6qnrx4sXs+vXr+2Dn9OnTpw9s/HKTWyPT0Kb7jTNnztQbjUb92rVrjSKEBqBenZubezHLsu7kQrS+npuRzz//fFn60KFDPYDLALQGnofPnz//w/Pz8wfLAnsHayJgRgJkAwDrV65cqe/fv79RJ9y4caMOkPU8z+udTkcsfXF6eroNxhYDa15tghMrygbQMmHBcO8BaE3MTNiQNIMiet0eOxMi1bgCZLfbNdsEoIa2AB0sDksPkPWGs2fPHierBHmwzKQeV4A8cuRITTJSYIiRCoA3CJgwlmZvoZzetKfZDZE/JCPrHBUA00gyUkp7eXlZclFysgF2heSk5OXKyorkZgP2Uq+ed7s3/vv48eOn+5eczCNp7RrKxqwTIxVMOzGPPw1vQHV+iBslUnneLShycmFh4R7NjsqMCTzI77333lIOyiAXcClMcaChvW9fmjk2MI1WMlhZQ7FbhsJYmUsziIJ3oIxOTCCG7nJFRh4+fNg2JMBkg1obMF24EckoEBU6najVi6LWY7gz/N/abrffBv4JeZebhA8b5KmjGN8+FCOT1oaNZmzEstTkYiVk7AFeBqI1TY86/FG1xVB/N4roeLruJMQa2mU/B2XkICNVII1usZVTwK1blpbsVCGBSmC4H6xPTb0Ldv7kE0880Reqytylwcpm0GmhfqK9a4mRq/sttpIGYCtmp5ipIEamAEEFqv6fPHHixHtxgsymvN0aV6eIOC2mrl6V86KBXIRs9QaaujJVRPMUeTSFGNZ1CFrHyY6J1K13Oc4wi0jDPMowk3oNvB11biLt/uTMzMzXiQXzrgsVZSPjh45mYqRCb2qql2SkE0hCZVtri4cqaxmJdIyyUkObNJUmjU9uAIM99fzxxcUz70N+Tit3twUrm8GhraHL7MYgMb7L/sqGhKg+t1xkSMdh7jSZQbFwDyUe6oMkhjvgdoklDjpNsL9fmv2BBx6oPMTyRmN64KFN29f0R8Zh3dBsZmUlxzMUPEEa0prdaKZD3U
LHyxrS3a5mQXXNepSmPOypBjcJwz4Mc9aHEAFZ7wWu//Cdd9754phiV2m2Gfly/kgN3cS6pLVdW3ZkHdFIkC0JsEFjw0oNZ9gJW/MeMjMLQxuGiplmJwVIRHa2YPaHFs6evafSojE9MSPX448U+wDM824YJzb6WAwEz8hEMa3PyqSEmKrDyF5iLNYnLM20ckndGnP2LPvOVKPxhTvuuOPimOIobvQD4EhZWPattiOVjoz0zMcyUoqHYCVDXog7VslmKKyUrOSayE4EpGWkrlLL8nDXnu6uG+a1/OSNGysfwdf5ZpcYw48KkDDNCkMArWVHItPcxWQ76iQpGccMbdmTqm870qX1kTOFtPb29VE8TtNnCCTktYN5Xvzm4uLir4+j87gC5Hr8kanrxJafZqKop4DAtMkDCwWmdh4Ebd2NBrtnRaFsDQlKvk5UJpJU6xhvy+v1PwLQN8SCYxGpL9a8m/FHdruWk2hojHFr7DqGd9e+SnAMhnpRkwxVRiwDbZGPmeQjMSg5n3PkpvAOspNj0rNHL1++8Lm77777+qij6SniZv2RwQkUp4ga1pGY0tpipGCBad5cBNCIw0IWZWRfV3lW344Z+l3ErmuQEI57P3fwllseZIp598gDuRX+yNTJoHAwgTCHZP6Eod3VojgKBqVDgkALMlIWESdBdirRACMMXCBHckh4FFl+rNPt/ily830wOz6qdMfRiU2G1Jyb8UdG+xERhwfIjJQVKZmIToaVZmTVjgy3Jc15AhqpGdR4Hhjp+i7GUfEryM2H5ue//5rU3lGKbZCnBm2FP9LXMiPhD0GD1LSzGRTZaPZJISkfoMVKmUecZYKdKmYk9RniPAYnIzIzhvj1T72wuPiLZMlyGpmQD8kfKbsoyMlVXR20IaXahZtg479lpBDVqWUk+QDKdWSLhoCMkKL64OLi0p+jIOdS+k7HVjatgU1UatAW+iN/wCCXXEReBqKKrPyhgIynQVMDjCXD3Hk905XjwEDJUJCEpm9eXul+nuH+HlXZ6SDzpyiniMPzR9pvKRMJFso/6Rg4mS5qelnIFCLdplADr1s4zmoNELN5xEhuMO9iStnjD1OJmEeCy0mmVPdrnc6hj584cfjcTgFKX/phiP7IeBM0uY+Io9Fe3h3GheFMCqRzECMlRqWGrN0lMEXIEAdmOvuni+KlR+bb7XeFitv/aWUzOLRhyBD9kbIlCUKTMR7MIGloEAMogyyoOAh2pA6jjExAu7IuAq8j4Eha4M5uA+BPzi8sPIjsvFUltjPkKBs13R1M5o/AnMI7roYg752n0ZgaFufayso0x5YBLvMnhuBW4wTSYTsGDoKSjsM1AgBR0fjE99CRqFcyUlqdnKiA4v0jevKZpDv2GasrvHd5pfNljPi3p+ztiM3IHfJHilMGGgPI5k9uewigwNLmDw9KQzuaRAE3QxugEbJ6TIphZ+Alh1Q7vtLpPjy/uPjH27UDJDHSLZMdicbuP2h1dCBouUFsrWnlkLUbZUFIl7H3B1r2px6c8D9MDTVuSxMmWTiMR5DTEOcfxE2ST9Z7aIMxFT6cKz+EGDPkOZedCXAhTXGfp4L0t6f2H/was6Ifi3WHFoVxFy+PAnAHBNY2+yMlL7l3MLJBzTal6CagmGCasW6mS7kcbFaKgC6HuXKV6ISM7oDra7O8OLWw0P4Y5eSgGUqoAAl73Aqe8E74I4VYnK5o3g27ojj00xWplQZ6lIK9FE35YqIZrWQTlSpSRu6OgJYX/iOLi+3H2Nv5umEgWQFyRPyRYBUZSY+BbbDfnCT0SJYyUgBUy0joRwE9DKW7otjqMkrOsregiJ5kVvQ7/fSQe7OfAjJrtVpuUJKR3MR7f3RxtLeXGHSspQaFtNSghS/sZTl041JD8JAH7w+EwG+h4Re7zhWk2xmmfnyodJhvrQ6bwtCOjKRCYB/FjQoJwfwJEKm+WswFfClGN2cKlpHOihirhU6PHzT/ALz9xGJ76VF2z93p3C
348BRxdP2R4BcBUV8TOwFD7IoPRDmcWhnBTDNQjygSUYIgHqcU1aDcz8DOZ1hj/1Wd32zIox3p62x2f2RqxDD8kRE8UDOvLQEtI6GjDHIpI4HnoS1AzdVgXKR2BXANHvhbOTlGJfxQp9v79GK7/fClS5duageIR0a6YTLI9TgHtfZ69kdq0NoEGo4/EgpGH2VQJm5yYJjBS4yEnPFccewd3YlDP3IyMTSeQvpfuHzlyjdxgPx8wmKjsQ3yVGnU/ZGxnWFgM2Q9vJUIUGYkcJmdJBHLSHeIjGSIR1amfMoEhqpYNoOS/yKK6K83M8XU0NZVHLZwf6ToqUbGZ57ugJwLLLF8s/DyOR9SOBquGsKc6lgX0NDmMykT+kumR7PyhU2Qi8KIgvqvsr6z7MjVQRkCWbELuXAEmDTu+wFmRc8A5k+trvtK51Y2rVbLDZWMVOHx8EfSeTBPaNIBt10Y0QWdlnYkgKl//kggKkHHCilNx67c670GML+y0G4/SN66XjIQI3tJa3fZQ66g3Wi+KC3QZnyf8KEpIjaQT6MJFOViuXzo9kFGQthp4b5yBhNDq8U2rdNoXZu+inta1emPQhVWfQIx828fqrIVj0/50H8u4HoBPCGisvqQtoHIZl+4gBKVW/nsg6iq1ClB5VTc/jBm0jdg56vuAElN9i3G1x8Z4ekP14AoQs9PS1qdHgooPRfFCuTpIx47wWnOj2U4fj2a/euw86Mc910JrtX/sLJpDQxtCo+pPzJ0iuZzIOkbAEpd1VmZRp7KqajSfaz8eKxyZVkREx8i7PyDdnvpMdj5Wor9QMg1tEn1XZP5Q8Ux9keqLyZg7KxBNboCzIE4gJVYSpWYp/RUTrGeR6ysycE9yM6nFhaWPkieMYs3qZmRu80fSeckI2MfdehOC+CQJnQIKpOYlwBUNR+Tn9JUOpRzvQMIiofaS0v/OviCVmKkrltLc22f8KEWpGPFY+ePHGw83TG71KXYrQSUupk6GjEuy6RLBNCNifOo8rOaYrJOdJ/KVPxzq/2RLCnERxguJ6eFpoHS2HK50RDvj9TyqtJ9zlMnsUYhLRFS0RtXWNkqm+j9kW47qs77WdDH4XJhuCR/pLSuPeT2R8pu5HL80W0uxvCVqiSZTupcjXQukUGDRu407VJDwkdsRgBGdajpqjrkWkQCmjo6cnDlmJfSVZ9whAqfWWif+YmK1t7F/khQAcfQeQE/AJQBcZrTQUdgRaAMpCD1+Rr1eUZfOHRg6mMVIHe7PzKxyegkcAKTDV4AMOSm434dMbfPUo7ngfs9c7Ozv4SSPisgs1ar5ceSZCTo72Z/ZGCcOg4wwiaxT3E6d1rErUynjo7zLP+bfY3662dnZ/+ZJAdPEdPMZnCuzU1e9X3t4e+PZN4Th5Nau5X+SBAJCOgT9AJwAbkAcJC6To8lYd1zRZ69c3Z25v7bb7/9Qv8CtC3akU6bNH+ksDTb6L3BI9a5/+Kx0lGmLGNmD9VqM29sNpuPGqxVHxWtLYOcTfjUDf7IpLXlj5Rm1jQbheyX3nUdKWqzUhsEWCvgpUWWDOL+SL9lo9m0PIladvASaxhLPFqtS/n5a94t/V/Yw6OukYEWlyYmBDUq3Rz8kaiMOOBCfZ26kObyQoYqLkDsAydZYdPiWFV5OiNKxEzM0y3L4OPs21z//uZs87EyfY0DG+QpfZL9kYMABlCzFZ7Mn8w2p98EC18RROGnoa3YYVBGDnrIlZne/DJLTJtXfF97bP2REFij76kir91zbHb29+jvul4EsLJptYLW3vNH1q6j0D7anJl+C69E/4fZtc4PK5uktSfZHwlej/MSy4+ikf8QFrInZ2NBdmQZJtQf+RKK7MOzzZm3Hzt69NkSjA0eWNkMDm0E7cT4I8Hqy0U29cbZ2ek/g4XRSb9BBGNxD22OrfMnxR9JZy8UefFbc7PNdzSbR/5nc9BVa5mRk+SPxMr8UqNenGw2p/+yCsXNnSVG+ipprp0uqWGejh
WPuT/yLF91cB/K5N1Hjx59YbBfW3FcUTbYTx7iAnC1HambDfF97aHtjzRIWfYPrN3+CCz87FaAttY1KlPE1f5IbUFBCJf1YKSnitpDrimjgsqIuIrx3fYKpoqupB1qlAnLbszftPmsq2kgWwAQ62EfWSI8Cbn8hThkNdPzi59UkPjn9mqB9qDzrhh3YnDqhvbohkzaqGSqqW6aNlOq221n9eJ3j01PP6K2DjNUGLmb/JHA/fn9U/veOrcNIOoBCcgsmT9JRopi+gYBhXHbH8k6xv8hoX4ZWfih1a4ud2hIH54ippnN4Fyb4TJe/khtuuh1/67bWXkn07uvDgmvl71sRWuPsT/yf5GH9/Gd6Q8cO3bspZft7RAzKspm3PyR2Bh4R/PPNGen/0IDaIg4veqlbZCnUmPmj/xuvV78GrLwEzsNovDT0FbsMCgjV9uRo+KPxLRawUj6q2Zz5v0Y1t9Obd/p2Momae1R90diNT6LTvmNubmZv4WFG3Z1DRNs2bYj/742EwW9Y/e5uZmZfwJAmekjFyrKRv5ILX1pxy7TwfD9kcwa9N5MDOH7IznPGetsAo1bVnoshGnjKNMgp2l6oe+P9MyGaYjMVc1i9L62Zh96X5sBymTHkxRdXO/RaKeJpjFxRqOYDanf5McjPsnvRCyo2KgGK5vBoS1jfHjfHxlf1hSukExTRc0J9FInESATjLei3jVmmJ9iJ8PHRx1ENbuOslHTxYNaMn8Ept7X1tzaLCNv9fvacnAoaI6teTffHxkuwrXEYKXpfe2eN88DC0DhieaA2wXGaYXVgIq/ur+S2cWgafN/Lq+sfHruxIkd+2oFtWcjoX4q/MKSfqPB9Yh7/BaDX+pk15mGnoAuVxFdSIvZQqnTwQ5mjOpbI7VuzXdKxvVtneo7lDKcFr6Avz9SD0yYAaDWtYUzT4GYslrczrPLgPj3LH8+7vuM0UceGekmp7l2an8CMZ0P0x+JKH7y8sWLvz+OIAqfirLRcFWiAJQdmXZaKE1hSPsjLyEk/hEAnw53Gc/PCpDb7o/Mat/gYX2J79m9Op7w9VtdAVIzGykPZKR/z0bKRu/ZkJZqmLHxxPIT8LNcm4KkgtkMpDeP+R5EkZrRGkSfTD/eA0OZSEDig816F5aXbzwCgN9NFx73WH0VmJv6PZvNfH9knu97ambm9q+g8MMvaYw7grH92+aP5IGd58vhP9ts3vEvuw1EYSlGllPEq1evok8aBWZPnWHtX1hChjUOHDjgX1bqdHJGbUc/t1IHFH0LtL+qi2vEb33WNz7XWbZZVqxvgA5fx1UUz+Dy188LjNT8eCsHQ0VGJoOcDvt97aS1N7s/Eivg+8jQrzJHXtrKRo/itYbij0Q1dQHxSaZ2D7PPeteDqAdrZcOaTZ0ZTfmTpxraAKHhrCFb/nCa0hjOTpOiYUewjv2TArgs/IXtlLnAsP/322677fwoMmdYbdoyfyQGD6O4+zQLT/82aSAmRpbK5somvz+SCdJFTJqnYWf4/ZZhPfYRvm5F2WzUH8lcUix8Fo/1aUC0c2OE+zrUpm3aH4mv7EU2ETzO8uf3Jh1EPSHbkcTr/j0b7Ev0Sf69SdHG66VxgRtNE2lWDvblzKlzHLr8pFenYL6tc+UZbMxrTaAvAuB/3XrrrZfWe4NJKVeZ2bBWs+bva4uFzG4Wt3Mvzbg9gIqyYciu5Y+8hBw8gxwsV8DGrZPb0d4KkGhga15A00piB7v83E7tpdmOzm/lPSpAJn8kM5qX8BVeAk+tR+2FdSDgKSLl7I+8cOFCjoPi2smTJ3eVr3AdONx0EWltLYoWuNA6d91111WmeHuycDOwyh/Jn9fmN1N/r05EABAH12H2cNlDYGcR+H83Z8q9vSzjfQAAAABJRU5ErkJggg==);background-repeat:no-repeat;background-position:center;background-size:63px 
47px;margin-right:66px;margin-top:198px}.voiceprint .voiceprint_identify[data-v-8ecbfa80]{width:423px;height:354px}.voiceprint .voiceprint_identify .identify_title[data-v-8ecbfa80]{display:flex;align-items:center;margin-bottom:20px}.voiceprint .voiceprint_identify .identify_title div[data-v-8ecbfa80]:nth-of-type(1){width:24px;height:24px;background:rgba(41,50,225,.7);font-family:PingFangSC-Regular;font-size:16px;color:#fff;letter-spacing:0;text-align:center;line-height:24px;font-weight:400;margin-right:16px;border-radius:50%}.voiceprint .voiceprint_identify .identify_title div[data-v-8ecbfa80]:nth-of-type(2){height:26px;font-family:PingFangSC-Regular;font-size:16px;color:#000;line-height:26px;font-weight:400}.voiceprint .voiceprint_identify .identify_btn[data-v-8ecbfa80]{width:143px;height:44px;cursor:pointer;background:#2932E1;padding:0 24px 0 21px;box-sizing:border-box;border-radius:22px;display:flex;align-items:center;margin-bottom:40px;margin-top:10px}.voiceprint .voiceprint_identify .identify_btn[data-v-8ecbfa80]:hover{background:#7278F5}.voiceprint .voiceprint_identify .identify_btn:hover .identify_img[data-v-8ecbfa80]{width:20px;height:20px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAYAAACNiR0NAAAABmJLR0QA/wD/AP+gvaeTAAABcklEQVQ4jbXUsW9PYRTG8ef9pYOQDiJVkXa01ECQYJPoIDGwkyaSbhIdRIiwMBj8BRYsmGwdDCaDpKGxdGp1oIuhWIziY+j7E657b9OKM517nud+c+59z3mTnsAo7uMDvmERF/ve6YPtqoDnOIZ9OI9V3N0O8DbmURr1SXzG1FaBb3CmQ3uI623aoIc5luR9h7aSZO9WgaVH0yX0AbcV/xeIm81T3SwwgZlWYJKrSSaG3iQjPY38qPmpJOe6gG+THK/5WpKDHcBDST7W/ESSxa725/Cs5pfwDjsanqP4iv0YwRoOdwH34MtwC/AUSxV+GreqfqHqs3jd8RW/oFewUHd5gJm6ggt4hCPVdwDrw+c+4ACP6+r99Q9RcBafMNvUW0ekjs5ckhtJlpMsJVnPxgScTPI9ybVSyove7lrAOzGNe/UgLter7N8WAlPouij+iNbBxZ0k47+VdicZw4OG9VUp5cmmwGwM6nij9rLFt9os/AR4hDDbLupsHwAAAABJRU5ErkJggg==);background-repeat:no-repeat;background-position:center;background-size:20px 20px;margin-right:8.26px}.voiceprint .voiceprint_identify .identify_btn 
.identify_img[data-v-8ecbfa80]{width:20px;height:20px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAYAAACNiR0NAAAABmJLR0QA/wD/AP+gvaeTAAABcklEQVQ4jbXUsW9PYRTG8ef9pYOQDiJVkXa01ECQYJPoIDGwkyaSbhIdRIiwMBj8BRYsmGwdDCaDpKGxdGp1oIuhWIziY+j7E657b9OKM517nud+c+59z3mTnsAo7uMDvmERF/ve6YPtqoDnOIZ9OI9V3N0O8DbmURr1SXzG1FaBb3CmQ3uI623aoIc5luR9h7aSZO9WgaVH0yX0AbcV/xeIm81T3SwwgZlWYJKrSSaG3iQjPY38qPmpJOe6gG+THK/5WpKDHcBDST7W/ESSxa725/Cs5pfwDjsanqP4iv0YwRoOdwH34MtwC/AUSxV+GreqfqHqs3jd8RW/oFewUHd5gJm6ggt4hCPVdwDrw+c+4ACP6+r99Q9RcBafMNvUW0ekjs5ckhtJlpMsJVnPxgScTPI9ybVSyove7lrAOzGNe/UgLter7N8WAlPouij+iNbBxZ0k47+VdicZw4OG9VUp5cmmwGwM6nij9rLFt9os/AR4hDDbLupsHwAAAABJRU5ErkJggg==);background-repeat:no-repeat;background-position:center;background-size:20px 20px;margin-right:8.26px}.voiceprint .voiceprint_identify .identify_btn .identify_prompt[data-v-8ecbfa80]{height:20px;font-family:PingFangSC-Regular;font-size:12px;color:#fff;font-weight:400}.voiceprint .voiceprint_identify .identify_btn_the_recording[data-v-8ecbfa80]{width:143px;height:44px;cursor:pointer;background:#7278F5;padding:0 24px 0 21px;box-sizing:border-box;border-radius:22px;display:flex;align-items:center;justify-content:center;margin-bottom:40px}.voiceprint .voiceprint_identify .identify_btn_the_recording 
.identify_img_the_recording[data-v-8ecbfa80]{width:20px;height:20px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAYAAACNiR0NAAAABmJLR0QA/wD/AP+gvaeTAAABcklEQVQ4jbXUsW9PYRTG8ef9pYOQDiJVkXa01ECQYJPoIDGwkyaSbhIdRIiwMBj8BRYsmGwdDCaDpKGxdGp1oIuhWIziY+j7E657b9OKM517nud+c+59z3mTnsAo7uMDvmERF/ve6YPtqoDnOIZ9OI9V3N0O8DbmURr1SXzG1FaBb3CmQ3uI623aoIc5luR9h7aSZO9WgaVH0yX0AbcV/xeIm81T3SwwgZlWYJKrSSaG3iQjPY38qPmpJOe6gG+THK/5WpKDHcBDST7W/ESSxa725/Cs5pfwDjsanqP4iv0YwRoOdwH34MtwC/AUSxV+GreqfqHqs3jd8RW/oFewUHd5gJm6ggt4hCPVdwDrw+c+4ACP6+r99Q9RcBafMNvUW0ekjs5ckhtJlpMsJVnPxgScTPI9ybVSyove7lrAOzGNe/UgLter7N8WAlPouij+iNbBxZ0k47+VdicZw4OG9VUp5cmmwGwM6nij9rLFt9os/AR4hDDbLupsHwAAAABJRU5ErkJggg==);background-repeat:no-repeat;background-position:center;background-size:20px 20px;margin-right:8.26px}.voiceprint .voiceprint_identify .identify_btn_the_recording .recording_prompt[data-v-8ecbfa80]{height:20px;font-family:PingFangSC-Regular;font-size:12px;color:#fff;font-weight:400}.voiceprint .voiceprint_identify .identify_complete_the_recording_btn[data-v-8ecbfa80]{width:143px;height:44px;cursor:pointer;background:#2932E1;padding:0 24px 0 21px;box-sizing:border-box;border-radius:22px;display:flex;align-items:center;margin-bottom:40px}.voiceprint .voiceprint_identify .identify_complete_the_recording_btn[data-v-8ecbfa80]:hover{background:#7278F5}.voiceprint .voiceprint_identify .identify_complete_the_recording_btn:hover .identify_complete_the_recording_img[data-v-8ecbfa80]{width:20px;height:20px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAYAAACNiR0NAAAABmJLR0QA/wD/AP+gvaeTAAAAU0lEQVQ4jWNgGNLg////t6ht4H9S9TCRYQn5rsbmQkKuZkJSSPvwIkEM0zEUGggXIzlSCIGRY+BtDBFsMUVOOsQLKErYJABM7xELaFE4kO+aQQsAMeZRmWvp2dEAAAAASUVORK5CYII=);background-repeat:no-repeat;background-position:center;background-size:20px 20px;margin-right:8.26px}.voiceprint .voiceprint_identify .identify_complete_the_recording_btn 
.identify_complete_the_recording_img[data-v-8ecbfa80]{width:20px;height:20px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAYAAACNiR0NAAAABmJLR0QA/wD/AP+gvaeTAAAAU0lEQVQ4jWNgGNLg////t6ht4H9S9TCRYQn5rsbmQkKuZkJSSPvwIkEM0zEUGggXIzlSCIGRY+BtDBFsMUVOOsQLKErYJABM7xELaFE4kO+aQQsAMeZRmWvp2dEAAAAASUVORK5CYII=);background-repeat:no-repeat;background-position:center;background-size:20px 20px;margin-right:8.26px}.voiceprint .voiceprint_identify .identify_complete_the_recording_btn .identify_complete_the_recording_prompt[data-v-8ecbfa80]{height:20px;font-family:PingFangSC-Regular;font-size:12px;color:#fff;font-weight:400}.voiceprint .voiceprint_identify .identify_result[data-v-8ecbfa80]{width:422px;height:184px;text-align:center;line-height:184px;background:#FAFAFA;position:relative}.voiceprint .voiceprint_identify .identify_result .identify_result_default[data-v-8ecbfa80]{font-family:PingFangSC-Regular;font-size:16px;color:#999;font-weight:400}.voiceprint .voiceprint_identify .identify_result .identify_result_content div[data-v-8ecbfa80]:nth-of-type(1){height:22px;font-family:PingFangSC-Regular;font-size:16px;color:#666;font-weight:400;margin-bottom:10px}.voiceprint .voiceprint_identify .identify_result .identify_result_content div[data-v-8ecbfa80]:nth-of-type(2){height:33px;font-family:PingFangSC-Medium;font-size:24px;color:#000;font-weight:500}.voiceprint .action_btn[data-v-8ecbfa80]{display:inline-block;height:22px;font-family:PingFangSC-Regular;font-size:16px;color:#2932e1;text-align:center;font-weight:400;cursor:pointer}.voice_commands[data-v-4a944260]{width:1200px;height:410px;background:#FFFFFF;padding:40px 50px 50px;box-sizing:border-box;display:flex}.voice_commands .voice_commands_traffic[data-v-4a944260]{width:468px;height:320px}.voice_commands .voice_commands_traffic 
.voice_commands_traffic_title[data-v-4a944260]{height:26px;font-family:PingFangSC-Medium;font-size:16px;color:#000;letter-spacing:0;line-height:26px;font-weight:500;margin-bottom:30px}.voice_commands .voice_commands_traffic .voice_commands_traffic_wrapper[data-v-4a944260]{width:465px;height:264px;position:relative}.voice_commands .voice_commands_traffic .voice_commands_traffic_wrapper .voice_commands_traffic_wrapper_move[data-v-4a944260]{position:absolute;top:50%;left:50%;transform:translate(-50%,-50%)}.voice_commands .voice_commands_traffic .voice_commands_traffic_wrapper .traffic_btn_img_btn[data-v-4a944260]{width:116px;height:116px;background:#2932E1;display:flex;justify-content:center;align-items:center;border-radius:50%;cursor:pointer;margin-bottom:20px;margin-left:84px}.voice_commands .voice_commands_traffic .voice_commands_traffic_wrapper .traffic_btn_img_btn[data-v-4a944260]:hover{width:116px;height:116px;background:#7278F5}.voice_commands .voice_commands_traffic .voice_commands_traffic_wrapper .traffic_btn_img_btn:hover 
.start_recorder_img[data-v-4a944260]{width:50px;height:50px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAHQAAAB0CAYAAABUmhYnAAAABmJLR0QA/wD/AP+gvaeTAAAP8UlEQVR4nO2de3hU5Z3HP79zZjLJTC6EIFRYQO4gaOuNeqsVdLXU6urzaJddvFW3cb2gJnENSPvstKsmRAm2VNfauvIs1npF65WqxaqAt/WGV1BAUMFaEiDJTJKZOee3f5zcE0LmlpmJ83mePJPJnPO+v8n3/N7zvr/3955XyGD8fs3ZG2yaJIY5XbCniso0hLEK+YCv7ae47RUgAOxpew0INCnsAN2sGJvVtj4e5s3f4vdLKDXfKH4k1QZEQ9mS4BjDtudiyxxb9ERBJgCuBFcTAd0qKusVXetSWVtT49uZ4DqSRloLWlqq7vyS4DxR5oHMUXRaKuwQZJOKrlWbZwJ7vGvuukvCqbBjIKSloGWVjTNFzAtAfwKMTLU9Pdij8JChxqplS/PWpdqYnqSNoJWV9UURI+cyVbkYmJFqewbIRyK60mWHfrt06fB9qTYG0kDQioqGEeo2rwKuxunAZCDSCPY9tiHVt93s25VSS1JV8TU3NI0ybaMMdCHgTZUdCSYAerfLNm6pqfF+kQoDBl3Q0lJ1FxQ3X6GiN9E5nBhqNAM1hbnear9fWgaz4kEVtPyGxjnYxu1kzj0yXj5VdOHy6vw1g1XhoAjaNn6sVZUfD0Z96Yc+4LKlfDDGs0kXtHxx4EeirFQoSXZdac4+kEtrq72PJLOSpAnq96uroSX4M+DngJGsejIMBV1RmOv7j2SFF5MiaPmS4FgsvR84PhnlDwHeUMz5y6tztya64IQLWlHZfJIa9qMowxNd9pBCqFeMs5dX5b2cyGIT2hSWLw6cpWKvyYo5AJThovZzFYuD5yay2IQJWra46SKUR4C8RJX5DcCjqvdXLAqUJqrAhAhaURm8RlTuIfFTWd8ETIU7yxcF/IkoLG5ByxcFlqjobaRBXDiDEeA/KyoDNySioJgpr2y6EJGV8ZaTpQMVKF1W7ft9rAXELERZZeBMEVaTbWYTjSUiP15W5V0dy8kxCXrt4uBxhurzDJ1ZknSjWcU4PZYhTdSCVixpHq+W/TYZO3eZGQjURdQ48ldL83ZEc15UnaLSUnWrZd9HVsyko1BiiP2g36850ZwXlaD5xYFqsuG8QUPguw3Nzf8V5TkDo3xx4IcoT0ZzTpaEoAhn11b5Hh/IwQMSp2xJcIzYujEb0ksZu22DwweSrzSgJtew7dqsmCllhKFy60AOPKCHVlzf9I9qyLPx25Qlbgw9tfbm/L/0e0h/H/r9moNhrEisVVliRW3+e+FC9fR3TL+C7msOLkrV8oMsvRFkitsXKO//mP1QtiQ4Riz9hOx0WLoRtA0m76+DtF8PlYh9HVkx0xGvYcl+vbRPD71ycUOJR83PcNZZZkk/AhK2Dlm2rHB3zw/69NAczHKyYqYzPnLMhX190MtDKyvri8J4PkMYlmyrkoXPKxw20+TQGSYjSoSCfCEcgX37lC3bLDa+b7HjczvVZsbLPjs3dMht/uK9Xf/Yay4zYuRchmammC4XnPw9N3O+7yLX0/tuUlQojBtrMOckN5s/sXj86TC7vspYYYuMVve/Ad0CDr0EVZULB82kBJKfL1x8vocJ4zvvIvV7lG2fWTQ2KW63MGqkMGG8iWnC1CkmV19h8tDqEG+9E0mh5XGg8hN6CNrtMi67PjhbDH1tUI1KAB6PcPXlHr41yhFz+w6bJ9eE2Lqtt/d5vcKck1ycdIIblwtU4d77W3lnozXYZicE2+Co2272vdX+vlunSAy9YPBNip/55+Z0iLn+lQi/+W1Ln2ICBIPKU2vC3Pn7FgIBRQTmn+th1MjMXK1hWN016/gWpaXqBuYPukVxMnmiyeGzTADeftfi0Sd
C2AO4LW7bbnPPva1YFrjdcNYZ7iRbmiRE/rVNO6CLoPklwXnAiJQYFQenznW6AcGgsvrxEKoDP3fbZzbrXnHun9Onmow+OCO9dGRhcfC09jcd38B5dExm4fMKkyY43vnqGxGCwSjUbOOFF8MdHv3tw8xEmjdo2MIP2n/vEFTRuakxJ3bGjTMw2r7BBx/G1qlpbNKOMen4cRnpoQAd2hkA118fGA0yNXX2xEZRQWcn/evd0XtnO3/f7Qg6fFjGCnqoo2GboJapp6TWntjwejsFbW6OXdBgs/Oam8FTEWHRk6G9ybVlTiqNiZkEpatpND2pNEXE0dAAUNETUmtOlviREwEMJ5FXJqbanCzxopNLS9VtNLY2TSa74Ggo4Mob1jjRsNXM5gwNEUzTnGYYamcFHSKo6jRDRbKCDhFEZZqBMCbVhmRJDCKMN1AKUm1IlsSgUGJAegt6/LEuLlrgoagwdYveDptpcsmFHg7+VtqHBksM0jy770fzcjh8lskxR6VuZDXvNDczZ5iccGzaj+4KDCS9PbS1xQnLDR+eGg8VgeJixzNbWtM+ROgx0PT20N11zj9x5EG9mzu7y4yZOw7ncZnOxRLpI1dsWJGQ05YPUFefCYKmOV/sdKa2xow2cPUQbe++zlyTwjjuscXFzrkNDb0F6zpH+uXO9E/5NBCaUm1Ef3y6xXHDHDeMH9v9+vv6750CTJoYW7aBacIhbamff/u6t2BTJjvlNrcoX3yZ9oK2GiiNqbaiP7ZssztSRNr/ue3s+sqmfo8j6uyjY2tzZx1q4mubV/3go95ZD1Pb6tyy1R5Q8lmKaTUgvT20pUXZ3pYictQRLqRLy6oKr/+fc+M7ZJzBkd+JTlRPDpxxuvPUmIYG5aNN3QWdcIjB8LbmeNMnGZG322BAensowOtvOKINLxamT+vupS+uC7Ov7d533jk5jBs7sG6BacKC+R5KShzB1jwXJtxjR7Pjv+tcIKGwkyKa9ih1BpL+gr69sTOj73vHdffCUAjueyCEZUFODlz+01xmH9Xdk3tSUiJc/tNcZs5wLo6N71u8/mb3Lm5hoXD4LKeut9+NxJXiMmgY1JnHnbhkLvCdVNvSH7YNPp+zLmVEicH2HXa3IUT9HqWuXpk5w8Ttcu6Lsw418eQItjpjyaIig0kTDE452c25Z3s6mtJPt1j87x+dhOuunHNWDmPHGKjCg6tDNDZmgKDKX12iukn7u5zThBdejHDsMS7y8oSzz3Sz/DcWoS77Krz1ToQ9e20W/LOH4mHC6IONtsTpvjPibRvWbYjwxDO9M+0nTTQ5+gjHO9/ZaGXEcAVARTeZx5+0ZAQZsAQiHIbWEMyYZuLzCcOKhPd75OLu3atseM1pnosKnXWhPWluUTa+Z/GHB0K8+U6kV6Z9Qb5w2aUecj1CS6uyclUrLa3J/GYJRGWFy7LsTYaR9vEFADa8GmHGVJMZ002OPtJFQ4Py1J+792QiEXhpfYSX1kcoHiYcNMKgoEBoaVEaGpWdu+xezWs7Pp9QeomHwrZ834cfDbF3XwY0tW2IYW4Sv19zGlqCATIkr8jnFa76d09HKHDDaxH+9GSoz7BdNIw8yOCiBZ2r2F5aF+FPT2XUVtzhpnqvTwDKFzVtyqTM+cJC4crSXEa0DTm+3GXz2BN9rwc9EC6XMzz5wWk5eNoeZPrq6xEefiy6hU+pRz6urfbOcAGIynoVMkbQhgbl13e0cP78HKZOMRlzsMGVpbl8ssXijTctPvjQOuDMyEEjhCO+7WL20S6KhzkXhm3D038O89eXwxkmJoCug/Zm1tAX2pZ3ZwyBoPK7la2ccKyL0091k5cnTJlkMmWSiW0761V27lICAaW5RTFNyMsViocJY0Y799WufPGlzaOPh/hsR2b0aHuiqi9Am6CmJX+JZEa/qBu2DS9viPDWuxbHzXZx7GzH2wwDRo00GDWA7di3bLVY90qE9z6wMtArO1A15QXosjqkYlHw40x/rp+IM80
2eaLJP4xx4rAjRwp5uc7XrN+j1Nfb7K5Ttm232fyp1eeUWQbyYW21byZ06dmq6FqUjBZU1Wk6u05znTLHzQ9Pc4ILN9U0p8q0ZLO2/ZfOBb82z6TGlixxI53adQhalOd9BvhbSgzKEg9fN9V5n2t/0yGo3y8RhAdTY1OWWFH0D3fdJR3hsm7RIVFWKfT5UMB0YtahJtOnmv1OkbUzZnRn9/28cwa2BUooDOs2hDMhKQxBVnV/34PyRYH3gFmDZlGUmCZU/cKLmeQHlrz9rsW996d9VL6jd9tOr/itiK5SlaWDZ1N0WBa88VaEKZOSp6ja8O57GfD8P9F7ev6pl6AhT+ud7ubcxen8eNWHVmdU0DxZ7LM94V7bUvaKD63wlzSowR2DY1OWWBHhVz2flQv7eaJ1CKuWNM8G/IYTIGT1uf1Kn4LeXlVYp+jvkmtTllgR5I6+njcP/ewK4bblVmDIxsoymIBh2bX7+3C/gtbU+HYCVUkxKUvMiOqNt9yS/9X+Pu930qww17sU5OPEm5UlNnRzKOhb3t8RB9j7TEKKnfaRo28KBnr5ihXSb7TjgNPay6vznxfRbIw31Yjcd2t1wdoDHTagPAXTkjKBuvityhIju23R6wZy4IAEranx7VThQiD9o9VDD0W4dCC7+0IUm6rXVvmeFmS/3eUsyUJqBrr/NkQhKEBBbt4iYH3UNmWJCYXXmurzfh7NOVEJ6vdLRExjAUJ9dKZliRaBOjHlvK6T1wMh6uTNZTflbceSM4BAtOdmGTDNqHF27U3ez6M9MaZs3Noa76uq/AuQAZOGGYclIucvW5q3LpaTY06vXr7U94SIXkK255tIVIXLllV5V8daQFz58suq8leJ8rN4ysjSFV2yvMp3dzwlJGTpdkVl8BoVXZ6o8r6BKPDL2mqfP96CEiZAeWXThYjcTYasM00jLITLa6t8CZl/TqhHlVUGzhThASCDt7QZVFpBFtRWex9JVIEJbyIrKptPROzHFEoSXfZQQqDOEvmn26q8CQ3UJHwR4bKleevUlCPIRpT2j/C6uIxjEi0mJEFQgNqbvJ8X5npPBn4BZOYK2uSgoL9uqvOeeOuNeduSUUHSe6Vli5pOFeReYFSy60pzdiNcVFvlezqZlSR93fby6vznbYMjQP+Y7LrSFpH7bIPDky0mDPK4sWxx8/dF7duBmQc8eAig6CfAVcur858drDoHPRBQWqruguLmK1T0RtJ8A4M4CAK3hAPeqgPlACWalEV2rruucaTtMq4ArgWKUmVHgmkC/R+XLUvb0mAHnZSH6q5c3FDiUXMhwkKU4am2JzakEbijVSK33F5VmNLcq5QL2s5Cf12hu9VTisrFZM499gNEV4Y9rXet8Jc0pNoYSCNBu1JW2ThTxLwA9GLSbbgj1KvysKHGqljnLJNJWgrajt+vrqbm4Om2wTyUucCMFJnyIbAW4ZlCj/dZv1/SdmI/rQXtybU3BA42bZ2rKnMQOQF0Evt7wnHshEG2oLpe0bUum7X9rSVJNzJK0J6Ulqo7b1jjREPM6Yah09SWqRiME5uDVCjBGRa56RweNQFhoEmUOjX4OzY7VHQTKptttT5u3luwNdrErHTi/wG5jWw4PKXrYgAAAABJRU5ErkJggg==) no-repeat;background-position:center;background-size:50px 50px}.voice_commands .voice_commands_traffic .voice_commands_traffic_wrapper .traffic_btn_img_btn 
.start_recorder_img[data-v-4a944260]{width:50px;height:50px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAHQAAAB0CAYAAABUmhYnAAAABmJLR0QA/wD/AP+gvaeTAAAQMElEQVR4nO2de3xU5ZnHv8+ZyYUkJIGEgNwSLpKEKJgMwmqpBlArXW3tVv1Q78qnunbFC9XWlbqrtVpFilY+2lqrYFFWrVK3VUHFLO3qtkguiIaEi5gJF5UQLrmRkJnz7B+TBEJIyFzPGZjvXzNz3ssv+c175r0873uEKKagoDJeEpLHeSEPlQkqmivIKJAU0GSBZIVBQHJHlmaB/QrNIM2gTSC1qG5BdIsDqrWt+fPKyoLDVv5dwSBWC/CHCZN3jHAY5kwVnSHIdGAM4AxxNR5Et6vykYGUeFVKNleM3h3iOsKGrQ11uUrjWs2s2aahs1FmALkWSdmMaomKrEqWutVlZVPaLdJxQmxp6MTJ7gJ1cq0qNwpkWa3nGPYL/FGE5ZVl2R9aLeZYbGPoWNfnaQnqvAW4Aci3WE5/qQKWtYnn2e1l4w5aLQZsYOgE1+5Mp7bfpnA7vg5MNNKoylIx9NGqspwvrRRimaFnTNs+1Otx3IUyD0iySkeIaVbleY/X8fi2jSN3WiEg4oa6XKVxLQz5EcrDHBlOnGwcEljYkmo+WrN2TGskK46oobmumhmGytNEz29ksGwzMOZVlo9aHakKI2LohMk7Rjgc5mLgykjUZ0NeNVXmR2I8G3ZD84rclwgsAzLCXZfNOaiqc6srct4IZyVhM7S4WJ17Gmp/pnA/YISrnihDVVniONx0T7imF8Ni6Bln7xrl9XpeAc4NR/nRjsJ608GcLeuzt4e67JAbml9Yex7onxAGh7rskwpln4hctql89P+GstiQ3goLCmu/g+jqmJn9QBis6PsFhe7LQ1lsyAzNc7mvN0XfAAaEqsxTgARTeCWvyH1zqAoMiaH5Lvcdoiwl9EtZpwIOgd9OLHI/EIrCgjY0v6h2AcqT2GBeOIoRhf+cWFR7X9AFBZM5v9B9HcKyYMuJ0YUienNVWc7vAy0gYCPyityXCqwkdpsNNV5Fr6wuz1kZSOaADM0rrDlHRNZw8qyS2I1DgnwrkCGN34bmF27PRhwVRO/aZbRQb4inqLJsXK0/mfzqFLlcpXGIYwUxMyNBhqmO1woKKuP9yeSXoYfIfJTYdF4EkWlmQspDfuXob8L8KTXfxpS3/MkTIySooXJZZcXoP/cncb/M6YiH3Rib0rOMvYhO6k+8Ur9uuQ6HuThmpqVkimks6k/CE7bQ/CnuCzF5L3hNMYJFlAs2VWR/0Geavi4WFFTGmwkpG7EuYj1Gd7a2Nxw+c9u209t6S9DnLdebmHIvMTPtxOnOtPj5fSXotYV2BHZtJbYcZjdaEB3fWwep1xbqcJh3EzPTjiSpGr220uO20LypOzPE460BUsKlKkZQNHslLmdL2fC9x144fgv1eOYTM9POJDvN9nnHu9CjhXbsAqsB0sOtKlykpxlcODOJ4vMGkD3KSUaGg9ZWZU+dl49LW3m/pIWNn0XtJu1ODiaYZs6GDWMOHP1hD0Pzi9w/AR6LmKwQEh8v3HjtQOZen0pKct9zJv+3rpWFi/ezZZtt9+6eEBG9Z1NZTrcJh56GutyfoRRETlZoGDzYwVOLMimanND12c5dHso3tFG/z0tiosHYMU5cZyXgdPr+7NZW5T8e2sdbq5utkh0sm6rKs7t51c3Qgik7ppqmuS6ymoInOclgxbKhnD4uDoBPPm3jV08doLS85/g7LdVg7vWpXHfVQOLjBVX48b/vZfX7LZGWHRpEXVVlOeWdb7vdl9TUayOvKHgefmBwl5krXmvkmrlfH9dMgIMNJouXHOCmf93D/gMmIvDIgxmMGxsXScmhQ6WbZ12GulylcYrOibyi4Jg2JZGLZvkiYd55t4W
HF+7H6z1xvvJP2pj34zra25XEBOGnd0Xnmr3CVS5Xade3scvQVjNrNpBpiaoguGVuKuBreQ89ug/V/uct39DGy682ATD93ERyJ/gVHGALBLJayLyo832Xoaahs62RFDjpaQZnuxIB+OPKJg42mH6X8fyLDXg7sl18QXTGvIkaF3e+Puo3VGdaISYYJp+ZgMPhe13y10MBlVG/z8vGT9s6you+FgqgR3lnAOQW1g5HZYJ1kgIja4ij6/UXNYGPJ2tqPQCMGB61IcYTcwtrh0OHoQY6y1o9gZGWduQG09jk/+22k4MHfXkHDozefclimMXQYaiKzrBUTYCEKlpN8aMnZVPElBnQYagI37BWToxgEWE6gFFQUBmPylirBcUIDoXxLldpnEF8ynhiG45OBpxNMnisYYrGYoZOEpxeZ66hIjFDTxJMMXMNQ2NRfScNYuQaqoywWkeMEKGabSAy0GodMUJGhoGorQ2dc3kKTy7MZGiW48SJw8SFM5N4evEQJoy3+5qpZBiovaP77r5jEBfNSuKyS607Wvf2W9OYcf4ArrrS1t99QAcaKLZW2dTsm2cdadHEuWEcmbTv1GJjEgzE3i20dodvJWTMmJ63O4/nyOv4+MBnduM7gsba2nrO6Q4b6iQx0Xe9dqenx3WbkWD75YVN1b742Ym58T1M+/LrI//go5fS/GV4Rwusq+sZuzLpjCNrpFXV9o/lNVCarBbRF+tKfUe2JyZKjwXoL9xHDJ3aEbngL06nUNgR+vn5Fz3XVM+Z5iu3sdFkU5XtDW0zEBqtVtEXpWVtXUFf50ztbtqWrYfZuctn6r98N7BO06ziAaR3rKv+z996Rj101rm+rK0rVMXGtBmIvVtoY5PJJ5/5QkQumZ2McXTQjMLKP/uCpM+alMAlF/tnalKSMH+eb8fHnjovf/uwu6FFZyUwcoTvdvzh3wMLcYkwDQYqtm6hAG+86fvOjRzh5Jvndt/h+OLLDXy9x9eEH/zZYM4s6F9cUFyc8PjDmYwa6TNsyW8P0npMp2jOFb7+Ymur8s57URGIXW+gantDV73b0hXRd/Wc7qOsQ4eUe++vx+NRBgwQXvzdUL73ne4t+VhGjXSy9NksZpzn+3K890ELK/+7+40qa4iDi2b6ogDfXt1MQwARhRFHqXdkDb9rJnCW1Vr6wuOFQekGhZMTGD3KyYaNbew4agixa7eHHbu8FH9zAAkJwqziJGYVDyApycA0FREYOtTJ2UWJ/PDGVB64b3DX2HLd+lbm37uX9mP6Qwt+OpiC/HhU4f6H9rG3vh/R21ZjyFrJc7nvFeWXVms5EelpBqvfHE5qqsH2mnauuOYrDh3qfossOiuBhb/IYPhpJ56E8HrhpVcaWfTrnpH2U12JLH02CxF4e3UL9yzosa/WpuhPHEOGz88EbL8ForVNaTmknDd9AIPSHQzLcvLB2u4dlS+/8vLK600cOGgyZIiTzIyeY9PGRpN317Rwz4J63lrV3CPSPmOwg+d/k0VyskFTs8ntd9fR1BwdQWSKLJEzimryvcgmq8X0BxF45skhnD/d99v33LIGnlhyoNf0pw1zkpPtZEimg8Ymk7o6L9Vb2vF4jm/QoHSDF36T1bUl4p4Fe3l7dVR0hgBQryNXOs4iaiZK4orS0wxeemEoY3N8U4Gvvt7Eo7/aT9vh4FrR2Jw4nlyYyfiOXWwvvtzIY4v3B603grQnSV2yAOS7ajZHU+R81hAHf3huKKNH+b6D1ZsP88ii/b1uIeyL+HhhzuUp3H5rOklJvqnF11Y28eAj/m18shqB6k3l2fkCkFdU84IgN1otyh/S0wwW/TKTc6cdmT36x8etvPlWMyVrD51wZSQn28m3L0rm+5clc9ow3xfDa8ITSw6wdHlDVJkJgPL7qorsH/paaKH7WoQ/WK3JXxwG/ODKgdx2SxqpqUcGnl4Tamraqd7SzoEDXhqaTOKcQupAg9OGOcjPi+/RYaqsOszDC/ezYaP/rdwOqOj
V1WU5KwR8m5UM0V1WiwqUQekGV34/hSu+l9KvIcvRfFzWyorXmlhT0oIZBXMHvaCIjqgqy/myaz0qv8hdTZSf62cYkJ8bz1RXIgUT4xkx3MHYnLiuTUi7dnvYucuDe4eHik/a+Pu6VvYcZ8ksCuk6POPI11m1hCiP0TVN362z8qhlrptvSuXOf/NNwF94adifx2oJgpR0vu764VGRVdbIiREsaphd3nUZOix19CqUr62RFCNQFPYk6d73O993Gbp2rXjU4DVrZMUIFEFfLiub0rW00K1L6PDKctPQ4x4KaCdmFQ/wrYv2Iy5sYu6R9dEHFvTv2PzWVuXlVxrZscv2QWGIwfJu749NkF/k/hQ4I2KK/MTpFMo/Gtl1vFu4eOfdFu6+z/arLD2OhjveoG05Nj680eNR/vSXZv5pamBBYf3B9MK7a+w/KS+iS3t8duwH46dtTY1rj3cTxcerniIc93jVHoEa29ad3qDoM5HTFSMQRPn1sWZCbydaO52Lwd7RgKc4zR4jbsnxLhzX0OqPR9aDPhdeTTGC4JnjnTcPfTwVwlRjERAVwainGM2Gw1zc28VeDd1cMXo3orYPHjvVUOEXlevHfNXb9T43KxmtzY8JVIdeVoyAEN3iOXj4ib6S9GloZWXBYVVsP3N0qmAot/b13DPox+Mmqyqy10BsjtdqRGVFZXlOyYnS9Wt/qKlyF1AftKoYgbJXDfPu/iTsl6GbK0bvxtDr4CQ4tjL6UENlbn+e7gt+PFS9qjTnHVHptbscI0wIC/v7/G3ww1CArLRR94rykf+qYgSGrkui7n5/cvi9BpVfuD0bHOWxZ3KHnXqHw1n42foRO/zJ5PehGVUVY92I+c9A1D5fKgo4ZAiX+WsmBGAoQFX5mH8o/ACw/5J+9OFV9JrKsuwPA8kc8LE21eXZf0G5iVjPN5QoqrdUl+esDLSAoM4pqqrIXi7Iz4IpI8YRVFhQVZHzfDBlhCQwJ9/lvgPliVCVdwqiAj/fVJ79QLAFhcyA/EL3dQjPEyX7TG2EV+HW6vLskKw/h7RF5RW5LxV4FRhwwsQxANpU9erqipw3QlVgyG+RBS73dFN5E8gIddknGfWmwXc3l2aHdKIm5Ic3VpZlf+hwOAtjM0p9oHxsevTsUJsJYTAU4LP1I3ZkpY0uFngQiN5dl6FHVXkqyaibvnljzhfhqCDsvdL8QvcFwEsIQ8Ndl83Zi6HXV5XmvBPOSsJ+Xm5VRfYaDC1E9L/CXZddEZUViE4Kt5kQ4XFjXuGO88Uwn0YpOHHqk4KtKnJbddno9yJVYURPtK6uGPXXJOoKEe7k5A7kbhF4sL3h8JmRNBMsnNmZNGlblscZ9yOFO4E0q3SEmCZVXlDksc0Voy3Z/2/5VF3e1J0Zhsc7T5V5UbzG2ojwjDocj/t2HViH5YZ2Mn7a1tR4T9zNitwQNb+xQqWgyw4723+3bd3pDVbLARsZejQTJ7sL1Mm1mNxgu+GOsk+E10VYHuiaZTixpaGdFBer8+tG97dQYzboTCDfIimbBClRw1w1NCX7vbVrxbYL+7Y29FjyXTWniTJTxZghqt9QGAeE+oFk7QKfq8hHilniMLSkr70kdiOqDD0Wl6s0rkkGj3V4nHlqaC4iEzB1NAZDUDKAFHyGdz49qgloB5oQ6jGpw5Ba1NwsprHF6/RUp+i+7UefKhJt/D+IYnHePFNWNgAAAABJRU5ErkJggg==) no-repeat;background-position:center;background-size:50px 50px}.voice_commands .voice_commands_traffic .voice_commands_traffic_wrapper 
.traffic_btn_prompt[data-v-4a944260]{height:22px;font-family:PingFangSC-Medium;font-size:16px;color:#000;font-weight:500;margin-bottom:16px;margin-left:110px}.voice_commands .voice_commands_traffic .voice_commands_traffic_wrapper .traffic_btn_list[data-v-4a944260]{height:20px;font-family:PingFangSC-Regular;font-size:12px;color:#999;font-weight:400;width:112%}.voice_commands .voice_point_to[data-v-4a944260]{width:47px;height:63px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAFIAAABwCAYAAACNSCemAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAUqADAAQAAAABAAAAcAAAAAAkHfOMAAAU9UlEQVR4Ae2dW4xkx1nH+5zTvbMXe1nbO9Mzu2vSD3YUZRMUgmISiIQhJBIPIeIh4iIHlJjbQ0SkiBekXAwCxAMWiiwURAARhUSJBIag+AGwklUU2RFyjLETOSEImyyemZ7ddfbqvcx0N7//v6pO95mM7ZnZ6Znu6and6TqnLudU/c+/vu+rr+p013q9Xl7bCzeNQH7q1KkcMBt7gN4klgBYPPfcc/uXlpZu+Vavt+8mLzex1SvD+tj58wfPnTt3eI+dG+eDhnbWarV6qnrx4sXs+vXr+2Dn9OnTpw9s/HKTWyPT0Kb7jTNnztQbjUb92rVrjSKEBqBenZubezHLsu7kQrS+npuRzz//fFn60KFDPYDLALQGnofPnz//w/Pz8wfLAnsHayJgRgJkAwDrV65cqe/fv79RJ9y4caMOkPU8z+udTkcsfXF6eroNxhYDa15tghMrygbQMmHBcO8BaE3MTNiQNIMiet0eOxMi1bgCZLfbNdsEoIa2AB0sDksPkPWGs2fPHierBHmwzKQeV4A8cuRITTJSYIiRCoA3CJgwlmZvoZzetKfZDZE/JCPrHBUA00gyUkp7eXlZclFysgF2heSk5OXKyorkZgP2Uq+ed7s3/vv48eOn+5eczCNp7RrKxqwTIxVMOzGPPw1vQHV+iBslUnneLShycmFh4R7NjsqMCTzI77333lIOyiAXcClMcaChvW9fmjk2MI1WMlhZQ7FbhsJYmUsziIJ3oIxOTCCG7nJFRh4+fNg2JMBkg1obMF24EckoEBU6najVi6LWY7gz/N/abrffBv4JeZebhA8b5KmjGN8+FCOT1oaNZmzEstTkYiVk7AFeBqI1TY86/FG1xVB/N4roeLruJMQa2mU/B2XkICNVII1usZVTwK1blpbsVCGBSmC4H6xPTb0Ldv7kE0880Reqytylwcpm0GmhfqK9a4mRq/sttpIGYCtmp5ipIEamAEEFqv6fPHHixHtxgsymvN0aV6eIOC2mrl6V86KBXIRs9QaaujJVRPMUeTSFGNZ1CFrHyY6J1K13Oc4wi0jDPMowk3oNvB11biLt/uTMzMzXiQXzrgsVZSPjh45mYqRCb2qql2SkE0hCZVtri4cqaxmJdIyyUkObNJUmjU9uAIM99fzxxcUz70N+Tit3twUrm8GhraHL7MYgMb7L/sqGhKg+t1xkSMdh7jSZQbFwDyUe6oMkhjvgdoklDjpNsL9fmv2BBx6oPMTyRmN64KFN29f0R8Zh3dBsZmUlxzMUPEEa0prdaKZD3ULHyxrS3a5mQXXNepSmPOypBjcJwz4Mc9aHEAFZ7wWu//Cdd9754phiV2m2Gfly/kgN3cS6pLVdW3
ZkHdFIkC0JsEFjw0oNZ9gJW/MeMjMLQxuGiplmJwVIRHa2YPaHFs6evafSojE9MSPX448U+wDM824YJzb6WAwEz8hEMa3PyqSEmKrDyF5iLNYnLM20ckndGnP2LPvOVKPxhTvuuOPimOIobvQD4EhZWPattiOVjoz0zMcyUoqHYCVDXog7VslmKKyUrOSayE4EpGWkrlLL8nDXnu6uG+a1/OSNGysfwdf5ZpcYw48KkDDNCkMArWVHItPcxWQ76iQpGccMbdmTqm870qX1kTOFtPb29VE8TtNnCCTktYN5Xvzm4uLir4+j87gC5Hr8kanrxJafZqKop4DAtMkDCwWmdh4Ebd2NBrtnRaFsDQlKvk5UJpJU6xhvy+v1PwLQN8SCYxGpL9a8m/FHdruWk2hojHFr7DqGd9e+SnAMhnpRkwxVRiwDbZGPmeQjMSg5n3PkpvAOspNj0rNHL1++8Lm77777+qij6SniZv2RwQkUp4ga1pGY0tpipGCBad5cBNCIw0IWZWRfV3lW344Z+l3ErmuQEI57P3fwllseZIp598gDuRX+yNTJoHAwgTCHZP6Eod3VojgKBqVDgkALMlIWESdBdirRACMMXCBHckh4FFl+rNPt/ily830wOz6qdMfRiU2G1Jyb8UdG+xERhwfIjJQVKZmIToaVZmTVjgy3Jc15AhqpGdR4Hhjp+i7GUfEryM2H5ue//5rU3lGKbZCnBm2FP9LXMiPhD0GD1LSzGRTZaPZJISkfoMVKmUecZYKdKmYk9RniPAYnIzIzhvj1T72wuPiLZMlyGpmQD8kfKbsoyMlVXR20IaXahZtg479lpBDVqWUk+QDKdWSLhoCMkKL64OLi0p+jIOdS+k7HVjatgU1UatAW+iN/wCCXXEReBqKKrPyhgIynQVMDjCXD3Hk905XjwEDJUJCEpm9eXul+nuH+HlXZ6SDzpyiniMPzR9pvKRMJFso/6Rg4mS5qelnIFCLdplADr1s4zmoNELN5xEhuMO9iStnjD1OJmEeCy0mmVPdrnc6hj584cfjcTgFKX/phiP7IeBM0uY+Io9Fe3h3GheFMCqRzECMlRqWGrN0lMEXIEAdmOvuni+KlR+bb7XeFitv/aWUzOLRhyBD9kbIlCUKTMR7MIGloEAMogyyoOAh2pA6jjExAu7IuAq8j4Eha4M5uA+BPzi8sPIjsvFUltjPkKBs13R1M5o/AnMI7roYg752n0ZgaFufayso0x5YBLvMnhuBW4wTSYTsGDoKSjsM1AgBR0fjE99CRqFcyUlqdnKiA4v0jevKZpDv2GasrvHd5pfNljPi3p+ztiM3IHfJHilMGGgPI5k9uewigwNLmDw9KQzuaRAE3QxugEbJ6TIphZ+Alh1Q7vtLpPjy/uPjH27UDJDHSLZMdicbuP2h1dCBouUFsrWnlkLUbZUFIl7H3B1r2px6c8D9MDTVuSxMmWTiMR5DTEOcfxE2ST9Z7aIMxFT6cKz+EGDPkOZedCXAhTXGfp4L0t6f2H/was6Ifi3WHFoVxFy+PAnAHBNY2+yMlL7l3MLJBzTal6CagmGCasW6mS7kcbFaKgC6HuXKV6ISM7oDra7O8OLWw0P4Y5eSgGUqoAAl73Aqe8E74I4VYnK5o3g27ojj00xWplQZ6lIK9FE35YqIZrWQTlSpSRu6OgJYX/iOLi+3H2Nv5umEgWQFyRPyRYBUZSY+BbbDfnCT0SJYyUgBUy0joRwE9DKW7otjqMkrOsregiJ5kVvQ7/fSQe7OfAjJrtVpuUJKR3MR7f3RxtLeXGHSspQaFtNSghS/sZTl041JD8JAH7w+EwG+h4Re7zhWk2xmmfnyodJhvrQ6bwtCOjKRCYB/FjQoJwfwJEKm+WswFfClGN2cKlpHOihirhU6PHzT/ALz9xGJ76VF2z93p3C348BRxdP2R4BcBUV8TOwFD7IoPRDmcWhnBTDNQjygSUYIgHqcU1aDcz8DOZ1hj/1Wd32zIox3p62
x2f2RqxDD8kRE8UDOvLQEtI6GjDHIpI4HnoS1AzdVgXKR2BXANHvhbOTlGJfxQp9v79GK7/fClS5duageIR0a6YTLI9TgHtfZ69kdq0NoEGo4/EgpGH2VQJm5yYJjBS4yEnPFccewd3YlDP3IyMTSeQvpfuHzlyjdxgPx8wmKjsQ3yVGnU/ZGxnWFgM2Q9vJUIUGYkcJmdJBHLSHeIjGSIR1amfMoEhqpYNoOS/yKK6K83M8XU0NZVHLZwf6ToqUbGZ57ugJwLLLF8s/DyOR9SOBquGsKc6lgX0NDmMykT+kumR7PyhU2Qi8KIgvqvsr6z7MjVQRkCWbELuXAEmDTu+wFmRc8A5k+trvtK51Y2rVbLDZWMVOHx8EfSeTBPaNIBt10Y0QWdlnYkgKl//kggKkHHCilNx67c670GML+y0G4/SN66XjIQI3tJa3fZQ66g3Wi+KC3QZnyf8KEpIjaQT6MJFOViuXzo9kFGQthp4b5yBhNDq8U2rdNoXZu+inta1emPQhVWfQIx828fqrIVj0/50H8u4HoBPCGisvqQtoHIZl+4gBKVW/nsg6iq1ClB5VTc/jBm0jdg56vuAElN9i3G1x8Z4ekP14AoQs9PS1qdHgooPRfFCuTpIx47wWnOj2U4fj2a/euw86Mc910JrtX/sLJpDQxtCo+pPzJ0iuZzIOkbAEpd1VmZRp7KqajSfaz8eKxyZVkREx8i7PyDdnvpMdj5Wor9QMg1tEn1XZP5Q8Ux9keqLyZg7KxBNboCzIE4gJVYSpWYp/RUTrGeR6ysycE9yM6nFhaWPkieMYs3qZmRu80fSeckI2MfdehOC+CQJnQIKpOYlwBUNR+Tn9JUOpRzvQMIiofaS0v/OviCVmKkrltLc22f8KEWpGPFY+ePHGw83TG71KXYrQSUupk6GjEuy6RLBNCNifOo8rOaYrJOdJ/KVPxzq/2RLCnERxguJ6eFpoHS2HK50RDvj9TyqtJ9zlMnsUYhLRFS0RtXWNkqm+j9kW47qs77WdDH4XJhuCR/pLSuPeT2R8pu5HL80W0uxvCVqiSZTupcjXQukUGDRu407VJDwkdsRgBGdajpqjrkWkQCmjo6cnDlmJfSVZ9whAqfWWif+YmK1t7F/khQAcfQeQE/AJQBcZrTQUdgRaAMpCD1+Rr1eUZfOHRg6mMVIHe7PzKxyegkcAKTDV4AMOSm434dMbfPUo7ngfs9c7Ozv4SSPisgs1ar5ceSZCTo72Z/ZGCcOg4wwiaxT3E6d1rErUynjo7zLP+bfY3662dnZ/+ZJAdPEdPMZnCuzU1e9X3t4e+PZN4Th5Nau5X+SBAJCOgT9AJwAbkAcJC6To8lYd1zRZ69c3Z25v7bb7/9Qv8CtC3akU6bNH+ksDTb6L3BI9a5/+Kx0lGmLGNmD9VqM29sNpuPGqxVHxWtLYOcTfjUDf7IpLXlj5Rm1jQbheyX3nUdKWqzUhsEWCvgpUWWDOL+SL9lo9m0PIladvASaxhLPFqtS/n5a94t/V/Yw6OukYEWlyYmBDUq3Rz8kaiMOOBCfZ26kObyQoYqLkDsAydZYdPiWFV5OiNKxEzM0y3L4OPs21z//uZs87EyfY0DG+QpfZL9kYMABlCzFZ7Mn8w2p98EC18RROGnoa3YYVBGDnrIlZne/DJLTJtXfF97bP2REFij76kir91zbHb29+jvul4EsLJptYLW3vNH1q6j0D7anJl+C69E/4fZtc4PK5uktSfZHwlej/MSy4+ikf8QFrInZ2NBdmQZJtQf+RKK7MOzzZm3Hzt69NkSjA0eWNkMDm0E7cT4I8Hqy0U29cbZ2ek/g4XRSb9BBGNxD22OrfMnxR9JZy8UefFbc7PNdzSbR/5nc9BVa5mRk+SPxMr8UqNenGw2p/+yCsXNnSVG+ipprp0uqWGejhWPuT/yLF91cB/K5N1Hjx59YbBfW3FcUTbYTx7iAnC1HambDfF97aHtjzRIWfYPrN3+CCz87FaAtt
Y1KlPE1f5IbUFBCJf1YKSnitpDrimjgsqIuIrx3fYKpoqupB1qlAnLbszftPmsq2kgWwAQ62EfWSI8Cbn8hThkNdPzi59UkPjn9mqB9qDzrhh3YnDqhvbohkzaqGSqqW6aNlOq221n9eJ3j01PP6K2DjNUGLmb/JHA/fn9U/veOrcNIOoBCcgsmT9JRopi+gYBhXHbH8k6xv8hoX4ZWfih1a4ud2hIH54ippnN4Fyb4TJe/khtuuh1/67bWXkn07uvDgmvl71sRWuPsT/yf5GH9/Gd6Q8cO3bspZft7RAzKspm3PyR2Bh4R/PPNGen/0IDaIg4veqlbZCnUmPmj/xuvV78GrLwEzsNovDT0FbsMCgjV9uRo+KPxLRawUj6q2Zz5v0Y1t9Obd/p2Momae1R90diNT6LTvmNubmZv4WFG3Z1DRNs2bYj/742EwW9Y/e5uZmZfwJAmekjFyrKRv5ILX1pxy7TwfD9kcwa9N5MDOH7IznPGetsAo1bVnoshGnjKNMgp2l6oe+P9MyGaYjMVc1i9L62Zh96X5sBymTHkxRdXO/RaKeJpjFxRqOYDanf5McjPsnvRCyo2KgGK5vBoS1jfHjfHxlf1hSukExTRc0J9FInESATjLei3jVmmJ9iJ8PHRx1ENbuOslHTxYNaMn8Ept7X1tzaLCNv9fvacnAoaI6teTffHxkuwrXEYKXpfe2eN88DC0DhieaA2wXGaYXVgIq/ur+S2cWgafN/Lq+sfHruxIkd+2oFtWcjoX4q/MKSfqPB9Yh7/BaDX+pk15mGnoAuVxFdSIvZQqnTwQ5mjOpbI7VuzXdKxvVtneo7lDKcFr6Avz9SD0yYAaDWtYUzT4GYslrczrPLgPj3LH8+7vuM0UceGekmp7l2an8CMZ0P0x+JKH7y8sWLvz+OIAqfirLRcFWiAJQdmXZaKE1hSPsjLyEk/hEAnw53Gc/PCpDb7o/Mat/gYX2J79m9Op7w9VtdAVIzGykPZKR/z0bKRu/ZkJZqmLHxxPIT8LNcm4KkgtkMpDeP+R5EkZrRGkSfTD/eA0OZSEDig816F5aXbzwCgN9NFx73WH0VmJv6PZvNfH9knu97ambm9q+g8MMvaYw7grH92+aP5IGd58vhP9ts3vEvuw1EYSlGllPEq1evok8aBWZPnWHtX1hChjUOHDjgX1bqdHJGbUc/t1IHFH0LtL+qi2vEb33WNz7XWbZZVqxvgA5fx1UUz+Dy188LjNT8eCsHQ0VGJoOcDvt97aS1N7s/Eivg+8jQrzJHXtrKRo/itYbij0Q1dQHxSaZ2D7PPeteDqAdrZcOaTZ0ZTfmTpxraAKHhrCFb/nCa0hjOTpOiYUewjv2TArgs/IXtlLnAsP/322677fwoMmdYbdoyfyQGD6O4+zQLT/82aSAmRpbK5somvz+SCdJFTJqnYWf4/ZZhPfYRvm5F2WzUH8lcUix8Fo/1aUC0c2OE+zrUpm3aH4mv7EU2ETzO8uf3Jh1EPSHbkcTr/j0b7Ev0Sf69SdHG66VxgRtNE2lWDvblzKlzHLr8pFenYL6tc+UZbMxrTaAvAuB/3XrrrZfWe4NJKVeZ2bBWs+bva4uFzG4Wt3Mvzbg9gIqyYciu5Y+8hBw8gxwsV8DGrZPb0d4KkGhga15A00piB7v83E7tpdmOzm/lPSpAJn8kM5qX8BVeAk+tR+2FdSDgKSLl7I+8cOFCjoPi2smTJ3eVr3AdONx0EWltLYoWuNA6d91111WmeHuycDOwyh/Jn9fmN1N/r05EABAH12H2cNlDYGcR+H83Z8q9vSzjfQAAAABJRU5ErkJggg==) no-repeat;background-position:center;background-size:47px 63px;margin-top:164px;margin-right:82px}.voice_commands .voice_commands_IdentifyTheResults 
.voice_commands_IdentifyTheResults_title[data-v-4a944260]{height:26px;font-family:PingFangSC-Medium;font-size:16px;color:#000;line-height:26px;font-weight:500;margin-bottom:30px}.voice_commands .voice_commands_IdentifyTheResults .voice_commands_IdentifyTheResults_show[data-v-4a944260]{width:503px;height:264px;background:#FAFAFA;padding:40px 0 0 50px;box-sizing:border-box}.voice_commands .voice_commands_IdentifyTheResults .voice_commands_IdentifyTheResults_show .voice_commands_IdentifyTheResults_show_title[data-v-4a944260]{height:22px;font-family:PingFangSC-Medium;font-size:16px;color:#000;font-weight:500;margin-bottom:30px}.voice_commands .voice_commands_IdentifyTheResults .voice_commands_IdentifyTheResults_show .oice_commands_IdentifyTheResults_show_time[data-v-4a944260],.voice_commands .voice_commands_IdentifyTheResults .voice_commands_IdentifyTheResults_show .oice_commands_IdentifyTheResults_show_money[data-v-4a944260],.voice_commands .voice_commands_IdentifyTheResults .voice_commands_IdentifyTheResults_show .oice_commands_IdentifyTheResults_show_origin[data-v-4a944260]{height:20px;font-family:PingFangSC-Medium;font-size:14px;color:#666;font-weight:500;margin-bottom:12px}.voice_commands .voice_commands_IdentifyTheResults .voice_commands_IdentifyTheResults_show .oice_commands_IdentifyTheResults_show_destination[data-v-4a944260]{height:20px;font-family:PingFangSC-Medium;font-size:14px;color:#666;font-weight:500}.voice_commands .voice_commands_IdentifyTheResults .voice_commands_IdentifyTheResults_show_loading[data-v-4a944260]{width:503px;height:264px;background:#FAFAFA;padding:40px 0 0 50px;box-sizing:border-box;display:flex;justify-content:center;align-items:center}.voice_commands 
.end_recorder_img[data-v-4a944260]{width:50px;height:50px;background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADIAAAAyCAYAAAAeP4ixAAAABmJLR0QA/wD/AP+gvaeTAAABU0lEQVRoge3ZPUoEQRCG4bcWfxDNDIzUW4jiegUTMfUICuaeQIz2BOZi4gEEg0XY0NBIcyMR/8DPYCdapnF2u53ugXqSnqAo6puZbgYGnHOuKUk9SXuSVnLPEkXSicauc83QS9RnvVo3EvWbWqog2XmQGJK2JJ1LWkvVcy5VoyldAH3gAzhL0TDXq7U0sUbzPVKa2iCSViUNJV22PdCsQk9kG9gBjiQttzjPzEJBeg1qWiNpX9KxpOAsuY7fxiQtAlfAPPAA3NbVZb/bDSwwDgEQ/LruQpBGPEhpPEhpPEhpPEhpPEiLvoDv6vo1VFT8R6OZfUo6ADaBu1BdKMhP4DoLM7v5qyYU5B4YAo9m9pZ0qn9SG8TMXoDdlmeJ0oXN3ogHifQ+sUbLdfyeAofAIFXDLEHMbASMUvb0PVKaVEGeq/UpUb88qr+6/c7/1XXOudb8AikhXZ/+ExptAAAAAElFTkSuQmCC) no-repeat;background-position:center;background-size:50px 50px}.voice_commands .end_recorder_img[data-v-4a944260]:hover{opacity:.9}.experience{width:100%;height:709px;background-size:100% 709px;background-position:initial}.experience .experience_wrapper{width:1200px;height:709px;margin:0 auto;padding:0;box-sizing:border-box}.experience .experience_wrapper .experience_title{height:42px;font-family:PingFangSC-Semibold;font-size:30px;color:#000;font-weight:600;line-height:42px;text-align:center;margin-bottom:10px}.experience .experience_wrapper .experience_describe{height:22px;font-family:PingFangSC-Regular;font-size:14px;color:#666;letter-spacing:0;text-align:center;line-height:22px;font-weight:400;margin-bottom:30px}.experience .experience_wrapper .experience_content{width:1200px;margin:0 auto;display:flex;justify-content:center}.experience .experience_wrapper .experience_content .experience_tabs{margin-top:15px}.experience .experience_wrapper .experience_content .experience_tabs>.ant-tabs-nav{margin-bottom:20px}.experience .experience_wrapper .experience_content .experience_tabs>.ant-tabs-nav:before{content:none}.experience .experience_wrapper .experience_content .experience_tabs>.ant-tabs-nav .ant-tabs-nav-wrap{justify-content:center}.experience .experience_wrapper .experience_content .experience_tabs>.ant-tabs-nav 
.ant-tabs-tab{font-size:20px}.experience .experience_wrapper .experience_content .experience_tabs>.ant-tabs-nav .ant-tabs-nav-list{margin-right:-32px;flex:none}.experience .experience_wrapper .experience_content .experience_tabs .ant-tabs-nav:before{position:absolute;right:0;left:0;border-bottom:1px solid #f6f7fe;content:""}.experience:after{content:"";display:block;clear:both;visibility:hidden}.speech_header[data-v-e2003d16]{width:1200px;margin:0 auto;padding-top:50px;box-sizing:border-box}.speech_header[data-v-e2003d16]:after{content:"";display:block;clear:both;visibility:hidden}.speech_header .speech_header_title[data-v-e2003d16]{height:57px;font-family:PingFangSC-Medium;font-size:38px;color:#000;letter-spacing:0;line-height:57px;font-weight:500;margin-bottom:15px}.speech_header .speech_header_describe[data-v-e2003d16]{height:26px;font-family:PingFangSC-Regular;font-size:16px;color:#575757;line-height:26px;font-weight:400;margin-bottom:24px}.speech_header .speech_header_link_box[data-v-e2003d16]{height:40px;margin-bottom:40px;display:flex;align-items:center}.speech_header .speech_header_link[data-v-e2003d16]{display:block;background:#2932E1;width:120px;height:40px;line-height:40px;border-radius:20px;font-family:PingFangSC-Medium;font-size:14px;color:#fff;text-align:center;font-weight:500;margin-right:20px}.speech_header .speech_header_link[data-v-e2003d16]:hover{opacity:.9}.speech_header .speech_header_divider[data-v-e2003d16]{width:1200px;height:1px;background:#D1D1D1;margin-bottom:40px}.speech_header .speech_header_content_wrapper[data-v-e2003d16]{width:1200px;margin:0 auto 20px;display:flex;justify-content:space-between;flex-wrap:wrap}.speech_header .speech_header_content_wrapper .speech_header_module[data-v-e2003d16]{width:384px;background:#FFFFFF;border:1px solid #e0e0e0;box-shadow:4px 8px 12px #0000000d;border-radius:16px;padding:30px 34px 0;box-sizing:border-box;display:flex;margin-bottom:40px}.speech_header .speech_header_content_wrapper 
.speech_header_module .speech_header_background_img[data-v-e2003d16]{width:46px;height:46px;background-size:46px 46px;background-repeat:no-repeat;background-position:center;margin-right:20px}.speech_header .speech_header_content_wrapper .speech_header_module .speech_header_content[data-v-e2003d16]{padding-top:4px;margin-bottom:32px}.speech_header .speech_header_content_wrapper .speech_header_module .speech_header_content .speech_header_module_title[data-v-e2003d16]{height:26px;font-family:PingFangSC-Medium;font-size:20px;color:#000;letter-spacing:0;line-height:26px;font-weight:500;margin-bottom:10px}.speech_header .speech_header_content_wrapper .speech_header_module .speech_header_content .speech_header_module_introduce[data-v-e2003d16]{font-family:PingFangSC-Regular;font-size:16px;color:#666;letter-spacing:0;font-weight:400}.app{background:url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAC0AAAAWKCAYAAABrA+TvAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAALQKADAAQAAAABAAAFigAAAABI7lAXAABAAElEQVR4AezdbW+c5RUE4HvXmxel0IKKxGvb//+rSgstX4AGQhJiO/vUTlRVAiKISNY+M5clhMCxnzPXOJt8mDi7J0+P2/JGgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgRKBS4vt3U47GLS//jjtu7fz8lzfnHVz9lu7fcZFZ2fb2u7Wmrcu5fR0cXFWs+ebeudd1LybOvxk2394cFu3bkzP9P169v3j47r7p19REfXP3e++fb5ixeDD/58FvGi8PU3x7VdzbfefXcf8dr97X+O6+Lqdfve3d167735L9wPHx7X9a+r128ffni2dsNfFr77/riePDm+yPOnP+7XgwezO3r8eFsPv3v5mnD9NffBB7NfF54+Pa6vv33Zz3VJn358GP37n4urX4O++uplP9d53n9/v959Z/bX3Of/uLyO8uLt/tXv5T76aPbX3D+/uFiXz///wva3vxzW2ZBIf//8Ys3+avrfV5J/EyBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBQIWAAXVGzkAQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQyBAygM3qUggABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgECFgAF0Rc1CEiBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEMgQMIDO6FEKAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAhUCBtAVNQtJgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAIEPAADqjRykIECBAgAABAgQIECBAgAABAgQIE
CBAgAABAgQIECBAgAABAgQIECBAgAABAgQIVAgYQFfULCQBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBDAED6IwepSBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBQIWAAXVGzkAQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQyBAygM3qUggABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgECFgAF0Rc1CEiBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEMgQMIDO6FEKAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAhUCBtAVNQtJgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAIEPAADqjRykIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIVAgYQFfULCQBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBDAED6IwepSBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBQIWAAXVGzkAQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQyBAygM3qUggABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgECFgAF0Rc1CEiBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEMgQMIDO6FEKAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAhUCBtAVNQtJgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAIEPAADqjRykIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIVAgYQFfULCQBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBDAED6IwepSBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBQIWAAXVGzkAQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQyBAygM3qUggABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgECFwKEipZAECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECPyiwHFba3f8xXfdyv9pAH0ra3EUAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgdMIfPHl5Wke9Eaeslv7N/J5fBICBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAicQMAA+gTIHkGAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwJsROLyZT+OzECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECAwQeDsbLe2bZtw6s9ufH5cywD6Zyz+BwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIFcgU8/mTsh/vdXl2ufW41kBAgQIECAAAECBAgQIECAAAECB
AgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAikCRhApzUqDwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIFgAQPo4HJFI0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIJAmYACd1qg8BAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBIIFDKCDyxWNAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQJqAAXRao/IQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQCBYwgA4uVzQCBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECaQIG0GmNykOAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEAgWMAAOrhc0QgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAikCRhApzUqDwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIFgAQPo4HJFI0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIJAmYACd1qg8BAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBIIFDKCDyxWNAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQJqAAXRao/IQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQCBYwgA4uVzQCBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECaQIG0GmNykOAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEAgWMAAOrhc0QgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAikCRhApzUqDwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIFgAQPo4HJFI0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIJAmYACd1qg8BAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBIIFDKCDyxWNAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQJqAAXRao/IQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQCBYwgA4uVzQCBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECaQIG0GmNykOAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEAgWMAAOrhc0QgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAikCRhApzUqDwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIFgAQPo4HJFI0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIJAmYACd1qg8BAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBIIFDKCDyxWNAAECBAgQIECAAAECBAgQI
ECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQJqAAXRao/IQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQCBYwgA4uVzQCBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECaQIG0GmNykOAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEAgWMAAOrhc0QgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAikCRhApzUqDwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIFgAQPo4HJFI0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIJAmYACd1qg8BAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBIIFDKCDyxWNAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQJqAAXRao/IQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQCBYwgA4uVzQCBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECaQIG0GmNykOAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEAgWMAAOrhc0QgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAikCRhApzUqDwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIFgAQPo4HJFI0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIJAmYACd1qg8BAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBIIFDKCDyxWNAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQJqAAXRao/IQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQCBYwgA4uVzQCBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECaQIG0GmNykOAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEAgWMAAOrhc0QgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAikCRhApzUqDwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIFgAQPo4HJFI0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIJAmYACd1qg8BAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBIIFDKCDyxWNAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQJqAAXRao/IQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQCBYwgA4uVzQCBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECaQIG0GmNykOAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEAgWMAAOrhc0QgQIECAAAECBAgQIECAA
AECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAikCRhApzUqDwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIFgAQPo4HJFI0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIJAmYACd1qg8BAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBIIFDKCDyxWNAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQJqAAXRao/IQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQCBYwgA4uVzQCBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECaQIG0GmNykOAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEAgWMAAOrhc0QgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAikCRhApzUqDwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIFgAQPo4HJFI0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIJAmYACd1qg8BAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBIIFDKCDyxWNAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQJqAAXRao/IQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQCBYwgA4uVzQCBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECaQIG0GmNykOAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEAgWOAQnE00AgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAm9U4Nn5tr781+Ub/Zw+2W8XuLyiN4D+7V5+JAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQLnAtq11PcL1djMCV/xrfzOP9lQCBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAi8voDvAP36Zj6CAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECgSGB/dv19h73dBoHtuFsG0LehCTcQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAjcWoG/fnbn1t7WdtjDh8e1bwstLwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECcwUMoOd253ICBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECdQIG0HWVC0yAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEBgroAB9NzuXE6AAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECgTsAAuq5ygQkQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAjMFTCAntudywkQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAjUCRhA11UuMAECBAgQIECAA
AECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIG5AgbQc7tzOQECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIE6AQPousoFJkCAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIDBXwAB6bncuJ0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIFAnYABdV7nABAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBOYKGEDP7c7lBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBOoEDKDrKheYAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwFwBA+i53bmcAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQJ2AAXRd5QITIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQmCtgAD23O5cTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQqBMwgK6rXGACBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECcwUMoOd253ICBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECdQIG0HWVC0yAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEBgroAB9NzuXE6AAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECgTsAAuq5ygQkQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAjMFTCAntudywkQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAjUCRhA11UuMAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIG5AgbQc7tzOQECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIE6AQPousoFJkCAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIDBXwAB6bncuJ0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIFAnYABdV7nABAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBOYKGEDP7c7lBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBOoEDKDrKheYAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwFwBA+i53bmcAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQJ2AAXRd5QITIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQmCtgAD23O5cTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQqBMwgK6rXGACBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECcwUMoOd253ICBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECdQIG0HWVC0yAA
AECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEBgroAB9NzuXE6AAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECgTsAAuq5ygQkQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAjMFTCAntudywkQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAjUCRhA11UuMAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIG5AgbQc7tzOQECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIE6AQPousoFJkCAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIDBXwAB6bncuJ0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIFAnYABdV7nABAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBOYKGEDP7c7lBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBOoEDKDrKheYAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwFwBA+i53bmcAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQJ2AAXRd5QITIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQmCtgAD23O5cTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQqBMwgK6rXGACBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECcwUMoOd253ICBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECdQIG0HWVC0yAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEBgroAB9NzuXE6AAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECgTsAAuq5ygQkQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAjMFTCAntudywkQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAjUCRhA11UuMAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIG5AgbQc7tzOQECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIE6AQPousoFJkCAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIDBXwAB6bncuJ0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIFAnYABdV7nABAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBOYKGEDP7c7lBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBOoEDKDrKheYAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwFwBA+i53bmcAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQJ2AA
XRd5QITIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQmCtgAD23O5cTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQqBMwgK6rXGACBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECcwUMoOd253ICBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECdQIG0HWVC0yAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEBgroAB9NzuXE6AAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECgTsAAuq5ygQkQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAjMFTCAntudywkQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAjUCRhA11UuMAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIG5AgbQc7tzOQECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIE6gUNdYoEJECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgROJvDDo+N69uN2sud5ULbA+cW2DKCzO5aOAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIHCjAheXa11cGkDfaAlBD9+2be2D8ohCgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgEC4gO8AHV6weAQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBUwrs1m7tfYveU5JXPWvbdssAuqpyYQkQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECb1fgcLVO/exTE9W3q9z72c/Pt2Vf39u/5AQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgTGCRhAj6vMwQQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgR6BQyge7uXnAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgMA4AQPocZU5mAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgECvgAF0b/eSEyBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEBgnYAA9rjIHEyBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEOgVMIDu7V5yAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAuMEDKDHVeZgAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAr0CBtC93UtOgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAYJyAAfS4yhxMgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAoFfAALq3e8kJECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIjBMwgB5XmYMJECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAg
AABAgQI9AoYQPd2LzkBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBcQIG0OMqczABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBXgED6N7uJSdAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECAwTsAAelxlDiZAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECDQK2AA3du95AQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgTGCRhAj6vMwQQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgR6BQyge7uXnAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgMA4AQPocZU5mAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgECvgAF0b/eSEyBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEBgnYAA9rjIHEyBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEOgVMIDu7V5yAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAuMEDKDHVeZgAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAr0CBtC93UtOgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAYJyAAfS4yhxMgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAoFfAALq3e8kJECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIjBMwgB5XmYMJECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQI9AoYQPd2LzkBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBcQIG0OMqczABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBXgED6N7uJSdAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECAwTsAAelxlDiZAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECDQK2AA3du95AQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgTGCRhAj6vMwQQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgR6BQyge7uXnAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgMA4AQPocZU5mAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgECvgAF0b/eSEyBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEBgnYAA9rjIHEyBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEOgVMIDu7V5yAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAuMEDKDHVeZgAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAg
AABAgQIECBAgAABAr0CBtC93UtOgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAYJyAAfS4yhxMgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAoFfAALq3e8kJECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIjBMwgB5XmYMJECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQI9AoYQPd2LzkBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBcQIG0OMqczABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBXgED6N7uJSdAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECAwTsAAelxlDiZAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECDQK2AA3du95AQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgTGCRhAj6vMwQQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgR6BQyge7uXnAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgMA4AQPocZU5mAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgECvgAF0b/eSEyBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEBgnYAA9rjIHEyBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEOgVMIDu7V5yAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAuMEDKDHVeZgAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAr0CBtC93UtOgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAYJyAAfS4yhxMgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAoFfAALq3e8kJECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIjBMwgB5XmYMJECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQI9AoYQPd2LzkBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBcQIG0OMqczABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBXgED6N7uJSdAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECAwTsAAelxlDiZAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECDQK2AA3du95AQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgTGCRhAj6vMwQQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgR6BQyge7uXnAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgMA4AQPocZU5mAABAgQIECBAgAABAgQIECBAgAABAgQIECBAg
AABAgQIECBAgAABAgQIECBAgECvgAF0b/eSEyBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEBgncDgex93sYAIECBAgQIAAAQIETihwPG7rcNid8IkeRYAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBB4tcBh73tAv1rHewgQIECAAAECBAgQWNtm/OzLgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIELg9AubPt6cLlxAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAg8CsCh195v3cTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAicUODRD8e185f1nlDcoyYJPH++LQPoSY25lQABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEIgX+P7RMT6jgAR+j8D+93ywjyVAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgMApBXwH6FNqexYBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBA4CcCu91aZ76l7U9U/CeBVwsYQL/axnsIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAm9d4P79/frkYwvotw7tATECfrbEVCkIAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgXwBA+j8jiUkQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgECNgAB1TpSAECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIE8gUMoPM7lpAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAjIABdEyVghAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBDIFzCAzu9YQgIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIxAgbQMVUKQoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQCBfwAA6v2MJCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECMQIGEDHVCkIAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgXwBA+j8jiUkQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgECNgAB1TpSAECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIE8gUMoPM7lpAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAjIABdEyVghAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBDIFzCAzu9YQgIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIxAgbQMVUKQoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQCBfwAA6v2MJCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECMQIGEDHVCkIAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgXwBA+j8jiUkQ
IAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgECNgAB1TpSAECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIE8gUMoPM7lpAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAjIABdEyVghAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBDIFzCAzu9YQgIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIxAgbQMVUKQoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQCBfwAA6v2MJCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECMQIGEDHVCkIAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgXwBA+j8jiUkQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgECNgAB1TpSAECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIE8gUMoPM7lpAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAjIABdEyVghAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBDIFzCAzu9YQgIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAinP/GgAAQABJREFUAQIECBAgQIAAAQIxAgbQMVUKQoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQCBfwAA6v2MJCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECMQIGEDHVCkIAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgXwBA+j8jiUkQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgECNgAB1TpSAECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIE8gUMoPM7lpAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAjIABdEyVghAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBDIFzCAzu9YQgIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIxAgbQMVUKQoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQCBfwAA6v2MJCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECMQIGEDHVCkIAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgXwBA+j8jiUkQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgECNgAB1TpSAECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIE8gUMoPM7lpAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAjIABdEyVghAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAA
QIECBDIFzCAzu9YQgIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIxAgbQMVUKQoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQCBfwAA6v2MJCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECMQIGEDHVCkIAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgXwBA+j8jiUkQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgECNgAB1TpSAECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIE8gUMoPM7lpAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAjIABdEyVghAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBDIFzCAzu9YQgIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIxAgbQMVUKQoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQCBfwAA6v2MJCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECMQIGEDHVCkIAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgXwBA+j8jiUkQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgECNgAB1TpSAECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIE8gUMoPM7lpAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAjIABdEyVghAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBDIFzCAzu9YQgIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIxAgbQMVUKQoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQCBfwAA6v2MJCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECMQIGEDHVCkIAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgXwBA+j8jiUkQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgECNgAB1TpSAECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIE8gUMoPM7lpAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAjMBh22KyCEKAAAECBAgQIEDg1gjsdrfmFIcQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBKIEDpeXFtBRjQpDgAABAgQIECBw4wLXf8jw7l0L6BsvwgEECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBApMDhzh3DjMhmhSJAgAABAgQIELgxgfNzf8jwxvA9mAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEIgX2McnFJAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgRiBQ0wSQQgQIECAAAECB
AgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIE6gePVX9L77Jm/qbeueIGrBQygq+sXngABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIzBa4uNjW1988nx3C9QQIvJbA/rV+tB9MgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBGxTwHaBvEN+jCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgdcX2O22td+9/sf5CAIEMgQMoDN6lIIAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECNQIPHiwX9f/eCNAoFPAz/7O3qUmQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgMFLAAHpkbY4mQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAg0ClgAN3Zu9QECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIERgoYQI+szdEECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEOgUMoDt7l5oAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIDASAED6JG1OZoAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAp4ABdGfvUhMgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAYKWAAPbI2RxMgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBDoFDCA7uxdagIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIjBQygR9bmaAIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQKdAgbQnb1LTYAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQGCkgAH0yNocTYAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQKBTwAC6s3epCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECIwUMIAeWdt/2bubJimOKwqg1T01goU/8P//i9pYDtsgZro9hLcpyW6SnJf3HSLkRSGq3j3XIW0uyNEECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEegoYQPfsXWoCBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECWwoYQG9Zm6MJECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQI9BQwgO7Zu9QECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEthQwgN6yNkcTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQ6ClgAN2zd6kJECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIbClgAL1lbY4mQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAg0FPAALpn71ITIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQ2FLAAHrL2hxNgAABAgQIECBAgAABA
gQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAoKeAAXTP3qUmQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgsKWAAfSWtTmaAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQE8BA+ievUtNgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAYEsBA+gta3M0AQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgZ4CBtA9e5eaAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwJYCBtBb1uZoAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAj0FDKB79i41AQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgS0FDKC3rM3RBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBHoKGED37F1qAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAlsKGEBvWZujCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECPQUMIDu2bvUBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBLYUMIDesjZHEyBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEOgpYADds3epCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECGwpYAC9ZW2OJkCAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQINBTwAC6Z+9SEyBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIENhSwAB6y9ocTYAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQKCngAF0z96lJkCAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQILClgAH0lrU5mgABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgEBPAQPonr1LTYAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQGBLAQPoLWtzNAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIGeAgbQPXuXmgABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgMCWAgbQW9bmaAIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQI9BQyge/YuNQECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIEtBQygt6zN0QQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgR6ChhA9+xdagIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQJbChhAb1mbowkQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAj0FDCA7tm71AQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgS2FDCA3rI2R
xMgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBDoKWAA3bN3qQkQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAhsKWAAvWVtjiZAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECDQU8AAumfvUhMgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBDYUsAAesvaHE2AAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECgp4ABdM/epSZAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECCwpYAB9Ja1OZoAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBATwED6J69S02AAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEBgSwED6C1rczQBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBngIG0D17l5oAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIDAlgIG0FvW5mgCBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECPQUMoHv2LjUBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBLQUMoLeszdEECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEegoYQPfsXWoCBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECWwoYQG9Zm6MJECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQI9BQwgO7Zu9QECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEthQwgN6yNkcTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQ6ClgAN2zd6kJECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIbClgAL1lbY4mQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAg0FPgvN/vPZNLTYAAAQIECBAgUEbgcrmUucUhBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECtQXOl5faB7qOAAECBAgQIECggcDlfjyfRtANmhaRAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIPDdAufzs6HJdyt6AQECBAgQIECAwMMCt9txvN78V0keBvQLCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQLNBK7N8opLgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgMDGAufGtzudAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQ+D8F7m//kd5vf/lBgACBXQUMoHdtzt0ECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQOABgV/+cTu+/eUHAQIEdhW47nq4uwkQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQ6CfgT4Du17nEBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQI
NBM4Pr2x6VeL81Ci0uAQKyAAXRstYIRIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIH/Cvz5T9fj219+ECBAIEHAP80SWpSBAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQBMBA+gmRYtJgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAIEHAADqhRRkIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQINBEwgG5StJgECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEEgQMoBNalIEAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAEwED6CZFi0mAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEAgQcAAOqFFGQgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAg0ETCAblK0mAQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQSBAygE1qUgQABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgEATAQPoJkWLSYAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQCBBwAA6oUUZCBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECDQRMIBuUrSYBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBBIEDKATWpSBAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQBMBA+gmRYtJgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAIEHAADqhRRkIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQINBEwgG5StJgECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEEgQMoBNalIEAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAEwED6CZFi0mAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEAgQcAAOqFFGQgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAg0ETCAblK0mAQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQSBAygE1qUgQABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgEATAQPoJkWLSYAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQCBBwAA6oUUZCBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECDQRMIBuUrSYBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBBIEDKATWpSBAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQBMBA+gmRYtJgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAIEHAADqhRRkIECBAgAABAgQIECBAgAABA
gQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQINBEwgG5StJgECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEEgQMoBNalIEAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAEwED6CZFi0mAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEAgQcAAOqFFGQgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAg0ETCAblK0mAQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQSBAygE1qUgQABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgEATAQPoJkWLSYAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQCBBwAA6oUUZCBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECDQRMIBuUrSYBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBBIEDKATWpSBAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQBMBA+gmRYtJgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAIEHAADqhRRkIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQINBEwgG5StJgECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEEgQMoBNalIEAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAEwED6CZFi0mAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEAgQcAAOqFFGQgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAg0ETCAblK0mAQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQSBAygE1qUgQABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgEATAQPoJkWLSYAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQCBBwAA6oUUZCBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECDQRMIBuUrSYBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBBIEDKATWpSBAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQBMBA+gmRYtJgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAIEHAADqhRRkIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQINBEwgG5StJgECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEEgQMoBNalIEAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAEwED6CZFi0mAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEAgQcAAOqFFGQgQIECAAAECBAgQI
ECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAg0ETCAblK0mAQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQSBAygE1qUgQABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgEATAQPoJkWLSYAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQCBBwAA6oUUZCBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECDQRMIBuUrSYBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBBIEDKATWpSBAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQBMBA+gmRYtJgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAIEHAADqhRRkIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQINBEwgG5StJgECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEEgQMoBNalIEAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAEwED6CZFi0mAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEAgQcAAOqFFGQgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAg0ETCAblK0mAQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQSBM77PSGGDAQIECBAgACBbgL343K5dAstLwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIHj/PzFAtr/DwgQIECAAAECuwmcT8fx/Lzb1e4lQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAg8P0C58cP/uTA72f0BgIECBAgQIDAOoGvX/0GtnXavkSAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIFBN4PRfTq9WiXsIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEPgtgetv/YTnBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQqCZwVjvIPQQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAoJLAzz+/HpfLpdJJbiFAgEBrAQPo1vULT4AAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQJ/JPD6+u3vuP/R3+bnCRAgQGCRgAH0ImifIUCAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIF9BC7X47g+7XOvSwkQINBJwAC6U9uyEiBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgMD/JPDXv7wtoP0gQIAAgZIC/gldshZHESBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECAwEjCAHql4RoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBASQED6JK1OIoAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgZGAAfRIxTMCBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQI
ECAAAECBAgQIECAAAECBEoKGECXrMVRBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAiMBAygRyqeESBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBQUsAAumQtjiJAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAYCRgAD1S8YwAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgZICBtAla3EUAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIjAQPokYpnBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAiUFDCALlmLowgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQGAkYQI9UPCNAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAoKSAAXTJWhxFgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgMBIwAB6pOIZAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIlBQygS9biKAIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIERgIG0CMVzwgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQKClgAF2yFkcRIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIDASMIAeqXhGgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgEBJAQPokrU4igABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBkYAB9EjFMwIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIESgoYQJesxVEECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECIwEDKBHKp4RIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIFBSwAC6ZC2OIkCAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEBgJGAAPVLxjAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBkgIG0CVrcRQBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAiMBA+iRimcECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECJQUMIAuWYujCBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAYCRhAj1Q8I0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECgpIABdMlaHEWAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwEjAAHqk4hkBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAiUFDKBL1uIoAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgRGAgbQIxXPC
BAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAoKWAAXbIWRxEgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgMBIwgB6peEaAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQEkBA+iStTiKAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIGRgAH0SMUzAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgRKChhAl6zFUQQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIjAQMoEcqnhEgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgUFLAALpkLY4iQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQGAkYAA9UvGMAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIGSAgbQJWtxFAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECIwED6JGKZwQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIlBQwgC5Zi6MIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEBgJGECPVDwjQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQKCkgAF0yVocRYAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIDASMAAeqTiGQECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECJQUMoEvW4igCBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBEYCBtAjFc8IECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECgpYABdshZHESBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECAwEjCAHql4RoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBASQED6JK1OIoAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgZGAAfRIxTMCBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBEoKGECXrMVRBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAiMBAygRyqeESBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBQUsAAumQtjiJAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAYCRgAD1S8YwAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgZICBtAla3EUAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIjAQPokYpnBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAiUFDCALlmLowgQIECAAAECBAgQIECAAAECBAgQIECAA
AECBAgQIECAAAECBAgQIECAAAECBAgQGAkYQI9UPCNAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAoKSAAXTJWhxFgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgMBIwAB6pOIZAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIlBQygS9biKAIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIERgIG0CMVzwgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQKClgAF2yFkcRIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIDASMIAeqXhGgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgEBJAQPokrU4igABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBkYAB9EjFMwIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIESgqct9u95GGOIkCAAAECBAjMFLheLzNf510ECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECLyTwPnvzwbQ72TvswQIECBAgMBCgZ9+uhzP58IP+hQBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAj9E4Pz44fpDXuylBAgQIECAAIEKAvf7/fjyq9/wVaELNxAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBCYIXA+Pc14jXcQIECAAAECBKoKXAygq1bjLgIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIPCPjjnx9A80sIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEHgfgfN9PuurBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIJAo8PJyP/7+yy0xmkwECBAgUETAALpIEc4gQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAgsDr2/b59fM9IYoMBAgQIFBUwAC6aDHOIkCAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwE4C18txXK87XexWAgQIENhVwAB61+bcTYAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgUICnz5ZPxeqwykECBCIFvBvnOh6hSNAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECCQJWAAndWnNAQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgSiBQygo+sVjgABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgECWgAF0Vp/SECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEIgWMICOrlc4AgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAlkCBtBZfUpDgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAIFrAADq6XuEIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIZAkYQGf1KQ0BAgQIECBAgAABAgQIECBAgAABAgQIECBAg
AABAgQIECBAgAABAgQIECBAgACBaAED6Oh6hSNAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECCQJWAAndWnNAQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgSiBQygo+sVjgABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgECWgAF0Vp/SECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEIgWMICOrlc4AgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAlkCBtBZfUpDgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAIFrAADq6XuEIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIZAkYQGf1KQ0BAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBaAED6Oh6hSNAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECCQJWAAndWnNAQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgSiBQygo+sVjgABAgQIECBAgHJSaigAAEAASURBVAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgECWgAF0Vp/SECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEIgWMICOrlc4AgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAlkCBtBZfUpDgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAIFrAADq6XuEIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIZAkYQGf1KQ0BAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBaAED6Oh6hSNAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECCQJWAAndWnNAQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgSiBQygo+sVjgABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgECWgAF0Vp/SECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEIgWMICOrlc4AgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAlkCBtBZfUpDgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAIFrAADq6XuEIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIZAkYQGf1KQ0BAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBaAED6Oh6hSNAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECCQJWAAndWnNAQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgSiBQygo+sVjgABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgECWgAF0Vp/SECBAgAABAgQIECBAg
AABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEIgWMICOrlc4AgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAlkCBtBZfUpDgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAIFrAADq6XuEIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIZAkYQGf1KQ0BAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBaAED6Oh6hSNAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECCQJWAAndWnNAQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgSiBQygo+sVjgABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgECWgAF0Vp/SECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEIgWMICOrlc4AgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAlkCBtBZfUpDgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAIFrAADq6XuEIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIZAkYQGf1KQ0BAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBaAED6Oh6hSNAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECCQJWAAndWnNAQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgSiBQygo+sVjgABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgECWgAF0Vp/SECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEIgWMICOrlc4AgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAlkCBtBZfUpDgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAIFrAADq6XuEIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIZAkYQGf1KQ0BAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBaAED6Oh6hSNAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECCQJWAAndWnNAQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgSiBQygo+sVjgABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgECWgAF0Vp/SECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEIgWMICOrlc4AgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAlkCBtBZfUpDgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAIFrAADq6XuEIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIZAkYQGf1KQ0BAgQIECBAgAABA
gQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBaAED6Oh6hSNAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECCQJWAAndWnNAQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgSiBQygo+sVjgABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgECWgAF0Vp/SECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEIgWOG+36HzCESBAgAABAg8KXP02qQfl/DICBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBH6kwPnPf1lA/0hg7yZAgAABArsKfPx4OZ7Py67nu5sAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgVCB86dnw6bQbsUiQIAAAQIPCdzux/Hy8vY/fhAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQKCgwPnhgwF0wV6cRIAAAQIE3k3g69v4+eXl3T7vwwQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEPhdgevv/qyfJECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQCGBs9AtTiFAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgEA7gV+/3o/b/d4ut8AECBAgQOBRAQPoR+X8OgIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECEwQ+PL5fnyZ8B6vIECAAAECXQQMoLs0LScBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAqUEnp6O43YpdZJjCBAgQIDAFgIG0FvU5EgCBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBNIE/vbpmhZJHgIECBAgsETAv0GXMPsIAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIzBAygZyh6BwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECSwQMoJcw+wgBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAjMEDKBnKHoHAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQJLBAyglzD7CAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECMwQMoGcoegcBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAksEDKCXMPsIAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIzBAygZyh6BwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECSwQMoJcw+wgBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAjMEDKBnKHoHAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQJLBAyglzD7CAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECMwQMoGcoegcBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAksEDKCXMPsIAQIECBAgQ
IAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIzBAygZyh6BwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECSwQMoJcw+wgBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAjMEDKBnKHoHAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQJLBAyglzD7CAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECMwQMoGcoegcBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAksEDKCXMPsIAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIzBAygZyh6BwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECSwQMoJcw+wgBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAjMEDKBnKHoHAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQJLBAyglzD7CAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECMwQMoGcoegcBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAksEDKCXMPsIAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIzBAygZyh6BwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECSwQMoJcw+wgBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAjMEDKBnKHoHAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQJLBAyglzD7CAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECMwQMoGcoegcBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAksEDKCXMPsIAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIzBAygZyh6BwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECSwQMoJcw+wgBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAjMEDKBnKHoHAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQJLBAyglzD7CAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECMwQMoGcoegcBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAksEDKCXMPsIAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIzBAygZyh6BwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECSwQMoJcw+wgBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAjMEDKBnKHoHAQIECBAgQIAAAQIECBAgQIAAAQIEC
BAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQJLBAyglzD7CAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECMwQMoGcoegcBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAksEDKCXMPsIAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIzBAygZyh6BwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECSwQMoJcw+wgBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAjMEDKBnKHoHAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQJLBAyglzD7CAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECMwQMoGcoegcBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAksEDKCXMPsIAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIzBAygZyh6BwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECSwQMoJcw+wgBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAjMEDKBnKHoHAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQJLBAyglzD7CAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECMwQMoGcoegcBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAksEDKCXMPsIAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIzBAygZyh6BwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECSwQMoJcw+wgBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAjMEDKBnKHoHAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQJLBAyglzD7CAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECMwQMoGcoegcBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAksEDKCXMPsIAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQL/Ye8Om+MoriiAzuyOoGJjy/z//xhsnFjC0mqyiYHKBxrQzqr17vRRFSTV1uzcPu9VKh+upWsIKEBfQ9FnECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECDQRUABuguzlxAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgcA0BBehrKPoMAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgS6CChAd2H2EgIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEriGgAH0NRZ9BgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAg
AABAgQIECBAgEAXAQXoLsxeQoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIDANQQUoK+h6DMIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEOgisJxOa5cXeQkBAgQIENi7wPE47/2K7keAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIFXF1j+9W8F6FefggAECBAgsAuBH97O0/G4i6u4BAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBMoKLDeLn1ZZdjqCESBAgECEwOP5tyms/j5RxKyEJECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEAgX2B580YBOn+MbkCAAAECrynw5cs6PTy+ZgLvJkCAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwDgCh3Gu6qYECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECKQLLOkXkJ8AAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgbEEvv2W3nWsS7stAQIECBAg8LuAAvTvFP4LAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIJAp8+nRJiykiAAAECBAi8kIAC9AvB+lgCBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBK4rcDxe9/N8GgECBAgQIJApoACdOTepCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECAwlMM/T9OMHDeihhu6yBAgQIECgIXBonDsmQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAOQEF6HIjEYgAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgZaAAnRLxjkBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAuUEFKDLjUQgAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgRaAgrQLRnnBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAiUE1CALjcSgQgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQaAkoQLdknBMgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgUE5AAbrcSAQiQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQKAloADdknFOgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgEA5AQXociMRiAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBloACdEvGOQECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAEC5QQUoMuNRCACBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBFoCCtAtGecECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECJQTUIAuNxKBCBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBBoCShAt2ScEyBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIE
CBAgAABAgQIECBQTkAButxIBCJAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAoCWgAN2ScU6AAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQDkBBehyIxGIAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIGWgAJ0S8Y5AQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQLlBBSgy41EIAIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEWgIK0C0Z5wQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIlBNQgC43EoEIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEGgJKEC3ZJwTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIFBOQAG63EgEIkCAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECgJaAA3ZJxToAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAOQEF6HIjEYgAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgZaAAnRLxjkBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAuUEFKDLjUQgAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgRaAgrQLRnnBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAiUE1CALjcSgQgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQaAkoQLdknBMgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgUE5AAbrcSAQiQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQKAloADdknFOgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgEA5AQXociMRiAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBloACdEvGOQECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAEC5QQUoMuNRCACBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBFoCCtAtGecECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECJQTUIAuNxKBCBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBBoCShAt2ScEyBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBQTkAButxIBCJAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAoCWgAN2ScU6AAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQDkBBehyIxGIAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIGWgAJ0S8Y5AQIECBAgQ
IAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQLlBBSgy41EIAIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEWgIK0C0Z5wQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIlBNQgC43EoEIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEGgJKEC3ZJwTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIFBOQAG63EgEIkCAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECgJaAA3ZJxToAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAOQEF6HIjEYgAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgZaAAnRLxjkBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAuUEFKDLjUQgAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgRaAgrQLRnnBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAiUE1CALjcSgQgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQaAkoQLdknBMgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgUE5AAbrcSAQiQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQKAloADdknFOgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgEA5AQXociMRiAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBloACdEvGOQECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAEC5QQUoMuNRCACBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBFoCCtAtGecECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECJQTUIAuNxKBCBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBBoCShAt2ScEyBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBQTkAButxIBCJAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAoCWgAN2ScU6AAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQDkBBehyIxGIAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIGWgAJ0S8Y5AQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQLlBBSgy41EIAIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEWgLL6dT6I+cECBAgQOBlBY7Hl/18n06AAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAEC+xNYPn9+2t+t3IgAAQIEIgRubw/TPEdEFZIAAQIEC
BAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEiggsh0ORJGIQIECAwDACT/7uzTCzdlECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAhcW2B5/14D+tqoPo8AAQIE/lzg40cN6D8X8qcECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAg0BLQfm7JOCdAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAoJyAAnS5kQhEgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgEBLYGn9gXMCBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBPIFvj48Tf/8Kf8ebkCAAAECBAgQ+E1AAfo3Cf9JgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAYEcC8zxPx8O8oxu5CgECBAgQIEDgm4ACtE0gQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgsEOB43Gabm8PO7yZKxEgQIAAAQKjC/h/OKNvgPsTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQCBJQgA4alqgECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIERhdQgB59A9yfAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQJCAAnTQsEQlQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgMLqAAvToG+D+BAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBIIEFKCDhiUqAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgdEFFKBH3wD3J0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIBAkoAAdNCxRCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECIwuoAA9+ga4PwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIEgAQXooGGJSoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQGB0AQXo0TfA/QkQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgECShABw1LVAIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQKjCyhAj74B7k+AAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEAgSEABOmhYohIgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAYXUABevQNcH8CBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECQQIK0EHDEpUAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIDA6AIK0KNvgPsTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQCBJQgA4alqgECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIERhdQgB59A9yfAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQJCAA
nTQsEQlQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgMLqAAvToG+D+BAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBIIEFKCDhiUqAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgdEFFKBH3wD3J0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIBAkoAAdNCxRCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECIwuoAA9+ga4PwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIEgAQXooGGJSoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQGB0AQXo0TfA/QkQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgECShABw1LVAIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQKjCyhAj74B7k+AAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEAgSEABOmhYohIgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAYXUABevQNcH8CBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECQQIK0EHDEpUAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIDA6AIK0KNvgPsTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQCBJQgA4alqgECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIERhdQgB59A9yfAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQJCAAnTQsEQlQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgMLqAAvToG+D+BAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBIIEFKCDhiUqAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgdEFFKBH3wD3J0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIBAkoAAdNCxRCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECIwuoAA9+ga4PwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIEgAQXooGGJSoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQGB0AQXo0TfA/QkQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgECShABw1LVAIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQKjCyhAj74B7k+AAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEAgSEABOmhYohIgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAYXUABevQNcH8CBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQI
ECAAAECBAgQIECAAAECQQIK0EHDEpUAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIDA6AIK0KNvgPsTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQCBJQgA4alqgECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIERhdQgB59A9yfAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQJCAAnTQsEQlQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgMLqAAvToG+D+BAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBIIEFKCDhiUqAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgdEFFKBH3wD3J0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIBAkoAAdNCxRCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECIwuoAA9+ga4PwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIEgAQXooGGJSoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQGB0AQXo0TfA/QkQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgECShABw1LVAIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQKjCyhAj74B7k+AAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEAgSEABOmhYohIgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAYXUABevQNcH8CBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECQQIK0EHDEpUAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIDA6AIK0KNvgPsTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQCBJQgA4alqgECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIERhdQgB59A9yfAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQJCAAnTQsEQlQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgMLrAcjqNTuD+BAgQyBE4HnOySkqAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBF5CYPn589NLfK7PJECAAIErC8znz/vwwQ/uvzKrjyNAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBMAFNurCBiUuAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEBgZIHl9r0O9MgL4O4ECNQX+Pp1ne7u1/pBJSRAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAh0EloP+cwdmryBAgMDlAv53+nI7TxIgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIDA/gTUn/c3UzciQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEC
BAgQIAAAQIECBAgQIAAAQIECBAgsFuBZbc3czECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECFwjc3a/T/S/rBU96hAABAgQIECBAoIeAAnQPZe8gQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBCoLzBP0+HX36e+6j/Xn5eEBAgQIECAwLACCtDDjt7FCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIE/l/gZpmnm3fnFrQvAgQIECBAgACB0gK//p210hmFI0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwP8EFKAtAgECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECMQIK0DGjEpQAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQVoO0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQIyAAnTMqAQlQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBBiNuxVAABAAElEQVQgQIAAAQIECBAgQIAAAQIECBAgQEAB2g4QIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIBAjoAAdMypBCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBBQgLYDBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAjECChAx4xKUAIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEFKDtAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECMQIK0DGjEpQAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQVoO0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQIyAAnTMqAQlQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQEAB2g4QIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIBAjoAAdMypBCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBBQgLYDBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAjECChAx4xKUAIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEFKDtAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECMQIK0DGjEpQAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQVoO0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQIyAAnTMqAQlQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQEAB2g4QIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIBAjoAAdMypBCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBBQgLYDBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAA
AECBAjECChAx4xKUAIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEFKDtAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECMQIK0DGjEpQAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQVoO0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQIyAAnTMqAQlQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQEAB2g4QIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIBAjoAAdMypBCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBBQgLYDBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAjECChAx4xKUAIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEFKDtAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECMQIK0DGjEpQAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQVoO0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQIyAAnTMqAQlQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQEAB2g4QIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIBAjoAAdMypBCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBBQgLYDBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAjECChAx4xKUAIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEFKDtAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECMQIK0DGjEpQAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQVoO0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQIyAAnTMqAQlQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQEAB2g4QIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIBAjoAAdMypBCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBBQgLYDBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAjECChAx4xKUAIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEFKDtAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECMQIK0DGjEpQAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQVoO0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQIyAAnTMqAQlQIAAAQIEC
BAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQEAB2g4QIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIBAjoAAdMypBCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBBQgLYDBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAjECChAx4xKUAIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEFKDtAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECMQIK0DGjEpQAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQVoO0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQIyAAnTMqAQlQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQEAB2g4QIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIBAjoAAdMypBCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBBQgLYDBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAjECChAx4xKUAIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEFKDtAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECMQIK0DGjEpQAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQVoO0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQIzA8vi4xoQVlAABAs8RmOd5Oh6f84TvJUCAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBKoLLJ9+VoCuPiT5CBC4TGA5rtPtrR90f5mepwgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQE0BzcCac5GKAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIE/EFje/TD/wbEjAgQI5Arc3a3T4yk3v+QECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAW2D57jsF6DaPPyFAIFHg/pd1mhSgE0cnMwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQ+EuBw19+h28gQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAEYGlSA4xCBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEQgXu78+/qdcXAQIECBAgQIAAgU4CCtCdoL2GAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQILA7gXmaDuffP/70tLubuRABAgQIECBAgEBhAQXowsMRjQABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBQWeDm3Dy5Wc4taF8ECBAgQIAAAQIEOgqc/w6eLwIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECGQIKEBnzElKAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAg
AABAgQIECBAgAABAgTOAgrQ1oAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgRgBBeiYUQlKgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgIACtB0gQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQCBGQAE6ZlSCEiBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECCgAG0HCBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBCIEVCAjhmVoAQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIKEDbAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEYgQUoGNGJSgBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgrQdoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgRgBBeiYUQlKgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgIACtB0gQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQCBGQAE6ZlSCEiBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECCgAG0HCBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBCIEVCAjhmVoAQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIKEDbAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEYgQUoGNGJSgBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgrQdoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgRgBBeiYUQlKgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgIACtB0gQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQCBGQAE6ZlSCEiBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECCgAG0HCBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBCIEVCAjhmVoAQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIKEDbAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEYgQUoGNGJSgBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgrQdoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgRgBBeiYUQlKgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgIACtB0gQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQCBGQAE6ZlSCEiBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECCgAG0HC
BAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBCIEVCAjhmVoAQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIKEDbAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEYgQUoGNGJSgBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgrQdoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgRgBBeiYUQlKgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgIACtB0gQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQCBGQAE6ZlSCEiBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECCgAG0HCBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBCIEVCAjhmVoAQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIKEDbAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEYgQUoGNGJSgBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgrQdoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgRgBBeiYUQlKgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgIACtB0gQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQCBGQAE6ZlSCEiBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECCgAG0HCBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBCIEVCAjhmVoAQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIKEDbAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEYgQUoGNGJSgBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgrQdoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgRgBBeiYUQlKgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgIACtB0gQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQCBGQAE6ZlSCEiBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECCgAG0HCBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBCIEVCAjhmVoAQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIKEDbAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEYgQUoGNGJSgBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgrQdoAAAQIECBAgQIAAAQIECBAgQIAAA
QIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgRgBBeiYUQlKgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgIACtB0gQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQCBGQAE6ZlSCEiBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECCgAG0HCBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBCIEVCAjhmVoAQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIKEDbAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEYgQUoGNGJSgBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgrQdoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgRgBBeiYUQlKgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgMDy+AiBAAEC3wTmeZ2OxxkHAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQKCswPLTx1PZcIIRINBX4Pvv5+n9OwXovureRoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECDxH4PCcb/a9BAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQeE2B5c0//LTX1xyAdxN4fYF5+nL39PoxJCBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQI/A2B5e1bPwT6bzj5FgK7FVjX6VyA3u31XIwAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBDYmYD2884G6joECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIE9iyw7Ply7kaAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQqCbwdP5NvQ+P53/5IkCAAAECBAgQIEDgIgEF6IvYPESAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQeL7APE/Tuq7Tw8Pzn/UEAQIECBAgQIAAAQLfBBSgbQIBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAoJPAzc08/fcfXwQIECBAgAABAgQIXC5wuPxRTxIgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQKCvgAJ0X29vI0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEBgg4AC9AY8jxIgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAg0FdAAbqvt7cRIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQILBBQAF6A55HCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBDoK6AA3dfb2wgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQ2CCgAL0Bz6MECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECPQVUIDu6+1tBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQI
ECAAAECBAgQIECAAAECBAhsEFCA3oDnUQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIE+gooQPf19jYCBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBDYIKEBvwPMoAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQJ9BRSg+3p7GwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECGwQUoDfgeZQAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgb4CCtB9vb2NAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIENAgrQG/A8SoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAXwEF6L7e3kaAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwAYBBegNeB4lQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQKCvgAJ0X29vI0CAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEBgg4AC9AY8jxIgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAg0FdAAbqvt7cRIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQILBBQAF6A55HCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBDoK6AA3dfb2wgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQ2CCgAL0Bz6MECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECPQVUIDu6+1tBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAhsEFCA3oDnUQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIE+gooQPf19jYCBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBDYIKEBvwPMoAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQJ9BRSg+3p7GwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECGwQUoDfgeZQAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgb4CCtB9vb2NAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIENAgrQG/A8SoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAg8J/27q7HimqLAmjV6QID0agJ+v9/oKCJDyJ0n2Pz5AfEtO6PrjX3MDEBLrVqrTHrcdqXAAECBAgQIECAAIG5AgrQc729jQABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBBgEF6AY8jxIgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgMFdAAXqut7cRIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQINAgoADdg
OdRAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgTmCihAz/X2NgIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEGgQUoBvwPEqAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwFwBBei53t5GgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgECDgAJ0A55HCRAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBCYK6AAPdfb2wgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQaBBQgG7A8ygBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAnMFFKDnensbAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQINAgrQDXgeJUCAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEBgroAC9FxvbyNAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAoEFAAboBz6MECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECMwVUICe6+1tBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAg0CChAN+B5lAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBuQIK0HO9vY0AAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgQYBBegGPI8SIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIDBXQAF6rre3ESBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECDQIKAA3YDnUQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIE5gooQM/19jYCBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBBoEFKAb8DxKgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgMBcAQXoud7eRoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAg4ACdAOeRwkQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQmCugAD3X29sIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEGgQUIBuwPMoAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQJzBRSg53p7GwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECDQIK0A14HiVAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAYK6AAvRcb28jQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQKBBQAG6Ac+jBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAjMFVCAnuvtbQQIECBAgAABAgQIECBAgAABA
gQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQINAgoQDfgeZQAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgbkCCtBzvb2NAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIEGAQXoBjyPEiBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECAwV0ABeq63txEgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAg0CCgAN2A51ECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBOYKKEDP9fY2AgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQaBI77h4anPUpgcYHLvm0X/xnB4l+B8wkQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIGZAse7dxrQM8G9K0vg9at9+/prDeisVF1DgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQInFlAc/PM6diNAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIG/CRwvX+x/+wO/IUDg3wUerrftwQ9O/3ck/ysBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAYJDA8d13fgj0IFtjQwV+e3/bfv31GnqdswgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAEC5xbQfj53PrYjQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQOAvAsdffu2XBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIHTCVyvt23f99PtZSECBAgQIECAAAECBJ5HQAH6edy9lQABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEHiCwKfe8/X66d/bE/62v0KAAAECBAgQIECAwAoCCtArpOxGAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBQVODF4Sc/F43O2gQIECBAgAABAgSGCVyGTTaYAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECnQUUoDuDGkeAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwDgBBehxtiYTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQINBZQAG6M6hxBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAiME1CAHmdrMgECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECnQUUoDuDGkeAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwDgBBehxtiYTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQINBZQAG6M6hxBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAiME1CAHmdrMgECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECnQUUoDuDGkeAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwDgBBehxtiYTIECAAAECB
AgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQINBZQAG6M6hxBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAiME1CAHmdrMgECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECnQUUoDuDGkeAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwDgBBehxtiYTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQINBZQAG6M6hxBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAiME1CAHmdrMgECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECnQUUoDuDGkeAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwDgBBehxtiYTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQINBZQAG6M6hxBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAiME1CAHmdrMgECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECnQUUoDuDGkeAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwDgBBehxtiYTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQINBZQAG6M6hxBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAiME1CAHmdrMgECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECnQUUoDuDGkeAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwDgBBehxtiYTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQINBZQAG6M6hxBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAiME1CAHmdrMgECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECnQUUoDuDGkeAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwDgBBehxtiYTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQINBZQAG6M6hxBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAiME1CAHmdrMgECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECnQUUoDuDGkeAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwDgBBehxtiYTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQINBZQAG6M6hxBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAiME1CAHmdrMgECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECnQUUoDuDGkeAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAA
AECBAgQIECAAAECBAgQIECAwDgBBehxtiYTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQINBZQAG6M6hxBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAiME1CAHmdrMgECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECnQUUoDuDGkeAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwDgBBehxtiYTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQINBZQAG6M6hxBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAiME1CAHmdrMgECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECnQUUoDuDGkeAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwDgBBehxtiYTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQINBZQAG6M6hxBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAiME1CAHmdrMgECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECnQUUoDuDGkeAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwDgBBehxtiYTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQINBZQAG6M6hxBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAiME1CAHmdrMgECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECnQUUoDuDGkeAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwDgBBehxtiYTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQINBZQAG6M6hxBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAiME1CAHmdrMgECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECnQUUoDuDGkeAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwDgBBehxtiYTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQINBZQAG6M6hxBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAiME1CAHmdrMgECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECnQUUoDuDGkeAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwDgBBehxtiYTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQINBZQAG6M6hxBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAiME1CAHmdrMgECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECnQUUo
DuDGkeAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwDgBBehxtiYTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQINBZQAG6M6hxBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAiMEzg+3t/GTTeZwD8E7i77dlG7/4eK3xIgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECDxV4Hj37vrUv+vvEWgW+Oaby/b61d48xwACBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIE1Bfws3jVzdzUBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBkgLH7ofxlgyu0tK3B6aQZwAAMfdJREFUW6Vt7UqAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIHBmgePHH+7OvJ/dAgR+/uW6ffigBR0QpRMIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAs8ucHn2DSxAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBJwooQD8Ryl8jQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQOD5BY7nX8EGBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQC+By+OPQnv//tprnDkECBAgQIAAAQIECBA4nYAC9OkisRABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEPj/Ai9f7o8Pf/rXPwQIECBAgAABAgQIEMgUePzvPv1DgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBGgIK0DVysiUBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAo8CCtA+AwIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEyggoQJeJyqIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECChA+wYIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECgjoABdJiqLEiBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECCgAO0bIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECgjIACdJmoLEqAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAgAK0b4AAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgTICCtBlorIoAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIK0L4BAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgTKCChAl4nKogQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIKED7BggQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQKCOgAF0mKosSIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIKAA7RsgQIAAAQIEC
BAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQKCMgAJ0magsSoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQICAArRvgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBMgIK0GWisigBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgrQvgECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBMoIKECXicqiBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgoQPsGCBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAoI6AAXSYqixIgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgoADtGyBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAoIyAAnSZqCxKgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgIACtG+AAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIEyAgrQZaKyKAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECCtC+AQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEyggoQJeJyqIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECChA+wYIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECgjoABdJiqLEiBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECCgAO0bIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECgjIACdJmoLEqAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAgAK0b4AAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgTICCtBlorIoAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIK0L4BAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgTKCChAl4nKogQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIKED7BggQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQKCOgAF0mKosSIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIKAA7RsgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQKCMgAJ0magsSoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQICAArRvgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBMgIK0GWisigBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgrQvgECBAgQIECAAAECBAgQIECAAAECB
AgQIECAAAECBAgQIECAAAECBAgQIECAAAECBMoIKECXicqiBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgoQPsGCBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAoI6AAXSYqixIgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgoADtGyBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAoIyAAnSZqCxKgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgIACtG+AAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIEyAgrQZaKyKAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECCtC+AQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEyggoQJeJyqIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECChA+wYIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECgjoABdJiqLEiBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECCgAO0bIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECgjIACdJmoLEqAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAgAK0b4AAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgTICCtBlorIoAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIK0L4BAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgTKCChAl4nKogQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIKED7BggQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQKCOgAF0mKosSIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIKAA7RsgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQKCMgAJ0magsSoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQICAArRvgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBMgIK0GWisigBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgrQvgECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBMoIKECXicqiBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgcH+8hnFnguNu2fT/zhnYjQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgME/gePvTw7y3edN/Fvj++8v21Vca0P8ZzgMECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQKRApfIqxxFgAABA
gQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgECkgAJ0ZKyOIkCAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIJApcLx5owN9pmhvt217+/Z6ppXsQoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQOA0Asdx7KdZxiLbdr0+NqD9Q4AAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIDAFwX8+OcvsvhDAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgTOKHCccSk7ESBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBaQL7vt3f+3/rnebtRQQIECBAgAABAgQIEGgUUIBuBPQ4AQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECNQWOO4+7b/XPsL2BAgQIECAAAECBAgQWEjgstCtTiVAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAoLiAAnTxAK1PgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAYCUBBeiV0nYrAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgeICCtDFA7Q+AQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgZUEFKBXStutBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBIoLKEAXD9D6BAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBFYSUIBeKW23EiBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECguoABdPEDrEyBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEFhJQAF6pbTdSoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQKC4gAJ08QCtT4AAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQGAlAQXoldJ2KwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIHiAgrQxQO0PgECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIGVBBSgV0rbrQQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgSKCyhAFw/Q+gQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgRWElCAXilttxIgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAoLqAAXTxA6xMgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBBYSUABeqW03UqAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECguIACdPEArU+AAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEBgJQEF6JXSdisBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACB4gIK0MUDtD4BAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBlQQUoFdK2
60ECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEigsoQBcP0PoECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEVhJQgF4pbbcSIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQKC6gAF08QOsTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQWElAAXqltN1KgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAoLiAAnTxAK1PgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAYCUBBeiV0nYrAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgeICCtDFA7Q+AQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgZUEFKBXStutBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBIoLKEAXD9D6BAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBFYSUIBeKW23EiBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECguoABdPEDrEyBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEFhJQAF6pbTdSoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQKC4gAJ08QCtT4AAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQGAlAQXoldJ2KwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIHiAgrQxQO0PgECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIGVBBSgV0rbrQQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgSKCyhAFw/Q+gQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgRWElCAXilttxIgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAoLqAAXTxA6xMgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBBYSUABeqW03UqAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECguIACdPEArU+AAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEBgJQEF6JXSdisBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACB4gIK0MUDtD4BAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBlQQUoFdK260ECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEigsoQBcP0PoECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEVhJQgF4pbbcSIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQKC6gAF08QOsTIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQI
ECAAAECBAgQWElAAXqltN1KgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAoLiAAnTxAK1PgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAYCUBBeiV0nYrAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgeICCtDFA7Q+AQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAgZUEFKBXStutBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBIoLKEAXD9D6BAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBFYSUIBeKW23EiBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECguoABdPEDrEyBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIEFhJQAF6pbTdSoAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQKC4gAJ08QCtT4AAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQGAlAQXoldJ2KwECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIHiAgrQxQO0PgECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIGVBBSgV0rbrQQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgSKCyhAFw/Q+gQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgRWElCAXilttxIgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAoLqAAXTxA6xMgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBBYSUABeqW03UqAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECguIACdPEArU+AAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIEBgJQEF6JXSdisBAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACB4gIK0MUDtD4BAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgACBlQSOjx9vUfe+eLFH3eMYAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgT+FDh+evvw5+8CfvXDm7vtOJSgA6J0AgECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAIHPBC6f/Yk/IECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAwEkFFKBPGoy1CBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBD4XOD49tvaHegPv2/bb++vn1/mTwgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQiBM4Xr+qXYDebrfHAnRcLg4iQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQOALAsXbz1+4yB8RIECAAAECBAgQIECAAAECBAgQIECAAAECB
AgQIECAAAECBAgQIECAAAECBAgQIBAroAAdG63DCBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECOQJKEDnZeoiAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABArECCtCx0TqMAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQJ6AAnRepi4iQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgECugAB0brcMIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQI5AkoQOdl6iICBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECsQIK0LHROowAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAnoACdF6mLiJAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECAQK6AAHRutwwgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAjkCShA52XqIgIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQKxAgrQsdE6jAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgECegAJ0XqYuIkCAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIBAroAAdG63DCBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECOQJKEDnZeoiAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABArECCtCx0TqMAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQJ6AAnRepi4iQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgECugAB0brcMIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQI5AkoQOdl6iICBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECsQIK0LHROowAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAnoACdF6mLiJAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECAQK6AAHRutwwgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAjkCShA52XqIgIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQKxAgrQsdE6jAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgECegAJ0XqYuIkCAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIBAroAAdG63DCBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECOQJKEDnZeoiAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABArECCtCx0TqMAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQJ6AAnRepi4iQIAAAQIECBAgQIAAAQIECBAgQ
IAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgECugAB0brcMIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQI5AkoQOdl6iICBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECsQIK0LHROowAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAnoACdF6mLiJAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECAQK6AAHRutwwgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAjkCShA52XqIgIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQKxAgrQsdE6jAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgECegAJ0XqYuIkCAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIBAroAAdG63DCBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECOQJKEDnZeoiAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABArECCtCx0TqMAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQJ6AAnRepi4iQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgECugAB0brcMIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQI5AkoQOdl6iICBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECsQIK0LHROowAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAnoACdF6mLiJAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECAQK6AAHRutwwgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAjkCShA52XqIgIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQKxAgrQsdE6jAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgECegAJ0XqYuIkCAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIBAroAAdG63DCBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECOQJKEDnZeoiAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABArECCtCx0TqMAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQJ6AAnRepi4iQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgECugAB0brcMIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQI5AkoQOdl6iICBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECsQIK0LHROowAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAnoACdF6mLiJAgAABAgQIECBAgAABA
gQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECAQK6AAHRutwwgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAjkCShA52XqIgIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQKxAgrQsdE6jAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgECegAJ0XqYuIkCAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIBAroAAdG63DCBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECOQJKEDnZeoiAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABArECCtCx0TqMAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQJ6AAnRepi4iQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgECugAB0brcMIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQI5AkoQOdl6iICBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECsQIK0LHROowAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIBAnoACdF6mLiJAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECAQK6AAHRutwwgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAjkCShA52XqIgIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQKxAgrQsdE6jAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgECegAJ0XqYuIkCAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIBAroAAdG63DCBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECOQJKEDnZeoiAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABArECCtCx0TqMAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAAAECBAgQIECAQJ6AAnRepi4iQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgECugAB0brcMIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQIECBAgAABAgQI5An8Ad8CyURGq4jVAAAAAElFTkSuQmCC) no-repeat};{}

</style>
  </head>
  <body>
    <div id="app"></div>
    
  </body>
</html>


================================================
FILE: demos/streaming_asr_server/web/readme.md
================================================
# paddlespeech serving 网页Demo

![图片](./paddle_web_demo.png)

step1: 开启流式语音识别服务器端

```
# 开启流式语音识别服务
cd PaddleSpeech/demos/streaming_asr_server
paddlespeech_server start --config_file conf/ws_conformer_wenetspeech_application_faster.yaml
```

step2: 使用谷歌浏览器打开 `web` 目录下的 `index.html`

step3: 点击`连接`，验证WebSocket是否成功连接

step4：点击开始录音(弹窗询问，允许录音)


================================================
FILE: demos/streaming_tts_server/README.md
================================================
([简体中文](./README_cn.md)|English)

# Streaming Speech Synthesis Service

## Introduction
This demo is an implementation of starting the streaming speech synthesis service and accessing the service. It can be achieved with a single command using `paddlespeech_server` and `paddlespeech_client` or a few lines of code in python.

For service interface definition, please check:
- [PaddleSpeech Server RESTful API](https://github.com/PaddlePaddle/PaddleSpeech/wiki/PaddleSpeech-Server-RESTful-API)
- [PaddleSpeech Streaming Server WebSocket API](https://github.com/PaddlePaddle/PaddleSpeech/wiki/PaddleSpeech-Server-WebSocket-API)

## Usage
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

It is recommended to use **paddlepaddle 2.4rc** or above.

You can choose one way from easy, medium and hard to install paddlespeech.

**If you install in easy mode, you need to prepare the yaml file by yourself, you can refer to the yaml file in the conf directory.**

### 2. Prepare config File
The configuration file can be found in `conf/tts_online_application.yaml`.
- `protocol` indicates the network protocol used by the streaming TTS service. Currently, both **http and websocket** are supported.
- `engine_list` indicates the speech engine that will be included in the service to be started, in the format of `<speech task>_<engine type>`.
    - This demo mainly introduces the streaming speech synthesis service, so the speech task should be set to `tts`.
    - the engine type supports two forms: **online**  and **online-onnx**. `online` indicates an engine that uses python for dynamic graph inference; `online-onnx` indicates an engine that uses onnxruntime for inference. The inference speed of online-onnx is faster.
- Streaming TTS engine AM model support: **fastspeech2 and fastspeech2_cnndecoder**; Voc model support: **hifigan and mb_melgan**
- In streaming am inference, one chunk of data is inferred at a time to achieve a streaming effect. Among them, `am_block` indicates the number of valid frames in the chunk, and `am_pad` indicates the number of frames added before and after am_block in a chunk. The existence of am_pad is used to eliminate errors caused by streaming inference and avoid the influence of streaming inference on the quality of synthesized audio.
    - fastspeech2 does not support streaming am inference, so am_pad and am_block have no effect on it.
    - fastspeech2_cnndecoder supports streaming inference. When am_pad=12, streaming inference synthesized audio is consistent with non-streaming synthesized audio.
- In streaming voc inference, one chunk of data is inferred at a time to achieve a streaming effect. Where `voc_block` indicates the number of valid frames in the chunk, and `voc_pad` indicates the number of frames added before and after the voc_block in a chunk. The existence of voc_pad is used to eliminate errors caused by streaming inference and avoid the influence of streaming inference on the quality of synthesized audio.
    - Both hifigan and mb_melgan support streaming voc inference.
    - When the voc model is mb_melgan and voc_pad=14, the audio synthesized by streaming inference is consistent with the non-streaming synthesized audio. voc_pad can be set as low as 7 without audible artifacts; if voc_pad is less than 7, the synthesized audio sounds abnormal.
    - When the voc model is hifigan and voc_pad=19, the audio synthesized by streaming inference is consistent with the non-streaming synthesized audio; when voc_pad=14, the synthesized audio has no audible artifacts.
    - Pad calculation method of streaming vocoder in PaddleSpeech: [AIStudio tutorial](https://aistudio.baidu.com/aistudio/projectdetail/4151335)
- Inference speed: mb_melgan > hifigan; Audio quality: mb_melgan < hifigan
- **Note:** If the service can be started normally in the container, but the client access IP is unreachable, you can try to replace the `host` address in the configuration file with the local IP address.

### 3. Streaming speech synthesis server and client using http protocol
#### 3.1 Server Usage
- Command Line (Recommended)

  Start the service (the configuration file uses http by default):
  ```bash
  paddlespeech_server start --config_file ./conf/tts_online_application.yaml
  ```

  Usage:
  
  ```bash
  paddlespeech_server start --help
  ```
  Arguments:
  - `config_file`: yaml file of the app, default: ./conf/tts_online_application.yaml
  - `log_file`: log file. Default: ./log/paddlespeech.log

  Output:
  ```text
  [2022-04-24 20:05:27,887] [    INFO] - The first response time of the 0 warm up: 1.0123658180236816 s
  [2022-04-24 20:05:28,038] [    INFO] - The first response time of the 1 warm up: 0.15108466148376465 s
  [2022-04-24 20:05:28,191] [    INFO] - The first response time of the 2 warm up: 0.15317344665527344 s
  [2022-04-24 20:05:28,192] [    INFO] - **********************************************************************
  INFO:     Started server process [14638]
  [2022-04-24 20:05:28] [INFO] [server.py:75] Started server process [14638]
  INFO:     Waiting for application startup.
  [2022-04-24 20:05:28] [INFO] [on.py:45] Waiting for application startup.
  INFO:     Application startup complete.
  [2022-04-24 20:05:28] [INFO] [on.py:59] Application startup complete.
  INFO:     Uvicorn running on http://0.0.0.0:8092 (Press CTRL+C to quit)
  [2022-04-24 20:05:28] [INFO] [server.py:211] Uvicorn running on http://0.0.0.0:8092 (Press CTRL+C to quit)

  ```

- Python API
  ```python
  from paddlespeech.server.bin.paddlespeech_server import ServerExecutor

  server_executor = ServerExecutor()
  server_executor(
      config_file="./conf/tts_online_application.yaml", 
      log_file="./log/paddlespeech.log")
  ```

  Output:
  ```text
  [2022-04-24 21:00:16,934] [    INFO] - The first response time of the 0 warm up: 1.268730878829956 s
  [2022-04-24 21:00:17,046] [    INFO] - The first response time of the 1 warm up: 0.11168622970581055 s
  [2022-04-24 21:00:17,151] [    INFO] - The first response time of the 2 warm up: 0.10413002967834473 s
  [2022-04-24 21:00:17,151] [    INFO] - **********************************************************************
  INFO:     Started server process [320]
  [2022-04-24 21:00:17] [INFO] [server.py:75] Started server process [320]
  INFO:     Waiting for application startup.
  [2022-04-24 21:00:17] [INFO] [on.py:45] Waiting for application startup.
  INFO:     Application startup complete.
  [2022-04-24 21:00:17] [INFO] [on.py:59] Application startup complete.
  INFO:     Uvicorn running on http://0.0.0.0:8092 (Press CTRL+C to quit)
  [2022-04-24 21:00:17] [INFO] [server.py:211] Uvicorn running on http://0.0.0.0:8092 (Press CTRL+C to quit)
  ```

#### 3.2 Streaming TTS client Usage
- Command Line (Recommended)

    Access http streaming TTS service:

    If `127.0.0.1` is not accessible, you need to use the actual service IP address.

    ```bash
    paddlespeech_client tts_online --server_ip 127.0.0.1 --port 8092 --protocol http --input "您好，欢迎使用百度飞桨语音合成服务。" --output output.wav
    ```

    Usage:
  
    ```bash
    paddlespeech_client tts_online --help
    ```

    Arguments:
    - `server_ip`: server ip. Default: 127.0.0.1
    - `port`: server port. Default: 8092
    - `protocol`: Service protocol, choices: [http, websocket], default: http.
    - `input`: (required): Input text to generate.
    - `spk_id`: Speaker id for multi-speaker text to speech. Default: 0
    - `output`: Client output wave filepath. Default: None, which means the audio is not saved locally.
    - `play`: Whether to play audio, play while synthesizing, default value: False, which means not playing. **Playing audio needs to rely on the pyaudio library**.
    - Currently, only the single-speaker model is supported in the code, so `spk_id` does not take effect. Streaming TTS does not support changing sample rate, variable speed and volume.
    
    Output:
    ```text
    [2022-04-24 21:08:18,559] [    INFO] - tts http client start
    [2022-04-24 21:08:21,702] [    INFO] - 句子：您好，欢迎使用百度飞桨语音合成服务。
    [2022-04-24 21:08:21,703] [    INFO] - 首包响应：0.18863153457641602 s
    [2022-04-24 21:08:21,704] [    INFO] - 尾包响应：3.1427218914031982 s
    [2022-04-24 21:08:21,704] [    INFO] - 音频时长：3.825 s
    [2022-04-24 21:08:21,704] [    INFO] - RTF: 0.8216266382753459
    [2022-04-24 21:08:21,739] [    INFO] - 音频保存至：output.wav

    ```

- Python API
  ```python
  from paddlespeech.server.bin.paddlespeech_client import TTSOnlineClientExecutor
  import json

  executor = TTSOnlineClientExecutor()
  executor(
      input="您好，欢迎使用百度飞桨语音合成服务。",
      server_ip="127.0.0.1",
      port=8092,
      protocol="http",
      spk_id=0,
      output="./output.wav",
      play=False)

  ```

  Output:
  ```text
  [2022-04-24 21:11:13,798] [    INFO] - tts http client start
  [2022-04-24 21:11:16,800] [    INFO] - 句子：您好，欢迎使用百度飞桨语音合成服务。
  [2022-04-24 21:11:16,801] [    INFO] - 首包响应：0.18234872817993164 s
  [2022-04-24 21:11:16,801] [    INFO] - 尾包响应：3.0013909339904785 s
  [2022-04-24 21:11:16,802] [    INFO] - 音频时长：3.825 s
  [2022-04-24 21:11:16,802] [    INFO] - RTF: 0.7846773683635238
  [2022-04-24 21:11:16,837] [    INFO] - 音频保存至：./output.wav
  ```

### 4. Streaming speech synthesis server and client using websocket protocol
#### 4.1 Server Usage
- Command Line (Recommended)

  First modify the configuration file `conf/tts_online_application.yaml`, **set `protocol` to `websocket`**.

  Start the service:
  ```bash
  paddlespeech_server start --config_file ./conf/tts_online_application.yaml
  ```

  Usage:
  
  ```bash
  paddlespeech_server start --help
  ```
  Arguments:
  - `config_file`: yaml file of the app, default: ./conf/tts_online_application.yaml
  - `log_file`: log file. Default: ./log/paddlespeech.log

  Output:
  ```text
  [2022-04-27 10:18:09,107] [    INFO] - The first response time of the 0 warm up: 1.1551103591918945 s
  [2022-04-27 10:18:09,219] [    INFO] - The first response time of the 1 warm up: 0.11204338073730469 s
  [2022-04-27 10:18:09,324] [    INFO] - The first response time of the 2 warm up: 0.1051797866821289 s
  [2022-04-27 10:18:09,325] [    INFO] - **********************************************************************
  INFO:     Started server process [17600]
  [2022-04-27 10:18:09] [INFO] [server.py:75] Started server process [17600]
  INFO:     Waiting for application startup.
  [2022-04-27 10:18:09] [INFO] [on.py:45] Waiting for application startup.
  INFO:     Application startup complete.
  [2022-04-27 10:18:09] [INFO] [on.py:59] Application startup complete.
  INFO:     Uvicorn running on http://0.0.0.0:8092 (Press CTRL+C to quit)
  [2022-04-27 10:18:09] [INFO] [server.py:211] Uvicorn running on http://0.0.0.0:8092 (Press CTRL+C to quit)
  ```

- Python API
  ```python
  from paddlespeech.server.bin.paddlespeech_server import ServerExecutor

  server_executor = ServerExecutor()
  server_executor(
      config_file="./conf/tts_online_application.yaml", 
      log_file="./log/paddlespeech.log")
  ```

  Output:
  ```text
  [2022-04-27 10:20:16,660] [    INFO] - The first response time of the 0 warm up: 1.0945196151733398 s
  [2022-04-27 10:20:16,773] [    INFO] - The first response time of the 1 warm up: 0.11222052574157715 s
  [2022-04-27 10:20:16,878] [    INFO] - The first response time of the 2 warm up: 0.10494542121887207 s
  [2022-04-27 10:20:16,878] [    INFO] - **********************************************************************
  INFO:     Started server process [23466]
  [2022-04-27 10:20:16] [INFO] [server.py:75] Started server process [23466]
  INFO:     Waiting for application startup.
  [2022-04-27 10:20:16] [INFO] [on.py:45] Waiting for application startup.
  INFO:     Application startup complete.
  [2022-04-27 10:20:16] [INFO] [on.py:59] Application startup complete.
  INFO:     Uvicorn running on http://0.0.0.0:8092 (Press CTRL+C to quit)
  [2022-04-27 10:20:16] [INFO] [server.py:211] Uvicorn running on http://0.0.0.0:8092 (Press CTRL+C to quit)
  ```

#### 4.2 Streaming TTS client Usage
- Command Line (Recommended)

    Access websocket streaming TTS service:

    If `127.0.0.1` is not accessible, you need to use the actual service IP address.

    ```bash
    paddlespeech_client tts_online --server_ip 127.0.0.1 --port 8092 --protocol websocket --input "您好，欢迎使用百度飞桨语音合成服务。" --output output.wav
    ```

    Usage:
  
    ```bash
    paddlespeech_client tts_online --help
    ```

    Arguments:
    - `server_ip`: server ip. Default: 127.0.0.1
    - `port`: server port. Default: 8092
    - `protocol`: Service protocol, choices: [http, websocket], default: http.
    - `input`: (required): Input text to generate.
    - `spk_id`: Speaker id for multi-speaker text to speech. Default: 0
    - `output`: Client output wave filepath. Default: None, which means the audio is not saved locally.
    - `play`: Whether to play audio, play while synthesizing, default value: False, which means not playing. **Playing audio needs to rely on the pyaudio library**.
    - Currently, only the single-speaker model is supported in the code, so `spk_id` does not take effect. Streaming TTS does not support changing sample rate, variable speed and volume.
    

    Output:
    ```text
    [2022-04-27 10:21:04,262] [    INFO] - tts websocket client start
    [2022-04-27 10:21:04,496] [    INFO] - 句子：您好，欢迎使用百度飞桨语音合成服务。
    [2022-04-27 10:21:04,496] [    INFO] - 首包响应：0.2124948501586914 s
    [2022-04-27 10:21:07,483] [    INFO] - 尾包响应：3.199106454849243 s
    [2022-04-27 10:21:07,484] [    INFO] - 音频时长：3.825 s
    [2022-04-27 10:21:07,484] [    INFO] - RTF: 0.8363677006141812
    [2022-04-27 10:21:07,516] [    INFO] - 音频保存至：output.wav
    ```

- Python API
  ```python
  from paddlespeech.server.bin.paddlespeech_client import TTSOnlineClientExecutor
  import json

  executor = TTSOnlineClientExecutor()
  executor(
      input="您好，欢迎使用百度飞桨语音合成服务。",
      server_ip="127.0.0.1",
      port=8092,
      protocol="websocket",
      spk_id=0,
      output="./output.wav",
      play=False)
  ```

  Output:
  ```text
  [2022-04-27 10:22:48,852] [    INFO] - tts websocket client start
  [2022-04-27 10:22:49,080] [    INFO] - 句子：您好，欢迎使用百度飞桨语音合成服务。
  [2022-04-27 10:22:49,080] [    INFO] - 首包响应：0.21017956733703613 s
  [2022-04-27 10:22:52,100] [    INFO] - 尾包响应：3.2304444313049316 s
  [2022-04-27 10:22:52,101] [    INFO] - 音频时长：3.825 s
  [2022-04-27 10:22:52,101] [    INFO] - RTF: 0.8445606356352762
  [2022-04-27 10:22:52,134] [    INFO] - 音频保存至：./output.wav
  ```


================================================
FILE: demos/streaming_tts_server/README_cn.md
================================================
(简体中文|[English](./README.md))

# 流式语音合成服务

## 介绍
这个 demo 是一个启动流式语音合成服务和访问该服务的实现。 它可以通过使用 `paddlespeech_server` 和 `paddlespeech_client` 的单个命令或 python 的几行代码来实现。

服务接口定义请参考:
- [PaddleSpeech Server RESTful API](https://github.com/PaddlePaddle/PaddleSpeech/wiki/PaddleSpeech-Server-RESTful-API)
- [PaddleSpeech Streaming Server WebSocket API](https://github.com/PaddlePaddle/PaddleSpeech/wiki/PaddleSpeech-Server-WebSocket-API)
## 使用方法
### 1. 安装
请看 [安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

推荐使用 **paddlepaddle 2.4rc** 或以上版本。

你可以从简单，中等，困难 几种方式中选择一种方式安装 PaddleSpeech。

**如果使用简单模式安装，需要自行准备 yaml 文件，可参考 conf 目录下的 yaml 文件。**

### 2. 准备配置文件
配置文件可参见 `conf/tts_online_application.yaml` 。
- `protocol` 表示该流式 TTS 服务使用的网络协议，目前支持 **http 和 websocket** 两种。
- `engine_list` 表示即将启动的服务将会包含的语音引擎，格式为 <语音任务>_<引擎类型>。
    - 该 demo 主要介绍流式语音合成服务，因此语音任务应设置为 tts。
    - 目前引擎类型支持两种形式：**online** 表示使用python进行动态图推理的引擎；**online-onnx** 表示使用 onnxruntime 进行推理的引擎。其中，online-onnx 的推理速度更快。
- 流式 TTS 引擎的 AM 模型支持：**fastspeech2 以及 fastspeech2_cnndecoder**; Voc 模型支持：**hifigan, mb_melgan**
- 流式 am 推理中，每次会对一个 chunk 的数据进行推理以达到流式的效果。其中 `am_block` 表示 chunk 中的有效帧数，`am_pad` 表示一个 chunk 中 am_block 前后各加的帧数。am_pad 的存在用于消除流式推理产生的误差，避免由流式推理对合成音频质量的影响。
    - fastspeech2 不支持流式 am 推理，因此 am_pad 与 am_block 对它无效
    - fastspeech2_cnndecoder 支持流式推理，当 am_pad=12 时，流式推理合成音频与非流式合成音频一致
- 流式 voc 推理中，每次会对一个 chunk 的数据进行推理以达到流式的效果。其中 `voc_block` 表示 chunk 中的有效帧数，`voc_pad` 表示一个 chunk 中 voc_block 前后各加的帧数。voc_pad 的存在用于消除流式推理产生的误差，避免由流式推理对合成音频质量的影响。
    - hifigan, mb_melgan 均支持流式 voc 推理
    - 当 voc 模型为 mb_melgan，当 voc_pad=14 时，流式推理合成音频与非流式合成音频一致；voc_pad 最小可以设置为7，合成音频听感上没有异常，若 voc_pad 小于7，合成音频听感上存在异常。
    - 当 voc 模型为 hifigan，当 voc_pad=19 时，流式推理合成音频与非流式合成音频一致；当 voc_pad=14 时，合成音频听感上没有异常。
    - PaddleSpeech 中流式声码器 Pad 计算方法: [AIStudio 教程](https://aistudio.baidu.com/aistudio/projectdetail/4151335)
- 推理速度：mb_melgan > hifigan; 音频质量：mb_melgan < hifigan
- **注意：** 如果在容器里可正常启动服务，但客户端访问 ip 不可达，可尝试将配置文件中 `host` 地址换成本地 ip 地址。


### 3. 使用 http 协议的流式语音合成服务端及客户端使用方法
#### 3.1 服务端使用方法
- 命令行 (推荐使用)

  启动服务（配置文件默认使用http）：
  ```bash
  paddlespeech_server start --config_file ./conf/tts_online_application.yaml
  ```

  使用方法：
  
  ```bash
  paddlespeech_server start --help
  ```
  参数:
  - `config_file`: 服务的配置文件，默认： ./conf/tts_online_application.yaml
  - `log_file`: log 文件. 默认：./log/paddlespeech.log

  输出:
  ```text
  [2022-04-24 20:05:27,887] [    INFO] - The first response time of the 0 warm up: 1.0123658180236816 s
  [2022-04-24 20:05:28,038] [    INFO] - The first response time of the 1 warm up: 0.15108466148376465 s
  [2022-04-24 20:05:28,191] [    INFO] - The first response time of the 2 warm up: 0.15317344665527344 s
  [2022-04-24 20:05:28,192] [    INFO] - **********************************************************************
  INFO:     Started server process [14638]
  [2022-04-24 20:05:28] [INFO] [server.py:75] Started server process [14638]
  INFO:     Waiting for application startup.
  [2022-04-24 20:05:28] [INFO] [on.py:45] Waiting for application startup.
  INFO:     Application startup complete.
  [2022-04-24 20:05:28] [INFO] [on.py:59] Application startup complete.
  INFO:     Uvicorn running on http://0.0.0.0:8092 (Press CTRL+C to quit)
  [2022-04-24 20:05:28] [INFO] [server.py:211] Uvicorn running on http://0.0.0.0:8092 (Press CTRL+C to quit)
  ```

- Python API
  ```python
  from paddlespeech.server.bin.paddlespeech_server import ServerExecutor

  server_executor = ServerExecutor()
  server_executor(
      config_file="./conf/tts_online_application.yaml", 
      log_file="./log/paddlespeech.log")
  ```

  输出:
  ```text
  [2022-04-24 21:00:16,934] [    INFO] - The first response time of the 0 warm up: 1.268730878829956 s
  [2022-04-24 21:00:17,046] [    INFO] - The first response time of the 1 warm up: 0.11168622970581055 s
  [2022-04-24 21:00:17,151] [    INFO] - The first response time of the 2 warm up: 0.10413002967834473 s
  [2022-04-24 21:00:17,151] [    INFO] - **********************************************************************
  INFO:     Started server process [320]
  [2022-04-24 21:00:17] [INFO] [server.py:75] Started server process [320]
  INFO:     Waiting for application startup.
  [2022-04-24 21:00:17] [INFO] [on.py:45] Waiting for application startup.
  INFO:     Application startup complete.
  [2022-04-24 21:00:17] [INFO] [on.py:59] Application startup complete.
  INFO:     Uvicorn running on http://0.0.0.0:8092 (Press CTRL+C to quit)
  [2022-04-24 21:00:17] [INFO] [server.py:211] Uvicorn running on http://0.0.0.0:8092 (Press CTRL+C to quit)
  ```

#### 3.2 客户端使用方法
- 命令行 (推荐使用)

    访问 http 流式TTS服务：

    若 `127.0.0.1` 不能访问，则需要使用实际服务 IP 地址

    ```bash
    paddlespeech_client tts_online --server_ip 127.0.0.1 --port 8092 --protocol http --input "您好，欢迎使用百度飞桨语音合成服务。" --output output.wav
    ```

    使用帮助:
  
    ```bash
    paddlespeech_client tts_online --help
    ```

    参数:
    - `server_ip`: 服务端ip地址，默认: 127.0.0.1。
    - `port`: 服务端口，默认: 8092。
    - `protocol`: 服务协议，可选 [http, websocket], 默认: http。
    - `input`: (必须输入): 待合成的文本。
    - `spk_id`: 说话人 id，用于多说话人语音合成，默认值： 0。
    - `output`: 客户端输出音频的路径， 默认值：None，表示不保存音频。
    - `play`: 是否播放音频，边合成边播放， 默认值：False，表示不播放。**播放音频需要依赖pyaudio库**。
    - 目前代码中只支持单说话人的模型，因此 spk_id 的选择并不生效。流式 TTS 不支持更换采样率，变速和变音量等功能。

    
    输出:
    ```text
    [2022-04-24 21:08:18,559] [    INFO] - tts http client start
    [2022-04-24 21:08:21,702] [    INFO] - 句子：您好，欢迎使用百度飞桨语音合成服务。
    [2022-04-24 21:08:21,703] [    INFO] - 首包响应：0.18863153457641602 s
    [2022-04-24 21:08:21,704] [    INFO] - 尾包响应：3.1427218914031982 s
    [2022-04-24 21:08:21,704] [    INFO] - 音频时长：3.825 s
    [2022-04-24 21:08:21,704] [    INFO] - RTF: 0.8216266382753459
    [2022-04-24 21:08:21,739] [    INFO] - 音频保存至：output.wav

    ```

- Python API
  ```python
  from paddlespeech.server.bin.paddlespeech_client import TTSOnlineClientExecutor
  import json

  executor = TTSOnlineClientExecutor()
  executor(
      input="您好，欢迎使用百度飞桨语音合成服务。",
      server_ip="127.0.0.1",
      port=8092,
      protocol="http",
      spk_id=0,
      output="./output.wav",
      play=False)

  ```

  输出:
  ```text
  [2022-04-24 21:11:13,798] [    INFO] - tts http client start
  [2022-04-24 21:11:16,800] [    INFO] - 句子：您好，欢迎使用百度飞桨语音合成服务。
  [2022-04-24 21:11:16,801] [    INFO] - 首包响应：0.18234872817993164 s
  [2022-04-24 21:11:16,801] [    INFO] - 尾包响应：3.0013909339904785 s
  [2022-04-24 21:11:16,802] [    INFO] - 音频时长：3.825 s
  [2022-04-24 21:11:16,802] [    INFO] - RTF: 0.7846773683635238
  [2022-04-24 21:11:16,837] [    INFO] - 音频保存至：./output.wav
  ```
 
### 4. 使用 websocket 协议的流式语音合成服务端及客户端使用方法
#### 4.1 服务端使用方法
- 命令行 (推荐使用)

  首先修改配置文件 `conf/tts_online_application.yaml`， **将 `protocol` 设置为 `websocket`**。

  启动服务：
  ```bash
  paddlespeech_server start --config_file ./conf/tts_online_application.yaml
  ```

  使用方法：
  
  ```bash
  paddlespeech_server start --help
  ```
  参数:
  - `config_file`: 服务的配置文件，默认： ./conf/tts_online_application.yaml
  - `log_file`: log 文件. 默认：./log/paddlespeech.log

  输出:
  ```text
  [2022-04-27 10:18:09,107] [    INFO] - The first response time of the 0 warm up: 1.1551103591918945 s
  [2022-04-27 10:18:09,219] [    INFO] - The first response time of the 1 warm up: 0.11204338073730469 s
  [2022-04-27 10:18:09,324] [    INFO] - The first response time of the 2 warm up: 0.1051797866821289 s
  [2022-04-27 10:18:09,325] [    INFO] - **********************************************************************
  INFO:     Started server process [17600]
  [2022-04-27 10:18:09] [INFO] [server.py:75] Started server process [17600]
  INFO:     Waiting for application startup.
  [2022-04-27 10:18:09] [INFO] [on.py:45] Waiting for application startup.
  INFO:     Application startup complete.
  [2022-04-27 10:18:09] [INFO] [on.py:59] Application startup complete.
  INFO:     Uvicorn running on http://0.0.0.0:8092 (Press CTRL+C to quit)
  [2022-04-27 10:18:09] [INFO] [server.py:211] Uvicorn running on http://0.0.0.0:8092 (Press CTRL+C to quit)
  ```

- Python API
  ```python
  from paddlespeech.server.bin.paddlespeech_server import ServerExecutor

  server_executor = ServerExecutor()
  server_executor(
      config_file="./conf/tts_online_application.yaml", 
      log_file="./log/paddlespeech.log")
  ```

  输出:
  ```text
  [2022-04-27 10:20:16,660] [    INFO] - The first response time of the 0 warm up: 1.0945196151733398 s
  [2022-04-27 10:20:16,773] [    INFO] - The first response time of the 1 warm up: 0.11222052574157715 s
  [2022-04-27 10:20:16,878] [    INFO] - The first response time of the 2 warm up: 0.10494542121887207 s
  [2022-04-27 10:20:16,878] [    INFO] - **********************************************************************
  INFO:     Started server process [23466]
  [2022-04-27 10:20:16] [INFO] [server.py:75] Started server process [23466]
  INFO:     Waiting for application startup.
  [2022-04-27 10:20:16] [INFO] [on.py:45] Waiting for application startup.
  INFO:     Application startup complete.
  [2022-04-27 10:20:16] [INFO] [on.py:59] Application startup complete.
  INFO:     Uvicorn running on http://0.0.0.0:8092 (Press CTRL+C to quit)
  [2022-04-27 10:20:16] [INFO] [server.py:211] Uvicorn running on http://0.0.0.0:8092 (Press CTRL+C to quit)
  ```

#### 4.2 客户端使用方法
- 命令行 (推荐使用)

    访问 websocket 流式 TTS 服务：

    若 `127.0.0.1` 不能访问，则需要使用实际服务 IP 地址

    ```bash
    paddlespeech_client tts_online --server_ip 127.0.0.1 --port 8092 --protocol websocket --input "您好，欢迎使用百度飞桨语音合成服务。" --output output.wav
    ```

    使用帮助:
  
    ```bash
    paddlespeech_client tts_online --help
    ```

    参数:
    - `server_ip`: 服务端ip地址，默认: 127.0.0.1。
    - `port`: 服务端口，默认: 8092。
    - `protocol`: 服务协议，可选 [http, websocket], 默认: http。
    - `input`: (必须输入): 待合成的文本。
    - `spk_id`: 说话人 id，用于多说话人语音合成，默认值： 0。
    - `output`: 客户端输出音频的路径， 默认值：None，表示不保存音频。
    - `play`: 是否播放音频，边合成边播放， 默认值：False，表示不播放。**播放音频需要依赖pyaudio库**。
    - 目前代码中只支持单说话人的模型，因此 spk_id 的选择并不生效。流式 TTS 不支持更换采样率，变速和变音量等功能。


    输出:
    ```text
    [2022-04-27 10:21:04,262] [    INFO] - tts websocket client start
    [2022-04-27 10:21:04,496] [    INFO] - 句子：您好，欢迎使用百度飞桨语音合成服务。
    [2022-04-27 10:21:04,496] [    INFO] - 首包响应：0.2124948501586914 s
    [2022-04-27 10:21:07,483] [    INFO] - 尾包响应：3.199106454849243 s
    [2022-04-27 10:21:07,484] [    INFO] - 音频时长：3.825 s
    [2022-04-27 10:21:07,484] [    INFO] - RTF: 0.8363677006141812
    [2022-04-27 10:21:07,516] [    INFO] - 音频保存至：output.wav
    ```

- Python API
  ```python
  from paddlespeech.server.bin.paddlespeech_client import TTSOnlineClientExecutor
  import json

  executor = TTSOnlineClientExecutor()
  executor(
      input="您好，欢迎使用百度飞桨语音合成服务。",
      server_ip="127.0.0.1",
      port=8092,
      protocol="websocket",
      spk_id=0,
      output="./output.wav",
      play=False)
  ```

  输出:
  ```text
    [2022-04-27 10:22:48,852] [    INFO] - tts websocket client start
    [2022-04-27 10:22:49,080] [    INFO] - 句子：您好，欢迎使用百度飞桨语音合成服务。
    [2022-04-27 10:22:49,080] [    INFO] - 首包响应：0.21017956733703613 s
    [2022-04-27 10:22:52,100] [    INFO] - 尾包响应：3.2304444313049316 s
    [2022-04-27 10:22:52,101] [    INFO] - 音频时长：3.825 s
    [2022-04-27 10:22:52,101] [    INFO] - RTF: 0.8445606356352762
    [2022-04-27 10:22:52,134] [    INFO] - 音频保存至：./output.wav
  ```


================================================
FILE: demos/streaming_tts_server/client.sh
================================================
#!/bin/bash

# Send the same sentence to a running streaming TTS service twice: once over
# http and once over websocket, saving each result to its own wav file.

# If `127.0.0.1` is not accessible, you need to use the actual service IP address.
server_ip=127.0.0.1
# Text to synthesize; shared by both requests.
text="您好，欢迎使用百度飞桨语音合成服务。"

# http client test
paddlespeech_client tts_online \
    --server_ip "${server_ip}" \
    --port 8092 \
    --protocol http \
    --input "${text}" \
    --output output.http.wav

# websocket client test
# NOTE(review): this request targets port 8192, not 8092 — presumably a
# separate websocket server instance listens there; confirm against the
# server-side configuration before changing it.
paddlespeech_client tts_online \
    --server_ip "${server_ip}" \
    --port 8192 \
    --protocol websocket \
    --input "${text}" \
    --output output.ws.wav


================================================
FILE: demos/streaming_tts_server/conf/tts_online_application.yaml
================================================
# This is the parameter configuration file for the streaming TTS server.

#################################################################################
#                             SERVER SETTING                                    #
#################################################################################
# Address and port the service listens on.
host: 0.0.0.0
port: 8092

# The task format in the engine_list is: <speech task>_<engine type>
# engine_list choices = ['tts_online', 'tts_online-onnx']; the inference speed of tts_online-onnx is faster than tts_online.
# protocol choices = ['websocket', 'http']
protocol: 'http'
engine_list: ['tts_online-onnx']


#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### TTS #########################################
################### speech task: tts; engine_type: online #######################
tts_online: 
    # am (acoustic model) choices=['fastspeech2_csmsc', 'fastspeech2_cnndecoder_csmsc']   
    # fastspeech2_cnndecoder_csmsc support streaming am infer.     
    am: 'fastspeech2_csmsc'   
    am_config: 
    am_ckpt: 
    am_stat: 
    phones_dict: 
    tones_dict: 
    speaker_dict: 
    

    # voc (vocoder) choices=['mb_melgan_csmsc', 'hifigan_csmsc']
    # Both mb_melgan_csmsc and hifigan_csmsc support streaming voc inference
    voc: 'mb_melgan_csmsc'
    voc_config: 
    voc_ckpt: 
    voc_stat: 

    # others
    lang: 'zh'
    device: 'cpu' # set 'gpu:id' or 'cpu'
    # am_block and am_pad only for fastspeech2_cnndecoder_onnx model to streaming am infer,
    # when am_pad set 12, streaming synthetic audio is the same as non-streaming synthetic audio
    am_block: 72
    am_pad: 12
    # voc_pad and voc_block voc model to streaming voc infer,
    # when voc model is mb_melgan_csmsc, voc_pad set 14, streaming synthetic audio is the same as non-streaming synthetic audio; The minimum value of pad can be set to 7, streaming synthetic audio sounds normal
    # when voc model is hifigan_csmsc, voc_pad set 19, streaming synthetic audio is the same as non-streaming synthetic audio; voc_pad set 14, streaming synthetic audio sounds normal
    voc_block: 36
    voc_pad: 14
    

#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### TTS #########################################
################### speech task: tts; engine_type: online-onnx #######################
tts_online-onnx: 
    # am (acoustic model) choices=['fastspeech2_csmsc_onnx', 'fastspeech2_cnndecoder_csmsc_onnx']
    # fastspeech2_cnndecoder_csmsc_onnx support streaming am infer.        
    am: 'fastspeech2_cnndecoder_csmsc_onnx' 
    # am_ckpt is a list, if am is fastspeech2_cnndecoder_csmsc_onnx, am_ckpt = [encoder model, decoder model, postnet model];
    # if am is fastspeech2_csmsc_onnx, am_ckpt = [ckpt model];
    am_ckpt:   # list
    am_stat: 
    phones_dict: 
    tones_dict: 
    speaker_dict: 
    am_sample_rate: 24000
    am_sess_conf:
        device: "cpu" # set 'gpu:id' or 'cpu'
        use_trt: False
        cpu_threads: 4

    # voc (vocoder) choices=['mb_melgan_csmsc_onnx', 'hifigan_csmsc_onnx']
    # Both mb_melgan_csmsc_onnx and hifigan_csmsc_onnx support streaming voc inference
    voc: 'mb_melgan_csmsc_onnx'
    voc_ckpt: 
    voc_sample_rate: 24000
    voc_sess_conf:
        device: "cpu" # set 'gpu:id' or 'cpu'
        use_trt: False
        cpu_threads: 4

    # others
    lang: 'zh'
    # am_block and am_pad only for fastspeech2_cnndecoder_onnx model to streaming am infer,
    # when am_pad set 12, streaming synthetic audio is the same as non-streaming synthetic audio
    am_block: 72
    am_pad: 12
    # voc_pad and voc_block voc model to streaming voc infer,
    # when voc model is mb_melgan_csmsc_onnx, voc_pad set 14, streaming synthetic audio is the same as non-streaming synthetic audio; The minimum value of pad can be set to 7, streaming synthetic audio sounds normal
    # when voc model is hifigan_csmsc_onnx, voc_pad set 19, streaming synthetic audio is the same as non-streaming synthetic audio; voc_pad set 14, streaming synthetic audio sounds normal
    voc_block: 36
    voc_pad: 14
    # voc_upsample should be same as n_shift on voc config.
    voc_upsample: 300
    

================================================
FILE: demos/streaming_tts_server/conf/tts_online_ws_application.yaml
================================================
# This is the parameter configuration file for streaming tts server.

#################################################################################
#                             SERVER SETTING                                    #
#################################################################################
host: 0.0.0.0
port: 8192

# The task format in the engine_list is: <speech task>_<engine type>
# engine_list choices = ['tts_online', 'tts_online-onnx'], the inference speed of tts_online-onnx is faster than tts_online.
# protocol choices = ['websocket', 'http'] 
protocol: 'websocket'
engine_list: ['tts_online-onnx']


#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### TTS #########################################
################### speech task: tts; engine_type: online #######################
tts_online: 
    # am (acoustic model) choices=['fastspeech2_csmsc', 'fastspeech2_cnndecoder_csmsc']   
    # fastspeech2_cnndecoder_csmsc support streaming am infer.     
    am: 'fastspeech2_csmsc'   
    am_config: 
    am_ckpt: 
    am_stat: 
    phones_dict: 
    tones_dict: 
    speaker_dict: 
        

    # voc (vocoder) choices=['mb_melgan_csmsc', 'hifigan_csmsc']
    # Both mb_melgan_csmsc and hifigan_csmsc support streaming voc inference
    voc: 'mb_melgan_csmsc'
    voc_config: 
    voc_ckpt: 
    voc_stat: 

    # others
    lang: 'zh'
    device: 'cpu' # set 'gpu:id' or 'cpu'
    # am_block and am_pad only for fastspeech2_cnndecoder_onnx model to streaming am infer,
    # when am_pad set 12, streaming synthetic audio is the same as non-streaming synthetic audio
    am_block: 72
    am_pad: 12
    # voc_pad and voc_block voc model to streaming voc infer,
    # when voc model is mb_melgan_csmsc, voc_pad set 14, streaming synthetic audio is the same as non-streaming synthetic audio; The minimum value of pad can be set to 7, streaming synthetic audio sounds normal
    # when voc model is hifigan_csmsc, voc_pad set 19, streaming synthetic audio is the same as non-streaming synthetic audio; voc_pad set 14, streaming synthetic audio sounds normal
    voc_block: 36
    voc_pad: 14
    

#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### TTS #########################################
################### speech task: tts; engine_type: online-onnx #######################
tts_online-onnx: 
    # am (acoustic model) choices=['fastspeech2_csmsc_onnx', 'fastspeech2_cnndecoder_csmsc_onnx']
    # fastspeech2_cnndecoder_csmsc_onnx support streaming am infer.        
    am: 'fastspeech2_cnndecoder_csmsc_onnx' 
    # am_ckpt is a list, if am is fastspeech2_cnndecoder_csmsc_onnx, am_ckpt = [encoder model, decoder model, postnet model];
    # if am is fastspeech2_csmsc_onnx, am_ckpt = [ckpt model];
    am_ckpt:   # list
    am_stat: 
    phones_dict: 
    tones_dict: 
    speaker_dict: 
    am_sample_rate: 24000
    am_sess_conf:
        device: "cpu" # set 'gpu:id' or 'cpu'
        use_trt: False
        cpu_threads: 4

    # voc (vocoder) choices=['mb_melgan_csmsc_onnx', 'hifigan_csmsc_onnx']
    # Both mb_melgan_csmsc_onnx and hifigan_csmsc_onnx support streaming voc inference
    voc: 'mb_melgan_csmsc_onnx'
    voc_ckpt: 
    voc_sample_rate: 24000
    voc_sess_conf:
        device: "cpu" # set 'gpu:id' or 'cpu'
        use_trt: False
        cpu_threads: 4

    # others
    lang: 'zh'
    # am_block and am_pad only for fastspeech2_cnndecoder_onnx model to streaming am infer,
    # when am_pad set 12, streaming synthetic audio is the same as non-streaming synthetic audio
    am_block: 72
    am_pad: 12
    # voc_pad and voc_block voc model to streaming voc infer,
    # when voc model is mb_melgan_csmsc_onnx, voc_pad set 14, streaming synthetic audio is the same as non-streaming synthetic audio; The minimum value of pad can be set to 7, streaming synthetic audio sounds normal
    # when voc model is hifigan_csmsc_onnx, voc_pad set 19, streaming synthetic audio is the same as non-streaming synthetic audio; voc_pad set 14, streaming synthetic audio sounds normal
    voc_block: 36
    voc_pad: 14
    # voc_upsample should be same as n_shift on voc config.
    voc_upsample: 300
    

================================================
FILE: demos/streaming_tts_server/server.sh
================================================
#!/bin/bash

# Start both streaming TTS servers in the background. Each server gets its own
# config file, and its stdout and stderr are redirected to its own log file.
conf_dir=./conf

# http server
paddlespeech_server start --config_file "${conf_dir}/tts_online_application.yaml" &> tts.http.log &


# websocket server
paddlespeech_server start --config_file "${conf_dir}/tts_online_ws_application.yaml" &> tts.ws.log &


================================================
FILE: demos/streaming_tts_serving_fastdeploy/README.md
================================================
([简体中文](./README_cn.md)|English)

# Streaming Speech Synthesis Service

## Introduction
This demo is an implementation of starting the streaming speech synthesis service and accessing the service.

The `Server` must be started inside a Docker container, while the `Client` does not have to run inside one.

**The `streaming_tts_serving` directory under this demo's path (`$PWD`) contains the configuration and code of the model; it needs to be mapped into the Docker container for use.**

## Usage
### 1. Server
#### 1.1 Docker

```bash
docker pull registry.baidubce.com/paddlepaddle/fastdeploy_serving_cpu_only:22.09
docker run -dit  --net=host --name fastdeploy --shm-size="1g" -v $PWD:/models registry.baidubce.com/paddlepaddle/fastdeploy_serving_cpu_only:22.09
docker exec -it -u root fastdeploy bash
```

#### 1.2 Installation(inside the docker)
```bash
apt-get install build-essential python3-dev libssl-dev libffi-dev libxml2 libxml2-dev libxslt1-dev zlib1g-dev libsndfile1 language-pack-zh-hans wget zip
pip3 install paddlespeech
export LC_ALL="zh_CN.UTF-8"
export LANG="zh_CN.UTF-8"
export LANGUAGE="zh_CN:zh:en_US:en"
```

#### 1.3 Download models(inside the docker)
```bash
cd /models/streaming_tts_serving/1
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0.zip
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_csmsc_onnx_0.2.0.zip
unzip fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0.zip
unzip mb_melgan_csmsc_onnx_0.2.0.zip
```
**For the convenience of users, we recommend using the `docker -v` option shown in 1.1 to map `$PWD` (which contains `streaming_tts_serving` together with the configuration and code of the model) to the path `/models` inside the container. You can also use other methods, but whichever method you use, the final model directory and structure inside the container should be as shown in the following figure.**

<p align="center">
  <img src="./tree.png" />
</p>

#### 1.4 Start the server(inside the docker)

```bash
fastdeployserver --model-repository=/models --model-control-mode=explicit --load-model=streaming_tts_serving
```
Arguments:
  - `model-repository`(required): Path of model storage.
  - `model-control-mode`(required): The mode of loading the model. At present, you can use 'explicit'.
  - `load-model`(required): Name of the model to be loaded.
  - `http-port`(optional): Port for http service. Default: `8000`. This is not used in our example.
  - `grpc-port`(optional): Port for grpc service. Default: `8001`.
  - `metrics-port`(optional): Port for metrics service. Default: `8002`. This is not used in our example.

### 2. Client
#### 2.1 Installation
```bash
pip3 install tritonclient[all]
```

#### 2.2 Send request
```bash
python3 /models/streaming_tts_serving/stream_client.py
```


================================================
FILE: demos/streaming_tts_serving_fastdeploy/README_cn.md
================================================
(简体中文|[English](./README.md))

# 流式语音合成服务

## 介绍

本文介绍了使用FastDeploy搭建流式语音合成服务的方法。

`服务端`必须在docker内启动,而`客户端`不是必须在docker容器内.

**本文所在路径`($PWD)下的streaming_tts_serving里包含模型的配置和代码`(服务端会加载模型和代码以启动服务),需要将其映射到docker中使用。**

## 使用
### 1. 服务端
#### 1.1 Docker
```bash
docker pull registry.baidubce.com/paddlepaddle/fastdeploy_serving_cpu_only:22.09
docker run -dit  --net=host --name fastdeploy --shm-size="1g" -v $PWD:/models registry.baidubce.com/paddlepaddle/fastdeploy_serving_cpu_only:22.09
docker exec -it -u root fastdeploy bash
```

#### 1.2 安装(在docker内)
```bash
apt-get install build-essential python3-dev libssl-dev libffi-dev libxml2 libxml2-dev libxslt1-dev zlib1g-dev libsndfile1 language-pack-zh-hans wget zip
pip3 install paddlespeech
export LC_ALL="zh_CN.UTF-8"
export LANG="zh_CN.UTF-8"
export LANGUAGE="zh_CN:zh:en_US:en"
```

#### 1.3 下载模型(在docker内)
```bash
cd /models/streaming_tts_serving/1
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0.zip
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_csmsc_onnx_0.2.0.zip
unzip fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0.zip
unzip mb_melgan_csmsc_onnx_0.2.0.zip
```
**为了方便用户使用，我们推荐用户使用1.1中的`docker -v`命令将`$PWD(streaming_tts_serving及里面包含的模型的配置和代码)映射到了docker内的/models路径`,用户也可以使用其他办法,但无论使用哪种方法,最终在docker内的模型目录及结构如下图所示。**

<p align="center">
  <img src="./tree.png" />
</p>

#### 1.4 启动服务端(在docker内)
```bash
fastdeployserver --model-repository=/models --model-control-mode=explicit --load-model=streaming_tts_serving
```

参数:
  - `model-repository`(required): 整套模型streaming_tts_serving存放的路径.
  - `model-control-mode`(required): 模型加载的方式,现阶段, 使用'explicit'即可.
  - `load-model`(required): 需要加载的模型的名称.
  - `http-port`(optional): HTTP服务的端口号. 默认: `8000`. 本示例中未使用该端口.
  - `grpc-port`(optional): GRPC服务的端口号. 默认: `8001`.
  - `metrics-port`(optional): 服务端指标的端口号. 默认: `8002`. 本示例中未使用该端口.

### 2. 客户端
#### 2.1 安装
```bash
pip3 install tritonclient[all]
```

#### 2.2 发送请求
```bash
python3 /models/streaming_tts_serving/stream_client.py
```


================================================
FILE: demos/streaming_tts_serving_fastdeploy/streaming_tts_serving/1/model.py
================================================
import codecs
import json
import math
import sys
import threading
import time

import numpy as np
import onnxruntime as ort
import triton_python_backend_utils as pb_utils

from paddlespeech.server.utils.util import denorm
from paddlespeech.server.utils.util import get_chunks
from paddlespeech.t2s.frontend.zh_frontend import Frontend

# Streaming chunk geometry. Block and pad sizes are counted in frames; they are
# passed to get_chunks()/depadding() by the response thread.
voc_block = 36
voc_pad = 14
am_block = 72
am_pad = 12
# Multiplier applied to frame counts when trimming the vocoder output.
voc_upsample = 300

# Model paths
dir_name = "/models/streaming_tts_serving/1/"
phones_dict = dir_name + "fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0/phone_id_map.txt"
am_stat_path = dir_name + "fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0/speech_stats.npy"

onnx_am_encoder = dir_name + "fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0/fastspeech2_csmsc_am_encoder_infer.onnx"
onnx_am_decoder = dir_name + "fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0/fastspeech2_csmsc_am_decoder.onnx"
onnx_am_postnet = dir_name + "fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0/fastspeech2_csmsc_am_postnet.onnx"
onnx_voc_melgan = dir_name + "mb_melgan_csmsc_onnx_0.2.0/mb_melgan_csmsc.onnx"

# Text frontend and the acoustic-model statistics passed to denorm().
frontend = Frontend(phone_vocab_path=phones_dict, tone_vocab_path=None)
am_mu, am_std = np.load(am_stat_path)

# Run inference on CPU
providers = ['CPUExecutionProvider']

# ONNX Runtime session options shared by every session below
sess_options = ort.SessionOptions()


def _create_session(model_path):
    """Create an ONNX Runtime session for ``model_path`` with the shared settings."""
    return ort.InferenceSession(
        model_path, providers=providers, sess_options=sess_options)


# Create one session per model
am_encoder_infer_sess = _create_session(onnx_am_encoder)
am_decoder_sess = _create_session(onnx_am_decoder)
am_postnet_sess = _create_session(onnx_am_postnet)
voc_melgan_sess = _create_session(onnx_voc_melgan)


def depadding(data, chunk_num, chunk_id, block, pad, upsample):
    """Trim the padded context from one chunk of streaming inference output.

    Args:
        data: Sliceable output of one chunk (sliced along its first axis).
        chunk_num: Total number of chunks in the stream.
        chunk_id: Zero-based index of this chunk.
        block: Number of frames that belong to a chunk proper.
        pad: Number of context frames added around a chunk.
        upsample: Number of output elements produced per frame.

    Returns:
        The slice of ``data`` that belongs to this chunk.
    """
    # First chunk: there is no leading pad, keep the first block only.
    if chunk_id == 0:
        return data[:block * upsample]

    # Later chunks start with at most `pad` frames of leading context.
    head = min(chunk_id * block, pad) * upsample

    # Last chunk: drop the leading context and keep everything after it.
    if chunk_id == chunk_num - 1:
        return data[head:]

    # Middle chunk: drop the leading context and keep exactly one block.
    return data[head:head + block * upsample]


class TritonPythonModel:
    """Decoupled Python-backend model that streams synthesized audio chunks.

    Your Python model must use the same class name. Every Python model
    that is created must have "TritonPythonModel" as the class name.
    """

    def initialize(self, args):
        """`initialize` is called only once when the model is being loaded.
        Implementing `initialize` function is optional. This function allows
        the model to initialize any state associated with this model.
        Parameters
        ----------
        args : dict
          Both keys and values are strings. The dictionary keys and values are:
          * model_config: A JSON string containing the model configuration
          * model_instance_kind: A string containing model instance kind
          * model_instance_device_id: A string containing model instance device ID
          * model_repository: Model repository path
          * model_version: Model version
          * model_name: Model name
        Raises
        ------
        pb_utils.TritonModelException
          If the decoupled transaction policy is not enabled for this model.
        """
        # Re-wrap stdout so that printed text is always encoded as UTF-8.
        sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach())
        print(sys.getdefaultencoding())
        # You must parse model_config. JSON string is not parsed here
        self.model_config = model_config = json.loads(args['model_config'])
        print("model_config:", self.model_config)

        using_decoupled = pb_utils.using_decoupled_model_transaction_policy(
            model_config)

        if not using_decoupled:
            raise pb_utils.TritonModelException(
                """the model `{}` can generate any number of responses per request,
                enable decoupled transaction policy in model configuration to
                serve this model""".format(args['model_name']))

        self.input_names = []
        for input_config in self.model_config["input"]:
            self.input_names.append(input_config["name"])
        print("input:", self.input_names)

        self.output_names = []
        self.output_dtype = []
        for output_config in self.model_config["output"]:
            self.output_names.append(output_config["name"])
            dtype = pb_utils.triton_string_to_numpy(output_config["data_type"])
            self.output_dtype.append(dtype)
        print("output:", self.output_names)

        # To keep track of response threads so that we can delay
        # the finalizing the model until all response threads
        # have completed.
        self.inflight_thread_count = 0
        self.inflight_thread_count_lck = threading.Lock()

    def execute(self, requests):
        """`execute` must be implemented in every Python model. `execute`
        function receives a list of pb_utils.InferenceRequest as the only
        argument. This function is called when an inference is requested
        for this model.

        The text of the first input is handed to a background thread which
        streams the responses through the request's response sender.

        Parameters
        ----------
        requests : list
          A list of pb_utils.InferenceRequest; must contain exactly one item.
        Returns
        -------
        None
          Responses are sent from the response thread, not returned here.
        Raises
        ------
        pb_utils.TritonModelException
          If `requests` does not contain exactly one request.
        """

        # This model does not support batching, so 'request_count' should always
        # be 1.
        if len(requests) != 1:
            # Format the count explicitly: concatenating str and int would
            # raise TypeError and hide the real error from the caller.
            raise pb_utils.TritonModelException(
                "unsupported batch size {}".format(len(requests)))

        request = requests[0]
        input_data = []
        for input_name in self.input_names:
            data = pb_utils.get_input_tensor_by_name(request, input_name)
            data = data.as_numpy()
            data = data[0].decode('utf-8')
            input_data.append(data)
        text = input_data[0]

        # Start a separate thread to send the responses for the request. The
        # sending back the responses is delegated to this thread.
        thread = threading.Thread(
            target=self.response_thread,
            args=(request.get_response_sender(), text))
        thread.daemon = True
        with self.inflight_thread_count_lck:
            self.inflight_thread_count += 1

        thread.start()
        # Unlike in non-decoupled model transaction policy, execute function
        # here returns no response. A return from this function only notifies
        # Triton that the model instance is ready to receive another request. As
        # we are not waiting for the response thread to complete here, it is
        # possible that at any give time the model may be processing multiple
        # requests. Depending upon the request workload, this may lead to a lot
        # of requests being processed by a single model instance at a time. In
        # real-world models, the developer should be mindful of when to return
        # from execute and be willing to accept next request.
        return None

    def response_thread(self, response_sender, text):
        """Synthesize `text` and stream the audio chunks to `response_sender`.

        The response sender is always closed and the in-flight thread counter
        is always decremented, even when synthesis raises; otherwise
        `finalize` would wait forever for this thread.

        Parameters
        ----------
        response_sender : object
          Response sender obtained from the request in `execute`.
        text : str
          Text to synthesize.
        """
        try:
            self._stream_synthesis(response_sender, text)
        finally:
            try:
                # We must close the response sender to indicate to Triton that
                # we are done sending responses for the corresponding request.
                # We can't use the response sender after closing it. The
                # response sender is closed by setting the
                # TRITONSERVER_RESPONSE_COMPLETE_FINAL.
                response_sender.send(
                    flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)
            finally:
                with self.inflight_thread_count_lck:
                    self.inflight_thread_count -= 1

    def _stream_synthesis(self, response_sender, text):
        """Run streaming AM + vocoder inference and send one response per vocoder chunk."""
        input_ids = frontend.get_input_ids(
            text, merge_sentences=False, get_tone_ids=False)
        phone_ids = input_ids["phone_ids"]
        last_sentence_id = len(phone_ids) - 1
        for sentence_id, sentence_phone_ids in enumerate(phone_ids):
            part_phone_ids = sentence_phone_ids.numpy()
            voc_chunk_id = 0

            orig_hs = am_encoder_infer_sess.run(
                None, input_feed={'text': part_phone_ids})
            orig_hs = orig_hs[0]

            # streaming voc chunk info
            mel_len = orig_hs.shape[1]
            voc_chunk_num = math.ceil(mel_len / voc_block)
            start = 0
            end = min(voc_block + voc_pad, mel_len)

            # streaming am
            hss = get_chunks(orig_hs, am_block, am_pad, "am")
            am_chunk_num = len(hss)
            for am_chunk_id, hs in enumerate(hss):
                am_decoder_output = am_decoder_sess.run(
                    None, input_feed={'xs': hs})
                am_postnet_output = am_postnet_sess.run(
                    None,
                    input_feed={
                        'xs': np.transpose(am_decoder_output[0], (0, 2, 1))
                    })
                # am_decoder_output is the list returned by run(); adding the
                # array broadcasts over it, hence the double [0][0] below.
                am_output_data = am_decoder_output + np.transpose(
                    am_postnet_output[0], (0, 2, 1))
                normalized_mel = am_output_data[0][0]

                sub_mel = denorm(normalized_mel, am_mu, am_std)
                sub_mel = depadding(sub_mel, am_chunk_num, am_chunk_id,
                                    am_block, am_pad, 1)

                if am_chunk_id == 0:
                    mel_streaming = sub_mel
                else:
                    mel_streaming = np.concatenate(
                        (mel_streaming, sub_mel), axis=0)

                # streaming voc
                # Start streaming vocoder inference as soon as the streamed AM
                # output holds enough mel frames for the next vocoder chunk.
                while (mel_streaming.shape[0] >= end and
                       voc_chunk_id < voc_chunk_num):
                    voc_chunk = mel_streaming[start:end, :]

                    sub_wav = voc_melgan_sess.run(
                        output_names=None, input_feed={'logmel': voc_chunk})
                    sub_wav = depadding(sub_wav[0], voc_chunk_num, voc_chunk_id,
                                        voc_block, voc_pad, voc_upsample)

                    output_np = np.array(sub_wav, dtype=self.output_dtype[0])
                    out_tensor1 = pb_utils.Tensor(self.output_names[0],
                                                  output_np)

                    # Flag only the very last chunk of the whole text. Flagging
                    # the last chunk of every sentence makes a client that
                    # stops at the first flag drop the remaining sentences.
                    is_final_chunk = (sentence_id == last_sentence_id and
                                      voc_chunk_id == voc_chunk_num - 1)
                    status = 1 if is_final_chunk else 0
                    output_status = np.array(
                        [status], dtype=self.output_dtype[1])
                    out_tensor2 = pb_utils.Tensor(self.output_names[1],
                                                  output_status)

                    inference_response = pb_utils.InferenceResponse(
                        output_tensors=[out_tensor1, out_tensor2])

                    response_sender.send(inference_response)

                    voc_chunk_id += 1
                    start = max(0, voc_chunk_id * voc_block - voc_pad)
                    end = min((voc_chunk_id + 1) * voc_block + voc_pad, mel_len)

    def finalize(self):
        """`finalize` is called only once when the model is being unloaded.
        Implementing `finalize` function is OPTIONAL. This function allows
        the model to perform any necessary clean ups before exit.
        Here we will wait for all response threads to complete sending
        responses.
        """
        print('Finalize invoked')

        inflight_threads = True
        cycles = 0
        logging_time_sec = 5
        sleep_time_sec = 0.1
        # Number of polling cycles between two progress messages.
        cycle_to_log = (logging_time_sec / sleep_time_sec)
        while inflight_threads:
            with self.inflight_thread_count_lck:
                inflight_threads = (self.inflight_thread_count != 0)
                if (cycles % cycle_to_log == 0):
                    print(
                        f"Waiting for {self.inflight_thread_count} response threads to complete..."
                    )
            if inflight_threads:
                time.sleep(sleep_time_sec)
                cycles += 1

        print('Finalize complete...')


================================================
FILE: demos/streaming_tts_serving_fastdeploy/streaming_tts_serving/config.pbtxt
================================================
name: "streaming_tts_serving"
backend: "python"
max_batch_size: 0
model_transaction_policy {
  decoupled: True
}
input [
  {
    name: "INPUT_0"
    data_type: TYPE_STRING
    dims: [ 1 ]
  }
]

output [
  {
    name: "OUTPUT_0"
    data_type: TYPE_FP32
    dims: [ -1, 1 ]
  },
  {
    name: "status"
    data_type: TYPE_BOOL
    dims: [ 1 ]
  }
]

instance_group [
  {
      count: 1
      kind: KIND_CPU
  }
]


================================================
FILE: demos/streaming_tts_serving_fastdeploy/streaming_tts_serving/stream_client.py
================================================
#!/usr/bin/env python
import argparse
import queue
import sys
from functools import partial

import numpy as np
import tritonclient.grpc as grpcclient
from tritonclient.utils import *

FLAGS = None


class UserData:
    """Holds the queue through which the stream callback hands over results."""

    def __init__(self):
        # Filled by `callback` with either an inference result or an error.
        self._completed_requests = queue.Queue()


# Stream callback. Note that the last two parameters must be result and error:
# InferenceServerClient provides the outcome of an inference as a
# grpcclient.InferResult in `result`. For a successful inference `error` is
# None; otherwise it is a tritonclientutils.InferenceServerException holding
# the error details.
def callback(user_data, result, error):
    """Queue the error if there is one, otherwise queue the result."""
    outcome = error if error else result
    user_data._completed_requests.put(outcome)


def async_stream_send(triton_client, values, request_id, model_name):
    """Send one asynchronous streaming inference request per text in `values`.

    Each request carries exactly one `INPUT_0` tensor. The original code
    appended to shared input/output lists inside the loop, so every request
    after the first would have carried the inputs of all previous ones.

    Args:
        triton_client: Client exposing `async_stream_infer`, with a stream
            already started.
        values: Iterable of text strings to synthesize.
        request_id: Request id attached to every request that is sent.
        model_name: Name of the model to run.
    """
    # Request both tensors that the receiving loop reads from each response.
    outputs = [
        grpcclient.InferRequestedOutput('OUTPUT_0'),
        grpcclient.InferRequestedOutput('status'),
    ]

    for text in values:
        # String inputs are sent as a BYTES tensor holding the UTF-8 text.
        data = np.array([text.encode('utf-8')], dtype=np.object_)
        infer_input = grpcclient.InferInput('INPUT_0', [len(data)], "BYTES")
        infer_input.set_data_from_numpy(data)

        # Issue the asynchronous sequence inference.
        triton_client.async_stream_infer(
            model_name=model_name,
            inputs=[infer_input],
            outputs=outputs,
            request_id=request_id)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-v',
        '--verbose',
        action="store_true",
        required=False,
        default=False,
        help='Enable verbose output')
    parser.add_argument(
        '-u',
        '--url',
        type=str,
        required=False,
        default='localhost:8001',
        help='Inference server URL and it gRPC port. Default is localhost:8001.')

    FLAGS = parser.parse_args()

    # Name of the model to query on the server.
    model_name = "streaming_tts_serving"

    # Text(s) to synthesize.
    values = ["哈哈哈哈"]

    request_id = "0"

    user_data = UserData()

    # It is advisable to use client object within with..as clause
    # when sending streaming requests. This ensures the client
    # is closed when the block inside with exits.
    with grpcclient.InferenceServerClient(
            url=FLAGS.url, verbose=FLAGS.verbose) as triton_client:
        try:
            # Establish stream
            triton_client.start_stream(callback=partial(callback, user_data))
            # Now send the inference sequences...
            async_stream_send(triton_client, values, request_id, model_name)
        except InferenceServerException as error:
            print(error)
            sys.exit(1)

        # Retrieve results until the server flags the final audio chunk.
        recv_count = 0
        result_dict = {}
        while True:
            data_item = user_data._completed_requests.get()
            # isinstance also catches subclasses of the client exception,
            # which an exact type comparison would treat as a result.
            if isinstance(data_item, InferenceServerException):
                raise data_item

            this_id = data_item.get_response().id
            result_dict.setdefault(this_id, []).append((recv_count, data_item))
            sub_wav = data_item.as_numpy('OUTPUT_0')
            status = data_item.as_numpy('status')
            print('sub_wav = ', sub_wav, "subwav.shape = ", sub_wav.shape)
            print('status = ', status)
            # A status of 1 marks the last chunk; stop receiving.
            if status[0] == 1:
                break
            recv_count += 1

    print("PASS: stream_client")


================================================
FILE: demos/style_fs2/README.md
================================================
([简体中文](./README_cn.md)|English)

# Style FastSpeech2
## Introduction
[FastSpeech2](https://arxiv.org/abs/2006.04558) is a classical acoustic model for Text-to-Speech synthesis, which introduces controllable speech inputs, including `phoneme duration`, `energy` and `pitch`.

In the prediction phase, you can change these controllable variables to get some interesting results.

For example:

1. The `duration` control in `FastSpeech2` can change the speed of the audio while keeping the `pitch` unchanged. (In some speech tools, increasing the speed will increase the pitch and vice versa.)

2. When we set the `pitch` of one sentence to a mean value and set the `tones` of phones to `1`, we will get a `robot-style` timbre.

3. When we raise the `pitch` of an adult female (with a fixed scale ratio), we will get a `child-style` timbre.

The `duration` and `pitch` of different phonemes in a sentence can have different scale ratios. You can set different scale ratios to emphasize or weaken the pronunciation of some phonemes.
## Usage
Run the following command line to get started:
```
./run.sh
```
`run.sh` first executes `source path.sh`, which sets the environment variables.

If you would like to try your sentence, please replace the sentence in `sentences.txt`.

For more details, please see `style_syn.py`

The audio samples are in [style-control-in-fastspeech2](https://paddlespeech.readthedocs.io/en/latest/tts/demo.html#style-control-in-fastspeech2)


================================================
FILE: demos/style_fs2/README_cn.md
================================================
(简体中文|[English](./README.md))

# Style FastSpeech2

## 简介

[FastSpeech2](https://arxiv.org/abs/2006.04558)  是用于语音合成的经典声学模型，它引入了可控语音输入，包括 `phoneme duration` 、 `energy` 和 `pitch` 。

在预测阶段，您可以更改这些变量以获得一些有趣的结果。

例如:

1.  `FastSpeech2` 中的 `duration` 可以控制音频的速度 ，并保持 `pitch` 。（在某些语音工具中，增加速度将增加音调，反之亦然。）
2. 当我们将一个句子的 `pitch` 设置为平均值并将音素的 `tones` 设置为 `1` 时，我们将获得 `robot-style` 的音色。
3. 当我们提高成年女性的 `pitch` （比例固定）时，我们会得到 `child-style` 的音色。

句子中不同音素的 `duration` 和 `pitch` 可以具有不同的比例。您可以设置不同的音阶比例来强调或削弱某些音素的发音。

## 运行

运行以下命令行开始：

```
./run.sh
```

在 `run.sh`, 会首先执行 `source path.sh` 去设置好环境变量。

如果您想尝试您的句子，请替换 `sentences.txt`中的句子。

更多的细节，请查看 `style_syn.py`。

语音样例可以在 [style-control-in-fastspeech2](https://paddlespeech.readthedocs.io/en/latest/tts/demo.html#style-control-in-fastspeech2) 查看。


================================================
FILE: demos/style_fs2/path.sh
================================================
#!/bin/bash
# Environment setup for the style_fs2 demo; sourced by run.sh.

# Repository root, resolved relative to the current working directory.
# Quoted so that a working directory containing spaces is not word-split.
export MAIN_ROOT="$(realpath "${PWD}/../../")"

export PATH="${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}"
export LC_ALL=C

export PYTHONDONTWRITEBYTECODE=1
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
# Append the previous PYTHONPATH only when it is non-empty, so that no
# trailing ':' (an empty path entry) is left behind.
export PYTHONPATH="${MAIN_ROOT}${PYTHONPATH:+:${PYTHONPATH}}"
MODEL=fastspeech2
export BIN_DIR="${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}"


================================================
FILE: demos/style_fs2/run.sh
================================================
#!/bin/bash
# Style-control demo for FastSpeech2.
#   stage 0: download and unpack the pretrained acoustic model and vocoder
#   stage 1: synthesize the sentences in sentences.txt with style_syn.py

# Abort early if the environment cannot be set up; every later path
# depends on MAIN_ROOT exported by path.sh.
source path.sh || exit 1

gpus=0
stage=0
stop_stage=100

# With the following options you can choose the range of stages to run,
# such as `./run.sh --stage 0 --stop-stage 0`.
# This cannot be mixed with positional arguments (`$1`, `$2`, ...).
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

mkdir -p download

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # download pretrained tts models and unzip
    # `wget -c` resumes a partial download and does not create `*.zip.1`
    # duplicates when the stage is re-run; `unzip -o` overwrites already
    # extracted files instead of stopping at an interactive prompt.
    wget -c -P download https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_baker_ckpt_0.4.zip
    unzip -o -d download download/pwg_baker_ckpt_0.4.zip || exit 1
    wget -c -P download https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_ckpt_0.4.zip
    unzip -o -d download download/fastspeech2_nosil_baker_ckpt_0.4.zip || exit 1
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # run tts
    CUDA_VISIBLE_DEVICES=${gpus} \
    python3 style_syn.py \
        --fastspeech2-config=download/fastspeech2_nosil_baker_ckpt_0.4/default.yaml \
        --fastspeech2-checkpoint=download/fastspeech2_nosil_baker_ckpt_0.4/snapshot_iter_76000.pdz \
        --fastspeech2-stat=download/fastspeech2_nosil_baker_ckpt_0.4/speech_stats.npy \
        --fastspeech2-pitch-stat=download/fastspeech2_nosil_baker_ckpt_0.4/pitch_stats.npy \
        --fastspeech2-energy-stat=download/fastspeech2_nosil_baker_ckpt_0.4/energy_stats.npy \
        --pwg-config=download/pwg_baker_ckpt_0.4/pwg_default.yaml \
        --pwg-checkpoint=download/pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \
        --pwg-stat=download/pwg_baker_ckpt_0.4/pwg_stats.npy \
        --text=./sentences.txt \
        --output-dir=output \
        --phones-dict=download/fastspeech2_nosil_baker_ckpt_0.4/phone_id_map.txt
fi


================================================
FILE: demos/style_fs2/sentences.txt
================================================
000 谁知青蛙一落地，竟变成了一位英俊的王子。于是遵照国王的意思，他做了公主的亲密伴侣。


================================================
FILE: demos/style_fs2/style_syn.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from pathlib import Path

import numpy as np
import paddle
import soundfile as sf
import yaml
from yacs.config import CfgNode

from paddlespeech.t2s.frontend.zh_frontend import Frontend
from paddlespeech.t2s.models.fastspeech2 import FastSpeech2
from paddlespeech.t2s.models.fastspeech2 import StyleFastSpeech2Inference
from paddlespeech.t2s.models.parallel_wavegan import PWGGenerator
from paddlespeech.t2s.models.parallel_wavegan import PWGInference
from paddlespeech.t2s.modules.normalizer import ZScore


def evaluate(args, fastspeech2_config, pwg_config):
    """Synthesize every input sentence once per demo style and save the audio.

    For each style in ``normal``, ``robot``, ``1.2xspeed``, ``0.8xspeed`` and
    ``child_voice`` a sub-directory ``<output_dir>/<style>`` is created and
    one ``<utt_id>.wav`` per sentence is written into it.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``text``, ``phones_dict``, ``fastspeech2_checkpoint``,
            ``fastspeech2_stat``, ``fastspeech2_pitch_stat``,
            ``fastspeech2_energy_stat``, ``pwg_checkpoint``, ``pwg_stat``
            and ``output_dir``.
        fastspeech2_config: Acoustic-model config; ``n_mels``, ``fs`` and the
            ``"model"`` section are used.
        pwg_config: Vocoder config; the ``"generator_params"`` section is
            used.
    """

    # construct dataset for evaluation
    sentences = []
    # The sentence file contains non-ASCII text. Decode it explicitly as
    # UTF-8 rather than with the locale default, because path.sh exports
    # LC_ALL=C.
    with open(args.text, 'rt', encoding='utf-8') as f:
        for line in f:
            items = line.strip().split()
            if not items:
                # A blank (e.g. trailing) line carries no utterance; skip it
                # instead of failing on items[0].
                continue
            utt_id = items[0]
            # Everything after the utterance id is the sentence; the
            # whitespace between tokens is dropped.
            sentence = "".join(items[1:])
            sentences.append((utt_id, sentence))

    with open(args.phones_dict, "r", encoding='utf-8') as f:
        phn_id = [line.strip().split() for line in f.readlines()]
    # One vocabulary entry per line of the phone dictionary.
    vocab_size = len(phn_id)
    print("vocab_size:", vocab_size)

    odim = fastspeech2_config.n_mels
    model = FastSpeech2(
        idim=vocab_size, odim=odim, **fastspeech2_config["model"])

    model.set_state_dict(
        paddle.load(args.fastspeech2_checkpoint)["main_params"])
    model.eval()

    vocoder = PWGGenerator(**pwg_config["generator_params"])
    vocoder.set_state_dict(paddle.load(args.pwg_checkpoint)["generator_params"])
    vocoder.remove_weight_norm()
    vocoder.eval()
    print("model done!")

    frontend = Frontend(phone_vocab_path=args.phones_dict)
    print("frontend done!")

    # Each stat file unpacks into a (mean, std) pair used for z-score
    # normalization of the spectrogram.
    stat = np.load(args.fastspeech2_stat)
    mu, std = stat
    mu = paddle.to_tensor(mu)
    std = paddle.to_tensor(std)
    fastspeech2_normalizer = ZScore(mu, std)

    stat = np.load(args.pwg_stat)
    mu, std = stat
    mu = paddle.to_tensor(mu)
    std = paddle.to_tensor(std)
    pwg_normalizer = ZScore(mu, std)

    fastspeech2_inference = StyleFastSpeech2Inference(
        fastspeech2_normalizer, model, args.fastspeech2_pitch_stat,
        args.fastspeech2_energy_stat)
    fastspeech2_inference.eval()

    pwg_inference = PWGInference(pwg_normalizer, vocoder)
    pwg_inference.eval()

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    styles = ["normal", "robot", "1.2xspeed", "0.8xspeed", "child_voice"]
    for style in styles:
        # Start every style from the neutral setting (no override at all),
        # then switch on only the control that defines the style.
        robot = False
        durations = None
        durations_scale = None
        durations_bias = None
        pitch = None
        pitch_scale = None
        pitch_bias = None
        energy = None
        energy_scale = None
        energy_bias = None
        if style == "robot":
            # all tones in phones be `1`
            # all pitch should be the same, we use mean here
            robot = True
        if style == "1.2xspeed":
            # Faster speech means shorter durations, hence the reciprocal.
            durations_scale = 1 / 1.2
        if style == "0.8xspeed":
            durations_scale = 1 / 0.8
        if style == "child_voice":
            pitch_scale = 1.3
        sub_output_dir = output_dir / style
        sub_output_dir.mkdir(parents=True, exist_ok=True)
        for utt_id, sentence in sentences:
            input_ids = frontend.get_input_ids(
                sentence, merge_sentences=True, robot=robot)
            # merge_sentences=True is requested, so only the first entry of
            # the returned phone id list is consumed.
            phone_ids = input_ids["phone_ids"][0]

            with paddle.no_grad():
                mel = fastspeech2_inference(
                    phone_ids,
                    durations=durations,
                    durations_scale=durations_scale,
                    durations_bias=durations_bias,
                    pitch=pitch,
                    pitch_scale=pitch_scale,
                    pitch_bias=pitch_bias,
                    energy=energy,
                    energy_scale=energy_scale,
                    energy_bias=energy_bias,
                    robot=robot)
                wav = pwg_inference(mel)

            sf.write(
                str(sub_output_dir / (utt_id + ".wav")),
                wav.numpy(),
                samplerate=fastspeech2_config.fs)
            print(f"{style}_{utt_id} done!")


def main():
    """Command-line entry point of the style-controlled synthesis demo.

    Parses the options, selects the paddle device from ``--ngpu``, loads the
    FastSpeech2 and Parallel WaveGAN YAML configs, prints the arguments and
    configs, and hands everything to ``evaluate``.

    A missing mandatory option or a negative ``--ngpu`` ends the program
    through argparse's usage error instead of failing later.
    """
    parser = argparse.ArgumentParser(
        description="Synthesize with fastspeech2 & parallel wavegan.")
    # Options marked required are passed straight to open(), np.load(),
    # paddle.load() or Path(); without a value they would only surface as a
    # TypeError deep inside the run.
    parser.add_argument(
        "--fastspeech2-config",
        type=str,
        required=True,
        help="fastspeech2 config file.")
    parser.add_argument(
        "--fastspeech2-checkpoint",
        type=str,
        required=True,
        help="fastspeech2 checkpoint to load.")
    parser.add_argument(
        "--fastspeech2-stat",
        type=str,
        required=True,
        help="mean and standard deviation used to normalize spectrogram when training fastspeech2."
    )
    parser.add_argument(
        "--fastspeech2-pitch-stat",
        type=str,
        help="mean and standard deviation used to normalize pitch when training fastspeech2"
    )
    parser.add_argument(
        "--fastspeech2-energy-stat",
        type=str,
        help="mean and standard deviation used to normalize energy when training fastspeech2."
    )
    parser.add_argument(
        "--pwg-config",
        type=str,
        required=True,
        help="parallel wavegan config file.")
    parser.add_argument(
        "--pwg-checkpoint",
        type=str,
        required=True,
        help="parallel wavegan generator parameters to load.")
    parser.add_argument(
        "--pwg-stat",
        type=str,
        required=True,
        help="mean and standard deviation used to normalize spectrogram when training parallel wavegan."
    )
    parser.add_argument(
        "--phones-dict",
        type=str,
        default="phone_id_map.txt",
        help="phone vocabulary file.")
    parser.add_argument(
        "--text",
        type=str,
        required=True,
        help="text to synthesize, a 'utt_id sentence' pair per line.")
    parser.add_argument(
        "--output-dir", type=str, required=True, help="output dir.")
    parser.add_argument(
        "--ngpu", type=int, default=1, help="if ngpu == 0, use cpu.")
    parser.add_argument("--verbose", type=int, default=1, help="verbose.")

    args = parser.parse_args()

    # Reject an invalid device count up front instead of printing a notice
    # and carrying on with whatever device happens to be the default.
    if args.ngpu < 0:
        parser.error("ngpu should >= 0 !")
    paddle.set_device("cpu" if args.ngpu == 0 else "gpu")

    with open(args.fastspeech2_config, encoding="utf-8") as f:
        fastspeech2_config = CfgNode(yaml.safe_load(f))
    with open(args.pwg_config, encoding="utf-8") as f:
        pwg_config = CfgNode(yaml.safe_load(f))

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(fastspeech2_config)
    print(pwg_config)

    evaluate(args, fastspeech2_config, pwg_config)


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


================================================
FILE: demos/text_to_speech/README.md
================================================
([简体中文](./README_cn.md)|English)
# TTS (Text To Speech)

## Introduction
Text-to-speech (TTS) is a natural language modeling process that requires changing units of text into units of speech for audio presentation. 

This demo is an implementation to generate audio from the given text. It can be done by a single command or a few lines in python using `PaddleSpeech`. 

## Usage
### 1. Installation
see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

You can choose one way from easy, medium and hard to install paddlespeech.

### 2. Prepare Input
The input of this demo should be a text of the specific language that can be passed via argument.
### 3. Usage
- Command Line (Recommended)
    The default acoustic model is `Fastspeech2`, and the default vocoder is `HiFiGAN`, the default inference method is dygraph inference. 
    - Chinese
        ```bash
        paddlespeech tts --input "你好，欢迎使用百度飞桨深度学习框架！"
        ```
    - Batch Process
        ```bash
        echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts
        ```
    - Chinese, use `SpeedySpeech` as the acoustic model
        ```bash
        paddlespeech tts --am speedyspeech_csmsc --input "你好，欢迎使用百度飞桨深度学习框架！"
        ```
    - Chinese, multi-speaker
    
        You can change `spk_id` here.
        ```bash
        paddlespeech tts --am fastspeech2_aishell3 --voc pwgan_aishell3 --input "你好，欢迎使用百度飞桨深度学习框架！" --spk_id 0
        ```
    
     - English
        ```bash
        paddlespeech tts --am fastspeech2_ljspeech --voc pwgan_ljspeech --lang en --input "hello world"
        ```
    - English, multi-speaker
    
        You can change `spk_id` here.
        ```bash
        paddlespeech tts --am fastspeech2_vctk --voc pwgan_vctk --input "hello, boys" --lang en --spk_id 0
        ```
    - Chinese English Mixed, multi-speaker
        You can change `spk_id` here.
        ```bash
        # The `am` must be `fastspeech2_mix`!
        # The `lang` must be `mix`!
        # The voc must be chinese datasets' voc now!
        # spk 174 is csmsc, spk 175 is ljspeech
        paddlespeech tts --am fastspeech2_mix --voc hifigan_csmsc --lang mix --input "热烈欢迎您在 Discussions 中提交问题，并在 Issues 中指出发现的 bug。此外，我们非常希望您参与到 Paddle Speech 的开发中！" --spk_id 174 --output mix_spk174.wav
        paddlespeech tts --am fastspeech2_mix --voc hifigan_aishell3 --lang mix --input "热烈欢迎您在 Discussions 中提交问题，并在 Issues 中指出发现的 bug。此外，我们非常希望您参与到 Paddle Speech 的开发中！" --spk_id 174 --output mix_spk174_aishell3.wav
        paddlespeech tts --am fastspeech2_mix --voc pwgan_csmsc --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --spk_id 175 --output mix_spk175_pwgan.wav
        paddlespeech tts --am fastspeech2_mix --voc hifigan_csmsc --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --spk_id 175 --output mix_spk175.wav
        ```
    - Chinese English Mixed, single male spk
        ```bash
        # male mix tts
        # The `lang` must be `mix`!
        paddlespeech tts --am fastspeech2_male --voc pwgan_male --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --output male_mix_fs2_pwgan.wav
        paddlespeech tts --am fastspeech2_male --voc hifigan_male --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --output male_mix_fs2_hifigan.wav
        ```
    - Cantonese
        ```bash
        paddlespeech tts --am fastspeech2_canton --voc pwgan_aishell3 --input "各个国家有各个国家嘅国歌" --lang canton --spk_id 10
        ```
    - Use ONNXRuntime infer：
        ```bash
        paddlespeech tts --input "你好，欢迎使用百度飞桨深度学习框架！" --output default.wav --use_onnx True
        paddlespeech tts --am speedyspeech_csmsc --input "你好，欢迎使用百度飞桨深度学习框架！" --output ss.wav --use_onnx True
        paddlespeech tts --voc mb_melgan_csmsc --input "你好，欢迎使用百度飞桨深度学习框架！" --output mb.wav --use_onnx True
        paddlespeech tts --voc pwgan_csmsc --input "你好，欢迎使用百度飞桨深度学习框架！" --output pwgan.wav --use_onnx True
        paddlespeech tts --am fastspeech2_aishell3 --voc pwgan_aishell3 --input "你好，欢迎使用百度飞桨深度学习框架！" --spk_id 0 --output aishell3_fs2_pwgan.wav --use_onnx True
        paddlespeech tts --am fastspeech2_aishell3 --voc hifigan_aishell3 --input "你好，欢迎使用百度飞桨深度学习框架！" --spk_id 0 --output aishell3_fs2_hifigan.wav --use_onnx True
        paddlespeech tts --am fastspeech2_ljspeech --voc pwgan_ljspeech --lang en --input "Life was like a box of chocolates, you never know what you're gonna get." --output lj_fs2_pwgan.wav --use_onnx True
        paddlespeech tts --am fastspeech2_ljspeech --voc hifigan_ljspeech --lang en --input "Life was like a box of chocolates, you never know what you're gonna get." --output lj_fs2_hifigan.wav --use_onnx True
        paddlespeech tts --am fastspeech2_vctk --voc pwgan_vctk --input "Life was like a box of chocolates, you never know what you're gonna get." --lang en --spk_id 0 --output vctk_fs2_pwgan.wav --use_onnx True
        paddlespeech tts --am fastspeech2_vctk --voc hifigan_vctk --input "Life was like a box of chocolates, you never know what you're gonna get." --lang en --spk_id 0 --output vctk_fs2_hifigan.wav --use_onnx True
        paddlespeech tts --am fastspeech2_male --voc pwgan_male --lang zh --input "你好，欢迎使用百度飞桨深度学习框架！" --output male_zh_fs2_pwgan.wav --use_onnx True
        paddlespeech tts --am fastspeech2_male --voc pwgan_male --lang en --input "Life was like a box of chocolates, you never know what you're gonna get." --output male_en_fs2_pwgan.wav --use_onnx True
        paddlespeech tts --am fastspeech2_male --voc pwgan_male --lang mix --input "热烈欢迎您在 Discussions 中提交问题，并在 Issues 中指出发现的 bug。此外，我们非常希望您参与到 Paddle Speech 的开发中！" --output male_fs2_pwgan.wav --use_onnx True
        paddlespeech tts --am fastspeech2_male --voc hifigan_male --lang zh --input "你好，欢迎使用百度飞桨深度学习框架！" --output male_zh_fs2_hifigan.wav --use_onnx True
        paddlespeech tts --am fastspeech2_male --voc hifigan_male --lang en --input "Life was like a box of chocolates, you never know what you're gonna get." --output male_en_fs2_hifigan.wav --use_onnx True
        paddlespeech tts --am fastspeech2_mix --voc hifigan_male --lang mix --input "热烈欢迎您在 Discussions 中提交问题，并在 Issues 中指出发现的 bug。此外，我们非常希望您参与到 Paddle Speech 的开发中！" --output male_fs2_hifigan.wav --use_onnx True
        paddlespeech tts --am fastspeech2_mix --voc pwgan_csmsc --lang mix --spk_id 174 --input "热烈欢迎您在 Discussions 中提交问题，并在 Issues 中指出发现的 bug。此外，我们非常希望您参与到 Paddle Speech 的开发中！" --output mix_fs2_pwgan_csmsc_spk174.wav --use_onnx True
        paddlespeech tts --am fastspeech2_canton --voc pwgan_aishell3 --lang canton --spk_id 10 --input "各个国家有各个国家嘅国歌" --output output_canton.wav --use_onnx True
        ```

  Usage:
  
  ```bash
  paddlespeech tts --help
  ```
  Arguments:
  - `input`(required): Input text to generate audio from.
  - `am`: Acoustic model type of tts task. Default: `fastspeech2_csmsc`.
  - `am_config`: Config of acoustic model. Use default config when it is None. Default: `None`.
  - `am_ckpt`: Acoustic model checkpoint. Use pretrained model when it is None. Default: `None`.
  - `am_stat`: Mean and standard deviation used to normalize spectrogram when training acoustic model. Default: `None`.
  - `phones_dict`: Phone vocabulary file. Default: `None`.
  - `tones_dict`: Tone vocabulary file. Default: `None`.
  - `speaker_dict`: speaker id map file. Default: `None`.
  - `spk_id`: Speaker id for multi speaker acoustic model. Default: `0`.
  - `voc`: Vocoder type of tts task. Default: `hifigan_csmsc`.
  - `voc_config`: Config of vocoder. Use default config when it is None. Default: `None`.
  - `voc_ckpt`: Vocoder checkpoint. Use pretrained model when it is None. Default: `None`.
  - `voc_stat`: Mean and standard deviation used to normalize spectrogram when training vocoder. Default: `None`.
  - `lang`: Language of tts task. Default: `zh`.
  - `device`: Choose device to execute model inference. Default: default device of paddlepaddle in current environment.
  - `output`: Output wave filepath. Default: `output.wav`.
  - `use_onnx`: Whether to use ONNXRuntime inference.
  - `fs`: sample rate for ONNX models when use specified model files.

  Output:
  ```bash
  [2021-12-09 20:49:58,955] [    INFO] [log.py] [L57] - Wave file has been generated: output.wav
  ```

- Python API
    - Dygraph infer:
        ```python
        import paddle
        from paddlespeech.cli.tts import TTSExecutor
        tts_executor = TTSExecutor()
        wav_file = tts_executor(
            text='今天的天气不错啊',
            output='output.wav',
            am='fastspeech2_csmsc',
            am_config=None,
            am_ckpt=None,
            am_stat=None,
            spk_id=0,
            phones_dict=None,
            tones_dict=None,
            speaker_dict=None,
            voc='pwgan_csmsc',
            voc_config=None,
            voc_ckpt=None,
            voc_stat=None,
            lang='zh',
            device=paddle.get_device())
        print('Wave file has been generated: {}'.format(wav_file))
        ```
    - ONNXRuntime infer:
        ```python
        from paddlespeech.cli.tts import TTSExecutor
        tts_executor = TTSExecutor()
        wav_file = tts_executor(
            text='对数据集进行预处理',
            output='output.wav',
            am='fastspeech2_csmsc',
            voc='hifigan_csmsc',
            lang='zh',
            use_onnx=True,
            cpu_threads=2)
        ```
 
  Output:
  ```bash
  Wave file has been generated: output.wav
  ```

### 4. Pretrained Models
Here is a list of pretrained models released by PaddleSpeech that can be used by command and python API:

- Acoustic model
  | Model | Language |
  | :--- | :---: |
  |      speedyspeech_csmsc      |    zh    |
  |      fastspeech2_csmsc       |    zh    |
  |     fastspeech2_ljspeech     |    en    |
  |     fastspeech2_aishell3     |    zh    |
  |       fastspeech2_vctk       |    en    |
  | fastspeech2_cnndecoder_csmsc |    zh    |
  |       fastspeech2_mix        |   mix    |
  |       tacotron2_csmsc        |    zh    |
  |      tacotron2_ljspeech      |    en    |
  |       fastspeech2_male       |    zh    |
  |       fastspeech2_male       |    en    |
  |       fastspeech2_male       |   mix    |
  |       fastspeech2_canton     |  canton  |

- Vocoder
  | Model | Language |
  | :--- | :---: |
  |         pwgan_csmsc          |    zh    |
  |        pwgan_ljspeech        |    en    |
  |        pwgan_aishell3        |    zh    |
  |          pwgan_vctk          |    en    |
  |       mb_melgan_csmsc        |    zh    |
  |      style_melgan_csmsc      |    zh    |
  |        hifigan_csmsc         |    zh    |
  |       hifigan_ljspeech       |    en    |
  |       hifigan_aishell3       |    zh    |
  |         hifigan_vctk         |    en    |
  |        wavernn_csmsc         |    zh    |
  |         pwgan_male           |    zh    |
  |        hifigan_male          |    zh    |


================================================
FILE: demos/text_to_speech/README_cn.md
================================================
(简体中文|[English](./README.md))

# 语音合成
## 介绍
语音合成是一种自然语言建模过程，其将文本转换为语音以进行音频演示。

这个 demo 是一个从给定文本生成音频的实现，它可以通过使用 `PaddleSpeech` 的单个命令或 python 中的几行代码来实现。
## 使用方法
### 1. 安装
请看[安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install_cn.md)。

你可以从 easy，medium，hard 三种方式中选择一种方式安装。

### 2. 准备输入

这个 demo 的输入是通过参数传递的特定语言的文本。
### 3. 使用方法
- 命令行 (推荐使用)
     默认的声学模型是 `Fastspeech2`，默认的声码器是 `HiFiGAN`，默认推理方式是动态图推理。
    - 中文
        ```bash
        paddlespeech tts --input "你好，欢迎使用百度飞桨深度学习框架！"
        ```
    - 批处理
        ```bash
        echo -e "1 欢迎光临。\n2 谢谢惠顾。" | paddlespeech tts
        ```
    - 中文，使用 `SpeedySpeech` 作为声学模型
        ```bash
        paddlespeech tts --am speedyspeech_csmsc --input "你好，欢迎使用百度飞桨深度学习框架！"
        ```
    - 中文， 多说话人
    
        你可以改变 `spk_id`。
        ```bash
        paddlespeech tts --am fastspeech2_aishell3 --voc pwgan_aishell3 --input "你好，欢迎使用百度飞桨深度学习框架！" --spk_id 0
        ```
    
     - 英文
        ```bash
        paddlespeech tts --am fastspeech2_ljspeech --voc pwgan_ljspeech --lang en --input "hello world"
        ```
    - 英文，多说话人
    
        你可以改变 `spk_id`。
        ```bash
        paddlespeech tts --am fastspeech2_vctk --voc pwgan_vctk --input "hello, boys" --lang en --spk_id 0
        ```
    - 中英文混合，多说话人
        你可以改变 `spk_id`。
        ```bash
        # The `am` must be `fastspeech2_mix`!
        # The `lang` must be `mix`!
        # The voc must be chinese datasets' voc now!
        # spk 174 is csmsc, spk 175 is ljspeech
        paddlespeech tts --am fastspeech2_mix --voc hifigan_csmsc --lang mix --input "热烈欢迎您在 Discussions 中提交问题，并在 Issues 中指出发现的 bug。此外，我们非常希望您参与到 Paddle Speech 的开发中！" --spk_id 174 --output mix_spk174.wav
        paddlespeech tts --am fastspeech2_mix --voc hifigan_aishell3 --lang mix --input "热烈欢迎您在 Discussions 中提交问题，并在 Issues 中指出发现的 bug。此外，我们非常希望您参与到 Paddle Speech 的开发中！" --spk_id 174 --output mix_spk174_aishell3.wav
        paddlespeech tts --am fastspeech2_mix --voc pwgan_csmsc --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --spk_id 175 --output mix_spk175_pwgan.wav
        paddlespeech tts --am fastspeech2_mix --voc hifigan_csmsc --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --spk_id 175 --output mix_spk175.wav
        ```
    - 中英文混合，单个男性说话人
        ```bash
        # male mix tts
        # The `lang` must be `mix`!
        paddlespeech tts --am fastspeech2_male --voc pwgan_male --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --output male_mix_fs2_pwgan.wav
        paddlespeech tts --am fastspeech2_male --voc hifigan_male --lang mix --input "我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN." --output male_mix_fs2_hifigan.wav
        ```
    - 粤语
        ```bash
        paddlespeech tts --am fastspeech2_canton --voc pwgan_aishell3 --input "各个国家有各个国家嘅国歌" --lang canton --spk_id 10
        ```
    - 使用 ONNXRuntime 推理：
        ```bash
        paddlespeech tts --input "你好，欢迎使用百度飞桨深度学习框架！" --output default.wav --use_onnx True
        paddlespeech tts --am speedyspeech_csmsc --input "你好，欢迎使用百度飞桨深度学习框架！" --output ss.wav --use_onnx True
        paddlespeech tts --voc mb_melgan_csmsc --input "你好，欢迎使用百度飞桨深度学习框架！" --output mb.wav --use_onnx True
        paddlespeech tts --voc pwgan_csmsc --input "你好，欢迎使用百度飞桨深度学习框架！" --output pwgan.wav --use_onnx True
        paddlespeech tts --am fastspeech2_aishell3 --voc pwgan_aishell3 --input "你好，欢迎使用百度飞桨深度学习框架！" --spk_id 0 --output aishell3_fs2_pwgan.wav --use_onnx True
        paddlespeech tts --am fastspeech2_aishell3 --voc hifigan_aishell3 --input "你好，欢迎使用百度飞桨深度学习框架！" --spk_id 0 --output aishell3_fs2_hifigan.wav --use_onnx True
        paddlespeech tts --am fastspeech2_ljspeech --voc pwgan_ljspeech --lang en --input "Life was like a box of chocolates, you never know what you're gonna get." --output lj_fs2_pwgan.wav --use_onnx True
        paddlespeech tts --am fastspeech2_ljspeech --voc hifigan_ljspeech --lang en --input "Life was like a box of chocolates, you never know what you're gonna get." --output lj_fs2_hifigan.wav --use_onnx True
        paddlespeech tts --am fastspeech2_vctk --voc pwgan_vctk --input "Life was like a box of chocolates, you never know what you're gonna get." --lang en --spk_id 0 --output vctk_fs2_pwgan.wav --use_onnx True
        paddlespeech tts --am fastspeech2_vctk --voc hifigan_vctk --input "Life was like a box of chocolates, you never know what you're gonna get." --lang en --spk_id 0 --output vctk_fs2_hifigan.wav --use_onnx True
        paddlespeech tts --am fastspeech2_male --voc pwgan_male --lang zh --input "你好，欢迎使用百度飞桨深度学习框架！" --output male_zh_fs2_pwgan.wav --use_onnx True
        paddlespeech tts --am fastspeech2_male --voc pwgan_male --lang en --input "Life was like a box of chocolates, you never know what you're gonna get." --output male_en_fs2_pwgan.wav --use_onnx True
        paddlespeech tts --am fastspeech2_male --voc pwgan_male --lang mix --input "热烈欢迎您在 Discussions 中提交问题，并在 Issues 中指出发现的 bug。此外，我们非常希望您参与到 Paddle Speech 的开发中！" --output male_fs2_pwgan.wav --use_onnx True
        paddlespeech tts --am fastspeech2_male --voc hifigan_male --lang zh --input "你好，欢迎使用百度飞桨深度学习框架！" --output male_zh_fs2_hifigan.wav --use_onnx True
        paddlespeech tts --am fastspeech2_male --voc hifigan_male --lang en --input "Life was like a box of chocolates, you never know what you're gonna get." --output male_en_fs2_hifigan.wav --use_onnx True
        paddlespeech tts --am fastspeech2_mix --voc hifigan_male --lang mix --input "热烈欢迎您在 Discussions 中提交问题，并在 Issues 中指出发现的 bug。此外，我们非常希望您参与到 Paddle Speech 的开发中！" --output male_fs2_hifigan.wav --use_onnx True
        paddlespeech tts --am fastspeech2_mix --voc pwgan_csmsc --lang mix --spk_id 174 --input "热烈欢迎您在 Discussions 中提交问题，并在 Issues 中指出发现的 bug。此外，我们非常希望您参与到 Paddle Speech 的开发中！" --output mix_fs2_pwgan_csmsc_spk174.wav --use_onnx True
        paddlespeech tts --am fastspeech2_canton --voc pwgan_aishell3 --lang canton --spk_id 10 --input "各个国家有各个国家嘅国歌" --output output_canton.wav --use_onnx True
        ```

  使用方法：
  
  ```bash
  paddlespeech tts --help
  ```
  参数：
  - `input`(必须输入)：用于合成音频的文本。
  - `am`：TTS 任务的声学模型， 默认值：`fastspeech2_csmsc`。
  - `am_config`：声学模型的配置文件，若不设置则使用默认配置，默认值：`None`。
  - `am_ckpt`：声学模型的参数文件，若不设置则下载预训练模型使用，默认值：`None`。
  - `am_stat`：训练声学模型时用于正则化 mel 频谱图的均值标准差文件，默认值：`None`。
  - `phones_dict`：音素词表文件， 默认值：`None`。
  - `tones_dict`：声调词表文件， 默认值：`None`。
  - `speaker_dict`：说话人词表文件， 默认值：`None`。
  - `spk_id`：说话人 id， 默认值： `0`。
  - `voc`：TTS 任务的声码器， 默认值： `hifigan_csmsc`。
  - `voc_config`：声码器的配置文件，若不设置则使用默认配置，默认值：`None`。
  - `voc_ckpt`：声码器的参数文件，若不设置则下载预训练模型使用，默认值：`None`。
  - `voc_stat`：训练声码器时用于正则化 mel 频谱图的均值标准差文件，默认值：`None`。
  - `lang`：TTS 任务的语言， 默认值：`zh`。
  - `device`：执行预测的设备， 默认值：当前系统下 paddlepaddle 的默认 device。
  - `output`：输出音频的路径， 默认值：`output.wav`。
  - `use_onnx`: 是否使用 ONNXRuntime 进行推理。
  - `fs`: 使用特定 ONNX 模型时的采样率。

  输出：
  ```bash
  [2021-12-09 20:49:58,955] [    INFO] [log.py] [L57] - Wave file has been generated: output.wav
  ```

- Python API
     - 动态图推理:
        ```python
        import paddle
        from paddlespeech.cli.tts import TTSExecutor
        tts_executor = TTSExecutor()
        wav_file = tts_executor(
            text='今天的天气不错啊',
            output='output.wav',
            am='fastspeech2_csmsc',
            am_config=None,
            am_ckpt=None,
            am_stat=None,
            spk_id=0,
            phones_dict=None,
            tones_dict=None,
            speaker_dict=None,
            voc='pwgan_csmsc',
            voc_config=None,
            voc_ckpt=None,
            voc_stat=None,
            lang='zh',
            device=paddle.get_device())
        print('Wave file has been generated: {}'.format(wav_file))
        ```
    -  ONNXRuntime 推理:
        ```python
        from paddlespeech.cli.tts import TTSExecutor
        tts_executor = TTSExecutor()
        wav_file = tts_executor(
            text='对数据集进行预处理',
            output='output.wav',
            am='fastspeech2_csmsc',
            voc='hifigan_csmsc',
            lang='zh',
            use_onnx=True,
            cpu_threads=2)
        ```
 
  输出：
  ```bash
  Wave file has been generated: output.wav
  ```

### 4. 预训练模型
以下是 PaddleSpeech 提供的可以被命令行和 python API 使用的预训练模型列表：

- 声学模型
  | 模型 | 语言 |
  | :--- | :---: |
  |      speedyspeech_csmsc      |    zh    |
  |      fastspeech2_csmsc       |    zh    |
  |     fastspeech2_ljspeech     |    en    |
  |     fastspeech2_aishell3     |    zh    |
  |       fastspeech2_vctk       |    en    |
  | fastspeech2_cnndecoder_csmsc |    zh    |
  |       fastspeech2_mix        |   mix    |
  |       tacotron2_csmsc        |    zh    |
  |      tacotron2_ljspeech      |    en    |
  |       fastspeech2_male       |    zh    |
  |       fastspeech2_male       |    en    |
  |       fastspeech2_male       |   mix    |
  |       fastspeech2_canton     |  canton  |

- 声码器
  | 模型 | 语言 |
  | :--- | :---: |
  |         pwgan_csmsc          |    zh    |
  |        pwgan_ljspeech        |    en    |
  |        pwgan_aishell3        |    zh    |
  |          pwgan_vctk          |    en    |
  |       mb_melgan_csmsc        |    zh    |
  |      style_melgan_csmsc      |    zh    |
  |        hifigan_csmsc         |    zh    |
  |       hifigan_ljspeech       |    en    |
  |       hifigan_aishell3       |    zh    |
  |         hifigan_vctk         |    en    |
  |        wavernn_csmsc         |    zh    |
  |         pwgan_male           |    zh    |
  |        hifigan_male          |    zh    |


================================================
FILE: demos/text_to_speech/run.sh
================================================
#!/bin/bash

# Synthesize a single sentence passed on the command line.
paddlespeech tts --input "今天的天气不错啊"

# Synthesize several "id text" lines read from standard input.
printf '1 欢迎光临。\n2 谢谢惠顾。\n' | paddlespeech tts

# Sentence containing a date and a temperature, to exercise the text frontend.
paddlespeech tts --input "今天是2022/10/29,最低温度是-3℃."


================================================
FILE: demos/whisper/README.md
================================================
([简体中文](./README_cn.md)|English)

## Introduction
Whisper is a general-purpose speech recognition model. It is trained on a large dataset of diverse audio and is also a multi-task model that can perform multilingual speech recognition as well as speech translation and language identification.

The Whisper model was trained by OpenAI; see [openai/whisper](https://github.com/openai/whisper).

## Usage
 ### 1. Installation
 see [installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

 You can choose one way from easy, medium and hard to install paddlespeech.

 ### 2. Prepare Input File
 The input of this demo should be a WAV file(`.wav`), and the sample rate must be the same as the model.

 Here are sample files for this demo that can be downloaded:
 ```bash
 wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav
 ```

 ### 3. Usage
 - Command Line(Recommended)
   ```bash
   # to recognize text 
   paddlespeech whisper --task transcribe --input ./zh.wav

   # to use the English-only model at base size
   paddlespeech whisper --lang en --size base --task transcribe  --input ./en.wav

   # to recognize text and translate to English
   paddlespeech whisper --task translate --input ./zh.wav
   
   ```

   Usage:
   ```bash
   paddlespeech whisper --help
   ```
   Arguments:
   - `input`(required): Audio file to recognize.
   - `model`: Model type of asr task. Default: `whisper`.
   - `task`: Output type. Default: `transcribe`.
   - `lang`: Model language. Default: empty string. Use `en` to choose the English-only model. Currently only the [medium,base,small,tiny] sizes have an English-only model.
   - `size`: Model size for decoding. Default: `turbo`. Supported sizes: [turbo,large,medium,base,small,tiny].
   - `language`: Set decode language. Default: `None`. Forcibly set the recognized language, which is determined by the model itself by default. 
   - `sample_rate`: Sample rate of the model. Default: `16000`. Other sampling rates are not supported now.
   - `config`: Config of asr task. Use pretrained model when it is None. Default: `None`.
   - `ckpt_path`: Model checkpoint. Use pretrained model when it is None. Default: `None`.
   - `yes`: Takes no value. When set, every request from the program is accepted automatically, including the request to convert the audio sample rate. Default: `False`.
   - `device`: Choose device to execute model inference. Default: default device of paddlepaddle in current environment.
   - `verbose`: Show the log information.


 - Python API
   ```python
   import paddle
   from paddlespeech.cli.whisper import WhisperExecutor

   whisper_executor = WhisperExecutor()

   # to recognize text 
   text = whisper_executor(
       model='whisper',
       task='transcribe',
       sample_rate=16000,
       config=None,  # Set `config` and `ckpt_path` to None to use pretrained model.
       ckpt_path=None,
       audio_file='./zh.wav',
       device=paddle.get_device())
   print('ASR Result: \n{}'.format(text))

   # to recognize text and translate to English
   feature = whisper_executor(
       model='whisper',
       task='translate',
       size='large', # For translation, it is better to use the large or medium model
       sample_rate=16000,
       config=None,  # Set `config` and `ckpt_path` to None to use pretrained model.
       ckpt_path=None,
       audio_file='./zh.wav',
       device=paddle.get_device())
   print('Representation: \n{}'.format(feature))
   ```

   Output:
   ```bash
   Transcribe Result:
   Detected language: Chinese
   [00:00.000 --> 00:05.000] 我认为跑步最重要的就是给我带来了身体健康
   {'text': '我认为跑步最重要的就是给我带来了身体健康', 'segments': [{'id': 0, 'seek': 0, 'start': 0.0, 'end': 5.0, 'text': '我认为跑步最重要的就是给我带来了身体健康', 'tokens': [50364, 1654, 7422, 97, 13992, 32585, 31429, 8661, 24928, 1546, 5620, 49076, 4845, 99, 34912, 19847, 29485, 44201, 6346, 115, 50614], 'temperature': 0.0, 'avg_logprob': -0.23577967557040128, 'compression_ratio': 0.28169014084507044, 'no_speech_prob': 0.028302080929279327}], 'language': 'zh'}

   Translate Result:
   Detected language: Chinese
   [00:00.000 --> 00:05.000]  I think the most important thing about running is that it brings me good health.
   {'text': ' I think the most important thing about running is that it brings me good health.', 'segments': [{'id': 0, 'seek': 0, 'start': 0.0, 'end': 5.0, 'text': ' I think the most important thing about running is that it brings me good health.', 'tokens': [50364, 286, 519, 264, 881, 1021, 551, 466, 2614, 307, 300, 309, 5607, 385, 665, 1585, 13, 50614], 'temperature': 0.0, 'avg_logprob': -0.47945233395225123, 'compression_ratio': 1.095890410958904, 'no_speech_prob': 0.028302080929279327}], 'language': 'zh'}
   ```


================================================
FILE: demos/whisper/README_cn.md
================================================
(简体中文|[English](./README.md))

# Whisper模型
## 介绍
Whisper是一种通用的语音识别模型。它是在多种音频的大数据集上训练的，也是一个多任务模型，可以执行多语言语音识别以及语音翻译和语言识别。

Whisper模型由OpenAI Whisper训练 https://github.com/openai/whisper

## 使用方法
### 1. 安装
 请看[安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install_cn.md)。

 你可以从 easy，medium，hard 三种方式中选择一种方式安装。

### 2. 准备输入
 这个 demo 的输入应该是一个 WAV 文件（`.wav`），并且采样率必须与模型的采样率相同。

 可以下载此 demo 的示例音频：
 ```bash
 wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav
 ```

### 3. 使用方法
 - 命令行 (推荐使用)
   ```bash

   # 识别文本
   paddlespeech whisper --task transcribe --input ./zh.wav

   #选择只支持英文的模型，并且更换不同大小的模型
   paddlespeech whisper --lang en --size base --task transcribe  --input ./en.wav

   # 将语音翻译成英语
   paddlespeech whisper --task translate --input ./zh.wav
   ```
  使用方法：
   ```bash
   paddlespeech whisper --help
   ```
   参数：
   - `input`(必须输入)：用于识别的音频文件。
   - `model`：ASR 任务的模型，默认值：`whisper`。
   - `task`：输出类别，默认值：`transcribe`。
   - `lang`: 模型语言，默认值：``，使用`en`选择只支持英文的模型，目前可选择`en`的模型有[medium,base,small,tiny]。
   - `size`: 模型大小，默认值：`turbo`，目前支持[turbo,large,medium,base,small,tiny]。
   - `language`：设定解码语言，默认值：`None`，强制设定识别出的语言，默认为模型自行判定。
   - `sample_rate`：音频采样率，默认值：`16000`，目前Whisper暂不支持其他采样率。
   - `config`：ASR 任务的参数文件，若不设置则使用预训练模型中的默认配置，默认值：`None`。
   - `ckpt_path`：模型参数文件，若不设置则下载解码模型使用，默认值：`None`。
   - `yes`：不需要设置额外的参数，一旦设置了该参数，说明你默认同意程序的所有请求，其中包括自动转换输入音频的采样率。默认值：`False`。
   - `device`：执行预测的设备，默认值：当前系统下 paddlepaddle 的默认 device。
   - `verbose`: 如果使用，显示 logger 信息。


- Python API
   ```python
   import paddle
   from paddlespeech.cli.whisper import WhisperExecutor

   whisper_executor = WhisperExecutor()

   # 识别文本
   text = whisper_executor(
       model='whisper',
       task='transcribe',
       sample_rate=16000,
       config=None,  # Set `config` and `ckpt_path` to None to use pretrained model.
       ckpt_path=None,
       audio_file='./zh.wav',
       device=paddle.get_device())
   print('ASR Result: \n{}'.format(text))

    # 将语音翻译成英语
   feature = whisper_executor(
       model='whisper',
       task='translate',
       size='large', # For translation, it is better to use the large or medium model
       sample_rate=16000,
       config=None,  # Set `config` and `ckpt_path` to None to use pretrained model.
       ckpt_path=None,
       audio_file='./zh.wav',
       device=paddle.get_device())
   print('Representation: \n{}'.format(feature))
   ```


   输出：
   ```bash
   Transcribe Result:
   Detected language: Chinese
   [00:00.000 --> 00:05.000] 我认为跑步最重要的就是给我带来了身体健康
   {'text': '我认为跑步最重要的就是给我带来了身体健康', 'segments': [{'id': 0, 'seek': 0, 'start': 0.0, 'end': 5.0, 'text': '我认为跑步最重要的就是给我带来了身体健康', 'tokens': [50364, 1654, 7422, 97, 13992, 32585, 31429, 8661, 24928, 1546, 5620, 49076, 4845, 99, 34912, 19847, 29485, 44201, 6346, 115, 50614], 'temperature': 0.0, 'avg_logprob': -0.23577967557040128, 'compression_ratio': 0.28169014084507044, 'no_speech_prob': 0.028302080929279327}], 'language': 'zh'}

   Translate Result:
   Detected language: Chinese
   [00:00.000 --> 00:05.000]  I think the most important thing about running is that it brings me good health.
   {'text': ' I think the most important thing about running is that it brings me good health.', 'segments': [{'id': 0, 'seek': 0, 'start': 0.0, 'end': 5.0, 'text': ' I think the most important thing about running is that it brings me good health.', 'tokens': [50364, 286, 519, 264, 881, 1021, 551, 466, 2614, 307, 300, 309, 5607, 385, 665, 1585, 13, 50614], 'temperature': 0.0, 'avg_logprob': -0.47945233395225123, 'compression_ratio': 1.095890410958904, 'no_speech_prob': 0.028302080929279327}], 'language': 'zh'}
   ```


================================================
FILE: demos/whisper/run.sh
================================================
#!/bin/bash

# Download the Chinese and English sample clips (-c resumes partial downloads).
sample_url=https://paddlespeech.cdn.bcebos.com/PaddleAudio
wget -c "${sample_url}/zh.wav" "${sample_url}/en.wav"

# Transcribe the Chinese clip.
paddlespeech whisper --task transcribe --input ./zh.wav

# Run the translate task on the Chinese clip (output in English).
paddlespeech whisper --task translate --input ./zh.wav

# Transcribe the English clip with the English-only model at base size.
paddlespeech whisper --lang en --size base --task transcribe --input ./en.wav

================================================
FILE: docker/ubuntu16-gpu/Dockerfile
================================================
# Base image: CUDA 11.2.2 with cuDNN 8 runtime on Ubuntu 16.04 (xenial).
FROM nvidia/cuda:11.2.2-cudnn8-runtime-ubuntu16.04

# Overwrite /etc/apt/sources.list so that every xenial suite (main, updates,
# backports, security) is fetched from the Tsinghua (TUNA) mirror.
# [trusted=true] makes apt accept these sources without signature verification.
# The trailing "\n\" pairs rely on the shell's echo expanding "\n" into newlines.
RUN echo "deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial main restricted \n\
deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial-updates main restricted \n\
deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial universe \n\
deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial-updates universe \n\
deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial multiverse \n\
deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial-updates multiverse \n\
deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial-backports main restricted universe multiverse \n\
deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial-security main restricted \n\
deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial-security universe \n\
deb [trusted=true] http://mirrors.tuna.tsinghua.edu.cn/ubuntu/ xenial-security multiverse" > /etc/apt/sources.list

# One layer that, in order:
#   1. installs the build tools and development libraries listed below;
#   2. installs NCCL (libnccl2 / libnccl-dev), allowing downgrades and changes
#      to held packages;
#   3. installs tzdata non-interactively and sets the zone to Asia/Shanghai;
#   4. creates unversioned .so symlinks for cuDNN, cuBLAS, cuSOLVER, cuSPARSE
#      and cuFFT next to the versioned libraries shipped in the base image.
# NOTE(review): the symlink targets hard-code exact library versions
# (e.g. libcublas.so.11.4.1.1043); they must match the base image — confirm
# when bumping the FROM tag.
RUN apt-get update && apt-get install -y inetutils-ping wget vim curl cmake git sox libsndfile1 libpng12-dev \
    libpng-dev swig libzip-dev openssl bc libflac* libgdk-pixbuf2.0-dev libpango1.0-dev libcairo2-dev \
    libgtk2.0-dev pkg-config zip unzip zlib1g-dev libreadline-dev libbz2-dev liblapack-dev libjpeg-turbo8-dev \
    sudo lrzsz libsqlite3-dev libx11-dev libsm6 apt-utils libopencv-dev libavcodec-dev libavformat-dev \
    libswscale-dev locales liblzma-dev python-lzma m4 libxext-dev strace libibverbs-dev libpcre3 libpcre3-dev \
    build-essential libncurses5-dev libgdbm-dev libnss3-dev libssl-dev libreadline-dev libffi-dev xz-utils \
    libfreetype6-dev libxslt1-dev libxml2-dev libgeos-3.5.0 libgeos-dev && apt-get install -y --allow-downgrades \
    --allow-change-held-packages libnccl2 libnccl-dev && DEBIAN_FRONTEND=noninteractive apt-get install -y tzdata \
    && /bin/cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && dpkg-reconfigure -f noninteractive tzdata && \
    cd /usr/lib/x86_64-linux-gnu && ln -s libcudnn.so.8 libcudnn.so && \
    cd /usr/local/cuda-11.2/targets/x86_64-linux/lib  && ln -s libcublas.so.11.4.1.1043 libcublas.so && \
    ln -s libcusolver.so.11.1.0.152 libcusolver.so && ln -s libcusparse.so.11 libcusparse.so && \
    ln -s libcufft.so.10.4.1.152 libcufft.so

# Append two readline settings to /etc/inputrc, generate the en_US.UTF-8
# locale, refresh the linker cache, then create group `paddle` (gid 10001) and
# user `paddle` (uid 10001, bash shell, home directory) with passwordless sudo.
RUN echo "set meta-flag on" >> /etc/inputrc && echo "set convert-meta off" >> /etc/inputrc && \
    locale-gen en_US.UTF-8 && /sbin/ldconfig -v && groupadd -g 10001 paddle && \
    useradd -m -s /bin/bash -N -u 10001 paddle -g paddle && chmod g+w /etc/passwd && \
    echo "paddle ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers

# Default locale and timezone for every later layer and for the running container.
ENV LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 LANGUAGE=en_US.UTF-8 TZ=Asia/Shanghai

# Build CPython 3.7.13 from source into /home/paddle/python3.7, remove the
# source tree and tarball afterwards, and hand the install to the paddle user.
# The tarball is fetched from the npmmirror CDN.
# official download site: https://www.python.org/ftp/python/3.7.13/Python-3.7.13.tgz
RUN wget https://cdn.npmmirror.com/binaries/python/3.7.13/Python-3.7.13.tgz && tar xvf Python-3.7.13.tgz && \
    cd Python-3.7.13 && ./configure --prefix=/home/paddle/python3.7 && make -j8 && make install && \
    rm -rf ../Python-3.7.13 ../Python-3.7.13.tgz && chown -R paddle:paddle /home/paddle/python3.7

# Build GCC 8.2.0 (C and C++ only) into /home/paddle/gcc82.
# Its prerequisites are built first, each into /usr/local and in this order:
# GMP 6.1.0, MPFR 3.1.4, MPC 1.0.3, ISL 0.18. GCC's configure is then pointed
# at /usr/local for all four. Every source tree and tarball is deleted right
# after its `make install`.
# NOTE(review): the GCC tarball is downloaded over plain http with
# --no-check-certificate, so its integrity is not verified — confirm this is
# acceptable.
RUN cd /tmp && wget https://mirrors.sjtug.sjtu.edu.cn/gnu/gmp/gmp-6.1.0.tar.bz2 && tar xvf gmp-6.1.0.tar.bz2 && \
    cd gmp-6.1.0 && ./configure --prefix=/usr/local && make -j8 && make install && \
    rm -rf ../gmp-6.1.0.tar.bz2 ../gmp-6.1.0 && cd /tmp && \
    wget https://www.mpfr.org/mpfr-3.1.4/mpfr-3.1.4.tar.bz2 && tar xvf mpfr-3.1.4.tar.bz2 && cd mpfr-3.1.4 && \
    ./configure --prefix=/usr/local && make -j8 && make install && rm -rf ../mpfr-3.1.4.tar.bz2 ../mpfr-3.1.4 && \
    cd /tmp && wget https://mirrors.sjtug.sjtu.edu.cn/gnu/mpc/mpc-1.0.3.tar.gz && tar xvf mpc-1.0.3.tar.gz && \
    cd mpc-1.0.3 && ./configure --prefix=/usr/local && make -j8 && make install && \
    rm -rf ../mpc-1.0.3.tar.gz ../mpc-1.0.3 && cd /tmp && \
    wget http://www.mirrorservice.org/sites/sourceware.org/pub/gcc/infrastructure/isl-0.18.tar.bz2 && \
    tar xvf isl-0.18.tar.bz2 && cd isl-0.18 && ./configure --prefix=/usr/local && make -j8 && make install \
    && rm -rf ../isl-0.18.tar.bz2 ../isl-0.18 && cd /tmp && \
    wget http://mirrors.ustc.edu.cn/gnu/gcc/gcc-8.2.0/gcc-8.2.0.tar.gz --no-check-certificate && \
    tar xvf gcc-8.2.0.tar.gz && cd gcc-8.2.0 && unset LIBRARY_PATH && ./configure --prefix=/home/paddle/gcc82 \
    --enable-threads=posix --disable-checking --disable-multilib --enable-languages=c,c++ --with-gmp=/usr/local \
    --with-mpfr=/usr/local --with-mpc=/usr/local --with-isl=/usr/local && make -j8 && make install && \
    rm -rf ../gcc-8.2.0.tar.gz ../gcc-8.2.0 && chown -R paddle:paddle /home/paddle/gcc82

WORKDIR /home/paddle
# Put the self-built Python 3.7 and GCC 8.2 ahead of the system tools on PATH,
# and add the cuDNN / CUDA library directories to the runtime linker path.
ENV PATH=/home/paddle/python3.7/bin:/home/paddle/gcc82/bin:${PATH} \
    LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:/usr/local/cuda-11.2/targets/x86_64-linux/lib:${LD_LIBRARY_PATH}

# Write a pip config that uses the Baidu PyPI mirror, upgrade pip, install the
# pinned paddlepaddle-gpu wheel (2.4.0rc0, "post112" build) from the Paddle
# wheel index, then drop pip's cache to keep the layer small.
RUN mkdir -p ~/.pip && echo "[global]" > ~/.pip/pip.conf && \
    echo "index-url=https://mirror.baidu.com/pypi/simple" >> ~/.pip/pip.conf && \
    echo "trusted-host=mirror.baidu.com" >> ~/.pip/pip.conf && \
    python3 -m pip install --upgrade pip && \
    pip install paddlepaddle-gpu==2.4.0rc0.post112 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html && \
    rm -rf ~/.cache/pip

# Clone PaddleSpeech and install it in editable mode with the `develop` extra
# (packages come from the Tsinghua PyPI mirror), then pin importlib-metadata
# and urllib3 to specific versions.
# The libstdc++ built with GCC 8.2 is copied over the system libstdc++.so.6.
# NOTE(review): presumably so binaries built with the newer compiler find a
# matching C++ runtime — confirm.
# Finally the checkout is handed to the paddle user.
RUN git clone https://github.com/PaddlePaddle/PaddleSpeech.git && cd PaddleSpeech && \
    pip3 install pytest-runner paddleaudio -i https://pypi.tuna.tsinghua.edu.cn/simple && \
    pip3 install -e .[develop] -i https://pypi.tuna.tsinghua.edu.cn/simple && \
    pip3 install importlib-metadata==4.2.0 urllib3==1.25.10 -i https://pypi.tuna.tsinghua.edu.cn/simple && \
    rm -rf ~/.cache/pip && \
    sudo cp -f /home/paddle/gcc82/lib64/libstdc++.so.6.0.25 /usr/lib/x86_64-linux-gnu/libstdc++.so.6 && \
    chown -R paddle:paddle /home/paddle/PaddleSpeech

# Run the container as the unprivileged paddle user.
USER paddle
# Exec-form CMD is parsed as a JSON array, so it must use double quotes.
# With single quotes Docker falls back to shell form and tries to execute the
# literal word ['bash'], which fails at container start.
CMD ["bash"]


================================================
FILE: docker/ubuntu18-cpu/Dockerfile
================================================
# CPU image: PaddlePaddle 2.2.2 base with PaddleSpeech built and installed
# from the current upstream default branch.
FROM registry.baidubce.com/paddlepaddle/paddle:2.2.2
LABEL maintainer="paddlesl@baidu.com"

# libsndfile provides audio file I/O. `-y` is required: docker build has no
# TTY, so without it apt-get aborts at the confirmation prompt.
RUN apt-get update \
  && apt-get install -y libsndfile-dev libsndfile1 \
  && apt-get clean \
  && rm -rf /var/lib/apt/lists/*

# Shallow clone: only the latest commit is needed to build the wheel.
RUN git clone --depth 1 https://github.com/PaddlePaddle/PaddleSpeech.git /home/PaddleSpeech
# Best effort: `exit 0` keeps the build going when mccabe is not installed.
RUN pip3 uninstall mccabe -y ; exit 0;
RUN pip3 install multiprocess==0.70.12 importlib-metadata==4.2.0 dill==0.3.4

# Build a wheel from the checkout and install it (Tsinghua PyPI mirror).
WORKDIR /home/PaddleSpeech/
RUN python setup.py bdist_wheel
RUN pip install dist/*.whl -i https://pypi.tuna.tsinghua.edu.cn/simple

# Exec-form CMD is parsed as JSON and therefore needs double quotes; the
# single-quoted form is treated as a shell command and fails at start-up.
CMD ["bash"]


================================================
FILE: docker/ubuntu20-cpu/Dockerfile
================================================
# CPU image: PaddlePaddle 3.0.0b1 base with PaddleSpeech built and installed
# from the current upstream default branch.
FROM registry.baidubce.com/paddlepaddle/paddle:3.0.0b1
LABEL maintainer="ext_paddle_oss@baidu.com"

# libsndfile provides audio file I/O. `-y` is required: docker build has no
# TTY, so without it apt-get aborts at the confirmation prompt.
RUN apt-get update \
  && apt-get install -y libsndfile-dev libsndfile1 \
  && apt-get clean \
  && rm -rf /var/lib/apt/lists/*

# Shallow clone: only the latest commit is needed to build the wheel.
RUN git clone --depth 1 https://github.com/PaddlePaddle/PaddleSpeech.git /home/PaddleSpeech
# Best effort: `exit 0` keeps the build going when mccabe is not installed.
RUN pip3 uninstall mccabe -y ; exit 0;
RUN pip3 install multiprocess==0.70.12 importlib-metadata==4.2.0 dill==0.3.4

# Build a wheel from the checkout and install it (Tsinghua PyPI mirror).
WORKDIR /home/PaddleSpeech/
RUN python setup.py bdist_wheel
RUN pip install dist/*.whl -i https://pypi.tuna.tsinghua.edu.cn/simple

# Exec-form CMD is parsed as JSON and therefore needs double quotes; the
# single-quoted form is treated as a shell command and fails at start-up.
CMD ["bash"]


================================================
FILE: docs/Makefile
================================================
# Minimal makefile for Sphinx documentation
#
# Every target is forwarded to `sphinx-build -M <target>`, reading sources
# from $(SOURCEDIR) and writing output to $(BUILDDIR).

# You can set these variables from the command line, and also
# from the environment for the first two.
# (`?=` only assigns when the variable is not already defined.)
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SOURCEDIR     = source
BUILDDIR      = build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# `help` and `Makefile` are declared phony, i.e. names of actions rather than
# files whose freshness make should check.
.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
# $@ expands to the requested target name (e.g. html), which becomes the
# builder passed to -M.
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


================================================
FILE: docs/requirements.txt
================================================
braceexpand
editdistance
g2p_en
g2pM
h5py
inflect
jsonlines
kaldiio
keyboard
librosa==0.8.1
loguru
matplotlib
myst-parser
nara_wpe
numpydoc
onnxruntime>=1.11.0
opencc
paddlenlp
# paddlepaddle 2.4.x is excluded by the pin below (2.3.* is recommended), see: https://github.com/PaddlePaddle/Paddle/issues/48243
paddlepaddle>=2.2.2,<2.4.0
paddlespeech_ctcdecoders
paddlespeech_feat
pandas
pattern_singleton
ppdiffusers>=0.9.0
praatio>=5.0.0, <=5.1.1
prettytable
pypinyin-dict
pypinyin<=0.44.0
python-dateutil
pyworld>=0.2.12
recommonmark>=0.5.0
resampy
sacrebleu
sphinx
sphinx-autobuild
sphinx-markdown-tables
sphinx_rtd_theme
textgrid
timer
ToJyutping==0.2.1
typeguard==2.13.3
webrtcvad
websockets
yacs~=0.1.8
zhon


================================================
FILE: docs/source/_static/custom.css
================================================
/* Cap the .wy-nav-content element at 80% of its container's width. */
.wy-nav-content {
    max-width: 80%;
}

/* Tables nested inside a .table element: grey table background... */
.table table {
    background: #b9b9b9;
}

/* ...with white cells on top of it. */
.table table td {
    background: #FFF;
}


================================================
FILE: docs/source/api/modules.rst
================================================
paddlespeech
============

.. toctree::
   :maxdepth: 4

   paddlespeech


================================================
FILE: docs/source/api/paddlespeech.audio.features.layers.rst
================================================
paddlespeech.audio.features.layers module
=========================================

.. automodule:: paddlespeech.audio.features.layers
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.features.rst
================================================
paddlespeech.audio.features package
===================================

.. automodule:: paddlespeech.audio.features
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.audio.features.layers


================================================
FILE: docs/source/api/paddlespeech.audio.io.rst
================================================
paddlespeech.audio.io package
=============================

.. automodule:: paddlespeech.audio.io
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.rst
================================================
paddlespeech.audio package
==========================

.. automodule:: paddlespeech.audio
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.audio.features
   paddlespeech.audio.functional
   paddlespeech.audio.io
   paddlespeech.audio.kaldi
   paddlespeech.audio.metric
   paddlespeech.audio.sox_effects
   paddlespeech.audio.streamdata
   paddlespeech.audio.text
   paddlespeech.audio.transform
   paddlespeech.audio.utils


================================================
FILE: docs/source/api/paddlespeech.audio.streamdata.autodecode.rst
================================================
paddlespeech.audio.streamdata.autodecode module
===============================================

.. automodule:: paddlespeech.audio.streamdata.autodecode
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.streamdata.cache.rst
================================================
paddlespeech.audio.streamdata.cache module
==========================================

.. automodule:: paddlespeech.audio.streamdata.cache
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.streamdata.compat.rst
================================================
paddlespeech.audio.streamdata.compat module
===========================================

.. automodule:: paddlespeech.audio.streamdata.compat
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.streamdata.extradatasets.rst
================================================
paddlespeech.audio.streamdata.extradatasets module
==================================================

.. automodule:: paddlespeech.audio.streamdata.extradatasets
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.streamdata.filters.rst
================================================
paddlespeech.audio.streamdata.filters module
============================================

.. automodule:: paddlespeech.audio.streamdata.filters
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.streamdata.gopen.rst
================================================
paddlespeech.audio.streamdata.gopen module
==========================================

.. automodule:: paddlespeech.audio.streamdata.gopen
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.streamdata.handlers.rst
================================================
paddlespeech.audio.streamdata.handlers module
=============================================

.. automodule:: paddlespeech.audio.streamdata.handlers
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.streamdata.mix.rst
================================================
paddlespeech.audio.streamdata.mix module
========================================

.. automodule:: paddlespeech.audio.streamdata.mix
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.streamdata.paddle_utils.rst
================================================
paddlespeech.audio.streamdata.paddle\_utils module
==================================================

.. automodule:: paddlespeech.audio.streamdata.paddle_utils
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.streamdata.pipeline.rst
================================================
paddlespeech.audio.streamdata.pipeline module
=============================================

.. automodule:: paddlespeech.audio.streamdata.pipeline
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.streamdata.rst
================================================
paddlespeech.audio.streamdata package
=====================================

.. automodule:: paddlespeech.audio.streamdata
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.audio.streamdata.autodecode
   paddlespeech.audio.streamdata.cache
   paddlespeech.audio.streamdata.compat
   paddlespeech.audio.streamdata.extradatasets
   paddlespeech.audio.streamdata.filters
   paddlespeech.audio.streamdata.gopen
   paddlespeech.audio.streamdata.handlers
   paddlespeech.audio.streamdata.mix
   paddlespeech.audio.streamdata.paddle_utils
   paddlespeech.audio.streamdata.pipeline
   paddlespeech.audio.streamdata.shardlists
   paddlespeech.audio.streamdata.tariterators
   paddlespeech.audio.streamdata.utils
   paddlespeech.audio.streamdata.writer


================================================
FILE: docs/source/api/paddlespeech.audio.streamdata.shardlists.rst
================================================
paddlespeech.audio.streamdata.shardlists module
===============================================

.. automodule:: paddlespeech.audio.streamdata.shardlists
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.streamdata.tariterators.rst
================================================
paddlespeech.audio.streamdata.tariterators module
=================================================

.. automodule:: paddlespeech.audio.streamdata.tariterators
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.streamdata.utils.rst
================================================
paddlespeech.audio.streamdata.utils module
==========================================

.. automodule:: paddlespeech.audio.streamdata.utils
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.streamdata.writer.rst
================================================
paddlespeech.audio.streamdata.writer module
===========================================

.. automodule:: paddlespeech.audio.streamdata.writer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.text.rst
================================================
paddlespeech.audio.text package
===============================

.. automodule:: paddlespeech.audio.text
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.audio.text.text_featurizer
   paddlespeech.audio.text.utility


================================================
FILE: docs/source/api/paddlespeech.audio.text.text_featurizer.rst
================================================
paddlespeech.audio.text.text\_featurizer module
===============================================

.. automodule:: paddlespeech.audio.text.text_featurizer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.text.utility.rst
================================================
paddlespeech.audio.text.utility module
======================================

.. automodule:: paddlespeech.audio.text.utility
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.transform.add_deltas.rst
================================================
paddlespeech.audio.transform.add\_deltas module
===============================================

.. automodule:: paddlespeech.audio.transform.add_deltas
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.transform.channel_selector.rst
================================================
paddlespeech.audio.transform.channel\_selector module
=====================================================

.. automodule:: paddlespeech.audio.transform.channel_selector
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.transform.cmvn.rst
================================================
paddlespeech.audio.transform.cmvn module
========================================

.. automodule:: paddlespeech.audio.transform.cmvn
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.transform.functional.rst
================================================
paddlespeech.audio.transform.functional module
==============================================

.. automodule:: paddlespeech.audio.transform.functional
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.transform.perturb.rst
================================================
paddlespeech.audio.transform.perturb module
===========================================

.. automodule:: paddlespeech.audio.transform.perturb
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.transform.rst
================================================
paddlespeech.audio.transform package
====================================

.. automodule:: paddlespeech.audio.transform
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.audio.transform.add_deltas
   paddlespeech.audio.transform.channel_selector
   paddlespeech.audio.transform.cmvn
   paddlespeech.audio.transform.functional
   paddlespeech.audio.transform.perturb
   paddlespeech.audio.transform.spec_augment
   paddlespeech.audio.transform.spectrogram
   paddlespeech.audio.transform.transform_interface
   paddlespeech.audio.transform.transformation
   paddlespeech.audio.transform.wpe


================================================
FILE: docs/source/api/paddlespeech.audio.transform.spec_augment.rst
================================================
paddlespeech.audio.transform.spec\_augment module
=================================================

.. automodule:: paddlespeech.audio.transform.spec_augment
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.transform.spectrogram.rst
================================================
paddlespeech.audio.transform.spectrogram module
===============================================

.. automodule:: paddlespeech.audio.transform.spectrogram
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.transform.transform_interface.rst
================================================
paddlespeech.audio.transform.transform\_interface module
========================================================

.. automodule:: paddlespeech.audio.transform.transform_interface
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.transform.transformation.rst
================================================
paddlespeech.audio.transform.transformation module
==================================================

.. automodule:: paddlespeech.audio.transform.transformation
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.transform.wpe.rst
================================================
paddlespeech.audio.transform.wpe module
=======================================

.. automodule:: paddlespeech.audio.transform.wpe
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.utils.check_kwargs.rst
================================================
paddlespeech.audio.utils.check\_kwargs module
=============================================

.. automodule:: paddlespeech.audio.utils.check_kwargs
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.utils.download.rst
================================================
paddlespeech.audio.utils.download module
========================================

.. automodule:: paddlespeech.audio.utils.download
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.utils.dynamic_import.rst
================================================
paddlespeech.audio.utils.dynamic\_import module
===============================================

.. automodule:: paddlespeech.audio.utils.dynamic_import
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.utils.error.rst
================================================
paddlespeech.audio.utils.error module
=====================================

.. automodule:: paddlespeech.audio.utils.error
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.utils.log.rst
================================================
paddlespeech.audio.utils.log module
===================================

.. automodule:: paddlespeech.audio.utils.log
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.utils.numeric.rst
================================================
paddlespeech.audio.utils.numeric module
=======================================

.. automodule:: paddlespeech.audio.utils.numeric
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.utils.rst
================================================
paddlespeech.audio.utils package
================================

.. automodule:: paddlespeech.audio.utils
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.audio.utils.check_kwargs
   paddlespeech.audio.utils.download
   paddlespeech.audio.utils.dynamic_import
   paddlespeech.audio.utils.error
   paddlespeech.audio.utils.log
   paddlespeech.audio.utils.numeric
   paddlespeech.audio.utils.sox_utils
   paddlespeech.audio.utils.tensor_utils
   paddlespeech.audio.utils.time


================================================
FILE: docs/source/api/paddlespeech.audio.utils.tensor_utils.rst
================================================
paddlespeech.audio.utils.tensor\_utils module
=============================================

.. automodule:: paddlespeech.audio.utils.tensor_utils
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.audio.utils.time.rst
================================================
paddlespeech.audio.utils.time module
====================================

.. automodule:: paddlespeech.audio.utils.time
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.cli.asr.infer.rst
================================================
paddlespeech.cli.asr.infer module
=================================

.. automodule:: paddlespeech.cli.asr.infer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.cli.asr.rst
================================================
paddlespeech.cli.asr package
============================

.. automodule:: paddlespeech.cli.asr
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.cli.asr.infer


================================================
FILE: docs/source/api/paddlespeech.cli.base_commands.rst
================================================
paddlespeech.cli.base\_commands module
======================================

.. automodule:: paddlespeech.cli.base_commands
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.cli.cls.infer.rst
================================================
paddlespeech.cli.cls.infer module
=================================

.. automodule:: paddlespeech.cli.cls.infer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.cli.cls.rst
================================================
paddlespeech.cli.cls package
============================

.. automodule:: paddlespeech.cli.cls
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.cli.cls.infer


================================================
FILE: docs/source/api/paddlespeech.cli.download.rst
================================================
paddlespeech.cli.download module
================================

.. automodule:: paddlespeech.cli.download
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.cli.entry.rst
================================================
paddlespeech.cli.entry module
=============================

.. automodule:: paddlespeech.cli.entry
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.cli.executor.rst
================================================
paddlespeech.cli.executor module
================================

.. automodule:: paddlespeech.cli.executor
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.cli.kws.infer.rst
================================================
paddlespeech.cli.kws.infer module
=================================

.. automodule:: paddlespeech.cli.kws.infer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.cli.kws.rst
================================================
paddlespeech.cli.kws package
============================

.. automodule:: paddlespeech.cli.kws
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.cli.kws.infer


================================================
FILE: docs/source/api/paddlespeech.cli.log.rst
================================================
paddlespeech.cli.log module
===========================

.. automodule:: paddlespeech.cli.log
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.cli.rst
================================================
paddlespeech.cli package
========================

.. automodule:: paddlespeech.cli
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.cli.asr
   paddlespeech.cli.cls
   paddlespeech.cli.kws
   paddlespeech.cli.st
   paddlespeech.cli.text
   paddlespeech.cli.tts
   paddlespeech.cli.vector

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.cli.base_commands
   paddlespeech.cli.download
   paddlespeech.cli.entry
   paddlespeech.cli.executor
   paddlespeech.cli.log
   paddlespeech.cli.utils


================================================
FILE: docs/source/api/paddlespeech.cli.st.infer.rst
================================================
paddlespeech.cli.st.infer module
================================

.. automodule:: paddlespeech.cli.st.infer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.cli.st.rst
================================================
paddlespeech.cli.st package
===========================

.. automodule:: paddlespeech.cli.st
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.cli.st.infer


================================================
FILE: docs/source/api/paddlespeech.cli.text.infer.rst
================================================
paddlespeech.cli.text.infer module
==================================

.. automodule:: paddlespeech.cli.text.infer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.cli.text.rst
================================================
paddlespeech.cli.text package
=============================

.. automodule:: paddlespeech.cli.text
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.cli.text.infer


================================================
FILE: docs/source/api/paddlespeech.cli.tts.infer.rst
================================================
paddlespeech.cli.tts.infer module
=================================

.. automodule:: paddlespeech.cli.tts.infer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.cli.tts.rst
================================================
paddlespeech.cli.tts package
============================

.. automodule:: paddlespeech.cli.tts
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.cli.tts.infer


================================================
FILE: docs/source/api/paddlespeech.cli.utils.rst
================================================
paddlespeech.cli.utils module
=============================

.. automodule:: paddlespeech.cli.utils
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.cli.vector.infer.rst
================================================
paddlespeech.cli.vector.infer module
====================================

.. automodule:: paddlespeech.cli.vector.infer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.cli.vector.rst
================================================
paddlespeech.cli.vector package
===============================

.. automodule:: paddlespeech.cli.vector
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.cli.vector.infer


================================================
FILE: docs/source/api/paddlespeech.cls.exps.panns.deploy.rst
================================================
paddlespeech.cls.exps.panns.deploy package
==========================================

.. automodule:: paddlespeech.cls.exps.panns.deploy
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4


================================================
FILE: docs/source/api/paddlespeech.cls.exps.panns.rst
================================================
paddlespeech.cls.exps.panns package
===================================

.. automodule:: paddlespeech.cls.exps.panns
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.cls.exps.panns.deploy

Submodules
----------

.. toctree::
   :maxdepth: 4


================================================
FILE: docs/source/api/paddlespeech.cls.exps.rst
================================================
paddlespeech.cls.exps package
=============================

.. automodule:: paddlespeech.cls.exps
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.cls.exps.panns


================================================
FILE: docs/source/api/paddlespeech.cls.models.panns.classifier.rst
================================================
paddlespeech.cls.models.panns.classifier module
===============================================

.. automodule:: paddlespeech.cls.models.panns.classifier
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.cls.models.panns.panns.rst
================================================
paddlespeech.cls.models.panns.panns module
==========================================

.. automodule:: paddlespeech.cls.models.panns.panns
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.cls.models.panns.rst
================================================
paddlespeech.cls.models.panns package
=====================================

.. automodule:: paddlespeech.cls.models.panns
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.cls.models.panns.classifier
   paddlespeech.cls.models.panns.panns


================================================
FILE: docs/source/api/paddlespeech.cls.models.rst
================================================
paddlespeech.cls.models package
===============================

.. automodule:: paddlespeech.cls.models
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.cls.models.panns


================================================
FILE: docs/source/api/paddlespeech.cls.rst
================================================
paddlespeech.cls package
========================

.. automodule:: paddlespeech.cls
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.cls.exps
   paddlespeech.cls.models


================================================
FILE: docs/source/api/paddlespeech.kws.exps.mdtc.collate.rst
================================================
paddlespeech.kws.exps.mdtc.collate module
=========================================

.. automodule:: paddlespeech.kws.exps.mdtc.collate
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.kws.exps.mdtc.compute_det.rst
================================================
paddlespeech.kws.exps.mdtc.compute\_det module
==============================================

.. automodule:: paddlespeech.kws.exps.mdtc.compute_det
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.kws.exps.mdtc.plot_det_curve.rst
================================================
paddlespeech.kws.exps.mdtc.plot\_det\_curve module
==================================================

.. automodule:: paddlespeech.kws.exps.mdtc.plot_det_curve
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.kws.exps.mdtc.rst
================================================
paddlespeech.kws.exps.mdtc package
==================================

.. automodule:: paddlespeech.kws.exps.mdtc
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.kws.exps.mdtc.collate
   paddlespeech.kws.exps.mdtc.compute_det
   paddlespeech.kws.exps.mdtc.plot_det_curve
   paddlespeech.kws.exps.mdtc.score
   paddlespeech.kws.exps.mdtc.train


================================================
FILE: docs/source/api/paddlespeech.kws.exps.mdtc.score.rst
================================================
paddlespeech.kws.exps.mdtc.score module
=======================================

.. automodule:: paddlespeech.kws.exps.mdtc.score
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.kws.exps.mdtc.train.rst
================================================
paddlespeech.kws.exps.mdtc.train module
=======================================

.. automodule:: paddlespeech.kws.exps.mdtc.train
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.kws.exps.rst
================================================
paddlespeech.kws.exps package
=============================

.. automodule:: paddlespeech.kws.exps
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.kws.exps.mdtc


================================================
FILE: docs/source/api/paddlespeech.kws.models.loss.rst
================================================
paddlespeech.kws.models.loss module
===================================

.. automodule:: paddlespeech.kws.models.loss
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.kws.models.mdtc.rst
================================================
paddlespeech.kws.models.mdtc module
===================================

.. automodule:: paddlespeech.kws.models.mdtc
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.kws.models.rst
================================================
paddlespeech.kws.models package
===============================

.. automodule:: paddlespeech.kws.models
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.kws.models.loss
   paddlespeech.kws.models.mdtc


================================================
FILE: docs/source/api/paddlespeech.kws.rst
================================================
paddlespeech.kws package
========================

.. automodule:: paddlespeech.kws
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.kws.exps
   paddlespeech.kws.models


================================================
FILE: docs/source/api/paddlespeech.resource.model_alias.rst
================================================
paddlespeech.resource.model\_alias module
=========================================

.. automodule:: paddlespeech.resource.model_alias
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.resource.pretrained_models.rst
================================================
paddlespeech.resource.pretrained\_models module
===============================================

.. automodule:: paddlespeech.resource.pretrained_models
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.resource.resource.rst
================================================
paddlespeech.resource.resource module
=====================================

.. automodule:: paddlespeech.resource.resource
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.resource.rst
================================================
paddlespeech.resource package
=============================

.. automodule:: paddlespeech.resource
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.resource.model_alias
   paddlespeech.resource.pretrained_models
   paddlespeech.resource.resource


================================================
FILE: docs/source/api/paddlespeech.rst
================================================
paddlespeech package
====================

.. automodule:: paddlespeech
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.audio
   paddlespeech.cli
   paddlespeech.cls
   paddlespeech.kws
   paddlespeech.resource
   paddlespeech.s2t
   paddlespeech.server
   paddlespeech.t2s
   paddlespeech.text
   paddlespeech.utils
   paddlespeech.vector

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.version


================================================
FILE: docs/source/api/paddlespeech.s2t.decoders.beam_search.batch_beam_search.rst
================================================
paddlespeech.s2t.decoders.beam\_search.batch\_beam\_search module
=================================================================

.. automodule:: paddlespeech.s2t.decoders.beam_search.batch_beam_search
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.decoders.beam_search.beam_search.rst
================================================
paddlespeech.s2t.decoders.beam\_search.beam\_search module
==========================================================

.. automodule:: paddlespeech.s2t.decoders.beam_search.beam_search
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.decoders.beam_search.rst
================================================
paddlespeech.s2t.decoders.beam\_search package
==============================================

.. automodule:: paddlespeech.s2t.decoders.beam_search
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.decoders.beam_search.batch_beam_search
   paddlespeech.s2t.decoders.beam_search.beam_search


================================================
FILE: docs/source/api/paddlespeech.s2t.decoders.ctcdecoder.decoders_deprecated.rst
================================================
paddlespeech.s2t.decoders.ctcdecoder.decoders\_deprecated module
================================================================

.. automodule:: paddlespeech.s2t.decoders.ctcdecoder.decoders_deprecated
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.decoders.ctcdecoder.rst
================================================
paddlespeech.s2t.decoders.ctcdecoder package
============================================

.. automodule:: paddlespeech.s2t.decoders.ctcdecoder
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.decoders.ctcdecoder.decoders_deprecated
   paddlespeech.s2t.decoders.ctcdecoder.swig_wrapper


================================================
FILE: docs/source/api/paddlespeech.s2t.decoders.ctcdecoder.swig_wrapper.rst
================================================
paddlespeech.s2t.decoders.ctcdecoder.swig\_wrapper module
=========================================================

.. automodule:: paddlespeech.s2t.decoders.ctcdecoder.swig_wrapper
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.decoders.recog.rst
================================================
paddlespeech.s2t.decoders.recog module
======================================

.. automodule:: paddlespeech.s2t.decoders.recog
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.decoders.rst
================================================
paddlespeech.s2t.decoders package
=================================

.. automodule:: paddlespeech.s2t.decoders
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.decoders.beam_search
   paddlespeech.s2t.decoders.ctcdecoder
   paddlespeech.s2t.decoders.scorers

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.decoders.recog
   paddlespeech.s2t.decoders.utils


================================================
FILE: docs/source/api/paddlespeech.s2t.decoders.scorers.ctc.rst
================================================
paddlespeech.s2t.decoders.scorers.ctc module
============================================

.. automodule:: paddlespeech.s2t.decoders.scorers.ctc
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.decoders.scorers.ctc_prefix_score.rst
================================================
paddlespeech.s2t.decoders.scorers.ctc\_prefix\_score module
===========================================================

.. automodule:: paddlespeech.s2t.decoders.scorers.ctc_prefix_score
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.decoders.scorers.length_bonus.rst
================================================
paddlespeech.s2t.decoders.scorers.length\_bonus module
======================================================

.. automodule:: paddlespeech.s2t.decoders.scorers.length_bonus
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.decoders.scorers.rst
================================================
paddlespeech.s2t.decoders.scorers package
=========================================

.. automodule:: paddlespeech.s2t.decoders.scorers
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.decoders.scorers.ctc
   paddlespeech.s2t.decoders.scorers.ctc_prefix_score
   paddlespeech.s2t.decoders.scorers.length_bonus
   paddlespeech.s2t.decoders.scorers.scorer_interface


================================================
FILE: docs/source/api/paddlespeech.s2t.decoders.scorers.scorer_interface.rst
================================================
paddlespeech.s2t.decoders.scorers.scorer\_interface module
==========================================================

.. automodule:: paddlespeech.s2t.decoders.scorers.scorer_interface
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.decoders.utils.rst
================================================
paddlespeech.s2t.decoders.utils module
======================================

.. automodule:: paddlespeech.s2t.decoders.utils
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.deepspeech2.bin.deploy.rst
================================================
paddlespeech.s2t.exps.deepspeech2.bin.deploy package
====================================================

.. automodule:: paddlespeech.s2t.exps.deepspeech2.bin.deploy
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.exps.deepspeech2.bin.deploy.runtime
   paddlespeech.s2t.exps.deepspeech2.bin.deploy.server


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.deepspeech2.bin.deploy.runtime.rst
================================================
paddlespeech.s2t.exps.deepspeech2.bin.deploy.runtime module
===========================================================

.. automodule:: paddlespeech.s2t.exps.deepspeech2.bin.deploy.runtime
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.deepspeech2.bin.deploy.server.rst
================================================
paddlespeech.s2t.exps.deepspeech2.bin.deploy.server module
==========================================================

.. automodule:: paddlespeech.s2t.exps.deepspeech2.bin.deploy.server
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.deepspeech2.bin.export.rst
================================================
paddlespeech.s2t.exps.deepspeech2.bin.export module
===================================================

.. automodule:: paddlespeech.s2t.exps.deepspeech2.bin.export
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.deepspeech2.bin.rst
================================================
paddlespeech.s2t.exps.deepspeech2.bin package
=============================================

.. automodule:: paddlespeech.s2t.exps.deepspeech2.bin
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.exps.deepspeech2.bin.deploy

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.exps.deepspeech2.bin.export
   paddlespeech.s2t.exps.deepspeech2.bin.test
   paddlespeech.s2t.exps.deepspeech2.bin.test_export
   paddlespeech.s2t.exps.deepspeech2.bin.test_wav
   paddlespeech.s2t.exps.deepspeech2.bin.train


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.deepspeech2.bin.test.rst
================================================
paddlespeech.s2t.exps.deepspeech2.bin.test module
=================================================

.. automodule:: paddlespeech.s2t.exps.deepspeech2.bin.test
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.deepspeech2.bin.test_export.rst
================================================
paddlespeech.s2t.exps.deepspeech2.bin.test\_export module
=========================================================

.. automodule:: paddlespeech.s2t.exps.deepspeech2.bin.test_export
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.deepspeech2.bin.test_wav.rst
================================================
paddlespeech.s2t.exps.deepspeech2.bin.test\_wav module
======================================================

.. automodule:: paddlespeech.s2t.exps.deepspeech2.bin.test_wav
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.deepspeech2.bin.train.rst
================================================
paddlespeech.s2t.exps.deepspeech2.bin.train module
==================================================

.. automodule:: paddlespeech.s2t.exps.deepspeech2.bin.train
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.deepspeech2.model.rst
================================================
paddlespeech.s2t.exps.deepspeech2.model module
==============================================

.. automodule:: paddlespeech.s2t.exps.deepspeech2.model
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.deepspeech2.rst
================================================
paddlespeech.s2t.exps.deepspeech2 package
=========================================

.. automodule:: paddlespeech.s2t.exps.deepspeech2
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.exps.deepspeech2.bin

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.exps.deepspeech2.model


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.rst
================================================
paddlespeech.s2t.exps package
=============================

.. automodule:: paddlespeech.s2t.exps
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.exps.deepspeech2
   paddlespeech.s2t.exps.u2
   paddlespeech.s2t.exps.u2_kaldi
   paddlespeech.s2t.exps.u2_st


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.u2.bin.alignment.rst
================================================
paddlespeech.s2t.exps.u2.bin.alignment module
=============================================

.. automodule:: paddlespeech.s2t.exps.u2.bin.alignment
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.u2.bin.export.rst
================================================
paddlespeech.s2t.exps.u2.bin.export module
==========================================

.. automodule:: paddlespeech.s2t.exps.u2.bin.export
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.u2.bin.rst
================================================
paddlespeech.s2t.exps.u2.bin package
====================================

.. automodule:: paddlespeech.s2t.exps.u2.bin
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.exps.u2.bin.alignment
   paddlespeech.s2t.exps.u2.bin.export
   paddlespeech.s2t.exps.u2.bin.test
   paddlespeech.s2t.exps.u2.bin.test_wav
   paddlespeech.s2t.exps.u2.bin.train


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.u2.bin.test.rst
================================================
paddlespeech.s2t.exps.u2.bin.test module
========================================

.. automodule:: paddlespeech.s2t.exps.u2.bin.test
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.u2.bin.test_wav.rst
================================================
paddlespeech.s2t.exps.u2.bin.test\_wav module
=============================================

.. automodule:: paddlespeech.s2t.exps.u2.bin.test_wav
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.u2.bin.train.rst
================================================
paddlespeech.s2t.exps.u2.bin.train module
=========================================

.. automodule:: paddlespeech.s2t.exps.u2.bin.train
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.u2.model.rst
================================================
paddlespeech.s2t.exps.u2.model module
=====================================

.. automodule:: paddlespeech.s2t.exps.u2.model
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.u2.rst
================================================
paddlespeech.s2t.exps.u2 package
================================

.. automodule:: paddlespeech.s2t.exps.u2
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.exps.u2.bin

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.exps.u2.model


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.u2_kaldi.bin.rst
================================================
paddlespeech.s2t.exps.u2\_kaldi.bin package
===========================================

.. automodule:: paddlespeech.s2t.exps.u2_kaldi.bin
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.exps.u2_kaldi.bin.test
   paddlespeech.s2t.exps.u2_kaldi.bin.train


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.u2_kaldi.bin.test.rst
================================================
paddlespeech.s2t.exps.u2\_kaldi.bin.test module
===============================================

.. automodule:: paddlespeech.s2t.exps.u2_kaldi.bin.test
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.u2_kaldi.bin.train.rst
================================================
paddlespeech.s2t.exps.u2\_kaldi.bin.train module
================================================

.. automodule:: paddlespeech.s2t.exps.u2_kaldi.bin.train
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.u2_kaldi.model.rst
================================================
paddlespeech.s2t.exps.u2\_kaldi.model module
============================================

.. automodule:: paddlespeech.s2t.exps.u2_kaldi.model
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.u2_kaldi.rst
================================================
paddlespeech.s2t.exps.u2\_kaldi package
=======================================

.. automodule:: paddlespeech.s2t.exps.u2_kaldi
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.exps.u2_kaldi.bin

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.exps.u2_kaldi.model


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.u2_st.bin.export.rst
================================================
paddlespeech.s2t.exps.u2\_st.bin.export module
==============================================

.. automodule:: paddlespeech.s2t.exps.u2_st.bin.export
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.u2_st.bin.rst
================================================
paddlespeech.s2t.exps.u2\_st.bin package
========================================

.. automodule:: paddlespeech.s2t.exps.u2_st.bin
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.exps.u2_st.bin.export
   paddlespeech.s2t.exps.u2_st.bin.test
   paddlespeech.s2t.exps.u2_st.bin.train


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.u2_st.bin.test.rst
================================================
paddlespeech.s2t.exps.u2\_st.bin.test module
============================================

.. automodule:: paddlespeech.s2t.exps.u2_st.bin.test
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.u2_st.bin.train.rst
================================================
paddlespeech.s2t.exps.u2\_st.bin.train module
=============================================

.. automodule:: paddlespeech.s2t.exps.u2_st.bin.train
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.u2_st.model.rst
================================================
paddlespeech.s2t.exps.u2\_st.model module
=========================================

.. automodule:: paddlespeech.s2t.exps.u2_st.model
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.exps.u2_st.rst
================================================
paddlespeech.s2t.exps.u2\_st package
====================================

.. automodule:: paddlespeech.s2t.exps.u2_st
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.exps.u2_st.bin

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.exps.u2_st.model


================================================
FILE: docs/source/api/paddlespeech.s2t.frontend.audio.rst
================================================
paddlespeech.s2t.frontend.audio module
======================================

.. automodule:: paddlespeech.s2t.frontend.audio
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.frontend.augmentor.augmentation.rst
================================================
paddlespeech.s2t.frontend.augmentor.augmentation module
=======================================================

.. automodule:: paddlespeech.s2t.frontend.augmentor.augmentation
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.frontend.augmentor.base.rst
================================================
paddlespeech.s2t.frontend.augmentor.base module
===============================================

.. automodule:: paddlespeech.s2t.frontend.augmentor.base
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.frontend.augmentor.impulse_response.rst
================================================
paddlespeech.s2t.frontend.augmentor.impulse\_response module
============================================================

.. automodule:: paddlespeech.s2t.frontend.augmentor.impulse_response
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.frontend.augmentor.noise_perturb.rst
================================================
paddlespeech.s2t.frontend.augmentor.noise\_perturb module
=========================================================

.. automodule:: paddlespeech.s2t.frontend.augmentor.noise_perturb
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.frontend.augmentor.online_bayesian_normalization.rst
================================================
paddlespeech.s2t.frontend.augmentor.online\_bayesian\_normalization module
==========================================================================

.. automodule:: paddlespeech.s2t.frontend.augmentor.online_bayesian_normalization
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.frontend.augmentor.resample.rst
================================================
paddlespeech.s2t.frontend.augmentor.resample module
===================================================

.. automodule:: paddlespeech.s2t.frontend.augmentor.resample
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.frontend.augmentor.rst
================================================
paddlespeech.s2t.frontend.augmentor package
===========================================

.. automodule:: paddlespeech.s2t.frontend.augmentor
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.frontend.augmentor.augmentation
   paddlespeech.s2t.frontend.augmentor.base
   paddlespeech.s2t.frontend.augmentor.impulse_response
   paddlespeech.s2t.frontend.augmentor.noise_perturb
   paddlespeech.s2t.frontend.augmentor.online_bayesian_normalization
   paddlespeech.s2t.frontend.augmentor.resample
   paddlespeech.s2t.frontend.augmentor.shift_perturb
   paddlespeech.s2t.frontend.augmentor.spec_augment
   paddlespeech.s2t.frontend.augmentor.speed_perturb
   paddlespeech.s2t.frontend.augmentor.volume_perturb


================================================
FILE: docs/source/api/paddlespeech.s2t.frontend.augmentor.shift_perturb.rst
================================================
paddlespeech.s2t.frontend.augmentor.shift\_perturb module
=========================================================

.. automodule:: paddlespeech.s2t.frontend.augmentor.shift_perturb
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.frontend.augmentor.spec_augment.rst
================================================
paddlespeech.s2t.frontend.augmentor.spec\_augment module
========================================================

.. automodule:: paddlespeech.s2t.frontend.augmentor.spec_augment
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.frontend.augmentor.speed_perturb.rst
================================================
paddlespeech.s2t.frontend.augmentor.speed\_perturb module
=========================================================

.. automodule:: paddlespeech.s2t.frontend.augmentor.speed_perturb
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.frontend.augmentor.volume_perturb.rst
================================================
paddlespeech.s2t.frontend.augmentor.volume\_perturb module
==========================================================

.. automodule:: paddlespeech.s2t.frontend.augmentor.volume_perturb
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.frontend.featurizer.audio_featurizer.rst
================================================
paddlespeech.s2t.frontend.featurizer.audio\_featurizer module
=============================================================

.. automodule:: paddlespeech.s2t.frontend.featurizer.audio_featurizer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.frontend.featurizer.rst
================================================
paddlespeech.s2t.frontend.featurizer package
============================================

.. automodule:: paddlespeech.s2t.frontend.featurizer
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.frontend.featurizer.audio_featurizer
   paddlespeech.s2t.frontend.featurizer.speech_featurizer
   paddlespeech.s2t.frontend.featurizer.text_featurizer


================================================
FILE: docs/source/api/paddlespeech.s2t.frontend.featurizer.speech_featurizer.rst
================================================
paddlespeech.s2t.frontend.featurizer.speech\_featurizer module
==============================================================

.. automodule:: paddlespeech.s2t.frontend.featurizer.speech_featurizer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.frontend.featurizer.text_featurizer.rst
================================================
paddlespeech.s2t.frontend.featurizer.text\_featurizer module
============================================================

.. automodule:: paddlespeech.s2t.frontend.featurizer.text_featurizer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.frontend.normalizer.rst
================================================
paddlespeech.s2t.frontend.normalizer module
===========================================

.. automodule:: paddlespeech.s2t.frontend.normalizer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.frontend.rst
================================================
paddlespeech.s2t.frontend package
=================================

.. automodule:: paddlespeech.s2t.frontend
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.frontend.augmentor
   paddlespeech.s2t.frontend.featurizer

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.frontend.audio
   paddlespeech.s2t.frontend.normalizer
   paddlespeech.s2t.frontend.speech
   paddlespeech.s2t.frontend.utility


================================================
FILE: docs/source/api/paddlespeech.s2t.frontend.speech.rst
================================================
paddlespeech.s2t.frontend.speech module
=======================================

.. automodule:: paddlespeech.s2t.frontend.speech
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.frontend.utility.rst
================================================
paddlespeech.s2t.frontend.utility module
========================================

.. automodule:: paddlespeech.s2t.frontend.utility
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.io.batchfy.rst
================================================
paddlespeech.s2t.io.batchfy module
==================================

.. automodule:: paddlespeech.s2t.io.batchfy
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.io.collator.rst
================================================
paddlespeech.s2t.io.collator module
===================================

.. automodule:: paddlespeech.s2t.io.collator
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.io.converter.rst
================================================
paddlespeech.s2t.io.converter module
====================================

.. automodule:: paddlespeech.s2t.io.converter
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.io.dataloader.rst
================================================
paddlespeech.s2t.io.dataloader module
=====================================

.. automodule:: paddlespeech.s2t.io.dataloader
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.io.dataset.rst
================================================
paddlespeech.s2t.io.dataset module
==================================

.. automodule:: paddlespeech.s2t.io.dataset
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.io.reader.rst
================================================
paddlespeech.s2t.io.reader module
=================================

.. automodule:: paddlespeech.s2t.io.reader
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.io.rst
================================================
paddlespeech.s2t.io package
===========================

.. automodule:: paddlespeech.s2t.io
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.io.batchfy
   paddlespeech.s2t.io.collator
   paddlespeech.s2t.io.converter
   paddlespeech.s2t.io.dataloader
   paddlespeech.s2t.io.dataset
   paddlespeech.s2t.io.reader
   paddlespeech.s2t.io.sampler
   paddlespeech.s2t.io.utility


================================================
FILE: docs/source/api/paddlespeech.s2t.io.sampler.rst
================================================
paddlespeech.s2t.io.sampler module
==================================

.. automodule:: paddlespeech.s2t.io.sampler
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.io.utility.rst
================================================
paddlespeech.s2t.io.utility module
==================================

.. automodule:: paddlespeech.s2t.io.utility
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.models.asr_interface.rst
================================================
paddlespeech.s2t.models.asr\_interface module
=============================================

.. automodule:: paddlespeech.s2t.models.asr_interface
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.models.ds2.conv.rst
================================================
paddlespeech.s2t.models.ds2.conv module
=======================================

.. automodule:: paddlespeech.s2t.models.ds2.conv
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.models.ds2.deepspeech2.rst
================================================
paddlespeech.s2t.models.ds2.deepspeech2 module
==============================================

.. automodule:: paddlespeech.s2t.models.ds2.deepspeech2
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.models.ds2.rst
================================================
paddlespeech.s2t.models.ds2 package
===================================

.. automodule:: paddlespeech.s2t.models.ds2
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.models.ds2.conv
   paddlespeech.s2t.models.ds2.deepspeech2


================================================
FILE: docs/source/api/paddlespeech.s2t.models.lm.dataset.rst
================================================
paddlespeech.s2t.models.lm.dataset module
=========================================

.. automodule:: paddlespeech.s2t.models.lm.dataset
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.models.lm.rst
================================================
paddlespeech.s2t.models.lm package
==================================

.. automodule:: paddlespeech.s2t.models.lm
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.models.lm.dataset
   paddlespeech.s2t.models.lm.transformer


================================================
FILE: docs/source/api/paddlespeech.s2t.models.lm.transformer.rst
================================================
paddlespeech.s2t.models.lm.transformer module
=============================================

.. automodule:: paddlespeech.s2t.models.lm.transformer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.models.lm_interface.rst
================================================
paddlespeech.s2t.models.lm\_interface module
============================================

.. automodule:: paddlespeech.s2t.models.lm_interface
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.models.rst
================================================
paddlespeech.s2t.models package
===============================

.. automodule:: paddlespeech.s2t.models
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.models.ds2
   paddlespeech.s2t.models.lm
   paddlespeech.s2t.models.u2
   paddlespeech.s2t.models.u2_st

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.models.asr_interface
   paddlespeech.s2t.models.lm_interface
   paddlespeech.s2t.models.st_interface


================================================
FILE: docs/source/api/paddlespeech.s2t.models.st_interface.rst
================================================
paddlespeech.s2t.models.st\_interface module
============================================

.. automodule:: paddlespeech.s2t.models.st_interface
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.models.u2.rst
================================================
paddlespeech.s2t.models.u2 package
==================================

.. automodule:: paddlespeech.s2t.models.u2
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.models.u2.u2
   paddlespeech.s2t.models.u2.updater


================================================
FILE: docs/source/api/paddlespeech.s2t.models.u2.u2.rst
================================================
paddlespeech.s2t.models.u2.u2 module
====================================

.. automodule:: paddlespeech.s2t.models.u2.u2
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.models.u2.updater.rst
================================================
paddlespeech.s2t.models.u2.updater module
=========================================

.. automodule:: paddlespeech.s2t.models.u2.updater
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.models.u2_st.rst
================================================
paddlespeech.s2t.models.u2\_st package
======================================

.. automodule:: paddlespeech.s2t.models.u2_st
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.models.u2_st.u2_st


================================================
FILE: docs/source/api/paddlespeech.s2t.models.u2_st.u2_st.rst
================================================
paddlespeech.s2t.models.u2\_st.u2\_st module
============================================

.. automodule:: paddlespeech.s2t.models.u2_st.u2_st
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.modules.activation.rst
================================================
paddlespeech.s2t.modules.activation module
==========================================

.. automodule:: paddlespeech.s2t.modules.activation
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.modules.align.rst
================================================
paddlespeech.s2t.modules.align module
=====================================

.. automodule:: paddlespeech.s2t.modules.align
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.modules.attention.rst
================================================
paddlespeech.s2t.modules.attention module
=========================================

.. automodule:: paddlespeech.s2t.modules.attention
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.modules.cmvn.rst
================================================
paddlespeech.s2t.modules.cmvn module
====================================

.. automodule:: paddlespeech.s2t.modules.cmvn
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.modules.conformer_convolution.rst
================================================
paddlespeech.s2t.modules.conformer\_convolution module
======================================================

.. automodule:: paddlespeech.s2t.modules.conformer_convolution
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.modules.crf.rst
================================================
paddlespeech.s2t.modules.crf module
===================================

.. automodule:: paddlespeech.s2t.modules.crf
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.modules.ctc.rst
================================================
paddlespeech.s2t.modules.ctc module
===================================

.. automodule:: paddlespeech.s2t.modules.ctc
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.modules.decoder.rst
================================================
paddlespeech.s2t.modules.decoder module
=======================================

.. automodule:: paddlespeech.s2t.modules.decoder
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.modules.decoder_layer.rst
================================================
paddlespeech.s2t.modules.decoder\_layer module
==============================================

.. automodule:: paddlespeech.s2t.modules.decoder_layer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.modules.embedding.rst
================================================
paddlespeech.s2t.modules.embedding module
=========================================

.. automodule:: paddlespeech.s2t.modules.embedding
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.modules.encoder.rst
================================================
paddlespeech.s2t.modules.encoder module
=======================================

.. automodule:: paddlespeech.s2t.modules.encoder
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.modules.encoder_layer.rst
================================================
paddlespeech.s2t.modules.encoder\_layer module
==============================================

.. automodule:: paddlespeech.s2t.modules.encoder_layer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.modules.initializer.rst
================================================
paddlespeech.s2t.modules.initializer module
===========================================

.. automodule:: paddlespeech.s2t.modules.initializer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.modules.loss.rst
================================================
paddlespeech.s2t.modules.loss module
====================================

.. automodule:: paddlespeech.s2t.modules.loss
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.modules.mask.rst
================================================
paddlespeech.s2t.modules.mask module
====================================

.. automodule:: paddlespeech.s2t.modules.mask
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.modules.positionwise_feed_forward.rst
================================================
paddlespeech.s2t.modules.positionwise\_feed\_forward module
===========================================================

.. automodule:: paddlespeech.s2t.modules.positionwise_feed_forward
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.modules.rst
================================================
paddlespeech.s2t.modules package
================================

.. automodule:: paddlespeech.s2t.modules
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.modules.activation
   paddlespeech.s2t.modules.align
   paddlespeech.s2t.modules.attention
   paddlespeech.s2t.modules.cmvn
   paddlespeech.s2t.modules.conformer_convolution
   paddlespeech.s2t.modules.crf
   paddlespeech.s2t.modules.ctc
   paddlespeech.s2t.modules.decoder
   paddlespeech.s2t.modules.decoder_layer
   paddlespeech.s2t.modules.embedding
   paddlespeech.s2t.modules.encoder
   paddlespeech.s2t.modules.encoder_layer
   paddlespeech.s2t.modules.initializer
   paddlespeech.s2t.modules.loss
   paddlespeech.s2t.modules.mask
   paddlespeech.s2t.modules.positionwise_feed_forward
   paddlespeech.s2t.modules.subsampling


================================================
FILE: docs/source/api/paddlespeech.s2t.modules.subsampling.rst
================================================
paddlespeech.s2t.modules.subsampling module
===========================================

.. automodule:: paddlespeech.s2t.modules.subsampling
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.rst
================================================
paddlespeech.s2t package
========================

.. automodule:: paddlespeech.s2t
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.decoders
   paddlespeech.s2t.exps
   paddlespeech.s2t.frontend
   paddlespeech.s2t.io
   paddlespeech.s2t.models
   paddlespeech.s2t.modules
   paddlespeech.s2t.training
   paddlespeech.s2t.utils


================================================
FILE: docs/source/api/paddlespeech.s2t.training.cli.rst
================================================
paddlespeech.s2t.training.cli module
====================================

.. automodule:: paddlespeech.s2t.training.cli
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.training.extensions.evaluator.rst
================================================
paddlespeech.s2t.training.extensions.evaluator module
=====================================================

.. automodule:: paddlespeech.s2t.training.extensions.evaluator
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.training.extensions.extension.rst
================================================
paddlespeech.s2t.training.extensions.extension module
=====================================================

.. automodule:: paddlespeech.s2t.training.extensions.extension
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.training.extensions.plot.rst
================================================
paddlespeech.s2t.training.extensions.plot module
================================================

.. automodule:: paddlespeech.s2t.training.extensions.plot
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.training.extensions.rst
================================================
paddlespeech.s2t.training.extensions package
============================================

.. automodule:: paddlespeech.s2t.training.extensions
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.training.extensions.evaluator
   paddlespeech.s2t.training.extensions.extension
   paddlespeech.s2t.training.extensions.plot


================================================
FILE: docs/source/api/paddlespeech.s2t.training.gradclip.rst
================================================
paddlespeech.s2t.training.gradclip module
=========================================

.. automodule:: paddlespeech.s2t.training.gradclip
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.training.optimizer.rst
================================================
paddlespeech.s2t.training.optimizer module
==========================================

.. automodule:: paddlespeech.s2t.training.optimizer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.training.reporter.rst
================================================
paddlespeech.s2t.training.reporter module
=========================================

.. automodule:: paddlespeech.s2t.training.reporter
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.training.rst
================================================
paddlespeech.s2t.training package
=================================

.. automodule:: paddlespeech.s2t.training
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.training.extensions
   paddlespeech.s2t.training.triggers
   paddlespeech.s2t.training.updaters

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.training.cli
   paddlespeech.s2t.training.gradclip
   paddlespeech.s2t.training.optimizer
   paddlespeech.s2t.training.reporter
   paddlespeech.s2t.training.scheduler
   paddlespeech.s2t.training.timer
   paddlespeech.s2t.training.trainer


================================================
FILE: docs/source/api/paddlespeech.s2t.training.scheduler.rst
================================================
paddlespeech.s2t.training.scheduler module
==========================================

.. automodule:: paddlespeech.s2t.training.scheduler
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.training.timer.rst
================================================
paddlespeech.s2t.training.timer module
======================================

.. automodule:: paddlespeech.s2t.training.timer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.training.trainer.rst
================================================
paddlespeech.s2t.training.trainer module
========================================

.. automodule:: paddlespeech.s2t.training.trainer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.training.triggers.compare_value_trigger.rst
================================================
paddlespeech.s2t.training.triggers.compare\_value\_trigger module
=================================================================

.. automodule:: paddlespeech.s2t.training.triggers.compare_value_trigger
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.training.triggers.interval_trigger.rst
================================================
paddlespeech.s2t.training.triggers.interval\_trigger module
===========================================================

.. automodule:: paddlespeech.s2t.training.triggers.interval_trigger
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.training.triggers.limit_trigger.rst
================================================
paddlespeech.s2t.training.triggers.limit\_trigger module
========================================================

.. automodule:: paddlespeech.s2t.training.triggers.limit_trigger
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.training.triggers.rst
================================================
paddlespeech.s2t.training.triggers package
==========================================

.. automodule:: paddlespeech.s2t.training.triggers
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.training.triggers.compare_value_trigger
   paddlespeech.s2t.training.triggers.interval_trigger
   paddlespeech.s2t.training.triggers.limit_trigger
   paddlespeech.s2t.training.triggers.time_trigger
   paddlespeech.s2t.training.triggers.utils


================================================
FILE: docs/source/api/paddlespeech.s2t.training.triggers.time_trigger.rst
================================================
paddlespeech.s2t.training.triggers.time\_trigger module
=======================================================

.. automodule:: paddlespeech.s2t.training.triggers.time_trigger
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.training.triggers.utils.rst
================================================
paddlespeech.s2t.training.triggers.utils module
===============================================

.. automodule:: paddlespeech.s2t.training.triggers.utils
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.training.updaters.rst
================================================
paddlespeech.s2t.training.updaters package
==========================================

.. automodule:: paddlespeech.s2t.training.updaters
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.training.updaters.standard_updater
   paddlespeech.s2t.training.updaters.updater


================================================
FILE: docs/source/api/paddlespeech.s2t.training.updaters.standard_updater.rst
================================================
paddlespeech.s2t.training.updaters.standard\_updater module
===========================================================

.. automodule:: paddlespeech.s2t.training.updaters.standard_updater
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.training.updaters.updater.rst
================================================
paddlespeech.s2t.training.updaters.updater module
=================================================

.. automodule:: paddlespeech.s2t.training.updaters.updater
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.utils.asr_utils.rst
================================================
paddlespeech.s2t.utils.asr\_utils module
========================================

.. automodule:: paddlespeech.s2t.utils.asr_utils
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.utils.bleu_score.rst
================================================
paddlespeech.s2t.utils.bleu\_score module
=========================================

.. automodule:: paddlespeech.s2t.utils.bleu_score
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.utils.check_kwargs.rst
================================================
paddlespeech.s2t.utils.check\_kwargs module
===========================================

.. automodule:: paddlespeech.s2t.utils.check_kwargs
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.utils.checkpoint.rst
================================================
paddlespeech.s2t.utils.checkpoint module
========================================

.. automodule:: paddlespeech.s2t.utils.checkpoint
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.utils.cli_readers.rst
================================================
paddlespeech.s2t.utils.cli\_readers module
==========================================

.. automodule:: paddlespeech.s2t.utils.cli_readers
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.utils.cli_utils.rst
================================================
paddlespeech.s2t.utils.cli\_utils module
========================================

.. automodule:: paddlespeech.s2t.utils.cli_utils
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.utils.cli_writers.rst
================================================
paddlespeech.s2t.utils.cli\_writers module
==========================================

.. automodule:: paddlespeech.s2t.utils.cli_writers
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.utils.ctc_utils.rst
================================================
paddlespeech.s2t.utils.ctc\_utils module
========================================

.. automodule:: paddlespeech.s2t.utils.ctc_utils
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.utils.dynamic_import.rst
================================================
paddlespeech.s2t.utils.dynamic\_import module
=============================================

.. automodule:: paddlespeech.s2t.utils.dynamic_import
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.utils.dynamic_pip_install.rst
================================================
paddlespeech.s2t.utils.dynamic\_pip\_install module
===================================================

.. automodule:: paddlespeech.s2t.utils.dynamic_pip_install
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.utils.error_rate.rst
================================================
paddlespeech.s2t.utils.error\_rate module
=========================================

.. automodule:: paddlespeech.s2t.utils.error_rate
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.utils.layer_tools.rst
================================================
paddlespeech.s2t.utils.layer\_tools module
==========================================

.. automodule:: paddlespeech.s2t.utils.layer_tools
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.utils.log.rst
================================================
paddlespeech.s2t.utils.log module
=================================

.. automodule:: paddlespeech.s2t.utils.log
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.utils.mp_tools.rst
================================================
paddlespeech.s2t.utils.mp\_tools module
=======================================

.. automodule:: paddlespeech.s2t.utils.mp_tools
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.utils.profiler.rst
================================================
paddlespeech.s2t.utils.profiler module
======================================

.. automodule:: paddlespeech.s2t.utils.profiler
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.utils.rst
================================================
paddlespeech.s2t.utils package
==============================

.. automodule:: paddlespeech.s2t.utils
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.s2t.utils.asr_utils
   paddlespeech.s2t.utils.bleu_score
   paddlespeech.s2t.utils.check_kwargs
   paddlespeech.s2t.utils.checkpoint
   paddlespeech.s2t.utils.cli_readers
   paddlespeech.s2t.utils.cli_utils
   paddlespeech.s2t.utils.cli_writers
   paddlespeech.s2t.utils.ctc_utils
   paddlespeech.s2t.utils.dynamic_import
   paddlespeech.s2t.utils.dynamic_pip_install
   paddlespeech.s2t.utils.error_rate
   paddlespeech.s2t.utils.layer_tools
   paddlespeech.s2t.utils.log
   paddlespeech.s2t.utils.mp_tools
   paddlespeech.s2t.utils.profiler
   paddlespeech.s2t.utils.socket_server
   paddlespeech.s2t.utils.spec_augment
   paddlespeech.s2t.utils.tensor_utils
   paddlespeech.s2t.utils.text_grid
   paddlespeech.s2t.utils.utility


================================================
FILE: docs/source/api/paddlespeech.s2t.utils.socket_server.rst
================================================
paddlespeech.s2t.utils.socket\_server module
============================================

.. automodule:: paddlespeech.s2t.utils.socket_server
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.utils.spec_augment.rst
================================================
paddlespeech.s2t.utils.spec\_augment module
===========================================

.. automodule:: paddlespeech.s2t.utils.spec_augment
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.utils.tensor_utils.rst
================================================
paddlespeech.s2t.utils.tensor\_utils module
===========================================

.. automodule:: paddlespeech.s2t.utils.tensor_utils
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.utils.text_grid.rst
================================================
paddlespeech.s2t.utils.text\_grid module
========================================

.. automodule:: paddlespeech.s2t.utils.text_grid
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.s2t.utils.utility.rst
================================================
paddlespeech.s2t.utils.utility module
=====================================

.. automodule:: paddlespeech.s2t.utils.utility
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.base_commands.rst
================================================
paddlespeech.server.base\_commands module
=========================================

.. automodule:: paddlespeech.server.base_commands
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.bin.paddlespeech_client.rst
================================================
paddlespeech.server.bin.paddlespeech\_client module
===================================================

.. automodule:: paddlespeech.server.bin.paddlespeech_client
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.bin.paddlespeech_server.rst
================================================
paddlespeech.server.bin.paddlespeech\_server module
===================================================

.. automodule:: paddlespeech.server.bin.paddlespeech_server
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.bin.rst
================================================
paddlespeech.server.bin package
===============================

.. automodule:: paddlespeech.server.bin
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.bin.paddlespeech_client
   paddlespeech.server.bin.paddlespeech_server


================================================
FILE: docs/source/api/paddlespeech.server.engine.acs.python.rst
================================================
paddlespeech.server.engine.acs.python package
=============================================

.. automodule:: paddlespeech.server.engine.acs.python
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. No submodule pages are listed for this package, so the toctree below has no entries.

.. toctree::
   :maxdepth: 4


================================================
FILE: docs/source/api/paddlespeech.server.engine.acs.rst
================================================
paddlespeech.server.engine.acs package
======================================

.. automodule:: paddlespeech.server.engine.acs
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.engine.acs.python


================================================
FILE: docs/source/api/paddlespeech.server.engine.asr.online.ctc_endpoint.rst
================================================
paddlespeech.server.engine.asr.online.ctc\_endpoint module
==========================================================

.. automodule:: paddlespeech.server.engine.asr.online.ctc_endpoint
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.engine.asr.online.ctc_search.rst
================================================
paddlespeech.server.engine.asr.online.ctc\_search module
========================================================

.. automodule:: paddlespeech.server.engine.asr.online.ctc_search
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.engine.asr.online.onnx.asr_engine.rst
================================================
paddlespeech.server.engine.asr.online.onnx.asr\_engine module
=============================================================

.. automodule:: paddlespeech.server.engine.asr.online.onnx.asr_engine
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.engine.asr.online.onnx.rst
================================================
paddlespeech.server.engine.asr.online.onnx package
==================================================

.. automodule:: paddlespeech.server.engine.asr.online.onnx
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.engine.asr.online.onnx.asr_engine


================================================
FILE: docs/source/api/paddlespeech.server.engine.asr.online.paddleinference.asr_engine.rst
================================================
paddlespeech.server.engine.asr.online.paddleinference.asr\_engine module
========================================================================

.. automodule:: paddlespeech.server.engine.asr.online.paddleinference.asr_engine
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.engine.asr.online.paddleinference.rst
================================================
paddlespeech.server.engine.asr.online.paddleinference package
=============================================================

.. automodule:: paddlespeech.server.engine.asr.online.paddleinference
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.engine.asr.online.paddleinference.asr_engine


================================================
FILE: docs/source/api/paddlespeech.server.engine.asr.online.python.asr_engine.rst
================================================
paddlespeech.server.engine.asr.online.python.asr\_engine module
===============================================================

.. automodule:: paddlespeech.server.engine.asr.online.python.asr_engine
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.engine.asr.online.python.rst
================================================
paddlespeech.server.engine.asr.online.python package
====================================================

.. automodule:: paddlespeech.server.engine.asr.online.python
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.engine.asr.online.python.asr_engine


================================================
FILE: docs/source/api/paddlespeech.server.engine.asr.online.rst
================================================
paddlespeech.server.engine.asr.online package
=============================================

.. automodule:: paddlespeech.server.engine.asr.online
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.engine.asr.online.onnx
   paddlespeech.server.engine.asr.online.paddleinference
   paddlespeech.server.engine.asr.online.python

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.engine.asr.online.ctc_endpoint
   paddlespeech.server.engine.asr.online.ctc_search


================================================
FILE: docs/source/api/paddlespeech.server.engine.asr.paddleinference.asr_engine.rst
================================================
paddlespeech.server.engine.asr.paddleinference.asr\_engine module
=================================================================

.. automodule:: paddlespeech.server.engine.asr.paddleinference.asr_engine
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.engine.asr.paddleinference.rst
================================================
paddlespeech.server.engine.asr.paddleinference package
======================================================

.. automodule:: paddlespeech.server.engine.asr.paddleinference
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.engine.asr.paddleinference.asr_engine


================================================
FILE: docs/source/api/paddlespeech.server.engine.asr.python.asr_engine.rst
================================================
paddlespeech.server.engine.asr.python.asr\_engine module
========================================================

.. automodule:: paddlespeech.server.engine.asr.python.asr_engine
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.engine.asr.python.rst
================================================
paddlespeech.server.engine.asr.python package
=============================================

.. automodule:: paddlespeech.server.engine.asr.python
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.engine.asr.python.asr_engine


================================================
FILE: docs/source/api/paddlespeech.server.engine.asr.rst
================================================
paddlespeech.server.engine.asr package
======================================

.. automodule:: paddlespeech.server.engine.asr
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.engine.asr.online
   paddlespeech.server.engine.asr.paddleinference
   paddlespeech.server.engine.asr.python


================================================
FILE: docs/source/api/paddlespeech.server.engine.base_engine.rst
================================================
paddlespeech.server.engine.base\_engine module
==============================================

.. automodule:: paddlespeech.server.engine.base_engine
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.engine.cls.paddleinference.cls_engine.rst
================================================
paddlespeech.server.engine.cls.paddleinference.cls\_engine module
=================================================================

.. automodule:: paddlespeech.server.engine.cls.paddleinference.cls_engine
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.engine.cls.paddleinference.rst
================================================
paddlespeech.server.engine.cls.paddleinference package
======================================================

.. automodule:: paddlespeech.server.engine.cls.paddleinference
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.engine.cls.paddleinference.cls_engine


================================================
FILE: docs/source/api/paddlespeech.server.engine.cls.python.cls_engine.rst
================================================
paddlespeech.server.engine.cls.python.cls\_engine module
========================================================

.. automodule:: paddlespeech.server.engine.cls.python.cls_engine
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.engine.cls.python.rst
================================================
paddlespeech.server.engine.cls.python package
=============================================

.. automodule:: paddlespeech.server.engine.cls.python
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.engine.cls.python.cls_engine


================================================
FILE: docs/source/api/paddlespeech.server.engine.cls.rst
================================================
paddlespeech.server.engine.cls package
======================================

.. automodule:: paddlespeech.server.engine.cls
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.engine.cls.paddleinference
   paddlespeech.server.engine.cls.python


================================================
FILE: docs/source/api/paddlespeech.server.engine.engine_factory.rst
================================================
paddlespeech.server.engine.engine\_factory module
=================================================

.. automodule:: paddlespeech.server.engine.engine_factory
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.engine.engine_pool.rst
================================================
paddlespeech.server.engine.engine\_pool module
==============================================

.. automodule:: paddlespeech.server.engine.engine_pool
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.engine.engine_warmup.rst
================================================
paddlespeech.server.engine.engine\_warmup module
================================================

.. automodule:: paddlespeech.server.engine.engine_warmup
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.engine.rst
================================================
paddlespeech.server.engine package
==================================

.. automodule:: paddlespeech.server.engine
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.engine.acs
   paddlespeech.server.engine.asr
   paddlespeech.server.engine.cls
   paddlespeech.server.engine.text
   paddlespeech.server.engine.tts
   paddlespeech.server.engine.vector

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.engine.base_engine
   paddlespeech.server.engine.engine_factory
   paddlespeech.server.engine.engine_pool
   paddlespeech.server.engine.engine_warmup


================================================
FILE: docs/source/api/paddlespeech.server.engine.text.python.rst
================================================
paddlespeech.server.engine.text.python package
==============================================

.. automodule:: paddlespeech.server.engine.text.python
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.engine.text.python.text_engine


================================================
FILE: docs/source/api/paddlespeech.server.engine.text.python.text_engine.rst
================================================
paddlespeech.server.engine.text.python.text\_engine module
==========================================================

.. automodule:: paddlespeech.server.engine.text.python.text_engine
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.engine.text.rst
================================================
paddlespeech.server.engine.text package
=======================================

.. automodule:: paddlespeech.server.engine.text
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.engine.text.python


================================================
FILE: docs/source/api/paddlespeech.server.engine.tts.online.onnx.rst
================================================
paddlespeech.server.engine.tts.online.onnx package
==================================================

.. automodule:: paddlespeech.server.engine.tts.online.onnx
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.engine.tts.online.onnx.tts_engine


================================================
FILE: docs/source/api/paddlespeech.server.engine.tts.online.onnx.tts_engine.rst
================================================
paddlespeech.server.engine.tts.online.onnx.tts\_engine module
=============================================================

.. automodule:: paddlespeech.server.engine.tts.online.onnx.tts_engine
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.engine.tts.online.python.rst
================================================
paddlespeech.server.engine.tts.online.python package
====================================================

.. automodule:: paddlespeech.server.engine.tts.online.python
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.engine.tts.online.python.tts_engine


================================================
FILE: docs/source/api/paddlespeech.server.engine.tts.online.python.tts_engine.rst
================================================
paddlespeech.server.engine.tts.online.python.tts\_engine module
===============================================================

.. automodule:: paddlespeech.server.engine.tts.online.python.tts_engine
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.engine.tts.online.rst
================================================
paddlespeech.server.engine.tts.online package
=============================================

.. automodule:: paddlespeech.server.engine.tts.online
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.engine.tts.online.onnx
   paddlespeech.server.engine.tts.online.python


================================================
FILE: docs/source/api/paddlespeech.server.engine.tts.paddleinference.rst
================================================
paddlespeech.server.engine.tts.paddleinference package
======================================================

.. automodule:: paddlespeech.server.engine.tts.paddleinference
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.engine.tts.paddleinference.tts_engine


================================================
FILE: docs/source/api/paddlespeech.server.engine.tts.paddleinference.tts_engine.rst
================================================
paddlespeech.server.engine.tts.paddleinference.tts\_engine module
=================================================================

.. automodule:: paddlespeech.server.engine.tts.paddleinference.tts_engine
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.engine.tts.python.rst
================================================
paddlespeech.server.engine.tts.python package
=============================================

.. automodule:: paddlespeech.server.engine.tts.python
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.engine.tts.python.tts_engine


================================================
FILE: docs/source/api/paddlespeech.server.engine.tts.python.tts_engine.rst
================================================
paddlespeech.server.engine.tts.python.tts\_engine module
========================================================

.. automodule:: paddlespeech.server.engine.tts.python.tts_engine
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.engine.tts.rst
================================================
paddlespeech.server.engine.tts package
======================================

.. automodule:: paddlespeech.server.engine.tts
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.engine.tts.online
   paddlespeech.server.engine.tts.paddleinference
   paddlespeech.server.engine.tts.python


================================================
FILE: docs/source/api/paddlespeech.server.engine.vector.python.rst
================================================
paddlespeech.server.engine.vector.python package
================================================

.. automodule:: paddlespeech.server.engine.vector.python
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.engine.vector.python.vector_engine


================================================
FILE: docs/source/api/paddlespeech.server.engine.vector.python.vector_engine.rst
================================================
paddlespeech.server.engine.vector.python.vector\_engine module
==============================================================

.. automodule:: paddlespeech.server.engine.vector.python.vector_engine
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.engine.vector.rst
================================================
paddlespeech.server.engine.vector package
=========================================

.. automodule:: paddlespeech.server.engine.vector
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.engine.vector.python


================================================
FILE: docs/source/api/paddlespeech.server.entry.rst
================================================
paddlespeech.server.entry module
================================

.. automodule:: paddlespeech.server.entry
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.executor.rst
================================================
paddlespeech.server.executor module
===================================

.. automodule:: paddlespeech.server.executor
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.restful.acs_api.rst
================================================
paddlespeech.server.restful.acs\_api module
===========================================

.. automodule:: paddlespeech.server.restful.acs_api
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.restful.api.rst
================================================
paddlespeech.server.restful.api module
======================================

.. automodule:: paddlespeech.server.restful.api
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.restful.asr_api.rst
================================================
paddlespeech.server.restful.asr\_api module
===========================================

.. automodule:: paddlespeech.server.restful.asr_api
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.restful.cls_api.rst
================================================
paddlespeech.server.restful.cls\_api module
===========================================

.. automodule:: paddlespeech.server.restful.cls_api
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.restful.request.rst
================================================
paddlespeech.server.restful.request module
==========================================

.. automodule:: paddlespeech.server.restful.request
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.restful.response.rst
================================================
paddlespeech.server.restful.response module
===========================================

.. automodule:: paddlespeech.server.restful.response
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.restful.rst
================================================
paddlespeech.server.restful package
===================================

.. automodule:: paddlespeech.server.restful
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.restful.acs_api
   paddlespeech.server.restful.api
   paddlespeech.server.restful.asr_api
   paddlespeech.server.restful.cls_api
   paddlespeech.server.restful.request
   paddlespeech.server.restful.response
   paddlespeech.server.restful.text_api
   paddlespeech.server.restful.tts_api
   paddlespeech.server.restful.vector_api


================================================
FILE: docs/source/api/paddlespeech.server.restful.text_api.rst
================================================
paddlespeech.server.restful.text\_api module
============================================

.. automodule:: paddlespeech.server.restful.text_api
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.restful.tts_api.rst
================================================
paddlespeech.server.restful.tts\_api module
===========================================

.. automodule:: paddlespeech.server.restful.tts_api
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.restful.vector_api.rst
================================================
paddlespeech.server.restful.vector\_api module
==============================================

.. automodule:: paddlespeech.server.restful.vector_api
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.rst
================================================
paddlespeech.server package
===========================

.. automodule:: paddlespeech.server
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.bin
   paddlespeech.server.engine
   paddlespeech.server.restful
   paddlespeech.server.tests
   paddlespeech.server.utils
   paddlespeech.server.ws

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.base_commands
   paddlespeech.server.entry
   paddlespeech.server.executor
   paddlespeech.server.util


================================================
FILE: docs/source/api/paddlespeech.server.tests.asr.offline.http_client.rst
================================================
paddlespeech.server.tests.asr.offline.http\_client module
=========================================================

.. automodule:: paddlespeech.server.tests.asr.offline.http_client
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.tests.asr.offline.rst
================================================
paddlespeech.server.tests.asr.offline package
=============================================

.. automodule:: paddlespeech.server.tests.asr.offline
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.tests.asr.offline.http_client


================================================
FILE: docs/source/api/paddlespeech.server.tests.asr.rst
================================================
paddlespeech.server.tests.asr package
=====================================

.. automodule:: paddlespeech.server.tests.asr
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.tests.asr.offline


================================================
FILE: docs/source/api/paddlespeech.server.tests.rst
================================================
paddlespeech.server.tests package
=================================

.. automodule:: paddlespeech.server.tests
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.tests.asr


================================================
FILE: docs/source/api/paddlespeech.server.util.rst
================================================
paddlespeech.server.util module
===============================

.. automodule:: paddlespeech.server.util
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.utils.audio_handler.rst
================================================
paddlespeech.server.utils.audio\_handler module
===============================================

.. automodule:: paddlespeech.server.utils.audio_handler
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.utils.audio_process.rst
================================================
paddlespeech.server.utils.audio\_process module
===============================================

.. automodule:: paddlespeech.server.utils.audio_process
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.utils.buffer.rst
================================================
paddlespeech.server.utils.buffer module
=======================================

.. automodule:: paddlespeech.server.utils.buffer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.utils.config.rst
================================================
paddlespeech.server.utils.config module
=======================================

.. automodule:: paddlespeech.server.utils.config
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.utils.errors.rst
================================================
paddlespeech.server.utils.errors module
=======================================

.. automodule:: paddlespeech.server.utils.errors
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.utils.exception.rst
================================================
paddlespeech.server.utils.exception module
==========================================

.. automodule:: paddlespeech.server.utils.exception
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.utils.onnx_infer.rst
================================================
paddlespeech.server.utils.onnx\_infer module
============================================

.. automodule:: paddlespeech.server.utils.onnx_infer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.utils.paddle_predictor.rst
================================================
paddlespeech.server.utils.paddle\_predictor module
==================================================

.. automodule:: paddlespeech.server.utils.paddle_predictor
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.utils.rst
================================================
paddlespeech.server.utils package
=================================

.. automodule:: paddlespeech.server.utils
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.utils.audio_handler
   paddlespeech.server.utils.audio_process
   paddlespeech.server.utils.buffer
   paddlespeech.server.utils.config
   paddlespeech.server.utils.errors
   paddlespeech.server.utils.exception
   paddlespeech.server.utils.onnx_infer
   paddlespeech.server.utils.paddle_predictor
   paddlespeech.server.utils.util
   paddlespeech.server.utils.vad


================================================
FILE: docs/source/api/paddlespeech.server.utils.util.rst
================================================
paddlespeech.server.utils.util module
=====================================

.. automodule:: paddlespeech.server.utils.util
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.utils.vad.rst
================================================
paddlespeech.server.utils.vad module
====================================

.. automodule:: paddlespeech.server.utils.vad
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.ws.api.rst
================================================
paddlespeech.server.ws.api module
=================================

.. automodule:: paddlespeech.server.ws.api
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.ws.asr_api.rst
================================================
paddlespeech.server.ws.asr\_api module
======================================

.. automodule:: paddlespeech.server.ws.asr_api
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.server.ws.rst
================================================
paddlespeech.server.ws package
==============================

.. automodule:: paddlespeech.server.ws
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.server.ws.api
   paddlespeech.server.ws.asr_api
   paddlespeech.server.ws.tts_api


================================================
FILE: docs/source/api/paddlespeech.server.ws.tts_api.rst
================================================
paddlespeech.server.ws.tts\_api module
======================================

.. automodule:: paddlespeech.server.ws.tts_api
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.audio.audio.rst
================================================
paddlespeech.t2s.audio.audio module
===================================

.. automodule:: paddlespeech.t2s.audio.audio
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.audio.codec.rst
================================================
paddlespeech.t2s.audio.codec module
===================================

.. automodule:: paddlespeech.t2s.audio.codec
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.audio.rst
================================================
paddlespeech.t2s.audio package
==============================

.. automodule:: paddlespeech.t2s.audio
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.audio.audio
   paddlespeech.t2s.audio.codec
   paddlespeech.t2s.audio.spec_normalizer


================================================
FILE: docs/source/api/paddlespeech.t2s.audio.spec_normalizer.rst
================================================
paddlespeech.t2s.audio.spec\_normalizer module
==============================================

.. automodule:: paddlespeech.t2s.audio.spec_normalizer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.datasets.am_batch_fn.rst
================================================
paddlespeech.t2s.datasets.am\_batch\_fn module
==============================================

.. automodule:: paddlespeech.t2s.datasets.am_batch_fn
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.datasets.batch.rst
================================================
paddlespeech.t2s.datasets.batch module
======================================

.. automodule:: paddlespeech.t2s.datasets.batch
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.datasets.data_table.rst
================================================
paddlespeech.t2s.datasets.data\_table module
============================================

.. automodule:: paddlespeech.t2s.datasets.data_table
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.datasets.dataset.rst
================================================
paddlespeech.t2s.datasets.dataset module
========================================

.. automodule:: paddlespeech.t2s.datasets.dataset
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.datasets.get_feats.rst
================================================
paddlespeech.t2s.datasets.get\_feats module
===========================================

.. automodule:: paddlespeech.t2s.datasets.get_feats
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.datasets.ljspeech.rst
================================================
paddlespeech.t2s.datasets.ljspeech module
=========================================

.. automodule:: paddlespeech.t2s.datasets.ljspeech
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.datasets.preprocess_utils.rst
================================================
paddlespeech.t2s.datasets.preprocess\_utils module
==================================================

.. automodule:: paddlespeech.t2s.datasets.preprocess_utils
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.datasets.rst
================================================
paddlespeech.t2s.datasets package
=================================

.. automodule:: paddlespeech.t2s.datasets
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.datasets.am_batch_fn
   paddlespeech.t2s.datasets.batch
   paddlespeech.t2s.datasets.data_table
   paddlespeech.t2s.datasets.dataset
   paddlespeech.t2s.datasets.get_feats
   paddlespeech.t2s.datasets.ljspeech
   paddlespeech.t2s.datasets.preprocess_utils
   paddlespeech.t2s.datasets.sampler
   paddlespeech.t2s.datasets.vocoder_batch_fn


================================================
FILE: docs/source/api/paddlespeech.t2s.datasets.sampler.rst
================================================
paddlespeech.t2s.datasets.sampler module
========================================

.. automodule:: paddlespeech.t2s.datasets.sampler
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.datasets.vocoder_batch_fn.rst
================================================
paddlespeech.t2s.datasets.vocoder\_batch\_fn module
===================================================

.. automodule:: paddlespeech.t2s.datasets.vocoder_batch_fn
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.ernie_sat.align.rst
================================================
paddlespeech.t2s.exps.ernie\_sat.align module
=============================================

.. automodule:: paddlespeech.t2s.exps.ernie_sat.align
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.ernie_sat.normalize.rst
================================================
paddlespeech.t2s.exps.ernie\_sat.normalize module
=================================================

.. automodule:: paddlespeech.t2s.exps.ernie_sat.normalize
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.ernie_sat.preprocess.rst
================================================
paddlespeech.t2s.exps.ernie\_sat.preprocess module
==================================================

.. automodule:: paddlespeech.t2s.exps.ernie_sat.preprocess
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.ernie_sat.rst
================================================
paddlespeech.t2s.exps.ernie\_sat package
========================================

.. automodule:: paddlespeech.t2s.exps.ernie_sat
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.exps.ernie_sat.align
   paddlespeech.t2s.exps.ernie_sat.normalize
   paddlespeech.t2s.exps.ernie_sat.preprocess
   paddlespeech.t2s.exps.ernie_sat.synthesize
   paddlespeech.t2s.exps.ernie_sat.synthesize_e2e
   paddlespeech.t2s.exps.ernie_sat.train
   paddlespeech.t2s.exps.ernie_sat.utils


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.ernie_sat.synthesize.rst
================================================
paddlespeech.t2s.exps.ernie\_sat.synthesize module
==================================================

.. automodule:: paddlespeech.t2s.exps.ernie_sat.synthesize
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.ernie_sat.synthesize_e2e.rst
================================================
paddlespeech.t2s.exps.ernie\_sat.synthesize\_e2e module
=======================================================

.. automodule:: paddlespeech.t2s.exps.ernie_sat.synthesize_e2e
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.ernie_sat.train.rst
================================================
paddlespeech.t2s.exps.ernie\_sat.train module
=============================================

.. automodule:: paddlespeech.t2s.exps.ernie_sat.train
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.ernie_sat.utils.rst
================================================
paddlespeech.t2s.exps.ernie\_sat.utils module
=============================================

.. automodule:: paddlespeech.t2s.exps.ernie_sat.utils
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.fastspeech2.gen_gta_mel.rst
================================================
paddlespeech.t2s.exps.fastspeech2.gen\_gta\_mel module
======================================================

.. automodule:: paddlespeech.t2s.exps.fastspeech2.gen_gta_mel
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.fastspeech2.normalize.rst
================================================
paddlespeech.t2s.exps.fastspeech2.normalize module
==================================================

.. automodule:: paddlespeech.t2s.exps.fastspeech2.normalize
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.fastspeech2.preprocess.rst
================================================
paddlespeech.t2s.exps.fastspeech2.preprocess module
===================================================

.. automodule:: paddlespeech.t2s.exps.fastspeech2.preprocess
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.fastspeech2.rst
================================================
paddlespeech.t2s.exps.fastspeech2 package
=========================================

.. automodule:: paddlespeech.t2s.exps.fastspeech2
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.exps.fastspeech2.gen_gta_mel
   paddlespeech.t2s.exps.fastspeech2.normalize
   paddlespeech.t2s.exps.fastspeech2.preprocess
   paddlespeech.t2s.exps.fastspeech2.train
   paddlespeech.t2s.exps.fastspeech2.vc2_infer


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.fastspeech2.train.rst
================================================
paddlespeech.t2s.exps.fastspeech2.train module
==============================================

.. automodule:: paddlespeech.t2s.exps.fastspeech2.train
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.fastspeech2.vc2_infer.rst
================================================
paddlespeech.t2s.exps.fastspeech2.vc2\_infer module
===================================================

.. automodule:: paddlespeech.t2s.exps.fastspeech2.vc2_infer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.gan_vocoder.hifigan.rst
================================================
paddlespeech.t2s.exps.gan\_vocoder.hifigan package
==================================================

.. automodule:: paddlespeech.t2s.exps.gan_vocoder.hifigan
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.exps.gan_vocoder.hifigan.train


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.gan_vocoder.hifigan.train.rst
================================================
paddlespeech.t2s.exps.gan\_vocoder.hifigan.train module
=======================================================

.. automodule:: paddlespeech.t2s.exps.gan_vocoder.hifigan.train
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.gan_vocoder.multi_band_melgan.rst
================================================
paddlespeech.t2s.exps.gan\_vocoder.multi\_band\_melgan package
==============================================================

.. automodule:: paddlespeech.t2s.exps.gan_vocoder.multi_band_melgan
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.exps.gan_vocoder.multi_band_melgan.train


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.gan_vocoder.multi_band_melgan.train.rst
================================================
paddlespeech.t2s.exps.gan\_vocoder.multi\_band\_melgan.train module
===================================================================

.. automodule:: paddlespeech.t2s.exps.gan_vocoder.multi_band_melgan.train
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.gan_vocoder.normalize.rst
================================================
paddlespeech.t2s.exps.gan\_vocoder.normalize module
===================================================

.. automodule:: paddlespeech.t2s.exps.gan_vocoder.normalize
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.gan_vocoder.parallelwave_gan.rst
================================================
paddlespeech.t2s.exps.gan\_vocoder.parallelwave\_gan package
============================================================

.. automodule:: paddlespeech.t2s.exps.gan_vocoder.parallelwave_gan
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.exps.gan_vocoder.parallelwave_gan.synthesize_from_wav
   paddlespeech.t2s.exps.gan_vocoder.parallelwave_gan.train


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.gan_vocoder.parallelwave_gan.synthesize_from_wav.rst
================================================
paddlespeech.t2s.exps.gan\_vocoder.parallelwave\_gan.synthesize\_from\_wav module
=================================================================================

.. automodule:: paddlespeech.t2s.exps.gan_vocoder.parallelwave_gan.synthesize_from_wav
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.gan_vocoder.parallelwave_gan.train.rst
================================================
paddlespeech.t2s.exps.gan\_vocoder.parallelwave\_gan.train module
=================================================================

.. automodule:: paddlespeech.t2s.exps.gan_vocoder.parallelwave_gan.train
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.gan_vocoder.preprocess.rst
================================================
paddlespeech.t2s.exps.gan\_vocoder.preprocess module
====================================================

.. automodule:: paddlespeech.t2s.exps.gan_vocoder.preprocess
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.gan_vocoder.rst
================================================
paddlespeech.t2s.exps.gan\_vocoder package
==========================================

.. automodule:: paddlespeech.t2s.exps.gan_vocoder
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.exps.gan_vocoder.hifigan
   paddlespeech.t2s.exps.gan_vocoder.multi_band_melgan
   paddlespeech.t2s.exps.gan_vocoder.parallelwave_gan
   paddlespeech.t2s.exps.gan_vocoder.style_melgan

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.exps.gan_vocoder.normalize
   paddlespeech.t2s.exps.gan_vocoder.preprocess
   paddlespeech.t2s.exps.gan_vocoder.synthesize


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.gan_vocoder.style_melgan.rst
================================================
paddlespeech.t2s.exps.gan\_vocoder.style\_melgan package
========================================================

.. automodule:: paddlespeech.t2s.exps.gan_vocoder.style_melgan
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.exps.gan_vocoder.style_melgan.train


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.gan_vocoder.style_melgan.train.rst
================================================
paddlespeech.t2s.exps.gan\_vocoder.style\_melgan.train module
=============================================================

.. automodule:: paddlespeech.t2s.exps.gan_vocoder.style_melgan.train
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.gan_vocoder.synthesize.rst
================================================
paddlespeech.t2s.exps.gan\_vocoder.synthesize module
====================================================

.. automodule:: paddlespeech.t2s.exps.gan_vocoder.synthesize
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.inference.rst
================================================
paddlespeech.t2s.exps.inference module
======================================

.. automodule:: paddlespeech.t2s.exps.inference
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.inference_streaming.rst
================================================
paddlespeech.t2s.exps.inference\_streaming module
=================================================

.. automodule:: paddlespeech.t2s.exps.inference_streaming
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.ort_predict.rst
================================================
paddlespeech.t2s.exps.ort\_predict module
=========================================

.. automodule:: paddlespeech.t2s.exps.ort_predict
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.ort_predict_e2e.rst
================================================
paddlespeech.t2s.exps.ort\_predict\_e2e module
==============================================

.. automodule:: paddlespeech.t2s.exps.ort_predict_e2e
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.ort_predict_streaming.rst
================================================
paddlespeech.t2s.exps.ort\_predict\_streaming module
====================================================

.. automodule:: paddlespeech.t2s.exps.ort_predict_streaming
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.rst
================================================
paddlespeech.t2s.exps package
=============================

.. automodule:: paddlespeech.t2s.exps
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.exps.ernie_sat
   paddlespeech.t2s.exps.fastspeech2
   paddlespeech.t2s.exps.gan_vocoder
   paddlespeech.t2s.exps.speedyspeech
   paddlespeech.t2s.exps.tacotron2
   paddlespeech.t2s.exps.transformer_tts
   paddlespeech.t2s.exps.vits
   paddlespeech.t2s.exps.waveflow
   paddlespeech.t2s.exps.wavernn

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.exps.inference
   paddlespeech.t2s.exps.inference_streaming
   paddlespeech.t2s.models.vits.monotonic_align
   paddlespeech.t2s.exps.ort_predict
   paddlespeech.t2s.exps.ort_predict_e2e
   paddlespeech.t2s.exps.ort_predict_streaming
   paddlespeech.t2s.exps.stream_play_tts
   paddlespeech.t2s.exps.syn_utils
   paddlespeech.t2s.exps.synthesize
   paddlespeech.t2s.exps.synthesize_e2e
   paddlespeech.t2s.exps.synthesize_streaming
   paddlespeech.t2s.exps.voice_cloning


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.speedyspeech.gen_gta_mel.rst
================================================
paddlespeech.t2s.exps.speedyspeech.gen\_gta\_mel module
=======================================================

.. automodule:: paddlespeech.t2s.exps.speedyspeech.gen_gta_mel
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.speedyspeech.inference.rst
================================================
paddlespeech.t2s.exps.speedyspeech.inference module
===================================================

.. automodule:: paddlespeech.t2s.exps.speedyspeech.inference
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.speedyspeech.normalize.rst
================================================
paddlespeech.t2s.exps.speedyspeech.normalize module
===================================================

.. automodule:: paddlespeech.t2s.exps.speedyspeech.normalize
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.speedyspeech.preprocess.rst
================================================
paddlespeech.t2s.exps.speedyspeech.preprocess module
====================================================

.. automodule:: paddlespeech.t2s.exps.speedyspeech.preprocess
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.speedyspeech.rst
================================================
paddlespeech.t2s.exps.speedyspeech package
==========================================

.. automodule:: paddlespeech.t2s.exps.speedyspeech
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.exps.speedyspeech.gen_gta_mel
   paddlespeech.t2s.exps.speedyspeech.inference
   paddlespeech.t2s.exps.speedyspeech.normalize
   paddlespeech.t2s.exps.speedyspeech.preprocess
   paddlespeech.t2s.exps.speedyspeech.synthesize_e2e
   paddlespeech.t2s.exps.speedyspeech.train


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.speedyspeech.synthesize_e2e.rst
================================================
paddlespeech.t2s.exps.speedyspeech.synthesize\_e2e module
=========================================================

.. automodule:: paddlespeech.t2s.exps.speedyspeech.synthesize_e2e
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.speedyspeech.train.rst
================================================
paddlespeech.t2s.exps.speedyspeech.train module
===============================================

.. automodule:: paddlespeech.t2s.exps.speedyspeech.train
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.stream_play_tts.rst
================================================
paddlespeech.t2s.exps.stream\_play\_tts module
==============================================

.. automodule:: paddlespeech.t2s.exps.stream_play_tts
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.syn_utils.rst
================================================
paddlespeech.t2s.exps.syn\_utils module
=======================================

.. automodule:: paddlespeech.t2s.exps.syn_utils
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.synthesize.rst
================================================
paddlespeech.t2s.exps.synthesize module
=======================================

.. automodule:: paddlespeech.t2s.exps.synthesize
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.synthesize_e2e.rst
================================================
paddlespeech.t2s.exps.synthesize\_e2e module
============================================

.. automodule:: paddlespeech.t2s.exps.synthesize_e2e
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.synthesize_streaming.rst
================================================
paddlespeech.t2s.exps.synthesize\_streaming module
==================================================

.. automodule:: paddlespeech.t2s.exps.synthesize_streaming
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.tacotron2.normalize.rst
================================================
paddlespeech.t2s.exps.tacotron2.normalize module
================================================

.. automodule:: paddlespeech.t2s.exps.tacotron2.normalize
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.tacotron2.preprocess.rst
================================================
paddlespeech.t2s.exps.tacotron2.preprocess module
=================================================

.. automodule:: paddlespeech.t2s.exps.tacotron2.preprocess
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.tacotron2.rst
================================================
paddlespeech.t2s.exps.tacotron2 package
=======================================

.. automodule:: paddlespeech.t2s.exps.tacotron2
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.exps.tacotron2.normalize
   paddlespeech.t2s.exps.tacotron2.preprocess
   paddlespeech.t2s.exps.tacotron2.train


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.tacotron2.train.rst
================================================
paddlespeech.t2s.exps.tacotron2.train module
============================================

.. automodule:: paddlespeech.t2s.exps.tacotron2.train
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.transformer_tts.normalize.rst
================================================
paddlespeech.t2s.exps.transformer\_tts.normalize module
=======================================================

.. automodule:: paddlespeech.t2s.exps.transformer_tts.normalize
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.transformer_tts.preprocess.rst
================================================
paddlespeech.t2s.exps.transformer\_tts.preprocess module
========================================================

.. automodule:: paddlespeech.t2s.exps.transformer_tts.preprocess
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.transformer_tts.rst
================================================
paddlespeech.t2s.exps.transformer\_tts package
==============================================

.. automodule:: paddlespeech.t2s.exps.transformer_tts
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.exps.transformer_tts.normalize
   paddlespeech.t2s.exps.transformer_tts.preprocess
   paddlespeech.t2s.exps.transformer_tts.synthesize
   paddlespeech.t2s.exps.transformer_tts.synthesize_e2e
   paddlespeech.t2s.exps.transformer_tts.train


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.transformer_tts.synthesize.rst
================================================
paddlespeech.t2s.exps.transformer\_tts.synthesize module
========================================================

.. automodule:: paddlespeech.t2s.exps.transformer_tts.synthesize
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.transformer_tts.synthesize_e2e.rst
================================================
paddlespeech.t2s.exps.transformer\_tts.synthesize\_e2e module
=============================================================

.. automodule:: paddlespeech.t2s.exps.transformer_tts.synthesize_e2e
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.transformer_tts.train.rst
================================================
paddlespeech.t2s.exps.transformer\_tts.train module
===================================================

.. automodule:: paddlespeech.t2s.exps.transformer_tts.train
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.vits.normalize.rst
================================================
paddlespeech.t2s.exps.vits.normalize module
===========================================

.. automodule:: paddlespeech.t2s.exps.vits.normalize
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.vits.preprocess.rst
================================================
paddlespeech.t2s.exps.vits.preprocess module
============================================

.. automodule:: paddlespeech.t2s.exps.vits.preprocess
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.vits.rst
================================================
paddlespeech.t2s.exps.vits package
==================================

.. automodule:: paddlespeech.t2s.exps.vits
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.exps.vits.normalize
   paddlespeech.t2s.exps.vits.preprocess
   paddlespeech.t2s.exps.vits.synthesize
   paddlespeech.t2s.exps.vits.synthesize_e2e
   paddlespeech.t2s.exps.vits.train
   paddlespeech.t2s.exps.vits.voice_cloning


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.vits.synthesize.rst
================================================
paddlespeech.t2s.exps.vits.synthesize module
============================================

.. automodule:: paddlespeech.t2s.exps.vits.synthesize
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.vits.synthesize_e2e.rst
================================================
paddlespeech.t2s.exps.vits.synthesize\_e2e module
=================================================

.. automodule:: paddlespeech.t2s.exps.vits.synthesize_e2e
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.vits.train.rst
================================================
paddlespeech.t2s.exps.vits.train module
=======================================

.. automodule:: paddlespeech.t2s.exps.vits.train
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.vits.voice_cloning.rst
================================================
paddlespeech.t2s.exps.vits.voice\_cloning module
================================================

.. automodule:: paddlespeech.t2s.exps.vits.voice_cloning
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.voice_cloning.rst
================================================
paddlespeech.t2s.exps.voice\_cloning module
===========================================

.. automodule:: paddlespeech.t2s.exps.voice_cloning
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.waveflow.config.rst
================================================
paddlespeech.t2s.exps.waveflow.config module
============================================

.. automodule:: paddlespeech.t2s.exps.waveflow.config
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.waveflow.ljspeech.rst
================================================
paddlespeech.t2s.exps.waveflow.ljspeech module
==============================================

.. automodule:: paddlespeech.t2s.exps.waveflow.ljspeech
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.waveflow.preprocess.rst
================================================
paddlespeech.t2s.exps.waveflow.preprocess module
================================================

.. automodule:: paddlespeech.t2s.exps.waveflow.preprocess
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.waveflow.rst
================================================
paddlespeech.t2s.exps.waveflow package
======================================

.. automodule:: paddlespeech.t2s.exps.waveflow
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.exps.waveflow.config
   paddlespeech.t2s.exps.waveflow.ljspeech
   paddlespeech.t2s.exps.waveflow.preprocess
   paddlespeech.t2s.exps.waveflow.synthesize
   paddlespeech.t2s.exps.waveflow.train


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.waveflow.synthesize.rst
================================================
paddlespeech.t2s.exps.waveflow.synthesize module
================================================

.. automodule:: paddlespeech.t2s.exps.waveflow.synthesize
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.waveflow.train.rst
================================================
paddlespeech.t2s.exps.waveflow.train module
===========================================

.. automodule:: paddlespeech.t2s.exps.waveflow.train
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.wavernn.rst
================================================
paddlespeech.t2s.exps.wavernn package
=====================================

.. automodule:: paddlespeech.t2s.exps.wavernn
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.exps.wavernn.synthesize
   paddlespeech.t2s.exps.wavernn.train


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.wavernn.synthesize.rst
================================================
paddlespeech.t2s.exps.wavernn.synthesize module
===============================================

.. automodule:: paddlespeech.t2s.exps.wavernn.synthesize
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.exps.wavernn.train.rst
================================================
paddlespeech.t2s.exps.wavernn.train module
==========================================

.. automodule:: paddlespeech.t2s.exps.wavernn.train
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.frontend.arpabet.rst
================================================
paddlespeech.t2s.frontend.arpabet module
========================================

.. automodule:: paddlespeech.t2s.frontend.arpabet
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.frontend.g2pw.dataset.rst
================================================
paddlespeech.t2s.frontend.g2pw.dataset module
=============================================

.. automodule:: paddlespeech.t2s.frontend.g2pw.dataset
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.frontend.g2pw.onnx_api.rst
================================================
paddlespeech.t2s.frontend.g2pw.onnx\_api module
===============================================

.. automodule:: paddlespeech.t2s.frontend.g2pw.onnx_api
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.frontend.g2pw.rst
================================================
paddlespeech.t2s.frontend.g2pw package
======================================

.. automodule:: paddlespeech.t2s.frontend.g2pw
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.frontend.g2pw.dataset
   paddlespeech.t2s.frontend.g2pw.onnx_api
   paddlespeech.t2s.frontend.g2pw.utils


================================================
FILE: docs/source/api/paddlespeech.t2s.frontend.g2pw.utils.rst
================================================
paddlespeech.t2s.frontend.g2pw.utils module
===========================================

.. automodule:: paddlespeech.t2s.frontend.g2pw.utils
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.frontend.generate_lexicon.rst
================================================
paddlespeech.t2s.frontend.generate\_lexicon module
==================================================

.. automodule:: paddlespeech.t2s.frontend.generate_lexicon
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.frontend.mix_frontend.rst
================================================
paddlespeech.t2s.frontend.mix\_frontend module
==============================================

.. automodule:: paddlespeech.t2s.frontend.mix_frontend
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.frontend.normalizer.abbrrviation.rst
================================================
paddlespeech.t2s.frontend.normalizer.abbrrviation module
========================================================

.. automodule:: paddlespeech.t2s.frontend.normalizer.abbrrviation
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.frontend.normalizer.acronyms.rst
================================================
paddlespeech.t2s.frontend.normalizer.acronyms module
====================================================

.. automodule:: paddlespeech.t2s.frontend.normalizer.acronyms
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.frontend.normalizer.normalizer.rst
================================================
paddlespeech.t2s.frontend.normalizer.normalizer module
======================================================

.. automodule:: paddlespeech.t2s.frontend.normalizer.normalizer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.frontend.normalizer.numbers.rst
================================================
paddlespeech.t2s.frontend.normalizer.numbers module
===================================================

.. automodule:: paddlespeech.t2s.frontend.normalizer.numbers
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.frontend.normalizer.rst
================================================
paddlespeech.t2s.frontend.normalizer package
============================================

.. automodule:: paddlespeech.t2s.frontend.normalizer
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.frontend.normalizer.abbrrviation
   paddlespeech.t2s.frontend.normalizer.acronyms
   paddlespeech.t2s.frontend.normalizer.normalizer
   paddlespeech.t2s.frontend.normalizer.numbers
   paddlespeech.t2s.frontend.normalizer.width


================================================
FILE: docs/source/api/paddlespeech.t2s.frontend.normalizer.width.rst
================================================
paddlespeech.t2s.frontend.normalizer.width module
=================================================

.. automodule:: paddlespeech.t2s.frontend.normalizer.width
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.frontend.phonectic.rst
================================================
paddlespeech.t2s.frontend.phonectic module
==========================================

.. automodule:: paddlespeech.t2s.frontend.phonectic
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.frontend.punctuation.rst
================================================
paddlespeech.t2s.frontend.punctuation module
============================================

.. automodule:: paddlespeech.t2s.frontend.punctuation
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.frontend.rst
================================================
paddlespeech.t2s.frontend package
=================================

.. automodule:: paddlespeech.t2s.frontend
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.frontend.g2pw
   paddlespeech.t2s.frontend.normalizer
   paddlespeech.t2s.frontend.zh_normalization

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.frontend.arpabet
   paddlespeech.t2s.frontend.generate_lexicon
   paddlespeech.t2s.frontend.mix_frontend
   paddlespeech.t2s.frontend.phonectic
   paddlespeech.t2s.frontend.punctuation
   paddlespeech.t2s.frontend.tone_sandhi
   paddlespeech.t2s.frontend.vocab
   paddlespeech.t2s.frontend.zh_frontend


================================================
FILE: docs/source/api/paddlespeech.t2s.frontend.tone_sandhi.rst
================================================
paddlespeech.t2s.frontend.tone\_sandhi module
=============================================

.. automodule:: paddlespeech.t2s.frontend.tone_sandhi
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.frontend.vocab.rst
================================================
paddlespeech.t2s.frontend.vocab module
======================================

.. automodule:: paddlespeech.t2s.frontend.vocab
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.frontend.zh_frontend.rst
================================================
paddlespeech.t2s.frontend.zh\_frontend module
=============================================

.. automodule:: paddlespeech.t2s.frontend.zh_frontend
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.frontend.zh_normalization.char_convert.rst
================================================
paddlespeech.t2s.frontend.zh\_normalization.char\_convert module
================================================================

.. automodule:: paddlespeech.t2s.frontend.zh_normalization.char_convert
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.frontend.zh_normalization.chronology.rst
================================================
paddlespeech.t2s.frontend.zh\_normalization.chronology module
=============================================================

.. automodule:: paddlespeech.t2s.frontend.zh_normalization.chronology
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.frontend.zh_normalization.constants.rst
================================================
paddlespeech.t2s.frontend.zh\_normalization.constants module
============================================================

.. automodule:: paddlespeech.t2s.frontend.zh_normalization.constants
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.frontend.zh_normalization.num.rst
================================================
paddlespeech.t2s.frontend.zh\_normalization.num module
======================================================

.. automodule:: paddlespeech.t2s.frontend.zh_normalization.num
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.frontend.zh_normalization.phonecode.rst
================================================
paddlespeech.t2s.frontend.zh\_normalization.phonecode module
============================================================

.. automodule:: paddlespeech.t2s.frontend.zh_normalization.phonecode
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.frontend.zh_normalization.quantifier.rst
================================================
paddlespeech.t2s.frontend.zh\_normalization.quantifier module
=============================================================

.. automodule:: paddlespeech.t2s.frontend.zh_normalization.quantifier
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.frontend.zh_normalization.rst
================================================
paddlespeech.t2s.frontend.zh\_normalization package
===================================================

.. automodule:: paddlespeech.t2s.frontend.zh_normalization
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.frontend.zh_normalization.char_convert
   paddlespeech.t2s.frontend.zh_normalization.chronology
   paddlespeech.t2s.frontend.zh_normalization.constants
   paddlespeech.t2s.frontend.zh_normalization.num
   paddlespeech.t2s.frontend.zh_normalization.phonecode
   paddlespeech.t2s.frontend.zh_normalization.quantifier
   paddlespeech.t2s.frontend.zh_normalization.text_normlization


================================================
FILE: docs/source/api/paddlespeech.t2s.frontend.zh_normalization.text_normlization.rst
================================================
paddlespeech.t2s.frontend.zh\_normalization.text\_normlization module
=====================================================================

.. automodule:: paddlespeech.t2s.frontend.zh_normalization.text_normlization
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.ernie_sat.ernie_sat.rst
================================================
paddlespeech.t2s.models.ernie\_sat.ernie\_sat module
====================================================

.. automodule:: paddlespeech.t2s.models.ernie_sat.ernie_sat
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.ernie_sat.ernie_sat_updater.rst
================================================
paddlespeech.t2s.models.ernie\_sat.ernie\_sat\_updater module
=============================================================

.. automodule:: paddlespeech.t2s.models.ernie_sat.ernie_sat_updater
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.ernie_sat.rst
================================================
paddlespeech.t2s.models.ernie\_sat package
==========================================

.. automodule:: paddlespeech.t2s.models.ernie_sat
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.models.ernie_sat.ernie_sat
   paddlespeech.t2s.models.ernie_sat.ernie_sat_updater


================================================
FILE: docs/source/api/paddlespeech.t2s.models.fastspeech2.fastspeech2.rst
================================================
paddlespeech.t2s.models.fastspeech2.fastspeech2 module
======================================================

.. automodule:: paddlespeech.t2s.models.fastspeech2.fastspeech2
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.fastspeech2.fastspeech2_updater.rst
================================================
paddlespeech.t2s.models.fastspeech2.fastspeech2\_updater module
===============================================================

.. automodule:: paddlespeech.t2s.models.fastspeech2.fastspeech2_updater
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.fastspeech2.rst
================================================
paddlespeech.t2s.models.fastspeech2 package
===========================================

.. automodule:: paddlespeech.t2s.models.fastspeech2
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.models.fastspeech2.fastspeech2
   paddlespeech.t2s.models.fastspeech2.fastspeech2_updater


================================================
FILE: docs/source/api/paddlespeech.t2s.models.hifigan.hifigan.rst
================================================
paddlespeech.t2s.models.hifigan.hifigan module
==============================================

.. automodule:: paddlespeech.t2s.models.hifigan.hifigan
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.hifigan.hifigan_updater.rst
================================================
paddlespeech.t2s.models.hifigan.hifigan\_updater module
=======================================================

.. automodule:: paddlespeech.t2s.models.hifigan.hifigan_updater
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.hifigan.rst
================================================
paddlespeech.t2s.models.hifigan package
=======================================

.. automodule:: paddlespeech.t2s.models.hifigan
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.models.hifigan.hifigan
   paddlespeech.t2s.models.hifigan.hifigan_updater


================================================
FILE: docs/source/api/paddlespeech.t2s.models.melgan.melgan.rst
================================================
paddlespeech.t2s.models.melgan.melgan module
============================================

.. automodule:: paddlespeech.t2s.models.melgan.melgan
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.melgan.multi_band_melgan_updater.rst
================================================
paddlespeech.t2s.models.melgan.multi\_band\_melgan\_updater module
==================================================================

.. automodule:: paddlespeech.t2s.models.melgan.multi_band_melgan_updater
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.melgan.rst
================================================
paddlespeech.t2s.models.melgan package
======================================

.. automodule:: paddlespeech.t2s.models.melgan
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.models.melgan.melgan
   paddlespeech.t2s.models.melgan.multi_band_melgan_updater
   paddlespeech.t2s.models.melgan.style_melgan
   paddlespeech.t2s.models.melgan.style_melgan_updater


================================================
FILE: docs/source/api/paddlespeech.t2s.models.melgan.style_melgan.rst
================================================
paddlespeech.t2s.models.melgan.style\_melgan module
===================================================

.. automodule:: paddlespeech.t2s.models.melgan.style_melgan
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.melgan.style_melgan_updater.rst
================================================
paddlespeech.t2s.models.melgan.style\_melgan\_updater module
============================================================

.. automodule:: paddlespeech.t2s.models.melgan.style_melgan_updater
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.parallel_wavegan.parallel_wavegan.rst
================================================
paddlespeech.t2s.models.parallel\_wavegan.parallel\_wavegan module
==================================================================

.. automodule:: paddlespeech.t2s.models.parallel_wavegan.parallel_wavegan
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.parallel_wavegan.parallel_wavegan_updater.rst
================================================
paddlespeech.t2s.models.parallel\_wavegan.parallel\_wavegan\_updater module
===========================================================================

.. automodule:: paddlespeech.t2s.models.parallel_wavegan.parallel_wavegan_updater
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.parallel_wavegan.rst
================================================
paddlespeech.t2s.models.parallel\_wavegan package
=================================================

.. automodule:: paddlespeech.t2s.models.parallel_wavegan
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.models.parallel_wavegan.parallel_wavegan
   paddlespeech.t2s.models.parallel_wavegan.parallel_wavegan_updater


================================================
FILE: docs/source/api/paddlespeech.t2s.models.rst
================================================
paddlespeech.t2s.models package
===============================

.. automodule:: paddlespeech.t2s.models
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.models.ernie_sat
   paddlespeech.t2s.models.fastspeech2
   paddlespeech.t2s.models.hifigan
   paddlespeech.t2s.models.melgan
   paddlespeech.t2s.models.parallel_wavegan
   paddlespeech.t2s.models.speedyspeech
   paddlespeech.t2s.models.tacotron2
   paddlespeech.t2s.models.transformer_tts
   paddlespeech.t2s.models.vits
   paddlespeech.t2s.models.wavernn

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.models.waveflow


================================================
FILE: docs/source/api/paddlespeech.t2s.models.speedyspeech.rst
================================================
paddlespeech.t2s.models.speedyspeech package
============================================

.. automodule:: paddlespeech.t2s.models.speedyspeech
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.models.speedyspeech.speedyspeech
   paddlespeech.t2s.models.speedyspeech.speedyspeech_updater


================================================
FILE: docs/source/api/paddlespeech.t2s.models.speedyspeech.speedyspeech.rst
================================================
paddlespeech.t2s.models.speedyspeech.speedyspeech module
========================================================

.. automodule:: paddlespeech.t2s.models.speedyspeech.speedyspeech
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.speedyspeech.speedyspeech_updater.rst
================================================
paddlespeech.t2s.models.speedyspeech.speedyspeech\_updater module
=================================================================

.. automodule:: paddlespeech.t2s.models.speedyspeech.speedyspeech_updater
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.tacotron2.rst
================================================
paddlespeech.t2s.models.tacotron2 package
=========================================

.. automodule:: paddlespeech.t2s.models.tacotron2
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.models.tacotron2.tacotron2
   paddlespeech.t2s.models.tacotron2.tacotron2_updater


================================================
FILE: docs/source/api/paddlespeech.t2s.models.tacotron2.tacotron2.rst
================================================
paddlespeech.t2s.models.tacotron2.tacotron2 module
==================================================

.. automodule:: paddlespeech.t2s.models.tacotron2.tacotron2
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.tacotron2.tacotron2_updater.rst
================================================
paddlespeech.t2s.models.tacotron2.tacotron2\_updater module
===========================================================

.. automodule:: paddlespeech.t2s.models.tacotron2.tacotron2_updater
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.transformer_tts.rst
================================================
paddlespeech.t2s.models.transformer\_tts package
================================================

.. automodule:: paddlespeech.t2s.models.transformer_tts
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.models.transformer_tts.transformer_tts
   paddlespeech.t2s.models.transformer_tts.transformer_tts_updater


================================================
FILE: docs/source/api/paddlespeech.t2s.models.transformer_tts.transformer_tts.rst
================================================
paddlespeech.t2s.models.transformer\_tts.transformer\_tts module
================================================================

.. automodule:: paddlespeech.t2s.models.transformer_tts.transformer_tts
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.transformer_tts.transformer_tts_updater.rst
================================================
paddlespeech.t2s.models.transformer\_tts.transformer\_tts\_updater module
=========================================================================

.. automodule:: paddlespeech.t2s.models.transformer_tts.transformer_tts_updater
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.vits.duration_predictor.rst
================================================
paddlespeech.t2s.models.vits.duration\_predictor module
=======================================================

.. automodule:: paddlespeech.t2s.models.vits.duration_predictor
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.vits.flow.rst
================================================
paddlespeech.t2s.models.vits.flow module
========================================

.. automodule:: paddlespeech.t2s.models.vits.flow
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.vits.generator.rst
================================================
paddlespeech.t2s.models.vits.generator module
=============================================

.. automodule:: paddlespeech.t2s.models.vits.generator
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.vits.monotonic_align.core.rst
================================================
paddlespeech.t2s.models.vits.monotonic\_align.core module
=========================================================

.. automodule:: paddlespeech.t2s.models.vits.monotonic_align.core
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.vits.monotonic_align.rst
================================================
paddlespeech.t2s.models.vits.monotonic\_align package
=====================================================

.. automodule:: paddlespeech.t2s.models.vits.monotonic_align
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.models.vits.monotonic_align.core
   paddlespeech.t2s.models.vits.monotonic_align.setup


================================================
FILE: docs/source/api/paddlespeech.t2s.models.vits.monotonic_align.setup.rst
================================================
paddlespeech.t2s.models.vits.monotonic\_align.setup module
==========================================================

.. automodule:: paddlespeech.t2s.models.vits.monotonic_align.setup
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.vits.posterior_encoder.rst
================================================
paddlespeech.t2s.models.vits.posterior\_encoder module
======================================================

.. automodule:: paddlespeech.t2s.models.vits.posterior_encoder
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.vits.residual_coupling.rst
================================================
paddlespeech.t2s.models.vits.residual\_coupling module
======================================================

.. automodule:: paddlespeech.t2s.models.vits.residual_coupling
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.vits.rst
================================================
paddlespeech.t2s.models.vits package
====================================

.. automodule:: paddlespeech.t2s.models.vits
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.models.vits.monotonic_align
   paddlespeech.t2s.models.vits.wavenet

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.models.vits.duration_predictor
   paddlespeech.t2s.models.vits.flow
   paddlespeech.t2s.models.vits.generator
   paddlespeech.t2s.models.vits.posterior_encoder
   paddlespeech.t2s.models.vits.residual_coupling
   paddlespeech.t2s.models.vits.text_encoder
   paddlespeech.t2s.models.vits.transform
   paddlespeech.t2s.models.vits.vits
   paddlespeech.t2s.models.vits.vits_updater


================================================
FILE: docs/source/api/paddlespeech.t2s.models.vits.text_encoder.rst
================================================
paddlespeech.t2s.models.vits.text\_encoder module
=================================================

.. automodule:: paddlespeech.t2s.models.vits.text_encoder
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.vits.transform.rst
================================================
paddlespeech.t2s.models.vits.transform module
=============================================

.. automodule:: paddlespeech.t2s.models.vits.transform
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.vits.vits.rst
================================================
paddlespeech.t2s.models.vits.vits module
========================================

.. automodule:: paddlespeech.t2s.models.vits.vits
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.vits.vits_updater.rst
================================================
paddlespeech.t2s.models.vits.vits\_updater module
=================================================

.. automodule:: paddlespeech.t2s.models.vits.vits_updater
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.vits.wavenet.residual_block.rst
================================================
paddlespeech.t2s.models.vits.wavenet.residual\_block module
===========================================================

.. automodule:: paddlespeech.t2s.models.vits.wavenet.residual_block
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.vits.wavenet.rst
================================================
paddlespeech.t2s.models.vits.wavenet package
============================================

.. automodule:: paddlespeech.t2s.models.vits.wavenet
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.models.vits.wavenet.residual_block
   paddlespeech.t2s.models.vits.wavenet.wavenet


================================================
FILE: docs/source/api/paddlespeech.t2s.models.vits.wavenet.wavenet.rst
================================================
paddlespeech.t2s.models.vits.wavenet.wavenet module
===================================================

.. automodule:: paddlespeech.t2s.models.vits.wavenet.wavenet
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.waveflow.rst
================================================
paddlespeech.t2s.models.waveflow module
=======================================

.. automodule:: paddlespeech.t2s.models.waveflow
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.wavernn.rst
================================================
paddlespeech.t2s.models.wavernn package
=======================================

.. automodule:: paddlespeech.t2s.models.wavernn
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.models.wavernn.wavernn
   paddlespeech.t2s.models.wavernn.wavernn_updater


================================================
FILE: docs/source/api/paddlespeech.t2s.models.wavernn.wavernn.rst
================================================
paddlespeech.t2s.models.wavernn.wavernn module
==============================================

.. automodule:: paddlespeech.t2s.models.wavernn.wavernn
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.models.wavernn.wavernn_updater.rst
================================================
paddlespeech.t2s.models.wavernn.wavernn\_updater module
=======================================================

.. automodule:: paddlespeech.t2s.models.wavernn.wavernn_updater
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.activation.rst
================================================
paddlespeech.t2s.modules.activation module
==========================================

.. automodule:: paddlespeech.t2s.modules.activation
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.causal_conv.rst
================================================
paddlespeech.t2s.modules.causal\_conv module
============================================

.. automodule:: paddlespeech.t2s.modules.causal_conv
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.conformer.convolution.rst
================================================
paddlespeech.t2s.modules.conformer.convolution module
=====================================================

.. automodule:: paddlespeech.t2s.modules.conformer.convolution
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.conformer.encoder_layer.rst
================================================
paddlespeech.t2s.modules.conformer.encoder\_layer module
========================================================

.. automodule:: paddlespeech.t2s.modules.conformer.encoder_layer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.conformer.rst
================================================
paddlespeech.t2s.modules.conformer package
==========================================

.. automodule:: paddlespeech.t2s.modules.conformer
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.modules.conformer.convolution
   paddlespeech.t2s.modules.conformer.encoder_layer


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.conv.rst
================================================
paddlespeech.t2s.modules.conv module
====================================

.. automodule:: paddlespeech.t2s.modules.conv
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.geometry.rst
================================================
paddlespeech.t2s.modules.geometry module
========================================

.. automodule:: paddlespeech.t2s.modules.geometry
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.layer_norm.rst
================================================
paddlespeech.t2s.modules.layer\_norm module
===========================================

.. automodule:: paddlespeech.t2s.modules.layer_norm
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.losses.rst
================================================
paddlespeech.t2s.modules.losses module
======================================

.. automodule:: paddlespeech.t2s.modules.losses
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.masked_fill.rst
================================================
paddlespeech.t2s.modules.masked\_fill module
============================================

.. automodule:: paddlespeech.t2s.modules.masked_fill
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.nets_utils.rst
================================================
paddlespeech.t2s.modules.nets\_utils module
===========================================

.. automodule:: paddlespeech.t2s.modules.nets_utils
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.normalizer.rst
================================================
paddlespeech.t2s.modules.normalizer module
==========================================

.. automodule:: paddlespeech.t2s.modules.normalizer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.positional_encoding.rst
================================================
paddlespeech.t2s.modules.positional\_encoding module
====================================================

.. automodule:: paddlespeech.t2s.modules.positional_encoding
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.pqmf.rst
================================================
paddlespeech.t2s.modules.pqmf module
====================================

.. automodule:: paddlespeech.t2s.modules.pqmf
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.predictor.duration_predictor.rst
================================================
paddlespeech.t2s.modules.predictor.duration\_predictor module
=============================================================

.. automodule:: paddlespeech.t2s.modules.predictor.duration_predictor
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.predictor.length_regulator.rst
================================================
paddlespeech.t2s.modules.predictor.length\_regulator module
===========================================================

.. automodule:: paddlespeech.t2s.modules.predictor.length_regulator
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.predictor.rst
================================================
paddlespeech.t2s.modules.predictor package
==========================================

.. automodule:: paddlespeech.t2s.modules.predictor
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.modules.predictor.duration_predictor
   paddlespeech.t2s.modules.predictor.length_regulator
   paddlespeech.t2s.modules.predictor.variance_predictor


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.predictor.variance_predictor.rst
================================================
paddlespeech.t2s.modules.predictor.variance\_predictor module
=============================================================

.. automodule:: paddlespeech.t2s.modules.predictor.variance_predictor
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.residual_block.rst
================================================
paddlespeech.t2s.modules.residual\_block module
===============================================

.. automodule:: paddlespeech.t2s.modules.residual_block
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.residual_stack.rst
================================================
paddlespeech.t2s.modules.residual\_stack module
===============================================

.. automodule:: paddlespeech.t2s.modules.residual_stack
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.rst
================================================
paddlespeech.t2s.modules package
================================

.. automodule:: paddlespeech.t2s.modules
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.modules.conformer
   paddlespeech.t2s.modules.predictor
   paddlespeech.t2s.modules.tacotron2
   paddlespeech.t2s.modules.transformer

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.modules.activation
   paddlespeech.t2s.modules.causal_conv
   paddlespeech.t2s.modules.conv
   paddlespeech.t2s.modules.geometry
   paddlespeech.t2s.modules.layer_norm
   paddlespeech.t2s.modules.losses
   paddlespeech.t2s.modules.masked_fill
   paddlespeech.t2s.modules.nets_utils
   paddlespeech.t2s.modules.normalizer
   paddlespeech.t2s.modules.positional_encoding
   paddlespeech.t2s.modules.pqmf
   paddlespeech.t2s.modules.residual_block
   paddlespeech.t2s.modules.residual_stack
   paddlespeech.t2s.modules.style_encoder
   paddlespeech.t2s.modules.tade_res_block
   paddlespeech.t2s.modules.upsample


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.style_encoder.rst
================================================
paddlespeech.t2s.modules.style\_encoder module
==============================================

.. automodule:: paddlespeech.t2s.modules.style_encoder
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.tacotron2.attentions.rst
================================================
paddlespeech.t2s.modules.tacotron2.attentions module
====================================================

.. automodule:: paddlespeech.t2s.modules.tacotron2.attentions
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.tacotron2.decoder.rst
================================================
paddlespeech.t2s.modules.tacotron2.decoder module
=================================================

.. automodule:: paddlespeech.t2s.modules.tacotron2.decoder
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.tacotron2.encoder.rst
================================================
paddlespeech.t2s.modules.tacotron2.encoder module
=================================================

.. automodule:: paddlespeech.t2s.modules.tacotron2.encoder
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.tacotron2.rst
================================================
paddlespeech.t2s.modules.tacotron2 package
==========================================

.. automodule:: paddlespeech.t2s.modules.tacotron2
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.modules.tacotron2.attentions
   paddlespeech.t2s.modules.tacotron2.decoder
   paddlespeech.t2s.modules.tacotron2.encoder


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.tade_res_block.rst
================================================
paddlespeech.t2s.modules.tade\_res\_block module
================================================

.. automodule:: paddlespeech.t2s.modules.tade_res_block
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.transformer.attention.rst
================================================
paddlespeech.t2s.modules.transformer.attention module
=====================================================

.. automodule:: paddlespeech.t2s.modules.transformer.attention
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.transformer.decoder.rst
================================================
paddlespeech.t2s.modules.transformer.decoder module
===================================================

.. automodule:: paddlespeech.t2s.modules.transformer.decoder
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.transformer.decoder_layer.rst
================================================
paddlespeech.t2s.modules.transformer.decoder\_layer module
==========================================================

.. automodule:: paddlespeech.t2s.modules.transformer.decoder_layer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.transformer.embedding.rst
================================================
paddlespeech.t2s.modules.transformer.embedding module
=====================================================

.. automodule:: paddlespeech.t2s.modules.transformer.embedding
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.transformer.encoder.rst
================================================
paddlespeech.t2s.modules.transformer.encoder module
===================================================

.. automodule:: paddlespeech.t2s.modules.transformer.encoder
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.transformer.encoder_layer.rst
================================================
paddlespeech.t2s.modules.transformer.encoder\_layer module
==========================================================

.. automodule:: paddlespeech.t2s.modules.transformer.encoder_layer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.transformer.lightconv.rst
================================================
paddlespeech.t2s.modules.transformer.lightconv module
=====================================================

.. automodule:: paddlespeech.t2s.modules.transformer.lightconv
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.transformer.mask.rst
================================================
paddlespeech.t2s.modules.transformer.mask module
================================================

.. automodule:: paddlespeech.t2s.modules.transformer.mask
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.transformer.multi_layer_conv.rst
================================================
paddlespeech.t2s.modules.transformer.multi\_layer\_conv module
==============================================================

.. automodule:: paddlespeech.t2s.modules.transformer.multi_layer_conv
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.transformer.positionwise_feed_forward.rst
================================================
paddlespeech.t2s.modules.transformer.positionwise\_feed\_forward module
=======================================================================

.. automodule:: paddlespeech.t2s.modules.transformer.positionwise_feed_forward
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.transformer.repeat.rst
================================================
paddlespeech.t2s.modules.transformer.repeat module
==================================================

.. automodule:: paddlespeech.t2s.modules.transformer.repeat
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.transformer.rst
================================================
paddlespeech.t2s.modules.transformer package
============================================

.. automodule:: paddlespeech.t2s.modules.transformer
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.modules.transformer.attention
   paddlespeech.t2s.modules.transformer.decoder
   paddlespeech.t2s.modules.transformer.decoder_layer
   paddlespeech.t2s.modules.transformer.embedding
   paddlespeech.t2s.modules.transformer.encoder
   paddlespeech.t2s.modules.transformer.encoder_layer
   paddlespeech.t2s.modules.transformer.lightconv
   paddlespeech.t2s.modules.transformer.mask
   paddlespeech.t2s.modules.transformer.multi_layer_conv
   paddlespeech.t2s.modules.transformer.positionwise_feed_forward
   paddlespeech.t2s.modules.transformer.repeat
   paddlespeech.t2s.modules.transformer.subsampling


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.transformer.subsampling.rst
================================================
paddlespeech.t2s.modules.transformer.subsampling module
=======================================================

.. automodule:: paddlespeech.t2s.modules.transformer.subsampling
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.modules.upsample.rst
================================================
paddlespeech.t2s.modules.upsample module
========================================

.. automodule:: paddlespeech.t2s.modules.upsample
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.rst
================================================
paddlespeech.t2s package
========================

.. automodule:: paddlespeech.t2s
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.audio
   paddlespeech.t2s.datasets
   paddlespeech.t2s.exps
   paddlespeech.t2s.frontend
   paddlespeech.t2s.models
   paddlespeech.t2s.modules
   paddlespeech.t2s.training
   paddlespeech.t2s.utils


================================================
FILE: docs/source/api/paddlespeech.t2s.training.cli.rst
================================================
paddlespeech.t2s.training.cli module
====================================

.. automodule:: paddlespeech.t2s.training.cli
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.training.default_config.rst
================================================
paddlespeech.t2s.training.default\_config module
================================================

.. automodule:: paddlespeech.t2s.training.default_config
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.training.experiment.rst
================================================
paddlespeech.t2s.training.experiment module
===========================================

.. automodule:: paddlespeech.t2s.training.experiment
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.training.extension.rst
================================================
paddlespeech.t2s.training.extension module
==========================================

.. automodule:: paddlespeech.t2s.training.extension
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.training.extensions.evaluator.rst
================================================
paddlespeech.t2s.training.extensions.evaluator module
=====================================================

.. automodule:: paddlespeech.t2s.training.extensions.evaluator
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.training.extensions.rst
================================================
paddlespeech.t2s.training.extensions package
============================================

.. automodule:: paddlespeech.t2s.training.extensions
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.training.extensions.evaluator
   paddlespeech.t2s.training.extensions.snapshot
   paddlespeech.t2s.training.extensions.visualizer


================================================
FILE: docs/source/api/paddlespeech.t2s.training.extensions.snapshot.rst
================================================
paddlespeech.t2s.training.extensions.snapshot module
====================================================

.. automodule:: paddlespeech.t2s.training.extensions.snapshot
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.training.extensions.visualizer.rst
================================================
paddlespeech.t2s.training.extensions.visualizer module
======================================================

.. automodule:: paddlespeech.t2s.training.extensions.visualizer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.training.optimizer.rst
================================================
paddlespeech.t2s.training.optimizer module
==========================================

.. automodule:: paddlespeech.t2s.training.optimizer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.training.reporter.rst
================================================
paddlespeech.t2s.training.reporter module
=========================================

.. automodule:: paddlespeech.t2s.training.reporter
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.training.rst
================================================
paddlespeech.t2s.training package
=================================

.. automodule:: paddlespeech.t2s.training
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.training.extensions
   paddlespeech.t2s.training.triggers
   paddlespeech.t2s.training.updaters

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.training.cli
   paddlespeech.t2s.training.default_config
   paddlespeech.t2s.training.experiment
   paddlespeech.t2s.training.extension
   paddlespeech.t2s.training.optimizer
   paddlespeech.t2s.training.reporter
   paddlespeech.t2s.training.seeding
   paddlespeech.t2s.training.trainer
   paddlespeech.t2s.training.trigger
   paddlespeech.t2s.training.updater


================================================
FILE: docs/source/api/paddlespeech.t2s.training.seeding.rst
================================================
paddlespeech.t2s.training.seeding module
========================================

.. automodule:: paddlespeech.t2s.training.seeding
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.training.trainer.rst
================================================
paddlespeech.t2s.training.trainer module
========================================

.. automodule:: paddlespeech.t2s.training.trainer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.training.trigger.rst
================================================
paddlespeech.t2s.training.trigger module
========================================

.. automodule:: paddlespeech.t2s.training.trigger
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.training.triggers.interval_trigger.rst
================================================
paddlespeech.t2s.training.triggers.interval\_trigger module
===========================================================

.. automodule:: paddlespeech.t2s.training.triggers.interval_trigger
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.training.triggers.limit_trigger.rst
================================================
paddlespeech.t2s.training.triggers.limit\_trigger module
========================================================

.. automodule:: paddlespeech.t2s.training.triggers.limit_trigger
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.training.triggers.rst
================================================
paddlespeech.t2s.training.triggers package
==========================================

.. automodule:: paddlespeech.t2s.training.triggers
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.training.triggers.interval_trigger
   paddlespeech.t2s.training.triggers.limit_trigger
   paddlespeech.t2s.training.triggers.time_trigger


================================================
FILE: docs/source/api/paddlespeech.t2s.training.triggers.time_trigger.rst
================================================
paddlespeech.t2s.training.triggers.time\_trigger module
=======================================================

.. automodule:: paddlespeech.t2s.training.triggers.time_trigger
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.training.updater.rst
================================================
paddlespeech.t2s.training.updater module
========================================

.. automodule:: paddlespeech.t2s.training.updater
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.training.updaters.rst
================================================
paddlespeech.t2s.training.updaters package
==========================================

.. automodule:: paddlespeech.t2s.training.updaters
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.training.updaters.standard_updater


================================================
FILE: docs/source/api/paddlespeech.t2s.training.updaters.standard_updater.rst
================================================
paddlespeech.t2s.training.updaters.standard\_updater module
===========================================================

.. automodule:: paddlespeech.t2s.training.updaters.standard_updater
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.utils.checkpoint.rst
================================================
paddlespeech.t2s.utils.checkpoint module
========================================

.. automodule:: paddlespeech.t2s.utils.checkpoint
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.utils.display.rst
================================================
paddlespeech.t2s.utils.display module
=====================================

.. automodule:: paddlespeech.t2s.utils.display
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.utils.error_rate.rst
================================================
paddlespeech.t2s.utils.error\_rate module
=========================================

.. automodule:: paddlespeech.t2s.utils.error_rate
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.utils.h5_utils.rst
================================================
paddlespeech.t2s.utils.h5\_utils module
=======================================

.. automodule:: paddlespeech.t2s.utils.h5_utils
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.utils.internals.rst
================================================
paddlespeech.t2s.utils.internals module
=======================================

.. automodule:: paddlespeech.t2s.utils.internals
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.utils.layer_tools.rst
================================================
paddlespeech.t2s.utils.layer\_tools module
==========================================

.. automodule:: paddlespeech.t2s.utils.layer_tools
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.utils.mp_tools.rst
================================================
paddlespeech.t2s.utils.mp\_tools module
=======================================

.. automodule:: paddlespeech.t2s.utils.mp_tools
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.utils.profiler.rst
================================================
paddlespeech.t2s.utils.profiler module
======================================

.. automodule:: paddlespeech.t2s.utils.profiler
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.t2s.utils.rst
================================================
paddlespeech.t2s.utils package
==============================

.. automodule:: paddlespeech.t2s.utils
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.t2s.utils.checkpoint
   paddlespeech.t2s.utils.display
   paddlespeech.t2s.utils.error_rate
   paddlespeech.t2s.utils.h5_utils
   paddlespeech.t2s.utils.internals
   paddlespeech.t2s.utils.layer_tools
   paddlespeech.t2s.utils.mp_tools
   paddlespeech.t2s.utils.profiler
   paddlespeech.t2s.utils.scheduler


================================================
FILE: docs/source/api/paddlespeech.t2s.utils.scheduler.rst
================================================
paddlespeech.t2s.utils.scheduler module
=======================================

.. automodule:: paddlespeech.t2s.utils.scheduler
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.text.exps.ernie_linear.avg_model.rst
================================================
paddlespeech.text.exps.ernie\_linear.avg\_model module
======================================================

.. automodule:: paddlespeech.text.exps.ernie_linear.avg_model
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.text.exps.ernie_linear.punc_restore.rst
================================================
paddlespeech.text.exps.ernie\_linear.punc\_restore module
=========================================================

.. automodule:: paddlespeech.text.exps.ernie_linear.punc_restore
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.text.exps.ernie_linear.rst
================================================
paddlespeech.text.exps.ernie\_linear package
============================================

.. automodule:: paddlespeech.text.exps.ernie_linear
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.text.exps.ernie_linear.avg_model
   paddlespeech.text.exps.ernie_linear.punc_restore
   paddlespeech.text.exps.ernie_linear.test
   paddlespeech.text.exps.ernie_linear.train


================================================
FILE: docs/source/api/paddlespeech.text.exps.ernie_linear.test.rst
================================================
paddlespeech.text.exps.ernie\_linear.test module
================================================

.. automodule:: paddlespeech.text.exps.ernie_linear.test
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.text.exps.ernie_linear.train.rst
================================================
paddlespeech.text.exps.ernie\_linear.train module
=================================================

.. automodule:: paddlespeech.text.exps.ernie_linear.train
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.text.exps.rst
================================================
paddlespeech.text.exps package
==============================

.. automodule:: paddlespeech.text.exps
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.text.exps.ernie_linear


================================================
FILE: docs/source/api/paddlespeech.text.models.ernie_crf.model.rst
================================================
paddlespeech.text.models.ernie\_crf.model module
================================================

.. automodule:: paddlespeech.text.models.ernie_crf.model
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.text.models.ernie_crf.rst
================================================
paddlespeech.text.models.ernie\_crf package
===========================================

.. automodule:: paddlespeech.text.models.ernie_crf
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.text.models.ernie_crf.model


================================================
FILE: docs/source/api/paddlespeech.text.models.ernie_linear.dataset.rst
================================================
paddlespeech.text.models.ernie\_linear.dataset module
=====================================================

.. automodule:: paddlespeech.text.models.ernie_linear.dataset
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.text.models.ernie_linear.ernie_linear.rst
================================================
paddlespeech.text.models.ernie\_linear.ernie\_linear module
===========================================================

.. automodule:: paddlespeech.text.models.ernie_linear.ernie_linear
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.text.models.ernie_linear.ernie_linear_updater.rst
================================================
paddlespeech.text.models.ernie\_linear.ernie\_linear\_updater module
====================================================================

.. automodule:: paddlespeech.text.models.ernie_linear.ernie_linear_updater
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.text.models.ernie_linear.rst
================================================
paddlespeech.text.models.ernie\_linear package
==============================================

.. automodule:: paddlespeech.text.models.ernie_linear
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.text.models.ernie_linear.dataset
   paddlespeech.text.models.ernie_linear.ernie_linear
   paddlespeech.text.models.ernie_linear.ernie_linear_updater


================================================
FILE: docs/source/api/paddlespeech.text.models.rst
================================================
paddlespeech.text.models package
================================

.. automodule:: paddlespeech.text.models
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.text.models.ernie_crf
   paddlespeech.text.models.ernie_linear


================================================
FILE: docs/source/api/paddlespeech.text.rst
================================================
paddlespeech.text package
=========================

.. automodule:: paddlespeech.text
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.text.exps
   paddlespeech.text.models


================================================
FILE: docs/source/api/paddlespeech.utils.dynamic_import.rst
================================================
paddlespeech.utils.dynamic\_import module
=========================================

.. automodule:: paddlespeech.utils.dynamic_import
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.utils.env.rst
================================================
paddlespeech.utils.env module
=============================

.. automodule:: paddlespeech.utils.env
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.utils.rst
================================================
paddlespeech.utils package
==========================

.. automodule:: paddlespeech.utils
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.utils.dynamic_import
   paddlespeech.utils.env


================================================
FILE: docs/source/api/paddlespeech.vector.cluster.diarization.rst
================================================
paddlespeech.vector.cluster.diarization module
==============================================

.. automodule:: paddlespeech.vector.cluster.diarization
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.vector.cluster.plda.rst
================================================
paddlespeech.vector.cluster.plda module
=======================================

.. automodule:: paddlespeech.vector.cluster.plda
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.vector.cluster.rst
================================================
paddlespeech.vector.cluster package
===================================

.. automodule:: paddlespeech.vector.cluster
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.vector.cluster.diarization
   paddlespeech.vector.cluster.plda


================================================
FILE: docs/source/api/paddlespeech.vector.exps.ge2e.audio_processor.rst
================================================
paddlespeech.vector.exps.ge2e.audio\_processor module
=====================================================

.. automodule:: paddlespeech.vector.exps.ge2e.audio_processor
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.vector.exps.ge2e.config.rst
================================================
paddlespeech.vector.exps.ge2e.config module
===========================================

.. automodule:: paddlespeech.vector.exps.ge2e.config
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.vector.exps.ge2e.dataset_processors.rst
================================================
paddlespeech.vector.exps.ge2e.dataset\_processors module
========================================================

.. automodule:: paddlespeech.vector.exps.ge2e.dataset_processors
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.vector.exps.ge2e.inference.rst
================================================
paddlespeech.vector.exps.ge2e.inference module
==============================================

.. automodule:: paddlespeech.vector.exps.ge2e.inference
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.vector.exps.ge2e.preprocess.rst
================================================
paddlespeech.vector.exps.ge2e.preprocess module
===============================================

.. automodule:: paddlespeech.vector.exps.ge2e.preprocess
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.vector.exps.ge2e.random_cycle.rst
================================================
paddlespeech.vector.exps.ge2e.random\_cycle module
==================================================

.. automodule:: paddlespeech.vector.exps.ge2e.random_cycle
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.vector.exps.ge2e.rst
================================================
paddlespeech.vector.exps.ge2e package
=====================================

.. automodule:: paddlespeech.vector.exps.ge2e
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.vector.exps.ge2e.audio_processor
   paddlespeech.vector.exps.ge2e.config
   paddlespeech.vector.exps.ge2e.dataset_processors
   paddlespeech.vector.exps.ge2e.inference
   paddlespeech.vector.exps.ge2e.preprocess
   paddlespeech.vector.exps.ge2e.random_cycle
   paddlespeech.vector.exps.ge2e.speaker_verification_dataset
   paddlespeech.vector.exps.ge2e.train


================================================
FILE: docs/source/api/paddlespeech.vector.exps.ge2e.speaker_verification_dataset.rst
================================================
paddlespeech.vector.exps.ge2e.speaker\_verification\_dataset module
===================================================================

.. automodule:: paddlespeech.vector.exps.ge2e.speaker_verification_dataset
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.vector.exps.ge2e.train.rst
================================================
paddlespeech.vector.exps.ge2e.train module
==========================================

.. automodule:: paddlespeech.vector.exps.ge2e.train
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.vector.exps.rst
================================================
paddlespeech.vector.exps package
================================

.. automodule:: paddlespeech.vector.exps
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.vector.exps.ge2e


================================================
FILE: docs/source/api/paddlespeech.vector.io.augment.rst
================================================
paddlespeech.vector.io.augment module
=====================================

.. automodule:: paddlespeech.vector.io.augment
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.vector.io.batch.rst
================================================
paddlespeech.vector.io.batch module
===================================

.. automodule:: paddlespeech.vector.io.batch
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.vector.io.dataset.rst
================================================
paddlespeech.vector.io.dataset module
=====================================

.. automodule:: paddlespeech.vector.io.dataset
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.vector.io.dataset_from_json.rst
================================================
paddlespeech.vector.io.dataset\_from\_json module
=================================================

.. automodule:: paddlespeech.vector.io.dataset_from_json
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.vector.io.embedding_norm.rst
================================================
paddlespeech.vector.io.embedding\_norm module
=============================================

.. automodule:: paddlespeech.vector.io.embedding_norm
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.vector.io.rst
================================================
paddlespeech.vector.io package
==============================

.. automodule:: paddlespeech.vector.io
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.vector.io.augment
   paddlespeech.vector.io.batch
   paddlespeech.vector.io.dataset
   paddlespeech.vector.io.dataset_from_json
   paddlespeech.vector.io.embedding_norm
   paddlespeech.vector.io.signal_processing


================================================
FILE: docs/source/api/paddlespeech.vector.io.signal_processing.rst
================================================
paddlespeech.vector.io.signal\_processing module
================================================

.. automodule:: paddlespeech.vector.io.signal_processing
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.vector.models.ecapa_tdnn.rst
================================================
paddlespeech.vector.models.ecapa\_tdnn module
=============================================

.. automodule:: paddlespeech.vector.models.ecapa_tdnn
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.vector.models.lstm_speaker_encoder.rst
================================================
paddlespeech.vector.models.lstm\_speaker\_encoder module
========================================================

.. automodule:: paddlespeech.vector.models.lstm_speaker_encoder
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.vector.models.rst
================================================
paddlespeech.vector.models package
==================================

.. automodule:: paddlespeech.vector.models
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.vector.models.ecapa_tdnn
   paddlespeech.vector.models.lstm_speaker_encoder


================================================
FILE: docs/source/api/paddlespeech.vector.modules.layer.rst
================================================
paddlespeech.vector.modules.layer module
========================================

.. automodule:: paddlespeech.vector.modules.layer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.vector.modules.loss.rst
================================================
paddlespeech.vector.modules.loss module
=======================================

.. automodule:: paddlespeech.vector.modules.loss
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.vector.modules.rst
================================================
paddlespeech.vector.modules package
===================================

.. automodule:: paddlespeech.vector.modules
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.vector.modules.layer
   paddlespeech.vector.modules.loss
   paddlespeech.vector.modules.sid_model


================================================
FILE: docs/source/api/paddlespeech.vector.modules.sid_model.rst
================================================
paddlespeech.vector.modules.sid\_model module
=============================================

.. automodule:: paddlespeech.vector.modules.sid_model
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.vector.rst
================================================
paddlespeech.vector package
===========================

.. automodule:: paddlespeech.vector
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddlespeech.vector.cluster
   paddlespeech.vector.exps
   paddlespeech.vector.io
   paddlespeech.vector.models
   paddlespeech.vector.modules
   paddlespeech.vector.training
   paddlespeech.vector.utils


================================================
FILE: docs/source/api/paddlespeech.vector.training.rst
================================================
paddlespeech.vector.training package
====================================

.. automodule:: paddlespeech.vector.training
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.vector.training.scheduler
   paddlespeech.vector.training.seeding


================================================
FILE: docs/source/api/paddlespeech.vector.training.scheduler.rst
================================================
paddlespeech.vector.training.scheduler module
=============================================

.. automodule:: paddlespeech.vector.training.scheduler
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.vector.training.seeding.rst
================================================
paddlespeech.vector.training.seeding module
===========================================

.. automodule:: paddlespeech.vector.training.seeding
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.vector.utils.rst
================================================
paddlespeech.vector.utils package
=================================

.. automodule:: paddlespeech.vector.utils
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddlespeech.vector.utils.time
   paddlespeech.vector.utils.vector_utils


================================================
FILE: docs/source/api/paddlespeech.vector.utils.time.rst
================================================
paddlespeech.vector.utils.time module
=====================================

.. automodule:: paddlespeech.vector.utils.time
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.vector.utils.vector_utils.rst
================================================
paddlespeech.vector.utils.vector\_utils module
==============================================

.. automodule:: paddlespeech.vector.utils.vector_utils
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/api/paddlespeech.version.rst
================================================
paddlespeech.version module
===========================

.. automodule:: paddlespeech.version
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/asr/PPASR.md
================================================
([简体中文](./PPASR_cn.md)|English)
# PP-ASR

## Catalogue
- [1. Introduction](#1)
- [2. Characteristic](#2)
- [3. Tutorials](#3)
    - [3.1 Pre-trained Models](#31)
    - [3.2 Training](#32)
    - [3.3 Inference](#33)
    - [3.4 Service Deployment](#34)
    - [3.5 Customized Auto Speech Recognition and Deployment](#35)
- [4. Quick Start](#4)

<a name="1"></a>
## 1. Introduction

PP-ASR is a tool to provide ASR(Automatic speech recognition) function. It provides a variety of Chinese and English models and supports model training. It also supports model inference using the command line. In addition, PP-ASR supports the deployment of streaming models and customized ASR.

<a name="2"></a>
## 2. Characteristic
The basic process of ASR is shown in the figure below:  
<center><img src=https://user-images.githubusercontent.com/87408988/168259962-cbe2008b-47b6-443d-9566-d77a5ca2eb25.png width="800" ></center>


The main characteristics of PP-ASR are shown below:
-  Provides pre-trained models on Chinese/English open source datasets: aishell(Chinese), wenetspeech(Chinese) and librispeech(English). The models include deepspeech2 and conformer/transformer.
-  Support model training on Chinese/English datasets.
-  Support model inference using the command line. You can use `paddlespeech asr --model xxx --input xxx.wav` to run inference with a pre-trained model.
-  Support deployment of streaming ASR server. Besides ASR function, the server supports timestamp function.
-  Support customized auto speech recognition and deployment.

<a name="3"></a>
## 3. Tutorials

<a name="31"></a>
## 3.1 Pre-trained Models
The list of supported pre-trained models: [released_model](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/released_model.md).  
The best-performing models are the Ds2 Online Wenetspeech ASR0 Model and the Conformer Online Wenetspeech ASR1 Model. Both models support streaming ASR.  
For more information about model design, you can refer to the aistudio tutorial:
- [Deepspeech2](https://aistudio.baidu.com/aistudio/projectdetail/3866807)
- [Transformer](https://aistudio.baidu.com/aistudio/projectdetail/3470110)

<a name="32"></a>
## 3.2 Training
The referenced script for model training is stored in [examples](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples) and stored according to "examples/dataset/model". The dataset mainly supports aishell and librispeech. The model supports deepspeech2 and u2(conformer/transformer).
The specific steps of executing the script are recorded in `run.sh`.

For more information, you can refer to [asr1](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell/asr1)


<a name="33"></a>
## 3.3 Inference

After installing `paddlespeech` with `pip install paddlespeech`, you can run `paddlespeech asr --model xxx --input xxx.wav` to perform inference with a pre-trained model.

Specific supported functions include:

- Prediction of single audio
- Use the pipe to predict multiple audio
- Support RTF calculation

For specific usage, please refer to: [speech_recognition](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/speech_recognition/README_cn.md) 


<a name="34"></a>
## 3.4 Service Deployment

PP-ASR supports the service deployment of streaming ASR. Support the simultaneous use of speech recognition and punctuation processing.

Demo of ASR Server: [streaming_asr_server](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos/streaming_asr_server)

![image](https://user-images.githubusercontent.com/87408988/168255342-1fc790c0-16f4-4540-a861-db239076727c.png)

Display of using ASR server on Web page: [streaming_asr_demo_video](https://paddlespeech.readthedocs.io/en/latest/streaming_asr_demo_video.html)


For more information about service deployment, you can refer to the aistudio tutorial:
- [Streaming service - model part](https://aistudio.baidu.com/aistudio/projectdetail/3839884)
- [Streaming service](https://aistudio.baidu.com/aistudio/projectdetail/4017905)

<a name="35"></a>
## 3.5 Customized Auto Speech Recognition and Deployment

For customized auto speech recognition and deployment, PP-ASR provides a C++ program covering feature extraction (fbank) => inference model (scoring library) => TLG (WFST: token, lexicon, grammar). For specific usage, please refer to: [speechx](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/speechx)   
If you want to quickly use it, you can refer to [custom_streaming_asr](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/custom_streaming_asr/README_cn.md)

For more information about customized auto speech recognition and deployment, you can refer to the aistudio tutorial:
- [Customized Auto Speech Recognition](https://aistudio.baidu.com/aistudio/projectdetail/4021561)


<a name="4"></a>

## 4. Quick Start

To use PP-ASR, see the [install](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install_cn.md) guide. It provides three methods to install `paddlespeech`: **Easy**, **Medium** and **Hard**. If you want to try the inference function of paddlespeech, you can use the **Easy** installation method.


================================================
FILE: docs/source/asr/PPASR_cn.md
================================================
(简体中文|[English](./PPASR.md))
# PP-ASR

## 目录
- [1. 简介](#1)
- [2. 特点](#2)
- [3. 使用教程](#3)
    - [3.1 预训练模型](#31)
    - [3.2 模型训练](#32)
    - [3.3 模型推理](#33)
    - [3.4 服务部署](#34)
    - [3.5 支持个性化场景部署](#35)
- [4. 快速开始](#4)

<a name="1"></a>
## 1. 简介

PP-ASR 是一个 提供 ASR 功能的工具。其提供了多种中文和英文的模型，支持模型的训练，并且支持使用命令行的方式进行模型的推理。 PP-ASR 也支持流式模型的部署，以及个性化场景的部署。

<a name="2"></a>
## 2. 特点
语音识别的基本流程如下图所示：  
<center><img src=https://user-images.githubusercontent.com/87408988/168259962-cbe2008b-47b6-443d-9566-d77a5ca2eb25.png width="800" ></center>


PP-ASR 的主要特点如下：
-  提供在中/英文开源数据集 aishell （中文），wenetspeech（中文），librispeech （英文）上的预训练模型。模型包含 deepspeech2 模型以及 conformer/transformer 模型。
-  支持中/英文的模型训练功能。
-  支持命令行方式的模型推理，可使用 `paddlespeech asr --model xxx --input xxx.wav` 方式调用各个预训练模型进行推理。
-  支持流式 ASR 的服务部署，也支持输出时间戳。
-  支持个性化场景的部署。

<a name="3"></a>
## 3. 使用教程

<a name="31"></a>
## 3.1 预训练模型
支持的预训练模型列表：[released_model](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/released_model.md)。
其中效果较好的模型为 Ds2 Online Wenetspeech ASR0 Model 以及 Conformer Online Wenetspeech ASR1 Model。 两个模型都支持流式 ASR。
更多关于模型设计的部分，可以参考 AIStudio 教程：
- [Deepspeech2](https://aistudio.baidu.com/aistudio/projectdetail/3866807)
- [Transformer](https://aistudio.baidu.com/aistudio/projectdetail/3470110)

<a name="32"></a>
## 3.2 模型训练

模型的训练的参考脚本存放在 [examples](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples) 中，并按照 `examples/数据集/模型` 存放，数据集主要支持 aishell 和 librispeech，模型支持 deepspeech2 模型和 u2 (conformer/transformer) 模型。
具体的执行脚本的步骤记录在 `run.sh` 当中。具体可参考： [asr1](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell/asr1)


<a name="33"></a>
## 3.3 模型推理

PP-ASR 支持在使用`pip install paddlespeech`后 使用命令行的方式来使用预训练模型进行推理。

具体支持的功能包括：

- 对单条音频进行预测
- 使用管道的方式对多条音频进行预测
- 支持 RTF 的计算

具体的使用方式可以参考： [speech_recognition](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/speech_recognition/README_cn.md) 


<a name="34"></a>
## 3.4 服务部署

PP-ASR 支持流式ASR的服务部署。支持 语音识别 + 标点处理两个功能同时使用。

server 的 demo： [streaming_asr_server](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos/streaming_asr_server)

![image](https://user-images.githubusercontent.com/87408988/168255342-1fc790c0-16f4-4540-a861-db239076727c.png)

网页上使用 asr server 的效果展示：[streaming_asr_demo_video](https://paddlespeech.readthedocs.io/en/latest/streaming_asr_demo_video.html)

关于服务部署方面的更多资料，可以参考 AIStudio 教程：
- [流式服务-模型部分](https://aistudio.baidu.com/aistudio/projectdetail/3839884)
- [流式服务](https://aistudio.baidu.com/aistudio/projectdetail/4017905)

<a name="35"></a>
## 3.5 支持个性化场景部署

针对个性化场景部署，提供了特征提取（fbank） => 推理模型（打分库）=> TLG（WFST， token, lexicon, grammar）的 C++ 程序。具体参考 [speechx](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/speechx)。  
如果想快速了解和使用，可以参考： [custom_streaming_asr](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/custom_streaming_asr/README_cn.md)

关于支持个性化场景部署的更多资料，可以参考 AIStudio 教程：
- [定制化识别](https://aistudio.baidu.com/aistudio/projectdetail/4021561)


<a name="4"></a>

## 4. 快速开始

关于如何使用 PP-ASR，可以看这里的 [install](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install_cn.md)，其中提供了 **简单**、**中等**、**困难** 三种安装方式。如果想体验 paddlespeech 的推理功能，可以用 **简单** 安装方式。


================================================
FILE: docs/source/asr/data_preparation.md
================================================
# Data Preparation

## Generate Manifest

*DeepSpeech2 on PaddlePaddle* accepts a textual **manifest** file as its data set interface. A manifest file summarizes a set of speech data, with each line containing some meta data (e.g. file path, transcription, duration) of one audio clip, in [JSON](http://www.json.org/) format, such as:

```
{"audio_filepath": "/home/work/.cache/paddle/Libri/134686/1089-134686-0001.flac", "duration": 3.275, "text": "stuff it into you his belly counselled him"}
{"audio_filepath": "/home/work/.cache/paddle/Libri/134686/1089-134686-0007.flac", "duration": 4.275, "text": "a cold lucid indifference reigned in his soul"}
```
To use your custom data, you only need to generate such manifest files to summarize the dataset. Given such summarized manifests, training, inference and all other modules can be aware of where to access the audio files, as well as their meta data including the transcription labels.

For how to generate such manifest files, please refer to `examples/librispeech/local/librispeech.py`, which will download data and generate manifest files for LibriSpeech dataset.

## Compute Mean & Stddev for Normalizer

To perform z-score normalization (zero-mean, unit stddev) upon audio features, we have to estimate in advance the mean and standard deviation of the features, with some training samples:

```bash
python3 utils/compute_mean_std.py \
--num_samples 2000 \
--spectrum_type linear \
--manifest_path examples/librispeech/data/manifest.train \
--output_path examples/librispeech/data/mean_std.npz
```

It will compute the mean and standard deviation of the power spectrum feature with 2000 randomly sampled audio clips listed in `examples/librispeech/data/manifest.train` and save the results to `examples/librispeech/data/mean_std.npz` for further usage.


## Build Vocabulary

A vocabulary of possible characters is required to convert the transcription into a list of token indices for training, and in decoding, to convert from a list of indices back to the text again. Such a character-based vocabulary can be built with `utils/build_vocab.py`.

```bash
python3 utils/build_vocab.py \
--count_threshold 0 \
--vocab_path examples/librispeech/data/eng_vocab.txt \
--manifest_paths examples/librispeech/data/manifest.train
```

It will write a vocabulary file `examples/librispeech/data/eng_vocab.txt` with all transcription text in `examples/librispeech/data/manifest.train`, without vocabulary truncation (`--count_threshold 0`).


================================================
FILE: docs/source/asr/feature_list.md
================================================
# Features

### Dataset
* Aishell
* Librispeech
* THCHS30
* TIMIT

### Speech Recognition

* Non-Streaming
  * [Baidu's DeepSpeech2](http://proceedings.mlr.press/v48/amodei16.pdf)
  * [Transformer](https://arxiv.org/abs/1706.03762)
  * [Conformer](https://arxiv.org/abs/2005.08100)

* Streaming
  * [Baidu's DeepSpeech2](http://proceedings.mlr.press/v48/amodei16.pdf)
  * [U2](https://arxiv.org/pdf/2012.05481.pdf)

### Language Model

* Ngram

### Decoder

* ctc greedy
* ctc prefix beam search
* greedy
* beam search
* attention rescore

### Deployment

* Paddle Inference

### Alignment  

* MFA  
* CTC Alignment  

### Speech Frontend

* Audio
  * Auto Gain
* Feature
  * kaldi fbank
  * kaldi mfcc
  * linear
  * delta delta

### Speech Augmentation

* Audio
  - Volume Perturbation
  - Speed Perturbation
  - Shifting Perturbation
  - Online Bayesian normalization
  - Noise Perturbation
  - Impulse Response
* Spectrum
  - SpecAugment
  - Adaptive SpecAugment

### Tokenizer

* Chinese/English Character
* English Word
* Sentence Piece

### Word Segmentation

*  [mmseg](http://technology.chtsai.org/mmseg/)

### Grapheme To Phoneme

* syllable
* phoneme


================================================
FILE: docs/source/asr/models_introduction.md
================================================
# Models introduction
## Streaming DeepSpeech2
The implemented architecture of Deepspeech2 online model is based on [Deepspeech2 model](https://arxiv.org/pdf/1512.02595.pdf) with some changes.
The model is mainly composed of 2D convolution subsampling layers and stacked single-direction rnn layers.

To illustrate the model implementation clearly, 3 parts are described in detail.  
- Data Preparation
- Encoder
- Decoder

In addition, the training process and the testing process are also introduced.

The architecture of the model is shown in Fig.1.

<p align="center">
    <img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleSpeech/develop/docs/images/ds2onlineModel.png" width=800>
    <br/>Fig.1 The Architecture of deepspeech2 online model
</p>


### Data Preparation
#### Vocabulary
For English data, the vocabulary dictionary is composed of the 26 English characters together with " ' ", space, \<blank\>, \<unk\> and \<eos\>. The \<blank\> represents the blank label in CTC, the \<unk\> represents the unknown character and the \<eos\> represents the start and the end characters. For Mandarin, the vocabulary dictionary is composed of the Chinese characters collected from the training set, plus three additional characters: \<blank\>, \<unk\> and \<eos\>. For both English and Mandarin data, the default indexes are \<blank\>=0, \<unk\>=1 and \<eos\>=last index.
```
# The code to build vocabulary
cd examples/aishell/s0
python3 ../../../utils/build_vocab.py \
    --unit_type="char" \
    --count_threshold=0 \
    --vocab_path="data/vocab.txt" \
    --manifest_paths "data/manifest.train.raw" "data/manifest.dev.raw"

# vocabulary for aishell dataset (Mandarin)
vi examples/aishell/s0/data/vocab.txt

# vocabulary for librispeech dataset (English)
vi examples/librispeech/s0/data/vocab.txt
```

#### CMVN
For CMVN, a subset or the whole of the training set is selected and used to compute the feature mean and std.
```
# The code to compute the feature mean and std
cd examples/aishell/s0
python3 ../../../utils/compute_mean_std.py \
    --manifest_path="data/manifest.train.raw" \
    --spectrum_type="linear" \
    --delta_delta=false \
    --stride_ms=10.0 \
    --window_ms=20.0 \
    --sample_rate=16000 \
    --use_dB_normalization=True \
    --num_samples=2000 \
    --num_workers=10 \
    --output_path="data/mean_std.json"

```

#### Feature Extraction
For feature extraction, three methods are implemented, which are linear (FFT without using filter bank), fbank and mfcc.
Currently, the released deepspeech2 online model uses the linear feature extraction method.
```
# The code for feature extraction
vi paddlespeech/s2t/frontend/featurizer/audio_featurizer.py
```

### Encoder
The encoder is composed of two 2D convolution subsampling layers and several stacked single-direction rnn layers. The 2D convolution subsampling layers extract feature representation from the raw audio feature and reduce the length of the audio feature at the same time. After passing through the convolution subsampling layers, the feature representation is input into the stacked rnn layers. For the stacked rnn layers, both LSTM cells and GRU cells are available. Adding one fully connected (fc) layer after the stacked rnn layers is optional. If the number of stacked rnn layers is less than 5, adding one fc layer after the stacked rnn layers is recommended.

The code of Encoder is in:
```
vi paddlespeech/s2t/models/ds2_online/deepspeech2.py
```

### Decoder
To get the character probabilities of each frame, the feature representation of each frame output from the encoder is input into a projection layer, which is implemented as a dense layer to do feature projection. The output dim of the projection layer is the same as the vocabulary size. After the projection layer, the softmax function is used to transform the frame-level feature representation into the probabilities of characters. During model inference, the character probabilities of each frame are input into the CTC decoder to get the final speech recognition results.

The code of the decoder is in:
```
# The code of constructing the decoder in model
vi paddlespeech/s2t/models/ds2_online/deepspeech2.py
# The code of CTC Decoder
vi paddlespeech/s2t/modules/ctc.py
```

### Training Process
Using the command below, you can train the deepspeech2 online model.
```
cd examples/aishell/s0
bash run.sh --stage 0 --stop_stage 2 --model_type online --conf_path conf/deepspeech2_online.yaml
```
The detailed commands are:
```  
# The code for training in the run.sh
set -e
source path.sh

gpus=2,3,5,7
stage=0
stop_stage=5
conf_path=conf/deepspeech2_online.yaml     # conf/deepspeech2.yaml | conf/deepspeech2_online.yaml
avg_num=1
model_type=online    # online | offline

source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

avg_ckpt=avg_${avg_num}
ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}')
echo "checkpoint name ${ckpt}"

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # prepare data
    bash ./local/data.sh || exit -1
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # train model, all `ckpt` under `exp` dir
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path}  ${ckpt} ${model_type}
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # avg n best model
    avg.sh exp/${ckpt}/checkpoints ${avg_num}
fi
```
By using the command above, the training process can be started. There are 6 stages (0 to 5) in "run.sh", and the first 3 stages are used for the training process. Stage 0 is used for data preparation, in which the dataset will be downloaded, and the manifest files of the datasets, vocabulary dictionary, and CMVN file will be generated in "./data/". Stage 1 is used for training the model; the log files and model checkpoints are saved in "exp/deepspeech2_online/". Stage 2 is used to generate the final model for prediction by averaging the top-k model parameters based on validation loss.

### Testing Process
Using the command below, you can test the deepspeech2 online model.
```
bash run.sh --stage 3 --stop_stage 5 --model_type online --conf_path conf/deepspeech2_online.yaml
```
The detailed commands are:
```
conf_path=conf/deepspeech2_online.yaml
avg_num=1
model_type=online
avg_ckpt=avg_${avg_num}

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # test ckpt avg_n
    CUDA_VISIBLE_DEVICES=2 ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${model_type}|| exit -1
fi

if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    # export ckpt avg_n
    CUDA_VISIBLE_DEVICES=5 ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit ${model_type}
fi

if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
    # test export ckpt avg_n
    CUDA_VISIBLE_DEVICES=0 ./local/test_export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt}.jit ${model_type}|| exit -1
fi
```
After the training process, we use stages 3, 4, and 5 for the testing process. Stage 3 tests the model generated in stage 2 and provides the CER of the test set. Stage 4 transforms the model from a dynamic graph to a static graph by using the "paddle.jit" library. Stage 5 tests the model in a static graph.

## Non-Streaming DeepSpeech2
The deepspeech2 offline model is similar to the deepspeech2 online model. The main difference between them is that the offline model uses stacked bi-directional rnn layers and does not use the fc layer, while the online model uses single-direction rnn layers. For the stacked bi-directional rnn layers in the offline model, the rnn cell and gru cell are provided to use.

The architecture of the model is shown in Fig.2.
<p align="center">
    <img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleSpeech/develop/docs/images/ds2offlineModel.png" width=800>
    <br/>Fig.2 The Architecture of deepspeech2 offline model
</p>


For data preparation and decoder, the deepspeech2 offline model is the same as the deepspeech2 online model.

The code of encoder and decoder for deepspeech2 offline model is in:
```
vi paddlespeech/s2t/models/ds2/deepspeech2.py
```

The training and testing processes of the deepspeech2 offline model are very similar to those of the deepspeech2 online model.
Only a few differences should be noted.

For training and testing, the "model_type" and the "conf_path" must be set.
```
# Training offline
cd examples/aishell/s0
bash run.sh --stage 0 --stop_stage 2 --model_type offline --conf_path conf/deepspeech2.yaml
 ```
```
# Testing offline
cd examples/aishell/s0
bash run.sh --stage 3 --stop_stage 5 --model_type offline --conf_path conf/deepspeech2.yaml
```


================================================
FILE: docs/source/asr/ngram_lm.md
================================================
# Ngram LM

## Prepare Language Model

A language model is required to improve the decoder's performance. We have prepared two language models (with lossy compression) for users to download and try. One is for English and the other is for Mandarin. The bash script to download an LM is the example's `local/download_lm_*.sh`.

For example, users can simply run this to download the prepared Mandarin language models:

```bash
cd examples/aishell
source path.sh
bash local/download_lm_ch.sh
```
If you wish to train your own better language model, please refer to [KenLM](https://github.com/kpu/kenlm) for tutorials.
Here we provide some tips to show how we prepare our English and Mandarin language models.
You can take it as a reference when you train your own.

### English LM

The English corpus is from the [Common Crawl Repository](http://commoncrawl.org) and you can download it from [statmt](http://data.statmt.org/ngrams/deduped_en). We use part en.00 to train our English language model. There are some preprocessing steps before training:

  * Characters not in \['A-Za-z0-9\s'\] (\s represents whitespace characters) are removed and Arabic numbers are converted to English numbers like 1000 to one thousand.
  * Repeated whitespace characters are squeezed to one and the beginning whitespace characters are removed. Notice that all transcriptions are lowercase, so all characters are converted to lowercase.
  * Top 400,000 most frequent words are selected to build the vocabulary and the rest are replaced with 'UNKNOWNWORD'.

Now the preprocessing is done and we get a clean corpus to train the language model. Our released language model is trained with arguments '-o 5 --prune 0 1 1 1 1'. '-o 5' means the max order of the language model is 5. '--prune 0 1 1 1 1' represents count thresholds for each order and more specifically it will prune singletons for orders two and higher. To save disk storage we convert the ARPA file to 'trie' binary file with arguments '-a 22 -q 8 -b 8'. '-a' represents the maximum number of leading bits of pointers in 'trie' to chop. '-q -b' are quantization parameters for probability and backoff.

### Mandarin LM

Different from the English language model, the Mandarin language model is character-based where each token is a Chinese character. We use the internal corpus to train the released Mandarin language models. The corpus contains billions of tokens. The preprocessing has a tiny difference from the English language model and the main steps include:

  * The beginning and trailing whitespace characters are removed.
  * English punctuations and Chinese punctuations are removed.
  * A whitespace character between two tokens is inserted.

Please note that the released language models only contain Simplified Chinese characters. After preprocessing is done we can begin to train the language model. The key training arguments are '-o 5 --prune 0 1 2 4 4' for the small LM and '-o 5' for the large LM. Please refer to the section above for the meaning of each argument. We also convert the ARPA file to a binary file using default settings.


================================================
FILE: docs/source/asr/quick_start.md
================================================
# Quick Start of Speech-to-Text
Several shell scripts provided in `./examples/tiny/local` will help us to quickly give it a try, for most major modules, including data preparation, model training, case inference, and model evaluation, with a few public datasets (e.g. [LibriSpeech](http://www.openslr.org/12/), [Aishell](http://www.openslr.org/33)). Reading these examples will also help you to understand how to make it work with your data.

Some of the scripts in `./examples` are not configured with GPUs. If you want to train with 8 GPUs, please modify `CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7`. If you don't have any GPU available, please set `CUDA_VISIBLE_DEVICES=` to use CPUs instead. Besides, if an out-of-memory problem occurs, just reduce `batch_size` to fit.

Let's take a tiny sampled subset of [LibriSpeech dataset](http://www.openslr.org/12/) for instance.

- Go to the directory

    ```bash
    cd examples/tiny
    ```
    Notice that this is only a toy example with a tiny sampled subset of LibriSpeech. If you would like to try with the complete dataset (would take several days for training), please go to `examples/librispeech` instead.
- Source env
    ```bash
    source path.sh
    ```
    **Must do this before you start to do anything.**
    Set `MAIN_ROOT` as project dir. Using the default `deepspeech2` model as `MODEL`, you can change this in the script.
- Main entry point
    ```bash
    bash run.sh
    ```
    This is just a demo, please make sure every `step` works well before the next `step`.

More detailed information is provided in the following sections. Wish you a happy journey with the *DeepSpeech on PaddlePaddle* ASR engine!

## Training a model
The key steps of training for the Mandarin language are the same as those for the English language, and we have also provided an example for Mandarin training with Aishell in `examples/aishell/local`. As mentioned above, please execute `sh data.sh`, `sh train.sh` and `sh test.sh` to do data preparation, training, and testing respectively.

## Evaluate a Model
To evaluate a model's performance quantitatively, please run:
```bash
CUDA_VISIBLE_DEVICES=0 bash local/test.sh
```
The error rate (default: word error rate; can be set with `error_rate_type`) will be printed.

We provide two types of CTC decoders: *CTC greedy decoder* and *CTC beam search decoder*. The *CTC greedy decoder* is an implementation of the simple best-path decoding algorithm, selecting at each timestep the most likely token, thus being greedy and locally optimal. The [*CTC beam search decoder*](https://arxiv.org/abs/1408.2873) otherwise utilizes a heuristic breadth-first graph search for reaching near-global optimality; it also requires a pre-trained KenLM language model for better scoring and ranking. The decoder type can be set with the argument `decoding_method`.


================================================
FILE: docs/source/audio/_static/custom.css
================================================
/* Cap the width of the theme's main content column. */
.wy-nav-content {
    max-width: 80%;
}

/* Tables nested inside a `.table` container: grey table background. */
.table table {
    background: #b9b9b9;
}

/* Cells of those nested tables: white background. */
.table table td {
    background: #FFF;
}


================================================
FILE: docs/source/audio/_templates/module.rst_t
================================================
{%- if show_headings %}
{{- basename | e | heading }}

{% endif -%}
.. automodule:: {{ qualname }}
{%- for option in automodule_options %}
   :{{ option }}:
{%- endfor %}


================================================
FILE: docs/source/audio/_templates/package.rst_t
================================================
{%- macro automodule(modname, options) -%}
.. automodule:: {{ modname }}
{%- for option in options %}
   :{{ option }}:
{%- endfor %}
{%- endmacro %}

{%- macro toctree(docnames) -%}
.. toctree::
   :maxdepth: {{ maxdepth }}
{% for docname in docnames %}
   {{ docname }}
{%- endfor %}
{%- endmacro %}

{%- if is_namespace %}
{{- [pkgname, "namespace"] | join(" ") | e | heading }}
{% else %}
{{- pkgname | e | heading }}
{% endif %}

{%- if is_namespace %}
.. py:module:: {{ pkgname }}
{% endif %}

{%- if modulefirst and not is_namespace %}
{{ automodule(pkgname, automodule_options) }}
{% endif %}

{%- if subpackages %}
Subpackages
-----------

{{ toctree(subpackages) }}
{% endif %}

{%- if submodules %}
Submodules
----------
{% if separatemodules %}
{{ toctree(submodules) }}
{% else %}
{%- for submodule in submodules %}
{% if show_headings %}
{{- submodule | e | heading(2) }}
{% endif %}
{{ automodule(submodule, automodule_options) }}
{% endfor %}
{%- endif %}
{%- endif %}

{%- if not modulefirst and not is_namespace %}
Module contents
---------------

{{ automodule(pkgname, automodule_options) }}
{% endif %}


================================================
FILE: docs/source/audio/_templates/toc.rst_t
================================================
{{ header | heading }}

.. toctree::
   :maxdepth: {{ maxdepth }}
{% for docname in docnames %}
   {{ docname }}
{%- endfor %}


================================================
FILE: docs/source/audio/conf.py
================================================
# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
import os
import sys
sys.path.insert(0, os.path.abspath('../..'))

# -- Project information -----------------------------------------------------

project = 'PaddleAudio'
copyright = '2022, PaddlePaddle'
author = 'PaddlePaddle'

# The short X.Y version
version = ''
# The full version, including alpha/beta/rc tags
release = '0.2.0'

# -- General configuration ---------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.intersphinx',
    'sphinx.ext.mathjax',
    'sphinx.ext.viewcode',
    'sphinx.ext.napoleon',
]

napoleon_google_docstring = True

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'

# The master toctree document.
master_doc = 'index'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
#
# Use an explicit language code: Sphinx 5.0+ reports ``language = None`` as an
# invalid configuration value and falls back to 'en' anyway.
language = 'en'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = []

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = None

# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#

import sphinx_rtd_theme
html_theme = 'sphinx_rtd_theme'
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
smartquotes = False

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_logo = '../images/paddle.png'
html_css_files = [
    'custom.css',
]

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself.  Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}

# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'PaddleAudiodoc'

# -- Options for LaTeX output ------------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'PaddleAudio.tex', 'PaddleAudio Documentation', 'PaddlePaddle',
     'manual'),
]

# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [(master_doc, 'paddleaudio', 'PaddleAudio Documentation', [author],
              1)]

# -- Options for Texinfo output ----------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'PaddleAudio', 'PaddleAudio Documentation', author,
     'PaddleAudio', 'One line description of project.', 'Miscellaneous'),
]

# -- Options for Epub output -------------------------------------------------

# Bibliographic Dublin Core info.
epub_title = project

# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#
# epub_identifier = ''

# A unique identification for the text.
#
# epub_uid = ''

# A list of files that should not be packed into the epub file.
epub_exclude_files = ['search.html']

# -- Extension configuration -------------------------------------------------

# -- Options for intersphinx extension ---------------------------------------

# Example configuration for intersphinx: refer to the Python standard library.
# Entries use the named form ``name: (target URI, inventory location)``; the
# bare ``{URI: None}`` form is deprecated and is no longer accepted by recent
# Sphinx releases.
intersphinx_mapping = {'python': ('https://docs.python.org/3', None)}


================================================
FILE: docs/source/audio/index.rst
================================================
.. PaddleAudio documentation master file, created by
   sphinx-quickstart on Tue Mar 22 15:57:16 2022.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

Welcome to PaddleAudio's documentation!
=======================================

.. toctree::
   :maxdepth: 1

   Index <self>


API References
--------------

.. toctree::
   :maxdepth: 2
   :titlesonly:

   paddleaudio

================================================
FILE: docs/source/audio_api/modules.rst
================================================
audio
=====

.. toctree::
   :maxdepth: 4

   paddleaudio


================================================
FILE: docs/source/audio_api/paddleaudio.backends.common.rst
================================================
paddleaudio.backends.common module
==================================

.. automodule:: paddleaudio.backends.common
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/audio_api/paddleaudio.backends.no_backend.rst
================================================
paddleaudio.backends.no\_backend module
=======================================

.. automodule:: paddleaudio.backends.no_backend
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/audio_api/paddleaudio.backends.rst
================================================
paddleaudio.backends package
============================

.. automodule:: paddleaudio.backends
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddleaudio.backends.common
   paddleaudio.backends.no_backend
   paddleaudio.backends.soundfile_backend
   paddleaudio.backends.sox_io_backend
   paddleaudio.backends.utils


================================================
FILE: docs/source/audio_api/paddleaudio.backends.soundfile_backend.rst
================================================
paddleaudio.backends.soundfile\_backend module
==============================================

.. automodule:: paddleaudio.backends.soundfile_backend
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/audio_api/paddleaudio.backends.sox_io_backend.rst
================================================
paddleaudio.backends.sox\_io\_backend module
============================================

.. automodule:: paddleaudio.backends.sox_io_backend
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/audio_api/paddleaudio.backends.utils.rst
================================================
paddleaudio.backends.utils module
=================================

.. automodule:: paddleaudio.backends.utils
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/audio_api/paddleaudio.compliance.kaldi.rst
================================================
paddleaudio.compliance.kaldi module
===================================

.. automodule:: paddleaudio.compliance.kaldi
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/audio_api/paddleaudio.compliance.librosa.rst
================================================
paddleaudio.compliance.librosa module
=====================================

.. automodule:: paddleaudio.compliance.librosa
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/audio_api/paddleaudio.compliance.rst
================================================
paddleaudio.compliance package
==============================

.. automodule:: paddleaudio.compliance
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddleaudio.compliance.kaldi
   paddleaudio.compliance.librosa


================================================
FILE: docs/source/audio_api/paddleaudio.datasets.dataset.rst
================================================
paddleaudio.datasets.dataset module
===================================

.. automodule:: paddleaudio.datasets.dataset
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/audio_api/paddleaudio.datasets.esc50.rst
================================================
paddleaudio.datasets.esc50 module
=================================

.. automodule:: paddleaudio.datasets.esc50
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/audio_api/paddleaudio.datasets.gtzan.rst
================================================
paddleaudio.datasets.gtzan module
=================================

.. automodule:: paddleaudio.datasets.gtzan
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/audio_api/paddleaudio.datasets.hey_snips.rst
================================================
paddleaudio.datasets.hey\_snips module
======================================

.. automodule:: paddleaudio.datasets.hey_snips
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/audio_api/paddleaudio.datasets.rirs_noises.rst
================================================
paddleaudio.datasets.rirs\_noises module
========================================

.. automodule:: paddleaudio.datasets.rirs_noises
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/audio_api/paddleaudio.datasets.rst
================================================
paddleaudio.datasets package
============================

.. automodule:: paddleaudio.datasets
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddleaudio.datasets.dataset
   paddleaudio.datasets.esc50
   paddleaudio.datasets.gtzan
   paddleaudio.datasets.hey_snips
   paddleaudio.datasets.rirs_noises
   paddleaudio.datasets.tess
   paddleaudio.datasets.urban_sound
   paddleaudio.datasets.voxceleb


================================================
FILE: docs/source/audio_api/paddleaudio.datasets.tess.rst
================================================
paddleaudio.datasets.tess module
================================

.. automodule:: paddleaudio.datasets.tess
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/audio_api/paddleaudio.datasets.urban_sound.rst
================================================
paddleaudio.datasets.urban\_sound module
========================================

.. automodule:: paddleaudio.datasets.urban_sound
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/audio_api/paddleaudio.datasets.voxceleb.rst
================================================
paddleaudio.datasets.voxceleb module
====================================

.. automodule:: paddleaudio.datasets.voxceleb
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/audio_api/paddleaudio.features.layers.rst
================================================
paddleaudio.features.layers module
==================================

.. automodule:: paddleaudio.features.layers
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/audio_api/paddleaudio.features.rst
================================================
paddleaudio.features package
============================

.. automodule:: paddleaudio.features
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddleaudio.features.layers


================================================
FILE: docs/source/audio_api/paddleaudio.functional.functional.rst
================================================
paddleaudio.functional.functional module
========================================

.. automodule:: paddleaudio.functional.functional
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/audio_api/paddleaudio.functional.rst
================================================
paddleaudio.functional package
==============================

.. automodule:: paddleaudio.functional
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddleaudio.functional.functional
   paddleaudio.functional.window


================================================
FILE: docs/source/audio_api/paddleaudio.functional.window.rst
================================================
paddleaudio.functional.window module
====================================

.. automodule:: paddleaudio.functional.window
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/audio_api/paddleaudio.kaldi.kaldi.rst
================================================
paddleaudio.kaldi.kaldi module
==============================

.. automodule:: paddleaudio.kaldi.kaldi
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/audio_api/paddleaudio.kaldi.rst
================================================
paddleaudio.kaldi package
=========================

.. automodule:: paddleaudio.kaldi
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddleaudio.kaldi.kaldi


================================================
FILE: docs/source/audio_api/paddleaudio.metric.eer.rst
================================================
paddleaudio.metric.eer module
=============================

.. automodule:: paddleaudio.metric.eer
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/audio_api/paddleaudio.metric.rst
================================================
paddleaudio.metric package
==========================

.. automodule:: paddleaudio.metric
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddleaudio.metric.eer


================================================
FILE: docs/source/audio_api/paddleaudio.rst
================================================
paddleaudio package
===================

.. automodule:: paddleaudio
   :members:
   :undoc-members:
   :show-inheritance:

Subpackages
-----------

.. toctree::
   :maxdepth: 4

   paddleaudio.backends
   paddleaudio.compliance
   paddleaudio.datasets
   paddleaudio.features
   paddleaudio.functional
   paddleaudio.kaldi
   paddleaudio.metric
   paddleaudio.sox_effects
   paddleaudio.utils


================================================
FILE: docs/source/audio_api/paddleaudio.sox_effects.rst
================================================
paddleaudio.sox\_effects package
================================

.. automodule:: paddleaudio.sox_effects
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddleaudio.sox_effects.sox_effects


================================================
FILE: docs/source/audio_api/paddleaudio.sox_effects.sox_effects.rst
================================================
paddleaudio.sox\_effects.sox\_effects module
============================================

.. automodule:: paddleaudio.sox_effects.sox_effects
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/audio_api/paddleaudio.utils.download.rst
================================================
paddleaudio.utils.download module
=================================

.. automodule:: paddleaudio.utils.download
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/audio_api/paddleaudio.utils.env.rst
================================================
paddleaudio.utils.env module
============================

.. automodule:: paddleaudio.utils.env
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/audio_api/paddleaudio.utils.error.rst
================================================
paddleaudio.utils.error module
==============================

.. automodule:: paddleaudio.utils.error
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/audio_api/paddleaudio.utils.log.rst
================================================
paddleaudio.utils.log module
============================

.. automodule:: paddleaudio.utils.log
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/audio_api/paddleaudio.utils.numeric.rst
================================================
paddleaudio.utils.numeric module
================================

.. automodule:: paddleaudio.utils.numeric
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/audio_api/paddleaudio.utils.rst
================================================
paddleaudio.utils package
=========================

.. automodule:: paddleaudio.utils
   :members:
   :undoc-members:
   :show-inheritance:

Submodules
----------

.. toctree::
   :maxdepth: 4

   paddleaudio.utils.download
   paddleaudio.utils.env
   paddleaudio.utils.error
   paddleaudio.utils.log
   paddleaudio.utils.numeric
   paddleaudio.utils.sox_utils
   paddleaudio.utils.tensor_utils
   paddleaudio.utils.time


================================================
FILE: docs/source/audio_api/paddleaudio.utils.sox_utils.rst
================================================
paddleaudio.utils.sox\_utils module
===================================

.. automodule:: paddleaudio.utils.sox_utils
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/audio_api/paddleaudio.utils.tensor_utils.rst
================================================
paddleaudio.utils.tensor\_utils module
======================================

.. automodule:: paddleaudio.utils.tensor_utils
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/audio_api/paddleaudio.utils.time.rst
================================================
paddleaudio.utils.time module
=============================

.. automodule:: paddleaudio.utils.time
   :members:
   :undoc-members:
   :show-inheritance:


================================================
FILE: docs/source/cls/custom_dataset.md
================================================
# Customize Dataset for Audio Classification

Following this tutorial you can customize your dataset for audio classification task by using `paddlespeech`.

A base class of classification dataset is `paddlespeech.audio.datasets.dataset.AudioClassificationDataset`. To customize your dataset you should write a dataset class derived from `AudioClassificationDataset`. 

Assuming you have some wave files stored in your own directory, you should prepare a meta file with the information of file paths and labels. For example, the absolute path of it is `/PATH/TO/META_FILE.txt`:
```
/PATH/TO/WAVE_FILE/1.wav cat
/PATH/TO/WAVE_FILE/2.wav cat
/PATH/TO/WAVE_FILE/3.wav dog
/PATH/TO/WAVE_FILE/4.wav dog
```
Here is an example to build your custom dataset in `custom_dataset.py`:

```python
from paddlespeech.audio.datasets.dataset import AudioClassificationDataset

class CustomDataset(AudioClassificationDataset):
    '''
    Audio classification dataset built from a plain-text meta file.

    Each non-empty line of `meta_file` holds a wave file path followed by its
    label, separated by whitespace, e.g. `/PATH/TO/WAVE_FILE/1.wav cat`.
    '''
    meta_file = '/PATH/TO/META_FILE.txt'
    # List all the class labels
    label_list = [
        'cat',
        'dog',
    ]

    def __init__(self, **kwargs):
        # Collect file paths and label indices first, then hand them to the
        # base class together with any extra keyword arguments.
        files, labels = self._get_data()
        super(CustomDataset, self).__init__(
            files=files, labels=labels, feat_type='raw', **kwargs)

    def _get_data(self):
        '''
        Read the meta file and return the wave files and their labels.

        Returns:
            A tuple `(files, labels)` where `files` is a list of file paths
            and `labels` is a list of indices into `label_list`.

        Raises:
            ValueError: If a line has no label field or the label is not in
                `label_list`.
        '''
        files = []
        labels = []

        with open(self.meta_file) as f:
            for line in f:
                line = line.strip()
                # Skip blank lines (e.g. a trailing newline at the end of file).
                if not line:
                    continue
                # Split on the last run of whitespace so that file paths
                # containing spaces and tab/multi-space separators still work.
                file, label_str = line.rsplit(maxsplit=1)
                files.append(file)
                labels.append(self.label_list.index(label_str))

        return files, labels
```

Then you can build dataset and data loader from `CustomDataset`:
```python
import paddle
from paddlespeech.audio.transform.spectrogram import LogMelSpectrogram

from custom_dataset import CustomDataset

# Feature config should be aligned with the pretrained model
sample_rate = 32000
feat_conf = {
  'sr': sample_rate,
  'n_fft': 1024,
  'hop_length': 320,
  'window': 'hann',
  'win_length': 1024,
  'f_min': 50.0,
  'f_max': 14000.0,
  'n_mels': 64,
}

train_ds = CustomDataset(sample_rate=sample_rate)
feature_extractor = LogMelSpectrogram(**feat_conf)

train_sampler = paddle.io.DistributedBatchSampler(
    train_ds, batch_size=4, shuffle=True, drop_last=False)
train_loader = paddle.io.DataLoader(
    train_ds,
    batch_sampler=train_sampler,
    return_list=True,
    use_buffer_reader=True)
```

Train model with `CustomDataset`:
```python
from paddlespeech.cls.models import cnn14
from paddlespeech.cls.models import SoundClassifier

backbone = cnn14(pretrained=True, extract_embedding=True)
model = SoundClassifier(backbone, num_class=len(train_ds.label_list))
optimizer = paddle.optimizer.Adam(
    learning_rate=1e-6, parameters=model.parameters())
criterion = paddle.nn.loss.CrossEntropyLoss()

steps_per_epoch = len(train_sampler)
epochs = 10
for epoch in range(1, epochs + 1):
    model.train()

    for batch_idx, batch in enumerate(train_loader):
        waveforms, labels = batch
        # Need a padding when lengths of waveforms differ in a batch.
        feats = feature_extractor(waveforms)        
        feats = paddle.transpose(feats, [0, 2, 1])
        logits = model(feats)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        if isinstance(optimizer._learning_rate,
                        paddle.optimizer.lr.LRScheduler):
            optimizer._learning_rate.step()
        optimizer.clear_grad()

        # Calculate loss
        avg_loss = float(loss)

        # Calculate metrics
        preds = paddle.argmax(logits, axis=1)
        num_corrects = (preds == labels).numpy().sum()
        num_samples = feats.shape[0]

        avg_acc = num_corrects / num_samples

        print_msg = 'Epoch={}/{}, Step={}/{}'.format(
            epoch, epochs, batch_idx + 1, steps_per_epoch)
        print_msg += ' loss={:.4f}'.format(avg_loss)
        print_msg += ' acc={:.4f}'.format(avg_acc)
        print_msg += ' lr={:.6f}'.format(optimizer.get_lr())
        print(print_msg)
```

If you want to save model checkpoints and evaluate on a specific dataset, please see `paddlespeech/cls/exp/panns/train.py` for more details.


================================================
FILE: docs/source/cls/quick_start.md
================================================
# Quick Start of Audio Classification
Several shell scripts provided in `./examples/esc50/cls0` will help us to quickly give it a try, for most major modules, including data preparation, model training, model evaluation, with [ESC50](https://github.com/karolpiczak/ESC-50) dataset.

Some of the scripts in `./examples` are not configured with GPUs. If you want to train with 8 GPUs, please modify `CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7`. If you don't have any GPU available, please set `CUDA_VISIBLE_DEVICES=` to use CPUs instead.

Let's start an audio classification task with the following steps:

- Go to the directory

    ```bash
    cd examples/esc50/cls0
    ```

- Source env
    ```bash
    source path.sh
    ```

- Main entry point
    ```bash
    CUDA_VISIBLE_DEVICES=0 ./run.sh 1
    ```

This demo includes fine-tuning, evaluating and deploying an audio classification model. More detailed information is provided in the following sections. 

## Fine-tuning a model
PANNs([PANNs: Large-Scale Pretrained Audio Neural Networks for Audio Pattern Recognition](https://arxiv.org/pdf/1912.10211.pdf)) are pretrained models with [Audioset](https://research.google.com/audioset/). They can be easily used to extract audio embeddings for audio classification task.

To start a model fine-tuning, please run:
```bash
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
feat_backend=numpy
./local/train.sh ${ngpu} ${feat_backend}
```

## Deploy a model
Once you save a model checkpoint, you can export it to a static graph and deploy it with a Python script:

- Export to a static graph
    ```bash
    ./local/export.sh ${ckpt_dir} ./export
    ```
    The argument `ckpt_dir` should be a directory in which a model checkpoint is stored, for example `checkpoint/epoch_50`.

    The static graph will be exported to `./export`.

- Inference
    ```bash
    ./local/static_model_infer.sh ${infer_device} ./export ${audio_file}
    ```
    The argument `infer_device` can be `cpu` or `gpu`, and it means which device to be used to infer. And `audio_file` should be a wave file with name `*.wav`.


================================================
FILE: docs/source/conf.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
import os
import sys

import recommonmark.parser
import sphinx_rtd_theme
# Put the repository root and the `audio` directory at the front of sys.path
# so that the modules documented with autodoc can be imported. The relative
# paths are resolved against the current working directory.
sys.path.insert(0, os.path.abspath('../..'))
sys.path.insert(0, os.path.abspath('../../audio'))


# Modules that autodoc mocks instead of importing while building the docs.
autodoc_mock_imports = ["soundfile", "librosa"]

# -- Project information -----------------------------------------------------

project = 'paddle speech'
copyright = '2021, paddlespeech-developers'
author = 'paddlespeech-developers'

# The full version, including alpha/beta/rc tags
release = '2.1'

# -- General configuration ---------------------------------------------------
# Map the `.md` suffix to recommonmark's CommonMark parser.
# NOTE(review): `myst_parser` is also listed in `extensions` below, so two
# Markdown parsers are configured -- confirm which one the Sphinx version in
# use actually applies to `.md` files.
source_parsers = {
    '.md': recommonmark.parser.CommonMarkParser,
}
# File suffixes that are treated as documentation sources.
source_suffix = ['.rst', '.md']

# Name of the root document (without suffix) and the Pygments highlight style.
master_doc = 'index'
pygments_style = 'sphinx'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.viewcode',
    "sphinx_rtd_theme",
    'sphinx.ext.mathjax',
    'numpydoc',
    'sphinx.ext.autosummary',
    'myst_parser',
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = [
    '_build',
]
# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#

html_theme = 'sphinx_rtd_theme'
# Directory that holds the theme, as reported by the sphinx_rtd_theme package.
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
# Keep quotes and dashes as typed instead of converting them to typographic
# characters.
smartquotes = False

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# Logo image for the HTML output; the path points outside the source directory
# (presumably docs/images/paddle.png -- verify the file exists).
html_logo = '../images/paddle.png'
# Extra style sheets to include in the HTML pages.
html_css_files = [
    'custom.css',
]

# -- Extension configuration -------------------------------------------------
# numpydoc_show_class_members = False


================================================
FILE: docs/source/demo_video.rst
================================================
Demo Video 
==================

.. raw:: html
    
    <video controls width="1024">

    <source src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/PaddleSpeech_Demo.mp4"
            type="video/mp4">

    Sorry, your browser doesn't support embedded videos.
    </video>


================================================
FILE: docs/source/dependencies.md
================================================
# The Dependencies
## By apt-get
### The base dependencies:
```
bc flac jq vim tig tree pkg-config libsndfile1 libflac-dev libvorbis-dev libboost-dev swig python3-dev
```
### The dependencies of kenlm:
```
build-essential cmake libboost-system-dev libboost-thread-dev libboost-program-options-dev libboost-test-dev libeigen3-dev zlib1g-dev libbz2-dev liblzma-dev gcc-5 g++-5
```
### The dependencies of sox:
```
libvorbis-dev libmp3lame-dev libmad-ocaml-dev
```
## By make or setup
```
kenlm
sox
mfa
openblas
kaldi
sctk
AutoLog
swig-decoder
python_kaldi_features
```


================================================
FILE: docs/source/index.rst
================================================
Welcome to PaddleSpeech
==============================================

**PaddleSpeech** is a speech toolkit implemented with PaddlePaddle.


Contents
--------

.. toctree::
   :maxdepth: 1
   :caption: Introduction

   introduction

.. toctree::
   :maxdepth: 1
   :caption: Quick Start

   install
   asr/quick_start
   tts/quick_start
   
.. toctree::
   :maxdepth: 1
   :caption: Speech-to-Text

   asr/models_introduction
   asr/data_preparation
   asr/feature_list
   asr/ngram_lm

.. toctree::
   :maxdepth: 1
   :caption: Text-to-Speech

   tts/basic_usage
   tts/advanced_usage
   tts/zh_text_frontend
   tts/models_introduction
   tts/gan_vocoder
   tts/demo
   tts/demo_2
   

.. toctree::
   :maxdepth: 1
   :caption: Released Models

   released_model

.. toctree::
   :maxdepth: 1
   :caption: Demos

   demo_video
   streaming_asr_demo_video
   tts_demo_video
   streaming_tts_demo_video


.. toctree::
   :maxdepth: 1
   :caption: Acknowledgement

   asr/reference


.. toctree::
   :maxdepth: 2
   :caption: API Reference

   paddleaudio <audio_api/paddleaudio>
   paddlespeech.audio <api/paddlespeech.audio>
   paddlespeech.cli <api/paddlespeech.cli>
   paddlespeech.cls <api/paddlespeech.cls>
   paddlespeech.kws <api/paddlespeech.kws>
   paddlespeech.resource <api/paddlespeech.resource>
   paddlespeech.s2t <api/paddlespeech.s2t>
   paddlespeech.server <api/paddlespeech.server>
   paddlespeech.t2s <api/paddlespeech.t2s>
   paddlespeech.text <api/paddlespeech.text>
   paddlespeech.utils <api/paddlespeech.utils>
   paddlespeech.vector <api/paddlespeech.vector>


================================================
FILE: docs/source/install.md
================================================
([简体中文](./install_cn.md)|English)
# Installation
There are 3 ways to use `PaddleSpeech`. According to the degree of difficulty, the 3 ways can be divided into **Easy**, **Medium**, and **Hard**. You can choose one of the 3 ways to install `PaddleSpeech`.

| Way | Function                                                     | Support|
|:---- |:----------------------------------------------------------- |:----|
| Easy     | (1) Use command-line functions of PaddleSpeech. <br> (2) Experience PaddleSpeech on Ai Studio. | Linux, Mac(not support M1 chip)，Windows ( For more information about installation, see [#1195](https://github.com/PaddlePaddle/PaddleSpeech/discussions/1195)) |
| Medium     | Support major functions, such as using the `ready-made` examples and using PaddleSpeech to train your model.                                           | Linux, Mac(not support M1 chip, not support training models), Windows (not support training models) |
| Hard     | Support full function of Paddlespeech, including using join ctc decoder with kaldi([asr2](../../examples/librispeech/asr2 )), training n-gram language model, Montreal-Forced-Aligner, and so on. And you are more able to be a developer! | Ubuntu |

## Prerequisites
- Python >= 3.7
- PaddlePaddle latest version (please refer to the [Installation Guide](https://www.paddlepaddle.org.cn/documentation/docs/en/beginners_guide/index_en.html))
- C++ compilation environment
- Tip: For Linux and Mac, do not use command `sh` instead of command `bash` in installation document.
- Tip: We recommend that you install `paddlepaddle` from https://mirror.baidu.com/pypi/simple and install `paddlespeech` from https://pypi.tuna.tsinghua.edu.cn/simple. 

## Easy: Get the Basic Function (Support Linux, Mac, and Windows)
- If you are new to `PaddleSpeech` and want to try it easily without using your own machine, we recommend using [AI Studio](https://aistudio.baidu.com/aistudio/index). There is a step-by-step [tutorial](https://aistudio.baidu.com/aistudio/education/group/info/25130) for `PaddleSpeech`, and you can use the basic functions of `PaddleSpeech` on a free machine.
- If you want to use the command line function of Paddlespeech, you need to complete the following steps to install `PaddleSpeech`. For more information about how to use the command line function, you can see the [cli](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/cli).
### Install Conda
Conda is a management system of the environment. You can go to [miniconda](https://docs.conda.io/en/latest/miniconda.html)  (select a version py>=3.7) to download and install the conda.
And then Install  conda dependencies for `paddlespeech` :

```bash
conda install -y -c conda-forge sox libsndfile bzip2
```
### Install C++ Compilation Environment 
(If you already have a C++ compilation environment, you can skip this step.)
#### Windows
You need to install `Visual Studio` to make the C++ compilation environment.

https://visualstudio.microsoft.com/visual-cpp-build-tools/

You can also see [#1195](https://github.com/PaddlePaddle/PaddleSpeech/discussions/1195) for more help.

#### Mac
```bash
brew install gcc
```
#### Linux
```bash
#  centos
sudo yum install gcc gcc-c++
```
```bash
# ubuntu
sudo apt install build-essential
```
```bash
# Others
conda install -y -c conda-forge gcc_linux-64=8.4.0 gxx_linux-64=8.4.0
```
### Install PaddleSpeech 
Some users may fail to install `kaldiio` due to the default download source. In that case, install `pytest-runner` first:
```bash
pip install pytest-runner -i https://pypi.tuna.tsinghua.edu.cn/simple
```
Then you can use the following commands:
```bash
pip install paddlepaddle -i https://mirror.baidu.com/pypi/simple
pip install paddlespeech -i https://pypi.tuna.tsinghua.edu.cn/simple
```
You can also specify the version of paddlepaddle or install the develop version. 
```bash
# install 2.4.1 version. Note, 2.4.1 is just an example, please follow the minimum dependency of paddlepaddle for your selection
pip install paddlepaddle==2.4.1 -i https://mirror.baidu.com/pypi/simple
# install develop version
pip install paddlepaddle==0.0.0 -f https://www.paddlepaddle.org.cn/whl/linux/cpu-mkl/develop.html
```
> If you encounter problem with downloading **nltk_data** while using paddlespeech, it maybe due to your poor network, we suggest you download the [nltk_data](https://paddlespeech.cdn.bcebos.com/Parakeet/tools/nltk_data.tar.gz) provided by us, and extract it to your `${HOME}`.

> If you fail to install paddlespeech-ctcdecoders, only the inference of the deepspeech2 model is affected; other models can still be used.
 
## Medium: Get the Major Functions (Support Linux, mac and windows not support training)
If you want to get the major function of  `paddlespeech`, you need to do following steps:
### Git clone PaddleSpeech
You need to `git clone` this repository at first.
```bash
git clone https://github.com/PaddlePaddle/PaddleSpeech.git
cd PaddleSpeech
```

### Install Conda
Conda is a management system of the environment. You can go to [miniconda](https://docs.conda.io/en/latest/miniconda.html) to select a version (py>=3.7). For Windows, you can follow the installation guide step by step, and for Linux and Mac, you can use the following commands:
```bash
# download the miniconda
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -P tools/
# install the miniconda
bash tools/Miniconda3-latest-Linux-x86_64.sh -b
# conda init
$HOME/miniconda3/bin/conda init
# activate the conda
bash
```
Then you can create a conda virtual environment using the following command:
```bash
conda create -y -p tools/venv python=3.8
```
Activate the conda virtual environment:
```bash
conda activate tools/venv
```
Install  conda dependencies for `paddlespeech` :
```bash
conda install -y -c conda-forge sox libsndfile swig bzip2
```
### Install C++ Compilation Environment 
(If you already have a C++ compilation environment, you can skip this step.)
Do not forget to install `gcc` and `gxx` on your system.
You can choose to use the scripts below to install them.

```bash
#  centos
sudo yum install gcc gcc-c++
```
```bash
# ubuntu
sudo apt install build-essential
```
```bash
# Others
conda install -y -c conda-forge gcc_linux-64=8.4.0 gxx_linux-64=8.4.0
```
(Tip: Do not use the last command if you want to install the **Hard** way.)
### Install PaddlePaddle
You can choose the `PaddlePaddle` version based on your system. For example, for CUDA 10.2, CuDNN7.6 install paddlepaddle-gpu 2.4.1:
```bash
# Note, 2.4.1 is just an example, please follow the minimum dependency of paddlepaddle for your selection
python3 -m pip install paddlepaddle-gpu==2.4.1 -i https://mirror.baidu.com/pypi/simple
```
You can also install the develop version of paddlepaddle. For example, for CUDA 10.2, CuDNN7.6 install paddlepaddle-gpu develop:
```bash
python3 -m pip install paddlepaddle-gpu==0.0.0.post102 -f https://www.paddlepaddle.org.cn/whl/linux/gpu/develop.html
```
### Install PaddleSpeech 
You can install `paddlespeech` with the following command, then you can use the `ready-made` examples in `paddlespeech`:
```bash
# Some users may fail to install `kaldiio` due to the default download source, you can install `pytest-runner` at first；
pip install pytest-runner -i https://pypi.tuna.tsinghua.edu.cn/simple
# Make sure you are in the root directory of PaddleSpeech
pip install . -i https://pypi.tuna.tsinghua.edu.cn/simple
```

## Hard: Get the Full Function (Support Ubuntu)
### Prerequisites
- Ubuntu >= 16.04.
- choice 1: working with `Ubuntu` Docker Container.
- choice 2: working on `Ubuntu` with `root` privilege. 

To avoid the trouble of environment setup, running in a Docker container is highly recommended. Otherwise, if you work on `Ubuntu` with `root` privilege, you can still complete the installation.

### Choice 1: Running in Docker Container (Recommend)
Docker is an open-source tool to build, ship, and run distributed applications in an isolated environment. If you  do not have a Docker environment, please refer to [Docker](https://www.docker.com/). If you will use GPU version, you also need to install [nvidia-docker](https://github.com/NVIDIA/nvidia-docker).

We provide docker images containing the latest PaddleSpeech code, and all environment and package dependencies are pre-installed. All you have to do is to **pull and run the docker image**. Then you can enjoy PaddleSpeech without any extra steps.

Get these images and guidance in [docker hub](https://hub.docker.com/repository/docker/paddlecloud/paddlespeech), including CPU, GPU, ROCm environment versions. 

If you have some customized requirements about automatic building docker images, you can get it in github repo [PaddlePaddle/PaddleCloud](https://github.com/PaddlePaddle/PaddleCloud/tree/main/tekton).

### Choice 2: Running in Ubuntu with Root Privilege
- Install `build-essential` by apt
```bash
sudo apt install build-essential
```
- Clone this repository
```bash
git clone https://github.com/PaddlePaddle/PaddleSpeech.git
# Enter the PaddleSpeech dir
cd PaddleSpeech
```
### Install the Conda
```bash
# download the miniconda
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -P tools/
# install the miniconda
bash tools/Miniconda3-latest-Linux-x86_64.sh -b
# conda init
$HOME/miniconda3/bin/conda init
# use the "bash" command to make the conda environment works
bash
# create a conda virtual environment
conda create -y -p tools/venv python=3.8
# Activate the conda virtual environment:
conda activate tools/venv
# Install the conda packages
conda install -y -c conda-forge sox libsndfile swig bzip2 libflac bc
```
### Install PaddlePaddle
Make sure you have GPU and the paddlepaddle version is right. For example, for CUDA 10.2, CuDNN7.6 install paddle 2.4.1:
```bash
# Note, 2.4.1 is just an example, please follow the minimum dependency of paddlepaddle for your selection
python3 -m pip install paddlepaddle-gpu==2.4.1 -i https://mirror.baidu.com/pypi/simple
```
You can also install the develop version of paddlepaddle. For example, for CUDA 10.2, CuDNN7.6 install paddlepaddle-gpu develop:
```bash
python3 -m pip install paddlepaddle-gpu==0.0.0.post102 -f https://www.paddlepaddle.org.cn/whl/linux/gpu/develop.html
```
### Install PaddleSpeech in Developing Mode
Some users may fail to install `kaldiio` due to the default download source, you can install `pytest-runner` at first:
```bash
pip install pytest-runner -i https://pypi.tuna.tsinghua.edu.cn/simple
```
Then install PaddleSpeech:
```bash
pip install -e .[develop] -i https://pypi.tuna.tsinghua.edu.cn/simple
```
### Install the Kaldi (Optional)
```bash
pushd tools
bash extras/install_openblas.sh
bash extras/install_kaldi.sh
popd
```


================================================
FILE: docs/source/install_cn.md
================================================
(简体中文|[English](./install.md))
# 安装方法
`PaddleSpeech` 有三种安装方法。根据安装的难易程度，这三种方法可以分为 **简单**, **中等** 和 **困难**.
| 方式 | 功能                                                         | 支持系统            |
| :--- | :----------------------------------------------------------- | :------------------ |
| 简单 | (1) 使用 PaddleSpeech 的命令行功能. <br> (2) 在 Aistudio上体验 PaddleSpeech. | Linux, Mac(不支持M1芯片)，Windows (安装详情查看[#1195](https://github.com/PaddlePaddle/PaddleSpeech/discussions/1195)) |
| 中等 | 支持 PaddleSpeech 主要功能，比如使用已有 examples 中的模型和使用 PaddleSpeech 来训练自己的模型. | Linux, Mac(不支持M1芯片，不支持训练), Windows(不支持训练)               |
| 困难 | 支持 PaddleSpeech 的各项功能，包含结合 kaldi 使用 join ctc decoder 方式解码 ([asr2](../../examples/librispeech/asr2 ))，训练语言模型,使用强制对齐等。并且你更能成为一名开发者！ | Ubuntu              |
## 先决条件
- Python >= 3.7
- 最新版本的 PaddlePaddle (请看 [安装向导](https://www.paddlepaddle.org.cn/documentation/docs/en/beginners_guide/index_en.html))
- C++ 编译环境
- 提示: 对于 Linux 和 Mac，请不要使用 `sh` 代替安装文档中的 `bash`
- 提示: 我们建议在安装 `paddlepaddle` 的时候使用百度源 https://mirror.baidu.com/pypi/simple ，而在安装 `paddlespeech` 的时候使用清华源 https://pypi.tuna.tsinghua.edu.cn/simple 。

## 简单： 获取基本功能(支持 Linux，Mac 和 Windows)
- 如果你是一个刚刚接触 `PaddleSpeech` 的新人并且想要很方便地体验一下该项目。我们建议你体验一下 [AI Studio](https://aistudio.baidu.com/aistudio/index)。我们在 AI Studio上面建立了一个让你一步一步运行体验来使用 `PaddleSpeech` 的[教程](https://aistudio.baidu.com/aistudio/education/group/info/25130)。
- 如果你想使用 `PaddleSpeech` 的命令行功能，你需要跟随下面的步骤来安装 `PaddleSpeech`。如果你想了解更多关于使用 `PaddleSpeech` 命令行功能的信息，你可以参考 [cli](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/paddlespeech/cli)。
### 安装 Conda
Conda是一个包管理的环境。你可以前往 [miniconda](https://docs.conda.io/en/latest/miniconda.html) 去下载并安装 conda（请下载 py>=3.7 的版本）。
然后你需要安装 `paddlespeech` 的 conda 依赖:
```bash
conda install -y -c conda-forge sox libsndfile bzip2
```
### 安装 C++ 编译环境
(如果你系统上已经安装了 C++ 编译环境，请忽略这一步。)
#### Windows
对于 Windows 系统，需要安装 `Visual Studio` 来完成 C++ 编译环境的安装。

https://visualstudio.microsoft.com/visual-cpp-build-tools/

你可以前往讨论区[#1195](https://github.com/PaddlePaddle/PaddleSpeech/discussions/1195)获取更多帮助。

#### Mac
```bash
brew install gcc
```
#### Linux
```bash
#  centos
sudo yum install gcc gcc-c++
```
```bash
# ubuntu
sudo apt install build-essential
```
```bash
# Others
conda install -y -c conda-forge gcc_linux-64=8.4.0 gxx_linux-64=8.4.0
```
### 安装 PaddleSpeech
部分用户系统由于默认源的问题，安装中会出现 kaldiio 安装出错的问题，建议首先安装 pytest-runner:
```bash
pip install pytest-runner -i https://pypi.tuna.tsinghua.edu.cn/simple 
```
然后你可以使用如下命令：
```bash
pip install paddlepaddle -i https://mirror.baidu.com/pypi/simple
pip install paddlespeech -i https://pypi.tuna.tsinghua.edu.cn/simple
```
你也可以安装指定版本的paddlepaddle，或者安装 develop 版本。
```bash
# 安装2.3.1版本. 注意：2.3.1只是一个示例，请按照对paddlepaddle的最小依赖进行选择。
pip install paddlepaddle==2.3.1 -i https://mirror.baidu.com/pypi/simple
# 安装 develop 版本
pip install paddlepaddle==0.0.0 -f https://www.paddlepaddle.org.cn/whl/linux/cpu-mkl/develop.html
```
> 如果您在使用 paddlespeech 的过程中遇到关于下载 **nltk_data** 的问题，可能是您的网络不佳，我们建议您下载我们提供的 [nltk_data](https://paddlespeech.cdn.bcebos.com/Parakeet/tools/nltk_data.tar.gz) 并解压缩到您的 `${HOME}` 目录下。

> 如果出现 paddlespeech-ctcdecoders 无法安装的问题，无须担心，这个只影响 deepspeech2 模型的推理，不影响其他模型的使用。

## 中等： 获取主要功能（支持 Linux， Mac 和 Windows 不支持训练）
如果你想要使用 `paddlespeech` 的主要功能。你需要完成以下几个步骤
### Git clone PaddleSpeech
你需要先 git clone 本仓库
```bash
git clone https://github.com/PaddlePaddle/PaddleSpeech.git
cd PaddleSpeech
```
### 安装 Conda
Conda 是一个包管理的环境。你可以前往 [miniconda](https://docs.conda.io/en/latest/miniconda.html) 去下载并安装 conda（请下载 py>=3.7 的版本）。windows 系统可以使用 conda 的向导安装，linux 和 mac 可以使用以下的命令：
```bash
# 下载 miniconda
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -P tools/
# 安装 miniconda
bash tools/Miniconda3-latest-Linux-x86_64.sh -b
# conda 初始化
$HOME/miniconda3/bin/conda init
# 激活 conda
bash
```
然后你可以创建一个 conda 的虚拟环境：
```bash
conda create -y -p tools/venv python=3.8
```
激活 conda 虚拟环境：
```bash
conda activate tools/venv
```
安装 `paddlespeech` 的 conda 依赖：
```bash
conda install -y -c conda-forge sox libsndfile swig bzip2
```
### 安装 C++ 编译环境
(如果你系统上已经安装了 C++ 编译环境，请忽略这一步。)
你可以使用如下的步骤来安装 C++ 的编译环境 `gcc`  和 `gxx`：
```bash
#  centos
sudo yum install gcc gcc-c++
```
```bash
# ubuntu
sudo apt install build-essential
```
```bash
# Others
conda install -y -c conda-forge gcc_linux-64=8.4.0 gxx_linux-64=8.4.0
```
（提示： 如果你想使用**困难**方式完成安装，请不要使用最后一条命令）
### 安装 PaddlePaddle
你可以根据系统配置选择 PaddlePaddle 版本，例如系统使用 CUDA 10.2， CuDNN7.6，你可以安装 paddlepaddle-gpu 2.4.1：
```bash
# 注意：2.4.1 只是一个示例，请按照对paddlepaddle的最小依赖进行选择。
python3 -m pip install paddlepaddle-gpu==2.4.1 -i https://mirror.baidu.com/pypi/simple
```
你也可以安装 develop 版本的PaddlePaddle. 例如系统使用 CUDA 10.2， CuDNN7.6 ，你可以安装 paddlepaddle-gpu develop:
```bash
python3 -m pip install paddlepaddle-gpu==0.0.0.post102 -f https://www.paddlepaddle.org.cn/whl/linux/gpu/develop.html
```
### 安装 PaddleSpeech
最后安装 `paddlespeech`，这样你就可以使用 `paddlespeech` 中已有的 examples：
```bash
# 部分用户系统由于默认源的问题，安装中会出现 kaldiio 安装出错的问题，建议首先安装 pytest-runner:
pip install pytest-runner -i https://pypi.tuna.tsinghua.edu.cn/simple 
# 请确保目前处于PaddleSpeech项目的根目录
pip install . -i https://pypi.tuna.tsinghua.edu.cn/simple
```
## 困难： 获取所有功能（支持 Ubuntu）
### 先决条件
- Ubuntu >= 16.04
- 选择 1： 使用`Ubuntu` docker。
- 选择 2： 使用`Ubuntu` ，并且拥有 root 权限。

为了避免各种环境配置问题，我们非常推荐你使用 docker 容器。如果你不想使用 docker，但是可以使用拥有 root 权限的 Ubuntu 系统，你也可以完成**困难**方式的安装。
### 选择1： 使用 Docker 容器（推荐）
Docker 是一种开源工具，用于在和系统本身环境相隔离的环境中构建、发布和运行各类应用程序。如果您没有 Docker 运行环境，请参考 [Docker 官网](https://www.docker.com/)进行安装，如果您准备使用 GPU 版本镜像，还需要提前安装好 [nvidia-docker](https://github.com/NVIDIA/nvidia-docker) 。 

我们提供了包含最新 PaddleSpeech 代码的 docker 镜像，并预先安装好了所有的环境和库依赖，您只需要**拉取并运行 docker 镜像**，无需其他任何额外操作，即可开始享用 PaddleSpeech 的所有功能。

在 [Docker Hub](https://hub.docker.com/repository/docker/paddlecloud/paddlespeech) 中获取这些镜像及相应的使用指南，包括 CPU、GPU、ROCm 版本。

如果您对自动化制作 docker 镜像感兴趣，或有自定义需求，请访问 [PaddlePaddle/PaddleCloud](https://github.com/PaddlePaddle/PaddleCloud/tree/main/tekton) 做进一步了解。
完成这些以后，你就可以在 docker 容器中执行训练、推理和超参 fine-tune。
### 选择2： 使用有 root 权限的 Ubuntu
- 使用apt安装 `build-essential`
```bash
sudo apt install build-essential
```
- 克隆 `PaddleSpeech` 仓库
```bash
git clone https://github.com/PaddlePaddle/PaddleSpeech.git
# 进入PaddleSpeech目录
cd PaddleSpeech
```
### 安装 Conda
```bash
# 下载 miniconda
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -P tools/
# 安装 miniconda
bash tools/Miniconda3-latest-Linux-x86_64.sh -b
# conda 初始化
$HOME/miniconda3/bin/conda init
# 激活 conda
bash
# 创建 Conda 虚拟环境
conda create -y -p tools/venv python=3.8
# 激活 Conda 虚拟环境:
conda activate tools/venv
# 安装 Conda 包
conda install -y -c conda-forge sox libsndfile swig bzip2 libflac bc
```
### 安装 PaddlePaddle
请确认你系统是否有 GPU，并且使用了正确版本的 paddlepaddle。例如系统使用 CUDA 10.2, CuDNN7.6 ，你可以安装 paddlepaddle-gpu 2.4.1：
```bash
# 注意：2.4.1 只是一个示例，请按照对paddlepaddle的最小依赖进行选择。
python3 -m pip install paddlepaddle-gpu==2.4.1 -i https://mirror.baidu.com/pypi/simple
```
你也可以安装 develop 版本的PaddlePaddle. 例如系统使用 CUDA 10.2， CuDNN7.6 ，你可以安装 paddlepaddle-gpu develop:
```bash
python3 -m pip install paddlepaddle-gpu==0.0.0.post102 -f https://www.paddlepaddle.org.cn/whl/linux/gpu/develop.html
```
### 用开发者模式安装 PaddleSpeech
部分用户系统由于默认源的问题，安装中会出现 kaldiio 安装出错的问题，建议首先安装 pytest-runner:
```bash
pip install pytest-runner -i https://pypi.tuna.tsinghua.edu.cn/simple 
```
然后安装 PaddleSpeech：
```bash
pip install -e .[develop] -i https://pypi.tuna.tsinghua.edu.cn/simple
```
### 安装 Kaldi（可选）
```bash
pushd tools
bash extras/install_openblas.sh
bash extras/install_kaldi.sh
popd
```


================================================
FILE: docs/source/introduction.md
================================================
# PaddleSpeech

## What is PaddleSpeech?
PaddleSpeech is an open-source toolkit on the PaddlePaddle platform for two critical tasks in Speech -  Speech-to-Text (Automatic Speech Recognition, ASR) and Text-to-Speech Synthesis (TTS), with modules involving state-of-the-art and influential models.

## What can PaddleSpeech do?

### Speech-to-Text
PaddleSpeech ASR mainly consists of components below:
- Implementation of models and commonly used neural network layers.
- Dataset abstraction and common data preprocessing pipelines.
- Ready-to-run experiments.

PaddleSpeech ASR provides you with a complete ASR pipeline, including:
- Data Preparation
    - Build vocabulary
    - Compute Cepstral mean and variance normalization (CMVN)
    - Feature extraction
        - linear
        - fbank (also support kaldi feature)
        - mfcc
- Acoustic Models
    - Deepspeech2 (Streaming and Non-Streaming)
    - Transformer (Streaming and Non-Streaming)
    - Conformer (Streaming and Non-Streaming)
- Decoder
    - ctc greedy search (used in DeepSpeech2, Transformer and Conformer)
    - ctc beam search (used in DeepSpeech2, Transformer and Conformer)
    - attention decoding (used in Transformer and Conformer)
    - attention rescoring (used in Transformer and Conformer)

Speech-to-Text helps you train the ASR model very simply.

### Text-to-Speech
TTS mainly consists of components below:
- Implementation of models and commonly used neural network layers.
- Dataset abstraction and common data preprocessing pipelines.
- Ready-to-run experiments.

PaddleSpeech TTS provides you with a complete TTS pipeline, including:
- Text FrontEnd
    - Rule based Chinese frontend.
- Acoustic Models
    - FastSpeech2
    - SpeedySpeech
    - TransformerTTS
    - Tacotron2
- Vocoders
    - Multi Band MelGAN
    - Parallel WaveGAN
    - WaveFlow
- Voice Cloning
    - Transfer Learning from Speaker Verification to Multispeaker Text-to-Speech Synthesis
    - GE2E

Text-to-Speech helps you to train TTS models with simple commands.


================================================
FILE: docs/source/reference.md
================================================
# Reference

We borrowed a lot of code from these repos to build `model` and `engine`, thanks for these great works and the open-source community!

* [espnet](https://github.com/espnet/espnet/blob/master/LICENSE)
- Apache-2.0 License
- python/shell `utils`
- kaldi feat preprocessing
- data pipeline and `transformer`
- some tts models, like `fastspeech2` and GAN-based `vocoder`

* [wenet](https://github.com/wenet-e2e/wenet/blob/main/LICENSE)
- Apache-2.0 License
- U2 model
- Building TLG based Graph
- websocket server & client

* [kaldi](https://github.com/kaldi-asr/kaldi/blob/master/COPYING)
- Apache-2.0 License
- shell/perl/python utils.
- feature bins.
- WFST based decoding for LM integration.

* [delta](https://github.com/Delta-ML/delta/blob/master/LICENSE)
- Apache-2.0 License
- `engine` arch

* [speechbrain](https://github.com/speechbrain/speechbrain/blob/develop/LICENSE)
- Apache-2.0 License
- ECAPA-TDNN SV model
- ASR with CTC and pre-trained wav2vec2 models.


* [chainer](https://github.com/chainer/chainer/blob/master/LICENSE)
- MIT License
- Updater, Trainer, and some utils.

* [librosa](https://github.com/librosa/librosa/blob/main/LICENSE.md)
- ISC License
- Audio feature

* [ThreadPool](https://github.com/progschj/ThreadPool/blob/master/COPYING)
- zlib License
- ThreadPool

* [g2pW](https://github.com/GitYCC/g2pW/blob/master/LICENCE)
- Apache-2.0 license

* [transformers](https://github.com/huggingface/transformers)
- Apache-2.0 License
- Wav2vec2.0


================================================
FILE: docs/source/released_model.md
================================================
# Released Models

> !!! Since PaddlePaddle supports 0-D tensors from version 2.5.0, previously exported PaddleSpeech static models will not work with it; please re-export the static model.

## Speech-to-Text Models

### Speech Recognition Model
Acoustic Model | Training Data | Token-based | Size | Descriptions | CER | WER | Hours of speech | Example Link | Inference Type | static_model | 
:-------------:| :------------:| :-----: | -----: | :-----: |:-----:| :-----:  | :-----:  | :-----: | :-----: | :-----: |
[Ds2 Online Wenetspeech ASR0 Model](https://paddlespeech.cdn.bcebos.com/s2t/wenetspeech/asr0/asr0_deepspeech2_online_wenetspeech_ckpt_1.0.4.model.tar.gz) | Wenetspeech Dataset | Char-based | 1.2 GB  | 2 Conv + 5 LSTM layers | 0.152 (test\_net, w/o LM) <br> 0.2417 (test\_meeting, w/o LM) <br> 0.053 (aishell, w/ LM) |-| 10000 h | - | onnx/inference/python |-|
[Ds2 Online Aishell ASR0 Model](https://paddlespeech.cdn.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_fbank161_ckpt_0.2.1.model.tar.gz) | Aishell Dataset | Char-based | 491 MB  | 2 Conv + 5 LSTM layers | 0.0666 |-| 151 h | [Ds2 Online Aishell ASR0](../../examples/aishell/asr0) | onnx/inference/python |-|
[Ds2 Offline Aishell ASR0 Model](https://paddlespeech.cdn.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_offline_aishell_ckpt_1.0.1.model.tar.gz)| Aishell Dataset | Char-based | 1.4 GB | 2 Conv + 5 bidirectional LSTM layers| 0.0554 |-| 151 h | [Ds2 Offline Aishell ASR0](../../examples/aishell/asr0) | inference/python |-|
[Conformer Online Wenetspeech ASR1 Model](https://paddlespeech.cdn.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar.gz) | WenetSpeech Dataset | Char-based | 457 MB  | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring| 0.11 (test\_net) 0.1879 (test\_meeting) |-| 10000 h |- | python |-|
[Conformer U2PP Online Wenetspeech ASR1 Model](https://paddlespeech.cdn.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_u2pp_wenetspeech_ckpt_1.3.0.model.tar.gz) | WenetSpeech Dataset | Char-based | 540 MB  | Encoder:Conformer, Decoder:BiTransformer, Decoding method: Attention rescoring| 0.047198 (aishell test\_-1) 0.059212 (aishell test\_16) |-| 10000 h |- | python |[FP32](https://paddlespeech.cdn.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_u2pp_wenetspeech_ckpt_1.3.0.model.tar.gz) </br>[INT8](https://paddlespeech.cdn.bcebos.com/s2t/wenetspeech/asr1/static/asr1_chunk_conformer_u2pp_wenetspeech_static_quant_1.3.0.model.tar.gz) |
[Conformer Online Aishell ASR1 Model](https://paddlespeech.cdn.bcebos.com/s2t/aishell/asr1/asr1_conformer_aishell_ckpt_1.5.0.model.tar.gz) | Aishell Dataset | Char-based | 189 MB  | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring| 0.051968 |-| 151 h | [Conformer Online Aishell ASR1](../../examples/aishell/asr1) | python |-|
[Conformer Offline Aishell ASR1 Model](https://paddlespeech.cdn.bcebos.com/s2t/aishell/asr1/asr1_conformer_aishell_ckpt_1.0.1.model.tar.gz) | Aishell Dataset | Char-based | 189 MB  | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring | 0.0460 |-| 151 h | [Conformer Offline Aishell ASR1](../../examples/aishell/asr1) | python |-|
[Transformer Aishell ASR1 Model](https://paddlespeech.cdn.bcebos.com/s2t/aishell/asr1/asr1_transformer_aishell_ckpt_0.1.1.model.tar.gz) | Aishell Dataset | Char-based | 128 MB | Encoder:Transformer, Decoder:Transformer, Decoding method: Attention rescoring | 0.0523 |-| 151 h | [Transformer  Aishell ASR1](../../examples/aishell/asr1) | python |-|
[Ds2 Offline Librispeech ASR0 Model](https://paddlespeech.cdn.bcebos.com/s2t/librispeech/asr0/asr0_deepspeech2_offline_librispeech_ckpt_1.0.1.model.tar.gz)| Librispeech Dataset | Char-based | 1.3 GB | 2 Conv + 5 bidirectional LSTM layers| - |0.0467| 960 h | [Ds2 Offline Librispeech ASR0](../../examples/librispeech/asr0) | inference/python |-|
[Conformer Librispeech ASR1 Model](https://paddlespeech.cdn.bcebos.com/s2t/librispeech/asr1/asr1_conformer_librispeech_ckpt_0.1.1.model.tar.gz) | Librispeech Dataset | subword-based | 191 MB | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring |-| 0.0338 | 960 h | [Conformer Librispeech ASR1](../../examples/librispeech/asr1) | python |-|
[Transformer Librispeech ASR1 Model](https://paddlespeech.cdn.bcebos.com/s2t/librispeech/asr1/asr1_transformer_librispeech_ckpt_0.1.1.model.tar.gz) | Librispeech Dataset | subword-based | 131 MB  | Encoder:Transformer, Decoder:Transformer, Decoding method: Attention rescoring |-| 0.0381 | 960 h | [Transformer Librispeech ASR1](../../examples/librispeech/asr1) | python |-|
[Transformer Librispeech ASR2 Model](https://paddlespeech.cdn.bcebos.com/s2t/librispeech/asr2/asr2_transformer_librispeech_ckpt_0.1.1.model.tar.gz) | Librispeech Dataset | subword-based | 131 MB  | Encoder:Transformer, Decoder:Transformer, Decoding method: JoinCTC w/ LM |-| 0.0240 | 960 h | [Transformer Librispeech ASR2](../../examples/librispeech/asr2) | python |-|
[Conformer TALCS ASR1 Model](https://paddlespeech.cdn.bcebos.com/s2t/tal_cs/asr1/asr1_conformer_talcs_ckpt_1.4.0.model.tar.gz) | TALCS Dataset | subword-based | 470 MB | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring |-| 0.0844 | 587 h | [Conformer TALCS ASR1](../../examples/tal_cs/asr1) | python |-|

### Self-Supervised Pre-trained Model
Model | Pre-Train Method | Pre-Train Data | Finetune Data | Size | Descriptions | CER | WER |  Example Link |
:-------------:| :------------:| :-----: | -----: | :-----: |:-----:| :-----:  | :-----:  | :-----: | 
[Wav2vec2-large-960h-lv60-self Model](https://paddlespeech.cdn.bcebos.com/wav2vec/wav2vec2-large-960h-lv60-self.pdparams) | wav2vec2 | Librispeech and LV-60k Dataset (5.3w h) | - | 1.18 GB |Pre-trained Wav2vec2.0 Model | - | - | - | 
[Wav2vec2ASR-large-960h-librispeech Model](https://paddlespeech.cdn.bcebos.com/s2t/librispeech/asr3/wav2vec2ASR-large-960h-librispeech_ckpt_1.3.1.model.tar.gz) | wav2vec2 | Librispeech and LV-60k Dataset (5.3w h) | Librispeech (960 h) | 718 MB |Encoder: Wav2vec2.0, Decoder: CTC, Decoding method: Greedy search | - | 0.0189 | [Wav2vecASR Librispeech ASR3](../../examples/librispeech/asr3) |
[Wav2vec2-large-wenetspeech-self Model](https://paddlespeech.cdn.bcebos.com/s2t/aishell/asr3/wav2vec2-large-wenetspeech-self_ckpt_1.3.0.model.tar.gz) | wav2vec2 | Wenetspeech Dataset (1w h) | - | 714 MB |Pre-trained Wav2vec2.0 Model | - | - | - | 
[Wav2vec2ASR-large-aishell1 Model](https://paddlespeech.cdn.bcebos.com/s2t/aishell/asr3/wav2vec2ASR-large-aishell1_ckpt_1.4.0.model.tar.gz) | wav2vec2 | Wenetspeech Dataset (1w h) | aishell1 (train set) | 1.18 GB |Encoder: Wav2vec2.0, Decoder: CTC, Decoding method: Greedy search | 0.0510 | - | - |
[Hubert-large-lv60 Model](https://paddlespeech.cdn.bcebos.com/hubert/hubert-large-lv60.pdparams) | hubert | LV-60k Dataset | - | 1.18 GB |Pre-trained hubert Model | - | - | - | 
[Hubert-large-100h-librispeech Model](https://paddlespeech.cdn.bcebos.com/s2t/librispeech/asr4/hubertASR-large-100h-librispeech_ckpt_1.4.0.model.tar.gz) | hubert | LV-60k Dataset | librispeech train-clean-100 | 1.27 GB |Encoder: Hubert, Decoder: Linear + CTC, Decoding method: Greedy search | - | 0.0587 | [HubertASR Librispeech ASR4](../../examples/librispeech/asr4) |

### Whisper Model
Demo Link | Training Data | Size | Descriptions | CER | Model 
:-----------: | :-----:| :-------: | :-----: | :-----: |:---------:|
[Whisper](../../demos/whisper) | 680kh from internet | large: 5.8G,</br>medium: 2.9G,</br>small: 923M,</br>base: 277M,</br>tiny: 145M | Encoder:Transformer,</br> Decoder:Transformer, </br>Decoding method: </br>Greedy search | 0.027 </br>(large, Librispeech) | [whisper-large](https://paddlespeech.cdn.bcebos.com/whisper/whisper_model_20221122/whisper-large-model.tar.gz) </br>[whisper-medium](https://paddlespeech.cdn.bcebos.com/whisper/whisper_model_20221122/whisper-medium-model.tar.gz) </br>[whisper-medium-English-only](https://paddlespeech.cdn.bcebos.com/whisper/whisper_model_20221122/whisper-medium-en-model.tar.gz) </br>[whisper-small](https://paddlespeech.cdn.bcebos.com/whisper/whisper_model_20221122/whisper-small-model.tar.gz) </br>[whisper-small-English-only](https://paddlespeech.cdn.bcebos.com/whisper/whisper_model_20221122/whisper-small-en-model.tar.gz) </br>[whisper-base](https://paddlespeech.cdn.bcebos.com/whisper/whisper_model_20221122/whisper-base-model.tar.gz) </br>[whisper-base-English-only](https://paddlespeech.cdn.bcebos.com/whisper/whisper_model_20221122/whisper-base-en-model.tar.gz) </br>[whisper-tiny](https://paddlespeech.cdn.bcebos.com/whisper/whisper_model_20221122/whisper-tiny-model.tar.gz) </br>[whisper-tiny-English-only](https://paddlespeech.cdn.bcebos.com/whisper/whisper_model_20221122/whisper-tiny-en-model.tar.gz)

### Language Model based on NGram
|Language Model | Training Data | Token-based | Size | Descriptions|
| :------------: | :------------: | :------------: | :------------: | :------------: |
[English LM](https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm) |  [CommonCrawl(en.00)](http://web-language-models.s3-website-us-east-1.amazonaws.com/ngrams/en/deduped/en.00.deduped.xz) | Word-based | 8.3 GB | Pruned with 0 1 1 1 1; <br/> About 1.85 billion n-grams; <br/> 'trie'  binary with '-a 22 -q 8 -b 8'
[Mandarin LM Small](https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm) | Baidu Internal Corpus | Char-based | 2.8 GB | Pruned with 0 1 2 4 4; <br/> About 0.13 billion n-grams; <br/> 'probing' binary with default settings
[Mandarin LM Large](https://deepspeech.bj.bcebos.com/zh_lm/zhidao_giga.klm) | Baidu Internal Corpus | Char-based | 70.4 GB | No Pruning; <br/> About 3.7 billion n-grams; <br/> 'probing' binary with default settings

### Speech Translation Models

| Model | Training Data | Token-based | Size | Descriptions | BLEU | Example Link |
| :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: |
| (only for CLI)[Transformer FAT-ST MTL En-Zh](https://paddlespeech.cdn.bcebos.com/s2t/ted_en_zh/st1/st1_transformer_mtl_noam_ted-en-zh_ckpt_0.1.1.model.tar.gz) | Ted-En-Zh| Spm| | Encoder:Transformer, Decoder:Transformer, <br />Decoding method: Attention | 20.80 | [Transformer Ted-En-Zh ST1](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/examples/ted_en_zh/st1) |

## Text-to-Speech Models

### Acoustic Models
Model Type | Dataset| Example Link | Pretrained Models|Static / ONNX / Paddle-Lite Models|Size (static)
:-------------:| :------------:| :-----: | :-----:| :-----:| :-----:
Tacotron2|LJSpeech|[tacotron2-ljspeech](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/ljspeech/tts0)|[tacotron2_ljspeech_ckpt_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/tacotron2/tacotron2_ljspeech_ckpt_0.2.0.zip)|||
Tacotron2|CSMSC|[tacotron2-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/tts0)|[tacotron2_csmsc_ckpt_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/tacotron2/tacotron2_csmsc_ckpt_0.2.0.zip)|[tacotron2_csmsc_static_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/tacotron2/tacotron2_csmsc_static_0.2.0.zip)|103MB|
TransformerTTS| LJSpeech| [transformer-ljspeech](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/ljspeech/tts1)|[transformer_tts_ljspeech_ckpt_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/transformer_tts/transformer_tts_ljspeech_ckpt_0.4.zip)|||
SpeedySpeech| CSMSC | [speedyspeech-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/tts2)|[speedyspeech_csmsc_ckpt_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/speedyspeech/speedyspeech_csmsc_ckpt_0.2.0.zip)|[speedyspeech_csmsc_static_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/speedyspeech/speedyspeech_csmsc_static_0.2.0.zip) </br> [speedyspeech_csmsc_onnx_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/speedyspeech/speedyspeech_csmsc_onnx_0.2.0.zip) </br> [speedyspeech_csmsc_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/speedyspeech/speedyspeech_csmsc_pdlite_1.3.0.zip)|13MB|
FastSpeech2| CSMSC |[fastspeech2-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/tts3)|[fastspeech2_nosil_baker_ckpt_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_ckpt_0.4.zip)|[fastspeech2_csmsc_static_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_csmsc_static_0.2.0.zip) </br> [fastspeech2_csmsc_onnx_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_csmsc_onnx_0.2.0.zip) </br> [fastspeech2_csmsc_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_csmsc_pdlite_1.3.0.zip)|157MB|
FastSpeech2-Conformer| CSMSC |[fastspeech2-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/tts3)|[fastspeech2_conformer_baker_ckpt_0.5.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_conformer_baker_ckpt_0.5.zip)|||
FastSpeech2-CNNDecoder| CSMSC| [fastspeech2-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/tts3)| [fastspeech2_cnndecoder_csmsc_ckpt_1.0.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_ckpt_1.0.0.zip) |  [fastspeech2_cnndecoder_csmsc_static_1.0.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_static_1.0.0.zip) </br>[fastspeech2_cnndecoder_csmsc_streaming_static_1.0.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_streaming_static_1.0.0.zip)  </br>[fastspeech2_cnndecoder_csmsc_onnx_1.0.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_onnx_1.0.0.zip)  </br>[fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0.zip)  </br> [fastspeech2_cnndecoder_csmsc_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_pdlite_1.3.0.zip) </br> [fastspeech2_cnndecoder_csmsc_streaming_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_streaming_pdlite_1.3.0.zip)| 84MB|
FastSpeech2| AISHELL-3 |[fastspeech2-aishell3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/tts3)|[fastspeech2_aishell3_ckpt_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_aishell3_ckpt_1.1.0.zip)|[fastspeech2_aishell3_static_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_aishell3_static_1.1.0.zip) </br> [fastspeech2_aishell3_onnx_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_aishell3_onnx_1.1.0.zip) </br> [fastspeech2_aishell3_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_aishell3_pdlite_1.3.0.zip) |147MB|
FastSpeech2| LJSpeech |[fastspeech2-ljspeech](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/ljspeech/tts3)|[fastspeech2_nosil_ljspeech_ckpt_0.5.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_ljspeech_ckpt_0.5.zip)|[fastspeech2_ljspeech_static_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_ljspeech_static_1.1.0.zip) </br> [fastspeech2_ljspeech_onnx_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_ljspeech_onnx_1.1.0.zip) </br> [fastspeech2_ljspeech_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_ljspeech_pdlite_1.3.0.zip)|145MB|
FastSpeech2| VCTK |[fastspeech2-vctk](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/vctk/tts3)|[fastspeech2_vctk_ckpt_1.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_ckpt_1.2.0.zip)|[fastspeech2_vctk_static_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_static_1.1.0.zip) </br> [fastspeech2_vctk_onnx_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_onnx_1.1.0.zip) </br> [fastspeech2_vctk_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_pdlite_1.3.0.zip)| 145MB|
FastSpeech2| ZH_EN |[fastspeech2-zh_en](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/zh_en_tts/tts3)|[fastspeech2_mix_ckpt_1.2.0.zip](https://paddlespeech.cdn.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_ckpt_1.2.0.zip)|[fastspeech2_mix_static_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_static_0.2.0.zip) </br> [fastspeech2_mix_onnx_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_onnx_0.2.0.zip) | 145MB|
FastSpeech2| male-zh ||[fastspeech2_male_zh_ckpt_1.4.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_zh_ckpt_1.4.0.zip)|[fastspeech2_male_zh_static_1.4.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_zh_static_1.4.0.zip) </br> [fastspeech2_male_zh_onnx_1.4.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_zh_onnx_1.4.0.zip) |146MB|
FastSpeech2| male-en ||[fastspeech2_male_en_ckpt_1.4.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_en_ckpt_1.4.0.zip)|[fastspeech2_male_en_static_1.4.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_en_static_1.4.0.zip) </br> [fastspeech2_male_en_onnx_1.4.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_en_onnx_1.4.0.zip) |145MB|
FastSpeech2| male-mix ||[fastspeech2_male_mix_ckpt_1.4.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_mix_ckpt_1.4.0.zip)|[fastspeech2_male_mix_static_1.4.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_mix_static_1.4.0.zip) </br> [fastspeech2_male_mix_onnx_1.4.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_mix_onnx_1.4.0.zip) |146MB|
FastSpeech2| Cantonese |[fastspeech2-canton](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/canton/tts3)|[fastspeech2_canton_ckpt_1.4.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_canton_ckpt_1.4.0.zip)|[fastspeech2_canton_static_1.4.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_canton_static_1.4.0.zip)</br>[fastspeech2_canton_onnx_1.4.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_canton_onnx_1.4.0.zip)|146MB|

### Vocoders
Model Type | Dataset| Example Link | Pretrained Models| Static / ONNX / Paddle-Lite Models|Size (static)
:-----:| :-----:| :-----: | :-----:| :-----:| :-----:
WaveFlow| LJSpeech |[waveflow-ljspeech](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/ljspeech/voc0)|[waveflow_ljspeech_ckpt_0.3.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/waveflow/waveflow_ljspeech_ckpt_0.3.zip)|||
Parallel WaveGAN| CSMSC |[PWGAN-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/voc1)|[pwg_baker_ckpt_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_baker_ckpt_0.4.zip)|[pwg_baker_static_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_baker_static_0.4.zip) </br> [pwgan_csmsc_onnx_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_csmsc_onnx_0.2.0.zip) </br> [pwgan_csmsc_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_csmsc_pdlite_1.3.0.zip)|4.8MB|
Parallel WaveGAN| LJSpeech |[PWGAN-ljspeech](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/ljspeech/voc1)|[pwg_ljspeech_ckpt_0.5.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_ljspeech_ckpt_0.5.zip)|[pwgan_ljspeech_static_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_ljspeech_static_1.1.0.zip) </br> [pwgan_ljspeech_onnx_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_ljspeech_onnx_1.1.0.zip) </br> [pwgan_ljspeech_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_ljspeech_pdlite_1.3.0.zip)|4.8MB|
Parallel WaveGAN| AISHELL-3 |[PWGAN-aishell3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/voc1)|[pwg_aishell3_ckpt_0.5.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_aishell3_ckpt_0.5.zip)| [pwgan_aishell3_static_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_aishell3_static_1.1.0.zip) </br> [pwgan_aishell3_onnx_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_aishell3_onnx_1.1.0.zip) </br> [pwgan_aishell3_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_aishell3_pdlite_1.3.0.zip)|4.8MB|
Parallel WaveGAN| VCTK |[PWGAN-vctk](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/vctk/voc1)|[pwg_vctk_ckpt_0.5.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_vctk_ckpt_0.5.zip)|[pwgan_vctk_static_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_vctk_static_1.1.0.zip) </br> [pwgan_vctk_onnx_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_vctk_onnx_1.1.0.zip) </br> [pwgan_vctk_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_vctk_pdlite_1.3.0.zip)|4.8MB|
|Multi Band MelGAN | CSMSC |[MB MelGAN-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/voc3) | [mb_melgan_csmsc_ckpt_0.1.1.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_csmsc_ckpt_0.1.1.zip) <br>[mb_melgan_baker_finetune_ckpt_0.5.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_baker_finetune_ckpt_0.5.zip)|[mb_melgan_csmsc_static_0.1.1.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_csmsc_static_0.1.1.zip) </br> [mb_melgan_csmsc_onnx_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_csmsc_onnx_0.2.0.zip) </br> [mb_melgan_csmsc_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_csmsc_pdlite_1.3.0.zip)|7.6MB|
Style MelGAN | CSMSC |[Style MelGAN-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/voc4)|[style_melgan_csmsc_ckpt_0.1.1.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/style_melgan/style_melgan_csmsc_ckpt_0.1.1.zip)| | |
HiFiGAN | CSMSC |[HiFiGAN-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/voc5)|[hifigan_csmsc_ckpt_0.1.1.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_csmsc_ckpt_0.1.1.zip)|[hifigan_csmsc_static_0.1.1.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_csmsc_static_0.1.1.zip) </br> [hifigan_csmsc_onnx_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_csmsc_onnx_0.2.0.zip) </br> [hifigan_csmsc_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_csmsc_pdlite_1.3.0.zip)|46MB|
HiFiGAN | LJSpeech |[HiFiGAN-ljspeech](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/ljspeech/voc5)|[hifigan_ljspeech_ckpt_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_ljspeech_ckpt_0.2.0.zip)|[hifigan_ljspeech_static_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_ljspeech_static_1.1.0.zip) </br> [hifigan_ljspeech_onnx_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_ljspeech_onnx_1.1.0.zip) </br> [hifigan_ljspeech_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_ljspeech_pdlite_1.3.0.zip) |49MB|
HiFiGAN | AISHELL-3 |[HiFiGAN-aishell3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/voc5)|[hifigan_aishell3_ckpt_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_ckpt_0.2.0.zip)|[hifigan_aishell3_static_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_static_1.1.0.zip) </br> [hifigan_aishell3_onnx_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_onnx_1.1.0.zip) </br> [hifigan_aishell3_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_pdlite_1.3.0.zip)|46MB|
HiFiGAN | VCTK |[HiFiGAN-vctk](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/vctk/voc5)|[hifigan_vctk_ckpt_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_ckpt_0.2.0.zip)|[hifigan_vctk_static_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_static_1.1.0.zip) </br> [hifigan_vctk_onnx_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_onnx_1.1.0.zip) </br> [hifigan_vctk_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_pdlite_1.3.0.zip)|46MB|
WaveRNN | CSMSC |[WaveRNN-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/voc6)|[wavernn_csmsc_ckpt_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/wavernn/wavernn_csmsc_ckpt_0.2.0.zip)|[wavernn_csmsc_static_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/wavernn/wavernn_csmsc_static_0.2.0.zip)|18MB|
Parallel WaveGAN| Male ||[pwg_male_ckpt_1.4.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_male_ckpt_1.4.0.zip)|[pwgan_male_static_1.4.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_male_static_1.4.0.zip) </br> [pwgan_male_onnx_1.4.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_male_onnx_1.4.0.zip)|4.8MB|
HiFiGAN| Male ||[hifigan_male_ckpt_1.4.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_male_ckpt_1.4.0.zip)|[hifigan_male_static_1.4.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_male_static_1.4.0.zip) </br> [hifigan_male_onnx_1.4.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_male_onnx_1.4.0.zip)|46MB|


### Voice Cloning
Model Type | Dataset| Example Link | Pretrained Models
:-------------:| :------------:| :-----: | :-----: | 
GE2E| AISHELL-3, etc. |[ge2e](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/ge2e)|[ge2e_ckpt_0.3.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ge2e/ge2e_ckpt_0.3.zip)
GE2E + Tacotron2| AISHELL-3 |[ge2e-Tacotron2-aishell3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/vc0)|[tacotron2_aishell3_ckpt_vc0_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/tacotron2/tacotron2_aishell3_ckpt_vc0_0.2.0.zip)
GE2E + FastSpeech2 | AISHELL-3  |[ge2e-fastspeech2-aishell3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/vc1)|[fastspeech2_nosil_aishell3_vc1_ckpt_0.5.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_aishell3_vc1_ckpt_0.5.zip)


## Audio Classification Models

Model Type | Dataset| Example Link | Pretrained Models | Static Models 
:-------------:| :------------:| :-----: | :-----: | :-----:
PANN | Audioset| [audioset_tagging_cnn](https://github.com/qiuqiangkong/audioset_tagging_cnn) | [panns_cnn6.pdparams](https://bj.bcebos.com/paddleaudio/models/panns_cnn6.pdparams), [panns_cnn10.pdparams](https://bj.bcebos.com/paddleaudio/models/panns_cnn10.pdparams), [panns_cnn14.pdparams](https://bj.bcebos.com/paddleaudio/models/panns_cnn14.pdparams) | [panns_cnn6_static.tar.gz](https://paddlespeech.cdn.bcebos.com/cls/inference_model/panns_cnn6_static.tar.gz)(18M), [panns_cnn10_static.tar.gz](https://paddlespeech.cdn.bcebos.com/cls/inference_model/panns_cnn10_static.tar.gz)(19M), [panns_cnn14_static.tar.gz](https://paddlespeech.cdn.bcebos.com/cls/inference_model/panns_cnn14_static.tar.gz)(289M) 
PANN | ESC-50 |[pann-esc50](../../examples/esc50/cls0)|[esc50_cnn6.tar.gz](https://paddlespeech.cdn.bcebos.com/cls/esc50/esc50_cnn6.tar.gz), [esc50_cnn10.tar.gz](https://paddlespeech.cdn.bcebos.com/cls/esc50/esc50_cnn10.tar.gz), [esc50_cnn14.tar.gz](https://paddlespeech.cdn.bcebos.com/cls/esc50/esc50_cnn14.tar.gz)

## Speaker Verification Models

Model Type | Dataset| Example Link | Pretrained Models | Static Models 
:-------------:| :------------:| :-----: | :-----: | :-----:
ECAPA-TDNN | VoxCeleb| [voxceleb_ecapatdnn](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/voxceleb/sv0) | [ecapatdnn.tar.gz](https://paddlespeech.cdn.bcebos.com/vector/voxceleb/sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_1.tar.gz) | -

## Punctuation Restoration Models
Model Type | Dataset| Example Link | Pretrained Models
:-------------:| :------------:| :-----: | :-----:
Ernie Linear | IWSLT2012_zh |[iwslt2012_punc0](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/iwslt2012/punc0)|[ernie_linear_p3_iwslt2012_zh_ckpt_0.1.1.zip](https://paddlespeech.cdn.bcebos.com/text/ernie_linear_p3_iwslt2012_zh_ckpt_0.1.1.zip)


================================================
FILE: docs/source/streaming_asr_demo_video.rst
================================================
Streaming ASR Demo Video
========================

.. raw:: html
     
    <video controls width="1024">

    <source src="https://paddlespeech.cdn.bcebos.com/demos/asr_demos/streaming_ASR_slice.mp4" type="video/mp4">
    Sorry, your browser doesn't support embedded videos.
    </video>


================================================
FILE: docs/source/streaming_tts_demo_video.rst
================================================
Streaming TTS Demo Video
========================

.. raw:: html
     
    <video controls width="1024">

    <source src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/streaming_tts_demo.mp4"
            type="video/mp4">
    Sorry, your browser doesn't support embedded videos.
    </video>


================================================
FILE: docs/source/tts/PPTTS.md
================================================
([简体中文](./PPTTS_cn.md)|English)

# PPTTS

- [1. Introduction](#1)
- [2. Characteristic](#2)
- [3. Benchmark](#3)
- [4. Demo](#4)
- [5. Tutorials](#5)
    - [5.1 Training and Inference Optimization](#51)
    - [5.2 Characteristic APPs of TTS](#52)
    - [5.3 TTS Server](#53)

<a name="1"></a>
## 1. Introduction

PP-TTS is a streaming speech synthesis system developed by PaddleSpeech. Based on the implementation of [SOTA Algorithms](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/released_model.md#text-to-speech-models), a faster inference engine is used to realize streaming speech synthesis technology to meet the needs of commercial speech interaction scenarios.

#### PP-TTS
Pipeline of TTS:
<center><img src=https://ai-studio-static-online.cdn.bcebos.com/ea69ae1faff84940a59c7079d16b3a8db2741d2c423846f68822f4a7f28726e9 width="600" ></center>

PP-TTS provides a Chinese streaming speech synthesis system based on FastSpeech2 and HiFiGAN by default:

- Text Frontend: The rule-based Chinese text frontend system is adopted to optimize Chinese text processing such as text normalization, polyphony, and tone sandhi.
- Acoustic Model: The decoder of FastSpeech2 is improved so that it supports streaming synthesis.
- Vocoder: Streaming synthesis of GAN vocoders is supported.
- Inference Engine: ONNXRuntime is used to optimize the inference of TTS models, so that the TTS system can achieve RTF < 1 even on low-voltage CPUs, meeting the requirements of streaming synthesis.

<a name="2"></a>
## 2. Characteristic
- Open source leading Chinese TTS system
- Using ONNXRuntime to optimize the inference of TTS models
- The only open-source streaming TTS system
- Easy disassembly: Developers can easily replace different acoustic models and vocoders in different languages, use different inference engines (Paddle dynamic graph, PaddleInference, ONNXRuntime, etc.), and use different network services (HTTP, WebSocket)

<a name="3"></a>
## 3. Benchmark
PaddleSpeech TTS models' benchmark: [TTS-Benchmark](https://github.com/PaddlePaddle/PaddleSpeech/wiki/TTS-Benchmark).

<a name="4"></a>
## 4. Demo 
See: [Streaming TTS Demo Video](https://paddlespeech.readthedocs.io/en/latest/streaming_tts_demo_video.html)

<a name="5"></a>
## 5. Tutorials

<a name="51"></a>
### 5.1 Training and Inference Optimization

Default FastSpeech2: [tts3/run.sh](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/examples/csmsc/tts3/run.sh)

Streaming FastSpeech2: [tts3/run_cnndecoder.sh](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/examples/csmsc/tts3/run_cnndecoder.sh)

HiFiGAN：[voc5/run.sh](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/examples/csmsc/voc5/run.sh)

<a name="52"></a>
### 5.2 Characteristic APPs of TTS
text_to_speech - convert text into speech: [text_to_speech](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos/text_to_speech)

style_fs2 - multi style control for FastSpeech2 model: [style_fs2](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos/style_fs2)

story talker - book reader based on OCR and TTS: [story_talker](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos/story_talker)

metaverse - 2D AR with TTS: [metaverse](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos/metaverse)

<a name="53"></a>
### 5.3 TTS Server

Non-streaming TTS Server: [speech_server](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos/speech_server)

Streaming TTS Server: [streaming_tts_server](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos/streaming_tts_server)


For more tutorials please see: [PP-TTS：流式语音合成原理及服务部署](https://aistudio.baidu.com/aistudio/projectdetail/3885352)


================================================
FILE: docs/source/tts/PPTTS_cn.md
================================================
(简体中文|[English](./PPTTS.md))

# PP-TTS

- [1. 简介](#1)
- [2. 特性](#2)
- [3. Benchmark](#3)
- [4. 效果展示](#4)
- [5. 使用教程](#5)
    - [5.1 模型训练与推理优化](#51)
    - [5.2 语音合成特色应用](#52)
    - [5.3 语音合成服务搭建](#53)

<a name="1"></a>
## 1. 简介

PP-TTS 是 PaddleSpeech 自研的流式语音合成系统。在实现[前沿算法](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/released_model.md#text-to-speech-models)的基础上，使用了更快的推理引擎，实现了流式语音合成技术，使其满足商业语音交互场景的需求。

#### PP-TTS
语音合成基本流程如下图所示：
<center><img src=https://ai-studio-static-online.cdn.bcebos.com/ea69ae1faff84940a59c7079d16b3a8db2741d2c423846f68822f4a7f28726e9 width="600" ></center>

PP-TTS 默认提供基于 FastSpeech2 声学模型和 HiFiGAN 声码器的中文流式语音合成系统：

- 文本前端：采用基于规则的中文文本前端系统，对文本正则、多音字、变调等中文文本场景进行了优化。
- 声学模型：对 FastSpeech2 模型的 Decoder 进行改进，使其可以流式合成
- 声码器：支持对 GAN Vocoder 的流式合成
- 推理引擎：使用 ONNXRuntime 推理引擎优化模型推理性能，使得语音合成系统在低压 CPU 上也能达到 RTF<1，满足流式合成的要求

<a name="2"></a>
## 2. 特性
- 开源领先的中文语音合成系统
- 使用 ONNXRuntime 推理引擎优化模型推理性能
- 唯一开源的流式语音合成系统
- 易拆卸性：可以很方便地更换不同语种上的不同声学模型和声码器、使用不同的推理引擎（Paddle 动态图、PaddleInference 和 ONNXRuntime 等）、使用不同的网络服务（HTTP、Websocket）

<a name="3"></a>
## 3. Benchmark
PaddleSpeech TTS 模型之间的性能对比，请查看 [TTS-Benchmark](https://github.com/PaddlePaddle/PaddleSpeech/wiki/TTS-Benchmark)。

<a name="4"></a>
## 4. 效果展示 
请参考：[Streaming TTS Demo Video](https://paddlespeech.readthedocs.io/en/latest/streaming_tts_demo_video.html)

<a name="5"></a>
## 5. 使用教程

<a name="51"></a>
### 5.1 模型训练与推理优化

Default FastSpeech2：[tts3/run.sh](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/examples/csmsc/tts3/run.sh)

流式 FastSpeech2：[tts3/run_cnndecoder.sh](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/examples/csmsc/tts3/run_cnndecoder.sh)

HiFiGAN：[voc5/run.sh](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/examples/csmsc/voc5/run.sh)

<a name="52"></a>
### 5.2 语音合成特色应用
一键式实现语音合成：[text_to_speech](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos/text_to_speech)

个性化语音合成 - 基于 FastSpeech2 模型的个性化语音合成：[style_fs2](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos/style_fs2)

会说话的故事书 - 基于 OCR 和语音合成的会说话的故事书：[story_talker](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos/story_talker)

元宇宙 - 基于语音合成的 2D 增强现实：[metaverse](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos/metaverse)

<a name="53"></a>
### 5.3 语音合成服务搭建

一键式搭建非流式语音合成服务：[speech_server](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos/speech_server)

一键式搭建流式语音合成服务：[streaming_tts_server](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos/streaming_tts_server)


更多教程，包括模型设计、模型训练、推理部署等，请参考 AIStudio 教程：[PP-TTS：流式语音合成原理及服务部署](https://aistudio.baidu.com/aistudio/projectdetail/3885352)


================================================
FILE: docs/source/tts/README.md
================================================
# Parakeet
Parakeet aims to provide a flexible, efficient, and state-of-the-art text-to-speech toolkit for the open-source community. It is built on PaddlePaddle dynamic graph and includes many influential TTS models.  

<div align="center">
  <img src="../../images/logo.png" width=300 /> <br>
</div>

## Overview

To facilitate exploiting the existing TTS models directly and developing the new ones, Parakeet selects typical models and provides their reference implementations in PaddlePaddle. Furthermore, Parakeet abstracts the TTS pipeline and standardizes the procedure of data preprocessing, common modules sharing, model configuration, and the process of training and synthesis. The models supported here include Text FrontEnd, end-to-end Acoustic models, and Vocoders:

- Text FrontEnd
  - Rule-based Chinese frontend.

- Acoustic Models
  - [【FastSpeech2】FastSpeech 2: Fast and High-Quality End-to-End Text to Speech](https://arxiv.org/abs/2006.04558)
  - [【SpeedySpeech】SpeedySpeech: Efficient Neural Speech Synthesis](https://arxiv.org/abs/2008.03802)
  - [【Transformer TTS】Neural Speech Synthesis with Transformer Network](https://arxiv.org/abs/1809.08895)
  - [【Tacotron2】Natural TTS Synthesis by Conditioning WaveNet on Mel Spectrogram Predictions](https://arxiv.org/abs/1712.05884)
- Vocoders
  - [【Parallel WaveGAN】Parallel WaveGAN: A fast waveform generation model based on generative adversarial networks with multi-resolution spectrogram](https://arxiv.org/abs/1910.11480)
  - [【WaveFlow】WaveFlow: A Compact Flow-based Model for Raw Audio](https://arxiv.org/abs/1912.01219)
- Voice Cloning
  - [Transfer Learning from Speaker Verification to Multispeaker Text-to-Speech Synthesis](https://arxiv.org/pdf/1806.04558v4.pdf)
  - [【GE2E】Generalized End-to-End Loss for Speaker Verification](https://arxiv.org/abs/1710.10467)


## Audio samples

Check our [website](https://paddlespeech.readthedocs.io/en/latest/tts/demo.html) for audio samples.

## Released Model

### Acoustic Model

#### FastSpeech2/FastPitch
1. [fastspeech2_nosil_baker_ckpt_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/fastspeech2_nosil_baker_ckpt_0.4.zip)
2. [fastspeech2_nosil_aishell3_ckpt_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/fastspeech2_nosil_aishell3_ckpt_0.4.zip)
3. [fastspeech2_nosil_ljspeech_ckpt_0.5.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/fastspeech2_nosil_ljspeech_ckpt_0.5.zip)

#### SpeedySpeech
1. [speedyspeech_nosil_baker_ckpt_0.5.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/speedyspeech_nosil_baker_ckpt_0.5.zip)

#### TransformerTTS

1. [transformer_tts_ljspeech_ckpt_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/transformer_tts_ljspeech_ckpt_0.4.zip)

#### Tacotron2

1. [tacotron2_ljspeech_ckpt_0.3.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/tacotron2_ljspeech_ckpt_0.3.zip)
2. [tacotron2_ljspeech_ckpt_0.3_alternative.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/tacotron2_ljspeech_ckpt_0.3_alternative.zip)

### Vocoder

#### WaveFlow

1. [waveflow_ljspeech_ckpt_0.3.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/waveflow_ljspeech_ckpt_0.3.zip)

#### Parallel WaveGAN

1. [pwg_baker_ckpt_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/pwg_baker_ckpt_0.4.zip)
2. [pwg_ljspeech_ckpt_0.5.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/pwg_ljspeech_ckpt_0.5.zip)

### Voice Cloning

#### Tacotron2_AISHELL3

1. [tacotron2_aishell3_ckpt_0.3.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/tacotron2_aishell3_ckpt_0.3.zip)

#### GE2E

1. [ge2e_ckpt_0.3.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/ge2e_ckpt_0.3.zip)


================================================
FILE: docs/source/tts/advanced_usage.md
================================================

# Advanced Usage
This section covers how to extend TTS by implementing your models and experiments. Guidelines on implementation are also elaborated.

For the general deep learning experiment, there are several parts to deal with:
1. Preprocess the data according to the needs of the model, and iterate the dataset by batch.
2. Define the model, optimizer, and other components.
3. Write out the training process (generally including forward / backward calculation, parameter update, log recording, visualization, periodic evaluation, etc.).
4. Configure and run the experiment.

## PaddleSpeech TTS's Model Components
To balance the reusability and function of models, we divide models into several types according to their characteristics.

For the commonly used modules that can be used as part of other larger models, we try to implement them as simple and universal as possible, because they will be reused. Modules with trainable parameters are generally implemented as subclasses of `paddle.nn.Layer`. Modules without trainable parameters can be directly implemented as a function, and its input and output are `paddle.Tensor`.

Models for a specific task are implemented as subclasses of `paddle.nn.Layer`. Models could be simple, like a single-layer RNN. For complicated models, it is recommended to split the model into different components.

For a seq-to-seq model, it's natural to split it into encoder and decoder. For a model composed of several similar layers, it's natural to extract the sublayer as a separate layer.

There are two common ways to define a model which consists of several modules.

1. Define a module given the specifications. Here is an example with a multilayer perceptron.
    ```python
    class MLP(nn.Layer):
        """Multilayer perceptron: two linear layers with a tanh in between."""

        def __init__(self, input_size, hidden_size, output_size):
            # nn.Layer has to be initialized before any sublayer is assigned,
            # otherwise the sublayers cannot be registered.
            super().__init__()
            self.linear1 = nn.Linear(input_size, hidden_size)
            self.linear2 = nn.Linear(hidden_size, output_size)

        def forward(self, x):
            """Apply linear1, then tanh, then linear2 to `x`."""
            return self.linear2(paddle.tanh(self.linear1(x)))

    module = MLP(16, 32, 4) # initialize a module
    ```
    When the module is intended to be a generic and reusable layer that can be integrated into a larger model, we prefer to define it in this way.

    For considerations of readability and usability, we strongly recommend **NOT** to pack specifications into a single object. Here’s an example below.
    ```python
    class MLP(nn.Layer):
        """Same perceptron, but configured from a single `hparams` object.

        Shown as the style to avoid: the attributes read from `hparams`
        (input_size, hidden_size, output_size) are only discoverable by
        reading the code.
        """

        def __init__(self, hparams):
            # nn.Layer has to be initialized before any sublayer is assigned,
            # otherwise the sublayers cannot be registered.
            super().__init__()
            self.linear1 = nn.Linear(hparams.input_size, hparams.hidden_size)
            self.linear2 = nn.Linear(hparams.hidden_size, hparams.output_size)

        def forward(self, x):
            """Apply linear1, then tanh, then linear2 to `x`."""
            return self.linear2(paddle.tanh(self.linear1(x)))
    ```
    For a module defined in this way, it’s harder for the user to initialize an instance. Users have to read the code to check what attributes are used.

    Also, code in this style tends to be abused by passing a huge config object to initialize every module used in an experiment, though each module may not need the whole configuration.

    We prefer to be explicit.

2. Define a module as a combination given its components. Here is an example of a sequence-to-sequence model.
    ```python
    class Seq2Seq(nn.Layer):
        """Sequence-to-sequence model composed of a given encoder and decoder."""

        def __init__(self, encoder, decoder):
            # nn.Layer has to be initialized before any sublayer is assigned,
            # otherwise the sublayers cannot be registered.
            super().__init__()
            self.encoder = encoder
            self.decoder = decoder

        def forward(self, x):
            """Encode `x` and return what the decoder produces from the encoding."""
            encoder_output = self.encoder(x)
            output = self.decoder(encoder_output)
            return output

    encoder = Encoder(...)
    decoder = Decoder(...)
    # compose two components
    model = Seq2Seq(encoder, decoder)
    ```
    When a model is complicated and made up of several components, each of which has a separate functionality, and can be replaced by other components with the same functionality, we prefer to define it in this way.

In the directory structure of PaddleSpeech TTS, modules with high reusability are placed in `paddlespeech.t2s.modules`, but models for specific tasks are placed in `paddlespeech.t2s.models`. When developing a new model, developers need to consider the feasibility of splitting the modules, and the degree of generality of the modules and place them in appropriate directories.

## PaddleSpeech TTS's Data Components
Another critical component for a deep learning project is data.
PaddleSpeech TTS uses the following methods for training data:
1. Preprocess the data.
2. Load the preprocessed data for training.

Previously, we wrote the preprocessing in the `__getitem__` of the Dataset, which will process when accessing a certain batch sample, but encountered some problems:

1.  Efficiency problem. Even if Paddle has a design to load data asynchronously, when the batch size is large, each sample needs to be preprocessed and set up batches, which takes a lot of time, and may even seriously slow down the training process.
2. Data filtering problem. Some filtering conditions depend on the features of the processed sample, for example, filtering out samples whose text is too short. If the text length can only be known after `__getitem__`, the entire dataset has to be loaded once every time you filter! In addition, if you do not pre-filter, a small exception (such as a text that is too short) in `__getitem__` will cause an exception in the entire data flow, which is not feasible, because `collate_fn` presupposes that every sample can be fetched normally. Even if some special flag, such as `None`, is used to mark a failed sample so that `collate_fn` can skip it, the batch size will change.

Therefore, it is not realistic to put preprocessing entirely on `__getitem__`. We use the method mentioned above instead.
During preprocessing, we can do the filtering. We can also save more intermediate features, such as text length, audio length, etc., which can be used for subsequent filtering. Following the convention of the TTS field, data is stored in multiple files, and the processed results are stored in `npy` format.

Use a list-like way to store metadata and store the file path in it, so that you can not be restricted by the specific storage location of the file. In addition to the file path, other metadata can also be stored in it. For example, the path of the text, the path of the audio, the path of the spectrum, the number of frames, the number of sampling points, and so on.

Then for the path, there are multiple opening methods,  such as `sf.read`, `np.load`, etc., so it's best to use a parameter that can be input, we don't even want to determine the reading method by its extension, it's best to let the users input it, in this way, users can define their method to parse the data.

So we learned from the design of `DataFrame`, but our construction method is simpler, only need a `list of dicts`, a dict represents a record, and it's convenient to interact with formats such as `json`, `yaml`. For each selected field, we need to give a parser (called `converter` in the interface), and that's it.

Then we need to select a format for saving metadata to the hard disk. There are two square brackets when storing the list of records in `json`, which is not convenient for stream reading and writing, so we use `jsonlines`. We don't use `yaml` because it occupies too many rows when storing the list of records.

Meanwhile, `cache` is added here, and a multi-process Manager is used to share memory between multiple processes. When `num_workers` is used, it is guaranteed that each sub process will not cache a copy.

The implementation of `DataTable` can be found in `paddlespeech/t2s/datasets/data_table.py`.
```python
class DataTable(Dataset):
    """Dataset to load and convert data for general purpose.

    Parameters
    ----------
    data : List[Dict[str, Any]]
        Metadata, a list of meta datum, each of which is composed of
        several fields
    fields : List[str], optional
        Fields to use, if not specified, all the fields in the data are
        used, by default None
    converters : Dict[str, Callable], optional
        Converters used to process each field, by default None
    use_cache : bool, optional
        Whether to use a cache, by default False

    Raises
    ------
    ValueError
        If there is some field that does not exist in data.
    ValueError
        If there is some field in converters that does not exist in fields.
    """

    def __init__(self,
                 data: List[Dict[str, Any]],
                 fields: List[str]=None,
                 converters: Dict[str, Callable]=None,
                 use_cache: bool=False):
```
Its `__getitem__` method is to parse each field with their parser and then compose a dictionary to return.
```python
def _convert(self, meta_datum: Dict[str, Any]) -> Dict[str, Any]:
    """Convert a meta datum to an example by applying the corresponding
    converters to each field requested.

    Parameters
    ----------
    meta_datum : Dict[str, Any]
        Meta datum

    Returns
    -------
    Dict[str, Any]
        Converted example, holding one entry for each field in
        ``self.fields``
    """
    example = {}
    for field in self.fields:
        # look up the converter registered for this field, if any
        converter = self.converters.get(field, None)
        meta_datum_field = meta_datum[field]
        if converter is not None:
            converted_field = converter(meta_datum_field)
        else:
            # no converter registered: the raw value is passed through unchanged
            converted_field = meta_datum_field
        example[field] = converted_field
    return example
```

## PaddleSpeech TTS's Training Components
A typical training process includes the following processes:
1. Iterate the dataset.
2. Process batch data.
3. Neural network forward/backward calculation.
4. Parameter update.
5. Evaluate the model on the validation dataset, when some special conditions are reached.
6. Write logs, visualize, and in some cases save necessary intermediate results.
7. Save the state of the model and optimizer.

Here, we mainly introduce the training-related components of PaddleSpeech TTS and why we designed them like this.
### Global Reporter
When training and modifying deep learning models, logging is often needed, and it has even become the key to model debugging and modifying. We usually use various visualization tools, such as `visualdl` in `paddle`, `tensorboard` in `tensorflow`, `visdom`, `wandb`, etc. Besides, `logging` and `print` are usually used for different purposes.

Among these tools, `print` is the simplest: it has no concept of `logger` and `handler` as in `logging`, or of `summarywriter` and `logdir` as in `tensorboard`, and printing does not need a `global_step`. It's light enough to appear anywhere in the code, and it's printed to a common stdout. Of course, its customizability is limited; for example, it is no longer intuitive when printing dictionaries or more complex objects. And it's fleeting: people need to use redirection to save information.

For TTS models development，we hope to have a more universal multimedia stdout, which is a tool similar to `tensorboard`, which allows many multimedia forms, but it needs a `summary writer` when using, and a `step` when writing information. If the data are images or voices,  some format control parameters are needed.

This will destroy the modular design to a certain extent. For example, If my model is composed of multiple sublayers, and I want to record some important information in the forward method of some sublayers. For this reason, I may need to pass the `summary writer` to these sublayers, but for the sublayers, its function is the calculation, it should not have extra considerations, and it's also difficult for us to tolerate that the initialization of an `nn.Linear` has an optional `visualizer` in the method. And, for a calculation module, **HOW** can it know the global step? These are things related to the training process!

Therefore, a more common approach is not to put writing_log_code in the definition of layer, but return it, then obtain them during training, and write them to `summary writer`.  However, the return values need to be modified.  `summary writer ` is a broadcaster at the training level, and then each module transmits information to it by modifying the return values.

We think this method is a little ugly. We prefer to return the necessary information only rather than change the return values to accommodate visualization and recording.  When you need to report some information, you should be able to report it without difficulty. So we imitate the design of `chainer` and use the `global reporter`.

It takes advantage of the globality of Python's module-level variables and the effect of context manager.

There is a module-level variable `OBSERVATIONS` in `paddlespeech/t2s/training/reporter.py`, which is a `Dict` that stores key-value pairs.
```python
# paddlespeech/t2s/training/reporter.py

@contextlib.contextmanager
def scope(observations):
    """Temporarily make `observations` the target that reports are written to.

    The object previously bound to the module-level `OBSERVATIONS` is
    restored when the `with` block exits, even if its body raises.
    """
    # make `observations` the target to report to.
    # it is basically a dictionary that stores temporary observations
    global OBSERVATIONS
    old = OBSERVATIONS
    OBSERVATIONS = observations

    try:
        yield
    finally:
        # always rebind the previous target, which is what lets scopes nest
        OBSERVATIONS = old
```

Then we implement a context manager `scope`, which is used to switch the variables bound by the name of `OBSERVATIONS`. Then a `getter` function is defined to get the dictionary bound by `OBSERVATIONS`.
```python
def get_observations():
    """Return the object currently bound to the module-level `OBSERVATIONS`."""
    global OBSERVATIONS
    return OBSERVATIONS
```
Then we define a function that gets the current `OBSERVATIONS` and writes a key-value pair into it.
```python
def report(name, value):
    """Write `value` under the key `name` into the current report target.

    Nothing is recorded when `get_observations()` returns None.
    """
    # a simple function to report a named value
    # you can use it everywhere, it will get the current target and write to it
    # you can think of it as stdout
    observations = get_observations()
    if observations is None:
        return
    else:
        observations[name] = value
```
The following test code shows how it is used.
- use `first` as the current `OBSERVATION`, write `first_begin=1`,
- then, open the second `OBSERVATION`, write `second_begin=2`,
- then, open the third `OBSERVATION`, write `third_begin=3`,
- exit the third `OBSERVATION`, and we go back to the second `OBSERVATION` automatically,
- write some content in the second `OBSERVATION`, then exit it, and we go back to the first `OBSERVATION` automatically.
```python
def test_reporter_scope():
    first = {}
    second = {}
    third = {}

    with scope(first):
        report("first_begin", 1)
        with scope(second):
            report("second_begin", 2)
            with scope(third):
                report("third_begin", 3)
                report("third_end", 4)
            report("seconf_end", 5)
        report("first_end", 6)

    assert first == {'first_begin': 1, 'first_end': 6}
    assert second == {'second_begin': 2, 'seconf_end': 5}
    assert third == {'third_begin': 3, 'third_end': 4}
```

In this way, when we write modular components, we can directly call `report`. The caller decides where the values are reported: it prepares a dictionary to serve as `OBSERVATIONS`, opens a `scope` on it, and calls the component within that `scope`.

The `Trainer` in PaddleSpeech TTS reports information in this way.
```python
while True:
    self.observation = {}
    # set observation as the report target
    # you can use report freely in Updater.update()

    # updating parameters and state
    with scope(self.observation):
        update() # training for a step is defined here
```
### Updater: Model Training Process

To maintain the purity of function and the reusability of code, we abstract the model code into a subclass of  `paddle.nn.Layer`, and write the core computing functions in it.

We tend to write the forward process of training in `forward()`, but only as far as the prediction result, not the loss. This way, the module can be called by a larger module.

However, when we compose an experiment, we need to add some other things, such as the training process, evaluation process, checkpoint saving, visualization, and the like. In this process, we will encounter some things that only exist in the training process, such as `optimizer`, `learning rate scheduler`, `visualizer`, etc. These things are not part of the model, they should **NOT** be written in the model code.

We made an abstraction for these intermediate processes, that is, `Updater`, which takes the `model`, `optimizer`, and `data stream` as input, and its function is training. Since there may be differences in training methods of different models, we tend to write a corresponding `Updater` for each model. This is different from the final training script, however: the `Updater` still provides a certain degree of encapsulation. It leaves out details such as regular saving, visualization, and evaluation, and retains only the most basic function, that is, training the model.

### Visualizer
Because we choose observation as the communication mode, we can simply write the things in observation into `visualizer`.

## PaddleSpeech TTS's Configuration Components
Deep learning experiments often have many options to configure. These configurations can be roughly divided into several categories.
1. Data source and data processing mode configuration.
2. Save path configuration of experimental results.
3. Data preprocessing mode configuration.
4. Model structure and hyperparameter configuration.
5. Training process configuration.

It’s common to change the running configuration to compare results. To keep track of running configuration, we use `yaml` configuration files.

Also, we want to interact with command-line options. Some options that usually change according to running environments are provided by command line arguments. In addition, we want to override an option in the config file without editing it.

Taking these requirements into consideration, we use [yacs](https://github.com/rbgirshick/yacs) as a config management tool. Other tools like [omegaconf](https://github.com/omry/omegaconf) are also powerful and have similar functions.

In each example provided, there is a `config.py`, and the default config is defined in `conf/default.yaml`. If you want to get the default config, import `config.py` and call `get_cfg_defaults()` to get it. Then it can be updated with a `yaml` config file or command-line arguments if needed.

For details about how to use yacs in experiments, see [yacs](https://github.com/rbgirshick/yacs).

The following is the basic  `ArgumentParser`:
1. `--config`  is used to support configuration file parsing, and the configuration file itself handles the unique options of each experiment.
2. `--train-metadata` is the path to the training data.
3. `--output-dir` is the directory in which to save the training results. (If there are checkpoints in `checkpoints/` under `--output-dir`, training resumes from the newest checkpoint by default.)
4. `--ngpu` determines the operation mode: it is the number of training processes. If `ngpu` > 0, GPUs are used; otherwise, the CPU is used.

Developers can refer to the examples in `examples` to write the default configuration file when adding new experiments.

## PaddleSpeech TTS's Experiment template

The experimental codes in PaddleSpeech TTS are generally organized as follows:

```text
.
├──  README.md               (help information)
├──  conf
│     └── default.yaml       (default config)
├──  local
│    ├──  preprocess.sh      (script to call data preprocessing.py)
│    ├──  synthesize.sh      (script to call synthesis.py)  
│    ├──  synthesize_e2e.sh  (script to call synthesis_e2e.py)
│    └──train.sh             (script to call train.py)
├── path.sh                  (script include paths to be sourced)
└── run.sh                   (script to call scripts in local)
```
The `*.py` files called by the above `*.sh` scripts are located in `${BIN_DIR}/`.

We add a named argument `--output-dir` to each training script to specify the output directory. The directory structure is as follows; developers should follow this specification:
```text
exp/default/
├── checkpoints/
│   ├── records.jsonl        (record file)
│   └── snapshot_iter_*.pdz  (checkpoint files)
├── config.yaml              (config file of this experiment)
├── vdlrecords.*.log         (visualdl record file)
├── worker_*.log             (text logging, one file per process)
├── validation/              (output dir during training, information_iter_*/ is the output of each step, if necessary)
├── inference/               (output dir of exported static graph model, which is only used in the final stage of training, if implemented)
└── test/                    (output dir of synthesis results)
```

You can view the examples we provide in `examples`. These experiments are provided to users as examples that can be run directly. Users are welcome to add new models and experiments and contribute code to PaddleSpeech.


================================================
FILE: docs/source/tts/demo.rst
================================================
Audio Sample 
==================

The main processes of TTS include:

1. Convert the original text into characters/phonemes, through ``text frontend`` module.

2. Convert characters/phonemes into acoustic features, such as linear spectrogram, mel spectrogram, LPC features, etc. through ``Acoustic models``.

3. Convert acoustic features into waveforms through ``Vocoders``.

When training ``Tacotron2``, ``TransformerTTS`` and ``WaveFlow``, we use the English single-speaker TTS dataset `LJSpeech <https://keithito.com/LJ-Speech-Dataset/>`_  by default. However, when training ``SpeedySpeech``, ``FastSpeech2`` and ``ParallelWaveGAN``, we use the Chinese single-speaker dataset `CSMSC <https://test.data-baker.com/data/index/source/>`_ by default.

In the future, ``PaddleSpeech TTS`` will mainly use Chinese TTS datasets for default examples.

Here, we will display three types of audio samples:

1. Analysis/synthesis (ground-truth spectrograms + Vocoder)

2. TTS (Acoustic model + Vocoder)

3. Chinese TTS with/without text frontend (mainly tone sandhi)

Analysis/synthesis
--------------------------

Audio samples generated from ground-truth spectrograms with a vocoder.

.. raw:: html
     
    <b>LJSpeech(English)</b>
    <br>
    </br>
    
    <div class="table">
    <table border="2" cellspacing="1" cellpadding="1"> 
        <tr>
            <th align="center"> Text </th>
            <th align="center"> GT </th>
            <th align="center"> WaveFlow </th>
        </tr>
        <tr>
            <td >Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts and crafts represented in the Exhibition</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/ljspeech_gt/LJ001-0001.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/waveflow_res128_ljspeech_samples_1.0/step_2000k_sentence_0.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>in being comparatively modern.</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/ljspeech_gt/LJ001-0002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>

            </td>
            <td>
             <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/waveflow_res128_ljspeech_samples_1.0/step_2000k_sentence_1.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
            </audio>
            </td>
        </tr>
        <tr>
            <td>For although the Chinese took impressions from wood blocks engraved in relief for centuries before the woodcutters of the Netherlands, by a similar process</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/ljspeech_gt/LJ001-0003.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/waveflow_res128_ljspeech_samples_1.0/step_2000k_sentence_2.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>produced the block books, which were the immediate predecessors of the true printed book</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/ljspeech_gt/LJ001-0004.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/waveflow_res128_ljspeech_samples_1.0/step_2000k_sentence_3.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/ljspeech_gt/LJ001-0005.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/waveflow_res128_ljspeech_samples_1.0/step_2000k_sentence_4.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
    </table>
    
    <br>
    </br>

    <b>CSMSC(Chinese)</b>
    <br>
    </br>

    <table border="2" cellspacing="1" cellpadding="1">
        <tr>
            <th align="center"> Text </th>
            <th align="center"> GT (convert to 24k) </th>
            <th align="center"> ParallelWaveGAN </th>
        </tr>
        <tr>
            <td>昨日，这名“伤者”与医生全部被警方依法刑事拘留</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/baker_gt_24k/009901.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/pwg_baker_ckpt_0.4/009901.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>钱伟长想到上海来办学校是经过深思熟虑的。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/baker_gt_24k/009902.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/pwg_baker_ckpt_0.4/009902.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>她见我一进门就骂，吃饭时也骂，骂得我抬不起头。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/baker_gt_24k/009903.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/pwg_baker_ckpt_0.4/009903.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>李述德在离开之前，只说了一句“柱驼杀父亲了”</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/baker_gt_24k/009904.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/pwg_baker_ckpt_0.4/009904.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>

        </tr>
        <tr>
            <td>这种车票和保险单捆绑出售属于重复性购买。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/baker_gt_24k/009905.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/pwg_baker_ckpt_0.4/009905.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>  
        </tr>    
    </table>
    </div>
    <br>
    <br>

TTS
-------------------

Audio samples generated by a TTS system. Text is first transformed into spectrogram by a text-to-spectrogram model, then the spectrogram is converted into raw audio by a vocoder.

.. raw:: html

    <b>LJSpeech(English)</b>
    <br>
    </br>
    <div class="table">
    <table border="2" cellspacing="1" cellpadding="1"> 
        <tr>
            <th align="center"> Text </th>
            <th align="center"> TransformerTTS + WaveFlow </th>
            <th align="center"> Tacotron2 + WaveFlow </th>
        </tr>
        <tr>
            <td>Life was like a box of chocolates, you never know what you're gonna get.</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                        <source
                            src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/transformer_tts_ljspeech_ckpt_0.4_waveflow_ljspeech_ckpt_0.3/001.wav"
                            type="audio/wav">
                        Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td> 
                <audio controls="controls" style="width: 220px;">
                        <source
                            src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/tacotron2_ljspeech_waveflow_samples_0.2/sentence_1.wav"
                            type="audio/wav">
                        Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>With great power there must come great responsibility.</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                        <source
                            src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/transformer_tts_ljspeech_ckpt_0.4_waveflow_ljspeech_ckpt_0.3/002.wav"
                            type="audio/wav">
                        Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td> 
            <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/tacotron2_ljspeech_waveflow_samples_0.2/sentence_2.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>To be or not to be, that’s a question.</td>
            <td>
            <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/transformer_tts_ljspeech_ckpt_0.4_waveflow_ljspeech_ckpt_0.3/003.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>

            <td> 
            <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/tacotron2_ljspeech_waveflow_samples_0.2/sentence_3.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>

        <tr>
            <td>A man can be destroyed but not defeated.</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/transformer_tts_ljspeech_ckpt_0.4_waveflow_ljspeech_ckpt_0.3/004.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>

            <td> 
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/tacotron2_ljspeech_waveflow_samples_0.2/sentence_4.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>Do not, for one repulse, give up the purpose that you resolved to effort.</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/transformer_tts_ljspeech_ckpt_0.4_waveflow_ljspeech_ckpt_0.3/005.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>

            <td> 
            <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/tacotron2_ljspeech_waveflow_samples_0.2/sentence_5.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>Death is just a part of life, something we're all destined to do.</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/transformer_tts_ljspeech_ckpt_0.4_waveflow_ljspeech_ckpt_0.3/006.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>

            <td> 
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/tacotron2_ljspeech_waveflow_samples_0.2/sentence_6.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>I think it's hard winning a war with words. </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/transformer_tts_ljspeech_ckpt_0.4_waveflow_ljspeech_ckpt_0.3/007.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>

            <td> 
            <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/tacotron2_ljspeech_waveflow_samples_0.2/sentence_7.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>Don’t argue with the people of strong determination, because they may change the fact!</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/transformer_tts_ljspeech_ckpt_0.4_waveflow_ljspeech_ckpt_0.3/008.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>

            <td> 
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/tacotron2_ljspeech_waveflow_samples_0.2/sentence_8.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>Love you three thousand times.</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/transformer_tts_ljspeech_ckpt_0.4_waveflow_ljspeech_ckpt_0.3/009.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>

            <td> 
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/tacotron2_ljspeech_waveflow_samples_0.2/sentence_9.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>   
    </table>
    
    <br>
    </br>

    <b>CSMSC(Chinese)</b>
    <br>
    </br>

    <table border="2" cellspacing="1" cellpadding="1"> 
        <tr>
            <th align="center"> Text </th>
            <th align="center"> SpeedySpeech + ParallelWaveGAN </th>
            <th align="center"> FastSpeech2 + ParallelWaveGAN </th>
        </tr>
        <tr>
            <td>凯莫瑞安联合体的经济崩溃，迫在眉睫。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speedyspeech_baker_ckpt_0.4_pwg_baker_ckpt_0.4/001.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fastspeech2_nosil_baker_ckpt_0.4_parallel_wavegan_baker_ckpt_0.4/001.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>对于所有想要离开那片废土，去寻找更美好生活的人来说。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speedyspeech_baker_ckpt_0.4_pwg_baker_ckpt_0.4/002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fastspeech2_nosil_baker_ckpt_0.4_parallel_wavegan_baker_ckpt_0.4/002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>克哈，是你们所有人安全的港湾。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speedyspeech_baker_ckpt_0.4_pwg_baker_ckpt_0.4/003.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fastspeech2_nosil_baker_ckpt_0.4_parallel_wavegan_baker_ckpt_0.4/003.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>

        <tr>
            <td>为了保护尤摩扬人民不受异虫的残害，我所做的，比他们自己的领导委员会都多。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speedyspeech_baker_ckpt_0.4_pwg_baker_ckpt_0.4/004.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fastspeech2_nosil_baker_ckpt_0.4_parallel_wavegan_baker_ckpt_0.4/004.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>无论他们如何诽谤我，我将继续为所有泰伦人的最大利益，而努力奋斗。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speedyspeech_baker_ckpt_0.4_pwg_baker_ckpt_0.4/005.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fastspeech2_nosil_baker_ckpt_0.4_parallel_wavegan_baker_ckpt_0.4/005.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>身为你们的元首，我带领泰伦人实现了人类统治领地和经济的扩张。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speedyspeech_baker_ckpt_0.4_pwg_baker_ckpt_0.4/006.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fastspeech2_nosil_baker_ckpt_0.4_parallel_wavegan_baker_ckpt_0.4/006.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>我们将继续成长，用行动回击那些只会说风凉话，不愿意和我们相向而行的害群之马。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speedyspeech_baker_ckpt_0.4_pwg_baker_ckpt_0.4/007.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fastspeech2_nosil_baker_ckpt_0.4_parallel_wavegan_baker_ckpt_0.4/007.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>帝国武装力量，无数的优秀儿女，正时刻守卫着我们的家园大门，但是他们孤木难支。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speedyspeech_baker_ckpt_0.4_pwg_baker_ckpt_0.4/008.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fastspeech2_nosil_baker_ckpt_0.4_parallel_wavegan_baker_ckpt_0.4/008.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>凡是今天应征入伍者，所获的所有刑罚罪责，减半。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speedyspeech_baker_ckpt_0.4_pwg_baker_ckpt_0.4/009.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fastspeech2_nosil_baker_ckpt_0.4_parallel_wavegan_baker_ckpt_0.4/009.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>   
    </table>

    <br>
    <br>

    <table border="2" cellspacing="1" cellpadding="1"> 
        <tr>
            <th align="center"> FastSpeech2-Conformer + <br>ParallelWaveGAN </th>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fastspeech2_conformer_baker_ckpt_0.5_pwg_baker_ckpt_0.4/001.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fastspeech2_conformer_baker_ckpt_0.5_pwg_baker_ckpt_0.4/002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fastspeech2_conformer_baker_ckpt_0.5_pwg_baker_ckpt_0.4/003.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>

        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fastspeech2_conformer_baker_ckpt_0.5_pwg_baker_ckpt_0.4/004.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fastspeech2_conformer_baker_ckpt_0.5_pwg_baker_ckpt_0.4/005.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fastspeech2_conformer_baker_ckpt_0.5_pwg_baker_ckpt_0.4/006.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fastspeech2_conformer_baker_ckpt_0.5_pwg_baker_ckpt_0.4/007.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fastspeech2_conformer_baker_ckpt_0.5_pwg_baker_ckpt_0.4/008.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fastspeech2_conformer_baker_ckpt_0.5_pwg_baker_ckpt_0.4/009.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>   
    </table>
    </div>
    <br>
    <br>


Multi-Speaker TTS
-------------------

PaddleSpeech also supports Multi-Speaker TTS. We provide audio demos generated by FastSpeech2 + ParallelWaveGAN, using the AISHELL-3 Multi-Speaker TTS dataset. Each row corresponds to a different speaker.


.. raw:: html

    <div class="table">
    <table border="2" cellspacing="1" cellpadding="1">
        <tr>
            <th align="center"> Target Timbre </th>
            <th align="center"> Generated </th>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/0.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/0_002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/1.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/1_002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/2.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/2_002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/3.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/3_002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/4.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/4_002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/5.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/5_002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/6.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/6_002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/7.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/7_002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/8.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/8_002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/9.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/9_002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/10.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/10_002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/11.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/11_002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/12.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/12_002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/13.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/13_002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/14.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/14_002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/15.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/15_002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/16.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/16_002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/17.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/17_002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/18.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/18_002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/target/19.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/fs2_aishell3_demos/generated/19_002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>

    </table>
    </div>
    <br>
    <br>
        

Style control in FastSpeech2
--------------------------------------
In our FastSpeech2, we can control ``duration``, ``pitch`` and ``energy``.

We provide the audio demos of duration control here. ``duration`` means the duration of phonemes: when we reduce ``duration``, the speed of the audio increases, and when we increase ``duration``, the speed of the audio decreases.

The ``duration`` of different phonemes in a sentence can have different scale ratios (when you want to slow down one word and keep the other words' speed in a sentence). Here we use a fixed scale ratio for different phonemes to control the ``speed`` of audios.

The duration control in FastSpeech2 changes the speed of the audio while keeping the pitch unchanged (in some speech tools, increasing the speed also increases the pitch, and vice versa).

.. raw:: html

    <div class="table">
    <table border="2" cellspacing="1" cellpadding="1">
        <tr>
            <th align="center"> Speed(0.8x) </th>
            <th align="center"> Speed(1x) </th>
            <th align="center"> Speed(1.2x) </th>
        </tr>
        <tr>
             <td>
                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speed/x0.8_001.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speed/x1_001.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speed/x1.2_001.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
             <td>
                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speed/x0.8_002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speed/x1_002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speed/x1.2_002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
             <td>
                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speed/x0.8_003.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speed/x1_003.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speed/x1.2_003.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
             <td>
                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speed/x0.8_004.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speed/x1_004.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speed/x1.2_004.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
             <td>
                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speed/x0.8_005.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speed/x1_005.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speed/x1.2_005.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
             <td>
                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speed/x0.8_007.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speed/x1_007.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speed/x1.2_007.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
             <td>
                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speed/x0.8_008.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speed/x1_008.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speed/x1.2_008.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
             <td>
                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speed/x0.8_009.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speed/x1_009.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 250px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speed/x1.2_009.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>

    </table>
    </div>
    <br>
    <br>

We provide the audio demos of pitch control here. 

When we set pitch of one sentence to a mean value and set ``tones`` of phones to ``1``, we will get a ``robot-style`` timbre.

When we raise the pitch of an adult female (with a fixed scale ratio), we will get a ``child-style`` timbre.

The ``pitch`` of different phonemes in a sentence can also have different scale ratios.

The normal audios are in the second column of the previous table.

.. raw:: html

    <div class="table">
    <table border="2" cellspacing="1" cellpadding="1">
        <tr>
            <th align="center"> Robot </th>
            <th align="center"> Child </th>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/robot/001.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/child_voice/001.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/robot/002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/child_voice/002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/robot/003.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/child_voice/003.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/robot/004.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/child_voice//004.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/robot/005.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/child_voice//005.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/robot/007.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/child_voice//007.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/robot/008.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/child_voice//008.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/robot/009.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/child_voice//009.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>

    </table>
    </div>
    <br>
    <br>


Chinese TTS with/without text frontend
--------------------------------------

We provide a complete Chinese text frontend module in ``PaddleSpeech TTS``. ``Text Normalization`` and ``G2P`` are the most important modules in the text frontend. We assume that the texts are already normalized, and mainly compare the ``G2P`` module here.

We use ``FastSpeech2`` + ``ParallelWaveGAN`` here.

.. raw:: html

    <div class="table">
    <table border="2" cellspacing="1" cellpadding="1">
        <tr>
            <th align="center"> Text</th>
            <th align="center"> With Text Frontend </th>
            <th align="center"> Without Text Frontend </th>
        </tr>
        <tr>
            <td>他只是一个纸老虎。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/with_frontend/001.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/without_frontend/001.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>手表厂有五种好产品。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/with_frontend/002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/without_frontend/002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>老板的轿车需要保养。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/with_frontend/003.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/without_frontend/003.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>我们所有人都好喜欢你呀。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/with_frontend/004.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/without_frontend/004.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>岂有此理。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/with_frontend/005.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/without_frontend/005.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>虎骨酒多少钱一瓶。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/with_frontend/006.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/without_frontend/006.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>这件事情需要冷处理。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/with_frontend/007.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/without_frontend/007.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>这个老奶奶是个大喇叭。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/with_frontend/008.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/without_frontend/008.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>我喜欢说相声。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/with_frontend/009.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/without_frontend/009.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>有一天，我路过了一栋楼。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/with_frontend/010.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/without_frontend/010.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>

    </table>
    </div>
    <br>
    <br> 


Finetune FastSpeech2 for CSMSC
--------------------------------------

Finetuning demos of `tts_finetune/tts3 <https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/tts_finetune/tts3>`_  for CSMSC dataset.

When finetuning for CSMSC, we think the audio quality ranks as follows: ``Freeze encoder`` > ``Non Frozen`` > ``Freeze encoder && duration_predictor``.

.. raw:: html

    <div class="table">
    CSMSC reference audio (fastspeech2_csmsc + hifigan_aishell3 in CLI): 欢迎使用飞桨语音套件。
    <br>
    <br>
    <audio controls="controls" style="width: 220px;">
        <source
            src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/finetune/ref_fastspeech2_csmsc_hifigan_aishell3.wav"
            type="audio/wav">
        Your browser does not support the <code>audio</code> element.
    </audio>
    <br>
    <br>
    <table border="2" cellspacing="1" cellpadding="1">
        <tr>
            <th align="center"> Frozen Method</th>
            <th align="center"> train_num=10, <br> bs=10, <br> epoch=100, <br> lr=1e-4 </th>
            <th align="center"> train_num=18, <br> bs=18, <br> epoch=100, <br> lr=1e-4 </th>
            <th align="center"> train_num=97, <br> bs=64, <br> epoch=100, <br> lr=1e-4 </th>
            <th align="center"> train_num=196, <br> bs=64, <br> epoch=100, <br> lr=1e-4 </th>
        </tr>
        <tr>
            <td>Non Frozen</td>
            <td>
                <audio controls="controls" style="width: 150px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/finetune/train10_bn10_epoch100_lr0.0001.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 150px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/finetune/train18_bn18_epoch100_lr0.0001.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 150px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/finetune/train97_bn64_epoch100_lr0.0001.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 150px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/finetune/train196_bn64_epoch100_lr0.0001.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>Freeze encoder</td>
            <td>
                <audio controls="controls" style="width: 150px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/finetune/train10_fr_encoder_bn10_epoch100_lr0.0001.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 150px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/finetune/train18_fr_encoder_bn18_epoch100_lr0.0001.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 150px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/finetune/train97_fr_encoder_bn64_epoch100_lr0.0001.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 150px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/finetune/train196_fr_encoder_bn64_epoch100_lr0.0001.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>Freeze encoder &amp;&amp; <br> duration_predictor</td>
            <td>
                <audio controls="controls" style="width: 150px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/finetune/train10_fr_encoder_duration_bn10_epoch100_lr0.0001.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 150px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/finetune/train18_fr_encoder_duration_bn18_epoch100_lr0.0001.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 150px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/finetune/train97_fr_encoder_duration_bn64_epoch100_lr0.0001.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 150px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/finetune/train196_fr_encoder_duration_bn64_epoch100_lr0.0001.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
    </table>
    </div>
    <br>
    <br> 


================================================
FILE: docs/source/tts/demo_2.rst
================================================
Audio Sample (PaddleSpeech TTS VS Espnet TTS)
=============================================

This is an audio demo page to contrast PaddleSpeech TTS and Espnet TTS. We use their respective modules (Text Frontend, Acoustic model and Vocoder) here.
We use Espnet's released models here.

FastSpeech2 + Parallel WaveGAN in CSMSC

.. raw:: html
     
    
    <div class="table">
    <table border="2" cellspacing="1" cellpadding="1"> 
        <tr>
            <th align="center"> Text </th>
            <th align="center"> Espnet TTS </th>
            <th align="center"> PaddleSpeech TTS </th>
        </tr>
        <tr>
            <td>早上好，今天是2020/10/29，最低温度是-3°C。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/001.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/001.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>你好，我的编号是37249，很高兴为您服务。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/002.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>我们公司有37249个人。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/003.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/003.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>我出生于2005年10月8日。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/004.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/004.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>我们习惯在12:30吃中午饭。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/005.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/005.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>只要有超过3/4的人投票同意，你就会成为我们的新班长。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/006.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/006.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>我要买一只价值999.9元的手表。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/007.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/007.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>我的手机号是18544139121，欢迎来电。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/008.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/008.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>明天有62%的概率降雨。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/009.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/009.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>手表厂有五种好产品。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/010.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/010.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>跑马场有五百匹很勇敢的千里马。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/011.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/011.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>有一天，我看到了一栋楼，我顿感不妙，因为我看不清里面有没有人。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/012.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/012.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>史小姐拿着小雨伞去找她的老保姆了。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/013.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/013.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        <tr>
            <td>不要相信这个老奶奶说的话，她一点儿也不好。</td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/espent/014.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
            <td>
                <audio controls="controls" style="width: 220px;">
                    <source
                        src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/parakeet_espnet_fs2_pwg_demo/tn_g2p/parakeet/014.wav"
                        type="audio/wav">
                    Your browser does not support the <code>audio</code> element.
                </audio>
            </td>
        </tr>
        </table>
        </div>


================================================
FILE: docs/source/tts/gan_vocoder.md
================================================
# GAN Vocoders
This is a brief introduction to GAN vocoders. Here we mainly introduce the losses used by the different vocoders.

Model  | Generator Loss |Discriminator Loss
:-------------:| :------------:| :-----
Mel GAN | adversarial loss <br> Feature Matching  | Multi-Scale Discriminator |
Parallel Wave GAN |adversarial loss <br> Multi-resolution STFT loss  | adversarial loss|
Multi-Band Mel GAN | adversarial loss <br> full band Multi-resolution STFT loss <br> sub band Multi-resolution STFT loss |Multi-Scale Discriminator|
HiFi GAN |adversarial loss <br> Feature Matching <br>  Mel-Spectrogram Loss | Multi-Scale Discriminator <br> Multi-Period Discriminator|


================================================
FILE: docs/source/tts/models_introduction.md
================================================
# Models introduction
TTS system mainly includes three modules: `Text Frontend`, `Acoustic model` and `Vocoder`. We introduce a rule-based Chinese text frontend in [zh_text_frontend](./zh_text_frontend.md). Here, we will introduce acoustic models and vocoders, which are trainable.

The main processes of TTS include:
1. Convert the original text into characters/phonemes, through the `text frontend` module.
2. Convert characters/phonemes into acoustic features, such as linear spectrogram, mel spectrogram, LPC features, etc. through `Acoustic models`.
3. Convert acoustic features into waveforms through `Vocoders`.

A simple text frontend module can be implemented by rules. Acoustic models and vocoders need to be trained. The models provided by PaddleSpeech TTS are acoustic models and vocoders.

## Acoustic Models
### Modeling Objectives of Acoustic Models
Modeling the mapping relationship between text sequences and speech features：
```text
text X = {x1,...,xM}
speech Y = {y1,...,yN}
```
Modeling Objectives:
```text
Ω = argmax p(Y|X,Ω)
```
### Modeling process of Acoustic Models
At present, there are two mainstream acoustic model structures.

- Frame level acoustic model:
   - Duration model (M Tokens - > N Frames).
   - Acoustic decoder (N Frames - > N Frames).

<div align="left">
  <img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleSpeech/develop/docs/images/frame_level_am.png" width=500 /> <br>
</div>

- Sequence to sequence acoustic model:
    - M Tokens - > N Frames.

<div align="left">
  <img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleSpeech/develop/docs/images/seq2seq_am.png" width=500 /> <br>
</div>

### Tacotron2
 [Tacotron](https://arxiv.org/abs/1703.10135)  is the first end-to-end acoustic model based on deep learning, and it is also the most widely used acoustic model.

[Tacotron2](https://arxiv.org/abs/1712.05884) is an improvement of Tacotron.
#### Tacotron
**Features of Tacotron:**
- Encoder.
   - CBHG.
   - Input: character sequence.
- Decoder.
    - Global soft attention.
    - unidirectional RNN.
    - Autoregressive teacher force training (input real speech feature).
    - Multi frame prediction.
    - CBHG postprocess.
    - Vocoder: Griffin-Lim.
<div align="left">
  <img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleSpeech/develop/docs/images/tacotron.png" width=700 /> <br>
</div>

**Advantage of Tacotron:**
- No need for complex text frontend analysis modules.
- No need for an additional duration model.
- Greatly simplifies the acoustic model construction process and reduces the dependence of speech synthesis tasks on domain knowledge.

**Disadvantages of Tacotron:**
- The CBHG  is complex and the amount of parameters is relatively large.
- Global soft attention.
- Poor stability for speech synthesis tasks.
- In training, the fewer speech frames predicted at each step, the more difficult it is to train.
-  Phase problem in Griffin-Lim causes speech distortion during wave reconstruction.
- The autoregressive decoder cannot be stopped during the generation process.

#### Tacotron2
**Features of Tacotron2:**
- Reduction of parameters.
   - CBHG -> PostNet (3 Conv layers + BLSTM or 5 Conv layers).
   - remove Attention RNN.
- Speech distortion caused by Griffin-Lim.
    - WaveNet.
- Improvements of PostNet.
   - CBHG -> 5 Conv layers.
   -  The input and output of the PostNet calculate `L2` loss with real Mel spectrogram.
   - Residual connection.
- Bad stop in an autoregressive decoder.
   - Predict whether it should stop at each moment of decoding (stop token).
   - Set a threshold to determine whether to stop generating when decoding.
- Stability of attention.
   - Location-aware attention.
   - The alignment matrix of the previous time is considered at step `t` of the decoder.

<div align="left">
  <img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleSpeech/develop/docs/images/tacotron2.png" width=500 /> <br>
</div>

You can find PaddleSpeech TTS's tacotron2 with LJSpeech dataset example at [examples/ljspeech/tts0](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/ljspeech/tts0).

### TransformerTTS
**Disadvantages of the Tacotrons:**
- Encoder and decoder are relatively weak at global information modeling
   - Vanishing gradient of RNN.
   - Fixed-length context modeling problem in CNN kernel.
- Training is relatively inefficient.
- The attention is not robust enough and the stability is poor.

Transformer TTS is a combination of Tacotron2 and Transformer.

#### Transformer
 [Transformer](https://arxiv.org/abs/1706.03762) is a seq2seq model based entirely on an attention mechanism.

**Features of Transformer:**
- Encoder.
    - `N` blocks based on self-attention mechanism.
    - Positional Encoding.
- Decoder.
    - `N` blocks based on self-attention mechanism.
    - Add Mask to the self-attention in blocks to cover up the information after the `t` step.
    - Attentions between encoder and decoder.
    - Positional Encoding.

<div align="left">
  <img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleSpeech/develop/docs/images/transformer.png" width=500 /> <br>
</div>

#### Transformer TTS
Transformer TTS is a seq2seq acoustic model based on Transformer and Tacotron2.

**Motivations：**
- RNNs in Tacotron2 make training inefficient.
- Vanishing gradient of RNN makes the model's ability to model long-term contexts weak.
- Self-attention doesn't contain any recurrent structure, so it can be trained in parallel.
- Self-attention can model global context information well.

**Features of Transformer TTS:**
- Add conv based PreNet in encoder and decoder.
- Stop Token in decoder controls when to stop autoregressive generation.
- Add PostNet after decoder to improve the quality of synthetic speech.
- Scaled position encoding.
    - Uniform scale position encoding may have a negative impact on input or output sequences.

<div align="left">
  <img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleSpeech/develop/docs/images/transformer_tts.png" width=500 /> <br>
</div>

**Disadvantages of Transformer TTS:**
- The ability of position encoding for timing information is still relatively weak.
- The ability to perceive local information is weak, and local information is more related to pronunciation.
- Stability is worse than Tacotron2.

You can find PaddleSpeech TTS's Transformer TTS with LJSpeech dataset example at [examples/ljspeech/tts1](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/ljspeech/tts1).


### FastSpeech2
**Disadvantage of seq2seq models:**
- In the seq2seq model based on attention, no matter how to improve the attention mechanism, it's difficult to avoid generation errors in the decoding stage.

Frame-level acoustic models use duration models to determine the pronunciation duration of phonemes, and the frame-level mapping does not have the uncertainty of sequence generation.

In seq2seq models, the concept of duration models is used as the alignment module of two sequences to replace attention, which can avoid the uncertainty in attention, and significantly improve the stability of the seq2seq models.

#### FastSpeech
Instead of using the encoder-attention-decoder based architecture as adopted by most seq2seq based autoregressive and non-autoregressive generation, [FastSpeech](https://arxiv.org/abs/1905.09263) is a novel feed-forward structure, which can generate a target mel spectrogram sequence in parallel.

**Features of FastSpeech:**
- Encoder: based on Transformer.
- Change `FFN` to `CNN` in self-attention.
    -  Model local dependency.
- Length regulator.
    - Use real phoneme durations to expand the output frame of the encoder during training.
- Non-autoregressive decode.
    -  Improve generation efficiency.

**Length predictor:**
- Pretrain a TransformerTTS model.
- Get alignment matrix of train data.
- Calculate the phoneme durations according to the probability of the alignment matrix.
- Use the output of the encoder to predict the phoneme durations and calculate the MSE loss.
- Use real phoneme durations to expand the output frame of the encoder during training.
- Use phoneme durations predicted by the duration model to expand the frame during prediction.
    - Attention cannot control phoneme durations. The explicit duration modeling can control durations through a duration coefficient (the duration coefficient is `1` during training).

**Advantages of non-autoregressive decoder:**
- The built-in duration model of the seq2seq model has converted the input length `M` to the output length `N`.
- The length of the output is known, `stop token` is no longer used, avoiding the problem of being unable to stop.
- Can be generated in parallel (decoding time is less affected by sequence length).

<div align="left">
  <img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleSpeech/develop/docs/images/fastspeech.png" width=800 /> <br>
</div>

#### FastPitch
[FastPitch](https://arxiv.org/abs/2006.06873) follows FastSpeech. A single pitch value is predicted for every temporal location, which improves the overall quality of synthesized speech.

<div align="left">
  <img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleSpeech/develop/docs/images/fastpitch.png" width=500 /> <br>
</div>

#### FastSpeech2
**Disadvantages of FastSpeech:**
- The teacher-student distillation pipeline is complicated and time-consuming.
- The duration extracted from the teacher model is not accurate enough.
- The target mel spectrograms distilled from the teacher model suffer from information loss due to data simplification.

[FastSpeech2](https://arxiv.org/abs/2006.04558)  addresses the issues in FastSpeech and better solves the one-to-many mapping problem in TTS.

**Features of FastSpeech2:**
- Directly train the model with the ground-truth target instead of the simplified output from the teacher.
- Introducing more variation information of speech as conditional inputs, extract `duration`, `pitch`, and `energy` from speech waveform and directly take them as conditional inputs in training and use predicted values in inference.

FastSpeech2 is similar to FastPitch but introduces more variation information of the speech.

<div align="left">
  <img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleSpeech/develop/docs/images/fastspeech2.png" width=800 /> <br>
</div>

You can find PaddleSpeech TTS's FastSpeech2/FastPitch with CSMSC dataset example at [examples/csmsc/tts3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/tts3). We use token-averaged pitch and energy values introduced in FastPitch rather than frame-level ones in FastSpeech2.

### SpeedySpeech
[SpeedySpeech](https://arxiv.org/abs/2008.03802) simplifies the teacher-student architecture of FastSpeech and provides a fast and stable training procedure.

**Features of SpeedySpeech:**
- Use a simpler, smaller, and faster-to-train convolutional teacher model ([Deepvoice3](https://arxiv.org/abs/1710.07654) and [DCTTS](https://arxiv.org/abs/1710.08969)) with a single attention layer instead of Transformer used in FastSpeech.  
- Show that self-attention layers in the student network are not needed for high-quality speech synthesis.
- Describe a simple data augmentation technique that can be used early in the training to make the teacher network robust to sequential error propagation.

<div align="left">
  <img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleSpeech/develop/docs/images/speedyspeech.png" width=500 /> <br>
</div>

You can find PaddleSpeech TTS's SpeedySpeech with CSMSC dataset example at [examples/csmsc/tts2](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/tts2).

## Vocoders
In speech synthesis, the main task of the vocoder is to convert the spectral parameters predicted by the acoustic model into the final speech waveform.

Taking into account the short-term change frequency of the waveform, the acoustic model usually avoids direct modeling of the speech waveform, but firstly models the spectral features extracted from the speech waveform, and then reconstructs the waveform by the decoding part of the vocoder.

A vocoder usually consists of a pair of encoders and decoders for speech analysis and synthesis. The encoder estimates the parameters, and then the decoder restores the speech.

Vocoders based on neural networks usually act as the speech synthesis part, learning the mapping relationship from spectral features to waveforms through training data.

### Categories of neural vocoders
- Autoregression
    - WaveNet
    - WaveRNN
    - LPCNet

- Flow
    - **WaveFlow**
    - WaveGlow
    - FloWaveNet
    - Parallel WaveNet
- GAN
    - WaveGAN
    - **Parallel WaveGAN**
    - **MelGAN**
    - **Style MelGAN**
    - **Multi Band MelGAN**
    - **HiFi GAN**
- VAE
    - Wave-VAE
- Diffusion
    - WaveGrad
    - DiffWave

**Motivations of GAN-based vocoders:**
- Modeling speech signals by estimating probability distribution usually has high requirements for the expression ability of the model itself. In addition, specific assumptions need to be made about the distribution of waveforms.
- Although autoregressive neural vocoders can obtain high-quality synthetic speech, such models usually have a **slow generation speed**.
- The training of inverse autoregressive flow vocoders is complex, and they also require the modeling capability of long-term context information.
- Vocoders based on Bipartite Transformation converge slowly and are complex.
- GAN-based vocoders don't need to make assumptions about the speech distribution and train through adversarial learning.

Here, we introduce a Flow-based vocoder WaveFlow and a GAN-based vocoder Parallel WaveGAN.

### WaveFlow
 [WaveFlow](https://arxiv.org/abs/1912.01219) is proposed by Baidu Research.

**Features of WaveFlow:**
- It can synthesize 22.05 kHz high-fidelity speech around 40x faster than real-time on an Nvidia V100 GPU without engineered inference kernels, which is faster than [WaveGlow](https://github.com/NVIDIA/waveglow) and several orders of magnitude faster than WaveNet.
- It is a small-footprint flow-based model for raw audio. It has only 5.9M parameters, which is 15x smaller than WaveGlow (87.9M).
- It is directly trained with maximum likelihood without probability density distillation and auxiliary losses as used in [Parallel WaveNet](https://arxiv.org/abs/1711.10433) and [ClariNet](https://openreview.net/pdf?id=HklY120cYm), which simplifies the training pipeline and reduces the cost of development.

You can find PaddleSpeech TTS's WaveFlow with LJSpeech dataset example at [examples/ljspeech/voc0](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/ljspeech/voc0).

### Parallel WaveGAN
[Parallel WaveGAN](https://arxiv.org/abs/1910.11480) trains a non-autoregressive WaveNet variant as a generator in a GAN-based training method.

**Features of Parallel WaveGAN:**

- Use non-causal convolution instead of causal convolution.
- The input is random Gaussian white noise.
- The model is non-autoregressive both in training and prediction, which is fast.
- Multi-resolution STFT loss.

<div align="left">
  <img src="https://raw.githubusercontent.com/PaddlePaddle/PaddleSpeech/develop/docs/images/pwg.png" width=600 /> <br>
</div>

You can find PaddleSpeech TTS's Parallel WaveGAN with CSMSC example at [examples/csmsc/voc1](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/voc1).


================================================
FILE: docs/source/tts/quick_start.md
================================================
([简体中文](./quick_start_cn.md)|English)
# Quick Start of Text-to-Speech
The examples in PaddleSpeech are mainly classified by datasets, the TTS datasets we mainly used are:
* CSMSC (Mandarin single speaker)
* AISHELL3 (Mandarin multiple speakers)
* LJSpeech (English single speaker)
* VCTK (English multiple speakers)

The models in PaddleSpeech TTS have the following mapping relationship:
* tts0 - Tacotron2
* tts1 - TransformerTTS
* tts2 - SpeedySpeech
* tts3 - FastSpeech2
* voc0 - WaveFlow
* voc1 - Parallel WaveGAN
* voc2 - MelGAN
* voc3 - MultiBand MelGAN
* voc4 - Style MelGAN
* voc5 - HiFiGAN
* vc0 - Tacotron2 Voice Clone with GE2E
* vc1 - FastSpeech2 Voice Clone with GE2E

## Quick Start

Let's take a FastSpeech2 + Parallel WaveGAN with CSMSC dataset for instance. [examples/csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc)

### Train Parallel WaveGAN with CSMSC
- Go to the directory
    ```bash
    cd examples/csmsc/voc1
    ```
- Source env
    ```bash
    source path.sh
    ```
    **Must do this before you start to do anything.**
    Set `MAIN_ROOT` as project dir. Using `parallelwave_gan` model as `MODEL`.

- Main entrypoint
    ```bash
    bash run.sh
    ```
    This is just a demo, please make sure source data have been prepared well and every `step` works well before the next `step`.
### Train FastSpeech2 with CSMSC
- Go to the directory
    ```bash
    cd examples/csmsc/tts3
    ```
- Source env
    ```bash
    source path.sh
    ```
    **Must do this before you start to do anything.**
    Set `MAIN_ROOT` as project dir. Using `fastspeech2` model as `MODEL`.
- Main entry point
    ```bash
    bash run.sh
    ```
    This is just a demo, please make sure source data have been prepared well and every `step` works well before the next `step`.

The steps in `run.sh` mainly include:
- source path.
- preprocess the dataset.
- train the model.
- synthesize waveform from metadata.jsonl.
- synthesize waveform from a text file. (in acoustic models)
- inference using a static model. (optional)

For more details, you can see `README.md` in examples.

## Pipeline of TTS
This section shows how to use pretrained models provided by TTS and make an inference with them.

Pretrained models in TTS are provided in an archive. Extract it to get a folder like this:
**Acoustic Models:**
```text
checkpoint_name
├── default.yaml
├── snapshot_iter_*.pdz
├── speech_stats.npy
├── phone_id_map.txt
├── spk_id_map.txt (optional)
└── tone_id_map.txt (optional)
```
**Vocoders:**
```text
checkpoint_name
├── default.yaml  
├── snapshot_iter_*.pdz
└── stats.npy  
```
- `default.yaml` stores the config used to train the model.
- `snapshot_iter_*.pdz` is the checkpoint file, where `*` is the steps it has been trained.
- `*_stats.npy` is the stats file of the feature if it has been normalized before training.
- `phone_id_map.txt` is the map of phonemes to phoneme_ids.
- `tone_id_map.txt` is the map of tones to tones_ids, when you split tones and phones before training acoustic models. (for example in our csmsc/speedyspeech example)
- `spk_id_map.txt` is the map of speakers to spk_ids in multi-spk acoustic models. (for example in our aishell3/fastspeech2 example)

The example code below shows how to use the models for prediction.
### Acoustic Models (text to spectrogram)
The code below shows how to use a `FastSpeech2` model. After loading the pretrained model, use it and the normalizer object to construct a prediction object, then use `fastspeech2_inference(phone_ids)` to generate spectrograms, which can be further used to synthesize raw audio with a vocoder.

```python
from pathlib import Path
import numpy as np
import paddle
import yaml
from yacs.config import CfgNode
from paddlespeech.t2s.models.fastspeech2 import FastSpeech2
from paddlespeech.t2s.models.fastspeech2 import FastSpeech2Inference
from paddlespeech.t2s.modules.normalizer import ZScore
# examples/fastspeech2/baker/frontend.py
from frontend import Frontend

# load the pretrained model
checkpoint_dir = Path("fastspeech2_nosil_baker_ckpt_0.4")
with open(checkpoint_dir / "phone_id_map.txt", "r") as f:
    phn_id = [line.strip().split() for line in f.readlines()]
vocab_size = len(phn_id)
with open(checkpoint_dir / "default.yaml") as f:
    fastspeech2_config = CfgNode(yaml.safe_load(f))
odim = fastspeech2_config.n_mels
model = FastSpeech2(
    idim=vocab_size, odim=odim, **fastspeech2_config["model"])
model.set_state_dict(
    paddle.load(args.fastspeech2_checkpoint)["main_params"])
model.eval()

# load stats file
stat = np.load(checkpoint_dir / "speech_stats.npy")
mu, std = stat
mu = paddle.to_tensor(mu)
std = paddle.to_tensor(std)
fastspeech2_normalizer = ZScore(mu, std)

# construct a prediction object
fastspeech2_inference = FastSpeech2Inference(fastspeech2_normalizer, model)

# load Chinese Frontend
frontend = Frontend(checkpoint_dir / "phone_id_map.txt")

# text to spectrogram
sentence = "你好吗？"
input_ids = frontend.get_input_ids(sentence, merge_sentences=True)
phone_ids = input_ids["phone_ids"]
flags = 0
# The output of Chinese text frontend is segmented
for part_phone_ids in phone_ids:
    with paddle.no_grad():
        temp_mel = fastspeech2_inference(part_phone_ids)
        if flags == 0:
            mel = temp_mel
            flags = 1
        else:
            mel = paddle.concat([mel, temp_mel])
```

### Vocoder (spectrogram to wave)
The code below shows how to use a `Parallel WaveGAN` model. Like the example above, after loading the pretrained model, use it and the normalizer object to construct a prediction object, then use `pwg_inference(mel)` to generate raw audio (in wav format).

```python
from pathlib import Path
import numpy as np
import paddle
import soundfile as sf
import yaml
from yacs.config import CfgNode
from paddlespeech.t2s.models.parallel_wavegan import PWGGenerator
from paddlespeech.t2s.models.parallel_wavegan import PWGInference
from paddlespeech.t2s.modules.normalizer import ZScore

# load the pretrained model
checkpoint_dir = Path("parallel_wavegan_baker_ckpt_0.4")
with open(checkpoint_dir / "pwg_default.yaml") as f:
    pwg_config = CfgNode(yaml.safe_load(f))
vocoder = PWGGenerator(**pwg_config["generator_params"])
vocoder.set_state_dict(paddle.load(args.pwg_params))
vocoder.remove_weight_norm()
vocoder.eval()

# load stats file
stat = np.load(checkpoint_dir / "pwg_stats.npy")
mu, std = stat
mu = paddle.to_tensor(mu)
std = paddle.to_tensor(std)
pwg_normalizer = ZScore(mu, std)

# construct a prediction object
pwg_inference = PWGInference(pwg_normalizer, vocoder)

# spectrogram to wave
wav = pwg_inference(mel)
sf.write(
        audio_path,
        wav.numpy(),
        samplerate=fastspeech2_config.fs)
```


================================================
FILE: docs/source/tts/quick_start_cn.md
================================================
(简体中文|[English](./quick_start.md))
# 语音合成快速开始
这些PaddleSpeech中的样例主要按数据集分类，我们主要使用的TTS数据集有：

* CSMSC (普通话单发音人)
* AISHELL3 (普通话多发音人)
* LJSpeech (英文单发音人)
* VCTK (英文多发音人)

PaddleSpeech 的 TTS 模型具有以下映射关系：

* tts0 - Tacotron2
* tts1 - TransformerTTS
* tts2 - SpeedySpeech
* tts3 - FastSpeech2
* voc0 - WaveFlow
* voc1 - Parallel WaveGAN
* voc2 - MelGAN
* voc3 - MultiBand MelGAN
* voc4 - Style MelGAN
* voc5 - HiFiGAN
* vc0 - Tacotron2 Voice Clone with GE2E
* vc1 - FastSpeech2 Voice Clone with GE2E

## 快速开始

让我们以 FastSpeech2 + Parallel WaveGAN 和 CSMSC 数据集 为例. [examples/csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc)

### 用 CSMSC 数据集训练 Parallel WaveGAN

- 进入目录
    ```bash
    cd examples/csmsc/voc1
    ```
- 设置环境变量
    ```bash
    source path.sh
    ```
    **在你开始做任何事情之前，必须先做这步**
    将 `MAIN_ROOT` 设置为项目目录. 使用 `parallelwave_gan` 模型作为 `MODEL`.

- 运行
    ```bash
    bash run.sh
    ```
    这只是一个演示，请确保源数据已经准备好，并且在下一个 `step` 之前每个 `step` 都运行正常.
### 用CSMSC数据集训练FastSpeech2

- 进入目录
    ```bash
    cd examples/csmsc/tts3
    ```
    
- 设置环境变量
    ```bash
    source path.sh
    ```
    **在你开始做任何事情之前，必须先做这步**
    将 `MAIN_ROOT` 设置为项目目录. 使用 `fastspeech2` 模型作为 `MODEL` 。
    
- 运行
    ```bash
    bash run.sh
    ```
    这只是一个演示，请确保源数据已经准备好，并且在下一个 `step` 之前每个 `step` 都运行正常。

`run.sh` 中主要包括以下步骤：

- 设置路径。
- 预处理数据集，
- 训练模型。
- 从 `metadata.jsonl` 中合成波形
- 从文本文件合成波形。（在声学模型中）
- 使用静态模型进行推理。（可选）

有关更多详细信息，请参见 examples 中的 `README.md`

## TTS 流水线
本节介绍如何使用 TTS 提供的预训练模型，并对其进行推理。

TTS中的预训练模型在压缩包中提供。将其解压缩以获得如下文件夹：
**Acoustic Models:**

```text
checkpoint_name
├── default.yaml
├── snapshot_iter_*.pdz
├── speech_stats.npy
├── phone_id_map.txt
├── spk_id_map.txt (optional)
└── tone_id_map.txt (optional)
```
**Vocoders:**
```text
checkpoint_name
├── default.yaml  
├── snapshot_iter_*.pdz
└── stats.npy  
```
- `default.yaml` 存储用于训练模型的配置。
- `snapshot_iter_*.pdz` 是检查点文件，其中`*`是它经过训练的步骤。
- `*_stats.npy` 是特征的统计文件，如果它在训练前已被标准化。
- `phone_id_map.txt` 是音素到音素 ID 的映射关系。
- `tone_id_map.txt` 是在训练声学模型之前分割音调和拼音时，音调到音调 ID 的映射关系。（例如在 csmsc/speedyspeech 的示例中）
- `spk_id_map.txt` 是多发音人声学模型中 "发音人" 到 "spk_ids" 的映射关系。

下面的示例代码显示了如何使用模型进行预测。
### Acoustic Models 声学模型（文本到频谱图）
下面的代码显示了如何使用 `FastSpeech2` 模型。加载预训练模型后，使用它和 normalizer 对象构建预测对象，然后使用 `fastspeech2_inference(phone_ids)` 生成频谱图，频谱图可进一步用于使用声码器合成原始音频。

```python
from pathlib import Path
import numpy as np
import paddle
import yaml
from yacs.config import CfgNode
from paddlespeech.t2s.models.fastspeech2 import FastSpeech2
from paddlespeech.t2s.models.fastspeech2 import FastSpeech2Inference
from paddlespeech.t2s.modules.normalizer import ZScore
# examples/fastspeech2/baker/frontend.py
from frontend import Frontend

# 加载预训练模型
checkpoint_dir = Path("fastspeech2_nosil_baker_ckpt_0.4")
with open(checkpoint_dir / "phone_id_map.txt", "r") as f:
    phn_id = [line.strip().split() for line in f.readlines()]
vocab_size = len(phn_id)
with open(checkpoint_dir / "default.yaml") as f:
    fastspeech2_config = CfgNode(yaml.safe_load(f))
odim = fastspeech2_config.n_mels
model = FastSpeech2(
    idim=vocab_size, odim=odim, **fastspeech2_config["model"])
model.set_state_dict(
    paddle.load(args.fastspeech2_checkpoint)["main_params"])
model.eval()

# 加载特征文件
stat = np.load(checkpoint_dir / "speech_stats.npy")
mu, std = stat
mu = paddle.to_tensor(mu)
std = paddle.to_tensor(std)
fastspeech2_normalizer = ZScore(mu, std)

# 构建预测对象
fastspeech2_inference = FastSpeech2Inference(fastspeech2_normalizer, model)

# 加载中文前端
frontend = Frontend(checkpoint_dir / "phone_id_map.txt")

# 文本到频谱图
sentence = "你好吗？"
input_ids = frontend.get_input_ids(sentence, merge_sentences=True)
phone_ids = input_ids["phone_ids"]
flags = 0
# 中文文本前端的输出是分段的，逐段进行预测
for part_phone_ids in phone_ids:
    with paddle.no_grad():
        temp_mel = fastspeech2_inference(part_phone_ids)
        if flags == 0:
            mel = temp_mel
            flags = 1
        else:
            mel = paddle.concat([mel, temp_mel])
```

### Vocoder 声码器（频谱图到波形）
下面的代码显示了如何使用 `Parallel WaveGAN` 模型。像上面的例子一样，加载预训练模型后，使用它和 normalizer 对象构建预测对象，然后使用 `pwg_inference(mel)` 生成原始音频（ wav 格式）。

```python
from pathlib import Path
import numpy as np
import paddle
import soundfile as sf
import yaml
from yacs.config import CfgNode
from paddlespeech.t2s.models.parallel_wavegan import PWGGenerator
from paddlespeech.t2s.models.parallel_wavegan import PWGInference
from paddlespeech.t2s.modules.normalizer import ZScore

# 加载预训练模型
checkpoint_dir = Path("parallel_wavegan_baker_ckpt_0.4")
with open(checkpoint_dir / "pwg_default.yaml") as f:
    pwg_config = CfgNode(yaml.safe_load(f))
vocoder = PWGGenerator(**pwg_config["generator_params"])
vocoder.set_state_dict(paddle.load(args.pwg_params))
vocoder.remove_weight_norm()
vocoder.eval()

# 加载特征文件
stat = np.load(checkpoint_dir / "pwg_stats.npy")
mu, std = stat
mu = paddle.to_tensor(mu)
std = paddle.to_tensor(std)
pwg_normalizer = ZScore(mu, std)

# 加载预训练模型构造预测对象
pwg_inference = PWGInference(pwg_normalizer, vocoder)

# 频谱图到波形
wav = pwg_inference(mel)
sf.write(
        audio_path,
        wav.numpy(),
        samplerate=fastspeech2_config.fs)
```


================================================
FILE: docs/source/tts/svs_music_score.md
================================================
本人非音乐专业人士，如文档中有误欢迎指正。

# 一、常见基础
## 1.1 简谱和音名（note）
<p align="left">
  <img src="https://paddlespeech.cdn.bcebos.com/t2s/svs/svs_music_scores/seven.png" width="300"/>
</p>

上图从左往右的黑键音名分别是：C#/Db，D#/Eb，F#/Gb，G#/Ab，A#/Bb
钢琴88键如下图，分为大字一组，大字组，小字组，小字一组，小字二组，小字三组，小字四组。分别对应音名的后缀是 1 2 3 4 5 6 7，例如小字一组（C大调）包含的键分别为： C4，C#4/Db4，D4，D#4/Eb4，E4，F4，F#4/Gb4，G4，G#4/Ab4，A4，A#4/Bb4，B4  
钢琴八度音就是12345671八个音，最后一个音是高1。**遵循：全全半全全全半** 就会得到 1 2 3 4 5 6 7 (高)1 的音

<p align="left">
  <img src="https://paddlespeech.cdn.bcebos.com/t2s/svs/svs_music_scores/piano_88.png" />
</p>

## 1.2 十二大调
“#”表示升调

<p align="left">
  <img src="https://paddlespeech.cdn.bcebos.com/t2s/svs/svs_music_scores/up.png" />
</p>

“b”表示降调

<p align="left">
  <img src="https://paddlespeech.cdn.bcebos.com/t2s/svs/svs_music_scores/down.png" />
</p>

什么大调表示Do(简谱1) 这个音从哪个键开始，例如D大调，则用D这个键来表示 Do这个音。
下图是十二大调下简谱与音名的对应表。

<p align="left">
  <img src="../../../docs/images/note_map.png" />
</p>


## 1.3 Tempo
Tempo 用于表示速度（Speed of the beat/pulse），即一分钟里面有几拍（beats per minute，BPM）

<p align="left">
  <img src="https://paddlespeech.cdn.bcebos.com/t2s/svs/svs_music_scores/note_beat.png" width="450"/>
</p>

whole note --> 4 beats<br>
half note --> 2 beats<br>
quarter note --> 1 beat<br>
eighth note --> 1/2 beat<br>
sixteenth note --> 1/4 beat<br>


# 二、应用试验
## 2.1 从谱中获取 music scores
music scores 包含：note，note_dur，is_slur

<p align="left">
  <img src="https://paddlespeech.cdn.bcebos.com/t2s/svs/svs_music_scores/pu.png" width="600"/>
</p>

从左上角的谱信息 *bE* 可以得出该谱子是 **降E大调**，可以对应1.2小节十二大调简谱音名对照表根据 简谱获取对应的note
从左上角的谱信息 *quarter note* 可以得出该谱子的速度是 **一分钟95拍（beat）**，一拍的时长 = **60/95 = 0.631578s**
从左上角的谱信息 *4/4* 可以得出该谱子表示四分音符为一拍（分母的4），每小节有4拍（分子的4）

从该简谱上可以获取 music score 如下：

|text |phone |简谱（辅助）后面的点表示高八音 |note （从小字组开始算） |几拍（辅助） |note_dur |is_slur|
| :-------------: | :------------: | :-----: | -----: | :-----: | :-----: | :-----: |
|小 |x   |5  |A#3/Bb3 |半 |0.315789 |0 |
|   |iao |5  |A#3/Bb3 |半 |0.315789 |0 |
|酒 |j   |1. |D#4/Eb4 |半 |0.315789 |0 |
|   |iu  |1. |D#4/Eb4 |半 |0.315789 |0 |
|窝 |w   |2. |F4      |半 |0.315789 |0 |
|   |o   |2. |F4      |半 |0.315789 |0 |
|长 |ch  |3. |G4      |半 |0.315789 |0 |
|   |ang |3. |G4      |半 |0.315789 |0 |
|   |ang |1. |D#4/Eb4 |半 |0.315789 |1 |
|睫 |j   |1. |D#4/Eb4 |半 |0.315789 |0 |
|   |ie  |1. |D#4/Eb4 |半 |0.315789 |0 |
|   |ie  |5  |A#3/Bb3 |半 |0.315789 |1 |
|毛 |m   |5  |A#3/Bb3 |一 |0.631578 |0 |
|   |ao  |5  |A#3/Bb3 |一 |0.631578 |0 |
|是 |sh  |5  |A#3/Bb3 |半 |0.315789 |0 |
|   |i   |5  |A#3/Bb3 |半 |0.315789 |0 |
|你 |n   |3. |G4      |半 |0.315789 |0 |
|   |i   |3. |G4      |半 |0.315789 |0 |
|最 |z   |2. |F4      |半 |0.315789 |0 |
|   |ui  |2. |F4      |半 |0.315789 |0 |
|美 |m   |3. |G4      |半 |0.315789 |0 |
|   |ei  |3. |G4      |半 |0.315789 |0 |
|的 |d   |2. |F4      |半 |0.315789 |0 |
|   |e   |2. |F4      |半 |0.315789 |0 |
|记 |j   |7  |D4      |半 |0.315789 |0 |
|   |i   |7  |D4      |半 |0.315789 |0 |
|号 |h   |5  |A#3/Bb3 |半 |0.315789 |0 |
|   |ao  |5  |A#3/Bb3 |半 |0.315789 |0 |


## 2.2 一些实验

<div align = "center">
<table style="width:100%">
  <thead>
    <tr>
      <th> 序号  </th>
      <th width="500"> 说明  </th>
      <th> 合成音频（diffsinger_opencpop + pwgan_opencpop） </th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td > 1 </td>
      <td > 原始 opencpop 标注的 notes，note_durs，is_slurs，升F大调，起始在小字组（第3组） </td>
      <td align = "center">
      <a href="https://paddlespeech.cdn.bcebos.com/t2s/svs/svs_music_scores/test1.wav" rel="nofollow">
            <img align="center" src="../../../docs/images/audio_icon.png" width="200" style="max-width: 100%;"></a><br>
      </td>
    </tr>
    <tr>
      <td > 2 </td>
      <td > 原始 opencpop 标注的 notes 和 is_slurs，note_durs 改变（从谱子获取） </td>
      <td align = "center">
      <a href="https://paddlespeech.cdn.bcebos.com/t2s/svs/svs_music_scores/test2.wav" rel="nofollow">
            <img align="center" src="../../../docs/images/audio_icon.png" width="200" style="max-width: 100%;"></a><br>
      </td>
    </tr>
    <tr>
      <td > 3 </td>
      <td > 原始 opencpop 标注的 notes 去掉 rest（毛字一拍），is_slurs 和 note_durs 改变（从谱子获取） </td>
      <td align = "center">
      <a href="https://paddlespeech.cdn.bcebos.com/t2s/svs/svs_music_scores/test3.wav" rel="nofollow">
            <img align="center" src="../../../docs/images/audio_icon.png" width="200" style="max-width: 100%;"></a><br>
      </td>
    </tr>
    <tr>
      <td > 4 </td>
      <td > 从谱子获取 notes，note durs，is_slurs，不含 rest（毛字一拍），起始在小字组（第3组） </td>
      <td align = "center">
      <a href="https://paddlespeech.cdn.bcebos.com/t2s/svs/svs_music_scores/test4.wav" rel="nofollow">
            <img align="center" src="../../../docs/images/audio_icon.png" width="200" style="max-width: 100%;"></a><br>
      </td>
    </tr>
    <tr>
      <td > 5 </td>
      <td > 从谱子获取 notes，note durs，is_slurs，加上 rest （毛字半拍，rest半拍），起始在小字组（第3组）</td>
      <td align = "center">
      <a href="https://paddlespeech.cdn.bcebos.com/t2s/svs/svs_music_scores/test5.wav" rel="nofollow">
            <img align="center" src="../../../docs/images/audio_icon.png" width="200" style="max-width: 100%;"></a><br>
      </td>
    </tr>
    <tr>
      <td > 6 </td>
      <td > 从谱子获取 notes， is_slurs，包含 rest，note_durs 从原始标注获取，起始在小字组（第3组） </td>
      <td align = "center">
      <a href="https://paddlespeech.cdn.bcebos.com/t2s/svs/svs_music_scores/test6.wav" rel="nofollow">
            <img align="center" src="../../../docs/images/audio_icon.png" width="200" style="max-width: 100%;"></a><br>
      </td>
    </tr>
    <tr>
      <td > 7 </td>
      <td > 从谱子获取 notes，note durs，is_slurs，不含 rest（毛字一拍），起始在小字一组（第4组） </td>
      <td align = "center">
      <a href="https://paddlespeech.cdn.bcebos.com/t2s/svs/svs_music_scores/test7.wav" rel="nofollow">
            <img align="center" src="../../../docs/images/audio_icon.png" width="200" style="max-width: 100%;"></a><br>
      </td>
    </tr>
    
  </tbody>
</table>

</div>


上述实验表明通过该方法来提取 music score 是可行的，但是在应用中可以**灵活地在歌词中加"AP"(用来表示吸气声)和"SP"(用来表示停顿声)**，对应的在 **note 上加 rest**，会使得整体的歌声合成更自然。
除此之外，还要考虑哪一个大调并且以哪一组为起始**得到的 note 在训练数据集中出现过**，如若推理时传入训练数据中没有见过的 note， 合成出来的音频可能不是我们期待的音调。


# 三、其他
## 3.1 读取midi

```python
import mido
mid = mido.MidiFile('2093.midi')
```


================================================
FILE: docs/source/tts/test_sentence.txt
================================================
001 早上好，今天是2020/10/29，最低温度是-3°C。
002 你好，我的编号是37249，很高兴为您服务。
003 我们公司有37249个人。
004 我出生于2005年10月8日。
005 我们习惯在12:30吃中午饭。
006 只要有超过3/4的人投票同意，你就会成为我们的新班长。
007 我要买一只价值999.9元的手表。
008 我的手机号是18544139121，欢迎来电。
009 明天有62%的概率降雨。
010 手表厂有五种好产品。
011 跑马场有五百匹很勇敢的千里马。
012 有一天，我看到了一栋楼，我顿感不妙，因为我看不清里面有没有人。
013 史小姐拿着小雨伞去找她的老保姆了。
014 不要相信这个老奶奶说的话，她一点儿也不好。

================================================
FILE: docs/source/tts/tts_datasets.md
================================================
# TTS Datasets
<!--
see https://openslr.org/
-->
## Mandarin
- [CSMSC](https://www.data-baker.com/open_source.html): Chinese Standard Mandarin Speech Corpus
    - Duration/h: 12
    - Number of Sentences: 10,000
    - Size: 2.14GB
    - Speaker: 1 female, ages 20 ~30
    - Sample Rate: 48 kHz、16bit
    - Mean Words per Clip: 16
- [AISHELL-3](http://www.aishelltech.com/aishell_3)
    - Duration/h: 85
    - Number of Sentences: 88,035
    - Size: 17.75GB
    - Speaker: 218
    - Sample Rate: 44.1 kHz、16bit 

## English
- [LJSpeech](https://keithito.com/LJ-Speech-Dataset/)
    - Duration/h: 24
    - Number of Sentences: 13,100
    - Size: 2.56GB
    - Speaker: 1, age 20 ~30
    - Sample Rate: 22050 Hz、16bit
    - Mean Words per Clip: 17.23
- [VCTK](https://datashare.ed.ac.uk/handle/10283/3443)
    - Number of Sentences: 44,583
    - Size: 10.94GB
    - Speaker: 110 
    - Sample Rate: 48 kHz、16bit
    - Mean Words per Clip: 17.23

## Japanese
<!--
see https://sites.google.com/site/shinnosuketakamichi/publication/corpus
-->

- [tri-jek](https://sites.google.com/site/shinnosuketakamichi/research-topics/tri-jek_corpus): Japanese-English-Korean tri-lingual corpus
- [JSSS-misc](https://sites.google.com/site/shinnosuketakamichi/research-topics/jsss-misc_corpus): misc tasks of JSSS corpus
- [JTubeSpeech](https://github.com/sarulab-speech/jtubespeech): Corpus of Japanese speech collected from YouTube
- [J-MAC](https://sites.google.com/site/shinnosuketakamichi/research-topics/j-mac_corpus): Japanese multi-speaker audiobook corpus
- [J-KAC](https://sites.google.com/site/shinnosuketakamichi/research-topics/j-kac_corpus): Japanese Kamishibai and audiobook corpus
- [JMD](https://sites.google.com/site/shinnosuketakamichi/research-topics/jmd_corpus): Japanese multi-dialect corpus
- [JSSS](https://sites.google.com/site/shinnosuketakamichi/research-topics/jsss_corpus): Japanese multi-style (summarization and simplification) corpus
- [RWCP-SSD-Onomatopoeia](https://www.ksuke.net/dataset/rwcp-ssd-onomatopoeia): onomatopoeic word dataset for environmental sounds 
- [Life-m](https://sites.google.com/site/shinnosuketakamichi/research-topics/life-m_corpus): landmark image-themed music corpus
- [PJS](https://sites.google.com/site/shinnosuketakamichi/research-topics/pjs_corpus): Phoneme-balanced Japanese singing voice corpus
- [JVS-MuSiC](https://sites.google.com/site/shinnosuketakamichi/research-topics/jvs_music): Japanese multi-speaker singing-voice corpus
- [JVS](https://sites.google.com/site/shinnosuketakamichi/research-topics/jvs_corpus): Japanese multi-speaker voice corpus
- [JSUT-book](https://sites.google.com/site/shinnosuketakamichi/publication/jsut-book): audiobook corpus by a single Japanese speaker
- [JSUT-vi](https://sites.google.com/site/shinnosuketakamichi/publication/jsut-vi): vocal imitation corpus by a single Japanese speaker
- [JSUT-song](https://sites.google.com/site/shinnosuketakamichi/publication/jsut-song): singing voice corpus by a single Japanese singer
- [JSUT](https://sites.google.com/site/shinnosuketakamichi/publication/jsut): a large-scaled corpus of reading-style Japanese speech by a single speaker

## Emotions
### English
- [CREMA-D](https://github.com/CheyneyComputerScience/CREMA-D)
- [Seen and Unseen emotional style transfer for voice conversion with a new emotional speech dataset](https://kunzhou9646.github.io/controllable-evc/)
    - paper : [Seen and Unseen emotional style transfer for voice conversion with a new emotional speech dataset](https://arxiv.org/abs/2010.14794)
### Mandarin
- [EMOVIE Dataset](https://viem-ccy.github.io/EMOVIE/dataset_release )
    - paper: [EMOVIE: A Mandarin Emotion Speech Dataset with a Simple Emotional Text-to-Speech Model](https://arxiv.org/abs/2106.09317)
- MASC
    - paper: [MASC: A Speech Corpus in Mandarin for Emotion Analysis and Affective Speaker Recognition](https://ieeexplore.ieee.org/document/4013501)
### English && Mandarin
- [Emotional Voice Conversion: Theory, Databases and ESD](https://github.com/HLTSingapore/Emotional-Speech-Data)    
    - paper: [Emotional Voice Conversion: Theory, Databases and ESD](https://arxiv.org/abs/2105.14762) 

## Music
- [GiantMIDI-Piano](https://github.com/bytedance/GiantMIDI-Piano)
- [MAESTRO Dataset](https://magenta.tensorflow.org/datasets/maestro)
     - [tf code](https://www.tensorflow.org/tutorials/audio/music_generation) 
- [Opencpop](https://wenet.org.cn/opencpop/)


================================================
FILE: docs/source/tts/tts_papers.md
================================================
# TTS Papers
## Text Frontend
### Polyphone
- [【g2pM】g2pM: A Neural Grapheme-to-Phoneme Conversion Package for Mandarin Chinese Based on a New Open Benchmark Dataset](https://arxiv.org/abs/2004.03136)
- [Disambiguation of Chinese Polyphones in an End-to-End Framework with Semantic Features Extracted by Pre-trained BERT](https://www1.se.cuhk.edu.hk/~hccl/publications/pub/201909_INTERSPEECH_DongyangDAI.pdf)
- [Polyphone Disambiguation in Mandarin Chinese with Semi-Supervised Learning](https://www.isca-speech.org/archive/pdfs/interspeech_2021/shi21d_interspeech.pdf)
  * github: https://github.com/PaperMechanica/SemiPPL
- [WikipediaHomographData](https://github.com/google-research-datasets/WikipediaHomographData)
### Text Normalization
#### English
- [applenob/text_normalization](https://github.com/applenob/text_normalization)
### G2P
#### English
- [cmusphinx/g2p-seq2seq](https://github.com/cmusphinx/g2p-seq2seq)

## Acoustic Models
- [【AdaSpeech3】AdaSpeech 3: Adaptive Text to Speech for Spontaneous Style](https://arxiv.org/abs/2107.02530)
- [【AdaSpeech2】AdaSpeech 2: Adaptive Text to Speech with Untranscribed Data](https://arxiv.org/abs/2104.09715)
- [【AdaSpeech】AdaSpeech: Adaptive Text to Speech for Custom Voice](https://arxiv.org/abs/2103.00993)
- [【FastSpeech2】FastSpeech 2: Fast and High-Quality End-to-End Text to Speech](https://arxiv.org/abs/2006.04558)
- [【FastPitch】FastPitch: Parallel Text-to-speech with Pitch Prediction](https://arxiv.org/abs/2006.06873)
- [【SpeedySpeech】SpeedySpeech: Efficient Neural Speech Synthesis](https://arxiv.org/abs/2008.03802)
- [【FastSpeech】FastSpeech: Fast, Robust and Controllable Text to Speech](https://arxiv.org/abs/1905.09263)
- [【Transformer TTS】Neural Speech Synthesis with Transformer Network](https://arxiv.org/abs/1809.08895)
- [【Tacotron2】Natural TTS Synthesis by Conditioning WaveNet on Mel Spectrogram Predictions](https://arxiv.org/abs/1712.05884)

## Vocoders
- [【RefineGAN】RefineGAN: Universally Generating Waveform Better than Ground Truth with Highly Accurate Pitch and Intensity Responses](https://arxiv.org/abs/2111.00962)
- [【Fre-GAN】Fre-GAN: Adversarial Frequency-consistent Audio Synthesis](https://arxiv.org/abs/2106.02297)
- [【StyleMelGAN】StyleMelGAN: An Efficient High-Fidelity Adversarial Vocoder with Temporal Adaptive Normalization](https://arxiv.org/abs/2011.01557)
- [【Multi-band MelGAN】Multi-band MelGAN: Faster Waveform Generation for High-Quality Text-to-Speech](https://arxiv.org/abs/2005.05106)
- [【HiFi-GAN】HiFi-GAN: Generative Adversarial Networks for Efficient and High Fidelity Speech Synthesis](https://arxiv.org/abs/2010.05646)
- [【VocGAN】VocGAN: A High-Fidelity Real-time Vocoder with a Hierarchically-nested Adversarial Network](https://arxiv.org/abs/2007.15256)
- [【Parallel WaveGAN】Parallel WaveGAN: A fast waveform generation model based on generative adversarial networks with multi-resolution spectrogram](https://arxiv.org/abs/1910.11480)
- [【MelGAN】MelGAN: Generative Adversarial Networks for Conditional Waveform Synthesis](https://arxiv.org/abs/1910.06711)
- [【WaveFlow】WaveFlow: A Compact Flow-based Model for Raw Audio](https://arxiv.org/abs/1912.01219)
- [【LPCNet】LPCNet: Improving Neural Speech Synthesis Through Linear Prediction](https://arxiv.org/abs/1810.11846)
- [【WaveRNN】Efficient Neural Audio Synthesis](https://arxiv.org/abs/1802.08435)
## GAN TTS

- [【GAN TTS】High Fidelity Speech Synthesis with Adversarial Networks](https://arxiv.org/abs/1909.11646)

## Voice Cloning
- [【SV2TTS】Transfer Learning from Speaker Verification to Multispeaker Text-to-Speech Synthesis](https://arxiv.org/abs/1806.04558)
- [【GE2E】Generalized End-to-End Loss for Speaker Verification](https://arxiv.org/abs/1710.10467)


================================================
FILE: docs/source/tts/zh_text_frontend.md
================================================
# Chinese Rule-Based Text Frontend
A TTS system mainly includes three modules: `Text Frontend`, `Acoustic model` and `Vocoder`. We provide a complete Chinese text frontend module in PaddleSpeech TTS; see examples in [examples/other/tn](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/tn) and [examples/other/g2p](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/g2p).

A text frontend module mainly includes:
 - Text Segmentation
 - Text Normalization (TN)
 - Word Segmentation (mainly in Chinese)
 - Part-of-Speech
 - Prosody
 - G2P (Grapheme-to-Phoneme, include Polyphone and Tone Sandhi, etc.)
 - Linguistic Features/Characters/Phonemes

```text
• text: 90 后为中华人民共和国成立 70 周年准备了大礼
• Text Normalization: 九零后为中华人民共和国成立七十周年准备了大礼
• Word Segmentation: 九零后/为/中华人民/共和国/成立/七十/周年/准备/了/大礼
• G2P:
    jiu3 ling2 hou4 wei4 zhong1 hua2 ren2 min2 gong4 he2 guo2 ...
• Prosody (prosodic words #1, prosodic phrases #2, intonation phrases #3, sentence #4):
    九零后#1为中华人民#1共和国#2成立七十周年#3准备了大礼#4
```

Among them, Text Normalization and G2P are the most important modules. We mainly introduce them here.

## Text Normalization
### Supported NSW (Non-Standard-Word) Normalization

|NSW type|raw|normalized|
|:--|:-|:-|
|serial number|电影中梁朝伟扮演的陈永仁的编号27149|电影中梁朝伟扮演的陈永仁的编号二七一四九|
|cardinal|这块黄金重达324.75克<br>我们班的最高总分为583分|这块黄金重达三百二十四点七五克<br>我们班的最高总分为五百八十三分|
|numeric range |12\~23<br>-1.5\~2|十二到二十三<br>负一点五到二|
|date|她出生于86年8月18日，她弟弟出生于1995年3月1日|她出生于八六年八月十八日， 她弟弟出生于一九九五年三月一日|
|time|等会请在12:05请通知我|等会请在十二点零五分请通知我|
|temperature|今天的最低气温达到-10°C|今天的最低气温达到零下十度|
|fraction|现场有7/12的观众投出了赞成票|现场有十二分之七的观众投出了赞成票|
|percentage|明天有62％的概率降雨|明天有百分之六十二的概率降雨|
|money|随便来几个价格12块5，34.5元，20.1万|随便来几个价格十二块五，三十四点五元，二十点一万|
|telephone|这是固话0421-33441122<br>这是手机+86 18544139121|这是固话零四二一三三四四一一二二<br>这是手机八六一八五四四一三九一二一|


## Grapheme-to-Phoneme
In Chinese, G2P is a very complex module, which mainly includes **polyphone**  and **tone sandhi**.

We use [g2pM](https://github.com/kakaobrain/g2pM) and [pypinyin](https://github.com/mozillazg/python-pinyin)  as the default g2p tools. They can solve the problem of polyphones to a certain extent. In the future, we intend to use a trainable language model (for example, [BERT](https://arxiv.org/abs/1810.04805)) for polyphones.

However, g2pM and pypinyin do not perform well in tone sandhi, we use rules to solve this problem, which requires relevant linguistic knowledge.

The **tone sandhi** in Chinese mainly include:

 - soft tone sandhi (轻声变调)
 - "一" "不" tone sandhi ("一" "不" 变调)
 - three tone sandhi  (三声变调)

For ease of understanding, we list the tone sandhi rules in Chinese here
### 1. 轻声变调
|  |cases  |
|:--|:-|
| 语气助词“吧、呢、啊”等 | 吃吧、走吗、去呢、跑啊 |
| 结构助词：“的、地、得”| 我的书、慢慢地走、跑得很快等 |
|有的轻声音节和非轻声音节构成对比区别意义 |买卖：一指生意；二指买和卖。 <br/> 地道：一指纯粹、真正；二指地下通道。<br> 大意：一指没有注意；二指主要的意思。 <br/>  东西：一指各种事物；二指东面与西面。<br>  言语：一指所说的话；二指开口，招呼。<br/>运气：一指一种锻炼的方法。二指幸运。<br> |
|名词的后缀：“们、子、头”|你们、房子、石头 |
|名词或动词的第二个重叠音节 | 奶奶、姐姐、爸爸、试试、看看、说说、问问 |
|名词后面表示方位的：“上、下、里” |桌上、地下、院里  |
| 动态助词：“了、着、过” | 走了、看着、去过|
| 作宾语的人称代词：“我、你、他” | 找我、请你、麻烦他。 |
| 约定俗成 | 匀称、盘算、枇杷、篱笆、活泼、玄乎。狐狸、学生、拾掇、麻烦、蛤蟆、石榴。玫瑰、凉快、萝卜、朋友、奴才、云彩。脑袋、老爷、老婆、嘴巴、指头、指甲。委屈、喇叭、讲究、打发、打听、喜欢。点心、伙计、打扮、哑巴、女婿、首饰。自在、吓唬、力气、漂亮、队伍、地方。痛快、念叨、笑语、丈夫、志气、钥匙。月亮、正经、位置、秀气、上司、悟性。告示、动静、热闹、屁股、阔气、意思。等 |


### 2. "一" "不" 变调
#### "一" 变调
|  | 是否变调 | cases|
|:--|:-|:-|
| 单独念 | 否 | 第一、一楼|
| 序数 |否  | |
| 用在语句末尾 | 否 | |
| 去声前变阳平（四声前变二声） |  | 一栋yí dòng、一段yí duàn、一律yí lǜ、一路yí lù|
| 非去声前变去声（非四声前变四声） |  | 阴平（一声）<br>一发yì fā 、一端yì duān、一天yì tiān、一忽yì hū<br>阳平（二声）<br>一叠yì dié 、一同yì tóng 、一头yì tóu 、一条yì tiáo<br>上声（三声）<br>一统yì tǒng、一体yì tǐ、一览yì lǎn、一口yì kǒu|
|轻读，当“一”嵌在重叠式的动词之间  |  | 听一听 tīng yi tīng|

#### "不" 变调
|  | 是否变调 | cases|
|:--|:-|:-|
|单独念|否  | |
| 用在语句末尾| 否  | 我不|
|去声前变阳平（四声前变成二声）  |  | 不怕bú pà、不妙bú miào、不犯bú fàn、不忿bú fèn|
| 轻读，“不”夹在重叠动词或重叠形容词之间、夹在动词和补语之间 |  |懂不懂 dǒng bu dǒng 、看不清 kàn bu qīng |


### 3. 三声变调
|  | 子类别| 如何变调|cases|
|:--|:-|:-|:-|
|单独念 |  | 否|  |
|句末 |  | 否|  |
|在句中停顿并没被后音节影响  |  |否 |  |
|三声+三声  |  | 二声+三声|保险、保养、党委、尽管、老板、本领、引导、古老、敏感、鼓舞、永远、语法、口语、岛屿、保姆、远景、北海、首长、母语 |
| 三个三声相连| 双音节+单音节（“双单格”结构）| 前两个变二声|演讲稿、跑马场、展览馆、管理组、水彩笔、蒙古语、选取法、古典舞、虎骨酒、洗脸水、草稿纸|
|  | 单音节+双音节（“单双格”结构）|第二个变二声|史小姐、党小组、好小伙、跑百米、纸老虎、李厂长、老保姆、冷处理、很友好、小雨伞|
|  | 单音节+单音节+单音节（“单三格”结构）| 前两个变二声| 软懒散、稳准狠|
| 更多三声音节相连时|  | 按语意与若干二字组成三字组，然后按以上变调规律处理|岂有 / 此理。<br>请你 / 给我 / 打点儿 / 洗脸水。<br>手表厂 / 有五种 /好产品。|

## References

 - [chinese_text_normalization](https://github.com/speechio/chinese_text_normalization)
 - [声调篇｜这些“一、不”变调规律，你不得不知](https://zhuanlan.zhihu.com/p/36156170)
 - [TTS前端模块中的普通话变调规则](https://zhuanlan.zhihu.com/p/65091429)
 - [轻声和变调](https://wenku.baidu.com/view/ad2016d94693daef5ef73db1.html)
 - [必读轻声词语表546条](http://www.chaziwang.com/article-view-504.html)


================================================
FILE: docs/source/tts_demo_video.rst
================================================
TTS Demo Video
==================

.. raw:: html
     
    <video controls width="1024">

    <source src="https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/paddle2021_with_me.mp4"
            type="video/mp4">
    Sorry, your browser doesn't support embedded videos.
    </video>


================================================
FILE: docs/source/vpr/PPVPR.md
================================================
([简体中文](./PPVPR_cn.md)|English)
# PP-VPR

## Catalogue
- [1. Introduction](#1)
- [2. Characteristic](#2)
- [3. Tutorials](#3)
    - [3.1 Pre-trained Models](#31)
    - [3.2 Training](#32)
    - [3.3 Inference](#33)
    - [3.4 Service Deployment](#34)
- [4. Quick Start](#4)

<a name="1"></a>
## 1. Introduction

PP-VPR is a tool that provides voiceprint feature extraction and retrieval. It offers a variety of quasi-industrial solutions that make it easy to solve difficult problems in complex scenarios, and it supports model inference from the command line. PP-VPR also supports interface operations and container deployment.  

<a name="2"></a>
## 2. Characteristic
The basic process of VPR is shown in the figure below:  
<center><img src=https://ai-studio-static-online.cdn.bcebos.com/3aed59b8c8874046ad19fe583d15a8dd53c5b33e68db4383b79706e5add5c2d0 width="800" ></center>


The main characteristics of PP-VPR are shown below:
-  Provides pre-trained models on the English open source dataset VoxCeleb. The models include ecapa-tdnn.
-  Support model training/evaluation.
-  Support model inference using the command line. You can run `paddlespeech vector --task spk --input xxx.wav` to do inference with the pre-trained model. 
-  Support interface operations and container deployment.

<a name="3"></a>
## 3. Tutorials

<a name="31"></a>
## 3.1 Pre-trained Models
The list of supported pre-trained models: [released_model](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/released_model.md).  
For more information about model design, you can refer to the aistudio tutorial:
- [ecapa-tdnn](https://aistudio.baidu.com/aistudio/projectdetail/4027664)

<a name="32"></a>
## 3.2 Training
The referenced script for model training is stored in [examples](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples) and stored according to "examples/dataset/model". The dataset mainly supports VoxCeleb. The model supports ecapa-tdnn.
The specific steps of executing the script are recorded in `run.sh`.

For more information, you can refer to [sv0](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/voxceleb/sv0)


<a name="33"></a>
## 3.3 Inference

After installing `paddlespeech` with `pip install paddlespeech`, you can run `paddlespeech vector --task spk --input xxx.wav` to do inference with the pre-trained model.

Specific supported functions include:

- Prediction of single audio
- Score the similarity between the two audios
- Support RTF calculation

For specific usage, please refer to: [speaker_verification](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/speaker_verification/README_cn.md) 


<a name="34"></a>
## 3.4 Service Deployment

PP-VPR supports Docker containerized service deployment. It uses Milvus and MySQL for high-performance database building and retrieval.  

Demo of VPR Server: [audio_searching](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos/audio_searching)

![arch](https://ai-studio-static-online.cdn.bcebos.com/7b32dd0200084866863095677e8b40d3b725b867d2e6439e9cf21514e235dfd5)

For more information about service deployment, you can refer to the aistudio tutorial:
- [speaker_recognition](https://aistudio.baidu.com/aistudio/projectdetail/4027664)

<a name="4"></a>

## 4. Quick Start

To use PP-VPR, see the [install](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install_cn.md) guide. It provides three ways to install `paddlespeech`: **Easy**, **Medium** and **Hard**. If you want to experience the inference function of paddlespeech, you can use the **Easy** installation method.


================================================
FILE: docs/source/vpr/PPVPR_cn.md
================================================
(简体中文|[English](./PPVPR.md))
# PP-VPR

## 目录
- [1. 简介](#1)
- [2. 特点](#2)
- [3. 使用教程](#3)
    - [3.1 预训练模型](#31)
    - [3.2 模型训练](#32)
    - [3.3 模型推理](#33)
    - [3.4 服务部署](#34)
- [4. 快速开始](#4)

<a name="1"></a>
## 1. 简介

PP-VPR 是一个 提供声纹特征提取，检索功能的工具。提供了多种准工业化的方案，轻松搞定复杂场景中的难题，支持使用命令行的方式进行模型的推理。 PP-VPR 也支持界面化的操作，容器化的部署。

<a name="2"></a>
## 2. 特点
VPR 的基本流程如下图所示：  
<center><img src=https://ai-studio-static-online.cdn.bcebos.com/3aed59b8c8874046ad19fe583d15a8dd53c5b33e68db4383b79706e5add5c2d0 width="800" ></center>


PP-VPR 的主要特点如下：
-  提供在英文开源数据集 VoxCeleb（英文）上的预训练模型，ecapa-tdnn。
-  支持模型训练评估功能。
-  支持命令行方式的模型推理，可使用 `paddlespeech vector --task spk --input xxx.wav` 方式调用预训练模型进行推理。
-  支持 VPR 的服务容器化部署，界面化操作。


<a name="3"></a>
## 3. 使用教程

<a name="31"></a>
## 3.1 预训练模型
支持的预训练模型列表：[released_model](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/released_model.md)。
更多关于模型设计的部分，可以参考 AIStudio 教程：
- [ecapa-tdnn](https://aistudio.baidu.com/aistudio/projectdetail/4027664)

<a name="32"></a>
## 3.2 模型训练

模型的训练的参考脚本存放在 [examples](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples) 中，并按照 `examples/数据集/模型` 存放，数据集主要支持 VoxCeleb，模型支持 ecapa-tdnn 模型。
具体的执行脚本的步骤记录在 `run.sh` 当中。具体可参考： [sv0](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/voxceleb/sv0)


<a name="33"></a>
## 3.3 模型推理

PP-VPR 支持在使用`pip install paddlespeech`后 使用命令行的方式来使用预训练模型进行推理。

具体支持的功能包括：

- 对单条音频进行预测
- 对两条音频进行打分
- 支持 RTF 的计算

具体的使用方式可以参考： [speaker_verification](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/speaker_verification/README_cn.md) 


<a name="34"></a>
## 3.4 服务部署

PP-VPR 支持 Docker 容器化服务部署。通过 Milvus, MySQL 进行高性能建库检索。

server 的 demo： [audio_searching](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos/audio_searching)

![arch](https://ai-studio-static-online.cdn.bcebos.com/7b32dd0200084866863095677e8b40d3b725b867d2e6439e9cf21514e235dfd5)


关于服务部署方面的更多资料，可以参考 AIStudio 教程：
- [speaker_recognition](https://aistudio.baidu.com/aistudio/projectdetail/4027664)

<a name="4"></a>

## 4. 快速开始

关于如何使用 PP-VPR，可以看这里的 [install](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install_cn.md)，其中提供了 **简单**、**中等**、**困难** 三种安装方式。如果想体验 paddlespeech 的推理功能，可以用 **简单** 安装方式。


================================================
FILE: docs/topic/ctc/ctc_loss.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "piano-accent",
   "metadata": {},
   "source": [
    "# Derivative of CTC Loss\n",
    "\n",
    "关于CTC的介绍已经有很多不错的教程了，但是完整的描述CTCLoss的前向和反向过程的很少，而且有些公式推导省略和错误。本文主要关注CTC Loss的梯度是如何计算的，关于CTC的介绍这里不做过多赘述，具体参看文末参考。\n",
    "\n",
    "CTC主要应用于语音和OCR中，已语音[Deepspeech2](https://arxiv.org/abs/1512.02595)模型为例，CTC的网络一般如下图所示，包含softmax和CTCLoss两部分。反向传播需要求得loss L相对于logits $u^i$​的梯度。下面先介绍CTCLoss的前向计算。\n",
    "\n",
    "> 图片来源于文末参考\n",
    "\n",
    "![img](./img/ctc_loss_backward_1.png)\n",
    "\n",
    "## CTC Loss 的计算\n",
    "\n",
    "CTC中path的定义与概率的计算如下：\n",
    "\n",
    "<img src=\"./img/ctc_loss_prob_pi_x.png\" alt=\"image-20211104200811966\" style=\"zoom:50%;\" />\n",
    "\n",
    "path 是 $ L'^T$​​的元素，用 $ \\pi $​​表示。 $ \\textbf{x} $​​ 是输入特征，$\\textbf{y}$​​ 是输出label， 都是序列。 $ L $​​ 是输出的 vocab, L‘ 是 $ L \\cup {blank}$​​。 $y_{\\pi_{t}}^t$​​ 表示在t时刻，$\\pi_{t}$​​ label时的观察概率。其中$\\pi_{t}$​​ 表示 $\\pi$​​ path在t时刻的label。$\\pi$​​ 是 $\\textbf{y}$​​ 与 $ \\textbf{x}$​​ 的一个alignment，长度是$T$​​，取值空间为$L'$​​​。path也称为alignment。\n",
    "\n",
    "公式（2）解释了给定输入 $\\textbf{x}$​ ，输出 $ \\pi $​  path 的概率，即从时间t=1到T每个时间点的概率 $y_{\\pi_{t}}^t$​ 相乘。\n",
    "\n",
    "求出单条path后，就可以计算$p(l \\mid x)$​ 的概率，计算如下：\n",
    "\n",
    "<img src=\"./img/ctc_loss_prob_l_x.png\" alt=\"image-20211104202358513\" style=\"zoom:50%;\" />\n",
    "\n",
    "这里边 $\\mathcal{B}$ 就是映射， 即所有多对一的映射（many-to-one mapping )的集合。 这样就算出来对应一个真正的 label $\\textbf{l}$ 的概率了，这里是求和。 求和的原因就是 aab 和 abb 都是对应成ab, 所以 aab 的概率 + abb 的概率才是生成ab的概率。 \n",
    "\n",
    "公式（3）解释了给定输入 $\\mathbf{x}$​​​​​​ ，求输出$\\mathbf{l}$​​​​​​ 的概率， 即所有集合 $\\mathcal{B}^{-1} (\\mathbf{l})$​​​​​​​​​​ 中 path的概率和。\n",
    "\n",
    "### CTC forward-backward 算法\n",
    "\n",
    "CTC的优化采用算最大似然估计[MLE (maximum likelihood estimation)](https://en.wikipedia.org/wiki/Maximum_likelihood_estimation), 这个和神经网络本身的训练过程是一致的。\n",
    "\n",
    "这个CTC 计算过程类似HMM的 [forward-backward algorithm](https://en.wikipedia.org/wiki/Forward%E2%80%93backward_algorithm)，下面就是这个算法的推导过程：\n",
    "\n",
    "<img src=\"./img/ctc_loss_alpha_definition.png\" alt=\"image-20211104203040307\" style=\"zoom:50%;\" />\n",
    "\n",
    "上图中的定义很清楚， 但是$ \\alpha_{t-1}(s) $ and $ \\alpha_{t-1}(s-1)$ 和 $\\alpha_t(s)$ 的关系也不那么好看出来，下图给出了具体的关于 $\\alpha_t(s)$ 的推导过程：\n",
    "\n",
    "<img src=\"./img/ctc_loss_alpha_recurse.png\" alt=\"image-20211108155714843\" style=\"zoom:50%;\" />\n",
    "\n",
    "<img src=\"./img/ctc_loss_alpha_recurse_2.png\" alt=\"image-20211109153011816\" style=\"zoom:50%;\" />\n",
    "\n",
    "这里的公式比较适合用下面的图来理解，$\\alpha_1(1)$​​​​ 其实对应的就是下图中左上角白色的圆圈。 就是上来第一个是blank 的概率， 而 $\\alpha_1(2)$​​​​是label l 的第一个字母。 这里边我们假设每个字母之间都插入了空白，即label l扩展成l'，例如，l=[a, b, b, c]， l'=[-, a, -, b, -, b, -, c, -]。  然后对于其他圆点，在时间是1 的情况下概率都是 0. Figure 3中横轴是时间 t，从左到右是1到T；纵轴是s（sequence），从上到下是 1 到 $\\mathbf{\\mid l' \\mid}$​​​​.\n",
    "\n",
    "<img src=\"./img/ctc_loss_cat_lattice.png\" alt=\"image-20211108155918442\" style=\"zoom:50%;\" />\n",
    "\n",
    "接下来我们分析递归公式 (resursion)，更多介绍可以参看 [2]. 公式6分情况考虑:\n",
    "\n",
    "* 第一种情况就是当前的label是blank， 或者 $\\mathbf{l'}_{s}= \\mathbf{l'}_{s-2}$​​​​​​​(相邻是重复字符)：\n",
    "\n",
    "  ![img](https://distill.pub/2017/ctc/assets/cost_no_skip.svg)\n",
    "\n",
    "  这个时候他的概率来自于过去t-1的两个label 概率， 也就是 $a_{t-1} (s)$​​ 和 $a_{t-1} (s-1)$​​​ 。\n",
    "\n",
    "  $ a_{t-1} (s)$​​ 就是说当前的 sequence 已经是s 了，figure 3中表现为横跳， blank -->blank（例如t=3, s=3）；\n",
    "\n",
    "  而 $a_{t-1} (s-1) $是说明当前的字符还不够， 需要再加一个， 所以在figure 3中就是斜跳，从黑色圆圈到白色圆圈（例如，t=3, s=5）。\n",
    "\n",
    "  仔细观察figure 3， 除了第一排的白色圆圈， 其他白色圆圈都有两个输入， 就是上述的两种情况。  当然判断blank 的方法也可以是判断$I'_{s-2} = I'_{s}$​. 这种情况也是说明$I'_{s}$​​​ 是blank, 因为每一个字符必须用 blank 隔开， 即使是相同字符。\n",
    "\n",
    "* 第二章情况 也可以用类似逻辑得出， 只不过当前的状态s 是黑色圆圈， 有三种情况输入。\n",
    "\n",
    "  ![img](https://distill.pub/2017/ctc/assets/cost_regular.svg)\n",
    "\n",
    "最终的概率就如公式8 所示， 这个计算过程就是 CTC forward algroirthm， 基于 Fig. 3 的左边的初始条件。\n",
    "\n",
    "<img src=\"./img/ctc_loss_forward_loss.png\" alt=\"image-20211108162544982\" style=\"zoom:50%;\" />\n",
    "\n",
    "基于Fig. 3 右边的初始条件，我们还是可以计算出一个概率， 那个就是 **CTC backward**. 这里我就不详细介绍了， 直接截图。\n",
    "\n",
    "<img src=\"./img/ctc_loss_backward_recurse.png\" alt=\"image-20211108162859876\" style=\"zoom:50%;\" />\n",
    "\n",
    "这样一直做乘法， 数字值越来越小，很快就会underflow。 这个时候就需要做 scaling.\n",
    "\n",
    "<img src=\"./img/ctc_loss_rescale_loss.png\" alt=\"image-20211108163526616\" style=\"zoom:50%;\" />\n",
    "\n",
    "算出了forward probability 和 backward probability 有什么用呢， 解释如下图。\n",
    "\n",
    "<img src=\"./img/ctc_loss_forward_backward.png\" alt=\"image-20211108164110404\" style=\"zoom:50%;\" />\n",
    "\n",
    "上图是说 forward probability and backward probability 的乘积， 代表了这个 sequence $\\mathbf{l}$ t时刻，是s label 的 所有paths 的概率。 这样的话 我们就计算了 Fig. 3 中的每个圆圈的概率。为什么$\\alpha_t(s)\\beta_t(s)$ 中多出一个 $y^t_{\\mathbf{l'_s}}$ ，这是因为它在 $\\alpha$  和 $\\beta$ 中都包含该项，合并公式后就多出一项。\n",
    "\n",
    "<img src=\"./img/ctc_loss_forward_backward_to_loss.png\" alt=\"image-20211109143104052\" style=\"zoom:50%;\" />\n",
    "\n",
    "$p(\\mathbf{l}|\\mathbf{x})$​  可以通过任意时刻 t 的所有 s 的 foward-backward 概率计算得来。取负对数后就是单个样本的NLL（Negative Log Likelihood）。\n",
    "\n",
    "### 总结\n",
    "\n",
    "总结一下，根据前向概率计算CTCLoss函数，可以得出如下结论：\n",
    "\n",
    "1. 对于时序长度为T的输入序列x和输出序列z，前向概率：\n",
    "   $$\n",
    "   \\begin{split}\n",
    "   \\alpha_t(s) &= \\sum_{ \\underset{\\pi_t=l'_s}{\\pi \\in \\mathcal{B}^{-1}(z)} } p(\\pi_{1:t}|x) \\newline\n",
    "   \\alpha_1(1) &= y_{-}^1 ; \\quad \\alpha_1(2)=y^1_{l'_2}, \\quad \\alpha_1(s)=0, \\forall s > 2  \\newline\n",
    "   \\alpha_t(s) &= 0, \\quad \\forall s < |l'| - 2(T-t) - 1 ,\\quad \\text{or} \\quad \\forall s < 1 \\newline\n",
    "   \\alpha_t(s) &=\n",
    "    \\begin{cases}\n",
    "      (\\alpha_{t-1}(s) + \\alpha_{t-1}(s-1) ) y^t_{l'_s} & \\text{if $l'_s=b$ or $l'_{s-2} = l'_s$​}  \\newline\n",
    "      (\\alpha_{t-1}(s) + \\alpha_{t-1}(s-1) + \\alpha_{t-1}(s-2))y^t_{l'_s} & \\text{otherwise}\\newline\n",
    "    \\end{cases} \n",
    "   \\end{split}\n",
    "   $$\n",
    "\n",
    "2. 利用 $\\alpha_t(s)$ 计算CTCLoss：\n",
    "   $$\n",
    "   -ln(p(l \\mid x)) = -ln(\\alpha_{T}(|l'|)+\\alpha_{T}(|l'|-1))\n",
    "   $$\n",
    "\n",
    "根据后向概率计算CTCLoss函数，可以得出如下结论：\n",
    "\n",
    "1. 对于时序长度为T的输入序列x和输出序列z，后向概率：  \n",
    "   $$\n",
    "   \\begin{split}\n",
    "   \\beta_t(s) &= \\sum_{ \\underset{\\pi_t=l'_s}{\\pi \\in \\mathcal{B}^{-1}(z)} } p(\\pi_{t:T}|x) \\newline\n",
    "   \\beta_T(|l'|) &= y_{-}^T ; \\quad \\beta_T(|l'|-1)=y^T_{l'_{|l'|-1}}, \\quad \\beta_T(s)=0, \\forall s < |l'| - 1  \\newline\n",
    "   \\beta_t(s) &= 0, \\text{$\\forall s > 2t$ or $\\forall s < |l'|$} \\newline\n",
    "   \\beta_t(s) &=\n",
    "        \\begin{cases}\n",
    "          (\\beta_{t+1}(s) + \\beta_{t+1}(s+1) ) y^t_{l'_s} & \\text{if $l'_s=b$ or $l'_{s+2} = l'_s$}  \\newline\n",
    "          (\\beta_{t+1}(s) + \\beta_{t+1}(s+1) + \\beta_{t+1}(s+2))y^t_{l'_s} & \\text{otherwise}\\newline\n",
    "        \\end{cases}\n",
    "   \\end{split}\n",
    "   $$\n",
    "\n",
    " 2. 利用 $\\beta_t(s)$计算CTCLoss：\n",
    "\n",
    "$$\n",
    "-ln(p(l \\mid x)) = -ln(\\beta_{1}(1)+\\beta_{1}(2)) \\newline\n",
    "$$\n",
    "\n",
    "根据任意时刻的前向概率和后向概率计算CTC Loss函数，得到如下结论：\n",
    "\n",
    "1. 对于任意时刻t，利用前向概率和后向概率计算CTCLoss：\n",
    "\n",
    "$$\n",
    "p(l \\mid x) = \\sum_{s=1}^{|l'|} \\frac{\\alpha_t(s)\\beta_t(s)}{y_{l'_s}^t}  \\newline\n",
    "-ln(p(l \\mid x)) = -ln( \\sum_{s=1}^{|l'|} \\frac{\\alpha_t(s) \\beta_t(s)}{y_{l'_s}^t} )\n",
    "$$\n",
    "我们已经得到CTCLoss的计算方法，接下来对其进行求导。\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "viral-fitting",
   "metadata": {},
   "source": [
    "## CTC梯度计算\n",
    "\n",
    "### 微分公式\n",
    "\n",
    "在计算梯度前，我们先回顾下基本的微分公式： \n",
    "$$\n",
    "C' = 0 \\\\\n",
    "x'  = 1 \\newline\n",
    "x^n  =  n \\cdot x^{n-1} \\newline\n",
    "(e^x)' =  e^x \\newline\n",
    "log(x)' =  \\frac{1}{x} \\newline\n",
    "(u + v)' = u' + v' \\newline\n",
    "(\\frac{u}{v})' =  \\frac{u'v-uv'}{v^2} \\newline\n",
    "\\frac{\\mathrm{d}f(g(x))}{\\mathrm{d}x} = \\frac{\\mathrm{d}f(g(x))}{\\mathrm{d}g(x)} \\cdot \\frac{\\mathrm{d}g(x)}{\\mathrm{d}x}\n",
    "$$\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "starting-sender",
   "metadata": {},
   "source": [
    "### CTC梯度\n",
    "\n",
    "最大似然估计训练就是最大化训练集中每一个分类的对数概率，即最小化Eq. 12。\n",
    "\n",
    "<img src=\"./img/ctc_loss_gradient_of_y_hat.png\" alt=\"image-20211108164206136\" style=\"zoom:50%;\" />\n",
    "\n",
    "最后就是算微分了， 整个推导过程就是加法和乘法， 都可以微分。 $\\mathit{O}^{ML}$关于神经网络的输出 $y^t_k$的梯度见Eq. 13。因为训练样本是相互独立的，所以可以单独考虑每个样本，公式如Eq.13。\n",
    "\n",
    "下面是CTCLoss的梯度计算：\n",
    "\n",
    "<img src=\"./img/ctc_loss_gradient_with_y.png\" alt=\"image-20211109143622448\" style=\"zoom:50%;\" />\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "stretch-order",
   "metadata": {},
   "source": [
    "### CTC梯度推导\n",
    "\n",
    "回顾下之前的公式，便于理解后续推导过程。  \n",
    "\n",
    "$$\n",
    "p(l \\mid x) = \\sum_{s=1}^{|l'|} \\frac{\\alpha_t(s)\\beta_t(s)}{y_{l'_s}^t}  \\\\\n",
    "\\begin{equation}\n",
    "\\alpha_t(s) \\beta_t(s) = \\sum_{ \\underset{\\pi_t=l'_s}{\\pi \\in \\mathcal{B}^{-1}(l):} }  y^t_{l'_s}  \\prod_{t=1}^T y^t_{\\pi_t}\n",
    "\\end{equation}\n",
    "$$\n",
    "\n",
    "其中Eq. 15的计算过程如下：  \n",
    "\n",
    "$$\n",
    "\\begin{align*}\n",
    "\\frac{\\partial p(\n",
    "l \\mid x)}{\\partial y_k^t}\n",
    "    & = \\sum_{s \\in lab(z,k)} \\frac{ \\partial \\frac{ \\alpha_t(s) \\beta_t(s)}{y_{k}^t}}{\\partial y_k^t}  \n",
    "    \\newline\n",
    "    & = \\sum_{s \\in lab(z,k)} \\frac{(\\alpha_t(s)\\beta_t(s))’y_k^t - \\alpha_t(s)\\beta_t(s){y_k^t}'}{{y_k^t}^2}\n",
    "    \\newline\n",
    "    &= \\sum_{s \\in lab(z,k)} \\frac{(  \\prod_{t'=1}^{t-1} y^{t'}_{\\pi_{t'}} \\cdot y_k^t \\cdot y_k^t \\cdot   \\prod_{t'=t+1}^{T} y^{t'}_{\\pi_{t'}}   )’ y_k^t - \\alpha_t(s)\\beta_t(s){y_k^t}'}{{y_k^t}^2}\n",
    "    \\newline\n",
    "     &= \\sum_{s \\in lab(z,k)} \\frac{2\\alpha_t(s)\\beta_t(s) - \\alpha_t(s)\\beta_t(s)}{{y_k^t}^2}\n",
    "    \\newline\n",
    "      &= \\sum_{s \\in lab(z,k)} \\frac{\\alpha_t(s)\\beta_t(s)}{{y_k^t}^2}\n",
    "      \\newline\n",
    "      &= \\frac{1}{{y_k^t}^2}  \\sum_{s \\in lab(z,k)} \\alpha_t(s)\\beta_t(s)  \\tag{1} \\newline\n",
    "\\end{align*}\n",
    "$$\n",
    "\n",
    "\n",
    "NLL的公式推导如下：\n",
    "$$\n",
    "\\begin{split}\n",
    "\\frac{\\partial {ln(p(l \\mid x))} }{ \\partial y^t_k }\n",
    "  &= \\frac{1}{p(l \\mid x)} \\frac{ \\partial{p(l \\mid x)} }{ \\partial y_k^t } \\newline\n",
    "  &= \\frac{1}{p(l \\mid x) {y^t_k}^2 } \\sum_{s \\in lab(z,k)} \\alpha_t(s)\\beta_t(s) \n",
    "\\end{split}\n",
    "\\tag{2}\n",
    "$$\n",
    "\n",
    "\n",
    "已经算出了CTCLoss对于 $y_k^t$​ 的梯度，接下来我们需要计算 CTCLoss对于$u^t_k$​（logits）的梯度。套用链式法则，并替换$y^t_k$​ 为 $y^t_{k'}$​，结果如下图。图中 $k'$​ 表示vocab中的某一个token，$K$​​ 是vocab的大小。\n",
    "\n",
    "![](./img/ctc_loss_backward_2.png)\n",
    "\n",
    "图中公式4根据链式法则得到：\n",
    "$$\n",
    "- \\frac{ \\partial ln(p(l \\mid x)) }{ \\partial u^t_k }\n",
    "  = - \\sum_{k'=1}^{K} \\frac{ \\partial ln(p(l \\mid x)) }{ \\partial y^t_{k'} } \\frac{ \\partial y^t_{k'} }{ \\partial u^t_k }  \\tag{4}\n",
    "$$\n",
    "图中公式3是softmax的梯度，参考 [4]，计算过程如下：\n",
    "$$\n",
    "softmax(j) = S_j  = \\frac{ e^{a_j} }{ \\sum_{k=1}^K e^{a_k} }, \\enspace \\forall j \\in 1 \\dots K\n",
    "$$\n",
    "\n",
    "$$\n",
    "\\begin{split}\n",
    "\\frac{ \\partial S_i }{ \\partial a_j}\n",
    "  &= \\frac{ \\partial (\\frac{ e^{ a_i } }{ \\sum_k e^{ a_k } }) } { \\partial a_j }\n",
    "  \\newline\n",
    "  &= \n",
    "  \\begin{cases}\n",
    "  \t\\frac{ e^a_i \\sum - e^a_j e^a_i }{ \\sum^2 } \n",
    "  \t&= \\frac{ e^a_i }{ \\sum } \\frac{ \\sum - e^a_j }{ \\sum } \\newline\n",
    "    &= S_i(1-S_j)   & \\text{i = j, $\\sum$ stands for $\\sum_{k=1}^K e^a_k$} \n",
    "  \t\\newline\n",
    "  \t\\frac{ 0 - e^a_j e^a_i }{ \\sum^2 }  \n",
    "  \t&= - \\frac{ e^a_j }{ \\sum } \\frac{ e^a_i }{ \\sum }  \\newline\n",
    "    &= -S_j S_i  & \\text{i $\\neq$ j, $\\sum$ stands for $\\sum_{k=1}^K e^a_k$}\n",
    "  \\end{cases}\n",
    "  \\newline\n",
    "  &= \n",
    "  \\begin{cases}\n",
    "  S_i(1 - S_j) & \\text{$i = j$} \n",
    "  \\newline\n",
    "  -S_j S_i = S_i (0 - S_j) & \\text{$i \\neq j$}\n",
    "  \\end{cases}\n",
    "  \\newline\n",
    "  &= S_i (\\delta_{ij} - S_j )\n",
    "\\end{split}\n",
    "\\tag{3}\n",
    "$$\n",
    "$$\n",
    "\\delta_{ij} =\n",
    " \\begin{cases}\n",
    " 1 & \\text{if i = j} \\newline\n",
    " 0 & \\text{otherwise}\n",
    " \\end{cases}\n",
    "$$\n",
    "\n",
    "\n",
    "\n",
    "下图中黄色框中的部分表示公式（1），即遍历所有的vocab中的token，其结果是$p(l \\mid x)$​。这是因为label $l$​ 中的token一定在vocab中，且 $s \\in lab(l, k')$​ 可以是空集。当 $k'$​ 在 l 中，s 则为label中token是$k'$​的概率；当$k'$​​​不在l中，s为空，概率为0。\n",
    "\n",
    "![img](./img/ctc_loss_backward_3.png)\n",
    "\n",
    "公式（2），（3）带入（4），并结合公式（1）的结果如上图右边，即：\n",
    "$$\n",
    "\\begin{split}\n",
    "- \\frac{ \\partial ln(p(l \\mid x)) }{ \\partial u^t_k } &= \n",
    "\t- \\sum_{k'=1}^K \\frac{ \\partial ln(p(l \\mid x)) }{ \\partial y^t_{k'} }  \\frac{ \\partial y^t_{k'}}{ \\partial u^t_k } \\newline\n",
    "\t&= - \\sum_{k'=1}^K \\frac{  y^t_{k'}( \\delta_{kk'} -  y^t_k )  }{ p(l \\mid x) {y^t_{k'}}^2 } \\sum_{s \\in lab(l, k') } \\alpha_t(s) \\beta_t(s) \\newline\n",
    "\t&= - \\sum_{k'=1}^K \\frac{  \\delta_{kk'} -  y^t_k  }{ p(l \\mid x) y^t_{k'} } \\sum_{s \\in lab(l, k') } \\alpha_t(s) \\beta_t(s) \\newline\n",
    "\t&=  \\sum_{k'=1}^K \\frac{ y^t_k  - \\delta_{kk'} }{ p(l \\mid x) y^t_{k'} } \\sum_{s \\in lab(l, k') } \\alpha_t(s) \\beta_t(s)   \\newline\n",
    "\t&= \\sum_{k'=1}^K \\frac{ y^t }{ p(l \\mid x) y^t_{k'} }  \\sum_{s \\in lab(l, k') } \\alpha_t(s) \\beta_t(s) - \\sum_{k'=1}^K \\frac{ \\delta_{kk'} }{ p(l \\mid x) y^t_{k'} }  \\sum_{s \\in lab(l, k') } \\alpha_t(s) \\beta_t(s) \\newline\n",
    "\t&=  \\frac{ y^t_k }{ p(l \\mid x) } ( \\sum_{k'=1}^K \\frac{1}{y^t_{k'}} \\sum_{s \\in lab(l, k') } \\alpha_t(s) \\beta_t(s) ) - \\sum_{k'=1}^K \\frac{ \\delta_{kk'} }{ p(l \\mid x) y^t_{k'} }  \\sum_{s \\in lab(l, k') } \\alpha_t(s) \\beta_t(s) \\newline\n",
    "\t&=  \\frac{ y^t_k }{ p(l \\mid x) } p(l \\mid x)   - \\sum_{k'=1}^K \\frac{ \\delta_{kk'} }{ p(l \\mid x) y^t_{k'} }  \\sum_{s \\in lab(l, k') } \\alpha_t(s) \\beta_t(s) \\newline\n",
    "\t&= y^t_k - \\frac{ 1 }{ p(l \\mid x) y^t_k } \\sum_{s \\in lab(l, k)} \\alpha_t(s) \\beta_t(s)  \\newline\n",
    "\\end{split}\n",
    "$$\n",
    "最终，为了通过softmax层传播CTCLoss的梯度，需要计算目标函数与 logits $u^t_k$ 的偏微分，即Eq. 16: \n",
    "    $$\n",
    "    \\begin{align*}\n",
    "    \\hat{\\alpha}_t(s) & \\overset{def}{=} \\frac{ \\alpha_t(s) }{ C_t } ,\\enspace C_t \\overset{def}{=} \\sum_s \\alpha_t(s) \n",
    "    \\newline\n",
    "    \\hat{\\beta}_t(s) & \\overset{def}{=} \\frac{ \\beta_t(s) }{ D_t } ,\\enspace D_t \\overset{def}{=} \\sum_s \\beta_t(s) \n",
    "    \\newline\n",
    "    - \\frac{ \\partial ln(p(l \\mid x)) }{ \\partial u^t_k } &= y^t_k - \\frac{1}{y^t_k \\sum_{s=1}^{\\mid l' \\mid} \\frac{ \\hat{\\alpha}_t(s) \\hat{\\beta}_t(s) }{ y^t_{l'_s} } } \\sum_{s \\in lab(l, k)} \\hat{\\alpha}_t(s) \\hat{\\beta}_t(s) \\tag{16} \n",
    "    \\newline\n",
    "    \\end{align*}\n",
    "    $$"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "informative-maria",
   "metadata": {},
   "source": [
    "### 总结\n",
    "\n",
    "* 通过动态规划算法计算$\\alpha_t(s)$ 和 $\\beta_t(s)$\n",
    "\n",
    "* 通过$\\alpha_t(s)$ 计算 $p(l \\mid x)=\\alpha_T(\\mid l' \\mid) + \\alpha_T(\\mid l' \\mid -1)$\n",
    "\n",
    "* 通过$\\alpha_t(s)$ 和 $\\beta_t(s)$\n",
    "\n",
    "* 计算CTcLoss函数的导数:  \n",
    "    $$\n",
    "    \\begin{split}\n",
    "    - \\frac{ \\partial ln(p(l \\mid x)) }{ \\partial u^t_k } \n",
    "    &= y^t_k - \\frac{ 1 }{ p(l \\mid x) y^t_k } \\sum_{s \\in lab(l, k)} \\alpha_t(s) \\beta_t(s)  \n",
    "    \\newline\n",
    "    &= y^t_k - \\frac{1}{y^t_k \\sum_{s=1}^{\\mid l' \\mid} \\frac{ \\hat{\\alpha}_t(s) \\hat{\\beta}_t(s) }{ y^t_{l'_s} } } \\sum_{s \\in lab(l, k)} \\hat{\\alpha}_t(s) \\hat{\\beta}_t(s) \n",
    "    \\newline\n",
    "    \\end{split}\n",
    "    \\tag{16}\n",
    "    $$"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "41637c03",
   "metadata": {},
   "source": [
    "## Source Code\n",
    "本人在 [warp-ctc](https://github.com/zh794390558/warp-ctc) 上加了注释，并调整 index 的索引方式，便于理解代码。\n",
    "对比上面的公式推导和lattice图可以快速理解 ctc 实现。"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "coordinated-music",
   "metadata": {},
   "source": [
    "## Reference\n",
    "\n",
    "[[1] A. Graves, S. Fernandez, F. Gomez, J. Schmidhuber. Connectionist Temporal lassification: Labeling Unsegmented Sequence Data  with Recurrent Neural Networks. ICML 2006, Pittsburgh, USA,  pp. 369-376.](http://www.cs.toronto.edu/~graves/icml_2006.pdf)\n",
    "\n",
    "[[2] Sequence ModelingWith CTC](https://distill.pub/2017/ctc/)\n",
    "\n",
    "[[3] NLP 之 CTC Loss 的工作原理](https://www.jianshu.com/p/e073c9d91b20)\n",
    "\n",
    "[[4] The Softmax function and its derivative](https://eli.thegreenplace.net/2016/the-softmax-function-and-its-derivative/)\n",
    "\n",
    "[[5] CTC Algorithm Explained Part 1：Training the Network（CTC算法详解之训练篇）](https://xiaodu.io/ctc-explained/)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "closing-candy",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}


================================================
FILE: docs/topic/ctc/ctc_loss_compare.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "ff6ff1e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "33af5f76",
   "metadata": {},
   "outputs": [],
   "source": [
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "9b566b73",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "fatal: destination path 'warp-ctc' already exists and is not an empty directory.\r\n"
     ]
    }
   ],
   "source": [
    "!git clone https://github.com/SeanNaren/warp-ctc.git"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "4a087a09",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc\n"
     ]
    }
   ],
   "source": [
    "%cd warp-ctc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "f55dc29a",
   "metadata": {},
   "outputs": [],
   "source": [
    "mkdir -p build"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "fe79f4cf",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/build\n"
     ]
    }
   ],
   "source": [
    "cd build"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "3d25c718",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-- cuda found TRUE\n",
      "-- Building shared library with GPU support\n",
      "-- Configuring done\n",
      "-- Generating done\n",
      "-- Build files have been written to: /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/build\n"
     ]
    }
   ],
   "source": [
    "!cmake .."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "7a4238f1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 11%] \u001b[32m\u001b[1mLinking CXX shared library libwarpctc.so\u001b[0m\n",
      "[ 33%] Built target warpctc\n",
      "[ 44%] \u001b[32m\u001b[1mLinking CXX executable test_cpu\u001b[0m\n",
      "[ 55%] \u001b[32m\u001b[1mLinking CXX executable test_gpu\u001b[0m\n",
      "[ 77%] Built target test_cpu\n",
      "[100%] Built target test_gpu\n"
     ]
    }
   ],
   "source": [
    "!make -j"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "31761a31",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc\n"
     ]
    }
   ],
   "source": [
    "cd .."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "f53316f6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding\n"
     ]
    }
   ],
   "source": [
    "cd pytorch_binding"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "084f1e49",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "running install\n",
      "running bdist_egg\n",
      "running egg_info\n",
      "writing warpctc_pytorch.egg-info/PKG-INFO\n",
      "writing dependency_links to warpctc_pytorch.egg-info/dependency_links.txt\n",
      "writing top-level names to warpctc_pytorch.egg-info/top_level.txt\n",
      "writing manifest file 'warpctc_pytorch.egg-info/SOURCES.txt'\n",
      "installing library code to build/bdist.linux-x86_64/egg\n",
      "running install_lib\n",
      "running build_py\n",
      "running build_ext\n",
      "building 'warpctc_pytorch._warp_ctc' extension\n",
      "Emitting ninja build file /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/build/temp.linux-x86_64-3.9/build.ninja...\n",
      "Compiling objects...\n",
      "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n",
      "ninja: no work to do.\n",
      "g++ -pthread -B /workspace/zhanghui/DeepSpeech-2.x/tools/venv/compiler_compat -Wl,--sysroot=/ -shared -Wl,-rpath,/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib -Wl,-rpath-link,/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib -L/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib -Wl,-rpath,/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib -Wl,-rpath-link,/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib -L/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/build/temp.linux-x86_64-3.9/src/binding.o -L/workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/build -L/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib/python3.9/site-packages/torch/lib -L/usr/local/cuda/lib64 -lwarpctc -lc10 -ltorch -ltorch_cpu -ltorch_python -lcudart -lc10_cuda -ltorch_cuda -o build/lib.linux-x86_64-3.9/warpctc_pytorch/_warp_ctc.cpython-39-x86_64-linux-gnu.so -Wl,-rpath,/workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/build\n",
      "creating build/bdist.linux-x86_64/egg\n",
      "creating build/bdist.linux-x86_64/egg/warpctc_pytorch\n",
      "copying build/lib.linux-x86_64-3.9/warpctc_pytorch/__init__.py -> build/bdist.linux-x86_64/egg/warpctc_pytorch\n",
      "copying build/lib.linux-x86_64-3.9/warpctc_pytorch/_warp_ctc.cpython-39-x86_64-linux-gnu.so -> build/bdist.linux-x86_64/egg/warpctc_pytorch\n",
      "byte-compiling build/bdist.linux-x86_64/egg/warpctc_pytorch/__init__.py to __init__.cpython-39.pyc\n",
      "creating stub loader for warpctc_pytorch/_warp_ctc.cpython-39-x86_64-linux-gnu.so\n",
      "byte-compiling build/bdist.linux-x86_64/egg/warpctc_pytorch/_warp_ctc.py to _warp_ctc.cpython-39.pyc\n",
      "creating build/bdist.linux-x86_64/egg/EGG-INFO\n",
      "copying warpctc_pytorch.egg-info/PKG-INFO -> build/bdist.linux-x86_64/egg/EGG-INFO\n",
      "copying warpctc_pytorch.egg-info/SOURCES.txt -> build/bdist.linux-x86_64/egg/EGG-INFO\n",
      "copying warpctc_pytorch.egg-info/dependency_links.txt -> build/bdist.linux-x86_64/egg/EGG-INFO\n",
      "copying warpctc_pytorch.egg-info/top_level.txt -> build/bdist.linux-x86_64/egg/EGG-INFO\n",
      "writing build/bdist.linux-x86_64/egg/EGG-INFO/native_libs.txt\n",
      "zip_safe flag not set; analyzing archive contents...\n",
      "warpctc_pytorch.__pycache__._warp_ctc.cpython-39: module references __file__\n",
      "creating 'dist/warpctc_pytorch-0.1-py3.9-linux-x86_64.egg' and adding 'build/bdist.linux-x86_64/egg' to it\n",
      "removing 'build/bdist.linux-x86_64/egg' (and everything under it)\n",
      "Processing warpctc_pytorch-0.1-py3.9-linux-x86_64.egg\n",
      "removing '/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib/python3.9/site-packages/warpctc_pytorch-0.1-py3.9-linux-x86_64.egg' (and everything under it)\n",
      "creating /workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib/python3.9/site-packages/warpctc_pytorch-0.1-py3.9-linux-x86_64.egg\n",
      "Extracting warpctc_pytorch-0.1-py3.9-linux-x86_64.egg to /workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib/python3.9/site-packages\n",
      "warpctc-pytorch 0.1 is already the active version in easy-install.pth\n",
      "\n",
      "Installed /workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib/python3.9/site-packages/warpctc_pytorch-0.1-py3.9-linux-x86_64.egg\n",
      "Processing dependencies for warpctc-pytorch==0.1\n",
      "Finished processing dependencies for warpctc-pytorch==0.1\n"
     ]
    }
   ],
   "source": [
    "!python setup.py install"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "ee4ca9e3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Python 3.9.5\r\n"
     ]
    }
   ],
   "source": [
    "!python -V"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "59255ed8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc\n"
     ]
    }
   ],
   "source": [
    "cd .."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "1dae09b9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "import warpctc_pytorch as wp\n",
    "import paddle.nn as pn\n",
    "import paddle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "83d0762e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'1.10.0+cu102'"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "torch.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "62501e2c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'2.2.1'"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "paddle.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "9e8e0f40",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([2, 1, 5])\n",
      "2.4628584384918213\n",
      "[[[ 0.17703122 -0.70812464  0.17703122  0.17703122  0.17703122]]\n",
      "\n",
      " [[ 0.17703122  0.17703122 -0.70812464  0.17703122  0.17703122]]]\n"
     ]
    }
   ],
   "source": [
    "# warpctc_pytorch CTCLoss\n",
    "probs = torch.FloatTensor([[\n",
    "        [0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1]\n",
    "    ]]).transpose(0, 1).contiguous()\n",
    "print(probs.size())\n",
    "labels = torch.IntTensor([1, 2])\n",
    "label_sizes = torch.IntTensor([2])\n",
    "probs_sizes = torch.IntTensor([2])\n",
    "probs.requires_grad_(True)\n",
    "bs = probs.size(1)\n",
    "\n",
    "ctc_loss = wp.CTCLoss(size_average=False, length_average=False)\n",
    "cost = ctc_loss(probs, labels, probs_sizes, label_sizes)\n",
    "cost = cost.sum() / bs\n",
    "print(cost.item())\n",
    "cost.backward()\n",
    "print(probs.grad.numpy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "2cd46569",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2.4628584384918213\n",
      "[[[ 0.1770312 -0.7081248  0.1770312  0.1770312  0.1770312]]\n",
      "\n",
      " [[ 0.1770312  0.1770312 -0.7081248  0.1770312  0.1770312]]]\n"
     ]
    }
   ],
   "source": [
    "# pytorch CTCLoss\n",
    "probs = torch.FloatTensor([[\n",
    "        [0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1]\n",
    "    ]]).transpose(0, 1).contiguous()\n",
    "labels = torch.IntTensor([1, 2])\n",
    "label_sizes = torch.IntTensor([2])\n",
    "probs_sizes = torch.IntTensor([2])\n",
    "probs.requires_grad_(True)\n",
    "bs = probs.size(1)\n",
    "\n",
    "log_probs = torch.log_softmax(probs, axis=-1)\n",
    "\n",
    "ctc_loss1 = nn.CTCLoss(reduction='none')\n",
    "cost = ctc_loss1(log_probs, labels, probs_sizes, label_sizes)\n",
    "cost = cost.sum() / bs\n",
    "print(cost.item())\n",
    "cost.backward()\n",
    "print(probs.grad.numpy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "85c3461a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2, 1, 5]\n",
      "[1, 2]\n",
      "2.4628584384918213\n",
      "[[[ 0.17703122 -0.70812464  0.17703122  0.17703122  0.17703122]]\n",
      "\n",
      " [[ 0.17703122  0.17703122 -0.70812464  0.17703122  0.17703122]]]\n"
     ]
    }
   ],
   "source": [
    "# Paddle CTCLoss\n",
    "paddle.set_device('cpu')\n",
    "probs = paddle.to_tensor([[\n",
    "        [0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1],\n",
    "    ]]).transpose([1,0,2])\n",
    "print(probs.shape) # (T, B, D)\n",
    "labels = paddle.to_tensor([[1, 2]], dtype='int32') #（B，L)\n",
    "print(labels.shape)\n",
    "label_sizes = paddle.to_tensor([2], dtype='int64')\n",
    "probs_sizes = paddle.to_tensor([2], dtype='int64')\n",
    "bs = paddle.shape(probs)[1]\n",
    "probs.stop_gradient=False\n",
    "\n",
    "ctc_loss = pn.CTCLoss(reduction='none')\n",
    "cost = ctc_loss(probs, labels, probs_sizes, label_sizes)\n",
    "cost = cost.sum() / bs\n",
    "print(cost.item())\n",
    "cost.backward()\n",
    "print(probs.grad.numpy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8cdf76c2",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "2c305eaf",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "torch.Size([2, 1, 5])\n",
      "2.4628584384918213\n",
      "[[[ 0.17703117 -0.7081247   0.17703117  0.17703117  0.17703117]]\n",
      "\n",
      " [[ 0.17703117  0.17703117 -0.7081247   0.17703117  0.17703117]]]\n"
     ]
    }
   ],
   "source": [
    "# warpctc_pytorch CTCLoss, log_softmax idempotent\n",
    "probs = torch.FloatTensor([[\n",
    "        [0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1]\n",
    "    ]]).transpose(0, 1).contiguous()\n",
    "print(probs.size())\n",
    "labels = torch.IntTensor([1, 2])\n",
    "label_sizes = torch.IntTensor([2])\n",
    "probs_sizes = torch.IntTensor([2])\n",
    "probs.requires_grad_(True)\n",
    "bs = probs.size(1)\n",
    "\n",
    "ctc_loss = wp.CTCLoss(size_average=False, length_average=False)\n",
    "\n",
    "log_probs = torch.log_softmax(probs, axis=-1)\n",
    "cost = ctc_loss(log_probs, labels, probs_sizes, label_sizes)\n",
    "cost = cost.sum() / bs\n",
    "print(cost.item())\n",
    "cost.backward()\n",
    "print(probs.grad.numpy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "443336f0",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}


================================================
FILE: docs/topic/ctc/ctc_loss_speed_compare.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a1e738e0",
   "metadata": {},
   "source": [
    "## 获取测试的 logit 数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "29d3368b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "hlens.npy\n",
      "logits.npy\n",
      "ys_lens.npy\n",
      "ys_pad.npy\n"
     ]
    }
   ],
   "source": [
    "!mkdir -p ./test_data\n",
    "!test -f ./test_data/ctc_loss_compare_data.tgz || wget -P ./test_data https://paddlespeech.cdn.bcebos.com/datasets/unit_test/asr/ctc_loss_compare_data.tgz\n",
    "!tar xzvf test_data/ctc_loss_compare_data.tgz -C ./test_data\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "240caf1d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import numpy as np\n",
    "import time\n",
    "\n",
    "data_dir=\"./test_data\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "91bad949",
   "metadata": {},
   "outputs": [],
   "source": [
    "logits_np = np.load(os.path.join(data_dir, \"logits.npy\"))\n",
    "ys_pad_np = np.load(os.path.join(data_dir, \"ys_pad.npy\"))\n",
    "hlens_np = np.load(os.path.join(data_dir, \"hlens.npy\"))\n",
    "ys_lens_np = np.load(os.path.join(data_dir, \"ys_lens.npy\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4cef2f15",
   "metadata": {},
   "source": [
    "## 使用 torch 的 ctc loss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "90612004",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'1.10.1+cu102'"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import torch\n",
    "torch.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "00799f97",
   "metadata": {},
   "outputs": [],
   "source": [
    "def torch_ctc_loss(use_cpu):\n",
    "    if use_cpu:\n",
    "        device = torch.device(\"cpu\")\n",
    "    else:\n",
    "        device = torch.device(\"cuda\")\n",
    "\n",
    "    reduction_type = \"sum\" \n",
    "\n",
    "    ctc_loss = torch.nn.CTCLoss(reduction=reduction_type)\n",
    "\n",
    "    ys_hat = torch.tensor(logits_np, device = device)\n",
    "    ys_pad = torch.tensor(ys_pad_np, device = device)\n",
    "    hlens = torch.tensor(hlens_np, device = device)\n",
    "    ys_lens = torch.tensor(ys_lens_np, device = device)\n",
    "\n",
    "    ys_hat = ys_hat.transpose(0, 1)\n",
    "    \n",
    "    # 开始计算时间\n",
    "    start_time = time.time()\n",
    "    ys_hat = ys_hat.log_softmax(2)\n",
    "    loss = ctc_loss(ys_hat, ys_pad, hlens, ys_lens)\n",
    "    end_time = time.time()\n",
    "    \n",
    "    loss = loss / ys_hat.size(1)\n",
    "    return end_time - start_time, loss.item()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ba47b5a4",
   "metadata": {},
   "source": [
    "## 使用 paddle 的 ctc loss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "6882a06e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'2.2.2'"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import paddle\n",
    "paddle.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "3cfa3b7c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def paddle_ctc_loss(use_cpu):    \n",
    "    import paddle.nn as pn\n",
    "    if use_cpu:\n",
    "        device = \"cpu\"\n",
    "    else:\n",
    "        device = \"gpu\"\n",
    "\n",
    "    paddle.set_device(device)\n",
    "\n",
    "    logits = paddle.to_tensor(logits_np)\n",
    "    ys_pad = paddle.to_tensor(ys_pad_np,dtype='int32')\n",
    "    hlens = paddle.to_tensor(hlens_np, dtype='int64')\n",
    "    ys_lens = paddle.to_tensor(ys_lens_np, dtype='int64')\n",
    "\n",
    "    logits = logits.transpose([1,0,2])\n",
    "\n",
    "    ctc_loss = pn.CTCLoss(reduction='sum')\n",
    "    # 开始计算时间\n",
    "    start_time = time.time()\n",
    "    pn_loss = ctc_loss(logits, ys_pad, hlens, ys_lens)\n",
    "    end_time = time.time()\n",
    "    \n",
    "    pn_loss = pn_loss / logits.shape[1]\n",
    "    return end_time - start_time, pn_loss.item()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "40413ef9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU, iteration 10\n",
      "torch_ctc_loss 159.17137145996094\n",
      "paddle_ctc_loss 159.16574096679688\n",
      "paddle average time 1.718252992630005\n",
      "torch average time 0.17536230087280275\n",
      "paddle time / torch time (cpu) 9.798303193320452\n",
      "\n",
      "GPU, iteration 10\n",
      "torch_ctc_loss 159.172119140625\n",
      "paddle_ctc_loss 159.17205810546875\n",
      "paddle average time 0.018606925010681154\n",
      "torch average time 0.0026710033416748047\n",
      "paddle time / torch time (gpu) 6.966267963938231\n"
     ]
    }
   ],
   "source": [
    "# 使用 CPU\n",
    "\n",
    "iteration = 10\n",
    "use_cpu = True\n",
    "torch_total_time = 0\n",
    "paddle_total_time = 0\n",
    "for _ in range(iteration):\n",
    "    cost_time, torch_loss = torch_ctc_loss(use_cpu)\n",
    "    torch_total_time += cost_time\n",
    "for _ in range(iteration):\n",
    "    cost_time, paddle_loss = paddle_ctc_loss(use_cpu)\n",
    "    paddle_total_time += cost_time\n",
    "print (\"CPU, iteration\", iteration)\n",
    "print (\"torch_ctc_loss\", torch_loss)\n",
    "print (\"paddle_ctc_loss\", paddle_loss)\n",
    "print (\"paddle average time\", paddle_total_time / iteration)\n",
    "print (\"torch average time\", torch_total_time / iteration)\n",
    "print (\"paddle time / torch time (cpu)\" , paddle_total_time/ torch_total_time)\n",
    "\n",
    "print (\"\")\n",
    "\n",
    "# 使用 GPU\n",
    "\n",
    "use_cpu = False\n",
    "torch_total_time = 0\n",
    "paddle_total_time = 0\n",
    "for _ in range(iteration):\n",
    "    cost_time, torch_loss  = torch_ctc_loss(use_cpu)\n",
    "    torch_total_time += cost_time\n",
    "for _ in range(iteration):\n",
    "    cost_time, paddle_loss = paddle_ctc_loss(use_cpu)\n",
    "    paddle_total_time += cost_time\n",
    "print (\"GPU, iteration\", iteration)\n",
    "print (\"torch_ctc_loss\", torch_loss)\n",
    "print (\"paddle_ctc_loss\", paddle_loss)\n",
    "print (\"paddle average time\", paddle_total_time / iteration)\n",
    "print (\"torch average time\", torch_total_time / iteration)\n",
    "print (\"paddle time / torch time (gpu)\" , paddle_total_time/ torch_total_time)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7cdf8697",
   "metadata": {},
   "source": [
    "## 其他: 使用 PaddleSpeech 中的 ctcloss 查一下loss值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "73fad81d",
   "metadata": {},
   "outputs": [],
   "source": [
    "logits_np = np.load(os.path.join(data_dir, \"logits.npy\"))\n",
    "ys_pad_np = np.load(os.path.join(data_dir, \"ys_pad.npy\"))\n",
    "hlens_np = np.load(os.path.join(data_dir, \"hlens.npy\"))\n",
    "ys_lens_np = np.load(os.path.join(data_dir, \"ys_lens.npy\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "2b41e45d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2022-02-25 11:34:34.143 | INFO     | paddlespeech.s2t.modules.loss:__init__:41 - CTCLoss Loss reduction: sum, div-bs: True\n",
      "2022-02-25 11:34:34.143 | INFO     | paddlespeech.s2t.modules.loss:__init__:42 - CTCLoss Grad Norm Type: instance\n",
      "2022-02-25 11:34:34.144 | INFO     | paddlespeech.s2t.modules.loss:__init__:73 - CTCLoss() kwargs:{'norm_by_times': True}, not support: {'norm_by_batchsize': False, 'norm_by_total_logits_len': False}\n",
      "loss 159.17205810546875\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/root/miniconda3/lib/python3.7/site-packages/paddle/fluid/dygraph/math_op_patch.py:253: UserWarning: The dtype of left and right variables are not the same, left dtype is paddle.float32, but right dtype is paddle.int32, the right dtype will convert to paddle.float32\n",
      "  format(lhs_dtype, rhs_dtype, lhs_dtype))\n"
     ]
    }
   ],
   "source": [
    "use_cpu = False\n",
    "\n",
    "from paddlespeech.s2t.modules.loss import CTCLoss\n",
    "\n",
    "if use_cpu:\n",
    "    device = \"cpu\"\n",
    "else:\n",
    "    device = \"gpu\"\n",
    "\n",
    "paddle.set_device(device)\n",
    "\n",
    "blank_id=0\n",
    "reduction_type='sum'\n",
    "batch_average= True\n",
    "grad_norm_type='instance'\n",
    "\n",
    "criterion = CTCLoss(\n",
    "        blank=blank_id,\n",
    "        reduction=reduction_type,\n",
    "        batch_average=batch_average,\n",
    "        grad_norm_type=grad_norm_type)\n",
    "\n",
    "logits = paddle.to_tensor(logits_np)\n",
    "ys_pad = paddle.to_tensor(ys_pad_np,dtype='int32')\n",
    "hlens = paddle.to_tensor(hlens_np, dtype='int64')\n",
    "ys_lens = paddle.to_tensor(ys_lens_np, dtype='int64')\n",
    "\n",
    "pn_ctc_loss = criterion(logits, ys_pad, hlens, ys_lens)\n",
    "print(\"loss\", pn_ctc_loss.item())\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "de525d38",
   "metadata": {},
   "source": [
    "## 结论\n",
    "在 CPU 环境下： torch 的 CTC loss 的计算速度是 paddle 的 9.8 倍  \n",
    "在 GPU 环境下： torch 的 CTC loss 的计算速度是 paddle 的 6.87 倍\n",
    "\n",
    "## 其他结论\n",
    "torch 的 ctc loss 在 CPU 和 GPU 下 都没有完全对齐。其中CPU的前向对齐精度大约为 1e-2。 GPU 的前向对齐精度大约为 1e-4 。"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}


================================================
FILE: docs/topic/frontend/g2p.md
================================================
# g2p 字典设计
<!--
modified from https://zhuanlan.zhihu.com/p/349600439
-->
本文主要讲语音合成的 g2p (grapheme to phoneme) 部分。

代码: [generate_lexicon.py](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/examples/other/mfa/local/generate_lexicon.py) （代码可能与此处的描述有些许出入，以代码为准，生成的带 tone 带儿化的 pinyin 字典参考 [simple.lexicon](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/examples/csmsc/tts3/local/simple.lexicon)）

## ARPAbet
对于英文 TTS，常用的 g2p 是通过查询 CMUDict 来实现，而 CMUDict 注音使用的系统是 ARPAbet，具体含义参见 [CMU 发音词典](http://www.speech.cs.cmu.edu/cgi-bin/cmudict/)。

它包含 39 个 phoneme，不包含词汇重音的变体:

| Phoneme       | Example | Translation |
|:-------------:|:-------:|:-----------:|
|     AA        |  odd    |    AA D     |
|     AE        |  at     |    AE T     |
|     AH        |  hut    |    HH AH T  |
|     AO        |  ought  |    AO T     |
|     AW        |  cow    |    K AW     |
|     AY        |  hide   |    HH AY D  |
|     B         |  be     |    B IY     |
|     CH        |  cheese |    CH IY Z  |
|     D         |  dee    |    D IY     |
|     DH        |  thee   |    DH IY    |
|     EH        |  Ed     |    EH D     |
|     ER        |  hurt   |    HH ER T  |
|     EY        |  ate    |    EY T     |
|     F         |  fee    |    F IY     |
|     G         |  green  |    G R IY N |
|     HH        |  he     |    HH IY    |
|     IH        |  it     |    IH T     |
|     IY        |  eat    |    IY T     |
|     JH        |  gee    |    JH IY    |
|     K         |  key    |    K IY     |
|     L         |  lee    |    L IY     |
|     M         |  me     |    M IY     |
|     N         |  knee   |    N IY     |
|     NG        |  ping   |    P IH NG  |
|     OW        |  oat    |    OW T     |
|     OY        |  toy    |    T OY     |
|     P         |  pee    |    P IY     |
|     R         |  read   |    R IY D   |
|     S         |  sea    |    S IY     |
|     SH        |  she    |    SH IY    |
|     T         |  tea    |    T IY     |
|     TH        |  theta  |    TH EY T AH|
|     UH        |  hood   |    HH UH D  |
|     UW        |  two    |    T UW     |
|     V         |  vee    |    V IY     |
|     W         |  we     |    W IY     |
|     Y         |  yield  |    Y IY L D |
|     Z         |  zee    |    Z IY     |
|     ZH        |  seizure|    S IY ZH ER|

另外还包含三个重音标记，

- 0 — No stress
- 1 — Primary stress
- 2 — Secondary stress

其中重音标记附在元音后面。当只需要音标而不需要重音标记的时候也可以直接省略。

CMUDict 只是一个词典，当出现了不在词典中的词时（OOV），可以求助其他工具，根据拼写得到对应的发音，如:
  - [Lexicon Tool](http://www.speech.cs.cmu.edu/tools)
  - [g2p-seq2seq](https://github.com/cmusphinx/g2p-seq2seq)

## 中文注音系统

中文普通话的注音系统存在许多套，比如汉语拼音 (pinyin)， 注音符号 (bopomofo)， 国语注音符号第二式， 威妥玛拼音等。而且有一些并非注音方案，是拉丁化方案，因此为了符号系统的经济性，会做一些互补符号的简并，比如汉语拼音中的 `i` 代表了三个音位， `e` 代表了两个音位（单用的情况很少， 单用时写作 `ê`）；也有一些简写，比如 `bpmf` 后的 `o` 是 `uo` 的简写， `ui` 是 `uei` 的简写， `iu` 是 `iou` 的简写， `un` 是 `uen` 的简写， `ao` 是为了书写避免形近而改掉的 `au`， `y` 和 `w` 是为了连续书写时作为分隔而产生的零声母， `ü` 在 `j`、 `q`、 `x` 后面省略两点（中国大陆使用美式键盘打字的时候，一般只有在“女”、 “律”、“略”和“虐”这一类的字里面用 `v` 代替 `ü`，而在 `j`、 `q`、 `x` 后面的时候则仍用 `u` ），有鼻韵母 `uang` 而没有 `ueng`，但是又有 `weng` 这个音节之类的问题， 有 `ong` 韵母但是又没有单用的情形。其实这些都是汉语拼音作为拉丁化方案而做的一系列的修改。

另外，汉语的声调是用了特殊符号来标调型，用字母记录的时候常用 `12345` 或者 `1234`、轻音不标等手段。

另外还有两个比较突出的问题是**儿化**和**变调**（参考 [zh_text_frontend](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/tts/zh_text_frontend.md)）。对于具体的数据集，也可能有不同的标注方案。一般我们为汉字标音是标字调而不标变调，但是**标贝数据集是标变调的**（但是也没有把所有的变调都正确标出来）。儿化在汉语书写和拼音中也是一个比较麻烦的事情，虽然正字法中说到可以用小字号的儿表示儿化，但是这种发音由字号这种排版要素来表达的手法未免过于崎岖，所以鲜见有人真的这么排版，只有在某些书籍中，强调此事的时候见过。另外，在儿化的标音方式上，鼻韵母需要去掉韵尾然后换成 r，这么一来，如果直接抽取拼音的字符串表示，那么可能出现的音节就会超过 1400， 甚至进入一种含糊的状态，不清楚一共有多少个有效音节，即使是韵母，也会因此扩展近一倍。

因为存在这样的情形，再考虑到不同的数据集自带的拼音 transcription 的风格可能不同，所以需要考虑进行转换，在内部转成统一的表示。既然这个过程是必要的，那么我们可以大胆设计一个内部方案。

这里设计的原则是：

1. 有效符号集仅切分为声母和韵母，不作声母，介音，韵腹，韵尾的切分；

2. 尽可能把不同的音用不同的符号表示，比如 `i` 和 `e` 会分别被拆分为 3 个和 2 个符号， `u` 和 `ü` 开头的韵母分开，这是为了 TTS 系统的建议性考虑的，我们选择尽量反映语音的现实情况，而不把注音系统里面的奇怪规则留给模型去学习；

3. 不包含零声母 `y`， `w`之类的形式上的符号，因为如果这些符号不发声或者发声极短，那么可以不加入音符序列中，以期待 attention 更对角；

4. 声调和韵母不结合为一个符号，而是分开，这样可以**减少词汇量**，使得符号的 embedding 得到更充分的训练，也更能反映声调语言的特点（数据集少时推荐这么做）；

5. 儿化的标音方式采用拆分的方式处理， 但是增设一个特殊符号 `&r` 来表示儿化的 `r`，它和一般的 `er` 不同，以区分实际读音的区别。

6. 更加贴近注音符号，把 `in` 写作 `ien`，`ing` 写作 `ieng`， `un` 写作 `uen`， `ong` 写作 `ueng`， `iong` 写作 `üeng`。其中 `in` 和 `ing` 的转写纯属偏好，无论用什么符号写，都可以被转为一个 index， 只要它们的使用情况不发生变化就可以。而 `ong` 写作 `ueng` 则是有实际差别的，如果 `ong` 作为一个韵母，那么 `weng` 经过修改之后会变成 `ueng`， 就会同时有 `ueng` 和 `ong`。而如果不细究音值上的微妙差异，`ong` 就是 `ung` 的一种奇怪表示， 在注音符号中， 它就记作 `ㄨㄥ`。而 `iong` 则是 `ㄩㄥ`。

7. `ui`， `iu` 都展开为 `uei` 和 `iou` ， 纯属偏好，对实际结果没有影响。`bpmf` 后的 `o` 展开为 `uo`，这个则是为了和单独的 `o` 区分开（哦， 和波里面的韵母的发音其实不同）。

8. 所有的 `ü` 都用 `v` 代替，无论是单独作韵母， 还是复韵母和鼻韵母。

9. 把停顿以 `#1` 等方式纳入其中， 把 `<pad>` `<unk>` `<s>` `</s>` 这些为了处理符号系列的特殊符号也加入其中，多一些特殊词汇并不会对 Embedding 产生什么影响。

于是我们可以通过一套规则系统，把标贝的**拼音标注**转换成我们需要的形式。（当然，如果是别的数据集的实际标注不同，那么转换规则也要作一些修改）

在实际使用中文数据集时，我们仅使用其提供的**拼音标注**，而不使用**音素标注**（PhoneLabel），因为不同的数据集有不同的标注规则，而且有的数据集是没有**音素标注**的（如，aishell3）

我们的做法和维基百科上的汉语拼音音节列表更接近 [汉语拼音音节列表](https://zh.wikipedia.org/zh-hans/%E6%B1%89%E8%AF%AD%E6%8B%BC%E9%9F%B3%E9%9F%B3%E8%8A%82%E5%88%97%E8%A1%A8)

转换之后，符号列表是：

声母基本没有什么争议，共 21 个:
|声母|
|:--:|
|b|
|p|
|m|
|f|
|d|
|t|
|n|
|l|
|g|
|k|
|h|
|j|
|q|
|x|
|zh|
|ch|
|sh|
|r|
|z|
|c|
|s|

韵母和儿化韵尾（共 41个）
|韵母|解释|
|:----:|:-----------: |
|ii     |`zi`，`ci`， `si` 里面的韵母 `i`|
|iii    |`zhi`， `chi`， `shi`， `ri` 里面的韵母 `i`|
|a    |啊，卡|
|o    |哦|
|e    |恶，个|
|ea    |ê|
|ai    |爱，在|
|ei    |诶，薇|
|ao    |奥，脑|
|ou    |欧，勾|
|an    |安，单|
|en    |恩，痕|
|ang    |盎，刚|
|eng    |嗯，更|
|er    |儿|
|i    |一|
|ia    |鸦，家|
|io    |哟|
|ie    |叶，界|
|iai    |崖（台语发音）|
|iao    |要，教|
|iou    |有，久|
|ian    |言，眠|
|ien    |因，新|
|iang    |样，降|
|ieng    |英，晶|
|u    |无，卢|
|ua    |哇，瓜|
|uo    |我，波|
|uai    |外，怪|
|uei    |位，贵|
|uan    |万，乱|
|uen    |问，论|
|uang   |网，光|
|ueng   |翁，共|
|v      |玉，曲，`ü`|
|ve     |月，却|
|van    |源，倦|
|ven    |韵，君|
|veng   |永，炯|
|&r     |儿化韵尾|


================================================
FILE: docs/topic/gan_vocoder/gan_vocoder.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# GAN Vocoders 总览\n",
    "\n",
    "Loss 函数简称与全称的对应关系\n",
    "\n",
    "|Short Name|Full Name|\n",
    ":-----:|:-----|\n",
    "|adv|adversial loss|\n",
    "|FM|Feature Matching|\n",
    "|MSD|Multi-Scale Discriminator|\n",
    "|mr-STFT|Multi-resolution STFT loss|\n",
    "|fmr-STFT|full band Multi-resolution STFT loss|\n",
    "|smr-STFT|sub band Multi-resolution STFT loss|\n",
    "|Mel|Mel-Spectrogram Loss|\n",
    "|MPD|Multi-Period Discriminator|\n",
    "|FB-RAWs|Filter Bank Random Window Discriminators|\n",
    "\n",
    "<br></br>\n",
    "csmsc 数据集上 GAN Vocoder 整体对比如下， \n ",
    "\n",
    "测试机器：1 x Tesla V100-32G 40 core Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz\n ",
    "\n",
    "测试环境：Python 3.7.0, paddlepaddle 2.2.0\n",
    "\n",
    "Model|Date|Input|Generator<br>Loss|Discriminator<br>Loss|Need<br>Finetune|Training<br>Steps|Finetune<br>Steps|Batch<br>Size|ips<br>(gen only)<br>(gen + dis)|Static Model<br>Size (gen)|RTF<br>(GPU)|\n",
    ":-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|\n",
    "Mel GAN|9 Dec 2019|mel|adv<br>FM |MSD|——|——|——|——|——|——|——|\n",
    "Parallel Wave GAN |6 Feb 2020|mel<br>noise|adv<br>mr-STFT|adv|No|40W|——|8|18<br>10|5.1MB|0.01786|\n",
    "HiFi GAN|23 Oct 2020|mel|adv<br>FM<br>Mel|MSD<br>MPD|Yes|250W|no need|16|——<br>31|50MB|0.00825|\n",
    "Multi-Band Mel GAN|17 Nov 2020|mel|adv<br>fmr-STFT<br>smr-STFT|MSD|Yes|100W|100W<br><font size=1>(not good enough,<br>need to adjust parameters)</font>|64|305<br>148|8.2MB|0.00457|\n",
    "Style Mel GAN|12 Feb 2021|mel<br>noise|adv<br>mr-STFT|FB-RAWs|No|150W|——|32|58<br>24|——|0.01343|\n",
    "\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 网络结构\n",
    "## Mel GAN\n",
    "<center><img src=\"./imgs/melgan.png\"></center>\n",
    "<br><center>Mel GAN 网络结构图</center></br>\n",
    "\n",
    "## Parallel Wave GAN\n",
    "<center><img src=\"./imgs/pwg.png\"></center>\n",
    "<br><center>Parallel Wave GAN 网络结构图</center></br>\n",
    "\n",
    "## HiFi GAN\n",
    "<center><img src=\"./imgs/hifigan_gen.png\" width=900></center>\n",
    "<br><center>HiFi GAN 生成器网络结构图</center></br>\n",
    "\n",
    "<br></br>\n",
    "\n",
    "<center><img src=\"./imgs/hifigan_dis.png\" width=900></center>\n",
    "<br><center>HiFi GAN 判别器网络结构图</center></br>\n",
    "\n",
    "## Multi-Band Mel GAN\n",
    "<center><img src=\"./imgs/mb_melgan.png\" width=500></center>\n",
    "<br><center>Multi-Band Mel GAN 网络结构图</center></br>\n",
    "\n",
    "## Style Mel GAN\n",
    "<center><img src=\"./imgs/style_melgan_TADE.png\" width=500></center>\n",
    "<br><center>Style Mel GAN TADE 网络结构图</center></br>\n",
    "\n",
    "<br></br>\n",
    "\n",
    "<center><img src=\"./imgs/style_melgan_gen.png\" width=500></center>\n",
    "<br><center>Style Mel GAN 生成器网络结构图</center></br>\n",
    "\n",
    "<br></br>\n",
    "\n",
    "<center><img src=\"./imgs/style_melgan_dis.png\" width=500></center>\n",
    "<br><center>Style Mel GAN 判别器网络结构图</center></br>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 需要注意的点\n",
    "## 输入\n",
    "1. 一般情况下，若训练时输入中没有 `noise`，容易过拟合，需要 finetune\n",
    "    - 参考 [espent issue](https://github.com/espnet/espnet/issues/3536)\n",
    "2. 若输入中有 `noise`, 在预测时需要自己在 `inference` 代码中生成 `noise`, 而不能作为参数输入给 `inference`, 否则动转静可能走不通\n",
    "    - 参考 [pwgan 动转静修复 pr](https://github.com/PaddlePaddle/Parakeet/pull/132/files)\n",
    "\n",
    "\n",
    "## 生成器\n",
    "1. `hop_size` 和 `n_shift` 的含义一样\n",
    "2. `upsample_scales` 的乘积一定等于 `hop_size`\n",
    "3. `采样点 = hop_size * 帧数`\n",
    "4. `librosa 帧数 = 采样点 // hop_size + 1`, 具体要不要 `+1` 看不同的库，看 `center` 这个参数 \n",
    "5. `Mel GAN` 和 `Multi-Band Mel GAN` 生成器的代码是一样的，只是参数不一样，通道数不一样\n",
    "6. `Parallel Wave GAN` 的生成器是 `WaveNet` like\n",
    "    - 用非因果卷积替换了因果卷积\n",
    "    - 输入是满足高斯分布的随机噪声\n",
    "    - 训练和预测时都是非自回归的\n",
    "7. `Style MelGAN` 的 noise 的上采样需要额外注意，输入的长度是固定的\n",
    "    - `batch_max_steps(24000) == prod(noise_upsample_scales)(80) * prod(upsample_scales)(300, n_shift)`\n",
    "\n",
    "## 判别器\n",
    "1. HiFi GAN 判别器的能力很强\n",
    "\n",
    "## 速度\n",
    "1. 为什么 `Multi-Band Mel GAN` 的预测会更快？因为上采样的倍数变为了原来的 `1/4`"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# FFT 在语音合成声码器上的应用\n",
    "\n",
    "语音合成是一种将任意文本转换成语音的技术，目前在深度学习领域，语音合成主要分为 `3` 个模块:\n",
    "- 文本前端\n",
    "- 声学模型\n",
    "- 声码器\n",
    "\n",
    "其中，文本前端模块将输入文本转换为音素序列或语言学特征；声学模型将音素序列或语言学特征转换为声学特征，在语音合成领域，常用的声学特征是 mel 频谱；声码器将声学特征转换为语音波形。\n",
    "\n",
    "声码器的输入是频域特征 mel 频谱图，输出是对应的语音波形。\n",
    "\n",
    "STFT 全称 Short-Time Fourier Transform，短时傅里叶变换，它是用滑动帧 FFT 生成频率与时间的 2D 矩阵，通常被称为频谱图（Spectrogram）, 而人耳对于频率的敏感程度是非线性的，可以通过 mel 三角滤波器对频谱图处理，生成 mel 频谱图。\n",
    "\n",
    "生成 mel 频谱图的计算离不开 fft 系列的算子，若模型的输入是 mel 频谱图，可以使用 `librosa` 等科学计算库进行计算再输入模型。然而，现有的大多数基于 `GAN` 的声码器模型，在计算 `loss` 时需要将生成器合成的音频及原始音频转换到频率域再做计算，这时需要用到短时傅里叶变换算子 `stft`，且由于 `stft` 算子出现在了模型图中，其需要参与到模型的前向和反向计算过程中，此时，则需要深度学习框架提供 `stft` 算子。\n",
    "\n",
    "最新的 `PaddleSpeech` 语音合成模块的声码器，用到了 paddle 2.2.0 提供的 fft 系列算子 `paddle.signal.stft`。\n",
    "\n",
    "`PaddleSpeech` 模型库目前已经实现的基于 `GAN` 的声码器包括 `Parallel WaveGAN`、`Multi Band MelGAN`、`HiFiGAN` 和 `Style MelGAN`，这些模型的 `loss` 中都包含基于 `stft` 算子的 `loss`，其中主要包含 `Multi-resolution STFT loss` 和 `Mel-Spectrogram Loss`。\n",
    "\n",
    "`Multi-resolution STFT loss` 公式如下所示：\n",
    "\n",
    "![image](./imgs/stft_loss_0.png)\n",
    "\n",
    "![image](./imgs/stft_loss_1.png)\n",
    "\n",
    "![image](./imgs/stft_loss_2.png)\n",
    "\n",
    "\n",
    "`Mel-Spectrogram Loss` 公式如下所示：\n",
    "\n",
    "![image](./imgs/mel_loss.png)\n",
    "\n",
    "\n",
    "其中 `Φ` 表示将音频转换为对应 mel 频谱的函数。\n",
    "\n",
    "如上述公式所示，现在主流的基于 `GAN` 的声码器的 `loss` 设计需要用到 `stft`，在 Paddle 中尚未实现 fft 系列算子时，`PaddleSpeech` 模型库使用基于 `Conv1D` 算子的函数来模拟 `stft` 算子，然而经过计算，该模拟函数前向结果正确，反向梯度计算结果不正确，这导致了模型收敛效果不佳，听感略差于竞品。\n",
    "\n",
    "Paddle 主框架中加入 fft 系列算子后，我们将语音合成声码器 loss 模块中的基于 `Conv1D` 的 `stft` 均替换为 `paddle.signal.stft`，在模型收敛效果和合成音频听感上，`paddle.signal.stft` 的效果明显优于基于 `Conv1D` 的 `stft` 实现。\n",
    "\n",
    "以 `Parallel WaveGAN` 模型为例，我们复现了基于 `Pytorch` 和基于 `Paddle` 的 `Parallel WaveGAN`，并保持模型结构完全一致，在相同的实验环境下，基于 `Paddle` 的模型收敛速度比基于 `Pytorch` 的模型快 `10.4%`, 而基于 `Conv1D` 的 `stft` 实现的 Paddle 模型的收敛速度和收敛效果和收敛速度差于基于 `Pytorch` 的模型，更明显差于基于 `Paddle` 的模型，所以可以认为 `paddle.signal.stft` 算子大幅度提升了 `Parallel WaveGAN` 模型的效果。\n",
    "\n",
    "![image](https://paddlespeech.cdn.bcebos.com/Parakeet/docs/images/pwg_vs.png)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.7.0 64-bit ('yt_py37_develop': venv)",
   "language": "python",
   "name": "python37064bitytpy37developvenv88cd689abeac41d886f9210a708a170b"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}


================================================
FILE: docs/topic/package_release/python_package_release.md
================================================
# 简化安装与发包

## 问题：

1. [如何去除 ubuntu 的 apt 安装依赖？](#conda-代替系统依赖)
2. [如何支持普通用户和开发者两种安装的需求，尽量减少普通用户所需的依赖？](#区分install模式和develop模式)
3. [如何进行 python 包的动态安装？](#python-包的动态安装)
4. [如何进行 python 项目编包？](#python-编包方法)
5. [发包前要有什么准备？](#关于发包前的准备工作)
6. [发 C++ 包需要注意的东西？](#manylinux)


## conda 代替系统依赖

conda 可以用来代替一些 apt-get 安装的系统依赖，这样可以让项目适用于除了 ubuntu 以外的系统。

使用 conda 可以安装 sox、 libsndfile、swig 等 paddlespeech 需要的依赖：

```bash
conda install -y -c conda-forge sox libsndfile
```

部分系统会缺少 libbzip2 库，这个 paddlespeech 也是需要的，这也可以用 conda 安装：

```bash
conda install -y -c conda-forge bzip2
```

conda 也可以安装 linux 的 C++ 的依赖：

```bash
conda install -y -c conda-forge gcc_linux-64=8.4.0 gxx_linux-64=8.4.0
```

#### 剩余问题：使用 conda 环境编译 kenlm 失败。目前在 conda 环境下编译 kenlm 会出现链接失败的问题

目前知道需要的依赖：

```bash
conda install -c conda-forge eigen boost cmake
```

## 区分install模式和develop模式

可以在 setup.py 中划分 install 的依赖（基本依赖）和 develop 的依赖 （开发者额外依赖）。 setup_info 中 `install_requires` 设置 install 的依赖，而在 `extras_require` 中设置 `develop` key 为 develop 的依赖。
普通安装可以使用：

```bash
pip install . 
```

另外使用 pip 安装已发的包也是使用普通安装的：

```
pip install paddlespeech
```

而开发者可以使用如下方式安装，这样不仅会安装 install 的依赖，也会安装 develop 的依赖， 即：最后安装的依赖 = install 依赖 + develop 依赖：

```bash
pip install -e .[develop]
```

## python 包的动态安装

可以使用 pip 包来实现动态安装：

```python
import pip
if int(pip.__version__.split('.')[0]) > 9:
    from pip._internal import main
else:
    from pip import main
main(['install', package_name])
```

## python 编包方法

#### 创建 pypi的账号

创建 pypi 账号

#### 下载 twine

```
pip install twine
```

#### python 编包

编写好 python 包的 setup.py, 然后使用如下命令编 wheel 包：

```bash
python setup.py bdist_wheel
```

如果要编源码包，用如下命令：

```bash
python setup.py sdist
```

#### 上传包

```bash
twine upload dist/wheel包
```

输入账号和密码后就可以上传 wheel 包了

#### 关于python 包的发包信息

主要可以参考这个[文档](https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/?highlight=find_packages)


## 关于发包前的准备工作

#### 拉分支
在发包之前需要拉分支。例如需要发 0.1.0 版本的正式包，则需要拉一个 r0.1 的分支。并且在这个 r0.1 分支的包上面打 0.1.0 的tag。在拉分支之前可以选择性的使用 rc 版本发一个正式版前的试用包，例如0.1.0rc0，等到rc包测试通过后，再拉分支（如果是发 0.1.1 包，则 merge r0.1分支），打tag，完成发包。

关于打tag的命令，可以参考[git 基础](https://git-scm.com/book/zh/v2/Git-%E5%9F%BA%E7%A1%80-%E6%89%93%E6%A0%87%E7%AD%BE)。使用轻量标签即可:
```bash
git tag r0.1.1 commit_id
```
然后使用`git push` 把本地 tag 传到远程 repo 上即可 
```bash
git push origin r0.1.1
```
打完 tag 后要记得编写 release note。

最后，发包准备工作的步骤可以总结为：  
- 用 develop 分支发 rc 包
- rc 包通过后拉分支
- 打 tag
- 发包
- 编写 release note


## ManyLinux

为了让有 C++ 依赖的 pip wheel 包可以适用于更多的 linux 系统，需要降低其本身的 glibc 的依赖。这就需要让 pip wheel 包在 manylinux 的 docker 下编包。关于查看系统的 glibc 版本，可以使用命令：`ldd --version`。

### Manylinux

关于 Manylinux，主要可以参考 Github 项目的说明[ github many linux](https://github.com/pypa/manylinux)。
manylinux1 支持 Centos5以上， manylinux2010 支持 Centos 6 以上，manylinux2014 支持Centos 7 以上。
目前使用 manylinux2010 基本可以满足所有的 linux 生产环境需求。（不建议使用manylinux1，系统较老，难度较大）

### 拉取 manylinux2010

```bash
docker pull quay.io/pypa/manylinux2010_x86_64
```

### 使用 manylinux2010

启动 manylinux2010 docker。

```bash
docker run -it xxxxxx
```

在 manylinux2010 的docker环境自带 swig 和各种类型的 python 版本。这里注意不要自己下载 conda 来安装环境来编译 pip 包，要用 docker 本身的环境来编包。
设置python：

```bash
export PATH="/opt/python/cp38-cp38/bin/:$PATH"
#export PATH="/opt/python/cp39-cp39/bin/:$PATH"
```

随后正常编包，编包后需要使用 [auditwheel](https://github.com/pypa/auditwheel) 来降低编好的wheel包的版本。
显示 wheel 包的 glibc 依赖版本

```bash
auditwheel show wheel包
```

降低 wheel包的版本

```bash
auditwheel repair wheel包
```


================================================
FILE: docs/tutorial/.gitkeep
================================================


================================================
FILE: docs/tutorial/asr/tutorial_deepspeech2.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "<a href=\"https://github.com/PaddlePaddle/PaddleSpeech\"><img style=\"position: absolute; z-index: 999; top: 0; right: 0; border: 0; width: 128px; height: 128px;\" src=\"https://nosir.github.io/cleave.js/images/right-graphite@2x.png\" alt=\"Fork me on GitHub\"></a>\n",
    "\n",
    "\n",
    "# 语音识别——DeepSpeech2\n",
    "  \n",
    "# 0. 视频理解与字幕"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# 下载demo视频\n",
    "!test -f work/source/subtitle_demo1.mp4 || wget https://paddlespeech.cdn.bcebos.com/demos/asr_demos/subtitle_demo1.mp4 -P work/source/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import IPython.display as dp\n",
    "from IPython.display import HTML\n",
    "html_str = '''\n",
    "<video controls width=\"600\" height=\"360\" src=\"{}\">animation</video>\n",
    "'''.format(\"work/source/subtitle_demo1.mp4 \")\n",
    "dp.display(HTML(html_str))\n",
    "print (\"ASR结果为：当我说我可以把三十年的经验变成一个准确的算法他们说不可能当我说我们十个人就能实现对十九个城市变电站七乘二十四小时的实时监管他们说不可能\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
 Demo实现">
    "> Demo实现：[https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/automatic_video_subtitiles/](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/automatic_video_subtitiles/)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "\n",
    "# 1. 前言"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "## 1.1 背景知识\n",
    "语音识别(Automatic Speech Recognition, ASR) 是一项从一段音频中提取出语言文字内容的任务。\n",
    "\n",
    "<div align=center>\n",
    "<img src=\"https://ai-studio-static-online.cdn.bcebos.com/0231a71b0617485d85586d232f65db6379115befdf014068bd90fb15c5786c94\"/>\n",
    "<br>\n",
    "(出处：DLHLP 李宏毅 语音识别课程PPT)\n",
    "</div>\n",
    "\n",
    "目前该技术已经广泛应用于我们的工作和生活当中，包括生活中使用手机的语音转写，工作上使用的会议记录等等。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "## 1.2 发展历史\n",
    "\n",
    "\n",
    "* 早期，生成模型流行阶段：GMM-HMM (上世纪90年代)\n",
    "* 深度学习爆发初期： DNN，CTC[1] （2006）\n",
    "* RNN流行，Attention提出初期: RNN-T[2]（2013）, DeepSpeech[3] (2014)， DeepSpeech2 [4] (2016)， LAS[5]（2016）\n",
    "* Attetion is all you need提出开始[6]: Transformer[6]（2017），Transformer-transducer[7]（2020） Conformer[8] （2020\n",
    "\n",
    "<div align=center>\n",
    "<img src=\"https://ai-studio-static-online.cdn.bcebos.com/d6060426bba341a187422803c0f8ac2e2162c5c5422e4070a3425c09f7801379\" height=1300, width=1000 />\n",
    "</div>\n",
    "\n",
    "Deepspeech2模型包含了CNN，RNN，CTC等深度学习语音识别的基本技术，因此本教程采用了Deepspeech2作为讲解深度学习语音识别的开篇内容。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "# 2. 实战：使用 DeepSpeech2 进行语音识别的流程\n",
    "\n",
    "Deepspeech2 模型，其主要分为3个部分：\n",
    "1. 特征提取模块：此处使用 linear 特征，也就是将音频信息由时域转到频域后的信息。\n",
    "2. Encoder：多层神经网络，用于对特征进行编码。\n",
    "3. CTC Decoder： 采用了 CTC 损失函数训练；使用 CTC 解码得到结果。\n",
    "\n",
    "<div align=center>\n",
    "<img src=\"https://ai-studio-static-online.cdn.bcebos.com/f7268c47d55e487cbb97f123785cab248f4371072381465a8b43af33f4abdf83\" height=1000, width=800/>\n",
    "</div>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "## 2.1 Deepspeech2 模型结构\n",
    "\n",
    "### 2.1.1 Encoder\n",
    "\n",
    "\n",
    "Encoder 主要采用了 2 层降采样的 CNN（subsampling Convolution layer）和多层 RNN（Recurrent Neural Network）层组成。\n",
    "\n",
    "其中降采样的 CNN 主要用途在提取局部特征，减少模型输入的帧数，降低计算量，并易于模型收敛。\n",
    "\n",
    "\n",
    "  \n",
    "#### 2.1.1.1 CNN: Receptive field\n",
    "\n",
    "假如以 $F_j$ 代表 $L_j$ 的 cnn 滤波器大小, $S_i$ 代表 $L_i$ 的CNN滤波器跳跃长度，并设定 $S_0 = 1$。那么 $L_k$ 的感受野大小可以由以下公式计算：\n",
    "\n",
    "$$\\boxed{R_k = 1 + \\sum_{j=1}^{k} [(F_j - 1) \\prod_{i=0}^{j-1} S_i]}$$\n",
    "在下面的例子中, $F_1 = F_2 = 3$ 并且 $S_1 = S_2 = 2$, 因此可以得到 $R_2 = 1 + 2\\cdot 1 + 2\\cdot 2 = 7$\n",
    "\n",
    "<div align=center>\n",
    "<img src=\"https://ai-studio-static-online.cdn.bcebos.com/b3c53b7d1b2944acb91520454f5b27be0e0e0af110e24959942d85e87992d6d0\"  />\n",
    "</div>\n",
    "\n",
    "  \n",
    "#### 2.1.1.2 RNN\n",
    "\n",
    "  而多层 RNN 的作用在于获取语音的上下文信息，这样可以获得更加准确的信息，并一定程度上进行语义消歧。\n",
    "  \n",
    "Deepspeech2 的模型中 RNNCell 可以选用 GRU 或者 LSTM。\n",
    "  \n",
    "\n",
    "#### 2.1.1.3 Softmax\n",
    "而最后 softmax 层将特征向量映射到为一个字表长度的向量，向量中存储了当前 step 结果预测为字表中每个字的概率。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "\n",
    "### 2.1.2 Decoder\n",
    "Decoder 的作用主要是将 Encoder 输出的概率解码为最终的文字结果。\n",
    "\n",
    "对于 CTC 的解码主要有3种方式：\n",
    "\n",
    "* CTC greedy search \n",
    "\n",
    "* CTC beam search \n",
    "\n",
    "* CTC Prefix beam search\n",
    "\n",
    "#### 2.1.2.1 CTC Greedy Search\n",
    "\n",
    "在每个时间点选择后验概率最大的 label 加入候选序列中，最后对候选序列进行后处理，就得到解码结果。\n",
    "\n",
    "<div align=center>\n",
    "<img src=\"https://ai-studio-static-online.cdn.bcebos.com/d6e90cf5d20047ddbcdba5ff13c6c96a9ffca20faaa84927a6012fa1927e5b8d\" height=800, width=500 />\n",
    "</div>\n",
    "\n",
    "\n",
    "#### 2.1.2.2 CTC Beam Search\n",
    "\n",
    "CTC Beam Search 的方式是有 beam size 个候选序列，并在每个时间点生成新的最好的 beam size 个候选序列。\n",
    "最后在 beam size 个候选序列中选择概率最高的序列生成最终结果。\n",
    "\n",
    "\n",
    "<div align=center>\n",
    "<img src=\"https://ai-studio-static-online.cdn.bcebos.com/98560261bc584c209a339396296d56559c6daa9a9bb74532b014c6d7ee4b7a5f\" height=1200, width=800 />\n",
    "  <br>\n",
    "  引用自[9]\n",
    "</div>\n",
    "\n",
    "#### 2.1.2.3 CTC Prefix Beam Search\n",
    "\n",
    "CTC prefix beam search和 CTC beam search 的主要区别在于：\n",
    "\n",
    "CTC beam search 在解码过程中产生的候选有可能产生重复项，而这些重复项在 CTC beam search 的计算过程中是各自独立的，占用了 beam 数，降低解码的多样性和鲁棒性。\n",
    "\n",
    "而 CTC prefix beam search 在解码过程中合并了重复项的概率，提升解码的鲁棒性和多样性。\n",
    "\n",
    "<div align=center>\n",
    "<img src=\"https://ai-studio-static-online.cdn.bcebos.com/3d8c949abd0948f1be35ad2b3cf9a0100be1dcc771724d9681b5b31fc1fd4fa6\" height=1200, width=800  />\n",
    "  <br>\n",
    "  引用自[9]\n",
    "</div>\n",
    "\n",
    "CTC prefix beam search 计算过程如下图所示：\n",
    "\n",
    "<div align=center>\n",
    "<img src=\"https://ai-studio-static-online.cdn.bcebos.com/7a779a8687054d559515153d9ab35aa5e9f54676f5b64984960c62f6998a0ce3\" height=1200, width=800  />\n",
    "  <br>\n",
    "  引用自[10]\n",
    "</div>\n",
    "\n",
    "\n",
    "> [CTCLoss](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/topic/ctc/) 相关介绍参看 [Topic](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/topic/) 内容。\n",
    "\n",
    "#### 2.1.2.4 使用 N-gram 语言模型\n",
    "\n",
    "对于解码的候选结果的打分，除了有声学模型的分数外，还会有额外的语言模型分以及长度惩罚分。\n",
    "\n",
    "\n",
    "设定 $W$ 为解码结果，$X$ 为输入语音， $\\alpha$ 和 $\\beta$ 为设定的超参数。\n",
    "则最终分数的计算公式为:\n",
    "$$\n",
    "score = P_{am}(W \\mid X) \\cdot P_{lm}(W) ^ \\alpha \\cdot |W|^\\beta\n",
    "$$\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "\n",
    "## 2.2 准备工作\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "\n",
    "### 2.2.1 安装 paddlespeech\n",
    "    \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!pip install -U pip paddlepaddle-gpu && pip install paddlespeech"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "\n",
    "  \n",
    "### 2.2.2 准备工作目录\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!mkdir -p ./work/workspace_asr_ds2\n",
    "%cd ./work/workspace_asr_ds2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "\n",
    "### 2.2.3 获取预训练模型和相关文件\n",
    "  \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!test -f ds2.model.tar.gz || wget -nc https://paddlespeech.cdn.bcebos.com/s2t/aishell/asr0/ds2.model.tar.gz\n",
    "!tar xzvf ds2.model.tar.gz"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# 构建一个数据增强的配置文件，由于预测不需要数据增强，因此文件为空即可\n",
    "!touch conf/augmentation.json\n",
    "# 下载语言模型\n",
    "!mkdir -p data/lm\n",
    "!test -f ./data/lm/zh_giga.no_cna_cmn.prune01244.klm || wget -nc https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm -P data/lm\n",
    "# 获取用于预测的音频文件\n",
    "!test -f ./data/demo_01_03.wav || wget -nc https://paddlespeech.cdn.bcebos.com/datasets/single_wav/zh/demo_01_03.wav -P ./data/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import IPython\n",
    "IPython.display.Audio('./data/demo_01_03.wav')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# 快速体验识别结果\n",
    "!paddlespeech asr --input ./data/demo_01_03.wav"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "\n",
    "\n",
    "### 2.2.4 导入python包\n",
    "    \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import paddle\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "from yacs.config import CfgNode\n",
    "\n",
    "from paddlespeech.s2t.frontend.speech import SpeechSegment\n",
    "from paddlespeech.s2t.frontend.normalizer import FeatureNormalizer\n",
    "from paddlespeech.s2t.frontend.featurizer.audio_featurizer import AudioFeaturizer\n",
    "from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer\n",
    "\n",
    "from paddlespeech.s2t.io.collator import SpeechCollator\n",
    "\n",
    "from paddlespeech.s2t.models.ds2 import DeepSpeech2Model\n",
    "\n",
    "from  matplotlib import pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "\n",
    "\n",
    "### 2.2.5 设置预训练模型的路径\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "config_path = \"conf/deepspeech2.yaml\" \n",
    "checkpoint_path = \"./exp/deepspeech/checkpoints/avg_1.pdparams\"\n",
    "audio_file = \"data/demo_01_03.wav\"\n",
    "\n",
    "\n",
    "# 读取 conf 文件并结构化\n",
    "ds2_config = CfgNode(new_allowed=True)\n",
    "ds2_config.merge_from_file(config_path)\n",
    "print(ds2_config)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "\n",
    "\n",
    "## 2.3 获取特征\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "### 2.3.1 语音特征介绍\n",
    "  \n",
    "#### 2.3.1.1 语音特征提取整体流程图\n",
    "\n",
    "<div align=center>\n",
    "<img src=\"https://ai-studio-static-online.cdn.bcebos.com/54aefbc16dbf4487a7abe38b0210e5dbf1bb0c74fbe4459f94880a06950269f9\" height=1200, width=800  />\n",
    "<br>\n",
    "由\"莊永松、柯上優 DLHLP - HW1 End-to-end Speech Recognition PPT\" 修改得\n",
    "</div>\n",
    "\n",
    "#### 2.3.1.2 fbank 提取过程简化图\n",
    "\n",
    "\n",
    "fbank 特征提取大致可以分为3个步骤：\n",
    "\n",
    "1. 语音时域信号经过预加重（信号高频分量补偿），然后进行分帧。\n",
    "\n",
    "2. 每一帧数据加窗后经过离散傅立叶变换（DFT）得到频谱图。\n",
    "\n",
    "3. 将频谱图的特征经过 Mel 滤波器得到 logmel fbank 特征。\n",
    "\n",
    "<div align=center>\n",
    "<img src=\"https://ai-studio-static-online.cdn.bcebos.com/08f7ccecc848495599c350aa2c440071b818ba0465734dd29701a2ff149f0a8c\" height=1200, width=800 />\n",
    "<br>\n",
    "由\"DLHLP 李宏毅 语音识别课程PPT\" 修改得\n",
    "</div>\n",
    "\n",
    "#### 2.3.1.3 CMVN 计算过程\n",
    "\n",
    "对于所有获取的特征，模型在使用前会使用 CMVN 的方式进行归一化\n",
    "\n",
    "<div align=center>\n",
    "  <img src=\"https://ai-studio-static-online.cdn.bcebos.com/46df63199d88481d9a2713a45ce63d00220e8ac42f9940e886282017758b54bf\" height=1200, width=800  />\n",
    "</div>\n",
    "\n",
    " "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "\n",
    "  \n",
    "### 2.3.2 构建音频特征提取对象\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "feat_config = ds2_config.collator\n",
    "audio_featurizer = AudioFeaturizer(\n",
    "    spectrum_type=feat_config.spectrum_type,\n",
    "    feat_dim=feat_config.feat_dim,\n",
    "    delta_delta=feat_config.delta_delta,\n",
    "    stride_ms=feat_config.stride_ms,\n",
    "    window_ms=feat_config.window_ms,\n",
    "    n_fft=feat_config.n_fft,\n",
    "    max_freq=feat_config.max_freq,\n",
    "    target_sample_rate=feat_config.target_sample_rate,\n",
    "    use_dB_normalization=feat_config.use_dB_normalization,\n",
    "    target_dB=feat_config.target_dB,\n",
    "    dither=feat_config.dither)\n",
    "feature_normalizer = FeatureNormalizer(feat_config.mean_std_filepath) if feat_config.mean_std_filepath else None"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "\n",
    "  \n",
    "### 2.3.3 提取音频的特征\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# 'None' 只是一个占位符，因为预测的时候不需要reference\n",
    "speech_segment = SpeechSegment.from_file(audio_file, \"None\")\n",
    "audio_feature = audio_featurizer.featurize(speech_segment)\n",
    "audio_feature_i = feature_normalizer.apply(audio_feature)\n",
    "\n",
    "audio_len = audio_feature_i.shape[0]\n",
    "audio_len = paddle.to_tensor(audio_len)\n",
    "audio_feature = paddle.to_tensor(audio_feature_i, dtype='float32')\n",
    "audio_feature = paddle.unsqueeze(audio_feature, axis=0)\n",
    "print(f\"shape: {audio_feature.shape}\")\n",
    "\n",
    "plt.figure()\n",
    "plt.imshow(audio_feature_i.T, origin='lower')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "\n",
    "  \n",
    "## 2.4 使用模型获得结果\n",
    "    \n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "\n",
    "  \n",
    "### 2.4.1 构建Deepspeech2模型\n",
    "  \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "model_conf = ds2_config.model\n",
    "# input dim is feature size\n",
    "model_conf.input_dim = 161\n",
    "# output_dim is vocab size\n",
    "model_conf.output_dim = 4301\n",
    "model = DeepSpeech2Model.from_config(model_conf)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "\n",
    "  \n",
    "### 2.4.2 加载预训练的模型\n",
    "  \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "model_dict = paddle.load(checkpoint_path)\n",
    "model.set_state_dict(model_dict)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "\n",
    "  \n",
    "### 2.4.3 进行预测\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "decoding_config = ds2_config.decoding\n",
    "print (decoding_config)\n",
    "text_feature = TextFeaturizer(unit_type='char',\n",
    "                            vocab=ds2_config.collator.vocab_filepath)\n",
    "\n",
    "\n",
    "result_transcripts = model.decode(\n",
    "        audio_feature,\n",
    "        audio_len,\n",
    "        text_feature.vocab_list,\n",
    "        decoding_method=decoding_config.decoding_method,\n",
    "        lang_model_path=decoding_config.lang_model_path,\n",
    "        beam_alpha=decoding_config.alpha,\n",
    "        beam_beta=decoding_config.beta,\n",
    "        beam_size=decoding_config.beam_size,\n",
    "        cutoff_prob=decoding_config.cutoff_prob,\n",
    "        cutoff_top_n=decoding_config.cutoff_top_n,\n",
    "        num_processes=decoding_config.num_proc_bsearch)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "print (\"预测结果为:\")\n",
    "print (result_transcripts[0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "\n",
    "# 3. 总结\n",
    "\n",
    "* CTC 帮助模型学习语音和 label 之间的 alignment。\n",
    "* CTC 可以做到帧同步解码，非常适合做流式模型。\n",
    "* CTC 的输出之间是相互独立的，相对于 Seq2Seq 其建模能力差，一般需要外挂 LM 才能得到好的结果。\n",
    "\n",
    "\n",
    "# 4. 作业 \n",
    "1. 使用开发模式安装 [PaddleSpeech](https://github.com/PaddlePaddle/PaddleSpeech)  \n",
    "环境要求：docker, Ubuntu 16.04，root user。  \n",
    "参考安装方法：[使用Docker安装paddlespeech](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md#hard-get-the-full-funciton-on-your-mechine)\n",
    "\n",
    "2. 跑通 example/aishell/asr1 中的 conformer 模型，完成训练和预测。 \n",
    "\n",
    "3. 按照 example 的格式使用自己的数据集训练 ASR 模型。 \n",
    "\n",
    "\n",
    "# 5. 关注 PaddleSpeech\n",
    "\n",
    "请关注我们的 [Github Repo](https://github.com/PaddlePaddle/PaddleSpeech/)，非常欢迎加入以下微信群参与讨论：\n",
    "- 扫描二维码\n",
    "- 添加运营小姐姐微信\n",
    "- 通过后回复【语音】\n",
    "- 系统自动邀请加入技术群\n",
    "\n",
    "\n",
    "<center><img src=\"https://ai-studio-static-online.cdn.bcebos.com/87bc7da42bcc401bae41d697f13d8b362bfdfd7198f14096b6d46b4004f09613\" width=\"300\" height=\"300\" ></center>\n",
    "\n",
    "\n",
    "# 6. 参考文献\n",
    "\n",
    "[1] Graves A, Fernández S, Gomez F, et al. Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks[C]//Proceedings of the 23rd international conference on Machine learning. 2006: 369-376.\n",
    "\n",
    "[2] Graves A, Mohamed A, Hinton G. Speech recognition with deep recurrent neural networks[C]//2013 IEEE international conference on acoustics, speech and signal processing. Ieee, 2013: 6645-6649.\n",
    "\n",
    "[3] Hannun A, Case C, Casper J, et al. Deep speech: Scaling up end-to-end speech recognition[J]. arXiv preprint arXiv:1412.5567, 2014.\n",
    "\n",
    "[4] Amodei D, Ananthanarayanan S, Anubhai R, et al. Deep speech 2: End-to-end speech recognition in english and mandarin[C]//International conference on machine learning. PMLR, 2016: 173-182.\n",
    "\n",
    "[5] Chan W, Jaitly N, Le Q, et al. Listen, attend and spell: A neural network for large vocabulary conversational speech recognition[C]//2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 2016: 4960-4964.\n",
    "\n",
    "[6] Vaswani A, Shazeer N, Parmar N, et al. Attention is all you need[C]//Advances in neural information processing systems. 2017: 5998-6008.\n",
    "\n",
    "[7] Zhang Q, Lu H, Sak H, et al. Transformer transducer: A streamable speech recognition model with transformer encoders and rnn-t loss[C]//ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 2020: 7829-7833.\n",
    "\n",
    "[8] Gulati A, Qin J, Chiu C C, et al. Conformer: Convolution-augmented transformer for speech recognition[J]. arXiv preprint arXiv:2005.08100, 2020.\n",
    "\n",
    "[9] Retrieved 2021-12-6，from \"Sequence Modeling With CTC\": https://distill.pub/2017/ctc/#inference\n",
    "\n",
    "[10] Hannun A Y, Maas A L, Jurafsky D, et al. First-pass large vocabulary continuous speech recognition using bi-directional recurrent dnns[J]. arXiv preprint arXiv:1408.2873, 2014."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "py35-paddle1.2.0"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}


================================================
FILE: docs/tutorial/asr/tutorial_transformer.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "<a href=\"https://github.com/PaddlePaddle/PaddleSpeech\"><img style=\"position: absolute; z-index: 999; top: 0; right: 0; border: 0; width: 128px; height: 128px;\" src=\"https://nosir.github.io/cleave.js/images/right-graphite@2x.png\" alt=\"Fork me on GitHub\"></a>\n",
    "  \n",
    "# 使用 Transformer 进行语音识别\n",
    "\n",
    "# 0. 视频理解与字幕"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# 下载demo视频\n",
    "!test -f work/source/subtitle_demo1.mp4 || wget -c https://paddlespeech.cdn.bcebos.com/demos/asr_demos/subtitle_demo1.mp4 -P work/source/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import IPython.display as dp\n",
    "from IPython.display import HTML\n",
    "html_str = '''\n",
    "<video controls width=\"600\" height=\"360\" src=\"{}\">animation</video>\n",
    "'''.format(\"work/source/subtitle_demo1.mp4 \")\n",
    "dp.display(HTML(html_str))\n",
    "print (\"ASR结果为：当我说我可以把三十年的经验变成一个准确的算法他们说不可能当我说我们十个人就能实现对十九个城市变电站七乘二十四小时的实时监管他们说不可能\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "> Demo实现：[https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/automatic_video_subtitiles/](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/demos/automatic_video_subtitiles/)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "# 1. 前言\n",
    "\n",
    "## 1.1 背景知识\n",
    "语音识别(Automatic Speech Recognition, ASR) 是一项从一段音频中提取出语言文字内容的任务。  \n",
    "目前该技术已经广泛应用于我们的工作和生活当中，包括生活中使用手机的语音转写，工作上使用的会议记录等等。\n",
    "\n",
    "<div align=center>\n",
    "<img src=\"https://ai-studio-static-online.cdn.bcebos.com/0231a71b0617485d85586d232f65db6379115befdf014068bd90fb15c5786c94\"/>\n",
    "<br>\n",
    "(出处：DLHLP 李宏毅 语音识别课程PPT)\n",
    "</div>\n",
    "<br></br>\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "## 1.2 发展历史\n",
    "\n",
    "\n",
    "* 早期，生成模型流行阶段：GMM-HMM (上世纪90年代)\n",
    "* 深度学习爆发初期： DNN，CTC[1] （2006）\n",
    "* RNN 流行，Attention 提出初期: RNN-T[2]（2013）, DeepSpeech[3] (2014)， DeepSpeech2 [4] (2016)， LAS[5]（2016）\n",
    "* Attention is all you need 提出开始[6]: Transformer[6]（2017），Transformer-transducer[7]（2020），Conformer[8]（2020）\n",
    "\n",
    "<div align=center>\n",
    "<img src=\"https://ai-studio-static-online.cdn.bcebos.com/d6060426bba341a187422803c0f8ac2e2162c5c5422e4070a3425c09f7801379\" height=1300, width=1000 />\n",
    "</div>\n",
    "\n",
    "目前 Transformer 和 Conformer 是语音识别领域的主流模型，因此本教程采用了 Transformer 作为讲解的主要内容，并在课后作业中布置了 Conformer 的相关练习。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "\n",
    "# 2. 实战：使用Transformer进行语音识别的流程\n",
    "\n",
    "CTC 的输出相互独立，使得每一帧利用上下文的信息的能力不足。\n",
    "\n",
    "而 seq2seq（Transformer，Conformer） 的模型采用自回归的解码方式，所以其建模能力更强，但不便于支持流式。\n",
    "\n",
    "对于Transformer模型，它的Encoder可以有效对语音特征的上下文进行建模。而它的Decoder具有语言模型的能力，能够将语言模型融合进整个模型中，是真正意义上的端到端模型。\n",
    "\n",
    "\n",
    "下面简单介绍下 Transformer 语音识别模型，其主要分为 2 个部分：\n",
    "\n",
    "- Encoder：声学特征会首先进入 Encoder，产生高层特征编码。\n",
    "\n",
    "- Decoder：Decoder 利用 Encoder 产生的特征编码解码得到预测结果。\n",
    "    \n",
    "<div align=center>\n",
    "<img src=\"https://ai-studio-static-online.cdn.bcebos.com/13bec64ab9544a3a91205a9633d9f015f2ddb0c3586d49ffb39307daed0229a0\" height=40%, width=50%/>\n",
    "</div>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "## 2.1 准备工作\n",
    "\n",
    "### 2.1.1 安装 paddlespeech"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!pip install -U pip paddlepaddle-gpu && pip install paddlespeech"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "### 2.1.2 准备工作目录"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!mkdir -p ./work/workspace_asr\n",
    "%cd ./work/workspace_asr"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "\n",
    "### 2.1.3 获取预训练模型和音频文件\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# 获取模型\n",
    "!test -f transformer.model.tar.gz || wget -nc https://paddlespeech.cdn.bcebos.com/s2t/aishell/asr1/transformer.model.tar.gz\n",
    "!tar xzvf transformer.model.tar.gz\n",
    "\n",
    "# 获取用于预测的音频文件\n",
    "!test -f ./data/demo_01_03.wav || wget -nc https://paddlespeech.cdn.bcebos.com/datasets/single_wav/zh/demo_01_03.wav -P ./data/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import IPython\n",
    "IPython.display.Audio('./data/demo_01_03.wav')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# 快速体验识别结果\n",
    "!paddlespeech asr --input ./data/demo_01_03.wav"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "### 2.1.4 导入python包"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import paddle\n",
    "import soundfile\n",
    "\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "from yacs.config import CfgNode\n",
    "from paddlespeech.audio.transform.spectrogram import LogMelSpectrogramKaldi\n",
    "from paddlespeech.audio.transform.cmvn import GlobalCMVN\n",
    "from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer\n",
    "from paddlespeech.s2t.models.u2 import U2Model\n",
    "\n",
    "from matplotlib import pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "### 2.1.5 设置预训练模型的路径"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "config_path = \"conf/transformer.yaml\" \n",
    "checkpoint_path = \"./exp/transformer/checkpoints/avg_20.pdparams\"\n",
    "decoding_method = \"attention\"\n",
    "audio_file = \"data/demo_01_03.wav\"\n",
    "\n",
    "# 读取 conf 文件并结构化\n",
    "transformer_config = CfgNode(new_allowed=True)\n",
    "transformer_config.merge_from_file(config_path)\n",
    "transformer_config.decoding.decoding_method = decoding_method\n",
    "print(transformer_config)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "## 2.2 获取特征\n",
    "\n",
    "### 2.2.1 音频特征 logfbank\n",
    "\n",
    "#### 2.2.1.1 语音特征提取整体流程图\n",
    "\n",
    "<div align=center>\n",
    "<img src=\"https://ai-studio-static-online.cdn.bcebos.com/54aefbc16dbf4487a7abe38b0210e5dbf1bb0c74fbe4459f94880a06950269f9\" height=1200, width=800 />\n",
    "<br>\n",
    "由\"莊永松、柯上優 DLHLP - HW1 End-to-end Speech Recognition PPT\" 修改得\n",
    "</div>\n",
    "\n",
    "#### 2.2.1.2 logfbank 提取过程简化图\n",
    "\n",
    "logfbank 特征提取大致可以分为 3 个步骤：\n",
    "\n",
    "1. 语音时域信号经过预加重（信号高频分量补偿），然后进行分帧。\n",
    "\n",
    "2. 每一帧数据加窗后经过离散傅立叶变换（DFT）得到频谱图。\n",
    "\n",
    "3. 将频谱图的特征经过 Mel 滤波器得到 logmel fbank 特征。\n",
    "\n",
    "<div align=center>\n",
    "<img src=\"https://ai-studio-static-online.cdn.bcebos.com/08f7ccecc848495599c350aa2c440071b818ba0465734dd29701a2ff149f0a8c\"/>\n",
    "<br>\n",
    "由\"DLHLP 李宏毅 语音识别课程 PPT\" 修改得\n",
    "</div>\n",
    "\n",
    "#### 2.2.1.3 CMVN 计算过程\n",
    "\n",
    "对于所有获取的特征，模型在使用前会使用 CMVN 的方式进行归一化\n",
    "\n",
    "<div align=center>\n",
    "  <img src=\"https://ai-studio-static-online.cdn.bcebos.com/46df63199d88481d9a2713a45ce63d00220e8ac42f9940e886282017758b54bf\"/>\n",
    "</div>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "### 2.2.2 构建音频特征提取对象"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# 构建 logmel 特征\n",
    "logmel_kaldi= LogMelSpectrogramKaldi(\n",
    "            fs= 16000,\n",
    "            n_mels= 80,\n",
    "            n_shift= 160,\n",
    "            win_length= 400,\n",
    "            dither= True)\n",
    "\n",
    "# 特征减均值除以标准差\n",
    "cmvn = GlobalCMVN(\n",
    "    cmvn_path=\"data/mean_std.json\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "### 2.2.3 提取音频的特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "array, _ = soundfile.read(audio_file, dtype=\"int16\")\n",
    "array = logmel_kaldi(array, train=False)\n",
    "audio_feature_i = cmvn(array)\n",
    "audio_len = audio_feature_i.shape[0]\n",
    "\n",
    "audio_len = paddle.to_tensor(audio_len)\n",
    "audio_feature = paddle.to_tensor(audio_feature_i, dtype='float32')\n",
    "audio_feature = paddle.unsqueeze(audio_feature, axis=0)\n",
    "print (audio_feature.shape)\n",
    "\n",
    "plt.figure()\n",
    "plt.imshow(audio_feature_i.T, origin='lower')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "## 2.3 使用模型获得结果\n",
    "\n",
    "### 2.3.1 Transformer 语音识别模型的结构\n",
    "\n",
    "\n",
    "Transformer 模型主要由 2 个部分组成，包括 Transformer Encoder 和 Transformer Decoder。 \n",
    "\n",
    "<div align=center>\n",
    "<img src=\"https://ai-studio-static-online.cdn.bcebos.com/1edcd4ef683c4ef981b375ab8df388b40e3afc5f439f47f1a6f2f230908b63b1\" height=50%, width=50%  />\n",
    "</div>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "### 2.3.2 Transformer Encoder\n",
    "\n",
    "Transformer encoder 主要是对音频的原始特征（这里原始特征使用的是 80 维 logfbank）进行特征编码，其输入是 logfbank，输出是特征编码。包含：\n",
    "\n",
    "* 位置编码（position encoding）\n",
    "* 降采样模块(subsampling embedding)： 由2层降采样的 CNN 构成。\n",
    "* Transformer Encoder Layer  ： \n",
    "    * self-attention： 主要特点是Q(query), K(key)和V(value)都是用了相同的值\n",
    "    * Feed forward Layer： 由两层全连接层构建，其特点是保持了输入和输出的特征维度是一致的。\n",
    "\n",
    "\n",
    "#### 2.3.2.1 Self-Attention\n",
    "\n",
    "\n",
    "<div align=center>\n",
    "<img src=\"https://ai-studio-static-online.cdn.bcebos.com/72ffd9016d3841149723be2dde2a48c495ce8a95358946bca3736053812c788c\" height=50%, width=50%  />\n",
    "</div>\n",
    "\n",
    "其主要步骤可以分为三步：\n",
    "\n",
    "1. `Q` 和 `K` 的向量通过求内积的方式计算相似度，经过 scale 和 softmax 后，获得每个 `Q` 和所有`K` 之间的 score。\n",
    "\n",
    "2. 将每个 `Q` 和所有 `K` 之间的 score 和 `V` 进行相乘，再将相乘后的结果求和，得到 self-attention 的输出向量。\n",
    "\n",
    "3. 使用多个 Attention 模块均进行第一步和第二步，并将最后的输出向量进行合并，得到最终 Multi-Head Self-Attention 的输出。\n",
    "\n",
    "<div align=center>\n",
    "<img src=\"https://ai-studio-static-online.cdn.bcebos.com/fcdef1992e6d4c909403d603062d09e4d5adaff0226e4367b35d27aea2da1303\" height=30%, width=30%  />\n",
    "</div>\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "### 2.3.3 Transformer Decoder\n",
    "\n",
    "Transformer 的 Decoder 用于获取最后的输出结果。其结构和 Encoder 有一定的相似性，也具有 Attention 模块和 Feed forward layer。\n",
    "主要的不同点有 2 个：\n",
    "1. Decoder 采用的是一种自回归的方式进行解码。\n",
    "2. Decoder 在 Multi-head self-attention 和 Feed forward layer 模块之间增加了一层 Multi-head cross-attention 层用于获取 Encoder 得到的特征编码。\n",
    "\n",
    "\n",
    "#### 2.3.3.1 Masked Multi-head Self-Attention\n",
    "细心的同学可能发现了，Decoder 的一个 Multi-head self-attention 前面有一个 mask 。增加了这个 mask 的原因在于进行 Decoder 训练的时候，Decoder 的输入是一句完整的句子，而不是像预测这样一步步输入句子的前缀。\n",
    "\n",
    "为了模拟预测的过程，Decoder 训练的时候需要用 mask 遮住句子。 例如 `T=1` 时，就要 mask 输入中除第一个字符以外其他的字符，`T=2` 的时候则需要 mask 除前两个字符以外的其余字符。\n",
    "\n",
    "#### 2.3.3.2 Cross Attention\n",
    "\n",
    "Decoder 在每一步的解码过程中，都会利用 Encoder 的输出的特征编码进行 cross-attention。\n",
    "\n",
    "其中 Decoder 会将自回归结果的编码作为 Attention 中的 `Q` ，而 Encoder 输出的特征编码作为 `K` 和 `V` 来完成 attention 计算，从而利用 Encoder 提取的音频信息。\n",
    "\n",
    "<div align=center>\n",
    "<img src=\"https://ai-studio-static-online.cdn.bcebos.com/8e93122eb65344ea885a8af9014de4569b7c9c9f55aa45f7ac17ba2d0b0af260\" height=30%, width=30% />\n",
    "</div>\n",
    "\n",
    "#### 2.3.3.3 Decoder的自回归解码 \n",
    "\n",
    "其采用了一种自回归的结构，即 Decoder 的上一个时间点的输出会作为下一个时间点的输入。\n",
    "\n",
    "另外，计算的过程中，Decoder 会利用 Encoder 的输出信息。\n",
    "\n",
    "如果使用贪心（greedy）的方式，Decoder 的解码过程如下：\n",
    "\n",
    "<div align=center>\n",
    "<img src=\"https://ai-studio-static-online.cdn.bcebos.com/0acaf9f243304120832018b83a4b7c67b8d578f710ce4eeba6062ab9661ef9e7\" height=50%, width=50% />\n",
    "</div>\n",
    "\n",
    "使用 greedy 模式解码比较简单，但是很有可能会在解码过程中丢失整体上效果更好的解码结果。\n",
    "\n",
    "因此我们实际使用的是 beam search 方式的解码，beam search 模式下的 decoder 的解码过程如下：\n",
    "\n",
    "<div align=center>\n",
    "<img src=\"https://ai-studio-static-online.cdn.bcebos.com/367f8f7cd4b4451ab45dd883045c500d941f0d235fca4ad2a3ccb925ec59aea2\" height=50%, width=50%/>\n",
    "</div>\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "### 2.3.4 模型训练\n",
    "\n",
    "模型训练同时使用了 CTC 损失和 cross entropy 交叉熵损失进行损失函数的计算。\n",
    "\n",
    "其中 Encoder 输出的特征直接进入 CTC Decoder 得到 CTC 损失。\n",
    "\n",
    "而 Decoder 的输出使用 cross entropy 损失。\n",
    " \n",
    "<div align=center>\n",
    "<img src=\"https://ai-studio-static-online.cdn.bcebos.com/fe1d3864f18f4df0a9ab3df8dc4e361a693250b387344273952315ca14d30732\"/>\n",
    "  <br>\n",
    "  (由\"莊永松、柯上優 DLHLP - HW1 End-to-end Speech Recognition PPT\" 修改得)\n",
    "</div>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "\n",
    "\n",
    "### 2.3.5 构建Transformer模型\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "model_conf = transformer_config.model\n",
    "# input_dim 存储的是特征的维度\n",
    "model_conf.input_dim = 80\n",
    "# output_dim 存储的是字表的长度\n",
    "model_conf.output_dim = 4233 \n",
    "print (\"model_conf\", model_conf)\n",
    "model = U2Model.from_config(model_conf)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "\n",
    "  \n",
    "### 2.3.6 加载预训练的模型\n",
    "      \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "model_dict = paddle.load(checkpoint_path)\n",
    "model.set_state_dict(model_dict)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "\n",
    "### 2.3.7 进行预测\n",
    "      "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "decoding_config = transformer_config.decoding\n",
    "text_feature = TextFeaturizer(unit_type='char',\n",
    "                            vocab=transformer_config.collator.vocab_filepath)\n",
    "\n",
    "\n",
    "result_transcripts = model.decode(\n",
    "            audio_feature,\n",
    "            audio_len,\n",
    "            text_feature=text_feature,\n",
    "            decoding_method=decoding_config.decoding_method,\n",
    "            beam_size=decoding_config.beam_size,\n",
    "            ctc_weight=decoding_config.ctc_weight,\n",
    "            decoding_chunk_size=decoding_config.decoding_chunk_size,\n",
    "            num_decoding_left_chunks=decoding_config.num_decoding_left_chunks,\n",
    "            simulate_streaming=decoding_config.simulate_streaming)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "print (\"预测结果对应的token id为:\")\n",
    "print (result_transcripts[1][0])\n",
    "print (\"预测结果为:\")\n",
    "print (result_transcripts[0][0])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "# 3. 作业 \n",
    "1. 使用开发模式安装 [PaddleSpeech](https://github.com/PaddlePaddle/PaddleSpeech)  \n",
    "环境要求：docker, Ubuntu 16.04，root user。  \n",
    "参考安装方法：[使用Docker安装paddlespeech](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md#hard-get-the-full-funciton-on-your-mechine)\n",
    "\n",
    "2. 跑通 examples/aishell/asr1 中的 conformer 模型，完成训练和预测。 \n",
    "\n",
    "3. 按照 example 的格式使用自己的数据集训练 ASR 模型。      \n",
    "\n",
    "# 4. 关注 PaddleSpeech\n",
    "\n",
    "请关注我们的 [Github Repo](https://github.com/PaddlePaddle/PaddleSpeech/)，非常欢迎加入以下微信群参与讨论：\n",
    "- 扫描二维码\n",
    "- 添加运营小姐姐微信\n",
    "- 通过后回复【语音】\n",
    "- 系统自动邀请加入技术群\n",
    "\n",
    "\n",
    "<center><img src=\"https://ai-studio-static-online.cdn.bcebos.com/87bc7da42bcc401bae41d697f13d8b362bfdfd7198f14096b6d46b4004f09613\" width=\"300\" height=\"300\" ></center>\n",
    "\n",
    "# 5. 参考文献\n",
    "\n",
    "[1] Graves A, Fernández S, Gomez F, et al. Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks[C]//Proceedings of the 23rd international conference on Machine learning. 2006: 369-376.\n",
    "\n",
    "[2] Graves A, Mohamed A, Hinton G. Speech recognition with deep recurrent neural networks[C]//2013 IEEE international conference on acoustics, speech and signal processing. Ieee, 2013: 6645-6649.\n",
    "\n",
    "[3] Hannun A, Case C, Casper J, et al. Deep speech: Scaling up end-to-end speech recognition[J]. arXiv preprint arXiv:1412.5567, 2014.\n",
    "\n",
    "[4] Amodei D, Ananthanarayanan S, Anubhai R, et al. Deep speech 2: End-to-end speech recognition in english and mandarin[C]//International conference on machine learning. PMLR, 2016: 173-182.\n",
    "\n",
    "[5] Chan W, Jaitly N, Le Q, et al. Listen, attend and spell: A neural network for large vocabulary conversational speech recognition[C]//2016 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 2016: 4960-4964.\n",
    "\n",
    "[6] Vaswani A, Shazeer N, Parmar N, et al. Attention is all you need[C]//Advances in neural information processing systems. 2017: 5998-6008.\n",
    "\n",
    "[7] Zhang Q, Lu H, Sak H, et al. Transformer transducer: A streamable speech recognition model with transformer encoders and rnn-t loss[C]//ICASSP 2020-2020 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). IEEE, 2020: 7829-7833.\n",
    "\n",
    "[8] Gulati A, Qin J, Chiu C C, et al. Conformer: Convolution-augmented transformer for speech recognition[J]. arXiv preprint arXiv:2005.08100, 2020."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "py35-paddle1.2.0"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}


================================================
FILE: docs/tutorial/cls/cls_tutorial.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<a href=\"https://github.com/PaddlePaddle/PaddleSpeech\"><img style=\"position: absolute; z-index: 999; top: 0; right: 0; border: 0; width: 128px; height: 128px;\" src=\"https://nosir.github.io/cleave.js/images/right-graphite@2x.png\" alt=\"Fork me on GitHub\"></a>\n",
    "\n",
    "# 1. 识别声音\n",
    "  \n",
    " 通过听取声音，人的大脑会获取到大量的信息，其中的一个场景是识别和归类，如：识别熟悉的亲人或朋友的声音、识别不同乐器发出的声音和识别不同环境产生的声音，等等。\n",
    "\n",
    " 我们可以根据不同声音的特征（频率，音色等）进行区分，这种区分行为的本质，就是对声音进行分类。\n",
    "\n",
    "声音分类根据用途还可以继续细分：\n",
    "\n",
    "* 副语言识别：说话人识别（Speaker Recognition）, 情绪识别（Speech Emotion Recognition），性别分类（Speaker gender classification）\n",
    "* 音乐识别：音乐流派分类（Music Genre Classification）\n",
    "* 场景识别：环境声音分类（Environmental Sound Classification）\n",
    "* 声音事件检测：各个环境中的声学事件检测\n",
    " \n",
    "\n",
    "<center><img src=\"https://ai-studio-static-online.cdn.bcebos.com/2b3fdd6dd3b24360ab7448e1aa47bb93d7610aaf79fd4f25aa0a8ff131493261\"></center>\n",
    "<center>图片来源：http://speech.ee.ntu.edu.tw/~tlkagk/courses/DLHLP20/Speaker%20(v3).pdf</center>\n",
    "\n",
    "## 1.1 Audio Tagging\n",
    "使用 [PaddleSpeech](https://github.com/PaddlePaddle/PaddleSpeech) 的预训练模型对一段音频做实时的声音检测，结果如下视频所示。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%HTML\n",
    "<center><video width=\"800\" controls>\n",
    "  <source src=\"https://paddlespeech.cdn.bcebos.com/PaddleAudio/audio_tagging_demo.mp4\" type=\"video/mp4\">\n",
    "</video></center>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 2. 音频和特征提取"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 环境准备：安装paddlespeech\n",
    "!pip install -U pip paddlepaddle-gpu && pip install paddlespeech"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")\n",
    "import IPython\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "\n",
    "## 2.1 数字音频\n",
    "\n",
    "### 2.1.1 声音信号和音频文件\n",
    "  \n",
    "下面通过一个例子观察音频文件的波形，直观地了解数字音频文件包含的内容。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 获取示例音频\n",
    "!test -f ./dog.wav || wget https://paddlespeech.cdn.bcebos.com/PaddleAudio/dog.wav\n",
    "IPython.display.Audio('./dog.wav')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from paddlespeech.audio.backends import load\n",
    "data, sr = load(file='./dog.wav', mono=True, dtype='float32')  # 单通道，float32音频样本点\n",
    "print('wav shape: {}'.format(data.shape))\n",
    "print('sample rate: {}'.format(sr))\n",
    "\n",
    "# 展示音频波形\n",
    "plt.figure()\n",
    "plt.plot(data)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!paddlespeech cls --input ./dog.wav"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.2 音频特征提取\n",
    "\n",
    "### 2.2.1 短时傅里叶变换\n",
    "\n",
    "  对于一段音频，一般会将整段音频进行分帧，每一帧含有一定长度的信号数据，一般使用 `25ms`，帧与帧之间的移动距离称为帧移，一般使用 `10ms`，然后对每一帧的信号数据加窗后，进行短时傅立叶变换（STFT）得到时频谱。\n",
    "  \n",
    "按照上面的方法对一段音频进行分帧后，我们可以用傅里叶变换来分析每一帧信号的频率特性。将每一帧的频率信息拼接后，可以获得该音频不同时刻的频率特征——Spectrogram，也称作语谱图。\n",
    "\n",
    "<center><img src=\"https://ai-studio-static-online.cdn.bcebos.com/8ef98c95137442a797c9204e1108e585facf7124ee964edc845f2c849a39347f\"></center>\n",
    "<center>图片参考：DLHLP 李宏毅 语音识别课程PPT；https://www.shong.win/2016/04/09/fft/</center>\n",
    "\n",
    "<br></br>\n",
    "下面例子采用 `paddle.signal.stft` 演示如何提取示例音频的频谱特征，并进行可视化："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import paddle\n",
    "import numpy as np\n",
    "\n",
    "data, sr = load(file='./dog.wav', sr=32000, mono=True, dtype='float32')\n",
    "x = paddle.to_tensor(data)\n",
    "n_fft = 1024\n",
    "win_length = 1024\n",
    "hop_length = 320\n",
    "\n",
    "# [D, T]\n",
    "spectrogram = paddle.signal.stft(x, n_fft=n_fft, win_length=win_length, hop_length=hop_length, onesided=True)  \n",
    "print('spectrogram.shape: {}'.format(spectrogram.shape))\n",
    "print('spectrogram.dtype: {}'.format(spectrogram.dtype))\n",
    "\n",
    "\n",
    "spec = np.log(np.abs(spectrogram.numpy())**2)\n",
    "plt.figure()\n",
    "plt.title(\"Log Power Spectrogram\")\n",
    "plt.imshow(spec[:100, :], origin='lower')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.2.2 LogFBank\n",
    "\n",
    "研究表明，人类对声音的感知是非线性的，随着声音频率的增加，人对更高频率的声音的区分度会不断下降。\n",
    "\n",
    "例如同样是相差 500Hz 的频率，一般人可以轻松分辨出声音中 500Hz 和 1,000Hz 之间的差异，但是很难分辨出 10,000Hz 和 10,500Hz 之间的差异。\n",
    "\n",
    "因此，学者提出了梅尔频率，在该频率计量方式下，人耳对相同数值的频率变化的感知程度是一样的。\n",
    "\n",
    "<center><img src=\"https://ai-studio-static-online.cdn.bcebos.com/18fac30a88bd46c88a6a8bfdec580b42ff3f6b6ef0b54bb68cb1c217f31c18d7\" width=500></center>\n",
    "<center>图片来源：https://www.researchgate.net/figure/Curve-relationship-between-frequency-signal-with-its-mel-frequency-scale-Algorithm-1_fig3_221910348</center>\n",
    "\n",
    "关于梅尔频率的计算，其会对原始频率的低频的部分进行较多的采样，从而对应更多的频率，而对高频的声音进行较少的采样，从而对应较少的频率。使得人耳对梅尔频率的低频和高频的区分性一致。\n",
    "<center><img src=\"https://ai-studio-static-online.cdn.bcebos.com/7762cef8fa0e4b10b7f566a0e705609af7704f6a1d2b4e8bac44abe724f9c866\" ></center>\n",
    "<center>图片来源：https://ww2.mathworks.cn/help/audio/ref/mfcc.html</center>\n",
    "\n",
    "Mel Fbank 的计算过程如下，而我们一般都是使用 LogFBank 作为识别特征：\n",
    "<center><img src=\"https://ai-studio-static-online.cdn.bcebos.com/e7e6c2e221f642af9e618de768dada99258ec5d97b314035b21dd3e217941a67\" ></center>\n",
    "<center>图片来源：https://ww2.mathworks.cn/help/audio/ref/mfcc.html</center>\n",
    "\n",
    "<br></br>\n",
    "下面例子采用 `paddlespeech.audio.transform.spectrogram.LogMelSpectrogram` 演示如何提取示例音频的 LogFBank:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from paddlespeech.audio.transform.spectrogram import LogMelSpectrogram\n",
    "\n",
    "f_min=50.0\n",
    "f_max=14000.0\n",
    "n_mels=64\n",
    "\n",
    "#   - sr: 音频文件的采样率。\n",
    "#   - n_fft: FFT样本点个数。\n",
    "#   - hop_length: 音频帧之间的间隔。\n",
    "#   - win_length: 窗函数的长度。\n",
    "#   - window: 窗函数种类。\n",
    "#   - n_mels: 梅尔刻度数量。\n",
    "feature_extractor2 = LogMelSpectrogram(\n",
    "    sr=sr, \n",
    "    n_fft=n_fft, \n",
    "    hop_length=hop_length, \n",
    "    win_length=win_length, \n",
    "    window='hann', \n",
    "    f_min=f_min,\n",
    "    f_max=f_max,\n",
    "    n_mels=n_mels)\n",
    "\n",
    "x = paddle.to_tensor(data).unsqueeze(0)     # [B, L]\n",
    "log_fbank = feature_extractor2(x) # [B, D, T]\n",
    "log_fbank = log_fbank.squeeze(0) # [D, T]\n",
    "print('log_fbank.shape: {}'.format(log_fbank.shape))\n",
    "\n",
    "plt.figure()\n",
    "plt.imshow(log_fbank.numpy(), origin='lower')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.3 声音分类方法\n",
    "\n",
    "### 2.3.1 传统机器学习方法\n",
    "在传统的声音和信号的研究领域中，声音特征是一类包含丰富先验知识的手工特征，如频谱图、梅尔频谱和梅尔频率倒谱系数等。\n",
    "  \n",
    "因此在一些分类的应用上，可以采用传统的机器学习方法例如决策树、svm和随机森林等方法。\n",
    "  \n",
    "一个典型的应用案例是：男声和女声分类。\n",
    "\n",
    "<center><img src=\"https://ai-studio-static-online.cdn.bcebos.com/943905088eef48b48e4b94f7ff4c475060937868ca474b61bdcc55fc155b283e\" width=800></center>\n",
    "<center>图片来源：https://journals.plos.org/plosone/article/figure?id=10.1371/journal.pone.0179403.g001</center>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.3.2 深度学习方法\n",
    "传统机器学习方法可以捕捉声音特征的差异（例如男声和女声的声音在音高上往往差异较大）并实现分类任务。\n",
    "  \n",
    "而深度学习方法则可以突破特征的限制，更灵活的组网方式和更深的网络层次，可以更好地提取声音的高层特征，从而获得更好的分类指标。\n",
    "\n",
    "随着深度学习算法的快速发展和在分类任务上的优异表现，当下流行的声音分类模型无一不是采用深度学习网络搭建而成的，如 [AudioCLIP[1]](https://arxiv.org/pdf/2106.13043v1.pdf)、[PANNs[2]](https://arxiv.org/pdf/1912.10211v5.pdf) 和 [Audio Spectrogram Transformer[3]](https://arxiv.org/pdf/2104.01778v3.pdf) 等。\n",
    "<center><img src=\"https://ai-studio-static-online.cdn.bcebos.com/bc2c0352c4124b1d866696fd5d8165efbdca5d60f21648729258b62981ef600a\" ></center>\n",
    "<center>图片来源：https://towardsdatascience.com/audio-deep-learning-made-simple-sound-classification-step-by-step-cebc936bbe5</center>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.3.3 Pretrain + Finetune\n",
    "\n",
    "\n",
    "在声音分类和声音检测的场景中（如环境声音分类、情绪识别和音乐流派分类等）由于可获取的数据集有限，且语音数据标注的成本高，用户可以收集到的数据集体量往往较小，这种数据量稀少的情况对于模型训练是非常不利的。\n",
    "\n",
    "预训练模型能够减少领域数据的需求量，并达到较高的识别准确率。在CV和NLP领域中，有诸如 MobileNet、VGG19、YOLO、BERT 和 ERNIE 等开源的预训练模型，在图像检测、图像分类、文本分类和文本生成等各自领域内的任务中，使用预训练模型在下游任务的数据集上进行 finetune ，往往可以更快和更容易获得较好的效果和指标。\n",
    "\n",
    "相较于 CV 领域的 ImageNet 数据集，谷歌在 2017 年开放了一个大规模的音频数据集 [AudioSet[4]](https://ieeexplore.ieee.org/document/7952261)，它是目前最大的用于音频分类任务的数据集。该数据集包含了 632 类的音频类别以及 2084320 条人工标记的每段 10 秒长度的声音剪辑片段（包括 527 个标签），数据总时长为 5,800 小时。\n",
    "\n",
    "<center><img src=\"https://ai-studio-static-online.cdn.bcebos.com/334a00c3ca4d4feb90982bb882897eeae2c82a6521b54b46bc64cb68289cdd92\" width=480></center>\n",
    "<center>图片来源：https://research.google.com/audioset/ontology/index.html</center>\n",
    "  \n",
    "`PANNs`([PANNs: Large-Scale Pretrained Audio Neural Networks for Audio Pattern Recognition[2]](https://arxiv.org/pdf/1912.10211.pdf))是基于 AudioSet 数据集训练的声音分类/识别的模型，其中`PANNs-CNN14`在测试集上取得了较好的效果：mAP 为 0.431，AUC 为 0.973，d-prime 为 2.732，经过预训练后，该模型可以用于提取音频的 embedding ，适合用于声音分类和声音检测等下游任务。本示例将使用 `PANNs` 的预训练模型 Finetune 完成声音分类的任务。\n",
    "\n",
    "<center><img src=\"https://ai-studio-static-online.cdn.bcebos.com/812d3268cc5b46c88bd23fb9ebaa89196081a14409724b4c87e96498c78c930e\" width=480></center>\n",
    "  \n",
    "本教程选取 `PANNs` 中的预训练模型 `cnn14` 作为 backbone，用于提取声音的深层特征，`SoundClassifier`创建下游的分类网络，实现对输入音频的分类。\n",
    "\n",
    "<center><img src=\"https://ai-studio-static-online.cdn.bcebos.com/1954041f63ae49e2bc1f858ca43433140dfc70a513a8479aa9eb5ca8841cb2ac\" width=600></center>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 3. 实践：环境声音分类\n",
    "\n",
    "## 3.1 数据集准备\n",
    "\n",
    "此课程选取了[ESC-50: Dataset for Environmental Sound Classification[5]](https://github.com/karolpiczak/ESC-50) 数据集作为示例。\n",
    "  \n",
    "ESC-50 是一个包含 2000 个带标签的环境声音样本的数据集，音频样本均为采样率 44,100Hz 的单通道音频文件，所有样本根据标签被划分为 50 个类别，每个类别有 40 个样本。\n",
    "\n",
    "音频样本可分为 5 个主要类别：\n",
    "  - 动物声音（Animals）\n",
    "  - 自然界产生的声音和水声（Natural soundscapes & water sounds）\n",
    "  - 人类发出的非语言声音（Human, non-speech sounds）\n",
    "  - 室内声音（Interior/domestic sounds）\n",
    "  - 室外声音和一般噪声（Exterior/urban noises）。\n",
    "\n",
    "\n",
    "ESC-50 数据集中提供的 `meta/esc50.csv` 文件包含的部分信息如下：\n",
    "```\n",
    "   filename,fold,target,category,esc10,src_file,take\n",
    "   1-100038-A-14.wav,1,14,chirping_birds,False,100038,A\n",
    "   1-100210-A-36.wav,1,36,vacuum_cleaner,False,100210,A\n",
    "   1-101296-A-19.wav,1,19,thunderstorm,False,101296,A\n",
    "   ...\n",
    "```\n",
    "\n",
    "  - filename: 音频文件名字。 \n",
    "  - fold: 数据集自身提供的N-Fold验证信息，用于切分训练集和验证集。\n",
    "  - target: 标签数值。\n",
    "  - category: 标签文本信息。\n",
    "  - esc10: 文件是否为ESC-10的数据集子集。\n",
    "  - src_file: 原始音频文件前缀。\n",
    "  - take: 原始文件的截取段落信息。\n",
    "  \n",
    "在此声音分类的任务中，我们将`target`作为训练过程的分类标签。\n",
    "\n",
    "### 3.1.1 数据集初始化\n",
    "调用以下代码自动下载并读取数据集音频文件，创建训练集和验证集。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from paddlespeech.audio.datasets import ESC50\n",
    "\n",
    "train_ds = ESC50(mode='train', sample_rate=sr)\n",
    "dev_ds = ESC50(mode='dev', sample_rate=sr)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3.1.2 特征提取\n",
    "通过下列代码，用 `paddlespeech.audio.transform.spectrogram.LogMelSpectrogram` 初始化一个音频特征提取器，在训练过程中实时提取音频的 LogFBank 特征，其中主要的参数如下：  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "feature_extractor = LogMelSpectrogram(\n",
    "    sr=sr, \n",
    "    n_fft=n_fft, \n",
    "    hop_length=hop_length, \n",
    "    win_length=win_length, \n",
    "    window='hann', \n",
    "    f_min=f_min,\n",
    "    f_max=f_max,\n",
    "    n_mels=n_mels)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.2 模型\n",
    "\n",
    "### 3.2.1 选取预训练模型\n",
    "\n",
    "选取`cnn14`作为 backbone，用于提取音频的特征："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from paddlespeech.cls.models import cnn14\n",
    "backbone = cnn14(pretrained=True, extract_embedding=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3.2.2 构建分类模型\n",
    "\n",
    "`SoundClassifier`接收`cnn14`作为backbone模型，并创建下游的分类网络："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import paddle.nn as nn\n",
    "\n",
    "\n",
    "class SoundClassifier(nn.Layer):\n",
    "\n",
    "    def __init__(self, backbone, num_class, dropout=0.1):\n",
    "        super().__init__()\n",
    "        self.backbone = backbone\n",
    "        self.dropout = nn.Dropout(dropout)\n",
    "        self.fc = nn.Linear(self.backbone.emb_size, num_class)\n",
    "\n",
    "    def forward(self, x):\n",
    "        x = x.unsqueeze(1)\n",
    "        x = self.backbone(x)\n",
    "        x = self.dropout(x)\n",
    "        logits = self.fc(x)\n",
    "\n",
    "        return logits\n",
    "\n",
    "model = SoundClassifier(backbone, num_class=len(ESC50.label_list))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.3 Finetune"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "1. 创建 DataLoader "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "batch_size = 16\n",
    "train_loader = paddle.io.DataLoader(train_ds, batch_size=batch_size, shuffle=True)\n",
    "dev_loader = paddle.io.DataLoader(dev_ds, batch_size=batch_size,)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "2. 定义优化器和 Loss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "optimizer = paddle.optimizer.Adam(learning_rate=1e-4, parameters=model.parameters())\n",
    "criterion = paddle.nn.loss.CrossEntropyLoss()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "3. 启动模型训练 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from paddlespeech.audio.utils import logger\n",
    "\n",
    "epochs = 20\n",
    "steps_per_epoch = len(train_loader)\n",
    "log_freq = 10\n",
    "eval_freq = 10\n",
    "\n",
    "for epoch in range(1, epochs + 1):\n",
    "    model.train()\n",
    "\n",
    "    avg_loss = 0\n",
    "    num_corrects = 0\n",
    "    num_samples = 0\n",
    "    for batch_idx, batch in enumerate(train_loader):\n",
    "        waveforms, labels = batch\n",
    "        feats = feature_extractor(waveforms)\n",
    "        feats = paddle.transpose(feats, [0, 2, 1])  # [B, N, T] -> [B, T, N]\n",
    "        logits = model(feats)\n",
    "\n",
    "        loss = criterion(logits, labels)\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "        if isinstance(optimizer._learning_rate,\n",
    "                      paddle.optimizer.lr.LRScheduler):\n",
    "            optimizer._learning_rate.step()\n",
    "        optimizer.clear_grad()\n",
    "\n",
    "        # Calculate loss\n",
    "        avg_loss += float(loss)\n",
    "\n",
    "        # Calculate metrics\n",
    "        preds = paddle.argmax(logits, axis=1)\n",
    "        num_corrects += (preds == labels).numpy().sum()\n",
    "        num_samples += feats.shape[0]\n",
    "\n",
    "        if (batch_idx + 1) % log_freq == 0:\n",
    "            lr = optimizer.get_lr()\n",
    "            avg_loss /= log_freq\n",
    "            avg_acc = num_corrects / num_samples\n",
    "\n",
    "            print_msg = 'Epoch={}/{}, Step={}/{}'.format(\n",
    "                epoch, epochs, batch_idx + 1, steps_per_epoch)\n",
    "            print_msg += ' loss={:.4f}'.format(avg_loss)\n",
    "            print_msg += ' acc={:.4f}'.format(avg_acc)\n",
    "            print_msg += ' lr={:.6f}'.format(lr)\n",
    "            logger.train(print_msg)\n",
    "\n",
    "            avg_loss = 0\n",
    "            num_corrects = 0\n",
    "            num_samples = 0\n",
    "\n",
    "    if epoch % eval_freq == 0 and batch_idx + 1 == steps_per_epoch:\n",
    "        model.eval()\n",
    "        num_corrects = 0\n",
    "        num_samples = 0\n",
    "        with logger.processing('Evaluation on validation dataset'):\n",
    "            for batch_idx, batch in enumerate(dev_loader):\n",
    "                waveforms, labels = batch\n",
    "                feats = feature_extractor(waveforms)\n",
    "                feats = paddle.transpose(feats, [0, 2, 1])\n",
    "                \n",
    "                logits = model(feats)\n",
    "\n",
    "                preds = paddle.argmax(logits, axis=1)\n",
    "                num_corrects += (preds == labels).numpy().sum()\n",
    "                num_samples += feats.shape[0]\n",
    "\n",
    "        print_msg = '[Evaluation result]'\n",
    "        print_msg += ' dev_acc={:.4f}'.format(num_corrects / num_samples)\n",
    "\n",
    "        logger.eval(print_msg)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.4 音频预测\n",
    "\n",
    "执行预测，获取 Top K 分类结果："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "top_k = 10\n",
    "wav_file = './dog.wav'\n",
    "\n",
    "waveform, _ = load(wav_file, sr)\n",
    "feats = feature_extractor(paddle.to_tensor(paddle.to_tensor(waveform).unsqueeze(0)))\n",
    "feats = paddle.transpose(feats, [0, 2, 1])  # [B, N, T] -> [B, T, N]\n",
    "print(feats.shape)\n",
    "\n",
    "logits = model(feats)\n",
    "probs = nn.functional.softmax(logits, axis=1).numpy()\n",
    "\n",
    "sorted_indices = probs[0].argsort()\n",
    "\n",
    "msg = f'[{wav_file}]\\n'\n",
    "for idx in sorted_indices[-1:-top_k-1:-1]:\n",
    "    msg += f'{ESC50.label_list[idx]}: {probs[0][idx]:.5f}\\n'\n",
    "print(msg)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 4. 作业\n",
    "1. 使用开发模式安装 [PaddleSpeech](https://github.com/PaddlePaddle/PaddleSpeech)  \n",
    "环境要求：docker, Ubuntu 16.04，root user。  \n",
    "参考安装方法：[使用Docker安装paddlespeech](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md#hard-get-the-full-funciton-on-your-mechine)\n",
    "2. 在 [MusicSpeech](http://marsyas.info/downloads/datasets.html) 数据集上完成 music/speech 二分类。  \n",
    "3. 在 [GTZAN Genre Collection](http://marsyas.info/downloads/datasets.html) 音乐分类数据集上利用 PANNs 预训练模型实现音乐类别十分类。\n",
    "\n",
    "关于如何自定义分类数据集，请参考文档 [PaddleSpeech/docs/source/cls/custom_dataset.md](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/cls/custom_dataset.md)\n",
    "\n",
    "# 5. 关注 PaddleSpeech\n",
    "\n",
    "请关注我们的 [Github Repo](https://github.com/PaddlePaddle/PaddleSpeech/)，非常欢迎加入以下微信群参与讨论：\n",
    "- 扫描二维码\n",
    "- 添加运营小姐姐微信\n",
    "- 通过后回复【语音】\n",
    "- 系统自动邀请加入技术群\n",
    "\n",
    "\n",
    "<center><img src=\"https://ai-studio-static-online.cdn.bcebos.com/87bc7da42bcc401bae41d697f13d8b362bfdfd7198f14096b6d46b4004f09613\" width=\"300\" height=\"300\" ></center>\n",
    "\n",
    "# 6. 参考文献\n",
    "\n",
    "[1] Guzhov, A., Raue, F., Hees, J., & Dengel, A.R. (2021). AudioCLIP: Extending CLIP to Image, Text and Audio. ArXiv, abs/2106.13043.\n",
    "  \n",
    "[2] Kong, Q., Cao, Y., Iqbal, T., Wang, Y., Wang, W., & Plumbley, M.D. (2020). PANNs: Large-Scale Pretrained Audio Neural Networks for Audio Pattern Recognition. IEEE/ACM Transactions on Audio, Speech, and Language Processing, 28, 2880-2894.\n",
    "  \n",
    "[3] Gong, Y., Chung, Y., & Glass, J.R. (2021). AST: Audio Spectrogram Transformer. ArXiv, abs/2104.01778.\n",
    "  \n",
    "[4] Gemmeke, J.F., Ellis, D.P., Freedman, D., Jansen, A., Lawrence, W., Moore, R.C., Plakal, M., & Ritter, M. (2017). Audio Set: An ontology and human-labeled dataset for audio events. 2017 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 776-780.\n",
    "\n",
    "[5] Piczak, K.J. (2015). ESC: Dataset for Environmental Sound Classification. Proceedings of the 23rd ACM international conference on Multimedia.\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "py37",
   "language": "python",
   "name": "py37"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}


================================================
FILE: docs/tutorial/st/st_tutorial.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "<a href=\"https://github.com/PaddlePaddle/PaddleSpeech\"><img style=\"position: absolute; z-index: 999; top: 0; right: 0; border: 0; width: 128px; height: 128px;\" src=\"https://nosir.github.io/cleave.js/images/right-graphite@2x.png\" alt=\"Fork me on GitHub\"></a>\n",
    "\n",
    "\n",
    "# End-to-End Speech (to Text) Translation "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "# 前言"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "## 背景知识\n",
    "语音翻译（ST, Speech Translation）是一项从一段源语言音频中翻译出目标语言的任务。\n",
    "本章主要针对语音到文本的翻译，比如，从一段英文语音中，得到中文的翻译文本。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "## 基本方法\n",
    "### 级联模型（Cascaded）,  ASR -> MT\n",
    "级联模型由独立的两个模型，语音识别模型（ASR）和机器翻译模型（MT）组成。先通过 ASR 模型从语音中识别出源语言的相应文本，再利用 MT 模型将相应文本翻译成目标语言。\n",
    "![cascaded](https://ai-studio-static-online.cdn.bcebos.com/af40e4d580764d1cb07e7f889d31e4e4e2f0839753a543bbad2e6334b48ad8cd)\n",
    "\n",
    "\n",
    "### 端到端模型 （End-to-End） \n",
    "端到端模型不显式对输入语音做文字识别，而直接生成翻译结果。\n",
    "![e2e](https://ai-studio-static-online.cdn.bcebos.com/2043ffd3d9e34054b542dcd67ebb6e4441d91e8d4bf148a88fdc7ca452356c95)\n",
    "\n",
    "\n",
    "相对于端到端模型，级联模型存在以下一些问题:\n",
    "\n",
    "1.错误传播（error propagation），由 ASR 识别错误所产生的错误文本，也会传递给 MT 模型，往往会导致生成更糟糕的翻译结果。\n",
    "\n",
    "2.时延叠加（latency accumulation），因为使用两个级联的模型，需要对输入数据进行多次处理，实际的时延是两个模型时延的累加，效率低于端到端模型。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "# 使用Transformer进行端到端语音翻译的基本流程\n",
    "## 基础模型\n",
    "由于 ASR 章节已经介绍了 Transformer 以及语音特征抽取，在此便不做过多介绍，感兴趣的同学可以去相关章节进行了解。\n",
    "\n",
    "本小节，主要讨论利用 transformer（seq2seq）进行ST与ASR的异同。\n",
    "\n",
    "相似之处在于，两者都可以看做是从语音（speech）到文本（text）的任务。将语音作为输入，而将文字作为输出，区别只在于生成结果是对应语言的识别结果，还是另一语言的翻译结果。\n",
    "\n",
    "因此，我们只需要将数据中的目标文本替换为翻译文本（$Y$），便可利用 ASR 的模型结构实现语音翻译。\n",
    "\n",
    "规范化地讲，对于 ASR，利用包含语音（$S$）和转写文本（$X$）的数据集，训练得到一个模型 $M_{ASR}$，能对任意输入的源语言语音 $\\hat{S}$ 进行文字识别，输出结果 $\\hat{X}$。\n",
    "\n",
    "而ST的语料集，通常包含语音（$S$）、转写文本（$X$）以及翻译文本（$Y$），只需将ASR实践中的转写文本$X$替换为对应的翻译文本 $Y$，便可利用同样的流程得到一个翻译模型 $M_{ST}$，其能对任意输入的源语言语音 $\\hat{S}$ 进行翻译，输出结果 $\\hat{Y}$\n",
    "\n",
    "值得注意的是，相较于 ASR 任务而言，在 ST 中，因为翻译文本与源语音不存在单调对齐（monotonic aligned）的性质，因此 CTC 模块不能将翻译结果作为目标来使用，此处涉及一些学术细节，感兴趣的同学可以自行去了解 [CTC](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/topic/ctc/ctc_loss.ipynb) 的具体内容。\n",
    "\n",
    "> 我们会在 [PaddleSpeech](https://github.com/PaddlePaddle/PaddleSpeech) 中放一些 Topic 的技术文章（如 [CTC](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/topic/ctc/ctc_loss.ipynb) ），欢迎大家 star 关注。\n",
    "\n",
    "## 辅助任务训练，提升效果（ASR MTL）\n",
    "\n",
    "相比于 ASR 任务，ST 任务对于数据的标注和获取更加困难，通常很难获取大量的训练数据。\n",
    "\n",
    "因此，我们将讨论如何更有效利用已有数据，提升 ST 模型的效果。\n",
    "\n",
    "1.先利用 ASR 对模型进行预训练，得到一个编码器能够有效的捕捉语音中的语义信息，在此基础上再进一步利用翻译数据训练ST模型。\n",
    "\n",
    "2.相较于 ASR 任务的二元组数据（$S$,$X$），通常包含三元组数据（$S$,$X$,$Y$）的ST任务能够自然有效的进行多任务学习。\n",
    "顾名思义，我们可以将ASR任务作为辅助任务，将两个任务进行联合训练，利用ASR任务的辅助提升 ST 模型的效果。\n",
    "具体上讲，如图所示，可以利用一个共享的编码器对语音进行编码，同时利用两个独立的解码器，分别执行 ASR 和 ST 任务。\n",
    "![mtl](https://ai-studio-static-online.cdn.bcebos.com/3896f104527947cbba8f459d4477b0a8cb9e4d527d6f4da29bb7792d69b66d3c)\n",
    "我们将在实战中进行演示。\n",
    "\n",
    "## 引入预训练模型，提升效果 （FAT-ST PT）\n",
    "\n",
    "相比于文本到文本的机器翻译具有充足的语料（通常上百万条），语音到文本的翻译的语料很匮乏。那是否可以将文本到文本的翻译语料利用上来提升 ST 的模型效果呢？答案是肯定的。\n",
    "\n",
    "FAT 模型[1],借鉴了 Bert[2] 和 TLM[3]的 masked language model 预训练思路，并将其拓展到语音翻译的跨语言、跨模态（语音和文本）的场景。可以应对三元组（$S$,$X$,$Y$）中任意的单一或组合的数据类型。\n",
    "举例来说，它可以利用纯语音或文本数据集（$S$|$X$|$Y$），也可以利用 ASR 数据集（$S$,$X$），甚至文本翻译数据（$X$,$Y$)。在这种预训练模型的基础上进行 ST 的训练，能够有效解决训练数据匮乏的困境，提升最终的翻译效果。\n",
    "![fat](https://ai-studio-static-online.cdn.bcebos.com/ef4b677e4cdf466fa5264d3b9dc976326e3c2046b1704c7da693015a71fc8a68)\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "# 实战\n",
    "## ST 多任务学习，将 ASR 作为辅助任务\n",
    "### 数据集: [Ted语音翻译数据集](http://www.nlpr.ia.ac.cn/cip/dataset.htm)（英文语音$\\rightarrow$中文文本）[4]\n",
    "## 准备工作\n",
    "## 特征抽取\n",
    "参考语音识别的相关章节，略。\n",
    "## 多任务模型\n",
    "Transformer 内容参考语音识别的相关章节，略。\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "## Stage 1 准备工作\n",
    "### 安装 paddlespeech"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!pip install -U pip paddlepaddle-gpu && pip install paddlespeech"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "### 导入 python 包"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import paddle\n",
    "import numpy as np\n",
    "import kaldiio\n",
    "import subprocess\n",
    "from kaldiio import WriteHelper\n",
    "from yacs.config import CfgNode\n",
    "import IPython.display as dp\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")\n",
    "\n",
    "from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer\n",
    "from paddlespeech.s2t.models.u2_st import U2STModel"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "### 获取预训练模型和参数并配置"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!wget -nc https://paddlespeech.cdn.bcebos.com/s2t/ted_en_zh/st1/fat_st_ted-en-zh.tar.gz\n",
    "!tar xzvf fat_st_ted-en-zh.tar.gz"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "config_path = \"conf/transformer_mtl_noam.yaml\" \n",
    "\n",
    "# 读取 conf 文件并结构化\n",
    "st_config = CfgNode(new_allowed=True)\n",
    "st_config.merge_from_file(config_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "### 下载并配置 kaldi 环境"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!wget -nc https://paddlespeech.cdn.bcebos.com/s2t/ted_en_zh/st1/kaldi_bins.tar.gz\n",
    "!tar xzvf kaldi_bins.tar.gz"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "kaldi_bins_path = os.path.abspath('kaldi_bins')\n",
    "print(kaldi_bins_path)\n",
    "if 'LD_LIBRARY_PATH' not in os.environ:\n",
    "    os.environ['LD_LIBRARY_PATH'] = f'{kaldi_bins_path}'\n",
    "else:\n",
    "    os.environ['LD_LIBRARY_PATH'] += f':{kaldi_bins_path}'\n",
    "os.environ['PATH'] += f':{kaldi_bins_path}'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "## Stage 2 获取特征\n",
    "### 提取 kaldi 特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def get_kaldi_feat(wav_path, config=st_config):\n",
    "    \"\"\"\n",
    "        Extract CMVN-normalized fbank + pitch features from a wav file using the kaldi binaries.\n",
    "        Returns (audio, audio_len): the feature paddle.Tensor with a leading batch axis and its length.\n",
    "    \"\"\"\n",
    "    wav_file = os.path.abspath(wav_path)\n",
    "    cmvn = config.collator.cmvn_path\n",
    "    utt_name = '_tmp'\n",
    "\n",
    "    # Get the object for feature extraction\n",
    "    fbank_extract_command = [\n",
    "        'compute-fbank-feats', '--num-mel-bins=80', '--verbose=2',\n",
    "        '--sample-frequency=16000', 'scp:-', 'ark:-'\n",
    "    ]\n",
    "    fbank_extract_process = subprocess.Popen(fbank_extract_command,\n",
    "                                                stdin=subprocess.PIPE,\n",
    "                                                stdout=subprocess.PIPE,\n",
    "                                                stderr=subprocess.PIPE)\n",
    "    fbank_extract_process.stdin.write(\n",
    "        f'{utt_name} {wav_file}'.encode('utf8'))\n",
    "    fbank_extract_process.stdin.close()\n",
    "    fbank_feat = dict(kaldiio.load_ark(\n",
    "        fbank_extract_process.stdout))[utt_name]\n",
    "\n",
    "    extract_command = ['compute-kaldi-pitch-feats', 'scp:-', 'ark:-']\n",
    "    pitch_extract_process = subprocess.Popen(extract_command,\n",
    "                                                stdin=subprocess.PIPE,\n",
    "                                                stdout=subprocess.PIPE,\n",
    "                                                stderr=subprocess.PIPE)\n",
    "    pitch_extract_process.stdin.write(\n",
    "        f'{utt_name} {wav_file}'.encode('utf8'))\n",
    "    process_command = ['process-kaldi-pitch-feats', 'ark:', 'ark:-']\n",
    "    pitch_process = subprocess.Popen(process_command,\n",
    "                                        stdin=pitch_extract_process.stdout,\n",
    "                                        stdout=subprocess.PIPE,\n",
    "                                        stderr=subprocess.PIPE)\n",
    "    pitch_extract_process.stdin.close()\n",
    "    pitch_feat = dict(kaldiio.load_ark(\n",
    "        pitch_process.stdout))[utt_name]\n",
    "    concated_feat = np.concatenate((fbank_feat, pitch_feat), axis=1)\n",
    "    raw_feat = f\"{utt_name}.raw\"\n",
    "    with WriteHelper(f'ark,scp:{raw_feat}.ark,{raw_feat}.scp') as writer:\n",
    "        writer(utt_name, concated_feat)\n",
    "    cmvn_command = [\n",
    "        \"apply-cmvn\", \"--norm-vars=true\", cmvn, f'scp:{raw_feat}.scp',\n",
    "        'ark:-'\n",
    "    ]\n",
    "    cmvn_process = subprocess.Popen(cmvn_command,\n",
    "                                    stdout=subprocess.PIPE,\n",
    "                                    stderr=subprocess.PIPE)\n",
    "    process_command = ['copy-feats', '--compress=true', 'ark:-', 'ark:-']\n",
    "    process = subprocess.Popen(process_command,\n",
    "                                stdin=cmvn_process.stdout,\n",
    "                                stdout=subprocess.PIPE,\n",
    "                                stderr=subprocess.PIPE)\n",
    "    norm_feat = dict(kaldiio.load_ark(process.stdout))[utt_name]\n",
    "    audio = paddle.to_tensor(norm_feat).unsqueeze(0)\n",
    "    audio_len = paddle.to_tensor(audio.shape[1], dtype='int64')\n",
    "    return audio, audio_len"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "### 构建文本特征提取对象"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "text_feature = TextFeaturizer(\n",
    "                unit_type=st_config.collator.unit_type,\n",
    "                vocab=st_config.collator.vocab_filepath,\n",
    "                spm_model_prefix=st_config.collator.spm_model_prefix)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "## Stage 3 使用模型获得结果"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "### 构建 ST 模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "model_conf = st_config.model\n",
    "model_conf.input_dim = st_config.collator.feat_dim\n",
    "model_conf.output_dim = text_feature.vocab_size\n",
    "print(model_conf)\n",
    "model = U2STModel.from_config(model_conf)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "### 加载预训练模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "params_path =  \"exp/transformer_mtl_noam/checkpoints/fat_st_ted-en-zh.pdparams\"\n",
    "model_dict = paddle.load(params_path)\n",
    "model.set_state_dict(model_dict)\n",
    "model.eval()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "### 预测"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# 下载wav\n",
    "!wget -nc https://paddlespeech.cdn.bcebos.com/PaddleAudio/74109_0147917-0156334.wav\n",
    "!wget -nc https://paddlespeech.cdn.bcebos.com/PaddleAudio/120221_0278694-0283831.wav\n",
    "!wget -nc https://paddlespeech.cdn.bcebos.com/PaddleAudio/15427_0822000-0833000.wav\n",
    "\n",
    "wav_file = '74109_0147917-0156334.wav'\n",
    "# wav_file = '120221_0278694-0283831.wav'\n",
    "# wav_file = '15427_0822000-0833000.wav'\n",
    "\n",
    "transcript = \"my hair is short like a boy 's and i wear boy 's clothes but i 'm a girl and you know how sometimes you like to wear a pink dress and sometimes you like to wear your comfy jammies\"\n",
    "dp.Audio(wav_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "audio, audio_len = get_kaldi_feat(wav_file)\n",
    "cfg = st_config.decoding\n",
    "\n",
    "\n",
    "res = model.decode(audio,\n",
    "            audio_len,\n",
    "            text_feature=text_feature,\n",
    "            decoding_method=cfg.decoding_method,\n",
    "            beam_size=cfg.beam_size,\n",
    "            word_reward=cfg.word_reward,\n",
    "            decoding_chunk_size=cfg.decoding_chunk_size,\n",
    "            num_decoding_left_chunks=cfg.num_decoding_left_chunks,\n",
    "            simulate_streaming=cfg.simulate_streaming)\n",
    "print(\"对应英文: {}\".format(transcript))\n",
    "print(\"翻译结果: {}\".format(\"\".join(res[0].split())))\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "# 参考文献\n",
    "\n",
    "1.Zheng, Renjie, Junkun Chen, Mingbo Ma, and Liang Huang. \"Fused acoustic and text encoding for multimodal bilingual pretraining and speech translation.\" ICML 2021.\n",
    "\n",
    "2.Devlin, Jacob, Ming-Wei Chang, Kenton Lee, and Kristina Toutanova. \"Bert: Pre-training of deep bidirectional transformers for language understanding.\" NAACL 2019.\n",
    "\n",
    "3.Conneau, Alexis, and Guillaume Lample. \"Cross-lingual language model pretraining.\" NIPS 2019.\n",
    "\n",
    "4.Liu, Yuchen, Hao Xiong, Zhongjun He, Jiajun Zhang, Hua Wu, Haifeng Wang, and Chengqing Zong. \"End-to-end speech translation with knowledge distillation.\" Interspeech 2019."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "# PaddleSpeech\n",
    "\n",
    "请关注我们的 [Github Repo](https://github.com/PaddlePaddle/PaddleSpeech/)，非常欢迎加入以下微信群参与讨论：\n",
    "- 扫描二维码\n",
    "- 添加运营小姐姐微信\n",
    "- 通过后回复【语音】\n",
    "- 系统自动邀请加入技术群\n",
    "\n",
    "<center><img src=\"https://ai-studio-static-online.cdn.bcebos.com/87bc7da42bcc401bae41d697f13d8b362bfdfd7198f14096b6d46b4004f09613\" width=\"300\" height=\"300\" ></center>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "py35-paddle1.2.0"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}


================================================
FILE: docs/tutorial/tts/tts_tutorial.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<a href=\"https://github.com/PaddlePaddle/PaddleSpeech\"><img style=\"position: absolute; z-index: 999; top: 0; right: 0; border: 0; width: 128px; height: 128px;\" src=\"https://nosir.github.io/cleave.js/images/right-graphite@2x.png\" alt=\"Fork me on GitHub\"></a>\n",
    "\n",
    "# 『听』和『说』\n",
    "人类通过听觉获取的信息大约占所有感知信息的 20% ~ 30%。声音存储了丰富的语义以及时序信息，由专门负责听觉的器官接收信号，产生一系列连锁刺激后，在人类大脑的皮层听区进行处理分析，获取语义和知识。近年来，随着深度学习算法上的进步以及不断丰厚的硬件资源条件，**文本转语音（Text-to-Speech, TTS）** 技术在移动、虚拟娱乐等领域得到了广泛的应用。\n",
    "## \"听\"书\n",
    "使用 [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR) 直接获取书籍上的文字。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# download demo sources\n",
    "!mkdir download\n",
    "!wget -P download https://paddlespeech.cdn.bcebos.com/tutorial/tts/ocr_result.jpg\n",
    "!wget -P download https://paddlespeech.cdn.bcebos.com/tutorial/tts/ocr.wav\n",
    "!wget -P download https://paddlespeech.cdn.bcebos.com/tutorial/tts/tts_lips.mp4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import IPython.display as dp\n",
    "from PIL import Image\n",
    "img_path = 'download/ocr_result.jpg'\n",
    "im = Image.open(img_path)\n",
    "dp.display(im)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "使用 [PaddleSpeech](https://github.com/PaddlePaddle/PaddleSpeech)，阅读上一步识别出来的文字。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dp.Audio(\"download/ocr.wav\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "具体实现代码详见 [Story Talker](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos/story_talker)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 偶像开口说话\n",
    "*元宇宙来袭，构造你的虚拟人！* 看看 [PaddleGAN](https://github.com/PaddlePaddle/PaddleGAN) 怎样合成唇形，让WiFi之母——海蒂·拉玛说话。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import HTML\n",
    "html_str = '''\n",
    "<video controls width=\"600\" height=\"360\" src=\"{}\">animation</video>\n",
    "'''.format(\"download/tts_lips.mp4\")\n",
    "dp.display(HTML(html_str))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "具体实现代码请参考 [Metaverse](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos/metaverse)。\n",
    "\n",
    "下面让我们来系统地学习语音方面的知识，看看怎样使用 **PaddleSpeech** 实现基本的语音功能，以及怎样结合光学字符识别（Optical Character Recognition，OCR）、自然语言处理（Natural Language Processing，NLP）等技术“听”书、让名人开口说话。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 前言\n",
    "## 背景知识\n",
    "为了更好地了解文本转语音任务的要素，我们先简要地回顾一下文本转语音的发展历史。如果你对此已经有所了解，或希望能尽快使用代码实现，请直接跳至[实践](#实践)。\n",
    "### 定义\n",
    "<!----\n",
    "Note: \n",
    "1.此句抄自 [李沐Dive into Deep Learning](https://zh-v2.d2l.ai/chapter_introduction/index.html)\n",
    "2.修改参考A Survey on Neural Speech Synthesis.\n",
    "---> \n",
    "文本转语音，又称语音合成（Speech Synthesis），指的是将一段文本按照一定需求转化成对应的音频，这种特性决定了它的输出数据比输入数据长得多。文本转语音是一项涉及语义学、声学、数字信号处理以及机器学习等多个学科的交叉任务。虽然辨识低质量音频文件的内容对人类来说很容易，但这对计算机来说并非易事。\n",
    "\n",
    "按照不同的应用需求，更广义的语音合成研究包括：*语音转换*，例如说话人转换、语音到歌唱转换、语音情感转换、口音转换等；*歌唱合成*，例如歌词到歌唱转换、可视语音合成等。\n",
    "\n",
    "### 发展历史\n",
    "\n",
    "<!--\n",
    "以下摘自维基百科 https://en.wikipedia.org/wiki/Speech_synthesis\n",
    "--->\n",
    "\n",
    "在第二次工业革命之前，语音的合成主要以机械式的音素合成为主。1779年，德裔丹麦科学家 Christian Gottlieb Kratzenstein 建造了人类的声道模型，使其可以产生五个长元音。1791年， Wolfgang von Kempelen 添加了唇和舌的模型，使其能够发出辅音和元音。贝尔实验室于20世纪30年代发明了声码器（Vocoder），将语音自动分解为音调和共振，此项技术由 Homer Dudley 改进为键盘式合成器并于 1939年纽约世界博览会展出。\n",
    "\n",
    "第一台基于计算机的语音合成系统起源于20世纪50年代。1961年，贝尔实验室的 John Larry Kelly 和 Louis Gerstman 使用 IBM 704 计算机合成语音，成为贝尔实验室最著名的成就之一。1975年，第一代语音合成系统之一 —— MUSA（MUltichannel Speaking Automation）问世，其由一个独立的硬件和配套的软件组成。1978年发行的第二个版本也可以进行无伴奏演唱。90 年代的主流是采用 MIT 和贝尔实验室的系统，并结合自然语言处理模型。\n",
    "\n",
    "<center><img src=\"https://ai-studio-static-online.cdn.bcebos.com/55035de353b042cd8c4468819b2d36e2fcc89bffdf2b442fa4c7b0b5499e1592\"></center>\n",
    "\n",
    "### 主流方法\n",
    "\n",
    "当前的主流方法分为**基于统计参数的语音合成**、**波形拼接语音合成**、**混合方法**以及**端到端神经网络语音合成**。基于参数的语音合成包含隐马尔可夫模型（Hidden Markov Model,HMM）以及深度学习网络（Deep Neural Network，DNN）。端到端的方法包含声学模型+声码器以及“完全”端到端方法。\n",
    "\n",
    "\n",
    "## 基于深度学习的语音合成技术\n",
    "\n",
    "### 语音合成基本知识\n",
    "\n",
    "<center><img src=\"https://ai-studio-static-online.cdn.bcebos.com/10859679d74745ab82fb6f5c9984a95152c25b0e3dce4515b120c8997a6752d8\"></center>\n",
    "<br></br>\n",
    "\n",
    "语音合成流水线包含 <font color=\"#ff0000\">**文本前端（Text Frontend）**</font> 、<font color=\"#ff0000\">**声学模型（Acoustic Model）**</font> 和 <font color=\"#ff0000\">**声码器（Vocoder）**</font> 三个主要模块:\n",
    "- 通过文本前端模块将原始文本转换为字符/音素。\n",
    "- 通过声学模型将字符/音素转换为声学特征，如线性频谱图、mel 频谱图、LPC 特征等。\n",
    "- 通过声码器将声学特征转换为波形。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 实践"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 安装 paddlespeech"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install -U pip paddlepaddle-gpu && pip install paddlespeech"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "环境安装请参考 [Installation](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md) 教程。 \n",
    "下面使用 **PaddleSpeech** 提供的预训练模型合成中文语音。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 数据及模型准备"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 获取PaddlePaddle预训练模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!wget -P download https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_baker_ckpt_0.4.zip\n",
    "!unzip -d download download/pwg_baker_ckpt_0.4.zip\n",
    "!wget -P download https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_ckpt_0.4.zip\n",
    "!unzip -d download download/fastspeech2_nosil_baker_ckpt_0.4.zip"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!tree download/pwg_baker_ckpt_0.4\n",
    "!tree download/fastspeech2_nosil_baker_ckpt_0.4"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 导入 Python 包"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 本项目的依赖需要用到 nltk 包，但是有时会因为网络原因导致不好下载，此处手动下载一下放到百度服务器的包\n",
    "!wget https://paddlespeech.cdn.bcebos.com/Parakeet/tools/nltk_data.tar.gz\n",
    "!tar zxvf nltk_data.tar.gz"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 设置 gpu 环境\n",
    "%env CUDA_VISIBLE_DEVICES=0\n",
    "\n",
    "import logging\n",
    "import sys\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\", category=DeprecationWarning)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import argparse\n",
    "import os\n",
    "from pathlib import Path\n",
    "import IPython.display as dp\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import paddle\n",
    "import soundfile as sf\n",
    "import yaml\n",
    "from paddlespeech.t2s.frontend.zh_frontend import Frontend\n",
    "from paddlespeech.t2s.models.fastspeech2 import FastSpeech2\n",
    "from paddlespeech.t2s.models.fastspeech2 import FastSpeech2Inference\n",
    "from paddlespeech.t2s.models.parallel_wavegan import PWGGenerator\n",
    "from paddlespeech.t2s.models.parallel_wavegan import PWGInference\n",
    "from paddlespeech.t2s.modules.normalizer import ZScore\n",
    "from yacs.config import CfgNode"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 设置预训练模型的路径"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fastspeech2_config = \"download/fastspeech2_nosil_baker_ckpt_0.4/default.yaml\"\n",
    "fastspeech2_checkpoint = \"download/fastspeech2_nosil_baker_ckpt_0.4/snapshot_iter_76000.pdz\"\n",
    "fastspeech2_stat = \"download/fastspeech2_nosil_baker_ckpt_0.4/speech_stats.npy\"\n",
    "pwg_config = \"download/pwg_baker_ckpt_0.4/pwg_default.yaml\"\n",
    "pwg_checkpoint = \"download/pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz\"\n",
    "pwg_stat = \"download/pwg_baker_ckpt_0.4/pwg_stats.npy\"\n",
    "phones_dict = \"download/fastspeech2_nosil_baker_ckpt_0.4/phone_id_map.txt\"\n",
    "# 读取 conf 配置文件并结构化\n",
    "with open(fastspeech2_config) as f:\n",
    "    fastspeech2_config = CfgNode(yaml.safe_load(f))\n",
    "with open(pwg_config) as f:\n",
    "    pwg_config = CfgNode(yaml.safe_load(f))\n",
    "print(\"========Config========\")\n",
    "print(fastspeech2_config)\n",
    "print(\"---------------------\")\n",
    "print(pwg_config)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 文本前端（Text Frontend）\n",
    "\n",
    "一个文本前端模块主要包含:\n",
    "- 分段（Text Segmentation）\n",
    "- 文本正则化（Text Normalization, TN）\n",
    "- 分词（Word Segmentation, 主要是在中文中）\n",
    "- 词性标注（Part-of-Speech, PoS）\n",
    "- 韵律预测（Prosody）\n",
    "- 字音转换（Grapheme-to-Phoneme，G2P）\n",
    "<font size=2>（Grapheme: **语言**书写系统的最小有意义单位; Phoneme: 区分单词的最小**语音**单位）</font>\n",
    "    - 多音字（Polyphone）\n",
    "    - 变调（Tone Sandhi）\n",
    "        - “一”、“不”变\n",
    "        - 三声变调\n",
    "        - 轻声变调\n",
    "        - 儿化音\n",
    "        - 方言\n",
    "- ...\n",
    "\n",
    "（输入给声学模型之前，还需要把音素序列转换为 id）\n",
    "\n",
    "\n",
    "其中最重要的模块是<font color=\"#ff0000\"> 文本正则化 </font>模块和<font color=\"#ff0000\"> 字音转换（TTS 中更常用 G2P 代指） </font>模块。\n",
    "\n",
    "\n",
    "各模块输出示例:\n",
    "```text\n",
    "• Text: 全国一共有112所211高校\n",
    "• Text Normalization: 全国一共有一百一十二所二一一高校\n",
    "• Word Segmentation: 全国/一共/有/一百一十二/所/二一一/高校/\n",
    "• G2P（注意此句中“一”的读音）:\n",
    "    quan2 guo2 yi2 gong4 you3 yi4 bai3 yi1 shi2 er4 suo3 er4 yao1 yao1 gao1 xiao4\n",
    "    （可以进一步把声母和韵母分开）\n",
    "    q uan2 g uo2 y i2 g ong4 y ou3 y i4 b ai3 y i1 sh i2 er4 s uo3 er4 y ao1 y ao1 g ao1 x iao4\n",
    "    （把音调和声韵母分开）\n",
    "    q uan g uo y i g ong y ou y i b ai y i sh i er s uo er y ao y ao g ao x iao\n",
    "    0 2 0 2 0 2 0 4 0 3 ...\n",
    "• Prosody (prosodic words #1, prosodic phrases #2, intonation phrases #3, sentence #4):\n",
    "    全国#2一共有#2一百#1一十二所#2二一一#1高校#4\n",
    "    （分词的结果一般是固定的，但是不同人习惯不同，可能有不同的韵律）\n",
    "```\n",
    "\n",
    "文本前端模块的设计需要结合很多专业的语义学知识和经验。人类在读文本的时候可以自然而然地读出正确的发音，但是这些先验知识计算机并不知晓。\n",
    "例如，对于一个句子的分词：\n",
    "\n",
    "```text\n",
    "我也想过过过儿过过的生活\n",
    "我也想/过过/过儿/过过的/生活\n",
    "\n",
    "货拉拉拉不拉拉布拉多\n",
    "货拉拉/拉不拉/拉布拉多\n",
    "\n",
    "南京市长江大桥\n",
    "南京市长/江大桥\n",
    "南京市/长江大桥\n",
    "```\n",
    "或者是词的变调和儿化音：\n",
    "```\n",
    "你要不要和我们一起出去玩？\n",
    "你要不（2声）要和我们一（4声）起出去玩（儿）？\n",
    "\n",
    "不好，我要一个人出去。\n",
    "不（4声）好，我要一（2声）个人出去。\n",
    "\n",
    "（以下每个词的所有字都是三声的，请你读一读，体会一下在读的时候，是否每个字都被读成了三声？）\n",
    "纸老虎、虎骨酒、展览馆、岂有此理、手表厂有五种好产品\n",
    "```\n",
    "又或是多音字，这类情况通常需要先正确分词：\n",
    "```text\n",
    "人要行，干一行行一行，一行行行行行;\n",
    "人要是不行，干一行不行一行，一行不行行行不行。\n",
    "\n",
    "佟大为妻子产下一女\n",
    "\n",
    "海水朝朝朝朝朝朝朝落\n",
    "浮云长长长长长长长消\n",
    "```\n",
    "\n",
    "PaddleSpeech Text-to-Speech的文本前端解决方案:\n",
    "- [文本正则](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/tn)\n",
    "- [G2P](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/g2p)\n",
    "    - 多音字模块: pypinyin/g2pM\n",
    "    - 变调模块: 用分词 + 规则"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 构造文本前端对象\n",
    "传入`phones_dict`，把相应的`phones`转换成`phone_ids`。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 传入 phones_dict 会把相应的 phones 转换成 phone_ids\n",
    "frontend = Frontend(phone_vocab_path=phones_dict)\n",
    "print(\"Frontend done!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 调用文本前端\n",
    "\n",
    "文本前端对输入数据进行正则化时会进行分句，若`merge_sentences`设置为`False`，则所有分句的 `phone_ids` 构成一个 `List`；若设置为`True`，`input_ids[\"phone_ids\"][0]`则表示整句的`phone_ids`。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# input = \"我每天中午12:00起床\"\n",
    "# input = \"我出生于2005/11/08，那天的最低气温达到-10°C\"\n",
    "input = \"你好，欢迎使用百度飞桨框架进行深度学习研究！\"\n",
    "input_ids = frontend.get_input_ids(input, merge_sentences=True, print_info=True)\n",
    "phone_ids = input_ids[\"phone_ids\"][0]\n",
    "print(\"phone_ids:%s\"%phone_ids)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 用深度学习实现文本前端\n",
    "\n",
    "<center><img src=\"https://ai-studio-static-online.cdn.bcebos.com/85a5cd8aef1e444cbb980a2f1f184316247bbb7870a34925a77b799802df8ef0\"></center>\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 声学模型（Acoustic Model）\n",
    "\n",
    "声学模型将字符/音素转换为声学特征，如线性频谱图、mel 频谱图、LPC 特征等，声学特征以 “帧” 为单位，一般一帧是 10ms 左右，一个音素一般对应 5~20 帧左右, 声学模型需要解决的是 <font color=\"#ff0000\">“不等长序列间的映射问题”</font>，“不等长”是指，同一个人发不同音素的持续时间不同，同一个人在不同时刻说同一句话的语速可能不同，对应各个音素的持续时间不同，不同人说话的特色不同，对应各个音素的持续时间不同。这是一个困难的“一对多”问题。\n",
    "```\n",
    "# 卡尔普陪外孙玩滑梯\n",
    "000001|baker_corpus|sil 20 k 12 a2 4 er2 10 p 12 u3 12 p 9 ei2 9 uai4 15 s 11 uen1 12 uan2 14 h 10 ua2 11 t 15 i1 16 sil 20\n",
    "```\n",
    "\n",
    "声学模型主要分为自回归模型和非自回归模型，其中自回归模型在 `t` 时刻的预测需要依赖 `t-1` 时刻的输出作为输入，预测时间长，但是音质相对较好，非自回归模型不存在预测上的依赖关系，预测时间快，音质相对较差。\n",
    "\n",
    "主流声学模型发展的脉络:\n",
    "- 自回归模型:\n",
    "    - Tacotron\n",
    "    - Tacotron2\n",
    "    - Transformer TTS\n",
    "- 非自回归模型:\n",
    "    - FastSpeech\n",
    "    - SpeedySpeech\n",
    "    - FastPitch\n",
    "    - FastSpeech2\n",
    "    - ...\n",
    " \n",
    "在本教程中，我们使用 `FastSpeech2` 作为声学模型。\n",
    "<center><img src=\"https://ai-studio-static-online.cdn.bcebos.com/6b6d671713ec4d20a0e60653c7a5d4ae3c35b1d1e58b4cc39e0bc82ad4a341d9\"></center>\n",
    "<br><center> FastSpeech2 网络结构图</center></br>\n",
    "\n",
    "\n",
    "PaddleSpeech TTS 实现的 FastSpeech2 与论文不同的地方在于，我们使用的是 phone 级别的 `pitch` 和 `energy`(与 FastPitch 类似)，这样的合成结果可以更加**稳定**。\n",
    "<center><img src=\"https://ai-studio-static-online.cdn.bcebos.com/862c21456c784c41a83a308b7d9707f0810cc3b3c6f94ed48c60f5d32d0072f0\"></center>\n",
    "<br><center> FastPitch 网络结构图</center></br>\n",
    "\n",
    "更多关于[语音合成模型的发展及改进](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/tts/models_introduction.md)。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 初始化声学模型 FastSpeech2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(phones_dict, \"r\") as f:\n",
    "    phn_id = [line.strip().split() for line in f.readlines()]\n",
    "vocab_size = len(phn_id)\n",
    "print(\"vocab_size:\", vocab_size)\n",
    "odim = fastspeech2_config.n_mels\n",
    "model = FastSpeech2(\n",
    "    idim=vocab_size, odim=odim, **fastspeech2_config[\"model\"])\n",
    "# 加载预训练模型参数\n",
    "model.set_state_dict(paddle.load(fastspeech2_checkpoint)[\"main_params\"])\n",
    "# 推理阶段不启用 batch norm 和 dropout\n",
    "model.eval()\n",
    "stat = np.load(fastspeech2_stat)\n",
    "# 读取数据预处理阶段数据集的均值和标准差\n",
    "mu, std = stat\n",
    "mu, std = paddle.to_tensor(mu), paddle.to_tensor(std)\n",
    "# 构造归一化的新模型\n",
    "fastspeech2_normalizer = ZScore(mu, std)\n",
    "fastspeech2_inference = FastSpeech2Inference(fastspeech2_normalizer, model)\n",
    "fastspeech2_inference.eval()\n",
    "print(\"FastSpeech2 done!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 调用声学模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with paddle.no_grad():\n",
    "    mel = fastspeech2_inference(phone_ids)\n",
    "print(\"shape of mel (n_frames x n_mels):\")\n",
    "print(mel.shape)\n",
    "# 绘制声学模型输出的 mel 频谱\n",
    "fig, ax = plt.subplots(figsize=(16, 6))\n",
    "im = ax.imshow(mel.T, aspect='auto',origin='lower')\n",
    "plt.title('Mel Spectrogram')\n",
    "plt.xlabel('Time')\n",
    "plt.ylabel('Frequency')\n",
    "plt.tight_layout()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 声码器（Vocoder）\n",
    "声码器将声学特征转换为波形。声码器需要解决的是 <font color=\"#ff0000\">“信息缺失的补全问题”</font>。信息缺失是指，在音频波形转换为频谱图的时候，存在**相位信息**的缺失，在频谱图转换为 mel 频谱图的时候，存在**频域压缩**导致的信息缺失；假设音频的采样率是 16 kHz，一帧的音频有 10ms，也就是说，1s 的音频有 16000 个采样点，而 1s 中包含 100 帧，每一帧有 160 个采样点，声码器的作用就是将一个频谱帧变成音频波形的 160 个采样点，所以声码器中一般会包含**上采样**模块。\n",
    "\n",
    "与声学模型类似，声码器也分为自回归模型和非自回归模型, 更细致的分类如下:\n",
    "\n",
    "- Autoregression\n",
    "    - WaveNet\n",
    "    - WaveRNN\n",
    "    - LPCNet\n",
    "- Flow\n",
    "    - <font color=\"#ff0000\">WaveFlow</font>\n",
    "    - WaveGlow\n",
    "    - FloWaveNet\n",
    "    - Parallel WaveNet\n",
    "- GAN\n",
    "    - WaveGAN\n",
    "    - <font color=\"#ff0000\">Parallel WaveGAN</font>\n",
    "    - <font color=\"#ff0000\">MelGAN</font>\n",
    "    - <font color=\"#ff0000\">Style MelGAN</font>\n",
    "    - <font color=\"#ff0000\">Multi Band MelGAN</font>\n",
    "    - <font color=\"#ff0000\">HiFi GAN</font>\n",
    "- VAE\n",
    "    - Wave-VAE\n",
    "- Diffusion\n",
    "    - WaveGrad\n",
    "    - DiffWave\n",
    "\n",
    "PaddleSpeech TTS 主要实现了百度的 `WaveFlow` 和一些主流的 GAN Vocoder, 在本教程中，我们使用 `Parallel WaveGAN` 作为声码器。\n",
    "\n",
    "<center><img src=\"https://ai-studio-static-online.cdn.bcebos.com/9eafa4e5642d45309e6e8883bff46380407b3858d0934bf5896868281316ce94\" width=\"700\"></center>\n",
    "<br><center>图1：Parallel WaveGAN 网络结构图</center></br>\n",
    "\n",
    "各 GAN Vocoder 的生成器和判别器的 Loss 的区别如下表格所示:\n",
    "\n",
    "Model  | Generator Loss |Discriminator Loss\n",
    ":-------------:| :------------:| :-----\n",
    "Mel GAN| adversarial loss <br> Feature Matching  | Multi-Scale Discriminator |\n",
    "Parallel Wave GAN|adversarial loss <br> Multi-resolution STFT loss  | adversarial loss|\n",
    "Multi-Band Mel GAN | adversarial loss <br> full band Multi-resolution STFT loss <br> sub band Multi-resolution STFT loss |Multi-Scale Discriminator|\n",
    "HiFi GAN |adversarial loss <br> Feature Matching <br>  Mel-Spectrogram Loss | Multi-Scale Discriminator <br> Multi-Period Discriminator| \n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 初始化声码器 Parallel WaveGAN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "vocoder = PWGGenerator(**pwg_config[\"generator_params\"])\n",
    "# 模型加载预训练参数\n",
    "vocoder.set_state_dict(paddle.load(pwg_checkpoint)[\"generator_params\"])\n",
    "vocoder.remove_weight_norm()\n",
    "# 推理阶段不启用 batch norm 和 dropout\n",
    "vocoder.eval()\n",
    "# 读取数据预处理阶段数据集的均值和标准差\n",
    "stat = np.load(pwg_stat)\n",
    "mu, std = stat\n",
    "mu, std = paddle.to_tensor(mu), paddle.to_tensor(std)\n",
    "pwg_normalizer = ZScore(mu, std)\n",
    "# 构建归一化的模型\n",
    "pwg_inference = PWGInference(pwg_normalizer, vocoder)\n",
    "pwg_inference.eval()\n",
    "print(\"Parallel WaveGAN done!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 调用声码器"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with paddle.no_grad():\n",
    "    wav = pwg_inference(mel)\n",
    "print(\"shape of wav (time x n_channels):%s\"%wav.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 绘制声码器输出的波形图\n",
    "wave_data = wav.numpy().T\n",
    "time = np.arange(0, wave_data.shape[1]) * (1.0 / fastspeech2_config.fs)\n",
    "fig, ax = plt.subplots(figsize=(16, 6))\n",
    "plt.plot(time, wave_data[0])\n",
    "plt.title('Waveform')\n",
    "plt.xlabel('Time (seconds)')\n",
    "plt.ylabel('Amplitude (normed)')\n",
    "plt.tight_layout()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 播放音频"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dp.Audio(wav.numpy().T, rate=fastspeech2_config.fs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 保存音频"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!mkdir output\n",
    "sf.write(\n",
    "    \"output/output.wav\",\n",
    "    wav.numpy(),\n",
    "    samplerate=fastspeech2_config.fs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 进阶 —— 个性化调节\n",
    "FastSpeech2 模型可以个性化地调节音素时长、音调和能量，通过一些简单的调节就可以获得一些有意思的效果。\n",
    "\n",
    "例如对于以下的原始音频`\"凯莫瑞安联合体的经济崩溃，迫在眉睫\"`。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 原始音频\n",
    "dp.display(dp.Audio(url=\"https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speed/x1_001.wav\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# speed x 1.2\n",
    "dp.display(dp.Audio(url=\"https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speed/x1.2_001.wav\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# speed x 0.8\n",
    "dp.display(dp.Audio(url=\"https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/speed/x0.8_001.wav\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# pitch x 1.3(童声)\n",
    "dp.display(dp.Audio(url=\"https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/child_voice/001.wav\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# robot\n",
    "dp.display(dp.Audio(url=\"https://paddlespeech.cdn.bcebos.com/Parakeet/docs/demos/robot/001.wav\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "具体实现代码请参考 [Style FastSpeech2](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/demos/style_fs2)。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 用 PaddleSpeech 训练 TTS 模型\n",
    "PaddleSpeech 的 examples 是按照 数据集/模型 的结构安排的:\n",
    "```text\n",
    "examples   \n",
    "├── aishell3\n",
    "│   ├── README.md\n",
    "│   ├── tts3\n",
    "│   └── vc0\n",
    "├── csmsc\n",
    "│   ├── README.md\n",
    "│   ├── tts2\n",
    "│   ├── tts3\n",
    "│   ├── voc1\n",
    "│   └── voc3\n",
    "├── ...\n",
    "└── ...\n",
    "```\n",
    "我们在每个数据集的 README.md 介绍了子目录和模型的对应关系, 在 TTS 中有如下对应关系:\n",
    "```text\n",
    "tts0 - Tacotron2\n",
    "tts1 - TransformerTTS\n",
    "tts2 - SpeedySpeech\n",
    "tts3 - FastSpeech2\n",
    "voc0 - WaveFlow\n",
    "voc1 - Parallel WaveGAN\n",
    "voc2 - MelGAN\n",
    "voc3 - MultiBand MelGAN\n",
    "```\n",
    "### 基于 CSMCS 数据集训练 FastSpeech2 模型\n",
    "```bash\n",
    "git clone https://github.com/PaddlePaddle/PaddleSpeech.git\n",
    "cd examples/csmsc/tts3\n",
    "```\n",
    "根据 README.md, 下载 CSMCS 数据集和其对应的强制对齐文件, 并放置在对应的位置\n",
    "```bash\n",
    "./run.sh\n",
    "```\n",
    "`run.sh` 中包含预处理、训练、合成、静态图推理等步骤:\n",
    "\n",
    "```bash\n",
    "#!/bin/bash\n",
    "set -e\n",
    "source path.sh\n",
    "gpus=0,1\n",
    "stage=0\n",
    "stop_stage=100\n",
    "conf_path=conf/default.yaml\n",
    "train_output_path=exp/default\n",
    "ckpt_name=snapshot_iter_153.pdz\n",
    "\n",
    "# with the following command, you can choice the stage range you want to run\n",
    "# such as `./run.sh --stage 0 --stop-stage 0`\n",
    "# this can not be mixed use with `$1`, `$2` ...\n",
    "source ${MAIN_ROOT}/utils/parse_options.sh || exit 1\n",
    "\n",
    "if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then\n",
    "    # prepare data\n",
    "    bash ./local/preprocess.sh ${conf_path} || exit -1\n",
    "fi\n",
    "if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then\n",
    "    # train model, all `ckpt` under `train_output_path/checkpoints/` dir\n",
    "    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1\n",
    "fi\n",
    "if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then\n",
    "    # synthesize, vocoder is pwgan\n",
    "    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1\n",
    "fi\n",
    "if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then\n",
    "    # synthesize_e2e, vocoder is pwgan\n",
    "    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1\n",
    "fi\n",
    "if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then\n",
    "    # inference with static model\n",
    "    CUDA_VISIBLE_DEVICES=${gpus} ./local/inference.sh ${train_output_path} || exit -1\n",
    "fi\n",
    "```\n",
    "\n",
    "### 基于 CSMCS 数据集训练 Parallel WaveGAN 模型\n",
    "```bash\n",
    "git clone https://github.com/PaddlePaddle/PaddleSpeech.git\n",
    "cd examples/csmsc/voc1\n",
    "```\n",
    "根据 README.md, 下载 CSMCS 数据集和其对应的强制对齐文件, 并放置在对应的位置\n",
    "```bash\n",
    "./run.sh\n",
    "```\n",
    "`run.sh` 中包含预处理、训练、合成等步骤:\n",
    "```bash\n",
    "#!/bin/bash\n",
    "set -e\n",
    "source path.sh\n",
    "gpus=0,1\n",
    "stage=0\n",
    "stop_stage=100\n",
    "conf_path=conf/default.yaml\n",
    "train_output_path=exp/default\n",
    "ckpt_name=snapshot_iter_5000.pdz\n",
    "\n",
    "# with the following command, you can choice the stage range you want to run\n",
    "# such as `./run.sh --stage 0 --stop-stage 0`\n",
    "# this can not be mixed use with `$1`, `$2` ...\n",
    "source ${MAIN_ROOT}/utils/parse_options.sh || exit 1\n",
    "\n",
    "if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then\n",
    "    # prepare data\n",
    "    ./local/preprocess.sh ${conf_path} || exit -1\n",
    "fi\n",
    "if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then\n",
    "    # train model, all `ckpt` under `train_output_path/checkpoints/` dir\n",
    "    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1\n",
    "fi\n",
    "if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then\n",
    "    # synthesize\n",
    "    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1\n",
    "fi\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# FAQ\n",
    "\n",
    "- 需要注意的问题\n",
    "- 经验与分享\n",
    "- 用户的其他问题"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 作业\n",
    "在 CSMSC 数据集上利用 FastSpeech2 和 Parallel WaveGAN 实现一个中文 TTS 系统。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 关注 PaddleSpeech\n",
    "请关注我们的 [Github Repo](https://github.com/PaddlePaddle/PaddleSpeech/)，非常欢迎加入以下微信群参与讨论：\n",
    "- 扫描二维码\n",
    "- 添加运营小姐姐微信\n",
    "- 通过后回复【语音】\n",
    "- 系统自动邀请加入技术群\n",
    "\n",
    "<center><img src=\"https://ai-studio-static-online.cdn.bcebos.com/bca0bc75dce14b53af44e374e64fc91aeeb13c075c894d6aabed033148f65377\" ></center>\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}


================================================
FILE: examples/aishell/.gitignore
================================================
data
ckpt*
demo_cache
*log
*profile

================================================
FILE: examples/aishell/README.md
================================================
# ASR

* asr0 - deepspeech2 Streaming/Non-Streaming
* asr1 - transformer/conformer Streaming/Non-Streaming
* ~~asr2 - transformer/conformer Streaming/Non-Streaming with Kaldi feature~~
* asr3 - wav2vec2 Non-Streaming


## Data

| Data Subset         | Duration in Seconds   |
| ------------------- | --------------------- |
| data/manifest.train | 1.23 ~ 14.53125       |
| data/manifest.dev   | 1.645 ~ 12.533        |
| data/manifest.test  | 1.859125 ~ 14.6999375 |


================================================
FILE: examples/aishell/asr0/.gitignore
================================================
exp
data
*log


================================================
FILE: examples/aishell/asr0/README.md
================================================
# DeepSpeech2 offline/online ASR with Aishell
This example contains code used to train a DeepSpeech2 offline or online model with [Aishell dataset](http://www.openslr.org/resources/33)

## Overview
All the scripts you need are in the `run.sh`. There are several stages in the `run.sh`, and each stage has its function.
| Stage | Function                                                     |
|:---- |:----------------------------------------------------------- |
| 0     | Process data. It includes: <br>       (1) Download the dataset <br>       (2) Calculate the CMVN of the train dataset <br>       (3) Get the vocabulary file <br>       (4) Get the manifest files of the train, development and test dataset |
| 1     | Train the model                                              |
| 2     | Get the final model by averaging the top-k models, set k = 1 means to choose the best model |
| 3     | Test the final model performance                             |
| 4     | Export the static graph model                                |
| 5     | Test the static graph model                                  |
| 6     | Infer the single audio file                                  |

You can choose to run a range of stages by setting `stage` and `stop_stage`.

For example, if you want to execute the code in stage 2 and stage 3, you can run this script:
```bash
bash run.sh --stage 2 --stop_stage 3
```
Or you can set `stage` equal to `stop_stage` to only run one stage.
For example, if you only want to run `stage 0`, you can use the script below:
```bash
bash run.sh --stage 0 --stop_stage 0
```
The document below will describe the scripts in the `run.sh` in detail.
## The environment variables
The path.sh contains the environment variable. 
```bash
source path.sh
```
This script needs to be run first.  

And another script is also needed:
```bash
source ${MAIN_ROOT}/utils/parse_options.sh
```
It will support the way of using `--variable value` in the shell scripts.

## The local variables
Some local variables are set in the `run.sh`. 
`gpus` denotes the GPU number you want to use. If you set `gpus=`,  it means you only use CPU. 

`stage` denotes the number of the stage you want to start from in the experiments.
`stop_stage` denotes the number of the stage you want to end at in the experiments.
`conf_path` denotes the config path of the model.
`avg_num` denotes the number K of top-K models you want to average to get the final model.
`model_type` denotes the model type: offline or online
`audio_file` denotes the file path of the single file you want to infer in stage 6
`ckpt` denotes the checkpoint prefix of the model, e.g. "deepspeech2"

You can set the local variables (except `ckpt`)  when you use the `run.sh`

For example, you can set the `gpus` and `avg_num` when you use the command line.:
```bash
bash run.sh --gpus 0,1 --avg_num 1
```
## Stage 0: Data processing
To use this example, you need to process data firstly and you can use stage 0 in the `run.sh` to do this. The code is shown below:

```bash
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
     # prepare data
     bash ./local/data.sh || exit -1
 fi
```
Stage 0 is for processing the data.
If you only want to process the data, you can run
```bash
bash run.sh --stage 0 --stop_stage 0
```
You can also just run these scripts in your command line.
```bash
source path.sh
bash ./local/data.sh
```
After processing the data, the `data` directory will look like this:
```bash
data/
|-- dev.meta
|-- lang_char
|   `-- vocab.txt
|-- manifest.dev
|-- manifest.dev.raw
|-- manifest.test
|-- manifest.test.raw
|-- manifest.train
|-- manifest.train.raw
|-- mean_std.json
|-- test.meta
`-- train.meta
```
## Stage 1: Model training
If you want to train the model, you can use stage 1 in the `run.sh`. The code is shown below.
```bash
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
     CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path}  ${ckpt}
 fi
```
If you want to train the model, you can use the script below to execute stage 0 and stage 1:
```bash
bash run.sh --stage 0 --stop_stage 1
```
Or you can run these scripts in the command line (only use CPU).
```bash
source path.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/deepspeech2.yaml deepspeech2
```
If you want to use GPU, you can run these scripts in the command line (suppose you have only 1 GPU).
```bash
source path.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES=0 ./local/train.sh conf/deepspeech2.yaml deepspeech2
```

## Stage 2:  Top-k Models Averaging
After training the model,  we need to get the final model for testing and inference. In every epoch, the model checkpoint is saved, so we can choose the best model from them based on the validation loss or we can sort them and average the parameters of the top-k models to get the final model.  We can use stage 2 to do this, and the code is shown below:
```bash
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     # avg n best model
     avg.sh best exp/${ckpt}/checkpoints ${avg_num}
 fi
```
The `avg.sh` script is in `../../../utils/`, which is added to `PATH` in `path.sh`.
If you want to get the final model,  you can use the script below to execute stage 0, stage 1, and stage 2:
```bash
bash run.sh --stage 0 --stop_stage 2
```
or you can run these scripts in the command line (only use CPU).

```bash
source path.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/deepspeech2.yaml deepspeech2
avg.sh best exp/deepspeech2/checkpoints 1
```
## Stage 3: Model Testing
The test stage is to evaluate the model performance. The code of the test stage is shown below:
```bash
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
     # test ckpt avg_n
     CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
 fi
```
If you want to train a model and test it,  you can use the script below to execute stage 0, stage 1,  stage 2, and stage 3 :
```bash
bash run.sh --stage 0 --stop_stage 3
```
or you can run these scripts in the command line (only use CPU).
```bash
source path.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/deepspeech2.yaml deepspeech2
avg.sh best exp/deepspeech2/checkpoints 1
CUDA_VISIBLE_DEVICES= ./local/test.sh conf/deepspeech2.yaml conf/tuning/decode.yaml exp/deepspeech2/checkpoints/avg_1
```
## Pretrained Model
You can get the pretrained models from [this](../../../docs/source/released_model.md).

Use the `tar` command to unpack the model, and then you can use the script below to test it.

For example:
```
wget https://paddlespeech.cdn.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_offline_aishell_ckpt_1.0.1.model.tar.gz
tar xzvf asr0_deepspeech2_offline_aishell_ckpt_1.0.1.model.tar.gz
source path.sh
# If you have already processed the data and generated the manifest files, you can skip the following 2 steps
bash local/data.sh --stage -1 --stop_stage -1
bash local/data.sh --stage 2  --stop_stage 2

CUDA_VISIBLE_DEVICES= ./local/test.sh conf/deepspeech2.yaml conf/tuning/decode.yaml exp/deepspeech2/checkpoints/avg_10
```
The performance of the released models is shown in [this](./RESULTS.md)
## Stage 4: Static graph model Export
This stage is to transform dygraph to static graph.
```bash
 if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
     # export ckpt avg_n
     CUDA_VISIBLE_DEVICES=0 ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit
 fi
```
If you already have a dynamic graph model, you can run this script:
```bash
source path.sh
./local/export.sh conf/deepspeech2.yaml exp/deepspeech2/checkpoints/avg_10 exp/deepspeech2/checkpoints/avg_10.jit
```
## Stage 5: Static graph Model Testing
Similar to stage 3, the static graph model can also be tested.
```bash
 if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
     # test export ckpt avg_n
     CUDA_VISIBLE_DEVICES=0 ./local/test_export.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt}.jit || exit -1
 fi
```
If you already have exported the static graph, you can run this script:
```bash
CUDA_VISIBLE_DEVICES= ./local/test_export.sh conf/deepspeech2.yaml conf/tuning/decode.yaml exp/deepspeech2/checkpoints/avg_10.jit
```
## Stage 6: Single Audio File Inference
In some situations, you may want to use the trained model to run inference on a single audio file. You can use stage 6 for this. The code is shown below:
```bash
if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then
     # test a single .wav file
     CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${audio_file}
 fi
```
you can train the model by yourself, or you can download the pretrained model by the script below:
```bash
wget https://paddlespeech.cdn.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_offline_aishell_ckpt_1.0.1.model.tar.gz
tar xzvf asr0_deepspeech2_offline_aishell_ckpt_1.0.1.model.tar.gz
```
You can download the audio demo:
```bash
wget -nc https://paddlespeech.cdn.bcebos.com/datasets/single_wav/zh/demo_01_03.wav -P data/
```
You need to prepare an audio file or use the audio demo above, please confirm the sample rate of the audio is 16K. You can get the result of the audio demo by running the script below.
```bash
CUDA_VISIBLE_DEVICES= ./local/test_wav.sh conf/deepspeech2.yaml conf/tuning/decode.yaml exp/deepspeech2/checkpoints/avg_10 data/demo_01_03.wav
```


================================================
FILE: examples/aishell/asr0/RESULTS.md
================================================
# Aishell-1

## Deepspeech2 Streaming

| Model | Number of Params | Release | Config | Test set | Valid Loss | CER | 
| --- | --- | --- | --- | --- | --- | --- | 
| DeepSpeech2 | 45.18M | r0.2.0 | conf/deepspeech2_online.yaml + U2 Data pipeline and spec aug + fbank161 | test | 6.876979827880859 | 0.0666 |
| DeepSpeech2 | 45.18M | r0.2.0 | conf/deepspeech2_online.yaml + spec aug + fbank161 | test | 7.679287910461426 | 0.0718 |
| DeepSpeech2 | 45.18M | r0.2.0 | conf/deepspeech2_online.yaml + spec aug | test | 7.708217620849609| 0.078 |
| DeepSpeech2 | 45.18M | v2.2.0 | conf/deepspeech2_online.yaml + spec aug | test | 7.994938373565674 | 0.080 |  

## Deepspeech2 Non-Streaming

| Model | Number of Params | Release | Config | Test set | Valid Loss | CER |  
| --- | --- | --- | --- | --- | --- | --- |
| DeepSpeech2 | 122.3M | r1.0.1 | conf/deepspeech2.yaml + U2 Data pipeline and spec aug + fbank161 | test | 5.780756044387817 | 0.055400 | 
| DeepSpeech2 | 58.4M | v2.2.0 | conf/deepspeech2.yaml + spec aug | test | 5.738585948944092 | 0.064000 |  
| DeepSpeech2 | 58.4M | v2.1.0 | conf/deepspeech2.yaml + spec aug | test | 7.483316898345947 | 0.077860 |  
| DeepSpeech2 | 58.4M | v2.1.0 | conf/deepspeech2.yaml | test | 7.299022197723389 | 0.078671 |
| DeepSpeech2 | 58.4M | v2.0.0 | conf/deepspeech2.yaml | test | - | 0.078977 |  
| --- | --- | --- | --- | --- | --- | --- |  
| DeepSpeech2 | 58.4M | v1.8.5 | - | test | - | 0.080447 |


================================================
FILE: examples/aishell/asr0/conf/deepspeech2.yaml
================================================
# https://yaml.org/type/float.html
###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.train
dev_manifest: data/manifest.dev
test_manifest: data/manifest.test
min_input_len: 0.0
max_input_len: 27.0 # second
min_output_len: 0.0
max_output_len: .inf
min_output_input_ratio: 0.00
max_output_input_ratio: .inf

###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_char/vocab.txt 
spm_model_prefix: ''
unit_type: 'char'
preprocess_config: conf/preprocess.yaml
feat_dim: 161
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 64
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 8
subsampling_factor: 1
num_encs: 1

############################################
#           Network Architecture           #
############################################
num_conv_layers: 2
num_rnn_layers: 5
rnn_layer_size: 1024
rnn_direction: bidirect # [forward, bidirect]
num_fc_layers: 0
fc_layers_size_list: -1,
use_gru: False 
blank_id: 0
  
  
###########################################
#                Training                 #
###########################################
n_epoch: 50
accum_grad: 1
lr: 5.0e-4
lr_decay: 0.93
weight_decay: 1.0e-6
global_grad_clip: 3.0
dist_sampler: False
log_interval: 1
checkpoint:
  kbest_n: 50
  latest_n: 5

  
================================================
FILE: examples/aishell/asr0/conf/deepspeech2_online.yaml
================================================
# https://yaml.org/type/float.html
###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.train
dev_manifest: data/manifest.dev
test_manifest: data/manifest.test
min_input_len: 0.0
max_input_len: 27.0 # second
min_output_len: 0.0
max_output_len: .inf
min_output_input_ratio: 0.00
max_output_input_ratio: .inf

###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_char/vocab.txt 
spm_model_prefix: ''
unit_type: 'char'
preprocess_config: conf/preprocess.yaml
feat_dim: 161
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 64
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 8
subsampling_factor: 1
num_encs: 1

############################################
#           Network Architecture           #
############################################
num_conv_layers: 2
num_rnn_layers: 5
rnn_layer_size: 1024
rnn_direction: forward # [forward, bidirect]
num_fc_layers: 0
fc_layers_size_list: -1,
use_gru: False 
blank_id: 0
  
  
###########################################
#                Training                 #
###########################################
n_epoch: 30
accum_grad: 1
lr: 5.0e-4
lr_decay: 0.93
weight_decay: 1.0e-6
global_grad_clip: 3.0
dist_sampler: False
log_interval: 100
checkpoint:
  kbest_n: 50
  latest_n: 5

  
================================================
FILE: examples/aishell/asr0/conf/preprocess.yaml
================================================
process:
  # extract kaldi fbank from PCM
  - type: fbank_kaldi
    fs: 16000
    n_mels: 161
    n_shift: 160
    win_length: 400
    dither: 0.1
  - type: cmvn_json
    cmvn_path: data/mean_std.json
  # these three processes together are known as SpecAugment
  - type: time_warp
    max_time_warp: 5
    inplace: true
    mode: PIL
  - type: freq_mask
    F: 30
    n_mask: 2
    inplace: true
    replace_with_zero: false
  - type: time_mask
    T: 40
    n_mask: 2
    inplace: true
    replace_with_zero: false


================================================
FILE: examples/aishell/asr0/conf/tuning/chunk_decode.yaml
================================================
decode_batch_size: 32
error_rate_type: cer 
decoding_method: ctc_beam_search
lang_model_path: data/lm/zh_giga.no_cna_cmn.prune01244.klm
alpha: 2.2 #1.9
beta: 4.3
beam_size: 500
cutoff_prob: 0.99
cutoff_top_n: 40
num_proc_bsearch: 10


================================================
FILE: examples/aishell/asr0/conf/tuning/decode.yaml
================================================
decode_batch_size: 128
error_rate_type: cer 
decoding_method: ctc_beam_search
lang_model_path: data/lm/zh_giga.no_cna_cmn.prune01244.klm
alpha: 2.2
beta: 4.3
beam_size: 500
cutoff_prob: 0.99
cutoff_top_n: 40
num_proc_bsearch: 10


================================================
FILE: examples/aishell/asr0/local/data.sh
================================================
#!/bin/bash
# Prepare the Aishell data for this recipe.
#
# Stages (select a range with `--stage N --stop_stage M`, parsed by parse_options.sh):
#   -1  download the corpus and generate the raw manifests
#    0  compute mean and stddev for the normalizer
#    1  build the character vocabulary
#    2  format the manifests with token ids and vocab size
#
# Requires MAIN_ROOT to be set in the environment.
# Exits non-zero as soon as any stage fails.

stage=-1
stop_stage=100

dict_dir=data/lang_char

. ${MAIN_ROOT}/utils/parse_options.sh || exit -1;

mkdir -p data
mkdir -p ${dict_dir}
TARGET_DIR=${MAIN_ROOT}/dataset
mkdir -p ${TARGET_DIR}

if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
    # download data, generate manifests
    python3 ${TARGET_DIR}/aishell/aishell.py \
    --manifest_prefix="data/manifest" \
    --target_dir="${TARGET_DIR}/aishell"

    if [ $? -ne 0 ]; then
        echo "Prepare Aishell failed. Terminated."
        exit 1
    fi

    # keep the untouched manifests as *.raw; stage 2 writes the formatted ones
    for dataset in train dev test; do
        mv data/manifest.${dataset} data/manifest.${dataset}.raw
    done
fi

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # compute mean and stddev for normalizer
    num_workers=$(nproc)
    python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
    --manifest_path="data/manifest.train.raw" \
    --spectrum_type="fbank" \
    --feat_dim=161 \
    --delta_delta=false \
    --stride_ms=10 \
    --window_ms=25 \
    --sample_rate=16000 \
    --use_dB_normalization=False \
    --num_samples=2000 \
    --num_workers=${num_workers} \
    --output_path="data/mean_std.json"

    if [ $? -ne 0 ]; then
        echo "Compute mean and stddev failed. Terminated."
        exit 1
    fi
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # build vocabulary
    python3 ${MAIN_ROOT}/utils/build_vocab.py \
    --unit_type="char" \
    --count_threshold=0 \
    --vocab_path="${dict_dir}/vocab.txt" \
    --manifest_paths "data/manifest.train.raw" "data/manifest.dev.raw"

    if [ $? -ne 0 ]; then
        echo "Build vocabulary failed. Terminated."
        exit 1
    fi
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # format manifest with tokenids, vocab size
    # The three datasets are formatted in parallel. `exit 1` inside a
    # background group only terminates that group, so remember every PID
    # and check each job's status explicitly afterwards.
    pids=()
    for dataset in train dev test; do
    {
        python3 ${MAIN_ROOT}/utils/format_data.py \
                --cmvn_path "data/mean_std.json" \
                --unit_type "char" \
                --vocab_path="${dict_dir}/vocab.txt" \
                --manifest_path="data/manifest.${dataset}.raw" \
                --output_path="data/manifest.${dataset}"

        if [ $? -ne 0 ]; then
                echo "Format manifest failed. Terminated."
                exit 1
        fi
    } &
    pids+=($!)
    done

    # A bare `wait` always returns 0; waiting on each PID returns that
    # job's own exit status.
    failed=0
    for pid in "${pids[@]}"; do
        wait ${pid} || failed=1
    done
    if [ ${failed} -ne 0 ]; then
        exit 1
    fi
fi

echo "Aishell data preparation done."
exit 0


================================================
FILE: examples/aishell/asr0/local/download_lm_ch.sh
================================================
#!/bin/bash
# Download the Chinese language model referenced by `lang_model_path`
# in the decode configs into data/lm.
#
# Requires MAIN_ROOT to be set. The `download` helper is expected to be
# provided by utils/utility.sh (TODO confirm its exact contract; it is
# called here as `download URL MD5 TARGET`).
# Exits 1 when the download fails, 0 otherwise.

. ${MAIN_ROOT}/utils/utility.sh

DIR=data/lm
mkdir -p ${DIR}

URL='https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm'
MD5="29e02312deb2e59b3c8686c7966d4fe3"
TARGET=${DIR}/zh_giga.no_cna_cmn.prune01244.klm

echo "Start downloading the language model. The language model is large, please wait for a moment ..."
# The helper's own output is discarded; only its exit status is used.
download "${URL}" "${MD5}" "${TARGET}" > /dev/null 2>&1
if [ $? -ne 0 ]; then
    echo "Fail to download the language model!"
    exit 1
else
    echo "Download the language model successfully"
fi


exit 0


================================================
FILE: examples/aishell/asr0/local/export.sh
================================================
#!/bin/bash
# Export a checkpoint by running ${BIN_DIR}/export.py.
#
# Usage: export.sh config_path ckpt_prefix jit_model_path
# The GPU count passed to export.py is the number of comma-separated
# entries in CUDA_VISIBLE_DEVICES (0 when it is empty).

if [ $# -ne 3 ]; then
    echo "usage: $0 config_path ckpt_prefix jit_model_path"
    exit -1
fi

cfg=$1
ckpt=$2
jit_path=$3

ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

if ! python3 -u ${BIN_DIR}/export.py \
        --ngpu ${ngpu} \
        --config ${cfg} \
        --checkpoint_path ${ckpt} \
        --export_path ${jit_path}; then
    echo "Failed in export!"
    exit 1
fi

exit 0


================================================
FILE: examples/aishell/asr0/local/test.sh
================================================
#!/bin/bash
# Decode the test manifest with a checkpoint and score the result.
#
# Usage: test.sh config_path decode_config_path ckpt_path_prefix
# Outputs are written next to the checkpoint prefix:
#   <prefix>.rsl, <prefix>.rsl.text and <prefix>.error

if [ $# -ne 3 ]; then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix"
    exit -1
fi

config_path=$1
decode_config_path=$2
ckpt_prefix=$3

# stage 101 lies outside the default range, so the sclite scoring block
# below only runs if these values are edited.
stage=0
stop_stage=100

ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

# download language model
bash local/download_lm_ch.sh || exit 1

if [[ ${stage} -le 0 && ${stop_stage} -ge 0 ]]; then
    # format the reference test file
    python3 ${MAIN_ROOT}/utils/format_rsl.py \
        --origin_ref data/manifest.test.raw \
        --trans_ref data/manifest.test.text

    if ! python3 -u ${BIN_DIR}/test.py \
            --ngpu ${ngpu} \
            --config ${config_path} \
            --decode_cfg ${decode_config_path} \
            --result_file ${ckpt_prefix}.rsl \
            --checkpoint_path ${ckpt_prefix}; then
        echo "Failed in evaluation!"
        exit 1
    fi

    # format the hyp file
    python3 ${MAIN_ROOT}/utils/format_rsl.py \
        --origin_hyp ${ckpt_prefix}.rsl \
        --trans_hyp ${ckpt_prefix}.rsl.text

    # score hypothesis against reference; the report goes to <prefix>.error
    python3 ${MAIN_ROOT}/utils/compute-wer.py --char=1 --v=1 \
        data/manifest.test.text ${ckpt_prefix}.rsl.text > ${ckpt_prefix}.error
fi

if [[ ${stage} -le 101 && ${stop_stage} -ge 101 ]]; then
    # same reference/hypothesis conversion, but in sclite format
    python3 ${MAIN_ROOT}/utils/format_rsl.py \
        --origin_ref data/manifest.test.raw \
        --trans_ref_sclite data/manifest.test.text.sclite

    python3 ${MAIN_ROOT}/utils/format_rsl.py \
        --origin_hyp ${ckpt_prefix}.rsl \
        --trans_hyp_sclite ${ckpt_prefix}.rsl.text.sclite

    mkdir -p ${ckpt_prefix}_sclite
    sclite -i wsj -r data/manifest.test.text.sclite -h  ${ckpt_prefix}.rsl.text.sclite  -e utf-8 -o all -O ${ckpt_prefix}_sclite -c NOASCII
fi

exit 0


================================================
FILE: examples/aishell/asr0/local/test_export.sh
================================================
#!/bin/bash
# Evaluate an exported model by running ${BIN_DIR}/test_export.py.
#
# Usage: test_export.sh config_path decode_config_path <exported model path>
# The result file is written to <exported model path>.rsl.

if [ $# -ne 3 ]; then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix"
    exit -1
fi

cfg=$1
decode_cfg=$2
jit_path=$3

ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

# download language model (its output is discarded here)
bash local/download_lm_ch.sh > /dev/null 2>&1 || exit 1

if ! python3 -u ${BIN_DIR}/test_export.py \
        --ngpu ${ngpu} \
        --config ${cfg} \
        --decode_cfg ${decode_cfg} \
        --result_file ${jit_path}.rsl \
        --export_path ${jit_path}; then
    echo "Failed in evaluation!"
    exit 1
fi

exit 0


================================================
FILE: examples/aishell/asr0/local/test_wav.sh
================================================
#!/bin/bash
# Run recognition on a single audio file with a checkpoint.
#
# Usage: test_wav.sh config_path decode_config_path ckpt_path_prefix audio_file
# The result file is written to <ckpt_path_prefix>.rsl.
# Exits 1 if the audio file is unavailable, the language model cannot be
# downloaded, or the evaluation fails.

if [ $# != 4 ];then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix audio_file"
    exit -1
fi

ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
decode_config_path=$2
ckpt_prefix=$3
audio_file=$4

mkdir -p data
# Only fetch the demo clip when the requested file is missing, so a user
# supplied file can be used without network access. A failed download is
# reported by the existence check below.
if [ ! -f "${audio_file}" ]; then
    wget -nc https://paddlespeech.cdn.bcebos.com/datasets/single_wav/zh/demo_01_03.wav -P data/
fi

if [ ! -f "${audio_file}" ]; then
    echo "Please input the right audio_file path"
    exit 1
fi

# download language model
bash local/download_lm_ch.sh
if [ $? -ne 0 ]; then
   exit 1
fi

python3 -u ${BIN_DIR}/test_wav.py \
--ngpu ${ngpu} \
--config ${config_path} \
--decode_cfg ${decode_config_path} \
--result_file ${ckpt_prefix}.rsl \
--checkpoint_path ${ckpt_prefix} \
--audio_file ${audio_file}

if [ $? -ne 0 ]; then
    echo "Failed in evaluation!"
    exit 1
fi


exit 0


================================================
FILE: examples/aishell/asr0/local/train.sh
================================================
#!/bin/bash
# Train a model with ${BIN_DIR}/train.py; outputs go to exp/<ckpt_name>.
#
# Usage: CUDA_VISIBLE_DEVICES=0 train.sh config_path ckpt_name [ips]
#   config_path  training config passed as --config
#   ckpt_name    name of the output directory under exp/
#   ips          optional value forwarded as --ips to paddle.distributed.launch
#
# When CUDA_VISIBLE_DEVICES is empty (0 GPUs) train.py is run directly;
# otherwise it is started through paddle.distributed.launch.
# Exits 1 when training fails, 0 otherwise.

if [ $# -lt 2 ] || [ $# -gt 3 ];then
    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
    exit -1
fi

ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
ckpt_name=$2
ips=$3

# only pass --ips to the launcher when it was given
if [ -z "${ips}" ];then
  ips_config=
else
  ips_config="--ips="${ips}
fi

mkdir -p exp

# seed may break model convergence
seed=10086
if [ ${seed} != 0 ]; then
    export FLAGS_cudnn_deterministic=True
fi

# default memory allocator strategy may cause gpu training hang
# for no OOM raised when memory exhausted
export FLAGS_allocator_strategy=naive_best_fit

if [ ${ngpu} == 0 ]; then
python3 -u ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--config ${config_path} \
--output exp/${ckpt_name} \
--seed ${seed}
else
python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--config ${config_path} \
--output exp/${ckpt_name} \
--seed ${seed}
fi
# Capture the training status right away: the cleanup `if` below would
# otherwise overwrite $? and hide a training failure.
train_status=$?

if [ ${seed} != 0 ]; then
    unset FLAGS_cudnn_deterministic
fi

if [ ${train_status} -ne 0 ]; then
    echo "Failed in training!"
    exit 1
fi

exit 0


================================================
FILE: examples/aishell/asr0/path.sh
================================================
# Environment shared by the scripts of this example; intended to be sourced.

# Repository root, resolved three levels above the current working directory.
export MAIN_ROOT=$(realpath ${PWD}/../../../)

# Location of the model-specific entry points (train.py, test_wav.py, ...).
MODEL=deepspeech2
export BIN_DIR=${MAIN_ROOT}/paddlespeech/s2t/exps/${MODEL}/bin

# Search paths: repository root and its utils/ for executables, the root for
# Python modules, /usr/local/lib/ for shared libraries.
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/

# Locale and Python runtime behaviour.
export LC_ALL=C
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONDONTWRITEBYTECODE=1


================================================
FILE: examples/aishell/asr0/run.sh
================================================
#!/bin/bash
# Staged recipe driver: data preparation, training, checkpoint averaging,
# testing, export, testing of the exported model, and single-file decoding.
# The defaults below are plain shell variables set before parse_options.sh is
# sourced (presumably overridable as `--name value`; confirm in parse_options.sh).
set -e
source path.sh

gpus=0,1,2,3
stage=0
stop_stage=100
conf_path=conf/deepspeech2.yaml    #conf/deepspeech2.yaml or conf/deepspeech2_online.yaml
ips=            #xx.xx.xx.xx,xx.xx.xx.xx
decode_conf_path=conf/tuning/decode.yaml
avg_num=10
audio_file=data/demo_01_03.wav

source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

# Checkpoint directory name: config file name up to its first dot.
ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}')
avg_ckpt=avg_${avg_num}
echo "checkpoint name ${ckpt}"

# Prefix of the averaged checkpoint shared by the test/export stages.
avg_prefix=exp/${ckpt}/checkpoints/${avg_ckpt}

# Succeeds when stage number $1 lies within [stage, stop_stage].
in_stage_range() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}


# stage 0: prepare data
if in_stage_range 0; then
    bash ./local/data.sh || exit -1
fi

# stage 1: train model, all `ckpt` under `exp` dir
if in_stage_range 1; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips}
fi

# stage 2: avg n best model
if in_stage_range 2; then
    avg.sh best exp/${ckpt}/checkpoints ${avg_num}
fi

# stage 3: test ckpt avg_n
if in_stage_range 3; then
    CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} ${avg_prefix}|| exit -1
fi

# stage 4: export ckpt avg_n
if in_stage_range 4; then
    CUDA_VISIBLE_DEVICES=0 ./local/export.sh ${conf_path} ${avg_prefix} ${avg_prefix}.jit
fi

# stage 5: test export ckpt avg_n
if in_stage_range 5; then
    CUDA_VISIBLE_DEVICES=0 ./local/test_export.sh ${conf_path} ${decode_conf_path} ${avg_prefix}.jit|| exit -1
fi

# Optionally, you can add LM and test it with runtime.
# stage 6: test a single .wav file
if in_stage_range 6; then
    CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} ${decode_conf_path} ${avg_prefix} ${audio_file} || exit -1
fi


================================================
FILE: examples/aishell/asr1/.gitignore
================================================
data
exp
log


================================================
FILE: examples/aishell/asr1/README.md
================================================
# Transformer/Conformer ASR with Aishell
This example contains code used to train a [u2](https://arxiv.org/pdf/2012.05481.pdf) model (Transformer or [Conformer](https://arxiv.org/pdf/2005.08100.pdf) model) with [Aishell dataset](http://www.openslr.org/resources/33)
## Overview
All the scripts you need are in `run.sh`. There are several stages in `run.sh`, and each stage has its function.
| Stage | Function                                                     |
|:---- |:----------------------------------------------------------- |
| 0     | Process data. It includes: <br>       (1) Download the dataset <br>       (2) Calculate the CMVN of the train dataset <br>       (3) Get the vocabulary file <br>       (4) Get the manifest files of the train, development and test dataset |
| 1     | Train the model                                              |
| 2     | Get the final model by averaging the top-k models, set k = 1 means to choose the best model |
| 3     | Test the final model performance                             |
| 4     | Get ctc alignment of test data using the final model         |
| 5     | Infer the single audio file                                  |

You can choose to run a range of stages by setting `stage` and `stop_stage`.

For example, if you want to execute the code in stage 2 and stage 3, you can run this script:
```bash
bash run.sh --stage 2 --stop_stage 3
```
Or you can set `stage` equal to `stop_stage` to run only one stage.
For example, if you only want to run `stage 0`, you can use the script below:
```bash
bash run.sh --stage 0 --stop_stage 0
```
The document below will describe the scripts in `run.sh` in detail.
## The Environment Variables
The path.sh contains the environment variables. 
```bash
source path.sh
```
This script needs to be run first. And another script is also needed:
```bash
source ${MAIN_ROOT}/utils/parse_options.sh
```
It will support the way of using `--variable value` in the shell scripts.
## The Local Variables
Some local variables are set in `run.sh`. 
`gpus` denotes the IDs of the GPUs you want to use, separated by commas (e.g. `0,1`). If you set `gpus=`, only the CPU is used.
`stage` denotes the number of the stage you want to start from in the experiments.
`stop_stage` denotes the number of the stage you want to end at in the experiments.
`conf_path` denotes the config path of the model.
`avg_num` denotes the number K of top-K models you want to average to get the final model.
`audio_file` denotes the file path of the single file you want to infer in stage 5.
`ckpt` denotes the checkpoint prefix of the model, e.g. "conformer"
You can set the local variables (except `ckpt`) when you use `run.sh`

For example, you can set the `gpus` and `avg_num` when you use the command line.:
```bash
bash run.sh --gpus 0,1 --avg_num 20
```
## Stage 0: Data Processing
To use this example, you need to process data firstly and you can use stage 0 in `run.sh` to do this. The code is shown below:
```bash
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
     # prepare data
     bash ./local/data.sh || exit -1
 fi
```

Stage 0 is for processing the data.

If you only want to process the data, you can run:
```bash
bash run.sh --stage 0 --stop_stage 0
```
You can also just run these scripts in your command line.
```bash
source path.sh
bash ./local/data.sh
```
After processing the data, the ``data`` directory will look like this:
```bash
data/
|-- dev.meta
|-- lang_char
|   `-- vocab.txt
|-- manifest.dev
|-- manifest.dev.raw
|-- manifest.test
|-- manifest.test.raw
|-- manifest.train
|-- manifest.train.raw
|-- mean_std.json
|-- test.meta
`-- train.meta
```
## Stage 1: Model Training
If you want to train the model, you can use stage 1 in `run.sh`. The code is shown below.
```bash
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
     CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
 fi
```
If you want to train the model, you can use the script below to execute stage 0 and stage 1:
```bash
bash run.sh --stage 0 --stop_stage 1
```
or you can run these scripts in the command line (only use CPU).
```bash
source path.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/conformer.yaml conformer
```
## Stage 2: Top-k Models Averaging
After training the model, we need to get the final model for testing and inference. In every epoch, the model checkpoint is saved, so we can choose the best model from them based on the validation loss or we can sort them and average the parameters of the top-k models to get the final model. We can use stage 2 to do this, and the code is shown below:
```bash
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     # avg n best model
     avg.sh best exp/${ckpt}/checkpoints ${avg_num}
 fi
```
The `avg.sh` script is located in `../../../utils/`, which `path.sh` adds to `PATH`.
If you want to get the final model, you can use the script below to execute stage 0, stage 1, and stage 2:
```bash
bash run.sh --stage 0 --stop_stage 2
```
or you can run these scripts in the command line (only use CPU).
```bash
source path.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/conformer.yaml conformer
avg.sh best exp/conformer/checkpoints 20
```
## Stage 3: Model Testing
The test stage is to evaluate the model performance. The code of the test stage is shown below:
```bash
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
     # test ckpt avg_n
     CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
 fi
```
If you want to train a model and test it, you can use the script below to execute stage 0, stage 1, stage 2, and stage 3 :
```bash
bash run.sh --stage 0 --stop_stage 3
```
or you can run these scripts in the command line (only use CPU).
```bash
source path.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/conformer.yaml conformer
avg.sh best exp/conformer/checkpoints 20
CUDA_VISIBLE_DEVICES= ./local/test.sh conf/conformer.yaml exp/conformer/checkpoints/avg_20
```
## Pretrained Model
You can get the pretrained transformer or conformer from [this](../../../docs/source/released_model.md)

Use the `tar` command to unpack the model, and then you can use the test script to evaluate it.

For example:
```
wget https://paddlespeech.cdn.bcebos.com/s2t/aishell/asr1/asr1_transformer_aishell_ckpt_0.1.1.model.tar.gz
tar xzvf asr1_transformer_aishell_ckpt_0.1.1.model.tar.gz
source path.sh
# If you have already processed the data and generated the manifest files, you can skip the following 2 steps
bash local/data.sh --stage -1 --stop_stage -1
bash local/data.sh --stage 2 --stop_stage 2
CUDA_VISIBLE_DEVICES= ./local/test.sh conf/transformer.yaml exp/transformer/checkpoints/avg_20
```
[The performance of the released models](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/examples/aishell/asr1/RESULTS.md)
## Stage 4: CTC Alignment 
If you want to get the alignment between the audio and the text, you can use the ctc alignment. The code of this stage is shown below:
```bash
 if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
     # ctc alignment of test data
     CUDA_VISIBLE_DEVICES=0 ./local/align.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
 fi
```
If you want to train the model, test it and do the alignment, you can use the script below to execute stage 0 through stage 4:
```bash
bash run.sh --stage 0 --stop_stage 4
```
or if you only need to train a model and do the alignment, you can use these scripts to skip stage 3 (the test stage):
```bash
bash run.sh --stage 0 --stop_stage 2
bash run.sh --stage 4 --stop_stage 4
```
or you can also use these scripts in the command line (only use CPU).
```bash
source path.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/conformer.yaml conformer
avg.sh best exp/conformer/checkpoints 20
# test stage is optional
CUDA_VISIBLE_DEVICES= ./local/test.sh conf/conformer.yaml exp/conformer/checkpoints/avg_20
CUDA_VISIBLE_DEVICES= ./local/align.sh conf/conformer.yaml exp/conformer/checkpoints/avg_20
```
## Stage 5: Single Audio File Inference
In some situations, you want to use the trained model to do the inference for the single audio file. You can use stage 5. The code is shown below
```bash
 if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
     # test a single .wav file
     CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${audio_file} || exit -1
 fi
```
You can train the model yourself using ```bash run.sh --stage 0 --stop_stage 3```, or you can download the pretrained model with the script below:
```bash
wget https://paddlespeech.cdn.bcebos.com/s2t/aishell/asr1/asr1_transformer_aishell_ckpt_0.1.1.model.tar.gz
tar xzvf asr1_transformer_aishell_ckpt_0.1.1.model.tar.gz
```
You can download the audio demo:
```bash
wget -nc https://paddlespeech.cdn.bcebos.com/datasets/single_wav/zh/demo_01_03.wav -P data/
```
You need to prepare an audio file or use the audio demo above, please confirm the sample rate of the audio is 16K. You can get the result by running the script below.
```bash
CUDA_VISIBLE_DEVICES= ./local/test_wav.sh conf/transformer.yaml exp/transformer/checkpoints/avg_20 data/demo_01_03.wav
```


================================================
FILE: examples/aishell/asr1/RESULTS.md
================================================
# Aishell

## RoFormer Streaming
paddle version: 2.5.0  
paddlespeech version: 1.5.0

Tesla V100-SXM2-32GB: 1 node, 4 card
Global BatchSize: 32 * 4
Training Done: 1 day, 12:56:39.639646
### `decoding.decoding_chunk_size=16`

> chunk_size=16, ((16 - 1) * 4 + 7) * 10ms = (16 * 4 + 3) * 10ms = 670ms

| Model | Params | Config | Augmentation| Test set | Decode method | Chunk Size & Left Chunks | Loss | CER |  
| --- | --- | --- | --- | --- | --- | --- | --- | --- |  
| roformer | 44.80M | conf/chunk_roformer.yaml | spec_aug | test | attention | 16, -1 | - |  5.63 |  
| roformer | 44.80M | conf/chunk_roformer.yaml | spec_aug | test | ctc_greedy_search | 16, -1 | - | 6.13 |  
| roformer | 44.80M | conf/chunk_roformer.yaml | spec_aug | test | ctc_prefix_beam_search | 16, -1 | - | 6.13 |  
| roformer | 44.80M | conf/chunk_roformer.yaml | spec_aug | test | attention_rescoring | 16, -1 |  - | 5.44 |  

### `decoding.decoding_chunk_size=-1`

| Model | Params | Config | Augmentation| Test set | Decode method | Chunk Size & Left Chunks | Loss | CER |  
| --- | --- | --- | --- | --- | --- | --- | --- | --- |  
| roformer | 44.80M | conf/chunk_roformer.yaml | spec_aug | test | attention | -1, -1 | - | 5.39 |  
| roformer | 44.80M | conf/chunk_roformer.yaml | spec_aug | test | ctc_greedy_search | -1, -1 | - |  5.51 |  
| roformer | 44.80M | conf/chunk_roformer.yaml | spec_aug | test | ctc_prefix_beam_search | -1, -1 | - | 5.51 | 
| roformer | 44.80M | conf/chunk_roformer.yaml | spec_aug | test | attention_rescoring | -1, -1 |  - | 4.99 |  


## Conformer Streaming
paddle version: 2.2.2  
paddlespeech version: 1.4.1  
Need set `decoding.decoding_chunk_size=16` when decoding.

| Model | Params | Config | Augmentation| Test set | Decode method | Chunk Size & Left Chunks | Loss | CER |  
| --- | --- | --- | --- | --- | --- | --- | --- | --- |  
| conformer | 47.06M | conf/chunk_conformer.yaml | spec_aug | test | attention | 16, -1 | - | 0.056102 |  
| conformer | 47.06M | conf/chunk_conformer.yaml | spec_aug | test | ctc_greedy_search | 16, -1 | - | 0.058160 |  
| conformer | 47.06M | conf/chunk_conformer.yaml | spec_aug | test | ctc_prefix_beam_search | 16, -1 | - | 0.058160 |  
| conformer | 47.06M | conf/chunk_conformer.yaml | spec_aug | test | attention_rescoring | 16, -1 |  - | 0.051968 |  


## Conformer
paddle version: 2.2.2  
paddlespeech version: 1.0.1
| Model | Params | Config | Augmentation| Test set | Decode method | Loss | CER |
| --- | --- | --- | --- | --- | --- | --- | --- | 
| conformer | 47.07M  | conf/conformer.yaml | spec_aug | test | attention | - | 0.0522 |
| conformer | 47.07M  | conf/conformer.yaml | spec_aug | test | ctc_greedy_search | - | 0.0481 |
| conformer | 47.07M  | conf/conformer.yaml | spec_aug | test | ctc_prefix_beam_search | - | 0.0480 | 
| conformer | 47.07M  | conf/conformer.yaml | spec_aug | test | attention_rescoring | - | 0.0460 | 


## Transformer 

| Model | Params | Config | Augmentation| Test set | Decode method | Loss | CER |  
| --- | --- | --- | --- | --- | --- | --- | --- |  
| transformer | 31.95M  | conf/transformer.yaml | spec_aug | test | attention | 3.8103787302970886 | 0.056588 |  
| transformer | 31.95M  | conf/transformer.yaml | spec_aug | test | ctc_greedy_search | 3.8103787302970886 | 0.059932 |  
| transformer | 31.95M  | conf/transformer.yaml | spec_aug | test | ctc_prefix_beam_search | 3.8103787302970886 | 0.059989 |  
| transformer | 31.95M  | conf/transformer.yaml | spec_aug | test | attention_rescoring | 3.8103787302970886 | 0.052273 |  


================================================
FILE: examples/aishell/asr1/conf/augmentation.json
================================================
[
  {
    "type": "speed",
    "params": {
      "min_speed_rate": 0.9,
      "max_speed_rate": 1.1,
      "num_rates": 3
    },
    "prob": 0.0
  },
  {
    "type": "shift",
    "params": {
      "min_shift_ms": -5,
      "max_shift_ms": 5
    },
    "prob": 1.0
  },
  {
    "type": "specaug",
    "params": {
      "W": 0,
      "warp_mode": "PIL",
      "F": 10,
      "n_freq_masks": 2,
      "T": 50,
      "n_time_masks": 2,
      "p": 1.0,
      "adaptive_number_ratio": 0,
      "adaptive_size_ratio": 0,
      "max_n_time_masks": 20,
      "replace_with_zero": true
    },
    "prob": 1.0
  }
]


================================================
FILE: examples/aishell/asr1/conf/chunk_conformer.yaml
================================================
############################################
#           Network Architecture           #
############################################
cmvn_file: 
cmvn_file_type: "json"
# encoder related
encoder: conformer
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1   # sublayer output dropout
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 or conv2d8
    normalize_before: True
    cnn_module_kernel: 15
    use_cnn_module: True
    activation_type: 'swish'
    pos_enc_layer_type: 'rel_pos'
    selfattention_layer_type: 'rel_selfattn'
    causal: true
    use_dynamic_chunk: true
    cnn_module_norm: 'layer_norm' # using nn.LayerNorm makes model converge faster
    use_dynamic_left_chunk: false
# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1  # sublayer output dropout
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0
# hybrid CTC/attention
model_conf:
    ctc_weight: 0.3
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false
    init_type: 'kaiming_uniform' # !Warning: need to convergence

###########################################
#                   Data                  #
###########################################

train_manifest: data/manifest.train
dev_manifest: data/manifest.dev
test_manifest: data/manifest.test


###########################################
#              Dataloader                 #
###########################################

vocab_filepath: data/lang_char/vocab.txt 
spm_model_prefix: ''
unit_type: 'char'
preprocess_config: conf/preprocess.yaml
feat_dim: 80
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 32
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 2
subsampling_factor: 1
num_encs: 1

###########################################
#                 Training                #
###########################################
n_epoch: 240 
accum_grad: 1
global_grad_clip: 5.0
dist_sampler: True
optim: adam
optim_conf:
  lr: 0.001
  weight_decay: 1.0e-6
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 100
checkpoint:
  kbest_n: 50
  latest_n: 5


================================================
FILE: examples/aishell/asr1/conf/chunk_roformer.yaml
================================================
############################################
#           Network Architecture           #
############################################
cmvn_file: 
cmvn_file_type: "json"
# encoder related
encoder: conformer
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1   # sublayer output dropout
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 or conv2d8
    normalize_before: True
    cnn_module_kernel: 15
    use_cnn_module: True
    activation_type: 'swish'
    pos_enc_layer_type: 'rope_pos' # abs_pos, rel_pos, rope_pos
    selfattention_layer_type: 'rel_selfattn' # unused
    causal: true
    use_dynamic_chunk: true
    cnn_module_norm: 'layer_norm' # using nn.LayerNorm makes model converge faster
    use_dynamic_left_chunk: false
# decoder related
decoder: transformer # transformer, bitransformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    r_num_blocks: 0    # only for bitransformer
    dropout_rate: 0.1  # sublayer output dropout
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0
# hybrid CTC/attention
model_conf:
    ctc_weight: 0.3
    lsm_weight: 0.1     # label smoothing option
    reverse_weight: 0.0 # only for bitransformer
    length_normalized_loss: false
    init_type: 'kaiming_uniform' # !Warning: need to convergence

###########################################
#                   Data                  #
###########################################

train_manifest: data/manifest.train
dev_manifest: data/manifest.dev
test_manifest: data/manifest.test


###########################################
#              Dataloader                 #
###########################################

vocab_filepath: data/lang_char/vocab.txt 
spm_model_prefix: ''
unit_type: 'char'
preprocess_config: conf/preprocess.yaml
feat_dim: 80
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 32
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 2
subsampling_factor: 1
num_encs: 1

###########################################
#                 Training                #
###########################################
n_epoch: 240 
accum_grad: 1
global_grad_clip: 5.0
dist_sampler: True
optim: adam
optim_conf:
  lr: 0.001
  weight_decay: 1.0e-6
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 100
checkpoint:
  kbest_n: 50
  latest_n: 5


================================================
FILE: examples/aishell/asr1/conf/chunk_roformer_bidecoder.yaml
================================================
############################################
#           Network Architecture           #
############################################
cmvn_file: 
cmvn_file_type: "json"
# encoder related
encoder: conformer
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1   # sublayer output dropout
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 or conv2d8
    normalize_before: True
    cnn_module_kernel: 15
    use_cnn_module: True
    activation_type: 'swish'
    pos_enc_layer_type: 'rope_pos' # abs_pos, rel_pos, rope_pos
    selfattention_layer_type: 'rel_selfattn' # unused
    causal: true
    use_dynamic_chunk: true
    cnn_module_norm: 'layer_norm' # using nn.LayerNorm makes model converge faster
    use_dynamic_left_chunk: false
# decoder related
decoder: bitransformer # transformer, bitransformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 3
    r_num_blocks: 3    # only for bitransformer
    dropout_rate: 0.1  # sublayer output dropout
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0
# hybrid CTC/attention
model_conf:
    ctc_weight: 0.3
    lsm_weight: 0.1     # label smoothing option
    reverse_weight: 0.3 # only for bitransformer
    length_normalized_loss: false
    init_type: 'kaiming_uniform' # !Warning: need to convergence

###########################################
#                   Data                  #
###########################################

train_manifest: data/manifest.train
dev_manifest: data/manifest.dev
test_manifest: data/manifest.test


###########################################
#              Dataloader                 #
###########################################

vocab_filepath: data/lang_char/vocab.txt 
spm_model_prefix: ''
unit_type: 'char'
preprocess_config: conf/preprocess.yaml
feat_dim: 80
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 32
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 2
subsampling_factor: 1
num_encs: 1

###########################################
#                 Training                #
###########################################
n_epoch: 240 
accum_grad: 1
global_grad_clip: 5.0
dist_sampler: True
optim: adam
optim_conf:
  lr: 0.001
  weight_decay: 1.0e-6
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 100
checkpoint:
  kbest_n: 50
  latest_n: 5


================================================
FILE: examples/aishell/asr1/conf/chunk_squeezeformer.yaml
================================================
############################################
#           Network Architecture           #
############################################
cmvn_file: 
cmvn_file_type: "json"
# encoder related
encoder: squeezeformer
encoder_conf:
    encoder_dim: 256    # dimension of attention
    output_size: 256    # dimension of output
    attention_heads: 4
    num_blocks: 12      # the number of encoder blocks
    reduce_idx: 5
    recover_idx: 11
    feed_forward_expansion_factor: 8
    input_dropout_rate: 0.1
    feed_forward_dropout_rate: 0.1
    attention_dropout_rate: 0.1
    adaptive_scale: true
    cnn_module_kernel: 31
    normalize_before: false
    activation_type: 'swish'
    pos_enc_layer_type: 'rel_pos'
    time_reduction_layer_type: 'stream'
    causal: true
    use_dynamic_chunk: true
    use_dynamic_left_chunk: false

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1  # sublayer output dropout
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0
# hybrid CTC/attention
model_conf:
    ctc_weight: 0.3
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false
    init_type: 'kaiming_uniform' # !Warning: need to convergence

###########################################
#                   Data                  #
###########################################

train_manifest: data/manifest.train
dev_manifest: data/manifest.dev
test_manifest: data/manifest.test


###########################################
#              Dataloader                 #
###########################################

vocab_filepath: data/lang_char/vocab.txt 
spm_model_prefix: ''
unit_type: 'char'
preprocess_config: conf/preprocess.yaml
feat_dim: 80
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 32
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 2
subsampling_factor: 1
num_encs: 1

###########################################
#                 Training                #
###########################################
n_epoch: 240 
accum_grad: 1
global_grad_clip: 5.0
dist_sampler: True
optim: adam
optim_conf:
  lr: 0.001
  weight_decay: 1.0e-6
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 100
checkpoint:
  kbest_n: 50
  latest_n: 5


================================================
FILE: examples/aishell/asr1/conf/conformer.yaml
================================================
############################################
#           Network Architecture           #
############################################
cmvn_file: 
cmvn_file_type: "json"
# encoder related
encoder: conformer
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 or conv2d8
    normalize_before: True
    cnn_module_kernel: 15
    use_cnn_module: True
    activation_type: 'swish'
    pos_enc_layer_type: 'rel_pos'
    selfattention_layer_type: 'rel_selfattn'

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    ctc_weight: 0.3
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false
    init_type: 'kaiming_uniform' # !Warning: need to convergence

###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.train
dev_manifest: data/manifest.dev
test_manifest: data/manifest.test

###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_char/vocab.txt 
spm_model_prefix: ''
unit_type: 'char'
preprocess_config: conf/preprocess.yaml
feat_dim: 80
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 32
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 2
subsampling_factor: 1
num_encs: 1

###########################################
#                Training                 #
###########################################
n_epoch: 150 
accum_grad: 8
global_grad_clip: 5.0
dist_sampler: False
optim: adam
optim_conf:
  lr: 0.002
  weight_decay: 1.0e-6
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 100
checkpoint:
  kbest_n: 50
  latest_n: 5


================================================
FILE: examples/aishell/asr1/conf/preprocess.yaml
================================================
process:
  # extract kaldi fbank from PCM
  - type: fbank_kaldi
    fs: 16000
    n_mels: 80
    n_shift: 160
    win_length: 400
    dither: 0.1
  - type: cmvn_json
    cmvn_path: data/mean_std.json
  # these three processes are a.k.a. SpecAugment
  - type: time_warp
    max_time_warp: 5
    inplace: true
    mode: PIL
  - type: freq_mask
    F: 30
    n_mask: 2
    inplace: true
    replace_with_zero: false
  - type: time_mask
    T: 40
    n_mask: 2
    inplace: true
    replace_with_zero: false


================================================
FILE: examples/aishell/asr1/conf/squeezeformer.yaml
================================================
############################################
#           Network Architecture           #
############################################
cmvn_file: 
cmvn_file_type: "json"
# encoder related
encoder: squeezeformer
encoder_conf:
    encoder_dim: 256    # dimension of attention
    output_size: 256    # dimension of output
    attention_heads: 4
    num_blocks: 12      # the number of encoder blocks
    reduce_idx: 5
    recover_idx: 11
    feed_forward_expansion_factor: 8
    input_dropout_rate: 0.1
    feed_forward_dropout_rate: 0.1
    attention_dropout_rate: 0.1
    adaptive_scale: true
    cnn_module_kernel: 31
    normalize_before: false
    activation_type: 'swish'
    pos_enc_layer_type: 'rel_pos'
    time_reduction_layer_type: 'conv1d'

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    ctc_weight: 0.3
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false
    init_type: 'kaiming_uniform' # !Warning: need to convergence

###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.train
dev_manifest: data/manifest.dev
test_manifest: data/manifest.test

###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_char/vocab.txt 
spm_model_prefix: ''
unit_type: 'char'
preprocess_config: conf/preprocess.yaml
feat_dim: 80
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 32
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 2
subsampling_factor: 1
num_encs: 1

###########################################
#                Training                 #
###########################################
n_epoch: 150 
accum_grad: 8
global_grad_clip: 5.0
dist_sampler: False
optim: adam
optim_conf:
  lr: 0.002
  weight_decay: 1.0e-6
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 100
checkpoint:
  kbest_n: 50
  latest_n: 5


================================================
FILE: examples/aishell/asr1/conf/transformer.yaml
================================================
############################################
#           Network Architecture           #
############################################
cmvn_file: 
cmvn_file_type: "json"
# encoder related
encoder: transformer
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1   # sublayer output dropout
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 and conv2d8
    normalize_before: true
# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1  # sublayer output dropout
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    ctc_weight: 0.3
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false

###########################################
#                   Data                  #
###########################################
# https://yaml.org/type/float.html
train_manifest: data/manifest.train
dev_manifest: data/manifest.dev
test_manifest: data/manifest.test


###########################################
#              Dataloader                 #
###########################################
unit_type: 'char'
vocab_filepath: data/lang_char/vocab.txt 
spm_model_prefix: ''
feat_dim: 80
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 64 
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
preprocess_config: conf/preprocess.yaml 
num_workers: 2
subsampling_factor: 1
num_encs: 1

###########################################
#                 Training                #
###########################################
n_epoch: 30
accum_grad: 2
global_grad_clip: 5.0
dist_sampler: False
optim: adam
optim_conf:
  lr: 0.002
  weight_decay: 1.0e-6
scheduler: warmuplr     
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 100
checkpoint:
  kbest_n: 50
  latest_n: 5


================================================
FILE: examples/aishell/asr1/conf/tuning/chunk_decode.yaml
================================================
beam_size: 10
decode_batch_size: 128
error_rate_type: cer 
decoding_method: attention # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
ctc_weight: 0.5 # ctc weight for attention rescoring decode mode.
decoding_chunk_size: 16 # decoding chunk size. Defaults to -1.
    # <0: for decoding, use full chunk.
    # >0: for decoding, use fixed chunk size as set.
    # 0: used for training, it's prohibited here. 
num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
simulate_streaming: True  # simulate streaming inference. Defaults to False.


================================================
FILE: examples/aishell/asr1/conf/tuning/decode.yaml
================================================
beam_size: 10
decode_batch_size: 128
error_rate_type: cer 
decoding_method: attention # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
ctc_weight: 0.5 # ctc weight for attention rescoring decode mode.
decoding_chunk_size: -1 # decoding chunk size. Defaults to -1.
    # <0: for decoding, use full chunk.
    # >0: for decoding, use fixed chunk size as set.
    # 0: used for training, it's prohibited here. 
num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
simulate_streaming: False  # simulate streaming inference. Defaults to False.


================================================
FILE: examples/aishell/asr1/local/aishell_train_lms.sh
================================================
#!/bin/bash

# Train a 3-gram language model with SRILM from the AISHELL transcripts.
#
# To be run from one directory above this script.
# This script takes no arguments.  It assumes you have already run
# aishell_data_prep.sh.
# It takes as input the files
#   data/local/lm/text
#   data/local/dict/lexicon.txt
# and writes its outputs (text.no_oov, word.counts, unigram.counts, wordlist,
# heldout, train, lm.arpa) into data/local/lm.
. ./path.sh

text=data/local/lm/text
lexicon=data/local/dict/lexicon.txt

# Abort early when one of the required inputs is missing.
for f in "$text" "$lexicon"; do
  [ ! -f "$f" ] && echo "$0: No such file $f" && exit 1;
done

# Check SRILM tools
if ! which ngram-count > /dev/null; then
    echo "srilm tools are not found, please download it and install it from: "
    echo "http://www.speech.sri.com/projects/srilm/download.html"
    echo "Then add the tools to your PATH"
    exit 1
fi

dir=data/local/lm
mkdir -p $dir


cleantext=$dir/text.no_oov

# Replace every token that is not a lexicon entry with <SPOKEN_NOISE>.
# The lexicon's first column is loaded into `seen` in the BEGIN block.
cat $text | awk -v lex=$lexicon 'BEGIN{while((getline<lex) >0){ seen[$1]=1; } }
  {for(n=1; n<=NF;n++) {  if (seen[$n]) { printf("%s ", $n); } else {printf("<SPOKEN_NOISE> ");} } printf("\n");}' \
  > $cleantext || exit 1;

# Word frequencies over the cleaned text; field 1 of each line is skipped
# (n starts at 2).
cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | sort | uniq -c | \
   sort -nr > $dir/word.counts || exit 1;

# Get counts from acoustic training transcripts, and add  one-count
# for each word in the lexicon (but not silence, we don't want it
# in the LM-- we'll add it optionally later).
cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \
  cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \
   sort | uniq -c | sort -nr > $dir/unigram.counts || exit 1;

# Vocabulary = every counted word plus the sentence boundary symbols.
cat $dir/unigram.counts | awk '{print $2}' | cat - <(echo "<s>"; echo "</s>" ) > $dir/wordlist

heldout_sent=10000 # Don't change this if you want result to be comparable with
    # kaldi_lm results
mkdir -p $dir
# The first $heldout_sent sentences form the held-out set ...
cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n<NF) printf " "; else print ""; }}' | \
  head -$heldout_sent > $dir/heldout
# ... and the remainder forms the training set.
# NOTE(review): `tail -n +N` starts AT line N, so line $heldout_sent lands in
# both sets; left unchanged to stay comparable with existing results.
cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n<NF) printf " "; else print ""; }}' | \
  tail -n +$heldout_sent > $dir/train

# Train the LM, then report its perplexity on the held-out set.
ngram-count -text $dir/train -order 3 -limit-vocab -vocab $dir/wordlist -unk \
  -map-unk "<UNK>" -kndiscount -interpolate -lm $dir/lm.arpa
ngram -lm $dir/lm.arpa -ppl $dir/heldout

================================================
FILE: examples/aishell/asr1/local/align.sh
================================================
#!/bin/bash

# Run alignment.py with a trained checkpoint.
# Usage: align.sh config_path decode_config_path ckpt_path_prefix
# BIN_DIR must already be set in the environment; it locates alignment.py.

# Exactly three positional arguments are required.
if [ $# != 3 ];then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix"
    exit -1
fi

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES.
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
decode_config_path=$2
ckpt_prefix=$3

# Passed below as an override of decode.decode_batch_size.
batch_size=1
# Results are written into a directory named after the checkpoint prefix.
output_dir=${ckpt_prefix}
mkdir -p ${output_dir}

# align dump in `result_file`
# .tier, .TextGrid dump in `dir of result_file`
# NOTE(review): `type` is never assigned in this script, so unless it is
# inherited from the environment the result file is `${output_dir}/.align`
# -- confirm the intended file name.
python3 -u ${BIN_DIR}/alignment.py \
--ngpu ${ngpu} \
--config ${config_path} \
--decode_cfg ${decode_config_path} \
--result_file ${output_dir}/${type}.align \
--checkpoint_path ${ckpt_prefix} \
--opts decode.decode_batch_size ${batch_size}

# Report a failure of the alignment run to the caller.
if [ $? -ne 0 ]; then
    echo "Failed in ctc alignment!"
    exit 1
fi

exit 0


================================================
FILE: examples/aishell/asr1/local/data.sh
================================================
#!/bin/bash

# Prepare the Aishell data for this example.
#   stage -1: download the corpus and generate the raw manifests
#   stage  0: compute feature mean/stddev (data/mean_std.json)
#   stage  1: build the character vocabulary
#   stage  2: format the train/dev/test manifests
# `stage`, `stop_stage` and `dict_dir` may be overridden on the command line
# (handled by utils/parse_options.sh). MAIN_ROOT must be set by the caller.

stage=-1
stop_stage=100
dict_dir=data/lang_char

. ${MAIN_ROOT}/utils/parse_options.sh || exit -1;

mkdir -p data
mkdir -p ${dict_dir}
TARGET_DIR=${MAIN_ROOT}/dataset
mkdir -p ${TARGET_DIR}

if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
    # download data, generate manifests
    python3 ${TARGET_DIR}/aishell/aishell.py \
    --manifest_prefix="data/manifest" \
    --target_dir="${TARGET_DIR}/aishell"

    if [ $? -ne 0 ]; then
        echo "Prepare Aishell failed. Terminated."
        exit 1
    fi

    # Keep the unformatted manifests under a `.raw` suffix; stage 2 writes the
    # formatted ones back to the original names.
    for dataset in train dev test; do
        mv data/manifest.${dataset} data/manifest.${dataset}.raw
    done
fi

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # compute mean and stddev for normalizer
    num_workers=$(nproc)
    python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
    --manifest_path="data/manifest.train.raw" \
    --spectrum_type="fbank" \
    --feat_dim=80 \
    --delta_delta=false \
    --stride_ms=10 \
    --window_ms=25 \
    --sample_rate=16000 \
    --use_dB_normalization=False \
    --num_samples=-1 \
    --num_workers=${num_workers} \
    --output_path="data/mean_std.json"

    if [ $? -ne 0 ]; then
        echo "Compute mean and stddev failed. Terminated."
        exit 1
    fi
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # build vocabulary from the raw training manifest
    python3 ${MAIN_ROOT}/utils/build_vocab.py \
    --unit_type="char" \
    --count_threshold=0 \
    --vocab_path="${dict_dir}/vocab.txt" \
    --manifest_paths "data/manifest.train.raw"

    if [ $? -ne 0 ]; then
        echo "Build vocabulary failed. Terminated."
        exit 1
    fi
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # format manifest with tokenids, vocab size
    # The three datasets are formatted in parallel as background jobs.
    pids=()
    for dataset in train dev test; do
    {
        python3 ${MAIN_ROOT}/utils/format_data.py \
            --cmvn_path "data/mean_std.json" \
            --unit_type "char" \
            --vocab_path="${dict_dir}/vocab.txt" \
            --manifest_path="data/manifest.${dataset}.raw" \
            --output_path="data/manifest.${dataset}"

        if [ $? -ne 0 ]; then
            echo "Formt mnaifest failed. Terminated."
            # This only ends the background job; the parent picks the
            # failure up through `wait <pid>` below.
            exit 1
        fi
    } &
    pids+=($!)
    done

    # A bare `wait` always returns 0, which would hide failed jobs, so wait
    # for every job individually and remember whether any of them failed.
    failed=0
    for pid in "${pids[@]}"; do
        wait ${pid} || failed=1
    done
    if [ ${failed} -ne 0 ]; then
        exit 1
    fi
fi

echo "Aishell data preparation done."
exit 0


================================================
FILE: examples/aishell/asr1/local/export.sh
================================================
#!/bin/bash

# Run export.py for a trained checkpoint.
# Usage: export.sh config_path ckpt_prefix jit_model_path
# BIN_DIR must already be set in the environment; it locates export.py.

# Exactly three positional arguments are required.
if [ $# -ne 3 ]; then
    echo "usage: $0 config_path ckpt_prefix jit_model_path"
    exit -1
fi

cfg=$1
ckpt=$2
jit_out=$3

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES.
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

# Run the export and report a failure to the caller.
if ! python3 -u ${BIN_DIR}/export.py \
        --ngpu ${ngpu} \
        --config ${cfg} \
        --checkpoint_path ${ckpt} \
        --export_path ${jit_out}; then
    echo "Failed in export!"
    exit 1
fi

exit 0


================================================
FILE: examples/aishell/asr1/local/test.sh
================================================
#!/bin/bash

# Decode the test set with several decoding methods and score the results.
# Usage: test.sh [--stage N] [--stop_stage N] config_path decode_config_path ckpt_path_prefix
#   stage 0  : decode and score with utils/compute-wer.py
#   stage 101: score the stage-0 outputs with sclite; not reached with the
#              default stop_stage=100
# MAIN_ROOT and BIN_DIR must already be set in the environment.

# NOTE: with `set -e` a failing python call below terminates the script
# immediately, so the `if [ $? -ne 0 ]` branches after those calls are never
# reached.
set -e

stage=0
stop_stage=100

# Path contains a slash, so it is resolved relative to the current directory.
source utils/parse_options.sh || exit 1;

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES.
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."


# Exactly three positional arguments must remain at this point.
# (presumably parse_options.sh has consumed the --option arguments -- confirm)
if [ $# != 3 ];then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix"
    exit -1
fi

config_path=$1
decode_config_path=$2
ckpt_prefix=$3

# Chunk mode is enabled when the config path matches `*chunk_*yaml`.
chunk_mode=false
if [[ ${config_path} =~ ^.*chunk_.*yaml$ ]];then
    chunk_mode=true
fi

# download language model
#bash local/download_lm_ch.sh
#if [ $? -ne 0 ]; then
#    exit 1
#fi


if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # format the reference test file
    python ${MAIN_ROOT}/utils/format_rsl.py \
        --origin_ref data/manifest.test.raw \
        --trans_ref data/manifest.test.text

    # These two methods are decoded with batch size 64, or 1 in chunk mode.
    for type in attention ctc_greedy_search; do
        echo "decoding ${type}"
        if [ ${chunk_mode} == true ];then
            # stream decoding only support batchsize=1
            batch_size=1
        else
            batch_size=64
        fi
        # Results are written into a directory named after the checkpoint prefix.
        output_dir=${ckpt_prefix}
        mkdir -p ${output_dir}
        python3 -u ${BIN_DIR}/test.py \
        --ngpu ${ngpu} \
        --config ${config_path} \
        --decode_cfg ${decode_config_path} \
        --result_file ${output_dir}/${type}.rsl \
        --checkpoint_path ${ckpt_prefix} \
        --opts decode.decoding_method ${type} \
        --opts decode.decode_batch_size ${batch_size}

        if [ $? -ne 0 ]; then
            echo "Failed in evaluation!"
            exit 1

        fi
        # format the hyp file
        python ${MAIN_ROOT}/utils/format_rsl.py \
            --origin_hyp ${output_dir}/${type}.rsl \
            --trans_hyp ${output_dir}/${type}.rsl.text
        # Score hypothesis against reference; the report goes to `<type>.error`.
        python ${MAIN_ROOT}/utils/compute-wer.py --char=1 --v=1 \
            data/manifest.test.text ${output_dir}/${type}.rsl.text > ${output_dir}/${type}.error 

    done

    # These two methods are always decoded with batch size 1.
    for type in ctc_prefix_beam_search attention_rescoring; do
        echo "decoding ${type}"
        batch_size=1
        output_dir=${ckpt_prefix}
        mkdir -p ${output_dir}
        python3 -u ${BIN_DIR}/test.py \
        --ngpu ${ngpu} \
        --config ${config_path} \
        --decode_cfg ${decode_config_path} \
        --result_file ${output_dir}/${type}.rsl \
        --checkpoint_path ${ckpt_prefix} \
        --opts decode.decoding_method ${type} \
        --opts decode.decode_batch_size ${batch_size}

        if [ $? -ne 0 ]; then
            echo "Failed in evaluation!"
            exit 1
        fi
        # format the hyp file, then score it as in the loop above
        python ${MAIN_ROOT}/utils/format_rsl.py \
            --origin_hyp ${output_dir}/${type}.rsl \
            --trans_hyp ${output_dir}/${type}.rsl.text
        python ${MAIN_ROOT}/utils/compute-wer.py --char=1 --v=1 \
            data/manifest.test.text ${output_dir}/${type}.rsl.text > ${output_dir}/${type}.error 
    done
fi

# Optional sclite scoring; reads the `<type>.rsl` files written by stage 0.
if [ ${stage} -le 101 ] && [ ${stop_stage} -ge 101 ]; then
    echo "using sclite to compute cer..."
    # format the reference test file for sclite
    python ${MAIN_ROOT}/utils/format_rsl.py \
        --origin_ref data/manifest.test.raw \
        --trans_ref_sclite data/manifest.test.text.sclite
    
    output_dir=${ckpt_prefix}
    for type in attention ctc_greedy_search ctc_prefix_beam_search attention_rescoring; do
        # format the hyp file for sclite
        python ${MAIN_ROOT}/utils/format_rsl.py \
            --origin_hyp ${output_dir}/${type}.rsl \
            --trans_hyp_sclite ${output_dir}/${type}.rsl.text.sclite

        # sclite reports for this method go into `<type>_sclite/`.
        mkdir -p ${output_dir}/${type}_sclite
        sclite -i wsj -r data/manifest.test.text.sclite -h  ${output_dir}/${type}.rsl.text.sclite  -e utf-8 -o all -O ${output_dir}/${type}_sclite -c NOASCII
    done
fi

exit 0


================================================
FILE: examples/aishell/asr1/local/test_wav.sh
================================================
#!/bin/bash

# Decode a single audio file with a trained checkpoint.
# Usage: test_wav.sh config_path decode_config_path ckpt_path_prefix audio_file
# BIN_DIR must already be set in the environment; it locates test_wav.py.

# Exactly four positional arguments are required.
if [ $# != 4 ];then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix audio_file"
    exit -1
fi

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES.
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
decode_config_path=$2
ckpt_prefix=$3
audio_file=$4

# Fetch the demo wav into data/; `-nc` skips the download if it already exists.
# A failed wget aborts the script even when a different audio_file was given.
mkdir -p data
wget -nc https://paddlespeech.cdn.bcebos.com/datasets/single_wav/zh/demo_01_03.wav -P data/
if [ $? -ne 0 ]; then
   exit 1
fi

# The audio file to decode must exist.
if [ ! -f ${audio_file} ]; then
    echo "Plase input the right audio_file path"
    exit 1
fi

# Chunk mode is detected from the config path (`*chunk_*yaml`).
# NOTE(review): `chunk_mode` is not read anywhere else in this script.
chunk_mode=false
if [[ ${config_path} =~ ^.*chunk_.*yaml$ ]];then
    chunk_mode=true
fi

# download language model
#bash local/download_lm_ch.sh
#if [ $? -ne 0 ]; then
#    exit 1
#fi

# Only attention_rescoring is run; the loop form allows adding more methods.
for type in  attention_rescoring; do
    echo "decoding ${type}"
    batch_size=1
    # Results are written into a directory named after the checkpoint prefix.
    output_dir=${ckpt_prefix}
    mkdir -p ${output_dir}
    python3 -u ${BIN_DIR}/test_wav.py \
    --ngpu ${ngpu} \
    --config ${config_path} \
    --decode_cfg ${decode_config_path} \
    --result_file ${output_dir}/${type}.rsl \
    --checkpoint_path ${ckpt_prefix} \
    --opts decode.decoding_method ${type} \
    --opts decode.decode_batch_size ${batch_size} \
    --audio_file ${audio_file}

    if [ $? -ne 0 ]; then
        echo "Failed in evaluation!"
        exit 1
    fi
done
exit 0


================================================
FILE: examples/aishell/asr1/local/tlg.sh
================================================
#!/bin/bash

# Train a language model and build the TLG decoding graph.
#   stage 0: prepare the dictionary
#   stage 1: train the language model (SRILM or utils/ngram_train.sh)
#   stage 2: compile the lexicon/token FSTs and build the TLG
# `stage`, `stop_stage`, `corpus` and `lmtype` are the defaults below; run.sh
# passes `--corpus` and `--lmtype` on the command line.
# MAIN_ROOT must already be set in the environment.

# Abort on the first failing command, including failures inside pipelines.
set -eo pipefail

stage=-1
stop_stage=100
corpus=aishell
lmtype=srilm

source utils/parse_options.sh

# Corpus resources: pronunciation lexicon and the full transcript file.
data=${MAIN_ROOT}/dataset/${corpus}
lexicon=$data/resource_aishell/lexicon.txt
text=$data/data_aishell/transcript/aishell_transcript_v0.8.txt

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # 7.1 Prepare dict
    # NOTE(review): local/data.sh writes the vocabulary to
    # data/lang_char/vocab.txt by default, not data/vocab.txt -- confirm this path.
    unit_file=data/vocab.txt
    mkdir -p data/local/dict
    cp $unit_file data/local/dict/units.txt
    utils/fst/prepare_dict.py \
        --unit_file $unit_file \
        --in_lexicon ${lexicon} \
        --out_lexicon data/local/dict/lexicon.txt
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # 7.2 Train lm
    lm=data/local/lm
    mkdir -p data/train
    mkdir -p $lm
    utils/manifest_key_value.py \
        --manifest_path data/manifest.train \
        --output_path data/train
    # Filter the corpus transcript by data/train/text to produce the LM text.
    utils/filter_scp.pl data/train/text \
        $text > $lm/text
    # Select the LM toolkit: SRILM via local/aishell_train_lms.sh, otherwise
    # utils/ngram_train.sh with order 3.
    if [ $lmtype == 'srilm' ];then
        local/aishell_train_lms.sh
    else
        utils/ngram_train.sh --order 3 $lm/text $lm/lm.arpa
    fi
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then 
    # 7.3 Build decoding TLG
    utils/fst/compile_lexicon_token_fst.sh \
        data/local/dict data/local/tmp data/local/lang
    utils/fst/make_tlg.sh data/local/lm data/local/lang data/lang_test || exit 1;
fi

echo "Aishell build TLG done."
exit 0


================================================
FILE: examples/aishell/asr1/local/train.sh
================================================
#!/bin/bash

# Train a model on CPU (no visible GPU) or with paddle.distributed.launch.
# Usage: CUDA_VISIBLE_DEVICES=0 train.sh config_path ckpt_name ips(optional)
# The variables below can be overridden with `--name value` options
# (handled by utils/parse_options.sh).
# MAIN_ROOT and BIN_DIR must already be set in the environment.
# Checkpoints and logs go to exp/<ckpt_name>.

profiler_options=
benchmark_batch_size=0
benchmark_max_step=0

# seed may break model convergence
seed=0

source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES.
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

# A non-zero seed also switches cuDNN to deterministic mode for this run.
if [ ${seed} != 0  ]; then
    export FLAGS_cudnn_deterministic=True
    echo "using seed $seed & FLAGS_cudnn_deterministic=True ..."
fi

if [ $# -lt 2 ] || [ $# -gt 3 ];then
    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
    exit -1
fi

config_path=$1
ckpt_name=$2
ips=$3

# Only pass --ips to the launcher when a node list was given.
if [ -z "${ips}" ];then
  ips_config=
else
  ips_config="--ips="${ips}
fi
echo ${ips_config}

mkdir -p exp

# default memory allocator strategy may case gpu training hang
# for no OOM raised when memory exhausted
export FLAGS_allocator_strategy=naive_best_fit

if [ ${ngpu} == 0 ]; then
python3 -u ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--seed ${seed} \
--config ${config_path} \
--output exp/${ckpt_name} \
--profiler-options "${profiler_options}" \
--benchmark-batch-size ${benchmark_batch_size} \
--benchmark-max-step ${benchmark_max_step}
else
python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--seed ${seed} \
--config ${config_path} \
--output exp/${ckpt_name} \
--profiler-options "${profiler_options}" \
--benchmark-batch-size ${benchmark_batch_size} \
--benchmark-max-step ${benchmark_max_step}
fi
# Save the training exit status right away: the status of the `if` above is
# that of the python command it ran, and any later command overwrites `$?`.
train_status=$?


if [ ${seed} != 0  ]; then
    unset FLAGS_cudnn_deterministic
fi

if [ ${train_status} -ne 0 ]; then
    echo "Failed in training!"
    exit 1
fi

exit 0


================================================
FILE: examples/aishell/asr1/path.sh
================================================
# Environment setup for this example; run.sh sources it before anything else.
# MAIN_ROOT is three directories above the current working directory, so this
# file has to be sourced from the example directory.
export MAIN_ROOT=`realpath ${PWD}/../../../`

# Make MAIN_ROOT and MAIN_ROOT/utils searchable for commands.
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export LC_ALL=C

export PYTHONDONTWRITEBYTECODE=1
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/

# model exp
# BIN_DIR is the directory of the train/test/export entry scripts for MODEL.
MODEL=u2
export BIN_DIR=${MAIN_ROOT}/paddlespeech/s2t/exps/${MODEL}/bin


# srilm
export LIBLBFGS=${MAIN_ROOT}/tools/liblbfgs-1.10
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:${LIBLBFGS}/lib/.libs
export SRILM=${MAIN_ROOT}/tools/srilm
export PATH=${PATH}:${SRILM}/bin:${SRILM}/bin/i686-m64

# Kaldi
# env.sh and common_path.sh are sourced only if present; a missing
# common_path.sh prints a warning to stderr and setup continues.
export KALDI_ROOT=${MAIN_ROOT}/tools/kaldi
[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present, can not using Kaldi!"
[ -f $KALDI_ROOT/tools/config/common_path.sh ] && . $KALDI_ROOT/tools/config/common_path.sh


================================================
FILE: examples/aishell/asr1/run.sh
================================================
#!/bin/bash
# Entry point of the example: runs the stages in [stage, stop_stage] in order.
#   0: data preparation   1: training        2: checkpoint averaging
#   3: test               4: CTC alignment   5: single wav inference
#   51: export (skipped with the default stop_stage=50)
#   101: LM training + TLG build (skipped with the default stop_stage=50)
source path.sh
set -e

# Defaults; each can be overridden with `--name value` on the command line.
gpus=0,1,2,3
stage=0
stop_stage=50
conf_path=conf/conformer.yaml
ips=            #xx.xx.xx.xx,xx.xx.xx.xx
decode_conf_path=conf/tuning/decode.yaml
avg_num=30
audio_file=data/demo_01_03.wav

source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

# Name of the averaged checkpoint, e.g. avg_30.
avg_ckpt=avg_${avg_num}
# Checkpoint name = config file name up to its first dot.
ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}')
echo "checkpoint name ${ckpt}"

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # prepare data
    bash ./local/data.sh || exit -1
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # train model, all `ckpt` under `exp` dir
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips} || exit -1
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # avg n best model
    avg.sh best exp/${ckpt}/checkpoints ${avg_num} || exit -1
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # test ckpt avg_n
    CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
fi

if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    # ctc alignment of test data
    CUDA_VISIBLE_DEVICES=0 ./local/align.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
fi

# Optionally, you can add LM and test it with runtime.
if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
    # test a single .wav file
    CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${audio_file} || exit -1
fi

# Not supported at now!!!
if [ ${stage} -le 51 ] && [ ${stop_stage} -ge 51 ]; then
     # export ckpt avg_n
     CUDA_VISIBLE_DEVICES=0 ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit
fi

# Need further installation! Read the install.md to complete further installation
if [ ${stage} -le 101 ] && [ ${stop_stage} -ge 101 ]; then
    echo "warning: deps on kaldi and srilm, please make sure installed."
    # train lm and build TLG
    ./local/tlg.sh --corpus aishell --lmtype srilm
fi


================================================
FILE: examples/aishell/asr3/README.md
================================================
# Wav2vec2ASR with Aishell
This example contains code used to finetune the [wav2vec2.0](https://arxiv.org/pdf/2006.11477.pdf) model with the [Aishell dataset](http://www.openslr.org/resources/33).
## Overview
All the scripts you need are in `run.sh`. There are several stages in `run.sh`, and each stage has its function.
| Stage | Function                                                     |
|:---- |:----------------------------------------------------------- |
| 0     | Process data. It includes: <br>       (1) Download the dataset <br>       (2) Calculate the CMVN of the train dataset <br>       (3) Get the vocabulary file <br>       (4) Get the manifest files of the train, development and test dataset<br>       (5) Download the pretrained wav2vec2 model |
| 1     | Train the model                                              |
| 2     | Get the final model by averaging the top-k models, set k = 1 means to choose the best model |
| 3     | Test the final model performance                             |
| 4     | Infer the single audio file                                  |


You can choose to run a range of stages by setting `stage` and `stop_stage`.

For example, if you want to execute the code in stage 2 and stage 3, you can run this script:
```bash
bash run.sh --stage 2 --stop_stage 3
```
Or you can set `stage` equal to `stop_stage` to only run one stage.
For example, if you only want to run `stage 0`, you can use the script below:
```bash
bash run.sh --stage 0 --stop_stage 0
```
The document below will describe the scripts in `run.sh` in detail.
## The Environment Variables
The path.sh contains the environment variables. 
```bash
. ./path.sh
. ./cmd.sh
```
This script needs to be run first. And another script is also needed:
```bash
source ${MAIN_ROOT}/utils/parse_options.sh
```
It will support the way of using `--variable value` in the shell scripts.
## The Local Variables
Some local variables are set in `run.sh`. 
`gpus` denotes the GPU number you want to use. If you set `gpus=`, it means you only use CPU. 
`stage` denotes the number of stages you want to start from in the experiments.
`stop_stage` denotes the number of the stage you want to end at in the experiments. 
`conf_path` denotes the config path of the model.
`avg_num` denotes the number K of top-K models you want to average to get the final model.
`audio_file` denotes the file path of the single file you want to infer in stage 4.
`ckpt` denotes the checkpoint prefix of the model, e.g. "wav2vec2ASR"

You can set the local variables (except `ckpt`) when you use `run.sh`

For example, you can set the `gpus` and `avg_num` when you use the command line:
```bash
bash run.sh --gpus 0,1 --avg_num 20
```
## Stage 0: Data Processing
To use this example, you need to process data firstly and you can use stage 0 in `run.sh` to do this. The code is shown below:
```bash
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
     # prepare data
     bash ./local/data.sh || exit -1
 fi
```
Stage 0 is for processing the data.

If you only want to process the data, you can run
```bash
bash run.sh --stage 0 --stop_stage 0
```
You can also just run these scripts in your command line.
```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
```
After processing the data, the `data` directory will look like this:
```bash
data/
|-- dev.meta
|-- lang_char
|   `-- vocab.txt
|-- manifest.dev
|-- manifest.dev.raw
|-- manifest.test
|-- manifest.test.raw
|-- manifest.train
|-- manifest.train.raw
|-- mean_std.json
|-- test.meta
|-- train.meta
|-- train.csv
|-- dev.csv
|-- test.csv
```

Stage 0 also downloads the Chinese pre-trained [wav2vec2](https://paddlespeech.cdn.bcebos.com/wav2vec/chinese-wav2vec2-large.pdparams) model.
```bash
mkdir -p exp/wav2vec2
wget -P exp/wav2vec2 https://paddlespeech.cdn.bcebos.com/wav2vec/chinese-wav2vec2-large.pdparams
```
## Stage 1: Model Training
If you want to train the model, you can use stage 1 in `run.sh`. The code is shown below. 
```bash
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
     CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
 fi
```
If you want to train the model, you can use the script below to execute stage 0 and stage 1:
```bash
bash run.sh --stage 0 --stop_stage 1
```
or you can run these scripts in the command line (only use CPU).
```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/wav2vec2ASR.yaml wav2vec2ASR
```
## Stage 2: Top-k Models Averaging
After training the model, we need to get the final model for testing and inference. In every epoch, the model checkpoint is saved, so we can choose the best model from them based on the validation loss or we can sort them and average the parameters of the top-k models to get the final model. We can use stage 2 to do this, and the code is shown below. Note: We only train one epoch for wav2vec2ASR, thus the `avg_num` is set to 1.
```bash
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     # avg n best model
     avg.sh best exp/${ckpt}/checkpoints ${avg_num}
 fi
```
The `avg.sh` script is in `../../../utils/`, which is defined in `path.sh`.
If you want to get the final model, you can use the script below to execute stage 0, stage 1, and stage 2:
```bash
bash run.sh --stage 0 --stop_stage 2
```
or you can run these scripts in the command line (only use CPU).

```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/wav2vec2ASR.yaml wav2vec2ASR
avg.sh best exp/wav2vec2ASR/checkpoints 1
```
## Stage 3: Model Testing
The test stage is to evaluate the model performance. The code of test stage is shown below:
```bash
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
     # test ckpt avg_n
     CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
 fi
```
If you want to train a model and test it, you can use the script below to execute stage 0, stage 1, stage 2, and stage 3 :
```bash
bash run.sh --stage 0 --stop_stage 3
```
or you can run these scripts in the command line (only use CPU).
```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/wav2vec2ASR.yaml wav2vec2ASR
avg.sh best exp/wav2vec2ASR/checkpoints 1
CUDA_VISIBLE_DEVICES= ./local/test.sh conf/wav2vec2ASR.yaml conf/tuning/decode.yaml exp/wav2vec2ASR/checkpoints/avg_1
```
## Pretrained Model
You can get the pretrained wav2vec2ASR from [this](../../../docs/source/released_model.md).

Use the `tar` command to unpack the model, and then you can use the script to test the model.

For example:
```bash
wget https://paddlespeech.cdn.bcebos.com/s2t/aishell/asr3/wav2vec2ASR-large-aishell1_ckpt_1.4.0.model.tar.gz
tar xzvf wav2vec2ASR-large-aishell1_ckpt_1.4.0.model.tar.gz
source path.sh
# If you have already processed the data and generated the manifest files, you can skip the following 2 steps
bash local/data.sh --stage -1 --stop_stage -1
bash local/data.sh --stage 2 --stop_stage 2
CUDA_VISIBLE_DEVICES= ./local/test.sh conf/wav2vec2ASR.yaml conf/tuning/decode.yaml exp/wav2vec2ASR/checkpoints/avg_1
```
The performance of the released models is shown [here](./RESULT.md).


## Stage 4: Single Audio File Inference
In some situations, you want to use the trained model to do the inference for a single audio file. You can use stage 4. The code is shown below:
```bash
 if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
     # test a single .wav file
     CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${audio_file} || exit -1
 fi
```
you can train the model by yourself using ```bash run.sh --stage 0 --stop_stage 3```, or you can download the pretrained model through the script below:
```bash
wget https://paddlespeech.cdn.bcebos.com/s2t/aishell/asr3/wav2vec2ASR-large-aishell1_ckpt_1.4.0.model.tar.gz
tar xzvf wav2vec2ASR-large-aishell1_ckpt_1.4.0.model.tar.gz
```
You can download the audio demo:
```bash
wget -nc https://paddlespeech.cdn.bcebos.com/datasets/single_wav/zh/demo_01_03.wav -P data/
```
You need to prepare an audio file or use the audio demo above, please confirm the sample rate of the audio is 16K. You can get the result of the audio demo by running the script below.
```bash
CUDA_VISIBLE_DEVICES= ./local/test_wav.sh conf/wav2vec2ASR.yaml conf/tuning/decode.yaml exp/wav2vec2ASR/checkpoints/avg_1 data/demo_01_03.wav
```


================================================
FILE: examples/aishell/asr3/RESULT.md
================================================
# AISHELL

## Version

* paddle version: develop (commit id: daea892c67e85da91906864de40ce9f6f1b893ae)
* paddlespeech version: develop (commit id: c14b4238b256693281e59605abff7c9435b3e2b2)
* paddlenlp version: 2.5.2 

## Device
* python: 3.7
* cuda: 10.2
* cudnn: 7.6

## Result
train: Epoch 80, 2*V100-32G, batchsize:5
| Model | Params | Config | Augmentation| Test set | Decode method | WER |  
| --- | --- | --- | --- | --- | --- | --- |
| wav2vec2ASR | 324.49 M | conf/wav2vec2ASR.yaml | spec_aug | test-set | greedy search | 5.1009 |  


================================================
FILE: examples/aishell/asr3/cmd.sh
================================================
# ====== About run.pl, queue.pl, slurm.pl, and ssh.pl ======
# Usage: <cmd>.pl [options] JOB=1:<nj> <log> <command...>
# e.g.
#   run.pl --mem 4G JOB=1:10 echo.JOB.log echo JOB
#
# Options:
#   --time <time>: Limit the maximum time to execute.
#   --mem <mem>: Limit the maximum memory usage.
#   --max-jobs-run <njob>: Limit the number of parallel jobs. This is ignored for non-array jobs.
#   --num-threads <nthreads>: Specify the number of CPU cores.
#   --gpu <ngpu>: Specify the number of GPU devices.
#   --config: Change the configuration file from default.
#
# "JOB=1:10" is used for "array jobs" and it can control the number of parallel jobs.
# The left string of "=", i.e. "JOB", is replaced by <N>(Nth job) in the command and the log file name,
# e.g. "echo JOB" is changed to "echo 3" for the 3rd job and "echo 8" for 8th job respectively.
# Note that the number must start with a positive number, so you can't use "JOB=0:10" for example.
#
# run.pl, queue.pl, slurm.pl, and ssh.pl have unified interface, not depending on its backend.
# These options are mapping to specific options for each backend and
# it is configured by "conf/queue.conf" and "conf/slurm.conf" by default.
# If jobs failed, your configuration might be wrong for your environment.
#
#
# The official documentation for run.pl, queue.pl, slurm.pl, and ssh.pl:
#   "Parallelization in Kaldi": http://kaldi-asr.org/doc/queue.html
# =========================================================~


# Select the backend used by run.sh from "local", "sge", "slurm", or "ssh"
cmd_backend='local'

# Export the three job-launcher commands (train_cmd, cuda_cmd, decode_cmd)
# that correspond to the selected backend.
case "${cmd_backend}" in
    local)
        # Local machine, without any Job scheduling system

        # The other usage
        export train_cmd="run.pl"
        # Used for "*_train.py": "--gpu" is appended optionally by run.sh
        export cuda_cmd="run.pl"
        # Used for "*_recog.py"
        export decode_cmd="run.pl"
        ;;

    sge)
        # "qsub" (SGE, Torque, PBS, etc.)
        # The default setting is written in conf/queue.conf.
        # You must change "-q g.q" for the "queue" for your environment.
        # To know the "queue" names, type "qhost -q"
        # Note that to use "--gpu *", you have to setup "complex_value" for the system scheduler.
        export train_cmd="queue.pl"
        export cuda_cmd="queue.pl"
        export decode_cmd="queue.pl"
        ;;

    slurm)
        # "sbatch" (Slurm)
        # The default setting is written in conf/slurm.conf.
        # You must change "-p cpu" and "-p gpu" for the "partition" for your environment.
        # To know the "partition" names, type "sinfo".
        # You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*"
        # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".
        export train_cmd="slurm.pl"
        export cuda_cmd="slurm.pl"
        export decode_cmd="slurm.pl"
        ;;

    ssh)
        # You have to create ".queue/machines" to specify the host to execute jobs.
        # e.g. .queue/machines
        #   host1
        #   host2
        #   host3
        # Assuming you can login them without any password, i.e. You have to set ssh keys.
        export train_cmd="ssh.pl"
        export cuda_cmd="ssh.pl"
        export decode_cmd="ssh.pl"
        ;;

    jhu)
        # This is an example of specifying several unique options in the JHU CLSP cluster setup.
        # Users can modify/add their own command options according to their cluster environments.
        export train_cmd="queue.pl --mem 2G"
        export cuda_cmd="queue-freegpu.pl --mem 2G --gpu 1 --config conf/gpu.conf"
        export decode_cmd="queue.pl --mem 4G"
        ;;

    *)
        # This file is meant to be sourced, hence `return` rather than `exit`.
        echo "$0: Error: Unknown cmd_backend=${cmd_backend}" 1>&2
        return 1
        ;;
esac


================================================
FILE: examples/aishell/asr3/conf/preprocess.yaml
================================================
# Preprocessing pipeline; conf/wav2vec2ASR.yaml points at this file through
# its `preprocess_config` key.
process:
    # use raw audio
  - type: wav_process


================================================
FILE: examples/aishell/asr3/conf/train_with_wav2vec.yaml
================================================
# Copyright (c) 2023 speechbrain Authors. All Rights Reserved.
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# 
# Modified from speechbrain 2023 (https://github.com/speechbrain/speechbrain/blob/develop/recipes/AISHELL-1/ASR/CTC/hparams/train_with_wav2vec.yaml)

# ############################################################################
# Model: CTC-wav2vec2
# Encoder: wav2vec2
# Decoder: -
# Tokens: Char
# losses: CTC
# Training: AISHELL-1
# Authors:  Yingzhi WANG 2022
# ############################################################################

output_folder: !ref data
cer_file: !ref <output_folder>/cer.txt
save_folder: !ref <output_folder>/save
train_log: !ref <output_folder>/train_log.txt

# Data files
data_folder: data/aishell # e.g. /path/to/aishell

skip_prep: False
ckpt_interval_minutes: 15 # save checkpoint every N min
train_data: !ref <output_folder>/train.csv
valid_data: !ref <output_folder>/dev.csv
test_data: !ref <output_folder>/test.csv

wav2vec2_hub: TencentGameMate/chinese-wav2vec2-large

# Training parameters
number_of_epochs: 80
lr: 1.0
lr_wav2vec: 0.0001
sorting: ascending
auto_mix_prec: False
sample_rate: 16000

# With data_parallel batch_size is split into N jobs
# With DDP batch_size is multiplied by N jobs
# Must be 8 per GPU to fit 32GB of VRAM
batch_size: 5
test_batch_size: 1 # need set to 1 when decoding

dynamic_batching: False
dynamic_batch_sampler:
   feats_hop_size: 0.01
   max_batch_len: 15 # in terms of "duration" in annotations by default, second here
   left_bucket_len: 200 # old implementation attributes
   multiplier: 1.1 # old implementation attributes
   shuffle_ex: False # if true re-creates batches at each epoch shuffling examples.
   num_buckets: 10 # floor(log(max_batch_len/left_bucket_len, multiplier)) + 1
   batch_ordering: ascending

num_workers: 6

# Dataloader options
train_dataloader_opts:
   batch_size: !ref <batch_size>
   num_workers: !ref <num_workers>
valid_dataloader_opts:
   batch_size: !ref <test_batch_size>
   num_workers: !ref <num_workers>
test_dataloader_opts:
   batch_size: !ref <test_batch_size>
   num_workers: !ref <num_workers>

wav2vec_output_dim: 1024
dnn_neurons: 1024
freeze_wav2vec: False
dropout: 0.15

tokenizer: !apply:paddlenlp.transformers.AutoTokenizer.from_pretrained
   pretrained_model_name_or_path: bert-base-chinese
# bert-base-chinese tokens length
output_neurons: 21128

# Decoding parameters
# Be sure that the bos and eos index match with the BPEs ones
blank_index: 0

# AISHELL-1 has spaces between words in the transcripts,
# which Chinese writing normally does not do.
# If remove_spaces, spaces are removed
# from the transcript before computing CER.
# (e.g., 祝 可爱 的 你 —> 祝可爱的你)
remove_spaces: True
split_tokens: !apply:operator.not_ [!ref <remove_spaces>]


================================================
FILE: examples/aishell/asr3/conf/tuning/decode.yaml
================================================
# Decoding options handed to local/test.sh and local/test_wav.sh as the
# decode config. Those scripts also pass `--opts decode.decoding_method ...`
# and `--opts decode.decode_batch_size ...` on the command line
# (presumably overriding the two matching values below -- TODO confirm).
decode_batch_size: 1
error_rate_type: cer
decoding_method: ctc_greedy_search  # 'ctc_greedy_search', 'ctc_prefix_beam_search'
beam_size: 10


================================================
FILE: examples/aishell/asr3/conf/wav2vec2ASR.yaml
================================================
############################################
#          Network Architecture           #
############################################
freeze_wav2vec2: False
normalize_wav: True
output_norm: True
init_type: 'kaiming_uniform' # Warning: needed for convergence
enc:
  input_shape: 1024
  dnn_blocks: 3
  dnn_neurons: 1024
  activation: True
  normalization: True
  dropout_rate: [0.15, 0.15, 0.0]
ctc:
  enc_n_units: 1024
  blank_id: 0
  dropout_rate: 0.0

audio_augment:
  speeds: [90, 100, 110]

spec_augment:
  time_warp: True
  time_warp_window: 5
  time_warp_mode: bicubic
  freq_mask: True
  n_freq_mask: 2
  time_mask: True
  n_time_mask: 2
  replace_with_zero: False
  freq_mask_width: 30
  time_mask_width: 40
wav2vec2_params_path: exp/wav2vec2/chinese-wav2vec2-large.pdparams


############################################
#               Wav2Vec2.0                 #
############################################
# vocab_size: 1000000
hidden_size: 1024
num_hidden_layers: 24
num_attention_heads: 16
intermediate_size: 4096
hidden_act: gelu
hidden_dropout: 0.1
activation_dropout: 0.0
attention_dropout: 0.1
feat_proj_dropout: 0.1
feat_quantizer_dropout: 0.0
final_dropout: 0.0
layerdrop: 0.1
initializer_range: 0.02
layer_norm_eps: 1e-5
feat_extract_norm: layer
feat_extract_activation: gelu
conv_dim: [512, 512, 512, 512, 512, 512, 512]
conv_stride: [5, 2, 2, 2, 2, 2, 2]
conv_kernel: [10, 3, 3, 3, 3, 2, 2]
conv_bias: True
num_conv_pos_embeddings: 128
num_conv_pos_embedding_groups: 16
do_stable_layer_norm: True
apply_spec_augment: False
mask_channel_length: 10
mask_channel_min_space: 1
mask_channel_other: 0.0
mask_channel_prob: 0.0
mask_channel_selection: static
mask_feature_length: 10
mask_feature_min_masks: 0
mask_feature_prob: 0.0
mask_time_length: 10
mask_time_min_masks: 2
mask_time_min_space: 1
mask_time_other: 0.0
mask_time_prob: 0.075
mask_time_selection: static
num_codevectors_per_group: 320
num_codevector_groups: 2
contrastive_logits_temperature: 0.1
num_negatives: 100
codevector_dim: 256
proj_codevector_dim: 256
diversity_loss_weight: 0.1
use_weighted_layer_sum: False
# pad_token_id: 0
# bos_token_id: 1
# eos_token_id: 2
add_adapter: False
adapter_kernel_size: 3
adapter_stride: 2
num_adapter_layers: 3
output_hidden_size: None

###########################################
#                   Data                  #
###########################################

train_manifest: data/manifest.train
dev_manifest: data/manifest.dev
test_manifest: data/manifest.test
vocab_filepath: data/lang_char/vocab.txt 

###########################################
#              Dataloader                 #
###########################################

unit_type: 'char'
tokenizer: bert-base-chinese
mean_std_filepath: 
preprocess_config: conf/preprocess.yaml
sortagrad: -1 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 5  # Different batch_size may cause large differences in results
maxlen_in: 51200000000  # if input length  > maxlen-in batchsize is automatically reduced
maxlen_out: 1500000  # if output length > maxlen-out batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 6
subsampling_factor: 1
num_encs: 1
dist_sampler: True
shortest_first: True
return_lens_rate: True

###########################################
#        use speechbrain dataloader       #
###########################################
use_sb_pipeline: True  # whether use speechbrain pipeline. Default is True.
sb_pipeline_conf: conf/train_with_wav2vec.yaml

###########################################
#                 Training                #
###########################################
n_epoch: 80
accum_grad: 1
global_grad_clip: 5.0

model_optim: sgd
model_optim_conf:
  lr: 1.0
  weight_decay: 0.0

wav2vec2_optim: adam
wav2vec2_optim_conf:
  lr: 0.0001
  weight_decay: 0.0

model_scheduler: newbobscheduler
model_scheduler_conf:
  improvement_threshold: 0.0025
  annealing_factor: 0.8
  patient: 0
wav2vec2_scheduler: newbobscheduler
wav2vec2_scheduler_conf:
  improvement_threshold: 0.0025
  annealing_factor: 0.9
  patient: 0
log_interval: 1
checkpoint:
  kbest_n: 50
  latest_n: 5


================================================
FILE: examples/aishell/asr3/conf/wav2vec2ASR_adadelta.yaml
================================================
############################################
#          Network Architecture           #
############################################
freeze_wav2vec2: False
normalize_wav: True
output_norm: True
init_type: 'kaiming_uniform' # Warning: needed for convergence
enc:
  input_shape: 1024
  dnn_blocks: 3
  dnn_neurons: 1024
  activation: True
  normalization: True
  dropout_rate: [0.15, 0.15, 0.0]
ctc:
  enc_n_units: 1024
  blank_id: 0
  dropout_rate: 0.0

audio_augment:
  speeds: [90, 100, 110]

spec_augment:
  time_warp: True
  time_warp_window: 5
  time_warp_mode: bicubic
  freq_mask: True
  n_freq_mask: 2
  time_mask: True
  n_time_mask: 2
  replace_with_zero: False
  freq_mask_width: 30
  time_mask_width: 40
wav2vec2_params_path: exp/wav2vec2/chinese-wav2vec2-large.pdparams


############################################
#               Wav2Vec2.0                 #
############################################
# vocab_size: 1000000
hidden_size: 1024
num_hidden_layers: 24
num_attention_heads: 16
intermediate_size: 4096
hidden_act: gelu
hidden_dropout: 0.1
activation_dropout: 0.0
attention_dropout: 0.1
feat_proj_dropout: 0.1
feat_quantizer_dropout: 0.0
final_dropout: 0.0
layerdrop: 0.1
initializer_range: 0.02
layer_norm_eps: 1e-5
feat_extract_norm: layer
feat_extract_activation: gelu
conv_dim: [512, 512, 512, 512, 512, 512, 512]
conv_stride: [5, 2, 2, 2, 2, 2, 2]
conv_kernel: [10, 3, 3, 3, 3, 2, 2]
conv_bias: True
num_conv_pos_embeddings: 128
num_conv_pos_embedding_groups: 16
do_stable_layer_norm: True
apply_spec_augment: False
mask_channel_length: 10
mask_channel_min_space: 1
mask_channel_other: 0.0
mask_channel_prob: 0.0
mask_channel_selection: static
mask_feature_length: 10
mask_feature_min_masks: 0
mask_feature_prob: 0.0
mask_time_length: 10
mask_time_min_masks: 2
mask_time_min_space: 1
mask_time_other: 0.0
mask_time_prob: 0.075
mask_time_selection: static
num_codevectors_per_group: 320
num_codevector_groups: 2
contrastive_logits_temperature: 0.1
num_negatives: 100
codevector_dim: 256
proj_codevector_dim: 256
diversity_loss_weight: 0.1
use_weighted_layer_sum: False
# pad_token_id: 0
# bos_token_id: 1
# eos_token_id: 2
add_adapter: False
adapter_kernel_size: 3
adapter_stride: 2
num_adapter_layers: 3
output_hidden_size: None

###########################################
#                   Data                  #
###########################################

train_manifest: data/manifest.train
dev_manifest: data/manifest.dev
test_manifest: data/manifest.test
vocab_filepath: data/lang_char/vocab.txt 

###########################################
#              Dataloader                 #
###########################################

unit_type: 'char'
tokenizer: bert-base-chinese
mean_std_filepath: 
preprocess_config: conf/preprocess.yaml
sortagrad: -1 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 5  # Different batch_size may cause large differences in results
maxlen_in: 51200000000  # if input length  > maxlen-in batchsize is automatically reduced
maxlen_out: 1500000  # if output length > maxlen-out batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 6
subsampling_factor: 1
num_encs: 1
dist_sampler: True
shortest_first: True
return_lens_rate: True

###########################################
#        use speechbrain dataloader       #
###########################################
use_sb_pipeline: True  # whether use speechbrain pipeline. Default is True.
sb_pipeline_conf: conf/train_with_wav2vec.yaml

###########################################
#                 Training                #
###########################################
n_epoch: 80
accum_grad: 1
global_grad_clip: 5.0

model_optim: adadelta
model_optim_conf:
  lr: 1.0
  weight_decay: 0.0
  rho: 0.95
  epsilon: 1.0e-8

wav2vec2_optim: adam
wav2vec2_optim_conf:
  lr: 0.0001
  weight_decay: 0.0

model_scheduler: newbobscheduler
model_scheduler_conf:
  improvement_threshold: 0.0025
  annealing_factor: 0.8
  patient: 0
wav2vec2_scheduler: newbobscheduler
wav2vec2_scheduler_conf:
  improvement_threshold: 0.0025
  annealing_factor: 0.9
  patient: 0
log_interval: 1
checkpoint:
  kbest_n: 50
  latest_n: 5


================================================
FILE: examples/aishell/asr3/local/aishell_prepare.py
================================================
# Copyright (c) 2023 speechbrain Authors. All Rights Reserved.
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Modified from speechbrain 2023
# (https://github.com/speechbrain/speechbrain/blob/develop/recipes/AISHELL-1/aishell_prepare.py)
import argparse
import csv
import glob
import logging
import os

from paddlespeech.s2t.io.speechbrain.dataio import read_audio

logger = logging.getLogger(__name__)

DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')

parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
    "--data_folder",
    default=DATA_HOME + "/Aishell",
    type=str,
    help="Directory to save the dataset. (default: %(default)s)")
parser.add_argument(
    "--save_folder",
    default="data/",
    type=str,
    help="Filepath prefix for output manifests. (default: %(default)s)")
parser.add_argument(
    "--skip_prep",
    default=False,
    type=bool,
    help="If True, skip data preparation. (default: %(default)s)")
args = parser.parse_args()


def prepare_aishell(data_folder, save_folder, skip_prep=False):
    """Write one CSV manifest per AISHELL-1 split (train / dev / test).

    The transcript file
    ``<data_folder>/data_aishell/transcript/aishell_transcript_v0.8.txt`` is
    read into a ``utterance id -> transcript`` table, then every
    ``<data_folder>/data_aishell/wav/<split>/*/*.wav`` file that has a
    transcript becomes one row ``ID, duration, wav, transcript`` in
    ``<save_folder>/<split>.csv``. A split whose CSV already exists is left
    untouched. This function does not download or extract the dataset.

    Args:
        data_folder: Path to the AISHELL-1 dataset root.
        save_folder: Directory where the manifest CSV files are stored; it is
            created if missing.
        skip_prep: If True, return immediately without doing anything.

    Raises:
        OSError: If the transcript file cannot be opened.
    """
    if skip_prep:
        return

    # Create filename-to-transcript dictionary. The transcripts are Chinese
    # text, so the encoding is pinned instead of relying on the locale.
    transcript_path = os.path.join(
        data_folder, "data_aishell/transcript/aishell_transcript_v0.8.txt")
    filename2transcript = {}
    with open(transcript_path, "r", encoding="utf-8") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank line: nothing to index.
                continue
            filename2transcript[fields[0]] = " ".join(fields[1:])

    if save_folder:
        os.makedirs(save_folder, exist_ok=True)

    ID_start = 0  # needed to have a unique ID for each audio
    for split in ("train", "dev", "test"):
        new_filename = os.path.join(save_folder, split) + ".csv"
        if os.path.exists(new_filename):
            continue
        logger.info("Preparing %s..." % new_filename)

        # Sorted so that row order and IDs do not depend on filesystem order.
        all_wavs = sorted(
            glob.glob(
                os.path.join(data_folder, "data_aishell/wav", split, "*",
                             "*.wav")))

        csv_output = [["ID", "duration", "wav", "transcript"]]
        for i, wav_path in enumerate(all_wavs):
            filename = os.path.splitext(os.path.basename(wav_path))[0]
            if filename not in filename2transcript:
                # Audio without a transcript is dropped from the manifest.
                continue
            signal = read_audio(wav_path)
            # Duration in seconds; the sample count is divided by a fixed
            # 16000 (assumes 16 kHz audio).
            duration = signal.shape[0] / 16000
            csv_output.append([
                ID_start + i,
                str(duration),
                wav_path,
                filename2transcript[filename],
            ])

        # newline="" is what the csv module expects for files it writes to.
        with open(
                new_filename, mode="w", newline="",
                encoding="utf-8") as csv_f:
            csv_writer = csv.writer(
                csv_f, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
            csv_writer.writerows(csv_output)

        msg = "\t%s successfully created!" % (new_filename)
        logger.info(msg)

        # Skipped files still consume an ID, so advance by the full count.
        ID_start += len(all_wavs)


def main():
    """Script entry point: build the AISHELL-1 CSV manifests.

    Reads the module-level ``args`` namespace, expands a leading ``~`` in
    ``args.data_folder``, and calls ``prepare_aishell``. The parsed
    ``--skip_prep`` value is forwarded instead of a hard-coded ``False``, so
    the command-line flag actually takes effect.
    """
    if args.data_folder.startswith('~'):
        args.data_folder = os.path.expanduser(args.data_folder)

    prepare_aishell(
        args.data_folder, args.save_folder, skip_prep=args.skip_prep)

    print("Data csv prepare done!")


if __name__ == '__main__':
    main()


================================================
FILE: examples/aishell/asr3/local/data.sh
================================================
#!/bin/bash
# Data preparation for the AISHELL wav2vec2 recipe.
#
# Stages (selected with --stage / --stop_stage, parsed by parse_options.sh):
#   -1: download the data, generate raw manifests and the speechbrain CSV files
#    0: compute mean and stddev for the normalizer
#    1: build the character vocabulary
#    2: format the manifests (one background job per dataset)
#    3: download the pretrained wav2vec2 parameters
#
# Expects MAIN_ROOT to be set (see path.sh).

stage=-1
stop_stage=3
dict_dir=data/lang_char

. ${MAIN_ROOT}/utils/parse_options.sh || exit -1;

mkdir -p data
mkdir -p ${dict_dir}
TARGET_DIR=${MAIN_ROOT}/dataset
mkdir -p ${TARGET_DIR}

if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
    # download data, generate manifests
    python3 ${TARGET_DIR}/aishell/aishell.py \
    --manifest_prefix="data/manifest" \
    --target_dir="${TARGET_DIR}/aishell"

    # Check this step on its own; otherwise its failure would be masked by
    # the exit status of the next command.
    if [ $? -ne 0 ]; then
        echo "Prepare Aishell failed. Terminated."
        exit 1
    fi

    #generate csv file for speechbrain dataloader
    python3 local/aishell_prepare.py \
    --data_folder="${TARGET_DIR}/aishell" \
    --save_folder="data/"

    if [ $? -ne 0 ]; then
        echo "Prepare Aishell failed. Terminated."
        exit 1
    fi

    for dataset in train dev test; do
        mv data/manifest.${dataset} data/manifest.${dataset}.raw
    done
fi

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # compute mean and stddev for normalizer
    num_workers=$(nproc)
    python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
    --manifest_path="data/manifest.train.raw" \
    --spectrum_type="fbank" \
    --feat_dim=80 \
    --delta_delta=false \
    --stride_ms=10 \
    --window_ms=25 \
    --sample_rate=16000 \
    --use_dB_normalization=False \
    --num_samples=-1 \
    --num_workers=${num_workers} \
    --output_path="data/mean_std.json"

    if [ $? -ne 0 ]; then
        echo "Compute mean and stddev failed. Terminated."
        exit 1
    fi
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # build vocabulary
    python3 ${MAIN_ROOT}/utils/build_vocab.py \
    --unit_type="char" \
    --count_threshold=0 \
    --vocab_path="${dict_dir}/vocab.txt" \
    --manifest_paths "data/manifest.train.raw"

    if [ $? -ne 0 ]; then
        echo "Build vocabulary failed. Terminated."
        exit 1
    fi
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # format manifest with tokenids, vocab size
    # Each dataset is formatted in a background job. An `exit` inside the
    # job only ends that job, and a bare `wait` always returns 0, so the job
    # ids are recorded and waited for one by one to propagate failures.
    pids=()
    for dataset in train dev test; do
    {
        python3 ${MAIN_ROOT}/utils/format_data.py \
            --cmvn_path "data/mean_std.json" \
            --unit_type "char" \
            --vocab_path="${dict_dir}/vocab.txt" \
            --manifest_path="data/manifest.${dataset}.raw" \
            --output_path="data/manifest.${dataset}"

        if [ $? -ne 0 ]; then
            echo "Format manifest failed. Terminated."
            exit 1
        fi
    } &
    pids+=($!)
    done

    failed=0
    for pid in "${pids[@]}"; do
        wait ${pid} || failed=1
    done
    if [ ${failed} -ne 0 ]; then
        exit 1
    fi
fi
echo "Aishell data preparation done."

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    mkdir -p exp/wav2vec2
    echo "Pretrained wav2vec2 model download"
    wget -P exp/wav2vec2 https://paddlespeech.cdn.bcebos.com/wav2vec/chinese-wav2vec2-large.pdparams
fi

exit 0


================================================
FILE: examples/aishell/asr3/local/test.sh
================================================
#!/bin/bash
# Decode the test set with a trained checkpoint and score the result.
#
# usage: CUDA_VISIBLE_DEVICES=0 test.sh config_path decode_config_path ckpt_path_prefix
#
# For each decoding method a <ckpt_prefix>.<method>.rsl file is produced,
# converted to text with format_rsl.py and compared against the reference
# with compute-wer.py; the report goes to <ckpt_prefix>.<method>.error.

set -e

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES.
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

expdir=exp
datadir=data

train_set=train

config_path=$1
decode_config_path=$2
ckpt_prefix=$3

source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

# download language model
#bash local/download_lm_en.sh
#if [ $? -ne 0 ]; then
#    exit 1
#fi

# Reference transcripts of the test set in plain-text form.
python3 ${MAIN_ROOT}/utils/format_rsl.py \
    --origin_ref data/manifest.test.raw \
    --trans_ref data/manifest.test.text


# Greedy search first, then prefix beam search.
for type in ctc_greedy_search ctc_prefix_beam_search; do
    echo "decoding ${type}"
    batch_size=1
    rsl_file=${ckpt_prefix}.${type}.rsl

    python3 -u ${BIN_DIR}/test.py \
        --ngpu ${ngpu} \
        --config ${config_path} \
        --decode_cfg ${decode_config_path} \
        --result_file ${rsl_file} \
        --checkpoint_path ${ckpt_prefix} \
        --opts decode.decoding_method ${type} \
        --opts decode.decode_batch_size ${batch_size}

    if [ $? -ne 0 ]; then
        echo "Failed in evaluation!"
        exit 1
    fi

    # Hypotheses -> plain text, then score against the reference.
    python3 ${MAIN_ROOT}/utils/format_rsl.py \
        --origin_hyp ${rsl_file} \
        --trans_hyp ${rsl_file}.text

    python3 ${MAIN_ROOT}/utils/compute-wer.py --char=1 --v=1 \
        data/manifest.test.text ${rsl_file}.text > ${ckpt_prefix}.${type}.error
    echo "decoding ${type} done."
done

echo "Finished"

exit 0


================================================
FILE: examples/aishell/asr3/local/test_wav.sh
================================================
#!/bin/bash
# Run CTC greedy-search inference on a single audio file.

if [ $# -ne 4 ]; then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix audio_file"
    exit -1
fi

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES.
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
decode_config_path=$2
ckpt_prefix=$3
audio_file=$4

# Make sure the demo recording is available under data/ (-nc: keep an
# existing copy instead of downloading again).
mkdir -p data
wget -nc https://paddlespeech.cdn.bcebos.com/datasets/single_wav/zh/demo_01_03.wav -P data/ || exit 1

[ -f ${audio_file} ] || {
    echo "Plase input the right audio_file path"
    exit 1
}

# Set when the config file name contains "chunk_"; not read again below.
chunk_mode=false
if [[ ${config_path} =~ ^.*chunk_.*yaml$ ]]; then
    chunk_mode=true
fi

# download language model
#bash local/download_lm_ch.sh
#if [ $? -ne 0 ]; then
#    exit 1
#fi

for type in ctc_greedy_search; do
    echo "decoding ${type}"
    batch_size=1
    # The result file is written into a directory named after the checkpoint prefix.
    output_dir=${ckpt_prefix}
    mkdir -p ${output_dir}
    python3 -u ${BIN_DIR}/test_wav.py \
        --ngpu ${ngpu} \
        --config ${config_path} \
        --decode_cfg ${decode_config_path} \
        --result_file ${output_dir}/${type}.rsl \
        --checkpoint_path ${ckpt_prefix} \
        --opts decode.decoding_method ${type} \
        --opts decode.decode_batch_size ${batch_size} \
        --audio_file ${audio_file} || {
        echo "Failed in evaluation!"
        exit 1
    }
done
exit 0


================================================
FILE: examples/aishell/asr3/local/train.sh
================================================
#!/bin/bash
# Train the wav2vec2 ASR model.
#
# Positional arguments:
#   $1 config_path  training config (yaml)
#   $2 ckpt_name    experiment name; outputs go to exp/<ckpt_name>
#   $3 resume       (optional) value forwarded to train.py --resume
#   $4 ips          (optional) node ip list forwarded to paddle.distributed.launch
#
# The number of GPUs is derived from CUDA_VISIBLE_DEVICES; with none set the
# trainer is started directly instead of through the distributed launcher.

# Up to four positionals are read below, so up to four must be accepted.
if [ $# -lt 2 ] || [ $# -gt 4 ];then
    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name resume(optional) ips(optional)"
    exit -1
fi

ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
ckpt_name=$2
resume=$3
ips=$4

if [ ! $ips ];then
  ips_config=
else
  ips_config="--ips="${ips}
fi

mkdir -p exp

# seed may break model convergence
seed=2
if [ ${seed} != 0 ]; then
    export FLAGS_cudnn_deterministic=True
fi

# export FLAGS_cudnn_exhaustive_search=true
# export FLAGS_conv_workspace_size_limit=4000
# export FLAGS_allocator_strategy=naive_best_fit

if [ ${ngpu} == 0 ]; then
python3 -u ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--config ${config_path} \
--output exp/${ckpt_name} \
--seed ${seed} \
--resume ${resume}
else
python3 -m paddle.distributed.launch --log_dir=${ckpt_name} --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--config ${config_path} \
--output exp/${ckpt_name} \
--seed ${seed} \
--resume ${resume}
fi
# Remember the trainer's exit status right away: the `if`/`unset` below
# would otherwise overwrite $? and hide a training failure.
train_status=$?

if [ ${seed} != 0 ]; then
    unset FLAGS_cudnn_deterministic
fi

if [ ${train_status} -ne 0 ]; then
    echo "Failed in training!"
    exit 1
fi

exit 0


================================================
FILE: examples/aishell/asr3/path.sh
================================================
# Environment for this recipe; meant to be sourced (`. ./path.sh`).

# Repository root, three directory levels above the current directory.
export MAIN_ROOT=$(realpath ${PWD}/../../../)

# Executable search path: repository root, sctk binaries and the local utils directory.
export PATH="${MAIN_ROOT}:${MAIN_ROOT}/tools/sctk/bin:${PWD}/utils:${PATH}"
export LC_ALL=C

# Python settings.
export PYTHONDONTWRITEBYTECODE=1
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONPATH="${MAIN_ROOT}:${PYTHONPATH}"

export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/lib/"


# Directory holding the entry-point scripts of the selected model.
MODEL=wav2vec2
export BIN_DIR="${MAIN_ROOT}/paddlespeech/s2t/exps/${MODEL}/bin"


================================================
FILE: examples/aishell/asr3/run.sh
================================================
#!/bin/bash
# Recipe driver: data preparation, training, checkpoint averaging,
# test-set decoding and decoding of a single wav file.
# Stages `stage`..`stop_stage` (inclusive) are executed.
set -e

source ./path.sh || exit 1;
source ./cmd.sh || exit 1;

# Defaults; presumably overridable on the command line via parse_options.sh
# (e.g. `--stage 1 --stop-stage 1`) -- verify against utils/parse_options.sh.
gpus=0,1,2,3
stage=0
stop_stage=4
conf_path=conf/wav2vec2ASR.yaml
ips=            #xx.xx.xx.xx,xx.xx.xx.xx
decode_conf_path=conf/tuning/decode.yaml
avg_num=1
resume=         # xx e.g. 30
export FLAGS_cudnn_deterministic=1
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

audio_file=data/demo_01_03.wav

# Checkpoint name is the config file name up to its first dot.
avg_ckpt=avg_${avg_num}
ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}')
echo "checkpoint name ${ckpt}" 

# Succeeds when stage number $1 lies inside [stage, stop_stage].
stage_enabled() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# stage 0: prepare data
if stage_enabled 0; then
    bash ./local/data.sh || exit -1
fi

# stage 1: train model, all `ckpt` under `exp` dir
if stage_enabled 1; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${resume} ${ips} 
fi

# stage 2: avg n best model
if stage_enabled 2; then
    avg.sh last exp/${ckpt}/checkpoints ${avg_num}
fi

# stage 3: decode the test set with the averaged checkpoint
if stage_enabled 3; then
    CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
fi

# stage 4: test a single .wav file
if stage_enabled 4; then
    CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${audio_file} || exit -1
fi


================================================
FILE: examples/aishell3/README.md
================================================
# Aishell3

* tts0 - Tacotron2
* tts1 - TransformerTTS
* tts2 - SpeedySpeech
* tts3 - FastSpeech2
* voc0 - WaveFlow
* voc1 - Parallel WaveGAN
* voc2 - MelGAN
* voc3 - MultiBand MelGAN
* vc0 - Tacotron2 Voice Cloning with GE2E
* vc1 - FastSpeech2 Voice Cloning with GE2E
* vc2 - FastSpeech2 Voice Cloning with ECAPA-TDNN
* ernie_sat - ERNIE-SAT


================================================
FILE: examples/aishell3/ernie_sat/README.md
================================================
# ERNIE-SAT with AISHELL-3 dataset
[ERNIE-SAT](https://arxiv.org/abs/2211.03545) is a speech-text joint pretraining framework that achieves SOTA results in cross-lingual multi-speaker speech synthesis and cross-lingual speech editing tasks. It can be applied to a series of scenarios such as Speech Editing, personalized Speech Synthesis, and Voice Cloning.

## Model Framework
In ERNIE-SAT, we propose two innovations:
- In the pretraining process, the phonemes corresponding to Chinese and English are used as input to achieve cross-language and personalized soft phoneme mapping
- The joint mask learning of speech and text is used to realize the alignment of speech and text

<p align="center">
    <img src="https://user-images.githubusercontent.com/24568452/186110814-1b9c6618-a0ab-4c0c-bb3d-3d860b0e8cc2.png" />
</p>

## Dataset
### Download and Extract
Download AISHELL-3 from its [Official Website](http://www.aishelltech.com/aishell_3) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/data_aishell3`.

### Get MFA Result and Extract
We use [MFA2.x](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get durations for aishell3_fastspeech2.
You can download from here [aishell3_alignment_tone.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/AISHELL-3/with_tone/aishell3_alignment_tone.tar.gz), or train your MFA model reference to [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) (use MFA1.x now) of our repo.

## Get Started
Assume the path to the dataset is `~/datasets/data_aishell3`.
Assume the path to the MFA result of AISHELL-3 is `./aishell3_alignment_tone`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
    - synthesize waveform from text file.

```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.

```text
dump
├── dev
│   ├── norm
│   └── raw
├── phone_id_map.txt
├── speaker_id_map.txt
├── test
│   ├── norm
│   └── raw
└── train
    ├── norm
    ├── raw
    └── speech_stats.npy
```
The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The raw folder contains speech features of each utterance, while the norm folder contains normalized ones. The statistics used to normalize features are computed from the training set, which is located in `dump/train/*_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains phones, text_lengths, speech_lengths, durations, the path of speech features, speaker, and id of each utterance.

### Model Training
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
`./local/train.sh` calls `${BIN_DIR}/train.py`.

### Synthesizing
We use [HiFiGAN](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/voc5) as the neural vocoder.

Download pretrained HiFiGAN model from [hifigan_aishell3_ckpt_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_ckpt_0.2.0.zip) and unzip it.
```bash
unzip hifigan_aishell3_ckpt_0.2.0.zip
```
HiFiGAN checkpoint contains files listed below.
```text
hifigan_aishell3_ckpt_0.2.0
├── default.yaml                    # default config used to train HiFiGAN
├── feats_stats.npy                 # statistics used to normalize spectrogram when training HiFiGAN
└── snapshot_iter_2500000.pdz       # generator parameters of HiFiGAN
```
`./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
##  Speech Synthesis and Speech Editing
### Prepare
**prepare aligner**
```bash
mkdir -p tools/aligner
cd tools
# download MFA
wget https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner/releases/download/v1.0.1/montreal-forced-aligner_linux.tar.gz
# extract MFA
tar xvf montreal-forced-aligner_linux.tar.gz
# fix .so of MFA
cd montreal-forced-aligner/lib
ln -snf libpython3.6m.so.1.0 libpython3.6m.so
cd -
# download align models and dicts
cd aligner
wget https://paddlespeech.cdn.bcebos.com/MFA/ernie_sat/aishell3_model.zip
wget https://paddlespeech.cdn.bcebos.com/MFA/AISHELL-3/with_tone/simple.lexicon
wget https://paddlespeech.cdn.bcebos.com/MFA/ernie_sat/vctk_model.zip
wget https://paddlespeech.cdn.bcebos.com/MFA/LJSpeech-1.1/cmudict-0.7b
cd ../../
```
**prepare pretrained FastSpeech2 models**

ERNIE-SAT uses FastSpeech2 as the phoneme duration predictor:
```bash
mkdir download
cd download
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_conformer_baker_ckpt_0.5.zip
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_ljspeech_ckpt_0.5.zip
unzip fastspeech2_conformer_baker_ckpt_0.5.zip
unzip fastspeech2_nosil_ljspeech_ckpt_0.5.zip
cd ../
```
**prepare source data**
```bash
mkdir source
cd source
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/source/SSB03540307.wav
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/source/SSB03540428.wav
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/source/LJ050-0278.wav
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/source/p243_313.wav
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/source/p299_096.wav
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/source/this_was_not_the_show_for_me.wav
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/source/README.md
cd ../
```

You can check the text of downloaded wavs in `source/README.md`.
### Speech Synthesis and Speech Editing
```bash
./run.sh --stage 3 --stop-stage 3 --gpus 0
```
`stage 3` of `run.sh` calls `local/synthesize_e2e.sh`. `synthesize_e2e.sh` is a script for performing both **Speech Synthesis** and **Speech Editing** tasks by default. It converts input text into speech for synthesis and modifies existing speech based on new text content for editing.

You can modify `--wav_path`, `--old_str`, and `--new_str` yourself. `--old_str` should be the text corresponding to the audio of `--wav_path`, and `--new_str` should be designed according to `--task_name`. Both `--source_lang` and `--target_lang` should be `zh` for a model trained with the AISHELL-3 dataset.
## Pretrained Model
Pretrained ErnieSAT model:
- [erniesat_aishell3_ckpt_1.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/erniesat_aishell3_ckpt_1.2.0.zip)

Model | Step | eval/mlm_loss | eval/loss
:-------------:| :------------:| :-----: | :-----:
default| 8(gpu) x 289500|51.723782|51.723782


================================================
FILE: examples/aishell3/ernie_sat/conf/default.yaml
================================================
# This configuration was tested on 8 GPUs (A100) with 80GB GPU memory.
# It takes around 3 days to finish the training. You can adjust
# batch_size and num_workers here, and ngpu in local/train.sh, for your machine.
###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################

fs: 24000          # sr
n_fft: 2048        # FFT size (samples).
n_shift: 300       # Hop size (samples). 12.5ms
win_length: 1200   # Window length (samples). 50ms
                   # If set to null, it will be the same as fft_size.
window: "hann"     # Window function.

# Only used for feats_type != raw

fmin: 80           # Minimum frequency of Mel basis.
fmax: 7600         # Maximum frequency of Mel basis.
n_mels: 80         # The number of mel basis.

mean_phn_span: 8
mlm_prob: 0.8

###########################################################
#                       DATA SETTING                      #
###########################################################
batch_size: 40
num_workers: 8

###########################################################
#                       MODEL SETTING                     #
###########################################################
model:
    text_masking: false
    postnet_layers: 5
    postnet_filts: 5
    postnet_chans: 256
    encoder_type: conformer
    decoder_type: conformer
    enc_input_layer: sega_mlm
    enc_pre_speech_layer: 0
    enc_cnn_module_kernel: 7
    enc_attention_dim: 384
    enc_attention_heads: 2
    enc_linear_units: 1536
    enc_num_blocks: 4
    enc_dropout_rate: 0.2
    enc_positional_dropout_rate: 0.2
    enc_attention_dropout_rate: 0.2
    enc_normalize_before: true
    enc_macaron_style: true
    enc_use_cnn_module: true
    enc_selfattention_layer_type: legacy_rel_selfattn
    enc_activation_type: swish
    enc_pos_enc_layer_type: legacy_rel_pos
    enc_positionwise_layer_type: conv1d
    enc_positionwise_conv_kernel_size: 3
    dec_cnn_module_kernel: 31
    dec_attention_dim: 384
    dec_attention_heads: 2
    dec_linear_units: 1536
    dec_num_blocks: 4
    dec_dropout_rate: 0.2
    dec_positional_dropout_rate: 0.2
    dec_attention_dropout_rate: 0.2
    dec_macaron_style: true
    dec_use_cnn_module: true
    dec_selfattention_layer_type: legacy_rel_selfattn
    dec_activation_type: swish
    dec_pos_enc_layer_type: legacy_rel_pos
    dec_positionwise_layer_type: conv1d
    dec_positionwise_conv_kernel_size: 3

###########################################################
#                     OPTIMIZER SETTING                   #
###########################################################
scheduler_params:
    d_model: 384
    warmup_steps: 4000
grad_clip: 1.0

###########################################################
#                     TRAINING SETTING                    #
###########################################################
max_epoch: 1500
num_snapshots: 50

###########################################################
#                       OTHER SETTING                     #
###########################################################
seed: 0

token_list:
- <blank>
- <unk>
- d
- sp
- sh
- ii
- j
- zh
- l
- x
- b
- g
- uu
- e5
- h
- q
- m
- i1
- t
- z
- ch
- f
- s
- u4
- ix4
- i4
- n
- i3
- iu3
- vv
- ian4
- ix2
- r
- e4
- ai4
- k
- ing2
- a1
- en2
- ui4
- ong1
- uo3
- u2
- u3
- ao4
- ee
- p
- an1
- eng2
- i2
- in1
- c
- ai2
- ian2
- e2
- an4
- ing4
- v4
- ai3
- a5
- ian3
- eng1
- ong4
- ang4
- ian1
- ing1
- iy4
- ao3
- ang1
- uo4
- u1
- iao4
- iu4
- a4
- van2
- ie4
- ang2
- ou4
- iang4
- ix1
- er4
- iy1
- e1
- en1
- ui2
- an3
- ei4
- ong2
- uo1
- ou3
- uo2
- iao1
- ou1
- an2
- uan4
- ia4
- ia1
- ang3
- v3
- iu2
- iao3
- in4
- a3
- ei3
- iang3
- v2
- eng4
- en3
- aa
- uan1
- v1
- ao1
- ve4
- ie3
- ai1
- ing3
- iang1
- a2
- ui1
- en4
- en5
- in3
- uan3
- e3
- ie1
- ve2
- ei2
- in2
- ix3
- uan2
- iang2
- ie2
- ua4
- ou2
- uai4
- er2
- eng3
- uang3
- un1
- ong3
- uang4
- vn4
- un2
- iy3
- iz4
- ui3
- iao2
- iong4
- un4
- van4
- ao2
- uang1
- iy5
- o2
- ei1
- ua1
- iu1
- uang2
- er5
- o1
- un3
- vn1
- vn2
- o4
- ve1
- van3
- ua2
- er3
- iong3
- van1
- ia2
- iy2
- ia3
- iong1
- uo5
- oo
- ve3
- ou5
- uai3
- ian5
- iong2
- uai2
- uai1
- ua3
- vn3
- ia5
- ie5
- ueng1
- o5
- o3
- iang5
- ei5
- <sos/eos>


================================================
FILE: examples/aishell3/ernie_sat/local/preprocess.sh
================================================
#!/bin/bash
# Preprocessing pipeline: durations from MFA alignments, feature extraction,
# feature statistics, then normalization of the train/dev/test subsets.
#   $1  path of the config file, passed as --config where needed

config_path=$1

stage=0
stop_stage=100

# Succeeds when step number $1 lies inside [stage, stop_stage].
step_enabled() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# step 0: get durations from MFA's result
if step_enabled 0; then
    echo "Generate durations.txt from MFA results ..."
    python3 ${MAIN_ROOT}/utils/gen_duration_from_textgrid.py \
        --inputdir=./aishell3_alignment_tone \
        --output durations.txt \
        --config=${config_path}
fi

# step 1: extract features
if step_enabled 1; then
    echo "Extract features ..."
    python3 ${BIN_DIR}/preprocess.py \
        --dataset=aishell3 \
        --rootdir=~/datasets/data_aishell3/ \
        --dumpdir=dump \
        --dur-file=durations.txt \
        --config=${config_path} \
        --num-cpu=20 \
        --cut-sil=True
fi

# step 2: get features' stats (mean and std)
if step_enabled 2; then
    echo "Get features' stats ..."
    python3 ${MAIN_ROOT}/utils/compute_statistics.py \
        --metadata=dump/train/raw/metadata.jsonl \
        --field-name="speech"
fi

# step 3: normalize and convert phone/speaker to id;
# dev and test reuse the statistics computed on train
if step_enabled 3; then
    echo "Normalize ..."
    for subset in train dev test; do
        python3 ${BIN_DIR}/normalize.py \
            --metadata=dump/${subset}/raw/metadata.jsonl \
            --dumpdir=dump/${subset}/norm \
            --speech-stats=dump/train/speech_stats.npy \
            --phones-dict=dump/phone_id_map.txt \
            --speaker-dict=dump/speaker_id_map.txt

    done
fi


================================================
FILE: examples/aishell3/ernie_sat/local/synthesize.sh
================================================
#!/bin/bash
# Synthesize waveforms for dump/test/norm/metadata.jsonl with a trained
# ERNIE-SAT checkpoint and the HiFiGAN vocoder.
#   $1  ERNIE-SAT config file
#   $2  training output directory (checkpoints are read from $2/checkpoints)
#   $3  checkpoint file name

config_path=$1
train_output_path=$2
ckpt_name=$3

stage=0
stop_stage=0

# Directory of the unpacked pretrained HiFiGAN vocoder.
voc_dir=hifigan_aishell3_ckpt_0.2.0

# hifigan
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    synth_args=(
        --erniesat_config=${config_path}
        --erniesat_ckpt=${train_output_path}/checkpoints/${ckpt_name}
        --erniesat_stat=dump/train/speech_stats.npy
        --voc=hifigan_aishell3
        --voc_config=${voc_dir}/default.yaml
        --voc_ckpt=${voc_dir}/snapshot_iter_2500000.pdz
        --voc_stat=${voc_dir}/feats_stats.npy
        --test_metadata=dump/test/norm/metadata.jsonl
        --output_dir=${train_output_path}/test
        --phones_dict=dump/phone_id_map.txt
    )
    # The FLAGS_* variables are set only for this python3 process.
    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 ${BIN_DIR}/synthesize.py "${synth_args[@]}"
fi


================================================
FILE: examples/aishell3/ernie_sat/local/synthesize_e2e.sh
================================================
#!/bin/bash
# Run ERNIE-SAT end-to-end inference: speech synthesis (stage 0) and
# speech editing (stage 1), both with the HiFiGAN vocoder.
#   $1  ERNIE-SAT config file
#   $2  training output directory (checkpoints are read from $2/checkpoints)
#   $3  checkpoint file name

config_path=$1
train_output_path=$2
ckpt_name=$3

stage=0
stop_stage=1

# Invoke synthesize_e2e.py for one task.
#   $1 task name   $2 source wav   $3 text of the source wav
#   $4 new text    $5 output wav
run_erniesat() {
    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 ${BIN_DIR}/synthesize_e2e.py \
        --task_name="$1" \
        --wav_path="$2" \
        --old_str="$3" \
        --new_str="$4" \
        --source_lang=zh \
        --target_lang=zh \
        --erniesat_config=${config_path} \
        --phones_dict=dump/phone_id_map.txt \
        --erniesat_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
        --erniesat_stat=dump/train/speech_stats.npy \
        --voc=hifigan_aishell3 \
        --voc_config=hifigan_aishell3_ckpt_0.2.0/default.yaml \
        --voc_ckpt=hifigan_aishell3_ckpt_0.2.0/snapshot_iter_2500000.pdz \
        --voc_stat=hifigan_aishell3_ckpt_0.2.0/feats_stats.npy \
        --output_name="$5"
}

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    echo 'speech synthesize !'
    run_erniesat synthesize source/SSB03540307.wav \
        '请播放歌曲小苹果' '歌曲真好听' exp/pred_gen.wav
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    echo 'speech edit !'
    run_erniesat edit source/SSB03540428.wav \
        '今天天气很好' '今天心情很好' exp/pred_edit.wav
fi


================================================
FILE: examples/aishell3/ernie_sat/local/train.sh
================================================
#!/bin/bash
# Train ERNIE-SAT on the normalized train/dev metadata under dump/.
#   $1  config file, passed as --config
#   $2  output directory, passed as --output-dir
# The GPU count is fixed at 8 here; edit --ngpu to match your machine.

config_path=$1
train_output_path=$2

train_args=(
    --train-metadata=dump/train/norm/metadata.jsonl
    --dev-metadata=dump/dev/norm/metadata.jsonl
    --config=${config_path}
    --output-dir=${train_output_path}
    --ngpu=8
    --phones-dict=dump/phone_id_map.txt
)

python3 ${BIN_DIR}/train.py "${train_args[@]}"


================================================
FILE: examples/aishell3/ernie_sat/path.sh
================================================
#!/bin/bash
# Environment setup for this recipe; intended to be sourced from the recipe directory.

# Repository root: three directory levels above the current working directory.
export MAIN_ROOT=$(realpath ${PWD}/../../../)

# Put the repo root and its utils/ directory on PATH.
export PATH="${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}"

# C locale, no .pyc files, and UTF-8 I/O in Python to avoid
# UnicodeDecodeError when LC_ALL=C.
export LC_ALL=C PYTHONDONTWRITEBYTECODE=1 PYTHONIOENCODING=UTF-8
export PYTHONPATH="${MAIN_ROOT}:${PYTHONPATH}"

# BIN_DIR is the directory containing the entry scripts of the selected model.
MODEL=ernie_sat
export BIN_DIR="${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}"

================================================
FILE: examples/aishell3/ernie_sat/run.sh
================================================
#!/bin/bash
# Recipe driver: preprocessing, training, test-set synthesis and
# end-to-end synthesis/editing. Stages `stage`..`stop_stage` (inclusive) run.

set -e
. path.sh

gpus=0,1,2,3,4,5,6,7
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_289500.pdz

# With the following command you can choose the stage range you want to run,
# such as `./run.sh --stage 0 --stop-stage 0`.
# This can not be mixed with positional arguments (`$1`, `$2` ...).
. ${MAIN_ROOT}/utils/parse_options.sh || exit 1

# Succeeds when stage number $1 lies inside [stage, stop_stage].
stage_enabled() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# stage 0: prepare data
if stage_enabled 0; then
    ./local/preprocess.sh ${conf_path} || exit -1
fi

# stage 1: train model, all `ckpt` under `train_output_path/checkpoints/` dir
if stage_enabled 1; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

# stage 2: synthesize the test set; local/synthesize.sh uses the hifigan vocoder
if stage_enabled 2; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

# stage 3: speech synthesis and speech editing from a source wav
if stage_enabled 3; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi


================================================
FILE: examples/aishell3/tts3/README.md
================================================
# FastSpeech2 with AISHELL-3
This example contains code used to train a [Fastspeech2](https://arxiv.org/abs/2006.04558) model with [AISHELL-3](http://www.aishelltech.com/aishell_3).

AISHELL-3 is a large-scale and high-fidelity multi-speaker Mandarin speech corpus that could be used to train multi-speaker Text-to-Speech (TTS) systems.

We use AISHELL-3 to train a multi-speaker fastspeech2 model here.
## Dataset
### Download and Extract
Download AISHELL-3 from its [Official Website](http://www.aishelltech.com/aishell_3) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/data_aishell3`.
 
### Get MFA Result and Extract
We use [MFA2.x](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get durations for aishell3_fastspeech2.
You can download from here [aishell3_alignment_tone.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/AISHELL-3/with_tone/aishell3_alignment_tone.tar.gz), or train your MFA model reference to [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) (use MFA1.x now) of our repo.

## Get Started
Assume the path to the dataset is `~/datasets/data_aishell3`.
Assume the path to the MFA result of AISHELL-3 is `./aishell3_alignment_tone`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
    - synthesize waveform from text file.
```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```

### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.
```text
dump
├── dev
│   ├── norm
│   └── raw
├── phone_id_map.txt
├── speaker_id_map.txt
├── test
│   ├── norm
│   └── raw
└── train
    ├── energy_stats.npy
    ├── norm
    ├── pitch_stats.npy
    ├── raw
    └── speech_stats.npy
```
The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The raw folder contains speech, pitch, and energy features of each utterance, while the norm folder contains normalized ones. The statistics used to normalize features are computed from the training set, which is located in `dump/train/*_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains phones, text_lengths, speech_lengths, durations, the path of speech features, the path of pitch features, the path of energy features, speaker, and id of each utterance.

### Model Training
`./local/train.sh` calls `${BIN_DIR}/train.py`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
Here's the complete help message.
```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU] [--phones-dict PHONES_DICT]
                [--speaker-dict SPEAKER_DICT] [--voice-cloning VOICE_CLONING]

Train a FastSpeech2 model.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       fastspeech2 config file.
  --train-metadata TRAIN_METADATA
                        training data.
  --dev-metadata DEV_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu=0, use cpu.
  --phones-dict PHONES_DICT
                        phone vocabulary file.
  --speaker-dict SPEAKER_DICT
                        speaker id map file for multiple speaker model.
  --voice-cloning VOICE_CLONING
                        whether training voice cloning model.
```
1. `--config` is a config file in yaml format to overwrite the default config, which can be found at `conf/default.yaml`.
2. `--train-metadata` and `--dev-metadata` should be the metadata file in the normalized subfolder of `train` and `dev` in the `dump` folder.
3. `--output-dir` is the directory to save the results of the experiment. Checkpoints are saved in `checkpoints/` inside this directory.
4. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.
5. `--phones-dict` is the path of the phone vocabulary file.
6. `--speaker-dict` is the path of the speaker id map file when training a multi-speaker FastSpeech2.

### Synthesizing
We use [parallel wavegan](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/voc1) as the neural vocoder.
Download the pretrained parallel wavegan model from [pwg_aishell3_ckpt_0.5.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_aishell3_ckpt_0.5.zip) and unzip it.
```bash
unzip pwg_aishell3_ckpt_0.5.zip
```
Parallel WaveGAN checkpoint contains files listed below.
```text
pwg_aishell3_ckpt_0.5
├── default.yaml                   # default config used to train parallel wavegan
├── feats_stats.npy                # statistics used to normalize spectrogram when training parallel wavegan
└── snapshot_iter_1000000.pdz      # generator parameters of parallel wavegan
```
`./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
```
The last number controls the vocoder model during synthesis, which can be `0` or `1`, use `pwgan` or `hifigan` model as vocoder.
```text
usage: synthesize.py [-h]
                     [--am {speedyspeech_csmsc,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech,tacotron2_aishell3}]
                     [--am_config AM_CONFIG] [--am_ckpt AM_CKPT]
                     [--am_stat AM_STAT] [--phones_dict PHONES_DICT]
                     [--tones_dict TONES_DICT] [--speaker_dict SPEAKER_DICT]
                     [--voice-cloning VOICE_CLONING]
                     [--voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,wavernn_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,style_melgan_csmsc}]
                     [--voc_config VOC_CONFIG] [--voc_ckpt VOC_CKPT]
                     [--voc_stat VOC_STAT] [--ngpu NGPU]
                     [--test_metadata TEST_METADATA] [--output_dir OUTPUT_DIR]

Synthesize with acoustic model & vocoder

optional arguments:
  -h, --help            show this help message and exit
  --am {speedyspeech_csmsc,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech,tacotron2_aishell3}
                        Choose acoustic model type of tts task.
  --am_config AM_CONFIG
                        Config of acoustic model.
  --am_ckpt AM_CKPT     Checkpoint file of acoustic model.
  --am_stat AM_STAT     mean and standard deviation used to normalize
                        spectrogram when training acoustic model.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --tones_dict TONES_DICT
                        tone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --voice-cloning VOICE_CLONING
                        whether training voice cloning model.
  --voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,wavernn_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,style_melgan_csmsc}
                        Choose vocoder type of tts task.
  --voc_config VOC_CONFIG
                        Config of voc.
  --voc_ckpt VOC_CKPT   Checkpoint file of voc.
  --voc_stat VOC_STAT   mean and standard deviation used to normalize
                        spectrogram when training voc.
  --ngpu NGPU           if ngpu == 0, use cpu.
  --test_metadata TEST_METADATA
                        test metadata.
  --output_dir OUTPUT_DIR
                        output dir.
```
`./local/synthesize_e2e.sh` calls `${BIN_DIR}/../synthesize_e2e.py`, which can synthesize waveform from text file.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
```
The last number controls the vocoder model during synthesis, which can be `0` or `1`, use `pwgan` or `hifigan` model as vocoder.
```text
usage: synthesize_e2e.py [-h]
                         [--am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech}]
                         [--am_config AM_CONFIG] [--am_ckpt AM_CKPT]
                         [--am_stat AM_STAT] [--phones_dict PHONES_DICT]
                         [--tones_dict TONES_DICT]
                         [--speaker_dict SPEAKER_DICT] [--spk_id SPK_ID]
                         [--voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,style_melgan_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,wavernn_csmsc}]
                         [--voc_config VOC_CONFIG] [--voc_ckpt VOC_CKPT]
                         [--voc_stat VOC_STAT] [--lang LANG]
                         [--inference_dir INFERENCE_DIR] [--ngpu NGPU]
                         [--text TEXT] [--output_dir OUTPUT_DIR]

Synthesize with acoustic model & vocoder

optional arguments:
  -h, --help            show this help message and exit
  --am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech}
                        Choose acoustic model type of tts task.
  --am_config AM_CONFIG
                        Config of acoustic model.
  --am_ckpt AM_CKPT     Checkpoint file of acoustic model.
  --am_stat AM_STAT     mean and standard deviation used to normalize
                        spectrogram when training acoustic model.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --tones_dict TONES_DICT
                        tone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --spk_id SPK_ID       spk id for multi speaker acoustic model
  --voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,style_melgan_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,wavernn_csmsc}
                        Choose vocoder type of tts task.
  --voc_config VOC_CONFIG
                        Config of voc.
  --voc_ckpt VOC_CKPT   Checkpoint file of voc.
  --voc_stat VOC_STAT   mean and standard deviation used to normalize
                        spectrogram when training voc.
  --lang LANG           Choose model language. zh or en
  --inference_dir INFERENCE_DIR
                        dir to save inference models
  --ngpu NGPU           if ngpu == 0, use cpu.
  --text TEXT           text to synthesize, a 'utt_id sentence' pair per line.
  --output_dir OUTPUT_DIR
                        output dir.
```
1. `--am` is acoustic model type with the format {model_name}_{dataset}
2. `--am_config`, `--am_ckpt`, `--am_stat`, `--phones_dict`, `--speaker_dict` are arguments for acoustic model, which correspond to the 5 files in the fastspeech2 pretrained model.
3. `--voc` is vocoder type with the format {model_name}_{dataset}
4. `--voc_config`, `--voc_ckpt`, `--voc_stat` are arguments for vocoder, which correspond to the 3 files in the parallel wavegan pretrained model.
5. `--lang` is the model language, which can be `zh` or `en`.
6. `--test_metadata` (used by `synthesize.py` only; it is not an argument of `synthesize_e2e.py`) should be the metadata file in the normalized subfolder of `test` in the `dump` folder.
7. `--text` is the text file, which contains sentences to synthesize.
8. `--output_dir` is the directory to save synthesized audio files.
9. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

## Pretrained Model
Pretrained FastSpeech2 model with no silence in the edge of audios:
- [fastspeech2_aishell3_ckpt_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_aishell3_ckpt_1.1.0.zip)
- [fastspeech2_conformer_aishell3_ckpt_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_conformer_aishell3_ckpt_0.2.0.zip) (Thanks for [@awmmmm](https://github.com/awmmmm)'s contribution)

The static model can be downloaded here:
- [fastspeech2_aishell3_static_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_aishell3_static_1.1.0.zip)

The PIR static model can be downloaded here:
- [fastspeech2_aishell3_static_pir_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_aishell3_static_pir_1.1.0.zip) (Running the PIR model requires setting FLAGS_enable_pir_api=1, and the PIR model only works with paddlepaddle>=3.0.0b2)

The ONNX model can be downloaded here:
- [fastspeech2_aishell3_onnx_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_aishell3_onnx_1.1.0.zip)

The Paddle-Lite model can be downloaded here:
- [fastspeech2_aishell3_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_aishell3_pdlite_1.3.0.zip)

FastSpeech2 checkpoint contains files listed below.

```text
fastspeech2_aishell3_ckpt_1.1.0
├── default.yaml            # default config used to train fastspeech2
├── energy_stats.npy        # statistics used to normalize energy when training fastspeech2
├── phone_id_map.txt        # phone vocabulary file when training fastspeech2
├── pitch_stats.npy         # statistics used to normalize pitch when training fastspeech2
├── snapshot_iter_96400.pdz # model parameters and optimizer states
├── speaker_id_map.txt      # speaker id map file when training a multi-speaker fastspeech2
└── speech_stats.npy        # statistics used to normalize spectrogram when training fastspeech2
```
You can use the following scripts to synthesize for `${BIN_DIR}/../../assets/sentences.txt` using pretrained fastspeech2 and parallel wavegan models.
```bash
source path.sh

FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 ${BIN_DIR}/../synthesize_e2e.py \
  --am=fastspeech2_aishell3 \
  --am_config=fastspeech2_aishell3_ckpt_1.1.0/default.yaml \
  --am_ckpt=fastspeech2_aishell3_ckpt_1.1.0/snapshot_iter_96400.pdz \
  --am_stat=fastspeech2_aishell3_ckpt_1.1.0/speech_stats.npy \
  --voc=pwgan_aishell3 \
  --voc_config=pwg_aishell3_ckpt_0.5/default.yaml \
  --voc_ckpt=pwg_aishell3_ckpt_0.5/snapshot_iter_1000000.pdz \
  --voc_stat=pwg_aishell3_ckpt_0.5/feats_stats.npy \
  --lang=zh \
  --text=${BIN_DIR}/../../assets/sentences.txt \
  --output_dir=exp/default/test_e2e \
  --phones_dict=fastspeech2_aishell3_ckpt_1.1.0/phone_id_map.txt \
  --speaker_dict=fastspeech2_aishell3_ckpt_1.1.0/speaker_id_map.txt \
  --spk_id=0 \
  --inference_dir=exp/default/inference
```


================================================
FILE: examples/aishell3/tts3/conf/conformer.yaml
================================================
###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################

fs: 24000          # sr
n_fft: 2048        # FFT size (samples).
n_shift: 300       # Hop size (samples). 12.5ms
win_length: 1200   # Window length (samples). 50ms
                   # If set to null, it will be the same as fft_size.
window: "hann"     # Window function.

# Only used for feats_type != raw

fmin: 80           # Minimum frequency of Mel basis.
fmax: 7600         # Maximum frequency of Mel basis.
n_mels: 80         # The number of mel basis.

# Only used for the model using pitch features (e.g. FastSpeech2)
f0min: 80          # Minimum f0 for pitch extraction.
f0max: 400         # Maximum f0 for pitch extraction.


###########################################################
#                       DATA SETTING                      #
###########################################################
batch_size: 32
num_workers: 4


###########################################################
#                       MODEL SETTING                     #
###########################################################
model:
    adim: 384         # attention dimension
    aheads: 2         # number of attention heads
    elayers: 4        # number of encoder layers
    eunits: 1536      # number of encoder ff units
    dlayers: 4        # number of decoder layers
    dunits: 1536      # number of decoder ff units
    positionwise_layer_type: conv1d   # type of position-wise layer
    positionwise_conv_kernel_size: 3  # kernel size of position wise conv layer
    duration_predictor_layers: 2      # number of layers of duration predictor
    duration_predictor_chans: 256     # number of channels of duration predictor
    duration_predictor_kernel_size: 3 # filter size of duration predictor
    postnet_layers: 5                 # number of layers of postnet
    postnet_filts: 5                  # filter size of conv layers in postnet
    postnet_chans: 256                # number of channels of conv layers in postnet
    encoder_normalize_before: True    # whether to perform layer normalization before the input
    decoder_normalize_before: True    # whether to perform layer normalization before the input
    reduction_factor: 1               # reduction factor
    encoder_type: conformer           # encoder type
    decoder_type: conformer           # decoder type
    conformer_pos_enc_layer_type: rel_pos        # conformer positional encoding type
    conformer_self_attn_layer_type: rel_selfattn # conformer self-attention type
    conformer_activation_type: swish             # conformer activation type
    use_macaron_style_in_conformer: true         # whether to use macaron style in conformer
    use_cnn_in_conformer: true                   # whether to use CNN in conformer
    conformer_enc_kernel_size: 7                 # kernel size in CNN module of conformer-based encoder
    conformer_dec_kernel_size: 31                # kernel size in CNN module of conformer-based decoder
    init_type: xavier_uniform         # initialization type
    transformer_enc_dropout_rate: 0.2            # dropout rate for transformer encoder layer
    transformer_enc_positional_dropout_rate: 0.2 # dropout rate for transformer encoder positional encoding
    transformer_enc_attn_dropout_rate: 0.2       # dropout rate for transformer encoder attention layer
    transformer_dec_dropout_rate: 0.2            # dropout rate for transformer decoder layer
    transformer_dec_positional_dropout_rate: 0.2 # dropout rate for transformer decoder positional encoding
    transformer_dec_attn_dropout_rate: 0.2       # dropout rate for transformer decoder attention layer
    pitch_predictor_layers: 5                  # number of conv layers in pitch predictor
    pitch_predictor_chans: 256                 # number of channels of conv layers in pitch predictor
    pitch_predictor_kernel_size: 5             # kernel size of conv layers in pitch predictor
    pitch_predictor_dropout: 0.5               # dropout rate in pitch predictor
    pitch_embed_kernel_size: 1                 # kernel size of conv embedding layer for pitch
    pitch_embed_dropout: 0.0                   # dropout rate after conv embedding layer for pitch
    stop_gradient_from_pitch_predictor: true   # whether to stop the gradient from pitch predictor to encoder
    energy_predictor_layers: 2                 # number of conv layers in energy predictor
    energy_predictor_chans: 256                # number of channels of conv layers in energy predictor
    energy_predictor_kernel_size: 3            # kernel size of conv layers in energy predictor
    energy_predictor_dropout: 0.5              # dropout rate in energy predictor
    energy_embed_kernel_size: 1                # kernel size of conv embedding layer for energy
    energy_embed_dropout: 0.0                  # dropout rate after conv embedding layer for energy
    stop_gradient_from_energy_predictor: false # whether to stop the gradient from energy predictor to encoder
    spk_embed_dim: 256                         # speaker embedding dimension
    spk_embed_integration_type: concat         # speaker embedding integration type


###########################################################
#                       UPDATER SETTING                   #
###########################################################
updater:
    use_masking: True                 # whether to apply masking for padded part in loss calculation


###########################################################
#                     OPTIMIZER SETTING                   #
###########################################################
optimizer:
    optim: adam              # optimizer type
    learning_rate: 0.001     # learning rate

###########################################################
#                     TRAINING SETTING                    #
###########################################################
max_epoch: 1000
num_snapshots: 5


###########################################################
#                       OTHER SETTING                     #
###########################################################
seed: 10086


================================================
FILE: examples/aishell3/tts3/conf/default.yaml
================================================
###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################

fs: 24000          # sr
n_fft: 2048        # FFT size (samples).
n_shift: 300       # Hop size (samples). 12.5ms
win_length: 1200   # Window length (samples). 50ms
                   # If set to null, it will be the same as fft_size.
window: "hann"     # Window function.

# Only used for feats_type != raw

fmin: 80           # Minimum frequency of Mel basis.
fmax: 7600         # Maximum frequency of Mel basis.
n_mels: 80         # The number of mel basis.

# Only used for the model using pitch features (e.g. FastSpeech2)
f0min: 80          # Minimum f0 for pitch extraction.
f0max: 400         # Maximum f0 for pitch extraction.


###########################################################
#                       DATA SETTING                      #
###########################################################
batch_size: 64
num_workers: 2


###########################################################
#                       MODEL SETTING                     #
###########################################################
model:
    adim: 384         # attention dimension
    aheads: 2         # number of attention heads
    elayers: 4        # number of encoder layers
    eunits: 1536      # number of encoder ff units
    dlayers: 4        # number of decoder layers
    dunits: 1536      # number of decoder ff units
    positionwise_layer_type: conv1d   # type of position-wise layer
    positionwise_conv_kernel_size: 3  # kernel size of position wise conv layer
    duration_predictor_layers: 2      # number of layers of duration predictor
    duration_predictor_chans: 256     # number of channels of duration predictor
    duration_predictor_kernel_size: 3 # filter size of duration predictor
    postnet_layers: 5                 # number of layers of postnet
    postnet_filts: 5                  # filter size of conv layers in postnet
    postnet_chans: 256                # number of channels of conv layers in postnet
    use_scaled_pos_enc: True          # whether to use scaled positional encoding
    encoder_normalize_before: True    # whether to perform layer normalization before the input
    decoder_normalize_before: True    # whether to perform layer normalization before the input
    reduction_factor: 1               # reduction factor
    init_type: xavier_uniform         # initialization type
    init_enc_alpha: 1.0               # initial value of alpha of encoder scaled position encoding
    init_dec_alpha: 1.0               # initial value of alpha of decoder scaled position encoding
    transformer_enc_dropout_rate: 0.2            # dropout rate for transformer encoder layer
    transformer_enc_positional_dropout_rate: 0.2 # dropout rate for transformer encoder positional encoding
    transformer_enc_attn_dropout_rate: 0.2       # dropout rate for transformer encoder attention layer
    transformer_dec_dropout_rate: 0.2            # dropout rate for transformer decoder layer
    transformer_dec_positional_dropout_rate: 0.2 # dropout rate for transformer decoder positional encoding
    transformer_dec_attn_dropout_rate: 0.2       # dropout rate for transformer decoder attention layer
    pitch_predictor_layers: 5                  # number of conv layers in pitch predictor
    pitch_predictor_chans: 256                 # number of channels of conv layers in pitch predictor
    pitch_predictor_kernel_size: 5             # kernel size of conv layers in pitch predictor
    pitch_predictor_dropout: 0.5               # dropout rate in pitch predictor
    pitch_embed_kernel_size: 1                 # kernel size of conv embedding layer for pitch
    pitch_embed_dropout: 0.0                   # dropout rate after conv embedding layer for pitch
    stop_gradient_from_pitch_predictor: True   # whether to stop the gradient from pitch predictor to encoder
    energy_predictor_layers: 2                 # number of conv layers in energy predictor
    energy_predictor_chans: 256                # number of channels of conv layers in energy predictor
    energy_predictor_kernel_size: 3            # kernel size of conv layers in energy predictor
    energy_predictor_dropout: 0.5              # dropout rate in energy predictor
    energy_embed_kernel_size: 1                # kernel size of conv embedding layer for energy
    energy_embed_dropout: 0.0                  # dropout rate after conv embedding layer for energy
    stop_gradient_from_energy_predictor: False # whether to stop the gradient from energy predictor to encoder
    spk_embed_dim: 256                         # speaker embedding dimension
    spk_embed_integration_type: concat         # speaker embedding integration type


###########################################################
#                       UPDATER SETTING                   #
###########################################################
updater:
    use_masking: True                 # whether to apply masking for padded part in loss calculation


###########################################################
#                     OPTIMIZER SETTING                   #
###########################################################
optimizer:
    optim: adam               # optimizer type
    learning_rate: 0.001      # learning rate

###########################################################
#                     TRAINING SETTING                    #
###########################################################
max_epoch: 200
num_snapshots: 5


###########################################################
#                       OTHER SETTING                     #
###########################################################
seed: 10086


================================================
FILE: examples/aishell3/tts3/local/inference.sh
================================================
#!/bin/bash
# Synthesize ${BIN_DIR}/../../assets/sentences.txt with the fastspeech2_aishell3
# models stored under <train_output_path>/inference.
#
# Usage: inference.sh <train_output_path>

exp_dir=$1

# Only the stages inside [stage, stop_stage] are executed:
#   0 -> pwgan vocoder, 1 -> hifigan vocoder.
stage=0
stop_stage=0

# Run inference.py once, using the vocoder type given as $1.
run_inference() {
    local vocoder=$1
    python3 ${BIN_DIR}/../inference.py \
        --inference_dir=${exp_dir}/inference \
        --am=fastspeech2_aishell3 \
        --voc=${vocoder} \
        --text=${BIN_DIR}/../../assets/sentences.txt \
        --output_dir=${exp_dir}/pd_infer_out \
        --phones_dict=dump/phone_id_map.txt \
        --speaker_dict=dump/speaker_id_map.txt \
        --spk_id=0
}

# stage 0: pwgan
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    run_inference pwgan_aishell3
fi

# stage 1: hifigan
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    run_inference hifigan_aishell3
fi


================================================
FILE: examples/aishell3/tts3/local/lite_predict.sh
================================================
#!/bin/bash
# Synthesize ${BIN_DIR}/../../assets/sentences.txt with lite_predict.py, using
# the fastspeech2_aishell3 models stored under <train_output_path>/pdlite.
#
# Usage: lite_predict.sh <train_output_path>

exp_dir=$1

# Only the stages inside [stage, stop_stage] are executed:
#   0 -> pwgan vocoder, 1 -> hifigan vocoder.
stage=0
stop_stage=0

# Run lite_predict.py once, using the vocoder type given as $1.
run_lite_predict() {
    local vocoder=$1
    python3 ${BIN_DIR}/../lite_predict.py \
        --inference_dir=${exp_dir}/pdlite \
        --am=fastspeech2_aishell3 \
        --voc=${vocoder} \
        --text=${BIN_DIR}/../../assets/sentences.txt \
        --output_dir=${exp_dir}/lite_infer_out \
        --phones_dict=dump/phone_id_map.txt \
        --speaker_dict=dump/speaker_id_map.txt \
        --spk_id=0
}

# stage 0: pwgan
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    run_lite_predict pwgan_aishell3
fi

# stage 1: hifigan
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    run_lite_predict hifigan_aishell3
fi


================================================
FILE: examples/aishell3/tts3/local/ort_predict.sh
================================================
#!/bin/bash
# Synthesize ${BIN_DIR}/../../assets/csmsc_test.txt with ort_predict_e2e.py,
# using the fastspeech2_aishell3 ONNX models stored under
# <train_output_path>/inference_onnx.
#
# Usage: ort_predict.sh <train_output_path>
#
# The shebang above is required because run.sh executes this file directly
# (./local/ort_predict.sh); without it the interpreter is left to the caller.

train_output_path=$1

# Only the stages inside [stage, stop_stage] are executed:
#   0 -> pwgan vocoder, 1 -> hifigan vocoder.
stage=0
stop_stage=0

# e2e, synthesize from text (pwgan vocoder)
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    python3 ${BIN_DIR}/../ort_predict_e2e.py \
        --inference_dir=${train_output_path}/inference_onnx \
        --am=fastspeech2_aishell3 \
        --voc=pwgan_aishell3 \
        --output_dir=${train_output_path}/onnx_infer_out_e2e \
        --text=${BIN_DIR}/../../assets/csmsc_test.txt \
        --phones_dict=dump/phone_id_map.txt \
        --device=cpu \
        --cpu_threads=2 \
        --spk_id=0
fi

# e2e, synthesize from text (hifigan vocoder)
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    python3 ${BIN_DIR}/../ort_predict_e2e.py \
        --inference_dir=${train_output_path}/inference_onnx \
        --am=fastspeech2_aishell3 \
        --voc=hifigan_aishell3 \
        --output_dir=${train_output_path}/onnx_infer_out_e2e \
        --text=${BIN_DIR}/../../assets/csmsc_test.txt \
        --phones_dict=dump/phone_id_map.txt \
        --device=cpu \
        --cpu_threads=2 \
        --spk_id=0
fi


================================================
FILE: examples/aishell3/tts3/local/preprocess.sh
================================================
#!/bin/bash
# Data preparation pipeline: durations -> features -> statistics -> normalization.
#
# Usage: preprocess.sh <config_path>

stage=0
stop_stage=100

conf=$1

# stage 0: derive phone durations from the MFA alignment
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    echo "Generate durations.txt from MFA results ..."
    python3 ${MAIN_ROOT}/utils/gen_duration_from_textgrid.py \
        --inputdir=./aishell3_alignment_tone \
        --output durations.txt \
        --config=${conf}
fi

# stage 1: feature extraction into ./dump
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    echo "Extract features ..."
    python3 ${BIN_DIR}/preprocess.py \
        --dataset=aishell3 \
        --rootdir=~/datasets/data_aishell3/ \
        --dumpdir=dump \
        --dur-file=durations.txt \
        --config=${conf} \
        --num-cpu=20 \
        --cut-sil=True
fi

# stage 2: statistics (mean and std) of each feature, from the training set only
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    echo "Get features' stats ..."
    for field in speech pitch energy; do
        python3 ${MAIN_ROOT}/utils/compute_statistics.py \
            --metadata=dump/train/raw/metadata.jsonl \
            --field-name="${field}"
    done
fi

# stage 3: normalize and convert phone/speaker to id; dev and test reuse the
# statistics of the training set
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    echo "Normalize ..."
    for subset in train dev test; do
        python3 ${BIN_DIR}/normalize.py \
            --metadata=dump/${subset}/raw/metadata.jsonl \
            --dumpdir=dump/${subset}/norm \
            --speech-stats=dump/train/speech_stats.npy \
            --pitch-stats=dump/train/pitch_stats.npy \
            --energy-stats=dump/train/energy_stats.npy \
            --phones-dict=dump/phone_id_map.txt \
            --speaker-dict=dump/speaker_id_map.txt
    done
fi


================================================
FILE: examples/aishell3/tts3/local/synthesize.sh
================================================
#!/bin/bash
# Synthesize the utterances listed in dump/test/norm/metadata.jsonl with a
# trained fastspeech2_aishell3 checkpoint and a pretrained vocoder.
#
# Usage: synthesize.sh <config_path> <train_output_path> <ckpt_name> [stage]
#   stage: 0 (default) uses pwgan, 1 uses hifigan.

am_config=$1
exp_dir=$2
am_ckpt_name=$3

# The optional 4th argument sets both bounds, so exactly one stage is selected.
stage=${4:-0}
stop_stage=${4:-0}

# Run synthesize.py with one vocoder.
#   $1: vocoder type passed to --voc
#   $2: directory holding the vocoder's default.yaml / feats_stats.npy / checkpoint
#   $3: checkpoint file name inside that directory
synthesize_with() {
    local voc=$1
    local voc_dir=$2
    local voc_ckpt_file=$3
    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 ${BIN_DIR}/../synthesize.py \
        --am=fastspeech2_aishell3 \
        --am_config=${am_config} \
        --am_ckpt=${exp_dir}/checkpoints/${am_ckpt_name} \
        --am_stat=dump/train/speech_stats.npy \
        --voc=${voc} \
        --voc_config=${voc_dir}/default.yaml \
        --voc_ckpt=${voc_dir}/${voc_ckpt_file} \
        --voc_stat=${voc_dir}/feats_stats.npy \
        --test_metadata=dump/test/norm/metadata.jsonl \
        --output_dir=${exp_dir}/test \
        --phones_dict=dump/phone_id_map.txt \
        --speaker_dict=dump/speaker_id_map.txt
}

# stage 0: pwgan
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    synthesize_with pwgan_aishell3 pwg_aishell3_ckpt_0.5 snapshot_iter_1000000.pdz
fi

# stage 1: hifigan
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    synthesize_with hifigan_aishell3 hifigan_aishell3_ckpt_0.2.0 snapshot_iter_2500000.pdz
fi


================================================
FILE: examples/aishell3/tts3/local/synthesize_e2e.sh
================================================
#!/bin/bash
# Synthesize ${BIN_DIR}/../../assets/sentences.txt end-to-end (text -> wav) with
# a trained fastspeech2_aishell3 checkpoint and a pretrained vocoder.
#
# Usage: synthesize_e2e.sh <config_path> <train_output_path> <ckpt_name> [stage]
#   stage: 0 (default) uses pwgan, 1 uses hifigan.

am_config=$1
exp_dir=$2
am_ckpt_name=$3

# The optional 4th argument sets both bounds, so exactly one stage is selected.
stage=${4:-0}
stop_stage=${4:-0}

# Run synthesize_e2e.py with one vocoder.
#   $1: vocoder type passed to --voc
#   $2: directory holding the vocoder's default.yaml / feats_stats.npy / checkpoint
#   $3: checkpoint file name inside that directory
synthesize_e2e_with() {
    local voc=$1
    local voc_dir=$2
    local voc_ckpt_file=$3
    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 ${BIN_DIR}/../synthesize_e2e.py \
        --am=fastspeech2_aishell3 \
        --am_config=${am_config} \
        --am_ckpt=${exp_dir}/checkpoints/${am_ckpt_name} \
        --am_stat=dump/train/speech_stats.npy \
        --voc=${voc} \
        --voc_config=${voc_dir}/default.yaml \
        --voc_ckpt=${voc_dir}/${voc_ckpt_file} \
        --voc_stat=${voc_dir}/feats_stats.npy \
        --lang=zh \
        --text=${BIN_DIR}/../../assets/sentences.txt \
        --output_dir=${exp_dir}/test_e2e \
        --phones_dict=dump/phone_id_map.txt \
        --speaker_dict=dump/speaker_id_map.txt \
        --spk_id=0 \
        --inference_dir=${exp_dir}/inference
}

# stage 0: pwgan
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    synthesize_e2e_with pwgan_aishell3 pwg_aishell3_ckpt_0.5 snapshot_iter_1000000.pdz
fi

# stage 1: hifigan
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    echo "in hifigan syn_e2e"
    synthesize_e2e_with hifigan_aishell3 hifigan_aishell3_ckpt_0.2.0 snapshot_iter_2500000.pdz
fi


================================================
FILE: examples/aishell3/tts3/local/train.sh
================================================
#!/bin/bash
# Launch train.py on the normalized train/dev metadata under ./dump.
#
# Usage: train.sh <config_path> <train_output_path>

conf=$1
out_dir=$2

# Run settings first, then the data and vocabulary files produced by preprocessing.
python3 ${BIN_DIR}/train.py \
    --config=${conf} \
    --output-dir=${out_dir} \
    --ngpu=2 \
    --train-metadata=dump/train/norm/metadata.jsonl \
    --dev-metadata=dump/dev/norm/metadata.jsonl \
    --phones-dict=dump/phone_id_map.txt \
    --speaker-dict=dump/speaker_id_map.txt


================================================
FILE: examples/aishell3/tts3/run.sh
================================================
#!/bin/bash
# Entry point of the recipe: runs every stage in [stage, stop_stage], from data
# preparation to inference with the exported models.

# stop at the first command that fails
set -e
# NOTE(review): path.sh presumably defines BIN_DIR and MAIN_ROOT used below - confirm
source path.sh

# value exported as CUDA_VISIBLE_DEVICES for the GPU stages
gpus=0,1
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
# checkpoint file under ${train_output_path}/checkpoints/ used by the synthesis stages
ckpt_name=snapshot_iter_482.pdz

# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this can not be mixed use with `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

# stage 0: data preparation
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # prepare data
    ./local/preprocess.sh ${conf_path} || exit -1
fi

# stage 1: training
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # train model, all `ckpt` under `train_output_path/checkpoints/` dir
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

# stage 2: synthesize from the test metadata
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # synthesize, vocoder is pwgan by default 0, use 1 will use hifigan as vocoder
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
fi

# stage 3: synthesize from text
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # synthesize_e2e, vocoder is pwgan by default 0, use 1 will use hifigan as vocoder
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
fi

# stage 4: inference with the models under ${train_output_path}/inference
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    # inference with static model, vocoder is pwgan by default
    CUDA_VISIBLE_DEVICES=${gpus} ./local/inference.sh ${train_output_path} || exit -1
fi

# stage 5: convert the models in `inference` into `inference_onnx`
if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
    # install paddle2onnx
    pip install paddle2onnx --upgrade
    ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx fastspeech2_aishell3
    # considering the balance between speed and quality, we recommend that you use hifigan as vocoder
    # (pwgan is converted by default; uncomment the hifigan line below to convert it as well)
    ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx pwgan_aishell3
    # ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx hifigan_aishell3
    
fi

# inference with onnxruntime, use fastspeech2 + pwgan by default
if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then
    ./local/ort_predict.sh ${train_output_path}
fi

# stage 7: convert the models in `inference` into `pdlite` (x86 target)
if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 7 ]; then
    ./local/export2lite.sh ${train_output_path} inference pdlite fastspeech2_aishell3 x86
    ./local/export2lite.sh ${train_output_path} inference pdlite pwgan_aishell3 x86
    # ./local/export2lite.sh ${train_output_path} inference pdlite hifigan_aishell3 x86
fi

# stage 8: inference with the models under ${train_output_path}/pdlite
if [ ${stage} -le 8 ] && [ ${stop_stage} -ge 8 ]; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/lite_predict.sh ${train_output_path} || exit -1
fi


================================================
FILE: examples/aishell3/vc0/README.md
================================================
# Tacotron2 + AISHELL-3 Voice Cloning
This example contains code used to train a [Tacotron2](https://arxiv.org/abs/1712.05884) model with [AISHELL-3](http://www.aishelltech.com/aishell_3). The trained model can be used in the Voice Cloning task. We refer to the model structure of [Transfer Learning from Speaker Verification to Multispeaker Text-To-Speech Synthesis](https://arxiv.org/pdf/1806.04558.pdf). The general steps are as follows:
1. Speaker Encoder: We use Speaker Verification to train a speaker encoder. Datasets used in this task are different from those used in `Tacotron2` because the transcriptions are not needed, we use more datasets, refer to  [ge2e](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/ge2e).
2. Synthesizer: We use the trained speaker encoder to generate a speaker embedding for each sentence in AISHELL-3. This embedding is an extra input of `Tacotron2` which will be concatenated with the encoder outputs.
3. Vocoder: We use [Parallel Wave GAN](http://arxiv.org/abs/1910.11480) as the neural Vocoder, refer to [voc1](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/voc1).

## Dataset
### Download and Extract
Download AISHELL-3 from its [Official Website](http://www.aishelltech.com/aishell_3) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/data_aishell3`.

### Get MFA Result and Extract
We use [MFA2.x](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get phonemes for Tacotron2; the durations from MFA are not needed here.
You can download the alignment result from here: [aishell3_alignment_tone.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/AISHELL-3/with_tone/aishell3_alignment_tone.tar.gz), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) in our repo (which uses MFA1.x for now).

## Pretrained GE2E Model
We use a pretrained GE2E model to generate a speaker embedding for each sentence.

Download the pretrained GE2E model from here: [ge2e_ckpt_0.3.zip](https://bj.bcebos.com/paddlespeech/Parakeet/released_models/ge2e/ge2e_ckpt_0.3.zip), and `unzip` it.

## Get Started
Assume the path to the dataset is `~/datasets/data_aishell3`.
Assume the path to the MFA result of AISHELL-3 is `./aishell3_alignment_tone`.
Assume the path to the pretrained ge2e model is `./ge2e_ckpt_0.3`.

Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize waveform from `metadata.jsonl`.
5. start a voice cloning inference.
```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to run only one stage. For example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/preprocess.sh ${conf_path} ${ge2e_ckpt_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.
```text
dump
├── dev
│   ├── norm
│   └── raw
├── embed
│   ├── SSB0005
│   ├── SSB0009
│   ├── ...
│   └── ...
├── phone_id_map.txt
├── speaker_id_map.txt
├── test
│   ├── norm
│   └──  raw
└── train
    ├── norm
    ├── raw
    └── speech_stats.npy
```
The `embed` folder contains the generated speaker embedding for each sentence in AISHELL-3. It has the same file structure as the wav files, and the embeddings are stored in `.npy` format.

Computing the utterance embeddings for the whole dataset can take several hours.

The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and a `raw` subfolder. The raw folder contains the speech features of each utterance, while the norm folder contains the normalized ones. The statistics used to normalize the features are computed from the training set and are located in `dump/train/*_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains phones, text_lengths, speech_lengths, durations, the path of speech features, speaker, and id of each utterance.

The preprocessing step is very similar to that of [tts0](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/tts0), but there is one more `ge2e/inference` step here.

### Model Training
`./local/train.sh` calls `${BIN_DIR}/train.py`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
The training step is very similar to that of [tts0](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/tts0), but we should set `--voice-cloning=True` when calling `${BIN_DIR}/train.py`.

### Synthesizing
We use [parallel wavegan](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/voc1) as the neural vocoder.
Download pretrained parallel wavegan model from [pwg_aishell3_ckpt_0.5.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_aishell3_ckpt_0.5.zip) and unzip it.
```bash
unzip pwg_aishell3_ckpt_0.5.zip
```
Parallel WaveGAN checkpoint contains files listed below.
```text
pwg_aishell3_ckpt_0.5
├── default.yaml                   # default config used to train parallel wavegan
├── feats_stats.npy                # statistics used to normalize spectrogram when training parallel wavegan
└── snapshot_iter_1000000.pdz      # generator parameters of parallel wavegan
```
`./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
The synthesizing step is very similar to that of [tts0](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/tts0), but we should set `--voice-cloning=True` when calling `${BIN_DIR}/../synthesize.py`.

### Voice Cloning
Assume there are some reference audios in `./ref_audio`:
```text
ref_audio
├── 001238.wav
├── LJ015-0254.wav
└── audio_self_test.mp3
```
`./local/voice_cloning.sh` calls `${BIN_DIR}/../voice_cloning.py`

```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/voice_cloning.sh ${conf_path} ${train_output_path} ${ckpt_name} ${ge2e_params_path} ${ref_audio_dir}
```

## Pretrained Model
- [tacotron2_aishell3_ckpt_vc0_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/tacotron2/tacotron2_aishell3_ckpt_vc0_0.2.0.zip)


Model | Step | eval/loss | eval/l1_loss | eval/mse_loss | eval/bce_loss| eval/attn_loss
:-------------:| :------------:| :-----: | :-----: | :--------: |:--------:|:---------:
default| 2(gpu) x 37596|0.58704|0.39623|0.15073|0.039|1.9981e-04|

Tacotron2 checkpoint contains files listed below.
(`speaker_id_map.txt` is not needed here.)

```text
tacotron2_aishell3_ckpt_vc0_0.2.0
├── default.yaml            # default config used to train tacotron2
├── phone_id_map.txt        # phone vocabulary file when training tacotron2
├── snapshot_iter_37596.pdz # model parameters and optimizer states
└── speech_stats.npy        # statistics used to normalize spectrogram when training tacotron2
```

## More
We strongly recommend that you use [FastSpeech2 + AISHELL-3 Voice Cloning](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/vc1) which works better.


================================================
FILE: examples/aishell3/vc0/conf/default.yaml
================================================
###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################

fs: 24000          # sr
n_fft: 2048        # FFT size (samples).
n_shift: 300       # Hop size (samples). 12.5ms
win_length: 1200   # Window length (samples). 50ms
                   # If set to null, it will be the same as fft_size.
window: "hann"     # Window function.

# Only used for feats_type != raw

fmin: 80           # Minimum frequency of Mel basis.
fmax: 7600         # Maximum frequency of Mel basis.
n_mels: 80         # The number of mel basis.

###########################################################
#                       DATA SETTING                      #
###########################################################
batch_size: 64
num_workers: 2

###########################################################
#                       MODEL SETTING                     #
###########################################################
model:                          # keyword arguments for the selected model
    embed_dim: 512               # char or phn embedding dimension
    elayers: 1                   # number of blstm layers in encoder
    eunits: 512                  # number of blstm units
    econv_layers: 3              # number of convolutional layers in encoder
    econv_chans: 512             # number of channels in convolutional layer
    econv_filts: 5               # filter size of convolutional layer
    atype: location              # attention function type
    adim: 512                    # attention dimension
    aconv_chans: 32              # number of channels in convolutional layer of attention
    aconv_filts: 15              # filter size of convolutional layer of attention
    cumulate_att_w: True         # whether to cumulate attention weight
    dlayers: 2                   # number of lstm layers in decoder
    dunits: 1024                 # number of lstm units in decoder
    prenet_layers: 2             # number of layers in prenet
    prenet_units: 256            # number of units in prenet
    postnet_layers: 5            # number of layers in postnet
    postnet_chans: 512           # number of channels in postnet
    postnet_filts: 5             # filter size of postnet layer
    output_activation: null      # activation function for the final output
    use_batch_norm: True         # whether to use batch normalization in encoder
    use_concate: True            # whether to concatenate encoder embedding with decoder outputs
    use_residual: False          # whether to use residual connection in encoder
    dropout_rate: 0.5            # dropout rate
    zoneout_rate: 0.1            # zoneout rate
    reduction_factor: 1          # reduction factor
    spk_embed_dim: 256           # speaker embedding dimension
    spk_embed_integration_type: concat # how to integrate speaker embedding


###########################################################
#                       UPDATER SETTING                   #
###########################################################
updater:
    use_masking: True            # whether to apply masking for padded part in loss calculation
    bce_pos_weight: 5.0          # weight of positive sample in binary cross entropy calculation
    use_guided_attn_loss: True   # whether to use guided attention loss
    guided_attn_loss_sigma: 0.4  # sigma of guided attention loss
    guided_attn_loss_lambda: 1.0 # strength of guided attention loss


##########################################################
#                  OPTIMIZER SETTING                     #
##########################################################
optimizer:
    optim: adam              # optimizer type
    learning_rate: 1.0e-03   # learning rate
    epsilon: 1.0e-06         # epsilon
    weight_decay: 0.0        # weight decay coefficient

###########################################################
#                     TRAINING SETTING                    #
###########################################################
max_epoch: 100
num_snapshots: 5

###########################################################
#                       OTHER SETTING                     #
###########################################################
seed: 42

================================================
FILE: examples/aishell3/vc0/local/preprocess.sh
================================================
#!/bin/bash
# Data preparation for the Tacotron2 + AISHELL-3 voice cloning example (vc0).
#
# Usage: ./local/preprocess.sh <config_path> <ge2e_ckpt_path>
#   config_path     passed as --config to the duration and feature extraction steps
#   ge2e_ckpt_path  passed as --checkpoint_path to the GE2E inference script
#
# Uses ${MAIN_ROOT} and ${BIN_DIR} from the environment
# (presumably exported by `source path.sh` in run.sh -- confirm).

# The stage range is fixed here: this script does not parse --stage/--stop-stage,
# so stages 0-4 all run unless these two values are edited.
stage=0
stop_stage=100

config_path=$1
ge2e_ckpt_path=$2

# Stage 0: generate speaker embeddings for the AISHELL-3 training wavs with the
# GE2E model and write them to dump/embed.
# NOTE(review): bash does not tilde-expand `~` inside an option word such as
# `--input=~/...`, so the literal `~/...` string is passed on; presumably the
# Python script expands it itself -- confirm.
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    python3 ${MAIN_ROOT}/paddlespeech/vector/exps/ge2e/inference.py \
        --input=~/datasets/data_aishell3/train/wav/ \
        --output=dump/embed \
        --checkpoint_path=${ge2e_ckpt_path}
fi

# The following stages are copied from tts3/preprocess.
# Stage 1: convert the MFA alignment result in ./aishell3_alignment_tone into durations.txt.
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # get durations from MFA's result
    echo "Generate durations.txt from MFA results ..."
    python3 ${MAIN_ROOT}/utils/gen_duration_from_textgrid.py \
        --inputdir=./aishell3_alignment_tone \
        --output durations.txt \
        --config=${config_path}
fi

# Stage 2: extract features into dump/, using durations.txt and the speaker
# embeddings produced by stage 0 (dump/embed).
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # extract features
    echo "Extract features ..."
    python3 ${BIN_DIR}/preprocess.py \
        --dataset=aishell3 \
        --rootdir=~/datasets/data_aishell3/ \
        --dumpdir=dump \
        --dur-file=durations.txt \
        --config=${config_path} \
        --num-cpu=20 \
        --cut-sil=True \
        --spk_emb_dir=dump/embed
fi

# Stage 3: compute statistics of the "speech" field over the raw training set.
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # get features' stats(mean and std)
    echo "Get features' stats ..."
    python3 ${MAIN_ROOT}/utils/compute_statistics.py \
        --metadata=dump/train/raw/metadata.jsonl \
        --field-name="speech"
fi

# Stage 4: normalize train/dev/test; all three splits use the statistics
# computed from the training set (dump/train/speech_stats.npy).
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    # normalize and convert phone to id, dev and test should use train's stats
    echo "Normalize ..."
    python3 ${BIN_DIR}/normalize.py \
        --metadata=dump/train/raw/metadata.jsonl \
        --dumpdir=dump/train/norm \
        --speech-stats=dump/train/speech_stats.npy \
        --phones-dict=dump/phone_id_map.txt \
        --speaker-dict=dump/speaker_id_map.txt

    python3 ${BIN_DIR}/normalize.py \
        --metadata=dump/dev/raw/metadata.jsonl \
        --dumpdir=dump/dev/norm \
        --speech-stats=dump/train/speech_stats.npy \
        --phones-dict=dump/phone_id_map.txt \
        --speaker-dict=dump/speaker_id_map.txt

    python3 ${BIN_DIR}/normalize.py \
        --metadata=dump/test/raw/metadata.jsonl \
        --dumpdir=dump/test/norm \
        --speech-stats=dump/train/speech_stats.npy \
        --phones-dict=dump/phone_id_map.txt \
        --speaker-dict=dump/speaker_id_map.txt
fi


================================================
FILE: examples/aishell3/vc0/local/synthesize.sh
================================================
#!/bin/bash
# Synthesize waveforms for the test set (dump/test/norm/metadata.jsonl) with the
# trained Tacotron2 acoustic model and the pretrained Parallel WaveGAN vocoder.
#
# Usage: ./local/synthesize.sh <config_path> <train_output_path> <ckpt_name>
#   config_path        acoustic model config (--am_config)
#   train_output_path  training output dir; the checkpoint is read from
#                      <train_output_path>/checkpoints/<ckpt_name> and the
#                      results are written to <train_output_path>/test
#   ckpt_name          file name of the acoustic model checkpoint
#
# Expects the unzipped vocoder checkpoint dir `pwg_aishell3_ckpt_0.5` and the
# `dump` dir in the current working directory; uses ${BIN_DIR} from the environment.

config_path=$1
train_output_path=$2
ckpt_name=$3

python3 ${BIN_DIR}/../synthesize.py \
    --am=tacotron2_aishell3 \
    --am_config=${config_path} \
    --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
    --am_stat=dump/train/speech_stats.npy \
    --voc=pwgan_aishell3 \
    --voc_config=pwg_aishell3_ckpt_0.5/default.yaml \
    --voc_ckpt=pwg_aishell3_ckpt_0.5/snapshot_iter_1000000.pdz \
    --voc_stat=pwg_aishell3_ckpt_0.5/feats_stats.npy \
    --test_metadata=dump/test/norm/metadata.jsonl \
    --output_dir=${train_output_path}/test \
    --phones_dict=dump/phone_id_map.txt \
    --speaker_dict=dump/speaker_id_map.txt \
    --voice-cloning=True


================================================
FILE: examples/aishell3/vc0/local/train.sh
================================================
#!/bin/bash
# Train the Tacotron2 voice cloning model on the normalized train/dev dumps.
#
# Usage: ./local/train.sh <config_path> <train_output_path>
#   config_path        training config (--config)
#   train_output_path  output directory (--output-dir)
#
# Uses ${BIN_DIR} from the environment and the `dump` dir in the current
# working directory.

config_path=$1
train_output_path=$2

# NOTE: --ngpu is hard-coded to 2 and does not follow CUDA_VISIBLE_DEVICES;
# run.sh exposes two devices by default (gpus=0,1).
python3 ${BIN_DIR}/train.py \
    --train-metadata=dump/train/norm/metadata.jsonl \
    --dev-metadata=dump/dev/norm/metadata.jsonl \
    --config=${config_path} \
    --output-dir=${train_output_path} \
    --ngpu=2 \
    --phones-dict=dump/phone_id_map.txt \
    --voice-cloning=True

================================================
FILE: examples/aishell3/vc0/local/voice_cloning.sh
================================================
#!/bin/bash
# Run voice cloning inference: synthesize a fixed sentence using the reference
# audios in <ref_audio_dir>, with the trained Tacotron2 model, the GE2E speaker
# encoder parameters and the pretrained Parallel WaveGAN vocoder.
#
# Usage: ./local/voice_cloning.sh <config_path> <train_output_path> <ckpt_name> \
#            <ge2e_params_path> <ref_audio_dir>
#   config_path        acoustic model config (--am_config)
#   train_output_path  training output dir; the checkpoint is read from
#                      <train_output_path>/checkpoints/<ckpt_name> and the
#                      results are written to <train_output_path>/vc_syn
#   ckpt_name          file name of the acoustic model checkpoint
#   ge2e_params_path   GE2E parameter file (--ge2e_params_path)
#   ref_audio_dir      directory of reference audios (--input-dir)
#
# Expects the unzipped vocoder checkpoint dir `pwg_aishell3_ckpt_0.5` and the
# `dump` dir in the current working directory; uses ${BIN_DIR} from the environment.

config_path=$1
train_output_path=$2
ckpt_name=$3
ge2e_params_path=$4
ref_audio_dir=$5

# The sentence to synthesize is fixed via --text below.
python3 ${BIN_DIR}/../voice_cloning.py \
    --am=tacotron2_aishell3 \
    --am_config=${config_path} \
    --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
    --am_stat=dump/train/speech_stats.npy \
    --voc=pwgan_aishell3 \
    --voc_config=pwg_aishell3_ckpt_0.5/default.yaml \
    --voc_ckpt=pwg_aishell3_ckpt_0.5/snapshot_iter_1000000.pdz \
    --voc_stat=pwg_aishell3_ckpt_0.5/feats_stats.npy \
    --ge2e_params_path=${ge2e_params_path} \
    --text="凯莫瑞安联合体的经济崩溃迫在眉睫。" \
    --input-dir=${ref_audio_dir} \
    --output-dir=${train_output_path}/vc_syn \
    --phones-dict=dump/phone_id_map.txt


================================================
FILE: examples/aishell3/vc0/run.sh
================================================
#!/bin/bash
# Entry point of the Tacotron2 + AISHELL-3 voice cloning example (vc0).
# Stages: 0 = preprocess, 1 = train, 2 = synthesize the test set, 3 = voice cloning.

# abort on the first failing command
set -e
source path.sh

# devices exported to every stage through CUDA_VISIBLE_DEVICES
gpus=0,1
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
# checkpoint file name under ${train_output_path}/checkpoints/ used by stages 2 and 3
ckpt_name=snapshot_iter_482.pdz
ref_audio_dir=ref_audio

# checkpoint prefix of the pretrained GE2E model, without the ".pdparams" suffix
ge2e_ckpt_path=./ge2e_ckpt_0.3/step-3000000

# full parameter file name, including the ".pdparams" suffix
ge2e_params_path=${ge2e_ckpt_path}.pdparams

# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this cannot be mixed with positional arguments (`$1`, `$2`, ...)
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # prepare data
    CUDA_VISIBLE_DEVICES=${gpus} ./local/preprocess.sh ${conf_path} ${ge2e_ckpt_path} || exit -1
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # train model, all `ckpt` under `train_output_path/checkpoints/` dir
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # synthesize, vocoder is pwgan
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # voice cloning with the reference audios in ${ref_audio_dir}, vocoder is pwgan
    CUDA_VISIBLE_DEVICES=${gpus} ./local/voice_cloning.sh ${conf_path} ${train_output_path} ${ckpt_name} ${ge2e_params_path} ${ref_audio_dir} || exit -1
fi


================================================
FILE: examples/aishell3/vc1/README.md
================================================
# FastSpeech2 + AISHELL-3 Voice Cloning
This example contains code used to train a [FastSpeech2](https://arxiv.org/abs/2006.04558) model with [AISHELL-3](http://www.aishelltech.com/aishell_3). The trained model can be used for the voice cloning task. We refer to the model structure of [Transfer Learning from Speaker Verification to Multispeaker Text-To-Speech Synthesis](https://arxiv.org/pdf/1806.04558.pdf). The general steps are as follows:
1. Speaker Encoder: We use a speaker verification task to train a speaker encoder. The datasets used for this task differ from those used for `FastSpeech2`: because transcriptions are not needed, we can use more datasets. Refer to [ge2e](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/ge2e).
2. Synthesizer: We use the trained speaker encoder to generate a speaker embedding for each sentence in AISHELL-3. This embedding is an extra input of `FastSpeech2`, which will be concatenated with the encoder outputs.
3. Vocoder: We use [Parallel Wave GAN](http://arxiv.org/abs/1910.11480) as the neural Vocoder, refer to [voc1](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/voc1).

## Dataset
### Download and Extract
Download AISHELL-3 from its [Official Website](http://www.aishelltech.com/aishell_3) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/data_aishell3`.

### Get MFA Result and Extract
We use [MFA2.x](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get durations for aishell3_fastspeech2.
You can download the alignment result from here: [aishell3_alignment_tone.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/AISHELL-3/with_tone/aishell3_alignment_tone.tar.gz), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) in our repo (which uses MFA1.x for now).

## Pretrained GE2E Model
We use a pretrained GE2E model to generate a speaker embedding for each sentence.

Download the pretrained GE2E model from here: [ge2e_ckpt_0.3.zip](https://bj.bcebos.com/paddlespeech/Parakeet/released_models/ge2e/ge2e_ckpt_0.3.zip), and `unzip` it.

## Get Started
Assume the path to the dataset is `~/datasets/data_aishell3`.
Assume the path to the MFA result of AISHELL-3 is `./aishell3_alignment_tone`.
Assume the path to the pretrained ge2e model is `./ge2e_ckpt_0.3`.

Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize waveform from `metadata.jsonl`.
5. start a voice cloning inference.
```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to run only one stage. For example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/preprocess.sh ${conf_path} ${ge2e_ckpt_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.
```text
dump
├── dev
│   ├── norm
│   └── raw
├── embed
│   ├── SSB0005
│   ├── SSB0009
│   ├── ...
│   └── ...
├── phone_id_map.txt
├── speaker_id_map.txt
├── test
│   ├── norm
│   └──  raw
└── train
    ├── energy_stats.npy
    ├── norm
    ├── pitch_stats.npy
    ├── raw
    └── speech_stats.npy
```
The `embed` folder contains the generated speaker embedding for each sentence in AISHELL-3. It has the same file structure as the wav files, and the embeddings are stored in `.npy` format.

Computing the utterance embeddings for the whole dataset can take several hours.

The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and a `raw` subfolder. The raw folder contains the speech, pitch, and energy features of each utterance, while the norm folder contains the normalized ones. The statistics used to normalize the features are computed from the training set and are located in `dump/train/*_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains phones, text_lengths, speech_lengths, durations, the path of speech features, the path of pitch features, the path of energy features, speaker, and id of each utterance.

The preprocessing step is very similar to that of [tts3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/tts3), but there is one more `ge2e/inference` step here.

### Model Training
`./local/train.sh` calls `${BIN_DIR}/train.py`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
The training step is very similar to that of [tts3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/tts3), but we should set `--voice-cloning=True` when calling `${BIN_DIR}/train.py`.

### Synthesizing
We use [parallel wavegan](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/voc1) as the neural vocoder.
Download pretrained parallel wavegan model from [pwg_aishell3_ckpt_0.5.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_aishell3_ckpt_0.5.zip) and unzip it.
```bash
unzip pwg_aishell3_ckpt_0.5.zip
```
Parallel WaveGAN checkpoint contains files listed below.
```text
pwg_aishell3_ckpt_0.5
├── default.yaml                   # default config used to train parallel wavegan
├── feats_stats.npy                # statistics used to normalize spectrogram when training parallel wavegan
└── snapshot_iter_1000000.pdz      # generator parameters of parallel wavegan
```
`./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
The synthesizing step is very similar to that of [tts3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/tts3), but we should set `--voice-cloning=True` when calling `${BIN_DIR}/../synthesize.py`.

### Voice Cloning
Assume there are some reference audios in `./ref_audio`:
```text
ref_audio
├── 001238.wav
├── LJ015-0254.wav
└── audio_self_test.mp3
```
`./local/voice_cloning.sh` calls `${BIN_DIR}/../voice_cloning.py`

```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/voice_cloning.sh ${conf_path} ${train_output_path} ${ckpt_name} ${ge2e_params_path} ${ref_audio_dir}
```
## Pretrained Model
- [fastspeech2_nosil_aishell3_vc1_ckpt_0.5.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_aishell3_vc1_ckpt_0.5.zip)

Model | Step | eval/loss | eval/l1_loss | eval/duration_loss | eval/pitch_loss| eval/energy_loss 
:-------------:| :------------:| :-----: | :-----: | :--------: |:--------:|:---------:
default|2(gpu) x 96400|0.99699|0.62013|0.053057|0.11954| 0.20426|

FastSpeech2 checkpoint contains files listed below.
(`speaker_id_map.txt` is not needed here.)

```text
fastspeech2_nosil_aishell3_ckpt_vc1_0.5
├── default.yaml            # default config used to train fastspeech2
├── phone_id_map.txt        # phone vocabulary file when training fastspeech2
├── snapshot_iter_96400.pdz # model parameters and optimizer states
└── speech_stats.npy        # statistics used to normalize spectrogram when training fastspeech2
```


================================================
FILE: examples/aishell3/vc1/conf/default.yaml
================================================
# Default configuration of the FastSpeech2 + AISHELL-3 voice cloning example (vc1).
###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################

fs: 24000          # Sampling rate (Hz).
n_fft: 2048        # FFT size (samples).
n_shift: 300       # Hop size (samples). 12.5ms
win_length: 1200   # Window length (samples). 50ms
                   # If set to null, it will be the same as n_fft.
window: "hann"     # Window function.

# Only used for feats_type != raw

fmin: 80           # Minimum frequency of Mel basis.
fmax: 7600         # Maximum frequency of Mel basis.
n_mels: 80         # The number of mel basis.

# Only used for the model using pitch features (e.g. FastSpeech2)
f0min: 80          # Minimum f0 for pitch extraction.
f0max: 400         # Maximum f0 for pitch extraction.


###########################################################
#                       DATA SETTING                      #
###########################################################
batch_size: 64
num_workers: 2


###########################################################
#                       MODEL SETTING                     #
###########################################################
model:
    adim: 384         # attention dimension
    aheads: 2         # number of attention heads
    elayers: 4        # number of encoder layers
    eunits: 1536      # number of encoder ff units
    dlayers: 4        # number of decoder layers
    dunits: 1536      # number of decoder ff units
    positionwise_layer_type: conv1d   # type of position-wise layer
    positionwise_conv_kernel_size: 3  # kernel size of position wise conv layer
    duration_predictor_layers: 2      # number of layers of duration predictor
    duration_predictor_chans: 256     # number of channels of duration predictor
    duration_predictor_kernel_size: 3 # filter size of duration predictor
    postnet_layers: 5                 # number of layers of postnet
    postnet_filts: 5                  # filter size of conv layers in postnet
    postnet_chans: 256                # number of channels of conv layers in postnet
    use_scaled_pos_enc: True          # whether to use scaled positional encoding
    encoder_normalize_before: True    # whether to perform layer normalization before the input
    decoder_normalize_before: True    # whether to perform layer normalization before the input
    reduction_factor: 1               # reduction factor
    init_type: xavier_uniform         # initialization type
    init_enc_alpha: 1.0               # initial value of alpha of encoder scaled position encoding
    init_dec_alpha: 1.0               # initial value of alpha of decoder scaled position encoding
    transformer_enc_dropout_rate: 0.2            # dropout rate for transformer encoder layer
    transformer_enc_positional_dropout_rate: 0.2 # dropout rate for transformer encoder positional encoding
    transformer_enc_attn_dropout_rate: 0.2       # dropout rate for transformer encoder attention layer
    transformer_dec_dropout_rate: 0.2            # dropout rate for transformer decoder layer
    transformer_dec_positional_dropout_rate: 0.2 # dropout rate for transformer decoder positional encoding
    transformer_dec_attn_dropout_rate: 0.2       # dropout rate for transformer decoder attention layer
    pitch_predictor_layers: 5                  # number of conv layers in pitch predictor
    pitch_predictor_chans: 256                 # number of channels of conv layers in pitch predictor
    pitch_predictor_kernel_size: 5             # kernel size of conv layers in pitch predictor
    pitch_predictor_dropout: 0.5               # dropout rate in pitch predictor
    pitch_embed_kernel_size: 1                 # kernel size of conv embedding layer for pitch
    pitch_embed_dropout: 0.0                   # dropout rate after conv embedding layer for pitch
    stop_gradient_from_pitch_predictor: True   # whether to stop the gradient from pitch predictor to encoder
    energy_predictor_layers: 2                 # number of conv layers in energy predictor
    energy_predictor_chans: 256                # number of channels of conv layers in energy predictor
    energy_predictor_kernel_size: 3            # kernel size of conv layers in energy predictor
    energy_predictor_dropout: 0.5              # dropout rate in energy predictor
    energy_embed_kernel_size: 1                # kernel size of conv embedding layer for energy
    energy_embed_dropout: 0.0                  # dropout rate after conv embedding layer for energy
    stop_gradient_from_energy_predictor: False # whether to stop the gradient from energy predictor to encoder
    spk_embed_dim: 256                         # speaker embedding dimension
    spk_embed_integration_type: concat         # speaker embedding integration type


###########################################################
#                       UPDATER SETTING                   #
###########################################################
updater:
    use_masking: True                 # whether to apply masking for padded part in loss calculation


###########################################################
#                     OPTIMIZER SETTING                   #
###########################################################
optimizer:
  optim: adam               # optimizer type
  learning_rate: 0.001     # learning rate

###########################################################
#                     TRAINING SETTING                    #
###########################################################
max_epoch: 200
num_snapshots: 5


###########################################################
#                       OTHER SETTING                     #
###########################################################
seed: 10086


================================================
FILE: examples/aishell3/vc1/local/preprocess.sh
================================================
#!/bin/bash
# Data preparation for the FastSpeech2 + AISHELL-3 voice cloning example (vc1).
#
# Usage: ./local/preprocess.sh <config_path> <ge2e_ckpt_path>
#   config_path     passed as --config to the duration and feature extraction steps
#   ge2e_ckpt_path  passed as --checkpoint_path to the GE2E inference script
#
# Uses ${MAIN_ROOT} and ${BIN_DIR} from the environment
# (presumably exported by `source path.sh` in run.sh -- confirm).

# The stage range is fixed here: this script does not parse --stage/--stop-stage,
# so stages 0-4 all run unless these two values are edited.
stage=0
stop_stage=100

config_path=$1
ge2e_ckpt_path=$2

# Stage 0: generate speaker embeddings for the AISHELL-3 training wavs with the
# GE2E model and write them to dump/embed.
# NOTE(review): bash does not tilde-expand `~` inside an option word such as
# `--input=~/...`, so the literal `~/...` string is passed on; presumably the
# Python script expands it itself -- confirm.
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    python3 ${MAIN_ROOT}/paddlespeech/vector/exps/ge2e/inference.py \
        --input=~/datasets/data_aishell3/train/wav/ \
        --output=dump/embed \
        --checkpoint_path=${ge2e_ckpt_path}
fi

# The following stages are copied from tts3/preprocess.
# Stage 1: convert the MFA alignment result in ./aishell3_alignment_tone into durations.txt.
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # get durations from MFA's result
    echo "Generate durations.txt from MFA results ..."
    python3 ${MAIN_ROOT}/utils/gen_duration_from_textgrid.py \
        --inputdir=./aishell3_alignment_tone \
        --output durations.txt \
        --config=${config_path}
fi

# Stage 2: extract features into dump/, using durations.txt and the speaker
# embeddings produced by stage 0 (dump/embed).
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # extract features
    echo "Extract features ..."
    python3 ${BIN_DIR}/preprocess.py \
        --dataset=aishell3 \
        --rootdir=~/datasets/data_aishell3/ \
        --dumpdir=dump \
        --dur-file=durations.txt \
        --config=${config_path} \
        --num-cpu=20 \
        --cut-sil=True \
        --spk_emb_dir=dump/embed
fi

# Stage 3: compute statistics of the "speech", "pitch" and "energy" fields over
# the raw training set, one run per field.
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # get features' stats(mean and std)
    echo "Get features' stats ..."
    python3 ${MAIN_ROOT}/utils/compute_statistics.py \
        --metadata=dump/train/raw/metadata.jsonl \
        --field-name="speech"

    python3 ${MAIN_ROOT}/utils/compute_statistics.py \
        --metadata=dump/train/raw/metadata.jsonl \
        --field-name="pitch"

    python3 ${MAIN_ROOT}/utils/compute_statistics.py \
        --metadata=dump/train/raw/metadata.jsonl \
        --field-name="energy"
fi

# Stage 4: normalize train/dev/test; all three splits use the statistics
# computed from the training set (dump/train/*_stats.npy).
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    # normalize and convert phone/speaker to id, dev and test should use train's stats
    echo "Normalize ..."
    python3 ${BIN_DIR}/normalize.py \
        --metadata=dump/train/raw/metadata.jsonl \
        --dumpdir=dump/train/norm \
        --speech-stats=dump/train/speech_stats.npy \
        --pitch-stats=dump/train/pitch_stats.npy \
        --energy-stats=dump/train/energy_stats.npy \
        --phones-dict=dump/phone_id_map.txt \
        --speaker-dict=dump/speaker_id_map.txt

    python3 ${BIN_DIR}/normalize.py \
        --metadata=dump/dev/raw/metadata.jsonl \
        --dumpdir=dump/dev/norm \
        --speech-stats=dump/train/speech_stats.npy \
        --pitch-stats=dump/train/pitch_stats.npy \
        --energy-stats=dump/train/energy_stats.npy \
        --phones-dict=dump/phone_id_map.txt \
        --speaker-dict=dump/speaker_id_map.txt

    python3 ${BIN_DIR}/normalize.py \
        --metadata=dump/test/raw/metadata.jsonl \
        --dumpdir=dump/test/norm \
        --speech-stats=dump/train/speech_stats.npy \
        --pitch-stats=dump/train/pitch_stats.npy \
        --energy-stats=dump/train/energy_stats.npy \
        --phones-dict=dump/phone_id_map.txt \
        --speaker-dict=dump/speaker_id_map.txt
fi


================================================
FILE: examples/aishell3/vc1/local/synthesize.sh
================================================
#!/bin/bash
# Synthesize waveforms from dump/test/norm/metadata.jsonl with a trained
# FastSpeech2 voice-cloning model and the pretrained Parallel WaveGAN vocoder
# unpacked in ./pwg_aishell3_ckpt_0.5.
#
# Usage: synthesize.sh <config_path> <train_output_path> <ckpt_name>
#   config_path        acoustic model config (passed as --am_config)
#   train_output_path  experiment directory; the checkpoint is read from
#                      <train_output_path>/checkpoints/ and results are
#                      written to <train_output_path>/test
#   ckpt_name          checkpoint file name inside checkpoints/
#
# BIN_DIR must already be set in the environment (presumably by path.sh,
# which run.sh sources -- verify).

config_path=$1
train_output_path=$2
ckpt_name=$3

# Expansions are quoted so that paths containing spaces are not word-split.
python3 "${BIN_DIR}/../synthesize.py" \
    --am=fastspeech2_aishell3 \
    --am_config="${config_path}" \
    --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
    --am_stat=dump/train/speech_stats.npy \
    --voc=pwgan_aishell3 \
    --voc_config=pwg_aishell3_ckpt_0.5/default.yaml \
    --voc_ckpt=pwg_aishell3_ckpt_0.5/snapshot_iter_1000000.pdz \
    --voc_stat=pwg_aishell3_ckpt_0.5/feats_stats.npy \
    --test_metadata=dump/test/norm/metadata.jsonl \
    --output_dir="${train_output_path}/test" \
    --phones_dict=dump/phone_id_map.txt \
    --speaker_dict=dump/speaker_id_map.txt \
    --voice-cloning=True


================================================
FILE: examples/aishell3/vc1/local/voice_cloning.sh
================================================
#!/bin/bash
# Run voice cloning: synthesize one fixed Mandarin sentence for the reference
# audios in <ref_audio_dir>, using a trained FastSpeech2 model, a GE2E speaker
# encoder and the pretrained Parallel WaveGAN in ./pwg_aishell3_ckpt_0.5.
#
# Usage: voice_cloning.sh <config_path> <train_output_path> <ckpt_name> \
#                         <ge2e_params_path> <ref_audio_dir>
#   config_path        acoustic model config (passed as --am_config)
#   train_output_path  experiment directory; the checkpoint is read from
#                      <train_output_path>/checkpoints/ and results are
#                      written to <train_output_path>/vc_syn
#   ckpt_name          checkpoint file name inside checkpoints/
#   ge2e_params_path   GE2E speaker encoder parameters file
#   ref_audio_dir      directory holding the reference audios
#
# BIN_DIR must already be set in the environment (presumably by path.sh,
# which run.sh sources -- verify).

config_path=$1
train_output_path=$2
ckpt_name=$3
ge2e_params_path=$4
ref_audio_dir=$5

# Expansions are quoted so that paths containing spaces are not word-split.
python3 "${BIN_DIR}/../voice_cloning.py" \
    --am=fastspeech2_aishell3 \
    --am_config="${config_path}" \
    --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
    --am_stat=dump/train/speech_stats.npy \
    --voc=pwgan_aishell3 \
    --voc_config=pwg_aishell3_ckpt_0.5/default.yaml \
    --voc_ckpt=pwg_aishell3_ckpt_0.5/snapshot_iter_1000000.pdz \
    --voc_stat=pwg_aishell3_ckpt_0.5/feats_stats.npy \
    --ge2e_params_path="${ge2e_params_path}" \
    --text="凯莫瑞安联合体的经济崩溃迫在眉睫。" \
    --input-dir="${ref_audio_dir}" \
    --output-dir="${train_output_path}/vc_syn" \
    --phones-dict=dump/phone_id_map.txt


================================================
FILE: examples/aishell3/vc1/run.sh
================================================
#!/bin/bash
# Entry point of the vc1 example (FastSpeech2 + GE2E voice cloning).
# Runs the stages in [stage, stop_stage]:
#   0  preprocess the dataset
#   1  train the model
#   2  synthesize waveforms from the test metadata
#   3  voice cloning with the reference audios

set -e
source path.sh

gpus=0,1
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_482.pdz
ref_audio_dir=ref_audio

# do not include ".pdparams" here
ge2e_ckpt_path=./ge2e_ckpt_0.3/step-3000000

# include ".pdparams" here
ge2e_params_path=${ge2e_ckpt_path}.pdparams

# With the following command you can choose the stage range you want to run,
# such as `./run.sh --stage 0 --stop-stage 0`.
# This cannot be mixed with positional arguments (`$1`, `$2`, ...).
source "${MAIN_ROOT}/utils/parse_options.sh" || exit 1

# Arguments handed to the local scripts are quoted so that overridden paths
# containing spaces are passed as single arguments.
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    # prepare data
    CUDA_VISIBLE_DEVICES=${gpus} ./local/preprocess.sh "${conf_path}" "${ge2e_ckpt_path}" || exit -1
fi

if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    # train model, all `ckpt` under `train_output_path/checkpoints/` dir
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh "${conf_path}" "${train_output_path}" || exit -1
fi

if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    # synthesize, vocoder is pwgan
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh "${conf_path}" "${train_output_path}" "${ckpt_name}" || exit -1
fi

if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    # voice cloning with the reference audios in ref_audio_dir, vocoder is pwgan
    CUDA_VISIBLE_DEVICES=${gpus} ./local/voice_cloning.sh "${conf_path}" "${train_output_path}" "${ckpt_name}" "${ge2e_params_path}" "${ref_audio_dir}" || exit -1
fi


================================================
FILE: examples/aishell3/vc2/README.md
================================================
# FastSpeech2 + AISHELL-3 Voice Cloning (ECAPA-TDNN)
This example contains code used to train a [FastSpeech2](https://arxiv.org/abs/2006.04558) model with [AISHELL-3](http://www.aishelltech.com/aishell_3). The trained model can be used for the voice cloning task. We follow the model structure of [Transfer Learning from Speaker Verification to Multispeaker Text-To-Speech Synthesis](https://arxiv.org/pdf/1806.04558.pdf). The general steps are as follows:
1. Speaker Encoder: We use Speaker Verification to train a speaker encoder. Because transcriptions are not needed for this task, the datasets differ from those used for `FastSpeech2` and more data can be used; refer to [ECAPA-TDNN](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/voxceleb/sv0).
2. Synthesizer: We use the trained speaker encoder to generate a speaker embedding for each sentence in AISHELL-3. This embedding is an extra input of `FastSpeech2`, which is concatenated with the encoder outputs.
3. Vocoder: We use [Parallel Wave GAN](http://arxiv.org/abs/1910.11480) as the neural Vocoder, refer to [voc1](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/voc1).

## Dataset
### Download and Extract
Download AISHELL-3 from its [Official Website](http://www.aishelltech.com/aishell_3) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/data_aishell3`.

### Get MFA Result and Extract
We use [MFA2.x](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get durations for aishell3_fastspeech2.
You can download the alignment from here: [aishell3_alignment_tone.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/AISHELL-3/with_tone/aishell3_alignment_tone.tar.gz), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) in our repo (which uses MFA1.x for now).

## Get Started
Assume the path to the dataset is `~/datasets/data_aishell3`.
Assume the path to the MFA result of AISHELL-3 is `./aishell3_alignment_tone`.

Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize waveform from `metadata.jsonl`.
5. start a voice cloning inference.
```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.
```text
dump
├── dev
│   ├── norm
│   └── raw
├── embed
│   ├── SSB0005
│   ├── SSB0009
│   ├── ...
│   └── ...
├── phone_id_map.txt
├── speaker_id_map.txt
├── test
│   ├── norm
│   └──  raw
└── train
    ├── energy_stats.npy
    ├── norm
    ├── pitch_stats.npy
    ├── raw
    └── speech_stats.npy
```
The `embed` folder contains the generated speaker embedding for each sentence in AISHELL-3. It has the same file structure as the wav files, and the embeddings are stored in `.npy` format.

Computing the utterance embeddings can take hours.

The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The raw folder contains the speech, pitch, and energy features of each utterance, while the norm folder contains normalized ones. The statistics used to normalize features are computed from the training set and are stored in `dump/train/*_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains phones, text_lengths, speech_lengths, durations, the path of speech features, the path of pitch features, the path of energy features, speaker, and id of each utterance.

The preprocessing step is very similar to that of [tts3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/tts3), but there is one more `ECAPA-TDNN/inference` step here.

### Model Training
`./local/train.sh` calls `${BIN_DIR}/train.py`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
The training step is very similar to that one of [tts3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/tts3), but we should set `--voice-cloning=True` when calling `${BIN_DIR}/train.py`.

### Synthesizing
We use [parallel wavegan](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/voc1) as the neural vocoder.
Download pretrained parallel wavegan model from [pwg_aishell3_ckpt_0.5.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_aishell3_ckpt_0.5.zip) and unzip it.
```bash
unzip pwg_aishell3_ckpt_0.5.zip
```
Parallel WaveGAN checkpoint contains files listed below.
```text
pwg_aishell3_ckpt_0.5
├── default.yaml                   # default config used to train parallel wavegan
├── feats_stats.npy                # statistics used to normalize spectrogram when training parallel wavegan
└── snapshot_iter_1000000.pdz      # generator parameters of parallel wavegan
```
`./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
The synthesizing step is very similar to that one of [tts3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/tts3), but we should set `--voice-cloning=True` when calling `${BIN_DIR}/../synthesize.py`.

### Voice Cloning
Assume there are some reference audios in `./ref_audio` (the format must be wav here)
```text
ref_audio
├── 001238.wav
├── LJ015-0254.wav
└── audio_self_test.wav
```
`./local/voice_cloning.sh` calls `${BIN_DIR}/../voice_cloning.py`

```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/voice_cloning.sh ${conf_path} ${train_output_path} ${ckpt_name} ${ref_audio_dir}
```
## Pretrained Model
- [fastspeech2_aishell3_ckpt_vc2_1.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_aishell3_ckpt_vc2_1.2.0.zip)

Model | Step | eval/loss | eval/l1_loss | eval/duration_loss | eval/pitch_loss| eval/energy_loss 
:-------------:| :------------:| :-----: | :-----: | :--------: |:--------:|:---------:
default|2(gpu) x 96400|0.991855|0.599517|0.052142|0.094877| 0.245318|

FastSpeech2 checkpoint contains files listed below.
(There is no need for `speaker_id_map.txt` here )

```text
fastspeech2_aishell3_ckpt_vc2_1.2.0
├── default.yaml            # default config used to train fastspeech2
├── energy_stats.npy        # statistics used to normalize energy when training fastspeech2
├── phone_id_map.txt        # phone vocabulary file when training fastspeech2
├── pitch_stats.npy         # statistics used to normalize pitch when training fastspeech2
├── snapshot_iter_96400.pdz # model parameters and optimizer states
└── speech_stats.npy        # statistics used to normalize spectrogram when training fastspeech2
```


================================================
FILE: examples/aishell3/vc2/conf/default.yaml
================================================
###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################

fs: 24000          # sr
n_fft: 2048        # FFT size (samples).
n_shift: 300       # Hop size (samples). 12.5ms
win_length: 1200   # Window length (samples). 50ms
                   # If set to null, it will be the same as n_fft.
window: "hann"     # Window function.

# Only used for feats_type != raw

fmin: 80           # Minimum frequency of Mel basis.
fmax: 7600         # Maximum frequency of Mel basis.
n_mels: 80         # The number of mel basis.

# Only used for the model using pitch features (e.g. FastSpeech2)
f0min: 80          # Minimum f0 for pitch extraction.
f0max: 400         # Maximum f0 for pitch extraction.


###########################################################
#                       DATA SETTING                      #
###########################################################
batch_size: 64
num_workers: 2


###########################################################
#                       MODEL SETTING                     #
###########################################################
model:
    adim: 384         # attention dimension
    aheads: 2         # number of attention heads
    elayers: 4        # number of encoder layers
    eunits: 1536      # number of encoder ff units
    dlayers: 4        # number of decoder layers
    dunits: 1536      # number of decoder ff units
    positionwise_layer_type: conv1d   # type of position-wise layer
    positionwise_conv_kernel_size: 3  # kernel size of position wise conv layer
    duration_predictor_layers: 2      # number of layers of duration predictor
    duration_predictor_chans: 256     # number of channels of duration predictor
    duration_predictor_kernel_size: 3 # filter size of duration predictor
    postnet_layers: 5                 # number of layers of postnet
    postnet_filts: 5                  # filter size of conv layers in postnet
    postnet_chans: 256                # number of channels of conv layers in postnet
    use_scaled_pos_enc: True          # whether to use scaled positional encoding
    encoder_normalize_before: True    # whether to perform layer normalization before the input
    decoder_normalize_before: True    # whether to perform layer normalization before the input
    reduction_factor: 1               # reduction factor
    init_type: xavier_uniform         # initialization type
    init_enc_alpha: 1.0               # initial value of alpha of encoder scaled position encoding
    init_dec_alpha: 1.0               # initial value of alpha of decoder scaled position encoding
    transformer_enc_dropout_rate: 0.2            # dropout rate for transformer encoder layer
    transformer_enc_positional_dropout_rate: 0.2 # dropout rate for transformer encoder positional encoding
    transformer_enc_attn_dropout_rate: 0.2       # dropout rate for transformer encoder attention layer
    transformer_dec_dropout_rate: 0.2            # dropout rate for transformer decoder layer
    transformer_dec_positional_dropout_rate: 0.2 # dropout rate for transformer decoder positional encoding
    transformer_dec_attn_dropout_rate: 0.2       # dropout rate for transformer decoder attention layer
    pitch_predictor_layers: 5                  # number of conv layers in pitch predictor
    pitch_predictor_chans: 256                 # number of channels of conv layers in pitch predictor
    pitch_predictor_kernel_size: 5             # kernel size of conv layers in pitch predictor
    pitch_predictor_dropout: 0.5               # dropout rate in pitch predictor
    pitch_embed_kernel_size: 1                 # kernel size of conv embedding layer for pitch
    pitch_embed_dropout: 0.0                   # dropout rate after conv embedding layer for pitch
    stop_gradient_from_pitch_predictor: True   # whether to stop the gradient from pitch predictor to encoder
    energy_predictor_layers: 2                 # number of conv layers in energy predictor
    energy_predictor_chans: 256                # number of channels of conv layers in energy predictor
    energy_predictor_kernel_size: 3            # kernel size of conv layers in energy predictor
    energy_predictor_dropout: 0.5              # dropout rate in energy predictor
    energy_embed_kernel_size: 1                # kernel size of conv embedding layer for energy
    energy_embed_dropout: 0.0                  # dropout rate after conv embedding layer for energy
    stop_gradient_from_energy_predictor: False # whether to stop the gradient from energy predictor to encoder
    spk_embed_dim: 192                         # speaker embedding dimension
    spk_embed_integration_type: concat         # speaker embedding integration type


###########################################################
#                       UPDATER SETTING                   #
###########################################################
updater:
    use_masking: True                 # whether to apply masking for padded part in loss calculation


###########################################################
#                     OPTIMIZER SETTING                   #
###########################################################
optimizer:
  optim: adam               # optimizer type
  learning_rate: 0.001     # learning rate

###########################################################
#                     TRAINING SETTING                    #
###########################################################
max_epoch: 200
num_snapshots: 5


###########################################################
#                       OTHER SETTING                     #
###########################################################
seed: 10086


================================================
FILE: examples/aishell3/vc2/local/preprocess.sh
================================================
#!/bin/bash
# Data preparation for the vc2 example (FastSpeech2 + ECAPA-TDNN embeddings).
#
# Usage: preprocess.sh <config_path>
#   config_path  feature/model config passed to the duration and feature
#                extraction scripts
#
# Stages:
#   0  generate speaker embeddings into dump/embed
#   1  generate durations.txt from the MFA result in ./aishell3_alignment_tone
#   2  extract features into ./dump
#   3  compute feature statistics on the training set
#   4  normalize train/dev/test with the training-set statistics
#
# BIN_DIR and MAIN_ROOT must already be set in the environment (presumably by
# path.sh, which run.sh sources -- verify).

# Abort on the first failing command. Every stage consumes files written by
# the previous one, and the `set -e` in run.sh does not reach this child
# script, so without this a failed stage would be followed by the later ones.
set -e

stage=0
stop_stage=100

config_path=$1

# gen speaker embedding
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    # NOTE(review): bash does not tilde-expand `~` after `=` inside an option
    # word, so the literal "~/datasets/..." is passed through; the Python
    # script presumably expands it -- confirm before changing the path form.
    python3 "${BIN_DIR}/vc2_infer.py" \
        --input=~/datasets/data_aishell3/train/wav/ \
        --output=dump/embed \
        --num-cpu=20
fi

# copy from tts3/preprocess
if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    # get durations from MFA's result
    echo "Generate durations.txt from MFA results ..."
    python3 "${MAIN_ROOT}/utils/gen_duration_from_textgrid.py" \
        --inputdir=./aishell3_alignment_tone \
        --output durations.txt \
        --config="${config_path}"
fi

if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    # extract features
    echo "Extract features ..."
    python3 "${BIN_DIR}/preprocess.py" \
        --dataset=aishell3 \
        --rootdir=~/datasets/data_aishell3/ \
        --dumpdir=dump \
        --dur-file=durations.txt \
        --config="${config_path}" \
        --num-cpu=20 \
        --cut-sil=True \
        --spk_emb_dir=dump/embed
fi

if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    # get features' stats (mean and std), once per feature field
    echo "Get features' stats ..."
    for field in speech pitch energy; do
        python3 "${MAIN_ROOT}/utils/compute_statistics.py" \
            --metadata=dump/train/raw/metadata.jsonl \
            --field-name="${field}"
    done
fi

if [ "${stage}" -le 4 ] && [ "${stop_stage}" -ge 4 ]; then
    # normalize and convert phone/speaker to id; dev and test use train's stats
    echo "Normalize ..."
    for subset in train dev test; do
        python3 "${BIN_DIR}/normalize.py" \
            --metadata="dump/${subset}/raw/metadata.jsonl" \
            --dumpdir="dump/${subset}/norm" \
            --speech-stats=dump/train/speech_stats.npy \
            --pitch-stats=dump/train/pitch_stats.npy \
            --energy-stats=dump/train/energy_stats.npy \
            --phones-dict=dump/phone_id_map.txt \
            --speaker-dict=dump/speaker_id_map.txt
    done
fi


================================================
FILE: examples/aishell3/vc2/local/voice_cloning.sh
================================================
#!/bin/bash
# Run voice cloning with the ECAPA-TDNN speaker encoder (--use_ecapa=True):
# synthesize one fixed Mandarin sentence for the reference audios in
# <ref_audio_dir>, using a trained FastSpeech2 model and the pretrained
# Parallel WaveGAN in ./pwg_aishell3_ckpt_0.5.
#
# Usage: voice_cloning.sh <config_path> <train_output_path> <ckpt_name> \
#                         <ref_audio_dir>
#   config_path        acoustic model config (passed as --am_config)
#   train_output_path  experiment directory; the checkpoint is read from
#                      <train_output_path>/checkpoints/ and results are
#                      written to <train_output_path>/vc_syn
#   ckpt_name          checkpoint file name inside checkpoints/
#   ref_audio_dir      directory holding the reference audios
#
# BIN_DIR must already be set in the environment (presumably by path.sh,
# which run.sh sources -- verify).

config_path=$1
train_output_path=$2
ckpt_name=$3
ref_audio_dir=$4

# Expansions are quoted so that paths containing spaces are not word-split.
python3 "${BIN_DIR}/../voice_cloning.py" \
    --am=fastspeech2_aishell3 \
    --am_config="${config_path}" \
    --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
    --am_stat=dump/train/speech_stats.npy \
    --voc=pwgan_aishell3 \
    --voc_config=pwg_aishell3_ckpt_0.5/default.yaml \
    --voc_ckpt=pwg_aishell3_ckpt_0.5/snapshot_iter_1000000.pdz \
    --voc_stat=pwg_aishell3_ckpt_0.5/feats_stats.npy \
    --text="凯莫瑞安联合体的经济崩溃迫在眉睫。" \
    --input-dir="${ref_audio_dir}" \
    --output-dir="${train_output_path}/vc_syn" \
    --phones-dict=dump/phone_id_map.txt \
    --use_ecapa=True


================================================
FILE: examples/aishell3/vc2/run.sh
================================================
#!/bin/bash
# Entry point of the vc2 example (FastSpeech2 + ECAPA-TDNN voice cloning).
# Runs the stages in [stage, stop_stage]:
#   0  preprocess the dataset
#   1  train the model
#   2  synthesize waveforms from the test metadata
#   3  voice cloning with the reference audios

set -e
source path.sh

gpus=0,1
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_96400.pdz
ref_audio_dir=ref_audio


# With the following command you can choose the stage range you want to run,
# such as `./run.sh --stage 0 --stop-stage 0`.
# This cannot be mixed with positional arguments (`$1`, `$2`, ...).
source "${MAIN_ROOT}/utils/parse_options.sh" || exit 1

# Arguments handed to the local scripts are quoted so that overridden paths
# containing spaces are passed as single arguments.
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    # prepare data
    CUDA_VISIBLE_DEVICES=${gpus} ./local/preprocess.sh "${conf_path}" || exit -1
fi

if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    # train model, all `ckpt` under `train_output_path/checkpoints/` dir
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh "${conf_path}" "${train_output_path}" || exit -1
fi

if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    # synthesize, vocoder is pwgan
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh "${conf_path}" "${train_output_path}" "${ckpt_name}" || exit -1
fi

if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    # voice cloning with the reference audios in ref_audio_dir, vocoder is pwgan
    CUDA_VISIBLE_DEVICES=${gpus} ./local/voice_cloning.sh "${conf_path}" "${train_output_path}" "${ckpt_name}" "${ref_audio_dir}" || exit -1
fi


================================================
FILE: examples/aishell3/vits/README.md
================================================
# VITS with AISHELL-3
This example contains code used to train a [VITS](https://arxiv.org/abs/2106.06103) model with [AISHELL-3](http://www.aishelltech.com/aishell_3).

AISHELL-3 is a large-scale and high-fidelity multi-speaker Mandarin speech corpus that could be used to train multi-speaker Text-to-Speech (TTS) systems.

We use AISHELL-3 to train a multi-speaker VITS model here.
## Dataset
### Download and Extract
Download AISHELL-3 from its [Official Website](http://www.aishelltech.com/aishell_3) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/data_aishell3`.

### Get MFA Result and Extract
We use [MFA2.x](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get phonemes for VITS, the durations of MFA are not needed here.
You can download the alignment from here: [aishell3_alignment_tone.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/AISHELL-3/with_tone/aishell3_alignment_tone.tar.gz), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) in our repo (which uses MFA1.x for now).

## Get Started
Assume the path to the dataset is `~/datasets/data_aishell3`.
Assume the path to the MFA result of AISHELL-3 is `./aishell3_alignment_tone`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
    - synthesize waveform from a text file.

```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```

### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.

```text
dump
├── dev
│   ├── norm
│   └── raw
├── phone_id_map.txt
├── speaker_id_map.txt
├── test
│   ├── norm
│   └── raw
└── train
    ├── feats_stats.npy
    ├── norm
    └── raw
```
The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The raw folder contains the wave and linear spectrogram of each utterance, while the norm folder contains normalized ones. The statistics used to normalize features are computed from the training set and are stored in `dump/train/feats_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains phones, text_lengths, feats, feats_lengths, the path of linear spectrogram features, the path of raw waves, speaker, and the id of each utterance.

### Model Training
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
`./local/train.sh` calls `${BIN_DIR}/train.py`.
Here's the complete help message.
```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU] [--phones-dict PHONES_DICT]
                [--speaker-dict SPEAKER_DICT] [--voice-cloning VOICE_CLONING]

Train a VITS model.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       config file to overwrite default config.
  --train-metadata TRAIN_METADATA
                        training data.
  --dev-metadata DEV_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
  --phones-dict PHONES_DICT
                        phone vocabulary file.
  --speaker-dict SPEAKER_DICT
                        speaker id map file for multiple speaker model.
  --voice-cloning VOICE_CLONING
                        whether training voice cloning model.
```
1. `--config` is a config file in yaml format to overwrite the default config, which can be found at `conf/default.yaml`.
2. `--train-metadata` and `--dev-metadata` should be the metadata file in the normalized subfolder of `train` and `dev` in the `dump` folder.
3. `--output-dir` is the directory to save the results of the experiment. Checkpoints are saved in `checkpoints/` inside this directory.
4. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.
5. `--phones-dict` is the path of the phone vocabulary file.
6. `--speaker-dict` is the path of the speaker id map file when training a multi-speaker VITS.

### Synthesizing

`./local/synthesize.sh` calls `${BIN_DIR}/synthesize.py`, which can synthesize waveform from `metadata.jsonl`.

```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
```text
usage: synthesize.py [-h] [--config CONFIG] [--ckpt CKPT]
                     [--phones_dict PHONES_DICT] [--speaker_dict SPEAKER_DICT]
                     [--voice-cloning VOICE_CLONING] [--ngpu NGPU]
                     [--test_metadata TEST_METADATA] [--output_dir OUTPUT_DIR]

Synthesize with VITS

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       Config of VITS.
  --ckpt CKPT           Checkpoint file of VITS.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --voice-cloning VOICE_CLONING
                        whether training voice cloning model.
  --ngpu NGPU           if ngpu == 0, use cpu.
  --test_metadata TEST_METADATA
                        test metadata.
  --output_dir OUTPUT_DIR
                        output dir.
```
`./local/synthesize_e2e.sh` calls `${BIN_DIR}/synthesize_e2e.py`, which can synthesize waveform from text file.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
```text
usage: synthesize_e2e.py [-h] [--config CONFIG] [--ckpt CKPT]
                         [--phones_dict PHONES_DICT]
                         [--speaker_dict SPEAKER_DICT] [--spk_id SPK_ID]
                         [--lang LANG]
                         [--inference_dir INFERENCE_DIR] [--ngpu NGPU]
                         [--text TEXT] [--output_dir OUTPUT_DIR]

Synthesize with VITS

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       Config of VITS.
  --ckpt CKPT           Checkpoint file of VITS.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --spk_id SPK_ID       spk id for multi speaker acoustic model
  --lang LANG           Choose model language. zh or en
  --inference_dir INFERENCE_DIR
                        dir to save inference models
  --ngpu NGPU           if ngpu == 0, use cpu.
  --text TEXT           text to synthesize, a 'utt_id sentence' pair per line.
  --output_dir OUTPUT_DIR
                        output dir.
```
1. `--config`, `--ckpt`, `--phones_dict` and `--speaker_dict` are arguments for acoustic model, which correspond to the 4 files in the VITS pretrained model.
2. `--lang` is the model language, which can be `zh` or `en`.
3. `--test_metadata` should be the metadata file in the normalized subfolder of `test`  in the `dump` folder.
4. `--text` is the text file, which contains sentences to synthesize.
5. `--output_dir` is the directory to save synthesized audio files.
6. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

<!-- TODO display these after we trained the model -->
<!-- 
## Pretrained Model

The pretrained model can be downloaded here:

- [vits_aishell3_ckpt_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/vits/vits_aishell3_ckpt_1.1.0.zip) (add_blank=true)

VITS checkpoint contains files listed below.
```text
vits_aishell3_ckpt_1.1.0
├── default.yaml              # default config used to train vits
├── phone_id_map.txt          # phone vocabulary file when training vits
├── speaker_id_map.txt        # speaker id map file when training a multi-speaker vits
└── snapshot_iter_333000.pdz  # model parameters and optimizer states
```

P.S. This checkpoint is not good enough yet; a better model is still being trained.

You can use the following scripts to synthesize for `${BIN_DIR}/../sentences.txt` using pretrained VITS.

```bash
source path.sh
add_blank=true

FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 ${BIN_DIR}/synthesize_e2e.py \
    --config=vits_aishell3_ckpt_1.1.0/default.yaml \
    --ckpt=vits_aishell3_ckpt_1.1.0/snapshot_iter_333000.pdz \
    --phones_dict=vits_aishell3_ckpt_1.1.0/phone_id_map.txt \
    --speaker_dict=vits_aishell3_ckpt_1.1.0/speaker_id_map.txt \
    --output_dir=exp/default/test_e2e \
    --text=${BIN_DIR}/../../assets/sentences.txt \
    --add-blank=${add_blank} 
```
-->


================================================
FILE: examples/aishell3/vits/conf/default.yaml
================================================
# This configuration was tested on 4 GPUs (V100) with 32GB GPU
# memory. It takes around 2 weeks to finish the training,
# but a model trained for 100k iters should generate reasonable results.
###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################

fs: 22050         # sr
n_fft: 1024        # FFT size (samples).
n_shift: 256       # Hop size (samples). ~11.6ms at fs=22050
win_length: null   # Window length (samples).
                   # If set to null, it will be the same as n_fft (1024 samples, ~46.4ms).
window: "hann"     # Window function.


##########################################################
#                  TTS MODEL SETTING                     #
##########################################################
model:
    # generator related
    generator_type: vits_generator
    generator_params:
        hidden_channels: 192
        global_channels: 256
        segment_size: 32
        text_encoder_attention_heads: 2
        text_encoder_ffn_expand: 4
        text_encoder_blocks: 6
        text_encoder_positionwise_layer_type: "conv1d"
        text_encoder_positionwise_conv_kernel_size: 3
        text_encoder_positional_encoding_layer_type: "rel_pos"
        text_encoder_self_attention_layer_type: "rel_selfattn"
        text_encoder_activation_type: "swish"
        text_encoder_normalize_before: True
        text_encoder_dropout_rate: 0.1
        text_encoder_positional_dropout_rate: 0.0
        text_encoder_attention_dropout_rate: 0.1
        use_macaron_style_in_text_encoder: True
        use_conformer_conv_in_text_encoder: False
        text_encoder_conformer_kernel_size: -1
        decoder_kernel_size: 7
        decoder_channels: 512
        decoder_upsample_scales: [8, 8, 2, 2]
        decoder_upsample_kernel_sizes: [16, 16, 4, 4]
        decoder_resblock_kernel_sizes: [3, 7, 11]
        decoder_resblock_dilations: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
        use_weight_norm_in_decoder: True
        posterior_encoder_kernel_size: 5
        posterior_encoder_layers: 16
        posterior_encoder_stacks: 1
        posterior_encoder_base_dilation: 1
        posterior_encoder_dropout_rate: 0.0
        use_weight_norm_in_posterior_encoder: True
        flow_flows: 4
        flow_kernel_size: 5
        flow_base_dilation: 1
        flow_layers: 4
        flow_dropout_rate: 0.0
        use_weight_norm_in_flow: True
        use_only_mean_in_flow: True
        stochastic_duration_predictor_kernel_size: 3
        stochastic_duration_predictor_dropout_rate: 0.5
        stochastic_duration_predictor_flows: 4
        stochastic_duration_predictor_dds_conv_layers: 3
    # discriminator related
    discriminator_type: hifigan_multi_scale_multi_period_discriminator
    discriminator_params:
        scales: 1
        scale_downsample_pooling: "AvgPool1D"
        scale_downsample_pooling_params:
            kernel_size: 4
            stride: 2
            padding: 2
        scale_discriminator_params:
            in_channels: 1
            out_channels: 1
            kernel_sizes: [15, 41, 5, 3]
            channels: 128
            max_downsample_channels: 1024
            max_groups: 16
            bias: True
            downsample_scales: [2, 2, 4, 4, 1]
            nonlinear_activation: "leakyrelu"
            nonlinear_activation_params:
                negative_slope: 0.1
            use_weight_norm: True
            use_spectral_norm: False
        follow_official_norm: False
        periods: [2, 3, 5, 7, 11]
        period_discriminator_params:
            in_channels: 1
            out_channels: 1
            kernel_sizes: [5, 3]
            channels: 32
            downsample_scales: [3, 3, 3, 3, 1]
            max_downsample_channels: 1024
            bias: True
            nonlinear_activation: "leakyrelu"
            nonlinear_activation_params:
                negative_slope: 0.1
            use_weight_norm: True
            use_spectral_norm: False
    # others
    sampling_rate: 22050          # needed in the inference for saving wav
    cache_generator_outputs: True # whether to cache generator outputs in the training
          
###########################################################
#                        LOSS SETTING                     #
###########################################################
# loss function related
generator_adv_loss_params:
    average_by_discriminators: False # whether to average loss value by #discriminators
    loss_type: mse                   # loss type, "mse" or "hinge"
discriminator_adv_loss_params:
    average_by_discriminators: False # whether to average loss value by #discriminators
    loss_type: mse                   # loss type, "mse" or "hinge"
feat_match_loss_params:
    average_by_discriminators: False # whether to average loss value by #discriminators
    average_by_layers: False         # whether to average loss value by #layers of each discriminator
    include_final_outputs: True      # whether to include final outputs for loss calculation
mel_loss_params:
    fs: 22050          # must be the same as the training data
    fft_size: 1024        # fft points
    hop_size: 256    # hop size
    win_length: null   # window length
    window: hann       # window type
    num_mels: 80         # number of Mel basis
    fmin: 0            # minimum frequency for Mel basis
    fmax: null         # maximum frequency for Mel basis
    log_base: null     # null represent natural log

###########################################################
#               ADVERSARIAL LOSS SETTING                  #
###########################################################
lambda_adv: 1.0        # loss scaling coefficient for adversarial loss
lambda_mel: 45.0       # loss scaling coefficient for Mel loss
lambda_feat_match: 2.0 # loss scaling coefficient for feat match loss
lambda_dur: 1.0        # loss scaling coefficient for duration loss
lambda_kl: 1.0         # loss scaling coefficient for KL divergence loss
# others
sampling_rate: 22050          # needed in the inference for saving wav
cache_generator_outputs: True # whether to cache generator outputs in the training


###########################################################
#                  DATA LOADER SETTING                    #
###########################################################
batch_size: 50              # Batch size.
num_workers: 4              # Number of workers in DataLoader.

##########################################################
#            OPTIMIZER & SCHEDULER SETTING               #
##########################################################
# optimizer setting for generator
generator_optimizer_params:
    beta1: 0.8
    beta2: 0.99
    epsilon: 1.0e-9
    weight_decay: 0.0
generator_scheduler: exponential_decay
generator_scheduler_params:
    learning_rate: 2.0e-4
    gamma: 0.999875                   

# optimizer setting for discriminator
discriminator_optimizer_params:
    beta1: 0.8
    beta2: 0.99
    epsilon: 1.0e-9
    weight_decay: 0.0
discriminator_scheduler: exponential_decay
discriminator_scheduler_params:
    learning_rate: 2.0e-4          
    gamma: 0.999875
generator_first: False # whether to start updating generator first

##########################################################
#                OTHER TRAINING SETTING                  #
##########################################################
num_snapshots: 10            # max number of snapshots to keep while training
train_max_steps: 350000      # Number of training steps. == total_iters / ngpus, total_iters = 1000000
                             # NOTE(review): 1000000 / 4 GPUs would be 250000, not 350000 -- confirm which value is intended.
save_interval_steps: 1000    # Interval steps to save checkpoint.
eval_interval_steps: 250     # Interval steps to evaluate the network.
seed: 777                    # random seed number


================================================
FILE: examples/aishell3/vits/local/preprocess.sh
================================================
#!/bin/bash
# Preprocess AISHELL-3 for VITS training.
#
# Usage: ./local/preprocess.sh <config_path> <add_blank>
#   config_path  config file handed to the duration and feature extraction tools
#   add_blank    value forwarded to normalize.py as --add-blank
#
# MAIN_ROOT and BIN_DIR must be set in the environment (path.sh exports them).
# Variable expansions are quoted so that paths containing whitespace are passed
# to the Python tools as a single argument.

stage=0
stop_stage=100

config_path=$1
add_blank=$2

# copy from tts3/preprocess
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # get durations from MFA's result
    echo "Generate durations.txt from MFA results ..."
    python3 "${MAIN_ROOT}/utils/gen_duration_from_textgrid.py" \
        --inputdir=./aishell3_alignment_tone \
        --output durations.txt \
        --config="${config_path}"
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # extract features
    echo "Extract features ..."
    # The `~` in --rootdir is intentionally left unquoted: quoting it would
    # suppress tilde expansion.
    python3 "${BIN_DIR}/preprocess.py" \
        --dataset=aishell3 \
        --rootdir=~/datasets/data_aishell3/ \
        --dumpdir=dump \
        --dur-file=durations.txt \
        --config="${config_path}" \
        --num-cpu=20 \
        --cut-sil=True
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # get features' stats(mean and std)
    echo "Get features' stats ..."
    python3 "${MAIN_ROOT}/utils/compute_statistics.py" \
        --metadata=dump/train/raw/metadata.jsonl \
        --field-name="feats"
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # normalize and convert phone/speaker to id, dev and test should use train's stats
    echo "Normalize ..."
    # Same invocation for every subset; only the metadata/dump paths differ,
    # while the statistics always come from the training set.
    for subset in train dev test; do
        python3 "${BIN_DIR}/normalize.py" \
            --metadata="dump/${subset}/raw/metadata.jsonl" \
            --dumpdir="dump/${subset}/norm" \
            --feats-stats=dump/train/feats_stats.npy \
            --phones-dict=dump/phone_id_map.txt \
            --speaker-dict=dump/speaker_id_map.txt \
            --add-blank="${add_blank}" \
            --skip-wav-copy
    done
fi


================================================
FILE: examples/aishell3/vits/local/synthesize.sh
================================================
#!/bin/bash
# Synthesize waveforms for the utterances listed in dump/test/norm/metadata.jsonl
# with a trained checkpoint; results are written to <train_output_path>/test.
#
# Usage: ./local/synthesize.sh <config_path> <train_output_path> <ckpt_name>

conf=$1
exp_dir=$2
ckpt=$3

# Single-stage script; the stage guard mirrors the layout of the other scripts.
stage=0
stop_stage=0

if [[ ${stage} -le 0 && ${stop_stage} -ge 0 ]]; then
    # Collect the command-line options first, then launch the tool once.
    synth_args=(
        --config=${conf}
        --ckpt=${exp_dir}/checkpoints/${ckpt}
        --phones_dict=dump/phone_id_map.txt
        --speaker_dict=dump/speaker_id_map.txt
        --test_metadata=dump/test/norm/metadata.jsonl
        --output_dir=${exp_dir}/test
    )

    # The FLAGS_* variables are set only for this python3 process.
    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 ${BIN_DIR}/synthesize.py "${synth_args[@]}"
fi


================================================
FILE: examples/aishell3/vits/local/synthesize_e2e.sh
================================================
#!/bin/bash
# End-to-end synthesis of the sentences in ${BIN_DIR}/../../assets/sentences.txt
# with a trained checkpoint; results are written to <train_output_path>/test_e2e.
#
# Usage: ./local/synthesize_e2e.sh <config_path> <train_output_path> <ckpt_name> <add_blank>

conf=$1
exp_dir=$2
ckpt=$3
blank_flag=$4

# Single-stage script; the stage guard mirrors the layout of the other scripts.
stage=0
stop_stage=0

if [[ ${stage} -le 0 && ${stop_stage} -ge 0 ]]; then
    # Collect the command-line options first, then launch the tool once.
    e2e_args=(
        --am=vits_aishell3
        --config=${conf}
        --ckpt=${exp_dir}/checkpoints/${ckpt}
        --phones_dict=dump/phone_id_map.txt
        --speaker_dict=dump/speaker_id_map.txt
        --spk_id=0
        --output_dir=${exp_dir}/test_e2e
        --text=${BIN_DIR}/../../assets/sentences.txt
        --add-blank=${blank_flag}
    )

    # The FLAGS_* variables are set only for this python3 process.
    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 ${BIN_DIR}/synthesize_e2e.py "${e2e_args[@]}"
fi


================================================
FILE: examples/aishell3/vits/local/train.sh
================================================
#!/bin/bash
# Build the monotonic_align extension in place, then launch VITS training.
#
# Usage: ./local/train.sh <config_path> <train_output_path> [ngpu]
#   config_path        training config file
#   train_output_path  directory passed to train.py as --output-dir
#   ngpu               optional value for --ngpu (default: 4)
#
# MAIN_ROOT and BIN_DIR must be set in the environment (path.sh exports them).

config_path=$1
train_output_path=$2
ngpu=${3:-4}

# install monotonic_align
# The build runs in a subshell so that this script's working directory is never
# changed, even if the `cd` fails; the relative dump/ paths below depend on it.
# A failed build is reported but does not stop the script.
(
    cd "${MAIN_ROOT}/paddlespeech/t2s/models/vits/monotonic_align" &&
        python3 setup.py build_ext --inplace
) || echo "Warning: building monotonic_align failed; continuing with training." >&2

python3 "${BIN_DIR}/train.py" \
    --train-metadata=dump/train/norm/metadata.jsonl \
    --dev-metadata=dump/dev/norm/metadata.jsonl \
    --config="${config_path}" \
    --output-dir="${train_output_path}" \
    --ngpu="${ngpu}" \
    --phones-dict=dump/phone_id_map.txt \
    --speaker-dict=dump/speaker_id_map.txt


================================================
FILE: examples/aishell3/vits/path.sh
================================================
#!/bin/bash
# Environment for this example; run.sh loads it with `source path.sh`.

# Repository root: three directories above the current working directory.
# ${PWD} is quoted so a checkout path containing whitespace is still passed to
# realpath as a single argument.
export MAIN_ROOT=$(realpath "${PWD}/../../../")

export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export LC_ALL=C

export PYTHONDONTWRITEBYTECODE=1
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

# Directory holding the experiment scripts invoked by the local/*.sh scripts.
MODEL=vits
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}

================================================
FILE: examples/aishell3/vits/run.sh
================================================
#!/bin/bash
# Recipe driver: preprocess (stage 0), train (stage 1), synthesize from
# metadata (stage 2) and end-to-end synthesis (stage 3).

set -e
source path.sh

gpus=0,1,2,3
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_153.pdz
add_blank=true

# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this can not be mixed use with `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

# Succeeds when stage number $1 lies within [stage, stop_stage].
stage_enabled() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# prepare data
if stage_enabled 0; then
    ./local/preprocess.sh ${conf_path} ${add_blank} || exit -1
fi

# train model, all `ckpt` under `train_output_path/checkpoints/` dir
if stage_enabled 1; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

# synthesize the utterances of the test metadata
if stage_enabled 2; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

# end-to-end synthesis from text
if stage_enabled 3; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} ${add_blank} || exit -1
fi


================================================
FILE: examples/aishell3/vits-vc/README.md
================================================
# VITS with AISHELL-3
This example contains code used to train a [VITS](https://arxiv.org/abs/2106.06103) model with [AISHELL-3](http://www.aishelltech.com/aishell_3). The trained model can be used for the voice cloning task. We refer to the model structure of [Transfer Learning from Speaker Verification to Multispeaker Text-To-Speech Synthesis](https://arxiv.org/pdf/1806.04558.pdf). The general steps are as follows:
1. Speaker Encoder: We use Speaker Verification to train a speaker encoder. The datasets used in this task are different from those used in `VITS`: because transcriptions are not needed, we can use more datasets; refer to [ge2e](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/ge2e).
2. Synthesizer and Vocoder: We use the trained speaker encoder to generate a speaker embedding for each sentence in AISHELL-3. This embedding is an extra input of `VITS` which will be concatenated with the encoder outputs. The vocoder is part of `VITS` due to its special structure.

## Dataset
### Download and Extract
Download AISHELL-3 from its [Official Website](http://www.aishelltech.com/aishell_3) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/data_aishell3`.

### Get MFA Result and Extract
We use [MFA2.x](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get phonemes for VITS; the durations from MFA are not needed here.
You can download the alignment from [aishell3_alignment_tone.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/AISHELL-3/with_tone/aishell3_alignment_tone.tar.gz), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) (which uses MFA1.x for now) in our repo.

## Pretrained GE2E Model
We use pretrained GE2E model to generate speaker embedding for each sentence.

Download pretrained GE2E model from here [ge2e_ckpt_0.3.zip](https://bj.bcebos.com/paddlespeech/Parakeet/released_models/ge2e/ge2e_ckpt_0.3.zip), and `unzip` it.

## Get Started
Assume the path to the dataset is `~/datasets/data_aishell3`.
Assume the path to the MFA result of AISHELL-3 is `./aishell3_alignment_tone`.
Assume the path to the pretrained ge2e model is `./ge2e_ckpt_0.3`.

Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize waveform from `metadata.jsonl`.
5. start a voice cloning inference.

```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```

### Data Preprocessing
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/preprocess.sh ${conf_path} ${add_blank} ${ge2e_ckpt_path}
```
When it is done. A `dump` folder is created in the current directory. The structure of the dump folder is listed below.

```text
dump
├── dev
│   ├── norm
│   └── raw
├── embed
│   ├── SSB0005
│   ├── SSB0009
│   ├── ...
│   └── ...
├── phone_id_map.txt
├── speaker_id_map.txt
├── test
│   ├── norm
│   └── raw
└── train
    ├── feats_stats.npy
    ├── norm
    └── raw
```
The `embed` folder contains the generated speaker embedding for each sentence in AISHELL-3. It has the same file structure as the wav files, and the embeddings are stored in `.npy` format.

Computing the utterance embeddings is time-consuming and can take hours.

The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The raw folder contains the wave and linear spectrogram of each utterance, while the norm folder contains normalized ones. The statistics used to normalize features are computed from the training set, and are located in `dump/train/feats_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains phones, text_lengths, feats, feats_lengths, the path of linear spectrogram features, the path of raw waves, speaker, and the id of each utterance.

The preprocessing step is very similar to that of [vits](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/vits), but there is one more `ge2e/inference` step here.

### Model Training
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
The training step is very similar to that of [vits](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/vits), but we should set `--voice-cloning=True` when calling `${BIN_DIR}/train.py`.

### Synthesizing

`./local/synthesize.sh` calls `${BIN_DIR}/synthesize.py`, which can synthesize waveform from `metadata.jsonl`.

```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
```text
usage: synthesize.py [-h] [--config CONFIG] [--ckpt CKPT]
                     [--phones_dict PHONES_DICT] [--speaker_dict SPEAKER_DICT]
                     [--voice-cloning VOICE_CLONING] [--ngpu NGPU]
                     [--test_metadata TEST_METADATA] [--output_dir OUTPUT_DIR]

Synthesize with VITS

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       Config of VITS.
  --ckpt CKPT           Checkpoint file of VITS.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --voice-cloning VOICE_CLONING
                        whether training voice cloning model.
  --ngpu NGPU           if ngpu == 0, use cpu.
  --test_metadata TEST_METADATA
                        test metadata.
  --output_dir OUTPUT_DIR
                        output dir.
```
The synthesizing step is very similar to that of [vits](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/vits), but we should set `--voice-cloning=True` when calling `${BIN_DIR}/synthesize.py`.

### Voice Cloning
Assume there are some reference audios in `./ref_audio`:
```text
ref_audio
├── 001238.wav
├── LJ015-0254.wav
└── audio_self_test.mp3
```
`./local/voice_cloning.sh` calls `${BIN_DIR}/voice_cloning.py`

```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/voice_cloning.sh ${conf_path} ${train_output_path} ${ckpt_name} ${ge2e_params_path} ${add_blank} ${ref_audio_dir}
```

If you want to convert a source audio file to the voice of the reference speaker, pass the source audio path as an extra argument:

```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/voice_cloning.sh ${conf_path} ${train_output_path} ${ckpt_name} ${ge2e_params_path} ${add_blank} ${ref_audio_dir} ${src_audio_path}
```

<!-- TODO display these after we trained the model -->
<!-- 
## Pretrained Model

The pretrained model can be downloaded here:

- [vits_vc_aishell3_ckpt_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/vits/vits_vc_aishell3_ckpt_1.1.0.zip) (add_blank=true)

VITS checkpoint contains files listed below.
(There is no need for `speaker_id_map.txt` here )

```text
vits_vc_aishell3_ckpt_1.1.0
├── default.yaml              # default config used to train vits
├── phone_id_map.txt          # phone vocabulary file when training vits
└── snapshot_iter_333000.pdz  # model parameters and optimizer states
```

P.S. This checkpoint is not good enough yet; a better one is still being trained.

-->


================================================
FILE: examples/aishell3/vits-vc/conf/default.yaml
================================================
# This configuration was tested on 4 GPUs (V100) with 32GB GPU
# memory. It takes around 2 weeks to finish the training,
# but a model trained for 100k iters should already generate reasonable results.
###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################

fs: 22050         # Sampling rate (Hz).
n_fft: 1024        # FFT size (samples).
n_shift: 256       # Hop size (samples). 256 / 22050 Hz is about 11.6 ms.
win_length: null   # Window length (samples).
                   # If set to null, it will be the same as the FFT size (n_fft),
                   # i.e. 1024 samples, about 46.4 ms at 22050 Hz.
window: "hann"     # Window function.


##########################################################
#                  TTS MODEL SETTING                     #
##########################################################
model:
    # generator related
    generator_type: vits_generator
    generator_params:
        hidden_channels: 192
        spk_embed_dim: 256
        global_channels: 256
        segment_size: 32
        text_encoder_attention_heads: 2
        text_encoder_ffn_expand: 4
        text_encoder_blocks: 6
        text_encoder_positionwise_layer_type: "conv1d"
        text_encoder_positionwise_conv_kernel_size: 3
        text_encoder_positional_encoding_layer_type: "rel_pos"
        text_encoder_self_attention_layer_type: "rel_selfattn"
        text_encoder_activation_type: "swish"
        text_encoder_normalize_before: True
        text_encoder_dropout_rate: 0.1
        text_encoder_positional_dropout_rate: 0.0
        text_encoder_attention_dropout_rate: 0.1
        use_macaron_style_in_text_encoder: True
        use_conformer_conv_in_text_encoder: False
        text_encoder_conformer_kernel_size: -1
        decoder_kernel_size: 7
        decoder_channels: 512
        decoder_upsample_scales: [8, 8, 2, 2]
        decoder_upsample_kernel_sizes: [16, 16, 4, 4]
        decoder_resblock_kernel_sizes: [3, 7, 11]
        decoder_resblock_dilations: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
        use_weight_norm_in_decoder: True
        posterior_encoder_kernel_size: 5
        posterior_encoder_layers: 16
        posterior_encoder_stacks: 1
        posterior_encoder_base_dilation: 1
        posterior_encoder_dropout_rate: 0.0
        use_weight_norm_in_posterior_encoder: True
        flow_flows: 4
        flow_kernel_size: 5
        flow_base_dilation: 1
        flow_layers: 4
        flow_dropout_rate: 0.0
        use_weight_norm_in_flow: True
        use_only_mean_in_flow: True
        stochastic_duration_predictor_kernel_size: 3
        stochastic_duration_predictor_dropout_rate: 0.5
        stochastic_duration_predictor_flows: 4
        stochastic_duration_predictor_dds_conv_layers: 3
    # discriminator related
    discriminator_type: hifigan_multi_scale_multi_period_discriminator
    discriminator_params:
        scales: 1
        scale_downsample_pooling: "AvgPool1D"
        scale_downsample_pooling_params:
            kernel_size: 4
            stride: 2
            padding: 2
        scale_discriminator_params:
            in_channels: 1
            out_channels: 1
            kernel_sizes: [15, 41, 5, 3]
            channels: 128
            max_downsample_channels: 1024
            max_groups: 16
            bias: True
            downsample_scales: [2, 2, 4, 4, 1]
            nonlinear_activation: "leakyrelu"
            nonlinear_activation_params:
                negative_slope: 0.1
            use_weight_norm: True
            use_spectral_norm: False
        follow_official_norm: False
        periods: [2, 3, 5, 7, 11]
        period_discriminator_params:
            in_channels: 1
            out_channels: 1
            kernel_sizes: [5, 3]
            channels: 32
            downsample_scales: [3, 3, 3, 3, 1]
            max_downsample_channels: 1024
            bias: True
            nonlinear_activation: "leakyrelu"
            nonlinear_activation_params:
                negative_slope: 0.1
            use_weight_norm: True
            use_spectral_norm: False
    # others
    sampling_rate: 22050          # needed in the inference for saving wav
    cache_generator_outputs: True # whether to cache generator outputs in the training
          
###########################################################
#                        LOSS SETTING                     #
###########################################################
# loss function related
generator_adv_loss_params:
    average_by_discriminators: False # whether to average loss value by #discriminators
    loss_type: mse                   # loss type, "mse" or "hinge"
discriminator_adv_loss_params:
    average_by_discriminators: False # whether to average loss value by #discriminators
    loss_type: mse                   # loss type, "mse" or "hinge"
feat_match_loss_params:
    average_by_discriminators: False # whether to average loss value by #discriminators
    average_by_layers: False         # whether to average loss value by #layers of each discriminator
    include_final_outputs: True      # whether to include final outputs for loss calculation
mel_loss_params:
    fs: 22050          # must be the same as the training data
    fft_size: 1024        # fft points
    hop_size: 256    # hop size
    win_length: null   # window length
    window: hann       # window type
    num_mels: 80         # number of Mel basis
    fmin: 0            # minimum frequency for Mel basis
    fmax: null         # maximum frequency for Mel basis
    log_base: null     # null represent natural log

###########################################################
#               ADVERSARIAL LOSS SETTING                  #
###########################################################
lambda_adv: 1.0        # loss scaling coefficient for adversarial loss
lambda_mel: 45.0       # loss scaling coefficient for Mel loss
lambda_feat_match: 2.0 # loss scaling coefficient for feat match loss
lambda_dur: 1.0        # loss scaling coefficient for duration loss
lambda_kl: 1.0         # loss scaling coefficient for KL divergence loss
# others
sampling_rate: 22050          # needed in the inference for saving wav
cache_generator_outputs: True # whether to cache generator outputs in the training


###########################################################
#                  DATA LOADER SETTING                    #
###########################################################
batch_size: 50              # Batch size.
num_workers: 4              # Number of workers in DataLoader.

##########################################################
#            OPTIMIZER & SCHEDULER SETTING               #
##########################################################
# optimizer setting for generator
generator_optimizer_params:
    beta1: 0.8
    beta2: 0.99
    epsilon: 1.0e-9
    weight_decay: 0.0
generator_scheduler: exponential_decay
generator_scheduler_params:
    learning_rate: 2.0e-4
    gamma: 0.999875                   

# optimizer setting for discriminator
discriminator_optimizer_params:
    beta1: 0.8
    beta2: 0.99
    epsilon: 1.0e-9
    weight_decay: 0.0
discriminator_scheduler: exponential_decay
discriminator_scheduler_params:
    learning_rate: 2.0e-4          
    gamma: 0.999875
generator_first: False # whether to start updating generator first

##########################################################
#                OTHER TRAINING SETTING                  #
##########################################################
num_snapshots: 10            # max number of snapshots to keep while training
train_max_steps: 350000      # Number of training steps. == total_iters / ngpus, total_iters = 1000000
                             # NOTE(review): 1000000 / 4 GPUs would be 250000, not 350000 -- confirm which value is intended.
save_interval_steps: 1000    # Interval steps to save checkpoint.
eval_interval_steps: 250     # Interval steps to evaluate the network.
seed: 777                    # random seed number


================================================
FILE: examples/aishell3/vits-vc/local/preprocess.sh
================================================
#!/bin/bash
# Preprocess AISHELL-3 for VITS voice cloning training.
#
# Usage: ./local/preprocess.sh <config_path> <add_blank> <ge2e_ckpt_path>
#   config_path     config file handed to the duration and feature extraction tools
#   add_blank       value forwarded to normalize.py as --add-blank
#   ge2e_ckpt_path  GE2E checkpoint passed to the speaker embedding inference script
#
# MAIN_ROOT and BIN_DIR must be set in the environment (path.sh exports them).
# Variable expansions are quoted so that paths containing whitespace are passed
# to the Python tools as a single argument.

stage=0
stop_stage=100

config_path=$1
add_blank=$2
ge2e_ckpt_path=$3

# gen speaker embedding
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # The `~` in --input is intentionally left unquoted: quoting it would
    # suppress tilde expansion.
    python3 "${MAIN_ROOT}/paddlespeech/vector/exps/ge2e/inference.py" \
        --input=~/datasets/data_aishell3/train/wav/ \
        --output=dump/embed \
        --checkpoint_path="${ge2e_ckpt_path}"
fi

# copy from tts3/preprocess
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # get durations from MFA's result
    echo "Generate durations.txt from MFA results ..."
    python3 "${MAIN_ROOT}/utils/gen_duration_from_textgrid.py" \
        --inputdir=./aishell3_alignment_tone \
        --output durations.txt \
        --config="${config_path}"
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # extract features
    echo "Extract features ..."
    # `~` stays unquoted here as well, for the same reason as above.
    python3 "${BIN_DIR}/preprocess.py" \
        --dataset=aishell3 \
        --rootdir=~/datasets/data_aishell3/ \
        --dumpdir=dump \
        --dur-file=durations.txt \
        --config="${config_path}" \
        --num-cpu=20 \
        --cut-sil=True \
        --spk_emb_dir=dump/embed
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # get features' stats(mean and std)
    echo "Get features' stats ..."
    python3 "${MAIN_ROOT}/utils/compute_statistics.py" \
        --metadata=dump/train/raw/metadata.jsonl \
        --field-name="feats"
fi

if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    # normalize and convert phone/speaker to id, dev and test should use train's stats
    echo "Normalize ..."
    # Same invocation for every subset; only the metadata/dump paths differ,
    # while the statistics always come from the training set.
    for subset in train dev test; do
        python3 "${BIN_DIR}/normalize.py" \
            --metadata="dump/${subset}/raw/metadata.jsonl" \
            --dumpdir="dump/${subset}/norm" \
            --feats-stats=dump/train/feats_stats.npy \
            --phones-dict=dump/phone_id_map.txt \
            --speaker-dict=dump/speaker_id_map.txt \
            --add-blank="${add_blank}" \
            --skip-wav-copy
    done
fi


================================================
FILE: examples/aishell3/vits-vc/local/synthesize.sh
================================================
#!/bin/bash
# Synthesize waveforms for the utterances listed in dump/test/norm/metadata.jsonl
# with a trained voice cloning checkpoint; results go to <train_output_path>/test.
#
# Usage: ./local/synthesize.sh <config_path> <train_output_path> <ckpt_name>

conf=$1
exp_dir=$2
ckpt=$3

# Single-stage script; the stage guard mirrors the layout of the other scripts.
stage=0
stop_stage=0

if [[ ${stage} -le 0 && ${stop_stage} -ge 0 ]]; then
    # Collect the command-line options first, then launch the tool once.
    synth_args=(
        --config=${conf}
        --ckpt=${exp_dir}/checkpoints/${ckpt}
        --phones_dict=dump/phone_id_map.txt
        --test_metadata=dump/test/norm/metadata.jsonl
        --output_dir=${exp_dir}/test
        --voice-cloning=True
    )

    # The FLAGS_* variables are set only for this python3 process.
    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 ${BIN_DIR}/synthesize.py "${synth_args[@]}"
fi


================================================
FILE: examples/aishell3/vits-vc/local/train.sh
================================================
#!/bin/bash
# Build the monotonic_align extension in place, then launch VITS training in
# voice cloning mode (--voice-cloning=True).
#
# Usage: ./local/train.sh <config_path> <train_output_path> [ngpu]
#   config_path        training config file
#   train_output_path  directory passed to train.py as --output-dir
#   ngpu               optional value for --ngpu (default: 4)
#
# MAIN_ROOT and BIN_DIR must be set in the environment (path.sh exports them).

config_path=$1
train_output_path=$2
ngpu=${3:-4}

# install monotonic_align
# The build runs in a subshell so that this script's working directory is never
# changed, even if the `cd` fails; the relative dump/ paths below depend on it.
# A failed build is reported but does not stop the script.
(
    cd "${MAIN_ROOT}/paddlespeech/t2s/models/vits/monotonic_align" &&
        python3 setup.py build_ext --inplace
) || echo "Warning: building monotonic_align failed; continuing with training." >&2

python3 "${BIN_DIR}/train.py" \
    --train-metadata=dump/train/norm/metadata.jsonl \
    --dev-metadata=dump/dev/norm/metadata.jsonl \
    --config="${config_path}" \
    --output-dir="${train_output_path}" \
    --ngpu="${ngpu}" \
    --phones-dict=dump/phone_id_map.txt \
    --voice-cloning=True


================================================
FILE: examples/aishell3/vits-vc/local/voice_cloning.sh
================================================
#!/bin/bash
# Run voice cloning inference with a trained checkpoint and a GE2E speaker
# encoder; results are written to <train_output_path>/vc_syn.
#
# Usage: ./local/voice_cloning.sh <config_path> <train_output_path> <ckpt_name> \
#            <ge2e_params_path> <add_blank> <ref_audio_dir> [src_audio_path]

conf=$1
exp_dir=$2
ckpt=$3
ge2e_params=$4
blank_flag=$5
ref_dir=$6
src_audio=$7

# Collect the command-line options first, then launch the tool once.
vc_args=(
    --config=${conf}
    --ckpt=${exp_dir}/checkpoints/${ckpt}
    --ge2e_params_path=${ge2e_params}
    --phones_dict=dump/phone_id_map.txt
    --text="凯莫瑞安联合体的经济崩溃迫在眉睫。"
    --audio-path=${src_audio}
    --input-dir=${ref_dir}
    --output-dir=${exp_dir}/vc_syn
    --add-blank=${blank_flag}
)

# The FLAGS_* variables are set only for this python3 process.
FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 ${BIN_DIR}/voice_cloning.py "${vc_args[@]}"


================================================
FILE: examples/aishell3/vits-vc/path.sh
================================================
#!/bin/bash
# Environment for this example; run.sh loads it with `source path.sh`.

# Repository root: three directories above the current working directory.
# ${PWD} is quoted so a checkout path containing whitespace is still passed to
# realpath as a single argument.
export MAIN_ROOT=$(realpath "${PWD}/../../../")

export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export LC_ALL=C

export PYTHONDONTWRITEBYTECODE=1
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

# Directory holding the experiment scripts invoked by the local/*.sh scripts.
MODEL=vits
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}

================================================
FILE: examples/aishell3/vits-vc/run.sh
================================================
#!/bin/bash
# Driver script for the VITS voice-cloning example.
#
# Stages (select a range with `--stage N --stop-stage M`):
#   0  prepare data    (local/preprocess.sh)
#   1  train the model (local/train.sh)
#   2  synthesize      (local/synthesize.sh)
#   3  voice cloning   (local/voice_cloning.sh)

set -e
source path.sh

gpus=0,1,2,3
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_153.pdz
add_blank=true
ref_audio_dir=ref_audio
src_audio_path=''

# do not include ".pdparams" here
ge2e_ckpt_path=./ge2e_ckpt_0.3/step-3000000

# include ".pdparams" here
ge2e_params_path=${ge2e_ckpt_path}.pdparams

# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# these options cannot be mixed with positional arguments (`$1`, `$2`, ...)
source "${MAIN_ROOT}/utils/parse_options.sh" || exit 1

# All positional arguments below are quoted so that an empty value (such as the
# default src_audio_path) or a value containing whitespace keeps its own slot
# instead of being dropped or split, which would shift later parameters.

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # prepare data
    CUDA_VISIBLE_DEVICES=${gpus} ./local/preprocess.sh "${conf_path}" "${add_blank}" "${ge2e_ckpt_path}" || exit -1
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # train model, all `ckpt` under `train_output_path/checkpoints/` dir
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh "${conf_path}" "${train_output_path}" || exit -1
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # synthesize with the checkpoint selected by ckpt_name
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh "${conf_path}" "${train_output_path}" "${ckpt_name}" || exit -1
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # voice cloning using the reference audio directory and optional source audio
    CUDA_VISIBLE_DEVICES=${gpus} ./local/voice_cloning.sh "${conf_path}" "${train_output_path}" "${ckpt_name}" \
        "${ge2e_params_path}" "${add_blank}" "${ref_audio_dir}" "${src_audio_path}" || exit -1
fi


================================================
FILE: examples/aishell3/voc1/README.md
================================================
# Parallel WaveGAN with AISHELL-3
This example contains code used to train a [parallel wavegan](http://arxiv.org/abs/1910.11480) model with [AISHELL-3](http://www.aishelltech.com/aishell_3).

AISHELL-3 is a large-scale and high-fidelity multi-speaker Mandarin speech corpus that could be used to train multi-speaker Text-to-Speech (TTS) systems.
## Dataset
### Download and Extract
Download AISHELL-3 from its [Official Website](http://www.aishelltech.com/aishell_3) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/data_aishell3`.

### Get MFA Result and Extract
We use [MFA2.x](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get durations for aishell3_fastspeech2.
You can download it from here: [aishell3_alignment_tone.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/AISHELL-3/with_tone/aishell3_alignment_tone.tar.gz), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) (which uses MFA1.x for now) in our repo.

## Get Started
Assume the path to the dataset is `~/datasets/data_aishell3`.
Assume the path to the MFA result of AISHELL-3 is `./aishell3_alignment_tone`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to run only one stage. For example, the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.

```text
dump
├── dev
│   ├── norm
│   └── raw
├── test
│   ├── norm
│   └── raw
└── train
    ├── norm
    ├── raw
    └── feats_stats.npy
```

The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The `raw` folder contains the log magnitude of the mel spectrogram of each utterance, while the norm folder contains the normalized spectrogram. The statistics used to normalize the spectrogram are computed from the training set, which is located in `dump/train/feats_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains id and paths to the spectrogram of each utterance.

### Model Training
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
`./local/train.sh` calls `${BIN_DIR}/train.py`.
Here's the complete help message.

```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU] [--batch-size BATCH_SIZE] [--max-iter MAX_ITER]
                [--run-benchmark RUN_BENCHMARK]
                [--profiler_options PROFILER_OPTIONS]

Train a ParallelWaveGAN model.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       ParallelWaveGAN config file.
  --train-metadata TRAIN_METADATA
                        training data.
  --dev-metadata DEV_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.

benchmark:
  arguments related to benchmark.

  --batch-size BATCH_SIZE
                        batch size.
  --max-iter MAX_ITER   train max steps.
  --run-benchmark RUN_BENCHMARK
                        runing benchmark or not, if True, use the --batch-size
                        and --max-iter.
  --profiler_options PROFILER_OPTIONS
                        The option of profiler, which should be in format
                        "key1=value1;key2=value2;key3=value3".
```

1. `--config` is a config file in yaml format to overwrite the default config, which can be found at `conf/default.yaml`.
2. `--train-metadata` and `--dev-metadata` should be the metadata file in the normalized subfolder of `train` and `dev` in the `dump` folder.
3. `--output-dir` is the directory to save the results of the experiment. Checkpoints are saved in `checkpoints/` inside this directory.
4. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

### Synthesizing
`./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
```text
usage: synthesize.py [-h] [--generator-type GENERATOR_TYPE] [--config CONFIG]
                     [--checkpoint CHECKPOINT] [--test-metadata TEST_METADATA]
                     [--output-dir OUTPUT_DIR] [--ngpu NGPU]

Synthesize with GANVocoder.

optional arguments:
  -h, --help            show this help message and exit
  --generator-type GENERATOR_TYPE
                        type of GANVocoder, should in {pwgan, mb_melgan,
                        style_melgan, } now
  --config CONFIG       GANVocoder config file.
  --checkpoint CHECKPOINT
                        snapshot to load.
  --test-metadata TEST_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
```

1. `--config` is the Parallel WaveGAN config file. You should use the same config with which the model is trained.
2. `--checkpoint` is the checkpoint to load. Pick one of the checkpoints from `checkpoints` inside the training output directory. If you use the pretrained model, use the `snapshot_iter_1000000.pdz`.
3. `--test-metadata` is the metadata of the test dataset. Use the `metadata.jsonl` in the `dev/norm` subfolder from the processed directory.
4. `--output-dir` is the directory to save the synthesized audio files.
5. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

## Pretrained Models
Pretrained models can be downloaded here:
- [pwg_aishell3_ckpt_0.5.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_aishell3_ckpt_0.5.zip)

The static model can be downloaded here:
- [pwgan_aishell3_static_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_aishell3_static_1.1.0.zip)

The PIR static model can be downloaded here:
- [pwgan_aishell3_static_pir_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_aishell3_static_pir_1.1.0.zip) (Running the PIR model requires setting `FLAGS_enable_pir_api=1`, and the PIR model only works with paddlepaddle>=3.0.0b2)

The ONNX model can be downloaded here:
- [pwgan_aishell3_onnx_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_aishell3_onnx_1.1.0.zip)

The Paddle-Lite model can be downloaded here:
- [pwgan_aishell3_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_aishell3_pdlite_1.3.0.zip)

Model | Step | eval/generator_loss | eval/log_stft_magnitude_loss | eval/spectral_convergence_loss
:-------------:| :------------:| :-----: | :-----: | :--------:
default| 1(gpu) x 400000|1.968762|0.759008|0.218524

Parallel WaveGAN checkpoint contains files listed below.

```text
pwg_aishell3_ckpt_0.5
├── default.yaml                   # default config used to train parallel wavegan
├── feats_stats.npy                # statistics used to normalize spectrogram when training parallel wavegan
└── snapshot_iter_1000000.pdz      # generator parameters of parallel wavegan
```
## Acknowledgement
We adapted some code from https://github.com/kan-bayashi/ParallelWaveGAN.


================================================
FILE: examples/aishell3/voc1/conf/default.yaml
================================================
# This is the hyperparameter configuration file for Parallel WaveGAN.
# Please make sure this is adjusted for the AISHELL-3 corpus. If you want to
# apply it to another dataset, you might need to carefully change some parameters.
# This configuration requires 12 GB GPU memory and takes ~3 days on RTX TITAN.

###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
fs: 24000                # Sampling rate.
n_fft: 2048              # FFT size (samples). 
n_shift: 300             # Hop size (samples). 12.5ms
win_length: 1200         # Window length (samples). 50ms
                         # If set to null, it will be the same as fft_size.
window: "hann"           # Window function.
n_mels: 80               # Number of mel basis.
fmin: 80                 # Minimum freq in mel basis calculation. (Hz)
fmax: 7600               # Maximum frequency in mel basis calculation. (Hz)

###########################################################
#         GENERATOR NETWORK ARCHITECTURE SETTING          #
###########################################################
generator_params:
    in_channels: 1        # Number of input channels.
    out_channels: 1       # Number of output channels.
    kernel_size: 3        # Kernel size of dilated convolution.
    layers: 30            # Number of residual block layers.
    stacks: 3             # Number of stacks i.e., dilation cycles.
    residual_channels: 64 # Number of channels in residual conv.
    gate_channels: 128    # Number of channels in gated conv.
    skip_channels: 64     # Number of channels in skip conv.
    aux_channels: 80      # Number of channels for auxiliary feature conv.
                          # Must be the same as num_mels.
    aux_context_window: 2 # Context window size for auxiliary feature.
                          # If set to 2, previous 2 and future 2 frames will be considered.
    dropout: 0.0          # Dropout rate. 0.0 means no dropout applied.
    use_weight_norm: True # Whether to use weight norm.
                          # If set to true, it will be applied to all of the conv layers.
    upsample_scales: [4, 5, 3, 5]     # Upsampling scales. prod(upsample_scales) == n_shift

###########################################################
#       DISCRIMINATOR NETWORK ARCHITECTURE SETTING        #
###########################################################
discriminator_params:
    in_channels: 1        # Number of input channels.
    out_channels: 1       # Number of output channels.
    kernel_size: 3        # Kernel size of conv layers.
    layers: 10            # Number of conv layers.
    conv_channels: 64     # Number of channels in conv layers.
    bias: True            # Whether to use bias parameter in conv.
    use_weight_norm: True # Whether to use weight norm.
                          # If set to true, it will be applied to all of the conv layers.
    nonlinear_activation: "leakyrelu" # Nonlinear function after each conv.
    nonlinear_activation_params:      # Nonlinear function parameters
        negative_slope: 0.2           # Alpha in leakyrelu.

###########################################################
#                   STFT LOSS SETTING                     #
###########################################################
stft_loss_params:
    fft_sizes: [1024, 2048, 512]  # List of FFT size for STFT-based loss.
    hop_sizes: [120, 240, 50]     # List of hop size for STFT-based loss
    win_lengths: [600, 1200, 240] # List of window length for STFT-based loss.
    window: "hann"                # Window function for STFT-based loss

###########################################################
#               ADVERSARIAL LOSS SETTING                  #
###########################################################
lambda_adv: 4.0  # Loss balancing coefficient.

###########################################################
#                  DATA LOADER SETTING                    #
###########################################################
batch_size: 8              # Batch size.
batch_max_steps: 24000     # Length of each audio in batch. Make sure it is divisible by n_shift.
num_workers: 2             # Number of workers in DataLoader.

###########################################################
#             OPTIMIZER & SCHEDULER SETTING               #
###########################################################
generator_optimizer_params:
    epsilon: 1.0e-6            # Generator's epsilon.
    weight_decay: 0.0      # Generator's weight decay coefficient.
generator_scheduler_params:
    learning_rate: 0.0001             # Generator's learning rate.
    step_size: 200000      # Generator's scheduler step size.
    gamma: 0.5             # Generator's scheduler gamma.
                           # At each step size, lr will be multiplied by this parameter.
generator_grad_norm: 10    # Generator's gradient norm.
discriminator_optimizer_params:
    epsilon: 1.0e-6            # Discriminator's epsilon.
    weight_decay: 0.0      # Discriminator's weight decay coefficient.
discriminator_scheduler_params:
    learning_rate: 0.00005            # Discriminator's learning rate. 
    step_size: 200000      # Discriminator's scheduler step size.
    gamma: 0.5             # Discriminator's scheduler gamma.
                           # At each step size, lr will be multiplied by this parameter.
discriminator_grad_norm: 1 # Discriminator's gradient norm.

###########################################################
#                    INTERVAL SETTING                     #
###########################################################
discriminator_train_start_steps: 100000 # Number of steps to start to train discriminator.
train_max_steps: 1000000                # Number of training steps.
save_interval_steps: 5000               # Interval steps to save checkpoint.
eval_interval_steps: 1000               # Interval steps to evaluate the network.

###########################################################
#                     OTHER SETTING                       #
###########################################################
num_save_intermediate_results: 4  # Number of results to be saved as intermediate results.
num_snapshots: 10                 # max number of snapshots to keep while training
seed: 42                          # random seed for paddle, random, and np.random


================================================
FILE: examples/aishell3/voc1/local/preprocess.sh
================================================
#!/bin/bash
# Data preparation: durations from MFA results, feature extraction, statistics
# on the training set, then normalization of the train/dev/test splits.
#
# Usage: ./local/preprocess.sh <config_path>
# MAIN_ROOT and BIN_DIR must be set in the environment before calling this.

stage=0
stop_stage=100

config_path=$1

# Succeeds when stage number $1 lies within [stage, stop_stage].
_stage_enabled() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# Stage 0: get durations from MFA's result.
if _stage_enabled 0; then
    echo "Generate durations.txt from MFA results ..."
    python3 ${MAIN_ROOT}/utils/gen_duration_from_textgrid.py \
        --inputdir=./aishell3_alignment_tone \
        --output=durations.txt \
        --config=${config_path}
fi

# Stage 1: extract features into the dump directory.
if _stage_enabled 1; then
    echo "Extract features ..."
    python3 ${BIN_DIR}/../preprocess.py \
        --rootdir=~/datasets/data_aishell3/ \
        --dataset=aishell3 \
        --dumpdir=dump \
        --dur-file=durations.txt \
        --config=${config_path} \
        --cut-sil=True \
        --num-cpu=20
fi

# Stage 2: get features' stats (mean and std) from the training split.
if _stage_enabled 2; then
    echo "Get features' stats ..."
    python3 ${MAIN_ROOT}/utils/compute_statistics.py \
        --metadata=dump/train/raw/metadata.jsonl \
        --field-name="feats"
fi

# Stage 3: normalize every split; dev and test reuse the training statistics.
if _stage_enabled 3; then
    echo "Normalize ..."

    for subset in train dev test; do
        python3 ${BIN_DIR}/../normalize.py \
            --metadata=dump/${subset}/raw/metadata.jsonl \
            --dumpdir=dump/${subset}/norm \
            --stats=dump/train/feats_stats.npy \
            --skip-wav-copy
    done
fi


================================================
FILE: examples/aishell3/voc1/run.sh
================================================
#!/bin/bash
# Driver script for this vocoder example.
#
# Stages (select a range with `--stage N --stop-stage M`):
#   0  prepare data    (local/preprocess.sh)
#   1  train the model (local/train.sh)
#   2  synthesize      (local/synthesize.sh)

set -e
source path.sh

# Default settings; the ones below can be overridden from the command line.
gpus=0,1
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_5000.pdz

# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this can not be mixed use with `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

# Succeeds when stage number $1 lies within [stage, stop_stage].
_stage_enabled() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# Stage 0: prepare data.
if _stage_enabled 0; then
    ./local/preprocess.sh ${conf_path} || exit -1
fi

# Stage 1: train model, all `ckpt` under `train_output_path/checkpoints/` dir.
if _stage_enabled 1; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

# Stage 2: synthesize with the checkpoint selected by ckpt_name.
if _stage_enabled 2; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi


================================================
FILE: examples/aishell3/voc5/README.md
================================================
# HiFiGAN with AISHELL-3
This example contains code used to train a [HiFiGAN](https://arxiv.org/abs/2010.05646) model with [AISHELL-3](http://www.aishelltech.com/aishell_3).

AISHELL-3 is a large-scale and high-fidelity multi-speaker Mandarin speech corpus that could be used to train multi-speaker Text-to-Speech (TTS) systems.
## Dataset
### Download and Extract
Download AISHELL-3 from its [Official Website](http://www.aishelltech.com/aishell_3) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/data_aishell3`.
### Get MFA Result and Extract
We use [MFA2.x](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get durations for aishell3_fastspeech2.
You can download it from here: [aishell3_alignment_tone.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/AISHELL-3/with_tone/aishell3_alignment_tone.tar.gz), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) (which uses MFA1.x for now) in our repo.

## Get Started
Assume the path to the dataset is `~/datasets/data_aishell3`.
Assume the path to the MFA result of AISHELL-3 is `./aishell3_alignment_tone`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to run only one stage. For example, the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.

```text
dump
├── dev
│   ├── norm
│   └── raw
├── test
│   ├── norm
│   └── raw
└── train
    ├── norm
    ├── raw
    └── feats_stats.npy
```

The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The `raw` folder contains the log magnitude of the mel spectrogram of each utterance, while the norm folder contains the normalized spectrogram. The statistics used to normalize the spectrogram are computed from the training set, which is located in `dump/train/feats_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains id and paths to the spectrogram of each utterance.

### Model Training
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
`./local/train.sh` calls `${BIN_DIR}/train.py`.
Here's the complete help message.

```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU]

Train a HiFiGAN model.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       HiFiGAN config file.
  --train-metadata TRAIN_METADATA
                        training data.
  --dev-metadata DEV_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
```

1. `--config` is a config file in yaml format to overwrite the default config, which can be found at `conf/default.yaml`.
2. `--train-metadata` and `--dev-metadata` should be the metadata file in the normalized subfolder of `train` and `dev` in the `dump` folder.
3. `--output-dir` is the directory to save the results of the experiment. Checkpoints are saved in `checkpoints/` inside this directory.
4. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

### Synthesizing
`./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
```text
usage: synthesize.py [-h] [--generator-type GENERATOR_TYPE] [--config CONFIG]
                     [--checkpoint CHECKPOINT] [--test-metadata TEST_METADATA]
                     [--output-dir OUTPUT_DIR] [--ngpu NGPU]

Synthesize with GANVocoder.

optional arguments:
  -h, --help            show this help message and exit
  --generator-type GENERATOR_TYPE
                        type of GANVocoder, should in {pwgan, mb_melgan,
                        style_melgan, } now
  --config CONFIG       GANVocoder config file.
  --checkpoint CHECKPOINT
                        snapshot to load.
  --test-metadata TEST_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
```

1. `--config` is the HiFiGAN config file. You should use the same config with which the model is trained.
2. `--checkpoint` is the checkpoint to load. Pick one of the checkpoints from `checkpoints` inside the training output directory.
3. `--test-metadata` is the metadata of the test dataset. Use the `metadata.jsonl` in the `dev/norm` subfolder from the processed directory.
4. `--output-dir` is the directory to save the synthesized audio files.
5. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.
## Pretrained Models
The pretrained model can be downloaded here:
- [hifigan_aishell3_ckpt_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_ckpt_0.2.0.zip)

The static model can be downloaded here:
- [hifigan_aishell3_static_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_static_1.1.0.zip)

The PIR static model can be downloaded here:
- [hifigan_aishell3_static_pir_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_static_pir_1.1.0.zip) (Running the PIR model requires setting `FLAGS_enable_pir_api=1`, and the PIR model only works with paddlepaddle>=3.0.0b2)

The ONNX model can be downloaded here:
- [hifigan_aishell3_onnx_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_onnx_1.1.0.zip)

The Paddle-Lite model can be downloaded here:
- [hifigan_aishell3_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_pdlite_1.3.0.zip)

Model | Step | eval/generator_loss | eval/mel_loss| eval/feature_matching_loss
:-------------:| :------------:| :-----: | :-----: | :--------:
default| 1(gpu) x 2500000|24.060|0.1068|7.499

HiFiGAN checkpoint contains files listed below.

```text
hifigan_aishell3_ckpt_0.2.0
├── default.yaml                  # default config used to train hifigan
├── feats_stats.npy               # statistics used to normalize spectrogram when training hifigan
└── snapshot_iter_2500000.pdz     # generator parameters of hifigan
```

## Acknowledgement
We adapted some code from https://github.com/kan-bayashi/ParallelWaveGAN.


================================================
FILE: examples/aishell3/voc5/conf/default.yaml
================================================
# This is the configuration file for AISHELL-3 dataset.
# This configuration is based on HiFiGAN V1, which is
# an official configuration. But I found that the optimizer
# setting does not work well with my implementation.
# So I changed optimizer settings as follows:
# - AdamW -> Adam
# - betas: [0.8, 0.99] -> betas: [0.5, 0.9]
# - Scheduler: ExponentialLR -> MultiStepLR
# To match the shift size difference, the upsample scales
# are also modified from the original 256 shift setting.
###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
fs: 24000                # Sampling rate.
n_fft: 2048              # FFT size (samples).
n_shift: 300             # Hop size (samples). 12.5ms
win_length: 1200         # Window length (samples). 50ms
                         # If set to null, it will be the same as fft_size.
window: "hann"           # Window function.
n_mels: 80               # Number of mel basis.
fmin: 80                 # Minimum freq in mel basis calculation. (Hz)
fmax: 7600               # Maximum frequency in mel basis calculation. (Hz)

###########################################################
#         GENERATOR NETWORK ARCHITECTURE SETTING          #
###########################################################
generator_params:
    in_channels: 80                       # Number of input channels.
    out_channels: 1                       # Number of output channels.
    channels: 512                         # Number of initial channels.
    kernel_size: 7                        # Kernel size of initial and final conv layers.
    upsample_scales: [5, 5, 4, 3]         # Upsampling scales.
    upsample_kernel_sizes: [10, 10, 8, 6] # Kernel size for upsampling layers.
    resblock_kernel_sizes: [3, 7, 11]     # Kernel size for residual blocks.
    resblock_dilations:                   # Dilations for residual blocks.
        - [1, 3, 5]
        - [1, 3, 5]
        - [1, 3, 5]
    use_additional_convs: True            # Whether to use additional conv layer in residual blocks.
    bias: True                            # Whether to use bias parameter in conv.
    nonlinear_activation: "leakyrelu"     # Nonlinear activation type.
    nonlinear_activation_params:          # Nonlinear activation parameters.
        negative_slope: 0.1
    use_weight_norm: True                 # Whether to apply weight normalization.


###########################################################
#       DISCRIMINATOR NETWORK ARCHITECTURE SETTING        #
###########################################################
discriminator_params:
    scales: 3                              # Number of multi-scale discriminator.
    scale_downsample_pooling: "AvgPool1D"  # Pooling operation for scale discriminator.
    scale_downsample_pooling_params:
        kernel_size: 4                     # Pooling kernel size.
        stride: 2                          # Pooling stride.
        padding: 2                         # Padding size.
    scale_discriminator_params:
        in_channels: 1                     # Number of input channels.
        out_channels: 1                    # Number of output channels.
        kernel_sizes: [15, 41, 5, 3]       # List of kernel sizes.
        channels: 128                      # Initial number of channels.
        max_downsample_channels: 1024      # Maximum number of channels in downsampling conv layers.
        max_groups: 16                     # Maximum number of groups in downsampling conv layers.
        bias: True
        downsample_scales: [4, 4, 4, 4, 1] # Downsampling scales.
        nonlinear_activation: "leakyrelu"  # Nonlinear activation.
        nonlinear_activation_params:
            negative_slope: 0.1
    follow_official_norm: True             # Whether to follow the official norm setting.
    periods: [2, 3, 5, 7, 11]              # List of period for multi-period discriminator.
    period_discriminator_params:
        in_channels: 1                     # Number of input channels.
        out_channels: 1                    # Number of output channels.
        kernel_sizes: [5, 3]               # List of kernel sizes.
        channels: 32                       # Initial number of channels.
        downsample_scales: [3, 3, 3, 3, 1] # Downsampling scales.
        max_downsample_channels: 1024      # Maximum number of channels in downsampling conv layers.
        bias: True                         # Whether to use bias parameter in conv layer.
        nonlinear_activation: "leakyrelu"  # Nonlinear activation.
        nonlinear_activation_params:       # Nonlinear activation parameters.
            negative_slope: 0.1
        use_weight_norm: True              # Whether to apply weight normalization.
        use_spectral_norm: False           # Whether to apply spectral normalization.
    

###########################################################
#                   STFT LOSS SETTING                     #
###########################################################
use_stft_loss: False                 # Whether to use multi-resolution STFT loss.
use_mel_loss: True                   # Whether to use Mel-spectrogram loss.
mel_loss_params:
    fs: 24000
    fft_size: 2048
    hop_size: 300
    win_length: 1200
    window: "hann"
    num_mels: 80
    fmin: 0
    fmax: 12000
    log_base: null
generator_adv_loss_params:
    average_by_discriminators: False # Whether to average loss by #discriminators.
discriminator_adv_loss_params:
    average_by_discriminators: False # Whether to average loss by #discriminators.
use_feat_match_loss: True
feat_match_loss_params:
    average_by_discriminators: False # Whether to average loss by #discriminators.
    average_by_layers: False         # Whether to average loss by #layers in each discriminator.
    include_final_outputs: False     # Whether to include final outputs in feat match loss calculation.

###########################################################
#               ADVERSARIAL LOSS SETTING                  #
###########################################################
lambda_aux: 45.0       # Loss balancing coefficient for the auxiliary (STFT / Mel) loss.
lambda_adv: 1.0        # Loss balancing coefficient for adversarial loss.
lambda_feat_match: 2.0 # Loss balancing coefficient for feat match loss.

###########################################################
#                  DATA LOADER SETTING                    #
###########################################################
batch_size: 16              # Batch size.
batch_max_steps: 8400       # Length of each audio in batch. Make sure it is divisible by hop_size.
num_workers: 2              # Number of workers in DataLoader.

###########################################################
#             OPTIMIZER & SCHEDULER SETTING               #
###########################################################
generator_optimizer_params:
    beta1: 0.5
    beta2: 0.9
    weight_decay: 0.0                   # Generator's weight decay coefficient.
generator_scheduler_params:
    learning_rate: 2.0e-4               # Generator's learning rate.
    gamma: 0.5                          # Generator's scheduler gamma.
    milestones:                         # At each milestone, lr will be multiplied by gamma.
        - 200000
        - 400000
        - 600000
        - 800000
generator_grad_norm: -1                 # Generator's gradient norm.
discriminator_optimizer_params:
    beta1: 0.5
    beta2: 0.9
    weight_decay: 0.0                   # Discriminator's weight decay coefficient.
discriminator_scheduler_params:
    learning_rate: 2.0e-4               # Discriminator's learning rate.
    gamma: 0.5                          # Discriminator's scheduler gamma.
    milestones:                         # At each milestone, lr will be multiplied by gamma.
        - 200000
        - 400000
        - 600000
        - 800000    
discriminator_grad_norm: -1             # Discriminator's gradient norm.            

###########################################################
#                    INTERVAL SETTING                     #
###########################################################
generator_train_start_steps: 1     # Number of steps to start to train generator.
discriminator_train_start_steps: 0 # Number of steps to start to train discriminator.
train_max_steps: 2500000           # Number of training steps.
save_interval_steps: 5000         # Interval steps to save checkpoint.
eval_interval_steps: 1000          # Interval steps to evaluate the network.

###########################################################
#                     OTHER SETTING                       #
###########################################################
num_snapshots: 10                 # max number of snapshots to keep while training
seed: 42                          # random seed for paddle, random, and np.random


================================================
FILE: examples/aishell3/voc5/run.sh
================================================
#!/bin/bash
# Staged driver for the AISHELL-3 voc5 example: preprocess -> train -> synthesize.
# Stages whose index lies within [stage, stop_stage] are executed in order.

# Abort as soon as any command fails.
set -e
# NOTE(review): path.sh is expected to define MAIN_ROOT (used below) — confirm.
source path.sh

# Default settings; each can be overridden on the command line once
# parse_options.sh has been sourced below.
gpus=0            # value exported as CUDA_VISIBLE_DEVICES for train/synthesize
stage=0           # first stage to run
stop_stage=100    # last stage to run

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_5000.pdz   # checkpoint file name passed to the synthesize stage

# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this can not be mixed use with `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

# Stage 0: data preparation.
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # prepare data
    ./local/preprocess.sh ${conf_path} || exit -1
fi

# Stage 1: training.
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # train model, all `ckpt` under `train_output_path/checkpoints/` dir
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

# Stage 2: synthesis with the checkpoint named by ckpt_name.
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # synthesize
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi


================================================
FILE: examples/aishell3_vctk/README.md
================================================
# Mixed Chinese and English TTS with AISHELL3 and VCTK datasets
* ernie_sat - ERNIE-SAT


================================================
FILE: examples/aishell3_vctk/ernie_sat/README.md
================================================
# ERNIE-SAT with AISHELL-3 and VCTK dataset
[ERNIE-SAT](https://arxiv.org/abs/2211.03545) is a speech-text joint pretraining framework that achieves SOTA results in cross-lingual multi-speaker speech synthesis and cross-lingual speech editing tasks. It can be applied to a series of scenarios such as Speech Editing, personalized Speech Synthesis, and Voice Cloning.

## Model Framework
In ERNIE-SAT, we propose two innovations:
- In the pretraining process, the phonemes corresponding to Chinese and English are used as input to achieve cross-language and personalized soft phoneme mapping
- The joint mask learning of speech and text is used to realize the alignment of speech and text

<p align="center">
    <img src="https://user-images.githubusercontent.com/24568452/186110814-1b9c6618-a0ab-4c0c-bb3d-3d860b0e8cc2.png" />
</p>

## Dataset
### Download and Extract
Download all datasets and extract them to `~/datasets`:
- The aishell3 dataset is in the directory `~/datasets/data_aishell3`
- The vctk dataset is in the directory `~/datasets/VCTK-Corpus-0.92`

### Get MFA Result and Extract
We use [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get durations for the fastspeech2 training.
You can download from here:
- [aishell3_alignment_tone.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/AISHELL-3/with_tone/aishell3_alignment_tone.tar.gz) 
- [vctk_alignment.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/VCTK-Corpus-0.92/vctk_alignment.tar.gz)

Or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) (which uses MFA1.x now) in our repo.

## Get Started
Assume the paths to the datasets are:
- `~/datasets/data_aishell3` 
- `~/datasets/VCTK-Corpus-0.92`

Assume the path to the MFA results of the datasets are:
- `./aishell3_alignment_tone`
- `./vctk_alignment`

Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
    - synthesize waveform from text file.

```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.

```text
dump
├── dev
│   ├── norm
│   └── raw
├── phone_id_map.txt
├── speaker_id_map.txt
├── test
│   ├── norm
│   └── raw
└── train
    ├── norm
    ├── raw
    └── speech_stats.npy
```
The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The raw folder contains speech features of each utterance, while the norm folder contains normalized ones. The statistics used to normalize features are computed from the training set and are stored in `dump/train/*_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains phones, text_lengths, speech_lengths, durations, the path of speech features, speaker, and id of each utterance.

### Model Training
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
`./local/train.sh` calls `${BIN_DIR}/train.py`.

### Synthesizing
We use [HiFiGAN](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/voc5) as the neural vocoder.

Download pretrained HiFiGAN model from [hifigan_aishell3_ckpt_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_ckpt_0.2.0.zip) and unzip it.
```bash
unzip hifigan_aishell3_ckpt_0.2.0.zip
```
HiFiGAN checkpoint contains files listed below.
```text
hifigan_aishell3_ckpt_0.2.0
├── default.yaml                    # default config used to train HiFiGAN
├── feats_stats.npy                 # statistics used to normalize spectrogram when training HiFiGAN
└── snapshot_iter_2500000.pdz       # generator parameters of HiFiGAN
```
`./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
The vocoder used for synthesis is `hifigan`.

##  Speech Synthesis and Speech Editing
### Prepare

**prepare aligner**
```bash
mkdir -p tools/aligner
cd tools
# download MFA
wget https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner/releases/download/v1.0.1/montreal-forced-aligner_linux.tar.gz
# extract MFA
tar xvf montreal-forced-aligner_linux.tar.gz
# fix .so of MFA
cd montreal-forced-aligner/lib
ln -snf libpython3.6m.so.1.0 libpython3.6m.so
cd -
# download align models and dicts
cd aligner
wget https://paddlespeech.cdn.bcebos.com/MFA/ernie_sat/aishell3_model.zip
wget https://paddlespeech.cdn.bcebos.com/MFA/AISHELL-3/with_tone/simple.lexicon
wget https://paddlespeech.cdn.bcebos.com/MFA/ernie_sat/vctk_model.zip
wget https://paddlespeech.cdn.bcebos.com/MFA/LJSpeech-1.1/cmudict-0.7b
cd ../../
```
**prepare pretrained FastSpeech2 models**

ERNIE-SAT uses FastSpeech2 as the phoneme duration predictor:
```bash
mkdir download
cd download
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_conformer_baker_ckpt_0.5.zip
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_ljspeech_ckpt_0.5.zip
unzip fastspeech2_conformer_baker_ckpt_0.5.zip
unzip fastspeech2_nosil_ljspeech_ckpt_0.5.zip
cd ../
```
**prepare source data**
```bash
mkdir source
cd source
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/source/SSB03540307.wav
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/source/SSB03540428.wav
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/source/LJ050-0278.wav
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/source/p243_313.wav
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/source/p299_096.wav
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/source/this_was_not_the_show_for_me.wav
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/source/README.md
cd ../
```
You can check the text of downloaded wavs in `source/README.md`.
### Cross Language Voice Cloning
```bash
./run.sh --stage 3 --stop-stage 3 --gpus 0
```
`stage 3` of `run.sh` calls `local/synthesize_e2e.sh`. `synthesize_e2e.sh` is a script for end-to-end speech synthesis, supporting cross-language speech synthesis tasks, including English-to-Chinese (en → zh) and Chinese-to-English (zh → en).

You can modify `--wav_path`, `--old_str`, and `--new_str` yourself. `--old_str` should be the text corresponding to the audio of `--wav_path`, and `--new_str` should be designed according to `--task_name`. `--source_lang` and `--target_lang` should be different in this example.
## Pretrained Model
Pretrained ErnieSAT model:
- [erniesat_aishell3_vctk_ckpt_1.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/erniesat_aishell3_vctk_ckpt_1.2.0.zip)

Model | Step | eval/text_mlm_loss | eval/mlm_loss | eval/loss
:-------------:| :------------:| :-----: | :-----:| :-----:
default| 8(gpu) x 489000|0.000001|52.477642 |52.477642


================================================
FILE: examples/aishell3_vctk/ernie_sat/conf/default.yaml
================================================
# This configuration was tested on 8 GPUs (A100) with 80GB GPU memory.
# It takes around 4 days to finish the training. You can adjust
# batch_size and num_workers here, and ngpu in local/train.sh, for your machine.
###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################

fs: 24000          # sr
n_fft: 2048        # FFT size (samples).
n_shift: 300       # Hop size (samples). 12.5ms
win_length: 1200   # Window length (samples). 50ms
                   # If set to null, it will be the same as fft_size.
window: "hann"     # Window function.

# Only used for feats_type != raw

fmin: 80           # Minimum frequency of Mel basis.
fmax: 7600         # Maximum frequency of Mel basis.
n_mels: 80         # The number of mel basis.

mean_phn_span: 8
mlm_prob: 0.8

###########################################################
#                       DATA SETTING                      #
###########################################################
batch_size: 40
num_workers: 8

###########################################################
#                       MODEL SETTING                     #
###########################################################
model:
    text_masking: true
    postnet_layers: 5
    postnet_filts: 5
    postnet_chans: 256
    encoder_type: conformer
    decoder_type: conformer
    enc_input_layer: sega_mlm
    enc_pre_speech_layer: 0
    enc_cnn_module_kernel: 7
    enc_attention_dim: 384
    enc_attention_heads: 2
    enc_linear_units: 1536
    enc_num_blocks: 4
    enc_dropout_rate: 0.2
    enc_positional_dropout_rate: 0.2
    enc_attention_dropout_rate: 0.2
    enc_normalize_before: true
    enc_macaron_style: true
    enc_use_cnn_module: true
    enc_selfattention_layer_type: legacy_rel_selfattn
    enc_activation_type: swish
    enc_pos_enc_layer_type: legacy_rel_pos
    enc_positionwise_layer_type: conv1d
    enc_positionwise_conv_kernel_size: 3
    dec_cnn_module_kernel: 31
    dec_attention_dim: 384
    dec_attention_heads: 2
    dec_linear_units: 1536
    dec_num_blocks: 4
    dec_dropout_rate: 0.2
    dec_positional_dropout_rate: 0.2
    dec_attention_dropout_rate: 0.2
    dec_macaron_style: true
    dec_use_cnn_module: true
    dec_selfattention_layer_type: legacy_rel_selfattn
    dec_activation_type: swish
    dec_pos_enc_layer_type: legacy_rel_pos
    dec_positionwise_layer_type: conv1d
    dec_positionwise_conv_kernel_size: 3

###########################################################
#                     OPTIMIZER SETTING                   #
###########################################################
scheduler_params:
    d_model: 384
    warmup_steps: 4000
grad_clip: 1.0

###########################################################
#                     TRAINING SETTING                    #
###########################################################
max_epoch: 1500
num_snapshots: 50

###########################################################
#                       OTHER SETTING                     #
###########################################################
seed: 0

token_list:
- <blank>
- <unk>
- AH0
- T
- N
- sp
- S
- R
- D
- L
- Z
- DH
- IH1
- K
- W
- M
- EH1
- AE1
- ER0
- B
- IY1
- P
- V
- IY0
- F
- HH
- AA1
- AY1
- AH1
- EY1
- IH0
- AO1
- OW1
- UW1
- G
- NG
- SH
- Y
- TH
- ER1
- JH
- UH1
- AW1
- CH
- IH2
- OW0
- OW2
- EY2
- EH2
- UW0
- OY1
- ZH
- EH0
- AY2
- AW2
- AA2
- AE2
- IY2
- AH2
- AE0
- AO2
- AY0
- AO0
- UW2
- UH2
- AA0
- EY0
- AW0
- UH0
- ER2
- OY2
- OY0
- d
- sh
- ii
- j
- zh
- l
- x
- b
- g
- uu
- e5
- h
- q
- m
- i1
- t
- z
- ch
- f
- s
- u4
- ix4
- i4
- n
- i3
- iu3
- vv
- ian4
- ix2
- r
- e4
- ai4
- k
- ing2
- a1
- en2
- ui4
- ong1
- uo3
- u2
- u3
- ao4
- ee
- p
- an1
- eng2
- i2
- in1
- c
- ai2
- ian2
- e2
- an4
- ing4
- v4
- ai3
- a5
- ian3
- eng1
- ong4
- ang4
- ian1
- ing1
- iy4
- ao3
- ang1
- uo4
- u1
- iao4
- iu4
- a4
- van2
- ie4
- ang2
- ou4
- iang4
- ix1
- er4
- iy1
- e1
- en1
- ui2
- an3
- ei4
- ong2
- uo1
- ou3
- uo2
- iao1
- ou1
- an2
- uan4
- ia4
- ia1
- ang3
- v3
- iu2
- iao3
- in4
- a3
- ei3
- iang3
- v2
- eng4
- en3
- aa
- uan1
- v1
- ao1
- ve4
- ie3
- ai1
- ing3
- iang1
- a2
- ui1
- en4
- en5
- in3
- uan3
- e3
- ie1
- ve2
- ei2
- in2
- ix3
- uan2
- iang2
- ie2
- ua4
- ou2
- uai4
- er2
- eng3
- uang3
- un1
- ong3
- uang4
- vn4
- un2
- iy3
- iz4
- ui3
- iao2
- iong4
- un4
- van4
- ao2
- uang1
- iy5
- o2
- ei1
- ua1
- iu1
- uang2
- er5
- o1
- un3
- vn1
- vn2
- o4
- ve1
- van3
- ua2
- er3
- iong3
- van1
- ia2
- iy2
- ia3
- iong1
- uo5
- oo
- ve3
- ou5
- uai3
- ian5
- iong2
- uai2
- uai1
- ua3
- vn3
- ia5
- ie5
- ueng1
- o5
- o3
- iang5
- ei5
- <sos/eos>


================================================
FILE: examples/aishell3_vctk/ernie_sat/local/preprocess.sh
================================================
#!/bin/bash
# Data preparation for ERNIE-SAT on AISHELL-3 + VCTK.
# Usage: ./local/preprocess.sh <config_path>
# NOTE(review): MAIN_ROOT and BIN_DIR are not set in this script; they are
# presumably exported by path.sh in the calling run.sh — confirm.

# Stage range is fixed here; edit these two values to run a subset of stages.
stage=0
stop_stage=100

# Path to the YAML config forwarded to the Python tools below.
config_path=$1

# Stage 0: durations for AISHELL-3.
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # get durations from MFA's result
    echo "Generate durations.txt from MFA results for aishell3 ..."
    python3 ${MAIN_ROOT}/utils/gen_duration_from_textgrid.py \
        --inputdir=./aishell3_alignment_tone \
        --output durations_aishell3.txt \
        --config=${config_path}
fi

# Stage 1: durations for VCTK.
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # get durations from MFA's result
    echo "Generate durations.txt from MFA results for vctk ..."
    python3 ${MAIN_ROOT}/utils/gen_duration_from_textgrid.py \
        --inputdir=./vctk_alignment \
        --output durations_vctk.txt \
        --config=${config_path}
fi

# Stage 2: merge both per-dataset duration files.
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # concatenate the two duration files into the single durations.txt used by stages 3 and 4
    echo "concat durations_aishell3.txt and durations_vctk.txt to durations.txt"
    cat durations_aishell3.txt durations_vctk.txt > durations.txt
fi

# Stage 3: feature extraction for AISHELL-3.
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # extract features (aishell3); output goes to the shared `dump` directory
    echo "Extract features ..."
    python3 ${BIN_DIR}/preprocess.py \
        --dataset=aishell3 \
        --rootdir=~/datasets/data_aishell3/ \
        --dumpdir=dump \
        --dur-file=durations.txt \
        --config=${config_path} \
        --num-cpu=20 \
        --cut-sil=True
fi

# Stage 4: feature extraction for VCTK.
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    # extract features (vctk); same `dump` directory and durations.txt as stage 3
    echo "Extract features ..."
    python3 ${BIN_DIR}/preprocess.py \
        --dataset=vctk \
        --rootdir=~/datasets/VCTK-Corpus-0.92/ \
        --dumpdir=dump \
        --dur-file=durations.txt \
        --config=${config_path} \
        --num-cpu=20 \
        --cut-sil=True
fi

# Stage 5: statistics over the training split.
if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
    # get features' stats(mean and std)
    # NOTE(review): presumably writes dump/train/speech_stats.npy, which stage 6 reads — confirm.
    echo "Get features' stats ..."
    python3 ${MAIN_ROOT}/utils/compute_statistics.py \
        --metadata=dump/train/raw/metadata.jsonl \
        --field-name="speech"
fi

# Stage 6: normalization of train / dev / test.
if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then
    # normalize and convert phone/speaker to id, dev and test should use train's stats
    echo "Normalize ..."
    python3 ${BIN_DIR}/normalize.py \
        --metadata=dump/train/raw/metadata.jsonl \
        --dumpdir=dump/train/norm \
        --speech-stats=dump/train/speech_stats.npy \
        --phones-dict=dump/phone_id_map.txt \
        --speaker-dict=dump/speaker_id_map.txt

    python3 ${BIN_DIR}/normalize.py \
        --metadata=dump/dev/raw/metadata.jsonl \
        --dumpdir=dump/dev/norm \
        --speech-stats=dump/train/speech_stats.npy \
        --phones-dict=dump/phone_id_map.txt \
        --speaker-dict=dump/speaker_id_map.txt

    python3 ${BIN_DIR}/normalize.py \
        --metadata=dump/test/raw/metadata.jsonl \
        --dumpdir=dump/test/norm \
        --speech-stats=dump/train/speech_stats.npy \
        --phones-dict=dump/phone_id_map.txt \
        --speaker-dict=dump/speaker_id_map.txt
fi


================================================
FILE: examples/aishell3_vctk/ernie_sat/local/synthesize_e2e.sh
================================================
#!/bin/bash
# not ready yet
#
# Cross-language voice cloning with ERNIE-SAT (en -> zh and zh -> en).
# Usage: ./local/synthesize_e2e.sh <config_path> <train_output_path> <ckpt_name>
#
# The shebang must be the very first line of the file; otherwise the kernel
# ignores it and the script only runs through the caller shell's fallback.
# NOTE(review): BIN_DIR is not set in this script; it is presumably exported
# by path.sh in the calling run.sh — confirm.

config_path=$1         # ERNIE-SAT YAML config
train_output_path=$2   # experiment dir; checkpoints are read from <dir>/checkpoints/
ckpt_name=$3           # checkpoint file name inside the checkpoints dir

# Stage range is fixed here; edit these two values to run only one direction.
stage=0
stop_stage=1

# Stage 0: clone an English reference speaker and synthesize Chinese text.
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    echo 'speech cross language from en to zh !'
    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 ${BIN_DIR}/synthesize_e2e.py \
        --task_name=synthesize \
        --wav_path=source/p243_313.wav \
        --old_str='For that reason cover should not be given' \
        --new_str='今天天气很好' \
        --source_lang=en \
        --target_lang=zh \
        --erniesat_config=${config_path} \
        --phones_dict=dump/phone_id_map.txt \
        --erniesat_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
        --erniesat_stat=dump/train/speech_stats.npy \
        --voc=hifigan_aishell3 \
        --voc_config=hifigan_aishell3_ckpt_0.2.0/default.yaml \
        --voc_ckpt=hifigan_aishell3_ckpt_0.2.0/snapshot_iter_2500000.pdz \
        --voc_stat=hifigan_aishell3_ckpt_0.2.0/feats_stats.npy \
        --output_name=exp/pred_clone_en_zh.wav
fi
# Stage 1: clone a Chinese reference speaker and synthesize English text.
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    echo 'speech cross language from zh to en !'
    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 ${BIN_DIR}/synthesize_e2e.py \
        --task_name=synthesize \
        --wav_path=source/SSB03540307.wav \
        --old_str='请播放歌曲小苹果' \
        --new_str="Thank you" \
        --source_lang=zh \
        --target_lang=en \
        --erniesat_config=${config_path} \
        --phones_dict=dump/phone_id_map.txt \
        --erniesat_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
        --erniesat_stat=dump/train/speech_stats.npy \
        --voc=hifigan_aishell3 \
        --voc_config=hifigan_aishell3_ckpt_0.2.0/default.yaml \
        --voc_ckpt=hifigan_aishell3_ckpt_0.2.0/snapshot_iter_2500000.pdz \
        --voc_stat=hifigan_aishell3_ckpt_0.2.0/feats_stats.npy \
        --output_name=exp/pred_clone_zh_en.wav
fi


================================================
FILE: examples/aishell3_vctk/ernie_sat/run.sh
================================================
#!/bin/bash
# Staged driver for the ERNIE-SAT AISHELL-3 + VCTK example:
# preprocess -> train -> synthesize -> cross-language end-to-end synthesis.
# Stages whose index lies within [stage, stop_stage] are executed in order.

# Abort as soon as any command fails.
set -e
# NOTE(review): path.sh is expected to define MAIN_ROOT (used below) — confirm.
source path.sh

# Default settings; each can be overridden on the command line once
# parse_options.sh has been sourced below.
gpus=0,1,2,3,4,5,6,7   # value exported as CUDA_VISIBLE_DEVICES for stages 1-3
stage=0                # first stage to run
stop_stage=100         # last stage to run

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_489000.pdz   # checkpoint file name passed to stages 2 and 3

# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this can not be mixed use with `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

# Stage 0: data preparation.
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # prepare data
    ./local/preprocess.sh ${conf_path} || exit -1
fi

# Stage 1: training.
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # train model, all `ckpt` under `train_output_path/checkpoints/` dir
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

# Stage 2: synthesis with the checkpoint named by ckpt_name.
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # synthesize, vocoder is hifigan
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

# Stage 3: cross-language end-to-end synthesis.
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # synthesize_e2e, run both speech synthesis from Chinese to English and English to Chinese
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi


================================================
FILE: examples/ami/README.md
================================================
# Speaker Diarization on AMI corpus

* sd0 - speaker diarization by AHC and SC, based on embeddings


================================================
FILE: examples/ami/sd0/.gitignore
================================================
results

================================================
FILE: examples/ami/sd0/README.md
================================================
# Speaker Diarization on AMI corpus

## About the AMI corpus:
"The AMI Meeting Corpus consists of 100 hours of meeting recordings. The recordings use a range of signals synchronized to a common timeline. These include close-talking and far-field microphones, individual and room-view video cameras, and output from a slide projector and an electronic whiteboard. During the meetings, the participants also have unsynchronized pens available to them that record what is written. The meetings were recorded in English using three different rooms with different acoustic properties, and include mostly non-native speakers." See [ami overview](http://groups.inf.ed.ac.uk/ami/corpus/overview.shtml) for more details.

## About the example
The script performs diarization using x-vectors(TDNN,ECAPA-TDNN) on the AMI mix-headset data. We demonstrate the use of different clustering methods: AHC, spectral.

## How to Run
### prepare annotations and audios
Download the AMI corpus. You need around 10GB of free space to get the whole data set.
The signals are too large to package in this way, so you need to use the chooser on the download page to indicate which ones you wish to download.

```bash
## download  annotations
wget http://groups.inf.ed.ac.uk/ami/AMICorpusAnnotations/ami_public_manual_1.6.2.zip && unzip ami_public_manual_1.6.2.zip
```

then please follow https://groups.inf.ed.ac.uk/ami/download/ to download the Signals:
1) Select one or more AMI meetings: for the meeting IDs, please refer to `./local/ami_splits.py`
2) Select media streams: Just select Headset mix

### start running
Use the following command to run diarization on AMI corpus.
```bash
./run.sh  --data_folder ./amicorpus  --manual_annot_folder ./ami_public_manual_1.6.2
```

## Best performance in terms of Diarization Error Rate (DER).
  | System | Mic. |Orcl. (Dev)|Orcl. (Eval)| Est. (Dev) |Est. (Eval)|
  | --------|-------- | ---------|----------- | --------|-----------|
  | ECAPA-TDNN + SC  | HeadsetMix| 1.54 % | 3.07 %| 1.56 %| 3.28 %  |


================================================
FILE: examples/ami/sd0/conf/ecapa_tdnn.yaml
================================================
###########################################################
#                AMI DATA PREPARE SETTING               #
###########################################################
split_type: 'full_corpus_asr'
skip_TNO: True
# Options for mic_type: 'Mix-Lapel', 'Mix-Headset', 'Array1', 'Array1-01', 'BeamformIt'
mic_type: 'Mix-Headset'
vad_type: 'oracle'
max_subseg_dur: 3.0
overlap: 1.5
# Some more exp folders (for cleaner structure).
embedding_dir: emb #!ref <save_folder>/emb
meta_data_dir: metadata #!ref <save_folder>/metadata
ref_rttm_dir: ref_rttms #!ref <save_folder>/ref_rttms
sys_rttm_dir: sys_rttms #!ref <save_folder>/sys_rttms
der_dir: DER #!ref <save_folder>/DER


###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
# currently, we only support fbank
sr: 16000           # sample rate
n_mels: 80
window_size: 400     #25ms, sample rate 16000, 25 * 16000 / 1000 = 400 
hop_size: 160        #10ms, sample rate 16000, 10 * 16000 / 1000 = 160
#left_frames: 0
#right_frames: 0
#deltas: False


###########################################################
#                       MODEL SETTING                     #
###########################################################
# currently, we only support ecapa-tdnn in the ecapa_tdnn.yaml
# if you want to use another model, please choose another configuration yaml file
seed: 1234
emb_dim: 192
batch_size: 16
model:
  input_size: 80
  channels: [1024, 1024, 1024, 1024, 3072]
  kernel_sizes: [5, 3, 3, 3, 1]
  dilations: [1, 2, 3, 4, 1]
  attention_channels: 128
  lin_neurons: 192
# Will automatically download ECAPA-TDNN model (best).

###########################################################
#               SPECTRAL CLUSTERING SETTING               #
###########################################################
backend: 'SC' # options: 'kmeans' # Note: kmeans goes only with cos affinity
affinity: 'cos'  # options: cos, nn
max_num_spkrs: 10
oracle_n_spkrs: True


###########################################################
#                  DER EVALUATION SETTING                 #
###########################################################
ignore_overlap: True
forgiveness_collar: 0.25


================================================
FILE: examples/ami/sd0/local/ami_prepare.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Data preparation.

Download: http://groups.inf.ed.ac.uk/ami/download/

Prepares metadata files (JSON) from manual annotations "segments/" using RTTM format (Oracle VAD).
"""
import argparse
import glob
import json
import logging
import os
import xml.etree.ElementTree as et

from ami_splits import get_AMI_split
from dataio import load_pkl
from dataio import save_pkl

from paddlespeech.utils.argparse import strtobool

logger = logging.getLogger(__name__)
SAMPLERATE = 16000


def prepare_ami(
        data_folder,
        manual_annot_folder,
        save_folder,
        ref_rttm_dir,
        meta_data_dir,
        split_type="full_corpus_asr",
        skip_TNO=True,
        mic_type="Mix-Headset",
        vad_type="oracle",
        max_subseg_dur=3.0,
        overlap=1.5, ):
    """
    Prepares reference RTTM and JSON files for the AMI dataset.

    The preparation is skipped when all expected JSON files exist and the
    options pickled by a previous run are equal to the current ones.

    Arguments
    ---------
    data_folder : str
        Path to the folder where the original amicorpus is stored.
    manual_annot_folder : str
        Directory where the manual annotations are stored.
    save_folder : str
        The save directory in results.
    ref_rttm_dir : str
        Directory to store reference RTTM files.
    meta_data_dir : str
        Directory to store the meta data (json) files.
    split_type : str
        Standard dataset split. See ami_splits.py for more information.
        Allowed split_type: "scenario_only", "full_corpus" or "full_corpus_asr"
    skip_TNO: bool
        Skips TNO meeting recordings if True.
    mic_type : str
        Type of microphone to be used.
    vad_type : str
        Type of VAD. Kept for future when VAD will be added.
    max_subseg_dur : float
        Duration in seconds of a subsegments to be prepared from larger segments.
    overlap : float
        Overlap duration in seconds between adjacent subsegments

    Raises
    ------
    ValueError
        If ``split_type`` is not a split known to ``get_AMI_split``.

    Example
    -------
    >>> from ami_prepare import prepare_ami
    >>> data_folder = '/home/data/ami/amicorpus/'
    >>> manual_annot_folder = '/home/data/ami/ami_public_manual/'
    >>> save_folder = './results/'
    >>> ref_rttm_dir = './results/ref_rttms'
    >>> meta_data_dir = './results/metadata'
    >>> prepare_ami(data_folder, manual_annot_folder, save_folder,
    ...             ref_rttm_dir, meta_data_dir,
    ...             split_type='full_corpus_asr', mic_type='Mix-Headset')
    """

    split_names = ("train", "dev", "eval")

    # Meta files (one JSON per split)
    meta_files = [
        os.path.join(meta_data_dir,
                     "ami_" + name + "." + mic_type + ".subsegs.json")
        for name in split_names
    ]

    # Create configuration for easily skipping data_preparation stage
    conf = {
        "data_folder": data_folder,
        "save_folder": save_folder,
        "ref_rttm_dir": ref_rttm_dir,
        "meta_data_dir": meta_data_dir,
        "split_type": split_type,
        "skip_TNO": skip_TNO,
        "mic_type": mic_type,
        "vad": vad_type,
        "max_subseg_dur": max_subseg_dur,
        "overlap": overlap,
        "meta_files": meta_files,
    }

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(save_folder, exist_ok=True)

    # Setting output option files.
    opt_file = "opt_ami_prepare." + mic_type + ".pkl"

    # Check if this phase is already done (if so, skip it)
    if skip(save_folder, conf, meta_files, opt_file):
        logger.info(
            "Skipping data preparation, as it was completed in previous run.")
        return

    msg = "\tCreating meta-data file for the AMI Dataset.."
    logger.debug(msg)

    # Get the split. get_AMI_split returns None for an unknown option, which
    # would otherwise surface as an opaque unpacking TypeError.
    split_sets = get_AMI_split(split_type)
    if split_sets is None:
        raise ValueError("Unknown AMI split_type: %r" % (split_type, ))
    train_set, dev_set, eval_set = split_sets
    meetings_per_split = dict(zip(split_names, (train_set, dev_set, eval_set)))

    # Prepare RTTM from XML (manual annotations) and store it as ground truth
    os.makedirs(ref_rttm_dir, exist_ok=True)

    # Create reference RTTM files
    for name in split_names:
        rttm_file = ref_rttm_dir + "/fullref_ami_" + name + ".rttm"
        prepare_segs_for_RTTM(
            meetings_per_split[name],
            rttm_file,
            data_folder,
            manual_annot_folder,
            name,
            skip_TNO, )

    # Create meta_files for splits
    os.makedirs(meta_data_dir, exist_ok=True)

    for name in split_names:
        rttm_file = ref_rttm_dir + "/fullref_ami_" + name + ".rttm"
        meta_filename_prefix = "ami_" + name
        prepare_metadata(
            rttm_file,
            meta_data_dir,
            data_folder,
            meta_filename_prefix,
            max_subseg_dur,
            overlap,
            mic_type, )

    # Persist the options so that the next run can detect a completed stage.
    save_opt_file = os.path.join(save_folder, opt_file)
    save_pkl(conf, save_opt_file)


def get_RTTM_per_rec(segs, spkrs_list, rec_id):
    """Builds the RTTM lines (header + speaker turns) of one recording.

    Arguments
    ---------
    segs : list
        Rows whose first three items are start time, end time and speaker ID.
    spkrs_list : list
        Speaker IDs; one SPKR-INFO header line is emitted per entry.
    rec_id : str
        Recording ID written into every line.

    Returns
    -------
    list
        RTTM lines as strings. Rows whose end time is smaller than their
        start time are logged and left out.
    """

    # Header, e.g. SPKR-INFO ES2008c 0 <NA> <NA> <NA> unknown ES2008c.A_PM <NA> <NA>
    lines = [
        " ".join([
            "SPKR-INFO", rec_id, "0", "<NA>", "<NA>", "<NA>", "unknown",
            spkr_id, "<NA>", "<NA>"
        ]) for spkr_id in spkrs_list
    ]

    # Body, e.g. SPEAKER ES2008c 0 37.880 0.590 <NA> <NA> ES2008c.A_PM <NA> <NA>
    for seg in segs:
        seg_end = float(seg[1])
        seg_start = float(seg[0])
        seg_dur = str(round(seg_end - seg_start, 4))

        if seg_end < seg_start:
            logger.info(
                "Possibly Incorrect Annotation Found!! transcriber_start (%s) > transcriber_end (%s)"
                % (seg[0], seg[1]))
            logger.info(
                "Excluding this incorrect row from the RTTM : %s, %s, %s, %s" %
                (rec_id, seg[0], seg_dur, str(seg[2]), ))
            continue

        lines.append(" ".join([
            "SPEAKER", rec_id, "0", str(round(seg_start, 4)), seg_dur, "<NA>",
            "<NA>", str(seg[2]), "<NA>", "<NA>"
        ]))

    return lines


def prepare_segs_for_RTTM(list_ids, out_rttm_file, audio_dir, annot_dir,
                          split_type, skip_TNO):
    """Writes one reference RTTM file covering all meetings of a split.

    Arguments
    ---------
    list_ids : list
        Meeting ID prefixes; sessions are found by globbing
        ``audio_dir/<id>*``.
    out_rttm_file : str
        Path of the RTTM file to write.
    audio_dir : str
        Folder holding one entry per session.
    annot_dir : str
        Annotation folder; speaker XMLs are read from ``annot_dir/segments``.
    split_type : str
        Name of the split being prepared ("train", "dev" or "eval").
    skip_TNO : bool
        If True, meetings whose ID starts with "TS" are skipped for every
        split other than "train".
    """

    all_lines = []  # RTTM lines of every recording in this split

    for main_meet_id in list_ids:

        # Skip TNO meetings from dev and eval sets
        if (main_meet_id.startswith("TS") and split_type != "train" and
                skip_TNO is True):
            logger.info("Skipping TNO meeting in AMI " + str(split_type) +
                        " set : " + str(main_meet_id))
            continue

        for sess in sorted(glob.glob(audio_dir + "/" + main_meet_id + "*")):
            rec_id = os.path.basename(sess)
            xml_pattern = annot_dir + "/segments/" + rec_id + ".*.segments.xml"

            rec_segs = []
            rec_spkrs = []  # non-scenario recordings contain 3-5 speakers

            # Sorted so that speakers come out as A, B, C, D, E ...
            for spkr_xml_file in sorted(glob.glob(xml_pattern)):
                spkr_ID = rec_id + "." + os.path.basename(spkr_xml_file).split(
                    ".")[1]
                rec_spkrs.append(spkr_ID)

                # Start, end and speaker_ID of every <segment> element
                xml_root = et.parse(spkr_xml_file).getroot()
                for elem in xml_root.iter("segment"):
                    rec_segs.append([
                        elem.attrib["transcriber_start"],
                        elem.attrib["transcriber_end"],
                        spkr_ID,
                    ])

            # Order rows by start time (per recording)
            rec_segs.sort(key=lambda seg: float(seg[0]))

            all_lines.extend(get_RTTM_per_rec(rec_segs, rec_spkrs, rec_id))

    # Write one RTTM as groundtruth. For example, "fullref_eval.rttm"
    with open(out_rttm_file, "w") as rttm_f:
        for rttm_line in all_lines:
            rttm_f.write("%s\n" % rttm_line)


def is_overlapped(end1, start2):
    """Tells whether the second segment starts before or when the first ends.

    Arguments
    ---------
    end1 : float
        End time of the first segment.
    start2 : float
        Start time of the second segment.

    Returns
    -------
    bool
        False if ``start2`` is strictly greater than ``end1``, True otherwise
        (touching segments count as overlapping).
    """

    return not start2 > end1


def merge_rttm_intervals(rttm_segs):
    """Merges adjacent segments in rttm if they overlap.

    Arguments
    ---------
    rttm_segs : list
        RTTM rows of ONE recording, each a list of string fields where
        index 3 is the start time, index 4 the duration and index 7 the
        speaker field.

    Returns
    -------
    list
        Rows sorted by start time in which overlapping (or touching) rows
        are collapsed into a single row whose speaker field is "overlap".
        An empty input yields an empty list. The input rows are not
        modified; the returned rows are copies.
    """
    # A recording without any SPEAKER row used to crash on rttm_segs[0].
    if not rttm_segs:
        return []

    # Work on copies so neither the caller's list order nor its rows change.
    ordered = sorted((list(row) for row in rttm_segs),
                     key=lambda x: float(x[3]))

    # First interval is taken as it is
    merged_segs = [ordered[0]]
    strt = float(ordered[0][3])
    end = strt + float(ordered[0][4])

    for row in ordered[1:]:
        s = float(row[3])
        e = s + float(row[4])

        if is_overlapped(end, s):
            # Only the end can grow; the start stays that of the open segment.
            end = max(end, e)
            merged_segs[-1][3] = str(round(strt, 4))
            merged_segs[-1][4] = str(round((end - strt), 4))
            # The individual speaker IDs are lost once rows are merged.
            merged_segs[-1][7] = "overlap"
        else:
            # Add a new disjoint segment (still carries its one speaker ID)
            strt = s
            end = e
            merged_segs.append(row)

    return merged_segs


def get_subsegments(merged_segs, max_subseg_dur=3.0, overlap=1.5):
    """Cuts segments longer than ``max_subseg_dur`` into shorter windows.

    Arguments
    ---------
    merged_segs : list
        RTTM rows (lists of string fields; index 1 is the recording ID,
        index 3 the start, index 4 the duration, index 7 the speaker field).
    max_subseg_dur : float
        Rows whose duration exceeds this value are split.
    overlap : float
        Consecutive windows start ``max_subseg_dur - overlap`` apart.

    Returns
    -------
    list
        RTTM rows; rows that are short enough are passed through unchanged.
    """

    hop = max_subseg_dur - overlap
    out_rows = []

    # These rows are in RTTM format
    for seg in merged_segs:
        duration = float(seg[4])
        rec_id = seg[1]

        if not duration > max_subseg_dur:
            out_rows.append(seg)
            continue

        n_windows = int(duration / hop)
        first = float(seg[3])
        last = first + duration

        for idx in range(n_windows):
            win_start = first + idx * hop
            # Each window is 0.01 sec shorter than max_subseg_dur and is
            # clipped to the end of the parent segment.
            win_end = min(win_start + max_subseg_dur - 0.01, last)

            out_rows.append([
                "SPEAKER",
                rec_id,
                "0",
                str(round(float(win_start), 4)),
                str(round(float(win_end - win_start), 4)),
                "<NA>",
                "<NA>",
                seg[7],
                "<NA>",
                "<NA>",
            ])

            # Stop as soon as the parent segment's end has been reached
            if win_end >= last:
                break

    return out_rows


def prepare_metadata(rttm_file, save_dir, data_dir, filename, max_subseg_dur,
                     overlap, mic_type):
    """Derives merged segments, sub-segments and the JSON meta file of a split.

    For every recording listed in the SPKR-INFO headers of ``rttm_file`` the
    SPEAKER rows are merged, cut into sub-segments and finally described in
    a JSON file keyed by ``<rec_id>_<start>_<end>``.

    Arguments
    ---------
    rttm_file : str
        Reference RTTM file of the split.
    save_dir : str
        Directory receiving ``<filename>.segments.rttm``,
        ``<filename>.subsegments.rttm`` and
        ``<filename>.<mic_type>.subsegs.json``.
    data_dir : str
        Root of the audio corpus, used to build wav paths.
    filename : str
        Prefix of the output files, e.g. "ami_dev".
    max_subseg_dur : float
        Maximum sub-segment duration in seconds.
    overlap : float
        Overlap in seconds between adjacent sub-segments.
    mic_type : str
        Microphone type; "Array1" produces a list of 8 wav files per entry
        (key "files"), anything else a single wav path (key "file").
    """
    # Read the RTTM once: collect recording IDs from the headers and group
    # the SPEAKER rows by their exact recording ID (field 1).
    rec_ids = set()
    segs_by_rec = {}
    with open(rttm_file, "r") as f:
        for line in f:
            # Strip only the line terminator; slicing off the last character
            # would damage a final line that has no trailing newline.
            entry = line.rstrip("\r\n")
            if entry.startswith("SPKR-INFO"):
                rec_ids.add(entry.split(" ")[1])
            elif entry.startswith("SPEAKER "):
                fields = entry.split(" ")
                segs_by_rec.setdefault(fields[1], []).append(fields)

    # For each recording merge segments and then perform subsegmentation
    MERGED_SEGMENTS = []
    SUBSEGMENTS = []
    # Sorted just to make the JSON come out in a proper sequence
    for rec_id in sorted(rec_ids):
        gt_rttm_segs = segs_by_rec.get(rec_id)
        if not gt_rttm_segs:
            # Header present but no speech rows: nothing to merge.
            logger.debug("No SPEAKER rows found for recording %s" % rec_id)
            continue

        # Merge, subsegment and then convert to json format.
        # The speaker_ID is lost for rows that get merged.
        merged_segs = merge_rttm_intervals(gt_rttm_segs)
        MERGED_SEGMENTS.extend(merged_segs)

        # Divide segments into smaller sub-segments
        SUBSEGMENTS.extend(
            get_subsegments(merged_segs, max_subseg_dur, overlap))

    # Write segment AND sub-segments (in RTTM format)
    segs_file = os.path.join(save_dir, filename + ".segments.rttm")
    subsegment_file = os.path.join(save_dir, filename + ".subsegments.rttm")

    for out_path, rows in ((segs_file, MERGED_SEGMENTS),
                           (subsegment_file, SUBSEGMENTS)):
        with open(out_path, "w") as f:
            for row in rows:
                f.write("%s\n" % " ".join(row))

    # Create JSON from subsegments
    json_dict = {}
    for row in SUBSEGMENTS:
        rec_id = row[1]
        strt = str(round(float(row[3]), 4))
        end = str(round((float(row[3]) + float(row[4])), 4))
        subsegment_ID = rec_id + "_" + strt + "_" + end

        wav_entry = {
            "duration": float(row[4]),
            "start": int(float(strt) * SAMPLERATE),
            "stop": int(float(end) * SAMPLERATE),
        }

        audio_dir = data_dir + "/" + rec_id + "/audio/" + rec_id + "."
        if mic_type == "Array1":
            # Multi-mic audio: all 8 channels, key "files" (with 's')
            wav_entry = dict(
                files=[
                    audio_dir + mic_type + "-" + str(i + 1).zfill(2) + ".wav"
                    for i in range(8)
                ],
                **wav_entry)
        else:
            # Single-mic audio: key "file" (without 's')
            wav_entry = dict(file=audio_dir + mic_type + ".wav", **wav_entry)

        json_dict[subsegment_ID] = {"wav": wav_entry}

    out_json_file = os.path.join(save_dir,
                                 filename + "." + mic_type + ".subsegs.json")
    with open(out_json_file, mode="w") as json_f:
        json.dump(json_dict, json_f, indent=2)

    msg = "%s JSON prepared" % (out_json_file)
    logger.debug(msg)


def skip(save_folder, conf, meta_files, opt_file):
    """
    Decides whether the AMI data preparation can be skipped.

    Arguments
    ---------
    save_folder : str
        Folder in which the options file of a previous run is looked up.
    conf : dict
        Options of the current run.
    meta_files : list
        Paths of the meta (json) files that a finished run leaves behind.
    opt_file : str
        File name (inside ``save_folder``) of the pickled options.

    Returns
    -------
    bool
        True only if every meta file exists, the options file exists and
        its content is equal to ``conf``; False otherwise.
    """
    # Every meta (json) file has to be present
    if not all(os.path.isfile(meta_path) for meta_path in meta_files):
        return False

    # ... and so does the options file of the earlier run
    saved_opts_path = os.path.join(save_folder, opt_file)
    if not os.path.isfile(saved_opts_path):
        return False

    # Finally the earlier options must match the current ones
    return bool(load_pkl(saved_opts_path) == conf)


# Command line entry point: parses the options and runs the AMI preparation.
if __name__ == '__main__':

    parser = argparse.ArgumentParser(
        prog='python ami_prepare.py  --data_folder /home/data/ami/amicorpus \
            --manual_annot_folder /home/data/ami/ami_public_manual_1.6.2 \
            --save_folder ./results/ --ref_rttm_dir ./results/ref_rttms \
            --meta_data_dir ./results/metadata',
        description='AMI Data preparation')
    parser.add_argument(
        '--data_folder',
        required=True,
        help='Path to the folder where the original amicorpus is stored')
    parser.add_argument(
        '--manual_annot_folder',
        required=True,
        help='Directory where the manual annotations are stored')
    parser.add_argument(
        '--save_folder', required=True, help='The save directory in results')
    parser.add_argument(
        '--ref_rttm_dir',
        required=True,
        help='Directory to store reference RTTM files')
    parser.add_argument(
        '--meta_data_dir',
        required=True,
        help='Directory to store the meta data (json) files')
    parser.add_argument(
        '--split_type',
        default="full_corpus_asr",
        help='Standard dataset split. See ami_splits.py for more information')
    parser.add_argument(
        '--skip_TNO',
        default=True,
        type=strtobool,
        help='Skips TNO meeting recordings if True')
    parser.add_argument(
        '--mic_type',
        default="Mix-Headset",
        help='Type of microphone to be used')
    parser.add_argument(
        '--vad_type',
        default="oracle",
        help='Type of VAD. Kept for future when VAD will be added')
    parser.add_argument(
        '--max_subseg_dur',
        default=3.0,
        type=float,
        help='Duration in seconds of a subsegments to be prepared from larger segments'
    )
    parser.add_argument(
        '--overlap',
        default=1.5,
        type=float,
        help='Overlap duration in seconds between adjacent subsegments')

    args = parser.parse_args()

    # Forward EVERY parsed option; previously only the five folder arguments
    # were passed, so the remaining flags were silently ignored.
    prepare_ami(
        data_folder=args.data_folder,
        manual_annot_folder=args.manual_annot_folder,
        save_folder=args.save_folder,
        ref_rttm_dir=args.ref_rttm_dir,
        meta_data_dir=args.meta_data_dir,
        split_type=args.split_type,
        # prepare_segs_for_RTTM tests `skip_TNO is True`, so hand over a
        # genuine bool whatever strtobool returns.
        skip_TNO=bool(args.skip_TNO),
        mic_type=args.mic_type,
        vad_type=args.vad_type,
        max_subseg_dur=args.max_subseg_dur,
        overlap=args.overlap, )


================================================
FILE: examples/ami/sd0/local/ami_splits.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
The AMI corpus contains 100 hours of meeting recordings.
This script returns the standard train, dev and eval split for AMI corpus.
For more information on dataset please refer to http://groups.inf.ed.ac.uk/ami/corpus/datasets.shtml
"""

ALLOWED_OPTIONS = ["scenario_only", "full_corpus", "full_corpus_asr"]

# Scenario meetings that make up the "scenario_only" split.
_SCENARIO_TRAIN = (
    "ES2002", "ES2005", "ES2006", "ES2007", "ES2008", "ES2009", "ES2010",
    "ES2012", "ES2013", "ES2015", "ES2016",
    "IS1000", "IS1001", "IS1002", "IS1003", "IS1004", "IS1005", "IS1006",
    "IS1007",
    "TS3005", "TS3008", "TS3009", "TS3010", "TS3011", "TS3012",
)
_SCENARIO_DEV = ("ES2003", "ES2011", "IS1008", "TS3004", "TS3006")
_SCENARIO_TEST = ("ES2004", "ES2014", "IS1009", "TS3003", "TS3007")

# Non-scenario meetings added by the "full_corpus*" splits.
_NON_SCENARIO_TRAIN = (
    "EN2001", "EN2003", "EN2004", "EN2005", "EN2006", "EN2009",
    "IN1001", "IN1002", "IN1005", "IN1007", "IN1008", "IN1009", "IN1012",
    "IN1013", "IN1014", "IN1016",
)
_NON_SCENARIO_DEV = (
    "IB4001", "IB4002", "IB4003", "IB4004", "IB4010", "IB4011", )

# split option -> (train, dev, test) meeting IDs
_AMI_SPLITS = {
    "scenario_only": (_SCENARIO_TRAIN, _SCENARIO_DEV, _SCENARIO_TEST),
    "full_corpus": (
        # train: SA (TRAINING PART OF SEEN DATA)
        _SCENARIO_TRAIN + _NON_SCENARIO_TRAIN,
        # dev: SB (DEV PART OF SEEN DATA)
        _SCENARIO_DEV + _NON_SCENARIO_DEV,
        # test: SC (UNSEEN DATA FOR EVALUATION)
        # Note that IB4005 does not appear because it has speakers in common
        # with two sets of data.
        _SCENARIO_TEST + ("EN2002", ),
    ),
    "full_corpus_asr": (
        (
            "ES2002", "ES2003", "ES2005", "ES2006", "ES2007", "ES2008",
            "ES2009", "ES2010", "ES2012", "ES2013", "ES2014", "ES2015",
            "ES2016",
            "IS1000", "IS1001", "IS1002", "IS1003", "IS1004", "IS1005",
            "IS1006", "IS1007",
            "TS3005", "TS3006", "TS3007", "TS3008", "TS3009", "TS3010",
            "TS3011", "TS3012",
        ) + _NON_SCENARIO_TRAIN,
        ("ES2011", "IS1008", "TS3004") + _NON_SCENARIO_DEV,
        ("ES2004", "IS1009", "TS3003", "EN2002"),
    ),
}


def get_AMI_split(split_option):
    """
    Looks up the standard train, dev and test meeting IDs of a split.

    Arguments
    ---------
    split_option: str
        The standard split option.
        Allowed options: "scenario_only", "full_corpus", "full_corpus_asr"

    Returns
    -------
        Three new lists with the meeting IDs of the train, dev and test
        sets. For an option that is not allowed a message is printed and
        None is returned.
    """

    if split_option not in ALLOWED_OPTIONS:
        print(
            f'Invalid split "{split_option}" requested!\nValid split_options are: ',
            ALLOWED_OPTIONS, )
        return

    train_ids, dev_ids, test_ids = _AMI_SPLITS[split_option]
    # Fresh lists on every call, so callers may modify them freely.
    return list(train_ids), list(dev_ids), list(test_ids)


================================================
FILE: examples/ami/sd0/local/compute_embdding.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import json
import os
import pickle
import sys

import numpy as np
import paddle
from paddle.io import BatchSampler
from paddle.io import DataLoader
from tqdm.contrib import tqdm
from yacs.config import CfgNode

from paddlespeech.s2t.utils.log import Log
from paddlespeech.vector.cluster.diarization import EmbeddingMeta
from paddlespeech.vector.io.batch import batch_feature_normalize
from paddlespeech.vector.io.dataset_from_json import JSONDataset
from paddlespeech.vector.models.ecapa_tdnn import EcapaTdnn
from paddlespeech.vector.modules.sid_model import SpeakerIdetification
from paddlespeech.vector.training.seeding import seed_everything

# Logger setup
logger = Log(__name__).getlog()


def prepare_subset_json(full_meta_data, rec_id, out_meta_file):
    """Writes the metadata entries of one recording to a JSON file.

    Arguments
    ---------
    full_meta_data : dict
        Full meta data (loaded json) covering all recordings.
    rec_id : str
        Recording ID; every entry whose key (as a string) starts with this
        value is kept.
    out_meta_file : str
        Path of the output meta (json) file.
    """

    selected = {
        entry_id: full_meta_data[entry_id]
        for entry_id in full_meta_data if str(entry_id).startswith(rec_id)
    }

    with open(out_meta_file, mode="w") as out_f:
        json.dump(selected, out_f, indent=2)


def create_dataloader(json_file, batch_size):
    """Builds a shuffled DataLoader over the segments listed in a JSON file.

    Arguments
    ---------
    json_file : str
        Meta (json) file handed to ``JSONDataset``.
    batch_size : int
        Number of items per batch.

    Returns
    -------
    paddle.io.DataLoader
        Loader yielding batches collated by ``batch_feature_normalize``
        with mean normalization on and std normalization off.

    Note
    ----
    Feature settings are read from the module-level ``config`` object, which
    is only created in the ``__main__`` section of this script.
    """

    def _collate(samples):
        # Mean-normalize only; the variance is left untouched.
        return batch_feature_normalize(
            samples, mean_norm=True, std_norm=False)

    # Mel-spectrogram features configured through the global config
    segments = JSONDataset(
        json_file=json_file,
        feat_type='melspectrogram',
        n_mels=config.n_mels,
        window_size=config.window_size,
        hop_length=config.hop_size)

    sampler = BatchSampler(segments, batch_size=batch_size, shuffle=True)

    return DataLoader(
        segments,
        batch_sampler=sampler,
        collate_fn=_collate,
        return_list=True)


def main(args, config):
    """Extracts and stores speaker embeddings for every recording of a split.

    For each recording ID found in the split's sub-segment JSON a
    per-recording JSON is written, and - unless the embedding file already
    exists - the embeddings of all its sub-segments are computed with the
    model backbone and pickled as an ``EmbeddingMeta`` object.

    Arguments
    ---------
    args : argparse.Namespace
        Uses ``device``, ``load_checkpoint``, ``data_dir`` and ``dataset``.
        ``load_checkpoint`` is replaced by its absolute path.
    config : yacs.config.CfgNode
        Uses ``seed``, ``model``, ``meta_data_dir``, ``mic_type``,
        ``embedding_dir``, ``batch_size`` and ``emb_dim``.
    """
    # set the training device, cpu or gpu
    paddle.set_device(args.device)
    # set the random seed
    seed_everything(config.seed)

    # stage1: build the dnn backbone model network
    ecapa_tdnn = EcapaTdnn(**config.model)

    # stage2: build the speaker verification eval instance with backbone model
    model = SpeakerIdetification(backbone=ecapa_tdnn, num_class=1)

    # stage3: load the pre-trained model
    args.load_checkpoint = os.path.abspath(
        os.path.expanduser(args.load_checkpoint))

    # load model checkpoint to sid model
    state_dict = paddle.load(
        os.path.join(args.load_checkpoint, 'model.pdparams'))
    model.set_state_dict(state_dict)
    logger.info(f'Checkpoint loaded from {args.load_checkpoint}')

    # set the model to eval mode
    model.eval()

    # load meta data
    meta_file = os.path.join(
        args.data_dir,
        config.meta_data_dir,
        "ami_" + args.dataset + "." + config.mic_type + ".subsegs.json", )
    with open(meta_file, "r") as f:
        full_meta = json.load(f)

    # get all the recording IDs in this dataset. Every key counts: the JSON
    # has no header entry, so dropping the first key could lose a recording.
    all_rec_ids = sorted({key.rstrip().split("_")[0] for key in full_meta})
    split = "AMI_" + args.dataset

    msg = "Extra embdding for " + args.dataset + " set"
    logger.info(msg)

    if len(all_rec_ids) <= 0:
        msg = "No recording IDs found! Please check if meta_data json file is properly generated."
        logger.error(msg)
        sys.exit()

    # embedding directory (shared by all recordings of the split).
    emb_dir = os.path.join(args.data_dir, config.embedding_dir, split)
    os.makedirs(emb_dir, exist_ok=True)

    # extract the embeddings of each recording in the dataset.
    for i, rec_id in enumerate(tqdm(all_rec_ids), start=1):
        # This tag will be displayed in the log.
        tag = ("[" + str(args.dataset) + ": " + str(i) + "/" +
               str(len(all_rec_ids)) + "]")

        # log message.
        msg = "Embdding %s : %s " % (tag, rec_id)
        logger.debug(msg)

        # file to store embeddings.
        emb_file_name = rec_id + "." + config.mic_type + ".emb_stat.pkl"
        diary_stat_emb_file = os.path.join(emb_dir, emb_file_name)

        # prepare a metadata (json) for one recording. This is basically a
        # subset of full_meta, kept in the embedding directory itself.
        json_file_name = rec_id + "." + config.mic_type + ".json"
        meta_per_rec_file = os.path.join(emb_dir, json_file_name)

        # write subset (meta for one recording) json metadata.
        prepare_subset_json(full_meta, rec_id, meta_per_rec_file)

        # extract embeddings (skip if already done).
        if os.path.isfile(diary_stat_emb_file):
            logger.debug("Skipping embedding extraction (as already present).")
            continue

        # The data loader is only needed when embeddings are computed.
        diary_set_loader = create_dataloader(meta_per_rec_file,
                                             config.batch_size)

        logger.debug("Extracting deep embeddings")
        # The empty float64 seed keeps the result's dtype and (0, emb_dim)
        # shape identical to per-batch concatenation, even without batches.
        emb_chunks = [np.empty(shape=[0, config.emb_dim], dtype=np.float64)]
        segset = []

        for batch_idx, batch in enumerate(tqdm(diary_set_loader)):
            # extract the audio embedding
            ids, feats, lengths = batch['ids'], batch['feats'], batch[
                'lengths']
            segset.extend(ids)
            emb = model.backbone(feats, lengths).squeeze(
                -1).numpy()  # (N, emb_size, 1) -> (N, emb_size)
            emb_chunks.append(emb)

        # Concatenate once instead of re-copying the array for every batch.
        embeddings = np.concatenate(emb_chunks, axis=0)

        segset = np.array(segset, dtype="|O")
        stat_obj = EmbeddingMeta(
            segset=segset,
            stats=embeddings, )
        logger.debug("Saving Embeddings...")
        with open(diary_stat_emb_file, "wb") as output:
            pickle.dump(stat_obj, output)


# Begin experiment!
if __name__ == "__main__":
    # The first positional parameter of ArgumentParser is `prog`; the module
    # docstring belongs in `description`.
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '--device',
        default="gpu",
        help="Select which device to perform diarization, defaults to gpu.")
    parser.add_argument(
        "--config", default=None, type=str, help="configuration file")
    parser.add_argument(
        "--data-dir",
        default="../save/",
        type=str,
        help="processsed data directory")
    parser.add_argument(
        "--dataset",
        choices=['dev', 'eval'],
        default="dev",
        type=str,
        help="Select which dataset to extra embdding, defaults to dev")
    parser.add_argument(
        "--load-checkpoint",
        type=str,
        default='',
        help="Directory to load model checkpoint to compute embeddings.")
    args = parser.parse_args()

    # `config` must stay a module-level name: create_dataloader reads it as
    # a global.
    config = CfgNode(new_allowed=True)
    if args.config:
        config.merge_from_file(args.config)

    config.freeze()

    main(args, config)


================================================
FILE: examples/ami/sd0/local/dataio.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Data reading and writing.
"""
import os
import pickle


def save_pkl(obj, file):
    """Pickle ``obj`` into the file at path ``file``.

    The destination is opened in binary write mode, so an existing file at
    that path is overwritten.

    Arguments
    ---------
    obj : object
        The (picklable) object to serialize.
    file : str
        Path of the output file.

    Example
    -------
    >>> tmpfile = os.path.join(getfixture('tmpdir'), "example.pkl")
    >>> save_pkl([1, 2, 3, 4, 5], tmpfile)
    >>> load_pkl(tmpfile)
    [1, 2, 3, 4, 5]
    """
    with open(file, mode="wb") as sink:
        pickle.dump(obj, sink)


def load_pickle(pickle_path):
    """Read a ``.pkl`` file and return the object stored in it.

    Arguments
    ---------
    pickle_path : str
        Path to the pickle file; it is opened in binary read mode.

    Returns
    -------
    out : object
        The Python object deserialized from the file.
    """
    with open(pickle_path, mode="rb") as source:
        return pickle.load(source)


def load_pkl(file):
    """Loads a pkl file, cooperating with other readers through a lock file.

    For an example, see `save_pkl`.

    A sibling file named ``file + ".lock"`` is used as an advisory lock: the
    function waits while that file exists (polling once per second, at most
    100 times), then creates it, reads the pickle and finally removes it.
    After the 100 polls it proceeds even if the lock is still present.

    Arguments
    ---------
    file : str
        Path to the input pkl file.

    Returns
    -------
    The loaded object.

    Raises
    ------
    OSError
        If the lock file cannot be created or ``file`` cannot be opened.
    """
    # Local import: the polling loop needs `time.sleep`, and this module's
    # header only imports `os` and `pickle`, so the original code raised
    # NameError as soon as a lock file was actually present.
    import time

    lock_path = file + ".lock"

    # Deals with the situation where two processes are trying
    # to access the same label dictionary by creating a lock
    for _ in range(100):
        if not os.path.isfile(lock_path):
            break
        time.sleep(1)

    try:
        open(lock_path, "w").close()
        # NOTE: pickle.load executes arbitrary code from the file; only use
        # this on files produced by a trusted pipeline.
        with open(file, "rb") as f:
            return pickle.load(f)
    finally:
        # Always release the lock, including when loading failed.
        if os.path.isfile(lock_path):
            os.remove(lock_path)


================================================
FILE: examples/ami/sd0/local/experiment.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import glob
import json
import os
import pickle
import shutil
import sys

import numpy as np
from tqdm.contrib import tqdm
from yacs.config import CfgNode

from paddlespeech.s2t.utils.log import Log
from paddlespeech.vector.cluster import diarization as diar
from utils.DER import DER

# Logger setup
logger = Log(__name__).getlog()


def diarize_dataset(
        full_meta,
        split_type,
        n_lambdas,
        pval,
        save_dir,
        config,
        n_neighbors=10, ):
    """Diarize every recording of one AMI split and concatenate the RTTM outputs.

    For each recording ID derived from the keys of ``full_meta`` the
    pre-computed embedding pickle is loaded and handed, together with the
    per-recording output path, to the clustering backend selected by
    ``config.backend`` ("kmeans", "SC" or "AHC"). Afterwards all ``*.rttm``
    files found in the output directory are concatenated into
    ``sys_output.rttm``.

    Arguments
    ---------
    full_meta : dict
        Sub-segment metadata. Only the keys are used; the text before the
        first "_" of each key is taken as the recording ID.
    split_type : str
        Split name (e.g. "dev" or "eval"); used in paths and log messages.
    n_lambdas : int or None
        Number of speakers used when ``config.affinity == "nn"`` and oracle
        speaker counts are disabled.
    pval : float or None
        Value forwarded to the backend; for AHC it is used as the threshold.
    save_dir : str
        Root directory containing the reference RTTMs and embeddings and
        receiving the system RTTMs.
    config : CfgNode
        Reads ``oracle_n_spkrs``, ``ref_rttm_dir``, ``affinity``, ``backend``,
        ``sys_rttm_dir``, ``mic_type`` and ``embedding_dir``.
    n_neighbors : int
        Forwarded to the spectral-clustering backend only.

    Returns
    -------
    str
        Path of the concatenated system RTTM file.

    Raises
    ------
    SystemExit
        With exit status 1 when an embedding file is missing, so that calling
        shell scripts can detect the failure (the original exited with 0).
    """

    # prepare `spkr_info` only once when Oracle num of speakers is selected.
    # spkr_info is essential to obtain number of speakers from groundtruth.
    if config.oracle_n_spkrs is True:
        full_ref_rttm_file = os.path.join(save_dir, config.ref_rttm_dir,
                                          "fullref_ami_" + split_type + ".rttm")
        rttm = diar.read_rttm(full_ref_rttm_file)

        spkr_info = list(  # noqa F841
            filter(lambda x: x.startswith("SPKR-INFO"), rttm))

    # get all the recording IDs in this dataset.
    all_keys = full_meta.keys()
    A = [word.rstrip().split("_")[0] for word in all_keys]
    # NOTE(review): `A[1:]` skips the first key. This only loses a recording
    # when that recording has a single sub-segment and it comes first; looks
    # like a leftover from a header-bearing format. Confirm before changing.
    all_rec_ids = list(set(A[1:]))
    all_rec_ids.sort()
    split = "AMI_" + split_type

    # adding tag for directory path.
    type_of_num_spkr = "oracle" if config.oracle_n_spkrs else "est"
    tag = (type_of_num_spkr + "_" + str(config.affinity) + "_" + config.backend)

    # make out rttm dir (exist_ok avoids the check-then-create race).
    out_rttm_dir = os.path.join(save_dir, config.sys_rttm_dir, config.mic_type,
                                split, tag)
    os.makedirs(out_rttm_dir, exist_ok=True)

    # diarizing different recordings in a dataset.
    for i, rec_id in enumerate(tqdm(all_rec_ids), start=1):
        # this tag will be displayed in the log.
        log_tag = ("[" + str(split_type) + ": " + str(i) + "/" +
                   str(len(all_rec_ids)) + "]")

        # log message.
        msg = "Diarizing %s : %s " % (log_tag, rec_id)
        logger.debug(msg)

        # load embeddings.
        emb_file_name = rec_id + "." + config.mic_type + ".emb_stat.pkl"
        diary_stat_emb_file = os.path.join(save_dir, config.embedding_dir,
                                           split, emb_file_name)
        if not os.path.isfile(diary_stat_emb_file):
            msg = "Embdding file %s not found! Please check if embdding file is properly generated." % (
                diary_stat_emb_file)
            logger.error(msg)
            # Non-zero status: this is an error, not a normal termination.
            sys.exit(1)
        with open(diary_stat_emb_file, "rb") as in_file:
            diary_obj = pickle.load(in_file)

        out_rttm_file = out_rttm_dir + "/" + rec_id + ".rttm"

        # processing starts from here.
        if config.oracle_n_spkrs is True:
            # oracle num of speakers.
            num_spkrs = diar.get_oracle_num_spkrs(rec_id, spkr_info)
        elif config.affinity == "nn":
            # num of speakers tuned on dev set (only for nn affinity).
            num_spkrs = n_lambdas
        else:
            # num of speakers will be estimated using max eigen gap for cos based affinity.
            # so adding None here. Will use this None later-on.
            num_spkrs = None

        # `config.backend` holds a single value, so at most one branch runs.
        if config.backend == "kmeans":
            diar.do_kmeans_clustering(
                diary_obj,
                out_rttm_file,
                rec_id,
                num_spkrs,
                pval, )
        elif config.backend == "SC":
            # go for Spectral Clustering (SC).
            diar.do_spec_clustering(
                diary_obj,
                out_rttm_file,
                rec_id,
                num_spkrs,
                pval,
                config.affinity,
                n_neighbors, )
        elif config.backend == "AHC":
            # pval for AHC is nothing but threshold.
            threshold = pval
            diar.do_AHC(diary_obj, out_rttm_file, rec_id, num_spkrs, threshold)

    # once all RTTM outputs are generated, concatenate individual RTTM files to obtain single RTTM file.
    # this is not needed but just staying with the standards.
    concate_rttm_file = out_rttm_dir + "/sys_output.rttm"
    logger.debug("Concatenating individual RTTM files...")
    with open(concate_rttm_file, "w") as cat_file:
        # glob returns files in arbitrary order; sort for a reproducible output.
        for f in sorted(glob.glob(out_rttm_dir + "/*.rttm")):
            # the concatenated file itself also matches the pattern.
            if f == concate_rttm_file:
                continue
            with open(f, "r") as indi_rttm_file:
                shutil.copyfileobj(indi_rttm_file, cat_file)

    msg = "The system generated RTTM file for %s set : %s" % (
        split_type, concate_rttm_file, )
    logger.debug(msg)

    return concate_rttm_file


def dev_pval_tuner(full_meta, save_dir, config):
    """Pick the p-value that yields the lowest DER on the AMI dev set.

    Every candidate in ``np.arange(0.002, 0.015, 0.001)`` is used to diarize
    the whole dev set, and the resulting system RTTM is scored against
    ``fullref_ami_dev.rttm``. The p-value is the one used so that only p% of
    the values in each affinity-matrix row is retained.

    Arguments
    ---------
    full_meta : dict
        Dev-set metadata, forwarded to `diarize_dataset`.
    save_dir : str
        Root directory holding the reference RTTMs; also forwarded to
        `diarize_dataset`.
    config : CfgNode
        Reads ``ref_rttm_dir``, ``ignore_overlap``, ``forgiveness_collar``,
        ``oracle_n_spkrs`` and ``backend``.

    Returns
    -------
    The candidate (an element of the numpy range) with the minimum DER; the
    first one wins on ties.
    """

    candidates = np.arange(0.002, 0.015, 0.001)
    scores = []

    for candidate in candidates:
        # Diarize the complete dev set with this candidate; the speaker-count
        # argument is left as None.
        sys_rttm_file = diarize_dataset(full_meta, "dev", None, candidate,
                                        save_dir, config)

        ref_rttm_file = os.path.join(save_dir, config.ref_rttm_dir,
                                     "fullref_ami_dev.rttm")
        _, _, _, der_value = DER(
            ref_rttm_file,
            sys_rttm_file,
            config.ignore_overlap,
            config.forgiveness_collar, )
        scores.append(der_value)

        # With oracle speaker counts and kmeans a single pass is enough.
        # SC needs the search in both oracle and estimated modes, and kmeans
        # needs it when the number of speakers is estimated.
        if config.oracle_n_spkrs is True and config.backend == "kmeans":
            break

    # Index of the first minimum DER selects the tuned value.
    best_index = scores.index(min(scores))
    return candidates[best_index]


def dev_ahc_threshold_tuner(full_meta, save_dir, config):
    """Pick the AHC threshold that yields the lowest DER on the AMI dev set.

    Called when AHC is the clustering backend. Each value in
    ``np.arange(0.0, 1.0, 0.1)`` is passed to `diarize_dataset` in the
    ``pval`` position (which AHC uses as its threshold), and the resulting
    RTTM is scored against ``fullref_ami_dev.rttm``.

    Arguments
    ---------
    full_meta : dict
        Dev-set metadata, forwarded to `diarize_dataset`.
    save_dir : str
        Root directory holding the reference RTTMs; also forwarded to
        `diarize_dataset`.
    config : CfgNode
        Reads ``ref_rttm_dir``, ``ignore_overlap``, ``forgiveness_collar``
        and ``oracle_n_spkrs``.

    Returns
    -------
    The threshold (an element of the numpy range) with the minimum DER; the
    first one wins on ties.
    """

    thresholds = np.arange(0.0, 1.0, 0.1)
    scores = []

    for threshold in thresholds:
        # Diarize the complete dev set with this threshold; the speaker-count
        # argument is left as None.
        sys_rttm = diarize_dataset(full_meta, "dev", None, threshold,
                                   save_dir, config)

        ref_rttm = os.path.join(save_dir, config.ref_rttm_dir,
                                "fullref_ami_dev.rttm")
        _, _, _, der_value = DER(
            ref_rttm,
            sys_rttm,
            config.ignore_overlap,
            config.forgiveness_collar, )
        scores.append(der_value)

        # With oracle speaker counts there is no need for a threshold search.
        if config.oracle_n_spkrs is True:
            break

    # Index of the first minimum DER selects the tuned threshold.
    best_index = scores.index(min(scores))
    return thresholds[best_index]


def dev_nn_tuner(full_meta, split_type, save_dir, config):
    """Tuning n_neighbors on dev set. Assuming oracle num of speakers.
    This is used when nn based affinity is selected.

    Each ``n_neighbors`` value in ``range(5, 15)`` is used to diarize the
    whole dev set, and the result is scored against ``fullref_ami_dev.rttm``.

    Arguments
    ---------
    full_meta : dict
        Dev-set metadata, forwarded to `diarize_dataset`.
    split_type : str
        Accepted for interface compatibility but not used: tuning always runs
        on the "dev" split.
    save_dir : str
        Root directory holding the reference RTTMs; also forwarded to
        `diarize_dataset`.
    config : CfgNode
        Reads ``ref_rttm_dir``, ``ignore_overlap``, ``forgiveness_collar``,
        ``oracle_n_spkrs`` and ``backend``.

    Returns
    -------
    int
        The ``n_neighbors`` value with the minimum DER (the smallest such
        value on ties).
    """

    DER_list = []

    # No p-value is tuned here; None is forwarded to `diarize_dataset`.
    # (The original passed the undefined name `p_v`, raising NameError.)
    pval = None

    # Now assuming oracle num of speakers.
    n_lambdas = 4

    for nn in range(5, 15):

        # Process whole dataset for this number of neighbors.
        concate_rttm_file = diarize_dataset(full_meta, "dev", n_lambdas, pval,
                                            save_dir, config, nn)

        ref_rttm = os.path.join(save_dir, config.ref_rttm_dir,
                                "fullref_ami_dev.rttm")
        sys_rttm = concate_rttm_file
        [MS, FA, SER, DER_] = DER(
            ref_rttm,
            sys_rttm,
            config.ignore_overlap,
            config.forgiveness_collar, )

        DER_list.append([nn, DER_])

        # A single pass is enough for kmeans with oracle speaker counts.
        if config.oracle_n_spkrs is True and config.backend == "kmeans":
            break

    # Stable sort by DER keeps the smallest nn first among equal scores.
    DER_list.sort(key=lambda x: x[1])
    tunned_nn = DER_list[0]

    return tunned_nn[0]


def dev_tuner(full_meta, split_type, save_dir, config):
    """Tuning n_components on dev set. Used for nn based affinity matrix.
    Note: This is a very basic tuning for nn based affinity.
    This is work in progress till we find a better way.

    Every speaker count from 1 to ``config.max_num_spkrs`` (inclusive) is
    used to diarize the whole dev set, and the result is scored against
    ``fullref_ami_dev.rttm``.

    Arguments
    ---------
    full_meta : dict
        Dev-set metadata, forwarded to `diarize_dataset`.
    split_type : str
        Accepted for interface compatibility but not used: tuning always runs
        on the "dev" split.
    save_dir : str
        Root directory holding the reference RTTMs; also forwarded to
        `diarize_dataset`.
    config : CfgNode
        Reads ``max_num_spkrs``, ``ref_rttm_dir``, ``ignore_overlap`` and
        ``forgiveness_collar``.

    Returns
    -------
    int
        The ``n_lambdas`` value with the minimum DER (the smallest such value
        on ties).
    """

    DER_list = []

    # No p-value is tuned here; None is forwarded to `diarize_dataset`.
    # (The original passed the undefined name `p_v`, raising NameError.)
    pval = None
    for n_lambdas in range(1, config.max_num_spkrs + 1):

        # Process whole dataset for value of n_lambdas.
        concate_rttm_file = diarize_dataset(full_meta, "dev", n_lambdas, pval,
                                            save_dir, config)

        ref_rttm = os.path.join(save_dir, config.ref_rttm_dir,
                                "fullref_ami_dev.rttm")
        sys_rttm = concate_rttm_file
        [MS, FA, SER, DER_] = DER(
            ref_rttm,
            sys_rttm,
            config.ignore_overlap,
            config.forgiveness_collar, )

        DER_list.append(DER_)

    # Take n_lambdas with minimum DER (+1 because the range starts at 1).
    tuned_n_lambdas = DER_list.index(min(DER_list)) + 1

    return tuned_n_lambdas


def main(args, config):
    """Tune hyper-parameters on AMI dev, then diarize and score dev and eval.

    Steps:
      1. Load the dev sub-segment metadata and tune whichever
         hyper-parameters the configured affinity/backend combination needs.
      2. Load the eval metadata.
      3. Diarize both splits with the tuned values, compute the DER against
         the reference RTTMs and write per-file DERs into ``config.der_dir``.

    Arguments
    ---------
    args : argparse.Namespace
        Only ``args.data_dir`` is read; it is the root of all input and
        output paths.
    config : CfgNode
        Reads ``meta_data_dir``, ``mic_type``, ``affinity``, ``backend``,
        ``oracle_n_spkrs``, ``ref_rttm_dir``, ``der_dir``, ``ignore_overlap``
        and ``forgiveness_collar`` (the tuner/diarization helpers read more).
    """
    # AMI Dev Set: Tune hyperparams on dev set.
    # Read the metadata file for dev set generated during embedding compute
    dev_meta_file = os.path.join(
        args.data_dir,
        config.meta_data_dir,
        "ami_dev." + config.mic_type + ".subsegs.json", )
    with open(dev_meta_file, "r") as f:
        full_meta_dev = json.load(f)

    # Processing starts from here
    # The following lines select the tuning routine for the configured backend
    # and affinity matrix, and find the best hyper-parameters on the dev set.
    best_nn = None
    if config.affinity == "nn":
        logger.info("Tuning for nn (Multiple iterations over AMI Dev set)")
        # The tuner takes (full_meta, split_type, save_dir, config); the
        # original call omitted split_type and raised TypeError.
        best_nn = dev_nn_tuner(full_meta_dev, "dev", args.data_dir, config)

    n_lambdas = None
    best_pval = None

    if config.affinity == "cos" and config.backend in ("SC", "kmeans"):
        # oracle num_spkrs or not, doesn't matter for kmeans and SC backends
        # cos: Tune for the best pval for SC /kmeans (for unknown num of spkrs)
        logger.info(
            "Tuning for p-value for SC (Multiple iterations over AMI Dev set)")
        best_pval = dev_pval_tuner(full_meta_dev, args.data_dir, config)

    elif config.backend == "AHC":
        logger.info("Tuning for threshold-value for AHC")
        # For AHC the threshold travels in the pval slot.
        best_pval = dev_ahc_threshold_tuner(full_meta_dev, args.data_dir,
                                            config)
    else:
        # NN for unknown num of speakers (can be used in future)
        if config.oracle_n_spkrs is False:
            # nn: Tune num of number of components (to be updated later)
            logger.info(
                "Tuning for number of eigen components for NN (Multiple iterations over AMI Dev set)"
            )
            # dev_tuner used for tuning num of components in NN. Can be used in future.
            # Same signature fix as for dev_nn_tuner: pass split_type.
            n_lambdas = dev_tuner(full_meta_dev, "dev", args.data_dir, config)

    # load 'eval' metadata file ('dev' is already loaded).
    eval_meta_file = os.path.join(
        args.data_dir,
        config.meta_data_dir,
        "ami_eval." + config.mic_type + ".subsegs.json", )
    with open(eval_meta_file, "r") as f:
        full_meta_eval = json.load(f)

    # tag to be appended to final output DER files. Writing DER for individual files.
    type_of_num_spkr = "oracle" if config.oracle_n_spkrs else "est"
    tag = (
        type_of_num_spkr + "_" + str(config.affinity) + "." + config.mic_type)

    # perform final diarization on 'dev' and 'eval' with best hyperparams.
    final_DERs = {}
    out_der_dir = os.path.join(args.data_dir, config.der_dir)
    os.makedirs(out_der_dir, exist_ok=True)

    for split_type, full_meta in (("dev", full_meta_dev),
                                  ("eval", full_meta_eval)):
        # performing diarization.
        msg = "Diarizing using best hyperparams: " + split_type + " set"
        logger.info(msg)
        out_boundaries = diarize_dataset(
            full_meta,
            split_type,
            n_lambdas=n_lambdas,
            pval=best_pval,
            n_neighbors=best_nn,
            save_dir=args.data_dir,
            config=config)

        # computing DER.
        msg = "Computing DERs for " + split_type + " set"
        logger.info(msg)
        ref_rttm = os.path.join(args.data_dir, config.ref_rttm_dir,
                                "fullref_ami_" + split_type + ".rttm")
        sys_rttm = out_boundaries
        [MS, FA, SER, DER_vals] = DER(
            ref_rttm,
            sys_rttm,
            config.ignore_overlap,
            config.forgiveness_collar,
            individual_file_scores=True, )

        # writing DER values to a file. Append tag.
        der_file_name = split_type + "_DER_" + tag
        out_der_file = os.path.join(out_der_dir, der_file_name)
        msg = "Writing DER file to: " + out_der_file
        logger.info(msg)
        diar.write_ders_file(ref_rttm, DER_vals, out_der_file)

        # The last entry of DER_vals is reported as the split-level DER.
        msg = ("AMI " + split_type + " set DER = %s %%\n" %
               (str(round(DER_vals[-1], 2))))
        logger.info(msg)
        final_DERs[split_type] = round(DER_vals[-1], 2)

    # final print DERs
    msg = (
        "Final Diarization Error Rate (%%) on AMI corpus: Dev = %s %% | Eval = %s %%\n"
        % (str(final_DERs["dev"]), str(final_DERs["eval"])))
    logger.info(msg)


if __name__ == "__main__":
    # Command-line entry point: parse the options, load the YAML configuration
    # and run the tuning + diarization experiment.
    #
    # The module docstring is passed as `description`. The first positional
    # parameter of ArgumentParser is `prog`, so passing `__doc__` positionally
    # would use it as the program name in the usage line.
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--config", default=None, type=str, help="configuration file")
    parser.add_argument(
        "--data-dir",
        default="../data/",
        type=str,
        help="processsed data directory")
    args = parser.parse_args()

    # Start from an empty node; the YAML file (if given) supplies every key.
    config = CfgNode(new_allowed=True)
    if args.config:
        config.merge_from_file(args.config)

    # Freeze the configuration before it is handed to the experiment code.
    config.freeze()

    main(args, config)


================================================
FILE: examples/ami/sd0/local/process.sh
================================================
#!/bin/bash

# AMI speaker-diarization pipeline:
#   stage 0: prepare AMI reference RTTMs and metadata
#   stage 1: extract embeddings for the dev and eval sets
#   stage 2: tune hyper-parameters on dev, then diarize and score dev and eval
#
# Usage: process.sh [--stage N] <data_folder> <manual_annot_folder> \
#            <save_folder> <pretrained_model_dir> <conf_path> <device>
#
# Every stage aborts the script with status 1 when its Python step fails, so
# a later stage never runs on incomplete outputs.

stage=0
# NOTE(review): `set` is not referenced anywhere else in this script;
# presumably kept as an option for parse_options.sh. Confirm before removing.
set=L

. ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
set -o pipefail

data_folder=$1
manual_annot_folder=$2
save_folder=$3
pretrained_model_dir=$4
conf_path=$5
device=$6

ref_rttm_dir=${save_folder}/ref_rttms
meta_data_dir=${save_folder}/metadata

if [ ${stage} -le 0 ]; then
    echo "AMI Data preparation"
    python local/ami_prepare.py  --data_folder ${data_folder} \
            --manual_annot_folder ${manual_annot_folder} \
            --save_folder ${save_folder} --ref_rttm_dir ${ref_rttm_dir} \
            --meta_data_dir ${meta_data_dir} 
    
    if [ $? -ne 0 ]; then
        echo "Prepare AMI failed. Please check log message."
        exit 1
    fi
    echo "AMI data preparation done."           
fi

if [ ${stage} -le 1 ]; then
    # extract embeddings for dev and eval dataset
    for name in dev eval; do
        python local/compute_embdding.py --config ${conf_path} \
                --data-dir ${save_folder} \
                --device ${device} \
                --dataset ${name} \
                --load-checkpoint ${pretrained_model_dir}

        # Stop here: stage 2 needs the embeddings of both sets.
        if [ $? -ne 0 ]; then
            echo "Compute embedding for ${name} set failed. Please check log message."
            exit 1
        fi
    done
fi

if [ ${stage} -le 2 ]; then
    # tune hyperparams on dev set
    # perform final diarization on 'dev' and 'eval' with best hyperparams
    python local/experiment.py --config ${conf_path} \
            --data-dir ${save_folder}

    # Propagate the failure to the caller (run.sh uses `|| exit 1`).
    if [ $? -ne 0 ]; then
        echo "Diarization experiment failed. Please check log message."
        exit 1
    fi
fi


================================================
FILE: examples/ami/sd0/path.sh
================================================
# Environment setup for this recipe; meant to be sourced, not executed.

# Repository root: three directory levels above the current working directory
# (assumes the caller sources this file from the recipe directory).
export MAIN_ROOT=$(realpath ${PWD}/../../../)

# Make the repository root and its utils/ scripts available on PATH.
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export LC_ALL=C

# Python settings: skip writing bytecode files, use UTF-8 for Python I/O to
# avoid UnicodeDecodeError when LC_ALL=C, and make the repository importable.
export PYTHONDONTWRITEBYTECODE=1
export PYTHONIOENCODING=UTF-8
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

# Also search /usr/local/lib/ for shared libraries.
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/

# model exp
#MODEL=ECAPA_TDNN
#export BIN_DIR=${MAIN_ROOT}/paddlespeech/vector/exps/${MODEL}/bin


================================================
FILE: examples/ami/sd0/run.sh
================================================
#!/bin/bash

# Top-level driver of the AMI speaker-diarization recipe: downloads the
# pretrained ECAPA-TDNN checkpoint and then runs local/process.sh.

# path.sh exports MAIN_ROOT, which is needed below to locate parse_options.sh.
. ./path.sh || exit 1;
# Abort on the first failing command.
set -e

stage=0

# Location of the AMI corpus and its manual annotations.
#TARGET_DIR=${MAIN_ROOT}/dataset/ami
TARGET_DIR=/home/dataset/AMI
data_folder=${TARGET_DIR}/amicorpus #e.g., /path/to/amicorpus/
manual_annot_folder=${TARGET_DIR}/ami_public_manual_1.6.2 #e.g., /path/to/ami_public_manual_1.6.2/

# Output directory, pretrained model location, model config and device.
save_folder=./save
pretraind_model_dir=${save_folder}/sv0_ecapa_tdnn_voxceleb12_ckpt_0_1_1/model
conf_path=conf/ecapa_tdnn.yaml
device=gpu

# Presumably lets the variables above be overridden from the command line
# (e.g. --stage N) - confirm in utils/parse_options.sh.
. ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

# Runs when stage is 0 or 1.
if [ $stage -le 1 ]; then
    # Download the pretrained model
    wget https://paddlespeech.cdn.bcebos.com/vector/voxceleb/sv0_ecapa_tdnn_voxceleb12_ckpt_0_1_1.tar.gz
    # Unpack into ${save_folder}, then delete the downloaded archive.
    mkdir -p ${save_folder} && tar -xvf sv0_ecapa_tdnn_voxceleb12_ckpt_0_1_1.tar.gz -C ${save_folder}
    rm -rf sv0_ecapa_tdnn_voxceleb12_ckpt_0_1_1.tar.gz
    echo "download the pretrained ECAPA-TDNN Model to path: "${pretraind_model_dir}
fi

# Runs when stage is 0, 1 or 2. Note that `stage` is not forwarded, so
# process.sh starts from its own default stage.
if [ $stage -le 2 ]; then
    # Tune hyperparams on dev set and perform final diarization on dev and eval with best hyperparams.
    echo ${data_folder} ${manual_annot_folder} ${save_folder} ${pretraind_model_dir} ${conf_path}
    bash ./local/process.sh ${data_folder} ${manual_annot_folder} \
        ${save_folder} ${pretraind_model_dir} ${conf_path} ${device} || exit 1
fi


================================================
FILE: examples/callcenter/README.md
================================================
# Callcenter 8k sample rate

This recipe only provides the model and data configs for 8 kHz ASR; users need to prepare the data and generate the manifest metafiles themselves. See the Aishell or LibriSpeech recipes for examples.


================================================
FILE: examples/callcenter/asr1/.gitignore
================================================
data
exp
*.profile


================================================
FILE: examples/callcenter/asr1/RESULTS.md
================================================
# MandarinK8

## Conformer

| Model | Params | Config | Augmentation| Test set | Decode method | Loss | CER |  
| --- | --- | --- | --- | --- | --- | --- | --- |  
| conformer | 45.73 M | conf/conformer.yaml | spec_aug + shift | test | attention | 2.1794936656951904 | 0.102304 |  
| conformer | 45.73 M | conf/conformer.yaml | spec_aug + shift | test | ctc_greedy_search | 2.1794936656951904 | 0.084295 |  
| conformer | 45.73 M | conf/conformer.yaml | spec_aug + shift | test | ctc_prefix_beam_search | 2.1794936656951904 | 0.084340 |  
| conformer | 45.73 M | conf/conformer.yaml | spec_aug + shift | test | attention_rescoring | 2.1794936656951904 | 0.081675 |  


## Chunk Conformer

| Model | Params | Config | Augmentation| Test set | Decode method | Chunk Size & Left Chunks | Loss | CER |  
| --- | --- | --- | --- | --- | --- | --- | --- | --- |  
| conformer | 45.73 M | conf/chunk_conformer.yaml | spec_aug + shift | test | attention | 16, -1 | 2.23287845  | 0.087982 |  
| conformer | 45.73 M | conf/chunk_conformer.yaml | spec_aug + shift | test | ctc_greedy_search | 16, -1 | 2.23287845  | 0.086962 |  
| conformer | 45.73 M | conf/chunk_conformer.yaml | spec_aug + shift | test | ctc_prefix_beam_search | 16, -1 | 2.23287845 | 0.086741 |  
| conformer | 45.73 M | conf/chunk_conformer.yaml | spec_aug + shift | test | attention_rescoring | 16, -1 | 2.23287845 | 0.083495 |


================================================
FILE: examples/callcenter/asr1/conf/augmentation.json
================================================
[
  {
    "type": "speed",
    "params": {
      "min_speed_rate": 0.9,
      "max_speed_rate": 1.1,
      "num_rates": 3
    },
    "prob": 0.0
  },
  {
    "type": "shift",
    "params": {
      "min_shift_ms": -5,
      "max_shift_ms": 5
    },
    "prob": 1.0
  },
  {
    "type": "specaug",
    "params": {
      "F": 10,
      "T": 50,
      "n_freq_masks": 2,
      "n_time_masks": 2,
      "p": 1.0,
      "W": 80,
      "adaptive_number_ratio": 0,
      "adaptive_size_ratio": 0,
      "max_n_time_masks": 20,
      "replace_with_zero": true
    },
    "prob": 1.0
  }
]


================================================
FILE: examples/callcenter/asr1/conf/chunk_conformer.yaml
================================================
# https://yaml.org/type/float.html
###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.train
dev_manifest: data/manifest.dev
test_manifest: data/manifest.test

  
###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_char/vocab.txt 
unit_type: 'char'
spm_model_prefix: ''
preprocess_config: conf/preprocess.yaml
batch_size: 32
raw_wav: True  # use raw_wav or kaldi feature
spectrum_type: fbank #linear, mfcc, fbank
feat_dim: 80
delta_delta: False
dither: 1.0
target_sample_rate: 8000
max_freq: None
n_fft: None
stride_ms: 10.0
window_ms: 25.0
use_dB_normalization: True 
target_dB: -20
random_seed: 0
keep_transcription_text: False
sortagrad: True 
shuffle_method: batch_shuffle
num_workers: 2


############################################
#           Network Architecture           #
############################################
cmvn_file: 
cmvn_file_type: "json"
# encoder related
encoder: conformer
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 or conv2d8
    normalize_before: True
    use_cnn_module: True
    cnn_module_kernel: 15
    activation_type: 'swish'
    pos_enc_layer_type: 'rel_pos'
    selfattention_layer_type: 'rel_selfattn'
    causal: true
    use_dynamic_chunk: true
    cnn_module_norm: 'layer_norm' # using nn.LayerNorm makes model converge faster
    use_dynamic_left_chunk: false

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    ctc_weight: 0.3
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false

###########################################
#                Training                 #
###########################################
n_epoch: 240
accum_grad: 4
global_grad_clip: 5.0
optim: adam
optim_conf:
  lr: 0.001
  weight_decay: 1.0e-6
scheduler: warmuplr     
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 100
checkpoint:
  kbest_n: 50
  latest_n: 5


================================================
FILE: examples/callcenter/asr1/conf/conformer.yaml
================================================
# https://yaml.org/type/float.html
###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.train
dev_manifest: data/manifest.dev
test_manifest: data/manifest.test


###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_char/vocab.txt 
unit_type: 'char'
spm_model_prefix: ''
preprocess_config: conf/preprocess.yaml
feat_dim: 80
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 64
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 0
subsampling_factor: 1
num_encs: 1

############################################
#           Network Architecture           #
############################################
cmvn_file: 
cmvn_file_type: "json"
# encoder related
encoder: conformer
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 or conv2d8
    normalize_before: True
    use_cnn_module: True
    cnn_module_kernel: 15
    activation_type: 'swish'
    pos_enc_layer_type: 'rel_pos'
    selfattention_layer_type: 'rel_selfattn'

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    ctc_weight: 0.3
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false


###########################################
#                Training                 #
###########################################
n_epoch: 100 # 50 will be lowest 
accum_grad: 4
global_grad_clip: 5.0
optim: adam
optim_conf:
  lr: 0.002
  weight_decay: 1.0e-6
scheduler: warmuplr     
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 100
checkpoint:
  kbest_n: 50
  latest_n: 5


================================================
FILE: examples/callcenter/asr1/conf/preprocess.yaml
================================================
process:
  # extract kaldi fbank from PCM
  - type: fbank_kaldi
    fs: 8000
    n_mels: 80
    n_shift: 160
    win_length: 400
    dither: 0.1
  - type: cmvn_json
    cmvn_path: data/mean_std.json
  # these three processes are a.k.a. SpecAugment
  - type: time_warp
    max_time_warp: 5
    inplace: true
    mode: PIL
  - type: freq_mask
    F: 30
    n_mask: 2
    inplace: true
    replace_with_zero: false
  - type: time_mask
    T: 40
    n_mask: 2
    inplace: true
    replace_with_zero: false


================================================
FILE: examples/callcenter/asr1/conf/tuning/chunk_decode.yaml
================================================
decode_batch_size: 128
error_rate_type: cer 
decoding_method: attention # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
beam_size: 10
ctc_weight: 0.5 # ctc weight for attention rescoring decode mode.
decoding_chunk_size: -1 # decoding chunk size. Defaults to -1.
    # <0: for decoding, use full chunk.
    # >0: for decoding, use fixed chunk size as set.
    # 0: used for training, it's prohibited here. 
num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
simulate_streaming: true  # simulate streaming inference. Defaults to False.

================================================
FILE: examples/callcenter/asr1/conf/tuning/decode.yaml
================================================
decode_batch_size: 128
error_rate_type: cer 
decoding_method: attention # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
beam_size: 10
ctc_weight: 0.5 # ctc weight for attention rescoring decode mode.
decoding_chunk_size: -1 # decoding chunk size. Defaults to -1.
    # <0: for decoding, use full chunk.
    # >0: for decoding, use fixed chunk size as set.
    # 0: used for training, it's prohibited here. 
num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
simulate_streaming: False  # simulate streaming inference. Defaults to False.


================================================
FILE: examples/callcenter/asr1/local/align.sh
================================================
#! /usr/bin/env bash

# Run CTC alignment with a trained checkpoint.
#
# Usage: align.sh config_path decode_config_path ckpt_path_prefix
#
# The alignment result is written under a directory named after the
# checkpoint prefix; BIN_DIR must point at the directory containing
# alignment.py (it is not set in this script).

if [ $# != 3 ];then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix"
    exit -1
fi

# Number of GPUs = number of comma-separated entries in CUDA_VISIBLE_DEVICES.
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
decode_config_path=$2
ckpt_prefix=$3

# Fixed: the variable name was misspelled (`ckpt_prefxi`), so `basename` was
# called without an operand and printed an error.
ckpt_name=$(basename ${ckpt_prefix})

mkdir -p exp


batch_size=1
output_dir=${ckpt_prefix}
mkdir -p ${output_dir}

# align dump in `result_file`
# .tier, .TextGrid dump in `dir of result_file`
# NOTE(review): `${type}` is never set in this script, so the result file is
# `${output_dir}/.align` unless the caller exports `type` - confirm intent.
python3 -u ${BIN_DIR}/alignment.py \
--ngpu ${ngpu} \
--config ${config_path} \
--decode_cfg ${decode_config_path} \
--result_file ${output_dir}/${type}.align \
--checkpoint_path ${ckpt_prefix} \
--opts decode.decode_batch_size ${batch_size}

if [ $? -ne 0 ]; then
    echo "Failed in ctc alignment!"
    exit 1
fi

exit 0


================================================
FILE: examples/callcenter/asr1/local/data.sh
================================================
#! /usr/bin/env bash

# Data preparation for the callcenter 8k ASR recipe. The user-provided
# manifests data/manifest.{train,dev,test} must already exist.
#   stage -1: rename the manifests to *.raw
#   stage  0: compute mean/stddev (CMVN) statistics on the train manifest
#   stage  1: build the character vocabulary from the train manifest
#   stage  2: format all manifests (token ids, vocab size) in parallel

stage=-1
stop_stage=100
dict_dir=data/lang_char

source ${MAIN_ROOT}/utils/parse_options.sh

mkdir -p data
mkdir -p ${dict_dir}

if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
    # keep the user-provided manifests as *.raw; stage 2 regenerates the
    # final data/manifest.<dataset> files from them.
    for dataset in train dev test; do
        mv data/manifest.${dataset} data/manifest.${dataset}.raw
    done
fi

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # compute mean and stddev for normalizer
    num_workers=$(nproc)
    python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
    --manifest_path="data/manifest.train.raw" \
    --spectrum_type="fbank" \
    --feat_dim=80 \
    --delta_delta=false \
    --stride_ms=10 \
    --window_ms=25 \
    --sample_rate=8000 \
    --use_dB_normalization=False \
    --num_samples=-1 \
    --num_workers=${num_workers} \
    --output_path="data/mean_std.json"

    if [ $? -ne 0 ]; then
        echo "Compute mean and stddev failed. Terminated."
        exit 1
    fi
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # build vocabulary
    python3 ${MAIN_ROOT}/utils/build_vocab.py \
    --unit_type="char" \
    --count_threshold=0 \
    --vocab_path="${dict_dir}/vocab.txt" \
    --manifest_paths "data/manifest.train.raw"

    if [ $? -ne 0 ]; then
        echo "Build vocabulary failed. Terminated."
        exit 1
    fi
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # format manifest with tokenids, vocab size
    # Each dataset is formatted in a background job. `exit 1` inside the job
    # only ends that job, and a bare `wait` always returns 0, so the job PIDs
    # are collected and waited on one by one to propagate failures.
    pids=()
    for dataset in train dev test; do
    {
        python3 ${MAIN_ROOT}/utils/format_data.py \
            --cmvn_path "data/mean_std.json" \
            --unit_type "char" \
            --vocab_path="${dict_dir}/vocab.txt" \
            --manifest_path="data/manifest.${dataset}.raw" \
            --output_path="data/manifest.${dataset}"

        if [ $? -ne 0 ]; then
            echo "Formt mnaifest failed. Terminated."
            exit 1
        fi
    } &
    pids+=($!)
    done

    failed=0
    for pid in "${pids[@]}"; do
        # `wait <pid>` returns the exit status of that background job.
        wait ${pid} || failed=1
    done
    if [ ${failed} -ne 0 ]; then
        exit 1
    fi
fi

echo "data preparation done."
exit 0


================================================
FILE: examples/callcenter/asr1/local/download_lm_ch.sh
================================================
#!/bin/bash

# Download the Chinese language model into data/lm.
# `download` is not defined in this script; presumably it is provided by the
# sourced utility.sh -- it is called with (url, md5, target path).
source ${MAIN_ROOT}/utils/utility.sh

DIR=data/lm
URL='https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm'
MD5="29e02312deb2e59b3c8686c7966d4fe3"
TARGET=${DIR}/zh_giga.no_cna_cmn.prune01244.klm

mkdir -p ${DIR}

echo "Download language model ..."
if ! download $URL $MD5 $TARGET; then
    echo "Fail to download the language model!"
    exit 1
fi

exit 0


================================================
FILE: examples/callcenter/asr1/local/export.sh
================================================
#! /usr/bin/env bash

# Export a checkpoint with ${BIN_DIR}/export.py.
# Takes exactly three positional arguments: the model config, the checkpoint
# path prefix and the export destination.

if [ $# -ne 3 ]; then
    echo "usage: $0 config_path ckpt_prefix jit_model_path"
    exit -1
fi

config_path=$1
ckpt_path_prefix=$2
jit_model_export_path=$3

# GPU count = number of comma-separated fields in CUDA_VISIBLE_DEVICES
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

export_args=(
    --ngpu ${ngpu}
    --config ${config_path}
    --checkpoint_path ${ckpt_path_prefix}
    --export_path ${jit_model_export_path}
)

if ! python3 -u ${BIN_DIR}/export.py "${export_args[@]}"; then
    echo "Failed in export!"
    exit 1
fi

exit 0


================================================
FILE: examples/callcenter/asr1/local/test.sh
================================================
#! /usr/bin/env bash

# Decode with ${BIN_DIR}/test.py using each supported decoding method in turn.
#
# usage: test.sh config_path decode_config_path ckpt_path_prefix
#   config_path         passed to test.py as --config
#   decode_config_path  passed to test.py as --decode_cfg
#   ckpt_path_prefix    passed as --checkpoint_path; results are written to
#                       ${ckpt_path_prefix}/<decoding_method>.rsl
#
# Exits 1 as soon as one decoding run fails, 0 when all succeed.

if [ $# != 3 ];then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix"
    exit -1
fi

# number of GPUs = number of comma-separated fields in CUDA_VISIBLE_DEVICES
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
decode_config_path=$2
ckpt_prefix=$3

# must read `ckpt_prefix` (the variable assigned above); a misspelled name
# expands to nothing and makes `basename` fail with "missing operand"
ckpt_name=$(basename ${ckpt_prefix})

mkdir -p exp

# download language model
#bash local/download_lm_ch.sh
#if [ $? -ne 0 ]; then
#    exit 1
#fi

# identical for every decoding method, so set up once
batch_size=1
output_dir=${ckpt_prefix}
mkdir -p ${output_dir}

for type in attention ctc_greedy_search ctc_prefix_beam_search attention_rescoring; do
    echo "decoding ${type}"
    python3 -u ${BIN_DIR}/test.py \
    --ngpu ${ngpu} \
    --config ${config_path} \
    --decode_cfg ${decode_config_path} \
    --result_file ${output_dir}/${type}.rsl \
    --checkpoint_path ${ckpt_prefix} \
    --opts decode.decoding_method ${type} \
    --opts decode.decode_batch_size ${batch_size}

    if [ $? -ne 0 ]; then
        echo "Failed in evaluation!"
        exit 1
    fi
done

exit 0


================================================
FILE: examples/callcenter/asr1/local/train.sh
================================================
#! /usr/bin/env bash

# Train a model with ${BIN_DIR}/train.py.
#
# usage: CUDA_VISIBLE_DEVICES=0 train.sh config_path ckpt_name [ips]
#   config_path  passed to train.py as --config
#   ckpt_name    training output goes to exp/${ckpt_name}
#   ips          optional; forwarded to paddle.distributed.launch as --ips=<ips>
#
# Exits 1 when the training command fails, 0 otherwise.

# accept 2 or 3 arguments; the two range checks must be OR-ed, since no
# argument count is both below 2 and above 3
if [ $# -lt 2 ] || [ $# -gt 3 ];then
    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
    exit -1
fi

# number of GPUs = number of comma-separated fields in CUDA_VISIBLE_DEVICES
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
ckpt_name=$2
ips=$3

if [ -z "${ips}" ];then
  ips_config=
else
  ips_config="--ips=${ips}"
fi

mkdir -p exp

# seed may break model convergence
seed=0
if [ ${seed} != 0 ]; then
    export FLAGS_cudnn_deterministic=True
fi

if [ ${ngpu} == 0 ]; then
python3 -u ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--config ${config_path} \
--output exp/${ckpt_name} \
--seed ${seed}
else
python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--config ${config_path} \
--output exp/${ckpt_name} \
--seed ${seed}
fi
# capture the training status right away: any later command (including the
# `if` below) overwrites $?
train_status=$?

if [ ${seed} != 0 ]; then
    unset FLAGS_cudnn_deterministic
fi

if [ ${train_status} -ne 0 ]; then
    echo "Failed in training!"
    exit 1
fi

exit 0


================================================
FILE: examples/callcenter/asr1/path.sh
================================================
# Environment setup for this recipe; run.sh sources it before anything else.

# MAIN_ROOT is three directory levels above the current working directory
export MAIN_ROOT=$(realpath ${PWD}/../../../)

export LC_ALL=C
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/

# Python settings
export PYTHONDONTWRITEBYTECODE=1
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

# BIN_DIR points at the bin directory of the selected model
MODEL=u2
export BIN_DIR=${MAIN_ROOT}/paddlespeech/s2t/exps/${MODEL}/bin


================================================
FILE: examples/callcenter/asr1/run.sh
================================================
#!/bin/bash
set -e
source path.sh

# defaults; each can be overridden on the command line via parse_options.sh
gpus=0,1,2,3
stage=0
stop_stage=50
conf_path=conf/conformer.yaml
ips=            #xx.xx.xx.xx,xx.xx.xx.xx
decode_conf_path=conf/tuning/decode.yaml
avg_num=20

source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

# true when stage number $1 lies within [stage, stop_stage]
stage_enabled() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

avg_ckpt=avg_${avg_num}
# checkpoint name = config file name up to its first '.'
ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}')
echo "checkpoint name ${ckpt}"

ckpt_dir=exp/${ckpt}/checkpoints

# stage 0: prepare data
if stage_enabled 0; then
    bash ./local/data.sh || exit -1
fi

# stage 1: train model, all `ckpt` under `exp` dir
if stage_enabled 1; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips}
fi

# stage 2: avg n best model
if stage_enabled 2; then
    avg.sh best ${ckpt_dir} ${avg_num}
fi

# stage 3: test ckpt avg_n
if stage_enabled 3; then
    CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} ${ckpt_dir}/${avg_ckpt} || exit -1
fi

# stage 4: ctc alignment of test data
if stage_enabled 4; then
    CUDA_VISIBLE_DEVICES=0 ./local/align.sh ${conf_path} ${decode_conf_path} ${ckpt_dir}/${avg_ckpt} || exit -1
fi

# stage 51: export ckpt avg_n (beyond the default stop_stage of 50, so it only
# runs when stop_stage is raised)
if stage_enabled 51; then
    CUDA_VISIBLE_DEVICES= ./local/export.sh ${conf_path} ${ckpt_dir}/${avg_ckpt} ${ckpt_dir}/${avg_ckpt}.jit
fi


================================================
FILE: examples/canton/tts3/README.md
================================================
# FastSpeech2 with Cantonese language

## Dataset
### Download and Extract
If you don't have the Cantonese datasets, please download and unzip [Guangzhou_Cantonese_Scripted_Speech_Corpus_Daily_Use_Sentence](https://magichub.com/datasets/guangzhou-cantonese-scripted-speech-corpus-daily-use-sentence/) and [Guangzhou_Cantonese_Scripted_Speech_Corpus_in_Vehicle](https://magichub.com/datasets/guangzhou-cantonese-scripted-speech-corpus-in-the-vehicle/) under `~/datasets/`.

To obtain better performance, please combine these two datasets together as follows:

```bash
mkdir -p ~/datasets/canton_all/WAV
cp -r ~/datasets/Guangzhou_Cantonese_Scripted_Speech_Corpus_Daily_Use_Sentence/WAV/* ~/datasets/canton_all/WAV
cp -r ~/datasets/Guangzhou_Cantonese_Scripted_Speech_Corpus_in_Vehicle/WAV/* ~/datasets/canton_all/WAV
```

After that, the directory should look like this:
```
~/datasets/canton_all
│   └── WAV
│       └──G0001
│       └──G0002
│       ...
│       └──G0071
│       └──G0072
```


### Get MFA Result and Extract
We use [MFA1.x](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get durations for canton_fastspeech2.
You can train your own MFA model by referring to the [canton_mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) in our repo (it currently uses MFA1.x).
We here provide the MFA results of these two datasets. [canton_alignment.zip](https://paddlespeech.cdn.bcebos.com/MFA/Canton/canton_alignment.zip)

## Get Started
Assume the path to the Cantonese MFA result of the two datasets mentioned above is `./canton_alignment`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
    - `--stage` controls the vocoder model during synthesis (0 = pwgan, 1 = hifigan).
    - synthesize waveform from text file.
```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```

### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done. A `dump` folder is created in the current directory. The structure of the dump folder is listed below.
```text
dump
├── dev
│   ├── norm
│   └── raw
├── phone_id_map.txt
├── speaker_id_map.txt
├── test
│   ├── norm
│   └── raw
└── train
    ├── energy_stats.npy
    ├── norm
    ├── pitch_stats.npy
    ├── raw
    └── speech_stats.npy
```
The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The raw folder contains speech, pitch, and energy features of each utterance, while the norm folder contains normalized ones. The statistics used to normalize features are computed from the training set, which is located in `dump/train/*_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains phones, text_lengths, speech_lengths, durations, the path of speech features, the path of pitch features, a path of energy features, speaker, and id of each utterance.

### Training details can refer to the script of [examples/aishell3/tts3](../../aishell3/tts3).

## Pretrained Model
Pretrained FastSpeech2 model with no silence in the edge of audios:
- [fastspeech2_canton_ckpt_1.4.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_canton_ckpt_1.4.0.zip)

The static model can be downloaded here:
- [fastspeech2_canton_static_1.4.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_canton_static_1.4.0.zip)

The ONNX model can be downloaded here:  
- [fastspeech2_canton_onnx_1.4.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_canton_onnx_1.4.0.zip)

FastSpeech2 checkpoint contains files listed below.

```text
fastspeech2_canton_ckpt_1.4.0
├── default.yaml            # default config used to train fastspeech2
├── energy_stats.npy        # statistics used to normalize energy when training fastspeech2
├── phone_id_map.txt        # phone vocabulary file when training fastspeech2
├── pitch_stats.npy         # statistics used to normalize pitch when training fastspeech2
├── snapshot_iter_140000.pdz # model parameters and optimizer states
├── speaker_id_map.txt      # speaker id map file when training a multi-speaker fastspeech2
└── speech_stats.npy        # statistics used to normalize spectrogram when training fastspeech2
```
We use [parallel wavegan](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/voc1) as the neural vocoder.
Download the pretrained parallel wavegan model from [pwg_aishell3_ckpt_0.5.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_aishell3_ckpt_0.5.zip) and unzip it.
```bash
unzip pwg_aishell3_ckpt_0.5.zip
```

You can use the following scripts to synthesize for `${BIN_DIR}/../../assets/sentences_canton.txt` using pretrained fastspeech2 and parallel wavegan models.
```bash
source path.sh

FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 ${BIN_DIR}/../synthesize_e2e.py \
  --am=fastspeech2_aishell3 \
  --am_config=fastspeech2_canton_ckpt_1.4.0/default.yaml \
  --am_ckpt=fastspeech2_canton_ckpt_1.4.0/snapshot_iter_140000.pdz \
  --am_stat=fastspeech2_canton_ckpt_1.4.0/speech_stats.npy \
  --voc=pwgan_aishell3 \
  --voc_config=pwg_aishell3_ckpt_0.5/default.yaml \
  --voc_ckpt=pwg_aishell3_ckpt_0.5/snapshot_iter_1000000.pdz \
  --voc_stat=pwg_aishell3_ckpt_0.5/feats_stats.npy \
  --lang=canton \
  --text=${BIN_DIR}/../../assets/sentences_canton.txt \
  --output_dir=exp/default/test_e2e \
  --phones_dict=fastspeech2_canton_ckpt_1.4.0/phone_id_map.txt \
  --speaker_dict=fastspeech2_canton_ckpt_1.4.0/speaker_id_map.txt \
  --spk_id=10 \
  --inference_dir=exp/default/inference
```


================================================
FILE: examples/canton/tts3/conf/default.yaml
================================================
###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################

fs: 24000          # sr
n_fft: 2048        # FFT size (samples).
n_shift: 300       # Hop size (samples). 12.5ms
win_length: 1200   # Window length (samples). 50ms
                   # If set to null, it will be the same as fft_size.
window: "hann"     # Window function.

# Only used for feats_type != raw

fmin: 80           # Minimum frequency of Mel basis.
fmax: 7600         # Maximum frequency of Mel basis.
n_mels: 80         # The number of mel basis.

# Only used for the model using pitch features (e.g. FastSpeech2)
# The Cantonese datasets we use differ from others such as Databaker or LJSpeech,
# so we set f0min to 110 to avoid producing too many zero-pitch frames.
# Reference: https://github.com/JeremyCCHsu/Python-Wrapper-for-World-Vocoder/issues/38
f0min: 110          # Minimum f0 for pitch extraction.
f0max: 400         # Maximum f0 for pitch extraction.


###########################################################
#                       DATA SETTING                      #
###########################################################
batch_size: 32
num_workers: 2


###########################################################
#                       MODEL SETTING                     #
###########################################################
model:
    adim: 384         # attention dimension
    aheads: 2         # number of attention heads
    elayers: 4        # number of encoder layers
    eunits: 1536      # number of encoder ff units
    dlayers: 4        # number of decoder layers
    dunits: 1536      # number of decoder ff units
    positionwise_layer_type: conv1d   # type of position-wise layer
    positionwise_conv_kernel_size: 3  # kernel size of position wise conv layer
    duration_predictor_layers: 2      # number of layers of duration predictor
    duration_predictor_chans: 256     # number of channels of duration predictor
    duration_predictor_kernel_size: 3 # filter size of duration predictor
    postnet_layers: 5                 # number of layers of postnet
    postnet_filts: 5                  # filter size of conv layers in postnet
    postnet_chans: 256                # number of channels of conv layers in postnet
    use_scaled_pos_enc: True          # whether to use scaled positional encoding
    encoder_normalize_before: True    # whether to perform layer normalization before the input
    decoder_normalize_before: True    # whether to perform layer normalization before the input
    reduction_factor: 1               # reduction factor
    init_type: xavier_uniform         # initialization type
    init_enc_alpha: 1.0               # initial value of alpha of encoder scaled position encoding
    init_dec_alpha: 1.0               # initial value of alpha of decoder scaled position encoding
    transformer_enc_dropout_rate: 0.2            # dropout rate for transformer encoder layer
    transformer_enc_positional_dropout_rate: 0.2 # dropout rate for transformer encoder positional encoding
    transformer_enc_attn_dropout_rate: 0.2       # dropout rate for transformer encoder attention layer
    transformer_dec_dropout_rate: 0.2            # dropout rate for transformer decoder layer
    transformer_dec_positional_dropout_rate: 0.2 # dropout rate for transformer decoder positional encoding
    transformer_dec_attn_dropout_rate: 0.2       # dropout rate for transformer decoder attention layer
    pitch_predictor_layers: 5                  # number of conv layers in pitch predictor
    pitch_predictor_chans: 256                 # number of channels of conv layers in pitch predictor
    pitch_predictor_kernel_size: 5             # kernel size of conv layers in pitch predictor
    pitch_predictor_dropout: 0.5               # dropout rate in pitch predictor
    pitch_embed_kernel_size: 1                 # kernel size of conv embedding layer for pitch
    pitch_embed_dropout: 0.0                   # dropout rate after conv embedding layer for pitch
    stop_gradient_from_pitch_predictor: True   # whether to stop the gradient from pitch predictor to encoder
    energy_predictor_layers: 2                 # number of conv layers in energy predictor
    energy_predictor_chans: 256                # number of channels of conv layers in energy predictor
    energy_predictor_kernel_size: 3            # kernel size of conv layers in energy predictor
    energy_predictor_dropout: 0.5              # dropout rate in energy predictor
    energy_embed_kernel_size: 1                # kernel size of conv embedding layer for energy
    energy_embed_dropout: 0.0                  # dropout rate after conv embedding layer for energy
    stop_gradient_from_energy_predictor: False # whether to stop the gradient from energy predictor to encoder
    spk_embed_dim: 256                         # speaker embedding dimension
    spk_embed_integration_type: concat         # speaker embedding integration type


###########################################################
#                       UPDATER SETTING                   #
###########################################################
updater:
    use_masking: True                 # whether to apply masking for padded part in loss calculation


###########################################################
#                     OPTIMIZER SETTING                   #
###########################################################
optimizer:
    optim: adam               # optimizer type
    learning_rate: 0.001      # learning rate

###########################################################
#                     TRAINING SETTING                    #
###########################################################
max_epoch: 1000
num_snapshots: 5


###########################################################
#                       OTHER SETTING                     #
###########################################################
seed: 10086


================================================
FILE: examples/canton/tts3/local/inference.sh
================================================
#!/bin/bash

# Run ${BIN_DIR}/../inference.py on the models stored under
# ${train_output_path}/inference; `stage`/`stop_stage` select the vocoder(s).

train_output_path=$1

stage=0
stop_stage=0

# Run inference with the fastspeech2_canton acoustic model and vocoder $1.
run_inference() {
    local voc=$1
    python3 ${BIN_DIR}/../inference.py \
        --inference_dir=${train_output_path}/inference \
        --am=fastspeech2_canton \
        --voc=${voc} \
        --spk_id=10 \
        --text=${BIN_DIR}/../../assets/sentences_canton.txt \
        --output_dir=${train_output_path}/pd_infer_out \
        --phones_dict=dump/phone_id_map.txt \
        --speaker_dict=dump/speaker_id_map.txt \
        --lang=canton
}

# stage 0: pwgan
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    run_inference pwgan_aishell3
fi

# for more GAN Vocoders
# stage 1: multi band melgan
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    run_inference mb_melgan_csmsc
fi

# stage 2: hifigan
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    run_inference hifigan_csmsc
fi

# stage 3: wavernn
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    run_inference wavernn_csmsc
fi


================================================
FILE: examples/canton/tts3/local/ort_predict.sh
================================================
#!/bin/bash

# Synthesize from text with ${BIN_DIR}/../ort_predict_e2e.py using the models
# stored under ${train_output_path}/inference_onnx.
# `stage`/`stop_stage` select which vocoder(s) to run:
#   0 = pwgan_aishell3, 1 = mb_melgan_csmsc, 2 = hifigan_csmsc

train_output_path=$1

stage=0
stop_stage=0

# Run e2e prediction with the fastspeech2_canton acoustic model and vocoder $1.
# Every stage goes through this helper so that all of them pass the same
# options, including --spk_id.
ort_predict_e2e() {
    local voc=$1
    python3 ${BIN_DIR}/../ort_predict_e2e.py \
        --inference_dir=${train_output_path}/inference_onnx \
        --am=fastspeech2_canton \
        --voc=${voc} \
        --spk_id=10 \
        --output_dir=${train_output_path}/onnx_infer_out_e2e \
        --text=${BIN_DIR}/../../assets/sentences_canton.txt \
        --phones_dict=dump/phone_id_map.txt \
        --speaker_dict=dump/speaker_id_map.txt \
        --lang=canton \
        --device=cpu \
        --cpu_threads=2
}

# e2e, synthesize from text
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    ort_predict_e2e pwgan_aishell3
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    ort_predict_e2e mb_melgan_csmsc
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    ort_predict_e2e hifigan_csmsc
fi


================================================
FILE: examples/canton/tts3/local/preprocess.sh
================================================
#!/bin/bash

# Preprocessing pipeline: durations -> feature extraction -> statistics ->
# normalization. The only argument is the config file path.

stage=0
stop_stage=100

config_path=$1

# stage 0: get durations from MFA's result
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    echo "Generate durations.txt from MFA results ..."
    python3 ${MAIN_ROOT}/utils/gen_duration_from_textgrid.py \
        --inputdir=./canton_alignment \
        --output durations.txt \
        --config=${config_path}
fi

# stage 1: extract features
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    echo "Extract features ..."
    python3 ${BIN_DIR}/preprocess.py \
        --dataset=canton \
        --rootdir=~/datasets/canton_all \
        --dumpdir=dump \
        --dur-file=durations.txt \
        --config=${config_path} \
        --num-cpu=20 \
        --cut-sil=True
fi

# stage 2: get features' stats (mean and std), one run per feature field
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    echo "Get features' stats ..."
    for field in speech pitch energy; do
        python3 ${MAIN_ROOT}/utils/compute_statistics.py \
            --metadata=dump/train/raw/metadata.jsonl \
            --field-name="${field}"
    done
fi

# stage 3: normalize and convert phone/speaker to id; dev and test use the
# stats computed on train
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    echo "Normalize ..."
    for subset in train dev test; do
        python3 ${BIN_DIR}/normalize.py \
            --metadata=dump/${subset}/raw/metadata.jsonl \
            --dumpdir=dump/${subset}/norm \
            --speech-stats=dump/train/speech_stats.npy \
            --pitch-stats=dump/train/pitch_stats.npy \
            --energy-stats=dump/train/energy_stats.npy \
            --phones-dict=dump/phone_id_map.txt \
            --speaker-dict=dump/speaker_id_map.txt
    done
fi


================================================
FILE: examples/canton/tts3/local/synthesize_e2e.sh
================================================
#!/bin/bash

# End-to-end synthesis from a text file with ${BIN_DIR}/../synthesize_e2e.py.
# Positional arguments: config path, training output dir, checkpoint file name,
# and an optional vocoder selector (0 = pwgan, 1 = hifigan; defaults to 0).

config_path=$1
train_output_path=$2
ckpt_name=$3

# both bounds come from the 4th argument, so exactly one stage is selected
stage=${4:-0}
stop_stage=${4:-0}

# Synthesize with vocoder $1, whose files live in directory $2; $3 is the
# vocoder checkpoint file name inside that directory.
synthesize_with_vocoder() {
    local voc=$1
    local voc_dir=$2
    local voc_snapshot=$3
    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 ${BIN_DIR}/../synthesize_e2e.py \
        --am=fastspeech2_canton \
        --am_config=${config_path} \
        --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
        --am_stat=dump/train/speech_stats.npy \
        --voc=${voc} \
        --voc_config=${voc_dir}/default.yaml \
        --voc_ckpt=${voc_dir}/${voc_snapshot} \
        --voc_stat=${voc_dir}/feats_stats.npy \
        --lang=canton \
        --text=${BIN_DIR}/../../assets/sentences_canton.txt \
        --output_dir=${train_output_path}/test_e2e \
        --phones_dict=dump/phone_id_map.txt \
        --speaker_dict=dump/speaker_id_map.txt \
        --spk_id=10 \
        --inference_dir=${train_output_path}/inference
}

# stage 0: pwgan
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    synthesize_with_vocoder pwgan_aishell3 pwg_aishell3_ckpt_0.5 snapshot_iter_1000000.pdz
fi

# stage 1: hifigan
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    echo "in hifigan syn_e2e"
    synthesize_with_vocoder hifigan_aishell3 hifigan_aishell3_ckpt_0.2.0 snapshot_iter_2500000.pdz
fi


================================================
FILE: examples/canton/tts3/run.sh
================================================
#!/bin/bash

set -e
source path.sh

# defaults; each can be overridden on the command line via parse_options.sh
gpus=0,1
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default

ckpt_name=snapshot_iter_140000.pdz

# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this can not be mixed use with `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

# true when stage number $1 lies within [stage, stop_stage]
stage_enabled() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# stage 0: prepare data
if stage_enabled 0; then
    ./local/preprocess.sh ${conf_path} || exit -1
fi

# stage 1: train model, all `ckpt` under `train_output_path/checkpoints/` dir
if stage_enabled 1; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

# stage 2: synthesize; the last argument picks the vocoder
# (0 = pwgan, the default; 1 = hifigan)
if stage_enabled 2; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
fi

# stage 3: synthesize_e2e; the last argument picks the vocoder
# (0 = pwgan, the default; 1 = hifigan)
if stage_enabled 3; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
fi

# stage 4: inference with static model, vocoder is pwgan by default
if stage_enabled 4; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/inference.sh ${train_output_path} || exit -1
fi

# stage 5: paddle2onnx, please make sure the static models are in
# ${train_output_path}/inference first
# we have only tested the following models so far
if stage_enabled 5; then
    # install paddle2onnx
    pip install paddle2onnx --upgrade
    ../../csmsc/tts3/local/paddle2onnx.sh ${train_output_path} inference inference_onnx fastspeech2_canton
    # considering the balance between speed and quality, we recommend that you use hifigan as vocoder
    # other vocoders can be converted the same way, e.g.:
    # ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx pwgan_csmsc
    # ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx mb_melgan_csmsc
    # ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx hifigan_csmsc
    ../../csmsc/tts3/local/paddle2onnx.sh ${train_output_path} inference inference_onnx pwgan_aishell3
fi

# stage 6: inference with onnxruntime
if stage_enabled 6; then
    ./local/ort_predict.sh ${train_output_path}
fi


================================================
FILE: examples/csmsc/README.md
================================================

# CSMSC

* tts0 - Tacotron2
* tts1 - TransformerTTS
* tts2 - SpeedySpeech
* tts3 - FastSpeech2
* tts3_rhy - Using prosody labels in FastSpeech2
* voc0 - WaveFlow
* voc1 - Parallel WaveGAN
* voc2 - MelGAN
* voc3 - MultiBand MelGAN
* voc4 - Style MelGAN
* voc5 - HiFiGAN
* voc6 - WaveRNN


================================================
FILE: examples/csmsc/jets/README.md
================================================
# JETS with CSMSC
This example contains code used to train a [JETS](https://arxiv.org/abs/2203.16852v1) model with [Chinese Standard Mandarin Speech Corpus](https://www.data-baker.com/open_source.html).

## Dataset
### Download and Extract
Download CSMSC from its [official website](https://test.data-baker.com/data/index/TNtts/) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/BZNSYP`.

The structure of the folder is listed below.

```text
└─ Wave
    └─ .wav files (audio speech)
└─ PhoneLabeling
    └─ .interval files (alignment between phoneme and duration)
└─ ProsodyLabeling
   └─ 000001-010000.txt (text with prosodic by pinyin)
```

### Get MFA Result and Extract
We use [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get phonemes and durations for JETS.
You can download the alignment result from here: [baker_alignment_tone.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/BZNSYP/with_tone/baker_alignment_tone.tar.gz), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) in our repo.

## Get Started
Assume the path to the dataset is `~/datasets/BZNSYP`.
Assume the path to the MFA result of CSMSC is `./baker_alignment_tone`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
    - synthesize waveform from a text file.

```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.

```text
dump
├── dev
│   ├── norm
│   └── raw
├── phone_id_map.txt
├── speaker_id_map.txt
├── test
│   ├── norm
│   └── raw
└── train
    ├── feats_stats.npy
    ├── norm
    └── raw
```
The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The raw folder contains the wave, mel spectrogram, speech, pitch, and energy features of each utterance, while the norm folder contains normalized ones. The statistics used to normalize features are computed from the training set and are stored in `dump/train/*_stats.npy` (`feats_stats.npy`, `pitch_stats.npy`, and `energy_stats.npy`).

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains phones, text_lengths, the path of feats, feats_lengths, the path of pitch features, the path of energy features, the path of raw waves, speaker, and the id of each utterance.

### Model Training
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
`./local/train.sh` calls `${BIN_DIR}/train.py`.
Here's the complete help message.
```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU] [--phones-dict PHONES_DICT]

Train a JETS model.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       config file to overwrite default config.
  --train-metadata TRAIN_METADATA
                        training data.
  --dev-metadata DEV_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
  --phones-dict PHONES_DICT
                        phone vocabulary file.
```
1. `--config` is a config file in yaml format to overwrite the default config, which can be found at `conf/default.yaml`.
2. `--train-metadata` and `--dev-metadata` should be the metadata file in the normalized subfolder of `train` and `dev` in the `dump` folder.
3. `--output-dir` is the directory to save the results of the experiment. Checkpoints are saved in `checkpoints/` inside this directory.
4. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.
5. `--phones-dict` is the path of the phone vocabulary file.

### Synthesizing

`./local/synthesize.sh` calls `${BIN_DIR}/synthesize.py`, which can synthesize waveform from `metadata.jsonl`.

```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
```

`./local/synthesize_e2e.sh` calls `${BIN_DIR}/synthesize_e2e.py`, which can synthesize waveform from text file.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name}
```

## Pretrained Model

The pretrained model can be downloaded here:

- [jets_csmsc_ckpt_1.5.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/jets_csmsc_ckpt_1.5.0.zip)

The static model can be downloaded here:

- [jets_csmsc_static_1.5.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/jets_csmsc_static_1.5.0.zip)


================================================
FILE: examples/csmsc/jets/conf/default.yaml
================================================
# This configuration was tested on 4 GPUs (V100) with 32GB GPU
# memory. It takes around 2 weeks to finish the training,
# but a model trained for 100k iters should already generate reasonable results.
###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################

n_mels: 80
fs: 22050         # sr
n_fft: 1024        # FFT size (samples).
n_shift: 256       # Hop size (samples). ~11.6 ms at fs=22050
win_length: null   # Window length (samples).
                   # If set to null, it will be the same as n_fft (1024 samples, ~46.4 ms at fs=22050).
window: "hann"     # Window function.
fmin: 0            # minimum frequency for Mel basis
fmax: null         # maximum frequency for Mel basis
f0min: 80          # Minimum f0 for pitch extraction.
f0max: 400         # Maximum f0 for pitch extraction.


##########################################################
#                  TTS MODEL SETTING                     #
##########################################################
model:
    # generator related
    generator_type: jets_generator
    generator_params:
        adim: 256         # attention dimension
        aheads: 2         # number of attention heads
        elayers: 4        # number of encoder layers
        eunits: 1024      # number of encoder ff units
        dlayers: 4        # number of decoder layers
        dunits: 1024      # number of decoder ff units
        positionwise_layer_type: conv1d   # type of position-wise layer
        positionwise_conv_kernel_size: 3  # kernel size of position wise conv layer
        duration_predictor_layers: 2      # number of layers of duration predictor
        duration_predictor_chans: 256     # number of channels of duration predictor
        duration_predictor_kernel_size: 3 # filter size of duration predictor
        use_masking: True                 # whether to apply masking for padded part in loss calculation
        encoder_normalize_before: True    # whether to perform layer normalization before the input
        decoder_normalize_before: True    # whether to perform layer normalization before the input
        encoder_type: transformer           # encoder type
        decoder_type: transformer           # decoder type
        conformer_rel_pos_type: latest               # relative positional encoding type
        conformer_pos_enc_layer_type: rel_pos        # conformer positional encoding type
        conformer_self_attn_layer_type: rel_selfattn # conformer self-attention type
        conformer_activation_type: swish             # conformer activation type
        use_macaron_style_in_conformer: true         # whether to use macaron style in conformer
        use_cnn_in_conformer: true                   # whether to use CNN in conformer
        conformer_enc_kernel_size: 7                 # kernel size in CNN module of conformer-based encoder
        conformer_dec_kernel_size: 31                # kernel size in CNN module of conformer-based decoder
        init_type: xavier_uniform                    # initialization type
        init_enc_alpha: 1.0                          # initial value of alpha for encoder
        init_dec_alpha: 1.0                          # initial value of alpha for decoder
        transformer_enc_dropout_rate: 0.2            # dropout rate for transformer encoder layer
        transformer_enc_positional_dropout_rate: 0.2 # dropout rate for transformer encoder positional encoding
        transformer_enc_attn_dropout_rate: 0.2       # dropout rate for transformer encoder attention layer
        transformer_dec_dropout_rate: 0.2            # dropout rate for transformer decoder layer
        transformer_dec_positional_dropout_rate: 0.2 # dropout rate for transformer decoder positional encoding
        transformer_dec_attn_dropout_rate: 0.2       # dropout rate for transformer decoder attention layer
        pitch_predictor_layers: 5                    # number of conv layers in pitch predictor
        pitch_predictor_chans: 256                   # number of channels of conv layers in pitch predictor
        pitch_predictor_kernel_size: 5               # kernel size of conv layers in pitch predictor
        pitch_predictor_dropout: 0.5                 # dropout rate in pitch predictor
        pitch_embed_kernel_size: 1                   # kernel size of conv embedding layer for pitch
        pitch_embed_dropout: 0.0                     # dropout rate after conv embedding layer for pitch
        stop_gradient_from_pitch_predictor: true     # whether to stop the gradient from pitch predictor to encoder
        energy_predictor_layers: 2                   # number of conv layers in energy predictor
        energy_predictor_chans: 256                  # number of channels of conv layers in energy predictor
        energy_predictor_kernel_size: 3              # kernel size of conv layers in energy predictor
        energy_predictor_dropout: 0.5                # dropout rate in energy predictor
        energy_embed_kernel_size: 1                  # kernel size of conv embedding layer for energy
        energy_embed_dropout: 0.0                    # dropout rate after conv embedding layer for energy
        stop_gradient_from_energy_predictor: false   # whether to stop the gradient from energy predictor to encoder
        generator_out_channels: 1
        generator_channels: 512
        generator_global_channels: -1
        generator_kernel_size: 7
        generator_upsample_scales: [8, 8, 2, 2]
        generator_upsample_kernel_sizes: [16, 16, 4, 4]
        generator_resblock_kernel_sizes: [3, 7, 11]
        generator_resblock_dilations: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
        generator_use_additional_convs: true
        generator_bias: true
        generator_nonlinear_activation: "leakyrelu"
        generator_nonlinear_activation_params:
            negative_slope: 0.1
        generator_use_weight_norm: true
        segment_size: 64              # segment size for random windowed discriminator

    # discriminator related
    discriminator_type: hifigan_multi_scale_multi_period_discriminator
    discriminator_params:
        scales: 1
        scale_downsample_pooling: "AvgPool1D"
        scale_downsample_pooling_params:
            kernel_size: 4
            stride: 2
            padding: 2
        scale_discriminator_params:
            in_channels: 1
            out_channels: 1
            kernel_sizes: [15, 41, 5, 3]
            channels: 128
            max_downsample_channels: 1024
            max_groups: 16
            bias: True
            downsample_scales: [2, 2, 4, 4, 1]
            nonlinear_activation: "leakyrelu"
            nonlinear_activation_params:
                negative_slope: 0.1
            use_weight_norm: True
            use_spectral_norm: False
        follow_official_norm: False
        periods: [2, 3, 5, 7, 11]
        period_discriminator_params:
            in_channels: 1
            out_channels: 1
            kernel_sizes: [5, 3]
            channels: 32
            downsample_scales: [3, 3, 3, 3, 1]
            max_downsample_channels: 1024
            bias: True
            nonlinear_activation: "leakyrelu"
            nonlinear_activation_params:
                negative_slope: 0.1
            use_weight_norm: True
            use_spectral_norm: False
    # others
    sampling_rate: 22050          # needed in the inference for saving wav
    cache_generator_outputs: True # whether to cache generator outputs in the training
use_alignment_module: False       # whether to use alignment module
      
###########################################################
#                        LOSS SETTING                     #
###########################################################
# loss function related
generator_adv_loss_params:
    average_by_discriminators: False # whether to average loss value by #discriminators
    loss_type: mse                   # loss type, "mse" or "hinge"
discriminator_adv_loss_params:
    average_by_discriminators: False # whether to average loss value by #discriminators
    loss_type: mse                   # loss type, "mse" or "hinge"
feat_match_loss_params:
    average_by_discriminators: False # whether to average loss value by #discriminators
    average_by_layers: False         # whether to average loss value by #layers of each discriminator
    include_final_outputs: True      # whether to include final outputs for loss calculation
mel_loss_params:
    fs: 22050          # must be the same as the training data
    fft_size: 1024        # fft points
    hop_size: 256    # hop size
    win_length: null   # window length
    window: hann       # window type
    num_mels: 80         # number of Mel basis
    fmin: 0            # minimum frequency for Mel basis
    fmax: null         # maximum frequency for Mel basis
    log_base: null     # null represent natural log

###########################################################
#               ADVERSARIAL LOSS SETTING                  #
###########################################################
lambda_adv: 1.0        # loss scaling coefficient for adversarial loss
lambda_mel: 45.0       # loss scaling coefficient for Mel loss
lambda_feat_match: 2.0 # loss scaling coefficient for feat match loss
lambda_var: 1.0        # loss scaling coefficient for duration loss
lambda_align: 2.0      # loss scaling coefficient for alignment loss
# others
sampling_rate: 22050          # needed in the inference for saving wav
cache_generator_outputs: True # whether to cache generator outputs in the training


# extra module for additional inputs
pitch_extract: dio           # pitch extractor type
pitch_extract_conf:
    reduction_factor: 1
    use_token_averaged_f0: false
pitch_normalize: global_mvn  # normalizer for the pitch feature
energy_extract: energy       # energy extractor type
energy_extract_conf:
    reduction_factor: 1
    use_token_averaged_energy: false
energy_normalize: global_mvn # normalizer for the energy feature


###########################################################
#                  DATA LOADER SETTING                    #
###########################################################
batch_size: 32              # Batch size.
num_workers: 4              # Number of workers in DataLoader.

##########################################################
#            OPTIMIZER & SCHEDULER SETTING               #
##########################################################
# optimizer setting for generator
generator_optimizer_params:
    beta1: 0.8
    beta2: 0.99
    epsilon: 1.0e-9
    weight_decay: 0.0
generator_scheduler: exponential_decay
generator_scheduler_params:
    learning_rate: 2.0e-4
    gamma: 0.999875                   

# optimizer setting for discriminator
discriminator_optimizer_params:
    beta1: 0.8
    beta2: 0.99
    epsilon: 1.0e-9
    weight_decay: 0.0
discriminator_scheduler: exponential_decay
discriminator_scheduler_params:
    learning_rate: 2.0e-4          
    gamma: 0.999875
generator_first: True # whether to start updating generator first

##########################################################
#                OTHER TRAINING SETTING                  #
##########################################################
num_snapshots: 10            # max number of snapshots to keep while training
train_max_steps: 350000      # Number of training steps. == total_iters / ngpus, total_iters = 1000000
save_interval_steps: 1000    # Interval steps to save checkpoint.
eval_interval_steps: 250     # Interval steps to evaluate the network.
seed: 777                    # random seed number


================================================
FILE: examples/csmsc/jets/local/inference.sh
================================================
#!/bin/bash
# Run inference.py of the JETS example on the exported static model.
#
# Usage: ./local/inference.sh <train_output_path>
#   train_output_path: experiment directory; `<train_output_path>/inference`
#                      is passed as --inference_dir and
#                      `<train_output_path>/pd_infer_out` as --output_dir.
#
# BIN_DIR must be set in the environment (path.sh exports it).

# Fail early with a usage message instead of silently using "/inference".
train_output_path=${1:?"usage: $0 <train_output_path>"}

stage=0
stop_stage=0

if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    # All expansions are quoted so that paths containing spaces stay one argument.
    python3 "${BIN_DIR}/inference.py" \
        --inference_dir="${train_output_path}/inference" \
        --am=jets_csmsc \
        --text="${BIN_DIR}/../../assets/sentences.txt" \
        --output_dir="${train_output_path}/pd_infer_out" \
        --phones_dict=dump/phone_id_map.txt
fi


================================================
FILE: examples/csmsc/jets/local/preprocess.sh
================================================
#!/bin/bash
# Data preparation for the JETS CSMSC example.
#
# Usage: ./local/preprocess.sh <config_path>
#
# Stages (all of them run with the defaults stage=0, stop_stage=100):
#   0. generate durations.txt from the MFA result in ./baker_alignment_tone
#   1. extract features into ./dump
#   2. compute statistics of the "feats", "pitch" and "energy" fields of the training set
#   3. normalize train/dev/test using the training-set statistics
#
# MAIN_ROOT and BIN_DIR must be set in the environment (path.sh exports them).
set -e
stage=0
stop_stage=100

# Fail early with a usage message instead of running every stage with an empty --config.
config_path=${1:?"usage: $0 <config_path>"}

if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    # get durations from MFA's result
    echo "Generate durations.txt from MFA results ..."
    python3 "${MAIN_ROOT}/utils/gen_duration_from_textgrid.py" \
        --inputdir=./baker_alignment_tone \
        --output=durations.txt \
        --config="${config_path}"
fi

if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    # extract features
    echo "Extract features ..."
    # NOTE: bash does not tilde-expand `~` after `=` in a non-assignment word such
    # as `--rootdir=~/...`; the literal `~` is handed to preprocess.py, which
    # presumably expands it itself (TODO confirm).
    python3 "${BIN_DIR}/preprocess.py" \
        --dataset=baker \
        --rootdir=~/datasets/BZNSYP/ \
        --dumpdir=dump \
        --dur-file=durations.txt \
        --config="${config_path}" \
        --num-cpu=20 \
        --cut-sil=True \
        --token_average=True
fi

if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    # get features' stats (mean and std), one run per field, in the original order
    echo "Get features' stats ..."
    for field in feats pitch energy; do
        python3 "${MAIN_ROOT}/utils/compute_statistics.py" \
            --metadata=dump/train/raw/metadata.jsonl \
            --field-name="${field}"
    done
fi

if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    # normalize and convert phone/speaker to id; dev and test must use train's stats
    echo "Normalize ..."
    for subset in train dev test; do
        python3 "${BIN_DIR}/normalize.py" \
            --metadata="dump/${subset}/raw/metadata.jsonl" \
            --dumpdir="dump/${subset}/norm" \
            --feats-stats=dump/train/feats_stats.npy \
            --pitch-stats=dump/train/pitch_stats.npy \
            --energy-stats=dump/train/energy_stats.npy \
            --phones-dict=dump/phone_id_map.txt \
            --speaker-dict=dump/speaker_id_map.txt
    done
fi


================================================
FILE: examples/csmsc/jets/local/synthesize.sh
================================================
#!/bin/bash
# Run synthesize.py of the JETS example on the normalized test metadata.
#
# Usage: ./local/synthesize.sh <config_path> <train_output_path> <ckpt_name>
#   config_path:       passed as --config
#   train_output_path: experiment directory; the checkpoint is looked up in
#                      `<train_output_path>/checkpoints/` and
#                      `<train_output_path>/test` is passed as --output_dir
#   ckpt_name:         checkpoint file name inside the checkpoints directory
#
# BIN_DIR must be set in the environment (path.sh exports it).

# Fail early with a usage message when a positional argument is missing.
usage="usage: $0 <config_path> <train_output_path> <ckpt_name>"
config_path=${1:?"${usage}"}
train_output_path=${2:?"${usage}"}
ckpt_name=${3:?"${usage}"}
stage=0
stop_stage=0

if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    # The two FLAGS_* assignments are environment variables for this python3
    # command only (Paddle allocator settings).
    # All expansions are quoted so that paths containing spaces stay one argument.
    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 "${BIN_DIR}/synthesize.py" \
        --config="${config_path}" \
        --ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --phones_dict=dump/phone_id_map.txt \
        --test_metadata=dump/test/norm/metadata.jsonl \
        --output_dir="${train_output_path}/test"
fi


================================================
FILE: examples/csmsc/jets/local/synthesize_e2e.sh
================================================
#!/bin/bash
# Run synthesize_e2e.py of the JETS example on the sentences in
# `${BIN_DIR}/../../assets/sentences.txt`.
#
# Usage: ./local/synthesize_e2e.sh <config_path> <train_output_path> <ckpt_name>
#   config_path:       passed as --config
#   train_output_path: experiment directory; the checkpoint is looked up in
#                      `<train_output_path>/checkpoints/`,
#                      `<train_output_path>/test_e2e` is passed as --output_dir and
#                      `<train_output_path>/inference` as --inference_dir
#   ckpt_name:         checkpoint file name inside the checkpoints directory
#
# BIN_DIR must be set in the environment (path.sh exports it).

# Fail early with a usage message when a positional argument is missing.
usage="usage: $0 <config_path> <train_output_path> <ckpt_name>"
config_path=${1:?"${usage}"}
train_output_path=${2:?"${usage}"}
ckpt_name=${3:?"${usage}"}

stage=0
stop_stage=0


if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    # The two FLAGS_* assignments are environment variables for this python3
    # command only (Paddle allocator settings).
    # All expansions are quoted so that paths containing spaces stay one argument.
    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 "${BIN_DIR}/synthesize_e2e.py" \
        --am=jets_csmsc \
        --config="${config_path}" \
        --ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --phones_dict=dump/phone_id_map.txt \
        --output_dir="${train_output_path}/test_e2e" \
        --text="${BIN_DIR}/../../assets/sentences.txt" \
        --inference_dir="${train_output_path}/inference"
fi


================================================
FILE: examples/csmsc/jets/local/train.sh
================================================
#!/bin/bash
# Launch train.py of the JETS example on the normalized train/dev metadata.
#
# Usage: ./local/train.sh <config_path> <train_output_path>
#   config_path:       passed as --config
#   train_output_path: passed as --output-dir
#
# BIN_DIR must be set in the environment (path.sh exports it).
# Note that --ngpu is fixed to 1 here.

# Fail early with a usage message when a positional argument is missing.
usage="usage: $0 <config_path> <train_output_path>"
config_path=${1:?"${usage}"}
train_output_path=${2:?"${usage}"}

# All expansions are quoted so that paths containing spaces stay one argument.
python3 "${BIN_DIR}/train.py" \
    --train-metadata=dump/train/norm/metadata.jsonl \
    --dev-metadata=dump/dev/norm/metadata.jsonl \
    --config="${config_path}" \
    --output-dir="${train_output_path}" \
    --ngpu=1 \
    --phones-dict=dump/phone_id_map.txt


================================================
FILE: examples/csmsc/jets/path.sh
================================================
#!/bin/bash
# Environment setup for this example (run.sh sources this file).
# Exports MAIN_ROOT, PATH, LC_ALL, the PYTHON* variables below, and BIN_DIR.

# Repository root: three directory levels above the current working directory.
# Assignment and export are separate statements so that a failing `realpath`
# is not masked by the exit status of `export`; ${PWD} is quoted so that a
# working directory containing spaces is passed as a single argument.
MAIN_ROOT=$(realpath "${PWD}/../../../")
export MAIN_ROOT

export PATH="${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}"
export LC_ALL=C

export PYTHONDONTWRITEBYTECODE=1
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONPATH="${MAIN_ROOT}:${PYTHONPATH}"

# Directory holding the experiment scripts of the selected model.
MODEL=jets
export BIN_DIR="${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}"


================================================
FILE: examples/csmsc/jets/run.sh
================================================
#!/bin/bash
# Entry point of the JETS CSMSC example. Runs the stages in [stage, stop_stage]:
#   0. preprocess the data     (./local/preprocess.sh)
#   1. train the model         (./local/train.sh)
#   2. synthesize              (./local/synthesize.sh)
#   3. synthesize_e2e          (./local/synthesize_e2e.sh)
#   4. inference               (./local/inference.sh)

set -e
source path.sh

gpus=0
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
# NOTE(review): conf/default.yaml sets num_snapshots: 10 and train_max_steps: 350000;
# confirm that this snapshot still exists when stages 2 and 3 run after a full training.
ckpt_name=snapshot_iter_150000.pdz

# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this can not be mixed use with `$1`, `$2` ...
source "${MAIN_ROOT}/utils/parse_options.sh" || exit 1

# Expansions below are quoted so that values containing spaces stay one argument.

if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    # prepare data
    ./local/preprocess.sh "${conf_path}" || exit -1
fi

if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    # train model, all `ckpt` under `train_output_path/checkpoints/` dir
    CUDA_VISIBLE_DEVICES="${gpus}" ./local/train.sh "${conf_path}" "${train_output_path}" || exit -1
fi

if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    # synthesize
    CUDA_VISIBLE_DEVICES="${gpus}" ./local/synthesize.sh "${conf_path}" "${train_output_path}" "${ckpt_name}" || exit -1
fi

if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    # synthesize_e2e
    CUDA_VISIBLE_DEVICES="${gpus}" ./local/synthesize_e2e.sh "${conf_path}" "${train_output_path}" "${ckpt_name}" || exit -1
fi

if [ "${stage}" -le 4 ] && [ "${stop_stage}" -ge 4 ]; then
    # inference; ./local/inference.sh uses ${train_output_path}/inference
    CUDA_VISIBLE_DEVICES="${gpus}" ./local/inference.sh "${train_output_path}" || exit -1
fi


================================================
FILE: examples/csmsc/tts0/README.md
================================================
# Tacotron2 with CSMSC
This example contains code used to train a [Tacotron2](https://arxiv.org/abs/1712.05884) model with [Chinese Standard Mandarin Speech Corpus](https://www.data-baker.com/open_source.html).

## Dataset
### Download and Extract
Download CSMSC from its [Official Website](https://test.data-baker.com/data/index/TNtts/) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/BZNSYP`.

### Get MFA Result and Extract
We use [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get phonemes for Tacotron2, the durations of MFA are not needed here.
You can download the alignment result from here: [baker_alignment_tone.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/BZNSYP/with_tone/baker_alignment_tone.tar.gz), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) in our repo.

## Get Started
Assume the path to the dataset is `~/datasets/BZNSYP`.
Assume the path to the MFA result of CSMSC is `./baker_alignment_tone`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
    - synthesize waveform from a text file.

```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.

```text
dump
├── dev
│   ├── norm
│   └── raw
├── phone_id_map.txt
├── speaker_id_map.txt
├── test
│   ├── norm
│   └── raw
└── train
    ├── norm
    ├── raw
    └── speech_stats.npy
```
The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The raw folder contains speech features of each utterance, while the norm folder contains normalized ones. The statistics used to normalize features are computed from the training set, which is located in `dump/train/*_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains phones, text_lengths, speech_lengths, durations, the path of speech features, speaker, and the id of each utterance.

### Model Training
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
`./local/train.sh` calls `${BIN_DIR}/train.py`.
Here's the complete help message.
```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU] [--phones-dict PHONES_DICT]

Train a Tacotron2 model.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       tacotron2 config file.
  --train-metadata TRAIN_METADATA
                        training data.
  --dev-metadata DEV_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
  --phones-dict PHONES_DICT
                        phone vocabulary file.
```
1. `--config` is a config file in yaml format to overwrite the default config, which can be found at `conf/default.yaml`.
2. `--train-metadata` and `--dev-metadata` should be the metadata file in the normalized subfolder of `train` and `dev` in the `dump` folder.
3. `--output-dir` is the directory to save the results of the experiment. Checkpoints are saved in `checkpoints/` inside this directory.
4. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.
5. `--phones-dict` is the path of the phone vocabulary file.

### Synthesizing
We use [parallel wavegan](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/voc1) as the neural vocoder.
Download pretrained parallel wavegan model from [pwg_baker_ckpt_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_baker_ckpt_0.4.zip) and unzip it.
```bash
unzip pwg_baker_ckpt_0.4.zip
```
Parallel WaveGAN checkpoint contains files listed below.
```text
pwg_baker_ckpt_0.4
├── pwg_default.yaml               # default config used to train parallel wavegan
├── pwg_snapshot_iter_400000.pdz   # model parameters of parallel wavegan
└── pwg_stats.npy                  # statistics used to normalize spectrogram when training parallel wavegan
```
`./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
```
The last number controls the vocoder model during synthesis, which can use `0-4` to select the vocoder in {`pwgan`, `multi band melgan`, `style melgan`, `hifigan`, `wavernn`}.

```text
usage: synthesize.py [-h]
                     [--am {speedyspeech_csmsc,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech,tacotron2_aishell3}]
                     [--am_config AM_CONFIG] [--am_ckpt AM_CKPT]
                     [--am_stat AM_STAT] [--phones_dict PHONES_DICT]
                     [--tones_dict TONES_DICT] [--speaker_dict SPEAKER_DICT]
                     [--voice-cloning VOICE_CLONING]
                     [--voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,wavernn_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,style_melgan_csmsc}]
                     [--voc_config VOC_CONFIG] [--voc_ckpt VOC_CKPT]
                     [--voc_stat VOC_STAT] [--ngpu NGPU]
                     [--test_metadata TEST_METADATA] [--output_dir OUTPUT_DIR]

Synthesize with acoustic model & vocoder

optional arguments:
  -h, --help            show this help message and exit
  --am {speedyspeech_csmsc,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech,tacotron2_aishell3}
                        Choose acoustic model type of tts task.
  --am_config AM_CONFIG
                        Config of acoustic model.
  --am_ckpt AM_CKPT     Checkpoint file of acoustic model.
  --am_stat AM_STAT     mean and standard deviation used to normalize
                        spectrogram when training acoustic model.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --tones_dict TONES_DICT
                        tone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --voice-cloning VOICE_CLONING
                        whether training voice cloning model.
  --voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,wavernn_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,style_melgan_csmsc}
                        Choose vocoder type of tts task.
  --voc_config VOC_CONFIG
                        Config of voc.
  --voc_ckpt VOC_CKPT   Checkpoint file of voc.
  --voc_stat VOC_STAT   mean and standard deviation used to normalize
                        spectrogram when training voc.
  --ngpu NGPU           if ngpu == 0, use cpu.
  --test_metadata TEST_METADATA
                        test metadata.
  --output_dir OUTPUT_DIR
                        output dir.
```
`./local/synthesize_e2e.sh` calls `${BIN_DIR}/../synthesize_e2e.py`, which can synthesize waveform from text file.

```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
```
The last number controls the vocoder model during synthesis, which can use `0,1,3,4` to select the vocoder in {`pwgan`, `multi band melgan`, `hifigan`, `wavernn`}

```text
usage: synthesize_e2e.py [-h]
                         [--am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech}]
                         [--am_config AM_CONFIG] [--am_ckpt AM_CKPT]
                         [--am_stat AM_STAT] [--phones_dict PHONES_DICT]
                         [--tones_dict TONES_DICT]
                         [--speaker_dict SPEAKER_DICT] [--spk_id SPK_ID]
                         [--voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,style_melgan_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,wavernn_csmsc}]
                         [--voc_config VOC_CONFIG] [--voc_ckpt VOC_CKPT]
                         [--voc_stat VOC_STAT] [--lang LANG]
                         [--inference_dir INFERENCE_DIR] [--ngpu NGPU]
                         [--text TEXT] [--output_dir OUTPUT_DIR]

Synthesize with acoustic model & vocoder

optional arguments:
  -h, --help            show this help message and exit
  --am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech}
                        Choose acoustic model type of tts task.
  --am_config AM_CONFIG
                        Config of acoustic model.
  --am_ckpt AM_CKPT     Checkpoint file of acoustic model.
  --am_stat AM_STAT     mean and standard deviation used to normalize
                        spectrogram when training acoustic model.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --tones_dict TONES_DICT
                        tone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --spk_id SPK_ID       spk id for multi speaker acoustic model
  --voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,style_melgan_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,wavernn_csmsc}
                        Choose vocoder type of tts task.
  --voc_config VOC_CONFIG
                        Config of voc.
  --voc_ckpt VOC_CKPT   Checkpoint file of voc.
  --voc_stat VOC_STAT   mean and standard deviation used to normalize
                        spectrogram when training voc.
  --lang LANG           Choose model language. zh or en
  --inference_dir INFERENCE_DIR
                        dir to save inference models
  --ngpu NGPU           if ngpu == 0, use cpu.
  --text TEXT           text to synthesize, a 'utt_id sentence' pair per line.
  --output_dir OUTPUT_DIR
                        output dir.
```
1. `--am` is acoustic model type with the format {model_name}_{dataset}
2. `--am_config`, `--am_ckpt`, `--am_stat` and `--phones_dict` are arguments for acoustic model, which correspond to the 4 files in the Tacotron2 pretrained model.
3. `--voc` is vocoder type with the format {model_name}_{dataset}
4. `--voc_config`, `--voc_ckpt`, `--voc_stat` are arguments for vocoder, which correspond to the 3 files in the parallel wavegan pretrained model.
5. `--lang` is the model language, which can be `zh` or `en`.
6. `--test_metadata` should be the metadata file in the normalized subfolder of `test` in the `dump` folder.
7. `--text` is the text file, which contains sentences to synthesize.
8. `--output_dir` is the directory to save synthesized audio files.
9. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.


## Pretrained Model
Pretrained Tacotron2 model with no silence at the edges of the audio:
- [tacotron2_csmsc_ckpt_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/tacotron2/tacotron2_csmsc_ckpt_0.2.0.zip)

The static model can be downloaded here:
- [tacotron2_csmsc_static_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/tacotron2/tacotron2_csmsc_static_0.2.0.zip)


Model | Step | eval/loss | eval/l1_loss | eval/mse_loss | eval/bce_loss| eval/attn_loss 
:-------------:| :------------:| :-----: | :-----: | :--------: |:--------:|:---------:
default| 1(gpu) x 30600|0.57185|0.39614|0.14642|0.029|5.8e-05|

Tacotron2 checkpoint contains files listed below.
```text
tacotron2_csmsc_ckpt_0.2.0
├── default.yaml            # default config used to train Tacotron2
├── phone_id_map.txt        # phone vocabulary file when training Tacotron2
├── snapshot_iter_30600.pdz # model parameters and optimizer states
└── speech_stats.npy        # statistics used to normalize spectrogram when training Tacotron2
```
You can use the following script to synthesize the sentences in `${BIN_DIR}/../../assets/sentences.txt` using the pretrained Tacotron2 and parallel wavegan models.
```bash
source path.sh

FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 ${BIN_DIR}/../synthesize_e2e.py \
  --am=tacotron2_csmsc \
  --am_config=tacotron2_csmsc_ckpt_0.2.0/default.yaml \
  --am_ckpt=tacotron2_csmsc_ckpt_0.2.0/snapshot_iter_30600.pdz \
  --am_stat=tacotron2_csmsc_ckpt_0.2.0/speech_stats.npy  \
  --voc=pwgan_csmsc \
  --voc_config=pwg_baker_ckpt_0.4/pwg_default.yaml \
  --voc_ckpt=pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \
  --voc_stat=pwg_baker_ckpt_0.4/pwg_stats.npy \
  --lang=zh \
  --text=${BIN_DIR}/../../assets/sentences.txt \
  --output_dir=exp/default/test_e2e \
  --inference_dir=exp/default/inference \
  --phones_dict=tacotron2_csmsc_ckpt_0.2.0/phone_id_map.txt
```


================================================
FILE: examples/csmsc/tts0/conf/default.yaml
================================================
# This configuration is for Paddle to train Tacotron 2. Compared to the
# original paper, this configuration additionally uses the guided attention
# loss to accelerate the learning of the diagonal attention. It requires
# only a single GPU with 12 GB of memory and takes about 1 day to finish
# training on a Titan V.

###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################

fs: 24000          # sr
n_fft: 2048        # FFT size (samples).
n_shift: 300       # Hop size (samples). 12.5ms
win_length: 1200   # Window length (samples). 50ms
                   # If set to null, it will be the same as n_fft.
window: "hann"     # Window function.

# Only used for feats_type != raw

fmin: 80           # Minimum frequency of Mel basis.
fmax: 7600         # Maximum frequency of Mel basis.
n_mels: 80         # The number of mel basis.

###########################################################
#                       DATA SETTING                      #
###########################################################
batch_size: 64
num_workers: 2

###########################################################
#                       MODEL SETTING                     #
###########################################################
model:                          # keyword arguments for the selected model
    embed_dim: 512               # char or phn embedding dimension
    elayers: 1                   # number of blstm layers in encoder
    eunits: 512                  # number of blstm units
    econv_layers: 3              # number of convolutional layers in encoder
    econv_chans: 512             # number of channels in convolutional layer
    econv_filts: 5               # filter size of convolutional layer
    atype: location              # attention function type
    adim: 512                    # attention dimension
    aconv_chans: 32              # number of channels in convolutional layer of attention
    aconv_filts: 15              # filter size of convolutional layer of attention
    cumulate_att_w: True         # whether to cumulate attention weight
    dlayers: 2                   # number of lstm layers in decoder
    dunits: 1024                 # number of lstm units in decoder
    prenet_layers: 2             # number of layers in prenet
    prenet_units: 256            # number of units in prenet
    postnet_layers: 5            # number of layers in postnet
    postnet_chans: 512           # number of channels in postnet
    postnet_filts: 5             # filter size of postnet layer
    output_activation: null      # activation function for the final output
    use_batch_norm: True         # whether to use batch normalization in encoder
    use_concate: True            # whether to concatenate encoder embedding with decoder outputs
    use_residual: False          # whether to use residual connection in encoder
    dropout_rate: 0.5            # dropout rate
    zoneout_rate: 0.1            # zoneout rate
    reduction_factor: 1          # reduction factor
    spk_embed_dim: null          # speaker embedding dimension


###########################################################
#                       UPDATER SETTING                   #
###########################################################
updater:
    use_masking: True            # whether to apply masking for padded part in loss calculation
    bce_pos_weight: 5.0          # weight of positive sample in binary cross entropy calculation
    use_guided_attn_loss: True   # whether to use guided attention loss
    guided_attn_loss_sigma: 0.4  # sigma of guided attention loss
    guided_attn_loss_lambda: 1.0 # strength of guided attention loss


##########################################################
#                  OPTIMIZER SETTING                     #
##########################################################
optimizer:
    optim: adam              # optimizer type
    learning_rate: 1.0e-03   # learning rate
    epsilon: 1.0e-06         # epsilon
    weight_decay: 0.0        # weight decay coefficient

###########################################################
#                     TRAINING SETTING                    #
###########################################################
max_epoch: 200
num_snapshots: 5

###########################################################
#                       OTHER SETTING                     #
###########################################################
seed: 42

================================================
FILE: examples/csmsc/tts0/local/inference.sh
================================================
#!/bin/bash
# Run inference.py on the models stored in ${train_output_path}/inference,
# using the tacotron2_csmsc acoustic model and one of several vocoders.
# Synthesized results are written to ${train_output_path}/pd_infer_out.
#
# Usage: ./local/inference.sh <train_output_path> [stage]
#   train_output_path  experiment directory that contains `inference/`
#   stage              optional, selects the vocoder (default: 0)
#                        0 = pwgan, 1 = multi band melgan, 2 = hifigan
#
# Every step exits with a non-zero status on failure; without this the
# script would fall through the remaining (skipped) stages and report
# success to the caller even though inference.py failed.

train_output_path=$1

# A single optional argument is used as both the first and the last stage,
# so exactly one vocoder is run (same convention as synthesize.sh).
stage=${2:-0}
stop_stage=${2:-0}

# pwgan
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    python3 ${BIN_DIR}/../inference.py \
        --inference_dir=${train_output_path}/inference \
        --am=tacotron2_csmsc \
        --voc=pwgan_csmsc \
        --text=${BIN_DIR}/../../assets/sentences.txt \
        --output_dir=${train_output_path}/pd_infer_out \
        --phones_dict=dump/phone_id_map.txt || exit -1
fi

# for more GAN Vocoders
# multi band melgan
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    python3 ${BIN_DIR}/../inference.py \
        --inference_dir=${train_output_path}/inference \
        --am=tacotron2_csmsc \
        --voc=mb_melgan_csmsc \
        --text=${BIN_DIR}/../../assets/sentences.txt \
        --output_dir=${train_output_path}/pd_infer_out \
        --phones_dict=dump/phone_id_map.txt || exit -1
fi

# hifigan
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    python3 ${BIN_DIR}/../inference.py \
        --inference_dir=${train_output_path}/inference \
        --am=tacotron2_csmsc \
        --voc=hifigan_csmsc \
        --text=${BIN_DIR}/../../assets/sentences.txt \
        --output_dir=${train_output_path}/pd_infer_out \
        --phones_dict=dump/phone_id_map.txt || exit -1
fi

================================================
FILE: examples/csmsc/tts0/local/preprocess.sh
================================================
#!/bin/bash
# Data preparation for this example.
#
# Usage: ./local/preprocess.sh <config_path>
#
# Stages (all of them run by default; edit stage/stop_stage to pick a range):
#   0. generate durations.txt from the MFA result in ./baker_alignment_tone
#   1. extract features into ./dump
#   2. compute statistics of the "speech" field of the training set
#   3. normalize train/dev/test using the training-set statistics
#
# Every step exits with a non-zero status on failure, so later stages never
# run on missing or partially written outputs of an earlier stage.

stage=0
stop_stage=100

config_path=$1

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # get durations from MFA's result
    echo "Generate durations.txt from MFA results ..."
    python3 ${MAIN_ROOT}/utils/gen_duration_from_textgrid.py \
        --inputdir=./baker_alignment_tone \
        --output=durations.txt \
        --config=${config_path} || exit -1
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # extract features
    echo "Extract features ..."
    python3 ${BIN_DIR}/preprocess.py \
        --dataset=baker \
        --rootdir=~/datasets/BZNSYP/ \
        --dumpdir=dump \
        --dur-file=durations.txt \
        --config=${config_path} \
        --num-cpu=20 \
        --cut-sil=True || exit -1
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # get features' stats(mean and std)
    echo "Get features' stats ..."
    python3 ${MAIN_ROOT}/utils/compute_statistics.py \
        --metadata=dump/train/raw/metadata.jsonl \
        --field-name="speech" || exit -1
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # normalize and convert phone to id, dev and test should use train's stats
    echo "Normalize ..."
    # the three subsets only differ in their input/output directories
    for subset in train dev test; do
        python3 ${BIN_DIR}/normalize.py \
            --metadata=dump/${subset}/raw/metadata.jsonl \
            --dumpdir=dump/${subset}/norm \
            --speech-stats=dump/train/speech_stats.npy \
            --phones-dict=dump/phone_id_map.txt \
            --speaker-dict=dump/speaker_id_map.txt || exit -1
    done
fi


================================================
FILE: examples/csmsc/tts0/local/synthesize.sh
================================================
#!/bin/bash
# Synthesize waveforms for dump/test/norm/metadata.jsonl with the
# tacotron2_csmsc acoustic model and a selectable vocoder.
# Results are written to ${train_output_path}/test.
#
# Usage: ./local/synthesize.sh <config_path> <train_output_path> <ckpt_name> [stage]
#   stage selects the vocoder (default: 0)
#     0 = pwgan, 1 = multi band melgan, 2 = style melgan,
#     3 = hifigan, 4 = wavernn

config_path=$1
train_output_path=$2
ckpt_name=$3
# the optional 4th argument is used as both the first and the last stage,
# so exactly one of the stages below is executed
stage=${4:-0}
stop_stage=${4:-0}

# Call synthesize.py for one vocoder; the acoustic model arguments are the
# same for every vocoder.
#   $1  vocoder type (--voc)
#   $2  vocoder config file (--voc_config)
#   $3  vocoder checkpoint (--voc_ckpt)
#   $4  vocoder statistics file (--voc_stat)
run_synthesize() {
    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 ${BIN_DIR}/../synthesize.py \
        --am=tacotron2_csmsc \
        --am_config=${config_path} \
        --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
        --am_stat=dump/train/speech_stats.npy \
        --voc=$1 \
        --voc_config=$2 \
        --voc_ckpt=$3 \
        --voc_stat=$4 \
        --test_metadata=dump/test/norm/metadata.jsonl \
        --output_dir=${train_output_path}/test \
        --phones_dict=dump/phone_id_map.txt
}

# pwgan
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    run_synthesize pwgan_csmsc \
        pwg_baker_ckpt_0.4/pwg_default.yaml \
        pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \
        pwg_baker_ckpt_0.4/pwg_stats.npy || exit -1
fi

# for more GAN Vocoders
# multi band melgan
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    run_synthesize mb_melgan_csmsc \
        mb_melgan_csmsc_ckpt_0.1.1/default.yaml \
        mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz \
        mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy || exit -1
fi

# style melgan
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    run_synthesize style_melgan_csmsc \
        style_melgan_csmsc_ckpt_0.1.1/default.yaml \
        style_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1500000.pdz \
        style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy || exit -1
fi

# hifigan
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    echo "in hifigan syn"
    run_synthesize hifigan_csmsc \
        hifigan_csmsc_ckpt_0.1.1/default.yaml \
        hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \
        hifigan_csmsc_ckpt_0.1.1/feats_stats.npy || exit -1
fi

# wavernn
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    echo "in wavernn syn"
    run_synthesize wavernn_csmsc \
        wavernn_csmsc_ckpt_0.2.0/default.yaml \
        wavernn_csmsc_ckpt_0.2.0/snapshot_iter_400000.pdz \
        wavernn_csmsc_ckpt_0.2.0/feats_stats.npy || exit -1
fi


================================================
FILE: examples/csmsc/tts0/local/synthesize_e2e.sh
================================================
#!/bin/bash
# Synthesize waveforms from the sentences in
# ${BIN_DIR}/../../assets/sentences.txt with the tacotron2_csmsc acoustic
# model and a selectable vocoder. Results are written to
# ${train_output_path}/test_e2e.
#
# Usage: ./local/synthesize_e2e.sh <config_path> <train_output_path> <ckpt_name> [stage]
#   stage selects the vocoder (default: 0)
#     0 = pwgan, 1 = multi band melgan, 2 = style melgan,
#     3 = hifigan, 4 = wavernn

config_path=$1
train_output_path=$2
ckpt_name=$3

# the optional 4th argument is used as both the first and the last stage,
# so exactly one of the stages below is executed
stage=${4:-0}
stop_stage=${4:-0}

# Call synthesize_e2e.py for one vocoder; the acoustic model arguments are
# the same for every vocoder.
#   $1    vocoder type (--voc)
#   $2    vocoder config file (--voc_config)
#   $3    vocoder checkpoint (--voc_ckpt)
#   $4    vocoder statistics file (--voc_stat)
#   $5..  extra arguments appended verbatim (e.g. --inference_dir=...)
run_synthesize_e2e() {
    local voc=$1
    local voc_config=$2
    local voc_ckpt=$3
    local voc_stat=$4
    shift 4

    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 ${BIN_DIR}/../synthesize_e2e.py \
        --am=tacotron2_csmsc \
        --am_config=${config_path} \
        --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
        --am_stat=dump/train/speech_stats.npy \
        --voc=${voc} \
        --voc_config=${voc_config} \
        --voc_ckpt=${voc_ckpt} \
        --voc_stat=${voc_stat} \
        --lang=zh \
        --text=${BIN_DIR}/../../assets/sentences.txt \
        --output_dir=${train_output_path}/test_e2e \
        --phones_dict=dump/phone_id_map.txt \
        "$@"
}

# TODO: the dygraph-to-static result of tacotron2 is not as loud as the
# dygraph result; probably some function used during decoding still behaves
# differently between dygraph and static graph
# pwgan
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    run_synthesize_e2e pwgan_csmsc \
        pwg_baker_ckpt_0.4/pwg_default.yaml \
        pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \
        pwg_baker_ckpt_0.4/pwg_stats.npy \
        --inference_dir=${train_output_path}/inference || exit -1
fi

# for more GAN Vocoders
# multi band melgan
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    run_synthesize_e2e mb_melgan_csmsc \
        mb_melgan_csmsc_ckpt_0.1.1/default.yaml \
        mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz \
        mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --inference_dir=${train_output_path}/inference || exit -1
fi

# the pretrained models have not been released yet
# style melgan
# style melgan's dygraph-to-static conversion is not ready yet, so no
# --inference_dir is passed for this vocoder
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    run_synthesize_e2e style_melgan_csmsc \
        style_melgan_csmsc_ckpt_0.1.1/default.yaml \
        style_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1500000.pdz \
        style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy || exit -1
fi

# hifigan
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    echo "in hifigan syn_e2e"
    run_synthesize_e2e hifigan_csmsc \
        hifigan_csmsc_ckpt_0.1.1/default.yaml \
        hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \
        hifigan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --inference_dir=${train_output_path}/inference || exit -1
fi

# wavernn
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    echo "in wavernn syn_e2e"
    run_synthesize_e2e wavernn_csmsc \
        wavernn_csmsc_ckpt_0.2.0/default.yaml \
        wavernn_csmsc_ckpt_0.2.0/snapshot_iter_400000.pdz \
        wavernn_csmsc_ckpt_0.2.0/feats_stats.npy \
        --inference_dir=${train_output_path}/inference || exit -1
fi


================================================
FILE: examples/csmsc/tts0/local/train.sh
================================================
#!/bin/bash
# Launch ${BIN_DIR}/train.py on the normalized train/dev metadata in ./dump.
#
# Usage: ./local/train.sh <config_path> <train_output_path>
#   config_path        value passed to --config
#   train_output_path  value passed to --output-dir

config_path=$1
train_output_path=$2

# command-line arguments forwarded to train.py
train_args=(
    --train-metadata=dump/train/norm/metadata.jsonl
    --dev-metadata=dump/dev/norm/metadata.jsonl
    --config=${config_path}
    --output-dir=${train_output_path}
    --ngpu=1
    --phones-dict=dump/phone_id_map.txt
)

python3 ${BIN_DIR}/train.py "${train_args[@]}"

================================================
FILE: examples/csmsc/tts0/path.sh
================================================
#!/bin/bash
# Environment setup for this example; it is sourced by run.sh.

# MAIN_ROOT is three directory levels above the current working directory
# (the repository root when sourced from this example's directory).
export MAIN_ROOT=$(realpath ${PWD}/../../../)

# BIN_DIR points at the experiment scripts of the selected model
MODEL=tacotron2
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}

# make the repository and its utils reachable from the shell and from Python
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

export LC_ALL=C
export PYTHONDONTWRITEBYTECODE=1
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8


================================================
FILE: examples/csmsc/tts0/run.sh
================================================
#!/bin/bash
# Entry point of this example. Stages:
#   0 preprocess, 1 train, 2 synthesize, 3 synthesize_e2e, 4 static inference

set -e
source path.sh

gpus=0,1
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_153.pdz

# The stage range can be chosen on the command line,
# e.g. `./run.sh --stage 0 --stop-stage 0`.
# These options cannot be mixed with positional arguments (`$1`, `$2`, ...).
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

# Succeeds when stage number $1 lies within [stage, stop_stage].
stage_enabled() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

if stage_enabled 0; then
    # prepare data
    ./local/preprocess.sh ${conf_path} || exit -1
fi

if stage_enabled 1; then
    # train model, all `ckpt` under `train_output_path/checkpoints/` dir
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

if stage_enabled 2; then
    # synthesize; the trailing 0 selects pwgan, the default vocoder
    # use 1-4 to select the vocoder in {multi band melgan, style melgan, hifigan, wavernn}
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
fi

if stage_enabled 3; then
    # synthesize_e2e; the trailing 0 selects pwgan, the default vocoder
    # use 1,3,4 to select the vocoder in {multi band melgan, hifigan, wavernn}
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
fi

if stage_enabled 4; then
    # inference with static model
    CUDA_VISIBLE_DEVICES=${gpus} ./local/inference.sh ${train_output_path} || exit -1
fi


================================================
FILE: examples/csmsc/tts2/README.md
================================================
# SpeedySpeech with CSMSC
This example contains code used to train a [SpeedySpeech](http://arxiv.org/abs/2008.03802) model with [Chinese Standard Mandarin Speech Corpus](https://www.data-baker.com/open_source.html). NOTE that we only implement the student part of the SpeedySpeech model. The ground truth alignment used to train the model is extracted from the dataset using [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner).

## Dataset
### Download and Extract
Download CSMSC from its [Official Website](https://test.data-baker.com/data/index/TNtts/) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/BZNSYP`.

The structure of the folder is listed below.

```text
└─ Wave
    └─ .wav files (audio speech)
└─ PhoneLabeling
    └─ .interval files (alignment between phoneme and duration)
└─ ProsodyLabeling
   └─ 000001-010000.txt (text with prosodic by pinyin)
```

### Get MFA Result and Extract
We use [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get durations for SpeedySpeech.
You can download the alignment result from here: [baker_alignment_tone.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/BZNSYP/with_tone/baker_alignment_tone.tar.gz), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) in our repo.

## Get Started
Assume the path to the dataset is `~/datasets/BZNSYP`.
Assume the path to the MFA result of CSMSC is `./baker_alignment_tone`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
    - synthesize waveform from a text file.
5. inference using the static model.
```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to run only one stage. For example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.

```text
dump
├── dev
│   ├── norm
│   └── raw
├── test
│   ├── norm
│   └── raw
└── train
    ├── norm
    ├── raw
    └── feats_stats.npy
```

The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The raw folder contains the log magnitude of the mel spectrogram of each utterance, while the norm folder contains the normalized spectrogram. The statistics used to normalize the spectrogram are computed from the training set, which is located in `dump/train/feats_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains phones, tones, durations, the path of the spectrogram, and the id of each utterance.

### Model Training
`./local/train.sh` calls `${BIN_DIR}/train.py`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
```
Here's the complete help message.
```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU] [--use-relative-path USE_RELATIVE_PATH]
                [--phones-dict PHONES_DICT] [--tones-dict TONES_DICT]

Train a Speedyspeech model with a single speaker dataset.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       config file.
  --train-metadata TRAIN_METADATA
                        training data.
  --dev-metadata DEV_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
  --use-relative-path USE_RELATIVE_PATH
                        whether use relative path in metadata
  --phones-dict PHONES_DICT
                        phone vocabulary file.
  --tones-dict TONES_DICT
                        tone vocabulary file.
```

1. `--config` is a config file in yaml format to overwrite the default config, which can be found at `conf/default.yaml`.
2. `--train-metadata` and `--dev-metadata` should be the metadata file in the normalized subfolder of `train` and `dev` in the `dump` folder.
3. `--output-dir` is the directory to save the results of the experiment. Checkpoints are saved in `checkpoints/` inside this directory.
4. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.
5. `--phones-dict` is the path of the phone vocabulary file.
6. `--tones-dict` is the path of the tone vocabulary file.

### Synthesizing
We use [parallel wavegan](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/voc1) as the neural vocoder.
Download pretrained parallel wavegan model from [pwg_baker_ckpt_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_baker_ckpt_0.4.zip) and unzip it.
```bash
unzip pwg_baker_ckpt_0.4.zip
```
Parallel WaveGAN checkpoint contains files listed below.
```text
pwg_baker_ckpt_0.4
├── pwg_default.yaml               # default config used to train parallel wavegan
├── pwg_snapshot_iter_400000.pdz   # model parameters of parallel wavegan
└── pwg_stats.npy                  # statistics used to normalize spectrogram when training parallel wavegan
```
`./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
```
The last number controls the vocoder model during synthesis, which can use `0-4` to select the vocoder in {`pwgan`, `multi band melgan`, `style melgan`, `hifigan`, `wavernn`}

```text
usage: synthesize.py [-h]
                     [--am {speedyspeech_csmsc,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech,tacotron2_aishell3}]
                     [--am_config AM_CONFIG] [--am_ckpt AM_CKPT]
                     [--am_stat AM_STAT] [--phones_dict PHONES_DICT]
                     [--tones_dict TONES_DICT] [--speaker_dict SPEAKER_DICT]
                     [--voice-cloning VOICE_CLONING]
                     [--voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,wavernn_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,style_melgan_csmsc}]
                     [--voc_config VOC_CONFIG] [--voc_ckpt VOC_CKPT]
                     [--voc_stat VOC_STAT] [--ngpu NGPU]
                     [--test_metadata TEST_METADATA] [--output_dir OUTPUT_DIR]

Synthesize with acoustic model & vocoder

optional arguments:
  -h, --help            show this help message and exit
  --am {speedyspeech_csmsc,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech,tacotron2_aishell3}
                        Choose acoustic model type of tts task.
  --am_config AM_CONFIG
                        Config of acoustic model.
  --am_ckpt AM_CKPT     Checkpoint file of acoustic model.
  --am_stat AM_STAT     mean and standard deviation used to normalize
                        spectrogram when training acoustic model.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --tones_dict TONES_DICT
                        tone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --voice-cloning VOICE_CLONING
                        whether training voice cloning model.
  --voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,wavernn_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,style_melgan_csmsc}
                        Choose vocoder type of tts task.
  --voc_config VOC_CONFIG
                        Config of voc.
  --voc_ckpt VOC_CKPT   Checkpoint file of voc.
  --voc_stat VOC_STAT   mean and standard deviation used to normalize
                        spectrogram when training voc.
  --ngpu NGPU           if ngpu == 0, use cpu.
  --test_metadata TEST_METADATA
                        test metadata.
  --output_dir OUTPUT_DIR
                        output dir.
```
`./local/synthesize_e2e.sh` calls `${BIN_DIR}/../synthesize_e2e.py`, which can synthesize waveform from text file.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
```
The last argument selects the vocoder used during synthesis: `0`, `1`, `3`, and `4` correspond to `pwgan`, `multi band melgan`, `hifigan`, and `wavernn`, respectively.

```text
usage: synthesize_e2e.py [-h]
                         [--am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech}]
                         [--am_config AM_CONFIG] [--am_ckpt AM_CKPT]
                         [--am_stat AM_STAT] [--phones_dict PHONES_DICT]
                         [--tones_dict TONES_DICT]
                         [--speaker_dict SPEAKER_DICT] [--spk_id SPK_ID]
                         [--voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,style_melgan_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,wavernn_csmsc}]
                         [--voc_config VOC_CONFIG] [--voc_ckpt VOC_CKPT]
                         [--voc_stat VOC_STAT] [--lang LANG]
                         [--inference_dir INFERENCE_DIR] [--ngpu NGPU]
                         [--text TEXT] [--output_dir OUTPUT_DIR]

Synthesize with acoustic model & vocoder

optional arguments:
  -h, --help            show this help message and exit
  --am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech}
                        Choose acoustic model type of tts task.
  --am_config AM_CONFIG
                        Config of acoustic model.
  --am_ckpt AM_CKPT     Checkpoint file of acoustic model.
  --am_stat AM_STAT     mean and standard deviation used to normalize
                        spectrogram when training acoustic model.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --tones_dict TONES_DICT
                        tone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --spk_id SPK_ID       spk id for multi speaker acoustic model
  --voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,style_melgan_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,wavernn_csmsc}
                        Choose vocoder type of tts task.
  --voc_config VOC_CONFIG
                        Config of voc.
  --voc_ckpt VOC_CKPT   Checkpoint file of voc.
  --voc_stat VOC_STAT   mean and standard deviation used to normalize
                        spectrogram when training voc.
  --lang LANG           Choose model language. zh or en
  --inference_dir INFERENCE_DIR
                        dir to save inference models
  --ngpu NGPU           if ngpu == 0, use cpu.
  --text TEXT           text to synthesize, a 'utt_id sentence' pair per line.
  --output_dir OUTPUT_DIR
                        output dir.
```
1. `--am` is acoustic model type with the format {model_name}_{dataset}
2. `--am_config`, `--am_ckpt`, `--am_stat`, `--phones_dict` and `--tones_dict` are arguments for acoustic model, which correspond to the 5 files in the speedyspeech pretrained model.
3. `--voc` is vocoder type with the format {model_name}_{dataset}
4. `--voc_config`, `--voc_ckpt`, `--voc_stat` are arguments for vocoder, which correspond to the 3 files in the parallel wavegan pretrained model.
5. `--lang` is the model language, which can be `zh` or `en`.
6. `--test_metadata` (used by `synthesize.py`) should be the metadata file in the normalized subfolder of `test` in the `dump` folder.
7. `--text` (used by `synthesize_e2e.py`) is the text file, which contains sentences to synthesize.
8. `--output_dir` is the directory to save synthesized audio files.
9. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

### Inferencing
After synthesizing, we will get static models of speedyspeech and pwgan in `${train_output_path}/inference`.
`./local/inference.sh` calls `${BIN_DIR}/../inference.py`, which provides a paddle static model inference example for speedyspeech + pwgan synthesis.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/inference.sh ${train_output_path}
```

## Pretrained Model
Pretrained SpeedySpeech models trained on audio with no silence at the edges:
- [speedyspeech_nosil_baker_ckpt_0.5.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/speedyspeech/speedyspeech_nosil_baker_ckpt_0.5.zip)
- [speedyspeech_csmsc_ckpt_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/speedyspeech/speedyspeech_csmsc_ckpt_0.2.0.zip)

The static model can be downloaded here:
- [speedyspeech_nosil_baker_static_0.5.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/speedyspeech/speedyspeech_nosil_baker_static_0.5.zip)
- [speedyspeech_csmsc_static_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/speedyspeech/speedyspeech_csmsc_static_0.2.0.zip)

The ONNX model can be downloaded here:
- [speedyspeech_csmsc_onnx_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/speedyspeech/speedyspeech_csmsc_onnx_0.2.0.zip)

The Paddle-Lite model can be downloaded here:
- [speedyspeech_csmsc_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/speedyspeech/speedyspeech_csmsc_pdlite_1.3.0.zip)


Model | Step | eval/loss | eval/l1_loss | eval/duration_loss | eval/ssim_loss
:-------------:| :------------:| :-----: | :-----: | :--------:|:--------:
default| 1(gpu) x 11400|0.79532|0.400246|0.030259| 0.36482

SpeedySpeech checkpoint contains files listed below.

```text
speedyspeech_csmsc_ckpt_0.2.0
├── default.yaml            # default config used to train speedyspeech
├── feats_stats.npy         # statistics used to normalize spectrogram when training speedyspeech
├── phone_id_map.txt        # phone vocabulary file when training speedyspeech
├── snapshot_iter_30600.pdz # model parameters and optimizer states
└── tone_id_map.txt         # tone vocabulary file when training speedyspeech
```
You can use the following script to synthesize speech for the sentences in `${BIN_DIR}/../../assets/sentences.txt` using the pretrained speedyspeech and parallel wavegan models.
```bash
source path.sh

FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 ${BIN_DIR}/../synthesize_e2e.py \
  --am=speedyspeech_csmsc \
  --am_config=speedyspeech_csmsc_ckpt_0.2.0/default.yaml \
  --am_ckpt=speedyspeech_csmsc_ckpt_0.2.0/snapshot_iter_30600.pdz \
  --am_stat=speedyspeech_csmsc_ckpt_0.2.0/feats_stats.npy \
  --voc=pwgan_csmsc \
  --voc_config=pwg_baker_ckpt_0.4/pwg_default.yaml \
  --voc_ckpt=pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \
  --voc_stat=pwg_baker_ckpt_0.4/pwg_stats.npy \
  --lang=zh \
  --text=${BIN_DIR}/../../assets/sentences.txt \
  --output_dir=exp/default/test_e2e \
  --inference_dir=exp/default/inference \
  --phones_dict=speedyspeech_csmsc_ckpt_0.2.0/phone_id_map.txt \
  --tones_dict=speedyspeech_csmsc_ckpt_0.2.0/tone_id_map.txt
```


================================================
FILE: examples/csmsc/tts2/conf/default.yaml
================================================
###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
fs: 24000           # Sampling rate.
n_fft: 2048         # FFT size (samples).
n_shift: 300        # Hop size (samples). 12.5ms at fs=24000
win_length: 1200    # Window length (samples). 50ms at fs=24000
                    # If set to null, it will be the same as n_fft.
window: "hann"      # Window function.
n_mels: 80          # Number of mel basis.
fmin: 80            # Minimum freq in mel basis calculation.
fmax: 7600          # Maximum frequency in mel basis calculation.

###########################################################
#                       DATA SETTING                      #
###########################################################
batch_size: 64
num_workers: 4

###########################################################
#                       MODEL SETTING                     #
###########################################################
# NOTE(review): decoder_output_size equals n_mels above (both 80); presumably
# the two must be kept in sync -- confirm against the model code.
model:
    encoder_hidden_size: 128
    encoder_kernel_size: 3
    encoder_dilations: [1, 3, 9, 27, 1, 3, 9, 27, 1, 1]
    duration_predictor_hidden_size: 128
    decoder_hidden_size: 128
    decoder_output_size: 80
    decoder_kernel_size: 3
    decoder_dilations: [1, 3, 9, 27, 1, 3, 9, 27, 1, 3, 9, 27, 1, 3, 9, 27, 1, 1]

###########################################################
#                     OPTIMIZER SETTING                   #
###########################################################
optimizer:
    optim: adam              # optimizer type
    learning_rate: 0.002     # learning rate
    max_grad_norm: 1         # NOTE(review): presumably the gradient-clipping norm -- confirm

###########################################################
#                     TRAINING SETTING                    #
###########################################################
max_epoch: 200
num_snapshots: 5

###########################################################
#                       OTHER SETTING                     #
###########################################################
seed: 10086

================================================
FILE: examples/csmsc/tts2/local/inference.sh
================================================
#!/bin/bash
# Run inference.py for the SpeedySpeech (CSMSC) acoustic model with a
# selectable vocoder.
#
# Usage: inference.sh <train_output_path>
#   <train_output_path>  experiment directory; <train_output_path>/inference is
#                        passed as --inference_dir and
#                        <train_output_path>/pd_infer_out as --output_dir.
# BIN_DIR is used but not set here, so it must come from the environment.

train_output_path=$1

# Stages outside [stage, stop_stage] are skipped; with both set to 0 only the
# pwgan stage runs.
stage=0
stop_stage=0

# Succeeds when stage number $1 lies within [stage, stop_stage].
stage_enabled() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# Invoke inference.py for speedyspeech_csmsc with the vocoder named in $1.
run_inference() {
    python3 ${BIN_DIR}/../inference.py \
        --inference_dir=${train_output_path}/inference \
        --am=speedyspeech_csmsc \
        --voc=$1 \
        --text=${BIN_DIR}/../../assets/sentences.txt \
        --output_dir=${train_output_path}/pd_infer_out \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt
}

# stage 0: pwgan
if stage_enabled 0; then
    run_inference pwgan_csmsc
fi

# Additional GAN vocoders.
# stage 1: multi band melgan
if stage_enabled 1; then
    run_inference mb_melgan_csmsc
fi

# stage 2: hifigan
if stage_enabled 2; then
    run_inference hifigan_csmsc
fi


================================================
FILE: examples/csmsc/tts2/local/inference_mlu.sh
================================================
#!/bin/bash
# Run inference.py for the SpeedySpeech (CSMSC) acoustic model with
# `--device mlu` and a selectable vocoder.
#
# Usage: inference_mlu.sh <train_output_path>
# BIN_DIR is used but not set here, so it must come from the environment.

train_output_path=$1

# Stages outside [stage, stop_stage] are skipped; with both set to 0 only the
# multi band melgan stage runs.
stage=0
stop_stage=0

# Succeeds when stage number $1 lies within [stage, stop_stage].
stage_enabled() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# Invoke inference.py on the mlu device with the vocoder named in $1.
run_inference_mlu() {
    python3 ${BIN_DIR}/../inference.py \
        --inference_dir=${train_output_path}/inference \
        --am=speedyspeech_csmsc \
        --voc=$1 \
        --text=${BIN_DIR}/../../assets/sentences.txt \
        --output_dir=${train_output_path}/pd_infer_out \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --device mlu
}

# GAN vocoders.
# stage 0: multi band melgan
if stage_enabled 0; then
    run_inference_mlu mb_melgan_csmsc
fi

# stage 1: hifigan
if stage_enabled 1; then
    run_inference_mlu hifigan_csmsc
fi


================================================
FILE: examples/csmsc/tts2/local/inference_npu.sh
================================================
#!/bin/bash
# Run inference.py for the SpeedySpeech (CSMSC) acoustic model with
# `--device npu` and a selectable vocoder.
#
# Usage: inference_npu.sh <train_output_path>
# BIN_DIR is used but not set here, so it must come from the environment.

train_output_path=$1

# Stages outside [stage, stop_stage] are skipped; with both set to 0 only the
# pwgan stage runs.
stage=0
stop_stage=0

# Succeeds when stage number $1 lies within [stage, stop_stage].
stage_enabled() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# Invoke inference.py on the npu device with the vocoder named in $1.
run_inference_npu() {
    python3 ${BIN_DIR}/../inference.py \
        --inference_dir=${train_output_path}/inference \
        --am=speedyspeech_csmsc \
        --voc=$1 \
        --text=${BIN_DIR}/../../assets/sentences.txt \
        --output_dir=${train_output_path}/pd_infer_out \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --device npu
}

# stage 0: pwgan
if stage_enabled 0; then
    run_inference_npu pwgan_csmsc
fi

# Additional GAN vocoders.
# stage 1: multi band melgan
if stage_enabled 1; then
    run_inference_npu mb_melgan_csmsc
fi

# stage 2: hifigan
if stage_enabled 2; then
    run_inference_npu hifigan_csmsc
fi


================================================
FILE: examples/csmsc/tts2/local/inference_xpu.sh
================================================
#!/bin/bash
# Run inference.py for the SpeedySpeech (CSMSC) acoustic model with
# `--device xpu` and a selectable vocoder.
#
# Usage: inference_xpu.sh <train_output_path>
# BIN_DIR is used but not set here, so it must come from the environment.

train_output_path=$1

# Stages outside [stage, stop_stage] are skipped; with both set to 0 only the
# pwgan stage runs.
stage=0
stop_stage=0

# Succeeds when stage number $1 lies within [stage, stop_stage].
stage_enabled() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# Invoke inference.py on the xpu device with the vocoder named in $1.
run_inference_xpu() {
    python3 ${BIN_DIR}/../inference.py \
        --inference_dir=${train_output_path}/inference \
        --am=speedyspeech_csmsc \
        --voc=$1 \
        --text=${BIN_DIR}/../../assets/sentences.txt \
        --output_dir=${train_output_path}/pd_infer_out \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --device xpu
}

# stage 0: pwgan
if stage_enabled 0; then
    run_inference_xpu pwgan_csmsc
fi

# Additional GAN vocoders.
# stage 1: multi band melgan
if stage_enabled 1; then
    run_inference_xpu mb_melgan_csmsc
fi

# stage 2: hifigan
if stage_enabled 2; then
    run_inference_xpu hifigan_csmsc
fi


================================================
FILE: examples/csmsc/tts2/local/lite_predict.sh
================================================
#!/bin/bash
# Run lite_predict.py for the SpeedySpeech (CSMSC) acoustic model with a
# selectable vocoder.
#
# Usage: lite_predict.sh <train_output_path>
#   <train_output_path>  experiment directory; <train_output_path>/pdlite is
#                        passed as --inference_dir and
#                        <train_output_path>/lite_infer_out as --output_dir.
# BIN_DIR is used but not set here, so it must come from the environment.

train_output_path=$1

# Stages outside [stage, stop_stage] are skipped; with both set to 0 only the
# pwgan stage runs.
stage=0
stop_stage=0

# Succeeds when stage number $1 lies within [stage, stop_stage].
stage_enabled() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# Invoke lite_predict.py for speedyspeech_csmsc with the vocoder named in $1.
run_lite_predict() {
    python3 ${BIN_DIR}/../lite_predict.py \
        --inference_dir=${train_output_path}/pdlite \
        --am=speedyspeech_csmsc \
        --voc=$1 \
        --text=${BIN_DIR}/../../assets/sentences.txt \
        --output_dir=${train_output_path}/lite_infer_out \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt
}

# stage 0: pwgan
if stage_enabled 0; then
    run_lite_predict pwgan_csmsc
fi

# Additional GAN vocoders.
# stage 1: multi band melgan
if stage_enabled 1; then
    run_lite_predict mb_melgan_csmsc
fi

# stage 2: hifigan
if stage_enabled 2; then
    run_lite_predict hifigan_csmsc
fi


================================================
FILE: examples/csmsc/tts2/local/ort_predict.sh
================================================
#!/bin/bash
# Run the ONNX Runtime prediction scripts for the SpeedySpeech (CSMSC)
# acoustic model.
#
# Usage: ort_predict.sh <train_output_path>
#   <train_output_path>  experiment directory; <train_output_path>/inference_onnx
#                        is passed as --inference_dir.
# BIN_DIR is used but not set here, so it must come from the environment.
#
# The shebang above makes the interpreter explicit, so the script no longer
# depends on the calling shell's fallback for executables without one.

train_output_path=$1

# Stages outside [stage, stop_stage] are skipped; with both set to 0 only the
# pwgan e2e stage runs.
stage=0
stop_stage=0

# e2e, synthesize from text
# stage 0: pwgan
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    python3 ${BIN_DIR}/../ort_predict_e2e.py \
        --inference_dir=${train_output_path}/inference_onnx \
        --am=speedyspeech_csmsc \
        --voc=pwgan_csmsc \
        --output_dir=${train_output_path}/onnx_infer_out_e2e \
        --text=${BIN_DIR}/../../assets/csmsc_test.txt \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --device=cpu \
        --cpu_threads=2
fi

# stage 1: multi band melgan
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    python3 ${BIN_DIR}/../ort_predict_e2e.py \
        --inference_dir=${train_output_path}/inference_onnx \
        --am=speedyspeech_csmsc \
        --voc=mb_melgan_csmsc \
        --output_dir=${train_output_path}/onnx_infer_out_e2e \
        --text=${BIN_DIR}/../../assets/csmsc_test.txt \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --device=cpu \
        --cpu_threads=2
fi

# stage 2: hifigan
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    python3 ${BIN_DIR}/../ort_predict_e2e.py \
        --inference_dir=${train_output_path}/inference_onnx \
        --am=speedyspeech_csmsc \
        --voc=hifigan_csmsc \
        --output_dir=${train_output_path}/onnx_infer_out_e2e \
        --text=${BIN_DIR}/../../assets/csmsc_test.txt \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --device=cpu \
        --cpu_threads=2
fi

# synthesize from metadata
# stage 3: hifigan, reading dump/test/norm/metadata.jsonl instead of raw text
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    python3 ${BIN_DIR}/../ort_predict.py \
        --inference_dir=${train_output_path}/inference_onnx \
        --am=speedyspeech_csmsc \
        --voc=hifigan_csmsc \
        --test_metadata=dump/test/norm/metadata.jsonl \
        --output_dir=${train_output_path}/onnx_infer_out \
        --device=cpu \
        --cpu_threads=2
fi


================================================
FILE: examples/csmsc/tts2/local/preprocess.sh
================================================
#!/bin/bash
# Data preparation for SpeedySpeech on CSMSC (baker): durations, feature
# extraction, statistics, and normalization.
#
# Usage: preprocess.sh <config_path>
# MAIN_ROOT and BIN_DIR are used but not set here, so they must come from the
# environment.

config_path=$1

# All stages within [stage, stop_stage] run; the defaults run every stage.
stage=0
stop_stage=100

# Succeeds when stage number $1 lies within [stage, stop_stage].
stage_enabled() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# stage 0: derive durations.txt from the MFA alignment results
if stage_enabled 0; then
    echo "Generate durations.txt from MFA results ..."
    python3 ${MAIN_ROOT}/utils/gen_duration_from_textgrid.py \
        --inputdir=./baker_alignment_tone \
        --output=durations.txt \
        --config=${config_path}
fi

# stage 1: extract features into dump/
if stage_enabled 1; then
    echo "Extract features ..."
    python3 ${BIN_DIR}/preprocess.py \
        --dataset=baker \
        --rootdir=~/datasets/BZNSYP/ \
        --dumpdir=dump \
        --dur-file=durations.txt \
        --config=${config_path} \
        --num-cpu=20 \
        --cut-sil=True \
        --use-relative-path=True
fi

# stage 2: compute statistics of the "feats" field over the training set
if stage_enabled 2; then
    echo "Get features' stats ..."
    python3 ${MAIN_ROOT}/utils/compute_statistics.py \
        --metadata=dump/train/raw/metadata.jsonl \
        --field-name="feats" \
        --use-relative-path=True
fi

# stage 3: normalize and convert phone/tone to id; dev and test reuse the
# statistics computed on train
if stage_enabled 3; then
    echo "Normalize ..."
    for subset in train dev test; do
        python3 ${BIN_DIR}/normalize.py \
            --metadata=dump/${subset}/raw/metadata.jsonl \
            --dumpdir=dump/${subset}/norm \
            --stats=dump/train/feats_stats.npy \
            --phones-dict=dump/phone_id_map.txt \
            --tones-dict=dump/tone_id_map.txt \
            --speaker-dict=dump/speaker_id_map.txt \
            --use-relative-path=True
    done
fi


================================================
FILE: examples/csmsc/tts2/local/synthesize.sh
================================================
#!/bin/bash
# Synthesize from dump/test/norm/metadata.jsonl with a trained SpeedySpeech
# (CSMSC) checkpoint and one of several vocoders.
#
# Usage: synthesize.sh <config_path> <train_output_path> <ckpt_name> [stage]
#   [stage]  vocoder stage to run (default 0):
#            0 pwgan, 1 multi band melgan, 2 style melgan, 3 hifigan, 4 wavernn
# BIN_DIR is used but not set here, so it must come from the environment.

config_path=$1
train_output_path=$2
ckpt_name=$3
# The 4th argument sets both bounds, so at most one stage runs.
stage=${4:-0}
stop_stage=${4:-0}

# Succeeds when stage number $1 lies within [stage, stop_stage].
stage_enabled() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# Run synthesize.py with the shared acoustic-model arguments.
#   $1 vocoder name, $2 vocoder config, $3 vocoder checkpoint, $4 vocoder stats
#   remaining arguments are appended unchanged (the dictionary options).
run_synthesize() {
    local voc=$1 voc_config=$2 voc_ckpt=$3 voc_stat=$4
    shift 4
    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 ${BIN_DIR}/../synthesize.py \
        --am=speedyspeech_csmsc \
        --am_config=${config_path} \
        --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
        --am_stat=dump/train/feats_stats.npy \
        --voc=${voc} \
        --voc_config=${voc_config} \
        --voc_ckpt=${voc_ckpt} \
        --voc_stat=${voc_stat} \
        --test_metadata=dump/test/norm/metadata.jsonl \
        --output_dir=${train_output_path}/test \
        "$@"
}

# stage 0: pwgan
if stage_enabled 0; then
    run_synthesize pwgan_csmsc \
        pwg_baker_ckpt_0.4/pwg_default.yaml \
        pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \
        pwg_baker_ckpt_0.4/pwg_stats.npy \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt
fi

# Additional GAN vocoders.
# stage 1: multi band melgan
if stage_enabled 1; then
    run_synthesize mb_melgan_csmsc \
        mb_melgan_csmsc_ckpt_0.1.1/default.yaml \
        mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz \
        mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt
fi

# stage 2: style melgan
if stage_enabled 2; then
    run_synthesize style_melgan_csmsc \
        style_melgan_csmsc_ckpt_0.1.1/default.yaml \
        style_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1500000.pdz \
        style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt
fi

# stage 3: hifigan
if stage_enabled 3; then
    echo "in hifigan syn"
    run_synthesize hifigan_csmsc \
        hifigan_csmsc_ckpt_0.1.1/default.yaml \
        hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \
        hifigan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt
fi

# stage 4: wavernn
if stage_enabled 4; then
    echo "in wavernn syn"
    run_synthesize wavernn_csmsc \
        wavernn_csmsc_ckpt_0.2.0/default.yaml \
        wavernn_csmsc_ckpt_0.2.0/snapshot_iter_400000.pdz \
        wavernn_csmsc_ckpt_0.2.0/feats_stats.npy \
        --tones_dict=dump/tone_id_map.txt \
        --phones_dict=dump/phone_id_map.txt
fi


================================================
FILE: examples/csmsc/tts2/local/synthesize_e2e.sh
================================================
#!/bin/bash
# Synthesize from the sentences text file with a trained SpeedySpeech (CSMSC)
# checkpoint and one of several vocoders.
#
# Usage: synthesize_e2e.sh <config_path> <train_output_path> <ckpt_name> [stage]
#   [stage]  vocoder stage to run (default 0):
#            0 pwgan, 1 multi band melgan, 2 style melgan, 3 hifigan, 4 wavernn
# BIN_DIR is used but not set here, so it must come from the environment.

config_path=$1
train_output_path=$2
ckpt_name=$3

# The 4th argument sets both bounds, so at most one stage runs.
stage=${4:-0}
stop_stage=${4:-0}

# Succeeds when stage number $1 lies within [stage, stop_stage].
stage_enabled() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# Run synthesize_e2e.py with the shared acoustic-model and text arguments.
#   $1 vocoder name, $2 vocoder config, $3 vocoder checkpoint, $4 vocoder stats
#   remaining arguments are appended unchanged (e.g. --inference_dir=...).
run_synthesize_e2e() {
    local voc=$1 voc_config=$2 voc_ckpt=$3 voc_stat=$4
    shift 4
    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 ${BIN_DIR}/../synthesize_e2e.py \
        --am=speedyspeech_csmsc \
        --am_config=${config_path} \
        --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
        --am_stat=dump/train/feats_stats.npy \
        --voc=${voc} \
        --voc_config=${voc_config} \
        --voc_ckpt=${voc_ckpt} \
        --voc_stat=${voc_stat} \
        --lang=zh \
        --text=${BIN_DIR}/../../assets/sentences.txt \
        --output_dir=${train_output_path}/test_e2e \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        "$@"
}

# stage 0: pwgan
if stage_enabled 0; then
    run_synthesize_e2e pwgan_csmsc \
        pwg_baker_ckpt_0.4/pwg_default.yaml \
        pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \
        pwg_baker_ckpt_0.4/pwg_stats.npy \
        --inference_dir=${train_output_path}/inference
fi

# Additional GAN vocoders.
# stage 1: multi band melgan
if stage_enabled 1; then
    run_synthesize_e2e mb_melgan_csmsc \
        mb_melgan_csmsc_ckpt_0.1.1/default.yaml \
        mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz \
        mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --inference_dir=${train_output_path}/inference
fi

# stage 2: style melgan
# Carried-over note: the pretrained models were marked as not released yet.
# Style melgan's dygraph-to-static conversion is not ready, so --inference_dir
# is deliberately not passed here.
if stage_enabled 2; then
    run_synthesize_e2e style_melgan_csmsc \
        style_melgan_csmsc_ckpt_0.1.1/default.yaml \
        style_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1500000.pdz \
        style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy
fi

# stage 3: hifigan
if stage_enabled 3; then
    run_synthesize_e2e hifigan_csmsc \
        hifigan_csmsc_ckpt_0.1.1/default.yaml \
        hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \
        hifigan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --inference_dir=${train_output_path}/inference
fi

# stage 4: wavernn
if stage_enabled 4; then
    echo "in wavernn syn_e2e"
    run_synthesize_e2e wavernn_csmsc \
        wavernn_csmsc_ckpt_0.2.0/default.yaml \
        wavernn_csmsc_ckpt_0.2.0/snapshot_iter_400000.pdz \
        wavernn_csmsc_ckpt_0.2.0/feats_stats.npy \
        --inference_dir=${train_output_path}/inference
fi


================================================
FILE: examples/csmsc/tts2/local/synthesize_e2e_mlu.sh
================================================
#!/bin/bash
# Usage: synthesize_e2e_mlu.sh <config_path> <train_output_path> <ckpt_name> [stage]

config_path="$1"         # passed to synthesize_e2e.py as --am_config
train_output_path="$2"   # experiment directory; checkpoints are read from its checkpoints/ subdirectory
ckpt_name="$3"           # checkpoint file name inside ${train_output_path}/checkpoints

# The optional 4th argument selects the single stage (vocoder) to run:
#   0 = multi band melgan, 1 = style melgan, 2 = hifigan, 3 = wavernn
# It defaults to 0, which matches the previously hard-coded value.
stage="${4:-0}"
stop_stage="${4:-0}"

# for more GAN Vocoders
# multi band melgan
# Stage 0: run synthesize_e2e.py with the mb_melgan_csmsc vocoder
# (--ngpu=0, --nmlu=1). Expansions are quoted so that paths containing
# whitespace or glob characters reach the program as single arguments.
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize_e2e.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc=mb_melgan_csmsc \
        --voc_config=mb_melgan_csmsc_ckpt_0.1.1/default.yaml \
        --voc_ckpt=mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz \
        --voc_stat=mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --lang=zh \
        --text="${BIN_DIR}/../../assets/sentences.txt" \
        --output_dir="${train_output_path}/test_e2e" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --ngpu=0 \
        --nmlu=1
fi

# the pretrained models haven't been released yet
# style melgan
# style melgan's Dygraph to Static Graph conversion is not ready yet
# Stage 1: run synthesize_e2e.py with the style_melgan_csmsc vocoder
# (--ngpu=0, --nmlu=1); --inference_dir is left commented out below.
if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize_e2e.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc=style_melgan_csmsc \
        --voc_config=style_melgan_csmsc_ckpt_0.1.1/default.yaml \
        --voc_ckpt=style_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1500000.pdz \
        --voc_stat=style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --lang=zh \
        --text="${BIN_DIR}/../../assets/sentences.txt" \
        --output_dir="${train_output_path}/test_e2e" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --ngpu=0 \
        --nmlu=1
        # --inference_dir=${train_output_path}/inference
fi

# hifigan
# Stage 2: run synthesize_e2e.py with the hifigan_csmsc vocoder
# (--ngpu=0, --nmlu=1). Expansions are quoted so that paths containing
# whitespace or glob characters reach the program as single arguments.
if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize_e2e.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc=hifigan_csmsc \
        --voc_config=hifigan_csmsc_ckpt_0.1.1/default.yaml \
        --voc_ckpt=hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \
        --voc_stat=hifigan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --lang=zh \
        --text="${BIN_DIR}/../../assets/sentences.txt" \
        --output_dir="${train_output_path}/test_e2e" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --inference_dir="${train_output_path}/inference" \
        --ngpu=0 \
        --nmlu=1
fi

# wavernn
# Stage 3: run synthesize_e2e.py with the wavernn_csmsc vocoder
# (--ngpu=0, --nmlu=1). Expansions are quoted so that paths containing
# whitespace or glob characters reach the program as single arguments.
if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    echo "in wavernn syn_e2e"
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize_e2e.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc=wavernn_csmsc \
        --voc_config=wavernn_csmsc_ckpt_0.2.0/default.yaml \
        --voc_ckpt=wavernn_csmsc_ckpt_0.2.0/snapshot_iter_400000.pdz \
        --voc_stat=wavernn_csmsc_ckpt_0.2.0/feats_stats.npy \
        --lang=zh \
        --text="${BIN_DIR}/../../assets/sentences.txt" \
        --output_dir="${train_output_path}/test_e2e" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --inference_dir="${train_output_path}/inference" \
        --ngpu=0 \
        --nmlu=1
fi


================================================
FILE: examples/csmsc/tts2/local/synthesize_e2e_npu.sh
================================================
#!/bin/bash
# Usage: synthesize_e2e_npu.sh <config_path> <train_output_path> <ckpt_name> [stage]

config_path="$1"         # passed to synthesize_e2e.py as --am_config
train_output_path="$2"   # experiment directory; checkpoints are read from its checkpoints/ subdirectory
ckpt_name="$3"           # checkpoint file name inside ${train_output_path}/checkpoints

# The optional 4th argument selects the single stage (vocoder) to run:
#   0 = pwgan, 1 = multi band melgan, 2 = style melgan, 3 = hifigan, 4 = wavernn
# It defaults to 0, which matches the previously hard-coded value.
stage="${4:-0}"
stop_stage="${4:-0}"

# pwgan
# Stage 0: run synthesize_e2e.py with the pwgan_csmsc vocoder
# (--ngpu=0, --nnpu=1). Expansions are quoted so that paths containing
# whitespace or glob characters reach the program as single arguments.
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize_e2e.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc=pwgan_csmsc \
        --voc_config=pwg_baker_ckpt_0.4/pwg_default.yaml \
        --voc_ckpt=pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \
        --voc_stat=pwg_baker_ckpt_0.4/pwg_stats.npy \
        --lang=zh \
        --text="${BIN_DIR}/../../assets/sentences.txt" \
        --output_dir="${train_output_path}/test_e2e" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --inference_dir="${train_output_path}/inference" \
        --ngpu=0 \
        --nnpu=1
fi

# for more GAN Vocoders
# multi band melgan
# Stage 1: run synthesize_e2e.py with the mb_melgan_csmsc vocoder
# (--ngpu=0, --nnpu=1). Expansions are quoted so that paths containing
# whitespace or glob characters reach the program as single arguments.
if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize_e2e.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc=mb_melgan_csmsc \
        --voc_config=mb_melgan_csmsc_ckpt_0.1.1/default.yaml \
        --voc_ckpt=mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz \
        --voc_stat=mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --lang=zh \
        --text="${BIN_DIR}/../../assets/sentences.txt" \
        --output_dir="${train_output_path}/test_e2e" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --inference_dir="${train_output_path}/inference" \
        --ngpu=0 \
        --nnpu=1
fi

# the pretrained models haven't been released yet
# style melgan
# style melgan's Dygraph to Static Graph conversion is not ready yet
# Stage 2: run synthesize_e2e.py with the style_melgan_csmsc vocoder
# (--ngpu=0, --nnpu=1); --inference_dir is left commented out below.
if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize_e2e.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc=style_melgan_csmsc \
        --voc_config=style_melgan_csmsc_ckpt_0.1.1/default.yaml \
        --voc_ckpt=style_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1500000.pdz \
        --voc_stat=style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --lang=zh \
        --text="${BIN_DIR}/../../assets/sentences.txt" \
        --output_dir="${train_output_path}/test_e2e" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --ngpu=0 \
        --nnpu=1
        # --inference_dir=${train_output_path}/inference
fi

# hifigan
# Stage 3: run synthesize_e2e.py with the hifigan_csmsc vocoder
# (--ngpu=0, --nnpu=1). Expansions are quoted so that paths containing
# whitespace or glob characters reach the program as single arguments.
if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize_e2e.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc=hifigan_csmsc \
        --voc_config=hifigan_csmsc_ckpt_0.1.1/default.yaml \
        --voc_ckpt=hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \
        --voc_stat=hifigan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --lang=zh \
        --text="${BIN_DIR}/../../assets/sentences.txt" \
        --output_dir="${train_output_path}/test_e2e" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --inference_dir="${train_output_path}/inference" \
        --ngpu=0 \
        --nnpu=1
fi

# wavernn
# Stage 4: run synthesize_e2e.py with the wavernn_csmsc vocoder
# (--ngpu=0, --nnpu=1). Expansions are quoted so that paths containing
# whitespace or glob characters reach the program as single arguments.
if [ "${stage}" -le 4 ] && [ "${stop_stage}" -ge 4 ]; then
    echo "in wavernn syn_e2e"
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize_e2e.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc=wavernn_csmsc \
        --voc_config=wavernn_csmsc_ckpt_0.2.0/default.yaml \
        --voc_ckpt=wavernn_csmsc_ckpt_0.2.0/snapshot_iter_400000.pdz \
        --voc_stat=wavernn_csmsc_ckpt_0.2.0/feats_stats.npy \
        --lang=zh \
        --text="${BIN_DIR}/../../assets/sentences.txt" \
        --output_dir="${train_output_path}/test_e2e" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --inference_dir="${train_output_path}/inference" \
        --ngpu=0 \
        --nnpu=1
fi


================================================
FILE: examples/csmsc/tts2/local/synthesize_e2e_xpu.sh
================================================
#!/bin/bash
# Usage: synthesize_e2e_xpu.sh <config_path> <train_output_path> <ckpt_name> [stage]

config_path="$1"         # passed to synthesize_e2e.py as --am_config
train_output_path="$2"   # experiment directory; checkpoints are read from its checkpoints/ subdirectory
ckpt_name="$3"           # checkpoint file name inside ${train_output_path}/checkpoints

# The optional 4th argument selects the single stage (vocoder) to run:
#   0 = pwgan, 1 = multi band melgan, 2 = style melgan, 3 = hifigan, 4 = wavernn
# It defaults to 0, which matches the previously hard-coded value.
stage="${4:-0}"
stop_stage="${4:-0}"

# pwgan
# Stage 0: run synthesize_e2e.py with the pwgan_csmsc vocoder
# (--ngpu=0, --nxpu=1). Expansions are quoted so that paths containing
# whitespace or glob characters reach the program as single arguments.
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize_e2e.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc=pwgan_csmsc \
        --voc_config=pwg_baker_ckpt_0.4/pwg_default.yaml \
        --voc_ckpt=pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \
        --voc_stat=pwg_baker_ckpt_0.4/pwg_stats.npy \
        --lang=zh \
        --text="${BIN_DIR}/../../assets/sentences.txt" \
        --output_dir="${train_output_path}/test_e2e" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --inference_dir="${train_output_path}/inference" \
        --ngpu=0 \
        --nxpu=1
fi

# for more GAN Vocoders
# multi band melgan
# Stage 1: run synthesize_e2e.py with the mb_melgan_csmsc vocoder
# (--ngpu=0, --nxpu=1). Expansions are quoted so that paths containing
# whitespace or glob characters reach the program as single arguments.
if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize_e2e.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc=mb_melgan_csmsc \
        --voc_config=mb_melgan_csmsc_ckpt_0.1.1/default.yaml \
        --voc_ckpt=mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz \
        --voc_stat=mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --lang=zh \
        --text="${BIN_DIR}/../../assets/sentences.txt" \
        --output_dir="${train_output_path}/test_e2e" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --inference_dir="${train_output_path}/inference" \
        --ngpu=0 \
        --nxpu=1
fi

# the pretrained models haven't been released yet
# style melgan
# style melgan's Dygraph to Static Graph conversion is not ready yet
# Stage 2: run synthesize_e2e.py with the style_melgan_csmsc vocoder
# (--ngpu=0, --nxpu=1); --inference_dir is left commented out below.
if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize_e2e.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc=style_melgan_csmsc \
        --voc_config=style_melgan_csmsc_ckpt_0.1.1/default.yaml \
        --voc_ckpt=style_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1500000.pdz \
        --voc_stat=style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --lang=zh \
        --text="${BIN_DIR}/../../assets/sentences.txt" \
        --output_dir="${train_output_path}/test_e2e" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --ngpu=0 \
        --nxpu=1
        # --inference_dir=${train_output_path}/inference
fi

# hifigan
# Stage 3: run synthesize_e2e.py with the hifigan_csmsc vocoder
# (--ngpu=0, --nxpu=1). Expansions are quoted so that paths containing
# whitespace or glob characters reach the program as single arguments.
if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize_e2e.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc=hifigan_csmsc \
        --voc_config=hifigan_csmsc_ckpt_0.1.1/default.yaml \
        --voc_ckpt=hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \
        --voc_stat=hifigan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --lang=zh \
        --text="${BIN_DIR}/../../assets/sentences.txt" \
        --output_dir="${train_output_path}/test_e2e" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --inference_dir="${train_output_path}/inference" \
        --ngpu=0 \
        --nxpu=1
fi

# wavernn
# Stage 4: run synthesize_e2e.py with the wavernn_csmsc vocoder
# (--ngpu=0, --nxpu=1). Expansions are quoted so that paths containing
# whitespace or glob characters reach the program as single arguments.
if [ "${stage}" -le 4 ] && [ "${stop_stage}" -ge 4 ]; then
    echo "in wavernn syn_e2e"
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize_e2e.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc=wavernn_csmsc \
        --voc_config=wavernn_csmsc_ckpt_0.2.0/default.yaml \
        --voc_ckpt=wavernn_csmsc_ckpt_0.2.0/snapshot_iter_400000.pdz \
        --voc_stat=wavernn_csmsc_ckpt_0.2.0/feats_stats.npy \
        --lang=zh \
        --text="${BIN_DIR}/../../assets/sentences.txt" \
        --output_dir="${train_output_path}/test_e2e" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --inference_dir="${train_output_path}/inference" \
        --ngpu=0 \
        --nxpu=1
fi


================================================
FILE: examples/csmsc/tts2/local/synthesize_mlu.sh
================================================
#!/bin/bash
# Usage: synthesize_mlu.sh <config_path> <train_output_path> <ckpt_name> [stage]

config_path="$1"         # passed to synthesize.py as --am_config
train_output_path="$2"   # experiment directory; checkpoints are read from its checkpoints/ subdirectory
ckpt_name="$3"           # checkpoint file name inside ${train_output_path}/checkpoints
# The optional 4th argument selects the single stage (vocoder) to run:
#   0 = multi band melgan, 1 = style melgan, 2 = hifigan, 3 = wavernn
# It defaults to 0, which matches the previously hard-coded value.
stage="${4:-0}"
stop_stage="${4:-0}"

# for more GAN Vocoders
# multi band melgan
# Stage 0: run synthesize.py on dump/test/norm/metadata.jsonl with the
# mb_melgan_csmsc vocoder (--ngpu=0, --nmlu=1). Expansions are quoted so that
# paths containing whitespace or glob characters stay single arguments.
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc=mb_melgan_csmsc \
        --voc_config=mb_melgan_csmsc_ckpt_0.1.1/default.yaml \
        --voc_ckpt=mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz \
        --voc_stat=mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --test_metadata=dump/test/norm/metadata.jsonl \
        --output_dir="${train_output_path}/test" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --ngpu=0 \
        --nmlu=1
fi

# style melgan
# Stage 1: run synthesize.py on dump/test/norm/metadata.jsonl with the
# style_melgan_csmsc vocoder (--ngpu=0, --nmlu=1). Expansions are quoted so
# that paths containing whitespace or glob characters stay single arguments.
if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc=style_melgan_csmsc \
        --voc_config=style_melgan_csmsc_ckpt_0.1.1/default.yaml \
        --voc_ckpt=style_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1500000.pdz \
        --voc_stat=style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --test_metadata=dump/test/norm/metadata.jsonl \
        --output_dir="${train_output_path}/test" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --ngpu=0 \
        --nmlu=1
fi

# hifigan
# Stage 2: run synthesize.py on dump/test/norm/metadata.jsonl with the
# hifigan_csmsc vocoder (--ngpu=0, --nmlu=1). Expansions are quoted so that
# paths containing whitespace or glob characters stay single arguments.
if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    echo "in hifigan syn"
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc=hifigan_csmsc \
        --voc_config=hifigan_csmsc_ckpt_0.1.1/default.yaml \
        --voc_ckpt=hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \
        --voc_stat=hifigan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --test_metadata=dump/test/norm/metadata.jsonl \
        --output_dir="${train_output_path}/test" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --ngpu=0 \
        --nmlu=1
fi

# wavernn
# Stage 3: run synthesize.py on dump/test/norm/metadata.jsonl with the
# wavernn_csmsc vocoder (--ngpu=0, --nmlu=1). Expansions are quoted so that
# paths containing whitespace or glob characters stay single arguments.
if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    echo "in wavernn syn"
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc=wavernn_csmsc \
        --voc_config=wavernn_csmsc_ckpt_0.2.0/default.yaml \
        --voc_ckpt=wavernn_csmsc_ckpt_0.2.0/snapshot_iter_400000.pdz \
        --voc_stat=wavernn_csmsc_ckpt_0.2.0/feats_stats.npy \
        --test_metadata=dump/test/norm/metadata.jsonl \
        --output_dir="${train_output_path}/test" \
        --tones_dict=dump/tone_id_map.txt \
        --phones_dict=dump/phone_id_map.txt \
        --ngpu=0 \
        --nmlu=1
fi


================================================
FILE: examples/csmsc/tts2/local/synthesize_npu.sh
================================================
#!/bin/bash
# Usage: synthesize_npu.sh <config_path> <train_output_path> <ckpt_name> [stage]

config_path="$1"         # passed to synthesize.py as --am_config
train_output_path="$2"   # experiment directory; checkpoints are read from its checkpoints/ subdirectory
ckpt_name="$3"           # checkpoint file name inside ${train_output_path}/checkpoints
# The optional 4th argument selects the single stage (vocoder) to run:
#   0 = pwgan, 1 = multi band melgan, 2 = style melgan, 3 = hifigan, 4 = wavernn
# It defaults to 0, which matches the previously hard-coded value.
stage="${4:-0}"
stop_stage="${4:-0}"

# pwgan
# Stage 0: run synthesize.py on dump/test/norm/metadata.jsonl with the
# pwgan_csmsc vocoder (--ngpu=0, --nnpu=1). Expansions are quoted so that
# paths containing whitespace or glob characters stay single arguments.
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc=pwgan_csmsc \
        --voc_config=pwg_baker_ckpt_0.4/pwg_default.yaml \
        --voc_ckpt=pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \
        --voc_stat=pwg_baker_ckpt_0.4/pwg_stats.npy \
        --test_metadata=dump/test/norm/metadata.jsonl \
        --output_dir="${train_output_path}/test" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --ngpu=0 \
        --nnpu=1
fi

# for more GAN Vocoders
# multi band melgan
# Stage 1: run synthesize.py on dump/test/norm/metadata.jsonl with the
# mb_melgan_csmsc vocoder (--ngpu=0, --nnpu=1). Expansions are quoted so that
# paths containing whitespace or glob characters stay single arguments.
if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc=mb_melgan_csmsc \
        --voc_config=mb_melgan_csmsc_ckpt_0.1.1/default.yaml \
        --voc_ckpt=mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz \
        --voc_stat=mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --test_metadata=dump/test/norm/metadata.jsonl \
        --output_dir="${train_output_path}/test" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --ngpu=0 \
        --nnpu=1
fi

# style melgan
# Stage 2: run synthesize.py on dump/test/norm/metadata.jsonl with the
# style_melgan_csmsc vocoder (--ngpu=0, --nnpu=1). Expansions are quoted so
# that paths containing whitespace or glob characters stay single arguments.
if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc=style_melgan_csmsc \
        --voc_config=style_melgan_csmsc_ckpt_0.1.1/default.yaml \
        --voc_ckpt=style_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1500000.pdz \
        --voc_stat=style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --test_metadata=dump/test/norm/metadata.jsonl \
        --output_dir="${train_output_path}/test" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --ngpu=0 \
        --nnpu=1
fi

# hifigan
# Stage 3: run synthesize.py on dump/test/norm/metadata.jsonl with the
# hifigan_csmsc vocoder (--ngpu=0, --nnpu=1). Expansions are quoted so that
# paths containing whitespace or glob characters stay single arguments.
if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    echo "in hifigan syn"
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc=hifigan_csmsc \
        --voc_config=hifigan_csmsc_ckpt_0.1.1/default.yaml \
        --voc_ckpt=hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \
        --voc_stat=hifigan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --test_metadata=dump/test/norm/metadata.jsonl \
        --output_dir="${train_output_path}/test" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --ngpu=0 \
        --nnpu=1
fi

# wavernn
# Stage 4: run synthesize.py on dump/test/norm/metadata.jsonl with the
# wavernn_csmsc vocoder (--ngpu=0, --nnpu=1). Expansions are quoted so that
# paths containing whitespace or glob characters stay single arguments.
if [ "${stage}" -le 4 ] && [ "${stop_stage}" -ge 4 ]; then
    echo "in wavernn syn"
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc=wavernn_csmsc \
        --voc_config=wavernn_csmsc_ckpt_0.2.0/default.yaml \
        --voc_ckpt=wavernn_csmsc_ckpt_0.2.0/snapshot_iter_400000.pdz \
        --voc_stat=wavernn_csmsc_ckpt_0.2.0/feats_stats.npy \
        --test_metadata=dump/test/norm/metadata.jsonl \
        --output_dir="${train_output_path}/test" \
        --tones_dict=dump/tone_id_map.txt \
        --phones_dict=dump/phone_id_map.txt \
        --ngpu=0 \
        --nnpu=1
fi


================================================
FILE: examples/csmsc/tts2/local/synthesize_xpu.sh
================================================
#!/bin/bash
# Usage: synthesize_xpu.sh <config_path> <train_output_path> <ckpt_name> [stage]

config_path="$1"         # passed to synthesize.py as --am_config
train_output_path="$2"   # experiment directory; checkpoints are read from its checkpoints/ subdirectory
ckpt_name="$3"           # checkpoint file name inside ${train_output_path}/checkpoints
# The optional 4th argument selects the single stage (vocoder) to run:
#   0 = pwgan, 1 = multi band melgan, 2 = style melgan, 3 = hifigan, 4 = wavernn
# It defaults to 0, which matches the previously hard-coded value.
stage="${4:-0}"
stop_stage="${4:-0}"

# pwgan
# Stage 0: run synthesize.py on dump/test/norm/metadata.jsonl with the
# pwgan_csmsc vocoder (--ngpu=0, --nxpu=1). Expansions are quoted so that
# paths containing whitespace or glob characters stay single arguments.
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc=pwgan_csmsc \
        --voc_config=pwg_baker_ckpt_0.4/pwg_default.yaml \
        --voc_ckpt=pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \
        --voc_stat=pwg_baker_ckpt_0.4/pwg_stats.npy \
        --test_metadata=dump/test/norm/metadata.jsonl \
        --output_dir="${train_output_path}/test" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --ngpu=0 \
        --nxpu=1
fi

# for more GAN Vocoders
# multi band melgan
# Stage 1: run synthesize.py on dump/test/norm/metadata.jsonl with the
# mb_melgan_csmsc vocoder (--ngpu=0, --nxpu=1). Expansions are quoted so that
# paths containing whitespace or glob characters stay single arguments.
if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc=mb_melgan_csmsc \
        --voc_config=mb_melgan_csmsc_ckpt_0.1.1/default.yaml \
        --voc_ckpt=mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz \
        --voc_stat=mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --test_metadata=dump/test/norm/metadata.jsonl \
        --output_dir="${train_output_path}/test" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --ngpu=0 \
        --nxpu=1
fi

# style melgan
# Stage 2: run synthesize.py on dump/test/norm/metadata.jsonl with the
# style_melgan_csmsc vocoder (--ngpu=0, --nxpu=1). Expansions are quoted so
# that paths containing whitespace or glob characters stay single arguments.
if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc=style_melgan_csmsc \
        --voc_config=style_melgan_csmsc_ckpt_0.1.1/default.yaml \
        --voc_ckpt=style_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1500000.pdz \
        --voc_stat=style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --test_metadata=dump/test/norm/metadata.jsonl \
        --output_dir="${train_output_path}/test" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --ngpu=0 \
        --nxpu=1
fi

# hifigan
# Stage 3: run synthesize.py on dump/test/norm/metadata.jsonl with the
# hifigan_csmsc vocoder (--ngpu=0, --nxpu=1). Expansions are quoted so that
# paths containing whitespace or glob characters stay single arguments.
if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    echo "in hifigan syn"
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc=hifigan_csmsc \
        --voc_config=hifigan_csmsc_ckpt_0.1.1/default.yaml \
        --voc_ckpt=hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \
        --voc_stat=hifigan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --test_metadata=dump/test/norm/metadata.jsonl \
        --output_dir="${train_output_path}/test" \
        --phones_dict=dump/phone_id_map.txt \
        --tones_dict=dump/tone_id_map.txt \
        --ngpu=0 \
        --nxpu=1
fi

# wavernn
# Stage 4: run synthesize.py on dump/test/norm/metadata.jsonl with the
# wavernn_csmsc vocoder (--ngpu=0, --nxpu=1). Expansions are quoted so that
# paths containing whitespace or glob characters stay single arguments.
if [ "${stage}" -le 4 ] && [ "${stop_stage}" -ge 4 ]; then
    echo "in wavernn syn"
    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize.py" \
        --am=speedyspeech_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/feats_stats.npy \
        --voc=wavernn_csmsc \
        --voc_config=wavernn_csmsc_ckpt_0.2.0/default.yaml \
        --voc_ckpt=wavernn_csmsc_ckpt_0.2.0/snapshot_iter_400000.pdz \
        --voc_stat=wavernn_csmsc_ckpt_0.2.0/feats_stats.npy \
        --test_metadata=dump/test/norm/metadata.jsonl \
        --output_dir="${train_output_path}/test" \
        --tones_dict=dump/tone_id_map.txt \
        --phones_dict=dump/phone_id_map.txt \
        --ngpu=0 \
        --nxpu=1
fi


================================================
FILE: examples/csmsc/tts2/local/train.sh
================================================

#!/bin/bash
# Usage: train.sh <config_path> <train_output_path>
# Runs ${BIN_DIR}/train.py on the normalized train/dev metadata under dump/
# with --ngpu=1.

config_path="$1"         # passed to train.py as --config
train_output_path="$2"   # passed to train.py as --output-dir

# Expansions are quoted so that paths containing whitespace or glob characters
# reach train.py as single arguments.
python "${BIN_DIR}/train.py" \
    --train-metadata=dump/train/norm/metadata.jsonl \
    --dev-metadata=dump/dev/norm/metadata.jsonl \
    --config="${config_path}" \
    --output-dir="${train_output_path}" \
    --ngpu=1 \
    --phones-dict=dump/phone_id_map.txt \
    --tones-dict=dump/tone_id_map.txt \
    --use-relative-path=True


================================================
FILE: examples/csmsc/tts2/local/train_mlu.sh
================================================

#!/bin/bash
# Usage: train_mlu.sh <config_path> <train_output_path>
# Runs ${BIN_DIR}/train.py on the normalized train/dev metadata under dump/
# with --ngpu=0 and --nmlu=2.

config_path="$1"         # passed to train.py as --config
train_output_path="$2"   # passed to train.py as --output-dir
# export MLU_VISIBLE_DEVICES=8
# NOTE(review): run_mlu.sh sets mlus=0 (a single device id) while this script
# requests --nmlu=2 -- confirm the two are meant to agree.
# Expansions are quoted so that paths containing whitespace or glob characters
# reach train.py as single arguments.
python "${BIN_DIR}/train.py" \
    --train-metadata=dump/train/norm/metadata.jsonl \
    --dev-metadata=dump/dev/norm/metadata.jsonl \
    --config="${config_path}" \
    --output-dir="${train_output_path}" \
    --ngpu=0 \
    --nmlu=2 \
    --phones-dict=dump/phone_id_map.txt \
    --tones-dict=dump/tone_id_map.txt \
    --use-relative-path=True


================================================
FILE: examples/csmsc/tts2/local/train_npu.sh
================================================

#!/bin/bash
# Usage: train_npu.sh <config_path> <train_output_path>
# Runs ${BIN_DIR}/train.py on the normalized train/dev metadata under dump/
# with --ngpu=0 and --nnpu=1.

config_path="$1"         # passed to train.py as --config
train_output_path="$2"   # passed to train.py as --output-dir

# Expansions are quoted so that paths containing whitespace or glob characters
# reach train.py as single arguments.
python "${BIN_DIR}/train.py" \
    --train-metadata=dump/train/norm/metadata.jsonl \
    --dev-metadata=dump/dev/norm/metadata.jsonl \
    --config="${config_path}" \
    --output-dir="${train_output_path}" \
    --ngpu=0 \
    --nnpu=1 \
    --phones-dict=dump/phone_id_map.txt \
    --tones-dict=dump/tone_id_map.txt \
    --use-relative-path=True


================================================
FILE: examples/csmsc/tts2/local/train_xpu.sh
================================================

#!/bin/bash
# Usage: train_xpu.sh <config_path> <train_output_path>
# Runs ${BIN_DIR}/train.py on the normalized train/dev metadata under dump/
# with --ngpu=0 and --nxpu=1.

config_path="$1"         # passed to train.py as --config
train_output_path="$2"   # passed to train.py as --output-dir

# Expansions are quoted so that paths containing whitespace or glob characters
# reach train.py as single arguments.
python "${BIN_DIR}/train.py" \
    --train-metadata=dump/train/norm/metadata.jsonl \
    --dev-metadata=dump/dev/norm/metadata.jsonl \
    --config="${config_path}" \
    --output-dir="${train_output_path}" \
    --ngpu=0 \
    --nxpu=1 \
    --phones-dict=dump/phone_id_map.txt \
    --tones-dict=dump/tone_id_map.txt \
    --use-relative-path=True


================================================
FILE: examples/csmsc/tts2/path.sh
================================================
#!/bin/bash
# Environment setup for the tts2 recipe. MAIN_ROOT is resolved relative to the
# current working directory (three levels up from ${PWD}), so the result
# depends on where the script is sourced from.
# $(...) replaces the legacy backticks, and expansions are quoted so that a
# checkout path containing whitespace is handled correctly.
export MAIN_ROOT="$(realpath "${PWD}/../../../")"

export PATH="${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}"
export LC_ALL=C

export PYTHONDONTWRITEBYTECODE=1
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONPATH="${MAIN_ROOT}:${PYTHONPATH}"

# BIN_DIR points at the experiment scripts of the selected model.
MODEL=speedyspeech
export BIN_DIR="${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}"

================================================
FILE: examples/csmsc/tts2/run.sh
================================================
#!/bin/bash
# Driver for the tts2 recipe: runs every stage N with stage <= N <= stop_stage.
# Expansions are quoted so that paths containing whitespace or glob characters
# are passed to the helper scripts as single arguments.

set -e
source path.sh

gpus=0,1
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_76.pdz

# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this cannot be mixed with positional arguments (`$1`, `$2`, ...)
source "${MAIN_ROOT}/utils/parse_options.sh" || exit 1

if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    # prepare data
    ./local/preprocess.sh "${conf_path}" || exit -1
fi

if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    # train model, all `ckpt` under `train_output_path/checkpoints/` dir
    CUDA_VISIBLE_DEVICES="${gpus}" ./local/train.sh "${conf_path}" "${train_output_path}" || exit -1
fi

if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    # synthesize; the last argument selects the vocoder, 0 (pwgan) by default
    # use 1-4 to select another vocoder {multi band melgan, style melgan, hifigan, wavernn}
    CUDA_VISIBLE_DEVICES="${gpus}" ./local/synthesize.sh "${conf_path}" "${train_output_path}" "${ckpt_name}" 0 || exit -1
fi

if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    # synthesize_e2e; the last argument selects the vocoder, 0 (pwgan) by default
    # use 1, 3 or 4 to select another vocoder {multi band melgan, hifigan, wavernn}
    CUDA_VISIBLE_DEVICES="${gpus}" ./local/synthesize_e2e.sh "${conf_path}" "${train_output_path}" "${ckpt_name}" 0 || exit -1
fi

if [ "${stage}" -le 4 ] && [ "${stop_stage}" -ge 4 ]; then
    # inference with static model
    CUDA_VISIBLE_DEVICES="${gpus}" ./local/inference.sh "${train_output_path}" || exit -1
fi

# paddle2onnx, please make sure the static models are in ${train_output_path}/inference first
# we have only tested the following models so far
if [ "${stage}" -le 5 ] && [ "${stop_stage}" -ge 5 ]; then
    # install paddle2onnx
    pip install paddle2onnx --upgrade
    ./local/paddle2onnx.sh "${train_output_path}" inference inference_onnx speedyspeech_csmsc
    # considering the balance between speed and quality, we recommend that you use hifigan as vocoder
    ./local/paddle2onnx.sh "${train_output_path}" inference inference_onnx pwgan_csmsc
    # ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx mb_melgan_csmsc
    # ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx hifigan_csmsc
fi

# inference with onnxruntime
if [ "${stage}" -le 6 ] && [ "${stop_stage}" -ge 6 ]; then
    ./local/ort_predict.sh "${train_output_path}"
fi

# must run after stage 3 (the stage that generates the static models)
if [ "${stage}" -le 7 ] && [ "${stop_stage}" -ge 7 ]; then
    ./local/export2lite.sh "${train_output_path}" inference pdlite speedyspeech_csmsc x86
    ./local/export2lite.sh "${train_output_path}" inference pdlite pwgan_csmsc x86
    # ./local/export2lite.sh ${train_output_path} inference pdlite mb_melgan_csmsc x86
    # ./local/export2lite.sh ${train_output_path} inference pdlite hifigan_csmsc x86
fi

if [ "${stage}" -le 8 ] && [ "${stop_stage}" -ge 8 ]; then
    CUDA_VISIBLE_DEVICES="${gpus}" ./local/lite_predict.sh "${train_output_path}" || exit -1
fi

# PTQ_static
if [ "${stage}" -le 9 ] && [ "${stop_stage}" -ge 9 ]; then
    CUDA_VISIBLE_DEVICES="${gpus}" ./local/PTQ_static.sh "${train_output_path}" speedyspeech_csmsc || exit -1
fi


================================================
FILE: examples/csmsc/tts2/run_mlu.sh
================================================
#!/bin/bash
# MLU driver for the tts2 recipe: runs every stage N with stage <= N <= stop_stage.
# Expansions are quoted so that paths containing whitespace or glob characters
# are passed to the helper scripts as single arguments.

set -e
source path.sh
export CUSTOM_DEVICE_BLACK_LIST=elementwise_max
mlus=0
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_30600.pdz

# with the following command, you can choose the stage range you want to run
# such as `./run_mlu.sh --stage 0 --stop-stage 0`
# this cannot be mixed with positional arguments (`$1`, `$2`, ...)
source "${MAIN_ROOT}/utils/parse_options.sh" || exit 1

if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    # prepare data
    ./local/preprocess.sh "${conf_path}" || exit -1
fi

if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    # train model, all `ckpt` under `train_output_path/checkpoints/` dir
    FLAGS_selected_mlus="${mlus}" ./local/train_mlu.sh "${conf_path}" "${train_output_path}" || exit -1
fi

if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    # synthesize; synthesize_mlu.sh runs its stage 0 (multi band melgan) by default
    FLAGS_selected_mlus="${mlus}" ./local/synthesize_mlu.sh "${conf_path}" "${train_output_path}" "${ckpt_name}" || exit -1
fi

if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    # synthesize_e2e; synthesize_e2e_mlu.sh runs its stage 0 (multi band melgan) by default
    FLAGS_selected_mlus="${mlus}" ./local/synthesize_e2e_mlu.sh "${conf_path}" "${train_output_path}" "${ckpt_name}" || exit -1
fi

if [ "${stage}" -le 4 ] && [ "${stop_stage}" -ge 4 ]; then
    # inference with static model
    FLAGS_selected_mlus="${mlus}" ./local/inference_mlu.sh "${train_output_path}" || exit -1
fi

# paddle2onnx, please make sure the static models are in ${train_output_path}/inference first
# we have only tested the following models so far
if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
    # install paddle2onnx
    pip install paddle2onnx --upgrade
    ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx speedyspeech_csmsc
    # considering the balance between speed and quality, we recommend that you use hifigan as vocoder
    ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx pwgan_csmsc
    # ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx mb_melgan_csmsc
    # ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx hifigan_csmsc
fi

# inference with onnxruntime
if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then
    ./local/ort_predict.sh ${train_output_path}
fi

# must run after stage 3 (which stage generated static models)
if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 7 ]; then
    ./local/export2lite.sh ${train_output_path} inference pdlite speedyspeech_csmsc x86
    ./local/export2lite.sh ${train_output_path} inference pdlite pwgan_csmsc x86
    # ./local/export2lite.sh ${train_output_path} inference pdlite mb_melgan_csmsc x86
    # ./local/export2lite.sh ${train_output_path} inference pdlite hifigan_csmsc x86
fi

if [ ${stage} -le 8 ] && [ ${stop_stage} -ge 8 ]; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/lite_predict.sh ${train_output_path} || exit -1
fi

# PTQ_static
if [ ${stage} -le 9 ] && [ ${stop_stage} -ge 9 ]; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/PTQ_static.sh  ${train_output_path} speedyspeech_csmsc || exit -1
fi


================================================
FILE: examples/csmsc/tts2/run_npu.sh
================================================
#!/bin/bash
# Runs the NPU variants of the recipe stages: data preparation, training,
# synthesis from metadata, synthesis from text, and static-model inference.

set -e
source path.sh

# device ids exported to every helper through FLAGS_selected_npus
npus=0
# stage window; the defaults select every stage defined below
stage=0
stop_stage=100

# experiment settings
conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_76.pdz

# the stage range can be chosen on the command line, for example
# `./run_npu.sh --stage 0 --stop-stage 0`
# options like these cannot be combined with positional arguments `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

# succeeds when stage number $1 lies within [stage, stop_stage]
stage_selected() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# stage 0: prepare data
if stage_selected 0; then
    ./local/preprocess.sh ${conf_path} || exit -1
fi

# stage 1: train model, all `ckpt` go under `train_output_path/checkpoints/`
if stage_selected 1; then
    FLAGS_selected_npus=${npus} ./local/train_npu.sh ${conf_path} ${train_output_path} || exit -1
fi

# stage 2: synthesize, vocoder is pwgan by default
if stage_selected 2; then
    FLAGS_selected_npus=${npus} ./local/synthesize_npu.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

# stage 3: synthesize_e2e, vocoder is pwgan by default
if stage_selected 3; then
    FLAGS_selected_npus=${npus} ./local/synthesize_e2e_npu.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

# stage 4: inference with static model
if stage_selected 4; then
    FLAGS_selected_npus=${npus} ./local/inference_npu.sh ${train_output_path} || exit -1
fi


================================================
FILE: examples/csmsc/tts2/run_xpu.sh
================================================
#!/bin/bash
# Runs the XPU variants of the recipe stages: data preparation, training,
# synthesis from metadata, synthesis from text, and static-model inference.

set -e
source path.sh

# device ids exported to every helper through FLAGS_selected_xpus
xpus=0,1
# stage window; the defaults select every stage defined below
stage=0
stop_stage=100

# experiment settings
conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_76.pdz

# the stage range can be chosen on the command line, for example
# `./run_xpu.sh --stage 0 --stop-stage 0`
# options like these cannot be combined with positional arguments `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

# succeeds when stage number $1 lies within [stage, stop_stage]
stage_selected() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# stage 0: prepare data
if stage_selected 0; then
    ./local/preprocess.sh ${conf_path} || exit -1
fi

# stage 1: train model, all `ckpt` go under `train_output_path/checkpoints/`
if stage_selected 1; then
    FLAGS_selected_xpus=${xpus} ./local/train_xpu.sh ${conf_path} ${train_output_path} || exit -1
fi

# stage 2: synthesize, vocoder is pwgan by default
if stage_selected 2; then
    FLAGS_selected_xpus=${xpus} ./local/synthesize_xpu.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

# stage 3: synthesize_e2e, vocoder is pwgan by default
if stage_selected 3; then
    FLAGS_selected_xpus=${xpus} ./local/synthesize_e2e_xpu.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

# stage 4: inference with static model
if stage_selected 4; then
    FLAGS_selected_xpus=${xpus} ./local/inference_xpu.sh ${train_output_path} || exit -1
fi


================================================
FILE: examples/csmsc/tts3/README.md
================================================
([简体中文](./README_cn.md)|English)
# FastSpeech2 with CSMSC
This example contains code used to train a [FastSpeech2](https://arxiv.org/abs/2006.04558) model with [Chinese Standard Mandarin Speech Corpus](https://www.data-baker.com/open_source.html).

## Dataset
### Download and Extract
Download CSMSC from its [Official Website](https://test.data-baker.com/data/index/TNtts/) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/BZNSYP`.

### Get MFA Result and Extract
We use [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get durations for fastspeech2.
You can download the alignment result from here: [baker_alignment_tone.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/BZNSYP/with_tone/baker_alignment_tone.tar.gz), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) in our repo.

## Get Started
Assume the path to the dataset is `~/datasets/BZNSYP`.
Assume the path to the MFA result of CSMSC is `./baker_alignment_tone`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
    - synthesize waveform from a text file.
5. inference using the static model.
```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.

```text
dump
├── dev
│   ├── norm
│   └── raw
├── phone_id_map.txt
├── speaker_id_map.txt
├── test
│   ├── norm
│   └── raw
└── train
    ├── energy_stats.npy
    ├── norm
    ├── pitch_stats.npy
    ├── raw
    └── speech_stats.npy
```
The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and a `raw` subfolder. The `raw` folder contains the speech, pitch, and energy features of each utterance, while the `norm` folder contains the normalized ones. The statistics used to normalize the features are computed from the training set and are stored in `dump/train/*_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains phones, text_lengths, speech_lengths, durations, the path of speech features, the path of pitch features, the path of energy features, speaker, and the id of each utterance.

### Model Training
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
`./local/train.sh` calls `${BIN_DIR}/train.py`.
Here's the complete help message.
```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU] [--phones-dict PHONES_DICT]
                [--speaker-dict SPEAKER_DICT] [--voice-cloning VOICE_CLONING]

Train a FastSpeech2 model.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       fastspeech2 config file.
  --train-metadata TRAIN_METADATA
                        training data.
  --dev-metadata DEV_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu=0, use cpu.
  --phones-dict PHONES_DICT
                        phone vocabulary file.
  --speaker-dict SPEAKER_DICT
                        speaker id map file for multiple speaker model.
  --voice-cloning VOICE_CLONING
                        whether training voice cloning model.
```
1. `--config` is a config file in yaml format to overwrite the default config, which can be found at `conf/default.yaml`.
2. `--train-metadata` and `--dev-metadata` should be the metadata file in the normalized subfolder of `train` and `dev` in the `dump` folder.
3. `--output-dir` is the directory to save the results of the experiment. Checkpoints are saved in `checkpoints/` inside this directory.
4. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.
5. `--phones-dict` is the path of the phone vocabulary file.

### Synthesizing
We use [parallel wavegan](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/voc1) as the neural vocoder.
Download pretrained parallel wavegan model from [pwg_baker_ckpt_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_baker_ckpt_0.4.zip) and unzip it.
```bash
unzip pwg_baker_ckpt_0.4.zip
```
Parallel WaveGAN checkpoint contains files listed below.
```text
pwg_baker_ckpt_0.4
├── pwg_default.yaml               # default config used to train parallel wavegan
├── pwg_snapshot_iter_400000.pdz   # model parameters of parallel wavegan
└── pwg_stats.npy                  # statistics used to normalize spectrogram when training parallel wavegan
```
`./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
```
The last number controls the vocoder model during synthesis, which can use `0-4` to select the vocoder in {`pwgan`, `multi band melgan`, `style melgan`, `hifigan`, `wavernn`}

```text
usage: synthesize.py [-h]
                     [--am {speedyspeech_csmsc,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech,tacotron2_aishell3}]
                     [--am_config AM_CONFIG] [--am_ckpt AM_CKPT]
                     [--am_stat AM_STAT] [--phones_dict PHONES_DICT]
                     [--tones_dict TONES_DICT] [--speaker_dict SPEAKER_DICT]
                     [--voice-cloning VOICE_CLONING]
                     [--voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,wavernn_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,style_melgan_csmsc}]
                     [--voc_config VOC_CONFIG] [--voc_ckpt VOC_CKPT]
                     [--voc_stat VOC_STAT] [--ngpu NGPU]
                     [--test_metadata TEST_METADATA] [--output_dir OUTPUT_DIR]

Synthesize with acoustic model & vocoder

optional arguments:
  -h, --help            show this help message and exit
  --am {speedyspeech_csmsc,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech,tacotron2_aishell3}
                        Choose acoustic model type of tts task.
  --am_config AM_CONFIG
                        Config of acoustic model.
  --am_ckpt AM_CKPT     Checkpoint file of acoustic model.
  --am_stat AM_STAT     mean and standard deviation used to normalize
                        spectrogram when training acoustic model.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --tones_dict TONES_DICT
                        tone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --voice-cloning VOICE_CLONING
                        whether training voice cloning model.
  --voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,wavernn_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,style_melgan_csmsc}
                        Choose vocoder type of tts task.
  --voc_config VOC_CONFIG
                        Config of voc.
  --voc_ckpt VOC_CKPT   Checkpoint file of voc.
  --voc_stat VOC_STAT   mean and standard deviation used to normalize
                        spectrogram when training voc.
  --ngpu NGPU           if ngpu == 0, use cpu.
  --test_metadata TEST_METADATA
                        test metadata.
  --output_dir OUTPUT_DIR
                        output dir.
```
`./local/synthesize_e2e.sh` calls `${BIN_DIR}/../synthesize_e2e.py`, which can synthesize waveform from text file.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
```
The last number controls the vocoder model during synthesis, which can use `0,1,3,4` to select the vocoder in {`pwgan`, `multi band melgan`, `hifigan`, `wavernn`}

```text
usage: synthesize_e2e.py [-h]
                         [--am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech}]
                         [--am_config AM_CONFIG] [--am_ckpt AM_CKPT]
                         [--am_stat AM_STAT] [--phones_dict PHONES_DICT]
                         [--tones_dict TONES_DICT]
                         [--speaker_dict SPEAKER_DICT] [--spk_id SPK_ID]
                         [--voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,style_melgan_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,wavernn_csmsc}]
                         [--voc_config VOC_CONFIG] [--voc_ckpt VOC_CKPT]
                         [--voc_stat VOC_STAT] [--lang LANG]
                         [--inference_dir INFERENCE_DIR] [--ngpu NGPU]
                         [--text TEXT] [--output_dir OUTPUT_DIR]

Synthesize with acoustic model & vocoder

optional arguments:
  -h, --help            show this help message and exit
  --am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech}
                        Choose acoustic model type of tts task.
  --am_config AM_CONFIG
                        Config of acoustic model.
  --am_ckpt AM_CKPT     Checkpoint file of acoustic model.
  --am_stat AM_STAT     mean and standard deviation used to normalize
                        spectrogram when training acoustic model.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --tones_dict TONES_DICT
                        tone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --spk_id SPK_ID       spk id for multi speaker acoustic model
  --voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,style_melgan_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,wavernn_csmsc}
                        Choose vocoder type of tts task.
  --voc_config VOC_CONFIG
                        Config of voc.
  --voc_ckpt VOC_CKPT   Checkpoint file of voc.
  --voc_stat VOC_STAT   mean and standard deviation used to normalize
                        spectrogram when training voc.
  --lang LANG           Choose model language. zh or en
  --inference_dir INFERENCE_DIR
                        dir to save inference models
  --ngpu NGPU           if ngpu == 0, use cpu.
  --text TEXT           text to synthesize, a 'utt_id sentence' pair per line.
  --output_dir OUTPUT_DIR
                        output dir.

```
1. `--am` is acoustic model type with the format {model_name}_{dataset}
2. `--am_config`, `--am_ckpt`, `--am_stat` and `--phones_dict` are arguments for acoustic model, which correspond to the 4 files in the fastspeech2 pretrained model.
3. `--voc` is vocoder type with the format {model_name}_{dataset}
4. `--voc_config`, `--voc_ckpt`, `--voc_stat` are arguments for vocoder, which correspond to the 3 files in the parallel wavegan pretrained model.
5. `--lang` is the model language, which can be `zh` or `en`.
6. `--test_metadata` should be the metadata file in the normalized subfolder of `test`  in the `dump` folder.
7. `--text` is the text file, which contains sentences to synthesize.
8. `--output_dir` is the directory to save synthesized audio files.
9. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

### Inferencing
After synthesizing, we will get static models of fastspeech2 and pwgan in `${train_output_path}/inference`.
`./local/inference.sh` calls `${BIN_DIR}/inference.py`, which provides a paddle static model inference example for fastspeech2 + pwgan synthesis.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/inference.sh ${train_output_path}
```

## Pretrained Model
Pretrained FastSpeech2 model with no silence in the edge of audios:
- [fastspeech2_nosil_baker_ckpt_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_ckpt_0.4.zip)
- [fastspeech2_conformer_baker_ckpt_0.5.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_conformer_baker_ckpt_0.5.zip)
- [fastspeech2_cnndecoder_csmsc_ckpt_1.0.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_ckpt_1.0.0.zip)

The static model can be downloaded here:
- [fastspeech2_nosil_baker_static_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_static_0.4.zip)
- [fastspeech2_csmsc_static_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_csmsc_static_0.2.0.zip)
- [fastspeech2_cnndecoder_csmsc_static_1.0.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_static_1.0.0.zip)
- [fastspeech2_cnndecoder_csmsc_streaming_static_1.0.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_streaming_static_1.0.0.zip)

The ONNX model can be downloaded here:
- [fastspeech2_csmsc_onnx_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_csmsc_onnx_0.2.0.zip)
- [fastspeech2_cnndecoder_csmsc_onnx_1.0.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_onnx_1.0.0.zip)
- [fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0.zip)

The Paddle-Lite model can be downloaded here:
> Please compile the develop version of Paddle-Lite to export and run TTS models, because TTS models are supported by https://github.com/PaddlePaddle/Paddle-Lite/pull/9587 and https://github.com/PaddlePaddle/Paddle-Lite/pull/9706
- [fastspeech2_csmsc_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_csmsc_pdlite_1.3.0.zip)
- [fastspeech2_cnndecoder_csmsc_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_pdlite_1.3.0.zip)
- [fastspeech2_cnndecoder_csmsc_streaming_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_streaming_pdlite_1.3.0.zip)

Model | Step | eval/loss | eval/l1_loss | eval/duration_loss | eval/pitch_loss| eval/energy_loss 
:-------------:| :------------:| :-----: | :-----: | :--------: |:--------:|:---------:
default| 2(gpu) x 76000|1.0991|0.59132|0.035815|0.31915|0.15287|
conformer| 2(gpu) x 76000|1.0675|0.56103|0.035869|0.31553|0.15509|
cnndecoder| 1(gpu) x 153000|1.1153|0.61475|0.03380|0.30414|0.14707|

FastSpeech2 checkpoint contains files listed below.
```text
fastspeech2_nosil_baker_ckpt_0.4
├── default.yaml            # default config used to train fastspeech2
├── phone_id_map.txt        # phone vocabulary file when training fastspeech2
├── snapshot_iter_76000.pdz # model parameters and optimizer states
└── speech_stats.npy        # statistics used to normalize spectrogram when training fastspeech2
```
You can use the following scripts to synthesize for `${BIN_DIR}/../../assets/sentences.txt` using pretrained fastspeech2 and parallel wavegan models.

If you want to use fastspeech2_conformer, you must delete the line `--inference_dir=exp/default/inference \` to skip the dygraph-to-static-graph conversion step, because we have not tested dygraph-to-static-graph conversion for fastspeech2_conformer yet.
```bash
source path.sh

FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 ${BIN_DIR}/../synthesize_e2e.py \
  --am=fastspeech2_csmsc \
  --am_config=fastspeech2_nosil_baker_ckpt_0.4/default.yaml \
  --am_ckpt=fastspeech2_nosil_baker_ckpt_0.4/snapshot_iter_76000.pdz \
  --am_stat=fastspeech2_nosil_baker_ckpt_0.4/speech_stats.npy  \
  --voc=pwgan_csmsc \
  --voc_config=pwg_baker_ckpt_0.4/pwg_default.yaml \
  --voc_ckpt=pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \
  --voc_stat=pwg_baker_ckpt_0.4/pwg_stats.npy \
  --lang=zh \
  --text=${BIN_DIR}/../../assets/sentences.txt \
  --output_dir=exp/default/test_e2e \
  --inference_dir=exp/default/inference \
  --phones_dict=fastspeech2_nosil_baker_ckpt_0.4/phone_id_map.txt
```


================================================
FILE: examples/csmsc/tts3/README_cn.md
================================================
(简体中文|[English](./README.md))
# 用 CSMSC 数据集训练 FastSpeech2 模型

本用例包含用于训练 [FastSpeech2](https://arxiv.org/abs/2006.04558) 模型的代码，使用 [Chinese Standard Mandarin Speech Corpus](https://www.data-baker.com/open_source.html) 数据集。

## 数据集
### 下载并解压
从 [官方网站](https://test.data-baker.com/data/index/TNtts/) 下载数据集并解压到 `~/datasets`，解压后数据集位于 `~/datasets/BZNSYP` 目录下。

### 获取MFA结果并解压
我们使用 [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) 去获得 fastspeech2 的音素持续时间。
你们可以从这里下载 [baker_alignment_tone.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/BZNSYP/with_tone/baker_alignment_tone.tar.gz), 或参考 [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) 训练你自己的模型。

## 开始
假设数据集的路径是 `~/datasets/BZNSYP`.
假设CSMSC的MFA结果路径为 `./baker_alignment_tone`.
运行下面的命令会进行如下操作：

1. **设置环境路径（source path）**。
2. 对数据集进行预处理。
3. 训练模型
4. 合成波形
    - 从 `metadata.jsonl` 合成波形。
    - 从文本文件合成波形。
5. 使用静态模型进行推理。
```bash
./run.sh
```
您可以选择要运行的一系列阶段，或者将 `stage` 设置为 `stop-stage` 以仅使用一个阶段，例如，运行以下命令只会预处理数据集。
```bash
./run.sh --stage 0 --stop-stage 0
```
### 数据预处理
```bash
./local/preprocess.sh ${conf_path}
```
处理完成后，将在当前目录中创建 `dump` 文件夹。`dump` 文件夹的结构如下所示。

```text
dump
├── dev
│   ├── norm
│   └── raw
├── phone_id_map.txt
├── speaker_id_map.txt
├── test
│   ├── norm
│   └── raw
└── train
    ├── energy_stats.npy
    ├── norm
    ├── pitch_stats.npy
    ├── raw
    └── speech_stats.npy
```

数据集分为三个部分，即 `train` 、 `dev` 和 `test` ，每个部分都包含一个 `norm` 和 `raw` 子文件夹。原始文件夹包含每个话语的语音、音调和能量特征，而 `norm` 文件夹包含规范化的特征。用于规范化特征的统计数据是从 `dump/train/*_stats.npy` 中的训练集计算出来的。

此外，还有一个 `metadata.jsonl` 在每个子文件夹中。它是一个类似表格的文件，包含音素、文本长度、语音长度、持续时间、语音特征路径、音调特征路径、能量特征路径、说话人和每个话语的 id。

### 模型训练
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
`./local/train.sh` 调用 `${BIN_DIR}/train.py` 。
以下是完整的帮助信息。

```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU] [--phones-dict PHONES_DICT]
                [--speaker-dict SPEAKER_DICT] [--voice-cloning VOICE_CLONING]

Train a FastSpeech2 model.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       fastspeech2 config file.
  --train-metadata TRAIN_METADATA
                        training data.
  --dev-metadata DEV_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu=0, use cpu.
  --phones-dict PHONES_DICT
                        phone vocabulary file.
  --speaker-dict SPEAKER_DICT
                        speaker id map file for multiple speaker model.
  --voice-cloning VOICE_CLONING
                        whether training voice cloning model.
```
1. `--config` 是一个 yaml 格式的配置文件，用于覆盖默认配置，位于 `conf/default.yaml`.
2. `--train-metadata` 和 `--dev-metadata` 应为 `dump` 文件夹中 `train` 和 `dev` 下的规范化元数据文件
3. `--output-dir` 是保存结果的目录。 检查点保存在此目录中的 `checkpoints/` 目录下。
4. `--ngpu` 要使用的 GPU 数，如果 ngpu==0，则使用 cpu 。
5. `--phones-dict` 是音素词汇表文件的路径。

### 合成
我们使用 [parallel wavegan](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/voc1) 作为神经声码器（vocoder）。
从 [pwg_baker_ckpt_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_baker_ckpt_0.4.zip) 下载预训练的 parallel wavegan 模型并将其解压。

```bash
unzip pwg_baker_ckpt_0.4.zip
```
Parallel WaveGAN 检查点包含如下文件。
```text
pwg_baker_ckpt_0.4
├── pwg_default.yaml               # 用于训练 parallel wavegan 的默认配置
├── pwg_snapshot_iter_400000.pdz   # parallel wavegan 的模型参数
└── pwg_stats.npy                  # 训练 parallel wavegan 时用于规范化频谱图的统计数据
```
`./local/synthesize.sh` 调用 `${BIN_DIR}/../synthesize.py` 即可从 `metadata.jsonl`中合成波形。

```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
```
最后一位参数 `0` 用于控制合成过程中使用的声码器模型。该参数的取值范围为 `0-4`，分别对应以下五种声码器模型：`pwgan`、`multi band melgan`、`style melgan`、`hifigan` 和 `wavernn`。

```text
usage: synthesize.py [-h]
                     [--am {speedyspeech_csmsc,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech,tacotron2_aishell3}]
                     [--am_config AM_CONFIG] [--am_ckpt AM_CKPT]
                     [--am_stat AM_STAT] [--phones_dict PHONES_DICT]
                     [--tones_dict TONES_DICT] [--speaker_dict SPEAKER_DICT]
                     [--voice-cloning VOICE_CLONING]
                     [--voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,wavernn_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,style_melgan_csmsc}]
                     [--voc_config VOC_CONFIG] [--voc_ckpt VOC_CKPT]
                     [--voc_stat VOC_STAT] [--ngpu NGPU]
                     [--test_metadata TEST_METADATA] [--output_dir OUTPUT_DIR]

Synthesize with acoustic model & vocoder

optional arguments:
  -h, --help            show this help message and exit
  --am {speedyspeech_csmsc,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech,tacotron2_aishell3}
                        Choose acoustic model type of tts task.
  --am_config AM_CONFIG
                        Config of acoustic model.
  --am_ckpt AM_CKPT     Checkpoint file of acoustic model.
  --am_stat AM_STAT     mean and standard deviation used to normalize
                        spectrogram when training acoustic model.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --tones_dict TONES_DICT
                        tone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --voice-cloning VOICE_CLONING
                        whether training voice cloning model.
  --voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,wavernn_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,style_melgan_csmsc}
                        Choose vocoder type of tts task.
  --voc_config VOC_CONFIG
                        Config of voc.
  --voc_ckpt VOC_CKPT   Checkpoint file of voc.
  --voc_stat VOC_STAT   mean and standard deviation used to normalize
                        spectrogram when training voc.
  --ngpu NGPU           if ngpu == 0, use cpu.
  --test_metadata TEST_METADATA
                        test metadata.
  --output_dir OUTPUT_DIR
                        output dir.
```
`./local/synthesize_e2e.sh` 调用 `${BIN_DIR}/../synthesize_e2e.py`，即可从文本文件中合成波形。

```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
```
最后一位参数 `0` 用于控制合成过程中使用的声码器模型。该参数的取值范围为 {`0,1,3,4`}，分别对应以下四种声码器模型：`pwgan`、`multi band melgan`、`hifigan` 和 `wavernn`。

```text
usage: synthesize_e2e.py [-h]
                         [--am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech}]
                         [--am_config AM_CONFIG] [--am_ckpt AM_CKPT]
                         [--am_stat AM_STAT] [--phones_dict PHONES_DICT]
                         [--tones_dict TONES_DICT]
                         [--speaker_dict SPEAKER_DICT] [--spk_id SPK_ID]
                         [--voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,style_melgan_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,wavernn_csmsc}]
                         [--voc_config VOC_CONFIG] [--voc_ckpt VOC_CKPT]
                         [--voc_stat VOC_STAT] [--lang LANG]
                         [--inference_dir INFERENCE_DIR] [--ngpu NGPU]
                         [--text TEXT] [--output_dir OUTPUT_DIR]

Synthesize with acoustic model & vocoder

optional arguments:
  -h, --help            show this help message and exit
  --am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech}
                        Choose acoustic model type of tts task.
  --am_config AM_CONFIG
                        Config of acoustic model.
  --am_ckpt AM_CKPT     Checkpoint file of acoustic model.
  --am_stat AM_STAT     mean and standard deviation used to normalize
                        spectrogram when training acoustic model.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --tones_dict TONES_DICT
                        tone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --spk_id SPK_ID       spk id for multi speaker acoustic model
  --voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,style_melgan_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,wavernn_csmsc}
                        Choose vocoder type of tts task.
  --voc_config VOC_CONFIG
                        Config of voc.
  --voc_ckpt VOC_CKPT   Checkpoint file of voc.
  --voc_stat VOC_STAT   mean and standard deviation used to normalize
                        spectrogram when training voc.
  --lang LANG           Choose model language. zh or en
  --inference_dir INFERENCE_DIR
                        dir to save inference models
  --ngpu NGPU           if ngpu == 0, use cpu.
  --text TEXT           text to synthesize, a 'utt_id sentence' pair per line.
  --output_dir OUTPUT_DIR
                        output dir.
```
1. `--am` 是声学模型的类型，格式为 {model_name}_{dataset}。
2. `--am_config`, `--am_ckpt`, `--am_stat` 和 `--phones_dict` 是声学模型的参数，对应于 fastspeech2 预训练模型中的 4 个文件。
3. `--voc` 是声码器 (vocoder) 的类型，格式为 {model_name}_{dataset}。
4. `--voc_config`, `--voc_ckpt`, `--voc_stat` 是声码器的参数，对应于 parallel wavegan 预训练模型中的 3 个文件。
5. `--lang` 对应模型的语言可以是 `zh` 或 `en` 。
6. `--test_metadata` 应为 `dump` 文件夹中 `test` 下的规范化元数据文件。
7. `--text` 是文本文件，其中包含要合成的句子。
8. `--output_dir` 是保存合成音频文件的目录。
9. `--ngpu` 要使用的GPU数，如果 ngpu==0，则使用 cpu 。

### 推理
在合成之后，我们将在 `${train_output_path}/inference` 中得到 fastspeech2 和 pwgan 的静态模型。
`./local/inference.sh` 调用 `${BIN_DIR}/../inference.py`，为 fastspeech2 + pwgan 合成提供了一个 paddle 静态模型推理示例。

```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/inference.sh ${train_output_path}
```

## 预训练模型
预训练的 FastSpeech2 模型（音频首尾不含静音）：
- [fastspeech2_nosil_baker_ckpt_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_ckpt_0.4.zip)
- [fastspeech2_conformer_baker_ckpt_0.5.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_conformer_baker_ckpt_0.5.zip)

静态模型可以在这里下载 [fastspeech2_nosil_baker_static_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_static_0.4.zip).

Model | Step | eval/loss | eval/l1_loss | eval/duration_loss | eval/pitch_loss| eval/energy_loss 
:-------------:| :------------:| :-----: | :-----: | :--------: |:--------:|:---------:
default| 2(gpu) x 76000|1.0991|0.59132|0.035815|0.31915|0.15287|
conformer| 2(gpu) x 76000|1.0675|0.56103|0.035869|0.31553|0.15509|

FastSpeech2检查点包含下列文件。
```text
fastspeech2_nosil_baker_ckpt_0.4
├── default.yaml            # 用于训练 fastspeech2 的默认配置
├── phone_id_map.txt        # 训练 fastspeech2 时的音素词汇文件
├── snapshot_iter_76000.pdz # 模型参数和优化器状态
└── speech_stats.npy        # 训练 fastspeech2 时用于规范化频谱图的统计数据
```
您可以使用以下脚本，利用预训练的 fastspeech2 和 parallel wavegan 模型为 `${BIN_DIR}/../../assets/sentences.txt` 中的句子合成语音：
```bash
source path.sh

FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 ${BIN_DIR}/../synthesize_e2e.py \
  --am=fastspeech2_csmsc \
  --am_config=fastspeech2_nosil_baker_ckpt_0.4/default.yaml \
  --am_ckpt=fastspeech2_nosil_baker_ckpt_0.4/snapshot_iter_76000.pdz \
  --am_stat=fastspeech2_nosil_baker_ckpt_0.4/speech_stats.npy  \
  --voc=pwgan_csmsc \
  --voc_config=pwg_baker_ckpt_0.4/pwg_default.yaml \
  --voc_ckpt=pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \
  --voc_stat=pwg_baker_ckpt_0.4/pwg_stats.npy \
  --lang=zh \
  --text=${BIN_DIR}/../../assets/sentences.txt \
  --output_dir=exp/default/test_e2e \
  --inference_dir=exp/default/inference \
  --phones_dict=fastspeech2_nosil_baker_ckpt_0.4/phone_id_map.txt
```


================================================
FILE: examples/csmsc/tts3/conf/cnndecoder.yaml
================================================
# Config variant that uses the CNN decoder (CNND);
# see `decoder_type: cnndecoder` under `model`.
###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################

fs: 24000          # Sampling rate (300 samples == 12.5 ms, 1200 samples == 50 ms at this rate).
n_fft: 2048        # FFT size (samples).
n_shift: 300       # Hop size (samples). 12.5ms
win_length: 1200   # Window length (samples). 50ms
                   # If set to null, it will be the same as the FFT size (n_fft).
window: "hann"     # Window function.

# Only used for feats_type != raw

fmin: 80           # Minimum frequency of Mel basis.
fmax: 7600         # Maximum frequency of Mel basis.
n_mels: 80         # The number of mel basis.

# Only used for the model using pitch features (e.g. FastSpeech2)
f0min: 80          # Minimum f0 for pitch extraction.
f0max: 400         # Maximum f0 for pitch extraction.


###########################################################
#                       DATA SETTING                      #
###########################################################
batch_size: 64  # presumably the number of utterances per training batch - confirm against the training script
num_workers: 4  # presumably the number of data-loading worker processes - confirm against the training script


###########################################################
#                       MODEL SETTING                     #
###########################################################
# Model hyper-parameters. This variant pairs a transformer encoder with the
# CNN decoder (encoder_type / decoder_type below).
# NOTE(review): dlayers, dunits and init_dec_alpha are still listed although
# decoder_type is cnndecoder - presumably ignored by that decoder; confirm.
model:
    adim: 384         # attention dimension
    aheads: 2         # number of attention heads
    elayers: 4        # number of encoder layers
    eunits: 1536      # number of encoder ff units
    dlayers: 4        # number of decoder layers
    dunits: 1536      # number of decoder ff units
    positionwise_layer_type: conv1d   # type of position-wise layer
    positionwise_conv_kernel_size: 3  # kernel size of position wise conv layer
    duration_predictor_layers: 2      # number of layers of duration predictor
    duration_predictor_chans: 256     # number of channels of duration predictor
    duration_predictor_kernel_size: 3 # filter size of duration predictor
    postnet_layers: 5                 # number of layers of postnet
    postnet_filts: 5                  # filter size of conv layers in postnet
    postnet_chans: 256                # number of channels of conv layers in postnet
    use_scaled_pos_enc: True          # whether to use scaled positional encoding
    encoder_normalize_before: True    # whether to perform layer normalization before the input
    decoder_normalize_before: True    # whether to perform layer normalization before the input
    reduction_factor: 1               # reduction factor
    encoder_type: transformer           # encoder type
    decoder_type: cnndecoder           # decoder type
    init_type: xavier_uniform         # initialization type
    init_enc_alpha: 1.0               # initial value of alpha of encoder scaled position encoding
    init_dec_alpha: 1.0               # initial value of alpha of decoder scaled position encoding
    transformer_enc_dropout_rate: 0.2            # dropout rate for transformer encoder layer
    transformer_enc_positional_dropout_rate: 0.2 # dropout rate for transformer encoder positional encoding
    transformer_enc_attn_dropout_rate: 0.2       # dropout rate for transformer encoder attention layer
    cnn_dec_dropout_rate: 0.2                    # dropout rate for cnn decoder layer
    cnn_postnet_dropout_rate: 0.2                # presumably dropout rate for cnn_postnet (by analogy with cnn_dec_dropout_rate) - confirm
    # NOTE(review): 256 is unusually large for a kernel size and equals the
    # *_chans values used elsewhere in this file - confirm whether these are
    # kernel sizes or channel counts.
    cnn_postnet_resblock_kernel_sizes: [256, 256] # kernel sizes for residual block of cnn_postnet
    cnn_postnet_kernel_size: 5                   # kernel size of cnn_postnet
    cnn_decoder_embedding_dim: 256               # presumably the embedding dimension used inside the cnn decoder - confirm
    pitch_predictor_layers: 5                  # number of conv layers in pitch predictor
    pitch_predictor_chans: 256                 # number of channels of conv layers in pitch predictor
    pitch_predictor_kernel_size: 5             # kernel size of conv layers in pitch predictor
    pitch_predictor_dropout: 0.5               # dropout rate in pitch predictor
    pitch_embed_kernel_size: 1                 # kernel size of conv embedding layer for pitch
    pitch_embed_dropout: 0.0                   # dropout rate after conv embedding layer for pitch
    stop_gradient_from_pitch_predictor: True   # whether to stop the gradient from pitch predictor to encoder
    energy_predictor_layers: 2                 # number of conv layers in energy predictor
    energy_predictor_chans: 256                # number of channels of conv layers in energy predictor
    energy_predictor_kernel_size: 3            # kernel size of conv layers in energy predictor
    energy_predictor_dropout: 0.5              # dropout rate in energy predictor
    energy_embed_kernel_size: 1                # kernel size of conv embedding layer for energy
    energy_embed_dropout: 0.0                  # dropout rate after conv embedding layer for energy
    stop_gradient_from_energy_predictor: False # whether to stop the gradient from energy predictor to encoder


###########################################################
#                       UPDATER SETTING                   #
###########################################################
updater:
    use_masking: True                 # whether to apply masking for padded part in loss calculation


###########################################################
#                     OPTIMIZER SETTING                   #
###########################################################
optimizer:
  optim: adam              # optimizer type
  learning_rate: 0.001     # learning rate

###########################################################
#                     TRAINING SETTING                    #
###########################################################
max_epoch: 1000   # presumably the upper bound on training epochs - confirm against the training script
num_snapshots: 5  # presumably how many checkpoints (snapshots) are retained - confirm


###########################################################
#                       OTHER SETTING                     #
###########################################################
seed: 10086  # presumably the random seed used for reproducibility - confirm


================================================
FILE: examples/csmsc/tts3/conf/conformer.yaml
================================================
###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################

fs: 24000          # Sampling rate (300 samples == 12.5 ms, 1200 samples == 50 ms at this rate).
n_fft: 2048        # FFT size (samples).
n_shift: 300       # Hop size (samples). 12.5ms
win_length: 1200   # Window length (samples). 50ms
                   # If set to null, it will be the same as the FFT size (n_fft).
window: "hann"     # Window function.

# Only used for feats_type != raw

fmin: 80           # Minimum frequency of Mel basis.
fmax: 7600         # Maximum frequency of Mel basis.
n_mels: 80         # The number of mel basis.

# Only used for the model using pitch features (e.g. FastSpeech2)
f0min: 80          # Minimum f0 for pitch extraction.
f0max: 400         # Maximum f0 for pitch extraction.


###########################################################
#                       DATA SETTING                      #
###########################################################
batch_size: 64  # presumably the number of utterances per training batch - confirm against the training script
num_workers: 4  # presumably the number of data-loading worker processes - confirm against the training script


###########################################################
#                       MODEL SETTING                     #
###########################################################
# Model hyper-parameters. This variant selects conformer for both the encoder
# and the decoder (encoder_type / decoder_type below).
# NOTE(review): the transformer_* dropout keys are kept although both types are
# conformer - presumably they are reused by the conformer blocks; confirm.
model:
    adim: 384         # attention dimension
    aheads: 2         # number of attention heads
    elayers: 4        # number of encoder layers
    eunits: 1536      # number of encoder ff units
    dlayers: 4        # number of decoder layers
    dunits: 1536      # number of decoder ff units
    positionwise_layer_type: conv1d   # type of position-wise layer
    positionwise_conv_kernel_size: 3  # kernel size of position wise conv layer
    duration_predictor_layers: 2      # number of layers of duration predictor
    duration_predictor_chans: 256     # number of channels of duration predictor
    duration_predictor_kernel_size: 3 # filter size of duration predictor
    postnet_layers: 5                 # number of layers of postnet
    postnet_filts: 5                  # filter size of conv layers in postnet
    postnet_chans: 256                # number of channels of conv layers in postnet
    encoder_normalize_before: True    # whether to perform layer normalization before the input
    decoder_normalize_before: True    # whether to perform layer normalization before the input
    reduction_factor: 1               # reduction factor
    encoder_type: conformer           # encoder type
    decoder_type: conformer           # decoder type
    conformer_pos_enc_layer_type: rel_pos        # conformer positional encoding type
    conformer_self_attn_layer_type: rel_selfattn # conformer self-attention type
    conformer_activation_type: swish             # conformer activation type
    use_macaron_style_in_conformer: True         # whether to use macaron style in conformer
    use_cnn_in_conformer: True                   # whether to use CNN in conformer
    conformer_enc_kernel_size: 7                 # kernel size in CNN module of conformer-based encoder
    conformer_dec_kernel_size: 31                # kernel size in CNN module of conformer-based decoder
    init_type: xavier_uniform         # initialization type
    transformer_enc_dropout_rate: 0.2            # dropout rate for transformer encoder layer
    transformer_enc_positional_dropout_rate: 0.2 # dropout rate for transformer encoder positional encoding
    transformer_enc_attn_dropout_rate: 0.2       # dropout rate for transformer encoder attention layer
    transformer_dec_dropout_rate: 0.2            # dropout rate for transformer decoder layer
    transformer_dec_positional_dropout_rate: 0.2 # dropout rate for transformer decoder positional encoding
    transformer_dec_attn_dropout_rate: 0.2       # dropout rate for transformer decoder attention layer
    pitch_predictor_layers: 5                  # number of conv layers in pitch predictor
    pitch_predictor_chans: 256                 # number of channels of conv layers in pitch predictor
    pitch_predictor_kernel_size: 5             # kernel size of conv layers in pitch predictor
    pitch_predictor_dropout: 0.5               # dropout rate in pitch predictor
    pitch_embed_kernel_size: 1                 # kernel size of conv embedding layer for pitch
    pitch_embed_dropout: 0.0                   # dropout rate after conv embedding layer for pitch
    stop_gradient_from_pitch_predictor: True   # whether to stop the gradient from pitch predictor to encoder
    energy_predictor_layers: 2                 # number of conv layers in energy predictor
    energy_predictor_chans: 256                # number of channels of conv layers in energy predictor
    energy_predictor_kernel_size: 3            # kernel size of conv layers in energy predictor
    energy_predictor_dropout: 0.5              # dropout rate in energy predictor
    energy_embed_kernel_size: 1                # kernel size of conv embedding layer for energy
    energy_embed_dropout: 0.0                  # dropout rate after conv embedding layer for energy
    stop_gradient_from_energy_predictor: False # whether to stop the gradient from energy predictor to encoder


###########################################################
#                       UPDATER SETTING                   #
###########################################################
updater:
    use_masking: True                 # whether to apply masking for padded part in loss calculation


###########################################################
#                     OPTIMIZER SETTING                   #
###########################################################
optimizer:
  optim: adam              # optimizer type
  learning_rate: 0.001     # learning rate

###########################################################
#                     TRAINING SETTING                    #
###########################################################
max_epoch: 1000   # presumably the upper bound on training epochs - confirm against the training script
num_snapshots: 5  # presumably how many checkpoints (snapshots) are retained - confirm


###########################################################
#                       OTHER SETTING                     #
###########################################################
seed: 10086  # presumably the random seed used for reproducibility - confirm


================================================
FILE: examples/csmsc/tts3/conf/default.yaml
================================================
###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################

fs: 24000          # Sampling rate (300 samples == 12.5 ms, 1200 samples == 50 ms at this rate).
n_fft: 2048        # FFT size (samples).
n_shift: 300       # Hop size (samples). 12.5ms
win_length: 1200   # Window length (samples). 50ms
                   # If set to null, it will be the same as the FFT size (n_fft).
window: "hann"     # Window function.

# Only used for feats_type != raw

fmin: 80           # Minimum frequency of Mel basis.
fmax: 7600         # Maximum frequency of Mel basis.
n_mels: 80         # The number of mel basis.

# Only used for the model using pitch features (e.g. FastSpeech2)
f0min: 80          # Minimum f0 for pitch extraction.
f0max: 400         # Maximum f0 for pitch extraction.


###########################################################
#                       DATA SETTING                      #
###########################################################
batch_size: 64  # presumably the number of utterances per training batch - confirm against the training script
num_workers: 4  # presumably the number of data-loading worker processes - confirm against the training script


###########################################################
#                       MODEL SETTING                     #
###########################################################
# Model hyper-parameters for the default variant.
# NOTE(review): encoder_type / decoder_type are not set in this file, so the
# model's built-in defaults apply - presumably transformer for both; confirm.
model:
    adim: 384         # attention dimension
    aheads: 2         # number of attention heads
    elayers: 4        # number of encoder layers
    eunits: 1536      # number of encoder ff units
    dlayers: 4        # number of decoder layers
    dunits: 1536      # number of decoder ff units
    positionwise_layer_type: conv1d   # type of position-wise layer
    positionwise_conv_kernel_size: 3  # kernel size of position wise conv layer
    duration_predictor_layers: 2      # number of layers of duration predictor
    duration_predictor_chans: 256     # number of channels of duration predictor
    duration_predictor_kernel_size: 3 # filter size of duration predictor
    postnet_layers: 5                 # number of layers of postnet
    postnet_filts: 5                  # filter size of conv layers in postnet
    postnet_chans: 256                # number of channels of conv layers in postnet
    use_scaled_pos_enc: True          # whether to use scaled positional encoding
    encoder_normalize_before: True    # whether to perform layer normalization before the input
    decoder_normalize_before: True    # whether to perform layer normalization before the input
    reduction_factor: 1               # reduction factor
    init_type: xavier_uniform         # initialization type
    init_enc_alpha: 1.0               # initial value of alpha of encoder scaled position encoding
    init_dec_alpha: 1.0               # initial value of alpha of decoder scaled position encoding
    transformer_enc_dropout_rate: 0.2            # dropout rate for transformer encoder layer
    transformer_enc_positional_dropout_rate: 0.2 # dropout rate for transformer encoder positional encoding
    transformer_enc_attn_dropout_rate: 0.2       # dropout rate for transformer encoder attention layer
    transformer_dec_dropout_rate: 0.2            # dropout rate for transformer decoder layer
    transformer_dec_positional_dropout_rate: 0.2 # dropout rate for transformer decoder positional encoding
    transformer_dec_attn_dropout_rate: 0.2       # dropout rate for transformer decoder attention layer
    pitch_predictor_layers: 5                  # number of conv layers in pitch predictor
    pitch_predictor_chans: 256                 # number of channels of conv layers in pitch predictor
    pitch_predictor_kernel_size: 5             # kernel size of conv layers in pitch predictor
    pitch_predictor_dropout: 0.5               # dropout rate in pitch predictor
    pitch_embed_kernel_size: 1                 # kernel size of conv embedding layer for pitch
    pitch_embed_dropout: 0.0                   # dropout rate after conv embedding layer for pitch
    stop_gradient_from_pitch_predictor: True   # whether to stop the gradient from pitch predictor to encoder
    energy_predictor_layers: 2                 # number of conv layers in energy predictor
    energy_predictor_chans: 256                # number of channels of conv layers in energy predictor
    energy_predictor_kernel_size: 3            # kernel size of conv layers in energy predictor
    energy_predictor_dropout: 0.5              # dropout rate in energy predictor
    energy_embed_kernel_size: 1                # kernel size of conv embedding layer for energy
    energy_embed_dropout: 0.0                  # dropout rate after conv embedding layer for energy
    stop_gradient_from_energy_predictor: False # whether to stop the gradient from energy predictor to encoder


###########################################################
#                       UPDATER SETTING                   #
###########################################################
updater:
    use_masking: True                 # whether to apply masking for padded part in loss calculation


###########################################################
#                     OPTIMIZER SETTING                   #
###########################################################
optimizer:
    optim: adam              # optimizer type
    learning_rate: 0.001     # learning rate

###########################################################
#                     TRAINING SETTING                    #
###########################################################
max_epoch: 1000   # presumably the upper bound on training epochs - confirm against the training script
num_snapshots: 5  # presumably how many checkpoints (snapshots) are retained - confirm


###########################################################
#                       OTHER SETTING                     #
###########################################################
seed: 10086  # presumably the random seed used for reproducibility - confirm


================================================
FILE: examples/csmsc/tts3/local/PTQ_dynamic.sh
================================================
#!/bin/bash
# Run PTQ_dynamic.py (presumably dynamic post-training quantization) on an
# exported model.
#
# Usage: ./local/PTQ_dynamic.sh <train_output_path> <model_name> <weight_bits>
#   <train_output_path>  experiment dir; <train_output_path>/inference is
#                        passed as --inference_dir
#   <model_name>         forwarded as --model_name
#   <weight_bits>        forwarded as --weight_bits
# BIN_DIR must already be set in the environment (e.g. via `source path.sh`).

train_output_path="$1"
model_name="$2"
weight_bits="$3"

# Every expansion is quoted so that a path containing spaces or glob
# characters reaches python as a single argument.
python3 "${BIN_DIR}/../PTQ_dynamic.py" \
    --inference_dir "${train_output_path}/inference" \
    --model_name "${model_name}" \
    --weight_bits "${weight_bits}"

================================================
FILE: examples/csmsc/tts3/local/PTQ_static.sh
================================================
#!/bin/bash
# Run PTQ_static.py (presumably static post-training quantization) on an
# exported model, using dump/dev/norm/metadata.jsonl as --dev-metadata.
#
# Usage: ./local/PTQ_static.sh <train_output_path> <model_name>
#   <train_output_path>  experiment dir; <train_output_path>/inference is
#                        passed as --inference_dir
#   <model_name>         forwarded as --model_name
# BIN_DIR must already be set in the environment (e.g. via `source path.sh`).

train_output_path="$1"
model_name="$2"

# Every expansion is quoted so that a path containing spaces or glob
# characters reaches python as a single argument.
python3 "${BIN_DIR}/../PTQ_static.py" \
    --dev-metadata=dump/dev/norm/metadata.jsonl \
    --inference_dir "${train_output_path}/inference" \
    --model_name "${model_name}" \
    --onnx_format=True

================================================
FILE: examples/csmsc/tts3/local/export2lite.sh
================================================
#!/bin/bash
# Convert an exported Paddle static model with paddle_lite_opt.
#
# Usage: ./local/export2lite.sh <train_output_path> <model_dir> <output_dir> <model> <valid_targets>
#   <train_output_path>  experiment dir
#   <model_dir>          subdir of <train_output_path> holding <model>.pdmodel
#                        and <model>.pdiparams
#   <output_dir>         subdir of <train_output_path> that receives the output
#                        (created if missing)
#   <model>              model file stem
#   <valid_targets>      forwarded verbatim as --valid_targets

train_output_path="$1"
model_dir="$2"
output_dir="$3"
model="$4"
valid_targets="$5"

# <model> with its last "_"-separated component removed; only echoed.
model_name="${model%_*}"
echo "model_name: ${model_name}"

# Drop the last ",entry" from valid_targets (a value without a comma is kept
# unchanged); the result tags the output file name.
# NOTE(review): with three or more targets this keeps all but the last one -
# confirm whether only the first target was intended.
suffix="${valid_targets%,*}"

mkdir -p "${train_output_path}/${output_dir}"

# Every expansion is quoted so that paths containing spaces or glob characters
# are passed as single arguments.
paddle_lite_opt \
    --model_file "${train_output_path}/${model_dir}/${model}.pdmodel" \
    --param_file "${train_output_path}/${model_dir}/${model}.pdiparams" \
    --optimize_out "${train_output_path}/${output_dir}/${model}_${suffix}" \
    --valid_targets "${valid_targets}"


================================================
FILE: examples/csmsc/tts3/local/inference.sh
================================================
#!/bin/bash
# Run inference.py with the fastspeech2_csmsc acoustic model and one vocoder
# per stage.
#
# Usage: ./local/inference.sh <train_output_path>
#   passes --inference_dir=<train_output_path>/inference
#   passes --output_dir=<train_output_path>/pd_infer_out
# BIN_DIR must already be set in the environment (e.g. via `source path.sh`).
#
# NOTE: a failing python call does not abort the script; later stages still
# run and the exit status is that of the last `if` statement.

train_output_path="$1"

# Stages stage..stop_stage (inclusive) are executed; edit to pick vocoders.
stage=0
stop_stage=0

# Synthesize sentences.txt with fastspeech2_csmsc and the vocoder named by $1.
# Expansions are quoted so paths with spaces or glob characters stay intact.
run_inference() {
    local voc="$1"
    python3 "${BIN_DIR}/../inference.py" \
        --inference_dir="${train_output_path}/inference" \
        --am=fastspeech2_csmsc \
        --voc="${voc}" \
        --text="${BIN_DIR}/../../assets/sentences.txt" \
        --output_dir="${train_output_path}/pd_infer_out" \
        --phones_dict=dump/phone_id_map.txt
}

# pwgan
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    run_inference pwgan_csmsc
fi

# for more GAN Vocoders
# multi band melgan
if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    run_inference mb_melgan_csmsc
fi

# hifigan
if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    run_inference hifigan_csmsc
fi

# wavernn
if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    run_inference wavernn_csmsc
fi

================================================
FILE: examples/csmsc/tts3/local/inference_streaming.sh
================================================
#!/bin/bash
# Run inference_streaming.py with the fastspeech2_csmsc acoustic model
# (--am_streaming=True) and one vocoder per stage.
#
# Usage: ./local/inference_streaming.sh <train_output_path>
#   passes --inference_dir=<train_output_path>/inference_streaming
#   passes --output_dir=<train_output_path>/pd_infer_out_streaming
# BIN_DIR must already be set in the environment (e.g. via `source path.sh`).
#
# NOTE: a failing python call does not abort the script; later stages still
# run and the exit status is that of the last `if` statement.

train_output_path="$1"

# Stages stage..stop_stage (inclusive) are executed; edit to pick vocoders.
stage=0
stop_stage=0

# Synthesize sentences.txt with fastspeech2_csmsc and the vocoder named by $1.
# Expansions are quoted so paths with spaces or glob characters stay intact.
run_streaming_inference() {
    local voc="$1"
    python3 "${BIN_DIR}/../inference_streaming.py" \
        --inference_dir="${train_output_path}/inference_streaming" \
        --am=fastspeech2_csmsc \
        --am_stat=dump/train/speech_stats.npy \
        --voc="${voc}" \
        --text="${BIN_DIR}/../../assets/sentences.txt" \
        --output_dir="${train_output_path}/pd_infer_out_streaming" \
        --phones_dict=dump/phone_id_map.txt \
        --am_streaming=True
}

# pwgan
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    run_streaming_inference pwgan_csmsc
fi

# for more GAN Vocoders
# multi band melgan
if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    run_streaming_inference mb_melgan_csmsc
fi

# hifigan
if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    run_streaming_inference hifigan_csmsc
fi


================================================
FILE: examples/csmsc/tts3/local/inference_xpu.sh
================================================
#!/bin/bash
# Run inference.py with `--device xpu`, using the fastspeech2_csmsc acoustic
# model and one vocoder per stage.
#
# Usage: ./local/inference_xpu.sh <train_output_path>
#   passes --inference_dir=<train_output_path>/inference
#   passes --output_dir=<train_output_path>/pd_infer_out
# BIN_DIR must already be set in the environment (e.g. via `source path.sh`).
#
# NOTE: a failing python call does not abort the script; later stages still
# run and the exit status is that of the last `if` statement.

train_output_path="$1"

# Stages stage..stop_stage (inclusive) are executed; edit to pick vocoders.
stage=0
stop_stage=0

# Synthesize sentences.txt with fastspeech2_csmsc and the vocoder named by $1.
# Expansions are quoted so paths with spaces or glob characters stay intact.
run_xpu_inference() {
    local voc="$1"
    python3 "${BIN_DIR}/../inference.py" \
        --inference_dir="${train_output_path}/inference" \
        --am=fastspeech2_csmsc \
        --voc="${voc}" \
        --text="${BIN_DIR}/../../assets/sentences.txt" \
        --output_dir="${train_output_path}/pd_infer_out" \
        --phones_dict=dump/phone_id_map.txt \
        --device xpu
}

# pwgan
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    run_xpu_inference pwgan_csmsc
fi

# for more GAN Vocoders
# multi band melgan
if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    run_xpu_inference mb_melgan_csmsc
fi

# hifigan
if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    run_xpu_inference hifigan_csmsc
fi

# wavernn
if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    run_xpu_inference wavernn_csmsc
fi

================================================
FILE: examples/csmsc/tts3/local/lite_predict.sh
================================================
#!/bin/bash
# Run lite_predict.py with the fastspeech2_csmsc acoustic model and one
# vocoder per stage.
#
# Usage: ./local/lite_predict.sh <train_output_path>
#   passes --inference_dir=<train_output_path>/pdlite
#   passes --output_dir=<train_output_path>/lite_infer_out
# BIN_DIR must already be set in the environment (e.g. via `source path.sh`).
#
# NOTE: a failing python call does not abort the script; later stages still
# run and the exit status is that of the last `if` statement.

train_output_path="$1"

# Stages stage..stop_stage (inclusive) are executed; edit to pick vocoders.
stage=0
stop_stage=0

# Synthesize sentences.txt with fastspeech2_csmsc and the vocoder named by $1.
# Expansions are quoted so paths with spaces or glob characters stay intact.
run_lite_predict() {
    local voc="$1"
    python3 "${BIN_DIR}/../lite_predict.py" \
        --inference_dir="${train_output_path}/pdlite" \
        --am=fastspeech2_csmsc \
        --voc="${voc}" \
        --text="${BIN_DIR}/../../assets/sentences.txt" \
        --output_dir="${train_output_path}/lite_infer_out" \
        --phones_dict=dump/phone_id_map.txt
}

# pwgan
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    run_lite_predict pwgan_csmsc
fi

# for more GAN Vocoders
# multi band melgan
if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    run_lite_predict mb_melgan_csmsc
fi

# hifigan
if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    run_lite_predict hifigan_csmsc
fi


================================================
FILE: examples/csmsc/tts3/local/lite_predict_streaming.sh
================================================
#!/bin/bash
# Run lite_predict_streaming.py with the fastspeech2_csmsc acoustic model
# (--am_streaming=True) and one vocoder per stage.
#
# Usage: ./local/lite_predict_streaming.sh <train_output_path>
#   passes --inference_dir=<train_output_path>/pdlite_streaming
#   passes --output_dir=<train_output_path>/lite_infer_out_streaming
# BIN_DIR must already be set in the environment (e.g. via `source path.sh`).
#
# NOTE: a failing python call does not abort the script; later stages still
# run and the exit status is that of the last `if` statement.

train_output_path="$1"

# Stages stage..stop_stage (inclusive) are executed; edit to pick vocoders.
stage=0
stop_stage=0

# Synthesize sentences.txt with fastspeech2_csmsc and the vocoder named by $1.
# Expansions are quoted so paths with spaces or glob characters stay intact.
run_lite_predict_streaming() {
    local voc="$1"
    python3 "${BIN_DIR}/../lite_predict_streaming.py" \
        --inference_dir="${train_output_path}/pdlite_streaming" \
        --am=fastspeech2_csmsc \
        --am_stat=dump/train/speech_stats.npy \
        --voc="${voc}" \
        --text="${BIN_DIR}/../../assets/sentences.txt" \
        --output_dir="${train_output_path}/lite_infer_out_streaming" \
        --phones_dict=dump/phone_id_map.txt \
        --am_streaming=True
}

# pwgan
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    run_lite_predict_streaming pwgan_csmsc
fi

# for more GAN Vocoders
# multi band melgan
if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    run_lite_predict_streaming mb_melgan_csmsc
fi

# hifigan
if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    run_lite_predict_streaming hifigan_csmsc
fi


================================================
FILE: examples/csmsc/tts3/local/ort_predict.sh
================================================
#!/bin/bash
# Run the ONNX Runtime prediction scripts for fastspeech2_csmsc on CPU with
# 2 threads: stages 0-2 call ort_predict_e2e.py (text input) with one vocoder
# each, stage 3 calls ort_predict.py on dump/test/norm/metadata.jsonl.
#
# Usage: ./local/ort_predict.sh <train_output_path>
#   passes --inference_dir=<train_output_path>/inference_onnx
# BIN_DIR must already be set in the environment (e.g. via `source path.sh`).
#
# NOTE: a failing python call does not abort the script; later stages still
# run and the exit status is that of the last `if` statement.

train_output_path="$1"

# Stages stage..stop_stage (inclusive) are executed; edit to pick stages.
stage=0
stop_stage=0

# Synthesize csmsc_test.txt with fastspeech2_csmsc and the vocoder named by $1.
# Expansions are quoted so paths with spaces or glob characters stay intact.
run_ort_e2e() {
    local voc="$1"
    python3 "${BIN_DIR}/../ort_predict_e2e.py" \
        --inference_dir="${train_output_path}/inference_onnx" \
        --am=fastspeech2_csmsc \
        --voc="${voc}" \
        --output_dir="${train_output_path}/onnx_infer_out_e2e" \
        --text="${BIN_DIR}/../../assets/csmsc_test.txt" \
        --phones_dict=dump/phone_id_map.txt \
        --device=cpu \
        --cpu_threads=2
}

# e2e, synthesize from text
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    run_ort_e2e pwgan_csmsc
fi

if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    run_ort_e2e mb_melgan_csmsc
fi

if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    run_ort_e2e hifigan_csmsc
fi

# synthesize from metadata, take hifigan as an example
if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    python3 "${BIN_DIR}/../ort_predict.py" \
        --inference_dir="${train_output_path}/inference_onnx" \
        --am=fastspeech2_csmsc \
        --voc=hifigan_csmsc \
        --test_metadata=dump/test/norm/metadata.jsonl \
        --output_dir="${train_output_path}/onnx_infer_out" \
        --device=cpu \
        --cpu_threads=2
fi

================================================
FILE: examples/csmsc/tts3/local/ort_predict_streaming.sh
================================================
#!/bin/bash
# Run streaming ONNX Runtime inference for the fastspeech2_csmsc acoustic model.
#
# Usage: ort_predict_streaming.sh <train_output_path>
#   train_output_path: directory that contains inference_onnx_streaming/ and
#                      receives the onnx_infer_out_streaming/ result directory.
#
# Environment:
#   BIN_DIR: used to locate ort_predict_streaming.py and the assets/ directory
#            (both resolved relative to it).
#
# Edit stage / stop_stage below to choose which stage(s) to run:
#   0: pwgan vocoder
#   1: mb_melgan vocoder
#   2: hifigan vocoder

# Fail early with a clear message instead of silently using
# "/inference_onnx_streaming" or "/../ort_predict_streaming.py" when an input
# is missing.
train_output_path=${1:?"usage: $0 <train_output_path>"}
: "${BIN_DIR:?"BIN_DIR must be set in the environment"}"

stage=0
stop_stage=0

# NOTE(review): every stage passes the same --output_dir
# (onnx_infer_out_streaming), so running several of them in one invocation
# presumably mixes or overwrites their outputs -- confirm before widening
# stop_stage.

# e2e, synthesize from text
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    python3 "${BIN_DIR}/../ort_predict_streaming.py" \
        --inference_dir="${train_output_path}/inference_onnx_streaming" \
        --am=fastspeech2_csmsc \
        --am_stat=dump/train/speech_stats.npy \
        --voc=pwgan_csmsc \
        --output_dir="${train_output_path}/onnx_infer_out_streaming" \
        --text="${BIN_DIR}/../../assets/csmsc_test.txt" \
        --phones_dict=dump/phone_id_map.txt \
        --device=cpu \
        --cpu_threads=2 \
        --am_streaming=True
fi

if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    python3 "${BIN_DIR}/../ort_predict_streaming.py" \
        --inference_dir="${train_output_path}/inference_onnx_streaming" \
        --am=fastspeech2_csmsc \
        --am_stat=dump/train/speech_stats.npy \
        --voc=mb_melgan_csmsc \
        --output_dir="${train_output_path}/onnx_infer_out_streaming" \
        --text="${BIN_DIR}/../../assets/csmsc_test.txt" \
        --phones_dict=dump/phone_id_map.txt \
        --device=cpu \
        --cpu_threads=2 \
        --am_streaming=True
fi

if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    python3 "${BIN_DIR}/../ort_predict_streaming.py" \
        --inference_dir="${train_output_path}/inference_onnx_streaming" \
        --am=fastspeech2_csmsc \
        --am_stat=dump/train/speech_stats.npy \
        --voc=hifigan_csmsc \
        --output_dir="${train_output_path}/onnx_infer_out_streaming" \
        --text="${BIN_DIR}/../../assets/csmsc_test.txt" \
        --phones_dict=dump/phone_id_map.txt \
        --device=cpu \
        --cpu_threads=2 \
        --am_streaming=True
fi


================================================
FILE: examples/csmsc/tts3/local/paddle2onnx.sh
================================================
#!/bin/bash
# Convert an exported Paddle inference model to ONNX with the paddle2onnx CLI.
#
# Usage: paddle2onnx.sh <train_output_path> <model_dir> <output_dir> <model>
#   train_output_path: root directory of the experiment output
#   model_dir:         sub-directory of train_output_path that holds
#                      <model>.pdmodel and <model>.pdiparams
#   output_dir:        sub-directory of train_output_path that receives
#                      <model>.onnx (created if missing)
#   model:             model file stem, e.g. mb_melgan_csmsc

usage="usage: $0 <train_output_path> <model_dir> <output_dir> <model>"

# All four arguments are required; abort with the usage line when one is
# missing rather than building paths such as "/.pdmodel".
train_output_path=${1:?"${usage}"}
model_dir=${2:?"${usage}"}
output_dir=${3:?"${usage}"}
model=${4:?"${usage}"}

enable_dev_version=True

# Strip the shortest trailing "_*" suffix, e.g. mb_melgan_csmsc -> mb_melgan.
model_name=${model%_*}
echo "model_name: ${model_name}"

# mb_melgan is converted with the dev version disabled (the reason is not
# visible in this script). The operand is quoted so the test stays valid even
# when model_name is empty.
if [ "${model_name}" = 'mb_melgan' ]; then
    enable_dev_version=False
fi

mkdir -p "${train_output_path}/${output_dir}"

paddle2onnx \
    --model_dir "${train_output_path}/${model_dir}" \
    --model_filename "${model}.pdmodel" \
    --params_filename "${model}.pdiparams" \
    --save_file "${train_output_path}/${output_dir}/${model}.onnx" \
    --opset_version 11 \
    --enable_dev_version "${enable_dev_version}"

================================================
FILE: examples/csmsc/tts3/local/preprocess.sh
================================================
#!/bin/bash
# Data preparation pipeline for this recipe.
#
# Usage: preprocess.sh <config_path>
#   config_path: config file forwarded to gen_duration_from_textgrid.py and
#                preprocess.py via --config.
#
# Environment:
#   MAIN_ROOT: used to locate utils/gen_duration_from_textgrid.py and
#              utils/compute_statistics.py
#   BIN_DIR:   used to locate preprocess.py and normalize.py
#
# Stages (select with stage / stop_stage below):
#   0: generate durations.txt from the alignments in ./baker_alignment_tone
#   1: extract features into ./dump
#   2: compute speech / pitch / energy statistics on the train split
#   3: normalize the train / dev / test splits with the train statistics

stage=0
stop_stage=100

# Fail early with a clear message when a required input is missing.
config_path=${1:?"usage: $0 <config_path>"}
: "${MAIN_ROOT:?"MAIN_ROOT must be set in the environment"}"
: "${BIN_DIR:?"BIN_DIR must be set in the environment"}"

if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    # get durations from MFA's result
    echo "Generate durations.txt from MFA results ..."
    python3 "${MAIN_ROOT}/utils/gen_duration_from_textgrid.py" \
        --inputdir=./baker_alignment_tone \
        --output=durations.txt \
        --config="${config_path}"
fi

if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    # extract features
    echo "Extract features ..."
    # The shell does not tilde-expand "~" inside "--rootdir=~/...", because
    # "--rootdir" is not a valid assignment name, so pass an explicit ${HOME}
    # path instead of relying on the callee to expand it.
    python3 "${BIN_DIR}/preprocess.py" \
        --dataset=baker \
        --rootdir="${HOME}/datasets/BZNSYP/" \
        --dumpdir=dump \
        --dur-file=durations.txt \
        --config="${config_path}" \
        --num-cpu=20 \
        --cut-sil=True
fi

if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    # get features' stats(mean and std)
    echo "Get features' stats ..."
    python3 "${MAIN_ROOT}/utils/compute_statistics.py" \
        --metadata=dump/train/raw/metadata.jsonl \
        --field-name="speech"

    python3 "${MAIN_ROOT}/utils/compute_statistics.py" \
        --metadata=dump/train/raw/metadata.jsonl \
        --field-name="pitch"

    python3 "${MAIN_ROOT}/utils/compute_statistics.py" \
        --metadata=dump/train/raw/metadata.jsonl \
        --field-name="energy"
fi

if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    # normalize and convert phone/speaker to id, dev and test should use train's stats
    echo "Normalize ..."
    python3 "${BIN_DIR}/normalize.py" \
        --metadata=dump/train/raw/metadata.jsonl \
        --dumpdir=dump/train/norm \
        --speech-stats=dump/train/speech_stats.npy \
        --pitch-stats=dump/train/pitch_stats.npy \
        --energy-stats=dump/train/energy_stats.npy \
        --phones-dict=dump/phone_id_map.txt \
        --speaker-dict=dump/speaker_id_map.txt

    python3 "${BIN_DIR}/normalize.py" \
        --metadata=dump/dev/raw/metadata.jsonl \
        --dumpdir=dump/dev/norm \
        --speech-stats=dump/train/speech_stats.npy \
        --pitch-stats=dump/train/pitch_stats.npy \
        --energy-stats=dump/train/energy_stats.npy \
        --phones-dict=dump/phone_id_map.txt \
        --speaker-dict=dump/speaker_id_map.txt

    python3 "${BIN_DIR}/normalize.py" \
        --metadata=dump/test/raw/metadata.jsonl \
        --dumpdir=dump/test/norm \
        --speech-stats=dump/train/speech_stats.npy \
        --pitch-stats=dump/train/pitch_stats.npy \
        --energy-stats=dump/train/energy_stats.npy \
        --phones-dict=dump/phone_id_map.txt \
        --speaker-dict=dump/speaker_id_map.txt
fi


================================================
FILE: examples/csmsc/tts3/local/simple.lexicon
================================================
a1  a1
a2  a2
a3  a3
a4  a4
a5  a5
ar1  ar1
ar2  ar2
ar3  ar3
ar4  ar4
ar5  ar5
ai1  ai1
ai2  ai2
ai3  ai3
ai4  ai4
ai5  ai5
air1  air1
air2  air2
air3  air3
air4  air4
air5  air5
ao1  ao1
ao2  ao2
ao3  ao3
ao4  ao4
ao5  ao5
aor1  aor1
aor2  aor2
aor3  aor3
aor4  aor4
aor5  aor5
an1  an1
an2  an2
an3  an3
an4  an4
an5  an5
anr1  anr1
anr2  anr2
anr3  anr3
anr4  anr4
anr5  anr5
ang1  ang1
ang2  ang2
ang3  ang3
ang4  ang4
ang5  ang5
angr1  angr1
angr2  angr2
angr3  angr3
angr4  angr4
angr5  angr5
e1  e1
e2  e2
e3  e3
e4  e4
e5  e5
er1  er1
er2  er2
er3  er3
er4  er4
er5  er5
ei1  ei1
ei2  ei2
ei3  ei3
ei4  ei4
ei5  ei5
eir1  eir1
eir2  eir2
eir3  eir3
eir4  eir4
eir5  eir5
en1  en1
en2  en2
en3  en3
en4  en4
en5  en5
enr1  enr1
enr2  enr2
enr3  enr3
enr4  enr4
enr5  enr5
eng1  eng1
eng2  eng2
eng3  eng3
eng4  eng4
eng5  eng5
engr1  engr1
engr2  engr2
engr3  engr3
engr4  engr4
engr5  engr5
o1  o1
o2  o2
o3  o3
o4  o4
o5  o5
or1  or1
or2  or2
or3  or3
or4  or4
or5  or5
ou1  ou1
ou2  ou2
ou3  ou3
ou4  ou4
ou5  ou5
our1  our1
our2  our2
our3  our3
our4  our4
our5  our5
ong1  ong1
ong2  ong2
ong3  ong3
ong4  ong4
ong5  ong5
ongr1  ongr1
ongr2  ongr2
ongr3  ongr3
ongr4  ongr4
ongr5  ongr5
yi1  i1
yi2  i2
yi3  i3
yi4  i4
yi5  i5
yir1  ir1
yir2  ir2
yir3  ir3
yir4  ir4
yir5  ir5
ya1  ia1
ya2  ia2
ya3  ia3
ya4  ia4
ya5  ia5
yar1  iar1
yar2  iar2
yar3  iar3
yar4  iar4
yar5  iar5
yao1  iao1
yao2  iao2
yao3  iao3
yao4  iao4
yao5  iao5
yaor1  iaor1
yaor2  iaor2
yaor3  iaor3
yaor4  iaor4
yaor5  iaor5
yan1  ian1
yan2  ian2
yan3  ian3
yan4  ian4
yan5  ian5
yanr1  ianr1
yanr2  ianr2
yanr3  ianr3
yanr4  ianr4
yanr5  ianr5
yang1  iang1
yang2  iang2
yang3  iang3
yang4  iang4
yang5  iang5
yangr1  iangr1
yangr2  iangr2
yangr3  iangr3
yangr4  iangr4
yangr5  iangr5
ye1  ie1
ye2  ie2
ye3  ie3
ye4  ie4
ye5  ie5
yer1  ier1
yer2  ier2
yer3  ier3
yer4  ier4
yer5  ier5
yo1  io1
yo2  io2
yo3  io3
yo4  io4
yo5  io5
yor1  ior1
yor2  ior2
yor3  ior3
yor4  ior4
yor5  ior5
you1  iou1
you2  iou2
you3  iou3
you4  iou4
you5  iou5
your1  iour1
your2  iour2
your3  iour3
your4  iour4
your5  iour5
yong1  iong1
yong2  iong2
yong3  iong3
yong4  iong4
yong5  iong5
yongr1  iongr1
yongr2  iongr2
yongr3  iongr3
yongr4  iongr4
yongr5  iongr5
yin1  in1
yin2  in2
yin3  in3
yin4  in4
yin5  in5
yinr1  inr1
yinr2  inr2
yinr3  inr3
yinr4  inr4
yinr5  inr5
ying1  ing1
ying2  ing2
ying3  ing3
ying4  ing4
ying5  ing5
yingr1  ingr1
yingr2  ingr2
yingr3  ingr3
yingr4  ingr4
yingr5  ingr5
wu1  u1
wu2  u2
wu3  u3
wu4  u4
wu5  u5
wur1  ur1
wur2  ur2
wur3  ur3
wur4  ur4
wur5  ur5
wa1  ua1
wa2  ua2
wa3  ua3
wa4  ua4
wa5  ua5
war1  uar1
war2  uar2
war3  uar3
war4  uar4
war5  uar5
wai1  uai1
wai2  uai2
wai3  uai3
wai4  uai4
wai5  uai5
wair1  uair1
wair2  uair2
wair3  uair3
wair4  uair4
wair5  uair5
wan1  uan1
wan2  uan2
wan3  uan3
wan4  uan4
wan5  uan5
wanr1  uanr1
wanr2  uanr2
wanr3  uanr3
wanr4  uanr4
wanr5  uanr5
wang1  uang1
wang2  uang2
wang3  uang3
wang4  uang4
wang5  uang5
wangr1  uangr1
wangr2  uangr2
wangr3  uangr3
wangr4  uangr4
wangr5  uangr5
wei1  uei1
wei2  uei2
wei3  uei3
wei4  uei4
wei5  uei5
weir1  ueir1
weir2  ueir2
weir3  ueir3
weir4  ueir4
weir5  ueir5
wo1  uo1
wo2  uo2
wo3  uo3
wo4  uo4
wo5  uo5
wor1  uor1
wor2  uor2
wor3  uor3
wor4  uor4
wor5  uor5
wen1  uen1
wen2  uen2
wen3  uen3
wen4  uen4
wen5  uen5
wenr1  uenr1
wenr2  uenr2
wenr3  uenr3
wenr4  uenr4
wenr5  uenr5
weng1  ueng1
weng2  ueng2
weng3  ueng3
weng4  ueng4
weng5  ueng5
wengr1  uengr1
wengr2  uengr2
wengr3  uengr3
wengr4  uengr4
wengr5  uengr5
yu1  v1
yu2  v2
yu3  v3
yu4  v4
yu5  v5
yur1  vr1
yur2  vr2
yur3  vr3
yur4  vr4
yur5  vr5
yue1  ve1
yue2  ve2
yue3  ve3
yue4  ve4
yue5  ve5
yuer1  ver1
yuer2  ver2
yuer3  ver3
yuer4  ver4
yuer5  ver5
yuan1  van1
yuan2  van2
yuan3  van3
yuan4  van4
yuan5  van5
yuanr1  vanr1
yuanr2  vanr2
yuanr3  vanr3
yuanr4  vanr4
yuanr5  vanr5
yun1  vn1
yun2  vn2
yun3  vn3
yun4  vn4
yun5  vn5
yunr1  vnr1
yunr2  vnr2
yunr3  vnr3
yunr4  vnr4
yunr5  vnr5
ba1 b a1
ba2 b a2
ba3 b a3
ba4 b a4
ba5 b a5
bar1 b ar1
bar2 b ar2
bar3 b ar3
bar4 b ar4
bar5 b ar5
bai1 b ai1
bai2 b ai2
bai3 b ai3
bai4 b ai4
bai5 b ai5
bair1 b air1
bair2 b air2
bair3 b air3
bair4 b air4
bair5 b air5
bao1 b ao1
bao2 b ao2
bao3 b ao3
bao4 b ao4
bao5 b ao5
baor1 b aor1
baor2 b aor2
baor3 b aor3
baor4 b aor4
baor5 b aor5
ban1 b an1
ban2 b an2
ban3 b an3
ban4 b an4
ban5 b an5
banr1 b anr1
banr2 b anr2
banr3 b anr3
banr4 b anr4
banr5 b anr5
bang1 b ang1
bang2 b ang2
bang3 b ang3
bang4 b ang4
bang5 b ang5
bangr1 b angr1
bangr2 b angr2
bangr3 b angr3
bangr4 b angr4
bangr5 b angr5
be1 b e1
be2 b e2
be3 b e3
be4 b e4
be5 b e5
ber1 b er1
ber2 b er2
ber3 b er3
ber4 b er4
ber5 b er5
bei1 b ei1
bei2 b ei2
bei3 b ei3
bei4 b ei4
bei5 b ei5
beir1 b eir1
beir2 b eir2
beir3 b eir3
beir4 b eir4
beir5 b eir5
ben1 b en1
ben2 b en2
ben3 b en3
ben4 b en4
ben5 b en5
benr1 b enr1
benr2 b enr2
benr3 b enr3
benr4 b enr4
benr5 b enr5
beng1 b eng1
beng2 b eng2
beng3 b eng3
beng4 b eng4
beng5 b eng5
bengr1 b engr1
bengr2 b engr2
bengr3 b engr3
bengr4 b engr4
bengr5 b engr5
bo1 b o1
bo2 b o2
bo3 b o3
bo4 b o4
bo5 b o5
bor1 b or1
bor2 b or2
bor3 b or3
bor4 b or4
bor5 b or5
bou1 b ou1
bou2 b ou2
bou3 b ou3
bou4 b ou4
bou5 b ou5
bour1 b our1
bour2 b our2
bour3 b our3
bour4 b our4
bour5 b our5
bi1 b i1
bi2 b i2
bi3 b i3
bi4 b i4
bi5 b i5
bir1 b ir1
bir2 b ir2
bir3 b ir3
bir4 b ir4
bir5 b ir5
bia1 b ia1
bia2 b ia2
bia3 b ia3
bia4 b ia4
bia5 b ia5
biar1 b iar1
biar2 b iar2
biar3 b iar3
biar4 b iar4
biar5 b iar5
biao1 b iao1
biao2 b iao2
biao3 b iao3
biao4 b iao4
biao5 b iao5
biaor1 b iaor1
biaor2 b iaor2
biaor3 b iaor3
biaor4 b iaor4
biaor5 b iaor5
bian1 b ian1
bian2 b ian2
bian3 b ian3
bian4 b ian4
bian5 b ian5
bianr1 b ianr1
bianr2 b ianr2
bianr3 b ianr3
bianr4 b ianr4
bianr5 b ianr5
biang1 b iang1
biang2 b iang2
biang3 b iang3
biang4 b iang4
biang5 b iang5
biangr1 b iangr1
biangr2 b iangr2
biangr3 b iangr3
biangr4 b iangr4
biangr5 b iangr5
bie1 b ie1
bie2 b ie2
bie3 b ie3
bie4 b ie4
bie5 b ie5
bier1 b ier1
bier2 b ier2
bier3 b ier3
bier4 b ier4
bier5 b ier5
bio1 b io1
bio2 b io2
bio3 b io3
bio4 b io4
bio5 b io5
bior1 b ior1
bior2 b ior2
bior3 b ior3
bior4 b ior4
bior5 b ior5
biu1 b iou1
biu2 b iou2
biu3 b iou3
biu4 b iou4
biu5 b iou5
biur1 b iour1
biur2 b iour2
biur3 b iour3
biur4 b iour4
biur5 b iour5
biong1 b iong1
biong2 b iong2
biong3 b iong3
biong4 b iong4
biong5 b iong5
biongr1 b iongr1
biongr2 b iongr2
biongr3 b iongr3
biongr4 b iongr4
biongr5 b iongr5
bin1 b in1
bin2 b in2
bin3 b in3
bin4 b in4
bin5 b in5
binr1 b inr1
binr2 b inr2
binr3 b inr3
binr4 b inr4
binr5 b inr5
bing1 b ing1
bing2 b ing2
bing3 b ing3
bing4 b ing4
bing5 b ing5
bingr1 b ingr1
bingr2 b ingr2
bingr3 b ingr3
bingr4 b ingr4
bingr5 b ingr5
bu1 b u1
bu2 b u2
bu3 b u3
bu4 b u4
bu5 b u5
bur1 b ur1
bur2 b ur2
bur3 b ur3
bur4 b ur4
bur5 b ur5
pa1 p a1
pa2 p a2
pa3 p a3
pa4 p a4
pa5 p a5
par1 p ar1
par2 p ar2
par3 p ar3
par4 p ar4
par5 p ar5
pai1 p ai1
pai2 p ai2
pai3 p ai3
pai4 p ai4
pai5 p ai5
pair1 p air1
pair2 p air2
pair3 p air3
pair4 p air4
pair5 p air5
pao1 p ao1
pao2 p ao2
pao3 p ao3
pao4 p ao4
pao5 p ao5
paor1 p aor1
paor2 p aor2
paor3 p aor3
paor4 p aor4
paor5 p aor5
pan1 p an1
pan2 p an2
pan3 p an3
pan4 p an4
pan5 p an5
panr1 p anr1
panr2 p anr2
panr3 p anr3
panr4 p anr4
panr5 p anr5
pang1 p ang1
pang2 p ang2
pang3 p ang3
pang4 p ang4
pang5 p ang5
pangr1 p angr1
pangr2 p angr2
pangr3 p angr3
pangr4 p angr4
pangr5 p angr5
pe1 p e1
pe2 p e2
pe3 p e3
pe4 p e4
pe5 p e5
per1 p er1
per2 p er2
per3 p er3
per4 p er4
per5 p er5
pei1 p ei1
pei2 p ei2
pei3 p ei3
pei4 p ei4
pei5 p ei5
peir1 p eir1
peir2 p eir2
peir3 p eir3
peir4 p eir4
peir5 p eir5
pen1 p en1
pen2 p en2
pen3 p en3
pen4 p en4
pen5 p en5
penr1 p enr1
penr2 p enr2
penr3 p enr3
penr4 p enr4
penr5 p enr5
peng1 p eng1
peng2 p eng2
peng3 p eng3
peng4 p eng4
peng5 p eng5
pengr1 p engr1
pengr2 p engr2
pengr3 p engr3
pengr4 p engr4
pengr5 p engr5
po1 p o1
po2 p o2
po3 p o3
po4 p o4
po5 p o5
por1 p or1
por2 p or2
por3 p or3
por4 p or4
por5 p or5
pou1 p ou1
pou2 p ou2
pou3 p ou3
pou4 p ou4
pou5 p ou5
pour1 p our1
pour2 p our2
pour3 p our3
pour4 p our4
pour5 p our5
pi1 p i1
pi2 p i2
pi3 p i3
pi4 p i4
pi5 p i5
pir1 p ir1
pir2 p ir2
pir3 p ir3
pir4 p ir4
pir5 p ir5
pia1 p ia1
pia2 p ia2
pia3 p ia3
pia4 p ia4
pia5 p ia5
piar1 p iar1
piar2 p iar2
piar3 p iar3
piar4 p iar4
piar5 p iar5
piao1 p iao1
piao2 p iao2
piao3 p iao3
piao4 p iao4
piao5 p iao5
piaor1 p iaor1
piaor2 p iaor2
piaor3 p iaor3
piaor4 p iaor4
piaor5 p iaor5
pian1 p ian1
pian2 p ian2
pian3 p ian3
pian4 p ian4
pian5 p ian5
pianr1 p ianr1
pianr2 p ianr2
pianr3 p ianr3
pianr4 p ianr4
pianr5 p ianr5
piang1 p iang1
piang2 p iang2
piang3 p iang3
piang4 p iang4
piang5 p iang5
piangr1 p iangr1
piangr2 p iangr2
piangr3 p iangr3
piangr4 p iangr4
piangr5 p iangr5
pie1 p ie1
pie2 p ie2
pie3 p ie3
pie4 p ie4
pie5 p ie5
pier1 p ier1
pier2 p ier2
pier3 p ier3
pier4 p ier4
pier5 p ier5
pio1 p io1
pio2 p io2
pio3 p io3
pio4 p io4
pio5 p io5
pior1 p ior1
pior2 p ior2
pior3 p ior3
pior4 p ior4
pior5 p ior5
piu1 p iou1
piu2 p iou2
piu3 p iou3
piu4 p iou4
piu5 p iou5
piur1 p iour1
piur2 p iour2
piur3 p iour3
piur4 p iour4
piur5 p iour5
piong1 p iong1
piong2 p iong2
piong3 p iong3
piong4 p iong4
piong5 p iong5
piongr1 p iongr1
piongr2 p iongr2
piongr3 p iongr3
piongr4 p iongr4
piongr5 p iongr5
pin1 p in1
pin2 p in2
pin3 p in3
pin4 p in4
pin5 p in5
pinr1 p inr1
pinr2 p inr2
pinr3 p inr3
pinr4 p inr4
pinr5 p inr5
ping1 p ing1
ping2 p ing2
ping3 p ing3
ping4 p ing4
ping5 p ing5
pingr1 p ingr1
pingr2 p ingr2
pingr3 p ingr3
pingr4 p ingr4
pingr5 p ingr5
pu1 p u1
pu2 p u2
pu3 p u3
pu4 p u4
pu5 p u5
pur1 p ur1
pur2 p ur2
pur3 p ur3
pur4 p ur4
pur5 p ur5
ma1 m a1
ma2 m a2
ma3 m a3
ma4 m a4
ma5 m a5
mar1 m ar1
mar2 m ar2
mar3 m ar3
mar4 m ar4
mar5 m ar5
mai1 m ai1
mai2 m ai2
mai3 m ai3
mai4 m ai4
mai5 m ai5
mair1 m air1
mair2 m air2
mair3 m air3
mair4 m air4
mair5 m air5
mao1 m ao1
mao2 m ao2
mao3 m ao3
mao4 m ao4
mao5 m ao5
maor1 m aor1
maor2 m aor2
maor3 m aor3
maor4 m aor4
maor5 m aor5
man1 m an1
man2 m an2
man3 m an3
man4 m an4
man5 m an5
manr1 m anr1
manr2 m anr2
manr3 m anr3
manr4 m anr4
manr5 m anr5
mang1 m ang1
mang2 m ang2
mang3 m ang3
mang4 m ang4
mang5 m ang5
mangr1 m angr1
mangr2 m angr2
mangr3 m angr3
mangr4 m angr4
mangr5 m angr5
me1 m e1
me2 m e2
me3 m e3
me4 m e4
me5 m e5
mer1 m er1
mer2 m er2
mer3 m er3
mer4 m er4
mer5 m er5
mei1 m ei1
mei2 m ei2
mei3 m ei3
mei4 m ei4
mei5 m ei5
meir1 m eir1
meir2 m eir2
meir3 m eir3
meir4 m eir4
meir5 m eir5
men1 m en1
men2 m en2
men3 m en3
men4 m en4
men5 m en5
menr1 m enr1
menr2 m enr2
menr3 m enr3
menr4 m enr4
menr5 m enr5
meng1 m eng1
meng2 m eng2
meng3 m eng3
meng4 m eng4
meng5 m eng5
mengr1 m engr1
mengr2 m engr2
mengr3 m engr3
mengr4 m engr4
mengr5 m engr5
mo1 m o1
mo2 m o2
mo3 m o3
mo4 m o4
mo5 m o5
mor1 m or1
mor2 m or2
mor3 m or3
mor4 m or4
mor5 m or5
mou1 m ou1
mou2 m ou2
mou3 m ou3
mou4 m ou4
mou5 m ou5
mour1 m our1
mour2 m our2
mour3 m our3
mour4 m our4
mour5 m our5
mi1 m i1
mi2 m i2
mi3 m i3
mi4 m i4
mi5 m i5
mir1 m ir1
mir2 m ir2
mir3 m ir3
mir4 m ir4
mir5 m ir5
mia1 m ia1
mia2 m ia2
mia3 m ia3
mia4 m ia4
mia5 m ia5
miar1 m iar1
miar2 m iar2
miar3 m iar3
miar4 m iar4
miar5 m iar5
miao1 m iao1
miao2 m iao2
miao3 m iao3
miao4 m iao4
miao5 m iao5
miaor1 m iaor1
miaor2 m iaor2
miaor3 m iaor3
miaor4 m iaor4
miaor5 m iaor5
mian1 m ian1
mian2 m ian2
mian3 m ian3
mian4 m ian4
mian5 m ian5
mianr1 m ianr1
mianr2 m ianr2
mianr3 m ianr3
mianr4 m ianr4
mianr5 m ianr5
miang1 m iang1
miang2 m iang2
miang3 m iang3
miang4 m iang4
miang5 m iang5
miangr1 m iangr1
miangr2 m iangr2
miangr3 m iangr3
miangr4 m iangr4
miangr5 m iangr5
mie1 m ie1
mie2 m ie2
mie3 m ie3
mie4 m ie4
mie5 m ie5
mier1 m ier1
mier2 m ier2
mier3 m ier3
mier4 m ier4
mier5 m ier5
mio1 m io1
mio2 m io2
mio3 m io3
mio4 m io4
mio5 m io5
mior1 m ior1
mior2 m ior2
mior3 m ior3
mior4 m ior4
mior5 m ior5
miu1 m iou1
miu2 m iou2
miu3 m iou3
miu4 m iou4
miu5 m iou5
miur1 m iour1
miur2 m iour2
miur3 m iour3
miur4 m iour4
miur5 m iour5
miong1 m iong1
miong2 m iong2
miong3 m iong3
miong4 m iong4
miong5 m iong5
miongr1 m iongr1
miongr2 m iongr2
miongr3 m iongr3
miongr4 m iongr4
miongr5 m iongr5
min1 m in1
min2 m in2
min3 m in3
min4 m in4
min5 m in5
minr1 m inr1
minr2 m inr2
minr3 m inr3
minr4 m inr4
minr5 m inr5
ming1 m ing1
ming2 m ing2
ming3 m ing3
ming4 m ing4
ming5 m ing5
mingr1 m ingr1
mingr2 m ingr2
mingr3 m ingr3
mingr4 m ingr4
mingr5 m ingr5
mu1 m u1
mu2 m u2
mu3 m u3
mu4 m u4
mu5 m u5
mur1 m ur1
mur2 m ur2
mur3 m ur3
mur4 m ur4
mur5 m ur5
fa1 f a1
fa2 f a2
fa3 f a3
fa4 f a4
fa5 f a5
far1 f ar1
far2 f ar2
far3 f ar3
far4 f ar4
far5 f ar5
fai1 f ai1
fai2 f ai2
fai3 f ai3
fai4 f ai4
fai5 f ai5
fair1 f air1
fair2 f air2
fair3 f air3
fair4 f air4
fair5 f air5
fao1 f ao1
fao2 f ao2
fao3 f ao3
fao4 f ao4
fao5 f ao5
faor1 f aor1
faor2 f aor2
faor3 f aor3
faor4 f aor4
faor5 f aor5
fan1 f an1
fan2 f an2
fan3 f an3
fan4 f an4
fan5 f an5
fanr1 f anr1
fanr2 f anr2
fanr3 f anr3
fanr4 f anr4
fanr5 f anr5
fang1 f ang1
fang2 f ang2
fang3 f ang3
fang4 f ang4
fang5 f ang5
fangr1 f angr1
fangr2 f angr2
fangr3 f angr3
fangr4 f angr4
fangr5 f angr5
fe1 f e1
fe2 f e2
fe3 f e3
fe4 f e4
fe5 f e5
fer1 f er1
fer2 f er2
fer3 f er3
fer4 f er4
fer5 f er5
fei1 f ei1
fei2 f ei2
fei3 f ei3
fei4 f ei4
fei5 f ei5
feir1 f eir1
feir2 f eir2
feir3 f eir3
feir4 f eir4
feir5 f eir5
fen1 f en1
fen2 f en2
fen3 f en3
fen4 f en4
fen5 f en5
fenr1 f enr1
fenr2 f enr2
fenr3 f enr3
fenr4 f enr4
fenr5 f enr5
feng1 f eng1
feng2 f eng2
feng3 f eng3
feng4 f eng4
feng5 f eng5
fengr1 f engr1
fengr2 f engr2
fengr3 f engr3
fengr4 f engr4
fengr5 f engr5
fo1 f o1
fo2 f o2
fo3 f o3
fo4 f o4
fo5 f o5
for1 f or1
for2 f or2
for3 f or3
for4 f or4
for5 f or5
fou1 f ou1
fou2 f ou2
fou3 f ou3
fou4 f ou4
fou5 f ou5
four1 f our1
four2 f our2
four3 f our3
four4 f our4
four5 f our5
fu1 f u1
fu2 f u2
fu3 f u3
fu4 f u4
fu5 f u5
fur1 f ur1
fur2 f ur2
fur3 f ur3
fur4 f ur4
fur5 f ur5
da1 d a1
da2 d a2
da3 d a3
da4 d a4
da5 d a5
dar1 d ar1
dar2 d ar2
dar3 d ar3
dar4 d ar4
dar5 d ar5
dai1 d ai1
dai2 d ai2
dai3 d ai3
dai4 d ai4
dai5 d ai5
dair1 d air1
dair2 d air2
dair3 d air3
dair4 d air4
dair5 d air5
dao1 d ao1
dao2 d ao2
dao3 d ao3
dao4 d ao4
dao5 d ao5
daor1 d aor1
daor2 d aor2
daor3 d aor3
daor4 d aor4
daor5 d aor5
dan1 d an1
dan2 d an2
dan3 d an3
dan4 d an4
dan5 d an5
danr1 d anr1
danr2 d anr2
danr3 d anr3
danr4 d anr4
danr5 d anr5
dang1 d ang1
dang2 d ang2
dang3 d ang3
dang4 d ang4
dang5 d ang5
dangr1 d angr1
dangr2 d angr2
dangr3 d angr3
dangr4 d angr4
dangr5 d angr5
de1 d e1
de2 d e2
de3 d e3
de4 d e4
de5 d e5
der1 d er1
der2 d er2
der3 d er3
der4 d er4
der5 d er5
dei1 d ei1
dei2 d ei2
dei3 d ei3
dei4 d ei4
dei5 d ei5
deir1 d eir1
deir2 d eir2
deir3 d eir3
deir4 d eir4
deir5 d eir5
den1 d en1
den2 d en2
den3 d en3
den4 d en4
den5 d en5
denr1 d enr1
denr2 d enr2
denr3 d enr3
denr4 d enr4
denr5 d enr5
deng1 d eng1
deng2 d eng2
deng3 d eng3
deng4 d eng4
deng5 d eng5
dengr1 d engr1
dengr2 d engr2
dengr3 d engr3
dengr4 d engr4
dengr5 d engr5
dou1 d ou1
dou2 d ou2
dou3 d ou3
dou4 d ou4
dou5 d ou5
dour1 d our1
dour2 d our2
dour3 d our3
dour4 d our4
dour5 d our5
dong1 d ong1
dong2 d ong2
dong3 d ong3
dong4 d ong4
dong5 d ong5
dongr1 d ongr1
dongr2 d ongr2
dongr3 d ongr3
dongr4 d ongr4
dongr5 d ongr5
di1 d i1
di2 d i2
di3 d i3
di4 d i4
di5 d i5
dir1 d ir1
dir2 d ir2
dir3 d ir3
dir4 d ir4
dir5 d ir5
dia1 d ia1
dia2 d ia2
dia3 d ia3
dia4 d ia4
dia5 d ia5
diar1 d iar1
diar2 d iar2
diar3 d iar3
diar4 d iar4
diar5 d iar5
diao1 d iao1
diao2 d iao2
diao3 d iao3
diao4 d iao4
diao5 d iao5
diaor1 d iaor1
diaor2 d iaor2
diaor3 d iaor3
diaor4 d iaor4
diaor5 d iaor5
dian1 d ian1
dian2 d ian2
dian3 d ian3
dian4 d ian4
dian5 d ian5
dianr1 d ianr1
dianr2 d ianr2
dianr3 d ianr3
dianr4 d ianr4
dianr5 d ianr5
diang1 d iang1
diang2 d iang2
diang3 d iang3
diang4 d iang4
diang5 d iang5
diangr1 d iangr1
diangr2 d iangr2
diangr3 d iangr3
diangr4 d iangr4
diangr5 d iangr5
die1 d ie1
die2 d ie2
die3 d ie3
die4 d ie4
die5 d ie5
dier1 d ier1
dier2 d ier2
dier3 d ier3
dier4 d ier4
dier5 d ier5
dio1 d io1
dio2 d io2
dio3 d io3
dio4 d io4
dio5 d io5
dior1 d ior1
dior2 d ior2
dior3 d ior3
dior4 d ior4
dior5 d ior5
diu1 d iou1
diu2 d iou2
diu3 d iou3
diu4 d iou4
diu5 d iou5
diur1 d iour1
diur2 d iour2
diur3 d iour3
diur4 d iour4
diur5 d iour5
diong1 d iong1
diong2 d iong2
diong3 d iong3
diong4 d iong4
diong5 d iong5
diongr1 d iongr1
diongr2 d iongr2
diongr3 d iongr3
diongr4 d iongr4
diongr5 d iongr5
din1 d in1
din2 d in2
din3 d in3
din4 d in4
din5 d in5
dinr1 d inr1
dinr2 d inr2
dinr3 d inr3
dinr4 d inr4
dinr5 d inr5
ding1 d ing1
ding2 d ing2
ding3 d ing3
ding4 d ing4
ding5 d ing5
dingr1 d ingr1
dingr2 d ingr2
dingr3 d ingr3
dingr4 d ingr4
dingr5 d ingr5
du1 d u1
du2 d u2
du3 d u3
du4 d u4
du5 d u5
dur1 d ur1
dur2 d ur2
dur3 d ur3
dur4 d ur4
dur5 d ur5
duan1 d uan1
duan2 d uan2
duan3 d uan3
duan4 d uan4
duan5 d uan5
duanr1 d uanr1
duanr2 d uanr2
duanr3 d uanr3
duanr4 d uanr4
duanr5 d uanr5
dui1 d uei1
dui2 d uei2
dui3 d uei3
dui4 d uei4
dui5 d uei5
duir1 d ueir1
duir2 d ueir2
duir3 d ueir3
duir4 d ueir4
duir5 d ueir5
duo1 d uo1
duo2 d uo2
duo3 d uo3
duo4 d uo4
duo5 d uo5
duor1 d uor1
duor2 d uor2
duor3 d uor3
duor4 d uor4
duor5 d uor5
dun1 d uen1
dun2 d uen2
dun3 d uen3
dun4 d uen4
dun5 d uen5
dunr1 d uenr1
dunr2 d uenr2
dunr3 d uenr3
dunr4 d uenr4
dunr5 d uenr5
ta1 t a1
ta2 t a2
ta3 t a3
ta4 t a4
ta5 t a5
tar1 t ar1
tar2 t ar2
tar3 t ar3
tar4 t ar4
tar5 t ar5
tai1 t ai1
tai2 t ai2
tai3 t ai3
tai4 t ai4
tai5 t ai5
tair1 t air1
tair2 t air2
tair3 t air3
tair4 t air4
tair5 t air5
tao1 t ao1
tao2 t ao2
tao3 t ao3
tao4 t ao4
tao5 t ao5
taor1 t aor1
taor2 t aor2
taor3 t aor3
taor4 t aor4
taor5 t aor5
tan1 t an1
tan2 t an2
tan3 t an3
tan4 t an4
tan5 t an5
tanr1 t anr1
tanr2 t anr2
tanr3 t anr3
tanr4 t anr4
tanr5 t anr5
tang1 t ang1
tang2 t ang2
tang3 t ang3
tang4 t ang4
tang5 t ang5
tangr1 t angr1
tangr2 t angr2
tangr3 t angr3
tangr4 t angr4
tangr5 t angr5
te1 t e1
te2 t e2
te3 t e3
te4 t e4
te5 t e5
ter1 t er1
ter2 t er2
ter3 t er3
ter4 t er4
ter5 t er5
tei1 t ei1
tei2 t ei2
tei3 t ei3
tei4 t ei4
tei5 t ei5
teir1 t eir1
teir2 t eir2
teir3 t eir3
teir4 t eir4
teir5 t eir5
ten1 t en1
ten2 t en2
ten3 t en3
ten4 t en4
ten5 t en5
tenr1 t enr1
tenr2 t enr2
tenr3 t enr3
tenr4 t enr4
tenr5 t enr5
teng1 t eng1
teng2 t eng2
teng3 t eng3
teng4 t eng4
teng5 t eng5
tengr1 t engr1
tengr2 t engr2
tengr3 t engr3
tengr4 t engr4
tengr5 t engr5
tou1 t ou1
tou2 t ou2
tou3 t ou3
tou4 t ou4
tou5 t ou5
tour1 t our1
tour2 t our2
tour3 t our3
tour4 t our4
tour5 t our5
tong1 t ong1
tong2 t ong2
tong3 t ong3
tong4 t ong4
tong5 t ong5
tongr1 t ongr1
tongr2 t ongr2
tongr3 t ongr3
tongr4 t ongr4
tongr5 t ongr5
ti1 t i1
ti2 t i2
ti3 t i3
ti4 t i4
ti5 t i5
tir1 t ir1
tir2 t ir2
tir3 t ir3
tir4 t ir4
tir5 t ir5
tia1 t ia1
tia2 t ia2
tia3 t ia3
tia4 t ia4
tia5 t ia5
tiar1 t iar1
tiar2 t iar2
tiar3 t iar3
tiar4 t iar4
tiar5 t iar5
tiao1 t iao1
tiao2 t iao2
tiao3 t iao3
tiao4 t iao4
tiao5 t iao5
tiaor1 t iaor1
tiaor2 t iaor2
tiaor3 t iaor3
tiaor4 t iaor4
tiaor5 t iaor5
tian1 t ian1
tian2 t ian2
tian3 t ian3
tian4 t ian4
tian5 t ian5
tianr1 t ianr1
tianr2 t ianr2
tianr3 t ianr3
tianr4 t ianr4
tianr5 t ianr5
tiang1 t iang1
tiang2 t iang2
tiang3 t iang3
tiang4 t iang4
tiang5 t iang5
tiangr1 t iangr1
tiangr2 t iangr2
tiangr3 t iangr3
tiangr4 t iangr4
tiangr5 t iangr5
tie1 t ie1
tie2 t ie2
tie3 t ie3
tie4 t ie4
tie5 t ie5
tier1 t ier1
tier2 t ier2
tier3 t ier3
tier4 t ier4
tier5 t ier5
tio1 t io1
tio2 t io2
tio3 t io3
tio4 t io4
tio5 t io5
tior1 t ior1
tior2 t ior2
tior3 t ior3
tior4 t ior4
tior5 t ior5
tiu1 t iou1
tiu2 t iou2
tiu3 t iou3
tiu4 t iou4
tiu5 t iou5
tiur1 t iour1
tiur2 t iour2
tiur3 t iour3
tiur4 t iour4
tiur5 t iour5
tiong1 t iong1
tiong2 t iong2
tiong3 t iong3
tiong4 t iong4
tiong5 t iong5
tiongr1 t iongr1
tiongr2 t iongr2
tiongr3 t iongr3
tiongr4 t iongr4
tiongr5 t iongr5
tin1 t in1
tin2 t in2
tin3 t in3
tin4 t in4
tin5 t in5
tinr1 t inr1
tinr2 t inr2
tinr3 t inr3
tinr4 t inr4
tinr5 t inr5
ting1 t ing1
ting2 t ing2
ting3 t ing3
ting4 t ing4
ting5 t ing5
tingr1 t ingr1
tingr2 t ingr2
tingr3 t ingr3
tingr4 t ingr4
tingr5 t ingr5
tu1 t u1
tu2 t u2
tu3 t u3
tu4 t u4
tu5 t u5
tur1 t ur1
tur2 t ur2
tur3 t ur3
tur4 t ur4
tur5 t ur5
tuan1 t uan1
tuan2 t uan2
tuan3 t uan3
tuan4 t uan4
tuan5 t uan5
tuanr1 t uanr1
tuanr2 t uanr2
tuanr3 t uanr3
tuanr4 t uanr4
tuanr5 t uanr5
tui1 t uei1
tui2 t uei2
tui3 t uei3
tui4 t uei4
tui5 t uei5
tuir1 t ueir1
tuir2 t ueir2
tuir3 t ueir3
tuir4 t ueir4
tuir5 t ueir5
tuo1 t uo1
tuo2 t uo2
tuo3 t uo3
tuo4 t uo4
tuo5 t uo5
tuor1 t uor1
tuor2 t uor2
tuor3 t uor3
tuor4 t uor4
tuor5 t uor5
tun1 t uen1
tun2 t uen2
tun3 t uen3
tun4 t uen4
tun5 t uen5
tunr1 t uenr1
tunr2 t uenr2
tunr3 t uenr3
tunr4 t uenr4
tunr5 t uenr5
na1 n a1
na2 n a2
na3 n a3
na4 n a4
na5 n a5
nar1 n ar1
nar2 n ar2
nar3 n ar3
nar4 n ar4
nar5 n ar5
nai1 n ai1
nai2 n ai2
nai3 n ai3
nai4 n ai4
nai5 n ai5
nair1 n air1
nair2 n air2
nair3 n air3
nair4 n air4
nair5 n air5
nao1 n ao1
nao2 n ao2
nao3 n ao3
nao4 n ao4
nao5 n ao5
naor1 n aor1
naor2 n aor2
naor3 n aor3
naor4 n aor4
naor5 n aor5
nan1 n an1
nan2 n an2
nan3 n an3
nan4 n an4
nan5 n an5
nanr1 n anr1
nanr2 n anr2
nanr3 n anr3
nanr4 n anr4
nanr5 n anr5
nang1 n ang1
nang2 n ang2
nang3 n ang3
nang4 n ang4
nang5 n ang5
nangr1 n angr1
nangr2 n angr2
nangr3 n angr3
nangr4 n angr4
nangr5 n angr5
ne1 n e1
ne2 n e2
ne3 n e3
ne4 n e4
ne5 n e5
ner1 n er1
ner2 n er2
ner3 n er3
ner4 n er4
ner5 n er5
nei1 n ei1
nei2 n ei2
nei3 n ei3
nei4 n ei4
nei5 n ei5
neir1 n eir1
neir2 n eir2
neir3 n eir3
neir4 n eir4
neir5 n eir5
nen1 n en1
nen2 n en2
nen3 n en3
nen4 n en4
nen5 n en5
nenr1 n enr1
nenr2 n enr2
nenr3 n enr3
nenr4 n enr4
nenr5 n enr5
neng1 n eng1
neng2 n eng2
neng3 n eng3
neng4 n eng4
neng5 n eng5
nengr1 n engr1
nengr2 n engr2
nengr3 n engr3
nengr4 n engr4
nengr5 n engr5
nou1 n ou1
nou2 n ou2
nou3 n ou3
nou4 n ou4
nou5 n ou5
nour1 n our1
nour2 n our2
nour3 n our3
nour4 n our4
nour5 n our5
nong1 n ong1
nong2 n ong2
nong3 n ong3
nong4 n ong4
nong5 n ong5
nongr1 n ongr1
nongr2 n ongr2
nongr3 n ongr3
nongr4 n ongr4
nongr5 n ongr5
ni1 n i1
ni2 n i2
ni3 n i3
ni4 n i4
ni5 n i5
nir1 n ir1
nir2 n ir2
nir3 n ir3
nir4 n ir4
nir5 n ir5
nia1 n ia1
nia2 n ia2
nia3 n ia3
nia4 n ia4
nia5 n ia5
niar1 n iar1
niar2 n iar2
niar3 n iar3
niar4 n iar4
niar5 n iar5
niao1 n iao1
niao2 n iao2
niao3 n iao3
niao4 n iao4
niao5 n iao5
niaor1 n iaor1
niaor2 n iaor2
niaor3 n iaor3
niaor4 n iaor4
niaor5 n iaor5
nian1 n ian1
nian2 n ian2
nian3 n ian3
nian4 n ian4
nian5 n ian5
nianr1 n ianr1
nianr2 n ianr2
nianr3 n ianr3
nianr4 n ianr4
nianr5 n ianr5
niang1 n iang1
niang2 n iang2
niang3 n iang3
niang4 n iang4
niang5 n iang5
niangr1 n iangr1
niangr2 n iangr2
niangr3 n iangr3
niangr4 n iangr4
niangr5 n iangr5
nie1 n ie1
nie2 n ie2
nie3 n ie3
nie4 n ie4
nie5 n ie5
nier1 n ier1
nier2 n ier2
nier3 n ier3
nier4 n ier4
nier5 n ier5
nio1 n io1
nio2 n io2
nio3 n io3
nio4 n io4
nio5 n io5
nior1 n ior1
nior2 n ior2
nior3 n ior3
nior4 n ior4
nior5 n ior5
niu1 n iou1
niu2 n iou2
niu3 n iou3
niu4 n iou4
niu5 n iou5
niur1 n iour1
niur2 n iour2
niur3 n iour3
niur4 n iour4
niur5 n iour5
niong1 n iong1
niong2 n iong2
niong3 n iong3
niong4 n iong4
niong5 n iong5
niongr1 n iongr1
niongr2 n iongr2
niongr3 n iongr3
niongr4 n iongr4
niongr5 n iongr5
nin1 n in1
nin2 n in2
nin3 n in3
nin4 n in4
nin5 n in5
ninr1 n inr1
ninr2 n inr2
ninr3 n inr3
ninr4 n inr4
ninr5 n inr5
ning1 n ing1
ning2 n ing2
ning3 n ing3
ning4 n ing4
ning5 n ing5
ningr1 n ingr1
ningr2 n ingr2
ningr3 n ingr3
ningr4 n ingr4
ningr5 n ingr5
nu1 n u1
nu2 n u2
nu3 n u3
nu4 n u4
nu5 n u5
nur1 n ur1
nur2 n ur2
nur3 n ur3
nur4 n ur4
nur5 n ur5
nuan1 n uan1
nuan2 n uan2
nuan3 n uan3
nuan4 n uan4
nuan5 n uan5
nuanr1 n uanr1
nuanr2 n uanr2
nuanr3 n uanr3
nuanr4 n uanr4
nuanr5 n uanr5
nui1 n uei1
nui2 n uei2
nui3 n uei3
nui4 n uei4
nui5 n uei5
nuir1 n ueir1
nuir2 n ueir2
nuir3 n ueir3
nuir4 n ueir4
nuir5 n ueir5
nuo1 n uo1
nuo2 n uo2
nuo3 n uo3
nuo4 n uo4
nuo5 n uo5
nuor1 n uor1
nuor2 n uor2
nuor3 n uor3
nuor4 n uor4
nuor5 n uor5
nun1 n uen1
nun2 n uen2
nun3 n uen3
nun4 n uen4
nun5 n uen5
nunr1 n uenr1
nunr2 n uenr2
nunr3 n uenr3
nunr4 n uenr4
nunr5 n uenr5
nv1 n v1
nv2 n v2
nv3 n v3
nv4 n v4
nv5 n v5
nvr1 n vr1
nvr2 n vr2
nvr3 n vr3
nvr4 n vr4
nvr5 n vr5
nve1 n ve1
nve2 n ve2
nve3 n ve3
nve4 n ve4
nve5 n ve5
nver1 n ver1
nver2 n ver2
nver3 n ver3
nver4 n ver4
nver5 n ver5
la1 l a1
la2 l a2
la3 l a3
la4 l a4
la5 l a5
lar1 l ar1
lar2 l ar2
lar3 l ar3
lar4 l ar4
lar5 l ar5
lai1 l ai1
lai2 l ai2
lai3 l ai3
lai4 l ai4
lai5 l ai5
lair1 l air1
lair2 l air2
lair3 l air3
lair4 l air4
lair5 l air5
lao1 l ao1
lao2 l ao2
lao3 l ao3
lao4 l ao4
lao5 l ao5
laor1 l aor1
laor2 l aor2
laor3 l aor3
laor4 l aor4
laor5 l aor5
lan1 l an1
lan2 l an2
lan3 l an3
lan4 l an4
lan5 l an5
lanr1 l anr1
lanr2 l anr2
lanr3 l anr3
lanr4 l anr4
lanr5 l anr5
lang1 l ang1
lang2 l ang2
lang3 l ang3
lang4 l ang4
lang5 l ang5
langr1 l angr1
langr2 l angr2
langr3 l angr3
langr4 l angr4
langr5 l angr5
le1 l e1
le2 l e2
le3 l e3
le4 l e4
le5 l e5
ler1 l er1
ler2 l er2
ler3 l er3
ler4 l er4
ler5 l er5
lei1 l ei1
lei2 l ei2
lei3 l ei3
lei4 l ei4
lei5 l ei5
leir1 l eir1
leir2 l eir2
leir3 l eir3
leir4 l eir4
leir5 l eir5
len1 l en1
len2 l en2
len3 l en3
len4 l en4
len5 l en5
lenr1 l enr1
lenr2 l enr2
lenr3 l enr3
lenr4 l enr4
lenr5 l enr5
leng1 l eng1
leng2 l eng2
leng3 l eng3
leng4 l eng4
leng5 l eng5
lengr1 l engr1
lengr2 l engr2
lengr3 l engr3
lengr4 l engr4
lengr5 l engr5
lo1 l o1
lo2 l o2
lo3 l o3
lo4 l o4
lo5 l o5
lor1 l or1
lor2 l or2
lor3 l or3
lor4 l or4
lor5 l or5
lou1 l ou1
lou2 l ou2
lou3 l ou3
lou4 l ou4
lou5 l ou5
lour1 l our1
lour2 l our2
lour3 l our3
lour4 l our4
lour5 l our5
long1 l ong1
long2 l ong2
long3 l ong3
long4 l ong4
long5 l ong5
longr1 l ongr1
longr2 l ongr2
longr3 l ongr3
longr4 l ongr4
longr5 l ongr5
li1 l i1
li2 l i2
li3 l i3
li4 l i4
li5 l i5
lir1 l ir1
lir2 l ir2
lir3 l ir3
lir4 l ir4
lir5 l ir5
lia1 l ia1
lia2 l ia2
lia3 l ia3
lia4 l ia4
lia5 l ia5
liar1 l iar1
liar2 l iar2
liar3 l iar3
liar4 l iar4
liar5 l iar5
liao1 l iao1
liao2 l iao2
liao3 l iao3
liao4 l iao4
liao5 l iao5
liaor1 l iaor1
liaor2 l iaor2
liaor3 l iaor3
liaor4 l iaor4
liaor5 l iaor5
lian1 l ian1
lian2 l ian2
lian3 l ian3
lian4 l ian4
lian5 l ian5
lianr1 l ianr1
lianr2 l ianr2
lianr3 l ianr3
lianr4 l ianr4
lianr5 l ianr5
liang1 l iang1
liang2 l iang2
liang3 l iang3
liang4 l iang4
liang5 l iang5
liangr1 l iangr1
liangr2 l iangr2
liangr3 l iangr3
liangr4 l iangr4
liangr5 l iangr5
lie1 l ie1
lie2 l ie2
lie3 l ie3
lie4 l ie4
lie5 l ie5
lier1 l ier1
lier2 l ier2
lier3 l ier3
lier4 l ier4
lier5 l ier5
lio1 l io1
lio2 l io2
lio3 l io3
lio4 l io4
lio5 l io5
lior1 l ior1
lior2 l ior2
lior3 l ior3
lior4 l ior4
lior5 l ior5
liu1 l iou1
liu2 l iou2
liu3 l iou3
liu4 l iou4
liu5 l iou5
liur1 l iour1
liur2 l iour2
liur3 l iour3
liur4 l iour4
liur5 l iour5
liong1 l iong1
liong2 l iong2
liong3 l iong3
liong4 l iong4
liong5 l iong5
liongr1 l iongr1
liongr2 l iongr2
liongr3 l iongr3
liongr4 l iongr4
liongr5 l iongr5
lin1 l in1
lin2 l in2
lin3 l in3
lin4 l in4
lin5 l in5
linr1 l inr1
linr2 l inr2
linr3 l inr3
linr4 l inr4
linr5 l inr5
ling1 l ing1
ling2 l ing2
ling3 l ing3
ling4 l ing4
ling5 l ing5
lingr1 l ingr1
lingr2 l ingr2
lingr3 l ingr3
lingr4 l ingr4
lingr5 l ingr5
lu1 l u1
lu2 l u2
lu3 l u3
lu4 l u4
lu5 l u5
lur1 l ur1
lur2 l ur2
lur3 l ur3
lur4 l ur4
lur5 l ur5
luan1 l uan1
luan2 l uan2
luan3 l uan3
luan4 l uan4
luan5 l uan5
luanr1 l uanr1
luanr2 l uanr2
luanr3 l uanr3
luanr4 l uanr4
luanr5 l uanr5
lui1 l uei1
lui2 l uei2
lui3 l uei3
lui4 l uei4
lui5 l uei5
luir1 l ueir1
luir2 l ueir2
luir3 l ueir3
luir4 l ueir4
luir5 l ueir5
luo1 l uo1
luo2 l uo2
luo3 l uo3
luo4 l uo4
luo5 l uo5
luor1 l uor1
luor2 l uor2
luor3 l uor3
luor4 l uor4
luor5 l uor5
lun1 l uen1
lun2 l uen2
lun3 l uen3
lun4 l uen4
lun5 l uen5
lunr1 l uenr1
lunr2 l uenr2
lunr3 l uenr3
lunr4 l uenr4
lunr5 l uenr5
lv1 l v1
lv2 l v2
lv3 l v3
lv4 l v4
lv5 l v5
lvr1 l vr1
lvr2 l vr2
lvr3 l vr3
lvr4 l vr4
lvr5 l vr5
lve1 l ve1
lve2 l ve2
lve3 l ve3
lve4 l ve4
lve5 l ve5
lver1 l ver1
lver2 l ver2
lver3 l ver3
lver4 l ver4
lver5 l ver5
ga1 g a1
ga2 g a2
ga3 g a3
ga4 g a4
ga5 g a5
gar1 g ar1
gar2 g ar2
gar3 g ar3
gar4 g ar4
gar5 g ar5
gai1 g ai1
gai2 g ai2
gai3 g ai3
gai4 g ai4
gai5 g ai5
gair1 g air1
gair2 g air2
gair3 g air3
gair4 g air4
gair5 g air5
gao1 g ao1
gao2 g ao2
gao3 g ao3
gao4 g ao4
gao5 g ao5
gaor1 g aor1
gaor2 g aor2
gaor3 g aor3
gaor4 g aor4
gaor5 g aor5
gan1 g an1
gan2 g an2
gan3 g an3
gan4 g an4
gan5 g an5
ganr1 g anr1
ganr2 g anr2
ganr3 g anr3
ganr4 g anr4
ganr5 g anr5
gang1 g ang1
gang2 g ang2
gang3 g ang3
gang4 g ang4
gang5 g ang5
gangr1 g angr1
gangr2 g angr2
gangr3 g angr3
gangr4 g angr4
gangr5 g angr5
ge1 g e1
ge2 g e2
ge3 g e3
ge4 g e4
ge5 g e5
ger1 g er1
ger2 g er2
ger3 g er3
ger4 g er4
ger5 g er5
gei1 g ei1
gei2 g ei2
gei3 g ei3
gei4 g ei4
gei5 g ei5
geir1 g eir1
geir2 g eir2
geir3 g eir3
geir4 g eir4
geir5 g eir5
gen1 g en1
gen2 g en2
gen3 g en3
gen4 g en4
gen5 g en5
genr1 g enr1
genr2 g enr2
genr3 g enr3
genr4 g enr4
genr5 g enr5
geng1 g eng1
geng2 g eng2
geng3 g eng3
geng4 g eng4
geng5 g eng5
gengr1 g engr1
gengr2 g engr2
gengr3 g engr3
gengr4 g engr4
gengr5 g engr5
gou1 g ou1
gou2 g ou2
gou3 g ou3
gou4 g ou4
gou5 g ou5
gour1 g our1
gour2 g our2
gour3 g our3
gour4 g our4
gour5 g our5
gong1 g ong1
gong2 g ong2
gong3 g ong3
gong4 g ong4
gong5 g ong5
gongr1 g ongr1
gongr2 g ongr2
gongr3 g ongr3
gongr4 g ongr4
gongr5 g ongr5
gu1 g u1
gu2 g u2
gu3 g u3
gu4 g u4
gu5 g u5
gur1 g ur1
gur2 g ur2
gur3 g ur3
gur4 g ur4
gur5 g ur5
gua1 g ua1
gua2 g ua2
gua3 g ua3
gua4 g ua4
gua5 g ua5
guar1 g uar1
guar2 g uar2
guar3 g uar3
guar4 g uar4
guar5 g uar5
guai1 g uai1
guai2 g uai2
guai3 g uai3
guai4 g uai4
guai5 g uai5
guair1 g uair1
guair2 g uair2
guair3 g uair3
guair4 g uair4
guair5 g uair5
guan1 g uan1
guan2 g uan2
guan3 g uan3
guan4 g uan4
guan5 g uan5
guanr1 g uanr1
guanr2 g uanr2
guanr3 g uanr3
guanr4 g uanr4
guanr5 g uanr5
guang1 g uang1
guang2 g uang2
guang3 g uang3
guang4 g uang4
guang5 g uang5
guangr1 g uangr1
guangr2 g uangr2
guangr3 g uangr3
guangr4 g uangr4
guangr5 g uangr5
gui1 g uei1
gui2 g uei2
gui3 g uei3
gui4 g uei4
gui5 g uei5
guir1 g ueir1
guir2 g ueir2
guir3 g ueir3
guir4 g ueir4
guir5 g ueir5
guo1 g uo1
guo2 g uo2
guo3 g uo3
guo4 g uo4
guo5 g uo5
guor1 g uor1
guor2 g uor2
guor3 g uor3
guor4 g uor4
guor5 g uor5
gun1 g uen1
gun2 g uen2
gun3 g uen3
gun4 g uen4
gun5 g uen5
gunr1 g uenr1
gunr2 g uenr2
gunr3 g uenr3
gunr4 g uenr4
gunr5 g uenr5
ka1 k a1
ka2 k a2
ka3 k a3
ka4 k a4
ka5 k a5
kar1 k ar1
kar2 k ar2
kar3 k ar3
kar4 k ar4
kar5 k ar5
kai1 k ai1
kai2 k ai2
kai3 k ai3
kai4 k ai4
kai5 k ai5
kair1 k air1
kair2 k air2
kair3 k air3
kair4 k air4
kair5 k air5
kao1 k ao1
kao2 k ao2
kao3 k ao3
kao4 k ao4
kao5 k ao5
kaor1 k aor1
kaor2 k aor2
kaor3 k aor3
kaor4 k aor4
kaor5 k aor5
kan1 k an1
kan2 k an2
kan3 k an3
kan4 k an4
kan5 k an5
kanr1 k anr1
kanr2 k anr2
kanr3 k anr3
kanr4 k anr4
kanr5 k anr5
kang1 k ang1
kang2 k ang2
kang3 k ang3
kang4 k ang4
kang5 k ang5
kangr1 k angr1
kangr2 k angr2
kangr3 k angr3
kangr4 k angr4
kangr5 k angr5
ke1 k e1
ke2 k e2
ke3 k e3
ke4 k e4
ke5 k e5
ker1 k er1
ker2 k er2
ker3 k er3
ker4 k er4
ker5 k er5
kei1 k ei1
kei2 k ei2
kei3 k ei3
kei4 k ei4
kei5 k ei5
keir1 k eir1
keir2 k eir2
keir3 k eir3
keir4 k eir4
keir5 k eir5
ken1 k en1
ken2 k en2
ken3 k en3
ken4 k en4
ken5 k en5
kenr1 k enr1
kenr2 k enr2
kenr3 k enr3
kenr4 k enr4
kenr5 k enr5
keng1 k eng1
keng2 k eng2
keng3 k eng3
keng4 k eng4
keng5 k eng5
kengr1 k engr1
kengr2 k engr2
kengr3 k engr3
kengr4 k engr4
kengr5 k engr5
kou1 k ou1
kou2 k ou2
kou3 k ou3
kou4 k ou4
kou5 k ou5
kour1 k our1
kour2 k our2
kour3 k our3
kour4 k our4
kour5 k our5
kong1 k ong1
kong2 k ong2
kong3 k ong3
kong4 k ong4
kong5 k ong5
kongr1 k ongr1
kongr2 k ongr2
kongr3 k ongr3
kongr4 k ongr4
kongr5 k ongr5
ku1 k u1
ku2 k u2
ku3 k u3
ku4 k u4
ku5 k u5
kur1 k ur1
kur2 k ur2
kur3 k ur3
kur4 k ur4
kur5 k ur5
kua1 k ua1
kua2 k ua2
kua3 k ua3
kua4 k ua4
kua5 k ua5
kuar1 k uar1
kuar2 k uar2
kuar3 k uar3
kuar4 k uar4
kuar5 k uar5
kuai1 k uai1
kuai2 k uai2
kuai3 k uai3
kuai4 k uai4
kuai5 k uai5
kuair1 k uair1
kuair2 k uair2
kuair3 k uair3
kuair4 k uair4
kuair5 k uair5
kuan1 k uan1
kuan2 k uan2
kuan3 k uan3
kuan4 k uan4
kuan5 k uan5
kuanr1 k uanr1
kuanr2 k uanr2
kuanr3 k uanr3
kuanr4 k uanr4
kuanr5 k uanr5
kuang1 k uang1
kuang2 k uang2
kuang3 k uang3
kuang4 k uang4
kuang5 k uang5
kuangr1 k uangr1
kuangr2 k uangr2
kuangr3 k uangr3
kuangr4 k uangr4
kuangr5 k uangr5
kui1 k uei1
kui2 k uei2
kui3 k uei3
kui4 k uei4
kui5 k uei5
kuir1 k ueir1
kuir2 k ueir2
kuir3 k ueir3
kuir4 k ueir4
kuir5 k ueir5
kuo1 k uo1
kuo2 k uo2
kuo3 k uo3
kuo4 k uo4
kuo5 k uo5
kuor1 k uor1
kuor2 k uor2
kuor3 k uor3
kuor4 k uor4
kuor5 k uor5
kun1 k uen1
kun2 k uen2
kun3 k uen3
kun4 k uen4
kun5 k uen5
kunr1 k uenr1
kunr2 k uenr2
kunr3 k uenr3
kunr4 k uenr4
kunr5 k uenr5
ha1 h a1
ha2 h a2
ha3 h a3
ha4 h a4
ha5 h a5
har1 h ar1
har2 h ar2
har3 h ar3
har4 h ar4
har5 h ar5
hai1 h ai1
hai2 h ai2
hai3 h ai3
hai4 h ai4
hai5 h ai5
hair1 h air1
hair2 h air2
hair3 h air3
hair4 h air4
hair5 h air5
hao1 h ao1
hao2 h ao2
hao3 h ao3
hao4 h ao4
hao5 h ao5
haor1 h aor1
haor2 h aor2
haor3 h aor3
haor4 h aor4
haor5 h aor5
han1 h an1
han2 h an2
han3 h an3
han4 h an4
han5 h an5
hanr1 h anr1
hanr2 h anr2
hanr3 h anr3
hanr4 h anr4
hanr5 h anr5
hang1 h ang1
hang2 h ang2
hang3 h ang3
hang4 h ang4
hang5 h ang5
hangr1 h angr1
hangr2 h angr2
hangr3 h angr3
hangr4 h angr4
hangr5 h angr5
he1 h e1
he2 h e2
he3 h e3
he4 h e4
he5 h e5
her1 h er1
her2 h er2
her3 h er3
her4 h er4
her5 h er5
hei1 h ei1
hei2 h ei2
hei3 h ei3
hei4 h ei4
hei5 h ei5
heir1 h eir1
heir2 h eir2
heir3 h eir3
heir4 h eir4
heir5 h eir5
hen1 h en1
hen2 h en2
hen3 h en3
hen4 h en4
hen5 h en5
henr1 h enr1
henr2 h enr2
henr3 h enr3
henr4 h enr4
henr5 h enr5
heng1 h eng1
heng2 h eng2
heng3 h eng3
heng4 h eng4
heng5 h eng5
hengr1 h engr1
hengr2 h engr2
hengr3 h engr3
hengr4 h engr4
hengr5 h engr5
hou1 h ou1
hou2 h ou2
hou3 h ou3
hou4 h ou4
hou5 h ou5
hour1 h our1
hour2 h our2
hour3 h our3
hour4 h our4
hour5 h our5
hong1 h ong1
hong2 h ong2
hong3 h ong3
hong4 h ong4
hong5 h ong5
hongr1 h ongr1
hongr2 h ongr2
hongr3 h ongr3
hongr4 h ongr4
hongr5 h ongr5
hu1 h u1
hu2 h u2
hu3 h u3
hu4 h u4
hu5 h u5
hur1 h ur1
hur2 h ur2
hur3 h ur3
hur4 h ur4
hur5 h ur5
hua1 h ua1
hua2 h ua2
hua3 h ua3
hua4 h ua4
hua5 h ua5
huar1 h uar1
huar2 h uar2
huar3 h uar3
huar4 h uar4
huar5 h uar5
huai1 h uai1
huai2 h uai2
huai3 h uai3
huai4 h uai4
huai5 h uai5
huair1 h uair1
huair2 h uair2
huair3 h uair3
huair4 h uair4
huair5 h uair5
huan1 h uan1
huan2 h uan2
huan3 h uan3
huan4 h uan4
huan5 h uan5
huanr1 h uanr1
huanr2 h uanr2
huanr3 h uanr3
huanr4 h uanr4
huanr5 h uanr5
huang1 h uang1
huang2 h uang2
huang3 h uang3
huang4 h uang4
huang5 h uang5
huangr1 h uangr1
huangr2 h uangr2
huangr3 h uangr3
huangr4 h uangr4
huangr5 h uangr5
hui1 h uei1
hui2 h uei2
hui3 h uei3
hui4 h uei4
hui5 h uei5
huir1 h ueir1
huir2 h ueir2
huir3 h ueir3
huir4 h ueir4
huir5 h ueir5
huo1 h uo1
huo2 h uo2
huo3 h uo3
huo4 h uo4
huo5 h uo5
huor1 h uor1
huor2 h uor2
huor3 h uor3
huor4 h uor4
huor5 h uor5
hun1 h uen1
hun2 h uen2
hun3 h uen3
hun4 h uen4
hun5 h uen5
hunr1 h uenr1
hunr2 h uenr2
hunr3 h uenr3
hunr4 h uenr4
hunr5 h uenr5
zha1 zh a1
zha2 zh a2
zha3 zh a3
zha4 zh a4
zha5 zh a5
zhar1 zh ar1
zhar2 zh ar2
zhar3 zh ar3
zhar4 zh ar4
zhar5 zh ar5
zhai1 zh ai1
zhai2 zh ai2
zhai3 zh ai3
zhai4 zh ai4
zhai5 zh ai5
zhair1 zh air1
zhair2 zh air2
zhair3 zh air3
zhair4 zh air4
zhair5 zh air5
zhao1 zh ao1
zhao2 zh ao2
zhao3 zh ao3
zhao4 zh ao4
zhao5 zh ao5
zhaor1 zh aor1
zhaor2 zh aor2
zhaor3 zh aor3
zhaor4 zh aor4
zhaor5 zh aor5
zhan1 zh an1
zhan2 zh an2
zhan3 zh an3
zhan4 zh an4
zhan5 zh an5
zhanr1 zh anr1
zhanr2 zh anr2
zhanr3 zh anr3
zhanr4 zh anr4
zhanr5 zh anr5
zhang1 zh ang1
zhang2 zh ang2
zhang3 zh ang3
zhang4 zh ang4
zhang5 zh ang5
zhangr1 zh angr1
zhangr2 zh angr2
zhangr3 zh angr3
zhangr4 zh angr4
zhangr5 zh angr5
zhe1 zh e1
zhe2 zh e2
zhe3 zh e3
zhe4 zh e4
zhe5 zh e5
zher1 zh er1
zher2 zh er2
zher3 zh er3
zher4 zh er4
zher5 zh er5
zhei1 zh ei1
zhei2 zh ei2
zhei3 zh ei3
zhei4 zh ei4
zhei5 zh ei5
zheir1 zh eir1
zheir2 zh eir2
zheir3 zh eir3
zheir4 zh eir4
zheir5 zh eir5
zhen1 zh en1
zhen2 zh en2
zhen3 zh en3
zhen4 zh en4
zhen5 zh en5
zhenr1 zh enr1
zhenr2 zh enr2
zhenr3 zh enr3
zhenr4 zh enr4
zhenr5 zh enr5
zheng1 zh eng1
zheng2 zh eng2
zheng3 zh eng3
zheng4 zh eng4
zheng5 zh eng5
zhengr1 zh engr1
zhengr2 zh engr2
zhengr3 zh engr3
zhengr4 zh engr4
zhengr5 zh engr5
zhou1 zh ou1
zhou2 zh ou2
zhou3 zh ou3
zhou4 zh ou4
zhou5 zh ou5
zhour1 zh our1
zhour2 zh our2
zhour3 zh our3
zhour4 zh our4
zhour5 zh our5
zhong1 zh ong1
zhong2 zh ong2
zhong3 zh ong3
zhong4 zh ong4
zhong5 zh ong5
zhongr1 zh ongr1
zhongr2 zh ongr2
zhongr3 zh ongr3
zhongr4 zh ongr4
zhongr5 zh ongr5
zhi1 zh iii1
zhi2 zh iii2
zhi3 zh iii3
zhi4 zh iii4
zhi5 zh iii5
zhir1 zh iiir1
zhir2 zh iiir2
zhir3 zh iiir3
zhir4 zh iiir4
zhir5 zh iiir5
zhu1 zh u1
zhu2 zh u2
zhu3 zh u3
zhu4 zh u4
zhu5 zh u5
zhur1 zh ur1
zhur2 zh ur2
zhur3 zh ur3
zhur4 zh ur4
zhur5 zh ur5
zhua1 zh ua1
zhua2 zh ua2
zhua3 zh ua3
zhua4 zh ua4
zhua5 zh ua5
zhuar1 zh uar1
zhuar2 zh uar2
zhuar3 zh uar3
zhuar4 zh uar4
zhuar5 zh uar5
zhuai1 zh uai1
zhuai2 zh uai2
zhuai3 zh uai3
zhuai4 zh uai4
zhuai5 zh uai5
zhuair1 zh uair1
zhuair2 zh uair2
zhuair3 zh uair3
zhuair4 zh uair4
zhuair5 zh uair5
zhuan1 zh uan1
zhuan2 zh uan2
zhuan3 zh uan3
zhuan4 zh uan4
zhuan5 zh uan5
zhuanr1 zh uanr1
zhuanr2 zh uanr2
zhuanr3 zh uanr3
zhuanr4 zh uanr4
zhuanr5 zh uanr5
zhuang1 zh uang1
zhuang2 zh uang2
zhuang3 zh uang3
zhuang4 zh uang4
zhuang5 zh uang5
zhuangr1 zh uangr1
zhuangr2 zh uangr2
zhuangr3 zh uangr3
zhuangr4 zh uangr4
zhuangr5 zh uangr5
zhui1 zh uei1
zhui2 zh uei2
zhui3 zh uei3
zhui4 zh uei4
zhui5 zh uei5
zhuir1 zh ueir1
zhuir2 zh ueir2
zhuir3 zh ueir3
zhuir4 zh ueir4
zhuir5 zh ueir5
zhuo1 zh uo1
zhuo2 zh uo2
zhuo3 zh uo3
zhuo4 zh uo4
zhuo5 zh uo5
zhuor1 zh uor1
zhuor2 zh uor2
zhuor3 zh uor3
zhuor4 zh uor4
zhuor5 zh uor5
zhun1 zh uen1
zhun2 zh uen2
zhun3 zh uen3
zhun4 zh uen4
zhun5 zh uen5
zhunr1 zh uenr1
zhunr2 zh uenr2
zhunr3 zh uenr3
zhunr4 zh uenr4
zhunr5 zh uenr5
cha1 ch a1
cha2 ch a2
cha3 ch a3
cha4 ch a4
cha5 ch a5
char1 ch ar1
char2 ch ar2
char3 ch ar3
char4 ch ar4
char5 ch ar5
chai1 ch ai1
chai2 ch ai2
chai3 ch ai3
chai4 ch ai4
chai5 ch ai5
chair1 ch air1
chair2 ch air2
chair3 ch air3
chair4 ch air4
chair5 ch air5
chao1 ch ao1
chao2 ch ao2
chao3 ch ao3
chao4 ch ao4
chao5 ch ao5
chaor1 ch aor1
chaor2 ch aor2
chaor3 ch aor3
chaor4 ch aor4
chaor5 ch aor5
chan1 ch an1
chan2 ch an2
chan3 ch an3
chan4 ch an4
chan5 ch an5
chanr1 ch anr1
chanr2 ch anr2
chanr3 ch anr3
chanr4 ch anr4
chanr5 ch anr5
chang1 ch ang1
chang2 ch ang2
chang3 ch ang3
chang4 ch ang4
chang5 ch ang5
changr1 ch angr1
changr2 ch angr2
changr3 ch angr3
changr4 ch angr4
changr5 ch angr5
che1 ch e1
che2 ch e2
che3 ch e3
che4 ch e4
che5 ch e5
cher1 ch er1
cher2 ch er2
cher3 ch er3
cher4 ch er4
cher5 ch er5
chei1 ch ei1
chei2 ch ei2
chei3 ch ei3
chei4 ch ei4
chei5 ch ei5
cheir1 ch eir1
cheir2 ch eir2
cheir3 ch eir3
cheir4 ch eir4
cheir5 ch eir5
chen1 ch en1
chen2 ch en2
chen3 ch en3
chen4 ch en4
chen5 ch en5
chenr1 ch enr1
chenr2 ch enr2
chenr3 ch enr3
chenr4 ch enr4
chenr5 ch enr5
cheng1 ch eng1
cheng2 ch eng2
cheng3 ch eng3
cheng4 ch eng4
cheng5 ch eng5
chengr1 ch engr1
chengr2 ch engr2
chengr3 ch engr3
chengr4 ch engr4
chengr5 ch engr5
chou1 ch ou1
chou2 ch ou2
chou3 ch ou3
chou4 ch ou4
chou5 ch ou5
chour1 ch our1
chour2 ch our2
chour3 ch our3
chour4 ch our4
chour5 ch our5
chong1 ch ong1
chong2 ch ong2
chong3 ch ong3
chong4 ch ong4
chong5 ch ong5
chongr1 ch ongr1
chongr2 ch ongr2
chongr3 ch ongr3
chongr4 ch ongr4
chongr5 ch ongr5
chi1 ch iii1
chi2 ch iii2
chi3 ch iii3
chi4 ch iii4
chi5 ch iii5
chir1 ch iiir1
chir2 ch iiir2
chir3 ch iiir3
chir4 ch iiir4
chir5 ch iiir5
chu1 ch u1
chu2 ch u2
chu3 ch u3
chu4 ch u4
chu5 ch u5
chur1 ch ur1
chur2 ch ur2
chur3 ch ur3
chur4 ch ur4
chur5 ch ur5
chua1 ch ua1
chua2 ch ua2
chua3 ch ua3
chua4 ch ua4
chua5 ch ua5
chuar1 ch uar1
chuar2 ch uar2
chuar3 ch uar3
chuar4 ch uar4
chuar5 ch uar5
chuai1 ch uai1
chuai2 ch uai2
chuai3 ch uai3
chuai4 ch uai4
chuai5 ch uai5
chuair1 ch uair1
chuair2 ch uair2
chuair3 ch uair3
chuair4 ch uair4
chuair5 ch uair5
chuan1 ch uan1
chuan2 ch uan2
chuan3 ch uan3
chuan4 ch uan4
chuan5 ch uan5
chuanr1 ch uanr1
chuanr2 ch uanr2
chuanr3 ch uanr3
chuanr4 ch uanr4
chuanr5 ch uanr5
chuang1 ch uang1
chuang2 ch uang2
chuang3 ch uang3
chuang4 ch uang4
chuang5 ch uang5
chuangr1 ch uangr1
chuangr2 ch uangr2
chuangr3 ch uangr3
chuangr4 ch uangr4
chuangr5 ch uangr5
chui1 ch uei1
chui2 ch uei2
chui3 ch uei3
chui4 ch uei4
chui5 ch uei5
chuir1 ch ueir1
chuir2 ch ueir2
chuir3 ch ueir3
chuir4 ch ueir4
chuir5 ch ueir5
chuo1 ch uo1
chuo2 ch uo2
chuo3 ch uo3
chuo4 ch uo4
chuo5 ch uo5
chuor1 ch uor1
chuor2 ch uor2
chuor3 ch uor3
chuor4 ch uor4
chuor5 ch uor5
chun1 ch uen1
chun2 ch uen2
chun3 ch uen3
chun4 ch uen4
chun5 ch uen5
chunr1 ch uenr1
chunr2 ch uenr2
chunr3 ch uenr3
chunr4 ch uenr4
chunr5 ch uenr5
sha1 sh a1
sha2 sh a2
sha3 sh a3
sha4 sh a4
sha5 sh a5
shar1 sh ar1
shar2 sh ar2
shar3 sh ar3
shar4 sh ar4
shar5 sh ar5
shai1 sh ai1
shai2 sh ai2
shai3 sh ai3
shai4 sh ai4
shai5 sh ai5
shair1 sh air1
shair2 sh air2
shair3 sh air3
shair4 sh air4
shair5 sh air5
shao1 sh ao1
shao2 sh ao2
shao3 sh ao3
shao4 sh ao4
shao5 sh ao5
shaor1 sh aor1
shaor2 sh aor2
shaor3 sh aor3
shaor4 sh aor4
shaor5 sh aor5
shan1 sh an1
shan2 sh an2
shan3 sh an3
shan4 sh an4
shan5 sh an5
shanr1 sh anr1
shanr2 sh anr2
shanr3 sh anr3
shanr4 sh anr4
shanr5 sh anr5
shang1 sh ang1
shang2 sh ang2
shang3 sh ang3
shang4 sh ang4
shang5 sh ang5
shangr1 sh angr1
shangr2 sh angr2
shangr3 sh angr3
shangr4 sh angr4
shangr5 sh angr5
she1 sh e1
she2 sh e2
she3 sh e3
she4 sh e4
she5 sh e5
sher1 sh er1
sher2 sh er2
sher3 sh er3
sher4 sh er4
sher5 sh er5
shei1 sh ei1
shei2 sh ei2
shei3 sh ei3
shei4 sh ei4
shei5 sh ei5
sheir1 sh eir1
sheir2 sh eir2
sheir3 sh eir3
sheir4 sh eir4
sheir5 sh eir5
shen1 sh en1
shen2 sh en2
shen3 sh en3
shen4 sh en4
shen5 sh en5
shenr1 sh enr1
shenr2 sh enr2
shenr3 sh enr3
shenr4 sh enr4
shenr5 sh enr5
sheng1 sh eng1
sheng2 sh eng2
sheng3 sh eng3
sheng4 sh eng4
sheng5 sh eng5
shengr1 sh engr1
shengr2 sh engr2
shengr3 sh engr3
shengr4 sh engr4
shengr5 sh engr5
shou1 sh ou1
shou2 sh ou2
shou3 sh ou3
shou4 sh ou4
shou5 sh ou5
shour1 sh our1
shour2 sh our2
shour3 sh our3
shour4 sh our4
shour5 sh our5
shi1 sh iii1
shi2 sh iii2
shi3 sh iii3
shi4 sh iii4
shi5 sh iii5
shir1 sh iiir1
shir2 sh iiir2
shir3 sh iiir3
shir4 sh iiir4
shir5 sh iiir5
shu1 sh u1
shu2 sh u2
shu3 sh u3
shu4 sh u4
shu5 sh u5
shur1 sh ur1
shur2 sh ur2
shur3 sh ur3
shur4 sh ur4
shur5 sh ur5
shua1 sh ua1
shua2 sh ua2
shua3 sh ua3
shua4 sh ua4
shua5 sh ua5
shuar1 sh uar1
shuar2 sh uar2
shuar3 sh uar3
shuar4 sh uar4
shuar5 sh uar5
shuai1 sh uai1
shuai2 sh uai2
shuai3 sh uai3
shuai4 sh uai4
shuai5 sh uai5
shuair1 sh uair1
shuair2 sh uair2
shuair3 sh uair3
shuair4 sh uair4
shuair5 sh uair5
shuan1 sh uan1
shuan2 sh uan2
shuan3 sh uan3
shuan4 sh uan4
shuan5 sh uan5
shuanr1 sh uanr1
shuanr2 sh uanr2
shuanr3 sh uanr3
shuanr4 sh uanr4
shuanr5 sh uanr5
shuang1 sh uang1
shuang2 sh uang2
shuang3 sh uang3
shuang4 sh uang4
shuang5 sh uang5
shuangr1 sh uangr1
shuangr2 sh uangr2
shuangr3 sh uangr3
shuangr4 sh uangr4
shuangr5 sh uangr5
shui1 sh uei1
shui2 sh uei2
shui3 sh uei3
shui4 sh uei4
shui5 sh uei5
shuir1 sh ueir1
shuir2 sh ueir2
shuir3 sh ueir3
shuir4 sh ueir4
shuir5 sh ueir5
shuo1 sh uo1
shuo2 sh uo2
shuo3 sh uo3
shuo4 sh uo4
shuo5 sh uo5
shuor1 sh uor1
shuor2 sh uor2
shuor3 sh uor3
shuor4 sh uor4
shuor5 sh uor5
shun1 sh uen1
shun2 sh uen2
shun3 sh uen3
shun4 sh uen4
shun5 sh uen5
shunr1 sh uenr1
shunr2 sh uenr2
shunr3 sh uenr3
shunr4 sh uenr4
shunr5 sh uenr5
ra1 r a1
ra2 r a2
ra3 r a3
ra4 r a4
ra5 r a5
rar1 r ar1
rar2 r ar2
rar3 r ar3
rar4 r ar4
rar5 r ar5
rai1 r ai1
rai2 r ai2
rai3 r ai3
rai4 r ai4
rai5 r ai5
rair1 r air1
rair2 r air2
rair3 r air3
rair4 r air4
rair5 r air5
rao1 r ao1
rao2 r ao2
rao3 r ao3
rao4 r ao4
rao5 r ao5
raor1 r aor1
raor2 r aor2
raor3 r aor3
raor4 r aor4
raor5 r aor5
ran1 r an1
ran2 r an2
ran3 r an3
ran4 r an4
ran5 r an5
ranr1 r anr1
ranr2 r anr2
ranr3 r anr3
ranr4 r anr4
ranr5 r anr5
rang1 r ang1
rang2 r ang2
rang3 r ang3
rang4 r ang4
rang5 r ang5
rangr1 r angr1
rangr2 r angr2
rangr3 r angr3
rangr4 r angr4
rangr5 r angr5
re1 r e1
re2 r e2
re3 r e3
re4 r e4
re5 r e5
rer1 r er1
rer2 r er2
rer3 r er3
rer4 r er4
rer5 r er5
rei1 r ei1
rei2 r ei2
rei3 r ei3
rei4 r ei4
rei5 r ei5
reir1 r eir1
reir2 r eir2
reir3 r eir3
reir4 r eir4
reir5 r eir5
ren1 r en1
ren2 r en2
ren3 r en3
ren4 r en4
ren5 r en5
renr1 r enr1
renr2 r enr2
renr3 r enr3
renr4 r enr4
renr5 r enr5
reng1 r eng1
reng2 r eng2
reng3 r eng3
reng4 r eng4
reng5 r eng5
rengr1 r engr1
rengr2 r engr2
rengr3 r engr3
rengr4 r engr4
rengr5 r engr5
rou1 r ou1
rou2 r ou2
rou3 r ou3
rou4 r ou4
rou5 r ou5
rour1 r our1
rour2 r our2
rour3 r our3
rour4 r our4
rour5 r our5
rong1 r ong1
rong2 r ong2
rong3 r ong3
rong4 r ong4
rong5 r ong5
rongr1 r ongr1
rongr2 r ongr2
rongr3 r ongr3
rongr4 r ongr4
rongr5 r ongr5
ri1 r iii1
ri2 r iii2
ri3 r iii3
ri4 r iii4
ri5 r iii5
rir1 r iiir1
rir2 r iiir2
rir3 r iiir3
rir4 r iiir4
rir5 r iiir5
ru1 r u1
ru2 r u2
ru3 r u3
ru4 r u4
ru5 r u5
rur1 r ur1
rur2 r ur2
rur3 r ur3
rur4 r ur4
rur5 r ur5
ruan1 r uan1
ruan2 r uan2
ruan3 r uan3
ruan4 r uan4
ruan5 r uan5
ruanr1 r uanr1
ruanr2 r uanr2
ruanr3 r uanr3
ruanr4 r uanr4
ruanr5 r uanr5
rui1 r uei1
rui2 r uei2
rui3 r uei3
rui4 r uei4
rui5 r uei5
ruir1 r ueir1
ruir2 r ueir2
ruir3 r ueir3
ruir4 r ueir4
ruir5 r ueir5
ruo1 r uo1
ruo2 r uo2
ruo3 r uo3
ruo4 r uo4
ruo5 r uo5
ruor1 r uor1
ruor2 r uor2
ruor3 r uor3
ruor4 r uor4
ruor5 r uor5
run1 r uen1
run2 r uen2
run3 r uen3
run4 r uen4
run5 r uen5
runr1 r uenr1
runr2 r uenr2
runr3 r uenr3
runr4 r uenr4
runr5 r uenr5
za1 z a1
za2 z a2
za3 z a3
za4 z a4
za5 z a5
zar1 z ar1
zar2 z ar2
zar3 z ar3
zar4 z ar4
zar5 z ar5
zai1 z ai1
zai2 z ai2
zai3 z ai3
zai4 z ai4
zai5 z ai5
zair1 z air1
zair2 z air2
zair3 z air3
zair4 z air4
zair5 z air5
zao1 z ao1
zao2 z ao2
zao3 z ao3
zao4 z ao4
zao5 z ao5
zaor1 z aor1
zaor2 z aor2
zaor3 z aor3
zaor4 z aor4
zaor5 z aor5
zan1 z an1
zan2 z an2
zan3 z an3
zan4 z an4
zan5 z an5
zanr1 z anr1
zanr2 z anr2
zanr3 z anr3
zanr4 z anr4
zanr5 z anr5
zang1 z ang1
zang2 z ang2
zang3 z ang3
zang4 z ang4
zang5 z ang5
zangr1 z angr1
zangr2 z angr2
zangr3 z angr3
zangr4 z angr4
zangr5 z angr5
ze1 z e1
ze2 z e2
ze3 z e3
ze4 z e4
ze5 z e5
zer1 z er1
zer2 z er2
zer3 z er3
zer4 z er4
zer5 z er5
zei1 z ei1
zei2 z ei2
zei3 z ei3
zei4 z ei4
zei5 z ei5
zeir1 z eir1
zeir2 z eir2
zeir3 z eir3
zeir4 z eir4
zeir5 z eir5
zen1 z en1
zen2 z en2
zen3 z en3
zen4 z en4
zen5 z en5
zenr1 z enr1
zenr2 z enr2
zenr3 z enr3
zenr4 z enr4
zenr5 z enr5
zeng1 z eng1
zeng2 z eng2
zeng3 z eng3
zeng4 z eng4
zeng5 z eng5
zengr1 z engr1
zengr2 z engr2
zengr3 z engr3
zengr4 z engr4
zengr5 z engr5
zou1 z ou1
zou2 z ou2
zou3 z ou3
zou4 z ou4
zou5 z ou5
zour1 z our1
zour2 z our2
zour3 z our3
zour4 z our4
zour5 z our5
zong1 z ong1
zong2 z ong2
zong3 z ong3
zong4 z ong4
zong5 z ong5
zongr1 z ongr1
zongr2 z ongr2
zongr3 z ongr3
zongr4 z ongr4
zongr5 z ongr5
zi1 z ii1
zi2 z ii2
zi3 z ii3
zi4 z ii4
zi5 z ii5
zir1 z iir1
zir2 z iir2
zir3 z iir3
zir4 z iir4
zir5 z iir5
zu1 z u1
zu2 z u2
zu3 z u3
zu4 z u4
zu5 z u5
zur1 z ur1
zur2 z ur2
zur3 z ur3
zur4 z ur4
zur5 z ur5
zuan1 z uan1
zuan2 z uan2
zuan3 z uan3
zuan4 z uan4
zuan5 z uan5
zuanr1 z uanr1
zuanr2 z uanr2
zuanr3 z uanr3
zuanr4 z uanr4
zuanr5 z uanr5
zui1 z uei1
zui2 z uei2
zui3 z uei3
zui4 z uei4
zui5 z uei5
zuir1 z ueir1
zuir2 z ueir2
zuir3 z ueir3
zuir4 z ueir4
zuir5 z ueir5
zuo1 z uo1
zuo2 z uo2
zuo3 z uo3
zuo4 z uo4
zuo5 z uo5
zuor1 z uor1
zuor2 z uor2
zuor3 z uor3
zuor4 z uor4
zuor5 z uor5
zun1 z uen1
zun2 z uen2
zun3 z uen3
zun4 z uen4
zun5 z uen5
zunr1 z uenr1
zunr2 z uenr2
zunr3 z uenr3
zunr4 z uenr4
zunr5 z uenr5
ca1 c a1
ca2 c a2
ca3 c a3
ca4 c a4
ca5 c a5
car1 c ar1
car2 c ar2
car3 c ar3
car4 c ar4
car5 c ar5
cai1 c ai1
cai2 c ai2
cai3 c ai3
cai4 c ai4
cai5 c ai5
cair1 c air1
cair2 c air2
cair3 c air3
cair4 c air4
cair5 c air5
cao1 c ao1
cao2 c ao2
cao3 c ao3
cao4 c ao4
cao5 c ao5
caor1 c aor1
caor2 c aor2
caor3 c aor3
caor4 c aor4
caor5 c aor5
can1 c an1
can2 c an2
can3 c an3
can4 c an4
can5 c an5
canr1 c anr1
canr2 c anr2
canr3 c anr3
canr4 c anr4
canr5 c anr5
cang1 c ang1
cang2 c ang2
cang3 c ang3
cang4 c ang4
cang5 c ang5
cangr1 c angr1
cangr2 c angr2
cangr3 c angr3
cangr4 c angr4
cangr5 c angr5
ce1 c e1
ce2 c e2
ce3 c e3
ce4 c e4
ce5 c e5
cer1 c er1
cer2 c er2
cer3 c er3
cer4 c er4
cer5 c er5
cei1 c ei1
cei2 c ei2
cei3 c ei3
cei4 c ei4
cei5 c ei5
ceir1 c eir1
ceir2 c eir2
ceir3 c eir3
ceir4 c eir4
ceir5 c eir5
cen1 c en1
cen2 c en2
cen3 c en3
cen4 c en4
cen5 c en5
cenr1 c enr1
cenr2 c enr2
cenr3 c enr3
cenr4 c enr4
cenr5 c enr5
ceng1 c eng1
ceng2 c eng2
ceng3 c eng3
ceng4 c eng4
ceng5 c eng5
cengr1 c engr1
cengr2 c engr2
cengr3 c engr3
cengr4 c engr4
cengr5 c engr5
cou1 c ou1
cou2 c ou2
cou3 c ou3
cou4 c ou4
cou5 c ou5
cour1 c our1
cour2 c our2
cour3 c our3
cour4 c our4
cour5 c our5
cong1 c ong1
cong2 c ong2
cong3 c ong3
cong4 c ong4
cong5 c ong5
congr1 c ongr1
congr2 c ongr2
congr3 c ongr3
congr4 c ongr4
congr5 c ongr5
ci1 c ii1
ci2 c ii2
ci3 c ii3
ci4 c ii4
ci5 c ii5
cir1 c iir1
cir2 c iir2
cir3 c iir3
cir4 c iir4
cir5 c iir5
cu1 c u1
cu2 c u2
cu3 c u3
cu4 c u4
cu5 c u5
cur1 c ur1
cur2 c ur2
cur3 c ur3
cur4 c ur4
cur5 c ur5
cuan1 c uan1
cuan2 c uan2
cuan3 c uan3
cuan4 c uan4
cuan5 c uan5
cuanr1 c uanr1
cuanr2 c uanr2
cuanr3 c uanr3
cuanr4 c uanr4
cuanr5 c uanr5
cui1 c uei1
cui2 c uei2
cui3 c uei3
cui4 c uei4
cui5 c uei5
cuir1 c ueir1
cuir2 c ueir2
cuir3 c ueir3
cuir4 c ueir4
cuir5 c ueir5
cuo1 c uo1
cuo2 c uo2
cuo3 c uo3
cuo4 c uo4
cuo5 c uo5
cuor1 c uor1
cuor2 c uor2
cuor3 c uor3
cuor4 c uor4
cuor5 c uor5
cun1 c uen1
cun2 c uen2
cun3 c uen3
cun4 c uen4
cun5 c uen5
cunr1 c uenr1
cunr2 c uenr2
cunr3 c uenr3
cunr4 c uenr4
cunr5 c uenr5
sa1 s a1
sa2 s a2
sa3 s a3
sa4 s a4
sa5 s a5
sar1 s ar1
sar2 s ar2
sar3 s ar3
sar4 s ar4
sar5 s ar5
sai1 s ai1
sai2 s ai2
sai3 s ai3
sai4 s ai4
sai5 s ai5
sair1 s air1
sair2 s air2
sair3 s air3
sair4 s air4
sair5 s air5
sao1 s ao1
sao2 s ao2
sao3 s ao3
sao4 s ao4
sao5 s ao5
saor1 s aor1
saor2 s aor2
saor3 s aor3
saor4 s aor4
saor5 s aor5
san1 s an1
san2 s an2
san3 s an3
san4 s an4
san5 s an5
sanr1 s anr1
sanr2 s anr2
sanr3 s anr3
sanr4 s anr4
sanr5 s anr5
sang1 s ang1
sang2 s ang2
sang3 s ang3
sang4 s ang4
sang5 s ang5
sangr1 s angr1
sangr2 s angr2
sangr3 s angr3
sangr4 s angr4
sangr5 s angr5
se1 s e1
se2 s e2
se3 s e3
se4 s e4
se5 s e5
ser1 s er1
ser2 s er2
ser3 s er3
ser4 s er4
ser5 s er5
sei1 s ei1
sei2 s ei2
sei3 s ei3
sei4 s ei4
sei5 s ei5
seir1 s eir1
seir2 s eir2
seir3 s eir3
seir4 s eir4
seir5 s eir5
sen1 s en1
sen2 s en2
sen3 s en3
sen4 s en4
sen5 s en5
senr1 s enr1
senr2 s enr2
senr3 s enr3
senr4 s enr4
senr5 s enr5
seng1 s eng1
seng2 s eng2
seng3 s eng3
seng4 s eng4
seng5 s eng5
sengr1 s engr1
sengr2 s engr2
sengr3 s engr3
sengr4 s engr4
sengr5 s engr5
sou1 s ou1
sou2 s ou2
sou3 s ou3
sou4 s ou4
sou5 s ou5
sour1 s our1
sour2 s our2
sour3 s our3
sour4 s our4
sour5 s our5
song1 s ong1
song2 s ong2
song3 s ong3
song4 s ong4
song5 s ong5
songr1 s ongr1
songr2 s ongr2
songr3 s ongr3
songr4 s ongr4
songr5 s ongr5
si1 s ii1
si2 s ii2
si3 s ii3
si4 s ii4
si5 s ii5
sir1 s iir1
sir2 s iir2
sir3 s iir3
sir4 s iir4
sir5 s iir5
su1 s u1
su2 s u2
su3 s u3
su4 s u4
su5 s u5
sur1 s ur1
sur2 s ur2
sur3 s ur3
sur4 s ur4
sur5 s ur5
suan1 s uan1
suan2 s uan2
suan3 s uan3
suan4 s uan4
suan5 s uan5
suanr1 s uanr1
suanr2 s uanr2
suanr3 s uanr3
suanr4 s uanr4
suanr5 s uanr5
sui1 s uei1
sui2 s uei2
sui3 s uei3
sui4 s uei4
sui5 s uei5
suir1 s ueir1
suir2 s ueir2
suir3 s ueir3
suir4 s ueir4
suir5 s ueir5
suo1 s uo1
suo2 s uo2
suo3 s uo3
suo4 s uo4
suo5 s uo5
suor1 s uor1
suor2 s uor2
suor3 s uor3
suor4 s uor4
suor5 s uor5
sun1 s uen1
sun2 s uen2
sun3 s uen3
sun4 s uen4
sun5 s uen5
sunr1 s uenr1
sunr2 s uenr2
sunr3 s uenr3
sunr4 s uenr4
sunr5 s uenr5
ji1 j i1
ji2 j i2
ji3 j i3
ji4 j i4
ji5 j i5
jir1 j ir1
jir2 j ir2
jir3 j ir3
jir4 j ir4
jir5 j ir5
jia1 j ia1
jia2 j ia2
jia3 j ia3
jia4 j ia4
jia5 j ia5
jiar1 j iar1
jiar2 j iar2
jiar3 j iar3
jiar4 j iar4
jiar5 j iar5
jiao1 j iao1
jiao2 j iao2
jiao3 j iao3
jiao4 j iao4
jiao5 j iao5
jiaor1 j iaor1
jiaor2 j iaor2
jiaor3 j iaor3
jiaor4 j iaor4
jiaor5 j iaor5
jian1 j ian1
jian2 j ian2
jian3 j ian3
jian4 j ian4
jian5 j ian5
jianr1 j ianr1
jianr2 j ianr2
jianr3 j ianr3
jianr4 j ianr4
jianr5 j ianr5
jiang1 j iang1
jiang2 j iang2
jiang3 j iang3
jiang4 j iang4
jiang5 j iang5
jiangr1 j iangr1
jiangr2 j iangr2
jiangr3 j iangr3
jiangr4 j iangr4
jiangr5 j iangr5
jie1 j ie1
jie2 j ie2
jie3 j ie3
jie4 j ie4
jie5 j ie5
jier1 j ier1
jier2 j ier2
jier3 j ier3
jier4 j ier4
jier5 j ier5
jio1 j io1
jio2 j io2
jio3 j io3
jio4 j io4
jio5 j io5
jior1 j ior1
jior2 j ior2
jior3 j ior3
jior4 j ior4
jior5 j ior5
jiu1 j iou1
jiu2 j iou2
jiu3 j iou3
jiu4 j iou4
jiu5 j iou5
jiur1 j iour1
jiur2 j iour2
jiur3 j iour3
jiur4 j iour4
jiur5 j iour5
jiong1 j iong1
jiong2 j iong2
jiong3 j iong3
jiong4 j iong4
jiong5 j iong5
jiongr1 j iongr1
jiongr2 j iongr2
jiongr3 j iongr3
jiongr4 j iongr4
jiongr5 j iongr5
jin1 j in1
jin2 j in2
jin3 j in3
jin4 j in4
jin5 j in5
jinr1 j inr1
jinr2 j inr2
jinr3 j inr3
jinr4 j inr4
jinr5 j inr5
jing1 j ing1
jing2 j ing2
jing3 j ing3
jing4 j ing4
jing5 j ing5
jingr1 j ingr1
jingr2 j ingr2
jingr3 j ingr3
jingr4 j ingr4
jingr5 j ingr5
ju1 j v1
ju2 j v2
ju3 j v3
ju4 j v4
ju5 j v5
jur1 j vr1
jur2 j vr2
jur3 j vr3
jur4 j vr4
jur5 j vr5
jue1 j ve1
jue2 j ve2
jue3 j ve3
jue4 j ve4
jue5 j ve5
juer1 j ver1
juer2 j ver2
juer3 j ver3
juer4 j ver4
juer5 j ver5
juan1 j van1
juan2 j van2
juan3 j van3
juan4 j van4
juan5 j van5
juanr1 j vanr1
juanr2 j vanr2
juanr3 j vanr3
juanr4 j vanr4
juanr5 j vanr5
jun1 j vn1
jun2 j vn2
jun3 j vn3
jun4 j vn4
jun5 j vn5
junr1 j vnr1
junr2 j vnr2
junr3 j vnr3
junr4 j vnr4
junr5 j vnr5
qi1 q i1
qi2 q i2
qi3 q i3
qi4 q i4
qi5 q i5
qir1 q ir1
qir2 q ir2
qir3 q ir3
qir4 q ir4
qir5 q ir5
qia1 q ia1
qia2 q ia2
qia3 q ia3
qia4 q ia4
qia5 q ia5
qiar1 q iar1
qiar2 q iar2
qiar3 q iar3
qiar4 q iar4
qiar5 q iar5
qiao1 q iao1
qiao2 q iao2
qiao3 q iao3
qiao4 q iao4
qiao5 q iao5
qiaor1 q iaor1
qiaor2 q iaor2
qiaor3 q iaor3
qiaor4 q iaor4
qiaor5 q iaor5
qian1 q ian1
qian2 q ian2
qian3 q ian3
qian4 q ian4
qian5 q ian5
qianr1 q ianr1
qianr2 q ianr2
qianr3 q ianr3
qianr4 q ianr4
qianr5 q ianr5
qiang1 q iang1
qiang2 q iang2
qiang3 q iang3
qiang4 q iang4
qiang5 q iang5
qiangr1 q iangr1
qiangr2 q iangr2
qiangr3 q iangr3
qiangr4 q iangr4
qiangr5 q iangr5
qie1 q ie1
qie2 q ie2
qie3 q ie3
qie4 q ie4
qie5 q ie5
qier1 q ier1
qier2 q ier2
qier3 q ier3
qier4 q ier4
qier5 q ier5
qio1 q io1
qio2 q io2
qio3 q io3
qio4 q io4
qio5 q io5
qior1 q ior1
qior2 q ior2
qior3 q ior3
qior4 q ior4
qior5 q ior5
qiu1 q iou1
qiu2 q iou2
qiu3 q iou3
qiu4 q iou4
qiu5 q iou5
qiur1 q iour1
qiur2 q iour2
qiur3 q iour3
qiur4 q iour4
qiur5 q iour5
qiong1 q iong1
qiong2 q iong2
qiong3 q iong3
qiong4 q iong4
qiong5 q iong5
qiongr1 q iongr1
qiongr2 q iongr2
qiongr3 q iongr3
qiongr4 q iongr4
qiongr5 q iongr5
qin1 q in1
qin2 q in2
qin3 q in3
qin4 q in4
qin5 q in5
qinr1 q inr1
qinr2 q inr2
qinr3 q inr3
qinr4 q inr4
qinr5 q inr5
qing1 q ing1
qing2 q ing2
qing3 q ing3
qing4 q ing4
qing5 q ing5
qingr1 q ingr1
qingr2 q ingr2
qingr3 q ingr3
qingr4 q ingr4
qingr5 q ingr5
qu1 q v1
qu2 q v2
qu3 q v3
qu4 q v4
qu5 q v5
qur1 q vr1
qur2 q vr2
qur3 q vr3
qur4 q vr4
qur5 q vr5
que1 q ve1
que2 q ve2
que3 q ve3
que4 q ve4
que5 q ve5
quer1 q ver1
quer2 q ver2
quer3 q ver3
quer4 q ver4
quer5 q ver5
quan1 q van1
quan2 q van2
quan3 q van3
quan4 q van4
quan5 q van5
quanr1 q vanr1
quanr2 q vanr2
quanr3 q vanr3
quanr4 q vanr4
quanr5 q vanr5
qun1 q vn1
qun2 q vn2
qun3 q vn3
qun4 q vn4
qun5 q vn5
qunr1 q vnr1
qunr2 q vnr2
qunr3 q vnr3
qunr4 q vnr4
qunr5 q vnr5
xi1 x i1
xi2 x i2
xi3 x i3
xi4 x i4
xi5 x i5
xir1 x ir1
xir2 x ir2
xir3 x ir3
xir4 x ir4
xir5 x ir5
xia1 x ia1
xia2 x ia2
xia3 x ia3
xia4 x ia4
xia5 x ia5
xiar1 x iar1
xiar2 x iar2
xiar3 x iar3
xiar4 x iar4
xiar5 x iar5
xiao1 x iao1
xiao2 x iao2
xiao3 x iao3
xiao4 x iao4
xiao5 x iao5
xiaor1 x iaor1
xiaor2 x iaor2
xiaor3 x iaor3
xiaor4 x iaor4
xiaor5 x iaor5
xian1 x ian1
xian2 x ian2
xian3 x ian3
xian4 x ian4
xian5 x ian5
xianr1 x ianr1
xianr2 x ianr2
xianr3 x ianr3
xianr4 x ianr4
xianr5 x ianr5
xiang1 x iang1
xiang2 x iang2
xiang3 x iang3
xiang4 x iang4
xiang5 x iang5
xiangr1 x iangr1
xiangr2 x iangr2
xiangr3 x iangr3
xiangr4 x iangr4
xiangr5 x iangr5
xie1 x ie1
xie2 x ie2
xie3 x ie3
xie4 x ie4
xie5 x ie5
xier1 x ier1
xier2 x ier2
xier3 x ier3
xier4 x ier4
xier5 x ier5
xio1 x io1
xio2 x io2
xio3 x io3
xio4 x io4
xio5 x io5
xior1 x ior1
xior2 x ior2
xior3 x ior3
xior4 x ior4
xior5 x ior5
xiu1 x iou1
xiu2 x iou2
xiu3 x iou3
xiu4 x iou4
xiu5 x iou5
xiur1 x iour1
xiur2 x iour2
xiur3 x iour3
xiur4 x iour4
xiur5 x iour5
xiong1 x iong1
xiong2 x iong2
xiong3 x iong3
xiong4 x iong4
xiong5 x iong5
xiongr1 x iongr1
xiongr2 x iongr2
xiongr3 x iongr3
xiongr4 x iongr4
xiongr5 x iongr5
xin1 x in1
xin2 x in2
xin3 x in3
xin4 x in4
xin5 x in5
xinr1 x inr1
xinr2 x inr2
xinr3 x inr3
xinr4 x inr4
xinr5 x inr5
xing1 x ing1
xing2 x ing2
xing3 x ing3
xing4 x ing4
xing5 x ing5
xingr1 x ingr1
xingr2 x ingr2
xingr3 x ingr3
xingr4 x ingr4
xingr5 x ingr5
xu1 x v1
xu2 x v2
xu3 x v3
xu4 x v4
xu5 x v5
xur1 x vr1
xur2 x vr2
xur3 x vr3
xur4 x vr4
xur5 x vr5
xue1 x ve1
xue2 x ve2
xue3 x ve3
xue4 x ve4
xue5 x ve5
xuer1 x ver1
xuer2 x ver2
xuer3 x ver3
xuer4 x ver4
xuer5 x ver5
xuan1 x van1
xuan2 x van2
xuan3 x van3
xuan4 x van4
xuan5 x van5
xuanr1 x vanr1
xuanr2 x vanr2
xuanr3 x vanr3
xuanr4 x vanr4
xuanr5 x vanr5
xun1 x vn1
xun2 x vn2
xun3 x vn3
xun4 x vn4
xun5 x vn5
xunr1 x vnr1
xunr2 x vnr2
xunr3 x vnr3
xunr4 x vnr4
xunr5 x vnr5


================================================
FILE: examples/csmsc/tts3/local/synthesize.sh
================================================
#!/bin/bash
# Synthesize the test set with a trained FastSpeech2 CSMSC acoustic model and
# one selectable vocoder.
#
# Usage: synthesize.sh <config_path> <train_output_path> <ckpt_name> [vocoder_stage]
#   vocoder_stage: 0 pwgan (default), 1 multi band melgan, 2 style melgan,
#                  3 hifigan, 4 wavernn
# BIN_DIR is read from the environment.

config_path=$1
train_output_path=$2
ckpt_name=$3

# The optional 4th argument sets both bounds, so at most one stage runs.
stage=${4:-0}
stop_stage=${4:-0}

# run_vocoder <voc> <voc_config> <voc_ckpt> <voc_stat>
# Invoke synthesize.py on dump/test/norm/metadata.jsonl with the given vocoder;
# the return status is the one reported by python3.
run_vocoder() {
    local voc_name="$1" voc_yaml="$2" voc_weights="$3" voc_stats="$4"

    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 ${BIN_DIR}/../synthesize.py \
        --am=fastspeech2_csmsc \
        --am_config=${config_path} \
        --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
        --am_stat=dump/train/speech_stats.npy \
        --voc="${voc_name}" \
        --voc_config="${voc_yaml}" \
        --voc_ckpt="${voc_weights}" \
        --voc_stat="${voc_stats}" \
        --test_metadata=dump/test/norm/metadata.jsonl \
        --output_dir=${train_output_path}/test \
        --phones_dict=dump/phone_id_map.txt
}

# stage 0: pwgan
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    run_vocoder pwgan_csmsc \
        pwg_baker_ckpt_0.4/pwg_default.yaml \
        pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \
        pwg_baker_ckpt_0.4/pwg_stats.npy || exit -1
fi

# Further GAN vocoders.
# stage 1: multi band melgan
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    run_vocoder mb_melgan_csmsc \
        mb_melgan_csmsc_ckpt_0.1.1/default.yaml \
        mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz \
        mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy || exit -1
fi

# stage 2: style melgan
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    run_vocoder style_melgan_csmsc \
        style_melgan_csmsc_ckpt_0.1.1/default.yaml \
        style_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1500000.pdz \
        style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy || exit -1
fi

# stage 3: hifigan
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    echo "in hifigan syn"
    run_vocoder hifigan_csmsc \
        hifigan_csmsc_ckpt_0.1.1/default.yaml \
        hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \
        hifigan_csmsc_ckpt_0.1.1/feats_stats.npy || exit -1
fi

# stage 4: wavernn
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    echo "in wavernn syn"
    run_vocoder wavernn_csmsc \
        wavernn_csmsc_ckpt_0.2.0/default.yaml \
        wavernn_csmsc_ckpt_0.2.0/snapshot_iter_400000.pdz \
        wavernn_csmsc_ckpt_0.2.0/feats_stats.npy || exit -1
fi


================================================
FILE: examples/csmsc/tts3/local/synthesize_e2e.sh
================================================
#!/bin/bash
# End-to-end (text to wav) synthesis with a trained FastSpeech2 CSMSC acoustic
# model and one selectable vocoder.
#
# Usage: synthesize_e2e.sh <config_path> <train_output_path> <ckpt_name> [vocoder_stage]
#   vocoder_stage: 0 pwgan (default), 1 multi band melgan, 2 style melgan,
#                  3 hifigan, 4 wavernn
# BIN_DIR is read from the environment.

config_path=$1
train_output_path=$2
ckpt_name=$3
# The optional 4th argument sets both bounds, so at most one stage runs.
stage=${4:-0}
stop_stage=${4:-0}

# run_e2e <voc> <voc_config> <voc_ckpt> <voc_stat> [extra options...]
# Invoke synthesize_e2e.py on the bundled sentences.txt with the given vocoder.
# Extra options are appended after --phones_dict. The return status is the one
# reported by python3.
run_e2e() {
    local voc_name="$1" voc_yaml="$2" voc_weights="$3" voc_stats="$4"
    shift 4

    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 ${BIN_DIR}/../synthesize_e2e.py \
        --am=fastspeech2_csmsc \
        --am_config=${config_path} \
        --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
        --am_stat=dump/train/speech_stats.npy \
        --voc="${voc_name}" \
        --voc_config="${voc_yaml}" \
        --voc_ckpt="${voc_weights}" \
        --voc_stat="${voc_stats}" \
        --lang=zh \
        --text=${BIN_DIR}/../../assets/sentences.txt \
        --output_dir=${train_output_path}/test_e2e \
        --phones_dict=dump/phone_id_map.txt \
        "$@"
}

# stage 0: pwgan
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    run_e2e pwgan_csmsc \
        pwg_baker_ckpt_0.4/pwg_default.yaml \
        pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \
        pwg_baker_ckpt_0.4/pwg_stats.npy \
        --inference_dir=${train_output_path}/inference || exit -1
fi

# Further GAN vocoders.
# stage 1: multi band melgan
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    run_e2e mb_melgan_csmsc \
        mb_melgan_csmsc_ckpt_0.1.1/default.yaml \
        mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz \
        mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --inference_dir=${train_output_path}/inference || exit -1
fi

# stage 2: style melgan
# The pretrained models have not been released yet, and style melgan's
# dygraph-to-static conversion is not ready, so no --inference_dir is passed:
#   --inference_dir=${train_output_path}/inference
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    run_e2e style_melgan_csmsc \
        style_melgan_csmsc_ckpt_0.1.1/default.yaml \
        style_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1500000.pdz \
        style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy || exit -1
fi

# stage 3: hifigan
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    echo "in hifigan syn_e2e"
    run_e2e hifigan_csmsc \
        hifigan_csmsc_ckpt_0.1.1/default.yaml \
        hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \
        hifigan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --inference_dir=${train_output_path}/inference || exit -1
fi


# stage 4: wavernn
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    echo "in wavernn syn_e2e"
    run_e2e wavernn_csmsc \
        wavernn_csmsc_ckpt_0.2.0/default.yaml \
        wavernn_csmsc_ckpt_0.2.0/snapshot_iter_400000.pdz \
        wavernn_csmsc_ckpt_0.2.0/feats_stats.npy \
        --inference_dir=${train_output_path}/inference || exit -1
fi


================================================
FILE: examples/csmsc/tts3/local/synthesize_e2e_xpu.sh
================================================
#!/bin/bash
# End-to-end (text to wav) synthesis with a trained FastSpeech2 CSMSC acoustic
# model on XPU: every run passes --ngpu=0 --nxpu=1 to synthesize_e2e.py.
#
# Usage: synthesize_e2e_xpu.sh <config_path> <train_output_path> <ckpt_name>
# BIN_DIR is read from the environment.
#
# Exit status: non-zero as soon as a synthesis run fails. Without the explicit
# `|| exit 1` on each stage, the trailing `if` blocks whose condition is false
# would reset the script's status to 0 and hide the failure from the caller.

config_path=$1
train_output_path=$2
ckpt_name=$3

# Only the pwgan stage runs by default; edit these two values to select
# another vocoder stage.
stage=0
stop_stage=0

# run_e2e_xpu <voc> <voc_config> <voc_ckpt> <voc_stat> [extra options...]
# Invoke synthesize_e2e.py once with the given vocoder. Extra options are
# placed between --phones_dict and --ngpu. Returns python3's exit status.
run_e2e_xpu() {
    local voc="$1" voc_config="$2" voc_ckpt="$3" voc_stat="$4"
    shift 4

    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize_e2e.py" \
        --am=fastspeech2_csmsc \
        "--am_config=${config_path}" \
        "--am_ckpt=${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/speech_stats.npy \
        "--voc=${voc}" \
        "--voc_config=${voc_config}" \
        "--voc_ckpt=${voc_ckpt}" \
        "--voc_stat=${voc_stat}" \
        --lang=zh \
        "--text=${BIN_DIR}/../../assets/sentences.txt" \
        "--output_dir=${train_output_path}/test_e2e" \
        --phones_dict=dump/phone_id_map.txt \
        "$@" \
        --ngpu=0 \
        --nxpu=1
}

# pwgan
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    run_e2e_xpu pwgan_csmsc \
        pwg_baker_ckpt_0.4/pwg_default.yaml \
        pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \
        pwg_baker_ckpt_0.4/pwg_stats.npy \
        "--inference_dir=${train_output_path}/inference" || exit 1
fi

# for more GAN Vocoders
# multi band melgan
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    run_e2e_xpu mb_melgan_csmsc \
        mb_melgan_csmsc_ckpt_0.1.1/default.yaml \
        mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz \
        mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
        "--inference_dir=${train_output_path}/inference" || exit 1
fi

# style melgan
# The pretrained models have not been released yet, and style melgan's
# dygraph-to-static conversion is not ready, so no --inference_dir is passed:
#   --inference_dir=${train_output_path}/inference
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    run_e2e_xpu style_melgan_csmsc \
        style_melgan_csmsc_ckpt_0.1.1/default.yaml \
        style_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1500000.pdz \
        style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy || exit 1
fi

# hifigan
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    echo "in hifigan syn_e2e"
    run_e2e_xpu hifigan_csmsc \
        hifigan_csmsc_ckpt_0.1.1/default.yaml \
        hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \
        hifigan_csmsc_ckpt_0.1.1/feats_stats.npy \
        "--inference_dir=${train_output_path}/inference" || exit 1
fi


# wavernn
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    echo "in wavernn syn_e2e"
    run_e2e_xpu wavernn_csmsc \
        wavernn_csmsc_ckpt_0.2.0/default.yaml \
        wavernn_csmsc_ckpt_0.2.0/snapshot_iter_400000.pdz \
        wavernn_csmsc_ckpt_0.2.0/feats_stats.npy \
        "--inference_dir=${train_output_path}/inference" || exit 1
fi


================================================
FILE: examples/csmsc/tts3/local/synthesize_streaming.sh
================================================
#!/bin/bash
# Streaming end-to-end synthesis with a trained FastSpeech2 CSMSC acoustic
# model: every run passes --am_streaming=True to synthesize_streaming.py.
#
# Usage: synthesize_streaming.sh <config_path> <train_output_path> <ckpt_name>
# BIN_DIR is read from the environment.
#
# Exit status: non-zero as soon as a synthesis run fails. Without the explicit
# `|| exit 1` on each stage, the trailing `if` blocks whose condition is false
# would reset the script's status to 0 and hide the failure from the caller.

config_path=$1
train_output_path=$2
ckpt_name=$3

# Only the pwgan stage runs by default; edit these two values to select
# another vocoder stage.
stage=0
stop_stage=0

# run_streaming <voc> <voc_config> <voc_ckpt> <voc_stat> [extra options...]
# Invoke synthesize_streaming.py once with the given vocoder. Extra options
# are appended after --am_streaming=True. Returns python3's exit status.
run_streaming() {
    local voc="$1" voc_config="$2" voc_ckpt="$3" voc_stat="$4"
    shift 4

    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 "${BIN_DIR}/../synthesize_streaming.py" \
        --am=fastspeech2_csmsc \
        "--am_config=${config_path}" \
        "--am_ckpt=${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/speech_stats.npy \
        "--voc=${voc}" \
        "--voc_config=${voc_config}" \
        "--voc_ckpt=${voc_ckpt}" \
        "--voc_stat=${voc_stat}" \
        --lang=zh \
        "--text=${BIN_DIR}/../../assets/sentences.txt" \
        "--output_dir=${train_output_path}/test_e2e_streaming" \
        --phones_dict=dump/phone_id_map.txt \
        --am_streaming=True \
        "$@"
}

# pwgan
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    run_streaming pwgan_csmsc \
        pwg_baker_ckpt_0.4/pwg_default.yaml \
        pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \
        pwg_baker_ckpt_0.4/pwg_stats.npy \
        "--inference_dir=${train_output_path}/inference_streaming" || exit 1
fi

# for more GAN Vocoders
# multi band melgan
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    run_streaming mb_melgan_csmsc \
        mb_melgan_csmsc_ckpt_0.1.1/default.yaml \
        mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz \
        mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
        "--inference_dir=${train_output_path}/inference_streaming" || exit 1
fi

# style melgan
# The pretrained models have not been released yet, and style melgan's
# dygraph-to-static conversion is not ready, so no --inference_dir is passed.
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    run_streaming style_melgan_csmsc \
        style_melgan_csmsc_ckpt_0.1.1/default.yaml \
        style_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1500000.pdz \
        style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy || exit 1
fi

# hifigan
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    echo "in hifigan syn_e2e"
    run_streaming hifigan_csmsc \
        hifigan_csmsc_ckpt_0.1.1/default.yaml \
        hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \
        hifigan_csmsc_ckpt_0.1.1/feats_stats.npy \
        "--inference_dir=${train_output_path}/inference_streaming" || exit 1
fi


================================================
FILE: examples/csmsc/tts3/local/synthesize_xpu.sh
================================================
#!/bin/bash
# Synthesize the test set with a trained FastSpeech2 CSMSC acoustic model on
# XPU: every run passes --ngpu=0 --nxpu=1 to synthesize.py.
#
# Usage: synthesize_xpu.sh <config_path> <train_output_path> <ckpt_name>
# BIN_DIR is read from the environment.
#
# Exit status: non-zero as soon as a synthesis run fails. Without the explicit
# `|| exit 1` on each stage, the trailing `if` blocks whose condition is false
# would reset the script's status to 0 and hide the failure from the caller.

config_path=$1
train_output_path=$2
ckpt_name=$3
# Only the pwgan stage runs by default; edit these two values to select
# another vocoder stage.
stage=0
stop_stage=0

# run_synthesize_xpu <voc> <voc_config> <voc_ckpt> <voc_stat>
# Invoke synthesize.py on dump/test/norm/metadata.jsonl with the given
# vocoder. Returns python3's exit status.
run_synthesize_xpu() {
    local voc="$1" voc_config="$2" voc_ckpt="$3" voc_stat="$4"

    FLAGS_allocator_strategy=naive_best_fit \
    python3 "${BIN_DIR}/../synthesize.py" \
        --am=fastspeech2_csmsc \
        "--am_config=${config_path}" \
        "--am_ckpt=${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/speech_stats.npy \
        "--voc=${voc}" \
        "--voc_config=${voc_config}" \
        "--voc_ckpt=${voc_ckpt}" \
        "--voc_stat=${voc_stat}" \
        --test_metadata=dump/test/norm/metadata.jsonl \
        "--output_dir=${train_output_path}/test" \
        --phones_dict=dump/phone_id_map.txt \
        --ngpu=0 \
        --nxpu=1
}

# pwgan
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    run_synthesize_xpu pwgan_csmsc \
        pwg_baker_ckpt_0.4/pwg_default.yaml \
        pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \
        pwg_baker_ckpt_0.4/pwg_stats.npy || exit 1
fi

# for more GAN Vocoders
# multi band melgan
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    run_synthesize_xpu mb_melgan_csmsc \
        mb_melgan_csmsc_ckpt_0.1.1/default.yaml \
        mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz \
        mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy || exit 1
fi

# style melgan
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    run_synthesize_xpu style_melgan_csmsc \
        style_melgan_csmsc_ckpt_0.1.1/default.yaml \
        style_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1500000.pdz \
        style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy || exit 1
fi

# hifigan
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    echo "in hifigan syn"
    run_synthesize_xpu hifigan_csmsc \
        hifigan_csmsc_ckpt_0.1.1/default.yaml \
        hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \
        hifigan_csmsc_ckpt_0.1.1/feats_stats.npy || exit 1
fi

# wavernn
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    echo "in wavernn syn"
    run_synthesize_xpu wavernn_csmsc \
        wavernn_csmsc_ckpt_0.2.0/default.yaml \
        wavernn_csmsc_ckpt_0.2.0/snapshot_iter_400000.pdz \
        wavernn_csmsc_ckpt_0.2.0/feats_stats.npy || exit 1
fi


================================================
FILE: examples/csmsc/tts3/local/train.sh
================================================
#!/bin/bash
# Launch ${BIN_DIR}/train.py on the normalized train/dev metadata under dump/.
#
# Usage: train.sh <config_path> <train_output_path>
# BIN_DIR is read from the environment. The script's exit status is the one
# reported by python3.

config_path=$1
train_output_path=$2

# Command-line options handed to train.py, in order.
train_args=(
    --train-metadata=dump/train/norm/metadata.jsonl
    --dev-metadata=dump/dev/norm/metadata.jsonl
    --config=${config_path}
    --output-dir=${train_output_path}
    --ngpu=1
    --phones-dict=dump/phone_id_map.txt
)

python3 ${BIN_DIR}/train.py "${train_args[@]}"

================================================
FILE: examples/csmsc/tts3/local/train_xpu.sh
================================================
#!/bin/bash
# Launch ${BIN_DIR}/train.py on the normalized train/dev metadata under dump/,
# requesting no GPU and one XPU (--ngpu=0 --nxpu=1).
#
# Usage: train_xpu.sh <config_path> <train_output_path>
# BIN_DIR is read from the environment. The script's exit status is the one
# reported by python3.

config_path=$1
train_output_path=$2

# Command-line options handed to train.py, in order.
train_args=(
    --train-metadata=dump/train/norm/metadata.jsonl
    --dev-metadata=dump/dev/norm/metadata.jsonl
    --config=${config_path}
    --output-dir=${train_output_path}
    --ngpu=0
    --nxpu=1
    --phones-dict=dump/phone_id_map.txt
)

python3 ${BIN_DIR}/train.py "${train_args[@]}"

================================================
FILE: examples/csmsc/tts3/path.sh
================================================
#!/bin/bash
# Environment setup for this recipe; intended to be sourced (run.sh does
# `source path.sh`) from the recipe directory.

# Repository root: three directory levels above the current working directory.
export MAIN_ROOT=$(realpath ${PWD}/../../../)

# Put the repository root and its utils/ directory ahead of the existing PATH.
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export LC_ALL=C

# Keep Python from writing bytecode files.
export PYTHONDONTWRITEBYTECODE=1
# Because LC_ALL=C, force UTF-8 I/O in Python to avoid UnicodeDecodeError.
export PYTHONIOENCODING=UTF-8
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

# Directory holding the experiment scripts for this recipe's model.
MODEL=fastspeech2
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}


================================================
FILE: examples/csmsc/tts3/run.sh
================================================
#!/bin/bash
# Driver for the FastSpeech2 CSMSC recipe. Runs every stage whose number lies
# in [stage, stop_stage]: 0 preprocess, 1 train, 2 synthesize, 3 synthesize_e2e,
# 4 static inference, 5 paddle2onnx, 6 onnxruntime predict, 7 export to
# Paddle-Lite, 8 Paddle-Lite predict, 9 dynamic PTQ, 10 static PTQ.

set -e
source path.sh

gpus=0,1
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_153.pdz

# The stage range can be chosen on the command line, for example
# `./run.sh --stage 0 --stop-stage 0`.
# This cannot be combined with positional arguments `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

# in_stage_range <n>: succeeds when stage <= n <= stop_stage.
in_stage_range() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# stage 0: prepare data
if in_stage_range 0; then
    ./local/preprocess.sh ${conf_path} || exit -1
fi

# stage 1: train the model; all `ckpt` files land under
# `train_output_path/checkpoints/`
if in_stage_range 1; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

# stage 2: synthesize; the trailing 0 selects pwgan as vocoder,
# use 1-4 to select one of {multi band melgan, style melgan, hifigan, wavernn}
if in_stage_range 2; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
fi

# stage 3: synthesize_e2e; the trailing 0 selects pwgan as vocoder,
# use 1,3,4 to select one of {multi band melgan, hifigan, wavernn}
if in_stage_range 3; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
fi

# stage 4: inference with the static model, vocoder is pwgan by default
if in_stage_range 4; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/inference.sh ${train_output_path} || exit -1
fi

# stage 5: paddle2onnx; make sure the static models are already in
# ${train_output_path}/inference. Only the models below have been tested so far.
if in_stage_range 5; then
    # install paddle2onnx
    pip install paddle2onnx --upgrade
    ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx fastspeech2_csmsc
    # considering the balance between speed and quality, hifigan is the recommended vocoder
    ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx pwgan_csmsc
    # ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx mb_melgan_csmsc
    # ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx hifigan_csmsc
fi

# stage 6: inference with onnxruntime
if in_stage_range 6; then
    ./local/ort_predict.sh ${train_output_path}
fi

# stage 7: export to Paddle-Lite; must run after stage 3 (the stage that
# generates the static models)
if in_stage_range 7; then
    # NOTE by yuantian 2022.11.21: please compile the develop version of Paddle-Lite to export and run TTS models,
    #                   because TTS models are supported by https://github.com/PaddlePaddle/Paddle-Lite/pull/9587
    #                   and https://github.com/PaddlePaddle/Paddle-Lite/pull/9706
    ./local/export2lite.sh ${train_output_path} inference pdlite fastspeech2_csmsc x86
    ./local/export2lite.sh ${train_output_path} inference pdlite pwgan_csmsc x86
    # ./local/export2lite.sh ${train_output_path} inference pdlite mb_melgan_csmsc x86
    # ./local/export2lite.sh ${train_output_path} inference pdlite hifigan_csmsc x86
fi

# stage 8: predict with the Paddle-Lite models
if in_stage_range 8; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/lite_predict.sh ${train_output_path} || exit -1
fi

# stage 9: PTQ_dynamic
if in_stage_range 9; then
    ./local/PTQ_dynamic.sh  ${train_output_path} fastspeech2_csmsc 8
    # ./local/PTQ_dynamic.sh  ${train_output_path} pwgan_csmsc 8
    # ./local/PTQ_dynamic.sh  ${train_output_path} mb_melgan_csmsc 8
    # ./local/PTQ_dynamic.sh  ${train_output_path} hifigan_csmsc 8
fi

# stage 10: PTQ_static
if in_stage_range 10; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/PTQ_static.sh  ${train_output_path} fastspeech2_csmsc || exit -1
fi


================================================
FILE: examples/csmsc/tts3/run_cnndecoder.sh
================================================
#!/bin/bash
# Driver for the FastSpeech2 CSMSC recipe with the cnndecoder config. Runs
# every stage whose number lies in [stage, stop_stage]; stages cover data
# preparation, training, non-streaming and streaming synthesis/inference,
# ONNX export and prediction, Paddle-Lite export and prediction, and static PTQ.

set -e
source path.sh

gpus=0,1
stage=0
stop_stage=100

conf_path=conf/cnndecoder.yaml
train_output_path=exp/cnndecoder
ckpt_name=snapshot_iter_153.pdz

# The stage range can be chosen on the command line, for example
# `./run_cnndecoder.sh --stage 0 --stop-stage 0`.
# This cannot be combined with positional arguments `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

# in_stage_range <n>: succeeds when stage <= n <= stop_stage.
in_stage_range() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# stage 0: prepare data
if in_stage_range 0; then
    ./local/preprocess.sh ${conf_path} || exit -1
fi

# stage 1: train the model; all `ckpt` files land under
# `train_output_path/checkpoints/`
if in_stage_range 1; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

# stage 2: synthesize, vocoder is pwgan
if in_stage_range 2; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

# stage 3: synthesize_e2e non-streaming, vocoder is pwgan by default
if in_stage_range 3; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

# stage 4: non-streaming inference with the static model, vocoder is pwgan by default
if in_stage_range 4; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/inference.sh ${train_output_path} || exit -1
fi

# stage 5: synthesize_e2e streaming, vocoder is pwgan by default
if in_stage_range 5; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_streaming.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

# stage 6: streaming inference with the static model, vocoder is pwgan by default
if in_stage_range 6; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/inference_streaming.sh ${train_output_path} || exit -1
fi

# stage 7: paddle2onnx non streaming
if in_stage_range 7; then
    # install paddle2onnx
    pip install paddle2onnx --upgrade
    ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx fastspeech2_csmsc
    # considering the balance between speed and quality, hifigan is the recommended vocoder
    ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx pwgan_csmsc
    # ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx mb_melgan_csmsc
    # ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx hifigan_csmsc
fi

# stage 8: onnxruntime non streaming
if in_stage_range 8; then
    ./local/ort_predict.sh ${train_output_path}
fi

# stage 9: paddle2onnx streaming
if in_stage_range 9; then
    # install paddle2onnx
    pip install paddle2onnx --upgrade
    # streaming acoustic model
    ./local/paddle2onnx.sh ${train_output_path} inference_streaming inference_onnx_streaming fastspeech2_csmsc_am_encoder_infer
    ./local/paddle2onnx.sh ${train_output_path} inference_streaming inference_onnx_streaming fastspeech2_csmsc_am_decoder
    ./local/paddle2onnx.sh ${train_output_path} inference_streaming inference_onnx_streaming fastspeech2_csmsc_am_postnet
    # considering the balance between speed and quality, hifigan is the recommended vocoder
    ./local/paddle2onnx.sh ${train_output_path} inference_streaming inference_onnx_streaming pwgan_csmsc
    # ./local/paddle2onnx.sh ${train_output_path} inference_streaming inference_onnx_streaming mb_melgan_csmsc
    # ./local/paddle2onnx.sh ${train_output_path} inference_streaming inference_onnx_streaming hifigan_csmsc
fi

# stage 10: onnxruntime streaming
if in_stage_range 10; then
    ./local/ort_predict_streaming.sh ${train_output_path}
fi

# stage 11: export to Paddle-Lite; must run after stage 3 (the stage that
# generates the static models)
if in_stage_range 11; then
    ./local/export2lite.sh ${train_output_path} inference pdlite fastspeech2_csmsc x86
    ./local/export2lite.sh ${train_output_path} inference pdlite pwgan_csmsc x86
    # ./local/export2lite.sh ${train_output_path} inference pdlite mb_melgan_csmsc x86
    # ./local/export2lite.sh ${train_output_path} inference pdlite hifigan_csmsc x86
fi

# stage 12: predict with the Paddle-Lite models
if in_stage_range 12; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/lite_predict.sh ${train_output_path} || exit -1
fi

# stage 13: export streaming models to Paddle-Lite; must run after stage 5
# (the stage that generates the streaming static models)
if in_stage_range 13; then
    # streaming acoustic model
    ./local/export2lite.sh ${train_output_path} inference_streaming pdlite_streaming fastspeech2_csmsc_am_encoder_infer x86
    ./local/export2lite.sh ${train_output_path} inference_streaming pdlite_streaming fastspeech2_csmsc_am_decoder x86
    ./local/export2lite.sh ${train_output_path} inference_streaming pdlite_streaming fastspeech2_csmsc_am_postnet x86
    ./local/export2lite.sh ${train_output_path} inference_streaming pdlite_streaming pwgan_csmsc x86
    # ./local/export2lite.sh ${train_output_path} inference_streaming pdlite_streaming mb_melgan_csmsc x86
    # ./local/export2lite.sh ${train_output_path} inference_streaming pdlite_streaming hifigan_csmsc x86
fi

# stage 14: streaming prediction with the Paddle-Lite models
if in_stage_range 14; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/lite_predict_streaming.sh ${train_output_path} || exit -1
fi

# stage 15: PTQ_static
if in_stage_range 15; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/PTQ_static.sh  ${train_output_path} fastspeech2_csmsc || exit -1
fi

================================================
FILE: examples/csmsc/tts3/run_xpu.sh
================================================
#!/bin/bash
# Recipe driver for XPU devices: runs the numbered stages below in order,
# restricted to the inclusive range [stage, stop_stage].

set -e
source path.sh

# device ids handed to every XPU stage through FLAGS_selected_xpus
xpus=0,1
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_153.pdz

# with the following command, you can choose the stage range you want to run
# such as `./run_xpu.sh --stage 0 --stop-stage 0`
# this cannot be mixed with positional arguments (`$1`, `$2` ...)
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # prepare data
    ./local/preprocess.sh ${conf_path} || exit -1
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # train model, all `ckpt` under `train_output_path/checkpoints/` dir
    FLAGS_selected_xpus=${xpus} ./local/train_xpu.sh ${conf_path} ${train_output_path} || exit -1
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # synthesize, vocoder is pwgan by default
    FLAGS_selected_xpus=${xpus} ./local/synthesize_xpu.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # synthesize_e2e, vocoder is pwgan by default
    FLAGS_selected_xpus=${xpus} ./local/synthesize_e2e_xpu.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    # inference with static model, vocoder is pwgan by default
    FLAGS_selected_xpus=${xpus} ./local/inference_xpu.sh ${train_output_path} || exit -1
fi


================================================
FILE: examples/csmsc/tts3_rhy/README.md
================================================
# This example mainly follows the FastSpeech2 with CSMSC
This example contains code used to train a rhythm version of the [FastSpeech2](https://arxiv.org/abs/2006.04558) model with the [Chinese Standard Mandarin Speech Corpus](https://www.data-baker.com/open_source.html).

## Dataset
### Download and Extract
Download CSMSC from its [Official Website](https://test.data-baker.com/data/index/TNtts/) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/BZNSYP`.

### Get MFA Result and Extract
We use [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get durations for fastspeech2.
You can directly download the rhythm version of the MFA result from here: [baker_alignment_tone.zip](https://paddlespeech.cdn.bcebos.com/Rhy_e2e/baker_alignment_tone.zip), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) in our repo.
If you train it with our repo, remember to add the `--rhy-with-duration` flag to obtain the rhythm information.

## Get Started
Assume the path to the dataset is `~/datasets/BZNSYP`.
Assume the path to the MFA result of CSMSC is `./baker_alignment_tone`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
    - synthesize waveform from a text file.
5. inference using the static model.
```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.

```text
dump
├── dev
│   ├── norm
│   └── raw
├── phone_id_map.txt
├── speaker_id_map.txt
├── test
│   ├── norm
│   └── raw
└── train
    ├── energy_stats.npy
    ├── norm
    ├── pitch_stats.npy
    ├── raw
    └── speech_stats.npy
```
The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The raw folder contains speech, pitch, and energy features of each utterance, while the norm folder contains normalized ones. The statistics used to normalize features are computed from the training set, which is located in `dump/train/*_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains phones, text_lengths, speech_lengths, durations, the path of speech features, the path of pitch features, the path of energy features, speaker, and the id of each utterance.

For more details, you can refer to [FastSpeech2 with CSMSC](../tts3).

## Pretrained Model
Pretrained FastSpeech2 model for end-to-end rhythm version:
- [fastspeech2_rhy_csmsc_ckpt_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_rhy_csmsc_ckpt_1.3.0.zip)

This FastSpeech2 checkpoint contains files listed below.
```text
fastspeech2_rhy_csmsc_ckpt_1.3.0
├── default.yaml             # default config used to train fastspeech2
├── phone_id_map.txt         # phone vocabulary file when training fastspeech2
├── snapshot_iter_153000.pdz # model parameters and optimizer states
├── durations.txt            # the intermediate output of preprocess.sh
├── energy_stats.npy
├── pitch_stats.npy
└── speech_stats.npy         # statistics used to normalize spectrogram when training fastspeech2
```


================================================
FILE: examples/csmsc/tts3_rhy/local/synthesize_e2e.sh
================================================
#!/bin/bash
# Synthesize waveforms from a text file with the trained acoustic model and one
# vocoder.
#
# Usage: ./local/synthesize_e2e.sh <config_path> <train_output_path> <ckpt_name> [vocoder_stage]
#   config_path        acoustic model config, passed as --am_config
#   train_output_path  experiment dir; the checkpoint is read from its checkpoints/ subdir
#   ckpt_name          checkpoint file name inside <train_output_path>/checkpoints
#   vocoder_stage      0 pwgan (default), 1 multi band melgan, 2 style melgan,
#                      3 hifigan, 4 wavernn

# Abort on the first failing command and propagate its exit status.
# Without this a failure in stages 0-3 was masked: every later `if` whose
# condition is false returns 0, so the script exited successfully and the
# caller's `|| exit -1` never fired.
set -e

config_path=$1
train_output_path=$2
ckpt_name=$3

# both bounds are taken from $4, so a single vocoder stage is selected per run
stage=${4:-0}
stop_stage=${4:-0}

# Run synthesize_e2e.py with the trained acoustic model and the given vocoder.
#   $1  vocoder name            (--voc)
#   $2  vocoder config file     (--voc_config)
#   $3  vocoder checkpoint      (--voc_ckpt)
#   $4  vocoder statistics file (--voc_stat)
#   any further arguments are appended to the command line unchanged
synthesize_with_vocoder() {
    local voc=$1
    local voc_config=$2
    local voc_ckpt=$3
    local voc_stat=$4
    shift 4

    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 ${BIN_DIR}/../synthesize_e2e.py \
        --am=fastspeech2_csmsc \
        --am_config=${config_path} \
        --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
        --am_stat=dump/train/speech_stats.npy \
        --voc=${voc} \
        --voc_config=${voc_config} \
        --voc_ckpt=${voc_ckpt} \
        --voc_stat=${voc_stat} \
        --lang=zh \
        --text=${BIN_DIR}/../../assets/sentences.txt \
        --output_dir=${train_output_path}/test_e2e \
        --phones_dict=dump/phone_id_map.txt \
        --use_rhy=True \
        "$@"
}

# pwgan
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    synthesize_with_vocoder pwgan_csmsc \
        pwg_baker_ckpt_0.4/pwg_default.yaml \
        pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \
        pwg_baker_ckpt_0.4/pwg_stats.npy \
        --inference_dir=${train_output_path}/inference
fi

# for more GAN Vocoders
# multi band melgan
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    synthesize_with_vocoder mb_melgan_csmsc \
        mb_melgan_csmsc_ckpt_0.1.1/default.yaml \
        mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz \
        mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --inference_dir=${train_output_path}/inference
fi

# the pretrained models haven't been released yet
# style melgan
# style melgan's Dygraph to Static Graph is not ready now, so --inference_dir
# is deliberately not passed for this vocoder
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    synthesize_with_vocoder style_melgan_csmsc \
        style_melgan_csmsc_ckpt_0.1.1/default.yaml \
        style_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1500000.pdz \
        style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy
fi

# hifigan
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    echo "in hifigan syn_e2e"
    synthesize_with_vocoder hifigan_csmsc \
        hifigan_csmsc_ckpt_0.1.1/default.yaml \
        hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \
        hifigan_csmsc_ckpt_0.1.1/feats_stats.npy \
        --inference_dir=${train_output_path}/inference
fi


# wavernn
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    echo "in wavernn syn_e2e"
    synthesize_with_vocoder wavernn_csmsc \
        wavernn_csmsc_ckpt_0.2.0/default.yaml \
        wavernn_csmsc_ckpt_0.2.0/snapshot_iter_400000.pdz \
        wavernn_csmsc_ckpt_0.2.0/feats_stats.npy \
        --inference_dir=${train_output_path}/inference
fi


================================================
FILE: examples/csmsc/tts3_rhy/run.sh
================================================
#!/bin/bash
# Recipe driver: runs the numbered stages below in order, restricted to the
# inclusive range [stage, stop_stage].

set -e
source path.sh

# device ids handed to every GPU stage through CUDA_VISIBLE_DEVICES
gpus=0,1
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_153.pdz

# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this cannot be mixed with positional arguments (`$1`, `$2` ...)
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # prepare data
    ### place the rhythm version of the MFA result in this directory before running this stage
    ./local/preprocess.sh ${conf_path} || exit -1
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # train model, all `ckpt` under `train_output_path/checkpoints/` dir
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # synthesize; the last argument selects the vocoder, 0 (pwgan) by default
    # use 1-4 to select the vocoder in {multi band melgan, style melgan, hifigan, wavernn}
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # synthesize_e2e; the last argument selects the vocoder, 0 (pwgan) by default
    # use 1,3,4 to select the vocoder in {multi band melgan, hifigan, wavernn}
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
fi


================================================
FILE: examples/csmsc/vits/README.md
================================================
# VITS with CSMSC
This example contains code used to train a [VITS](https://arxiv.org/abs/2106.06103) model with the [Chinese Standard Mandarin Speech Corpus](https://www.data-baker.com/open_source.html).

## Dataset
### Download and Extract
Download CSMSC from its [Official Website](https://test.data-baker.com/data/index/source).

### Get MFA Result and Extract
We use [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get phonemes for VITS, the durations of MFA are not needed here.
You can download it from here: [baker_alignment_tone.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/BZNSYP/with_tone/baker_alignment_tone.tar.gz), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) in our repo.

## Get Started
Assume the path to the dataset is `~/datasets/BZNSYP`.
Assume the path to the MFA result of CSMSC is `./baker_alignment_tone`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
    - synthesize waveform from a text file.

```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.

```text
dump
├── dev
│   ├── norm
│   └── raw
├── phone_id_map.txt
├── speaker_id_map.txt
├── test
│   ├── norm
│   └── raw
└── train
    ├── feats_stats.npy
    ├── norm
    └── raw
```
The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The raw folder contains the wave and linear spectrogram of each utterance, while the norm folder contains normalized ones. The statistics used to normalize features are computed from the training set, which is located in `dump/train/feats_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains phones, text_lengths, feats, feats_lengths, the path of linear spectrogram features, the path of raw waves, speaker, and the id of each utterance.

### Model Training
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
`./local/train.sh` calls `${BIN_DIR}/train.py`.
Here's the complete help message.
```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU] [--phones-dict PHONES_DICT]

Train a VITS model.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       config file to overwrite default config.
  --train-metadata TRAIN_METADATA
                        training data.
  --dev-metadata DEV_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
  --phones-dict PHONES_DICT
                        phone vocabulary file.
```
1. `--config` is a config file in yaml format to overwrite the default config, which can be found at `conf/default.yaml`.
2. `--train-metadata` and `--dev-metadata` should be the metadata file in the normalized subfolder of `train` and `dev` in the `dump` folder.
3. `--output-dir` is the directory to save the results of the experiment. Checkpoints are saved in `checkpoints/` inside this directory.
4. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.
5. `--phones-dict` is the path of the phone vocabulary file.

### Synthesizing

`./local/synthesize.sh` calls `${BIN_DIR}/synthesize.py`, which can synthesize waveform from `metadata.jsonl`.

```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
```text
usage: synthesize.py [-h] [--config CONFIG] [--ckpt CKPT]
                     [--phones_dict PHONES_DICT] [--ngpu NGPU]
                     [--test_metadata TEST_METADATA] [--output_dir OUTPUT_DIR]

Synthesize with VITS

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       Config of VITS.
  --ckpt CKPT           Checkpoint file of VITS.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --ngpu NGPU           if ngpu == 0, use cpu.
  --test_metadata TEST_METADATA
                        test metadata.
  --output_dir OUTPUT_DIR
                        output dir.
```
`./local/synthesize_e2e.sh` calls `${BIN_DIR}/synthesize_e2e.py`, which can synthesize waveform from text file.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
```text
usage: synthesize_e2e.py [-h] [--config CONFIG] [--ckpt CKPT]
                         [--phones_dict PHONES_DICT] [--lang LANG]
                         [--inference_dir INFERENCE_DIR] [--ngpu NGPU]
                         [--text TEXT] [--output_dir OUTPUT_DIR]

Synthesize with VITS

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       Config of VITS.
  --ckpt CKPT           Checkpoint file of VITS.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --lang LANG           Choose model language. zh or en
  --inference_dir INFERENCE_DIR
                        dir to save inference models
  --ngpu NGPU           if ngpu == 0, use cpu.
  --text TEXT           text to synthesize, a 'utt_id sentence' pair per line.
  --output_dir OUTPUT_DIR
                        output dir.
```
1. `--config`, `--ckpt`, and `--phones_dict` are arguments for acoustic model, which correspond to the 3 files in the VITS pretrained model.
2. `--lang` is the model language, which can be `zh` or `en`.
3. `--test_metadata` should be the metadata file in the normalized subfolder of `test`  in the `dump` folder.
4. `--text` is the text file, which contains sentences to synthesize.
5. `--output_dir` is the directory to save synthesized audio files.
6. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

## Pretrained Model

The pretrained model can be downloaded here:

- [vits_csmsc_ckpt_1.4.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/vits/vits_csmsc_ckpt_1.4.0.zip) (add_blank=true)

VITS checkpoint contains files listed below.
```text
vits_csmsc_ckpt_1.4.0
├── default.yaml                    # default config used to train vits
├── phone_id_map.txt                # phone vocabulary file when training vits
└── snapshot_iter_150000.pdz  # model parameters and optimizer states
```

P.S. This checkpoint is not good enough yet; a better one is still being trained.

You can use the following script to synthesize the sentences in `${BIN_DIR}/../../assets/sentences.txt` using the pretrained VITS.

```bash
source path.sh
add_blank=true

FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 ${BIN_DIR}/synthesize_e2e.py \
    --config=vits_csmsc_ckpt_1.4.0/default.yaml \
    --ckpt=vits_csmsc_ckpt_1.4.0/snapshot_iter_150000.pdz \
    --phones_dict=vits_csmsc_ckpt_1.4.0/phone_id_map.txt \
    --output_dir=exp/default/test_e2e \
    --text=${BIN_DIR}/../../assets/sentences.txt \
    --add-blank=${add_blank} 
```


================================================
FILE: examples/csmsc/vits/conf/default.yaml
================================================
# This configuration tested on 4 GPUs (V100) with 32GB GPU
# memory. It takes around 2 weeks to finish the training
# but 100k iters model should generate reasonable results.
###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################

fs: 22050         # Sampling rate (Hz).
n_fft: 1024        # FFT size (samples).
n_shift: 256       # Hop size (samples), about 11.6 ms at 22050 Hz.
win_length: null   # Window length (samples).
                   # If set to null, it will be the same as n_fft (1024 samples, about 46.4 ms).
window: "hann"     # Window function.


##########################################################
#                  TTS MODEL SETTING                     #
##########################################################
model:
    # generator related
    generator_type: vits_generator
    generator_params:
        hidden_channels: 192
        spks: -1
        global_channels: -1
        segment_size: 32
        text_encoder_attention_heads: 2
        text_encoder_ffn_expand: 4
        text_encoder_blocks: 6
        text_encoder_positionwise_layer_type: "conv1d"
        text_encoder_positionwise_conv_kernel_size: 3
        text_encoder_positional_encoding_layer_type: "rel_pos"
        text_encoder_self_attention_layer_type: "rel_selfattn"
        text_encoder_activation_type: "swish"
        text_encoder_normalize_before: True
        text_encoder_dropout_rate: 0.1
        text_encoder_positional_dropout_rate: 0.0
        text_encoder_attention_dropout_rate: 0.1
        use_macaron_style_in_text_encoder: True
        use_conformer_conv_in_text_encoder: False
        text_encoder_conformer_kernel_size: -1
        decoder_kernel_size: 7
        decoder_channels: 512
        decoder_upsample_scales: [8, 8, 2, 2]
        decoder_upsample_kernel_sizes: [16, 16, 4, 4]
        decoder_resblock_kernel_sizes: [3, 7, 11]
        decoder_resblock_dilations: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
        use_weight_norm_in_decoder: True
        posterior_encoder_kernel_size: 5
        posterior_encoder_layers: 16
        posterior_encoder_stacks: 1
        posterior_encoder_base_dilation: 1
        posterior_encoder_dropout_rate: 0.0
        use_weight_norm_in_posterior_encoder: True
        flow_flows: 4
        flow_kernel_size: 5
        flow_base_dilation: 1
        flow_layers: 4
        flow_dropout_rate: 0.0
        use_weight_norm_in_flow: True
        use_only_mean_in_flow: True
        stochastic_duration_predictor_kernel_size: 3
        stochastic_duration_predictor_dropout_rate: 0.5
        stochastic_duration_predictor_flows: 4
        stochastic_duration_predictor_dds_conv_layers: 3
    # discriminator related
    discriminator_type: hifigan_multi_scale_multi_period_discriminator
    discriminator_params:
        scales: 1
        scale_downsample_pooling: "AvgPool1D"
        scale_downsample_pooling_params:
            kernel_size: 4
            stride: 2
            padding: 2
        scale_discriminator_params:
            in_channels: 1
            out_channels: 1
            kernel_sizes: [15, 41, 5, 3]
            channels: 128
            max_downsample_channels: 1024
            max_groups: 16
            bias: True
            downsample_scales: [2, 2, 4, 4, 1]
            nonlinear_activation: "leakyrelu"
            nonlinear_activation_params:
                negative_slope: 0.1
            use_weight_norm: True
            use_spectral_norm: False
        follow_official_norm: False
        periods: [2, 3, 5, 7, 11]
        period_discriminator_params:
            in_channels: 1
            out_channels: 1
            kernel_sizes: [5, 3]
            channels: 32
            downsample_scales: [3, 3, 3, 3, 1]
            max_downsample_channels: 1024
            bias: True
            nonlinear_activation: "leakyrelu"
            nonlinear_activation_params:
                negative_slope: 0.1
            use_weight_norm: True
            use_spectral_norm: False
    # others
    sampling_rate: 22050          # needed in the inference for saving wav
    cache_generator_outputs: True # whether to cache generator outputs in the training
          
###########################################################
#                        LOSS SETTING                     #
###########################################################
# loss function related
generator_adv_loss_params:
    average_by_discriminators: False # whether to average loss value by #discriminators
    loss_type: mse                   # loss type, "mse" or "hinge"
discriminator_adv_loss_params:
    average_by_discriminators: False # whether to average loss value by #discriminators
    loss_type: mse                   # loss type, "mse" or "hinge"
feat_match_loss_params:
    average_by_discriminators: False # whether to average loss value by #discriminators
    average_by_layers: False         # whether to average loss value by #layers of each discriminator
    include_final_outputs: True      # whether to include final outputs for loss calculation
mel_loss_params:
    fs: 22050          # must be the same as the training data
    fft_size: 1024        # fft points
    hop_size: 256    # hop size
    win_length: null   # window length
    window: hann       # window type
    num_mels: 80         # number of Mel basis
    fmin: 0            # minimum frequency for Mel basis
    fmax: null         # maximum frequency for Mel basis
    log_base: null     # null represent natural log

###########################################################
#               ADVERSARIAL LOSS SETTING                  #
###########################################################
lambda_adv: 1.0        # loss scaling coefficient for adversarial loss
lambda_mel: 45.0       # loss scaling coefficient for Mel loss
lambda_feat_match: 2.0 # loss scaling coefficient for feat match loss
lambda_dur: 1.0        # loss scaling coefficient for duration loss
lambda_kl: 1.0         # loss scaling coefficient for KL divergence loss
# others
sampling_rate: 22050          # needed in the inference for saving wav
cache_generator_outputs: True # whether to cache generator outputs in the training


###########################################################
#                  DATA LOADER SETTING                    #
###########################################################
batch_size: 64              # Batch size.
num_workers: 4              # Number of workers in DataLoader.

##########################################################
#            OPTIMIZER & SCHEDULER SETTING               #
##########################################################
# optimizer setting for generator
generator_optimizer_params:
    beta1: 0.8
    beta2: 0.99
    epsilon: 1.0e-9
    weight_decay: 0.0
generator_scheduler: exponential_decay
generator_scheduler_params:
    learning_rate: 2.0e-4
    gamma: 0.999875                   

# optimizer setting for discriminator
discriminator_optimizer_params:
    beta1: 0.8
    beta2: 0.99
    epsilon: 1.0e-9
    weight_decay: 0.0
discriminator_scheduler: exponential_decay
discriminator_scheduler_params:
    learning_rate: 2.0e-4          
    gamma: 0.999875
generator_first: False # whether to start updating generator first

##########################################################
#                OTHER TRAINING SETTING                  #
##########################################################
num_snapshots: 10            # max number of snapshots to keep while training
max_epoch: 1000              # Number of training epochs.
save_interval_epochs: 1      # Interval epochs to save checkpoint.
eval_interval_epochs: 1      # Interval epochs to evaluate the network.
seed: 777                    # random seed number


================================================
FILE: examples/csmsc/vits/local/inference.sh
================================================
#!/bin/bash
# Runs ${BIN_DIR}/inference.py against the models in <train_output_path>/inference.
# Usage: ./local/inference.sh <train_output_path> <add_blank>

train_output_path=$1
add_blank=$2

stage=0
stop_stage=0

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # gather the options first, then launch the script once
    infer_opts=(
        --inference_dir=${train_output_path}/inference
        --am=vits_csmsc
        --text=${BIN_DIR}/../../assets/sentences.txt
        --output_dir=${train_output_path}/pd_infer_out
        --phones_dict=dump/phone_id_map.txt
        --add-blank=${add_blank}
    )
    python3 ${BIN_DIR}/inference.py "${infer_opts[@]}"
fi

================================================
FILE: examples/csmsc/vits/local/lite_predict.sh
================================================
#!/bin/bash
# Runs ${BIN_DIR}/lite_predict.py against the models in <train_output_path>/pdlite.
# Usage: ./local/lite_predict.sh <train_output_path> <add_blank>

train_output_path=$1
add_blank=$2

stage=0
stop_stage=0

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # gather the options first, then launch the script once
    lite_opts=(
        --inference_dir=${train_output_path}/pdlite
        --am=vits_csmsc
        --text=${BIN_DIR}/../../assets/sentences.txt
        --output_dir=${train_output_path}/lite_infer_out
        --phones_dict=dump/phone_id_map.txt
        --add-blank=${add_blank}
    )
    python3 ${BIN_DIR}/lite_predict.py "${lite_opts[@]}"
fi


================================================
FILE: examples/csmsc/vits/local/preprocess.sh
================================================
#!/bin/bash
# Data preparation: durations from MFA, feature extraction, statistics,
# then normalization of the train/dev/test subsets.
#
# Usage: ./local/preprocess.sh <config_path> <add_blank>
#   config_path  yaml config passed to the duration and feature extraction steps
#   add_blank    value forwarded to normalize.py as --add-blank

# Stop at the first failing step and return its exit status. Every stage
# consumes the output of the previous one; without this, a failed step was
# ignored, later stages ran on missing or stale files, and the exit status
# reflected only the last normalize.py call.
set -e

stage=0
stop_stage=100

config_path=$1
add_blank=$2

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # get durations from MFA's result
    echo "Generate durations.txt from MFA results ..."
    python3 ${MAIN_ROOT}/utils/gen_duration_from_textgrid.py \
        --inputdir=./baker_alignment_tone \
        --output=durations.txt \
        --config=${config_path}
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # extract features
    echo "Extract features ..."
    python3 ${BIN_DIR}/preprocess.py \
        --dataset=baker \
        --rootdir=~/datasets/BZNSYP/ \
        --dumpdir=dump \
        --dur-file=durations.txt \
        --config=${config_path} \
        --num-cpu=20 \
        --cut-sil=True
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # get features' stats(mean and std)
    echo "Get features' stats ..."
    python3 ${MAIN_ROOT}/utils/compute_statistics.py \
        --metadata=dump/train/raw/metadata.jsonl \
        --field-name="feats"
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # normalize and convert phone/speaker to id, dev and test should use train's stats
    echo "Normalize ..."
    for subset in train dev test; do
        # --feats-stats always points at the train statistics, for every subset
        python3 ${BIN_DIR}/normalize.py \
            --metadata=dump/${subset}/raw/metadata.jsonl \
            --dumpdir=dump/${subset}/norm \
            --feats-stats=dump/train/feats_stats.npy \
            --phones-dict=dump/phone_id_map.txt \
            --speaker-dict=dump/speaker_id_map.txt \
            --add-blank=${add_blank} \
            --skip-wav-copy
    done
fi


================================================
FILE: examples/csmsc/vits/local/synthesize.sh
================================================
#!/bin/bash
# Runs ${BIN_DIR}/synthesize.py on dump/test/norm/metadata.jsonl with the given checkpoint.
# Usage: ./local/synthesize.sh <config_path> <train_output_path> <ckpt_name>

config_path=$1
train_output_path=$2
ckpt_name=$3
stage=0
stop_stage=0

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # gather the options first, then launch the script once
    synth_opts=(
        --config=${config_path}
        --ckpt=${train_output_path}/checkpoints/${ckpt_name}
        --phones_dict=dump/phone_id_map.txt
        --test_metadata=dump/test/norm/metadata.jsonl
        --output_dir=${train_output_path}/test
    )
    FLAGS_allocator_strategy=naive_best_fit FLAGS_fraction_of_gpu_memory_to_use=0.01 \
        python3 ${BIN_DIR}/synthesize.py "${synth_opts[@]}"
fi


================================================
FILE: examples/csmsc/vits/local/synthesize_e2e.sh
================================================
#!/bin/bash
# Runs ${BIN_DIR}/synthesize_e2e.py on assets/sentences.txt with the given checkpoint.
# Usage: ./local/synthesize_e2e.sh <config_path> <train_output_path> <ckpt_name> <add_blank>

config_path=$1
train_output_path=$2
ckpt_name=$3
add_blank=$4

stage=0
stop_stage=0


if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # gather the options first, then launch the script once
    e2e_opts=(
        --am=vits_csmsc
        --config=${config_path}
        --ckpt=${train_output_path}/checkpoints/${ckpt_name}
        --phones_dict=dump/phone_id_map.txt
        --output_dir=${train_output_path}/test_e2e
        --text=${BIN_DIR}/../../assets/sentences.txt
        --add-blank=${add_blank}
        # --inference_dir=${train_output_path}/inference
    )
    FLAGS_allocator_strategy=naive_best_fit FLAGS_fraction_of_gpu_memory_to_use=0.01 \
        python3 ${BIN_DIR}/synthesize_e2e.py "${e2e_opts[@]}"
fi


================================================
FILE: examples/csmsc/vits/local/train.sh
================================================
#!/bin/bash
# Build the monotonic_align extension, then launch train.py.
#
# Usage: ./local/train.sh <config_path> <train_output_path>
#   config_path        yaml config passed to train.py as --config
#   train_output_path  experiment directory passed to train.py as --output-dir

config_path=$1
train_output_path=$2

# install monotonic_align
# The build runs in a subshell so this script's working directory never changes,
# and the script aborts if the directory is missing or the build fails.
# Previously an unsuccessful `cd` let setup.py run in the wrong directory and
# training was started regardless of the build result.
(
    cd "${MAIN_ROOT}/paddlespeech/t2s/models/vits/monotonic_align" &&
    python3 setup.py build_ext --inplace
) || exit 1

python3 ${BIN_DIR}/train.py \
    --train-metadata=dump/train/norm/metadata.jsonl \
    --dev-metadata=dump/dev/norm/metadata.jsonl \
    --config=${config_path} \
    --output-dir=${train_output_path} \
    --ngpu=4 \
    --phones-dict=dump/phone_id_map.txt


================================================
FILE: examples/csmsc/vits/path.sh
================================================
#!/bin/bash
# Environment setup for this example.

# repository root: three directory levels above the current working directory
export MAIN_ROOT=$(realpath ${PWD}/../../../)

# make the repository root and its utils directory available on PATH
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export LC_ALL=C

# Python settings; UTF-8 I/O avoids UnicodeDecodeError when LC_ALL=C
export PYTHONDONTWRITEBYTECODE=1 PYTHONIOENCODING=UTF-8
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

# directory holding the experiment scripts of the selected model
MODEL=vits
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}

================================================
FILE: examples/csmsc/vits/run.sh
================================================
#!/bin/bash
# Staged driver for the CSMSC VITS example. Each stage below runs when its
# number lies within [stage, stop_stage]; the work itself is delegated to the
# scripts under ./local/.

set -e
source path.sh

# Defaults; they are defined before parse_options.sh is sourced below.
gpus=0,1,2,3
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_153.pdz
add_blank=true

# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this cannot be mixed with positional arguments `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # prepare data
    ./local/preprocess.sh ${conf_path} ${add_blank}|| exit -1
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # train model, all `ckpt` under `train_output_path/checkpoints/` dir
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # synthesize with checkpoint ${ckpt_name} via local/synthesize.sh
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # synthesize_e2e: synthesize from a text file. local/synthesize_e2e.sh passes
    # only --am=vits_csmsc; no separate vocoder argument is given.
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} ${add_blank}|| exit -1
fi

if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    # run local/inference.sh on ${train_output_path}
    CUDA_VISIBLE_DEVICES=${gpus} ./local/inference.sh ${train_output_path} ${add_blank}|| exit -1
fi

# # not ready yet for operator missing in Paddle2ONNX
# # paddle2onnx, please make sure the static models are in ${train_output_path}/inference first
# # we have only tested the following models so far
# if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
#     # install paddle2onnx
#     pip install paddle2onnx --upgrade
#     ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx vits_csmsc
# fi

# # inference with onnxruntime
# if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then
#     ./local/ort_predict.sh ${train_output_path}
# fi

# not ready yet for operator missing in Paddle-Lite
# must run after the static models have been generated.
# NOTE(review): local/synthesize_e2e.sh (stage 3) currently has --inference_dir
# commented out, so confirm that ${train_output_path}/inference exists before
# running this stage.
if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 7 ]; then
    # NOTE by yuantian 2022.11.21: please compile develop version of Paddle-Lite to export and run TTS models,
    #                   cause TTS models are supported by https://github.com/PaddlePaddle/Paddle-Lite/pull/10128
    # vits can only run in arm
    ./local/export2lite.sh ${train_output_path} inference pdlite vits_csmsc arm
fi

if [ ${stage} -le 8 ] && [ ${stop_stage} -ge 8 ]; then
    # run local/lite_predict.sh on ${train_output_path}
    CUDA_VISIBLE_DEVICES=${gpus} ./local/lite_predict.sh ${train_output_path} || exit -1
fi


================================================
FILE: examples/csmsc/voc1/README.md
================================================
# Parallel WaveGAN with CSMSC
This example contains code used to train a [parallel wavegan](http://arxiv.org/abs/1910.11480) model with [Chinese Standard Mandarin Speech Corpus](https://www.data-baker.com/open_source.html).
## Dataset
### Download and Extract
Download CSMSC from its [official website](https://test.data-baker.com/data/index/TNtts/) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/BZNSYP`.

After processing the data, the ``BZNSYP`` directory will look like this:
```text
BZNSYP
├── Wave
│    └─ *.wav files (audio speech)
├── PhoneLabeling
│    └─ *.interval files (alignment between phoneme and duration)
└── ProsodyLabeling
     └─ 000001-010000.txt (text with prosodic by pinyin)
```
This experiment only uses the `*.wav` files from the `Wave` directory.

### Get MFA Result and Extract
We use [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) results to cut silence at the edge of audio.
You can download them from here: [baker_alignment_tone.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/BZNSYP/with_tone/baker_alignment_tone.tar.gz), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) in our repo.

## Get Started
Assume the path to the dataset is `~/datasets/BZNSYP`.
Assume the path to the MFA result of CSMSC is `./baker_alignment_tone`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
    - synthesize waveform from text file.
```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.

```text
dump
├── dev
│   ├── norm
│   └── raw
├── test
│   ├── norm
│   └── raw
└── train
    ├── norm
    ├── raw
    └── feats_stats.npy
```
The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The `raw` folder contains the log magnitude of the mel spectrogram of each utterance, while the norm folder contains the normalized spectrogram. The statistics used to normalize the spectrogram are computed from the training set, which is located in `dump/train/feats_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains id and paths to the spectrogram of each utterance.

### Model Training
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
`./local/train.sh` calls `${BIN_DIR}/train.py`.
Here's the complete help message.

```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU] [--batch-size BATCH_SIZE] [--max-iter MAX_ITER]
                [--run-benchmark RUN_BENCHMARK]
                [--profiler_options PROFILER_OPTIONS]

Train a ParallelWaveGAN model.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       ParallelWaveGAN config file.
  --train-metadata TRAIN_METADATA
                        training data.
  --dev-metadata DEV_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.

benchmark:
  arguments related to benchmark.

  --batch-size BATCH_SIZE
                        batch size.
  --max-iter MAX_ITER   train max steps.
  --run-benchmark RUN_BENCHMARK
                        runing benchmark or not, if True, use the --batch-size
                        and --max-iter.
  --profiler_options PROFILER_OPTIONS
                        The option of profiler, which should be in format
                        "key1=value1;key2=value2;key3=value3".
```

1. `--config` is a config file in yaml format to overwrite the default config, which can be found at `conf/default.yaml`.
2. `--train-metadata` and `--dev-metadata` should be the metadata file in the normalized subfolder of `train` and `dev` in the `dump` folder.
3. `--output-dir` is the directory to save the results of the experiment. Checkpoints are saved in `checkpoints/` inside this directory.
4. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

### Synthesizing
We use [parallel wavegan](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/voc1) as the neural vocoder.
Download pretrained parallel wavegan model from [pwg_baker_ckpt_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_baker_ckpt_0.4.zip) and unzip it.
```bash
unzip pwg_baker_ckpt_0.4.zip
```
Parallel WaveGAN checkpoint contains files listed below.
```text
pwg_baker_ckpt_0.4
├── pwg_default.yaml               # default config used to train parallel wavegan
├── pwg_snapshot_iter_400000.pdz   # model parameters of parallel wavegan
└── pwg_stats.npy                  # statistics used to normalize spectrogram when training parallel wavegan
```
`./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
```text
usage: synthesize.py [-h] [--generator-type GENERATOR_TYPE] [--config CONFIG]
                     [--checkpoint CHECKPOINT] [--test-metadata TEST_METADATA]
                     [--output-dir OUTPUT_DIR] [--ngpu NGPU]

Synthesize with GANVocoder.

optional arguments:
  -h, --help            show this help message and exit
  --generator-type GENERATOR_TYPE
                        type of GANVocoder, should in {pwgan, mb_melgan,
                        style_melgan, } now
  --config CONFIG       GANVocoder config file.
  --checkpoint CHECKPOINT
                        snapshot to load.
  --test-metadata TEST_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
```

1. `--config` parallel wavegan config file. You should use the same config with which the model is trained.
2. `--checkpoint` is the checkpoint to load. Pick one of the checkpoints from `checkpoints` inside the training output directory.
3. `--test-metadata` is the metadata of the test dataset. Use the `metadata.jsonl` in the `test/norm` subfolder from the processed directory (`./local/synthesize.sh` passes `dump/test/norm/metadata.jsonl`).
4. `--output-dir` is the directory to save the synthesized audio files.
5. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

We use [Fastspeech2](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/tts3) as the acoustic model.
Download pretrained fastspeech2_nosil model from [fastspeech2_nosil_baker_ckpt_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_ckpt_0.4.zip) and unzip it.
```bash
unzip fastspeech2_nosil_baker_ckpt_0.4.zip
```
Fastspeech2 checkpoint contains files listed below.
```text
fastspeech2_nosil_baker_ckpt_0.4
├── default.yaml            # default config used to train fastspeech2
├── phone_id_map.txt        # phone vocabulary file when training fastspeech2
├── snapshot_iter_76000.pdz # model parameters and optimizer states
└── speech_stats.npy        # statistics used to normalize spectrogram when training fastspeech2
```

`./local/synthesize_e2e.sh` calls `${BIN_DIR}/../../synthesize_e2e.py`, which can synthesize waveform from text file.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
```text
usage: synthesize_e2e.py [-h]
                         [--am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech}]
                         [--am_config AM_CONFIG] [--am_ckpt AM_CKPT]
                         [--am_stat AM_STAT] [--phones_dict PHONES_DICT]
                         [--tones_dict TONES_DICT]
                         [--speaker_dict SPEAKER_DICT] [--spk_id SPK_ID]
                         [--voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,style_melgan_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,wavernn_csmsc}]
                         [--voc_config VOC_CONFIG] [--voc_ckpt VOC_CKPT]
                         [--voc_stat VOC_STAT] [--lang LANG]
                         [--inference_dir INFERENCE_DIR] [--ngpu NGPU]
                         [--text TEXT] [--output_dir OUTPUT_DIR]

Synthesize with acoustic model & vocoder

optional arguments:
  -h, --help            show this help message and exit
  --am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech}
                        Choose acoustic model type of tts task.
  --am_config AM_CONFIG
                        Config of acoustic model.
  --am_ckpt AM_CKPT     Checkpoint file of acoustic model.
  --am_stat AM_STAT     mean and standard deviation used to normalize
                        spectrogram when training acoustic model.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --tones_dict TONES_DICT
                        tone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --spk_id SPK_ID       spk id for multi speaker acoustic model
  --voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,style_melgan_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,wavernn_csmsc}
                        Choose vocoder type of tts task.
  --voc_config VOC_CONFIG
                        Config of voc.
  --voc_ckpt VOC_CKPT   Checkpoint file of voc.
  --voc_stat VOC_STAT   mean and standard deviation used to normalize
                        spectrogram when training voc.
  --lang LANG           Choose model language. zh or en
  --inference_dir INFERENCE_DIR
                        dir to save inference models
  --ngpu NGPU           if ngpu == 0, use cpu.
  --text TEXT           text to synthesize, a 'utt_id sentence' pair per line.
  --output_dir OUTPUT_DIR
                        output dir.

```
1. `--am` is acoustic model type with the format {model_name}_{dataset}
2. `--am_config`, `--am_ckpt`, `--am_stat` and `--phones_dict` are arguments for acoustic model, which correspond to the 4 files in the fastspeech2 pretrained model.
3. `--voc` is vocoder type with the format {model_name}_{dataset}
4. `--voc_config`, `--voc_ckpt`, `--voc_stat` are arguments for vocoder, which correspond to the 3 files in the parallel wavegan pretrained model.
5. `--lang` is the model language, which can be `zh` or `en`.
6. `--test_metadata` should be the metadata file in the normalized subfolder of `test`  in the `dump` folder.
7. `--text` is the text file, which contains sentences to synthesize.
8. `--output_dir` is the directory to save synthesized audio files.
9. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

## Pretrained Models
The pretrained model can be downloaded here:
- [pwg_baker_ckpt_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_baker_ckpt_0.4.zip)
- [fastspeech2_nosil_baker_ckpt_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_ckpt_0.4.zip)

The static model can be downloaded here:
- [pwg_baker_static_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_baker_static_0.4.zip)
- [fastspeech2_nosil_baker_static_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_static_0.4.zip)

The ONNX model can be downloaded here:
- [pwgan_csmsc_onnx_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_csmsc_onnx_0.2.0.zip)
- [fastspeech2_csmsc_onnx_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_csmsc_onnx_0.2.0.zip)

The Paddle-Lite model can be downloaded here:
- [pwgan_csmsc_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_csmsc_pdlite_1.3.0.zip)
- [fastspeech2_csmsc_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_csmsc_pdlite_1.3.0.zip)

Model | Step | eval/generator_loss | eval/log_stft_magnitude_loss| eval/spectral_convergence_loss
:-------------:| :------------:| :-----: | :-----: | :--------:
default| 1(gpu) x 400000|1.948763|0.670098|0.248882

Parallel WaveGAN checkpoint contains files listed below.

```text
pwg_baker_ckpt_0.4
├── pwg_default.yaml              # default config used to train parallel wavegan
├── pwg_snapshot_iter_400000.pdz  # generator parameters of parallel wavegan
└── pwg_stats.npy                 # statistics used to normalize spectrogram when training parallel wavegan
```

FastSpeech2 checkpoint contains files listed below.

```text
fastspeech2_nosil_baker_ckpt_0.4
├── default.yaml            # default config used to train fastspeech2
├── phone_id_map.txt        # phone vocabulary file when training fastspeech2
├── snapshot_iter_76000.pdz # model parameters and optimizer states
└── speech_stats.npy        # statistics used to normalize spectrogram when training fastspeech2
```

## Acknowledgement
We adapted some code from https://github.com/kan-bayashi/ParallelWaveGAN.


================================================
FILE: examples/csmsc/voc1/conf/default.yaml
================================================
# This is the hyperparameter configuration file for Parallel WaveGAN.
# Please make sure this is adjusted for the CSMSC dataset. If you want to
# apply to the other dataset, you might need to carefully change some parameters.
# This configuration requires 12 GB GPU memory and takes ~3 days on RTX TITAN.

###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
fs: 24000                # Sampling rate.
n_fft: 2048              # FFT size (samples).
n_shift: 300             # Hop size (samples). 12.5ms
win_length: 1200         # Window length (samples). 50ms
                         # If set to null, it will be the same as fft_size.
window: "hann"           # Window function.
n_mels: 80               # Number of mel basis.
fmin: 80                 # Minimum freq in mel basis calculation. (Hz)
fmax: 7600               # Maximum frequency in mel basis calculation. (Hz)


###########################################################
#         GENERATOR NETWORK ARCHITECTURE SETTING          #
###########################################################
generator_params:
    in_channels: 1        # Number of input channels.
    out_channels: 1       # Number of output channels.
    kernel_size: 3        # Kernel size of dilated convolution.
    layers: 30            # Number of residual block layers.
    stacks: 3             # Number of stacks i.e., dilation cycles.
    residual_channels: 64 # Number of channels in residual conv.
    gate_channels: 128    # Number of channels in gated conv.
    skip_channels: 64     # Number of channels in skip conv.
    aux_channels: 80      # Number of channels for auxiliary feature conv.
                          # Must be the same as n_mels (80 in this file).
    aux_context_window: 2 # Context window size for auxiliary feature.
                          # If set to 2, previous 2 and future 2 frames will be considered.
    dropout: 0.0          # Dropout rate. 0.0 means no dropout applied.
    bias: True            # Use bias in residual blocks.
    use_weight_norm: True # Whether to use weight norm.
                          # If set to true, it will be applied to all of the conv layers.
    use_causal_conv: False               # Use causal conv in residual blocks and upsample layers.
    upsample_scales: [4, 5, 3, 5]     # Upsampling scales. Product of these must be the same as hop size (n_shift: 4*5*3*5 = 300).
    interpolate_mode: "nearest" # Upsample net interpolate mode.
    freq_axis_kernel_size: 1 # Upsampling net: convolution kernel size in frequency axis.
    nonlinear_activation: null
    nonlinear_activation_params: {}

###########################################################
#       DISCRIMINATOR NETWORK ARCHITECTURE SETTING        #
###########################################################
discriminator_params:
    in_channels: 1        # Number of input channels.
    out_channels: 1       # Number of output channels.
    kernel_size: 3        # Kernel size of conv layers.
    layers: 10            # Number of conv layers.
    conv_channels: 64     # Number of channels in conv layers.
    bias: True            # Whether to use bias parameter in conv.
    use_weight_norm: True # Whether to use weight norm.
                          # If set to true, it will be applied to all of the conv layers.
    nonlinear_activation: "leakyrelu" # Nonlinear function after each conv.
    nonlinear_activation_params:      # Nonlinear function parameters
        negative_slope: 0.2           # Alpha in leakyrelu.

###########################################################
#                   STFT LOSS SETTING                     #
###########################################################
stft_loss_params:
    fft_sizes: [1024, 2048, 512]  # List of FFT size for STFT-based loss.
    hop_sizes: [120, 240, 50]     # List of hop size for STFT-based loss
    win_lengths: [600, 1200, 240] # List of window length for STFT-based loss.
    window: "hann"         # Window function for STFT-based loss

###########################################################
#               ADVERSARIAL LOSS SETTING                  #
###########################################################
lambda_adv: 4.0  # Loss balancing coefficient.

###########################################################
#                  DATA LOADER SETTING                    #
###########################################################
batch_size: 8              # Batch size.
batch_max_steps: 25500     # Length of each audio in batch. Make sure it is divisible by n_shift.
num_workers: 2             # Number of workers in DataLoader.

###########################################################
#             OPTIMIZER & SCHEDULER SETTING               #
###########################################################
generator_optimizer_params:
    epsilon: 1.0e-6        # Generator's epsilon.
    weight_decay: 0.0      # Generator's weight decay coefficient.
generator_scheduler_params:
    learning_rate: 0.0001  # Generator's learning rate.
    step_size: 200000      # Generator's scheduler step size.
    gamma: 0.5             # Generator's scheduler gamma.
                           # At each step size, lr will be multiplied by this parameter.
generator_grad_norm: 10    # Generator's gradient norm.
discriminator_optimizer_params:
    epsilon: 1.0e-6            # Discriminator's epsilon.
    weight_decay: 0.0          # Discriminator's weight decay coefficient.
discriminator_scheduler_params:
    learning_rate: 0.00005     # Discriminator's learning rate.
    step_size: 200000          # Discriminator's scheduler step size.
    gamma: 0.5                 # Discriminator's scheduler gamma.
                               # At each step size, lr will be multiplied by this parameter.
discriminator_grad_norm: 1     # Discriminator's gradient norm.

###########################################################
#                    INTERVAL SETTING                     #
###########################################################
discriminator_train_start_steps: 100000 # Number of steps to start to train discriminator.
train_max_steps: 400000                 # Number of training steps.
save_interval_steps: 5000               # Interval steps to save checkpoint.
eval_interval_steps: 1000               # Interval steps to evaluate the network.

###########################################################
#                     OTHER SETTING                       #
###########################################################
num_save_intermediate_results: 4  # Number of results to be saved as intermediate results.
num_snapshots: 10                 # max number of snapshots to keep while training
seed: 42                          # random seed for paddle, random, and np.random


================================================
FILE: examples/csmsc/voc1/local/PTQ_static.sh
================================================
#!/bin/bash
# Run PTQ_static.py against the model stored in <train_output_path>/inference.
#
# Usage: ./local/PTQ_static.sh <train_output_path> <model_name>
#   train_output_path  experiment dir; its inference/ subdirectory is passed
#                      as --inference_dir
#   model_name         value forwarded as --model_name
#
# BIN_DIR must already be exported by the caller (path.sh does this).
# The shebang matches the sibling scripts, so the file no longer depends on
# the invoking shell to choose an interpreter when it is executed directly.

train_output_path=$1
model_name=$2

# Expansions are quoted so paths with spaces stay single arguments.
python3 "${BIN_DIR}/../../PTQ_static.py" \
    --dev-metadata=dump/dev/raw/metadata.jsonl \
    --inference_dir "${train_output_path}/inference" \
    --model_name "${model_name}" \
    --onnx_format=True

================================================
FILE: examples/csmsc/voc1/local/preprocess.sh
================================================
#!/bin/bash
# Prepare data for this example in four stages:
#   0. generate durations.txt from the MFA results
#   1. extract features into dump/
#   2. compute feature statistics on the training split
#   3. normalize train/dev/test with the training statistics
#
# Usage: ./local/preprocess.sh <config_path>
# MAIN_ROOT and BIN_DIR must already be exported by the caller (path.sh does this).

# Each stage consumes the output of the previous one, so stop at the first
# failing command. Without this, only the exit status of the very last
# command would reach the caller and earlier failures would go unnoticed.
set -e

stage=0
stop_stage=100

config_path=$1

if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    # get durations from MFA's result
    echo "Generate durations.txt from MFA results ..."
    python3 "${MAIN_ROOT}/utils/gen_duration_from_textgrid.py" \
        --inputdir=./baker_alignment_tone \
        --output=durations.txt \
        --config="${config_path}"
fi

if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    # extract features
    echo "Extract features ..."
    python3 "${BIN_DIR}/../preprocess.py" \
        --rootdir=~/datasets/BZNSYP/ \
        --dataset=baker \
        --dumpdir=dump \
        --dur-file=durations.txt \
        --config="${config_path}" \
        --cut-sil=True \
        --num-cpu=20
fi

if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    # get features' stats(mean and std)
    echo "Get features' stats ..."
    python3 "${MAIN_ROOT}/utils/compute_statistics.py" \
        --metadata=dump/train/raw/metadata.jsonl \
        --field-name="feats"
fi

if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    # normalize, dev and test should use train's stats
    echo "Normalize ..."

    python3 "${BIN_DIR}/../normalize.py" \
        --metadata=dump/train/raw/metadata.jsonl \
        --dumpdir=dump/train/norm \
        --stats=dump/train/feats_stats.npy \
        --skip-wav-copy

    python3 "${BIN_DIR}/../normalize.py" \
        --metadata=dump/dev/raw/metadata.jsonl \
        --dumpdir=dump/dev/norm \
        --stats=dump/train/feats_stats.npy \
        --skip-wav-copy

    python3 "${BIN_DIR}/../normalize.py" \
        --metadata=dump/test/raw/metadata.jsonl \
        --dumpdir=dump/test/norm \
        --stats=dump/train/feats_stats.npy \
        --skip-wav-copy
fi


================================================
FILE: examples/csmsc/voc1/local/synthesize.sh
================================================
#!/bin/bash
# Run synthesize.py (generator type "pwgan") on dump/test/norm/metadata.jsonl
# using a checkpoint from <train_output_path>/checkpoints/.
#
# Usage: ./local/synthesize.sh <config_path> <train_output_path> <ckpt_name>

config_path=$1
train_output_path=$2
ckpt_name=$3

# Command-line options, collected once and expanded at the call site.
synth_args=(
    --config=${config_path}
    --checkpoint=${train_output_path}/checkpoints/${ckpt_name}
    --test-metadata=dump/test/norm/metadata.jsonl
    --output-dir=${train_output_path}/test
    --generator-type=pwgan
)

# The two FLAGS_* variables are set only in the environment of this command.
FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 ${BIN_DIR}/../synthesize.py "${synth_args[@]}" || exit -1


================================================
FILE: examples/csmsc/voc1/local/synthesize_e2e.sh
================================================
#!/bin/bash
# Text-to-wave synthesis: a pretrained fastspeech2_csmsc acoustic model is
# combined with the pwgan_csmsc vocoder checkpoint found under
# <train_output_path>/checkpoints/.
#
# Usage: ./local/synthesize_e2e.sh <config_path> <train_output_path> <ckpt_name>

config_path=$1
train_output_path=$2
ckpt_name=$3

# Directory (relative to the working directory) holding the acoustic model files.
am_dir=fastspeech2_nosil_baker_ckpt_0.4

FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 ${BIN_DIR}/../../synthesize_e2e.py \
    --am=fastspeech2_csmsc \
    --am_config=${am_dir}/default.yaml \
    --am_ckpt=${am_dir}/snapshot_iter_76000.pdz \
    --am_stat=${am_dir}/speech_stats.npy \
    --voc=pwgan_csmsc \
    --voc_config=${config_path} \
    --voc_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
    --voc_stat=dump/train/feats_stats.npy \
    --lang=zh \
    --text=${BIN_DIR}/../../../assets/sentences.txt \
    --output_dir=${train_output_path}/test_e2e \
    --phones_dict=${am_dir}/phone_id_map.txt \
    --inference_dir=${train_output_path}/inference || exit -1


================================================
FILE: examples/csmsc/voc1/local/train.sh
================================================
#!/bin/bash
# Launch train.py with the normalized train/dev metadata from dump/.
#
# Usage: ./local/train.sh <config_path> <train_output_path>
#   config_path        config file forwarded as --config
#   train_output_path  directory forwarded as --output-dir
#
# BIN_DIR must already be exported by the caller (path.sh does this).

config_path=$1
train_output_path=$2

# `python3` is used explicitly, matching every other script in this example;
# a bare `python` may be missing or may resolve to a different interpreter.
# Expansions are quoted so paths with spaces stay single arguments.
FLAGS_cudnn_exhaustive_search=true \
FLAGS_conv_workspace_size_limit=4000 \
python3 "${BIN_DIR}/train.py" \
    --train-metadata=dump/train/norm/metadata.jsonl \
    --dev-metadata=dump/dev/norm/metadata.jsonl \
    --config="${config_path}" \
    --output-dir="${train_output_path}" \
    --ngpu=1


================================================
FILE: examples/csmsc/voc1/path.sh
================================================
#!/bin/bash
# Shared environment for this example. Meant to be sourced (run.sh does so)
# from the example directory, because MAIN_ROOT is derived from ${PWD}.

# Repository root: three directory levels above ${PWD}.
export MAIN_ROOT=$(realpath ${PWD}/../../../)

# Interpreter and locale settings.
export PYTHONDONTWRITEBYTECODE=1
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export LC_ALL=C

# Put the repository root (and its utils/ directory for PATH) ahead of any
# entries that are already present.
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}

# Directory holding the experiment entry points used by local/*.sh.
MODEL=parallelwave_gan
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/gan_vocoder/${MODEL}

================================================
FILE: examples/csmsc/voc1/run.sh
================================================
#!/bin/bash
# Driver for the CSMSC Parallel WaveGAN recipe: runs stages `stage`..`stop_stage`.

set -e
source path.sh

# Defaults; each can be overridden from the command line via parse_options.sh.
gpus=0,1
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_5000.pdz

# Select the stage range on the command line, e.g.
#   ./run.sh --stage 0 --stop-stage 0
# Option parsing cannot be combined with positional `$1`, `$2`, ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

# Succeeds when stage number $1 lies inside [stage, stop_stage].
stage_selected() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# Stage 0: prepare data.
if stage_selected 0; then
    ./local/preprocess.sh ${conf_path} || exit -1
fi

# Stage 1: train the model; every `ckpt` lands under `train_output_path/checkpoints/`.
if stage_selected 1; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

# Stage 2: synthesize.
if stage_selected 2; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

# Stage 3: synthesize_e2e; the vocoder is pwgan by default.
if stage_selected 3; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

# Stage 4: PTQ_static.
if stage_selected 4; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/PTQ_static.sh  ${train_output_path} pwgan_csmsc || exit -1
fi


================================================
FILE: examples/csmsc/voc3/README.md
================================================
# Multi Band MelGAN with CSMSC
This example contains code used to train a [Multi Band MelGAN](https://arxiv.org/abs/2005.05106) model with [Chinese Standard Mandarin Speech Corpus](https://www.data-baker.com/open_source.html).
## Dataset
### Download and Extract
Download CSMSC from its [official website](https://test.data-baker.com/data/index/TNtts/) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/BZNSYP`.

### Get MFA Result and Extract
We use [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) results to cut the silence at the edges of the audio.
You can download them from here: [baker_alignment_tone.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/BZNSYP/with_tone/baker_alignment_tone.tar.gz), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) in our repo.

## Get Started
Assume the path to the dataset is `~/datasets/BZNSYP`.
Assume the path to the MFA result of CSMSC is `./baker_alignment_tone`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
    - synthesize waveform from text file.
```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.

```text
dump
├── dev
│   ├── norm
│   └── raw
├── test
│   ├── norm
│   └── raw
└── train
    ├── norm
    ├── raw
    └── feats_stats.npy
```
The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The `raw` folder contains the log magnitude of the mel spectrogram of each utterance, while the norm folder contains the normalized spectrogram. The statistics used to normalize the spectrogram are computed from the training set, which is located in `dump/train/feats_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains id and paths to the spectrogram of each utterance.

### Model Training
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
`./local/train.sh` calls `${BIN_DIR}/train.py`.
Here's the complete help message.

```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU]

Train a Multi-Band MelGAN model.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       Multi-Band MelGAN config file.
  --train-metadata TRAIN_METADATA
                        training data.
  --dev-metadata DEV_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
```

1. `--config` is a config file in yaml format to overwrite the default config, which can be found at `conf/default.yaml`.
2. `--train-metadata` and `--dev-metadata` should be the metadata file in the normalized subfolder of `train` and `dev` in the `dump` folder.
3. `--output-dir` is the directory to save the results of the experiment. Checkpoints are saved in `checkpoints/` inside this directory.
4. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

### Synthesizing
We use [MultiBand MelGAN](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/voc3) as the neural vocoder.

Download pretrained MultiBand MelGAN model from [mb_melgan_csmsc_ckpt_0.1.1.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_csmsc_ckpt_0.1.1.zip) and unzip it.
```bash
unzip mb_melgan_csmsc_ckpt_0.1.1.zip
```
MultiBand MelGAN checkpoint contains files listed below.
```text
mb_melgan_csmsc_ckpt_0.1.1
├── default.yaml                    # default config used to train MultiBand MelGAN
├── feats_stats.npy                 # statistics used to normalize spectrogram when training MultiBand MelGAN
└── snapshot_iter_1000000.pdz       # generator parameters of MultiBand MelGAN
```
`./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
```text
usage: synthesize.py [-h] [--generator-type GENERATOR_TYPE] [--config CONFIG]
                     [--checkpoint CHECKPOINT] [--test-metadata TEST_METADATA]
                     [--output-dir OUTPUT_DIR] [--ngpu NGPU]

Synthesize with GANVocoder.

optional arguments:
  -h, --help            show this help message and exit
  --generator-type GENERATOR_TYPE
                        type of GANVocoder, should in {pwgan, mb_melgan,
                        style_melgan, } now
  --config CONFIG       GANVocoder config file.
  --checkpoint CHECKPOINT
                        snapshot to load.
  --test-metadata TEST_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
```

1. `--config` is the Multi-Band MelGAN config file. You should use the same config with which the model was trained.
2. `--checkpoint` is the checkpoint to load. Pick one of the checkpoints from `checkpoints` inside the training output directory.
3. `--test-metadata` is the metadata of the test dataset. Use the `metadata.jsonl` in the `test/norm` subfolder of the `dump` folder (this is what `./local/synthesize.sh` passes).
4. `--output-dir` is the directory to save the synthesized audio files.
5. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

We use [Fastspeech2](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/tts3) as the acoustic model.
Download the pretrained fastspeech2_nosil model from [fastspeech2_nosil_baker_ckpt_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_ckpt_0.4.zip) and unzip it.
```bash
unzip fastspeech2_nosil_baker_ckpt_0.4.zip
```
Fastspeech2 checkpoint contains files listed below.
```text
fastspeech2_nosil_baker_ckpt_0.4
├── default.yaml            # default config used to train fastspeech2
├── phone_id_map.txt        # phone vocabulary file when training fastspeech2
├── snapshot_iter_76000.pdz # model parameters and optimizer states
└── speech_stats.npy        # statistics used to normalize spectrogram when training fastspeech2
```

`./local/synthesize_e2e.sh` calls `${BIN_DIR}/../../synthesize_e2e.py`, which can synthesize waveform from text file.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
```text
usage: synthesize_e2e.py [-h]
                         [--am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech}]
                         [--am_config AM_CONFIG] [--am_ckpt AM_CKPT]
                         [--am_stat AM_STAT] [--phones_dict PHONES_DICT]
                         [--tones_dict TONES_DICT]
                         [--speaker_dict SPEAKER_DICT] [--spk_id SPK_ID]
                         [--voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,style_melgan_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,wavernn_csmsc}]
                         [--voc_config VOC_CONFIG] [--voc_ckpt VOC_CKPT]
                         [--voc_stat VOC_STAT] [--lang LANG]
                         [--inference_dir INFERENCE_DIR] [--ngpu NGPU]
                         [--text TEXT] [--output_dir OUTPUT_DIR]
Synthesize with acoustic model & vocoder
optional arguments:
  -h, --help            show this help message and exit
  --am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech}
                        Choose acoustic model type of tts task.
  --am_config AM_CONFIG
                        Config of acoustic model.
  --am_ckpt AM_CKPT     Checkpoint file of acoustic model.
  --am_stat AM_STAT     mean and standard deviation used to normalize
                        spectrogram when training acoustic model.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --tones_dict TONES_DICT
                        tone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --spk_id SPK_ID       spk id for multi speaker acoustic model
  --voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,style_melgan_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,wavernn_csmsc}
                        Choose vocoder type of tts task.
  --voc_config VOC_CONFIG
                        Config of voc.
  --voc_ckpt VOC_CKPT   Checkpoint file of voc.
  --voc_stat VOC_STAT   mean and standard deviation used to normalize
                        spectrogram when training voc.
  --lang LANG           Choose model language. zh or en
  --inference_dir INFERENCE_DIR
                        dir to save inference models
  --ngpu NGPU           if ngpu == 0, use cpu.
  --text TEXT           text to synthesize, a 'utt_id sentence' pair per line.
  --output_dir OUTPUT_DIR
                        output dir.
```
1. `--am` is acoustic model type with the format {model_name}_{dataset}
2. `--am_config`, `--am_ckpt`, `--am_stat` and `--phones_dict` are arguments for acoustic model, which correspond to the 4 files in the fastspeech2 pretrained model.
3. `--voc` is vocoder type with the format {model_name}_{dataset}
4. `--voc_config`, `--voc_ckpt`, `--voc_stat` are arguments for vocoder, which correspond to the 3 files in the MultiBand MelGAN pretrained model.
5. `--lang` is the model language, which can be `zh` or `en`.
6. `--test_metadata` should be the metadata file in the normalized subfolder of `test`  in the `dump` folder.
7. `--text` is the text file, which contains sentences to synthesize.
8. `--output_dir` is the directory to save synthesized audio files.
9. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

## Fine-tuning
Since there is no `noise` in the input of Multi-Band MelGAN, the audio quality is not so good (see [espnet issue](https://github.com/espnet/espnet/issues/3536#issuecomment-916035415)). Following the method proposed in [HiFiGAN](https://arxiv.org/abs/2010.05646), we finetune Multi-Band MelGAN with the mel-spectrograms predicted by `FastSpeech2`.

The length of mel-spectrograms should align with the length of wavs, so we should generate mels using ground truth alignment.

But since we are fine-tuning, we should use the statistics computed during the training step.

You should first download pretrained `FastSpeech2` model from [fastspeech2_nosil_baker_ckpt_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_ckpt_0.4.zip) and `unzip` it.

Assume the path to the dump-dir of training step is `dump`.
Assume the path to the duration result of CSMSC is `durations.txt` (generated during the training step's preprocessing).
Assume the path to the pretrained `FastSpeech2` model is `fastspeech2_nosil_baker_ckpt_0.4`.

The `finetune.sh` script does the following:
1. **source path**.
2. generate ground truth alignment mels.
3. link `*_wave.npy` from `dump` to `dump_finetune` (because we only use new mels, the wavs are the ones used during the training step).
4. copy features' stats from `dump` to `dump_finetune`.
5. normalize the ground truth alignment mels.
6. finetune the model.

Before finetuning, make sure that the pretrained model is in the `${output-dir}/checkpoints` directory used by `finetune.sh`, and that this directory contains a `records.jsonl` that refers to the pretrained model:
```text
exp/finetune/checkpoints
├── records.jsonl
└── snapshot_iter_1000000.pdz
```
The content of `records.jsonl` should be as follows (change `"path"` to your ckpt path):
```
{"time": "2021-11-21 15:11:20.337311", "path": "~/PaddleSpeech/examples/csmsc/voc3/exp/finetune/checkpoints/snapshot_iter_1000000.pdz", "iteration": 1000000}
```
Run the command below 
```bash
./finetune.sh
```
By default, `finetune.sh` will use `conf/finetune.yaml` as config, the dump-dir is `dump_finetune`, the experiment dir is `exp/finetune`.

TODO: 
The hyperparameters in `finetune.yaml` are not good enough; a smaller `learning_rate` should be used (more `milestones` should be set).

## Pretrained Models
The pretrained model can be downloaded here:
- [mb_melgan_csmsc_ckpt_0.1.1.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_csmsc_ckpt_0.1.1.zip)
- [fastspeech2_nosil_baker_ckpt_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_ckpt_0.4.zip)

The finetuned model can be downloaded here:
- [mb_melgan_baker_finetune_ckpt_0.5.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_baker_finetune_ckpt_0.5.zip)

The static model can be downloaded here:
- [mb_melgan_csmsc_static_0.1.1.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_csmsc_static_0.1.1.zip)
- [fastspeech2_nosil_baker_static_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_static_0.4.zip)

The PIR static model can be downloaded here:
- [mb_melgan_csmsc_static_pir_0.1.1.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_csmsc_static_pir_0.1.1.zip) (To run the PIR model, set `FLAGS_enable_pir_api=1`; the PIR model only works with paddlepaddle>=3.0.0b2.)

The ONNX model can be downloaded here:
- [mb_melgan_csmsc_onnx_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_csmsc_onnx_0.2.0.zip)
- [fastspeech2_csmsc_onnx_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_csmsc_onnx_0.2.0.zip)

The Paddle-Lite model can be downloaded here:
- [mb_melgan_csmsc_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_csmsc_pdlite_1.3.0.zip)
- [fastspeech2_csmsc_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_csmsc_pdlite_1.3.0.zip)

Model | Step | eval/generator_loss | eval/log_stft_magnitude_loss|eval/spectral_convergence_loss |eval/sub_log_stft_magnitude_loss|eval/sub_spectral_convergence_loss
:-------------:| :------------:| :-----: | :-----: | :--------:| :--------:| :--------:
default| 1(gpu) x 1000000| 2.4851|0.71778 |0.2761 |0.66334 |0.2777|
finetune| 1(gpu) x 1000000|3.196967|0.977804| 0.778484| 0.889576 |0.776756 |

Multi Band MelGAN checkpoint contains files listed below.

```text
mb_melgan_csmsc_ckpt_0.1.1
├── default.yaml                  # default config used to train multi band melgan
├── feats_stats.npy               # statistics used to normalize spectrogram when training multi band melgan
└── snapshot_iter_1000000.pdz     # generator parameters of multi band melgan
```

FastSpeech2 checkpoint contains files listed below.

```text
fastspeech2_nosil_baker_ckpt_0.4
├── default.yaml            # default config used to train fastspeech2
├── phone_id_map.txt        # phone vocabulary file when training fastspeech2
├── snapshot_iter_76000.pdz # model parameters and optimizer states
└── speech_stats.npy        # statistics used to normalize spectrogram when training fastspeech2
```

## Acknowledgement
We adapted some code from https://github.com/kan-bayashi/ParallelWaveGAN.


================================================
FILE: examples/csmsc/voc3/conf/default.yaml
================================================
# This is the hyperparameter configuration file for Multi-Band MelGAN.
# Please make sure this is adjusted for the CSMSC dataset. If you want to
# apply it to another dataset, you might need to carefully change some parameters.
# This configuration requires ~ 8GB memory and will finish within 7 days on Titan V.

# This configuration is based on full-band MelGAN but the hop size and sampling
# rate are different from the paper (16kHz vs 24kHz). The number of iterations
# is not shown in the paper, so currently we train 1M iterations (not sure this
# is enough to converge).

###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
fs: 24000                # Sampling rate.
n_fft: 2048              # FFT size (samples).
n_shift: 300             # Hop size (samples). 12.5ms
win_length: 1200         # Window length (samples). 50ms
                         # If set to null, it will be the same as fft_size.
window: "hann"           # Window function.
n_mels: 80               # Number of mel basis.
fmin: 80                 # Minimum freq in mel basis calculation. (Hz)
fmax: 7600               # Maximum frequency in mel basis calculation. (Hz)

###########################################################
#         GENERATOR NETWORK ARCHITECTURE SETTING          #
###########################################################
generator_params:
    in_channels: 80               # Number of input channels.
    out_channels: 4               # Number of output channels.
    kernel_size: 7                # Kernel size of initial and final conv layers.
    channels: 384                 # Initial number of channels for conv layers.
    upsample_scales: [5, 5, 3]    # List of Upsampling scales. prod(upsample_scales) x out_channels == n_shift
    stack_kernel_size: 3          # Kernel size of dilated conv layers in residual stack.
    stacks: 4                     # Number of stacks in a single residual stack module.
    use_weight_norm: True         # Whether to use weight normalization.
    use_causal_conv: False        # Whether to use causal convolution.
    use_final_nonlinear_activation: True


###########################################################
#       DISCRIMINATOR NETWORK ARCHITECTURE SETTING        #
###########################################################
discriminator_params:
    in_channels: 1                    # Number of input channels.
    out_channels: 1                   # Number of output channels.
    scales: 3                         # Number of multi-scales.
    downsample_pooling: "AvgPool1D"   # Pooling type for the input downsampling.
    downsample_pooling_params:        # Parameters of the above pooling function.
        kernel_size: 4
        stride: 2
        padding: 1
        exclusive: True
    kernel_sizes: [5, 3]              # List of kernel size.
    channels: 16                      # Number of channels of the initial conv layer.
    max_downsample_channels: 512      # Maximum number of channels of downsampling layers.
    downsample_scales: [4, 4, 4]      # List of downsampling scales.
    nonlinear_activation: "leakyrelu" # Nonlinear activation function.
    nonlinear_activation_params:      # Parameters of nonlinear activation function.
        negative_slope: 0.2
    use_weight_norm: True             # Whether to use weight norm.
    

###########################################################
#                   STFT LOSS SETTING                     #
###########################################################
use_stft_loss: True
stft_loss_params:
    fft_sizes: [1024, 2048, 512]  # List of FFT size for STFT-based loss.
    hop_sizes: [120, 240, 50]     # List of hop size for STFT-based loss.
    win_lengths: [600, 1200, 240] # List of window length for STFT-based loss.
    window: "hann"                # Window function for STFT-based loss
use_subband_stft_loss: True
subband_stft_loss_params:
    fft_sizes: [384, 683, 171]  # List of FFT size for STFT-based loss.
    hop_sizes: [30, 60, 10]     # List of hop size for STFT-based loss
    win_lengths: [150, 300, 60] # List of window length for STFT-based loss.
    window: "hann"              # Window function for STFT-based loss

###########################################################
#               ADVERSARIAL LOSS SETTING                  #
###########################################################
use_feat_match_loss: False # Whether to use feature matching loss.
lambda_adv: 2.5            # Loss balancing coefficient for adversarial loss.

###########################################################
#                  DATA LOADER SETTING                    #
###########################################################
batch_size: 64             # Batch size.
batch_max_steps: 16200     # Length of each audio in batch. Make sure it is divisible by n_shift.
num_workers: 2             # Number of workers in DataLoader.

###########################################################
#             OPTIMIZER & SCHEDULER SETTING               #
###########################################################
generator_optimizer_params:
    epsilon: 1.0e-7                     # Generator's epsilon.
    weight_decay: 0.0                   # Generator's weight decay coefficient.

generator_grad_norm: -1                 # Generator's gradient norm.
generator_scheduler_params:
    learning_rate: 1.0e-3               # Generator's learning rate.
    gamma: 0.5                          # Generator's scheduler gamma.
    milestones:                         # At each milestone, lr will be multiplied by gamma.
        - 100000
        - 200000
        - 300000
        - 400000
        - 500000
        - 600000
discriminator_optimizer_params:
    epsilon: 1.0e-7                         # Discriminator's epsilon.
    weight_decay: 0.0                       # Discriminator's weight decay coefficient.
  
discriminator_grad_norm: -1                 # Discriminator's gradient norm.
discriminator_scheduler_params:
    learning_rate: 1.0e-3                   # Discriminator's learning rate.
    gamma: 0.5                              # Discriminator's scheduler gamma.
    milestones:                             # At each milestone, lr will be multiplied by gamma.
        - 100000
        - 200000
        - 300000
        - 400000
        - 500000
        - 600000

###########################################################
#                    INTERVAL SETTING                     #
###########################################################
discriminator_train_start_steps: 200000 # Number of steps to start to train discriminator.
train_max_steps: 1000000                # Number of training steps.
save_interval_steps: 5000               # Interval steps to save checkpoint.
eval_interval_steps: 1000               # Interval steps to evaluate the network.

###########################################################
#                     OTHER SETTING                       #
###########################################################
num_snapshots: 10                 # max number of snapshots to keep while training
seed: 42                          # random seed for paddle, random, and np.random


================================================
FILE: examples/csmsc/voc3/conf/finetune.yaml
================================================
# This is the hyperparameter configuration file for finetuning Multi-Band MelGAN.
# Please make sure this is adjusted for the CSMSC dataset. If you want to
# apply it to another dataset, you might need to carefully change some parameters.
# This configuration requires ~ 8GB memory and will finish within 7 days on Titan V.

# This configuration is based on full-band MelGAN but the hop size and sampling
# rate are different from the paper (16kHz vs 24kHz). The number of iterations
# is not shown in the paper, so currently we train 1M iterations (not sure this
# is enough to converge).

###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
fs: 24000                # Sampling rate.
n_fft: 2048              # FFT size (samples).
n_shift: 300             # Hop size (samples). 12.5ms
win_length: 1200         # Window length (samples). 50ms
                         # If set to null, it will be the same as fft_size.
window: "hann"           # Window function.
n_mels: 80               # Number of mel basis.
fmin: 80                 # Minimum freq in mel basis calculation. (Hz)
fmax: 7600               # Maximum frequency in mel basis calculation. (Hz)

###########################################################
#         GENERATOR NETWORK ARCHITECTURE SETTING          #
###########################################################
generator_params:
    in_channels: 80               # Number of input channels.
    out_channels: 4               # Number of output channels.
    kernel_size: 7                # Kernel size of initial and final conv layers.
    channels: 384                 # Initial number of channels for conv layers.
    upsample_scales: [5, 5, 3]    # List of Upsampling scales. prod(upsample_scales) x out_channels == n_shift
    stack_kernel_size: 3          # Kernel size of dilated conv layers in residual stack.
    stacks: 4                     # Number of stacks in a single residual stack module.
    use_weight_norm: True         # Whether to use weight normalization.
    use_causal_conv: False        # Whether to use causal convolution.
    use_final_nonlinear_activation: True


###########################################################
#       DISCRIMINATOR NETWORK ARCHITECTURE SETTING        #
###########################################################
discriminator_params:
    in_channels: 1                    # Number of input channels.
    out_channels: 1                   # Number of output channels.
    scales: 3                         # Number of multi-scales.
    downsample_pooling: "AvgPool1D"   # Pooling type for the input downsampling.
    downsample_pooling_params:        # Parameters of the above pooling function.
        kernel_size: 4
        stride: 2
        padding: 1
        exclusive: True
    kernel_sizes: [5, 3]              # List of kernel size.
    channels: 16                      # Number of channels of the initial conv layer.
    max_downsample_channels: 512      # Maximum number of channels of downsampling layers.
    downsample_scales: [4, 4, 4]      # List of downsampling scales.
    nonlinear_activation: "leakyrelu" # Nonlinear activation function.
    nonlinear_activation_params:      # Parameters of nonlinear activation function.
        negative_slope: 0.2
    use_weight_norm: True             # Whether to use weight norm.
    

###########################################################
#                   STFT LOSS SETTING                     #
###########################################################
use_stft_loss: True
stft_loss_params:
    fft_sizes: [1024, 2048, 512]  # List of FFT size for STFT-based loss.
    hop_sizes: [120, 240, 50]     # List of hop size for STFT-based loss
    win_lengths: [600, 1200, 240] # List of window length for STFT-based loss.
    window: "hann"                # Window function for STFT-based loss
use_subband_stft_loss: True
subband_stft_loss_params:
    fft_sizes: [384, 683, 171]  # List of FFT size for STFT-based loss.
    hop_sizes: [30, 60, 10]     # List of hop size for STFT-based loss.
    win_lengths: [150, 300, 60] # List of window length for STFT-based loss.
    window: "hann"              # Window function for STFT-based loss

###########################################################
#               ADVERSARIAL LOSS SETTING                  #
###########################################################
use_feat_match_loss: False # Whether to use feature matching loss.
lambda_adv: 2.5            # Loss balancing coefficient for adversarial loss.

###########################################################
#                  DATA LOADER SETTING                    #
###########################################################
batch_size: 64             # Batch size.
batch_max_steps: 16200     # Length of each audio in batch. Make sure it is divisible by n_shift.
num_workers: 2             # Number of workers in DataLoader.

###########################################################
#             OPTIMIZER & SCHEDULER SETTING               #
###########################################################
generator_optimizer_params:
    epsilon: 1.0e-7                     # Generator's epsilon.
    weight_decay: 0.0                   # Generator's weight decay coefficient.

generator_grad_norm: -1                 # Generator's gradient norm.
generator_scheduler_params:
    learning_rate: 1.0e-3               # Generator's learning rate.
    gamma: 0.5                          # Generator's scheduler gamma.
    milestones:                         # At each milestone, lr will be multiplied by gamma.
        - 100000
        - 200000
        - 300000
        - 400000
        - 500000
        - 600000
discriminator_optimizer_params:
    epsilon: 1.0e-7                         # Discriminator's epsilon.
    weight_decay: 0.0                       # Discriminator's weight decay coefficient.
  
discriminator_grad_norm: -1                 # Discriminator's gradient norm.
discriminator_scheduler_params:
    learning_rate: 1.0e-3                   # Discriminator's learning rate.
    gamma: 0.5                              # Discriminator's scheduler gamma.
    milestones:                             # At each milestone, lr will be multiplied by gamma.
        - 100000
        - 200000
        - 300000
        - 400000
        - 500000
        - 600000

###########################################################
#                    INTERVAL SETTING                     #
###########################################################
discriminator_train_start_steps: 200000 # Number of steps to start to train discriminator.
train_max_steps: 2000000                # Number of training steps.
save_interval_steps: 1000               # Interval steps to save checkpoint.
eval_interval_steps: 1000               # Interval steps to evaluate the network.

###########################################################
#                     OTHER SETTING                       #
###########################################################
num_snapshots: 10                 # max number of snapshots to keep while training
seed: 42                          # random seed for paddle, random, and np.random


================================================
FILE: examples/csmsc/voc3/local/synthesize.sh
================================================
#!/bin/bash
# Vocoder-only synthesis for the CSMSC Multi-Band MelGAN recipe: turns the
# features listed in dump/test/norm/metadata.jsonl into waveforms.
#
# Usage: ./local/synthesize.sh <config_path> <train_output_path> <ckpt_name>
#   $1  config file passed as --config
#   $2  training output directory (checkpoint read from its checkpoints/ subdir,
#       results written to its test/ subdir)
#   $3  checkpoint file name inside <train_output_path>/checkpoints
#
# BIN_DIR must already be set in the environment (path.sh exports it).

conf_file=$1
exp_dir=$2
snapshot=$3

# Options handed to synthesize.py, kept in the original order.
synth_args=(
    --config=${conf_file}
    --checkpoint=${exp_dir}/checkpoints/${snapshot}
    --test-metadata=dump/test/norm/metadata.jsonl
    --output-dir=${exp_dir}/test
    --generator-type=mb_melgan
)

# The two FLAGS_* variables are set only for this python3 invocation.
FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 ${BIN_DIR}/../synthesize.py "${synth_args[@]}"


================================================
FILE: examples/csmsc/voc3/local/synthesize_e2e.sh
================================================
#!/bin/bash
# Text-to-waveform synthesis for the CSMSC Multi-Band MelGAN recipe.
#
# Usage: ./local/synthesize_e2e.sh <config_path> <train_output_path> <ckpt_name>
#   $1  vocoder config file passed as --voc_config
#   $2  training output directory (checkpoint read from its checkpoints/ subdir)
#   $3  checkpoint file name inside <train_output_path>/checkpoints
#
# BIN_DIR must already be set in the environment (path.sh exports it as
# .../paddlespeech/t2s/exps/gan_vocoder/multi_band_melgan).
#
# Path notes:
#   * synthesize_e2e.py is two levels above BIN_DIR; the assets/ directory is
#     three levels above BIN_DIR, so sentences.txt must be addressed with
#     ../../../ (../../ would point inside exps/).
#   * The phone vocabulary belongs to the acoustic model, so it is taken from
#     the pretrained FastSpeech2 directory next to the other --am_* files; the
#     vocoder dump/ directory does not contain a phone_id_map.txt.

config_path=$1
train_output_path=$2
ckpt_name=$3

FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 ${BIN_DIR}/../../synthesize_e2e.py \
    --am=fastspeech2_csmsc \
    --am_config=fastspeech2_nosil_baker_ckpt_0.4/default.yaml \
    --am_ckpt=fastspeech2_nosil_baker_ckpt_0.4/snapshot_iter_76000.pdz \
    --am_stat=fastspeech2_nosil_baker_ckpt_0.4/speech_stats.npy \
    --voc=mb_melgan_csmsc \
    --voc_config=${config_path} \
    --voc_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
    --voc_stat=dump/train/feats_stats.npy \
    --lang=zh \
    --text=${BIN_DIR}/../../../assets/sentences.txt \
    --output_dir=${train_output_path}/test_e2e \
    --phones_dict=fastspeech2_nosil_baker_ckpt_0.4/phone_id_map.txt \
    --inference_dir=${train_output_path}/inference

================================================
FILE: examples/csmsc/voc3/path.sh
================================================
#!/bin/bash
# Environment setup for the CSMSC Multi-Band MelGAN recipe.
# Meant to be sourced (run.sh does `source path.sh`) from the recipe directory.

# Repository root: three levels above the current working directory.
export MAIN_ROOT=$(realpath ${PWD}/../../../)

# Put the repository root and its utils/ directory first on PATH.
export PATH="${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}"
export LC_ALL=C

# No .pyc files; UTF-8 I/O in Python avoids UnicodeDecodeError when LC_ALL=C.
export PYTHONDONTWRITEBYTECODE=1 PYTHONIOENCODING=UTF-8
export PYTHONPATH="${MAIN_ROOT}:${PYTHONPATH}"

# Directory of the experiment scripts (e.g. train.py) for this vocoder.
MODEL=multi_band_melgan
export BIN_DIR="${MAIN_ROOT}/paddlespeech/t2s/exps/gan_vocoder/${MODEL}"

================================================
FILE: examples/csmsc/voc3/run.sh
================================================
#!/bin/bash
# Staged recipe for Multi-Band MelGAN on CSMSC:
#   0 preprocess, 1 train, 2 synthesize, 3 synthesize_e2e, 4 PTQ_static.

set -e
source path.sh

gpus=0,1
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_50000.pdz

# The variables above can be overridden from the command line, e.g.
# `./run.sh --stage 0 --stop-stage 0` runs a single stage.
# Option-style overrides cannot be mixed with positional `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

# Succeeds when stage number $1 lies within [stage, stop_stage].
stage_enabled() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# Stage 0: prepare data
if stage_enabled 0; then
    ./local/preprocess.sh ${conf_path} || exit -1
fi

# Stage 1: train model, all `ckpt` under `train_output_path/checkpoints/` dir
if stage_enabled 1; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

# Stage 2: synthesize
if stage_enabled 2; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

# Stage 3: synthesize_e2e, vocoder is MultiBand MelGAN by default
if stage_enabled 3; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

# Stage 4: PTQ_static
if stage_enabled 4; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/PTQ_static.sh ${train_output_path} mb_melgan_csmsc || exit -1
fi


================================================
FILE: examples/csmsc/voc4/README.md
================================================
# Style MelGAN with CSMSC
This example contains code used to train a [Style MelGAN](https://arxiv.org/abs/2011.01557) model with [Chinese Standard Mandarin Speech Corpus](https://www.data-baker.com/open_source.html).
## Dataset
### Download and Extract
Download CSMSC from its [official website](https://test.data-baker.com/data/index/TNtts/) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/BZNSYP`.

### Get MFA Result and Extract
We use [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) results to cut the silence at the edges of the audio.
You can download the alignment results here: [baker_alignment_tone.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/BZNSYP/with_tone/baker_alignment_tone.tar.gz), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) of our repo.

## Get Started
Assume the path to the dataset is `~/datasets/BZNSYP`.
Assume the path to the MFA result of CSMSC is `./baker_alignment_tone`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.

```text
dump
├── dev
│   ├── norm
│   └── raw
├── test
│   ├── norm
│   └── raw
└── train
    ├── norm
    ├── raw
    └── feats_stats.npy
```
The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The `raw` folder contains the log magnitude of the mel spectrogram of each utterance, while the norm folder contains the normalized spectrogram. The statistics used to normalize the spectrogram are computed from the training set, which is located in `dump/train/feats_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains id and paths to the spectrogram of each utterance.

### Model Training
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
`./local/train.sh` calls `${BIN_DIR}/train.py`.
Here's the complete help message.

```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU]

Train a Style MelGAN model.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       Style MelGAN config file.
  --train-metadata TRAIN_METADATA
                        training data.
  --dev-metadata DEV_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
```

1. `--config` is a config file in yaml format to overwrite the default config, which can be found at `conf/default.yaml`.
2. `--train-metadata` and `--dev-metadata` should be the metadata file in the normalized subfolder of `train` and `dev` in the `dump` folder.
3. `--output-dir` is the directory to save the results of the experiment. Checkpoints are saved in `checkpoints/` inside this directory.
4. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

### Synthesizing
`./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
```text
usage: synthesize.py [-h] [--generator-type GENERATOR_TYPE] [--config CONFIG]
                     [--checkpoint CHECKPOINT] [--test-metadata TEST_METADATA]
                     [--output-dir OUTPUT_DIR] [--ngpu NGPU]

Synthesize with GANVocoder.

optional arguments:
  -h, --help            show this help message and exit
  --generator-type GENERATOR_TYPE
                        type of GANVocoder, should in {pwgan, mb_melgan,
                        style_melgan, } now
  --config CONFIG       GANVocoder config file.
  --checkpoint CHECKPOINT
                        snapshot to load.
  --test-metadata TEST_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
```

1. `--config` is the Style MelGAN config file. You should use the same config with which the model is trained.
2. `--checkpoint` is the checkpoint to load. Pick one of the checkpoints from `checkpoints` inside the training output directory.
3. `--test-metadata` is the metadata of the test dataset. Use the `metadata.jsonl` in the `test/norm` subfolder from the processed directory.
4. `--output-dir` is the directory to save the synthesized audio files.
5. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

## Pretrained Models
The pretrained model can be downloaded here:
- [style_melgan_csmsc_ckpt_0.1.1.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/style_melgan/style_melgan_csmsc_ckpt_0.1.1.zip)

The static model of Style MelGAN is not available now.

Style MelGAN checkpoint contains files listed below.

```text
style_melgan_csmsc_ckpt_0.1.1
├── default.yaml                    # default config used to train style melgan
├── feats_stats.npy                  # statistics used to normalize spectrogram when training style melgan 
└── snapshot_iter_1500000.pdz     # generator parameters of style melgan
```

## Acknowledgement
We adapted some code from https://github.com/kan-bayashi/ParallelWaveGAN.


================================================
FILE: examples/csmsc/voc4/conf/default.yaml
================================================
# This is the configuration file for the CSMSC dataset. This configuration is based
# on the StyleMelGAN paper but uses MSE loss instead of Hinge loss. I found that
# batch_size = 8 also works well, so if you want to accelerate the training,
# you can reduce the batch size (e.g. 8 or 16). The upsampling scales are modified to
# fit the shift size of 300 points.
# NOTE: batch_max_steps(24000) == prod(noise_upsample_scales)(80) * prod(upsample_scales)(300)

###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
fs: 24000                # Sampling rate.
n_fft: 2048              # FFT size (samples).
n_shift: 300             # Hop size (samples). 12.5ms
win_length: 1200         # Window length (samples). 50ms
                         # If set to null, it will be the same as fft_size.
window: "hann"           # Window function.
n_mels: 80               # Number of mel basis.
fmin: 80                 # Minimum freq in mel basis calculation. (Hz)
fmax: 7600               # Maximum frequency in mel basis calculation. (Hz)

###########################################################
#         GENERATOR NETWORK ARCHITECTURE SETTING          #
###########################################################
# Keyword arguments for the Style MelGAN generator.
# Constraint (see NOTE at the top of this file):
#   batch_max_steps == prod(noise_upsample_scales) * prod(upsample_scales)
generator_params:
    in_channels: 128              # Number of input channels.
    aux_channels: 80              # Number of auxiliary feature channels; presumably must equal n_mels (80) -- TODO confirm.
    channels: 64                  # Initial number of channels for conv layers.
    out_channels: 1               # Number of output channels.
    kernel_size: 9                # Kernel size of initial and final conv layers.
    dilation: 2                   # Dilation value; which layers it applies to is not visible here -- TODO confirm.
    bias: True                    # Presumably whether conv layers use a bias parameter -- TODO confirm.
    noise_upsample_scales: [10, 2, 2, 2]   # Upsampling scales applied to the noise input. prod(noise_upsample_scales) == 80
    noise_upsample_activation: "leakyrelu" # Activation name for the noise upsampling layers.
    noise_upsample_activation_params:      # Parameters of the activation above.
        negative_slope: 0.2
    upsample_scales: [5, 1, 5, 1, 3, 1, 2, 2, 1] # List of Upsampling scales. prod(upsample_scales) == n_shift
    upsample_mode: "nearest"                     # Interpolation mode used for upsampling.
    gated_function: "softmax"                    # Name of the gating function; exact usage is not visible here -- TODO confirm.
    use_weight_norm: True                        # Whether to use weight normalization.

###########################################################
#       DISCRIMINATOR NETWORK ARCHITECTURE SETTING        #
###########################################################
# Keyword arguments for the Style MelGAN discriminator.
# `window_sizes` and `pqmf_params` both have 4 entries; presumably entry i of
# one pairs with entry i of the other -- TODO confirm against the model code.
discriminator_params:
    repeats: 4                            # Repeat count; presumably how many random windows are drawn per discriminator -- TODO confirm.
    window_sizes: [512, 1024, 2048, 4096] # Window sizes; presumably in samples -- TODO confirm.
    # One PQMF setting per window size. Looks like [subbands, taps, cutoff_ratio, beta] -- TODO confirm.
    # NOTE: YAML loads a bare `None` as the string "None", not as null.
    pqmf_params:
        - [1, None, None, None]
        - [2, 62, 0.26700, 9.0]
        - [4, 62, 0.14200, 9.0]
        - [8, 62, 0.07949, 9.0]
    # Settings of each base discriminator.
    discriminator_params:
        out_channels: 1               # Number of output channels.
        kernel_sizes: [5, 3]          # List of kernel size.
        channels: 16                  # Number of channels of the initial conv layer.
        max_downsample_channels: 512  # Maximum number of channels of downsampling layers.
        bias: True                    # Presumably whether conv layers use a bias parameter -- TODO confirm.
        downsample_scales: [4, 4, 4, 1]   # List of downsampling scales.
        nonlinear_activation: "leakyrelu" # Nonlinear activation function.
        nonlinear_activation_params:      # Parameters of nonlinear activation function.
            negative_slope: 0.2
    use_weight_norm: True                 # Whether to use weight norm.
    

###########################################################
#                   STFT LOSS SETTING                     #
###########################################################
use_stft_loss: True
stft_loss_params:
    fft_sizes: [1024, 2048, 512]  # List of FFT size for STFT-based loss.
    hop_sizes: [120, 240, 50]     # List of hop size for STFT-based loss
    win_lengths: [600, 1200, 240] # List of window length for STFT-based loss.
    window: "hann"                # Window function for STFT-based loss
lambda_aux: 1.0                   # Loss balancing coefficient for aux loss.

###########################################################
#               ADVERSARIAL LOSS SETTING                  #
###########################################################
lambda_adv: 1.0 # Loss balancing coefficient for adv loss.
generator_adv_loss_params:
    average_by_discriminators: False # Whether to average loss by #discriminators.
discriminator_adv_loss_params:
    average_by_discriminators: False # Whether to average loss by #discriminators.

###########################################################
#                  DATA LOADER SETTING                    #
###########################################################
batch_size: 32              # Batch size.
# batch_max_steps(24000) == prod(noise_upsample_scales)(80) * prod(upsample_scales)(300, i.e. n_shift)
batch_max_steps: 24000      # Length of each audio clip in a batch (samples). Must be divisible by n_shift.
num_workers: 2              # Number of workers in DataLoader.

###########################################################
#             OPTIMIZER & SCHEDULER SETTING               #
###########################################################
generator_optimizer_params:
    beta1: 0.5
    beta2: 0.9
    weight_decay: 0.0                       # Generator's weight decay coefficient.
generator_scheduler_params:  
    learning_rate: 1.0e-4                   # Generator's learning rate.
    gamma: 0.5                              # Generator's scheduler gamma.
    milestones:                             # At each milestone, lr will be multiplied by gamma.
        - 100000
        - 300000
        - 500000
        - 700000
        - 900000
generator_grad_norm: -1                     # Generator's gradient norm.
discriminator_optimizer_params:
    beta1: 0.5
    beta2: 0.9
    weight_decay: 0.0                       # Discriminator's weight decay coefficient.
discriminator_scheduler_params:
    learning_rate: 2.0e-4                   # Discriminator's learning rate.
    gamma: 0.5                              # Discriminator's scheduler gamma.
    milestones:                             # At each milestone, lr will be multiplied by gamma.
        - 200000
        - 400000
        - 600000
        - 800000
discriminator_grad_norm: -1                 # Discriminator's gradient norm.

###########################################################
#                    INTERVAL SETTING                     #
###########################################################
discriminator_train_start_steps: 100000 # Number of steps to start to train discriminator.
train_max_steps: 1500000                # Number of training steps.
save_interval_steps: 5000               # Interval steps to save checkpoint.
eval_interval_steps: 1000               # Interval steps to evaluate the network.

###########################################################
#                     OTHER SETTING                       #
###########################################################
num_snapshots: 10                 # max number of snapshots to keep while training
seed: 42                          # random seed for paddle, random, and np.random


================================================
FILE: examples/csmsc/voc4/local/synthesize.sh
================================================
#!/bin/bash
# Vocode the normalized test-set features listed in
# dump/test/norm/metadata.jsonl with a trained Style MelGAN checkpoint.
#
# Positional arguments:
#   $1  vocoder config file
#   $2  training output directory (checkpoints are read from `checkpoints/`,
#       results are written to `test/` inside it)
#   $3  checkpoint file name inside `checkpoints/`

vocoder_conf=$1
output_root=$2
snapshot_name=$3

# Paddle allocator flags, passed to the python process through the environment.
export FLAGS_allocator_strategy=naive_best_fit
export FLAGS_fraction_of_gpu_memory_to_use=0.01

python3 ${BIN_DIR}/../synthesize.py \
    --config=${vocoder_conf} \
    --checkpoint=${output_root}/checkpoints/${snapshot_name} \
    --test-metadata=dump/test/norm/metadata.jsonl \
    --output-dir=${output_root}/test \
    --generator-type=style_melgan


================================================
FILE: examples/csmsc/voc4/path.sh
================================================
#!/bin/bash
# Environment setup for the voc4 (Style MelGAN) recipe; meant to be sourced
# from the example directory.

# Repository root: three levels above the current working directory.
export MAIN_ROOT=$(realpath ${PWD}/../../../)

export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}

# PYTHONIOENCODING=UTF-8 avoids UnicodeDecodeError in Python when LC_ALL=C.
export LC_ALL=C PYTHONDONTWRITEBYTECODE=1 PYTHONIOENCODING=UTF-8

export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

# Directory holding the training script of the selected GAN vocoder.
MODEL=style_melgan
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/gan_vocoder/${MODEL}

================================================
FILE: examples/csmsc/voc4/run.sh
================================================
#!/bin/bash
# Staged recipe for Style MelGAN on CSMSC: 0 preprocess, 1 train, 2 synthesize.

set -e
source path.sh

gpus=0,1
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_50000.pdz

# Any variable above can be overridden on the command line; for instance
# `./run.sh --stage 0 --stop-stage 0` runs only the first stage.
# Option-style overrides cannot be combined with positional `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

# Stage 0: prepare data
if test ${stage} -le 0 && test ${stop_stage} -ge 0; then
    ./local/preprocess.sh ${conf_path} || exit -1
fi

# Stage 1: train model, all `ckpt` under `train_output_path/checkpoints/` dir
if test ${stage} -le 1 && test ${stop_stage} -ge 1; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

# Stage 2: synthesize
if test ${stage} -le 2 && test ${stop_stage} -ge 2; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi


================================================
FILE: examples/csmsc/voc5/README.md
================================================
# HiFiGAN with CSMSC
This example contains code used to train a [HiFiGAN](https://arxiv.org/abs/2010.05646) model with [Chinese Standard Mandarin Speech Corpus](https://www.data-baker.com/open_source.html).
## Dataset
### Download and Extract
Download CSMSC from its [official website](https://test.data-baker.com/data/index/TNtts/) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/BZNSYP`.

The structure of the folder is listed below.

```text
└─ Wave
    └─ .wav files (audio speech)
└─ PhoneLabeling
    └─ .interval files (alignment between phoneme and duration)
└─ ProsodyLabeling
   └─ 000001-010000.txt (text with prosodic by pinyin)
```

### Get MFA Result and Extract
We use [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) results to cut silence at the edge of audio.
You can download the alignment results here: [baker_alignment_tone.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/BZNSYP/with_tone/baker_alignment_tone.tar.gz), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) of our repo.

## Get Started
Assume the path to the dataset is `~/datasets/BZNSYP`.
Assume the path to the MFA result of CSMSC is `./baker_alignment_tone`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
    - synthesize waveform from text file.
```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.

```text
dump
├── dev
│   ├── norm
│   └── raw
├── test
│   ├── norm
│   └── raw
└── train
    ├── norm
    ├── raw
    └── feats_stats.npy
```
The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The `raw` folder contains the log magnitude of the mel spectrogram of each utterance, while the norm folder contains the normalized spectrogram. The statistics used to normalize the spectrogram are computed from the training set, which is located in `dump/train/feats_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains id and paths to the spectrogram of each utterance.

### Model Training
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
`./local/train.sh` calls `${BIN_DIR}/train.py`.
Here's the complete help message.

```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU]

Train a HiFiGAN model.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       HiFiGAN config file.
  --train-metadata TRAIN_METADATA
                        training data.
  --dev-metadata DEV_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
```

1. `--config` is a config file in yaml format to overwrite the default config, which can be found at `conf/default.yaml`.
2. `--train-metadata` and `--dev-metadata` should be the metadata file in the normalized subfolder of `train` and `dev` in the `dump` folder.
3. `--output-dir` is the directory to save the results of the experiment. Checkpoints are saved in `checkpoints/` inside this directory.
4. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

### Synthesizing
We use [HiFiGAN](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/voc5) as the neural vocoder.

Download pretrained HiFiGAN model from [hifigan_csmsc_ckpt_0.1.1.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_csmsc_ckpt_0.1.1.zip) and unzip it.
```bash
unzip hifigan_csmsc_ckpt_0.1.1.zip
```
HiFiGAN checkpoint contains files listed below.
```text
hifigan_csmsc_ckpt_0.1.1
├── default.yaml                    # default config used to train HiFiGAN
├── feats_stats.npy                 # statistics used to normalize spectrogram when training HiFiGAN
└── snapshot_iter_2500000.pdz       # generator parameters of HiFiGAN
```
`./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
```text
usage: synthesize.py [-h] [--generator-type GENERATOR_TYPE] [--config CONFIG]
                     [--checkpoint CHECKPOINT] [--test-metadata TEST_METADATA]
                     [--output-dir OUTPUT_DIR] [--ngpu NGPU]

Synthesize with GANVocoder.

optional arguments:
  -h, --help            show this help message and exit
  --generator-type GENERATOR_TYPE
                        type of GANVocoder, should in {pwgan, mb_melgan,
                        style_melgan, } now
  --config CONFIG       GANVocoder config file.
  --checkpoint CHECKPOINT
                        snapshot to load.
  --test-metadata TEST_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
```

1. `--config` is the HiFiGAN config file. You should use the same config with which the model is trained.
2. `--checkpoint` is the checkpoint to load. Pick one of the checkpoints from `checkpoints` inside the training output directory.
3. `--test-metadata` is the metadata of the test dataset. Use the `metadata.jsonl` in the `test/norm` subfolder from the processed directory.
4. `--output-dir` is the directory to save the synthesized audio files.
5. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

We use [Fastspeech2](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/tts3) as the acoustic model.
Download the pretrained fastspeech2_nosil model from [fastspeech2_nosil_baker_ckpt_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_ckpt_0.4.zip) and unzip it.
```bash
unzip fastspeech2_nosil_baker_ckpt_0.4.zip
```
Fastspeech2 checkpoint contains files listed below.
```text
fastspeech2_nosil_baker_ckpt_0.4
├── default.yaml            # default config used to train fastspeech2
├── phone_id_map.txt        # phone vocabulary file when training fastspeech2
├── snapshot_iter_76000.pdz # model parameters and optimizer states
└── speech_stats.npy        # statistics used to normalize spectrogram when training fastspeech2
```

`./local/synthesize_e2e.sh` calls `${BIN_DIR}/../../synthesize_e2e.py`, which can synthesize waveform from text file.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
```text
usage: synthesize_e2e.py [-h]
                         [--am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech}]
                         [--am_config AM_CONFIG] [--am_ckpt AM_CKPT]
                         [--am_stat AM_STAT] [--phones_dict PHONES_DICT]
                         [--tones_dict TONES_DICT]
                         [--speaker_dict SPEAKER_DICT] [--spk_id SPK_ID]
                         [--voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,style_melgan_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,wavernn_csmsc}]
                         [--voc_config VOC_CONFIG] [--voc_ckpt VOC_CKPT]
                         [--voc_stat VOC_STAT] [--lang LANG]
                         [--inference_dir INFERENCE_DIR] [--ngpu NGPU]
                         [--text TEXT] [--output_dir OUTPUT_DIR]
Synthesize with acoustic model & vocoder
optional arguments:
  -h, --help            show this help message and exit
  --am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech}
                        Choose acoustic model type of tts task.
  --am_config AM_CONFIG
                        Config of acoustic model.
  --am_ckpt AM_CKPT     Checkpoint file of acoustic model.
  --am_stat AM_STAT     mean and standard deviation used to normalize
                        spectrogram when training acoustic model.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --tones_dict TONES_DICT
                        tone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --spk_id SPK_ID       spk id for multi speaker acoustic model
  --voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,style_melgan_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,wavernn_csmsc}
                        Choose vocoder type of tts task.
  --voc_config VOC_CONFIG
                        Config of voc.
  --voc_ckpt VOC_CKPT   Checkpoint file of voc.
  --voc_stat VOC_STAT   mean and standard deviation used to normalize
                        spectrogram when training voc.
  --lang LANG           Choose model language. zh or en
  --inference_dir INFERENCE_DIR
                        dir to save inference models
  --ngpu NGPU           if ngpu == 0, use cpu.
  --text TEXT           text to synthesize, a 'utt_id sentence' pair per line.
  --output_dir OUTPUT_DIR
                        output dir.
```
1. `--am` is acoustic model type with the format {model_name}_{dataset}
2. `--am_config`, `--am_ckpt`, `--am_stat` and `--phones_dict` are arguments for acoustic model, which correspond to the 4 files in the fastspeech2 pretrained model.
3. `--voc` is vocoder type with the format {model_name}_{dataset}
4. `--voc_config`, `--voc_ckpt`, `--voc_stat` are arguments for vocoder, which correspond to the 3 files in the HiFiGAN pretrained model.
5. `--lang` is the model language, which can be `zh` or `en`.
6. `--inference_dir` is the directory to save the exported inference models.
7. `--text` is the text file, which contains sentences to synthesize.
8. `--output_dir` is the directory to save synthesized audio files.
9. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

## Pretrained Models
The pretrained model can be downloaded here:
- [hifigan_csmsc_ckpt_0.1.1.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_csmsc_ckpt_0.1.1.zip)
- [fastspeech2_nosil_baker_ckpt_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_ckpt_0.4.zip)

The static model can be downloaded here:
- [hifigan_csmsc_static_0.1.1.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_csmsc_static_0.1.1.zip)
- [fastspeech2_nosil_baker_static_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_static_0.4.zip)

The PIR static model can be downloaded here:
- [hifigan_csmsc_static_pir_0.1.1.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_csmsc_static_pir_0.1.1.zip) (Running the PIR model requires setting `FLAGS_enable_pir_api=1`, and the PIR model only works with paddlepaddle>=3.0.0b2)

The ONNX model can be downloaded here:
- [hifigan_csmsc_onnx_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_csmsc_onnx_0.2.0.zip)
- [fastspeech2_csmsc_onnx_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_csmsc_onnx_0.2.0.zip)

The Paddle-Lite model can be downloaded here:
- [hifigan_csmsc_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_csmsc_pdlite_1.3.0.zip)
- [fastspeech2_csmsc_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_csmsc_pdlite_1.3.0.zip)

Model | Step | eval/generator_loss | eval/mel_loss| eval/feature_matching_loss
:-------------:| :------------:| :-----: | :-----: | :--------:
default| 1(gpu) x 2500000|24.927|0.1262|7.554

HiFiGAN checkpoint contains files listed below.

```text
hifigan_csmsc_ckpt_0.1.1
├── default.yaml                  # default config used to train hifigan
├── feats_stats.npy               # statistics used to normalize spectrogram when training hifigan
└── snapshot_iter_2500000.pdz     # generator parameters of hifigan
```

FastSpeech2 checkpoint contains files listed below.

```text
fastspeech2_nosil_baker_ckpt_0.4
├── default.yaml            # default config used to train fastspeech2
├── phone_id_map.txt        # phone vocabulary file when training fastspeech2
├── snapshot_iter_76000.pdz # model parameters and optimizer states
└── speech_stats.npy        # statistics used to normalize spectrogram when training fastspeech2
```

## Acknowledgement
We adapted some code from https://github.com/kan-bayashi/ParallelWaveGAN.


================================================
FILE: examples/csmsc/voc5/conf/default.yaml
================================================
# This is the configuration file for CSMSC dataset.
# This configuration is based on HiFiGAN V1, which is an official configuration. 
# But I found that the optimizer setting does not work well with my implementation.
# So I changed optimizer settings as follows:
# - AdamW -> Adam
# - betas: [0.8, 0.99] -> betas: [0.5, 0.9]
# - Scheduler: ExponentialLR -> MultiStepLR
# To match the shift size difference, the upsample scales are also modified from the original 256 shift setting.

###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
fs: 24000                # Sampling rate.
n_fft: 2048              # FFT size (samples).
n_shift: 300             # Hop size (samples). 12.5ms
win_length: 1200         # Window length (samples). 50ms
                         # If set to null, it will be the same as fft_size.
window: "hann"           # Window function.
n_mels: 80               # Number of mel basis.
fmin: 80                 # Minimum freq in mel basis calculation. (Hz)
fmax: 7600               # Maximum frequency in mel basis calculation. (Hz)

###########################################################
#         GENERATOR NETWORK ARCHITECTURE SETTING          #
###########################################################
generator_params:
    in_channels: 80                       # Number of input channels.
    out_channels: 1                       # Number of output channels.
    channels: 512                         # Number of initial channels.
    kernel_size: 7                        # Kernel size of initial and final conv layers.
    upsample_scales: [5, 5, 4, 3]         # Upsampling scales.
    upsample_kernel_sizes: [10, 10, 8, 6] # Kernel size for upsampling layers.
    resblock_kernel_sizes: [3, 7, 11]     # Kernel size for residual blocks.
    resblock_dilations:                   # Dilations for residual blocks.
        - [1, 3, 5]
        - [1, 3, 5]
        - [1, 3, 5]
    use_additional_convs: True            # Whether to use additional conv layer in residual blocks.
    bias: True                            # Whether to use bias parameter in conv.
    nonlinear_activation: "leakyrelu"     # Nonlinear activation type.
    nonlinear_activation_params:          # Nonlinear activation parameters.
        negative_slope: 0.1
    use_weight_norm: True                 # Whether to apply weight normalization.


###########################################################
#       DISCRIMINATOR NETWORK ARCHITECTURE SETTING        #
###########################################################
discriminator_params:
    scales: 3                              # Number of multi-scale discriminator.
    scale_downsample_pooling: "AvgPool1D"  # Pooling operation for scale discriminator.
    scale_downsample_pooling_params:
        kernel_size: 4                     # Pooling kernel size.
        stride: 2                          # Pooling stride.
        padding: 2                         # Padding size.
    scale_discriminator_params:
        in_channels: 1                     # Number of input channels.
        out_channels: 1                    # Number of output channels.
        kernel_sizes: [15, 41, 5, 3]       # List of kernel sizes.
        channels: 128                      # Initial number of channels.
        max_downsample_channels: 1024      # Maximum number of channels in downsampling conv layers.
        max_groups: 16                     # Maximum number of groups in downsampling conv layers.
        bias: True
        downsample_scales: [4, 4, 4, 4, 1] # Downsampling scales.
        nonlinear_activation: "leakyrelu"  # Nonlinear activation.
        nonlinear_activation_params:
            negative_slope: 0.1
    follow_official_norm: True             # Whether to follow the official norm setting.
    periods: [2, 3, 5, 7, 11]              # List of period for multi-period discriminator.
    period_discriminator_params:
        in_channels: 1                     # Number of input channels.
        out_channels: 1                    # Number of output channels.
        kernel_sizes: [5, 3]               # List of kernel sizes.
        channels: 32                       # Initial number of channels.
        downsample_scales: [3, 3, 3, 3, 1] # Downsampling scales.
        max_downsample_channels: 1024      # Maximum number of channels in downsampling conv layers.
        bias: True                         # Whether to use bias parameter in conv layer.
        nonlinear_activation: "leakyrelu"  # Nonlinear activation.
        nonlinear_activation_params:       # Nonlinear activation parameters.
            negative_slope: 0.1
        use_weight_norm: True              # Whether to apply weight normalization.
        use_spectral_norm: False           # Whether to apply spectral normalization.
    

###########################################################
#                   STFT LOSS SETTING                     #
###########################################################
use_stft_loss: False                 # Whether to use multi-resolution STFT loss.
use_mel_loss: True                   # Whether to use Mel-spectrogram loss.
mel_loss_params:
    fs: 24000
    fft_size: 2048
    hop_size: 300
    win_length: 1200
    window: "hann"
    num_mels: 80
    fmin: 0
    fmax: 12000
    log_base: null
generator_adv_loss_params:
    average_by_discriminators: False # Whether to average loss by #discriminators.
discriminator_adv_loss_params:
    average_by_discriminators: False # Whether to average loss by #discriminators.
use_feat_match_loss: True
feat_match_loss_params:
    average_by_discriminators: False # Whether to average loss by #discriminators.
    average_by_layers: False         # Whether to average loss by #layers in each discriminator.
    include_final_outputs: False     # Whether to include final outputs in feat match loss calculation.

###########################################################
#               ADVERSARIAL LOSS SETTING                  #
###########################################################
lambda_aux: 45.0       # Loss balancing coefficient for STFT loss.
lambda_adv: 1.0        # Loss balancing coefficient for adversarial loss.
lambda_feat_match: 2.0 # Loss balancing coefficient for feat match loss.

###########################################################
#                  DATA LOADER SETTING                    #
###########################################################
batch_size: 16              # Batch size.
batch_max_steps: 8400       # Length of each audio in batch. Make sure it is divisible by hop_size.
num_workers: 2              # Number of workers in DataLoader.

###########################################################
#             OPTIMIZER & SCHEDULER SETTING               #
###########################################################
generator_optimizer_params:
    beta1: 0.5
    beta2: 0.9
    weight_decay: 0.0                   # Generator's weight decay coefficient.
generator_scheduler_params:
    learning_rate: 2.0e-4               # Generator's learning rate.
    gamma: 0.5                          # Generator's scheduler gamma.
    milestones:                         # At each milestone, lr will be multiplied by gamma.
        - 200000
        - 400000
        - 600000
        - 800000
generator_grad_norm: -1                 # Generator's gradient norm.
discriminator_optimizer_params:
    beta1: 0.5
    beta2: 0.9
    weight_decay: 0.0                   # Discriminator's weight decay coefficient.
discriminator_scheduler_params:
    learning_rate: 2.0e-4               # Discriminator's learning rate.
    gamma: 0.5                          # Discriminator's scheduler gamma.
    milestones:                         # At each milestone, lr will be multiplied by gamma.
        - 200000
        - 400000
        - 600000
        - 800000    
discriminator_grad_norm: -1             # Discriminator's gradient norm.            

###########################################################
#                    INTERVAL SETTING                     #
###########################################################
generator_train_start_steps: 1     # Number of steps to start to train generator.
discriminator_train_start_steps: 0 # Number of steps to start to train discriminator.
train_max_steps: 2500000           # Number of training steps.
save_interval_steps: 5000         # Interval steps to save checkpoint.
eval_interval_steps: 1000          # Interval steps to evaluate the network.

###########################################################
#                     OTHER SETTING                       #
###########################################################
num_snapshots: 10                 # max number of snapshots to keep while training
seed: 42                          # random seed for paddle, random, and np.random


================================================
FILE: examples/csmsc/voc5/conf/finetune.yaml
================================================
# This is the configuration file for CSMSC dataset.
# This configuration is based on HiFiGAN V1, which is an official configuration. 
# But I found that the optimizer setting does not work well with my implementation.
# So I changed optimizer settings as follows:
# - AdamW -> Adam
# - betas: [0.8, 0.99] -> betas: [0.5, 0.9]
# - Scheduler: ExponentialLR -> MultiStepLR
# To match the shift size difference, the upsample scales are also modified from the original 256 shift setting.

###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
fs: 24000                # Sampling rate.
n_fft: 2048              # FFT size (samples).
n_shift: 300             # Hop size (samples). 12.5ms
win_length: 1200         # Window length (samples). 50ms
                         # If set to null, it will be the same as fft_size.
window: "hann"           # Window function.
n_mels: 80               # Number of mel basis.
fmin: 80                 # Minimum freq in mel basis calculation. (Hz)
fmax: 7600               # Maximum frequency in mel basis calculation. (Hz)

###########################################################
#         GENERATOR NETWORK ARCHITECTURE SETTING          #
###########################################################
generator_params:
    in_channels: 80                       # Number of input channels.
    out_channels: 1                       # Number of output channels.
    channels: 512                         # Number of initial channels.
    kernel_size: 7                        # Kernel size of initial and final conv layers.
    upsample_scales: [5, 5, 4, 3]         # Upsampling scales.
    upsample_kernel_sizes: [10, 10, 8, 6] # Kernel size for upsampling layers.
    resblock_kernel_sizes: [3, 7, 11]     # Kernel size for residual blocks.
    resblock_dilations:                   # Dilations for residual blocks.
        - [1, 3, 5]
        - [1, 3, 5]
        - [1, 3, 5]
    use_additional_convs: True            # Whether to use additional conv layer in residual blocks.
    bias: True                            # Whether to use bias parameter in conv.
    nonlinear_activation: "leakyrelu"     # Nonlinear activation type.
    nonlinear_activation_params:          # Nonlinear activation parameters.
        negative_slope: 0.1
    use_weight_norm: True                 # Whether to apply weight normalization.


###########################################################
#       DISCRIMINATOR NETWORK ARCHITECTURE SETTING        #
###########################################################
discriminator_params:
    scales: 3                              # Number of multi-scale discriminator.
    scale_downsample_pooling: "AvgPool1D"  # Pooling operation for scale discriminator.
    scale_downsample_pooling_params:
        kernel_size: 4                     # Pooling kernel size.
        stride: 2                          # Pooling stride.
        padding: 2                         # Padding size.
    scale_discriminator_params:
        in_channels: 1                     # Number of input channels.
        out_channels: 1                    # Number of output channels.
        kernel_sizes: [15, 41, 5, 3]       # List of kernel sizes.
        channels: 128                      # Initial number of channels.
        max_downsample_channels: 1024      # Maximum number of channels in downsampling conv layers.
        max_groups: 16                     # Maximum number of groups in downsampling conv layers.
        bias: True
        downsample_scales: [4, 4, 4, 4, 1] # Downsampling scales.
        nonlinear_activation: "leakyrelu"  # Nonlinear activation.
        nonlinear_activation_params:
            negative_slope: 0.1
    follow_official_norm: True             # Whether to follow the official norm setting.
    periods: [2, 3, 5, 7, 11]              # List of period for multi-period discriminator.
    period_discriminator_params:
        in_channels: 1                     # Number of input channels.
        out_channels: 1                    # Number of output channels.
        kernel_sizes: [5, 3]               # List of kernel sizes.
        channels: 32                       # Initial number of channels.
        downsample_scales: [3, 3, 3, 3, 1] # Downsampling scales.
        max_downsample_channels: 1024      # Maximum number of channels in downsampling conv layers.
        bias: True                         # Whether to use bias parameter in conv layer.
        nonlinear_activation: "leakyrelu"  # Nonlinear activation.
        nonlinear_activation_params:       # Nonlinear activation parameters.
            negative_slope: 0.1
        use_weight_norm: True              # Whether to apply weight normalization.
        use_spectral_norm: False           # Whether to apply spectral normalization.
    

###########################################################
#                   STFT LOSS SETTING                     #
###########################################################
use_stft_loss: False                 # Whether to use multi-resolution STFT loss.
use_mel_loss: True                   # Whether to use Mel-spectrogram loss.
mel_loss_params:
    fs: 24000
    fft_size: 2048
    hop_size: 300
    win_length: 1200
    window: "hann"
    num_mels: 80
    fmin: 0
    fmax: 12000
    log_base: null
generator_adv_loss_params:
    average_by_discriminators: False # Whether to average loss by #discriminators.
discriminator_adv_loss_params:
    average_by_discriminators: False # Whether to average loss by #discriminators.
use_feat_match_loss: True
feat_match_loss_params:
    average_by_discriminators: False # Whether to average loss by #discriminators.
    average_by_layers: False         # Whether to average loss by #layers in each discriminator.
    include_final_outputs: False     # Whether to include final outputs in feat match loss calculation.

###########################################################
#               ADVERSARIAL LOSS SETTING                  #
###########################################################
lambda_aux: 45.0       # Loss balancing coefficient for STFT loss.
lambda_adv: 1.0        # Loss balancing coefficient for adversarial loss.
lambda_feat_match: 2.0 # Loss balancing coefficient for feat match loss.

###########################################################
#                  DATA LOADER SETTING                    #
###########################################################
batch_size: 16              # Batch size.
batch_max_steps: 8400       # Length of each audio in batch. Make sure it is divisible by hop_size.
num_workers: 2              # Number of workers in DataLoader.

###########################################################
#             OPTIMIZER & SCHEDULER SETTING               #
###########################################################
generator_optimizer_params:
    beta1: 0.5
    beta2: 0.9
    weight_decay: 0.0                   # Generator's weight decay coefficient.
generator_scheduler_params:
    learning_rate: 2.0e-4               # Generator's learning rate.
    gamma: 0.5                          # Generator's scheduler gamma.
    milestones:                         # At each milestone, lr will be multiplied by gamma.
        - 200000
        - 400000
        - 600000
        - 800000
generator_grad_norm: -1                 # Generator's gradient norm.
discriminator_optimizer_params:
    beta1: 0.5
    beta2: 0.9
    weight_decay: 0.0                   # Discriminator's weight decay coefficient.
discriminator_scheduler_params:
    learning_rate: 2.0e-4               # Discriminator's learning rate.
    gamma: 0.5                          # Discriminator's scheduler gamma.
    milestones:                         # At each milestone, lr will be multiplied by gamma.
        - 200000
        - 400000
        - 600000
        - 800000    
discriminator_grad_norm: -1             # Discriminator's gradient norm.            

###########################################################
#                    INTERVAL SETTING                     #
###########################################################
generator_train_start_steps: 1     # Number of steps to start to train generator.
discriminator_train_start_steps: 0 # Number of steps to start to train discriminator.
train_max_steps: 2500000           # Number of training steps.
save_interval_steps: 10000         # Interval steps to save checkpoint.
eval_interval_steps: 1000          # Interval steps to evaluate the network.
log_interval_steps: 100            # Interval steps to record the training log.

###########################################################
#                     OTHER SETTING                       #
###########################################################
num_snapshots: 10                 # max number of snapshots to keep while training
seed: 42                          # random seed for paddle, random, and np.random


================================================
FILE: examples/csmsc/voc5/conf/iSTFT.yaml
================================================
# This is the configuration file for CSMSC dataset.
# This configuration is based on HiFiGAN V1, which is an official configuration. 
# But I found that the optimizer setting does not work well with my implementation.
# So I changed optimizer settings as follows:
# - AdamW -> Adam
# - betas: [0.8, 0.99] -> betas: [0.5, 0.9]
# - Scheduler: ExponentialLR -> MultiStepLR
# To match the shift size difference, the upsample scales are also modified from the original 256 shift setting.

###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
fs: 24000                # Sampling rate.
n_fft: 2048              # FFT size (samples).
n_shift: 300             # Hop size (samples). 12.5ms
win_length: 1200         # Window length (samples). 50ms
                         # If set to null, it will be the same as fft_size.
window: "hann"           # Window function.
n_mels: 80               # Number of mel basis.
fmin: 80                 # Minimum freq in mel basis calculation. (Hz)
fmax: 7600               # Maximum frequency in mel basis calculation. (Hz)

###########################################################
#         GENERATOR NETWORK ARCHITECTURE SETTING          #
###########################################################
generator_params:
    use_istft: True                       # Use iSTFTNet.
    istft_layer_id: 2                     # Use istft after istft_layer_id layers of upsample layer if use_istft=True.
    n_fft: 2048                           # FFT size (samples) in feature extraction.
    win_length: 1200                      # Window length (samples) in feature extraction.
    in_channels: 80                       # Number of input channels.
    out_channels: 1                       # Number of output channels.
    channels: 512                         # Number of initial channels.
    kernel_size: 7                        # Kernel size of initial and final conv layers.
    upsample_scales:  [5, 5, 4, 3]        # Upsampling scales.
    upsample_kernel_sizes: [10, 10, 8, 6] # Kernel size for upsampling layers.
    resblock_kernel_sizes: [3, 7, 11]     # Kernel size for residual blocks.
    resblock_dilations:                   # Dilations for residual blocks.
        - [1, 3, 5]
        - [1, 3, 5]
        - [1, 3, 5]
    use_additional_convs: True            # Whether to use additional conv layer in residual blocks.
    bias: True                            # Whether to use bias parameter in conv.
    nonlinear_activation: "leakyrelu"     # Nonlinear activation type.
    nonlinear_activation_params:          # Nonlinear activation parameters.
        negative_slope: 0.1
    use_weight_norm: True                 # Whether to apply weight normalization.


###########################################################
#       DISCRIMINATOR NETWORK ARCHITECTURE SETTING        #
###########################################################
discriminator_params:
    scales: 3                              # Number of multi-scale discriminator.
    scale_downsample_pooling: "AvgPool1D"  # Pooling operation for scale discriminator.
    scale_downsample_pooling_params:
        kernel_size: 4                     # Pooling kernel size.
        stride: 2                          # Pooling stride.
        padding: 2                         # Padding size.
    scale_discriminator_params:
        in_channels: 1                     # Number of input channels.
        out_channels: 1                    # Number of output channels.
        kernel_sizes: [15, 41, 5, 3]       # List of kernel sizes.
        channels: 128                      # Initial number of channels.
        max_downsample_channels: 1024      # Maximum number of channels in downsampling conv layers.
        max_groups: 16                     # Maximum number of groups in downsampling conv layers.
        bias: True
        downsample_scales: [4, 4, 4, 4, 1] # Downsampling scales.
        nonlinear_activation: "leakyrelu"  # Nonlinear activation.
        nonlinear_activation_params:
            negative_slope: 0.1
    follow_official_norm: True             # Whether to follow the official norm setting.
    periods: [2, 3, 5, 7, 11]              # List of period for multi-period discriminator.
    period_discriminator_params:
        in_channels: 1                     # Number of input channels.
        out_channels: 1                    # Number of output channels.
        kernel_sizes: [5, 3]               # List of kernel sizes.
        channels: 32                       # Initial number of channels.
        downsample_scales: [3, 3, 3, 3, 1] # Downsampling scales.
        max_downsample_channels: 1024      # Maximum number of channels in downsampling conv layers.
        bias: True                         # Whether to use bias parameter in conv layer.
        nonlinear_activation: "leakyrelu"  # Nonlinear activation.
        nonlinear_activation_params:       # Nonlinear activation parameters.
            negative_slope: 0.1
        use_weight_norm: True              # Whether to apply weight normalization.
        use_spectral_norm: False           # Whether to apply spectral normalization.
    

###########################################################
#                   STFT LOSS SETTING                     #
###########################################################
use_stft_loss: False                 # Whether to use multi-resolution STFT loss.
use_mel_loss: True                   # Whether to use Mel-spectrogram loss.
mel_loss_params:
    fs: 24000
    fft_size: 2048
    hop_size: 300
    win_length: 1200
    window: "hann"
    num_mels: 80
    fmin: 0
    fmax: 12000
    log_base: null
generator_adv_loss_params:
    average_by_discriminators: False # Whether to average loss by #discriminators.
discriminator_adv_loss_params:
    average_by_discriminators: False # Whether to average loss by #discriminators.
use_feat_match_loss: True
feat_match_loss_params:
    average_by_discriminators: False # Whether to average loss by #discriminators.
    average_by_layers: False         # Whether to average loss by #layers in each discriminator.
    include_final_outputs: False     # Whether to include final outputs in feat match loss calculation.

###########################################################
#               ADVERSARIAL LOSS SETTING                  #
###########################################################
lambda_aux: 45.0       # Loss balancing coefficient for STFT loss.
lambda_adv: 1.0        # Loss balancing coefficient for adversarial loss.
lambda_feat_match: 2.0 # Loss balancing coefficient for feat match loss.

###########################################################
#                  DATA LOADER SETTING                    #
###########################################################
batch_size: 16              # Batch size.
batch_max_steps: 8400       # Length of each audio in batch. Make sure it is divisible by hop_size.
num_workers: 2              # Number of workers in DataLoader.

###########################################################
#             OPTIMIZER & SCHEDULER SETTING               #
###########################################################
generator_optimizer_params:
    beta1: 0.5
    beta2: 0.9
    weight_decay: 0.0                   # Generator's weight decay coefficient.
generator_scheduler_params:
    learning_rate: 2.0e-4               # Generator's learning rate.
    gamma: 0.5                          # Generator's scheduler gamma.
    milestones:                         # At each milestone, lr will be multiplied by gamma.
        - 200000
        - 400000
        - 600000
        - 800000
generator_grad_norm: -1                 # Generator's gradient norm.
discriminator_optimizer_params:
    beta1: 0.5
    beta2: 0.9
    weight_decay: 0.0                   # Discriminator's weight decay coefficient.
discriminator_scheduler_params:
    learning_rate: 2.0e-4               # Discriminator's learning rate.
    gamma: 0.5                          # Discriminator's scheduler gamma.
    milestones:                         # At each milestone, lr will be multiplied by gamma.
        - 200000
        - 400000
        - 600000
        - 800000    
discriminator_grad_norm: -1             # Discriminator's gradient norm.            

###########################################################
#                    INTERVAL SETTING                     #
###########################################################
generator_train_start_steps: 1     # Number of steps to start to train generator.
discriminator_train_start_steps: 0 # Number of steps to start to train discriminator.
train_max_steps: 2500000          # Number of training steps.
save_interval_steps: 5000         # Interval steps to save checkpoint.
eval_interval_steps: 1000          # Interval steps to evaluate the network.

###########################################################
#                     OTHER SETTING                       #
###########################################################
num_snapshots: 10                 # max number of snapshots to keep while training
seed: 42                          # random seed for paddle, random, and np.random


================================================
FILE: examples/csmsc/voc5/finetune.sh
================================================
#!/bin/bash
# Fine-tune the vocoder of this example on mels produced by a pretrained
# FastSpeech2 acoustic model (config: conf/finetune.yaml, output: exp/finetune).
#
# Stages:
#   0  run gen_gta_mel.py with the FastSpeech2 checkpoint, writing to dump_finetune
#   1  run link_wav.py from dump to dump_finetune
#      (presumably links the original wavs next to the generated mels -- verify in utils/link_wav.py)
#   2  reuse the feature statistics of the original dump
#   3  normalize train/dev/test with the train statistics
#   4  train with conf/finetune.yaml
#
# Expects to be run from the example directory so that path.sh, dump/,
# durations.txt and fastspeech2_nosil_baker_ckpt_0.4/ resolve relatively.

source path.sh

# Defaults; parse_options.sh is sourced right after them, presumably so they
# can be overridden from the command line (e.g. --stage 0 --stop-stage 0).
gpus=0
stage=0
stop_stage=100

source "${MAIN_ROOT}/utils/parse_options.sh" || exit 1

if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    # NOTE: use ${HOME} rather than '~' for --rootdir: bash does not perform
    # tilde expansion after '=' inside an ordinary command argument, so
    # '--rootdir=~/...' would be passed to Python with a literal '~'.
    python3 "${MAIN_ROOT}/paddlespeech/t2s/exps/fastspeech2/gen_gta_mel.py" \
        --fastspeech2-config=fastspeech2_nosil_baker_ckpt_0.4/default.yaml \
        --fastspeech2-checkpoint=fastspeech2_nosil_baker_ckpt_0.4/snapshot_iter_76000.pdz \
        --fastspeech2-stat=fastspeech2_nosil_baker_ckpt_0.4/speech_stats.npy \
        --dur-file=durations.txt \
        --output-dir=dump_finetune \
        --phones-dict=fastspeech2_nosil_baker_ckpt_0.4/phone_id_map.txt \
        --dataset=baker \
        --rootdir="${HOME}/datasets/BZNSYP/"
fi

if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    python3 "${MAIN_ROOT}/utils/link_wav.py" \
        --old-dump-dir=dump \
        --dump-dir=dump_finetune
fi

if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    # get features' stats(mean and std)
    # The stats are not recomputed: the ones from the original dump are copied.
    echo "Get features' stats ..."
    cp dump/train/feats_stats.npy dump_finetune/train/
fi

if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    # normalize, dev and test should use train's stats
    echo "Normalize ..."

    python3 "${BIN_DIR}/../normalize.py" \
        --metadata=dump_finetune/train/raw/metadata.jsonl \
        --dumpdir=dump_finetune/train/norm \
        --stats=dump_finetune/train/feats_stats.npy \
        --skip-wav-copy

    python3 "${BIN_DIR}/../normalize.py" \
        --metadata=dump_finetune/dev/raw/metadata.jsonl \
        --dumpdir=dump_finetune/dev/norm \
        --stats=dump_finetune/train/feats_stats.npy \
        --skip-wav-copy

    python3 "${BIN_DIR}/../normalize.py" \
        --metadata=dump_finetune/test/raw/metadata.jsonl \
        --dumpdir=dump_finetune/test/norm \
        --stats=dump_finetune/train/feats_stats.npy \
        --skip-wav-copy
fi

if [ "${stage}" -le 4 ] && [ "${stop_stage}" -ge 4 ]; then
    # python3 (not bare `python`) to match the interpreter used by stages 0-3.
    CUDA_VISIBLE_DEVICES=${gpus} \
    FLAGS_cudnn_exhaustive_search=true \
    FLAGS_conv_workspace_size_limit=4000 \
    python3 "${BIN_DIR}/train.py" \
        --train-metadata=dump_finetune/train/norm/metadata.jsonl \
        --dev-metadata=dump_finetune/dev/norm/metadata.jsonl \
        --config=conf/finetune.yaml \
        --output-dir=exp/finetune \
        --ngpu=1
fi

================================================
FILE: examples/csmsc/voc5/iSTFTNet.md
================================================
# iSTFTNet with CSMSC

This example contains code used to train an [iSTFTNet](https://arxiv.org/abs/2203.02395) model with the [Chinese Standard Mandarin Speech Corpus](https://www.data-baker.com/open_source.html).

## Dataset
### Download and Extract
Download CSMSC from its [official website](https://test.data-baker.com/data/index/TNtts/) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/BZNSYP`.

The structure of the folder is listed below.

```text
└─ Wave
    └─ .wav files (audio speech)
└─ PhoneLabeling
    └─ .interval files (alignment between phoneme and duration)
└─ ProsodyLabeling
   └─ 000001-010000.txt (text with prosodic by pinyin)
```

### Get MFA Result and Extract
We use [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) results to cut silence at the edge of audio.
You can download it from here: [baker_alignment_tone.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/BZNSYP/with_tone/baker_alignment_tone.tar.gz), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) in our repo.

## Get Started
Assume the path to the dataset is `~/datasets/BZNSYP`.
Assume the path to the MFA result of CSMSC is `./baker_alignment_tone`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to run only one stage. For example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.

```text
dump
├── dev
│   ├── norm
│   └── raw
├── test
│   ├── norm
│   └── raw
└── train
    ├── norm
    ├── raw
    └── feats_stats.npy
```
The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The `raw` folder contains the log magnitude of the mel spectrogram of each utterance, while the norm folder contains the normalized spectrogram. The statistics used to normalize the spectrogram are computed from the training set, which is located in `dump/train/feats_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains id and paths to the spectrogram of each utterance.

### Model Training
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
`./local/train.sh` calls `${BIN_DIR}/train.py`.
Here's the complete help message.

```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU]

Train a HiFiGAN model.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       HiFiGAN config file.
  --train-metadata TRAIN_METADATA
                        training data.
  --dev-metadata DEV_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
```

1. `--config` is a config file in yaml format to overwrite the default config, which can be found at `conf/iSTFT.yaml`.
2. `--train-metadata` and `--dev-metadata` should be the metadata file in the normalized subfolder of `train` and `dev` in the `dump` folder.
3. `--output-dir` is the directory to save the results of the experiment. Checkpoints are saved in `checkpoints/` inside this directory.
4. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

### Synthesizing
`./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
```text
usage: synthesize.py [-h] [--generator-type GENERATOR_TYPE] [--config CONFIG]
                     [--checkpoint CHECKPOINT] [--test-metadata TEST_METADATA]
                     [--output-dir OUTPUT_DIR] [--ngpu NGPU]

Synthesize with GANVocoder.

optional arguments:
  -h, --help            show this help message and exit
  --generator-type GENERATOR_TYPE
                        type of GANVocoder, should in {pwgan, mb_melgan,
                        style_melgan, } now
  --config CONFIG       GANVocoder config file.
  --checkpoint CHECKPOINT
                        snapshot to load.
  --test-metadata TEST_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
```

1. `--config` config file. You should use the same config with which the model is trained.
2. `--checkpoint` is the checkpoint to load. Pick one of the checkpoints from `checkpoints` inside the training output directory.
3. `--test-metadata` is the metadata of the test dataset. Use the `metadata.jsonl` in the `test/norm` subfolder from the processed directory.
4. `--output-dir` is the directory to save the synthesized audio files.
5. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

## Pretrained Models

The pretrained model can be downloaded here:

- [iSTFTNet_csmsc_ckpt.zip](https://pan.baidu.com/s/1SNDlRWOGOcbbrKf5w-TJaA?pwd=r1e5)

iSTFTNet checkpoint contains files listed below.

```text
iSTFTNet_csmsc_ckpt
├── iSTFT.yaml                    # config used to train iSTFTNet
├── feats_stats.npy               # statistics used to normalize spectrogram when training iSTFTNet
└── snapshot_iter_50000.pdz       # generator parameters of iSTFTNet
```

A comparison between iSTFTNet and HiFiGAN:

|  Model   |      Step      | eval/generator_loss | eval/mel_loss | eval/feature_matching_loss |  rtf   |
|:--------:|:--------------:|:-------------------:|:-------------:|:--------------------------:| :---: |
| hifigan  | 1(gpu) x 50000 |       13.989        |    0.14683    |           1.3484           |  0.01767   |
| istftNet | 1(gpu) x 50000 |       13.319        |    0.14818    |           1.1069           |  0.01069   |

> Rtf is tested on the CSMSC test dataset, and the test environment is aistudio v100 16G 1GPU, the test command is `./run.sh --stage 2 --stop-stage 2`

The pretrained HiFiGAN model used in the comparison can be downloaded here:

- [hifigan_csmsc_ckpt.zip](https://pan.baidu.com/s/1pGY6RYV7yEB_5hRI_JoWig?pwd=tcaj)

## Acknowledgement

We adapted some code from https://github.com/rishikksh20/iSTFTNet-pytorch.git.


================================================
FILE: examples/csmsc/voc5/local/synthesize.sh
================================================
#!/bin/bash
# Synthesize waveforms for the test split from a trained vocoder checkpoint.
#
# Usage: synthesize.sh <config_path> <train_output_path> <ckpt_name>
# Expects BIN_DIR to be set in the environment (path.sh exports it).

conf=$1
exp_dir=$2
snapshot=$3

# Options forwarded to the shared GAN-vocoder synthesize.py entry point.
synth_args=(
    --config=${conf}
    --checkpoint=${exp_dir}/checkpoints/${snapshot}
    --test-metadata=dump/test/norm/metadata.jsonl
    --output-dir=${exp_dir}/test
    --generator-type=hifigan
)

# The FLAGS_* variables are set for this python process only.
FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 ${BIN_DIR}/../synthesize.py "${synth_args[@]}"


================================================
FILE: examples/csmsc/voc5/local/synthesize_e2e.sh
================================================
#!/bin/bash
# End-to-end text-to-speech synthesis: a FastSpeech2 acoustic model followed by
# the vocoder checkpoint trained in this example.
#
# Usage: synthesize_e2e.sh <config_path> <train_output_path> <ckpt_name>
#   config_path        vocoder config, passed as --voc_config
#   train_output_path  experiment directory; the checkpoint is read from
#                      <train_output_path>/checkpoints/<ckpt_name>
#   ckpt_name          vocoder snapshot file name
# Expects BIN_DIR to be set in the environment (path.sh exports it).

config_path=$1
train_output_path=$2
ckpt_name=$3

# The acoustic-model files are read from ./fastspeech2_nosil_baker_ckpt_0.4
# (relative to the working directory), so that directory must already exist.
# NOTE(review): BIN_DIR is .../exps/gan_vocoder/<MODEL> in this example's path.sh,
#   so `${BIN_DIR}/../../assets` resolves under .../exps/ -- confirm that
#   sentences.txt really lives there.
# NOTE(review): dump/phone_id_map.txt is not part of the dump layout described in
#   this example's README -- confirm where it is expected to come from.
# The FLAGS_* variables are set for this python process only.
FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 ${BIN_DIR}/../../synthesize_e2e.py \
    --am=fastspeech2_csmsc \
    --am_config=fastspeech2_nosil_baker_ckpt_0.4/default.yaml \
    --am_ckpt=fastspeech2_nosil_baker_ckpt_0.4/snapshot_iter_76000.pdz \
    --am_stat=fastspeech2_nosil_baker_ckpt_0.4/speech_stats.npy \
    --voc=hifigan_csmsc \
    --voc_config=${config_path} \
    --voc_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
    --voc_stat=dump/train/feats_stats.npy \
    --lang=zh \
    --text=${BIN_DIR}/../../assets/sentences.txt \
    --output_dir=${train_output_path}/test_e2e \
    --phones_dict=dump/phone_id_map.txt \
    --inference_dir=${train_output_path}/inference

================================================
FILE: examples/csmsc/voc5/path.sh
================================================
#!/bin/bash
# Environment setup for this example; run.sh pulls it in with `source path.sh`.

# Repository root: three levels above the current working directory.
export MAIN_ROOT=$(realpath ${PWD}/../../../)

# Put the repo root and its utils/ on PATH, and the repo root on PYTHONPATH.
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

export LC_ALL=C
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONDONTWRITEBYTECODE=1

# Directory containing the experiment entry scripts for the selected model.
MODEL=hifigan
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/gan_vocoder/${MODEL}

================================================
FILE: examples/csmsc/voc5/run.sh
================================================
#!/bin/bash
# Recipe driver for this vocoder example. Stages:
#   0 preprocess, 1 train, 2 synthesize from metadata, 3 end-to-end synthesis,
#   4 post-training static quantization (PTQ_static).
# A stage runs when stage <= N <= stop_stage.

# Abort on the first failing command; path.sh exports MAIN_ROOT and BIN_DIR.
set -e
source path.sh

# Defaults; each can be overridden on the command line through parse_options.sh.
gpus=0,1
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_50000.pdz

# With the following command you can choose the stage range you want to run,
# such as `./run.sh --stage 0 --stop-stage 0`.
# This cannot be mixed with positional arguments (`$1`, `$2`, ...).
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # prepare data
    ./local/preprocess.sh ${conf_path} || exit -1
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # train model, all `ckpt` under `train_output_path/checkpoints/` dir
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # synthesize waveforms from the test metadata with checkpoint ${ckpt_name}
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # synthesize_e2e, vocoder is hifigan by default
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

# PTQ_static
# NOTE(review): ./local/PTQ_static.sh is not visible here -- confirm it exists in this example.
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/PTQ_static.sh  ${train_output_path} hifigan_csmsc || exit -1
fi

================================================
FILE: examples/csmsc/voc6/README.md
================================================
# WaveRNN with CSMSC
This example contains code used to train a [WaveRNN](https://arxiv.org/abs/1802.08435) model with [Chinese Standard Mandarin Speech Corpus](https://www.data-baker.com/open_source.html).
## Dataset
### Download and Extract
Download CSMSC from its [official website](https://test.data-baker.com/data/index/TNtts/) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/BZNSYP`.

### Get MFA Result and Extract
We use [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) results to cut silence at the edge of audio.
You can download it from here: [baker_alignment_tone.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/BZNSYP/with_tone/baker_alignment_tone.tar.gz), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) in our repo.

## Get Started
Assume the path to the dataset is `~/datasets/BZNSYP`.
Assume the path to the MFA result of CSMSC is `./baker_alignment_tone`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.

```text
dump
├── dev
│   ├── norm
│   └── raw
├── test
│   ├── norm
│   └── raw
└── train
    ├── norm
    ├── raw
    └── feats_stats.npy
```
The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The `raw` folder contains the log magnitude of the mel spectrogram of each utterance, while the norm folder contains the normalized spectrogram. The statistics used to normalize the spectrogram are computed from the training set, which is located in `dump/train/feats_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains id and paths to the spectrogram of each utterance.

### Model Training
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
`./local/train.sh` calls `${BIN_DIR}/train.py`.
Here's the complete help message.

```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU]

Train a WaveRNN model.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       WaveRNN config file.
  --train-metadata TRAIN_METADATA
                        training data.
  --dev-metadata DEV_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
```

1. `--config` is a config file in yaml format to overwrite the default config, which can be found at `conf/default.yaml`.
2. `--train-metadata` and `--dev-metadata` should be the metadata file in the normalized subfolder of `train` and `dev` in the `dump` folder.
3. `--output-dir` is the directory to save the results of the experiment. Checkpoints are saved in `checkpoints/` inside this directory.
4. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

### Synthesizing
`./local/synthesize.sh` calls `${BIN_DIR}/synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
```text
usage: synthesize.py [-h] [--config CONFIG] [--checkpoint CHECKPOINT]
                     [--test-metadata TEST_METADATA] [--output-dir OUTPUT_DIR]
                     [--ngpu NGPU]

Synthesize with WaveRNN.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       Vocoder config file.
  --checkpoint CHECKPOINT
                        snapshot to load.
  --test-metadata TEST_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
```

1. `--config` wavernn config file. You should use the same config with which the model is trained.
2. `--checkpoint` is the checkpoint to load. Pick one of the checkpoints from `checkpoints` inside the training output directory.
3. `--test-metadata` is the metadata of the test dataset. Use the `metadata.jsonl` in the `test/norm` subfolder from the processed directory.
4. `--output-dir` is the directory to save the synthesized audio files.
5. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

## Pretrained Models
The pretrained model can be downloaded here:
- [wavernn_csmsc_ckpt_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/wavernn/wavernn_csmsc_ckpt_0.2.0.zip)

The static model can be downloaded here:
- [wavernn_csmsc_static_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/wavernn/wavernn_csmsc_static_0.2.0.zip)
- [wavernn_csmsc_static_1.0.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/wavernn/wavernn_csmsc_static_1.0.0.zip) (fix bug for paddle 2.3)

Model | Step | eval/loss
:-------------:|:------------:| :------------:
default| 1(gpu) x 400000|2.602768

WaveRNN checkpoint contains files listed below.

```text
wavernn_csmsc_ckpt_0.2.0
├── default.yaml                   # default config used to train wavernn
├── feats_stats.npy                # statistics used to normalize spectrogram when training wavernn
└── snapshot_iter_400000.pdz       # parameters of wavernn
```


================================================
FILE: examples/csmsc/voc6/conf/default.yaml
================================================

###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
fs: 24000                # Sampling rate.
n_fft: 2048              # FFT size (samples).
n_shift: 300             # Hop size (samples). 12.5ms
win_length: 1200         # Window length (samples). 50ms
                         # If set to null, it will be the same as fft_size.
window: "hann"           # Window function.
n_mels: 80               # Number of mel basis.
fmin: 80                 # Minimum freq in mel basis calculation. (Hz)
fmax: 7600               # Maximum frequency in mel basis calculation. (Hz)
mu_law: True             # Recommended to suppress noise if using raw bits.


###########################################################
#                       MODEL SETTING                     #
###########################################################
model:
    rnn_dims: 512                     # Hidden dims of RNN Layers.
    fc_dims: 512
    bits: 9                           # Bit depth of signal
    aux_context_window: 2             # Context window size for auxiliary feature.
                                      # If set to 2, previous 2 and future 2 frames will be considered.
    aux_channels: 80                  # Number of channels for auxiliary feature conv.
                                      # Must be the same as num_mels.
    upsample_scales: [4, 5, 3, 5]     # Upsampling scales. Product of these must be the same as hop size, same with pwgan here
    compute_dims: 128                 # Dims of Conv1D in MelResNet.
    res_out_dims: 128                 # Dims of output in MelResNet.
    res_blocks: 10                    # Number of residual blocks.
    mode: RAW                         # either 'raw'(softmax on raw bits) or 'mold' (sample from mixture of logistics)
inference:
    gen_batched: True                 # whether to generate samples in batch mode
    target: 12000                     # target number of samples to be generated in each batch entry
    overlap: 600                      # number of samples for crossfading between batches


###########################################################
#                  DATA LOADER SETTING                    #
###########################################################
batch_size: 64              # Batch size.
batch_max_steps: 4500       # Length of each audio in batch. Make sure it is divisible by hop_size.
num_workers: 2              # Number of workers in DataLoader.

###########################################################
#                     OPTIMIZER SETTING                   #
###########################################################
grad_clip: 4.0
learning_rate: 1.0e-4                


###########################################################
#                    INTERVAL SETTING                     #
###########################################################

train_max_steps: 400000               # Number of training steps.
save_interval_steps: 5000             # Interval steps to save checkpoint.
eval_interval_steps: 1000             # Interval steps to evaluate the network.
gen_eval_samples_interval_steps: 5000 # the iteration interval of generating valid samples
generate_num: 5                       # number of samples to generate at each checkpoint

###########################################################
#                     OTHER SETTING                       #
###########################################################
num_snapshots: 10                 # max number of snapshots to keep while training
seed: 42                          # random seed for paddle, random, and np.random


================================================
FILE: examples/csmsc/voc6/local/preprocess.sh
================================================
#!/bin/bash
# Data preparation: durations from MFA -> feature extraction -> statistics ->
# normalization of the train/dev/test splits.
#
# Usage: preprocess.sh <config_path>
# Expects MAIN_ROOT and BIN_DIR to be set in the environment (path.sh exports them).

# Stage range is fixed here (no option parsing in this script); edit to run a subset.
stage=0
stop_stage=100

config_path=$1

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # get durations from MFA's result
    echo "Generate durations.txt from MFA results ..."
    python3 ${MAIN_ROOT}/utils/gen_duration_from_textgrid.py \
        --inputdir=./baker_alignment_tone \
        --output=durations.txt \
        --config=${config_path}
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # extract features
    # NOTE(review): bash does not expand `~` inside `--rootdir=~/...`, so the
    #   literal string is passed on -- confirm preprocess.py expands it itself.
    echo "Extract features ..."
    python3 ${BIN_DIR}/../gan_vocoder/preprocess.py \
        --rootdir=~/datasets/BZNSYP/ \
        --dataset=baker \
        --dumpdir=dump \
        --dur-file=durations.txt \
        --config=${config_path} \
        --cut-sil=True \
        --num-cpu=20
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # get features' stats(mean and std)
    echo "Get features' stats ..."
    python3 ${MAIN_ROOT}/utils/compute_statistics.py \
        --metadata=dump/train/raw/metadata.jsonl \
        --field-name="feats"
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # normalize, dev and test should use train's stats
    echo "Normalize ..."
   
    # train split
    python3 ${BIN_DIR}/../gan_vocoder/normalize.py \
        --metadata=dump/train/raw/metadata.jsonl \
        --dumpdir=dump/train/norm \
        --stats=dump/train/feats_stats.npy \
        --skip-wav-copy

    # dev split
    python3 ${BIN_DIR}/../gan_vocoder/normalize.py \
        --metadata=dump/dev/raw/metadata.jsonl \
        --dumpdir=dump/dev/norm \
        --stats=dump/train/feats_stats.npy \
        --skip-wav-copy
    
    # test split
    python3 ${BIN_DIR}/../gan_vocoder/normalize.py \
        --metadata=dump/test/raw/metadata.jsonl \
        --dumpdir=dump/test/norm \
        --stats=dump/train/feats_stats.npy \
        --skip-wav-copy
fi


================================================
FILE: examples/csmsc/voc6/local/synthesize.sh
================================================
#!/bin/bash
# Synthesize waveforms for the test split from a trained WaveRNN checkpoint.
#
# Usage: synthesize.sh <config_path> <train_output_path> <ckpt_name>
# Expects BIN_DIR to be set in the environment (path.sh exports it).

conf=$1
exp_dir=$2
snapshot=$3

# Options forwarded to synthesize.py.
synth_args=(
    --config=${conf}
    --checkpoint=${exp_dir}/checkpoints/${snapshot}
    --test-metadata=dump/test/norm/metadata.jsonl
    --output-dir=${exp_dir}/test
)

# The FLAGS_* variables are set for this python process only.
FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 ${BIN_DIR}/synthesize.py "${synth_args[@]}"


================================================
FILE: examples/csmsc/voc6/path.sh
================================================
#!/bin/bash
# Environment setup for this example; run.sh pulls it in with `source path.sh`.

# Repository root: three levels above the current working directory.
export MAIN_ROOT=$(realpath ${PWD}/../../../)

# Put the repo root and its utils/ on PATH, and the repo root on PYTHONPATH.
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

export LC_ALL=C
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONDONTWRITEBYTECODE=1

# Directory containing the experiment entry scripts for the selected model.
MODEL=wavernn
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}

================================================
FILE: examples/csmsc/voc6/run.sh
================================================
#!/bin/bash
# Recipe driver for WaveRNN on CSMSC. Stages:
#   0 preprocess, 1 train, 2 synthesize from metadata.
# A stage runs when stage <= N <= stop_stage; choose a range with
# `./run.sh --stage N --stop-stage M`.

# Abort on the first failing command; path.sh exports MAIN_ROOT and BIN_DIR.
set -e
source path.sh

# Defaults; each can be overridden on the command line through parse_options.sh.
gpus=0,1
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
# NOTE(review): not referenced by any stage below; kept in case callers still
# pass it as an option.
test_input=dump/dump_gta_test
# Final snapshot of a full run with conf/default.yaml (train_max_steps: 400000).
# That config keeps at most `num_snapshots: 10` checkpoints saved every 5000
# steps, so an early snapshot such as snapshot_iter_100000.pdz is no longer
# available once training has finished.
ckpt_name=snapshot_iter_400000.pdz

source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # prepare data
    ./local/preprocess.sh ${conf_path} || exit -1
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # train model, checkpoints are written under `train_output_path/checkpoints/`
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # synthesize
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi


================================================
FILE: examples/esc50/README.md
================================================
# 声音分类

声音分类和检测是声音算法的一个热门研究方向。  

对于声音分类任务，传统机器学习的一个常用做法是首先人工提取音频的时域和频域的多种特征并做特征选择、组合、变换等，然后基于SVM或决策树进行分类。而端到端的深度学习则通常利用深度网络如RNN，CNN等直接对声音波形(waveform)或时频特征(time-frequency)进行特征学习(representation learning)和分类预测。

在IEEE ICASSP 2017 大会上，谷歌开放了一个大规模的音频数据集[Audioset](https://research.google.com/audioset/)。该数据集包含了 632 类的音频类别以及 2,084,320 条人工标记的每段 **10 秒**长度的声音剪辑片段（来源于YouTube视频）。目前该数据集已经有 210万 个已标注的视频数据，5800 小时的音频数据，经过标记的声音样本的标签类别为 527。

`PANNs`([PANNs: Large-Scale Pretrained Audio Neural Networks for Audio Pattern Recognition](https://arxiv.org/pdf/1912.10211.pdf))是基于Audioset数据集训练的声音分类/识别的模型。经过预训练后，模型可以用于提取音频的embedding。本示例将使用`PANNs`的预训练模型Finetune完成声音分类的任务。


## 模型简介

PaddleAudio提供了PANNs的CNN14、CNN10和CNN6的预训练模型，可供用户选择使用：
- CNN14: 该模型主要包含12个卷积层和2个全连接层，模型参数的数量为 79.6M，embedding维度是 2048。
- CNN10: 该模型主要包含8个卷积层和2个全连接层，模型参数的数量为 4.9M，embedding维度是 512。
- CNN6: 该模型主要包含4个卷积层和2个全连接层，模型参数的数量为 4.5M，embedding维度是 512。


## 数据集

[ESC-50: Dataset for Environmental Sound Classification](https://github.com/karolpiczak/ESC-50) 是一个包含有 2000 个带标签的时长为 **5 秒**的环境声音样本，音频样本采样率为 44,100Hz 的单通道音频文件，所有样本根据标签被划分为 50 个类别，每个类别有 40 个样本。

## 模型指标

根据 `ESC-50` 提供的fold信息，对数据集进行 5-fold 的 fine-tune 训练和评估，平均准确率如下：

|Model|Acc|
|--|--|
|CNN14| 0.9500
|CNN10| 0.8975
|CNN6| 0.8825

## 快速开始

### 模型训练

运行下面的命令，可在训练集上进行模型的finetune，支持单机的单卡训练和多卡训练。

启动训练:
```shell
$ CUDA_VISIBLE_DEVICES=0 ./run.sh 1 conf/panns.yaml
```

训练的参数可在 `conf/panns.yaml` 的 `training` 中配置，其中：
- `epochs`: 训练轮次，默认为 50。
- `learning_rate`: Fine-tune的学习率；默认为5e-5。
- `batch_size`: 批处理大小，请结合显存情况进行调整，若出现显存不足，请适当调低这一参数；默认为 16。
- `num_workers`: Dataloader获取数据的子进程数。配置文件中默认为 2；设为 0 时，加载数据的流程在主进程执行。
- `checkpoint_dir`: 模型参数文件和optimizer参数文件的保存目录，默认为`./checkpoint`。
- `save_freq`: 训练过程中的模型保存频率，默认为 10。
- `log_freq`: 训练过程中的信息打印频率，默认为 10。

示例代码中使用的预训练模型为`CNN14`，如果想更换为其他预训练模型，可通过修改 `conf/panns.yaml` 的 `model` 中配置：
```yaml
# CNN14
model:
  backbone: 'paddlespeech.cls.models:cnn14'
```
```yaml
# CNN10
model:
  backbone: 'paddlespeech.cls.models:cnn10'
```
```yaml
# CNN6
model:
  backbone: 'paddlespeech.cls.models:cnn6'
```

### 模型预测

```shell
$ CUDA_VISIBLE_DEVICES=0 ./run.sh 2 conf/panns.yaml
```

预测的参数可在 `conf/panns.yaml` 的 `predicting` 中配置，其中：
- `audio_file`: 指定预测的音频文件。
- `top_k`: 预测显示的top k标签的得分，配置文件中默认为 10。
- `checkpoint`: 模型参数checkpoint文件。

输出的预测结果如下：
```
[/audio/dog.wav]
Dog: 0.9999538660049438
Clock tick: 1.3341237718123011e-05
Cat: 6.579841738130199e-06
```

### 模型部署

#### 1. 动转静

模型训练结束后，可以将已保存的动态图参数导出成静态图的模型和参数，然后实施静态图的部署。

```shell
$ CUDA_VISIBLE_DEVICES=0 ./run.sh 3 ./checkpoint/epoch_50/model.pdparams ./export
```

`paddlespeech/cls/exps/panns/export_model.py` 脚本中可支持配置的参数：
- `checkpoint`: 模型参数checkpoint文件。
- `output_dir`: 导出静态图模型和参数文件的保存目录。

导出的静态图模型和参数文件如下：
```sh
$ tree export
export
├── inference.pdiparams
├── inference.pdiparams.info
└── inference.pdmodel
```

#### 2. 模型部署和预测

`paddlespeech/cls/exps/panns/deploy/predict.py` 脚本使用了`paddle.inference`模块下的api，提供了python端部署的示例：

```shell
$ CUDA_VISIBLE_DEVICES=0 ./run.sh 4 cpu ./export /audio/dog.wav
```

`paddlespeech/cls/exps/panns/deploy/predict.py` 脚本中可支持配置的主要参数：
- `device`: 指定模型预测时使用的设备。
- `model_dir`: 导出静态图模型和参数文件的保存目录。
- `wav`: 指定预测的音频文件。

## Reference
* [PANNs(PANNs: Large-Scale Pretrained Audio Neural Networks for Audio Pattern Recognition)](https://arxiv.org/abs/1912.10211)


================================================
FILE: examples/esc50/RESULTS.md
================================================
## Metrics

5-fold cross validation accuracy on [ESC-50](https://github.com/karolpiczak/ESC-50) dataset:

|Model|Acc|
|--|--|
|CNN14| 0.9500
|CNN10| 0.8975
|CNN6| 0.8825


================================================
FILE: examples/esc50/cls0/conf/panns.yaml
================================================
# Fine-tuning / prediction config for PANNs on ESC-50.

data:
  dataset: 'paddle.audio.datasets:ESC50'  # presumably a 'module:class' path -- confirm against the loader
  num_classes: 50                         # ESC-50 has 50 categories
  train:
    mode: 'train'
    split: 1                              # presumably the ESC-50 fold index -- TODO confirm
  dev:
    mode: 'dev'
    split: 1                              # presumably the ESC-50 fold index -- TODO confirm

model:
  # Pretrained backbone; the README lists cnn14, cnn10 and cnn6 as alternatives.
  backbone: 'paddlespeech.cls.models:cnn14'

# Feature-extraction settings.
# NOTE(review): units are not stated here -- presumably Hz for sr/f_min/f_max
# and samples for n_fft/hop_length/win_length; confirm against the feature code.
feature:
  sr: 32000
  n_fft: 1024
  hop_length: 320
  window: 'hann'
  win_length: 1024
  f_min: 50.0
  f_max: 14000.0
  n_mels: 64

training:
  epochs: 50                      # number of training epochs
  learning_rate: 0.00005          # fine-tuning learning rate
  num_workers: 2                  # DataLoader worker subprocesses
  batch_size: 16                  # lower this if GPU memory runs out
  checkpoint_dir: './checkpoint'  # where model/optimizer parameters are saved
  save_freq: 10                   # checkpoint saving frequency
  log_freq: 10                    # logging frequency

predicting:
  audio_file: '/audio/dog.wav'    # audio file to run prediction on
  top_k: 10                       # number of top-scoring labels to display
  checkpoint: './checkpoint/epoch_50/model.pdparams'  # model parameters to load


================================================
FILE: examples/esc50/cls0/local/export.sh
================================================
#!/bin/bash
# Export a saved checkpoint with export_model.py.
#
# Usage: export.sh <checkpoint> <output_dir>
# Expects BIN_DIR to be set in the environment (path.sh exports it).

export_args=(
    --checkpoint $1
    --output_dir $2
)
python3 ${BIN_DIR}/export_model.py "${export_args[@]}"


================================================
FILE: examples/esc50/cls0/local/infer.sh
================================================
#!/bin/bash
# Run predict.py with the given config file.
#
# Usage: infer.sh <cfg_path>
# Expects BIN_DIR to be set in the environment (path.sh exports it).

cfg_path=$1
python3 ${BIN_DIR}/predict.py --cfg_path=${cfg_path}


================================================
FILE: examples/esc50/cls0/local/static_model_infer.sh
================================================
#!/bin/bash
# Run deploy/predict.py on one audio file with an exported model directory.
#
# Usage: static_model_infer.sh <device> <model_dir> <audio_file>
# Expects BIN_DIR to be set in the environment (path.sh exports it).

predict_args=(
    --device $1
    --model_dir $2
    --wav $3
)
python3 ${BIN_DIR}/deploy/predict.py "${predict_args[@]}"


================================================
FILE: examples/esc50/cls0/local/train.sh
================================================
#!/bin/bash
# Launch train.py, through paddle.distributed.launch when GPUs are available.
#
# Usage: train.sh <ngpu> <cfg_path>
# Expects BIN_DIR (and CUDA_VISIBLE_DEVICES when ngpu > 0) in the environment.

num_devices=$1
config=$2

# Plain single-process run unless at least one GPU was requested.
launcher="python3"
if [ ${num_devices} -gt 0 ]; then
    launcher="python3 -m paddle.distributed.launch --gpus $CUDA_VISIBLE_DEVICES"
fi

${launcher} ${BIN_DIR}/train.py --cfg_path ${config}


================================================
FILE: examples/esc50/cls0/path.sh
================================================
#!/bin/bash
# Environment setup for this example; run.sh pulls it in with `source path.sh`.

# Repository root: three levels above the current working directory.
export MAIN_ROOT=$(realpath ${PWD}/../../../)

# Put the repo root and its utils/ on PATH, and the repo root on PYTHONPATH.
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

export LC_ALL=C
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONDONTWRITEBYTECODE=1

# Directory containing the experiment entry scripts for the selected model.
MODEL=panns
export BIN_DIR=${MAIN_ROOT}/paddlespeech/cls/exps/${MODEL}

================================================
FILE: examples/esc50/cls0/run.sh
================================================
#!/bin/bash
# Entry point for the ESC-50 PANNs example.
#
# Usage: CUDA_VISIBLE_DEVICES=<ids> ./run.sh <stage> <stage-specific args...>
#   stage 1: ./run.sh 1 <cfg_path>                        train
#   stage 2: ./run.sh 2 <cfg_path>                        predict
#   stage 3: ./run.sh 3 <ckpt> <output_dir>               export model
#   stage 4: ./run.sh 4 <device> <model_dir> <audio_file> inference with the exported model
# Every stage block ends with `exit 0`, so a single invocation runs exactly one
# stage: the first one whose number is >= <stage>.
set -e
source path.sh

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when it is empty).
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')

stage=$1
stop_stage=100

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # train
    cfg_path=$2
    ./local/train.sh ${ngpu} ${cfg_path} || exit -1
    exit 0
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # predict with the settings in the config file
    cfg_path=$2
    ./local/infer.sh ${cfg_path} || exit -1
    exit 0
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # export the checkpoint into output_dir
    ckpt=$2
    output_dir=$3
    ./local/export.sh ${ckpt} ${output_dir} || exit -1
    exit 0
fi

if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    # run inference on one audio file with the exported model
    infer_device=$2
    graph_dir=$3
    audio_file=$4
    ./local/static_model_infer.sh ${infer_device} ${graph_dir} ${audio_file} || exit -1
    exit 0
fi


================================================
FILE: examples/hey_snips/README.md
================================================

## Metrics

We measure the false reject rate (FRR) at a fixed number of false alarms per hour.

The released model: https://paddlespeech.cdn.bcebos.com/kws/heysnips/kws0_mdtc_heysnips_ckpt.tar.gz

|Model|False Alarm| False Reject Rate|
|--|--|--|
|MDTC| 1| 0.003559 |


================================================
FILE: examples/hey_snips/kws0/README.md
================================================
# MDTC Keyword Spotting with HeySnips Dataset

## Dataset

Before running scripts, you **MUST** follow this instruction to download the dataset: https://github.com/sonos/keyword-spotting-research-datasets

After you download and decompress the dataset archive, you should **REPLACE** the value of `data_dir` in `conf/*.yaml` to complete dataset config.

## Get Started

In this section, we will train the [MDTC](https://arxiv.org/pdf/2102.13552.pdf) model and evaluate on "Hey Snips" dataset.

```sh
CUDA_VISIBLE_DEVICES=0,1 ./run.sh conf/mdtc.yaml
```

This script contains training and scoring steps. You can just set the `CUDA_VISIBLE_DEVICES` environment var to run on single gpu or multi-gpus.

The variables `stage` and `stop_stage` in `./run.sh` control which steps are run:
- stage 1: Training from scratch.
- stage 2: Evaluating the model on the test dataset and computing the detection error tradeoff (DET) over all trigger thresholds.
- stage 3: Plotting the DET curve for visualization.


================================================
FILE: examples/hey_snips/kws0/conf/mdtc.yaml
================================================
# https://yaml.org/type/float.html
# Flat (single-level) config for the MDTC keyword-spotting example: every key
# below lives in the same top-level mapping, so each key may appear only once.
###########################################
#                   Data                  #
###########################################
dataset: 'paddleaudio.datasets:HeySnips'
# Replace with the path of the downloaded and decompressed dataset.
data_dir: '../tests/hey_snips_research_6k_en_train_eval_clean_ter'

############################################
#           Network Architecture           #
############################################
backbone: 'paddlespeech.kws.models:MDTC'
num_keywords: 1
stack_num: 3
stack_size: 4
in_channels: 80
res_channels: 32
kernel_size: 5

###########################################
#                Feature                  #
###########################################
feat_type: 'kaldi_fbank'
sample_rate: 16000
frame_shift: 10
frame_length: 25
n_mels: 80

###########################################
#                Training                 #
###########################################
epochs: 100
num_workers: 16
batch_size: 100
checkpoint_dir: './checkpoint'
save_freq: 10
log_freq: 10
learning_rate: 0.001
weight_decay: 0.00005
grad_clip: 5.0

###########################################
#                Scoring                  #
###########################################
# `batch_size` and `num_workers` are shared with the Training section above.
# They were previously repeated here with identical values, which made them
# duplicate keys in this mapping.
checkpoint: './checkpoint/epoch_100/model.pdparams'
score_file: './scores.txt'
stats_file: './stats.0.txt'
img_file: './det.png'


================================================
FILE: examples/hey_snips/kws0/local/plot.sh
================================================
#!/bin/bash
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Plot the DET curve from a stats file with plot_det_curve.py.
#
# Usage: plot.sh <keyword> <stats_file> <img_file>
#   keyword     passed as --keyword_label
#   stats_file  passed as --stats_file
#   img_file    passed as --img_file
# Expects BIN_DIR to be set in the environment (path.sh exports it).

if [ $# != 3 ];then
    # The usage text names the arguments this script actually reads.
    echo "usage: ${0} keyword stats_file img_file"
    exit -1
fi

keyword=$1
stats_file=$2
img_file=$3

python3 ${BIN_DIR}/plot_det_curve.py --keyword_label ${keyword} --stats_file ${stats_file} --img_file ${img_file}


================================================
FILE: examples/hey_snips/kws0/local/score.sh
================================================
#!/bin/bash
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Score a checkpoint with score.py, then compute DET statistics with compute_det.py.
#
# Usage: score.sh <config_path> <checkpoint> <score_file> <stats_file>
#   config_path  passed as --config to both steps
#   checkpoint   passed as --ckpt to score.py
#   score_file   written by score.py, read by compute_det.py
#   stats_file   passed as --stats_file to compute_det.py
# Expects BIN_DIR to be set in the environment (path.sh exports it).

if [ $# != 4 ];then
    # Four arguments are required; the usage text lists all of them.
    echo "usage: ${0} config_path checkpoint score_file stats_file"
    exit -1
fi

cfg_path=$1
ckpt=$2
score_file=$3
stats_file=$4

# Stop immediately if scoring fails, since compute_det.py consumes its output.
python3 ${BIN_DIR}/score.py --config ${cfg_path} --ckpt ${ckpt} --score_file ${score_file} || exit -1
python3 ${BIN_DIR}/compute_det.py --config ${cfg_path} --score_file ${score_file} --stats_file ${stats_file} || exit -1


================================================
FILE: examples/hey_snips/kws0/local/train.sh
================================================
#!/bin/bash
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Launch train.py, through paddle.distributed.launch when GPUs are available.
# Expects BIN_DIR (and CUDA_VISIBLE_DEVICES when num_gpus > 0) in the environment.

# Exactly two positional arguments are required.
if [ $# != 2 ]; then
    echo "usage: ${0} num_gpus config_path"
    exit -1
fi

num_devices=$1
config=$2

# Plain single-process run unless at least one GPU was requested.
launcher="python3"
if [ ${num_devices} -gt 0 ]; then
    launcher="python3 -m paddle.distributed.launch --gpus $CUDA_VISIBLE_DEVICES"
else
    echo "set CUDA_VISIBLE_DEVICES to enable multi-gpus trainning."
fi

${launcher} ${BIN_DIR}/train.py --config ${config}


================================================
FILE: examples/hey_snips/kws0/path.sh
================================================
#!/bin/bash
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Environment setup for this example; meant to be sourced (run.sh does
# `source path.sh`) from the example directory.

# Canonical path three levels above the current working directory.
# The argument is quoted so a working directory containing whitespace is
# passed to realpath as one path instead of being split into several.
export MAIN_ROOT=$(realpath "${PWD}/../../../")

export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export LC_ALL=C

export PYTHONDONTWRITEBYTECODE=1
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/

# Directory holding the experiment entry points (train.py, compute_det.py, ...)
# that the scripts under local/ invoke through ${BIN_DIR}.
MODEL=mdtc
export BIN_DIR=${MAIN_ROOT}/paddlespeech/kws/exps/${MODEL}

================================================
FILE: examples/hey_snips/kws0/run.sh
================================================
#!/bin/bash
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Pipeline driver: train (stage 1), score (stage 2), plot (stage 3).
# Usage: CUDA_VISIBLE_DEVICES=0 run.sh config_path
set -e
source path.sh

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES
# (awk prints 0 when the variable is empty or unset).
ngpu=$(echo "${CUDA_VISIBLE_DEVICES}" | awk -F "," '{print NF}')

if [ $# != 1 ];then
    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path"
    exit -1
fi

# Inclusive range of stages to execute.
stage=1
stop_stage=3

cfg_path=$1

# Arguments are quoted below so that paths containing whitespace are
# forwarded to the stage scripts as single arguments.
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    ./local/train.sh "${ngpu}" "${cfg_path}" || exit -1
fi

# Inputs/outputs of the scoring and plotting stages.
# NOTE(review): the checkpoint path presumably matches what training writes
# after its final epoch - confirm against the training config.
ckpt=./checkpoint/epoch_100/model.pdparams
score_file=./scores.txt
stats_file=./stats.0.txt
img_file=./det.png

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    ./local/score.sh "${cfg_path}" "${ckpt}" "${score_file}" "${stats_file}" || exit -1
fi

keyword=HeySnips
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    ./local/plot.sh "${keyword}" "${stats_file}" "${img_file}" || exit -1
fi

================================================
FILE: examples/iwslt2012/punc0/README.md
================================================
# Punctuation Restoration with IWSLT2012-Zh

## Get Started
### Data Preprocessing
```bash
./run.sh --stage 0 --stop-stage 0
```
### Model Training
```bash
./run.sh --stage 1 --stop-stage 1
```
### Testing
```bash
./run.sh --stage 2 --stop-stage 2
```
### Punctuation Restoration
```bash
./run.sh --stage 3 --stop-stage 3
```
## Pretrained Model
The pretrained model can be downloaded here:

[ernie_linear_p3_iwslt2012_zh_ckpt_0.1.1.zip](https://paddlespeech.cdn.bcebos.com/text/ernie_linear_p3_iwslt2012_zh_ckpt_0.1.1.zip)

[ernie-3.0-base.tar.gz](https://paddlespeech.cdn.bcebos.com/punc_restore/ernie-3.0-base.tar.gz)

[ernie-3.0-medium.tar.gz](https://paddlespeech.cdn.bcebos.com/punc_restore/ernie-3.0-medium.tar.gz)

[ernie-3.0-micro.tar.gz](https://paddlespeech.cdn.bcebos.com/punc_restore/ernie-3.0-micro.tar.gz)

[ernie-mini.tar.gz](https://paddlespeech.cdn.bcebos.com/punc_restore/ernie-mini.tar.gz)

[ernie-nano.tar.gz](https://paddlespeech.cdn.bcebos.com/punc_restore/ernie-nano.tar.gz)

[ernie-tiny.tar.gz](https://paddlespeech.cdn.bcebos.com/punc_restore/ernie-tiny.tar.gz)

### Test Result
- Ernie 1.0
    |       |COMMA  |  PERIOD | QUESTION | OVERALL|
    |:-----:|:-----:|:-----:|:-----:|:-----:|  
    |Precision  |0.510955  |0.526462  |0.820755  |0.619391|
    |Recall     |0.517433  |0.564179  |0.861386  |0.647666|
    |F1         |0.514173  |0.544669  |0.840580  |0.633141|
- Ernie-tiny
    |       |COMMA  |  PERIOD | QUESTION | OVERALL|
    |:-----:|:-----:|:-----:|:-----:|:-----:|  
    |Precision  |0.733177  |0.721448  |0.754717  |0.736447|
    |Recall     |0.380740  |0.524646  |0.733945  |0.546443|
    |F1         |0.501204  |0.607506  |0.744186  |0.617632|
- Ernie-3.0-base-zh
    |       |COMMA  |  PERIOD | QUESTION | OVERALL|
    |:-----:|:-----:|:-----:|:-----:|:-----:|  
    |Precision  |0.805947  |0.764160  |0.858491  |0.809532|
    |Recall     |0.399070  |0.567978  |0.850467  |0.605838|
    |F1         |0.533817  |0.651623  |0.854460  |0.679967|
- Ernie-3.0-medium-zh
    |       |COMMA  |  PERIOD | QUESTION | OVERALL|
    |:-----:|:-----:|:-----:|:-----:|:-----:|  
    |Precision  |0.730829  |0.699164  |0.707547  |0.712514|
    |Recall     |0.388196  |0.533286  |0.797872  |0.573118|
    |F1         |0.507058  |0.605062  |0.750000  |0.620707|
- Ernie-3.0-mini-zh
    |       |COMMA  |  PERIOD | QUESTION | OVERALL|
    |:-----:|:-----:|:-----:|:-----:|:-----:|  
    |Precision  |0.757433  |0.708449  |0.707547  |0.724477|
    |Recall     |0.355752  |0.506977  |0.735294  |0.532674|
    |F1         |0.484121  |0.591015  |0.721154  |0.598763|
- Ernie-3.0-micro-zh
    |       |COMMA  |  PERIOD | QUESTION | OVERALL|
    |:-----:|:-----:|:-----:|:-----:|:-----:|  
    |Precision  |0.733959  |0.679666  |0.726415  |0.713347|
    |Recall     |0.332742  |0.483487  |0.712963  |0.509731|
    |F1         |0.457896  |0.565033  |0.719626  |0.580852|
- Ernie-3.0-nano-zh
    |       |COMMA  |  PERIOD | QUESTION | OVERALL|
    |:-----:|:-----:|:-----:|:-----:|:-----:|  
    |Precision  |0.693271  |0.682451  |0.754717  |0.710146|
    |Recall     |0.327784  |0.491968  |0.666667  |0.495473|
    |F1         |0.445114  |0.571762  |0.707965  |0.574947|


================================================
FILE: examples/iwslt2012/punc0/RESULTS.md
================================================
# iwslt2012

## Ernie

|       |COMMA  |  PERIOD | QUESTION | OVERALL|
|:-----:|:-----:|:-----:|:-----:|:-----:|  
|Precision  |0.510955  |0.526462  |0.820755  |0.619391|
|Recall     |0.517433  |0.564179  |0.861386  |0.647666|
|F1         |0.514173  |0.544669  |0.840580  |0.633141|


================================================
FILE: examples/iwslt2012/punc0/conf/default.yaml
================================================
###########################################################
#                       DATA SETTING                      #
###########################################################
dataset_type: Ernie
train_path: data/iwslt2012_zh/train.txt
dev_path: data/iwslt2012_zh/dev.txt
test_path: data/iwslt2012_zh/test.txt
batch_size: 64
num_workers: 2
data_params:
    pretrained_token: ernie-1.0
    punc_path: data/iwslt2012_zh/punc_vocab
    seq_len: 100


###########################################################
#                       MODEL SETTING                     #
###########################################################
model_type: ErnieLinear
model:
    pretrained_token: ernie-1.0
    num_classes: 4

###########################################################
#                     OPTIMIZER SETTING                   #
###########################################################
optimizer_params:
    weight_decay: 1.0e-6               # weight decay coefficient.

scheduler_params:
    learning_rate: 1.0e-5               # learning rate.
    gamma: 0.9999                       # scheduler gamma; must be in (0.0, 1.0), and values closer to 1.0 are better.

###########################################################
#                     TRAINING SETTING                    #
###########################################################
max_epoch: 20
# `num_snapshots` is defined once, under OTHER SETTING. Mapping keys must be
# unique in YAML; a second definition is rejected by strict loaders and
# silently overridden by last-key-wins loaders.

###########################################################
#                     OTHER SETTING                       #
###########################################################
num_snapshots: 10                 # max number of snapshots to keep while training
seed: 42                          # random seed for paddle, random, and np.random


================================================
FILE: examples/iwslt2012/punc0/conf/ernie-3.0-base.yaml
================================================
###########################################################
#                       DATA SETTING                      #
###########################################################
dataset_type: Ernie
train_path: data/iwslt2012_zh/train.txt
dev_path: data/iwslt2012_zh/dev.txt
test_path: data/iwslt2012_zh/test.txt
batch_size: 64
num_workers: 2
data_params:
    pretrained_token: ernie-3.0-base-zh
    punc_path: data/iwslt2012_zh/punc_vocab
    seq_len: 100


###########################################################
#                       MODEL SETTING                     #
###########################################################
model_type: ErnieLinear
model:
    pretrained_token: ernie-3.0-base-zh
    num_classes: 4

###########################################################
#                     OPTIMIZER SETTING                   #
###########################################################
optimizer_params:
    weight_decay: 1.0e-6               # weight decay coefficient.

scheduler_params:
    learning_rate: 1.0e-5               # learning rate.
    gamma: 0.9999                       # scheduler gamma; must be in (0.0, 1.0), and values closer to 1.0 are better.

###########################################################
#                     TRAINING SETTING                    #
###########################################################
max_epoch: 20
# `num_snapshots` is defined once, under OTHER SETTING. Mapping keys must be
# unique in YAML; a second definition is rejected by strict loaders and
# silently overridden by last-key-wins loaders.

###########################################################
#                     OTHER SETTING                       #
###########################################################
num_snapshots: 10                 # max number of snapshots to keep while training
seed: 42                          # random seed for paddle, random, and np.random


================================================
FILE: examples/iwslt2012/punc0/conf/ernie-3.0-medium.yaml
================================================
###########################################################
#                       DATA SETTING                      #
###########################################################
dataset_type: Ernie
train_path: data/iwslt2012_zh/train.txt
dev_path: data/iwslt2012_zh/dev.txt
test_path: data/iwslt2012_zh/test.txt
batch_size: 64
num_workers: 2
data_params:
    pretrained_token: ernie-3.0-medium-zh
    punc_path: data/iwslt2012_zh/punc_vocab
    seq_len: 100


###########################################################
#                       MODEL SETTING                     #
###########################################################
model_type: ErnieLinear
model:
    pretrained_token: ernie-3.0-medium-zh
    num_classes: 4

###########################################################
#                     OPTIMIZER SETTING                   #
###########################################################
optimizer_params:
    weight_decay: 1.0e-6               # weight decay coefficient.

scheduler_params:
    learning_rate: 1.0e-5               # learning rate.
    gamma: 0.9999                       # scheduler gamma; must be in (0.0, 1.0), and values closer to 1.0 are better.

###########################################################
#                     TRAINING SETTING                    #
###########################################################
max_epoch: 20
# `num_snapshots` is defined once, under OTHER SETTING. Mapping keys must be
# unique in YAML; a second definition is rejected by strict loaders and
# silently overridden by last-key-wins loaders.

###########################################################
#                     OTHER SETTING                       #
###########################################################
num_snapshots: 10                 # max number of snapshots to keep while training
seed: 42                          # random seed for paddle, random, and np.random


================================================
FILE: examples/iwslt2012/punc0/conf/ernie-3.0-mini.yaml
================================================
###########################################################
#                       DATA SETTING                      #
###########################################################
dataset_type: Ernie
train_path: data/iwslt2012_zh/train.txt
dev_path: data/iwslt2012_zh/dev.txt
test_path: data/iwslt2012_zh/test.txt
batch_size: 64
num_workers: 2
data_params:
    pretrained_token: ernie-3.0-mini-zh
    punc_path: data/iwslt2012_zh/punc_vocab
    seq_len: 100


###########################################################
#                       MODEL SETTING                     #
###########################################################
model_type: ErnieLinear
model:
    pretrained_token: ernie-3.0-mini-zh
    num_classes: 4

###########################################################
#                     OPTIMIZER SETTING                   #
###########################################################
optimizer_params:
    weight_decay: 1.0e-6               # weight decay coefficient.

scheduler_params:
    learning_rate: 1.0e-5               # learning rate.
    gamma: 0.9999                       # scheduler gamma; must be in (0.0, 1.0), and values closer to 1.0 are better.

###########################################################
#                     TRAINING SETTING                    #
###########################################################
max_epoch: 20
# `num_snapshots` is defined once, under OTHER SETTING. Mapping keys must be
# unique in YAML; a second definition is rejected by strict loaders and
# silently overridden by last-key-wins loaders.

###########################################################
#                     OTHER SETTING                       #
###########################################################
num_snapshots: 10                 # max number of snapshots to keep while training
seed: 42                          # random seed for paddle, random, and np.random


================================================
FILE: examples/iwslt2012/punc0/conf/ernie-3.0-nano-zh.yaml
================================================
###########################################################
#                       DATA SETTING                      #
###########################################################
dataset_type: Ernie
train_path: data/iwslt2012_zh/train.txt
dev_path: data/iwslt2012_zh/dev.txt
test_path: data/iwslt2012_zh/test.txt
batch_size: 64
num_workers: 2
data_params:
    pretrained_token: ernie-3.0-nano-zh
    punc_path: data/iwslt2012_zh/punc_vocab
    seq_len: 100


###########################################################
#                       MODEL SETTING                     #
###########################################################
model_type: ErnieLinear
model:
    pretrained_token: ernie-3.0-nano-zh
    num_classes: 4

###########################################################
#                     OPTIMIZER SETTING                   #
###########################################################
optimizer_params:
    weight_decay: 1.0e-6               # weight decay coefficient.

scheduler_params:
    learning_rate: 1.0e-5               # learning rate.
    gamma: 0.9999                       # scheduler gamma; must be in (0.0, 1.0), and values closer to 1.0 are better.

###########################################################
#                     TRAINING SETTING                    #
###########################################################
max_epoch: 20
# `num_snapshots` is defined once, under OTHER SETTING. Mapping keys must be
# unique in YAML; a second definition is rejected by strict loaders and
# silently overridden by last-key-wins loaders.

###########################################################
#                     OTHER SETTING                       #
###########################################################
num_snapshots: 10                 # max number of snapshots to keep while training
seed: 42                          # random seed for paddle, random, and np.random


================================================
FILE: examples/iwslt2012/punc0/conf/ernie-tiny.yaml
================================================
###########################################################
#                       DATA SETTING                      #
###########################################################
dataset_type: Ernie
train_path: data/iwslt2012_zh/train.txt
dev_path: data/iwslt2012_zh/dev.txt
test_path: data/iwslt2012_zh/test.txt
batch_size: 64
num_workers: 2
data_params:
    pretrained_token: ernie-tiny
    punc_path: data/iwslt2012_zh/punc_vocab
    seq_len: 100


###########################################################
#                       MODEL SETTING                     #
###########################################################
model_type: ErnieLinear
model:
    pretrained_token: ernie-tiny
    num_classes: 4

###########################################################
#                     OPTIMIZER SETTING                   #
###########################################################
optimizer_params:
    weight_decay: 1.0e-6               # weight decay coefficient.

scheduler_params:
    learning_rate: 1.0e-5               # learning rate.
    gamma: 0.9999                       # scheduler gamma; must be in (0.0, 1.0), and values closer to 1.0 are better.

###########################################################
#                     TRAINING SETTING                    #
###########################################################
max_epoch: 20
# `num_snapshots` is defined once, under OTHER SETTING. Mapping keys must be
# unique in YAML; a second definition is rejected by strict loaders and
# silently overridden by last-key-wins loaders.

###########################################################
#                     OTHER SETTING                       #
###########################################################
num_snapshots: 10                 # max number of snapshots to keep while training
seed: 42                          # random seed for paddle, random, and np.random


================================================
FILE: examples/iwslt2012/punc0/local/data.sh
================================================
#!/bin/bash

# Fetch and unpack the IWSLT2012 archive unless a ./data directory exists.
# NOTE(review): this assumes the archive unpacks into ./data - confirm.
if [ ! -d data ]; then
    # Stop with a non-zero status on failure. Without this the script would
    # fall through to `exit 0` and the caller could not detect a failed
    # download or a corrupt archive.
    wget -c https://paddlespeech.cdn.bcebos.com/datasets/iwslt2012.tar.gz || exit 1
    tar -xzf iwslt2012.tar.gz || exit 1
fi

echo "Finish data preparation."
exit 0


================================================
FILE: examples/iwslt2012/punc0/local/preprocess.py
================================================
import argparse


def process_sentence(line):
    """Insert a single space between every pair of adjacent characters.

    Every character of ``line`` is kept, including a trailing newline, so
    ``'ab\\n'`` becomes ``'a b \\n'``. An empty string is returned unchanged.

    Args:
        line (str): Text to split into space-separated characters.

    Returns:
        str: The characters of ``line`` joined by single spaces.
    """
    # str.join builds the result in one pass instead of repeatedly
    # concatenating onto a growing string.
    return ' '.join(line)


if __name__ == "__main__":
    # Command-line entry point: copy every line of -input_file to
    # -output_file with its characters separated by spaces, then report
    # how many lines were processed.
    parser = argparse.ArgumentParser(description="Input filename")
    parser.add_argument('-input_file')
    parser.add_argument('-output_file')
    args = parser.parse_args()

    sentence_cnt = 0
    # Open both files explicitly as UTF-8. The example's path.sh exports
    # LC_ALL=C, and PYTHONIOENCODING only affects stdin/stdout/stderr, so
    # relying on the locale default for open() can fail on non-ASCII text.
    with open(args.input_file, 'r', encoding='utf-8') as f, \
            open(args.output_file, 'w', encoding='utf-8') as write_f:
        for line in f:
            sentence_cnt += 1
            write_f.write(process_sentence(line))
    print('preprocess over')
    print('total sentences number:', sentence_cnt)


================================================
FILE: examples/iwslt2012/punc0/local/punc_restore.sh
================================================
#!/bin/bash

# Restore punctuation for a piece of text with a trained checkpoint.
#
# Usage: punc_restore.sh config_path train_output_path ckpt_name text
# BIN_DIR must already be set in the environment (the example's path.sh
# exports it).
config_path=$1
train_output_path=$2
ckpt_name=$3
text=$4

# Every expansion is quoted so that input text or paths containing
# whitespace reach punc_restore.py as a single argument.
python3 "${BIN_DIR}/punc_restore.py" \
    --config="${config_path}" \
    --checkpoint="${train_output_path}/checkpoints/${ckpt_name}" \
    --text="${text}"


================================================
FILE: examples/iwslt2012/punc0/local/test.sh
================================================
#!/bin/bash

# Evaluate a trained checkpoint.
#
# Usage: test.sh config_path train_output_path ckpt_name
# BIN_DIR must already be set in the environment (the example's path.sh
# exports it).
config_path=$1
train_output_path=$2
ckpt_name=$3

# Every expansion is quoted so that paths containing whitespace reach
# test.py as a single argument.
python3 "${BIN_DIR}/test.py" \
    --config="${config_path}" \
    --checkpoint="${train_output_path}/checkpoints/${ckpt_name}"


================================================
FILE: examples/iwslt2012/punc0/local/train.sh
================================================
#!/bin/bash

# Train the punctuation-restoration model.
#
# Usage: train.sh config_path train_output_path
# BIN_DIR must already be set in the environment (the example's path.sh
# exports it).
config_path=$1
train_output_path=$2

# Every expansion is quoted so that paths containing whitespace reach
# train.py as a single argument. The GPU count passed to train.py is fixed
# at 1 here.
python3 "${BIN_DIR}/train.py" \
    --config="${config_path}" \
    --output-dir="${train_output_path}" \
    --ngpu=1


================================================
FILE: examples/iwslt2012/punc0/path.sh
================================================
#!/bin/bash
# Environment setup for this example; run.sh sources this file.

# Three directory levels above the current working directory.
MAIN_ROOT=${PWD}/../../../
export MAIN_ROOT

PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
LC_ALL=C
export PATH LC_ALL

# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
PYTHONIOENCODING=UTF-8
PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
export PYTHONIOENCODING PYTHONPATH

LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/
export LD_LIBRARY_PATH

# Directory with the experiment entry points that the scripts under local/
# invoke through ${BIN_DIR}.
MODEL=ernie_linear
BIN_DIR=${MAIN_ROOT}/paddlespeech/text/exps/${MODEL}
export BIN_DIR


================================================
FILE: examples/iwslt2012/punc0/run.sh
================================================
#!/bin/bash
# Pipeline driver: data preparation (stage 0), training (stage 1),
# testing (stage 2) and punctuation restoration of ${text} (stage 3).
set -e
source path.sh

gpus=0,1
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_12840.pdz
text=今天的天气真不错啊你下午有空吗我想约你一起去吃饭

# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this can not be mixed use with `$1`, `$2` ...
source "${MAIN_ROOT}/utils/parse_options.sh" || exit 1

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # prepare data
    ./local/data.sh
fi

# Arguments are quoted below so that values containing whitespace (for
# example a custom text or output directory) are forwarded to the stage
# scripts as single arguments.
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # train model, all `ckpt` under `train_output_path/checkpoints/` dir
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh "${conf_path}" "${train_output_path}" || exit -1
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
   CUDA_VISIBLE_DEVICES=${gpus} ./local/test.sh "${conf_path}" "${train_output_path}" "${ckpt_name}" || exit -1
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
   CUDA_VISIBLE_DEVICES=${gpus} ./local/punc_restore.sh "${conf_path}" "${train_output_path}" "${ckpt_name}" "${text}" || exit -1
fi

================================================
FILE: examples/librispeech/.gitignore
================================================
data
exp
log
ckpt*


================================================
FILE: examples/librispeech/README.md
================================================
# ASR

* asr0 - deepspeech2 Streaming/Non-Streaming
* asr1 - transformer/conformer Streaming/Non-Streaming
* asr2 - transformer/conformer Streaming/Non-Streaming with Kaldi feature
* asr3 - wav2vecASR, ASR model with pre-trained wav2vec2 and CTC

## Data
| Data Subset | Duration in Seconds |
| --- | --- |
| data/manifest.train |  0.83s ~ 29.735s |
| data/manifest.dev | 1.065s ~ 35.155s |  
| data/manifest.test-clean | 1.285s ~ 34.955s |


================================================
FILE: examples/librispeech/asr0/README.md
================================================
# DeepSpeech2 offline/online ASR with Librispeech
This example contains code used to train a DeepSpeech2 offline or online model with the [Librispeech dataset](http://www.openslr.org/resources/12).
## Overview
All the scripts you need are in the `run.sh`. There are several stages in the `run.sh`, and each stage has its function.
| Stage | Function                                                     |
|:---- |:----------------------------------------------------------- |
| 0     | Process data. It includes: <br>       (1) Download the dataset <br>       (2) Calculate the CMVN of the train dataset <br>       (3) Get the vocabulary file <br>       (4) Get the manifest files of the train, development and test dataset |
| 1     | Train the model                                              |
| 2     | Get the final model by averaging the top-k models, set k = 1 means to choose the best model |
| 3     | Test the final model performance                             |
| 4     | Export the static graph model     |
| 5      | Test the static graph model      |
| 6     | Infer the single audio file                                  |

You can choose to run a range of stages by setting `stage` and `stop_stage`.

For example, if you want to execute the code in stage 2 and stage 3, you can run this script:
```bash
bash run.sh --stage 2 --stop_stage 3
```
Or you can set `stage` equal to `stop_stage` to run only one stage.
For example, if you only want to run `stage 0`, you can use the script below:
```bash
bash run.sh --stage 0 --stop_stage 0
```
The document below will describe the scripts in the `run.sh` in detail.
## The environment variables
The path.sh contains the environment variable. 
```bash
source path.sh
```
This script needs to be run first.  

And another script is also needed:
```bash
source ${MAIN_ROOT}/utils/parse_options.sh
```
It will support the way of using `--variable value` in the shell scripts.
## The local variables
Some local variables are set in the `run.sh`. 
`gpus` denotes the GPU number you want to use. If you set `gpus=`,  it means you only use CPU. 
`stage` denotes the number of the stage you want to start from in the experiments.
`stop_stage` denotes the number of the stage you want to end at in the experiments. 
`conf_path` denotes the config path of the model.
`avg_num` denotes the number K of top-K models you want to average to get the final model.
`model_type` denotes the model type: offline or online
`audio_file` denotes the file path of the single file you want to infer in stage 6
`ckpt` denotes the checkpoint prefix of the model, e.g. "deepspeech2"

You can set the local variables (except `ckpt`)  when you use the `run.sh`

For example, you can set `gpus` and `avg_num` on the command line:
```bash
bash run.sh --gpus 0,1 --avg_num 1
```
## Stage 0: Data processing
To use this example, you need to process the data first. You can use stage 0 in the `run.sh` to do this. The code is shown below:
```bash
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
     # prepare data
     bash ./local/data.sh || exit -1
 fi
```
Stage 0 is for processing the data.

If you only want to process the data, you can run
```bash
bash run.sh --stage 0 --stop_stage 0
```
You can also just run these scripts in your command line.
```bash
source path.sh
bash ./local/data.sh
```
After processing the data, the `data` directory will look like this:
```bash
data/
|-- dev.meta
|-- lang_char
|   `-- vocab.txt
|-- manifest.dev
|-- manifest.dev.raw
|-- manifest.test
|-- manifest.test.raw
|-- manifest.train
|-- manifest.train.raw
|-- mean_std.json
|-- test.meta
`-- train.meta
```
## Stage 1: Model training
If you want to train the model, you can use stage 1 in the `run.sh`. The code is shown below. 
```bash
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
     CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path}  ${ckpt}
 fi
```
If you want to train the model, you can use the script below to execute stage 0 and stage 1:
```bash
bash run.sh --stage 0 --stop_stage 1
```
or you can run these scripts in the command line (only use CPU).
```bash
source path.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/deepspeech2.yaml deepspeech2
```
## Stage 2:  Top-k Models Averaging
After training the model,  we need to get the final model for testing and inference. In every epoch, the model checkpoint is saved, so we can choose the best model from them based on the validation loss or we can sort them and average the parameters of the top-k models to get the final model.  We can use stage 2 to do this, and the code is shown below:
```bash
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     # avg n best model
     avg.sh best exp/${ckpt}/checkpoints ${avg_num}
 fi
```
The `avg.sh` script is in `../../../utils/`, which is added to `PATH` in `path.sh`.
If you want to get the final model,  you can use the script below to execute stage 0, stage 1, and stage 2:
```bash
bash run.sh --stage 0 --stop_stage 2
```
or you can run these scripts in the command line (only use CPU).
```bash
source path.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/deepspeech2.yaml deepspeech2
avg.sh best exp/deepspeech2/checkpoints 1
```
## Stage 3: Model Testing
The test stage is to evaluate the model performance. The code of the test stage is shown below:
```bash
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
     # test ckpt avg_n
     CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
 fi
```
If you want to train a model and test it,  you can use the script below to execute stage 0, stage 1,  stage 2, and stage 3 :
```bash
bash run.sh --stage 0 --stop_stage 3
```
or you can run these scripts in the command line (only use CPU).
```bash
source path.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/deepspeech2.yaml deepspeech2
avg.sh best exp/deepspeech2/checkpoints 1
CUDA_VISIBLE_DEVICES= ./local/test.sh conf/deepspeech2.yaml conf/tuning/decode.yaml exp/deepspeech2/checkpoints/avg_1
```
## Stage 4: Static graph model Export
This stage is to transform dygraph to static graph.
```bash
 if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
     # export ckpt avg_n
     CUDA_VISIBLE_DEVICES=0 ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit ${model_type}
 fi
```
If you already have a dynamic graph model, you can run this script:
```bash
source path.sh
./local/export.sh conf/deepspeech2.yaml exp/deepspeech2/checkpoints/avg_1 exp/deepspeech2/checkpoints/avg_1.jit offline
```
## Stage 5: Static graph Model Testing
Similar to stage 3, the static graph model can also be tested.
```bash
 if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
     # test export ckpt avg_n
     CUDA_VISIBLE_DEVICES=0 ./local/test_export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt}.jit ${model_type}|| exit -1
 fi
```
If you already have exported the static graph, you can run this script:
```bash
CUDA_VISIBLE_DEVICES= ./local/test_export.sh conf/deepspeech2.yaml exp/deepspeech2/checkpoints/avg_1.jit offline
```
## Stage 6: Single Audio File Inference
In some situations, you want to use the trained model to do the inference for a single audio file. You can use stage 6. The code is shown below:
```bash
if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then
     # test a single .wav file
     CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${model_type} ${audio_file}
 fi
```
You can download the audio demo:
```bash
wget -nc https://paddlespeech.cdn.bcebos.com/datasets/single_wav/en/demo_002_en.wav -P data/
```
You can train a model by yourself, then you need to prepare an audio file or use the audio demo above, please confirm the sample rate of the audio is 16K. You can get the result of the audio demo by running the script below.
```bash
CUDA_VISIBLE_DEVICES= ./local/test_wav.sh conf/deepspeech2.yaml conf/tuning/decode.yaml exp/deepspeech2/checkpoints/avg_1 data/demo_002_en.wav
```


================================================
FILE: examples/librispeech/asr0/RESULTS.md
================================================
# LibriSpeech

## Deepspeech2 Non-Streaming
| Model | Params | release |  Config | Test set | Loss | WER |  
| --- | --- | --- | --- | --- | --- | --- |  
| DeepSpeech2 | 113.96M | r1.0.1 | conf/deepspeech2.yaml + U2 Data pipeline and spec aug + fbank161 | test-clean | 10.76069622039795 | 0.046700 | 
| DeepSpeech2 | 42.96M | 2.2.0 | conf/deepspeech2.yaml + spec_aug | test-clean | 14.49190807 | 0.067283 |  
| DeepSpeech2 | 42.96M | 2.1.0 | conf/deepspeech2.yaml | test-clean | 15.184467315673828 | 0.072154 |  
| DeepSpeech2 | 42.96M | 2.0.0 | conf/deepspeech2.yaml | test-clean | - | 0.073973 |  
| DeepSpeech2 | 42.96M | 1.8.5 | - | test-clean | - | 0.074939 |  


================================================
FILE: examples/librispeech/asr0/conf/deepspeech2.yaml
================================================
# YAML float syntax used in this file (e.g. `.inf`, `5.0e-4`) is described at
# https://yaml.org/type/float.html
###########################################
#                   Data                  #
###########################################
# Manifest files for the train / dev / test splits.
train_manifest: data/manifest.train
dev_manifest: data/manifest.dev-clean
test_manifest: data/manifest.test-clean
# Length and ratio bounds; `.inf` leaves the upper bound open.
# NOTE(review): min_input_len presumably uses the same unit (seconds) as
# max_input_len - confirm against the data loader.
min_input_len: 0.0
max_input_len: 30.0 # seconds
min_output_len: 0.0
max_output_len: .inf
min_output_input_ratio: 0.00
max_output_input_ratio: .inf

###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_char/vocab.txt 
spm_model_prefix: ''
unit_type: 'char'
preprocess_config: conf/preprocess.yaml
feat_dim: 161
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs
batch_size: 64
maxlen_in: 512  # if input length > maxlen_in, the batch size is automatically reduced
maxlen_out: 150  # if output length > maxlen_out, the batch size is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 8
subsampling_factor: 1
num_encs: 1

############################################
#           Network Architecture           #
############################################
num_conv_layers: 2
num_rnn_layers: 5
rnn_layer_size: 1024
rnn_direction: bidirect
num_fc_layers: 0
# NOTE(review): -1 presumably means "no fully connected layers", matching
# num_fc_layers: 0 - confirm against the model code.
fc_layers_size_list: -1
use_gru: False 
blank_id: 0
  

###########################################
#                Training                 #
###########################################
n_epoch: 15
accum_grad: 1
lr: 5.0e-4
lr_decay: 0.93
weight_decay: 1.0e-6
global_grad_clip: 5.0
dist_sampler: False
log_interval: 1
# NOTE(review): presumably the number of best (kbest_n) and most recent
# (latest_n) checkpoints to retain - confirm against the trainer.
checkpoint:
  kbest_n: 50
  latest_n: 5


================================================
FILE: examples/librispeech/asr0/conf/deepspeech2_online.yaml
================================================
# https://yaml.org/type/float.html
###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.train
dev_manifest: data/manifest.dev-clean
test_manifest: data/manifest.test-clean
min_input_len: 0.0
max_input_len: 30.0 # second
min_output_len: 0.0
max_output_len: .inf
min_output_input_ratio: 0.00
max_output_input_ratio: .inf

###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_char/vocab.txt 
spm_model_prefix: ''
unit_type: 'char'
preprocess_config: conf/preprocess.yaml
feat_dim: 161
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 64
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 8
subsampling_factor: 1
num_encs: 1

############################################
#           Network Architecture           #
############################################
num_conv_layers: 2
num_rnn_layers: 5
rnn_layer_size: 1024
rnn_direction: forward
num_fc_layers: 0
fc_layers_size_list: -1
use_gru: False 
blank_id: 0
  

###########################################
#                Training                 #
###########################################
n_epoch: 65
accum_grad: 1
lr: 5.0e-4
lr_decay: 0.93
weight_decay: 1.0e-6
global_grad_clip: 5.0
log_interval: 1
checkpoint:
  kbest_n: 50
  latest_n: 5


================================================
FILE: examples/librispeech/asr0/conf/preprocess.yaml
================================================
process:
  # extract kaldi fbank from PCM
  - type: fbank_kaldi
    fs: 16000
    n_mels: 161
    n_shift: 160
    win_length: 400
    dither: 0.1
  - type: cmvn_json
    cmvn_path: data/mean_std.json
  # these three processes are a.k.a. SpecAugment
  - type: time_warp
    max_time_warp: 5
    inplace: true
    mode: PIL
  - type: freq_mask
    F: 30
    n_mask: 2
    inplace: true
    replace_with_zero: false
  - type: time_mask
    T: 40
    n_mask: 2
    inplace: true
    replace_with_zero: false


================================================
FILE: examples/librispeech/asr0/conf/tuning/chunk_decode.yaml
================================================
decode_batch_size: 128
error_rate_type: wer
decoding_method: ctc_beam_search
lang_model_path: data/lm/common_crawl_00.prune01111.trie.klm
alpha: 1.9
beta: 0.3
beam_size: 500
cutoff_prob: 1.0
cutoff_top_n: 40
num_proc_bsearch: 8

================================================
FILE: examples/librispeech/asr0/conf/tuning/decode.yaml
================================================
decode_batch_size: 128
error_rate_type: wer
decoding_method: ctc_beam_search
lang_model_path: data/lm/common_crawl_00.prune01111.trie.klm
alpha: 1.9
beta: 0.3
beam_size: 500
cutoff_prob: 1.0
cutoff_top_n: 40
num_proc_bsearch: 8

================================================
FILE: examples/librispeech/asr0/local/data.sh
================================================
#!/bin/bash
# Prepare the LibriSpeech data used by this recipe.
#
# Stages (select with --stage / --stop_stage via parse_options.sh):
#   -1: download LibriSpeech and build the raw manifests
#    0: compute mean/stddev for feature normalization
#    1: build the vocabulary
#    2: format the manifests (token ids, vocab size) for every subset
#
# Requires MAIN_ROOT to be set (see path.sh).

stage=-1
stop_stage=100

unit_type=char
dict_dir=data/lang_char

source ${MAIN_ROOT}/utils/parse_options.sh

mkdir -p data
mkdir -p ${dict_dir}
TARGET_DIR=${MAIN_ROOT}/dataset
mkdir -p ${TARGET_DIR}

if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
    # download data, generate manifests
    python3 ${TARGET_DIR}/librispeech/librispeech.py \
    --manifest_prefix="data/manifest" \
    --target_dir="${TARGET_DIR}/librispeech" \
    --full_download="True"

    if [ $? -ne 0 ]; then
        echo "Prepare LibriSpeech failed. Terminated."
        exit 1
    fi

    # keep the per-subset manifests as *.raw
    for set in train-clean-100 train-clean-360 train-other-500 dev-clean dev-other test-clean test-other; do
        mv data/manifest.${set} data/manifest.${set}.raw
    done

    # rebuild the merged train/dev/test raw manifests from scratch,
    # because the loops below append with >>
    rm -rf data/manifest.train.raw data/manifest.dev.raw  data/manifest.test.raw
    for set in train-clean-100 train-clean-360 train-other-500; do
        cat data/manifest.${set}.raw >> data/manifest.train.raw
    done

    for set in dev-clean dev-other; do
        cat data/manifest.${set}.raw >> data/manifest.dev.raw
    done

    for set in test-clean test-other; do
        cat data/manifest.${set}.raw >> data/manifest.test.raw
    done
fi

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # compute mean and stddev for normalizer
    num_workers=$(nproc)
    python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
    --manifest_path="data/manifest.train.raw" \
    --num_samples=2000 \
    --spectrum_type="fbank" \
    --feat_dim=161 \
    --delta_delta=false \
    --sample_rate=16000 \
    --stride_ms=10 \
    --window_ms=25 \
    --use_dB_normalization=False \
    --num_workers=${num_workers} \
    --output_path="data/mean_std.json"

    if [ $? -ne 0 ]; then
        echo "Compute mean and stddev failed. Terminated."
        exit 1
    fi
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # build vocabulary
    python3 ${MAIN_ROOT}/utils/build_vocab.py \
    --unit_type ${unit_type} \
    --count_threshold=0 \
    --vocab_path="${dict_dir}/vocab.txt" \
    --manifest_paths="data/manifest.train.raw"

    if [ $? -ne 0 ]; then
        echo "Build vocabulary failed. Terminated."
        exit 1
    fi
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # format manifest with tokenids, vocab size
    # Each subset is formatted in its own background job. An `exit 1` inside
    # a background job only ends that job, and a bare `wait` always returns 0,
    # so the job PIDs are collected and waited on one by one to detect failures.
    pids=()
    for set in train dev test dev-clean dev-other test-clean test-other; do
    {
        python3 ${MAIN_ROOT}/utils/format_data.py \
        --cmvn_path "data/mean_std.json" \
        --unit_type ${unit_type} \
        --vocab_path="${dict_dir}/vocab.txt" \
        --manifest_path="data/manifest.${set}.raw" \
        --output_path="data/manifest.${set}"

        if [ $? -ne 0 ]; then
            echo "Formt mnaifest.${set} failed. Terminated."
            exit 1
        fi
    }&
    pids+=($!)
    done

    failed=0
    for pid in "${pids[@]}"; do
        # `wait <pid>` returns the exit status of that specific job
        wait ${pid} || failed=1
    done

    if [ ${failed} -ne 0 ]; then
        exit 1
    fi
fi

echo "LibriSpeech Data preparation done."
exit 0


================================================
FILE: examples/librispeech/asr0/local/download_lm_en.sh
================================================
#!/bin/bash
# Fetch the English language model used for CTC beam-search decoding
# into data/lm, using the `download` helper sourced from utility.sh.

. ${MAIN_ROOT}/utils/utility.sh

DIR=data/lm
URL=https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm
MD5="099a601759d467cd0a8523ff939819c5"
TARGET=${DIR}/common_crawl_00.prune01111.trie.klm

mkdir -p ${DIR}

echo "Start downloading the language model. The language model is large, please wait for a moment ..."

# The helper's output is discarded; only its exit status matters here.
if download $URL $MD5 $TARGET > /dev/null 2>&1; then
    echo "Download the language model sucessfully"
else
    echo "Fail to download the language model!"
    exit 1
fi


exit 0


================================================
FILE: examples/librispeech/asr0/local/export.sh
================================================
#!/bin/bash
# Export a trained checkpoint through ${BIN_DIR}/export.py.
#
# Usage: export.sh config_path ckpt_prefix jit_model_path

if [ $# -ne 3 ]; then
    echo "usage: $0 config_path ckpt_prefix jit_model_path"
    exit -1
fi

config_path=$1
ckpt_path_prefix=$2
jit_model_export_path=$3

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when empty).
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

if ! python3 -u ${BIN_DIR}/export.py \
    --ngpu ${ngpu} \
    --config ${config_path} \
    --checkpoint_path ${ckpt_path_prefix} \
    --export_path ${jit_model_export_path}; then
    echo "Failed in export!"
    exit 1
fi


exit 0


================================================
FILE: examples/librispeech/asr0/local/test.sh
================================================
#!/bin/bash
# Evaluate a checkpoint on the test-clean manifest.
#
# Usage: test.sh config_path decode_config_path ckpt_path_prefix
#
# Stage 0 decodes with ${BIN_DIR}/test.py and scores with compute-wer.py.
# Stage 101 (outside the default 0..100 range) converts reference and
# hypothesis to sclite format and scores them with `sclite`.

if [ $# != 3 ];then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix"
    exit -1
fi
stage=0
stop_stage=100

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when empty).
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
decode_config_path=$2
ckpt_prefix=$3

# download language model
bash local/download_lm_en.sh
if [ $? -ne 0 ]; then
   exit 1
fi

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # format the reference test file
    python3 ${MAIN_ROOT}/utils/format_rsl.py \
        --origin_ref data/manifest.test-clean.raw \
        --trans_ref data/manifest.test-clean.text

    python3 -u ${BIN_DIR}/test.py \
    --ngpu ${ngpu} \
    --config ${config_path} \
    --decode_cfg ${decode_config_path} \
    --result_file ${ckpt_prefix}.rsl \
    --checkpoint_path ${ckpt_prefix}

    if [ $? -ne 0 ]; then
        echo "Failed in evaluation!"
        exit 1
    fi

    # format the hypothesis file
    python3 ${MAIN_ROOT}/utils/format_rsl.py \
        --origin_hyp ${ckpt_prefix}.rsl \
        --trans_hyp ${ckpt_prefix}.rsl.text

    python3 ${MAIN_ROOT}/utils/compute-wer.py --char=1 --v=1 \
        data/manifest.test-clean.text ${ckpt_prefix}.rsl.text > ${ckpt_prefix}.error
fi

if [ ${stage} -le 101 ] && [ ${stop_stage} -ge 101 ]; then
    # The reference must be written to the same path that `sclite -r`
    # reads below (data/manifest.test-clean.text.sclite).
    python3 ${MAIN_ROOT}/utils/format_rsl.py \
        --origin_ref data/manifest.test-clean.raw \
        --trans_ref_sclite data/manifest.test-clean.text.sclite

    python3 ${MAIN_ROOT}/utils/format_rsl.py \
        --origin_hyp ${ckpt_prefix}.rsl \
        --trans_hyp_sclite ${ckpt_prefix}.rsl.text.sclite

    mkdir -p ${ckpt_prefix}_sclite
    sclite -i wsj -r data/manifest.test-clean.text.sclite -h  ${ckpt_prefix}.rsl.text.sclite  -e utf-8 -o all -O ${ckpt_prefix}_sclite -c NOASCII
fi


exit 0


================================================
FILE: examples/librispeech/asr0/local/test_wav.sh
================================================
#!/bin/bash
# Decode a single audio file with a trained checkpoint.
#
# Usage: test_wav.sh config_path decode_config_path ckpt_path_prefix audio_file

if [ $# -ne 4 ]; then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix audio_file"
    exit -1
fi

config_path=$1
decode_config_path=$2
ckpt_prefix=$3
audio_file=$4

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when empty).
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

# Fetch the demo recording (-nc: do not re-download an existing file).
mkdir -p data
wget -nc https://paddlespeech.cdn.bcebos.com/datasets/single_wav/en/demo_002_en.wav -P data/ || exit 1

if [ ! -f ${audio_file} ]; then
    echo "Plase input the right audio_file path"
    exit 1
fi

# download language model
bash local/download_lm_en.sh || exit 1

if ! python3 -u ${BIN_DIR}/test_wav.py \
    --ngpu ${ngpu} \
    --config ${config_path} \
    --decode_cfg ${decode_config_path} \
    --result_file ${ckpt_prefix}.rsl \
    --checkpoint_path ${ckpt_prefix} \
    --audio_file ${audio_file}; then
    echo "Failed in evaluation!"
    exit 1
fi


exit 0


================================================
FILE: examples/librispeech/asr0/local/train.sh
================================================
#!/bin/bash
# Train a model with ${BIN_DIR}/train.py.
#
# Usage: CUDA_VISIBLE_DEVICES=0 train.sh config_path ckpt_name [ips]
#   config_path  model/training config file
#   ckpt_name    outputs are written under exp/${ckpt_name}
#   ips          optional value forwarded as --ips= to paddle.distributed.launch
#
# With an empty CUDA_VISIBLE_DEVICES the script runs train.py directly;
# otherwise it launches through paddle.distributed.launch.

# Accept exactly 2 or 3 arguments.
if [ $# -lt 2 ] || [ $# -gt 3 ];then
    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
    exit -1
fi

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when empty).
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
ckpt_name=$2
ips=$3

if [ ! $ips ];then
  ips_config=
else
  ips_config="--ips="${ips}
fi

mkdir -p exp

# seed may break model convergence
seed=0
if [ ${seed} != 0 ]; then
    export FLAGS_cudnn_deterministic=True
fi

# default memory allocator strategy may cause gpu training hang
# for no OOM raised when memory exhausted
export FLAGS_allocator_strategy=naive_best_fit

if [ ${ngpu} == 0 ]; then
python3 -u ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--config ${config_path} \
--output exp/${ckpt_name} \
--seed ${seed}
else
python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--config ${config_path} \
--output exp/${ckpt_name} \
--seed ${seed}
fi
# Capture the training exit status immediately: the `if` block below would
# otherwise overwrite $? before it is checked.
train_status=$?

if [ ${seed} != 0 ]; then
    unset FLAGS_cudnn_deterministic
fi

if [ ${train_status} -ne 0 ]; then
    echo "Failed in training!"
    exit 1
fi

exit 0


================================================
FILE: examples/librispeech/asr0/path.sh
================================================
# Environment setup for this recipe; meant to be sourced from the recipe directory.

# Repository root, three levels above the current working directory.
export MAIN_ROOT=$(realpath ${PWD}/../../../)

# Make the repository root and its utils/ scripts callable by name.
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/

export LC_ALL=C

# Python settings
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
export PYTHONDONTWRITEBYTECODE=1
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8 


# Entry-point scripts (train.py, test.py, ...) of the selected model.
MODEL=deepspeech2
export BIN_DIR=${MAIN_ROOT}/paddlespeech/s2t/exps/${MODEL}/bin


================================================
FILE: examples/librispeech/asr0/run.sh
================================================
#!/bin/bash
# Top-level driver of the recipe. Stages:
#   0: prepare data           1: train
#   2: average best ckpts     3: test the averaged ckpt
#   4: export the averaged ckpt
#   5: test the exported model (only if local/test_export.sh exists)
#   6: decode a single .wav file
# Every variable below can be overridden as `--name value` (parse_options.sh).
set -e
source path.sh

gpus=0,1,2,3
stage=0
stop_stage=100
conf_path=conf/deepspeech2.yaml
ips=            #xx.xx.xx.xx,xx.xx.xx.xx
decode_conf_path=conf/tuning/decode.yaml
avg_num=5
audio_file=data/demo_002_en.wav

source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

avg_ckpt=avg_${avg_num}
# checkpoint name = config file name without its extension
ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}')
echo "checkpoint name ${ckpt}"

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # prepare data
    bash ./local/data.sh || exit -1
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # train model, all `ckpt` under `exp` dir
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips}
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # avg n best model
    avg.sh best exp/${ckpt}/checkpoints ${avg_num}
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # test ckpt avg_n
    CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt}|| exit -1
fi

if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    # export ckpt avg_n
    CUDA_VISIBLE_DEVICES= ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit
fi

if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
    # test export ckpt avg_n
    # Skip this stage when the recipe does not provide local/test_export.sh,
    # so that a default full run can still reach stage 6.
    if [ -f ./local/test_export.sh ]; then
        CUDA_VISIBLE_DEVICES=0 ./local/test_export.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt}.jit|| exit -1
    else
        echo "./local/test_export.sh not found, skip testing the exported model."
    fi
fi

if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then
    # test a single .wav file
    CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${audio_file} || exit -1
fi


================================================
FILE: examples/librispeech/asr1/.gitignore
================================================
data
exp
log
*.profile


================================================
FILE: examples/librispeech/asr1/README.md
================================================
# Transformer/Conformer ASR with Librispeech
This example contains code used to train [u2](https://arxiv.org/pdf/2012.05481.pdf) model (Transformer or [Conformer](https://arxiv.org/pdf/2005.08100.pdf) model) with [Librispeech dataset](http://www.openslr.org/resources/12)
## Overview
All the scripts you need are in `run.sh`. There are several stages in `run.sh`, and each stage has its function.
| Stage | Function                                                     |
|:---- |:----------------------------------------------------------- |
| 0     | Process data. It includes: <br>       (1) Download the dataset <br>       (2) Calculate the CMVN of the train dataset <br>       (3) Get the vocabulary file <br>       (4) Get the manifest files of the train, development and test dataset<br>       (5) Get the sentencepiece model |
| 1     | Train the model                                              |
| 2     | Get the final model by averaging the top-k models, set k = 1 means to choose the best model |
| 3     | Test the final model performance                             |
| 4     | Get ctc alignment of test data using the final model         |
| 5     | Infer the single audio file                                  |

You can choose to run a range of stages by setting `stage` and `stop_stage `. 

For example, if you want to execute the code in stage 2 and stage 3, you can run this script:
```bash
bash run.sh --stage 2 --stop_stage 3
```
Or you can set `stage` equal to `stop_stage` to only run one stage.
For example, if you only want to run `stage 0`, you can use the script below:
```bash
bash run.sh --stage 0 --stop_stage 0
```
The document below will describe the scripts in `run.sh` in detail.
## The Environment Variables
The path.sh contains the environment variables. 
```bash
. ./path.sh
. ./cmd.sh
```
This script needs to be run first. And another script is also needed:
```bash
source ${MAIN_ROOT}/utils/parse_options.sh
```
It will support the way of using `--variable value` in the shell scripts.
## The Local Variables
Some local variables are set in `run.sh`. 
`gpus` denotes the GPU number you want to use. If you set `gpus=`, it means you only use CPU. 
`stage` denotes the number of stages you want to start from in the experiments.
`stop_stage` denotes the number of the stage you want to end at in the experiments. 
`conf_path` denotes the config path of the model.
`avg_num` denotes the number K of top-K models you want to average to get the final model.
`audio_file` denotes the file path of the single file you want to infer in stage 5
`ckpt` denotes the checkpoint prefix of the model, e.g. "conformer"

You can set the local variables (except `ckpt`) when you use `run.sh`

For example, you can set the `gpus` and `avg_num` when you use the command line:
```bash
bash run.sh --gpus 0,1 --avg_num 20
```
## Stage 0: Data Processing
To use this example, you need to process data firstly and you can use stage 0 in `run.sh` to do this. The code is shown below:
```bash
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
     # prepare data
     bash ./local/data.sh || exit -1
 fi
```
Stage 0 is for processing the data.

If you only want to process the data. You can run
```bash
bash run.sh --stage 0 --stop_stage 0
```
You can also just run these scripts in your command line.
```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
```
After processing the data, the `data` directory will look like this:
```bash
data/
|-- dev.meta
|-- lang_char
|   `-- bpe_unigram_5000.model
|   `-- bpe_unigram_5000.vocab
|   `-- vocab.txt
|-- manifest.dev
|-- manifest.dev.raw
|-- manifest.test
|-- manifest.test.raw
|-- manifest.train
|-- manifest.train.raw
|-- mean_std.json
|-- test.meta
`-- train.meta
```
## Stage 1: Model Training
If you want to train the model, you can use stage 1 in `run.sh`. The code is shown below. 
```bash
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
     CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
 fi
```
If you want to train the model, you can use the script below to execute stage 0 and stage 1:
```bash
bash run.sh --stage 0 --stop_stage 1
```
or you can run these scripts in the command line (only use CPU).
```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/conformer.yaml conformer
```
## Stage 2: Top-k Models Averaging
After training the model, we need to get the final model for testing and inference. In every epoch, the model checkpoint is saved, so we can choose the best model from them based on the validation loss or we can sort them and average the parameters of the top-k models to get the final model. We can use stage 2 to do this, and the code is shown below:
```bash
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     # avg n best model
     avg.sh best exp/${ckpt}/checkpoints ${avg_num}
 fi
```
The `avg.sh` script is in `../../../utils/`, which is added to `PATH` in `path.sh`.
If you want to get the final model, you can use the script below to execute stage 0, stage 1, and stage 2:
```bash
bash run.sh --stage 0 --stop_stage 2
```
or you can run these scripts in the command line (only use CPU).

```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/conformer.yaml conformer
avg.sh best exp/conformer/checkpoints 20
```
## Stage 3: Model Testing
The test stage is to evaluate the model performance. The code of test stage is shown below:
```bash
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
     # test ckpt avg_n
     CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
 fi
```
If you want to train a model and test it, you can use the script below to execute stage 0, stage 1, stage 2, and stage 3 :
```bash
bash run.sh --stage 0 --stop_stage 3
```
or you can run these scripts in the command line (only use CPU).
```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/conformer.yaml conformer
avg.sh best exp/conformer/checkpoints 20
CUDA_VISIBLE_DEVICES= ./local/test.sh conf/conformer.yaml conf/tuning/decode.yaml exp/conformer/checkpoints/avg_20
```
## Pretrained Model
You can get the pretrained transformer or conformer from [this](../../../docs/source/released_model.md).

Use the `tar` command to unpack the model, and then you can use the script below to test it.

For example:
```bash
wget https://paddlespeech.cdn.bcebos.com/s2t/librispeech/asr1/asr1_conformer_librispeech_ckpt_0.1.1.model.tar.gz
tar xzvf asr1_conformer_librispeech_ckpt_0.1.1.model.tar.gz
source path.sh
# If you have already processed the data and generated the manifest files, you can skip the following 2 steps
bash local/data.sh --stage -1 --stop_stage -1
bash local/data.sh --stage 2 --stop_stage 2
CUDA_VISIBLE_DEVICES= ./local/test.sh conf/conformer.yaml conf/tuning/decode.yaml exp/conformer/checkpoints/avg_20
```
The performance of the released models are shown in [here](./RESULTS.md).

## Stage 4: CTC Alignment 
If you want to get the alignment between the audio and the text, you can use the ctc alignment. The code of this stage is shown below:
```bash
 if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
     # ctc alignment of test data
     CUDA_VISIBLE_DEVICES=0 ./local/align.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
 fi
```
If you want to train the model, test it and do the alignment, you can use the script below to execute stage 0 through stage 4:
```bash
bash run.sh --stage 0 --stop_stage 4
```
or if you only need to train a model and do the alignment, you can use these scripts to skip stage 3 (the test stage):
```bash
bash run.sh --stage 0 --stop_stage 2
bash run.sh --stage 4 --stop_stage 4
```
or you can also use these scripts in the command line (only use CPU).
```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/conformer.yaml conformer
avg.sh best exp/conformer/checkpoints 20
# test stage is optional
CUDA_VISIBLE_DEVICES= ./local/test.sh conf/conformer.yaml conf/tuning/decode.yaml exp/conformer/checkpoints/avg_20
CUDA_VISIBLE_DEVICES= ./local/align.sh conf/conformer.yaml conf/tuning/decode.yaml exp/conformer/checkpoints/avg_20
```
## Stage 5: Single Audio File Inference
In some situations, you want to use the trained model to do the inference for the single audio file. You can use stage 5. The code is shown below
```bash
 if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
     # test a single .wav file
     CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${audio_file} || exit -1
 fi
```
You can train the model by yourself using ```bash run.sh --stage 0 --stop_stage 3```, or you can download the pretrained model through the script below:
```bash
wget https://paddlespeech.cdn.bcebos.com/s2t/librispeech/asr1/asr1_conformer_librispeech_ckpt_0.1.1.model.tar.gz
tar xzvf asr1_conformer_librispeech_ckpt_0.1.1.model.tar.gz
```
You can download the audio demo:
```bash
wget -nc https://paddlespeech.cdn.bcebos.com/datasets/single_wav/en/demo_002_en.wav -P data/
```
You need to prepare an audio file or use the audio demo above, please confirm the sample rate of the audio is 16K. You can get the result of the audio demo by running the script below.
```bash
CUDA_VISIBLE_DEVICES= ./local/test_wav.sh conf/conformer.yaml conf/tuning/decode.yaml exp/conformer/checkpoints/avg_20 data/demo_002_en.wav
```


================================================
FILE: examples/librispeech/asr1/RESULTS.md
================================================
# LibriSpeech

## Conformer
train: Epoch 70, 4 V100-32G, best avg: 20

| Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER |  
| --- | --- | --- | --- | --- | --- | --- | --- |
| conformer | 47.63 M | conf/conformer.yaml | spec_aug | test-clean | attention | 6.433612394332886 | 0.039771 |  
| conformer | 47.63 M | conf/conformer.yaml | spec_aug | test-clean | ctc_greedy_search | 6.433612394332886 | 0.040342 |  
| conformer | 47.63 M | conf/conformer.yaml | spec_aug | test-clean | ctc_prefix_beam_search | 6.433612394332886 | 0.040342 |  
| conformer | 47.63 M | conf/conformer.yaml | spec_aug | test-clean | attention_rescoring | 6.433612394332886 | 0.033761 |  


## Conformer Streaming

| Model | Params | Config | Augmentation| Test set | Decode method | Chunk Size & Left Chunks | Loss | WER |  
| --- | --- | --- | --- | --- | --- | --- | --- | --- |  
| conformer | 47.63 M | conf/chunk_conformer.yaml | spec_aug + shift | test-clean | attention | 16, -1 | 7.11 | 0.063193 |  
| conformer | 47.63 M | conf/chunk_conformer.yaml | spec_aug + shift | test-clean | ctc_greedy_search | 16, -1 | 7.11 | 0.082394 |  
| conformer | 47.63 M | conf/chunk_conformer.yaml | spec_aug + shift | test-clean | ctc_prefix_beam_search | 16, -1 | 7.11 | 0.082156 |  
| conformer | 47.63 M | conf/chunk_conformer.yaml | spec_aug + shift | test-clean | attention_rescoring | 16, -1 | 7.11 | 0.071000 |  


## Transformer

train: Epoch 120, 4 V100-32G, 27 Day, best avg: 10

| Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER |  
| --- | --- | --- | --- | --- | --- | --- | --- |
| transformer | 32.52 M | conf/transformer.yaml | spec_aug  | test-clean | attention | 6.382194232940674 | 0.049661 |  
| transformer | 32.52 M | conf/transformer.yaml | spec_aug  | test-clean | ctc_greedy_search | 6.382194232940674 | 0.049566 |  
| transformer | 32.52 M | conf/transformer.yaml | spec_aug  | test-clean | ctc_prefix_beam_search | 6.382194232940674 | 0.049585 |  
| transformer | 32.52 M | conf/transformer.yaml | spec_aug  | test-clean | attention_rescoring | 6.382194232940674 | 0.038135 |


================================================
FILE: examples/librispeech/asr1/cmd.sh
================================================
# ====== About run.pl, queue.pl, slurm.pl, and ssh.pl ======
# Usage: <cmd>.pl [options] JOB=1:<nj> <log> <command...>
# e.g.
#   run.pl --mem 4G JOB=1:10 echo.JOB.log echo JOB
#
# Options:
#   --time <time>: Limit the maximum time to execute.
#   --mem <mem>: Limit the maximum memory usage.
#   --max-jobs-run <njob>: Limit the number parallel jobs. This is ignored for non-array jobs.
#   --num-threads <nthreads>: Specify the number of CPU cores.
#   --gpu <ngpu>: Specify the number of GPU devices.
#   --config: Change the configuration file from default.
#
# "JOB=1:10" is used for "array jobs" and it can control the number of parallel jobs.
# The left string of "=", i.e. "JOB", is replaced by <N>(Nth job) in the command and the log file name,
# e.g. "echo JOB" is changed to "echo 3" for the 3rd job and "echo 8" for 8th job respectively.
# Note that the number must start with a positive number, so you can't use "JOB=0:10" for example.
#
# run.pl, queue.pl, slurm.pl, and ssh.pl have unified interface, not depending on its backend.
# These options are mapping to specific options for each backend and
# it is configured by "conf/queue.conf" and "conf/slurm.conf" by default.
# If jobs failed, your configuration might be wrong for your environment.
#
#
# The official documentation for run.pl, queue.pl, slurm.pl, and ssh.pl:
#   "Parallelization in Kaldi": http://kaldi-asr.org/doc/queue.html
# =========================================================~


# Select the backend used by run.sh from "local", "sge", "slurm", or "ssh"
cmd_backend='local'

# Export train_cmd / cuda_cmd / decode_cmd for the chosen backend:
#   train_cmd  - the other usage
#   cuda_cmd   - used for "*_train.py": "--gpu" is appended optionally by run.sh
#   decode_cmd - used for "*_recog.py"
case "${cmd_backend}" in
    local)
        # Local machine, without any Job scheduling system
        export train_cmd="run.pl"
        export cuda_cmd="run.pl"
        export decode_cmd="run.pl"
        ;;

    sge)
        # "qsub" (SGE, Torque, PBS, etc.)
        # The default setting is written in conf/queue.conf.
        # You must change "-q g.q" for the "queue" for your environment.
        # To know the "queue" names, type "qhost -q"
        # Note that to use "--gpu *", you have to setup "complex_value" for the system scheduler.
        export train_cmd="queue.pl"
        export cuda_cmd="queue.pl"
        export decode_cmd="queue.pl"
        ;;

    slurm)
        # "sbatch" (Slurm)
        # The default setting is written in conf/slurm.conf.
        # You must change "-p cpu" and "-p gpu" for the "partition" for your environment.
        # To know the "partition" names, type "sinfo".
        # You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*"
        # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".
        export train_cmd="slurm.pl"
        export cuda_cmd="slurm.pl"
        export decode_cmd="slurm.pl"
        ;;

    ssh)
        # You have to create ".queue/machines" to specify the host to execute jobs.
        # e.g. .queue/machines
        #   host1
        #   host2
        #   host3
        # Assuming you can login them without any password, i.e. You have to set ssh keys.
        export train_cmd="ssh.pl"
        export cuda_cmd="ssh.pl"
        export decode_cmd="ssh.pl"
        ;;

    jhu)
        # This is an example of specifying several unique options in the JHU CLSP cluster setup.
        # Users can modify/add their own command options according to their cluster environments.
        export train_cmd="queue.pl --mem 2G"
        export cuda_cmd="queue-freegpu.pl --mem 2G --gpu 1 --config conf/gpu.conf"
        export decode_cmd="queue.pl --mem 4G"
        ;;

    *)
        echo "$0: Error: Unknown cmd_backend=${cmd_backend}" 1>&2
        return 1
        ;;
esac


================================================
FILE: examples/librispeech/asr1/conf/augmentation.json
================================================
[
  {
    "type": "speed",
    "params": {
      "min_speed_rate": 0.9,
      "max_speed_rate": 1.1,
      "num_rates": 3
    },
    "prob": 0.0
  },
  {
    "type": "shift",
    "params": {
      "min_shift_ms": -5,
      "max_shift_ms": 5
    },
    "prob": 1.0
  },
  {
    "type": "specaug",
    "params": {
      "W": 0,
      "warp_mode": "PIL",
      "F": 10,
      "n_freq_masks": 2,
      "T": 50,
      "n_time_masks": 2,
      "p": 1.0,
      "adaptive_number_ratio": 0,
      "adaptive_size_ratio": 0,
      "max_n_time_masks": 20,
      "replace_with_zero": true
    },
    "prob": 1.0
  }
]


================================================
FILE: examples/librispeech/asr1/conf/chunk_conformer.yaml
================================================
############################################
#           Network Architecture           #
############################################
cmvn_file: 
cmvn_file_type: "json"
# encoder related
encoder: conformer
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 or conv2d8
    normalize_before: True
    use_cnn_module: True
    cnn_module_kernel: 15
    activation_type: 'swish'
    pos_enc_layer_type: 'rel_pos'
    selfattention_layer_type: 'rel_selfattn'
    causal: True
    use_dynamic_chunk: true
    cnn_module_norm: 'layer_norm' # using nn.LayerNorm makes model converge faster
    use_dynamic_left_chunk: false

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    ctc_weight: 0.3
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false


###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.train
dev_manifest: data/manifest.dev
test_manifest: data/manifest.test

###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_char/vocab.txt 
unit_type: 'spm'
spm_model_prefix: 'data/lang_char/bpe_unigram_5000'
mean_std_filepath: ""
preprocess_config: conf/preprocess.yaml
feat_dim: 80
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 16
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0 
num_workers: 0
subsampling_factor: 1
num_encs: 1

###########################################
#                 Training                #
###########################################
n_epoch: 120
accum_grad: 8
global_grad_clip: 5.0
optim: adam
optim_conf:
  lr: 0.001
  weight_decay: 1.0e-06 
scheduler: warmuplr     
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 100
checkpoint:
  kbest_n: 50
  latest_n: 5


================================================
FILE: examples/librispeech/asr1/conf/chunk_transformer.yaml
================================================
############################################
#           Network Architecture           #
############################################
cmvn_file: 
cmvn_file_type: "json"
# encoder related
encoder: transformer
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 or conv2d8
    normalize_before: true
    use_dynamic_chunk: true
    use_dynamic_left_chunk: false

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    ctc_weight: 0.3
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false

###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.train
dev_manifest: data/manifest.dev
test_manifest: data/manifest.test


###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_char/vocab.txt 
unit_type: 'spm'
spm_model_prefix: 'data/lang_char/bpe_unigram_5000'
mean_std_filepath: ""
preprocess_config: conf/preprocess.yaml
feat_dim: 80
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 64
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 0
subsampling_factor: 1
num_encs: 1


###########################################
#                 Training                #
###########################################
n_epoch: 120
accum_grad: 1
global_grad_clip: 5.0
optim: adam
optim_conf:
  lr: 0.001
  weight_decay: 1.0e-06
scheduler: warmuplr     
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 100
checkpoint:
  kbest_n: 50
  latest_n: 5

================================================
FILE: examples/librispeech/asr1/conf/conformer.yaml
================================================
############################################
#           Network Architecture           #
############################################
cmvn_file: 
cmvn_file_type: "json"
# encoder related
encoder: conformer
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 or conv2d8
    normalize_before: True
    use_cnn_module: True
    cnn_module_kernel: 15
    activation_type: 'swish'
    pos_enc_layer_type: 'rel_pos'
    selfattention_layer_type: 'rel_selfattn'

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    ctc_weight: 0.3
    ctc_grad_norm_type: null 
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false


###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.train
dev_manifest: data/manifest.dev
test_manifest: data/manifest.test-clean


###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_char/vocab.txt 
unit_type: 'spm'
spm_model_prefix: 'data/lang_char/bpe_unigram_5000'
mean_std_filepath: ""
preprocess_config: conf/preprocess.yaml
feat_dim: 80
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 16
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 0
subsampling_factor: 1
num_encs: 1
  

###########################################
#                 Training                #
###########################################
n_epoch: 70
accum_grad: 8
global_grad_clip: 3.0
optim: adam
optim_conf:
  lr: 0.004
  weight_decay: 1.0e-06
scheduler: warmuplr     
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 100
checkpoint:
  kbest_n: 50
  latest_n: 5


================================================
FILE: examples/librispeech/asr1/conf/preprocess.yaml
================================================
process:
  # extract kaldi fbank from PCM
  - type: fbank_kaldi
    fs: 16000
    n_mels: 80
    n_shift: 160
    win_length: 400
    dither: 0.1
  - type: cmvn_json
    cmvn_path: data/mean_std.json
  # these three processes are a.k.a. SpecAugment
  - type: time_warp
    max_time_warp: 5
    inplace: true
    mode: PIL
  - type: freq_mask
    F: 30
    n_mask: 2
    inplace: true
    replace_with_zero: false
  - type: time_mask
    T: 40
    n_mask: 2
    inplace: true
    replace_with_zero: false


================================================
FILE: examples/librispeech/asr1/conf/transformer.yaml
================================================
############################################
#           Network Architecture           #
############################################
cmvn_file: 
cmvn_file_type: "json"
# encoder related
encoder: transformer
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 or conv2d8
    normalize_before: true

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    ctc_weight: 0.3
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false


# https://yaml.org/type/float.html
###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.train
dev_manifest: data/manifest.dev
test_manifest: data/manifest.test-clean

###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_char/vocab.txt
unit_type: 'spm'
spm_model_prefix: 'data/lang_char/bpe_unigram_5000'
mean_std_filepath: ""
preprocess_config: conf/preprocess.yaml
feat_dim: 80
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 32 
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 0
subsampling_factor: 1
num_encs: 1


###########################################
#                 Training                #
###########################################
n_epoch: 120 
accum_grad: 4
global_grad_clip: 5.0
optim: adam
optim_conf:
  lr: 0.004
  weight_decay: 1.0e-06
scheduler: warmuplr     
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 100
checkpoint:
  kbest_n: 50
  latest_n: 5


================================================
FILE: examples/librispeech/asr1/conf/tuning/chunk_decode.yaml
================================================
decode_batch_size: 128
error_rate_type: wer
decoding_method: attention  # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
beam_size: 10
ctc_weight: 0.5 # ctc weight for attention rescoring decode mode.
decoding_chunk_size: -1 # decoding chunk size. Defaults to -1.
    # <0: for decoding, use full chunk.
    # >0: for decoding, use fixed chunk size as set.
    # 0: used for training, it's prohibited here. 
num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
simulate_streaming: true  # simulate streaming inference. Defaults to False.


================================================
FILE: examples/librispeech/asr1/conf/tuning/decode.yaml
================================================
decode_batch_size: 64
error_rate_type: wer
decoding_method: attention  # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
beam_size: 10
ctc_weight: 0.5 # ctc weight for attention rescoring decode mode.
decoding_chunk_size: -1 # decoding chunk size. Defaults to -1.
    # <0: for decoding, use full chunk.
    # >0: for decoding, use fixed chunk size as set.
    # 0: used for training, it's prohibited here. 
num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
simulate_streaming: False  # simulate streaming inference. Defaults to False.


================================================
FILE: examples/librispeech/asr1/local/align.sh
================================================
#!/bin/bash
# Run CTC alignment with a trained checkpoint.
#
# usage: align.sh config_path decode_config_path ckpt_path_prefix
# Reads CUDA_VISIBLE_DEVICES (to count GPUs) and BIN_DIR from the environment.

if [ $# != 3 ];then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix"
    exit 1
fi

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when it is empty).
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
decode_config_path=$2
ckpt_prefix=$3

# `type` only names the result file. It was never set in this script, which
# produced a hidden "${output_dir}/.align" file; honor an exported value and
# otherwise fall back to a visible default name.
type=${type:-ctc}

batch_size=1
output_dir=${ckpt_prefix}
mkdir -p "${output_dir}"

# align dump in `result_file`
# .tier, .TextGrid dump in `dir of result_file`
python3 -u ${BIN_DIR}/alignment.py \
--ngpu ${ngpu} \
--config "${config_path}" \
--decode_cfg "${decode_config_path}" \
--result_file "${output_dir}/${type}.align" \
--checkpoint_path "${ckpt_prefix}" \
--opts decode.decode_batch_size ${batch_size}

if [ $? -ne 0 ]; then
    echo "Failed in ctc alignment!"
    exit 1
fi

exit 0


================================================
FILE: examples/librispeech/asr1/local/data.sh
================================================
#!/bin/bash
# Prepare LibriSpeech data for this recipe.
#
# Stages (selected with `stage` / `stop_stage`):
#   -1  download the corpus and build raw manifests (train/dev/test are concatenations)
#    0  compute feature mean/stddev on the training manifest
#    1  build the sentencepiece vocabulary
#    2  format every manifest (token ids, vocab size)
#    3  filter too long / too short utterances from train and dev
#
# Reads MAIN_ROOT from the environment.

stage=-1
stop_stage=100
dict_dir=data/lang_char

# bpemode (unigram or bpe)
nbpe=5000
bpemode=unigram
bpeprefix="${dict_dir}/bpe_${bpemode}_${nbpe}"

stride_ms=10
window_ms=25
sample_rate=16000
feat_dim=80

# presumably lets command-line options override the variables above -- verify in parse_options.sh
source ${MAIN_ROOT}/utils/parse_options.sh


mkdir -p data
mkdir -p ${dict_dir}
TARGET_DIR=${MAIN_ROOT}/dataset
mkdir -p ${TARGET_DIR}

if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
    # download data, generate manifests
    python3 ${TARGET_DIR}/librispeech/librispeech.py \
    --manifest_prefix="data/manifest" \
    --target_dir="${TARGET_DIR}/librispeech" \
    --full_download="True"

    if [ $? -ne 0 ]; then
        echo "Prepare LibriSpeech failed. Terminated."
        exit 1
    fi

    for sub in train-clean-100 train-clean-360 train-other-500 dev-clean dev-other test-clean test-other; do
        mv data/manifest.${sub} data/manifest.${sub}.raw
    done

    # start from empty files because the loops below append
    rm -rf data/manifest.train.raw data/manifest.dev.raw  data/manifest.test.raw
    for sub in train-clean-100 train-clean-360 train-other-500; do
        cat data/manifest.${sub}.raw >> data/manifest.train.raw
    done

    for sub in dev-clean dev-other; do
        cat data/manifest.${sub}.raw >> data/manifest.dev.raw
    done

    for sub in test-clean test-other; do
        cat data/manifest.${sub}.raw >> data/manifest.test.raw
    done
fi

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # compute mean and stddev for normalizer
    num_workers=$(nproc)
    python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
    --manifest_path="data/manifest.train.raw" \
    --num_samples=-1 \
    --spectrum_type="fbank" \
    --feat_dim=${feat_dim} \
    --delta_delta=false \
    --sample_rate=${sample_rate} \
    --stride_ms=${stride_ms} \
    --window_ms=${window_ms} \
    --use_dB_normalization=False \
    --num_workers=${num_workers} \
    --output_path="data/mean_std.json"

    if [ $? -ne 0 ]; then
        echo "Compute mean and stddev failed. Terminated."
        exit 1
    fi
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # build vocabulary
    python3 ${MAIN_ROOT}/utils/build_vocab.py \
    --unit_type "spm" \
    --spm_vocab_size=${nbpe} \
    --spm_mode ${bpemode} \
    --spm_model_prefix ${bpeprefix} \
    --vocab_path="${dict_dir}/vocab.txt" \
    --manifest_paths="data/manifest.train.raw"

    if [ $? -ne 0 ]; then
        echo "Build vocabulary failed. Terminated."
        exit 1
    fi
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # format manifest with tokenids, vocab size
    # The jobs run in parallel. An `exit` inside a background job only ends that
    # job, and a bare `wait` always returns 0, so remember every pid and wait on
    # each one to actually observe failures.
    format_pids=()
    for sub in train dev test dev-clean dev-other test-clean test-other; do
        python3 ${MAIN_ROOT}/utils/format_data.py \
        --cmvn_path "data/mean_std.json" \
        --unit_type "spm" \
        --spm_model_prefix ${bpeprefix} \
        --vocab_path="${dict_dir}/vocab.txt" \
        --manifest_path="data/manifest.${sub}.raw" \
        --output_path="data/manifest.${sub}" &
        format_pids+=($!)
    done

    format_failed=0
    for pid in "${format_pids[@]}"; do
        wait "${pid}" || format_failed=1
    done
    if [ ${format_failed} -ne 0 ]; then
        echo "Format manifest failed. Terminated."
        exit 1
    fi

    # train/dev are filtered in stage 3, so park the formatted versions as *.fmt
    for sub in train dev; do
        mv data/manifest.${sub} data/manifest.${sub}.fmt
    done
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    for sub in train dev; do
        remove_longshortdata.py --maxframes 3000 --maxchars 400 --stride_ms ${stride_ms} data/manifest.${sub}.fmt data/manifest.${sub}
        if [ $? -ne 0 ]; then
            echo "Remove long/short data failed. Terminated."
            exit 1
        fi
    done
fi

echo "LibriSpeech Data preparation done."
exit 0


================================================
FILE: examples/librispeech/asr1/local/download_lm_en.sh
================================================
#!/bin/bash
# Fetch the English KenLM language model into data/lm.
# Reads MAIN_ROOT from the environment; `download` comes from utility.sh.

. ${MAIN_ROOT}/utils/utility.sh

DIR=data/lm
URL=https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm
MD5="099a601759d467cd0a8523ff939819c5"
TARGET=${DIR}/common_crawl_00.prune01111.trie.klm

mkdir -p ${DIR}

echo "Download language model ..."
if ! download $URL $MD5 $TARGET; then
    echo "Fail to download the language model!"
    exit 1
fi

exit 0


================================================
FILE: examples/librispeech/asr1/local/export.sh
================================================
#!/bin/bash
# Export a trained checkpoint with export.py.
#
# usage: export.sh config_path ckpt_prefix jit_model_path
# Reads CUDA_VISIBLE_DEVICES (to count GPUs) and BIN_DIR from the environment.

if [ $# != 3 ];then
    echo "usage: $0 config_path ckpt_prefix jit_model_path"
    exit 1
fi

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when it is empty).
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
ckpt_path_prefix=$2
jit_model_export_path=$3

# Paths are quoted so that arguments containing spaces reach export.py intact.
if ! python3 -u ${BIN_DIR}/export.py \
    --ngpu ${ngpu} \
    --config "${config_path}" \
    --checkpoint_path "${ckpt_path_prefix}" \
    --export_path "${jit_model_export_path}"; then
    echo "Failed in export!"
    exit 1
fi


exit 0


================================================
FILE: examples/librispeech/asr1/local/test.sh
================================================
#!/bin/bash
# Decode LibriSpeech test-clean with a trained checkpoint and score the output.
#
# usage: test.sh config_path decode_config_path ckpt_path_prefix
# Reads CUDA_VISIBLE_DEVICES (to count GPUs), BIN_DIR and MAIN_ROOT from the
# environment.

set -e

expdir=exp
datadir=data
nj=32

lmtag=

# The full list would be "test-clean test-other dev-clean dev-other".
recog_set="test-clean"


stage=0
stop_stage=100

if [ $# != 3 ];then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix"
    exit 1
fi

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when it is empty).
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
decode_config_path=$2
ckpt_prefix=$3

# A config whose file name contains "chunk_" selects streaming (chunk) mode.
chunk_mode=false
if [[ ${config_path} =~ ^.*chunk_.*yaml$ ]];then
    chunk_mode=true
fi
echo "chunk mode ${chunk_mode}"


# download language model
#bash local/download_lm_en.sh
#if [ $? -ne 0 ]; then
#    exit 1
#fi


# Decode with one decoding method and score it against the test-clean reference.
#   $1: decoding method, $2: decode batch size
# Writes ${ckpt_prefix}.<method>.rsl, .rsl.text and .error.
decode_and_score() {
    local type=$1
    local batch_size=$2
    local rsl=${ckpt_prefix}.${type}.rsl

    echo "decoding ${type}"
    # Test the command directly: with `set -e` a separate `$?` check after the
    # command would never be reached on failure.
    if ! python3 -u ${BIN_DIR}/test.py \
        --ngpu ${ngpu} \
        --config ${config_path} \
        --decode_cfg ${decode_config_path} \
        --result_file ${rsl} \
        --checkpoint_path ${ckpt_prefix} \
        --opts decode.decoding_method ${type} \
        --opts decode.decode_batch_size ${batch_size}; then
        echo "Failed in evaluation!"
        exit 1
    fi

    python3 ${MAIN_ROOT}/utils/format_rsl.py \
        --origin_hyp ${rsl} \
        --trans_hyp ${rsl}.text

    python3 ${MAIN_ROOT}/utils/compute-wer.py --char=1 --v=1 \
        data/manifest.test-clean.text ${rsl}.text > ${ckpt_prefix}.${type}.error
    echo "decoding ${type} done."
}


if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # format the reference test file
    python3 ${MAIN_ROOT}/utils/format_rsl.py \
        --origin_ref data/manifest.test-clean.raw \
        --trans_ref data/manifest.test-clean.text

    for type in attention ctc_greedy_search; do
        if [ ${chunk_mode} == true ];then
            # stream decoding only support batchsize=1
            batch_size=1
        else
            batch_size=64
        fi
        decode_and_score ${type} ${batch_size}
    done

    # these methods always run with batch size 1
    for type in ctc_prefix_beam_search attention_rescoring; do
        decode_and_score ${type} 1
    done
fi

# sclite scoring; not reached with the default stop_stage=100.
if [ ${stage} -le 101 ] && [ ${stop_stage} -ge 101 ]; then
    # Write the reference to the same path that sclite reads below.
    python3 ${MAIN_ROOT}/utils/format_rsl.py \
        --origin_ref data/manifest.test-clean.raw \
        --trans_ref_sclite data/manifest.test-clean.text.sclite


    output_dir=${ckpt_prefix}
    for type in attention ctc_greedy_search ctc_prefix_beam_search attention_rescoring; do
        # Hypotheses live where stage 0 wrote them: ${ckpt_prefix}.<method>.rsl
        python3 ${MAIN_ROOT}/utils/format_rsl.py \
            --origin_hyp ${ckpt_prefix}.${type}.rsl \
            --trans_hyp_sclite ${ckpt_prefix}.${type}.rsl.text.sclite

        mkdir -p ${output_dir}/${type}_sclite
        sclite -i wsj -r data/manifest.test-clean.text.sclite -h  ${ckpt_prefix}.${type}.rsl.text.sclite  -e utf-8 -o all -O ${output_dir}/${type}_sclite -c NOASCII
    done
fi


echo "Finished"

exit 0


================================================
FILE: examples/librispeech/asr1/local/test_wav.sh
================================================
#!/bin/bash
# Decode a single audio file with a trained checkpoint.
#
# usage: test_wav.sh config_path decode_config_path ckpt_path_prefix audio_file
# Reads CUDA_VISIBLE_DEVICES (to count GPUs) and BIN_DIR from the environment.

if [ $# != 4 ];then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix audio_file"
    exit 1
fi

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when it is empty).
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
decode_config_path=$2
ckpt_prefix=$3
audio_file=$4

# Fetch the demo wav into data/ (-nc: skip when it is already there).
mkdir -p data
wget -nc https://paddlespeech.cdn.bcebos.com/datasets/single_wav/en/demo_002_en.wav -P data/
if [ $? -ne 0 ]; then
   exit 1
fi

# Quoted so that an empty path or a path with spaces is rejected correctly;
# unquoted, `[ ! -f ]` with an empty value evaluates to false and lets it through.
if [ ! -f "${audio_file}" ]; then
    echo "Please input the right audio_file path"
    exit 1
fi

# bpemode (unigram or bpe)
nbpe=5000
bpemode=unigram
bpeprefix="data/bpe_${bpemode}_${nbpe}"
bpemodel=${bpeprefix}.model

# A config whose file name contains "chunk_" selects streaming (chunk) mode.
chunk_mode=false
if [[ ${config_path} =~ ^.*chunk_.*yaml$ ]];then
    chunk_mode=true
fi

# download language model
#bash local/download_lm_ch.sh
#if [ $? -ne 0 ]; then
#    exit 1
#fi


for type in attention_rescoring; do
    echo "decoding ${type}"
    batch_size=1
    output_dir=${ckpt_prefix}
    mkdir -p "${output_dir}"
    python3 -u ${BIN_DIR}/test_wav.py \
    --ngpu ${ngpu} \
    --config "${config_path}" \
    --decode_cfg "${decode_config_path}" \
    --result_file "${output_dir}/${type}.rsl" \
    --checkpoint_path "${ckpt_prefix}" \
    --opts decode.decoding_method ${type} \
    --opts decode.decode_batch_size ${batch_size} \
    --audio_file "${audio_file}"

    #score_sclite.sh --bpe ${nbpe} --bpemodel ${bpemodel}.model --wer true ${expdir}/${decode_dir} ${dict}
    if [ $? -ne 0 ]; then
        echo "Failed in evaluation!"
        exit 1
    fi
done

exit 0


================================================
FILE: examples/librispeech/asr1/local/train.sh
================================================
#!/bin/bash
# Train a model, on CPU or on the GPUs listed in CUDA_VISIBLE_DEVICES.
#
# usage: CUDA_VISIBLE_DEVICES=0 train.sh config_path ckpt_name ips(optional)
# Checkpoints and logs go under exp/<ckpt_name>. Reads BIN_DIR from the environment.

# Accept 2 or 3 arguments. The two tests must be OR-ed: "fewer than 2 AND more
# than 3" can never be true, so the usage check would never fire.
if [ $# -lt 2 ] || [ $# -gt 3 ];then
    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
    exit 1
fi

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when it is empty).
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
ckpt_name=$2
ips=$3

# Only pass --ips to the launcher when a node list was given.
if [ -z "${ips}" ];then
  ips_config=
else
  ips_config="--ips="${ips}
fi

mkdir -p exp

# seed may break model convergence
seed=0
if [ ${seed} != 0 ]; then
    export FLAGS_cudnn_deterministic=True
fi

# export FLAGS_cudnn_exhaustive_search=true
# export FLAGS_conv_workspace_size_limit=4000

# default memory allocator strategy may cause gpu training hang
# for no OOM raised when memory exhausted
export FLAGS_allocator_strategy=naive_best_fit

if [ ${ngpu} == 0 ]; then
python3 -u ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--config ${config_path} \
--output exp/${ckpt_name} \
--seed ${seed}
else
python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--config ${config_path} \
--output exp/${ckpt_name} \
--seed ${seed}
fi
# Save the training status now: the seed cleanup below overwrites $?, which
# previously made the failure check test the cleanup instead of the training.
train_status=$?

if [ ${seed} != 0 ]; then
    unset FLAGS_cudnn_deterministic
fi

if [ ${train_status} -ne 0 ]; then
    echo "Failed in training!"
    exit 1
fi

exit 0


================================================
FILE: examples/librispeech/asr1/path.sh
================================================
# Environment for this recipe; meant to be sourced from the recipe directory.

# Repository root: three directories above the current working directory.
export MAIN_ROOT=$(realpath ${PWD}/../../../)

# Put the repo root, the sctk binaries and the recipe-local utils first on PATH.
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/tools/sctk/bin:${PWD}/utils:${PATH}
export LC_ALL=C

# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONDONTWRITEBYTECODE=1
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/


# BIN_DIR points at the entry scripts of the selected model.
MODEL=u2
export BIN_DIR=${MAIN_ROOT}/paddlespeech/s2t/exps/${MODEL}/bin


================================================
FILE: examples/librispeech/asr1/run.sh
================================================
#!/bin/bash
# Staged driver for this recipe: data preparation, training, checkpoint
# averaging, testing, CTC alignment, single-wav test and model export.
# Stages whose number lies in [stage, stop_stage] are executed.
set -e

. ./path.sh || exit 1;
. ./cmd.sh || exit 1;

# Defaults. They are declared before parse_options.sh is sourced so that
# it can see them (presumably to allow `--name value` overrides).
gpus=0,1,2,3
stage=0
stop_stage=50
conf_path=conf/transformer.yaml
ips=            #xx.xx.xx.xx,xx.xx.xx.xx
decode_conf_path=conf/tuning/decode.yaml
avg_num=30
audio_file=data/demo_002_en.wav

. ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

avg_ckpt=avg_${avg_num}
# Checkpoint name = config file name up to its first dot.
ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}')
echo "checkpoint name ${ckpt}"

# Prefix of the averaged checkpoint shared by the test/align/export stages.
avg_ckpt_prefix=exp/${ckpt}/checkpoints/${avg_ckpt}

# Succeeds when stage number $1 lies within [stage, stop_stage].
stage_enabled() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

if stage_enabled 0; then
    # prepare data
    bash ./local/data.sh || exit -1
fi

if stage_enabled 1; then
    # train model, all `ckpt` under `exp` dir
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips}
fi

if stage_enabled 2; then
    # avg n best model
    avg.sh best exp/${ckpt}/checkpoints ${avg_num}
fi

if stage_enabled 3; then
    # test ckpt avg_n
    CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} ${avg_ckpt_prefix} || exit -1
fi

if stage_enabled 4; then
    # ctc alignment of test data
    CUDA_VISIBLE_DEVICES=0 ./local/align.sh ${conf_path} ${decode_conf_path} ${avg_ckpt_prefix} || exit -1
fi

if stage_enabled 5; then
    # test a single .wav file
    CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} ${decode_conf_path} ${avg_ckpt_prefix} ${audio_file} || exit -1
fi

# Stage 51 is above the default stop_stage (50), so it only runs when
# stop_stage is raised to at least 51.
if stage_enabled 51; then
     # export ckpt avg_n
     CUDA_VISIBLE_DEVICES= ./local/export.sh ${conf_path} ${avg_ckpt_prefix} ${avg_ckpt_prefix}.jit
fi


================================================
FILE: examples/librispeech/asr2/.gitignore
================================================
dump
fbank
exp
data


================================================
FILE: examples/librispeech/asr2/README.md
================================================
# Transformer/Conformer ASR with Librispeech ASR2

This example contains code used to train a [u2](https://arxiv.org/pdf/2012.05481.pdf) model (Transformer or [Conformer](https://arxiv.org/pdf/2005.08100.pdf) model) with [Librispeech dataset](http://www.openslr.org/resources/12) and use some functions in kaldi.

To use this example, you need to install Kaldi first.

## Overview

All the scripts you need are in ```run.sh```. There are several stages in ```run.sh```, and each stage has its function.

| Stage | Function                                                     |
|:---- |:----------------------------------------------------------- |
| 0     | Process data. It includes: <br>       (1) Download the dataset <br>       (2) Calculate the CMVN of the train dataset <br>       (3) Get the vocabulary file <br>       (4) Get the manifest files of the train, development and test dataset<br>       (5) Get the sentencepiece model |
| 1     | Train the model                                              |
| 2     | Get the final model by averaging the top-k models, set k = 1 means to choose the best model |
| 3     | Test the final model performance                             |
| 4     | Join ctc decoder and use transformer language model to score |
| 5     | Get ctc alignment of test data using the final model         |
| 6     | Calculate the perplexity of transformer language model        |


You can choose to run a range of stages by setting `stage` and `stop_stage`.

For example, if you want to execute the code in stage 2 and stage 3, you can run this script:
```bash
bash run.sh --stage 2 --stop_stage 3
```
Or you can set `stage` equal to `stop_stage` to only run one stage.
For example, if you only want to run `stage 0`, you can use the script below:
```bash
bash run.sh --stage 0 --stop_stage 0
```
The document below will describe the scripts in `run.sh` in detail.
## The Environment Variables
The path.sh contains the environment variables. 
```bash
. ./path.sh
. ./cmd.sh
```
This script needs to be run first. And another script is also needed:
```bash
source ${MAIN_ROOT}/utils/parse_options.sh
```
It will support the way of using `--variable value` in the shell scripts.
## The Local Variables
Some local variables are set in `run.sh`. 
`gpus` denotes the GPU number you want to use. If you set `gpus=`, it means you only use CPU. 
`stage` denotes the number of the stage you want to start from in the experiments.
`stop_stage` denotes the number of the stage you want to end at in the experiments. 
`conf_path` denotes the config path of the model.
`dict_path` denotes the path of the vocabulary file.
`avg_num` denotes the number K of top-K models you want to average to get the final model.
`ckpt` denotes the checkpoint prefix of the model, e.g. "transformer"

You can set the local variables (except `ckpt`) when you use `run.sh`

For example, you can set `gpus` and `avg_num` on the command line:
```bash
bash run.sh --gpus 0,1 --avg_num 10
```
## Stage 0: Data Processing
To use this example, you need to process the data first; you can use stage 0 in ```run.sh``` to do this. The code is shown below:

```bash
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
     # prepare data
     bash ./local/data.sh || exit -1
 fi
```
Stage 0 is for processing the data.

If you only want to process the data, you can run

```bash
bash run.sh --stage 0 --stop_stage 0
```

You can also just run these scripts in your command line.

```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
```

After processing the data, the ``data`` directory will look like this:

```bash
data/
├── dev
├── dev_clean
├── dev-clean.meta
├── dev_org
├── dev_other
├── dev-other.meta
├── lang_char
├── manifest.dev
├── manifest.dev-clean
├── manifest.dev-clean.raw
├── manifest.dev-other
├── manifest.dev-other.raw
├── manifest.dev.raw
├── manifest.test-clean
├── manifest.test-clean.raw
├── manifest.test-other
├── manifest.test-other.raw
├── manifest.test.raw
├── manifest.train
├── manifest.train-clean-100.raw
├── manifest.train-clean-360.raw
├── manifest.train-other-500.raw
├── manifest.train.raw
├── temp1
├── temp2
├── temp3
├── test_clean
├── test-clean.meta
├── test_other
├── test-other.meta
├── train_960
├── train_960_org
├── train_clean_100
├── train-clean-100.meta
├── train_clean_360
├── train-clean-360.meta
├── train_other_500
├── train-other-500.meta
├── train_sp
└── train_sp_org
```

## Stage 1: Model Training
If you want to train the model, you can use stage 1 in ```run.sh```. The code is shown below. 
```bash
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
     CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
 fi
```
If you want to train the model, you can use the script below to execute stage 0 and stage 1:
```bash
bash run.sh --stage 0 --stop_stage 1
```
or you can run these scripts in the command line (only use CPU).
```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/transformer.yaml transformer
```
## Stage 2: Top-k Models Averaging
After training the model, we need to get the final model for testing and inference. In every epoch, the model checkpoint is saved, so we can choose the last K models and average the parameters of the models to get the final model. We can use stage 2 to do this, and the code is shown below:
```bash
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     # avg n best model
     avg.sh latest exp/${ckpt}/checkpoints ${avg_num}
 fi
```
The `avg.sh` script is in `../../../utils/`, which is defined in `path.sh`.
If you want to get the final model, you can use the script below to execute stage 0, stage 1, and stage 2:
```bash
bash run.sh --stage 0 --stop_stage 2
```
or you can run these scripts in the command line (only use CPU).
```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/transformer.yaml transformer
avg.sh best exp/transformer/checkpoints 10
```
## Stage 3: Model Testing
Stage 3 is to evaluate the model performance with an attention rescore decoder. The code of this stage is shown below:
```bash
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # attetion resocre decoder
    ./local/test.sh ${conf_path} ${dict_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
fi
```
If you want to train a model and test it, you can use the script below to execute stage 0, stage 1, stage 2, and stage 3 :
```bash
bash run.sh --stage 0 --stop_stage 3
```
or you can run these scripts in the command line (only use CPU).
```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/transformer.yaml transformer
avg.sh latest exp/transformer/checkpoints 10
CUDA_VISIBLE_DEVICES= ./local/test.sh conf/transformer.yaml data/train_960_unigram5000_units.txt exp/transformer/checkpoints/avg_10
```
## Stage 4: Model Testing with Join CTC Decoder
Stage 4 is to evaluate the model performance with the join ctc decoder. The code of this stage is shown below:
```bash
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    # join ctc decoder, use transformerlm to score
    ./local/recog.sh  --ckpt_prefix exp/${ckpt}/checkpoints/${avg_ckpt}
fi
```
If you want to train a model and test it, you can use the script below to execute stage 0, stage 1, stage 2, and stage 4 :
```bash
bash run.sh --stage 0 --stop_stage 3
bash run.sh --stage 4 --stop_stage 4
```
or you can run these scripts in the command line (only use CPU).
```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/transformer.yaml transformer
avg.sh latest exp/transformer/checkpoints 10
./local/recog.sh  --ckpt_prefix exp/transformer/checkpoints/avg_10
```
## Pretrained Model
You can get the pretrained models from [this](../../../docs/source/released_model.md).

Use the `tar` command to unpack the model, and then you can use the script to test it.

For example:
```bash
wget https://paddlespeech.cdn.bcebos.com/s2t/librispeech/asr2/asr2_transformer_librispeech_ckpt_0.1.1.model.tar.gz
tar xzvf asr2_transformer_librispeech_ckpt_0.1.1.model.tar.gz
source path.sh
# If you have already processed the data and generated the manifest files, you can skip the following 2 steps
bash local/data.sh --stage -1 --stop_stage -1
bash local/data.sh --stage 2 --stop_stage 2

CUDA_VISIBLE_DEVICES= ./local/test.sh conf/transformer.yaml exp/ctc/checkpoints/avg_10
```
The performance of the released models are shown [here](./RESULTS.md).

Compared with [ESPNET](https://github.com/espnet/espnet/blob/master/egs/librispeech/asr1/RESULTS.md#pytorch-large-transformer-with-specaug-4-gpus--transformer-lm-4-gpus), we use 8 GPUs, but our model size (aheads4-adim256) is smaller.
## Stage 5: CTC Alignment 
If you want to get the alignment between the audio and the text, you can use the ctc alignment. The code of this stage is shown below:
```bash
if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
    # ctc alignment of test data
    CUDA_VISIBLE_DEVICES=0 ./local/align.sh ${conf_path} ${dict_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
fi
```
If you want to train the model, test it and do the alignment, you can use the script below to execute stage 0, stage 1, stage 2, stage 3, stage 4, and stage 5:
```bash
bash run.sh --stage 0 --stop_stage 5
```
or if you only need to train a model and do the alignment, you can use these scripts to skip the test stages (stage 3 and stage 4):
```bash
bash run.sh --stage 0 --stop_stage 2
bash run.sh --stage 5 --stop_stage 5
```
or you can also use these scripts in the command line (only use CPU).
```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/transformer.yaml transformer
avg.sh best exp/transformer/checkpoints 10
CUDA_VISIBLE_DEVICES= ./local/align.sh conf/transformer.yaml data/train_960_unigram5000_units.txt exp/transformer/checkpoints/avg_10
```
## Stage 6: Perplexity Calculation 
This stage is for calculating the perplexity of the transformer language model. The code of this stage is shown below:
```bash
if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then
    ./local/cacu_perplexity.sh || exit -1
fi
```
If you only want to calculate the perplexity of the transformer language model, you can use this script:

```bash
bash run.sh --stage 6 --stop_stage 6
```


================================================
FILE: examples/librispeech/asr2/RESULTS.md
================================================
# LibriSpeech


## Transformer

| Model | Params | GPUS | Averaged Model | Config | Augmentation| Loss |  
| :-: | :-: | :------------: | :------------: | :-: | :-: | :-: |     
| transformer | 32.52M | 8 Tesla V100-SXM2-32GB | 10-best val_loss | conf/transformer.yaml | spec_aug | 6.3197922706604 |  

### Attention Rescore

| Test Set | Decode Method | #Snt | #Wrd | Corr | Sub | Del | Ins | Err | S.Err |  
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| test-clean | attention | 2620 | 52576 | 96.4 | 2.5 | 1.1 | 0.4 | 4.0 | 34.7 |  
| test-clean | ctc_greedy_search | 2620 | 52576 | 95.9 | 3.7 | 0.4 | 0.5 | 4.6 | 48.0 |  
| test-clean | ctc_prefix_beamsearch | 2620 | 52576 | 95.9 | 3.7 | 0.4 | 0.5 | 4.6 | 47.6 |  
| test-clean | attention_rescore | 2620 | 52576 | 96.8 | 2.9 | 0.3 | 0.4 | 3.7 | 38.0 |  

### JoinCTC

| Test Set | Decode Method | #Snt | #Wrd | Corr | Sub | Del | Ins | Err | S.Err |  
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| test-clean | join_ctc_only_att | 2620 | 52576 | 96.1 | 2.5 | 1.4 | 0.4 | 4.4 | 34.7 |  
| test-clean | join_ctc_w/o_lm | 2620 | 52576 | 97.2 | 2.6 | 0.3 | 0.4 | 3.2 | 34.9 |  
| test-clean | join_ctc_w_lm | 2620 | 52576 | 97.9 | 1.8 | 0.2 | 0.3 | 2.4 | 27.8 |  

Compared with [ESPNET](https://github.com/espnet/espnet/blob/master/egs/librispeech/asr1/RESULTS.md#pytorch-large-transformer-with-specaug-4-gpus--transformer-lm-4-gpus),
we use 8 GPUs, but our model size (aheads4-adim256) is smaller.


================================================
FILE: examples/librispeech/asr2/cmd.sh
================================================
# ====== About run.pl, queue.pl, slurm.pl, and ssh.pl ======
# Usage: <cmd>.pl [options] JOB=1:<nj> <log> <command...>
# e.g.
#   run.pl --mem 4G JOB=1:10 echo.JOB.log echo JOB
#
# Options:
#   --time <time>: Limit the maximum time to execute.
#   --mem <mem>: Limit the maximum memory usage.
#   --max-jobs-run <njob>: Limit the number of parallel jobs. This is ignored for non-array jobs.
#   --num-threads <nthreads>: Specify the number of CPU cores.
#   --gpu <ngpu>: Specify the number of GPU devices.
#   --config: Change the configuration file from default.
#
# "JOB=1:10" is used for "array jobs" and it can control the number of parallel jobs.
# The left string of "=", i.e. "JOB", is replaced by <N>(Nth job) in the command and the log file name,
# e.g. "echo JOB" is changed to "echo 3" for the 3rd job and "echo 8" for 8th job respectively.
# Note that the number must start with a positive number, so you can't use "JOB=0:10" for example.
#
# run.pl, queue.pl, slurm.pl, and ssh.pl have unified interface, not depending on its backend.
# These options are mapping to specific options for each backend and
# it is configured by "conf/queue.conf" and "conf/slurm.conf" by default.
# If jobs failed, your configuration might be wrong for your environment.
#
#
# The official documentation for run.pl, queue.pl, slurm.pl, and ssh.pl:
#   "Parallelization in Kaldi": http://kaldi-asr.org/doc/queue.html
# =========================================================~


# Select the backend used by run.sh from "local", "sge", "slurm", or "ssh"
cmd_backend='local'

# Export train_cmd / cuda_cmd / decode_cmd for the chosen backend.
# An unknown backend prints an error to stderr and returns 1.
case "${cmd_backend}" in
    local)
        # Local machine, without any Job scheduling system

        # The other usage
        export train_cmd="run.pl"
        # Used for "*_train.py": "--gpu" is appended optionally by run.sh
        export cuda_cmd="run.pl"
        # Used for "*_recog.py"
        export decode_cmd="run.pl"
        ;;

    sge)
        # "qsub" (SGE, Torque, PBS, etc.)
        # The default setting is written in conf/queue.conf.
        # You must change "-q g.q" for the "queue" for your environment.
        # To know the "queue" names, type "qhost -q"
        # Note that to use "--gpu *", you have to setup "complex_value" for the system scheduler.

        export train_cmd="queue.pl"
        export cuda_cmd="queue.pl"
        export decode_cmd="queue.pl"
        ;;

    slurm)
        # "sbatch" (Slurm)
        # The default setting is written in conf/slurm.conf.
        # You must change "-p cpu" and "-p gpu" for the "partition" for your environment.
        # To know the "partition" names, type "sinfo".
        # You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*"
        # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".

        export train_cmd="slurm.pl"
        export cuda_cmd="slurm.pl"
        export decode_cmd="slurm.pl"
        ;;

    ssh)
        # You have to create ".queue/machines" to specify the host to execute jobs.
        # e.g. .queue/machines
        #   host1
        #   host2
        #   host3
        # Assuming you can login them without any password, i.e. You have to set ssh keys.

        export train_cmd="ssh.pl"
        export cuda_cmd="ssh.pl"
        export decode_cmd="ssh.pl"
        ;;

    jhu)
        # This is an example of specifying several unique options in the JHU CLSP cluster setup.
        # Users can modify/add their own command options according to their cluster environments.

        export train_cmd="queue.pl --mem 2G"
        export cuda_cmd="queue-freegpu.pl --mem 2G --gpu 1 --config conf/gpu.conf"
        export decode_cmd="queue.pl --mem 4G"
        ;;

    *)
        echo "$0: Error: Unknown cmd_backend=${cmd_backend}" 1>&2
        return 1
        ;;
esac


================================================
FILE: examples/librispeech/asr2/conf/augmentation.json
================================================
[
  {
    "type": "specaug",
    "params": {
      "W": 5,
      "warp_mode": "PIL",
      "F": 30,
      "n_freq_masks": 2,
      "T": 40,
      "n_time_masks": 2,
      "p": 1.0,
      "adaptive_number_ratio": 0,
      "adaptive_size_ratio": 0,
      "max_n_time_masks": 20,
      "replace_with_zero": false
    },
    "prob": 1.0
  }
]


================================================
FILE: examples/librispeech/asr2/conf/decode/decode.yaml
================================================
batchsize: 0
beam-size: 60
ctc-weight: 0.4
lm-weight: 0.6
maxlenratio: 0.0
minlenratio: 0.0
penalty: 0.0


================================================
FILE: examples/librispeech/asr2/conf/decode/decode_att.yaml
================================================
batchsize: 0
beam-size: 60
ctc-weight: 0.0
lm-weight: 0.0
maxlenratio: 0.0
minlenratio: 0.0
penalty: 0.0


================================================
FILE: examples/librispeech/asr2/conf/decode/decode_base.yaml
================================================
decode_batch_size: 1
error_rate_type: wer
decoding_method: attention  # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
beam_size: 10
ctc_weight: 0.5 # ctc weight for attention rescoring decode mode.
decoding_chunk_size: -1 # decoding chunk size. Defaults to -1.
    # <0: for decoding, use full chunk.
    # >0: for decoding, use fixed chunk size as set.
    # 0: used for training, it's prohibited here. 
num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
simulate_streaming: False  # simulate streaming inference. Defaults to False.


================================================
FILE: examples/librispeech/asr2/conf/decode/decode_ctc.yaml
================================================
batchsize: 0
beam-size: 60
ctc-weight: 0.4
lm-weight: 0.0
maxlenratio: 0.0
minlenratio: 0.0
penalty: 0.0


================================================
FILE: examples/librispeech/asr2/conf/decode/decode_wo_lm.yaml
================================================
batchsize: 0
beam-size: 60
ctc-weight: 0.4
lm-weight: 0.0
maxlenratio: 0.0
minlenratio: 0.0
penalty: 0.0

================================================
FILE: examples/librispeech/asr2/conf/fbank.conf
================================================
--sample-frequency=16000 
--num-mel-bins=80


================================================
FILE: examples/librispeech/asr2/conf/lm/transformer.yaml
================================================
model_module: transformer

data:
    unit_type: spm

model:
    n_vocab: 5002
    pos_enc: null
    embed_unit: 128
    att_unit: 512
    head: 8
    unit: 2048
    layer: 16
    dropout_rate: 0.5
    emb_dropout_rate: 0.0
    att_dropout_rate: 0.0
    tie_weights: False 

decoding:
    batch_size: 30
    num_workers: 2


================================================
FILE: examples/librispeech/asr2/conf/pitch.conf
================================================
--sample-frequency=16000


================================================
FILE: examples/librispeech/asr2/conf/preprocess.yaml
================================================
process:
  # these three processes are a.k.a. SpecAugment
  - type: time_warp
    max_time_warp: 5
    inplace: true
    mode: PIL
  - type: freq_mask
    F: 30
    n_mask: 2
    inplace: true
    replace_with_zero: false
  - type: time_mask
    T: 40
    n_mask: 2
    inplace: true
    replace_with_zero: false


================================================
FILE: examples/librispeech/asr2/conf/transformer.yaml
================================================
# https://yaml.org/type/float.html
############################################
#           Network Architecture           #
############################################
cmvn_file:  
cmvn_file_type: "json"
# encoder related
encoder: transformer
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 and conv2d8
    normalize_before: true

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    ctc_weight: 0.3
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false

###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.train
dev_manifest: data/manifest.dev
test_manifest: data/manifest.test-clean

###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_char/train_960_unigram5000_units.txt
unit_type: spm
spm_model_prefix: data/lang_char/train_960_unigram5000
feat_dim: 83
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 30 
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
preprocess_config: conf/preprocess.yaml 
num_workers: 0
subsampling_factor: 1
num_encs: 1


###########################################
#                Training                 #
###########################################
n_epoch: 120
accum_grad: 2
log_interval: 1
checkpoint:
  kbest_n: 50
  latest_n: 5

optim: adam
optim_conf:
  global_grad_clip: 5.0
  weight_decay: 1.0e-06
scheduler: warmuplr     
scheduler_conf:
  lr: 0.004
  warmup_steps: 25000
  lr_decay: 1.0


================================================
FILE: examples/librispeech/asr2/local/align.sh
================================================
#!/bin/bash
# Run test.py in 'align' mode for a checkpoint.
#
# Usage:
#   ./local/align.sh config_path decode_config_path dict_path ckpt_path_prefix
#
# Reads BIN_DIR (location of test.py) and CUDA_VISIBLE_DEVICES (GPU count).
# Exits 1 when the alignment command fails, 0 otherwise.

if [ $# -ne 4 ]; then
    echo "usage: ${0} config_path decode_config_path dict_path ckpt_path_prefix"
    exit -1
fi

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when it is empty).
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
decode_config_path=$2
dict_path=$3
ckpt_prefix=$4

batch_size=1
# Results are written into a directory named after the checkpoint prefix.
output_dir=${ckpt_prefix}
mkdir -p ${output_dir}

# align dump in `result_file`
# .tier, .TextGrid dump in `dir of result_file`
# NOTE(review): `type` is not assigned anywhere in this script; unless it is
# inherited from the environment the result file is `${output_dir}/.align` - confirm intent.
if ! python3 -u ${BIN_DIR}/test.py \
    --model-name 'u2_kaldi' \
    --run-mode 'align' \
    --dict-path ${dict_path} \
    --ngpu ${ngpu} \
    --config ${config_path} \
    --decode_cfg ${decode_config_path} \
    --result-file ${output_dir}/${type}.align \
    --checkpoint_path ${ckpt_prefix} \
    --opts decode.decode_batch_size ${batch_size}
then
    echo "Failed in ctc alignment!"
    exit 1
fi

exit 0


================================================
FILE: examples/librispeech/asr2/local/cacu_perplexity.sh
================================================
#!/bin/bash
# Lower-case the vocabulary and the test_clean text, then run
# cacu_perplexity.py on them with the transformer LM.
# Reads LM_BIN_DIR to locate cacu_perplexity.py.

set -e

stage=-1
stop_stage=100

expdir=exp
datadir=data

ngpu=0

# lm params
rnnlm_config_path=conf/lm/transformer.yaml
lmexpdir=exp/lm/transformer
lang_model=transformerLM.pdparams

#data path
test_set=${datadir}/test_clean/text
test_set_lower=${datadir}/test_clean/text_lower
train_set=train_960

# bpemode (unigram or bpe)
nbpe=5000
bpemode=unigram
bpeprefix=${datadir}/lang_char/${train_set}_${bpemode}${nbpe}
bpemodel=${bpeprefix}.model

vocabfile=${bpeprefix}_units.txt
vocabfile_lower=${bpeprefix}_units_lower.txt

output_dir=${expdir}/lm/transformer/perplexity

mkdir -p ${output_dir}

# Write a lower-cased copy of file $1 to $2; does nothing when $1 is missing.
lowercase_copy() {
    if [ -f $1 ]; then
        tr A-Z a-z < $1 > $2
    fi
}

# Transform the data upper case to lower
lowercase_copy ${vocabfile} ${vocabfile_lower}
lowercase_copy ${test_set} ${test_set_lower}

python ${LM_BIN_DIR}/cacu_perplexity.py \
    --rnnlm ${lmexpdir}/${lang_model} \
    --rnnlm-conf ${rnnlm_config_path} \
    --vocab_path ${vocabfile_lower} \
    --bpeprefix ${bpeprefix} \
    --text_path ${test_set_lower} \
    --output_dir ${output_dir} \
    --ngpu ${ngpu}


================================================
FILE: examples/librispeech/asr2/local/data.sh
================================================
#!/bin/bash

stage=-1
stop_stage=100
nj=32
debugmode=1
dumpdir=dump   # directory to dump full features
N=0            # number of minibatches to be used (mainly for debugging). "0" uses all minibatches.
verbose=0      # verbose option
resume=        # Resume the training from snapshot

# feature configuration
do_delta=false

# Set this to somewhere where you want to put your data, or where
# someone else has already put it.  You'll want to change this
# if you're not on the CLSP grid.
datadir=${MAIN_ROOT}/dataset/

# bpemode (unigram or bpe)
nbpe=5000
bpemode=unigram

source ${MAIN_ROOT}/utils/parse_options.sh

# Set bash to 'debug' mode, it will exit on :
# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
set -e
set -u
set -o pipefail

train_set=train_960
train_sp=train_sp
train_dev=dev
recog_set="test_clean test_other dev_clean dev_other"


mkdir -p data
TARGET_DIR=${MAIN_ROOT}/dataset
mkdir -p ${TARGET_DIR}
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
    # download data, generate manifests
    python3 ${TARGET_DIR}/librispeech/librispeech.py \
    --manifest_prefix="data/manifest" \
    --target_dir="${TARGET_DIR}/librispeech" \
    --full_download="True"

    if [ $? -ne 0 ]; then
        echo "Prepare LibriSpeech failed. Terminated."
        exit 1
    fi

    for set in train-clean-100 train-clean-360 train-other-500 dev-clean dev-other test-clean test-other; do
        mv data/manifest.${set} data/manifest.${set}.raw
    done

    rm -rf data/manifest.train.raw data/manifest.dev.raw  data/manifest.test.raw
    for set in train-clean-100 train-clean-360 train-other-500; do
        cat data/manifest.${set}.raw >> data/manifest.train.raw
    done

    for set in dev-clean dev-other; do
        cat data/manifest.${set}.raw >> data/manifest.dev.raw
    done

    for set in test-clean test-other; do
        cat data/manifest.${set}.raw >> data/manifest.test.raw
    done
fi

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    ### Task dependent. You have to make data the following preparation part by yourself.
    ### But you can utilize Kaldi recipes in most cases
    echo "stage 0: Data preparation"
    for part in dev-clean test-clean dev-other test-other train-clean-100 train-clean-360 train-other-500; do
        # use underscore-separated names in data directories.
        local/data_prep.sh ${datadir}/librispeech/${part}/LibriSpeech/${part} data/${part//-/_}
    done
fi

feat_tr_dir=${dumpdir}/${train_set}/delta${do_delta}; mkdir -p ${feat_tr_dir}
feat_sp_dir=${dumpdir}/${train_sp}/delta${do_delta}; mkdir -p ${feat_sp_dir}
feat_dt_dir=${dumpdir}/${train_dev}/delta${do_delta}; mkdir -p ${feat_dt_dir}
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    ### Task dependent. You have to design training and dev sets by yourself.
    ### But you can utilize Kaldi recipes in most cases
    echo "stage 1: Feature Generation"
    fbankdir=fbank
    # Generate the fbank features; by default 80-dimensional fbanks with pitch on each frame
    for x in dev_clean test_clean dev_other test_other train_clean_100 train_clean_360 train_other_500; do
        steps/make_fbank_pitch.sh --cmd "$train_cmd" --nj ${nj} --write_utt2num_frames true \
            data/${x} exp/make_fbank/${x} ${fbankdir}
        utils/fix_data_dir.sh data/${x}
    done

    utils/combine_data.sh --extra_files utt2num_frames data/${train_set}_org data/train_clean_100 data/train_clean_360 data/train_other_500
    utils/combine_data.sh --extra_files utt2num_frames data/${train_dev}_org data/dev_clean data/dev_other
    utils/perturb_data_dir_speed.sh 0.9  data/${train_set}_org  data/temp1
    utils/perturb_data_dir_speed.sh 1.0  data/${train_set}_org  data/temp2
    utils/perturb_data_dir_speed.sh 1.1  data/${train_set}_org  data/temp3

    utils/combine_data.sh --extra-files utt2uniq data/${train_sp}_org data/temp1 data/temp2 data/temp3

    # remove utt having more than 3000 frames
    # remove utt having more than 400 characters
    remove_longshortdata.sh --maxframes 3000 --maxchars 400 data/${train_set}_org data/${train_set}
    remove_longshortdata.sh --maxframes 3000 --maxchars 400 data/${train_sp}_org data/${train_sp}
    remove_longshortdata.sh --maxframes 3000 --maxchars 400 data/${train_dev}_org data/${train_dev}
    steps/make_fbank_pitch.sh --cmd "$train_cmd" --nj $nj  --write_utt2num_frames true \
            data/train_sp  exp/make_fbank/train_sp  ${fbankdir}
    utils/fix_data_dir.sh data/train_sp
    # compute global CMVN
    compute-cmvn-stats scp:data/${train_sp}/feats.scp data/${train_sp}/cmvn.ark

    # dump features for training
    dump.sh --cmd "$train_cmd" --nj ${nj} --do_delta ${do_delta} \
        data/${train_sp}/feats.scp data/${train_sp}/cmvn.ark exp/dump_feats/train ${feat_sp_dir}
    dump.sh --cmd "$train_cmd" --nj ${nj} --do_delta ${do_delta} \
        data/${train_dev}/feats.scp data/${train_sp}/cmvn.ark exp/dump_feats/dev ${feat_dt_dir}
    for rtask in ${recog_set}; do
        feat_recog_dir=${dumpdir}/${rtask}/delta${do_delta}; mkdir -p ${feat_recog_dir}
        dump.sh --cmd "$train_cmd" --nj ${nj} --do_delta ${do_delta} \
            data/${rtask}/feats.scp data/${train_sp}/cmvn.ark exp/dump_feats/recog/${rtask} \
            ${feat_recog_dir}
    done
fi

# Paths of the unit dictionary and of the sentencepiece model prefix; both are
# derived from the training-set name, the BPE mode and the vocabulary size.
dict=data/lang_char/${train_set}_${bpemode}${nbpe}_units.txt
bpemodel=data/lang_char/${train_set}_${bpemode}${nbpe}
echo "dictionary: ${dict}"
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    ### Task dependent. You have to check non-linguistic symbols used in the corpus.
    echo "stage 2: Dictionary and Json Data Preparation"
    mkdir -p data/lang_char/
    echo "<unk> 1" > ${dict} # <unk> must be 1, 0 will be used for "blank" in CTC
    # Drop the first space-separated field of every line (the utterance id in a
    # Kaldi-style `text` file) so that only the transcript is used for training.
    cut -f 2- -d" " data/${train_set}/text > data/lang_char/input.txt
    spm_train --input=data/lang_char/input.txt --vocab_size=${nbpe} --model_type=${bpemode} --model_prefix=${bpemodel} --input_sentence_size=100000000
    # Encode the transcripts into pieces, keep each distinct piece once, and
    # append "<piece> <id>" lines; ids start at 2 because <unk> already took 1.
    spm_encode --model=${bpemodel}.model --output_format=piece < data/lang_char/input.txt | tr ' ' '\n' | sort | uniq | awk '{print $0 " " NR+1}' >> ${dict}
    wc -l ${dict}

    # make json labels for the training and development sets
    data2json.sh --nj ${nj} --feat ${feat_sp_dir}/feats.scp --bpecode ${bpemodel}.model \
        data/${train_sp} ${dict} > ${feat_sp_dir}/data_${bpemode}${nbpe}.json
    data2json.sh --nj ${nj} --feat ${feat_dt_dir}/feats.scp --bpecode ${bpemodel}.model \
        data/${train_dev} ${dict} > ${feat_dt_dir}/data_${bpemode}${nbpe}.json

    # make json labels for every recognition (evaluation) set
    for rtask in ${recog_set}; do
        feat_recog_dir=${dumpdir}/${rtask}/delta${do_delta}
        data2json.sh --nj ${nj} --feat ${feat_recog_dir}/feats.scp --bpecode ${bpemodel}.model \
            data/${rtask} ${dict} > ${feat_recog_dir}/data_${bpemode}${nbpe}.json
    done
fi


if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # Convert the JSON files written in stage 2 into JSON-lines manifests
    # (one utterance per line) under data/.
    python3 local/espnet_json_to_manifest.py --json-file ${feat_sp_dir}/data_${bpemode}${nbpe}.json --manifest-file data/manifest.train
    python3 local/espnet_json_to_manifest.py --json-file ${feat_dt_dir}/data_${bpemode}${nbpe}.json --manifest-file data/manifest.dev

    for rtask in ${recog_set}; do
        feat_recog_dir=${dumpdir}/${rtask}/delta${do_delta}
        # ${rtask//_/-} replaces every "_" in the task name with "-", so the
        # manifest for e.g. "test_clean" is written as data/manifest.test-clean.
        python3 local/espnet_json_to_manifest.py --json-file ${feat_recog_dir}/data_${bpemode}${nbpe}.json --manifest-file data/manifest.${rtask//_/-}
    done
fi

echo "LibriSpeech Data preparation done."
exit 0


================================================
FILE: examples/librispeech/asr2/local/data_prep.sh
================================================
#!/usr/bin/env bash

# Copyright 2014  Vassil Panayotov
#           2014  Johns Hopkins University (author: Daniel Povey)
# Apache 2.0

# Builds a Kaldi-style data directory (wav.scp, text, utt2spk, spk2gender,
# spk2utt) in <dst-dir> from one LibriSpeech subset located in <src-dir>.
if [ "$#" -ne 2 ]; then
  echo "Usage: $0 <src-dir> <dst-dir>"
  echo "e.g.: $0 /export/a15/vpanayotov/data/LibriSpeech/dev-clean data/dev-clean"
  exit 1
fi

src=$1
dst=$2

# all utterances are FLAC compressed
if ! which flac >&/dev/null; then
   echo "Please install 'flac' on ALL worker nodes!"
   exit 1
fi

# The speaker table is expected one level above the subset directory.
spk_file=$src/../SPEAKERS.TXT

mkdir -p $dst || exit 1

[ ! -d $src ] && echo "$0: no such directory $src" && exit 1
[ ! -f $spk_file ] && echo "$0: expected file $spk_file to exist" && exit 1


# Output files are appended to below, so remove leftovers from a previous run.
wav_scp=$dst/wav.scp; [[ -f "$wav_scp" ]] && rm $wav_scp
trans=$dst/text; [[ -f "$trans" ]] && rm $trans
utt2spk=$dst/utt2spk; [[ -f "$utt2spk" ]] && rm $utt2spk
spk2gender=$dst/spk2gender; [[ -f $spk2gender ]] && rm $spk2gender

# Walk <src>/<reader>/<chapter>/ in sorted order; both levels must be named
# with integers.
for reader_dir in $(find -L $src -mindepth 1 -maxdepth 1 -type d | sort); do
  reader=$(basename $reader_dir)
  # `-eq` only succeeds for integer operands, so this doubles as an "is integer" test.
  if ! [ $reader -eq $reader ]; then  # not integer.
    echo "$0: unexpected subdirectory name $reader"
    exit 1
  fi

  # Find the SPEAKERS.TXT row that starts with "<reader> |", strip spaces and
  # take the lower-cased second '|'-separated field as the gender.
  reader_gender=$(egrep "^$reader[ ]+\|" $spk_file | awk -F'|' '{gsub(/[ ]+/, ""); print tolower($2)}')
  if [ "$reader_gender" != 'm' ] && [ "$reader_gender" != 'f' ]; then
    echo "Unexpected gender: '$reader_gender'"
    exit 1
  fi

  for chapter_dir in $(find -L $reader_dir/ -mindepth 1 -maxdepth 1 -type d | sort); do
    chapter=$(basename $chapter_dir)
    if ! [ "$chapter" -eq "$chapter" ]; then
      echo "$0: unexpected chapter-subdirectory name $chapter"
      exit 1
    fi

    # One wav.scp entry per FLAC file: "<utt-id> flac -c -d -s <path> |",
    # i.e. a piped command that decodes the file to stdout when it is read.
    find -L $chapter_dir/ -iname "*.flac" | sort | xargs -I% basename % .flac | \
      awk -v "dir=$chapter_dir" '{printf "%s flac -c -d -s %s/%s.flac |\n", $0, dir, $0}' >>$wav_scp|| exit 1

    # The chapter transcript file is appended verbatim to the `text` file.
    chapter_trans=$chapter_dir/${reader}-${chapter}.trans.txt
    [ ! -f  $chapter_trans ] && echo "$0: expected file $chapter_trans to exist" && exit 1
    cat $chapter_trans >>$trans

    # NOTE: For now we are using per-chapter utt2spk. That is each chapter is considered
    #       to be a different speaker. This is done for simplicity and because we want
    #       e.g. the CMVN to be calculated per-chapter
    awk -v "reader=$reader" -v "chapter=$chapter" '{printf "%s %s-%s\n", $1, reader, chapter}' \
      <$chapter_trans >>$utt2spk || exit 1

    # reader -> gender map (again using per-chapter granularity)
    echo "${reader}-${chapter} $reader_gender" >>$spk2gender
  done
done

# Derive the inverse speaker -> utterances map from utt2spk.
spk2utt=$dst/spk2utt
utils/utt2spk_to_spk2utt.pl <$utt2spk >$spk2utt || exit 1

# Sanity check: every transcript line must have a matching utt2spk line.
ntrans=$(wc -l <$trans)
nutt2spk=$(wc -l <$utt2spk)
! [ "$ntrans" -eq "$nutt2spk" ] && \
  echo "Inconsistent #transcripts($ntrans) and #utt2spk($nutt2spk)" && exit 1

utils/validate_data_dir.sh --no-feats $dst || exit 1

echo "$0: successfully prepared data in $dst"

exit 0


================================================
FILE: examples/librispeech/asr2/local/download_lm_en.sh
================================================
#!/bin/bash

# Pull in the shared helpers; the `download` command used below comes from there.
source ${MAIN_ROOT}/utils/utility.sh

# Destination directory, remote location, expected checksum and local file
# name of the English language model.
DIR=data/lm
URL=https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm
MD5="099a601759d467cd0a8523ff939819c5"
TARGET=${DIR}/common_crawl_00.prune01111.trie.klm

mkdir -p ${DIR}

echo "Download language model ..."
download $URL $MD5 $TARGET
download_status=$?

# Report a failed download and stop with a non-zero exit code.
if [[ ${download_status} -ne 0 ]]; then
    echo "Fail to download the language model!"
    exit 1
fi

exit 0


================================================
FILE: examples/librispeech/asr2/local/espnet_json_to_manifest.py
================================================
#!/usr/bin/env python
import argparse
import json


def main(args):
    """Convert an ESPnet-style data JSON into a JSON-lines manifest.

    Args:
        args: namespace with two attributes:
            json_file: path of the input JSON; its top-level ``"utts"`` object
                maps utterance ids to per-utterance dictionaries.
            manifest_file: path of the output file; one JSON object is written
                per line, each being the utterance dictionary with the
                utterance id added under the ``"utt"`` key.

    Raises:
        KeyError: if the input JSON has no ``"utts"`` entry.
        OSError: if either file cannot be opened.
    """
    # Open both files explicitly as UTF-8: tokens contain non-ASCII characters
    # (e.g. "▁") that are written unescaped (ensure_ascii=False), so relying on
    # the locale's default encoding can fail, e.g. under LC_ALL=C.
    with open(args.json_file, 'r', encoding='utf-8') as fin:
        data_json = json.load(fin)

    # manifest format:
    # {"input": [
    #       {"feat": "dev/deltafalse/feats.1.ark:842920", "name": "input1", "shape": [349, 83]}
    #  ],
    #  "output": [
    #       {"name": "target1", "shape": [12, 5002], "text": "NO APOLLO", "token": "▁NO ▁A PO LL O", "tokenid": "3144 482 352 269 317"}
    #  ],
    #  "utt2spk": "116-288045",
    #  "utt": "116-288045-0019"}
    with open(args.manifest_file, 'w', encoding='utf-8') as fout:
        for key, value in data_json['utts'].items():
            # Store the utterance id inside the record so each line is self-contained.
            value['utt'] = key
            fout.write(json.dumps(value, ensure_ascii=False))
            fout.write("\n")


if __name__ == '__main__':
    # Command-line entry point: parse the two file paths and run the conversion.
    # The module has no docstring, so give argparse an explicit description
    # instead of `__doc__` (which would be None).
    parser = argparse.ArgumentParser(
        description="Convert an espnet data json file to a json-lines manifest.")
    # The input file is mandatory: without it main() would fail inside open().
    parser.add_argument(
        '--json-file', type=str, required=True, help="espnet data json file.")
    parser.add_argument(
        '--manifest-file',
        type=str,
        default='manifest.train',
        help='manifest data json line file.')
    args = parser.parse_args()
    main(args)


================================================
FILE: examples/librispeech/asr2/local/export.sh
================================================
#!/bin/bash

# Exports a checkpoint through test.py in 'export' run mode.
# Exactly three positional arguments are required.
if [ $# -ne 3 ]; then
    echo "usage: $0 config_path ckpt_prefix jit_model_path"
    exit -1
fi

config_path=$1
ckpt_path_prefix=$2
jit_model_export_path=$3

# Number of GPUs = number of comma-separated entries in CUDA_VISIBLE_DEVICES.
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

python3 -u ${BIN_DIR}/test.py \
    --model-name 'u2_kaldi' \
    --run-mode 'export' \
    --ngpu ${ngpu} \
    --config ${config_path} \
    --checkpoint_path ${ckpt_path_prefix} \
    --export_path ${jit_model_export_path}
export_status=$?

# Turn any failure of the export command into exit code 1.
if [ ${export_status} -ne 0 ]; then
    echo "Failed in export!"
    exit 1
fi

exit 0


================================================
FILE: examples/librispeech/asr2/local/recog.sh
================================================
#!/bin/bash

# Decodes the recognition sets with joint CTC decoding plus a transformer LM
# (recog.py) and scores the result with score_sclite.sh.
# Usage: recog.sh --ckpt_prefix <ckpt_prefix> [--<variable> <value> ...]
set -e

# Defaults. Every variable defined before parse_options.sh is sourced can be
# overridden on the command line as `--<variable> <value>`.
expdir=exp
datadir=data
nj=32
tag=

# decode config
decode_config=conf/decode/decode.yaml

# lm params
rnnlm_config_path=conf/lm/transformer.yaml
lmexpdir=exp/lm
lang_model=rnnlm.pdparams
lmtag='transformer'

train_set=train_960
# NOTE: the second assignment overrides the first, so only test-clean is decoded by default.
recog_set="test-clean test-other dev-clean dev-other"
recog_set="test-clean"

# bpemode (unigram or bpe)
nbpe=5000
bpemode=unigram
bpeprefix=data/lang_char/${train_set}_${bpemode}${nbpe}
bpemodel=${bpeprefix}.model

# bin params
config_path=conf/transformer.yaml
dict=data/lang_char/${train_set}_${bpemode}${nbpe}_units.txt
ckpt_prefix=

source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

# The checkpoint prefix has no usable default and must be supplied by the caller.
if [ -z ${ckpt_prefix} ]; then
    echo "usage: $0 --ckpt_prefix ckpt_prefix"
    exit 1
fi

# Number of GPUs = number of comma-separated entries in CUDA_VISIBLE_DEVICES.
# (It is overwritten with 0 inside the decoding loop below.)
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

# Two levels above the checkpoint prefix,
# e.g. exp/transformer/checkpoints/avg_10 -> exp/transformer.
ckpt_dir=$(dirname `dirname ${ckpt_prefix}`)
echo "ckpt dir: ${ckpt_dir}"

ckpt_tag=$(basename ${ckpt_prefix})
echo "ckpt tag: ${ckpt_tag}"

# chunk_mode is true when the model config file name contains "chunk_";
# in this script the value is only printed.
chunk_mode=false
if [[ ${config_path} =~ ^.*chunk_.*yaml$ ]];then
    chunk_mode=true
fi
echo "chunk mode: ${chunk_mode}"
echo "decode conf: ${decode_config}"
echo "lm conf: ${rnnlm_config_path}"
echo "lm model: ${lmexpdir}/${lang_model}"


# download language model
#bash local/download_lm_en.sh
#if [ $? -ne 0 ]; then
#    exit 1
#fi


# download rnnlm (skipped when the file already exists)
mkdir -p ${lmexpdir}
if [ ! -f ${lmexpdir}/${lang_model} ]; then
    wget -c -O ${lmexpdir}/${lang_model} https://deepspeech.bj.bcebos.com/transformer_lm/transformerLM.pdparams
fi


pids=() # initialize pids

for dmethd in join_ctc; do
(
    echo "${dmethd} decoding"
    for rtask in ${recog_set}; do
    (
        echo "${rtask} dataset"
        # Result directory name encodes the dataset (first "-" replaced by "_"),
        # decoding method, model config base name, LM tag, checkpoint tag and user tag.
        decode_dir=${ckpt_dir}/decode/decode_${rtask/-/_}_${dmethd}_$(basename ${config_path%.*})_${lmtag}_${ckpt_tag}_${tag}
        feat_recog_dir=${datadir}
        mkdir -p ${decode_dir}
        mkdir -p ${feat_recog_dir}

        # split data
        split_json.sh manifest.${rtask} ${nj}

        #### use CPU for decoding
        ngpu=0

        # set batchsize 0 to disable batch decoding
        # "JOB" is replaced with the job index (1..nj) by ${decode_cmd}.
        ${decode_cmd} JOB=1:${nj} ${decode_dir}/log/decode.JOB.log \
            python3 -u ${BIN_DIR}/recog.py \
                --api v2 \
                --config ${decode_config} \
                --ngpu ${ngpu} \
                --batchsize 0 \
                --checkpoint_path ${ckpt_prefix} \
                --dict-path ${dict} \
                --recog-json ${feat_recog_dir}/split${nj}/JOB/manifest.${rtask} \
                --result-label ${decode_dir}/data.JOB.json \
                --model-conf ${config_path} \
                --model ${ckpt_prefix}.pdparams \
                --rnnlm-conf ${rnnlm_config_path} \
                --rnnlm ${lmexpdir}/${lang_model}

        score_sclite.sh --bpe ${nbpe} --bpemodel ${bpemodel} --wer false ${decode_dir} ${dict}

    ) &
    pids+=($!) # store background pids
    # The wait happens inside the dataset loop, so each dataset finishes before
    # the next one is started. Failed jobs are counted and reported, but the
    # trailing `|| true` keeps the script from aborting under `set -e`.
    i=0; for pid in "${pids[@]}"; do wait ${pid} || ((++i)); done
    [ ${i} -gt 0 ] && echo "$0: ${i} background jobs are failed." || true
    done
)
done

echo "Finished"

exit 0


================================================
FILE: examples/librispeech/asr2/local/test.sh
================================================
#!/bin/bash

# Decodes the recognition sets with several decoding methods (test.py in
# 'test' run mode) and scores each result with score_sclite.sh.
# Usage: test.sh --ckpt_prefix <ckpt_prefix> [--<variable> <value> ...]
set -e

# Defaults. Every variable defined before parse_options.sh is sourced can be
# overridden on the command line as `--<variable> <value>`.
expdir=exp
datadir=data
nj=32

lmtag='nolm'

train_set=train_960
# NOTE: the second assignment overrides the first, so only test-clean is decoded by default.
recog_set="test-clean test-other dev-clean dev-other"
recog_set="test-clean"

# bpemode (unigram or bpe)
nbpe=5000
bpemode=unigram
bpeprefix=data/lang_char/${train_set}_${bpemode}${nbpe}
bpemodel=${bpeprefix}.model

config_path=conf/transformer.yaml
decode_config_path=conf/decode/decode_base.yaml
dict=data/lang_char/${train_set}_${bpemode}${nbpe}_units.txt
ckpt_prefix=

source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

# The checkpoint prefix has no usable default and must be supplied by the caller.
if [ -z ${ckpt_prefix} ]; then
    echo "usage: $0 --ckpt_prefix ckpt_prefix"
    exit 1
fi

# Number of GPUs = number of comma-separated entries in CUDA_VISIBLE_DEVICES.
# (It is overwritten with 0 inside the decoding loop below.)
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

# Two levels above the checkpoint prefix,
# e.g. exp/transformer/checkpoints/avg_10 -> exp/transformer.
ckpt_dir=$(dirname `dirname ${ckpt_prefix}`)
echo "ckpt dir: ${ckpt_dir}"

ckpt_tag=$(basename ${ckpt_prefix})
echo "ckpt tag: ${ckpt_tag}"

# chunk_mode is true when the model config file name contains "chunk_";
# in this script the value is only printed.
chunk_mode=false
if [[ ${config_path} =~ ^.*chunk_.*yaml$ ]];then
    chunk_mode=true
fi
echo "chunk mode: ${chunk_mode}"


# download language model
#bash local/download_lm_en.sh
#if [ $? -ne 0 ]; then
#    exit 1
#fi

pids=() # initialize pids

for dmethd in attention ctc_greedy_search ctc_prefix_beam_search attention_rescoring; do
(
    echo "decode method: ${dmethd}"
    for rtask in ${recog_set}; do
    (
        echo "dataset: ${rtask}"
        # Result directory name encodes the dataset (first "-" replaced by "_"),
        # decoding method, model config base name, LM tag and checkpoint tag.
        decode_dir=${ckpt_dir}/decode/decode_${rtask/-/_}_${dmethd}_$(basename ${config_path%.*})_${lmtag}_${ckpt_tag}
        feat_recog_dir=${datadir}
        mkdir -p ${decode_dir}
        mkdir -p ${feat_recog_dir}

        # split data
        split_json.sh manifest.${rtask} ${nj}

        #### use CPU for decoding
        ngpu=0

        # decode with a batch size of 1 (passed as decode.decode_batch_size)
        batch_size=1
        # "JOB" is replaced with the job index (1..nj) by ${decode_cmd}.
        ${decode_cmd} JOB=1:${nj} ${decode_dir}/log/decode.JOB.log \
            python3 -u ${BIN_DIR}/test.py \
            --model-name u2_kaldi \
            --run-mode test \
            --ngpu ${ngpu} \
            --dict-path ${dict} \
            --config ${config_path} \
            --decode_cfg ${decode_config_path} \
            --checkpoint_path ${ckpt_prefix} \
            --result-file ${decode_dir}/data.JOB.json \
            --opts decode.decoding_method ${dmethd} \
            --opts decode.decode_batch_size ${batch_size} \
            --opts test_manifest ${feat_recog_dir}/split${nj}/JOB/manifest.${rtask}

        score_sclite.sh --bpe ${nbpe} --bpemodel ${bpemodel} --wer false ${decode_dir} ${dict}

    ) &
    pids+=($!) # store background pids
    # The wait happens inside the dataset loop, so each dataset finishes before
    # the next one is started. Failed jobs are counted and reported, but the
    # trailing `|| true` keeps the script from aborting under `set -e`.
    i=0; for pid in "${pids[@]}"; do wait ${pid} || ((++i)); done
    [ ${i} -gt 0 ] && echo "$0: ${i} background jobs are failed." || true
    done
)
done

echo "Finished"

exit 0


================================================
FILE: examples/librispeech/asr2/local/train.sh
================================================
#!/bin/bash

# Trains the u2_kaldi model, on CPU or through paddle.distributed.launch.
# Usage: CUDA_VISIBLE_DEVICES=<ids> train.sh config_path ckpt_name [ips]
#   config_path  model/training configuration file
#   ckpt_name    experiment name; outputs go to exp/<ckpt_name>
#   ips          optional comma-separated node IPs for multi-machine training
#
# Two or three positional arguments are accepted. The two range tests must be
# joined with "||": with "&&" the condition can never be true.
if [ $# -lt 2 ] || [ $# -gt 3 ];then
    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
    exit -1
fi

# Number of GPUs = number of comma-separated entries in CUDA_VISIBLE_DEVICES.
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
ckpt_name=$2
ips=$3

# Only pass --ips to the launcher when node IPs were given.
if [ ! $ips ];then
  ips_config=
else
  ips_config="--ips="${ips}
fi

mkdir -p exp

# seed may break model convergence
seed=0
if [ ${seed} != 0 ]; then
    export FLAGS_cudnn_deterministic=True
fi

# default memory allocator strategy may case gpu training hang
# for no OOM raised when memory exhausted
export FLAGS_allocator_strategy=naive_best_fit

if [ ${ngpu} == 0 ]; then
python3 -u ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--model-name u2_kaldi \
--config ${config_path} \
--output exp/${ckpt_name} \
--seed ${seed}
else
python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--model-name u2_kaldi \
--config ${config_path} \
--output exp/${ckpt_name} \
--seed ${seed}
fi
# Save the trainer's exit status immediately: the status of the `if` above is
# that of the python command it ran, and the cleanup `if` below would
# otherwise overwrite $? before it is checked.
train_status=$?

if [ ${seed} != 0 ]; then
    unset FLAGS_cudnn_deterministic
fi

if [ ${train_status} -ne 0 ]; then
    echo "Failed in training!"
    exit 1
fi

exit 0


================================================
FILE: examples/librispeech/asr2/path.sh
================================================
# Environment setup for this recipe; meant to be sourced (`. ./path.sh`).
# Repository root, three directories above the recipe directory.
export MAIN_ROOT=`realpath ${PWD}/../../../`

export PATH=${MAIN_ROOT}:${MAIN_ROOT}/tools/sctk/bin:${MAIN_ROOT}/utils:${PWD}/utils:${PATH}
export LC_ALL=C

export PYTHONDONTWRITEBYTECODE=1
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/


# Directory holding the train/test entry points of the selected model.
MODEL=u2_kaldi
export BIN_DIR=${MAIN_ROOT}/paddlespeech/s2t/exps/${MODEL}/bin

# Directory holding the entry points of the selected language model.
LM_MODEL=transformer
export LM_BIN_DIR=${MAIN_ROOT}/paddlespeech/s2t/exps/lm/${LM_MODEL}/bin


# srilm
export LIBLBFGS=${MAIN_ROOT}/tools/liblbfgs-1.10
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:${LIBLBFGS}/lib/.libs
export SRILM=${MAIN_ROOT}/tools/srilm
export PATH=${PATH}:${SRILM}/bin:${SRILM}/bin/i686-m64

# Kaldi
export KALDI_ROOT=${MAIN_ROOT}/tools/kaldi
[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
# A missing Kaldi installation is only a warning. Use if/else rather than a
# trailing `[ -f ... ] && . ...`: as the last command of a sourced file, a
# failed test would make `. ./path.sh` return non-zero and trip callers that
# run `. ./path.sh || exit 1`.
if [ -f $KALDI_ROOT/tools/config/common_path.sh ]; then
    . $KALDI_ROOT/tools/config/common_path.sh
else
    echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present, can not using Kaldi!"
fi


================================================
FILE: examples/librispeech/asr2/run.sh
================================================
#!/bin/bash

# Top-level driver of the recipe. Stages `stage`..`stop_stage` are executed;
# every variable defined before parse_options.sh is sourced can be overridden
# on the command line as `--<variable> <value>`.
set -e

. ./path.sh || exit 1;
. ./cmd.sh || exit 1;

gpus=0,1,2,3,4,5,6,7
stage=0
stop_stage=50
conf_path=conf/transformer.yaml
ips=            #xx.xx.xx.xx,xx.xx.xx.xx
decode_conf_path=conf/decode/decode_base.yaml
dict_path=data/lang_char/train_960_unigram5000_units.txt
avg_num=10

source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

# Name of the averaged checkpoint, and experiment name taken from the config
# file name up to its first "." (conf/transformer.yaml -> transformer).
avg_ckpt=avg_${avg_num}
ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}')
echo "checkpoint name ${ckpt}"

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # prepare data
    bash ./local/data.sh || exit -1
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # train model, all `ckpt` under `exp` dir
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips}
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # avg n best model
    avg.sh latest exp/${ckpt}/checkpoints ${avg_num}
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # attention rescoring decoder
    # local/test.sh takes its settings as `--variable value` options (it sources
    # parse_options.sh and requires --ckpt_prefix), so pass them by name;
    # positional arguments would leave ckpt_prefix empty.
    ./local/test.sh --config_path ${conf_path} --decode_config_path ${decode_conf_path} --dict ${dict_path} --ckpt_prefix exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
fi

if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    # join ctc decoder, use transformerlm to score
    ./local/recog.sh  --ckpt_prefix exp/${ckpt}/checkpoints/${avg_ckpt}
fi

if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
    # ctc alignment of test data
    CUDA_VISIBLE_DEVICES=0 ./local/align.sh ${conf_path} ${decode_conf_path} ${dict_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
fi

if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then
    ./local/cacu_perplexity.sh || exit -1
fi

# Stage 51 lies above the default stop_stage (50), so it only runs when
# requested explicitly.
if [ ${stage} -le 51 ] && [ ${stop_stage} -ge 51 ]; then
    # export ckpt avg_n
    ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit
fi


================================================
FILE: examples/librispeech/asr3/README.md
================================================
# Wav2vec2ASR with Librispeech
This example contains code used to finetune the [wav2vec2.0](https://arxiv.org/pdf/2006.11477.pdf) model with the [Librispeech dataset](http://www.openslr.org/resources/12).
## Overview
All the scripts you need are in `run.sh`. There are several stages in `run.sh`, and each stage has its function.
| Stage | Function                                                     |
|:---- |:----------------------------------------------------------- |
| 0     | Process data. It includes: <br>       (1) Download the dataset <br>       (2) Calculate the CMVN of the train dataset <br>       (3) Get the vocabulary file <br>       (4) Get the manifest files of the train, development and test dataset<br>       (5) Download the pretrained wav2vec2 model |
| 1     | Train the model                                              |
| 2     | Get the final model by averaging the top-k models, set k = 1 means to choose the best model |
| 3     | Test the final model performance                             |
| 4     | Infer the single audio file                                  |


You can choose to run a range of stages by setting `stage` and `stop_stage`.

For example, if you want to execute the code in stage 2 and stage 3, you can run this script:
```bash
bash run.sh --stage 2 --stop_stage 3
```
Or you can set `stage` equal to `stop_stage` to only run one stage.
For example, if you only want to run `stage 0`, you can use the script below:
```bash
bash run.sh --stage 0 --stop_stage 0
```
The document below will describe the scripts in `run.sh` in detail.
## The Environment Variables
The path.sh contains the environment variables. 
```bash
. ./path.sh
. ./cmd.sh
```
This script needs to be run first. And another script is also needed:
```bash
source ${MAIN_ROOT}/utils/parse_options.sh
```
It will support the way of using `--variable value` in the shell scripts.
## The Local Variables
Some local variables are set in `run.sh`. 
`gpus` denotes the GPU number you want to use. If you set `gpus=`, it means you only use CPU. 
`stage` denotes the number of stages you want to start from in the experiments.
`stop_stage` denotes the number of the stage you want to end at in the experiments. 
`conf_path` denotes the config path of the model.
`avg_num` denotes the number K of top-K models you want to average to get the final model.
`audio_file` denotes the file path of the single file you want to infer in stage 4.
`ckpt` denotes the checkpoint prefix of the model, e.g. "wav2vec2ASR"

You can set the local variables (except `ckpt`) when you use `run.sh`

For example, you can set the `gpus` and `avg_num` when you use the command line:
```bash
bash run.sh --gpus 0,1 --avg_num 20
```
## Stage 0: Data Processing
To use this example, you need to process data firstly and you can use stage 0 in `run.sh` to do this. The code is shown below:
```bash
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
     # prepare data
     bash ./local/data.sh || exit -1
 fi
```
Stage 0 is for processing the data.

If you only want to process the data, you can run:
```bash
bash run.sh --stage 0 --stop_stage 0
```
You can also just run these scripts in your command line.
```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
```
After processing the data, the `data` directory will look like this:
```bash
data/
|-- dev.meta
|-- lang_char
|   `-- bpe_unigram_5000.model
|   `-- bpe_unigram_5000.vocab
|   `-- vocab.txt
|-- manifest.dev
|-- manifest.dev.raw
|-- manifest.test
|-- manifest.test.raw
|-- manifest.train
|-- manifest.train.raw
|-- mean_std.json
|-- test.meta
`-- train.meta
```

Stage 0 also downloads the pre-trained [wav2vec2](https://paddlespeech.cdn.bcebos.com/wav2vec/wav2vec2-large-960h-lv60-self.pdparams) model.
```bash
mkdir -p exp/wav2vec2
wget -P exp/wav2vec2 https://paddlespeech.cdn.bcebos.com/wav2vec/wav2vec2-large-960h-lv60-self.pdparams
```
## Stage 1: Model Training
If you want to train the model, you can use stage 1 in `run.sh`. The code is shown below. 
```bash
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
     CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
 fi
```
If you want to train the model, you can use the script below to execute stage 0 and stage 1:
```bash
bash run.sh --stage 0 --stop_stage 1
```
or you can run these scripts in the command line (only use CPU).
```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/wav2vec2ASR.yaml wav2vec2ASR
```
## Stage 2: Top-k Models Averaging
After training the model, we need to get the final model for testing and inference. In every epoch, the model checkpoint is saved, so we can choose the best model from them based on the validation loss or we can sort them and average the parameters of the top-k models to get the final model. We can use stage 2 to do this, and the code is shown below. Note: We only train one epoch for wav2vec2ASR, thus the `avg_num` is set to 1.
```bash
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     # avg n best model
     avg.sh best exp/${ckpt}/checkpoints ${avg_num}
 fi
```
The `avg.sh` script is in `../../../utils/`, which is added to `PATH` in `path.sh`.
If you want to get the final model, you can use the script below to execute stage 0, stage 1, and stage 2:
```bash
bash run.sh --stage 0 --stop_stage 2
```
or you can run these scripts in the command line (only use CPU).

```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/wav2vec2ASR.yaml wav2vec2ASR
avg.sh best exp/wav2vec2ASR/checkpoints 1
```
## Stage 3: Model Testing
The test stage is to evaluate the model performance. The code of test stage is shown below:
```bash
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
     # test ckpt avg_n
     CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
 fi
```
If you want to train a model and test it, you can use the script below to execute stage 0, stage 1, stage 2, and stage 3 :
```bash
bash run.sh --stage 0 --stop_stage 3
```
or you can run these scripts in the command line (only use CPU).
```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/wav2vec2ASR.yaml wav2vec2ASR
avg.sh best exp/wav2vec2ASR/checkpoints 1
CUDA_VISIBLE_DEVICES= ./local/test.sh conf/wav2vec2ASR.yaml conf/tuning/decode.yaml exp/wav2vec2ASR/checkpoints/avg_1
```
## Pretrained Model
You can get the pretrained wav2vec2ASR from [this](../../../docs/source/released_model.md).

Use the `tar` command to unpack the model, and then you can use the script below to test it.

For example:
```bash
wget https://paddlespeech.cdn.bcebos.com/s2t/librispeech/asr3/wav2vec2ASR-large-960h-librispeech_ckpt_1.3.0.model.tar.gz
tar xzvf wav2vec2ASR-large-960h-librispeech_ckpt_1.3.0.model.tar.gz
source path.sh
# If you have already processed the data and got the manifest files, you can skip the following 2 steps
bash local/data.sh --stage -1 --stop_stage -1
bash local/data.sh --stage 2 --stop_stage 2
CUDA_VISIBLE_DEVICES= ./local/test.sh conf/wav2vec2ASR.yaml conf/tuning/decode.yaml exp/wav2vec2ASR/checkpoints/avg_1
```
The performance of the released models is shown [here](./RESULTS.md).


## Stage 4: Single Audio File Inference
In some situations, you may want to use the trained model to run inference on a single audio file. You can use stage 4 for this. The code is shown below:
```bash
 if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
     # test a single .wav file
     CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${audio_file} || exit -1
 fi
```
you can train the model by yourself using ```bash run.sh --stage 0 --stop_stage 3```, or you can download the pretrained model through the script below:
```bash
wget https://paddlespeech.cdn.bcebos.com/s2t/librispeech/asr3/wav2vec2ASR-large-960h-librispeech_ckpt_1.3.0.model.tar.gz
tar xzvf wav2vec2ASR-large-960h-librispeech_ckpt_1.3.0.model.tar.gz
```
You can download the audio demo:
```bash
wget -nc https://paddlespeech.cdn.bcebos.com/datasets/single_wav/en/demo_002_en.wav -P data/
```
You need to prepare an audio file or use the audio demo above, please confirm the sample rate of the audio is 16K. You can get the result of the audio demo by running the script below.
```bash
CUDA_VISIBLE_DEVICES= ./local/test_wav.sh conf/wav2vec2ASR.yaml conf/tuning/decode.yaml exp/wav2vec2ASR/checkpoints/avg_1 data/demo_002_en.wav
```


================================================
FILE: examples/librispeech/asr3/RESULTS.md
================================================
# LibriSpeech

## Wav2VecASR
train: Epoch 1, 1*V100-32G, batchsize: 6

| Model | Params | Config | Augmentation| Test set | Decode method | WER |  
| --- | --- | --- | --- | --- | --- | --- |
| wav2vec2ASR | 302.86 M | conf/wav2vec2ASR.yaml | spec_aug | test-clean | greedy search | 0.018906 |  


================================================
FILE: examples/librispeech/asr3/cmd.sh
================================================
# ====== About run.pl, queue.pl, slurm.pl, and ssh.pl ======
# Usage: <cmd>.pl [options] JOB=1:<nj> <log> <command...>
# e.g.
#   run.pl --mem 4G JOB=1:10 echo.JOB.log echo JOB
#
# Options:
#   --time <time>: Limit the maximum time to execute.
#   --mem <mem>: Limit the maximum memory usage.
#   --max-jobs-run <njob>: Limit the number parallel jobs. This is ignored for non-array jobs.
#   --num-threads <nthreads>: Specify the number of CPU core.
#   --gpu <ngpu>: Specify the number of GPU devices.
#   --config: Change the configuration file from default.
#
# "JOB=1:10" is used for "array jobs" and it can control the number of parallel jobs.
# The left string of "=", i.e. "JOB", is replaced by <N>(Nth job) in the command and the log file name,
# e.g. "echo JOB" is changed to "echo 3" for the 3rd job and "echo 8" for 8th job respectively.
# Note that the number must start with a positive number, so you can't use "JOB=0:10" for example.
#
# run.pl, queue.pl, slurm.pl, and ssh.pl have unified interface, not depending on its backend.
# These options are mapping to specific options for each backend and
# it is configured by "conf/queue.conf" and "conf/slurm.conf" by default.
# If jobs failed, your configuration might be wrong for your environment.
#
#
# The official documentation for run.pl, queue.pl, slurm.pl, and ssh.pl:
#   "Parallelization in Kaldi": http://kaldi-asr.org/doc/queue.html
# =========================================================~


# Backend used by run.sh to launch jobs: "local", "sge", "slurm", "ssh" or "jhu".
cmd_backend='local'

# Export the three job launchers (train_cmd, cuda_cmd, decode_cmd) that match
# the selected backend.
case "${cmd_backend}" in
    local)
        # Local machine, without any Job scheduling system

        # The other usage
        export train_cmd="run.pl"
        # Used for "*_train.py": "--gpu" is appended optionally by run.sh
        export cuda_cmd="run.pl"
        # Used for "*_recog.py"
        export decode_cmd="run.pl"
        ;;

    sge)
        # "qsub" (SGE, Torque, PBS, etc.)
        # The default setting is written in conf/queue.conf.
        # You must change "-q g.q" for the "queue" for your environment.
        # To know the "queue" names, type "qhost -q"
        # Note that to use "--gpu *", you have to setup "complex_value" for the system scheduler.

        export train_cmd="queue.pl"
        export cuda_cmd="queue.pl"
        export decode_cmd="queue.pl"
        ;;

    slurm)
        # "sbatch" (Slurm)
        # The default setting is written in conf/slurm.conf.
        # You must change "-p cpu" and "-p gpu" for the "partition" for your environment.
        # To know the "partition" names, type "sinfo".
        # You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*"
        # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".

        export train_cmd="slurm.pl"
        export cuda_cmd="slurm.pl"
        export decode_cmd="slurm.pl"
        ;;

    ssh)
        # You have to create ".queue/machines" to specify the host to execute jobs.
        # e.g. .queue/machines
        #   host1
        #   host2
        #   host3
        # Assuming you can login them without any password, i.e. You have to set ssh keys.

        export train_cmd="ssh.pl"
        export cuda_cmd="ssh.pl"
        export decode_cmd="ssh.pl"
        ;;

    jhu)
        # This is an example of specifying several unique options in the JHU CLSP cluster setup.
        # Users can modify/add their own command options according to their cluster environments.

        export train_cmd="queue.pl --mem 2G"
        export cuda_cmd="queue-freegpu.pl --mem 2G --gpu 1 --config conf/gpu.conf"
        export decode_cmd="queue.pl --mem 4G"
        ;;

    *)
        # Unknown backend: report on stderr and make the sourcing caller see a failure.
        echo "$0: Error: Unknown cmd_backend=${cmd_backend}" 1>&2
        return 1
        ;;
esac


================================================
FILE: examples/librispeech/asr3/conf/preprocess.yaml
================================================
process:
    # use raw audio
  - type: wav_process


================================================
FILE: examples/librispeech/asr3/conf/tuning/decode.yaml
================================================
decode_batch_size: 1
error_rate_type: wer
decoding_method: ctc_greedy_search  # 'ctc_greedy_search', 'ctc_prefix_beam_search'
beam_size: 10


================================================
FILE: examples/librispeech/asr3/conf/wav2vec2ASR.yaml
================================================
############################################
#          Network Architecture           #
############################################
# Top-level model switches plus the `enc` and `ctc` sub-configs.
freeze_wav2vec2: True
normalize_wav: True
output_norm: True
# NOTE(review): the original comment warned "need to convergence" — presumably this
# initializer is required for training to converge; confirm before changing it.
init_type: 'kaiming_uniform'
enc:
  input_shape: 1024  # same value as `hidden_size` in the Wav2Vec2.0 section
  dnn_blocks: 2
  dnn_neurons: 1024
  activation: True
ctc:
  enc_n_units: 1024  # same value as `enc.dnn_neurons`
  blank_id: 0
  dropout_rate: 0.0
# local/data.sh (stage 3) downloads this file into exp/wav2vec2/.
wav2vec2_params_path: "exp/wav2vec2/wav2vec2-large-960h-lv60-self.pdparams"

############################################
#               Wav2Vec2.0                 #
############################################
# Hyper-parameters of the wav2vec2 model itself.
# NOTE(review): the key names look like the Hugging Face Wav2Vec2Config fields — confirm.
hidden_size: 1024
num_hidden_layers: 24
num_attention_heads: 16
intermediate_size: 4096
hidden_act: "gelu"
hidden_dropout: 0.1
activation_dropout: 0.1
attention_dropout: 0.1
feat_proj_dropout: 0.1
feat_quantizer_dropout: 0.0
final_dropout: 0.1
layerdrop: 0.1
initializer_range: 0.02
layer_norm_eps: 1e-5
feat_extract_norm: "layer"
feat_extract_activation: "gelu"
# The three conv_* lists below each have seven entries
# (presumably one per feature-extractor convolution layer — confirm).
conv_dim: [512, 512, 512, 512, 512, 512, 512]
conv_stride: [5, 2, 2, 2, 2, 2, 2]
conv_kernel: [10, 3, 3, 3, 3, 2, 2]
conv_bias: True
num_conv_pos_embeddings: 128
num_conv_pos_embedding_groups: 16
do_stable_layer_norm: True
apply_spec_augment: False
mask_time_prob: 0.05
mask_time_length: 10
mask_time_min_masks: 2
mask_feature_prob: 0.0
mask_feature_length: 10
mask_feature_min_masks: 0
num_codevectors_per_group: 320
num_codevector_groups: 2
contrastive_logits_temperature: 0.1
num_negatives: 100
codevector_dim: 256
proj_codevector_dim: 256
diversity_loss_weight: 0.1
ctc_loss_reduction: "sum"
ctc_zero_infinity: False
use_weighted_layer_sum: False
add_adapter: False
adapter_kernel_size: 3
adapter_stride: 2
num_adapter_layers: 3
# NOTE(review): a plain YAML parser reads `None` as the string "None", not as null
# (YAML null is written `null` or `~`). Presumably the config loader converts it — verify.
output_hidden_size: None

###########################################
#                   Data                  #
###########################################
# Formatted manifests written by local/data.sh (stage 2) as data/manifest.<set>.
train_manifest: data/manifest.train
dev_manifest: data/manifest.dev
test_manifest: data/manifest.test-clean

###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_char/vocab.txt  # built by local/data.sh (stage 1)
unit_type: 'char'
mean_std_filepath: ""  # left empty in this example
preprocess_config: conf/preprocess.yaml
sortagrad: -1 # Feed samples from shortest to longest; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs
batch_size: 6  # Different batch_size may cause large differences in results
maxlen_in: 51200000000  # if input length > maxlen_in, the batch size is automatically reduced
maxlen_out: 1500000  # if output length > maxlen_out, the batch size is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 0
subsampling_factor: 1
num_encs: 1
dist_sampler: True
shortest_first: True
return_lens_rate: True

############################################
#             Data Augmentation            #
############################################
audio_augment:  # for raw audio
  sample_rate: 16000
  speeds: [95, 100, 105]  # NOTE(review): presumably speed-perturbation factors in percent — confirm

###########################################
#                 Training                #
###########################################
n_epoch: 1
accum_grad: 1
global_grad_clip: 5.0
# Two optimizer/scheduler groups are configured: `model_*` and `wav2vec2_*`.
# NOTE(review): presumably `model_*` drives the layers on top of wav2vec2 and
# `wav2vec2_*` drives the pretrained encoder — confirm against the trainer.
model_optim: adadelta
model_optim_conf:
  lr: 0.9
  epsilon: 1.0e-6
  rho: 0.95
model_scheduler: constantlr
model_scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
wav2vec2_optim: adadelta
wav2vec2_optim_conf:
  lr: 0.9
  epsilon: 1.0e-6
  rho: 0.95
wav2vec2_scheduler: constantlr
wav2vec2_scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 1
checkpoint:
  kbest_n: 50   # NOTE(review): presumably the number of best checkpoints kept — confirm
  latest_n: 5   # NOTE(review): presumably the number of most recent checkpoints kept — confirm

================================================
FILE: examples/librispeech/asr3/local/data.sh
================================================
#!/bin/bash
# Data preparation for the LibriSpeech wav2vec2 ASR example.
# Stages are selected with --stage / --stop_stage; the defaults below run everything.

# Defaults; parse_options.sh allows overriding them with `--name value`.
stage=-1
stop_stage=100

unit_type=char
dict_dir=data/lang_char

source ${MAIN_ROOT}/utils/parse_options.sh

# Working directories: local manifests, dictionary, and the shared dataset root.
TARGET_DIR=${MAIN_ROOT}/dataset
mkdir -p data
mkdir -p ${dict_dir}
mkdir -p ${TARGET_DIR}

# Stage -1: download LibriSpeech, generate the raw manifests and merge them per split.
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
    if ! python3 ${TARGET_DIR}/librispeech/librispeech.py \
            --manifest_prefix="data/manifest" \
            --target_dir="${TARGET_DIR}/librispeech" \
            --full_download="True"; then
        echo "Prepare LibriSpeech failed. Terminated."
        exit 1
    fi

    # Keep the untouched per-subset manifests under a ".raw" suffix.
    for subset in train-clean-100 train-clean-360 train-other-500 dev-clean dev-other test-clean test-other; do
        mv data/manifest.${subset} data/manifest.${subset}.raw
    done

    # Rebuild the merged train/dev/test raw manifests from scratch.
    rm -rf data/manifest.train.raw data/manifest.dev.raw  data/manifest.test.raw
    for split in train dev test; do
        case ${split} in
            train) parts="train-clean-100 train-clean-360 train-other-500" ;;
            dev)   parts="dev-clean dev-other" ;;
            test)  parts="test-clean test-other" ;;
        esac
        for part in ${parts}; do
            cat data/manifest.${part}.raw >> data/manifest.${split}.raw
        done
    done
fi

# Stage 0: compute mean and stddev statistics on the merged raw training manifest.
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # Use one worker per available processor.
    n_jobs=$(nproc)
    mean_std_args=(
        --manifest_path="data/manifest.train.raw"
        --num_samples=2000
        --spectrum_type="fbank"
        --feat_dim=161
        --delta_delta=false
        --sample_rate=16000
        --stride_ms=10
        --window_ms=25
        --use_dB_normalization=False
        --num_workers=${n_jobs}
        --output_path="data/mean_std.json"
    )

    if ! python3 ${MAIN_ROOT}/utils/compute_mean_std.py "${mean_std_args[@]}"; then
        echo "Compute mean and stddev failed. Terminated."
        exit 1
    fi
fi

# Stage 1: build the vocabulary file from the merged raw training manifest.
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    vocab_args=(
        --unit_type ${unit_type}
        --count_threshold=0
        --vocab_path="${dict_dir}/vocab.txt"
        --manifest_paths="data/manifest.train.raw"
    )

    if ! python3 ${MAIN_ROOT}/utils/build_vocab.py "${vocab_args[@]}"; then
        echo "Build vocabulary failed. Terminated."
        exit 1
    fi
fi

# Stage 2: format every raw manifest (token ids, vocab size) in parallel.
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # format manifest with tokenids, vocab size
    format_pids=()
    for set in train dev test dev-clean dev-other test-clean test-other; do
    {
        python3 ${MAIN_ROOT}/utils/format_data.py \
        --cmvn_path "data/mean_std.json" \
        --unit_type ${unit_type} \
        --vocab_path="${dict_dir}/vocab.txt" \
        --manifest_path="data/manifest.${set}.raw" \
        --output_path="data/manifest.${set}"

        if [ $? -ne 0 ]; then
            echo "Formt mnaifest.${set} failed. Terminated."
            # This only ends the background subshell; the parent checks the status below.
            exit 1
        fi
    }&
    format_pids+=($!)
    done

    # A bare `wait` always returns 0, which used to hide failed jobs.
    # Wait for each job by PID so that any failure aborts the script.
    format_failed=0
    for pid in "${format_pids[@]}"; do
        wait "${pid}" || format_failed=1
    done
    if [ ${format_failed} -ne 0 ]; then
        exit 1
    fi
fi

echo "LibriSpeech Data preparation done."

# Stage 3: download the pretrained wav2vec2 weights referenced by conf/wav2vec2ASR.yaml.
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    mkdir -p exp/wav2vec2
    echo "Pretrained wav2vec2 model download"
    # Abort on a failed download instead of reporting success without the weights.
    if ! wget -P exp/wav2vec2 https://paddlespeech.cdn.bcebos.com/wav2vec/wav2vec2-large-960h-lv60-self.pdparams; then
        echo "Download pretrained wav2vec2 model failed. Terminated."
        exit 1
    fi
fi

exit 0

================================================
FILE: examples/librispeech/asr3/local/test.sh
================================================
#!/bin/bash
# Decode the LibriSpeech test-clean manifest with a trained checkpoint and score the result.
#
# Usage: CUDA_VISIBLE_DEVICES=0 ./local/test.sh config_path decode_config_path ckpt_path_prefix
#   config_path         model config passed to test.py via --config
#   decode_config_path  decoding config passed to test.py via --decode_cfg
#   ckpt_path_prefix    checkpoint prefix; result files are written next to it as
#                       <prefix>.<method>.rsl, <prefix>.<method>.rsl.text and <prefix>.<method>.error

set -e

if [ $# -lt 3 ]; then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix"
    exit 1
fi

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when it is empty).
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

expdir=exp
datadir=data

train_set=train_960
# Only test-clean is evaluated; the paths below are hard-coded to it.
recog_set="test-clean"

config_path=$1
decode_config_path=$2
ckpt_prefix=$3

source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

# download language model
#bash local/download_lm_en.sh
#if [ $? -ne 0 ]; then
#    exit 1
#fi

# Convert the reference manifest into the text format consumed by compute-wer.py.
python3 ${MAIN_ROOT}/utils/format_rsl.py \
    --origin_ref data/manifest.test-clean.raw \
    --trans_ref data/manifest.test-clean.text


for type in ctc_greedy_search ctc_prefix_beam_search; do
    echo "decoding ${type}"
    # Greedy search decodes in batches of 16; prefix beam search uses batch size 1.
    case ${type} in
        ctc_greedy_search) batch_size=16 ;;
        *)                 batch_size=1 ;;
    esac

    # Run the decoder inside the `if` condition: under `set -e` a separate `$?` check
    # would never be reached, because the script would already have exited.
    if ! python3 -u ${BIN_DIR}/test.py \
        --ngpu ${ngpu} \
        --config ${config_path} \
        --decode_cfg ${decode_config_path} \
        --result_file ${ckpt_prefix}.${type}.rsl \
        --checkpoint_path ${ckpt_prefix} \
        --opts decode.decoding_method ${type} \
        --opts decode.decode_batch_size ${batch_size}; then
        echo "Failed in evaluation!"
        exit 1
    fi

    # Convert the hypotheses to text and score them against the reference.
    python3 ${MAIN_ROOT}/utils/format_rsl.py \
        --origin_hyp ${ckpt_prefix}.${type}.rsl \
        --trans_hyp ${ckpt_prefix}.${type}.rsl.text

    python3 ${MAIN_ROOT}/utils/compute-wer.py --char=1 --v=1 \
        data/manifest.test-clean.text ${ckpt_prefix}.${type}.rsl.text > ${ckpt_prefix}.${type}.error
    echo "decoding ${type} done."
done

echo "Finished"

exit 0


================================================
FILE: examples/librispeech/asr3/local/test_wav.sh
================================================
#!/bin/bash
# Run CTC greedy-search inference on a single audio file with a trained checkpoint.
#
# Usage: CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh config_path decode_config_path ckpt_path_prefix audio_file
# The result is written to <ckpt_path_prefix>/ctc_greedy_search.rsl.

if [ $# != 4 ];then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix audio_file"
    exit -1
fi

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when it is empty).
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
decode_config_path=$2
ckpt_prefix=$3
audio_file=$4

# Fetch the demo recording (skipped by -nc when it is already present).
mkdir -p data
wget -nc https://paddlespeech.cdn.bcebos.com/datasets/single_wav/en/demo_002_en.wav -P data/
if [ $? -ne 0 ]; then
   exit 1
fi

# Quoted on purpose: an empty or space-containing path would otherwise turn the test
# into `[ ! -f ]`, which is false and silently skips this check.
if [ ! -f "${audio_file}" ]; then
    echo "Plase input the right audio_file path"
    exit 1
fi

# download language model
#bash local/download_lm_ch.sh
#if [ $? -ne 0 ]; then
#    exit 1
#fi

for type in ctc_greedy_search; do
    echo "decoding ${type}"
    batch_size=1
    # Results go into a directory named after the checkpoint prefix.
    output_dir=${ckpt_prefix}
    mkdir -p "${output_dir}"
    python3 -u "${BIN_DIR}/test_wav.py" \
    --ngpu ${ngpu} \
    --config "${config_path}" \
    --decode_cfg "${decode_config_path}" \
    --result_file "${output_dir}/${type}.rsl" \
    --checkpoint_path "${ckpt_prefix}" \
    --opts decode.decoding_method ${type} \
    --opts decode.decode_batch_size ${batch_size} \
    --audio_file "${audio_file}"

    if [ $? -ne 0 ]; then
        echo "Failed in evaluation!"
        exit 1
    fi
done
exit 0


================================================
FILE: examples/librispeech/asr3/local/train.sh
================================================
#!/bin/bash
# Train the wav2vec2 ASR model, directly on CPU (ngpu == 0) or through
# paddle.distributed.launch on the GPUs listed in CUDA_VISIBLE_DEVICES.
#
# Usage: CUDA_VISIBLE_DEVICES=0 ./local/train.sh config_path ckpt_name [resume] [ips]
#   config_path  training config passed to train.py via --config
#   ckpt_name    experiment name; outputs go to exp/<ckpt_name>
#   resume       optional value forwarded to train.py via --resume
#   ips          optional comma-separated node IPs forwarded to the launcher via --ips
# Exits with 1 when training fails.

# Two mandatory and two optional positional arguments. The original `&&` condition
# could never be true, so wrong invocations were not rejected.
if [ $# -lt 2 ] || [ $# -gt 4 ];then
    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name resume(optional) ips(optional)"
    exit -1
fi

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when it is empty).
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
ckpt_name=$2
resume=$3
ips=$4

# Only pass --ips to the launcher when node IPs were given.
if [ -z "${ips}" ];then
  ips_config=
else
  ips_config="--ips=${ips}"
fi

mkdir -p exp

# seed may break model convergence
seed=1988
if [ ${seed} != 0 ]; then
    export FLAGS_cudnn_deterministic=True
fi

# export FLAGS_cudnn_exhaustive_search=true
# export FLAGS_conv_workspace_size_limit=4000
export FLAGS_allocator_strategy=naive_best_fit
if [ ${ngpu} == 0 ]; then
    python3 -u ${BIN_DIR}/train.py \
    --ngpu ${ngpu} \
    --config ${config_path} \
    --output exp/${ckpt_name} \
    --seed ${seed} \
    --resume ${resume}
else
    python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
    --ngpu ${ngpu} \
    --config ${config_path} \
    --output exp/${ckpt_name} \
    --seed ${seed} \
    --resume ${resume}
fi
# Capture the training status right away: the cleanup below overwrites `$?`.
train_status=$?

if [ ${seed} != 0 ]; then
    unset FLAGS_cudnn_deterministic
fi

if [ ${train_status} -ne 0 ]; then
    echo "Failed in training!"
    exit 1
fi

exit 0


================================================
FILE: examples/librispeech/asr3/path.sh
================================================
# Environment for the wav2vec2 ASR example; meant to be sourced from the example directory.

# Repository root, three levels above the current working directory.
MAIN_ROOT=$(realpath ${PWD}/../../../)
export MAIN_ROOT

# Make repository tools, sctk binaries and the local utils directory callable by name.
PATH=${MAIN_ROOT}:${MAIN_ROOT}/tools/sctk/bin:${PWD}/utils:${PATH}
LC_ALL=C
export PATH LC_ALL

# Python: do not write bytecode files, and force UTF-8 I/O so that LC_ALL=C does not
# trigger UnicodeDecodeError.
PYTHONDONTWRITEBYTECODE=1
PYTHONIOENCODING=UTF-8
PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
export PYTHONDONTWRITEBYTECODE PYTHONIOENCODING PYTHONPATH

LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/
export LD_LIBRARY_PATH

# Location of the train/test entry scripts used by local/*.sh.
BIN_DIR=${MAIN_ROOT}/paddlespeech/s2t/exps/wav2vec2/bin
export BIN_DIR


================================================
FILE: examples/librispeech/asr3/run.sh
================================================
#!/bin/bash
# End-to-end recipe for the LibriSpeech wav2vec2 ASR example.
#   stage 0: prepare data            stage 1: train
#   stage 2: average best checkpoints
#   stage 3: decode the test set     stage 4: infer a single wav file
# Every variable defined before parse_options.sh is sourced can be overridden
# with `--name value`, e.g. `bash run.sh --stage 1 --stop_stage 3 --gpus 0,1`.
set -e

. ./path.sh || exit 1;
. ./cmd.sh || exit 1;

gpus=0
stage=0
stop_stage=4
conf_path=conf/wav2vec2ASR.yaml
ips=            #xx.xx.xx.xx,xx.xx.xx.xx
decode_conf_path=conf/tuning/decode.yaml
avg_num=1
resume=         # xx e.g. 30

. ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

audio_file=data/demo_002_en.wav

avg_ckpt=avg_${avg_num}
# Checkpoint name = config file name without its extension.
ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}')
echo "checkpoint name ${ckpt}"

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # prepare data
    bash ./local/data.sh || exit -1
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # train model, all `ckpt` under `exp` dir
    # resume and ips are quoted so an empty resume still occupies the third position;
    # unquoted, a non-empty ips would shift into the resume slot of train.sh.
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} "${resume}" "${ips}"
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # avg n best model
    avg.sh best exp/${ckpt}/checkpoints ${avg_num}
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # greedy search decoder
    CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
fi

if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    # test a single .wav file
    CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${audio_file} || exit -1
fi

================================================
FILE: examples/librispeech/asr4/README.md
================================================
# Hubert2ASR with Librispeech
This example contains code used to finetune [hubert](https://arxiv.org/abs/2106.07447) model with [Librispeech dataset](http://www.openslr.org/resources/12)
## Overview
All the scripts you need are in `run.sh`. There are several stages in `run.sh`, and each stage has its function.
| Stage | Function                                                     |
|:---- |:----------------------------------------------------------- |
| 0     | Process data. It includes: <br>       (1) Download the dataset <br>       (2) Calculate the CMVN of the train dataset <br>       (3) Get the vocabulary file <br>       (4) Get the manifest files of the train, development and test dataset<br>       (5) Download the pretrained hubert model |
| 1     | Train the model                                              |
| 2     | Get the final model by averaging the top-k models, set k = 1 means to choose the best model |
| 3     | Test the final model performance                             |
| 4     | Infer the single audio file                                  |


You can choose to run a range of stages by setting `stage` and `stop_stage`.

For example, if you want to execute the code in stage 2 and stage 3, you can run this script:
```bash
bash run.sh --stage 2 --stop_stage 3
```
Or you can set `stage` equal to `stop_stage` to run only one stage.
For example, if you only want to run `stage 0`, you can use the script below:
```bash
bash run.sh --stage 0 --stop_stage 0
```
The document below will describe the scripts in `run.sh` in detail.
## The Environment Variables
The path.sh contains the environment variables. 
```bash
. ./path.sh
. ./cmd.sh
```
This script needs to be run first. And another script is also needed:
```bash
source ${MAIN_ROOT}/utils/parse_options.sh
```
It will support the way of using `--variable value` in the shell scripts.
## The Local Variables
Some local variables are set in `run.sh`. 
`gpus` denotes the GPU number you want to use. If you set `gpus=`, it means you only use CPU. 
`stage` denotes the number of stages you want to start from in the experiments.
`stop_stage` denotes the number of the stage you want to end at in the experiments. 
`conf_path` denotes the config path of the model.
`avg_num` denotes the number K of top-K models you want to average to get the final model.
`audio_file` denotes the path of the single audio file you want to infer in stage 4.
`ckpt` denotes the checkpoint prefix of the model, e.g. "hubertASR"

You can set the local variables (except `ckpt`) when you use `run.sh`

For example, you can set the `gpus` and `avg_num` when you use the command line:
```bash
bash run.sh --gpus 0,1 --avg_num 20
```
## Stage 0: Data Processing
To use this example, you need to process data firstly and you can use stage 0 in `run.sh` to do this. The code is shown below:
```bash
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
     # prepare data
     bash ./local/data.sh || exit -1
 fi
```
Stage 0 is for processing the data.

If you only want to process the data, you can run
```bash
bash run.sh --stage 0 --stop_stage 0
```
You can also just run these scripts in your command line.
```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
```
After processing the data, the `data` directory will look like this:
```bash
data/
|-- dev.meta
|-- lang_char
|   `-- bpe_unigram_5000.model
|   `-- bpe_unigram_5000.vocab
|   `-- vocab.txt
|-- manifest.dev
|-- manifest.dev.raw
|-- manifest.test
|-- manifest.test.raw
|-- manifest.train
|-- manifest.train.raw
|-- mean_std.json
|-- test.meta
`-- train.meta
```

Stage 0 also downloads the pre-trained [hubert](https://paddlespeech.cdn.bcebos.com/hubert/hubert-large-lv60.pdparams) model.
```bash
mkdir -p exp/hubert
wget -P exp/hubert https://paddlespeech.cdn.bcebos.com/hubert/hubert-large-lv60.pdparams
```
## Stage 1: Model Training
If you want to train the model, you can use stage 1 in `run.sh`. The code is shown below. 
```bash
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
     CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
 fi
```
If you want to train the model, you can use the script below to execute stage 0 and stage 1:
```bash
bash run.sh --stage 0 --stop_stage 1
```
or you can run these scripts in the command line (only use CPU).
```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/hubertASR.yaml hubertASR
```
## Stage 2: Top-k Models Averaging
After training the model, we need to get the final model for testing and inference. In every epoch, the model checkpoint is saved, so we can choose the best model from them based on the validation loss or we can sort them and average the parameters of the top-k models to get the final model. We can use stage 2 to do this, and the code is shown below. Note: We only train one epoch for hubertASR, thus the `avg_num` is set to 1.
```bash
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     # avg n best model
     avg.sh best exp/${ckpt}/checkpoints ${avg_num}
 fi
```
The `avg.sh` script is in `../../../utils/`, which is defined in `path.sh`.
If you want to get the final model, you can use the script below to execute stage 0, stage 1, and stage 2:
```bash
bash run.sh --stage 0 --stop_stage 2
```
or you can run these scripts in the command line (only use CPU).

```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/hubertASR.yaml hubertASR
avg.sh best exp/hubertASR/checkpoints 1
```
## Stage 3: Model Testing
The test stage is to evaluate the model performance. The code of test stage is shown below:
```bash
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
     # test ckpt avg_n
     CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
 fi
```
If you want to train a model and test it, you can use the script below to execute stage 0, stage 1, stage 2, and stage 3 :
```bash
bash run.sh --stage 0 --stop_stage 3
```
or you can run these scripts in the command line (only use CPU).
```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/hubertASR.yaml hubertASR
avg.sh best exp/hubertASR/checkpoints 1
CUDA_VISIBLE_DEVICES= ./local/test.sh conf/hubertASR.yaml conf/tuning/decode.yaml exp/hubertASR/checkpoints/avg_1
```
## Pretrained Model
You can get the pretrained hubertASR from [this](../../../docs/source/released_model.md).

Use the `tar` command to unpack the model, and then you can use the script to test the model.

For example:
```bash
wget https://paddlespeech.cdn.bcebos.com/hubert/hubertASR-large-100h-librispeech_ckpt_1.4.0.model.tar.gz
tar xzvf hubertASR-large-100h-librispeech_ckpt_1.4.0.model.tar.gz
source path.sh
# If you have already processed the data and obtained the manifest files, you can skip the following 2 steps
bash local/data.sh --stage -1 --stop_stage -1
bash local/data.sh --stage 2 --stop_stage 2
CUDA_VISIBLE_DEVICES= ./local/test.sh conf/hubertASR.yaml conf/tuning/decode.yaml exp/hubertASR/checkpoints/avg_1
```
The performance of the released models is shown [here](./RESULTS.md).


## Stage 4: Single Audio File Inference
In some situations, you want to use the trained model to run inference on a single audio file. You can use stage 4. The code is shown below:
```bash
 if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
     # test a single .wav file
     CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${audio_file} || exit -1
 fi
```
You can train the model by yourself using ```bash run.sh --stage 0 --stop_stage 3```, or you can download the pretrained model through the script below:
```bash
wget https://paddlespeech.cdn.bcebos.com/hubert/hubertASR-large-100h-librispeech_ckpt_1.4.0.model.tar.gz
tar xzvf hubertASR-large-100h-librispeech_ckpt_1.4.0.model.tar.gz
```
You can download the audio demo:
```bash
wget -nc https://paddlespeech.cdn.bcebos.com/datasets/single_wav/en/demo_002_en.wav -P data/
```
You need to prepare an audio file or use the audio demo above, please confirm the sample rate of the audio is 16K. You can get the result of the audio demo by running the script below.
```bash
CUDA_VISIBLE_DEVICES= ./local/test_wav.sh conf/hubertASR.yaml conf/tuning/decode.yaml exp/hubertASR/checkpoints/avg_1 data/demo_002_en.wav
```


================================================
FILE: examples/librispeech/asr4/RESULTS.md
================================================
# LibriSpeech

## hubertASR
Fine-tuning on train-clean-100
train: Epoch 3, 1*V100-32G, batchsize: 4, accum_grad: 8

| Model | Params | Config | Augmentation| Test set | Decode method | WER |  
| --- | --- | --- | --- | --- | --- | --- |
| hubertASR | 326.16M | conf/hubertASR.yaml | spec_aug | test-clean | greedy search | 0.05868 |  


================================================
FILE: examples/librispeech/asr4/cmd.sh
================================================
# ====== About run.pl, queue.pl, slurm.pl, and ssh.pl ======
# Usage: <cmd>.pl [options] JOB=1:<nj> <log> <command...>
# e.g.
#   run.pl --mem 4G JOB=1:10 echo.JOB.log echo JOB
#
# Options:
#   --time <time>: Limit the maximum time to execute.
#   --mem <mem>: Limit the maximum memory usage.
#   --max-jobs-run <njob>: Limit the number of parallel jobs. This is ignored for non-array jobs.
#   --num-threads <nthreads>: Specify the number of CPU cores.
#   --gpu <ngpu>: Specify the number of GPU devices.
#   --config: Change the configuration file from default.
#
# "JOB=1:10" is used for "array jobs" and it can control the number of parallel jobs.
# The left string of "=", i.e. "JOB", is replaced by <N>(Nth job) in the command and the log file name,
# e.g. "echo JOB" is changed to "echo 3" for the 3rd job and "echo 8" for 8th job respectively.
# Note that the number must start with a positive number, so you can't use "JOB=0:10" for example.
#
# run.pl, queue.pl, slurm.pl, and ssh.pl have unified interface, not depending on its backend.
# These options are mapping to specific options for each backend and
# it is configured by "conf/queue.conf" and "conf/slurm.conf" by default.
# If jobs failed, your configuration might be wrong for your environment.
#
#
# The official documentation for run.pl, queue.pl, slurm.pl, and ssh.pl:
#   "Parallelization in Kaldi": http://kaldi-asr.org/doc/queue.html
# =========================================================~


# Backend used by run.sh: "local", "sge", "slurm" or "ssh" ("jhu" is a site-specific example).
cmd_backend='local'

# Each branch exports three launcher commands:
#   train_cmd  - general-purpose jobs
#   cuda_cmd   - "*_train.py" jobs ("--gpu" is appended optionally by run.sh)
#   decode_cmd - "*_recog.py" jobs
case "${cmd_backend}" in
    local)
        # Local machine, no job scheduling system.
        export train_cmd="run.pl" cuda_cmd="run.pl" decode_cmd="run.pl"
        ;;

    sge)
        # "qsub" (SGE, Torque, PBS, etc.)
        # Defaults live in conf/queue.conf.
        # Change "-q g.q" to a queue of your environment ("qhost -q" lists the names).
        # Using "--gpu *" requires "complex_value" to be set up in the system scheduler.
        export train_cmd="queue.pl" cuda_cmd="queue.pl" decode_cmd="queue.pl"
        ;;

    slurm)
        # "sbatch" (Slurm)
        # Defaults live in conf/slurm.conf.
        # Change "-p cpu" and "-p gpu" to partitions of your environment ("sinfo" lists them).
        # "--gpu * " works by default and is interpreted as "--gres gpu:*".
        # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".
        export train_cmd="slurm.pl" cuda_cmd="slurm.pl" decode_cmd="slurm.pl"
        ;;

    ssh)
        # Create ".queue/machines" listing the hosts that execute jobs, one per line:
        #   host1
        #   host2
        #   host3
        # Password-less login (ssh keys) to every host is assumed.
        export train_cmd="ssh.pl" cuda_cmd="ssh.pl" decode_cmd="ssh.pl"
        ;;

    jhu)
        # Example with cluster-specific options for the JHU CLSP cluster setup.
        # Modify or add command options according to your own cluster environment.
        export train_cmd="queue.pl --mem 2G"
        export cuda_cmd="queue-freegpu.pl --mem 2G --gpu 1 --config conf/gpu.conf"
        export decode_cmd="queue.pl --mem 4G"
        ;;

    *)
        echo "$0: Error: Unknown cmd_backend=${cmd_backend}" 1>&2
        return 1
        ;;
esac


================================================
FILE: examples/librispeech/asr4/conf/config.json
================================================
{
  "_name_or_path": "facebook/hubert-large-ll60k",
  "activation_dropout": 0.0,
  "apply_spec_augment": true,
  "architectures": [
    "HubertModel"
  ],
  "attention_dropout": 0.1,
  "bos_token_id": 1,
  "conv_bias": true,
  "conv_dim": [
    512,
    512,
    512,
    512,
    512,
    512,
    512
  ],
  "conv_kernel": [
    10,
    3,
    3,
    3,
    3,
    2,
    2
  ],
  "conv_stride": [
    5,
    2,
    2,
    2,
    2,
    2,
    2
  ],
  "ctc_loss_reduction": "sum",
  "ctc_zero_infinity": false,
  "do_stable_layer_norm": true,
  "eos_token_id": 2,
  "feat_extract_activation": "gelu",
  "feat_extract_dropout": 0.0,
  "feat_extract_norm": "layer",
  "feat_proj_dropout": 0.1,
  "final_dropout": 0.0,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-05,
  "layerdrop": 0.1,
  "mask_channel_length": 10,
  "mask_channel_min_space": 1,
  "mask_channel_other": 0.0,
  "mask_channel_prob": 0.0,
  "mask_channel_selection": "static",
  "mask_feature_length": 10,
  "mask_feature_prob": 0.0,
  "mask_time_length": 10,
  "mask_time_min_space": 1,
  "mask_time_other": 0.0,
  "mask_time_prob": 0.075,
  "mask_time_selection": "static",
  "model_type": "hubert",
  "num_attention_heads": 16,
  "num_conv_pos_embedding_groups": 16,
  "num_conv_pos_embeddings": 128,
  "num_feat_extract_layers": 7,
  "num_hidden_layers": 24,
  "pad_token_id": 0,
  "transformers_version": "4.10.0.dev0",
  "vocab_size": 32,
  "tokenizer_class": "Wav2Vec2CTCTokenizer"
}


================================================
FILE: examples/librispeech/asr4/conf/hubertASR.yaml
================================================
############################################
#          Network Architecture           #
############################################
# Top-level model switches plus the `enc` and `ctc` sub-configs.
freeze_hubert: False
normalize_wav: True
output_norm: True
# NOTE(review): the original comment warned "need to convergence" — presumably this
# initializer is required for training to converge; confirm before changing it.
init_type: kaiming_uniform
enc:
  input_shape: 1024  # same value as `model_cfg.encoder_embed_dim`
  dnn_blocks: 2
  dnn_neurons: 1024
  activation: True
ctc:
  enc_n_units: 1024  # same value as `enc.dnn_neurons`
  blank_id: 0
  dropout_rate: 0.0
# The README (Stage 0) shows this file being downloaded into exp/hubert/.
hubert_params_path: "exp/hubert/hubert-large-lv60.pdparams"


# NOTE(review): the key names in `task_cfg` / `model_cfg` look like fairseq HuBERT
# task and model options — confirm against the model code.
task_cfg:
  label_rate: 50.0
  sample_rate: 16000
  normalize: True
  enable_padding: False
  # NOTE(review): a plain YAML parser reads `None` as the string "None", not as null.
  # Presumably the config loader converts it — verify.
  max_keep_size: None
  max_sample_size: 250000
  min_sample_size: 32000
  single_target: False
  random_crop: True
  pad_audio: False

model_cfg:
  dropout_input: 0.0
  final_dropout: 0.0
  dropout: 0.0
  attention_dropout: 0.0
  activation_dropout: 0.1
  apply_mask: True
  mask_length: 10
  mask_prob: 0.5
  mask_selection: static
  mask_other: 0.0
  no_mask_overlap: False
  mask_channel_length: 64
  mask_channel_prob: 0.25
  mask_channel_selection: static
  mask_channel_other: 0.0
  no_mask_channel_overlap: False
  feature_grad_mult: 0.0
  layerdrop: 0.1
  normalize: True
  fp16: True
  label_rate: 50
  extractor_mode: layer_norm
  encoder_layers: 24
  encoder_embed_dim: 1024
  encoder_ffn_embed_dim: 4096
  encoder_attention_heads: 16
  activation_fn: gelu
  encoder_layerdrop: 0.1
  dropout_features: 0.0
  final_dim: 768
  untie_final_proj: True
  layer_norm_first: True
  # String holding a list expression that expands to 7 tuples (1 + 4 + 2).
  # NOTE(review): presumably (channels, kernel, stride) per convolution layer — confirm.
  conv_feature_layers: "[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2"
  # NOTE(review): conf/config.json in this example lists "conv_bias": true — confirm which one applies.
  conv_bias: False
  logit_temp: 0.1
  target_glu: False
  mask_min_space: 1
  mask_channel_min_space: 1
  conv_pos: 128
  conv_pos_groups: 16
  latent_temp: [2.0, 0.5, 0.999995]
  skip_masked: False
  skip_nomask: True

###########################################
#                   Data                  #
###########################################
# Training uses only the train-clean-100 manifest (RESULTS.md reports fine-tuning on it).
train_manifest: data/manifest.train-clean-100
dev_manifest: data/manifest.dev
test_manifest: data/manifest.test-clean

###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_char/vocab.txt
unit_type: char
mean_std_filepath: ""  # left empty in this example
preprocess_config: conf/preprocess.yaml
sortagrad: -1 # Feed samples from shortest to longest; -1: enabled for all epochs, 0: disabled, other: enabled for other epochs
batch_size: 4  # Different batch_size may cause large differences in results
maxlen_in: 1500  # if input length > maxlen_in, the batch size is automatically reduced
maxlen_out: 150  # if output length > maxlen_out, the batch size is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 0
subsampling_factor: 1
num_encs: 1
dist_sampler: True
shortest_first: True
return_lens_rate: True

############################################
#             Data Augmentation            #
############################################
audio_augment:  # for raw audio
  sample_rate: 16000
  speeds: [95, 100, 105]  # NOTE(review): presumably speed-perturbation factors in percent — confirm

###########################################
#                 Training                #
###########################################
# n_epoch, batch_size and accum_grad match the setup reported in RESULTS.md
# (Epoch 3, batchsize 4, accum_grad 8).
n_epoch: 3
accum_grad: 8
global_grad_clip: 5.0
# Two optimizer/scheduler groups are configured: `model_*` and `hubert_*`.
# NOTE(review): presumably `model_*` drives the layers on top of hubert and
# `hubert_*` drives the pretrained encoder — confirm against the trainer.
model_optim: adadelta
model_optim_conf:
  lr: 1.0
  epsilon: 1.0e-6
  rho: 0.95
model_scheduler: constantlr
model_scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
hubert_optim: adadelta
hubert_optim_conf:
  lr: 0.95
  epsilon: 1.0e-6
  rho: 0.95
hubert_scheduler: constantlr
hubert_scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 1
checkpoint:
  kbest_n: 50   # NOTE(review): presumably the number of best checkpoints kept — confirm
  latest_n: 5   # NOTE(review): presumably the number of most recent checkpoints kept — confirm


================================================
FILE: examples/librispeech/asr4/conf/preprocess.yaml
================================================
process:
    # use raw audio
  - type: wav_process


================================================
FILE: examples/librispeech/asr4/conf/preprocessor_config.json
================================================
{
  "do_normalize": true,
  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
  "feature_size": 1,
  "padding_side": "right",
  "padding_value": 0,
  "return_attention_mask": true,
  "sampling_rate": 16000
}


================================================
FILE: examples/librispeech/asr4/conf/tuning/decode.yaml
================================================
decode_batch_size: 1
error_rate_type: wer
decoding_method: ctc_greedy_search  # 'ctc_greedy_search', 'ctc_prefix_beam_search'
beam_size: 10


================================================
FILE: examples/librispeech/asr4/local/data.sh
================================================
#!/bin/bash
# Prepare LibriSpeech for the HuBERT ASR recipe.
#
# Stages (select with --stage / --stop_stage):
#   -1  download LibriSpeech and build the raw manifests
#    0  compute mean/stddev statistics
#    1  build the character vocabulary
#    2  format the manifests (token ids, vocab size)
#    3  download the pretrained HuBERT checkpoint
#
# Requires MAIN_ROOT to be exported (see path.sh).

stage=-1
stop_stage=100

unit_type=char
dict_dir=data/lang_char

source ${MAIN_ROOT}/utils/parse_options.sh

mkdir -p data
mkdir -p ${dict_dir}
TARGET_DIR=${MAIN_ROOT}/dataset
mkdir -p ${TARGET_DIR}

if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
    # download data, generate manifests
    python3 ${TARGET_DIR}/librispeech/librispeech.py \
    --manifest_prefix="data/manifest" \
    --target_dir="${TARGET_DIR}/librispeech" \
    --full_download="True"

    if [ $? -ne 0 ]; then
        echo "Prepare LibriSpeech failed. Terminated."
        exit 1
    fi

    # keep the per-subset manifests under a ".raw" suffix
    for set in train-clean-100 train-clean-360 train-other-500 dev-clean dev-other test-clean test-other; do
        mv data/manifest.${set} data/manifest.${set}.raw
    done

    # rebuild the merged train/dev/test manifests from scratch
    rm -rf data/manifest.train.raw data/manifest.dev.raw data/manifest.test.raw
    for set in train-clean-100 train-clean-360 train-other-500; do
        cat data/manifest.${set}.raw >> data/manifest.train.raw
    done

    for set in dev-clean dev-other; do
        cat data/manifest.${set}.raw >> data/manifest.dev.raw
    done

    for set in test-clean test-other; do
        cat data/manifest.${set}.raw >> data/manifest.test.raw
    done
fi

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # compute mean and stddev for normalizer
    num_workers=$(nproc)
    python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
    --manifest_path="data/manifest.train.raw" \
    --num_samples=2000 \
    --spectrum_type="fbank" \
    --feat_dim=161 \
    --delta_delta=false \
    --sample_rate=16000 \
    --stride_ms=10 \
    --window_ms=25 \
    --use_dB_normalization=False \
    --num_workers=${num_workers} \
    --output_path="data/mean_std.json"

    if [ $? -ne 0 ]; then
        echo "Compute mean and stddev failed. Terminated."
        exit 1
    fi
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # build vocabulary
    python3 ${MAIN_ROOT}/utils/build_vocab.py \
    --unit_type ${unit_type} \
    --count_threshold=0 \
    --vocab_path="${dict_dir}/vocab.txt" \
    --manifest_paths="data/manifest.train.raw"

    if [ $? -ne 0 ]; then
        echo "Build vocabulary failed. Terminated."
        exit 1
    fi
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # format manifest with tokenids, vocab size
    # train-clean-100 is formatted as well because conf/hubertASR.yaml
    # uses data/manifest.train-clean-100 as its train_manifest.
    pids=()
    for set in train dev test train-clean-100 dev-clean dev-other test-clean test-other; do
    {
        python3 ${MAIN_ROOT}/utils/format_data.py \
        --cmvn_path "data/mean_std.json" \
        --unit_type ${unit_type} \
        --vocab_path="${dict_dir}/vocab.txt" \
        --manifest_path="data/manifest.${set}.raw" \
        --output_path="data/manifest.${set}"

        if [ $? -ne 0 ]; then
            echo "Format manifest.${set} failed. Terminated."
            exit 1
        fi
    }&
    pids+=($!)
    done

    # `exit 1` above only ends the background subshell, and a bare `wait`
    # always returns 0, so wait on every job to collect its exit status.
    format_failed=0
    for pid in "${pids[@]}"; do
        wait ${pid} || format_failed=1
    done
    if [ ${format_failed} -ne 0 ]; then
        exit 1
    fi
fi

echo "LibriSpeech Data preparation done."

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    mkdir -p exp/hubert
    echo "Pretrained hubert model download"
    wget -P exp/hubert https://paddlespeech.cdn.bcebos.com/hubert/hubert-large-lv60.pdparams

    if [ $? -ne 0 ]; then
        echo "Download pretrained hubert model failed. Terminated."
        exit 1
    fi
fi

exit 0

================================================
FILE: examples/librispeech/asr4/local/test.sh
================================================
#!/bin/bash
# Decode the test-clean set with a trained checkpoint and score it.
#
# usage: test.sh config_path decode_config_path ckpt_prefix
#
# Runs CTC greedy search and then CTC prefix beam search. For each method the
# hypotheses are written to ${ckpt_prefix}.<method>.rsl and the compute-wer.py
# report to ${ckpt_prefix}.<method>.error.

set -e

ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

expdir=exp
datadir=data

# Only test-clean is scored below (the reference paths are hard-coded).
recog_set="test-clean"

config_path=$1
decode_config_path=$2
ckpt_prefix=$3

source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

# download language model
#bash local/download_lm_en.sh
#if [ $? -ne 0 ]; then
#    exit 1
#fi

python3 ${MAIN_ROOT}/utils/format_rsl.py \
    --origin_ref data/manifest.test-clean.raw \
    --trans_ref data/manifest.test-clean.text


for type in ctc_greedy_search ctc_prefix_beam_search; do
    echo "decoding ${type}"
    # prefix beam search is run with batch size 1, greedy search with 16
    if [ "${type}" = ctc_prefix_beam_search ]; then
        batch_size=1
    else
        batch_size=16
    fi

    # The command is tested directly: with `set -e` a separate `$?` check
    # would never run because the script aborts on the failing command first.
    if ! python3 -u ${BIN_DIR}/test.py \
        --ngpu ${ngpu} \
        --config ${config_path} \
        --decode_cfg ${decode_config_path} \
        --result_file ${ckpt_prefix}.${type}.rsl \
        --checkpoint_path ${ckpt_prefix} \
        --opts decode.decoding_method ${type} \
        --opts decode.decode_batch_size ${batch_size}; then
        echo "Failed in evaluation!"
        exit 1
    fi

    python3 ${MAIN_ROOT}/utils/format_rsl.py \
        --origin_hyp ${ckpt_prefix}.${type}.rsl \
        --trans_hyp ${ckpt_prefix}.${type}.rsl.text

    python3 ${MAIN_ROOT}/utils/compute-wer.py --char=1 --v=1 \
        data/manifest.test-clean.text ${ckpt_prefix}.${type}.rsl.text > ${ckpt_prefix}.${type}.error
    echo "decoding ${type} done."
done

echo "Finished"

exit 0


================================================
FILE: examples/librispeech/asr4/local/test_wav.sh
================================================
#!/bin/bash
# Transcribe a single audio file with a trained checkpoint (CTC greedy search).
#
# usage: test_wav.sh config_path decode_config_path ckpt_path_prefix audio_file
#
# The result is written to <ckpt_path_prefix>/ctc_greedy_search.rsl.

if [ $# != 4 ];then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix audio_file"
    exit -1
fi

ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
decode_config_path=$2
ckpt_prefix=$3
audio_file=$4

# fetch the demo utterance (skipped by -nc when it is already present)
mkdir -p data
wget -nc https://paddlespeech.cdn.bcebos.com/datasets/single_wav/en/demo_002_en.wav -P data/
if [ $? -ne 0 ]; then
   exit 1
fi

# quoted so that a path containing spaces is tested as one file name
if [ ! -f "${audio_file}" ]; then
    echo "Please input the right audio_file path"
    exit 1
fi

# download language model
#bash local/download_lm_ch.sh
#if [ $? -ne 0 ]; then
#    exit 1
#fi

for type in ctc_greedy_search; do
    echo "decoding ${type}"
    batch_size=1
    output_dir=${ckpt_prefix}
    mkdir -p ${output_dir}
    python3 -u ${BIN_DIR}/test_wav.py \
    --ngpu ${ngpu} \
    --config ${config_path} \
    --decode_cfg ${decode_config_path} \
    --result_file ${output_dir}/${type}.rsl \
    --checkpoint_path ${ckpt_prefix} \
    --opts decode.decoding_method ${type} \
    --opts decode.decode_batch_size ${batch_size} \
    --audio_file ${audio_file}

    if [ $? -ne 0 ]; then
        echo "Failed in evaluation!"
        exit 1
    fi
done
exit 0


================================================
FILE: examples/librispeech/asr4/local/train.sh
================================================
#!/bin/bash
# Launch training on CPU (no visible GPU) or through paddle.distributed.launch.
#
# usage: CUDA_VISIBLE_DEVICES=0 train.sh config_path ckpt_name [resume] [ips]
#
# Checkpoints and logs go to exp/<ckpt_name>.

# Between 2 and 4 positional arguments are accepted. The two conditions must be
# OR-ed: a count cannot be below 2 and above 4 at the same time.
if [ $# -lt 2 ] || [ $# -gt 4 ];then
    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name resume(optional) ips(optional)"
    exit -1
fi

ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
ckpt_name=$2
resume=$3
ips=$4

if [ ! $ips ];then
  ips_config=
else
  ips_config="--ips="${ips}
fi

mkdir -p exp

# seed may break model convergence
seed=1988
if [ ${seed} != 0 ]; then
    export FLAGS_cudnn_deterministic=True
fi

# export FLAGS_cudnn_exhaustive_search=true
# export FLAGS_conv_workspace_size_limit=4000
export FLAGS_allocator_strategy=naive_best_fit
if [ ${ngpu} == 0 ]; then
python3 -u ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--config ${config_path} \
--output exp/${ckpt_name} \
--seed ${seed} \
--resume ${resume}
else
python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--config ${config_path} \
--output exp/${ckpt_name} \
--seed ${seed} \
--resume ${resume}
fi
# Save the trainer's exit status now: the cleanup below overwrites $?.
train_status=$?

if [ ${seed} != 0 ]; then
    unset FLAGS_cudnn_deterministic
fi

if [ ${train_status} -ne 0 ]; then
    echo "Failed in training!"
    exit 1
fi

exit 0


================================================
FILE: examples/librispeech/asr4/path.sh
================================================
# Environment setup for this example; meant to be sourced (`. ./path.sh`).

# Repository root: three directories above the current working directory.
export MAIN_ROOT=`realpath ${PWD}/../../../`

# Prepend the repo root, the sctk binaries and ./utils to the command search path.
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/tools/sctk/bin:${PWD}/utils:${PATH}
export LC_ALL=C

# Do not write .pyc files.
export PYTHONDONTWRITEBYTECODE=1
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
# Make the repository importable from Python.
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/

# Directory holding the hubert train/test entry scripts used by local/*.sh.
export BIN_DIR=${MAIN_ROOT}/paddlespeech/s2t/exps/hubert/bin


================================================
FILE: examples/librispeech/asr4/run.sh
================================================
#!/bin/bash
# Entry point of the recipe. Select what to run with --stage / --stop_stage:
#   0  prepare data
#   1  train
#   2  average the best checkpoints
#   3  decode and score the test set
#   4  transcribe a single wav file
set -e

. ./path.sh || exit 1;
. ./cmd.sh || exit 1;

gpus=0
stage=0
stop_stage=0
conf_path=conf/hubertASR.yaml
ips=            #xx.xx.xx.xx,xx.xx.xx.xx
decode_conf_path=conf/tuning/decode.yaml
avg_num=1
resume=         # xx e.g. 30

. ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

audio_file=data/demo_002_en.wav

avg_ckpt=avg_${avg_num}
# checkpoint name = config file name without its extension
ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}')
echo "checkpoint name ${ckpt}"

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # prepare data
    bash ./local/data.sh || exit -1
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # train model, all `ckpt` under `exp` dir
    # resume/ips are quoted so that an empty `resume` still occupies the third
    # positional slot; unquoted, `ips` would be read by train.sh as `resume`.
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} "${resume}" "${ips}"
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # avg n best model
    avg.sh best exp/${ckpt}/checkpoints ${avg_num}
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # greedy search decoder
    CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
fi

if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    # test a single .wav file
    CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${audio_file} || exit -1
fi


================================================
FILE: examples/librispeech/asr5/README.md
================================================
# WavLM2ASR with Librispeech
This example contains code used to finetune [WavLM](https://arxiv.org/abs/2110.13900) model with [Librispeech dataset](http://www.openslr.org/resources/12)
## Overview
All the scripts you need are in `run.sh`. There are several stages in `run.sh`, and each stage has its function.
| Stage | Function                                                     |
|:---- |:----------------------------------------------------------- |
| 0     | Process data. It includes: <br>       (1) Download the dataset <br>       (2) Calculate the CMVN of the train dataset <br>       (3) Get the vocabulary file <br>       (4) Get the manifest files of the train, development and test dataset<br>       (5) Download the pretrained WavLM model |
| 1     | Train the model                                              |
| 2     | Get the final model by averaging the top-k models, set k = 1 means to choose the best model |
| 3     | Test the final model performance                             |
| 4     | Infer the single audio file                                  |


You can choose to run a range of stages by setting `stage` and `stop_stage`.

For example, if you want to execute the code in stage 2 and stage 3, you can run this script:
```bash
bash run.sh --stage 2 --stop_stage 3
```
Or you can set `stage` equal to `stop_stage` to only run one stage.
For example, if you only want to run `stage 0`, you can use the script below:
```bash
bash run.sh --stage 0 --stop_stage 0
```
The document below will describe the scripts in `run.sh` in detail.
## The Environment Variables
The path.sh contains the environment variables. 
```bash
. ./path.sh
. ./cmd.sh
```
This script needs to be run first. And another script is also needed:
```bash
source ${MAIN_ROOT}/utils/parse_options.sh
```
It will support the way of using `--variable value` in the shell scripts.
## The Local Variables
Some local variables are set in `run.sh`. 
`gpus` denotes the GPU number you want to use. If you set `gpus=`, it means you only use CPU. 
`stage` denotes the number of stages you want to start from in the experiments.
`stop_stage` denotes the number of the stage you want to end at in the experiments. 
`conf_path` denotes the config path of the model.
`avg_num` denotes the number K of top-K models you want to average to get the final model.
`audio_file` denotes the file path of the single file you want to infer in stage 4
`ckpt` denotes the checkpoint prefix of the model, e.g. "WavLMASR"

You can set the local variables (except `ckpt`) when you use `run.sh`

For example, you can set the `gpus` and `avg_num` when you use the command line:
```bash
bash run.sh --gpus 0,1 --avg_num 20
```
## Stage 0: Data Processing
To use this example, you need to process data firstly and you can use stage 0 in `run.sh` to do this. The code is shown below:
```bash
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
     # prepare data
     bash ./local/data.sh || exit -1
 fi
```
Stage 0 is for processing the data.

If you only want to process the data, you can run
```bash
bash run.sh --stage 0 --stop_stage 0
```
You can also just run these scripts in your command line.
```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
```
After processing the data, the `data` directory will look like this:
```bash
data/
|-- dev.meta
|-- lang_char
|   `-- bpe_unigram_5000.model
|   `-- bpe_unigram_5000.vocab
|   `-- vocab.txt
|-- manifest.dev
|-- manifest.dev.raw
|-- manifest.test
|-- manifest.test.raw
|-- manifest.train
|-- manifest.train.raw
|-- mean_std.json
|-- test.meta
`-- train.meta
```

Stage 0 also downloads the pre-trained [wavlm](https://paddlespeech.cdn.bcebos.com/wavlm/wavlm-base-plus.pdparams) model.
```bash
mkdir -p exp/wavlm
wget -P exp/wavlm https://paddlespeech.cdn.bcebos.com/wavlm/wavlm-base-plus.pdparams
```
## Stage 1: Model Training
If you want to train the model, you can use stage 1 in `run.sh`. The code is shown below. 
```bash
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
     CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
 fi
```
If you want to train the model, you can use the script below to execute stage 0 and stage 1:
```bash
bash run.sh --stage 0 --stop_stage 1
```
or you can run these scripts in the command line (only use CPU).
```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/wavlmASR.yaml wavlmASR
```
## Stage 2: Top-k Models Averaging
After training the model, we need to get the final model for testing and inference. In every epoch, the model checkpoint is saved, so we can choose the best model from them based on the validation loss or we can sort them and average the parameters of the top-k models to get the final model. We can use stage 2 to do this, and the code is shown below. Note: We only train one epoch for wavlmASR, thus the `avg_num` is set to 1.
```bash
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     # avg n best model
     avg.sh best exp/${ckpt}/checkpoints ${avg_num}
 fi
```
The `avg.sh` is in `../../../utils/`, which is defined in `path.sh`.
If you want to get the final model, you can use the script below to execute stage 0, stage 1, and stage 2:
```bash
bash run.sh --stage 0 --stop_stage 2
```
or you can run these scripts in the command line (only use CPU).

```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/wavlmASR.yaml wavlmASR
avg.sh best exp/wavlmASR/checkpoints 1
```
## Stage 3: Model Testing
The test stage is to evaluate the model performance. The code of test stage is shown below:
```bash
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
     # test ckpt avg_n
     CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
 fi
```
If you want to train a model and test it, you can use the script below to execute stage 0, stage 1, stage 2, and stage 3 :
```bash
bash run.sh --stage 0 --stop_stage 3
```
or you can run these scripts in the command line (only use CPU).
```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/wavlmASR.yaml wavlmASR
avg.sh best exp/wavlmASR/checkpoints 1
CUDA_VISIBLE_DEVICES= ./local/test.sh conf/wavlmASR.yaml conf/tuning/decode.yaml exp/wavlmASR/checkpoints/avg_1
```
## Pretrained Model
You can get the pretrained wavlmASR from [this](../../../docs/source/released_model.md).

Use the `tar` command to unpack the model, and then you can use the script to test the model.

For example:
```bash
wget https://paddlespeech.cdn.bcebos.com/wavlm/wavlmASR-base-100h-librispeech_ckpt_1.4.0.model.tar.gz
tar xzvf wavlmASR-base-100h-librispeech_ckpt_1.4.0.model.tar.gz
source path.sh
# If you have already processed the data and generated the manifest files, you can skip the following 2 steps
bash local/data.sh --stage -1 --stop_stage -1
bash local/data.sh --stage 2 --stop_stage 2
CUDA_VISIBLE_DEVICES= ./local/test.sh conf/wavlmASR.yaml conf/tuning/decode.yaml exp/wavlmASR/checkpoints/avg_1
```
The performance of the released models is shown [here](./RESULTS.md).


## Stage 4: Single Audio File Inference
In some situations, you want to use the trained model to do the inference for a single audio file. You can use stage 4. The code is shown below
```bash
 if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
     # test a single .wav file
     CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${audio_file} || exit -1
 fi
```
you can train the model by yourself using ```bash run.sh --stage 0 --stop_stage 3```, or you can download the pretrained model through the script below:
```bash
wget https://paddlespeech.cdn.bcebos.com/wavlm/wavlm_baseplus_libriclean_100h.tar.gz
tar xzvf wavlm_baseplus_libriclean_100h.tar.gz
```
You can download the audio demo:
```bash
wget -nc https://paddlespeech.cdn.bcebos.com/datasets/single_wav/en/demo_002_en.wav -P data/
```
You need to prepare an audio file or use the audio demo above, please confirm the sample rate of the audio is 16K. You can get the result of the audio demo by running the script below.
```bash
CUDA_VISIBLE_DEVICES= ./local/test_wav.sh conf/wavlmASR.yaml conf/tuning/decode.yaml exp/wavlmASR/checkpoints/avg_1 data/demo_002_en.wav
```


================================================
FILE: examples/librispeech/asr5/RESULTS.md
================================================
# LibriSpeech

## WavLMASR
Fine-tuning on train-clean-100
train: Epoch 16, 4*A800-80G, batchsize: 16, accum_grad: 8

| Model | Params | Config | Augmentation| Test set | Decode method | WER |  
| --- | --- | --- | --- | --- | --- | --- |
| WavLMASR | 326.16M | conf/wavlmasr.yaml | spec_aug | test-clean | greedy search | 0.0561 |  


================================================
FILE: examples/librispeech/asr5/avg.sh
================================================
#! /usr/bin/env bash
# Average checkpoints found in a directory into <ckpt_dir>/avg_<avg_num>.pdparams.
# Mode "best" passes --val_best to avg_model.py; any other mode omits it.

if [ $# != 3 ]; then
    echo "usage: ${0} [best|latest] ckpt_dir avg_num"
    exit -1
fi

avg_mode=${1} # best,latest
ckpt_dir=${2}
average_num=${3}
decode_checkpoint=${ckpt_dir}/avg_${average_num}.pdparams

# the two modes differ only by this optional flag
val_best_flag=
if [ $avg_mode == best ];then
    val_best_flag=--val_best
fi

python avg_model.py \
--dst_model ${decode_checkpoint} \
--ckpt_dir ${ckpt_dir}  \
--num ${average_num} \
${val_best_flag}
avg_status=$?

if [ ${avg_status} -ne 0 ]; then
    echo "Failed in avg ckpt!"
    exit 1
fi

exit 0


================================================
FILE: examples/librispeech/asr5/cmd.sh
================================================
# ====== About run.pl, queue.pl, slurm.pl, and ssh.pl ======
# Usage: <cmd>.pl [options] JOB=1:<nj> <log> <command...>
# e.g.
#   run.pl --mem 4G JOB=1:10 echo.JOB.log echo JOB
#
# Options:
#   --time <time>: Limit the maximum time to execute.
#   --mem <mem>: Limit the maximum memory usage.
#   --max-jobs-run <njob>: Limit the number parallel jobs. This is ignored for non-array jobs.
#   --num-threads <ngpu>: Specify the number of CPU cores.
#   --gpu <ngpu>: Specify the number of GPU devices.
#   --config: Change the configuration file from default.
#
# "JOB=1:10" is used for "array jobs" and it can control the number of parallel jobs.
# The left string of "=", i.e. "JOB", is replaced by <N>(Nth job) in the command and the log file name,
# e.g. "echo JOB" is changed to "echo 3" for the 3rd job and "echo 8" for 8th job respectively.
# Note that the number must start with a positive number, so you can't use "JOB=0:10" for example.
#
# run.pl, queue.pl, slurm.pl, and ssh.pl have unified interface, not depending on its backend.
# These options are mapping to specific options for each backend and
# it is configured by "conf/queue.conf" and "conf/slurm.conf" by default.
# If jobs failed, your configuration might be wrong for your environment.
#
#
# The official documentation for run.pl, queue.pl, slurm.pl, and ssh.pl:
#   "Parallelization in Kaldi": http://kaldi-asr.org/doc/queue.html
# =========================================================~


# Select the backend used by run.sh from "local", "sge", "slurm", or "ssh"
cmd_backend='local'

# Export train_cmd / cuda_cmd / decode_cmd for the chosen backend.
case "${cmd_backend}" in
    local)
        # Local machine, without any Job scheduling system

        # The other usage
        export train_cmd="run.pl"
        # Used for "*_train.py": "--gpu" is appended optionally by run.sh
        export cuda_cmd="run.pl"
        # Used for "*_recog.py"
        export decode_cmd="run.pl"
        ;;

    sge)
        # "qsub" (SGE, Torque, PBS, etc.)
        # The default setting is written in conf/queue.conf.
        # You must change "-q g.q" for the "queue" for your environment.
        # To know the "queue" names, type "qhost -q"
        # Note that to use "--gpu *", you have to setup "complex_value" for the system scheduler.

        export train_cmd="queue.pl"
        export cuda_cmd="queue.pl"
        export decode_cmd="queue.pl"
        ;;

    slurm)
        # "sbatch" (Slurm)
        # The default setting is written in conf/slurm.conf.
        # You must change "-p cpu" and "-p gpu" for the "partition" for your environment.
        # To know the "partition" names, type "sinfo".
        # You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*"
        # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".

        export train_cmd="slurm.pl"
        export cuda_cmd="slurm.pl"
        export decode_cmd="slurm.pl"
        ;;

    ssh)
        # You have to create ".queue/machines" to specify the host to execute jobs.
        # e.g. .queue/machines
        #   host1
        #   host2
        #   host3
        # Assuming you can login them without any password, i.e. You have to set ssh keys.

        export train_cmd="ssh.pl"
        export cuda_cmd="ssh.pl"
        export decode_cmd="ssh.pl"
        ;;

    jhu)
        # This is an example of specifying several unique options in the JHU CLSP cluster setup.
        # Users can modify/add their own command options according to their cluster environments.

        export train_cmd="queue.pl --mem 2G"
        export cuda_cmd="queue-freegpu.pl --mem 2G --gpu 1 --config conf/gpu.conf"
        export decode_cmd="queue.pl --mem 4G"
        ;;

    *)
        echo "$0: Error: Unknown cmd_backend=${cmd_backend}" 1>&2
        return 1
        ;;
esac


================================================
FILE: examples/librispeech/asr5/compute_wer.py
================================================
# Copyright 2021 Mobvoi Inc. All Rights Reserved.
# flake8: noqa
import codecs
import re
import sys
import unicodedata

# When true, normalize() strips "<...>" tags from every token via stripoff_tags().
remove_tag = True
# Whitespace characters that characterize() skips and treats as token boundaries.
spacelist = [' ', '\t', '\r', '\n']
# Punctuation marks that characterize() drops from the input.
puncts = [
    '!', ',', '?', '、', '。', '！', '，', '；', '？', '：', '「', '」', '︰', '『', '』',
    '《', '》'
]


def characterize(string):
    """Split ``string`` into a list of tokens.

    Punctuation from ``puncts``, whitespace and unassigned code points are
    dropped. Each 'Lo' (letter-other) character becomes its own token. Any
    other character starts a run that extends until a non-ASCII character,
    a whitespace character or the run's separator is met.
    """
    tokens = []
    total = len(string)
    pos = 0
    while pos < total:
        ch = string[pos]
        # https://unicodebook.readthedocs.io/unicode.html#unicode-categories
        category = unicodedata.category(ch)
        if ch in puncts or category in ('Zs', 'Cn') or ch in spacelist:
            # punctuation, space or not assigned: nothing to emit
            pos += 1
            continue
        if category == 'Lo':  # letter-other
            tokens.append(ch)
            pos += 1
            continue
        # some input looks like: <unk><noise>, we want to separate it to two words.
        stop_at = '>' if ch == '<' else ' '
        end = pos + 1
        while (end < total and ord(string[end]) < 128 and
               string[end] not in spacelist and string[end] != stop_at):
            end += 1
        # keep the closing bracket as part of the tag token
        if end < total and string[end] == '>':
            end += 1
        tokens.append(string[pos:end])
        pos = end
    return tokens


def stripoff_tags(x):
    """Return ``x`` with every ``<...>`` span removed.

    A tag runs from '<' up to and including the next '>'. An unterminated
    tag swallows the rest of the input. A falsy ``x`` yields ''.
    """
    if not x:
        return ''
    kept = []
    inside_tag = False
    for ch in x:
        if inside_tag:
            # drop everything up to and including the closing bracket
            if ch == '>':
                inside_tag = False
        elif ch == '<':
            inside_tag = True
        else:
            kept.append(ch)
    return ''.join(kept)


def normalize(sentence, ignore_words, cs, split=None):
    """Normalize a token list; sentence and ignore_words are both in unicode.

    Args:
        sentence: iterable of tokens.
        ignore_words: collection of tokens to drop (checked after case folding).
        cs: case-sensitive flag; when false every token is upper-cased.
        split: optional mapping from a token to the list of tokens replacing it.

    Returns:
        A new list of normalized tokens.
    """
    normalized = []
    for token in sentence:
        word = token if cs else token.upper()
        if word in ignore_words:
            continue
        if remove_tag:
            word = stripoff_tags(word)
        if not word:
            # nothing left once the tags are gone
            continue
        if split and word in split:
            normalized.extend(split[word])
        else:
            normalized.append(word)
    return normalized


class Calculator:
    """Edit-distance aligner that accumulates per-token error statistics.

    Attributes:
        data: token -> {'all', 'cor', 'sub', 'ins', 'del'} counters summed
            over every ``calculate`` call made on this instance.
        space: dynamic-programming table, kept and grown between calls; each
            cell is a dict with the keys 'dist' and 'error'.
        cost: cost of each edit operation.
    """

    def __init__(self):
        self.data = {}
        self.space = []
        self.cost = {}
        self.cost['cor'] = 0
        self.cost['sub'] = 1
        self.cost['del'] = 1
        self.cost['ins'] = 1

    def calculate(self, lab, rec):
        """Align reference ``lab`` with hypothesis ``rec``.

        Args:
            lab: list of reference tokens (left unmodified).
            rec: list of recognized tokens (left unmodified).

        Returns:
            dict with the aligned token lists under 'lab' and 'rec' (an empty
            string marks a gap) and the counts 'all', 'cor', 'sub', 'ins' and
            'del' for this pair. ``self.data`` is updated as a side effect.
        """
        # Work on private copies carrying a leading empty sentinel, so the
        # caller's lists are not modified.
        lab = [''] + list(lab)
        rec = [''] + list(rec)

        # Initialization: reset the cells left by previous calls, then grow
        # the table to the size needed for this pair.
        while len(self.space) < len(lab):
            self.space.append([])
        for row in self.space:
            for element in row:
                element['dist'] = 0
                element['error'] = 'non'
            while len(row) < len(rec):
                row.append({'dist': 0, 'error': 'non'})
        for i in range(len(lab)):
            self.space[i][0]['dist'] = i
            self.space[i][0]['error'] = 'del'
        for j in range(len(rec)):
            self.space[0][j]['dist'] = j
            self.space[0][j]['error'] = 'ins'
        self.space[0][0]['error'] = 'non'
        for token in lab + rec:
            if token not in self.data and len(token) > 0:
                self.data[token] = {
                    'all': 0,
                    'cor': 0,
                    'sub': 0,
                    'ins': 0,
                    'del': 0
                }

        # Computing edit distance. Candidates are tried in the order
        # deletion, insertion, diagonal; ties keep the earliest candidate.
        for i in range(1, len(lab)):
            for j in range(1, len(rec)):
                min_dist = sys.maxsize
                min_error = 'none'
                dist = self.space[i - 1][j]['dist'] + self.cost['del']
                if dist < min_dist:
                    min_dist = dist
                    min_error = 'del'
                dist = self.space[i][j - 1]['dist'] + self.cost['ins']
                if dist < min_dist:
                    min_dist = dist
                    min_error = 'ins'
                error = 'cor' if lab[i] == rec[j] else 'sub'
                dist = self.space[i - 1][j - 1]['dist'] + self.cost[error]
                if dist < min_dist:
                    min_dist = dist
                    min_error = error
                self.space[i][j]['dist'] = min_dist
                self.space[i][j]['error'] = min_error

        # Tracing back from the bottom-right cell to the origin.
        result = {
            'lab': [],
            'rec': [],
            'all': 0,
            'cor': 0,
            'sub': 0,
            'ins': 0,
            'del': 0
        }
        i = len(lab) - 1
        j = len(rec) - 1
        while True:
            error = self.space[i][j]['error']
            if error == 'cor' or error == 'sub':  # correct / substitution
                if len(lab[i]) > 0:
                    self.data[lab[i]]['all'] += 1
                    self.data[lab[i]][error] += 1
                    result['all'] += 1
                    result[error] += 1
                result['lab'].insert(0, lab[i])
                result['rec'].insert(0, rec[j])
                i = i - 1
                j = j - 1
            elif error == 'del':  # deletion
                if len(lab[i]) > 0:
                    self.data[lab[i]]['all'] += 1
                    self.data[lab[i]]['del'] += 1
                    result['all'] += 1
                    result['del'] += 1
                result['lab'].insert(0, lab[i])
                result['rec'].insert(0, "")
                i = i - 1
            elif error == 'ins':  # insertion
                if len(rec[j]) > 0:
                    self.data[rec[j]]['ins'] += 1
                    result['ins'] += 1
                result['lab'].insert(0, "")
                result['rec'].insert(0, rec[j])
                j = j - 1
            elif error == 'non':  # starting point
                break
            else:  # shouldn't reach here
                print(
                    'this should not happen , i = {i} , j = {j} , error = {error}'.
                    format(i=i, j=j, error=error))
                # i and j do not move in this branch; stop instead of
                # printing the same message forever.
                break
        return result

    def overall(self):
        """Return the counters summed over every token seen so far."""
        result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0}
        for stats in self.data.values():
            for key in result:
                result[key] += stats[key]
        return result

    def cluster(self, data):
        """Return the counters summed over the tokens of ``data`` seen so far."""
        result = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0}
        for token in data:
            if token in self.data:
                for key in result:
                    result[key] += self.data[token][key]
        return result

    def keys(self):
        """Return the list of tokens seen so far, in first-seen order."""
        return list(self.data.keys())


def width(string):
    """Return the number of display columns ``string`` is counted as.

    Every character counts as one column, plus one extra column when its
    East Asian Width property is 'A' (ambiguous), 'F' (fullwidth) or
    'W' (wide).
    """
    columns = 0
    for char in string:
        columns += 1
        if unicodedata.east_asian_width(char) in "AFW":
            columns += 1
    return columns


def default_cluster(word):
    """Classify ``word`` into a coarse script cluster from its Unicode names.

    Each character is mapped through its Unicode character name:
      * 'DIGIT ...'                                   -> 'Number'
      * 'CJK UNIFIED/COMPATIBILITY IDEOGRAPH ...'     -> 'Mandarin'
      * 'LATIN CAPITAL/SMALL LETTER ...'              -> 'English'
      * 'HIRAGANA LETTER ...'                         -> 'Japanese'
      * a fixed set of punctuation (& ' @ ℃ = . - _ # + ;) is ignored
      * anything else makes the whole word 'Other'

    Args:
        word: the token to classify (a string).

    Returns:
        str: the shared cluster name when every non-ignored character maps to
        the same cluster; 'Other' when the clusters are mixed, when an
        unrecognised character is present, or when nothing is left after
        dropping the ignored punctuation (including the empty string).
    """
    ignored_prefixes = (
        'AMPERSAND', 'APOSTROPHE', 'COMMERCIAL AT', 'DEGREE CELSIUS',
        'EQUALS SIGN', 'FULL STOP', 'HYPHEN-MINUS', 'LOW LINE', 'NUMBER SIGN',
        'PLUS SIGN', 'SEMICOLON')  # & / ' / @ / ℃ / = / . / - / _ / # / + / ;

    labels = []
    for char in word:
        # unicodedata.name() raises ValueError for code points without a name
        # (e.g. control characters); the '' default lets them fall through to
        # the 'Other' branch instead of crashing the whole run.
        name = unicodedata.name(char, '')
        if name.startswith('DIGIT'):  # 1
            labels.append('Number')
        elif name.startswith(('CJK UNIFIED IDEOGRAPH',
                              'CJK COMPATIBILITY IDEOGRAPH')):  # 明 / 郎
            labels.append('Mandarin')
        elif name.startswith(('LATIN CAPITAL LETTER',
                              'LATIN SMALL LETTER')):  # A / a
            labels.append('English')
        elif name.startswith('HIRAGANA LETTER'):  # は こ め
            labels.append('Japanese')
        elif name.startswith(ignored_prefixes):
            continue
        else:
            return 'Other'

    if not labels:
        return 'Other'
    first = labels[0]
    if any(label != first for label in labels[1:]):
        return 'Other'
    return first


def usage():
    """Print the two-line help text (summary, then invocation) to stdout."""
    summary = "compute-wer.py : compute word error rate (WER) and align recognition results and references."
    synopsis = "         usage : python compute-wer.py [--cs={0,1}] [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] [--padding-symbol={space,underline}] test.ref test.hyp > test.wer"
    for message in (summary, synopsis):
        print(message)


def main():
    """Command-line entry point: align a hypothesis file against a reference.

    Options of the form ``--name=value`` are consumed from the front of
    ``sys.argv`` until only the script name and two positional paths remain:
    the reference file and the hypothesis file. The first field of every line
    is taken as the utterance id and the remaining fields as its tokens; only
    utterance ids present in both files are scored. All output goes to stdout.

    Recognised options:
        --maxw=N              maximum number of aligned tokens printed per line
        --rt=...              accepted for compatibility; the value is not used
        --cs=...              case-sensitivity flag passed on to ``normalize``
        --cluster=FILE        file of ``<Name> tok ... </Name>`` groups to report
        --splitfile=FILE      ``word part1 part2 ...`` table passed to ``normalize``
        --ig=FILE             one word per line, passed to ``normalize`` as the
                              ignore set
        --char=...            tokenise lines with ``characterize`` instead of
                              whitespace splitting
        --v=N                 verbosity; 0 prints only the overall summary,
                              values above 1 add the utterance id to each
                              alignment row
        --padding-symbol=...  ``space`` or ``underline``; pads aligned tokens

    Boolean values are true unless they are exactly ``0``. Leading arguments
    that match none of the options are silently dropped.
    """
    # python utils/compute-wer.py --char=1 --v=1 ref hyp > rsl.error
    if len(sys.argv) == 1:
        usage()
        sys.exit(0)

    def parse_flag(text):
        # The historic rule `(b == 'true') or (b != '0')` reduces to this:
        # every value except the literal '0' enables the flag.
        return text != '0'

    def print_summary(prefix, counts):
        # Print "<prefix> <error rate> % N=.. C=.. S=.. D=.. I=.." for one
        # counter dict; the rate is 0.0 when there are no reference tokens.
        if counts['all'] != 0:
            rate = float(counts['ins'] + counts['sub'] + counts[
                'del']) * 100.0 / counts['all']
        else:
            rate = 0.0
        print('%s %4.2f %%' % (prefix, rate), end=' ')
        print('N=%d C=%d S=%d D=%d I=%d' %
              (counts['all'], counts['cor'], counts['sub'], counts['del'],
               counts['ins']))

    def format_row(tokens, pads, start, stop, symbol):
        # Render tokens[start:stop], each padded to its column width and
        # followed by a single separating space.
        return ''.join('{token}'.format(token=tokens[k]) + symbol * pads[k] +
                       ' ' for k in range(start, stop))

    calculator = Calculator()
    cluster_file = ''
    ignore_words = set()
    tochar = False
    verbose = 1
    padding_symbol = ' '
    case_sensitive = False
    max_words_per_line = sys.maxsize
    split = None

    # Consume leading switches until only <script> <ref> <hyp> remain.
    while len(sys.argv) > 3:
        arg = sys.argv[1]
        del sys.argv[1]
        name, sep, value = arg.partition('=')
        if not sep:
            # not a --name=value switch: ignored
            continue
        if name == '--maxw':
            max_words_per_line = int(value)
        elif name == '--rt':
            # Parsed by earlier versions into an unused variable; still
            # accepted so existing command lines keep working.
            pass
        elif name == '--cs':
            case_sensitive = parse_flag(value.lower())
        elif name == '--cluster':
            cluster_file = value
        elif name == '--splitfile':
            split = dict()
            with codecs.open(value, 'r', 'utf-8') as fh:
                for line in fh:  # line in unicode
                    words = line.strip().split()
                    if len(words) >= 2:
                        split[words[0]] = words[1:]
        elif name == '--ig':
            with codecs.open(value, 'r', 'utf-8') as fh:
                for line in fh:  # line in unicode
                    line = line.strip()
                    if len(line) > 0:
                        ignore_words.add(line)
        elif name == '--char':
            tochar = parse_flag(value.lower())
        elif name == '--v':
            try:
                verbose = int(value.lower())
            except ValueError:
                # Any non-numeric value (e.g. 'true') switches verbosity on.
                verbose = 1
        elif name == '--padding-symbol':
            symbol_name = value.lower()
            if symbol_name == 'space':
                padding_symbol = ' '
            elif symbol_name == 'underline':
                padding_symbol = '_'
        # any other switch: ignored

    if len(sys.argv) < 3:
        # Both the reference and the hypothesis path are required.
        usage()
        sys.exit(1)

    if not case_sensitive:
        ignore_words = set([w.upper() for w in ignore_words])

    default_clusters = {}
    default_words = {}

    ref_file = sys.argv[1]
    hyp_file = sys.argv[2]
    rec_set = {}
    if split and not case_sensitive:
        newsplit = dict()
        for w in split:
            words = split[w]
            for i in range(len(words)):
                words[i] = words[i].upper()
            newsplit[w.upper()] = words
        split = newsplit

    with codecs.open(hyp_file, 'r', 'utf-8') as fh:
        for line in fh:
            if tochar:
                array = characterize(line)
            else:
                array = line.strip().split()
            if len(array) == 0:
                continue
            fid = array[0]
            rec_set[fid] = normalize(array[1:], ignore_words, case_sensitive,
                                     split)

    # compute error rate on the interaction of reference file and hyp file
    with open(ref_file, 'r', encoding='utf-8') as fh:
        for line in fh:
            if tochar:
                array = characterize(line)
            else:
                array = line.rstrip('\n').split()
            if len(array) == 0:
                continue
            fid = array[0]
            if fid not in rec_set:
                continue
            lab = normalize(array[1:], ignore_words, case_sensitive, split)
            rec = rec_set[fid]
            if verbose:
                print('\nutt: %s' % fid)

            # Remember the default cluster of every token seen so far.
            for word in rec + lab:
                if word not in default_words:
                    default_cluster_name = default_cluster(word)
                    if default_cluster_name not in default_clusters:
                        default_clusters[default_cluster_name] = {}
                    if word not in default_clusters[default_cluster_name]:
                        default_clusters[default_cluster_name][word] = 1
                    default_words[word] = default_cluster_name

            result = calculator.calculate(lab, rec)
            if not verbose:
                continue

            print_summary('WER:', result)
            # Padding needed so that each aligned lab/rec pair shares a column.
            space = {'lab': [], 'rec': []}
            for idx in range(len(result['lab'])):
                len_lab = width(result['lab'][idx])
                len_rec = width(result['rec'][idx])
                length = max(len_lab, len_rec)
                space['lab'].append(length - len_lab)
                space['rec'].append(length - len_rec)
            upper_lab = len(result['lab'])
            upper_rec = len(result['rec'])
            lab1, rec1 = 0, 0
            while lab1 < upper_lab or rec1 < upper_rec:
                # fid is already text; encoding it would print a b'...' repr.
                if verbose > 1:
                    print('lab(%s):' % fid, end=' ')
                else:
                    print('lab:', end=' ')
                lab2 = min(upper_lab, lab1 + max_words_per_line)
                print(
                    format_row(result['lab'], space['lab'], lab1, lab2,
                               padding_symbol))
                if verbose > 1:
                    print('rec(%s):' % fid, end=' ')
                else:
                    print('rec:', end=' ')
                rec2 = min(upper_rec, rec1 + max_words_per_line)
                # The rec row is followed by one blank line.
                print(
                    format_row(result['rec'], space['rec'], rec1, rec2,
                               padding_symbol) + '\n')
                lab1 = lab2
                rec1 = rec2

    if verbose:
        print(
            '==========================================================================='
        )
        print()

    print_summary('Overall ->', calculator.overall())
    if not verbose:
        print()

    if verbose:
        for cluster_id in default_clusters:
            result = calculator.cluster(
                [k for k in default_clusters[cluster_id]])
            print_summary('%s ->' % cluster_id, result)
        if len(cluster_file) > 0:  # compute separated WERs for word clusters
            cluster_id = ''
            cluster = []
            with open(cluster_file, 'r', encoding='utf-8') as fh:
                for line in fh:
                    # The file is opened in text mode, so each line is
                    # already a str and must not be decoded again.
                    for token in line.rstrip('\n').split():
                        # end of cluster reached, like </Keyword>
                        if token[0:2] == '</' and token[len(token)-1] == '>' and \
                           token.lstrip('</').rstrip('>') == cluster_id :
                            result = calculator.cluster(cluster)
                            print_summary('%s ->' % cluster_id, result)
                            cluster_id = ''
                            cluster = []
                        # begin of cluster reached, like <Keyword>
                        elif token[0] == '<' and token[len(token)-1] == '>' and \
                             cluster_id == '' :
                            cluster_id = token.lstrip('<').rstrip('>')
                            cluster = []
                        # general terms, like WEATHER / CAR / ...
                        else:
                            cluster.append(token)
        print()
        print(
            '==========================================================================='
        )


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported.
if __name__ == '__main__':
    main()


================================================
FILE: examples/librispeech/asr5/conf/preprocess.yaml
================================================
# Preprocessing pipeline for this example: the `process` list holds a single
# `wav_process` step.
process:
    # use raw audio
  - type: wav_process


================================================
FILE: examples/librispeech/asr5/conf/preprocessor_config.json
================================================
{
  "do_normalize": true,
  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
  "feature_size": 1,
  "padding_side": "right",
  "padding_value": 0,
  "return_attention_mask": true,
  "sampling_rate": 16000
}


================================================
FILE: examples/librispeech/asr5/conf/tuning/decode.yaml
================================================
# Decoding options for this example. local/test.sh overrides
# `decoding_method` and `decode_batch_size` on the command line via --opts.
decode_batch_size: 1
error_rate_type: wer
decoding_method: "ctc_greedy_search"  # 'ctc_greedy_search', 'ctc_prefix_beam_search'
beam_size: 10  # presumably only used by beam-search decoding — TODO confirm


================================================
FILE: examples/librispeech/asr5/conf/wavlmASR.yaml
================================================
############################################
#          Network Architecture           #
############################################
freeze_wavlm: False
normalize_wav: True
output_norm: True
init_type: kaiming_uniform # Warning: this initialization is needed for the model to converge
enc:
  input_shape: 768
  dnn_blocks: 2
  dnn_neurons: 768
  activation: True
  normalization: True
  dropout_rate: [0.15, 0]
ctc:
  enc_n_units: 768
  blank_id: 0
  dropout_rate: 0.0
wavlm_params_path: exp/wavlm/wavlm-base-plus.pdparams


task_cfg:
  label_rate: 50.0
  sample_rate: 16000
  normalize: True
  enable_padding: False
  max_keep_size: None
  max_sample_size: 250000
  min_sample_size: 32000
  dropout_input: 0.1
  final_dropout: 0.0
  dropout: 0.1
  attention_dropout: 0.0
  activation_dropout: 0.1
  apply_mask: True
  mask_length: 10
  mask_prob: 0.5
  mask_selection: static
  mask_other: 0.0
  no_mask_overlap: False
  mask_channel_length: 10
  mask_channel_prob: 0.0
  mask_channel_selection: static
  mask_channel_other: 0.0
  no_mask_channel_overlap: False
  feature_grad_mult: 0.0
  layerdrop: 0.1
  fp16: True
  extractor_mode: layer_norm
  encoder_layers: 12
  encoder_embed_dim: 768
  encoder_ffn_embed_dim: 3072
  encoder_attention_heads: 12
  activation_fn: gelu
  encoder_layerdrop: 0.0
  dropout_features: 0.0
  final_dim: 768
  untie_final_proj: True
  layer_norm_first: True
  conv_feature_layers: "[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2"
  conv_bias: False
  logit_temp: 0.1
  target_glu: False
  mask_min_space: 1
  mask_channel_min_space: 1
  conv_pos: 128
  conv_pos_groups: 16
  latent_temp: [2.0, 0.5, 0.999995]
  skip_masked: False
  skip_nomask: True

###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.train
dev_manifest: data/manifest.dev
test_manifest: data/manifest.test-clean

###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_char/vocab.txt 
unit_type: char
mean_std_filepath: ""
preprocess_config: conf/preprocess.yaml
sortagrad: 0 # Feed samples from shortest to longest; -1: enabled for all epochs, 0: disabled, other: enabled for other epochs
batch_size: 8  # Different batch_size may cause large differences in results
maxlen_in: 51200000000  # if the input length > maxlen_in, the batch size is automatically reduced
maxlen_out: 160000
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 0
subsampling_factor: 1
num_encs: 1
dist_sampler: True
shortest_first: False
return_lens_rate: True
  
############################################
#             Data Augmentation            #
############################################
audio_augment:  # for raw audio 
  sample_rate: 16000
  speeds: [90, 100, 110]

###########################################
#                 Training                #
###########################################
n_epoch: 10
accum_grad: 8
global_grad_clip: 5.0
model_scheduler: newbobscheduler
model_scheduler_conf:
  improvement_threshold: 0.0025
  annealing_factor: 0.8
  patient: 0
model_optim: adam
model_optim_conf:
  lr: 0.0001
  weight_decay: 0.0
# I changed this
wavlm_optim: adam
wavlm_optim_conf:
  lr: 0.00005
  weight_decay: 0.0
wavlm_scheduler: constantlr    
wavlm_scheduler_conf:
  warmup_steps: 1000
  lr_decay: 1.0
log_interval: 1
checkpoint:
  kbest_n: 50
  latest_n: 5


================================================
FILE: examples/librispeech/asr5/local/data.sh
================================================
#!/bin/bash
# Prepare the data used by this example.
#   stage -1: download LibriSpeech and build the raw train/dev/test manifests
#   stage  0: compute mean/stddev statistics on the training manifest
#   stage  1: build the vocabulary
#   stage  2: format every manifest (runs the sets in parallel)
#   stage  3: download the pretrained WavLM parameters
# `stage`, `stop_stage`, `unit_type` and `dict_dir` are declared before
# parse_options.sh is sourced.

stage=-1
stop_stage=100

unit_type=char
dict_dir=data/lang_char

source ${MAIN_ROOT}/utils/parse_options.sh

mkdir -p data
mkdir -p ${dict_dir}
TARGET_DIR=${MAIN_ROOT}/dataset
mkdir -p ${TARGET_DIR}

if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
    # download data, generate manifests
    python3 ${TARGET_DIR}/librispeech/librispeech.py \
    --manifest_prefix="data/manifest" \
    --target_dir="${TARGET_DIR}/librispeech" \
    --full_download="False"

    if [ $? -ne 0 ]; then
        echo "Prepare LibriSpeech failed. Terminated."
        exit 1
    fi

    for set in train-clean-100 dev-clean test-clean; do
        mv data/manifest.${set} data/manifest.${set}.raw
    done

    # Start from empty aggregate manifests; the loops below append to them.
    rm -rf data/manifest.train.raw data/manifest.dev.raw  data/manifest.test.raw
    for set in train-clean-100; do
        cat data/manifest.${set}.raw >> data/manifest.train.raw
    done

    for set in dev-clean; do
        cat data/manifest.${set}.raw >> data/manifest.dev.raw
    done

    for set in test-clean; do
        cat data/manifest.${set}.raw >> data/manifest.test.raw
    done
fi

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # compute mean and stddev for normalizer
    num_workers=$(nproc)
    # python3, like every other stage of this script
    python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
    --manifest_path="data/manifest.train.raw" \
    --num_samples=2000 \
    --spectrum_type="fbank" \
    --feat_dim=161 \
    --delta_delta=false \
    --sample_rate=16000 \
    --stride_ms=10 \
    --window_ms=25 \
    --use_dB_normalization=False \
    --num_workers=${num_workers} \
    --output_path="data/mean_std.json"

    if [ $? -ne 0 ]; then
        echo "Compute mean and stddev failed. Terminated."
        exit 1
    fi
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # build vocabulary
    python3 ${MAIN_ROOT}/utils/build_vocab.py \
    --unit_type ${unit_type} \
    --count_threshold=0 \
    --vocab_path="${dict_dir}/vocab.txt" \
    --manifest_paths="data/manifest.train.raw"

    if [ $? -ne 0 ]; then
        echo "Build vocabulary failed. Terminated."
        exit 1
    fi
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # format manifest with tokenids, vocab size
    pids=()
    for set in train dev test dev-clean test-clean; do
    {
        python3 ${MAIN_ROOT}/utils/format_data.py \
        --cmvn_path "data/mean_std.json" \
        --unit_type ${unit_type} \
        --vocab_path="${dict_dir}/vocab.txt" \
        --manifest_path="data/manifest.${set}.raw" \
        --output_path="data/manifest.${set}"

        if [ $? -ne 0 ]; then
            echo "Format manifest.${set} failed. Terminated."
            # Only ends this background job; the parent checks it below.
            exit 1
        fi
    }&
    pids+=($!)
    done
    # A bare `wait` always returns 0, so wait for each job individually to
    # notice a failed formatting job.
    failed=0
    for pid in "${pids[@]}"; do
        wait ${pid} || failed=1
    done
    if [ ${failed} -ne 0 ]; then
        exit 1
    fi
fi

echo "LibriSpeech Data preparation done."

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    mkdir -p exp/wavlm
    echo "Pretrained wavlm model download"
    wget -P exp/wavlm https://paddlespeech.cdn.bcebos.com/wavlm/wavlm-base-plus.pdparams
fi

exit 0

================================================
FILE: examples/librispeech/asr5/local/test.sh
================================================
#!/bin/bash
# Decode the test-clean manifest with CTC greedy search and CTC prefix beam
# search, then score each result with compute_wer.py.
# usage: CUDA_VISIBLE_DEVICES=0 ./local/test.sh config_path decode_config_path ckpt_prefix

set -e

ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

expdir=exp
datadir=data

recog_set="test-clean test-other dev-clean dev-other"
recog_set="test-clean"

config_path=$1
decode_config_path=$2
ckpt_prefix=$3

source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

# download language model
#bash local/download_lm_en.sh
#if [ $? -ne 0 ]; then
#    exit 1
#fi

python3 ${MAIN_ROOT}/utils/format_rsl.py \
    --origin_ref data/manifest.test-clean.raw \
    --trans_ref data/manifest.test-clean.text


for type in ctc_greedy_search ctc_prefix_beam_search; do
    echo "decoding ${type}"
    # greedy search is decoded in batches of 16, prefix beam search one by one
    case ${type} in
        ctc_greedy_search) batch_size=16 ;;
        *) batch_size=1 ;;
    esac

    # Under `set -e` a failing command aborts the script before a following
    # `$?` check can run, so the command is tested directly in the `if`.
    if ! python3 -u ${BIN_DIR}/test.py \
        --ngpu ${ngpu} \
        --config ${config_path} \
        --decode_cfg ${decode_config_path} \
        --result_file ${ckpt_prefix}.${type}.rsl \
        --checkpoint_path ${ckpt_prefix} \
        --opts decode.decoding_method ${type} \
        --opts decode.decode_batch_size ${batch_size}; then
        echo "Failed in evaluation!"
        exit 1
    fi

    python3 ${MAIN_ROOT}/utils/format_rsl.py \
        --origin_hyp ${ckpt_prefix}.${type}.rsl \
        --trans_hyp ${ckpt_prefix}.${type}.rsl.text

    python3 compute_wer.py --char=1 --v=1 \
        data/manifest.test-clean.text ${ckpt_prefix}.${type}.rsl.text > ${ckpt_prefix}.${type}.error
    echo "decoding ${type} done."
done

echo "Finished"

exit 0


================================================
FILE: examples/librispeech/asr5/local/test_wav.sh
================================================
#!/bin/bash
# Decode a single audio file with CTC greedy search.
# Also fetches the demo wav into data/ if it is not already there.

if [ $# != 4 ];then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix audio_file"
    exit -1
fi

ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
decode_config_path=$2
ckpt_prefix=$3
audio_file=$4

mkdir -p data
wget -nc https://paddlespeech.cdn.bcebos.com/datasets/single_wav/en/demo_002_en.wav -P data/
if [ $? -ne 0 ]; then
   exit 1
fi

# Quoted so that an empty value or a path containing spaces is still tested
# as a single file name.
if [ ! -f "${audio_file}" ]; then
    echo "Please input the right audio_file path"
    exit 1
fi

# Set when the config file name contains "chunk_"; not read again in this script.
chunk_mode=false
if [[ ${config_path} =~ ^.*chunk_.*yaml$ ]];then
    chunk_mode=true
fi

# download language model
#bash local/download_lm_ch.sh
#if [ $? -ne 0 ]; then
#    exit 1
#fi

for type in ctc_greedy_search; do
    echo "decoding ${type}"
    batch_size=1
    output_dir=${ckpt_prefix}
    mkdir -p ${output_dir}
    python3 -u ${BIN_DIR}/test_wav.py \
    --ngpu ${ngpu} \
    --config ${config_path} \
    --decode_cfg ${decode_config_path} \
    --result_file ${output_dir}/${type}.rsl \
    --checkpoint_path ${ckpt_prefix} \
    --opts decode.decoding_method ${type} \
    --opts decode.decode_batch_size ${batch_size} \
    --audio_file ${audio_file}

    if [ $? -ne 0 ]; then
        echo "Failed in evaluation!"
        exit 1
    fi
done
exit 0


================================================
FILE: examples/librispeech/asr5/local/train.sh
================================================
#!/bin/bash
# Train the model on CPU (no visible GPU) or through paddle.distributed.launch.
# Positional arguments: config_path ckpt_name [resume] [ips]

# Accept 2 to 4 arguments. The check must use `||`: with `&&` it could never
# be true, because no count is both below 2 and above the upper bound.
if [ $# -lt 2 ] || [ $# -gt 4 ];then
    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name resume(optional) ips(optional)"
    exit -1
fi

ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
ckpt_name=$2
resume=$3
ips=$4

if [ ! $ips ];then
  ips_config=
else
  ips_config="--ips="${ips}
fi

mkdir -p exp

# seed may break model convergence
seed=1988
if [ ${seed} != 0 ]; then
    export FLAGS_cudnn_deterministic=True
fi

# export FLAGS_cudnn_exhaustive_search=true
# export FLAGS_conv_workspace_size_limit=4000
export FLAGS_allocator_strategy=naive_best_fit
if [ ${ngpu} == 0 ]; then
python3 -u ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--config ${config_path} \
--output exp/${ckpt_name} \
--seed ${seed} \
--resume ${resume}
else
python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--config ${config_path} \
--output exp/${ckpt_name} \
--seed ${seed} \
--resume ${resume}
fi
# Capture the training exit status now: the `unset` block below would
# otherwise overwrite `$?` before it is checked.
train_status=$?

if [ ${seed} != 0 ]; then
    unset FLAGS_cudnn_deterministic
fi

if [ ${train_status} -ne 0 ]; then
    echo "Failed in training!"
    exit 1
fi

exit 0


================================================
FILE: examples/librispeech/asr5/path.sh
================================================
# Environment setup for this example; run.sh sources it with `. ./path.sh`.
# MAIN_ROOT is resolved three directory levels above the current directory.
export MAIN_ROOT=`realpath ${PWD}/../../../`

# Put MAIN_ROOT, the sctk binaries and this example's utils directory on PATH.
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/tools/sctk/bin:${PWD}/utils:${PATH}
export LC_ALL=C

export PYTHONDONTWRITEBYTECODE=1
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
# export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/

# Directory holding the train.py / test.py / test_wav.py entry points that
# the local/*.sh scripts call.
export BIN_DIR=${MAIN_ROOT}/paddlespeech/s2t/exps/wavlm/bin


================================================
FILE: examples/librispeech/asr5/run.sh
================================================
#!/bin/bash
# Top-level driver for this example.
#   stage 0: prepare data          (local/data.sh)
#   stage 1: train                 (local/train.sh)
#   stage 3: decode and score      (local/test.sh)
#   stage 4: decode a single wav   (local/test_wav.sh)
set -e

. ./path.sh || exit 1;
. ./cmd.sh || exit 1;

gpus=0,1,2
stage=0
stop_stage=3
conf_path=conf/wavlmASR.yaml
ips=            #xx.xx.xx.xx,xx.xx.xx.xx
decode_conf_path=conf/tuning/decode.yaml
avg_num=3
resume=         # xx e.g. 30

. ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

audio_file=data/demo_002_en.wav

# avg_ckpt=avg_${avg_num}
avg_ckpt=4
ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}')
echo "checkpoint name ${ckpt}"

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # prepare data
    bash ./local/data.sh || exit -1
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # train model, all `ckpt` under `exp` dir
    # resume and ips are quoted so an empty `resume` still occupies the third
    # position; unquoted, `ips` would be read by train.sh as `resume`.
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} "${resume}" "${ips}"
fi

# if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
#     # avg n best model
#     ./avg.sh best exp/${ckpt}/checkpoints ${avg_num}
# fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # greedy search decoder
    CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
fi

if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    # test a single .wav file
    CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${audio_file} || exit -1
fi


================================================
FILE: examples/ljspeech/README.md
================================================

# LJSpeech

* tts0 - Tacotron2
* tts1 - TransformerTTS
* tts2 - SpeedySpeech
* tts3 - FastSpeech2
* voc0 - WaveFlow
* voc1 - Parallel WaveGAN
* voc2 - MelGAN
* voc3 - MultiBand MelGAN


================================================
FILE: examples/ljspeech/tts0/README.md
================================================
# Tacotron2 with LJSpeech-1.1
This example contains code used to train a [Tacotron2](https://arxiv.org/abs/1712.05884) model with [LJSpeech-1.1](https://keithito.com/LJ-Speech-Dataset/)

## Dataset
### Download and Extract
Download LJSpeech-1.1 from its [Official Website](https://keithito.com/LJ-Speech-Dataset/) and extract it to `~/datasets`. The dataset is then located in the directory `~/datasets/LJSpeech-1.1`.

### Get MFA Result and Extract
We use [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get phonemes for Tacotron2; the durations from MFA are not needed here.
You can download the alignment from [ljspeech_alignment.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/LJSpeech-1.1/ljspeech_alignment.tar.gz), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) in our repo.

## Get Started
Assume the path to the dataset is `~/datasets/LJSpeech-1.1`.
Assume the path to the MFA result of LJSpeech-1.1 is `./ljspeech_alignment`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
    - synthesize waveform from a text file.

```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.

```text
dump
├── dev
│   ├── norm
│   └── raw
├── phone_id_map.txt
├── speaker_id_map.txt
├── test
│   ├── norm
│   └── raw
└── train
    ├── norm
    ├── raw
    └── speech_stats.npy
```
The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The raw folder contains speech features of each utterance, while the norm folder contains normalized ones. The statistics used to normalize features are computed from the training set, which is located in `dump/train/*_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains phones, text_lengths, speech_lengths, durations, the path of speech features, speaker, and the id of each utterance.

### Model Training
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
`./local/train.sh` calls `${BIN_DIR}/train.py`.
Here's the complete help message.
```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU] [--phones-dict PHONES_DICT]

Train a Tacotron2 model.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       tacotron2 config file.
  --train-metadata TRAIN_METADATA
                        training data.
  --dev-metadata DEV_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
  --phones-dict PHONES_DICT
                        phone vocabulary file.
```
1. `--config` is a config file in yaml format to overwrite the default config, which can be found at `conf/default.yaml`.
2. `--train-metadata` and `--dev-metadata` should be the metadata file in the normalized subfolder of `train` and `dev` in the `dump` folder.
3. `--output-dir` is the directory to save the results of the experiment. Checkpoints are saved in `checkpoints/` inside this directory.
4. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.
5. `--phones-dict` is the path of the phone vocabulary file.

### Synthesizing
We use [parallel wavegan](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/ljspeech/voc1) as the neural vocoder.
Download pretrained parallel wavegan model from [pwg_ljspeech_ckpt_0.5.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_ljspeech_ckpt_0.5.zip) and unzip it.
```bash
unzip pwg_ljspeech_ckpt_0.5.zip
```
Parallel WaveGAN checkpoint contains files listed below.
```text
pwg_ljspeech_ckpt_0.5
├── pwg_default.yaml              # default config used to train parallel wavegan
├── pwg_snapshot_iter_400000.pdz  # generator parameters of parallel wavegan
└── pwg_stats.npy                 # statistics used to normalize spectrogram when training parallel wavegan
```
`./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
```text
usage: synthesize.py [-h]
                     [--am {speedyspeech_csmsc,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech,tacotron2_aishell3}]
                     [--am_config AM_CONFIG] [--am_ckpt AM_CKPT]
                     [--am_stat AM_STAT] [--phones_dict PHONES_DICT]
                     [--tones_dict TONES_DICT] [--speaker_dict SPEAKER_DICT]
                     [--voice-cloning VOICE_CLONING]
                     [--voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,wavernn_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,style_melgan_csmsc}]
                     [--voc_config VOC_CONFIG] [--voc_ckpt VOC_CKPT]
                     [--voc_stat VOC_STAT] [--ngpu NGPU]
                     [--test_metadata TEST_METADATA] [--output_dir OUTPUT_DIR]

Synthesize with acoustic model & vocoder

optional arguments:
  -h, --help            show this help message and exit
  --am {speedyspeech_csmsc,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech,tacotron2_aishell3}
                        Choose acoustic model type of tts task.
  --am_config AM_CONFIG
                        Config of acoustic model.
  --am_ckpt AM_CKPT     Checkpoint file of acoustic model.
  --am_stat AM_STAT     mean and standard deviation used to normalize
                        spectrogram when training acoustic model.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --tones_dict TONES_DICT
                        tone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --voice-cloning VOICE_CLONING
                        whether training voice cloning model.
  --voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,wavernn_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,style_melgan_csmsc}
                        Choose vocoder type of tts task.
  --voc_config VOC_CONFIG
                        Config of voc.
  --voc_ckpt VOC_CKPT   Checkpoint file of voc.
  --voc_stat VOC_STAT   mean and standard deviation used to normalize
                        spectrogram when training voc.
  --ngpu NGPU           if ngpu == 0, use cpu.
  --test_metadata TEST_METADATA
                        test metadata.
  --output_dir OUTPUT_DIR
                        output dir.
```
`./local/synthesize_e2e.sh` calls `${BIN_DIR}/../synthesize_e2e.py`, which can synthesize waveform from text file.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
```text
usage: synthesize_e2e.py [-h]
                         [--am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech}]
                         [--am_config AM_CONFIG] [--am_ckpt AM_CKPT]
                         [--am_stat AM_STAT] [--phones_dict PHONES_DICT]
                         [--tones_dict TONES_DICT]
                         [--speaker_dict SPEAKER_DICT] [--spk_id SPK_ID]
                         [--voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,style_melgan_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,wavernn_csmsc}]
                         [--voc_config VOC_CONFIG] [--voc_ckpt VOC_CKPT]
                         [--voc_stat VOC_STAT] [--lang LANG]
                         [--inference_dir INFERENCE_DIR] [--ngpu NGPU]
                         [--text TEXT] [--output_dir OUTPUT_DIR]

Synthesize with acoustic model & vocoder

optional arguments:
  -h, --help            show this help message and exit
  --am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech}
                        Choose acoustic model type of tts task.
  --am_config AM_CONFIG
                        Config of acoustic model.
  --am_ckpt AM_CKPT     Checkpoint file of acoustic model.
  --am_stat AM_STAT     mean and standard deviation used to normalize
                        spectrogram when training acoustic model.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --tones_dict TONES_DICT
                        tone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --spk_id SPK_ID       spk id for multi speaker acoustic model
  --voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,style_melgan_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,wavernn_csmsc}
                        Choose vocoder type of tts task.
  --voc_config VOC_CONFIG
                        Config of voc.
  --voc_ckpt VOC_CKPT   Checkpoint file of voc.
  --voc_stat VOC_STAT   mean and standard deviation used to normalize
                        spectrogram when training voc.
  --lang LANG           Choose model language. zh or en
  --inference_dir INFERENCE_DIR
                        dir to save inference models
  --ngpu NGPU           if ngpu == 0, use cpu.
  --text TEXT           text to synthesize, a 'utt_id sentence' pair per line.
  --output_dir OUTPUT_DIR
                        output dir.
```
1. `--am` is the acoustic model type, in the format `{model_name}_{dataset}`.
2. `--am_config`, `--am_ckpt`, `--am_stat` and `--phones_dict` are arguments for the acoustic model, which correspond to the 4 files in the Tacotron2 pretrained model.
3. `--voc` is the vocoder type, in the format `{model_name}_{dataset}`.
4. `--voc_config`, `--voc_ckpt`, `--voc_stat` are arguments for the vocoder, which correspond to the 3 files in the parallel wavegan pretrained model.
5. `--lang` is the model language, which can be `zh` or `en`.
6. `--test_metadata` should be the metadata file in the normalized subfolder of `test` in the `dump` folder.
7. `--text` is the text file, which contains sentences to synthesize.
8. `--output_dir` is the directory to save synthesized audio files.
9. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.


## Pretrained Model
Pretrained Tacotron2 model with no silence at the edges of the audio:
- [tacotron2_ljspeech_ckpt_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/tacotron2/tacotron2_ljspeech_ckpt_0.2.0.zip)


Model | Step | eval/loss | eval/l1_loss | eval/mse_loss | eval/bce_loss| eval/attn_loss 
:-------------:| :------------:| :-----: | :-----: | :--------: |:--------:|:---------:
default| 1(gpu) x 60300|0.554092|0.394260|0.141046|0.018747|3.8e-05|

Tacotron2 checkpoint contains files listed below.
```text
tacotron2_ljspeech_ckpt_0.2.0
├── default.yaml            # default config used to train Tacotron2
├── phone_id_map.txt        # phone vocabulary file when training Tacotron2
├── snapshot_iter_60300.pdz # model parameters and optimizer states
└── speech_stats.npy        # statistics used to normalize spectrogram when training Tacotron2
```
You can use the following script to synthesize the sentences in `${BIN_DIR}/../../assets/sentences_en.txt` using the pretrained Tacotron2 and parallel wavegan models.
```bash
source path.sh

FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 ${BIN_DIR}/../synthesize_e2e.py \
  --am=tacotron2_ljspeech \
  --am_config=tacotron2_ljspeech_ckpt_0.2.0/default.yaml \
  --am_ckpt=tacotron2_ljspeech_ckpt_0.2.0/snapshot_iter_60300.pdz \
  --am_stat=tacotron2_ljspeech_ckpt_0.2.0/speech_stats.npy  \
  --voc=pwgan_ljspeech\
  --voc_config=pwg_ljspeech_ckpt_0.5/pwg_default.yaml \
  --voc_ckpt=pwg_ljspeech_ckpt_0.5/pwg_snapshot_iter_400000.pdz  \
  --voc_stat=pwg_ljspeech_ckpt_0.5/pwg_stats.npy \
  --lang=en \
  --text=${BIN_DIR}/../../assets/sentences_en.txt \
  --output_dir=exp/default/test_e2e \
  --phones_dict=tacotron2_ljspeech_ckpt_0.2.0/phone_id_map.txt
```


================================================
FILE: examples/ljspeech/tts0/conf/default.yaml
================================================
# This configuration is for Paddle to train Tacotron 2. Compared to the
# original paper, this configuration additionally uses the guided attention
# loss to accelerate the learning of the diagonal attention. It requires
# only a single GPU with 12 GB memory and it takes ~1 day to finish the
# training on Titan V.

###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
fs: 22050                # Sampling rate.
n_fft: 1024              # FFT size (samples).
n_shift: 256             # Hop size (samples). 11.6ms
win_length: null         # Window length (samples).
                         # If set to null, it will be the same as n_fft.
window: "hann"           # Window function.
n_mels: 80               # Number of mel basis.
fmin: 80                 # Minimum frequency in mel basis calculation. (Hz)
fmax: 7600               # Maximum frequency in mel basis calculation. (Hz)

###########################################################
#                       DATA SETTING                      #
###########################################################
batch_size: 64
num_workers: 2

###########################################################
#                       MODEL SETTING                     #
###########################################################
model:                          # keyword arguments for the selected model
    embed_dim: 512               # char or phn embedding dimension
    elayers: 1                   # number of blstm layers in encoder
    eunits: 512                  # number of blstm units
    econv_layers: 3              # number of convolutional layers in encoder
    econv_chans: 512             # number of channels in convolutional layer
    econv_filts: 5               # filter size of convolutional layer
    atype: location              # attention function type
    adim: 512                    # attention dimension
    aconv_chans: 32              # number of channels in convolutional layer of attention
    aconv_filts: 15              # filter size of convolutional layer of attention
    cumulate_att_w: True         # whether to cumulate attention weight
    dlayers: 2                   # number of lstm layers in decoder
    dunits: 1024                 # number of lstm units in decoder
    prenet_layers: 2             # number of layers in prenet
    prenet_units: 256            # number of units in prenet
    postnet_layers: 5            # number of layers in postnet
    postnet_chans: 512           # number of channels in postnet
    postnet_filts: 5             # filter size of postnet layer
    output_activation: null      # activation function for the final output
    use_batch_norm: True         # whether to use batch normalization in encoder
    use_concate: True            # whether to concatenate encoder embedding with decoder outputs
    use_residual: False          # whether to use residual connection in encoder
    dropout_rate: 0.5            # dropout rate
    zoneout_rate: 0.1            # zoneout rate
    reduction_factor: 1          # reduction factor
    spk_embed_dim: null          # speaker embedding dimension


###########################################################
#                       UPDATER SETTING                   #
###########################################################
updater:
    use_masking: True            # whether to apply masking for padded part in loss calculation
    bce_pos_weight: 5.0          # weight of positive sample in binary cross entropy calculation
    use_guided_attn_loss: True   # whether to use guided attention loss
    guided_attn_loss_sigma: 0.4  # sigma of guided attention loss
    guided_attn_loss_lambda: 1.0 # strength of guided attention loss


##########################################################
#                  OPTIMIZER SETTING                     #
##########################################################
optimizer:
    optim: adam              # optimizer type
    learning_rate: 1.0e-03   # learning rate
    epsilon: 1.0e-06         # epsilon
    weight_decay: 0.0        # weight decay coefficient

###########################################################
#                     TRAINING SETTING                    #
###########################################################
max_epoch: 300
num_snapshots: 5

###########################################################
#                       OTHER SETTING                     #
###########################################################
seed: 42


================================================
FILE: examples/ljspeech/tts0/local/preprocess.sh
================================================
#!/bin/bash
# Data preparation for the Tacotron2 LJSpeech example.
#
# Usage: ./local/preprocess.sh <config_path>
#
# Reads MAIN_ROOT and BIN_DIR from the environment
# (presumably exported by path.sh, which run.sh sources -- confirm).
#
# Stages (selected by editing `stage` / `stop_stage` below):
#   0: generate durations.txt from the MFA alignment in ./ljspeech_alignment
#   1: extract features into ./dump
#   2: compute statistics over the training set's "speech" field
#   3: normalize train/dev/test using the training-set statistics

# Stop at the first failing stage; otherwise later stages would run on
# missing or partially written outputs and fail with misleading errors.
set -e

stage=0
stop_stage=100

config_path=$1

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # get durations from MFA's result
    echo "Generate durations.txt from MFA results ..."
    python3 "${MAIN_ROOT}/utils/gen_duration_from_textgrid.py" \
        --inputdir=./ljspeech_alignment \
        --output=durations.txt \
        --config="${config_path}"
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # extract features
    echo "Extract features ..."
    # NOTE(review): bash does not tilde-expand `~` inside `--rootdir=~/...`
    # (the word is not a valid variable assignment), so the literal string is
    # passed through; presumably preprocess.py expands it -- confirm.
    python3 "${BIN_DIR}/preprocess.py" \
        --dataset=ljspeech \
        --rootdir=~/datasets/LJSpeech-1.1/ \
        --dumpdir=dump \
        --dur-file=durations.txt \
        --config="${config_path}" \
        --num-cpu=20 \
        --cut-sil=True
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # get features' stats(mean and std)
    echo "Get features' stats ..."
    python3 "${MAIN_ROOT}/utils/compute_statistics.py" \
        --metadata=dump/train/raw/metadata.jsonl \
        --field-name="speech"

fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # normalize and convert phone to id, dev and test should use train's stats
    echo "Normalize ..."
    # Every subset is normalized with the statistics of the training set.
    for subset in train dev test; do
        python3 "${BIN_DIR}/normalize.py" \
            --metadata=dump/${subset}/raw/metadata.jsonl \
            --dumpdir=dump/${subset}/norm \
            --speech-stats=dump/train/speech_stats.npy \
            --phones-dict=dump/phone_id_map.txt \
            --speaker-dict=dump/speaker_id_map.txt
    done
fi


================================================
FILE: examples/ljspeech/tts0/local/synthesize.sh
================================================
#!/bin/bash
# Synthesize waveforms from the test-set metadata with a trained Tacotron2
# acoustic model and the pretrained parallel wavegan vocoder.
#
# Usage: ./local/synthesize.sh <config_path> <train_output_path> <ckpt_name>
#   config_path        acoustic model config (yaml)
#   train_output_path  experiment directory; the checkpoint is read from
#                      <train_output_path>/checkpoints/<ckpt_name> and the
#                      results are written to <train_output_path>/test
#   ckpt_name          file name of the acoustic model checkpoint
#
# Reads BIN_DIR from the environment and expects the unzipped
# pwg_ljspeech_ckpt_0.5 directory and the dump/ folder in the current directory.

config_path=$1
train_output_path=$2
ckpt_name=$3

# Expansions are quoted so that paths containing spaces are passed as a
# single argument instead of being word-split.
FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 "${BIN_DIR}/../synthesize.py" \
    --am=tacotron2_ljspeech \
    --am_config="${config_path}" \
    --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
    --am_stat=dump/train/speech_stats.npy \
    --voc=pwgan_ljspeech \
    --voc_config=pwg_ljspeech_ckpt_0.5/pwg_default.yaml \
    --voc_ckpt=pwg_ljspeech_ckpt_0.5/pwg_snapshot_iter_400000.pdz \
    --voc_stat=pwg_ljspeech_ckpt_0.5/pwg_stats.npy \
    --test_metadata=dump/test/norm/metadata.jsonl \
    --output_dir="${train_output_path}/test" \
    --phones_dict=dump/phone_id_map.txt


================================================
FILE: examples/ljspeech/tts0/local/synthesize_e2e.sh
================================================
#!/bin/bash
# Synthesize waveforms from a text file with a trained Tacotron2 acoustic
# model and the pretrained parallel wavegan vocoder.
#
# Usage: ./local/synthesize_e2e.sh <config_path> <train_output_path> <ckpt_name>
#   config_path        acoustic model config (yaml)
#   train_output_path  experiment directory; the checkpoint is read from
#                      <train_output_path>/checkpoints/<ckpt_name> and the
#                      results are written to <train_output_path>/test_e2e
#   ckpt_name          file name of the acoustic model checkpoint
#
# Reads BIN_DIR from the environment and expects the unzipped
# pwg_ljspeech_ckpt_0.5 directory and the dump/ folder in the current directory.

config_path=$1
train_output_path=$2
ckpt_name=$3

# Expansions are quoted so that paths containing spaces are passed as a
# single argument instead of being word-split.
FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 "${BIN_DIR}/../synthesize_e2e.py" \
    --am=tacotron2_ljspeech \
    --am_config="${config_path}" \
    --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
    --am_stat=dump/train/speech_stats.npy \
    --voc=pwgan_ljspeech \
    --voc_config=pwg_ljspeech_ckpt_0.5/pwg_default.yaml \
    --voc_ckpt=pwg_ljspeech_ckpt_0.5/pwg_snapshot_iter_400000.pdz \
    --voc_stat=pwg_ljspeech_ckpt_0.5/pwg_stats.npy \
    --lang=en \
    --text="${BIN_DIR}/../../assets/sentences_en.txt" \
    --output_dir="${train_output_path}/test_e2e" \
    --phones_dict=dump/phone_id_map.txt

# TODO: dygraph to static graph is not good for tacotron2_ljspeech now, so the
# following option is intentionally not passed to the command above:
#     --inference_dir=${train_output_path}/inference

================================================
FILE: examples/ljspeech/tts0/run.sh
================================================
#!/bin/bash
# Full recipe for the Tacotron2 LJSpeech example:
#   stage 0: data preprocessing
#   stage 1: model training
#   stage 2: synthesis from the test-set metadata
#   stage 3: synthesis from a text file
#
# MAIN_ROOT is expected to be defined once path.sh has been sourced
# (presumably by path.sh itself -- confirm).

set -e
source path.sh

# Defaults. They are declared before parse_options.sh is sourced;
# NOTE(review): presumably that is what lets `--name value` options
# override them -- confirm against parse_options.sh.
gpus=0,1
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_201.pdz

# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this cannot be combined with positional arguments (`$1`, `$2`, ...)
source "${MAIN_ROOT}/utils/parse_options.sh" || exit 1

# Path-like values are quoted below so that user-supplied values containing
# spaces reach the stage scripts as single arguments.

if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    # prepare data
    ./local/preprocess.sh "${conf_path}" || exit -1
fi

if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    # train model, all `ckpt` under `train_output_path/checkpoints/` dir
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh "${conf_path}" "${train_output_path}" || exit -1
fi

if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    # synthesize, vocoder is pwgan
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh "${conf_path}" "${train_output_path}" "${ckpt_name}" || exit -1
fi

if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    # synthesize_e2e, vocoder is pwgan
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh "${conf_path}" "${train_output_path}" "${ckpt_name}" || exit -1
fi


================================================
FILE: examples/ljspeech/tts1/README.md
================================================
# TransformerTTS with LJSpeech
## Dataset
### Download and Extract
Download LJSpeech-1.1 from its [Official Website](https://keithito.com/LJ-Speech-Dataset/) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/LJSpeech-1.1`.
## Get Started
Assume the path to the dataset is `~/datasets/LJSpeech-1.1`.

Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
    - synthesize waveform from text file.
```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.
```text
dump
├── dev
│ ├── norm
│ └── raw
├── phone_id_map.txt
├── speaker_id_map.txt
├── test
│  ├── norm
│  └── raw
└── train
    ├── norm
    ├── raw
    └── speech_stats.npy
```
The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The raw folder contains the speech feature of each utterance, while the norm folder contains normalized ones. The statistics used to normalize features are computed from the training set, which is located in `dump/train/speech_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains phones, text_lengths, speech_lengths, the path of speech features, speaker, and id of each utterance.

### Model Training
`./local/train.sh` calls `${BIN_DIR}/train.py`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
Here's the complete help message.
```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU] [--phones-dict PHONES_DICT]

Train a TransformerTTS model with LJSpeech TTS dataset.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       TransformerTTS config file.
  --train-metadata TRAIN_METADATA
                        training data.
  --dev-metadata DEV_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
  --phones-dict PHONES_DICT
                        phone vocabulary file.
```
1. `--config` is a config file in yaml format to overwrite the default config, which can be found at `conf/default.yaml`.
2. `--train-metadata` and `--dev-metadata` should be the metadata file in the normalized subfolder of `train` and `dev` in the `dump` folder.
3. `--output-dir` is the directory to save the results of the experiment. Checkpoints are saved in `checkpoints/` inside this directory.
4. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.
5. `--phones-dict` is the path of the phone vocabulary file.

### Synthesizing
We use [waveflow](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/ljspeech/voc0) as the neural vocoder.
Download Pretrained WaveFlow Model with residual channel equals 128 from [waveflow_ljspeech_ckpt_0.3.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/waveflow/waveflow_ljspeech_ckpt_0.3.zip) and unzip it.
```bash
unzip waveflow_ljspeech_ckpt_0.3.zip
```
WaveFlow checkpoint contains files listed below.
```text
waveflow_ljspeech_ckpt_0.3
├── config.yaml           # default config used to train waveflow
└── step-2000000.pdparams # model parameters of waveflow
```
`./local/synthesize.sh` calls `${BIN_DIR}/synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
```text
usage: synthesize.py [-h] [--transformer-tts-config TRANSFORMER_TTS_CONFIG]
                     [--transformer-tts-checkpoint TRANSFORMER_TTS_CHECKPOINT]
                     [--transformer-tts-stat TRANSFORMER_TTS_STAT]
                     [--waveflow-config WAVEFLOW_CONFIG]
                     [--waveflow-checkpoint WAVEFLOW_CHECKPOINT]
                     [--phones-dict PHONES_DICT]
                     [--test-metadata TEST_METADATA] [--output-dir OUTPUT_DIR]
                     [--ngpu NGPU]

Synthesize with transformer tts & waveflow.

optional arguments:
  -h, --help            show this help message and exit
  --transformer-tts-config TRANSFORMER_TTS_CONFIG
                        transformer tts config file.
  --transformer-tts-checkpoint TRANSFORMER_TTS_CHECKPOINT
                        transformer tts checkpoint to load.
  --transformer-tts-stat TRANSFORMER_TTS_STAT
                        mean and standard deviation used to normalize
                        spectrogram when training transformer tts.
  --waveflow-config WAVEFLOW_CONFIG
                        waveflow config file.
  --waveflow-checkpoint WAVEFLOW_CHECKPOINT
                        waveflow checkpoint to load.
  --phones-dict PHONES_DICT
                        phone vocabulary file.
  --test-metadata TEST_METADATA
                        test metadata.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
```
`./local/synthesize_e2e.sh` calls `${BIN_DIR}/synthesize_e2e.py`, which can synthesize waveform from text file.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
```text
usage: synthesize_e2e.py [-h]
                         [--transformer-tts-config TRANSFORMER_TTS_CONFIG]
                         [--transformer-tts-checkpoint TRANSFORMER_TTS_CHECKPOINT]
                         [--transformer-tts-stat TRANSFORMER_TTS_STAT]
                         [--waveflow-config WAVEFLOW_CONFIG]
                         [--waveflow-checkpoint WAVEFLOW_CHECKPOINT]
                         [--phones-dict PHONES_DICT] [--text TEXT]
                         [--output-dir OUTPUT_DIR] [--ngpu NGPU]

Synthesize with transformer tts & waveflow.

optional arguments:
  -h, --help            show this help message and exit
  --transformer-tts-config TRANSFORMER_TTS_CONFIG
                        transformer tts config file.
  --transformer-tts-checkpoint TRANSFORMER_TTS_CHECKPOINT
                        transformer tts checkpoint to load.
  --transformer-tts-stat TRANSFORMER_TTS_STAT
                        mean and standard deviation used to normalize
                        spectrogram when training transformer tts.
  --waveflow-config WAVEFLOW_CONFIG
                        waveflow config file.
  --waveflow-checkpoint WAVEFLOW_CHECKPOINT
                        waveflow checkpoint to load.
  --phones-dict PHONES_DICT
                        phone vocabulary file.
  --text TEXT           text to synthesize, a 'utt_id sentence' pair per line.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
```
1. `--transformer-tts-config`, `--transformer-tts-checkpoint`, `--transformer-tts-stat` and `--phones-dict` are arguments for transformer_tts, which correspond to the 4 files in the transformer_tts pretrained model.
2. `--waveflow-config`, `--waveflow-checkpoint` are arguments for waveflow, which correspond to the 2 files in the waveflow pretrained model.
3. `--test-metadata` should be the metadata file in the normalized subfolder of `test` in the `dump` folder.
4. `--text` is the text file, which contains sentences to synthesize.
5. `--output-dir` is the directory to save synthesized audio files.
6. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

## Pretrained Model
Pretrained Model can be downloaded here:
- [transformer_tts_ljspeech_ckpt_0.4.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/transformer_tts/transformer_tts_ljspeech_ckpt_0.4.zip)

TransformerTTS checkpoint contains files listed below.
```text
transformer_tts_ljspeech_ckpt_0.4
├── default.yaml             # default config used to train transformer_tts
├── phone_id_map.txt         # phone vocabulary file when training transformer_tts
├── snapshot_iter_201500.pdz # model parameters and optimizer states
└── speech_stats.npy         # statistics used to normalize spectrogram when training transformer_tts
```
You can use the following script to synthesize the sentences in `${BIN_DIR}/../../assets/sentences_en.txt` using the pretrained transformer_tts and waveflow models.
```bash
source path.sh

FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 ${BIN_DIR}/synthesize_e2e.py \
  --transformer-tts-config=transformer_tts_ljspeech_ckpt_0.4/default.yaml \
  --transformer-tts-checkpoint=transformer_tts_ljspeech_ckpt_0.4/snapshot_iter_201500.pdz \
  --transformer-tts-stat=transformer_tts_ljspeech_ckpt_0.4/speech_stats.npy \
  --waveflow-config=waveflow_ljspeech_ckpt_0.3/config.yaml \
  --waveflow-checkpoint=waveflow_ljspeech_ckpt_0.3/step-2000000.pdparams \
  --text=${BIN_DIR}/../../assets/sentences_en.txt \
  --output-dir=exp/default/test_e2e \
  --phones-dict=transformer_tts_ljspeech_ckpt_0.4/phone_id_map.txt
```


================================================
FILE: examples/ljspeech/tts1/conf/default.yaml
================================================

fs : 22050              # Hz, sample rate
n_fft : 1024            # FFT size (samples). 
win_length : 1024       # Window length (samples). 46.4ms
n_shift : 256           # Hop size (samples). 11.6ms
fmin : 0                # Hz, min frequency when converting to mel
fmax : 8000             # Hz, max frequency when converting to mel
n_mels : 80             # mel bands
window: "hann"          # Window function.

###########################################################
#                       DATA SETTING                      #
###########################################################
batch_size: 16
num_workers: 2

##########################################################
#                  TTS MODEL SETTING                     #
##########################################################
tts: transformertts        # model architecture
model:                     # keyword arguments for the selected model
    embed_dim: 0           # embedding dimension in encoder prenet
    eprenet_conv_layers: 0 # number of conv layers in encoder prenet
                           # if set to 0, no encoder prenet will be used
    eprenet_conv_filts: 0  # filter size of conv layers in encoder prenet
    eprenet_conv_chans: 0  # number of channels of conv layers in encoder prenet
    dprenet_layers: 2      # number of layers in decoder prenet
    dprenet_units: 256     # number of units in decoder prenet
    adim: 512              # attention dimension
    aheads: 8              # number of attention heads
    elayers: 6             # number of encoder layers
    eunits: 1024           # number of encoder ff units
    dlayers: 6             # number of decoder layers
    dunits: 1024           # number of decoder ff units
    positionwise_layer_type: conv1d  # type of position-wise layer
    positionwise_conv_kernel_size: 1 # kernel size of position wise conv layer
    postnet_layers: 5                # number of layers of postnet
    postnet_filts: 5                 # filter size of conv layers in postnet
    postnet_chans: 256               # number of channels of conv layers in postnet
    use_scaled_pos_enc: True         # whether to use scaled positional encoding
    encoder_normalize_before: True   # whether to perform layer normalization before the input
    decoder_normalize_before: True   # whether to perform layer normalization before the input
    reduction_factor: 1              # reduction factor
    init_type: xavier_uniform        # initialization type
    init_enc_alpha: 1.0              # initial value of alpha of encoder scaled position encoding
    init_dec_alpha: 1.0              # initial value of alpha of decoder scaled position encoding
    eprenet_dropout_rate: 0.0        # dropout rate for encoder prenet
    dprenet_dropout_rate: 0.5        # dropout rate for decoder prenet
    postnet_dropout_rate: 0.5        # dropout rate for postnet
    transformer_enc_dropout_rate: 0.1                # dropout rate for transformer encoder layer
    transformer_enc_positional_dropout_rate: 0.1     # dropout rate for transformer encoder positional encoding
    transformer_enc_attn_dropout_rate: 0.1           # dropout rate for transformer encoder attention layer
    transformer_dec_dropout_rate: 0.1                # dropout rate for transformer decoder layer
    transformer_dec_positional_dropout_rate: 0.1     # dropout rate for transformer decoder positional encoding
    transformer_dec_attn_dropout_rate: 0.1           # dropout rate for transformer decoder attention layer
    transformer_enc_dec_attn_dropout_rate: 0.1       # dropout rate for transformer encoder-decoder attention layer
    num_heads_applied_guided_attn: 2                 # number of heads to apply guided attention loss
    num_layers_applied_guided_attn: 2                # number of layers to apply guided attention loss  
    

###########################################################
#                       UPDATER SETTING                   #
###########################################################
updater:
    use_masking: True                  # whether to apply masking for padded part in loss calculation
    loss_type: L1                      # type of loss used by the updater (L1 here) — NOTE(review): confirm the accepted values
    use_guided_attn_loss: True         # whether to use guided attention loss
    guided_attn_loss_sigma: 0.4        # sigma in guided attention loss
    guided_attn_loss_lambda: 10.0      # lambda in guided attention loss
    modules_applied_guided_attn: ["encoder-decoder"] # modules to apply guided attention loss
    bce_pos_weight: 5.0              # weight of positive sample in binary cross entropy calculation


##########################################################
#            OPTIMIZER & SCHEDULER SETTING               #
##########################################################
optimizer:
    optim: adam               # optimizer type
    learning_rate: 0.001      # learning rate

###########################################################
#                     TRAINING SETTING                    #
###########################################################
max_epoch: 500      # presumably the maximum number of training epochs — confirm in train.py
num_snapshots: 5    # presumably the number of snapshots (checkpoints) to keep — confirm in train.py


###########################################################
#                       OTHER SETTING                     #
###########################################################
seed: 10086         # presumably the random seed — confirm in train.py

================================================
FILE: examples/ljspeech/tts1/local/preprocess.sh
================================================
#!/bin/bash
# Preprocess LJSpeech-1.1 for this example in three stages:
#   1. extract features into `dump`,
#   2. compute statistics of the "speech" field of the training set,
#   3. normalize train/dev/test with the training-set statistics.
#
# Usage: ./local/preprocess.sh [config_path]
#   config_path: yaml config handed to preprocess.py
#                (defaults to conf/default.yaml when omitted).
# BIN_DIR and MAIN_ROOT must be set in the environment (path.sh exports them).

stage=1
stop_stage=100

# Honour the config given by the caller; fall back to the value that used to
# be hard-coded so that calling the script without arguments still works.
config_path=${1:-conf/default.yaml}

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # extract features
    echo "Extract features ..."
    python3 ${BIN_DIR}/preprocess.py  \
        --dataset=ljspeech \
        --rootdir=~/datasets/LJSpeech-1.1/ \
        --dumpdir=dump \
        --config-path=${config_path} \
        --num-cpu=8
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # get features' stats(mean and std)
    echo "Get features' stats ..."
    python3 ${MAIN_ROOT}/utils/compute_statistics.py \
        --metadata=dump/train/raw/metadata.jsonl \
        --field-name="speech"
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # normalize and convert phone to id, dev and test should use train's stats
    echo "Normalize ..."
    python3 ${BIN_DIR}/normalize.py \
        --metadata=dump/train/raw/metadata.jsonl \
        --dumpdir=dump/train/norm \
        --speech-stats=dump/train/speech_stats.npy \
        --phones-dict=dump/phone_id_map.txt \
        --speaker-dict=dump/speaker_id_map.txt

    python3 ${BIN_DIR}/normalize.py \
        --metadata=dump/dev/raw/metadata.jsonl \
        --dumpdir=dump/dev/norm \
        --speech-stats=dump/train/speech_stats.npy \
        --phones-dict=dump/phone_id_map.txt \
        --speaker-dict=dump/speaker_id_map.txt

    python3 ${BIN_DIR}/normalize.py \
        --metadata=dump/test/raw/metadata.jsonl \
        --dumpdir=dump/test/norm \
        --speech-stats=dump/train/speech_stats.npy \
        --phones-dict=dump/phone_id_map.txt \
        --speaker-dict=dump/speaker_id_map.txt
fi


================================================
FILE: examples/ljspeech/tts1/local/synthesize.sh
================================================
#!/bin/bash
# Synthesize waveforms for the test metadata with a trained transformer-tts
# checkpoint and a waveflow vocoder.
#
# Usage: ./local/synthesize.sh <config_path> <train_output_path> <ckpt_name>

config_path=$1
train_output_path=$2
ckpt_name=$3

# Options handed to synthesize.py, collected first to keep the call short.
synth_args=(
    --transformer-tts-config=${config_path}
    --transformer-tts-checkpoint=${train_output_path}/checkpoints/${ckpt_name}
    --transformer-tts-stat=dump/train/speech_stats.npy
    --waveflow-config=waveflow_ljspeech_ckpt_0.3/config.yaml
    --waveflow-checkpoint=waveflow_ljspeech_ckpt_0.3/step-2000000.pdparams
    --test-metadata=dump/test/norm/metadata.jsonl
    --output-dir=${train_output_path}/test
    --phones-dict=dump/phone_id_map.txt
)

# The FLAGS_* variables are set only for this python3 invocation.
FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 ${BIN_DIR}/synthesize.py "${synth_args[@]}"


================================================
FILE: examples/ljspeech/tts1/local/synthesize_e2e.sh
================================================
#!/bin/bash
# Synthesize waveforms from a text file with a trained transformer-tts
# checkpoint and a waveflow vocoder.
#
# Usage: ./local/synthesize_e2e.sh <config_path> <train_output_path> <ckpt_name>

config_path=$1
train_output_path=$2
ckpt_name=$3

# Options handed to synthesize_e2e.py, collected first to keep the call short.
e2e_args=(
    --transformer-tts-config=${config_path}
    --transformer-tts-checkpoint=${train_output_path}/checkpoints/${ckpt_name}
    --transformer-tts-stat=dump/train/speech_stats.npy
    --waveflow-config=waveflow_ljspeech_ckpt_0.3/config.yaml
    --waveflow-checkpoint=waveflow_ljspeech_ckpt_0.3/step-2000000.pdparams
    --text=${BIN_DIR}/../../assets/sentences_en.txt
    --output-dir=${train_output_path}/test_e2e
    --phones-dict=dump/phone_id_map.txt
)

# The FLAGS_* variables are set only for this python3 invocation.
FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 ${BIN_DIR}/synthesize_e2e.py "${e2e_args[@]}"


================================================
FILE: examples/ljspeech/tts1/local/train.sh
================================================
#!/bin/bash
# Launch train.py on the normalized train/dev metadata found under `dump`.
#
# Usage: ./local/train.sh <config_path> <train_output_path>

config_path=$1
train_output_path=$2

# Options handed to train.py, collected first to keep the call short.
train_args=(
    --train-metadata=dump/train/norm/metadata.jsonl
    --dev-metadata=dump/dev/norm/metadata.jsonl
    --config=${config_path}
    --output-dir=${train_output_path}
    --ngpu=2
    --phones-dict=dump/phone_id_map.txt
)

python3 ${BIN_DIR}/train.py "${train_args[@]}"


================================================
FILE: examples/ljspeech/tts1/path.sh
================================================
#!/bin/bash
# Environment setup for this example; run.sh sources this file.

# Repository root: three directories above the current working directory.
export MAIN_ROOT=$(realpath ${PWD}/../../../)

# Put the repository root (and its utils) on PATH and on the Python path.
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

export LC_ALL=C
# Do not write bytecode files, and use UTF-8 in Python to avoid
# UnicodeDecodeError when LC_ALL=C.
export PYTHONDONTWRITEBYTECODE=1 PYTHONIOENCODING=UTF-8

# Directory holding the python entry points of the model used here.
MODEL=transformer_tts
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}


================================================
FILE: examples/ljspeech/tts1/run.sh
================================================
#!/bin/bash
# Entry point of this example: runs preprocessing (stage 0), training (stage 1),
# synthesis from metadata (stage 2) and synthesis from text (stage 3).
# The variables below are defaults; they are defined before parse_options.sh
# is sourced so that they can be overridden from the command line.

set -e
source path.sh

gpus=0,1
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_403.pdz

# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this cannot be mixed with positional arguments `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # prepare data
    ./local/preprocess.sh ${conf_path} || exit -1
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # train model, all `ckpt` under `train_output_path/checkpoints/` dir
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # synthesize, vocoder is waveflow (local/synthesize.sh passes --waveflow-* options)
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # synthesize_e2e, vocoder is waveflow (local/synthesize_e2e.sh passes --waveflow-* options)
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi


================================================
FILE: examples/ljspeech/tts3/README.md
================================================
# FastSpeech2 with LJSpeech-1.1
This example contains code used to train a [Fastspeech2](https://arxiv.org/abs/2006.04558) model with [LJSpeech-1.1](https://keithito.com/LJ-Speech-Dataset/).

## Dataset
### Download and Extract
Download LJSpeech-1.1 from its [Official Website](https://keithito.com/LJ-Speech-Dataset/) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/LJSpeech-1.1`.

### Get MFA Result and Extract
We use [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get durations for fastspeech2.
You can download the alignment results from [ljspeech_alignment.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/LJSpeech-1.1/ljspeech_alignment.tar.gz), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) in our repo.

## Get Started
Assume the path to the dataset is `~/datasets/LJSpeech-1.1`.
Assume the path to the MFA result of LJSpeech-1.1 is `./ljspeech_alignment`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
    - synthesize waveform from text file.
```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.

```text
dump
├── dev
│   ├── norm
│   └── raw
├── phone_id_map.txt
├── speaker_id_map.txt
├── test
│   ├── norm
│   └── raw
└── train
    ├── energy_stats.npy
    ├── norm
    ├── pitch_stats.npy
    ├── raw
    └── speech_stats.npy
```
The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The raw folder contains speech, pitch, and energy features of each utterance, while the norm folder contains normalized ones. The statistics used to normalize features are computed from the training set and are located in `dump/train/*_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains phones, text_lengths, speech_lengths, durations, the path of speech features, the path of pitch features, the path of energy features, speaker, and id of each utterance.

### Model Training
`./local/train.sh` calls `${BIN_DIR}/train.py`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
Here's the complete help message.
```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU] [--phones-dict PHONES_DICT]
                [--speaker-dict SPEAKER_DICT] [--voice-cloning VOICE_CLONING]

Train a FastSpeech2 model.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       fastspeech2 config file.
  --train-metadata TRAIN_METADATA
                        training data.
  --dev-metadata DEV_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu=0, use cpu.
  --phones-dict PHONES_DICT
                        phone vocabulary file.
  --speaker-dict SPEAKER_DICT
                        speaker id map file for multiple speaker model.
  --voice-cloning VOICE_CLONING
                        whether training voice cloning model.
```
1. `--config` is a config file in yaml format to overwrite the default config, which can be found at `conf/default.yaml`.
2. `--train-metadata` and `--dev-metadata` should be the metadata file in the normalized subfolder of `train` and `dev` in the `dump` folder.
3. `--output-dir` is the directory to save the results of the experiment. Checkpoints are saved in `checkpoints/` inside this directory.
4. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.
5. `--phones-dict` is the path of the phone vocabulary file.

### Synthesizing
We use [parallel wavegan](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/ljspeech/voc1) as the neural vocoder.
Download pretrained parallel wavegan model from [pwg_ljspeech_ckpt_0.5.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_ljspeech_ckpt_0.5.zip) and unzip it.
```bash
unzip pwg_ljspeech_ckpt_0.5.zip
```
Parallel WaveGAN checkpoint contains files listed below.
```text
pwg_ljspeech_ckpt_0.5
├── pwg_default.yaml              # default config used to train parallel wavegan
├── pwg_snapshot_iter_400000.pdz  # generator parameters of parallel wavegan
└── pwg_stats.npy                 # statistics used to normalize spectrogram when training parallel wavegan
```
`./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
```
The last number selects the vocoder model used during synthesis: `0` uses the `pwgan` model and `1` uses the `hifigan` model.

```text
usage: synthesize.py [-h]
                     [--am {speedyspeech_csmsc,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech,tacotron2_aishell3}]
                     [--am_config AM_CONFIG] [--am_ckpt AM_CKPT]
                     [--am_stat AM_STAT] [--phones_dict PHONES_DICT]
                     [--tones_dict TONES_DICT] [--speaker_dict SPEAKER_DICT]
                     [--voice-cloning VOICE_CLONING]
                     [--voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,wavernn_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,style_melgan_csmsc}]
                     [--voc_config VOC_CONFIG] [--voc_ckpt VOC_CKPT]
                     [--voc_stat VOC_STAT] [--ngpu NGPU]
                     [--test_metadata TEST_METADATA] [--output_dir OUTPUT_DIR]

Synthesize with acoustic model & vocoder

optional arguments:
  -h, --help            show this help message and exit
  --am {speedyspeech_csmsc,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech,tacotron2_aishell3}
                        Choose acoustic model type of tts task.
  --am_config AM_CONFIG
                        Config of acoustic model.
  --am_ckpt AM_CKPT     Checkpoint file of acoustic model.
  --am_stat AM_STAT     mean and standard deviation used to normalize
                        spectrogram when training acoustic model.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --tones_dict TONES_DICT
                        tone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --voice-cloning VOICE_CLONING
                        whether training voice cloning model.
  --voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,wavernn_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,style_melgan_csmsc}
                        Choose vocoder type of tts task.
  --voc_config VOC_CONFIG
                        Config of voc.
  --voc_ckpt VOC_CKPT   Checkpoint file of voc.
  --voc_stat VOC_STAT   mean and standard deviation used to normalize
                        spectrogram when training voc.
  --ngpu NGPU           if ngpu == 0, use cpu.
  --test_metadata TEST_METADATA
                        test metadata.
  --output_dir OUTPUT_DIR
                        output dir.
```
`./local/synthesize_e2e.sh` calls `${BIN_DIR}/../synthesize_e2e.py`, which can synthesize waveform from text file.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
```
The last number selects the vocoder model used during synthesis: `0` uses the `pwgan` model and `1` uses the `hifigan` model.

```text
usage: synthesize_e2e.py [-h]
                         [--am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech}]
                         [--am_config AM_CONFIG] [--am_ckpt AM_CKPT]
                         [--am_stat AM_STAT] [--phones_dict PHONES_DICT]
                         [--tones_dict TONES_DICT]
                         [--speaker_dict SPEAKER_DICT] [--spk_id SPK_ID]
                         [--voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,style_melgan_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,wavernn_csmsc}]
                         [--voc_config VOC_CONFIG] [--voc_ckpt VOC_CKPT]
                         [--voc_stat VOC_STAT] [--lang LANG]
                         [--inference_dir INFERENCE_DIR] [--ngpu NGPU]
                         [--text TEXT] [--output_dir OUTPUT_DIR]

Synthesize with acoustic model & vocoder

optional arguments:
  -h, --help            show this help message and exit
  --am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech}
                        Choose acoustic model type of tts task.
  --am_config AM_CONFIG
                        Config of acoustic model.
  --am_ckpt AM_CKPT     Checkpoint file of acoustic model.
  --am_stat AM_STAT     mean and standard deviation used to normalize
                        spectrogram when training acoustic model.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --tones_dict TONES_DICT
                        tone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --spk_id SPK_ID       spk id for multi speaker acoustic model
  --voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,style_melgan_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,wavernn_csmsc}
                        Choose vocoder type of tts task.
  --voc_config VOC_CONFIG
                        Config of voc.
  --voc_ckpt VOC_CKPT   Checkpoint file of voc.
  --voc_stat VOC_STAT   mean and standard deviation used to normalize
                        spectrogram when training voc.
  --lang LANG           Choose model language. zh or en
  --inference_dir INFERENCE_DIR
                        dir to save inference models
  --ngpu NGPU           if ngpu == 0, use cpu.
  --text TEXT           text to synthesize, a 'utt_id sentence' pair per line.
  --output_dir OUTPUT_DIR
                        output dir.
```
1. `--am` is acoustic model type with the format {model_name}_{dataset}
2. `--am_config`, `--am_ckpt`, `--am_stat` and `--phones_dict` are arguments for acoustic model, which correspond to the 4 files in the fastspeech2 pretrained model.
3. `--voc` is vocoder type with the format {model_name}_{dataset}
4. `--voc_config`, `--voc_ckpt`, `--voc_stat` are arguments for vocoder, which correspond to the 3 files in the parallel wavegan pretrained model.
5. `--lang` is the model language, which can be `zh` or `en`.
6. `--test_metadata` should be the metadata file in the normalized subfolder of `test`  in the `dump` folder.
7. `--text` is the text file, which contains sentences to synthesize.
8. `--output_dir` is the directory to save synthesized audio files.
9. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

## Pretrained Model
Pretrained FastSpeech2 model with no silence in the edge of audios:
- [fastspeech2_nosil_ljspeech_ckpt_0.5.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_ljspeech_ckpt_0.5.zip)

The static model can be downloaded here:
- [fastspeech2_ljspeech_static_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_ljspeech_static_1.1.0.zip)

The ONNX model can be downloaded here:
- [fastspeech2_ljspeech_onnx_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_ljspeech_onnx_1.1.0.zip)

The Paddle-Lite model can be downloaded here:
- [fastspeech2_ljspeech_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_ljspeech_pdlite_1.3.0.zip)


Model | Step | eval/loss | eval/l1_loss | eval/duration_loss | eval/pitch_loss| eval/energy_loss 
:-------------:| :------------:| :-----: | :-----: | :--------: |:--------:|:---------:
default| 2(gpu) x 100000| 1.505682|0.612104| 0.045505| 0.62792| 0.220147


FastSpeech2 checkpoint contains files listed below.
```text
fastspeech2_nosil_ljspeech_ckpt_0.5
├── default.yaml             # default config used to train fastspeech2
├── phone_id_map.txt         # phone vocabulary file when training fastspeech2
├── snapshot_iter_100000.pdz # model parameters and optimizer states
└── speech_stats.npy         # statistics used to normalize spectrogram when training fastspeech2
```
You can use the following script to synthesize the sentences in `${BIN_DIR}/../../assets/sentences_en.txt` using the pretrained fastspeech2 and parallel wavegan models.
```bash
source path.sh

FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 ${BIN_DIR}/../synthesize_e2e.py \
  --am=fastspeech2_ljspeech \
  --am_config=fastspeech2_nosil_ljspeech_ckpt_0.5/default.yaml \
  --am_ckpt=fastspeech2_nosil_ljspeech_ckpt_0.5/snapshot_iter_100000.pdz \
  --am_stat=fastspeech2_nosil_ljspeech_ckpt_0.5/speech_stats.npy \
  --voc=pwgan_ljspeech\
  --voc_config=pwg_ljspeech_ckpt_0.5/pwg_default.yaml \
  --voc_ckpt=pwg_ljspeech_ckpt_0.5/pwg_snapshot_iter_400000.pdz  \
  --voc_stat=pwg_ljspeech_ckpt_0.5/pwg_stats.npy \
  --lang=en \
  --text=${BIN_DIR}/../../assets/sentences_en.txt \
  --output_dir=exp/default/test_e2e \
  --inference_dir=exp/default/inference \
  --phones_dict=fastspeech2_nosil_ljspeech_ckpt_0.5/phone_id_map.txt
```


================================================
FILE: examples/ljspeech/tts3/conf/default.yaml
================================================
###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################

fs: 22050          # Sampling rate.
n_fft: 1024        # FFT size (samples).
n_shift: 256       # Hop size (samples). 11.6ms
win_length: null   # Window length (samples).
                   # If set to null, it will be the same as n_fft.
window: "hann"     # Window function.

# Only used for feats_type != raw

fmin: 80           # Minimum frequency of Mel basis.
fmax: 7600         # Maximum frequency of Mel basis.
n_mels: 80         # The number of mel basis.

# Only used for the model using pitch features (e.g. FastSpeech2)
f0min: 80          # Minimum f0 for pitch extraction.
f0max: 400         # Maximum f0 for pitch extraction.


###########################################################
#                       DATA SETTING                      #
###########################################################
batch_size: 64     # presumably the number of utterances per training batch — confirm in train.py
num_workers: 4     # presumably the number of data-loading worker processes — confirm in train.py


###########################################################
#                       MODEL SETTING                     #
###########################################################
# Hyperparameters of the acoustic model trained by this example
# (FastSpeech2, according to the README of this example).
model:
    adim: 384         # attention dimension
    aheads: 2         # number of attention heads
    elayers: 4        # number of encoder layers
    eunits: 1536      # number of encoder ff units
    dlayers: 4        # number of decoder layers
    dunits: 1536      # number of decoder ff units
    positionwise_layer_type: conv1d   # type of position-wise layer
    positionwise_conv_kernel_size: 3  # kernel size of position wise conv layer
    duration_predictor_layers: 2      # number of layers of duration predictor
    duration_predictor_chans: 256     # number of channels of duration predictor
    duration_predictor_kernel_size: 3 # filter size of duration predictor
    postnet_layers: 5                 # number of layers of postnet
    postnet_filts: 5                  # filter size of conv layers in postnet
    postnet_chans: 256                # number of channels of conv layers in postnet
    use_scaled_pos_enc: True          # whether to use scaled positional encoding
    encoder_normalize_before: True    # whether to perform layer normalization before the input
    decoder_normalize_before: True    # whether to perform layer normalization before the input
    reduction_factor: 1               # reduction factor
    init_type: xavier_uniform         # initialization type
    init_enc_alpha: 1.0               # initial value of alpha of encoder scaled position encoding
    init_dec_alpha: 1.0               # initial value of alpha of decoder scaled position encoding
    transformer_enc_dropout_rate: 0.2            # dropout rate for transformer encoder layer
    transformer_enc_positional_dropout_rate: 0.2 # dropout rate for transformer encoder positional encoding
    transformer_enc_attn_dropout_rate: 0.2       # dropout rate for transformer encoder attention layer
    transformer_dec_dropout_rate: 0.2            # dropout rate for transformer decoder layer
    transformer_dec_positional_dropout_rate: 0.2 # dropout rate for transformer decoder positional encoding
    transformer_dec_attn_dropout_rate: 0.2       # dropout rate for transformer decoder attention layer
    pitch_predictor_layers: 5                  # number of conv layers in pitch predictor
    pitch_predictor_chans: 256                 # number of channels of conv layers in pitch predictor
    pitch_predictor_kernel_size: 5             # kernel size of conv layers in pitch predictor
    pitch_predictor_dropout: 0.5               # dropout rate in pitch predictor
    pitch_embed_kernel_size: 1                 # kernel size of conv embedding layer for pitch
    pitch_embed_dropout: 0.0                   # dropout rate after conv embedding layer for pitch
    stop_gradient_from_pitch_predictor: True   # whether to stop the gradient from pitch predictor to encoder
    energy_predictor_layers: 2                 # number of conv layers in energy predictor
    energy_predictor_chans: 256                # number of channels of conv layers in energy predictor
    energy_predictor_kernel_size: 3            # kernel size of conv layers in energy predictor
    energy_predictor_dropout: 0.5              # dropout rate in energy predictor
    energy_embed_kernel_size: 1                # kernel size of conv embedding layer for energy
    energy_embed_dropout: 0.0                  # dropout rate after conv embedding layer for energy
    stop_gradient_from_energy_predictor: False # whether to stop the gradient from energy predictor to encoder


###########################################################
#                       UPDATER SETTING                   #
###########################################################
updater:
    use_masking: True                 # whether to apply masking for padded part in loss calculation


###########################################################
#                     OPTIMIZER SETTING                   #
###########################################################
optimizer:
  optim: adam               # optimizer type
  learning_rate: 0.001     # learning rate

###########################################################
#                     TRAINING SETTING                    #
###########################################################
max_epoch: 1000     # presumably the maximum number of training epochs — confirm in train.py
num_snapshots: 5    # presumably the number of snapshots (checkpoints) to keep — confirm in train.py


###########################################################
#                       OTHER SETTING                     #
###########################################################
seed: 10086         # presumably the random seed — confirm in train.py


================================================
FILE: examples/ljspeech/tts3/local/inference.sh
================================================
#!/bin/bash
# Run inference.py on the exported models under
# ${train_output_path}/inference, once per selected vocoder stage.
#
# Usage: ./local/inference.sh <train_output_path>
# Stage 0 uses the pwgan vocoder, stage 1 the hifigan vocoder; with the
# values of stage/stop_stage below only stage 0 runs.

train_output_path=$1

stage=0
stop_stage=0

# Call inference.py with the vocoder named by the first argument.
run_inference() {
    python3 ${BIN_DIR}/../inference.py \
        --inference_dir=${train_output_path}/inference \
        --am=fastspeech2_ljspeech \
        --voc=$1 \
        --text=${BIN_DIR}/../../assets/sentences_en.txt \
        --output_dir=${train_output_path}/pd_infer_out \
        --phones_dict=dump/phone_id_map.txt \
        --lang=en
}

# pwgan
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    run_inference pwgan_ljspeech
fi

# hifigan
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    run_inference hifigan_ljspeech
fi


================================================
FILE: examples/ljspeech/tts3/local/lite_predict.sh
================================================
#!/bin/bash
# Run lite_predict.py on the models under ${train_output_path}/pdlite,
# once per selected vocoder stage.
#
# Usage: ./local/lite_predict.sh <train_output_path>

train_output_path=$1

stage=0
stop_stage=0

# Vocoder used by each stage: index 0 -> pwgan, index 1 -> hifigan.
vocoders=(pwgan_ljspeech hifigan_ljspeech)

for idx in 0 1; do
    if [ ${stage} -le ${idx} ] && [ ${stop_stage} -ge ${idx} ]; then
        python3 ${BIN_DIR}/../lite_predict.py \
            --inference_dir=${train_output_path}/pdlite \
            --am=fastspeech2_ljspeech \
            --voc=${vocoders[${idx}]} \
            --text=${BIN_DIR}/../../assets/sentences_en.txt \
            --output_dir=${train_output_path}/lite_infer_out \
            --phones_dict=dump/phone_id_map.txt \
            --lang=en
    fi
done


================================================
FILE: examples/ljspeech/tts3/local/ort_predict.sh
================================================
#!/bin/bash
# Run ort_predict_e2e.py on the ONNX models under
# ${train_output_path}/inference_onnx, synthesizing from a text file.
#
# Usage: ./local/ort_predict.sh <train_output_path>
# Stage 0 uses the pwgan vocoder, stage 1 the hifigan vocoder; with the
# values of stage/stop_stage below only stage 0 runs.
#
# The shebang above makes the interpreter explicit, like the sibling scripts
# in this directory, instead of depending on how the caller executes the file.

train_output_path=$1

stage=0
stop_stage=0

# e2e, synthesize from text (pwgan vocoder)
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    python3 ${BIN_DIR}/../ort_predict_e2e.py \
        --inference_dir=${train_output_path}/inference_onnx \
        --am=fastspeech2_ljspeech \
        --voc=pwgan_ljspeech \
        --output_dir=${train_output_path}/onnx_infer_out_e2e \
        --text=${BIN_DIR}/../../assets/sentences_en.txt \
        --phones_dict=dump/phone_id_map.txt \
        --device=cpu \
        --cpu_threads=2 \
        --lang=en

fi

# e2e, synthesize from text (hifigan vocoder)
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    python3 ${BIN_DIR}/../ort_predict_e2e.py \
        --inference_dir=${train_output_path}/inference_onnx \
        --am=fastspeech2_ljspeech \
        --voc=hifigan_ljspeech \
        --output_dir=${train_output_path}/onnx_infer_out_e2e \
        --text=${BIN_DIR}/../../assets/sentences_en.txt \
        --phones_dict=dump/phone_id_map.txt \
        --device=cpu \
        --cpu_threads=2 \
        --lang=en
fi


================================================
FILE: examples/ljspeech/tts3/local/preprocess.sh
================================================
#!/bin/bash
# Preprocess LJSpeech-1.1 for this example in four stages:
#   0. generate durations.txt from the MFA alignment results,
#   1. extract features into `dump`,
#   2. compute statistics of the speech, pitch and energy fields of the
#      training set,
#   3. normalize train/dev/test with the training-set statistics.
#
# Usage: ./local/preprocess.sh <config_path>

config_path=$1

stage=0
stop_stage=100

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # durations come from MFA's result
    echo "Generate durations.txt from MFA results ..."
    python3 ${MAIN_ROOT}/utils/gen_duration_from_textgrid.py \
        --inputdir=./ljspeech_alignment \
        --output=durations.txt \
        --config=${config_path}
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # feature extraction
    echo "Extract features ..."
    python3 ${BIN_DIR}/preprocess.py \
        --dataset=ljspeech \
        --rootdir=~/datasets/LJSpeech-1.1/ \
        --dumpdir=dump \
        --dur-file=durations.txt \
        --config=${config_path} \
        --num-cpu=8 \
        --cut-sil=True
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # statistics (mean and std) of each feature field of the training set
    echo "Get features' stats ..."
    for field in speech pitch energy; do
        python3 ${MAIN_ROOT}/utils/compute_statistics.py \
            --metadata=dump/train/raw/metadata.jsonl \
            --field-name="${field}"
    done
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # normalize and convert phone/speaker to id; dev and test reuse train's stats
    echo "Normalize ..."
    for subset in train dev test; do
        python3 ${BIN_DIR}/normalize.py \
            --metadata=dump/${subset}/raw/metadata.jsonl \
            --dumpdir=dump/${subset}/norm \
            --speech-stats=dump/train/speech_stats.npy \
            --pitch-stats=dump/train/pitch_stats.npy \
            --energy-stats=dump/train/energy_stats.npy \
            --phones-dict=dump/phone_id_map.txt \
            --speaker-dict=dump/speaker_id_map.txt
    done
fi


================================================
FILE: examples/ljspeech/tts3/local/synthesize.sh
================================================
#!/bin/bash
# Synthesize from dump/test/norm/metadata.jsonl with a trained FastSpeech2
# checkpoint and a pretrained vocoder.
#
# Args:
#   $1  acoustic model config
#   $2  training output directory (checkpoints are read from its checkpoints/)
#   $3  checkpoint file name
#   $4  optional stage selector, default 0 (0: pwgan, 1: hifigan)

config_path=$1
train_output_path=$2
ckpt_name=$3

# The same argument sets both bounds, so exactly one stage is selected.
stop_stage=${4:-0}
stage=${4:-0}

# Shared synthesize.py invocation.
#   $1 vocoder name, $2 vocoder config, $3 vocoder checkpoint, $4 vocoder stats
run_synthesize() {
    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 ${BIN_DIR}/../synthesize.py \
        --am=fastspeech2_ljspeech \
        --am_config=${config_path} \
        --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
        --am_stat=dump/train/speech_stats.npy \
        --voc=$1 \
        --voc_config=$2 \
        --voc_ckpt=$3 \
        --voc_stat=$4 \
        --test_metadata=dump/test/norm/metadata.jsonl \
        --output_dir=${train_output_path}/test \
        --phones_dict=dump/phone_id_map.txt
}

# pwgan
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    run_synthesize pwgan_ljspeech \
        pwg_ljspeech_ckpt_0.5/pwg_default.yaml \
        pwg_ljspeech_ckpt_0.5/pwg_snapshot_iter_400000.pdz \
        pwg_ljspeech_ckpt_0.5/pwg_stats.npy
fi

# hifigan
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    run_synthesize hifigan_ljspeech \
        hifigan_ljspeech_ckpt_0.2.0/default.yaml \
        hifigan_ljspeech_ckpt_0.2.0/snapshot_iter_2500000.pdz \
        hifigan_ljspeech_ckpt_0.2.0/feats_stats.npy
fi


================================================
FILE: examples/ljspeech/tts3/local/synthesize_e2e.sh
================================================
#!/bin/bash
# End-to-end synthesis (text -> wav) with a trained FastSpeech2 checkpoint
# and a pretrained vocoder.
#
# Args:
#   $1  acoustic model config
#   $2  training output directory (checkpoints are read from its checkpoints/)
#   $3  checkpoint file name
#   $4  optional stage selector, default 0 (0: pwgan, 1: hifigan)

config_path=$1
train_output_path=$2
ckpt_name=$3

# The same argument sets both bounds, so exactly one stage is selected.
stop_stage=${4:-0}
stage=${4:-0}

# Shared synthesize_e2e.py invocation.
#   $1 vocoder name, $2 vocoder config, $3 vocoder checkpoint, $4 vocoder stats
run_synthesize_e2e() {
    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 ${BIN_DIR}/../synthesize_e2e.py \
        --am=fastspeech2_ljspeech \
        --am_config=${config_path} \
        --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
        --am_stat=dump/train/speech_stats.npy \
        --voc=$1 \
        --voc_config=$2 \
        --voc_ckpt=$3 \
        --voc_stat=$4 \
        --lang=en \
        --text=${BIN_DIR}/../../assets/sentences_en.txt \
        --output_dir=${train_output_path}/test_e2e \
        --inference_dir=${train_output_path}/inference \
        --phones_dict=dump/phone_id_map.txt
}

# pwgan
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    run_synthesize_e2e pwgan_ljspeech \
        pwg_ljspeech_ckpt_0.5/pwg_default.yaml \
        pwg_ljspeech_ckpt_0.5/pwg_snapshot_iter_400000.pdz \
        pwg_ljspeech_ckpt_0.5/pwg_stats.npy
fi

# hifigan
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    run_synthesize_e2e hifigan_ljspeech \
        hifigan_ljspeech_ckpt_0.2.0/default.yaml \
        hifigan_ljspeech_ckpt_0.2.0/snapshot_iter_2500000.pdz \
        hifigan_ljspeech_ckpt_0.2.0/feats_stats.npy
fi


================================================
FILE: examples/ljspeech/tts3/run.sh
================================================
#!/bin/bash
# Driver for the FastSpeech2 LJSpeech recipe. Stages run in numeric order;
# the range is selected with `--stage` / `--stop-stage`.

set -e
source path.sh

gpus=0,1
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_201.pdz

# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this can not be mixed use with `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

# Succeeds when stage number $1 lies inside [stage, stop_stage].
stage_enabled() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# stage 0: prepare data
if stage_enabled 0; then
    ./local/preprocess.sh ${conf_path} || exit -1
fi

# stage 1: train model, all `ckpt` under `train_output_path/checkpoints/` dir
if stage_enabled 1; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

# stage 2: synthesize, vocoder is pwgan by default 0, use 1 will use hifigan as vocoder
if stage_enabled 2; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
fi

# stage 3: synthesize_e2e, vocoder is pwgan by default 0, use 1 will use hifigan as vocoder
if stage_enabled 3; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
fi

# stage 4: inference with static model, vocoder is pwgan by default
if stage_enabled 4; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/inference.sh ${train_output_path} || exit -1
fi

# stage 5: paddle2onnx, please make sure the static models are in
# ${train_output_path}/inference first
# we have only tested the following models so far
if stage_enabled 5; then
    # install paddle2onnx
    pip install paddle2onnx --upgrade
    ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx fastspeech2_ljspeech
    # considering the balance between speed and quality, we recommend that you use hifigan as vocoder
    ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx pwgan_ljspeech
    # ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx hifigan_ljspeech
fi

# stage 6: inference with onnxruntime, use fastspeech2 + pwgan by default
if stage_enabled 6; then
    ./local/ort_predict.sh ${train_output_path}
fi

# stage 7: must run after stage 3 (which stage generated static models)
if stage_enabled 7; then
    ./local/export2lite.sh ${train_output_path} inference pdlite fastspeech2_ljspeech x86
    ./local/export2lite.sh ${train_output_path} inference pdlite pwgan_ljspeech x86
    # ./local/export2lite.sh ${train_output_path} inference pdlite hifigan_ljspeech x86
fi

# stage 8: prediction through ./local/lite_predict.sh
if stage_enabled 8; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/lite_predict.sh ${train_output_path} || exit -1
fi

================================================
FILE: examples/ljspeech/voc0/README.md
================================================
# WaveFlow with LJSpeech
## Dataset
### Download and Extract
Download LJSpeech-1.1 from its [Official Website](https://keithito.com/LJ-Speech-Dataset/) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/LJSpeech-1.1`.
## Get Started
Assume the path to the dataset is `~/datasets/LJSpeech-1.1`.
Assume the path to the Tacotron2 generated mels is `../tts0/output/test`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs from mels.
```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
./local/preprocess.sh ${preprocess_path}
```
### Model Training
`./local/train.sh` calls `${BIN_DIR}/train.py`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${preprocess_path} ${train_output_path}
```
The training script accepts the following command line arguments.
1. `--data` is the path of the training dataset.
2. `--output` is the path of the output directory.
3. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

If you want distributed training, set a larger `--ngpu` (e.g. 4). Note that distributed training with cpu is not supported yet.

### Synthesizing
`./local/synthesize.sh` calls `${BIN_DIR}/synthesize.py`, which can synthesize waveform from mels.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${input_mel_path} ${train_output_path} ${ckpt_name}
```

Synthesize waveform.
1. We assume the `--input` is a directory containing several mel spectrograms(log magnitude) in `.npy` format.
2. The output would be saved in the `--output` directory, containing several `.wav` files, each with the same name as the mel spectrogram does.
3. `--checkpoint_path` should be the path of the parameter file (`.pdparams`) to load. Note that the extension name `.pdparams` is not included here.
4. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

## Pretrained Model
Pretrained Model with residual channel equals 128 can be downloaded here:
- [waveflow_ljspeech_ckpt_0.3.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/waveflow/waveflow_ljspeech_ckpt_0.3.zip)


================================================
FILE: examples/ljspeech/voc0/local/preprocess.sh
================================================
#!/bin/bash
# Usage: ./local/preprocess.sh <preprocess_path>
#
# Runs ${BIN_DIR}/preprocess.py on the LJSpeech-1.1 corpus located under the
# user's home directory and writes the result to <preprocess_path>.
# BIN_DIR is read from the environment.

preprocess_path=$1

# Use ${HOME} instead of `~`: bash performs no tilde expansion inside a
# `--option=~/path` word, so a literal `~` would be handed to the program.
python3 "${BIN_DIR}/preprocess.py" \
    --input="${HOME}/datasets/LJSpeech-1.1" \
    --output="${preprocess_path}"

================================================
FILE: examples/ljspeech/voc0/local/synthesize.sh
================================================
#!/bin/bash
# Usage: ./local/synthesize.sh <input_mel_path> <train_output_path> <ckpt_name>
#
# Runs ${BIN_DIR}/synthesize.py on the mels in <input_mel_path>, loading
# <train_output_path>/checkpoints/<ckpt_name> and writing to
# <train_output_path>/wavs/. BIN_DIR is read from the environment.

input_mel_path=$1
train_output_path=$2
ckpt_name=$3

# python3 (not bare `python`) to match the sibling train.sh / preprocess.sh,
# so every step of the recipe runs under the same interpreter.
python3 "${BIN_DIR}/synthesize.py" \
    --input="${input_mel_path}" \
    --output="${train_output_path}/wavs/" \
    --checkpoint_path="${train_output_path}/checkpoints/${ckpt_name}" \
    --ngpu=1

================================================
FILE: examples/ljspeech/voc0/local/train.sh
================================================
#!/bin/bash
# Usage: ./local/train.sh <preprocess_path> <train_output_path>
#
# Launches ${BIN_DIR}/train.py on the preprocessed data with --ngpu=1.

data_dir=$1
out_dir=$2

# Collect the options first, then hand them over in a single call.
train_args=(
    --data=${data_dir}
    --output=${out_dir}
    --ngpu=1
)

python3 ${BIN_DIR}/train.py "${train_args[@]}"

================================================
FILE: examples/ljspeech/voc0/path.sh
================================================
#!/bin/bash
# Environment setup for the recipe; run.sh in this directory sources it.

# Repository root: three levels above the current working directory.
export MAIN_ROOT=$(realpath ${PWD}/../../../)

# Make the repository root and its utils/ directory reachable as commands
# and as Python import roots.
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export LC_ALL=C PYTHONDONTWRITEBYTECODE=1 PYTHONIOENCODING=UTF-8

# Directory holding the experiment scripts of the selected model.
MODEL=waveflow
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}

================================================
FILE: examples/ljspeech/voc0/run.sh
================================================
#!/bin/bash
# Driver for the WaveFlow LJSpeech recipe: preprocess -> train -> synthesize.

set -e
source path.sh

gpus=0,1
stage=0
stop_stage=100

preprocess_path=preprocessed_ljspeech
train_output_path=output
# Directory of mel spectrograms to synthesize from. Declared empty so that it
# stays overridable from the command line; when left empty it is derived from
# the final ${preprocess_path} after option parsing (see below).
input_mel_path=
ckpt_name=step-10000

# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this can not be mixed use with `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

# Derive the default only now: computing it before the options are parsed
# would keep pointing at the default preprocess directory even when
# `--preprocess-path` is given, while stage 2 fills the overridden one.
if [ -z "${input_mel_path}" ]; then
    input_mel_path=${preprocess_path}/mel_test
fi

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # prepare data
    ./local/preprocess.sh ${preprocess_path} || exit -1
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${preprocess_path} ${train_output_path} || exit -1
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # copy a few preprocessed mels into mel_test as sample input
    mkdir -p ${preprocess_path}/mel_test
    cp ${preprocess_path}/mel/LJ050-001*.npy ${preprocess_path}/mel_test/
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${input_mel_path} ${train_output_path} ${ckpt_name} || exit -1
fi


================================================
FILE: examples/ljspeech/voc1/README.md
================================================
# Parallel WaveGAN with the LJSpeech-1.1
This example contains code used to train a [parallel wavegan](http://arxiv.org/abs/1910.11480) model with [LJSpeech-1.1](https://keithito.com/LJ-Speech-Dataset/).
## Dataset
### Download and Extract
Download LJSpeech-1.1 from its [Official Website](https://keithito.com/LJ-Speech-Dataset/) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/LJSpeech-1.1`.
### Get MFA Result and Extract
We use [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) results to cut the silence in the edge of audio.
You can download it from here: [ljspeech_alignment.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/LJSpeech-1.1/ljspeech_alignment.tar.gz), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) in our repo.

## Get Started
Assume the path to the dataset is `~/datasets/LJSpeech-1.1`.
Assume the path to the MFA result of LJSpeech-1.1 is `./ljspeech_alignment`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.

```text
dump
├── dev
│   ├── norm
│   └── raw
├── test
│   ├── norm
│   └── raw
└── train
    ├── norm
    ├── raw
    └── feats_stats.npy
```

The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The `raw` folder contains the log magnitude of the mel spectrogram of each utterance, while the norm folder contains the normalized spectrogram. The statistics used to normalize the spectrogram are computed from the training set, which is located in `dump/train/feats_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains id and paths to the spectrogram of each utterance.

### Model Training
`./local/train.sh` calls `${BIN_DIR}/train.py`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
Here's the complete help message.

```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU] [--batch-size BATCH_SIZE] [--max-iter MAX_ITER]
                [--run-benchmark RUN_BENCHMARK]
                [--profiler_options PROFILER_OPTIONS]

Train a ParallelWaveGAN model.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       ParallelWaveGAN config file.
  --train-metadata TRAIN_METADATA
                        training data.
  --dev-metadata DEV_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.

benchmark:
  arguments related to benchmark.

  --batch-size BATCH_SIZE
                        batch size.
  --max-iter MAX_ITER   train max steps.
  --run-benchmark RUN_BENCHMARK
                        runing benchmark or not, if True, use the --batch-size
                        and --max-iter.
  --profiler_options PROFILER_OPTIONS
                        The option of profiler, which should be in format
                        "key1=value1;key2=value2;key3=value3".
```

1. `--config` is a config file in yaml format to overwrite the default config, which can be found at `conf/default.yaml`.
2. `--train-metadata` and `--dev-metadata` should be the metadata file in the normalized subfolder of `train` and `dev` in the `dump` folder.
3. `--output-dir` is the directory to save the results of the experiment. Checkpoints are saved in `checkpoints/` inside this directory.
4. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

### Synthesizing
`./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
```text
usage: synthesize.py [-h] [--generator-type GENERATOR_TYPE] [--config CONFIG]
                     [--checkpoint CHECKPOINT] [--test-metadata TEST_METADATA]
                     [--output-dir OUTPUT_DIR] [--ngpu NGPU]

Synthesize with GANVocoder.

optional arguments:
  -h, --help            show this help message and exit
  --generator-type GENERATOR_TYPE
                        type of GANVocoder, should in {pwgan, mb_melgan,
                        style_melgan, } now
  --config CONFIG       GANVocoder config file.
  --checkpoint CHECKPOINT
                        snapshot to load.
  --test-metadata TEST_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
```

1. `--config` parallel wavegan config file. You should use the same config with which the model is trained.
2. `--checkpoint` is the checkpoint to load. Pick one of the checkpoints from `checkpoints` inside the training output directory.
3. `--test-metadata` is the metadata of the test dataset. Use the `metadata.jsonl` in the `dev/norm` subfolder from the processed directory.
4. `--output-dir` is the directory to save the synthesized audio files.
5. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

## Pretrained Model
Pretrained models can be downloaded here:
- [pwg_ljspeech_ckpt_0.5.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_ljspeech_ckpt_0.5.zip)

The static model can be downloaded here:
- [pwgan_ljspeech_static_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_ljspeech_static_1.1.0.zip)

The ONNX model can be downloaded here:
- [pwgan_ljspeech_onnx_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_ljspeech_onnx_1.1.0.zip)

The Paddle-Lite model can be downloaded here:
- [pwgan_ljspeech_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_ljspeech_pdlite_1.3.0.zip)


Parallel WaveGAN checkpoint contains files listed below.

```text
pwg_ljspeech_ckpt_0.5
├── pwg_default.yaml              # default config used to train parallel wavegan
├── pwg_snapshot_iter_400000.pdz  # generator parameters of parallel wavegan
└── pwg_stats.npy                 # statistics used to normalize spectrogram when training parallel wavegan
```
## Acknowledgement
We adapted some code from https://github.com/kan-bayashi/ParallelWaveGAN.


================================================
FILE: examples/ljspeech/voc1/conf/default.yaml
================================================
# This is the hyperparameter configuration file for Parallel WaveGAN.
# Please make sure this is adjusted for the LJSpeech dataset. If you want to
# apply to the other dataset, you might need to carefully change some parameters.
# This configuration requires 12 GB GPU memory and takes ~3 days on TITAN V.

###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
fs: 22050                # Sampling rate.
n_fft: 1024              # FFT size (samples).
n_shift: 256             # Hop size (samples). 11.6ms
win_length: null         # Window length (samples).
                         # If set to null, it will be the same as n_fft.
window: "hann"           # Window function.
n_mels: 80               # Number of mel basis.
fmin: 80                 # Minimum freq in mel basis calculation. (Hz)
fmax: 7600               # Maximum frequency in mel basis calculation. (Hz)

###########################################################
#         GENERATOR NETWORK ARCHITECTURE SETTING          #
###########################################################
generator_params:
    in_channels: 1        # Number of input channels.
    out_channels: 1       # Number of output channels.
    kernel_size: 3        # Kernel size of dilated convolution.
    layers: 30            # Number of residual block layers.
    stacks: 3             # Number of stacks i.e., dilation cycles.
    residual_channels: 64 # Number of channels in residual conv.
    gate_channels: 128    # Number of channels in gated conv.
    skip_channels: 64     # Number of channels in skip conv.
    aux_channels: 80      # Number of channels for auxiliary feature conv.
                          # Must be the same as n_mels.
    aux_context_window: 2 # Context window size for auxiliary feature.
                          # If set to 2, previous 2 and future 2 frames will be considered.
    dropout: 0.0          # Dropout rate. 0.0 means no dropout applied.
    use_weight_norm: True # Whether to use weight norm.
                          # If set to true, it will be applied to all of the conv layers.
    upsample_scales: [4, 4, 4, 4]     # Upsampling scales. prod(upsample_scales) == n_shift

###########################################################
#       DISCRIMINATOR NETWORK ARCHITECTURE SETTING        #
###########################################################
discriminator_params:
    in_channels: 1        # Number of input channels.
    out_channels: 1       # Number of output channels.
    kernel_size: 3        # Kernel size of conv layers.
    layers: 10            # Number of conv layers.
    conv_channels: 64     # Number of channels in conv layers.
    bias: True            # Whether to use bias parameter in conv.
    use_weight_norm: True # Whether to use weight norm.
                          # If set to true, it will be applied to all of the conv layers.
    nonlinear_activation: "leakyrelu" # Nonlinear function after each conv.
    nonlinear_activation_params:      # Nonlinear function parameters
        negative_slope: 0.2           # Alpha in leakyrelu.

###########################################################
#                   STFT LOSS SETTING                     #
###########################################################
stft_loss_params:
    fft_sizes: [1024, 2048, 512]  # List of FFT size for STFT-based loss.
    hop_sizes: [120, 240, 50]     # List of hop size for STFT-based loss
    win_lengths: [600, 1200, 240] # List of window length for STFT-based loss.
    window: "hann"                # Window function for STFT-based loss

###########################################################
#               ADVERSARIAL LOSS SETTING                  #
###########################################################
lambda_adv: 4.0  # Loss balancing coefficient.

###########################################################
#                  DATA LOADER SETTING                    #
###########################################################
batch_size: 8              # Batch size.
batch_max_steps: 25600     # Length of each audio in batch. Make sure it is divisible by n_shift.
num_workers: 2             # Number of workers in DataLoader.

###########################################################
#             OPTIMIZER & SCHEDULER SETTING               #
###########################################################
generator_optimizer_params:
    epsilon: 1.0e-6        # Generator's epsilon.
    weight_decay: 0.0      # Generator's weight decay coefficient.
generator_scheduler_params:
    learning_rate: 0.0001  # Generator's learning rate.
    step_size: 200000      # Generator's scheduler step size.
    gamma: 0.5             # Generator's scheduler gamma.
                           # At each step size, lr will be multiplied by this parameter.
generator_grad_norm: 10    # Generator's gradient norm.
discriminator_optimizer_params:
    epsilon: 1.0e-6         # Discriminator's epsilon.
    weight_decay: 0.0       # Discriminator's weight decay coefficient.
discriminator_scheduler_params:
    learning_rate: 0.00005  # Discriminator's learning rate.
    step_size: 200000       # Discriminator's scheduler step size.
    gamma: 0.5              # Discriminator's scheduler gamma.
                            # At each step size, lr will be multiplied by this parameter.
discriminator_grad_norm: 1  # Discriminator's gradient norm.

###########################################################
#                    INTERVAL SETTING                     #
###########################################################
discriminator_train_start_steps: 100000 # Number of steps to start to train discriminator.
train_max_steps: 400000                 # Number of training steps.
save_interval_steps: 5000               # Interval steps to save checkpoint.
eval_interval_steps: 1000               # Interval steps to evaluate the network.

###########################################################
#                     OTHER SETTING                       #
###########################################################
num_save_intermediate_results: 4  # Number of results to be saved as intermediate results.
num_snapshots: 10                 # max number of snapshots to keep while training
seed: 42                          # random seed for paddle, random, and np.random

================================================
FILE: examples/ljspeech/voc1/local/preprocess.sh
================================================
#!/bin/bash
# Data preparation for the Parallel WaveGAN LJSpeech recipe.
#
# Usage: ./local/preprocess.sh <config_path>
#
# MAIN_ROOT and BIN_DIR are read from the environment; this script does not
# set them itself.

stage=0
stop_stage=100

config_path=$1

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # get durations from MFA's result
    echo "Generate durations.txt from MFA results ..."
    python3 "${MAIN_ROOT}/utils/gen_duration_from_textgrid.py" \
        --inputdir=./ljspeech_alignment \
        --output=durations.txt \
        --config="${config_path}"
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # extract features
    echo "Extract features ..."
    # Use ${HOME} instead of `~`: bash performs no tilde expansion inside a
    # `--option=~/path` word, so a literal `~` would be handed to the program.
    python3 "${BIN_DIR}/../preprocess.py" \
        --rootdir="${HOME}/datasets/LJSpeech-1.1/" \
        --dataset=ljspeech \
        --dumpdir=dump \
        --dur-file=durations.txt \
        --config="${config_path}" \
        --cut-sil=True \
        --num-cpu=20
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # get features' stats(mean and std)
    echo "Get features' stats ..."
    python3 "${MAIN_ROOT}/utils/compute_statistics.py" \
        --metadata=dump/train/raw/metadata.jsonl \
        --field-name="feats"
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # normalize, dev and test should use train's stats
    echo "Normalize ..."
    for subset in train dev test; do
        python3 "${BIN_DIR}/../normalize.py" \
            --metadata="dump/${subset}/raw/metadata.jsonl" \
            --dumpdir="dump/${subset}/norm" \
            --stats=dump/train/feats_stats.npy \
            --skip-wav-copy
    done
fi


================================================
FILE: examples/ljspeech/voc1/run.sh
================================================
#!/bin/bash
# Driver for the Parallel WaveGAN LJSpeech recipe: preprocess -> train ->
# synthesize. The stage range is selected with `--stage` / `--stop-stage`.

set -e
source path.sh

gpus=0,1
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_5000.pdz

# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this can not be mixed use with `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

# Succeeds when stage number $1 lies inside [stage, stop_stage].
stage_enabled() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# stage 0: prepare data
if stage_enabled 0; then
    ./local/preprocess.sh ${conf_path} || exit -1
fi

# stage 1: train model, all `ckpt` under `train_output_path/checkpoints/` dir
if stage_enabled 1; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

# stage 2: synthesize
if stage_enabled 2; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi


================================================
FILE: examples/ljspeech/voc5/README.md
================================================
# HiFiGAN with the LJSpeech-1.1
This example contains code used to train a [HiFiGAN](https://arxiv.org/abs/2010.05646) model with [LJSpeech-1.1](https://keithito.com/LJ-Speech-Dataset/).
## Dataset
### Download and Extract
Download LJSpeech-1.1 from its [Official Website](https://keithito.com/LJ-Speech-Dataset/) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/LJSpeech-1.1`.
### Get MFA Result and Extract
We use [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) results to cut the silence in the edge of audio.
You can download it from here: [ljspeech_alignment.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/LJSpeech-1.1/ljspeech_alignment.tar.gz), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) in our repo.

## Get Started
Assume the path to the dataset is `~/datasets/LJSpeech-1.1`.
Assume the path to the MFA result of LJSpeech-1.1 is `./ljspeech_alignment`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.

```text
dump
├── dev
│   ├── norm
│   └── raw
├── test
│   ├── norm
│   └── raw
└── train
    ├── norm
    ├── raw
    └── feats_stats.npy
```

The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The `raw` folder contains the log magnitude of the mel spectrogram of each utterance, while the norm folder contains the normalized spectrogram. The statistics used to normalize the spectrogram are computed from the training set, which is located in `dump/train/feats_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains id and paths to the spectrogram of each utterance.

### Model Training
`./local/train.sh` calls `${BIN_DIR}/train.py`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
Here's the complete help message.

```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU]

Train a HiFiGAN model.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       HiFiGAN config file.
  --train-metadata TRAIN_METADATA
                        training data.
  --dev-metadata DEV_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
```

1. `--config` is a config file in yaml format to overwrite the default config, which can be found at `conf/default.yaml`.
2. `--train-metadata` and `--dev-metadata` should be the metadata file in the normalized subfolder of `train` and `dev` in the `dump` folder.
3. `--output-dir` is the directory to save the results of the experiment. Checkpoints are saved in `checkpoints/` inside this directory.
4. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

### Synthesizing
`./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
```text
usage: synthesize.py [-h] [--generator-type GENERATOR_TYPE] [--config CONFIG]
                     [--checkpoint CHECKPOINT] [--test-metadata TEST_METADATA]
                     [--output-dir OUTPUT_DIR] [--ngpu NGPU]

Synthesize with GANVocoder.

optional arguments:
  -h, --help            show this help message and exit
  --generator-type GENERATOR_TYPE
                        type of GANVocoder, should in {pwgan, mb_melgan,
                        style_melgan, } now
  --config CONFIG       GANVocoder config file.
  --checkpoint CHECKPOINT
                        snapshot to load.
  --test-metadata TEST_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
```

1. `--config` is the HiFiGAN config file. You should use the same config with which the model is trained.
2. `--checkpoint` is the checkpoint to load. Pick one of the checkpoints from `checkpoints` inside the training output directory.
3. `--test-metadata` is the metadata of the test dataset. Use the `metadata.jsonl` in the `dev/norm` subfolder from the processed directory.
4. `--output-dir` is the directory to save the synthesized audio files.
5. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

## Pretrained Model
The pretrained model can be downloaded here:
- [hifigan_ljspeech_ckpt_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_ljspeech_ckpt_0.2.0.zip)

The static model can be downloaded here:
- [hifigan_ljspeech_static_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_ljspeech_static_1.1.0.zip)

The ONNX model can be downloaded here:
- [hifigan_ljspeech_onnx_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_ljspeech_onnx_1.1.0.zip)

The Paddle-Lite model can be downloaded here:
- [hifigan_ljspeech_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_ljspeech_pdlite_1.3.0.zip)

Model | Step | eval/generator_loss | eval/mel_loss| eval/feature_matching_loss
:-------------:| :------------:| :-----: | :-----: | :--------:
default| 1(gpu) x 2500000|24.492|0.115|7.227

HiFiGAN checkpoint contains files listed below.

```text
hifigan_ljspeech_ckpt_0.2.0
├── default.yaml                  # default config used to train hifigan
├── feats_stats.npy               # statistics used to normalize spectrogram when training hifigan
└── snapshot_iter_2500000.pdz     # generator parameters of hifigan
```

## Acknowledgement
We adapted some code from https://github.com/kan-bayashi/ParallelWaveGAN.


================================================
FILE: examples/ljspeech/voc5/conf/default.yaml
================================================
# This is the configuration file for LJSpeech dataset.
# This configuration is based on HiFiGAN V1, which is an official configuration. 
# But I found that the optimizer setting does not work well with my implementation.
# So I changed optimizer settings as follows:
# - AdamW -> Adam
# - betas: [0.8, 0.99] -> betas: [0.5, 0.9]
# - Scheduler: ExponentialLR -> MultiStepLR
# Note: n_shift is 256 here, the same as the original shift setting, so the product of
# upsample_scales (8 * 8 * 2 * 2 = 256) matches the hop size.

###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
fs: 22050                # Sampling rate.
n_fft: 1024              # FFT size (samples).
n_shift: 256             # Hop size (samples). 11.6ms
win_length: null         # Window length (samples).
                         # If set to null, it will be the same as fft_size.
window: "hann"           # Window function.
n_mels: 80               # Number of mel basis.
fmin: 80                 # Minimum freq in mel basis calculation. (Hz)
fmax: 7600               # Maximum frequency in mel basis calculation. (Hz)

###########################################################
#         GENERATOR NETWORK ARCHITECTURE SETTING          #
###########################################################
generator_params:
    in_channels: 80                       # Number of input channels.
    out_channels: 1                       # Number of output channels.
    channels: 512                         # Number of initial channels.
    kernel_size: 7                        # Kernel size of initial and final conv layers.
    upsample_scales: [8, 8, 2, 2]         # Upsampling scales.
    upsample_kernel_sizes: [16, 16, 4, 4] # Kernel size for upsampling layers.
    resblock_kernel_sizes: [3, 7, 11]     # Kernel size for residual blocks.
    resblock_dilations:                   # Dilations for residual blocks.
        - [1, 3, 5]
        - [1, 3, 5]
        - [1, 3, 5]
    use_additional_convs: True            # Whether to use additional conv layer in residual blocks.
    bias: True                            # Whether to use bias parameter in conv.
    nonlinear_activation: "leakyrelu"     # Nonlinear activation type.
    nonlinear_activation_params:          # Nonlinear activation parameters.
        negative_slope: 0.1
    use_weight_norm: True                 # Whether to apply weight normalization.


###########################################################
#       DISCRIMINATOR NETWORK ARCHITECTURE SETTING        #
###########################################################
discriminator_params:
    scales: 3                              # Number of multi-scale discriminator.
    scale_downsample_pooling: "AvgPool1D"  # Pooling operation for scale discriminator.
    scale_downsample_pooling_params:
        kernel_size: 4                     # Pooling kernel size.
        stride: 2                          # Pooling stride.
        padding: 2                         # Padding size.
    scale_discriminator_params:
        in_channels: 1                     # Number of input channels.
        out_channels: 1                    # Number of output channels.
        kernel_sizes: [15, 41, 5, 3]       # List of kernel sizes.
        channels: 128                      # Initial number of channels.
        max_downsample_channels: 1024      # Maximum number of channels in downsampling conv layers.
        max_groups: 16                     # Maximum number of groups in downsampling conv layers.
        bias: True
        downsample_scales: [4, 4, 4, 4, 1] # Downsampling scales.
        nonlinear_activation: "leakyrelu"  # Nonlinear activation.
        nonlinear_activation_params:
            negative_slope: 0.1
    follow_official_norm: True             # Whether to follow the official norm setting.
    periods: [2, 3, 5, 7, 11]              # List of period for multi-period discriminator.
    period_discriminator_params:
        in_channels: 1                     # Number of input channels.
        out_channels: 1                    # Number of output channels.
        kernel_sizes: [5, 3]               # List of kernel sizes.
        channels: 32                       # Initial number of channels.
        downsample_scales: [3, 3, 3, 3, 1] # Downsampling scales.
        max_downsample_channels: 1024      # Maximum number of channels in downsampling conv layers.
        bias: True                         # Whether to use bias parameter in conv layer.
        nonlinear_activation: "leakyrelu"  # Nonlinear activation.
        nonlinear_activation_params:       # Nonlinear activation parameters.
            negative_slope: 0.1
        use_weight_norm: True              # Whether to apply weight normalization.
        use_spectral_norm: False           # Whether to apply spectral normalization.
    

###########################################################
#                   STFT LOSS SETTING                     #
###########################################################
use_stft_loss: False                 # Whether to use multi-resolution STFT loss.
use_mel_loss: True                   # Whether to use Mel-spectrogram loss.
mel_loss_params:
    fs: 22050
    fft_size: 1024
    hop_size: 256
    win_length: null
    window: "hann"
    num_mels: 80
    fmin: 0
    fmax: 11025
    log_base: null
generator_adv_loss_params:
    average_by_discriminators: False # Whether to average loss by #discriminators.
discriminator_adv_loss_params:
    average_by_discriminators: False # Whether to average loss by #discriminators.
use_feat_match_loss: True
feat_match_loss_params:
    average_by_discriminators: False # Whether to average loss by #discriminators.
    average_by_layers: False         # Whether to average loss by #layers in each discriminator.
    include_final_outputs: False     # Whether to include final outputs in feat match loss calculation.

###########################################################
#               ADVERSARIAL LOSS SETTING                  #
###########################################################
lambda_aux: 45.0       # Loss balancing coefficient for the auxiliary (mel / STFT) loss.
lambda_adv: 1.0        # Loss balancing coefficient for adversarial loss.
lambda_feat_match: 2.0 # Loss balancing coefficient for feat match loss.

###########################################################
#                  DATA LOADER SETTING                    #
###########################################################
batch_size: 16              # Batch size.
batch_max_steps: 8192       # Length of each audio in batch. Make sure it is divisible by hop_size.
num_workers: 2              # Number of workers in DataLoader.

###########################################################
#             OPTIMIZER & SCHEDULER SETTING               #
###########################################################
generator_optimizer_params:
    beta1: 0.5
    beta2: 0.9
    weight_decay: 0.0                   # Generator's weight decay coefficient.
generator_scheduler_params:
    learning_rate: 2.0e-4               # Generator's learning rate.
    gamma: 0.5                          # Generator's scheduler gamma.
    milestones:                         # At each milestone, lr will be multiplied by gamma.
        - 200000
        - 400000
        - 600000
        - 800000
generator_grad_norm: -1                 # Generator's gradient norm.
discriminator_optimizer_params:
    beta1: 0.5
    beta2: 0.9
    weight_decay: 0.0                   # Discriminator's weight decay coefficient.
discriminator_scheduler_params:
    learning_rate: 2.0e-4               # Discriminator's learning rate.
    gamma: 0.5                          # Discriminator's scheduler gamma.
    milestones:                         # At each milestone, lr will be multiplied by gamma.
        - 200000
        - 400000
        - 600000
        - 800000    
discriminator_grad_norm: -1             # Discriminator's gradient norm.            

###########################################################
#                    INTERVAL SETTING                     #
###########################################################
generator_train_start_steps: 1     # Number of steps to start to train generator.
discriminator_train_start_steps: 0 # Number of steps to start to train discriminator.
train_max_steps: 2500000           # Number of training steps.
save_interval_steps: 5000         # Interval steps to save checkpoint.
eval_interval_steps: 1000          # Interval steps to evaluate the network.

###########################################################
#                     OTHER SETTING                       #
###########################################################
num_snapshots: 10                 # max number of snapshots to keep while training
seed: 42                          # random seed for paddle, random, and np.random


================================================
FILE: examples/ljspeech/voc5/run.sh
================================================
#!/bin/bash

# Recipe driver with three stages:
#   0 - data preparation   (./local/preprocess.sh)
#   1 - model training     (./local/train.sh)
#   2 - waveform synthesis (./local/synthesize.sh)

set -e
source path.sh

# Default settings. The variable names are kept stable because
# parse_options.sh (sourced below) presumably maps command-line flags
# such as `--stop-stage` onto them -- TODO confirm for the non-stage ones.
gpus=0,1
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_5000.pdz

# Option parsing lets the caller pick the range of stages to run,
# e.g. `./run.sh --stage 0 --stop-stage 0`.
# It can not be mixed with positional arguments (`$1`, `$2`, ...).
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

# Succeeds when stage number $1 lies within [stage, stop_stage].
_stage_selected() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# Stage 0: prepare data.
if _stage_selected 0; then
    ./local/preprocess.sh ${conf_path} || exit -1
fi

# Stage 1: train the model; every `ckpt` ends up under
# `train_output_path/checkpoints/`.
if _stage_selected 1; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

# Stage 2: synthesize.
if _stage_selected 2; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi


================================================
FILE: examples/mustc/st1/cmd.sh
================================================
# ====== About run.pl, queue.pl, slurm.pl, and ssh.pl ======
# Usage: <cmd>.pl [options] JOB=1:<nj> <log> <command...>
# e.g.
#   run.pl --mem 4G JOB=1:10 echo.JOB.log echo JOB
#
# Options:
#   --time <time>: Limit the maximum time to execute.
#   --mem <mem>: Limit the maximum memory usage.
#   --max-jobs-run <njob>: Limit the number of parallel jobs. This is ignored for non-array jobs.
#   --num-threads <nthreads>: Specify the number of CPU cores.
#   --gpu <ngpu>: Specify the number of GPU devices.
#   --config: Change the configuration file from default.
#
# "JOB=1:10" is used for "array jobs" and it can control the number of parallel jobs.
# The left string of "=", i.e. "JOB", is replaced by <N>(Nth job) in the command and the log file name,
# e.g. "echo JOB" is changed to "echo 3" for the 3rd job and "echo 8" for 8th job respectively.
# Note that the number must start with a positive number, so you can't use "JOB=0:10" for example.
#
# run.pl, queue.pl, slurm.pl, and ssh.pl have unified interface, not depending on its backend.
# These options are mapping to specific options for each backend and
# it is configured by "conf/queue.conf" and "conf/slurm.conf" by default.
# If jobs failed, your configuration might be wrong for your environment.
#
#
# The official documentation for run.pl, queue.pl, slurm.pl, and ssh.pl:
#   "Parallelization in Kaldi": http://kaldi-asr.org/doc/queue.html
# =========================================================~


# Backend used by run.sh to launch jobs. The branches handled below are
# "local", "sge", "slurm", "ssh" and the site-specific example "jhu".
cmd_backend='local'

case "${cmd_backend}" in
    local)
        # Local machine, without any job scheduling system.

        # General-purpose command (everything not covered below).
        export train_cmd="run.pl"
        # For "*_train.py": run.sh optionally appends "--gpu".
        export cuda_cmd="run.pl"
        # For "*_recog.py".
        export decode_cmd="run.pl"
        ;;

    sge)
        # "qsub" (SGE, Torque, PBS, etc.)
        # Defaults live in conf/queue.conf.
        # Change "-q g.q" to the "queue" of your environment;
        # "qhost -q" lists the available "queue" names.
        # Using "--gpu *" requires "complex_value" to be set up for the
        # system scheduler.
        export train_cmd="queue.pl"
        export cuda_cmd="queue.pl"
        export decode_cmd="queue.pl"
        ;;

    slurm)
        # "sbatch" (Slurm)
        # Defaults live in conf/slurm.conf.
        # Change "-p cpu" and "-p gpu" to the "partition" of your
        # environment; "sinfo" lists the "partition" names.
        # "--gpu * " works by default for slurm and is interpreted as
        # "--gres gpu:*". Devices are allocated exclusively using
        # "${CUDA_VISIBLE_DEVICES}".
        export train_cmd="slurm.pl"
        export cuda_cmd="slurm.pl"
        export decode_cmd="slurm.pl"
        ;;

    ssh)
        # Requires a ".queue/machines" file naming the hosts that execute
        # jobs, one per line, e.g.
        #   host1
        #   host2
        #   host3
        # Password-less login to those hosts is assumed, i.e. ssh keys
        # have to be set up.
        export train_cmd="ssh.pl"
        export cuda_cmd="ssh.pl"
        export decode_cmd="ssh.pl"
        ;;

    jhu)
        # Example of several cluster-specific options (JHU CLSP cluster).
        # Modify or add command options to suit your own cluster.
        export train_cmd="queue.pl --mem 2G"
        export cuda_cmd="queue-freegpu.pl --mem 2G --gpu 1 --config conf/gpu.conf"
        export decode_cmd="queue.pl --mem 4G"
        ;;

    *)
        # `return` (not `exit`) is used here, as in a file meant to be sourced.
        echo "$0: Error: Unknown cmd_backend=${cmd_backend}" 1>&2
        return 1
        ;;
esac


================================================
FILE: examples/mustc/st1/conf/fbank.conf
================================================
--sample-frequency=16000 
--num-mel-bins=80


================================================
FILE: examples/mustc/st1/conf/pitch.conf
================================================
--sample-frequency=16000


================================================
FILE: examples/mustc/st1/conf/transformer_de.yaml
================================================
# https://yaml.org/type/float.html
###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.de.train
dev_manifest: data/manifest.de.dev
test_manifest: data/manifest.de.test

###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_1spm/train_sp.en-de.de_bpe8000_units_tc.txt
unit_type: 'spm'
spm_model_prefix: data/lang_1spm/train_sp.en-de.de_bpe8000_tc
mean_std_filepath: ""
# preprocess_config: conf/augmentation.json
batch_size: 20
feat_dim: 83
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
preprocess_config:
num_workers: 0
subsampling_factor: 1
num_encs: 1


############################################
#           Network Architecture           #
############################################
cmvn_file: None
cmvn_file_type: "json"
# encoder related
encoder: transformer
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 and conv2d8
    normalize_before: true

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    asr_weight: 0.0
    ctc_weight: 0.0
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false


###########################################
#                Training                 #
###########################################
n_epoch: 40
accum_grad: 2
global_grad_clip: 5.0
optim: adam
optim_conf:
  lr: 2.5
  weight_decay: 0.
scheduler: noam    
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 50
checkpoint:
  kbest_n: 50
  latest_n: 5

================================================
FILE: examples/mustc/st1/conf/transformer_es.yaml
================================================
# https://yaml.org/type/float.html
###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.es.train
dev_manifest: data/manifest.es.dev
test_manifest: data/manifest.es.test

###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_1spm/train_sp.en-es.es_bpe8000_units_tc.txt
unit_type: 'spm'
spm_model_prefix: data/lang_1spm/train_sp.en-es.es_bpe8000_tc
mean_std_filepath: ""
# preprocess_config: conf/augmentation.json
batch_size: 20
feat_dim: 83
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
preprocess_config:
num_workers: 0
subsampling_factor: 1
num_encs: 1


############################################
#           Network Architecture           #
############################################
cmvn_file: None
cmvn_file_type: "json"
# encoder related
encoder: transformer
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 and conv2d8
    normalize_before: true

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    asr_weight: 0.0
    ctc_weight: 0.0
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false


###########################################
#                Training                 #
###########################################
n_epoch: 40
accum_grad: 2
global_grad_clip: 5.0
optim: adam
optim_conf:
  lr: 2.5
  weight_decay: 0.
scheduler: noam    
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 50
checkpoint:
  kbest_n: 50
  latest_n: 5

================================================
FILE: examples/mustc/st1/conf/transformer_fr.yaml
================================================
# https://yaml.org/type/float.html
###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.fr.train
dev_manifest: data/manifest.fr.dev
test_manifest: data/manifest.fr.test

###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_1spm/train_sp.en-fr.fr_bpe8000_units_tc.txt
unit_type: 'spm'
spm_model_prefix: data/lang_1spm/train_sp.en-fr.fr_bpe8000_tc
mean_std_filepath: ""
# preprocess_config: conf/augmentation.json
batch_size: 20
feat_dim: 83
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
preprocess_config:
num_workers: 0
subsampling_factor: 1
num_encs: 1


############################################
#           Network Architecture           #
############################################
cmvn_file: None
cmvn_file_type: "json"
# encoder related
encoder: transformer
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 and conv2d8
    normalize_before: true

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    asr_weight: 0.0
    ctc_weight: 0.0
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false


###########################################
#                Training                 #
###########################################
n_epoch: 40
accum_grad: 2
global_grad_clip: 5.0
optim: adam
optim_conf:
  lr: 2.5
  weight_decay: 0.
scheduler: noam    
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 50
checkpoint:
  kbest_n: 50
  latest_n: 5

================================================
FILE: examples/mustc/st1/conf/transformer_it.yaml
================================================
# https://yaml.org/type/float.html
###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.it.train
dev_manifest: data/manifest.it.dev
test_manifest: data/manifest.it.test

###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_1spm/train_sp.en-it.it_bpe8000_units_tc.txt
unit_type: 'spm'
spm_model_prefix: data/lang_1spm/train_sp.en-it.it_bpe8000_tc
mean_std_filepath: ""
# preprocess_config: conf/augmentation.json
batch_size: 20
feat_dim: 83
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
preprocess_config:
num_workers: 0
subsampling_factor: 1
num_encs: 1


############################################
#           Network Architecture           #
############################################
cmvn_file: None
cmvn_file_type: "json"
# encoder related
encoder: transformer
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 and conv2d8
    normalize_before: true

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    asr_weight: 0.0
    ctc_weight: 0.0
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false


###########################################
#                Training                 #
###########################################
n_epoch: 40
accum_grad: 2
global_grad_clip: 5.0
optim: adam
optim_conf:
  lr: 2.5
  weight_decay: 0.
scheduler: noam    
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 50
checkpoint:
  kbest_n: 50
  latest_n: 5

================================================
FILE: examples/mustc/st1/conf/transformer_nl.yaml
================================================
# https://yaml.org/type/float.html
###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.nl.train
dev_manifest: data/manifest.nl.dev
test_manifest: data/manifest.nl.test

###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_1spm/train_sp.en-nl.nl_bpe8000_units_tc.txt
unit_type: 'spm'
spm_model_prefix: data/lang_1spm/train_sp.en-nl.nl_bpe8000_tc
mean_std_filepath: ""
# preprocess_config: conf/augmentation.json
batch_size: 20
feat_dim: 83
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
preprocess_config:
num_workers: 0
subsampling_factor: 1
num_encs: 1


############################################
#           Network Architecture           #
############################################
cmvn_file: None
cmvn_file_type: "json"
# encoder related
encoder: transformer
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 and conv2d8
    normalize_before: true

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    asr_weight: 0.0
    ctc_weight: 0.0
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false


###########################################
#                Training                 #
###########################################
n_epoch: 40
accum_grad: 2
global_grad_clip: 5.0
optim: adam
optim_conf:
  lr: 2.5
  weight_decay: 0.
scheduler: noam    
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 50
checkpoint:
  kbest_n: 50
  latest_n: 5

================================================
FILE: examples/mustc/st1/conf/transformer_pt.yaml
================================================
# https://yaml.org/type/float.html
###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.pt.train
dev_manifest: data/manifest.pt.dev
test_manifest: data/manifest.pt.test

###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_1spm/train_sp.en-pt.pt_bpe8000_units_tc.txt
unit_type: 'spm'
spm_model_prefix: data/lang_1spm/train_sp.en-pt.pt_bpe8000_tc
mean_std_filepath: ""
# preprocess_config: conf/augmentation.json
batch_size: 20
feat_dim: 83
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
preprocess_config:
num_workers: 0
subsampling_factor: 1
num_encs: 1


############################################
#           Network Architecture           #
############################################
cmvn_file: None
cmvn_file_type: "json"
# encoder related
encoder: transformer
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 and conv2d8
    normalize_before: true

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    asr_weight: 0.0
    ctc_weight: 0.0
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false


###########################################
#                Training                 #
###########################################
n_epoch: 40
accum_grad: 2
global_grad_clip: 5.0
optim: adam
optim_conf:
  lr: 2.5
  weight_decay: 0.
scheduler: noam    
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 50
checkpoint:
  kbest_n: 50
  latest_n: 5

================================================
FILE: examples/mustc/st1/conf/transformer_ro.yaml
================================================
# https://yaml.org/type/float.html
###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.ro.train
dev_manifest: data/manifest.ro.dev
test_manifest: data/manifest.ro.test

###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_1spm/train_sp.en-ro.ro_bpe8000_units_tc.txt
unit_type: 'spm'
spm_model_prefix: data/lang_1spm/train_sp.en-ro.ro_bpe8000_tc
mean_std_filepath: ""
# preprocess_config: conf/augmentation.json
batch_size: 20
feat_dim: 83
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
preprocess_config:
num_workers: 0
subsampling_factor: 1
num_encs: 1


############################################
#           Network Architecture           #
############################################
cmvn_file: None
cmvn_file_type: "json"
# encoder related
encoder: transformer
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 and conv2d8
    normalize_before: true

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    asr_weight: 0.0
    ctc_weight: 0.0
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false


###########################################
#                Training                 #
###########################################
n_epoch: 40
accum_grad: 2
global_grad_clip: 5.0
optim: adam
optim_conf:
  lr: 2.5
  weight_decay: 0.
scheduler: noam    
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 50
checkpoint:
  kbest_n: 50
  latest_n: 5

================================================
FILE: examples/mustc/st1/conf/transformer_ru.yaml
================================================
# https://yaml.org/type/float.html
###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.ru.train
dev_manifest: data/manifest.ru.dev
test_manifest: data/manifest.ru.test

###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_1spm/train_sp.en-ru.ru_bpe8000_units_tc.txt
unit_type: 'spm'
spm_model_prefix: data/lang_1spm/train_sp.en-ru.ru_bpe8000_tc
mean_std_filepath: ""
# preprocess_config: conf/augmentation.json
batch_size: 20
feat_dim: 83
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
preprocess_config:
num_workers: 0
subsampling_factor: 1
num_encs: 1


############################################
#           Network Architecture           #
############################################
cmvn_file: None
cmvn_file_type: "json"
# encoder related
encoder: transformer
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 and conv2d8
    normalize_before: true

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    asr_weight: 0.0
    ctc_weight: 0.0
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false


###########################################
#                Training                 #
###########################################
n_epoch: 40
accum_grad: 2
global_grad_clip: 5.0
optim: adam
optim_conf:
  lr: 2.5
  weight_decay: 0.
scheduler: noam    
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 50
checkpoint:
  kbest_n: 50
  latest_n: 5

================================================
FILE: examples/mustc/st1/local/augmentation.json
================================================
[
  {
    "type": "specaug",
    "params": {
      "W": 5,
      "warp_mode": "PIL",
      "F": 30,
      "n_freq_masks": 2,
      "T": 40,
      "n_time_masks": 2,
      "p": 1.0,
      "adaptive_number_ratio": 0,
      "adaptive_size_ratio": 0,
      "max_n_time_masks": 20,
      "replace_with_zero": false
    },
    "prob": 1.0
  }
]


================================================
FILE: examples/mustc/st1/local/data.sh
================================================
#!/bin/bash

# Copyright 2019 Kyoto University (Hirofumi Inaguma)
#           2021 PaddlePaddle
#  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)

# Stop on the first failing command and on any reference to an unset variable.
set -eu

# Range of stages to execute (inclusive on both ends).
stage=-1
stop_stage=10

# Defaults below must be assigned before parse_options.sh is sourced; callers
# override them on the command line (e.g. `--tgt_lang es --must_c <dir>`).
tgt_lang=            # target language code(s); '_' separates several codes
nbpe=8000            # subword vocabulary size
bpemode=bpe          # subword model type (unigram or bpe)
must_c=              # root directory of the unpacked MuST-C corpus
dumpdir=data/dump    # where normalized features are dumped
do_delta=false
tgt_case=tc          # text variant used for the target side
src_case=lc.rm       # text variant used for the source side
source ${MAIN_ROOT}/utils/parse_options.sh

TARGET_DIR=${MAIN_ROOT}/examples/dataset
mkdir -p ${TARGET_DIR}
mkdir -p data

# Names of the data directories used by the later stages.
train_set=train_sp.en-${tgt_lang}.${tgt_lang}
train_dev=dev.en-${tgt_lang}.${tgt_lang}

# One tst-COMMON evaluation set per language code found in tgt_lang.
trans_set=""
for lang in ${tgt_lang//_/ }; do
    trans_set+=" tst-COMMON.en-${lang}.${lang}"
done


# Stage -1: make sure the MuST-C corpus is available locally (no download is attempted).
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
    # ${must_c} has to be quoted: when it is empty, an unquoted `[ ! -e ]`
    # degenerates into a two-argument test on the literal string "-e", which
    # is false, so a missing --must_c would silently pass this check.
    if [ -z "${must_c}" ] || [ ! -e "${must_c}" ]; then
        echo "Error: Dataset is not avaiable. Please download and unzip the dataset"
        echo "Link of Must-c v1, https://ict.fbk.eu/must-c/."
        exit 1
    fi
fi

# Stage 0: build the raw data directories, one data_prep.sh run per language code.
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    echo "stage 0: Data Preparation"
    # '_' separates the language codes packed into tgt_lang.
    for lang in ${tgt_lang//_/ }; do
        local/data_prep.sh ${must_c} ${lang}
    done
fi

# Dump directories for the training and dev features. They are created
# unconditionally (outside the stage guard) because stage 2 also refers to them.
feat_tr_dir=${dumpdir}/${train_set}/delta${do_delta}; mkdir -p ${feat_tr_dir}
feat_dt_dir=${dumpdir}/${train_dev}/delta${do_delta}; mkdir -p ${feat_dt_dir}
# Stage 1: feature extraction, 3-way speed perturbation, source/target split,
# length filtering, global CMVN and feature dumping.
# NOTE(review): $train_cmd is not assigned anywhere in this script; under
# `set -u` it must be provided by the calling environment (presumably exported
# by cmd.sh, which run.sh sources) -- confirm.
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    ### Task dependent. You have to design training and dev sets by yourself.
    ### But you can utilize Kaldi recipes in most cases
    echo "stage 1: Feature Generation"
    fbankdir=fbank
    # Generate the fbank features; by default 80-dimensional fbanks with pitch on each frame
    # NOTE(review): the inner loop uses ${tgt_lang}, not ${lang}, so when tgt_lang
    # holds several '_'-separated codes the same directories are processed once
    # per code -- confirm whether multi-language input is meant to be supported.
    for lang in $(echo ${tgt_lang} | tr '_' ' '); do
        for x in train.en-${tgt_lang} dev.en-${tgt_lang} tst-COMMON.en-${tgt_lang} tst-HE.en-${tgt_lang}; do
            steps/make_fbank_pitch.sh --cmd "$train_cmd" --nj 32 --write_utt2num_frames true \
                data/${x} data/make_fbank/${x} ${fbankdir}
        done
    done

    # speed-perturbed
    # Three copies of the training set (factors 0.9, 1.0, 1.1) are merged into
    # train_sp.en-${tgt_lang}; features are then recomputed for the merged set.
    utils/perturb_data_dir_speed.sh 0.9 data/train.en-${tgt_lang} data/temp1.${tgt_lang}
    utils/perturb_data_dir_speed.sh 1.0 data/train.en-${tgt_lang} data/temp2.${tgt_lang}
    utils/perturb_data_dir_speed.sh 1.1 data/train.en-${tgt_lang} data/temp3.${tgt_lang}
    utils/combine_data.sh --extra-files utt2uniq data/train_sp.en-${tgt_lang} \
        data/temp1.${tgt_lang} data/temp2.${tgt_lang} data/temp3.${tgt_lang}
    rm -r data/temp1.${tgt_lang} data/temp2.${tgt_lang} data/temp3.${tgt_lang}
    utils/fix_data_dir.sh data/train_sp.en-${tgt_lang}
    steps/make_fbank_pitch.sh --cmd "$train_cmd" --nj 32 --write_utt2num_frames true \
        data/train_sp.en-${tgt_lang} data/make_fbank/train_sp.en-${tgt_lang} ${fbankdir}
    # Rebuild the text files of the merged set: for each speed prefix, write an
    # "<utt-id> <prefix><utt-id>" map and rewrite the first column of every text
    # variant with it. The sp0.9 pass truncates the outputs (>), the sp1.0 and
    # sp1.1 passes append (>>). utt_map is overwritten before each pass.
    for lang in en ${tgt_lang}; do
        awk -v p="sp0.9-" '{printf("%s %s%s\n", $1, p, $1);}' data/train.en-${tgt_lang}/utt2spk > data/train_sp.en-${tgt_lang}/utt_map
        utils/apply_map.pl -f 1 data/train_sp.en-${tgt_lang}/utt_map <data/train.en-${tgt_lang}/text.tc.${lang} >data/train_sp.en-${tgt_lang}/text.tc.${lang}
        utils/apply_map.pl -f 1 data/train_sp.en-${tgt_lang}/utt_map <data/train.en-${tgt_lang}/text.lc.${lang} >data/train_sp.en-${tgt_lang}/text.lc.${lang}
        utils/apply_map.pl -f 1 data/train_sp.en-${tgt_lang}/utt_map <data/train.en-${tgt_lang}/text.lc.rm.${lang} >data/train_sp.en-${tgt_lang}/text.lc.rm.${lang}
        awk -v p="sp1.0-" '{printf("%s %s%s\n", $1, p, $1);}' data/train.en-${tgt_lang}/utt2spk > data/train_sp.en-${tgt_lang}/utt_map
        utils/apply_map.pl -f 1 data/train_sp.en-${tgt_lang}/utt_map <data/train.en-${tgt_lang}/text.tc.${lang} >>data/train_sp.en-${tgt_lang}/text.tc.${lang}
        utils/apply_map.pl -f 1 data/train_sp.en-${tgt_lang}/utt_map <data/train.en-${tgt_lang}/text.lc.${lang} >>data/train_sp.en-${tgt_lang}/text.lc.${lang}
        utils/apply_map.pl -f 1 data/train_sp.en-${tgt_lang}/utt_map <data/train.en-${tgt_lang}/text.lc.rm.${lang} >>data/train_sp.en-${tgt_lang}/text.lc.rm.${lang}
        awk -v p="sp1.1-" '{printf("%s %s%s\n", $1, p, $1);}' data/train.en-${tgt_lang}/utt2spk > data/train_sp.en-${tgt_lang}/utt_map
        utils/apply_map.pl -f 1 data/train_sp.en-${tgt_lang}/utt_map <data/train.en-${tgt_lang}/text.tc.${lang} >>data/train_sp.en-${tgt_lang}/text.tc.${lang}
        utils/apply_map.pl -f 1 data/train_sp.en-${tgt_lang}/utt_map <data/train.en-${tgt_lang}/text.lc.${lang} >>data/train_sp.en-${tgt_lang}/text.lc.${lang}
        utils/apply_map.pl -f 1 data/train_sp.en-${tgt_lang}/utt_map <data/train.en-${tgt_lang}/text.lc.rm.${lang} >>data/train_sp.en-${tgt_lang}/text.lc.rm.${lang}
    done

    # Divide into source and target languages
    # (creates data/<set>.en and data/<set>.${tgt_lang} for every set)
    for x in train_sp.en-${tgt_lang} dev.en-${tgt_lang} tst-COMMON.en-${tgt_lang} tst-HE.en-${tgt_lang}; do
        local/divide_lang.sh ${x} ${tgt_lang}
    done

    # Filter train and dev only; the test sets are left untouched.
    for x in train_sp.en-${tgt_lang} dev.en-${tgt_lang}; do
        # remove utt having more than 3000 frames
        # remove utt having more than 400 characters
        for lang in ${tgt_lang} en; do
            remove_longshortdata.sh --maxframes 3000 --maxchars 400 data/${x}.${lang} data/${x}.${lang}.tmp
        done

        # Match the number of utterances between source and target languages
        # extract common lines
        # reclist1 = ids kept on the English side, reclist2 = ids kept on the
        # target side; `comm -12` keeps only the ids present in both lists.
        cut -f 1 -d " " data/${x}.en.tmp/text > data/${x}.${tgt_lang}.tmp/reclist1
        cut -f 1 -d " " data/${x}.${tgt_lang}.tmp/text > data/${x}.${tgt_lang}.tmp/reclist2
        comm -12 data/${x}.${tgt_lang}.tmp/reclist1 data/${x}.${tgt_lang}.tmp/reclist2 > data/${x}.en.tmp/reclist

        # Restrict both sides to the shared id list, then drop the temp dirs.
        for lang in ${tgt_lang} en; do
            reduce_data_dir.sh data/${x}.${lang}.tmp data/${x}.en.tmp/reclist data/${x}.${lang}
            utils/fix_data_dir.sh --utt_extra_files "text.tc text.lc text.lc.rm" data/${x}.${lang}
        done
        rm -rf data/${x}.*.tmp
    done

    # compute global CMVN
    # (statistics come from the training set only and are reused for dev/test below)
    compute-cmvn-stats scp:data/${train_set}/feats.scp data/${train_set}/cmvn.ark

    # dump features for training
    # The two create_split_dir.pl calls only run on hosts whose FQDN ends in
    # .clsp.jhu.edu; on any other machine they are skipped.
    if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d ${feat_tr_dir}/storage ]; then
      utils/create_split_dir.pl \
          /export/b{14,15,16,17}/${USER}/espnet-data/egs/must_c/st1/dump/${train_set}/delta${do_delta}/storage \
          ${feat_tr_dir}/storage
    fi
    if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d ${feat_dt_dir}/storage ]; then
      utils/create_split_dir.pl \
          /export/b{14,15,16,17}/${USER}/espnet-data/egs/must_c/st1/dump/${train_dev}/delta${do_delta}/storage \
          ${feat_dt_dir}/storage
    fi
    # Dump train, dev and every test set, all using the training-set cmvn.ark.
    dump.sh --cmd "$train_cmd" --nj 80 --do_delta $do_delta \
        data/${train_set}/feats.scp data/${train_set}/cmvn.ark data/dump_feats/${train_set} ${feat_tr_dir}
    dump.sh --cmd "$train_cmd" --nj 32 --do_delta $do_delta \
        data/${train_dev}/feats.scp data/${train_set}/cmvn.ark data/dump_feats/${train_dev} ${feat_dt_dir}
    for ttask in ${trans_set}; do
        feat_trans_dir=${dumpdir}/${ttask}/delta${do_delta}; mkdir -p ${feat_trans_dir}
        dump.sh --cmd "$train_cmd" --nj 32 --do_delta $do_delta \
            data/${ttask}/feats.scp data/${train_set}/cmvn.ark data/dump_feats/trans/${ttask} \
            ${feat_trans_dir}
    done
fi

# Output locations of stage 2: the unit dictionary, the list of non-linguistic
# symbols and the SentencePiece model prefix. They are assigned (and the
# dictionary path echoed) even when stage 2 itself is skipped.
dict=data/lang_1spm/${train_set}_${bpemode}${nbpe}_units_${tgt_case}.txt
nlsyms=data/lang_1spm/${train_set}_non_lang_syms_${tgt_case}.txt
bpemodel=data/lang_1spm/${train_set}_${bpemode}${nbpe}_${tgt_case}
echo "dictionary: ${dict}"
# Stage 2: train a joint source/target SentencePiece model, write the unit
# dictionary and generate the ESPnet-style json files for train/dev/test.
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    ### Task dependent. You have to check non-linguistic symbols used in the corpus.
    echo "stage 2: Dictionary and Json Data Preparation"
    mkdir -p data/lang_1spm/
    export LC_ALL=C.UTF-8

    echo "make a non-linguistic symbol list for all languages"
    # Only lines matching "sp1.0" are used (presumably the unperturbed copies,
    # whose ids got the "sp1.0-" prefix in stage 1 -- note the '.' is a regex
    # wildcard here). Tokens of the form "&...;" are collected as symbols.
    grep sp1.0 data/train_sp.en-${tgt_lang}.*/text.${tgt_case} | cut -f 2- -d' ' | grep -o -P '&[^;]*;'| sort | uniq > ${nlsyms}
    cat ${nlsyms}

    echo "make a joint source and target dictionary"
    echo "<unk> 1" > ${dict} # <unk> must be 1, 0 will be used for "blank" in CTC
    # offset = number of entries already in the dictionary, so the subword
    # units appended below are numbered starting at offset+1.
    offset=$(wc -l < ${dict})
    # Training text for SentencePiece: target-side text (tgt_case) followed by
    # source-side text (src_case), utterance ids stripped, blank lines removed.
    grep sp1.0 data/train_sp.en-${tgt_lang}.${tgt_lang}/text.${tgt_case} | cut -f 2- -d' ' | grep -v -e '^\s*$' > data/lang_1spm/input_${tgt_lang}.txt
    grep sp1.0 data/train_sp.en-${tgt_lang}.en/text.${src_case} | cut -f 2- -d' ' | grep -v -e '^\s*$' >> data/lang_1spm/input_${tgt_lang}.txt
    # The non-linguistic symbols are passed as user-defined symbols (comma-joined).
    spm_train --user_defined_symbols="$(tr "\n" "," < ${nlsyms})" --input=data/lang_1spm/input_${tgt_lang}.txt --vocab_size=${nbpe} --model_type=${bpemode} --model_prefix=${bpemodel} --input_sentence_size=100000000 --character_coverage=1.0
    # Encode the same text, take the set of distinct pieces and append them to
    # the dictionary as "<piece> <index>".
    spm_encode --model=${bpemodel}.model --output_format=piece < data/lang_1spm/input_${tgt_lang}.txt | tr ' ' '\n' | sort | uniq | awk -v offset=${offset} '{print $0 " " NR+offset}' >> ${dict}
    wc -l ${dict}

    echo "make json files"
    # One json per set, written next to the dumped features of that set.
    data2json.sh --nj 16 --feat ${feat_tr_dir}/feats.scp --text data/${train_set}/text.${tgt_case} --bpecode ${bpemodel}.model --lang ${tgt_lang} \
        data/${train_set} ${dict} > ${feat_tr_dir}/data_${bpemode}${nbpe}.${tgt_case}.json
    data2json.sh --feat ${feat_dt_dir}/feats.scp --text data/${train_dev}/text.${tgt_case} --bpecode ${bpemodel}.model --lang ${tgt_lang} \
        data/${train_dev} ${dict} > ${feat_dt_dir}/data_${bpemode}${nbpe}.${tgt_case}.json
    for ttask in ${trans_set}; do
        feat_trans_dir=${dumpdir}/${ttask}/delta${do_delta}
        data2json.sh --feat ${feat_trans_dir}/feats.scp --text data/${ttask}/text.${tgt_case} --bpecode ${bpemodel}.model --lang ${tgt_lang} \
            data/${ttask} ${dict} > ${feat_trans_dir}/data_${bpemode}${nbpe}.${tgt_case}.json
    done
    echo "update json (add source references)"
    # update json (add source references)
    # Only train and dev are updated. data_dir is the English-side directory of
    # the set: the part of the set name before the first '.' plus ".en-<tgt>.en".
    for x in ${train_set} ${train_dev}; do
        feat_dir=${dumpdir}/${x}/delta${do_delta}
        data_dir=data/$(echo ${x} | cut -f 1 -d ".").en-${tgt_lang}.en
        update_json.sh --text ${data_dir}/text.${src_case} --bpecode ${bpemodel}.model \
            ${feat_dir}/data_${bpemode}${nbpe}.${tgt_case}.json ${data_dir} ${dict}
    done
fi

# Stage 3: convert the ESPnet-style json of each set into a manifest file
# data/manifest.<tgt_lang>.{train,dev,test}.
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # x[i] is the data set name, y[i] the split name used in the manifest file.
    # NOTE(review): y has three entries, so this pairing assumes trans_set
    # holds exactly one test set (a single target language) -- confirm.
    x=(${train_set} ${train_dev} ${trans_set})
    y=(train dev test)
    for (( i=0; i<${#x[*]}; ++i)); do
        echo ${x[$i]} ${y[$i]}
        # Directory holding the dumped features and json of this set.
        # (The previous revision also derived an English-side data_dir here
        # through a subshell; it was never read, so it has been dropped.)
        feat_dir=${dumpdir}/${x[$i]}/delta${do_delta}
        python3 ${MAIN_ROOT}/utils/espnet_json_to_manifest.py \
                --json-file ${feat_dir}/data_${bpemode}${nbpe}.${tgt_case}.json \
                --manifest-file data/manifest.${tgt_lang}.${y[$i]}
        echo "Process done for ${y[$i]} set from ${x[$i]}"
    done
fi


echo "MuST-C ${tgt_lang} Data preparation done."
exit 0


================================================
FILE: examples/mustc/st1/local/data_prep.sh
================================================
#!/bin/bash

# Copyright 2019 Kyoto University (Hirofumi Inaguma)
#  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)

# Prepare Kaldi-style data directories for one MuST-C language pair.
#
# Arguments:
#   $1  root directory of the unpacked MuST-C corpus
#   $2  target language code (the pair is read from <root>/en-<code>)

export LC_ALL=C

source ${MAIN_ROOT}/utils/parse_options.sh

# Exactly two positional arguments are required; the usage line now lists
# both of them (it previously mentioned only <src-dir>).
if [ "$#" -ne 2 ]; then
    echo "Usage: $0 <src-dir> <tgt-lang>"
    echo "e.g.: $0 /n/rd11/corpora_8/MUSTC_v1.0 target_lang"
    exit 1;
fi

tgt_lang=$2

# Process every corpus split. For each one, intermediate files are written to
# data/local/en-<tgt>/<split> and the final Kaldi-style directory to
# data/<split>.en-<tgt>. Any failed sanity check aborts the whole script.
for set in train dev tst-COMMON tst-HE; do
    src=$1/en-${tgt_lang}/data/${set}
    dst=data/local/en-${tgt_lang}/${set}

    [ ! -d ${src} ] && echo "$0: no such directory ${src}" && exit 1;

    # Expected corpus layout: <src>/wav for audio, <src>/txt for the segment
    # yaml and the line-aligned English / target-language text files.
    wav_dir=${src}/wav
    trans_dir=${src}/txt
    yml=${trans_dir}/${set}.yaml
    en=${trans_dir}/${set}.en
    tgt=${trans_dir}/${set}.${tgt_lang}

    mkdir -p ${dst} || exit 1;

    [ ! -d ${wav_dir} ] && echo "$0: no such directory ${wav_dir}" && exit 1;
    [ ! -d ${trans_dir} ] && echo "$0: no such directory ${trans_dir}" && exit 1;
    [ ! -f ${yml} ] && echo "$0: expected file ${yml} to exist" && exit 1;
    [ ! -f ${en} ] && echo "$0: expected file ${en} to exist" && exit 1;
    [ ! -f ${tgt} ] && echo "$0: expected file ${tgt} to exist" && exit 1;

    # Remove outputs left over from a previous run.
    wav_scp=${dst}/wav.scp; [[ -f "${wav_scp}" ]] && rm ${wav_scp}
    trans_en=${dst}/text.en; [[ -f "${trans_en}" ]] && rm ${trans_en}
    trans_tgt=${dst}/text.${tgt_lang}; [[ -f "${trans_tgt}" ]] && rm ${trans_tgt}
    utt2spk=${dst}/utt2spk; [[ -f "${utt2spk}" ]] && rm ${utt2spk}
    spk2utt=${dst}/spk2utt; [[ -f "${spk2utt}" ]] && rm ${spk2utt}
    segments=${dst}/segments; [[ -f "${segments}" ]] && rm ${segments}

    # error check
    # The number of yaml lines containing "duration" must equal the number of
    # lines in both text files. The message says "Warning" but the script exits.
    n=$(cat ${yml} | grep duration | wc -l)
    n_en=$(cat ${en} | wc -l)
    n_tgt=$(cat ${tgt} | wc -l)
    [ ${n} -ne ${n_en} ] && echo "Warning: expected ${n} data data files, found ${n_en}" && exit 1;
    [ ${n} -ne ${n_tgt} ] && echo "Warning: expected ${n} data data files, found ${n_tgt}" && exit 1;

    # (1a) Transcriptions and translations preparation
    # make basic transcription file (add segments info)
    # The awk program below turns every "duration" line into an utterance id
    # ted_<spk, 5 digits>_<start ms, 7 digits>_<end ms, 7 digits>.
    # NOTE(review): it takes whitespace fields 3, 5 and 7 as duration, offset
    # and speaker id, so it presumably expects lines shaped like
    # "- {duration: D, offset: O, speaker_id: spk.N, ...}" -- confirm against
    # the corpus yaml.
    cp ${yml} ${dst}/.yaml0
    grep duration ${dst}/.yaml0 > ${dst}/.yaml1
    awk '{
        duration=$3; offset=$5; spkid=$7;
        gsub(",","",duration);
        gsub(",","",offset);
        gsub(",","",spkid);
        gsub("spk.","",spkid);
        duration=sprintf("%.7f", duration);
        if ( duration < 0.2 ) extendt=sprintf("%.7f", (0.2-duration)/2);
        else extendt=0;
        offset=sprintf("%.7f", offset);
        startt=offset-extendt;
        endt=offset+duration+extendt;
        printf("ted_%05d_%07.0f_%07.0f\n", spkid, int(1000*startt+0.5), int(1000*endt+0.5));
    }' ${dst}/.yaml1 > ${dst}/.yaml2
    # NOTE: Extend the lengths of short utterances (< 0.2s) rather than exclude them

    cp ${en} ${dst}/en.org
    cp ${tgt} ${dst}/${tgt_lang}.org

    # Build three text variants per language: tc (punctuation-normalized, case
    # kept), lc (lowercased) and lc.rm (lowercased, punctuation removed). Each
    # variant is tokenized and joined line by line with the utterance ids.
    for lang in en ${tgt_lang}; do
        # normalize punctuation
        normalize-punctuation.perl -l ${lang} < ${dst}/${lang}.org > ${dst}/${lang}.norm

        # lowercasing
        lowercase.perl < ${dst}/${lang}.norm > ${dst}/${lang}.norm.lc
        cp ${dst}/${lang}.norm ${dst}/${lang}.norm.tc

        # remove punctuation
        local/remove_punctuation.pl < ${dst}/${lang}.norm.lc > ${dst}/${lang}.norm.lc.rm

        # tokenization
        tokenizer.perl -l ${lang} -q < ${dst}/${lang}.norm.tc > ${dst}/${lang}.norm.tc.tok
        tokenizer.perl -l ${lang} -q < ${dst}/${lang}.norm.lc > ${dst}/${lang}.norm.lc.tok
        tokenizer.perl -l ${lang} -q < ${dst}/${lang}.norm.lc.rm > ${dst}/${lang}.norm.lc.rm.tok

        # "<utt-id> <tokenized text>", sorted by utterance id
        paste -d " " ${dst}/.yaml2 ${dst}/${lang}.norm.tc.tok | sort > ${dst}/text.tc.${lang}
        paste -d " " ${dst}/.yaml2 ${dst}/${lang}.norm.lc.tok | sort > ${dst}/text.lc.${lang}
        paste -d " " ${dst}/.yaml2 ${dst}/${lang}.norm.lc.rm.tok | sort > ${dst}/text.lc.rm.${lang}

        # save original and cleaned punctuation
        lowercase.perl < ${dst}/${lang}.org | text2token.py -s 0 -n 1 | tr " " "\n" \
            | sort | uniq | grep -v -e '^\s*$' | awk '{print $0 " " NR+1}' > ${dst}/punctuation.${lang}
        lowercase.perl < ${dst}/${lang}.norm.tc | text2token.py -s 0 -n 1 | tr " " "\n" \
            | sort | uniq | grep -v -e '^\s*$' | awk '{print $0 " " NR+1}' > ${dst}/punctuation.clean.${lang}
    done


    # error check
    # Tokenization must not have changed the number of lines.
    n=$(cat ${dst}/.yaml2 | wc -l)
    n_en=$(cat ${dst}/en.norm.tc.tok | wc -l)
    n_tgt=$(cat ${dst}/${tgt_lang}.norm.tc.tok | wc -l)
    [ ${n} -ne ${n_en} ] && echo "Warning: expected ${n} data data files, found ${n_en}" && exit 1;
    [ ${n} -ne ${n_tgt} ] && echo "Warning: expected ${n} data data files, found ${n_tgt}" && exit 1;


    # (1c) Make segments files from transcript
    #segments file format is: utt-id start-time end-time, e.g.:
    #ted_00001_0003501_0003684 ted_0001 003.501 0003.684
    # The utterance id is split on '_': S[1]_S[2] is the recording/speaker id,
    # S[3] and S[4] are start and end in milliseconds (written out in seconds).
    awk '{
        segment=$1; split(segment,S,"[_]");
        spkid=S[1] "_" S[2]; startf=S[3]; endf=S[4];
        printf("%s %s %.2f %.2f\n", segment, spkid, startf/1000, endf/1000);
    }' < ${dst}/text.tc.${tgt_lang} | uniq | sort > ${dst}/segments

    # wav.scp: one piped `cat <wav_dir>/ted_<N>.wav |` entry per recording id;
    # %d drops the zero padding of the numeric part in the file name.
    awk '{
        segment=$1; split(segment,S,"[_]");
        spkid=S[1] "_" S[2];
        printf("%s cat '${wav_dir}'/%s_%d.wav |\n", spkid, S[1], S[2]);
    }' < ${dst}/text.tc.${tgt_lang} | uniq | sort > ${dst}/wav.scp

    # utt2spk: utterance id -> recording/speaker id, derived from segments
    awk '{
        segment=$1; split(segment,S,"[_]");
        spkid=S[1] "_" S[2]; print $1 " " spkid
    }' ${dst}/segments | uniq | sort > ${dst}/utt2spk

    cat ${dst}/utt2spk | utils/utt2spk_to_spk2utt.pl | sort > ${dst}/spk2utt

    # error check
    n_en=$(cat ${dst}/text.tc.en | wc -l)
    n_tgt=$(cat ${dst}/text.tc.${tgt_lang} | wc -l)
    [ ${n_en} -ne ${n_tgt} ] && echo "Warning: expected ${n_en} data data files, found ${n_tgt}" && exit 1;

    # Copy stuff into its final locations [this has been moved from the format_data script]
    mkdir -p data/${set}.en-${tgt_lang}

    # remove duplicated utterances (the same offset)
    # Ids occurring once go to reclist (kept); the others go to duplicate_lines.
    # NOTE(review): the split is done by matching the substring '1 ted' in the
    # `uniq -c` output, which would also match counts ending in 1 such as 11 or
    # 21 -- harmless only if no id repeats that often; confirm.
    echo "remove duplicate lines..."
    cut -d ' ' -f 1 ${dst}/text.tc.en | sort | uniq -c | sort -n -k1 -r | grep -v '1 ted' \
        | sed 's/^[ \t]*//' > ${dst}/duplicate_lines
    cut -d ' ' -f 1 ${dst}/text.tc.en | sort | uniq -c | sort -n -k1 -r | grep '1 ted' \
        | cut -d '1' -f 2- | sed 's/^[ \t]*//' > ${dst}/reclist
    reduce_data_dir.sh ${dst} ${dst}/reclist data/${set}.en-${tgt_lang}
    # Copy all text variants of both languages, then let fix_data_dir.sh make
    # them consistent with the reduced utterance list.
    for l in en ${tgt_lang}; do
        for case in tc lc lc.rm; do
            cp ${dst}/text.${case}.${l} data/${set}.en-${tgt_lang}/text.${case}.${l}
        done
    done
    utils/fix_data_dir.sh --utt_extra_files \
        "text.tc.en text.lc.en text.lc.rm.en text.tc.${tgt_lang} text.lc.${tgt_lang} text.lc.rm.${tgt_lang}" \
        data/${set}.en-${tgt_lang}

    # error check
    # segments and target text must describe the same number of utterances.
    n_seg=$(cat data/${set}.en-${tgt_lang}/segments | wc -l)
    n_text=$(cat data/${set}.en-${tgt_lang}/text.tc.${tgt_lang} | wc -l)
    [ ${n_seg} -ne ${n_text} ] && echo "Warning: expected ${n_seg} data data files, found ${n_text}" && exit 1;

    echo "$0: successfully prepared data in ${dst}"
done


================================================
FILE: examples/mustc/st1/local/divide_lang.sh
================================================
#!/bin/bash

# Copyright 2019 Kyoto University (Hirofumi Inaguma)
#  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)

# Split a bilingual data directory data/<set> into two monolingual ones,
# data/<set>.en and data/<set>.<lang>.
#
# Arguments:
#   $1  name of the data set directory under data/ (e.g. dev.en-es)
#   $2  target language code (e.g. es)

. ./path.sh

if [ "$#" -ne 2 ]; then
    echo "Usage: $0 <set> <lang>"
    echo "e.g.: $0 dev.en-es es"
    exit 1
fi

set=$1
lang=$2
export LC_ALL=en_US.UTF-8

# split_lang <language> <dummy-variant>
# Create data/${set}.<language> from data/${set}: copy (sorted) the Kaldi
# index files that exist, write text.tc / text.lc / text.lc.rm with
# non-printable characters stripped, and use the given variant (tc, lc or
# lc.rm) as the plain `text` file. The directory is then fixed and validated;
# a validation failure terminates the script with status 1.
split_lang() {
    local l=$1
    local dummy=$2
    local out=data/${set}.${l}
    local f variant

    # Copy stuff into its final locations [this has been moved from the format_data script]
    mkdir -p ${out}
    for f in spk2utt utt2spk segments wav.scp feats.scp utt2num_frames; do
        if [ -f data/${set}/${f} ]; then
            sort data/${set}/${f} > ${out}/${f}
        fi
    done

    sort data/${set}/text.${dummy}.${l} | sed $'s/[^[:print:]]//g' > ${out}/text  # dummy
    for variant in tc lc lc.rm; do
        sort data/${set}/text.${variant}.${l} | sed $'s/[^[:print:]]//g' > ${out}/text.${variant}
    done

    utils/fix_data_dir.sh --utt_extra_files "text.tc text.lc text.lc.rm" ${out}
    # Without feats.scp the directory can only be validated structurally.
    if [ -f ${out}/feats.scp ]; then
        utils/validate_data_dir.sh ${out} || exit 1;
    else
        utils/validate_data_dir.sh --no-feats --no-wav ${out} || exit 1;
    fi
}

# for En: the plain `text` file is the lowercased, punctuation-free variant
split_lang en lc.rm

# for target language: the plain `text` file is the truecased variant
split_lang ${lang} tc


================================================
FILE: examples/mustc/st1/local/remove_punctuation.pl
================================================
#!/usr/bin/perl

# Filter: reads text from STDIN and writes it to STDOUT with all punctuation
# removed except apostrophes and the literal token <space>, and with
# whitespace collapsed and trimmed. Exactly one output line per input line.

use warnings;
use strict;

binmode(STDIN,":utf8");
binmode(STDOUT,":utf8");

while (my $line = <STDIN>) {
  # pad both ends so every token is surrounded by whitespace
  $line = " $line ";

  # remove punctuation except apostrophe; the two protected items are turned
  # into plain words first so that the [[:punct:]] sweep cannot touch them
  $line =~ s/<space>/spacemark/g;  # for scoring
  $line =~ s/'/apostrophe/g;
  $line =~ s/[[:punct:]]//g;
  $line =~ s/apostrophe/'/g;
  $line =~ s/spacemark/<space>/g;  # for scoring

  # remove whitespace: squeeze runs (this also swallows the input newline),
  # then strip the leading and trailing blanks
  $line =~ s/\s+/ /g;
  $line =~ s/^\s+//;
  $line =~ s/\s+$//;

  print "$line\n";
}


================================================
FILE: examples/mustc/st1/local/test.sh
================================================
#! /usr/bin/env bash

# Decode a checkpoint and report detokenized BLEU.
#
# Arguments:
#   $1  model config path
#   $2  decoding config path
#   $3  checkpoint path prefix (also used as prefix of all result files)
#   $4  target language code (passed to the Moses detokenizer)
#
# Reads BIN_DIR and MAIN_ROOT from the environment (both are exported by path.sh).

if [ $# != 4 ];then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix lang"
    exit -1
fi

# Number of GPUs = number of comma-separated entries in CUDA_VISIBLE_DEVICES.
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
decode_config_path=$2
ckpt_prefix=$3
tgt_lang=$4

for type in fullsentence; do
    echo "decoding ${type}"
    # No trailing backslash after the last option: the status tested below
    # must be the one of this command.
    python3 -u ${BIN_DIR}/test.py \
    --ngpu ${ngpu} \
    --config ${config_path} \
    --decode_cfg ${decode_config_path} \
    --result_file ${ckpt_prefix}.${type}.rsl \
    --checkpoint_path ${ckpt_prefix} \
    --opts decode.decoding_method ${type}

    if [ $? -ne 0 ]; then
        echo "Failed in evaluation!"
        exit 1
    fi
    echo $PATH
    # Split the result file into hypothesis and reference text.
    python3 ${MAIN_ROOT}/utils/rsl2trn.py --rsl ${ckpt_prefix}.${type}.rsl \
                            --hyp ${ckpt_prefix}.${type}.hyp \
                            --ref ${ckpt_prefix}.${type}.ref
    # Check for the tool that is actually invoked below. This script is
    # executed, not sourced, so it has to `exit`: `return` outside a function
    # is an error in bash and would let the script carry on without Moses.
    if ! which detokenizer.perl > /dev/null; then
        echo "Error: it seems that moses is not installed." >&2
        echo "Error: please install moses as follows." >&2
        echo "Error: cd ${MAIN_ROOT}/tools && make moses.done" >&2
        exit 1
    fi
    detokenizer.perl -l ${tgt_lang} -q < ${ckpt_prefix}.${type}.hyp > ${ckpt_prefix}.${type}.hyp.detok
    detokenizer.perl -l ${tgt_lang} -q < ${ckpt_prefix}.${type}.ref > ${ckpt_prefix}.${type}.ref.detok
    echo "Detokenized BLEU:"
    sacrebleu ${ckpt_prefix}.${type}.ref.detok -i ${ckpt_prefix}.${type}.hyp.detok


done

exit 0


================================================
FILE: examples/mustc/st1/local/train.sh
================================================
#!/bin/bash

# Train a model on CPU (no visible GPU) or with paddle.distributed.launch.
#
# Arguments:
#   $1  model config path
#   $2  checkpoint name; outputs are written to exp/<ckpt_name>
#   $3  checkpoint path to initialise / fine-tune from (may be an empty string)
#   $4  optional comma-separated node IPs for multi-machine training
#
# Reads BIN_DIR and CUDA_VISIBLE_DEVICES from the environment.

# The two bounds must be joined with `||`; with `&&` the condition can never
# hold, so a wrong argument count was never reported.
if [ $# -lt 3 ] || [ $# -gt 4 ];then
    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ckpt_path ips(optional)"
    exit -1
fi

# Number of GPUs = number of comma-separated entries in CUDA_VISIBLE_DEVICES.
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
ckpt_name=$2
ckpt_path=$3
# ips is the FOURTH argument; reading $3 here handed the checkpoint path to
# the launcher as --ips.
ips=${4:-}

if [ -z "${ips}" ];then
  ips_config=
else
  ips_config="--ips="${ips}
fi


mkdir -p exp

# seed may break model convergence
seed=0
if [ ${seed} != 0 ]; then
    export FLAGS_cudnn_deterministic=True
fi

if [ ${ngpu} == 0 ]; then
python3 -u ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--config ${config_path} \
--output exp/${ckpt_name} \
--checkpoint_path "${ckpt_path}" \
--seed ${seed}
else
python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--config ${config_path} \
--output exp/${ckpt_name} \
--checkpoint_path "${ckpt_path}" \
--seed ${seed}
fi
# Save the training status immediately: the `if` below resets $?, which made
# the failure check at the end of the script always succeed.
train_status=$?

if [ ${seed} != 0 ]; then
    unset FLAGS_cudnn_deterministic
fi

if [ ${train_status} -ne 0 ]; then
    echo "Failed in training!"
    exit 1
fi

exit 0


================================================
FILE: examples/mustc/st1/path.sh
================================================
# Environment setup for this example; meant to be sourced (`. ./path.sh`),
# not executed. Exports MAIN_ROOT, PATH, PYTHONPATH, LD_LIBRARY_PATH, BIN_DIR
# and KALDI_ROOT. Returns 1 when the moses tokenizer is not on PATH.
export MAIN_ROOT=`realpath ${PWD}/../../../`

export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${MAIN_ROOT}/tools/moses/scripts/tokenizer:${PATH}
export LC_ALL=C

export PYTHONDONTWRITEBYTECODE=1
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/

# moses is mandatory: abort the sourcing script's setup if it is missing.
if ! which tokenizer.perl > /dev/null; then
    echo "Error: moses is required in this example." >&2
    echo "Error: it seems that moses is not installed." >&2
    echo "Error: please install moses as follows." >&2
    echo "Error: cd ${MAIN_ROOT}/tools && git clone https://github.com/moses-smt/mosesdecoder.git moses" >&2
    return 1
fi

MODEL=u2_st
export BIN_DIR=${MAIN_ROOT}/paddlespeech/s2t/exps/${MODEL}/bin

# Kaldi (only warned about when missing, never fatal)
export KALDI_ROOT=${MAIN_ROOT}/tools/kaldi
[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
# Use if/else rather than a trailing `[ -f ... ] && . ...` list: as the last
# command of a sourced file, a failed test would become the return status of
# `. ./path.sh` and make callers that do `. ./path.sh || exit 1` abort.
if [ -f $KALDI_ROOT/tools/config/common_path.sh ]; then
    . $KALDI_ROOT/tools/config/common_path.sh
else
    echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present, can not using Kaldi!"
fi

================================================
FILE: examples/mustc/st1/run.sh
================================================
#!/bin/bash
# Driver for this example: data preparation (stage 0), training (stage 1),
# checkpoint averaging (stage 2) and testing (stage 3). Stages whose number
# lies in [stage, stop_stage] are executed in order.
set -e
. ./path.sh || exit 1;
. ./cmd.sh || exit 1;

# ---- defaults; presumably overridable on the command line through
# ---- parse_options.sh (confirm against utils/parse_options.sh)
gpus=0,1,2,3
stage=0
stop_stage=3
conf_path=conf/transformer_es.yaml
ips=            #xx.xx.xx.xx,xx.xx.xx.xx
decode_conf_path=conf/tuning/decode.yaml
must_c_path=
lang=es
avg_num=5
ckpt_path= #  (finetune from FAT-ST or ASR pretrained model)
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

# Derived names: averaged-checkpoint tag and the config file's base name
# (text before the first '.'), which names the experiment directory.
avg_ckpt=avg_${avg_num}
ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}')
echo "checkpoint name ${ckpt}"

# Succeeds when stage number $1 falls inside [stage, stop_stage].
stage_selected() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# Stage 0: prepare data
if stage_selected 0; then
    bash ./local/data.sh --tgt_lang ${lang} --must_c ${must_c_path} || exit -1
fi

# Stage 1: train model, all `ckpt` under `exp` dir
if stage_selected 1; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path}  ${ckpt} "${ckpt_path}" ${ips}
fi

# Stage 2: avg n best model
if stage_selected 2; then
    avg.sh best exp/${ckpt}/checkpoints ${avg_num}
fi

# Stage 3: test ckpt avg_n
if stage_selected 3; then
    CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${lang} || exit -1
fi


================================================
FILE: examples/opencpop/README.md
================================================

# Opencpop

* svs1 - DiffSinger
* voc1 - Parallel WaveGAN
* voc5 - HiFiGAN


================================================
FILE: examples/opencpop/svs1/README.md
================================================
([简体中文](./README_cn.md)|English)
# DiffSinger with Opencpop
This example contains code used to train a [DiffSinger](https://arxiv.org/abs/2105.02446) model with [Mandarin singing corpus](https://wenet.org.cn/opencpop/).

## Dataset
### Download and Extract
Download Opencpop from its [Official Website](https://wenet.org.cn/opencpop/download/) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/Opencpop`.

### pip install
<!-- Comment: ppdiffusers installs the newest huggingface_hub, from which the cached_download function has been removed, so a pinned version needs to be installed. -->
<!-- TODO: Remove this step once the corresponding dependency works with the newest huggingface_hub. -->
```shell
pip install huggingface_hub==0.25.2
```


## Get Started
Assume the path to the dataset is `~/datasets/Opencpop`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
    - (Supporting) synthesize waveform from a text file. 
5. (Supporting) inference using the static model.
```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done. A `dump` folder is created in the current directory. The structure of the dump folder is listed below.

```text
dump
├── dev
│   ├── norm
│   └── raw
├── phone_id_map.txt
├── speaker_id_map.txt
├── test
│   ├── norm
│   └── raw
└── train
    ├── energy_stats.npy
    ├── norm
    ├── pitch_stats.npy
    ├── raw
    ├── speech_stats.npy
    └── speech_stretchs.npy

```
The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The raw folder contains speech, pitch and energy features of each utterance, while the norm folder contains normalized ones. The statistics used to normalize features are computed from the training set and are stored in `dump/train/*_stats.npy`. `speech_stretchs.npy` contains the minimum and maximum values of each dimension of the mel spectrum, which are used for linear stretching before training/inference of the diffusion module.
Note: Since training on non-normalized features works better than training on normalized ones, the features saved under `norm` are features that have not been normalized.


Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains utterance id, speaker id, phones, text_lengths, speech_lengths, phone durations, the path of speech features, the path of pitch features, the path of energy features, note, note durations, slur.

### Model Training
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
`./local/train.sh` calls `${BIN_DIR}/train.py`.
Here's the complete help message.
```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU] [--phones-dict PHONES_DICT]
                [--speaker-dict SPEAKER_DICT] [--speech-stretchs SPEECH_STRETCHS]

Train a DiffSinger model.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       diffsinger config file.
  --train-metadata TRAIN_METADATA
                        training data.
  --dev-metadata DEV_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu=0, use cpu.
  --phones-dict PHONES_DICT
                        phone vocabulary file.
  --speaker-dict SPEAKER_DICT
                        speaker id map file for multiple speaker model.
  --speech-stretchs SPEECH_STRETCHS
                        min amd max mel for stretching.
```
1. `--config` is a config file in yaml format to overwrite the default config, which can be found at `conf/default.yaml`.
2. `--train-metadata` and `--dev-metadata` should be the metadata file in the normalized subfolder of `train` and `dev` in the `dump` folder.
3. `--output-dir` is the directory to save the results of the experiment. Checkpoints are saved in `checkpoints/` inside this directory.
4. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.
5. `--phones-dict` is the path of the phone vocabulary file.
6. `--speech-stretchs` is the path of mel's min-max data file.

### Synthesizing
We use parallel wavegan as the neural vocoder.
Download pretrained parallel wavegan model from [pwgan_opencpop_ckpt_1.4.0.zip](https://paddlespeech.cdn.bcebos.com/t2s/svs/opencpop/pwgan_opencpop_ckpt_1.4.0.zip) and unzip it.
```bash
unzip pwgan_opencpop_ckpt_1.4.0.zip
```
Parallel WaveGAN checkpoint contains files listed below.
```text
pwgan_opencpop_ckpt_1.4.0.zip
├── default.yaml                   # default config used to train parallel wavegan
├── snapshot_iter_100000.pdz       # model parameters of parallel wavegan
└── feats_stats.npy                # statistics used to normalize spectrogram when training parallel wavegan
```
`./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
use `pwgan` model as vocoder.

```text
usage: synthesize.py [-h]
                     [--am {diffsinger_opencpop}]
                     [--am_config AM_CONFIG] [--am_ckpt AM_CKPT]
                     [--am_stat AM_STAT] [--phones_dict PHONES_DICT]
                     [--voc {pwgan_opencpop}]
                     [--voc_config VOC_CONFIG] [--voc_ckpt VOC_CKPT]
                     [--voc_stat VOC_STAT] [--ngpu NGPU]
                     [--test_metadata TEST_METADATA] [--output_dir OUTPUT_DIR]
                     [--speech_stretchs SPEECH_STRETCHS]

Synthesize with acoustic model & vocoder

optional arguments:
  -h, --help            show this help message and exit
  --am {speedyspeech_csmsc,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech,tacotron2_aishell3}
                        Choose acoustic model type of tts task.
       {diffsinger_opencpop} Choose acoustic model type of svs task.
  --am_config AM_CONFIG
                        Config of acoustic model.
  --am_ckpt AM_CKPT     Checkpoint file of acoustic model.
  --am_stat AM_STAT     mean and standard deviation used to normalize
                        spectrogram when training acoustic model.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --tones_dict TONES_DICT
                        tone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --voice-cloning VOICE_CLONING
                        whether training voice cloning model.
  --voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,wavernn_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,style_melgan_csmsc}
                        Choose vocoder type of tts task.
        {pwgan_opencpop, hifigan_opencpop} Choose vocoder type of svs task.
  --voc_config VOC_CONFIG
                        Config of voc.
  --voc_ckpt VOC_CKPT   Checkpoint file of voc.
  --voc_stat VOC_STAT   mean and standard deviation used to normalize
                        spectrogram when training voc.
  --ngpu NGPU           if ngpu == 0, use cpu.
  --test_metadata TEST_METADATA
                        test metadata.
  --output_dir OUTPUT_DIR
                        output dir.
  --speech-stretchs     SPEECH_STRETCHS
                        The min and max values of the mel spectrum, using on diffusion of diffsinger.
```

`./local/synthesize_e2e.sh` calls `${BIN_DIR}/../synthesize_e2e.py`, which can synthesize waveform from text file. 
`local/pinyin_to_phone.txt` comes from the readme of the opencpop dataset, indicating the mapping from pinyin to phonemes in opencpop.

```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
```
The last number controls the vocoder model during synthesis, which can be `0` or `1`, use `pwgan` or `hifigan` model as vocoder.

```text
usage: synthesize_e2e.py [-h]
                         [--am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech}]
                         [--am_config AM_CONFIG] [--am_ckpt AM_CKPT]
                         [--am_stat AM_STAT] [--phones_dict PHONES_DICT]
                         [--speaker_dict SPEAKER_DICT] [--spk_id SPK_ID]
                         [--voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,style_melgan_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,wavernn_csmsc}]
                         [--voc_config VOC_CONFIG] [--voc_ckpt VOC_CKPT]
                         [--voc_stat VOC_STAT] [--lang LANG]
                         [--inference_dir INFERENCE_DIR] [--ngpu NGPU]
                         [--text TEXT] [--output_dir OUTPUT_DIR]
                         [--pinyin_phone PINYIN_PHONE]
                         [--speech_stretchs SPEECH_STRETCHS]

Synthesize with acoustic model & vocoder

optional arguments:
  -h, --help            show this help message and exit
  --am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech}
                        Choose acoustic model type of tts task.
       {diffsinger_opencpop} Choose acoustic model type of svs task.
  --am_config AM_CONFIG
                        Config of acoustic model.
  --am_ckpt AM_CKPT     Checkpoint file of acoustic model.
  --am_stat AM_STAT     mean and standard deviation used to normalize
                        spectrogram when training acoustic model.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --spk_id SPK_ID       spk id for multi speaker acoustic model
  --voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,style_melgan_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,wavernn_csmsc}
                        Choose vocoder type of tts task.
        {pwgan_opencpop, hifigan_opencpop} Choose vocoder type of svs task.
  --voc_config VOC_CONFIG
                        Config of voc.
  --voc_ckpt VOC_CKPT   Checkpoint file of voc.
  --voc_stat VOC_STAT   mean and standard deviation used to normalize
                        spectrogram when training voc.
  --lang LANG           {zh, en, mix, canton} Choose language type of tts task.
                        {sing} Choose language type of svs task.
  --inference_dir INFERENCE_DIR
                        dir to save inference models
  --ngpu NGPU           if ngpu == 0, use cpu.
  --text TEXT           text to synthesize file, a 'utt_id sentence' pair per line for tts task.
                        A '{ utt_id input_type (is word) text notes note_durs}' or '{utt_id input_type (is phoneme) phones notes note_durs is_slurs}' pair per line for svs task.
  --output_dir OUTPUT_DIR
                        output dir.
  --pinyin_phone PINYIN_PHONE
                        pinyin to phone map file, using on sing_frontend.
  --speech_stretchs SPEECH_STRETCHS
                        The min and max values of the mel spectrum, using on diffusion of diffsinger.
```
1. `--am` is acoustic model type with the format {model_name}_{dataset}
2. `--am_config`, `--am_ckpt`, `--am_stat` and `--phones_dict` are arguments for acoustic model, which correspond to the 4 files in the diffsinger pretrained model.
3. `--voc` is vocoder type with the format {model_name}_{dataset}
4. `--voc_config`, `--voc_ckpt`, `--voc_stat` are arguments for vocoder, which correspond to the 3 files in the parallel wavegan pretrained model.
5. `--lang` is language. `zh`, `en`, `mix` and `canton` for tts task. `sing` for svs task.
6. `--test_metadata` should be the metadata file in the normalized subfolder of `test`  in the `dump` folder.
7. `--text` is the text file, which contains sentences to synthesize.
8. `--output_dir` is the directory to save synthesized audio files.
9. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.
10. `--inference_dir` is the directory to save static models. If this line is not added, it will not be generated and saved as a static model.
11. `--pinyin_phone` pinyin to phone map file, using on sing_frontend.
12. `--speech_stretchs` The min and max values of the mel spectrum, using on diffusion of diffsinger.

Note: At present, the diffsinger model does not support dynamic to static, so do not add `--inference_dir`.


## Pretrained Model
Pretrained DiffSinger model:
- [diffsinger_opencpop_ckpt_1.4.0.zip](https://paddlespeech.cdn.bcebos.com/t2s/svs/opencpop/diffsinger_opencpop_ckpt_1.4.0.zip)

DiffSinger checkpoint contains files listed below.
```text
diffsinger_opencpop_ckpt_1.4.0.zip
├── default.yaml             # default config used to train diffsinger
├── energy_stats.npy         # statistics used to normalize energy when training diffsinger if norm is needed
├── phone_id_map.txt         # phone vocabulary file when training diffsinger
├── pinyin_to_phone.txt      # pinyin-to-phoneme mapping file when training diffsinger
├── pitch_stats.npy          # statistics used to normalize pitch when training diffsinger if norm is needed 
├── snapshot_iter_160000.pdz # model parameters of diffsinger
├── speech_stats.npy         # statistics used to normalize mel when training diffsinger if norm is needed
└── speech_stretchs.npy      # min and max values to use for mel spectral stretching before training diffusion

```

You can use the following script to synthesize the sentences in `${BIN_DIR}/../../assets/sentences_sing.txt` using the pretrained diffsinger and parallel wavegan models.

```bash
source path.sh

FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 ${BIN_DIR}/../synthesize_e2e.py \
  --am=diffsinger_opencpop \
  --am_config=diffsinger_opencpop_ckpt_1.4.0/default.yaml \
  --am_ckpt=diffsinger_opencpop_ckpt_1.4.0/snapshot_iter_160000.pdz \
  --am_stat=diffsinger_opencpop_ckpt_1.4.0/speech_stats.npy  \
  --voc=pwgan_opencpop \
  --voc_config=pwgan_opencpop_ckpt_1.4.0/default.yaml \
  --voc_ckpt=pwgan_opencpop_ckpt_1.4.0/snapshot_iter_100000.pdz \
  --voc_stat=pwgan_opencpop_ckpt_1.4.0/feats_stats.npy \
  --lang=sing \
  --text=${BIN_DIR}/../../assets/sentences_sing.txt \
  --output_dir=exp/default/test_e2e \
  --phones_dict=diffsinger_opencpop_ckpt_1.4.0/phone_id_map.txt \
  --pinyin_phone=diffsinger_opencpop_ckpt_1.4.0/pinyin_to_phone.txt \
  --speech_stretchs=diffsinger_opencpop_ckpt_1.4.0/speech_stretchs.npy
  
```


================================================
FILE: examples/opencpop/svs1/README_cn.md
================================================
(简体中文|[English](./README.md))
# 用 Opencpop 数据集训练 DiffSinger 模型

本用例包含用于训练 [DiffSinger](https://arxiv.org/abs/2105.02446) 模型的代码，使用 [Mandarin singing corpus](https://wenet.org.cn/opencpop/) 数据集。

## 数据集
### 下载并解压
从 [官方网站](https://wenet.org.cn/opencpop/download/) 下载数据集

### pip 安装
<!-- 注释: 因为 ppdiffusers 会安装最新的 huggingface_hub，但其中的 cached_download 函数已被删除，所以需要安装指定的版本。 -->
<!-- 待完成: 如果相应的依赖库恢复正常，则将此步骤删除。 -->
```shell
pip install huggingface_hub==0.25.2
```

## 开始
假设数据集的路径是 `~/datasets/Opencpop`.
运行下面的命令会进行如下操作：

1. **设置环境路径（source path.sh）**。
2. 对数据集进行预处理。
3. 训练模型
4. 合成波形
    - 从 `metadata.jsonl` 合成波形。
    - （支持中）从文本文件合成波形。
5. （支持中）使用静态模型进行推理。
```bash
./run.sh
```
您可以选择要运行的一系列阶段，或者将 `stage` 设置为 `stop-stage` 以仅使用一个阶段，例如，运行以下命令只会预处理数据集。
```bash
./run.sh --stage 0 --stop-stage 0
```
### 数据预处理
```bash
./local/preprocess.sh ${conf_path}
```
当它完成时。将在当前目录中创建 `dump` 文件夹。转储文件夹的结构如下所示。

```text
dump
├── dev
│   ├── norm
│   └── raw
├── phone_id_map.txt
├── speaker_id_map.txt
├── test
│   ├── norm
│   └── raw
└── train
    ├── energy_stats.npy
    ├── norm
    ├── pitch_stats.npy
    ├── raw
    ├── speech_stats.npy
    └── speech_stretchs.npy
```

数据集分为三个部分，即 `train` 、 `dev` 和 `test` ，每个部分都包含一个 `norm` 和 `raw` 子文件夹。原始文件夹包含每个话语的语音、音调和能量特征，而 `norm` 文件夹包含规范化的特征。用于规范化特征的统计数据是从 `dump/train/*_stats.npy` 中的训练集计算出来的。`speech_stretchs.npy` 中包含 mel谱每个维度上的最小值和最大值，用于 diffusion 模块训练/推理前的线性拉伸。
注意：由于非 norm 特征训练效果优于 norm，因此 `norm` 下保存的特征是未经过 norm 的特征。


此外，还有一个 `metadata.jsonl` 在每个子文件夹中。它是一个类似表格的文件，包含话语id，音色id，音素、文本长度、语音长度、音素持续时间、语音特征路径、音调特征路径、能量特征路径、音调，音调持续时间，是否为转音。

### 模型训练
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
`./local/train.sh` 调用 `${BIN_DIR}/train.py` 。
以下是完整的帮助信息。

```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU] [--phones-dict PHONES_DICT]
                [--speaker-dict SPEAKER_DICT] [--speech-stretchs SPEECH_STRETCHS]

Train a DiffSinger model.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       diffsinger config file.
  --train-metadata TRAIN_METADATA
                        training data.
  --dev-metadata DEV_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu=0, use cpu.
  --phones-dict PHONES_DICT
                        phone vocabulary file.
  --speaker-dict SPEAKER_DICT
                        speaker id map file for multiple speaker model.
  --speech-stretchs SPEECH_STRETCHS
                        min amd max mel for stretching.
```
1. `--config` 是一个 yaml 格式的配置文件，用于覆盖默认配置，位于 `conf/default.yaml`.
2. `--train-metadata` 和 `--dev-metadata` 应为 `dump` 文件夹中 `train` 和 `dev` 下的规范化元数据文件
3. `--output-dir` 是保存结果的目录。 检查点保存在此目录中的 `checkpoints/` 目录下。
4. `--ngpu` 要使用的 GPU 数，如果 ngpu==0，则使用 cpu 。
5. `--phones-dict` 是音素词汇表文件的路径。
6. `--speech-stretchs` mel的最小最大值数据的文件路径。

### 合成
我们使用 parallel wavegan 作为神经声码器（vocoder）。
从 [pwgan_opencpop_ckpt_1.4.0.zip](https://paddlespeech.cdn.bcebos.com/t2s/svs/opencpop/pwgan_opencpop_ckpt_1.4.0.zip) 下载预训练的 parallel wavegan 模型并将其解压。

```bash
unzip pwgan_opencpop_ckpt_1.4.0.zip
```
Parallel WaveGAN 检查点包含如下文件。
```text
pwgan_opencpop_ckpt_1.4.0.zip
├── default.yaml               # 用于训练 parallel wavegan 的默认配置
├── snapshot_iter_100000.pdz   # parallel wavegan 的模型参数
└── feats_stats.npy            # 训练 parallel wavegan 时用于规范化谱图的统计数据
```
`./local/synthesize.sh` 调用 `${BIN_DIR}/../synthesize.py` 即可从 `metadata.jsonl`中合成波形。

```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
使用 `pwgan` 模型作为声码器。

```text
usage: synthesize.py [-h]
                     [--am {diffsinger_opencpop}]
                     [--am_config AM_CONFIG] [--am_ckpt AM_CKPT]
                     [--am_stat AM_STAT] [--phones_dict PHONES_DICT]
                     [--voc {pwgan_opencpop}]
                     [--voc_config VOC_CONFIG] [--voc_ckpt VOC_CKPT]
                     [--voc_stat VOC_STAT] [--ngpu NGPU]
                     [--test_metadata TEST_METADATA] [--output_dir OUTPUT_DIR]
                     [--speech_stretchs SPEECH_STRETCHS]

Synthesize with acoustic model & vocoder

optional arguments:
  -h, --help            show this help message and exit
  --am {speedyspeech_csmsc,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech,tacotron2_aishell3}
                        Choose acoustic model type of tts task.
       {diffsinger_opencpop} Choose acoustic model type of svs task.
  --am_config AM_CONFIG
                        Config of acoustic model.
  --am_ckpt AM_CKPT     Checkpoint file of acoustic model.
  --am_stat AM_STAT     mean and standard deviation used to normalize
                        spectrogram when training acoustic model.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --tones_dict TONES_DICT
                        tone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --voice-cloning VOICE_CLONING
                        whether training voice cloning model.
  --voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,wavernn_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,style_melgan_csmsc}
                        Choose vocoder type of tts task.
        {pwgan_opencpop, hifigan_opencpop} Choose vocoder type of svs task.
  --voc_config VOC_CONFIG
                        Config of voc.
  --voc_ckpt VOC_CKPT   Checkpoint file of voc.
  --voc_stat VOC_STAT   mean and standard deviation used to normalize
                        spectrogram when training voc.
  --ngpu NGPU           if ngpu == 0, use cpu.
  --test_metadata TEST_METADATA
                        test metadata.
  --output_dir OUTPUT_DIR
                        output dir.
  --speech-stretchs     SPEECH_STRETCHS
                        The min and max values of the mel spectrum, using on diffusion of diffsinger.
```

`./local/synthesize_e2e.sh` 调用 `${BIN_DIR}/../synthesize_e2e.py`，即可从文本文件中合成波形。
`local/pinyin_to_phone.txt`来源于opencpop数据集中的README，表示opencpop中拼音到音素的映射。

```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
```
最后一位参数 `0` 用于选择合成时使用的声码器模型，取值为 `0` 或 `1`，分别对应使用 `pwgan` 或 `hifigan` 模型作为声码器。

```text
usage: synthesize_e2e.py [-h]
                         [--am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech}]
                         [--am_config AM_CONFIG] [--am_ckpt AM_CKPT]
                         [--am_stat AM_STAT] [--phones_dict PHONES_DICT]
                         [--speaker_dict SPEAKER_DICT] [--spk_id SPK_ID]
                         [--voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,style_melgan_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,wavernn_csmsc}]
                         [--voc_config VOC_CONFIG] [--voc_ckpt VOC_CKPT]
                         [--voc_stat VOC_STAT] [--lang LANG]
                         [--inference_dir INFERENCE_DIR] [--ngpu NGPU]
                         [--text TEXT] [--output_dir OUTPUT_DIR]
                         [--pinyin_phone PINYIN_PHONE]
                         [--speech_stretchs SPEECH_STRETCHS]

Synthesize with acoustic model & vocoder

optional arguments:
  -h, --help            show this help message and exit
  --am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech}
                        Choose acoustic model type of tts task.
       {diffsinger_opencpop} Choose acoustic model type of svs task.
  --am_config AM_CONFIG
                        Config of acoustic model.
  --am_ckpt AM_CKPT     Checkpoint file of acoustic model.
  --am_stat AM_STAT     mean and standard deviation used to normalize
                        spectrogram when training acoustic model.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --spk_id SPK_ID       spk id for multi speaker acoustic model
  --voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,style_melgan_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,wavernn_csmsc}
                        Choose vocoder type of tts task.
        {pwgan_opencpop, hifigan_opencpop} Choose vocoder type of svs task.
  --voc_config VOC_CONFIG
                        Config of voc.
  --voc_ckpt VOC_CKPT   Checkpoint file of voc.
  --voc_stat VOC_STAT   mean and standard deviation used to normalize
                        spectrogram when training voc.
  --lang LANG           {zh, en, mix, canton} Choose language type of tts task.
                        {sing} Choose language type of svs task.
  --inference_dir INFERENCE_DIR
                        dir to save inference models
  --ngpu NGPU           if ngpu == 0, use cpu.
  --text TEXT           text to synthesize file, a 'utt_id sentence' pair per line for tts task.
                        A '{ utt_id input_type (is word) text notes note_durs}' or '{utt_id input_type (is phoneme) phones notes note_durs is_slurs}' pair per line for svs task.
  --output_dir OUTPUT_DIR
                        output dir.
  --pinyin_phone PINYIN_PHONE
                        pinyin to phone map file, using on sing_frontend.
  --speech_stretchs SPEECH_STRETCHS
                        The min and max values of the mel spectrum, using on diffusion of diffsinger.
```
1. `--am` 是声学模型类型，格式为 {model_name}_{dataset}
2. `--am_config`, `--am_ckpt`, `--am_stat` 和 `--phones_dict` 是声学模型的参数，对应于 diffsinger 预训练模型中的 4 个文件。
3. `--voc` 是声码器(vocoder)类型，格式为 {model_name}_{dataset}
4. `--voc_config`, `--voc_ckpt`, `--voc_stat` 是声码器的参数，对应于 parallel wavegan 预训练模型中的 3 个文件。
5. `--lang` tts对应模型的语言可以是 `zh`、`en`、`mix`和`canton`。 svs 对应的语言是 `sing` 。
6. `--test_metadata` 应为 `dump` 文件夹中 `test` 下的规范化元数据文件。
7. `--text` 是文本文件，其中包含要合成的句子。
8. `--output_dir` 是保存合成音频文件的目录。
9. `--ngpu` 要使用的GPU数，如果 ngpu==0，则使用 cpu。
10. `--inference_dir` 静态模型保存的目录。如果不加这一行，就不会生成并保存静态模型。
11. `--pinyin_phone` 拼音到音素的映射文件。
12. `--speech_stretchs` mel谱的最大最小值用于diffsinger中diffusion之前的线性拉伸。

注意： 目前 diffsinger 模型还不支持动转静，所以不要加 `--inference_dir`。


## 预训练模型
预先训练的 DiffSinger 模型：
- [diffsinger_opencpop_ckpt_1.4.0.zip](https://paddlespeech.cdn.bcebos.com/t2s/svs/opencpop/diffsinger_opencpop_ckpt_1.4.0.zip)


DiffSinger 检查点包含下列文件。
```text
diffsinger_opencpop_ckpt_1.4.0.zip
├── default.yaml             # 用于训练 diffsinger 的默认配置
├── energy_stats.npy         # 训练 diffsinger 时如若需要 norm energy 会使用到的统计数据 
├── phone_id_map.txt         # 训练 diffsinger 时的音素词汇文件
├── pinyin_to_phone.txt      # 训练 diffsinger 时的拼音到音素映射文件
├── pitch_stats.npy          # 训练 diffsinger 时如若需要 norm pitch 会使用到的统计数据 
├── snapshot_iter_160000.pdz # 模型参数和优化器状态
├── speech_stats.npy         # 训练 diffsinger 时用于规范化频谱图的统计数据
└── speech_stretchs.npy      # 训练 diffusion 前用于 mel 谱拉伸的最小及最大值

```
您可以使用以下脚本，通过预训练的 diffsinger 和 parallel wavegan 模型为 `${BIN_DIR}/../../assets/sentences_sing.txt` 中的句子合成歌声
```bash
source path.sh

FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 ${BIN_DIR}/../synthesize_e2e.py \
  --am=diffsinger_opencpop \
  --am_config=diffsinger_opencpop_ckpt_1.4.0/default.yaml \
  --am_ckpt=diffsinger_opencpop_ckpt_1.4.0/snapshot_iter_160000.pdz \
  --am_stat=diffsinger_opencpop_ckpt_1.4.0/speech_stats.npy  \
  --voc=pwgan_opencpop \
  --voc_config=pwgan_opencpop_ckpt_1.4.0/default.yaml \
  --voc_ckpt=pwgan_opencpop_ckpt_1.4.0/snapshot_iter_100000.pdz \
  --voc_stat=pwgan_opencpop_ckpt_1.4.0/feats_stats.npy \
  --lang=sing \
  --text=${BIN_DIR}/../../assets/sentences_sing.txt \
  --output_dir=exp/default/test_e2e \
  --phones_dict=diffsinger_opencpop_ckpt_1.4.0/phone_id_map.txt \
  --pinyin_phone=diffsinger_opencpop_ckpt_1.4.0/pinyin_to_phone.txt \
  --speech_stretchs=diffsinger_opencpop_ckpt_1.4.0/speech_stretchs.npy
  
```


================================================
FILE: examples/opencpop/svs1/conf/default.yaml
================================================
###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################

fs: 24000          # sr
n_fft: 512         # FFT size (samples).
n_shift: 128       # Hop size (samples). ~5.3ms at fs=24000
win_length: 512    # Window length (samples). ~21.3ms at fs=24000
                   # If set to null, it will be the same as fft_size.
window: "hann"     # Window function.

# Only used for feats_type != raw

fmin: 30           # Minimum frequency of Mel basis.
fmax: 12000        # Maximum frequency of Mel basis.
n_mels: 80         # The number of mel basis.

# Only used for the model using pitch features (e.g. FastSpeech2)
f0min: 80          # Minimum f0 for pitch extraction.
f0max: 750         # Maximum f0 for pitch extraction.


###########################################################
#                       DATA SETTING                      #
###########################################################
batch_size: 48     # batch size
num_workers: 1     # number of workers (presumably data-loading workers, not GPUs; confirm against train.py)


###########################################################
#                       MODEL SETTING                     #
###########################################################
model:
    # music score related
    note_num: 300                                     # number of note
    is_slur_num: 2                                    # number of slur
    # fastspeech2 module options
    use_energy_pred: False                            # whether use energy predictor
    use_postnet: False                                # whether use postnet

    # fastspeech2 module
    fastspeech2_params:
        adim: 256                                     # attention dimension
        aheads: 2                                     # number of attention heads
        elayers: 4                                    # number of encoder layers
        eunits: 1024                                  # number of encoder ff units
        dlayers: 4                                    # number of decoder layers
        dunits: 1024                                  # number of decoder ff units
        positionwise_layer_type: conv1d-linear        # type of position-wise layer
        positionwise_conv_kernel_size: 9              # kernel size of position wise conv layer
        transformer_enc_dropout_rate: 0.1             # dropout rate for transformer encoder layer
        transformer_enc_positional_dropout_rate: 0.1  # dropout rate for transformer encoder positional encoding
        transformer_enc_attn_dropout_rate: 0.0        # dropout rate for transformer encoder attention layer
        transformer_activation_type: "gelu"           # Activation function type in transformer.
        encoder_normalize_before: True                # whether to perform layer normalization before the input
        decoder_normalize_before: True                # whether to perform layer normalization before the input
        reduction_factor: 1                           # reduction factor
        init_type: xavier_uniform                     # initialization type
        init_enc_alpha: 1.0                           # initial value of alpha of encoder scaled position encoding
        init_dec_alpha: 1.0                           # initial value of alpha of decoder scaled position encoding
        use_scaled_pos_enc: True                      # whether to use scaled positional encoding
        transformer_dec_dropout_rate: 0.1             # dropout rate for transformer decoder layer
        transformer_dec_positional_dropout_rate: 0.1  # dropout rate for transformer decoder positional encoding
        transformer_dec_attn_dropout_rate: 0.0        # dropout rate for transformer decoder attention layer
        duration_predictor_layers: 5                  # number of layers of duration predictor
        duration_predictor_chans: 256                 # number of channels of duration predictor
        duration_predictor_kernel_size: 3             # filter size of duration predictor
        duration_predictor_dropout_rate: 0.5          # dropout rate in duration predictor
        pitch_predictor_layers: 5                     # number of conv layers in pitch predictor
        pitch_predictor_chans: 256                    # number of channels of conv layers in pitch predictor
        pitch_predictor_kernel_size: 5                # kernel size of conv layers in pitch predictor
        pitch_predictor_dropout: 0.5                  # dropout rate in pitch predictor
        pitch_embed_kernel_size: 1                    # kernel size of conv embedding layer for pitch
        pitch_embed_dropout: 0.0                      # dropout rate after conv embedding layer for pitch
        stop_gradient_from_pitch_predictor: True      # whether to stop the gradient from pitch predictor to encoder
        energy_predictor_layers: 2                    # number of conv layers in energy predictor
        energy_predictor_chans: 256                   # number of channels of conv layers in energy predictor
        energy_predictor_kernel_size: 3               # kernel size of conv layers in energy predictor
        energy_predictor_dropout: 0.5                 # dropout rate in energy predictor
        energy_embed_kernel_size: 1                   # kernel size of conv embedding layer for energy
        energy_embed_dropout: 0.0                     # dropout rate after conv embedding layer for energy
        stop_gradient_from_energy_predictor: False    # whether to stop the gradient from energy predictor to encoder
        postnet_layers: 5                             # number of layers of postnet
        postnet_filts: 5                              # filter size of conv layers in postnet
        postnet_chans: 256                            # number of channels of conv layers in postnet
        postnet_dropout_rate: 0.5                     # dropout rate for postnet
 
    # denoiser module
    denoiser_params:
        in_channels: 80                               # Number of channels of the input mel-spectrogram
        out_channels: 80                              # Number of channels of the output mel-spectrogram
        kernel_size: 3                                # Kernel size of the residual blocks inside                           
        layers: 20                                    # Number of residual blocks inside
        stacks: 5                                     # The number of groups to split the residual blocks into
        residual_channels: 256                        # Residual channel of the residual blocks
        gate_channels: 512                            # Gate channel of the residual blocks
        skip_channels: 256                            # Skip channel of the residual blocks
        aux_channels: 256                             # Auxiliary channel of the residual blocks
        dropout: 0.1                                  # Dropout of the residual blocks
        bias: True                                    # Whether to use bias in residual blocks
        use_weight_norm: False                        # Whether to use weight norm in all convolutions
        init_type: "kaiming_normal"                   # Type of initialize weights of a neural network module


    diffusion_params:
        num_train_timesteps: 100                      # The number of timesteps between the noise and the real during training
        beta_start: 0.0001                            # beta start parameter for the scheduler
        beta_end: 0.06                                # beta end parameter for the scheduler
        beta_schedule: "linear"                       # beta schedule parameter for the scheduler
        num_max_timesteps: 100                        # The max timestep transition from real to noise
        stretch: True                                 # whether to stretch before diffusion


###########################################################
#                       UPDATER SETTING                   #
###########################################################
fs2_updater:
    use_masking: True                 # whether to apply masking for padded part in loss calculation

ds_updater:
    use_masking: True                 # whether to apply masking for padded part in loss calculation


###########################################################
#                     OPTIMIZER SETTING                   #
###########################################################
# fastspeech2 optimizer
fs2_optimizer:
    optim: adam              # optimizer type
    learning_rate: 0.001     # learning rate

# diffusion optimizer
ds_optimizer_params:
    beta1: 0.9
    beta2: 0.98
    weight_decay: 0.0

ds_scheduler_params:
    learning_rate: 0.001              
    gamma: 0.5                          
    step_size: 50000
ds_grad_norm: 1


###########################################################
#                    INTERVAL SETTING                     #
###########################################################
only_train_diffusion: True                 # Whether to freeze fastspeech2 parameters when training diffusion
ds_train_start_steps: 160000              # Number of steps to start to train diffusion module.
train_max_steps: 320000                   # Number of training steps.
save_interval_steps: 2000                 # Interval steps to save checkpoint.
eval_interval_steps: 2000                 # Interval steps to evaluate the network.
num_snapshots: 5


###########################################################
#                       OTHER SETTING                     #
###########################################################
seed: 10086


================================================
FILE: examples/opencpop/svs1/local/pinyin_to_phone.txt
================================================
a|a
ai|ai
an|an
ang|ang
ao|ao
ba|b a
bai|b ai
ban|b an
bang|b ang
bao|b ao
bei|b ei
ben|b en
beng|b eng
bi|b i
bian|b ian
biao|b iao
bie|b ie
bin|b in
bing|b ing
bo|b o
bu|b u
ca|c a
cai|c ai
can|c an
cang|c ang
cao|c ao
ce|c e
cei|c ei
cen|c en
ceng|c eng
cha|ch a
chai|ch ai
chan|ch an
chang|ch ang
chao|ch ao
che|ch e
chen|ch en
cheng|ch eng
chi|ch i
chong|ch ong
chou|ch ou
chu|ch u
chua|ch ua
chuai|ch uai
chuan|ch uan
chuang|ch uang
chui|ch ui
chun|ch un
chuo|ch uo
ci|c i
cong|c ong
cou|c ou
cu|c u
cuan|c uan
cui|c ui
cun|c un
cuo|c uo
da|d a
dai|d ai
dan|d an
dang|d ang
dao|d ao
de|d e
dei|d ei
den|d en
deng|d eng
di|d i
dia|d ia
dian|d ian
diao|d iao
die|d ie
ding|d ing
diu|d iu
dong|d ong
dou|d ou
du|d u
duan|d uan
dui|d ui
dun|d un
duo|d uo
e|e
ei|ei
en|en
eng|eng
er|er
fa|f a
fan|f an
fang|f ang
fei|f ei
fen|f en
feng|f eng
fo|f o
fou|f ou
fu|f u
ga|g a
gai|g ai
gan|g an
gang|g ang
gao|g ao
ge|g e
gei|g ei
gen|g en
geng|g eng
gong|g ong
gou|g ou
gu|g u
gua|g ua
guai|g uai
guan|g uan
guang|g uang
gui|g ui
gun|g un
guo|g uo
ha|h a
hai|h ai
han|h an
hang|h ang
hao|h ao
he|h e
hei|h ei
hen|h en
heng|h eng
hm|h m
hng|h ng
hong|h ong
hou|h ou
hu|h u
hua|h ua
huai|h uai
huan|h uan
huang|h uang
hui|h ui
hun|h un
huo|h uo
ji|j i
jia|j ia
jian|j ian
jiang|j iang
jiao|j iao
jie|j ie
jin|j in
jing|j ing
jiong|j iong
jiu|j iu
ju|j v
juan|j van
jue|j ve
jun|j vn
ka|k a
kai|k ai
kan|k an
kang|k ang
kao|k ao
ke|k e
kei|k ei
ken|k en
keng|k eng
kong|k ong
kou|k ou
ku|k u
kua|k ua
kuai|k uai
kuan|k uan
kuang|k uang
kui|k ui
kun|k un
kuo|k uo
la|l a
lai|l ai
lan|l an
lang|l ang
lao|l ao
le|l e
lei|l ei
leng|l eng
li|l i
lia|l ia
lian|l ian
liang|l iang
liao|l iao
lie|l ie
lin|l in
ling|l ing
liu|l iu
lo|l o
long|l ong
lou|l ou
lu|l u
luan|l uan
lun|l un
luo|l uo
lv|l v
lve|l ve
m|m
ma|m a
mai|m ai
man|m an
mang|m ang
mao|m ao
me|m e
mei|m ei
men|m en
meng|m eng
mi|m i
mian|m ian
miao|m iao
mie|m ie
min|m in
ming|m ing
miu|m iu
mo|m o
mou|m ou
mu|m u
n|n
na|n a
nai|n ai
nan|n an
nang|n ang
nao|n ao
ne|n e
nei|n ei
nen|n en
neng|n eng
ng|n g
ni|n i
nian|n ian
niang|n iang
niao|n iao
nie|n ie
nin|n in
ning|n ing
niu|n iu
nong|n ong
nou|n ou
nu|n u
nuan|n uan
nun|n un
nuo|n uo
nv|n v
nve|n ve
o|o
ou|ou
pa|p a
pai|p ai
pan|p an
pang|p ang
pao|p ao
pei|p ei
pen|p en
peng|p eng
pi|p i
pian|p ian
piao|p iao
pie|p ie
pin|p in
ping|p ing
po|p o
pou|p ou
pu|p u
qi|q i
qia|q ia
qian|q ian
qiang|q iang
qiao|q iao
qie|q ie
qin|q in
qing|q ing
qiong|q iong
qiu|q iu
qu|q v
quan|q van
que|q ve
qun|q vn
ran|r an
rang|r ang
rao|r ao
re|r e
ren|r en
reng|r eng
ri|r i
rong|r ong
rou|r ou
ru|r u
rua|r ua
ruan|r uan
rui|r ui
run|r un
ruo|r uo
sa|s a
sai|s ai
san|s an
sang|s ang
sao|s ao
se|s e
sen|s en
seng|s eng
sha|sh a
shai|sh ai
shan|sh an
shang|sh ang
shao|sh ao
she|sh e
shei|sh ei
shen|sh en
sheng|sh eng
shi|sh i
shou|sh ou
shu|sh u
shua|sh ua
shuai|sh uai
shuan|sh uan
shuang|sh uang
shui|sh ui
shun|sh un
shuo|sh uo
si|s i
song|s ong
sou|s ou
su|s u
suan|s uan
sui|s ui
sun|s un
suo|s uo
ta|t a
tai|t ai
tan|t an
tang|t ang
tao|t ao
te|t e
tei|t ei
teng|t eng
ti|t i
tian|t ian
tiao|t iao
tie|t ie
ting|t ing
tong|t ong
tou|t ou
tu|t u
tuan|t uan
tui|t ui
tun|t un
tuo|t uo
wa|w a
wai|w ai
wan|w an
wang|w ang
wei|w ei
wen|w en
weng|w eng
wo|w o
wu|w u
xi|x i
xia|x ia
xian|x ian
xiang|x iang
xiao|x iao
xie|x ie
xin|x in
xing|x ing
xiong|x iong
xiu|x iu
xu|x v
xuan|x van
xue|x ve
xun|x vn
ya|y a
yan|y an
yang|y ang
yao|y ao
ye|y e
yi|y i
yin|y in
ying|y ing
yo|y o
yong|y ong
you|y ou
yu|y v
yuan|y van
yue|y ve
yun|y vn
za|z a
zai|z ai
zan|z an
zang|z ang
zao|z ao
ze|z e
zei|z ei
zen|z en
zeng|z eng
zha|zh a
zhai|zh ai
zhan|zh an
zhang|zh ang
zhao|zh ao
zhe|zh e
zhei|zh ei
zhen|zh en
zheng|zh eng
zhi|zh i
zhong|zh ong
zhou|zh ou
zhu|zh u
zhua|zh ua
zhuai|zh uai
zhuan|zh uan
zhuang|zh uang
zhui|zh ui
zhun|zh un
zhuo|zh uo
zi|z i
zong|z ong
zou|z ou
zu|z u
zuan|z uan
zui|z ui
zun|z un
zuo|z uo

================================================
FILE: examples/opencpop/svs1/local/preprocess.sh
================================================
#!/bin/bash
# Preprocess the Opencpop corpus for the DiffSinger (svs1) example.
#
# Usage: ./local/preprocess.sh <config_path>
#   config_path: YAML config forwarded to preprocess.py via --config.
#
# BIN_DIR and MAIN_ROOT must be set in the environment (path.sh exports them).
# The dataset location (~/datasets/Opencpop/segments) is hard-coded below.

# Stop at the first failing command so that later stages never run on missing
# or partially written outputs of an earlier stage.
set -e

stage=0
stop_stage=100

config_path=$1

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # extract features
    echo "Extract features ..."
    python3 "${BIN_DIR}/preprocess.py" \
        --dataset=opencpop \
        --rootdir=~/datasets/Opencpop/segments \
        --dumpdir=dump \
        --label-file=~/datasets/Opencpop/segments/transcriptions.txt \
        --config="${config_path}" \
        --num-cpu=20 \
        --cut-sil=True
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # get features' stats(mean and std), one run per feature field
    echo "Get features' stats ..."
    for field_name in speech pitch energy; do
        python3 "${MAIN_ROOT}/utils/compute_statistics.py" \
            --metadata=dump/train/raw/metadata.jsonl \
            --field-name="${field_name}"
    done
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # normalize and convert phone/speaker to id, dev and test should use train's stats
    echo "Normalize ..."
    for subset in train dev test; do
        python3 "${BIN_DIR}/normalize.py" \
            --metadata=dump/${subset}/raw/metadata.jsonl \
            --dumpdir=dump/${subset}/norm \
            --speech-stats=dump/train/speech_stats.npy \
            --pitch-stats=dump/train/pitch_stats.npy \
            --energy-stats=dump/train/energy_stats.npy \
            --phones-dict=dump/phone_id_map.txt \
            --speaker-dict=dump/speaker_id_map.txt
    done
fi

if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    # Get feature(mel) extremum for diffusion stretch
    echo "Get feature(mel) extremum  ..."
    python3 "${BIN_DIR}/get_minmax.py" \
        --metadata=dump/train/norm/metadata.jsonl \
        --speech-stretchs=dump/train/speech_stretchs.npy
fi


================================================
FILE: examples/opencpop/svs1/local/synthesize.sh
================================================
#!/bin/bash
# Synthesize waveforms for the test metadata with a trained DiffSinger
# acoustic model and the Parallel WaveGAN (pwgan_opencpop) vocoder.
#
# Usage: ./local/synthesize.sh <config_path> <train_output_path> <ckpt_name>
#   config_path:       acoustic model config, passed as --am_config
#   train_output_path: experiment directory; the checkpoint is read from its
#                      checkpoints/ subfolder and --output_dir is its test/ subfolder
#   ckpt_name:         checkpoint file name inside checkpoints/
# BIN_DIR must be set in the environment.

stage=0
stop_stage=0

config_path=$1
train_output_path=$2
ckpt_name=$3

# pwgan
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # Directory holding the vocoder config, snapshot and feature statistics.
    vocoder_dir=pwgan_opencpop_ckpt_1.4.0

    synthesize_args=(
        --am=diffsinger_opencpop
        --am_config=${config_path}
        --am_ckpt=${train_output_path}/checkpoints/${ckpt_name}
        --am_stat=dump/train/speech_stats.npy
        --voc=pwgan_opencpop
        --voc_config=${vocoder_dir}/default.yaml
        --voc_ckpt=${vocoder_dir}/snapshot_iter_100000.pdz
        --voc_stat=${vocoder_dir}/feats_stats.npy
        --test_metadata=dump/test/norm/metadata.jsonl
        --output_dir=${train_output_path}/test
        --phones_dict=dump/phone_id_map.txt
        --speech_stretchs=dump/train/speech_stretchs.npy
    )

    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 ${BIN_DIR}/../synthesize.py "${synthesize_args[@]}"
fi


================================================
FILE: examples/opencpop/svs1/local/synthesize_e2e.sh
================================================
#!/bin/bash
# End-to-end singing synthesis from sentences_sing.txt with a trained
# DiffSinger checkpoint and a GAN vocoder.
#
# Usage: ./local/synthesize_e2e.sh <config_path> <train_output_path> <ckpt_name> [stage]
#   stage 0 (default): pwgan vocoder
#   stage 1:           hifigan vocoder
# BIN_DIR must be set in the environment.

config_path=$1
train_output_path=$2
ckpt_name=$3

# The optional 4th argument sets both bounds, so at most one stage runs.
stage=${4:-0}
stop_stage=${4:-0}

# Run synthesize_e2e.py with the DiffSinger acoustic model and one vocoder.
#   $1: value for --voc
#   $2: directory holding the vocoder's default.yaml, snapshot and feats_stats.npy
#   $3: snapshot file name inside that directory
run_synthesize_e2e() {
    local voc_name=$1
    local voc_dir=$2
    local voc_snapshot=$3

    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 ${BIN_DIR}/../synthesize_e2e.py \
        --am=diffsinger_opencpop \
        --am_config=${config_path} \
        --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
        --am_stat=dump/train/speech_stats.npy \
        --voc=${voc_name} \
        --voc_config=${voc_dir}/default.yaml \
        --voc_ckpt=${voc_dir}/${voc_snapshot} \
        --voc_stat=${voc_dir}/feats_stats.npy \
        --lang=sing \
        --text=${BIN_DIR}/../../assets/sentences_sing.txt \
        --output_dir=${train_output_path}/test_e2e \
        --phones_dict=dump/phone_id_map.txt \
        --speech_stretchs=dump/train/speech_stretchs.npy \
        --pinyin_phone=local/pinyin_to_phone.txt
}

# pwgan
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    run_synthesize_e2e pwgan_opencpop pwgan_opencpop_ckpt_1.4.0 snapshot_iter_100000.pdz
fi

# for more GAN Vocoders
# hifigan
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    echo "in hifigan syn_e2e"
    run_synthesize_e2e hifigan_opencpop hifigan_opencpop_ckpt_1.4.0 snapshot_iter_625000.pdz
fi


================================================
FILE: examples/opencpop/svs1/local/train.sh
================================================
#!/bin/bash
# Launch DiffSinger training on the preprocessed dump/ directory.
#
# Usage: ./local/train.sh <config_path> <train_output_path>
#   config_path:       training config, passed as --config
#   train_output_path: experiment directory, passed as --output-dir
# BIN_DIR must be set in the environment.

config_path=$1
train_output_path=$2

train_args=(
    --train-metadata=dump/train/norm/metadata.jsonl
    --dev-metadata=dump/dev/norm/metadata.jsonl
    --config=${config_path}
    --output-dir=${train_output_path}
    --ngpu=1
    --phones-dict=dump/phone_id_map.txt
    --speech-stretchs=dump/train/speech_stretchs.npy
)

python3 ${BIN_DIR}/train.py "${train_args[@]}"


================================================
FILE: examples/opencpop/svs1/path.sh
================================================
#!/bin/bash
# Environment setup for the opencpop/svs1 (DiffSinger) example.
# Sourced by run.sh from the example directory.

# Directory three levels above the current working directory.
export MAIN_ROOT=$(realpath ${PWD}/../../../)

# Location of the experiment scripts for the selected model.
MODEL=diffsinger
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}

# Make the repository and its utils reachable from the shell and from Python.
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

export LC_ALL=C
export PYTHONDONTWRITEBYTECODE=1
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8


================================================
FILE: examples/opencpop/svs1/run.sh
================================================
#!/bin/bash
# Driver for the opencpop/svs1 example:
#   stage 0: preprocess, stage 1: train, stage 2: synthesize,
#   stage 3: end-to-end synthesize.

set -e
source path.sh

gpus=0
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_320000.pdz

# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this can not be mixed use with `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

# Succeeds when stage number $1 lies within [stage, stop_stage].
stage_enabled() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# prepare data
if stage_enabled 0; then
    ./local/preprocess.sh ${conf_path} || exit -1
fi

# train model, all `ckpt` under `train_output_path/checkpoints/` dir
if stage_enabled 1; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

# synthesize, vocoder is pwgan by default
if stage_enabled 2; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

# synthesize_e2e, vocoder is pwgan by default, use 1 will use hifigan as vocoder
if stage_enabled 3; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
fi


================================================
FILE: examples/opencpop/voc1/README.md
================================================
# Parallel WaveGAN with Opencpop
This example contains code used to train a [parallel wavegan](http://arxiv.org/abs/1910.11480) model with [Mandarin singing corpus](https://wenet.org.cn/opencpop/).

## Dataset
### Download and Extract
Download Opencpop from its [Official Website](https://wenet.org.cn/opencpop/download/) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/Opencpop`.

## Get Started
Assume the path to the dataset is `~/datasets/Opencpop`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.

```text
dump
├── dev
│   ├── norm
│   └── raw
├── test
│   ├── norm
│   └── raw
└── train
    ├── norm
    ├── raw
    └── feats_stats.npy
```
The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The `raw` folder contains the log magnitude of the mel spectrogram of each utterance, while the norm folder contains the normalized spectrogram. The statistics used to normalize the spectrogram are computed from the training set, which is located in `dump/train/feats_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains id and paths to the spectrogram of each utterance.

### Model Training
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
`./local/train.sh` calls `${BIN_DIR}/train.py`.
Here's the complete help message.

```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU] [--batch-size BATCH_SIZE] [--max-iter MAX_ITER]
                [--run-benchmark RUN_BENCHMARK]
                [--profiler_options PROFILER_OPTIONS]

Train a ParallelWaveGAN model.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       ParallelWaveGAN config file.
  --train-metadata TRAIN_METADATA
                        training data.
  --dev-metadata DEV_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.

benchmark:
  arguments related to benchmark.

  --batch-size BATCH_SIZE
                        batch size.
  --max-iter MAX_ITER   train max steps.
  --run-benchmark RUN_BENCHMARK
                        runing benchmark or not, if True, use the --batch-size
                        and --max-iter.
  --profiler_options PROFILER_OPTIONS
                        The option of profiler, which should be in format
                        "key1=value1;key2=value2;key3=value3".
```

1. `--config` is a config file in yaml format to overwrite the default config, which can be found at `conf/default.yaml`.
2. `--train-metadata` and `--dev-metadata` should be the metadata file in the normalized subfolder of `train` and `dev` in the `dump` folder.
3. `--output-dir` is the directory to save the results of the experiment. Checkpoints are saved in `checkpoints/` inside this directory.
4. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

### Synthesizing
`./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
```text
usage: synthesize.py [-h] [--generator-type GENERATOR_TYPE] [--config CONFIG]
                     [--checkpoint CHECKPOINT] [--test-metadata TEST_METADATA]
                     [--output-dir OUTPUT_DIR] [--ngpu NGPU]

Synthesize with GANVocoder.

optional arguments:
  -h, --help            show this help message and exit
  --generator-type GENERATOR_TYPE
                        type of GANVocoder, should in {pwgan, mb_melgan,
                        style_melgan, } now
  --config CONFIG       GANVocoder config file.
  --checkpoint CHECKPOINT
                        snapshot to load.
  --test-metadata TEST_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
```

1. `--config` parallel wavegan config file. You should use the same config with which the model is trained.
2. `--checkpoint` is the checkpoint to load. Pick one of the checkpoints from `checkpoints` inside the training output directory.
3. `--test-metadata` is the metadata of the test dataset. Use the `metadata.jsonl` in the `test/norm` subfolder from the processed directory.
4. `--output-dir` is the directory to save the synthesized audio files.
5. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

## Pretrained Models
The pretrained model can be downloaded here:
- [pwgan_opencpop_ckpt_1.4.0](https://paddlespeech.cdn.bcebos.com/t2s/svs/opencpop/pwgan_opencpop_ckpt_1.4.0.zip)


Parallel WaveGAN checkpoint contains files listed below.

```text
pwgan_opencpop_ckpt_1.4.0
├── default.yaml                    # default config used to train parallel wavegan
├── snapshot_iter_100000.pdz        # generator parameters of parallel wavegan
└── feats_stats.npy                 # statistics used to normalize spectrogram when training parallel wavegan
```
## Acknowledgement
We adapted some code from https://github.com/kan-bayashi/ParallelWaveGAN.


================================================
FILE: examples/opencpop/voc1/conf/default.yaml
================================================
# This is the hyperparameter configuration file for Parallel WaveGAN.
# Please make sure this is adjusted for the Opencpop dataset. If you want to
# apply it to another dataset, you might need to carefully change some parameters.
# This configuration requires 12 GB GPU memory and takes ~3 days on RTX TITAN.

###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
fs: 24000                # Sampling rate.
n_fft: 512              # FFT size (samples).
n_shift: 128             # Hop size (samples). ~5.3ms at fs=24000
win_length: 512         # Window length (samples). ~21.3ms at fs=24000
                         # If set to null, it will be the same as n_fft.
window: "hann"           # Window function.
n_mels: 80               # Number of mel basis.
fmin: 30                 # Minimum freq in mel basis calculation. (Hz)
fmax: 12000               # Maximum frequency in mel basis calculation. (Hz)


###########################################################
#         GENERATOR NETWORK ARCHITECTURE SETTING          #
###########################################################
generator_params:
    in_channels: 1        # Number of input channels.
    out_channels: 1       # Number of output channels.
    kernel_size: 3        # Kernel size of dilated convolution.
    layers: 30            # Number of residual block layers.
    stacks: 3             # Number of stacks i.e., dilation cycles.
    residual_channels: 64 # Number of channels in residual conv.
    gate_channels: 128    # Number of channels in gated conv.
    skip_channels: 64     # Number of channels in skip conv.
    aux_channels: 80      # Number of channels for auxiliary feature conv.
                          # Must be the same as n_mels.
    aux_context_window: 2 # Context window size for auxiliary feature.
                          # If set to 2, previous 2 and future 2 frames will be considered.
    dropout: 0.0          # Dropout rate. 0.0 means no dropout applied.
    bias: True            # use bias in residual blocks
    use_weight_norm: True # Whether to use weight norm.
                          # If set to true, it will be applied to all of the conv layers.
    use_causal_conv: False               # use causal conv in residual blocks and upsample layers
    upsample_scales: [8, 4, 2, 2]     # Upsampling scales. Product of these must be the same as hop size.
    interpolate_mode: "nearest" # upsample net interpolate mode
    freq_axis_kernel_size: 1 # upsampling net: convolution kernel size in frequency axis
    nonlinear_activation: null
    nonlinear_activation_params: {}

###########################################################
#       DISCRIMINATOR NETWORK ARCHITECTURE SETTING        #
###########################################################
discriminator_params:
    in_channels: 1        # Number of input channels.
    out_channels: 1       # Number of output channels.
    kernel_size: 3        # Kernel size of conv layers.
    layers: 10            # Number of conv layers.
    conv_channels: 64     # Number of channels in conv layers.
    bias: True            # Whether to use bias parameter in conv.
    use_weight_norm: True # Whether to use weight norm.
                          # If set to true, it will be applied to all of the conv layers.
    nonlinear_activation: "leakyrelu" # Nonlinear function after each conv.
    nonlinear_activation_params:      # Nonlinear function parameters
        negative_slope: 0.2           # Alpha in leakyrelu.

###########################################################
#                   STFT LOSS SETTING                     #
###########################################################
stft_loss_params:
    fft_sizes: [1024, 2048, 512]  # List of FFT size for STFT-based loss.
    hop_sizes: [120, 240, 50]     # List of hop size for STFT-based loss
    win_lengths: [600, 1200, 240] # List of window length for STFT-based loss.
    window: "hann"         # Window function for STFT-based loss

###########################################################
#               ADVERSARIAL LOSS SETTING                  #
###########################################################
lambda_adv: 4.0  # Loss balancing coefficient.

###########################################################
#                  DATA LOADER SETTING                    #
###########################################################
batch_size: 8              # Batch size.
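batch_max_steps: 25500     # Length of each audio in batch. Should be divisible by n_shift. NOTE(review): 25500 is not a multiple of n_shift=128 - confirm how the data loader handles the remainder.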
num_workers: 1             # Number of workers in DataLoader.

###########################################################
#             OPTIMIZER & SCHEDULER SETTING               #
###########################################################
generator_optimizer_params:
    epsilon: 1.0e-6        # Generator's epsilon.
    weight_decay: 0.0      # Generator's weight decay coefficient.
generator_scheduler_params:
    learning_rate: 0.0001  # Generator's learning rate.
    step_size: 200000      # Generator's scheduler step size.
    gamma: 0.5             # Generator's scheduler gamma.
                           # At each step size, lr will be multiplied by this parameter.
generator_grad_norm: 10    # Generator's gradient norm.
discriminator_optimizer_params:
    epsilon: 1.0e-6            # Discriminator's epsilon.
    weight_decay: 0.0          # Discriminator's weight decay coefficient.
discriminator_scheduler_params:
    learning_rate: 0.00005     # Discriminator's learning rate.
    step_size: 200000          # Discriminator's scheduler step size.
    gamma: 0.5                 # Discriminator's scheduler gamma.
                               # At each step size, lr will be multiplied by this parameter.
discriminator_grad_norm: 1     # Discriminator's gradient norm.

###########################################################
#                    INTERVAL SETTING                     #
###########################################################
discriminator_train_start_steps: 100000 # Number of steps to start to train discriminator.
train_max_steps: 400000                 # Number of training steps.
save_interval_steps: 5000               # Interval steps to save checkpoint.
eval_interval_steps: 1000               # Interval steps to evaluate the network.

###########################################################
#                     OTHER SETTING                       #
###########################################################
num_save_intermediate_results: 4  # Number of results to be saved as intermediate results.
num_snapshots: 10                 # max number of snapshots to keep while training
seed: 42                          # random seed for paddle, random, and np.random


================================================
FILE: examples/opencpop/voc1/local/dygraph_to_static.sh
================================================
#!/bin/bash

# Export a trained `pwgan_opencpop` vocoder checkpoint with dygraph_to_static.py.
#
# Usage: ./local/dygraph_to_static.sh <config_path> <train_output_path> <ckpt_name>
#   config_path:       vocoder config file, forwarded as --voc_config
#   train_output_path: experiment directory; the checkpoint is read from its
#                      checkpoints/ sub-directory
#   ckpt_name:         checkpoint file name inside checkpoints/
#
# BIN_DIR must already be set in the environment
# (presumably exported by path.sh -- confirm).

config_path=$1
train_output_path=$2
ckpt_name=$3

# The exported model is written below the experiment directory that was passed
# in rather than a hard-coded exp/default, so a custom train_output_path does
# not read its checkpoint from one experiment and write into another.
# For the default train_output_path (exp/default) the location is unchanged.
FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 ${BIN_DIR}/../../dygraph_to_static.py \
    --type=voc \
    --voc=pwgan_opencpop \
    --voc_config=${config_path} \
    --voc_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
    --voc_stat=dump/train/feats_stats.npy \
    --inference_dir=${train_output_path}/inference/


================================================
FILE: examples/opencpop/voc1/local/preprocess.sh
================================================
#!/bin/bash

# Data preparation for the Opencpop vocoder recipe.
#
# Usage: ./local/preprocess.sh <config_path>
#   config_path: feature-extraction config, forwarded as --config
#
# Stages (selected with the `stage` / `stop_stage` variables below):
#   1. extract features into dump/
#   2. compute statistics of the "feats" field over the training set
#   3. normalize train/dev/test using the training-set statistics
#
# BIN_DIR and MAIN_ROOT must already be set in the environment
# (presumably exported by path.sh -- confirm).

# Abort on the first failing command. Every stage consumes the files written by
# the previous one; without this the script's exit status only reflects the
# last command, so an earlier failure could go unnoticed by the caller.
set -e

stage=0
stop_stage=100

config_path=$1


if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # extract features
    # NOTE(review): bash does not expand `~` after `--opt=`, so the paths are
    # passed literally; presumably preprocess.py expands them -- confirm.
    echo "Extract features ..."
    python3 ${BIN_DIR}/../preprocess.py \
        --rootdir=~/datasets/Opencpop/segments/ \
        --dataset=opencpop \
        --dumpdir=dump \
        --dur-file=~/datasets/Opencpop/segments/transcriptions.txt \
        --config=${config_path} \
        --cut-sil=False \
        --num-cpu=20
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # get features' stats(mean and std)
    # (stage 3 expects the result at dump/train/feats_stats.npy)
    echo "Get features' stats ..."
    python3 ${MAIN_ROOT}/utils/compute_statistics.py \
        --metadata=dump/train/raw/metadata.jsonl \
        --field-name="feats"
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # normalize, dev and test should use train's stats
    echo "Normalize ..."

    for split in train dev test; do
        python3 ${BIN_DIR}/../normalize.py \
            --metadata=dump/${split}/raw/metadata.jsonl \
            --dumpdir=dump/${split}/norm \
            --stats=dump/train/feats_stats.npy
    done
fi


================================================
FILE: examples/opencpop/voc1/run.sh
================================================
#!/bin/bash

# Driver for the Opencpop voc1 recipe: runs the numbered stages below
# (data preparation, training, synthesis, static export, PTQ) in order.

set -e
source path.sh

gpus=0
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_100000.pdz

# The variables above can be overridden from the command line, e.g.
# `./run.sh --stage 0 --stop-stage 0` selects the stage range to run.
# Do not combine these options with positional arguments (`$1`, `$2`, ...).
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

# Succeeds when stage number $1 lies inside [stage, stop_stage].
stage_enabled() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# stage 0: prepare data
if stage_enabled 0; then
    ./local/preprocess.sh ${conf_path} || exit -1
fi

# stage 1: train model, all `ckpt` under `train_output_path/checkpoints/` dir
if stage_enabled 1; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

# stage 2: synthesize
if stage_enabled 2; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

# stage 3: dygraph to static
if stage_enabled 3; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/dygraph_to_static.sh  ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

# stage 4: PTQ_static
if stage_enabled 4; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/PTQ_static.sh  ${train_output_path} pwgan_opencpop || exit -1
fi


================================================
FILE: examples/opencpop/voc5/conf/default.yaml
================================================
# This is the configuration file for the Opencpop dataset.
# This configuration is based on HiFiGAN V1, which is an official configuration. 
# But I found that the optimizer setting does not work well with my implementation.
# So I changed optimizer settings as follows:
# - AdamW -> Adam
# - betas: [0.8, 0.99] -> betas: [0.5, 0.9]
# - Scheduler: ExponentialLR -> MultiStepLR
# To match the shift size difference, the upsample scales is also modified from the original 256 shift setting.

###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
fs: 24000                # Sampling rate.
n_fft: 512              # FFT size (samples).
n_shift: 128             # Hop size (samples). ~5.3ms at fs=24000
win_length: 512         # Window length (samples). ~21.3ms at fs=24000
                         # If set to null, it will be the same as fft_size.
window: "hann"           # Window function.
n_mels: 80               # Number of mel basis.
fmin: 80                 # Minimum freq in mel basis calculation. (Hz)
fmax: 12000               # Maximum frequency in mel basis calculation. (Hz)

###########################################################
#         GENERATOR NETWORK ARCHITECTURE SETTING          #
###########################################################
generator_params:
    in_channels: 80                       # Number of input channels.
    out_channels: 1                       # Number of output channels.
    channels: 512                         # Number of initial channels.
    kernel_size: 7                        # Kernel size of initial and final conv layers.
    upsample_scales: [8, 4, 2, 2]         # Upsampling scales.
    upsample_kernel_sizes: [16, 8, 4, 4] # Kernel size for upsampling layers.
    resblock_kernel_sizes: [3, 7, 11]     # Kernel size for residual blocks.
    resblock_dilations:                   # Dilations for residual blocks.
        - [1, 3, 5]
        - [1, 3, 5]
        - [1, 3, 5]
    use_additional_convs: True            # Whether to use additional conv layer in residual blocks.
    bias: True                            # Whether to use bias parameter in conv.
    nonlinear_activation: "leakyrelu"     # Nonlinear activation type.
    nonlinear_activation_params:          # Nonlinear activation parameters.
        negative_slope: 0.1
    use_weight_norm: True                 # Whether to apply weight normalization.


###########################################################
#       DISCRIMINATOR NETWORK ARCHITECTURE SETTING        #
###########################################################
discriminator_params:
    scales: 3                              # Number of multi-scale discriminator.
    scale_downsample_pooling: "AvgPool1D"  # Pooling operation for scale discriminator.
    scale_downsample_pooling_params:
        kernel_size: 4                     # Pooling kernel size.
        stride: 2                          # Pooling stride.
        padding: 2                         # Padding size.
    scale_discriminator_params:
        in_channels: 1                     # Number of input channels.
        out_channels: 1                    # Number of output channels.
        kernel_sizes: [15, 41, 5, 3]       # List of kernel sizes.
        channels: 128                      # Initial number of channels.
        max_downsample_channels: 1024      # Maximum number of channels in downsampling conv layers.
        max_groups: 16                     # Maximum number of groups in downsampling conv layers.
        bias: True
        downsample_scales: [4, 4, 4, 4, 1] # Downsampling scales.
        nonlinear_activation: "leakyrelu"  # Nonlinear activation.
        nonlinear_activation_params:
            negative_slope: 0.1
    follow_official_norm: True             # Whether to follow the official norm setting.
    periods: [2, 3, 5, 7, 11]              # List of period for multi-period discriminator.
    period_discriminator_params:
        in_channels: 1                     # Number of input channels.
        out_channels: 1                    # Number of output channels.
        kernel_sizes: [5, 3]               # List of kernel sizes.
        channels: 32                       # Initial number of channels.
        downsample_scales: [3, 3, 3, 3, 1] # Downsampling scales.
        max_downsample_channels: 1024      # Maximum number of channels in downsampling conv layers.
        bias: True                         # Whether to use bias parameter in conv layer.
        nonlinear_activation: "leakyrelu"  # Nonlinear activation.
        nonlinear_activation_params:       # Nonlinear activation parameters.
            negative_slope: 0.1
        use_weight_norm: True              # Whether to apply weight normalization.
        use_spectral_norm: False           # Whether to apply spectral normalization.
    

###########################################################
#                   STFT LOSS SETTING                     #
###########################################################
use_stft_loss: False                 # Whether to use multi-resolution STFT loss.
use_mel_loss: True                   # Whether to use Mel-spectrogram loss.
mel_loss_params:
    fs: 24000
    fft_size: 512
    hop_size: 128
    win_length: 512
    window: "hann"
    num_mels: 80
    fmin: 30
    fmax: 12000
    log_base: null
generator_adv_loss_params:
    average_by_discriminators: False # Whether to average loss by #discriminators.
discriminator_adv_loss_params:
    average_by_discriminators: False # Whether to average loss by #discriminators.
use_feat_match_loss: True
feat_match_loss_params:
    average_by_discriminators: False # Whether to average loss by #discriminators.
    average_by_layers: False         # Whether to average loss by #layers in each discriminator.
    include_final_outputs: False     # Whether to include final outputs in feat match loss calculation.

###########################################################
#               ADVERSARIAL LOSS SETTING                  #
###########################################################
lambda_aux: 45.0       # Loss balancing coefficient for STFT loss.
lambda_adv: 1.0        # Loss balancing coefficient for adversarial loss.
lambda_feat_match: 2.0 # Loss balancing coefficient for feat match loss.

###########################################################
#                  DATA LOADER SETTING                    #
###########################################################
batch_size: 16              # Batch size.
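batch_max_steps: 8400       # Length of each audio in batch. Make sure divisible by hop_size.
                            # NOTE(review): 8400 is not a multiple of n_shift (128); confirm the data loader rounds it to a valid length.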
num_workers: 1              # Number of workers in DataLoader.

###########################################################
#             OPTIMIZER & SCHEDULER SETTING               #
###########################################################
generator_optimizer_params:
    beta1: 0.5
    beta2: 0.9
    weight_decay: 0.0                   # Generator's weight decay coefficient.
generator_scheduler_params:
    learning_rate: 2.0e-4               # Generator's learning rate.
    gamma: 0.5                          # Generator's scheduler gamma.
    milestones:                         # At each milestone, lr will be multiplied by gamma.
        - 200000
        - 400000
        - 600000
        - 800000
generator_grad_norm: -1                 # Generator's gradient norm.
discriminator_optimizer_params:
    beta1: 0.5
    beta2: 0.9
    weight_decay: 0.0                   # Discriminator's weight decay coefficient.
discriminator_scheduler_params:
    learning_rate: 2.0e-4               # Discriminator's learning rate.
    gamma: 0.5                          # Discriminator's scheduler gamma.
    milestones:                         # At each milestone, lr will be multiplied by gamma.
        - 200000
        - 400000
        - 600000
        - 800000    
discriminator_grad_norm: -1             # Discriminator's gradient norm.            

###########################################################
#                    INTERVAL SETTING                     #
###########################################################
generator_train_start_steps: 1     # Number of steps to start to train generator.
discriminator_train_start_steps: 0 # Number of steps to start to train discriminator.
train_max_steps: 2500000           # Number of training steps.
save_interval_steps: 5000         # Interval steps to save checkpoint.
eval_interval_steps: 1000          # Interval steps to evaluate the network.

###########################################################
#                     OTHER SETTING                       #
###########################################################
num_snapshots: 4                 # max number of snapshots to keep while training
seed: 42                          # random seed for paddle, random, and np.random


================================================
FILE: examples/opencpop/voc5/conf/finetune.yaml
================================================
# This is the configuration file for the Opencpop dataset.
# This configuration is based on HiFiGAN V1, which is an official configuration. 
# But I found that the optimizer setting does not work well with my implementation.
# So I changed optimizer settings as follows:
# - AdamW -> Adam
# - betas: [0.8, 0.99] -> betas: [0.5, 0.9]
# - Scheduler: ExponentialLR -> MultiStepLR
# To match the shift size difference, the upsample scales is also modified from the original 256 shift setting.

###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
fs: 24000                # Sampling rate.
n_fft: 512              # FFT size (samples).
n_shift: 128             # Hop size (samples). ~5.3ms at fs=24000
win_length: 512         # Window length (samples). ~21.3ms at fs=24000
                         # If set to null, it will be the same as fft_size.
window: "hann"           # Window function.
n_mels: 80               # Number of mel basis.
fmin: 80                 # Minimum freq in mel basis calculation. (Hz)
fmax: 12000               # Maximum frequency in mel basis calculation. (Hz)

###########################################################
#         GENERATOR NETWORK ARCHITECTURE SETTING          #
###########################################################
generator_params:
    in_channels: 80                       # Number of input channels.
    out_channels: 1                       # Number of output channels.
    channels: 512                         # Number of initial channels.
    kernel_size: 7                        # Kernel size of initial and final conv layers.
    upsample_scales: [8, 4, 2, 2]         # Upsampling scales.
    upsample_kernel_sizes: [16, 8, 4, 4] # Kernel size for upsampling layers.
    resblock_kernel_sizes: [3, 7, 11]     # Kernel size for residual blocks.
    resblock_dilations:                   # Dilations for residual blocks.
        - [1, 3, 5]
        - [1, 3, 5]
        - [1, 3, 5]
    use_additional_convs: True            # Whether to use additional conv layer in residual blocks.
    bias: True                            # Whether to use bias parameter in conv.
    nonlinear_activation: "leakyrelu"     # Nonlinear activation type.
    nonlinear_activation_params:          # Nonlinear activation parameters.
        negative_slope: 0.1
    use_weight_norm: True                 # Whether to apply weight normalization.


###########################################################
#       DISCRIMINATOR NETWORK ARCHITECTURE SETTING        #
###########################################################
discriminator_params:
    scales: 3                              # Number of multi-scale discriminator.
    scale_downsample_pooling: "AvgPool1D"  # Pooling operation for scale discriminator.
    scale_downsample_pooling_params:
        kernel_size: 4                     # Pooling kernel size.
        stride: 2                          # Pooling stride.
        padding: 2                         # Padding size.
    scale_discriminator_params:
        in_channels: 1                     # Number of input channels.
        out_channels: 1                    # Number of output channels.
        kernel_sizes: [15, 41, 5, 3]       # List of kernel sizes.
        channels: 128                      # Initial number of channels.
        max_downsample_channels: 1024      # Maximum number of channels in downsampling conv layers.
        max_groups: 16                     # Maximum number of groups in downsampling conv layers.
        bias: True
        downsample_scales: [4, 4, 4, 4, 1] # Downsampling scales.
        nonlinear_activation: "leakyrelu"  # Nonlinear activation.
        nonlinear_activation_params:
            negative_slope: 0.1
    follow_official_norm: True             # Whether to follow the official norm setting.
    periods: [2, 3, 5, 7, 11]              # List of period for multi-period discriminator.
    period_discriminator_params:
        in_channels: 1                     # Number of input channels.
        out_channels: 1                    # Number of output channels.
        kernel_sizes: [5, 3]               # List of kernel sizes.
        channels: 32                       # Initial number of channels.
        downsample_scales: [3, 3, 3, 3, 1] # Downsampling scales.
        max_downsample_channels: 1024      # Maximum number of channels in downsampling conv layers.
        bias: True                         # Whether to use bias parameter in conv layer.
        nonlinear_activation: "leakyrelu"  # Nonlinear activation.
        nonlinear_activation_params:       # Nonlinear activation parameters.
            negative_slope: 0.1
        use_weight_norm: True              # Whether to apply weight normalization.
        use_spectral_norm: False           # Whether to apply spectral normalization.
    

###########################################################
#                   STFT LOSS SETTING                     #
###########################################################
use_stft_loss: False                 # Whether to use multi-resolution STFT loss.
use_mel_loss: True                   # Whether to use Mel-spectrogram loss.
mel_loss_params:
    fs: 24000
    fft_size: 512
    hop_size: 128
    win_length: 512
    window: "hann"
    num_mels: 80
    fmin: 30
    fmax: 12000
    log_base: null
generator_adv_loss_params:
    average_by_discriminators: False # Whether to average loss by #discriminators.
discriminator_adv_loss_params:
    average_by_discriminators: False # Whether to average loss by #discriminators.
use_feat_match_loss: True
feat_match_loss_params:
    average_by_discriminators: False # Whether to average loss by #discriminators.
    average_by_layers: False         # Whether to average loss by #layers in each discriminator.
    include_final_outputs: False     # Whether to include final outputs in feat match loss calculation.

###########################################################
#               ADVERSARIAL LOSS SETTING                  #
###########################################################
lambda_aux: 45.0       # Loss balancing coefficient for STFT loss.
lambda_adv: 1.0        # Loss balancing coefficient for adversarial loss.
lambda_feat_match: 2.0 # Loss balancing coefficient for feat match loss.

###########################################################
#                  DATA LOADER SETTING                    #
###########################################################
#batch_size: 16              # Batch size.
batch_size: 1              # Batch size.
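batch_max_steps: 8400       # Length of each audio in batch. Make sure divisible by hop_size.
                            # NOTE(review): 8400 is not a multiple of n_shift (128); confirm the data loader rounds it to a valid length.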
num_workers: 1              # Number of workers in DataLoader.

###########################################################
#             OPTIMIZER & SCHEDULER SETTING               #
###########################################################
generator_optimizer_params:
    beta1: 0.5
    beta2: 0.9
    weight_decay: 0.0                   # Generator's weight decay coefficient.
generator_scheduler_params:
    learning_rate: 2.0e-4               # Generator's learning rate.
    gamma: 0.5                          # Generator's scheduler gamma.
    milestones:                         # At each milestone, lr will be multiplied by gamma.
        - 200000
        - 400000
        - 600000
        - 800000
generator_grad_norm: -1                 # Generator's gradient norm.
discriminator_optimizer_params:
    beta1: 0.5
    beta2: 0.9
    weight_decay: 0.0                   # Discriminator's weight decay coefficient.
discriminator_scheduler_params:
    learning_rate: 2.0e-4               # Discriminator's learning rate.
    gamma: 0.5                          # Discriminator's scheduler gamma.
    milestones:                         # At each milestone, lr will be multiplied by gamma.
        - 200000
        - 400000
        - 600000
        - 800000    
discriminator_grad_norm: -1             # Discriminator's gradient norm.            

###########################################################
#                    INTERVAL SETTING                     #
###########################################################
generator_train_start_steps: 1     # Number of steps to start to train generator.
discriminator_train_start_steps: 0 # Number of steps to start to train discriminator.
train_max_steps: 2600000           # Number of training steps.
save_interval_steps: 5000         # Interval steps to save checkpoint.
eval_interval_steps: 1000          # Interval steps to evaluate the network.

###########################################################
#                     OTHER SETTING                       #
###########################################################
num_snapshots: 4                 # max number of snapshots to keep while training
seed: 42                          # random seed for paddle, random, and np.random


================================================
FILE: examples/opencpop/voc5/finetune.sh
================================================
#!/bin/bash

# Fine-tune the Opencpop voc5 vocoder on mels generated by a pretrained
# DiffSinger model.
#
# Stages (select a range with --stage / --stop-stage):
#   0. run gen_gta_mel.py with the DiffSinger checkpoint, writing to dump_finetune
#   1. run link_wav.py from the original dump/ into dump_finetune
#   2. reuse the training-set feature statistics of the original dump
#   3. normalize train/dev/test of dump_finetune with those statistics
#   4. prepare exp/finetune from the checkpoints in exp/default
#   5. train with conf/finetune.yaml
#
# path.sh is expected to provide MAIN_ROOT and BIN_DIR.

# Abort on the first failing command: every stage depends on the output of the
# previous one, so continuing after an error would only hide the real failure.
set -e
source path.sh

gpus=0
stage=0
stop_stage=100

source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # NOTE(review): bash does not expand `~` after `--opt=`, so the paths are
    # passed literally; presumably gen_gta_mel.py expands them -- confirm.
    python3 ${MAIN_ROOT}/paddlespeech/t2s/exps/diffsinger/gen_gta_mel.py \
        --diffsinger-config=diffsinger_opencpop_ckpt_1.4.0/default.yaml \
        --diffsinger-checkpoint=diffsinger_opencpop_ckpt_1.4.0/snapshot_iter_160000.pdz \
        --diffsinger-stat=diffsinger_opencpop_ckpt_1.4.0/speech_stats.npy \
        --diffsinger-stretch=diffsinger_opencpop_ckpt_1.4.0/speech_stretchs.npy \
        --dur-file=~/datasets/Opencpop/segments/transcriptions.txt \
        --output-dir=dump_finetune \
        --phones-dict=diffsinger_opencpop_ckpt_1.4.0/phone_id_map.txt \
        --dataset=opencpop \
        --rootdir=~/datasets/Opencpop/segments/
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    python3 ${MAIN_ROOT}/utils/link_wav.py \
        --old-dump-dir=dump \
        --dump-dir=dump_finetune
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # get features' stats(mean and std): copied from the original dump
    # instead of being recomputed
    echo "Get features' stats ..."
    cp dump/train/feats_stats.npy dump_finetune/train/
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # normalize, dev and test should use train's stats
    echo "Normalize ..."

    for split in train dev test; do
        python3 ${BIN_DIR}/../normalize.py \
            --metadata=dump_finetune/${split}/raw/metadata.jsonl \
            --dumpdir=dump_finetune/${split}/norm \
            --stats=dump_finetune/train/feats_stats.npy
    done
fi

# create finetune env
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    echo "create finetune env"
    python3 local/prepare_env.py \
        --pretrained_model_dir=exp/default/checkpoints/ \
        --output_dir=exp/finetune/
fi

# finetune
if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
    # python3, like every other stage, so the whole script runs under one
    # interpreter (plain `python` may be missing or a different version).
    CUDA_VISIBLE_DEVICES=${gpus} \
    FLAGS_cudnn_exhaustive_search=true \
    FLAGS_conv_workspace_size_limit=4000 \
    python3 ${BIN_DIR}/train.py \
        --train-metadata=dump_finetune/train/norm/metadata.jsonl \
        --dev-metadata=dump_finetune/dev/norm/metadata.jsonl \
        --config=conf/finetune.yaml \
        --output-dir=exp/finetune \
        --ngpu=1
fi


================================================
FILE: examples/opencpop/voc5/local/dygraph_to_static.sh
================================================
#!/bin/bash

# Export a trained `hifigan_opencpop` vocoder checkpoint with dygraph_to_static.py.
#
# Usage: ./local/dygraph_to_static.sh <config_path> <train_output_path> <ckpt_name>
#   config_path:       vocoder config file, forwarded as --voc_config
#   train_output_path: experiment directory; the checkpoint is read from its
#                      checkpoints/ sub-directory
#   ckpt_name:         checkpoint file name inside checkpoints/
#
# BIN_DIR must already be set in the environment
# (presumably exported by path.sh -- confirm).

config_path=$1
train_output_path=$2
ckpt_name=$3

# The exported model is written below the experiment directory that was passed
# in rather than a hard-coded exp/default, so a custom train_output_path does
# not read its checkpoint from one experiment and write into another.
# For the default train_output_path (exp/default) the location is unchanged.
FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 ${BIN_DIR}/../../dygraph_to_static.py \
    --type=voc \
    --voc=hifigan_opencpop \
    --voc_config=${config_path} \
    --voc_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
    --voc_stat=dump/train/feats_stats.npy \
    --inference_dir=${train_output_path}/inference/


================================================
FILE: examples/opencpop/voc5/run.sh
================================================
#!/bin/bash

# Driver for the Opencpop voc5 recipe: runs the numbered stages below
# (data preparation, training, synthesis, static export, PTQ) in order.

set -e
source path.sh

gpus=0
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_2500000.pdz

# The variables above can be overridden from the command line, e.g.
# `./run.sh --stage 0 --stop-stage 0` selects the stage range to run.
# Do not combine these options with positional arguments (`$1`, `$2`, ...).
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

# Succeeds when stage number $1 lies inside [stage, stop_stage].
stage_enabled() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# stage 0: prepare data
if stage_enabled 0; then
    ./local/preprocess.sh ${conf_path} || exit -1
fi

# stage 1: train model, all `ckpt` under `train_output_path/checkpoints/` dir
if stage_enabled 1; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

# stage 2: synthesize
if stage_enabled 2; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

# stage 3: dygraph to static
if stage_enabled 3; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/dygraph_to_static.sh  ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

# stage 4: PTQ_static
if stage_enabled 4; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/PTQ_static.sh  ${train_output_path} hifigan_opencpop || exit -1
fi


================================================
FILE: examples/other/augmentation/augmentation.json
================================================
[
  {
    "type": "noise",
    "params": {
      "min_snr_dB": 40,
      "max_snr_dB": 50,
      "noise_manifest_path": "datasets/manifest.noise"
    },
    "prob": 0.6
  },
  {
    "type": "impulse",
    "params": {
      "impulse_manifest_path": "datasets/manifest.impulse"
    },
    "prob": 0.5
  },
  {
    "type": "speed",
    "params": {
      "min_speed_rate": 0.95,
      "max_speed_rate": 1.05,
      "num_rates": 3
    },
    "prob": 0.5
  },
  {
    "type": "shift",
    "params": {
      "min_shift_ms": -5,
      "max_shift_ms": 5
    },
    "prob": 1.0
  },
  {
    "type": "volume",
    "params": {
      "min_gain_dBFS": -10,
      "max_gain_dBFS": 10
    },
    "prob": 0.0
  },
  {
    "type": "bayesian_normal",
    "params": {
      "target_db": -20,
      "prior_db": -20,
      "prior_samples": 100
    },
    "prob": 0.0
  },
  {
    "type": "specaug",
    "params": {
      "W": 80,
      "warp_mode": "PIL",
      "F": 10,
      "n_freq_masks": 2,
      "T": 50,
      "n_time_masks": 2,
      "p": 1.0,
      "adaptive_number_ratio": 0,
      "adaptive_size_ratio": 0,
      "max_n_time_masks": 20,
      "replace_with_zero": false
    },
    "prob": 1.0
  }
]


================================================
FILE: examples/other/cc-cedict/.gitignore
================================================
data
exp


================================================
FILE: examples/other/cc-cedict/README.md
================================================
# [CC-CEDICT](https://cc-cedict.org/wiki/)

What is CC-CEDICT?
CC-CEDICT is a continuation of the CEDICT project.
The objective of the CEDICT project was to create an online, downloadable (as opposed to searchable-only) public-domain Chinese-English dictionary.
CEDICT was started by Paul Andrew Denisowski in October 1997.
For the most part, the project is modeled on Jim Breen's highly successful EDICT (Japanese-English dictionary) project and is intended to be a collaborative effort,
with users providing entries and corrections to the main file.


## Parse CC-CEDICT to JSON format

1. Parse to JSON

```
run.sh
```

2. Result

```
exp/
|-- cedict
`-- cedict.json

0 directories, 2 files
```

```
4c4bffc84e24467fe1b2ea9ba37ed6b6  exp/cedict
3adf504dacd13886f88cc9fe3b37c75d  exp/cedict.json
```

```
==> exp/cedict <==
# CC-CEDICT
# Community maintained free Chinese-English dictionary.
#
# Published by MDBG
#
# License:
# Creative Commons Attribution-ShareAlike 4.0 International License
# https://creativecommons.org/licenses/by-sa/4.0/
#
# Referenced works:

==> exp/cedict.json <==
{"traditional": "2019\u51a0\u72c0\u75c5\u6bd2\u75c5", "simplified": "2019\u51a0\u72b6\u75c5\u6bd2\u75c5", "pinyin": "er4 ling2 yi1 jiu3 guan1 zhuang4 bing4 du2 bing4", "english": "COVID-19, the coronavirus disease identified in 2019"}
{"traditional": "21\u4e09\u9ad4\u7d9c\u5408\u75c7", "simplified": "21\u4e09\u4f53\u7efc\u5408\u75c7", "pinyin": "er4 shi2 yi1 san1 ti3 zong1 he2 zheng4", "english": "trisomy"}
{"traditional": "3C", "simplified": "3C", "pinyin": "san1 C", "english": "abbr. for computers, communications, and consumer electronics"}
{"traditional": "3P", "simplified": "3P", "pinyin": "san1 P", "english": "(slang) threesome"}
{"traditional": "3Q", "simplified": "3Q", "pinyin": "san1 Q", "english": "(Internet slang) thank you (loanword)"}
{"traditional": "421", "simplified": "421", "pinyin": "si4 er4 yi1", "english": "four grandparents, two parents and an only child"}
{"traditional": "502\u81a0", "simplified": "502\u80f6", "pinyin": "wu3 ling2 er4 jiao1", "english": "cyanoacrylate glue"}
{"traditional": "88", "simplified": "88", "pinyin": "ba1 ba1", "english": "(Internet slang) bye-bye (alternative for \u62dc\u62dc[bai2 bai2])"}
{"traditional": "996", "simplified": "996", "pinyin": "jiu3 jiu3 liu4", "english": "9am-9pm, six days a week (work schedule)"}
{"traditional": "A", "simplified": "A", "pinyin": "A", "english": "(slang) (Tw) to steal"}
```


================================================
FILE: examples/other/cc-cedict/local/parser.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# https://github.com/rubber-duck-dragon/rubber-duck-dragon.github.io/blob/master/cc-cedict_parser/parser.py
# A parser for CC-CEDICT. It converts the Chinese-English dictionary into a list of Python dictionaries with "traditional", "simplified", "pinyin", and "english" keys.
# Pass the path of the CEDICT text file (e.g. cedict_ts.u8) as the first command-line argument and the output path as the second.
# Header lines of the CEDICT file that start with '#' are skipped by the parser, so the copyright information does not have to be deleted by hand.
# Characters that are commonly used as surnames have two entries in CC-CEDICT. This program removes the surname entry if there is another entry for the character. If you want to keep the surnames, remove the call to remove_surnames() in main().
# This code was written by Franki Allegra in February 2020.
import json
import sys

# usage: bin ccedict dump.json

def parse_line(line):
    """Parse one CC-CEDICT line and append the entry to ``list_of_dicts``.

    An entry line has the form::

        Traditional Simplified [pin1 yin1] /gloss 1/gloss 2/

    Only the first gloss is stored as the ``english`` value.

    Args:
        line: one line of the dictionary file, without the trailing newline.

    Returns:
        0 when the line is skipped (blank, a ``#`` or ``%`` comment, or not a
        well-formed entry); None after an entry has been appended.
    """
    if line == '' or line.startswith(('#', '%')):
        return 0
    fields = line.rstrip('/').split('/')
    if len(fields) <= 1:
        return 0
    char_and_pinyin = fields[0].split('[')
    if len(char_and_pinyin) < 2:
        # No "[pinyin]" part: not a dictionary entry.
        return 0
    characters = char_and_pinyin[0].split()
    if len(characters) < 2:
        # Both the traditional and the simplified headword are required.
        return 0
    list_of_dicts.append({
        'traditional': characters[0],
        'simplified': characters[1],
        'pinyin': char_and_pinyin[1].rstrip().rstrip("]"),
        'english': fields[1],
    })


def remove_surnames():
    """Drop surname entries that are directly followed by another entry for
    the same traditional headword.

    ``list_of_dicts`` is modified in place. The scan runs backwards so that
    popping an element never shifts an index that is still to be visited.
    """
    # Start at the second-to-last entry: the last one has no successor to
    # compare against, so it can never be removed.
    for x in range(len(list_of_dicts) - 2, -1, -1):
        if "surname " in list_of_dicts[x]['english']:
            if list_of_dicts[x]['traditional'] == list_of_dicts[x + 1][
                    'traditional']:
                list_of_dicts.pop(x)


def main():
    """Convert the CEDICT file ``sys.argv[1]`` into JSON lines at ``sys.argv[2]``."""
    if len(sys.argv) < 3:
        sys.exit("usage: parser.py <cedict file> <output json-lines file>")

    # CC-CEDICT is distributed as UTF-8; do not depend on the locale.
    with open(sys.argv[1], 'rt', encoding='utf-8') as file:
        dict_lines = file.read().split('\n')

    #make each line into a dictionary
    print("Parsing dictionary . . .")
    for line in dict_lines:
        parse_line(line)

    #remove entries for surnames from the data (optional):
    print("Removing Surnames . . .")
    remove_surnames()

    print("Saving to database (this may take a few minutes) . . .")
    with open(sys.argv[2], 'wt') as fout:
        for one_dict in list_of_dicts:
            json_str = json.dumps(one_dict)
            fout.write(json_str + "\n")
    print('Done!')


# Accumulates every parsed entry; filled by parse_line().
list_of_dicts = []

if __name__ == '__main__':
    parsed_dict = main()


================================================
FILE: examples/other/cc-cedict/path.sh
================================================
# Environment setup for this example; meant to be sourced by run.sh.

# Three directories above the current working directory (expected to be the
# repository root). Quoted so a path containing whitespace is not word-split.
export MAIN_ROOT="$(realpath "${PWD}/../../../")"

export PATH="${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}"
export LC_ALL=C

# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONPATH="${MAIN_ROOT}:${PYTHONPATH}"

export LD_LIBRARY_PATH="/usr/local/lib/:${LD_LIBRARY_PATH}"


================================================
FILE: examples/other/cc-cedict/run.sh
================================================
#!/bin/bash

# CC-CEDICT download: https://www.mdbg.net/chinese/dictionary?page=cc-cedict
# The word dictionary of this website is based on CC-CEDICT.
# CC-CEDICT is a continuation of the CEDICT project started by Paul Denisowski in 1997 with the
# aim to provide a complete downloadable Chinese to English dictionary with pronunciation in pinyin for the Chinese characters.
# This website allows you to easily add new entries or correct existing entries in CC-CEDICT.
# Submitted entries will be checked and processed frequently and released for download in CEDICT format on this page.

set -e
source path.sh

# Stage -1 downloads and unpacks CC-CEDICT, stage 0 converts it to JSON lines.
# The values below are defaults; parse_options.sh is sourced right after so
# they can be overridden from the command line.
stage=-1
stop_stage=100


source "${MAIN_ROOT}/utils/parse_options.sh" || exit -1


cedict_url=https://www.mdbg.net/chinese/export/cedict/cedict_1_0_ts_utf-8_mdbg.zip
cedict=cedict_1_0_ts_utf-8_mdbg.zip

mkdir -p data

if [ "${stage}" -le -1 ] && [ "${stop_stage}" -ge -1 ];then
    # Download under a temporary name first, so an interrupted transfer is
    # never mistaken for a complete archive on the next run.
    if [ ! -f "data/${cedict}" ]; then
        wget -O "data/${cedict}.part" "${cedict_url}"
        mv "data/${cedict}.part" "data/${cedict}"
    fi
    pushd data
    # -o: overwrite files extracted by a previous run instead of prompting.
    unzip -o "${cedict}"
    popd

fi

mkdir -p exp

if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ];then
    cp data/cedict_ts.u8 exp/cedict
    python3 local/parser.py exp/cedict exp/cedict.json
fi


================================================
FILE: examples/other/g2p/README.md
================================================
# G2P
For g2p, we use BZNSYP's phone label as the ground truth and we delete silence tokens in labels and predicted phones.

You should download BZNSYP from its [Official Website](https://test.data-baker.com/data/index/source) and extract it. Assume the path to the dataset is `~/datasets/BZNSYP`.

We use `WER` as an evaluation criterion.

# Start
Run the command below to get the results of the test.

```bash
cd ../../../tools
bash extras/install_sclite.sh
cd -
./run.sh
```

The `avg WER` of g2p is: 0.024075726733983775

```text
     ,--------------------------------------------------------------------.
     |                         ./exp/g2p/text.g2p                         |
     |--------------------------------------------------------------------|
     | SPKR   | # Snt    # Wrd  | Corr    Sub    Del    Ins    Err  S.Err |
     | Sum/Avg|  9996   299181  | 97.6    2.4    0.0    0.0    2.4   49.0 |
     `--------------------------------------------------------------------'
```


================================================
FILE: examples/other/g2p/compare_badcase.py
================================================
# -*- encoding:utf-8 -*-
import re
import sys
'''
@author: david_95

Assume you ran the g2p test twice and the WER differs between the two runs, and you would like to see which sentences caused the change.
Save the two test results (exp/g2p) into two directories, e.g. exp/prefolder and exp/curfolder,
and run this program as "python compare_badcase.py prefolder curfolder".
It prints the differences between the two runs: utterance id, phonetics, and the Chinese sample text.

examples: python compare_badcase.py  exp/g2p_laotouzi  exp/g2p
in this example:  exp/g2p_laotouzi  and  exp/g2p  are two folders with two g2p tests result

'''


def compare(prefolder, curfolder):
    '''
    Print the utterances whose "Eval:" line differs between two g2p runs.

    Both folders must contain a text.g2p.pra file; the two files are read in
    lockstep, line by line, after skipping the first 10 lines. Whenever a line
    of the first file starts with "Eval:" and differs from the matching line
    of the second file, the following is printed:
      - the line of data/g2p/text (relative to the working directory) that
        starts with the utterance id,
      - the reference line, then the hypothesis and Eval lines of both runs.
    P1 is prefolder and P2 is curfolder, i.e. the order given on the command
    line. At the end the collected text lines are printed once more with the
    #1..#4 marks removed.
    '''

    pre_block = []
    cur_block = []
    zh_lines = []
    # Content of data/g2p/text, read once on the first mismatch and reused.
    text_lines = None
    with open(prefolder + "/text.g2p.pra", "r") as pre_file, open(
            curfolder + "/text.g2p.pra", "r") as cur_file:
        for linecnt, (pre_line, cur_line) in enumerate(
                zip(pre_file, cur_file), start=1):
            if linecnt < 11:  #skip non-data head in files
                continue

            pre_line = pre_line.strip()
            cur_line = cur_line.strip()
            pre_block.append(pre_line)
            cur_block.append(cur_line)
            if not pre_line.startswith("Eval:") or pre_line == cur_line:
                continue

            # The "id:" line sits four lines above the "Eval:" line.
            uuid = pre_block[-5].replace("id: (baker_", "").replace(")", "")
            if text_lines is None:
                with open("data/g2p/text", 'r') as txt:
                    text_lines = txt.readlines()

            for line in text_lines:
                if line.strip().startswith(uuid.strip()):
                    print(line)
                    zh_lines.append(re.sub(r"#[1234]", "", line))
                    break

            print("*" + cur_block[-3])  # ref
            print("P1 " + pre_block[-2])
            print("P2 " + cur_block[-2])
            print("P1 " + pre_block[-1])
            print("P2 " + cur_block[-1] + "\n\n")
            pre_block = []
            cur_block = []

    print("\n")
    print(str.join("\n", zh_lines))


if __name__ == '__main__':
    # Exactly two positional arguments are expected: the folder of the
    # previous run followed by the folder of the current run.
    cli_args = sys.argv[1:]
    assert len(
        cli_args) == 2, "Usage: python compare_badcase.py %prefolder %curfolder"
    compare(*cli_args)


================================================
FILE: examples/other/g2p/get_g2p_data.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from collections import defaultdict
from pathlib import Path

from praatio import textgrid


def get_baker_data(root_dir):
    """Collect phone labels, raw text and pinyin of the baker (BZNSYP) dataset.

    Args:
        root_dir: ``pathlib.Path`` of the dataset. ``PhoneLabeling`` is
            searched recursively for ``*.interval`` alignment files and
            ``ProsodyLabeling/000001-010000.txt`` is read for the text.

    Returns:
        A ``defaultdict(dict)`` keyed by utterance id. Each value holds
        ``"phones"`` (labels of the first tier of the alignment file) and,
        when the utterance appears in the text file, ``"text"`` and
        ``"pinyin"``.
    """
    alignment_files = sorted(
        list((root_dir / "PhoneLabeling").rglob("*.interval")))
    text_file = root_dir / "ProsodyLabeling/000001-010000.txt"
    text_file = Path(text_file).expanduser()
    # filter out several files that have errors in annotation
    exclude = {'000611', '000662', '002365', '005107'}
    alignment_files = [f for f in alignment_files if f.stem not in exclude]
    data_dict = defaultdict(dict)
    for alignment_fp in alignment_files:
        alignment = textgrid.openTextgrid(
            alignment_fp, includeEmptyIntervals=True)
        # only with baker's annotation
        first_tier = alignment.tierNameList[0]
        utt_id = first_tier.split(".")[0]
        intervals = alignment.getTier(first_tier).entries
        data_dict[utt_id]["phones"] = [
            interval.label for interval in intervals
        ]
    # The context manager closes the text file once it has been consumed.
    with open(text_file, "r") as text_f:
        for line in text_f:
            if line.startswith("0"):
                # "<utt_id> <raw text>" line
                utt_id, raw_text = line.strip().split()
                if utt_id in data_dict:
                    data_dict[utt_id]['text'] = raw_text
            else:
                # any other line is taken as the pinyin of the utterance above
                pinyin = line.strip().split()
                if utt_id in data_dict:
                    data_dict[utt_id]['pinyin'] = pinyin
    return data_dict


def get_g2p_phones(data_dict, frontend):
    """Store ``frontend.get_phonemes(text)`` under ``"g2p_phones"`` for every
    utterance of ``data_dict`` and return the same, updated mapping."""
    for entry in data_dict.values():
        entry["g2p_phones"] = frontend.get_phonemes(entry['text'])
    return data_dict


def main():
    """Command-line entry point: dump raw text and reference phones.

    Two files are written into ``--output-dir``: ``text`` holds
    ``<utt_id> <raw text>`` per line and ``text.ref`` holds ``<utt_id>``
    followed by the space-joined phone labels.
    """
    parser = argparse.ArgumentParser(description="g2p example.")
    # Required: without it the script used to fail later with a TypeError.
    parser.add_argument(
        "--root-dir",
        default=None,
        required=True,
        type=str,
        help="directory to baker dataset.")
    parser.add_argument(
        "--output-dir",
        default="data/g2p",
        type=str,
        help="directory to output.")

    args = parser.parse_args()
    root_dir = Path(args.root_dir).expanduser()
    output_dir = Path(args.output_dir).expanduser()
    output_dir.mkdir(parents=True, exist_ok=True)
    assert root_dir.is_dir()
    data_dict = get_baker_data(root_dir)
    raw_path = output_dir / "text"
    ref_path = output_dir / "text.ref"
    # Both outputs are closed (and therefore flushed) when the block exits.
    with open(raw_path, "w") as wf_raw, open(ref_path, "w") as wf_ref:
        for utt_id in data_dict:
            wf_raw.write(utt_id + " " + data_dict[utt_id]['text'] + "\n")
            wf_ref.write(utt_id + " " + " ".join(data_dict[utt_id]['phones']) +
                         "\n")


if __name__ == "__main__":
    main()


================================================
FILE: examples/other/g2p/path.sh
================================================
#!/bin/bash
# Environment setup for this example; meant to be sourced by run.sh.

# Three directories above the current working directory (expected to be the
# repository root). Quoted so a path containing whitespace is not word-split.
export MAIN_ROOT="$(realpath "${PWD}/../../../")"

export PATH="${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}"
export LC_ALL=C

export PYTHONDONTWRITEBYTECODE=1
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONPATH="${MAIN_ROOT}:${PYTHONPATH}"


================================================
FILE: examples/other/g2p/run.sh
================================================
#!/bin/bash

# Stop at the first failing step so that later steps never run on missing or
# stale data.
set -e
source path.sh
USE_SCLITE=true

# test g2p
if [ ! -d ~/datasets/BZNSYP ];then
    echo "Please download BZNSYP dataset"
    # A missing dataset is an error: report it through the exit status.
    exit 1
fi
echo "Start get g2p test data ..."
# The "~" is passed through literally; get_g2p_data.py expands it.
python3 get_g2p_data.py --root-dir=~/datasets/BZNSYP --output-dir=data/g2p
echo "Start test g2p ..."
python3 test_g2p.py --input-dir=data/g2p --output-dir=exp/g2p

# whether use sclite to get more detail information of WER
if [ "$USE_SCLITE" = true ];then
    echo "Start sclite g2p ..."
    "${MAIN_ROOT}/tools/extras/sctk/bin/sclite" -i wsj -r ./exp/g2p/text.ref.clean trn -h ./exp/g2p/text.g2p trn -e utf-8 -o all
fi


================================================
FILE: examples/other/g2p/test_g2p.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import re
from pathlib import Path

from paddlespeech.t2s.frontend.zh_frontend import Frontend as zhFrontend
from paddlespeech.t2s.utils.error_rate import word_errors

SILENCE_TOKENS = {"sp", "sil", "sp1", "spl"}


def text_cleaner(raw_text):
    """Normalise a raw transcript before it is sent to the g2p frontend.

    Prosody marks (#1..#4), curly double quotes and full-width parentheses
    are removed, "…。" is collapsed to "。", and the remaining colon,
    semicolon, dash, ellipsis and enumeration-comma characters are turned
    into a full-width comma.
    """
    without_marks = re.sub('#[1-4]|“|”|（|）', '', raw_text)
    collapsed = without_marks.replace("…。", "。")
    return re.sub('：|；|——|……|、|…|—', '，', collapsed)


def get_avg_wer(raw_dict, ref_dict, frontend, output_dir):
    """Run g2p on every utterance and return the corpus-level error rate.

    Args:
        raw_dict: maps utterance id to raw text.
        ref_dict: maps utterance id to the space-separated reference phones.
            Utterances of ``raw_dict`` that are missing here are skipped.
        frontend: object whose ``get_phonemes(text)`` returns a list of phone
            lists (it is flattened below).
        output_dir: ``pathlib.Path``; ``text.g2p`` and ``text.ref.clean`` are
            written there, one ``<phones>(baker_<utt_id>)`` line per utterance.

    Returns:
        Sum of the edit distances divided by the sum of the reference lengths.
    """
    edit_distances = []
    ref_lens = []
    # Both result files are closed (and flushed) when the block exits, so
    # they are complete before any later tool reads them.
    with open(output_dir / "text.g2p", "w") as wf_g2p, open(
            output_dir / "text.ref.clean", "w") as wf_ref:
        for utt_id in raw_dict:
            if utt_id not in ref_dict:
                continue
            raw_text = raw_dict[utt_id]
            text = text_cleaner(raw_text)
            g2p_phones = frontend.get_phonemes(text)
            g2p_phones = sum(g2p_phones, [])
            gt_phones = ref_dict[utt_id].split(" ")
            # delete silence tokens in predicted phones and ground truth phones
            g2p_phones = [
                phn for phn in g2p_phones if phn not in SILENCE_TOKENS
            ]
            gt_phones = [phn for phn in gt_phones if phn not in SILENCE_TOKENS]
            gt_phones = " ".join(gt_phones)
            g2p_phones = " ".join(g2p_phones)
            wf_ref.write(gt_phones + "(baker_" + utt_id + ")" + "\n")
            wf_g2p.write(g2p_phones + "(baker_" + utt_id + ")" + "\n")
            edit_distance, ref_len = word_errors(gt_phones, g2p_phones)
            edit_distances.append(edit_distance)
            ref_lens.append(ref_len)

    return sum(edit_distances) / sum(ref_lens)


def main():
    """Command-line entry point: evaluate g2p on the prepared test data.

    Reads ``text`` and ``text.ref`` from ``--input-dir``, runs the Chinese
    frontend on every utterance and prints the average WER.
    """

    def load_table(path):
        # Every line is "<utt_id> <rest>"; keep the rest verbatim.
        table = dict()
        with open(path, "r") as rf:
            for row in rf:
                utt_id, _, rest = row.strip().partition(" ")
                table[utt_id] = rest
        return table

    parser = argparse.ArgumentParser(description="g2p example.")
    parser.add_argument(
        "--input-dir",
        default="data/g2p",
        type=str,
        help="directory to preprocessed test data.")
    parser.add_argument(
        "--output-dir",
        default="exp/g2p",
        type=str,
        help="directory to save g2p results.")
    args = parser.parse_args()

    input_dir = Path(args.input_dir).expanduser()
    output_dir = Path(args.output_dir).expanduser()
    output_dir.mkdir(parents=True, exist_ok=True)
    assert input_dir.is_dir()

    raw_dict = load_table(input_dir / "text")
    ref_dict = load_table(input_dir / "text.ref")

    frontend = zhFrontend()
    avg_wer = get_avg_wer(raw_dict, ref_dict, frontend, output_dir)
    print("The avg WER of g2p is:", avg_wer)


if __name__ == "__main__":
    main()


================================================
FILE: examples/other/ge2e/README.md
================================================
# Speaker Encoder
This experiment trains a speaker encoder with speaker verification as its task. It is done as part of the experiment of transfer learning from speaker verification to multispeaker text-to-speech synthesis, which can be found at [examples/aishell3/vc0](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/vc0). The trained speaker encoder is used to extract utterance embeddings from utterances.
## Model
The model used in this experiment is the speaker encoder with text-independent speaker verification task in [GENERALIZED END-TO-END LOSS FOR SPEAKER VERIFICATION](https://arxiv.org/pdf/1710.10467.pdf). GE2E-softmax loss is used.

## Download Datasets
Currently supported datasets are Librispeech-other-500, VoxCeleb1, VoxCeleb2, aidatatang_200zh, and magicdata, which can be downloaded from the corresponding webpages.

1. Librispeech/train-other-500
   An English multispeaker dataset，[URL](https://www.openslr.org/resources/12/train-other-500.tar.gz)，only the `train-other-500` subset is used.
2. VoxCeleb1
   An English multispeaker dataset，[URL](https://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox1.html), Audio Files from Dev A to Dev D should be downloaded, combined, and extracted.
3. VoxCeleb2
   An English multispeaker dataset，[URL](https://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox2.html), Audio Files from Dev A to Dev H should be downloaded, combined, and extracted.
4. Aidatatang-200zh
   A Mandarin Chinese multispeaker dataset ，[URL](https://www.openslr.org/62/).
5. magicdata
   A Mandarin Chinese multispeaker dataset ，[URL](https://www.openslr.org/68/).

If you want to use other datasets, you can also download and preprocess them as long as they meet the requirements described below.

## Get Started

```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
`./local/preprocess.sh` calls `${BIN_DIR}/preprocess.py`.
```bash
./local/preprocess.sh ${datasets_root} ${preprocess_path} ${dataset_names}
```
Assume datasets_root is `~/datasets/GE2E`, and it has the following structure（We only use `train-other-500` for simplicity）:
```Text
GE2E
├── LibriSpeech
└── (other datasets)
```
Multispeaker datasets are used as training data, though the transcriptions are not used. To enlarge the amount of data used for training, several multispeaker datasets are combined. The preprocessed datasets are organized in a file structure described below. The mel spectrogram of each utterance is saved in `.npy` format. The dataset is 2-stratified (speaker-utterance). Since multiple datasets are combined, to avoid conflict in speaker id, the dataset name is prepended to the speaker ids.

```text
dataset_root
├── dataset01_speaker01/
│   ├── utterance01.npy
│   ├── utterance02.npy
│   └── utterance03.npy
├── dataset01_speaker02/
│   ├── utterance01.npy
│   ├── utterance02.npy
│   └── utterance03.npy
├── dataset02_speaker01/
│   ├── utterance01.npy
│   ├── utterance02.npy
│   └── utterance03.npy
└── dataset02_speaker02/
    ├── utterance01.npy
    ├── utterance02.npy
    └── utterance03.npy
```
In `${BIN_DIR}/preprocess.py`:
1. `--datasets_root` is the directory that contains several extracted dataset
2.  `--output_dir` is the directory to save the preprocessed dataset
3.  `--dataset_names` is the dataset to preprocess. If there are multiple datasets in `--datasets_root` to preprocess, the names can be joined with a comma. Currently supported dataset names are  librispeech_other, voxceleb1, voxceleb2, aidatatang_200zh, and magicdata.

### Model Training
`./local/train.sh` calls `${BIN_DIR}/train.py`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${preprocess_path} ${train_output_path}
```
In `${BIN_DIR}/train.py`:
1. `--data` is the path to the preprocessed dataset.
2. `--output` is the directory to save results，usually a subdirectory of `runs`. It contains visualdl log files, text log files, config files, and a `checkpoints` directory, which contains parameter files and optimizer state files. If `--output` already has some training results in it, the most recent parameter file and optimizer state file are loaded before training.
3. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.
4. `CUDA_VISIBLE_DEVICES` can be used to specify visible devices with cuda.

Other options are described below.

- `--config` is a `.yaml` config file used to override the default config(which is coded in `config.py`).
- `--opts` is a command-line option to further override config files. It should be the last command-line options passed with multiple key-value pairs separated by spaces.
- `--checkpoint_path` specifies the checkpoint to load before training, extension is not included. A parameter file ( `.pdparams`) and an optimizer state file ( `.pdopt`) with the same name is used. This option has a higher priority than auto-resuming from the `--output` directory.

###  Inferencing
When training is done, run the command below to generate utterance embedding for each utterance in a dataset.
`./local/inference.sh` calls `${BIN_DIR}/inference.py`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/inference.sh ${infer_input} ${infer_output} ${train_output_path} ${ckpt_name}
```
In `${BIN_DIR}/inference.py`:
1. `--input` is the path of the dataset used for inference.
2. `--output` is the directory to save the processed results. It has the same file structure as the input dataset. Each utterance in the dataset has a corresponding utterance embedding file in the `*.npy` format.
3. `--checkpoint_path` is the path of the checkpoint to use, extension not included.
4. `--pattern` is the wildcard pattern to filter audio files for inference, defaults to `*.wav`.
5. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

## Pretrained Model
The pretrained model is first trained to 1560k steps on Librispeech-other-500 and voxceleb1, and then trained on aidatatang_200zh and magicdata to 3000k steps.

Download URL [ge2e_ckpt_0.3.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ge2e/ge2e_ckpt_0.3.zip).

## References

1. [Generalized End-to-end Loss for Speaker Verification](https://arxiv.org/pdf/1710.10467.pdf)
2. [Transfer Learning from Speaker Verification to Multispeaker Text-To-Speech Synthesis](https://arxiv.org/pdf/1806.04558.pdf)


================================================
FILE: examples/other/ge2e/local/inference.sh
================================================
#!/bin/bash

# Generate an utterance embedding for each utterance in a dataset.
# Usage: inference.sh <infer_input> <infer_output> <train_output_path> <ckpt_name>
# BIN_DIR must be set in the environment (path.sh exports it).
infer_input=$1
infer_output=$2
train_output_path=$3
ckpt_name=$4

# Arguments are quoted so that paths containing whitespace stay one word.
python3 "${BIN_DIR}/inference.py" \
    --input="${infer_input}" \
    --output="${infer_output}" \
    --checkpoint_path="${train_output_path}/checkpoints/${ckpt_name}" \
    --ngpu=1


================================================
FILE: examples/other/ge2e/local/preprocess.sh
================================================
#!/bin/bash
# Preprocess the datasets used to train the speaker encoder.
# Usage: preprocess.sh <datasets_root> <preprocess_path> <dataset_names>
# BIN_DIR must be set in the environment (path.sh exports it).
datasets_root=$1
preprocess_path=$2
dataset_names=$3

# Arguments are quoted so that paths containing whitespace stay one word.
python3 "${BIN_DIR}/preprocess.py" \
    --datasets_root="${datasets_root}" \
    --output_dir="${preprocess_path}" \
    --dataset_names="${dataset_names}"

================================================
FILE: examples/other/ge2e/local/train.sh
================================================
#!/bin/bash
# Train the speaker encoder on a preprocessed dataset.
# Usage: train.sh <preprocess_path> <train_output_path>
# BIN_DIR must be set in the environment (path.sh exports it).

preprocess_path=$1
train_output_path=$2

# Arguments are quoted so that paths containing whitespace stay one word.
python3 "${BIN_DIR}/train.py" \
    --data="${preprocess_path}" \
    --output="${train_output_path}" \
    --ngpu=1

================================================
FILE: examples/other/ge2e/path.sh
================================================
#!/bin/bash
# Environment setup for this example; meant to be sourced by run.sh.

# Three directories above the current working directory (expected to be the
# repository root). Quoted so a path containing whitespace is not word-split.
export MAIN_ROOT="$(realpath "${PWD}/../../../")"

export PATH="${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}"
export LC_ALL=C

export PYTHONDONTWRITEBYTECODE=1
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONPATH="${MAIN_ROOT}:${PYTHONPATH}"

# Directory of the preprocess/train/inference entry scripts used by local/*.sh.
MODEL=ge2e
export BIN_DIR="${MAIN_ROOT}/paddlespeech/vector/exps/${MODEL}"


================================================
FILE: examples/other/ge2e/run.sh
================================================
#!/bin/bash

set -e
source path.sh

gpus=0
stage=0
stop_stage=100

# Left unquoted on purpose so that "~" is expanded by the shell.
datasets_root=~/datasets/GE2E
preprocess_path=dump
dataset_names=librispeech_other
train_output_path=output
infer_input=infer_input
infer_output=infer_output
ckpt_name=step-10000

# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this can not be mixed use with `$1`, `$2` ...
source "${MAIN_ROOT}/utils/parse_options.sh" || exit 1

# Arguments below are quoted so that values containing whitespace are passed
# to the helper scripts as single words.
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    # prepare data
    ./local/preprocess.sh "${datasets_root}" "${preprocess_path}" "${dataset_names}" || exit -1
fi

if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    # train the speaker encoder
    CUDA_VISIBLE_DEVICES="${gpus}" ./local/train.sh "${preprocess_path}" "${train_output_path}" || exit -1
fi

if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    # extract utterance embeddings with the chosen checkpoint
    CUDA_VISIBLE_DEVICES="${gpus}" ./local/inference.sh "${infer_input}" "${infer_output}" "${train_output_path}" "${ckpt_name}" || exit -1
fi


================================================
FILE: examples/other/mfa/README.md
================================================
# Use Montreal-Forced-Aligner
Here is an example to use [MFA1.x](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner).
Run the following script to get started, for more detail, please see `run.sh`.
```bash
./run.sh
```
# Rhythm tags for MFA
If you want to get rhythm tags with duration through the MFA tool, you may add the flag `--rhy-with-duration` to the first two commands in `run.sh`.
Note that only the CSMSC dataset is supported so far, and we replace `#` with `sp` in rhythm tags for MFA.

# MFA for Cantonese language
First, go download these datasets [Guangzhou_Cantonese_Scripted_Speech_Corpus_Daily_Use_Sentence](https://magichub.com/datasets/guangzhou-cantonese-scripted-speech-corpus-daily-use-sentence/) and [Guangzhou_Cantonese_Scripted_Speech_Corpus_in_Vehicle](https://magichub.com/datasets/guangzhou-cantonese-scripted-speech-corpus-in-the-vehicle/) under `~/datasets/`.
Then,
```bash
./run_canton.sh
```


================================================
FILE: examples/other/mfa/local/detect_oov.py
================================================
#!/usr/bin/env python3
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
from collections import OrderedDict
from pathlib import Path


def detect_oov(corpus_dir, lexicon_path, transcription_pattern="*.lab"):
    """Log a warning for every transcription token missing from the lexicon.

    Args:
        corpus_dir: directory whose transcription files are checked.
        lexicon_path: text file with one ``<syllable> <phonemes...>`` entry
            per line. Blank lines are ignored.
        transcription_pattern: glob pattern, relative to ``corpus_dir``, that
            selects the transcription files.
    """
    corpus_dir = Path(corpus_dir)

    lexicon = OrderedDict()
    with open(lexicon_path, 'rt') as f:
        for line in f:
            if not line.strip():
                # A blank (often trailing) line cannot be unpacked below.
                continue
            syllable, phonemes = line.split(maxsplit=1)
            lexicon[syllable] = phonemes

    for fp in corpus_dir.glob(transcription_pattern):
        syllables = fp.read_text().strip().split()
        for s in syllables:
            if s not in lexicon:
                logging.warning(f"{fp.relative_to(corpus_dir)} has OOV {s} .")


if __name__ == "__main__":
    # Command-line wrapper around detect_oov().
    parser = argparse.ArgumentParser(
        description="detect oov in a corpus given a lexicon")
    parser.add_argument(
        "corpus_dir", type=str, help="corpus dir for MFA alignment.")
    parser.add_argument("lexicon_path", type=str, help="dictionary to use.")
    # The help text used to be a copy of the lexicon_path one.
    parser.add_argument(
        "--pattern",
        type=str,
        default="*.lab",
        help="glob pattern of the transcription files inside corpus_dir.")
    args = parser.parse_args()
    print(args)

    detect_oov(args.corpus_dir, args.lexicon_path, args.pattern)


================================================
FILE: examples/other/mfa/local/generate_canton_lexicon_wavlabs.py
================================================
import argparse
import os
import re
import shutil

import ToJyutping


def check(str):
    """Return True when ``str`` contains at least one ASCII letter (A-Z, a-z)."""
    return re.search(r'[A-Za-z]', str) is not None


# Prefixes tried, in this order, when a jyutping syllable is split in two.
# NOTE(review): the first prefix that matches wins, so an entry listed after a
# shorter prefix of itself (e.g. 'aai' after 'aa') is never selected; confirm
# that this is intended.
INITIALS = [
    'aa', 'aai', 'aak', 'aap', 'aat', 'aau', 'ai', 'au', 'ap', 'at', 'ak', 'a',
    'p', 'b', 'e', 'ts', 't', 'dz', 'd', 'kw', 'k', 'gw', 'g', 'f', 'h', 'l',
    'm', 'ng', 'n', 's', 'y', 'w', 'c', 'z', 'j', 'ong', 'on', 'ou', 'oi', 'ok',
    'o', 'uk', 'ung'
]


def get_lines(canton):
    """Build the lexicon line of one jyutping syllable.

    The line is ``"<syllable> <prefix> <rest>"``, where ``<prefix>`` is the
    first entry of ``INITIALS`` the syllable starts with. When no entry
    matches, the syllable is its own pronunciation: ``"<syllable> <syllable>"``.
    """
    matched = next(
        (prefix for prefix in INITIALS if canton.startswith(prefix)), None)
    if matched is None:
        return canton + ' ' + canton
    cut = len(matched)
    return ' '.join((canton, canton[:cut], canton[cut:]))


if __name__ == "__main__":
    # For every input dataset: copy the usable wav files into
    # <output_wavlabs>/<speaker>/, write a .lab file with the jyutping
    # transcription next to each of them, and finally write a lexicon with one
    # line per distinct jyutping syllable.
    parser = argparse.ArgumentParser(
        description="Generate lexicon for Cantonese pinyin to phoneme for MFA")
    parser.add_argument(
        "--output_lexicon", type=str, help="Path to save lexicon.")
    parser.add_argument(
        "--output_wavlabs",
        type=str,
        help="Path of wavs and labs for MFA training.")
    parser.add_argument(
        "--inputs", type=str, nargs="+", help="Path to the cantonese datasets.")
    args = parser.parse_args()

    # makedirs(exist_ok=True) also creates missing parents and tolerates a
    # directory left behind by a previous run.
    os.makedirs(args.output_wavlabs, exist_ok=True)

    all_canton = []
    for input_ in args.inputs:
        # The two corpora name their utterance table differently.
        utt = "UTTRANSINFO.txt" if "Guangzhou_Cantonese_Scripted_Speech_Corpus_Daily_Use_Sentence" in input_ else "UTTERANCEINFO.txt"
        input_utttxt = os.path.join(input_, utt)

        with open(input_utttxt, 'r') as f:
            # the first line is skipped (presumably a column header)
            utterance_info = f.readlines()[1:]

        for utterance_line in utterance_info:
            if not utterance_line.strip():
                # A blank line cannot be unpacked into the five fields below.
                continue
            _, wav_name, spk, _, text = utterance_line.split('\t')
            text = text.strip().replace(' ', '')
            # check the characters and drop the short text.
            if not check(text) and len(text) > 2:
                source_path = os.path.join(input_, 'WAV', spk, wav_name)
                out_spk_path = os.path.join(args.output_wavlabs, spk)
                os.makedirs(out_spk_path, exist_ok=True)
                target_path = os.path.join(out_spk_path, wav_name)

                shutil.copy(source_path, target_path)

                lab_name = wav_name.split('.')[0] + '.lab'
                lab_target_path = os.path.join(out_spk_path, lab_name)
                canton_list = ToJyutping.get_jyutping_text(text)
                with open(lab_target_path, 'w') as f:
                    f.write(canton_list)

                canton_list = canton_list.split(' ')
                all_canton.extend(canton_list)
    # de-duplicate and sort so that the lexicon is deterministic
    all_canton = sorted(list(set(all_canton)))

    with open(args.output_lexicon, 'w') as f:
        for canton in all_canton:
            f.write(get_lines(canton) + '\n')


================================================
FILE: examples/other/mfa/local/generate_lexicon.py
================================================
#!/usr/bin/env python3
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generate lexicon and symbols for Mandarin Chinese phonology.
The lexicon is used for the Montreal Forced Aligner.

Note that syllables are used as words in this lexicon, since syllables rather
than words are used in transcriptions produced by `reorganize_baker.py`.

We make this choice to better leverage other Chinese text-to-pinyin tools
like pypinyin. This is the convention for G2P in Chinese.
"""
import argparse
import re
from collections import OrderedDict

# Initials (syllable onsets) that `generate_lexicon` combines with FINALS.
# The zero initial ('') is added separately by `generate_lexicon`.
INITIALS = [
    'b', 'p', 'm', 'f', 'd', 't', 'n', 'l', 'g', 'k', 'h', 'zh', 'ch', 'sh',
    'r', 'z', 'c', 's', 'j', 'q', 'x'
]

# Finals in the phoneme notation used by `rule`: 'v' stands for 'ü', and
# 'ii' / 'iii' are the variants of 'i' that `rule` only accepts after
# z/c/s and zh/ch/sh/r respectively.
FINALS = [
    'a', 'ai', 'ao', 'an', 'ang', 'e', 'er', 'ei', 'en', 'eng', 'o', 'ou',
    'ong', 'ii', 'iii', 'i', 'ia', 'iao', 'ian', 'iang', 'ie', 'io', 'iou',
    'iong', 'in', 'ing', 'u', 'ua', 'uai', 'uan', 'uang', 'uei', 'uo', 'uen',
    'ueng', 'v', 've', 'van', 'vn'
]

# Extra symbols that `generate_symbols` always adds to the phoneme set
# (presumably silence and short pause -- confirm against the MFA setup).
SPECIALS = ['sil', 'sp']


def rule(C, V, R, T):
    """Spell one syllable in pinyin, or reject the combination.

    The final is given in this module's phoneme notation: 'ü' is always
    written 'v', and 'i' is split into 'i', 'ii' and 'iii'. The returned
    spelling follows pinyin orthography (y/w for zero-initial syllables,
    'iu'/'ui'/'un' contractions, 'v' written as 'u' after j/q/x, and
    'ii'/'iii' collapsed back to 'i').

    Args:
        C: the initial; '' for a syllable without an initial.
        V: the final (non-empty string).
        R: erhua marker, '' or 'r'.
        T: tone suffix appended verbatim (e.g. '' or '1'..'5').

    Returns:
        The spelled syllable followed by R and T, or None when the
        combination is filtered out by one of the rules below.
    """
    jqx = ('j', 'q', 'x')
    # Finals that begin with 'i' or 'v', not counting the 'ii'/'iii' variants.
    front_final = V not in ('ii', 'iii') and V[0] in ('i', 'v')

    # 'ii' only combines with z, c, s.
    if V == 'ii' and C not in ('z', 'c', 's'):
        return None
    # 'iii' only combines with zh, ch, sh, r.
    if V == 'iii' and C not in ('zh', 'ch', 'sh', 'r'):
        return None

    # Finals starting with 'i' or 'v' never follow
    # f, g, k, h, zh, ch, sh, r, z, c, s.
    if front_final and C in ('f', 'g', 'k', 'h', 'zh', 'ch', 'sh', 'r', 'z',
                             'c', 's'):
        return None

    # Finals starting with 'v': 'v' and 've' also accept n and l, every
    # other one only accepts j, q, x (or no initial at all).
    if V.startswith('v'):
        if V in ('v', 've'):
            allowed = jqx + ('n', 'l', '')
        else:
            allowed = jqx + ('', )
        if C not in allowed:
            return None

    # j, q, x only combine with finals starting with 'i' or 'v'.
    if C in jqx and not front_final:
        return None

    # b, p, m, f reject 'ong' and every final starting with 'u' or 'v',
    # the bare 'u' being the single exception.
    if C in ('b', 'p', 'm', 'f') and (V == 'ong' or
                                      (V[0] in ('u', 'v') and V != 'u')):
        return None

    # ua, uai, uang do not follow d, t, n, l, r, z, c, s.
    if V in ('ua', 'uai', 'uang') and C in ('d', 't', 'n', 'l', 'r', 'z', 'c',
                                            's'):
        return None

    # 'sh' + 'ong' is not a syllable.
    if V == 'ong' and C == 'sh':
        return None

    # 'o' does not follow d, t, n, g, k, h, zh, ch, sh, r, z, c, s.
    if V == 'o' and C in ('d', 't', 'n', 'g', 'k', 'h', 'zh', 'ch', 'sh', 'r',
                          'z', 'c', 's'):
        return None

    # 'ueng' exists only as the ad-hoc syllable 'weng' (elsewhere 'ong' is
    # used), and a non-erhua 'er' only exists on its own.
    if V in ('ueng', 'er') and C != '':
        return None

    if C != '':
        # After j, q, x the 'v' is written 'u'.
        if C in jqx and V.startswith('v'):
            V = V.replace('v', 'u')
        # Contracted spellings used after an initial.
        V = {'iou': 'iu', 'uei': 'ui', 'uen': 'un'}.get(V, V)
    elif V in ('i', 'in', 'ing'):
        C = 'y'
    elif V == 'u':
        C = 'w'
    elif V.startswith('i') and V not in ('ii', 'iii'):
        C, V = 'y', V[1:]
    elif V.startswith('u'):
        C, V = 'w', V[1:]
    elif V.startswith('v'):
        C, V = 'yu', V[1:]

    spelled = C + V

    # A syllable that already ends in 'r' cannot take erhua again.
    if R == 'r' and spelled.endswith('r'):
        return None

    # 'ii' and 'iii' are written as a plain 'i'.
    spelled = re.sub(r'i+', 'i', spelled)

    return spelled + R + T


def generate_lexicon(with_tone=False, with_erhua=False):
    """Enumerate every syllable accepted by `rule` with its phoneme split.

    Args:
        with_tone: when true, generate each syllable with tones '1'..'5'.
        with_erhua: when true, also generate the erhua ('r') variants.

    Returns:
        An OrderedDict mapping the spelled syllable to the string
        "<initial> <final><erhua><tone>".
    """
    erhua_options = ['', 'r'] if with_erhua else ['']
    tone_options = ['1', '2', '3', '4', '5'] if with_tone else ['']

    syllables = OrderedDict()
    for C in [''] + INITIALS:
        for V in FINALS:
            for R in erhua_options:
                for T in tone_options:
                    spelled = rule(C, V, R, T)
                    if not spelled:
                        # Combination rejected by the phonotactic rules.
                        continue
                    syllables[spelled] = f'{C} {V}{R}{T}'
    return syllables


def generate_symbols(lexicon):
    """Collect the sorted phoneme inventory of a lexicon.

    Args:
        lexicon: mapping whose values are whitespace-separated phonemes.

    Returns:
        Sorted list of every phoneme in the lexicon plus the SPECIALS.
    """
    symbols = set(SPECIALS)
    for phonemes in lexicon.values():
        symbols.update(phonemes.split())
    return sorted(symbols)


if __name__ == "__main__":
    # Command-line entry point: writes "<output>.lexicon" and
    # "<output>.symbols".
    parser = argparse.ArgumentParser(
        description="Generate lexicon for Chinese pinyin to phoneme for MFA")
    parser.add_argument("output", type=str, help="Path to save lexicon.")
    for flag, flag_help in (("--with-tone", "whether to consider tone."),
                            ("--with-r", "whether to consider erhua.")):
        parser.add_argument(flag, action="store_true", help=flag_help)
    parser.add_argument("--rhy-with-duration", action="store_true")
    args = parser.parse_args()

    lexicon = generate_lexicon(args.with_tone, args.with_r)
    symbols = generate_symbols(lexicon)

    with open(args.output + ".lexicon", 'wt') as lexicon_file:
        if args.rhy_with_duration:
            # Extra sp1..sp4 entries, each mapped to itself.
            lexicon_file.write("sp1 sp1\nsp2 sp2\nsp3 sp3\nsp4 sp4\n")
        lexicon_file.writelines(f"{k} {v}\n" for k, v in lexicon.items())

    with open(args.output + ".symbols", 'wt') as symbols_file:
        symbols_file.writelines(s + "\n" for s in symbols)

    print("Done!")


================================================
FILE: examples/other/mfa/local/reorganize_aishell3.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Script to reorganize AISHELL-3 dataset so as to use Montreal Force
Aligner to align transcription and audio.

Please refer to https://montreal-forced-aligner.readthedocs.io/en/latest/data_prep.html
for more details about Montreal Force Aligner's requirements on corpus.

For scripts to reorganize other corpus, please refer to 
 https://github.com/MontrealCorpusTools/MFA-reorganization-scripts
for more details.
"""
import argparse
import os
from pathlib import Path
from typing import Union


def link_wav(root_dir: Union[str, Path], output_dir: Union[str, Path]):
    """Symlink each speaker directory of the dataset into `output_dir`.

    For both the 'train' and 'test' subsets, every entry of
    `<root_dir>/<subset>/wav` gets a symlink of the same name in
    `<output_dir>/<subset>`.

    Args:
        root_dir: dataset root containing `train/wav` and `test/wav`.
        output_dir: directory that receives the `train` / `test` link trees.
    """
    root_dir = Path(root_dir)
    output_dir = Path(output_dir)
    for sub_set in ('train', 'test'):
        # A relative symlink target is interpreted relative to the directory
        # that contains the link, not the current working directory, so the
        # target must be made absolute or the links would dangle.
        wav_dir = Path(os.path.abspath(root_dir / sub_set / 'wav'))
        new_dir = output_dir / sub_set
        new_dir.mkdir(parents=True, exist_ok=True)

        for spk_dir in sorted(os.listdir(wav_dir)):
            new_sub_dir = new_dir / spk_dir
            # Keep links from a previous run instead of failing with
            # FileExistsError, so the script can be re-run.
            if os.path.lexists(new_sub_dir):
                continue
            os.symlink(wav_dir / spk_dir, new_sub_dir)


def write_lab(root_dir: Union[str, Path],
              output_dir: Union[str, Path],
              script_type='pinyin'):
    """Write one `.lab` transcript per utterance listed in `content.txt`.

    Each line of `<root_dir>/<subset>/content.txt` is
    "<wav name>\\t<token> <pinyin> <token> <pinyin> ...". The transcript is
    written to `<output_dir>/<subset>/<first 7 chars of wav name>/<id>.lab`;
    that directory must already exist.

    Args:
        root_dir: dataset root containing `train/` and `test/`.
        output_dir: root of the reorganized corpus.
        script_type: 'word' keeps the even-position tokens, 'pinyin' the
            odd-position ones.

    Raises:
        ValueError: if `script_type` is neither 'word' nor 'pinyin'.
    """
    if script_type not in ('word', 'pinyin'):
        # Fail early; the original code only failed later with an unbound
        # local variable after already opening an output file.
        raise ValueError(
            f"script_type must be 'word' or 'pinyin', got {script_type!r}")

    root_dir = Path(root_dir)
    output_dir = Path(output_dir)
    for sub_set in ('train', 'test'):
        text_path = root_dir / sub_set / 'content.txt'
        new_dir = output_dir / sub_set

        # Explicit UTF-8: the text is Chinese and must not depend on the
        # locale's preferred encoding.
        with open(text_path, 'r', encoding='utf-8') as rf:
            for line in rf:
                line = line.strip()
                if not line:
                    continue
                wav_id, context = line.split('\t')
                spk_id = wav_id[:7]
                transcript_name = wav_id.split('.')[0] + '.lab'
                transcript_path = new_dir / spk_id / transcript_name
                context_list = context.split()
                if script_type == 'word':
                    # space-separated tokens at the even positions
                    new_context = ' '.join(context_list[0:-1:2])
                else:
                    new_context = ' '.join(context_list[1::2])
                # `with` guarantees the transcript is flushed and closed.
                with open(transcript_path, 'w', encoding='utf-8') as wf:
                    wf.write(new_context + '\n')


def reorganize_aishell3(root_dir: Union[str, Path],
                        output_dir: Union[str, Path],
                        script_type='pinyin'):
    """Build the MFA corpus layout: link the audio, then write transcripts.

    Args:
        root_dir: dataset root (a path object; `output_dir.mkdir` is
            called directly, so `output_dir` must be a `Path`).
        output_dir: directory that receives the links and `.lab` files.
        script_type: transcript flavour forwarded to `write_lab`.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    # Links first: write_lab puts the transcripts inside the linked
    # speaker directories.
    link_wav(root_dir=root_dir, output_dir=output_dir)
    write_lab(
        root_dir=root_dir, output_dir=output_dir, script_type=script_type)


if __name__ == "__main__":
    # Command-line entry point for reorganizing AISHELL-3.
    parser = argparse.ArgumentParser(
        description="Reorganize AISHELL-3 dataset for MFA")
    parser.add_argument(
        "--root-dir", type=str, default="", help="path to AISHELL-3 dataset.")
    # Required: without it `Path(None)` below fails with an opaque TypeError
    # instead of a usage message.
    parser.add_argument(
        "--output-dir",
        type=str,
        required=True,
        help="path to save outputs (audio and transcriptions)")
    parser.add_argument(
        "--script-type",
        type=str,
        default="pinyin",
        help="type of lab ('word'/'pinyin')")

    args = parser.parse_args()
    root_dir = Path(args.root_dir).expanduser()
    output_dir = Path(args.output_dir).expanduser()
    output_dir.mkdir(parents=True, exist_ok=True)
    reorganize_aishell3(root_dir, output_dir, args.script_type)


================================================
FILE: examples/other/mfa/local/reorganize_baker.py
================================================
#!/usr/bin/env python3
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Script to reorganize Baker dataset so as to use Montreal Force
Aligner to align transcription and audio.

Please refer to https://montreal-forced-aligner.readthedocs.io/en/latest/data_prep.html
for more details about Montreal Force Aligner's requirements on corpus.

For scripts to reorganize other corpus, please refer to 
 https://github.com/MontrealCorpusTools/MFA-reorganization-scripts
for more details.
"""
import argparse
import os
import re
import shutil
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from typing import Union

import librosa
import soundfile as sf
from tqdm import tqdm

# Full-width punctuation marks that `insert_rhy` deletes (maps to the empty
# string) from the text line before locating the '#' markers.
repalce_dict = {
    "；": "",
    "。": "",
    "：": "",
    "—": "",
    "）": "",
    "，": "",
    "“": "",
    "（": "",
    "、": "",
    "…": "",
    "！": "",
    "？": "",
    "”": ""
}


def get_transcripts(path: Union[str, Path]):
    """Read a label file made of (header line, transcription line) pairs.

    The first whitespace-separated token of each header line is used as the
    sentence id; the following line, stripped, is its transcription.

    Args:
        path: path to the label file (read as UTF-8).

    Returns:
        dict mapping sentence id to transcription.

    Raises:
        ValueError: if a header line has no transcription line after it.
    """
    # Explicit UTF-8: the header lines hold Chinese text, so decoding must
    # not depend on the locale's preferred encoding.
    with open(path, encoding='utf-8') as f:
        lines = f.readlines()

    # Tolerate blank lines at the end of the file; they used to be parsed as
    # a header and crashed with IndexError.
    while lines and not lines[-1].strip():
        lines.pop()

    if len(lines) % 2:
        raise ValueError(
            f"{path}: expected pairs of lines, but the last sentence has no "
            "transcription line")

    transcripts = {}
    for i in range(0, len(lines), 2):
        sentence_id = lines[i].split()[0]
        transcripts[sentence_id] = lines[i + 1].strip()

    return transcripts


def resample_and_save(source, target, sr=16000):
    """Load `source` at sample rate `sr` and write it to `target` as PCM_16.

    Returns:
        `target`, unchanged.
    """
    samples, _ = librosa.load(str(source), sr=sr)
    sf.write(str(target), samples, samplerate=sr, subtype='PCM_16')
    return target


def reorganize_baker(root_dir: Union[str, Path],
                     output_dir: Union[str, Path]=None,
                     resample_audio=False,
                     rhy_dur=False):
    """Copy or resample the wav files and write one `.lab` per sentence.

    Args:
        root_dir: dataset root containing `ProsodyLabeling/` and `Wave/`.
        output_dir: destination directory; must differ from `Wave/`.
        resample_audio: when true, write every wav through
            `resample_and_save` in a thread pool; otherwise copy the files.
        rhy_dur: when true, read `000001-010000_rhy.txt` (written by
            `normalize_rhy`) instead of `000001-010000.txt`.
    """
    root_dir = Path(root_dir).expanduser()
    label_name = "000001-010000_rhy.txt" if rhy_dur else "000001-010000.txt"
    transcriptions = get_transcripts(root_dir / "ProsodyLabeling" / label_name)

    wave_dir = root_dir / "Wave"
    wav_paths = sorted(wave_dir.glob("*.wav"))
    output_dir = Path(output_dir).expanduser()
    assert wave_dir != output_dir, "Don't use an the original wav's directory as output_dir"

    output_dir.mkdir(parents=True, exist_ok=True)

    if not resample_audio:
        for wav_path in tqdm(wav_paths, desc="copying"):
            shutil.copyfile(wav_path, output_dir / wav_path.name)
    else:
        with ThreadPoolExecutor(os.cpu_count()) as pool, \
                tqdm(total=len(wav_paths), desc="resampling") as pbar:
            pending = []
            for wav_path in wav_paths:
                job = pool.submit(resample_and_save, wav_path,
                                  output_dir / wav_path.name)
                # Advance the progress bar as each job finishes.
                job.add_done_callback(lambda _: pbar.update())
                pending.append(job)

            # Wait for every job; result() re-raises worker exceptions.
            for job in pending:
                job.result()

    for sentence_id, transcript in tqdm(
            transcriptions.items(), desc="transcription process"):
        with open(output_dir / (sentence_id + ".lab"), 'wt') as f:
            f.write(transcript + '\n')
    print("Done!")


def insert_rhy(sentence_first, sentence_second):
    """Interleave "sp<N>" tokens into a space-separated transcription.

    `sentence_first` is the text line carrying '#<digit>' markers. After
    the punctuation in `repalce_dict` is removed, the number of characters
    that precede each marker (earlier markers not counted) says after how
    many tokens of `sentence_second` the corresponding "sp<digit>" token
    is inserted.

    Args:
        sentence_first: text with '#<digit>' markers.
        sentence_second: transcription, tokens separated by single spaces.

    Returns:
        list of the transcription tokens with the "sp<digit>" tokens added.
    """
    marker = '#'
    cleaned = sentence_first.translate(str.maketrans(repalce_dict))
    marker_positions = [m.start() for m in re.finditer(marker, cleaned)]
    # Every earlier marker takes two characters ('#' and its digit), so
    # subtracting them leaves the count of text characters before a marker.
    chars_before = [pos - 2 * k for k, pos in enumerate(marker_positions)]

    words = []
    next_marker = 0
    for token in sentence_second.split(" "):
        words.append(token)
        # len(words) - next_marker == number of transcription tokens so far.
        if (next_marker < len(chars_before) and
                len(words) - next_marker == chars_before[next_marker]):
            pos = marker_positions[next_marker]
            words.append("sp" + cleaned[pos + 1:pos + 2])
            next_marker += 1
    return words


def normalize_rhy(root_dir: Union[str, Path]):
    """Write `000001-010000_rhy.txt` with "sp<N>" tokens in the transcriptions.

    Reads `ProsodyLabeling/000001-010000.txt` as pairs of lines. The first
    line of each pair is copied verbatim; the second is replaced by a tab
    followed by the tokens returned by `insert_rhy`.

    Args:
        root_dir: dataset root containing `ProsodyLabeling/`.

    Raises:
        ValueError: if the label file does not consist of line pairs.
    """
    root_dir = Path(root_dir).expanduser()
    transcript_path = root_dir / "ProsodyLabeling" / "000001-010000.txt"
    target_transcript_path = root_dir / "ProsodyLabeling" / "000001-010000_rhy.txt"

    # Explicit UTF-8 on both sides: the file holds Chinese text and
    # `insert_rhy` matches specific Chinese punctuation, so decoding must not
    # depend on the locale's preferred encoding.
    with open(transcript_path, encoding='utf-8') as f:
        lines = f.readlines()

    # Ignore blank lines at the end of the file and validate the pairing
    # before the target file is truncated.
    while lines and not lines[-1].strip():
        lines.pop()
    if len(lines) % 2:
        raise ValueError(
            f"{transcript_path}: expected pairs of lines, but the last "
            "sentence has no transcription line")

    with open(target_transcript_path, 'wt', encoding='utf-8') as f:
        for i in range(0, len(lines), 2):
            sentence_first = lines[i]  # the first line is saved unchanged
            f.write(sentence_first)
            transcription = lines[i + 1].strip()
            f.write("\t" + " ".join(
                insert_rhy(sentence_first.split('\t')[1], transcription)) +
                    "\n")


if __name__ == "__main__":
    # Command-line entry point for reorganizing the Baker dataset.
    parser = argparse.ArgumentParser(
        description="Reorganize Baker dataset for MFA")
    # Both paths are required: without them `Path(None)` fails later with an
    # opaque TypeError instead of a usage message.
    parser.add_argument(
        "--root-dir", type=str, required=True, help="path to baker dataset.")
    parser.add_argument(
        "--output-dir",
        type=str,
        required=True,
        help="path to save outputs (audio and transcriptions)")
    parser.add_argument(
        "--resample-audio",
        action="store_true",
        help="To resample audio files or just copy them")
    parser.add_argument(
        "--rhy-with-duration",
        action="store_true", )
    args = parser.parse_args()

    if args.rhy_with_duration:
        # Produce the *_rhy.txt label file that reorganize_baker reads below.
        normalize_rhy(args.root_dir)
    reorganize_baker(args.root_dir, args.output_dir, args.resample_audio,
                     args.rhy_with_duration)


================================================
FILE: examples/other/mfa/local/reorganize_ljspeech.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Script to reorganize LJSpeech-1.1 dataset so as to use Montreal Force
Aligner to align transcription and audio.

Please refer to https://montreal-forced-aligner.readthedocs.io/en/latest/data_prep.html
for more details about Montreal Force Aligner's requirements on corpus.

For scripts to reorganize other corpus, please refer to 
 https://github.com/MontrealCorpusTools/MFA-reorganization-scripts
for more details.
"""
import argparse
import os
from pathlib import Path
from typing import Union


def link_wav(root_dir: Union[str, Path], output_dir: Union[str, Path]):
    """Symlink every entry of `<root_dir>/wavs` into `output_dir`.

    Args:
        root_dir: dataset root containing the `wavs` directory.
        output_dir: directory that receives the links (created if missing).
    """
    # A relative symlink target is interpreted relative to the directory that
    # contains the link, so the source must be absolute or the links dangle.
    wav_dir = Path(os.path.abspath(Path(root_dir) / 'wavs'))
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    for wav_name in sorted(os.listdir(wav_dir)):
        link_path = output_dir / wav_name
        # Keep links from a previous run instead of failing with
        # FileExistsError, so the script can be re-run.
        if os.path.lexists(link_path):
            continue
        os.symlink(wav_dir / wav_name, link_path)


def write_lab(root_dir: Union[str, Path], output_dir: Union[str, Path]):
    """Write one `.lab` transcript per row of `metadata.csv`.

    Each row is split on '|'; the first field names the utterance and the
    last field is written, followed by a newline, to
    `<output_dir>/<utterance>.lab`.

    Args:
        root_dir: dataset root containing `metadata.csv`.
        output_dir: directory that receives the `.lab` files (created if
            missing).
    """
    text_path = Path(root_dir) / 'metadata.csv'
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    # Explicit UTF-8 so that non-ASCII characters in the transcripts do not
    # depend on the locale's preferred encoding.
    with open(text_path, 'r', encoding='utf-8') as rf:
        for line in rf:
            line = line.strip()
            if not line:
                # A blank row used to create a stray ".lab" file.
                continue
            line_list = line.split('|')
            utt = line_list[0]
            text = line_list[-1]
            transcript_path = output_dir / (utt + '.lab')
            with open(transcript_path, 'w', encoding='utf-8') as wf:
                wf.write(text + '\n')


def reorganize_ljspeech(root_dir: Union[str, Path],
                        output_dir: Union[str, Path]):
    """Link the audio into `output_dir`, then write the `.lab` transcripts.

    Args:
        root_dir: dataset root, forwarded to `link_wav` and `write_lab`.
        output_dir: destination directory, forwarded likewise.
    """
    link_wav(root_dir=root_dir, output_dir=output_dir)
    write_lab(root_dir=root_dir, output_dir=output_dir)


if __name__ == "__main__":
    # Command-line entry point for reorganizing LJSpeech-1.1.
    parser = argparse.ArgumentParser(
        description="Reorganize LJSpeech-1.1 dataset for MFA")
    # Both paths are required: without them `Path(None)` below fails with an
    # opaque TypeError instead of a usage message.
    parser.add_argument(
        "--root-dir",
        type=str,
        required=True,
        help="path to LJSpeech-1.1 dataset.")
    parser.add_argument(
        "--output-dir",
        type=str,
        required=True,
        help="path to save outputs (audio and transcriptions)")
    args = parser.parse_args()
    root_dir = Path(args.root_dir).expanduser()
    output_dir = Path(args.output_dir).expanduser()
    output_dir.mkdir(parents=True, exist_ok=True)
    reorganize_ljspeech(root_dir, output_dir)


================================================
FILE: examples/other/mfa/local/reorganize_vctk.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Script to reorganize VCTK dataset so as to use Montreal Force
Aligner to align transcription and audio.

Please refer to https://montreal-forced-aligner.readthedocs.io/en/latest/data_prep.html
for more details about Montreal Force Aligner's requirements on corpus.

For scripts to reorganize other corpus, please refer to 
 https://github.com/MontrealCorpusTools/MFA-reorganization-scripts
for more details.
"""
import argparse
import os
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from typing import Union

import librosa
import soundfile as sf
from tqdm import tqdm


def resample_and_save(source, target, sr=16000):
    """Load `source` at sample rate `sr` and write it to `target` as PCM_16.

    Returns:
        `target`, unchanged.
    """
    samples, _ = librosa.load(str(source), sr=sr)
    sf.write(str(target), samples, samplerate=sr, subtype='PCM_16')
    return target


def write_wav(root_dir: Union[str, Path], output_dir: Union[str, Path]):
    """Resample the "mic2" recordings of every kept speaker into `output_dir`.

    Files whose name contains "mic2" under
    `<root_dir>/wav48_silence_trimmed/<speaker>/` are passed to
    `resample_and_save` and written to `<output_dir>/<speaker>/<a>_<b>.wav`,
    where `<a>_<b>` are the first two '_'-separated parts of the source name.

    Args:
        root_dir: dataset root (path object).
        output_dir: destination root (path object).
    """
    # no txt for p315
    # no mic2 for p280 and p362
    skipped = {'log.txt', 'p315', 'p280', 'p362'}

    wav_dir = root_dir / 'wav48_silence_trimmed'
    new_dir = output_dir
    new_dir.mkdir(parents=True, exist_ok=True)

    # (source path, target path) pairs to resample
    jobs = []
    for spk_dir in os.listdir(wav_dir):
        if spk_dir in skipped:
            continue
        sub_dir = wav_dir / spk_dir
        new_sub_dir = new_dir / spk_dir
        new_sub_dir.mkdir(parents=True, exist_ok=True)
        for wav_name in os.listdir(sub_dir):
            # mic1 have very low frequency noises
            if "mic2" not in wav_name:
                continue
            # drop the "_mic2" part and replace ".flac" with ".wav"
            stem = "_".join(wav_name.split("_")[:2])
            jobs.append((str(sub_dir / wav_name),
                         str(new_sub_dir / (stem + ".wav"))))

    with ThreadPoolExecutor(os.cpu_count()) as pool, \
            tqdm(total=len(jobs), desc="resampling") as pbar:
        pending = []
        for source, target in jobs:
            job = pool.submit(resample_and_save, source, target)
            # Advance the progress bar as each job finishes.
            job.add_done_callback(lambda _: pbar.update())
            pending.append(job)

        # Wait for every job; result() re-raises worker exceptions.
        for job in pending:
            job.result()


def write_txt(root_dir: Union[str, Path], output_dir: Union[str, Path]):
    """Copy the transcript files of every kept speaker into `output_dir`.

    Every file in `<root_dir>/txt/<speaker>/` is copied line by line to
    `<output_dir>/<speaker>/` under the same name. The entries 'log.txt',
    'p315', 'p280' and 'p362' are skipped, as in `write_wav`.

    Args:
        root_dir: dataset root containing the `txt` directory.
        output_dir: destination root (created if missing).
    """
    txt_dir = Path(root_dir) / 'txt'

    new_dir = Path(output_dir)
    new_dir.mkdir(parents=True, exist_ok=True)
    for spk_dir in os.listdir(txt_dir):
        # no txt for p315
        if spk_dir in {'log.txt', 'p315', 'p280', 'p362'}:
            continue
        sub_dir = txt_dir / spk_dir
        if not sub_dir.is_dir():
            # Stray files next to the speaker directories cannot be listed.
            continue
        new_sub_dir = new_dir / spk_dir
        # The speaker directory may not exist yet when this function runs
        # without (or before) write_wav.
        new_sub_dir.mkdir(parents=True, exist_ok=True)
        for txt_name in os.listdir(sub_dir):
            # `with` closes both handles, so the copy is flushed and no file
            # descriptor is leaked per transcript.
            with open(sub_dir / txt_name, "r") as rf, \
                    open(new_sub_dir / txt_name, "w") as wf:
                for line in rf:
                    wf.write(line)


def reorganize_vctk(root_dir: Union[str, Path], output_dir: Union[str, Path]):
    """Create `output_dir`, then write the resampled audio and transcripts.

    Args:
        root_dir: dataset root, forwarded to `write_wav` and `write_txt`.
        output_dir: destination root (path object; `mkdir` is called on it).
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    write_wav(root_dir=root_dir, output_dir=output_dir)
    write_txt(root_dir=root_dir, output_dir=output_dir)


if __name__ == "__main__":
    # Command-line entry point for reorganizing VCTK-Corpus-0.92.
    parser = argparse.ArgumentParser(
        description="Reorganize VCTK-Corpus-0.92 dataset for MFA")
    parser.add_argument(
        "--root-dir",
        type=str,
        default="",
        help="path to VCTK-Corpus-0.92 dataset.")
    # Required: without it `Path(None)` below fails with an opaque TypeError
    # instead of a usage message.
    parser.add_argument(
        "--output-dir",
        type=str,
        required=True,
        help="path to save outputs (audio and transcriptions)")

    args = parser.parse_args()
    root_dir = Path(args.root_dir).expanduser()
    output_dir = Path(args.output_dir).expanduser()
    output_dir.mkdir(parents=True, exist_ok=True)
    reorganize_vctk(root_dir, output_dir)


================================================
FILE: examples/other/mfa/run.sh
================================================
#!/bin/bash
# Prepare the Baker corpus and train/align it with Montreal Forced Aligner:
# generate the lexicon, reorganize the corpus, report OOVs, fetch MFA if
# needed, then run mfa_train_and_align. Each step is skipped when its output
# already exists.
exp=exp
data=data

mkdir -p $exp
mkdir -p $data

LEXICON_NAME='simple'
MFA_DOWNLOAD_DIR=local/

if [ ! -f "$exp/$LEXICON_NAME.lexicon" ]; then
    echo "generating lexicon..."
    python local/generate_lexicon.py "$exp/$LEXICON_NAME" --with-r --with-tone
    echo "lexicon done"
fi

if [ ! -d $exp/baker_corpus ]; then
    echo "reorganizing baker corpus..."
    python local/reorganize_baker.py --root-dir=~/datasets/BZNSYP --output-dir=$exp/baker_corpus --resample-audio
    echo "reorganization done. Check output in $exp/baker_corpus."
    echo "audio files are resampled to 16kHz"
    echo "transcription for each audio file is saved with the same namd in $exp/baker_corpus "
fi


echo "detecting oov..."
python local/detect_oov.py $exp/baker_corpus $exp/"$LEXICON_NAME.lexicon"
echo "detecting oov done. you may consider regenerate lexicon if there is unexpected OOVs."


if [ ! -f "$MFA_DOWNLOAD_DIR/montreal-forced-aligner_linux.tar.gz" ]; then
    echo "downloading mfa..."
    (cd $MFA_DOWNLOAD_DIR && wget https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner/releases/download/v1.0.1/montreal-forced-aligner_linux.tar.gz)
    echo "download mfa done!"
fi

if [ ! -d "$MFA_DOWNLOAD_DIR/montreal-forced-aligner" ]; then
    echo "extracting mfa..."
    (cd $MFA_DOWNLOAD_DIR && tar xvf "montreal-forced-aligner_linux.tar.gz")
    echo "extraction done!"
fi

# Prepend the MFA binaries to PATH. Assigning PATH without ":$PATH" would
# drop every system directory, so no external command could be found by
# name after this line.
export PATH="$MFA_DOWNLOAD_DIR/montreal-forced-aligner/bin:$PATH"

if [ ! -d "$exp/baker_alignment" ]; then
    echo "Start MFA training..."
    PATH=$MFA_DOWNLOAD_DIR/montreal-forced-aligner/bin/:$PATH \
    LD_LIBRARY_PATH=$MFA_DOWNLOAD_DIR/montreal-forced-aligner/lib/:$LD_LIBRARY_PATH \
    ./$MFA_DOWNLOAD_DIR/montreal-forced-aligner/bin/mfa_train_and_align \
        $exp/baker_corpus "$exp/$LEXICON_NAME.lexicon" $exp/baker_alignment -o $exp/baker_model --clean --verbose -j 10 --temp_directory $exp/.mfa_train_and_align
    echo "training done!"
    echo "results: $exp/baker_alignment"
    echo "model: $exp/baker_model"
fi


================================================
FILE: examples/other/mfa/run_canton.sh
================================================
#!/bin/bash
# Prepare the Cantonese corpora and train/align them with Montreal Forced
# Aligner: generate the lexicon and wav/lab training data, fetch MFA if
# needed, then run mfa_train_and_align. Each step is skipped when its output
# already exists.
exp=exp

mkdir -p $exp
LEXICON_NAME='canton'
MFA_DOWNLOAD_DIR=local/

if [ ! -f "$exp/$LEXICON_NAME.lexicon" ]; then
    echo "generating lexicon and training data..."
    python local/generate_canton_lexicon_wavlabs.py --output_lexicon "$exp/$LEXICON_NAME.lexicon" --output_wavlabs "$exp/$LEXICON_NAME"_wavlabs --inputs ~/datasets/Guangzhou_Cantonese_Scripted_Speech_Corpus_Daily_Use_Sentence ~/datasets/Guangzhou_Cantonese_Scripted_Speech_Corpus_in_Vehicle
    echo "lexicon and training data done"
fi

if [ ! -f "$MFA_DOWNLOAD_DIR/montreal-forced-aligner_linux.tar.gz" ]; then
    echo "downloading mfa..."
    (cd $MFA_DOWNLOAD_DIR && wget https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner/releases/download/v1.0.1/montreal-forced-aligner_linux.tar.gz)
    echo "download mfa done!"
fi

if [ ! -d "$MFA_DOWNLOAD_DIR/montreal-forced-aligner" ]; then
    echo "extracting mfa..."
    (cd $MFA_DOWNLOAD_DIR && tar xvf "montreal-forced-aligner_linux.tar.gz")
    echo "extraction done!"
fi

# Prepend the MFA binaries to PATH. Assigning PATH without ":$PATH" would
# drop every system directory, so no external command could be found by
# name after this line.
export PATH="$MFA_DOWNLOAD_DIR/montreal-forced-aligner/bin:$PATH"
if [ ! -d "$exp/canton_alignment" ]; then
    echo "Start MFA training..."
    PATH=$MFA_DOWNLOAD_DIR/montreal-forced-aligner/bin/:$PATH \
    LD_LIBRARY_PATH=$MFA_DOWNLOAD_DIR/montreal-forced-aligner/lib/:$LD_LIBRARY_PATH \
    ./$MFA_DOWNLOAD_DIR/montreal-forced-aligner/bin/mfa_train_and_align \
        "$exp/$LEXICON_NAME"_wavlabs "$exp/$LEXICON_NAME.lexicon" $exp/canton_alignment -o $exp/canton_model --clean --verbose -j 10 --temp_directory $exp/.mfa_train_and_align
    echo "training done!"
    echo "results: $exp/canton_alignment"
    echo "model: $exp/canton_model"
fi


================================================
FILE: examples/other/ngram_lm/.gitignore
================================================
exp/


================================================
FILE: examples/other/ngram_lm/README.md
================================================
# Ngram LM

* s0 - kenlm ngram lm


================================================
FILE: examples/other/ngram_lm/s0/.gitignore
================================================
data/lm


================================================
FILE: examples/other/ngram_lm/s0/README.md
================================================
# Ngram LM

Train a Chinese character n-gram LM with [kenlm](https://github.com/kpu/kenlm).

## Run
```
. path.sh
bash run.sh
```

## Results

```
exp/
|-- text
|-- text.char.tn
|-- text.word.tn
|-- text_zh_char_o5_p0_1_2_4_4_a22_q8_b8.arpa
|-- text_zh_char_o5_p0_1_2_4_4_a22_q8_b8.arpa.klm.bin
|-- text_zh_word_o3_p0_0_0_a22_q8_b8.arpa
`-- text_zh_word_o3_p0_0_0_a22_q8_b8.arpa.klm.bin

0 directories, 7 files
```

```
3ae083627b9b6cef1a82d574d8483f97  exp/text
d97da252d2a63a662af22f98af30cb8c  exp/text.char.tn
c18b03005bd094dbfd9b46442be361fd  exp/text.word.tn
73dbf50097896eda33985e11e1ba9a3a  exp/text_zh_char_o5_p0_1_2_4_4_a22_q8_b8.arpa
01334e2044c474b99c4f2ffbed790626  exp/text_zh_char_o5_p0_1_2_4_4_a22_q8_b8.arpa.klm.bin
36a42de548045b54662411ae7982c77f  exp/text_zh_word_o3_p0_0_0_a22_q8_b8.arpa
332422803ffd73dd7ffd16cd2b0abcd5  exp/text_zh_word_o3_p0_0_0_a22_q8_b8.arpa.klm.bin
```

```
==> exp/text <==
少先队员因该为老人让坐
祛痘印可以吗？有效果吗？
不知这款牛奶口感怎样？ 小孩子喝行吗！
是转基因油?
我家宝宝13斤用多大码的
会起坨吗？
请问给送上楼吗？
亲是送赁上门吗
送货时候有外包装没有还是直接发货过来
会不会有坏的？

==> exp/text.char.tn <==
少 先 队 员 因 该 为 老 人 让 坐
祛 痘 印 可 以 吗 有 效 果 吗
不 知 这 款 牛 奶 口 感 怎 样 小 孩 子 喝 行 吗
是 转 基 因 油
我 家 宝 宝 十 三 斤 用 多 大 码 的
会 起 坨 吗
请 问 给 送 上 楼 吗
亲 是 送 赁 上 门 吗
送 货 时 候 有 外 包 装 没 有 还 是 直 接 发 货 过 来
会 不 会 有 坏 的

==> exp/text.word.tn <==
少先队员 因该 为 老人 让 坐
祛痘 印 可以 吗 有 效果 吗
不知 这 款 牛奶 口感 怎样 小孩子 喝行 吗
是 转基因 油
我家 宝宝 十三斤 用多大码 的
会起 坨 吗
请问 给 送 上楼 吗
亲是 送赁 上门 吗
送货 时候 有 外包装 没有 还是 直接 发货 过来
会 不会 有坏 的

==> exp/text_zh_char_o5_p0_1_2_4_4_a22_q8_b8.arpa <==
\data\
ngram 1=587
ngram 2=395
ngram 3=100
ngram 4=2
ngram 5=0

\1-grams:
-3.272324       <unk>   0
0       <s>     -0.36706257

==> exp/text_zh_word_o3_p0_0_0_a22_q8_b8.arpa <==
\data\
ngram 1=689
ngram 2=1398
ngram 3=1506

\1-grams:
-3.1755018      <unk>   0
0       <s>     -0.23069073
-1.2318869      </s>    0
-3.067262       少先队员        -0.051341705
```


================================================
FILE: examples/other/ngram_lm/s0/data/README.md
================================================
text_correct.txt: https://github.com/shibing624/pycorrector/raw/master/tests/test_file.txt
custom_confusion.txt: https://github.com/shibing624/pycorrector/raw/master/tests/custom_confusion.txt


================================================
FILE: examples/other/ngram_lm/s0/data/custom_confusion.txt
================================================
#变体	本体	本体词词频（可省略）
兴高彩列	兴高采烈	100
吹唐人	吹糖人	100
百年家具	百年家居
泄药	泻药
称做	称作	100
化学成份	化学成分	100
天地无垠	天地无限	100
欲妄	欲望
满头大汉	满头大汗
一阙	一阕	300
斗音	抖音	100
人材	人才	100
微亅信	微信	100
微·信	微信	1000
一毛不陪	一毛不赔	200
无线大	无限大	1
正加	增加
相对得	相对地
越来越底	越来越低
个纾	个数
曾加	增加
情怳	情况
持续的	持续地
不断的	不断地
成现	呈现
家重	加重
中国子	中国字
计录	记录
一落千仗	一落千丈
婴而	婴儿
负贵	富贵
旁遍	旁边
续承	继承
约来越晚	越来越晚
提共	提供
行成	形成
读哩	独立
忙绿	忙碌
年经人	年轻人
智识	知识
一但	一旦
指摘	指责
懹人民自己选择	让人民自己选择
紧裤	辛苦
放心地生活	放心的生活
但靠一份收入	单靠一份收入
多付出地几十倍	多付出的几十倍
化钱	花钱
依些	一些
耐心的教育	耐心地教育
多某些人来说	对某些人来说
列子	例子
普便	普遍
单纯的说	单纯地说
相对得坏处	相对的坏处
改很多幼稚园	盖很多幼稚园
不断的增加	不断地增加
谈讨	探讨
小还	小孩
未普边	未普遍
考良	考量
过的不错	过得不错
手医学治疗	受医学治疗
数为至少四个人	数位至少四个人
小子化	少子化
间直	简直
每办法	没办法
青跨	情况
最总	最终
僱佣	僱用
渐渐的形成	渐渐地形成
渐渐的增加	渐渐地增加
不断的改变	不断地改变
不断的提高	不断地提高
莫些年	某些年
成工	成功
线然	显然
一长争段	一长争短
在加上	再加上
一后	以后
自起	自己
年轻得夫妇	年轻的夫妇
真的事对	真的是对
养小还	养小孩
不彷	不妨
疏缓	舒缓
上叙	上述
或着市	或者是
学习地意愿	学习的意愿
按部就班	按步就班
全面的规划	全面地规划
纾缓	舒缓
年少气胜	年少气盛
新家坡	新加坡
变的很轻松	变得很轻松
我门	我们
月来越好	越来越好
发生国	发生过
以开发国家	已开发国家
以开发	已开发
赛翁失马	塞翁失马
非常的宝贵	非常地宝贵
列如	例如
非常的珍贵	非常地珍贵
二零五龄年	二零五零年
証明	证明
一间一间的关必	一间一间地关闭
缠能收割	才能收割
朗朗称道	琅琅称道
缠能拯救	才能拯救
付其责	负其责
还而	孩儿
自然而然的	自然而然地
现再	现在
一间一间的在建造	一间一间地在建造
惰胎	堕胎
给与	给予
少字化	少子化
狂大	广大
至到	直到
作些政策	做些政策
想望	向往
低免	抵免
展生	产生
源料	原料
好好的准备	好好地准备
庄况	状况
因发	引发
自记	自己
桃论	讨论
负单	负担
而以	而已
政符	政府
邀情	邀请
郑加	增加
全心全力的	全心全力地
进一份力	尽一份力
不断的努力	不断地努力
厉害关系	利害关系
相像	现象
题及	提及
剥歇	剥削
下起	下去
了所学校	各所学校
很好得一份工作	很好的一份工作
来的这么快	来得这么快
言前	眼前
所担任的是有限	所担任的事有限
放面	方面
办发	办法
有趣的说到	有趣地说到
坚帧不移	坚贞不移
心力绞碎	心力交瘁
开发看经济	开发和经济
有效的规划	有效地规划
流守	留守
生济	生计
带欠缺	但欠缺
堆动	推动
表达的是	表达的事
所以问题	所有问题
由其固定的速度	有其固定的速度
乐与减少	乐于减少
变的困难	变得困难
多馀	多于
长远的看	长远地看
血浓与水	血浓于水
表面的看	表面地看
推称出新	推陈出新
脱离萍	脱离贫
时间一天一天的过去	时间一天一天地过去
提昌	提倡
续多	许多
现进	先进
销费率	消费率
直接的被感受	直接地被感受
陪养	培养
主在乡下	住在乡下
弄多的很快	弄多得很快
他门	他们
持续的下降	持续地下降
相对的减少	相对地减少
相对的提高	相对地提高
适当的生育	适当地生育
成功的制定	成功地制定
青少年门	青少年们
慎重的希望	慎重地希望
娇傲	骄傲
亲爱的少年门	亲爱的少年们
少子华	少子化
生上	身上
不断的鼓励	不断地鼓励
具造	制造
单初	当初
发掌	发展
大幅度的提升	大幅度地提升
负担地因素	负担的因素
亘多医疗费用	更多医疗费用
餵了	为了
照成	造成
教育基精	教育基金
装造性	创造性
观察关	观察官
怎么形成地	怎么形成的
比年长者来的多	比年长者来得多
不单	不但
变的更良好	变得更良好
变的更大	变得更大
壤大众理解	让大众理解
上免得分析	上面的分析
已经再发生了	已经在发生了
带来得危机	带来的危机
恶讯循环	恶性循环
他得生活费	他的生活费
以开发地区	已开发地区
长其	长期
经额	金额
在过几十年	再过几十年
作为探讨	做为探讨
系根据	是根据
位于	为于
份公司	分公司
要作塑胶袋	要做塑胶袋
还不过	还不够
逃战	挑战
再服务业	在服务业
招莫	招募
坐者	坐着
仔细的观察	仔细地观察
三个凑皮匠	三个臭皮匠
个中各样	各种各样
不啻	不是
即史	即使
岂有此里	岂有此理
组宠物的人	租宠物的人
浅溥	浅薄
处里	处理
他们得不负责任的态度	他们的不负责任的态度
出租得事	出租的事
这样以来	这样一来
题高	提高
厌力	压力
事情阿	事情啊
又意思	有意思
建慷	健康
真恭喜妳阿	真恭喜妳啊
抱见阿	抱歉啊
幸苦	辛苦
参家	参加
回来台湾	会来台湾
生体	身体
总视	总是
一把很大的化	一把很大的花
出你	祝你
高心	高兴
应为	因为
洗碗	希望
那理	那里
根她坐公共汽车	跟她坐公共汽车
公克	功课
根林美美去	跟林美美去
一前	以前
傍边	旁边
总明	聪明
最难得课	最难的课
在马上去	再马上去
建在	现在
事等我女朋友的	是等我女朋友的
客爱	可爱
象片	相片
也雪	也许
录的	绿的
纲来	刚来
让后	然后
真得是坏人	真的是坏人
提以	提议
大的家	他的家
从是	总是
有处	有趣
课目	科目
看得是美国电影	看的是美国电影
兴奋得	兴奋地
控的时间	空的时间
内容懂不清楚	内容都不清楚
谊起	一起
打公车	搭公车
近去	进去
林美妹	林美美
唷意思	有意思
蔘加	参加
再开一个好把	再开一个好吧
兴起不好	心情不好
庆祝地会	庆祝的会
漂漂亮亮地风景	漂漂亮亮的风景
再庆祝一次把	再庆祝一次吧
找的时间	找个时间
间妈妈	见妈妈
无会	舞会
解我的朋友	请我的朋友
日字	日子
但然	当然
障爱文	张爱文
已定	一定
澳是	要是
长爱文	张爱文
理物	礼物
延后	然后
合问她	和问她
恨高兴	很高兴
一让	一样
帮别的学生的芒	帮别的学生的忙
沾着	站着
怒朋友	女朋友
下明	小明
老师文学生	老师问学生
他清她吃饭	他请她吃饭
还没作	还没做
座公车	坐公车
打篹	打算
沾起来	站起来
再起床呢	在起床呢
再黑板写的东西	在黑板写的东西
不输服	不舒服
很哩	很累
警张	紧张
劳天	聊天
很来	很累
多要	都要
奴朋友	女朋友
高兴的不得了	高兴得不得了
里学校很近	离学校很近
音龠	音乐
合唱父系语法	和常复习语法
税觉	睡觉
觉得贰	觉得饿
对不气	对不起
情你去吃饭	请你去吃饭
不局道	不知道
你好马	你好吗
岑么样	怎么样
可以马	可以吗
我想情你	我想请你
火者站	火车站
聊体	聊天
最前的子	最前的字
座捷运	坐捷运
哪哩	哪里
十只路口	十字路口
不智道	不知道
怎门办	怎么办
点视机	电视机
已后	以后
妳会时后	妳回时候
漂凉	漂亮
票凉	漂亮
又漂亮的衣服	有漂亮的衣服
清卖给我	请买给我
红虹	红红
待你的国家	到你的国家
太样兵	太阳饼
不拘导	不知道
不咀导	不知道
这哩	这里
成市	城市
发山	爬山
风镜	风景
他杏林	他姓林
痕累	很累
方假	放假
风京	风景
座火车	坐火车
他的华	他的话
户然	忽然
理面	里面
做回去	坐回去
出们	出门
愿赖	原来
吧手	把手
厅路上	听路上
定车	停车
高现	高兴
排照	拍照
他的婉	他的腕
原意	愿意
没人琣他	没人陪他
歹去	带去
自即	自己
开是	开始
和一点饮料	喝一点饮料
录行	旅行
谈着谈者	谈着谈着
漂亮的术	漂亮的树
戴我	载我
作天	昨天
很日	很热
照篇	照片
我借你们	我接你们
握着我的朋友	我找我的朋友
台配种战	台北总站
来我的家完	来我的家玩
营为	因为
她门	她们
些日会	生日会
右遍	右边
开时	开始
做八八的公车	坐八八的公车
对们	对门
学玩了	学完了
票漂亮亮	漂漂亮亮
喜暗	喜欢
括大风	刮大风
批具	啤酒
骠了	漂亮
瞅天	秋天
难德	南德
不太施	不太湿
冬天件	冬天间
愁天	秋天
塞太阳	晒太阳
跟美	很美
单让	当然
感在	改在
有孔	有空
吃葚么	吃什么
西欢	喜欢
泰泰	太太
词反	吃饭
其末考	期末考
台湾采	台湾菜
他们的采	他们的菜
设么	什么
共喜	恭喜
日本蔡	日本菜
餐订	餐厅
公课	功课
大们口	大门口
有没有控	有没有空
刘信	留信
六根礼拜日	六跟礼拜日
永敢	勇敢
辛亏	幸亏
估停站	古亭站
异子	椅子
胡涂	糊涂
真巧呕	真巧喔
奴生	女生
好朋有	好朋友
莫斯汉堡	摩斯汉堡
不可望	不可忘
中文壳	中文课
棒他	帮他
怎嚜走	怎么走
吃凉	吃惊
冈来	刚来
勇敢得	勇敢地
风友	朋友
走天	昨天
湖涂	糊涂
怎嚜	怎么
真得	值得
哪理	哪里
票亮	漂亮
又高又受	又高又瘦
化了半个小时	花了半个小时
杆快	赶快
学习的很愉快	学习得很愉快
怎吗	怎么
十子路口	十字路口
开方	开放
息缓	喜欢
交相机	照相机
请办我	请帮我
爬上	爬山
录色	绿色
各我妈妈	给我妈妈
交马上停车	叫马上停车
米了	迷了
交到了	就到了
号吗	号码
很葬	很脏
换给	还给
清流	请留
名子	名字
把输给弄丢	把书给弄丢
播爱	博爱
大搂	大楼
逃论室	讨论室
坻里	地理
奇给我	寄给我
座飞机	坐飞机
再机场	在机场
便很帅	变很帅
腾她	等她
路行	旅行
梅天	每天
她九打电话	她就打电话
美邦法	没办法
不好以时	不好意思
有疑点藤	有一点疼
里拜四	礼拜四
李佬时	李老师
工课	功课
身提	身体
可使	可是
靠事	考试
重要得东西	重要的东西
寮解	了解
情你	请你
而截	而且
昭显机	照相机
够猫	狗猫
最有名的采	最有名的菜
波澜	波兰
西办加话	西班牙话
东四	东西
台弯	台湾
学子	学字
银董	运动
而接	而且
有控	有空
陆行	旅行
忍识	认识
式式看	试试看
拨兰	波兰
一支狗	一只狗
管哩	管理
将学金	奖学金
再波兰	在波兰
下了可	下了课
回越南得时候	回越南的时候
台湾得风景	台湾的风景
朋有	朋友
气个月	七个月
从德国莱	从德国来
文花	文化
吃部下	吃不下
清假	请假
烤试	考试
血绿	血缘
我票告诉您	我要告诉您
候天	后天
惜来看	借来看
上果	上课
吗吗	妈妈
师什么公车	是什么公车
岗来	刚来
挑了	跳了
座７１６号的公车	坐７１６号的公车
三十分锺	三十分钟
七点锺	七点钟
公车佔	公车站
邓的时候	等的时候
依服	衣服
睡过偷	睡过头
佑对	有对
乎很大的吸	呼很大的吸
一整天水乐	一整天睡了
等以等	等一等
只有妳	只有你
可况	何况
学姣	学校
跳无	跳舞
恨情张	很紧张
恨票亮	很漂亮
俩个	两个
悾怕	恐怕
北老师骂他	怕老师骂他
十五分种	十五分钟
必较多	比较多
睡过头把	睡过头吧
我要座得巴士	我要坐的巴士
来不起	来不及
才陪吵醒	才被吵醒
撘公车	搭公车
眼靓	眼睛
台我无聊	太过无聊
大明哏他朋友	大明跟他朋友
跳五	跳舞
３０份钟	３０分钟
座呢个公车	坐那个公车
热恼	热闹
手摽	手表
苏嘉	暑假
不好亿思	不好意思
怎么杨	怎么样
妳怎么了	你怎么了
没有控	没有空
根冒	感冒
卧是李	我是李
即然	既然
台杯	台北
里拜	礼拜
希冠	希望
开兴	开心
烤师	考试
热心的不得了	热心得不得了
结昏	结婚
边请卡	邀请卡
注在	住在
密鲁	秘鲁
过的很开心	过得很开心
寰麟	婚礼
我回想妳们	我会想你们
绍待卡	招待卡
妳们	你们
幸福快乐的过	幸福快乐地过
公司排我	公司派我
常不一样	穿不一样
做二九七的公车	坐二九七的公车
几希	继续
今天得计画	今天的计画
开心得	开心地
钟就	终究
共车	公车
兔然	突然
林雨	淋雨
做公车	坐公车
好好地经验	好好的经验
涂然	突然
讨润以下	讨论一下
去万	去玩
高兴得	高兴地
考食物	烤食物
在考	在烤
美里	美丽
交他的朋友	教他的朋友
休葸	休息
拟越来越漂亮	妳越来越漂亮
对阿	对啊
一扁吃	一边吃
高高兴兴的	高高兴兴地
修系	休息
搜已	所以
早藏	早上
玩的横高兴	玩得很高兴
做神马	做什么
努生	女生
高行	高兴
一点鹅	一点饿
点蔡	点菜
交弮	交卷
贵定	规定
公作	工作
炽爱	挚爱
系望	希望
你道德地旁	你到的地方
除去完	出去玩
里拜天	礼拜天
暱友打算	你有打算
新家波	新加坡
考利以下	考虑一下
一起去把	一起去吧
寄怪	奇怪
休息一点	休息一天
我要会我的国家	我要回我的国家
四班牙	西班牙
完五月	玩五月
踏青我们	他请我们
是天两夜	四天两夜
忆起去	一起去
好久没间	好久没见
放便	方便
玩着玩者	玩着玩着
裁板	裁判
清等我	请等我
座一下	坐一下
请座	请坐
板球	棒球
测天	这天
馔衣服	穿衣服
情坐	请坐
经张	紧张
辕动比赛	运动比赛
埤酒	啤酒
看般球	看棒球
餲得不得了	渴得不得了
絣干	饼干
形像代言人 形象代言人
化夏子孙 华夏子孙
华为泡影 化为泡影
秘密歪斜 秘密外泄
计画 计划
忠于等到了 终于等到了
未日来临 末日来临
眼晴 眼睛
游刀有余 游刃有余
唐僧帅徒 唐僧师徒
太概 大概
一揽子货币 一篮子货币
举足无措 手足无措
凭藉 凭借
令人發指 令人髮指
绅仕 绅士
粘豆包 黏豆包
磬竹难书 罄竹难书
严惩不怠 严惩不贷
戮力同心 勠力同心
罚角球 发角球
综合症 综合征
单独二胎 单独二孩
蛰人 蜇人
泄秘 泄密
伏法 服法
羊羯子 羊蝎子
泻湖 潟湖
家俱 家具
精萃 精粹
兴亡周期率 兴亡周期律
震憾 震撼
中华人名共和国 中华人民共和国
大人常委会 人大常委会
中国共产常 中国共产党
科学发展现 科学发展观
扶贪 扶贫
严谨公款吃喝 严禁公款吃喝
按纳	按捺
案语	按语
百废具兴	百废俱兴
百页窗	百叶窗
班白	斑白
颁白	斑白
班驳	斑驳
胞子	孢子
保镳	保镖
保母	保姆
褓姆	保姆
辈份	辈分
本份	本分
笔划	笔画
必恭必敬	毕恭毕敬
编者案	编者按
萹豆	扁豆
稨豆	扁豆
藊豆	扁豆
标识	标志
鬓脚	鬓角
禀承	秉承
补靪	补丁
补钉	补丁
参预	参与
惨澹	惨淡
差迟	差池
搀和	掺和
搀假	掺假
搀杂	掺杂
刬除	铲除
倘佯	徜徉
车箱	车厢
澈底	彻底
沈思	沉思
趁心	称心
成份	成分
澄彻	澄澈
侈糜	侈靡
筹画	筹划
筹马	筹码
踌蹰	踌躇
出谋画策	出谋划策
喘嘘嘘	喘吁吁
磁器	瓷器
赐与	赐予
粗卤	粗鲁
搭当	搭档
搭挡	搭档
搭赸	搭讪
答讪	搭讪
答覆	答复
带孝	戴孝
耽心	担心
耽忧	担忧
担搁	耽搁
澹泊	淡泊
澹然	淡然
倒楣	倒霉
低徊	低回
雕敝	凋敝
雕弊	凋敝
雕零	凋零
雕落	凋落
雕谢	凋谢
跌荡	跌宕
跌交	跌跤
蹀血	喋血
丁宁	叮咛
定单	订单
定户	订户
定婚	订婚
定货	订货
定阅	订阅
枓拱	斗拱
枓栱	斗拱
逗遛	逗留
斗趣儿	逗趣儿
独脚戏	独角戏
端五	端午
二簧	二黄
贰心	二心
发人深醒	发人深省
蕃衍	繁衍
分付	吩咐
份量	分量
份内	分内
份外	分外
忿忿	愤愤
丰富多采	丰富多彩
疯瘫	风瘫
疯颠	疯癫
疯疯颠颠	疯疯癫癫
锋铓	锋芒
伏侍	服侍
服事	服侍
伏输	服输
伏罪	服罪
负嵎顽抗	负隅顽抗
傅会	附会
覆信	复信
复辙	覆辙
干与	干预
告戒	告诫
梗直	耿直
鲠直	耿直
恭惟	恭维
勾划	勾画
勾联	勾连
孤苦零丁	孤苦伶仃
孤负	辜负
骨董	古董
股分	股份
骨瘦如豺	骨瘦如柴
关连	关联
光采	光彩
归根结柢	归根结底
规戒	规诫
鬼哭狼嗥	鬼哭狼嚎
过份	过分
虾蟆	蛤蟆
含胡	含糊
涵蓄	含蓄
寒伧	寒碜
喝采	喝彩
喝倒采	喝倒彩
哄动	轰动
宏扬	弘扬
红通通	红彤彤
弘论	宏论
弘图	宏图
鸿图	宏图
弘愿	宏愿
弘旨	宏旨
鸿福	洪福
胡臭	狐臭
胡蝶	蝴蝶
胡涂	糊涂
虎魄	琥珀
花着	花招
豁拳	划拳
搳拳	划拳
恍忽	恍惚
晖映	辉映
混水摸鱼	浑水摸鱼
火伴	伙伴
机伶	机灵
激忿	激愤
计画	计划
记念	纪念
寄与	寄予
茄克	夹克
佳宾	嘉宾
驾御	驾驭
架式	架势
嫁装	嫁妆
简炼	简练
骄奢淫佚	骄奢淫逸
脚门	角门
狡滑	狡猾
脚根	脚跟
叫化子	叫花子
精采	精彩
鸠合	纠合
鸠集	纠集
脚色	角色
刻期	克期
刻日	克日
刻划	刻画
阔老	阔佬
蓝缕	褴褛
烂缦	烂漫
烂熳	烂漫
狼籍	狼藉
狼头	榔头
累坠	累赘
黎黑	黧黑
联贯	连贯
联接	连接
联绵	连绵
联缀	连缀
连袂	联袂
连翩	联翩
踉蹡	踉跄
嘹喨	嘹亮
撩乱	缭乱
零丁	伶仃
囹圉	囹圄
蹓跶	溜达
留连	流连
喽罗	喽啰
卤莽	鲁莽
录象	录像
录相	录像
落腮胡子	络腮胡子
落漠	落寞
落莫	落寞
痲痹	麻痹
痲风	麻风
痲疹	麻疹
蚂蜂	马蜂
马糊	马虎
门坎	门槛
糜费	靡费
绵联	绵连
摹仿	模仿
模胡	模糊
摹拟	模拟
模写	摹写
磨擦	摩擦
磨拳擦掌	摩拳擦掌
魔难	磨难
眽眽	脉脉
谋画	谋划
那末	那么
内哄	内讧
凝炼	凝练
牛崽裤	牛仔裤
钮扣	纽扣
掱手	扒手
蟠根错节	盘根错节
盘据	盘踞
蟠踞	盘踞
蟠据	盘踞
蟠曲	盘曲
盘陁	盘陀
盘石	磐石
蟠石	磐石
盘跚	蹒跚
旁皇	彷徨
披星带月	披星戴月
疲塌	疲沓
飘泊	漂泊
飘流	漂流
漂零	飘零
飘飖	飘摇
平空	凭空
牵联	牵连
蕉萃	憔悴
清彻	清澈
情素	情愫
惓惓	拳拳
劝戒	劝诫
热呼呼	热乎乎
热呼	热乎
热中	热衷
人材	人才
日蚀	日食
入坐	入座
色采	色彩
杀一警百	杀一儆百
沙鱼	鲨鱼
山查	山楂
舢舨	舢板
梢公	艄公
奢糜	奢靡
伸雪	申雪
神彩	神采
湿渌渌	湿漉漉
十锦	什锦
收伏	收服
首坐	首座
书柬	书简
思惟	思维
死心踏地	死心塌地
塌实	踏实
菾菜	甜菜
挺而走险	铤而走险
透澈	透彻
图象	图像
推委	推诿
玩艺儿	玩意儿
委过	诿过
污七八糟	乌七八糟
无动于中	无动于衷
无宁	毋宁
无庸	毋庸
五采缤纷	五彩缤纷
五痨七伤	五劳七伤
瘜肉	息肉
希罕	稀罕
希奇	稀奇
希少	稀少
希世	稀世
希有	稀有
噏动	翕动
洗炼	洗练
贤慧	贤惠
香纯	香醇
香菰	香菇
像貌	相貌
萧洒	潇洒
小题大作	小题大做
卸傤	卸载
信口开合	信口开河
惺松	惺忪
秀外惠中	秀外慧中
叙文	序文
叙言	序言
训戒	训诫
压伏	压服
压韵	押韵
雅片	鸦片
洋琴	扬琴
要末	要么
夜消	夜宵
一槌定音	一锤定音
一古脑儿	一股脑儿
衣衿	衣襟
衣著	衣着
义无返顾	义无反顾
霪雨	淫雨
赢余	盈余
影象	影像
余辉	余晖
鱼具	渔具
鱼网	渔网
预会	与会
预闻	与闻
御手	驭手
豫备	预备
元来	原来
元煤	原煤
源源本本	原原本本
元元本本	原原本本
原故	缘故
原由	缘由
月蚀	月食
月芽	月牙
云豆	芸豆
杂遝	杂沓
再接再砺	再接再厉
斩新	崭新
展转	辗转
颤栗	战栗
帐本	账本
折衷	折中
这末	这么
正经八摆	正经八百
脂麻	芝麻
支解	肢解
枝解	肢解
直捷了当	直截了当
直接了当	直截了当
指手划脚	指手画脚
赒济	周济
转游	转悠
装璜	装潢
姿式	姿势
子细	仔细
自各儿	自个儿
左证	佐证
安份守己	安分守己
暗度陈仓	暗渡陈仓
把势	把式
班配	般配
棒锤	棒槌
棒棰	棒槌
暴光	曝光
报导	报道
悲忿	悲愤
背理	悖理
比画	比划
笔心	笔芯
荜路蓝缕	筚路蓝缕
辨白	辩白
辩辞	辩词
波浪鼓	拨浪鼓
泼浪鼓	拨浪鼓
部份	部分
菜子	菜籽
仓惶	仓皇
仓黄	仓皇
仓遑	仓皇
策画	策划
常年累月	长年累月
唱工	唱功
潮呼呼	潮乎乎
潮忽忽	潮乎乎
撤消	撤销
承上起下	承上启下
吃里扒外	吃里爬外
踟躇	踟蹰
串连	串联
辞汇	词汇
词令	辞令
搭拉	耷拉
答理	搭理
哒哒	嗒嗒
搭裢	褡裢
搭连	褡裢
褡连	褡裢
褡联	褡裢
打冷颤	打冷战
大放厥辞	大放厥词
铛铛	当当
当做	当作
捣腾	倒腾
悼辞	悼词
得意扬扬	得意洋洋
灯心	灯芯
滴里嘟噜	嘀里嘟噜
掉包	调包
钉梢	盯梢
丢三拉四	丢三落四
掉换	调换
东不拉	冬不拉
遁辞	遁词
哆唆	哆嗦
峨嵋山	峨眉山
发楞	发愣
翻然醒悟	幡然醒悟
反覆	反复
忿恨	愤恨
忿怒	愤怒
夫倡妇随	夫唱妇随
浮图	浮屠
辐凑	辐辏
福份	福分
俯首贴耳	俯首帖耳
赋与	赋予
夹肢窝	胳肢窝
格登	咯噔
根柢	根底
梗咽	哽咽
宫庭	宫廷
钩勒	勾勒
勾针	钩针
够戗	够呛
孤另另	孤零零
孤伶伶	孤零零
轱轳	轱辘
毂辘	轱辘
固步自封	故步自封
故技	故伎
锢疾	痼疾
固疾	痼疾
刮刮叫	呱呱叫
呵腰	哈腰
寒颤	寒战
嚎啕	号啕
号咷	号啕
嚎咷	号啕
好高务远	好高骛远
和事老	和事佬
贺辞	贺词
黑鼓隆咚	黑咕隆咚
黑古龙冬	黑咕隆咚
黑鸦鸦	黑压压
轰堂大笑	哄堂大笑
轰笑	哄笑
宏亮	洪亮
呼嗤	呼哧
呼蚩	呼哧
呼吃	呼哧
花狸狐哨	花里胡哨
花梢	花哨
花稍	花哨
花消	花销
黄历	皇历
混身	浑身
浑沌	混沌
辑逸	辑佚
给与	给予
记录片	纪录片
记要	纪要
趼子	茧子
交待	交代
脚鸭子	脚丫子
脚指	脚趾
叫真	较真
菁华	精华
警省	警醒
酒钟	酒盅
倔犟	倔强
开消	开销
砍大山	侃大山
看做	看作
夸大其辞	夸大其词
宽洪大量	宽宏大量
老趼	老茧
乐和和	乐呵呵
乐孜孜	乐滋滋
利害 厉害
伶牙利齿	伶牙俐齿
流言飞语	流言蜚语
蹓弯儿	遛弯儿
乱烘烘	乱哄哄
罗纹	螺纹
慢道	漫道
慢说	漫说
毛骨耸然	毛骨悚然
毛骨竦然	毛骨悚然
冒然	贸然
棉子	棉籽
藐小	渺小
渺视	藐视
渺远	邈远
溟溟	冥冥
摸棱两可	模棱两可
秣马利兵	秣马厉兵
秣马砺兵	秣马厉兵
木犀	木樨
闹轰轰	闹哄哄
闹烘烘	闹哄哄
粘稠	黏稠
粘糊	黏糊
粘土	黏土
粘性	黏性
粘液	黏液
念道	念叨
暖呼呼	暖乎乎
扒犁	爬犁
判辞	判词
皮簧	皮黄
慓悍	剽悍
飘渺	缥缈
漂渺	缥缈
飘眇	缥缈
飘邈	缥缈
凭白无故	平白无故
匍伏	匍匐
起程	启程
启锚	起锚
起迄	起讫
气门心	气门芯
牵就	迁就
遣辞	遣词
枪枝	枪支
情份	情分
屈伏	屈服
取销	取消
雀瘢	雀斑
热剌剌	热辣辣
如雷灌耳	如雷贯耳
散逸	散佚
沙锅	砂锅
沙壶	砂壶
沙浆	砂浆
沙糖	砂糖
杀风景	煞风景
杀尾	煞尾
刹时	霎时
山颠	山巅
扇风点火	煽风点火
闪烁其辞	闪烁其词
上方宝剑	尚方宝剑
深醒	深省
甚么	什么
神甫	神父
省分	省份
拾遗补阙	拾遗补缺
士女画	仕女画
视阈	视域
誓辞	誓词
授与	授予
摔交	摔跤
水份	水分
水长船高	水涨船高
思辩	思辨
死气白赖	死乞白赖
宿愿	夙愿
夙来	素来
夙敌	宿敌
夙儒	宿儒
夙怨	宿怨
梯己 体己
题辞	题词
俶傥	倜傥
瞳人	瞳仁
产玲	产龄
退色	褪色
托咐	托付
顽耍	玩耍
玩皮	顽皮
惟独	唯独
惟恐	唯恐
惟利是图	唯利是图
惟命是从	唯命是从
惟其	唯其
惟我独尊	唯我独尊
惟一	唯一
萎顿	委顿
委宛	委婉
委罪	诿罪
委靡	萎靡
委谢	萎谢
文彩	文采
无精打彩	无精打采
无尚	无上
欷歔	唏嘘
喜孜孜	喜滋滋
陷井	陷阱
项练	项链
销歇	消歇
消魂	销魂
兴高彩烈	兴高采烈
雄纠纠	雄赳赳
旋涡	漩涡
薰陶	熏陶
丫鬟	丫环
压宝	押宝
哑吧	哑巴
哑叭	哑巴
言不由中	言不由衷
一倡百和	一唱百和
一蹋糊涂	一塌糊涂
一榻糊涂	一塌糊涂
一相情愿	一厢情愿
引伸	引申
硬梆梆	硬邦邦
硬帮帮	硬邦邦
渔汛	鱼汛
鱼鼓	渔鼓
约莫	约摸
殒落	陨落
在坐	在座
糟踏	糟蹋
糟塌	糟蹋
张惶	张皇
照像	照相
珍羞	珍馐
真象	真相
枝梧	支吾
枝捂	支吾
装聋做哑	装聋作哑
妆束	装束
装做	装作
子畜	仔畜
子猪	仔猪
子粒	籽粒
子棉	籽棉
子实	籽实
走露	走漏
做弊	作弊
做美	作美
做弄	作弄
做声	作声
做秀	作秀
座落	坐落
坐次	座次
坐位	座位
旁证博引 旁征博引
谈笑风声 谈笑风生
美仑美幻 美轮美奂
坐阵 坐镇
不径而走 不胫而走
飘亮  漂亮
青纯  清纯
体晾 体谅
发杨广大 发扬光大
浪废水 浪费水
通货膨涨 通货膨胀
迫不急待 迫不及待
堵注  赌注

================================================
FILE: examples/other/ngram_lm/s0/data/text_correct.txt
================================================
少先队员因该为老人让坐
祛痘印可以吗？有效果吗？
不知这款牛奶口感怎样？ 小孩子喝行吗！
是转基因油?
我家宝宝13斤用多大码的
会起坨吗？
请问给送上楼吗？
亲是送赁上门吗
送货时候有外包装没有还是直接发货过来
会不会有坏的？
这个米煮粥好还煮饭好吃
有送的马克杯吗？
这纸尿裤分男孩女孩使用吗
买的路由器老是断网，拔了跳过路由器就可以用了
能泡开不？辣度几
请问这个米蒸出来是一粒一粒的还是一坨一坨的？
水和其他商品一样送货上门，还是自提呀？
快两个月的孩子 要穿什么码的
买回来会不会过期？
洗的还干净把吧
路由器怎么样啊，掉线严重吗？
你好这米是五斤还是十斤
收安费不
给送开果器吗
这纸好用吗？我看有不少的差评
自用好用吗
请问袜子穿久了会往下掉吗？
每一卷是独立包装的吗？
这个火龙果口味怎么样？甜不甜？
买这个送红杯吗？
一袋子多少斤
这款拉拉裤有味道吗？超市买的没有味道，不知道这个怎么样
我想问下拉拉裤上面那个贴的用来干嘛的，怎么用
这里边有没有枣核
玫瑰和薰衣草哪个好闻
这个冰糖质量怎么样，有杂质吗
倒水的时候漏吗
请问大家，这个水壶烧出来的水有异味吗？因为给宝宝用所以很在意，谢谢大家
这米煮出来糯吗？
这在款子好用吗？有香味吗？
到底是棉花的材质还是化纤的无纺布啊 求问？
我用360手机能充电几次
亲这纸好用吗？值得买吗？
24瓶？还是12瓶
是否是真的纸？
适用机洗吗?
好吃不好吃啊
真的好用吗？我也想买 
你们拿到是什么版本的
这水和超市一样吗？质量保证吗？
可以丢进马桶冲吗？
纸会不会粗？
这个翠的还不是不催的呀。。没有吃的那种不脆
这个好用吗
这纸有香味的吗？
是最近的生产日期吗
赠品是什么呀
这是两瓶还是一瓶的价格？
请问这是硬壳还是软壳？
亲，苹果收到后有坏的吗？
适合两人用吗
这个直接喝好不好喝   还是要热一下
纸有木有刺鼻气味？
酸不酸？？？
这啤好渴吗?
跟安慕希哪个比较好喝？
好用么，主要是带宝宝出去玩的时候用的多？
刚出生的宝宝用什么码？
能当洗手液吗？
是不是很小包的那一种？50块有24包便宜的有点不敢相信
好用吗，会不会起会不会起坨？
这个口可以直接放饮水机上用吗？
这种纸掉粉末吗
手机好用吗？会卡吗
开盖里面是拉环的吗？
这个电池真的需要一直换吗？
好用吗？是不是正品？
请问有尿显吗
容易发烫吗
苹果有腊吗
这油有这么好吗？不是过期的吧
这个夏天用会不会红屁股？透气性好吗
你好。 我想问下这个是尿不湿吗 ？
这奶为啥这么便宜？
你们买的酱油会没有颜色吗，像水一样，看着都没胃口
这个是机诜，还是手洗
这个卫生巾带香味吗？
这种洗发水好用吗
有餡嗎？好不好吃
纸质不会好差吗？
亲们，此米是真空包装吗？
是软毛的吗？！！
请问大家德运牌子的好喝还是安佳的？
这纸好用吗，薄嘛
这壶保温吗
这个威露士货到了就是跟图片上的一样吗？只要是图片上显示的都有吗？
你们买的牛奶是最近日期吗
这个除菌液，是单独放在滚筒洗衣机除菌液格，还是与洗衣液混合放在洗衣液格？
请问你们的三只松鼠寄回来的时候是用袋子装着的吗
1kg是不是两斤？
洗衣皂怎么样啊，味道重吗，用之后好不好清洗啊。
我要请问你这个是不是那个拉拉裤吗？这个花纹是不是拉拉裤？
好多人都说小米运动升级后手环就连不上了，你们有没有这种情况？
这部手机运行速度快不快？
新生儿可以用吗 抽一张会带出来很多张吗
洗后有香味吗
体验装有多少片
银装怎么样？会漏尿吗？你们都是多久换一次的？？（我家大概2-3个小时左右，宝宝醒一回换一次）
声音大吗？好用不？
抽纸有味吗
苹果好吃吗？打过蜡吗？是不是坏的很多？
70g和80g得区别是啥？
袋装的和瓶装的洗衣液是一样的么？
噪音很大吗
烧出来的水会不会很多一块一块的东西
这个吹风真心好用吗？我今晚下单什么时候到
请问各位宝妈 这个乳垫的背胶粘吗
M号的你们给宝宝用到多大啊？几个月？我家宝宝3个月5㎏重，用花王的M号觉得小了。不知道这个怎么样？
这个喝了能找到女朋友吗
这袜子耐不耐穿
请问好用么  是正品么
怎么储藏 我买了两天在常温阴凉处放着下层有些化了 需要放冰箱冷冻吗
这批苏打水是否有股消毒水的味道？
质量怎么样，看到那么多差评，我不敢买了。
会不会有烂的
为什么我买的用完之后没香味
甜吗？？？？
我看到评论里的差评说大米里有虫，是真的吗？
要放冰箱冷藏吗
好不好吃啊
这油怎么样   炒菜香不香
这纸擦手时有屑吗？
是正品的吗？
好用吗
这个特浓的苦不苦
这个好用吗？
米里真的有虫吗
是金装的吗？
双内胆有什么区别，两个一样的吗？
请问这款水可以降尿酸吗？
好用吗这个
购物袋结实吗，能放重东西吗
你好，请问这款可以剃头发刮光头吗
这个纸巾质量如何？好用吗？
好用吗？小孩子喜欢吗？
亲。煮面时会糊锅不
包邮吗运费多少
会一抽就两三张一起抽起来吗？
一箱几桶油呀
这个吹风机分冷风和热风吗
发什么快递呢
请问一下，有些枸杞说是不要洗，你们的是否建议洗呢？
请问纸有异味吗？我以前买过一箱就是这个居然有异味。
这是6个么  怎么觉得有好多
我买的荣耀10横滑home键进入后台这个操作成功率特别低，你们也是这样吗？
你们的有塑料味吗，机械的
小米路由器真心说的有这么差吗
请问大家这款刮的干净吗？谢谢
会有塑料味吗
质量真的很差吗？不敢买
这纸有气味吗
我买两箱怎么要运费
这个标准果好吃吗，酸不酸
稀吗？是不是有种兑了水的感觉？
威露士和滴露的消毒液哪个更好用呢？
曰期是几月份的
手机容易折弯吗？
我家宝宝25斤XL会紧吗？
这款200克一箱的纸张和10卷手提的价格相差那么多 质量一样吗？
豆浆可以打吗
电量有百分比吗
用快递送过来瓶子会不会打破
是三相电吗，有空调摇控器吧
拿它送人，有问题吗？？
安幕希好喝吗？
这款纸尿裤好用吗？和尤妮佳比较哪个好用些？
2层厚吗？是不是一到水就烂了
为什么我宝宝拉粑粑后面总是漏出来我已经贴的很牢了，10斤的宝宝用S号也不小啊你们用了没这种情况吗？
这个产品好用吗？
刷毛柔软度咋样，这么便宜，会不会是很小个的
会不会有过敏的情况呀
请问是辣条吗
这种米只能煮粥不能煮饭吗
可以开袋即食吗？
这米好吃吗？
这个充电宝充满电需要多久
这个奶开了可以保质喝两天吗
这种薰衣草的洗衣液怎么样
你们的小米六边框掉漆了吗？？？
这个是机洗用还是手洗用的啊
厚度怎么样、起球吗感谢大哥大姐们
这个好喝还是康师傅红茶好喝
这种洁面膏会不会过敏，我上次用的火山岩冰感洁面啫喱对那种过敏，但听别人说那种稀的本来就特别容易过敏，不知道这种洁面膏会不会过敏！
这杯那么多差评，是真的吗，吓得我都不敢买了
枣是免洗的吗？
这个尿不湿尿过会起坨吗
感觉和苏菲比哪个更好用呢？
煮出来的饭香吗？
你好！请问这个水壶烧水开了是自动切电吗？
这个跟 原木纯品 那个啥区别？不是原木纸浆做的？
能放冰箱吗
纸有味道吗？
2016全国高考卷答题模板
2016全国大考卷答题模板
2016全国低考卷答题模板
床前明月光，疑是地上霜
床前星星光，疑是地上霜
床前白月光，疑是地上霜
落霞与孤鹜齐飞，秋水共长天一色
落霞与孤鹜齐跑，秋水共长天一色
落霞与孤鹜双飞，秋水共长天一色
众里寻他千百度，蓦然回首，那人却在，灯火阑珊处
众里寻她千百度，蓦然回首，那人却在，灯火阑珊处
众里寻ta千百度，蓦然回首，那人却在，灯火阑珊处
吸烟的人容*得癌症
就只听着我*妈所说的话，
就接受环境污*用化肥和农药，
是或者接受环境污染用化肥和农药，
现在的香港比从前的*荣很多。
现在的香港比*前的饭荣很多。


================================================
FILE: examples/other/ngram_lm/s0/local/build_zh_lm.sh
================================================
#!/bin/bash
# Build a Chinese n-gram language model from a raw text file.
#
# Usage: build_zh_lm.sh [--order N] [--mem X] [--prune "..."] token_type text arpa
#   token_type  "char" or "word"; forwarded to zh_tn.py as --token_type
#   text        input text file
#   arpa        output path handed to ngram_train.sh
set -e

stage=0
stop_stage=100

# Defaults; presumably overridable via --name value flags handled by
# parse_options.sh below (Kaldi-style) — confirm against that script.
order=5
mem=80%
prune=0
# NOTE(review): a, q and b are declared (so they can be passed as flags) but
# are never forwarded to ngram_train.sh anywhere in this script.
a=22
q=8
b=8

source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

# Exactly three positional arguments are required after option parsing.
if [ $# != 3 ]; then
    echo "$0 token_type exp/text exp/text.arpa"
    echo $@
    exit 1
fi

# char or word
type=$1
text=$2
arpa=$3

if [ $stage -le 0 ] && [ $stop_stage -ge 0 ];then
    # text tn & wordseg preprocess
    # Output is written next to the input as <text>.<token_type>.tn
    echo "process text."
    python3 ${MAIN_ROOT}/utils/zh_tn.py --token_type ${type} ${text} ${text}.${type}.tn
fi

if [ $stage -le 1 ] && [ $stop_stage -ge 1 ];then
    # train ngram lm
    # Consumes the normalized text produced by stage 0.
    echo "build lm."
    bash ${MAIN_ROOT}/utils/ngram_train.sh --order ${order} --mem ${mem} --prune "${prune}" ${text}.${type}.tn ${arpa}
fi

================================================
FILE: examples/other/ngram_lm/s0/local/download_lm_zh.sh
================================================
#! /usr/bin/env bash
# Download the pre-built Chinese KenLM model into data/lm, unless the target
# file already exists.

# Presumably provides the `download` helper used below — confirm in utility.sh.
. ${MAIN_ROOT}/utils/utility.sh

DIR=data/lm
mkdir -p ${DIR}

URL='https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm'
MD5="29e02312deb2e59b3c8686c7966d4fe3"
TARGET=${DIR}/zh_giga.no_cna_cmn.prune01244.klm


# Skip the download when the model file is already present.
if [ -e $TARGET ];then
    echo "already have lm"
    exit 0;
fi

echo "Download language model ..."
# Arguments: source URL, expected MD5 string, destination path.
download $URL $MD5 $TARGET
if [ $? -ne 0 ]; then
    echo "Fail to download the language model!"
    exit 1
fi


exit 0


================================================
FILE: examples/other/ngram_lm/s0/local/kenlm_score_test.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import time

import jieba
import kenlm

# Path of the KenLM model under test, taken from the first command-line argument.
language_model_path = sys.argv[1]
assert os.path.exists(language_model_path)

# Load the model once at import time and report how long loading took.
start = time.time()
model = kenlm.Model(language_model_path)
print(f"load kenLM cost: {time.time() - start}s")

# One sample sentence in three forms: raw, space-separated characters, and
# space-separated jieba word segmentation.
sentence = '盘点不怕被税的海淘网站❗️海淘向来便宜又保真！'
sentence_char_split = ' '.join(list(sentence))
sentence_word_split = ' '.join(jieba.lcut(sentence))


def test_score():
    """Print `score` and `full_scores` for the raw, char-split and word-split sentence.

    For the char-split form it additionally checks that `full_scores` yields
    one more entry than there are whitespace-separated tokens.
    """

    def _report(text):
        # Dump the text, its total score and every per-token score tuple;
        # return how many tuples the second full_scores pass produced.
        print(text)
        print(model.score(text))
        print(list(model.full_scores(text)))
        count = 0
        for idx, item in enumerate(model.full_scores(text)):
            print(idx, item)
            count += 1
        return count

    print('Loaded language model: %s' % language_model_path)

    _report(sentence)

    n_char_scores = _report(sentence_char_split)
    expected = len(sentence_char_split.split()) + 1
    assert n_char_scores == expected, "error split size."

    _report(sentence_word_split)


def test_full_scores_chars():
    """Show per-character scores with their matched n-grams and check the OOV set.

    The sentence is scored in its space-separated character form; the expected
    out-of-vocabulary characters are asserted at the end.
    """
    print('Loaded language model: %s' % language_model_path)
    print(sentence_char_split)
    # Token list aligned with full_scores output: sentence markers around the characters.
    tokens = ['<s>', *sentence, '</s>']
    for pos, scored in enumerate(model.full_scores(sentence_char_split)):
        prob, length, is_oov = scored
        # The n-gram that matched ends at token pos + 1 and spans `length` tokens.
        ngram = ' '.join(tokens[pos + 2 - length:pos + 2])
        print('{0} {1}: {2}'.format(prob, length, ngram))
        if is_oov:
            print('\t"{0}" is an OOV'.format(tokens[pos + 1]))

    print("-" * 42)
    # Collect every token the model vocabulary does not contain.
    unknown = []
    for tok in tokens:
        if tok in model:
            continue
        print('"{0}" is an OOV'.format(tok))
        unknown.append(tok)
    assert unknown == ["❗", "️", "！"], 'error oov'


def test_full_scores_words():
    """Show per-word scores with their matched n-grams and check the OOV set.

    The sentence is scored in its jieba word-segmented form. The model named in
    run.sh is a Chinese character LM, so multi-character words are expected to
    be out of vocabulary.
    """
    print('Loaded language model: %s' % language_model_path)
    print(sentence_word_split)
    # Token list aligned with full_scores output: sentence markers around the words.
    tokens = ['<s>', *sentence_word_split.split(), '</s>']
    for pos, scored in enumerate(model.full_scores(sentence_word_split)):
        prob, length, is_oov = scored
        ngram = ' '.join(tokens[pos + 2 - length:pos + 2])
        print('{0} {1}: {2}'.format(prob, length, ngram))
        if is_oov:
            print('\t"{0}" is an OOV'.format(tokens[pos + 1]))

    print("-" * 42)
    # Collect every token the model vocabulary does not contain.
    unknown = []
    for tok in tokens:
        if tok in model:
            continue
        print('"{0}" is an OOV'.format(tok))
        unknown.append(tok)
    # zh_giga.no_cna_cmn.prune01244.klm is a Chinese character LM
    expected_oov = ["盘点", "不怕", "网站", "❗", "️", "海淘", "向来", "便宜", "保真",
                    "！"]
    assert unknown == expected_oov, 'error oov'


def test_full_scores_chars_length():
    """Check how the `bos`/`eos` flags affect the number of `full_scores` entries."""
    print('Loaded language model: %s' % language_model_path)
    with_defaults = list(model.full_scores(sentence_char_split))
    without_markers = list(
        model.full_scores(sentence_char_split, bos=False, eos=False))
    print(with_defaults)
    print(without_markers)
    # The default call yields exactly one more entry than the marker-free call.
    assert len(with_defaults) == len(without_markers) + 1

    # bos=False, eos=False: one entry per input token
    n_tokens = len(sentence_char_split.split())
    print(len(without_markers), n_tokens)
    assert len(without_markers) == n_tokens

    # Printed for inspection only; nothing is asserted about this variant.
    eos_only = list(
        model.full_scores(sentence_char_split, bos=False, eos=True))
    print(eos_only, len(eos_only))


def test_ppl_sentence():
    """Print sentence-level perplexity for pairs of near-identical inputs.

    Each case compares a text with a variant in which one character differs
    (救 vs 就). Only case 3, where the inputs are not space-separated, asserts
    that both perplexities are equal; presumably each unsplit string is a
    single unknown token to a character-level LM — confirm.
    """
    # (label, first text, second text, whether the two perplexities must match)
    cases = [
        ('1', ' '.join('先救挨饿的人，然后治疗病人。'),
         ' '.join('先就挨饿的人，然后治疗病人。'), False),
        ('2', ' '.join('先救挨饿的人'), ' '.join('先就挨饿的人'), False),
        ('3', '先救挨', '先就挨', True),
        ('4', '先 救 挨', '先 就 挨', False),
        ('5', '先 救 挨 饿 的 人', '先 就 挨 饿 的 人', False),
        ('6', '先 救 挨 饿 的 人 ，', '先 就 挨 饿 的 人 ，', False),
        ('7', '先 救 挨 饿 的 人 ， 然 后 治 疗 病 人',
         '先 就 挨 饿 的 人 ， 然 后 治 疗 病 人', False),
        ('8', '先 救 挨 饿 的 人 ， 然 后 治 疗 病 人 。',
         '先 就 挨 饿 的 人 ， 然 后 治 疗 病 人 。', False),
    ]
    for label, first, second, must_match in cases:
        ppl_first = model.perplexity(first)
        print(label, ppl_first)
        ppl_second = model.perplexity(second)
        print(ppl_second)
        if must_match:
            assert ppl_first == ppl_second


# Run every check in sequence when executed as a script (the model path is
# read from sys.argv[1] at module import time above).
if __name__ == '__main__':
    test_score()
    test_full_scores_chars()
    test_full_scores_words()
    test_full_scores_chars_length()
    test_ppl_sentence()


================================================
FILE: examples/other/ngram_lm/s0/path.sh
================================================
# Environment setup for this example; meant to be sourced (run.sh does
# `source path.sh`) from the example directory.
# MAIN_ROOT is resolved four directory levels above the current directory.
export MAIN_ROOT=`realpath ${PWD}/../../../../`

export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export LC_ALL=C

# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

# /usr/local/lib is searched before any pre-existing LD_LIBRARY_PATH entries.
export LD_LIBRARY_PATH=/usr/local/lib/:${LD_LIBRARY_PATH}


================================================
FILE: examples/other/ngram_lm/s0/requirements.txt
================================================
jieba>=0.39

================================================
FILE: examples/other/ngram_lm/s0/run.sh
================================================
#!/bin/bash
# KenLM example driver.
#   stage 0: download the pre-built Chinese LM and run the KenLM scoring test
#   stage 1: build a character-level n-gram LM from data/text_correct.txt
#   stage 2: build a word-level n-gram LM from data/text_correct.txt
# Select stages with `--stage N --stop_stage M` (handled by parse_options.sh).
set -e
source path.sh

stage=0
stop_stage=100

source ${MAIN_ROOT}/utils/parse_options.sh || exit -1

python3 -c 'import kenlm;' || { echo "kenlm package not install!"; exit -1; }

if [ $stage -le 0 ] && [ $stop_stage -ge 0 ];then
    # case 1, test kenlm
    # download language model
    bash local/download_lm_zh.sh
    if [ $? -ne 0 ]; then
       exit 1
    fi

    # test kenlm `score` and `full_score`
    # Use the same interpreter (python3) that the kenlm import check above used.
    python3 local/kenlm_score_test.py data/lm/zh_giga.no_cna_cmn.prune01244.klm
fi

# The LM-building stages read their training text from exp/text.
mkdir -p exp
cp data/text_correct.txt exp/text

if [ $stage -le 1 ] && [ $stop_stage -ge 1 ];then
    # case 2, Chinese character n-gram LM build
    # output: xxx.arpa xxx.kenlm.bin
    input=exp/text
    token_type=char
    lang=zh
    order=5
    prune="0 1 2 4 4"
    a=22
    q=8
    b=8
    # Encode every setting in the output file name (spaces in prune become '_').
    output=${input}_${lang}_${token_type}_o${order}_p${prune// /_}_a${a}_q${q}_b${b}.arpa
    echo "build ${token_type} lm."
    bash local/build_zh_lm.sh --order ${order} --prune "${prune}" --a ${a} --q ${q} --b ${b} ${token_type} ${input} ${output}
fi

if [ $stage -le 2 ] && [ $stop_stage -ge 2 ];then
    # case 3, Chinese word n-gram LM build
    # output: xxx.arpa xxx.kenlm.bin
    input=exp/text
    token_type=word
    lang=zh
    order=3
    prune="0 0 0"
    a=22
    q=8
    b=8
    # Encode every setting in the output file name (spaces in prune become '_').
    output=${input}_${lang}_${token_type}_o${order}_p${prune// /_}_a${a}_q${q}_b${b}.arpa
    echo "build ${token_type} lm."
    bash local/build_zh_lm.sh --order ${order} --prune "${prune}" --a ${a} --q ${q} --b ${b} ${token_type} ${input} ${output}
fi


================================================
FILE: examples/other/punctuation_restoration/README.md
================================================
# Punctuation Restoration

Please use [PaddleSpeechTask](https://github.com/745165806/PaddleSpeechTask) to do this task.


================================================
FILE: examples/other/rhy/README.md
================================================
# Prosody Prediction with CSMSC and AISHELL-3

## Get Started
### Data Preprocessing
```bash
./run.sh --stage 0 --stop-stage 0
```
### Model Training
```bash
./run.sh --stage 1 --stop-stage 1
```
### Testing
```bash
./run.sh --stage 2 --stop-stage 2
```
### Prosody Prediction
```bash
./run.sh --stage 3 --stop-stage 3
```
## Pretrained Model
The pretrained model can be downloaded here:

[ernie-1.0_aishellcsmsc_ckpt_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/rhy_predict/ernie-1.0_aishellcsmsc_ckpt_1.3.0.zip)

And you should put it into `exp/${YOUREXP}/checkpoints` folder.

## Rhythm mapping
Four punctuation marks are used to denote the rhythm marks respectively:
|rhy_token|csmsc|aishell3|
|:---: |:---: |:---: |
|%|#1|%|
|`|#2||
|~|#3||
|$|#4|$|

## Prediction Results
|       |  #1  |  #2 |  #3  |  #4  |
|:-----:|:-----:|:-----:|:-----:|:-----:|  
|Precision  |0.90  |0.66  |0.91  |0.90|
|Recall     |0.92  |0.62  |0.83  |0.85|
|F1         |0.91  |0.64  |0.87  |0.87|


================================================
FILE: examples/other/rhy/conf/default.yaml
================================================
###########################################################
#                       DATA SETTING                      #
###########################################################
dataset_type: Ernie
train_path: data/train.txt
dev_path: data/dev.txt
test_path: data/test.txt
batch_size: 64
num_workers: 2
data_params: 
    pretrained_token: ernie-1.0
    punc_path: data/rhy_token
    seq_len: 100


###########################################################
#                       MODEL SETTING                     #
###########################################################
model_type: ErnieLinear
model:
    pretrained_token: ernie-1.0
    num_classes: 5

###########################################################
#                     OPTIMIZER SETTING                   #
###########################################################
optimizer_params:
    weight_decay: 1.0e-6               # weight decay coefficient.

scheduler_params:
    learning_rate: 1.0e-5               # learning rate.
    gamma: 0.9999                          # scheduler gamma must between(0.0, 1.0) and closer to 1.0 is better.

###########################################################
#                     TRAINING SETTING                    #
###########################################################
max_epoch: 20

###########################################################
#                     OTHER SETTING                       #
###########################################################
# NOTE(review): this key used to appear twice in this file (5 and 10). Duplicate
# mapping keys are invalid YAML; the later value is kept here because loaders
# that tolerate duplicates typically let the last one win — confirm 10 is intended.
num_snapshots: 10                 # max number of snapshots to keep while training
seed: 42                          # random seed for paddle, random, and np.random


================================================
FILE: examples/other/rhy/data/rhy_token
================================================
%
`
~
$

================================================
FILE: examples/other/rhy/local/data.sh
================================================
#!/bin/bash
# Download the CSMSC and AISHELL-3 label files (if missing) and convert them
# into train/dev/test text files for rhythm prediction.
#
# Usage: data.sh <aishell_label_file> <csmsc_label_file> <output_dir>

# Stop at the first failing command so a failed download or preprocessing step
# is not followed by the success message and exit code below.
set -e

if [ ! -f 000001-010000.txt ]; then
    wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/rhy_predict/000001-010000.txt
fi

if [ ! -f label_train-set.txt ]; then
    wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/rhy_predict/label_train-set.txt
fi


aishell_data=$1
csmsc_data=$2
processed_path=$3

# Order matters: the CSMSC script opens the output files in write mode, and the
# AISHELL script then appends to the same files.
python3 ./local/pre_for_sp_csmsc.py \
    --data=${csmsc_data} \
    --processed_path=${processed_path}

python3 ./local/pre_for_sp_aishell.py \
    --data=${aishell_data} \
    --processed_path=${processed_path}


echo "Finish data preparation."
exit 0


================================================
FILE: examples/other/rhy/local/pre_for_sp_aishell.py
================================================
#!/usr/bin/env python3
import argparse
import os
import re

# Mapping from "#N" rhythm labels to the single-character tokens that stand in
# for them in the training text.
replace_ = {"#1": "%", "#2": "`", "#3": "~", "#4": "$"}


def replace_rhy_with_punc(line):
    """Strip one trailing punctuation character and map "#N" labels to tokens.

    Args:
        line: text that may contain "#1".."#4" rhythm labels.

    Returns:
        The text with a match of the trailing-punctuation pattern removed and
        every label replaced by its token from `replace_`.
    """
    # Pattern adapted from check_oov.py (per the original note): one character
    # of the class, then any number of '%', anchored at the end of the text.
    cleaned = re.sub(r'[：、，；。？！,.:;"?!’《》【】<=>{}()（）#&@“”^_|…\\]%*$', '', line)
    for label, token in replace_.items():
        cleaned = cleaned.replace(label, token)
    return cleaned


def pre_and_write(data, file):
    """Append the text field of each label line to `file`, one character per token.

    Args:
        data: iterable of '|'-separated label lines; the third field is the text.
        file: output path. It is opened in append mode, so existing content
            (local/data.sh runs the CSMSC script on the same files first) is kept.
    """
    # Explicit UTF-8: path.sh exports LC_ALL=C, so the locale default encoding
    # must not be relied on for Chinese text.
    with open(file, 'a', encoding='utf-8') as rf:
        for d in data:
            d = d.split('|')[2].strip()
            # The text is written as-is; replace_rhy_with_punc is intentionally
            # not applied here (its call was already disabled).
            d = ' '.join(d) + ' \n'
            rf.write(d)


def main():
    """Split the AISHELL-3 label file into train/test/dev text files.

    Command-line options:
        --data: path of the label file (default "label_train-set.txt").
        --processed_path: output directory; created if it does not exist.
    """
    parser = argparse.ArgumentParser(
        description="Preprocess AISHELL-3 labels for rhythm prediction.")
    parser.add_argument("--data", type=str, default="label_train-set.txt")
    parser.add_argument(
        "--processed_path", type=str, default="../data/rhy_predict")
    args = parser.parse_args()
    os.makedirs(args.processed_path, exist_ok=True)

    # Explicit UTF-8 so reading does not depend on the process locale.
    with open(args.data, encoding='utf-8') as rf:
        # The first five lines are skipped (presumably a file header — confirm).
        text = rf.readlines()[5:]
    len_ = len(text)
    # 90% / 5% / 5% split; int() truncation can leave a few trailing lines unused.
    lens = [int(len_ * 0.9), int(len_ * 0.05), int(len_ * 0.05)]
    files = ['train.txt', 'test.txt', 'dev.txt']

    # Hand consecutive, non-overlapping slices to each output file.
    i = 0
    for l_, file in zip(lens, files):
        file = os.path.join(args.processed_path, file)
        pre_and_write(text[i:i + l_], file)
        i = i + l_


if __name__ == "__main__":
    main()


================================================
FILE: examples/other/rhy/local/pre_for_sp_csmsc.py
================================================
#!/usr/bin/env python3
import argparse
import os
import re

# Mapping from "#N" rhythm labels to the single-character tokens that stand in
# for them in the training text.
replace_ = {"#1": "%", "#2": "`", "#3": "~", "#4": "$"}


def replace_rhy_with_punc(line):
    """Map "#N" rhythm labels in `line` to their tokens from `replace_`.

    Args:
        line: text that may contain "#1".."#4" rhythm labels.

    Returns:
        The text with every label replaced by its token.
    """
    # NOTE(review): this pattern requires '*%' to follow the end-of-string
    # anchor, so it cannot match and the substitution leaves `line` unchanged.
    # A character class may have been intended — confirm before changing, since
    # it would alter the generated training data.
    cleaned = re.sub(r'^$\*%', '', line)
    for label, token in replace_.items():
        cleaned = cleaned.replace(label, token)
    return cleaned


def pre_and_write(data, file):
    """Write the text field of each label line to `file`, one character per token.

    Args:
        data: iterable of tab-separated label lines; the second field is the text.
        file: output path. It is opened in write mode, so any existing content
            is replaced.
    """
    # Explicit UTF-8: path.sh exports LC_ALL=C, so the locale default encoding
    # must not be relied on for Chinese text.
    with open(file, 'w', encoding='utf-8') as rf:
        for d in data:
            d = d.split('\t')[1].strip()
            # Convert "#N" rhythm labels to their single-character tokens.
            d = replace_rhy_with_punc(d)
            d = ' '.join(d) + ' \n'
            rf.write(d)


def main():
    """Split the CSMSC label file into train/test/dev text files.

    Command-line options:
        --data: path of the CSMSC label file (default "000001-010000.txt",
            the file local/data.sh downloads and run.sh passes in).
        --processed_path: output directory; created if it does not exist.
    """
    parser = argparse.ArgumentParser(
        description="Preprocess CSMSC labels for rhythm prediction.")
    # The previous default named the AISHELL label file, which is '|'-separated
    # and therefore not parseable by this script's tab-based pre_and_write.
    parser.add_argument("--data", type=str, default="000001-010000.txt")
    parser.add_argument(
        "--processed_path", type=str, default="../data/rhy_predict")
    args = parser.parse_args()
    print(args.data, args.processed_path)
    os.makedirs(args.processed_path, exist_ok=True)

    # Explicit UTF-8 so reading does not depend on the process locale.
    with open(args.data, encoding='utf-8') as rf:
        lines = rf.readlines()
    # Keep every second line starting with the first (presumably the lines in
    # between hold the pinyin transcription — confirm against the data file).
    text = lines[0::2]
    len_ = len(text)
    # 90% / 5% / 5% split; int() truncation can leave a few trailing lines unused.
    lens = [int(len_ * 0.9), int(len_ * 0.05), int(len_ * 0.05)]
    files = ['train.txt', 'test.txt', 'dev.txt']

    # Hand consecutive, non-overlapping slices to each output file.
    i = 0
    for l_, file in zip(lens, files):
        file = os.path.join(args.processed_path, file)
        pre_and_write(text[i:i + l_], file)
        i = i + l_


if __name__ == "__main__":
    main()


================================================
FILE: examples/other/rhy/local/rhy_predict.sh
================================================
#!/bin/bash
# Run rhythm prediction on one piece of text with a trained checkpoint.
#
# Usage: rhy_predict.sh <config_path> <train_output_path> <ckpt_name> <text>
# Requires BIN_DIR to be set (path.sh exports it).

config_path=$1
train_output_path=$2
ckpt_name=$3
text=$4
# Checkpoint name without its extension; not referenced below.
ckpt_prefix=${ckpt_name%.*}

# NOTE(review): ${text} is expanded unquoted, so text containing whitespace
# would be split into several arguments.
python3 ${BIN_DIR}/punc_restore.py \
    --config=${config_path} \
    --checkpoint=${train_output_path}/checkpoints/${ckpt_name} \
    --text=${text}


================================================
FILE: examples/other/rhy/local/test.sh
================================================
#!/bin/bash
# Evaluate a trained checkpoint.
#
# Usage: test.sh <config_path> <train_output_path> <ckpt_name> <print_eval>
# Requires BIN_DIR to be set (path.sh exports it).

config_path=$1
train_output_path=$2
ckpt_name=$3
print_eval=$4

# Checkpoint name without its extension; not referenced below.
ckpt_prefix=${ckpt_name%.*}

python3 ${BIN_DIR}/test.py \
    --config=${config_path} \
    --checkpoint=${train_output_path}/checkpoints/${ckpt_name} \
    --print_eval=${print_eval}

================================================
FILE: examples/other/rhy/local/train.sh
================================================
#!/bin/bash
# Train the rhythm prediction model.
#
# Usage: train.sh <config_path> <train_output_path>
# Requires BIN_DIR to be set (path.sh exports it).

config_path=$1
train_output_path=$2

# The GPU count is fixed to 1 here.
python3 ${BIN_DIR}/train.py \
    --config=${config_path} \
    --output-dir=${train_output_path} \
    --ngpu=1


================================================
FILE: examples/other/rhy/path.sh
================================================
#!/bin/bash
# Environment setup for this example; meant to be sourced (run.sh does
# `source path.sh`) from the example directory.
# MAIN_ROOT points three directory levels above the current directory.
export MAIN_ROOT=${PWD}/../../../

export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export LC_ALL=C

# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/

# Directory holding the train.py / test.py / punc_restore.py entry points that
# the local/*.sh scripts invoke.
MODEL=ernie_linear
export BIN_DIR=${MAIN_ROOT}/paddlespeech/text/exps/${MODEL}


================================================
FILE: examples/other/rhy/run.sh
================================================
#!/bin/bash
# Driver for this example: data preparation (stage 0), training (stage 1),
# evaluation (stage 2) and single-sentence prediction (stage 3).
set -e
source path.sh

gpus=0
stage=0
stop_stage=100

data=data
mkdir -p "${data}"

aishell_data=label_train-set.txt
csmsc_data=000001-010000.txt

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_4680.pdz
text=我们城市的复苏有赖于他强有力的政策。
print_eval=false

# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this can not be mixed use with `$1`, `$2` ...
source "${MAIN_ROOT}/utils/parse_options.sh" || exit 1

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # prepare data
    ./local/data.sh "${aishell_data}" "${csmsc_data}" "${data}"
fi

# NOTE: failures exit with status 1; `exit -1` is outside the valid 0-255
# range of exit statuses.
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # train model, all `ckpt` under `train_output_path/checkpoints/` dir
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh "${conf_path}" "${train_output_path}" || exit 1
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # evaluate the checkpoint `ckpt_name`
    CUDA_VISIBLE_DEVICES=${gpus} ./local/test.sh "${conf_path}" "${train_output_path}" "${ckpt_name}" "${print_eval}" || exit 1
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # predict on `text`; quoted so a sentence containing spaces stays one argument
    CUDA_VISIBLE_DEVICES=${gpus} ./local/rhy_predict.sh "${conf_path}" "${train_output_path}" "${ckpt_name}" "${text}" || exit 1
fi

================================================
FILE: examples/other/spm/.gitignore
================================================
data


================================================
FILE: examples/other/spm/README.md
================================================
# [SentencePiece Model](https://github.com/google/sentencepiece)

## Run
Train a SentencePiece (`spm`) model for an English tokenizer.

```
. path.sh
bash run.sh
```

## Results

```
data/
└── lang_char
    ├── input.bpe
    ├── input.decode
    ├── input.txt
    ├── train_unigram100.model
    ├── train_unigram100_units.txt
    └── train_unigram100.vocab

1 directory, 6 files
```

```
b5a230c26c61db5c36f34e503102f936  data/lang_char/input.bpe
ec5a9b24acc35469229e41256ceaf77d  data/lang_char/input.decode
ec5a9b24acc35469229e41256ceaf77d  data/lang_char/input.txt
124bf3fe7ce3b73b1994234c15268577  data/lang_char/train_unigram100.model
0df2488cc8eaace95eb12713facb5cf0  data/lang_char/train_unigram100_units.txt
46360cac35c751310e8e8ffd3a034cb5  data/lang_char/train_unigram100.vocab
```

```
==> data/lang_char/input.txt <==
mister quilter is the apostle of the middle classes and we are glad to welcome his gospel
nor is mister quilter's manner less interesting than his matter
he tells us that at this festive season of the year with christmas and roast beef looming before us similes drawn from eating and its results occur most readily to the mind
he has grave doubts whether sir frederick leighton's work is really greek after all and can discover in it but little of rocky ithaca
linnell's pictures are a sort of up guards and at em paintings and mason's exquisite idylls are as national as a jingo poem mister birket foster's landscapes smile at one much in the same way that mister carker used to flash his teeth and mister john collier gives his sitter a cheerful slap on the back before he says like a shampooer in a turkish bath next man
it is obviously unnecessary for us to point out how luminous these criticisms are how delicate in expression
on the general principles of art mister quilter writes with equal lucidity
painting he tells us is of a different quality to mathematics and finish in art is adding more fact
as for etchings they are of two kinds british and foreign
he laments most bitterly the divorce that has been made between decorative art and what we usually call pictures makes the customary appeal to the last judgment and reminds us that in the great days of art michael angelo was the furnishing upholsterer

==> data/lang_char/input.bpe <==
▁mi ster ▁quilter ▁ is ▁the ▁a p ost le ▁o f ▁the ▁mi d d le ▁c las s es ▁ and ▁we ▁ar e ▁g l a d ▁ to ▁we l c om e ▁h is ▁g o s pe l
▁ n or ▁ is ▁mi ster ▁quilter ' s ▁ma nne r ▁ l ess ▁in ter es t ing ▁tha n ▁h is ▁ma t ter
▁h e ▁ t e ll s ▁us ▁tha t ▁ at ▁ t h is ▁f es t ive ▁ s e ason ▁o f ▁the ▁ y e ar ▁w ith ▁ ch r is t m a s ▁ and ▁ro a s t ▁be e f ▁ l o om ing ▁be fore ▁us ▁ s i mile s ▁d r a w n ▁f r om ▁ e at ing ▁ and ▁it s ▁re s u l t s ▁o c c ur ▁m ost ▁re a di l y ▁ to ▁the ▁ mind
▁h e ▁ ha s ▁g r a v e ▁d o u b t s ▁w h e t h er ▁ s i r ▁f r e d er ic k ▁ l eig h to n ' s ▁w or k ▁ is ▁re all y ▁gre e k ▁a f ter ▁ all ▁ and ▁c a n ▁di s c o v er ▁in ▁it ▁b u t ▁li t t le ▁o f ▁ro ck y ▁it ha c a
▁li nne ll ' s ▁ p ic tur es ▁ar e ▁a ▁ s or t ▁o f ▁ u p ▁g u ar d s ▁ and ▁ at ▁ em ▁painting s ▁ and ▁m ason ' s ▁ e x q u is i t e ▁ i d y ll s ▁ar e ▁a s ▁ n at ion a l ▁a s ▁a ▁ j ing o ▁ p o em ▁mi ster ▁b i r k e t ▁f o ster ' s ▁ l and s c a pe s ▁ s mile ▁ at ▁on e ▁m u ch ▁in ▁the ▁ s a m e ▁w a y ▁tha t ▁mi ster ▁c ar k er ▁us e d ▁ to ▁f las h ▁h is ▁ t e e t h ▁ and ▁mi ster ▁ j o h n ▁c o ll i er ▁g ive s ▁h is ▁ s i t ter ▁a ▁ ch e er f u l ▁ s l a p ▁on ▁the ▁b a ck ▁be fore ▁h
e ▁ s a y s ▁li k e ▁a ▁ s ha m p o o er ▁in ▁a ▁ tur k is h ▁b at h ▁ n e x t ▁ma n
▁it ▁ is ▁o b v i o u s l y ▁ u nne c ess ar y ▁for ▁us ▁ to ▁ p o i n t ▁o u t ▁h o w ▁ l u m i n o u s ▁the s e ▁c rit ic is m s ▁ar e ▁h o w ▁d e l ic at e ▁in ▁ e x p r ess ion
▁on ▁the ▁g e n er a l ▁ p r i n c i p l es ▁o f ▁ar t ▁mi ster ▁quilter ▁w rit es ▁w ith ▁ e qual ▁ l u c i di t y
▁painting ▁h e ▁ t e ll s ▁us ▁ is ▁o f ▁a ▁di f f er e n t ▁ qual i t y ▁ to ▁ma t h em at ic s ▁ and ▁f i nish ▁in ▁ar t ▁ is ▁a d d ing ▁m or e ▁f a c t
▁a s ▁for ▁ e t ch ing s ▁the y ▁ar e ▁o f ▁ t w o ▁ k i n d s ▁b rit is h ▁ and ▁for eig n
▁h e ▁ l a ment s ▁m ost ▁b i t ter l y ▁the ▁di v or c e ▁tha t ▁ ha s ▁be e n ▁ma d e ▁be t w e e n ▁d e c or at ive ▁ar t ▁ and ▁w ha t ▁we ▁us u all y ▁c all ▁ p ic tur es ▁ma k es ▁the ▁c u s t om ar y ▁a p pe a l ▁ to ▁the ▁ las t ▁ j u d g ment ▁ and ▁re mind s ▁us ▁tha t ▁in ▁the ▁gre at ▁d a y s ▁o f ▁ar t ▁mi c ha e l ▁a n g e l o ▁w a s ▁the ▁f ur nish ing ▁ u p h o l ster er

==> data/lang_char/input.decode <==
mister quilter is the apostle of the middle classes and we are glad to welcome his gospel
nor is mister quilter's manner less interesting than his matter
he tells us that at this festive season of the year with christmas and roast beef looming before us similes drawn from eating and its results occur most readily to the mind
he has grave doubts whether sir frederick leighton's work is really greek after all and can discover in it but little of rocky ithaca
linnell's pictures are a sort of up guards and at em paintings and mason's exquisite idylls are as national as a jingo poem mister birket foster's landscapes smile at one much in the same way that mister carker used to flash his teeth and mister john collier gives his sitter a cheerful slap on the back before he says like a shampooer in a turkish bath next man
it is obviously unnecessary for us to point out how luminous these criticisms are how delicate in expression
on the general principles of art mister quilter writes with equal lucidity
painting he tells us is of a different quality to mathematics and finish in art is adding more fact
as for etchings they are of two kinds british and foreign
he laments most bitterly the divorce that has been made between decorative art and what we usually call pictures makes the customary appeal to the last judgment and reminds us that in the great days of art michael angelo was the furnishing upholsterer


==> data/lang_char/train_unigram100_units.txt <==
<blank> 0
<unk> 1
' 2
a 3
all 4
and 5
ar 6
ason 7
at 8
b 9

==> data/lang_char/train_unigram100.vocab <==
<unk>   0
<s>     0
</s>    0
▁       -2.01742
e       -2.7203
s       -2.82989
t       -2.99689
l       -3.53267
n       -3.84935
o       -3.88229
```


================================================
FILE: examples/other/spm/path.sh
================================================
# Environment setup for this example; meant to be sourced from the example
# directory, because MAIN_ROOT is derived from ${PWD}.
# The realpath argument is quoted so a working directory containing spaces
# is not split into several arguments.
export MAIN_ROOT=$(realpath "${PWD}/../../..")

export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export LC_ALL=C

# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
# Only add the ":" separator when the variable already has a value; otherwise
# the result would contain an empty entry, which is interpreted as the
# current working directory.
export PYTHONPATH=${MAIN_ROOT}${PYTHONPATH:+:${PYTHONPATH}}

export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/lib/


================================================
FILE: examples/other/spm/run.sh
================================================
#!/usr/bin/env bash
# Train a SentencePiece model on ./text, build the unit dictionary from it,
# then encode the input to pieces and decode it back.

set -e

source path.sh


stage=0
stop_stage=100
# vocabulary size handed to spm_train
nbpe=100
# bpemode (unigram or bpe)
bpemode=unigram


# The variables above can be overridden from the command line.
source "${MAIN_ROOT}/utils/parse_options.sh" || exit 1;

train_set=train
dict=data/lang_char/${train_set}_${bpemode}${nbpe}_units.txt
bpemodel=data/lang_char/${train_set}_${bpemode}${nbpe}

echo "dictionary: ${dict}"
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    ### Task dependent. You have to check non-linguistic symbols used in the corpus.
    echo "stage 2: Dictionary and Json Data Preparation"
    mkdir -p data/lang_char/

    echo "<blank> 0" > "${dict}" # 0 will be used for "blank" in CTC
    echo "<unk> 1" >> "${dict}" # <unk> must be 1

    # The BPE-related code and scripts are borrowed from ESPnet.
    # Drop the first (utterance id) column; keep only the transcript.
    cut -f 2- -d" " text > data/lang_char/input.txt
    "${MAIN_ROOT}/utils/spm_train" --input=data/lang_char/input.txt --vocab_size=${nbpe} --model_type=${bpemode} --model_prefix="${bpemodel}" --input_sentence_size=100000000
    # One unique piece per line, numbered from 2 because ids 0 and 1 are taken above.
    "${MAIN_ROOT}/utils/spm_encode" --model="${bpemodel}.model" --output_format=piece < data/lang_char/input.txt | tr ' ' '\n' | sort | uniq | awk '{print $0 " " NR+1}' >> "${dict}"
    # The current line count is the next free id, used for <sos/eos>.
    num_token=$(wc -l < "${dict}")
    echo "<sos/eos> $num_token" >> "${dict}" # <eos>
    wc -l "${dict}"
fi

# Round trip: text -> pieces -> text.
"${MAIN_ROOT}/utils/spm_encode" --model="${bpemodel}.model" --output_format=piece < data/lang_char/input.txt > data/lang_char/input.bpe
"${MAIN_ROOT}/utils/spm_decode" --model="${bpemodel}.model" --input_format=piece < data/lang_char/input.bpe | sed -e "s/▁/ /g" > data/lang_char/input.decode


================================================
FILE: examples/other/spm/text
================================================
text-1 mister quilter is the apostle of the middle classes and we are glad to welcome his gospel
test-2 nor is mister quilter's manner less interesting than his matter
test-3 he tells us that at this festive season of the year with christmas and roast beef looming before us similes drawn from eating and its results occur most readily to the mind
test-4 he has grave doubts whether sir frederick leighton's work is really greek after all and can discover in it but little of rocky ithaca
test-5 linnell's pictures are a sort of up guards and at em paintings and mason's exquisite idylls are as national as a jingo poem mister birket foster's landscapes smile at one much in the same way that mister carker used to flash his teeth and mister john collier gives his sitter a cheerful slap on the back before he says like a shampooer in a turkish bath next man
test-6 it is obviously unnecessary for us to point out how luminous these criticisms are how delicate in expression
test-7 on the general principles of art mister quilter writes with equal lucidity
test-8 painting he tells us is of a different quality to mathematics and finish in art is adding more fact
test-9 as for etchings they are of two kinds british and foreign
test-10 he laments most bitterly the divorce that has been made between decorative art and what we usually call pictures makes the customary appeal to the last judgment and reminds us that in the great days of art michael angelo was the furnishing upholsterer


================================================
FILE: examples/other/tn/README.md
================================================
# Text Normalization
For text normalization, the test data is `data/textnorm_test_cases.txt`; each line uses `|` as the separator between raw_data and normed_data.

We use `CER` as an evaluation criterion.
## Start
Run the command below to get the results of the test.
```bash
cd ../../../tools
bash extras/install_sclite.sh
cd -
./run.sh
```
The `avg CER` of text normalization is: 0.00730093543235227
```text
      ,-----------------------------------------------------------------.
      |        | # Snt  # Wrd | Corr    Sub    Del    Ins    Err  S.Err |
      |--------+--------------+-----------------------------------------|
      | Sum/Avg|  125    2254 | 99.4    0.1    0.5    0.2    0.8    4.8 |
      `-----------------------------------------------------------------'
```


================================================
FILE: examples/other/tn/data/textnorm_test_cases.txt
================================================
今天的最低气温达到-10°C.|今天的最低气温达到零下十度.
只要有33/4的人同意，就可以通过决议。|只要有四分之三十三的人同意，就可以通过决议。
1945年5月2日，苏联士兵在德国国会大厦上升起了胜利旗，象征着攻占柏林并战胜了纳粹德国。|一九四五年五月二日，苏联士兵在德国国会大厦上升起了胜利旗，象征着攻占柏林并战胜了纳粹德国。
4月16日，清晨的战斗以炮击揭幕，数以千计的大炮和喀秋莎火箭炮开始炮轰德军阵地，炮击持续了数天之久。|四月十六日，清晨的战斗以炮击揭幕，数以千计的大炮和喀秋莎火箭炮开始炮轰德军阵地，炮击持续了数天之久。
如果剩下的30.6%是过去，那么还有69.4%.|如果剩下的百分之三十点六是过去，那么还有百分之六十九点四.
事情发生在2020/03/31的上午8:00.|事情发生在二零二零年三月三十一日的上午八点.
警方正在找一支.22口径的手枪。|警方正在找一支零点二二口径的手枪。
欢迎致电中国联通，北京2022年冬奥会官方合作伙伴为您服务|欢迎致电中国联通，北京二零二二年冬奥会官方合作伙伴为您服务
充值缴费请按1，查询话费及余量请按2，跳过本次提醒请按井号键。|充值缴费请按一，查询话费及余量请按二，跳过本次提醒请按井号键。
快速解除流量封顶请按星号键，腾讯王卡产品介绍、使用说明、特权及活动请按9，查询话费、套餐余量、积分及活动返款请按1，手机上网流量开通及取消请按2，查询本机号码及本号所使用套餐请按4，密码修改及重置请按5，紧急开机请按6，挂失请按7，查询充值记录请按8，其它自助服务及人工服务请按0|快速解除流量封顶请按星号键，腾讯王卡产品介绍、使用说明、特权及活动请按九，查询话费、套餐余量、积分及活动返款请按一，手机上网流量开通及取消请按二，查询本机号码及本号所使用套餐请按四，密码修改及重置请按五，紧急开机请按六，挂失请按七，查询充值记录请按八，其它自助服务及人工服务请按零
智能客服助理快速查话费、查流量请按9，了解北京联通业务请按1，宽带IPTV新装、查询请按2，障碍报修请按3，充值缴费请按4，投诉建议请按5，政企业务请按7，人工服务请按0，for english severice press star key|智能客服助理快速查话费、查流量请按九，了解北京联通业务请按一，宽带IPTV新装、查询请按二，障碍报修请按三，充值缴费请按四，投诉建议请按五，政企业务请按七，人工服务请按零，for english severice press star key
您的帐户当前可用余额为63.89元，本月消费为2.17元。您的消费、套餐余量和其它信息将以短信形式下发，请您注意查收。谢谢使用，再见！。|您的帐户当前可用余额为六十三点八九元，本月消费为二点一七元。您的消费、套餐余量和其它信息将以短信形式下发，请您注意查收。谢谢使用，再见！。
您的帐户当前可用余额为负15.5元，本月消费为59.6元。您的消费、套餐余量和其它信息将以短信形式下发，请您注意查收。谢谢使用，再见！。|您的帐户当前可用余额为负十五点五元，本月消费为五十九点六元。您的消费、套餐余量和其它信息将以短信形式下发，请您注意查收。谢谢使用，再见！。
尊敬的客户，您目前的话费余额为负14.60元，已低于10元，为保证您的通信畅通，请及时缴纳费用。|尊敬的客户，您目前的话费余额为负十四点六元，已低于十元，为保证您的通信畅通，请及时缴纳费用。
您的流量已用完，为避免您产生额外费用，建议您根据需求开通一个流量包以作补充。|您的流量已用完，为避免您产生额外费用，建议您根据需求开通一个流量包以作补充。
您可以直接说，查询话费及余量、开通流量包、缴费，您也可以说出其它需求，请问有什么可以帮您？|您可以直接说，查询话费及余量、开通流量包、缴费，您也可以说出其它需求，请问有什么可以帮您？
您的账户当前可用余额为负36.00元，本月消费36.00元。|您的账户当前可用余额为负三十六元，本月消费三十六元。
请问你是电话13985608526的机主吗？|请问你是电话一三九八五六零八五二六的机主吗？
如您对处理结果不满意，可拨打中国联通集团投诉电话10015进行投诉，按本地通话费收费，返回自助服务请按井号键|如您对处理结果不满意，可拨打中国联通集团投诉电话一零零一五进行投诉，按本地通话费收费，返回自助服务请按井号键
“26314”号VIP客服代表为您服务。|“二六三一四”号VIP客服代表为您服务。
尊敬的5G用户，欢迎您致电中国联通|尊敬的五G用户，欢迎您致电中国联通
首先是应用了M1芯片的iPad Pro，新款的iPad Pro支持5G，这也是苹果的第二款5G产品线。|首先是应用了M一芯片的iPad Pro，新款的iPad Pro支持五G，这也是苹果的第二款五G产品线。
除此之外，摄像头方面再次升级，增加了前摄全新超广角摄像头，支持人物居中功能，搭配超广角可实现视频中始终让人物居中效果。|除此之外，摄像头方面再次升级，增加了前摄全新超广角摄像头，支持人物居中功能，搭配超广角可实现视频中始终让人物居中效果。
屏幕方面，iPad Pro 12.9版本支持XDR体验的Mini-LEDS显示屏，支持HDR10、杜比视界，还支持杜比全景声。|屏幕方面，iPad Pro 十二点九版本支持XDR体验的Mini-LEDS显示屏，支持HDR十、杜比视界，还支持杜比全景声。
iPad Pro的秒控键盘这次也推出白色版本。|iPad Pro的秒控键盘这次也推出白色版本。
售价方面，11英寸版本售价799美元起，12.9英寸售价1099美元起。|售价方面，十一英寸版本售价七百九十九美元起，十二点九英寸售价一千零九十九美元起。
这块黄金重达324.75克|这块黄金重达三百二十四点七五克
她出生于86年8月18日，她弟弟出生于1995年3月1日|她出生于八六年八月十八日，她弟弟出生于一九九五年三月一日
电影中梁朝伟扮演的陈永仁的编号27149|电影中梁朝伟扮演的陈永仁的编号二七一四九
现场有7/12的观众投出了赞成票|现场有十二分之七的观众投出了赞成票
随便来几个价格12块5，34.5元，20.1万|随便来几个价格十二块五，三十四点五元，二十点一万
明天有62%的概率降雨|明天有百分之六十二的概率降雨
这是固话0421-33441122|这是固话零四二一三三四四一一二二
这是手机+86 18544139121|这是手机八六一八五四四一三九一二一
小王的身高是153.5cm,梦想是打篮球!我觉得有0.1%的可能性。|小王的身高是一百五十三点五厘米,梦想是打篮球!我觉得有百分之零点一的可能性。
不管三七二十一|不管三七二十一
九九八十一难|九九八十一难
2018年5月23号上午10点10分|二零一八年五月二十三号上午十点十分
10076|一零零七六
32.68%|百分之三十二点六八
比分测试17:16|比分测试十七比十六
比分测试37:16|比分测试三十七比十六
1.1|一点一
一点一滴|一点一滴
八九十|八九十
1个人一定要|一个人一定要
10000棵树|一万棵树
1234个人|一千二百三十四个人
35553座楼|三万五千五百五十三座楼
15873690|一五八七三六九零
27930122|二七九三零一二二
85307499|八五三零七四九九
26149787|二六一四九七八七
15964862|一五九六四八六二
45698723|四五六九八七二三
48615964|四八六一五九六四
17864589|一七八六四五八九
123加456|一百二十三加四百五十六
9786加3384|九千七百八十六加三千三百八十四
发电站每天发电30029度电|发电站每天发电三万零二十九度电
银行月交易总额七千九百零三亿元|银行月交易总额七千九百零三亿元
深圳每月平均工资在13000元|深圳每月平均工资在一万三千元
每月房租要交1500元|每月房租要交一千五百元
我每月交通费用在400元左右|我每月交通费用在四百元左右
本月开销费用是51328元|本月开销费用是五万一千三百二十八元
如果你中了五千万元奖金会分我一半吗|如果你中了五千万元奖金会分我一半吗
这个月工资我发了3529元|这个月工资我发了三千五百二十九元
学会了这个技能你至少可以涨薪5000元|学会了这个技能你至少可以涨薪五千元
我们的会议时间定在9点25分开始|我们的会议时间定在九点二十五分开始
上课时间是8点15分请不要迟到|上课时间是八点十五分请不要迟到
昨天你9点21分才到教室|昨天你九点二十一分才到教室
今天是2019年1月31号|今天是二零一九年一月三十一号
今年的除夕夜是2019年2月4号|今年的除夕夜是二零一九年二月四号
这根水管的长度不超过35米|这根水管的长度不超过三十五米
400米是最短的长跑距离|四百米是最短的长跑距离
最高的撑杆跳为11米|最高的撑杆跳为十一米
等会请在12:05请通知我|等会请在十二点零五分请通知我
23点15分开始|二十三点十五分开始
你生日那天我会送你999朵玫瑰|你生日那天我会送你九百九十九朵玫瑰
给我1双鞋我可以跳96米远|给我一双鞋我可以跳九十六米远
虽然我们的身高相差356毫米也不影响我们交往|虽然我们的身高相差三百五十六毫米也不影响我们交往
我们班的最高总分为583分|我们班的最高总分为五百八十三分
今天考试老师多扣了我21分|今天考试老师多扣了我二十一分
我量过这张桌子总长为1.37米|我量过这张桌子总长为一点三七米
乘务员身高必须超过185公分|乘务员身高必须超过一百八十五公分
这台电脑分辨率为1024|这台电脑分辨率为一零二四
手机价格不超过1500元|手机价格不超过一千五百元
101.23|一百零一点二三
123.116|一百二十三点一一六
456.147|四百五十六点一四七
0.1594|零点一五九四
3.1415|三点一四一五
0.112233|零点一一二二三三
0.1|零点一
40001.987|四万零一点九八七
56.878|五十六点八七八
0.00123|零点零零一二三
0.0001|零点零零零一
0.92015|零点九二零一五
999.0001|九百九十九点零零零一
10000.123|一万点一二三
666.555|六百六十六点五五五
444.789|四百四十四点七八九
789.666|七百八十九点六六六
0.12345|零点一二三四五
1.05649|一点零五六四九
环比上调1.86%|环比上调百分之一点八六
环比分别下跌3.46%及微涨0.70%|环比分别下跌百分之三点四六及微涨百分之零点七
单价在30000元的二手房购房个案当中|单价在三万元的二手房购房个案当中
6月仍有7%单价在30000元的房源|六月仍有百分之七单价在三万元的房源
最终也只是以总积分1分之差屈居第2|最终也只是以总积分一分之差屈居第二
中新网8月29日电今日|中新网八月二十九日电今日
自6月底呼和浩特市率先宣布取消限购后|自六月底呼和浩特市率先宣布取消限购后
仅1个多月的时间里|仅一个多月的时间里
除了北京上海广州深圳4个一线城市和三亚之外|除了北京上海广州深圳四个一线城市和三亚之外
46个限购城市当中|四十六个限购城市当中
41个已正式取消或变相放松了限购|四十一个已正式取消或变相放松了限购
其中包括对拥有一套住房并已结清相应购房贷款的家庭|其中包括对拥有一套住房并已结清相应购房贷款的家庭
这个后来被称为930新政策的措施|这个后来被称为九三零新政策的措施
今年有望超三百亿美元|今年有望超三百亿美元
就连一向看多的任志强|就连一向看多的任志强
近期也一反常态地发表看空言论|近期也一反常态地发表看空言论
985|九八五
12~23|十二到二十三
12-23|十二到二十三
25cm²|二十五平方厘米
25m|二十五米


================================================
FILE: examples/other/tn/get_textnorm_data.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from pathlib import Path


def main():
    """Split the text-normalization test cases into raw and reference files.

    Reads ``--test-file``, in which every non-empty line has the form
    ``raw_text|normed_text``, and writes two files into ``--output-dir``:

    * ``text``:     ``utt_<i> <raw_text>``
    * ``text.ref``: ``utt_<i> <normed_text>``

    ``<i>`` is the zero-based index of the line in the test file, so both
    outputs share the same utterance ids.

    Raises:
        ValueError: if a non-empty line does not contain exactly one ``|``.
    """
    parser = argparse.ArgumentParser(description="text normalization example.")
    parser.add_argument(
        "--test-file",
        default="data/textnorm_test_cases.txt",
        type=str,
        help="path of text normalization test file.")
    parser.add_argument(
        "--output-dir",
        default="data/textnorm",
        type=str,
        help="directory to output.")

    args = parser.parse_args()
    test_file = Path(args.test_file).expanduser()
    output_dir = Path(args.output_dir).expanduser()
    output_dir.mkdir(parents=True, exist_ok=True)

    raw_path = output_dir / "text"
    ref_path = output_dir / "text.ref"

    # Context managers guarantee both outputs are flushed and closed, and the
    # explicit encoding keeps the (Chinese) test data readable regardless of
    # the process locale.
    with open(test_file, "r", encoding="utf-8") as rf, \
            open(raw_path, "w", encoding="utf-8") as wf_raw, \
            open(ref_path, "w", encoding="utf-8") as wf_ref:
        for i, line in enumerate(rf):
            line = line.strip()
            if not line:
                # A blank (e.g. trailing) line carries no test case.
                continue
            raw_text, normed_text = line.split("|")
            wf_raw.write("utt_" + str(i) + " " + raw_text + "\n")
            wf_ref.write("utt_" + str(i) + " " + normed_text + "\n")

if __name__ == "__main__":
    main()


================================================
FILE: examples/other/tn/path.sh
================================================
#!/bin/bash
# Environment setup for this example; meant to be sourced from the example
# directory, because MAIN_ROOT is derived from ${PWD}.
# The realpath argument is quoted so a working directory containing spaces
# is not split into several arguments.
export MAIN_ROOT=$(realpath "${PWD}/../../..")

export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export LC_ALL=C

export PYTHONDONTWRITEBYTECODE=1
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
# Only add the ":" separator when PYTHONPATH already has a value; otherwise
# the result would end in an empty entry, which is interpreted as the
# current working directory.
export PYTHONPATH=${MAIN_ROOT}${PYTHONPATH:+:${PYTHONPATH}}


================================================
FILE: examples/other/tn/run.sh
================================================
#!/bin/bash
# Evaluate text normalization: build the test files, compute the average CER,
# and optionally produce a detailed sclite report.

# Stop at the first failing step so later steps never run on stale or
# missing data.
set -e

source path.sh

USE_SCLITE=true

# test text normalization
echo "Start get text normalization test data ..."
python3 get_textnorm_data.py --test-file=data/textnorm_test_cases.txt --output-dir=data/textnorm
echo "Start test text normalization ..."
python3 test_textnorm.py --input-dir=data/textnorm --output-dir=exp/textnorm

# whether to use sclite to get more detailed error-rate information
if [ "$USE_SCLITE" = true ];then
    echo "Start sclite textnorm ..."
    "${MAIN_ROOT}/tools/sctk/bin/sclite" -i wsj -r ./exp/textnorm/text.ref.clean trn -h ./exp/textnorm/text.tn trn -e utf-8 -o all
fi

================================================
FILE: examples/other/tn/test_textnorm.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import re
from pathlib import Path

from paddlespeech.t2s.frontend.zh_normalization.text_normlization import TextNormalizer
from paddlespeech.t2s.utils.error_rate import char_errors


# Drop ASCII letters, then separate the remaining characters with one space.
# e.g. "你好aBC" -> "你 好"
def del_en_add_space(input: str):
    """Return ``input`` without ``[a-zA-Z]``, its characters space-separated.

    Leading and trailing whitespace of the result is stripped.
    """
    kept_chars = re.sub('[a-zA-Z]', '', input)
    return " ".join(kept_chars).strip()


def get_avg_cer(raw_dict, ref_dict, text_normalizer, output_dir):
    """Normalize every raw text and score it against its reference.

    For each id in ``raw_dict`` that also appears in ``ref_dict``, the raw
    text is passed through ``text_normalizer.normalize_sentence``. English
    letters are removed from both the result and the reference, and the
    remaining characters are space-separated (see ``del_en_add_space``).
    Both strings are written as ``<text>(<text_id>)`` lines to
    ``text.tn`` and ``text.ref.clean`` under ``output_dir`` and compared with
    ``char_errors``.

    Args:
        raw_dict: mapping of text id to raw text.
        ref_dict: mapping of text id to reference (normalized) text.
        text_normalizer: object providing ``normalize_sentence(str)``.
        output_dir: ``pathlib.Path`` of an existing directory for the outputs.

    Returns:
        Sum of edit distances divided by the sum of reference lengths.

    Raises:
        ZeroDivisionError: if no id is shared by ``raw_dict`` and
            ``ref_dict`` (the total reference length is then 0).
    """
    edit_distances = []
    ref_lens = []
    # Context managers make sure both result files are flushed and closed
    # even if normalization or scoring raises half-way through.
    with open(output_dir / "text.ref.clean", "w", encoding="utf-8") as wf_ref, \
            open(output_dir / "text.tn", "w", encoding="utf-8") as wf_tn:
        for text_id, raw_text in raw_dict.items():
            if text_id not in ref_dict:
                continue
            gt_text = del_en_add_space(ref_dict[text_id])
            textnorm_text = del_en_add_space(
                text_normalizer.normalize_sentence(raw_text))

            wf_ref.write(gt_text + "(" + text_id + ")" + "\n")
            wf_tn.write(textnorm_text + "(" + text_id + ")" + "\n")

            edit_distance, ref_len = char_errors(gt_text, textnorm_text)
            edit_distances.append(edit_distance)
            ref_lens.append(ref_len)

    return sum(edit_distances) / sum(ref_lens)


def _read_id_text_file(path):
    """Parse a file of ``<text_id> <text>`` lines into a ``{text_id: text}`` dict.

    The id is everything before the first space and the text everything after
    it. Blank lines are skipped. The file is read as UTF-8.
    """
    id2text = dict()
    with open(path, "r", encoding="utf-8") as rf:
        for line in rf:
            line = line.strip()
            if not line:
                continue
            text_id, _, text = line.partition(" ")
            id2text[text_id] = text
    return id2text


def main():
    """Run the text normalizer over the prepared test set and print the CER.

    Reads ``text`` (raw) and ``text.ref`` (reference) from ``--input-dir``,
    normalizes every raw sentence with ``TextNormalizer`` and prints the
    average CER returned by ``get_avg_cer``, which also writes its result
    files into ``--output-dir``.
    """
    parser = argparse.ArgumentParser(description="text normalization example.")
    parser.add_argument(
        "--input-dir",
        default="data/textnorm",
        type=str,
        help="directory to preprocessed test data.")
    parser.add_argument(
        "--output-dir",
        default="exp/textnorm",
        type=str,
        help="directory to save textnorm results.")

    args = parser.parse_args()
    input_dir = Path(args.input_dir).expanduser()
    output_dir = Path(args.output_dir).expanduser()
    output_dir.mkdir(parents=True, exist_ok=True)
    assert input_dir.is_dir()

    # Both files share one format, so one helper parses them.
    raw_dict = _read_id_text_file(input_dir / "text")
    ref_dict = _read_id_text_file(input_dir / "text.ref")

    text_normalizer = TextNormalizer()

    avg_cer = get_avg_cer(raw_dict, ref_dict, text_normalizer, output_dir)
    print("The avg CER of text normalization is:", avg_cer)


if __name__ == "__main__":
    main()


================================================
FILE: examples/other/tts_finetune/tts3/README.md
================================================
# Finetune your own AM based on FastSpeech2 with multi-speakers dataset.
This example shows how to finetune your own AM based on FastSpeech2 with a multi-speaker dataset. For finetuning Chinese data, we use part of csmsc's data (top 200) and the Fastspeech2 pretrained model with AISHELL-3. For finetuning English data, we use part of ljspeech's data (top 200) and the Fastspeech2 pretrained model with VCTK. The example is implemented according to this [discussion](https://github.com/PaddlePaddle/PaddleSpeech/discussions/1842). Thanks to the developer for the idea.

For more information on training Fastspeech2 with AISHELL-3, you can refer to [examples/aishell3/tts3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/tts3). For more information on training Fastspeech2 with VCTK, you can refer to [examples/vctk/tts3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/vctk/tts3).


## Prepare 
### Download Pretrained model
Assume the path to the model is `./pretrained_models`. </br>
If you want to finetune Chinese pretrained model, you need to download Fastspeech2 pretrained model with AISHELL-3: [fastspeech2_aishell3_ckpt_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_aishell3_ckpt_1.1.0.zip) for finetuning. Download HiFiGAN pretrained model with aishell3: [hifigan_aishell3_ckpt_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_ckpt_0.2.0.zip) for synthesis.

```bash
mkdir -p pretrained_models && cd pretrained_models
# pretrained fastspeech2 model
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_aishell3_ckpt_1.1.0.zip 
unzip fastspeech2_aishell3_ckpt_1.1.0.zip
# pretrained hifigan model
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_ckpt_0.2.0.zip
unzip hifigan_aishell3_ckpt_0.2.0.zip
cd ../
```


If you want to finetune English pretrained model, you need to download Fastspeech2 pretrained model with VCTK: [fastspeech2_vctk_ckpt_1.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_ckpt_1.2.0.zip) for finetuning. Download HiFiGAN pretrained model with VCTK: [hifigan_vctk_ckpt_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_ckpt_0.2.0.zip) for synthesis.

```bash
mkdir -p pretrained_models && cd pretrained_models
# pretrained fastspeech2 model
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_ckpt_1.2.0.zip 
unzip fastspeech2_vctk_ckpt_1.2.0.zip
# pretrained hifigan model
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_ckpt_0.2.0.zip
unzip hifigan_vctk_ckpt_0.2.0.zip
cd ../
```

If you want to finetune Chinese-English Mixed pretrained model, you need to download Fastspeech2 pretrained model with mix datasets: [fastspeech2_mix_ckpt_1.2.0.zip](https://paddlespeech.cdn.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_ckpt_1.2.0.zip) for finetuning. Download HiFiGAN pretrained model with aishell3: [hifigan_aishell3_ckpt_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_ckpt_0.2.0.zip) for synthesis.

```bash
mkdir -p pretrained_models && cd pretrained_models
# pretrained fastspeech2 model
wget https://paddlespeech.cdn.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_ckpt_1.2.0.zip
unzip fastspeech2_mix_ckpt_1.2.0.zip
# pretrained hifigan model
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_ckpt_0.2.0.zip
unzip hifigan_aishell3_ckpt_0.2.0.zip
cd ../
```

### Prepare your data
Assume the path to the dataset is `./input` which contains a speaker folder. Speaker folder contains audio files (*.wav) and label file (labels.txt). The format of the audio file is wav. The format of the label file is: utt_id|pronunciation. </br>

If you want to finetune Chinese pretrained model, you need to prepare Chinese data. Chinese label example: 
```
000001|ka2 er2 pu3 pei2 wai4 sun1 wan2 hua2 ti1
```

Here is a Chinese data example of the first 200 data of csmsc.

```bash
mkdir -p input && cd input
wget https://paddlespeech.cdn.bcebos.com/datasets/csmsc_mini.zip
unzip csmsc_mini.zip
cd ../
```

If you want to finetune English pretrained model, you need to prepare English data. English label example: 
```
LJ001-0001|Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts and crafts represented in the Exhibition
```

Here is an English data example of the first 200 data of ljspeech.

```bash
mkdir -p input && cd input
wget https://paddlespeech.cdn.bcebos.com/datasets/ljspeech_mini.zip
unzip ljspeech_mini.zip
cd ../
```

If you want to finetune Chinese-English Mixed pretrained model, you need to prepare Chinese data or English data. Here is a Chinese data example of the first 12 data of SSB0005 (the speaker of aishell3).

```bash
mkdir -p input && cd input
wget https://paddlespeech.cdn.bcebos.com/datasets/SSB0005_mini.zip
unzip SSB0005_mini.zip
cd ../
```

### Download MFA tools and pretrained model
Assume the path to the MFA tool is `./tools`. Download [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner/releases/download/v1.0.1/montreal-forced-aligner_linux.tar.gz).

```bash
mkdir -p tools && cd tools
# mfa tool
wget https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner/releases/download/v1.0.1/montreal-forced-aligner_linux.tar.gz
tar xvf montreal-forced-aligner_linux.tar.gz
cp montreal-forced-aligner/lib/libpython3.6m.so.1.0 montreal-forced-aligner/lib/libpython3.6m.so
mkdir -p aligner && cd aligner
```

If you want to get mfa result of Chinese data, you need to download pretrained MFA models with aishell3: [aishell3_model.zip](https://paddlespeech.cdn.bcebos.com/MFA/ernie_sat/aishell3_model.zip) and unzip it.

```bash
# pretrained mfa model for Chinese data
wget https://paddlespeech.cdn.bcebos.com/MFA/ernie_sat/aishell3_model.zip
unzip aishell3_model.zip
wget https://paddlespeech.cdn.bcebos.com/MFA/AISHELL-3/with_tone/simple.lexicon
cd ../../
```

If you want to get mfa result of English data, you need to download pretrained MFA models with vctk: [vctk_model.zip](https://paddlespeech.cdn.bcebos.com/MFA/ernie_sat/vctk_model.zip) and unzip it.

```bash
# pretrained mfa model for English data
wget https://paddlespeech.cdn.bcebos.com/MFA/ernie_sat/vctk_model.zip
unzip vctk_model.zip
wget https://paddlespeech.cdn.bcebos.com/MFA/LJSpeech-1.1/cmudict-0.7b
cd ../../
```

When the "Prepare" steps are done, the structure of the current directory is similar to the following.
```text
├── input
│   ├── csmsc_mini
│   │   ├── 000001.wav
│   │   ├── 000002.wav
│   │   ├── 000003.wav
│   │   ├── ...
│   │   ├── 000200.wav
│   │   ├── labels.txt
│   └── csmsc_mini.zip
├── pretrained_models
│   ├── fastspeech2_aishell3_ckpt_1.1.0
│   │   ├── default.yaml
│   │   ├── energy_stats.npy
│   │   ├── phone_id_map.txt
│   │   ├── pitch_stats.npy
│   │   ├── snapshot_iter_96400.pdz
│   │   ├── speaker_id_map.txt
│   │   └── speech_stats.npy
│   ├── fastspeech2_aishell3_ckpt_1.1.0.zip
│   ├── hifigan_aishell3_ckpt_0.2.0    
│   │   ├── default.yaml
│   │   ├── feats_stats.npy
│   │   └── snapshot_iter_2500000.pdz
│   └── hifigan_aishell3_ckpt_0.2.0.zip
└── tools
    ├── aligner
    │   ├── aishell3_model
    │   ├── aishell3_model.zip
    │   └── simple.lexicon
    ├── montreal-forced-aligner
    │   ├── bin
    │   ├── lib
    │   └── pretrained_models
    └── montreal-forced-aligner_linux.tar.gz
    ...

```

### Set finetune.yaml
`conf/finetune.yaml` contains some configurations for fine-tuning. You can try various options to find better results. The value of `frozen_layers` can be changed according to `conf/fastspeech2_layers.txt`, which lists the model layers of fastspeech2.

Arguments:
  - `batch_size`: finetune batch size, which should be less than or equal to the number of training samples. Default: -1, which means 64, the same as the pretrained model.
  - `learning_rate`: learning rate. Default: 0.0001
  - `num_snapshots`: number of saved models. Default: -1, which means 5, the same as the pretrained model.
  - `frozen_layers`: frozen layers. Must be a list. If you don't want to freeze any layer, set it to [].


## Get Started
For finetuning Chinese pretrained model, execute `./run.sh`. For finetuning English pretrained model, execute `./run_en.sh`. For finetuning Chinese-English Mixed pretrained model, execute `./run_mix.sh`. </br>
Run the command below to
1. **source path**.
2. finetune the model. 
3. synthesize wavs.
    - synthesize waveform from text file.

```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to run only one stage.

### Model Finetune

Finetune a FastSpeech2 model. 

```bash
./run.sh --stage 0 --stop-stage 5
```
`stage 5` of `run.sh` calls `local/finetune.py`, here's the complete help message.

```text
usage: finetune.py [-h] [--pretrained_model_dir PRETRAINED_MODEL_DIR]
                [--dump_dir DUMP_DIR] [--output_dir OUTPUT_DIR] [--ngpu NGPU]
                [--epoch EPOCH] [--finetune_config FINETUNE_CONFIG]

optional arguments:
  -h, --help           Show this help message and exit
  --pretrained_model_dir PRETRAINED_MODEL_DIR
                       Path to pretrained model
  --dump_dir DUMP_DIR
                       directory to save feature files and metadata
  --output_dir OUTPUT_DIR      
                       Directory to save finetune model 
  --ngpu NGPU          The number of gpu, if ngpu=0, use cpu
  --epoch EPOCH        The epoch of finetune
  --finetune_config FINETUNE_CONFIG        
                       Path to finetune config file
```

1. `--pretrained_model_dir` is the directory including the pretrained fastspeech2_aishell3 model.
2. `--dump_dir` is the directory including audio feature and metadata.
3. `--output_dir` is the directory to save finetune model.
4. `--ngpu` is the number of gpu, if ngpu=0, use cpu
5. `--epoch` is the epoch of finetune.
6. `--finetune_config` is the path to finetune config file
 

### Synthesizing
To synthesize Chinese audio, we use [HiFiGAN with aishell3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/voc5) as the neural vocoder.
Assume the path to the hifigan model is `./pretrained_models`. Download the pretrained HiFiGAN model from [hifigan_aishell3_ckpt_0.2.0](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_ckpt_0.2.0.zip) and unzip it.

To synthesize English audio, we use [HiFiGAN with vctk](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/vctk/voc5) as the neural vocoder.
Assume the path to the hifigan model is `./pretrained_models`. Download the pretrained HiFiGAN model from [hifigan_vctk_ckpt_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_ckpt_0.2.0.zip) and unzip it.


Modify `ckpt` in `run.sh` to the final model in `exp/default/checkpoints`.
```bash
./run.sh --stage 6 --stop-stage 6
```
`stage 6` of `run.sh` calls `${BIN_DIR}/../synthesize_e2e.py`, which can synthesize waveform from text file.

```text
usage: synthesize_e2e.py [-h]
                         [--am {fastspeech2_aishell3,fastspeech2_vctk}]
                         [--am_config AM_CONFIG] [--am_ckpt AM_CKPT]
                         [--am_stat AM_STAT] [--phones_dict PHONES_DICT]
                         [--tones_dict TONES_DICT]
                         [--speaker_dict SPEAKER_DICT] [--spk_id SPK_ID]
                         [--voc {pwgan_aishell3, pwgan_vctk, hifigan_aishell3, hifigan_vctk}]
                         [--voc_config VOC_CONFIG] [--voc_ckpt VOC_CKPT]
                         [--voc_stat VOC_STAT] [--lang LANG]
                         [--inference_dir INFERENCE_DIR] [--ngpu NGPU]
                         [--text TEXT] [--output_dir OUTPUT_DIR]

Synthesize with acoustic model & vocoder

optional arguments:
  -h, --help            show this help message and exit
  --am {fastspeech2_aishell3, fastspeech2_vctk}
                        Choose acoustic model type of tts task.
  --am_config AM_CONFIG
                        Config of acoustic model.
  --am_ckpt AM_CKPT     Checkpoint file of acoustic model.
  --am_stat AM_STAT     mean and standard deviation used to normalize
                        spectrogram when training acoustic model.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --tones_dict TONES_DICT
                        tone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --spk_id SPK_ID       spk id for multi speaker acoustic model
  --voc {pwgan_aishell3, pwgan_vctk, hifigan_aishell3, hifigan_vctk}
                        Choose vocoder type of tts task.
  --voc_config VOC_CONFIG
                        Config of voc.
  --voc_ckpt VOC_CKPT   Checkpoint file of voc.
  --voc_stat VOC_STAT   mean and standard deviation used to normalize
                        spectrogram when training voc.
  --lang LANG           Choose model language. zh or en
  --inference_dir INFERENCE_DIR
                        dir to save inference models
  --ngpu NGPU           if ngpu == 0, use cpu.
  --text TEXT           text to synthesize, a 'utt_id sentence' pair per line.
  --output_dir OUTPUT_DIR
                        output dir.
```

1. `--am` is acoustic model type with the format {model_name}_{dataset}
2. `--am_config`, `--am_ckpt`, `--am_stat`, `--phones_dict` and `--speaker_dict` are arguments for the acoustic model, which correspond to the 5 files in the fastspeech2 pretrained model.
3. `--voc` is vocoder type with the format {model_name}_{dataset}
4. `--voc_config`, `--voc_ckpt`, `--voc_stat` are arguments for the vocoder, which correspond to the 3 files in the pretrained vocoder model (HiFiGAN in this example).
5. `--lang` is the model language, which can be `zh` or `en`.
6. `--text` is the text file, which contains sentences to synthesize.
7.  `--output_dir` is the directory to save synthesized audio files.
8. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.


### Tips
If you want to get better audio quality, you can use more audios to finetune or change configuration parameters in `conf/finetune.yaml`.</br>
More finetune results can be found on [finetune-fastspeech2-for-csmsc](https://paddlespeech.readthedocs.io/en/latest/tts/demo.html#finetune-fastspeech2-for-csmsc).</br>
The results show the effect on csmsc_mini: Freeze encoder > Non Frozen > Freeze encoder && duration_predictor.


================================================
FILE: examples/other/tts_finetune/tts3/conf/fastspeech2_layers.txt
================================================
epoch
iteration
main_params
main_optimizer
spk_embedding_table.weight
encoder.embed.0.weight
encoder.embed.1.alpha
encoder.encoders.0.self_attn.linear_q.weight
encoder.encoders.0.self_attn.linear_q.bias
encoder.encoders.0.self_attn.linear_k.weight
encoder.encoders.0.self_attn.linear_k.bias
encoder.encoders.0.self_attn.linear_v.weight
encoder.encoders.0.self_attn.linear_v.bias
encoder.encoders.0.self_attn.linear_out.weight
encoder.encoders.0.self_attn.linear_out.bias
encoder.encoders.0.feed_forward.w_1.weight
encoder.encoders.0.feed_forward.w_1.bias
encoder.encoders.0.feed_forward.w_2.weight
encoder.encoders.0.feed_forward.w_2.bias
encoder.encoders.0.norm1.weight
encoder.encoders.0.norm1.bias
encoder.encoders.0.norm2.weight
encoder.encoders.0.norm2.bias
encoder.encoders.1.self_attn.linear_q.weight
encoder.encoders.1.self_attn.linear_q.bias
encoder.encoders.1.self_attn.linear_k.weight
encoder.encoders.1.self_attn.linear_k.bias
encoder.encoders.1.self_attn.linear_v.weight
encoder.encoders.1.self_attn.linear_v.bias
encoder.encoders.1.self_attn.linear_out.weight
encoder.encoders.1.self_attn.linear_out.bias
encoder.encoders.1.feed_forward.w_1.weight
encoder.encoders.1.feed_forward.w_1.bias
encoder.encoders.1.feed_forward.w_2.weight
encoder.encoders.1.feed_forward.w_2.bias
encoder.encoders.1.norm1.weight
encoder.encoders.1.norm1.bias
encoder.encoders.1.norm2.weight
encoder.encoders.1.norm2.bias
encoder.encoders.2.self_attn.linear_q.weight
encoder.encoders.2.self_attn.linear_q.bias
encoder.encoders.2.self_attn.linear_k.weight
encoder.encoders.2.self_attn.linear_k.bias
encoder.encoders.2.self_attn.linear_v.weight
encoder.encoders.2.self_attn.linear_v.bias
encoder.encoders.2.self_attn.linear_out.weight
encoder.encoders.2.self_attn.linear_out.bias
encoder.encoders.2.feed_forward.w_1.weight
encoder.encoders.2.feed_forward.w_1.bias
encoder.encoders.2.feed_forward.w_2.weight
encoder.encoders.2.feed_forward.w_2.bias
encoder.encoders.2.norm1.weight
encoder.encoders.2.norm1.bias
encoder.encoders.2.norm2.weight
encoder.encoders.2.norm2.bias
encoder.encoders.3.self_attn.linear_q.weight
encoder.encoders.3.self_attn.linear_q.bias
encoder.encoders.3.self_attn.linear_k.weight
encoder.encoders.3.self_attn.linear_k.bias
encoder.encoders.3.self_attn.linear_v.weight
encoder.encoders.3.self_attn.linear_v.bias
encoder.encoders.3.self_attn.linear_out.weight
encoder.encoders.3.self_attn.linear_out.bias
encoder.encoders.3.feed_forward.w_1.weight
encoder.encoders.3.feed_forward.w_1.bias
encoder.encoders.3.feed_forward.w_2.weight
encoder.encoders.3.feed_forward.w_2.bias
encoder.encoders.3.norm1.weight
encoder.encoders.3.norm1.bias
encoder.encoders.3.norm2.weight
encoder.encoders.3.norm2.bias
encoder.after_norm.weight
encoder.after_norm.bias
spk_projection.weight
spk_projection.bias
duration_predictor.conv.0.0.weight
duration_predictor.conv.0.0.bias
duration_predictor.conv.0.2.weight
duration_predictor.conv.0.2.bias
duration_predictor.conv.1.0.weight
duration_predictor.conv.1.0.bias
duration_predictor.conv.1.2.weight
duration_predictor.conv.1.2.bias
duration_predictor.linear.weight
duration_predictor.linear.bias
pitch_predictor.conv.0.0.weight
pitch_predictor.conv.0.0.bias
pitch_predictor.conv.0.2.weight
pitch_predictor.conv.0.2.bias
pitch_predictor.conv.1.0.weight
pitch_predictor.conv.1.0.bias
pitch_predictor.conv.1.2.weight
pitch_predictor.conv.1.2.bias
pitch_predictor.conv.2.0.weight
pitch_predictor.conv.2.0.bias
pitch_predictor.conv.2.2.weight
pitch_predictor.conv.2.2.bias
pitch_predictor.conv.3.0.weight
pitch_predictor.conv.3.0.bias
pitch_predictor.conv.3.2.weight
pitch_predictor.conv.3.2.bias
pitch_predictor.conv.4.0.weight
pitch_predictor.conv.4.0.bias
pitch_predictor.conv.4.2.weight
pitch_predictor.conv.4.2.bias
pitch_predictor.linear.weight
pitch_predictor.linear.bias
pitch_embed.0.weight
pitch_embed.0.bias
energy_predictor.conv.0.0.weight
energy_predictor.conv.0.0.bias
energy_predictor.conv.0.2.weight
energy_predictor.conv.0.2.bias
energy_predictor.conv.1.0.weight
energy_predictor.conv.1.0.bias
energy_predictor.conv.1.2.weight
energy_predictor.conv.1.2.bias
energy_predictor.linear.weight
energy_predictor.linear.bias
energy_embed.0.weight
energy_embed.0.bias
decoder.embed.0.alpha
decoder.encoders.0.self_attn.linear_q.weight
decoder.encoders.0.self_attn.linear_q.bias
decoder.encoders.0.self_attn.linear_k.weight
decoder.encoders.0.self_attn.linear_k.bias
decoder.encoders.0.self_attn.linear_v.weight
decoder.encoders.0.self_attn.linear_v.bias
decoder.encoders.0.self_attn.linear_out.weight
decoder.encoders.0.self_attn.linear_out.bias
decoder.encoders.0.feed_forward.w_1.weight
decoder.encoders.0.feed_forward.w_1.bias
decoder.encoders.0.feed_forward.w_2.weight
decoder.encoders.0.feed_forward.w_2.bias
decoder.encoders.0.norm1.weight
decoder.encoders.0.norm1.bias
decoder.encoders.0.norm2.weight
decoder.encoders.0.norm2.bias
decoder.encoders.1.self_attn.linear_q.weight
decoder.encoders.1.self_attn.linear_q.bias
decoder.encoders.1.self_attn.linear_k.weight
decoder.encoders.1.self_attn.linear_k.bias
decoder.encoders.1.self_attn.linear_v.weight
decoder.encoders.1.self_attn.linear_v.bias
decoder.encoders.1.self_attn.linear_out.weight
decoder.encoders.1.self_attn.linear_out.bias
decoder.encoders.1.feed_forward.w_1.weight
decoder.encoders.1.feed_forward.w_1.bias
decoder.encoders.1.feed_forward.w_2.weight
decoder.encoders.1.feed_forward.w_2.bias
decoder.encoders.1.norm1.weight
decoder.encoders.1.norm1.bias
decoder.encoders.1.norm2.weight
decoder.encoders.1.norm2.bias
decoder.encoders.2.self_attn.linear_q.weight
decoder.encoders.2.self_attn.linear_q.bias
decoder.encoders.2.self_attn.linear_k.weight
decoder.encoders.2.self_attn.linear_k.bias
decoder.encoders.2.self_attn.linear_v.weight
decoder.encoders.2.self_attn.linear_v.bias
decoder.encoders.2.self_attn.linear_out.weight
decoder.encoders.2.self_attn.linear_out.bias
decoder.encoders.2.feed_forward.w_1.weight
decoder.encoders.2.feed_forward.w_1.bias
decoder.encoders.2.feed_forward.w_2.weight
decoder.encoders.2.feed_forward.w_2.bias
decoder.encoders.2.norm1.weight
decoder.encoders.2.norm1.bias
decoder.encoders.2.norm2.weight
decoder.encoders.2.norm2.bias
decoder.encoders.3.self_attn.linear_q.weight
decoder.encoders.3.self_attn.linear_q.bias
decoder.encoders.3.self_attn.linear_k.weight
decoder.encoders.3.self_attn.linear_k.bias
decoder.encoders.3.self_attn.linear_v.weight
decoder.encoders.3.self_attn.linear_v.bias
decoder.encoders.3.self_attn.linear_out.weight
decoder.encoders.3.self_attn.linear_out.bias
decoder.encoders.3.feed_forward.w_1.weight
decoder.encoders.3.feed_forward.w_1.bias
decoder.encoders.3.feed_forward.w_2.weight
decoder.encoders.3.feed_forward.w_2.bias
decoder.encoders.3.norm1.weight
decoder.encoders.3.norm1.bias
decoder.encoders.3.norm2.weight
decoder.encoders.3.norm2.bias
decoder.after_norm.weight
decoder.after_norm.bias
feat_out.weight
feat_out.bias
postnet.postnet.0.0.weight
postnet.postnet.0.1.weight
postnet.postnet.0.1.bias
postnet.postnet.0.1._mean
postnet.postnet.0.1._variance
postnet.postnet.1.0.weight
postnet.postnet.1.1.weight
postnet.postnet.1.1.bias
postnet.postnet.1.1._mean
postnet.postnet.1.1._variance
postnet.postnet.2.0.weight
postnet.postnet.2.1.weight
postnet.postnet.2.1.bias
postnet.postnet.2.1._mean
postnet.postnet.2.1._variance
postnet.postnet.3.0.weight
postnet.postnet.3.1.weight
postnet.postnet.3.1.bias
postnet.postnet.3.1._mean
postnet.postnet.3.1._variance
postnet.postnet.4.0.weight
postnet.postnet.4.1.weight
postnet.postnet.4.1.bias
postnet.postnet.4.1._mean
postnet.postnet.4.1._variance


================================================
FILE: examples/other/tts_finetune/tts3/conf/finetune.yaml
================================================
###########################################################
#                 PARAS SETTING               #
###########################################################
# Set to -1 to indicate that the parameter is the same as the pretrained model configuration

batch_size: -1
learning_rate: 0.0001     # learning rate
num_snapshots: -1

# frozen_layers should be a list
# if you don't need to freeze, set frozen_layers to []
# fastspeech2 layers can be found on conf/fastspeech2_layers.txt
# example: frozen_layers: ["encoder", "duration_predictor"]
frozen_layers: ["encoder"]


================================================
FILE: examples/other/tts_finetune/tts3/local/check_oov.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import re
from pathlib import Path
from typing import Dict
from typing import List
from typing import Union

# Default resource locations. All paths are relative, so they resolve against
# the directory the script is launched from.

# Pronunciation lexicons (word -> phone sequence) for English and Chinese.
DICT_EN = 'tools/aligner/cmudict-0.7b'
DICT_ZH = 'tools/aligner/simple.lexicon'
# Pretrained MFA model archives. NOTE(review): these two constants are not
# referenced anywhere else in this file.
MODEL_DIR_EN = 'tools/aligner/vctk_model.zip'
MODEL_DIR_ZH = 'tools/aligner/aishell3_model.zip'
# meta.yaml of the unpacked MFA models; get_mfa_phone() reads the phone list
# from these files.
MFA_PHONE_EN = 'tools/aligner/vctk_model/meta.yaml'
MFA_PHONE_ZH = 'tools/aligner/aishell3_model/meta.yaml'
# Directory holding the MFA executables.
MFA_PATH = 'tools/montreal-forced-aligner/bin'
# Import-time side effect: prepend the MFA bin directory to PATH for this
# process (and any child processes it spawns).
os.environ['PATH'] = MFA_PATH + '/:' + os.environ['PATH']


def check_phone(label_file: Union[str, Path],
                pronunciation_phones: Dict[str, str],
                mfa_phones: List[str],
                am_phones: List[str],
                oov_record: str="./oov_info.txt",
                lang: str="zh"):
    """Find the words of a label file that cannot be aligned or finetuned.

    A word is reported as out-of-vocabulary (oov) when it is missing from the
    pronunciation lexicon, or when any phone of its pronunciation is missing
    from the pretrained mfa model phone list or from the pretrained am model
    phone list.

    Args:
        label_file (Union[str, Path]): label file, one `utt_id|transcription`
            entry per line. Blank lines are ignored.
        pronunciation_phones (dict): word -> list of phones, as returned by
            `get_pronunciation_phones`.
        mfa_phones (list): the phone list of the pretrained mfa model
        am_phones (list): the phone list of the pretrained am model
        oov_record (str): file the result is written to. Pass None to skip
            writing the record.
        lang (str): language of the transcription. For "en" the words are
            upper-cased before the lexicon lookup.

    Returns:
        oov_words (list): unique oov words, in order of first appearance
        oov_files (list): utt id list that exist oov
        oov_file_words (dict): utt id -> oov words found in that utterance
            (one entry per occurrence of an oov word)
    """
    oov_words = []
    oov_files = []
    oov_file_words = {}

    # Sets make the per-phone / per-word membership tests O(1).
    mfa_phone_set = set(mfa_phones)
    am_phone_set = set(am_phones)
    seen_oov_words = set()

    with open(label_file, "r", encoding="utf-8") as f:
        for line in f:
            # A blank (e.g. trailing) line carries no utterance.
            if not line.strip():
                continue
            utt_id = line.split("|")[0]
            transcription = line.strip().split("|")[1]
            transcription = re.sub(
                r'[：、，；。？！,.:;"?!”’《》【】<=>{}()（）#&@“”^_|…\\]', '',
                transcription)
            if lang == "en":
                transcription = transcription.upper()
            temp_oov_words = []
            # split() without a separator drops the empty tokens that appear
            # when removed punctuation leaves consecutive spaces behind;
            # split(" ") would report those empty strings as oov words.
            for word in transcription.split():
                if word in pronunciation_phones:
                    is_oov = any(
                        p not in mfa_phone_set or p not in am_phone_set
                        for p in pronunciation_phones[word])
                else:
                    is_oov = True
                if not is_oov:
                    continue
                # Record the word once per occurrence, not once per bad phone.
                temp_oov_words.append(word)
                if word not in seen_oov_words:
                    seen_oov_words.add(word)
                    oov_words.append(word)
            if temp_oov_words:
                oov_files.append(utt_id)
                oov_file_words[utt_id] = temp_oov_words

    if oov_record is not None:
        with open(oov_record, "w", encoding="utf-8") as fw:
            fw.write("oov_words: " + str(oov_words) + "\n")
            fw.write("oov_files: " + str(oov_files) + "\n")
            fw.write("oov_file_words: " + str(oov_file_words) + "\n")

    return oov_words, oov_files, oov_file_words


def get_pronunciation_phones(lexicon_file: Union[str, Path]):
    """Load a pronunciation lexicon into a word -> phones mapping.

    Each line is expected to hold a word followed by its phones. The fields
    may be separated by any run of whitespace (single space, the double space
    some lexicons put after the word, or tabs). Blank lines and lines that
    carry a word without any phone are skipped instead of raising.

    Args:
        lexicon_file (Union[str, Path]): path to the lexicon file

    Returns:
        dict: word -> list of phones. If a word occurs on several lines,
            the last line wins.
    """
    pronunciation_phones = {}
    with open(lexicon_file, "r") as f2:
        for line in f2:
            # split() without a separator collapses whitespace runs, so no
            # empty-string "phones" leak into the result.
            line_list = line.split()
            if len(line_list) < 2:
                continue
            pronunciation_phones[line_list[0]] = line_list[1:]

    return pronunciation_phones


def get_mfa_phone(mfa_phone_file: Union[str, Path]):
    """Collect the phones listed in a pretrained mfa model's meta.yaml.

    Only lines that begin with "-" are considered; for each of them the last
    space-separated token of the stripped line is taken as the phone.

    Args:
        mfa_phone_file (Union[str, Path]): path to the meta.yaml file

    Returns:
        list: phones in file order
    """
    with open(mfa_phone_file, "r") as meta:
        list_entries = [row for row in meta if row.startswith("-")]

    return [entry.strip().split(" ")[-1] for entry in list_entries]


def get_am_phone(am_phone_file: Union[str, Path]):
    """Collect the phones of a pretrained am model from its phone_id_map.txt.

    The first space-separated token of every stripped line is taken as the
    phone.

    Args:
        am_phone_file (Union[str, Path]): path to the phone id map file

    Returns:
        list: one entry per line of the file, in file order
    """
    with open(am_phone_file, "r") as phone_map:
        return [row.strip().split(" ")[0] for row in phone_map]


def get_check_result(label_file: Union[str, Path],
                     am_phone_file: Union[str, Path],
                     input_dir: Union[str, Path],
                     newdir_name: str="newdir",
                     lang: str="zh"):
    """Check if there is any audio in the input that contains the oov word according to label_file.
       Copy audio that does not contain oov word to input_dir / newdir_name.
       Write one `<utt_id>.txt` transcription file per copied audio to input_dir / newdir_name.
       The oov report itself is written to ./oov_info.txt by check_phone.

    Args:
        label_file (Union[str, Path]): input audio label file, format: utt|pronunciation
        am_phone_file (Union[str, Path]): pretrained am model phone file
        input_dir (Union[str, Path]): input dir
        newdir_name (str): directory name saved after checking oov
        lang (str): input audio language, "zh" or "en"

    Raises:
        ValueError: if lang is neither "zh" nor "en".
    """
    # Local import: shutil is not among this module's top-level imports.
    import shutil

    if lang == 'en':
        lexicon_file = DICT_EN
        mfa_phone_file = MFA_PHONE_EN
    elif lang == 'zh':
        lexicon_file = DICT_ZH
        mfa_phone_file = MFA_PHONE_ZH
    else:
        # Fail with a clear message instead of continuing with an unbound
        # lexicon_file.
        raise ValueError(
            "please input right lang!! expected 'zh' or 'en', got %r" %
            (lang, ))

    pronunciation_phones = get_pronunciation_phones(lexicon_file)
    mfa_phones = get_mfa_phone(mfa_phone_file)
    am_phones = get_am_phone(am_phone_file)
    oov_words, oov_files, oov_file_words = check_phone(
        label_file=label_file,
        pronunciation_phones=pronunciation_phones,
        mfa_phones=mfa_phones,
        am_phones=am_phones,
        oov_record="./oov_info.txt",
        lang=lang)
    oov_file_set = set(oov_files)

    input_dir = Path(input_dir).expanduser()
    new_dir = input_dir / newdir_name
    new_dir.mkdir(parents=True, exist_ok=True)
    with open(label_file, "r", encoding="utf-8") as f:
        for line in f:
            # A blank (e.g. trailing) line carries no utterance.
            if not line.strip():
                continue
            fields = line.split("|")
            utt_id = fields[0]
            if utt_id in oov_file_set:
                continue
            transcription = fields[1].strip()
            wav_file = input_dir / (utt_id + ".wav")
            new_wav_file = new_dir / (utt_id + ".wav")
            # Copy through the filesystem API rather than a shell command so
            # that paths containing spaces or shell metacharacters are safe.
            try:
                shutil.copy(str(wav_file), str(new_wav_file))
            except OSError as e:
                # Stay best-effort like the former `cp` call: report the
                # problem and go on with the next utterance.
                print("failed to copy %s: %s" % (wav_file, e))
                continue
            single_file = new_dir / (utt_id + ".txt")
            with open(single_file, "w", encoding="utf-8") as fw:
                fw.write(transcription)


if __name__ == '__main__':
    # Command line entry point: describe the options in a table, parse them,
    # derive the input files and run the oov check.
    parser = argparse.ArgumentParser(
        description="Preprocess audio and then extract features.")

    option_table = [
        ("--input_dir", {
            "type": str,
            "default": "./input/csmsc_mini",
            "help": "directory containing audio and label file",
        }),
        ("--pretrained_model_dir", {
            "type": str,
            "default": "./pretrained_models/fastspeech2_aishell3_ckpt_1.1.0",
            "help": "Path to pretrained model",
        }),
        ("--newdir_name", {
            "type": str,
            "default": "newdir",
            "help": "directory name saved after checking oov",
        }),
        ('--lang', {
            "type": str,
            "default": 'zh',
            "choices": ['zh', 'en'],
            "help": 'Choose input audio language. zh or en',
        }),
    ]
    for option_name, option_kwargs in option_table:
        parser.add_argument(option_name, **option_kwargs)

    args = parser.parse_args()

    # Lexicon and mfa phone file are selected from the language inside
    # get_check_result.
    assert args.lang in ("zh", "en"), "please input right lang! zh or en"

    input_dir = Path(args.input_dir).expanduser()
    pretrained_model_dir = Path(args.pretrained_model_dir).expanduser()

    get_check_result(
        label_file=input_dir / "labels.txt",
        am_phone_file=pretrained_model_dir / "phone_id_map.txt",
        input_dir=input_dir,
        newdir_name=args.newdir_name,
        lang=args.lang)


================================================
FILE: examples/other/tts_finetune/tts3/local/extract_feature.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
import os
from operator import itemgetter
from pathlib import Path
from typing import Dict
from typing import Union

import jsonlines
import numpy as np
import yaml
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.data_table import DataTable
from paddlespeech.t2s.datasets.get_feats import Energy
from paddlespeech.t2s.datasets.get_feats import LogMelFBank
from paddlespeech.t2s.datasets.get_feats import Pitch
from paddlespeech.t2s.datasets.preprocess_utils import get_phn_dur
from paddlespeech.t2s.datasets.preprocess_utils import merge_silence
from paddlespeech.t2s.exps.fastspeech2.preprocess import process_sentences


def read_stats(stats_file: Union[str, Path]):
    """Build a StandardScaler from a saved statistics file.

    The file is loaded with `np.load`; element 0 of the loaded array is used
    as the scaler mean and element 1 as the scaler scale.

    Args:
        stats_file (Union[str, Path]): path to the `.npy` statistics file

    Returns:
        StandardScaler: scaler with `mean_`, `scale_` and `n_features_in_` set
    """
    # Load once and index the array, instead of reading the file from disk
    # for every attribute.
    stats = np.load(stats_file)
    scaler = StandardScaler()
    scaler.mean_ = stats[0]
    scaler.scale_ = stats[1]
    scaler.n_features_in_ = scaler.mean_.shape[0]
    return scaler


def get_stats(pretrained_model_dir: Path):
    """Load the speech, pitch and energy scalers of a pretrained model.

    Args:
        pretrained_model_dir (Path): directory containing `speech_stats.npy`,
            `pitch_stats.npy` and `energy_stats.npy`

    Returns:
        tuple: (speech_scaler, pitch_scaler, energy_scaler)
    """
    stats_names = ("speech_stats.npy", "pitch_stats.npy", "energy_stats.npy")
    scalers = [read_stats(pretrained_model_dir / name) for name in stats_names]

    return scalers[0], scalers[1], scalers[2]


def get_map(duration_file: Union[str, Path],
            dump_dir: Path,
            pretrained_model_dir: Path,
            replace_spkid: int=0):
    """get phone map and speaker map, save on dump_dir

    The phone map is copied unchanged from the pretrained model. The speaker
    map is the pretrained one with the ids starting at `replace_spkid`
    re-assigned to the speakers found in `duration_file`.

    Args:
        duration_file (str): durations.txt
        dump_dir (Path): dump dir
        pretrained_model_dir (Path): pretrained model dir
        replace_spkid (int): first speaker id to overwrite with the new speakers

    Returns:
        sentences: as returned by `get_phn_dur` and then passed through `merge_silence`
        vocab_phones (dict): phone -> id
        vocab_speaker (dict): speaker -> id
    """
    # Local import: shutil is not among this module's top-level imports.
    import shutil

    # copy phone map file from pretrained model path
    phones_dict = dump_dir / "phone_id_map.txt"
    # Use the filesystem API rather than a shell `cp`, so paths with spaces or
    # shell metacharacters work and a failed copy is not silently ignored.
    try:
        shutil.copy(
            str(pretrained_model_dir / "phone_id_map.txt"), str(phones_dict))
    except shutil.SameFileError:
        # dump_dir is the pretrained model dir: the map is already in place.
        pass

    # create a new speaker map file, replace the previous speakers.
    sentences, speaker_set = get_phn_dur(duration_file)
    merge_silence(sentences)
    speakers = sorted(list(speaker_set))
    num = len(speakers)
    speaker_dict = dump_dir / "speaker_id_map.txt"
    spk_dict = {}
    # get raw spkid-spk dict
    with open(pretrained_model_dir / "speaker_id_map.txt", 'r') as fr:
        for line in fr:
            fields = line.strip().split(" ")
            # skip blank lines
            if fields == ['']:
                continue
            spk_dict[fields[1]] = fields[0]

    # replace spk on spkid-spk dict
    assert replace_spkid + num - 1 < len(
        spk_dict), "Please set correct replace spk id."
    for i, spk in enumerate(speakers):
        spk_dict[str(replace_spkid + i)] = spk

    # write a new spk map file
    with open(speaker_dict, 'w') as f:
        for spk_id, spk in spk_dict.items():
            f.write(spk + ' ' + spk_id + '\n')

    vocab_phones = {}
    with open(phones_dict, 'rt') as f:
        phn_id = [line.strip().split() for line in f.readlines()]
    for phn, idx in phn_id:
        vocab_phones[phn] = int(idx)

    # Re-read the file that was just written so the mapping matches exactly
    # what is stored on disk.
    vocab_speaker = {}
    with open(speaker_dict, 'rt') as f:
        spk_id = [line.strip().split() for line in f.readlines()]
    for spk, idx in spk_id:
        vocab_speaker[spk] = int(idx)

    return sentences, vocab_phones, vocab_speaker


def get_extractor(config):
    """Create the mel, pitch and energy feature extractors from a config.

    Args:
        config: object exposing fs, n_fft, n_shift, win_length, window,
            n_mels, fmin, fmax, f0min and f0max as attributes

    Returns:
        tuple: (mel_extractor, pitch_extractor, energy_extractor)
    """
    # STFT framing settings shared by the mel and the energy extractor.
    stft_kwargs = dict(
        n_fft=config.n_fft,
        hop_length=config.n_shift,
        win_length=config.win_length,
        window=config.window)

    mel_extractor = LogMelFBank(
        sr=config.fs,
        n_mels=config.n_mels,
        fmin=config.fmin,
        fmax=config.fmax,
        **stft_kwargs)
    pitch_extractor = Pitch(
        sr=config.fs,
        hop_length=config.n_shift,
        f0min=config.f0min,
        f0max=config.f0max)
    energy_extractor = Energy(**stft_kwargs)

    return mel_extractor, pitch_extractor, energy_extractor


def normalize(speech_scaler,
              pitch_scaler,
              energy_scaler,
              vocab_phones: Dict,
              vocab_speaker: Dict,
              raw_dump_dir: Path,
              type: str):
    """Normalize the raw features of one data split and dump them.

    Reads `raw_dump_dir / type / "raw" / "metadata.jsonl"`, applies the given
    scalers to the speech, pitch and energy arrays of every utterance, saves
    the results as float32 `.npy` files below `raw_dump_dir / type / "norm"`
    and writes a `metadata.jsonl`, sorted by utt_id, next to them.

    Args:
        speech_scaler: scaler applied to the speech feature
        pitch_scaler: scaler applied to the pitch feature
        energy_scaler: scaler applied to the energy feature
        vocab_phones (Dict): phone -> id
        vocab_speaker (Dict): speaker -> id
        raw_dump_dir (Path): root dump dir
        type (str): sub directory of the split to process (e.g. "train")
    """
    split_dir = raw_dump_dir / type
    dumpdir = Path(split_dir / "norm").expanduser()
    dumpdir.mkdir(parents=True, exist_ok=True)

    def _scale_and_save(feature, scaler, dir_name, file_name):
        # Apply the scaler, store the result as float32, return where it went.
        scaled = scaler.transform(feature)
        target_dir = dumpdir / dir_name
        target_dir.mkdir(parents=True, exist_ok=True)
        target_path = target_dir / file_name
        np.save(target_path, scaled.astype(np.float32), allow_pickle=False)
        return target_path

    # get dataset
    with jsonlines.open(split_dir / "raw" / "metadata.jsonl", 'r') as reader:
        metadata = list(reader)
    npy_fields = ("speech", "pitch", "energy")
    dataset = DataTable(
        metadata, converters={field: np.load
                              for field in npy_fields})
    logging.info(f"The number of files = {len(dataset)}.")

    # process each file
    output_metadata = []
    for item in tqdm(dataset):
        utt_id = item['utt_id']
        speech, pitch, energy = item['speech'], item['pitch'], item['energy']

        # normalize, in the order speech -> pitch -> energy
        speech_path = _scale_and_save(speech, speech_scaler, "data_speech",
                                      f"{utt_id}_speech.npy")
        pitch_path = _scale_and_save(pitch, pitch_scaler, "data_pitch",
                                     f"{utt_id}_pitch.npy")
        energy_path = _scale_and_save(energy, energy_scaler, "data_energy",
                                      f"{utt_id}_energy.npy")

        phone_ids = [vocab_phones[p] for p in item['phones']]
        record = {
            "utt_id": item['utt_id'],
            "spk_id": vocab_speaker[item["speaker"]],
            "text": phone_ids,
            "text_lengths": item['text_lengths'],
            "speech_lengths": item['speech_lengths'],
            "durations": item['durations'],
            "speech": str(speech_path),
            "pitch": str(pitch_path),
            "energy": str(energy_path)
        }
        # add spk_emb for voice cloning
        if "spk_emb" in item:
            record["spk_emb"] = str(item["spk_emb"])

        output_metadata.append(record)

    output_metadata = sorted(output_metadata, key=lambda rec: rec['utt_id'])
    output_metadata_path = Path(dumpdir) / "metadata.jsonl"
    with jsonlines.open(output_metadata_path, 'w') as writer:
        for record in output_metadata:
            writer.write(record)
    logging.info(f"metadata dumped into {output_metadata_path}")


def extract_feature(duration_file: str,
                    config,
                    input_dir: Path,
                    dump_dir: Path,
                    pretrained_model_dir: Path,
                    replace_spkid: int=0):
    """Extract raw features for the train/dev/test splits and normalize them.

    Every ``*.wav`` file found recursively under ``input_dir`` is collected in
    sorted order. The last two files become the dev and test split (one file
    each) and everything before them is the train split. For each non-empty
    split, ``process_sentences`` writes into ``dump_dir/<split>/raw`` and
    ``normalize`` is then called for that split with the scalers returned by
    ``get_stats(pretrained_model_dir)``.

    Args:
        duration_file (str): duration file forwarded to ``get_map``.
        config: configuration object forwarded to ``get_extractor`` and
            ``process_sentences``.
        input_dir (Path): directory searched recursively for wav files.
        dump_dir (Path): root directory of the dumped features.
        pretrained_model_dir (Path): pretrained model directory, forwarded to
            ``get_map`` and ``get_stats``.
        replace_spkid (int, optional): forwarded to ``get_map``. Defaults to 0.
    """
    sentences, vocab_phones, vocab_speaker = get_map(
        duration_file, dump_dir, pretrained_model_dir, replace_spkid)
    mel_extractor, pitch_extractor, energy_extractor = get_extractor(config)

    wav_files = sorted(list((input_dir).rglob("*.wav")))
    # split data into 3 sections, train: len(wav_files) - 2, dev: 1, test: 1
    # NOTE: with fewer than two wav files num_train is negative and the
    # slices below no longer follow that layout.
    num_train = len(wav_files) - 2
    num_dev = 1
    print(num_train, num_dev)

    dev_end = num_train + num_dev
    splits = [
        ("train", wav_files[:num_train]),
        ("dev", wav_files[num_train:dev_end]),
        ("test", wav_files[dev_end:]),
    ]

    # all raw output directories are created up front, even for empty splits
    raw_dirs = {}
    for split_name, _ in splits:
        raw_dir = dump_dir / split_name / "raw"
        raw_dir.mkdir(parents=True, exist_ok=True)
        raw_dirs[split_name] = raw_dir

    # settings shared by the 3 sections
    shared_kwargs = dict(
        config=config,
        sentences=sentences,
        mel_extractor=mel_extractor,
        pitch_extractor=pitch_extractor,
        energy_extractor=energy_extractor,
        nprocs=4,
        cut_sil=True,
        spk_emb_dir=None,
        write_metadata_method="w")
    speech_scaler, pitch_scaler, energy_scaler = get_stats(pretrained_model_dir)

    for split_name, split_files in splits:
        if not split_files:
            continue
        process_sentences(
            fps=split_files, output_dir=raw_dirs[split_name], **shared_kwargs)
        # norm
        normalize(speech_scaler, pitch_scaler, energy_scaler, vocab_phones,
                  vocab_speaker, dump_dir, split_name)


if __name__ == '__main__':
    # Command-line entry point: parse the options, load the pretrained
    # model's default.yaml and run feature extraction.
    parser = argparse.ArgumentParser(
        description="Preprocess audio and then extract features.")

    # (flag, type, default, help) for each option, in declaration order
    option_specs = [
        ("--duration_file", str, "./durations.txt", "duration file"),
        ("--input_dir", str, "./input/baker_mini/newdir",
         "directory containing audio and label file"),
        ("--dump_dir", str, "./dump", "dump dir"),
        ("--pretrained_model_dir", str,
         "./pretrained_models/fastspeech2_aishell3_ckpt_1.1.0",
         "Path to pretrained model"),
        ("--replace_spkid", int, 0, "replace spk id"),
    ]
    for flag, value_type, default_value, help_text in option_specs:
        parser.add_argument(
            flag, type=value_type, default=default_value, help=help_text)

    args = parser.parse_args()

    # resolve "~" in the user supplied locations
    pretrained_model_dir = Path(args.pretrained_model_dir).expanduser()
    input_dir = Path(args.input_dir).expanduser()
    dump_dir = Path(args.dump_dir).expanduser()
    dump_dir.mkdir(parents=True, exist_ok=True)

    # the feature configuration comes from the pretrained model
    config_file = pretrained_model_dir / "default.yaml"
    with open(config_file) as f:
        config = CfgNode(yaml.safe_load(f))

    extract_feature(
        duration_file=args.duration_file,
        config=config,
        input_dir=input_dir,
        dump_dir=dump_dir,
        pretrained_model_dir=pretrained_model_dir,
        replace_spkid=args.replace_spkid)


================================================
FILE: examples/other/tts_finetune/tts3/local/finetune.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
import os
import shutil
from pathlib import Path
from typing import List

import jsonlines
import numpy as np
import paddle
import yaml
from paddle import DataParallel
from paddle import distributed as dist
from paddle.io import DataLoader
from paddle.io import DistributedBatchSampler
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.am_batch_fn import fastspeech2_multi_spk_batch_fn
from paddlespeech.t2s.datasets.am_batch_fn import fastspeech2_single_spk_batch_fn
from paddlespeech.t2s.datasets.data_table import DataTable
from paddlespeech.t2s.models.fastspeech2 import FastSpeech2
from paddlespeech.t2s.models.fastspeech2 import FastSpeech2Evaluator
from paddlespeech.t2s.models.fastspeech2 import FastSpeech2Updater
from paddlespeech.t2s.training.extensions.snapshot import Snapshot
from paddlespeech.t2s.training.extensions.visualizer import VisualDL
from paddlespeech.t2s.training.optimizer import build_optimizers
from paddlespeech.t2s.training.seeding import seed_everything
from paddlespeech.t2s.training.trainer import Trainer


class TrainArgs:
    """Attribute container handed to ``train_sp`` in place of parsed CLI args.

    All path-like values are stored as strings. The metadata and id-map
    locations are derived from ``dump_dir``.
    """

    def __init__(self,
                 ngpu,
                 config_file,
                 dump_dir: Path,
                 output_dir: Path,
                 frozen_layers: List[str]):
        self.ngpu = ngpu
        # names of the layers to freeze
        self.frozen_layers = frozen_layers
        # speaker embeddings are never used by this finetune flow
        self.voice_cloning = False

        # fastspeech2 config file
        self.config = str(config_file)
        # directory the finetuned model is written to
        self.output_dir = str(output_dir)

        # normalized metadata of the train / dev splits
        self.train_metadata = str(
            dump_dir / "train" / "norm" / "metadata.jsonl")
        self.dev_metadata = str(dump_dir / "dev" / "norm" / "metadata.jsonl")

        # phone and speaker id maps stored next to the dumped features
        self.phones_dict = str(dump_dir / "phone_id_map.txt")
        self.speaker_dict = str(dump_dir / "speaker_id_map.txt")


def freeze_layer(model, layers: List[str]):
    """Mark every parameter of the named sub-layers as non-trainable.

    Args:
        model: object whose attributes named in ``layers`` provide a
            ``parameters()`` method.
        layers (List[str]): attribute expressions, relative to ``model``,
            of the layers to freeze.
    """
    # NOTE(review): each name is spliced into an expression that is passed to
    # eval(). The names come from the finetune config file, so only trusted
    # configs should be used with this function.
    for layer_name in layers:
        layer_params = eval("model." + layer_name + ".parameters()")
        for layer_param in layer_params:
            layer_param.trainable = False


def train_sp(args, config):
    """Run one FastSpeech2 training process (single device or one rank).

    Builds the datasets, dataloaders, model, optimizer, updater and trainer
    from ``args`` and ``config`` and then runs the trainer until
    ``config.max_epoch``.

    Args:
        args: object providing the attributes ``ngpu``, ``config``,
            ``train_metadata``, ``dev_metadata``, ``output_dir``,
            ``phones_dict``, ``speaker_dict``, ``voice_cloning`` and
            ``frozen_layers`` (see ``TrainArgs``).
        config: configuration node; the keys read here are ``seed``,
            ``batch_size``, ``num_workers``, ``n_mels``, ``model``,
            ``optimizer``, ``updater``, ``max_epoch`` and ``num_snapshots``.

    NOTE(review): no checkpoint is loaded explicitly in this function;
    presumably the trainer/Snapshot extension resumes from the checkpoints
    directory under ``output_dir`` -- confirm.
    """
    # decides device type and whether to run in parallel
    # setup running environment correctly
    if (not paddle.is_compiled_with_cuda()) or args.ngpu == 0:
        paddle.set_device("cpu")
    else:
        paddle.set_device("gpu")
    world_size = paddle.distributed.get_world_size()
    if world_size > 1:
        paddle.distributed.init_parallel_env()

    # set the random seed, it is a must for multiprocess training
    seed_everything(config.seed)

    print(
        f"rank: {dist.get_rank()}, pid: {os.getpid()}, parent_pid: {os.getppid()}",
    )
    # metadata fields loaded for every sample; the converters turn the stored
    # .npy paths into arrays
    fields = [
        "text", "text_lengths", "speech", "speech_lengths", "durations",
        "pitch", "energy"
    ]
    converters = {"speech": np.load, "pitch": np.load, "energy": np.load}
    spk_num = None
    # TrainArgs always sets speaker_dict to a path string, so callers using it
    # always take the multi-speaker branch
    if args.speaker_dict is not None:
        print("multiple speaker fastspeech2!")
        collate_fn = fastspeech2_multi_spk_batch_fn
        with open(args.speaker_dict, 'rt') as f:
            spk_id = [line.strip().split() for line in f.readlines()]
        # one line of the speaker dict per speaker
        spk_num = len(spk_id)
        fields += ["spk_id"]
    elif args.voice_cloning:
        print("Training voice cloning!")
        collate_fn = fastspeech2_multi_spk_batch_fn
        fields += ["spk_emb"]
        converters["spk_emb"] = np.load
    else:
        print("single speaker fastspeech2!")
        collate_fn = fastspeech2_single_spk_batch_fn
    print("spk_num:", spk_num)

    # dataloader has been too verbose
    logging.getLogger("DataLoader").disabled = True

    # construct dataset for training and validation
    with jsonlines.open(args.train_metadata, 'r') as reader:
        train_metadata = list(reader)
    train_dataset = DataTable(
        data=train_metadata,
        fields=fields,
        converters=converters, )
    with jsonlines.open(args.dev_metadata, 'r') as reader:
        dev_metadata = list(reader)
    dev_dataset = DataTable(
        data=dev_metadata,
        fields=fields,
        converters=converters, )

    # collate function and dataloader
    # the batch size is capped at the number of training samples; presumably
    # so that drop_last=True still leaves one full batch on tiny datasets
    train_batch_size = min(len(train_metadata), config.batch_size)
    train_sampler = DistributedBatchSampler(
        train_dataset,
        batch_size=train_batch_size,
        shuffle=True,
        drop_last=True)

    print("samplers done!")

    train_dataloader = DataLoader(
        train_dataset,
        batch_sampler=train_sampler,
        collate_fn=collate_fn,
        num_workers=config.num_workers)

    dev_dataloader = DataLoader(
        dev_dataset,
        shuffle=False,
        drop_last=False,
        batch_size=config.batch_size,
        collate_fn=collate_fn,
        num_workers=config.num_workers)
    print("dataloaders done!")

    # the vocabulary size is the number of lines in the phone id map
    with open(args.phones_dict, "r") as f:
        phn_id = [line.strip().split() for line in f.readlines()]
    vocab_size = len(phn_id)
    print("vocab_size:", vocab_size)

    odim = config.n_mels
    model = FastSpeech2(
        idim=vocab_size, odim=odim, spk_num=spk_num, **config["model"])

    # freeze layer
    # done before the DataParallel wrap and before the optimizer is built
    if args.frozen_layers != []:
        freeze_layer(model, args.frozen_layers)

    if world_size > 1:
        model = DataParallel(model)
    print("model done!")

    optimizer = build_optimizers(model, **config["optimizer"])
    print("optimizer done!")

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    if dist.get_rank() == 0:
        # file name is taken as the text after the last "/" of the path
        config_name = args.config.split("/")[-1]
        # copy conf to output_dir
        # note: this copies the file at args.config as it is on disk, not the
        # in-memory `config` object passed to this function
        shutil.copyfile(args.config, output_dir / config_name)

    updater = FastSpeech2Updater(
        model=model,
        optimizer=optimizer,
        dataloader=train_dataloader,
        output_dir=output_dir,
        **config["updater"])

    trainer = Trainer(updater, (config.max_epoch, 'epoch'), output_dir)

    evaluator = FastSpeech2Evaluator(
        model, dev_dataloader, output_dir=output_dir, **config["updater"])

    # evaluation and VisualDL logging are attached on rank 0 only; the
    # snapshot extension is attached on every rank
    if dist.get_rank() == 0:
        trainer.extend(evaluator, trigger=(1, "epoch"))
        trainer.extend(VisualDL(output_dir), trigger=(1, "iteration"))
    trainer.extend(
        Snapshot(max_size=config.num_snapshots), trigger=(1, 'epoch'))
    trainer.run()


if __name__ == '__main__':
    # Command-line entry point: merge the pretrained model config with the
    # finetune overrides and launch training on one or several devices.
    parser = argparse.ArgumentParser(
        description="Finetune a pretrained FastSpeech2 model.")

    parser.add_argument(
        "--pretrained_model_dir",
        type=str,
        default="./pretrained_models/fastspeech2_aishell3_ckpt_1.1.0",
        help="Path to pretrained model")

    parser.add_argument(
        "--dump_dir",
        type=str,
        default="./dump",
        help="directory to save feature files and metadata.")

    parser.add_argument(
        "--output_dir",
        type=str,
        default="./exp/default/",
        help="directory to save finetune model.")

    parser.add_argument(
        "--ngpu", type=int, default=2, help="if ngpu=0, use cpu.")

    parser.add_argument("--epoch", type=int, default=100, help="finetune epoch")
    parser.add_argument(
        "--finetune_config",
        type=str,
        default="./finetune.yaml",
        help="Path to finetune config file")

    args = parser.parse_args()

    dump_dir = Path(args.dump_dir).expanduser()
    dump_dir.mkdir(parents=True, exist_ok=True)
    output_dir = Path(args.output_dir).expanduser()
    output_dir.mkdir(parents=True, exist_ok=True)
    pretrained_model_dir = Path(args.pretrained_model_dir).expanduser()

    # read config
    config_file = pretrained_model_dir / "default.yaml"
    with open(config_file) as f:
        config = CfgNode(yaml.safe_load(f))
    # the requested finetune epochs are added on top of the max_epoch stored
    # in the pretrained config
    config.max_epoch = config.max_epoch + args.epoch

    with open(args.finetune_config) as f2:
        finetune_config = CfgNode(yaml.safe_load(f2))
    # a non-positive value in the finetune config keeps the pretrained setting
    if finetune_config.batch_size > 0:
        config.batch_size = finetune_config.batch_size
    if finetune_config.learning_rate > 0:
        config.optimizer.learning_rate = finetune_config.learning_rate
    if finetune_config.num_snapshots > 0:
        config.num_snapshots = finetune_config.num_snapshots
    frozen_layers = finetune_config.frozen_layers
    assert isinstance(frozen_layers,
                      list), "frozen_layers should be set a list."

    # create a new args for training
    train_args = TrainArgs(args.ngpu, config_file, dump_dir, output_dir,
                           frozen_layers)

    # finetune models
    # dispatch: one spawned process per GPU when several are requested,
    # otherwise train in the current process
    if args.ngpu > 1:
        dist.spawn(train_sp, (train_args, config), nprocs=args.ngpu)
    else:
        train_sp(train_args, config)


================================================
FILE: examples/other/tts_finetune/tts3/local/generate_duration.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from pathlib import Path

from utils.gen_duration_from_textgrid import gen_duration_from_textgrid

if __name__ == '__main__':
    # Command-line entry point: turn the MFA alignment results into the
    # durations file used by feature extraction.
    parser = argparse.ArgumentParser(
        description="Generate durations.txt from MFA alignment results.")

    parser.add_argument(
        "--mfa_dir",
        type=str,
        default="./mfa_result",
        help="directory containing the aligned files")

    args = parser.parse_args()

    # sample rate and hop size passed to gen_duration_from_textgrid
    # NOTE(review): presumably these must match `fs` / `n_shift` of the
    # pretrained model config -- confirm.
    fs = 24000
    n_shift = 300
    duration_file = "./durations.txt"
    mfa_dir = Path(args.mfa_dir).expanduser()
    mfa_dir.mkdir(parents=True, exist_ok=True)

    gen_duration_from_textgrid(mfa_dir, duration_file, fs, n_shift)


================================================
FILE: examples/other/tts_finetune/tts3/local/get_mfa_result.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
from pathlib import Path
from typing import Union

# Pronunciation dictionaries used by the aligner, per language.
DICT_EN = 'tools/aligner/cmudict-0.7b'
DICT_ZH = 'tools/aligner/simple.lexicon'
# Acoustic model archives used by the aligner, per language.
MODEL_DIR_EN = 'tools/aligner/vctk_model.zip'
MODEL_DIR_ZH = 'tools/aligner/aishell3_model.zip'
# meta.yaml files of the unpacked acoustic models.
MFA_PHONE_EN = 'tools/aligner/vctk_model/meta.yaml'
MFA_PHONE_ZH = 'tools/aligner/aishell3_model/meta.yaml'
# Directory holding the aligner executables; it is put in front of PATH at
# import time so that `mfa_align` can be found.
MFA_PATH = 'tools/montreal-forced-aligner/bin'
os.environ['PATH'] = f"{MFA_PATH}/:{os.environ['PATH']}"


def get_mfa_result(
        input_dir: Union[str, Path],
        mfa_dir: Union[str, Path],
        lang: str='en', ):
    """Run ``mfa_align`` on ``input_dir`` and write the result to ``mfa_dir``.

    Args:
        input_dir (Union[str, Path]): input dir including wav file and label
        mfa_dir (Union[str, Path]): mfa result dir
        lang (str, optional): input audio language, 'en' or 'zh'.
            Defaults to 'en'.

    Raises:
        ValueError: if ``lang`` is neither 'en' nor 'zh'.
        FileNotFoundError: if the ``mfa_align`` executable cannot be found.
        subprocess.CalledProcessError: if ``mfa_align`` exits with a non-zero
            status.
    """
    import subprocess

    # pick the dictionary and acoustic model of the requested language
    if lang == 'en':
        dictionary = DICT_EN
        acoustic_model = MODEL_DIR_EN
    elif lang == 'zh':
        dictionary = DICT_ZH
        acoustic_model = MODEL_DIR_ZH
    else:
        # previously this only printed a message and then failed on an
        # unbound variable
        raise ValueError(
            "unsupported lang %r, please use 'en' or 'zh'" % (lang, ))

    # The arguments are passed as a list (no shell), so paths containing
    # spaces or shell metacharacters are handled correctly. A leading "~" was
    # expanded by the shell before, so it is expanded explicitly here.
    cmd = [
        'mfa_align',
        os.path.expanduser(str(input_dir)),
        dictionary,
        acoustic_model,
        os.path.expanduser(str(mfa_dir)),
    ]
    # fail loudly instead of ignoring the aligner's exit status
    subprocess.run(cmd, check=True)


if __name__ == '__main__':
    # Command-line entry point: align the audio and labels in --input_dir and
    # store the alignment in --mfa_dir.
    parser = argparse.ArgumentParser(
        description="Align audio and labels with Montreal Forced Aligner.")

    parser.add_argument(
        "--input_dir",
        type=str,
        default="./input/baker_mini/newdir",
        help="directory containing audio and label file")

    parser.add_argument(
        "--mfa_dir",
        type=str,
        default="./mfa_result",
        help="directory to save aligned files")

    parser.add_argument(
        '--lang',
        type=str,
        default='zh',
        choices=['zh', 'en'],
        help='Choose input audio language. zh or en')

    args = parser.parse_args()

    get_mfa_result(
        input_dir=args.input_dir, mfa_dir=args.mfa_dir, lang=args.lang)


================================================
FILE: examples/other/tts_finetune/tts3/local/prepare_env.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
from pathlib import Path


def generate_finetune_env(output_dir: Path, pretrained_model_dir: Path):
    """Seed ``output_dir/checkpoints`` with the pretrained checkpoint.

    The first ``*.pdz`` file (in sorted order) found recursively under
    ``pretrained_model_dir`` is copied into ``output_dir/checkpoints`` and a
    one-line ``records.jsonl`` pointing at the copy is written next to it.

    Args:
        output_dir (Path): finetune output directory.
        pretrained_model_dir (Path): directory containing the pretrained
            ``*.pdz`` checkpoint.

    Raises:
        FileNotFoundError: if no ``*.pdz`` file exists under
            ``pretrained_model_dir``.
        ValueError: if the checkpoint file name does not end with
            ``_<iteration>.pdz``.
    """
    import json
    import shutil

    output_dir = output_dir / "checkpoints/"
    output_dir = output_dir.resolve()
    output_dir.mkdir(parents=True, exist_ok=True)

    checkpoints = sorted(pretrained_model_dir.rglob("*.pdz"))
    if not checkpoints:
        raise FileNotFoundError(
            "no *.pdz checkpoint found under %s" % pretrained_model_dir)
    model_path = checkpoints[0].resolve()
    model_file = model_path.name
    # e.g. "snapshot_iter_96699.pdz" -> 96699
    iteration = int(model_file.split("_")[-1].split(".")[0])

    # copy without going through a shell, so paths with spaces work and a
    # failed copy raises instead of being ignored
    shutil.copy(str(model_path), str(output_dir))

    records_file = output_dir / "records.jsonl"
    record = {
        # fixed placeholder timestamp, kept as-is from the original format
        "time": "2022-08-06 07:51:53.463650",
        "path": str(output_dir / model_file),
        "iteration": iteration,
    }
    with open(records_file, "w") as f:
        # json.dumps escapes quotes/backslashes in the path correctly
        f.write(json.dumps(record) + "\n")


if __name__ == '__main__':
    # Command-line entry point: copy the pretrained checkpoint into the
    # finetune output directory so that training can start from it.
    parser = argparse.ArgumentParser(
        description="Prepare the finetune checkpoint directory from a "
        "pretrained model.")

    parser.add_argument(
        "--pretrained_model_dir",
        type=str,
        default="./pretrained_models/fastspeech2_aishell3_ckpt_1.1.0",
        help="Path to pretrained model")

    parser.add_argument(
        "--output_dir",
        type=str,
        default="./exp/default/",
        help="directory to save finetune model.")

    args = parser.parse_args()

    output_dir = Path(args.output_dir).expanduser()
    output_dir.mkdir(parents=True, exist_ok=True)
    pretrained_model_dir = Path(args.pretrained_model_dir).expanduser()

    generate_finetune_env(output_dir, pretrained_model_dir)


================================================
FILE: examples/other/tts_finetune/tts3/path.sh
================================================
#!/bin/bash
# Environment for this example; meant to be sourced from the run scripts.

# repository root, four levels above the current working directory
export MAIN_ROOT=$(realpath ${PWD}/../../../../)

# make the repository root and its utils/ visible to the shell and to Python
export PATH="${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}"
export PYTHONPATH="${MAIN_ROOT}:${PYTHONPATH}"

# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export LC_ALL=C PYTHONIOENCODING=UTF-8
export PYTHONDONTWRITEBYTECODE=1

# directory of the experiment scripts of the selected model
MODEL=fastspeech2
export BIN_DIR="${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}"


================================================
FILE: examples/other/tts_finetune/tts3/run.sh
================================================
#!/bin/bash
# Finetune pipeline for a pretrained FastSpeech2 model (Mandarin setup).
# Stages: 0 check OOV, 1 MFA alignment, 2 durations.txt, 3 feature
# extraction, 4 finetune environment, 5 finetune, 6 end-to-end synthesis.

# stop at the first failing command
set -e
source path.sh


# input data directory, passed to local/check_oov.py
input_dir=./input/csmsc_mini
# sub-directory of input_dir that stages 1 and 3 read from
newdir_name="newdir"
new_dir=${input_dir}/${newdir_name}
pretrained_model_dir=./pretrained_models/fastspeech2_aishell3_ckpt_1.1.0
mfa_dir=./mfa_result
dump_dir=./dump
output_dir=./exp/default
lang=zh
ngpu=1
finetune_config=./conf/finetune.yaml
replace_spkid=0

# checkpoint name (without .pdz) loaded from ${output_dir}/checkpoints in stage 6
ckpt=snapshot_iter_96699

gpus=1
# NOTE(review): this is a plain assignment (no `export`) made before
# parse_options.sh runs. Unless CUDA_VISIBLE_DEVICES is already exported by
# the caller it is not passed to the python processes below, and a `--gpus`
# override is not reflected here -- confirm the intent.
CUDA_VISIBLE_DEVICES=${gpus}
stage=0
stop_stage=100


# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this can not be mixed use with `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

# check oov
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    echo "check oov"
    python3 local/check_oov.py \
        --input_dir=${input_dir} \
        --pretrained_model_dir=${pretrained_model_dir} \
        --newdir_name=${newdir_name} \
        --lang=${lang}
fi

# get mfa result
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    echo "get mfa result"
    python3 local/get_mfa_result.py \
        --input_dir=${new_dir} \
        --mfa_dir=${mfa_dir} \
        --lang=${lang}
fi

# generate durations.txt
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    echo "generate durations.txt"
    python3 local/generate_duration.py \
        --mfa_dir=${mfa_dir} 
fi

# extract feature
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    echo "extract feature"
    python3 local/extract_feature.py \
        --duration_file="./durations.txt" \
        --input_dir=${new_dir} \
        --dump_dir=${dump_dir} \
        --pretrained_model_dir=${pretrained_model_dir} \
        --replace_spkid=$replace_spkid
fi

# create finetune env
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    echo "create finetune env"
    python3 local/prepare_env.py \
        --pretrained_model_dir=${pretrained_model_dir} \
        --output_dir=${output_dir}
fi

# finetune
if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
    echo "finetune..."
    python3 local/finetune.py \
        --pretrained_model_dir=${pretrained_model_dir} \
        --dump_dir=${dump_dir} \
        --output_dir=${output_dir} \
        --ngpu=${ngpu} \
        --epoch=100 \
        --finetune_config=${finetune_config}
fi

# synthesize e2e
# uses the finetuned acoustic model checkpoint together with the pretrained
# HiFiGAN vocoder files under pretrained_models/
if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then
    echo "in hifigan syn_e2e"
    python3 ${BIN_DIR}/../synthesize_e2e.py \
        --am=fastspeech2_aishell3 \
        --am_config=${pretrained_model_dir}/default.yaml \
        --am_ckpt=${output_dir}/checkpoints/${ckpt}.pdz \
        --am_stat=${pretrained_model_dir}/speech_stats.npy \
        --voc=hifigan_aishell3 \
        --voc_config=pretrained_models/hifigan_aishell3_ckpt_0.2.0/default.yaml \
        --voc_ckpt=pretrained_models/hifigan_aishell3_ckpt_0.2.0/snapshot_iter_2500000.pdz \
        --voc_stat=pretrained_models/hifigan_aishell3_ckpt_0.2.0/feats_stats.npy \
        --lang=zh \
        --text=${BIN_DIR}/../../assets/sentences.txt \
        --output_dir=./test_e2e/ \
        --phones_dict=${dump_dir}/phone_id_map.txt \
        --speaker_dict=${dump_dir}/speaker_id_map.txt \
        --spk_id=$replace_spkid
fi


================================================
FILE: examples/other/tts_finetune/tts3/run_en.sh
================================================
#!/bin/bash
# Finetune pipeline for a pretrained FastSpeech2 model (English setup).
# Stages: 0 check OOV, 1 MFA alignment, 2 durations.txt, 3 feature
# extraction, 4 finetune environment, 5 finetune, 6 end-to-end synthesis.

# stop at the first failing command
set -e
source path.sh

# input data directory, passed to local/check_oov.py
input_dir=./input/ljspeech_mini
# sub-directory of input_dir that stages 1 and 3 read from
newdir_name="newdir"
new_dir=${input_dir}/${newdir_name}
pretrained_model_dir=./pretrained_models/fastspeech2_vctk_ckpt_1.2.0
mfa_dir=./mfa_result
dump_dir=./dump
output_dir=./exp/default
lang=en
ngpu=1
finetune_config=./conf/finetune.yaml
replace_spkid=0

# checkpoint name (without .pdz) loaded from ${output_dir}/checkpoints in stage 6
ckpt=snapshot_iter_66300

gpus=1
# NOTE(review): this is a plain assignment (no `export`) made before
# parse_options.sh runs. Unless CUDA_VISIBLE_DEVICES is already exported by
# the caller it is not passed to the python processes below, and a `--gpus`
# override is not reflected here -- confirm the intent.
CUDA_VISIBLE_DEVICES=${gpus}
stage=0
stop_stage=100


# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this can not be mixed use with `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

# check oov
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    echo "check oov"
    python3 local/check_oov.py \
        --input_dir=${input_dir} \
        --pretrained_model_dir=${pretrained_model_dir} \
        --newdir_name=${newdir_name} \
        --lang=${lang}
fi

# get mfa result
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    echo "get mfa result"
    python3 local/get_mfa_result.py \
        --input_dir=${new_dir} \
        --mfa_dir=${mfa_dir} \
        --lang=${lang}
fi

# generate durations.txt
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    echo "generate durations.txt"
    python3 local/generate_duration.py \
        --mfa_dir=${mfa_dir} 
fi

# extract feature
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    echo "extract feature"
    python3 local/extract_feature.py \
        --duration_file="./durations.txt" \
        --input_dir=${new_dir} \
        --dump_dir=${dump_dir} \
        --pretrained_model_dir=${pretrained_model_dir} \
        --replace_spkid=$replace_spkid
fi

# create finetune env
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    echo "create finetune env"
    python3 local/prepare_env.py \
        --pretrained_model_dir=${pretrained_model_dir} \
        --output_dir=${output_dir}
fi

# finetune
if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
    echo "finetune..."
    python3 local/finetune.py \
        --pretrained_model_dir=${pretrained_model_dir} \
        --dump_dir=${dump_dir} \
        --output_dir=${output_dir} \
        --ngpu=${ngpu} \
        --epoch=100 \
        --finetune_config=${finetune_config}
fi

# synthesize e2e
# uses the finetuned acoustic model checkpoint together with the pretrained
# HiFiGAN vocoder files under pretrained_models/
if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then
    echo "in hifigan syn_e2e"
    python3 ${BIN_DIR}/../synthesize_e2e.py \
        --am=fastspeech2_vctk \
        --am_config=${pretrained_model_dir}/default.yaml \
        --am_ckpt=${output_dir}/checkpoints/${ckpt}.pdz \
        --am_stat=${pretrained_model_dir}/speech_stats.npy \
        --voc=hifigan_vctk \
        --voc_config=pretrained_models/hifigan_vctk_ckpt_0.2.0/default.yaml \
        --voc_ckpt=pretrained_models/hifigan_vctk_ckpt_0.2.0/snapshot_iter_2500000.pdz \
        --voc_stat=pretrained_models/hifigan_vctk_ckpt_0.2.0/feats_stats.npy \
        --lang=en \
        --text=${BIN_DIR}/../../assets/sentences_en.txt \
        --output_dir=./test_e2e/ \
        --phones_dict=${dump_dir}/phone_id_map.txt \
        --speaker_dict=${dump_dir}/speaker_id_map.txt \
        --spk_id=$replace_spkid
fi


================================================
FILE: examples/other/tts_finetune/tts3/run_mix.sh
================================================
#!/bin/bash
# Finetune pipeline for a pretrained mixed-language FastSpeech2 model.
# Stages: 0 check OOV, 1 MFA alignment, 2 durations.txt, 3 feature
# extraction, 4 finetune environment, 5 finetune, 6 end-to-end synthesis.

# stop at the first failing command
set -e
source path.sh


# input data directory, passed to local/check_oov.py
input_dir=./input/SSB0005_mini
# sub-directory of input_dir that stages 1 and 3 read from
newdir_name="newdir"
new_dir=${input_dir}/${newdir_name}
pretrained_model_dir=./pretrained_models/fastspeech2_mix_ckpt_1.2.0
mfa_dir=./mfa_result
dump_dir=./dump
output_dir=./exp/default
lang=zh
ngpu=1
finetune_config=./conf/finetune.yaml
replace_spkid=174  # csmsc: 174, ljspeech: 175, aishell3: 0~173, vctk: 176

# checkpoint name (without .pdz) loaded from ${output_dir}/checkpoints in stage 6
ckpt=snapshot_iter_99300

gpus=1
# NOTE(review): this is a plain assignment (no `export`) made before
# parse_options.sh runs. Unless CUDA_VISIBLE_DEVICES is already exported by
# the caller it is not passed to the python processes below, and a `--gpus`
# override is not reflected here -- confirm the intent.
CUDA_VISIBLE_DEVICES=${gpus}
stage=0
stop_stage=100


# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this can not be mixed use with `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

# check oov
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    echo "check oov"
    python3 local/check_oov.py \
        --input_dir=${input_dir} \
        --pretrained_model_dir=${pretrained_model_dir} \
        --newdir_name=${newdir_name} \
        --lang=${lang}
fi

# get mfa result
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    echo "get mfa result"
    python3 local/get_mfa_result.py \
        --input_dir=${new_dir} \
        --mfa_dir=${mfa_dir} \
        --lang=${lang}
fi

# generate durations.txt
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    echo "generate durations.txt"
    python3 local/generate_duration.py \
        --mfa_dir=${mfa_dir} 
fi

# extract feature
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    echo "extract feature"
    python3 local/extract_feature.py \
        --duration_file="./durations.txt" \
        --input_dir=${new_dir} \
        --dump_dir=${dump_dir} \
        --pretrained_model_dir=${pretrained_model_dir} \
        --replace_spkid=$replace_spkid

fi

# create finetune env
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    echo "create finetune env"
    python3 local/prepare_env.py \
        --pretrained_model_dir=${pretrained_model_dir} \
        --output_dir=${output_dir}
fi

# finetune
if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
    echo "finetune..."
    python3 local/finetune.py \
        --pretrained_model_dir=${pretrained_model_dir} \
        --dump_dir=${dump_dir} \
        --output_dir=${output_dir} \
        --ngpu=${ngpu} \
        --epoch=100 \
        --finetune_config=${finetune_config}
fi

# synthesize e2e
# uses the finetuned acoustic model checkpoint together with the pretrained
# HiFiGAN vocoder files under pretrained_models/
# NOTE(review): `--am=fastspeech2_aishell3` is combined with `--lang=mix` and
# the mix pretrained model here -- confirm this is the intended model tag.
if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then
    echo "in hifigan syn_e2e"
    python3 ${BIN_DIR}/../synthesize_e2e.py \
        --am=fastspeech2_aishell3 \
        --am_config=${pretrained_model_dir}/default.yaml \
        --am_ckpt=${output_dir}/checkpoints/${ckpt}.pdz \
        --am_stat=${pretrained_model_dir}/speech_stats.npy \
        --voc=hifigan_aishell3 \
        --voc_config=pretrained_models/hifigan_aishell3_ckpt_0.2.0/default.yaml \
        --voc_ckpt=pretrained_models/hifigan_aishell3_ckpt_0.2.0/snapshot_iter_2500000.pdz \
        --voc_stat=pretrained_models/hifigan_aishell3_ckpt_0.2.0/feats_stats.npy \
        --lang=mix \
        --text=${BIN_DIR}/../../assets/sentences_mix.txt \
        --output_dir=./test_e2e/ \
        --phones_dict=${dump_dir}/phone_id_map.txt \
        --speaker_dict=${dump_dir}/speaker_id_map.txt \
        --spk_id=$replace_spkid
fi


================================================
FILE: examples/tal_cs/asr1/README.md
================================================
# Transformer/Conformer ASR with TALCS
This example contains code used to train a [u2](https://arxiv.org/pdf/2012.05481.pdf) model (Transformer or [Conformer](https://arxiv.org/pdf/2005.08100.pdf) model) with the [TALCS dataset](https://ai.100tal.com/dataset).
## Overview
All the scripts you need are in `run.sh`. There are several stages in `run.sh`, and each stage has its function.
| Stage | Function                                                     |
|:---- |:----------------------------------------------------------- |
| 0     | Process data. It includes: <br>       (1) Download the dataset <br>       (2) Calculate the CMVN of the train dataset <br>       (3) Get the vocabulary file <br>       (4) Get the manifest files of the train, development and test dataset<br>       (5) Get the sentencepiece model |
| 1     | Train the model                                              |
| 2     | Get the final model by averaging the top-k models, set k = 1 means to choose the best model |
| 3     | Test the final model performance                             |
| 4     | Infer the single audio file                                  |

You can choose to run a range of stages by setting `stage` and `stop_stage`.

For example, if you want to execute the code in stage 2 and stage 3, you can run this script:
```bash
bash run.sh --stage 2 --stop_stage 3
```
Or you can set `stage` equal to `stop_stage` to only run one stage.
For example, if you only want to run `stage 0`, you can use the script below:
```bash
bash run.sh --stage 0 --stop_stage 0
```
The document below will describe the scripts in `run.sh` in detail.
## The Environment Variables
The path.sh contains the environment variables. 
```bash
. ./path.sh
```
This script needs to be run first. And another script is also needed:
```bash
source ${MAIN_ROOT}/utils/parse_options.sh
```
It will support the way of using `--variable value` in the shell scripts.
## The Local Variables
Some local variables are set in `run.sh`. 
`gpus` denotes the GPU number you want to use. If you set `gpus=`, it means you only use CPU. 
`stage` denotes the number of stages you want to start from in the experiments.
`stop_stage` denotes the number of the stage you want to end at in the experiments. 
`conf_path` denotes the config path of the model.
`avg_num` denotes the number K of top-K models you want to average to get the final model.
`audio_file` denotes the file path of the single file you want to infer in stage 4.
`ckpt` denotes the checkpoint prefix of the model, e.g. "conformer"

You can set the local variables (except `ckpt`) when you use `run.sh`

For example, you can set the `gpus` and `avg_num` when you use the command line:
```bash
bash run.sh --gpus 0,1 --avg_num 10
```
## Stage 0: Data Processing
To use this example, you need to process data firstly and you can use stage 0 in `run.sh` to do this. The code is shown below:
```bash
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
     # prepare data
     bash ./local/data.sh || exit -1
 fi
```
Stage 0 is for processing the data.

If you only want to process the data, you can run:
```bash
bash run.sh --stage 0 --stop_stage 0
```
You can also just run these scripts in your command line.
```bash
. ./path.sh
bash ./local/data.sh
```
After processing the data, the `data` directory will look like this:
```bash
data/
|-- dev_set.meta
|-- lang_char
|   `-- bpe_bpe_11297.model
|   `-- bpe_bpe_11297.vocab
|   `-- vocab.txt
|-- manifest.dev
|-- manifest.dev.raw
|-- manifest.test
|-- manifest.test.raw
|-- manifest.train
|-- manifest.train.raw
|-- mean_std.json
|-- test_set.meta
`-- train_set.meta
```
## Stage 1: Model Training
If you want to train the model, you can use stage 1 in `run.sh`. The code is shown below. 
```bash
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
     CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
 fi
```
If you want to train the model, you can use the script below to execute stage 0 and stage 1:
```bash
bash run.sh --stage 0 --stop_stage 1
```
or you can run these scripts in the command line (only use CPU).
```bash
. ./path.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/conformer.yaml conformer
```
## Stage 2: Top-k Models Averaging
After training the model, we need to get the final model for testing and inference. In every epoch, the model checkpoint is saved, so we can choose the best model from them based on the validation loss or we can sort them and average the parameters of the top-k models to get the final model. We can use stage 2 to do this, and the code is shown below:
```bash
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     # avg n best model
     avg.sh best exp/${ckpt}/checkpoints ${avg_num}
 fi
```
The `avg.sh` script is in `../../../utils/`, which is added to `PATH` in `path.sh`.
If you want to get the final model, you can use the script below to execute stage 0, stage 1, and stage 2:
```bash
bash run.sh --stage 0 --stop_stage 2
```
or you can run these scripts in the command line (only use CPU).

```bash
. ./path.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/conformer.yaml conformer
avg.sh best exp/conformer/checkpoints 10
```
## Stage 3: Model Testing
The test stage is to evaluate the model performance. The code of test stage is shown below:
```bash
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
     # test ckpt avg_n
     CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
 fi
```
If you want to train a model and test it, you can use the script below to execute stage 0, stage 1, stage 2, and stage 3 :
```bash
bash run.sh --stage 0 --stop_stage 3
```
or you can run these scripts in the command line (only use CPU).
```bash
. ./path.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/conformer.yaml conformer
avg.sh best exp/conformer/checkpoints 10
CUDA_VISIBLE_DEVICES= ./local/test.sh conf/conformer.yaml conf/tuning/decode.yaml exp/conformer/checkpoints/avg_10
```
## Pretrained Model
You can get the pretrained transformer or conformer from [this](../../../docs/source/released_model.md).

Use the `tar` command to unpack the model, and then you can use the test script to evaluate it.

For example:
```bash
wget https://paddlespeech.cdn.bcebos.com/s2t/tal_cs/asr1/asr1_conformer_talcs_ckpt_1.4.0.model.tar.gz
tar xzvf asr1_conformer_talcs_ckpt_1.4.0.model.tar.gz
source path.sh
# If you have already processed the data and got the manifest files, you can skip the following 2 steps
bash local/data.sh --stage -1 --stop_stage -1
bash local/data.sh --stage 2 --stop_stage 2
CUDA_VISIBLE_DEVICES= ./local/test.sh conf/conformer.yaml conf/tuning/decode.yaml exp/conformer/checkpoints/avg_10
```
The performance of the released models is shown [here](./RESULTS.md).

## Stage 4: Single Audio File Inference
In some situations, you want to use the trained model to do the inference for the single audio file. You can use stage 4. The code is shown below:
```bash
 if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
     # test a single .wav file
     CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${audio_file} || exit -1
 fi
```
You can train the model by yourself using ```bash run.sh --stage 0 --stop_stage 3```, or you can download the pretrained model through the script below:
```bash
wget https://paddlespeech.cdn.bcebos.com/s2t/tal_cs/asr1/asr1_conformer_talcs_ckpt_1.4.0.model.tar.gz
tar xzvf asr1_conformer_talcs_ckpt_1.4.0.model.tar.gz
```
You can download the audio demo:
```bash
wget -nc https://paddlespeech.cdn.bcebos.com/datasets/single_wav/zh/demo_01_03.wav -P data/
```
You need to prepare an audio file or use the audio demo above, please confirm the sample rate of the audio is 16K. You can get the result of the audio demo by running the script below.
```bash
CUDA_VISIBLE_DEVICES= ./local/test_wav.sh conf/conformer.yaml conf/tuning/decode.yaml exp/conformer/checkpoints/avg_10 data/demo_01_03.wav
```


================================================
FILE: examples/tal_cs/asr1/RESULTS.md
================================================
# TALCS
2023.1.6, commit id: fa724285f3b799b97b4348ad3b1084afc0764f9b (conformer)
2025.8.11, commit id: 4f62ff05b7c9974d5642b26306ff3c7140c84312 (chunk_conformer)

## Conformer
train: Epoch 100, 3 V100-32G, best avg: 10

| Model | Params | Config | Augmentation| Test set | Decode method | Loss | MER |  
| --- | --- | --- | --- | --- | --- | --- | --- |
| conformer | 47.63 M | conf/conformer.yaml | spec_aug | test-set | attention | 9.85091028213501 | 0.102786 |  
| conformer | 47.63 M | conf/conformer.yaml | spec_aug | test-set | ctc_greedy_search | 9.85091028213501 | 0.103538 |  
| conformer | 47.63 M | conf/conformer.yaml | spec_aug | test-set | ctc_prefix_beam_search | 9.85091028213501 | 0.103317 |  
| conformer | 47.63 M | conf/conformer.yaml | spec_aug | test-set | attention_rescoring | 9.85091028213501 | 0.084374 | 
| chunk_conformer | 47.63 M | conf/chunk_conformer.yaml | spec_aug | test-set | attention | 9.897139549255371 | 0.080488 |
| chunk_conformer | 47.63 M | conf/chunk_conformer.yaml | spec_aug | test-set | ctc_greedy_search | 9.897139549255371 | 0.093244 |
| chunk_conformer | 47.63 M | conf/chunk_conformer.yaml | spec_aug | test-set | ctc_prefix_beam_search | 9.897139549255371 | 0.093251 |
| chunk_conformer | 47.63 M | conf/chunk_conformer.yaml | spec_aug | test-set | attention_rescoring | 9.897139549255371 | 0.079193 | 


================================================
FILE: examples/tal_cs/asr1/conf/chunk_conformer.yaml
================================================
############################################
#           Network Architecture           #
############################################
cmvn_file: 
cmvn_file_type: "json"
# encoder related
encoder: conformer
encoder_conf:
    output_size: 512    # dimension of attention
    attention_heads: 8
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1   # sublayer output dropout
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 and conv2d8
    normalize_before: True
    cnn_module_kernel: 15
    use_cnn_module: True
    activation_type: 'swish'
    pos_enc_layer_type: 'rel_pos'
    selfattention_layer_type: 'rel_selfattn'
    causal: true
    use_dynamic_chunk: true
    cnn_module_norm: 'layer_norm' # using nn.LayerNorm makes model converge faster
    use_dynamic_left_chunk: false
# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 8
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1  # sublayer output dropout
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0
# hybrid CTC/attention
model_conf:
    ctc_weight: 0.3
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false
    init_type: 'kaiming_uniform' # !Warning: need to convergence

###########################################
#                   Data                  #
###########################################

train_manifest: data/manifest.train
dev_manifest: data/manifest.dev
test_manifest: data/manifest.test


###########################################
#              Dataloader                 #
###########################################

vocab_filepath: data/lang_char/vocab.txt 
spm_model_prefix: 'data/lang_char/bpe_bpe_11297'
unit_type: 'spm'
preprocess_config: conf/preprocess.yaml
feat_dim: 80
stride_ms: 20.0
window_ms: 30.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 32
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 2
subsampling_factor: 1
num_encs: 1

###########################################
#                 Training                #
###########################################
n_epoch: 100 
accum_grad: 4
global_grad_clip: 5.0
dist_sampler: False
optim: adam
optim_conf:
  lr: 0.002
  weight_decay: 1.0e-6
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 100
checkpoint:
  kbest_n: 50
  latest_n: 5


================================================
FILE: examples/tal_cs/asr1/conf/conformer.yaml
================================================
############################################
#           Network Architecture           #
############################################
cmvn_file: 
cmvn_file_type: "json"
# encoder related
encoder: conformer
encoder_conf:
    output_size: 512    # dimension of attention
    attention_heads: 8
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 and conv2d8
    normalize_before: True
    cnn_module_kernel: 15
    use_cnn_module: True
    activation_type: 'swish'
    pos_enc_layer_type: 'rel_pos'
    selfattention_layer_type: 'rel_selfattn'

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 8
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    ctc_weight: 0.3
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false
    init_type: 'kaiming_uniform' # !Warning: need to convergence

###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.train
dev_manifest: data/manifest.dev
test_manifest: data/manifest.test

###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_char/vocab.txt 
spm_model_prefix: 'data/lang_char/bpe_bpe_11297'
unit_type: 'spm'
preprocess_config: conf/preprocess.yaml
feat_dim: 80
stride_ms: 20.0
window_ms: 30.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 5
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 2
subsampling_factor: 1
num_encs: 1

###########################################
#                Training                 #
###########################################
n_epoch: 100 
accum_grad: 4
global_grad_clip: 5.0
dist_sampler: False
optim: adam
optim_conf:
  lr: 0.002
  weight_decay: 1.0e-6
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 100
checkpoint:
  kbest_n: 50
  latest_n: 5


================================================
FILE: examples/tal_cs/asr1/conf/preprocess.yaml
================================================
process:
  # extract kaldi fbank from PCM
  - type: fbank_kaldi
    fs: 16000
    n_mels: 80
    n_shift: 160
    win_length: 400
    dither: 1.0
  - type: cmvn_json
    cmvn_path: data/mean_std.json
  # these three processes are a.k.a. SpecAugment
  - type: time_warp
    max_time_warp: 5
    inplace: true
    mode: PIL
  - type: freq_mask
    F: 30
    n_mask: 2
    inplace: true
    replace_with_zero: false
  - type: time_mask
    T: 40
    n_mask: 2
    inplace: true
    replace_with_zero: false


================================================
FILE: examples/tal_cs/asr1/conf/tuning/chunk_decode.yaml
================================================
beam_size: 10
decoding_method: attention # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
ctc_weight: 0.5 # ctc weight for attention rescoring decode mode.
reverse_weight: 0.3 # reverse weight for attention rescoring decode mode.
decoding_chunk_size: 16 # decoding chunk size. Defaults to -1.
    # <0: for decoding, use full chunk.
    # >0: for decoding, use fixed chunk size as set.
    # 0: used for training, it's prohibited here. 
num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
simulate_streaming: True  # simulate streaming inference. Defaults to False.
decode_batch_size: 128
error_rate_type: cer 


================================================
FILE: examples/tal_cs/asr1/conf/tuning/decode.yaml
================================================
beam_size: 10
decoding_method: attention # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
ctc_weight: 0.5 # ctc weight for attention rescoring decode mode.
#reverse_weight: 0.3 # reverse weight for attention rescoring decode mode.
decoding_chunk_size: -1 # decoding chunk size. Defaults to -1.
    # <0: for decoding, use full chunk.
    # >0: for decoding, use fixed chunk size as set.
    # 0: used for training, it's prohibited here. 
num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
simulate_streaming: False  # simulate streaming inference. Defaults to False.
decode_batch_size: 1
error_rate_type: cer 


================================================
FILE: examples/tal_cs/asr1/local/data.sh
================================================
#!/bin/bash
# Data preparation for the TAL_CS ASR example.
#
# Stages (select with --stage / --stop_stage, parsed by utils/parse_options.sh):
#   -1  build the raw manifests from the manually downloaded TALCS corpus
#    0  compute fbank mean/std on data/manifest.train.raw -> data/mean_std.json
#    1  build the sentencepiece model and vocab.txt from data/manifest.train.raw
#    2  format the train/dev/test raw manifests into data/manifest.{train,dev,test}
#
# Requires MAIN_ROOT to be set (see path.sh). Any failing stage terminates the
# script with a non-zero exit status so that callers (run.sh) can stop early.

stage=-1
stop_stage=100
dict_dir=data/lang_char

# bpemode (unigram or bpe)
nbpe=11297
bpemode=bpe
bpeprefix="${dict_dir}/bpe_${bpemode}_${nbpe}"

stride_ms=20
window_ms=30
sample_rate=16000
feat_dim=80

source ${MAIN_ROOT}/utils/parse_options.sh || exit 1


mkdir -p data
mkdir -p ${dict_dir}
TARGET_DIR=${MAIN_ROOT}/dataset
mkdir -p ${TARGET_DIR}

#prepare data
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
    if [ ! -d "${MAIN_ROOT}/dataset/tal_cs/TALCS_corpus" ]; then
        echo "${MAIN_ROOT}/dataset/tal_cs/TALCS_corpus does not exist. Please download tal_cs data and unpack it from https://ai.100tal.com/dataset first."
        echo "data md5 reference: 4c879b3c9c05365fc9dee1fc68713afe"
        # A bare `exit` would return the status of the echo above (0) and the
        # caller would carry on without any data; report the failure instead.
        exit 1
    fi
    # create manifest json file from TALCS_corpus
    python3 ${MAIN_ROOT}/dataset/tal_cs/tal_cs.py --target_dir ${MAIN_ROOT}/dataset/tal_cs/TALCS_corpus/ --manifest_prefix data/
    if [ $? -ne 0 ]; then
        echo "Prepare TALCS manifest failed. Terminated."
        exit 1
    fi
fi

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # compute mean and stddev for normalizer
    num_workers=$(nproc)
    python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
    --manifest_path="data/manifest.train.raw" \
    --num_samples=-1 \
    --spectrum_type="fbank" \
    --feat_dim=${feat_dim}  \
    --delta_delta=false \
    --sample_rate=${sample_rate} \
    --stride_ms=${stride_ms} \
    --window_ms=${window_ms} \
    --use_dB_normalization=False \
    --num_workers=${num_workers} \
    --output_path="data/mean_std.json"
    if [ $? -ne 0 ]; then
        echo "Compute mean and stddev failed. Terminated."
        exit 1
    fi
    echo "compute mean and stddev done."
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    #use train_set build dict
    python3 ${MAIN_ROOT}/utils/build_vocab.py \
    --unit_type 'spm' \
    --count_threshold=0 \
    --vocab_path="${dict_dir}/vocab.txt"  \
    --manifest_paths="data/manifest.train.raw"  \
    --spm_mode=${bpemode} \
    --spm_vocab_size=${nbpe}  \
    --spm_model_prefix=${bpeprefix} \
    --spm_character_coverage=1 
    if [ $? -ne 0 ]; then
        echo "Build dict failed. Terminated."
        exit 1
    fi
    echo "build dict done."
fi

#use new dict format data
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # format manifest with tokenids, vocab size
    # The three subsets are formatted in parallel. Each job runs in a
    # background subshell, so its `exit 1` only ends that subshell; the PIDs
    # are collected and waited on one by one to propagate any failure.
    pids=()
    for sub in train dev test ; do
    {
        python3 ${MAIN_ROOT}/utils/format_data.py \
        --cmvn_path "data/mean_std.json" \
        --unit_type "spm" \
        --spm_model_prefix ${bpeprefix} \
        --vocab_path="${dict_dir}/vocab.txt" \
        --manifest_path="data/manifest.${sub}.raw" \
        --output_path="data/manifest.${sub}"

        if [ $? -ne 0 ]; then
            echo "Format manifest failed. Terminated."
            exit 1
        fi
    }&
    pids+=($!)
    done

    failed=0
    for pid in "${pids[@]}"; do
        # `wait <pid>` returns that job's exit status (a bare `wait` returns 0).
        wait ${pid} || failed=1
    done
    if [ ${failed} -ne 0 ]; then
        exit 1
    fi
    echo "format data done."
fi


================================================
FILE: examples/tal_cs/asr1/local/test.sh
================================================
#!/bin/bash
# Evaluate a checkpoint on the test set with four decoding methods:
# attention, ctc_greedy_search, ctc_prefix_beam_search, attention_rescoring.
# One result file per method is written to <ckpt_path_prefix>/<method>.rsl.
#
# usage: test.sh config_path decode_config_path ckpt_path_prefix

if [ $# != 3 ];then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix"
    exit -1
fi

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when empty).
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
decode_config_path=$2
ckpt_prefix=$3

# A config whose name contains "chunk_" and ends in "yaml" is a streaming model.
case "${config_path}" in
    *chunk_*yaml) chunk_mode=true ;;
    *)            chunk_mode=false ;;
esac

# download language model
#bash local/download_lm_ch.sh
#if [ $? -ne 0 ]; then
#    exit 1
#fi

# run_decode <decoding_method> <decode_batch_size>
# Runs test.py once and aborts the whole script if the evaluation fails.
run_decode() {
    local method=$1
    local bs=$2

    echo "decoding ${method}"
    mkdir -p ${ckpt_prefix}
    python3 -u ${BIN_DIR}/test.py \
    --ngpu ${ngpu} \
    --config ${config_path} \
    --decode_cfg ${decode_config_path} \
    --result_file ${ckpt_prefix}/${method}.rsl \
    --checkpoint_path ${ckpt_prefix} \
    --opts decode.decoding_method ${method} \
    --opts decode.decode_batch_size ${bs} || {
        echo "Failed in evaluation!"
        exit 1
    }
}

# attention / ctc_greedy_search can be batched, except for stream decoding,
# which only supports batchsize=1.
if [ ${chunk_mode} == true ];then
    batched_size=1
else
    batched_size=64
fi
for method in attention ctc_greedy_search; do
    run_decode ${method} ${batched_size}
done

# These two methods are always decoded one utterance at a time.
for method in ctc_prefix_beam_search attention_rescoring; do
    run_decode ${method} 1
done

exit 0


================================================
FILE: examples/tal_cs/asr1/local/test_wav.sh
================================================
#!/bin/bash
# Decode a single audio file with a trained model using attention_rescoring.
# The result is written to <ckpt_path_prefix>/attention_rescoring.rsl.
#
# usage: test_wav.sh config_path decode_config_path ckpt_path_prefix audio_file
#
# If audio_file does not exist yet, the demo wav is fetched into data/ (so the
# default data/demo_01_03.wav works out of the box). An existing local file is
# used as-is and needs no network access.

if [ $# != 4 ];then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix audio_file"
    exit -1
fi

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when empty).
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
decode_config_path=$2
ckpt_prefix=$3
audio_file=$4

mkdir -p data
if [ ! -f "${audio_file}" ]; then
    # Only try the download when the requested file is missing; a failure here
    # is reported by the existence check below.
    wget -nc https://paddlespeech.cdn.bcebos.com/datasets/single_wav/zh/demo_01_03.wav -P data/
fi

# Quoted on purpose: an unquoted empty value would turn the test into
# `[ ! -f ]`, which is false and would let an empty path through.
if [ ! -f "${audio_file}" ]; then
    echo "Please input the right audio_file path"
    exit 1
fi

# download language model
#bash local/download_lm_ch.sh
#if [ $? -ne 0 ]; then
#    exit 1
#fi

for type in  attention_rescoring; do
    echo "decoding ${type}"
    batch_size=1
    output_dir=${ckpt_prefix}
    mkdir -p "${output_dir}"
    python3 -u ${BIN_DIR}/test_wav.py \
    --ngpu ${ngpu} \
    --config "${config_path}" \
    --decode_cfg "${decode_config_path}" \
    --result_file "${output_dir}/${type}.rsl" \
    --checkpoint_path "${ckpt_prefix}" \
    --opts decode.decoding_method ${type} \
    --opts decode.decode_batch_size ${batch_size} \
    --audio_file "${audio_file}"

    if [ $? -ne 0 ]; then
        echo "Failed in evaluation!"
        exit 1
    fi
done
exit 0


================================================
FILE: examples/tal_cs/asr1/local/train.sh
================================================
#!/bin/bash
# Train a model and store everything under exp/<ckpt_name>.
#
# usage: CUDA_VISIBLE_DEVICES=0 train.sh config_path ckpt_name ips(optional)
#
# With an empty CUDA_VISIBLE_DEVICES, train.py is run directly with --ngpu 0;
# otherwise it is started through paddle.distributed.launch on the listed GPUs
# (and, when `ips` is given, with --ips=<ips>).
# Options such as --seed or --profiler_options are parsed by
# utils/parse_options.sh. Exits non-zero when training fails.

profiler_options=
benchmark_batch_size=0
benchmark_max_step=0

# seed may break model convergence
seed=0

source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when empty).
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

if [ ${seed} != 0  ]; then
    export FLAGS_cudnn_deterministic=True
    echo "using seed $seed & FLAGS_cudnn_deterministic=True ..."
fi

# Either too few or too many positional arguments is a usage error
# (the two conditions can never hold together, so they must be OR-ed).
if [ $# -lt 2 ] || [ $# -gt 3 ];then
    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
    exit -1
fi

config_path=$1
ckpt_name=$2
ips=$3

if [ -z "${ips}" ];then
  ips_config=
else
  ips_config="--ips="${ips}
fi
echo ${ips_config}

mkdir -p exp

# default memory allocator strategy may case gpu training hang
# for no OOM raised when memory exhausted
export FLAGS_allocator_strategy=naive_best_fit

if [ ${ngpu} == 0 ]; then
python3 -u ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--seed ${seed} \
--config ${config_path} \
--output exp/${ckpt_name} \
--profiler-options "${profiler_options}" \
--benchmark-batch-size ${benchmark_batch_size} \
--benchmark-max-step ${benchmark_max_step}
else
python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--seed ${seed} \
--config ${config_path} \
--output exp/${ckpt_name} \
--profiler-options "${profiler_options}" \
--benchmark-batch-size ${benchmark_batch_size} \
--benchmark-max-step ${benchmark_max_step}
fi
# Save the training status right away: the exit status of the `if` above is
# that of the python command it ran, and the cleanup below would overwrite $?.
train_status=$?


if [ ${seed} != 0  ]; then
    unset FLAGS_cudnn_deterministic
fi

if [ ${train_status} -ne 0 ]; then
    echo "Failed in training!"
    exit 1
fi

exit 0


================================================
FILE: examples/tal_cs/asr1/path.sh
================================================
# Environment for the TAL_CS asr1 example; source this file from the example
# directory (MAIN_ROOT is resolved relative to the current working directory).

# Repository root: three levels above the current directory.
export MAIN_ROOT=$(realpath ${PWD}/../../../)

# model exp
MODEL=u2
export BIN_DIR=${MAIN_ROOT}/paddlespeech/s2t/exps/${MODEL}/bin

# Make the repository root and its utils/ scripts callable by name.
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/

export LC_ALL=C

# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONDONTWRITEBYTECODE=1


================================================
FILE: examples/tal_cs/asr1/run.sh
================================================
#!/bin/bash
# Entry point of the TAL_CS asr1 example. Stages:
#   0  data preparation        (local/data.sh)
#   1  training                (local/train.sh)
#   2  average the best models (avg.sh)
#   3  test-set evaluation     (local/test.sh)
#   4  single wav inference    (local/test_wav.sh)
#   51 export (not supported; outside the default stop_stage)
# Every variable defined before parse_options.sh is sourced can be overridden
# on the command line as `--name value`.
source path.sh || exit 1;
set -e

stage=0
stop_stage=50
gpus=0,1,2,3
ips=  #xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx
conf_path=conf/conformer.yaml
decode_conf_path=conf/tuning/decode.yaml
average_checkpoint=true
avg_num=10

. ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

# The experiment name is the config file name up to its first dot.
ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}')
avg_ckpt=avg_${avg_num}
echo "checkpoint name ${ckpt}"

audio_file="data/demo_01_03.wav"

# Prefix of the averaged checkpoint used by the test/export stages.
avg_model=exp/${ckpt}/checkpoints/${avg_ckpt}

# in_stage_range N: succeeds when stage <= N <= stop_stage.
in_stage_range() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

if in_stage_range 0; then
    # prepare data
    bash ./local/data.sh || exit -1
fi

if in_stage_range 1; then
    # train model, all `ckpt` under `exp` dir
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path}  ${ckpt} ${ips}
fi

if in_stage_range 2; then
    # avg n best model
    avg.sh best exp/${ckpt}/checkpoints ${avg_num}
fi

if in_stage_range 3; then
    # test ckpt avg_n
    CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} ${avg_model} || exit -1
fi

if in_stage_range 4; then
    # test a single .wav file
    CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} ${decode_conf_path} ${avg_model} ${audio_file} || exit -1
fi

# Not supported at now!!!
if in_stage_range 51; then
     # export ckpt avg_n
     CUDA_VISIBLE_DEVICES=0 ./local/export.sh ${conf_path} ${avg_model} ${avg_model}.jit
fi

================================================
FILE: examples/ted_en_zh/README.md
================================================
# TED En -> Zh

* st0 - conformer/transformer speech translation


================================================
FILE: examples/ted_en_zh/st0/.gitignore
================================================
TED-En-Zh
data
exp


================================================
FILE: examples/ted_en_zh/st0/README.md
================================================

# Transformer/Conformer ST0 with TED_En_Zh
This example contains code used to train a Transformer or [Conformer](http://arxiv.org/abs/2008.03802) model with TED_EN_Zh
## Overview
All the scripts you need are in `run.sh`. There are several stages in `run.sh`, and each stage has its function.

You need to download TED_En_Zh dataset by yourself.

| Stage | Function                                                     |
|:---- |:----------------------------------------------------------- |
| 0     | Process data. It includes: <br>       (1) Calculate the CMVN of the train dataset <br>       (2) Get the vocabulary file <br>       (3) Get the manifest files of the train, development and test dataset<br> |
| 1     | Train the model                                              |
| 2     | Get the final model by averaging the top-k models, set k = 1 means to choose the best model |
| 3     | Test the final model performance                             |

You can choose to run a range of stages by setting `stage` and `stop_stage`.

For example, if you want to execute the code in stage 2 and stage 3, you can run this script:
```bash
bash run.sh --stage 2 --stop_stage 3
```
Or you can set `stage` equal to `stop_stage` to only run one stage.
For example, if you only want to run ```stage 0```, you can use the script below:
```bash
bash run.sh --stage 0 --stop_stage 0
```
The document below will describe the scripts in `run.sh` in detail.

## The Environment Variables
The path.sh contains the environment variables. 
```bash
source path.sh
```
This script needs to be run first. And another script is also needed:
```bash
source ${MAIN_ROOT}/utils/parse_options.sh
```
It will support the way of using `--variable value` in the shell scripts.

## The Local Variables
Some local variables are set in `run.sh`. 
`gpus` denotes the GPU number you want to use. If you set `gpus=`, it means you only use CPU. 
`stage` denotes the number of stages you want to start from in the experiments.
`stop_stage` denotes the number of stages you want to end at in the experiments. 
`conf_path` denotes the config path of the model.
`data_path` denotes the path of the dataset.
`avg_num` denotes the number K of top-K models you want to average to get the final model.
`ckpt` denotes the checkpoint prefix of the model, e.g. "transformer_mtl_noam"

You can set the local variables (except `ckpt`) when you use `run.sh`

For example, you can set the `gpus` and `avg_num` when you use the command line:
```bash
bash run.sh --gpus 0,1 --avg_num 5
```
## Stage 0: Data Processing
To use this example, you need to process the data first, and you can use stage 0 in `run.sh` to do this. The code is shown below:
```bash
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
     # prepare data
     bash ./local/data.sh || exit -1
 fi
```
Stage 0 is for processing the data.
If you only want to process the data, you can run:
```bash
bash run.sh --stage 0 --stop_stage 0
```
You can also just run these scripts in your command line.
```bash
source path.sh
bash ./local/data.sh
```
## Stage 1: Model Training
If you want to train the model, you can use stage 1 in `run.sh`. The code is shown below. 
```bash
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
     CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
 fi
```
If you want to train the model, you can use the script below to execute stage 0 and stage 1:
```bash
bash run.sh --stage 0 --stop_stage 1
```
or you can run these scripts in the command line (only use CPU).
```bash
source path.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/transformer_mtl_noam.yaml transformer_mtl_noam
```
## Stage 2: Top-k Models Averaging
After training the model,  we need to get the final model for testing and inference. In every epoch, the model checkpoint is saved, so we can choose the best model from them based on the validation loss or we can sort them and average the parameters of the top-k models to get the final model.  We can use stage 2 to do this, and the code is shown below:
```bash
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # avg n best model
    avg.sh best exp/${ckpt}/checkpoints ${avg_num}
fi
```
The `avg.sh` script is in `../../../utils/`, which is defined in `path.sh`.
If you want to get the final model, you can use the script below to execute stage 0, stage 1, and stage 2:
```bash
bash run.sh --stage 0 --stop_stage 2
```
or you can run these scripts in the command line (only use CPU).
```bash
source path.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/transformer_mtl_noam.yaml transformer_mtl_noam
avg.sh best exp/transformer_mtl_noam/checkpoints 5
```
## Stage 3: Model Testing
Stage 3 evaluates the model performance. The code of this stage is shown below:
```bash
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # test ckpt avg_n
    CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
fi
```
If you want to train a model and test it, you can use the script below to execute stage 0, stage 1, stage 2, and stage 3 :
```bash
bash run.sh --stage 0 --stop_stage 3
```
or you can run these scripts in the command line (only use CPU).
```bash
source path.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/transformer_mtl_noam.yaml transformer_mtl_noam
avg.sh latest exp/transformer_mtl_noam/checkpoints 5
CUDA_VISIBLE_DEVICES= ./local/test.sh conf/transformer_mtl_noam.yaml conf/tuning/decode.yaml exp/transformer_mtl_noam/checkpoints/avg_5
```
The performance of the released models is shown below:
### Transformer
| Model               | Params | Config                           | Char-BLEU |
| ------------------- | ------ | -------------------------------- | --------- |
| Transformer+ASR MTL | 50.26M | conf/transformer_joint_noam.yaml | 17.38     |


================================================
FILE: examples/ted_en_zh/st0/RESULTS.md
================================================

# TED En-Zh

## Dataset

| Data Subset | Duration in Seconds |
| --- | --- |
| data/manifest.train | 0.942 ~ 60   |
| data/manifest.dev   | 1.151 ~ 39   |
| data/manifest.test  | 1.1 ~ 42.746 |

## Transformer
| Model | Params | Config | Char-BLEU |
| --- | --- | --- | --- |
| Transformer+ASR MTL | 50.26M | conf/transformer_joint_noam.yaml | 17.38 |


================================================
FILE: examples/ted_en_zh/st0/conf/preprocess.yaml
================================================
process:
  # extract kaldi fbank from PCM
  - type: fbank_kaldi
    fs: 16000
    n_mels: 80
    n_shift: 160
    win_length: 400
    dither: 0.1
  - type: cmvn_json
    cmvn_path: data/mean_std.json
  # these three processes are a.k.a. SpecAugment
  - type: time_warp
    max_time_warp: 5
    inplace: true
    mode: PIL
  - type: freq_mask
    F: 30
    n_mask: 2
    inplace: true
    replace_with_zero: false
  - type: time_mask
    T: 40
    n_mask: 2
    inplace: true
    replace_with_zero: false


================================================
FILE: examples/ted_en_zh/st0/conf/transformer.yaml
================================================
# https://yaml.org/type/float.html
###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.train
dev_manifest: data/manifest.dev
test_manifest: data/manifest.test
min_input_len: 0.05  # second
max_input_len: 30.0 # second
min_output_len: 0.0 # tokens
max_output_len: 400.0 # tokens
min_output_input_ratio: 0.01
max_output_input_ratio: 20.0

###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_char/vocab.txt
unit_type: 'spm'
spm_model_prefix: data/lang_char/bpe_unigram_8000
mean_std_filepath: ""
preprocess_config: conf/preprocess.yaml
batch_size: 16
maxlen_in: 5  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
raw_wav: True  # use raw_wav or kaldi feature
spectrum_type: fbank #linear, mfcc, fbank
feat_dim: 80
delta_delta: False
dither: 1.0
target_sample_rate: 16000
max_freq: None
n_fft: None
stride_ms: 10.0
window_ms: 25.0
use_dB_normalization: True
target_dB: -20
random_seed: 0
keep_transcription_text: False
sortagrad: True 
shuffle_method: batch_shuffle
num_workers: 2


############################################
#           Network Architecture           #
############################################
cmvn_file: "data/mean_std.json"
cmvn_file_type: "json"
# encoder related
encoder: transformer
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 and conv2d8
    normalize_before: true

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    asr_weight: 0.0
    ctc_weight: 0.0
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false

###########################################
#                Training                 #
###########################################
n_epoch: 120
accum_grad: 2
global_grad_clip: 5.0
optim: adam
optim_conf:
  lr: 2.5
  weight_decay: 1.0e-06
scheduler: noam    
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 50
checkpoint:
  kbest_n: 50
  latest_n: 5


================================================
FILE: examples/ted_en_zh/st0/conf/transformer_mtl_noam.yaml
================================================
# https://yaml.org/type/float.html
###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.train
dev_manifest: data/manifest.dev
test_manifest: data/manifest.test
min_input_len: 0.05  # second
max_input_len: 30.0 # second
min_output_len: 0.0 # tokens
max_output_len: 400.0 # tokens
min_output_input_ratio: 0.01
max_output_input_ratio: 20.0

###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_char/vocab.txt
unit_type: 'spm'
spm_model_prefix: data/lang_char/bpe_unigram_8000
mean_std_filepath: ""
preprocess_config: conf/preprocess.yaml
batch_size: 16
maxlen_in: 5  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
raw_wav: True  # use raw_wav or kaldi feature
spectrum_type: fbank #linear, mfcc, fbank
feat_dim: 80
delta_delta: False
dither: 1.0
target_sample_rate: 16000
max_freq: None
n_fft: None
stride_ms: 10.0
window_ms: 25.0
use_dB_normalization: True
target_dB: -20
random_seed: 0
keep_transcription_text: False
sortagrad: True 
shuffle_method: batch_shuffle
num_workers: 2


############################################
#           Network Architecture           #
############################################
cmvn_file: "data/mean_std.json"
cmvn_file_type: "json"
# encoder related
encoder: transformer
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 and conv2d8
    normalize_before: true

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    asr_weight: 0.5
    ctc_weight: 0.3
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false


###########################################
#                Training                 #
###########################################
n_epoch: 120
accum_grad: 2
global_grad_clip: 5.0
optim: adam
optim_conf:
  lr: 2.5
  weight_decay: 1.0e-06
scheduler: noam    
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 50
checkpoint:
  kbest_n: 50
  latest_n: 5


================================================
FILE: examples/ted_en_zh/st0/conf/tuning/decode.yaml
================================================
batch_size: 1
error_rate_type: char-bleu
decoding_method: fullsentence  # 'fullsentence', 'simultaneous'
beam_size: 10
word_reward: 0.7
maxlenratio: 0.3
decoding_chunk_size: -1 # decoding chunk size. Defaults to -1.
    # <0: for decoding, use full chunk.
    # >0: for decoding, use fixed chunk size as set.
    # 0: used for training, it's prohibited here. 
num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
simulate_streaming: False  # simulate streaming inference. Defaults to False.

================================================
FILE: examples/ted_en_zh/st0/local/data.sh
================================================
#!/bin/bash
# Data preparation for the TED En-Zh st0 recipe.
#
# Stages (select a range with --stage / --stop_stage):
#   -1: check that the unzipped dataset exists and generate the raw manifests
#    0: compute mean and stddev on data/manifest.train.raw -> data/mean_std.json
#    1: build the sentencepiece vocabulary under ${dict_dir}
#    2: format the train/dev/test manifests (one background job per subset)
#
# Expects MAIN_ROOT to be set in the environment (path.sh exports it).

set -e

stage=-1
stop_stage=100
dict_dir=data/lang_char

# bpemode (unigram or bpe)
nbpe=8000
bpemode=unigram
bpeprefix="${dict_dir}/bpe_${bpemode}_${nbpe}"
data_dir=./TED-En-Zh


. ${MAIN_ROOT}/utils/parse_options.sh || exit -1;


TARGET_DIR=${MAIN_ROOT}/dataset
mkdir -p ${TARGET_DIR}
mkdir -p data
mkdir -p ${dict_dir}


if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
    if [ ! -e ${data_dir} ]; then
        echo "Error: ${data_dir} Dataset is not avaiable. Please download and unzip the dataset"
        echo "Download Link: https://pan.baidu.com/s/18L-59wgeS96WkObISrytQQ Passwd: bva0"
        echo "The tree of the directory should be:"
        echo "."
        echo "|-- En-Zh"
        echo "|-- test-segment"
        echo "    |-- tst2010"
        echo "    |-- ..."
        echo "|-- train-split"
        echo "    |-- train-segment"
        echo "|-- README.md"

        exit 1
    fi

    # generate manifests
    python3 ${TARGET_DIR}/ted_en_zh/ted_en_zh.py \
    --manifest_prefix="data/manifest" \
    --src-dir="${data_dir}"

    echo "Complete raw data pre-process."
fi

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # compute mean and stddev for normalizer
    num_workers=$(nproc)
    # The command is tested directly by `if` because, under `set -e`, a
    # separate `$?` check after a failing command would never be reached.
    if ! python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
        --manifest_path="data/manifest.train.raw" \
        --num_samples=-1 \
        --spectrum_type="fbank" \
        --feat_dim=80 \
        --delta_delta=false \
        --sample_rate=16000 \
        --stride_ms=10 \
        --window_ms=25 \
        --use_dB_normalization=False \
        --num_workers=${num_workers} \
        --output_path="data/mean_std.json"
    then
        echo "Compute mean and stddev failed. Terminated."
        exit 1
    fi
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # build vocabulary
    if ! python3 ${MAIN_ROOT}/utils/build_vocab.py \
        --unit_type "spm" \
        --spm_vocab_size=${nbpe} \
        --spm_mode ${bpemode} \
        --spm_model_prefix ${bpeprefix} \
        --spm_character_coverage 1. \
        --vocab_path="${dict_dir}/vocab.txt" \
        --text_keys 'text' \
        --manifest_paths="data/manifest.train.raw"
    then
        echo "Build vocabulary failed. Terminated."
        exit 1
    fi
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # format manifest with tokenids, vocab size
    # Each subset is formatted in its own background job. The PIDs are
    # recorded so every job can be waited on individually: a bare `wait`
    # returns 0 even when a background job failed.
    pids=()
    for subset in train dev test; do
    {
        if ! python3 ${MAIN_ROOT}/utils/format_data.py \
            --cmvn_path "data/mean_std.json" \
            --unit_type "spm" \
            --spm_model_prefix ${bpeprefix} \
            --vocab_path="${dict_dir}/vocab.txt" \
            --manifest_path="data/manifest.${subset}.raw" \
            --output_path="data/manifest.${subset}"
        then
            echo "Formt mnaifest failed. Terminated."
            exit 1
        fi
    }&
    pids+=($!)
    done

    # Wait for all jobs before deciding, so no job is left running on exit.
    failed=0
    for pid in "${pids[@]}"; do
        wait "${pid}" || failed=1
    done
    if [ ${failed} -ne 0 ]; then
        exit 1
    fi
fi

echo "Ted En-Zh Data preparation done."
exit 0


================================================
FILE: examples/ted_en_zh/st0/local/test.sh
================================================
#! /usr/bin/env bash
# Decode a checkpoint with ${BIN_DIR}/test.py.
#
# Usage: test.sh config_path decode_config_path ckpt_path_prefix
# The result file is written next to the checkpoint as
# <ckpt_path_prefix>.<decoding method>.rsl.

if [ $# -ne 3 ]; then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix"
    exit -1
fi

config_path=$1
decode_config_path=$2
ckpt_prefix=$3

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when empty).
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

for method in fullsentence; do
    echo "decoding ${method}"
    if ! python3 -u ${BIN_DIR}/test.py \
        --ngpu ${ngpu} \
        --config ${config_path} \
        --decode_cfg ${decode_config_path} \
        --result_file ${ckpt_prefix}.${method}.rsl \
        --checkpoint_path ${ckpt_prefix} \
        --opts decode.decoding_method ${method}
    then
        echo "Failed in evaluation!"
        exit 1
    fi
done

exit 0


================================================
FILE: examples/ted_en_zh/st0/local/train.sh
================================================
#!/bin/bash
# Launch training with ${BIN_DIR}/train.py, on CPU or on the GPUs listed in
# CUDA_VISIBLE_DEVICES.
#
# Usage: CUDA_VISIBLE_DEVICES=0 train.sh config_path ckpt_name [ips]
#   config_path: config file passed to train.py via --config
#   ckpt_name:   training output goes to exp/<ckpt_name>
#   ips:         optional; forwarded to paddle.distributed.launch as --ips=<ips>
#
# Exits 1 when the training command fails, 0 otherwise.

# Exactly two or three arguments are accepted.
if [ $# -lt 2 ] || [ $# -gt 3 ];then
    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
    exit -1
fi

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when empty).
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
ckpt_name=$2
ips=$3

if [ -z "${ips}" ];then
  ips_config=
else
  ips_config="--ips=${ips}"
fi

mkdir -p exp

# seed may break model convergence
seed=0
if [ ${seed} != 0 ]; then
    export FLAGS_cudnn_deterministic=True
fi

if [ ${ngpu} == 0 ]; then
python3 -u ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--config ${config_path} \
--output exp/${ckpt_name} \
--seed ${seed}
else
python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--config ${config_path} \
--output exp/${ckpt_name} \
--seed ${seed}
fi
# Capture the training exit status right away: the `if` block below would
# otherwise overwrite $? and hide a training failure.
train_status=$?

if [ ${seed} != 0 ]; then
    unset FLAGS_cudnn_deterministic
fi

if [ ${train_status} -ne 0 ]; then
    echo "Failed in training!"
    exit 1
fi

exit 0


================================================
FILE: examples/ted_en_zh/st0/path.sh
================================================
# Environment for this recipe; meant to be sourced from the recipe directory.

# Repository root: three levels above the current working directory.
export MAIN_ROOT=$(realpath ${PWD}/../../../)

# Directory holding the train/test entry points of the model used here.
MODEL=u2_st
export BIN_DIR=${MAIN_ROOT}/paddlespeech/s2t/exps/${MODEL}/bin

# Search paths: repository root and utils/ for executables, repository root for Python.
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/

export LC_ALL=C
export PYTHONDONTWRITEBYTECODE=1
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8


================================================
FILE: examples/ted_en_zh/st0/run.sh
================================================
#!/bin/bash
# Top-level driver for this recipe: data preparation (stage 0), training
# (stage 1), checkpoint averaging (stage 2) and testing (stage 3).
# Select a range of stages with --stage / --stop_stage.
set -e
source path.sh

# Defaults; they must be defined before parse_options.sh is sourced below.
# gpus is exported as CUDA_VISIBLE_DEVICES for the training stage.
gpus=0,1,2,3
stage=0
stop_stage=50
conf_path=conf/transformer_mtl_noam.yaml
ips=            #xx.xx.xx.xx,xx.xx.xx.xx
decode_conf_path=conf/tuning/decode.yaml
avg_num=5
data_path=./TED_EnZh # path to unzipped data
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

# Name of the averaged checkpoint used by the test and export stages.
avg_ckpt=avg_${avg_num}
# Checkpoint name: basename of the config file up to its first '.'.
ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}')
echo "checkpoint name ${ckpt}"


if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # prepare data
    bash ./local/data.sh --data_dir ${data_path} || exit -1
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # train model, all `ckpt` under `exp` dir
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips}
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # avg n best model
    avg.sh best exp/${ckpt}/checkpoints ${avg_num}
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # test ckpt avg_n
    # Testing always runs on GPU 0, regardless of ${gpus}.
    CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
fi

# Stage 51 is above the default stop_stage=50, so it only runs when requested
# explicitly with --stop_stage 51 or higher.
# NOTE(review): confirm that ./local/export.sh exists for this recipe before enabling it.
if [ ${stage} -le 51 ] && [ ${stop_stage} -ge 51 ]; then
    # export ckpt avg_n
    CUDA_VISIBLE_DEVICES= ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit
fi


================================================
FILE: examples/ted_en_zh/st1/.gitignore
================================================
TED_EnZh
data
exp


================================================
FILE: examples/ted_en_zh/st1/README.md
================================================
# Transformer/Conformer ST1 with TED_En_Zh
This example contains code used to train a Transformer or [Conformer](http://arxiv.org/abs/2008.03802) model with TED_EN_Zh.

To use this example, you need to install Kaldi first.

The main difference between st0 and st1 is that st1 uses kaldi feature.
## Overview
All the scripts you need are in `run.sh`. There are several stages in `run.sh`, and each stage has its function.

You need to download TED_En_Zh dataset by yourself.
| Stage | Function                                                     |
|:---- |:----------------------------------------------------------- |
| 0     | Process data. It includes: <br>       (1) Calculate the CMVN of the train dataset <br>       (2) Get the vocabulary file <br>       (3) Get the manifest files of the train, development and test dataset<br> |
| 1     | Train the model                                              |
| 2     | Get the final model by averaging the top-k models, set k = 1 means to choose the best model |
| 3     | Test the final model performance                             |

You can choose to run a range of stages by setting `stage` and `stop_stage `. 

For example, if you want to execute the code in stage 2 and stage 3, you can run this script:
```bash
bash run.sh --stage 2 --stop_stage 3
```
Or you can set `stage` equal to `stop_stage` to only run one stage.
For example, if you only want to run `stage 0`, you can use the script below:
```bash
bash run.sh --stage 0 --stop_stage 0
```
The document below will describe the scripts in ```run.sh```in detail.
## The Environment Variables
The path.sh contains the environment variables. 
```bash
. ./path.sh
. ./cmd.sh
```
This script needs to be run first. And another script is also needed:
```bash
source ${MAIN_ROOT}/utils/parse_options.sh
```
It will support the way of using `--variable value` in the shell scripts.
## The Local Variables
Some local variables are set in `run.sh`. 
`gpus` denotes the GPU number you want to use. If you set `gpus=`, it means you only use CPU. 
 `stage` denotes the number of the stage you want to start from in the experiments.
`stop_stage` denotes the number of the stage you want to end at in the experiments. 
`conf_path` denotes the config path of the model.
`data_path` denotes the path of the dataset.
`avg_num`denotes the number K of top-K models you want to average to get the final model.
`ckpt` denotes the checkpoint prefix of the model, e.g. "transformer_mtl_noam"

You can set the local variables (except `ckpt`) when you use `run.sh`

For example, you can set the `gpus` and `avg_num` when you use the command line:
```bash
bash run.sh --gpus 0,1 --avg_num 5
```
## Stage 0: Data Processing
To use this example, you need to process data firstly and you can use stage 0 in ```run.sh```to do this. The code is shown below:
```bash
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
     # prepare data
     bash ./local/data.sh || exit -1
 fi
```
Stage 0 is for processing the data.

If you only want to process the data, you can run:
```bash
bash run.sh --stage 0 --stop_stage 0
```
You can also just run these scripts in your command line.
```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
```
## Stage 1: Model Training
If you want to train the model, you can use stage 1 in ```run.sh```. The code is shown below:
```bash
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # train model, all `ckpt` under `exp` dir
    if [ -n "${ckpt_path}" ]; then
        echo "Finetune from Pretrained Model" ${ckpt_path}
        ./local/download_pretrain.sh || exit -1
    fi 
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} "${ckpt_path}"
fi
```
If you want to train the model, you can use the script below to execute stage 0 and stage 1:
```bash
bash run.sh --stage 0 --stop_stage 1
```
or you can run these scripts in the command line (only use CPU).
```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/transformer_mtl_noam.yaml transformer_mtl_noam ""
```
## Stage 2: Top-k Models Averaging
After training the model,  we need to get the final model for testing and inference. In every epoch, the model checkpoint is saved, so we can choose the best model from them based on the validation loss or we can sort them and average the parameters of the top-k models to get the final model.  We can use stage 2 to do this, and the code is shown below:
```bash
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # avg n best model
    avg.sh best exp/${ckpt}/checkpoints ${avg_num}
fi
```
The ```avg.sh``` script is in ```../../../utils/```, which is defined in ```path.sh```.
If you want to get the final model, you can use the script below to execute stage 0, stage 1, and stage 2:
```bash
bash run.sh --stage 0 --stop_stage 2
```
or you can run these scripts in the command line (only use CPU).
```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/transformer_mtl_noam.yaml transformer_mtl_noam
avg.sh best exp/transformer_mtl_noam/checkpoints 5
```
## Stage 3: Model Testing
Stage 3 evaluates the model performance. The code of this stage is shown below:
```bash
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # test ckpt avg_n
    CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
fi
```
If you want to train a model and test it, you can use the script below to execute stage 0, stage 1, stage 2, and stage 3 :
```bash
bash run.sh --stage 0 --stop_stage 3
```
or you can run these scripts in the command line (only use CPU).
```bash
. ./path.sh
. ./cmd.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/transformer_mtl_noam.yaml transformer_mtl_noam
avg.sh latest exp/transformer_mtl_noam/checkpoints 5
CUDA_VISIBLE_DEVICES= ./local/test.sh conf/transformer_mtl_noam.yaml exp/transformer_mtl_noam/checkpoints/avg_5
```
The performance of the released models is shown below:
### Transformer
| Model | Params | Config | Val loss | Char-BLEU |
| --- | --- | --- | --- | --- |
| FAT + Transformer+ASR MTL | 50.26M | conf/transformer_mtl_noam.yaml | 62.86 | 19.45 |
| FAT + Transformer+ASR MTL with word reward | 50.26M | conf/transformer_mtl_noam.yaml | 62.86 | 20.80 |


================================================
FILE: examples/ted_en_zh/st1/RESULTS.md
================================================

# TED En-Zh

## Dataset

| Data Subset | Duration in Frames |
| --- | --- |
| data/manifest.train | 94.2 ~ 6000   |
| data/manifest.dev   | 115.1 ~ 3900   |
| data/manifest.test  | 110 ~ 4274.6 |

## Transformer
| Model | Params | Config | Val loss | Char-BLEU |
| --- | --- | --- | --- | --- |
| FAT + Transformer+ASR MTL | 50.26M | conf/transformer_mtl_noam.yaml | 69.91 | 20.26 |
| FAT + Transformer+ASR MTL with word reward | 50.26M | conf/transformer_mtl_noam.yaml | 62.86 | 20.80 |


================================================
FILE: examples/ted_en_zh/st1/cmd.sh
================================================
# ====== About run.pl, queue.pl, slurm.pl, and ssh.pl ======
# Usage: <cmd>.pl [options] JOB=1:<nj> <log> <command...>
# e.g.
#   run.pl --mem 4G JOB=1:10 echo.JOB.log echo JOB
#
# Options:
#   --time <time>: Limit the maximum time to execute.
#   --mem <mem>: Limit the maximum memory usage.
#   --max-jobs-run <njob>: Limit the number of parallel jobs. This is ignored for non-array jobs.
#   --num-threads <ngpu>: Specify the number of CPU core.
#   --gpu <ngpu>: Specify the number of GPU devices.
#   --config: Change the configuration file from default.
#
# "JOB=1:10" is used for "array jobs" and it can control the number of parallel jobs.
# The left string of "=", i.e. "JOB", is replaced by <N>(Nth job) in the command and the log file name,
# e.g. "echo JOB" is changed to "echo 3" for the 3rd job and "echo 8" for 8th job respectively.
# Note that the number must start with a positive number, so you can't use "JOB=0:10" for example.
#
# run.pl, queue.pl, slurm.pl, and ssh.pl have unified interface, not depending on its backend.
# These options are mapping to specific options for each backend and
# it is configured by "conf/queue.conf" and "conf/slurm.conf" by default.
# If jobs failed, your configuration might be wrong for your environment.
#
#
# The official documentation for run.pl, queue.pl, slurm.pl, and ssh.pl:
#   "Parallelization in Kaldi": http://kaldi-asr.org/doc/queue.html
# =========================================================~


# Select the backend used by run.sh from "local", "sge", "slurm", or "ssh"
cmd_backend='local'

# Export train_cmd / cuda_cmd / decode_cmd for the selected backend.
case "${cmd_backend}" in
    local)
        # Local machine, without any Job scheduling system

        # The other usage
        export train_cmd="run.pl"
        # Used for "*_train.py": "--gpu" is appended optionally by run.sh
        export cuda_cmd="run.pl"
        # Used for "*_recog.py"
        export decode_cmd="run.pl"
        ;;

    sge)
        # "qsub" (SGE, Torque, PBS, etc.)
        # The default setting is written in conf/queue.conf.
        # You must change "-q g.q" for the "queue" for your environment.
        # To know the "queue" names, type "qhost -q"
        # Note that to use "--gpu *", you have to setup "complex_value" for the system scheduler.

        export train_cmd="queue.pl"
        export cuda_cmd="queue.pl"
        export decode_cmd="queue.pl"
        ;;

    slurm)
        # "sbatch" (Slurm)
        # The default setting is written in conf/slurm.conf.
        # You must change "-p cpu" and "-p gpu" for the "partition" for your environment.
        # To know the "partition" names, type "sinfo".
        # You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*"
        # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".

        export train_cmd="slurm.pl"
        export cuda_cmd="slurm.pl"
        export decode_cmd="slurm.pl"
        ;;

    ssh)
        # You have to create ".queue/machines" to specify the host to execute jobs.
        # e.g. .queue/machines
        #   host1
        #   host2
        #   host3
        # Assuming you can login them without any password, i.e. You have to set ssh keys.

        export train_cmd="ssh.pl"
        export cuda_cmd="ssh.pl"
        export decode_cmd="ssh.pl"
        ;;

    jhu)
        # This is an example of specifying several unique options in the JHU CLSP cluster setup.
        # Users can modify/add their own command options according to their cluster environments.

        export train_cmd="queue.pl --mem 2G"
        export cuda_cmd="queue-freegpu.pl --mem 2G --gpu 1 --config conf/gpu.conf"
        export decode_cmd="queue.pl --mem 4G"
        ;;

    *)
        echo "$0: Error: Unknown cmd_backend=${cmd_backend}" 1>&2
        return 1
        ;;
esac


================================================
FILE: examples/ted_en_zh/st1/conf/fbank.conf
================================================
--sample-frequency=16000 
--num-mel-bins=80


================================================
FILE: examples/ted_en_zh/st1/conf/pitch.conf
================================================
--sample-frequency=16000


================================================
FILE: examples/ted_en_zh/st1/conf/preprocess.yaml
================================================
process:
  # these three processes are a.k.a. SpecAugment
  - type: time_warp
    max_time_warp: 5
    inplace: true
    mode: PIL
  - type: freq_mask
    F: 30
    n_mask: 2
    inplace: true
    replace_with_zero: false
  - type: time_mask
    T: 40
    n_mask: 2
    inplace: true
    replace_with_zero: false


================================================
FILE: examples/ted_en_zh/st1/conf/transformer.yaml
================================================
# https://yaml.org/type/float.html
###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.train
dev_manifest: data/manifest.dev
test_manifest: data/manifest.test

###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_char/ted_en_zh_bpe8000.txt
unit_type: 'spm'
spm_model_prefix: data/lang_char/ted_en_zh_bpe8000
mean_std_filepath: ""
# preprocess_config: conf/augmentation.json
batch_size: 20
feat_dim: 83
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
preprocess_config:
num_workers: 0
subsampling_factor: 1
num_encs: 1


############################################
#           Network Architecture           #
############################################
cmvn_file: None
cmvn_file_type: "json"
# encoder related
encoder: transformer
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 and conv2d8
    normalize_before: true

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    asr_weight: 0.0
    ctc_weight: 0.0
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false


###########################################
#                Training                 #
###########################################
n_epoch: 40
accum_grad: 2
global_grad_clip: 5.0
optim: adam
optim_conf:
  lr: 2.5
  weight_decay: 0.
scheduler: noam    
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 50
checkpoint:
  kbest_n: 50
  latest_n: 5


================================================
FILE: examples/ted_en_zh/st1/conf/transformer_mtl_noam.yaml
================================================
# https://yaml.org/type/float.html
###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.train
dev_manifest: data/manifest.dev
test_manifest: data/manifest.test

###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_char/ted_en_zh_bpe8000.txt
unit_type: 'spm'
spm_model_prefix: data/lang_char/ted_en_zh_bpe8000
mean_std_filepath: ""
# preprocess_config: conf/augmentation.json
batch_size: 20
feat_dim: 83
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
preprocess_config:
num_workers: 0
subsampling_factor: 1
num_encs: 1


############################################
#           Network Architecture           #
############################################
cmvn_file: None
cmvn_file_type: "json"
# encoder related
encoder: transformer
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 and conv2d8
    normalize_before: true

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    asr_weight: 0.5
    ctc_weight: 0.3
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false


###########################################
#                Training                 #
###########################################
n_epoch: 40
accum_grad: 2
global_grad_clip: 5.0
optim: adam
optim_conf:
  lr: 2.5
  weight_decay: 0.
scheduler: noam    
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 50
checkpoint:
  kbest_n: 50
  latest_n: 5


================================================
FILE: examples/ted_en_zh/st1/conf/tuning/decode.yaml
================================================

batch_size: 1
error_rate_type: char-bleu
decoding_method: fullsentence  # 'fullsentence', 'simultaneous'
beam_size: 10
word_reward: 0.7
maxlenratio: 0.3
decoding_chunk_size: -1 # decoding chunk size. Defaults to -1.
    # <0: for decoding, use full chunk.
    # >0: for decoding, use fixed chunk size as set.
    # 0: used for training, it's prohibited here. 
num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
simulate_streaming: False  # simulate streaming inference. Defaults to False.

================================================
FILE: examples/ted_en_zh/st1/local/convert_torch_to_paddle.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse

import paddle
import torch

from paddlespeech.s2t.utils.log import Log

logger = Log(__name__).getlog()


def torch2paddle(args):
    """Convert the encoder weights of a torch checkpoint to a paddle state dict.

    Only keys of ``torch_model['model']`` with one of these prefixes are used:

    * ``encoder.embed*``: copied under the same key.
    * ``encoder.after_norm*``: copied under the same key and duplicated as
      ``decoder.after_norm*`` and ``st_decoder.after_norm*``.
    * ``encoder.encoders.N.*``: copied under the same key; when ``N >= 6`` the
      weight is additionally stored as ``decoder.decoders.(N-6).*`` and
      ``st_decoder.decoders.(N-6).*``.

    2-D tensors under ``encoder.embed`` / ``encoder.encoders`` are transposed
    before being stored (presumably because the two frameworks lay out linear
    weights differently -- TODO confirm).

    Args:
        args: namespace providing ``torch_ckpt`` (path of the torch checkpoint
            to read) and ``paddle_ckpt`` (path the paddle state dict is saved to).
    """
    paddle.set_device('cpu')
    # Encoder layers with an index >= this value also initialise the decoders.
    shared_layer_start = 6
    paddle_model_dict = {}
    torch_model = torch.load(args.torch_ckpt, map_location='cpu')
    cnt = 0

    def _store(src_key, dst_key, weight):
        # Save one converted weight and log the key mapping.
        paddle_model_dict[dst_key] = weight.numpy()
        logger.info(
            f"Convert torch weight: {src_key} to paddlepaddle weight: {dst_key}, shape is {weight.shape}"
        )

    for k, v in torch_model['model'].items():
        # encoder.embed.* --> encoder.embed.*
        if k.startswith('encoder.embed'):
            if v.ndim == 2:
                v = v.transpose(0, 1)
            _store(k, k, v)
            cnt += 1

        # encoder.after_norm.* --> encoder.after_norm.*
        # encoder.after_norm.* --> decoder.after_norm.*
        # encoder.after_norm.* --> st_decoder.after_norm.*
        elif k.startswith('encoder.after_norm'):
            # Swap only the leading component; a blanket str.replace('en', 'de')
            # would also rewrite any later 'en' inside the key.
            decoder_key = 'decoder' + k[len('encoder'):]
            _store(k, k, v)
            _store(k, decoder_key, v)
            _store(k, 'st_' + decoder_key, v)
            cnt += 3

        # encoder.encoders.* --> encoder.encoders.*
        # encoder.encoders.N (N >= 6) --> decoder.decoders.(N - 6)
        # encoder.encoders.N (N >= 6) --> st_decoder.decoders.(N - 6)
        elif k.startswith('encoder.encoders'):
            if v.ndim == 2:
                v = v.transpose(0, 1)
            _store(k, k, v)
            cnt += 1
            parts = k.split('.')
            layer_idx = int(parts[2])
            if layer_idx >= shared_layer_start:
                # Rewrite exactly the three leading path components so that the
                # layer number is never substituted elsewhere in the key.
                parts[:3] = [
                    'decoder', 'decoders', str(layer_idx - shared_layer_start)
                ]
                decoder_key = '.'.join(parts)
                _store(k, decoder_key, v)
                _store(k, 'st_' + decoder_key, v)
                cnt += 2
    logger.info(f"Convert {cnt} weights totally from torch to paddlepaddle")
    paddle.save(paddle_model_dict, args.paddle_ckpt)


if __name__ == '__main__':
    # (flag, default value, help text) for each command-line option.
    option_table = (
        ('--torch_ckpt', '/home/snapshot.ep.98', "Path to torch checkpoint."),
        ('--paddle_ckpt', 'paddle.98.pdparams',
         "Path to save paddlepaddle checkpoint."),
    )
    cli = argparse.ArgumentParser(description=__doc__)
    for flag, fallback, help_text in option_table:
        cli.add_argument(flag, type=str, default=fallback, help=help_text)
    # Parse the command line and run the conversion.
    torch2paddle(cli.parse_args())


================================================
FILE: examples/ted_en_zh/st1/local/data.sh
================================================
#!/bin/bash

# Data preparation script for the TED En-Zh recipe.
# The stages below are gated on ${stage}/${stop_stage}:
#   -1 extract raw data, 0 text/wav preparation, 1 feature generation,
#    2 dictionary + json, 3 json -> manifest.

# Abort on the first failing command.
set -e

# First and last stage to run (both inclusive).
stage=-1
stop_stage=100
# Output directory of the BPE model and dictionary.
dict_dir=data/lang_char

# bpemode (unigram or bpe)
nbpe=8000
bpemode=bpe
# NOTE(review): bpeprefix is not referenced again in this script.
bpeprefix="${dict_dir}/bpe_${bpemode}_${nbpe}"
# Location of the unzipped dataset (checked in stage -1).
data_dir=./TED_EnZh
# Where the extracted text/wav lists are written.
target_dir=data/ted_en_zh
# Root of the dumped features.
dumpdir=data/dump
do_delta=false
# Number of parallel jobs passed to the feature/dump/json tools.
nj=20

# Presumably lets "--name value" arguments override the variables above -- TODO confirm.
source ${MAIN_ROOT}/utils/parse_options.sh

TARGET_DIR=${MAIN_ROOT}/dataset
mkdir -p ${TARGET_DIR}
mkdir -p data
mkdir -p ${dict_dir}


# Stage -1: verify that the dataset has been downloaded, then extract the
# transcripts and wav lists into ${target_dir} with local/ted_en_zh.py.
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
    # Quoted so that a path containing spaces does not break the test.
    if [ ! -e "${data_dir}" ]; then
        echo "Error: Dataset is not available. Please download and unzip the dataset"
        echo "Download Link: https://pan.baidu.com/s/18L-59wgeS96WkObISrytQQ Passwd: bva0"
        echo "The tree of the directory should be:"
        echo "."
        echo "|-- En-Zh"
        echo "|-- test-segment"
        echo "    |-- tst2010"
        echo "    |-- ..."
        echo "|-- train-split"
        echo "    |-- train-segment"
        echo "|-- README.md"

        exit 1
    fi

    # extract data
    echo "data Extraction"
    python3 local/ted_en_zh.py \
    --tgt-dir="${target_dir}" \
    --src-dir="${data_dir}"

fi
# Directory that holds the prepared data directories (also used by later stages).
prep_dir=${target_dir}/data_prep
# Stage 0: build text.en / text.zh, wav.scp, utt2spk and spk2utt for each split
# and drop utterance ids that occur more than once.
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    ### Task dependent. You have to make data the following preparation part by yourself.
    ### But you can utilize Kaldi recipes in most cases
    echo "stage 0: Data preparation"
    for set in train dev test; do
    # for set in train; do
        dst=${target_dir}/${set}
        for lang in en zh; do

            if [ ${lang} = 'en' ]; then
                echo "remove punctuation $lang"
                # remove punctuation
                local/remove_punctuation.pl < ${dst}/${lang}.org > ${dst}/${lang}.raw
            else
                cp ${dst}/${lang}.org ${dst}/${lang}.raw
            fi

            # ${dst}/.yaml holds one utterance id per line; prefix every text line with it.
            paste -d " " ${dst}/.yaml ${dst}/${lang}.raw | sort > ${dst}/text.${lang}


        done
        # error check: the id list and both transcripts must have the same number of lines
        n=$(cat ${dst}/.yaml | wc -l)
        n_en=$(cat ${dst}/en.raw | wc -l)
        n_tgt=$(cat ${dst}/zh.raw | wc -l)
        if [ ${n} -ne ${n_en} ]; then
            echo "Warning: expected ${n} data files, found ${n_en}"
            exit 1
        fi
        if [ ${n} -ne ${n_tgt} ]; then
            echo "Warning: expected ${n} data files, found ${n_tgt}"
            exit 1
        fi

        echo "done text processing"
        cat ${dst}/wav.scp.org | uniq | sort -k1,1 -u > ${dst}/wav.scp
        cat ${dst}/utt2spk.org | uniq | sort -k1,1 -u > ${dst}/utt2spk

        cat ${dst}/utt2spk | utils/utt2spk_to_spk2utt.pl | sort -k1,1 -u > ${dst}/spk2utt
        rm -rf ${prep_dir}/${set}.en-zh
        mkdir -p ${prep_dir}/${set}.en-zh
        echo "remove duplicate lines..."
        # `uniq -c` prints "<spaces><count> <id>". The pattern is anchored to the
        # start of the line so that only a count of exactly 1 matches; without
        # the anchor, counts such as 11 or 21 would also be treated as unique.
        cut -d ' ' -f 1 ${dst}/text.en | sort | uniq -c | sort -n -k1 -r | grep -v '^ *1 ted-en-zh' \
            | sed 's/^[ \t]*//' > ${dst}/duplicate_lines
        cut -d ' ' -f 1 ${dst}/text.en | sort | uniq -c | sort -n -k1 -r | grep '^ *1 ted-en-zh' \
            | cut -d '1' -f 2- | sed 's/^[ \t]*//' > ${dst}/reclist
        reduce_data_dir.sh ${dst} ${dst}/reclist ${prep_dir}/${set}.en-zh
        echo "done wav processing"
        for l in en zh; do
            cp ${dst}/text.${l} ${prep_dir}/${set}.en-zh/text.${l}
        done
        utils/fix_data_dir.sh --utt_extra_files \
        "text.en text.zh" \
        ${prep_dir}/${set}.en-zh
    done
fi

# Dump directories for the three splits (also read by stages 2 and 3).
feat_tr_dir=${dumpdir}/train_sp/delta${do_delta}; mkdir -p ${feat_tr_dir}
feat_dt_dir=${dumpdir}/dev/delta${do_delta}; mkdir -p ${feat_dt_dir}
feat_trans_dir=${dumpdir}/test/delta${do_delta}; mkdir -p ${feat_trans_dir}
# Stage 1: fbank+pitch features, 3-way speed perturbation of the training set,
# per-language data directories, length filtering, CMVN and feature dump.
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    ### Task dependent. You have to design training and dev sets by yourself.
    ### But you can utilize Kaldi recipes in most cases
    echo "stage 1: Feature Generation"
    fbankdir=data/fbank
    # Generate the fbank features; by default 80-dimensional fbanks with pitch on each frame
    for x in train dev test; do
        steps/make_fbank_pitch.sh --cmd "$train_cmd" --nj ${nj} --write_utt2num_frames true \
            ${prep_dir}/${x}.en-zh data/make_fbank/${x} ${fbankdir}
    done
    
    echo "speed perturbation"
    utils/perturb_data_dir_speed.sh 0.9 ${prep_dir}/train.en-zh ${prep_dir}/temp1.en-zh
    utils/perturb_data_dir_speed.sh 1.0 ${prep_dir}/train.en-zh ${prep_dir}/temp2.en-zh
    utils/perturb_data_dir_speed.sh 1.1 ${prep_dir}/train.en-zh ${prep_dir}/temp3.en-zh

    utils/combine_data.sh --extra-files utt2uniq ${prep_dir}/train_sp.en-zh \
    ${prep_dir}/temp1.en-zh ${prep_dir}/temp2.en-zh ${prep_dir}/temp3.en-zh
    rm -r ${prep_dir}/temp*.en-zh 
    utils/fix_data_dir.sh ${prep_dir}/train_sp.en-zh

    steps/make_fbank_pitch.sh --cmd "$train_cmd" --nj ${nj} --write_utt2num_frames true \
        ${prep_dir}/train_sp.en-zh exp/make_fbank/train_sp.en-zh ${fbankdir}

    # Rebuild text.<lang> for the perturbed set: every original utterance id is
    # mapped to its "sp0.9-", "sp1.0-" and "sp1.1-" prefixed copy.
    for lang in en zh; do
        cat /dev/null > ${prep_dir}/train_sp.en-zh/text.${lang}
        for p in "sp0.9-" "sp1.0-" "sp1.1-"; do
            awk -v p=${p} '{printf("%s %s%s\n", $1, p, $1);}' ${prep_dir}/train.en-zh/utt2spk > ${prep_dir}/train_sp.en-zh/utt_map
            utils/apply_map.pl -f 1 ${prep_dir}/train_sp.en-zh/utt_map < ${prep_dir}/train.en-zh/text.${lang} >>${prep_dir}/train_sp.en-zh/text.${lang}
        done
    done

    for x in train_sp dev test; do
        local/divide_lang.sh ${prep_dir}/${x}.en-zh zh
    done

    for x in train_sp dev; do
        # remove utt having more than 3000 frames
        # remove utt having more than 400 characters
        for lang in zh en; do
            remove_longshortdata.sh --maxframes 3000 --maxchars 400 ${prep_dir}/${x}.en-zh.${lang} ${prep_dir}/${x}.en-zh.${lang}.tmp
        done
        # Keep only utterances that survived the filter in BOTH languages.
        # The language is spelled out here: ${lang} still holds the last value
        # of the loop above ("en"), which previously made both lists come from
        # the en text so the zh filter result was never taken into account.
        cut -f 1 -d " " ${prep_dir}/${x}.en-zh.en.tmp/text > ${prep_dir}/${x}.en-zh.zh.tmp/reclist1
        cut -f 1 -d " " ${prep_dir}/${x}.en-zh.zh.tmp/text > ${prep_dir}/${x}.en-zh.zh.tmp/reclist2
        comm -12 ${prep_dir}/${x}.en-zh.zh.tmp/reclist1 ${prep_dir}/${x}.en-zh.zh.tmp/reclist2 > ${prep_dir}/${x}.en-zh.en.tmp/reclist

        for lang in zh en; do
            reduce_data_dir.sh ${prep_dir}/${x}.en-zh.${lang}.tmp ${prep_dir}/${x}.en-zh.en.tmp/reclist ${prep_dir}/${x}.en-zh.${lang}
            utils/fix_data_dir.sh  ${prep_dir}/${x}.en-zh.${lang}
        done
        rm -rf ${prep_dir}/${x}.en-zh.*.tmp
    done

    # CMVN statistics are computed on the training set and reused for dev/test.
    compute-cmvn-stats scp:${prep_dir}/train_sp.en-zh.zh/feats.scp ${prep_dir}/train_sp.en-zh.zh/cmvn.ark

    dump.sh --cmd "$train_cmd" --nj ${nj} --do_delta $do_delta \
        ${prep_dir}/train_sp.en-zh.zh/feats.scp ${prep_dir}/train_sp.en-zh.zh/cmvn.ark ${prep_dir}/dump_feats/train_sp.en-zh.zh ${feat_tr_dir}
    dump.sh --cmd "$train_cmd" --nj ${nj} --do_delta $do_delta \
        ${prep_dir}/dev.en-zh.zh/feats.scp ${prep_dir}/train_sp.en-zh.zh/cmvn.ark ${prep_dir}/dump_feats/dev.en-zh.zh ${feat_dt_dir}
    dump.sh --cmd "$train_cmd" --nj ${nj} --do_delta $do_delta \
        ${prep_dir}/test.en-zh.zh/feats.scp ${prep_dir}/train_sp.en-zh.zh/cmvn.ark ${prep_dir}/dump_feats/test.en-zh.zh ${feat_trans_dir}
fi

# Output paths of the joint dictionary and the sentencepiece model.
dict=${dict_dir}/ted_en_zh_${bpemode}${nbpe}.txt
# NOTE(review): nlsyms is not referenced again in this script.
nlsyms=${dict_dir}/ted_en_zh_non_lang_syms.txt
bpemodel=${dict_dir}/ted_en_zh_${bpemode}${nbpe}
# Stage 2: train the BPE model, write the dictionary and create the json files.
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    echo "stage 2: Dictionary and Json Data Preparation"

    echo "make a joint source and target dictionary"
    echo "<unk> 1" > ${dict} # <unk> must be 1, 0 will be used for "blank" in CTC
    # Number of lines already in the dictionary; the BPE pieces are numbered after it.
    offset=$(wc -l < ${dict})
    # Training text for sentencepiece: lines matching "sp1.0" from the text files of
    # all train_sp.en-zh.* directories, first field dropped, blank lines removed.
    grep sp1.0 ${prep_dir}/train_sp.en-zh.*/text | cut -f 2- -d' ' | grep -v -e '^\s*$' > ${dict_dir}/input.txt
    spm_train  --input=${dict_dir}/input.txt --vocab_size=${nbpe} --model_type=${bpemode} --model_prefix=${bpemodel} --input_sentence_size=100000000 --character_coverage=1.0
    # Append every distinct piece as "<piece> <index>", with index = line number + offset.
    spm_encode --model=${bpemodel}.model --output_format=piece < ${dict_dir}/input.txt | tr ' ' '\n' | sort | uniq | awk -v offset=${offset} '{print $0 " " NR+offset}' >> ${dict}
    wc -l ${dict}

    echo "make json files"
    # One json per split, built from the zh data directories.
    data2json.sh --nj ${nj} --feat ${feat_tr_dir}/feats.scp --text ${prep_dir}/train_sp.en-zh.zh/text --bpecode ${bpemodel}.model --lang zh \
        ${prep_dir}/train_sp.en-zh.zh ${dict} > ${feat_tr_dir}/data_${bpemode}${nbpe}.json
    data2json.sh --feat ${feat_dt_dir}/feats.scp --text ${prep_dir}/dev.en-zh.zh/text --bpecode ${bpemodel}.model --lang zh \
        ${prep_dir}/dev.en-zh.zh ${dict} > ${feat_dt_dir}/data_${bpemode}${nbpe}.json
    data2json.sh --feat ${feat_trans_dir}/feats.scp --text ${prep_dir}/test.en-zh.zh/text --bpecode ${bpemodel}.model --lang zh \
        ${prep_dir}/test.en-zh.zh ${dict} > ${feat_trans_dir}/data_${bpemode}${nbpe}.json
    echo "update json (add source references)"
    # update json (add source references)
    # Only train_sp and dev are updated here; the test json is left as generated above.
    for x in train_sp dev; do
        feat_dir=${dumpdir}/${x}/delta${do_delta}
        # NOTE: this reassigns data_dir, which until now held the raw dataset path.
        # The cut keeps the part of ${x} before the first "." (the whole name here).
        data_dir=${prep_dir}/$(echo ${x} | cut -f 1 -d ".").en-zh.en
        update_json.sh --text ${data_dir}/text --bpecode ${bpemodel}.model \
            ${feat_dir}/data_${bpemode}${nbpe}.json ${data_dir} ${dict}
    done
fi

# Stage 3: convert each split's json file into a manifest under data/.
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # Index-aligned lists: dump directory -> manifest suffix.
    dump_dirs=(${feat_tr_dir} ${feat_dt_dir} ${feat_trans_dir})
    split_names=(train dev test)
    echo "stage 3: Format the Json Data"
    for idx in "${!dump_dirs[@]}"; do
        json_file=${dump_dirs[$idx]}/data_${bpemode}${nbpe}.json
        manifest_file=data/manifest.${split_names[$idx]}
        python3 ${MAIN_ROOT}/utils/espnet_json_to_manifest.py \
         --json-file ${json_file} \
         --manifest-file ${manifest_file}
    done
fi
echo "Ted En-Zh Data preparation done."
exit 0


================================================
FILE: examples/ted_en_zh/st1/local/divide_lang.sh
================================================
#!/bin/bash

# Copyright 2019 Kyoto University (Hirofumi Inaguma)
#           2021 PaddlePaddle
#  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)

# Split a bilingual data directory <set> into two monolingual ones,
# <set>.en and <set>.<lang>, each with its own "text" file.

. ./path.sh

if [ "$#" -ne 2 ]; then
    echo "Usage: $0 <set> <lang>"
    echo "e.g.: $0 dev zh"
    exit 1
fi

set=$1
lang=$2
export LC_ALL=en_US.UTF-8

# make_lang_dir <suffix>
# Copy stuff into its final location [this has been moved from the format_data script]:
# sorts every data file present in ${set} into ${set}.<suffix>, writes
# text.<suffix> (with non-printable characters stripped) as "text", then fixes
# and validates the new directory. Exits the script when validation fails.
make_lang_dir() {
    local suffix=$1
    local out_dir=${set}.${suffix}

    mkdir -p ${out_dir}
    for f in spk2utt utt2spk segments wav.scp feats.scp utt2num_frames; do
        if [ -f ${set}/${f} ]; then
            sort ${set}/${f} > ${out_dir}/${f}
        fi
    done
    sort ${set}/text.${suffix} | sed $'s/[^[:print:]]//g' > ${out_dir}/text

    utils/fix_data_dir.sh ${out_dir}
    if [ -f ${out_dir}/feats.scp ]; then
        utils/validate_data_dir.sh ${out_dir} || exit 1;
    else
        utils/validate_data_dir.sh --no-feats --no-wav ${out_dir} || exit 1;
    fi
}

# for En
make_lang_dir en

# for target language
make_lang_dir ${lang}


================================================
FILE: examples/ted_en_zh/st1/local/download_pretrain.sh
================================================
#!/bin/bash

# Download the pretrained torch checkpoint and convert it to a paddle one.
# Each step is checked on its own, so a failed download is reported
# immediately instead of only surfacing through the conversion step.

# download pytorch weight
# NOTE(review): --no-check-certificate disables TLS certificate verification.
if ! wget https://paddlespeech.cdn.bcebos.com/s2t/ted_en_zh/st1/snapshot.ep.98 --no-check-certificate; then
    echo "Failed in downloading and converting!"
    exit 1
fi

# convert pytorch weight to paddlepaddle
if ! python local/convert_torch_to_paddle.py \
    --torch_ckpt snapshot.ep.98 \
    --paddle_ckpt paddle.98.pdparams; then
    echo "Failed in downloading and converting!"
    exit 1
fi

# Or you can download converted weights
# wget https://paddlespeech.cdn.bcebos.com/s2t/ted_en_zh/st1/paddle.98.pdparams --no-check-certificate

exit 0

================================================
FILE: examples/ted_en_zh/st1/local/remove_punctuation.pl
================================================
#!/usr/bin/perl

# Filter: reads text from STDIN and prints each line with punctuation removed
# and whitespace normalised. Apostrophes and the literal token "<space>"
# (used for scoring) are kept.

use warnings;
use strict;

binmode(STDIN,":utf8");
binmode(STDOUT,":utf8");

while(<STDIN>) {
  $_ = " $_ ";

  # remove punctuation except apostrophe
  # A single pass keeps "<space>" and "'" in place rather than swapping them
  # for placeholder words first: placeholders would corrupt input that really
  # contains the words "apostrophe" or "spacemark".
  #   (<space>)           -> matched first and put back unchanged
  #   (?!')[[:punct:]]    -> any punctuation character other than "'" is dropped
  s/(<space>)|(?!')[[:punct:]]/defined($1) ? $1 : ''/ge;

  # remove whitespace
  s/\s+/ /g;
  s/^\s+//;
  s/\s+$//;

  print "$_\n";
}


================================================
FILE: examples/ted_en_zh/st1/local/ted_en_zh.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import codecs
import os


def _format_utt_id(stem):
    """Return the utterance id for a file stem such as ``123_4``.

    The part before the first ``_`` is left-padded with zeros to at least six
    characters and the result is prefixed with ``ted-en-zh-``.
    """
    audio_name = stem.split('_')[0]
    padding = '0' * max(0, 6 - len(audio_name))
    return 'ted-en-zh-' + padding + stem


# org_split = 'train-split/train-segment'
# text_file = 'En-Zh/train.en-zh'
# data_split = 'train'
def data_process(src_dir, tgt_dir, wav_dir_list, text_file_list,
                 data_split_list):
    """Write the raw list files of every data split.

    For each ``(org_split, text_file, data_split)`` triple the following files
    are created in ``<tgt_dir>/<data_split>``:

    * ``wav.scp.org``: ``<utt> cat <absolute path> |`` for every file in
      ``<src_dir>/<org_split>`` larger than 30000 bytes.
    * ``utt2spk.org``: ``<utt> <spk>`` sorted by utterance id, where ``<spk>``
      is the part of the id before the first ``_``.
    * ``.yaml`` / ``en.org`` / ``zh.org``: utterance id, lower-cased English
      text and Chinese text of every line of ``<src_dir>/<text_file>``.

    All output files are written as UTF-8 regardless of the process locale.

    Args:
        src_dir: root directory of the unpacked dataset.
        tgt_dir: directory the per-split output directories are created in.
        wav_dir_list: audio directories, relative to ``src_dir``.
        text_file_list: tab-separated ``wav<TAB>en<TAB>zh`` files, relative to
            ``src_dir``.
        data_split_list: name of the output split for each entry.

    Raises:
        AssertionError: if a speaker id is not 16 characters long, a text line
            does not have exactly three tab-separated fields, or its first
            field does not end with ``wav``.
    """

    for org_split, text_file, data_split in zip(wav_dir_list, text_file_list,
                                                data_split_list):
        local_data_split_dir = os.path.join(tgt_dir, data_split)

        os.makedirs(local_data_split_dir, exist_ok=True)
        utts = []
        utt2spk = {}
        with open(os.path.join(local_data_split_dir, 'wav.scp.org'), 'w',
                  encoding='utf-8') as wav_wf, \
            open(os.path.join(local_data_split_dir, 'utt2spk.org'), 'w',
                 encoding='utf-8') as utt2spk_wf:
            for files in os.listdir(os.path.join(src_dir, org_split)):
                files = files.strip()
                file_path = os.path.join(src_dir, org_split, files)
                size = os.path.getsize(file_path)
                # Files of at most 30000 bytes are skipped.
                if size <= 30000:
                    continue
                # format the name of utterance
                utt = _format_utt_id(files.split('.')[0])
                utts.append(utt)
                spk = utt.split('_')[0]
                utt2spk[utt] = spk
                assert len(spk) == 16, "%r" % spk
                print(utt, 'cat', os.path.abspath(file_path), '|', file=wav_wf)
            for utt in sorted(utts):
                print(utt, utt2spk[utt], file=utt2spk_wf)

        with open(os.path.join(local_data_split_dir, 'en.org'), 'w',
                  encoding='utf-8') as en_wf, \
            open(os.path.join(local_data_split_dir, 'zh.org'), 'w',
                 encoding='utf-8') as zh_wf, \
            open(os.path.join(local_data_split_dir, '.yaml'), 'w',
                 encoding='utf-8') as yaml_wf, \
            codecs.open(os.path.join(src_dir, text_file), 'r', encoding='utf-8',
                        errors='ignore') as rf:
            count = 0
            for line in rf:
                line = line.strip()
                line_spl = line.split('\t')
                assert len(line_spl) == 3, "%r" % line
                wav, en, zh = line_spl
                assert wav.endswith('wav'), "%r" % wav[-3:]
                utt = _format_utt_id(wav.split('.')[0])
                print(utt, file=yaml_wf)
                print(en.lower(), file=en_wf)
                print(zh, file=zh_wf)
                count += 1
            print('%s set lines count: %d' % (data_split, count))


if __name__ == '__main__':
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument(
        "--src-dir",
        type=str,
        default="",
        help="Directory to kaldi splited data. (default: %(default)s)")
    cli.add_argument(
        "--tgt-dir",
        type=str,
        default="local/ted_en_zh",
        help="Directory to save processed data. (default: %(default)s)")
    opts = cli.parse_args()

    # One row per output split: (audio directory, transcript file, split name).
    # tst2014 is used as the dev set and tst2015 as the test set.
    split_table = [
        ('train-split/train-segment', 'En-Zh/train.en-zh', 'train'),
        ('test-segment/tst2014', 'En-Zh/tst2014.en-zh', 'dev'),
        ('test-segment/tst2015', 'En-Zh/tst2015.en-zh', 'test'),
    ]
    # Transpose the rows into the three parallel lists data_process expects.
    wav_dirs, text_files, split_names = (list(column)
                                         for column in zip(*split_table))
    data_process(opts.src_dir, opts.tgt_dir, wav_dirs, text_files, split_names)


================================================
FILE: examples/ted_en_zh/st1/local/test.sh
================================================
#! /usr/bin/env bash

# Decode a checkpoint with ${BIN_DIR}/test.py, once per decoding method.
# Arguments: config_path decode_config_path ckpt_path_prefix

if [ "$#" -ne 3 ]; then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix"
    exit -1
fi

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES.
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
decode_config_path=$2
ckpt_prefix=$3

for method in fullsentence; do
    echo "decoding ${method}"
    # The result file is written next to the checkpoint as <prefix>.<method>.rsl
    if ! python3 -u ${BIN_DIR}/test.py \
        --ngpu ${ngpu} \
        --config ${config_path} \
        --decode_cfg ${decode_config_path} \
        --result_file ${ckpt_prefix}.${method}.rsl \
        --checkpoint_path ${ckpt_prefix} \
        --opts decode.decoding_method ${method}; then
        echo "Failed in evaluation!"
        exit 1
    fi
done

exit 0


================================================
FILE: examples/ted_en_zh/st1/local/train.sh
================================================
#!/bin/bash

# Launch training with ${BIN_DIR}/train.py.
# Arguments: config_path ckpt_name [ckpt_path] [ips]
#   ckpt_path  checkpoint to start from (may be an empty string)
#   ips        comma-separated node IPs for multi-node training

# The bounds are combined with "||": with "&&" the test could never be true.
# Two arguments are still accepted so that callers omitting ckpt_path keep working.
if [ $# -lt 2 ] || [ $# -gt 4 ];then
    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ckpt_path(optional) ips(optional)"
    exit -1
fi

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES.
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
ckpt_name=$2
ckpt_path=$3
# ips is the fourth argument; reading $3 here would reuse the checkpoint path.
ips=$4

if [ -z "${ips}" ];then
  ips_config=
else
  ips_config="--ips="${ips}
fi

mkdir -p exp

# seed may break model convergence
seed=0
if [ ${seed} != 0 ]; then
    export FLAGS_cudnn_deterministic=True
fi

if [ ${ngpu} == 0 ]; then
python3 -u ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--config ${config_path} \
--output exp/${ckpt_name} \
--checkpoint_path "${ckpt_path}" \
--seed ${seed}
else
python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--config ${config_path} \
--output exp/${ckpt_name} \
--checkpoint_path "${ckpt_path}" \
--seed ${seed}
fi
# Remember the training status now: the clean-up "if" below overwrites $?.
train_status=$?

if [ ${seed} != 0 ]; then
    unset FLAGS_cudnn_deterministic
fi

if [ ${train_status} -ne 0 ]; then
    echo "Failed in training!"
    exit 1
fi

exit 0


================================================
FILE: examples/ted_en_zh/st1/path.sh
================================================
# Environment setup for this recipe; meant to be sourced (run.sh does `. ./path.sh`).

# Repository root: three levels above the current working directory.
export MAIN_ROOT=`realpath ${PWD}/../../../`

# Put the repository root and both utils directories in front of PATH.
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PWD}/utils:${PATH}
export LC_ALL=C

export PYTHONDONTWRITEBYTECODE=1
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8 
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/


# BIN_DIR is where local/train.sh and local/test.sh look for train.py / test.py.
MODEL=u2_st
export BIN_DIR=${MAIN_ROOT}/paddlespeech/s2t/exps/${MODEL}/bin

# Kaldi
export KALDI_ROOT=${MAIN_ROOT}/tools/kaldi
# Source Kaldi's env.sh only when it exists.
[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
# A missing common_path.sh only produces a message on stderr; setup continues.
[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present, can not using Kaldi!"
[ -f $KALDI_ROOT/tools/config/common_path.sh ] && . $KALDI_ROOT/tools/config/common_path.sh

================================================
FILE: examples/ted_en_zh/st1/run.sh
================================================
#!/bin/bash
# Entry point of the recipe: data preparation (0), training (1),
# checkpoint averaging (2) and decoding (3).
set -e
. ./path.sh || exit 1;
. ./cmd.sh || exit 1;

# GPUs exported as CUDA_VISIBLE_DEVICES for training.
gpus=0,1,2,3
# First and last stage to run (inclusive). The default skips data preparation.
# NOTE(review): stop_stage defaults to 4 but only stages 0-3 exist in this script.
stage=1
stop_stage=4
conf_path=conf/transformer_mtl_noam.yaml
ips=            #xx.xx.xx.xx,xx.xx.xx.xx
decode_conf_path=conf/tuning/decode.yaml
ckpt_path= # paddle.98 # (finetune from FAT-ST pretrained model)
# Number of best checkpoints averaged in stage 2.
avg_num=5
data_path=./TED_EnZh # path to unzipped data
# Presumably lets "--name value" arguments override the variables above -- TODO confirm.
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

avg_ckpt=avg_${avg_num}
# Experiment name: the config file name up to its first ".".
ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}')
echo "checkpoint name ${ckpt}"


if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # prepare data
    bash ./local/data.sh --data_dir ${data_path} || exit -1
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # train model, all `ckpt` under `exp` dir
    # The pretrained weights are only downloaded when ckpt_path is non-empty.
    if [ -n "${ckpt_path}" ]; then
        echo "Finetune from Pretrained Model" ${ckpt_path}
        ./local/download_pretrain.sh || exit -1
    fi
    # ckpt_path is quoted so it is passed even when empty; ips is unquoted and
    # disappears from the argument list when empty.
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} "${ckpt_path}" ${ips}
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # avg n best model
    avg.sh best exp/${ckpt}/checkpoints ${avg_num}
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # test ckpt avg_n
    # Decoding always runs on GPU 0, independent of ${gpus}.
    CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
fi


================================================
FILE: examples/tess/README.md
================================================
# 背景

本示例完成 TESS 音频情绪分类任务，
并借此校验和测试 paddle.audio 的 feature、backend 等相关模块。

本实验采用 PaddleSpeech 提供的 PANNs CNN14 预训练模型进行 finetune：
- CNN14: 该模型主要包含12个卷积层和2个全连接层，模型参数的数量为 79.6M，embedding 维度是 2048。

`PANNs`([PANNs: Large-Scale Pretrained Audio Neural Networks for Audio Pattern Recognition](https://arxiv.org/pdf/1912.10211.pdf))是基于Audioset数据集训练的声音分类/识别的模型。经过预训练后，模型可以用于提取音频的 embedding。本示例将使用`PANNs`的预训练模型Finetune完成声音分类的任务。

## 数据集

[TESS: Toronto emotional speech set](https://tspace.library.utoronto.ca/handle/1807/24487) 是一个包含有 200 个目标词的时长为 2 ~ 3 秒的音频,七种情绪的数据集。由两个女演员录制(24岁和64岁),其中情绪分别是愤怒,恶心,害怕,高兴,惊喜,伤心,平淡。

## 模型指标

根据 `TESS` 提供的fold信息，对数据集进行 5-fold 的 fine-tune 训练和评估，dev准确率如下：

|Model|feat_type|Acc| note |
|--|--|--| -- |
|CNN14| mfcc | 0.9929 |3 epoch |
|CNN14| logmelspectrogram | 0.9983 | 3 epoch |
|CNN14| spectrogram| 0.95 | 11 epoch |
|CNN14| melspectrogram| 0.9375 | 17 epoch |

### 模型训练

启动训练:
```shell
$ CUDA_VISIBLE_DEVICES=0 ./run.sh 1 conf/panns_mfcc.yaml
$ CUDA_VISIBLE_DEVICES=0 ./run.sh 1 conf/panns_logmelspectrogram.yaml
$ CUDA_VISIBLE_DEVICES=0 ./run.sh 1 conf/panns_melspectrogram.yaml
$ CUDA_VISIBLE_DEVICES=0 ./run.sh 1 conf/panns_spectrogram.yaml
```


================================================
FILE: examples/tess/cls0/conf/panns_logmelspectrogram.yaml
================================================
data:
  dataset: 'paddle.audio.datasets:TESS'
  num_classes: 7
  train:
    mode: 'train'
    split: 1
    feat_type: 'logmelspectrogram'
  dev:
    mode: 'dev'
    split: 1
    feat_type: 'logmelspectrogram'

model:
  backbone: 'paddlespeech.cls.models:cnn14'

feature:
  n_fft: 1024
  hop_length: 320
  window: 'hann'
  win_length: 1024
  f_min: 50.0
  f_max: 14000.0
  n_mels: 64

training:
  epochs: 5
  learning_rate: 0.0005
  num_workers: 2
  batch_size: 128
  checkpoint_dir: './checkpoint_logmelspectrogram'
  save_freq: 1
  log_freq: 1


================================================
FILE: examples/tess/cls0/conf/panns_melspectrogram.yaml
================================================
data:
  dataset: 'paddle.audio.datasets:TESS'
  num_classes: 7
  train:
    mode: 'train'
    split: 1
    feat_type: 'melspectrogram'
  dev:
    mode: 'dev'
    split: 1
    feat_type: 'melspectrogram'

model:
  backbone: 'paddlespeech.cls.models:cnn14'

feature:
  n_fft: 1024
  hop_length: 320
  window: 'hann'
  win_length: 1024
  f_min: 50.0
  f_max: 14000.0
  n_mels: 64

training:
  epochs: 10
  learning_rate: 0.0005
  num_workers: 2
  batch_size: 128
  checkpoint_dir: './checkpoint_melspectrogram'
  save_freq: 1
  log_freq: 1


================================================
FILE: examples/tess/cls0/conf/panns_mfcc.yaml
================================================
data:
  dataset: 'paddle.audio.datasets:TESS'
  num_classes: 7
  train:
    mode: 'train'
    split: 1
    feat_type: 'mfcc'
  dev:
    mode: 'dev'
    split: 1
    feat_type: 'mfcc'

model:
  backbone: 'paddlespeech.cls.models:cnn14'

feature:
  n_fft: 1024
  hop_length: 320
  window: 'hann'
  win_length: 1024
  f_min: 50.0
  f_max: 14000.0
  n_mfcc: 64
  n_mels: 64

training:
  epochs: 5
  learning_rate: 0.0005
  num_workers: 2
  batch_size: 128
  checkpoint_dir: './checkpoint_mfcc'
  save_freq: 1
  log_freq: 1


================================================
FILE: examples/tess/cls0/conf/panns_spectrogram.yaml
================================================
data:
  dataset: 'paddle.audio.datasets:TESS'
  num_classes: 7
  train:
    mode: 'train'
    split: 1
    feat_type: 'spectrogram'
  dev:
    mode: 'dev'
    split: 1
    feat_type: 'spectrogram'

model:
  backbone: 'paddlespeech.cls.models:cnn14'

feature:
  n_fft: 126
  hop_length: 320
  window: 'hann'

training:
  epochs: 10
  learning_rate: 0.0005
  num_workers: 2
  batch_size: 128
  checkpoint_dir: './checkpoint_spectrogram'
  save_freq: 1
  log_freq: 1


================================================
FILE: examples/tess/cls0/local/train.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os

import paddle
import yaml

from paddlespeech.audio.utils import logger
from paddlespeech.audio.utils.time import Timer
from paddlespeech.cls.models import SoundClassifier
from paddlespeech.utils.dynamic_import import dynamic_import

# yapf: disable
# Command-line interface: the only option is the mandatory --cfg_path.
# The arguments are parsed here at module level, i.e. as soon as the file is executed.
# NOTE(review): the first positional parameter of ArgumentParser is `prog`, not
# `description` -- confirm that passing __doc__ here is intended.
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--cfg_path", type=str, required=True)
args = parser.parse_args()
# yapf: enable


def _collate_features(batch):
    """Collate (feature, label) samples into padded batch tensors.

    Each sample is expected to be ``(feat, label)`` where ``feat`` is a 2-D
    tensor laid out as ``(n_mels, length)``. Every feature is transposed to
    ``(length, n_mels)`` and padded at the end of the time axis up to the
    longest sample in the batch.

    Args:
        batch: Iterable of ``(feat, label)`` samples.

    Returns:
        A tuple ``(feats, labels, lengths)``: the stacked padded features,
        the labels as a tensor, and the original (unpadded) lengths as a
        tensor.
    """
    frames_list, label_list, length_list = [], [], []
    for feat, label in ((item[0], item[1]) for item in batch):
        # Switch from (n_mels, length) to time-major (length, n_mels).
        frames_list.append(paddle.transpose(feat, perm=[1, 0]))
        length_list.append(feat.shape[1])
        label_list.append(label)

    longest = max(length_list)
    # Pad only the trailing side of the first (time) axis.
    padded = [
        paddle.nn.functional.pad(
            frames, [0, longest - frames.shape[0], 0, 0], data_format='NLC')
        for frames in frames_list
    ]

    stacked = paddle.stack(padded)
    return stacked, paddle.to_tensor(label_list), paddle.to_tensor(
        length_list)


if __name__ == "__main__":
    # Fine-tuning entry point. Reads the YAML config given by --cfg_path,
    # builds the train/dev datasets and the classifier, then trains for
    # `training.epochs` epochs. On rank 0 it logs every `log_freq` steps and,
    # every `save_freq` epochs, evaluates on the dev set and saves a
    # checkpoint.

    # Only initialise the parallel environment for multi-process launches.
    nranks = paddle.distributed.get_world_size()
    if nranks > 1:
        paddle.distributed.init_parallel_env()
    local_rank = paddle.distributed.get_rank()

    # Load the experiment configuration.
    args.cfg_path = os.path.abspath(os.path.expanduser(args.cfg_path))
    with open(args.cfg_path, 'r') as f:
        config = yaml.safe_load(f)

    model_conf = config['model']
    data_conf = config['data']
    feat_conf = config['feature']
    feat_type = data_conf['train']['feat_type']
    training_conf = config['training']

    # Dataset

    # set audio backend, make sure paddleaudio >= 1.0.2 installed.
    paddle.audio.backends.set_backend('soundfile')

    # The dataset class is named in the config as "module:Class"; the split
    # options and the feature options are both passed as keyword arguments.
    ds_class = dynamic_import(data_conf['dataset'])
    train_ds = ds_class(**data_conf['train'], **feat_conf)
    dev_ds = ds_class(**data_conf['dev'], **feat_conf)
    train_sampler = paddle.io.DistributedBatchSampler(
        train_ds,
        batch_size=training_conf['batch_size'],
        shuffle=True,
        drop_last=False)
    train_loader = paddle.io.DataLoader(
        train_ds,
        batch_sampler=train_sampler,
        num_workers=training_conf['num_workers'],
        return_list=True,
        use_buffer_reader=True,
        collate_fn=_collate_features)

    # Model
    backbone_class = dynamic_import(model_conf['backbone'])
    backbone = backbone_class(pretrained=True, extract_embedding=True)
    model = SoundClassifier(backbone, num_class=data_conf['num_classes'])
    model = paddle.DataParallel(model)
    optimizer = paddle.optimizer.Adam(
        learning_rate=training_conf['learning_rate'],
        parameters=model.parameters())
    criterion = paddle.nn.loss.CrossEntropyLoss()

    # The timer is sized with the total number of training steps so it can
    # report an ETA.
    steps_per_epoch = len(train_sampler)
    timer = Timer(steps_per_epoch * training_conf['epochs'])
    timer.start()

    for epoch in range(1, training_conf['epochs'] + 1):
        model.train()

        # Running statistics, reset after every log line.
        avg_loss = 0
        num_corrects = 0
        num_samples = 0
        for batch_idx, batch in enumerate(train_loader):
            feats, labels, length = batch  # feats-->(N, length, n_mels)

            logits = model(feats)

            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()
            # Advance the schedule only when the optimizer was configured
            # with an LRScheduler instead of a constant learning rate.
            if isinstance(optimizer._learning_rate,
                          paddle.optimizer.lr.LRScheduler):
                optimizer._learning_rate.step()
            optimizer.clear_grad()

            # Calculate loss
            avg_loss += float(loss)

            # Calculate metrics
            preds = paddle.argmax(logits, axis=1)
            num_corrects += (preds == labels).numpy().sum()
            num_samples += feats.shape[0]

            timer.count()

            if (batch_idx + 1
                ) % training_conf['log_freq'] == 0 and local_rank == 0:
                lr = optimizer.get_lr()
                avg_loss /= training_conf['log_freq']
                avg_acc = num_corrects / num_samples

                print_msg = feat_type + ' Epoch={}/{}, Step={}/{}'.format(
                    epoch, training_conf['epochs'], batch_idx + 1,
                    steps_per_epoch)
                print_msg += ' loss={:.4f}'.format(avg_loss)
                print_msg += ' acc={:.4f}'.format(avg_acc)
                print_msg += ' lr={:.6f} step/sec={:.2f} | ETA {}'.format(
                    lr, timer.timing, timer.eta)
                logger.train(print_msg)

                avg_loss = 0
                num_corrects = 0
                num_samples = 0

        # Evaluation and checkpointing run on rank 0 only, after the last
        # step of every `save_freq`-th epoch.
        if epoch % training_conf[
                'save_freq'] == 0 and batch_idx + 1 == steps_per_epoch and local_rank == 0:
            dev_sampler = paddle.io.BatchSampler(
                dev_ds,
                batch_size=training_conf['batch_size'],
                shuffle=False,
                drop_last=False)
            dev_loader = paddle.io.DataLoader(
                dev_ds,
                batch_sampler=dev_sampler,
                num_workers=training_conf['num_workers'],
                return_list=True,
                use_buffer_reader=True,
                collate_fn=_collate_features)

            model.eval()
            num_corrects = 0
            num_samples = 0
            with logger.processing('Evaluation on validation dataset'):
                # No backward pass happens during validation, so disable
                # gradient tracking for the forward passes.
                with paddle.no_grad():
                    for batch_idx, batch in enumerate(dev_loader):
                        feats, labels, length = batch
                        logits = model(feats)

                        preds = paddle.argmax(logits, axis=1)
                        num_corrects += (preds == labels).numpy().sum()
                        num_samples += feats.shape[0]

            print_msg = '[Evaluation result] ' + str(feat_type)
            print_msg += ' dev_acc={:.4f}'.format(num_corrects / num_samples)

            logger.eval(print_msg)

            # Save model
            save_dir = os.path.join(training_conf['checkpoint_dir'],
                                    'epoch_{}'.format(epoch))
            logger.info('Saving model checkpoint to {}'.format(save_dir))
            paddle.save(model.state_dict(),
                        os.path.join(save_dir, 'model.pdparams'))
            paddle.save(optimizer.state_dict(),
                        os.path.join(save_dir, 'model.pdopt'))


================================================
FILE: examples/tess/cls0/local/train.sh
================================================
#!/bin/bash
# Launch local/train.py either through the Paddle distributed launcher or
# as a plain Python process.
#
# Usage: local/train.sh <ngpu> <cfg_path>
#   ngpu      GPU count; a value greater than 0 selects the distributed launcher
#   cfg_path  YAML config path forwarded to train.py as --cfg_path

ngpu=$1
cfg_path=$2

# Expansions are quoted so that paths containing whitespace or glob
# characters are passed through as a single argument.
if [ "${ngpu}" -gt 0 ]; then
    # Run on the devices listed in CUDA_VISIBLE_DEVICES.
    python3 -m paddle.distributed.launch --gpus "${CUDA_VISIBLE_DEVICES}" local/train.py \
    --cfg_path "${cfg_path}"
else
    python3 local/train.py \
    --cfg_path "${cfg_path}"
fi


================================================
FILE: examples/tess/cls0/path.sh
================================================
#!/bin/bash
# Environment setup for this example; run.sh loads it with `source path.sh`.

# Repository root, three levels above the current working directory.
# The path is quoted so a checkout location containing spaces still resolves.
export MAIN_ROOT=$(realpath "${PWD}/../../../")

export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export LC_ALL=C

export PYTHONDONTWRITEBYTECODE=1
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

# BIN_DIR points at the experiment scripts of the selected model.
MODEL=panns
export BIN_DIR=${MAIN_ROOT}/paddlespeech/cls/exps/${MODEL}

================================================
FILE: examples/tess/cls0/run.sh
================================================
#!/bin/bash
# Stage-driven entry point for this example.
#
# Usage: ./run.sh <stage> [stage arguments...]
#   stage 1: ./run.sh 1 <cfg_path>                               -> local/train.sh
#   stage 2: ./run.sh 2 <cfg_path>                               -> local/infer.sh
#   stage 3: ./run.sh 3 <ckpt> <output_dir>                      -> local/export.sh
#   stage 4: ./run.sh 4 <infer_device> <graph_dir> <audio_file>  -> local/static_model_infer.sh
#
# Every stage block ends with `exit 0`, so one invocation runs exactly one
# stage: the lowest-numbered stage that is >= <stage>.
set -e
source path.sh

# Without a stage argument the numeric tests below would only print
# "unary operator expected" errors and the script would exit 0 having done
# nothing, so fail early with a usage message instead.
if [ $# -lt 1 ]; then
    echo "Usage: $0 <stage> [stage arguments...]" >&2
    exit 1
fi

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when empty).
ngpu=$(echo "${CUDA_VISIBLE_DEVICES}" | awk -F "," '{print NF}')

stage=$1
stop_stage=100

if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    cfg_path=$2
    ./local/train.sh "${ngpu}" "${cfg_path}" || exit -1
    exit 0
fi

if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    cfg_path=$2
    ./local/infer.sh "${cfg_path}" || exit -1
    exit 0
fi

if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    ckpt=$2
    output_dir=$3
    ./local/export.sh "${ckpt}" "${output_dir}" || exit -1
    exit 0
fi

if [ "${stage}" -le 4 ] && [ "${stop_stage}" -ge 4 ]; then
    infer_device=$2
    graph_dir=$3
    audio_file=$4
    ./local/static_model_infer.sh "${infer_device}" "${graph_dir}" "${audio_file}" || exit -1
    exit 0
fi


================================================
FILE: examples/thchs30/README.md
================================================
# thchs30

* align0 - mfa alignment


================================================
FILE: examples/thchs30/align0/README.md
================================================
# THCHS-30 数据集强制对齐实验
-----
本实验对 THCHS-30 中文数据集用 [Montreal-Forced-Aligner](https://montreal-forced-aligner.readthedocs.io/en/latest/index.html) 进行强制对齐。
THCHS-30 的文本标注数据分为：
 1. 汉字级别（word），该数据集用空格对词进行了划分，我们在使用时将不同的字之间按空格划分
 2. 音节级别（syllable），即汉语中的一个拼音
 3. 音素级别（phone），一个拼音由多个音素组成，汉语的声母韵母可以理解为音素，不同的数据集有各自的音素标准，THCHS-30 数据集与标贝 BZNSYP 数据集的音素标准略有不同

 数据 A11_0 文本示例如下：
```
绿 是 阳春 烟 景 大块 文章 的 底色 四月 的 林 峦 更是 绿 得 鲜活 秀媚 诗意 盎然↩
lv4 shi4 yang2 chun1 yan1 jing3 da4 kuai4 wen2 zhang1 de5 di3 se4 si4 yue4 de5 lin2 luan2 geng4 shi4 lv4 de5 xian1 huo2 xiu4 mei4 shi1 yi4 ang4 ran2↩
l v4 sh ix4 ii iang2 ch un1 ii ian1 j ing3 d a4 k uai4 uu un2 zh ang1 d e5 d i3 s e4 s iy4 vv ve4 d e5 l in2 l uan2 g eng4 sh ix4 l v4 d e5 x ian1 h uo2 x iu4 m ei4 sh ix1 ii i4 aa ang4 r an2
```
## 开始实验
---
在本项目的 根目录/tools 执行
```
make
```
下载 MFA 的可执行包（也会同时下载本项目所需的其他工具）
执行如下命令：
```
cd align0
./run.sh
```
应用程序会自动下载 THCHS-30数据集，处理成 MFA 所需的文件格式并开始训练，您可以修改 `run.sh` 中的参数 `LEXICON_NAME` 来决定您需要强制对齐的级别（word、syllable 和 phone）
## MFA 所使用的字典
---
MFA 字典的格式请参考: [MFA 官方文档](https://montreal-forced-aligner.readthedocs.io/en/latest/)
phone.lexicon 直接使用的是 `THCHS-30/data_thchs30/lm_phone/lexicon.txt`
word.lexicon 考虑到了中文的多音字，使用**带概率的字典**, 生成规则请参考 `local/gen_word2phone.py`
`syllable.lexicon` 获取自 [DNSun/thchs30-pinyin2tone](https://github.com/DNSun/thchs30-pinyin2tone)
## 对齐结果
---
我们提供了三种级别 MFA 训练好的对齐结果、模型和字典（`syllable.lexicon` 在 `data/dict` 中，`phone.lexicon` 和 `word.lexicon` 运行数据预处理代码后会自动从原始数据集复制或生成）

**phone 级别：** [phone.lexicon](https://paddlespeech.cdn.bcebos.com/MFA/THCHS30/phone/phone.lexicon)、 [对齐结果](https://paddlespeech.cdn.bcebos.com/MFA/THCHS30/phone/thchs30_alignment.tar.gz)、[模型](https://paddlespeech.cdn.bcebos.com/MFA/THCHS30/phone/thchs30_model.zip)
**syllable 级别：** [syllable.lexicon](https://paddlespeech.cdn.bcebos.com/MFA/THCHS30/syllable/syllable.lexicon)、[对齐结果](https://paddlespeech.cdn.bcebos.com/MFA/THCHS30/syllable/thchs30_alignment.tar.gz)、[模型](https://paddlespeech.cdn.bcebos.com/MFA/THCHS30/syllable/thchs30_model.zip)
**word 级别：** [word.lexicon](https://paddlespeech.cdn.bcebos.com/MFA/THCHS30/word/word.lexicon)、[对齐结果](https://paddlespeech.cdn.bcebos.com/MFA/THCHS30/word/thchs30_alignment.tar.gz)、[模型](https://paddlespeech.cdn.bcebos.com/MFA/THCHS30/word/thchs30_model.zip)

随后，您可以参考 [MFA 官方文档](https://montreal-forced-aligner.readthedocs.io/en/latest/) 使用我们给您提供好的模型直接对自己的数据集进行强制对齐，注意，您需要使用和模型对应的 lexicon 文件，当文本是汉字时，您需要用空格把不同的**汉字**（而不是词语）分开


================================================
FILE: examples/thchs30/align0/data/dict/syllable.lexicon
================================================
A0 aa a0
A1 aa a1
A2 aa a2
A3 aa a3
A4 aa a4
AI0 aa ai0
AI1 aa ai1
AI2 aa ai2
AI3 aa ai3
AI4 aa ai4
AN0 aa an0
AN1 aa an1
AN2 aa an2
AN3 aa an3
AN4 aa an4
ANG0 aa ang0
ANG1 aa ang1
ANG2 aa ang2
ANG3 aa ang3
ANG4 aa ang4
AO0 aa ao0
AO1 aa ao1
AO2 aa ao2
AO3 aa ao3
AO4 aa ao4
BA0 b a0
BA1 b a1
BA2 b a2
BA3 b a3
BA4 b a4
BAI0 b ai0
BAI1 b ai1
BAI2 b ai2
BAI3 b ai3
BAI4 b ai4
BAN0 b an0
BAN1 b an1
BAN2 b an2
BAN3 b an3
BAN4 b an4
BANG0 b ang0
BANG1 b ang1
BANG2 b ang2
BANG3 b ang3
BANG4 b ang4
BAO0 b ao0
BAO1 b ao1
BAO2 b ao2
BAO3 b ao3
BAO4 b ao4
BEI0 b ei0
BEI1 b ei1
BEI2 b ei2
BEI3 b ei3
BEI4 b ei4
BEN0 b en0
BEN1 b en1
BEN2 b en2
BEN3 b en3
BEN4 b en4
BENG0 b eng0
BENG1 b eng1
BENG2 b eng2
BENG3 b eng3
BENG4 b eng4
BI0 b i0
BI1 b i1
BI2 b i2
BI3 b i3
BI4 b i4
BIAN0 b ian0
BIAN1 b ian1
BIAN2 b ian2
BIAN3 b ian3
BIAN4 b ian4
BIAO0 b iao0
BIAO1 b iao1
BIAO2 b iao2
BIAO3 b iao3
BIAO4 b iao4
BIE0 b ie0
BIE1 b ie1
BIE2 b ie2
BIE3 b ie3
BIE4 b ie4
BIN0 b in0
BIN1 b in1
BIN2 b in2
BIN3 b in3
BIN4 b in4
BING0 b ing0
BING1 b ing1
BING2 b ing2
BING3 b ing3
BING4 b ing4
BO0 b o0
BO1 b o1
BO2 b o2
BO3 b o3
BO4 b o4
BU0 b u0
BU1 b u1
BU2 b u2
BU3 b u3
BU4 b u4
CA0 c a0
CA1 c a1
CA2 c a2
CA3 c a3
CA4 c a4
CAI0 c ai0
CAI1 c ai1
CAI2 c ai2
CAI3 c ai3
CAI4 c ai4
CAN0 c an0
CAN1 c an1
CAN2 c an2
CAN3 c an3
CAN4 c an4
CANG0 c ang0
CANG1 c ang1
CANG2 c ang2
CANG3 c ang3
CANG4 c ang4
CAO0 c ao0
CAO1 c ao1
CAO2 c ao2
CAO3 c ao3
CAO4 c ao4
CE0 c e0
CE1 c e1
CE2 c e2
CE3 c e3
CE4 c e4
CEN0 c en0
CEN1 c en1
CEN2 c en2
CEN3 c en3
CEN4 c en4
CENG0 c eng0
CENG1 c eng1
CENG2 c eng2
CENG3 c eng3
CENG4 c eng4
CHA0 ch a0
CHA1 ch a1
CHA2 ch a2
CHA3 ch a3
CHA4 ch a4
CHAI0 ch ai0
CHAI1 ch ai1
CHAI2 ch ai2
CHAI3 ch ai3
CHAI4 ch ai4
CHAN0 ch an0
CHAN1 ch an1
CHAN2 ch an2
CHAN3 ch an3
CHAN4 ch an4
CHANG0 ch ang0
CHANG1 ch ang1
CHANG2 ch ang2
CHANG3 ch ang3
CHANG4 ch ang4
CHAO0 ch ao0
CHAO1 ch ao1
CHAO2 ch ao2
CHAO3 ch ao3
CHAO4 ch ao4
CHE0 ch e0
CHE1 ch e1
CHE2 ch e2
CHE3 ch e3
CHE4 ch e4
CHEN0 ch en0
CHEN1 ch en1
CHEN2 ch en2
CHEN3 ch en3
CHEN4 ch en4
CHENG0 ch eng0
CHENG1 ch eng1
CHENG2 ch eng2
CHENG3 ch eng3
CHENG4 ch eng4
CHI0 ch ix0
CHI1 ch ix1
CHI2 ch ix2
CHI3 ch ix3
CHI4 ch ix4
CHONG0 ch ong0
CHONG1 ch ong1
CHONG2 ch ong2
CHONG3 ch ong3
CHONG4 ch ong4
CHOU0 ch ou0
CHOU1 ch ou1
CHOU2 ch ou2
CHOU3 ch ou3
CHOU4 ch ou4
CHU0 ch u0
CHU1 ch u1
CHU2 ch u2
CHU3 ch u3
CHU4 ch u4
CHUAI0 ch uai0
CHUAI1 ch uai1
CHUAI2 ch uai2
CHUAI3 ch uai3
CHUAI4 ch uai4
CHUAN0 ch uan0
CHUAN1 ch uan1
CHUAN2 ch uan2
CHUAN3 ch uan3
CHUAN4 ch uan4
CHUANG0 ch uang0
CHUANG1 ch uang1
CHUANG2 ch uang2
CHUANG3 ch uang3
CHUANG4 ch uang4
CHUI0 ch ui0
CHUI1 ch ui1
CHUI2 ch ui2
CHUI3 ch ui3
CHUI4 ch ui4
CHUN0 ch un0
CHUN1 ch un1
CHUN2 ch un2
CHUN3 ch un3
CHUN4 ch un4
CHUO0 ch uo0
CHUO1 ch uo1
CHUO2 ch uo2
CHUO3 ch uo3
CHUO4 ch uo4
CI0 c iy0
CI1 c iy1
CI2 c iy2
CI3 c iy3
CI4 c iy4
CONG0 c ong0
CONG1 c ong1
CONG2 c ong2
CONG3 c ong3
CONG4 c ong4
COU0 c ou0
COU1 c ou1
COU2 c ou2
COU3 c ou3
COU4 c ou4
CU0 c u0
CU1 c u1
CU2 c u2
CU3 c u3
CU4 c u4
CUAN0 c uan0
CUAN1 c uan1
CUAN2 c uan2
CUAN3 c uan3
CUAN4 c uan4
CUI0 c ui0
CUI1 c ui1
CUI2 c ui2
CUI3 c ui3
CUI4 c ui4
CUN0 c un0
CUN1 c un1
CUN2 c un2
CUN3 c un3
CUN4 c un4
CUO0 c uo0
CUO1 c uo1
CUO2 c uo2
CUO3 c uo3
CUO4 c uo4
DA0 d a0
DA1 d a1
DA2 d a2
DA3 d a3
DA4 d a4
DAI0 d ai0
DAI1 d ai1
DAI2 d ai2
DAI3 d ai3
DAI4 d ai4
DAN0 d an0
DAN1 d an1
DAN2 d an2
DAN3 d an3
DAN4 d an4
DANG0 d ang0
DANG1 d ang1
DANG2 d ang2
DANG3 d ang3
DANG4 d ang4
DAO0 d ao0
DAO1 d ao1
DAO2 d ao2
DAO3 d ao3
DAO4 d ao4
DE0 d e0
DE1 d e1
DE2 d e2
DE3 d e3
DE4 d e4
DEI0 d ei0
DEI1 d ei1
DEI2 d ei2
DEI3 d ei3
DEI4 d ei4
DEN0 d en0
DEN1 d en1
DEN2 d en2
DEN3 d en3
DEN4 d en4
DENG0 d eng0
DENG1 d eng1
DENG2 d eng2
DENG3 d eng3
DENG4 d eng4
DI0 d i0
DI1 d i1
DI2 d i2
DI3 d i3
DI4 d i4
DIA0 d ia0
DIA1 d ia1
DIA2 d ia2
DIA3 d ia3
DIA4 d ia4
DIAN0 d ian0
DIAN1 d ian1
DIAN2 d ian2
DIAN3 d ian3
DIAN4 d ian4
DIAO0 d iao0
DIAO1 d iao1
DIAO2 d iao2
DIAO3 d iao3
DIAO4 d iao4
DIE0 d ie0
DIE1 d ie1
DIE2 d ie2
DIE3 d ie3
DIE4 d ie4
DING0 d ing0
DING1 d ing1
DING2 d ing2
DING3 d ing3
DING4 d ing4
DIU0 d iu0
DIU1 d iu1
DIU2 d iu2
DIU3 d iu3
DIU4 d iu4
DONG0 d ong0
DONG1 d ong1
DONG2 d ong2
DONG3 d ong3
DONG4 d ong4
DOU0 d ou0
DOU1 d ou1
DOU2 d ou2
DOU3 d ou3
DOU4 d ou4
DU0 d u0
DU1 d u1
DU2 d u2
DU3 d u3
DU4 d u4
DUAN0 d uan0
DUAN1 d uan1
DUAN2 d uan2
DUAN3 d uan3
DUAN4 d uan4
DUI0 d ui0
DUI1 d ui1
DUI2 d ui2
DUI3 d ui3
DUI4 d ui4
DUN0 d un0
DUN1 d un1
DUN2 d un2
DUN3 d un3
DUN4 d un4
DUO0 d uo0
DUO1 d uo1
DUO2 d uo2
DUO3 d uo3
DUO4 d uo4
E0 ee e0
E1 ee e1
E2 ee e2
E3 ee e3
E4 ee e4
EN0 ee en0
EN1 ee en1
EN2 ee en2
EN3 ee en3
EN4 ee en4
ER0 ee er0
ER1 ee er1
ER2 ee er2
ER3 ee er3
ER4 ee er4
FA0 f a0
FA1 f a1
FA2 f a2
FA3 f a3
FA4 f a4
FAN0 f an0
FAN1 f an1
FAN2 f an2
FAN3 f an3
FAN4 f an4
FANG0 f ang0
FANG1 f ang1
FANG2 f ang2
FANG3 f ang3
FANG4 f ang4
FEI0 f ei0
FEI1 f ei1
FEI2 f ei2
FEI3 f ei3
FEI4 f ei4
FEN0 f en0
FEN1 f en1
FEN2 f en2
FEN3 f en3
FEN4 f en4
FENG0 f eng0
FENG1 f eng1
FENG2 f eng2
FENG3 f eng3
FENG4 f eng4
FO0 f o0
FO1 f o1
FO2 f o2
FO3 f o3
FO4 f o4
FOU0 f ou0
FOU1 f ou1
FOU2 f ou2
FOU3 f ou3
FOU4 f ou4
FU0 f u0
FU1 f u1
FU2 f u2
FU3 f u3
FU4 f u4
GA0 g a0
GA1 g a1
GA2 g a2
GA3 g a3
GA4 g a4
GAI0 g ai0
GAI1 g ai1
GAI2 g ai2
GAI3 g ai3
GAI4 g ai4
GAN0 g an0
GAN1 g an1
GAN2 g an2
GAN3 g an3
GAN4 g an4
GANG0 g ang0
GANG1 g ang1
GANG2 g ang2
GANG3 g ang3
GANG4 g ang4
GAO0 g ao0
GAO1 g ao1
GAO2 g ao2
GAO3 g ao3
GAO4 g ao4
GE0 g e0
GE1 g e1
GE2 g e2
GE3 g e3
GE4 g e4
GEI0 g ei0
GEI1 g ei1
GEI2 g ei2
GEI3 g ei3
GEI4 g ei4
GEN0 g en0
GEN1 g en1
GEN2 g en2
GEN3 g en3
GEN4 g en4
GENG0 g eng0
GENG1 g eng1
GENG2 g eng2
GENG3 g eng3
GENG4 g eng4
GONG0 g ong0
GONG1 g ong1
GONG2 g ong2
GONG3 g ong3
GONG4 g ong4
GOU0 g ou0
GOU1 g ou1
GOU2 g ou2
GOU3 g ou3
GOU4 g ou4
GU0 g u0
GU1 g u1
GU2 g u2
GU3 g u3
GU4 g u4
GUA0 g ua0
GUA1 g ua1
GUA2 g ua2
GUA3 g ua3
GUA4 g ua4
GUAI0 g uai0
GUAI1 g uai1
GUAI2 g uai2
GUAI3 g uai3
GUAI4 g uai4
GUAN0 g uan0
GUAN1 g uan1
GUAN2 g uan2
GUAN3 g uan3
GUAN4 g uan4
GUANG0 g uang0
GUANG1 g uang1
GUANG2 g uang2
GUANG3 g uang3
GUANG4 g uang4
GUI0 g ui0
GUI1 g ui1
GUI2 g ui2
GUI3 g ui3
GUI4 g ui4
GUN0 g un0
GUN1 g un1
GUN2 g un2
GUN3 g un3
GUN4 g un4
GUO0 g uo0
GUO1 g uo1
GUO2 g uo2
GUO3 g uo3
GUO4 g uo4
HA0 h a0
HA1 h a1
HA2 h a2
HA3 h a3
HA4 h a4
HAI0 h ai0
HAI1 h ai1
HAI2 h ai2
HAI3 h ai3
HAI4 h ai4
HAN0 h an0
HAN1 h an1
HAN2 h an2
HAN3 h an3
HAN4 h an4
HANG0 h ang0
HANG1 h ang1
HANG2 h ang2
HANG3 h ang3
HANG4 h ang4
HAO0 h ao0
HAO1 h ao1
HAO2 h ao2
HAO3 h ao3
HAO4 h ao4
HE0 h e0
HE1 h e1
HE2 h e2
HE3 h e3
HE4 h e4
HEI0 h ei0
HEI1 h ei1
HEI2 h ei2
HEI3 h ei3
HEI4 h ei4
HEN0 h en0
HEN1 h en1
HEN2 h en2
HEN3 h en3
HEN4 h en4
HENG0 h eng0
HENG1 h eng1
HENG2 h eng2
HENG3 h eng3
HENG4 h eng4
HONG0 h ong0
HONG1 h ong1
HONG2 h ong2
HONG3 h ong3
HONG4 h ong4
HOU0 h ou0
HOU1 h ou1
HOU2 h ou2
HOU3 h ou3
HOU4 h ou4
HU0 h u0
HU1 h u1
HU2 h u2
HU3 h u3
HU4 h u4
HUA0 h ua0
HUA1 h ua1
HUA2 h ua2
HUA3 h ua3
HUA4 h ua4
HUAI0 h uai0
HUAI1 h uai1
HUAI2 h uai2
HUAI3 h uai3
HUAI4 h uai4
HUAN0 h uan0
HUAN1 h uan1
HUAN2 h uan2
HUAN3 h uan3
HUAN4 h uan4
HUANG0 h uang0
HUANG1 h uang1
HUANG2 h uang2
HUANG3 h uang3
HUANG4 h uang4
HUI0 h ui0
HUI1 h ui1
HUI2 h ui2
HUI3 h ui3
HUI4 h ui4
HUN0 h un0
HUN1 h un1
HUN2 h un2
HUN3 h un3
HUN4 h un4
HUO0 h uo0
HUO1 h uo1
HUO2 h uo2
HUO3 h uo3
HUO4 h uo4
JI0 j i0
JI1 j i1
JI2 j i2
JI3 j i3
JI4 j i4
JIA0 j ia0
JIA1 j ia1
JIA2 j ia2
JIA3 j ia3
JIA4 j ia4
JIAN0 j ian0
JIAN1 j ian1
JIAN2 j ian2
JIAN3 j ian3
JIAN4 j ian4
JIANG0 j iang0
JIANG1 j iang1
JIANG2 j iang2
JIANG3 j iang3
JIANG4 j iang4
JIAO0 j iao0
JIAO1 j iao1
JIAO2 j iao2
JIAO3 j iao3
JIAO4 j iao4
JIE0 j ie0
JIE1 j ie1
JIE2 j ie2
JIE3 j ie3
JIE4 j ie4
JIN0 j in0
JIN1 j in1
JIN2 j in2
JIN3 j in3
JIN4 j in4
JING0 j ing0
JING1 j ing1
JING2 j ing2
JING3 j ing3
JING4 j ing4
JIONG0 j iong0
JIONG1 j iong1
JIONG2 j iong2
JIONG3 j iong3
JIONG4 j iong4
JIU0 j iu0
JIU1 j iu1
JIU2 j iu2
JIU3 j iu3
JIU4 j iu4
JU0 j v0
JU1 j v1
JU2 j v2
JU3 j v3
JU4 j v4
JUAN0 j van0
JUAN1 j van1
JUAN2 j van2
JUAN3 j van3
JUAN4 j van4
JUE0 j ve0
JUE1 j ve1
JUE2 j ve2
JUE3 j ve3
JUE4 j ve4
JUN0 j vn0
JUN1 j vn1
JUN2 j vn2
JUN3 j vn3
JUN4 j vn4
KA0 k a0
KA1 k a1
KA2 k a2
KA3 k a3
KA4 k a4
KAI0 k ai0
KAI1 k ai1
KAI2 k ai2
KAI3 k ai3
KAI4 k ai4
KAN0 k an0
KAN1 k an1
KAN2 k an2
KAN3 k an3
KAN4 k an4
KANG0 k ang0
KANG1 k ang1
KANG2 k ang2
KANG3 k ang3
KANG4 k ang4
KAO0 k ao0
KAO1 k ao1
KAO2 k ao2
KAO3 k ao3
KAO4 k ao4
KE0 k e0
KE1 k e1
KE2 k e2
KE3 k e3
KE4 k e4
KEI0 k ei0
KEI1 k ei1
KEI2 k ei2
KEI3 k ei3
KEI4 k ei4
KEN0 k en0
KEN1 k en1
KEN2 k en2
KEN3 k en3
KEN4 k en4
KENG0 k eng0
KENG1 k eng1
KENG2 k eng2
KENG3 k eng3
KENG4 k eng4
KONG0 k ong0
KONG1 k ong1
KONG2 k ong2
KONG3 k ong3
KONG4 k ong4
KOU0 k ou0
KOU1 k ou1
KOU2 k ou2
KOU3 k ou3
KOU4 k ou4
KU0 k u0
KU1 k u1
KU2 k u2
KU3 k u3
KU4 k u4
KUA0 k ua0
KUA1 k ua1
KUA2 k ua2
KUA3 k ua3
KUA4 k ua4
KUAI0 k uai0
KUAI1 k uai1
KUAI2 k uai2
KUAI3 k uai3
KUAI4 k uai4
KUAN0 k uan0
KUAN1 k uan1
KUAN2 k uan2
KUAN3 k uan3
KUAN4 k uan4
KUANG0 k uang0
KUANG1 k uang1
KUANG2 k uang2
KUANG3 k uang3
KUANG4 k uang4
KUI0 k ui0
KUI1 k ui1
KUI2 k ui2
KUI3 k ui3
KUI4 k ui4
KUN0 k un0
KUN1 k un1
KUN2 k un2
KUN3 k un3
KUN4 k un4
KUO0 k uo0
KUO1 k uo1
KUO2 k uo2
KUO3 k uo3
KUO4 k uo4
LA0 l a0
LA1 l a1
LA2 l a2
LA3 l a3
LA4 l a4
LAI0 l ai0
LAI1 l ai1
LAI2 l ai2
LAI3 l ai3
LAI4 l ai4
LAN0 l an0
LAN1 l an1
LAN2 l an2
LAN3 l an3
LAN4 l an4
LANG0 l ang0
LANG1 l ang1
LANG2 l ang2
LANG3 l ang3
LANG4 l ang4
LAO0 l ao0
LAO1 l ao1
LAO2 l ao2
LAO3 l ao3
LAO4 l ao4
LE0 l e0
LE1 l e1
LE2 l e2
LE3 l e3
LE4 l e4
LEI0 l ei0
LEI1 l ei1
LEI2 l ei2
LEI3 l ei3
LEI4 l ei4
LENG0 l eng0
LENG1 l eng1
LENG2 l eng2
LENG3 l eng3
LENG4 l eng4
LI0 l i0
LI1 l i1
LI2 l i2
LI3 l i3
LI4 l i4
LIA0 l ia0
LIA1 l ia1
LIA2 l ia2
LIA3 l ia3
LIA4 l ia4
LIAN0 l ian0
LIAN1 l ian1
LIAN2 l ian2
LIAN3 l ian3
LIAN4 l ian4
LIANG0 l iang0
LIANG1 l iang1
LIANG2 l iang2
LIANG3 l iang3
LIANG4 l iang4
LIAO0 l iao0
LIAO1 l iao1
LIAO2 l iao2
LIAO3 l iao3
LIAO4 l iao4
LIE0 l ie0
LIE1 l ie1
LIE2 l ie2
LIE3 l ie3
LIE4 l ie4
LIN0 l in0
LIN1 l in1
LIN2 l in2
LIN3 l in3
LIN4 l in4
LING0 l ing0
LING1 l ing1
LING2 l ing2
LING3 l ing3
LING4 l ing4
LIU0 l iu0
LIU1 l iu1
LIU2 l iu2
LIU3 l iu3
LIU4 l iu4
LONG0 l ong0
LONG1 l ong1
LONG2 l ong2
LONG3 l ong3
LONG4 l ong4
LOU0 l ou0
LOU1 l ou1
LOU2 l ou2
LOU3 l ou3
LOU4 l ou4
LU0 l u0
LU1 l u1
LU2 l u2
LU3 l u3
LU4 l u4
LUAN0 l uan0
LUAN1 l uan1
LUAN2 l uan2
LUAN3 l uan3
LUAN4 l uan4
LUE0 l ve0
LUE1 l ve1
LUE2 l ve2
LUE3 l ve3
LUE4 l ve4
LVE0 l ve0
LVE1 l ve1
LVE2 l ve2
LVE3 l ve3
LVE4 l ve4
LUN0 l un0
LUN1 l un1
LUN2 l un2
LUN3 l un3
LUN4 l un4
LUO0 l uo0
LUO1 l uo1
LUO2 l uo2
LUO3 l uo3
LUO4 l uo4
LV0 l v0
LV1 l v1
LV2 l v2
LV3 l v3
LV4 l v4
MA0 m a0
MA1 m a1
MA2 m a2
MA3 m a3
MA4 m a4
MAI0 m ai0
MAI1 m ai1
MAI2 m ai2
MAI3 m ai3
MAI4 m ai4
MAN0 m an0
MAN1 m an1
MAN2 m an2
MAN3 m an3
MAN4 m an4
MANG0 m ang0
MANG1 m ang1
MANG2 m ang2
MANG3 m ang3
MANG4 m ang4
MAO0 m ao0
MAO1 m ao1
MAO2 m ao2
MAO3 m ao3
MAO4 m ao4
ME0 m e0
ME1 m e1
ME2 m e2
ME3 m e3
ME4 m e4
MEI0 m ei0
MEI1 m ei1
MEI2 m ei2
MEI3 m ei3
MEI4 m ei4
MEN0 m en0
MEN1 m en1
MEN2 m en2
MEN3 m en3
MEN4 m en4
MENG0 m eng0
MENG1 m eng1
MENG2 m eng2
MENG3 m eng3
MENG4 m eng4
MI0 m i0
MI1 m i1
MI2 m i2
MI3 m i3
MI4 m i4
MIAN0 m ian0
MIAN1 m ian1
MIAN2 m ian2
MIAN3 m ian3
MIAN4 m ian4
MIAO0 m iao0
MIAO1 m iao1
MIAO2 m iao2
MIAO3 m iao3
MIAO4 m iao4
MIE0 m ie0
MIE1 m ie1
MIE2 m ie2
MIE3 m ie3
MIE4 m ie4
MIN0 m in0
MIN1 m in1
MIN2 m in2
MIN3 m in3
MIN4 m in4
MING0 m ing0
MING1 m ing1
MING2 m ing2
MING3 m ing3
MING4 m ing4
MIU0 m iu0
MIU1 m iu1
MIU2 m iu2
MIU3 m iu3
MIU4 m iu4
MO0 m o0
MO1 m o1
MO2 m o2
MO3 m o3
MO4 m o4
MOU0 m ou0
MOU1 m ou1
MOU2 m ou2
MOU3 m ou3
MOU4 m ou4
MU0 m u0
MU1 m u1
MU2 m u2
MU3 m u3
MU4 m u4
NA0 n a0
NA1 n a1
NA2 n a2
NA3 n a3
NA4 n a4
NAI0 n ai0
NAI1 n ai1
NAI2 n ai2
NAI3 n ai3
NAI4 n ai4
NAN0 n an0
NAN1 n an1
NAN2 n an2
NAN3 n an3
NAN4 n an4
NANG0 n ang0
NANG1 n ang1
NANG2 n ang2
NANG3 n ang3
NANG4 n ang4
NAO0 n ao0
NAO1 n ao1
NAO2 n ao2
NAO3 n ao3
NAO4 n ao4
NE0 n e0
NE1 n e1
NE2 n e2
NE3 n e3
NE4 n e4
NEI0 n ei0
NEI1 n ei1
NEI2 n ei2
NEI3 n ei3
NEI4 n ei4
NEN0 n en0
NEN1 n en1
NEN2 n en2
NEN3 n en3
NEN4 n en4
NENG0 n eng0
NENG1 n eng1
NENG2 n eng2
NENG3 n eng3
NENG4 n eng4
NI0 n i0
NI1 n i1
NI2 n i2
NI3 n i3
NI4 n i4
NIAN0 n ian0
NIAN1 n ian1
NIAN2 n ian2
NIAN3 n ian3
NIAN4 n ian4
NIANG0 n iang0
NIANG1 n iang1
NIANG2 n iang2
NIANG3 n iang3
NIANG4 n iang4
NIAO0 n iao0
NIAO1 n iao1
NIAO2 n iao2
NIAO3 n iao3
NIAO4 n iao4
NIE0 n ie0
NIE1 n ie1
NIE2 n ie2
NIE3 n ie3
NIE4 n ie4
NIN0 n in0
NIN1 n in1
NIN2 n in2
NIN3 n in3
NIN4 n in4
NING0 n ing0
NING1 n ing1
NING2 n ing2
NING3 n ing3
NING4 n ing4
NIU0 n iu0
NIU1 n iu1
NIU2 n iu2
NIU3 n iu3
NIU4 n iu4
NONG0 n ong0
NONG1 n ong1
NONG2 n ong2
NONG3 n ong3
NONG4 n ong4
NU0 n u0
NU1 n u1
NU2 n u2
NU3 n u3
NU4 n u4
NUAN0 n uan0
NUAN1 n uan1
NUAN2 n uan2
NUAN3 n uan3
NUAN4 n uan4
NUE0 n ve0
NUE1 n ve1
NUE2 n ve2
NUE3 n ve3
NUE4 n ve4
NVE0 n ve0
NVE1 n ve1
NVE2 n ve2
NVE3 n ve3
NVE4 n ve4
NUO0 n uo0
NUO1 n uo1
NUO2 n uo2
NUO3 n uo3
NUO4 n uo4
NV0 n v0
NV1 n v1
NV2 n v2
NV3 n v3
NV4 n v4
O0 oo o0
O1 oo o1
O2 oo o2
O3 oo o3
O4 oo o4
OU0 oo ou0
OU1 oo ou1
OU2 oo ou2
OU3 oo ou3
OU4 oo ou4
PA0 p a0
PA1 p a1
PA2 p a2
PA3 p a3
PA4 p a4
PAI0 p ai0
PAI1 p ai1
PAI2 p ai2
PAI3 p ai3
PAI4 p ai4
PAN0 p an0
PAN1 p an1
PAN2 p an2
PAN3 p an3
PAN4 p an4
PANG0 p ang0
PANG1 p ang1
PANG2 p ang2
PANG3 p ang3
PANG4 p ang4
PAO0 p ao0
PAO1 p ao1
PAO2 p ao2
PAO3 p ao3
PAO4 p ao4
PEI0 p ei0
PEI1 p ei1
PEI2 p ei2
PEI3 p ei3
PEI4 p ei4
PEN0 p en0
PEN1 p en1
PEN2 p en2
PEN3 p en3
PEN4 p en4
PENG0 p eng0
PENG1 p eng1
PENG2 p eng2
PENG3 p eng3
PENG4 p eng4
PI0 p i0
PI1 p i1
PI2 p i2
PI3 p i3
PI4 p i4
PIAN0 p ian0
PIAN1 p ian1
PIAN2 p ian2
PIAN3 p ian3
PIAN4 p ian4
PIAO0 p iao0
PIAO1 p iao1
PIAO2 p iao2
PIAO3 p iao3
PIAO4 p iao4
PIE0 p ie0
PIE1 p ie1
PIE2 p ie2
PIE3 p ie3
PIE4 p ie4
PIN0 p in0
PIN1 p in1
PIN2 p in2
PIN3 p in3
PIN4 p in4
PING0 p ing0
PING1 p ing1
PING2 p ing2
PING3 p ing3
PING4 p ing4
PO0 p o0
PO1 p o1
PO2 p o2
PO3 p o3
PO4 p o4
POU0 p ou0
POU1 p ou1
POU2 p ou2
POU3 p ou3
POU4 p ou4
PU0 p u0
PU1 p u1
PU2 p u2
PU3 p u3
PU4 p u4
QI0 q i0
QI1 q i1
QI2 q i2
QI3 q i3
QI4 q i4
QIA0 q ia0
QIA1 q ia1
QIA2 q ia2
QIA3 q ia3
QIA4 q ia4
QIAN0 q ian0
QIAN1 q ian1
QIAN2 q ian2
QIAN3 q ian3
QIAN4 q ian4
QIANG0 q iang0
QIANG1 q iang1
QIANG2 q iang2
QIANG3 q iang3
QIANG4 q iang4
QIAO0 q iao0
QIAO1 q iao1
QIAO2 q iao2
QIAO3 q iao3
QIAO4 q iao4
QIE0 q ie0
QIE1 q ie1
QIE2 q ie2
QIE3 q ie3
QIE4 q ie4
QIN0 q in0
QIN1 q in1
QIN2 q in2
QIN3 q in3
QIN4 q in4
QING0 q ing0
QING1 q ing1
QING2 q ing2
QING3 q ing3
QING4 q ing4
QIONG0 q iong0
QIONG1 q iong1
QIONG2 q iong2
QIONG3 q iong3
QIONG4 q iong4
QIU0 q iu0
QIU1 q iu1
QIU2 q iu2
QIU3 q iu3
QIU4 q iu4
QU0 q v0
QU1 q v1
QU2 q v2
QU3 q v3
QU4 q v4
QUAN0 q van0
QUAN1 q van1
QUAN2 q van2
QUAN3 q van3
QUAN4 q van4
QUE0 q ve0
QUE1 q ve1
QUE2 q ve2
QUE3 q ve3
QUE4 q ve4
QUN0 q vn0
QUN1 q vn1
QUN2 q vn2
QUN3 q vn3
QUN4 q vn4
RAN0 r an0
RAN1 r an1
RAN2 r an2
RAN3 r an3
RAN4 r an4
RANG0 r ang0
RANG1 r ang1
RANG2 r ang2
RANG3 r ang3
RANG4 r ang4
RAO0 r ao0
RAO1 r ao1
RAO2 r ao2
RAO3 r ao3
RAO4 r ao4
RE0 r e0
RE1 r e1
RE2 r e2
RE3 r e3
RE4 r e4
REN0 r en0
REN1 r en1
REN2 r en2
REN3 r en3
REN4 r en4
RENG0 r eng0
RENG1 r eng1
RENG2 r eng2
RENG3 r eng3
RENG4 r eng4
RI0 r iz0
RI1 r iz1
RI2 r iz2
RI3 r iz3
RI4 r iz4
RONG0 r ong0
RONG1 r ong1
RONG2 r ong2
RONG3 r ong3
RONG4 r ong4
ROU0 r ou0
ROU1 r ou1
ROU2 r ou2
ROU3 r ou3
ROU4 r ou4
RU0 r u0
RU1 r u1
RU2 r u2
RU3 r u3
RU4 r u4
RUAN0 r uan0
RUAN1 r uan1
RUAN2 r uan2
RUAN3 r uan3
RUAN4 r uan4
RUI0 r ui0
RUI1 r ui1
RUI2 r ui2
RUI3 r ui3
RUI4 r ui4
RUN0 r un0
RUN1 r un1
RUN2 r un2
RUN3 r un3
RUN4 r un4
RUO0 r uo0
RUO1 r uo1
RUO2 r uo2
RUO3 r uo3
RUO4 r uo4
SA0 s a0
SA1 s a1
SA2 s a2
SA3 s a3
SA4 s a4
SAI0 s ai0
SAI1 s ai1
SAI2 s ai2
SAI3 s ai3
SAI4 s ai4
SAN0 s an0
SAN1 s an1
SAN2 s an2
SAN3 s an3
SAN4 s an4
SANG0 s ang0
SANG1 s ang1
SANG2 s ang2
SANG3 s ang3
SANG4 s ang4
SAO0 s ao0
SAO1 s ao1
SAO2 s ao2
SAO3 s ao3
SAO4 s ao4
SE0 s e0
SE1 s e1
SE2 s e2
SE3 s e3
SE4 s e4
SEN0 s en0
SEN1 s en1
SEN2 s en2
SEN3 s en3
SEN4 s en4
SENG0 s eng0
SENG1 s eng1
SENG2 s eng2
SENG3 s eng3
SENG4 s eng4
SHA0 sh a0
SHA1 sh a1
SHA2 sh a2
SHA3 sh a3
SHA4 sh a4
SHAI0 sh ai0
SHAI1 sh ai1
SHAI2 sh ai2
SHAI3 sh ai3
SHAI4 sh ai4
SHAN0 sh an0
SHAN1 sh an1
SHAN2 sh an2
SHAN3 sh an3
SHAN4 sh an4
SHANG0 sh ang0
SHANG1 sh ang1
SHANG2 sh ang2
SHANG3 sh ang3
SHANG4 sh ang4
SHAO0 sh ao0
SHAO1 sh ao1
SHAO2 sh ao2
SHAO3 sh ao3
SHAO4 sh ao4
SHE0 sh e0
SHE1 sh e1
SHE2 sh e2
SHE3 sh e3
SHE4 sh e4
SHEI0 sh ei0
SHEI1 sh ei1
SHEI2 sh ei2
SHEI3 sh ei3
SHEI4 sh ei4
SHEN0 sh en0
SHEN1 sh en1
SHEN2 sh en2
SHEN3 sh en3
SHEN4 sh en4
SHENG0 sh eng0
SHENG1 sh eng1
SHENG2 sh eng2
SHENG3 sh eng3
SHENG4 sh eng4
SHI0 sh ix0
SHI1 sh ix1
SHI2 sh ix2
SHI3 sh ix3
SHI4 sh ix4
SHOU0 sh ou0
SHOU1 sh ou1
SHOU2 sh ou2
SHOU3 sh ou3
SHOU4 sh ou4
SHU0 sh u0
SHU1 sh u1
SHU2 sh u2
SHU3 sh u3
SHU4 sh u4
SHUA0 sh ua0
SHUA1 sh ua1
SHUA2 sh ua2
SHUA3 sh ua3
SHUA4 sh ua4
SHUAI0 sh uai0
SHUAI1 sh uai1
SHUAI2 sh uai2
SHUAI3 sh uai3
SHUAI4 sh uai4
SHUAN0 sh uan0
SHUAN1 sh uan1
SHUAN2 sh uan2
SHUAN3 sh uan3
SHUAN4 sh uan4
SHUANG0 sh uang0
SHUANG1 sh uang1
SHUANG2 sh uang2
SHUANG3 sh uang3
SHUANG4 sh uang4
SHUI0 sh ui0
SHUI1 sh ui1
SHUI2 sh ui2
SHUI3 sh ui3
SHUI4 sh ui4
SHUN0 sh un0
SHUN1 sh un1
SHUN2 sh un2
SHUN3 sh un3
SHUN4 sh un4
SHUO0 sh uo0
SHUO1 sh uo1
SHUO2 sh uo2
SHUO3 sh uo3
SHUO4 sh uo4
SI0 s iy0
SI1 s iy1
SI2 s iy2
SI3 s iy3
SI4 s iy4
SONG0 s ong0
SONG1 s ong1
SONG2 s ong2
SONG3 s ong3
SONG4 s ong4
SOU0 s ou0
SOU1 s ou1
SOU2 s ou2
SOU3 s ou3
SOU4 s ou4
SU0 s u0
SU1 s u1
SU2 s u2
SU3 s u3
SU4 s u4
SUAN0 s uan0
SUAN1 s uan1
SUAN2 s uan2
SUAN3 s uan3
SUAN4 s uan4
SUI0 s ui0
SUI1 s ui1
SUI2 s ui2
SUI3 s ui3
SUI4 s ui4
SUN0 s un0
SUN1 s un1
SUN2 s un2
SUN3 s un3
SUN4 s un4
SUO0 s uo0
SUO1 s uo1
SUO2 s uo2
SUO3 s uo3
SUO4 s uo4
TA0 t a0
TA1 t a1
TA2 t a2
TA3 t a3
TA4 t a4
TAI0 t ai0
TAI1 t ai1
TAI2 t ai2
TAI3 t ai3
TAI4 t ai4
TAN0 t an0
TAN1 t an1
TAN2 t an2
TAN3 t an3
TAN4 t an4
TANG0 t ang0
TANG1 t ang1
TANG2 t ang2
TANG3 t ang3
TANG4 t ang4
TAO0 t ao0
TAO1 t ao1
TAO2 t ao2
TAO3 t ao3
TAO4 t ao4
TE0 t e0
TE1 t e1
TE2 t e2
TE3 t e3
TE4 t e4
TENG0 t eng0
TENG1 t eng1
TENG2 t eng2
TENG3 t eng3
TENG4 t eng4
TI0 t i0
TI1 t i1
TI2 t i2
TI3 t i3
TI4 t i4
TIAN0 t ian0
TIAN1 t ian1
TIAN2 t ian2
TIAN3 t ian3
TIAN4 t ian4
TIAO0 t iao0
TIAO1 t iao1
TIAO2 t iao2
TIAO3 t iao3
TIAO4 t iao4
TIE0 t ie0
TIE1 t ie1
TIE2 t ie2
TIE3 t ie3
TIE4 t ie4
TING0 t ing0
TING1 t ing1
TING2 t ing2
TING3 t ing3
TING4 t ing4
TONG0 t ong0
TONG1 t ong1
TONG2 t ong2
TONG3 t ong3
TONG4 t ong4
TOU0 t ou0
TOU1 t ou1
TOU2 t ou2
TOU3 t ou3
TOU4 t ou4
TU0 t u0
TU1 t u1
TU2 t u2
TU3 t u3
TU4 t u4
TUAN0 t uan0
TUAN1 t uan1
TUAN2 t uan2
TUAN3 t uan3
TUAN4 t uan4
TUI0 t ui0
TUI1 t ui1
TUI2 t ui2
TUI3 t ui3
TUI4 t ui4
TUN0 t un0
TUN1 t un1
TUN2 t un2
TUN3 t un3
TUN4 t un4
TUO0 t uo0
TUO1 t uo1
TUO2 t uo2
TUO3 t uo3
TUO4 t uo4
WA0 uu ua0
WA1 uu ua1
WA2 uu ua2
WA3 uu ua3
WA4 uu ua4
WAI0 uu uai0
WAI1 uu uai1
WAI2 uu uai2
WAI3 uu uai3
WAI4 uu uai4
WAN0 uu uan0
WAN1 uu uan1
WAN2 uu uan2
WAN3 uu uan3
WAN4 uu uan4
WANG0 uu uang0
WANG1 uu uang1
WANG2 uu uang2
WANG3 uu uang3
WANG4 uu uang4
WEI0 uu ui0
WEI1 uu ui1
WEI2 uu ui2
WEI3 uu ui3
WEI4 uu ui4
WEN0 uu un0
WEN1 uu un1
WEN2 uu un2
WEN3 uu un3
WEN4 uu un4
WENG0 uu ueng0
WENG1 uu ueng1
WENG2 uu ueng2
WENG3 uu ueng3
WENG4 uu ueng4
WO0 uu uo0
WO1 uu uo1
WO2 uu uo2
WO3 uu uo3
WO4 uu uo4
WU0 uu u0
WU1 uu u1
WU2 uu u2
WU3 uu u3
WU4 uu u4
XI0 x i0
XI1 x i1
XI2 x i2
XI3 x i3
XI4 x i4
XIA0 x ia0
XIA1 x ia1
XIA2 x ia2
XIA3 x ia3
XIA4 x ia4
XIAN0 x ian0
XIAN1 x ian1
XIAN2 x ian2
XIAN3 x ian3
XIAN4 x ian4
XIANG0 x iang0
XIANG1 x iang1
XIANG2 x iang2
XIANG3 x iang3
XIANG4 x iang4
XIAO0 x iao0
XIAO1 x iao1
XIAO2 x iao2
XIAO3 x iao3
XIAO4 x iao4
XIE0 x ie0
XIE1 x ie1
XIE2 x ie2
XIE3 x ie3
XIE4 x ie4
XIN0 x in0
XIN1 x in1
XIN2 x in2
XIN3 x in3
XIN4 x in4
XING0 x ing0
XING1 x ing1
XING2 x ing2
XING3 x ing3
XING4 x ing4
XIONG0 x iong0
XIONG1 x iong1
XIONG2 x iong2
XIONG3 x iong3
XIONG4 x iong4
XIU0 x iu0
XIU1 x iu1
XIU2 x iu2
XIU3 x iu3
XIU4 x iu4
XU0 x v0
XU1 x v1
XU2 x v2
XU3 x v3
XU4 x v4
XUAN0 x van0
XUAN1 x van1
XUAN2 x van2
XUAN3 x van3
XUAN4 x van4
XUE0 x ve0
XUE1 x ve1
XUE2 x ve2
XUE3 x ve3
XUE4 x ve4
XUN0 x vn0
XUN1 x vn1
XUN2 x vn2
XUN3 x vn3
XUN4 x vn4
YA0 ii ia0
YA1 ii ia1
YA2 ii ia2
YA3 ii ia3
YA4 ii ia4
YAN0 ii ian0
YAN1 ii ian1
YAN2 ii ian2
YAN3 ii ian3
YAN4 ii ian4
YANG0 ii iang0
YANG1 ii iang1
YANG2 ii iang2
YANG3 ii iang3
YANG4 ii iang4
YAO0 ii iao0
YAO1 ii iao1
YAO2 ii iao2
YAO3 ii iao3
YAO4 ii iao4
YE0 ii ie0
YE1 ii ie1
YE2 ii ie2
YE3 ii ie3
YE4 ii ie4
YI0 ii i0
YI1 ii i1
YI2 ii i2
YI3 ii i3
YI4 ii i4
YIN0 ii in0
YIN1 ii in1
YIN2 ii in2
YIN3 ii in3
YIN4 ii in4
YING0 ii ing0
YING1 ii ing1
YING2 ii ing2
YING3 ii ing3
YING4 ii ing4
YO0 ii ou0
YO1 ii ou1
YO2 ii ou2
YO3 ii ou3
YO4 ii ou4
YONG0 ii iong0
YONG1 ii iong1
YONG2 ii iong2
YONG3 ii iong3
YONG4 ii iong4
YOU0 ii iu0
YOU1 ii iu1
YOU2 ii iu2
YOU3 ii iu3
YOU4 ii iu4
YU0 vv v0
YU1 vv v1
YU2 vv v2
YU3 vv v3
YU4 vv v4
YUAN0 vv van0
YUAN1 vv van1
YUAN2 vv van2
YUAN3 vv van3
YUAN4 vv van4
YUE0 vv ve0
YUE1 vv ve1
YUE2 vv ve2
YUE3 vv ve3
YUE4 vv ve4
YUN0 vv vn0
YUN1 vv vn1
YUN2 vv vn2
YUN3 vv vn3
YUN4 vv vn4
YUO0 ii ou0
YUO1 ii ou1
YUO2 ii ou2
YUO3 ii ou3
YUO4 ii ou4
ZA0 z a0
ZA1 z a1
ZA2 z a2
ZA3 z a3
ZA4 z a4
ZAI0 z ai0
ZAI1 z ai1
ZAI2 z ai2
ZAI3 z ai3
ZAI4 z ai4
ZAN0 z an0
ZAN1 z an1
ZAN2 z an2
ZAN3 z an3
ZAN4 z an4
ZANG0 z ang0
ZANG1 z ang1
ZANG2 z ang2
ZANG3 z ang3
ZANG4 z ang4
ZAO0 z ao0
ZAO1 z ao1
ZAO2 z ao2
ZAO3 z ao3
ZAO4 z ao4
ZE0 z e0
ZE1 z e1
ZE2 z e2
ZE3 z e3
ZE4 z e4
ZEI0 z ei0
ZEI1 z ei1
ZEI2 z ei2
ZEI3 z ei3
ZEI4 z ei4
ZEN0 z en0
ZEN1 z en1
ZEN2 z en2
ZEN3 z en3
ZEN4 z en4
ZENG0 z eng0
ZENG1 z eng1
ZENG2 z eng2
ZENG3 z eng3
ZENG4 z eng4
ZHA0 zh a0
ZHA1 zh a1
ZHA2 zh a2
ZHA3 zh a3
ZHA4 zh a4
ZHAI0 zh ai0
ZHAI1 zh ai1
ZHAI2 zh ai2
ZHAI3 zh ai3
ZHAI4 zh ai4
ZHAN0 zh an0
ZHAN1 zh an1
ZHAN2 zh an2
ZHAN3 zh an3
ZHAN4 zh an4
ZHANG0 zh ang0
ZHANG1 zh ang1
ZHANG2 zh ang2
ZHANG3 zh ang3
ZHANG4 zh ang4
ZHAO0 zh ao0
ZHAO1 zh ao1
ZHAO2 zh ao2
ZHAO3 zh ao3
ZHAO4 zh ao4
ZHE0 zh e0
ZHE1 zh e1
ZHE2 zh e2
ZHE3 zh e3
ZHE4 zh e4
ZHEI0 zh ei0
ZHEI1 zh ei1
ZHEI2 zh ei2
ZHEI3 zh ei3
ZHEI4 zh ei4
ZHEN0 zh en0
ZHEN1 zh en1
ZHEN2 zh en2
ZHEN3 zh en3
ZHEN4 zh en4
ZHENG0 zh eng0
ZHENG1 zh eng1
ZHENG2 zh eng2
ZHENG3 zh eng3
ZHENG4 zh eng4
ZHI0 zh ix0
ZHI1 zh ix1
ZHI2 zh ix2
ZHI3 zh ix3
ZHI4 zh ix4
ZHONG0 zh ong0
ZHONG1 zh ong1
ZHONG2 zh ong2
ZHONG3 zh ong3
ZHONG4 zh ong4
ZHOU0 zh ou0
ZHOU1 zh ou1
ZHOU2 zh ou2
ZHOU3 zh ou3
ZHOU4 zh ou4
ZHU0 zh u0
ZHU1 zh u1
ZHU2 zh u2
ZHU3 zh u3
ZHU4 zh u4
ZHUA0 zh ua0
ZHUA1 zh ua1
ZHUA2 zh ua2
ZHUA3 zh ua3
ZHUA4 zh ua4
ZHUAI0 zh uai0
ZHUAI1 zh uai1
ZHUAI2 zh uai2
ZHUAI3 zh uai3
ZHUAI4 zh uai4
ZHUAN0 zh uan0
ZHUAN1 zh uan1
ZHUAN2 zh uan2
ZHUAN3 zh uan3
ZHUAN4 zh uan4
ZHUANG0 zh uang0
ZHUANG1 zh uang1
ZHUANG2 zh uang2
ZHUANG3 zh uang3
ZHUANG4 zh uang4
ZHUI0 zh ui0
ZHUI1 zh ui1
ZHUI2 zh ui2
ZHUI3 zh ui3
ZHUI4 zh ui4
ZHUN0 zh un0
ZHUN1 zh un1
ZHUN2 zh un2
ZHUN3 zh un3
ZHUN4 zh un4
ZHUO0 zh uo0
ZHUO1 zh uo1
ZHUO2 zh uo2
ZHUO3 zh uo3
ZHUO4 zh uo4
ZI0 z iy0
ZI1 z iy1
ZI2 z iy2
ZI3 z iy3
ZI4 z iy4
ZONG0 z ong0
ZONG1 z ong1
ZONG2 z ong2
ZONG3 z ong3
ZONG4 z ong4
ZOU0 z ou0
ZOU1 z ou1
ZOU2 z ou2
ZOU3 z ou3
ZOU4 z ou4
ZU0 z u0
ZU1 z u1
ZU2 z u2
ZU3 z u3
ZU4 z u4
ZUAN0 z uan0
ZUAN1 z uan1
ZUAN2 z uan2
ZUAN3 z uan3
ZUAN4 z uan4
ZUI0 z ui0
ZUI1 z ui1
ZUI2 z ui2
ZUI3 z ui3
ZUI4 z ui4
ZUN0 z un0
ZUN1 z un1
ZUN2 z un2
ZUN3 z un3
ZUN4 z un4
ZUO0 z uo0
ZUO1 z uo1
ZUO2 z uo2
ZUO3 z uo3
ZUO4 z uo4
EI0 ee ei0
EI1 ee ei1
EI2 ee ei2
EI3 ee ei3
EI4 ee ei4
TEI0 t ei0
TEI1 t ei1
TEI2 t ei2
TEI3 t ei3
TEI4 t ei4
HNG0 ee eng0
HNG1 ee eng1
HNG2 ee eng2
HNG3 ee eng3
HNG4 ee eng4
LO0 l o0
LO1 l o1
LO2 l o2
LO3 l o3
LO4 l o4
N0 ee en0
N1 ee en1
N2 ee en2
N3 ee en3
N4 ee en4
NG0 ee eng0
NG1 ee eng1
NG2 ee eng2
NG3 ee eng3
NG4 ee eng4
NOU0 n ao0
NOU1 n ao1
NOU2 n ao2
NOU3 n ao3
NOU4 n ao4
SEI0 s ei0
SEI1 s ei1
SEI2 s ei2
SEI3 s ei3
SEI4 s ei4
A5 aa a5
AI5 aa ai5
AN5 aa an5
ANG5 aa ang5
AO5 aa ao5
BA5 b a5
BAI5 b ai5
BAN5 b an5
BANG5 b ang5
BAO5 b ao5
BEI5 b ei5
BEN5 b en5
BENG5 b eng5
BI5 b i5
BIAN5 b ian5
BIAO5 b iao5
BIE5 b ie5
BIN5 b in5
BING5 b ing5
BO5 b o5
BU5 b u5
CA5 c a5
CAI5 c ai5
CAN5 c an5
CANG5 c ang5
CAO5 c ao5
CE5 c e5
CEN5 c en5
CENG5 c eng5
CHA5 ch a5
CHAI5 ch ai5
CHAN5 ch an5
CHANG5 ch ang5
CHAO5 ch ao5
CHE5 ch e5
CHEN5 ch en5
CHENG5 ch eng5
CHI5 ch ix5
CHONG5 ch ong5
CHOU5 ch ou5
CHU5 ch u5
CHUAI5 ch uai5
CHUAN5 ch uan5
CHUANG5 ch uang5
CHUI5 ch ui5
CHUN5 ch un5
CHUO5 ch uo5
CI5 c iy5
CONG5 c ong5
COU5 c ou5
CU5 c u5
CUAN5 c uan5
CUI5 c ui5
CUN5 c un5
CUO5 c uo5
DA5 d a5
DAI5 d ai5
DAN5 d an5
DANG5 d ang5
DAO5 d ao5
DE5 d e5
DEI5 d ei5
DEN5 d en5
DENG5 d eng5
DI5 d i5
DIA5 d ia5
DIAN5 d ian5
DIAO5 d iao5
DIE5 d ie5
DING5 d ing5
DIU5 d iu5
DONG5 d ong5
DOU5 d ou5
DU5 d u5
DUAN5 d uan5
DUI5 d ui5
DUN5 d un5
DUO5 d uo5
E5 ee e5
EN5 ee en5
ER5 ee er5
FA5 f a5
FAN5 f an5
FANG5 f ang5
FEI5 f ei5
FEN5 f en5
FENG5 f eng5
FO5 f o5
FOU5 f ou5
FU5 f u5
GA5 g a5
GAI5 g ai5
GAN5 g an5
GANG5 g ang5
GAO5 g ao5
GE5 g e5
GEI5 g ei5
GEN5 g en5
GENG5 g eng5
GONG5 g ong5
GOU5 g ou5
GU5 g u5
GUA5 g ua5
GUAI5 g uai5
GUAN5 g uan5
GUANG5 g uang5
GUI5 g ui5
GUN5 g un5
GUO5 g uo5
HA5 h a5
HAI5 h ai5
HAN5 h an5
HANG5 h ang5
HAO5 h ao5
HE5 h e5
HEI5 h ei5
HEN5 h en5
HENG5 h eng5
HONG5 h ong5
HOU5 h ou5
HU5 h u5
HUA5 h ua5
HUAI5 h uai5
HUAN5 h uan5
HUANG5 h uang5
HUI5 h ui5
HUN5 h un5
HUO5 h uo5
JI5 j i5
JIA5 j ia5
JIAN5 j ian5
JIANG5 j iang5
JIAO5 j iao5
JIE5 j ie5
JIN5 j in5
JING5 j ing5
JIONG5 j iong5
JIU5 j iu5
JU5 j v5
JUAN5 j van5
JUE5 j ve5
JUN5 j vn5
KA5 k a5
KAI5 k ai5
KAN5 k an5
KANG5 k ang5
KAO5 k ao5
KE5 k e5
KEI5 k ei5
KEN5 k en5
KENG5 k eng5
KONG5 k ong5
KOU5 k ou5
KU5 k u5
KUA5 k ua5
KUAI5 k uai5
KUAN5 k uan5
KUANG5 k uang5
KUI5 k ui5
KUN5 k un5
KUO5 k uo5
LA5 l a5
LAI5 l ai5
LAN5 l an5
LANG5 l ang5
LAO5 l ao5
LE5 l e5
LEI5 l ei5
LENG5 l eng5
LI5 l i5
LIA5 l ia5
LIAN5 l ian5
LIANG5 l iang5
LIAO5 l iao5
LIE5 l ie5
LIN5 l in5
LING5 l ing5
LIU5 l iu5
LONG5 l ong5
LOU5 l ou5
LU5 l u5
LUAN5 l uan5
LUE5 l ve5
LVE5 l ve5
LUN5 l un5
LUO5 l uo5
LV5 l v5
MA5 m a5
MAI5 m ai5
MAN5 m an5
MANG5 m ang5
MAO5 m ao5
ME5 m e5
MEI5 m ei5
MEN5 m en5
MENG5 m eng5
MI5 m i5
MIAN5 m ian5
MIAO5 m iao5
MIE5 m ie5
MIN5 m in5
MING5 m ing5
MIU5 m iu5
MO5 m o5
MOU5 m ou5
MU5 m u5
NA5 n a5
NAI5 n ai5
NAN5 n an5
NANG5 n ang5
NAO5 n ao5
NE5 n e5
NEI5 n ei5
NEN5 n en5
NENG5 n eng5
NI5 n i5
NIAN5 n ian5
NIANG5 n iang5
NIAO5 n iao5
NIE5 n ie5
NIN5 n in5
NING5 n ing5
NIU5 n iu5
NONG5 n ong5
NU5 n u5
NUAN5 n uan5
NUE5 n ve5
NVE5 n ve5
NUO5 n uo5
NV5 n v5
O5 oo o5
OU5 oo ou5
PA5 p a5
PAI5 p ai5
PAN5 p an5
PANG5 p ang5
PAO5 p ao5
PEI5 p ei5
PEN5 p en5
PENG5 p eng5
PI5 p i5
PIAN5 p ian5
PIAO5 p iao5
PIE5 p ie5
PIN5 p in5
PING5 p ing5
PO5 p o5
POU5 p ou5
PU5 p u5
QI5 q i5
QIA5 q ia5
QIAN5 q ian5
QIANG5 q iang5
QIAO5 q iao5
QIE5 q ie5
QIN5 q in5
QING5 q ing5
QIONG5 q iong5
QIU5 q iu5
QU5 q v5
QUAN5 q van5
QUE5 q ve5
QUN5 q vn5
RAN5 r an5
RANG5 r ang5
RAO5 r ao5
RE5 r e5
REN5 r en5
RENG5 r eng5
RI5 r iz5
RONG5 r ong5
ROU5 r ou5
RU5 r u5
RUAN5 r uan5
RUI5 r ui5
RUN5 r un5
RUO5 r uo5
SA5 s a5
SAI5 s ai5
SAN5 s an5
SANG5 s ang5
SAO5 s ao5
SE5 s e5
SEN5 s en5
SENG5 s eng5
SHA5 sh a5
SHAI5 sh ai5
SHAN5 sh an5
SHANG5 sh ang5
SHAO5 sh ao5
SHE5 sh e5
SHEI5 sh ei5
SHEN5 sh en5
SHENG5 sh eng5
SHI5 sh ix5
SHOU5 sh ou5
SHU5 sh u5
SHUA5 sh ua5
SHUAI5 sh uai5
SHUAN5 sh uan5
SHUANG5 sh uang5
SHUI5 sh ui5
SHUN5 sh un5
SHUO5 sh uo5
SI5 s iy5
SONG5 s ong5
SOU5 s ou5
SU5 s u5
SUAN5 s uan5
SUI5 s ui5
SUN5 s un5
SUO5 s uo5
TA5 t a5
TAI5 t ai5
TAN5 t an5
TANG5 t ang5
TAO5 t ao5
TE5 t e5
TENG5 t eng5
TI5 t i5
TIAN5 t ian5
TIAO5 t iao5
TIE5 t ie5
TING5 t ing5
TONG5 t ong5
TOU5 t ou5
TU5 t u5
TUAN5 t uan5
TUI5 t ui5
TUN5 t un5
TUO5 t uo5
WA5 uu ua5
WAI5 uu uai5
WAN5 uu uan5
WANG5 uu uang5
WEI5 uu ui5
WEN5 uu un5
WENG5 uu ueng5
WO5 uu uo5
WU5 uu u5
XI5 x i5
XIA5 x ia5
XIAN5 x ian5
XIANG5 x iang5
XIAO5 x iao5
XIE5 x ie5
XIN5 x in5
XING5 x ing5
XIONG5 x iong5
XIU5 x iu5
XU5 x v5
XUAN5 x van5
XUE5 x ve5
XUN5 x vn5
YA5 ii ia5
YAN5 ii ian5
YANG5 ii iang5
YAO5 ii iao5
YE5 ii ie5
YI5 ii i5
YIN5 ii in5
YING5 ii ing5
YO5 ii ou5
YONG5 ii iong5
YOU5 ii iu5
YU5 vv v5
YUAN5 vv van5
YUE5 vv ve5
YUN5 vv vn5
YUO5 ii ou5
ZA5 z a5
ZAI5 z ai5
ZAN5 z an5
ZANG5 z ang5
ZAO5 z ao5
ZE5 z e5
ZEI5 z ei5
ZEN5 z en5
ZENG5 z eng5
ZHA5 zh a5
ZHAI5 zh ai5
ZHAN5 zh an5
ZHANG5 zh ang5
ZHAO5 zh ao5
ZHE5 zh e5
ZHEI5 zh ei5
ZHEN5 zh en5
ZHENG5 zh eng5
ZHI5 zh ix5
ZHONG5 zh ong5
ZHOU5 zh ou5
ZHU5 zh u5
ZHUA5 zh ua5
ZHUAI5 zh uai5
ZHUAN5 zh uan5
ZHUANG5 zh uang5
ZHUI5 zh ui5
ZHUN5 zh un5
ZHUO5 zh uo5
ZI5 z iy5
ZONG5 z ong5
ZOU5 z ou5
ZU5 z u5
ZUAN5 z uan5
ZUI5 z ui5
ZUN5 z un5
ZUO5 z uo5
EI5 ee ei5
TEI5 t ei5
HNG5 ee eng5
LO5 l o5
N5 ee en5
NG5 ee eng5
NOU5 n ao5
SEI5 s ei5

================================================
FILE: examples/thchs30/align0/local/data.sh
================================================
#! /usr/bin/env bash
# Prepare THCHS-30 data for MFA alignment.
#
# Usage: bash local/data.sh [--stage N] [--stop_stage N] <LEXICON_NAME>
#   <LEXICON_NAME> is forwarded to reorganize_thchs30.py as --script-type.
#
# Stages:
#   -1  download THCHS-30 and generate manifests
#    0  dump the train manifest into data/
#    1  copy the THCHS-30 lexicon files into data/dict
#    2  generate data/dict/word.lexicon
#    3  reorganize the corpus into data/thchs30_corpus for MFA

stage=-1
stop_stage=100

source ${MAIN_ROOT}/utils/parse_options.sh

mkdir -p data
TARGET_DIR=${MAIN_ROOT}/dataset
mkdir -p ${TARGET_DIR}
LEXICON_NAME=$1
# Directory that stage 3 writes the MFA-ready corpus into.
CORPUS_DIR=data/thchs30_corpus

# download data, generate manifests
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
    python3 ${TARGET_DIR}/thchs30/thchs30.py \
    --manifest_prefix="data/manifest" \
    --target_dir="${TARGET_DIR}/thchs30"

    if [ $? -ne 0 ]; then
        echo "Prepare THCHS-30 failed. Terminated."
        exit 1
    fi
fi

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # dump manifest to data/
    python3 ${MAIN_ROOT}/utils/dump_manifest.py --manifest-path=data/manifest.train --output-dir=data
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # make sure the destination exists before copying into it
    mkdir -p data/dict
    # copy files to data/dict to gen word.lexicon
    cp  ${TARGET_DIR}/thchs30/data_thchs30/lm_word/lexicon.txt data/dict/lm_word_lexicon_1
    cp  ${TARGET_DIR}/thchs30/resource/dict/lexicon.txt data/dict/lm_word_lexicon_2
    # copy phone.lexicon to data/dict
    cp  ${TARGET_DIR}/thchs30/data_thchs30/lm_phone/lexicon.txt data/dict/phone.lexicon
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # gen word.lexicon
    python local/gen_word2phone.py  --lexicon-files="data/dict/lm_word_lexicon_1 data/dict/lm_word_lexicon_2" --output-path=data/dict/word.lexicon
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # reorganize dataset for MFA
    # Guard on the directory that is actually written below. The previous
    # check used $EXP_DIR, which is not defined in this script, so it never
    # detected an existing corpus.
    if [ ! -d ${CORPUS_DIR} ]; then
        echo "reorganizing thchs30 corpus..."
        python local/reorganize_thchs30.py --root-dir=data --output-dir=${CORPUS_DIR} --script-type=$LEXICON_NAME
        echo "reorganization done."
    else
        echo "${CORPUS_DIR} already exists, skip reorganization (remove it to regenerate)."
    fi
fi

echo "THCHS-30  data preparation done."
exit 0


================================================
FILE: examples/thchs30/align0/local/gen_word2phone.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generate a Chinese-character-to-phone lexicon from THCHS-30's word lexicons
file1: THCHS-30/data_thchs30/lm_word/lexicon.txt
file2: THCHS-30/resource/dict/lexicon.txt
"""
import argparse
from collections import defaultdict
from pathlib import Path
from typing import List
from typing import Union

# Module-level accumulators filled by proc_line() and gen_lexicon().
# key: (cn, phones) where phones is a 2-tuple such as ('ee', 'er4'); value: occurrence count
cn_phones_counter = defaultdict(int)
# key: cn (a single Chinese character); value: list of (phones, count)
cn_counter = defaultdict(list)
# key: cn (a single Chinese character); value: list of (phones, probability)
cn_counter_p = defaultdict(list)


def is_Chinese(ch):
    """Tell whether ``ch`` sorts inside U+4E00..U+9FFF (CJK Unified Ideographs).

    Args:
        ch: the string to check; the comparison is a plain string comparison
            against the two boundary characters.

    Returns:
        True when ``'\\u4e00' <= ch <= '\\u9fff'``, otherwise False.
    """
    lower_bound, upper_bound = '\u4e00', '\u9fff'
    return lower_bound <= ch <= upper_bound


def proc_line(line: str):
    """Count the per-character pronunciations found in one lexicon line.

    A usable line looks like ``一丁点 ii i4 d ing1 d ian3``: a word followed by
    exactly two phones per character. Each (character, (phone1, phone2)) pair
    increments ``cn_phones_counter``. Lines that are blank, do not start with a
    Chinese character, or whose phone count is not twice the word length are
    ignored.

    Args:
        line: one raw line of a lexicon file.
    """
    line_list = line.split()
    # Blank or whitespace-only lines used to raise IndexError on line[0].
    if not line_list:
        return
    cn_list = line_list[0]
    if not is_Chinese(cn_list[0]):
        return
    phone_list = line_list[1:]
    # Every character must own exactly two phones; otherwise the
    # character/phone pairing is ambiguous and the line is skipped.
    if len(phone_list) != 2 * len(cn_list):
        return
    for idx, cn in enumerate(cn_list):
        phones = (phone_list[2 * idx], phone_list[2 * idx + 1])
        cn_phones_counter[(cn, phones)] += 1


"""
example lines of output
the first column is a Chinese character
the second is the probability of this pronunciation
and the rest are the phones of this pronunciation
一 0.22 ii i1↩
一 0.45 ii i4↩
一 0.32 ii i2↩
一 0.01 ii i5
"""


def gen_lexicon(lexicon_files: List[Union[str, Path]],
                output_path: Union[str, Path]):
    """Build a character-level lexicon with pronunciation probabilities.

    Every line of every input file is fed to ``proc_line``. The resulting
    counts are then normalised per character, rounded to two decimals, and
    written as ``<char> <probability> <phone1> <phone2>`` lines.
    Pronunciations whose rounded probability is 0 are dropped.

    Note: the counts live in the module-level ``cn_phones_counter``,
    ``cn_counter`` and ``cn_counter_p`` dictionaries, so they accumulate if
    this function is called more than once in the same process.

    Args:
        lexicon_files: paths of the word lexicon files to read.
        output_path: path of the lexicon file to write.
    """
    for file_path in lexicon_files:
        # The lexicons contain Chinese text; do not depend on the locale's
        # default encoding.
        with open(file_path, "r", encoding="utf-8") as f1:
            for line in f1:
                proc_line(line)

    # Regroup the flat (character, phones) counts by character.
    for (cn, phones), count in cn_phones_counter.items():
        cn_counter[cn].append((phones, count))

    # Turn the counts into probabilities rounded to two decimals.
    for cn, phone_count_list in cn_counter.items():
        count_sum = sum(count for _, count in phone_count_list)
        for phones, count in phone_count_list:
            p = round(count / count_sum, 2)
            if p > 0:
                cn_counter_p[cn].append((phones, p))

    with open(output_path, "w", encoding="utf-8") as wf:
        for cn, phone_p_list in cn_counter_p.items():
            for phones, p in phone_p_list:
                wf.write(cn + " " + str(p) + " " + " ".join(phones) + "\n")


if __name__ == "__main__":
    # Command-line entry point: parse the arguments and write the lexicon.
    parser = argparse.ArgumentParser(
        description="Gen Chinese characters to phone lexicon for THCHS-30 dataset"
    )
    # A line of word_lexicon:
    # 一丁点 ii i4 d ing1 d ian3
    # the first is word, and the rest are the phones of the word, and the len of phones is twice of the word's len
    parser.add_argument(
        "--lexicon-files",
        type=str,
        default="data/dict/lm_word_lexicon_1 data/dict/lm_word_lexicon_2",
        help="lm_word_lexicon files")
    parser.add_argument(
        "--output-path",
        type=str,
        default="data/dict/word.lexicon",
        help="path to save output word2phone lexicon")
    args = parser.parse_args()
    # split() without an argument tolerates repeated, leading or trailing
    # whitespace; split(" ") would produce empty path entries in that case.
    lexicon_files = args.lexicon_files.split()
    output_path = Path(args.output_path).expanduser()

    gen_lexicon(lexicon_files, output_path)


================================================
FILE: examples/thchs30/align0/local/reorganize_thchs30.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Reorganize THCHS-30 for MFA
read manifest.train from root-dir
Link *.wav to output-dir
dump *.lab from manifest.train, such as: text, syllable and phone
Manifest file is a json-format file with each line containing the
meta data (i.e. audio filepath, transcript and audio duration)
"""
import argparse
import os
from pathlib import Path
from typing import Union


def link_wav(root_dir: Union[str, Path], output_dir: Union[str, Path]):
    """Symlink every audio file listed in ``<root_dir>/wav.scp`` into ``output_dir``.

    Each non-blank line of ``wav.scp`` is ``<utt_id> <wav_path>``. A symlink
    named after the last ``/``-separated component of ``wav_path`` is created
    in ``output_dir`` and points at ``wav_path``.

    Args:
        root_dir: directory containing ``wav.scp``.
        output_dir: existing directory that receives the symlinks.

    Raises:
        ValueError: if a non-blank line has no path after the utterance id.
        FileExistsError: if the link name is already taken by something that
            is not a symlink.
    """
    # Accept plain strings as the annotation promises; `/` needs Path objects.
    root_dir = Path(root_dir)
    output_dir = Path(output_dir)
    wav_scp_path = root_dir / 'wav.scp'
    with open(wav_scp_path, 'r', encoding='utf-8') as rf:
        for line in rf:
            line = line.strip()
            if not line:
                continue
            # Split once only, so a path that contains spaces stays intact.
            utt, wav_path = line.split(maxsplit=1)
            wav_name = wav_path.split("/")[-1]
            new_wav_path = output_dir / wav_name
            # Replace a link left over from an earlier run so the function can
            # be re-run; regular files are never removed.
            if new_wav_path.is_symlink():
                new_wav_path.unlink()
            os.symlink(wav_path, new_wav_path)


def write_lab(root_dir: Union[str, Path],
              output_dir: Union[str, Path],
              script_type='phone'):
    """Write one ``<utt_id>.lab`` transcript per line of ``<root_dir>/text.<script_type>``.

    Each non-blank input line is ``<utt_id> <token> <token> ...``. The tokens
    are re-joined with single spaces and written, followed by a newline, to
    ``<output_dir>/<utt_id>.lab``. For ``script_type == 'word'`` a space is
    additionally inserted between every character of the joined text.

    Args:
        root_dir: directory containing the ``text.<script_type>`` file.
        output_dir: existing directory that receives the ``.lab`` files.
        script_type: one of ``'word'``, ``'syllable'`` or ``'phone'``.
    """
    # Accept plain strings as the annotation promises; `/` needs Path objects.
    root_dir = Path(root_dir)
    output_dir = Path(output_dir)
    # script_type can in {'word', 'syllable', 'phone'}
    json_name = 'text.' + script_type
    json_path = root_dir / json_name
    # Transcripts may contain Chinese text; do not depend on the locale's
    # default encoding for reading or writing.
    with open(json_path, 'r', encoding='utf-8') as rf:
        for line in rf:
            fields = line.split()
            # Blank lines used to raise IndexError on fields[0].
            if not fields:
                continue
            utt_id = fields[0]
            context = ' '.join(fields[1:])
            if script_type == 'word':
                # add space between chinese char
                context = ' '.join(context)
            transcript_path = output_dir / (utt_id + '.lab')
            with open(transcript_path, 'wt', encoding='utf-8') as wf:
                wf.write(context + "\n")


def reorganize_thchs30(root_dir: Union[str, Path],
                       output_dir: Union[str, Path]=None,
                       script_type='phone'):
    """Create ``output_dir`` and fill it with wav symlinks and ``.lab`` transcripts.

    Args:
        root_dir: directory passed on to ``link_wav`` and ``write_lab``.
        output_dir: directory to create (parents included) and populate.
            Despite the ``None`` default it must be provided.
        script_type: transcript flavour passed on to ``write_lab``.

    Raises:
        ValueError: if ``output_dir`` is None.
    """
    if output_dir is None:
        # The old code failed here with an opaque AttributeError on None.mkdir.
        raise ValueError("output_dir must be provided")
    # Normalise to Path so both str and Path callers work.
    root_dir = Path(root_dir)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    link_wav(root_dir, output_dir)
    write_lab(root_dir, output_dir, script_type)


if __name__ == "__main__":
    # Command-line entry point: parse the arguments and reorganize the corpus.
    parser = argparse.ArgumentParser(
        description="Reorganize THCHS-30 dataset for MFA")
    # Both directories are mandatory: without them Path(None) below raised an
    # unhelpful TypeError instead of a usage message.
    parser.add_argument(
        "--root-dir",
        type=str,
        required=True,
        help="path to thchs30 dataset.")
    parser.add_argument(
        "--output-dir",
        type=str,
        required=True,
        help="path to save outputs (audio and transcriptions)")

    parser.add_argument(
        "--script-type",
        type=str,
        default="phone",
        help="type of lab ('word'/'syllable'/'phone')")

    args = parser.parse_args()
    root_dir = Path(args.root_dir).expanduser()
    output_dir = Path(args.output_dir).expanduser()
    reorganize_thchs30(root_dir, output_dir, args.script_type)


================================================
FILE: examples/thchs30/align0/path.sh
================================================
# Environment setup for this example; meant to be sourced (run.sh does so).

# MAIN_ROOT is resolved three levels above the current working directory.
export MAIN_ROOT=$(realpath ${PWD}/../../../)

# Put MAIN_ROOT and its utils directory in front of PATH.
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export LC_ALL=C

# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

# Append /usr/local/lib/ to the shared-library search path.
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/

# MFA is in tools
export PATH=${MAIN_ROOT}/tools/montreal-forced-aligner/bin:$PATH

================================================
FILE: examples/thchs30/align0/run.sh
================================================
#!/bin/bash
# Prepare THCHS-30 and train/align with the Montreal Forced Aligner (MFA).
# Every variable set before parse_options.sh is sourced can be overridden on
# the command line as `--name value`, e.g. `bash run.sh --stage 1 --NUM_JOBS 4`.
set -e
source path.sh
stage=0
stop_stage=100
EXP_DIR=exp
# LEXICON_NAME in {'phone', 'syllable', 'word'}
LEXICON_NAME='phone'
# set MFA num_jobs as half of machine's cpu core number
NUM_JOBS=$(( $(nproc) / 2 ))
# Integer division yields 0 on a single-core machine; always use at least one job.
if [ ${NUM_JOBS} -lt 1 ]; then
    NUM_JOBS=1
fi
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

# download dataset, unzip and generate manifest
# gen lexicon relink gen dump
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # prepare data
    echo "Start prepare thchs30 data for MFA ..."
    bash ./local/data.sh $LEXICON_NAME || exit -1
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # run MFA
    if [ ! -d "$EXP_DIR/thchs30_alignment" ]; then
        echo "Start MFA training ..."
        # Keep MFA's temporary files under EXP_DIR, like the other outputs.
        mfa_train_and_align data/thchs30_corpus data/dict/$LEXICON_NAME.lexicon $EXP_DIR/thchs30_alignment -o $EXP_DIR/thchs30_model --clean --verbose --temp_directory $EXP_DIR/.mfa_train_and_align --num_jobs $NUM_JOBS
        # printf is used because bash's echo prints "\n" literally without -e.
        printf "MFA training done!\nresults: %s\nmodel: %s\n" "$EXP_DIR/thchs30_alignment" "$EXP_DIR/thchs30_model"
    fi
fi


================================================
FILE: examples/timit/README.md
================================================
# TIMIT

* asr1 - transformer Streaming/Non-Streaming


================================================
FILE: examples/timit/asr1/.gitignore
================================================
data
exp
test.profile


================================================
FILE: examples/timit/asr1/README.md
================================================
# Transformer ASR with Timit
TIMIT is a phoneme-based continuous speech corpus created in a collaboration between Texas Instruments, MIT, and SRI International. The [Timit](https://catalog.ldc.upenn.edu/docs/LDC93S1/) dataset has a voice sampling frequency of 16 kHz and contains a total of 6,300 sentences: 630 speakers from 8 major U.S. dialect regions each read 10 given sentences, and all sentences are manually segmented and labeled at the phone level. Seventy percent of the speakers are male; most of the speakers are white adults.

## Dataset
### Download and Extract
Download TIMIT from its [official website](https://catalog.ldc.upenn.edu/LDC93S1) and extract it to `~/datasets`. The rest of this document assumes the dataset is unpacked in the directory `~/datasets/timit`.

## Overview
All the scripts you need are in `run.sh`. There are several stages in `run.sh`, and each stage has its function.
| Stage | Function                                                     |
|:---- |:----------------------------------------------------------- |
| 0     | Process data. It includes: <br>       (1) Download the dataset <br>       (2) Calculate the CMVN of the train dataset <br>       (3) Get the vocabulary file <br>       (4) Get the manifest files of the train, development and test dataset |
| 1     | Train the model                                              |
| 2     | Get the final model by averaging the top-k models, set k = 1 means to choose the best model |
| 3     | Test the final model performance                             |
| 4     | Get ctc alignment of test data using the final model         |

You can choose to run a range of stages by setting `stage` and `stop_stage`.

For example, if you want to execute the code in stage 2 and stage 3, you can run this script:
```bash
bash run.sh --stage 2 --stop_stage 3
```
Or you can set `stage` equal to `stop_stage` to only run one stage.
For example, if you only want to run `stage 0`, you can use the script below:
```bash
bash run.sh --stage 0 --stop_stage 0
```
The document below will describe the scripts in `run.sh` in detail.
## The Environment Variables
The path.sh contains the environment variables.
```bash
source path.sh
```
This script needs to be run first. And another script is also needed:
```bash
source ${MAIN_ROOT}/utils/parse_options.sh
```
It will support the way of using `--variable value` in the shell scripts.
## The Local Variables
Some local variables are set in `run.sh`. 
`gpus` denotes the GPU number you want to use. If you set `gpus=`, it means you only use CPU. 
`stage` denotes the number of the stage you want to start from in the experiments.
`stop_stage` denotes the number of the stage you want to end at in the experiments. 
`conf_path` denotes the config path of the model.
`avg_num` denotes the number K of top-K models you want to average to get the final model.
`audio_file` denotes the file path of the single file you want to infer in stage 5
`ckpt` denotes the checkpoint prefix of the model, e.g. "conformer"
You can set the local variables (except `ckpt`) when you use `run.sh`

For example, you can set the `gpus` and `avg_num` when you use the command line.:
```bash
bash run.sh --gpus 0,1,2,3 --avg_num 10
```
## Stage 0: Data Processing
To use this example, you need to process the data first; you can use stage 0 in `run.sh` to do this. The code is shown below:
```bash
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
     # prepare data
     bash ./local/timit_data_prep.sh ${TIMIT_path}
     bash ./local/data.sh || exit -1
 fi
```

Stage 0 is for processing the data.

If you only want to process the data, you can run
```bash
bash run.sh --stage 0 --stop_stage 0
```
You can also just run these scripts in your command line.
```bash
source path.sh
bash ./local/timit_data_prep.sh ${TIMIT_path}
bash ./local/data.sh
```
After processing the data, the ``data`` directory will look like this:
```bash
data/
|-- lang_char
|   `-- vocab.txt
|-- local
|   `-- dev_sph.flist
|   `-- dev_sph.scp
|   `-- dev.text
|   `-- dev.trans
|   `-- dev.uttids
|   `-- test_sph.flist
|   `-- test_sph.scp
|   `-- test.text
|   `-- test.trans
|   `-- test.uttids
|   `-- train_sph.flist
|   `-- train_sph.scp
|   `-- train.text
|   `-- train.trans
|   `-- train.uttids
|-- manifest.dev
|-- manifest.dev.raw
|-- manifest.test
|-- manifest.test.raw
|-- manifest.train
|-- manifest.train.raw
|-- mean_std.json
|-- test.meta
```
## Stage 1: Model Training
If you want to train the model, you can use stage 1 in `run.sh`. The code is shown below.
```bash
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
     CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
 fi
```
If you want to train the model, you can use the script below to execute stage 0 and stage 1:
```bash
bash run.sh --stage 0 --stop_stage 1
```
or you can run these scripts in the command line.
```bash
source path.sh
bash ./local/timit_data_prep.sh ${TIMIT_path}
bash ./local/data.sh
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh conf/transformer.yaml transformer
```
## Stage 2: Top-k Models Averaging
After training the model, we need to get the final model for testing and inference. In every epoch, the model checkpoint is saved, so we can choose the best model from them based on the validation loss or we can sort them and average the parameters of the top-k models to get the final model. We can use stage 2 to do this, and the code is shown below:
```bash
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     # avg n best model
     avg.sh best exp/${ckpt}/checkpoints ${avg_num}
 fi
```
The `avg.sh` script is in `../../../utils/`, which is defined in `path.sh`.
If you want to get the final model, you can use the script below to execute stage 0, stage 1, and stage 2:
```bash
bash run.sh --stage 0 --stop_stage 2
```
or you can run these scripts in the command line.
```bash
bash ./local/timit_data_prep.sh ${TIMIT_path}
source path.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh conf/transformer.yaml transformer
avg.sh best exp/transformer/checkpoints 10
```
## Stage 3: Model Testing
The test stage is to evaluate the model performance. The code of the test stage is shown below:
```bash
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
     # test ckpt avg_n
     CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
 fi
```
If you want to train a model and test it, you can use the script below to execute stage 0, stage 1, stage 2, and stage 3 :
```bash
bash run.sh --stage 0 --stop_stage 3
```
or you can run these scripts in the command line.
```bash
source path.sh
bash ./local/timit_data_prep.sh ${TIMIT_path}
bash ./local/data.sh
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh conf/transformer.yaml transformer
avg.sh best exp/transformer/checkpoints 10
CUDA_VISIBLE_DEVICES=0 ./local/test.sh conf/transformer.yaml exp/transformer/checkpoints/avg_10
```
## Stage 4: CTC Alignment 
If you want to get the alignment between the audio and the text, you can use the ctc alignment. The code of this stage is shown below:
```bash
 if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
     # ctc alignment of test data
     CUDA_VISIBLE_DEVICES=0 ./local/align.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
 fi
```
If you want to train the model, test it and do the alignment, you can use the script below to execute stage 0 through stage 4:
```bash
bash run.sh --stage 0 --stop_stage 4
```
or if you only need to train a model and do the alignment, you can use these scripts to skip stage 3 (the test stage):
```bash
bash run.sh --stage 0 --stop_stage 2
bash run.sh --stage 4 --stop_stage 4
```
or you can also use these scripts in the command line.
```bash
source path.sh
bash ./local/timit_data_prep.sh ${TIMIT_path}
bash ./local/data.sh
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh conf/transformer.yaml transformer
avg.sh best exp/transformer/checkpoints 10
# test stage is optional
CUDA_VISIBLE_DEVICES=0 ./local/test.sh conf/transformer.yaml exp/transformer/checkpoints/avg_10
CUDA_VISIBLE_DEVICES=0 ./local/align.sh conf/transformer.yaml conf/tuning/decode.yaml exp/transformer/checkpoints/avg_10
```


================================================
FILE: examples/timit/asr1/RESULTS.md
================================================
# TIMIT

### Transformer
| Model | Params | Config | Decode method | Loss |  PER |
| --- | --- | --- | --- | --- | --- |
| transformer | 5.17M | conf/transformer.yaml | attention              | 46.41119385 | 0.396950 |
| transformer | 5.17M | conf/transformer.yaml | ctc_greedy_search      | 46.41119385 | 0.182259 |
| transformer | 5.17M | conf/transformer.yaml | ctc_prefix_beam_search | 46.41119385 | 0.188080 |
| transformer | 5.17M | conf/transformer.yaml | attention_rescore      | 46.41119385 | 0.184199 |


================================================
FILE: examples/timit/asr1/conf/augmentation.json
================================================
[
  {
    "type": "shift",
    "params": {
      "min_shift_ms": -5,
      "max_shift_ms": 5
    },
    "prob": 1.0
  },
  {
    "type": "speed",
    "params": {
      "min_speed_rate": 0.9,
      "max_speed_rate": 1.1,
      "num_rates": 3
    },
    "prob": 0.0
  },
  {
    "type": "specaug",
    "params": {
      "F": 10,
      "T": 50,
      "n_freq_masks": 2,
      "n_time_masks": 2,
      "p": 1.0,
      "W": 80,
      "adaptive_number_ratio": 0,
      "adaptive_size_ratio": 0,
      "max_n_time_masks": 20,
      "replace_with_zero": true,
      "warp_mode": "PIL"
    },
    "prob": 1.0
  }
]


================================================
FILE: examples/timit/asr1/conf/dev_spk.list
================================================
faks0
fdac1
fjem0
mgwt0
mjar0
mmdb1
mmdm2
mpdf0
fcmh0
fkms0
mbdg0
mbwm0
mcsh0
fadg0
fdms0
fedw0
mgjf0
mglb0
mrtk0
mtaa0
mtdt0
mthc0
mwjg0
fnmr0
frew0
fsem0
mbns0
mmjr0
mdls0
mdlf0
mdvc0
mers0
fmah0
fdrw0
mrcs0
mrjm4
fcal1
mmwh0
fjsj0
majc0
mjsw0
mreb0
fgjd0
fjmg0
mroa0
mteb0
mjfc0
mrjr0
fmml0
mrws1

================================================
FILE: examples/timit/asr1/conf/preprocess.yaml
================================================
process:
  # extract kaldi fbank from PCM
  - type: fbank_kaldi
    fs: 16000
    n_mels: 80
    n_shift: 160
    win_length: 400
    dither: 0.1
  - type: cmvn_json
    cmvn_path: data/mean_std.json
  # these three processes together are a.k.a. SpecAugment
  - type: time_warp
    max_time_warp: 5
    inplace: true
    mode: PIL
  - type: freq_mask
    F: 30
    n_mask: 2
    inplace: true
    replace_with_zero: false
  - type: time_mask
    T: 40
    n_mask: 2
    inplace: true
    replace_with_zero: false


================================================
FILE: examples/timit/asr1/conf/test_spk.list
================================================
mdab0
mwbt0
felc0
mtas1
mwew0
fpas0
mjmp0
mlnt0
fpkt0
mlll0
mtls0
fjlm0
mbpm0
mklt0
fnlp0
mcmj0
mjdh0
fmgd0
mgrt0
mnjm0
fdhc0
mjln0
mpam0
fmld0

================================================
FILE: examples/timit/asr1/conf/transformer.yaml
================================================
# https://yaml.org/type/float.html
###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.train
dev_manifest: data/manifest.dev
test_manifest: data/manifest.test

###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_char/vocab.txt
spm_model_prefix: ''
unit_type: "word"
mean_std_filepath: ""
preprocess_config: conf/preprocess.yaml
feat_dim: 80
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 64
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 0
subsampling_factor: 1
num_encs: 1


############################################
#           Network Architecture           #
############################################
cmvn_file: 
cmvn_file_type: "json"
# encoder related
encoder: transformer
encoder_conf:
    output_size: 128    # dimension of attention
    attention_heads: 4
    linear_units: 1024  # the number of units of position-wise feed forward
    num_blocks: 6      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 and conv2d8
    normalize_before: true

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 1024
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    ctc_weight: 0.5
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false


###########################################
#                Training                 #
###########################################
n_epoch: 50
accum_grad: 1
global_grad_clip: 5.0
optim: adam
optim_conf:
  lr: 0.004
  weight_decay: 1.0e-6
scheduler: warmuplr     
scheduler_conf:
  warmup_steps: 1200
  lr_decay: 1.0
log_interval: 10
checkpoint:
  kbest_n: 50
  latest_n: 5


================================================
FILE: examples/timit/asr1/conf/tuning/decode.yaml
================================================
decode_batch_size: 64
error_rate_type: wer
decoding_method: attention  # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
beam_size: 10
ctc_weight: 0.5 # ctc weight for attention rescoring decode mode.
decoding_chunk_size: -1 # decoding chunk size. Defaults to -1.
    # <0: for decoding, use full chunk.
    # >0: for decoding, use fixed chunk size as set.
    # 0: used for training, it's prohibited here. 
num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
simulate_streaming: False  # simulate streaming inference. Defaults to False.


================================================
FILE: examples/timit/asr1/local/align.sh
================================================
#!/bin/bash

# Run CTC alignment with a trained checkpoint.
#
# Usage: align.sh config_path decode_config_path ckpt_path_prefix
#   config_path         model/training config passed to alignment.py as --config
#   decode_config_path  decoding config passed as --decode_cfg
#   ckpt_path_prefix    checkpoint prefix passed as --checkpoint_path; a directory
#                       with this same name is created to hold the alignment output
#
# Requires BIN_DIR to be set in the environment (see path.sh).

if [ $# != 3 ];then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix"
    exit -1
fi

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when it is empty).
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
decode_config_path=$2
ckpt_prefix=$3

# The decode batch size is overridden to 1 for alignment.
batch_size=1
output_dir=${ckpt_prefix}
mkdir -p ${output_dir}

# align dump in `result_file`
# .tier, .TextGrid dump in `dir of result_file`
# NOTE(review): `type` is never assigned in this script, so unless the caller
# exports it the result file expands to `${output_dir}/.align` - confirm whether
# a decoding-method name was intended here.
python3 -u ${BIN_DIR}/alignment.py \
--ngpu ${ngpu} \
--config ${config_path} \
--decode_cfg ${decode_config_path} \
--result_file ${output_dir}/${type}.align \
--checkpoint_path ${ckpt_prefix} \
--opts decode.decode_batch_size ${batch_size}

# $? here is the exit status of the python command directly above.
if [ $? -ne 0 ]; then
    echo "Failed in ctc alignment!"
    exit 1
fi

exit 0


================================================
FILE: examples/timit/asr1/local/data.sh
================================================
#!/bin/bash

# Prepare the TIMIT manifests, CMVN statistics and vocabulary.
#
# Stages (select a range with --stage / --stop_stage):
#   -1  generate the raw manifests from data/local
#    0  compute mean and stddev for feature normalization -> data/mean_std.json
#    1  build the vocabulary                              -> ${dict_dir}/vocab.txt
#    2  format the train/dev/test manifests               -> data/manifest.{train,dev,test}
#
# Requires MAIN_ROOT to be set in the environment (see path.sh).

stage=-1
stop_stage=100

dict_dir=data/lang_char

unit_type=word
TIMIT_path=

. ${MAIN_ROOT}/utils/parse_options.sh || exit -1;

mkdir -p data
mkdir -p ${dict_dir}
TARGET_DIR=${MAIN_ROOT}/dataset
mkdir -p ${TARGET_DIR}


if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
    # download data, generate manifests
    python3 ${TARGET_DIR}/timit/timit_kaldi_standard_split.py \
    --manifest_prefix="data/manifest" \
    --src="data/local"

    if [ $? -ne 0 ]; then
        echo "Prepare TIMIT failed. Terminated."
        exit 1
    fi
fi


if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # compute mean and stddev for normalizer
    num_workers=$(nproc)
    python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
    --manifest_path="data/manifest.train.raw" \
    --num_samples=-1 \
    --spectrum_type="fbank" \
    --feat_dim=80 \
    --delta_delta=false \
    --sample_rate=16000 \
    --stride_ms=10 \
    --window_ms=25 \
    --use_dB_normalization=False \
    --num_workers=${num_workers} \
    --output_path="data/mean_std.json"

    if [ $? -ne 0 ]; then
        echo "Compute mean and stddev failed. Terminated."
        exit 1
    fi
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # build vocabulary
    python3 ${MAIN_ROOT}/utils/build_vocab.py \
    --unit_type ${unit_type} \
    --count_threshold=0 \
    --vocab_path="${dict_dir}/vocab.txt" \
    --manifest_paths="data/manifest.train.raw"

    if [ $? -ne 0 ]; then
        echo "Build vocabulary failed. Terminated."
        exit 1
    fi
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # format manifest with tokenids, vocab size
    # The three sets are formatted in parallel background jobs. `exit 1` inside
    # a background job only ends that job, and a bare `wait` always returns 0,
    # so record every job's pid and wait on each one to observe its status.
    pids=()
    for set in train dev test; do
    {
        python3 ${MAIN_ROOT}/utils/format_data.py \
        --cmvn_path "data/mean_std.json" \
        --unit_type ${unit_type} \
        --vocab_path="${dict_dir}/vocab.txt" \
        --manifest_path="data/manifest.${set}.raw" \
        --output_path="data/manifest.${set}"

        if [ $? -ne 0 ]; then
            echo "Format manifest.${set} failed. Terminated."
            exit 1
        fi
    }&
    pids+=($!)
    done

    # Wait for every job (so none is left running) before deciding to abort.
    failed=0
    for pid in "${pids[@]}"; do
        wait ${pid} || failed=1
    done
    if [ ${failed} -ne 0 ]; then
        exit 1
    fi
fi

echo "TIMIT Data preparation done."
exit 0


================================================
FILE: examples/timit/asr1/local/export.sh
================================================
#!/bin/bash

# Export a trained checkpoint through ${BIN_DIR}/export.py.
#
# Usage: export.sh config_path ckpt_prefix jit_model_path
# Requires BIN_DIR to be set in the environment (see path.sh).

[ $# -eq 3 ] || {
    echo "usage: $0 config_path ckpt_prefix jit_model_path"
    exit -1
}

config_path=$1
ckpt_path_prefix=$2
jit_model_export_path=$3

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when it is empty).
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

if ! python3 -u ${BIN_DIR}/export.py \
    --ngpu ${ngpu} \
    --config ${config_path} \
    --checkpoint_path ${ckpt_path_prefix} \
    --export_path ${jit_model_export_path}; then
    echo "Failed in export!"
    exit 1
fi

exit 0


================================================
FILE: examples/timit/asr1/local/test.sh
================================================
#!/bin/bash

# Evaluate a checkpoint with several decoding methods.
#
# Usage: test.sh [--stage N] [--stop_stage M] config_path decode_config_path ckpt_path_prefix
#
# Stages:
#   0  attention and ctc_greedy_search decoding
#   1  ctc_prefix_beam_search decoding (batch size 1)
#   2  attention_rescoring decoding    (batch size 1)
#
# Each method writes its result to ${ckpt_prefix}.<method>.rsl.
# Requires MAIN_ROOT and BIN_DIR to be set in the environment (see path.sh).

set -e

stage=0
stop_stage=50

. ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

if [ $# != 3 ];then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix"
    exit -1
fi


# Number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when it is empty).
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
decode_config_path=$2
ckpt_prefix=$3

# A config file named like `*chunk_*.yaml` selects chunk (streaming) mode.
chunk_mode=false
if [[ ${config_path} =~ ^.*chunk_.*yaml$ ]];then
    chunk_mode=true
fi


# download language model
#bash local/download_lm_en.sh
#if [ $? -ne 0 ]; then
#    exit 1
#fi

# Decode with method $1 using decode batch size $2.
# The command is tested directly in the `if`: with `set -e` active, a separate
# `[ $? -ne 0 ]` check after the command would never be reached on failure, so
# the error message below would never be printed.
decode() {
    local type=$1
    local batch_size=$2

    echo "decoding ${type}"
    if ! python3 -u ${BIN_DIR}/test.py \
        --ngpu ${ngpu} \
        --config ${config_path} \
        --decode_cfg ${decode_config_path} \
        --result_file ${ckpt_prefix}.${type}.rsl \
        --checkpoint_path ${ckpt_prefix} \
        --opts decode.decoding_method ${type} \
        --opts decode.decode_batch_size ${batch_size}; then
        echo "Failed in evaluation!"
        exit 1
    fi
}

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    if [ ${chunk_mode} == true ];then
        # stream decoding only support batchsize=1
        stage0_batch_size=1
    else
        stage0_batch_size=64
    fi
    for type in attention ctc_greedy_search; do
        decode ${type} ${stage0_batch_size}
    done
fi


if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    decode ctc_prefix_beam_search 1
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    decode attention_rescoring 1
fi

exit 0


================================================
FILE: examples/timit/asr1/local/timit_data_prep.sh
================================================
#!/usr/bin/env bash

# Copyright 2013   (Authors: Bagher BabaAli, Daniel Povey, Arnab Ghoshal)
#           2014   Brno University of Technology (Author: Karel Vesely)
# Apache 2.0.

# Build file lists, utterance ids and phone transcripts for the TIMIT
# train/dev/test sets.
#
# Usage: timit_data_prep.sh <TIMIT-directory>
#
# For each set x in {train, dev, test} the following files are written to
# `pwd`/data/local:
#   ${x}_sph.flist  audio file list
#   ${x}_sph.scp    "<utt-id> <audio path>", sorted by utt-id
#   ${x}.uttids     utterance ids
#   ${x}.trans      "<utt-id> <phone sequence>" before normalization
#   ${x}.text       transcripts after timit_norm_trans.pl (invoked with `-to 39`)
#
# Needs conf/dev_spk.list, conf/test_spk.list and conf/phones.60-48-39.map
# relative to the current directory.

if [ $# -ne 1 ]; then
   echo "Argument should be the Timit directory, see ../run.sh for example."
   exit 1;
fi

# Paths are resolved against the current directory because the script later
# does `cd $dir`.
dir=`pwd`/data/local
mkdir -p $dir
local=`pwd`/local
utils=`pwd`/utils
conf=`pwd`/conf

# Print the given message to stderr and abort with status 1.
function error_exit () {
  echo -e "$@" >&2; exit 1;
}
PROG=$(basename $0)

[ -f $conf/test_spk.list ] || error_exit "$PROG line $LINENO: Eval-set speaker list not found.";
[ -f $conf/dev_spk.list ] || error_exit "$PROG line $LINENO: dev-set speaker list not found.";

# First check if the train & test directories exist (these can either be upper-
# or lower-cased)
# ($* holds exactly one word here: the argument count was checked above.)
if [ ! -d $*/TRAIN -o ! -d $*/TEST ] && [ ! -d $*/train -o ! -d $*/test ]; then
  echo "timit_data_prep.sh: Spot check of command line argument failed"
  echo "Command line argument must be absolute pathname to TIMIT directory"
  echo "with name like /export/corpora5/LDC/LDC93S1/timit/TIMIT"
  exit 1;
fi

# Now check what case the directory structure is
uppercased=false
train_dir=train
test_dir=test
if [ -d $*/TRAIN ]; then
  uppercased=true
  train_dir=TRAIN
  test_dir=TEST
fi

# Scratch directory for intermediate lists; removed when the script exits.
tmpdir=$(mktemp -d /tmp/kaldi.XXXX);
trap 'rm -rf "$tmpdir"' EXIT

# Get the list of speakers. The list of speakers in the 24-speaker core test
# set and the 50-speaker development set must be supplied to the script. All
# speakers in the 'train' directory are used for training.
# The speaker lists are case-converted to match the corpus directory layout so
# that the `grep -f` filters below match the file paths.
if $uppercased; then
  tr '[:lower:]' '[:upper:]' < $conf/dev_spk.list > $tmpdir/dev_spk
  tr '[:lower:]' '[:upper:]' < $conf/test_spk.list > $tmpdir/test_spk
  ls -d "$*"/TRAIN/DR*/* | sed -e "s:^.*/::" > $tmpdir/train_spk
else
  tr '[:upper:]' '[:lower:]' < $conf/dev_spk.list > $tmpdir/dev_spk
  tr '[:upper:]' '[:lower:]' < $conf/test_spk.list > $tmpdir/test_spk
  ls -d "$*"/train/dr*/* | sed -e "s:^.*/::" > $tmpdir/train_spk
fi

cd $dir
for x in train dev test; do
  # First, find the list of audio files (use only si & sx utterances).
  # Note: train & test sets are under different directories, but doing find on
  # both and grepping for the speakers will work correctly.
  find $*/{$train_dir,$test_dir} -not \( -iname 'SA*' \) -iname '*.WAV' \
    | grep -f $tmpdir/${x}_spk > ${x}_sph.flist

  # Utterance id = <parent directory name>_<file name without extension>.
  sed -e 's:.*/\(.*\)/\(.*\).\(WAV\|wav\)$:\1_\2:' ${x}_sph.flist \
    > $tmpdir/${x}_sph.uttids
  paste $tmpdir/${x}_sph.uttids ${x}_sph.flist \
    | sort -k1,1 > ${x}_sph.scp

  cat ${x}_sph.scp | awk '{print $1}' > ${x}.uttids

  # Now, Convert the transcripts into our format (no normalization yet)
  # Get the transcripts: each line of the output contains an utterance
  # ID followed by the transcript.
  find $*/{$train_dir,$test_dir} -not \( -iname 'SA*' \) -iname '*.PHN' \
    | grep -f $tmpdir/${x}_spk > $tmpdir/${x}_phn.flist
  sed -e 's:.*/\(.*\)/\(.*\).\(PHN\|phn\)$:\1_\2:' $tmpdir/${x}_phn.flist \
    > $tmpdir/${x}_phn.uttids
  # Take the third space-separated field of every .PHN line and join the
  # fields of one file into a single space-separated line.
  while read line; do
    [ -f $line ] || error_exit "Cannot find transcription file '$line'";
    cut -f3 -d' ' "$line" | tr '\n' ' ' | perl -ape 's: *$:\n:;'
  done < $tmpdir/${x}_phn.flist > $tmpdir/${x}_phn.trans
  paste $tmpdir/${x}_phn.uttids $tmpdir/${x}_phn.trans \
    | sort -k1,1 > ${x}.trans

  # Do normalization steps.
  cat ${x}.trans | $local/timit_norm_trans.pl -i - -m $conf/phones.60-48-39.map -to 39 | sort > $x.text || exit 1;

done

echo "Data preparation succeeded"

================================================
FILE: examples/timit/asr1/local/timit_norm_trans.pl
================================================
#!/usr/bin/env perl
use warnings; #sed replacement for -w perl parameter

# Copyright 2012  Arnab Ghoshal

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.


# This script normalizes the TIMIT phonetic transcripts that have been 
# extracted in a format where each line contains an utterance ID followed by 
# the transcript, e.g.:
# fcke0_si1111 h# hh ah dx ux w iy dcl d ix f ay n ih q h#
#
# The normalized transcripts are printed to standard output, one utterance per
# line, in the same "utterance-ID phones..." layout.

my $usage = "Usage: timit_norm_trans.pl -i transcript -m phone_map -from [60|48] -to [48|39] > normalized\n
Normalizes phonetic transcriptions for TIMIT, by mapping the phones to a 
smaller set defined by the -m option. This script assumes that the mapping is 
done in the \"standard\" fashion, i.e. to 48 or 39 phones.  The input is 
assumed to have 60 phones (+1 for glottal stop, which is deleted), but that can
be changed using the -from option. The input format is assumed to be utterance 
ID followed by transcript on the same line.\n";

use strict;
use Getopt::Long;
die "$usage" unless(@ARGV >= 1);
my ($in_trans, $phone_map, $num_phones_out);
my $num_phones_in = 60;  # default for -from
GetOptions ("i=s" => \$in_trans,          # Input transcription
	    "m=s" => \$phone_map,         # File containing phone mappings
	    "from=i" => \$num_phones_in,  # Input #phones: must be 60 or 48
	    "to=i" => \$num_phones_out ); # Output #phones: must be 48 or 39

# -i, -m and -to are mandatory; -from falls back to 60.
die $usage unless(defined($in_trans) && defined($phone_map) && 
		  defined($num_phones_out));
if ($num_phones_in != 60 && $num_phones_in != 48) {
  die "Can only used 60 or 48 for -from (used $num_phones_in)."
}
if ($num_phones_out != 48 && $num_phones_out != 39) {
  die "Can only used 48 or 39 for -to (used $num_phones_out)."
}
unless ($num_phones_out < $num_phones_in) {
  die "Argument to -from ($num_phones_in) must be greater than that to -to ($num_phones_out)."
}


# Read the phone map. Every accepted line must have exactly three
# whitespace-separated columns; the first or second is used as the source phone
# (for -from 60 or 48) and the second or third as the target (for -to 48 or 39).
open(M, "<$phone_map") or die "Cannot open mappings file '$phone_map': $!";
my (%phonemap, %seen_phones);
my $num_seen_phones = 0;
while (<M>) {
  chomp;
  next if ($_ =~ /^q\s*.*$/); # Ignore glottal stops.
  m:^(\S+)\s+(\S+)\s+(\S+)$: or die "Bad line: $_";
  my $mapped_from = ($num_phones_in == 60)? $1 : $2;
  my $mapped_to = ($num_phones_out == 48)? $2 : $3;
  # Count distinct target phones for the sanity check after the loop.
  if (!defined($seen_phones{$mapped_to})) {
    $seen_phones{$mapped_to} = 1;
    $num_seen_phones += 1;
  }
  $phonemap{$mapped_from} = $mapped_to;
}
# The map must yield exactly as many distinct target phones as requested by -to.
if ($num_seen_phones != $num_phones_out) {
  die "Trying to map to $num_phones_out phones, but seen only $num_seen_phones";
}

# NOTE(review): this is a two-argument open; timit_data_prep.sh passes `-i -`
# to feed the transcripts through a pipe, which presumably relies on "<-"
# opening STDIN in that form - confirm before switching to three-argument open.
open(T, "<$in_trans") or die "Cannot open transcription file '$in_trans': $!";
while (<T>) {
  chomp;
  # Split the line into the utterance ID and the rest (the transcript).
  $_ =~ m:^(\S+)\s+(.+): or die "Bad line: $_";
  my $utt_id = $1;
  my $trans = $2;

  # This deletes every 'q' character in the transcript, not only stand-alone
  # 'q' tokens.
  $trans =~ s/q//g;  # Remove glottal stops.
  $trans =~ s/^\s*//; $trans =~ s/\s*$//;  # Normalize spaces

  print $utt_id;
  # Phones found in the map are replaced; any other phone is printed unchanged.
  for my $phone (split(/\s+/, $trans)) {
    if(exists $phonemap{$phone}) { print " $phonemap{$phone}"; }
    if(not exists $phonemap{$phone}) { print " $phone"; }
  }
  print "\n";
}

================================================
FILE: examples/timit/asr1/local/train.sh
================================================
#!/bin/bash

# Train a model with ${BIN_DIR}/train.py.
#
# Usage: CUDA_VISIBLE_DEVICES=0 train.sh config_path ckpt_name
#   config_path  training config passed as --config
#   ckpt_name    experiment name; outputs are written under exp/${ckpt_name}
#
# With no visible GPU the trainer runs in a single process; otherwise it is
# started through paddle.distributed.launch on the GPUs listed in
# CUDA_VISIBLE_DEVICES.
# Requires BIN_DIR to be set in the environment (see path.sh).

if [ $# != 2 ];then
    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
    exit -1
fi

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when it is empty).
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
ckpt_name=$2

mkdir -p exp

# seed may break model convergence
seed=0
if [ ${seed} != 0  ]; then
    export FLAGS_cudnn_deterministic=True
fi

# default memory allocator strategy may cause gpu training hang
# for no OOM raised when memory exhausted
export FLAGS_allocator_strategy=naive_best_fit

if [ ${ngpu} == 0 ]; then
python3 -u ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--config ${config_path} \
--output exp/${ckpt_name} \
--seed ${seed}
else
python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--config ${config_path} \
--output exp/${ckpt_name} \
--seed ${seed}
fi
# Save the trainer's exit status right away: the `if` above returns the status
# of the python command it ran, and the seed clean-up below would overwrite $?.
train_status=$?

if [ ${seed} != 0 ]; then
    unset FLAGS_cudnn_deterministic
fi

if [ ${train_status} -ne 0 ]; then
    echo "Failed in training!"
    exit 1
fi

exit 0


================================================
FILE: examples/timit/asr1/path.sh
================================================
# Environment for the TIMIT asr1 recipe; meant to be sourced (`. path.sh`).

# Repository root, three levels above the directory this is sourced from.
export MAIN_ROOT=$(realpath ${PWD}/../../../)

# Entry-point scripts (train.py, test.py, ...) of the model used by this recipe.
MODEL=u2
export BIN_DIR=${MAIN_ROOT}/paddlespeech/s2t/exps/${MODEL}/bin

export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH} LC_ALL=C

# PYTHONIOENCODING: use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONDONTWRITEBYTECODE=1 PYTHONIOENCODING=UTF-8
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/


================================================
FILE: examples/timit/asr1/run.sh
================================================
#!/bin/bash
# End-to-end TIMIT recipe: data preparation, training, checkpoint averaging,
# testing, CTC alignment and (stage 51) export.
set -e

. path.sh || exit 1;

# Defaults; parse_options.sh below handles the command-line options.
gpus=0,1,2,3
stage=0
stop_stage=50
conf_path=conf/transformer.yaml
decode_conf_path=conf/tuning/decode.yaml
avg_num=10
TIMIT_path=~/datasets/timit/data/lisa/data/timit/raw/TIMIT

. ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

# Succeeds when stage number $1 lies inside [stage, stop_stage].
stage_enabled() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

avg_ckpt=avg_${avg_num}
# Checkpoint name = config file name up to its first dot.
ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}')
echo "checkpoint name ${ckpt}"
ckpt_dir=exp/${ckpt}/checkpoints

# Stage 0: prepare data
if stage_enabled 0; then
    bash ./local/timit_data_prep.sh ${TIMIT_path}
    bash ./local/data.sh || exit -1
fi

# Stage 1: train model, all `ckpt` under `exp` dir
if stage_enabled 1; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path}  ${ckpt}
fi

# Stage 2: avg n best model
if stage_enabled 2; then
    avg.sh best ${ckpt_dir} ${avg_num}
fi

# Stage 3: test ckpt avg_n
if stage_enabled 3; then
    CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} ${ckpt_dir}/${avg_ckpt} || exit -1
fi

# Stage 4: ctc alignment of test data
if stage_enabled 4; then
    CUDA_VISIBLE_DEVICES=0 ./local/align.sh ${conf_path} ${decode_conf_path} ${ckpt_dir}/${avg_ckpt} || exit -1
fi

# Stage 51: export ckpt avg_n (outside the default stop_stage of 50)
if stage_enabled 51; then
     CUDA_VISIBLE_DEVICES= ./local/export.sh ${conf_path} ${ckpt_dir}/${avg_ckpt} ${ckpt_dir}/${avg_ckpt}.jit
fi


================================================
FILE: examples/tiny/.gitignore
================================================
ckpt*
data


================================================
FILE: examples/tiny/README.md
================================================
* asr0 - deepspeech2 Streaming/Non-Streaming
* asr1 - transformer/conformer Streaming/Non-Streaming
* asr2 - transformer/conformer Streaming/Non-Streaming with Kaldi feature


================================================
FILE: examples/tiny/asr0/.gitignore
================================================
data
exp
*log


================================================
FILE: examples/tiny/asr0/README.md
================================================
# DeepSpeech2 offline/online ASR with Tiny
This example contains code used to train a DeepSpeech2 offline or online model with the Tiny dataset (a part of the [Librispeech dataset](http://www.openslr.org/resources/12)).
## Overview
All the scripts you need are in the `run.sh`. There are several stages in the `run.sh`, and each stage has its function.
| Stage | Function                                                     |
|:---- |:----------------------------------------------------------- |
| 0     | Process data. It includes: <br>       (1) Download the dataset <br>       (2) Calculate the CMVN of the train dataset <br>       (3) Get the vocabulary file <br>       (4) Get the manifest files of the train, development and test dataset |
| 1     | Train the model                                              |
| 2     | Get the final model by averaging the top-k models, set k = 1 means to choose the best model |
| 3     | Test the final model performance                             |
| 4     | Export the static graph model                                |

You can choose to run a range of stages by setting `stage` and `stop_stage`.

For example, if you want to execute the code in stage 2 and stage 3, you can run this script:

```bash
bash run.sh --stage 2 --stop_stage 3
```
Or you can set `stage` equal to `stop_stage` to only run one stage.
For example, if you only want to run `stage 0`, you can use the script below:
```bash
bash run.sh --stage 0 --stop_stage 0
```
The document below will describe the scripts in the `run.sh` in detail.

## The environment variables
`path.sh` contains the environment variables.
```bash
source path.sh
```
This script needs to be run first.  

And another script is also needed:
```bash
source ${MAIN_ROOT}/utils/parse_options.sh
```
It will support the way of using `--variable value` in the shell scripts.

## The local variables
Some local variables are set in the `run.sh`. 
`gpus` denotes the GPU number you want to use. If you set `gpus=`,  it means you only use CPU. 
`stage` denotes the number of stages you want to start from in the experiments.
`stop_stage` denotes the number of the stage you want to end at in the experiments. 
`conf_path` denotes the config path of the model.
`avg_num` denotes the number K of top-K models you want to average to get the final model.
`model_type` denotes the model type: offline or online
`ckpt` denotes the checkpoint prefix of the model, e.g. "deepspeech2"

You can set the local variables (except `ckpt`)  when you use the `run.sh`

For example, you can set `gpus` and `avg_num` on the command line:
```bash
bash run.sh --gpus 0,1 --avg_num 1
```
## Stage 0: Data processing
To use this example, you need to process the data first; stage 0 in `run.sh` does this. The code is shown below:
```bash
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
     # prepare data
     bash ./local/data.sh || exit -1
 fi
```
Stage 0 is for processing the data.

If you only want to process the data, you can run:

```bash
bash run.sh --stage 0 --stop_stage 0
```
You can also just run these scripts in your command line.
```bash
source path.sh
bash ./local/data.sh
```
After processing the data, the `data` directory will look like this:
```bash
data/
|-- dev.meta
|-- lang_char
|   `-- vocab.txt
|-- manifest.dev
|-- manifest.dev.raw
|-- manifest.test
|-- manifest.test.raw
|-- manifest.train
|-- manifest.train.raw
|-- mean_std.json
|-- test.meta
`-- train.meta
```
## Stage 1: Model training
If you want to train the model, you can use stage 1 in `run.sh`. The code is shown below.
```bash
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
     CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path}  ${ckpt}
 fi
```
If you want to train the model, you can use the script below to execute stage 0 and stage 1:
```bash
bash run.sh --stage 0 --stop_stage 1
```
or you can run these scripts in the command line (only use CPU).
```bash
source path.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/deepspeech2.yaml deepspeech2
```
## Stage 2:  Top-k Models Averaging
After training the model,  we need to get the final model for testing and inference. In every epoch, the model checkpoint is saved, so we can choose the best model from them based on the validation loss or we can sort them and average the parameters of the top-k models to get the final model.  We can use stage 2 to do this, and the code is shown below:
```bash
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     # avg n best model
     avg.sh best exp/${ckpt}/checkpoints ${avg_num}
 fi
```
The `avg.sh` script is in `../../../utils/`, which is defined in `path.sh`.
If you want to get the final model,  you can use the script below to execute stage 0, stage 1, and stage 2:
```bash
bash run.sh --stage 0 --stop_stage 2
```
or you can run these scripts in the command line (only use CPU).

```bash
source path.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/deepspeech2.yaml deepspeech2
avg.sh best exp/deepspeech2/checkpoints 1
```
## Stage 3: Model Testing
The test stage is to evaluate the model performance. The code of the test stage is shown below:

```bash
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
     # test ckpt avg_n
     CUDA_VISIBLE_DEVICES=${gpus} ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt}|| exit -1
 fi
```
If you want to train a model and test it,  you can use the script below to execute stage 0, stage 1,  stage 2, and stage 3 :
```bash
bash run.sh --stage 0 --stop_stage 3
```
or you can run these scripts in the command line (only use CPU).
```bash
source path.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/deepspeech2.yaml deepspeech2
avg.sh best exp/deepspeech2/checkpoints 1
CUDA_VISIBLE_DEVICES= ./local/test.sh conf/deepspeech2.yaml conf/tuning/decode.yaml exp/deepspeech2/checkpoints/avg_1
```
## Stage 4: Static graph model Export
This stage is to transform dygraph to static graph.
```bash
 if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
     # export ckpt avg_n
     CUDA_VISIBLE_DEVICES=0 ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit ${model_type}
 fi
```
If you already have a dynamic graph model, you can run this script:
```bash
source path.sh
./local/export.sh conf/deepspeech2.yaml exp/deepspeech2/checkpoints/avg_1 exp/deepspeech2/checkpoints/avg_1.jit
```


================================================
FILE: examples/tiny/asr0/conf/deepspeech2.yaml
================================================
# https://yaml.org/type/float.html
###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.tiny
dev_manifest: data/manifest.tiny
test_manifest: data/manifest.tiny 
min_input_len: 0.0
max_input_len: 30.0
min_output_len: 0.0
max_output_len: 400.0
min_output_input_ratio: 0.05
max_output_input_ratio: 10.0


###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_char/vocab.txt 
spm_model_prefix: ''
unit_type: 'char'
preprocess_config: conf/preprocess.yaml
feat_dim: 161
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 4
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 8
subsampling_factor: 1
num_encs: 1
  
############################################
#           Network Architecture           #
############################################
num_conv_layers: 2
num_rnn_layers: 3
rnn_layer_size: 2048
rnn_direction: bidirect # [forward, bidirect]
num_fc_layers: 0
fc_layers_size_list: -1,
use_gru: False 
blank_id: 0
  

###########################################
#                Training                 #
###########################################
n_epoch: 5
accum_grad: 1
lr: 1.0e-5 
lr_decay: 0.8 
weight_decay: 1.0e-6
global_grad_clip: 5.0
dist_sampler: False
log_interval: 1
checkpoint:
  kbest_n: 3
  latest_n: 2


================================================
FILE: examples/tiny/asr0/conf/deepspeech2_online.yaml
================================================
# https://yaml.org/type/float.html
###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.tiny
dev_manifest: data/manifest.tiny
test_manifest: data/manifest.tiny 
min_input_len: 0.0
max_input_len: 30.0
min_output_len: 0.0
max_output_len: 400.0
min_output_input_ratio: 0.05
max_output_input_ratio: 10.0


###########################################
#              Dataloader                 #
###########################################
vocab_filepath: data/lang_char/vocab.txt 
spm_model_prefix: ''
unit_type: 'char'
preprocess_config: conf/preprocess.yaml
feat_dim: 161
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 4
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 8
subsampling_factor: 1
num_encs: 1
 
############################################
#           Network Architecture           #
############################################
num_conv_layers: 2
num_rnn_layers: 4
rnn_layer_size: 2048
rnn_direction: forward
num_fc_layers: 2
fc_layers_size_list: 512, 256
use_gru: True 
blank_id: 0
  

###########################################
#                Training                 #
###########################################
n_epoch: 5
accum_grad: 1
lr: 1.0e-5 
lr_decay: 1.0 
weight_decay: 1.0e-6
global_grad_clip: 5.0
dist_sampler: False
log_interval: 1
checkpoint:
  kbest_n: 3
  latest_n: 2


================================================
FILE: examples/tiny/asr0/conf/preprocess.yaml
================================================
process:
  # extract kaldi fbank from PCM
  - type: fbank_kaldi
    fs: 16000
    n_mels: 161
    n_shift: 160
    win_length: 400
    dither: 0.1
  - type: cmvn_json
    cmvn_path: data/mean_std.json
  # these three processes are a.k.a. SpecAugment
  - type: time_warp
    max_time_warp: 5
    inplace: true
    mode: PIL
  - type: freq_mask
    F: 30
    n_mask: 2
    inplace: true
    replace_with_zero: false
  - type: time_mask
    T: 40
    n_mask: 2
    inplace: true
    replace_with_zero: false


================================================
FILE: examples/tiny/asr0/conf/tuning/chunk_decode.yaml
================================================
decode_batch_size: 128
error_rate_type: wer
decoding_method: ctc_beam_search
lang_model_path: data/lm/common_crawl_00.prune01111.trie.klm
alpha: 2.5
beta: 0.3
beam_size: 500
cutoff_prob: 1.0
cutoff_top_n: 40
num_proc_bsearch: 8


================================================
FILE: examples/tiny/asr0/conf/tuning/decode.yaml
================================================
decode_batch_size: 128
error_rate_type: wer
decoding_method: ctc_beam_search
lang_model_path: data/lm/common_crawl_00.prune01111.trie.klm
alpha: 2.5
beta: 0.3
beam_size: 500
cutoff_prob: 1.0
cutoff_top_n: 40
num_proc_bsearch: 8


================================================
FILE: examples/tiny/asr0/local/data.sh
================================================
#!/bin/bash

# Prepare the "tiny" LibriSpeech subset: raw manifest, CMVN statistics,
# vocabulary and the formatted manifest.
# Requires MAIN_ROOT to be set in the environment.

stage=-1
stop_stage=100

unit_type=char
dict_dir=data/lang_char

. ${MAIN_ROOT}/utils/parse_options.sh || exit -1;

# Succeeds when stage number $1 lies inside [stage, stop_stage].
stage_enabled() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

TARGET_DIR=${MAIN_ROOT}/dataset
mkdir -p data
mkdir -p ${dict_dir}
mkdir -p ${TARGET_DIR}

# Stage -1: download data, generate manifests
if stage_enabled -1; then
    if ! python3 ${TARGET_DIR}/librispeech/librispeech.py \
        --manifest_prefix="data/manifest" \
        --target_dir="${TARGET_DIR}/librispeech" \
        --full_download="False"; then
        echo "Prepare LibriSpeech failed. Terminated."
        exit 1
    fi

    # The tiny set is the first 64 entries of the dev-clean manifest.
    head -n 64 data/manifest.dev-clean  > data/manifest.tiny.raw
fi

# Stage 0: compute mean and stddev for normalizer
if stage_enabled 0; then
    if ! python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
        --manifest_path="data/manifest.tiny.raw" \
        --num_samples=64 \
        --spectrum_type="linear" \
        --delta_delta=false \
        --sample_rate=16000 \
        --stride_ms=10 \
        --window_ms=20 \
        --use_dB_normalization=False \
        --num_workers=2 \
        --output_path="data/mean_std.json"; then
        echo "Compute mean and stddev failed. Terminated."
        exit 1
    fi
fi

# Stage 1: build vocabulary
if stage_enabled 1; then
    if ! python3 ${MAIN_ROOT}/utils/build_vocab.py \
        --unit_type ${unit_type} \
        --count_threshold=0 \
        --vocab_path="${dict_dir}/vocab.txt" \
        --manifest_paths="data/manifest.tiny.raw"; then
        echo "Build vocabulary failed. Terminated."
        exit 1
    fi
fi

# Stage 2: format manifest with tokenids, vocab size
if stage_enabled 2; then
    if ! python3 ${MAIN_ROOT}/utils/format_data.py \
        --cmvn_path "data/mean_std.json" \
        --unit_type ${unit_type} \
        --vocab_path="${dict_dir}/vocab.txt" \
        --manifest_path="data/manifest.tiny.raw" \
        --output_path="data/manifest.tiny"; then
        echo "Formt mnaifest failed. Terminated."
        exit 1
    fi
fi

echo "LibriSpeech Data preparation done."
exit 0


================================================
FILE: examples/tiny/asr0/local/download_lm_en.sh
================================================
#!/bin/bash
# Download the English n-gram language model used for CTC beam-search decoding
# into data/lm/. Requires MAIN_ROOT to be set (source path.sh first).
# Exits 1 when the download fails, 0 otherwise.

# NOTE(review): `download` is not defined in this script; presumably it is
# provided by utils/utility.sh — confirm.
. ${MAIN_ROOT}/utils/utility.sh

DIR=data/lm
mkdir -p ${DIR}

URL=https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm
MD5="099a601759d467cd0a8523ff939819c5"
TARGET=${DIR}/common_crawl_00.prune01111.trie.klm

echo "Start downloading the language model. The language model is large, please wait for a moment ..."
# output of the helper is discarded; only its exit status is inspected
download "${URL}" "${MD5}" "${TARGET}" > /dev/null 2>&1
if [ $? -ne 0 ]; then
    echo "Fail to download the language model!"
    exit 1
else
    # message previously misspelled "sucessfully"
    echo "Download the language model successfully"
fi


exit 0


================================================
FILE: examples/tiny/asr0/local/export.sh
================================================
#!/bin/bash
# Export a checkpoint with ${BIN_DIR}/export.py.
# Positional arguments: config_path ckpt_prefix jit_model_path
# Exit status: 255 (exit -1) on wrong usage, 1 when export.py fails, else 0.

[ $# -eq 3 ] || {
    echo "usage: $0 config_path ckpt_prefix jit_model_path"
    exit -1
}

# number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when it is empty)
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

cfg=$1
ckpt=$2
jit_out=$3

if ! python3 -u ${BIN_DIR}/export.py \
    --ngpu ${ngpu} \
    --config ${cfg} \
    --checkpoint_path ${ckpt} \
    --export_path ${jit_out}; then
    echo "Failed in export!"
    exit 1
fi

exit 0


================================================
FILE: examples/tiny/asr0/local/test.sh
================================================
#!/bin/bash
# Evaluate a checkpoint with ${BIN_DIR}/test.py; the result is written to
# <ckpt_path_prefix>.rsl.
# Positional arguments: config_path decode_config_path ckpt_path_prefix
# Exit status: 255 (exit -1) on wrong usage, 1 when the language-model download
# or the evaluation fails, else 0.

[ $# -eq 3 ] || {
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix"
    exit -1
}

# number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when it is empty)
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

cfg=$1
decode_cfg=$2
ckpt=$3

# download language model
bash local/download_lm_en.sh || exit 1

if ! python3 -u ${BIN_DIR}/test.py \
    --ngpu ${ngpu} \
    --config ${cfg} \
    --decode_cfg ${decode_cfg} \
    --result_file ${ckpt}.rsl \
    --checkpoint_path ${ckpt}; then
    echo "Failed in evaluation!"
    exit 1
fi

exit 0


================================================
FILE: examples/tiny/asr0/local/train.sh
================================================
#!/bin/bash
# Train a model with ${BIN_DIR}/train.py; outputs go to exp/<ckpt_name>.
#
# usage: CUDA_VISIBLE_DEVICES=0 local/train.sh [--seed N] [--profiler_options S] \
#            config_path ckpt_name [ips]
#
# With no visible GPU (ngpu == 0) train.py is run directly; otherwise it is
# started through paddle.distributed.launch on the GPUs listed in
# CUDA_VISIBLE_DEVICES, optionally with --ips=<ips>.
# Exit status: 255 (exit -1) on wrong usage, 1 when training fails, else 0.

profiler_options=

# seed may break model convergence
seed=0

source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

# number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when it is empty)
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

if [ ${seed} != 0  ]; then
    export FLAGS_cudnn_deterministic=True
    echo "using seed $seed & FLAGS_cudnn_deterministic=True ..."
fi

# Two or three positional arguments are accepted. The two range tests must be
# OR-ed: "fewer than 2 AND more than 3" can never hold, which disabled the check.
if [ $# -lt 2 ] || [ $# -gt 3 ];then
    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
    exit -1
fi

config_path=$1
ckpt_name=$2
ips=$3

if [ -z "${ips}" ];then
  ips_config=
else
  ips_config="--ips=${ips}"
fi

mkdir -p exp

# default memory allocator strategy may cause gpu training hang
# for no OOM raised when memory exhausted
export FLAGS_allocator_strategy=naive_best_fit

if [ ${ngpu} == 0 ]; then
python3 -u ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--config ${config_path} \
--output exp/${ckpt_name} \
--profiler-options "${profiler_options}" \
--seed ${seed}
else
# ${ips_config} is intentionally unquoted so that it vanishes when empty
python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--config ${config_path} \
--output exp/${ckpt_name} \
--profiler-options "${profiler_options}" \
--seed ${seed}
fi
# Capture the trainer's status right away: the clean-up `if` below resets $?
# to 0, which previously made the failure check unreachable.
train_status=$?

if [ ${seed} != 0  ]; then
    unset FLAGS_cudnn_deterministic
fi

if [ ${train_status} -ne 0 ]; then
    echo "Failed in training!"
    exit 1
fi

exit 0


================================================
FILE: examples/tiny/asr0/path.sh
================================================
# Environment for the tiny/asr0 example. Source this file from the example
# directory: MAIN_ROOT is resolved relative to the current working directory.
export MAIN_ROOT=$(realpath ${PWD}/../../../)

# Make the repository root and its utils/ directory reachable as commands.
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export LC_ALL=C

# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/

# Directory holding the train/test/export entry points of the selected model.
MODEL=deepspeech2
export BIN_DIR=${MAIN_ROOT}/paddlespeech/s2t/exps/${MODEL}/bin


================================================
FILE: examples/tiny/asr0/run.sh
================================================
#!/bin/bash
# Pipeline driver for the tiny/asr0 example.
# Stages: 0 data preparation, 1 training, 2 checkpoint averaging,
#         3 evaluation, 4 export. Select a range with --stage / --stop_stage.
set -e
source path.sh

gpus=4
stage=0
stop_stage=100
conf_path=conf/deepspeech2.yaml
ips=            #xx.xx.xx.xx,xx.xx.xx.xx
decode_conf_path=conf/tuning/decode.yaml
avg_num=1
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

# Succeeds when stage number $1 lies within [stage, stop_stage].
stage_selected() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

avg_ckpt=avg_${avg_num}
# checkpoint name = config file name up to its first '.', e.g. deepspeech2
ckpt=$(basename ${conf_path})
ckpt=${ckpt%%.*}
echo "checkpoint name ${ckpt}"

# prepare data
if stage_selected 0; then
    bash ./local/data.sh || exit -1
fi

# train model, all `ckpt` under `exp` dir
if stage_selected 1; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${ips}
fi

# avg n best model
if stage_selected 2; then
    avg.sh best exp/${ckpt}/checkpoints ${avg_num}
fi

# test ckpt avg_n
if stage_selected 3; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt}|| exit -1
fi

# export ckpt avg_n
if stage_selected 4; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit
fi


================================================
FILE: examples/tiny/asr1/.gitignore
================================================
data
exp
log


================================================
FILE: examples/tiny/asr1/README.md
================================================
# Transformer/Conformer ASR with Tiny
This example contains code used to train a [u2](https://arxiv.org/pdf/2012.05481.pdf) model (Transformer or [Conformer](https://arxiv.org/pdf/2005.08100.pdf) model) with the Tiny dataset (a part of the [Librispeech dataset](http://www.openslr.org/resources/12)).
## Overview
All the scripts you need are in `run.sh`. There are several stages in `run.sh`, and each stage has its function.
| Stage | Function                                                     |
|:---- |:----------------------------------------------------------- |
| 0     | Process data. It includes: <br>       (1) Download the dataset <br>       (2) Calculate the CMVN of the train dataset <br>       (3) Get the vocabulary file <br>       (4) Get the manifest files of the train, development and test dataset<br>       (5) Get the sentencepiece model |
| 1     | Train the model                                              |
| 2     | Get the final model by averaging the top-k models, set k = 1 means to choose the best model |
| 3     | Test the final model performance                             |
| 4     | Get ctc alignment of test data using the final model         |

You can choose to run a range of stages by setting `stage` and `stop_stage`. 

For example, if you want to execute the code in stage 2 and stage 3, you can run this script:
```bash
bash run.sh --stage 2 --stop_stage 3
```
Or you can set `stage` equal to `stop_stage` to only run one stage.
For example, if you only want to run ```stage 0```, you can use the script below:
```bash
bash run.sh --stage 0 --stop_stage 0
```
The document below will describe the scripts in ```run.sh```in detail.
## The Environment Variables
The path.sh contains the environment variables. 
```bash
. ./path.sh
```
This script needs to be run first. And another script is also needed:
```bash
source ${MAIN_ROOT}/utils/parse_options.sh
```
It will support the way of using `--variable value` in the shell scripts.
## The Local Variables
Some local variables are set in `run.sh`. 
`gpus` denotes the GPU number you want to use. If you set `gpus=`, it means you only use CPU. 
`stage` denotes the number of the stage you want to start from in the experiments.
`stop_stage` denotes the number of the stage you want to stop at in the experiments. 
`conf_path` denotes the config path of the model.
`avg_num` denotes the number K of top-K models you want to average to get the final model.
`ckpt` denotes the checkpoint prefix of the model, e.g. "transformer".
You can set the local variables (except `ckpt`) when you use `run.sh`.
For example, you can set the `gpus` and `avg_num` when you use the command line:
```bash
bash run.sh --gpus 0,1 --avg_num 1
```
## Stage 0: Data Processing
To use this example, you need to process data firstly and you can use stage 0 in ```run.sh```to do this. The code is shown below:

```bash
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
     # prepare data
     bash ./local/data.sh || exit -1
 fi
```
Stage 0 is for processing the data.

If you only want to process the data. You can run
```bash
bash run.sh --stage 0 --stop_stage 0
```
You can also just run these scripts in your command line.
```bash
. ./path.sh
bash ./local/data.sh
```
After processing the data, the ``data`` directory will look like this:
```bash
data/
|-- dev.meta
|-- lang_char
|   `-- bpe_unigram_200.model
|   `-- bpe_unigram_200.vocab
|   `-- vocab.txt
|-- manifest.dev
|-- manifest.dev.raw
|-- manifest.test
|-- manifest.test.raw
|-- manifest.train
|-- manifest.train.raw
|-- mean_std.json
|-- test.meta
`-- train.meta
```
## Stage 1: Model Training
If you want to train the model, you can use stage 1 in ```run.sh```. The code is shown below. 
```bash
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
     CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt}
 fi
```
If you want to train the model, you can use the script below to execute stage 0 and stage 1:
```bash
bash run.sh --stage 0 --stop_stage 1
```
or you can run these scripts in the command line (only use CPU).
```bash
. ./path.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/transformer.yaml transformer
```
## Stage 2: Top-k Models Averaging
After training the model, we need to get the final model for testing and inference. In every epoch, the model checkpoint is saved, so we can choose the best model from them based on the validation loss or we can sort them and average the parameters of the top-k models to get the final model. We can use stage 2 to do this, and the code is shown below:
```bash
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     # avg n best model
     avg.sh best exp/${ckpt}/checkpoints ${avg_num}
 fi
```
The `avg.sh` script is in `../../../utils/`, which is added to `PATH` in `path.sh`.
If you want to get the final model, you can use the script below to execute stage 0, stage 1, and stage 2:
```bash
bash run.sh --stage 0 --stop_stage 2
```
or you can run these scripts in the command line (only use CPU).
```bash
. ./path.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/transformer.yaml transformer
avg.sh best exp/transformer/checkpoints 1
```
## Stage 3: Model Testing
The test stage is to evaluate the model performance. The code of test stage is shown below:
```bash
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
     # test ckpt avg_n
     CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
 fi
```
If you want to train a model and test it, you can use the script below to execute stage 0, stage 1, stage 2, and stage 3 :
```bash
bash run.sh --stage 0 --stop_stage 3
```
or you can run these scripts in the command line (only use CPU).
```bash
. ./path.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/transformer.yaml transformer
avg.sh best exp/transformer/checkpoints 1
CUDA_VISIBLE_DEVICES= ./local/test.sh conf/transformer.yaml conf/tuning/decode.yaml exp/transformer/checkpoints/avg_1
```
## Stage 4: CTC Alignment 
If you want to get the alignment between the audio and the text, you can use the ctc alignment. The code of this stage is shown below:
```bash
 if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
     # ctc alignment of test data
     CUDA_VISIBLE_DEVICES=0 ./local/align.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
 fi
```
If you want to train the model, test it and do the alignment, you can use the script below to execute stage 0, stage 1, stage 2, stage 3, and stage 4:
```bash
bash run.sh --stage 0 --stop_stage 4
```
or if you only need to train a model and do the alignment, you can use these scripts to skip stage 3 (test stage):

```bash
bash run.sh --stage 0 --stop_stage 2
bash run.sh --stage 4 --stop_stage 4
```
or you can also use these scripts in the command line (only use CPU).
```bash
. ./path.sh
bash ./local/data.sh
CUDA_VISIBLE_DEVICES= ./local/train.sh conf/transformer.yaml transformer
avg.sh best exp/transformer/checkpoints 1
# test stage is optional
CUDA_VISIBLE_DEVICES= ./local/test.sh conf/transformer.yaml conf/tuning/decode.yaml exp/transformer/checkpoints/avg_1
CUDA_VISIBLE_DEVICES= ./local/align.sh conf/transformer.yaml conf/tuning/decode.yaml exp/transformer/checkpoints/avg_1
```


================================================
FILE: examples/tiny/asr1/conf/augmentation.json
================================================
[
  {
    "type": "speed",
    "params": {
      "min_speed_rate": 0.9,
      "max_speed_rate": 1.1,
      "num_rates": 3
    },
    "prob": 1.0
  },
  {
    "type": "shift",
    "params": {
      "min_shift_ms": -5,
      "max_shift_ms": 5
    },
    "prob": 1.0
  },
  {
    "type": "specaug",
    "params": {
      "W": 0,
      "warp_mode": "PIL",
      "F": 10,
      "n_freq_masks": 2,
      "T": 50,
      "n_time_masks": 2,
      "p": 1.0,
      "adaptive_number_ratio": 0,
      "adaptive_size_ratio": 0,
      "max_n_time_masks": 20,
      "replace_with_zero": true
    },
    "prob": 1.0
  }
]


================================================
FILE: examples/tiny/asr1/conf/chunk_confermer.yaml
================================================
############################################
#           Network Architecture           #
############################################
cmvn_file: "data/mean_std.json"
cmvn_file_type: "json"
# encoder related
encoder: conformer
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 and conv2d8
    normalize_before: True
    use_cnn_module: True
    cnn_module_kernel: 15
    activation_type: 'swish'
    pos_enc_layer_type: 'rel_pos'
    selfattention_layer_type: 'rel_selfattn'
    causal: True
    use_dynamic_chunk: True
    cnn_module_norm: 'layer_norm' # using nn.LayerNorm makes model converge faster
    use_dynamic_left_chunk: false

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    ctc_weight: 0.3
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false


###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.tiny
dev_manifest: data/manifest.tiny
test_manifest: data/manifest.tiny

  
###########################################
#              Dataloader                 #
###########################################
mean_std_filepath: ""
vocab_filepath: data/lang_char/vocab.txt 
unit_type: 'spm'
spm_model_prefix: 'data/lang_char/bpe_unigram_200'
feat_dim: 80
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 4
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
preprocess_config: conf/preprocess.yaml 
num_workers: 0
subsampling_factor: 1
num_encs: 1
  

###########################################
#                 Training                #
###########################################
n_epoch: 5
accum_grad: 1
global_grad_clip: 5.0
optim: adam
optim_conf:
  lr: 0.001
  weight_decay: 1.0e-06
scheduler: warmuplr     
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 1
checkpoint:
  kbest_n: 10
  latest_n: 1


================================================
FILE: examples/tiny/asr1/conf/chunk_transformer.yaml
================================================
############################################
#           Network Architecture           #
############################################
cmvn_file: "data/mean_std.json"
cmvn_file_type: "json"
# encoder related
encoder: transformer
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 and conv2d8
    normalize_before: true
    use_dynamic_chunk: true
    use_dynamic_left_chunk: false

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    ctc_weight: 0.3
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false


# https://yaml.org/type/float.html
###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.tiny
dev_manifest: data/manifest.tiny
test_manifest: data/manifest.tiny
  
###########################################
#              Dataloader                 #
###########################################
mean_std_filepath: ""
vocab_filepath: data/lang_char/vocab.txt 
unit_type: 'spm'
spm_model_prefix: 'data/lang_char/bpe_unigram_200'
preprocess_config: conf/preprocess.yaml
feat_dim: 80
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 4
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 0
subsampling_factor: 1
num_encs: 1


###########################################
#                 Training                #
###########################################
n_epoch: 5
accum_grad: 1
global_grad_clip: 5.0
optim: adam
optim_conf:
  lr: 0.002
  weight_decay: 1.0e-06
scheduler: warmuplr     
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 1
checkpoint:
  kbest_n: 10
  latest_n: 1


================================================
FILE: examples/tiny/asr1/conf/conformer.yaml
================================================
# https://yaml.org/type/float.html
############################################
#           Network Architecture           #
############################################
cmvn_file: "data/mean_std.json"
cmvn_file_type: "json"
# encoder related
encoder: conformer
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 and conv2d8
    normalize_before: true
    use_cnn_module: True
    cnn_module_kernel: 15
    activation_type: 'swish'
    pos_enc_layer_type: 'rel_pos'
    selfattention_layer_type: 'rel_selfattn'

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    ctc_weight: 0.3
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false


###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.tiny
dev_manifest: data/manifest.tiny
test_manifest: data/manifest.tiny
  

###########################################
#              Dataloader                 #
###########################################
mean_std_filepath: ""
vocab_filepath: data/lang_char/vocab.txt 
unit_type: 'spm'
spm_model_prefix: 'data/lang_char/bpe_unigram_200'
preprocess_config: conf/preprocess.yaml
feat_dim: 80
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 4
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 0
subsampling_factor: 1
num_encs: 1


###########################################
#                 Training                #
###########################################
n_epoch: 5
accum_grad: 4
global_grad_clip: 5.0
optim: adam
optim_conf:
  lr: 0.002
  weight_decay: 1.0e-06
scheduler: warmuplr     
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 1
checkpoint:
  kbest_n: 10
  latest_n: 1


================================================
FILE: examples/tiny/asr1/conf/preprocess.yaml
================================================
process:
  # extract kaldi fbank from PCM
  - type: fbank_kaldi
    fs: 16000
    n_mels: 80
    n_shift: 160
    win_length: 400
    dither: 0.1
  - type: cmvn_json
    cmvn_path: data/mean_std.json
  # these three processes are a.k.a. SpecAugment
  - type: time_warp
    max_time_warp: 5
    inplace: true
    mode: PIL
  - type: freq_mask
    F: 30
    n_mask: 2
    inplace: true
    replace_with_zero: false
  - type: time_mask
    T: 40
    n_mask: 2
    inplace: true
    replace_with_zero: false


================================================
FILE: examples/tiny/asr1/conf/transformer.yaml
================================================
# https://yaml.org/type/float.html
############################################
#           Network Architecture           #
############################################
cmvn_file: 
cmvn_file_type: "json"
# encoder related
encoder: transformer
encoder_conf:
    output_size: 256    # dimension of attention
    attention_heads: 4
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 and conv2d8
    normalize_before: true

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 4
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    ctc_weight: 0.3
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false

###########################################
#                   Data                  #
###########################################
train_manifest: data/manifest.tiny
dev_manifest: data/manifest.tiny
test_manifest: data/manifest.tiny
  
###########################################
#              Dataloader                 #
###########################################
mean_std_filepath: data/mean_std.json
vocab_filepath: data/lang_char/vocab.txt 
unit_type: 'spm'
spm_model_prefix: 'data/lang_char/bpe_unigram_200'
preprocess_config: conf/preprocess.yaml
feat_dim: 80
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 4
maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 0
subsampling_factor: 1
num_encs: 1


###########################################
#                 Training                #
###########################################
n_epoch: 5
accum_grad: 1
global_grad_clip: 5.0
optim: adam
optim_conf:
  lr: 0.002
  weight_decay: 1.0e-06
scheduler: warmuplr     
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 1
checkpoint:
  kbest_n: 2
  latest_n: 1


================================================
FILE: examples/tiny/asr1/conf/tuning/chunk_decode.yaml
================================================
decode_batch_size: 8 #64
error_rate_type: wer
decoding_method: attention  # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
beam_size: 10
ctc_weight: 0.5 # ctc weight for attention rescoring decode mode.
decoding_chunk_size: -1 # decoding chunk size. Defaults to -1.
    # <0: for decoding, use full chunk.
    # >0: for decoding, use fixed chunk size as set.
    # 0: used for training, it's prohibited here. 
num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
simulate_streaming: False  # simulate streaming inference. Defaults to False.

================================================
FILE: examples/tiny/asr1/conf/tuning/decode.yaml
================================================
decode_batch_size: 8 #64
error_rate_type: wer
decoding_method: attention  # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
beam_size: 10
ctc_weight: 0.5 # ctc weight for attention rescoring decode mode.
decoding_chunk_size: -1 # decoding chunk size. Defaults to -1.
    # <0: for decoding, use full chunk.
    # >0: for decoding, use fixed chunk size as set.
    # 0: used for training, it's prohibited here. 
num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
simulate_streaming: False  # simulate streaming inference. Defaults to False.


================================================
FILE: examples/tiny/asr1/local/align.sh
================================================
#!/bin/bash
# Run CTC alignment with ${BIN_DIR}/alignment.py.
# Positional arguments: config_path decode_config_path ckpt_path_prefix
# Exit status: 255 (exit -1) on wrong usage, 1 when alignment.py fails, else 0.

if [ $# != 3 ];then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix"
    exit -1
fi

# number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when it is empty)
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
decode_config_path=$2
ckpt_prefix=$3

# the decode batch size is forced to 1 through --opts below
batch_size=1
# results are written into a directory named after the checkpoint prefix
output_dir=${ckpt_prefix}
mkdir -p ${output_dir}

# align dump in `result_file`
# .tier, .TextGrid dump in `dir of result_file`
# NOTE(review): `type` is never assigned in this script, so unless it is
# inherited from the environment the result file is `${output_dir}/.align`
# (a hidden file). Confirm the intended file name.
python3 -u ${BIN_DIR}/alignment.py \
--ngpu ${ngpu} \
--config ${config_path} \
--decode_cfg ${decode_config_path} \
--result_file ${output_dir}/${type}.align \
--checkpoint_path ${ckpt_prefix} \
--opts decode.decode_batch_size ${batch_size}

if [ $? -ne 0 ]; then
    echo "Failed in ctc alignment!"
    exit 1
fi

exit 0


================================================
FILE: examples/tiny/asr1/local/data.sh
================================================
#!/bin/bash
# Data preparation for the tiny/asr1 example.
#
# Requires MAIN_ROOT to be set (source path.sh first). Stages are selected with
# --stage / --stop_stage (parsed by utils/parse_options.sh):
#   -1  run dataset/librispeech/librispeech.py (--full_download False) and keep
#       the first 64 lines of data/manifest.dev-clean as data/manifest.tiny.raw
#    0  compute 80-dim fbank mean/std statistics into data/mean_std.json
#    1  build the sentencepiece ("spm") vocabulary and model (${bpeprefix})
#    2  format the raw manifest into data/manifest.tiny
# Every stage aborts the script with exit code 1 when its tool fails.

stage=-1
stop_stage=100

dict_dir=data/lang_char

# bpemode (unigram or bpe)
nbpe=200
bpemode=unigram
bpeprefix="${dict_dir}/bpe_${bpemode}_${nbpe}"

. ${MAIN_ROOT}/utils/parse_options.sh || exit -1;

mkdir -p data
mkdir -p ${dict_dir}
TARGET_DIR=${MAIN_ROOT}/dataset
mkdir -p ${TARGET_DIR}

if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
    # download data, generate manifests
    python3 ${TARGET_DIR}/librispeech/librispeech.py \
    --manifest_prefix="data/manifest" \
    --target_dir="${TARGET_DIR}/librispeech" \
    --full_download="False"

    if [ $? -ne 0 ]; then
        echo "Prepare LibriSpeech failed. Terminated."
        exit 1
    fi

    # the "tiny" set is simply the first 64 entries of dev-clean
    head -n 64 data/manifest.dev-clean  > data/manifest.tiny.raw
fi

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # compute mean and stddev for normalizer
    python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
    --manifest_path="data/manifest.tiny.raw" \
    --num_samples=64 \
    --spectrum_type="fbank" \
    --feat_dim=80 \
    --delta_delta=false \
    --sample_rate=16000 \
    --stride_ms=10 \
    --window_ms=25 \
    --use_dB_normalization=False \
    --num_workers=2 \
    --output_path="data/mean_std.json"

    if [ $? -ne 0 ]; then
        echo "Compute mean and stddev failed. Terminated."
        exit 1
    fi
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # build vocabulary
    python3 ${MAIN_ROOT}/utils/build_vocab.py \
    --unit_type "spm" \
    --spm_vocab_size=${nbpe} \
    --spm_mode ${bpemode} \
    --spm_model_prefix ${bpeprefix} \
    --vocab_path="${dict_dir}/vocab.txt" \
    --manifest_paths="data/manifest.tiny.raw"

    if [ $? -ne 0 ]; then
        echo "Build vocabulary failed. Terminated."
        exit 1
    fi
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # format manifest with tokenids, vocab size
    python3 ${MAIN_ROOT}/utils/format_data.py \
    --cmvn_path "data/mean_std.json" \
    --unit_type "spm" \
    --spm_model_prefix ${bpeprefix} \
    --vocab_path="${dict_dir}/vocab.txt" \
    --manifest_path="data/manifest.tiny.raw" \
    --output_path="data/manifest.tiny"

    if [ $? -ne 0 ]; then
        # message previously read "Formt mnaifest failed."
        echo "Format manifest failed. Terminated."
        exit 1
    fi
fi

echo "LibriSpeech Data preparation done."
exit 0


================================================
FILE: examples/tiny/asr1/local/export.sh
================================================
#!/bin/bash
# Export a checkpoint with ${BIN_DIR}/export.py.
# Positional arguments: config_path ckpt_prefix jit_model_path
# Exit status: 255 (exit -1) on wrong usage, 1 when export.py fails, else 0.

[ $# -eq 3 ] || {
    echo "usage: $0 config_path ckpt_prefix jit_model_path"
    exit -1
}

# number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when it is empty)
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

cfg=$1
ckpt=$2
jit_out=$3

if ! python3 -u ${BIN_DIR}/export.py \
    --ngpu ${ngpu} \
    --config ${cfg} \
    --checkpoint_path ${ckpt} \
    --export_path ${jit_out}; then
    echo "Failed in export!"
    exit 1
fi

exit 0


================================================
FILE: examples/tiny/asr1/local/test.sh
================================================
#!/bin/bash
# Evaluate a checkpoint with ${BIN_DIR}/test.py using four decoding methods.
# Each method writes its result to <ckpt_path_prefix>.<method>.rsl.
# Positional arguments: config_path decode_config_path ckpt_path_prefix
# Exit status: 255 (exit -1) on wrong usage, 1 on the first failed run, else 0.

[ $# -eq 3 ] || {
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix"
    exit -1
}

# number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when it is empty)
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
decode_config_path=$2
ckpt_prefix=$3

# configs whose path matches "chunk_*yaml" are decoded in chunk (streaming) mode
chunk_mode=false
[[ ${config_path} =~ ^.*chunk_.*yaml$ ]] && chunk_mode=true

# download language model
#bash local/download_lm_en.sh
#if [ $? -ne 0 ]; then
#    exit 1
#fi

# Run test.py once; $1 = decoding method, $2 = decode batch size.
run_decode() {
    python3 -u ${BIN_DIR}/test.py \
    --ngpu ${ngpu} \
    --config ${config_path} \
    --decode_cfg ${decode_config_path} \
    --result_file ${ckpt_prefix}.$1.rsl \
    --checkpoint_path ${ckpt_prefix} \
    --opts decode.decoding_method $1 \
    --opts decode.decode_batch_size $2
}

# stream decoding only support batchsize=1
if [ ${chunk_mode} == true ]; then
    batched_size=1
else
    batched_size=64
fi

for method in attention ctc_greedy_search; do
    echo "decoding ${method}"
    run_decode ${method} ${batched_size} || {
        echo "Failed in evaluation!"
        exit 1
    }
done

# these two methods are always run with batch size 1
for method in ctc_prefix_beam_search attention_rescoring; do
    echo "decoding ${method}"
    run_decode ${method} 1 || {
        echo "Failed in evaluation!"
        exit 1
    }
done

exit 0


================================================
FILE: examples/tiny/asr1/local/train.sh
================================================
#!/bin/bash
# Train a model by calling ${BIN_DIR}/train.py.
#
# Usage: CUDA_VISIBLE_DEVICES=0 train.sh [options] config_path ckpt_name [ips]
#
#   config_path  config file passed to train.py as --config
#   ckpt_name    the run writes its output to exp/<ckpt_name>
#   ips          optional; forwarded to paddle.distributed.launch as --ips=<ips>
#
# The variables defined before parse_options.sh is sourced (profiler_options,
# benchmark_batch_size, benchmark_max_step, seed) are the defaults that are
# forwarded to train.py.
# NOTE(review): parse_options.sh presumably lets callers override them with
# --name value options and removes those options from "$@" - confirm.

profiler_options=
benchmark_batch_size=0
benchmark_max_step=0

# seed may break model convergence
seed=0

source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

# Number of comma-separated entries in CUDA_VISIBLE_DEVICES (0 when empty).
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

if [ ${seed} != 0  ]; then
    export FLAGS_cudnn_deterministic=True
    echo "using seed $seed & FLAGS_cudnn_deterministic=True ..."
fi

# Exactly 2 or 3 positional arguments are accepted. The two range checks
# must be OR-ed: a count cannot be both below 2 and above 3 at once.
if [ $# -lt 2 ] || [ $# -gt 3 ];then
    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
    exit -1
fi

config_path=$1
ckpt_name=$2
ips=$3

# Only pass --ips to the launcher when a value was supplied.
if [ -z "${ips}" ];then
  ips_config=
else
  ips_config="--ips="${ips}
fi

mkdir -p exp

# default memory allocator strategy may case gpu training hang
# for no OOM raised when memory exhausted
export FLAGS_allocator_strategy=naive_best_fit

if [ ${ngpu} == 0 ]; then
# No visible device: run train.py directly.
python3 -u ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--seed ${seed} \
--config ${config_path} \
--output exp/${ckpt_name} \
--profiler-options "${profiler_options}" \
--benchmark-batch-size ${benchmark_batch_size} \
--benchmark-max-step ${benchmark_max_step}
else
# One or more devices: start train.py through the distributed launcher.
python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--seed ${seed} \
--config ${config_path} \
--output exp/${ckpt_name} \
--profiler-options "${profiler_options}" \
--benchmark-batch-size ${benchmark_batch_size} \
--benchmark-max-step ${benchmark_max_step}
fi
# Save the training status right away: the cleanup below overwrites $?.
train_status=$?

if [ ${seed} != 0  ]; then
    unset FLAGS_cudnn_deterministic
fi

if [ ${train_status} -ne 0 ]; then
    echo "Failed in training!"
    exit 1
fi

exit 0


================================================
FILE: examples/tiny/asr1/path.sh
================================================
# Environment for the scripts of this example. run.sh sources this file
# before doing anything else.

# Repository root: three directory levels above the current working directory.
export MAIN_ROOT=$(realpath ${PWD}/../../../)

# Search paths for executables, Python modules and shared libraries.
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/

export LC_ALL=C
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8

# Directory holding the entry-point scripts of the selected model.
MODEL=u2
export BIN_DIR=${MAIN_ROOT}/paddlespeech/s2t/exps/${MODEL}/bin


================================================
FILE: examples/tiny/asr1/run.sh
================================================
#!/bin/bash
# Driver of the example: runs every stage whose number lies in
# [stage, stop_stage].
#
#   0   data preparation        (local/data.sh)
#   1   training                (local/train.sh)
#   2   checkpoint averaging    (avg.sh)
#   3   test                    (local/test.sh)
#   4   ctc alignment           (local/align.sh)
#   51  export                  (local/export.sh); not reached with the
#                               default stop_stage of 50
set -e
source path.sh

# Used verbatim as CUDA_VISIBLE_DEVICES for the train/test/align stages,
# i.e. a comma-separated list of device ids.
gpus=4
stage=0
stop_stage=50
conf_path=conf/transformer.yaml
ips=            #xx.xx.xx.xx,xx.xx.xx.xx
decode_conf_path=conf/tuning/decode.yaml
avg_num=1

source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

# The checkpoint name is the config file name up to its first dot.
avg_ckpt=avg_${avg_num}
ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}')
echo "checkpoint name ${ckpt}"

# Succeeds when stage number $1 lies within [stage, stop_stage].
stage_enabled() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

if stage_enabled 0; then
    # prepare data
    bash ./local/data.sh || exit -1
fi

if stage_enabled 1; then
    # train model, all `ckpt` under `exp` dir
    CUDA_VISIBLE_DEVICES=${gpus}  ./local/train.sh ${conf_path} ${ckpt} ${ips}
fi

if stage_enabled 2; then
    # avg n best model
    avg.sh best exp/${ckpt}/checkpoints ${avg_num}
fi

if stage_enabled 3; then
    # test ckpt avg_n
    CUDA_VISIBLE_DEVICES=${gpus} ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
fi

if stage_enabled 4; then
    # ctc alignment of test data
    CUDA_VISIBLE_DEVICES=${gpus} ./local/align.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
fi

if stage_enabled 51; then
    # export ckpt avg_n
    CUDA_VISIBLE_DEVICES= ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit
fi


================================================
FILE: examples/vctk/README.md
================================================

# VCTK

* tts0 - Tacotron2
* tts1 - TransformerTTS
* tts2 - SpeedySpeech
* tts3 - FastSpeech2
* voc0 - WaveFlow
* voc1 - Parallel WaveGAN
* voc2 - MelGAN
* voc3 - MultiBand MelGAN
* ernie_sat - ERNIE-SAT
* vc3 - StarGANv2-VC


================================================
FILE: examples/vctk/ernie_sat/README.md
================================================
# ERNIE-SAT with VCTK dataset
[ERNIE-SAT](https://arxiv.org/abs/2211.03545) is a speech-text joint pretraining framework that achieves SOTA results in cross-lingual multi-speaker speech synthesis and cross-lingual speech editing tasks. It can be applied to a series of scenarios such as Speech Editing, personalized Speech Synthesis, and Voice Cloning.

## Model Framework
In ERNIE-SAT, we propose two innovations:
- In the pretraining process, the phonemes corresponding to Chinese and English are used as input to achieve cross-language and personalized soft phoneme mapping
- The joint mask learning of speech and text is used to realize the alignment of speech and text

<p align="center">
    <img src="https://user-images.githubusercontent.com/24568452/186110814-1b9c6618-a0ab-4c0c-bb3d-3d860b0e8cc2.png" />
</p>

## Dataset
### Download and Extract the dataset
Download VCTK-0.92 from its [Official Website](https://datashare.ed.ac.uk/handle/10283/3443) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/VCTK-Corpus-0.92`.

### Get MFA Result and Extract
We use [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get durations for fastspeech2.
You can download the alignment result from here: [vctk_alignment.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/VCTK-Corpus-0.92/vctk_alignment.tar.gz), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) in our repo.
ps: we remove three speakers in VCTK-0.92 (see [reorganize_vctk.py](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/examples/other/mfa/local/reorganize_vctk.py)):
1. `p315`, because there is no text for it.
2. `p280` and `p362`, because there is no `*_mic2.flac` (which is better than `*_mic1.flac`) for them.

## Get Started
Assume the path to the dataset is `~/datasets/VCTK-Corpus-0.92`.
Assume the path to the MFA result of VCTK is `./vctk_alignment`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
    - synthesize waveform from text file.
```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.

```text
dump
├── dev
│   ├── norm
│   └── raw
├── phone_id_map.txt
├── speaker_id_map.txt
├── test
│   ├── norm
│   └── raw
└── train
    ├── norm
    ├── raw
    └── speech_stats.npy
```
The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The raw folder contains speech features of each utterance, while the norm folder contains normalized ones. The statistics used to normalize features are computed from the training set and are located in `dump/train/*_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains phones, text_lengths, speech_lengths, durations, the path of speech features, speaker, and id of each utterance.

### Model Training
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
`./local/train.sh` calls `${BIN_DIR}/train.py`.

### Synthesizing
We use [HiFiGAN](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/vctk/voc5) as the neural vocoder.

Download pretrained HiFiGAN model from [hifigan_vctk_ckpt_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_ckpt_0.2.0.zip) and unzip it.
```bash
unzip hifigan_vctk_ckpt_0.2.0.zip
```
HiFiGAN checkpoint contains files listed below.
```text
hifigan_vctk_ckpt_0.2.0
├── default.yaml                    # default config used to train HiFiGAN
├── feats_stats.npy                 # statistics used to normalize spectrogram when training HiFiGAN
└── snapshot_iter_2500000.pdz       # generator parameters of HiFiGAN
```
`./local/synthesize.sh` calls `${BIN_DIR}/synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
```

##  Speech Synthesis and Speech Editing

### Prepare
**prepare aligner**
```bash
mkdir -p tools/aligner
cd tools
# download MFA
wget https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner/releases/download/v1.0.1/montreal-forced-aligner_linux.tar.gz
# extract MFA
tar xvf montreal-forced-aligner_linux.tar.gz
# fix .so of MFA
cd montreal-forced-aligner/lib
ln -snf libpython3.6m.so.1.0 libpython3.6m.so
cd -
# download align models and dicts
cd aligner
wget https://paddlespeech.cdn.bcebos.com/MFA/ernie_sat/aishell3_model.zip
wget https://paddlespeech.cdn.bcebos.com/MFA/AISHELL-3/with_tone/simple.lexicon
wget https://paddlespeech.cdn.bcebos.com/MFA/ernie_sat/vctk_model.zip
wget https://paddlespeech.cdn.bcebos.com/MFA/LJSpeech-1.1/cmudict-0.7b
cd ../../
```
**prepare pretrained FastSpeech2 models**

ERNIE-SAT uses FastSpeech2 as the phoneme duration predictor:
```bash
mkdir download
cd download
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_conformer_baker_ckpt_0.5.zip
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_ljspeech_ckpt_0.5.zip
unzip fastspeech2_conformer_baker_ckpt_0.5.zip
unzip fastspeech2_nosil_ljspeech_ckpt_0.5.zip
cd ../
```
**prepare source data**
```bash
mkdir source
cd source
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/source/SSB03540307.wav
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/source/SSB03540428.wav
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/source/LJ050-0278.wav
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/source/p243_313.wav
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/source/p299_096.wav
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/source/this_was_not_the_show_for_me.wav
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/source/README.md
cd ../
```
You can check the text of downloaded wavs in `source/README.md`.
### Speech Synthesis and Speech Editing
```bash
./run.sh --stage 3 --stop-stage 3 --gpus 0
```

You can modify `--wav_path`, `--old_str` and `--new_str` yourself. `--old_str` should be the text corresponding to the audio of `--wav_path`, and `--new_str` should be designed according to `--task_name`. Both `--source_lang` and `--target_lang` should be `en` for a model trained with the VCTK dataset.
## Pretrained Model
Pretrained ErnieSAT model:
- [erniesat_vctk_ckpt_1.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/ernie_sat/erniesat_vctk_ckpt_1.2.0.zip)

Model | Step | eval/mlm_loss | eval/loss
:-------------:| :------------:| :-----: | :-----:
default| 8(gpu) x 199500|57.622215|57.622215


================================================
FILE: examples/vctk/ernie_sat/conf/default.yaml
================================================
# This configuration was tested on 8 GPUs (A100) with 80GB GPU memory.
# It takes around 2 days to finish the training. You can adjust
# batch_size and num_workers here, and ngpu in local/train.sh, for your machine.
###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################

fs: 24000          # sr
n_fft: 2048        # FFT size (samples).
n_shift: 300       # Hop size (samples). 12.5ms
win_length: 1200   # Window length (samples). 50ms
                   # If set to null, it will be the same as fft_size.
window: "hann"     # Window function.

# Only used for feats_type != raw

fmin: 80           # Minimum frequency of Mel basis.
fmax: 7600         # Maximum frequency of Mel basis.
n_mels: 80         # The number of mel basis.

mean_phn_span: 8
mlm_prob: 0.8

###########################################################
#                       DATA SETTING                      #
###########################################################
batch_size: 40
num_workers: 8

###########################################################
#                       MODEL SETTING                     #
###########################################################
model:
    text_masking: false
    postnet_layers: 5
    postnet_filts: 5
    postnet_chans: 256
    encoder_type: conformer
    decoder_type: conformer
    enc_input_layer: sega_mlm
    enc_pre_speech_layer: 0
    enc_cnn_module_kernel: 7
    enc_attention_dim: 384
    enc_attention_heads: 2
    enc_linear_units: 1536
    enc_num_blocks: 4
    enc_dropout_rate: 0.2
    enc_positional_dropout_rate: 0.2
    enc_attention_dropout_rate: 0.2
    enc_normalize_before: true
    enc_macaron_style: true
    enc_use_cnn_module: true
    enc_selfattention_layer_type: legacy_rel_selfattn
    enc_activation_type: swish
    enc_pos_enc_layer_type: legacy_rel_pos
    enc_positionwise_layer_type: conv1d
    enc_positionwise_conv_kernel_size: 3
    dec_cnn_module_kernel: 31
    dec_attention_dim: 384
    dec_attention_heads: 2
    dec_linear_units: 1536
    dec_num_blocks: 4
    dec_dropout_rate: 0.2
    dec_positional_dropout_rate: 0.2
    dec_attention_dropout_rate: 0.2
    dec_macaron_style: true
    dec_use_cnn_module: true
    dec_selfattention_layer_type: legacy_rel_selfattn
    dec_activation_type: swish
    dec_pos_enc_layer_type: legacy_rel_pos
    dec_positionwise_layer_type: conv1d
    dec_positionwise_conv_kernel_size: 3

###########################################################
#                     OPTIMIZER SETTING                   #
###########################################################
scheduler_params:
    d_model: 384
    warmup_steps: 4000
grad_clip: 1.0

###########################################################
#                     TRAINING SETTING                    #
###########################################################
max_epoch: 1500
num_snapshots: 50

###########################################################
#                       OTHER SETTING                     #
###########################################################
seed: 0

token_list:
- <blank>
- <unk>
- AH0
- T
- N
- sp
- D
- S
- R
- L
- IH1
- DH
- AE1
- M
- EH1
- K
- Z
- W
- HH
- ER0
- AH1
- IY1
- P
- V
- F
- B
- AY1
- IY0
- EY1
- AA1
- AO1
- UW1
- IH0
- OW1
- NG
- G
- SH
- ER1
- Y
- TH
- AW1
- CH
- UH1
- IH2
- JH
- OW0
- EH2
- OY1
- AY2
- EH0
- EY2
- UW0
- AE2
- AA2
- OW2
- AH2
- ZH
- AO2
- IY2
- AE0
- UW2
- AY0
- AA0
- AO0
- AW2
- EY0
- UH2
- ER2
- OY2
- UH0
- AW0
- OY0
- <sos/eos>


================================================
FILE: examples/vctk/ernie_sat/local/preprocess.sh
================================================
#!/bin/bash
# Data preparation of the example, in four stages:
#   0  generate durations.txt from the MFA results in ./vctk_alignment
#   1  extract features into ./dump
#   2  compute the statistics of the "speech" field of the training set
#   3  normalize the train/dev/test sets with the training statistics
#
# Usage: preprocess.sh config_path

config_path=$1

stage=0
stop_stage=100

if (( stage <= 0 && stop_stage >= 0 )); then
    # get durations from MFA's result
    echo "Generate durations.txt from MFA results ..."
    python3 ${MAIN_ROOT}/utils/gen_duration_from_textgrid.py \
        --inputdir=./vctk_alignment \
        --output durations.txt \
        --config=${config_path}
fi

if (( stage <= 1 && stop_stage >= 1 )); then
    # extract features
    echo "Extract features ..."
    python3 ${BIN_DIR}/preprocess.py \
        --dataset=vctk \
        --rootdir=~/datasets/VCTK-Corpus-0.92/ \
        --dumpdir=dump \
        --dur-file=durations.txt \
        --config=${config_path} \
        --num-cpu=20 \
        --cut-sil=True
fi

if (( stage <= 2 && stop_stage >= 2 )); then
    # get features' stats(mean and std)
    echo "Get features' stats ..."
    python3 ${MAIN_ROOT}/utils/compute_statistics.py \
        --metadata=dump/train/raw/metadata.jsonl \
        --field-name="speech"
fi

if (( stage <= 3 && stop_stage >= 3 )); then
    # normalize and convert phone/speaker to id, dev and test should use train's stats
    echo "Normalize ..."
    for subset in train dev test; do
        python3 ${BIN_DIR}/normalize.py \
            --metadata=dump/${subset}/raw/metadata.jsonl \
            --dumpdir=dump/${subset}/norm \
            --speech-stats=dump/train/speech_stats.npy \
            --phones-dict=dump/phone_id_map.txt \
            --speaker-dict=dump/speaker_id_map.txt
    done
fi


================================================
FILE: examples/vctk/ernie_sat/local/synthesize.sh
================================================
#!/bin/bash
# Synthesize the utterances listed in dump/test/norm/metadata.jsonl with a
# trained ERNIE-SAT checkpoint and the HiFiGAN vocoder.
#
# Usage: synthesize.sh config_path train_output_path ckpt_name
#
# Output is written to <train_output_path>/test.

stage=0
stop_stage=0

cfg=$1
exp_dir=$2
ckpt=$3

# Directory of the unzipped HiFiGAN checkpoint, relative to the working dir.
voc_dir=hifigan_vctk_ckpt_0.2.0

# hifigan
if (( stage <= 0 && stop_stage >= 0 )); then
    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 ${BIN_DIR}/synthesize.py \
        --erniesat_config=${cfg} \
        --erniesat_ckpt=${exp_dir}/checkpoints/${ckpt} \
        --erniesat_stat=dump/train/speech_stats.npy \
        --voc=hifigan_vctk \
        --voc_config=${voc_dir}/default.yaml \
        --voc_ckpt=${voc_dir}/snapshot_iter_2500000.pdz \
        --voc_stat=${voc_dir}/feats_stats.npy \
        --test_metadata=dump/test/norm/metadata.jsonl \
        --output_dir=${exp_dir}/test \
        --phones_dict=dump/phone_id_map.txt
fi


================================================
FILE: examples/vctk/ernie_sat/local/synthesize_e2e.sh
================================================
#!/bin/bash
# Run the two demo tasks of ${BIN_DIR}/synthesize_e2e.py on
# source/p243_313.wav:
#   stage 0  speech synthesis  -> exp/pred_gen.wav
#   stage 1  speech editing    -> exp/pred_edit.wav
#
# Usage: synthesize_e2e.sh config_path train_output_path ckpt_name

config_path=$1
train_output_path=$2
ckpt_name=$3

stage=0
stop_stage=1

# Call synthesize_e2e.py for one task; everything but the three arguments
# below is shared by both tasks.
#   $1: value of --task_name
#   $2: value of --new_str
#   $3: value of --output_name
run_task() {
    local task=$1
    local new_text=$2
    local out_wav=$3

    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 ${BIN_DIR}/synthesize_e2e.py \
        --task_name=${task} \
        --wav_path=source/p243_313.wav \
        --old_str='For that reason cover should not be given' \
        --new_str="${new_text}" \
        --source_lang=en \
        --target_lang=en \
        --erniesat_config=${config_path} \
        --phones_dict=dump/phone_id_map.txt \
        --erniesat_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
        --erniesat_stat=dump/train/speech_stats.npy \
        --voc=hifigan_vctk \
        --voc_config=hifigan_vctk_ckpt_0.2.0/default.yaml \
        --voc_ckpt=hifigan_vctk_ckpt_0.2.0/snapshot_iter_2500000.pdz \
        --voc_stat=hifigan_vctk_ckpt_0.2.0/feats_stats.npy \
        --output_name=${out_wav}
}

if (( stage <= 0 && stop_stage >= 0 )); then
    echo 'speech synthesize !'
    run_task synthesize 'I love you very much do you love me' exp/pred_gen.wav
fi

if (( stage <= 1 && stop_stage >= 1 )); then
    echo 'speech edit !'
    run_task edit 'For that reason cover is not impossible to be given' exp/pred_edit.wav
fi


================================================
FILE: examples/vctk/ernie_sat/run.sh
================================================
#!/bin/bash
# Driver of the example: runs every stage whose number lies in
# [stage, stop_stage].
#
#   0  preprocessing                      (local/preprocess.sh)
#   1  training                           (local/train.sh)
#   2  synthesis from metadata            (local/synthesize.sh)
#   3  speech synthesis and speech edit   (local/synthesize_e2e.sh)

set -e
source path.sh

# Used verbatim as CUDA_VISIBLE_DEVICES for stages 1-3.
gpus=0,1,2,3,4,5,6,7
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_199500.pdz

# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this can not be mixed use with `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

# Succeeds when stage number $1 lies within [stage, stop_stage].
stage_enabled() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

if stage_enabled 0; then
    # prepare data
    ./local/preprocess.sh ${conf_path} || exit -1
fi

if stage_enabled 1; then
    # train model, all `ckpt` under `train_output_path/checkpoints/` dir
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

if stage_enabled 2; then
    # synthesize, vocoder is hifigan by default
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

if stage_enabled 3; then
    # synthesize, run both speech synthesize and speech edit
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi


================================================
FILE: examples/vctk/tts3/README.md
================================================
# FastSpeech2 with the VCTK
This example contains code used to train a [Fastspeech2](https://arxiv.org/abs/2006.04558) model with [VCTK](https://datashare.ed.ac.uk/handle/10283/3443).

## Dataset
### Download and Extract the dataset
Download VCTK-0.92 from its [Official Website](https://datashare.ed.ac.uk/handle/10283/3443) and extract it to `~/datasets`. Then the dataset is in the directory `~/datasets/VCTK-Corpus-0.92`.

### Get MFA Result and Extract
We use [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get durations for fastspeech2.
You can download the alignment result from here: [vctk_alignment.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/VCTK-Corpus-0.92/vctk_alignment.tar.gz), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) in our repo.
ps: we remove three speakers in VCTK-0.92 (see [reorganize_vctk.py](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/examples/other/mfa/local/reorganize_vctk.py)):
1. `p315`, because there is no text for it.
2. `p280` and `p362`, because there is no `*_mic2.flac` (which is better than `*_mic1.flac`) for them.

## Get Started
Assume the path to the dataset is `~/datasets/VCTK-Corpus-0.92`.
Assume the path to the MFA result of VCTK is `./vctk_alignment`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
    - synthesize waveform from text file.
```bash
./run.sh
```
You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.

```text
dump
├── dev
│   ├── norm
│   └── raw
├── phone_id_map.txt
├── speaker_id_map.txt
├── test
│   ├── norm
│   └── raw
└── train
    ├── energy_stats.npy
    ├── norm
    ├── pitch_stats.npy
    ├── raw
    └── speech_stats.npy
```
The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The raw folder contains speech, pitch and energy features of each utterance, while the norm folder contains normalized ones. The statistics used to normalize features are computed from the training set and are located in `dump/train/*_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains phones, text_lengths, speech_lengths, durations, the path of speech features, the path of pitch features, the path of energy features, speaker, and id of each utterance.

### Model Training
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
`./local/train.sh` calls `${BIN_DIR}/train.py`.
Here's the complete help message.
```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU] [--phones-dict PHONES_DICT]
                [--speaker-dict SPEAKER_DICT] [--voice-cloning VOICE_CLONING]

Train a FastSpeech2 model.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       fastspeech2 config file.
  --train-metadata TRAIN_METADATA
                        training data.
  --dev-metadata DEV_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu=0, use cpu.
  --phones-dict PHONES_DICT
                        phone vocabulary file.
  --speaker-dict SPEAKER_DICT
                        speaker id map file for multiple speaker model.
  --voice-cloning VOICE_CLONING
                        whether training voice cloning model.
```
1. `--config` is a config file in yaml format to overwrite the default config, which can be found at `conf/default.yaml`.
2. `--train-metadata` and `--dev-metadata` should be the metadata file in the normalized subfolder of `train` and `dev` in the `dump` folder.
3. `--output-dir` is the directory to save the results of the experiment. Checkpoints are saved in `checkpoints/` inside this directory.
4. `--phones-dict` is the path of the phone vocabulary file.

### Synthesizing
We use [parallel wavegan](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/vctk/voc1) as the neural vocoder.

Download pretrained parallel wavegan model from [pwg_vctk_ckpt_0.1.1.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_vctk_ckpt_0.1.1.zip) and unzip it.
```bash
unzip pwg_vctk_ckpt_0.1.1.zip
```
Parallel WaveGAN checkpoint contains files listed below.
```text
pwg_vctk_ckpt_0.1.1
├── default.yaml                   # default config used to train parallel wavegan
├── snapshot_iter_1500000.pdz      # generator parameters of parallel wavegan
└── feats_stats.npy                # statistics used to normalize spectrogram when training parallel wavegan
```
`./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
```
The last argument selects the vocoder used during synthesis. It can be `0` or `1`, which selects the `pwgan` or the `hifigan` model respectively.

```text
usage: synthesize.py [-h]
                     [--am {speedyspeech_csmsc,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech,tacotron2_aishell3}]
                     [--am_config AM_CONFIG] [--am_ckpt AM_CKPT]
                     [--am_stat AM_STAT] [--phones_dict PHONES_DICT]
                     [--tones_dict TONES_DICT] [--speaker_dict SPEAKER_DICT]
                     [--voice-cloning VOICE_CLONING]
                     [--voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,wavernn_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,style_melgan_csmsc}]
                     [--voc_config VOC_CONFIG] [--voc_ckpt VOC_CKPT]
                     [--voc_stat VOC_STAT] [--ngpu NGPU]
                     [--test_metadata TEST_METADATA] [--output_dir OUTPUT_DIR]

Synthesize with acoustic model & vocoder

optional arguments:
  -h, --help            show this help message and exit
  --am {speedyspeech_csmsc,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech,tacotron2_aishell3}
                        Choose acoustic model type of tts task.
  --am_config AM_CONFIG
                        Config of acoustic model.
  --am_ckpt AM_CKPT     Checkpoint file of acoustic model.
  --am_stat AM_STAT     mean and standard deviation used to normalize
                        spectrogram when training acoustic model.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --tones_dict TONES_DICT
                        tone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --voice-cloning VOICE_CLONING
                        whether training voice cloning model.
  --voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,wavernn_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,style_melgan_csmsc}
                        Choose vocoder type of tts task.
  --voc_config VOC_CONFIG
                        Config of voc.
  --voc_ckpt VOC_CKPT   Checkpoint file of voc.
  --voc_stat VOC_STAT   mean and standard deviation used to normalize
                        spectrogram when training voc.
  --ngpu NGPU           if ngpu == 0, use cpu.
  --test_metadata TEST_METADATA
                        test metadata.
  --output_dir OUTPUT_DIR
                        output dir.
```
`./local/synthesize_e2e.sh` calls `${BIN_DIR}/../synthesize_e2e.py`, which can synthesize waveform from text file.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0
```
The last argument selects the vocoder used during synthesis. It can be `0` or `1`, which selects the `pwgan` or the `hifigan` model respectively.

```text
usage: synthesize_e2e.py [-h]
                         [--am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech}]
                         [--am_config AM_CONFIG] [--am_ckpt AM_CKPT]
                         [--am_stat AM_STAT] [--phones_dict PHONES_DICT]
                         [--tones_dict TONES_DICT]
                         [--speaker_dict SPEAKER_DICT] [--spk_id SPK_ID]
                         [--voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,style_melgan_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,wavernn_csmsc}]
                         [--voc_config VOC_CONFIG] [--voc_ckpt VOC_CKPT]
                         [--voc_stat VOC_STAT] [--lang LANG]
                         [--inference_dir INFERENCE_DIR] [--ngpu NGPU]
                         [--text TEXT] [--output_dir OUTPUT_DIR]

Synthesize with acoustic model & vocoder

optional arguments:
  -h, --help            show this help message and exit
  --am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech}
                        Choose acoustic model type of tts task.
  --am_config AM_CONFIG
                        Config of acoustic model.
  --am_ckpt AM_CKPT     Checkpoint file of acoustic model.
  --am_stat AM_STAT     mean and standard deviation used to normalize
                        spectrogram when training acoustic model.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --tones_dict TONES_DICT
                        tone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --spk_id SPK_ID       spk id for multi speaker acoustic model
  --voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,style_melgan_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,wavernn_csmsc}
                        Choose vocoder type of tts task.
  --voc_config VOC_CONFIG
                        Config of voc.
  --voc_ckpt VOC_CKPT   Checkpoint file of voc.
  --voc_stat VOC_STAT   mean and standard deviation used to normalize
                        spectrogram when training voc.
  --lang LANG           Choose model language. zh or en
  --inference_dir INFERENCE_DIR
                        dir to save inference models
  --ngpu NGPU           if ngpu == 0, use cpu.
  --text TEXT           text to synthesize, a 'utt_id sentence' pair per line.
  --output_dir OUTPUT_DIR
                        output dir.
```
1. `--am` is acoustic model type with the format {model_name}_{dataset}
2. `--am_config`, `--am_ckpt`, `--am_stat`, `--phones_dict`, and `--speaker_dict` are arguments for the acoustic model, which correspond to the 5 files in the fastspeech2 pretrained model.
3. `--voc` is vocoder type with the format {model_name}_{dataset}
4. `--voc_config`, `--voc_ckpt`, `--voc_stat` are arguments for vocoder, which correspond to the 3 files in the parallel wavegan pretrained model.
5. `--lang` is the model language, which can be `zh` or `en`.
6. `--test_metadata` should be the metadata file in the normalized subfolder of `test` in the `dump` folder.
7. `--text` is the text file, which contains sentences to synthesize.
8. `--output_dir` is the directory to save synthesized audio files.
9. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

## Pretrained Model
Pretrained FastSpeech2 model (trained on audio with no silence at the edges):
- [fastspeech2_vctk_ckpt_1.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_ckpt_1.2.0.zip)

The static model can be downloaded here:
- [fastspeech2_vctk_static_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_static_1.1.0.zip)

The PIR static model can be downloaded here:
- [fastspeech2_vctk_static_pir_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_static_pir_1.1.0.zip) (to run the PIR model, set `FLAGS_enable_pir_api=1`; the PIR model only works with paddlepaddle>=3.0.0b2)

The ONNX model can be downloaded here:
- [fastspeech2_vctk_onnx_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_onnx_1.1.0.zip)

The Paddle-Lite model can be downloaded here:
- [fastspeech2_vctk_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_pdlite_1.3.0.zip)

FastSpeech2 checkpoint contains files listed below.
```text
fastspeech2_vctk_ckpt_1.2.0
├── default.yaml            # default config used to train fastspeech2
├── energy_stats.npy        # statistics used to normalize energy when training fastspeech2
├── phone_id_map.txt        # phone vocabulary file when training fastspeech2
├── pitch_stats.npy         # statistics used to normalize pitch when training fastspeech2
├── snapshot_iter_66200.pdz # model parameters and optimizer states
├── speaker_id_map.txt      # speaker id map file when training a multi-speaker fastspeech2
└── speech_stats.npy        # statistics used to normalize spectrogram when training fastspeech2
```
You can use the following script to synthesize the sentences in `${BIN_DIR}/../../assets/sentences_en.txt` using the pretrained fastspeech2 and parallel wavegan models.
```bash
source path.sh

FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 ${BIN_DIR}/../synthesize_e2e.py \
  --am=fastspeech2_vctk \
  --am_config=fastspeech2_vctk_ckpt_1.2.0/default.yaml \
  --am_ckpt=fastspeech2_vctk_ckpt_1.2.0/snapshot_iter_66200.pdz \
  --am_stat=fastspeech2_vctk_ckpt_1.2.0/speech_stats.npy \
  --voc=pwgan_vctk \
  --voc_config=pwg_vctk_ckpt_0.1.1/default.yaml  \
  --voc_ckpt=pwg_vctk_ckpt_0.1.1/snapshot_iter_1500000.pdz \
  --voc_stat=pwg_vctk_ckpt_0.1.1/feats_stats.npy \
  --lang=en \
  --text=${BIN_DIR}/../../assets/sentences_en.txt \
  --output_dir=exp/default/test_e2e \
  --phones_dict=fastspeech2_vctk_ckpt_1.2.0/phone_id_map.txt \
  --speaker_dict=fastspeech2_vctk_ckpt_1.2.0/speaker_id_map.txt \
  --spk_id=0 \
  --inference_dir=exp/default/inference
```


================================================
FILE: examples/vctk/tts3/conf/default.yaml
================================================
###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################

fs: 24000          # Sampling rate (Hz).
n_fft: 2048        # FFT size (samples).
n_shift: 300       # Hop size (samples). 12.5ms
win_length: 1200   # Window length (samples). 50ms
                   # If set to null, it will be the same as the FFT size (n_fft).
window: "hann"     # Window function.

# Only used for feats_type != raw

fmin: 80           # Minimum frequency of Mel basis.
fmax: 7600         # Maximum frequency of Mel basis.
n_mels: 80         # The number of mel basis.

# Only used for the model using pitch features (e.g. FastSpeech2)
f0min: 80          # Minimum f0 for pitch extraction.
f0max: 400         # Maximum f0 for pitch extraction.


###########################################################
#                       DATA SETTING                      #
###########################################################
batch_size: 64
num_workers: 2


###########################################################
#                       MODEL SETTING                     #
###########################################################
model:
    adim: 384         # attention dimension
    aheads: 2         # number of attention heads
    elayers: 4        # number of encoder layers
    eunits: 1536      # number of encoder ff units
    dlayers: 4        # number of decoder layers
    dunits: 1536      # number of decoder ff units
    positionwise_layer_type: conv1d   # type of position-wise layer
    positionwise_conv_kernel_size: 3  # kernel size of position wise conv layer
    duration_predictor_layers: 2      # number of layers of duration predictor
    duration_predictor_chans: 256     # number of channels of duration predictor
    duration_predictor_kernel_size: 3 # filter size of duration predictor
    postnet_layers: 5                 # number of layers of postnet
    postnet_filts: 5                  # filter size of conv layers in postnet
    postnet_chans: 256                # number of channels of conv layers in postnet
    use_scaled_pos_enc: True          # whether to use scaled positional encoding
    encoder_normalize_before: True    # whether to perform layer normalization before the input
    decoder_normalize_before: True    # whether to perform layer normalization before the input
    reduction_factor: 1               # reduction factor
    init_type: xavier_uniform         # initialization type
    init_enc_alpha: 1.0               # initial value of alpha of encoder scaled position encoding
    init_dec_alpha: 1.0               # initial value of alpha of decoder scaled position encoding
    transformer_enc_dropout_rate: 0.2            # dropout rate for transformer encoder layer
    transformer_enc_positional_dropout_rate: 0.2 # dropout rate for transformer encoder positional encoding
    transformer_enc_attn_dropout_rate: 0.2       # dropout rate for transformer encoder attention layer
    transformer_dec_dropout_rate: 0.2            # dropout rate for transformer decoder layer
    transformer_dec_positional_dropout_rate: 0.2 # dropout rate for transformer decoder positional encoding
    transformer_dec_attn_dropout_rate: 0.2       # dropout rate for transformer decoder attention layer
    pitch_predictor_layers: 5                  # number of conv layers in pitch predictor
    pitch_predictor_chans: 256                 # number of channels of conv layers in pitch predictor
    pitch_predictor_kernel_size: 5             # kernel size of conv layers in pitch predictor
    pitch_predictor_dropout: 0.5               # dropout rate in pitch predictor
    pitch_embed_kernel_size: 1                 # kernel size of conv embedding layer for pitch
    pitch_embed_dropout: 0.0                   # dropout rate after conv embedding layer for pitch
    stop_gradient_from_pitch_predictor: True   # whether to stop the gradient from pitch predictor to encoder
    energy_predictor_layers: 2                 # number of conv layers in energy predictor
    energy_predictor_chans: 256                # number of channels of conv layers in energy predictor
    energy_predictor_kernel_size: 3            # kernel size of conv layers in energy predictor
    energy_predictor_dropout: 0.5              # dropout rate in energy predictor
    energy_embed_kernel_size: 1                # kernel size of conv embedding layer for energy
    energy_embed_dropout: 0.0                  # dropout rate after conv embedding layer for energy
    stop_gradient_from_energy_predictor: False # whether to stop the gradient from energy predictor to encoder
    spk_embed_dim: 256                         # speaker embedding dimension
    spk_embed_integration_type: concat         # speaker embedding integration type


###########################################################
#                       UPDATER SETTING                   #
###########################################################
updater:
    use_masking: True                 # whether to apply masking for padded part in loss calculation


###########################################################
#                     OPTIMIZER SETTING                   #
###########################################################
optimizer:
    optim: adam               # optimizer type
    learning_rate: 0.001      # learning rate

###########################################################
#                     TRAINING SETTING                    #
###########################################################
max_epoch: 200
num_snapshots: 5


###########################################################
#                       OTHER SETTING                     #
###########################################################
seed: 10086


================================================
FILE: examples/vctk/tts3/local/inference.sh
================================================
#!/bin/bash
# Synthesize the sentences in assets/sentences_en.txt with the models stored
# in ${train_output_path}/inference, using fastspeech2_vctk as acoustic model.
#
# Usage: inference.sh <train_output_path>
#
# Stage 0 runs with the pwgan_vctk vocoder and stage 1 with hifigan_vctk.
# stage/stop_stage are fixed below, so only stage 0 runs unless they are edited.

train_output_path=$1

stage=0
stop_stage=0

# Run inference.py once; $1 is the value passed to --voc.
run_inference() {
    local vocoder=$1
    python3 ${BIN_DIR}/../inference.py \
        --inference_dir=${train_output_path}/inference \
        --am=fastspeech2_vctk \
        --voc=${vocoder} \
        --text=${BIN_DIR}/../../assets/sentences_en.txt \
        --output_dir=${train_output_path}/pd_infer_out \
        --phones_dict=dump/phone_id_map.txt \
        --speaker_dict=dump/speaker_id_map.txt \
        --spk_id=0 \
        --lang=en
}

# stage 0: pwgan
if [[ ${stage} -le 0 && ${stop_stage} -ge 0 ]]; then
    run_inference pwgan_vctk
fi

# stage 1: hifigan
if [[ ${stage} -le 1 && ${stop_stage} -ge 1 ]]; then
    run_inference hifigan_vctk
fi


================================================
FILE: examples/vctk/tts3/local/lite_predict.sh
================================================
#!/bin/bash
# Synthesize the sentences in assets/sentences_en.txt with lite_predict.py,
# reading the models from ${train_output_path}/pdlite and writing the results
# to ${train_output_path}/lite_infer_out.
#
# Usage: lite_predict.sh <train_output_path>
#
# stage/stop_stage are fixed below, so only the pwgan stage (0) runs by default.

train_output_path=$1

stage=0
stop_stage=0

# Launch lite_predict.py with fastspeech2_vctk and the vocoder named by $1.
predict_with_vocoder() {
    local voc_name=$1
    python3 ${BIN_DIR}/../lite_predict.py \
        --inference_dir=${train_output_path}/pdlite \
        --am=fastspeech2_vctk \
        --voc=${voc_name} \
        --text=${BIN_DIR}/../../assets/sentences_en.txt \
        --output_dir=${train_output_path}/lite_infer_out \
        --phones_dict=dump/phone_id_map.txt \
        --speaker_dict=dump/speaker_id_map.txt \
        --spk_id=0 \
        --lang=en
}

# stage 0: pwgan vocoder
if [[ ${stage} -le 0 && ${stop_stage} -ge 0 ]]; then
    predict_with_vocoder pwgan_vctk
fi

# stage 1: hifigan vocoder
if [[ ${stage} -le 1 && ${stop_stage} -ge 1 ]]; then
    predict_with_vocoder hifigan_vctk
fi


================================================
FILE: examples/vctk/tts3/local/ort_predict.sh
================================================
#!/bin/bash
# Synthesize the sentences in assets/sentences_en.txt with ort_predict_e2e.py,
# reading the models from ${train_output_path}/inference_onnx and writing the
# results to ${train_output_path}/onnx_infer_out_e2e.
#
# Usage: ort_predict.sh <train_output_path>
#
# The shebang above makes the interpreter explicit, matching the other scripts
# in this directory. stage/stop_stage are fixed below, so only stage 0 (pwgan)
# runs unless they are edited.

train_output_path=$1

stage=0
stop_stage=0

# stage 0: e2e, synthesize from text with the pwgan vocoder
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    python3 ${BIN_DIR}/../ort_predict_e2e.py \
        --inference_dir=${train_output_path}/inference_onnx \
        --am=fastspeech2_vctk \
        --voc=pwgan_vctk \
        --output_dir=${train_output_path}/onnx_infer_out_e2e \
        --text=${BIN_DIR}/../../assets/sentences_en.txt \
        --phones_dict=dump/phone_id_map.txt \
        --device=cpu \
        --cpu_threads=2 \
        --spk_id=0 \
        --lang=en
fi

# stage 1: e2e, synthesize from text with the hifigan vocoder
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    python3 ${BIN_DIR}/../ort_predict_e2e.py \
        --inference_dir=${train_output_path}/inference_onnx \
        --am=fastspeech2_vctk \
        --voc=hifigan_vctk \
        --output_dir=${train_output_path}/onnx_infer_out_e2e \
        --text=${BIN_DIR}/../../assets/sentences_en.txt \
        --phones_dict=dump/phone_id_map.txt \
        --device=cpu \
        --cpu_threads=2 \
        --spk_id=0 \
        --lang=en
fi


================================================
FILE: examples/vctk/tts3/local/preprocess.sh
================================================
#!/bin/bash
# Data preparation for the VCTK FastSpeech2 example.
#
# Usage: preprocess.sh <config_path>
#
# Stages:
#   0  generate durations.txt from the MFA results in ./vctk_alignment
#   1  extract features into ./dump
#   2  compute statistics of the speech / pitch / energy fields of the train set
#   3  normalize train / dev / test and convert phones and speakers to ids

stage=0
stop_stage=100

config_path=$1

if [[ ${stage} -le 0 && ${stop_stage} -ge 0 ]]; then
    # get durations from MFA's result
    echo "Generate durations.txt from MFA results ..."
    python3 ${MAIN_ROOT}/utils/gen_duration_from_textgrid.py \
        --inputdir=./vctk_alignment \
        --output durations.txt \
        --config=${config_path}
fi

if [[ ${stage} -le 1 && ${stop_stage} -ge 1 ]]; then
    # extract features
    echo "Extract features ..."
    python3 ${BIN_DIR}/preprocess.py \
        --dataset=vctk \
        --rootdir=~/datasets/VCTK-Corpus-0.92/ \
        --dumpdir=dump \
        --dur-file=durations.txt \
        --config=${config_path} \
        --num-cpu=20 \
        --cut-sil=True
fi

if [[ ${stage} -le 2 && ${stop_stage} -ge 2 ]]; then
    # get features' stats (mean and std), one run per field of the train set
    echo "Get features' stats ..."
    for field in speech pitch energy; do
        python3 ${MAIN_ROOT}/utils/compute_statistics.py \
            --metadata=dump/train/raw/metadata.jsonl \
            --field-name="${field}"
    done
fi

if [[ ${stage} -le 3 && ${stop_stage} -ge 3 ]]; then
    # normalize and convert phone/speaker to id; dev and test use train's stats
    echo "Normalize ..."
    for subset in train dev test; do
        python3 ${BIN_DIR}/normalize.py \
            --metadata=dump/${subset}/raw/metadata.jsonl \
            --dumpdir=dump/${subset}/norm \
            --speech-stats=dump/train/speech_stats.npy \
            --pitch-stats=dump/train/pitch_stats.npy \
            --energy-stats=dump/train/energy_stats.npy \
            --phones-dict=dump/phone_id_map.txt \
            --speaker-dict=dump/speaker_id_map.txt
    done
fi


================================================
FILE: examples/vctk/tts3/local/synthesize.sh
================================================
#!/bin/bash
# Synthesize the utterances listed in dump/test/norm/metadata.jsonl with a
# trained fastspeech2_vctk checkpoint and write them to ${train_output_path}/test.
#
# Usage: synthesize.sh <config_path> <train_output_path> <ckpt_name> [stage]
#
# The optional 4th argument sets both stage and stop_stage (default 0), so it
# picks exactly one vocoder: 0 = pwgan, 1 = hifigan.

config_path=$1
train_output_path=$2
ckpt_name=$3

stage=${4:-0}
stop_stage=${4:-0}

# Run synthesize.py with one vocoder.
#   $1  value for --voc
#   $2  directory holding the vocoder's default.yaml, checkpoint and feats_stats.npy
#   $3  checkpoint file name inside that directory
run_synthesize() {
    local voc_name=$1
    local voc_dir=$2
    local voc_snapshot=$3
    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 ${BIN_DIR}/../synthesize.py \
        --am=fastspeech2_vctk \
        --am_config=${config_path} \
        --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
        --am_stat=dump/train/speech_stats.npy \
        --voc=${voc_name} \
        --voc_config=${voc_dir}/default.yaml \
        --voc_ckpt=${voc_dir}/${voc_snapshot} \
        --voc_stat=${voc_dir}/feats_stats.npy \
        --test_metadata=dump/test/norm/metadata.jsonl \
        --output_dir=${train_output_path}/test \
        --phones_dict=dump/phone_id_map.txt \
        --speaker_dict=dump/speaker_id_map.txt
}

# stage 0: pwgan
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    run_synthesize pwgan_vctk pwg_vctk_ckpt_0.1.1 snapshot_iter_1500000.pdz
fi

# stage 1: hifigan
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    run_synthesize hifigan_vctk hifigan_vctk_ckpt_0.2.0 snapshot_iter_2500000.pdz
fi


================================================
FILE: examples/vctk/tts3/local/synthesize_e2e.sh
================================================
#!/bin/bash
# Synthesize the sentences in assets/sentences_en.txt with a trained
# fastspeech2_vctk checkpoint, writing audio to ${train_output_path}/test_e2e
# and passing ${train_output_path}/inference as --inference_dir.
#
# Usage: synthesize_e2e.sh <config_path> <train_output_path> <ckpt_name> [stage]
#
# The optional 4th argument sets both stage and stop_stage (default 0), so it
# picks exactly one vocoder: 0 = pwgan, 1 = hifigan.

config_path=$1
train_output_path=$2
ckpt_name=$3

stage=${4:-0}
stop_stage=${4:-0}

# Run synthesize_e2e.py with one vocoder.
#   $1  value for --voc
#   $2  directory holding the vocoder's default.yaml, checkpoint and feats_stats.npy
#   $3  checkpoint file name inside that directory
run_synthesize_e2e() {
    local voc_name=$1
    local voc_dir=$2
    local voc_snapshot=$3
    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 ${BIN_DIR}/../synthesize_e2e.py \
        --am=fastspeech2_vctk \
        --am_config=${config_path} \
        --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
        --am_stat=dump/train/speech_stats.npy \
        --voc=${voc_name} \
        --voc_config=${voc_dir}/default.yaml \
        --voc_ckpt=${voc_dir}/${voc_snapshot} \
        --voc_stat=${voc_dir}/feats_stats.npy \
        --lang=en \
        --text=${BIN_DIR}/../../assets/sentences_en.txt \
        --output_dir=${train_output_path}/test_e2e \
        --phones_dict=dump/phone_id_map.txt \
        --speaker_dict=dump/speaker_id_map.txt \
        --spk_id=0 \
        --inference_dir=${train_output_path}/inference
}

# stage 0: pwgan
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    run_synthesize_e2e pwgan_vctk pwg_vctk_ckpt_0.1.1 snapshot_iter_1500000.pdz
fi

# stage 1: hifigan
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    run_synthesize_e2e hifigan_vctk hifigan_vctk_ckpt_0.2.0 snapshot_iter_2500000.pdz
fi


================================================
FILE: examples/vctk/tts3/run.sh
================================================
#!/bin/bash
# End-to-end recipe for the VCTK FastSpeech2 example: data preparation,
# training, synthesis, static-model inference, ONNX export/inference and
# Paddle-Lite export/inference. Each numbered stage below runs when
# stage <= N <= stop_stage.

# abort on the first failing command
set -e
source path.sh

# defaults; all of them can be overridden from the command line (see below)
gpus=0,1
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_331.pdz

# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this cannot be mixed with positional arguments (`$1`, `$2`, ...)
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # prepare data
    ./local/preprocess.sh ${conf_path} || exit -1
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # train model, all `ckpt` under `train_output_path/checkpoints/` dir
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # synthesize from the test metadata; the last argument selects the vocoder:
    # 0 = pwgan (used here), 1 = hifigan
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # synthesize_e2e from text; the last argument selects the vocoder:
    # 0 = pwgan (used here), 1 = hifigan
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} 0 || exit -1
fi

if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    # inference with static model, vocoder is pwgan by default
    CUDA_VISIBLE_DEVICES=${gpus} ./local/inference.sh ${train_output_path} || exit -1
fi

if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
    # install paddle2onnx
    pip install paddle2onnx --upgrade
    ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx fastspeech2_vctk
    # NOTE: hifigan is recommended for its balance between speed and quality,
    # but pwgan is what gets converted here; uncomment the hifigan line below
    # to convert it as well
    ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx pwgan_vctk
    # ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx hifigan_vctk
    
fi

# inference with onnxruntime, use fastspeech2 + pwgan by default
if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then
    ./local/ort_predict.sh ${train_output_path}
fi

# must run after stage 3, which is the stage that generates the static models
if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 7 ]; then
    ./local/export2lite.sh ${train_output_path} inference pdlite fastspeech2_vctk x86
    ./local/export2lite.sh ${train_output_path} inference pdlite pwgan_vctk x86
    # ./local/export2lite.sh ${train_output_path} inference pdlite hifigan_vctk x86
fi

# inference with the models exported to `pdlite` in stage 7
if [ ${stage} -le 8 ] && [ ${stop_stage} -ge 8 ]; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/lite_predict.sh ${train_output_path} || exit -1
fi

================================================
FILE: examples/vctk/vc3/README.md
================================================
You can download test source audios from [test_wav.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/starganv2vc/test_wav.zip).


Test Voice Conversion:

```bash
wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/starganv2vc/test_wav.zip
unzip test_wav.zip
./run.sh --stage 2 --stop-stage 2 --gpus 0
```


================================================
FILE: examples/vctk/vc3/conf/default.yaml
================================================
###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
# The original source code loads audio at 24k but extracts mel at 16k;
# later, both loading and mel extraction should be changed to 24k.
fs: 16000
n_fft: 2048
n_shift: 300
win_length: 1200   # Window length (samples). 75ms at fs=16000 (50ms at 24k)
                   # If set to null, it will be the same as the FFT size (n_fft).
window: "hann"     # Window function.

fmin: 0           # Minimum frequency of Mel basis.
fmax: 8000        # Maximum frequency of Mel basis.  sr // 2
n_mels: 80
# only for StarGANv2 VC
norm:             # None here
htk: True
power: 2.0


###########################################################
#                       MODEL SETTING                     #
###########################################################
generator_params:
    dim_in: 64
    style_dim: 64
    max_conv_dim: 512
    w_hpf: 0
    F0_channel: 256
mapping_network_params:
    num_domains: 20      # num of speakers in StarGANv2
    latent_dim: 16
    style_dim: 64        # same as style_dim in generator_params
    hidden_dim: 512      # same as max_conv_dim in generator_params
style_encoder_params:
    dim_in: 64           # same as dim_in in generator_params
    style_dim: 64        # same as style_dim in generator_params
    num_domains: 20      # same as num_domains in mapping_network_params
    max_conv_dim: 512    # same as max_conv_dim in generator_params
discriminator_params:
    dim_in: 64           # same as dim_in in generator_params
    num_domains: 20      # same as num_domains in mapping_network_params
    max_conv_dim: 512    # same as max_conv_dim in generator_params
    repeat_num: 4
asr_params:
    input_dim: 80
    hidden_dim: 256
    n_token: 80
    token_embedding_dim: 256

###########################################################
#               ADVERSARIAL LOSS SETTING                  #
###########################################################
loss_params:
    g_loss:
        lambda_sty: 1.
        lambda_cyc: 5.
        lambda_ds: 1.
        lambda_norm: 1.
        lambda_asr: 10.
        lambda_f0: 5.
        lambda_f0_sty: 0.1
        lambda_adv: 2.
        lambda_adv_cls: 0.5
        norm_bias: 0.5
    d_loss:
        lambda_reg: 1.
        lambda_adv_cls: 0.1
        lambda_con_reg: 10.

    adv_cls_epoch: 50
    con_reg_epoch: 30
        

###########################################################
#                  DATA LOADER SETTING                    #
###########################################################
batch_size: 5               # Batch size.
num_workers: 2              # Number of workers in DataLoader.
max_mel_length: 192

###########################################################
#             OPTIMIZER & SCHEDULER SETTING               #
###########################################################
generator_optimizer_params:
    beta1: 0.0
    beta2: 0.99
    weight_decay: 1.0e-4                   
    epsilon: 1.0e-9
generator_scheduler_params:
    max_learning_rate: 2.0e-4
    phase_pct: 0.0
    divide_factor: 1
    total_steps: 200000                # train_max_steps
    end_learning_rate: 2.0e-4
style_encoder_optimizer_params:
    beta1: 0.0
    beta2: 0.99
    weight_decay: 1.0e-4                 
    epsilon: 1.0e-9
style_encoder_scheduler_params:
    max_learning_rate: 2.0e-4
    phase_pct: 0.0
    divide_factor: 1
    total_steps: 200000                # train_max_steps
    end_learning_rate: 2.0e-4
mapping_network_optimizer_params:
    beta1: 0.0
    beta2: 0.99
    weight_decay: 1.0e-4                 
    epsilon: 1.0e-9
mapping_network_scheduler_params:
    max_learning_rate: 2.0e-6
    phase_pct: 0.0
    divide_factor: 1
    total_steps: 200000                # train_max_steps
    end_learning_rate: 2.0e-6
discriminator_optimizer_params:
    beta1: 0.0
    beta2: 0.99
    weight_decay: 1.0e-4                 
    epsilon: 1.0e-9
discriminator_scheduler_params:
    max_learning_rate: 2.0e-4
    phase_pct: 0.0
    divide_factor: 1
    total_steps: 200000                # train_max_steps
    end_learning_rate: 2.0e-4        

###########################################################
#                    TRAINING SETTING                     #
###########################################################
max_epoch: 150
num_snapshots: 5
seed: 1

================================================
FILE: examples/vctk/vc3/local/preprocess.sh
================================================
#!/bin/bash
# Data preparation for the VCTK StarGANv2-VC example.
#
# Usage: preprocess.sh <config_path>
#
# Stages:
#   0  extract features into ./dump
#   1  run normalize.py on the train / dev / test subsets

stage=0
stop_stage=100

config_path=$1

if [[ ${stage} -le 0 && ${stop_stage} -ge 0 ]]; then
    # extract features
    echo "Extract features ..."
    python3 ${BIN_DIR}/preprocess.py \
        --dataset=vctk \
        --rootdir=~/datasets/VCTK-Corpus-0.92/ \
        --dumpdir=dump \
        --config=${config_path} \
        --num-cpu=20
fi

if [[ ${stage} -le 1 && ${stop_stage} -ge 1 ]]; then
    echo "Normalize ..."
    # same call for every subset; only the metadata and output paths differ
    for subset in train dev test; do
        python3 ${BIN_DIR}/normalize.py \
            --metadata=dump/${subset}/raw/metadata.jsonl \
            --dumpdir=dump/${subset}/norm \
            --speaker-dict=dump/speaker_id_map.txt
    done
fi


================================================
FILE: examples/vctk/vc3/local/train.sh
================================================
#!/bin/bash
# Launch train.py on the normalized train/dev metadata under ./dump.
#
# Usage: train.sh <config_path> <train_output_path>

config_path=$1
train_output_path=$2

# Command-line options for train.py, in the order they are passed.
train_args=(
    --train-metadata=dump/train/norm/metadata.jsonl
    --dev-metadata=dump/dev/norm/metadata.jsonl
    --config=${config_path}
    --output-dir=${train_output_path}
    --ngpu=1
    --speaker-dict=dump/speaker_id_map.txt
)

python3 ${BIN_DIR}/train.py "${train_args[@]}"


================================================
FILE: examples/vctk/vc3/local/voice_conversion.sh
================================================
#!/bin/bash
# Run voice conversion (vc.py) on the audio given by source_path.
#
# Usage: voice_conversion.sh <config_path> <source_path> <output_dir>
#   config_path  forwarded to vc.py as --config_path
#   source_path  forwarded to vc.py as --source_path
#   output_dir   forwarded to vc.py as --output_dir

config_path=$1
source_path=$2
output_dir=$3

# Expansions are quoted so that paths containing spaces or glob characters
# reach vc.py as single, unmodified arguments. Every continuation backslash
# is preceded by a space so arguments never depend on the next line's indent.
python3 "${BIN_DIR}/vc.py" \
    --config_path="${config_path}" \
    --source_path="${source_path}" \
    --output_dir="${output_dir}"

================================================
FILE: examples/vctk/vc3/path.sh
================================================
#!/bin/bash
# Environment for the StarGANv2-VC example; intended to be sourced
# (run.sh does `source path.sh`).

# Repository root: three levels above the current working directory.
export MAIN_ROOT=$(realpath ${PWD}/../../../)

# Put the repository root and its utils directory first on PATH, and use the C locale.
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH} LC_ALL=C

# Python settings: no .pyc files; UTF-8 I/O to avoid UnicodeDecodeError when
# LC_ALL=C; make the repository importable.
export PYTHONDONTWRITEBYTECODE=1 PYTHONIOENCODING=UTF-8
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

# Directory holding this example's preprocess.py / normalize.py / train.py / vc.py.
MODEL=starganv2_vc
export BIN_DIR=${MAIN_ROOT}/paddlespeech/t2s/exps/${MODEL}


================================================
FILE: examples/vctk/vc3/run.sh
================================================
#!/bin/bash
# Entry point of the example. Runs the stages whose number lies in
# [stage, stop_stage]:
#   0  ./local/preprocess.sh
#   1  ./local/train.sh
#   2  ./local/voice_conversion.sh

set -e
source path.sh

gpus=0,1
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_331.pdz
source_path=test_wav/goat_01.wav
output_dir=vc_output

# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this can not be mixed use with `$1`, `$2` ...
source "${MAIN_ROOT}/utils/parse_options.sh" || exit 1

# Expansions below are quoted so values containing spaces (e.g. a custom
# --source-path) are forwarded to the stage scripts as single arguments.

# not ready now
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    # prepare data
    ./local/preprocess.sh "${conf_path}" || exit -1
fi
# not ready now
if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    # train model, all `ckpt` under `train_output_path/checkpoints/` dir
    CUDA_VISIBLE_DEVICES="${gpus}" ./local/train.sh "${conf_path}" "${train_output_path}" || exit -1
fi

if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    # voice conversion: run on `source_path`, passing `output_dir` as the output directory
    CUDA_VISIBLE_DEVICES="${gpus}" ./local/voice_conversion.sh "${conf_path}" "${source_path}" "${output_dir}" || exit -1
fi


================================================
FILE: examples/vctk/voc1/README.md
================================================
# Parallel WaveGAN with VCTK
This example contains code used to train a [parallel wavegan](http://arxiv.org/abs/1910.11480) model with [VCTK](https://datashare.ed.ac.uk/handle/10283/3443).

## Dataset
### Download and Extract
Download VCTK-0.92 from its [Official Website](https://datashare.ed.ac.uk/handle/10283/3443) and extract it to `~/datasets`. The dataset is then located in the directory `~/datasets/VCTK-Corpus-0.92`.

### Get MFA Result and Extract
We use [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) results to cut the silence at the edges of the audio.
You can download the alignments here: [vctk_alignment.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/VCTK-Corpus-0.92/vctk_alignment.tar.gz), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) in our repo.
Note: we remove three speakers from VCTK-0.92 (see [reorganize_vctk.py](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/examples/other/mfa/local/reorganize_vctk.py)):
1. `p315`, because there is no text for it.
2. `p280` and `p362`, because there is no `*_mic2.flac` (which is better than `*_mic1.flac`) for them.

## Get Started
Assume the path to the dataset is `~/datasets/VCTK-Corpus-0.92`.
Assume the path to the MFA result of VCTK is `./vctk_alignment`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
```bash
./run.sh
```
You can choose a range of stages to run, or set `stage` equal to `stop-stage` to run a single stage. For example, the following command only preprocesses the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.

```text
dump
├── dev
│   ├── norm
│   └── raw
├── test
│   ├── norm
│   └── raw
└── train
    ├── norm
    ├── raw
    └── feats_stats.npy
```

The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The `raw` folder contains the log magnitude of the mel spectrogram of each utterance, while the norm folder contains the normalized spectrogram. The statistics used to normalize the spectrogram are computed from the training set, which is located in `dump/train/feats_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains id and paths to the spectrogram of each utterance.

### Model Training
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
`./local/train.sh` calls `${BIN_DIR}/train.py`.
Here's the complete help message.

```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU] [--batch-size BATCH_SIZE] [--max-iter MAX_ITER]
                [--run-benchmark RUN_BENCHMARK]
                [--profiler_options PROFILER_OPTIONS]

Train a ParallelWaveGAN model.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       ParallelWaveGAN config file.
  --train-metadata TRAIN_METADATA
                        training data.
  --dev-metadata DEV_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.

benchmark:
  arguments related to benchmark.

  --batch-size BATCH_SIZE
                        batch size.
  --max-iter MAX_ITER   train max steps.
  --run-benchmark RUN_BENCHMARK
                        runing benchmark or not, if True, use the --batch-size
                        and --max-iter.
  --profiler_options PROFILER_OPTIONS
                        The option of profiler, which should be in format
                        "key1=value1;key2=value2;key3=value3".
```

1. `--config` is a config file in yaml format to overwrite the default config, which can be found at `conf/default.yaml`.
2. `--train-metadata` and `--dev-metadata` should be the metadata file in the normalized subfolder of `train` and `dev` in the `dump` folder.
3. `--output-dir` is the directory to save the results of the experiment. Checkpoints are saved in `checkpoints/` inside this directory.
4. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

### Synthesizing
`./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
```text
usage: synthesize.py [-h] [--generator-type GENERATOR_TYPE] [--config CONFIG]
                     [--checkpoint CHECKPOINT] [--test-metadata TEST_METADATA]
                     [--output-dir OUTPUT_DIR] [--ngpu NGPU]

Synthesize with GANVocoder.

optional arguments:
  -h, --help            show this help message and exit
  --generator-type GENERATOR_TYPE
                        type of GANVocoder, should in {pwgan, mb_melgan,
                        style_melgan, } now
  --config CONFIG       GANVocoder config file.
  --checkpoint CHECKPOINT
                        snapshot to load.
  --test-metadata TEST_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
```

1. `--config` is the Parallel WaveGAN config file. Use the same config that the model was trained with.
2. `--checkpoint` is the checkpoint to load. Pick one of the checkpoints from `checkpoints` inside the training output directory. If you use the pretrained model, use `snapshot_iter_1500000.pdz`.
3. `--test-metadata` is the metadata of the test dataset. Use the `metadata.jsonl` in the `test/norm` subfolder from the processed directory.
4. `--output-dir` is the directory to save the synthesized audio files.
5. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

## Pretrained Model
Pretrained models can be downloaded here:
- [pwg_vctk_ckpt_0.1.1.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_vctk_ckpt_0.1.1.zip)

The static model can be downloaded here:
- [pwgan_vctk_static_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_vctk_static_1.1.0.zip)

The ONNX model can be downloaded here:
- [pwgan_vctk_onnx_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_vctk_onnx_1.1.0.zip)

The Paddle-Lite model can be downloaded here:
- [pwgan_vctk_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_vctk_pdlite_1.3.0.zip)


Parallel WaveGAN checkpoint contains files listed below.

```text
pwg_vctk_ckpt_0.1.1
├── default.yaml                   # default config used to train parallel wavegan
├── snapshot_iter_1500000.pdz      # generator parameters of parallel wavegan
└── feats_stats.npy                # statistics used to normalize spectrogram when training parallel wavegan
```
## Acknowledgement
We adapted some code from https://github.com/kan-bayashi/ParallelWaveGAN.


================================================
FILE: examples/vctk/voc1/conf/default.yaml
================================================
# This is the hyperparameter configuration file for Parallel WaveGAN.
# Please make sure this is adjusted for the VCTK corpus. If you want to
# apply to the other dataset, you might need to carefully change some parameters.
# This configuration requires 12 GB GPU memory and takes ~3 days on RTX TITAN.

###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
fs: 24000                # Sampling rate.
n_fft: 2048              # FFT size (samples).
n_shift: 300             # Hop size (samples). 12.5ms
win_length: 1200         # Window length (samples). 50ms
                         # If set to null, it will be the same as n_fft.
window: "hann"           # Window function.
n_mels: 80               # Number of mel basis.
fmin: 80                 # Minimum freq in mel basis calculation. (Hz)
fmax: 7600               # Maximum frequency in mel basis calculation. (Hz)

###########################################################
#         GENERATOR NETWORK ARCHITECTURE SETTING          #
###########################################################
generator_params:
    in_channels: 1        # Number of input channels.
    out_channels: 1       # Number of output channels.
    kernel_size: 3        # Kernel size of dilated convolution.
    layers: 30            # Number of residual block layers.
    stacks: 3             # Number of stacks i.e., dilation cycles.
    residual_channels: 64 # Number of channels in residual conv.
    gate_channels: 128    # Number of channels in gated conv.
    skip_channels: 64     # Number of channels in skip conv.
    aux_channels: 80      # Number of channels for auxiliary feature conv.
                          # Must be the same as n_mels.
    aux_context_window: 2 # Context window size for auxiliary feature.
                          # If set to 2, previous 2 and future 2 frames will be considered.
    dropout: 0.0          # Dropout rate. 0.0 means no dropout applied.
    use_weight_norm: True # Whether to use weight norm.
                          # If set to true, it will be applied to all of the conv layers.
    upsample_scales: [4, 5, 3, 5]     # Upsampling scales. prod(upsample_scales) == n_shift

###########################################################
#       DISCRIMINATOR NETWORK ARCHITECTURE SETTING        #
###########################################################
discriminator_params:
    in_channels: 1        # Number of input channels.
    out_channels: 1       # Number of output channels.
    kernel_size: 3        # Kernel size of conv layers.
    layers: 10            # Number of conv layers.
    conv_channels: 64     # Number of channels in conv layers.
    bias: True            # Whether to use bias parameter in conv.
    use_weight_norm: True # Whether to use weight norm.
                          # If set to true, it will be applied to all of the conv layers.
    nonlinear_activation: "leakyrelu" # Nonlinear function after each conv.
    nonlinear_activation_params:      # Nonlinear function parameters
        negative_slope: 0.2           # Alpha in leakyrelu.

###########################################################
#                   STFT LOSS SETTING                     #
###########################################################
stft_loss_params:
    fft_sizes: [1024, 2048, 512]  # List of FFT size for STFT-based loss.
    hop_sizes: [120, 240, 50]     # List of hop size for STFT-based loss
    win_lengths: [600, 1200, 240] # List of window length for STFT-based loss.
    window: "hann"                # Window function for STFT-based loss

###########################################################
#               ADVERSARIAL LOSS SETTING                  #
###########################################################
lambda_adv: 4.0  # Loss balancing coefficient.

###########################################################
#                  DATA LOADER SETTING                    #
###########################################################
batch_size: 6              # Batch size.
batch_max_steps: 24000     # Length of each audio in batch. Make sure it is divisible by n_shift.
num_workers: 2             # Number of workers in DataLoader.

###########################################################
#             OPTIMIZER & SCHEDULER SETTING               #
###########################################################
generator_optimizer_params:
    epsilon: 1.0e-6            # Generator's epsilon.
    weight_decay: 0.0      # Generator's weight decay coefficient.
generator_scheduler_params:
    learning_rate: 0.0001             # Generator's learning rate.
    step_size: 200000      # Generator's scheduler step size.
    gamma: 0.5             # Generator's scheduler gamma.
                           # At each step size, lr will be multiplied by this parameter.
generator_grad_norm: 10    # Generator's gradient norm.
discriminator_optimizer_params:
    epsilon: 1.0e-6            # Discriminator's epsilon.
    weight_decay: 0.0      # Discriminator's weight decay coefficient.
discriminator_scheduler_params:
    learning_rate: 0.00005            # Discriminator's learning rate. 
    step_size: 200000      # Discriminator's scheduler step size.
    gamma: 0.5             # Discriminator's scheduler gamma.
                           # At each step size, lr will be multiplied by this parameter.
discriminator_grad_norm: 1 # Discriminator's gradient norm.

###########################################################
#                    INTERVAL SETTING                     #
###########################################################
discriminator_train_start_steps: 100000 # Number of steps to start to train discriminator.
train_max_steps: 1500000                # Number of training steps.
save_interval_steps: 5000               # Interval steps to save checkpoint.
eval_interval_steps: 1000               # Interval steps to evaluate the network.

###########################################################
#                     OTHER SETTING                       #
###########################################################
num_save_intermediate_results: 4  # Number of results to be saved as intermediate results.
num_snapshots: 10                 # max number of snapshots to keep while training
seed: 42                          # random seed for paddle, random, and np.random


================================================
FILE: examples/vctk/voc1/local/preprocess.sh
================================================
#!/bin/bash
# Data preparation for this example, in four stages:
#   0  generate durations.txt from the MFA alignments in ./vctk_alignment
#   1  extract features into ./dump
#   2  compute feature statistics on the train split
#   3  normalize train/dev/test with the train statistics
#
# Usage: ./local/preprocess.sh <config_path>
#   $1  config file, forwarded as --config
# MAIN_ROOT and BIN_DIR must already be set in the environment.

stage=0
stop_stage=100

config_path=$1

if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    # get durations from MFA's result
    echo "Generate durations.txt from MFA results ..."
    python3 "${MAIN_ROOT}/utils/gen_duration_from_textgrid.py" \
        --inputdir=./vctk_alignment \
        --output=durations.txt \
        --config="${config_path}"
fi

if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    # extract features
    echo "Extract features ..."
    # bash does not tilde-expand `~` inside a `--option=~/...` word, so the
    # home directory is spelled out with ${HOME} instead of relying on the
    # Python side to expand it.
    python3 "${BIN_DIR}/../preprocess.py" \
        --rootdir="${HOME}/datasets/VCTK-Corpus-0.92/" \
        --dataset=vctk \
        --dumpdir=dump \
        --dur-file=durations.txt \
        --config="${config_path}" \
        --cut-sil=True \
        --num-cpu=20
fi

if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    # get features' stats(mean and std)
    echo "Get features' stats ..."
    python3 "${MAIN_ROOT}/utils/compute_statistics.py" \
        --metadata=dump/train/raw/metadata.jsonl \
        --field-name="feats"
fi

if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    # normalize, dev and test should use train's stats
    echo "Normalize ..."

    python3 "${BIN_DIR}/../normalize.py" \
        --metadata=dump/train/raw/metadata.jsonl \
        --dumpdir=dump/train/norm \
        --stats=dump/train/feats_stats.npy \
        --skip-wav-copy

    python3 "${BIN_DIR}/../normalize.py" \
        --metadata=dump/dev/raw/metadata.jsonl \
        --dumpdir=dump/dev/norm \
        --stats=dump/train/feats_stats.npy \
        --skip-wav-copy

    python3 "${BIN_DIR}/../normalize.py" \
        --metadata=dump/test/raw/metadata.jsonl \
        --dumpdir=dump/test/norm \
        --stats=dump/train/feats_stats.npy \
        --skip-wav-copy
fi


================================================
FILE: examples/vctk/voc1/run.sh
================================================
#!/bin/bash
# Entry point of the example. Runs the stages whose number lies in
# [stage, stop_stage]:
#   0  ./local/preprocess.sh
#   1  ./local/train.sh
#   2  ./local/synthesize.sh

set -e
source path.sh

gpus=0
stage=0
stop_stage=100

conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_5000.pdz

# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this can not be mixed use with `$1`, `$2` ...
source "${MAIN_ROOT}/utils/parse_options.sh" || exit 1

# Expansions below are quoted so values containing spaces are forwarded to
# the stage scripts as single arguments.

if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    # prepare data
    ./local/preprocess.sh "${conf_path}" || exit -1
fi

if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    # train model, all `ckpt` under `train_output_path/checkpoints/` dir
    CUDA_VISIBLE_DEVICES="${gpus}" ./local/train.sh "${conf_path}" "${train_output_path}" || exit -1
fi

if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    # synthesize
    CUDA_VISIBLE_DEVICES="${gpus}" ./local/synthesize.sh "${conf_path}" "${train_output_path}" "${ckpt_name}" || exit -1
fi


================================================
FILE: examples/vctk/voc5/README.md
================================================
# HiFiGAN with VCTK
This example contains code used to train a [HiFiGAN](https://arxiv.org/abs/2010.05646) model with [VCTK](https://datashare.ed.ac.uk/handle/10283/3443).

## Dataset
### Download and Extract
Download VCTK-0.92 from its [Official Website](https://datashare.ed.ac.uk/handle/10283/3443) and extract it to `~/datasets`. The dataset is then located in the directory `~/datasets/VCTK-Corpus-0.92`.

### Get MFA Result and Extract
We use [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) results to cut the silence at the edges of the audio.
You can download the alignments here: [vctk_alignment.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/VCTK-Corpus-0.92/vctk_alignment.tar.gz), or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) in our repo.
Note: we remove three speakers from VCTK-0.92 (see [reorganize_vctk.py](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/examples/other/mfa/local/reorganize_vctk.py)):
1. `p315`, because there is no text for it.
2. `p280` and `p362`, because there is no `*_mic2.flac` (which is better than `*_mic1.flac`) for them.

## Get Started
Assume the path to the dataset is `~/datasets/VCTK-Corpus-0.92`.
Assume the path to the MFA result of VCTK is `./vctk_alignment`.
Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
```bash
./run.sh
```
You can choose a range of stages to run, or set `stage` equal to `stop-stage` to run a single stage. For example, the following command only preprocesses the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```
### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.

```text
dump
├── dev
│   ├── norm
│   └── raw
├── test
│   ├── norm
│   └── raw
└── train
    ├── norm
    ├── raw
    └── feats_stats.npy
```

The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The `raw` folder contains the log magnitude of the mel spectrogram of each utterance, while the norm folder contains the normalized spectrogram. The statistics used to normalize the spectrogram are computed from the training set, which is located in `dump/train/feats_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains id and paths to the spectrogram of each utterance.

### Model Training
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
`./local/train.sh` calls `${BIN_DIR}/train.py`.
Here's the complete help message.

```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU]

Train a HiFiGAN model.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       HiFiGAN config file.
  --train-metadata TRAIN_METADATA
                        training data.
  --dev-metadata DEV_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
```

1. `--config` is a config file in yaml format to overwrite the default config, which can be found at `conf/default.yaml`.
2. `--train-metadata` and `--dev-metadata` should be the metadata file in the normalized subfolder of `train` and `dev` in the `dump` folder.
3. `--output-dir` is the directory to save the results of the experiment. Checkpoints are saved in `checkpoints/` inside this directory.
4. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

### Synthesizing
`./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
```text
usage: synthesize.py [-h] [--generator-type GENERATOR_TYPE] [--config CONFIG]
                     [--checkpoint CHECKPOINT] [--test-metadata TEST_METADATA]
                     [--output-dir OUTPUT_DIR] [--ngpu NGPU]

Synthesize with GANVocoder.

optional arguments:
  -h, --help            show this help message and exit
  --generator-type GENERATOR_TYPE
                        type of GANVocoder, should in {pwgan, mb_melgan,
                        style_melgan, } now
  --config CONFIG       GANVocoder config file.
  --checkpoint CHECKPOINT
                        snapshot to load.
  --test-metadata TEST_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu == 0, use cpu.
```


1. `--config` is the HiFiGAN config file. Use the same config that the model was trained with.
2. `--checkpoint` is the checkpoint to load. Pick one of the checkpoints from `checkpoints` inside the training output directory.
3. `--test-metadata` is the metadata of the test dataset. Use the `metadata.jsonl` in the `test/norm` subfolder from the processed directory.
4. `--output-dir` is the directory to save the synthesized audio files.
5. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.

## Pretrained Model
The pretrained model can be downloaded here:
- [hifigan_vctk_ckpt_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_ckpt_0.2.0.zip)

The static model can be downloaded here:
- [hifigan_vctk_static_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_static_1.1.0.zip)

The PIR static model can be downloaded here:
- [hifigan_vctk_static_pir_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_static_pir_1.1.0.zip) (Running the PIR model requires setting `FLAGS_enable_pir_api=1`, and the PIR model only works with paddlepaddle>=3.0.0b2)

The ONNX model can be downloaded here:
- [hifigan_vctk_onnx_1.1.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_onnx_1.1.0.zip)

The Paddle-Lite model can be downloaded here:
- [hifigan_vctk_pdlite_1.3.0.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_pdlite_1.3.0.zip)


Model | Step | eval/generator_loss | eval/mel_loss| eval/feature_matching_loss
:-------------:| :------------:| :-----: | :-----: | :--------:
default| 1(gpu) x 2500000|58.092|0.1234|24.384

HiFiGAN checkpoint contains files listed below.

```text
hifigan_vctk_ckpt_0.2.0
├── default.yaml                  # default config used to train hifigan
├── feats_stats.npy               # statistics used to normalize spectrogram when training hifigan
└── snapshot_iter_2500000.pdz     # generator parameters of hifigan
```

## Acknowledgement
We adapted some code from https://github.com/kan-bayashi/ParallelWaveGAN.


================================================
FILE: examples/vctk/voc5/conf/default.yaml
================================================
# This is the configuration file for VCTK dataset.
# This configuration is based on HiFiGAN V1, which is
# an official configuration. But I found that the optimizer
# setting does not work well with my implementation.
# So I changed optimizer settings as follows:
# - AdamW -> Adam
# - betas: [0.8, 0.99] -> betas: [0.5, 0.9]
# - Scheduler: ExponentialLR -> MultiStepLR
# To match the shift size difference, the upsample scales
# are also modified from the original 256 shift setting.
###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
fs: 24000                # Sampling rate.
n_fft: 2048              # FFT size (samples).
n_shift: 300             # Hop size (samples). 12.5ms
win_length: 1200         # Window length (samples). 50ms
                         # If set to null, it will be the same as n_fft.
window: "hann"           # Window function.
n_mels: 80               # Number of mel basis.
fmin: 80                 # Minimum freq in mel basis calculation. (Hz)
fmax: 7600               # Maximum frequency in mel basis calculation. (Hz)

###########################################################
#         GENERATOR NETWORK ARCHITECTURE SETTING          #
###########################################################
generator_params:
    in_channels: 80                       # Number of input channels.
    out_channels: 1                       # Number of output channels.
    channels: 512                         # Number of initial channels.
    kernel_size: 7                        # Kernel size of initial and final conv layers.
    upsample_scales: [5, 5, 4, 3]         # Upsampling scales.
    upsample_kernel_sizes: [10, 10, 8, 6] # Kernel size for upsampling layers.
    resblock_kernel_sizes: [3, 7, 11]     # Kernel size for residual blocks.
    resblock_dilations:                   # Dilations for residual blocks.
        - [1, 3, 5]
        - [1, 3, 5]
        - [1, 3, 5]
    use_additional_convs: True            # Whether to use additional conv layer in residual blocks.
    bias: True                            # Whether to use bias parameter in conv.
    nonlinear_activation: "leakyrelu"     # Nonlinear activation type.
    nonlinear_activation_params:          # Nonlinear activation parameters.
        negative_slope: 0.1
    use_weight_norm: True                 # Whether to apply weight normalization.


###########################################################
#       DISCRIMINATOR NETWORK ARCHITECTURE SETTING        #
###########################################################
discriminator_params:
    scales: 3                              # Number of multi-scale discriminator.
    scale_downsample_pooling: "AvgPool1D"  # Pooling operation for scale discriminator.
    scale_downsample_pooling_params:
        kernel_size: 4                     # Pooling kernel size.
        stride: 2                          # Pooling stride.
        padding: 2                         # Padding size.
    scale_discriminator_params:
        in_channels: 1                     # Number of input channels.
        out_channels: 1                    # Number of output channels.
        kernel_sizes: [15, 41, 5, 3]       # List of kernel sizes.
        channels: 128                      # Initial number of channels.
        max_downsample_channels: 1024      # Maximum number of channels in downsampling conv layers.
        max_groups: 16                     # Maximum number of groups in downsampling conv layers.
        bias: True
        downsample_scales: [4, 4, 4, 4, 1] # Downsampling scales.
        nonlinear_activation: "leakyrelu"  # Nonlinear activation.
        nonlinear_activation_params:
            negative_slope: 0.1
    follow_official_norm: True             # Whether to follow the official norm setting.
    periods: [2, 3, 5, 7, 11]              # List of period for multi-period discriminator.
    period_discriminator_params:
        in_channels: 1                     # Number of input channels.
        out_channels: 1                    # Number of output channels.
        kernel_sizes: [5, 3]               # List of kernel sizes.
        channels: 32                       # Initial number of channels.
        downsample_scales: [3, 3, 3, 3, 1] # Downsampling scales.
        max_downsample_channels: 1024      # Maximum number of channels in downsampling conv layers.
        bias: True                         # Whether to use bias parameter in conv layer.
        nonlinear_activation: "leakyrelu"  # Nonlinear activation.
        nonlinear_activation_params:       # Nonlinear activation parameters.
            negative_slope: 0.1
        use_weight_norm: True              # Whether to apply weight normalization.
        use_spectral_norm: False           # Whether to apply spectral normalization.
    

###########################################################
#                   STFT LOSS SETTING                     #
###########################################################
use_stft_loss: False                 # Whether to use multi-resolution STFT loss.
use_mel_loss: True                   # Whether to use Mel-spectrogram loss.
mel_loss_params:
    fs: 24000
    fft_size: 2048
    hop_size: 300
    win_length: 1200
    window: "hann"
    num_mels: 80
    fmin: 0
    fmax: 12000
    log_base: null
generator_adv_loss_params:
    average_by_discriminators: False # Whether to average loss by #discriminators.
discriminator_adv_loss_params:
    average_by_discriminators: False # Whether to average loss by #discriminators.
use_feat_match_loss: True
feat_match_loss_params:
    average_by_discriminators: False # Whether to average loss by #discriminators.
    average_by_layers: False         # Whether to average loss by #layers in each discriminator.
    include_final_outputs: False     # Whether to include final outputs in feat match loss calculation.

###########################################################
#               ADVERSARIAL LOSS SETTING                  #
###########################################################
lambda_aux: 45.0       # Loss balancing coefficient for the auxiliary loss (mel loss here, as use_stft_loss is False).
lambda_adv: 1.0        # Loss balancing coefficient for adversarial loss.
lambda_feat_match: 2.0 # Loss balancing coefficient for feat match loss.

###########################################################
#                  DATA LOADER SETTING                    #
###########################################################
batch_size: 16              # Batch size.
batch_max_steps: 8400       # Length of each audio in batch. Make sure it is divisible by hop_size.
num_workers: 2              # Number of workers in DataLoader.

###########################################################
#             OPTIMIZER & SCHEDULER SETTING               #
###########################################################
generator_optimizer_params:
    beta1: 0.5
    beta2: 0.9
    weight_decay: 0.0                   # Generator's weight decay coefficient.
generator_scheduler_params:
    learning_rate: 2.0e-4               # Generator's learning rate.
    gamma: 0.5                          # Generator's scheduler gamma.
    milestones:                         # At each milestone, lr will be multiplied by gamma.
        - 200000
        - 400000
        - 600000
        - 800000
generator_grad_norm: -1                 # Generator's gradient norm.
discriminator_optimizer_params:
    beta1: 0.5
    beta2: 0.9
    weight_decay: 0.0                   # Discriminator's weight decay coefficient.
discriminator_scheduler_params:
    learning_rate: 2.0e-4               # Discriminator's learning rate.
    gamma: 0.5                          # Discriminator's scheduler gamma.
    milestones:                         # At each milestone, lr will be multiplied by gamma.
        - 200000
        - 400000
        - 600000
        - 800000    
discriminator_grad_norm: -1             # Discriminator's gradient norm.            

###########################################################
#                    INTERVAL SETTING                     #
###########################################################
generator_train_start_steps: 1     # Number of steps to start to train generator.
discriminator_train_start_steps: 0 # Number of steps to start to train discriminator.
train_max_steps: 2500000           # Number of training steps.
save_interval_steps: 5000         # Interval steps to save checkpoint.
eval_interval_steps: 1000          # Interval steps to evaluate the network.

###########################################################
#                     OTHER SETTING                       #
###########################################################
num_snapshots: 10                 # max number of snapshots to keep while training
seed: 42                          # random seed for paddle, random, and np.random


================================================
FILE: examples/vctk/voc5/run.sh
================================================
#!/bin/bash

# Entry point of this example: runs data preprocessing (stage 0),
# training (stage 1) and synthesis (stage 2) in order.
# Every variable defined before `parse_options.sh` is sourced can be
# overridden from the command line as `--<variable> <value>`.

# abort on the first failing command
set -e
# NOTE(review): presumably defines MAIN_ROOT, which is used below -- confirm in path.sh
source path.sh

# value exported as CUDA_VISIBLE_DEVICES for the train / synthesize stages
gpus=0
# first and last stage to run (inclusive)
stage=0
stop_stage=100

# configuration file passed to every stage
conf_path=conf/default.yaml
# output directory passed to the train / synthesize stages
train_output_path=exp/default
# checkpoint file name passed to the synthesize stage
ckpt_name=snapshot_iter_5000.pdz

# with the following command, you can choose the stage range you want to run
# such as `./run.sh --stage 0 --stop-stage 0`
# this can not be mixed use with `$1`, `$2` ...
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # prepare data
    ./local/preprocess.sh ${conf_path} || exit -1
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # train model, all `ckpt` under `train_output_path/checkpoints/` dir
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # synthesize
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi


================================================
FILE: examples/voxceleb/README.md
================================================

dataset info refer to [VoxCeleb](https://www.robots.ox.ac.uk/~vgg/data/voxceleb/index.html#about)

sv0 - speaker verification with softmax backend etc, all python code
      more info refer to the sv0/readme.txt

sv1 - depends on kaldi, speaker verification with plda/sc backend,
      more info refer to the sv1/readme.txt


## VoxCeleb2 preparation

VoxCeleb2 audio files are released in m4a format. All the VoxCeleb2 m4a audio files must be converted to wav files before feeding them to PaddleSpeech.
Please, follow these steps to prepare the dataset correctly:

1. Download Voxceleb2.
You can find download instructions here: http://www.robots.ox.ac.uk/~vgg/data/voxceleb/

2. Convert .m4a to wav
VoxCeleb2 stores files with the m4a audio format. To use them in PaddleSpeech,  you have to convert all the m4a audio files into wav files.

``` shell
ffmpeg -y -i %s -ac 1 -vn -acodec pcm_s16le -ar 16000 %s
```

You can do the conversion using ffmpeg (see https://gist.github.com/seungwonpark/4f273739beef2691cd53b5c39629d830). This operation might take several hours and only needs to be done once.

3. Put all the wav files in a folder called `wav`. You should have something like `voxceleb2/wav/id*/*.wav` (e.g, `voxceleb2/wav/id00012/21Uxsk56VDQ/00001.wav`)


## voxceleb dataset summary


|dataset | vox1 - dev | vox1 - test |vox2 - dev| vox2 - test|
|---------|-----------|------------|-----------|----------|
|spks    |  1211       |40     |      5994        | 118|
|utts     | 148642    | 4874   | 1092009     |36273|
| time(h) | 340.4 | 11.2  | 2360.2  |79.9 |


## trial summary

| trial     | filename |  nums | positive | negative |
|--------|-----------|--------|-------|------|
| VoxCeleb1 | veri_test.txt | 37720 | 18860 | 18860 | 
| VoxCeleb1(cleaned) | veri_test2.txt | 37611 | 18802 | 18809 |
| VoxCeleb1-H | list_test_hard.txt | 552536 | 276270 | 276266 |
|VoxCeleb1-H(cleaned) |list_test_hard2.txt | 550894 | 275488 | 275406 |
|VoxCeleb1-E | list_test_all.txt | 581480 | 290743 | 290737 | 
|VoxCeleb1-E(cleaned) | list_test_all2.txt |579818 |289921 |289897 |


================================================
FILE: examples/voxceleb/sv0/README.md
================================================
# ECAPA-TDNN with VoxCeleb
This example contains code used to train a ECAPA-TDNN model with [VoxCeleb dataset](https://www.robots.ox.ac.uk/~vgg/data/voxceleb/index.html#about)

## Overview
All the scripts you need are in the `run.sh`. There are several stages in the `run.sh`, and each stage has its function.
| Stage | Function                                                     |
|:---- |:----------------------------------------------------------- |
| 0     | Process data. It includes: <br>       (1) Download the VoxCeleb1 dataset <br>       (2) Download the VoxCeleb2 dataset  <br>       (3) Convert the VoxCeleb2 m4a to wav format <br>       (4) Get the manifest files of the train, development and test dataset <br> (5) Download the RIR Noise dataset and Get the noise manifest files for augmentation |
| 1     | Train the model                                              |
| 2     | Test the speaker verification with VoxCeleb trial|

You can choose to run a range of stages by setting `stage` and `stop_stage`.

For example, if you want to execute the code in stage 1 and stage 2, you can run this script:
```bash
bash run.sh --stage 1 --stop_stage 2
```
Or you can set `stage` equal to `stop_stage` to only run one stage.
For example, if you only want to run `stage 0`, you can use the script below:
```bash
bash run.sh --stage 0 --stop_stage 0
```
The document below will describe the scripts in the `run.sh` in detail.
## The environment variables
The path.sh contains the environment variable. 
```bash
source path.sh
```
This script needs to be run first.  

And another script is also needed:
```bash
source ${MAIN_ROOT}/utils/parse_options.sh
```
It will support the way of using `--variable value` in the shell scripts.

## The local variables
Some local variables are set in the `run.sh`. 
`gpus` denotes the GPU number you want to use. If you set `gpus=`,  it means you only use CPU. 
`stage` denotes the number of the stage you want to start from in the experiments.
`stop_stage` denotes the number of the stage you want to end at in the experiments.
`conf_path` denotes the config path of the model.
`exp_dir` denotes the experiment directory, e.g. "exp/ecapa-tdnn-vox12-big/"

You can set the local variables when you use the `run.sh`

For example, you can set the `gpus` when you use the command line:
```bash
bash run.sh --gpus 0,1 
```
## Stage 0: Data processing
To use this example, you need to process data firstly and you can use stage 0 in the `run.sh` to do this. The code is shown below:

```bash
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
     # prepare data
     bash ./local/data.sh || exit -1
 fi
```
Stage 0 is for processing the data. If you only want to process the data, you can run
```bash
bash run.sh --stage 0 --stop_stage 0
```
You can also just run these scripts in your command line.
```bash
source path.sh
bash ./local/data.sh
```
After processing the data, the `data` directory will look like this:
```bash
data/
├── rir_noise
│   ├── csv
│   │   ├── noise.csv
│   │   └── rir.csv
│   ├── manifest.pointsource_noises
│   ├── manifest.real_rirs_isotropic_noises
│   └── manifest.simulated_rirs
├── vox
│   ├── csv
│   │   ├── dev.csv
│   │   ├── enroll.csv
│   │   ├── test.csv
│   │   └── train.csv
│   └── meta
│       └── label2id.txt
└── vox1
    ├── list_test_all2.txt
    ├── list_test_all.txt
    ├── list_test_hard2.txt
    ├── list_test_hard.txt
    ├── manifest.dev
    ├── manifest.test
    ├── veri_test2.txt
    ├── veri_test.txt
    ├── voxceleb1.dev.meta
    └── voxceleb1.test.meta
```
## Stage 1: Model training
If you want to train the model, you can use stage 1 in the `run.sh`. The code is shown below.
```bash
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
     CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path}  ${ckpt}
 fi
```
If you want to train the model, you can use the script below to execute stage 0 and stage 1:
```bash
bash run.sh --stage 0 --stop_stage 1
```
or you can run these scripts in the command line (only use CPU).
```bash
source path.sh
bash ./local/data.sh ./data/ conf/ecapa_tdnn.yaml
CUDA_VISIBLE_DEVICES= ./local/train.sh ./data/ exp/ecapa-tdnn-vox12-big/ conf/ecapa_tdnn.yaml
```
## Stage 2: Model Testing
The test stage is to evaluate the model performance. The code of the test stage is shown below:
```bash
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     # test ckpt avg_n
     CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${dir} ${exp_dir} ${conf_path} || exit -1
 fi
```
If you want to train a model and test it,  you can use the script below to execute stage 0, stage 1 and stage 2:
```bash
bash run.sh --stage 0 --stop_stage 2
```
or you can run these scripts in the command line (only use CPU).
```bash
source path.sh
bash ./local/data.sh ./data/ conf/ecapa_tdnn.yaml
CUDA_VISIBLE_DEVICES= ./local/train.sh ./data/ exp/ecapa-tdnn-vox12-big/ conf/ecapa_tdnn.yaml
CUDA_VISIBLE_DEVICES= ./local/test.sh ./data/ exp/ecapa-tdnn-vox12-big/ conf/ecapa_tdnn.yaml
```

## 3: Pretrained Model
You can get the pretrained models from [this](../../../docs/source/released_model.md).

Use the `tar` command to unpack the model, and then you can use the script to test the model.

For example:
```
wget https://paddlespeech.cdn.bcebos.com/vector/voxceleb/sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_1.tar.gz
tar -xvf sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_1.tar.gz
source path.sh
# If you have processed the data and got the manifest file, you can skip the following 2 steps

CUDA_VISIBLE_DEVICES= bash ./local/test.sh ./data sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_1/model/ conf/ecapa_tdnn.yaml
```
The performance of the released models is shown in [this](./RESULT.md)


================================================
FILE: examples/voxceleb/sv0/RESULT.md
================================================
# VoxCeleb

## ECAPA-TDNN 

| Model | Number of Params | Release | Config | dim | Test set |  Cosine | Cosine + S-Norm | 
| --- | --- | --- | --- | --- | --- | --- | ---- |
| ECAPA-TDNN | 85M | 0.2.1 | conf/ecapa_tdnn.yaml | 192 | test | 0.8188 | 0.7815|

> [SpeechBrain result](https://github.com/speechbrain/speechbrain/tree/develop/recipes/VoxCeleb/SpeakerRec#speaker-verification-using-ecapa-tdnn-embeddings):  
> EER = 0.90% (voxceleb1 + voxceleb2) without s-norm  
> EER = 0.80% (voxceleb1 + voxceleb2) with s-norm.  


================================================
FILE: examples/voxceleb/sv0/conf/ecapa_tdnn.yaml
================================================
###########################################
#                Data                 #
###########################################
augment: True
batch_size: 32
num_workers: 2
num_speakers: 7205 # 1211 vox1, 5994 vox2, 7205 vox1+2, test speakers: 41
shuffle: True
skip_prep: False
split_ratio: 0.9
chunk_duration: 3.0 # seconds
random_chunk: True
verification_file: data/vox1/veri_test2.txt

###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
# currently, we only support fbank
sr: 16000           # sample rate
n_mels: 80
window_size: 400     #25ms, sample rate 16000, 25 * 16000 / 1000 = 400 
hop_size: 160        #10ms, sample rate 16000, 10 * 16000 / 1000 = 160

###########################################################
#                       MODEL SETTING                     #
###########################################################
# currently, only ecapa-tdnn is supported by ecapa_tdnn.yaml
# to use another model, please choose another configuration yaml file
model:
  input_size: 80
  channels: [1024, 1024, 1024, 1024, 3072]
  kernel_sizes: [5, 3, 3, 3, 1]
  dilations: [1, 2, 3, 4, 1]
  attention_channels: 128
  lin_neurons: 192

###########################################
#                Training                 #
###########################################
seed: 1986 # taken from the speechbrain configuration
epochs: 10
save_interval: 10
log_interval: 10
learning_rate: 1e-8
max_lr: 1e-3
step_size: 140000


###########################################
#                loss                     #
###########################################
margin: 0.2
scale: 30

###########################################
#                Testing                  #
###########################################
global_embedding_norm: True
embedding_mean_norm: True
embedding_std_norm: False

###########################################
#                score-norm               #
###########################################
score_norm: s-norm
cohort_size: 20000 # amount of imposter utterances in normalization cohort
n_train_snts: 400000 # used for normalization stats


================================================
FILE: examples/voxceleb/sv0/conf/ecapa_tdnn_small.yaml
================================================
###########################################
#                Data                 #
###########################################
augment: True
batch_size: 32
num_workers: 2
num_speakers: 1211 # 1211 vox1, 5994 vox2, 7205 vox1+2, test speakers: 41
shuffle: True
skip_prep: False
split_ratio: 0.9
chunk_duration: 3.0 # seconds
random_chunk: True
verification_file: data/vox1/veri_test2.txt

###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################
# currently, we only support fbank
sr: 16000           # sample rate
n_mels: 80
window_size: 400     #25ms, sample rate 16000, 25 * 16000 / 1000 = 400 
hop_size: 160        #10ms, sample rate 16000, 10 * 16000 / 1000 = 160

###########################################################
#                       MODEL SETTING                     #
###########################################################
# currently, only ecapa-tdnn is supported by ecapa_tdnn.yaml
# to use another model, please choose another configuration yaml file
model:
  input_size: 80
  channels: [512, 512, 512, 512, 1536]
  kernel_sizes: [5, 3, 3, 3, 1]
  dilations: [1, 2, 3, 4, 1]
  attention_channels: 128
  lin_neurons: 192

###########################################
#                Training                 #
###########################################
seed: 1986 # taken from the speechbrain configuration
epochs: 100
save_interval: 10
log_interval: 10
learning_rate: 1e-8
max_lr: 1e-3
step_size: 140000

###########################################
#                loss                     #
###########################################
margin: 0.2
scale: 30

###########################################
#                Testing                  #
###########################################
global_embedding_norm: True
embedding_mean_norm: True
embedding_std_norm: False

###########################################
#                score-norm               #
###########################################
score_norm: s-norm
cohort_size: 20000 # amount of imposter utterances in normalization cohort
n_train_snts: 400000 # used for normalization stats


================================================
FILE: examples/voxceleb/sv0/local/convert.sh
================================================
# Convert every *.m4a file found under a directory into a 16 kHz wav file
# written next to it, running up to N ffmpeg processes in parallel.
#
# Usage: bash convert.sh [data-dir]     (data-dir defaults to the current directory)
#
# copy this to root directory of data and 
# chmod a+x convert.sh
# ./convert.sh
# https://unix.stackexchange.com/questions/103920/parallelize-a-bash-for-loop
dir=${1:-.}

# Create a counting semaphore: an anonymous FIFO opened on file descriptor 3
# and preloaded with $1 tokens. Each token is a 3-character exit status ("000").
open_sem(){
    mkfifo pipe-$$
    exec 3<>pipe-$$
    # the FIFO stays usable through fd 3 after its name is removed
    rm pipe-$$
    local i=$1
    for((;i>0;i--)); do
        printf %s 000 >&3
    done
}

# Take one token (blocking until a slot is free), abort the whole script if
# that token carries a non-zero exit status of an earlier job, then run "$@"
# in the background and give back a token holding its exit status.
run_with_lock(){
    local x
    read -u 3 -n 3 x && ((0==x)) || exit $x
    (
     ( "$@"; )
    printf '%.3d' $? >&3
    )&
}
N=32 # number of vCPU
open_sem $N
# NOTE: the unquoted $(find ...) is split on whitespace, so file names must
# not contain blanks.
for f in $(find "${dir}" -name "*.m4a"); do
    run_with_lock ffmpeg -loglevel panic -i "$f" -ar 16000 "${f%.*}.wav"
done

# Block until every background conversion has finished; without this the
# script returns while up to N ffmpeg processes are still writing wav files.
wait

# After `wait` the FIFO holds exactly N tokens. Drain them so that a failure
# of one of the last jobs is reported through the exit status as well.
for ((i = N; i > 0; i--)); do
    read -u 3 -n 3 x && ((0==x)) || exit $x
done


================================================
FILE: examples/voxceleb/sv0/local/data.sh
================================================
#!/bin/bash
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Prepare all the data of the voxceleb sv0 example:
#   stage 1: download VoxCeleb1 and generate its manifest files
#   stage 2: download VoxCeleb2
#   stage 3: convert the VoxCeleb2 m4a files to wav
#   stage 4: generate the VoxCeleb2 manifest file
#   stage 5: convert the manifests to the csv files used for training
#   stage 6: generate the RIR noise manifest files
#   stage 7: generate the RIR noise csv files
# Stage numbering starts at 1; with the default stage=0 every stage is run.
stage=0
stop_stage=100

# allow `--stage` / `--stop-stage` to be overridden from the command line
. ${MAIN_ROOT}/utils/parse_options.sh || exit -1;

if [ $# -ne 2 ] ; then
   echo "Usage: $0 [options] <data-dir> <conf-path>";
   echo "e.g.: $0 ./data/ conf/ecapa_tdnn.yaml"
   echo "Options: "
   echo "  --stage <stage|-1>               # Used to run a partially-completed data process from somewhere in the middle."
   echo "  --stop-stage <stop-stage|100>    # Used to run a partially-completed data process stop stage in the middle"
   exit 1;
fi

# dir: output directory of the manifest / csv files
# conf_path: yaml configuration handed to the csv generation scripts
dir=$1
conf_path=$2
mkdir -p ${dir}

# Generally the `MAIN_ROOT` refers to the root of PaddleSpeech,
# which is defined in the path.sh
# And we will download the voxceleb data and rirs noise to ${MAIN_ROOT}/dataset
TARGET_DIR=${MAIN_ROOT}/dataset
mkdir -p ${TARGET_DIR}

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
   # download data, generate manifests
   # we will generate the manifest.{dev,test} file from ${TARGET_DIR}/voxceleb/vox1/{dev,test} directory
   # and generate the meta info and download the trial file
   # manifest.dev: 148642
   # manifest.test: 4874
   echo "Start to download vox1 dataset and generate the manifest files "
   python3 ${TARGET_DIR}/voxceleb/voxceleb1.py \
      --manifest_prefix="${dir}/vox1/manifest" \
      --target_dir="${TARGET_DIR}/voxceleb/vox1/"

   if [ $? -ne 0 ]; then
      echo "Prepare voxceleb1 failed. Terminated."
      exit 1
   fi

fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
   # download voxceleb2 data
   # we will download the data and unzip the package
   # and we will store the m4a file in ${TARGET_DIR}/voxceleb/vox2/{dev,test}
   echo "start to download vox2 dataset"
   python3 ${TARGET_DIR}/voxceleb/voxceleb2.py \
      --download \
      --target_dir="${TARGET_DIR}/voxceleb/vox2/"

   if [ $? -ne 0 ]; then
      echo "Download voxceleb2 dataset failed. Terminated."
      exit 1
   fi

fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
   # convert the m4a to wav
   # and we will not delete the original m4a file
   echo "start to convert the m4a to wav"
   bash local/convert.sh ${TARGET_DIR}/voxceleb/vox2/ || exit 1;
   
   # NOTE(review): this check can never trigger, a failure already exits
   # through the `|| exit 1` above
   if [ $? -ne 0 ]; then
      echo "Convert voxceleb2 dataset from m4a to wav failed. Terminated."
      exit 1
   fi
   echo "m4a convert to wav operation finished"
fi

if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
   # generate the vox2 manifest file from wav file
   # we will generate the ${dir}/vox2/manifest.vox2
   # because we use all the vox2 dataset to train, so collect all the vox2 data in one file
   echo "start generate the vox2 manifest files"
   python3 ${TARGET_DIR}/voxceleb/voxceleb2.py \
      --generate \
      --manifest_prefix="${dir}/vox2/manifest" \
      --target_dir="${TARGET_DIR}/voxceleb/vox2/"

   if [ $? -ne 0 ]; then
      echo "Prepare voxceleb2 dataset failed. Terminated."
      exit 1
   fi
fi

if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
   # generate the vox csv file
   # Currently, our training system use csv file for dataset
   # train data: vox1 dev + all vox2, test data: vox1 test
   echo "convert the json format to csv format to be compatible with training process"
   python3 local/make_vox_csv_dataset_from_json.py\
      --train "${dir}/vox1/manifest.dev" "${dir}/vox2/manifest.vox2"\
      --test "${dir}/vox1/manifest.test" \
      --target_dir "${dir}/vox/" \
      --config ${conf_path}

   if [ $? -ne 0 ]; then
      echo "Prepare voxceleb failed. Terminated."
      exit 1
   fi
fi

if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then
   # generate the open rir noise manifest file
   echo "generate the open rir noise manifest file"
   python3 ${TARGET_DIR}/rir_noise/rir_noise.py\
      --manifest_prefix="${dir}/rir_noise/manifest" \
      --target_dir="${TARGET_DIR}/rir_noise/"

   if [ $? -ne 0 ]; then
      echo "Prepare rir_noise failed. Terminated."
      exit 1
   fi
fi

if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 7 ]; then
   # generate the open rir noise csv file
   echo "generate the open rir noise csv file"
   python3 local/make_rirs_noise_csv_dataset_from_json.py \
      --noise_dir="${TARGET_DIR}/rir_noise/" \
      --data_dir="${dir}/rir_noise/" \
      --config ${conf_path}

   if [ $? -ne 0 ]; then
      echo "Prepare rir_noise failed. Terminated."
      exit 1
   fi
fi


================================================
FILE: examples/voxceleb/sv0/local/data_prepare.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse

import paddle
from yacs.config import CfgNode

from paddlespeech.audio.datasets.voxceleb import VoxCeleb
from paddlespeech.s2t.utils.log import Log
from paddlespeech.vector.io.augment import build_augment_pipeline
from paddlespeech.vector.training.seeding import seed_everything

logger = Log(__name__).getlog()


def main(args, config):
    """Generate the VoxCeleb dataset info and, when enabled, the augment data info.

    Args:
        args (argparse.Namespace): command line arguments, `data_dir` is used
        config (CfgNode): yaml configuration content; `seed`, `vox2_base_path`
                          and `augment` are used
    """
    # the whole data preparation runs on the cpu
    paddle.set_device("cpu")
    # fix the random seed, which is required for multiprocess training
    seed_everything(config.seed)

    # Building the train split of the dataset generates the voxceleb csv file.
    # Note: a c++ exception may be reported here, but the program still
    # executes fine, so it is ignored.
    # The vox2 base path is passed explicitly to generate the audio info.
    logger.info("start to generate the voxceleb dataset info")
    voxceleb_train = VoxCeleb(
        'train',
        target_dir=args.data_dir,
        vox2_base_path=config.vox2_base_path)

    # nothing more to do when augmentation is switched off
    if not config.augment:
        return

    # building the augment pipeline generates the augment noise csv file
    logger.info("start to generate the augment dataset info")
    noise_pipeline = build_augment_pipeline(target_dir=args.data_dir)


if __name__ == "__main__":
    # Command line entry point: parse the arguments, load the yaml
    # configuration and run the data preparation.
    # yapf: disable
    # `__doc__` is the description of the parser; the first positional
    # parameter of ArgumentParser is `prog`, so it has to be passed by keyword.
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--data-dir",
                        default="./data/",
                        type=str,
                        help="data directory")
    parser.add_argument("--config",
                        default=None,
                        type=str,
                        help="configuration file")
    args = parser.parse_args()
    # yapf: enable

    # https://yaml.org/type/float.html
    # new_allowed=True lets the yaml file introduce keys that are not predefined
    config = CfgNode(new_allowed=True)
    if args.config:
        config.merge_from_file(args.config)

    # make the configuration read-only before it is used
    config.freeze()
    print(config)

    main(args, config)


================================================
FILE: examples/voxceleb/sv0/local/emb.sh
================================================
#!/bin/bash
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Extract the speaker embedding of a single audio file with a trained model.
# All settings are passed as `--<option> <value>`; no positional argument is accepted.

. ./path.sh

stage=0
stop_stage=100
exp_dir=exp/ecapa-tdnn-vox12-big/            # experiment directory
conf_path=conf/ecapa_tdnn.yaml
audio_path="demo/voxceleb/00001.wav"
use_gpu=true

# every variable defined above can be overridden from the command line
. ${MAIN_ROOT}/utils/parse_options.sh || exit -1;

if [ $# -ne 0 ] ; then
   echo "Usage: $0 [options]";
   echo "e.g.: $0 --exp-dir exp/ecapa-tdnn-vox12-big/ --conf-path conf/ecapa_tdnn.yaml --audio-path demo/voxceleb/00001.wav"
   echo "Options: "
   echo "  --use-gpu <true,false|true>      # specify whether gpu is to be used for extracting the embedding"
   echo "  --stage <stage|0>                # Used to run a partially-completed process from somewhere in the middle."
   echo "  --stop-stage <stop-stage|100>    # Used to stop a partially-completed process somewhere in the middle"
   echo "  --exp-dir                        # experiment directory, which contains the model.pdparams"
   echo "  --conf-path                      # configuration file for extracting the embedding"
   echo "  --audio-path                     # audio-path, which will be processed to extract the embedding"
   exit 1;
fi

# set the test device
# compare the option as a string instead of executing its value as a command
device="cpu"
if [ "${use_gpu}" = "true" ]; then
    device="gpu"
fi

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # extract the audio embedding
    python3 ${BIN_DIR}/extract_emb.py --device ${device} \
            --config "${conf_path}" \
            --audio-path "${audio_path}" --load-checkpoint "${exp_dir}"
fi

================================================
FILE: examples/voxceleb/sv0/local/make_rirs_noise_csv_dataset_from_json.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Convert the PaddleSpeech jsonline format data to csv format data in voxceleb experiment.
Currently, Speaker Identificaton Training process use csv format.
"""
import argparse
import csv
import os
from typing import List

import tqdm
from yacs.config import CfgNode

from paddlespeech.audio.backends import soundfile_load as load_audio
from paddlespeech.s2t.utils.log import Log
from paddlespeech.vector.utils.vector_utils import get_chunks

logger = Log(__name__).getlog()


def get_chunks_list(wav_file: str,
                    split_chunks: bool,
                    base_path: str,
                    chunk_duration: float=3.0) -> List[List[str]]:
    """Build the csv rows which describe one audio file

    Every row uses the common vector csv representation:
    id, duration, wav, start, stop, label

    Args:
        wav_file (str): the path of the wav audio file
        split_chunks (bool): audio split flag
        base_path (str): the audio base path (not used in this function)
        chunk_duration (float): the chunk duration.
                                if split_chunks is set and the audio is longer than
                                chunk_duration, the audio is described by one row per chunk.

    Returns:
        List[List[str]]: the rows of the audio, one row for the whole audio or one row per chunk
    """
    samples, sample_rate = load_audio(wav_file)
    num_samples = samples.shape[0]
    total_duration = num_samples / sample_rate
    # the id is the path below ".../rir_noise/" cut at its first "."
    utt_id = wav_file.split("/rir_noise/")[-1].split(".")[0]

    # in rirs noise, all the label name is 'noise'
    # the label is a string and it is converted to an integer in training
    if not (split_chunks and total_duration > chunk_duration):
        # keep the whole audio as a single segment
        return [[utt_id, total_duration, wav_file, 0, num_samples, "noise"]]

    # split the audio into pieces of chunk_duration seconds
    rows = []
    for chunk_id in get_chunks(chunk_duration, utt_id, total_duration):
        # the last two "_" separated fields are the start and end timestamps
        begin, finish = chunk_id.split("_")[-2:]
        first_sample = int(float(begin) * sample_rate)
        last_sample = int(float(finish) * sample_rate)
        rows.append([
            chunk_id, total_duration, wav_file, first_sample, last_sample,
            "noise"
        ])
    return rows


def generate_csv(wav_files,
                 output_file: str,
                 base_path: str,
                 split_chunks: bool=True):
    """Prepare the csv file according the wav files

    The csv file starts with the header
    utt_id, duration, wav, start, stop, label
    followed by the rows which get_chunks_list returns for every wav file.

    Args:
        wav_files (list): all the audio list to prepare the csv file
        output_file (str): the output csv file
        base_path (str): the audio base path, forwarded to get_chunks_list
        split_chunks (bool): audio split flag
    """
    logger.info(f'Generating csv: {output_file}')
    header = ["utt_id", "duration", "wav", "start", "stop", "label"]
    csv_lines = []
    for item in tqdm.tqdm(wav_files):
        csv_lines.extend(
            get_chunks_list(
                item, base_path=base_path, split_chunks=split_chunks))

    # output_file may have no directory part; os.makedirs("") would raise
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # newline="" is required by the csv module, otherwise an extra "\r" is
    # written on platforms which translate "\n" on write
    with open(output_file, mode="w", newline="") as csv_f:
        csv_writer = csv.writer(
            csv_f, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerow(header)
        csv_writer.writerows(csv_lines)


def prepare_data(args, config):
    """Generate rir.csv and noise.csv from the RIRS_NOISES list files

    Args:
        args (argparse.Namespace): scripts args, noise_dir and data_dir are read
        config (CfgNode): yaml configuration content, only skip_prep is read
    """
    # the external config can switch the whole preparation off
    if config.skip_prep:
        return

    base_path = args.noise_dir
    wav_path = os.path.join(base_path, "RIRS_NOISES")
    logger.info(f"base path: {base_path}")
    logger.info(f"wav path: {wav_path}")

    def _read_wav_list(list_file):
        # the last space separated field of every line is joined onto base_path
        with open(list_file, 'r') as f:
            return [
                os.path.join(base_path, entry.strip().split(' ')[-1])
                for entry in f
            ]

    rir_files = _read_wav_list(
        os.path.join(wav_path, "real_rirs_isotropic_noises", "rir_list"))
    noise_files = _read_wav_list(
        os.path.join(wav_path, "pointsource_noises", "noise_list"))

    csv_path = os.path.join(args.data_dir, 'csv')
    logger.info(f"csv path: {csv_path}")
    for csv_name, files in (('rir.csv', rir_files),
                            ('noise.csv', noise_files)):
        generate_csv(
            files, os.path.join(csv_path, csv_name), base_path=base_path)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=__doc__)
    # all three options are mandatory and share default=None
    option_specs = (
        ("--noise_dir", dict(help="The noise dataset dataset directory.")),
        ("--data_dir", dict(help="The target directory stores the csv files")),
        ("--config", dict(type=str, help="configuration file")),
    )
    for flag, spec in option_specs:
        parser.add_argument(flag, default=None, required=True, **spec)
    args = parser.parse_args()

    # load the yaml configuration, new keys are allowed on the empty node
    config = CfgNode(new_allowed=True)
    if args.config:
        config.merge_from_file(args.config)

    # build the csv files
    prepare_data(args, config)


================================================
FILE: examples/voxceleb/sv0/local/make_vox_csv_dataset_from_json.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Convert the PaddleSpeech jsonline format data to csv format data in voxceleb experiment.
Currently, Speaker Identificaton Training process use csv format.
"""
import argparse
import csv
import json
import os
import random

import tqdm
from yacs.config import CfgNode

from paddlespeech.audio.backends import soundfile_load as load_audio
from paddlespeech.s2t.utils.log import Log
from paddlespeech.vector.utils.vector_utils import get_chunks

logger = Log(__name__).getlog()


def prepare_csv(wav_files, output_file, config, split_chunks=True):
    """Prepare the csv file according the wav files

    Every entry of wav_files is a json string; its 'utt', 'feat_shape' and
    'feat' fields are read to build the csv rows.

    Args:
        wav_files (list): all the audio list to prepare the csv file
        output_file (str): the output csv file
        config (CfgNode): yaml configuration content,
                          chunk_duration is read when split_chunks is True
        split_chunks (bool, optional): audio split flag. Defaults to True.
    """
    # os.path.dirname returns '' for a bare file name and os.makedirs('')
    # raises, so the directory is only created when there is one to create.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    csv_lines = []
    header = ["utt_id", "duration", "wav", "start", "stop", "label"]
    # voxceleb meta info for each training utterance segment
    # we extract a segment from an utterance to train
    # and the segment's period is between start and stop time point in the original wav file
    # each field in the meta info means as follows:
    # utt_id: the utterance segment name, which is unique in training dataset
    # duration: the total utterance time
    # wav: utterance file path, which should be absolute path
    # start: start point in the original wav file sample point range
    # stop: stop point in the original wav file sample point range
    # label: the utterance segment's label name,
    #        which is speaker name in speaker verification domain
    for item in tqdm.tqdm(wav_files, total=len(wav_files)):
        item = json.loads(item.strip())
        audio_id = item['utt'].replace(".wav",
                                       "")  # we remove the wav suffix name
        audio_duration = item['feat_shape'][0]
        wav_file = item['feat']
        label = audio_id.split('-')[
            0]  # speaker name in speaker verification domain
        waveform, sr = load_audio(wav_file)
        if split_chunks:
            uniq_chunks_list = get_chunks(config.chunk_duration, audio_id,
                                          audio_duration)
            for chunk in uniq_chunks_list:
                s, e = chunk.split("_")[-2:]  # Timestamps of start and end
                start_sample = int(float(s) * sr)
                end_sample = int(float(e) * sr)
                # id, duration, wav, start, stop, label
                # in vector, the label is the speaker id
                csv_lines.append([
                    chunk, audio_duration, wav_file, start_sample, end_sample,
                    label
                ])
        else:
            # keep the whole audio: the segment spans every sample
            csv_lines.append([
                audio_id, audio_duration, wav_file, 0, waveform.shape[0], label
            ])

    # newline="" lets the csv writer control the line terminator itself;
    # without it, platforms that translate '\n' would emit '\r\r\n'.
    with open(output_file, mode="w", newline="") as csv_f:
        csv_writer = csv.writer(
            csv_f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerow(header)
        csv_writer.writerows(csv_lines)


def get_enroll_test_list(dataset_list, verification_file):
    """Get the enroll and test utterance list from all the voxceleb1 test utterance dataset.
       Generally, we get the enroll and test utterances from the verification file.
       The verification file format as follows:
       target/nontarget enroll-utt test-utt,
       we set 0 as nontarget and 1 as target, eg:
       0 a.wav b.wav
       1 a.wav a.wav

    Args:
        dataset_list (list): all the dataset to get the test utterances
        verification_file (str): voxceleb1 trial file

    Returns:
        tuple: (enroll_files, test_files), two sorted lists holding the raw
               jsonline strings whose 'utt' field appears as enroll or test
               utterance in the verification file
    """
    logger.info(f"verification file: {verification_file}")
    enroll_audios = set()
    test_audios = set()
    with open(verification_file, 'r') as f:
        for line in f:
            # split() accepts any run of whitespace as separator, and blank
            # lines (e.g. a trailing empty line) are skipped instead of
            # failing in the three-way unpack below
            fields = line.split()
            if not fields:
                continue
            _, enroll_file, test_file = fields
            # spk/rec/name.wav -> spk-rec-name.wav, compared with 'utt' below
            enroll_audios.add(enroll_file.replace('/', '-'))
            test_audios.add(test_file.replace('/', '-'))

    enroll_files = []
    test_files = []
    for dataset in dataset_list:
        with open(dataset, 'r') as f:
            for line in f:
                # a blank line carries no json object
                if not line.strip():
                    continue
                # audio_id may be in enroll and test at the same time
                # eg: 1 a.wav a.wav
                # the audio a.wav is enroll and test file at the same time
                audio_id = json.loads(line.strip())['utt']
                if audio_id in enroll_audios:
                    enroll_files.append(line)
                if audio_id in test_audios:
                    test_files.append(line)

    return sorted(enroll_files), sorted(test_files)


def get_train_dev_list(dataset_list, target_dir, split_ratio):
    """Split all the training utterances into a train list and a dev list.
       The first split_ratio part of the shuffled utterances is the train dataset,
       and the remaining utterances (ratio is 1 - split_ratio) are the dev dataset.
       The speaker to id mapping is written to <target_dir>/meta/label2id.txt

    Args:
        dataset_list (list): all the jsonline dataset files to read the utterances from
        target_dir (str): the target directory,
                          the meta directory is created below it
        split_ratio (float): train dataset ratio in all utterance list

    Returns:
        tuple: (train_files, dev_files), two lists of stripped jsonline strings
    """
    logger.info("start to get train and dev utt list")
    meta_dir = os.path.join(target_dir, "meta")
    if not os.path.exists(meta_dir):
        os.makedirs(meta_dir)

    audio_files = []
    speakers = set()
    for dataset in dataset_list:
        with open(dataset, 'r') as f:
            for raw_line in f:
                entry = raw_line.strip()
                # the label is speaker name
                speakers.add(json.loads(entry)['utt2spk'])
                audio_files.append(entry)
    speakers = sorted(speakers)
    logger.info(f"we get {len(speakers)} speakers from all the train dataset")

    # the label id is the position of the speaker in the sorted speaker list
    label2id_path = os.path.join(meta_dir, "label2id.txt")
    with open(label2id_path, 'w') as f:
        f.writelines(f'{label_name} {label_id}\n'
                     for label_id, label_name in enumerate(speakers))
    logger.info(f'we store the speakers to {label2id_path}')

    # sort first so the shuffle only depends on the random state
    split_idx = int(split_ratio * len(audio_files))
    audio_files.sort()
    random.shuffle(audio_files)
    train_files = audio_files[:split_idx]
    dev_files = audio_files[split_idx:]
    logger.info(
        f"we get train utterances: {len(train_files)}, dev utterance: {len(dev_files)}"
    )
    return train_files, dev_files


def prepare_data(args, config):
    """Convert the jsonline format to csv format

    Args:
        args (argparse.Namespace): scripts args, train, test and target_dir are read
        config (CfgNode): yaml configuration content,
                          seed, skip_prep, verification_file and split_ratio are read
    """
    # stage0: the random seed is set even when the preparation is skipped
    random.seed(config.seed)

    # nothing else to do when the external config sets the skip_prep flag
    if config.skip_prep:
        return

    # stage 1: enroll.csv and test.csv hold whole utterances (no chunk split)
    logger.info("start to prepare the data csv file")
    enroll_files, test_files = get_enroll_test_list(
        [args.test], verification_file=config.verification_file)
    for csv_name, files in (("enroll.csv", enroll_files),
                            ("test.csv", test_files)):
        prepare_csv(
            files,
            os.path.join(args.target_dir, "csv", csv_name),
            config,
            split_chunks=False)

    # stage 2: train.csv and dev.csv, the train ratio is config.split_ratio
    #          and the remaining utterances are the dev dataset
    #          label2id.txt is written by get_train_dev_list
    logger.info("start to prepare the data csv file")
    train_files, dev_files = get_train_dev_list(
        args.train, target_dir=args.target_dir, split_ratio=config.split_ratio)
    for csv_name, files in (("train.csv", train_files),
                            ("dev.csv", dev_files)):
        prepare_csv(files,
                    os.path.join(args.target_dir, "csv", csv_name), config)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description=__doc__)
    # every option is mandatory, --train accepts one or more files
    option_specs = (
        ("--train", dict(
            nargs='+', help="The jsonline files list for train.")),
        ("--test", dict(help="The jsonline file for test")),
        ("--target_dir", dict(
            default=None,
            help="The target directory stores the csv files and meta file.")),
        ("--config", dict(
            default=None, type=str, help="configuration file")),
    )
    for flag, spec in option_specs:
        parser.add_argument(flag, required=True, **spec)
    args = parser.parse_args()

    # load the yaml configuration, new keys are allowed on the empty node
    config = CfgNode(new_allowed=True)
    if args.config:
        config.merge_from_file(args.config)

    # build the csv files from the jsonline files
    prepare_data(args, config)


================================================
FILE: examples/voxceleb/sv0/local/make_voxceleb_kaldi_trial.py
================================================
#!/usr/bin/python3
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Make VoxCeleb1 trial of kaldi format
this script creat the test trial from kaldi trial voxceleb1_test_v2.txt or official trial veri_test2.txt 
to kaldi trial format
"""
import argparse
import codecs
import os

# NOTE: the command line is parsed at module level, i.e. also on import
parser = argparse.ArgumentParser(description=__doc__)
# input: the VoxCeleb style trial file
parser.add_argument(
    "--voxceleb_trial",
    type=str,
    default="voxceleb1_test_v2",
    help="VoxCeleb trial file. Default we use the kaldi trial voxceleb1_test_v2.txt"
)
# output: the trial file written in Kaldi format
parser.add_argument(
    "--trial",
    type=str,
    default="data/test/trial",
    help="Kaldi format trial file")
args = parser.parse_args()


def main(voxceleb_trial, trial):
    """
        Convert a VoxCeleb trial file into a Kaldi format trial file.

        VoxCeleb provide several trial file, which format is different with kaldi format.

        VoxCeleb format's meaning is as following:
        --------------------------------
        target_or_nontarget path1 path2
        --------------------------------
        target_or_nontarget is an integer: 1 target                 path1 is equal to path2
                                           0 nontarget              path1 is unequal to path2
        path1: spkr_id/rec_id/name
        path2: spkr_id/rec_id/name

        Kaldi format's meaning is as following:
        ---------------------------------------
        utt_id1 utt_id2 target_or_nontarget
        ---------------------------------------
        utt_id1: utterance identification or speaker identification
        utt_id2: utterance identification or speaker identification
        target_or_nontarget is a string: 'target' utt_id1 is equal to utt_id2
                                         'nontarget' utt_id1 is unequal to utt_id2

        Args:
            voxceleb_trial (str): path of the existing VoxCeleb trial file
            trial (str): path of the Kaldi trial file to write

        Raises:
            RuntimeError: if voxceleb_trial does not exist
    """
    print("Start convert the voxceleb trial to kaldi format")
    if not os.path.exists(voxceleb_trial):
        raise RuntimeError(
            "{} does not exist. Pleas input the correct file path".format(
                voxceleb_trial))

    # makedirs creates missing parent directories too (os.mkdir does not),
    # and a bare file name has an empty dirname that must not be created
    trial_dirname = os.path.dirname(trial)
    if trial_dirname:
        os.makedirs(trial_dirname, exist_ok=True)

    with codecs.open(voxceleb_trial, 'r', encoding='utf-8') as f, \
         codecs.open(trial, 'w', encoding='utf-8') as w:
        for line in f:
            fields = line.split()
            # blank lines carry no trial and would break the unpack below
            if not fields:
                continue
            target_or_nontarget, path1, path2 = fields

            # spkr_id/rec_id/name -> spkr_id-rec_id-name
            utt_id1 = path1.replace("/", "-")
            utt_id2 = path2.replace("/", "-")
            target = "target" if int(target_or_nontarget) else "nontarget"
            w.write("{} {} {}\n".format(utt_id1, utt_id2, target))
    print("Convert the voxceleb trial to kaldi format successfully")


if __name__ == "__main__":
    # hand the two parsed command line options to the converter
    main(voxceleb_trial=args.voxceleb_trial, trial=args.trial)


================================================
FILE: examples/voxceleb/sv0/local/test.sh
================================================
#!/bin/bash
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

stage=1
stop_stage=100
use_gpu=true    # if true, we run on GPU.

# presumably maps the --stage/--stop-stage/--use-gpu options listed in the
# usage text onto the variables above -- confirm in utils/parse_options.sh
. "${MAIN_ROOT}/utils/parse_options.sh" || exit -1;

if [ $# -ne 3 ] ; then
   echo "Usage: $0 [options] <data-dir> <exp-dir> <conf-path>";
   echo "e.g.: $0 ./data/ exp/voxceleb12/ conf/ecapa_tdnn.yaml"
   echo "Options: "
   echo "  --use-gpu <true,false|true>      # specify is gpu is to be used for training"
   echo "  --stage <stage|-1>               # Used to run a partially-completed data process from somewhere in the middle."
   echo "  --stop-stage <stop-stage|100>    # Used to run a partially-completed data process stop stage in the middle"
   exit 1;
fi

dir=$1
exp_dir=$2
conf_path=$3

# count the comma separated entries of CUDA_VISIBLE_DEVICES (0 when it is empty)
ngpu=$(echo "$CUDA_VISIBLE_DEVICES" | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

# select the device: gpu only when requested and at least one gpu is visible
device="cpu"
if ${use_gpu}; then
    device="gpu"
fi
if [ "$ngpu" -le 0 ]; then
    echo "no gpu, training in cpu mode"
    device='cpu'
    use_gpu=false
fi

if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
   # test the model and compute the eer metrics
   # the arguments are quoted so that paths containing spaces stay one word
   python3 "${BIN_DIR}/test.py" \
         --data-dir "${dir}" \
         --load-checkpoint "${exp_dir}" \
         --config "${conf_path}" \
         --device "${device}"
fi


================================================
FILE: examples/voxceleb/sv0/local/train.sh
================================================
#!/bin/bash
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

stage=0
stop_stage=100
use_gpu=true    # if true, we run on GPU.

# presumably maps the --stage/--stop-stage/--use-gpu options listed in the
# usage text onto the variables above -- confirm in utils/parse_options.sh
. "${MAIN_ROOT}/utils/parse_options.sh" || exit -1;

if [ $# -ne 3 ] ; then
   echo "Usage: $0 [options] <data-dir> <exp-dir> <conf-path>";
   echo "e.g.: $0 ./data/ exp/voxceleb12/ conf/ecapa_tdnn.yaml"
   echo "Options: "
   echo "  --use-gpu <true,false|true>      # specify is gpu is to be used for training"
   echo "  --stage <stage|-1>               # Used to run a partially-completed data process from somewhere in the middle."
   echo "  --stop-stage <stop-stage|100>    # Used to run a partially-completed data process stop stage in the middle"
   exit 1;
fi

dir=$1
exp_dir=$2
conf_path=$3

# count the comma separated entries of CUDA_VISIBLE_DEVICES (0 when it is empty)
ngpu=$(echo "$CUDA_VISIBLE_DEVICES" | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

# select the device: gpu only when requested and at least one gpu is visible
device="cpu"
if ${use_gpu}; then
    device="gpu"
fi
if [ "$ngpu" -le 0 ]; then
    echo "no gpu, training in cpu mode"
    device='cpu'
    use_gpu=false
fi

if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    # train the speaker identification task with voxceleb data
    # and we will create the trained model parameters in ${exp_dir}/model.pdparams as the soft link
    # Note: we will store the log file in exp/log directory
    # the arguments are quoted so that paths containing spaces stay one word
    if $use_gpu; then
        python3 -m paddle.distributed.launch --gpus="$CUDA_VISIBLE_DEVICES" \
            "${BIN_DIR}/train.py" --device "${device}" --checkpoint-dir "${exp_dir}" \
            --data-dir "${dir}" --config "${conf_path}"
    else
        python3 \
            "${BIN_DIR}/train.py" --device "${device}" --checkpoint-dir "${exp_dir}" \
            --data-dir "${dir}" --config "${conf_path}"
    fi
fi

# $? is the status of the stage block above: the exit code of the python
# command it ran, or 0 when the stage was skipped
if [ $? -ne 0 ]; then
    echo "Failed in training!"
    exit 1
fi

exit 0

================================================
FILE: examples/voxceleb/sv0/path.sh
================================================
#!/bin/bash
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# repository root: three levels above the current working directory
# ${PWD} is quoted so that a checkout path containing spaces is not split
export MAIN_ROOT=$(realpath "${PWD}/../../../")

export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export LC_ALL=C

export PYTHONDONTWRITEBYTECODE=1
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/

# BIN_DIR points at the experiment scripts of the selected model
MODEL=ecapa_tdnn
export BIN_DIR=${MAIN_ROOT}/paddlespeech/vector/exps/${MODEL}

================================================
FILE: examples/voxceleb/sv0/run.sh
================================================
#!/bin/bash
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

. ./path.sh
set -e

#######################################################################
# stage 0: data prepare, including voxceleb1 download and generate {train,dev,enroll,test}.csv
#          voxceleb2 data is m4a format, so you need to convert the m4a to wav yourself with the script local/convert.sh
# stage 1: train the speaker identification model
# stage 2: test speaker identification
# stage 3: (todo) extract the training embedding to train the LDA and PLDA
######################################################################

# the vox2 dataset is stored in m4a format, you need to convert the audio from m4a to wav yourself
# and put all of them to ${MAIN_ROOT}/datasets/vox2
# we will find the wav from ${MAIN_ROOT}/datasets/vox1/{dev,test}/wav and ${MAIN_ROOT}/datasets/vox2/wav

stage=0
stop_stage=50

# data directory
# if we set the variable ${dir}, we will store the wav info to this directory
# otherwise, we will store the wav info to data/vox1 and data/vox2 directory respectively
# vox2 wav path, we must convert the m4a format to wav format
dir=data/                                 # data info directory

exp_dir=exp/ecapa-tdnn-vox12-big/            # experiment directory
conf_path=conf/ecapa_tdnn.yaml
gpus=0,1,2,3

source "${MAIN_ROOT}/utils/parse_options.sh" || exit 1;

mkdir -p "${exp_dir}"

# the arguments handed to the stage scripts are quoted so that
# paths containing spaces stay one word

if [ "$stage" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
     # stage 0: data prepare for vox1 and vox2, vox2 must be converted from m4a to wav
     bash ./local/data.sh "${dir}" "${conf_path}" || exit -1;
fi

if [ "$stage" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
     # stage 1: train the speaker identification model
     CUDA_VISIBLE_DEVICES="${gpus}" bash ./local/train.sh "${dir}" "${exp_dir}" "${conf_path}"
fi

if [ "$stage" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
     # stage 2: get the speaker verification scores with cosine function
     #          now we only support use cosine to get the scores
     CUDA_VISIBLE_DEVICES=0 bash ./local/test.sh "${dir}" "${exp_dir}" "${conf_path}"
fi

# if [ $stage -le 3 ]; then
#      # stage 3: extract the training embedding to train the LDA and PLDA
#      # todo: extract the training embedding
# fi


================================================
FILE: examples/wenetspeech/README.md
================================================
* asr0 - deepspeech2 Streaming/Non-Streaming
* asr1 - transformer/conformer Streaming/Non-Streaming
* asr2 - transformer/conformer Streaming/Non-Streaming with Kaldi feature

# [WenetSpeech](https://github.com/wenet-e2e/WenetSpeech)

A 10000+ Hours Multi-domain Chinese Corpus for Speech Recognition

## Description

### Creation

All the data are collected from YouTube and Podcast. Optical character recognition (OCR) and automatic speech recognition (ASR) techniques are adopted to label each YouTube and Podcast recording, respectively. To improve the quality of the corpus, we use a novel end-to-end label error detection method to further validate and filter the data.

### Categories

In summary, WenetSpeech groups all data into 3 categories, as the following table shows:

| Set        | Hours | Confidence  | Usage                                 |
|------------|-------|-------------|---------------------------------------|
| High Label | 10005 | >=0.95      | Supervised Training                   |
| Weak Label | 2478  | [0.6, 0.95] | Semi-supervised or noise training     |
| Unlabel    | 9952  | /           | Unsupervised training or Pre-training |
| In Total   | 22435 | /           | All above                             |

### High Label Data

We classify the high label into 10 groups according to its domain, speaking style, and scenarios.

| Domain      | Youtube | Podcast | Total  |
|-------------|---------|---------|--------|
| audiobook   | 0       | 250.9   | 250.9  |
| commentary  | 112.6   | 135.7   | 248.3  |
| documentary | 386.7   | 90.5    | 477.2  |
| drama       | 4338.2  | 0       | 4338.2 |
| interview   | 324.2   | 614     | 938.2  |
| news        | 0       | 868     | 868    |
| reading     | 0       | 1110.2  | 1110.2 |
| talk        | 204     | 90.7    | 294.7  |
| variety     | 603.3   | 224.5   | 827.8  |
| others      | 144     | 507.5   | 651.5  |
| Total       | 6113    | 3892    | 10005  |

As shown in the following table, we provide 3 training subsets, namely `S`, `M`, and `L` for building ASR systems on different data scales.

| Training Subsets | Confidence  | Hours |
|------------------|-------------|-------|
| L                | [0.95, 1.0] | 10005 |
| M                | 1.0         | 1000  |
| S                | 1.0         | 100   |

### Evaluation Sets

| Evaluation Sets | Hours | Source       | Description                                                                             |
|-----------------|-------|--------------|-----------------------------------------------------------------------------------------|
| DEV             | 20    | Internet     | Specially designed for some speech tools which require cross-validation set in training |
| TEST\_NET       | 23    | Internet     | Match test                                                                              |
| TEST\_MEETING   | 15    | Real meeting | Mismatch test which is a far-field, conversational, spontaneous, and meeting dataset   |


================================================
FILE: examples/wenetspeech/asr0/RESULTS.md
================================================
# Wenetspeech

## Deepspeech2 Streaming

| Model | Number of Params | Release | Config | Test set | Valid Loss | CER | 
| --- | --- | --- | --- | --- | --- | --- | 
| DeepSpeech2 | 1.2G | r1.0.0a | conf/deepspeech2\_online.yaml + spec aug + fbank161, w/o LM | test\_net | 13.307 | 15.02 |
| DeepSpeech2 | 1.2G | r1.0.0a | conf/deepspeech2\_online.yaml + spec aug + fbank161, w/o LM | test\_meeting | 13.307 | 24.17 |


================================================
FILE: examples/wenetspeech/asr1/.gitignore
================================================
data
exp
*.profile


================================================
FILE: examples/wenetspeech/asr1/README.md
================================================
## Pack Model

pack model to tar.gz, e.g.

```bash
./utils/pack_model.sh  --preprocess_conf conf/preprocess.yaml --dict data/vocab.txt conf/conformer.yaml '' data/mean_std.json exp/conformer/checkpoints/wenetspeech.pdparams

```

show model.tar.gz
```
tar tf model.tar.gz 
```

Another way is:

```bash
tar cvzf asr1_chunk_conformer_u2_wenetspeech_ckpt_1.1.0.model.tar.gz model.yaml conf/tuning/ conf/chunk_conformer.yaml conf/preprocess.yaml data/mean_std.json exp/chunk_conformer/checkpoints/
```

## Export Static Model

>> Need Paddle >= 2.4

>> `data/test_meeting/data.list`
>> {"input": [{"name": "input1", "shape": [3.2230625, 80], "feat": "/home/PaddleSpeech/dataset/aishell/data_aishell/wav/test/S0764/BAC009S0764W0163.wav", "filetype": "sound"}], "output": [{"name": "target1", "shape": [9, 5538], "text": "\u697c\u5e02\u8c03\u63a7\u5c06\u53bb\u5411\u4f55\u65b9", "token": "\u697c \u5e02 \u8c03 \u63a7 \u5c06 \u53bb \u5411 \u4f55 \u65b9", "tokenid": "1891 1121 3502 1543 1018 477 528 163 1657"}], "utt": "BAC009S0764W0163", "utt2spk": "S0764"}

>> Test Wav: 
>> wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav
### U2 chunk conformer
>> UniDecoder
>> Make sure `reverse_weight` in config is `0.0`
>> https://paddlespeech.cdn.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_u2_wenetspeech_ckpt_1.1.0.model.tar.gz
```
tar zxvf asr1_chunk_conformer_u2_wenetspeech_ckpt_1.1.0.model.tar.gz
./local/export.sh conf/chunk_conformer.yaml exp/chunk_conformer/checkpoints/avg_10 ./export.ji
```

### U2++ chunk conformer
>> BiDecoder
>> https://paddlespeech.cdn.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_u2pp_wenetspeech_ckpt_1.1.0.model.tar.gz
>> Make sure `reverse_weight` in config is not `0.0`

```
./local/export.sh conf/chunk_conformer_u2pp.yaml exp/chunk_conformer/checkpoints/avg_10 ./export.ji
```


================================================
FILE: examples/wenetspeech/asr1/RESULTS.md
================================================
# WenetSpeech

## Conformer Streaming

| Model | Params | Config | Augmentation| Test set | Decode method | Valid Loss | CER |  
| --- | --- | --- | --- | --- | --- | --- | --- |
| conformer_online | 123.47 M | conf/chunk_conformer.yaml | spec_aug  | test net | attention | 9.329 | 0.1102 |  
| conformer_online | 123.47 M | conf/chunk_conformer.yaml | spec_aug  | test net | ctc_greedy_search | 9.329 | 0.1207 |  
| conformer_online | 123.47 M | conf/chunk_conformer.yaml | spec_aug  | test net | ctc_prefix_beam_search | 9.329 | 0.1203 |  
| conformer_online | 123.47 M | conf/chunk_conformer.yaml | spec_aug  | test net | attention_rescoring | 9.329  | 0.1100 |  
| conformer_online | 123.47 M | conf/chunk_conformer.yaml | spec_aug  | test meeting | attention | 9.329 | 0.1992 |  
| conformer_online | 123.47 M | conf/chunk_conformer.yaml | spec_aug  | test meeting | ctc_greedy_search | 9.329 | 0.1960 |  
| conformer_online | 123.47 M | conf/chunk_conformer.yaml | spec_aug  | test meeting | ctc_prefix_beam_search | 9.329 | 0.1946 |  
| conformer_online | 123.47 M | conf/chunk_conformer.yaml | spec_aug  | test meeting | attention_rescoring | 9.329  | 0.1879|  

## Conformer

| Model | Params | Config | Augmentation| Test set | Decode method | Loss | CER |  
| --- | --- | --- | --- | --- | --- | --- | --- |
| conformer | 32.52 M | conf/conformer.yaml | spec_aug  | dev | attention |  |  |  
| conformer | 32.52 M | conf/conformer.yaml | spec_aug  | test net | ctc_greedy_search |  |  |  
| conformer | 32.52 M | conf/conformer.yaml | spec_aug  | test meeting | ctc_prefix_beam_search |  |  |  
| conformer | 32.52 M | conf/conformer.yaml | spec_aug  | test net | attention_rescoring |  |  |  


## Conformer Pretrain Model

Pretrain model from http://mobvoi-speech-public.ufile.ucloud.cn/public/wenet/wenetspeech/20211025_conformer_exp.tar.gz

| Model | Params | Config | Augmentation| Test set | Decode method | Loss | CER |  
| --- | --- | --- | --- | --- | --- | --- | --- |
| conformer | 32.52 M | conf/conformer.yaml | spec_aug  | aishell1 | attention | - | 0.048456 |  
| conformer | 32.52 M | conf/conformer.yaml | spec_aug  | aishell1 | ctc_greedy_search | - | 0.052534 |  
| conformer | 32.52 M | conf/conformer.yaml | spec_aug  | aishell1 | ctc_prefix_beam_search | - | 0.052915 |  
| conformer | 32.52 M | conf/conformer.yaml | spec_aug  | aishell1 | attention_rescoring | - | 0.047904 |  


## Conformer Streaming Pretrained Model

Pretrain model from https://paddlespeech.cdn.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar.gz

| Model | Params | Config | Augmentation| Test set | Decode method | Chunk Size | CER |  
| --- | --- | --- | --- | --- | --- | --- | --- |
| conformer | 32.52 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | attention | 16 | 0.056273 |  
| conformer | 32.52 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | ctc_greedy_search | 16 | 0.078918 |  
| conformer | 32.52 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | ctc_prefix_beam_search | 16 | 0.079080 |  
| conformer | 32.52 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | attention_rescoring | 16 | 0.054401 |

| Model | Params | Config | Augmentation| Test set | Decode method | Chunk Size | CER |  
| --- | --- | --- | --- | --- | --- | --- | --- |
| conformer | 32.52 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | attention | -1 | 0.050767 |  
| conformer | 32.52 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | ctc_greedy_search | -1 | 0.061884 |  
| conformer | 32.52 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | ctc_prefix_beam_search | -1 | 0.062056 |  
| conformer | 32.52 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | attention_rescoring | -1 |  0.052110 |


## U2PP Streaming Pretrained Model

Pretrain model from https://paddlespeech.cdn.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_u2pp_wenetspeech_ckpt_1.3.0.model.tar.gz

| Model | Params | Config | Augmentation| Test set | Decode method | Chunk Size | CER |  
| --- | --- | --- | --- | --- | --- | --- | --- |
| conformer | 122.88 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | attention | 16 | 0.057031 |  
| conformer | 122.88 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | ctc_greedy_search | 16 | 0.068826 |  
| conformer | 122.88 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | ctc_prefix_beam_search | 16 | 0.069111 |  
| conformer | 122.88 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | attention_rescoring | 16 | 0.059213 |

| Model | Params | Config | Augmentation| Test set | Decode method | Chunk Size | CER |  
| --- | --- | --- | --- | --- | --- | --- | --- |
| conformer | 122.88 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | attention | -1 | 0.049256 |  
| conformer | 122.88 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | ctc_greedy_search | -1 | 0.052086 |  
| conformer | 122.88 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | ctc_prefix_beam_search | -1 | 0.052267 |  
| conformer | 122.88 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | attention_rescoring | -1 |  0.047198 |


================================================
FILE: examples/wenetspeech/asr1/conf/chunk_conformer.yaml
================================================
############################################
#           Network Architecture           #
############################################
cmvn_file: 
cmvn_file_type: "json"
# encoder related
encoder: conformer
encoder_conf:
    output_size: 512    # dimension of attention
    attention_heads: 8
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type; you can choose conv2d, conv2d6 or conv2d8
    normalize_before: True
    use_cnn_module: True
    cnn_module_kernel: 15
    activation_type: swish
    pos_enc_layer_type: rel_pos
    selfattention_layer_type: rel_selfattn
    causal: true
    use_dynamic_chunk: true
    cnn_module_norm: 'layer_norm' # using nn.LayerNorm makes model converge faster
    use_dynamic_left_chunk: false
# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 8
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    ctc_weight: 0.3
    lsm_weight: 0.1     # label smoothing option
    reverse_weight: 0.0 # unidecoder
    length_normalized_loss: false
    init_type: 'kaiming_uniform' 

# https://yaml.org/type/float.html
###########################################
#                   Data                  #
###########################################
train_manifest: data/train_l/data.list
dev_manifest: data/dev/data.list
test_manifest: data/test_meeting/data.list

###########################################
#              Dataloader                 #
###########################################
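use_streaming_data: True # NOTE(review): sibling configs and local/export.sh spell this key 'use_stream_data'; confirm which spelling the dataloader reads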
unit_type: 'char'
vocab_filepath: data/lang_char/vocab.txt 
preprocess_config: conf/preprocess.yaml
spm_model_prefix: ''
feat_dim: 80
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 32
do_filter: True
maxlen_in: 1200  # if do_filter == False && input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 100  # if do_filter == False && output length > maxlen-out, batchsize is automatically reduced
minlen_in: 10
minlen_out: 0
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 0
subsampling_factor: 1
num_encs: 1


###########################################
#                 Training                #
###########################################
n_epoch: 26
accum_grad: 32
global_grad_clip: 5.0
dist_sampler: True
log_interval: 1
checkpoint:
  kbest_n: 50
  latest_n: 5
optim: adam
optim_conf:
  lr: 0.001
  weight_decay: 1.0e-6
scheduler: warmuplr     
scheduler_conf:
  warmup_steps: 5000
  lr_decay: 1.0


================================================
FILE: examples/wenetspeech/asr1/conf/chunk_conformer_u2pp.yaml
================================================
############################################
#           Network Architecture           #
############################################
cmvn_file: 
cmvn_file_type: "json"
# encoder related
encoder: conformer
encoder_conf:
    output_size: 512    # dimension of attention
    attention_heads: 8
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.1
    input_layer: conv2d # encoder input type; you can choose conv2d, conv2d6 or conv2d8
    normalize_before: True
    use_cnn_module: True
    cnn_module_kernel: 15
    activation_type: swish
    pos_enc_layer_type: rel_pos
    selfattention_layer_type: rel_selfattn
    causal: true
    use_dynamic_chunk: true
    cnn_module_norm: 'layer_norm' # using nn.LayerNorm makes model converge faster
    use_dynamic_left_chunk: false
# decoder related
decoder: bitransformer
decoder_conf:
    attention_heads: 8
    linear_units: 2048
    num_blocks: 3     # the number of decoder blocks
    r_num_blocks: 3   #only for bitransformer
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.1
    src_attention_dropout_rate: 0.1

# hybrid CTC/attention
model_conf:
    ctc_weight: 0.3
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false
    reverse_weight: 0.3    # only for bitransformer decoder
    init_type: 'kaiming_uniform' # !Warning: needed for convergence

###########################################
#                   Data                  #
###########################################
train_manifest: data/train_l/data.list
dev_manifest: data/dev/data.list
test_manifest: data/test_meeting/data.list

###########################################
#              Dataloader                 #
###########################################
use_stream_data: True
vocab_filepath: data/lang_char/vocab.txt 
unit_type: 'char'
preprocess_config: conf/preprocess.yaml
spm_model_prefix: ''
feat_dim: 80
stride_ms: 10.0
window_ms: 25.0
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 32
do_filter: True
maxlen_in: 1200  # if do_filter == False && input length  > maxlen-in, batchsize is automatically reduced
maxlen_out: 100  # if do_filter == False && output length > maxlen-out, batchsize is automatically reduced
minlen_in: 10
minlen_out: 0
minibatches: 0 # for debug
batch_count: auto
batch_bins: 0 
batch_frames_in: 0
batch_frames_out: 0
batch_frames_inout: 0
num_workers: 0
subsampling_factor: 1
num_encs: 1

###########################################
#                 Training                #
###########################################
n_epoch: 150 
accum_grad: 8
global_grad_clip: 5.0
dist_sampler: False
optim: adam
optim_conf:
  lr: 0.002
  weight_decay: 1.0e-6
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 25000
  lr_decay: 1.0
log_interval: 100
checkpoint:
  kbest_n: 50
  latest_n: 5


================================================
FILE: examples/wenetspeech/asr1/conf/conformer.yaml
================================================
############################################
#           Network Architecture           #
############################################
cmvn_file_type: "json"
# encoder related
encoder: conformer
encoder_conf:
    output_size: 512    # dimension of attention
    attention_heads: 8
    linear_units: 2048  # the number of units of position-wise feed forward
    num_blocks: 12      # the number of encoder blocks
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    attention_dropout_rate: 0.0
    input_layer: conv2d # encoder input type; you can choose conv2d, conv2d6 or conv2d8
    normalize_before: True
    use_cnn_module: True
    cnn_module_kernel: 15
    cnn_module_norm: layer_norm
    activation_type: swish
    pos_enc_layer_type: rel_pos
    selfattention_layer_type: rel_selfattn

# decoder related
decoder: transformer
decoder_conf:
    attention_heads: 8
    linear_units: 2048
    num_blocks: 6
    dropout_rate: 0.1
    positional_dropout_rate: 0.1
    self_attention_dropout_rate: 0.0
    src_attention_dropout_rate: 0.0

# hybrid CTC/attention
model_conf:
    ctc_weight: 0.3
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false
    init_type: 'kaiming_uniform' # !Warning: needed for convergence

# https://yaml.org/type/float.html
###########################################
#                   Data                  #
###########################################
train_manifest: data/train_l/data.list
dev_manifest: data/dev/data.list
test_manifest: data/test_meeting/data.list

###########################################
#              Dataloader                 #
###########################################
use_stream_data: True
unit_type: 'char'
vocab_filepath: data/lang_char/vocab.txt 
preprocess_config: conf/preprocess.yaml
cmvn_file: data/mean_std.json
spm_model_prefix: ''
feat_dim: 80
stride_ms: 10.0
window_ms: 25.0
dither: 0.1
sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
batch_size: 32
minlen_in: 10
maxlen_in: 1200  # if input length(number of frames) > maxlen-in, data is automatically removed
minlen_out: 0
maxlen_out: 150  # if output length(number of tokens) > maxlen-out, data is automatically removed
resample_rate: 16000
shuffle_size: 1500 # read number of 'shuffle_size' data as a chunk, shuffle the data in the chunk
sort_size: 1000  # read number of 'sort_size' data as a chunk, sort the data in the chunk 
num_workers: 8
prefetch_factor: 10
dist_sampler: True
num_encs: 1

###########################################
#                 Training                #
###########################################
n_epoch: 32 
accum_grad: 32
global_grad_clip: 5.0
log_interval: 100
checkpoint:
  kbest_n: 50
  latest_n: 5
optim: adam
optim_conf:
  lr: 0.001
  weight_decay: 1.0e-6
scheduler: warmuplr     
scheduler_conf:
  warmup_steps: 5000
  lr_decay: 1.0


================================================
FILE: examples/wenetspeech/asr1/conf/preprocess.yaml
================================================
process:
  # extract kaldi fbank from PCM
  - type: fbank_kaldi
    fs: 16000
    n_mels: 80
    n_shift: 160
    win_length: 400
    dither: 1.0
  - type: cmvn_json
    cmvn_path: data/mean_std.json
  # these three processes are a.k.a. SpecAugment
  - type: time_warp
    max_time_warp: 5
    inplace: true
    mode: PIL
  - type: freq_mask
    F: 30
    n_mask: 2
    inplace: true
    replace_with_zero: false
  - type: time_mask
    T: 40
    n_mask: 2
    inplace: true
    replace_with_zero: false


================================================
FILE: examples/wenetspeech/asr1/conf/tuning/chunk_decode.yaml
================================================
beam_size: 10
decoding_method: attention # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
ctc_weight: 0.5 # ctc weight for attention rescoring decode mode.
reverse_weight: 0.3 # reverse weight for attention rescoring decode mode.
decoding_chunk_size: 16 # decoding chunk size. Defaults to -1.
    # <0: for decoding, use full chunk.
    # >0: for decoding, use fixed chunk size as set.
    # 0: used for training, it's prohibited here. 
num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
simulate_streaming: True  # simulate streaming inference. Defaults to False.
decode_batch_size: 128
error_rate_type: cer 


================================================
FILE: examples/wenetspeech/asr1/conf/tuning/decode.yaml
================================================
beam_size: 10
decoding_method: attention # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
ctc_weight: 0.5 # ctc weight for attention rescoring decode mode.
reverse_weight: 0.3 # reverse weight for attention rescoring decode mode.
decoding_chunk_size: -1 # decoding chunk size. Defaults to -1.
    # <0: for decoding, use full chunk.
    # >0: for decoding, use fixed chunk size as set.
    # 0: used for training, it's prohibited here. 
num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
simulate_streaming: False  # simulate streaming inference. Defaults to False.
decode_batch_size: 128
error_rate_type: cer 


================================================
FILE: examples/wenetspeech/asr1/local/data.sh
================================================
#!/bin/bash

# Copyright 2021  Mobvoi Inc(Author: Di Wu, Binbin Zhang)
#                 NPU, ASLP Group (Author: Qijie Shao)
#
# Modified from wenet(https://github.com/wenet-e2e/wenet)
#
# WenetSpeech data preparation, organised in stages selected by
# --stage / --stop_stage:
#   -1: print download instructions and exit
#    0: run local/wenetspeech_data_prep.sh
#    1: build the character dictionary
#    2: compute CMVN statistics on a sampled subset of wav.scp
#    3: build shards for the dev, test and train sets

stage=-1
stop_stage=100

# Use your own data path. You need to download the WenetSpeech dataset by yourself.
wenetspeech_data_dir=./wenetspeech
# Make sure you have 1.2T for ${shards_dir}
shards_dir=./wenetspeech_shards

#wenetspeech training set
set=L
train_set=train_`echo $set | tr 'A-Z' 'a-z'`
dev_set=dev
test_sets="test_net test_meeting"

cmvn=true
cmvn_sampling_divisor=20 # 20 means 5% of the training data to estimate cmvn

# Config handed to utils/compute_cmvn_stats.py in stage 2. It was referenced
# there but never declared, which aborts the script under `set -u`.
# Declaring it here presumably also lets parse_options.sh accept
# --train_config on the command line -- TODO confirm.
train_config=conf/conformer.yaml


. ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
set -u
set -o pipefail


mkdir -p data
TARGET_DIR=${MAIN_ROOT}/dataset
mkdir -p ${TARGET_DIR}

if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
    # download data
    # NOTE: this stage exits the script, so later stages only run when the
    # script is started with --stage 0 or higher.
    echo "Please follow https://github.com/wenet-e2e/WenetSpeech to download the data."
    exit 0;
fi

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    echo "Data preparation"
    local/wenetspeech_data_prep.sh \
        --train-subset $set \
        $wenetspeech_data_dir \
        data || exit 1;
fi

dict=data/lang_char/vocab.txt
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    echo "Make a dictionary"
    echo "dictionary: ${dict}"
    mkdir -p $(dirname $dict)
    echo "<blank>" > ${dict} # 0 will be used for "blank" in CTC
    echo "<unk>" >> ${dict} # <unk> must be 1
    echo "▁" >> ${dict} # ▁ is for space
    # Append every distinct token of the training text, one per line;
    # empty lines and the space symbol (already written above) are dropped.
    utils/text2token.py -s 1 -n 1 --space "▁" data/${train_set}/text \
        | cut -f 2- -d" " | tr " " "\n" \
        | sort | uniq | grep -a -v -e '^\s*$' \
        | grep -v "▁" \
        | awk '{print $0}' >> ${dict} \
        || exit 1;
    echo "<eos>" >> $dict
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
  echo "Compute cmvn"
  # Only 1/${cmvn_sampling_divisor} of wav.scp is sampled here to save time
  # BUG!!! We should use the segmented data for CMVN
  if $cmvn; then
    full_size=`cat data/${train_set}/wav.scp | wc -l`
    sampling_size=$((full_size / cmvn_sampling_divisor))
    shuf -n $sampling_size data/$train_set/wav.scp \
      > data/$train_set/wav.scp.sampled
    python3 utils/compute_cmvn_stats.py \
    --num_workers 16 \
    --train_config $train_config \
    --in_scp data/$train_set/wav.scp.sampled \
    --out_cmvn data/$train_set/mean_std.json \
    || exit 1;
  fi
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
  echo "Making shards, please wait..."
  RED='\033[0;31m'
  NOCOLOR='\033[0m'
  echo -e "It requires ${RED}1.2T ${NOCOLOR}space for $shards_dir, please make sure you have enough space"
  echo -e "It takes about ${RED}12 ${NOCOLOR}hours with 32 threads"
  for x in $dev_set $test_sets ${train_set}; do
    dst=$shards_dir/$x
    mkdir -p $dst
    utils/make_filted_shard_list.py --num_node 1 --num_gpus_per_node 8 --num_utts_per_shard 1000 \
      --do_filter --resample 16000  \
      --num_threads 32 --segments data/$x/segments \
      data/$x/wav.scp data/$x/text \
      $(realpath $dst) data/$x/data.list
  done
fi

echo "Wenetspeech data preparation done."
exit 0


================================================
FILE: examples/wenetspeech/asr1/local/export.sh
================================================
#!/bin/bash

# Runs ${BIN_DIR}/export.py for one checkpoint.
# usage: local/export.sh config_path ckpt_prefix jit_model_path

if [ $# != 3 ];then
    echo "usage: $0 config_path ckpt_prefix jit_model_path"
    exit -1
fi

# Number of comma separated entries in CUDA_VISIBLE_DEVICES (0 when it is empty).
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
ckpt_path_prefix=$2
jit_model_export_path=$3


# export can not use the stream data dataloader, so use_stream_data is set to False
# u2: reverse_weight should be 0.0
# u2pp: reverse_weight should be same with config file. e.g. 0.3
# Path arguments are quoted so values containing spaces or glob characters
# reach export.py as single arguments.
python3 -u "${BIN_DIR}/export.py" \
--ngpu "${ngpu}" \
--config "${config_path}" \
--opts use_stream_data False \
--checkpoint_path "${ckpt_path_prefix}" \
--export_path "${jit_model_export_path}"


if [ $? -ne 0 ]; then
    echo "Failed in export!"
    exit 1
fi


exit 0


================================================
FILE: examples/wenetspeech/asr1/local/extract_meta.py
================================================
# Copyright 2021  Xiaomi Corporation (Author: Yongqing Wang)
#                 Mobvoi Inc(Author: Di Wu, Binbin Zhang)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import json
import os
import sys


def get_args():
    """Parse the command line of this script.

    Returns:
        argparse.Namespace: holds the two positional arguments
        ``input_json`` and ``output_dir``.
    """
    cli = argparse.ArgumentParser(description="""
      This script is used to process raw json dataset of WenetSpeech,
      where the long wav is splitinto segments and
      data of wenet format is generated.
      """)
    positionals = (
        ('input_json', """Input json file of WenetSpeech"""),
        ('output_dir', """Output dir for prepared data"""),
    )
    for arg_name, arg_help in positionals:
        cli.add_argument(arg_name, help=arg_help)

    return cli.parse_args()


def meta_analysis(input_json, output_dir):
    """Split a WenetSpeech metadata json into tab separated list files.

    For every entry of ``json_data['audios']`` one line is written to
    ``wav.scp`` (aid, resolved audio path) and ``reco2dur`` (aid, duration).
    For every segment of that audio one line is written to ``text``,
    ``segments``, ``utt2dur`` and ``utt2subsets``. Audio entries with
    missing fields or a non-existing audio file, and segments with missing
    fields, are reported with a warning and skipped.

    Args:
        input_json: Path of the metadata json. Audio paths found inside it
            are resolved relative to the directory containing this file.
        output_dir: Directory receiving the list files; created if missing.

    Raises:
        SystemExit: If ``input_json`` cannot be opened or parsed.
    """
    input_dir = os.path.dirname(input_json)

    os.makedirs(output_dir, exist_ok=True)

    try:
        # Explicit UTF-8: the transcripts are not ASCII, and the locale
        # default encoding may be unable to represent them.
        with open(input_json, 'r', encoding='utf-8') as injson:
            json_data = json.load(injson)
    except Exception:
        sys.exit(f'Failed to load input json file: {input_json}')
    else:
        if json_data['audios'] is not None:
            with open(f'{output_dir}/text', 'w', encoding='utf-8') as utt2text, \
                 open(f'{output_dir}/segments', 'w', encoding='utf-8') as segments, \
                 open(f'{output_dir}/utt2dur', 'w', encoding='utf-8') as utt2dur, \
                 open(f'{output_dir}/wav.scp', 'w', encoding='utf-8') as wavscp, \
                 open(f'{output_dir}/utt2subsets', 'w', encoding='utf-8') as utt2subsets, \
                 open(f'{output_dir}/reco2dur', 'w', encoding='utf-8') as reco2dur:
                for long_audio in json_data['audios']:
                    # Reset per record so the warnings below never hit an
                    # unbound name or report the previous record's values.
                    aid = None
                    long_audio_path = None
                    try:
                        aid = long_audio['aid']
                        long_audio_path = os.path.realpath(
                            os.path.join(input_dir, long_audio['path']))
                        segments_lists = long_audio['segments']
                        duration = long_audio['duration']
                    except Exception:
                        print(f'''Warning: {aid} something is wrong, maybe the
                                  error path: {long_audio_path}, skipped''')
                        continue
                    # Explicit check instead of `assert`, which is removed
                    # when Python runs with -O.
                    if not os.path.exists(long_audio_path):
                        print(f'''Warning: {aid} something is wrong,
                                  maybe AssertionError, skipped''')
                        continue

                    wavscp.write(f'{aid}\t{long_audio_path}\n')
                    reco2dur.write(f'{aid}\t{duration}\n')
                    for segment_file in segments_lists:
                        try:
                            sid = segment_file['sid']
                            start_time = segment_file['begin_time']
                            end_time = segment_file['end_time']
                            dur = end_time - start_time
                            text = segment_file['text']
                            segment_subsets = segment_file["subsets"]
                        except Exception:
                            print(f'''Warning: {segment_file} something
                                          is wrong, skipped''')
                            continue
                        utt2text.write(f'{sid}\t{text}\n')
                        segments.write(
                            f'{sid}\t{aid}\t{start_time}\t{end_time}\n')
                        utt2dur.write(f'{sid}\t{dur}\n')
                        segment_sub_names = " ".join(segment_subsets)
                        utt2subsets.write(f'{sid}\t{segment_sub_names}\n')


def main():
    """Script entry point: parse the CLI and run the metadata extraction."""
    cli_args = get_args()
    json_path, target_dir = cli_args.input_json, cli_args.output_dir
    meta_analysis(json_path, target_dir)


if __name__ == '__main__':
    main()


================================================
FILE: examples/wenetspeech/asr1/local/process_opus.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2021  NPU, ASLP Group (Author: Qijie Shao)
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# process_opus.py: segmentation and downsampling of opus audio
# usage: python3 process_opus.py wav.scp segments output_wav.scp
import os
import sys

from pydub import AudioSegment


def read_file(wav_scp, segments):
    """Load a wav.scp and a segments file into four parallel lists.

    Args:
        wav_scp: Path of a text file with one ``<wav_id> <path>`` entry per
            line. Everything after the first run of whitespace is taken as
            the path, so paths may contain spaces.
        segments: Path of a text file with one
            ``<utt_id> <wav_id> <start> <end>`` entry per line.

    Returns:
        Tuple ``(utt_list, seg_path_list, start_time_list, end_time_list)``.
        Entry ``i`` of each list describes the ``i``-th non-blank line of
        ``segments``; start/end are floats and ``seg_path_list[i]`` is the
        wav.scp path of that line's ``wav_id``.

    Raises:
        ValueError: If a non-blank wav.scp line has no path field.
        AssertionError: If a non-blank segments line does not have 4 fields.
        KeyError: If a segments line names a wav_id absent from wav.scp.
    """
    wav_scp_dict = {}
    with open(wav_scp, 'r', encoding='UTF-8') as fin:
        for line_str in fin:
            line_str = line_str.strip()
            if not line_str:
                # tolerate blank lines (e.g. a trailing newline)
                continue
            # maxsplit=1 keeps whitespace inside the path intact
            wav_id, path = line_str.split(maxsplit=1)
            wav_scp_dict[wav_id] = path

    utt_list = []
    seg_path_list = []
    start_time_list = []
    end_time_list = []
    with open(segments, 'r', encoding='UTF-8') as fin:
        for line_str in fin:
            arr = line_str.strip().split()
            if not arr:
                continue
            assert len(arr) == 4
            utt_list.append(arr[0])
            seg_path_list.append(wav_scp_dict[arr[1]])
            start_time_list.append(float(arr[2]))
            end_time_list.append(float(arr[3]))
    return utt_list, seg_path_list, start_time_list, end_time_list


# TODO(Qijie): Fix the process logic
def output(output_wav_scp, utt_list, seg_path_list, start_time_list,
           end_time_list):
    """Cut every utterance out of its source audio and write a new wav.scp.

    For utterance ``i`` the slice ``[start, end)`` (seconds, converted to
    milliseconds) of ``seg_path_list[i]`` is set to a 16000 frame rate and
    exported as ``<utt_id>.wav``. The target directory is the source file's
    directory with every occurrence of ``"audio"`` replaced by
    ``'audio_seg'``. One ``<utt_id> <seg_wav_path>`` line per utterance is
    written to ``output_wav_scp``.

    Args:
        output_wav_scp: Path of the wav.scp file to write.
        utt_list: Utterance ids.
        seg_path_list: Source audio path of each utterance.
        start_time_list: Segment start times in seconds.
        end_time_list: Segment end times in seconds.
    """
    num_utts = len(utt_list)
    # Report progress about every 1% of the utterances. The step must never
    # be 0: with fewer than 100 utterances the previous
    # `int(num_utts * 0.01)` was 0 and `i % step` raised ZeroDivisionError.
    step = max(1, num_utts // 100)
    with open(output_wav_scp, 'w', encoding='UTF-8') as fout:
        previous_wav_path = ""
        source_wav = None
        for i in range(num_utts):
            utt_id = utt_list[i]
            current_wav_path = seg_path_list[i]
            output_dir = (os.path.dirname(current_wav_path)) \
                .replace("audio", 'audio_seg')
            seg_wav_path = os.path.join(output_dir, utt_id + '.wav')

            # The output directory is not created here -- presumably the
            # calling data-prep script creates it; TODO confirm.
            # if not os.path.exists(output_dir):
            #     os.makedirs(output_dir)

            # Consecutive segments of the same file reuse the decoded audio.
            if current_wav_path != previous_wav_path:
                source_wav = AudioSegment.from_file(current_wav_path)
            previous_wav_path = current_wav_path

            start = int(start_time_list[i] * 1000)
            end = int(end_time_list[i] * 1000)
            target_audio = source_wav[start:end].set_frame_rate(16000)
            target_audio.export(seg_wav_path, format="wav")

            fout.write("{} {}\n".format(utt_id, seg_wav_path))
            if i % step == 0:
                print("seg wav finished: {}%".format(100 * i // num_utts))


def main():
    """Script entry point: ``process_opus.py wav.scp segments output_wav.scp``."""
    wav_scp, segments, output_wav_scp = sys.argv[1], sys.argv[2], sys.argv[3]

    # read_file returns (utt ids, source paths, start times, end times),
    # which is exactly the tail of output()'s parameter list.
    parsed = read_file(wav_scp, segments)
    output(output_wav_scp, *parsed)


if __name__ == '__main__':
    main()


================================================
FILE: examples/wenetspeech/asr1/local/quant.sh
================================================
#!/bin/bash

# Runs ${BIN_DIR}/quant.py with attention_rescoring decoding on an audio scp
# (passes --num_utts 200 and a decode batch size of 1).
# Example:
# ./local/quant.sh conf/chunk_conformer_u2pp.yaml conf/tuning/chunk_decode.yaml exp/chunk_conformer_u2pp/checkpoints/avg_10 data/wav.aishell.test.scp 
if [ $# != 4 ];then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix audio_scp"
    exit -1
fi

# Number of comma separated entries in CUDA_VISIBLE_DEVICES (0 when it is empty).
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
decode_config_path=$2
ckpt_prefix=$3
audio_scp=$4

mkdir -p data
if [ $? -ne 0 ]; then
   exit 1
fi

if [ ! -f "${audio_scp}" ]; then
    echo "Plase input the right audio_scp path"
    exit 1
fi

# download language model
#bash local/download_lm_ch.sh
#if [ $? -ne 0 ]; then
#    exit 1
#fi

for type in  attention_rescoring; do
    echo "decoding ${type}"
    batch_size=1
    # Results are written next to the checkpoint: <ckpt_prefix>/<type>.rsl
    output_dir=${ckpt_prefix}
    mkdir -p "${output_dir}"
    # Path arguments are quoted so values containing spaces or glob
    # characters are passed through as single arguments.
    python3 -u "${BIN_DIR}/quant.py" \
    --ngpu "${ngpu}" \
    --config "${config_path}" \
    --decode_cfg "${decode_config_path}" \
    --result_file "${output_dir}/${type}.rsl" \
    --checkpoint_path "${ckpt_prefix}" \
    --opts decode.decoding_method ${type} \
    --opts decode.decode_batch_size ${batch_size} \
    --num_utts 200 \
    --audio_scp "${audio_scp}"

    if [ $? -ne 0 ]; then
        echo "Failed in evaluation!"
        exit 1
    fi
done
exit 0


================================================
FILE: examples/wenetspeech/asr1/local/test.sh
================================================
#!/bin/bash

# Decodes the test set with ${BIN_DIR}/test.py once per decoding method:
# attention, ctc_greedy_search, ctc_prefix_beam_search, attention_rescoring.
# Each run writes <ckpt_path_prefix>/<method>.rsl.

if [ $# != 3 ];then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix"
    exit -1
fi

# Number of comma separated entries in CUDA_VISIBLE_DEVICES (0 when it is empty).
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
decode_config_path=$2
ckpt_prefix=$3

# A config whose file name matches "chunk_*yaml" selects chunk mode.
chunk_mode=false
if [[ ${config_path} =~ ^.*chunk_.*yaml$ ]];then
    chunk_mode=true
fi

# download language model
#bash local/download_lm_ch.sh
#if [ $? -ne 0 ]; then
#    exit 1
#fi

# run_decode <decoding_method> <batch_size>
# Runs one decoding pass and exits the whole script with status 1 if
# test.py fails.
run_decode() {
    local type=$1
    local batch_size=$2
    local output_dir=${ckpt_prefix}

    echo "decoding ${type}"
    mkdir -p "${output_dir}"
    # Path arguments are quoted so values containing spaces or glob
    # characters are passed through as single arguments.
    python3 -u "${BIN_DIR}/test.py" \
    --ngpu "${ngpu}" \
    --config "${config_path}" \
    --decode_cfg "${decode_config_path}" \
    --result_file "${output_dir}/${type}.rsl" \
    --checkpoint_path "${ckpt_prefix}" \
    --opts decode.decoding_method ${type} \
    --opts decode.decode_batch_size ${batch_size}

    if [ $? -ne 0 ]; then
        echo "Failed in evaluation!"
        exit 1
    fi
}

if [ ${chunk_mode} == true ];then
    # stream decoding only support batchsize=1
    batched_size=1
else
    batched_size=64
fi

for type in attention ctc_greedy_search; do
    run_decode ${type} ${batched_size}
done

# These two methods are always run with a batch size of 1.
for type in ctc_prefix_beam_search attention_rescoring; do
    run_decode ${type} 1
done

exit 0


================================================
FILE: examples/wenetspeech/asr1/local/test_wav.sh
================================================
#!/bin/bash

# Decodes a single audio file with ${BIN_DIR}/test_wav.py using
# attention_rescoring; the result goes to <ckpt_path_prefix>/attention_rescoring.rsl.

if [ $# != 4 ];then
    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix audio_file"
    exit -1
fi

# Number of comma separated entries in CUDA_VISIBLE_DEVICES (0 when it is empty).
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

config_path=$1
decode_config_path=$2
ckpt_prefix=$3
audio_file=$4

# Fetch the demo wav into data/ (-nc: skip when it is already there).
# The script stops if wget reports a failure.
mkdir -p data
wget -nc https://paddlespeech.cdn.bcebos.com/datasets/single_wav/zh/demo_01_03.wav -P data/
if [ $? -ne 0 ]; then
   exit 1
fi

if [ ! -f "${audio_file}" ]; then
    echo "Plase input the right audio_file path"
    exit 1
fi

# download language model
#bash local/download_lm_ch.sh
#if [ $? -ne 0 ]; then
#    exit 1
#fi

for type in  attention_rescoring; do
    echo "decoding ${type}"
    batch_size=1
    output_dir=${ckpt_prefix}
    mkdir -p "${output_dir}"
    # Path arguments are quoted so values containing spaces or glob
    # characters are passed through as single arguments.
    python3 -u "${BIN_DIR}/test_wav.py" \
    --debug True \
    --ngpu "${ngpu}" \
    --config "${config_path}" \
    --decode_cfg "${decode_config_path}" \
    --result_file "${output_dir}/${type}.rsl" \
    --checkpoint_path "${ckpt_prefix}" \
    --opts decode.decoding_method ${type} \
    --opts decode.decode_batch_size ${batch_size} \
    --audio_file "${audio_file}"

    if [ $? -ne 0 ]; then
        echo "Failed in evaluation!"
        exit 1
    fi
done
exit 0


================================================
FILE: examples/wenetspeech/asr1/local/train.sh
================================================
#!/bin/bash
# Launch ${BIN_DIR}/train.py, either as a single process (no visible GPU) or
# through paddle.distributed.launch (one or more GPUs).
#
# Usage: CUDA_VISIBLE_DEVICES=0 train.sh [options] config_path ckpt_name [ips]
#   config_path  training config file passed to --config
#   ckpt_name    experiment name; output goes to exp/<ckpt_name>
#   ips          optional value forwarded as --ips=<ips> to the launcher
# The variables below can be overridden on the command line through
# utils/parse_options.sh. MAIN_ROOT and BIN_DIR are expected from the caller's
# environment.
#
# Exit status: 0 on success, 1 if training fails, 255 on a usage error.

profiler_options=
benchmark_batch_size=0
benchmark_max_step=0

# seed may break model convergence
seed=0

source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

# Number of GPUs = number of comma-separated fields in CUDA_VISIBLE_DEVICES.
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

if [ ${seed} != 0  ]; then
    export FLAGS_cudnn_deterministic=True
    echo "using seed $seed & FLAGS_cudnn_deterministic=True ..."
fi

# Exactly 2 or 3 positional arguments are accepted. The two bounds must be
# joined with ||: a count cannot be both below 2 and above 3.
if [ $# -lt 2 ] || [ $# -gt 3 ];then
    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name ips(optional)"
    exit -1
fi

config_path=$1
ckpt_name=$2
ips=$3

# Only pass --ips to the launcher when a value was given.
if [ ! $ips ];then
  ips_config=
else
  ips_config="--ips="${ips}
fi
echo ${ips_config}

mkdir -p exp

# default memory allocator strategy may cause gpu training hang
# for no OOM raised when memory exhausted
export FLAGS_allocator_strategy=naive_best_fit

if [ ${ngpu} == 0 ]; then
python3 -u ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--seed ${seed} \
--config ${config_path} \
--output exp/${ckpt_name} \
--profiler-options "${profiler_options}" \
--benchmark-batch-size ${benchmark_batch_size} \
--benchmark-max-step ${benchmark_max_step}
else
NCCL_SOCKET_IFNAME=eth0 python3 -m paddle.distributed.launch --gpus=${CUDA_VISIBLE_DEVICES} ${ips_config} ${BIN_DIR}/train.py \
--ngpu ${ngpu} \
--seed ${seed} \
--config ${config_path} \
--output exp/${ckpt_name} \
--profiler-options "${profiler_options}" \
--benchmark-batch-size ${benchmark_batch_size} \
--benchmark-max-step ${benchmark_max_step}
fi
# The status of an if/else is that of the last command run in the taken branch,
# i.e. the training command. Save it now, before any other command overwrites $?.
train_status=$?


if [ ${seed} != 0  ]; then
    unset FLAGS_cudnn_deterministic
fi

if [ ${train_status} -ne 0 ]; then
    echo "Failed in training!"
    exit 1
fi

exit 0


================================================
FILE: examples/wenetspeech/asr1/local/wenetspeech_data_prep.sh
================================================
#!/usr/bin/env bash

# Copyright 2021  Xiaomi Corporation (Author: Yongqing Wang)
#                 Seasalt AI, Inc (Author: Guoguo Chen)
#                 Mobvoi Inc(Author: Di Wu, Binbin Zhang)
#                 NPU, ASLP Group (Author: Qijie Shao)

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -e
set -o pipefail

stage=1
prefix=
train_subset=L

. ./utils/parse_options.sh || exit 1;

# Copy to <output> every line of <input> whose <field>-th whitespace-separated
# column matches the first column of some line in <idlist>.
#
# Usage: filter_by_id <idlist> <input> <output> [field]
#   idlist  file whose first column holds the ids to keep
#   input   file to filter
#   output  file that receives the kept lines (overwritten)
#   field   1-based column of <input> to match on; defaults to 1
#
# The Perl program dies on an unreadable id list, on an empty id-list line, and
# on an input line with fewer than <field> columns. Any failure of the pipeline
# prints an error message and exits the script with status 1.
filter_by_id () {
  idlist=$1
  input=$2
  output=$3
  field=1
  # The optional fourth argument selects the column to match on.
  if [ $# -eq 4 ]; then
    field=$4
  fi
  # perl -s turns the -idlist=... and -field=... switches given after `--`
  # into the Perl variables $idlist and $field used by the inline program.
  cat $input | perl -se '
    open(F, "<$idlist") || die "Could not open id-list file $idlist";
    while(<F>) {
      @A = split;
      @A>=1 || die "Invalid id-list file line $_";
      $seen{$A[0]} = 1;
    }
    while(<>) {
      @A = split;
      @A > 0 || die "Invalid file line $_";
      @A >= $field || die "Invalid file line $_";
      if ($seen{$A[$field-1]}) {
        print $_;
      }
    }' -- -idlist="$idlist" -field="$field" > $output ||\
  (echo "$0: filter_by_id() error: $input" && exit 1) || exit 1;
}

# Create <dest_dir> holding the subset of <src_dir> selected by <utt_list>.
#
# Usage: subset_data_dir <utt_list> <src_dir> <dest_dir>
# Produces utt2dur, text, segments and wav.scp in <dest_dir>; exits the script
# with status 1 if any step fails.
subset_data_dir () {
  utt_list=$1
  src_dir=$2
  dest_dir=$3
  mkdir -p $dest_dir || exit 1;
  # utt2dur, text and segments are each filtered on their first column
  # against the ids listed in utt_list.
  for meta_file in utt2dur text segments; do
    filter_by_id $utt_list $src_dir/$meta_file $dest_dir/$meta_file ||\
      (echo "$0: subset_data_dir() error: $src_dir/$meta_file" && exit 1) || exit 1;
  done
  # wav.scp is filtered by the distinct values found in the second column of
  # the subset's segments file; the temporary list is removed afterwards.
  awk '{print $2}' $dest_dir/segments | sort | uniq > $dest_dir/reco
  filter_by_id $dest_dir/reco $src_dir/wav.scp $dest_dir/wav.scp ||\
    (echo "$0: subset_data_dir() error: $src_dir/wav.scp" && exit 1) || exit 1;
  rm -f $dest_dir/reco
}

# Exactly two positional arguments must remain after option parsing;
# otherwise print the usage text and exit with status 1.
if [ $# -ne 2 ]; then
  echo "Usage: $0 [options] <wenetspeech-dataset-dir> <data-dir>"
  echo " e.g.: $0 --train-subset L /disk1/audio_data/wenetspeech/ data/"
  echo ""
  echo "This script takes the WenetSpeech source directory, and prepares the"
  echo "WeNet format data directory."
  echo "  --prefix <prefix>                # Prefix for output data directory."
  echo "  --stage <stage>                  # Processing stage."
  echo "  --train-subset <L|M|S|W>     # Train subset to be created."
  exit 1
fi

# Positional arguments: dataset source directory and output data directory.
wenetspeech_dir=$1
data_dir=$2

# Maps each subset label to the name of the data directory created for it.
declare -A subsets
subsets=(
  [L]="train_l"
  [M]="train_m"
  [S]="train_s"
  [W]="train_w"
  [DEV]="dev"
  [TEST_NET]="test_net"
  [TEST_MEETING]="test_meeting")

# Append "_" to a non-empty prefix; an empty prefix stays empty.
prefix=${prefix:+${prefix}_}

# Directory that receives the extracted corpus-wide metadata.
corpus_dir=$data_dir/${prefix}corpus/
# Stage 1: extract metadata from WenetSpeech.json into $corpus_dir.
if [ $stage -le 1 ]; then
  echo "$0: Extract meta into $corpus_dir"

  # Sanity check: both the JSON index and the audio directory must be present.
  if [ ! -f $wenetspeech_dir/WenetSpeech.json ]; then
    echo "$0: Please download $wenetspeech_dir/WenetSpeech.json!" && exit 1;
  fi
  if [ ! -d $wenetspeech_dir/audio ]; then
    echo "$0: Please download $wenetspeech_dir/audio!" && exit 1;
  fi

  if [ ! -d $corpus_dir ]; then
    mkdir -p $corpus_dir
  fi

  # Files to be created:
  # wav.scp text segments utt2dur
  python3 local/extract_meta.py \
    $wenetspeech_dir/WenetSpeech.json $corpus_dir || exit 1;
fi

# Stage 2: split the corpus metadata into one data directory per subset
# (the chosen training subset plus DEV, TEST_NET and TEST_MEETING).
if [ $stage -le 2 ]; then
  echo "$0: Split data to train, dev, test_net, and test_meeting"
  if [ ! -f $corpus_dir/utt2subsets ]; then
    echo "$0: No such file $corpus_dir/utt2subsets!" && exit 1;
  fi
  for label in $train_subset DEV TEST_NET TEST_MEETING; do
    # Reject labels that have no entry in the subsets table.
    if [ ! ${subsets[$label]+set} ]; then
      echo "$0: Subset $label is not defined in WenetSpeech.json." && exit 1;
    fi
    subset=${subsets[$label]}
    subset_out_dir=$data_dir/${prefix}$subset
    subset_id_list=$corpus_dir/${prefix}${subset}_utt_list
    if [ ! -d $subset_out_dir ]; then
      mkdir -p $subset_out_dir
    fi
    # Keep the utt2subsets lines that carry this label in any column after
    # the first.
    awk -v s=$label '{for (i=2;i<=NF;i++) if($i==s) print $0;}' \
       < $corpus_dir/utt2subsets \
       > $subset_id_list || exit 1;
    subset_data_dir $subset_id_list \
      $corpus_dir $subset_out_dir || exit 1;
  done
fi

echo "$0: Done"


================================================
FILE: examples/wenetspeech/asr1/path.sh
================================================
# Environment for this example; meant to be sourced by the example's scripts.

# Repository root, three levels above the current working directory.
export MAIN_ROOT=$(realpath ${PWD}/../../../)

export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export LC_ALL=C

# Python: no .pyc files, and UTF-8 I/O to avoid UnicodeDecodeError when LC_ALL=C.
export PYTHONDONTWRITEBYTECODE=1 PYTHONIOENCODING=UTF-8
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}

export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/

# Directory holding the entry-point scripts of the selected model.
MODEL=u2
export BIN_DIR=${MAIN_ROOT}/paddlespeech/s2t/exps/${MODEL}/bin


================================================
FILE: examples/wenetspeech/asr1/run.sh
================================================
#!/bin/bash
# Driver for this example: runs the stages in [stage, stop_stage] in order.
#   0: data preparation      1: training            2: checkpoint averaging
#   3: test                  4: CTC alignment       5: export
#   7: decode a single wav   8: quantized export (placeholder)
# The variables below can be overridden on the command line through
# utils/parse_options.sh, e.g. `./run.sh --stage 1 --stop_stage 3`.

. path.sh || exit 1;
set -e

gpus=0,1,2,3,4,5,6,7
stage=0
stop_stage=100
conf_path=conf/conformer.yaml
ips=  #xxx.xxx.xxx.xxx,xxx.xxx.xxx.xxx
decode_conf_path=conf/tuning/decode.yaml
average_checkpoint=true
avg_num=10

. ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

# Name of the averaged checkpoint, and experiment name derived from the config
# file name (text before the first '.').
avg_ckpt=avg_${avg_num}
ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}')
echo "checkpoint name ${ckpt}"

audio_file="data/demo_01_03.wav"

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # prepare data
    bash ./local/data.sh || exit -1
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # train model, all `ckpt` under `exp` dir
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path}  ${ckpt} ${ips}
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # avg n best model
    avg.sh best exp/${ckpt}/checkpoints ${avg_num}
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # test ckpt avg_n
    CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
fi

if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    # ctc alignment of test data
    CUDA_VISIBLE_DEVICES=0 ./local/align.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
fi

if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
    # export ckpt avg_n
    CUDA_VISIBLE_DEVICES=0 ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit
fi

if [ ${stage} -le 7 ] && [ ${stop_stage} -ge 7 ]; then
    # test a single .wav file
    CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${audio_file} || exit -1
fi

if [ ${stage} -le 8 ] && [ ${stop_stage} -ge 8 ]; then
    # export quant model, please see local/quant.sh
    # A `then` body must contain at least one command; a comment alone is a
    # bash syntax error, so keep an explicit no-op here.
    :
fi


================================================
FILE: examples/zh_en_tts/tts3/.gitignore
================================================
data
exp


================================================
FILE: examples/zh_en_tts/tts3/README.md
================================================

# Mixed Chinese and English TTS with CSMSC, LJSpeech-1.1, AISHELL-3 and VCTK datasets

This example contains code used to train a [Fastspeech2](https://arxiv.org/abs/2006.04558) model with [CSMSC](https://www.data-baker.com/open_source.html), [LJSpeech-1.1](https://keithito.com/LJ-Speech-Dataset/), [AISHELL3](http://www.aishelltech.com/aishell_3) and [VCTK](https://datashare.ed.ac.uk/handle/10283/3443) datasets.


## Dataset
### Download and Extract
Download all datasets and extract them to `./data`:
- The CSMSC dataset is in the directory `./data/BZNSYP`
- The Ljspeech dataset is in the directory `./data/LJSpeech-1.1`
- The aishell3 dataset is in the directory `./data/data_aishell3`
- The vctk dataset is in the directory `./data/VCTK-Corpus-0.92`
 
### Get MFA Result and Extract
We use [MFA](https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner) to get durations for the fastspeech2 training.
You can download from here:
- [baker_alignment_tone.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/BZNSYP/with_tone/baker_alignment_tone.tar.gz)
- [ljspeech_alignment.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/LJSpeech-1.1/ljspeech_alignment.tar.gz)
- [aishell3_alignment_tone.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/AISHELL-3/with_tone/aishell3_alignment_tone.tar.gz) 
- [vctk_alignment.tar.gz](https://paddlespeech.cdn.bcebos.com/MFA/VCTK-Corpus-0.92/vctk_alignment.tar.gz)

Or train your own MFA model by referring to the [mfa example](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/mfa) in our repo (MFA 1.x is used for now).

## Get Started
Assume the paths to the datasets are:
- `./data/BZNSYP`
- `./data/LJSpeech-1.1`
- `./data/data_aishell3` 
- `./data/VCTK-Corpus-0.92`

Assume the path to the MFA results of the datasets are:
- `./data/mfa/baker_alignment_tone`
- `./data/mfa/ljspeech_alignment`
- `./data/mfa/aishell3_alignment_tone`
- `./data/mfa/vctk_alignment`

Run the command below to
1. **source path**.
2. preprocess the dataset.
3. train the model.
4. synthesize wavs.
    - synthesize waveform from `metadata.jsonl`.
    - synthesize waveform from text file.
```bash
./run.sh
```

You can choose a range of stages you want to run, or set `stage` equal to `stop-stage` to use only one stage, for example, running the following command will only preprocess the dataset.
```bash
./run.sh --stage 0 --stop-stage 0
```

### Data Preprocessing
```bash
./local/preprocess.sh ${conf_path} ${datasets_root_dir} ${mfa_root_dir}
```
When it is done, a `dump` folder is created in the current directory. The structure of the dump folder is listed below.
```text
dump
├── dev
│   ├── norm
│   └── raw
├── phone_id_map.txt
├── speaker_id_map.txt
├── test
│   ├── norm
│   └── raw
└── train
    ├── energy_stats.npy
    ├── norm
    ├── pitch_stats.npy
    ├── raw
    └── speech_stats.npy
```
The dataset is split into 3 parts, namely `train`, `dev`, and `test`, each of which contains a `norm` and `raw` subfolder. The raw folder contains speech, pitch and energy features of each utterance, while the norm folder contains normalized ones. The statistics used to normalize features are computed from the training set, which is located in `dump/train/*_stats.npy`.

Also, there is a `metadata.jsonl` in each subfolder. It is a table-like file that contains phones, text_lengths, speech_lengths, durations, the path of speech features, the path of pitch features, a path of energy features, speaker, and id of each utterance.


### Model Training
`./local/train.sh` calls `${BIN_DIR}/train.py`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path}
```
Here's the complete help message.
```text
usage: train.py [-h] [--config CONFIG] [--train-metadata TRAIN_METADATA]
                [--dev-metadata DEV_METADATA] [--output-dir OUTPUT_DIR]
                [--ngpu NGPU] [--phones-dict PHONES_DICT]
                [--speaker-dict SPEAKER_DICT] [--voice-cloning VOICE_CLONING]

Train a FastSpeech2 model.

optional arguments:
  -h, --help            show this help message and exit
  --config CONFIG       fastspeech2 config file.
  --train-metadata TRAIN_METADATA
                        training data.
  --dev-metadata DEV_METADATA
                        dev data.
  --output-dir OUTPUT_DIR
                        output dir.
  --ngpu NGPU           if ngpu=0, use cpu.
  --phones-dict PHONES_DICT
                        phone vocabulary file.
  --speaker-dict SPEAKER_DICT
                        speaker id map file for multiple speaker model.
  --voice-cloning VOICE_CLONING
                        whether training voice cloning model.
```
1. `--config` is a config file in yaml format to overwrite the default config, which can be found at `conf/default.yaml`.
2. `--train-metadata` and `--dev-metadata` should be the metadata file in the normalized subfolder of `train` and `dev` in the `dump` folder.
3. `--output-dir` is the directory to save the results of the experiment. Checkpoints are saved in `checkpoints/` inside this directory.
4. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.
5. `--phones-dict` is the path of the phone vocabulary file.
6. `--speaker-dict` is the path of the speaker id map file when training a multi-speaker FastSpeech2.

We have **added module speaker classifier** with reference to [Learning to Speak Fluently in a Foreign Language: Multilingual Speech Synthesis and Cross-Language Voice Cloning](https://arxiv.org/pdf/1907.04448.pdf). The main parameter configuration: `config["model"]["enable_speaker_classifier"]`, `config["model"]["hidden_sc_dim"]` and `config["updater"]["spk_loss_scale"]` in `conf/default.yaml`. The current experimental results show that this module can decouple text information and speaker information, and more experiments are still being sorted out. This module is currently not enabled by default, if you are interested, you can try it yourself.


### Synthesizing
We use [parallel wavegan](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/voc1) as the default neural vocoder.
Download the pretrained parallel wavegan model from [pwg_aishell3_ckpt_0.5.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_aishell3_ckpt_0.5.zip) and unzip it.

When the speaker is `174` (csmsc), using csmsc's vocoder gives better results than using aishell3's. We recommend [hifigan_csmsc_ckpt_0.1.1.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_csmsc_ckpt_0.1.1.zip); please check `stage 2` of `synthesize_e2e.sh`.

But if speaker is `175` (ljspeech), we **don't** recommend you to use ljspeech's vocoder, because ljspeech's vocoders are trained on sample rate 22.05kHz, but this acoustic model is trained on sample rate 24kHz, you can use csmsc's vocoder also, because ljspeech and csmsc are both female speakers.

For speakers in aishell3 and vctk, we recommend that you use aishell3's or vctk's vocoders: ljspeech and csmsc are both female speakers, so their vocoders may not perform well for male speakers in aishell3 and vctk. You can check the speaker name and spk_id in `dump/speaker_id_map.txt`, check the speakers' information (Age / Gender / Accents / region, etc.) in [this issue](https://github.com/PaddlePaddle/PaddleSpeech/issues/1620), and choose the `spk_id` you want.


```bash
unzip pwg_aishell3_ckpt_0.5.zip
```
Parallel WaveGAN checkpoint contains files listed below.
```text
pwg_aishell3_ckpt_0.5
├── default.yaml                   # default config used to train parallel wavegan
├── feats_stats.npy                # statistics used to normalize spectrogram when training parallel wavegan
└── snapshot_iter_1000000.pdz      # generator parameters of parallel wavegan
```
`./local/synthesize.sh` calls `${BIN_DIR}/../synthesize.py`, which can synthesize waveform from `metadata.jsonl`.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
```text
usage: synthesize.py [-h]
                     [--am {speedyspeech_csmsc,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech,tacotron2_aishell3, fastspeech2_mix}]
                     [--am_config AM_CONFIG] [--am_ckpt AM_CKPT]
                     [--am_stat AM_STAT] [--phones_dict PHONES_DICT]
                     [--tones_dict TONES_DICT] [--speaker_dict SPEAKER_DICT]
                     [--voice-cloning VOICE_CLONING]
                     [--voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,wavernn_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,style_melgan_csmsc}]
                     [--voc_config VOC_CONFIG] [--voc_ckpt VOC_CKPT]
                     [--voc_stat VOC_STAT] [--ngpu NGPU]
                     [--test_metadata TEST_METADATA] [--output_dir OUTPUT_DIR]

Synthesize with acoustic model & vocoder

optional arguments:
  -h, --help            show this help message and exit
  --am {speedyspeech_csmsc,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech,tacotron2_aishell3, fastspeech2_mix}
                        Choose acoustic model type of tts task.
  --am_config AM_CONFIG
                        Config of acoustic model.
  --am_ckpt AM_CKPT     Checkpoint file of acoustic model.
  --am_stat AM_STAT     mean and standard deviation used to normalize
                        spectrogram when training acoustic model.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --tones_dict TONES_DICT
                        tone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --voice-cloning VOICE_CLONING
                        whether training voice cloning model.
  --voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,wavernn_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,style_melgan_csmsc}
                        Choose vocoder type of tts task.
  --voc_config VOC_CONFIG
                        Config of voc.
  --voc_ckpt VOC_CKPT   Checkpoint file of voc.
  --voc_stat VOC_STAT   mean and standard deviation used to normalize
                        spectrogram when training voc.
  --ngpu NGPU           if ngpu == 0, use cpu.
  --test_metadata TEST_METADATA
                        test metadata.
  --output_dir OUTPUT_DIR
                        output dir.


```
`./local/synthesize_e2e.sh` calls `${BIN_DIR}/../synthesize_e2e.py`, which can synthesize waveform from text file.
```bash
CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name}
```
```text
usage: synthesize_e2e.py [-h]
                         [--am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech, fastspeech2_mix}]
                         [--am_config AM_CONFIG] [--am_ckpt AM_CKPT]
                         [--am_stat AM_STAT] [--phones_dict PHONES_DICT]
                         [--tones_dict TONES_DICT]
                         [--speaker_dict SPEAKER_DICT] [--spk_id SPK_ID]
                         [--voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,style_melgan_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,wavernn_csmsc}]
                         [--voc_config VOC_CONFIG] [--voc_ckpt VOC_CKPT]
                         [--voc_stat VOC_STAT] [--lang LANG]
                         [--inference_dir INFERENCE_DIR] [--ngpu NGPU]
                         [--text TEXT] [--output_dir OUTPUT_DIR]

Synthesize with acoustic model & vocoder

optional arguments:
  -h, --help            show this help message and exit
  --am {speedyspeech_csmsc,speedyspeech_aishell3,fastspeech2_csmsc,fastspeech2_ljspeech,fastspeech2_aishell3,fastspeech2_vctk,tacotron2_csmsc,tacotron2_ljspeech, fastspeech2_mix}
                        Choose acoustic model type of tts task.
  --am_config AM_CONFIG
                        Config of acoustic model.
  --am_ckpt AM_CKPT     Checkpoint file of acoustic model.
  --am_stat AM_STAT     mean and standard deviation used to normalize
                        spectrogram when training acoustic model.
  --phones_dict PHONES_DICT
                        phone vocabulary file.
  --tones_dict TONES_DICT
                        tone vocabulary file.
  --speaker_dict SPEAKER_DICT
                        speaker id map file.
  --spk_id SPK_ID       spk id for multi speaker acoustic model
  --voc {pwgan_csmsc,pwgan_ljspeech,pwgan_aishell3,pwgan_vctk,mb_melgan_csmsc,style_melgan_csmsc,hifigan_csmsc,hifigan_ljspeech,hifigan_aishell3,hifigan_vctk,wavernn_csmsc}
                        Choose vocoder type of tts task.
  --voc_config VOC_CONFIG
                        Config of voc.
  --voc_ckpt VOC_CKPT   Checkpoint file of voc.
  --voc_stat VOC_STAT   mean and standard deviation used to normalize
                        spectrogram when training voc.
  --lang LANG           Choose model language. zh or en or mix
  --inference_dir INFERENCE_DIR
                        dir to save inference models
  --ngpu NGPU           if ngpu == 0, use cpu.
  --text TEXT           text to synthesize, a 'utt_id sentence' pair per line.
  --output_dir OUTPUT_DIR
                        output dir.
```
1. `--am` is acoustic model type with the format {model_name}_{dataset}
2. `--am_config`, `--am_ckpt`, `--am_stat`, `--phones_dict` and `--speaker_dict` are arguments for the acoustic model, which correspond to the 5 files in the fastspeech2 pretrained model.
3. `--voc` is vocoder type with the format {model_name}_{dataset}
4. `--voc_config`, `--voc_ckpt`, `--voc_stat` are arguments for vocoder, which correspond to the 3 files in the parallel wavegan pretrained model.
5. `--lang` is the model language, which can be `zh` or `en` or `mix`.
6. `--test_metadata` should be the metadata file in the normalized subfolder of `test`  in the `dump` folder.
7. `--text` is the text file, which contains sentences to synthesize.
8. `--output_dir` is the directory to save synthesized audio files.
9. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu.


## Pretrained Model

Pretrained FastSpeech2 model with no silence in the edge of audios:
- [fastspeech2_mix_ckpt_1.2.0.zip](https://paddlespeech.cdn.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_ckpt_1.2.0.zip)
- [pwg_aishell3_ckpt_0.5.zip](https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_aishell3_ckpt_0.5.zip)

The static model can be downloaded here:
- [fastspeech2_mix_static_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_static_0.2.0.zip)

The PIR static model can be downloaded here:
- [fastspeech2_mix_static_pir_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_static_pir_0.2.0.zip) (To run the PIR model, set FLAGS_enable_pir_api=1; the PIR model only works with paddlepaddle>=3.0.0b2)

The ONNX model can be downloaded here:
- [fastspeech2_mix_onnx_0.2.0.zip](https://paddlespeech.cdn.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_onnx_0.2.0.zip)

FastSpeech2 checkpoint contains files listed below.

```text
fastspeech2_mix_ckpt_1.2.0
├── default.yaml            # default config used to train fastspeech2
├── energy_stats.npy        # statistics used to normalize energy when training fastspeech2
├── phone_id_map.txt        # phone vocabulary file when training fastspeech2
├── pitch_stats.npy         # statistics used to normalize pitch when training fastspeech2
├── snapshot_iter_99200.pdz # model parameters and optimizer states
├── speaker_id_map.txt      # speaker id map file when training a multi-speaker fastspeech2
└── speech_stats.npy        # statistics used to normalize spectrogram when training fastspeech2
```


You can use the following script to synthesize `${BIN_DIR}/../../assets/sentences_mix.txt` using the pretrained fastspeech2 and parallel wavegan models.
`174` means baker speaker, `175` means ljspeech speaker. For other speaker information, please see `speaker_id_map.txt`.

```bash
source path.sh

FLAGS_allocator_strategy=naive_best_fit \
FLAGS_fraction_of_gpu_memory_to_use=0.01 \
python3 ${BIN_DIR}/../synthesize_e2e.py \
  --am=fastspeech2_mix \
  --am_config=exp/pretrain/fastspeech2_mix_ckpt_1.2.0/default.yaml \
  --am_ckpt=exp/pretrain/fastspeech2_mix_ckpt_1.2.0/snapshot_iter_99200.pdz \
  --am_stat=exp/pretrain/fastspeech2_mix_ckpt_1.2.0/speech_stats.npy \
  --phones_dict=exp/pretrain/fastspeech2_mix_ckpt_1.2.0/phone_id_map.txt \
  --speaker_dict=exp/pretrain/fastspeech2_mix_ckpt_1.2.0/speaker_id_map.txt \
  --spk_id=174 \
  --voc=pwgan_aishell3 \
  --voc_config=exp/pretrain/pwg_aishell3_ckpt_0.5/default.yaml \
  --voc_ckpt=exp/pretrain/pwg_aishell3_ckpt_0.5/snapshot_iter_1000000.pdz \
  --voc_stat=exp/pretrain/pwg_aishell3_ckpt_0.5/feats_stats.npy \
  --lang=mix \
  --text=${BIN_DIR}/../../assets/sentences_mix.txt \
  --output_dir=exp/default/test_e2e \
  --inference_dir=exp/default/inference
```


================================================
FILE: examples/zh_en_tts/tts3/conf/default.yaml
================================================
###########################################################
#                FEATURE EXTRACTION SETTING               #
###########################################################

fs: 24000          # sr
n_fft: 2048        # FFT size (samples).
n_shift: 300       # Hop size (samples). 12.5ms
win_length: 1200   # Window length (samples). 50ms
                   # If set to null, it will be the same as n_fft.
window: "hann"     # Window function.

# Only used for feats_type != raw

fmin: 80           # Minimum frequency of Mel basis.
fmax: 7600         # Maximum frequency of Mel basis.
n_mels: 80         # The number of mel basis.

# Only used for the model using pitch features (e.g. FastSpeech2)
f0min: 80          # Minimum f0 for pitch extraction.
f0max: 400         # Maximum f0 for pitch extraction.


###########################################################
#                       DATA SETTING                      #
###########################################################
batch_size: 64
num_workers: 2


###########################################################
#                       MODEL SETTING                     #
###########################################################
model:
    adim: 384         # attention dimension
    aheads: 2         # number of attention heads
    elayers: 4        # number of encoder layers
    eunits: 1536      # number of encoder ff units
    dlayers: 4        # number of decoder layers
    dunits: 1536      # number of decoder ff units
    positionwise_layer_type: conv1d   # type of position-wise layer
    positionwise_conv_kernel_size: 3  # kernel size of position wise conv layer
    duration_predictor_layers: 2      # number of layers of duration predictor
    duration_predictor_chans: 256     # number of channels of duration predictor
    duration_predictor_kernel_size: 3 # filter size of duration predictor
    postnet_layers: 5                 # number of layers of postnet
    postnet_filts: 5                  # filter size of conv layers in postnet
    postnet_chans: 256                # number of channels of conv layers in postnet
    use_scaled_pos_enc: True          # whether to use scaled positional encoding
    encoder_normalize_before: True    # whether to perform layer normalization before the input
    decoder_normalize_before: True    # whether to perform layer normalization before the input
    reduction_factor: 1               # reduction factor
    init_type: xavier_uniform         # initialization type
    init_enc_alpha: 1.0               # initial value of alpha of encoder scaled position encoding
    init_dec_alpha: 1.0               # initial value of alpha of decoder scaled position encoding
    transformer_enc_dropout_rate: 0.2            # dropout rate for transformer encoder layer
    transformer_enc_positional_dropout_rate: 0.2 # dropout rate for transformer encoder positional encoding
    transformer_enc_attn_dropout_rate: 0.2       # dropout rate for transformer encoder attention layer
    transformer_dec_dropout_rate: 0.2            # dropout rate for transformer decoder layer
    transformer_dec_positional_dropout_rate: 0.2 # dropout rate for transformer decoder positional encoding
    transformer_dec_attn_dropout_rate: 0.2       # dropout rate for transformer decoder attention layer
    pitch_predictor_layers: 5                  # number of conv layers in pitch predictor
    pitch_predictor_chans: 256                 # number of channels of conv layers in pitch predictor
    pitch_predictor_kernel_size: 5             # kernel size of conv layers in pitch predictor
    pitch_predictor_dropout: 0.5               # dropout rate in pitch predictor
    pitch_embed_kernel_size: 1                 # kernel size of conv embedding layer for pitch
    pitch_embed_dropout: 0.0                   # dropout rate after conv embedding layer for pitch
    stop_gradient_from_pitch_predictor: True   # whether to stop the gradient from pitch predictor to encoder
    energy_predictor_layers: 2                 # number of conv layers in energy predictor
    energy_predictor_chans: 256                # number of channels of conv layers in energy predictor
    energy_predictor_kernel_size: 3            # kernel size of conv layers in energy predictor
    energy_predictor_dropout: 0.5              # dropout rate in energy predictor
    energy_embed_kernel_size: 1                # kernel size of conv embedding layer for energy
    energy_embed_dropout: 0.0                  # dropout rate after conv embedding layer for energy
    stop_gradient_from_energy_predictor: False # whether to stop the gradient from energy predictor to encoder
    spk_embed_dim: 256                         # speaker embedding dimension
    spk_embed_integration_type: concat         # speaker embedding integration type
    enable_speaker_classifier: False           # Whether to use speaker classifier module
    hidden_sc_dim: 256                         # The hidden layer dim of speaker classifier
    

###########################################################
#                       UPDATER SETTING                   #
###########################################################
updater:
    use_masking: True                 # whether to apply masking for padded part in loss calculation
    spk_loss_scale: 0.02              # scale factor for the speaker classifier loss


###########################################################
#                     OPTIMIZER SETTING                   #
###########################################################
optimizer:
    optim: adam               # optimizer type
    learning_rate: 0.001      # learning rate

###########################################################
#                     TRAINING SETTING                    #
###########################################################
max_epoch: 200
num_snapshots: 5


###########################################################
#                       OTHER SETTING                     #
###########################################################
seed: 10086


================================================
FILE: examples/zh_en_tts/tts3/local/inference.sh
================================================
#!/bin/bash

# Synthesize the mixed zh/en sentences with the exported static model.
# Usage: inference.sh <train_output_path>
train_output_path=$1

stage=0
stop_stage=0

# spk_id=174 means baker speaker (default), spk_id=175 means ljspeech speaker
spk_id=174

# run_inference <vocoder>
# Calls inference.py with fastspeech2_mix as acoustic model and <vocoder> as vocoder.
run_inference() {
    local voc=$1
    python3 ${BIN_DIR}/../inference.py \
        --inference_dir=${train_output_path}/inference \
        --am=fastspeech2_mix \
        --voc=${voc} \
        --text=${BIN_DIR}/../../assets/sentences_mix.txt \
        --output_dir=${train_output_path}/pd_infer_out \
        --phones_dict=dump/phone_id_map.txt \
        --speaker_dict=dump/speaker_id_map.txt \
        --lang=mix \
        --spk_id=${spk_id}
}

# The array index is the stage number:
#   stage 0 -> pwgan_aishell3
#   stage 1 -> hifigan_aishell3
#   stage 2 -> hifigan_csmsc (for speaker 174 (csmsc), csmsc's vocoder is
#              better than aishell3's)
vocoders=(pwgan_aishell3 hifigan_aishell3 hifigan_csmsc)

for idx in 0 1 2; do
    if [ ${stage} -le ${idx} ] && [ ${stop_stage} -ge ${idx} ]; then
        run_inference ${vocoders[${idx}]}
    fi
done


================================================
FILE: examples/zh_en_tts/tts3/local/mfa_download.sh
================================================
#!/bin/bash

# Download the MFA alignment archives used by this recipe into exp/mfa.
# The downloads run in parallel; the script exits non-zero if any of them fails.

exp=exp
mfa=$exp/mfa

mkdir -p $mfa

# Stop here rather than download into the current directory if the cd fails.
pushd $mfa || exit 1

urls=(
    https://paddlespeech.cdn.bcebos.com/MFA/BZNSYP/with_tone/baker_alignment_tone.tar.gz
    https://paddlespeech.cdn.bcebos.com/MFA/LJSpeech-1.1/ljspeech_alignment.tar.gz
    https://paddlespeech.cdn.bcebos.com/MFA/AISHELL-3/with_tone/aishell3_alignment_tone.tar.gz
    https://paddlespeech.cdn.bcebos.com/MFA/VCTK-Corpus-0.92/vctk_alignment.tar.gz
)

pids=()
for url in "${urls[@]}"; do
    wget -c "${url}" &
    pids+=($!)
done

# A bare `wait` always returns 0, which hides failed downloads.
# Waiting on each job individually exposes its exit status.
status=0
for pid in "${pids[@]}"; do
    wait "${pid}" || status=1
done

popd

exit ${status}


================================================
FILE: examples/zh_en_tts/tts3/local/model_download.sh
================================================
#!/bin/bash

# Download the pretrained acoustic model and vocoder archives into exp/pretrain.
# The downloads run in parallel; the script exits non-zero if any of them fails.

exp=exp
pretrain=$exp/pretrain

mkdir -p $pretrain

# Stop here rather than download into the current directory if the cd fails.
pushd $pretrain || exit 1

urls=(
    https://paddlespeech.cdn.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_ckpt_1.2.0.zip
    https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_aishell3_ckpt_0.5.zip
)

pids=()
for url in "${urls[@]}"; do
    wget -c "${url}" &
    pids+=($!)
done

# A bare `wait` always returns 0, which hides failed downloads.
# Waiting on each job individually exposes its exit status.
status=0
for pid in "${pids[@]}"; do
    wait "${pid}" || status=1
done

popd

exit ${status}


================================================
FILE: examples/zh_en_tts/tts3/local/ort_predict.sh
================================================
#!/bin/bash

# End-to-end synthesis from text with the ONNX models through onnxruntime.
# Usage: ort_predict.sh <train_output_path>
#
# The shebang above is required because run.sh executes this file directly
# (./local/ort_predict.sh); without it the script only runs when the caller
# happens to be a shell that falls back to interpreting it itself.
train_output_path=$1

stage=0
stop_stage=0

# spk_id=174 means baker speaker (default), spk_id=175 means ljspeech speaker
spk_id=174

# ort_predict <vocoder>
# Calls ort_predict_e2e.py with fastspeech2_mix as acoustic model and <vocoder> as vocoder.
ort_predict() {
    local voc=$1
    python3 ${BIN_DIR}/../ort_predict_e2e.py \
        --inference_dir=${train_output_path}/inference_onnx \
        --am=fastspeech2_mix \
        --voc=${voc} \
        --output_dir=${train_output_path}/onnx_infer_out_e2e \
        --text=${BIN_DIR}/../../assets/sentences_mix.txt \
        --phones_dict=dump/phone_id_map.txt \
        --device=cpu \
        --cpu_threads=4 \
        --lang=mix \
        --spk_id=${spk_id}
}

# e2e, synthesize from text
# voc: pwgan_aishell3
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    ort_predict pwgan_aishell3
fi

# voc: hifigan_aishell3
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    ort_predict hifigan_aishell3
fi

# voc: hifigan_csmsc
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    ort_predict hifigan_csmsc
fi


================================================
FILE: examples/zh_en_tts/tts3/local/preprocess.sh
================================================
#!/bin/bash

# Data preparation for the mixed zh/en FastSpeech2 recipe.
# Usage: preprocess.sh <config_path> <datasets_root_dir> <mfa_root_dir>

stage=0
stop_stage=100

config_path=$1
datasets_root_dir=$2
mfa_root_dir=$3

# in_range <n>: succeeds when stage <= n <= stop_stage.
in_range() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# gen_durations <corpus> <alignment_dir>
# Writes durations_<corpus>.txt from the MFA results in <mfa_root_dir>/<alignment_dir>.
gen_durations() {
    local corpus=$1
    local align_dir=$2
    echo "Generate durations_${corpus}.txt from MFA results ..."
    python3 ${MAIN_ROOT}/utils/gen_duration_from_textgrid.py \
        --inputdir=${mfa_root_dir}/${align_dir} \
        --output durations_${corpus}.txt \
        --config=${config_path}
}

# extract_features <dataset> <corpus_dir>
# Runs preprocess.py on <datasets_root_dir>/<corpus_dir>/ and dumps into ./dump.
# The metadata write method is "a", and all datasets share the same dump dir.
extract_features() {
    local dataset=$1
    local corpus_dir=$2
    echo "Extract ${dataset} features ..."
    python3 ${BIN_DIR}/preprocess.py \
        --dataset=${dataset} \
        --rootdir=${datasets_root_dir}/${corpus_dir}/ \
        --dumpdir=dump \
        --dur-file=durations.txt \
        --config=${config_path} \
        --num-cpu=20 \
        --cut-sil=True \
        --write_metadata_method=a
}

# compute_stats <field>
# Runs compute_statistics.py for one field of the training metadata.
compute_stats() {
    local field=$1
    python3 ${MAIN_ROOT}/utils/compute_statistics.py \
        --metadata=dump/train/raw/metadata.jsonl \
        --field-name="${field}"
}

# normalize_subset <subset>
# Normalizes dump/<subset>/raw into dump/<subset>/norm using the train stats.
normalize_subset() {
    local subset=$1
    python3 ${BIN_DIR}/normalize.py \
        --metadata=dump/${subset}/raw/metadata.jsonl \
        --dumpdir=dump/${subset}/norm \
        --speech-stats=dump/train/speech_stats.npy \
        --pitch-stats=dump/train/pitch_stats.npy \
        --energy-stats=dump/train/energy_stats.npy \
        --phones-dict=dump/phone_id_map.txt \
        --speaker-dict=dump/speaker_id_map.txt
}

# 1. get durations from MFA's result (stages 0-3), then merge them (stage 4)
if in_range 0; then
    gen_durations baker baker_alignment_tone
fi

if in_range 1; then
    gen_durations ljspeech ljspeech_alignment
fi

if in_range 2; then
    gen_durations aishell3 aishell3_alignment_tone
fi

if in_range 3; then
    gen_durations vctk vctk_alignment
fi

if in_range 4; then
    # concat duration file
    echo "concat durations_baker.txt, durations_ljspeech.txt, durations_aishell3.txt and durations_vctk.txt to durations.txt"
    cat durations_baker.txt durations_ljspeech.txt durations_aishell3.txt durations_vctk.txt > durations.txt
fi

# 2. extract features (stages 5-8)
if in_range 5; then
    extract_features baker BZNSYP
fi

if in_range 6; then
    extract_features ljspeech LJSpeech-1.1
fi

if in_range 7; then
    extract_features aishell3 data_aishell3
fi

if in_range 8; then
    extract_features vctk VCTK-Corpus-0.92
fi

# 3. get features' stats (mean and std)
if in_range 9; then
    echo "Get features' stats ..."
    for field in speech pitch energy; do
        compute_stats ${field}
    done
fi

# 4. normalize and convert phone/speaker to id, dev and test should use train's stats
if in_range 10; then
    echo "Normalize ..."
    for subset in train dev test; do
        normalize_subset ${subset}
    done
fi


================================================
FILE: examples/zh_en_tts/tts3/local/synthesize.sh
================================================
#!/bin/bash

# Synthesize the test set from dump/test/norm/metadata.jsonl with a trained checkpoint.
# Usage: synthesize.sh <config_path> <train_output_path> <ckpt_name>
config_path=$1
train_output_path=$2
ckpt_name=$3

stage=0
stop_stage=0

# synthesize_with <vocoder> <vocoder_dir> <vocoder_snapshot>
# Calls synthesize.py with fastspeech2_mix as acoustic model; the vocoder
# config, checkpoint and stats are all read from <vocoder_dir>.
synthesize_with() {
    local voc=$1
    local voc_dir=$2
    local voc_snapshot=$3
    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 ${BIN_DIR}/../synthesize.py \
        --am=fastspeech2_mix \
        --am_config=${config_path} \
        --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
        --am_stat=dump/train/speech_stats.npy \
        --voc=${voc} \
        --voc_config=${voc_dir}/default.yaml \
        --voc_ckpt=${voc_dir}/${voc_snapshot} \
        --voc_stat=${voc_dir}/feats_stats.npy \
        --test_metadata=dump/test/norm/metadata.jsonl \
        --output_dir=${train_output_path}/test \
        --phones_dict=dump/phone_id_map.txt \
        --speaker_dict=dump/speaker_id_map.txt
}

# voc: pwgan_aishell3
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    synthesize_with pwgan_aishell3 pwg_aishell3_ckpt_0.5 snapshot_iter_1000000.pdz
fi

# voc: hifigan_aishell3
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    synthesize_with hifigan_aishell3 hifigan_aishell3_ckpt_0.2.0 snapshot_iter_2500000.pdz
fi


================================================
FILE: examples/zh_en_tts/tts3/local/synthesize_e2e.sh
================================================
#!/bin/bash

# End-to-end synthesis (text -> wav) of the mixed zh/en sentences with a trained checkpoint.
# Usage: synthesize_e2e.sh <config_path> <train_output_path> <ckpt_name>
config_path=$1
train_output_path=$2
ckpt_name=$3

stage=0
stop_stage=0

# spk_id=174 means baker speaker (default), spk_id=175 means ljspeech speaker
spk_id=174

# synthesize_e2e_with <vocoder> <vocoder_dir> <vocoder_snapshot>
# Calls synthesize_e2e.py with fastspeech2_mix as acoustic model; the vocoder
# config, checkpoint and stats are all read from <vocoder_dir>.
synthesize_e2e_with() {
    local voc=$1
    local voc_dir=$2
    local voc_snapshot=$3
    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 ${BIN_DIR}/../synthesize_e2e.py \
        --am=fastspeech2_mix \
        --am_config=${config_path} \
        --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \
        --am_stat=dump/train/speech_stats.npy \
        --voc=${voc} \
        --voc_config=${voc_dir}/default.yaml \
        --voc_ckpt=${voc_dir}/${voc_snapshot} \
        --voc_stat=${voc_dir}/feats_stats.npy \
        --lang=mix \
        --text=${BIN_DIR}/../../assets/sentences_mix.txt \
        --output_dir=${train_output_path}/test_e2e \
        --phones_dict=dump/phone_id_map.txt \
        --speaker_dict=dump/speaker_id_map.txt \
        --spk_id=${spk_id} \
        --inference_dir=${train_output_path}/inference
}

# voc: pwgan_aishell3
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    synthesize_e2e_with pwgan_aishell3 pwg_aishell3_ckpt_0.5 snapshot_iter_1000000.pdz
fi

# voc: hifigan_aishell3
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    echo "in hifigan syn_e2e"
    synthesize_e2e_with hifigan_aishell3 hifigan_aishell3_ckpt_0.2.0 snapshot_iter_2500000.pdz
fi

# voc: hifigan_csmsc
# for speaker 174 (csmsc), csmsc's vocoder is better than aishell3's
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    echo "in csmsc's hifigan syn_e2e"
    synthesize_e2e_with hifigan_csmsc hifigan_csmsc_ckpt_0.1.1 snapshot_iter_2500000.pdz
fi

================================================
FILE: examples/zh_en_tts/tts3/run.sh
================================================
#!/bin/bash

# Top-level driver of the recipe: runs the stages in [stage, stop_stage] in order.

set -e
source path.sh

gpus=0,1
stage=0
stop_stage=100

datasets_root_dir=./data
mfa_root_dir=./data/mfa
conf_path=conf/default.yaml
train_output_path=exp/default
ckpt_name=snapshot_iter_99200.pdz


# The stage range can be chosen on the command line,
# e.g. `./run.sh --stage 0 --stop-stage 0`.
# This cannot be mixed with positional arguments (`$1`, `$2` ...).
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1

# should_run <n>: succeeds when stage <= n <= stop_stage.
should_run() {
    [ ${stage} -le $1 ] && [ ${stop_stage} -ge $1 ]
}

# stage 0: prepare data
if should_run 0; then
    ./local/preprocess.sh ${conf_path} ${datasets_root_dir} ${mfa_root_dir} || exit -1
fi

# stage 1: train model, all `ckpt` under `train_output_path/checkpoints/` dir
if should_run 1; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${train_output_path} || exit -1
fi

# stage 2: synthesize, vocoder is pwgan by default
if should_run 2; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

# stage 3: synthesize_e2e, vocoder is pwgan by default
if should_run 3; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/synthesize_e2e.sh ${conf_path} ${train_output_path} ${ckpt_name} || exit -1
fi

# stage 4: inference with static model, vocoder is pwgan by default
if should_run 4; then
    CUDA_VISIBLE_DEVICES=${gpus} ./local/inference.sh ${train_output_path} || exit -1
fi

# stage 5: convert the static models to ONNX
if should_run 5; then
    # install paddle2onnx
    pip install paddle2onnx --upgrade
    ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx fastspeech2_mix
    # considering the balance between speed and quality, we recommend that you use hifigan as vocoder
    # (only pwgan_aishell3 is converted here; the hifigan conversions are left commented out)
    ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx pwgan_aishell3
    # ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx hifigan_aishell3
    # ./local/paddle2onnx.sh ${train_output_path} inference inference_onnx hifigan_csmsc
fi

# stage 6: inference with onnxruntime, use fastspeech2 + pwgan by default
if should_run 6; then
    ./local/ort_predict.sh ${train_output_path}
fi


================================================
FILE: paddlespeech/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import _locale
# Replace `_locale._getdefaultlocale` with a stub that ignores its arguments
# and always returns ['en_US', 'utf8']. This runs at import time of the
# `paddlespeech` package and affects the whole process.
# NOTE(review): presumably this forces a UTF-8 default locale/encoding for
# code that consults this private hook — confirm the intent and the platforms
# it targets.
_locale._getdefaultlocale = (lambda *args: ['en_US', 'utf8'])


================================================
FILE: paddlespeech/audio/.gitignore
================================================
fc_patch/


================================================
FILE: paddlespeech/audio/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import backends
from . import compliance
from . import datasets
from . import functional
from . import streamdata
from . import text
from . import transform


================================================
FILE: paddlespeech/audio/backends/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .soundfile_backend import depth_convert
from .soundfile_backend import load
from .soundfile_backend import normalize
from .soundfile_backend import resample
from .soundfile_backend import soundfile_load
from .soundfile_backend import soundfile_save
from .soundfile_backend import to_mono


================================================
FILE: paddlespeech/audio/backends/common.py
================================================
# Taken from https://github.com/pytorch/audio/blob/main/torchaudio/backend/common.py with modification.


class AudioInfo:
    """Metadata record returned by the ``info`` function.

    This class is used by :ref:`"sox_io" backend<sox_io_backend>` and
    :ref:`"soundfile" backend with the new interface<soundfile_backend>`.

    :ivar int sample_rate: Sample rate
    :ivar int num_frames: The number of frames
    :ivar int num_channels: The number of channels
    :ivar int bits_per_sample: The number of bits per sample. This is 0 for lossy formats,
        or when it cannot be accurately inferred.
    :ivar str encoding: Audio encoding
        The values encoding can take are one of the following:

            * ``PCM_S``: Signed integer linear PCM
            * ``PCM_U``: Unsigned integer linear PCM
            * ``PCM_F``: Floating point linear PCM
            * ``FLAC``: Flac, Free Lossless Audio Codec
            * ``ULAW``: Mu-law
            * ``ALAW``: A-law
            * ``MP3`` : MP3, MPEG-1 Audio Layer III
            * ``VORBIS``: OGG Vorbis
            * ``AMR_WB``: Adaptive Multi-Rate Wideband
            * ``AMR_NB``: Adaptive Multi-Rate Narrowband
            * ``OPUS``: Opus
            * ``HTK``: Single channel 16-bit PCM
            * ``UNKNOWN`` : None of above
    """

    def __init__(
            self,
            sample_rate: int,
            num_frames: int,
            num_channels: int,
            bits_per_sample: int,
            encoding: str, ):
        # Store every constructor argument unchanged under the same name.
        (self.sample_rate, self.num_frames, self.num_channels,
         self.bits_per_sample, self.encoding) = (sample_rate, num_frames,
                                                 num_channels, bits_per_sample,
                                                 encoding)

    def __str__(self):
        # The "AudioMetaData" prefix is kept as is so the rendered text does
        # not change for existing callers.
        shown = ("sample_rate", "num_frames", "num_channels",
                 "bits_per_sample", "encoding")
        rendered = ", ".join(f"{name}={getattr(self, name)}" for name in shown)
        return f"AudioMetaData({rendered})"


================================================
FILE: paddlespeech/audio/backends/soundfile_backend.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import warnings
from typing import Optional
from typing import Tuple

import numpy as np
import paddle
import resampy
import soundfile
from scipy.io import wavfile

from ..utils import depth_convert
from ..utils import ParameterError
from .common import AudioInfo

# Names exported by `from <this module> import *`.
# NOTE(review): 'save', 'load' and 'info' are not defined in the part of the
# file reviewed here — presumably they are defined further down; confirm.
__all__ = [
    'resample',
    'to_mono',
    'normalize',
    'save',
    'soundfile_save',
    'load',
    'soundfile_load',
    'info',
]
# Normalization types reported in the error raised by `normalize` for an
# unsupported `norm_type`.
NORMALMIZE_TYPES = ['linear', 'gaussian']
# Values accepted by `to_mono` for `merge_type`.
MERGE_TYPES = ['ch0', 'ch1', 'random', 'average']
# Values accepted by `resample` for `mode`; passed to resampy as `filter`.
RESAMPLE_MODES = ['kaiser_best', 'kaiser_fast']
# Small constant used by `normalize`: added to the peak amplitude and used as
# the lower bound of the standard deviation, so neither divisor is zero.
EPS = 1e-8


def resample(y: np.ndarray,
             src_sr: int,
             target_sr: int,
             mode: str='kaiser_fast') -> np.ndarray:
    """Audio resampling.

    Args:
        y (np.ndarray): Input waveform array in 1D or 2D.
        src_sr (int): Source sample rate.
        target_sr (int): Target sample rate.
        mode (str, optional): The resampling filter to use, one of
            `RESAMPLE_MODES`. Defaults to 'kaiser_fast'.

    Raises:
        ParameterError: If `y` is not a numpy array or `mode` is not supported.

    Returns:
        np.ndarray: `y` resampled to `target_sr`
    """

    # Validate the arguments first so an invalid call fails without also
    # emitting the slow-mode warning.
    if not isinstance(y, np.ndarray):
        # This must be an f-string, otherwise "{type(y)}" is printed literally.
        raise ParameterError(
            f'Only support numpy np.ndarray, but received y in {type(y)}')

    if mode not in RESAMPLE_MODES:
        raise ParameterError(f'resample mode must in {RESAMPLE_MODES}')

    if mode == 'kaiser_best':
        # Adjacent literals are used instead of a backslash continuation,
        # which would embed the source indentation in the message.
        warnings.warn(
            f'Using resampy in kaiser_best to {src_sr}=>{target_sr}. '
            'This function is pretty slow, '
            'we recommend the mode kaiser_fast in large scale audio training')

    return resampy.resample(y, src_sr, target_sr, filter=mode)


def to_mono(y: np.ndarray, merge_type: str='average') -> np.ndarray:
    """Convert stereo audio to mono.

    Only the first two channels (`y[0]` and `y[1]`) are considered.

    Args:
        y (np.ndarray): Input waveform array in 1D or 2D. A 2D array is
            indexed by channel along its first axis.
        merge_type (str, optional): Merge type to generate mono waveform, one
            of `MERGE_TYPES`. Defaults to 'average'.

    Raises:
        ParameterError: If `merge_type` is not supported, `y` has more than
            two dimensions, or the dtype cannot be averaged.

    Returns:
        np.ndarray: `y` with mono channel.
    """

    if merge_type not in MERGE_TYPES:
        raise ParameterError(
            f'Unsupported merge type {merge_type}, available types are {MERGE_TYPES}'
        )
    if y.ndim > 2:
        raise ParameterError(
            f'Unsupported audio array,  y.ndim > 2, the shape is {y.shape}')
    if y.ndim == 1:  # nothing to merge
        return y

    if merge_type == 'ch0':
        return y[0]
    if merge_type == 'ch1':
        return y[1]
    if merge_type == 'random':
        return y[np.random.randint(0, 2)]

    # merge_type == 'average': the averaging depends on the dtype.
    if np.issubdtype(y.dtype, np.floating):
        # Any float dtype is accepted (float32 as before, plus e.g. float64).
        return (y[0] + y[1]) * 0.5

    if y.dtype in ('int8', 'int16', 'int32'):
        # Widen before adding so the sum of two samples cannot overflow,
        # then cast the floored mean back to the input dtype.
        widened = y.astype('int64')
        mean = (widened[0] + widened[1]) // 2
        limits = np.iinfo(y.dtype)
        return np.clip(mean, limits.min, limits.max).astype(y.dtype)

    raise ParameterError(f'Unsupported dtype: {y.dtype}')


def soundfile_load_(file: os.PathLike,
                    offset: Optional[float]=None,
                    dtype: str='int16',
                    duration: Optional[int]=None) -> Tuple[np.ndarray, int]:
    """Read a waveform from an audio file with the soundfile library.

    Args:
        file (os.PathLike): File of waveform.
        offset (Optional[float], optional): Offset to the start of waveform; it is
            multiplied by the file's sample rate to get the frame to seek to.
            Defaults to None.
        dtype (str, optional): Data type of waveform. Defaults to 'int16'.
        duration (Optional[int], optional): Duration of waveform to read; it is
            multiplied by the file's sample rate to get the number of frames.
            None reads to the end. Defaults to None.

    Returns:
        Tuple[np.ndarray, int]: Waveform in ndarray (transposed relative to what
            soundfile returns) and the file's samplerate.
    """
    with soundfile.SoundFile(file) as snd:
        sr_native = snd.samplerate
        # A falsy offset (None or 0) reads from the start of the file.
        if offset:
            snd.seek(int(offset * sr_native))
        # -1 asks soundfile to read all remaining frames.
        n_frames = -1 if duration is None else int(duration * sr_native)
        wav = snd.read(frames=n_frames, dtype=dtype, always_2d=False).T

    return wav, sr_native


def normalize(y: np.ndarray, norm_type: str='linear',
              mul_factor: float=1.0) -> np.ndarray:
    """Scale a waveform by its statistics, then by an extra multiplier.

    Args:
        y (np.ndarray): Input waveform array in 1D or 2D.
        norm_type (str, optional): Type of normalization. 'linear' divides by
            the peak absolute value; 'gaussian' subtracts the mean and divides
            by the standard deviation. Defaults to 'linear'.
        mul_factor (float, optional): Scaling factor. Defaults to 1.0.

    Raises:
        NotImplementedError: If `norm_type` is neither 'linear' nor 'gaussian'.

    Returns:
        np.ndarray: `y` after normalization.
    """

    if norm_type == 'linear':
        peak = np.max(np.abs(y))
        # EPS keeps the divisor non-zero for an all-zero signal.
        scale = 1.0 / (peak + EPS)
        return y * scale * mul_factor

    if norm_type == 'gaussian':
        mean = np.mean(y)
        # The standard deviation is floored at EPS so a constant signal does
        # not divide by zero.
        spread = max(np.std(y), EPS)
        return mul_factor * (y - mean) / spread

    raise NotImplementedError(f'norm_type should be in {NORMALMIZE_TYPES}')


def soundfile_save(y: np.ndarray, sr: int, file: os.PathLike) -> None:
    """Save audio file to disk. This function saves audio to disk using scipy.io.wavfile, with additional step to convert input waveform to int16.

    Args:
        y (np.ndarray): Input waveform array in 1D or 2D.
        sr (int): Sample rate.
        file (os.PathLike): Path of audio file to save, given as a `str` or
            any path-like object such as `pathlib.Path`. Must end with '.wav'.

    Raises:
        ParameterError: If `file` does not end with '.wav' or `sr` is not positive.
    """
    # Convert path-like objects to a plain path first; `pathlib.Path` has no
    # `endswith`, so calling it on `file` directly would fail.
    path = os.fspath(file)

    if not path.endswith('.wav'):
        raise ParameterError(
            f'only .wav file supported, but dst file name is: {file}')

    if sr <= 0:
        raise ParameterError(
            f'Sample rate should be larger than 0, received sr = {sr}')

    if y.dtype not in ['int16', 'int8']:
        warnings.warn(
            f'input data type is {y.dtype}, will convert data to int16 format before saving'
        )
        y_out = depth_convert(y, 'int16')
    else:
        y_out = y

    wavfile.write(path, sr, y_out)


def soundfile_load(
        file: os.PathLike,
        sr: Optional[int]=None,
        mono: bool=True,
        merge_type: str='average',  # ch0,ch1,random,average
        normal: bool=True,
        norm_type: str='linear',
        norm_mul_factor: float=1.0,
        offset: float=0.0,
        duration: Optional[int]=None,
        dtype: str='float32',
        resample_mode: str='kaiser_fast') -> Tuple[np.ndarray, int]:
    """Read an audio file and post-process it (mono merge, resample, normalize, depth conversion).

    Args:
        file (os.PathLike): Path of audio file to load.
        sr (Optional[int], optional): Target sample rate; the waveform is resampled
            when this differs from the file's rate. Defaults to None (keep native rate).
        mono (bool, optional): Merge channels with `to_mono`. Defaults to True.
        merge_type (str, optional): Channel merge strategy passed to `to_mono`.
            Defaults to 'average'.
        normal (bool, optional): Normalize with `norm_type`/`norm_mul_factor`.
            Defaults to True.
        norm_type (str, optional): Type of normalization. Defaults to 'linear'.
        norm_mul_factor (float, optional): Scaling factor. Defaults to 1.0.
        offset (float, optional): Offset to the start of waveform. Defaults to 0.0.
        duration (Optional[int], optional): Duration of waveform to read. Defaults to None.
        dtype (str, optional): Data type of the returned waveform. Defaults to 'float32'.
        resample_mode (str, optional): The resampling filter to use. Defaults to 'kaiser_fast'.

    Raises:
        ParameterError: If the decoded waveform contains no samples.

    Returns:
        Tuple[np.ndarray, int]: Waveform in ndarray and its samplerate.
    """
    wav, native_sr = soundfile_load_(
        file, offset=offset, dtype=dtype, duration=duration)

    is_nonempty_1d = wav.ndim == 1 and len(wav) > 0
    has_samples = is_nonempty_1d or (wav.ndim == 2 and len(wav[0]) > 0)
    if not has_samples:
        raise ParameterError(f'audio file {file} looks empty')

    if mono:
        wav = to_mono(wav, merge_type)

    out_sr = native_sr
    if sr is not None and sr != native_sr:
        wav = resample(wav, native_sr, sr, mode=resample_mode)
        out_sr = sr

    if normal:
        wav = normalize(wav, norm_type, norm_mul_factor)
    elif dtype in ['int8', 'int16']:
        # Integer output still needs a peak-normalized signal before the
        # depth conversion below.
        wav = normalize(wav, 'linear', 1.0)

    return depth_convert(wav, dtype), out_sr


# The code below is adapted, with some modifications, from https://github.com/pytorch/audio/blob/main/torchaudio/backend/soundfile_backend.py


def _get_subtype_for_wav(dtype: paddle.dtype,
                         encoding: str,
                         bits_per_sample: int):
    """Pick the soundfile subtype for a WAV file.

    With no `encoding`, the subtype comes from `bits_per_sample` when given,
    otherwise from the tensor `dtype`. With an `encoding`, the requested bit
    depth is validated against what that encoding allows.

    Raises:
        ValueError: For an unsupported dtype, encoding, or encoding/bit-depth
            combination.
    """
    if not encoding:
        if bits_per_sample:
            # 8-bit WAV is unsigned; every other depth is signed PCM.
            if bits_per_sample == 8:
                return "PCM_U8"
            return f"PCM_{bits_per_sample}"
        dtype_to_subtype = {
            paddle.uint8: "PCM_U8",
            paddle.int16: "PCM_16",
            paddle.int32: "PCM_32",
            paddle.float32: "FLOAT",
            paddle.float64: "DOUBLE",
        }
        inferred = dtype_to_subtype.get(dtype)
        if not inferred:
            raise ValueError(f"Unsupported dtype for wav: {dtype}")
        return inferred

    if encoding == "PCM_S":
        if not bits_per_sample:
            return "PCM_32"
        if bits_per_sample == 8:
            raise ValueError("wav does not support 8-bit signed PCM encoding.")
        return f"PCM_{bits_per_sample}"

    if encoding == "PCM_F":
        if bits_per_sample in (None, 32):
            return "FLOAT"
        if bits_per_sample == 64:
            return "DOUBLE"
        raise ValueError("wav only supports 32/64-bit float PCM encoding.")

    # The remaining encodings all exist only as 8-bit variants.
    unset_or_8bit = bits_per_sample in (None, 8)
    if encoding == "PCM_U":
        if not unset_or_8bit:
            raise ValueError("wav only supports 8-bit unsigned PCM encoding.")
        return "PCM_U8"
    if encoding == "ULAW":
        if not unset_or_8bit:
            raise ValueError("wav only supports 8-bit mu-law encoding.")
        return "ULAW"
    if encoding == "ALAW":
        if not unset_or_8bit:
            raise ValueError("wav only supports 8-bit a-law encoding.")
        return "ALAW"

    raise ValueError(f"wav does not support {encoding}.")


def _get_subtype_for_sphere(encoding: str, bits_per_sample: int):
    if encoding in (None, "PCM_S"):
        return f"PCM_{bits_per_sample}" if bits_per_sample else "PCM_32"
    if encoding in ("PCM_U", "PCM_F"):
        raise ValueError(f"sph does not support {encoding} encoding.")
    if encoding == "ULAW":
        if bits_per_sample in (None, 8):
            return "ULAW"
        raise ValueError("sph only supports 8-bit for mu-law encoding.")
    if encoding == "ALAW":
        return "ALAW"
    raise ValueError(f"sph does not support {encoding}.")


def _get_subtype(dtype: paddle.dtype,
                 format: str,
                 encoding: str,
                 bits_per_sample: int):
    """Resolve the soundfile subtype for the given container format.

    Args:
        dtype (paddle.dtype): dtype of the tensor being saved (only consulted for wav).
        format (str): Lower-case format name: wav, flac, ogg, vorbis, sph, nis or nist.
        encoding (str): Requested encoding, or a falsy value for the default.
        bits_per_sample (int): Requested bit depth, or a falsy value for the default.

    Raises:
        ValueError: For an unknown format or an option the format does not support.
    """
    if format == "wav":
        return _get_subtype_for_wav(dtype, encoding, bits_per_sample)
    if format == "sph":
        return _get_subtype_for_sphere(encoding, bits_per_sample)
    if format in ("nis", "nist"):
        return "PCM_16"

    if format in ("ogg", "vorbis"):
        if encoding or bits_per_sample:
            raise ValueError(
                "ogg/vorbis does not support encoding/bits_per_sample.")
        return "VORBIS"

    if format != "flac":
        raise ValueError(f"Unsupported format: {format}")

    # flac: no encoding choice, depth limited to 24 bits, 16-bit by default.
    if encoding:
        raise ValueError("flac does not support encoding.")
    if not bits_per_sample:
        return "PCM_16"
    if bits_per_sample > 24:
        raise ValueError("flac does not support bits_per_sample > 24.")
    if bits_per_sample == 8:
        return "PCM_S8"
    return f"PCM_{bits_per_sample}"


def save(
        filepath: str,
        src: paddle.Tensor,
        sample_rate: int,
        channels_first: bool=True,
        compression: Optional[float]=None,
        format: Optional[str]=None,
        encoding: Optional[str]=None,
        bits_per_sample: Optional[int]=None, ):
    """Save audio data to file.

    Note:
        The formats this function can handle depend on the soundfile installation.
        This function is tested on the following formats;

        * WAV

            * 32-bit floating-point
            * 32-bit signed integer
            * 16-bit signed integer
            * 8-bit unsigned integer

        * FLAC
        * OGG/VORBIS
        * SPHERE

    Note:
        ``filepath`` argument is intentionally annotated as ``str`` only, even though it accepts
        ``pathlib.Path`` object as well. This is for the consistency with ``"sox_io"`` backend.

    Args:
        filepath (str or pathlib.Path): Path to audio file.
        src (paddle.Tensor): Audio data to save. must be 2D tensor.
        sample_rate (int): sampling rate
        channels_first (bool, optional): If ``True``, the given tensor is interpreted as `[channel, time]`,
            otherwise `[time, channel]`.
        compression (float or None, optional): Not used.
            It is here only for interface compatibility reason with "sox_io" backend.
        format (str or None, optional): Override the audio format.
            When ``filepath`` argument is path-like object, audio format is
            inferred from file extension. If the file extension is missing or
            different, you can specify the correct format with this argument.

            When ``filepath`` argument is file-like object,
            this argument is required.

            Valid values are ``"wav"``, ``"ogg"``, ``"vorbis"``,
            ``"flac"`` and ``"sph"``.
        encoding (str or None, optional): Changes the encoding for supported formats.
            This argument is effective only for supported formats, such as
            ``"wav"``, ``"flac"`` and ``"sph"``. Valid values are:

                - ``"PCM_S"`` (signed integer Linear PCM)
                - ``"PCM_U"`` (unsigned integer Linear PCM)
                - ``"PCM_F"`` (floating point PCM)
                - ``"ULAW"`` (mu-law)
                - ``"ALAW"`` (a-law)

        bits_per_sample (int or None, optional): Changes the bit depth for the
            supported formats.
            When ``format`` is one of ``"wav"``, ``"flac"`` or ``"sph"``,
            you can change the bit depth.
            Valid values are ``8``, ``16``, ``24``, ``32`` and ``64``.

    Raises:
        ValueError: If ``src`` is not 2D, ``bits_per_sample`` is invalid, or the
            format/encoding/bit-depth combination is not supported.
        RuntimeError: If ``filepath`` is a file-like object and ``format`` is missing.

    Supported formats/encodings/bit depth/compression are:

    ``"wav"``
        - 32-bit floating-point PCM
        - 32-bit signed integer PCM
        - 24-bit signed integer PCM
        - 16-bit signed integer PCM
        - 8-bit unsigned integer PCM
        - 8-bit mu-law
        - 8-bit a-law

        Note:
            Default encoding/bit depth is determined by the dtype of
            the input Tensor.

    ``"flac"``
        - 8-bit
        - 16-bit (default)
        - 24-bit

    ``"ogg"``, ``"vorbis"``
        - Doesn't accept changing configuration.

    ``"sph"``
        - 8-bit signed integer PCM
        - 16-bit signed integer PCM
        - 24-bit signed integer PCM
        - 32-bit signed integer PCM (default)
        - 8-bit mu-law
        - 8-bit a-law
        - 16-bit a-law
        - 24-bit a-law
        - 32-bit a-law

    """
    if src.ndim != 2:
        raise ValueError(f"Expected 2D Tensor, got {src.ndim}D.")
    if compression is not None:
        warnings.warn(
            '`save` function of "soundfile" backend does not support "compression" parameter. '
            "The argument is silently ignored.")
    if hasattr(filepath, "write"):
        if format is None:
            raise RuntimeError(
                "`format` is required when saving to file object.")
        ext = format.lower()
    else:
        ext = str(filepath).split(".")[-1].lower()

    if bits_per_sample not in (None, 8, 16, 24, 32, 64):
        raise ValueError("Invalid bits_per_sample.")
    if bits_per_sample == 24:
        warnings.warn(
            "Saving audio with 24 bits per sample might warp samples near -1. "
            "Using 16 bits per sample might be able to avoid this.")
    subtype = _get_subtype(src.dtype, ext, encoding, bits_per_sample)

    # sph is a extension used in TED-LIUM but soundfile does not recognize it as NIST format,
    # so we extend the extensions manually here.
    # The same translation is needed when the caller passes one of these
    # aliases explicitly as `format` (required for file-like objects);
    # otherwise the unrecognized name would be forwarded to soundfile as-is.
    nist_aliases = ("nis", "nist", "sph")
    if ext in nist_aliases and (format is None or
                                format.lower() in nist_aliases):
        format = "NIST"

    if channels_first:
        src = src.t()

    soundfile.write(
        file=filepath,
        data=src,
        samplerate=sample_rate,
        subtype=subtype,
        format=format)


_SUBTYPE2DTYPE = {
    "PCM_S8": "int8",
    "PCM_U8": "uint8",
    "PCM_16": "int16",
    "PCM_32": "int32",
    "FLOAT": "float32",
    "DOUBLE": "float64",
}


def load(
        filepath: str,
        frame_offset: int=0,
        num_frames: int=-1,
        normalize: bool=True,
        channels_first: bool=True,
        format: Optional[str]=None, ) -> Tuple[paddle.Tensor, int]:
    """Load audio data from file.

    Note:
        The formats this function can handle depend on the soundfile installation.
        This function is tested on the following formats;

        * WAV

            * 32-bit floating-point
            * 32-bit signed integer
            * 16-bit signed integer
            * 8-bit unsigned integer

        * FLAC
        * OGG/VORBIS
        * SPHERE

    With the defaults (``normalize=True``, ``channels_first=True``) the result is a
    ``float32`` Tensor of shape `[channel, time]` whose samples are normalized to
    ``[-1.0, 1.0]``.

    Data is read in the file's own sample type only when the file is a WAV and
    ``normalize=False``: ``int32`` for 32-bit signed PCM, ``int16`` for 16-bit signed
    PCM, ``uint8`` for 8-bit unsigned PCM (24-bit signed PCM is not supported).
    For every other format, ``normalize`` has no effect and ``float32`` is returned.

    Note:
        ``filepath`` argument is intentionally annotated as ``str`` only, even though it accepts
        ``pathlib.Path`` object as well. This is for the consistency with ``"sox_io"`` backend.

    Args:
        filepath (path-like object or file-like object):
            Source of audio data.
        frame_offset (int, optional):
            Number of frames to skip before start reading data.
        num_frames (int, optional):
            Maximum number of frames to read. ``-1`` reads all the remaining samples,
            starting from ``frame_offset``. Fewer frames may be returned when the
            file does not contain enough of them.
        normalize (bool, optional):
            When ``True``, always read as ``float32``. When ``False`` and the input
            is a WAV file, keep the file's native sample type.
        channels_first (bool, optional):
            When True, the returned Tensor has dimension `[channel, time]`.
            Otherwise, the returned Tensor's dimension is `[time, channel]`.
        format (str or None, optional):
            Not used. PySoundFile does not accept format hint.

    Raises:
        ValueError: If a WAV file is read with ``normalize=False`` and its subtype
            has no matching dtype.

    Returns:
        (paddle.Tensor, int): Resulting Tensor and sample rate.
    """
    with soundfile.SoundFile(filepath, "r") as file_:
        keep_native_type = file_.format == "WAV" and not normalize
        if not keep_native_type:
            dtype = "float32"
        elif file_.subtype in _SUBTYPE2DTYPE:
            dtype = _SUBTYPE2DTYPE[file_.subtype]
        else:
            raise ValueError(f"Unsupported subtype: {file_.subtype}")

        frames = file_._prepare_read(frame_offset, None, num_frames)
        samples = file_.read(frames, dtype, always_2d=True)
        sample_rate = file_.samplerate

    tensor = paddle.to_tensor(samples)
    if not channels_first:
        return tensor, sample_rate
    # soundfile returns [time, channel]; swap the axes for channels-first output.
    return paddle.transpose(tensor, perm=[1, 0]), sample_rate


# Mapping from soundfile subtype to number of bits per sample.
# This is mostly heuristic; the value is set to 0 when it is irrelevant
# (lossy formats) or when it can't be inferred.
# For ADPCM (and G72X) subtypes, it's hard to infer the bit depth because it's not part of the standard:
# According to https://en.wikipedia.org/wiki/Adaptive_differential_pulse-code_modulation#In_telephony,
# the default seems to be 8 bits but it can be compressed further to 4 bits.
# The dict is inspired from
# https://github.com/bastibe/python-soundfile/blob/744efb4b01abc72498a96b09115b42a4cabd85e4/soundfile.py#L66-L94
_SUBTYPE_TO_BITS_PER_SAMPLE = {
    "PCM_S8": 8,  # Signed 8 bit data
    "PCM_16": 16,  # Signed 16 bit data
    "PCM_24": 24,  # Signed 24 bit data
    "PCM_32": 32,  # Signed 32 bit data
    "PCM_U8": 8,  # Unsigned 8 bit data (WAV and RAW only)
    "FLOAT": 32,  # 32 bit float data
    "DOUBLE": 64,  # 64 bit float data
    "ULAW": 8,  # U-Law encoded. See https://en.wikipedia.org/wiki/G.711#Types
    "ALAW": 8,  # A-Law encoded. See https://en.wikipedia.org/wiki/G.711#Types
    "IMA_ADPCM": 0,  # IMA ADPCM.
    "MS_ADPCM": 0,  # Microsoft ADPCM.
    "GSM610":
    0,  # GSM 6.10 encoding. (Wikipedia says 1.625 bit depth?? https://en.wikipedia.org/wiki/Full_Rate)
    "VOX_ADPCM": 0,  # OKI / Dialogix ADPCM
    "G721_32": 0,  # 32kbs G721 ADPCM encoding.
    "G723_24": 0,  # 24kbs G723 ADPCM encoding.
    "G723_40": 0,  # 40kbs G723 ADPCM encoding.
    "DWVW_12": 12,  # 12 bit Delta Width Variable Word encoding.
    "DWVW_16": 16,  # 16 bit Delta Width Variable Word encoding.
    "DWVW_24": 24,  # 24 bit Delta Width Variable Word encoding.
    "DWVW_N": 0,  # N bit Delta Width Variable Word encoding.
    "DPCM_8": 8,  # 8 bit differential PCM (XI only)
    "DPCM_16": 16,  # 16 bit differential PCM (XI only)
    "VORBIS": 0,  # Xiph Vorbis encoding. (lossy)
    "ALAC_16": 16,  # Apple Lossless Audio Codec (16 bit).
    "ALAC_20": 20,  # Apple Lossless Audio Codec (20 bit).
    "ALAC_24": 24,  # Apple Lossless Audio Codec (24 bit).
    "ALAC_32": 32,  # Apple Lossless Audio Codec (32 bit).
}


def _get_bit_depth(subtype):
    """Return the bits per sample for a soundfile subtype.

    Unknown subtypes produce a warning and are reported as 0.
    """
    if subtype in _SUBTYPE_TO_BITS_PER_SAMPLE:
        return _SUBTYPE_TO_BITS_PER_SAMPLE[subtype]

    warnings.warn(
        f"The {subtype} subtype is unknown to PaddleAudio. As a result, the bits_per_sample "
        "attribute will be set to 0. If you are seeing this warning, please "
        "report by opening an issue on github (after checking for existing/closed ones). "
        "You may otherwise ignore this warning.")
    return 0


_SUBTYPE_TO_ENCODING = {
    "PCM_S8": "PCM_S",
    "PCM_16": "PCM_S",
    "PCM_24": "PCM_S",
    "PCM_32": "PCM_S",
    "PCM_U8": "PCM_U",
    "FLOAT": "PCM_F",
    "DOUBLE": "PCM_F",
    "ULAW": "ULAW",
    "ALAW": "ALAW",
    "VORBIS": "VORBIS",
}


def _get_encoding(format: str, subtype: str):
    if format == "FLAC":
        return "FLAC"
    return _SUBTYPE_TO_ENCODING.get(subtype, "UNKNOWN")


def info(filepath: str, format: Optional[str]=None) -> AudioInfo:
    """Get signal information of an audio file.

    Note:
        ``filepath`` argument is intentionally annotated as ``str`` only, even though it accepts
        ``pathlib.Path`` object as well. This is for the consistency with ``"sox_io"`` backend.

    Args:
        filepath (path-like object or file-like object):
            Source of audio data.
        format (str or None, optional):
            Not used. PySoundFile does not accept format hint.

    Returns:
        AudioInfo: Sample rate, frame count and channel count as reported by
            soundfile, plus the bit depth and encoding derived from the
            file's format and subtype.

    """
    meta = soundfile.info(filepath)
    bit_depth = _get_bit_depth(meta.subtype)
    encoding_name = _get_encoding(meta.format, meta.subtype)
    return AudioInfo(
        meta.samplerate,
        meta.frames,
        meta.channels,
        bits_per_sample=bit_depth,
        encoding=encoding_name, )


================================================
FILE: paddlespeech/audio/compliance/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import kaldi
from . import librosa


================================================
FILE: paddlespeech/audio/compliance/kaldi.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from torchaudio(https://github.com/pytorch/audio)
import math
from typing import Tuple

import paddle
from paddle import Tensor

from ..functional import create_dct
from ..functional.window import get_window

# Names exported by `from ... import *`. `spectrogram` is defined below;
# `fbank` and `mfcc` are presumably defined further down in this module.
__all__ = [
    'spectrogram',
    'fbank',
    'mfcc',
]

# Window type names. These are the same string values that
# `_feature_window_function` compares its `window_type` argument against.
HANNING = 'hann'
HAMMING = 'hamming'
POVEY = 'povey'
RECTANGULAR = 'rect'
BLACKMAN = 'blackman'


def _get_epsilon(dtype):
    """Return the constant 1e-07 as a tensor of the requested dtype."""
    tiny = 1e-07
    return paddle.to_tensor(tiny, dtype=dtype)


def _next_power_of_2(x: int) -> int:
    return 1 if x == 0 else 2**(x - 1).bit_length()


def _get_strided(waveform: Tensor,
                 window_size: int,
                 window_shift: int,
                 snip_edges: bool) -> Tensor:
    """Cut a 1D waveform into overlapping frames.

    Args:
        waveform (Tensor): 1D waveform.
        window_size (int): Number of samples per frame.
        window_shift (int): Hop between consecutive frames, in samples.
        snip_edges (bool): If True, only frames that fit entirely inside the
            waveform are kept. If False, the waveform is extended with
            time-reversed copies of itself before framing.

    Returns:
        Tensor: Frames, one per row. An empty `(0, 0)` tensor is returned when
            `snip_edges` is True and the waveform is shorter than one window.
    """
    assert waveform.dim() == 1
    num_samples = waveform.shape[0]

    if snip_edges:
        if num_samples < window_size:
            return paddle.empty((0, 0), dtype=waveform.dtype)
        num_frames = 1 + (num_samples - window_size) // window_shift
    else:
        mirrored = paddle.flip(waveform, [0])
        num_frames = (num_samples + (window_shift // 2)) // window_shift
        pad = window_size // 2 - window_shift // 2
        if pad > 0:
            # Prepend the first `pad` samples in reverse order.
            pieces = (mirrored[-pad:], waveform, mirrored)
        else:
            # `pad` is zero or negative: drop `-pad` samples from the front.
            pieces = (waveform[-pad:], mirrored)
        waveform = paddle.concat(pieces, axis=0)

    framed = paddle.signal.frame(waveform, window_size, window_shift)
    return framed[:, :num_frames].T


def _feature_window_function(
        window_type: str,
        window_size: int,
        blackman_coeff: float,
        dtype: int, ) -> Tensor:
    """Build the analysis window of the requested type.

    Args:
        window_type (str): One of "hann", "hamming", "povey", "rect" or "blackman".
        window_size (int): Number of samples in the window.
        blackman_coeff (float): Coefficient used by the "blackman" window only.
        dtype: dtype of the returned tensor.

    Raises:
        Exception: If `window_type` is not one of the supported names.

    Returns:
        Tensor: The window as a 1D tensor of length `window_size`.
    """
    if window_type == "rect":
        return paddle.ones([window_size], dtype=dtype)

    if window_type == "blackman":
        step = 2 * math.pi / (window_size - 1)
        n = paddle.arange(window_size, dtype=dtype)
        first_term = 0.5 * paddle.cos(step * n)
        second_term = (0.5 - blackman_coeff) * paddle.cos(2 * step * n)
        return (blackman_coeff - first_term + second_term).astype(dtype)

    if window_type == "hamming":
        return get_window('hamming', window_size, fftbins=False, dtype=dtype)

    if window_type in ("hann", "povey"):
        hann = get_window('hann', window_size, fftbins=False, dtype=dtype)
        # The povey window is a hann window raised to the power 0.85.
        return hann.pow(0.85) if window_type == "povey" else hann

    raise Exception('Invalid window type ' + window_type)


def _get_log_energy(strided_input: Tensor, epsilon: Tensor,
                    energy_floor: float) -> Tensor:
    """Compute the log energy of each frame.

    The per-frame sum of squares is floored at `epsilon` before the log is
    taken; when `energy_floor` is non-zero the result is additionally floored
    at `log(energy_floor)`.
    """
    frame_energy = strided_input.pow(2).sum(1)
    log_energy = paddle.maximum(frame_energy, epsilon).log()
    if energy_floor != 0.0:
        log_floor = paddle.to_tensor(
            math.log(energy_floor), dtype=strided_input.dtype)
        log_energy = paddle.maximum(log_energy, log_floor)
    return log_energy


def _get_waveform_and_window_properties(
        waveform: Tensor,
        channel: int,
        sr: int,
        frame_shift: float,
        frame_length: float,
        round_to_power_of_two: bool,
        preemphasis_coefficient: float) -> Tuple[Tensor, int, int, int]:
    """Select one channel and derive the framing parameters in samples.

    Args:
        waveform (Tensor): Waveform indexed as `[channel, time]`.
        channel (int): Channel to use; negative values select channel 0.
        sr (int): Sample rate.
        frame_shift (float): Frame shift in milliseconds.
        frame_length (float): Frame length in milliseconds.
        round_to_power_of_two (bool): Round the padded window size up to a power of two.
        preemphasis_coefficient (float): Only validated here; must lie in [0, 1].

    Returns:
        Tuple[Tensor, int, int, int]: The selected channel, the window shift,
            the window size and the padded window size (all in samples).
    """
    channel = max(channel, 0)
    num_channels = waveform.shape[0]
    assert channel < num_channels, (
        'Invalid channel {} for size {}'.format(channel, num_channels))
    waveform = waveform[channel, :]  # size (n)

    # frame_shift and frame_length are given in milliseconds
    window_shift = int(sr * frame_shift * 0.001)
    window_size = int(sr * frame_length * 0.001)
    if round_to_power_of_two:
        padded_window_size = _next_power_of_2(window_size)
    else:
        padded_window_size = window_size

    num_samples = len(waveform)
    assert 2 <= window_size <= num_samples, (
        'choose a window size {} that is [2, {}]'.format(window_size,
                                                         num_samples))
    assert 0 < window_shift, '`window_shift` must be greater than 0'
    assert padded_window_size % 2 == 0, 'the padded `window_size` must be divisible by two.' \
                                        ' use `round_to_power_of_two` or change `frame_length`'
    assert 0. <= preemphasis_coefficient <= 1.0, '`preemphasis_coefficient` must be between [0,1]'
    assert sr > 0, '`sr` must be greater than zero'
    return waveform, window_shift, window_size, padded_window_size


def _get_window(waveform: Tensor,
                padded_window_size: int,
                window_size: int,
                window_shift: int,
                window_type: str,
                blackman_coeff: float,
                snip_edges: bool,
                raw_energy: bool,
                energy_floor: float,
                dither: float,
                remove_dc_offset: bool,
                preemphasis_coefficient: float) -> Tuple[Tensor, Tensor]:
    """Frame a waveform and apply dither, DC removal, pre-emphasis, windowing and zero-padding.

    Args:
        waveform (Tensor): 1D waveform.
        padded_window_size (int): Frame length after right zero-padding.
        window_size (int): Frame length in samples.
        window_shift (int): Hop between frames in samples.
        window_type (str): Window name passed to `_feature_window_function`.
        blackman_coeff (float): Coefficient for the Blackman window.
        snip_edges (bool): Passed to `_get_strided`.
        raw_energy (bool): If True, the log energy is computed before
            pre-emphasis and windowing; otherwise after windowing and padding.
        energy_floor (float): Floor applied to the energy (0.0 disables it).
        dither (float): Scale of the Gaussian noise added to every sample
            (0.0 disables dithering).
        remove_dc_offset (bool): Subtract the per-frame mean.
        preemphasis_coefficient (float): Pre-emphasis coefficient (0.0 disables it).

    Returns:
        Tuple[Tensor, Tensor]: The processed frames `(m, padded_window_size)`
            and the per-frame log energy `(m)`.
    """
    dtype = waveform.dtype
    epsilon = _get_epsilon(dtype)

    # (m, window_size)
    strided_input = _get_strided(waveform, window_size, window_shift,
                                 snip_edges)

    if dither != 0.0:
        # Box-Muller transform. The radius and the angle must come from two
        # *independent* uniform samples; reusing one sample for both makes
        # the noise a deterministic function of a single uniform, which is
        # not normally distributed.
        u_radius = paddle.maximum(
            epsilon, paddle.rand(strided_input.shape, dtype=dtype))
        u_angle = paddle.rand(strided_input.shape, dtype=dtype)
        rand_gauss = paddle.sqrt(-2 * u_radius.log()) * paddle.cos(
            2 * math.pi * u_angle)
        strided_input = strided_input + rand_gauss * dither

    if remove_dc_offset:
        row_means = paddle.mean(strided_input, axis=1).unsqueeze(1)  # (m, 1)
        strided_input = strided_input - row_means

    if raw_energy:
        signal_log_energy = _get_log_energy(strided_input, epsilon,
                                            energy_floor)  # (m)

    if preemphasis_coefficient != 0.0:
        # npu only support mode=constant right now
        if paddle.get_device().startswith('npu'):
            mode = 'constant'
        else:
            mode = 'replicate'

        # Shift every frame right by one sample so that column j holds the
        # previous sample x[j - 1].
        offset_strided_input = paddle.nn.functional.pad(
            strided_input.unsqueeze(0), (1, 0), data_format='NCL',
            mode=mode).squeeze(0)  # (m, window_size + 1)
        strided_input = (strided_input - preemphasis_coefficient *
                         offset_strided_input[:, :-1])

    window_function = _feature_window_function(
        window_type, window_size, blackman_coeff,
        dtype).unsqueeze(0)  # (1, window_size)
    strided_input = strided_input * window_function  # (m, window_size)

    # (m, padded_window_size)
    if padded_window_size != window_size:
        padding_right = padded_window_size - window_size
        strided_input = paddle.nn.functional.pad(
            strided_input.unsqueeze(0), (0, padding_right),
            data_format='NCL',
            mode='constant',
            value=0).squeeze(0)

    if not raw_energy:
        signal_log_energy = _get_log_energy(strided_input, epsilon,
                                            energy_floor)  # size (m)

    return strided_input, signal_log_energy


def _subtract_column_mean(tensor: Tensor, subtract_mean: bool) -> Tensor:
    """Subtract the mean over axis 0 from `tensor` when `subtract_mean` is set."""
    if not subtract_mean:
        return tensor
    column_means = paddle.mean(tensor, axis=0).unsqueeze(0)
    return tensor - column_means


def spectrogram(waveform: Tensor,
                blackman_coeff: float=0.42,
                channel: int=-1,
                dither: float=0.0,
                energy_floor: float=1.0,
                frame_length: float=25.0,
                frame_shift: float=10.0,
                preemphasis_coefficient: float=0.97,
                raw_energy: bool=True,
                remove_dc_offset: bool=True,
                round_to_power_of_two: bool=True,
                sr: int=16000,
                snip_edges: bool=True,
                subtract_mean: bool=False,
                window_type: str="povey") -> Tensor:
    """Compute a Kaldi-compatible log power spectrogram from a waveform.

    Args:
        waveform (Tensor): A waveform tensor with shape `(C, T)`.
        blackman_coeff (float, optional): Coefficient for the Blackman window. Defaults to 0.42.
        channel (int, optional): Channel of the waveform to use. Defaults to -1.
        dither (float, optional): Dithering constant. Defaults to 0.0.
        energy_floor (float, optional): Floor on the energy of the output spectrogram. Defaults to 1.0.
        frame_length (float, optional): Frame length in milliseconds. Defaults to 25.0.
        frame_shift (float, optional): Shift between adjacent frames in milliseconds. Defaults to 10.0.
        preemphasis_coefficient (float, optional): Pre-emphasis coefficient. Defaults to 0.97.
        raw_energy (bool, optional): Compute the energy before pre-emphasis and windowing. Defaults to True.
        remove_dc_offset (bool, optional): Subtract the mean of each frame. Defaults to True.
        round_to_power_of_two (bool, optional): Zero-pad each frame to a power-of-two length
            before the FFT. Defaults to True.
        sr (int, optional): Sample rate of the input waveform. Defaults to 16000.
        snip_edges (bool, optional): If True, drop trailing samples that cannot fill a whole
            frame. Otherwise the waveform is extended with mirrored samples. Defaults to True.
        subtract_mean (bool, optional): Subtract the per-column mean from the output. Defaults to False.
        window_type (str, optional): Window used for the FFT. Defaults to "povey".

    Returns:
        Tensor: A spectrogram tensor with shape `(m, padded_window_size // 2 + 1)`, where the
            number of frames m depends on frame_length and frame_shift. Column 0 holds the
            frame log energy.
    """
    epsilon = _get_epsilon(waveform.dtype)

    (waveform, window_shift, window_size,
     padded_window_size) = _get_waveform_and_window_properties(
         waveform, channel, sr, frame_shift, frame_length,
         round_to_power_of_two, preemphasis_coefficient)

    frames, frame_log_energy = _get_window(
        waveform, padded_window_size, window_size, window_shift, window_type,
        blackman_coeff, snip_edges, raw_energy, energy_floor, dither,
        remove_dc_offset, preemphasis_coefficient)

    spectrum = paddle.fft.rfft(frames)

    # (m, padded_window_size // 2 + 1)
    squared_magnitude = spectrum.abs().pow(2.)
    log_power = paddle.maximum(squared_magnitude, epsilon).log()
    # The first bin is replaced by the frame log energy.
    log_power[:, 0] = frame_log_energy

    return _subtract_column_mean(log_power, subtract_mean)


def _inverse_mel_scale_scalar(mel_freq: float) -> float:
    return 700.0 * (math.exp(mel_freq / 1127.0) - 1.0)


def _inverse_mel_scale(mel_freq: Tensor) -> Tensor:
    """Tensor version of the mel-to-Hz conversion: 700 * (exp(mel / 1127) - 1)."""
    growth = (mel_freq / 1127.0).exp()
    return 700.0 * (growth - 1.0)


def _mel_scale_scalar(freq: float) -> float:
    return 1127.0 * math.log(1.0 + freq / 700.0)


def _mel_scale(freq: Tensor) -> Tensor:
    """Tensor version of the Hz-to-mel conversion: 1127 * ln(1 + freq / 700)."""
    ratio = 1.0 + freq / 700.0
    return 1127.0 * ratio.log()


def _vtln_warp_freq(vtln_low_cutoff: float,
                    vtln_high_cutoff: float,
                    low_freq: float,
                    high_freq: float,
                    vtln_warp_factor: float,
                    freq: Tensor) -> Tensor:
    """Apply a piecewise-linear VTLN warp to a tensor of frequencies.

    Frequencies between the cutoffs `l` and `h` are scaled by
    `1 / vtln_warp_factor`. Between `low_freq` and `l`, and between `h` and
    `high_freq`, linear segments anchored at `low_freq` and `high_freq`
    connect to that scaled region. Frequencies below `low_freq` or above
    `high_freq` are returned unchanged.

    Args:
        vtln_low_cutoff (float): Lower cutoff of the warp; must be > `low_freq`.
        vtln_high_cutoff (float): Upper cutoff of the warp; must be < `high_freq`.
        low_freq (float): Lower edge of the warped range.
        high_freq (float): Upper edge of the warped range.
        vtln_warp_factor (float): Warp factor; the central scale is its reciprocal.
        freq (Tensor): Frequencies to warp.

    Returns:
        Tensor: Warped frequencies, created with `paddle.empty_like(freq)`.
    """
    assert vtln_low_cutoff > low_freq, 'be sure to set the vtln_low option higher than low_freq'
    assert vtln_high_cutoff < high_freq, 'be sure to set the vtln_high option lower than high_freq [or negative]'
    # Cutoffs of the central region, adjusted by the warp factor.
    l = vtln_low_cutoff * max(1.0, vtln_warp_factor)
    h = vtln_high_cutoff * min(1.0, vtln_warp_factor)
    scale = 1.0 / vtln_warp_factor
    # Warped values at the two cutoffs.
    Fl = scale * l
    Fh = scale * h
    assert l > low_freq and h < high_freq
    # Slopes of the outer segments, chosen so they meet the central segment
    # at l and h.
    scale_left = (Fl - low_freq) / (l - low_freq)
    scale_right = (high_freq - Fh) / (high_freq - h)
    res = paddle.empty_like(freq)

    outside_low_high_freq = paddle.less_than(freq, paddle.to_tensor(low_freq)) \
        | paddle.greater_than(freq, paddle.to_tensor(high_freq))
    before_l = paddle.less_than(freq, paddle.to_tensor(l))
    before_h = paddle.less_than(freq, paddle.to_tensor(h))
    after_h = paddle.greater_equal(freq, paddle.to_tensor(h))

    # The masks overlap, so the order of these assignments matters: each
    # later assignment overwrites the earlier ones on the elements it selects.
    res[after_h] = high_freq + scale_right * (freq[after_h] - high_freq)
    res[before_h] = scale * freq[before_h]
    res[before_l] = low_freq + scale_left * (freq[before_l] - low_freq)
    res[outside_low_high_freq] = freq[outside_low_high_freq]

    return res


def _vtln_warp_mel_freq(vtln_low_cutoff: float,
                        vtln_high_cutoff: float,
                        low_freq,
                        high_freq: float,
                        vtln_warp_factor: float,
                        mel_freq: Tensor) -> Tensor:
    """Warp mel-scale values with VTLN.

    The mel values are converted to Hz, warped by `_vtln_warp_freq`, and the
    result is converted back to the mel scale.
    """
    hz = _inverse_mel_scale(mel_freq)
    warped_hz = _vtln_warp_freq(vtln_low_cutoff, vtln_high_cutoff, low_freq,
                                high_freq, vtln_warp_factor, hz)
    return _mel_scale(warped_hz)


def _get_mel_banks(num_bins: int,
                   window_length_padded: int,
                   sample_freq: float,
                   low_freq: float,
                   high_freq: float,
                   vtln_low: float,
                   vtln_high: float,
                   vtln_warp_factor: float) -> Tuple[Tensor, Tensor]:
    """Build the triangular mel filter bank, optionally VTLN-warped.

    Args:
        num_bins (int): Number of mel filters; at least 3.
        window_length_padded (int): FFT size (must be even).
        sample_freq (float): Sample rate in Hz.
        low_freq (float): Lower edge of the filter bank in Hz.
        high_freq (float): Upper edge in Hz; a value `<= 0` is interpreted as
            an offset from the Nyquist frequency.
        vtln_low (float): Lower VTLN inflection point in Hz.
        vtln_high (float): Upper VTLN inflection point in Hz; a negative value
            is interpreted as an offset from the Nyquist frequency.
        vtln_warp_factor (float): VTLN warp factor; `1.0` disables warping.

    Returns:
        Tuple[Tensor, Tensor]: The filter weights with shape
        `(num_bins, window_length_padded // 2)` and the filter center
        frequencies in Hz with shape `(num_bins, 1)`.

    Raises:
        AssertionError: If any of the options is out of range.
    """
    # Three bins is the documented minimum (the message says "at least 3"),
    # so the bound is inclusive.
    assert num_bins >= 3, 'Must have at least 3 mel bins'
    assert window_length_padded % 2 == 0
    # Integer division: this is a bin count and is used as an arange length.
    num_fft_bins = window_length_padded // 2
    nyquist = 0.5 * sample_freq

    if high_freq <= 0.0:
        high_freq += nyquist

    assert (0.0 <= low_freq < nyquist) and (0.0 < high_freq <= nyquist) and (low_freq < high_freq), \
        ('Bad values in options: low-freq {} and high-freq {} vs. nyquist {}'.format(low_freq, high_freq, nyquist))

    fft_bin_width = sample_freq / window_length_padded
    mel_low_freq = _mel_scale_scalar(low_freq)
    mel_high_freq = _mel_scale_scalar(high_freq)

    # num_bins triangles need num_bins + 2 equally spaced mel points, i.e.
    # num_bins + 1 intervals between the low and high edges.
    mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins + 1)

    if vtln_high < 0.0:
        vtln_high += nyquist

    assert vtln_warp_factor == 1.0 or ((low_freq < vtln_low < high_freq) and
                                       (0.0 < vtln_high < high_freq) and (vtln_low < vtln_high)), \
        ('Bad values in options: vtln-low {} and vtln-high {}, versus '
         'low-freq {} and high-freq {}'.format(vtln_low, vtln_high, low_freq, high_freq))

    # Left / center / right mel points of every triangle, each (num_bins, 1).
    bin = paddle.arange(num_bins, dtype=paddle.float32).unsqueeze(1)
    left_mel = mel_low_freq + bin * mel_freq_delta
    center_mel = left_mel + mel_freq_delta
    right_mel = center_mel + mel_freq_delta

    if vtln_warp_factor != 1.0:
        left_mel = _vtln_warp_mel_freq(vtln_low, vtln_high, low_freq, high_freq,
                                       vtln_warp_factor, left_mel)
        center_mel = _vtln_warp_mel_freq(vtln_low, vtln_high, low_freq,
                                         high_freq, vtln_warp_factor,
                                         center_mel)
        right_mel = _vtln_warp_mel_freq(vtln_low, vtln_high, low_freq,
                                        high_freq, vtln_warp_factor, right_mel)

    center_freqs = _inverse_mel_scale(center_mel)  # (num_bins, 1)
    # Mel value of every FFT bin, shape (1, num_fft_bins).
    mel = _mel_scale(fft_bin_width * paddle.arange(
        num_fft_bins, dtype=paddle.float32)).unsqueeze(0)

    # Rising and falling edges of each triangle, shape (num_bins, num_fft_bins).
    up_slope = (mel - left_mel) / (center_mel - left_mel)
    down_slope = (right_mel - mel) / (right_mel - center_mel)

    if vtln_warp_factor == 1.0:
        # Without warping, the triangle is the positive part of the lower of
        # the two edges.
        bins = paddle.maximum(
            paddle.zeros([1]), paddle.minimum(up_slope, down_slope))
    else:
        # With warping, each edge is applied only strictly inside its own
        # interval; everything else stays zero.
        bins = paddle.zeros_like(up_slope)
        up_idx = paddle.greater_than(mel, left_mel) & paddle.less_than(
            mel, center_mel)
        down_idx = paddle.greater_than(mel, center_mel) & paddle.less_than(
            mel, right_mel)
        bins[up_idx] = up_slope[up_idx]
        bins[down_idx] = down_slope[down_idx]

    return bins, center_freqs


def fbank(waveform: Tensor,
          blackman_coeff: float=0.42,
          channel: int=-1,
          dither: float=0.0,
          energy_floor: float=1.0,
          frame_length: float=25.0,
          frame_shift: float=10.0,
          high_freq: float=0.0,
          htk_compat: bool=False,
          low_freq: float=20.0,
          n_mels: int=23,
          preemphasis_coefficient: float=0.97,
          raw_energy: bool=True,
          remove_dc_offset: bool=True,
          round_to_power_of_two: bool=True,
          sr: int=16000,
          snip_edges: bool=True,
          subtract_mean: bool=False,
          use_energy: bool=False,
          use_log_fbank: bool=True,
          use_power: bool=True,
          vtln_high: float=-500.0,
          vtln_low: float=100.0,
          vtln_warp: float=1.0,
          window_type: str="povey") -> Tensor:
    """Compute Kaldi-style mel filter bank features from a waveform.

    The waveform is framed and windowed, transformed with a real FFT, and
    projected onto a triangular mel filter bank.

    Args:
        waveform (Tensor): Input waveform with shape `(C, T)`.
        blackman_coeff (float, optional): Coefficient of the Blackman window. Defaults to 0.42.
        channel (int, optional): Channel of `waveform` to use. Defaults to -1.
        dither (float, optional): Dithering constant. Defaults to 0.0.
        energy_floor (float, optional): Floor applied to the frame energy. Defaults to 1.0.
        frame_length (float, optional): Frame length in milliseconds. Defaults to 25.0.
        frame_shift (float, optional): Shift between adjacent frames in milliseconds. Defaults to 10.0.
        high_freq (float, optional): Upper cut-off frequency. Defaults to 0.0.
        htk_compat (bool, optional): If True, the energy column is appended last
            instead of first. Only relevant when `use_energy` is True. Defaults to False.
        low_freq (float, optional): Lower cut-off frequency. Defaults to 20.0.
        n_mels (int, optional): Number of mel bins. Defaults to 23.
        preemphasis_coefficient (float, optional): Pre-emphasis coefficient. Defaults to 0.97.
        raw_energy (bool, optional): Whether the energy is computed before
            pre-emphasis and windowing. Defaults to True.
        remove_dc_offset (bool, optional): Whether to subtract the mean of each frame. Defaults to True.
        round_to_power_of_two (bool, optional): If True, zero-pad each frame so
            the FFT size is a power of two. Defaults to True.
        sr (int, optional): Sample rate of the waveform. Defaults to 16000.
        snip_edges (bool, optional): If True, drop trailing samples that cannot
            fill a whole frame; otherwise reflect-pad the waveform. Defaults to True.
        subtract_mean (bool, optional): Whether to subtract the per-column mean
            from the output. Defaults to False.
        use_energy (bool, optional): Whether to add a column holding the log
            energy of each frame. Defaults to False.
        use_log_fbank (bool, optional): Whether to return log mel energies. Defaults to True.
        use_power (bool, optional): Whether to use the power spectrum instead of
            the magnitude spectrum. Defaults to True.
        vtln_high (float, optional): High inflection point of the piecewise-linear
            VTLN warping function. Defaults to -500.0.
        vtln_low (float, optional): Low inflection point of the piecewise-linear
            VTLN warping function. Defaults to 100.0.
        vtln_warp (float, optional): VTLN warp factor. Defaults to 1.0.
        window_type (str, optional): Window used before the FFT. Defaults to "povey".

    Returns:
        Tensor: Filter bank features with shape `(m, n_mels)`, or
        `(m, n_mels + 1)` when `use_energy` is True.
    """
    dtype = waveform.dtype

    waveform, window_shift, window_size, padded_window_size = _get_waveform_and_window_properties(
        waveform, channel, sr, frame_shift, frame_length, round_to_power_of_two,
        preemphasis_coefficient)

    frames, log_energy = _get_window(
        waveform, padded_window_size, window_size, window_shift, window_type,
        blackman_coeff, snip_edges, raw_energy, energy_floor, dither,
        remove_dc_offset, preemphasis_coefficient)

    # (m, padded_window_size // 2 + 1)
    magnitude = paddle.fft.rfft(frames).abs()
    spectrum = magnitude.pow(2.) if use_power else magnitude

    # (n_mels, padded_window_size // 2)
    filter_bank, _ = _get_mel_banks(n_mels, padded_window_size, sr, low_freq,
                                    high_freq, vtln_low, vtln_high, vtln_warp)
    assert filter_bank.dtype == dtype

    # Append one zero column so the bank has as many columns as the spectrum:
    # (n_mels, padded_window_size // 2 + 1)
    filter_bank = paddle.nn.functional.pad(
        filter_bank.unsqueeze(0), (0, 1),
        data_format='NCL',
        mode='constant',
        value=0).squeeze(0)

    # (m, n_mels)
    features = paddle.mm(spectrum, filter_bank.T)
    if use_log_fbank:
        # Floor at epsilon before taking the log.
        features = paddle.maximum(features, _get_epsilon(dtype)).log()

    if use_energy:
        energy_column = log_energy.unsqueeze(1)  # (m, 1)
        if htk_compat:
            pieces = (features, energy_column)
        else:
            pieces = (energy_column, features)
        # (m, n_mels + 1)
        features = paddle.concat(pieces, axis=1)

    return _subtract_column_mean(features, subtract_mean)


def _get_dct_matrix(n_mfcc: int, n_mels: int) -> Tensor:
    """Return the first `n_mfcc` columns of the `(n_mels, n_mels)` DCT matrix.

    The first column of the matrix from `create_dct` is overwritten with the
    constant `sqrt(1 / n_mels)` before the columns are selected.
    """
    full_dct = create_dct(n_mels, n_mels, 'ortho')
    first_column_value = math.sqrt(1 / float(n_mels))
    full_dct[:, 0] = first_column_value
    # (n_mels, n_mfcc)
    return full_dct[:, :n_mfcc]


def _get_lifter_coeffs(n_mfcc: int, cepstral_lifter: float) -> Tensor:
    """Return the `n_mfcc` liftering weights `1 + 0.5 * L * sin(pi * i / L)`.

    `L` is `cepstral_lifter` and `i` runs over `0 .. n_mfcc - 1`.
    """
    index = paddle.arange(n_mfcc)
    sinusoid = paddle.sin(math.pi * index / cepstral_lifter)
    return 1.0 + 0.5 * cepstral_lifter * sinusoid


def mfcc(waveform: Tensor,
         blackman_coeff: float=0.42,
         cepstral_lifter: float=22.0,
         channel: int=-1,
         dither: float=0.0,
         energy_floor: float=1.0,
         frame_length: float=25.0,
         frame_shift: float=10.0,
         high_freq: float=0.0,
         htk_compat: bool=False,
         low_freq: float=20.0,
         n_mfcc: int=13,
         n_mels: int=23,
         preemphasis_coefficient: float=0.97,
         raw_energy: bool=True,
         remove_dc_offset: bool=True,
         round_to_power_of_two: bool=True,
         sr: int=16000,
         snip_edges: bool=True,
         subtract_mean: bool=False,
         use_energy: bool=False,
         vtln_high: float=-500.0,
         vtln_low: float=100.0,
         vtln_warp: float=1.0,
         window_type: str="povey") -> Tensor:
    """Compute mel frequency cepstral coefficients (MFCC) from a waveform.

    Log-power mel filter bank features are computed with `fbank`, projected
    with a DCT matrix and optionally liftered. The output is intended to be
    identical to Kaldi's.

    Args:
        waveform (Tensor): A waveform tensor with shape `(C, T)`.
        blackman_coeff (float, optional): Coefficient for Blackman window. Defaults to 0.42.
        cepstral_lifter (float, optional): Liftering constant used to scale the output MFCCs;
            `0.0` disables liftering. Defaults to 22.0.
        channel (int, optional): Select the channel of waveform. Defaults to -1.
        dither (float, optional): Dithering constant. Defaults to 0.0.
        energy_floor (float, optional): Floor on energy of the output Spectrogram. Defaults to 1.0.
        frame_length (float, optional): Frame length in milliseconds. Defaults to 25.0.
        frame_shift (float, optional): Shift between adjacent frames in milliseconds. Defaults to 10.0.
        high_freq (float, optional): The upper cut-off frequency. Defaults to 0.0.
        htk_compat (bool, optional): If True, the first output column is moved to the last
            position. Defaults to False.
        low_freq (float, optional): The lower cut-off frequency. Defaults to 20.0.
        n_mfcc (int, optional): Number of cepstra in MFCC; must not exceed `n_mels`. Defaults to 13.
        n_mels (int, optional): Number of mel bins. Defaults to 23.
        preemphasis_coefficient (float, optional): Preemphasis coefficient for input waveform. Defaults to 0.97.
        raw_energy (bool, optional): Whether to compute the energy before preemphasis and windowing. Defaults to True.
        remove_dc_offset (bool, optional): Whether to subtract the mean from the waveform on each frame. Defaults to True.
        round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input
            to FFT. Defaults to True.
        sr (int, optional): Sample rate of input waveform. Defaults to 16000.
        snip_edges (bool, optional): If True, drop samples at the end of the waveform that cannot fill a
            whole frame. Otherwise perform reflect padding at the end of the waveform. Defaults to True.
        subtract_mean (bool, optional): Whether to subtract the per-column mean from the output. Defaults to False.
        use_energy (bool, optional): If True, the first cepstral coefficient is replaced by the
            log energy of each frame. Defaults to False.
        vtln_high (float, optional): High inflection point in piecewise linear VTLN warping function. Defaults to -500.0.
        vtln_low (float, optional): Low inflection point in piecewise linear VTLN warping function. Defaults to 100.0.
        vtln_warp (float, optional): VTLN warp factor. Defaults to 1.0.
        window_type (str, optional): Choose type of window for FFT computation. Defaults to "povey".

    Returns:
        Tensor: A mel frequency cepstral coefficients tensor with shape `(m, n_mfcc)`.

    Raises:
        AssertionError: If `n_mfcc` is larger than `n_mels`.
    """
    assert n_mfcc <= n_mels, 'n_mfcc cannot be larger than n_mels: %d vs %d' % (
        n_mfcc, n_mels)

    dtype = waveform.dtype

    # Log-power mel energies, shape (m, n_mels + use_energy). Mean subtraction
    # is deferred until the end of this function.
    feature = fbank(
        waveform=waveform,
        blackman_coeff=blackman_coeff,
        channel=channel,
        dither=dither,
        energy_floor=energy_floor,
        frame_length=frame_length,
        frame_shift=frame_shift,
        high_freq=high_freq,
        htk_compat=htk_compat,
        low_freq=low_freq,
        n_mels=n_mels,
        preemphasis_coefficient=preemphasis_coefficient,
        raw_energy=raw_energy,
        remove_dc_offset=remove_dc_offset,
        round_to_power_of_two=round_to_power_of_two,
        sr=sr,
        snip_edges=snip_edges,
        subtract_mean=False,
        use_energy=use_energy,
        use_log_fbank=True,
        use_power=True,
        vtln_high=vtln_high,
        vtln_low=vtln_low,
        vtln_warp=vtln_warp,
        window_type=window_type)

    if use_energy:
        # fbank puts the energy column last when htk_compat is set and first
        # otherwise; split it off so only the mel columns go through the DCT.
        # (m)
        signal_log_energy = feature[:, n_mels if htk_compat else 0]
        mel_offset = int(not htk_compat)
        feature = feature[:, mel_offset:(n_mels + mel_offset)]

    # (n_mels, n_mfcc)
    dct_matrix = _get_dct_matrix(n_mfcc, n_mels).astype(dtype=dtype)

    # (m, n_mfcc)
    feature = feature.matmul(dct_matrix)

    if cepstral_lifter != 0.0:
        # (1, n_mfcc)
        lifter_coeffs = _get_lifter_coeffs(n_mfcc, cepstral_lifter).unsqueeze(0)
        feature *= lifter_coeffs.astype(dtype=dtype)

    if use_energy:
        # Replace the first cepstral coefficient with the frame log energy.
        feature[:, 0] = signal_log_energy

    if htk_compat:
        # Move the first column (C0 or the energy) to the end.
        energy = feature[:, 0].unsqueeze(1)  # (m, 1)
        feature = feature[:, 1:]  # (m, n_mfcc - 1)
        if not use_energy:
            # The column is C0 here; rescale it by sqrt(2).
            energy *= math.sqrt(2)

        feature = paddle.concat((feature, energy), axis=1)

    feature = _subtract_column_mean(feature, subtract_mean)
    return feature


================================================
FILE: paddlespeech/audio/compliance/librosa.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from librosa(https://github.com/librosa/librosa)
import warnings
from typing import List
from typing import Optional
from typing import Union

import numpy as np
import scipy
from numpy.lib.stride_tricks import as_strided
from scipy import signal

from ..utils import depth_convert
from ..utils import ParameterError

# Names exported by `from <this module> import *`, grouped by purpose.
__all__ = [
    # dsp
    'stft',
    'mfcc',
    'hz_to_mel',
    'mel_to_hz',
    'mel_frequencies',
    'power_to_db',
    'compute_fbank_matrix',
    'melspectrogram',
    'spectrogram',
    'mu_encode',
    'mu_decode',
    # augmentation
    'depth_augment',
    'spect_augment',
    'random_crop1d',
    'random_crop2d',
    'adaptive_spect_augment',
]


def _pad_center(data: np.ndarray, size: int, axis: int=-1,
                **kwargs) -> np.ndarray:
    """Pad an array to a target length along a target axis.

    This differs from `np.pad` by centering the data prior to padding,
    analogous to `str.center`. When the padding cannot be split evenly, the
    extra element goes on the trailing side.

    Args:
        data (np.ndarray): Array to pad.
        size (int): Target length along `axis`; must be at least `data.shape[axis]`.
        axis (int, optional): Axis to pad. Defaults to -1.
        **kwargs: Extra keyword arguments forwarded to `np.pad`; `mode`
            defaults to "constant".

    Returns:
        np.ndarray: The padded array.

    Raises:
        ParameterError: If `size` is smaller than `data.shape[axis]`.
    """
    kwargs.setdefault("mode", "constant")
    n = data.shape[axis]
    lpad = int((size - n) // 2)

    if lpad < 0:
        # f-string so the actual sizes are reported instead of raw placeholders.
        raise ParameterError(f"Target size ({size}) must be "
                             f"at least input size ({n})")

    lengths = [(0, 0)] * data.ndim
    lengths[axis] = (lpad, int(size - n - lpad))

    return np.pad(data, lengths, **kwargs)


def _split_frames(x: np.ndarray,
                  frame_length: int,
                  hop_length: int,
                  axis: int=-1) -> np.ndarray:
    """Return a strided view of `x` cut into (overlapping) frames.

    This function is aligned with librosa.frame. With `axis=-1` the frames are
    stacked along a new trailing axis (`[..., frame_length, n_frames]`); with
    `axis=0` along a new leading axis (`[n_frames, frame_length, ...]`).

    Raises:
        ParameterError: If `x` is not an ndarray, is shorter than
            `frame_length`, `hop_length < 1`, or `axis` is neither 0 nor -1.
    """
    if not isinstance(x, np.ndarray):
        raise ParameterError(
            f"Input must be of type numpy.ndarray, given type(x)={type(x)}")

    axis_len = x.shape[axis]
    if axis_len < frame_length:
        raise ParameterError(f"Input is too short (n={axis_len:d})"
                             f" for frame_length={frame_length:d}")

    if hop_length < 1:
        raise ParameterError(f"Invalid hop_length: {hop_length:d}")

    # The strided view needs a specific memory layout for each axis; copy
    # (with a warning) when the input does not already have it.
    copy_notice = (f"librosa.util.frame called with axis={axis} "
                   "on a non-contiguous input. This will result in a copy.")
    if axis == -1 and not x.flags["F_CONTIGUOUS"]:
        warnings.warn(copy_notice)
        x = np.asfortranarray(x)
    elif axis == 0 and not x.flags["C_CONTIGUOUS"]:
        warnings.warn(copy_notice)
        x = np.ascontiguousarray(x)

    frame_count = 1 + (axis_len - frame_length) // hop_length
    byte_strides = np.asarray(x.strides)

    # Byte distance between consecutive positions along the framed axis.
    unit_stride = np.prod(
        byte_strides[byte_strides > 0] // x.itemsize) * x.itemsize
    hop_stride = hop_length * unit_stride

    if axis == -1:
        out_shape = list(x.shape)[:-1] + [frame_length, frame_count]
        out_strides = list(byte_strides) + [hop_stride]
    elif axis == 0:
        out_shape = [frame_count, frame_length] + list(x.shape)[1:]
        out_strides = [hop_stride] + list(byte_strides)
    else:
        raise ParameterError(f"Frame axis={axis} must be either 0 or -1")

    return as_strided(x, shape=out_shape, strides=out_strides)


def _check_audio(y, mono=True) -> bool:
    """Determine whether a variable contains valid audio data.

    The audio `y` must be a non-empty, finite, floating-point np.ndarray with
    one dimension (mono) or, when `mono` is False, one or two dimensions.

    Args:
        y: Candidate audio buffer.
        mono (bool, optional): If True, only 1-D input is accepted. Defaults to True.

    Returns:
        bool: Always True when no error is raised.

    Raises:
        ParameterError: If any of the checks fails.
    """
    if not isinstance(y, np.ndarray):
        raise ParameterError("Audio data must be of type numpy.ndarray")
    # 0-d arrays have no sample axis, so reject them with the same error as
    # arrays that have too many dimensions.
    if y.ndim > 2 or y.ndim == 0:
        raise ParameterError(
            f"Invalid shape for audio ndim={y.ndim:d}, shape={y.shape}")

    if mono and y.ndim == 2:
        raise ParameterError(
            f"Invalid shape for mono audio ndim={y.ndim:d}, shape={y.shape}")

    # `size` is zero whenever any axis is empty; it works for 1-D and 2-D
    # input alike. (A `shape[1] < 0` test can never be true.)
    if y.size == 0:
        raise ParameterError(f"Audio is empty ndim={y.ndim:d}, shape={y.shape}")

    if not np.issubdtype(y.dtype, np.floating):
        raise ParameterError("Audio data must be floating-point")

    if not np.isfinite(y).all():
        raise ParameterError("Audio buffer is not finite everywhere")

    return True


def hz_to_mel(frequencies: Union[float, List[float], np.ndarray],
              htk: bool=False) -> np.ndarray:
    """Convert frequencies in Hz to the mel scale.

    With `htk=True` the HTK formula `2595 * log10(1 + f / 700)` is used.
    Otherwise the scale is linear below 1000 Hz and logarithmic above.

    Args:
        frequencies (Union[float, List[float], np.ndarray]): Frequencies in Hz.
        htk (bool, optional): Use htk scaling. Defaults to False.

    Returns:
        np.ndarray: Frequency in mels.
    """
    hz = np.asanyarray(frequencies)

    if htk:
        return 2595.0 * np.log10(1.0 + hz / 700.0)

    # Constants of the piecewise scale.
    f_min = 0.0
    f_sp = 200.0 / 3
    min_log_hz = 1000.0  # start of the log region (Hz)
    min_log_mel = (min_log_hz - f_min) / f_sp  # start of the log region (mels)
    logstep = np.log(6.4) / 27.0  # step size in the log region

    def _log_region(values):
        return min_log_mel + np.log(values / min_log_hz) / logstep

    # Start from the linear mapping, then overwrite the log region.
    mels = (hz - f_min) / f_sp

    if hz.ndim:
        # Array input: overwrite only the entries in the log region.
        in_log_region = hz >= min_log_hz
        mels[in_log_region] = _log_region(hz[in_log_region])
    elif hz >= min_log_hz:
        # Scalar input: check directly.
        mels = _log_region(hz)

    return mels


def mel_to_hz(mels: Union[float, List[float], np.ndarray],
              htk: bool=False) -> np.ndarray:
    """Convert mel bin numbers to frequencies.

    With `htk=True` the HTK formula `700 * (10 ** (m / 2595) - 1)` is used.
    Otherwise the scale is linear below the mel value that corresponds to
    1000 Hz and exponential above it.

    Args:
        mels (Union[float, List[float], np.ndarray]): Frequency in mels.
        htk (bool, optional): Use htk scaling. Defaults to False.

    Returns:
        np.ndarray: Frequencies in Hz.
    """
    mel_array = np.asanyarray(mels)

    if htk:
        return 700.0 * (10.0**(mel_array / 2595.0) - 1.0)

    # Fill in the linear scale
    f_min = 0.0
    f_sp = 200.0 / 3
    freqs = f_min + f_sp * mel_array

    # And now the nonlinear scale
    min_log_hz = 1000.0  # beginning of log region (Hz)
    min_log_mel = (min_log_hz - f_min) / f_sp  # same (Mels)
    logstep = np.log(6.4) / 27.0  # step size for log region

    if mel_array.ndim:
        # If we have vector data, vectorize
        log_t = mel_array >= min_log_mel
        freqs[log_t] = min_log_hz * \
            np.exp(logstep * (mel_array[log_t] - min_log_mel))
    elif mel_array >= min_log_mel:
        # If we have scalar data, check directly
        freqs = min_log_hz * np.exp(logstep * (mel_array - min_log_mel))

    return freqs


def mel_frequencies(n_mels: int=128,
                    fmin: float=0.0,
                    fmax: float=11025.0,
                    htk: bool=False) -> np.ndarray:
    """Return `n_mels` frequencies that are evenly spaced on the mel scale.

    Args:
        n_mels (int, optional): Number of mel bins. Defaults to 128.
        fmin (float, optional): Minimum frequency in Hz. Defaults to 0.0.
        fmax (float, optional): Maximum frequency in Hz. Defaults to 11025.0.
        htk (bool, optional): Use htk scaling. Defaults to False.

    Returns:
        np.ndarray: Vector of n_mels frequencies in Hz with shape `(n_mels,)`.
    """
    # Convert both limits to mels, space the points uniformly there, and
    # convert the grid back to Hz.
    mel_lo, mel_hi = (hz_to_mel(edge, htk=htk) for edge in (fmin, fmax))
    mel_grid = np.linspace(mel_lo, mel_hi, n_mels)
    return mel_to_hz(mel_grid, htk=htk)


def fft_frequencies(sr: int, n_fft: int) -> np.ndarray:
    """Return the center frequency of every bin of a real FFT.

    Args:
        sr (int): Sample rate.
        n_fft (int): FFT size.

    Returns:
        np.ndarray: FFT frequencies in Hz with shape `(n_fft//2 + 1,)`.
    """
    nyquist = float(sr) / 2
    bin_count = int(1 + n_fft // 2)
    return np.linspace(0, nyquist, bin_count, endpoint=True)


def compute_fbank_matrix(sr: int,
                         n_fft: int,
                         n_mels: int=128,
                         fmin: float=0.0,
                         fmax: Optional[float]=None,
                         htk: bool=False,
                         norm: str="slaney",
                         dtype: type=np.float32) -> np.ndarray:
    """Build the matrix that maps FFT bins onto triangular mel filters.

    Args:
        sr (int): Sample rate.
        n_fft (int): FFT size.
        n_mels (int, optional): Number of mel bins. Defaults to 128.
        fmin (float, optional): Minimum frequency in Hz. Defaults to 0.0.
        fmax (Optional[float], optional): Maximum frequency in Hz; `sr / 2`
            when None. Defaults to None.
        htk (bool, optional): Use htk scaling. Defaults to False.
        norm (str, optional): Type of normalization; only "slaney" is
            accepted. Defaults to "slaney".
        dtype (type, optional): Data type. Defaults to np.float32.

    Returns:
        np.ndarray: Mel transform matrix with shape `(n_mels, n_fft//2 + 1)`.

    Raises:
        ParameterError: If `norm` is not "slaney".
    """
    if norm != "slaney":
        raise ParameterError('norm must set to slaney')

    fmax = float(sr) / 2 if fmax is None else fmax

    n_mels = int(n_mels)
    n_freqs = int(1 + n_fft // 2)
    weights = np.zeros((n_mels, n_freqs), dtype=dtype)

    # Center frequency of every FFT bin and the n_mels + 2 band edges.
    fftfreqs = fft_frequencies(sr=sr, n_fft=n_fft)
    mel_f = mel_frequencies(n_mels + 2, fmin=fmin, fmax=fmax, htk=htk)

    fdiff = np.diff(mel_f)
    ramps = np.subtract.outer(mel_f, fftfreqs)

    # Rising and falling edge of every triangle, for all filters at once.
    rising = -ramps[:-2] / fdiff[:-1, np.newaxis]
    falling = ramps[2:] / fdiff[1:, np.newaxis]

    # Intersect the two edges with each other and with zero.
    weights[...] = np.maximum(0, np.minimum(rising, falling))

    # Slaney-style scaling: roughly constant energy per channel. `norm` can
    # only be "slaney" at this point.
    enorm = 2.0 / (mel_f[2:n_mels + 2] - mel_f[:n_mels])
    weights *= enorm[:, np.newaxis]

    # A filter whose lower edge is not 0 Hz must have some non-zero weight.
    has_response = (mel_f[:-2] == 0) | (weights.max(axis=1) > 0)
    if not np.all(has_response):
        warnings.warn("Empty filters detected in mel frequency basis. "
                      "Some channels will produce empty responses. "
                      "Try increasing your sampling rate (and fmax) or "
                      "reducing n_mels.")

    return weights


def stft(x: np.ndarray,
         n_fft: int=2048,
         hop_length: Optional[int]=None,
         win_length: Optional[int]=None,
         window: str="hann",
         center: bool=True,
         dtype: type=np.complex64,
         pad_mode: str="reflect") -> np.ndarray:
    """Short-time Fourier transform (STFT).

    Args:
        x (np.ndarray): Input waveform in one dimension.
        n_fft (int, optional): FFT size. Defaults to 2048.
        hop_length (Optional[int], optional): Number of steps to advance between adjacent windows.
            `win_length // 4` when None. Defaults to None.
        win_length (Optional[int], optional): The size of window. `n_fft` when None. Defaults to None.
        window (str, optional): A string of window specification. Defaults to "hann".
        center (bool, optional): Whether to pad `x` so that :math:`t \\times hop\\_length` is at the center
            of the `t`-th frame. Defaults to True.
        dtype (type, optional): Data type of STFT results. Defaults to np.complex64.
        pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to "reflect".

    Returns:
        np.ndarray: The complex STFT output with shape `(n_fft//2 + 1, num_frames)`.

    Raises:
        ParameterError: If `x` is not valid audio, or if `center` is False and
            `n_fft` is larger than the input length.
    """
    _check_audio(x)

    # By default, use the entire frame
    if win_length is None:
        win_length = n_fft

    # Set the default hop, if it's not already specified
    if hop_length is None:
        hop_length = int(win_length // 4)

    fft_window = signal.get_window(window, win_length, fftbins=True)

    # Pad the window out to n_fft size
    fft_window = _pad_center(fft_window, n_fft)

    # Reshape so that the window can be broadcast
    fft_window = fft_window.reshape((-1, 1))

    # Pad the time series so that frames are centered
    if center:
        if n_fft > x.shape[-1]:
            # With padding the transform still works, so only warn.
            warnings.warn(
                f"n_fft={n_fft} is too large for input signal of length={x.shape[-1]}"
            )
        x = np.pad(x, int(n_fft // 2), mode=pad_mode)

    elif n_fft > x.shape[-1]:
        # Without padding not even one full frame fits.
        raise ParameterError(
            f"n_fft={n_fft} is too large for input signal of length={x.shape[-1]}"
        )

    # Window the time series.
    x_frames = _split_frames(x, frame_length=n_fft, hop_length=hop_length)
    # Pre-allocate the STFT matrix
    stft_matrix = np.empty(
        (int(1 + n_fft // 2), x_frames.shape[1]), dtype=dtype, order="F")
    fft = np.fft  # use numpy fft as default
    # Constrain STFT block sizes to 256 KB
    MAX_MEM_BLOCK = 2**8 * 2**10
    # how many columns can we fit within MAX_MEM_BLOCK?
    n_columns = MAX_MEM_BLOCK // (stft_matrix.shape[0] * stft_matrix.itemsize)
    n_columns = max(n_columns, 1)

    # Transform the frames block by block to bound the temporary memory.
    for bl_s in range(0, stft_matrix.shape[1], n_columns):
        bl_t = min(bl_s + n_columns, stft_matrix.shape[1])
        stft_matrix[:, bl_s:bl_t] = fft.rfft(
            fft_window * x_frames[:, bl_s:bl_t], axis=0)

    return stft_matrix


def power_to_db(spect: np.ndarray,
                ref: float=1.0,
                amin: float=1e-10,
                top_db: Optional[float]=80.0) -> np.ndarray:
    """Convert a power spectrogram (amplitude squared) to decibel (dB) units.

    Computes `10 * log10(x / ref)` in a numerically stable way by flooring
    both `x` and `ref` at `amin` before taking the logarithm.

    Args:
        spect (np.ndarray): STFT power spectrogram of an input waveform.
        ref (float, optional): The reference value; a callable is also
            accepted and is applied to the magnitude to obtain the reference.
            If smaller than 1.0, the db level of the signal will be pulled up
            accordingly. Otherwise, the db level is pushed down. Defaults to 1.0.
        amin (float, optional): Minimum threshold. Defaults to 1e-10.
        top_db (Optional[float], optional): Threshold the output at `top_db`
            below the peak; None disables the threshold. Defaults to 80.0.

    Returns:
        np.ndarray: Power spectrogram in db scale.

    Raises:
        ParameterError: If `amin <= 0` or `top_db < 0`.
    """
    spect = np.asarray(spect)

    if amin <= 0:
        raise ParameterError("amin must be strictly positive")

    is_complex = np.issubdtype(spect.dtype, np.complexfloating)
    if is_complex:
        warnings.warn(
            "power_to_db was called on complex input so phase "
            "information will be discarded. To suppress this warning, "
            "call power_to_db(np.abs(D)**2) instead.")
    magnitude = np.abs(spect) if is_complex else spect

    # A callable reference computes the reference power from the data.
    ref_value = ref(magnitude) if callable(ref) else np.abs(ref)

    log_spec = 10.0 * np.log10(np.maximum(amin, magnitude))
    # In-place subtraction keeps the dtype of `log_spec`.
    log_spec -= 10.0 * np.log10(np.maximum(amin, ref_value))

    if top_db is None:
        return log_spec

    if top_db < 0:
        raise ParameterError("top_db must be non-negative")
    return np.maximum(log_spec, log_spec.max() - top_db)


def mfcc(x: np.ndarray,
         sr: int=16000,
         spect: Optional[np.ndarray]=None,
         n_mfcc: int=20,
         dct_type: int=2,
         norm: str="ortho",
         lifter: int=0,
         **kwargs) -> np.ndarray:
    """Mel-frequency cepstral coefficients (MFCCs)

    Args:
        x (np.ndarray): Input waveform in one dimension. Only used when `spect` is None.
        sr (int, optional): Sample rate. Defaults to 16000.
        spect (Optional[np.ndarray], optional): Input log-power Mel spectrogram;
            computed with `melspectrogram(x, sr=sr, **kwargs)` when None. Defaults to None.
        n_mfcc (int, optional): Number of cepstra in MFCC. Defaults to 20.
        dct_type (int, optional): Discrete cosine transform (DCT) type. Defaults to 2.
        norm (str, optional): Type of normalization. Defaults to "ortho".
        lifter (int, optional): Cepstral filtering; 0 disables it. Defaults to 0.
        **kwargs: Extra keyword arguments forwarded to `melspectrogram`.

    Returns:
        np.ndarray: Mel frequency cepstral coefficients array with shape `(n_mfcc, num_frames)`.

    Raises:
        ParameterError: If `lifter` is negative.
    """
    if spect is None:
        spect = melspectrogram(x, sr=sr, **kwargs)

    # DCT along the mel axis; keep the first n_mfcc coefficients.
    cepstra = scipy.fftpack.dct(
        spect, axis=0, type=dct_type, norm=norm)[:n_mfcc]

    if lifter == 0:
        return cepstra

    if not lifter > 0:
        raise ParameterError(
            f"MFCC lifter={lifter} must be a non-negative number")

    # NOTE(review): upstream librosa documents the liftering weight as
    # 1 + (lifter / 2) * sin(pi * (n + 1) / lifter); only the sine term is
    # applied here -- confirm whether this is intended.
    order = np.arange(1, 1 + n_mfcc, dtype=cepstra.dtype)
    weights = np.sin(np.pi * order / lifter)
    return cepstra * weights[:, np.newaxis]


def melspectrogram(x: np.ndarray,
                   sr: int=16000,
                   window_size: int=512,
                   hop_length: int=320,
                   n_mels: int=64,
                   fmin: float=50.0,
                   fmax: Optional[float]=None,
                   window: str='hann',
                   center: bool=True,
                   pad_mode: str='reflect',
                   power: float=2.0,
                   to_db: bool=True,
                   ref: float=1.0,
                   amin: float=1e-10,
                   top_db: Optional[float]=None) -> np.ndarray:
    """Compute mel-spectrogram.

    Args:
        x (np.ndarray): Input waveform in one dimension.
        sr (int, optional): Sample rate. Defaults to 16000.
        window_size (int, optional): Size of FFT and window length. Defaults to 512.
        hop_length (int, optional): Number of steps to advance between adjacent windows. Defaults to 320.
        n_mels (int, optional): Number of mel bins. Defaults to 64.
        fmin (float, optional): Minimum frequency in Hz. Defaults to 50.0.
        fmax (Optional[float], optional): Maximum frequency in Hz. When None, `sr // 2` is used. Defaults to None.
        window (str, optional): A string of window specification. Defaults to "hann".
        center (bool, optional): Whether to pad `x` so that the `t`-th frame is centered at sample `t * hop_length`. Defaults to True.
        pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to "reflect".
        power (float, optional): Exponent applied to the STFT magnitude before the mel projection. Defaults to 2.0.
        to_db (bool, optional): Enable db scale. Defaults to True.
        ref (float, optional): The reference value. If smaller than 1.0, the db level of the signal will be pulled up accordingly. Otherwise, the db level is pushed down. Defaults to 1.0.
        amin (float, optional): Minimum threshold. Defaults to 1e-10.
        top_db (Optional[float], optional): Threshold the output at `top_db` below the peak. Defaults to None.

    Raises:
        ParameterError: If the waveform is empty, or if `fmin`/`fmax` do not satisfy `0 <= fmin < fmax`.

    Returns:
        np.ndarray: The mel-spectrogram in power scale or db scale with shape `(n_mels, num_frames)`.
    """
    _check_audio(x, mono=True)
    if len(x) <= 0:
        raise ParameterError('The input waveform is empty')

    if fmax is None:
        fmax = sr // 2
    # fmin == 0 is accepted by this check, so the message states 0<=fmin.
    if fmin < 0 or fmin >= fmax:
        raise ParameterError('fmin and fmax must satisfy 0<=fmin<fmax')

    s = stft(
        x,
        n_fft=window_size,
        hop_length=hop_length,
        win_length=window_size,
        window=window,
        center=center,
        pad_mode=pad_mode)

    spect_power = np.abs(s)**power
    # Project the linear-frequency bins onto the mel filter bank.
    fb_matrix = compute_fbank_matrix(
        sr=sr, n_fft=window_size, n_mels=n_mels, fmin=fmin, fmax=fmax)
    mel_spect = np.matmul(fb_matrix, spect_power)
    if to_db:
        return power_to_db(mel_spect, ref=ref, amin=amin, top_db=top_db)
    else:
        return mel_spect


def spectrogram(x: np.ndarray,
                sr: int=16000,
                window_size: int=512,
                hop_length: int=320,
                window: str='hann',
                center: bool=True,
                pad_mode: str='reflect',
                power: float=2.0) -> np.ndarray:
    """Compute spectrogram.

    Args:
        x (np.ndarray): Input waveform in one dimension.
        sr (int, optional): Sample rate. Not used by the computation. Defaults to 16000.
        window_size (int, optional): Size of FFT and window length. Defaults to 512.
        hop_length (int, optional): Number of steps to advance between adjacent windows. Defaults to 320.
        window (str, optional): A string of window specification. Defaults to "hann".
        center (bool, optional): Whether to pad `x` so that the `t`-th frame is centered at sample `t * hop_length`. Defaults to True.
        pad_mode (str, optional): Choose padding pattern when `center` is `True`. Defaults to "reflect".
        power (float, optional): Exponent applied to the STFT magnitude. Defaults to 2.0.

    Returns:
        np.ndarray: The STFT spectrogram in power scale `(n_fft//2 + 1, num_frames)`.
    """
    # The FFT size and the window length are both tied to `window_size`.
    stft_options = dict(
        n_fft=window_size,
        hop_length=hop_length,
        win_length=window_size,
        window=window,
        center=center,
        pad_mode=pad_mode)
    magnitude = np.abs(stft(x, **stft_options))
    return magnitude**power


def mu_encode(x: np.ndarray, mu: int=255, quantized: bool=True) -> np.ndarray:
    """Mu-law encoding. Encode waveform based on mu-law companding. When quantized is True, the result is rounded to integer values in range `[0,mu]`. Otherwise, the resulting waveform is in range `[-1,1]`.

    Args:
        x (np.ndarray): The input waveform to encode, expected in range `[-1,1]`.
        mu (int, optional): The encoding parameter. Defaults to 255.
        quantized (bool, optional): If `True`, quantize the encoded values into `1 + mu` distinct integer values. Defaults to True.

    Returns:
        np.ndarray: The mu-law encoded waveform.
    """
    # Honour the caller-supplied `mu` (it used to be overwritten with 255).
    y = np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu)
    if quantized:
        # Map [-1, 1] onto the integer levels 0..mu, rounding to nearest.
        y = np.floor((y + 1) / 2 * mu + 0.5)
    return y


def mu_decode(y: np.ndarray, mu: int=255, quantized: bool=True) -> np.ndarray:
    """Mu-law decoding. Compute the mu-law decoding given an input code. It assumes that the input `y` is in range `[0,mu]` when quantized is True and `[-1,1]` otherwise.

    Args:
        y (np.ndarray): The encoded waveform.
        mu (int, optional): The encoding parameter; must match the value used for encoding. Defaults to 255.
        quantized (bool, optional): If `True`, the input is assumed to be quantized to `1 + mu` distinct integer values. Defaults to True.

    Raises:
        ParameterError: If `mu` is smaller than 1.

    Returns:
        np.ndarray: The mu-law decoded waveform.
    """
    if mu < 1:
        raise ParameterError('mu is typically set as 2**k-1, k=1, 2, 3,...')

    # Use `mu` itself (not `mu - 1`) so this is the exact inverse of the
    # companding formula `sign(x) * log1p(mu * |x|) / log1p(mu)`.
    if quantized:  # undo the quantization: integer levels 0..mu -> [-1, 1]
        y = y * 2 / mu - 1
    x = np.sign(y) / mu * ((1 + mu)**np.abs(y) - 1)
    return x


def _randint(high: int) -> int:
    """Generate one random integer in range [0 high)

     This is a helper function for random data augmentation
    """
    return int(np.random.randint(0, high=high))


def depth_augment(y: np.ndarray,
                  choices: List=['int8', 'int16'],
                  probs: List[float]=[0.5, 0.5]) -> np.ndarray:
    """ Audio depth augmentation. Simulate quantization distortion by converting the waveform to a randomly chosen depth and back to its original dtype.

    Args:
        y (np.ndarray): Input waveform array in 1D or 2D.
        choices (List, optional): Candidate data types for the intermediate depth. Defaults to ['int8', 'int16'].
        probs (List[float], optional): Sampling probability of each candidate, same length as `choices`. Defaults to [0.5, 0.5].

    Returns:
        np.ndarray: The augmented waveform.
    """
    n_choices, n_probs = len(choices), len(probs)
    assert n_probs == n_choices, \
        'number of choices {} must be equal to size of probs {}'.format(
            n_choices, n_probs)

    # Pick the intermediate depth, then do a round trip through it.
    target_depth = np.random.choice(choices, p=probs)
    original_depth = y.dtype
    degraded = depth_convert(y, target_depth)
    return depth_convert(degraded, original_depth)


def adaptive_spect_augment(spect: np.ndarray,
                           tempo_axis: int=0,
                           level: float=0.1) -> np.ndarray:
    """Do adaptive spectrogram augmentation. Both the number and the width of the masks scale with `level`, which ranges from 0 to 1, where 0 means no augmentation.

    The input array is modified in place and also returned.

    Args:
        spect (np.ndarray): Input spectrogram.
        tempo_axis (int, optional): Indicate the tempo axis. Defaults to 0.
        level (float, optional): The level factor of masking. Defaults to 0.1.

    Returns:
        np.ndarray: The augmented spectrogram.
    """
    assert spect.ndim == 2., 'only supports 2d tensor or numpy array'
    time_on_rows = tempo_axis == 0
    if time_on_rows:
        nt, nf = spect.shape
        time_axis, freq_axis = 0, 1
    else:
        nf, nt = spect.shape
        time_axis, freq_axis = 1, 0

    # Mask width is half of `level` times the axis length; mask count is 10 * level.
    time_mask_width = int(nt * level * 0.5)
    freq_mask_width = int(nf * level * 0.5)
    num_masks = int(10 * level)

    def _zero_band(axis, axis_len, width):
        # Zero a random band of `width` entries along `axis`.
        begin = _randint(axis_len - width)
        band = slice(begin, begin + width)
        if axis == 0:
            spect[band, :] = 0
        else:
            spect[:, band] = 0

    # Time masks are drawn first, then frequency masks.
    for _ in range(num_masks):
        _zero_band(time_axis, nt, time_mask_width)
    for _ in range(num_masks):
        _zero_band(freq_axis, nf, freq_mask_width)

    return spect


def spect_augment(spect: np.ndarray,
                  tempo_axis: int=0,
                  max_time_mask: int=3,
                  max_freq_mask: int=3,
                  max_time_mask_width: int=30,
                  max_freq_mask_width: int=20) -> np.ndarray:
    """Do spectrogram augmentation in both time and freq axis.

    The input array is modified in place and also returned. The number of
    masks and the mask widths are drawn uniformly from `[0, max)`; one width
    is drawn per axis and shared by all masks on that axis.

    Args:
        spect (np.ndarray): Input spectrogram.
        tempo_axis (int, optional): Indicate the tempo axis. Defaults to 0.
        max_time_mask (int, optional): Exclusive upper bound on the number of time masks. Defaults to 3.
        max_freq_mask (int, optional): Exclusive upper bound on the number of frequency masks. Defaults to 3.
        max_time_mask_width (int, optional): Exclusive upper bound on the width of a time mask. Defaults to 30.
        max_freq_mask_width (int, optional): Exclusive upper bound on the width of a frequency mask. Defaults to 20.

    Returns:
        np.ndarray: The augmented spectrogram.
    """
    assert spect.ndim == 2., 'only supports 2d tensor or numpy array'
    if tempo_axis == 0:
        nt, nf = spect.shape
        time_axis, freq_axis = 0, 1
    else:
        nf, nt = spect.shape
        time_axis, freq_axis = 1, 0

    num_time_mask = _randint(max_time_mask)
    num_freq_mask = _randint(max_freq_mask)

    # Clamp the drawn widths below the axis length. Without this, a width
    # >= the axis length made `_randint(n - width)` fail with ValueError,
    # e.g. with the default widths on a short spectrogram.
    time_mask_width = min(_randint(max_time_mask_width), max(nt - 1, 0))
    freq_mask_width = min(_randint(max_freq_mask_width), max(nf - 1, 0))

    plans = (
        (time_axis, nt, time_mask_width, num_time_mask),
        (freq_axis, nf, freq_mask_width, num_freq_mask), )
    for axis, axis_len, width, count in plans:
        if axis_len == 0:
            # Nothing to mask along an empty axis.
            continue
        for _ in range(count):
            start = _randint(axis_len - width)
            band = [slice(None), slice(None)]
            band[axis] = slice(start, start + width)
            spect[tuple(band)] = 0

    return spect


def random_crop1d(y: np.ndarray, crop_len: int) -> np.ndarray:
    """ Random cropping on a input waveform.

    Args:
        y (np.ndarray): Input waveform array in 1D.
        crop_len (int): Length of waveform to crop. Must not exceed `len(y)`.

    Raises:
        ParameterError: If `y` is not one-dimensional.

    Returns:
        np.ndarray: The cropped waveform.
    """
    if y.ndim != 1:
        # This used to be a bare string expression, i.e. the check did nothing.
        raise ParameterError('only accept 1d tensor or numpy array')
    n = len(y)
    # `_randint` excludes its upper bound, so add 1 to make the last window
    # (start == n - crop_len) reachable and to allow crop_len == n.
    idx = _randint(n - crop_len + 1)
    return y[idx:idx + crop_len]


def random_crop2d(s: np.ndarray, crop_len: int,
                  tempo_axis: int=0) -> np.ndarray:
    """ Random cropping on a spectrogram.

    Args:
        s (np.ndarray): Input spectrogram in 2D.
        crop_len (int): Length of spectrogram to crop. Must not exceed the size of `tempo_axis`.
        tempo_axis (int, optional): Indicate the tempo axis. Defaults to 0.

    Raises:
        ParameterError: If `tempo_axis` is not a valid axis of `s`.

    Returns:
        np.ndarray: The cropped spectrogram.
    """
    if tempo_axis >= s.ndim:
        raise ParameterError('axis out of range')

    n = s.shape[tempo_axis]
    # `_randint` excludes its upper bound, so add 1 to make the last window
    # (start == n - crop_len) reachable and to allow crop_len == n.
    idx = _randint(high=n - crop_len + 1)
    # Full slices on every axis except the tempo axis, which gets the window.
    sli = [slice(None) for i in range(s.ndim)]
    sli[tempo_axis] = slice(idx, idx + crop_len)
    out = s[tuple(sli)]
    return out


================================================
FILE: paddlespeech/audio/datasets/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .esc50 import ESC50
from .voxceleb import VoxCeleb


================================================
FILE: paddlespeech/audio/datasets/dataset.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List

import numpy as np
import paddle

from ..backends.soundfile_backend import soundfile_load as load_audio
from ..compliance.kaldi import fbank as kaldi_fbank
from ..compliance.kaldi import mfcc as kaldi_mfcc
from ..compliance.librosa import melspectrogram
from ..compliance.librosa import mfcc

# Registry mapping a `feat_type` name to its feature extraction function.
# 'raw' maps to None, meaning the waveform is returned without processing.
feat_funcs = dict(
    raw=None,
    melspectrogram=melspectrogram,
    mfcc=mfcc,
    kaldi_fbank=kaldi_fbank,
    kaldi_mfcc=kaldi_mfcc, )


class AudioClassificationDataset(paddle.io.Dataset):
    """
    Base class of audio classification dataset.

    Holds parallel lists of audio files and labels, and extracts the feature
    selected by `feat_type` when a sample is accessed.
    """

    def __init__(self,
                 files: List[str],
                 labels: List[int],
                 feat_type: str='raw',
                 sample_rate: int=None,
                 **kwargs):
        """
        Args:
            files (:obj:`List[str]`): A list of absolute path of audio files.
            labels (:obj:`List[int]`): Labels of audio files.
            feat_type (:obj:`str`, `optional`, defaults to `raw`):
                It identifies the feature type that user wants to extract of an audio file.
            sample_rate (:obj:`int`, `optional`, defaults to `None`):
                Passed as `sr` to the audio loader when set; when `None` the
                loader is called without `sr`.
            **kwargs: Extra keyword arguments forwarded to the feature function.

        Raises:
            RuntimeError: If `feat_type` is not a key of `feat_funcs`.
        """
        super(AudioClassificationDataset, self).__init__()

        if feat_type not in feat_funcs.keys():
            raise RuntimeError(
                f"Unknown feat_type: {feat_type}, it must be one in {list(feat_funcs.keys())}"
            )

        self.files = files
        self.labels = labels

        self.feat_type = feat_type
        self.sample_rate = sample_rate
        self.feat_config = kwargs  # Pass keyword arguments to customize feature config

    def _get_data(self, input_file: str):
        """Hook for subclasses; not implemented in the base class."""
        raise NotImplementedError

    def _convert_to_record(self, idx):
        """Load sample `idx` and return a dict with keys 'feat' and 'label'."""
        file, label = self.files[idx], self.labels[idx]

        if self.sample_rate is None:
            waveform, sample_rate = load_audio(file)
        else:
            waveform, sample_rate = load_audio(file, sr=self.sample_rate)

        feat_func = feat_funcs[self.feat_type]

        record = {}
        if self.feat_type in ['kaldi_fbank', 'kaldi_mfcc']:
            waveform = paddle.to_tensor(waveform).unsqueeze(0)  # (C, T)
            # Fall back to the rate reported by the loader when no target rate
            # was configured; previously `sr=None` was passed in that case.
            feat_sr = sample_rate if self.sample_rate is None else self.sample_rate
            record['feat'] = feat_func(
                waveform=waveform, sr=feat_sr, **self.feat_config)
        else:
            record['feat'] = feat_func(
                waveform, sample_rate,
                **self.feat_config) if feat_func else waveform
        record['label'] = label
        return record

    def __getitem__(self, idx):
        """Return the sample at `idx`.

        For kaldi features a `(key, feat, label)` triple is returned,
        otherwise a `(feat transposed, label as int64 array)` pair.
        """
        record = self._convert_to_record(idx)
        if self.feat_type in ['kaldi_fbank', 'kaldi_mfcc']:
            # NOTE(review): `self.keys` is not set by this class; presumably
            # subclasses that use kaldi features define it - confirm.
            return self.keys[idx], record['feat'], record['label']
        else:
            return np.array(record['feat']).transpose(), np.array(
                record['label'], dtype=np.int64)

    def __len__(self):
        """Number of audio files in the dataset."""
        return len(self.files)


================================================
FILE: paddlespeech/audio/datasets/esc50.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import os
from typing import List
from typing import Tuple

from ...utils.env import DATA_HOME
from ..utils.download import download_and_decompress
from .dataset import AudioClassificationDataset

__all__ = ['ESC50']


class ESC50(AudioClassificationDataset):
    """
    The ESC-50 dataset is a labeled collection of 2000 environmental audio recordings
    suitable for benchmarking methods of environmental sound classification. The dataset
    consists of 5-second-long recordings organized into 50 semantical classes (with
    40 examples per class)

    Reference:
        ESC: Dataset for Environmental Sound Classification
        http://dx.doi.org/10.1145/2733373.2806390
    """

    # Archive to download when the data is missing under DATA_HOME.
    archieves = [
        {
            'url':
            'https://paddleaudio.bj.bcebos.com/datasets/ESC-50-master.zip',
            'md5': '7771e4b9d86d0945acce719c7a59305a',
        },
    ]
    label_list = [
        # Animals
        'Dog',
        'Rooster',
        'Pig',
        'Cow',
        'Frog',
        'Cat',
        'Hen',
        'Insects (flying)',
        'Sheep',
        'Crow',
        # Natural soundscapes & water sounds
        'Rain',
        'Sea waves',
        'Crackling fire',
        'Crickets',
        'Chirping birds',
        'Water drops',
        'Wind',
        'Pouring water',
        'Toilet flush',
        'Thunderstorm',
        # Human, non-speech sounds
        'Crying baby',
        'Sneezing',
        'Clapping',
        'Breathing',
        'Coughing',
        'Footsteps',
        'Laughing',
        'Brushing teeth',
        'Snoring',
        'Drinking, sipping',
        # Interior/domestic sounds
        'Door knock',
        'Mouse click',
        'Keyboard typing',
        'Door, wood creaks',
        'Can opening',
        'Washing machine',
        'Vacuum cleaner',
        'Clock alarm',
        'Clock tick',
        'Glass breaking',
        # Exterior/urban noises
        'Helicopter',
        'Chainsaw',
        'Siren',
        'Car horn',
        'Engine',
        'Train',
        'Church bells',
        'Airplane',
        'Fireworks',
        'Hand saw',
    ]
    # Paths below are relative to DATA_HOME.
    meta = os.path.join('ESC-50-master', 'meta', 'esc50.csv')
    # One row of the meta csv file.
    meta_info = collections.namedtuple(
        'META_INFO',
        ('filename', 'fold', 'target', 'category', 'esc10', 'src_file', 'take'))
    audio_path = os.path.join('ESC-50-master', 'audio')

    def __init__(self,
                 mode: str='train',
                 split: int=1,
                 feat_type: str='raw',
                 **kwargs):
        """
        Args:
            mode (:obj:`str`, `optional`, defaults to `train`):
                It identifies the dataset mode (train or dev).
            split (:obj:`int`, `optional`, defaults to 1):
                It specify the fold of dev dataset.
            feat_type (:obj:`str`, `optional`, defaults to `raw`):
                It identifies the feature type that user wants to extract of an audio file.
        """
        audio_files, targets = self._get_data(mode, split)
        super(ESC50, self).__init__(
            files=audio_files, labels=targets, feat_type=feat_type, **kwargs)

    def _get_meta_info(self) -> List[collections.namedtuple]:
        """Parse the meta csv (header line skipped) into `meta_info` rows."""
        meta_file = os.path.join(DATA_HOME, self.meta)
        with open(meta_file, 'r') as rf:
            rows = rf.readlines()[1:]
        return [self.meta_info(*row.strip().split(',')) for row in rows]

    def _get_data(self, mode: str, split: int) -> Tuple[List[str], List[int]]:
        """Collect file paths and integer labels for the requested mode.

        Fold `split` is the dev fold: 'train' takes every other fold, any
        other mode takes only fold `split`. The archive is downloaded first
        when the audio directory or the meta file is missing.
        """
        audio_dir = os.path.join(DATA_HOME, self.audio_path)
        meta_file = os.path.join(DATA_HOME, self.meta)
        if not (os.path.isdir(audio_dir) and os.path.isfile(meta_file)):
            download_and_decompress(self.archieves, DATA_HOME)

        want_train = mode == 'train'
        files = []
        labels = []
        for sample in self._get_meta_info():
            in_dev_fold = int(sample.fold) == split
            # Keep the sample when exactly one of the two flags holds.
            if want_train != in_dev_fold:
                files.append(
                    os.path.join(DATA_HOME, self.audio_path, sample.filename))
                labels.append(int(sample.target))

        return files, labels


================================================
FILE: paddlespeech/audio/datasets/voxceleb.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import csv
import glob
import os
import random
from multiprocessing import cpu_count
from typing import List

from paddle.io import Dataset
from pathos.multiprocessing import Pool
from tqdm import tqdm

from ...utils.env import DATA_HOME
from ..backends.soundfile_backend import soundfile_load as load_audio
from ..utils.download import decompress
from ..utils.download import download_and_decompress
from .dataset import feat_funcs

__all__ = ['VoxCeleb']


class VoxCeleb(Dataset):
    source_url = 'https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/'
    archieves_audio_dev = [
        {
            'url': source_url + 'vox1_dev_wav_partaa',
            'md5': 'e395d020928bc15670b570a21695ed96',
        },
        {
            'url': source_url + 'vox1_dev_wav_partab',
            'md5': 'bbfaaccefab65d82b21903e81a8a8020',
        },
        {
            'url': source_url + 'vox1_dev_wav_partac',
            'md5': '017d579a2a96a077f40042ec33e51512',
        },
        {
            'url': source_url + 'vox1_dev_wav_partad',
            'md5': '7bb1e9f70fddc7a678fa998ea8b3ba19',
        },
    ]
    archieves_audio_test = [
        {
            'url': source_url + 'vox1_test_wav.zip',
            'md5': '185fdc63c3c739954633d50379a3d102',
        },
    ]
    archieves_meta = [
        {
            'url':
            'https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/veri_test2.txt',
            'md5':
            'b73110731c9223c1461fe49cb48dddfc',
        },
    ]

    num_speakers = 1211  # 1211 vox1, 5994 vox2, 7205 vox1+2, test speakers: 41
    sample_rate = 16000
    meta_info = collections.namedtuple(
        'META_INFO', ('id', 'duration', 'wav', 'start', 'stop', 'spk_id'))
    base_path = os.path.join(DATA_HOME, 'vox1')
    wav_path = os.path.join(base_path, 'wav')
    meta_path = os.path.join(base_path, 'meta')
    veri_test_file = os.path.join(meta_path, 'veri_test2.txt')
    csv_path = os.path.join(base_path, 'csv')
    subsets = ['train', 'dev', 'enroll', 'test']

    def __init__(
            self,
            subset: str='train',
            feat_type: str='raw',
            random_chunk: bool=True,
            chunk_duration: float=3.0,  # seconds
            split_ratio: float=0.9,  # train split ratio
            seed: int=0,
            target_dir: str=None,
            vox2_base_path=None,
            **kwargs):
        """VoxCeleb data prepare and get the specific dataset audio info

        Note that passing `target_dir` rebinds the class-level attributes
        `VoxCeleb.csv_path`, `VoxCeleb.meta_path` and `VoxCeleb.veri_test_file`,
        so it affects every instance created afterwards.

        Args:
            subset (str, optional): dataset name, such as train, dev, enroll or test. Defaults to 'train'.
            feat_type (str, optional): feat type, such raw, melspectrogram(fbank) or mfcc . Defaults to 'raw'.
            random_chunk (bool, optional): random select a duration from audio. Defaults to True.
            chunk_duration (float, optional): chunk duration if random_chunk flag is set. Defaults to 3.0.
            split_ratio (float, optional): train split ratio. Defaults to 0.9.
            seed (int, optional): currently unused; the seeding call is commented out. Defaults to 0.
            target_dir (str, optional): data dir, audio info will be stored in this directory. Defaults to None.
            vox2_base_path (_type_, optional): vox2 directory. vox2 data must be converted from m4a to wav. Defaults to None.
            **kwargs: extra keyword arguments stored as the feature config.
        """
        assert subset in self.subsets, \
            'Dataset subset must be one in {}, but got {}'.format(self.subsets, subset)

        self.subset = subset
        self.spk_id2label = {}
        self.feat_type = feat_type
        self.feat_config = kwargs
        self.random_chunk = random_chunk
        self.chunk_duration = chunk_duration
        self.split_ratio = split_ratio
        self.vox2_base_path = vox2_base_path
        if target_dir:
            self.target_dir = target_dir
        else:
            self.target_dir = VoxCeleb.base_path

        # When a target dir is given, the csv and meta info move from the
        # base path to `<target_dir>/voxceleb`.
        if target_dir:
            info_root = os.path.join(target_dir, "voxceleb")
            VoxCeleb.csv_path = os.path.join(info_root, 'csv')
            VoxCeleb.meta_path = os.path.join(info_root, 'meta')
        VoxCeleb.veri_test_file = os.path.join(VoxCeleb.meta_path,
                                               'veri_test2.txt')

        # For a small dataset test, slice the result, e.g. `[:1000]`.
        self._data = self._get_data()
        super(VoxCeleb, self).__init__()

        # Set up a seed to reproduce training or predicting result.
        # random.seed(seed)

    def _get_data(self):
        """Make sure the dataset is available and load the subset's csv.

        Downloads and extracts the VoxCeleb1 audio when `wav_path` is missing,
        downloads the meta file when `meta_path` is missing, and generates the
        csv files when `csv_path` is missing. Also fills `self.spk_id2label`
        from `spk_id2label.txt` in the meta directory.

        Returns:
            list: `meta_info` tuples, one per row of `<csv_path>/<subset>.csv`.
        """
        # Download audio files.
        # We need the users to decompress all vox1/dev/wav and vox1/test/wav/ to vox1/wav/ dir
        # so, we check the vox1/wav dir status
        print(f"wav base path: {self.wav_path}")
        if not os.path.isdir(self.wav_path):
            print("start to download the voxceleb1 dataset")
            download_and_decompress(  # multi-zip parts concatenate to vox1_dev_wav.zip
                self.archieves_audio_dev,
                self.base_path,
                decompress=False)
            download_and_decompress(  # download the vox1_test_wav.zip and unzip
                self.archieves_audio_test,
                self.base_path,
                decompress=True)

            # Concatenate all downloaded parts into one zip file. Done in
            # Python rather than through `cat` in a shell, so that paths with
            # spaces work and I/O errors are raised instead of ignored.
            dev_zipfile = os.path.join(self.base_path, 'vox1_dev_wav.zip')
            print(f'Concatenating all parts to: {dev_zipfile}')
            part_files = sorted(
                glob.glob(os.path.join(self.base_path, 'vox1_dev_wav_parta*')))
            with open(dev_zipfile, 'wb') as zip_f:
                for part_file in part_files:
                    with open(part_file, 'rb') as part_f:
                        # Copy in 1 MiB blocks to keep memory use bounded.
                        for block in iter(lambda: part_f.read(1 << 20), b''):
                            zip_f.write(block)

            # Extract all audio files of dev and test set.
            decompress(dev_zipfile, self.base_path)

        # Download meta files.
        if not os.path.isdir(self.meta_path):
            print("prepare the meta data")
            download_and_decompress(
                self.archieves_meta, self.meta_path, decompress=False)

        # Data preparation.
        if not os.path.isdir(self.csv_path):
            os.makedirs(self.csv_path)
            self.prepare_data()

        data = []
        csv_file = os.path.join(self.csv_path, f'{self.subset}.csv')
        print(f"read the {self.subset} from {csv_file}")
        # The csv is produced with `csv.writer` (minimal quoting), so parse it
        # with `csv.reader` to handle quoted fields, e.g. paths with commas.
        with open(csv_file, 'r', newline='') as rf:
            reader = csv.reader(rf)
            next(reader, None)  # skip the header row
            for row in reader:
                if not row:  # tolerate blank lines
                    continue
                audio_id, duration, wav, start, stop, spk_id = row
                data.append(
                    self.meta_info(audio_id,
                                   float(duration), wav,
                                   int(start), int(stop), spk_id))

        with open(os.path.join(self.meta_path, 'spk_id2label.txt'), 'r') as f:
            for line in f.readlines():
                spk_id, label = line.strip().split(' ')
                self.spk_id2label[spk_id] = int(label)

        return data

    def _convert_to_record(self, idx: int):
        """Load the audio of sample `idx` and build its record dict.

        The record holds every field of the sample's `meta_info` tuple plus
        'feat', and 'label' for the train and dev subsets.

        Args:
            idx (int): Index into `self._data`.

        Returns:
            dict: The record of the sample.
        """
        sample = self._data[idx]

        # Copy all fields of the namedtuple into a plain dict.
        record = {
            field: getattr(sample, field)
            for field in type(sample)._fields
        }

        waveform, sr = load_audio(record['wav'])

        # random select a chunk audio samples from the audio
        if self.random_chunk:
            num_wav_samples = waveform.shape[0]
            num_chunk_samples = int(self.chunk_duration * sr)
            # `random.randint` includes both ends, so the last valid start is
            # exactly `num_wav_samples - num_chunk_samples`. Clamp at 0 so a
            # clip not longer than the chunk is returned whole instead of
            # raising ValueError.
            last_start = max(num_wav_samples - num_chunk_samples, 0)
            start = random.randint(0, last_start)
            stop = start + num_chunk_samples
        else:
            start = record['start']
            stop = record['stop']

        waveform = waveform[start:stop]

        assert self.feat_type in feat_funcs.keys(), \
            f"Unknown feat_type: {self.feat_type}, it must be one in {list(feat_funcs.keys())}"
        feat_func = feat_funcs[self.feat_type]
        feat = feat_func(
            waveform, sr=sr, **self.feat_config) if feat_func else waveform

        record.update({'feat': feat})
        if self.subset in ['train',
                           'dev']:  # Labels are available in train and dev.
            record.update({'label': self.spk_id2label[record['spk_id']]})

        return record

    @staticmethod
    def _get_chunks(seg_dur, audio_id, audio_duration):
        """Build the ids of the consecutive, non-overlapping chunks of an audio.

        Each id has the form `<audio_id>_<start>_<end>`. `seg_dur` and
        `audio_duration` must share the same unit; a trailing remainder
        shorter than `seg_dur` is dropped.
        """
        chunk_ids = []
        for i in range(int(audio_duration / seg_dur)):
            begin = i * seg_dur
            chunk_ids.append(
                audio_id + "_" + str(begin) + "_" + str(begin + seg_dur))
        return chunk_ids

    def _get_audio_info(self, wav_file: str,
                        split_chunks: bool) -> List[List[str]]:
        """Describe one wav file as csv rows.

        The last three "/"-separated path components are taken as speaker id,
        session id and utterance file name.

        Args:
            wav_file (str): Path of the wav file.
            split_chunks (bool): Emit one row per chunk of `self.chunk_duration`
                seconds when True, otherwise a single row for the whole audio.

        Returns:
            List[List[str]]: Rows of `[id, duration, wav, start, stop, spk_id]`.
        """
        waveform, sr = load_audio(wav_file)
        spk_id, sess_id, utt_id = wav_file.split("/")[-3:]
        utt_name = utt_id.split(".")[0]
        audio_id = '-'.join([spk_id, sess_id, utt_name])
        audio_duration = waveform.shape[0] / sr

        if not split_chunks:  # Keep whole audio.
            return [[
                audio_id, audio_duration, wav_file, 0, waveform.shape[0], spk_id
            ]]

        # Split into pieces of self.chunk_duration seconds.
        rows = []
        for chunk in self._get_chunks(self.chunk_duration, audio_id,
                                      audio_duration):
            # The chunk id ends with the start and end timestamps.
            begin_ts, end_ts = chunk.split("_")[-2:]
            first_sample = int(float(begin_ts) * sr)
            last_sample = int(float(end_ts) * sr)
            # id, duration, wav, start, stop, spk_id
            rows.append([
                chunk, audio_duration, wav_file, first_sample, last_sample,
                spk_id
            ])
        return rows

    def generate_csv(self,
                     wav_files: List[str],
                     output_file: str,
                     split_chunks: bool=True):
        """Collect the audio info of `wav_files` and write it to a csv file.

        Args:
            wav_files (List[str]): Paths of the wav files to describe.
            output_file (str): Path of the csv file to write.
            split_chunks (bool, optional): Forwarded to `_get_audio_info`;
                when True each audio yields one row per chunk. Defaults to True.
        """
        print(f'Generating csv: {output_file}')
        header = ["id", "duration", "wav", "start", "stop", "spk_id"]
        # Note: a C++ exception may be reported here, but the program still
        # runs fine, so it can be ignored.
        with Pool(cpu_count()) as p:
            infos = list(
                tqdm(
                    p.imap(lambda x: self._get_audio_info(x, split_chunks),
                           wav_files),
                    total=len(wav_files)))

        # `newline=""` is required by the csv module; without it every row
        # ends in "\r\r\n" on platforms that translate "\n" on write.
        with open(output_file, mode="w", newline="") as csv_f:
            csv_writer = csv.writer(
                csv_f, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
            csv_writer.writerow(header)
            # Each entry of `infos` is the list of rows for one wav file.
            for info in infos:
                csv_writer.writerows(info)

    def prepare_data(self):
        """Create the speaker label file and the train/dev/enroll/test csv files.

        Speakers that appear in `self.veri_test_file` are excluded from the
        train and dev sets. The remaining wav files found under
        `self.wav_path` and `self.vox2_base_path` are shuffled and split by
        `self.split_ratio`.
        """
        print("start to prepare the data csv file")

        # Collect the enroll and test audio paths listed in the trial file.
        enroll_set, test_set = set(), set()
        with open(self.veri_test_file, 'r') as f:
            for line in f:
                _, enrol_file, test_file = line.strip().split(' ')
                enroll_set.add(os.path.join(self.wav_path, enrol_file))
                test_set.add(os.path.join(self.wav_path, test_file))
        enroll_files = sorted(enroll_set)
        test_files = sorted(test_set)

        # Speakers used for enrollment or test.
        test_spks = {
            path.split('/wav/')[1].split('/')[0]
            for path in enroll_files + test_files
        }

        # Gather every train/dev audio whose speaker is not a test speaker.
        audio_files = []
        speakers = set()
        print("Getting file list...")
        for path in [self.wav_path, self.vox2_base_path]:
            # A missing or unset directory is reported and skipped.
            if not path or not os.path.exists(path):
                print(f"{path} is an invalid path, please check again, "
                      "and we will ignore the vox2 base path")
                continue
            pattern = os.path.join(path, "**", "*.wav")
            for file in glob.glob(pattern, recursive=True):
                spk = file.split('/wav/')[1].split('/')[0]
                if spk not in test_spks:
                    speakers.add(spk)
                    audio_files.append(file)

        label_file = os.path.join(self.meta_path, 'spk_id2label.txt')
        print(f"start to generate the {label_file}")
        # Encode the train and dev speakers as integer labels
        # (1211 vox1, 5994 vox2, 7205 vox1+2).
        with open(label_file, 'w') as f:
            f.writelines(f'{spk_id} {label}\n'
                         for label, spk_id in enumerate(sorted(speakers)))

        # Sort before shuffling so the result depends only on the RNG state.
        audio_files.sort()
        random.shuffle(audio_files)
        split_idx = int(self.split_ratio * len(audio_files))
        train_files = audio_files[:split_idx]
        dev_files = audio_files[split_idx:]

        # Train/dev audios are chunked; enroll/test audios are kept whole.
        jobs = (
            (train_files, 'train.csv', True),
            (dev_files, 'dev.csv', True),
            (enroll_files, 'enroll.csv', False),
            (test_files, 'test.csv', False), )
        for files, csv_name, chunked in jobs:
            self.generate_csv(
                files,
                os.path.join(self.csv_path, csv_name),
                split_chunks=chunked)

    def __getitem__(self, idx):
        """Return the record for `idx`, as produced by `_convert_to_record`."""
        return self._convert_to_record(idx)

    def __len__(self):
        """Return the number of entries held in `self._data`."""
        return len(self._data)


================================================
FILE: paddlespeech/audio/functional/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .functional import compute_fbank_matrix
from .functional import create_dct
from .functional import fft_frequencies
from .functional import hz_to_mel
from .functional import mel_frequencies
from .functional import mel_to_hz
from .functional import power_to_db


================================================
FILE: paddlespeech/audio/functional/functional.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from librosa(https://github.com/librosa/librosa)
import math
from typing import Optional
from typing import Union

import paddle
from paddle import Tensor

# Public API of this module; every name listed here is defined below.
__all__ = [
    'hz_to_mel',
    'mel_to_hz',
    'mel_frequencies',
    'fft_frequencies',
    'compute_fbank_matrix',
    'power_to_db',
    'create_dct',
]


def hz_to_mel(freq: Union[Tensor, float],
              htk: bool=False) -> Union[Tensor, float]:
    """Map frequencies from Hz onto the mel scale.

    With `htk=False` the scale is linear below 1000 Hz and logarithmic above.

    Args:
        freq (Union[Tensor, float]): Frequency in Hz, a scalar or a tensor of arbitrary shape.
        htk (bool, optional): Use the HTK formula instead. Defaults to False.

    Returns:
        Union[Tensor, float]: Frequency in mels, same kind as the input.
    """
    is_tensor = isinstance(freq, Tensor)

    if htk:
        if is_tensor:
            return 2595.0 * paddle.log10(1.0 + freq / 700.0)
        return 2595.0 * math.log10(1.0 + freq / 700.0)

    f_min = 0.0
    f_sp = 200.0 / 3
    min_log_hz = 1000.0  # start of the log region (Hz)
    min_log_mel = (min_log_hz - f_min) / f_sp  # start of the log region (mels)
    logstep = math.log(6.4) / 27.0  # step size in the log region

    # Linear part, valid below min_log_hz.
    linear = (freq - f_min) / f_sp

    if not is_tensor:
        if freq >= min_log_hz:
            return min_log_mel + math.log(freq / min_log_hz + 1e-10) / logstep
        return linear

    # Log part; the 1e-10 offset avoids nan from log(0).
    log_part = min_log_mel + paddle.log(
        freq / min_log_hz + 1e-10) / logstep
    # Blend with a 0/1 mask (stands in for a masked_fill op).
    in_log_region = (freq > min_log_hz).astype(freq.dtype)
    return log_part * in_log_region + linear * (1 - in_log_region)


def mel_to_hz(mel: Union[float, Tensor],
              htk: bool=False) -> Union[float, Tensor]:
    """Map mel values back to frequencies in Hz.

    Args:
        mel (Union[float, Tensor]): Mel value, a scalar or a tensor of arbitrary shape.
        htk (bool, optional): Use the HTK formula. Defaults to False.

    Returns:
        Union[float, Tensor]: Frequencies in Hz, same kind as the input.
    """
    if htk:
        return 700.0 * (10.0**(mel / 2595.0) - 1.0)

    f_min = 0.0
    f_sp = 200.0 / 3
    min_log_hz = 1000.0  # start of the log region (Hz)
    min_log_mel = (min_log_hz - f_min) / f_sp  # start of the log region (mels)
    logstep = math.log(6.4) / 27.0  # step size in the log region

    # Linear part, valid below min_log_mel.
    linear = f_min + f_sp * mel

    if not isinstance(mel, Tensor):
        if mel >= min_log_mel:
            return min_log_hz * math.exp(logstep * (mel - min_log_mel))
        return linear

    # Nonlinear part, blended in with a 0/1 mask (stands in for masked_fill).
    log_part = min_log_hz * paddle.exp(logstep * (mel - min_log_mel))
    in_log_region = (mel > min_log_mel).astype(mel.dtype)
    return log_part * in_log_region + linear * (1 - in_log_region)


def mel_frequencies(n_mels: int=64,
                    f_min: float=0.0,
                    f_max: float=11025.0,
                    htk: bool=False,
                    dtype: str='float32') -> Tensor:
    """Return frequencies that are evenly spaced on the mel scale.

    Args:
        n_mels (int, optional): Number of mel bins. Defaults to 64.
        f_min (float, optional): Minimum frequency in Hz. Defaults to 0.0.
        f_max (float, optional): Maximum frequency in Hz. Defaults to 11025.0.
        htk (bool, optional): Use htk scaling. Defaults to False.
        dtype (str, optional): The data type of the return frequencies. Defaults to 'float32'.

    Returns:
        Tensor: Tensor of n_mels frequencies in Hz with shape `(n_mels,)`.
    """
    # Convert both limits to mels, space the points uniformly there, and
    # convert the points back to Hz.
    mel_lo = hz_to_mel(f_min, htk=htk)
    mel_hi = hz_to_mel(f_max, htk=htk)
    mel_points = paddle.linspace(mel_lo, mel_hi, n_mels, dtype=dtype)
    return mel_to_hz(mel_points, htk=htk)


def fft_frequencies(sr: int, n_fft: int, dtype: str='float32') -> Tensor:
    """Return the center frequency of every non-negative FFT bin.

    Args:
        sr (int): Sample rate.
        n_fft (int): Number of fft bins.
        dtype (str, optional): The data type of the return frequencies. Defaults to 'float32'.

    Returns:
        Tensor: FFT frequencies in Hz with shape `(n_fft//2 + 1,)`.
    """
    nyquist = float(sr) / 2
    num_bins = int(1 + n_fft // 2)
    return paddle.linspace(0, nyquist, num_bins, dtype=dtype)


def compute_fbank_matrix(sr: int,
                         n_fft: int,
                         n_mels: int=64,
                         f_min: float=0.0,
                         f_max: Optional[float]=None,
                         htk: bool=False,
                         norm: Union[str, float]='slaney',
                         dtype: str='float32') -> Tensor:
    """Build the triangular mel filterbank matrix.

    Args:
        sr (int): Sample rate.
        n_fft (int): Number of fft bins.
        n_mels (int, optional): Number of mel bins. Defaults to 64.
        f_min (float, optional): Minimum frequency in Hz. Defaults to 0.0.
        f_max (Optional[float], optional): Maximum frequency in Hz. `sr / 2` is used when None. Defaults to None.
        htk (bool, optional): Use htk scaling. Defaults to False.
        norm (Union[str, float], optional): 'slaney' for area normalization, a number `p` for p-norm normalization of each filter; any other value leaves the filters unnormalized. Defaults to 'slaney'.
        dtype (str, optional): The data type of the return matrix. Defaults to 'float32'.

    Returns:
        Tensor: Mel transform matrix with shape `(n_mels, n_fft//2 + 1)`.
    """
    if f_max is None:
        f_max = float(sr) / 2

    num_bins = int(1 + n_fft // 2)
    weights = paddle.zeros((n_mels, num_bins), dtype=dtype)

    # Center frequency of each FFT bin.
    fftfreqs = fft_frequencies(sr=sr, n_fft=n_fft, dtype=dtype)

    # Band edges: n_mels + 2 points uniformly spaced on the mel scale.
    mel_f = mel_frequencies(
        n_mels + 2, f_min=f_min, f_max=f_max, htk=htk, dtype=dtype)

    # Equivalent of np.diff(mel_f) and np.subtract.outer(mel_f, fftfreqs).
    fdiff = mel_f[1:] - mel_f[:-1]
    ramps = mel_f.unsqueeze(1) - fftfreqs.unsqueeze(0)

    for band in range(n_mels):
        # Rising and falling slopes of this band over all bins ...
        rising = -ramps[band] / fdiff[band]
        falling = ramps[band + 2] / fdiff[band + 1]

        # ... intersected with each other and clipped at zero.
        weights[band] = paddle.maximum(
            paddle.zeros_like(rising), paddle.minimum(rising, falling))

    if norm == 'slaney':
        # Slaney-style mel is scaled to be approx constant energy per channel.
        enorm = 2.0 / (mel_f[2:n_mels + 2] - mel_f[:n_mels])
        weights *= enorm.unsqueeze(1)
    elif isinstance(norm, (int, float)):
        weights = paddle.nn.functional.normalize(weights, p=norm, axis=-1)

    return weights


def power_to_db(spect: Tensor,
                ref_value: float=1.0,
                amin: float=1e-10,
                top_db: Optional[float]=None) -> Tensor:
    """Convert a power spectrogram (amplitude squared) to decibel (dB) units. The function computes the scaling `10 * log10(x / ref)` in a numerically stable way.

    Args:
        spect (Tensor): STFT power spectrogram.
        ref_value (float, optional): The reference value. If smaller than 1.0, the db level of the signal will be pulled up accordingly. Otherwise, the db level is pushed down. Defaults to 1.0.
        amin (float, optional): Minimum threshold. Defaults to 1e-10.
        top_db (Optional[float], optional): Threshold the output at `top_db` below the peak. Defaults to None.

    Returns:
        Tensor: Power spectrogram in db scale.

    Raises:
        ValueError: If `amin` or `ref_value` is not strictly positive, or if
            `top_db` is negative.
    """
    # Validate every argument up front so that bad input fails before any
    # tensor work is done.
    if amin <= 0:
        raise ValueError("amin must be strictly positive")
    if ref_value <= 0:
        raise ValueError("ref_value must be strictly positive")
    if top_db is not None and top_db < 0:
        raise ValueError("top_db must be non-negative")

    ones = paddle.ones_like(spect)
    # Clamp at amin so that log10 never sees zero.
    log_spec = 10.0 * paddle.log10(paddle.maximum(ones * amin, spect))
    log_spec -= 10.0 * math.log10(max(ref_value, amin))

    if top_db is not None:
        # Clip everything that lies more than top_db below the peak.
        log_spec = paddle.maximum(log_spec, ones * (log_spec.max() - top_db))

    return log_spec


def create_dct(n_mfcc: int,
               n_mels: int,
               norm: Optional[str]='ortho',
               dtype: str='float32') -> Tensor:
    """Build the DCT matrix that maps mel bands to cepstral coefficients.

    Args:
        n_mfcc (int): Number of mel frequency cepstral coefficients.
        n_mels (int): Number of mel filterbanks.
        norm (Optional[str], optional): Normalization type, either 'ortho' or None. Defaults to 'ortho'.
        dtype (str, optional): The data type of the return matrix. Defaults to 'float32'.

    Returns:
        Tensor: The DCT matrix with shape `(n_mels, n_mfcc)`.
    """
    mel_index = paddle.arange(n_mels, dtype=dtype)
    coef_index = paddle.arange(n_mfcc, dtype=dtype).unsqueeze(1)
    # Cosine basis of size (n_mfcc, n_mels).
    basis = paddle.cos(math.pi / float(n_mels) * (mel_index + 0.5) *
                       coef_index)
    if norm is not None:
        assert norm == "ortho"
        # Orthonormal scaling: the first row gets an extra 1/sqrt(2).
        basis[0] *= 1.0 / math.sqrt(2.0)
        basis *= math.sqrt(2.0 / float(n_mels))
    else:
        basis *= 2.0
    return basis.T


================================================
FILE: paddlespeech/audio/functional/window.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
from typing import List
from typing import Tuple
from typing import Union

import paddle
from paddle import Tensor


class WindowFunctionRegister(object):
    """Registry that maps a function's `__name__` to the function itself."""

    def __init__(self):
        self._functions_dict = {}

    def register(self):
        """Return a decorator that records the decorated function.

        The decorator stores the function under its `__name__` and returns it
        unchanged.
        """

        def add_subfunction(func):
            self._functions_dict[func.__name__] = func
            return func

        return add_subfunction

    def get(self, name):
        """Return the function registered as `name`; raise KeyError if absent."""
        return self._functions_dict[name]


# Module-wide registry used by the window functions below and by get_window.
window_function_register = WindowFunctionRegister()


@window_function_register.register()
def _cat(x: List[Tensor], data_type: str) -> Tensor:
    """Concatenate scalars and tensors into one 1-D tensor of `data_type`.

    Args:
        x: Items to concatenate; each one is converted with
            `paddle.to_tensor(item, data_type)`.
        data_type: Data type of the resulting tensor.

    Returns:
        The concatenation of all converted items.
    """
    pieces = []
    for item in x:
        piece = paddle.to_tensor(item, data_type)
        # A Python scalar may be converted to a 0-D tensor, which cannot be
        # concatenated with 1-D tensors; promote it to shape [1].
        if piece.ndim == 0:
            piece = piece.reshape([1])
        pieces.append(piece)
    return paddle.concat(pieces)


@window_function_register.register()
def _acosh(x: Union[Tensor, float]) -> Union[Tensor, float]:
    """Inverse hyperbolic cosine, computed as `log(x + sqrt(x**2 - 1))`.

    Args:
        x: A Python number or a tensor.

    Returns:
        A Python float for a Python number, otherwise a tensor.
    """
    # Python ints take the scalar path too; they are not valid inputs for the
    # paddle ops used in the tensor path.
    if isinstance(x, (int, float)):
        return math.log(x + math.sqrt(x**2 - 1))
    return paddle.log(x + paddle.sqrt(paddle.square(x) - 1))


@window_function_register.register()
def _extend(M: int, sym: bool) -> Tuple[int, bool]:
    """Extend window by 1 sample if needed for DFT-even symmetry.

    Args:
        M: Requested window length.
        sym: True for a symmetric window, False for a periodic one.

    Returns:
        A pair `(length, needs_trunc)`: the length to generate and whether the
        generated window must afterwards be truncated by one sample.
    """
    if not sym:
        return M + 1, True
    else:
        return M, False


@window_function_register.register()
def _len_guards(M: int) -> bool:
    """Validate the window length and report whether it is trivial.

    Returns:
        True when `M <= 1`; callers then return an all-ones window.

    Raises:
        ValueError: If `M` is negative or not integer-valued.
    """
    is_valid_length = int(M) == M and M >= 0
    if not is_valid_length:
        raise ValueError('Window length M must be a non-negative integer')

    return M <= 1


@window_function_register.register()
def _truncate(w: Tensor, needed: bool) -> Tensor:
    """Drop the last sample of `w` when `needed` is true, else return `w` as is."""
    return w[:-1] if needed else w


@window_function_register.register()
def _general_gaussian(M: int, p, sig, sym: bool=True,
                      dtype: str='float64') -> Tensor:
    """Compute a window with a generalized Gaussian shape.
    This function is consistent with scipy.signal.windows.general_gaussian().

    The window is `exp(-0.5 * |n / sig| ** (2 * p))` with `n` centered on the
    middle of the window.
    """
    if _len_guards(M):
        return paddle.ones((M, ), dtype=dtype)
    M, needs_trunc = _extend(M, sym)

    # Sample positions relative to the window center.
    offsets = paddle.arange(0, M, dtype=dtype) - (M - 1.0) / 2.0
    exponent = -0.5 * paddle.abs(offsets / sig)**(2 * p)

    return _truncate(paddle.exp(exponent), needs_trunc)


@window_function_register.register()
def _general_cosine(M: int, a: List[float], sym: bool=True,
                    dtype: str='float64') -> Tensor:
    """Compute a generic weighted sum of cosine terms window.
    This function is consistent with scipy.signal.windows.general_cosine().

    Args:
        M: Number of samples of the window.
        a: Sequence of weighting coefficients; the k-th coefficient multiplies
            `cos(k * t)` with `t` spanning `[-pi, pi]`.
        sym: True for a symmetric window, False for a periodic one.
        dtype: Data type of the returned window.
    """
    if _len_guards(M):
        return paddle.ones((M, ), dtype=dtype)
    M, needs_trunc = _extend(M, sym)
    fac = paddle.linspace(-math.pi, math.pi, M, dtype=dtype)
    w = paddle.zeros((M, ), dtype=dtype)
    for k, coef in enumerate(a):
        w += coef * paddle.cos(k * fac)
    return _truncate(w, needs_trunc)


@window_function_register.register()
def _general_hamming(M: int, alpha: float, sym: bool=True,
                     dtype: str='float64') -> Tensor:
    """Compute a generalized Hamming window.
    This function is consistent with scipy.signal.windows.general_hamming()

    Implemented as a two-term cosine sum with coefficients
    `[alpha, 1 - alpha]`.
    """
    return _general_cosine(M, [alpha, 1.0 - alpha], sym, dtype=dtype)


@window_function_register.register()
def _taylor(M: int,
            nbar=4,
            sll=30,
            norm=True,
            sym: bool=True,
            dtype: str='float64') -> Tensor:
    """Compute a Taylor window.
    The Taylor window taper function approximates the Dolph-Chebyshev window's
    constant sidelobe level for a parameterized number of near-in sidelobes.

    Args:
        M: Number of samples of the window.
        nbar: Number of nearly constant-level sidelobes; `nbar - 1` cosine
            terms are summed.
        sll: Sidelobe level in dB, given as a positive number.
        norm: If true, scale the window by the inverse of its value at the
            center position `(M - 1) / 2`.
        sym: True for a symmetric window, False for a periodic one.
        dtype: Data type of the returned window.
    """
    if _len_guards(M):
        return paddle.ones((M, ), dtype=dtype)
    M, needs_trunc = _extend(M, sym)
    # Original text uses a negative sidelobe level parameter and then negates
    # it in the calculation of B. To keep consistent with other methods we
    # assume the sidelobe level parameter to be positive.
    B = 10**(sll / 20)
    A = _acosh(B) / math.pi
    s2 = nbar**2 / (A**2 + (nbar - 0.5)**2)
    ma = paddle.arange(1, nbar, dtype=dtype)

    # Fm holds one coefficient per cosine term; signs alternates +1, -1, ...
    Fm = paddle.empty((nbar - 1, ), dtype=dtype)
    signs = paddle.empty_like(ma)
    signs[::2] = 1
    signs[1::2] = -1
    m2 = ma * ma
    for mi in range(len(ma)):
        numer = signs[mi] * paddle.prod(1 - m2[mi] / s2 / (A**2 + (ma - 0.5)**2
                                                           ))
        # The denominator is a product over every index except mi itself, so
        # the first and last index only have one of the two partial products.
        if mi == 0:
            denom = 2 * paddle.prod(1 - m2[mi] / m2[mi + 1:])
        elif mi == len(ma) - 1:
            denom = 2 * paddle.prod(1 - m2[mi] / m2[:mi])
        else:
            denom = (2 * paddle.prod(1 - m2[mi] / m2[:mi]) *
                     paddle.prod(1 - m2[mi] / m2[mi + 1:]))

        Fm[mi] = numer / denom

    # Window value at position(s) n: 1 plus twice the Fm-weighted cosine sum.
    def W(n):
        return 1 + 2 * paddle.matmul(
            Fm.unsqueeze(0),
            paddle.cos(2 * math.pi * ma.unsqueeze(1) *
                       (n - M / 2.0 + 0.5) / M), )

    w = W(paddle.arange(0, M, dtype=dtype))

    # normalize (Note that this is not described in the original text [1])
    if norm:
        scale = 1.0 / W((M - 1) / 2)
        w *= scale
    w = w.squeeze()
    return _truncate(w, needs_trunc)


@window_function_register.register()
def _hamming(M: int, sym: bool=True, dtype: str='float64') -> Tensor:
    """Compute a Hamming window.
    The Hamming window is a taper formed by using a raised cosine with
    non-zero endpoints, optimized to minimize the nearest side lobe.

    Implemented as a generalized Hamming window with `alpha = 0.54`.
    """
    return _general_hamming(M, 0.54, sym, dtype=dtype)


@window_function_register.register()
def _hann(M: int, sym: bool=True, dtype: str='float64') -> Tensor:
    """Compute a Hann window.
    The Hann window is a taper formed by using a raised cosine or sine-squared
    with ends that touch zero.

    Implemented as a generalized Hamming window with `alpha = 0.5`.
    """
    return _general_hamming(M, 0.5, sym, dtype=dtype)


@window_function_register.register()
def _tukey(M: int, alpha=0.5, sym: bool=True, dtype: str='float64') -> Tensor:
    """Compute a Tukey window.
    The Tukey window is also known as a tapered cosine window.

    Args:
        M: Number of samples of the window.
        alpha: Fraction of the window inside the cosine tapered region.
            `alpha <= 0` yields a rectangular window, `alpha >= 1` a Hann
            window.
        sym: True for a symmetric window, False for a periodic one.
        dtype: Data type of the returned window.
    """
    if _len_guards(M):
        return paddle.ones((M, ), dtype=dtype)

    if alpha <= 0:
        return paddle.ones((M, ), dtype=dtype)
    elif alpha >= 1.0:
        # Delegate to the module's Hann implementation and keep the requested
        # dtype.
        return _hann(M, sym=sym, dtype=dtype)

    M, needs_trunc = _extend(M, sym)

    n = paddle.arange(0, M, dtype=dtype)
    # Number of samples in each tapered edge (besides the end sample).
    width = int(alpha * (M - 1) / 2.0)
    n1 = n[0:width + 1]
    n2 = n[width + 1:M - width - 1]
    n3 = n[M - width - 1:]

    # Rising cosine edge, flat top, falling cosine edge.
    w1 = 0.5 * (1 + paddle.cos(math.pi * (-1 + 2.0 * n1 / alpha / (M - 1))))
    w2 = paddle.ones(n2.shape, dtype=dtype)
    w3 = 0.5 * (1 + paddle.cos(math.pi * (-2.0 / alpha + 1 + 2.0 * n3 / alpha /
                                          (M - 1))))
    w = paddle.concat([w1, w2, w3])

    return _truncate(w, needs_trunc)


@window_function_register.register()
def _gaussian(M: int, std: float, sym: bool=True,
              dtype: str='float64') -> Tensor:
    """Compute a Gaussian window.

    The window is `exp(-n**2 / (2 * std**2))` with `n` centered on the middle
    of the window; `std` is the standard deviation.
    """
    if _len_guards(M):
        return paddle.ones((M, ), dtype=dtype)
    M, needs_trunc = _extend(M, sym)

    # Sample positions relative to the window center.
    offsets = paddle.arange(0, M, dtype=dtype) - (M - 1.0) / 2.0
    two_variance = 2 * std * std
    window = paddle.exp(-(offsets**2) / two_variance)

    return _truncate(window, needs_trunc)


@window_function_register.register()
def _exponential(M: int,
                 center=None,
                 tau=1.0,
                 sym: bool=True,
                 dtype: str='float64') -> Tensor:
    """Compute an exponential (or Poisson) window.

    The window is `exp(-|n - center| / tau)`; `center` defaults to the middle
    of the window.

    Raises:
        ValueError: If `sym` is true and an explicit `center` is given.
    """
    if sym and center is not None:
        raise ValueError("If sym==True, center must be None.")
    if _len_guards(M):
        return paddle.ones((M, ), dtype=dtype)
    M, needs_trunc = _extend(M, sym)

    peak = (M - 1) / 2 if center is None else center
    positions = paddle.arange(0, M, dtype=dtype)
    window = paddle.exp(-paddle.abs(positions - peak) / tau)

    return _truncate(window, needs_trunc)


@window_function_register.register()
def _triang(M: int, sym: bool=True, dtype: str='float64') -> Tensor:
    """Compute a triangular window.

    The rising half is computed and then mirrored; for an odd length the peak
    sample is not duplicated.
    """
    if _len_guards(M):
        return paddle.ones((M, ), dtype=dtype)
    M, needs_trunc = _extend(M, sym)

    half = paddle.arange(1, (M + 1) // 2 + 1, dtype=dtype)
    if M % 2:
        rising = 2 * half / (M + 1.0)
        window = paddle.concat([rising, rising[-2::-1]])
    else:
        rising = (2 * half - 1.0) / M
        window = paddle.concat([rising, rising[::-1]])

    return _truncate(window, needs_trunc)


@window_function_register.register()
def _bohman(M: int, sym: bool=True, dtype: str='float64') -> Tensor:
    """Compute a Bohman window.
    The Bohman window is the autocorrelation of a cosine window.
    """
    if _len_guards(M):
        return paddle.ones((M, ), dtype=dtype)
    M, needs_trunc = _extend(M, sym)

    # Interior points only; both end samples are set to zero below.
    interior = paddle.abs(paddle.linspace(-1, 1, M, dtype=dtype)[1:-1])
    cos_term = (1 - interior) * paddle.cos(math.pi * interior)
    sin_term = 1.0 / math.pi * paddle.sin(math.pi * interior)
    window = _cat([0, cos_term + sin_term, 0], dtype)

    return _truncate(window, needs_trunc)


@window_function_register.register()
def _blackman(M: int, sym: bool=True, dtype: str='float64') -> Tensor:
    """Compute a Blackman window.
    The Blackman window is a taper formed by using the first three terms of
    a summation of cosines. It was designed to have close to the minimal
    leakage possible.  It is close to optimal, only slightly worse than a
    Kaiser window.

    Implemented as a cosine sum with coefficients `[0.42, 0.50, 0.08]`.
    """
    return _general_cosine(M, [0.42, 0.50, 0.08], sym, dtype=dtype)


@window_function_register.register()
def _cosine(M: int, sym: bool=True, dtype: str='float64') -> Tensor:
    """Compute a window with a simple cosine shape.

    The window is `sin(pi / M * (n + 0.5))` for `n = 0 .. M - 1`.
    """
    if _len_guards(M):
        return paddle.ones((M, ), dtype=dtype)
    M, needs_trunc = _extend(M, sym)
    positions = paddle.arange(0, M, dtype=dtype) + 0.5
    window = paddle.sin(math.pi / M * positions)

    return _truncate(window, needs_trunc)


def get_window(
        window: Union[str, Tuple[str, float]],
        win_length: int,
        fftbins: bool=True,
        dtype: str='float64', ) -> Tensor:
    """Return a window of a given length and type.

    Args:
        window (Union[str, Tuple[str, float]]): Name of the window function, or a tuple of the name followed by its extra positional parameters. Supported window functions: 'hamming', 'hann', 'gaussian', 'general_gaussian', 'exponential', 'triang', 'bohman', 'blackman', 'cosine', 'tukey', 'taylor'. 'gaussian' and 'exponential' must be given as a tuple.
        win_length (int): Number of samples.
        fftbins (bool, optional): If True, create a "periodic" window. Otherwise, create a "symmetric" window, for use in filter design. Defaults to True.
        dtype (str, optional): The data type of the return window. Defaults to 'float64'.

    Returns:
        Tensor: The window represented as a tensor.

    Raises:
        ValueError: If `window` is neither a str nor a tuple, if a window that
            needs parameters is given as a plain string, or if the window name
            is unknown.

    Examples:
        .. code-block:: python

            from paddlespeech.audio.functional.window import get_window

            n_fft = 512
            cosine_window = get_window('cosine', n_fft)

            std = 7
            gaussian_window = get_window(('gaussian', std), n_fft)
    """
    if isinstance(window, tuple):
        # First element is the name, the rest are positional parameters.
        winstr, args = window[0], window[1:]
    elif isinstance(window, str):
        if window in ('gaussian', 'exponential'):
            raise ValueError("The '" + window + "' window needs one or "
                             "more parameters -- pass a tuple.")
        winstr, args = window, ()
    else:
        raise ValueError("%s as window type is not supported." %
                         str(type(window)))

    # Window functions are registered under their private, '_'-prefixed name.
    try:
        winfunc = window_function_register.get('_' + winstr)
    except KeyError as e:
        raise ValueError("Unknown window type.") from e

    # A periodic ("fftbins") window is the non-symmetric variant.
    return winfunc(win_length, *args, dtype=dtype, sym=not fftbins)


================================================
FILE: paddlespeech/audio/streamdata/__init__.py
================================================
# Copyright (c) 2017-2019 NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# See the LICENSE file for licensing terms (BSD-style).
# Modified from https://github.com/webdataset/webdataset
#
# flake8: noqa
from .cache import cached_tarfile_samples
from .cache import cached_tarfile_to_samples
from .cache import lru_cleanup
from .cache import pipe_cleaner
from .compat import FluidWrapper
from .compat import WebDataset
from .compat import WebLoader
from .extradatasets import MockDataset
from .extradatasets import with_epoch
from .extradatasets import with_length
from .filters import associate
from .filters import audio_cmvn
from .filters import audio_compute_fbank
from .filters import audio_data_filter
from .filters import audio_padding
from .filters import audio_resample
from .filters import audio_spec_aug
from .filters import audio_tokenize
from .filters import batched
from .filters import decode
from .filters import detshuffle
from .filters import extract_keys
from .filters import getfirst
from .filters import info
from .filters import map
from .filters import map_dict
from .filters import map_tuple
from .filters import pipelinefilter
from .filters import placeholder
from .filters import rename
from .filters import rename_keys
from .filters import select
from .filters import shuffle
from .filters import slice
from .filters import sort
from .filters import to_tuple
from .filters import transform_with
from .filters import unbatched
from .filters import xdecode
from .handlers import ignore_and_continue
from .handlers import ignore_and_stop
from .handlers import reraise_exception
from .handlers import warn_and_continue
from .handlers import warn_and_stop
from .mix import RandomMix
from .mix import RoundRobin
from .pipeline import DataPipeline
from .shardlists import MultiShardSample
from .shardlists import non_empty
from .shardlists import resampled
from .shardlists import ResampledShards
from .shardlists import shardspec
from .shardlists import SimpleShardList
from .shardlists import single_node_only
from .shardlists import split_by_node
from .shardlists import split_by_worker
from .tariterators import tarfile_samples
from .tariterators import tarfile_to_samples
from .utils import PipelineStage
from .utils import repeatedly
from .writer import numpy_dumps
from .writer import ShardWriter
from .writer import TarWriter


================================================
FILE: paddlespeech/audio/streamdata/autodecode.py
================================================
#
# Copyright (c) 2017-2021 NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# This file is part of the WebDataset library.
# See the LICENSE file for licensing terms (BSD-style).
# Modified from https://github.com/webdataset/webdataset
#
"""Automatically decode webdataset samples."""
import io
import json
import os
import pickle
import re
import tempfile
from functools import partial

import numpy as np
# List of file-name extensions treated as images; the string literal below is
# a free-standing description of this list and has no runtime effect.
"""Extensions passed on to the image decoder."""
image_extensions = "jpg jpeg png ppm pgm pbm pnm".split()

################################################################
# handle basic datatypes
################################################################


def paddle_loads(data):
    """Decode data with paddle.load, importing paddle only when called.

    :param data: binary data to be decoded
    """
    import paddle

    return paddle.load(io.BytesIO(data))


def tenbin_loads(data):
    """Decode data with the sibling `tenbin` module, imported only when called.

    :param data: binary data to be decoded
    """
    from . import tenbin

    return tenbin.decode_buffer(data)


def msgpack_loads(data):
    """Decode data with msgpack.unpackb, importing msgpack only when called.

    :param data: binary data to be decoded
    """
    import msgpack

    return msgpack.unpackb(data)


def npy_loads(data):
    """Decode the bytes of a .npy file into a numpy array.

    :param data: binary data to be decoded
    """
    from numpy.lib import format as npy_format

    return npy_format.read_array(io.BytesIO(data))


def cbor_loads(data):
    """Decode data with cbor.loads, importing cbor only when called.

    :param data: binary data to be decoded
    """
    import cbor

    return cbor.loads(data)


# Maps a file-name extension to the function that decodes the raw bytes of
# such a file; `basichandlers` looks decoders up in this table.
# SECURITY NOTE: the "pyd" and "pickle" decoders call pickle.loads, which can
# execute arbitrary code; only decode data from trusted sources with them.
decoders = {
    "txt": lambda data: data.decode("utf-8"),
    "text": lambda data: data.decode("utf-8"),
    "transcript": lambda data: data.decode("utf-8"),
    "cls": lambda data: int(data),
    "cls2": lambda data: int(data),
    "index": lambda data: int(data),
    "inx": lambda data: int(data),
    "id": lambda data: int(data),
    "json": lambda data: json.loads(data),
    "jsn": lambda data: json.loads(data),
    "pyd": lambda data: pickle.loads(data),
    "pickle": lambda data: pickle.loads(data),
    "pdparams": lambda data: paddle_loads(data),
    "ten": tenbin_loads,
    "tb": tenbin_loads,
    "mp": msgpack_loads,
    "msg": msgpack_loads,
    "npy": npy_loads,
    "npz": lambda data: np.load(io.BytesIO(data)),
    "cbor": cbor_loads,
}


def basichandlers(key, data):
    """Decode data according to the last extension of key.

    This function is usually part of the post= decoders.
    The decoder is looked up in the module-level `decoders` table:

    - txt text transcript -> unicode string
    - cls cls2 index inx id -> int
    - json jsn -> JSON decoding
    - pyd pickle -> pickle decoding
    - pdparams -> paddle.load
    - ten tb -> fast tensor loading
    - mp msg -> messagepack decoding
    - npy npz -> numpy loading
    - cbor -> CBOR decoding

    :param key: file name extension
    :param data: binary data to be decoded
    :return: the decoded value, or None if the extension has no decoder
    """
    # Keep only what follows the last dot of the key.
    extension = re.sub(r".*[.]", "", key)

    decoder = decoders.get(extension)
    if decoder is None:
        return None
    return decoder(data)


################################################################
# Generic extension handler.
################################################################


def call_extension_handler(key, data, f, extensions):
    """Apply `f` to `data` when the tail of `key` matches one of `extensions`.

    Both the key and each candidate are split on "."; a candidate matches
    when its components equal the last components of the (lower-cased) key.
    The first matching candidate decides the result.

    :param key: actual key found in the sample
    :param data: binary data
    :param f: decoder function
    :param extensions: list of matching extensions
    :return: `f(data)` for the first match, otherwise None
    """
    parts = key.lower().split(".")
    for candidate in extensions:
        wanted = candidate.split(".")
        depth = len(wanted)
        if depth <= len(parts) and parts[-depth:] == wanted:
            return f(data)
    return None


def handle_extension(extensions, f):
    """Build a `(key, data)` decoder that calls `f` for the given extensions.

    `extensions` is a whitespace separated string; it is lower-cased and
    split here. An entry may contain dots, in which case that many trailing
    components of the key must match (see `call_extension_handler`).
    Comparisons are case insensitive.

    Examples:
    handle_extension("jpg jpeg", my_decode_jpg)  # invoked for any file.jpg
    handle_extension("seg.jpg", special_case_jpg)  # invoked only for file.seg.jpg

    :param extensions: space separated list of extensions
    :param f: decoder function taking the binary data
    :return: a `functools.partial` wrapping `call_extension_handler`
    """
    wanted = extensions.lower().split()
    decoder = partial(call_extension_handler, f=f, extensions=wanted)
    return decoder


################################################################
# handle images
################################################################

# Maps an imagespec name to the triple (array type, element type, mode) that
# ImageHandler.__call__ unpacks: the array type selects numpy/paddle/pil
# output, the element type selects uint8 or float, and the mode is
# upper-cased and passed to PIL's `Image.convert`.
imagespecs = {
    "l8": ("numpy", "uint8", "l"),
    "rgb8": ("numpy", "uint8", "rgb"),
    "rgba8": ("numpy", "uint8", "rgba"),
    "l": ("numpy", "float", "l"),
    "rgb": ("numpy", "float", "rgb"),
    "rgba": ("numpy", "float", "rgba"),
    "paddlel8": ("paddle", "uint8", "l"),
    "paddlergb8": ("paddle", "uint8", "rgb"),
    "paddlergba8": ("paddle", "uint8", "rgba"),
    "paddlel": ("paddle", "float", "l"),
    "paddlergb": ("paddle", "float", "rgb"),
    "paddle": ("paddle", "float", "rgb"),
    "paddlergba": ("paddle", "float", "rgba"),
    "pill": ("pil", None, "l"),
    "pil": ("pil", None, "rgb"),
    "pilrgb": ("pil", None, "rgb"),
    "pilrgba": ("pil", None, "rgba"),
}


class ImageHandler:
    """Decode image data using the given `imagespec`.

    The `imagespec` specifies whether the image is decoded
    to numpy/paddle/pil, decoded to uint8/float, and decoded
    to l/rgb/rgba:

    - l8: numpy uint8 l
    - rgb8: numpy uint8 rgb
    - rgba8: numpy uint8 rgba
    - l: numpy float l
    - rgb: numpy float rgb
    - rgba: numpy float rgba
    - paddlel8: paddle uint8 l
    - paddlergb8: paddle uint8 rgb
    - paddlergba8: paddle uint8 rgba
    - paddlel: paddle float l
    - paddlergb: paddle float rgb
    - paddle: paddle float rgb
    - paddlergba: paddle float rgba
    - pill: pil None l
    - pil: pil None rgb
    - pilrgb: pil None rgb
    - pilrgba: pil None rgba

    numpy results keep the layout produced by `np.asarray(img)`; paddle
    results are moved to channel-first layout before conversion.
    """

    def __init__(self, imagespec, extensions=image_extensions):
        """Create an image handler.

        :param imagespec: short string indicating the type of decoding
            (matched case-insensitively against `imagespecs`)
        :param extensions: list of extensions the image handler is invoked for
        :raises ValueError: if `imagespec` is not a known spec
        """
        # Lower-case before validating so the check agrees with the value
        # that is stored (previously "RGB" was rejected although the stored
        # spec was lower-cased).
        spec = imagespec.lower()
        if spec not in imagespecs:
            raise ValueError("Unknown imagespec: %s" % imagespec)
        self.imagespec = spec
        self.extensions = extensions

    def __call__(self, key, data):
        """Perform image decoding.

        :param key: file name extension
        :param data: binary data
        :return: the decoded image, or None when the extension of `key`
            is not in `self.extensions`
        :raises ValueError: if the converted image is not uint8
        """
        import PIL.Image

        extension = re.sub(r".*[.]", "", key)
        if extension.lower() not in self.extensions:
            return None
        atype, etype, mode = imagespecs[self.imagespec]
        with io.BytesIO(data) as stream:
            img = PIL.Image.open(stream)
            # Force the pixel data to be read while the stream is still open.
            img.load()
            img = img.convert(mode.upper())
        if atype == "pil":
            return img
        elif atype == "numpy":
            result = np.asarray(img)
            if result.dtype != np.uint8:
                raise ValueError("ImageHandler: numpy image must be uint8")
            if etype == "uint8":
                return result
            else:
                return result.astype("f") / 255.0
        elif atype == "paddle":
            import paddle

            result = np.asarray(img)
            if result.dtype != np.uint8:
                raise ValueError("ImageHandler: paddle image must be uint8")
            if result.ndim == 2:
                # Grayscale images come back as (H, W); add a channel axis so
                # the channel-first transpose below is valid.
                result = result[:, :, None]
            result = np.array(result.transpose(2, 0, 1))
            # `paddle.tensor` is a module, not a constructor; tensors are
            # created with `paddle.to_tensor`.
            if etype == "uint8":
                return paddle.to_tensor(result)
            else:
                return paddle.to_tensor(result.astype("f") / 255.0)
        return None


def imagehandler(imagespec, extensions=image_extensions):
    """Create an image handler.

    Lower-case convenience alias that simply constructs an `ImageHandler`.

    :param imagespec: textual image spec
    :param extensions: list of extensions the handler should be applied for
    :return: a new `ImageHandler` instance
    """
    handler = ImageHandler(imagespec, extensions)
    return handler


################################################################
# torch video
################################################################
'''
def torch_video(key, data):
    """Decode video using the torchvideo library.

    :param key: file name extension
    :param data: data to be decoded
    """
    extension = re.sub(r".*[.]", "", key)
    if extension not in "mp4 ogv mjpeg avi mov h264 mpg webm wmv".split():
        return None

    import torchvision.io

    with tempfile.TemporaryDirectory() as dirname:
        fname = os.path.join(dirname, f"file.{extension}")
        with open(fname, "wb") as stream:
            stream.write(data)
        return torchvision.io.read_video(fname, pts_unit="sec")
'''

################################################################
# paddlespeech.audio
################################################################


def paddle_audio(key, data):
    """Decode audio through `paddlespeech.audio.backends.soundfile_load`.

    The bytes are written to a file inside a temporary directory, because
    the loader is called with a file name; the directory is removed when
    the function returns.

    :param key: file name extension
    :param data: data to be decoded
    :return: the loader's result, or None when the extension of `key` is not
        one of flac, mp3, sox, wav, m4a, ogg, wma (case sensitive)
    """
    suffix = re.sub(r".*[.]", "", key)
    supported = ("flac", "mp3", "sox", "wav", "m4a", "ogg", "wma")
    if suffix in supported:
        with tempfile.TemporaryDirectory() as workdir:
            path = os.path.join(workdir, f"file.{suffix}")
            with open(path, "wb") as out:
                out.write(data)
            return paddlespeech.audio.backends.soundfile_load(path)
    return None


################################################################
# special class for continuing decoding
################################################################


class Continue:
    """Marker result telling the decoder to keep going with a new key/data.

    A handler returns an instance of this class when it has only
    transformed the payload (typically decompression) and decoding
    should continue, as in:

        def decompressor(key, data):
            if key.endswith(".gz"):
                return Continue(key[:-3], decompress(data))
            return None
    """

    def __init__(self, key, data):
        """Store the replacement key and data.

        :param key: key to use for the remaining handlers
        :param data: data to pass to the remaining handlers
        """
        self.key = key
        self.data = data


def gzfilter(key, data):
    """Decode .gz files.

    This decompresses the data and then continues decoding with the
    ".gz" suffix stripped from the key.

    :param key: file name extension
    :param data: binary data
    :return: None when `key` does not end in ".gz", otherwise a `Continue`
        carrying the shortened key and the decompressed bytes
    """
    import gzip

    if not key.endswith(".gz"):
        return None
    # Use a context manager so the GzipFile is closed deterministically
    # instead of being left for the garbage collector.
    with gzip.open(io.BytesIO(data)) as stream:
        decompressed = stream.read()
    return Continue(key[:-3], decompressed)


################################################################
# decode entire training samples
################################################################

# Handler lists that `Decoder` falls back to when its `pre` / `post`
# arguments are None: gzip decompression runs before the user handlers,
# the extension-table decoders run after them.
default_pre_handlers = [gzfilter]
default_post_handlers = [basichandlers]


class Decoder:
    """Decode samples using a list of handlers.

    For each key/data item, this iterates through the list of
    handlers until some handler returns something other than None.
    """

    def __init__(self, handlers, pre=None, post=None, only=None, partial=False):
        """Create a Decoder.

        :param handlers: main list of handlers
        :param pre: handlers called before the main list (.gz handler by default)
        :param post: handlers called after the main list (default handlers by default)
        :param only: a list (or whitespace separated string) of keys; when given,
            only those keys are decoded and all other fields are passed through unchanged
        :param partial: allow partial decoding (i.e., don't decode fields that aren't of type bytes)
        """
        if isinstance(only, str):
            only = only.split()
        self.only = only if only is None else set(only)
        if pre is None:
            pre = default_pre_handlers
        if post is None:
            post = default_post_handlers
        # Normalize to lists so that any mix of lists/tuples can be
        # concatenated below.
        pre, handlers, post = list(pre), list(handlers), list(post)
        assert all(callable(h)
                   for h in handlers), f"one of {handlers} not callable"
        assert all(callable(h) for h in pre), f"one of {pre} not callable"
        assert all(callable(h) for h in post), f"one of {post} not callable"
        self.handlers = pre + handlers + post
        self.partial = partial

    def decode1(self, key, data):
        """Decode a single field of a sample.

        Handlers are tried in order. A handler returning `Continue` replaces
        the current key/data and decoding proceeds with the next handler;
        the first other non-None result is returned. If no handler produces
        a result, the (possibly replaced) data is returned as is.

        :param key: file name extension
        :param data: binary data
        """
        key = "." + key
        for f in self.handlers:
            result = f(key, data)
            if isinstance(result, Continue):
                key, data = result.key, result.data
                continue
            if result is not None:
                return result
        return data

    def decode(self, sample):
        """Decode an entire sample.

        Keys starting with "_" are treated as metadata: bytes values are
        decoded as UTF-8 and everything else is copied unchanged.

        :param sample: the sample, a dictionary of key value pairs
        :return: a new dictionary with the decoded fields
        """
        result = {}
        assert isinstance(sample, dict), sample
        for k, v in list(sample.items()):
            # Slice instead of indexing so an empty key does not raise.
            if k[:1] == "_":
                if isinstance(v, bytes):
                    v = v.decode("utf-8")
                result[k] = v
                continue
            if self.only is not None and k not in self.only:
                result[k] = v
                continue
            assert v is not None
            if self.partial:
                if isinstance(v, bytes):
                    result[k] = self.decode1(k, v)
                else:
                    result[k] = v
            else:
                assert isinstance(v, bytes)
                result[k] = self.decode1(k, v)
        return result

    def __call__(self, sample):
        """Decode an entire sample.

        :param sample: the sample
        """
        assert isinstance(sample, dict), (len(sample), sample)
        return self.decode(sample)


================================================
FILE: paddlespeech/audio/streamdata/cache.py
================================================
# Copyright (c) 2017-2019 NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# See the LICENSE file for licensing terms (BSD-style).
# Modified from https://github.com/webdataset/webdataset
import os
import random
import re
import sys
from urllib.parse import urlparse

from . import filters
from . import gopen
from .handlers import reraise_exception
from .tariterators import tar_file_and_group_expander

# Cache defaults, read once at import time from the environment.
# `get_file_cached` uses them when called with cache_dir=None / cache_size=-1.
default_cache_dir = os.environ.get("WDS_CACHE", "./_cache")
default_cache_size = float(os.environ.get("WDS_CACHE_SIZE", "1e18"))


def lru_cleanup(cache_dir, cache_size, keyfn=os.path.getctime, verbose=False):
    """Shrink the file cache in `cache_dir` until it fits in `cache_size`.

    Files are ranked with `keyfn` (by default `os.path.getctime`) and the
    files with the smallest key are deleted first, until the total size of
    the remaining files is no larger than `cache_size`.

    :param cache_dir: root directory of the cache; nothing happens if it does not exist
    :param cache_size: maximum total size in bytes
    :param keyfn: function mapping a file path to its sort key
    :param verbose: print each deleted file name to stderr
    """
    if not os.path.exists(cache_dir):
        return
    # Collect every file and its size in a single directory walk.
    sizes = {}
    for dirpath, _dirnames, filenames in os.walk(cache_dir):
        for filename in filenames:
            path = os.path.join(dirpath, filename)
            sizes[path] = os.path.getsize(path)
    total_size = sum(sizes.values())
    if total_size <= cache_size:
        return
    # Largest key first, so that pop() below removes the smallest key.
    files = sorted(sizes, key=keyfn, reverse=True)
    # delete files until we're under the cache size
    while files and total_size > cache_size:
        fname = files.pop()
        total_size -= sizes[fname]
        if verbose:
            print("# deleting %s" % fname, file=sys.stderr)
        os.remove(fname)


def download(url, dest, chunk_size=1024**2, verbose=False):
    """Download a file from `url` to `dest`.

    The data is first written to a per-process temporary file next to
    `dest` and moved into place only after the copy has completed, so a
    partially written file never appears under the final name.

    :param url: source passed to `gopen.gopen`
    :param dest: destination file name
    :param chunk_size: number of bytes requested per read
    :param verbose: accepted for interface compatibility; not used here
    """
    temp = dest + f".temp{os.getpid()}"
    try:
        with gopen.gopen(url) as stream:
            with open(temp, "wb") as f:
                while True:
                    data = stream.read(chunk_size)
                    if not data:
                        break
                    f.write(data)
        # os.replace overwrites an existing destination on every platform.
        os.replace(temp, dest)
    except BaseException:
        # Do not leave a partial temp file behind in the cache directory.
        if os.path.exists(temp):
            os.remove(temp)
        raise


def pipe_cleaner(spec):
    """Guess the actual URL from a "pipe:" specification.

    For a spec such as "pipe:curl -s http://host/x.tar" the first word that
    looks like a URL with an http, https, gs, ais or s3 scheme is returned.
    The scheme must be followed by ":" so that command words like "gsutil"
    or the "s3" in "aws s3 cp" are not mistaken for the URL.

    :param spec: URL or "pipe:" command specification
    :return: the guessed URL; the command without the "pipe:" prefix when no
        URL is found; or `spec` unchanged when it is not a pipe spec
    """
    if spec.startswith("pipe:"):
        spec = spec[5:]
        for word in spec.split(" "):
            if re.match(r"^(https?|gs|ais|s3):", word):
                return word
    return spec


def get_file_cached(
        spec,
        cache_size=-1,
        cache_dir=None,
        url_to_name=pipe_cleaner,
        verbose=False, ):
    """Return the local cache path for `spec`, downloading it if necessary.

    The cache location is derived from the path component of the URL
    returned by `url_to_name(spec)`: the directory part (with ":", "/", "|"
    and ";" replaced by "_") becomes a sub-directory of `cache_dir`, and the
    file name is kept. When the file is missing, `lru_cleanup` runs first and
    then `spec` itself is downloaded.

    :param spec: URL or "pipe:" specification to fetch
    :param cache_size: cache size limit; -1 selects `default_cache_size`
    :param cache_dir: cache root; None selects `default_cache_dir`
    :param url_to_name: maps `spec` to the URL used to name the cache entry
    :param verbose: print progress messages to stderr
    :return: path of the cached file
    """
    cache_size = default_cache_size if cache_size == -1 else cache_size
    cache_dir = default_cache_dir if cache_dir is None else cache_dir
    url = url_to_name(spec)
    remote_dir, filename = os.path.split(urlparse(url).path)
    subdir = re.sub(r"[:/|;]", "_", remote_dir.lstrip("/"))
    destdir = os.path.join(cache_dir, subdir)
    os.makedirs(destdir, exist_ok=True)
    dest = os.path.join(destdir, filename)
    if os.path.exists(dest):
        return dest
    if verbose:
        print("# downloading %s to %s" % (url, dest), file=sys.stderr)
    lru_cleanup(cache_dir, cache_size, verbose=verbose)
    download(spec, dest, verbose=verbose)
    return dest


def get_filetype(fname):
    """Return the description printed by the `file` utility for `fname`.

    The utility is run directly (no shell), so file names containing quotes
    or other shell metacharacters are passed through verbatim and cannot
    inject commands.

    :param fname: path of the file to inspect
    :return: standard output of `file`, or "" if the utility cannot be started
    """
    import subprocess

    try:
        proc = subprocess.run(
            # "--" stops option parsing so names starting with "-" are safe.
            ["file", "--", fname],
            stdout=subprocess.PIPE,
            universal_newlines=True)
    except OSError:
        # `file` is not installed or not executable: report "unknown".
        return ""
    return proc.stdout


def check_tar_format(fname):
    """Check whether a file is a tar archive.

    The decision is based on the text returned by `get_filetype`: the file
    is accepted when that text mentions "tar archive" or "gzip compressed".

    :param fname: path of the file to check
    :return: True if the file looks like a (possibly gzipped) tar archive
    """
    description = get_filetype(fname)
    accepted = ("tar archive", "gzip compressed")
    return any(marker in description for marker in accepted)


# Integer flag read from the environment at import time; `cached_url_opener`
# combines it with its `verbose` argument (`verbose or verbose_cache`).
verbose_cache = int(os.environ.get("WDS_VERBOSE_CACHE", "0"))


def _open_cached_file(url,
                      cache_size,
                      cache_dir,
                      url_to_name,
                      validator,
                      verbose,
                      always,
                      attempts=5):
    """Resolve `url` to a local file, validate it and open it for reading.

    If the file disappears between validation and opening (e.g. another
    process ran `lru_cleanup`), the whole fetch is retried up to `attempts`
    times with a random pause before re-raising.
    """
    import time  # only needed on the retry path

    while True:
        from_cache = always or not os.path.exists(url)
        if from_cache:
            dest = get_file_cached(
                url,
                cache_size=cache_size,
                cache_dir=cache_dir,
                url_to_name=url_to_name,
                verbose=verbose, )
        else:
            dest = url
        if verbose:
            print("# opening %s" % dest, file=sys.stderr)
        assert os.path.exists(dest)
        if not validator(dest):
            ftype = get_filetype(dest)
            with open(dest, "rb") as f:
                head = f.read(200)
            if from_cache:
                # Only discard files this cache created; never delete a
                # local file that the caller pointed us at.
                os.remove(dest)
            raise ValueError(
                "%s (%s) is not a tar archive, but a %s, contains %s" %
                (dest, url, ftype, repr(head)))
        try:
            return open(dest, "rb")
        except FileNotFoundError:
            # dealing with race conditions in lru_cleanup
            attempts -= 1
            if attempts <= 0:
                raise
            time.sleep(random.random() * 10)


def cached_url_opener(
        data,
        handler=reraise_exception,
        cache_size=-1,
        cache_dir=None,
        url_to_name=pipe_cleaner,
        validator=check_tar_format,
        verbose=False,
        always=False, ):
    """Given a stream of url names (packaged in `dict(url=url)`), yield opened streams.

    Each sample is updated in place with a `stream` entry holding a binary
    file object for the local (or cached) copy of `sample["url"]`.

    :param data: iterable of sample dictionaries, each containing "url"
    :param handler: called with any exception; a true result skips the
        sample, a false result stops the iteration
    :param cache_size: cache size limit passed to `get_file_cached`
    :param cache_dir: cache directory passed to `get_file_cached`
    :param url_to_name: maps a url/spec to the name used for the cache entry
    :param validator: predicate on the local path; files that fail it are
        rejected with ValueError (cache entries are also deleted)
    :param verbose: print progress to stderr (also enabled by `verbose_cache`)
    :param always: use the cache even if `url` exists as a local path
    """
    verbose = verbose or verbose_cache
    for sample in data:
        assert isinstance(sample, dict), sample
        assert "url" in sample
        url = sample["url"]
        try:
            stream = _open_cached_file(
                url,
                cache_size=cache_size,
                cache_dir=cache_dir,
                url_to_name=url_to_name,
                validator=validator,
                verbose=verbose,
                always=always, )
            sample.update(stream=stream)
            yield sample
        except Exception as exn:
            # Attach the url so the handler can tell which shard failed.
            exn.args = exn.args + (url, )
            if handler(exn):
                continue
            else:
                break


def cached_tarfile_samples(
        src,
        handler=reraise_exception,
        cache_size=-1,
        cache_dir=None,
        verbose=False,
        url_to_name=pipe_cleaner,
        always=False, ):
    """Open the urls in `src` through the file cache and expand them into samples.

    The url samples are passed to `cached_url_opener`, and the resulting
    streams are fed to `tar_file_and_group_expander` with the same `handler`.

    :param src: iterable of `dict(url=...)` samples
    :param handler: exception handler shared by both stages
    :param cache_size: forwarded to `cached_url_opener`
    :param cache_dir: forwarded to `cached_url_opener`
    :param verbose: forwarded to `cached_url_opener`
    :param url_to_name: forwarded to `cached_url_opener`
    :param always: forwarded to `cached_url_opener`
    """
    opener_options = dict(
        handler=handler,
        cache_size=cache_size,
        cache_dir=cache_dir,
        verbose=verbose,
        url_to_name=url_to_name,
        always=always, )
    opened = cached_url_opener(src, **opener_options)
    return tar_file_and_group_expander(opened, handler=handler)


# Curried form of `cached_tarfile_samples` built with `filters.pipelinefilter`:
# calling it with the options returns a stage that takes the source as its
# single argument.
cached_tarfile_to_samples = filters.pipelinefilter(cached_tarfile_samples)


================================================
FILE: paddlespeech/audio/streamdata/compat.py
================================================
# Copyright (c) 2017-2019 NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# See the LICENSE file for licensing terms (BSD-style).
# Modified from https://github.com/webdataset/webdataset
import yaml

from . import autodecode
from . import cache
from . import filters
from . import shardlists
from . import tariterators
from .filters import reraise_exception
from .paddle_utils import DataLoader
from .paddle_utils import IterableDataset
from .pipeline import DataPipeline


class FluidInterface:
    """Mixin providing chainable shortcuts for common pipeline stages.

    Every method builds a stage from the `filters` module (or a `Decoder`)
    and hands it to `self.compose`, which the class this is mixed into must
    provide.
    """

    def batched(self, batchsize):
        """Compose with `filters.batched(batchsize)`."""
        return self.compose(filters.batched(batchsize))

    def dynamic_batched(self, max_frames_in_batch):
        """Compose with `filters.dynamic_batched(max_frames_in_batch)`."""
        return self.compose(filters.dynamic_batched(max_frames_in_batch))

    def unbatched(self):
        """Compose with `filters.unbatched()`."""
        return self.compose(filters.unbatched())

    def listed(self, batchsize, partial=True):
        """Compose with `filters.batched` configured with `collation_fn=None`.

        The options are given to `filters.batched`, not to `compose`.
        NOTE(review): assumes `filters.batched` accepts the `batchsize`,
        `collation_fn` and `partial` keywords; confirm against filters.py.
        """
        return self.compose(
            filters.batched(
                batchsize=batchsize, collation_fn=None, partial=partial))

    def unlisted(self):
        """Compose with `filters.unlisted()`."""
        return self.compose(filters.unlisted())

    def log_keys(self, logfile=None):
        """Compose with `filters.log_keys(logfile)`."""
        return self.compose(filters.log_keys(logfile))

    def shuffle(self, size, **kw):
        """Compose with `filters.shuffle(size, **kw)`; a size below 1 is a no-op."""
        if size < 1:
            return self
        else:
            return self.compose(filters.shuffle(size, **kw))

    def map(self, f, handler=reraise_exception):
        """Compose with `filters.map(f, handler=handler)`."""
        return self.compose(filters.map(f, handler=handler))

    def decode(self,
               *args,
               pre=None,
               post=None,
               only=None,
               partial=False,
               handler=reraise_exception):
        """Map an `autodecode.Decoder` over the samples.

        String arguments are turned into `autodecode.ImageHandler`
        instances; all other arguments are used as handlers directly.
        """
        handlers = [
            autodecode.ImageHandler(x) if isinstance(x, str) else x
            for x in args
        ]
        decoder = autodecode.Decoder(
            handlers, pre=pre, post=post, only=only, partial=partial)
        return self.map(decoder, handler=handler)

    def map_dict(self, handler=reraise_exception, **kw):
        """Compose with `filters.map_dict(handler=handler, **kw)`."""
        return self.compose(filters.map_dict(handler=handler, **kw))

    def select(self, predicate, **kw):
        """Compose with `filters.select(predicate, **kw)`."""
        return self.compose(filters.select(predicate, **kw))

    def to_tuple(self, *args, handler=reraise_exception):
        """Compose with `filters.to_tuple(*args, handler=handler)`."""
        return self.compose(filters.to_tuple(*args, handler=handler))

    def map_tuple(self, *args, handler=reraise_exception):
        """Compose with `filters.map_tuple(*args, handler=handler)`."""
        return self.compose(filters.map_tuple(*args, handler=handler))

    def slice(self, *args):
        """Compose with `filters.slice(*args)`."""
        return self.compose(filters.slice(*args))

    def rename(self, **kw):
        """Compose with `filters.rename(**kw)`."""
        return self.compose(filters.rename(**kw))

    def rsample(self, p=0.5):
        """Compose with `filters.rsample(p)`."""
        return self.compose(filters.rsample(p))

    def rename_keys(self, *args, **kw):
        """Compose with `filters.rename_keys(*args, **kw)`."""
        return self.compose(filters.rename_keys(*args, **kw))

    def extract_keys(self, *args, **kw):
        """Compose with `filters.extract_keys(*args, **kw)`."""
        return self.compose(filters.extract_keys(*args, **kw))

    def xdecode(self, *args, **kw):
        """Compose with `filters.xdecode(*args, **kw)`."""
        return self.compose(filters.xdecode(*args, **kw))

    def audio_data_filter(self, *args, **kw):
        """Compose with `filters.audio_data_filter(*args, **kw)`."""
        return self.compose(filters.audio_data_filter(*args, **kw))

    def audio_tokenize(self, *args, **kw):
        """Compose with `filters.audio_tokenize(*args, **kw)`."""
        return self.compose(filters.audio_tokenize(*args, **kw))

    def resample(self, *args, **kw):
        """Compose with `filters.resample(*args, **kw)`."""
        return self.compose(filters.resample(*args, **kw))

    def audio_compute_fbank(self, *args, **kw):
        """Compose with `filters.audio_compute_fbank(*args, **kw)`."""
        return self.compose(filters.audio_compute_fbank(*args, **kw))

    def audio_spec_aug(self, *args, **kw):
        """Compose with `filters.audio_spec_aug(*args, **kw)`."""
        return self.compose(filters.audio_spec_aug(*args, **kw))

    def sort(self, size=500):
        """Compose with `filters.sort(size)`."""
        return self.compose(filters.sort(size))

    def audio_padding(self):
        """Compose with `filters.audio_padding()`."""
        return self.compose(filters.audio_padding())

    def audio_cmvn(self, cmvn_file):
        """Compose with `filters.audio_cmvn(cmvn_file)`."""
        return self.compose(filters.audio_cmvn(cmvn_file))


class WebDataset(DataPipeline, FluidInterface):
    """Small fluid-interface wrapper for DataPipeline."""

    def __init__(
            self,
            urls,
            handler=reraise_exception,
            resampled=False,
            repeat=False,
            shardshuffle=None,
            cache_size=0,
            cache_dir=None,
            detshuffle=False,
            nodesplitter=shardlists.single_node_only,
            verbose=False, ):
        """Assemble the shard source and the tar expansion stage.

        :param urls: an IterableDataset, the path of a .yaml/.yml spec, a spec
            dictionary containing "datasets", or a shard url specification
        :param handler: exception handler for the tar expansion stage
        :param resampled: use `shardlists.ResampledShards` for plain url specs
        :param repeat: accepted but not used by this constructor
        :param shardshuffle: shard shuffle buffer size; True means 100 and
            None disables shard shuffling
        :param cache_size: 0 reads shards directly; -1 or a positive value
            reads them through the file cache
        :param cache_dir: directory for the file cache
        :param detshuffle: use `filters.detshuffle` instead of `filters.shuffle`
        :param nodesplitter: stage appended right after the shard list
        :param verbose: passed to the cached tar stage
        """
        super().__init__()
        is_yaml_path = isinstance(urls, str) and urls.endswith(
            (".yaml", ".yml"))
        if isinstance(urls, IterableDataset):
            assert not resampled
            self.append(urls)
        elif is_yaml_path:
            with open(urls) as stream:
                spec = yaml.safe_load(stream)
            assert "datasets" in spec
            self.append(shardlists.MultiShardSample(spec))
        elif isinstance(urls, dict):
            assert "datasets" in urls
            self.append(shardlists.MultiShardSample(urls))
        elif resampled:
            self.append(shardlists.ResampledShards(urls))
        else:
            self.append(shardlists.SimpleShardList(urls))
            self.append(nodesplitter)
            self.append(shardlists.split_by_worker)
            if shardshuffle is True:
                shardshuffle = 100
            if shardshuffle is not None:
                shuffler = filters.detshuffle if detshuffle else filters.shuffle
                self.append(shuffler(shardshuffle))
        if cache_size != 0:
            assert cache_size == -1 or cache_size > 0
            self.append(
                cache.cached_tarfile_to_samples(
                    handler=handler,
                    verbose=verbose,
                    cache_size=cache_size,
                    cache_dir=cache_dir, ))
        else:
            self.append(tariterators.tarfile_to_samples(handler=handler))


class FluidWrapper(DataPipeline, FluidInterface):
    """Small fluid-interface wrapper for DataPipeline."""

    def __init__(self, initial):
        """Start an empty pipeline and append `initial` as its first stage.

        :param initial: the stage or dataset to wrap
        """
        super().__init__()
        first_stage = initial
        self.append(first_stage)


class WebLoader(DataPipeline, FluidInterface):
    """Fluid-interface pipeline whose first stage is a `DataLoader`."""

    def __init__(self, *args, **kw):
        """Create the pipeline.

        All positional and keyword arguments are forwarded to `DataLoader`.
        """
        loader = DataLoader(*args, **kw)
        super().__init__(loader)


================================================
FILE: paddlespeech/audio/streamdata/extradatasets.py
================================================
#
# Copyright (c) 2017-2021 NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# This file is part of the WebDataset library.
# See the LICENSE file for licensing terms (BSD-style).
# Modified from https://github.com/webdataset/webdataset
#
"""Train PyTorch models directly from POSIX tar archive.

Code works locally or over HTTP connections.
"""
from . import utils
from .paddle_utils import IterableDataset
from .utils import PipelineStage


class MockDataset(IterableDataset):
    """MockDataset.

    A dataset that yields one fixed sample a fixed number of times;
    intended for performance testing and unit testing.
    """

    def __init__(self, sample, length):
        """Create a mock dataset instance.

        :param sample: the sample to be returned repeatedly
        :param length: the length of the mock dataset
        """
        self.sample, self.length = sample, length

    def __iter__(self):
        """Yield `self.sample` exactly `self.length` times."""
        for _ in range(self.length):
            yield self.sample


class repeatedly(IterableDataset, PipelineStage):
    """Repeatedly yield samples from a dataset."""

    def __init__(self, source, nepochs=None, nbatches=None, length=None):
        """Create an instance of Repeatedly.

        :param source: the source dataset (stored as `self.source`)
        :param nepochs: repeat for a maximum of nepochs
        :param nbatches: repeat for a maximum of nbatches
        :param length: stored as `self.length`
        """
        self.source = source
        self.length = length
        # `invoke` reads self.nepochs; it must be stored here, otherwise
        # invoking the stage raises AttributeError.
        self.nepochs = nepochs
        self.nbatches = nbatches

    def invoke(self, source):
        """Return an iterator that iterates repeatedly over a source.

        :param source: the source handed to `utils.repeatedly`
        """
        return utils.repeatedly(
            source,
            nepochs=self.nepochs,
            nbatches=self.nbatches, )


class with_epoch(IterableDataset):
    """Change the actual and nominal length of an IterableDataset.

    The original dataset is iterated continuously, but new epoch
    boundaries are imposed every `length` samples. This exists mainly as
    a workaround for the odd logic in DataLoader, and is also useful for
    choosing smaller nominal epoch sizes with very large datasets.

    """

    def __init__(self, dataset, length):
        """Chop the dataset to the given length.

        :param dataset: IterableDataset (not stored here; the dataset to
            iterate is the one passed to `invoke`)
        :param length: declared length of the dataset
        """
        super().__init__()
        self.source = None
        self.length = length

    def __getstate__(self):
        """Return the pickled state of the dataset.

        The live iterator in `source` cannot be pickled, so it is replaced
        by None in the returned state.
        """
        state = {**self.__dict__, "source": None}
        return state

    def invoke(self, dataset):
        """Return an iterator over the dataset.

        At most `length` samples are produced per call. When the underlying
        iterator runs out it is restarted once; if the restarted iterator is
        empty as well, iteration ends early.
        """
        if self.source is None:
            self.source = iter(dataset)
        for _ in range(self.length):
            try:
                item = next(self.source)
            except StopIteration:
                # Source exhausted mid-epoch: start over from the beginning.
                self.source = iter(dataset)
                try:
                    item = next(self.source)
                except StopIteration:
                    return
            yield item
        self.source = None


class with_length(IterableDataset, PipelineStage):
    """Attach a user-specified length to a dataset without changing its samples."""

    def __init__(self, dataset, length):
        """Store the dataset and the length to report.

        :param dataset: source dataset
        :param length: stated length
        """
        super().__init__()
        self.length = length
        self.dataset = dataset

    def invoke(self, dataset):
        """Return a plain iterator over the dataset passed in."""
        return iter(dataset)

    def __len__(self):
        """Return the user specified length."""
        return self.length


================================================
FILE: paddlespeech/audio/streamdata/filters.py
================================================
# Copyright (c) 2017-2021 NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# This file is part of the WebDataset library.
# See the LICENSE file for licensing terms (BSD-style).
#
# Modified from https://github.com/webdataset/webdataset
# Modified from wenet(https://github.com/wenet-e2e/wenet)
"""A collection of iterators for data transformations.

These functions are plain iterator functions. You can find curried versions
in webdataset.filters, and you can find IterableDataset wrappers in
webdataset.processing.
"""
import io
import itertools
import os
import random
import re
import sys
import time
from fnmatch import fnmatch
from functools import reduce

import paddle

from . import autodecode
from . import utils
from ..transform.cmvn import GlobalCMVN
from ..transform.spec_augment import freq_mask
from ..transform.spec_augment import time_mask
from ..transform.spec_augment import time_warp
from ..utils.tensor_utils import pad_sequence
from .utils import PipelineStage
from paddlespeech.audio import backends
from paddlespeech.audio.compliance import kaldi


class FilterFunction(object):
    """Helper class for currying pipeline stages.

    Holds a function together with the extra positional and keyword
    arguments to apply after the data argument. A class is used for this
    roundabout construct because it can be pickled.
    """

    def __init__(self, f, *args, **kw):
        """Create a curried function."""
        self.f, self.args, self.kw = f, args, kw

    def __call__(self, data):
        """Call the curried function with the given argument."""
        extra_args, extra_kw = self.args, self.kw
        return self.f(data, *extra_args, **extra_kw)

    def __str__(self):
        """Compute a string representation."""
        return "<{} {} {}>".format(self.f.__name__, self.args, self.kw)

    def __repr__(self):
        """Compute a string representation."""
        return "<{} {} {}>".format(self.f.__name__, self.args, self.kw)


class RestCurried(object):
    """Helper class for currying pipeline stages.

    Calling an instance with the non-data arguments produces a
    `FilterFunction` that still expects the data argument. A class is used
    for this roundabout construct because it can be pickled.
    """

    def __init__(self, f):
        """Store the function for future currying."""
        self.f = f

    def __call__(self, *args, **kw):
        """Curry with the given arguments."""
        curried = FilterFunction(self.f, *args, **kw)
        return curried


def pipelinefilter(f):
    """Wrap `f` so that it is partially applied for all arguments other
    than the first.

    :param f: function whose first parameter is the data source
    :return: a `RestCurried` wrapper around `f`
    """
    return RestCurried(f)


def reraise_exception(exn):
    """Handler that simply raises the exception it is given.

    :param exn: exception
    :raises: `exn` itself
    """
    raise exn


def identity(x):
    """Return `x` unchanged."""
    return x


def compose2(f, g):
    """Return a function that applies `f` first and then `g`, i.e. g(f(x))."""
    return lambda x: g(f(x))


def compose(*args):
    """Compose a sequence of functions (left-to-right).

    The functions are folded pairwise with `compose2`, so the first
    argument is applied first. At least one function is required.
    """
    return reduce(compose2, args)


def pipeline(source, *args):
    """Write an input pipeline; first argument is source, rest are filters.

    With no filters the source is returned unchanged; otherwise the filters
    are composed left-to-right and applied to the source.
    """
    if args:
        return compose(*args)(source)
    return source


def getfirst(a, keys, default=None, missing_is_error=True):
    """Get the first matching key from a dictionary.

    Keys can be specified as a list, or as a string of keys separated by ';'
    (the string form must not contain spaces).

    :param a: dictionary to search
    :param keys: candidate keys, tried in order
    :param default: value returned when nothing matches and
        `missing_is_error` is false
    :param missing_is_error: raise ValueError when nothing matches
    """
    if isinstance(keys, str):
        assert " " not in keys
        keys = keys.split(";")
    for candidate in keys:
        if candidate in a:
            return a[candidate]
    if not missing_is_error:
        return default
    raise ValueError(f"didn't find {keys} in {list(a.keys())}")


def parse_field_spec(fields):
    """Parse a specification for a list of fields to be extracted.

    A string spec is split on whitespace into keys; a non-string spec is
    used as the list of keys directly. Each key is then split on ';' into
    its alternatives.

    :return: a list with one list of alternatives per key
    """
    names = fields.split() if isinstance(fields, str) else fields
    return [name.split(";") for name in names]


def transform_with(sample, transformers):
    """Transform a list of values using a list of functions.

    sample: list of values
    transformers: list of functions

    Positions without a transformer (because the transformer list is
    shorter than the sample, or the entry is None) are copied unchanged.
    When `transformers` is None or empty the sample itself is returned.
    """
    if transformers is None or len(transformers) == 0:
        return sample
    result = list(sample)
    assert len(transformers) <= len(sample)
    for idx, fn in enumerate(transformers):
        if fn is None:
            continue
        result[idx] = fn(sample[idx])
    return result


###
# Iterators
###


def _info(data, fmt=None, n=3, every=-1, width=50, stream=sys.stderr, name=""):
    """Print information about the samples that are passing through.

    :param data: source iterator
    :param fmt: format statement (using sample dict as keyword)
    :param n: when to stop
    :param every: how often to print
    :param width: maximum width
    :param stream: output stream
    :param name: identifier printed before any output
    """
    for i, sample in enumerate(data):
        if i < n or (every > 0 and (i + 1) % every == 0):
            if fmt is None:
                print("---", name, file=stream)
                for k, v in sample.items():
                    print(k, repr(v)[:width], file=stream)
            else:
                print(fmt.format(**sample), file=stream)
        yield sample


# `info` is `_info` passed through `pipelinefilter` (defined outside this section).
# NOTE(review): presumably this makes it usable as a pipeline stage — confirm against `pipelinefilter`.
info = pipelinefilter(_info)


def pick(buf, rng):
    """Remove and return one randomly chosen element of `buf`.

    The index is drawn with `rng.randint(0, len(buf) - 1)`. The chosen slot
    is overwritten with the last element and the list is then shortened by
    one, so removal does not shift the other elements (their order is not
    preserved).
    """
    chosen_at = rng.randint(0, len(buf) - 1)
    # Right-hand side is evaluated first, so `picked` gets the old slot value.
    buf[chosen_at], picked = buf[-1], buf[chosen_at]
    del buf[-1]
    return picked


def _shuffle(data, bufsize=1000, initial=100, rng=None, handler=None):
    """Shuffle the data in the stream.

    This uses a buffer of size `bufsize`. Shuffling at
    startup is less random; this is traded off against
    yielding samples quickly.

    data: iterator
    bufsize: buffer size for shuffling
    returns: iterator
    rng: either random module or random.Random instance

    """
    if rng is None:
        rng = random.Random(int((os.getpid() + time.time()) * 1e9))
    initial = min(initial, bufsize)
    buf = []
    for sample in data:
        buf.append(sample)
        if len(buf) < bufsize:
            try:
                buf.append(next(data))  # skipcq: PYL-R1708
            except StopIteration:
                pass
        if len(buf) >= initial:
            yield pick(buf, rng)
    while len(buf) > 0:
        yield pick(buf, rng)


# `shuffle` is `_shuffle` passed through `pipelinefilter` (defined outside this section).
shuffle = pipelinefilter(_shuffle)


class detshuffle(PipelineStage):
    """Buffer-based shuffle whose order is determined by (seed, epoch).

    :param bufsize: shuffle buffer size handed to `_shuffle`
    :param initial: initial fill level handed to `_shuffle`
    :param seed: base seed
    :param epoch: epoch counter; it is incremented at the start of every
        `run`, so the default of -1 makes the first run use epoch 0
    """

    def __init__(self, bufsize=1000, initial=100, seed=0, epoch=-1):
        self.bufsize = bufsize
        self.initial = initial
        self.seed = seed
        self.epoch = epoch

    def run(self, src):
        """Advance the epoch and return a shuffled iterator over `src`."""
        self.epoch += 1
        rng = random.Random()
        # random.seed() only accepts None/int/float/str/bytes/bytearray since
        # Python 3.11; seeding with a tuple raises TypeError there. A string
        # built from both values keeps the result a function of
        # (seed, epoch). Note the resulting order differs from the one the
        # old tuple seeding produced on earlier Python versions.
        rng.seed(f"{self.seed}:{self.epoch}")
        return _shuffle(src, self.bufsize, self.initial, rng)


def _select(data, predicate):
    """Select samples based on a predicate.

    :param data: source iterator
    :param predicate: predicate (function)
    """
    for sample in data:
        if predicate(sample):
            yield sample


# `select` is `_select` passed through `pipelinefilter` (defined outside this section).
select = pipelinefilter(_select)


def _log_keys(data, logfile=None):
    import fcntl

    if logfile is None or logfile == "":
        for sample in data:
            yield sample
    else:
        with open(logfile, "a") as stream:
            for i, sample in enumerate(data):
                buf = f"{i}\t{sample.get('__worker__')}\t{sample.get('__rank__')}\t{sample.get('__key__')}\n"
                try:
                    fcntl.flock(stream.fileno(), fcntl.LOCK_EX)
                    stream.write(buf)
                finally:
                    fcntl.flock(stream.fileno(), fcntl.LOCK_UN)
                yield sample


# `log_keys` is `_log_keys` passed through `pipelinefilter` (defined outside this section).
log_keys = pipelinefilter(_log_keys)


def _minedecode(x):
    if isinstance(x, str):
        return autodecode.imagehandler(x)
    else:
        return x


def _decode(data, *args, handler=reraise_exception, **kw):
    """Decode every sample with an `autodecode.Decoder` built from `args`.

    :param data: source iterator of dict samples
    :param args: decoding functions; strings are resolved by `_minedecode`
    :param handler: called with any exception raised while decoding; a
        truthy return skips the sample, a falsy one stops the iteration
    :param kw: extra keyword arguments forwarded to `autodecode.Decoder`
    """
    resolved = [_minedecode(spec) for spec in args]
    apply_decoders = autodecode.Decoder(resolved, **kw)

    for sample in data:
        assert isinstance(sample, dict), sample
        try:
            decoded = apply_decoders(sample)
        except Exception as exn:  # skipcq: PYL-W0703
            if not handler(exn):
                break
            continue
        yield decoded


# `decode` is `_decode` passed through `pipelinefilter` (defined outside this section).
decode = pipelinefilter(_decode)


def _map(data, f, handler=reraise_exception):
    """Apply `f` to every sample and yield the results.

    Results that are None are dropped. When both the input sample and the
    result are dicts, the result's `__key__` is set from the input sample
    (to None if the input has no `__key__`).

    :param data: source iterator
    :param f: function applied to each sample
    :param handler: called with any exception raised by `f`; a truthy
        return skips the sample, a falsy one stops the iteration
    """
    for sample in data:
        try:
            mapped = f(sample)
        except Exception as exn:
            if not handler(exn):
                break
            continue
        if mapped is None:
            continue
        both_are_dicts = isinstance(sample, dict) and isinstance(mapped, dict)
        if both_are_dicts:
            mapped["__key__"] = sample.get("__key__")
        yield mapped


# `map` is `_map` passed through `pipelinefilter` (defined outside this section).
# Note: from here on this module-level name shadows the builtin `map`.
map = pipelinefilter(_map)


def _rename(data, handler=reraise_exception, keep=True, **kw):
    """Rename sample entries according to the keyword arguments.

    Each keyword `new=old` stores under `new` the value found by
    `getfirst(sample, old)`; `old` may list alternatives (';'-separated
    string or list).

    :param data: source iterator of dict samples
    :param handler: called with any exception raised while processing a
        sample; a truthy return skips the sample, a falsy one stops
    :param keep: when true, entries not named as a rename source are
        carried over; when false, only the renamed entries are kept
    :param kw: the `new=old` rename specifications
    """
    for sample in data:
        try:
            if keep:
                # Every key mentioned as a source is dropped from the copy.
                consumed = set()
                for spec in kw.values():
                    consumed.update(
                        spec.split(";") if isinstance(spec, str) else spec)
                result = {
                    key: value
                    for key, value in sample.items() if key not in consumed
                }
            else:
                result = {}
            result.update({
                new_name: getfirst(sample, old_names, missing_is_error=True)
                for new_name, old_names in kw.items()
            })
            yield result
        except Exception as exn:
            if not handler(exn):
                break
            continue


# `rename` is `_rename` passed through `pipelinefilter` (defined outside this section).
rename = pipelinefilter(_rename)


def _associate(data, associator, **kw):
    """Associate additional data with samples."""
    for sample in data:
        if callable(associator):
            extra = associator(sample["__key__"])
        else:
            extra = associator.get(sample["__key__"], {})
        sample.update(extra)  # destructive
        yield sample


# `associate` is `_associate` passed through `pipelinefilter` (defined outside this section).
associate = pipelinefilter(_associate)


def _map_dict(data, handler=reraise_exception, **kw):
    """Transform selected entries of dict samples, one function per key.

    :param data: source iterator of dict samples
    :param handler: called with any exception raised while transforming a
        sample; a truthy return skips the sample, a falsy one stops
    :param kw: `key=function` pairs; at least one is required and every
        value must be callable

    Samples are modified in place and then yielded.
    """
    assert len(kw) > 0
    for name in kw:
        assert callable(kw[name]), (name, kw[name])

    for sample in data:
        assert isinstance(sample, dict)
        try:
            for name, func in kw.items():
                sample[name] = func(sample[name])
        except Exception as exn:
            if not handler(exn):
                break
            continue
        yield sample


# `map_dict` is `_map_dict` passed through `pipelinefilter` (defined outside this section).
map_dict = pipelinefilter(_map_dict)


def _to_tuple(data,
              *args,
              handler=reraise_exception,
              missing_is_error=True,
              none_is_error=None):
    """Turn dict samples into tuples of selected fields.

    :param data: source iterator of dict samples
    :param args: field specs, one per tuple position (each may hold
        ';'-separated alternatives); a single string containing spaces is
        split on whitespace into several specs
    :param handler: called with any exception raised for a sample; a
        truthy return skips the sample, a falsy one stops
    :param missing_is_error: forwarded to `getfirst`
    :param none_is_error: raise ValueError when any extracted value is
        None; defaults to the value of `missing_is_error`
    """
    if none_is_error is None:
        none_is_error = missing_is_error
    single_spaced_spec = (len(args) == 1 and isinstance(args[0], str) and
                          " " in args[0])
    if single_spaced_spec:
        args = args[0].split()

    for sample in data:
        try:
            values = []
            for spec in args:
                values.append(
                    getfirst(sample, spec, missing_is_error=missing_is_error))
            result = tuple(values)
            if none_is_error and any(x is None for x in result):
                raise ValueError(f"to_tuple {args} got {sample.keys()}")
            yield result
        except Exception as exn:
            if not handler(exn):
                break
            continue


# `to_tuple` is `_to_tuple` passed through `pipelinefilter` (defined outside this section).
to_tuple = pipelinefilter(_to_tuple)


def _map_tuple(data, *args, handler=reraise_exception):
    """Transform tuple samples position by position.

    :param data: source iterator of list/tuple samples
    :param args: one function per position; None stands for
        `utils.identity`. Positions beyond the shorter of `args` and the
        sample are left untouched.
    :param handler: called with any exception raised by a function; a
        truthy return skips the sample, a falsy one stops
    """
    funcs = [utils.identity if f is None else f for f in args]
    for func in funcs:
        assert callable(func), func
    for sample in data:
        assert isinstance(sample, (list, tuple))
        items = list(sample)
        covered = min(len(funcs), len(items))
        try:
            for position in range(covered):
                items[position] = funcs[position](items[position])
        except Exception as exn:
            if not handler(exn):
                break
            continue
        yield tuple(items)


# `map_tuple` is `_map_tuple` passed through `pipelinefilter` (defined outside this section).
map_tuple = pipelinefilter(_map_tuple)


def _unlisted(data):
    """Turn batched data back into unbatched data."""
    for batch in data:
        assert isinstance(batch, list), sample
        for sample in batch:
            yield sample


# `unlisted` is `_unlisted` passed through `pipelinefilter` (defined outside this section).
unlisted = pipelinefilter(_unlisted)


def _unbatched(data):
    """Turn batched data back into unbatched data."""
    for sample in data:
        assert isinstance(sample, (tuple, list)), sample
        assert len(sample) > 0
        for i in range(len(sample[0])):
            yield tuple(x[i] for x in sample)


# `unbatched` is `_unbatched` passed through `pipelinefilter` (defined outside this section).
unbatched = pipelinefilter(_unbatched)


def _rsample(data, p=0.5):
    """Randomly subsample a stream of data."""
    assert p >= 0.0 and p <= 1.0
    for sample in data:
        if random.uniform(0.0, 1.0) < p:
            yield sample


# `rsample` is `_rsample` passed through `pipelinefilter` (defined outside this section).
rsample = pipelinefilter(_rsample)

# `slice` wraps `itertools.islice` the same way. Note: from here on this
# module-level name shadows the builtin `slice`.
slice = pipelinefilter(itertools.islice)


def _extract_keys(source,
                  *patterns,
                  duplicate_is_error=True,
                  ignore_missing=False):
    for sample in source:
        result = []
        for pattern in patterns:
            pattern = pattern.split(";") if isinstance(pattern,
                                                       str) else pattern
            matches = [
                x for x in sample.keys()
                if any(fnmatch("." + x, p) for p in pattern)
            ]
            if len(matches) == 0:
                if ignore_missing:
                    continue
                else:
                    raise ValueError(
                        f"Cannot find {pattern} in sample keys {sample.keys()}.")
            if len(matches) > 1 and duplicate_is_error:
                raise ValueError(
                    f"Multiple sample keys {sample.keys()} match {pattern}.")
            value = sample[matches[0]]
            result.append(value)
        yield tuple(result)


# `extract_keys` is `_extract_keys` passed through `pipelinefilter` (defined outside this section).
extract_keys = pipelinefilter(_extract_keys)


def _rename_keys(source,
                 *args,
                 keep_unselected=False,
                 must_match=True,
                 duplicate_is_error=True,
                 **kw):
    renamings = [(pattern, output) for output, pattern in args]
    renamings += [(pattern, output) for output, pattern in kw.items()]
    for sample in source:
        new_sample = {}
        matched = {k: False for k, _ in renamings}
        for path, value in sample.items():
            fname = re.sub(r".*/", "", path)
            new_name = None
            for pattern, name in renamings[::-1]:
                if fnmatch(fname.lower(), pattern):
                    matched[pattern] = True
                    new_name = name
                    break
            if new_name is None:
                if keep_unselected:
                    new_sample[path] = value
                continue
            if new_name in new_sample:
                if duplicate_is_error:
                    raise ValueError(
                        f"Duplicate value in sample {sample.keys()} after rename."
                    )
                continue
            new_sample[new_name] = value
        if must_match and not all(matched.values()):
            raise ValueError(
                f"Not all patterns ({matched}) matched sample keys ({sample.keys()})."
            )

        yield new_sample


# `rename_keys` is `_rename_keys` passed through `pipelinefilter` (defined outside this section).
rename_keys = pipelinefilter(_rename_keys)


def decode_bin(stream):
    """Return everything `stream.read()` delivers, without interpretation."""
    payload = stream.read()
    return payload


def decode_text(stream):
    """Read the whole stream and decode its bytes as UTF-8 text."""
    return stream.read().decode("utf-8")


def decode_pickle(stream):
    """Unpickle one object from `stream`.

    SECURITY: unpickling can execute arbitrary code; only use this on
    data from a trusted origin.
    """
    unpickled = pickle.load(stream)
    return unpickled


# Default (glob pattern, decoder function) pairs; used as the default value
# of the `defaults` parameter of `_xdecode`.
default_decoders = [
    ("*.bin", decode_bin),
    ("*.txt", decode_text),
    ("*.pyd", decode_pickle),
]


def find_decoder(decoders, path):
    """Pick the decoder whose pattern matches the file name of `path`.

    :param decoders: sequence of (glob pattern, decoder) pairs; it is
        scanned last-to-first, so later entries take precedence
    :param path: sample key; any leading "dir/" part is ignored
    :return: the matching decoder; a pass-through function for names
        starting with "__"; None when nothing matches

    The lower-cased name is tried both as-is and with a "." prepended, so
    "*.cls" also matches a key that is just "cls".
    """
    fname = re.sub(r".*/", "", path)
    if fname.startswith("__"):
        return lambda x: x
    lowered = fname.lower()
    candidates = (lowered, "." + lowered)
    for pattern, fun in decoders[::-1]:
        if any(fnmatch(candidate, pattern) for candidate in candidates):
            return fun
    return None


def _xdecode(
        source,
        *args,
        must_decode=True,
        defaults=default_decoders,
        **kw, ):
    """Decode sample entries with decoders selected by key pattern.

    :param source: source iterator of dict samples
    :param args: extra (glob pattern, decoder) pairs, taking precedence
        over `defaults`
    :param must_decode: raise ValueError for a key without a decoder
        instead of passing its value through
    :param defaults: base list of (glob pattern, decoder) pairs
    :param kw: `ext=decoder` shortcuts, registered as "*.ext" with the
        highest precedence

    Keys starting with "__" are copied unchanged. A decoder that is the
    value False means "leave as is". Values of type bytes are wrapped in
    `io.BytesIO` before being handed to the decoder.
    """
    table = list(defaults) + list(args)
    for ext, func in kw.items():
        table.append(("*." + ext, func))
    for sample in source:
        new_sample = {}
        for path, data in sample.items():
            if path.startswith("__"):
                new_sample[path] = data
                continue
            decoder = find_decoder(table, path)
            if decoder is None and must_decode:
                raise ValueError(f"No decoder found for {path}.")
            if decoder is False or decoder is None:
                new_sample[path] = data
                continue
            payload = io.BytesIO(data) if isinstance(data, bytes) else data
            new_sample[path] = decoder(payload)
        yield new_sample


# `xdecode` is `_xdecode` passed through `pipelinefilter` (defined outside this section).
xdecode = pipelinefilter(_xdecode)


def _audio_data_filter(source,
                       frame_shift=10,
                       max_length=10240,
                       min_length=10,
                       token_max_length=200,
                       token_min_length=1,
                       min_output_input_ratio=0.0005,
                       max_output_input_ratio=1):
    """ Filter sample according to feature and label length
        Inplace operation.

        Args::
            source: Iterable[{fname, wav, label, sample_rate}]
            frame_shift: length of frame shift (ms)
            max_length: drop utterance which is greater than max_length(10ms)
            min_length: drop utterance which is less than min_length(10ms)
            token_max_length: drop utterance which is greater than
                token_max_length, especially when use char unit for
                english modeling
            token_min_length: drop utterance which is
                less than token_max_length
            min_output_input_ratio: minimal ration of
                token_length / feats_length(10ms)
            max_output_input_ratio: maximum ration of
                token_length / feats_length(10ms)

        Returns:
            Iterable[{fname, wav, label, sample_rate}]
    """
    for sample in source:
        assert 'sample_rate' in sample
        assert 'wav' in sample
        assert 'label' in sample
        # sample['wav'] is paddle.Tensor, we have 100 frames every second (default)
        num_frames = sample['wav'].shape[1] / sample['sample_rate'] * (
            1000 / frame_shift)
        if num_frames < min_length:
            continue
        if num_frames > max_length:
            continue
        if len(sample['label']) < token_min_length:
            continue
        if len(sample['label']) > token_max_length:
            continue
        if num_frames != 0:
            if len(sample['label']) / num_frames < min_output_input_ratio:
                continue
            if len(sample['label']) / num_frames > max_output_input_ratio:
                continue
        yield sample


# `audio_data_filter` is `_audio_data_filter` passed through `pipelinefilter` (defined outside this section).
audio_data_filter = pipelinefilter(_audio_data_filter)


def _audio_tokenize(source,
                    symbol_table,
                    bpe_model=None,
                    non_lang_syms=None,
                    split_with_space=False):
    """ Decode text to chars or BPE
        Inplace operation

        Args:
            source: Iterable[{fname, wav, txt, sample_rate}]

        Returns:
            Iterable[{fname, wav, txt, tokens, label, sample_rate}]
    """
    if non_lang_syms is not None:
        non_lang_syms_pattern = re.compile(r"(\[[^\[\]]+\]|<[^<>]+>|{[^{}]+})")
    else:
        non_lang_syms = {}
        non_lang_syms_pattern = None

    if bpe_model is not None:
        import sentencepiece as spm
        sp = spm.SentencePieceProcessor()
        sp.load(bpe_model)
    else:
        sp = None

    for sample in source:
        assert 'txt' in sample
        txt = sample['txt'].strip()
        if non_lang_syms_pattern is not None:
            parts = non_lang_syms_pattern.split(txt.upper())
            parts = [w for w in parts if len(w.strip()) > 0]
        else:
            parts = [txt]

        label = []
        tokens = []
        for part in parts:
            if part in non_lang_syms:
                tokens.append(part)
            else:
                if bpe_model is not None:
                    tokens.extend(__tokenize_by_bpe_model(sp, part))
                else:
                    if split_with_space:
                        part = part.split(" ")
                    for ch in part:
                        if ch == ' ':
                            ch = "<space>"
                        tokens.append(ch)

        for ch in tokens:
            if ch in symbol_table:
                label.append(symbol_table[ch])
            elif '<unk>' in symbol_table:
                label.append(symbol_table['<unk>'])

        sample['tokens'] = tokens
        sample['label'] = label
        yield sample


# `audio_tokenize` is `_audio_tokenize` passed through `pipelinefilter` (defined outside this section).
audio_tokenize = pipelinefilter(_audio_tokenize)


def _audio_resample(source, resample_rate=16000):
    """ Bring every sample to the target sample rate.
        Inplace operation.

        Samples already at `resample_rate` are passed through untouched;
        for the others `wav` and `sample_rate` are replaced.

        Args:
            source: Iterable[{fname, wav, label, sample_rate}]
            resample_rate: target resample rate

        Returns:
            Iterable[{fname, wav, label, sample_rate}]
    """
    for sample in source:
        assert 'sample_rate' in sample
        assert 'wav' in sample
        original_rate = sample['sample_rate']
        if original_rate == resample_rate:
            yield sample
            continue
        waveform = sample['wav']
        sample['sample_rate'] = resample_rate
        resampled = backends.soundfile_backend.resample(
            waveform.numpy(), src_sr=original_rate, target_sr=resample_rate)
        sample['wav'] = paddle.to_tensor(resampled)
        yield sample


# `audio_resample` is `_audio_resample` passed through `pipelinefilter` (defined outside this section).
audio_resample = pipelinefilter(_audio_resample)


def _audio_compute_fbank(source,
                         num_mel_bins=80,
                         frame_length=25,
                         frame_shift=10,
                         dither=0.0):
    """ Replace the waveform of each sample with fbank features.

        The output samples are new dicts holding only fname, label and feat.

        Args:
            source: Iterable[{fname, wav, label, sample_rate}]
            num_mel_bins: number of mel filter bank
            frame_length: length of one frame (ms)
            frame_shift: length of frame shift (ms)
            dither: value of dither

        Returns:
            Iterable[{fname, feat, label}]
    """
    for sample in source:
        for required in ('sample_rate', 'wav', 'fname', 'label'):
            assert required in sample
        # NOTE(review): scaling by 2**15 presumably maps float audio in
        # [-1, 1) to the 16-bit integer range — confirm.
        scaled = sample['wav'] * (1 << 15)
        feat = kaldi.fbank(
            scaled,
            n_mels=num_mel_bins,
            frame_length=frame_length,
            frame_shift=frame_shift,
            dither=dither,
            energy_floor=0.0,
            sr=sample['sample_rate'])
        yield {
            'fname': sample['fname'],
            'label': sample['label'],
            'feat': feat,
        }


# `audio_compute_fbank` is `_audio_compute_fbank` passed through `pipelinefilter` (defined outside this section).
audio_compute_fbank = pipelinefilter(_audio_compute_fbank)


def _audio_spec_aug(
        source,
        max_w=5,
        w_inplace=True,
        w_mode="PIL",
        max_f=30,
        num_f_mask=2,
        f_inplace=True,
        f_replace_with_zero=False,
        max_t=40,
        num_t_mask=2,
        t_inplace=True,
        t_replace_with_zero=False, ):
    """ Apply time warping, frequency masking and time masking, in that
        order, to the features of every sample.
        Inplace operation

        Args:
            source: Iterable[{fname, feat, label}]
            max_w: max width of time warp
            w_inplace: `inplace` flag passed to the time warp
            w_mode: time warp mode
            max_f: max width of freq mask
            num_f_mask: number of freq mask to apply
            f_inplace: `inplace` flag passed to the frequency masking
            f_replace_with_zero: use zero to mask
            max_t: max width of time mask
            num_t_mask: number of time mask to apply
            t_inplace: `inplace` flag passed to the time masking
            t_replace_with_zero: use zero to mask

        Returns:
            Iterable[{fname, feat, label}]
     """
    for sample in source:
        # The augmentation helpers are fed the numpy view of the tensor.
        spec = sample['feat'].numpy()
        spec = time_warp(
            spec, max_time_warp=max_w, inplace=w_inplace, mode=w_mode)
        spec = freq_mask(
            spec,
            F=max_f,
            n_mask=num_f_mask,
            inplace=f_inplace,
            replace_with_zero=f_replace_with_zero)
        spec = time_mask(
            spec,
            T=max_t,
            n_mask=num_t_mask,
            inplace=t_inplace,
            replace_with_zero=t_replace_with_zero)
        sample['feat'] = paddle.to_tensor(spec, dtype=paddle.float32)
        yield sample


# `audio_spec_aug` is `_audio_spec_aug` passed through `pipelinefilter` (defined outside this section).
audio_spec_aug = pipelinefilter(_audio_spec_aug)


def _sort(source, sort_size=500):
    """ Sort the data by feature length.
        Sort is used after shuffle and before batch, so we can group
        utts with similar lengths into a batch, and `sort_size` should
        be less than `shuffle_size`

        Args:
            source: Iterable[{fname, feat, label}]
            sort_size: buffer size for sort

        Returns:
            Iterable[{fname, feat, label}]
    """

    buf = []
    for sample in source:
        buf.append(sample)
        if len(buf) >= sort_size:
            buf.sort(key=lambda x: x['feat'].shape[0])
            for x in buf:
                yield x
            buf = []
    # The sample left over
    buf.sort(key=lambda x: x['feat'].shape[0])
    for x in buf:
        yield x


# `sort` is `_sort` passed through `pipelinefilter` (defined outside this section).
sort = pipelinefilter(_sort)


def _batched(source, batch_size=16):
    """ Static batch the data by `batch_size`

        Args:
            data: Iterable[{fname, feat, label}]
            batch_size: batch size

        Returns:
            Iterable[List[{fname, feat, label}]]
    """
    buf = []
    for sample in source:
        buf.append(sample)
        if len(buf) >= batch_size:
            yield buf
            buf = []
    if len(buf) > 0:
        yield buf


# `batched` is `_batched` passed through `pipelinefilter` (defined outside this section).
batched = pipelinefilter(_batched)


def dynamic_batched(source, max_frames_in_batch=12000):
    """ Dynamic batch the data until the total frames in batch
        reach `max_frames_in_batch`

        The size of a batch is measured after padding, i.e. as
        (longest sample in the batch) * (number of samples). A sample that
        would push this above the limit starts a new batch. A single sample
        that alone exceeds the limit is emitted as a batch of one.

        Args:
            source: Iterable[{fname, feat, label}]
            max_frames_in_batch: max_frames in one batch

        Returns:
            Iterable[List[{fname, feat, label}]]
    """
    buf = []
    longest_frames = 0
    for sample in source:
        assert 'feat' in sample
        assert isinstance(sample['feat'], paddle.Tensor)
        # Use shape[0] for the frame count, like the other filters in this
        # file; `.size(0)` is the torch spelling and is not how a paddle
        # Tensor exposes its dimensions.
        new_sample_frames = sample['feat'].shape[0]
        longest_frames = max(longest_frames, new_sample_frames)
        frames_after_padding = longest_frames * (len(buf) + 1)
        if frames_after_padding > max_frames_in_batch:
            # Nothing is buffered when the very first sample (or the first
            # one after a flush) is itself over the limit; never emit an
            # empty batch in that case.
            if len(buf) > 0:
                yield buf
            buf = [sample]
            longest_frames = new_sample_frames
        else:
            buf.append(sample)
    if len(buf) > 0:
        yield buf


def _audio_padding(source):
    """ Turn each list of samples into one padded training batch.

        Samples are ordered by feature length, longest first; features are
        padded with 0 and labels with -1.

        Args:
            source: Iterable[List[{fname, feat, label}]]

        Returns:
            Iterable[Tuple(fnames, feats, feats lengths, labels, label lengths)]
    """
    for sample in source:
        assert isinstance(sample, list)
        raw_lengths = paddle.to_tensor(
            [item['feat'].shape[0] for item in sample], dtype="int64")
        order = paddle.argsort(raw_lengths, descending=True)
        ordered = [sample[i] for i in order]

        sorted_keys = [item['fname'] for item in ordered]
        sorted_feats = [item['feat'] for item in ordered]
        feats_lengths = paddle.to_tensor(
            [feat.shape[0] for feat in sorted_feats], dtype="int64")
        sorted_labels = [
            paddle.to_tensor(item['label'], dtype="int32") for item in ordered
        ]
        label_lengths = paddle.to_tensor(
            [label.shape[0] for label in sorted_labels], dtype="int64")

        padded_feats = pad_sequence(
            sorted_feats, batch_first=True, padding_value=0)
        padding_labels = pad_sequence(
            sorted_labels, batch_first=True, padding_value=-1)

        yield (sorted_keys, padded_feats, feats_lengths, padding_labels,
               label_lengths)


# `audio_padding` is `_audio_padding` passed through `pipelinefilter` (defined outside this section).
audio_padding = pipelinefilter(_audio_padding)


def _audio_cmvn(source, cmvn_file):
    """ Apply global CMVN to the padded features of every batch.

        Args:
            source: Iterable of 5-tuples
                (keys, padded feats, feats lengths, padded labels, label lengths)
            cmvn_file: passed to `GlobalCMVN` to build the normalizer

        Returns:
            Iterable of the same 5-tuples with the features replaced by the
            normalized float32 tensor
    """
    normalize = GlobalCMVN(cmvn_file)
    for batch in source:
        keys, feats, feats_lengths, labels, label_lengths = batch
        normalized = normalize(feats.numpy())
        feats = paddle.to_tensor(normalized, dtype=paddle.float32)
        yield (keys, feats, feats_lengths, labels, label_lengths)


# `audio_cmvn` is `_audio_cmvn` passed through `pipelinefilter` (defined outside this section).
audio_cmvn = pipelinefilter(_audio_cmvn)


def _placeholder(source):
    for data in source:
        yield data


# `placeholder` is `_placeholder` passed through `pipelinefilter` (defined outside this section).
placeholder = pipelinefilter(_placeholder)


================================================
FILE: paddlespeech/audio/streamdata/gopen.py
================================================
#
# Copyright (c) 2017-2021 NVIDIA CORPORATION. All rights reserved.
# This file is part of the WebDataset library.
# See the LICENSE file for licensing terms (BSD-style).
#
"""Open URLs by calling subcommands."""
import os
import re
import shlex
import sys
from subprocess import PIPE
from subprocess import Popen
from urllib.parse import urlparse

# Global used for printing additional node information during verbose output.
# It is interpolated into the messages built in `Pipe.wait_for_child` and
# into the "GOPEN" trace line of `gopen`; it starts out empty.
info = {}


class Pipe:
    """Stream-like wrapper around a subprocess started with `subprocess.Popen`.

    This class looks like a stream from the outside, but it checks
    subprocess status and handles timeouts with exceptions.
    This way, clients of the class do not need to know that they are
    dealing with subprocesses.

    :param *args: passed to `subprocess.Popen`
    :param **kw: passed to `subprocess.Popen`
    :param mode: required; a mode string starting with "r" (read the
        child's stdout) or "w" (write to the child's stdin)
    :param timeout: timeout for closing/waiting
    :param ignore_errors: don't raise exceptions on subprocess errors
    :param ignore_status: exit codes, besides 0, that are not errors
    """

    def __init__(
            self,
            *args,
            mode=None,
            timeout=7200.0,
            ignore_errors=False,
            ignore_status=None,
            **kw, ):
        """Create an IO Pipe.

        :raises ValueError: for a missing/unknown mode, or when the
            requested stream of the subprocess could not be opened
        """
        # Reject bad modes up front; otherwise the object would be left
        # without `proc`/`stream` and fail later with an AttributeError.
        if not mode or mode[0] not in ("r", "w"):
            raise ValueError(f"{mode}: unknown mode")
        self.ignore_errors = ignore_errors
        # Copy so the caller's sequence is never shared with the instance.
        self.ignore_status = [0] + list(ignore_status or [])
        self.timeout = timeout
        self.args = (args, kw)
        if mode[0] == "r":
            self.proc = Popen(*args, stdout=PIPE, **kw)
            self.stream = self.proc.stdout
        else:
            self.proc = Popen(*args, stdin=PIPE, **kw)
            self.stream = self.proc.stdin
        if self.stream is None:
            raise ValueError(f"{args}: couldn't open")
        self.status = None

    def __str__(self):
        return f"<Pipe {self.args}>"

    def check_status(self):
        """Poll the process and handle any errors."""
        status = self.proc.poll()
        if status is not None:
            self.wait_for_child()

    def wait_for_child(self):
        """Record the exit status and raise an exception if necessary.

        In verbose mode (GOPEN_VERBOSE set to a non-zero integer) a status
        that has already been recorded is not reported again.

        :raises Exception: when the exit status is not in `ignore_status`
            and `ignore_errors` is false
        """
        verbose = int(os.environ.get("GOPEN_VERBOSE", 0))
        if self.status is not None and verbose:
            # print(f"(waiting again [{self.status} {os.getpid()}:{self.proc.pid}])", file=sys.stderr)
            return
        self.status = self.proc.wait()
        if verbose:
            print(
                f"pipe exit [{self.status} {os.getpid()}:{self.proc.pid}] {self.args} {info}",
                file=sys.stderr, )
        if self.status not in self.ignore_status and not self.ignore_errors:
            raise Exception(f"{self.args}: exit {self.status} (read) {info}")

    def read(self, *args, **kw):
        """Wrap stream.read and checks status."""
        result = self.stream.read(*args, **kw)
        self.check_status()
        return result

    def write(self, *args, **kw):
        """Wrap stream.write and checks status."""
        result = self.stream.write(*args, **kw)
        self.check_status()
        return result

    def readline(self, *args, **kw):
        """Wrap stream.readline and checks status."""
        # File objects spell this method `readline`; `readLine` does not
        # exist on them. `self.status` is deliberately not assigned here:
        # presetting it made `wait_for_child` skip the error check in
        # verbose mode.
        result = self.stream.readline(*args, **kw)
        self.check_status()
        return result

    # Historical spelling, kept so existing callers continue to work.
    readLine = readline

    def close(self):
        """Wrap stream.close, wait for the subprocess, and handle errors."""
        self.stream.close()
        # Enforce the timeout here but let `wait_for_child` record the
        # status, so that the error check also runs in verbose mode.
        self.proc.wait(self.timeout)
        self.wait_for_child()

    def __enter__(self):
        """Context handler."""
        return self

    def __exit__(self, etype, value, traceback):
        """Context handler."""
        self.close()


def set_options(obj,
                timeout=None,
                ignore_errors=None,
                ignore_status=None,
                handler=None):
    """Update the options of a `Pipe`; do nothing for any other object.

    Only options given as something other than None are assigned.

    :param obj: any kind of stream
    :param timeout: desired timeout
    :param ignore_errors: desired ignore_errors setting
    :param ignore_status: desired ignore_status setting
    :param handler: desired error handler
    :return: True when `obj` is a `Pipe`, False otherwise
    """
    if not isinstance(obj, Pipe):
        return False
    requested = {
        "timeout": timeout,
        "ignore_errors": ignore_errors,
        "ignore_status": ignore_status,
        "handler": handler,
    }
    for attribute, value in requested.items():
        if value is not None:
            setattr(obj, attribute, value)
    return True


def gopen_file(url, mode="rb", bufsize=8192):
    """Open `url` as a local file path with the builtin `open`.

    :param url: path to be opened
    :param mode: mode to open it with
    :param bufsize: accepted for signature parity with the other gopen
        handlers; not used here
    """
    handle = open(url, mode)
    return handle


def gopen_pipe(url, mode="rb", bufsize=8192):
    """Open a "pipe:" URL by running the rest of the URL as a shell command.

    :param url: a pipe: URL; everything after "pipe:" is the command
    :param mode: desired mode; must start with "r" or "w"
    :param bufsize: desired buffer size
    :raises ValueError: for any other mode
    """
    assert url.startswith("pipe:")
    cmd = url[5:]
    if mode[0] not in ("r", "w"):
        raise ValueError(f"{mode}: unknown mode")
    # Reading and writing use identical settings; the mode alone decides
    # which end of the process the Pipe exposes.
    return Pipe(
        cmd,
        mode=mode,
        shell=True,
        bufsize=bufsize,
        ignore_status=[141], )  # skipcq: BAN-B604


def gopen_curl(url, mode="rb", bufsize=8192):
    """Open a URL with `curl`.

    Reading runs `curl -s -L <url>`; writing runs `curl -s -L -T - <url>`,
    which uploads what is written to the pipe.

    :param url: url (usually, http:// etc.)
    :param mode: file mode; must start with "r" or "w"
    :param bufsize: buffer size
    :raises ValueError: for any other mode
    """
    if mode[0] == "r":
        # The command is run through the shell, so the URL must be quoted
        # properly: wrapping it in '...' by hand breaks (and allows command
        # injection) as soon as the URL itself contains a single quote.
        cmd = f"curl -s -L {shlex.quote(url)}"
        return Pipe(
            cmd,
            mode=mode,
            shell=True,
            bufsize=bufsize,
            ignore_status=[141, 23], )  # skipcq: BAN-B604
    elif mode[0] == "w":
        cmd = f"curl -s -L -T - {shlex.quote(url)}"
        return Pipe(
            cmd,
            mode=mode,
            shell=True,
            bufsize=bufsize,
            ignore_status=[141, 26], )  # skipcq: BAN-B604
    else:
        raise ValueError(f"{mode}: unknown mode")


def gopen_htgs(url, mode="rb", bufsize=8192):
    """Open an "htgs://" URL for reading with `curl`.

    The scheme prefix is rewritten from "htgs://" to "gs://"
    (case-insensitively) before the URL is handed to `curl -s -L`.

    :param url: url with an htgs:// scheme
    :param mode: file mode; only modes starting with "r" are supported
    :param bufsize: buffer size
    :raises ValueError: for write modes and unknown modes
    """
    if mode[0] == "r":
        url = re.sub(r"(?i)^htgs://", "gs://", url)
        # Quote for the shell instead of wrapping in '...' by hand, which
        # breaks (and allows command injection) when the URL contains a
        # single quote.
        cmd = f"curl -s -L {shlex.quote(url)}"
        return Pipe(
            cmd,
            mode=mode,
            shell=True,
            bufsize=bufsize,
            ignore_status=[141, 23], )  # skipcq: BAN-B604
    elif mode[0] == "w":
        raise ValueError(f"{mode}: cannot write")
    else:
        raise ValueError(f"{mode}: unknown mode")


def gopen_gsutil(url, mode="rb", bufsize=8192):
    """Open a URL by piping through the `gsutil` command line tool.

    Reading runs ``gsutil cat <url>``; writing runs ``gsutil cp - <url>`` so
    the data written to the pipe is uploaded. The command is executed through
    the shell, so the URL is shell-quoted first.

    :param url: url handled by gsutil (registered for the ``gs`` scheme)
    :param mode: file mode; only its first character ("r" or "w") is inspected
    :param bufsize: buffer size
    :raises ValueError: if the mode starts with neither "r" nor "w"
    """
    import shlex  # local import: keeps this function self-contained

    # Literal single quotes do not protect against a "'" inside the URL when
    # the command is run with shell=True; shlex.quote handles that case.
    quoted_url = shlex.quote(url)
    if mode[0] == "r":
        cmd = f"gsutil cat {quoted_url}"
        return Pipe(
            cmd,
            mode=mode,
            shell=True,
            bufsize=bufsize,
            ignore_status=[141, 23], )  # skipcq: BAN-B604
    elif mode[0] == "w":
        cmd = f"gsutil cp - {quoted_url}"
        return Pipe(
            cmd,
            mode=mode,
            shell=True,
            bufsize=bufsize,
            ignore_status=[141, 26], )  # skipcq: BAN-B604
    else:
        raise ValueError(f"{mode}: unknown mode")


def gopen_error(url, *args, **kw):
    """Fallback handler: always fails because no handler matches the URL.

    :param url: the URL that could not be dispatched
    :param args: ignored positional arguments
    :param kw: ignored keyword arguments
    :raises ValueError: always
    """
    message = f"{url}: no gopen handler defined"
    raise ValueError(message)


"""A dispatch table mapping URL schemes to handlers."""
# Keys are URL schemes; "__default__" is the entry gopen falls back to when
# a scheme has no handler of its own.
gopen_schemes = {
    "__default__": gopen_error,
    "pipe": gopen_pipe,
    "http": gopen_curl,
    "https": gopen_curl,
    "sftp": gopen_curl,
    "ftps": gopen_curl,
    "scp": gopen_curl,
    "gs": gopen_gsutil,
    "htgs": gopen_htgs,
}


def gopen(url, mode="rb", bufsize=8192, **kw):
    """Open ``url`` for binary reading or writing.

    Dispatch rules, in order:

    - ``"-"`` returns the binary buffer of stdin ("rb") or stdout ("wb").
    - A URL without a scheme, or with the ``file`` scheme, is opened with the
      builtin ``open``. For these the ``bufsize`` argument is not used; the
      buffering comes from the ``GOPEN_BUFFER`` environment variable
      (default -1).
    - Anything else is looked up in the `gopen_schemes` dispatch table, with
      the ``__default__`` entry as fallback.

    Set the ``GOPEN_VERBOSE`` environment variable to a non-zero integer to
    get a line on stderr for every URL being opened.

    :param url: the source URL
    :param mode: the mode, either "rb" or "wb"
    :param bufsize: the buffer size forwarded to scheme handlers
    :param kw: extra keyword arguments forwarded to scheme handlers
    """
    if int(os.environ.get("GOPEN_VERBOSE", 0)):
        print("GOPEN", url, info, file=sys.stderr)
    assert mode in ["rb", "wb"], mode

    if url == "-":
        if mode == "rb":
            return sys.stdin.buffer
        if mode == "wb":
            return sys.stdout.buffer
        raise ValueError(f"unknown mode {mode}")

    parsed = urlparse(url)
    scheme = parsed.scheme
    if scheme in ("", "file"):
        buffering = int(os.environ.get("GOPEN_BUFFER", -1))
        # Scheme-less URLs are used verbatim; file: URLs contribute their path.
        target = url if scheme == "" else parsed.path
        return open(target, mode, buffering=buffering)

    opener = gopen_schemes.get(scheme, gopen_schemes["__default__"])
    return opener(url, mode, bufsize, **kw)


def reader(url, **kw):
    """Open ``url`` for binary reading via gopen.

    :param url: source URL
    :param kw: other keywords forwarded to gopen
    """
    read_mode = "rb"
    return gopen(url, read_mode, **kw)


================================================
FILE: paddlespeech/audio/streamdata/handlers.py
================================================
#
# Copyright (c) 2017-2021 NVIDIA CORPORATION. All rights reserved.
# This file is part of the WebDataset library.
# See the LICENSE file for licensing terms (BSD-style).
#
"""Pluggable exception handlers.

These are functions that take an exception as an argument and then return...

- the exception (in order to re-raise it)
- True (in order to continue and ignore the exception)
- False (in order to ignore the exception and stop processing)

They are used as handler= arguments in much of the library.
"""
import time
import warnings


def reraise_exception(exn):
    """Handler that propagates the error: raises the exception it is given."""
    raise exn


def ignore_and_continue(exn):
    """Handler that silently drops the exception; returns True (continue)."""
    return True


def warn_and_continue(exn):
    """Handler that issues a warning for the exception, then continues.

    Emits ``repr(exn)`` as a warning, sleeps for half a second and returns
    True (continue).
    """
    message = repr(exn)
    warnings.warn(message)
    time.sleep(0.5)
    return True


def ignore_and_stop(exn):
    """Handler that silently drops the exception; returns False (stop)."""
    return False


def warn_and_stop(exn):
    """Handler that issues a warning for the exception, then stops.

    Emits ``repr(exn)`` as a warning, sleeps for half a second and returns
    False (stop further processing).
    """
    message = repr(exn)
    warnings.warn(message)
    time.sleep(0.5)
    return False


================================================
FILE: paddlespeech/audio/streamdata/mix.py
================================================
#
# Copyright (c) 2017-2021 NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# This file is part of the WebDataset library.
# See the LICENSE file for licensing terms (BSD-style).
# Modified from https://github.com/webdataset/webdataset
#
"""Classes for mixing samples from multiple sources."""
import random

import numpy as np

from .paddle_utils import IterableDataset


def round_robin_shortest(*sources):
    """Yield one sample from each iterator in turn.

    Iteration ends as soon as the iterator whose turn it is has no more
    samples.

    :param sources: iterators to draw from
    """
    turn = 0
    while True:
        current = sources[turn % len(sources)]
        try:
            item = next(current)
        except StopIteration:
            return
        yield item
        turn += 1


def round_robin_longest(*sources):
    """Yield one sample from each iterator in turn until all are exhausted.

    An iterator that runs out is dropped from the rotation and the remaining
    ones keep being served in order.

    :param sources: iterators to draw from
    """
    # *sources arrives as a tuple, which does not support item deletion;
    # work on a mutable list instead.
    active = list(sources)
    i = 0
    while active:
        # Wrap around so the rotation restarts after the last iterator.
        i %= len(active)
        try:
            sample = next(active[i])
        except StopIteration:
            # After the deletion, index i already refers to the next iterator.
            del active[i]
            continue
        i += 1
        yield sample


class RoundRobin(IterableDataset):
    """Dataset that interleaves samples from several datasets in turn."""

    def __init__(self, datasets, longest=False):
        """Store the datasets to interleave.

        :param datasets: iterable of datasets to draw from
        :param longest: selects round_robin_longest instead of
            round_robin_shortest when iterating
        """
        self.longest = longest
        self.datasets = datasets

    def __iter__(self):
        """Return an iterator over the sources."""
        iterators = [iter(dataset) for dataset in self.datasets]
        merge = round_robin_longest if self.longest else round_robin_shortest
        return merge(*iterators)


def random_samples(sources, probs=None, longest=False):
    """Yield samples drawn from randomly chosen iterators.

    On every step one iterator is picked according to ``probs`` (normalized
    to sum to one) and its next sample is yielded.

    :param sources: sequence of iterators to draw from; it is copied, so the
        caller's object is never modified
    :param probs: relative weights, one per source; uniform when None
    :param longest: if True, an exhausted iterator is dropped and sampling
        continues with the rest; if False, iteration ends as soon as the
        picked iterator is exhausted
    """
    # Work on private copies: exhausted entries are deleted below, which must
    # not leak into the caller's list (and would fail on a tuple).
    sources = list(sources)
    if probs is None:
        probs = [1] * len(sources)
    else:
        probs = list(probs)
    cum = None
    while len(sources) > 0:
        if cum is None:
            # The cumulative table only changes when a source is removed.
            cum = (np.array(probs) / np.sum(probs)).cumsum()
        r = random.random()
        # Round-off can leave cum[-1] slightly below 1.0; keep the index
        # inside the table in that case.
        i = min(int(np.searchsorted(cum, r)), len(cum) - 1)
        try:
            yield next(sources[i])
        except StopIteration:
            if not longest:
                break
            del sources[i]
            del probs[i]
            cum = None


class RandomMix(IterableDataset):
    """Dataset that mixes samples from several datasets at random."""

    def __init__(self, datasets, probs=None, longest=False):
        """Store the mixing configuration.

        :param datasets: iterable of datasets to draw from
        :param probs: weights forwarded to random_samples
        :param longest: flag forwarded to random_samples
        """
        self.longest = longest
        self.probs = probs
        self.datasets = datasets

    def __iter__(self):
        """Return an iterator over the sources."""
        iterators = [iter(dataset) for dataset in self.datasets]
        return random_samples(iterators, self.probs, longest=self.longest)


================================================
FILE: paddlespeech/audio/streamdata/paddle_utils.py
================================================
#
# Copyright (c) 2017-2021 NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# This file is part of the WebDataset library.
# See the LICENSE file for licensing terms (BSD-style).
# Modified from https://github.com/webdataset/webdataset
#
"""Mock implementations of paddle interfaces when paddle is not available."""

try:
    # Use the real paddle classes whenever paddle can be imported.
    from paddle.io import DataLoader
    from paddle.io import IterableDataset
except ModuleNotFoundError:
    # paddle is missing: define empty stand-ins under the same names so that
    # modules importing them from here still load.

    class IterableDataset:
        """Empty implementation of IterableDataset when paddle is not available."""

        pass

    class DataLoader:
        """Empty implementation of DataLoader when paddle is not available."""

        pass


================================================
FILE: paddlespeech/audio/streamdata/pipeline.py
================================================
# Copyright (c) 2017-2019 NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# See the LICENSE file for licensing terms (BSD-style).
# Modified from https://github.com/webdataset/webdataset
#%%
import copy
import sys
from itertools import islice

from .paddle_utils import DataLoader
from .paddle_utils import IterableDataset
from .utils import PipelineStage


def add_length_method(obj):
    """Give ``obj`` a ``__len__`` that returns ``obj.size``.

    A new class named ``<OriginalClass>_Length`` is created on the fly,
    deriving from the object's current class and IterableDataset, and the
    object's class is switched to it in place.

    :param obj: object to modify; must expose a ``size`` attribute by the
        time ``len()`` is called on it
    :return: the same object
    """
    original_cls = obj.__class__

    def length(self):
        return self.size

    obj.__class__ = type(
        original_cls.__name__ + "_Length",
        (original_cls, IterableDataset),
        {"__len__": length}, )
    return obj


class DataPipeline(IterableDataset, PipelineStage):
    """A pipeline starting with an IterableDataset and a series of filters.

    Stages are kept in ``self.pipeline``. The first stage produces the source
    iterator; every later stage is applied to the output of the previous one.
    """

    def __init__(self, *args, **kwargs):
        """Collect the pipeline stages.

        ``None`` arguments are skipped and list arguments are flattened by
        one level. Keyword arguments are accepted but not used here.
        """
        super().__init__()
        self.pipeline = []
        self.length = -1
        self.repetitions = 1
        self.nsamples = -1
        for arg in args:
            if arg is None:
                continue
            if isinstance(arg, list):
                self.pipeline.extend(arg)
            else:
                self.pipeline.append(arg)

    def invoke(self, f, *args, **kwargs):
        """Apply a pipeline stage, possibly to the output of a previous stage.

        :raises ValueError: if ``f`` is none of the supported stage kinds
        """
        if isinstance(f, PipelineStage):
            return f.run(*args, **kwargs)
        if isinstance(f, (IterableDataset, DataLoader)) and len(args) == 0:
            return iter(f)
        if isinstance(f, list):
            return iter(f)
        if callable(f):
            result = f(*args, **kwargs)
            return result
        raise ValueError(f"{f}: not a valid pipeline stage")

    def iterator1(self):
        """Create an iterator through one epoch in the pipeline."""
        source = self.invoke(self.pipeline[0])
        for step in self.pipeline[1:]:
            source = self.invoke(step, source)
        return source

    def iterator(self):
        """Create an iterator through the entire dataset, using the given number of repetitions."""
        for _ in range(self.repetitions):
            for sample in self.iterator1():
                yield sample

    def __iter__(self):
        """Create an iterator through the pipeline, repeating and slicing as requested.

        The output is truncated to ``nsamples`` only when repetitions differ
        from 1 and ``nsamples`` is positive.
        """
        if self.repetitions != 1 and self.nsamples > 0:
            return islice(self.iterator(), self.nsamples)
        return self.iterator()

    def stage(self, i):
        """Return pipeline stage i."""
        return self.pipeline[i]

    def append(self, f):
        """Append a pipeline stage (modifies the object)."""
        self.pipeline.append(f)
        return self

    def append_list(self, *args):
        """Append every argument as a pipeline stage (modifies the object)."""
        for arg in args:
            self.pipeline.append(arg)
        return self

    def compose(self, *args):
        """Append pipeline stages to a copy of the pipeline and return the copy.

        The original pipeline is left untouched.
        """
        result = copy.copy(self)
        # copy.copy is shallow: without a separate list the copy would share
        # the stage list with self, and appending would change both.
        result.pipeline = list(self.pipeline)
        for arg in args:
            result.append(arg)
        return result

    def with_length(self, n):
        """Add a __len__ method returning the desired value.

        This does not change the actual number of samples in an epoch.
        It is provided only as a workaround for training environments that
        require a __len__ method on the dataset.
        """
        self.size = n
        return add_length_method(self)

    def with_epoch(self, nsamples=-1, nbatches=-1):
        """Change the epoch to return the given number of samples/batches.

        The two arguments mean the same thing; the larger one is used. The
        pipeline is repeated indefinitely and cut off at that count.
        """
        self.repetitions = sys.maxsize
        self.nsamples = max(nsamples, nbatches)
        return self

    def repeat(self, nepochs=-1, nbatches=-1):
        """Repeat iterating through the dataset for the given #epochs up to the given #samples.

        A non-positive ``nepochs`` means repeating indefinitely.
        """
        self.repetitions = nepochs if nepochs > 0 else sys.maxsize
        self.nsamples = nbatches
        return self


================================================
FILE: paddlespeech/audio/streamdata/shardlists.py
================================================
#
# Copyright (c) 2017-2021 NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# This file is part of the WebDataset library.
# See the LICENSE file for licensing terms (BSD-style).
#
# Modified from https://github.com/webdataset/webdataset
"""Train PyTorch models directly from POSIX tar archive.

Code works locally or over HTTP connections.
"""
import os
import random
import sys
import time
from dataclasses import dataclass
from dataclasses import field
from itertools import islice
from typing import List

import braceexpand
import yaml

from . import utils
from ..utils.log import Logger
from .filters import pipelinefilter
from .paddle_utils import IterableDataset
logger = Logger(__name__)


def expand_urls(urls):
    """Turn a URL specification into a list of URLs.

    A string is split on ``::`` and every part is brace-expanded; any other
    iterable is simply converted to a list.

    :param urls: a string in brace notation or an iterable of URLs
    :return: list of URLs
    """
    if not isinstance(urls, str):
        return list(urls)
    expanded = []
    for part in urls.split("::"):
        expanded += braceexpand.braceexpand(part)
    return expanded


class SimpleShardList(IterableDataset):
    """An iterable dataset yielding a list of urls."""

    def __init__(self, urls, seed=None):
        """Iterate through the list of shards.

        :param urls: a list of URLs as a Python list or brace notation string
        :param seed: if not None, the shards are shuffled with this seed on
            every iteration
        """
        super().__init__()
        self.urls = expand_urls(urls)
        assert isinstance(self.urls[0], str)
        self.seed = seed

    def __len__(self):
        """Return the number of shard URLs."""
        return len(self.urls)

    def __iter__(self):
        """Return an iterator over the shards, each as ``dict(url=...)``."""
        ordered = self.urls.copy()
        if self.seed is not None:
            # A fresh generator per iteration: the same seed gives the same order.
            shuffler = random.Random(self.seed)
            shuffler.shuffle(ordered)
        for shard_url in ordered:
            yield {"url": shard_url}


def split_by_node(src, group=None):
    """Yield only the items of ``src`` that belong to the current node.

    With more than one node, node ``rank`` receives every ``world_size``-th
    item starting at index ``rank``; otherwise everything is passed through.

    :param src: iterable of items
    :param group: forwarded to utils.paddle_worker_info
    """
    rank, world_size, _worker, _num_workers = utils.paddle_worker_info(
        group=group)
    logger.info(f"world_size:{world_size}, rank:{rank}")
    selected = islice(src, rank, None, world_size) if world_size > 1 else src
    yield from selected


def single_node_only(src, group=None):
    """Pass ``src`` through unchanged, refusing to run on multiple nodes.

    :param src: iterable of items
    :param group: forwarded to utils.paddle_worker_info
    :raises ValueError: when the reported world size is greater than one
    """
    _rank, world_size, _worker, _num_workers = utils.paddle_worker_info(
        group=group)
    if world_size > 1:
        raise ValueError(
            "input pipeline needs to be reconfigured for multinode training")
    yield from src


def split_by_worker(src):
    """Yield only the items of ``src`` that belong to the current worker.

    With more than one worker, worker ``worker`` receives every
    ``num_workers``-th item starting at index ``worker``; otherwise
    everything is passed through.

    :param src: iterable of items
    """
    _rank, _world_size, worker, num_workers = utils.paddle_worker_info()
    logger.info(f"num_workers:{num_workers}, worker:{worker}")
    selected = islice(src, worker, None, num_workers) if num_workers > 1 else src
    yield from selected


def resampled_(src, n=sys.maxsize):
    """Yield ``n`` items drawn with replacement from ``src``.

    The whole of ``src`` is loaded into memory first. The generator used for
    drawing is seeded from ``/dev/random`` when that can be read, otherwise
    from the current time. Progress messages go to stderr.

    :param src: iterable of items to sample from
    :param n: number of items to yield
    """
    seed = time.time()
    try:
        # Close the device file deterministically instead of leaking the handle.
        with open("/dev/random", "rb") as entropy:
            seed = entropy.read(20)
    except Exception as exn:
        # Best effort only: keep the time-based seed and report why.
        print(repr(exn)[:50], file=sys.stderr)
    rng = random.Random(seed)
    print("# resampled loading", file=sys.stderr)
    items = list(src)
    print(
        f"# resampled got {len(items)} samples, yielding {n}", file=sys.stderr)
    for _ in range(n):
        yield rng.choice(items)


# Pipeline-stage form of resampled_, built with pipelinefilter from .filters.
# NOTE(review): presumably this lets trailing arguments (e.g. n) be bound
# before the source is supplied -- confirm against filters.pipelinefilter.
resampled = pipelinefilter(resampled_)


def non_empty(src):
    """Pass ``src`` through and fail if it turns out to be empty.

    :param src: iterable of items
    :raises ValueError: after exhausting ``src`` without seeing any item
    """
    seen_any = False
    for item in src:
        seen_any = True
        yield item
    if not seen_any:
        raise ValueError(
            "pipeline stage received no data at all and this was declared as an error"
        )


@dataclass
class MSSource:
    """Class representing a data source.

    Holds the shard URLs of one dataset entry together with the settings
    MultiShardSample uses to pick shards from it.
    """

    # Label of the source.
    name: str = ""
    # When > 0: number of shards picked per epoch without replacement.
    perepoch: int = -1
    # When > 0: number of shards drawn per epoch with replacement.
    # NOTE(review): annotated as bool but compared with 0 and used as a
    # count by MultiShardSample -- the annotation looks like it should be int.
    resample: bool = False
    # Shard URLs belonging to this source.
    urls: List[str] = field(default_factory=list)


# Module-level generator that MultiShardSample.parse_spec assigns to self.rng;
# MultiShardSample.set_epoch later replaces it with a seeded one.
default_rng = random.Random()


def expand(s):
    """Expand environment variables, then a leading ``~``, in a path string."""
    with_vars = os.path.expandvars(s)
    return os.path.expanduser(with_vars)


class MultiShardSample(IterableDataset):
    """Shard list assembled from several sources described by a YAML spec.

    Top-level spec keys: ``prefix``, ``datasets``, ``buckets``. Keys of each
    dataset entry: ``buckets``, ``name``, ``shards``, ``resample``, ``choose``.
    """

    def __init__(self, fname):
        """Construct a shardlist from multiple sources using a YAML spec.

        :param fname: path of a YAML file, or an already parsed spec dict
        """
        self.epoch = -1
        self.parse_spec(fname)

    def parse_spec(self, fname):
        """Read the spec and fill ``self.sources`` with one entry per dataset.

        :param fname: path of a YAML file, or an already parsed spec dict
        :raises ValueError: if ``choose`` exceeds the number of shards, or if
            both ``choose`` and ``resample`` are given for one dataset
        """
        self.rng = default_rng  # capture default_rng if we fork
        if isinstance(fname, dict):
            spec = fname
        else:
            with open(fname) as stream:
                spec = yaml.safe_load(stream)
        assert set(spec.keys()).issubset(
            set("prefix datasets buckets".split())), list(spec.keys())
        prefix = expand(spec.get("prefix", ""))
        self.sources = []
        for ds in spec["datasets"]:
            assert set(ds.keys()).issubset(
                set("buckets name shards resample choose".split())), list(
                    ds.keys())
            # A dataset-level bucket overrides the spec-level one.
            buckets = ds.get("buckets", spec.get("buckets", []))
            if isinstance(buckets, str):
                buckets = [buckets]
            buckets = [expand(s) for s in buckets]
            if buckets == []:
                buckets = [""]
            assert len(
                buckets
            ) == 1, f"{buckets}: FIXME support for multiple buckets unimplemented"
            bucket = buckets[0]
            name = ds.get("name", "@" + bucket)
            urls = ds["shards"]
            if isinstance(urls, str):
                urls = [urls]
            urls = [
                prefix + os.path.join(bucket, u)
                for url in urls for u in braceexpand.braceexpand(expand(url))
            ]
            resample = ds.get("resample", -1)
            nsample = ds.get("choose", -1)
            # The messages name the actual spec keys ("choose", "resample").
            if nsample > len(urls):
                raise ValueError(
                    f"choose {nsample} must be no greater than the number of shards ({len(urls)})"
                )
            if (nsample > 0) and (resample > 0):
                raise ValueError("specify only one of resample or choose")
            entry = MSSource(
                name=name, urls=urls, perepoch=nsample, resample=resample)
            self.sources.append(entry)
            print(f"# {name} {len(urls)} {nsample}", file=sys.stderr)

    def set_epoch(self, seed):
        """Set the current epoch (for consistent shard selection among nodes)."""
        self.rng = random.Random(seed)

    def get_shards_for_epoch(self):
        """Return the shuffled list of shard URLs selected from all sources."""
        result = []
        for source in self.sources:
            if source.resample > 0:
                # sample with replacement
                chosen = self.rng.choices(source.urls, k=source.resample)
            elif source.perepoch > 0:
                # sample without replacement
                chosen = list(source.urls)
                self.rng.shuffle(chosen)
                chosen = chosen[:source.perepoch]
            else:
                chosen = list(source.urls)
            result += chosen
        self.rng.shuffle(result)
        return result

    def __iter__(self):
        """Yield ``dict(url=...)`` for every shard selected for this epoch."""
        shards = self.get_shards_for_epoch()
        for shard in shards:
            yield dict(url=shard)


def shardspec(spec):
    """Build a shard list from ``spec``.

    A spec ending in ``.yaml`` becomes a MultiShardSample; anything else
    becomes a SimpleShardList.
    """
    factory = MultiShardSample if spec.endswith(".yaml") else SimpleShardList
    return factory(spec)


class ResampledShards(IterableDataset):
    """An iterable dataset yielding shard urls sampled with replacement."""

    def __init__(
            self,
            urls,
            nshards=sys.maxsize,
            worker_seed=None,
            deterministic=False, ):
        """Sample shards from the shard list with replacement.

        :param urls: a list of URLs as a Python list or brace notation string
        :param nshards: number of shards yielded per iteration
        :param worker_seed: callable returning the seed for this worker;
            utils.paddle_worker_seed is used when None
        :param deterministic: if True the seed is built only from the worker
            seed and the epoch counter
        """
        super().__init__()
        self.urls = expand_urls(urls)
        assert isinstance(self.urls[0], str)
        self.nshards = nshards
        if worker_seed is None:
            worker_seed = utils.paddle_worker_seed
        self.worker_seed = worker_seed
        self.deterministic = deterministic
        self.epoch = -1

    def __iter__(self):
        """Return an iterator over the shards."""
        self.epoch += 1
        seed_parts = [self.worker_seed(), self.epoch]
        if not self.deterministic:
            # Mix in process- and time-dependent values.
            seed_parts += [os.getpid(), time.time_ns(), os.urandom(4)]
        seed = utils.make_seed(*seed_parts)
        if os.environ.get("WDS_SHOW_SEED", "0") == "1":
            print(f"# ResampledShards seed {seed}")
        self.rng = random.Random(seed)
        for _ in range(self.nshards):
            pick = self.rng.randint(0, len(self.urls) - 1)
            yield {"url": self.urls[pick]}


================================================
FILE: paddlespeech/audio/streamdata/soundfile.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import warnings
from typing import Optional
from typing import Tuple

import numpy as np
import paddle
import resampy
import soundfile
from scipy.io import wavfile

from ..utils import depth_convert
from ..utils import ParameterError
from .common import AudioInfo

__all__ = [
    'resample',
    'to_mono',
    'normalize',
    'save',
    'soundfile_save',
    'load',
    'soundfile_load',
    'info',
]
# Normalization modes listed in the error message of `normalize`.
NORMALMIZE_TYPES = ['linear', 'gaussian']
# Channel-merge strategies accepted by `to_mono`.
MERGE_TYPES = ['ch0', 'ch1', 'random', 'average']
# Resampling filters accepted by `resample`.
RESAMPLE_MODES = ['kaiser_best', 'kaiser_fast']
# Small constant `normalize` uses to keep its divisors away from zero.
EPS = 1e-8


def resample(y: np.ndarray,
             src_sr: int,
             target_sr: int,
             mode: str='kaiser_fast') -> np.ndarray:
    """Audio resampling.

    Args:
        y (np.ndarray): Input waveform array in 1D or 2D.
        src_sr (int): Source sample rate.
        target_sr (int): Target sample rate.
        mode (str, optional): The resampling filter to use. Defaults to 'kaiser_fast'.

    Raises:
        ParameterError: If `y` is not a numpy array or `mode` is not one of RESAMPLE_MODES.

    Returns:
        np.ndarray: `y` resampled to `target_sr`
    """

    if mode == 'kaiser_best':
        warnings.warn(
            f'Using resampy in kaiser_best to {src_sr}=>{target_sr}. This function is pretty slow, \
        we recommend the mode kaiser_fast in large scale audio training')

    if not isinstance(y, np.ndarray):
        # f-string so the offending type is actually shown in the message.
        raise ParameterError(
            f'Only support numpy np.ndarray, but received y in {type(y)}')

    if mode not in RESAMPLE_MODES:
        raise ParameterError(f'resample mode must in {RESAMPLE_MODES}')

    return resampy.resample(y, src_sr, target_sr, filter=mode)


def to_mono(y: np.ndarray, merge_type: str='average') -> np.ndarray:
    """Convert stereo audio to mono.

    A 1D input is returned unchanged. For a 2D input the first axis is
    indexed as the channel axis; 'average' uses the first two channels.

    Args:
        y (np.ndarray): Input waveform array in 1D or 2D.
        merge_type (str, optional): Merge type to generate mono waveform. Defaults to 'average'.

    Raises:
        ParameterError: If `merge_type` is unknown, `y` has more than two
            dimensions, or `y` has a dtype that cannot be averaged.

    Returns:
        np.ndarray: `y` with mono channel.
    """

    if merge_type not in MERGE_TYPES:
        raise ParameterError(
            f'Unsupported merge type {merge_type}, available types are {MERGE_TYPES}'
        )
    if y.ndim > 2:
        raise ParameterError(
            f'Unsupported audio array,  y.ndim > 2, the shape is {y.shape}')
    if y.ndim == 1:  # nothing to merge
        return y

    if merge_type == 'ch0':
        return y[0]
    if merge_type == 'ch1':
        return y[1]
    if merge_type == 'random':
        return y[np.random.randint(0, 2)]

    # need to do averaging according to dtype

    if np.issubdtype(y.dtype, np.floating):
        # Any float width (float16/32/64) can be averaged directly.
        y_out = (y[0] + y[1]) * 0.5
    elif y.dtype in (np.int8, np.int16, np.int32):
        # Widen before adding so the sum of two samples cannot overflow;
        # the floor average always fits back into the original dtype.
        wide = y.astype('int64')
        y_out = ((wide[0] + wide[1]) // 2).astype(y.dtype)
    else:
        raise ParameterError(f'Unsupported dtype: {y.dtype}')
    return y_out


def soundfile_load_(file: os.PathLike,
                    offset: Optional[float]=None,
                    dtype: str='int16',
                    duration: Optional[int]=None) -> Tuple[np.ndarray, int]:
    """Read raw samples from an audio file through the soundfile library.

    Args:
        file (os.PathLike): File of waveform.
        offset (Optional[float], optional): Offset to the start of waveform; it is multiplied by
            the file's sample rate to get the frame to seek to. Defaults to None.
        dtype (str, optional): Data type of waveform. Defaults to 'int16'.
        duration (Optional[int], optional): Duration of waveform to read, converted to frames the
            same way as `offset`; everything is read when None. Defaults to None.

    Returns:
        Tuple[np.ndarray, int]: Waveform in ndarray (transposed from what soundfile returns) and
            its samplerate.
    """
    with soundfile.SoundFile(file) as handle:
        native_sr = handle.samplerate
        if offset:
            handle.seek(int(offset * native_sr))
        # -1 asks soundfile for all remaining frames.
        n_frames = -1 if duration is None else int(duration * native_sr)
        samples = handle.read(
            frames=n_frames, dtype=dtype, always_2d=False).T

    return samples, handle.samplerate


def normalize(y: np.ndarray, norm_type: str='linear',
              mul_factor: float=1.0) -> np.ndarray:
    """Scale a waveform, then apply an extra multiplier.

    'linear' divides by the peak absolute value (plus EPS); 'gaussian'
    subtracts the mean and divides by the standard deviation (at least EPS).

    Args:
        y (np.ndarray): Input waveform array in 1D or 2D.
        norm_type (str, optional): Type of normalization. Defaults to 'linear'.
        mul_factor (float, optional): Scaling factor. Defaults to 1.0.

    Raises:
        NotImplementedError: If `norm_type` is neither 'linear' nor 'gaussian'.

    Returns:
        np.ndarray: `y` after normalization.
    """
    if norm_type == 'gaussian':
        mean = np.mean(y)
        std = max(np.std(y), EPS)
        return mul_factor * (y - mean) / std

    if norm_type == 'linear':
        peak = np.max(np.abs(y))
        scale = 1.0 / (peak + EPS)
        return y * scale * mul_factor

    raise NotImplementedError(f'norm_type should be in {NORMALMIZE_TYPES}')


def soundfile_save(y: np.ndarray, sr: int, file: os.PathLike) -> None:
    """Save audio file to disk. This function saves audio to disk using scipy.io.wavfile, with additional step to convert input waveform to int16.

    Args:
        y (np.ndarray): Input waveform array in 1D or 2D.
        sr (int): Sample rate.
        file (os.PathLike): Path of audio file to save; a `str` or any path-like object.

    Raises:
        ParameterError: If the file name does not end in '.wav' or `sr` is not positive.
    """
    # Accept path-like objects (e.g. pathlib.Path) as the annotation promises;
    # they do not provide str methods such as endswith.
    path = os.fspath(file)
    if not path.endswith('.wav'):
        raise ParameterError(
            f'only .wav file supported, but dst file name is: {file}')

    if sr <= 0:
        raise ParameterError(
            f'Sample rate should be larger than 0, received sr = {sr}')

    if y.dtype not in ['int16', 'int8']:
        warnings.warn(
            f'input data type is {y.dtype}, will convert data to int16 format before saving'
        )
        y_out = depth_convert(y, 'int16')
    else:
        y_out = y

    wavfile.write(path, sr, y_out)


def soundfile_load(
        file: os.PathLike,
        sr: Optional[int]=None,
        mono: bool=True,
        merge_type: str='average',  # ch0,ch1,random,average
        normal: bool=True,
        norm_type: str='linear',
        norm_mul_factor: float=1.0,
        offset: float=0.0,
        duration: Optional[int]=None,
        dtype: str='float32',
        resample_mode: str='kaiser_fast') -> Tuple[np.ndarray, int]:
    """Load an audio file and post-process it.

    Steps, in order: read the samples, optionally merge channels to mono,
    optionally resample to `sr`, normalize, and convert to `dtype`.

    Args:
        file (os.PathLike): Path of audio file to load.
        sr (Optional[int], optional): Sample rate of loaded waveform; the file's own rate is kept
            when None. Defaults to None.
        mono (bool, optional): Return waveform with mono channel. Defaults to True.
        merge_type (str, optional): Merge type of multi-channels waveform. Defaults to 'average'.
        normal (bool, optional): Waveform normalization. Defaults to True.
        norm_type (str, optional): Type of normalization. Defaults to 'linear'.
        norm_mul_factor (float, optional): Scaling factor. Defaults to 1.0.
        offset (float, optional): Offset to the start of waveform. Defaults to 0.0.
        duration (Optional[int], optional): Duration of waveform to read. Defaults to None.
        dtype (str, optional): Data type of waveform. Defaults to 'float32'.
        resample_mode (str, optional): The resampling filter to use. Defaults to 'kaiser_fast'.

    Raises:
        ParameterError: If no samples could be read from the file.

    Returns:
        Tuple[np.ndarray, int]: Waveform in ndarray and its samplerate.
    """
    y, r = soundfile_load_(file, offset=offset, dtype=dtype, duration=duration)

    has_samples = (y.ndim == 1 and len(y) > 0) or (y.ndim == 2 and
                                                   len(y[0]) > 0)
    if not has_samples:
        raise ParameterError(f'audio file {file} looks empty')

    if mono:
        y = to_mono(y, merge_type)

    needs_resampling = sr is not None and sr != r
    if needs_resampling:
        y = resample(y, r, sr, mode=resample_mode)
        r = sr

    if normal:
        y = normalize(y, norm_type, norm_mul_factor)
    elif dtype in ['int8', 'int16']:
        # still need to do normalization, before depth conversion
        y = normalize(y, 'linear', 1.0)

    return depth_convert(y, dtype), r


#The code below is taken from: https://github.com/pytorch/audio/blob/main/torchaudio/backend/soundfile_backend.py, with some modifications.


def _get_subtype_for_wav(dtype: paddle.dtype,
                         encoding: str,
                         bits_per_sample: int):
    """Pick the soundfile subtype string for writing a WAV file.

    Without an encoding the subtype follows `bits_per_sample`, or the tensor
    dtype when that is not given either. With an encoding, only the bit
    depths valid for that encoding are accepted.

    Raises:
        ValueError: For an unsupported dtype, encoding, or encoding/bit-depth combination.
    """
    if not encoding:
        if bits_per_sample:
            return "PCM_U8" if bits_per_sample == 8 else f"PCM_{bits_per_sample}"
        by_dtype = {
            paddle.uint8: "PCM_U8",
            paddle.int16: "PCM_16",
            paddle.int32: "PCM_32",
            paddle.float32: "FLOAT",
            paddle.float64: "DOUBLE",
        }
        subtype = by_dtype.get(dtype)
        if not subtype:
            raise ValueError(f"Unsupported dtype for wav: {dtype}")
        return subtype

    if encoding == "PCM_S":
        if not bits_per_sample:
            return "PCM_32"
        if bits_per_sample == 8:
            raise ValueError("wav does not support 8-bit signed PCM encoding.")
        return f"PCM_{bits_per_sample}"

    if encoding == "PCM_U":
        if bits_per_sample not in (None, 8):
            raise ValueError("wav only supports 8-bit unsigned PCM encoding.")
        return "PCM_U8"

    if encoding == "PCM_F":
        if bits_per_sample in (None, 32):
            return "FLOAT"
        if bits_per_sample == 64:
            return "DOUBLE"
        raise ValueError("wav only supports 32/64-bit float PCM encoding.")

    if encoding == "ULAW":
        if bits_per_sample not in (None, 8):
            raise ValueError("wav only supports 8-bit mu-law encoding.")
        return "ULAW"

    if encoding == "ALAW":
        if bits_per_sample not in (None, 8):
            raise ValueError("wav only supports 8-bit a-law encoding.")
        return "ALAW"

    raise ValueError(f"wav does not support {encoding}.")


def _get_subtype_for_sphere(encoding: str, bits_per_sample: int):
    if encoding in (None, "PCM_S"):
        return f"PCM_{bits_per_sample}" if bits_per_sample else "PCM_32"
    if encoding in ("PCM_U", "PCM_F"):
        raise ValueError(f"sph does not support {encoding} encoding.")
    if encoding == "ULAW":
        if bits_per_sample in (None, 8):
            return "ULAW"
        raise ValueError("sph only supports 8-bit for mu-law encoding.")
    if encoding == "ALAW":
        return "ALAW"
    raise ValueError(f"sph does not support {encoding}.")


def _get_subtype(dtype: paddle.dtype,
                 format: str,
                 encoding: str,
                 bits_per_sample: int):
    """Resolve the soundfile subtype for a given container format.

    Args:
        dtype (paddle.dtype): dtype of the tensor being saved; only consulted
            for ``"wav"``.
        format (str): Lower-case format name (``"wav"``, ``"flac"``, ``"ogg"``,
            ``"vorbis"``, ``"sph"``, ``"nis"`` or ``"nist"``).
        encoding (str or None): Requested encoding.
        bits_per_sample (int or None): Requested bit depth.

    Returns:
        str: The soundfile subtype name.

    Raises:
        ValueError: If the format is unknown or the encoding / bit depth is
            not accepted for that format.
    """
    # Formats that delegate or need no validation at all.
    if format == "wav":
        return _get_subtype_for_wav(dtype, encoding, bits_per_sample)
    if format == "sph":
        return _get_subtype_for_sphere(encoding, bits_per_sample)
    if format in ("nis", "nist"):
        return "PCM_16"

    # Vorbis takes no configuration.
    if format in ("ogg", "vorbis"):
        if encoding or bits_per_sample:
            raise ValueError(
                "ogg/vorbis does not support encoding/bits_per_sample.")
        return "VORBIS"

    if format != "flac":
        raise ValueError(f"Unsupported format: {format}")

    # flac: no encoding choice, at most 24 bits, 16 bits by default.
    if encoding:
        raise ValueError("flac does not support encoding.")
    if not bits_per_sample:
        return "PCM_16"
    if bits_per_sample > 24:
        raise ValueError("flac does not support bits_per_sample > 24.")
    if bits_per_sample == 8:
        return "PCM_S8"
    return f"PCM_{bits_per_sample}"


def save(
        filepath: str,
        src: paddle.Tensor,
        sample_rate: int,
        channels_first: bool=True,
        compression: Optional[float]=None,
        format: Optional[str]=None,
        encoding: Optional[str]=None,
        bits_per_sample: Optional[int]=None, ):
    """Save audio data to file.

    Note:
        The formats this function can handle depend on the soundfile installation.
        This function is tested on the following formats;

        * WAV

            * 32-bit floating-point
            * 32-bit signed integer
            * 16-bit signed integer
            * 8-bit unsigned integer

        * FLAC
        * OGG/VORBIS
        * SPHERE

    Note:
        ``filepath`` argument is intentionally annotated as ``str`` only, even though it accepts
        ``pathlib.Path`` object as well. This is for the consistency with ``"sox_io"`` backend.

    Args:
        filepath (str or pathlib.Path): Path to audio file.
        src (paddle.Tensor): Audio data to save. must be 2D tensor.
        sample_rate (int): sampling rate
        channels_first (bool, optional): If ``True``, the given tensor is interpreted as `[channel, time]`,
            otherwise `[time, channel]`.
        compression (float or None, optional): Not used.
            It is here only for interface compatibility reason with "sox_io" backend.
        format (str or None, optional): Override the audio format.
            When ``filepath`` argument is path-like object, audio format is
            inferred from file extension. If the file extension is missing or
            is not one of the supported formats, the format given here is used
            instead.

            When ``filepath`` argument is file-like object,
            this argument is required.

            Valid values are ``"wav"``, ``"ogg"``, ``"vorbis"``,
            ``"flac"`` and ``"sph"``.
        encoding (str or None, optional): Changes the encoding for supported formats.
            This argument is effective only for supported formats, such as
            ``"wav"``, ``"flac"`` and ``"sph"``. Valid values are:

                - ``"PCM_S"`` (signed integer Linear PCM)
                - ``"PCM_U"`` (unsigned integer Linear PCM)
                - ``"PCM_F"`` (floating point PCM)
                - ``"ULAW"`` (mu-law)
                - ``"ALAW"`` (a-law)

        bits_per_sample (int or None, optional): Changes the bit depth for the
            supported formats.
            When ``format`` is one of ``"wav"``, ``"flac"`` or ``"sph"``,
            you can change the bit depth.
            Valid values are ``8``, ``16``, ``24``, ``32`` and ``64``.

    Raises:
        ValueError: If ``src`` is not 2D, ``bits_per_sample`` is invalid, or the
            format / encoding / bit depth combination is not supported.
        RuntimeError: If ``filepath`` is a file-like object and ``format`` is ``None``.

    Supported formats/encodings/bit depth/compression are:

    ``"wav"``
        - 32-bit floating-point PCM
        - 32-bit signed integer PCM
        - 24-bit signed integer PCM
        - 16-bit signed integer PCM
        - 8-bit unsigned integer PCM
        - 8-bit mu-law
        - 8-bit a-law

        Note:
            Default encoding/bit depth is determined by the dtype of
            the input Tensor.

    ``"flac"``
        - 8-bit
        - 16-bit (default)
        - 24-bit

    ``"ogg"``, ``"vorbis"``
        - Doesn't accept changing configuration.

    ``"sph"``
        - 8-bit signed integer PCM
        - 16-bit signed integer PCM
        - 24-bit signed integer PCM
        - 32-bit signed integer PCM (default)
        - 8-bit mu-law
        - 8-bit a-law
        - 16-bit a-law
        - 24-bit a-law
        - 32-bit a-law

    """
    if src.ndim != 2:
        raise ValueError(f"Expected 2D Tensor, got {src.ndim}D.")
    if compression is not None:
        warnings.warn(
            '`save` function of "soundfile" backend does not support "compression" parameter. '
            "The argument is silently ignored.")
    if hasattr(filepath, "write"):
        if format is None:
            raise RuntimeError(
                "`format` is required when saving to file object.")
        ext = format.lower()
    else:
        ext = str(filepath).split(".")[-1].lower()
        # A missing or unrecognized extension used to fail with
        # "Unsupported format" even when the caller supplied ``format``.
        # Fall back to the explicit format only in that (previously failing)
        # case, so paths with a recognized extension behave as before.
        recognized = ("wav", "flac", "ogg", "vorbis", "sph", "nis", "nist")
        if format is not None and ext not in recognized:
            ext = format.lower()

    if bits_per_sample not in (None, 8, 16, 24, 32, 64):
        raise ValueError("Invalid bits_per_sample.")
    if bits_per_sample == 24:
        warnings.warn(
            "Saving audio with 24 bits per sample might warp samples near -1. "
            "Using 16 bits per sample might be able to avoid this.")
    subtype = _get_subtype(src.dtype, ext, encoding, bits_per_sample)

    # soundfile only knows the container names "NIST" and "OGG"; "sph" (used
    # in TED-LIUM), "nis" and "vorbis" are aliases accepted by this function,
    # so translate them before handing the format over. Any other value
    # (including ``None``, which lets soundfile infer from the path) is passed
    # through unchanged.
    soundfile_names = {
        "sph": "NIST",
        "nis": "NIST",
        "nist": "NIST",
        "vorbis": "OGG",
    }
    format = soundfile_names.get(ext
                                 if format is None else format.lower(), format)

    if channels_first:
        src = src.t()

    soundfile.write(
        file=filepath,
        data=src,
        samplerate=sample_rate,
        subtype=subtype,
        format=format)


_SUBTYPE2DTYPE = {
    "PCM_S8": "int8",
    "PCM_U8": "uint8",
    "PCM_16": "int16",
    "PCM_32": "int32",
    "FLOAT": "float32",
    "DOUBLE": "float64",
}


def load(
        filepath: str,
        frame_offset: int=0,
        num_frames: int=-1,
        normalize: bool=True,
        channels_first: bool=True,
        format: Optional[str]=None, ) -> Tuple[paddle.Tensor, int]:
    """Load audio data from file.

    Note:
        The formats this function can handle depend on the soundfile installation.
        This function is tested on the following formats;

        * WAV

            * 32-bit floating-point
            * 32-bit signed integer
            * 16-bit signed integer
            * 8-bit unsigned integer

        * FLAC
        * OGG/VORBIS
        * SPHERE

    With the defaults (``normalize=True``, ``channels_first=True``) the result is a
    ``float32`` Tensor shaped `[channel, time]` whose samples lie in ``[-1.0, 1.0]``.

    For WAV input, passing ``normalize=False`` makes the returned Tensor use the dtype
    associated with the file's subtype (for example ``int32`` for 32-bit signed PCM,
    ``int16`` for 16-bit signed PCM and ``uint8`` for 8-bit unsigned PCM), with samples
    spanning the whole range of that dtype. 24-bit signed integer WAV is not supported
    in this mode.

    For every other format ``normalize`` has no effect and the result is always a
    ``float32`` Tensor normalized to ``[-1.0, 1.0]``.

    Note:
        ``filepath`` argument is intentionally annotated as ``str`` only, even though it accepts
        ``pathlib.Path`` object as well. This is for the consistency with ``"sox_io"`` backend.

    Args:
        filepath (path-like object or file-like object):
            Source of audio data.
        frame_offset (int, optional):
            Number of frames to skip before start reading data.
        num_frames (int, optional):
            Maximum number of frames to read. ``-1`` reads all the remaining samples,
            starting from ``frame_offset``.
            Fewer frames may be returned when the file does not contain enough of them.
        normalize (bool, optional):
            When ``True``, this function always return ``float32``, and sample values are
            normalized to ``[-1.0, 1.0]``.
            When ``False`` and the input is WAV, the Tensor dtype follows the file subtype.
            This argument has no effect for formats other than WAV.
        channels_first (bool, optional):
            When True, the returned Tensor has dimension `[channel, time]`.
            Otherwise, the returned Tensor's dimension is `[time, channel]`.
        format (str or None, optional):
            Not used. PySoundFile does not accept format hint.

    Returns:
        (paddle.Tensor, int): Resulting Tensor and sample rate.
            If ``channels_first=True``, the Tensor is `[channel, time]`,
            else `[time, channel]`.

    Raises:
        ValueError: If ``normalize=False`` is requested for a WAV file whose subtype
            has no entry in ``_SUBTYPE2DTYPE``.
    """
    with soundfile.SoundFile(filepath, "r") as audio_file:
        keep_native_dtype = audio_file.format == "WAV" and not normalize
        if not keep_native_dtype:
            read_dtype = "float32"
        else:
            subtype = audio_file.subtype
            if subtype not in _SUBTYPE2DTYPE:
                raise ValueError(f"Unsupported subtype: {subtype}")
            read_dtype = _SUBTYPE2DTYPE[subtype]

        # Translate (offset, count) into the frame count SoundFile.read expects.
        frame_count = audio_file._prepare_read(frame_offset, None, num_frames)
        samples = audio_file.read(frame_count, read_dtype, always_2d=True)
        rate = audio_file.samplerate

    tensor = paddle.to_tensor(samples)
    if not channels_first:
        return tensor, rate
    # soundfile returns [time, channel]; flip to [channel, time].
    return paddle.transpose(tensor, perm=[1, 0]), rate


# Mapping from soundfile subtype to number of bits per sample.
# This is mostly heuristical and the value is set to 0 when it is irrelevant
# (lossy formats) or when it can't be inferred.
# For ADPCM (and G72X) subtypes, it's hard to infer the bit depth because it's not part of the standard:
# According to https://en.wikipedia.org/wiki/Adaptive_differential_pulse-code_modulation#In_telephony,
# the default seems to be 8 bits but it can be compressed further to 4 bits.
# The dict is inspired from
# https://github.com/bastibe/python-soundfile/blob/744efb4b01abc72498a96b09115b42a4cabd85e4/soundfile.py#L66-L94
_SUBTYPE_TO_BITS_PER_SAMPLE = {
    "PCM_S8": 8,  # Signed 8 bit data
    "PCM_16": 16,  # Signed 16 bit data
    "PCM_24": 24,  # Signed 24 bit data
    "PCM_32": 32,  # Signed 32 bit data
    "PCM_U8": 8,  # Unsigned 8 bit data (WAV and RAW only)
    "FLOAT": 32,  # 32 bit float data
    "DOUBLE": 64,  # 64 bit float data
    "ULAW": 8,  # U-Law encoded. See https://en.wikipedia.org/wiki/G.711#Types
    "ALAW": 8,  # A-Law encoded. See https://en.wikipedia.org/wiki/G.711#Types
    "IMA_ADPCM": 0,  # IMA ADPCM.
    "MS_ADPCM": 0,  # Microsoft ADPCM.
    "GSM610":
    0,  # GSM 6.10 encoding. (Wikipedia says 1.625 bit depth?? https://en.wikipedia.org/wiki/Full_Rate)
    "VOX_ADPCM": 0,  # OKI / Dialogix ADPCM
    "G721_32": 0,  # 32kbs G721 ADPCM encoding.
    "G723_24": 0,  # 24kbs G723 ADPCM encoding.
    "G723_40": 0,  # 40kbs G723 ADPCM encoding.
    "DWVW_12": 12,  # 12 bit Delta Width Variable Word encoding.
    "DWVW_16": 16,  # 16 bit Delta Width Variable Word encoding.
    "DWVW_24": 24,  # 24 bit Delta Width Variable Word encoding.
    "DWVW_N": 0,  # N bit Delta Width Variable Word encoding.
    "DPCM_8": 8,  # 8 bit differential PCM (XI only)
    "DPCM_16": 16,  # 16 bit differential PCM (XI only)
    "VORBIS": 0,  # Xiph Vorbis encoding. (lossy)
    "ALAC_16": 16,  # Apple Lossless Audio Codec (16 bit).
    "ALAC_20": 20,  # Apple Lossless Audio Codec (20 bit).
    "ALAC_24": 24,  # Apple Lossless Audio Codec (24 bit).
    "ALAC_32": 32,  # Apple Lossless Audio Codec (32 bit).
}


def _get_bit_depth(subtype):
    """Return the bits per sample recorded for a soundfile subtype.

    Subtypes missing from ``_SUBTYPE_TO_BITS_PER_SAMPLE`` trigger a warning and
    yield ``0``.
    """
    if subtype in _SUBTYPE_TO_BITS_PER_SAMPLE:
        return _SUBTYPE_TO_BITS_PER_SAMPLE[subtype]
    warnings.warn(
        f"The {subtype} subtype is unknown to PaddleAudio. As a result, the bits_per_sample "
        "attribute will be set to 0. If you are seeing this warning, please "
        "report by opening an issue on github (after checking for existing/closed ones). "
        "You may otherwise ignore this warning.")
    return 0


_SUBTYPE_TO_ENCODING = {
    "PCM_S8": "PCM_S",
    "PCM_16": "PCM_S",
    "PCM_24": "PCM_S",
    "PCM_32": "PCM_S",
    "PCM_U8": "PCM_U",
    "FLOAT": "PCM_F",
    "DOUBLE": "PCM_F",
    "ULAW": "ULAW",
    "ALAW": "ALAW",
    "VORBIS": "VORBIS",
}


def _get_encoding(format: str, subtype: str):
    """Return the encoding label for a (format, subtype) pair.

    The ``"FLAC"`` format always maps to ``"FLAC"``; anything else is looked up
    by subtype, defaulting to ``"UNKNOWN"``.
    """
    is_flac = format == "FLAC"
    return "FLAC" if is_flac else _SUBTYPE_TO_ENCODING.get(subtype, "UNKNOWN")


def info(filepath: str, format: Optional[str]=None) -> AudioInfo:
    """Get signal information of an audio file.

    Note:
        ``filepath`` argument is intentionally annotated as ``str`` only, even though it accepts
        ``pathlib.Path`` object as well. This is for the consistency with ``"sox_io"`` backend.

    Args:
        filepath (path-like object or file-like object):
            Source of audio data.
        format (str or None, optional):
            Not used. PySoundFile does not accept format hint.

    Returns:
        AudioInfo: sample rate, frame count and channel count as reported by
            soundfile, plus the bit depth and encoding derived from the subtype.

    """
    meta = soundfile.info(filepath)
    bit_depth = _get_bit_depth(meta.subtype)
    encoding_name = _get_encoding(meta.format, meta.subtype)
    return AudioInfo(
        meta.samplerate,
        meta.frames,
        meta.channels,
        bits_per_sample=bit_depth,
        encoding=encoding_name, )


================================================
FILE: paddlespeech/audio/streamdata/tariterators.py
================================================
#
# Copyright (c) 2017-2021 NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# This file is part of the WebDataset library.
# See the LICENSE file for licensing terms (BSD-style).
# Modified from https://github.com/webdataset/webdataset
# Modified from wenet(https://github.com/wenet-e2e/wenet)
"""Low level iteration functions for tar archives."""
import random
import re
import tarfile

import braceexpand

from . import filters
from . import gopen
from .handlers import reraise_exception

# Debug switch: when True, group_by_keys prints the prefix/suffix of every
# entry it processes together with the keys of the sample being built.
trace = False
# A top-level tar member (no "/" in its name) whose name starts with
# meta_prefix and ends with meta_suffix is skipped by tar_file_iterator.
meta_prefix = "__"
meta_suffix = "__"

import paddle
import numpy as np
from paddlespeech.audio.backends import soundfile_load

# File extensions that tar_file_and_group_iterator decodes as audio.
AUDIO_FORMAT_SETS = {'flac', 'mp3', 'm4a', 'ogg', 'opus', 'wav', 'wma'}


def base_plus_ext(path):
    """Split a path into its base and the full (possibly multi-part) extension.

    The split happens at the first "." of the final path component, so
    ``"dir/a.tar.gz"`` gives ``("dir/a", "tar.gz")``.

    :param path: path with extensions
    :returns: ``(base, allext)``, or ``(None, None)`` when the path does not
        match the ``base.ext`` shape

    """
    found = re.match(r"^((?:.*/|)[^.]+)[.]([^/]*)$", path)
    return found.groups() if found else (None, None)


def valid_sample(sample):
    """Tell whether a sample is usable.

    A sample is valid when it is a non-empty dict whose ``"__bad__"`` entry is
    absent or falsy.

    :param sample: sample to be checked
    """
    if sample is None or not isinstance(sample, dict):
        return False
    if len(list(sample.keys())) == 0:
        return False
    return not sample.get("__bad__", False)


# FIXME: UNUSED
def shardlist(urls, *, shuffle=False):
    """Given a list of URLs, yields that list, possibly shuffled.

    :param urls: either a brace-expandable string (e.g. ``"s{0..9}.tar"``) or
        an iterable of URLs
    :param shuffle: shuffle the URLs in place before yielding (Default value = False)
    :returns: generator of ``dict(url=url)``
    """
    if isinstance(urls, str):
        # braceexpand() hands back a lazy iterator; random.shuffle needs a
        # real mutable sequence, so materialize it just like the other branch.
        urls = list(braceexpand.braceexpand(urls))
    else:
        urls = list(urls)
    if shuffle:
        random.shuffle(urls)
    for url in urls:
        yield dict(url=url)


def url_opener(data, handler=reraise_exception, **kw):
    """Open the URL of every incoming ``dict(url=url)`` and attach the stream.

    Each sample is updated in place with a ``"stream"`` entry obtained from
    ``gopen.gopen(url, **kw)`` and then yielded. On failure the URL is appended
    to the exception's args and ``handler`` decides whether to move on to the
    next sample (truthy return) or stop iterating (falsy return).
    """
    for entry in data:
        assert isinstance(entry, dict), entry
        assert "url" in entry
        url = entry["url"]
        try:
            entry["stream"] = gopen.gopen(url, **kw)
            yield entry
        except Exception as exn:
            exn.args += (url, )
            if not handler(exn):
                break


def tar_file_iterator(fileobj,
                      skip_meta=r"__[^/]*__($|/)",
                      handler=reraise_exception):
    """Iterate over a tar stream, yielding one dict per regular member.

    Members with a ``wav`` extension are decoded with ``soundfile_load`` and
    yielded as ``dict(fname=..., wav=..., sample_rate=...)``; every other
    member is read as UTF-8 text and yielded as ``dict(fname=..., txt=...)``.
    ``fname`` is the member name with its last extension removed.

    :param fileobj: byte stream suitable for tarfile
    :param skip_meta: regexp for keys that are skipped entirely (Default value = r"__[^/]*__($|/)")
    :param handler: called with any exception raised while processing a
        member; a truthy return skips the member, a falsy return stops iteration

    """
    stream = tarfile.open(fileobj=fileobj, mode="r:*")
    for tarinfo in stream:
        fname = tarinfo.name
        try:
            if not tarinfo.isreg():
                continue
            if fname is None:
                continue
            if ("/" not in fname and fname.startswith(meta_prefix) and
                    fname.endswith(meta_suffix)):
                # skipping metadata for now
                continue
            if skip_meta is not None and re.match(skip_meta, fname):
                continue

            pos = fname.rfind('.')
            assert pos > 0
            prefix, postfix = fname[:pos], fname[pos + 1:]
            if postfix == 'wav':
                waveform, sample_rate = soundfile_load(
                    stream.extractfile(tarinfo), normal=False)
                result = dict(
                    fname=prefix, wav=waveform, sample_rate=sample_rate)
            else:
                txt = stream.extractfile(tarinfo).read().decode('utf8').strip()
                result = dict(fname=prefix, txt=txt)
            yield result
            # Drop tarfile's cache of already-visited members.
            stream.members = []
        except Exception as exn:
            if hasattr(exn, "args") and len(exn.args) > 0:
                # The first argument is not guaranteed to be a string (some
                # exceptions carry a numeric code there); without str() the
                # concatenation itself raises TypeError, which hides the real
                # error and bypasses ``handler``.
                exn.args = (str(exn.args[0]) + " @ " + str(fileobj),
                            ) + exn.args[1:]
            if handler(exn):
                continue
            else:
                break
    del stream


def tar_file_and_group_iterator(fileobj,
                                skip_meta=r"__[^/]*__($|/)",
                                handler=reraise_exception):
    """Read a tar stream and group consecutive members that share a prefix.

    The prefix of a member is its name up to the last ".". Members are merged
    into one example dict while the prefix stays the same:

    * ``txt`` members are decoded as UTF-8 into ``example['txt']``;
    * members whose extension is in ``AUDIO_FORMAT_SETS`` are decoded with
      ``soundfile_load`` into ``example['wav']`` (a float32 tensor with a
      leading axis added) and ``example['sample_rate']``;
    * any other member is stored as raw bytes under its extension.

    Args:
        fileobj: byte stream suitable for tarfile.
        skip_meta: accepted for signature parity with ``tar_file_iterator``;
            not used by this function.
        handler: called with any exception raised while reading a member.
            A truthy return keeps iterating, a falsy return stops. In both
            cases the example the failing member belongs to is discarded.

    Returns:
        Iterable[{fname, wav, txt, sample_rate}]
    """
    stream = tarfile.open(fileobj=fileobj, mode="r:*")
    prev_prefix = None
    example = {}
    valid = True
    for tarinfo in stream:
        name = tarinfo.name
        pos = name.rfind('.')
        assert pos > 0
        prefix, postfix = name[:pos], name[pos + 1:]
        if prev_prefix is not None and prefix != prev_prefix:
            example['fname'] = prev_prefix
            if valid:
                yield example
            example = {}
            valid = True
        stop = False
        with stream.extractfile(tarinfo) as file_obj:
            try:
                if postfix == 'txt':
                    example['txt'] = file_obj.read().decode('utf8').strip()
                elif postfix in AUDIO_FORMAT_SETS:
                    waveform, sample_rate = soundfile_load(
                        file_obj, normal=False)
                    waveform = paddle.to_tensor(
                        np.expand_dims(np.array(waveform), 0),
                        dtype=paddle.float32)

                    example['wav'] = waveform
                    example['sample_rate'] = sample_rate
                else:
                    example[postfix] = file_obj.read()
            except Exception as exn:
                # Mark the example as broken *before* consulting the handler;
                # previously this assignment sat after continue/break and was
                # never executed, so incomplete examples were still emitted.
                valid = False
                if hasattr(exn, "args") and len(exn.args) > 0:
                    # args[0] may be a non-string (e.g. a numeric code).
                    exn.args = (str(exn.args[0]) + " @ " + str(fileobj),
                                ) + exn.args[1:]
                if not handler(exn):
                    stop = True
        # Always record the prefix, even after a handled error: skipping this
        # made the next member look like the start of a new group and emitted
        # an empty example carrying the previous prefix.
        prev_prefix = prefix
        if stop:
            break
    if prev_prefix is not None and valid:
        example['fname'] = prev_prefix
        yield example
    stream.close()


def tar_file_expander(data, handler=reraise_exception):
    """Expand a stream of open tar files into a stream of tar file contents.

    Every sample produced by ``tar_file_iterator`` for a source is tagged with
    the source URL under ``"__url__"`` and yielded. On failure the source's
    stream and URL are appended to the exception's args and ``handler``
    decides whether to continue with the next source or stop.

    NOTE(review): the check below requires a ``"data"`` key, but
    ``tar_file_iterator`` as written in this file yields ``fname``/``wav``/
    ``sample_rate`` or ``fname``/``txt`` dicts -- confirm this path is still
    expected to work.
    """
    for source in data:
        url = source["url"]
        try:
            assert isinstance(source, dict)
            assert "stream" in source
            for member in tar_file_iterator(source["stream"]):
                assert isinstance(member, dict)
                assert "data" in member
                assert "fname" in member
                member["__url__"] = url
                yield member
        except Exception as exn:
            exn.args += (source.get("stream"), source.get("url"))
            if not handler(exn):
                break


def tar_file_and_group_expander(data, handler=reraise_exception):
    """Expand a stream of open tar files into a stream of grouped examples.

    Each example produced by ``tar_file_and_group_iterator`` must contain
    ``"wav"``, ``"txt"`` and ``"fname"``; it is tagged with the source URL
    under ``"__url__"`` and yielded. On failure the source's stream and URL
    are appended to the exception's args and ``handler`` decides whether to
    continue with the next source or stop.
    """
    for source in data:
        url = source["url"]
        try:
            assert isinstance(source, dict)
            assert "stream" in source
            for example in tar_file_and_group_iterator(source["stream"]):
                assert isinstance(example, dict)
                assert "wav" in example
                assert "txt" in example
                assert "fname" in example
                example["__url__"] = url
                yield example
        except Exception as exn:
            exn.args += (source.get("stream"), source.get("url"))
            if not handler(exn):
                break


def group_by_keys(data,
                  keys=base_plus_ext,
                  lcase=True,
                  suffixes=None,
                  handler=None):
    """Group consecutive ``{fname, data}`` entries into samples keyed by prefix.

    A new sample is started whenever the prefix returned by ``keys`` changes;
    the finished sample is yielded if ``valid_sample`` accepts it.

    :param data: iterable of dicts with ``"fname"``, ``"data"`` and ``"__url__"``
    :param keys: function that splits the key into key and extension (base_plus_ext)
    :param lcase: convert suffixes to lower case (Default value = True)
    :param suffixes: if given, only these suffixes are stored in the sample
    :param handler: accepted but not used by this function
    :raises ValueError: if the same suffix occurs twice within one sample
    """
    pending = None
    for entry in data:
        assert isinstance(entry, dict)
        fname = entry["fname"]
        value = entry["data"]
        prefix, suffix = keys(fname)
        if trace:
            pending_keys = pending.keys() if isinstance(pending, dict) else None
            print(prefix, suffix, pending_keys)
        if prefix is None:
            continue
        if lcase:
            suffix = suffix.lower()
        starts_new_sample = pending is None or prefix != pending["__key__"]
        if starts_new_sample:
            if valid_sample(pending):
                yield pending
            pending = dict(__key__=prefix, __url__=entry["__url__"])
        if suffix in pending:
            raise ValueError(
                f"{fname}: duplicate file name in tar file {suffix} {pending.keys()}"
            )
        if suffixes is None or suffix in suffixes:
            pending[suffix] = value
    if valid_sample(pending):
        yield pending


def tarfile_samples(src, handler=reraise_exception):
    """Turn a stream of ``dict(url=...)`` into grouped tar examples.

    Chains ``url_opener`` and ``tar_file_and_group_expander``, passing the
    same ``handler`` to both.
    """
    opened = url_opener(src, handler=handler)
    return tar_file_and_group_expander(opened, handler=handler)


# tarfile_samples wrapped by filters.pipelinefilter (presumably so it can be
# used as a stage in a data pipeline -- confirm against filters.py).
tarfile_to_samples = filters.pipelinefilter(tarfile_samples)


================================================
FILE: paddlespeech/audio/streamdata/utils.py
================================================
#
# Copyright (c) 2017-2021 NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# This file is part of the WebDataset library.
# See the LICENSE file for licensing terms (BSD-style).
#
# Modified from https://github.com/webdataset/webdataset
"""Miscellaneous utility functions."""
import importlib
import itertools as itt
import os
import re
import sys
from typing import Any
from typing import Callable
from typing import Iterator
from typing import Union

from ..utils.log import Logger

# Module-level logger; paddle_worker_info uses it to report a missing module.
logger = Logger(__name__)


def make_seed(*args):
    """Fold the hashes of all arguments into a single 31-bit seed.

    Each step multiplies the running value by 31, adds ``hash(arg)`` and keeps
    the low 31 bits. With no arguments the result is 0.
    """
    mask = 0x7FFFFFFF
    acc = 0
    for item in args:
        acc = 31 * acc + hash(item)
        acc &= mask
    return acc


class PipelineStage:
    """Base class for pipeline stages; subclasses are expected to override ``invoke``."""

    def invoke(self, *args, **kw):
        """Run the stage. The base implementation always raises NotImplementedError."""
        raise NotImplementedError


def identity(x: Any) -> Any:
    """Return the argument unchanged (the very same object)."""
    return x


def safe_eval(s: str, expr: str="{}"):
    """Evaluate ``expr`` after substituting ``s``, restricting what ``s`` may contain.

    ``s`` may only consist of ASCII letters, digits and underscores; anything
    else raises ``ValueError``. ``expr`` is a ``str.format`` template that
    receives ``s`` as its single positional argument.
    """
    # NOTE(review): this still calls eval(). Only ``s`` is validated; ``expr``
    # is evaluated as given, so it must never come from untrusted input.
    if re.search("[^A-Za-z0-9_]", s) is not None:
        raise ValueError(f"safe_eval: illegal characters in: '{s}'")
    return eval(expr.format(s))


def lookup_sym(sym: str, modules: list):
    """Return the first non-None attribute named ``sym`` found in ``modules``.

    Modules are imported one at a time, in order, and the search stops at the
    first hit. ``None`` is returned when no module provides the symbol.
    """
    candidates = (getattr(
        importlib.import_module(mname, package="webdataset"), sym, None)
                  for mname in modules)
    return next((found for found in candidates if found is not None), None)


def repeatedly0(loader: Iterator,
                nepochs: int=sys.maxsize,
                nbatches: int=sys.maxsize):
    """Iterate over ``loader`` ``nepochs`` times, taking at most ``nbatches`` items per pass."""
    for _ in range(nepochs):
        yield from itt.islice(loader, nbatches)


def guess_batchsize(batch: Union[tuple, list]):
    """Estimate the batch size as the length of the batch's first element."""
    first_field = batch[0]
    return len(first_field)


def repeatedly(
        source: Iterator,
        nepochs: int=None,
        nbatches: int=None,
        nsamples: int=None,
        batchsize: Callable[..., int]=guess_batchsize, ):
    """Repeatedly yield samples from an iterator.

    Iterates over ``source`` again and again until one of the limits is hit.
    Limits that are ``None`` are not enforced.

    :param source: iterable that is re-iterated once per epoch
    :param nepochs: stop after this many full passes over ``source``
    :param nbatches: stop after this many yielded items in total
    :param nsamples: stop once the accumulated batch sizes reach this number
    :param batchsize: function returning the number of samples in a yielded
        item; only consulted when ``nsamples`` is given
    """
    epoch = 0
    batch = 0
    total = 0
    while True:
        for sample in source:
            yield sample
            batch += 1
            if nbatches is not None and batch >= nbatches:
                return
            if nsamples is not None:
                # Use the caller-supplied size function; it used to be ignored
                # in favour of a hard-coded guess_batchsize call.
                total += batchsize(sample)
                if total >= nsamples:
                    return
        epoch += 1
        if nepochs is not None and epoch >= nepochs:
            return


def paddle_worker_info(group=None):
    """Return node and worker info for Paddle and some distributed environments.

    The ``RANK``/``WORLD_SIZE`` and ``WORKER``/``NUM_WORKERS`` environment
    variables take precedence when both variables of a pair are set; otherwise
    the values are queried from ``paddle.distributed`` and ``paddle.io``.

    :param group: optional process group; when falsy,
        ``paddle.distributed.get_group()`` is called
    :returns: tuple ``(rank, world_size, worker, num_workers)``; defaults are
        ``(0, 1, 0, 1)``
    :raises SystemExit: if ``paddle.io`` cannot be imported while looking up
        the data-loader worker info
    """
    rank = 0
    world_size = 1
    worker = 0
    num_workers = 1
    if "RANK" in os.environ and "WORLD_SIZE" in os.environ:
        rank = int(os.environ["RANK"])
        world_size = int(os.environ["WORLD_SIZE"])
    else:
        try:
            import paddle.distributed
            group = group or paddle.distributed.get_group()
            rank = paddle.distributed.get_rank()
            world_size = paddle.distributed.get_world_size()
        except ModuleNotFoundError:
            pass
    if "WORKER" in os.environ and "NUM_WORKERS" in os.environ:
        worker = int(os.environ["WORKER"])
        num_workers = int(os.environ["NUM_WORKERS"])
    else:
        try:
            from paddle.io import get_worker_info
            worker_info = get_worker_info()
            if worker_info is not None:
                worker = worker_info.id
                num_workers = worker_info.num_workers
        except ModuleNotFoundError as E:
            logger.info(f"not found {E}")
            # sys.exit rather than the `exit` builtin: the latter is injected
            # by the `site` module for interactive use and is not guaranteed
            # to exist in every interpreter setup.
            sys.exit(-1)

    return rank, world_size, worker, num_workers


def paddle_worker_seed(group=None):
    """Compute a deterministic RNG seed from the node rank and worker index.

    The seed is ``rank * 1000 + worker`` as reported by ``paddle_worker_info``.
    """
    rank, _world_size, worker, _num_workers = paddle_worker_info(group=group)
    return worker + 1000 * rank


================================================
FILE: paddlespeech/audio/streamdata/writer.py
================================================
#
# Copyright (c) 2017-2021 NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# This file is part of the WebDataset library.
# See the LICENSE file for licensing terms (BSD-style).
# Modified from https://github.com/webdataset/webdataset
#
"""Classes and functions for writing tar files and WebDataset files."""
import io
import json
import pickle
import re
import tarfile
import time
from typing import Any
from typing import Callable
from typing import Optional
from typing import Union

import numpy as np

from . import gopen


def imageencoder(image: Any, format: str="PNG"):  # skipcq: PYL-W0622
    """Compress an image using PIL and return it as a byte string.

    Can handle float or uint8 images. Float arrays must lie within [0, 1]
    (with a tolerance of 0.001) and are scaled to uint8 before encoding.

    :param image: ndarray representing an image, or a PIL image
    :param format: compression format (PNG, JPEG, PPM), matched
        case-insensitively; ``JPG`` is an alias for ``JPEG`` and
        ``IMG``/``IMAGE`` are aliases for ``PPM``
    :returns: the encoded image as ``bytes``
    :raises ValueError: if a float image has values outside the accepted range

    """
    # ``import PIL`` alone does not load the ``PIL.Image`` submodule, so
    # ``PIL.Image`` could fail with AttributeError unless something else had
    # already imported it.
    import PIL.Image

    assert isinstance(image, (PIL.Image.Image, np.ndarray)), type(image)

    if isinstance(image, np.ndarray):
        if image.dtype in [np.dtype("f"), np.dtype("d")]:
            if not (np.amin(image) > -0.001 and np.amax(image) < 1.001):
                raise ValueError(
                    f"image values out of range {np.amin(image)} {np.amax(image)}"
                )
            image = np.clip(image, 0.0, 1.0)
            image = np.array(image * 255.0, "uint8")
        assert image.ndim in [2, 3]
        if image.ndim == 3:
            assert image.shape[2] in [1, 3]
        image = PIL.Image.fromarray(image)
    # Normalize once so aliases and the JPEG quality option apply regardless
    # of the caller's capitalization (previously "jpeg" missed quality=100).
    format = format.upper()
    if format == "JPG":
        format = "JPEG"
    elif format in ["IMG", "IMAGE"]:
        format = "PPM"
    if format == "JPEG":
        opts = dict(quality=100)
    else:
        opts = {}
    with io.BytesIO() as result:
        image.save(result, format=format, **opts)
        return result.getvalue()


def bytestr(data: Any):
    """Convert data into a bytestring.

    Bytes are returned untouched; everything else is turned into text with
    ``str`` (unless it already is a string) and encoded as ASCII.

    :param data: data
    """
    if isinstance(data, bytes):
        return data
    text = data if isinstance(data, str) else str(data)
    return text.encode("ascii")


def paddle_dumps(data: Any):
    """Serialize data to bytes with ``paddle.save``.

    paddle is imported inside the function so that importing this module does
    not require it.

    :param data: data to be dumped
    """
    import io

    import paddle

    with io.BytesIO() as buffer:
        paddle.save(data, buffer)
        return buffer.getvalue()


def numpy_dumps(data: np.ndarray):
    """Serialize an array to bytes in numpy's ``.npy`` format.

    :param data: data to be dumped
    """
    import io

    import numpy.lib.format

    with io.BytesIO() as buffer:
        numpy.lib.format.write_array(buffer, data)
        return buffer.getvalue()


def numpy_npz_dumps(data: np.ndarray):
    """Serialize named arrays to bytes in numpy's compressed ``.npz`` format.

    NOTE(review): despite the annotation, ``data`` is unpacked with ``**`` and
    therefore has to be a mapping of names to arrays.

    :param data: data to be dumped
    """
    import io

    with io.BytesIO() as buffer:
        np.savez_compressed(buffer, **data)
        return buffer.getvalue()


def tenbin_dumps(x):
    """Encode ``x`` with ``tenbin.encode_buffer`` and return a memoryview.

    A value that is not already a list is wrapped in a single-element list
    before encoding.
    """
    from . import tenbin

    items = x if isinstance(x, list) else [x]
    return memoryview(tenbin.encode_buffer(items))


def cbor_dumps(x):
    """Serialize ``x`` with ``cbor.dumps``; the ``cbor`` package is imported on first use."""
    import cbor

    return cbor.dumps(x)


def mp_dumps(x):
    """Serialize ``x`` with ``msgpack.packb``; the ``msgpack`` package is imported on first use."""
    import msgpack

    return msgpack.packb(x)


def add_handlers(d, keys, value):
    """Register ``value`` in ``d`` under every key in ``keys``.

    :param d: dict that is updated in place
    :param keys: iterable of keys, or a whitespace-separated string of keys
    :param value: value stored under each key
    """
    names = keys.split() if isinstance(keys, str) else keys
    for name in names:
        d[name] = value


def make_handlers():
    """Build the extension -> encoder table used for default sample encoding.

    :returns: dict mapping a lower-case extension to a one-argument function
        that turns a value into its serialized form
    """

    def _ascii_repr(value):
        return str(value).encode("ascii")

    def _utf8(text):
        return text.encode("utf-8")

    def _json_utf8(obj):
        return json.dumps(obj).encode("utf-8")

    def _image_as(kind):
        def _encode(data):
            return imageencoder(data, kind)

        return _encode

    # (space-separated extensions, encoder), registered in this order.
    table = (
        ("cls cls2 class count index inx id", _ascii_repr),
        ("txt text transcript", _utf8),
        ("html htm", _utf8),
        ("pyd pickle", pickle.dumps),
        ("pdparams", paddle_dumps),
        ("npy", numpy_dumps),
        ("npz", numpy_npz_dumps),
        ("ten tenbin tb", tenbin_dumps),
        ("json jsn", _json_utf8),
        ("mp msgpack msg", mp_dumps),
        ("cbor", cbor_dumps),
        ("jpg jpeg img image", _image_as("jpg")),
        ("png", _image_as("png")),
        ("pbm", _image_as("pbm")),
        ("pgm", _image_as("pgm")),
        ("ppm", _image_as("ppm")), )

    registry = {}
    for extensions, encode in table:
        add_handlers(registry, extensions, encode)
    return registry


# Handler table built once at import time; make_encoder(True) encodes with it.
default_handlers = make_handlers()


def encode_based_on_extension1(data: Any, tname: str, handlers: dict):
    """Encode a single sample field according to its name.

    Fields whose name starts with ``_`` are metadata: they must already be
    strings and are returned untouched. ``bytes`` pass through unchanged and
    ``str`` is UTF-8 encoded. Everything else is dispatched on the text
    after the last ``.`` in ``tname``, lower-cased.

    :param data: value to encode
    :param tname: field name / file extension
    :param handlers: mapping from extension to encoding callable
    :raises ValueError: for non-string metadata or an extension that has no
        handler
    """
    if tname[0] == "_":
        if isinstance(data, str):
            return data
        raise ValueError("the values of metadata must be of string type")

    extension = re.sub(r".*\.", "", tname).lower()
    if isinstance(data, bytes):
        return data
    if isinstance(data, str):
        return data.encode("utf-8")

    encode = handlers.get(extension)
    if encode is None:
        raise ValueError(f"no handler found for {extension}")
    return encode(data)


def encode_based_on_extension(sample: dict, handlers: dict):
    """Encode every field of a sample with the given handlers.

    :param sample: data sample (a dict mapping field name to value)
    :param handlers: mapping from extension to encoding callable
    :returns: new dict with the same keys and encoded values
    """
    encoded = {}
    for name, value in list(sample.items()):
        encoded[name] = encode_based_on_extension1(value, name, handlers)
    return encoded


def make_encoder(spec: Union[bool, str, dict, Callable]):
    """Make an encoder function from a specification.

    :param spec: ``False``/``None`` to pass samples through unchanged,
        ``True`` to use ``default_handlers``, a dict mapping extensions to
        handlers, or a callable that is used directly as the encoder
    :returns: callable taking a sample and returning the encoded sample
    :raises ValueError: if ``spec`` is none of the supported forms
    """
    if spec is False or spec is None:

        def encoder(x):
            """Do not encode at all."""
            return x

    elif callable(spec):
        encoder = spec
    elif isinstance(spec, dict):

        def encoder(sample):
            """Encode based on extension using the caller's handlers."""
            return encode_based_on_extension(sample, spec)

    elif spec is True:
        # Bind the table now so later rebinding of the module global does
        # not affect encoders that were already created.
        handlers = default_handlers

        def encoder(sample):
            """Encode based on extension using the default handlers."""
            return encode_based_on_extension(sample, handlers)

    else:
        # This factory builds encoders; the message previously said
        # "decoder", which pointed users at the wrong API.
        raise ValueError(f"{spec}: unknown encoder spec")
    # Every branch above binds a callable, so no further check is needed.
    return encoder


class TarWriter:
    """A class for writing dictionaries to tar files.

    :param fileobj: fileobj: file name for tar file (.tgz/.tar) or open file descriptor
    :param encoder: sample encoding (Default value = True)
    :param compress:  (Default value = None)

    `True` will use an encoder that behaves similar to the automatic
    decoder for `Dataset`. `False` disables encoding and expects byte strings
    (except for metadata, which must be strings). The `encoder` argument can
    also be a `callable`, or a dictionary mapping extensions to encoders.

    The following code will add two files to the tar archive: `a/b.png` and
    `a/b.output.png`.

    ```Python
        tarwriter = TarWriter(stream)
        image = imread("b.jpg")
        image2 = imread("b.out.jpg")
        sample = {"__key__": "a/b", "png": image, "output.png": image2}
        tarwriter.write(sample)
    ```
    """

    def __init__(
            self,
            fileobj,
            user: str="bigdata",
            group: str="bigdata",
            mode: int=0o0444,
            compress: Optional[bool]=None,
            encoder: Union[None, bool, Callable]=True,
            keep_meta: bool=False, ):
        """Create a tar writer.

        :param fileobj: stream to write data to, or a file name/URL that is
            opened with ``gopen.gopen``
        :param user: user for tar files
        :param group: group for tar files
        :param mode: mode for tar files
        :param compress: desired compression; when ``None`` and ``fileobj``
            is a name, gzip is used if the name ends with ``gz``
        :param encoder: encoder function (see ``make_encoder``)
        :param keep_meta: keep metadata (entries starting with "_")
        """
        if isinstance(fileobj, str):
            if compress is False:
                tarmode = "w|"
            elif compress is True:
                tarmode = "w|gz"
            else:
                tarmode = "w|gz" if fileobj.endswith("gz") else "w|"
            fileobj = gopen.gopen(fileobj, "wb")
            # We opened it, so we are responsible for closing it.
            self.own_fileobj = fileobj
        else:
            # A caller-supplied stream is only compressed on explicit request.
            tarmode = "w|gz" if compress is True else "w|"
            self.own_fileobj = None
        self.encoder = make_encoder(encoder)
        self.keep_meta = keep_meta
        self.stream = fileobj
        self.tarstream = tarfile.open(fileobj=fileobj, mode=tarmode)

        self.user = user
        self.group = group
        self.mode = mode
        self.compress = compress

    def __enter__(self):
        """Enter context."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Exit context."""
        self.close()

    def close(self):
        """Close the tar file, and the underlying file if this writer opened it."""
        try:
            self.tarstream.close()
        finally:
            # Release a file we opened ourselves even when finalising the
            # archive fails; caller-supplied streams are left open.
            if self.own_fileobj is not None:
                self.own_fileobj.close()
                self.own_fileobj = None

    def write(self, obj):
        """Write a dictionary to the tar file.

        :param obj: dictionary of objects to be stored; must contain
            ``__key__``, which becomes the common file name prefix
        :returns: size of the entry (total payload bytes written)
        :raises ValueError: if ``__key__`` is missing or a value is not
            bytes-like after encoding

        """
        total = 0
        obj = self.encoder(obj)
        if "__key__" not in obj:
            raise ValueError("object must contain a __key__")
        # Validate every data field first so nothing is written for a bad
        # sample.
        for k, v in list(obj.items()):
            if k[0] == "_":
                continue
            if not isinstance(v, (bytes, bytearray, memoryview)):
                raise ValueError(
                    f"{k} doesn't map to a bytes after encoding ({type(v)})")
        key = obj["__key__"]
        for k in sorted(obj.keys()):
            if k == "__key__":
                continue
            if not self.keep_meta and k[0] == "_":
                continue
            v = obj[k]
            if isinstance(v, str):
                # Only metadata can still be a str at this point.
                v = v.encode("utf-8")
            if not isinstance(v, (bytes, bytearray, memoryview)):
                raise ValueError(
                    f"converter didn't yield bytes: {k}, {type(v)}")
            now = time.time()
            ti = tarfile.TarInfo(key + "." + k)
            # len() of a memoryview counts elements, not bytes; nbytes is
            # the real payload size for every bytes-like value.
            ti.size = memoryview(v).nbytes
            ti.mtime = now
            ti.mode = self.mode
            ti.uname = self.user
            ti.gname = self.group
            stream = io.BytesIO(v)
            self.tarstream.addfile(ti, stream)
            total += ti.size
        return total


class ShardWriter:
    """Like TarWriter but splits into multiple shards."""

    def __init__(
            self,
            pattern: str,
            maxcount: int=100000,
            maxsize: float=3e9,
            post: Optional[Callable]=None,
            start_shard: int=0,
            **kw, ):
        """Create a ShardWriter.

        :param pattern: output file pattern, formatted with ``%`` and the
            shard number
        :param maxcount: maximum number of records per shard (Default value = 100000)
        :param maxsize: maximum size of each shard (Default value = 3e9)
        :param post: optional callable invoked with the file name of each
            finished shard
        :param start_shard: number of the first shard (Default value = 0)
        :param kw: other options passed to TarWriter
        """
        self.verbose = 1
        self.kw = kw
        self.maxcount = maxcount
        self.maxsize = maxsize
        self.post = post

        self.tarstream = None
        # File object backing the current shard. TarWriter does not close
        # streams handed to it, so this class must.
        self._shard_file = None
        self.shard = start_shard
        self.pattern = pattern
        self.total = 0
        self.count = 0
        self.size = 0
        self.fname = None
        self.next_stream()

    def next_stream(self):
        """Close the current stream and move to the next."""
        self.finish()
        self.fname = self.pattern % self.shard
        if self.verbose:
            print(
                "# writing",
                self.fname,
                self.count,
                "%.1f GB" % (self.size / 1e9),
                self.total, )
        self.shard += 1
        shard_file = open(self.fname, "wb")
        try:
            self.tarstream = TarWriter(shard_file, **self.kw)
        except BaseException:
            # Do not leak the handle if the writer cannot be constructed.
            shard_file.close()
            raise
        self._shard_file = shard_file
        self.count = 0
        self.size = 0

    def write(self, obj):
        """Write a sample.

        A new shard is started first when the current one has reached
        ``maxcount`` records or ``maxsize`` bytes.

        :param obj: sample to be written
        """
        if (self.tarstream is None or self.count >= self.maxcount or
                self.size >= self.maxsize):
            self.next_stream()
        size = self.tarstream.write(obj)
        self.count += 1
        self.total += 1
        self.size += size

    def finish(self):
        """Finish all writing (use close instead)."""
        tarstream = getattr(self, "tarstream", None)
        if tarstream is None:
            return
        try:
            tarstream.close()
        finally:
            # Close (and thereby flush) the shard file before ``post`` runs,
            # so the callback always sees the complete file on disk.
            shard_file = getattr(self, "_shard_file", None)
            if shard_file is not None:
                shard_file.close()
            self._shard_file = None
            self.tarstream = None
        assert self.fname is not None
        if callable(self.post):
            self.post(self.fname)

    def close(self):
        """Close the stream.

        The per-shard state is removed afterwards, so writing to a closed
        ShardWriter fails with ``AttributeError``. Calling ``close`` more
        than once is harmless.
        """
        self.finish()
        for name in ("tarstream", "shard", "count", "size"):
            self.__dict__.pop(name, None)

    def __enter__(self):
        """Enter context."""
        return self

    def __exit__(self, *args, **kw):
        """Exit context."""
        self.close()


================================================
FILE: paddlespeech/audio/text/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/audio/text/text_featurizer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the text featurizer class."""
from pprint import pformat
from typing import Union

import sentencepiece as spm

from ..utils.log import Logger
from .utility import BLANK
from .utility import EOS
from .utility import load_dict
from .utility import MASKCTC
from .utility import SOS
from .utility import SPACE
from .utility import UNK

# Module-level logger, named after this module.
logger = Logger(__name__)

# Only the featurizer class is exported by `from ... import *`.
__all__ = ["TextFeaturizer"]


class TextFeaturizer():
    def __init__(self, unit_type, vocab, spm_model_prefix=None, maskctc=False):
        """Text featurizer, for processing or extracting features from text.

        Currently, it supports char/word/sentence-piece level tokenizing and conversion into
        a list of token indices. Note that the token indexing order follows the
        given vocabulary file.

        Args:
            unit_type (str): unit type, e.g. char, word, spm
            vocab Option[str, list]: Filepath to load vocabulary for token indices conversion, or vocab list.
                When empty/None only tokenizing is available; the vocabulary
                attributes are then not set.
            spm_model_prefix (str, optional): spm model prefix; ``.model`` is
                appended to get the model path. Required when ``unit_type``
                is ``spm``. Defaults to None.
            maskctc (bool, optional): forwarded to the vocabulary loader when
                ``vocab`` is a file path. Defaults to False.
        """
        assert unit_type in ('char', 'spm', 'word')
        self.unit_type = unit_type
        self.unk = UNK
        self.maskctc = maskctc

        if vocab:
            (self.vocab_dict, self._id2token, self.vocab_list, self.unk_id,
             self.eos_id,
             self.blank_id) = self._load_vocabulary_from_file(vocab, maskctc)
            self.vocab_size = len(self.vocab_list)
        else:
            logger.warning("TextFeaturizer: not have vocab file or vocab list.")

        if unit_type == 'spm':
            spm_model = spm_model_prefix + '.model'
            self.sp = spm.SentencePieceProcessor()
            self.sp.Load(spm_model)

    def tokenize(self, text, replace_space=True):
        """Split ``text`` into tokens according to ``unit_type``.

        Args:
            text (str): text string.
            replace_space (bool): only used for char units, see
                ``char_tokenize``.

        Returns:
            List[str]: tokens (``None`` for empty text with spm units).
        """
        if self.unit_type == 'char':
            tokens = self.char_tokenize(text, replace_space)
        elif self.unit_type == 'word':
            tokens = self.word_tokenize(text)
        else:  # spm
            tokens = self.spm_tokenize(text)
        return tokens

    def detokenize(self, tokens):
        """Join ``tokens`` back into text according to ``unit_type``."""
        if self.unit_type == 'char':
            text = self.char_detokenize(tokens)
        elif self.unit_type == 'word':
            text = self.word_detokenize(tokens)
        else:  # spm
            text = self.spm_detokenize(tokens)
        return text

    def featurize(self, text):
        """Convert text string to a list of token indices.

        Tokens missing from the vocabulary are mapped to the unknown token.

        Args:
            text (str): Text to process.

        Returns:
            List[int]: List of token indices.
        """
        # spm_tokenize returns None for empty text; treat that as no tokens
        # instead of failing while iterating over None.
        tokens = self.tokenize(text) or []
        ids = []
        for token in tokens:
            if token not in self.vocab_dict:
                logger.debug(f"Text Token: {token} -> {self.unk}")
                token = self.unk
            ids.append(self.vocab_dict[token])
        return ids

    def defeaturize(self, idxs):
        """Convert a list of token indices to text string,
        ignore index after eos_id.

        Args:
            idxs (List[int]): List of token indices.

        Returns:
            str: Text.
        """
        tokens = []
        for idx in idxs:
            if idx == self.eos_id:
                break
            tokens.append(self._id2token[idx])
        text = self.detokenize(tokens)
        return text

    def char_tokenize(self, text, replace_space=True):
        """Character tokenizer.

        Args:
            text (str): text string.
            replace_space (bool): False only used by build_vocab.py.

        Returns:
            List[str]: tokens.
        """
        text = text.strip()
        if replace_space:
            text_list = [SPACE if item == " " else item for item in list(text)]
        else:
            text_list = list(text)
        return text_list

    def char_detokenize(self, tokens):
        """Character detokenizer.

        Args:
            tokens (List[str]): tokens.

        Returns:
           str: text string.
        """
        tokens = [t.replace(SPACE, " ") for t in tokens]
        return "".join(tokens)

    def word_tokenize(self, text):
        """Word tokenizer, separate by <space>."""
        return text.strip().split()

    def word_detokenize(self, tokens):
        """Word detokenizer, separate by <space>."""
        return " ".join(tokens)

    def spm_tokenize(self, text):
        """spm tokenize.

        Args:
            text (str): text string.

        Returns:
            List[str]: sentence pieces str code, or ``None`` when ``text``
            is empty after stripping.
        """
        line = text.strip()
        if not line:
            return None
        return self.sp.EncodeAsPieces(line)

    def spm_detokenize(self, tokens, input_format='piece'):
        """spm detokenize.

        Args:
            tokens (List[str] or List[int]): pieces or ids to decode.
            input_format (str): ``"piece"`` to decode pieces, ``"id"`` to
                decode ids.

        Raises:
            ValueError: if ``input_format`` is neither ``"piece"`` nor
                ``"id"``.

        Returns:
            str: text
        """
        if input_format == "piece":
            return "".join(self.sp.DecodePieces(tokens))
        if input_format == "id":
            return "".join(self.sp.DecodeIds(tokens))
        # Previously this fell through to an unbound local function.
        raise ValueError(
            f"unsupported input_format: {input_format!r}, expected 'piece' or 'id'"
        )

    def _load_vocabulary_from_file(self, vocab: Union[str, list],
                                   maskctc: bool):
        """Load vocabulary from file.

        Args:
            vocab (Union[str, list]): vocabulary file path, or the token
                list itself (used as-is).
            maskctc (bool): forwarded to ``load_dict`` when ``vocab`` is a
                path.

        Returns:
            tuple: (token2id, id2token, vocab_list, unk_id, eos_id,
            blank_id); an id is -1 when its token is not in the vocabulary.
        """
        if isinstance(vocab, list):
            vocab_list = vocab
        else:
            vocab_list = load_dict(vocab, maskctc)
        assert vocab_list is not None
        logger.debug(f"Vocab: {pformat(vocab_list)}")

        id2token = dict(enumerate(vocab_list))
        token2id = {token: idx for idx, token in enumerate(vocab_list)}

        def index_of(token):
            # First position of the token, or -1 when absent.
            return vocab_list.index(token) if token in vocab_list else -1

        blank_id = index_of(BLANK)
        maskctc_id = index_of(MASKCTC)
        unk_id = index_of(UNK)
        eos_id = index_of(EOS)
        sos_id = index_of(SOS)
        space_id = index_of(SPACE)

        logger.info(f"BLANK id: {blank_id}")
        logger.info(f"UNK id: {unk_id}")
        logger.info(f"EOS id: {eos_id}")
        logger.info(f"SOS id: {sos_id}")
        logger.info(f"SPACE id: {space_id}")
        logger.info(f"MASKCTC id: {maskctc_id}")
        return token2id, id2token, vocab_list, unk_id, eos_id, blank_id


================================================
FILE: paddlespeech/audio/text/utility.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains data helper functions."""
import json
import math
import tarfile
from collections import namedtuple
from typing import List
from typing import Optional
from typing import Text

import jsonlines
import numpy as np

from paddlespeech.s2t.utils.log import Log

# Module-level logger obtained through the project's s2t logging helper.
logger = Log(__name__).getlog()

# Names exported by `from ... import *`.
__all__ = [
    "load_dict", "load_cmvn", "read_manifest", "rms_to_db", "rms_to_dbfs",
    "max_dbfs", "mean_dbfs", "gain_db_to_ratio", "normalize_audio", "SOS",
    "EOS", "UNK", "BLANK", "MASKCTC", "SPACE", "convert_samples_to_float32",
    "convert_samples_from_float32"
]

# NOTE(review): not referenced in the visible part of this module; presumably
# the label id that is ignored elsewhere - confirm against callers.
IGNORE_ID = -1
# `sos` and `eos` share the same token string.
SOS = "<eos>"
EOS = SOS
UNK = "<unk>"
# `load_dict` inserts BLANK at index 0 and appends EOS when they are missing,
# and appends MASKCTC when its `maskctc` flag is set.
BLANK = "<blank>"
MASKCTC = "<mask>"
SPACE = "<space>"


def load_dict(dict_path: Optional[Text], maskctc=False) -> Optional[List[Text]]:
    """Load a vocabulary file into a list of tokens.

    Each line holds one token, optionally followed by a space and further
    columns (for example ``<blank> 0``); only the first column is kept.
    ``BLANK`` is inserted at index 0 and ``EOS`` appended when missing.

    Args:
        dict_path (Optional[Text]): vocabulary file path, or None.
        maskctc (bool, optional): also append ``MASKCTC`` when missing (for
            the non-autoregressive maskctc model). Defaults to False.

    Returns:
        Optional[List[Text]]: token list, or None when ``dict_path`` is None.
    """
    if dict_path is None:
        return None

    with open(dict_path, "r") as f:
        # Strip only the line terminator. Slicing off the last character
        # truncated the final token of files without a trailing newline.
        # In the vocabulary a space is represented by `<space>`.
        char_list = [line.rstrip("\n").split(" ")[0] for line in f]
    if BLANK not in char_list:
        char_list.insert(0, BLANK)
    if EOS not in char_list:
        char_list.append(EOS)
    # for non-autoregressive maskctc model
    if maskctc and MASKCTC not in char_list:
        char_list.append(MASKCTC)
    return char_list


def read_manifest(
        manifest_path,
        max_input_len=float('inf'),
        min_input_len=0.0,
        max_output_len=float('inf'),
        min_output_len=0.0,
        max_output_input_ratio=float('inf'),
        min_output_input_ratio=0.0, ):
    """Load and parse manifest file.

    The input length is ``input[0]["shape"][0]`` and the output length is
    ``output[0]["shape"][0]``; either one counts as 1.0 when the record does
    not provide it. Records outside any of the limits are dropped.

    Args:
        manifest_path ([type]): Manifest file to load and parse.
        max_input_len ([type], optional): maximum input seq length,
            in seconds for raw wav, in frame numbers for feature data.
            Defaults to float('inf').
        min_input_len (float, optional): minimum input seq length,
            in seconds for raw wav, in frame numbers for feature data.
            Defaults to 0.0.
        max_output_len (float, optional): maximum output seq length,
            in modeling units. Defaults to float('inf').
        min_output_len (float, optional): minimum output seq length,
            in modeling units. Defaults to 0.0.
        max_output_input_ratio (float, optional):
            maximum output seq length/input seq length ratio.
            Defaults to float('inf').
        min_output_input_ratio (float, optional):
            minimum output seq length/input seq length ratio.
            Defaults to 0.0.

    Raises:
        IOError: If failed to parse the manifest.

    Returns:
        List[dict]: Manifest parsing results.
    """

    def first_dim(record, field):
        # shape[0] of the first item of record[field], 1.0 when unavailable.
        if field in record and "shape" in record[field][0]:
            return record[field][0]["shape"][0]
        return 1.0

    manifest = []
    with jsonlines.open(manifest_path, 'r') as reader:
        for json_data in reader:
            feat_len = first_dim(json_data, "input")
            token_len = first_dim(json_data, "output")
            if feat_len:
                ratio = token_len / feat_len
            else:
                # A zero-length input used to raise ZeroDivisionError. Treat
                # the ratio as unbounded (or 0 when the output is empty too)
                # so that the length limits decide.
                ratio = float('inf') if token_len else 0.0
            conditions = [
                feat_len >= min_input_len,
                feat_len <= max_input_len,
                token_len >= min_output_len,
                token_len <= max_output_len,
                ratio >= min_output_input_ratio,
                ratio <= max_output_input_ratio,
            ]
            if all(conditions):
                manifest.append(json_data)
    return manifest


# Tar File read
# Cache handed to `subfile_from_tar`: `tar2info` maps a tar path to its
# {member name: TarInfo} dict, `tar2object` maps it to the open TarFile.
TarLocalData = namedtuple('TarLocalData', ['tar2info', 'tar2object'])


def parse_tar(file):
    """Open a tar archive and index its members by name.

    Returns the open ``TarFile`` object (it is not closed here) together
    with a dict mapping each member name to its ``TarInfo``.
    """
    archive = tarfile.open(file)
    members = {info.name: info for info in archive.getmembers()}
    return archive, members


def subfile_from_tar(file, local_data=None):
    """Get subfile object from tar.

    ``file`` has the form ``tar:tarpath#filename``.

    It will return a subfile object from tar file
    and cached tar file info for next reading request.

    Args:
        file (str): ``<scheme>:<tarpath>#<member name>``; the text before
            the first ``:`` is ignored.
        local_data (TarLocalData, optional): cache of opened archives and
            their member tables, updated in place. Without it a temporary
            cache is used, so the archive is re-opened on every call.

    Returns:
        file-like object for the requested member, as returned by
        ``TarFile.extractfile``. The TarFile it reads from stays open.
    """
    tarpath, filename = file.split(':', 1)[1].split('#', 1)

    if local_data is None:
        local_data = TarLocalData(tar2info={}, tar2object={})

    # A TarLocalData always carries both dicts, so no attribute probing is
    # needed. The old `local_data.__dict__` checks raised AttributeError,
    # because namedtuple instances have no `__dict__`.
    assert isinstance(local_data, TarLocalData)

    if tarpath not in local_data.tar2info:
        fobj, infos = parse_tar(tarpath)
        local_data.tar2info[tarpath] = infos
        local_data.tar2object[tarpath] = fobj
    else:
        fobj = local_data.tar2object[tarpath]
        infos = local_data.tar2info[tarpath]
    return fobj.extractfile(infos[filename])


def rms_to_db(rms: float):
    """Convert a root-mean-square amplitude to decibels.

    Args:
        rms ([float]): root mean square; values below 1e-16 are raised to
            1e-16 before taking the logarithm

    Returns:
        float: dB
    """
    floored = max(1e-16, rms)
    return 20.0 * math.log10(floored)


def rms_to_dbfs(rms: float):
    """Convert a root-mean-square amplitude to dBFS.

    https://fireattack.wordpress.com/2017/02/06/replaygain-loudness-normalization-and-applications/
    Audio is a mix of sine waves, and the RMS of a sine wave of amplitude 1
    is 0.7071, i.e. -3.0103 dB.

    dB = dBFS + 3.0103
    dBFS = dB - 3.0103
    e.g. 0 dB = -3.0103 dBFS

    Args:
        rms ([float]): root mean square

    Returns:
        float: dBFS
    """
    decibels = rms_to_db(rms)
    return decibels - 3.0103


def max_dbfs(sample_data: np.ndarray):
    """Peak dBFS, taken from the sample with the largest magnitude.

    Using the peak for normalization prevents overdrive.

    Args:
        sample_data ([np.ndarray]): float array, [-1, 1].

    Returns:
        float: dBFS
    """
    lowest = abs(np.min(sample_data))
    highest = abs(np.max(sample_data))
    peak = max(lowest, highest)
    return rms_to_dbfs(peak)


def mean_dbfs(sample_data):
    """dBFS of the RMS energy of the samples.

    Args:
        sample_data ([np.ndarray]): float array, [-1, 1].

    Returns:
        float: dBFS
    """
    # Square in float64 regardless of the input dtype.
    squared = np.square(sample_data, dtype=np.float64)
    rms = math.sqrt(np.mean(squared))
    return rms_to_dbfs(rms)


def gain_db_to_ratio(gain_db: float):
    """Convert a gain in dB to an amplitude ratio, ``10 ** (gain_db / 20)``.

    Args:
        gain_db (float): gain in dB

    Returns:
        float: scale in amp
    """
    exponent = gain_db / 20.0
    return math.pow(10.0, exponent)


def normalize_audio(sample_data: np.ndarray, dbfs: float=-3.0103):
    """Normalize audio so that its peak sits at the target dBFS.

    The samples are scaled by the gain that moves ``max_dbfs(sample_data)``
    to ``dbfs`` and then limited to [-1, 1].

    Args:
        sample_data (np.ndarray): input wave samples, [-1, 1].
        dbfs (float, optional): target dBFS. Defaults to -3.0103.

    Returns:
        np.ndarray: normalized wave
    """
    gain = gain_db_to_ratio(dbfs - max_dbfs(sample_data))
    scaled = sample_data * gain
    upper_limited = np.minimum(scaled, 1.0)
    return np.maximum(upper_limited, -1.0)


def _load_json_cmvn(json_cmvn_file):
    """ Load the json format cmvn stats file and calculate cmvn

    Args:
        json_cmvn_file: cmvn stats file in json format

    Returns:
        a numpy array of [means, vars]
    """
    with open(json_cmvn_file) as f:
        cmvn_stats = json.load(f)

    means = cmvn_stats['mean_stat']
    variance = cmvn_stats['var_stat']
    count = cmvn_stats['frame_num']
    for i in range(len(means)):
        means[i] /= count
        variance[i] = variance[i] / count - means[i] * means[i]
        if variance[i] < 1.0e-20:
            variance[i] = 1.0e-20
        variance[i] = 1.0 / math.sqrt(variance[i])
    cmvn = np.array([means, variance])
    return cmvn


def _load_kaldi_cmvn(kaldi_cmvn_file):
    """ Load the kaldi format cmvn stats file and calculate cmvn

    Args:
        kaldi_cmvn_file:  kaldi text style global cmvn file, which
           is generated by:
           compute-cmvn-stats --binary=false scp:feats.scp global_cmvn

    Returns:
        a numpy array of [means, vars]
    """
    means = []
    variance = []
    with open(kaldi_cmvn_file, 'r') as fid:
        # kaldi binary file start with '\0B'
        if fid.read(2) == '\0B':
            logger.error('kaldi cmvn binary file is not supported, please '
                         'recompute it by: compute-cmvn-stats --binary=false '
                         ' scp:feats.scp global_cmvn')
            sys.exit(1)
        fid.seek(0)
        arr = fid.read().split()
        assert (arr[0] == '[')
        assert (arr[-2] == '0')
        assert (arr[-1] == ']')
        feat_dim = int((len(arr) - 2 - 2) / 2)
        for i in range(1, feat_dim + 1):
            means.append(float(arr[i]))
        count = float(arr[feat_dim + 1])
        for i in range(feat_dim + 2, 2 * feat_dim + 2):
            variance.append(float(arr[i]))

    for i in range(len(means)):
        means[i] /= count
        variance[i] = variance[i] / count - means[i] * means[i]
        if variance[i] < 1.0e-20:
            variance[i] = 1.0e-20
        variance[i] = 1.0 / math.sqrt(variance[i])
    cmvn = np.array([means, variance])
    return cmvn


def load_cmvn(cmvn_file: str, filetype: str):
    """load cmvn from file.

    Args:
        cmvn_file (str): cmvn path.
        filetype (str): file type, one of npz, json, kaldi
            (case-insensitive).

    Raises:
        ValueError: file type not support.

    Returns:
        Tuple[np.ndarray, np.ndarray]: mean, istd
    """
    # Normalise the case before validating, so that e.g. "NPZ" is accepted
    # and unknown types raise the documented ValueError.
    filetype = filetype.lower()
    if filetype == "json":
        cmvn = _load_json_cmvn(cmvn_file)
    elif filetype == "kaldi":
        cmvn = _load_kaldi_cmvn(cmvn_file)
    elif filetype == "npz":
        # eps keeps the inverse finite when a dimension has zero std.
        eps = 1e-14
        # Close the archive once both arrays have been read.
        with np.load(cmvn_file) as npzfile:
            mean = np.squeeze(npzfile["mean"])
            std = np.squeeze(npzfile["std"])
        istd = 1 / (std + eps)
        cmvn = [mean, istd]
    else:
        raise ValueError(f"cmvn file type no support: {filetype}")
    return cmvn[0], cmvn[1]


def convert_samples_to_float32(samples):
    """Convert sample type to float32.

    Audio sample type is usually integer or float-point.
    Signed integers will be scaled to [-1, 1] in float32; floating point
    samples are only cast.

    Args:
        samples (np.ndarray): samples with a signed integer or floating
            point dtype.

    Raises:
        TypeError: for any other dtype (e.g. unsigned integers).

    Returns:
        np.ndarray: float32 samples.
    """
    float32_samples = samples.astype('float32')
    # `np.sctypes` was removed in NumPy 2.0; the dtype kind selects the same
    # groups ('i' signed integers, 'f' floating point).
    kind = samples.dtype.kind
    if kind == 'i':
        bits = np.iinfo(samples.dtype).bits
        float32_samples *= (1. / 2**(bits - 1))
    elif kind == 'f':
        pass
    else:
        raise TypeError("Unsupported sample type: %s." % samples.dtype)
    return float32_samples


def convert_samples_from_float32(samples, dtype):
    """Convert sample type from float32 to dtype.

    Audio sample type is usually integer or float-point. For integer
    type, float32 will be rescaled from [-1, 1] to the maximum range
    supported by the integer type.

    Args:
        samples (np.ndarray): floating point samples; not modified.
        dtype: target dtype, a signed integer or floating point type.

    Raises:
        TypeError: if ``dtype`` is neither a signed integer nor a floating
            point type.

    Returns:
        np.ndarray: samples converted to ``dtype``, clipped to its range.
    """
    dtype = np.dtype(dtype)
    output_samples = samples.copy()
    # `np.sctypes` was removed in NumPy 2.0; the dtype kind selects the same
    # groups. Both branches look at the *target* dtype: the float branch
    # used to test the source dtype and then call np.finfo on the target.
    if dtype.kind == 'i':
        bits = np.iinfo(dtype).bits
        output_samples *= (2**(bits - 1) / 1.)
        min_val = np.iinfo(dtype).min
        max_val = np.iinfo(dtype).max
        # +1.0 scales to 2**(bits-1), one above the largest integer: clip.
        output_samples[output_samples > max_val] = max_val
        output_samples[output_samples < min_val] = min_val
    elif dtype.kind == 'f':
        min_val = np.finfo(dtype).min
        max_val = np.finfo(dtype).max
        output_samples[output_samples > max_val] = max_val
        output_samples[output_samples < min_val] = min_val
    else:
        raise TypeError("Unsupported sample type: %s." % dtype)
    return output_samples.astype(dtype)


================================================
FILE: paddlespeech/audio/transform/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/audio/transform/add_deltas.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
import numpy as np


def delta(feat, window):
    """Compute delta (regression) features along the first axis.

    For every offset ``k`` in ``1..window`` the frame ``k`` steps ahead is
    added and the frame ``k`` steps behind subtracted, both weighted by
    ``k``; positions that would fall outside the array use the first/last
    frame instead. The sum is divided by ``2 * sum(k**2)``.

    Args:
        feat (np.ndarray): features, frames on axis 0.
        window (int): number of frames on each side, must be > 0.

    Returns:
        np.ndarray: array with the shape and dtype of ``feat``.
    """
    assert window > 0
    acc = np.zeros_like(feat)
    for shift in range(1, window + 1):
        acc[:-shift] += shift * feat[shift:]
        acc[shift:] += -shift * feat[:-shift]
        # Edge handling: replicate the boundary frames.
        acc[-shift:] += shift * feat[-1]
        acc[:shift] += -shift * feat[0]
    norm = 2 * sum(k**2 for k in range(1, window + 1))
    acc /= norm
    return acc


def add_deltas(x, window=2, order=2):
    """Append delta features of increasing order to ``x``.

    Each order is the ``delta`` of the previous one, starting from ``x``.

    Args:
        x (np.ndarray): speech feat, (T, D).
        window (int): window passed to ``delta``.
        order (int): number of delta orders to append.

    Return:
        np.ndarray: (T, (1+order)*D)
    """
    stacked = [x]
    current = x
    for _ in range(order):
        current = delta(current, window)
        stacked.append(current)
    return np.concatenate(stacked, axis=1)


class AddDeltas():
    """Callable transform that appends delta features via ``add_deltas``."""

    def __init__(self, window=2, order=2):
        """Store the window and order used for every call."""
        self.window = window
        self.order = order

    def __repr__(self):
        # The closing parenthesis was missing from the format string.
        return "{name}(window={window}, order={order})".format(
            name=self.__class__.__name__, window=self.window, order=self.order)

    def __call__(self, x):
        """Return ``x`` with its delta features appended."""
        return add_deltas(x, window=self.window, order=self.order)


================================================
FILE: paddlespeech/audio/transform/channel_selector.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
import numpy


class ChannelSelector():
    """Pick a single channel out of a multi-channel signal.

    Args:
        train_channel: channel index used when ``train`` is true, or the
            string "random" to draw one uniformly at call time.
        eval_channel: channel index (or "random") used otherwise.
        axis (int): position of the channel axis.
    """

    def __init__(self, train_channel="random", eval_channel=0, axis=1):
        self.train_channel = train_channel
        self.eval_channel = eval_channel
        self.axis = axis

    def __repr__(self):
        template = ("{name}(train_channel={train_channel}, "
                    "eval_channel={eval_channel}, axis={axis})")
        return template.format(
            name=self.__class__.__name__,
            train_channel=self.train_channel,
            eval_channel=self.eval_channel,
            axis=self.axis, )

    def __call__(self, x, train=True):
        # By default x is taken to be [Time, Channel]
        missing = self.axis + 1 - x.ndim
        if missing > 0:
            # Too few dimensions: append singleton axes
            # (e.g [Time] -> [Time, 1])
            x = x[(slice(None), ) * x.ndim + (None, ) * missing]

        channel = self.train_channel if train else self.eval_channel

        if channel == "random":
            ch = numpy.random.randint(0, x.shape[self.axis])
        else:
            ch = channel

        selector = [slice(None)] * x.ndim
        # Only a non-negative, in-range axis position is replaced; any
        # other value of ``self.axis`` leaves every axis untouched.
        if 0 <= self.axis < x.ndim:
            selector[self.axis] = ch
        return x[tuple(selector)]


================================================
FILE: paddlespeech/audio/transform/cmvn.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
import io
import json

import h5py
import kaldiio
import numpy as np


class CMVN():
    """Apply global / per-speaker CMVN, or the inverse CMVN.

    Args:
        stats: either a dict mapping a key (``None`` for global stats, a
            speaker/utterance id otherwise) to a Kaldi-style stats matrix,
            or a path to a file of type ``filetype``.
        norm_means (bool): subtract the mean (add it back when reversing).
        norm_vars (bool): divide by the standard deviation (multiply when
            reversing).
        filetype (str): "mat" or "npy" for global stats, "ark" or "hdf5"
            for stats keyed by id. Ignored when ``stats`` is a dict.
        utt2spk (str): optional path to a "<utt> <spk>" table.
        spk2utt (str): optional path to a "<spk> <utt> <utt> ..." table;
            only read when ``utt2spk`` is not given.
        reverse (bool): apply the inverse transform.
        std_floor (float): lower bound applied to the standard deviation.

    Raises:
        ValueError: for an unsupported ``filetype``.
    """

    def __init__(
            self,
            stats,
            norm_means=True,
            norm_vars=False,
            filetype="mat",
            utt2spk=None,
            spk2utt=None,
            reverse=False,
            std_floor=1.0e-20, ):
        self.stats_file = stats
        self.norm_means = norm_means
        self.norm_vars = norm_vars
        self.reverse = reverse

        if isinstance(stats, dict):
            stats_dict = dict(stats)
        else:
            # Use for global CMVN
            if filetype == "mat":
                stats_dict = {None: kaldiio.load_mat(stats)}
            # Use for global CMVN
            elif filetype == "npy":
                stats_dict = {None: np.load(stats)}
            # Use for speaker CMVN
            elif filetype == "ark":
                self.accept_uttid = True
                stats_dict = dict(kaldiio.load_ark(stats))
            # Use for speaker CMVN
            elif filetype == "hdf5":
                self.accept_uttid = True
                # Open read-only, load every dataset into memory and close
                # the file instead of leaving the handle open.
                with h5py.File(stats, "r") as f:
                    stats_dict = {key: f[key][()] for key in f}
            else:
                raise ValueError("Not supporting filetype={}".format(filetype))

        if utt2spk is not None:
            self.utt2spk = {}
            with io.open(utt2spk, "r", encoding="utf-8") as f:
                for line in f:
                    utt, spk = line.rstrip().split(None, 1)
                    self.utt2spk[utt] = spk
        elif spk2utt is not None:
            self.utt2spk = {}
            with io.open(spk2utt, "r", encoding="utf-8") as f:
                for line in f:
                    spk, utts = line.rstrip().split(None, 1)
                    for utt in utts.split():
                        self.utt2spk[utt] = spk
        else:
            self.utt2spk = None

        # Kaldi makes a matrix for CMVN which has a shape of (2, feat_dim + 1),
        # and the first vector contains the sum of feats and the second is
        # the sum of squares. The last value of the first, i.e. stats[0,-1],
        # is the number of samples for this statistics.
        self.bias = {}
        self.scale = {}
        for spk, stats in stats_dict.items():
            assert len(stats) == 2, stats.shape

            count = stats[0, -1]

            # If the feature has two or more dimensions
            if not (np.isscalar(count) or isinstance(count, (int, float))):
                # The first is only used
                count = count.flatten()[0]

            mean = stats[0, :-1] / count
            # V(x) = E(x^2) - (E(x))^2
            var = stats[1, :-1] / count - mean * mean
            # Rounding can push the variance slightly below zero; clamp it
            # so sqrt() yields 0 (then floored) instead of NaN.
            std = np.maximum(np.sqrt(np.maximum(var, 0.0)), std_floor)
            self.bias[spk] = -mean
            self.scale[spk] = 1 / std

    def __repr__(self):
        return ("{name}(stats_file={stats_file}, "
                "norm_means={norm_means}, norm_vars={norm_vars}, "
                "reverse={reverse})".format(
                    name=self.__class__.__name__,
                    stats_file=self.stats_file,
                    norm_means=self.norm_means,
                    norm_vars=self.norm_vars,
                    reverse=self.reverse, ))

    def __call__(self, x, uttid=None):
        """Normalize (or de-normalize) ``x`` with the stats selected by ``uttid``.

        ``uttid`` is mapped through ``utt2spk`` when that table was given;
        otherwise it is used directly as the stats key (``None`` selects
        the global stats).
        """
        if self.utt2spk is not None:
            spk = self.utt2spk[uttid]
        else:
            spk = uttid

        if not self.reverse:
            # apply cmvn
            if self.norm_means:
                x = np.add(x, self.bias[spk])
            if self.norm_vars:
                x = np.multiply(x, self.scale[spk])

        else:
            # apply reverse cmvn
            if self.norm_vars:
                x = np.divide(x, self.scale[spk])
            if self.norm_means:
                x = np.subtract(x, self.bias[spk])

        return x


class UtteranceCMVN():
    """Apply CMVN using statistics computed from the utterance itself.

    Args:
        norm_means (bool): subtract the per-dimension mean.
        norm_vars (bool): divide by the per-dimension standard deviation.
        std_floor (float): lower bound applied to the standard deviation.
    """

    def __init__(self, norm_means=True, norm_vars=False, std_floor=1.0e-20):
        self.norm_means = norm_means
        self.norm_vars = norm_vars
        self.std_floor = std_floor

    def __repr__(self):
        return "{name}(norm_means={norm_means}, norm_vars={norm_vars})".format(
            name=self.__class__.__name__,
            norm_means=self.norm_means,
            norm_vars=self.norm_vars, )

    def __call__(self, x, uttid=None):
        """Normalize ``x`` ([Time, Dim]); ``uttid`` is accepted but unused."""
        # x: [Time, Dim]
        # Both statistics are taken from the un-normalized input.
        square_sums = (x**2).sum(axis=0)
        mean = x.mean(axis=0)

        if self.norm_means:
            x = np.subtract(x, mean)

        if self.norm_vars:
            var = square_sums / x.shape[0] - mean**2
            # E[x^2] - E[x]^2 can come out slightly negative for (nearly)
            # constant columns; clamp so sqrt() does not produce NaN.
            std = np.maximum(np.sqrt(np.maximum(var, 0.0)), self.std_floor)
            x = np.divide(x, std)

        return x


class GlobalCMVN():
    """Apply global CMVN from accumulated statistics.

    Args:
        cmvn_path (str or dict): path to a JSON file, or an already loaded
            dict, providing the keys 'frame_num', 'mean_stat' and
            'var_stat'. As used below, 'mean_stat' is divided by the frame
            count to get the mean and 'var_stat' is treated as the sum of
            squares.
        norm_means (bool): subtract the global mean.
        norm_vars (bool): divide by the global standard deviation.
        std_floor (float): lower bound applied to the standard deviation.
    """

    def __init__(self,
                 cmvn_path,
                 norm_means=True,
                 norm_vars=True,
                 std_floor=1.0e-20):
        # cmvn_path: Option[str, dict]
        cmvn = cmvn_path
        self.cmvn = cmvn
        self.norm_means = norm_means
        self.norm_vars = norm_vars
        self.std_floor = std_floor
        if isinstance(cmvn, dict):
            cmvn_stats = cmvn
        else:
            with open(cmvn) as f:
                cmvn_stats = json.load(f)
        self.count = cmvn_stats['frame_num']
        self.mean = np.array(cmvn_stats['mean_stat']) / self.count
        self.square_sums = np.array(cmvn_stats['var_stat'])
        self.var = self.square_sums / self.count - self.mean**2
        # self.var is kept as computed; only the value fed to sqrt() is
        # clamped, so a slightly negative variance cannot turn into NaN.
        self.std = np.maximum(
            np.sqrt(np.maximum(self.var, 0.0)), self.std_floor)

    def __repr__(self):
        return f"""{self.__class__.__name__}(
            cmvn_path={self.cmvn},
            norm_means={self.norm_means},
            norm_vars={self.norm_vars},)"""

    def __call__(self, x, uttid=None):
        """Normalize ``x`` ([Time, Dim]); ``uttid`` is accepted but unused."""
        # x: [Time, Dim]
        if self.norm_means:
            x = np.subtract(x, self.mean)

        if self.norm_vars:
            x = np.divide(x, self.std)
        return x


================================================
FILE: paddlespeech/audio/transform/functional.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
import inspect

from paddlespeech.audio.transform.transform_interface import TransformInterface
from paddlespeech.audio.utils.check_kwargs import check_kwargs


class FuncTrans(TransformInterface):
    """Functional Transformation

    Wraps the plain function stored in the class attribute ``_func`` so it
    can be configured once with keyword arguments and then called on data.

    WARNING:
        Builtin or C/C++ functions may not work properly
        because this class heavily depends on the `inspect` module.

    Usage:

    >>> def foo_bar(x, a=1, b=2):
    ...     '''Foo bar
    ...     :param x: input
    ...     :param int a: default 1
    ...     :param int b: default 2
    ...     '''
    ...     return x + a - b


    >>> class FooBar(FuncTrans):
    ...     _func = foo_bar
    ...     __doc__ = foo_bar.__doc__
    """

    _func = None

    def __init__(self, **kwargs):
        self.kwargs = kwargs
        check_kwargs(self.func, kwargs)

    def __call__(self, x):
        return self.func(x, **self.kwargs)

    @staticmethod
    def _str2bool(value):
        """Parse a command-line string into a bool.

        ``type=bool`` would turn every non-empty string (including
        "False") into True, so boolean options are parsed explicitly.
        A ValueError is reported by argparse as an invalid value.
        """
        if isinstance(value, bool):
            return value
        text = str(value).strip().lower()
        if text in ("true", "t", "yes", "y", "on", "1"):
            return True
        if text in ("false", "f", "no", "n", "off", "0"):
            return False
        raise ValueError("invalid boolean value: {!r}".format(value))

    @classmethod
    def add_arguments(cls, parser):
        """Register one ``--<func>-<param>`` option per defaulted parameter."""
        fname = cls._func.__name__.replace("_", "-")
        group = parser.add_argument_group(fname + " transformation setting")
        for k, v in cls.default_params().items():
            # TODO(karita): get help and choices from docstring?
            attr = k.replace("_", "-")
            if v is None:
                # NoneType cannot be called with an argument; keep the
                # raw string (argparse's behaviour when type is None).
                arg_type = None
            elif isinstance(v, bool):
                arg_type = cls._str2bool
            else:
                arg_type = type(v)
            group.add_argument(f"--{fname}-{attr}", default=v, type=arg_type)
        return parser

    @property
    def func(self):
        # Looked up on the class so the function is not bound to `self`.
        return type(self)._func

    @classmethod
    def default_params(cls):
        """Return ``{name: default}`` for every parameter that has a default."""
        try:
            d = dict(inspect.signature(cls._func).parameters)
        except (ValueError, TypeError):
            # No signature available (e.g. some builtins) or `_func` is
            # not callable.
            d = dict()
        # Identity test: `!=` would be evaluated element-wise (and be
        # ambiguous) for defaults such as numpy arrays.
        return {
            k: v.default
            for k, v in d.items() if v.default is not inspect.Parameter.empty
        }

    def __repr__(self):
        params = self.default_params()
        params.update(**self.kwargs)
        ret = self.__class__.__name__ + "("
        if len(params) == 0:
            return ret + ")"
        for k, v in params.items():
            ret += "{}={}, ".format(k, v)
        return ret[:-2] + ")"


================================================
FILE: paddlespeech/audio/transform/perturb.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
import io
import os
import sys

import h5py
import librosa
import numpy
import numpy as np
import scipy
import soundfile


class SoundHDF5File():
    """Dict-like store of encoded sound files inside one HDF5 file.

    >>> f = SoundHDF5File('a.flac.h5', mode='a')
    >>> array = np.random.randint(0, 100, 100, dtype=np.int16)
    >>> f['id'] = (array, 16000)
    >>> array, rate = f['id']


    :param: str filepath: path of the HDF5 file
    :param: str mode: mode handed to ``h5py.File``
    :param: str format: The type used when saving wav. flac, nist, htk, etc.
        When omitted it is taken from the inner extension of ``filepath``.
    :param: str dtype: dtype requested from ``soundfile.read`` on access

    """

    def __init__(self,
                 filepath,
                 mode="r+",
                 format=None,
                 dtype="int16",
                 **kwargs):
        self.filepath = filepath
        self.mode = mode
        self.dtype = dtype

        self.file = h5py.File(filepath, mode, **kwargs)
        if format is None:
            # "a.flac.h5" -> stem "a.flac" -> inner extension ".flac"
            stem = os.path.splitext(filepath)[0]
            inner_ext = os.path.splitext(stem)[1]
            format = inner_ext[1:]
            # Fall back to flac when soundfile does not know the format
            if format.upper() not in soundfile.available_formats():
                format = "flac"

        # Only writing is affected by this format
        self.format = format

    def __repr__(self):
        template = '<SoundHDF5 file "{}" (mode {}, format {}, type {})>'
        return template.format(self.filepath, self.mode, self.format,
                               self.dtype)

    def create_dataset(self, name, shape=None, data=None, **kwds):
        """Encode ``data`` = (array, rate) and store the bytes under ``name``."""
        buffer = io.BytesIO()
        samples, sample_rate = data
        soundfile.write(buffer, samples, sample_rate, format=self.format)
        encoded = np.void(buffer.getvalue())
        self.file.create_dataset(name, shape=shape, data=encoded, **kwds)

    def __setitem__(self, name, data):
        self.create_dataset(name, data=data)

    def __getitem__(self, key):
        """Decode the entry stored under ``key`` and return (array, rate)."""
        raw = self.file[key][()]
        buffer = io.BytesIO(raw.tobytes())
        samples, sample_rate = soundfile.read(buffer, dtype=self.dtype)
        return samples, sample_rate

    def keys(self):
        return self.file.keys()

    def values(self):
        for name in self.file:
            yield self[name]

    def items(self):
        for name in self.file:
            yield name, self[name]

    def __iter__(self):
        return iter(self.file)

    def __contains__(self, item):
        return item in self.file

    def __len__(self):
        return len(self.file)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.file.close()

    def close(self):
        self.file.close()


class SpeedPerturbation():
    """SpeedPerturbation

    The speed perturbation in kaldi uses sox-speed instead of sox-tempo,
    and sox-speed just to resample the input,
    i.e pitch and tempo are changed both.

    "Why use speed option instead of tempo -s in SoX for speed perturbation"
    https://groups.google.com/forum/#!topic/kaldi-help/8OOG7eE4sZ8

    Warning:
        This function is very slow because of resampling.
        I recommend to apply speed-perturb outside the training using sox.

    Args:
        lower (float): lower bound of the random speed ratio.
        upper (float): upper bound of the random speed ratio.
        utt2ratio (str): optional path to a "<utt> <ratio>" table; when
            given, the ratio is looked up per utterance instead of drawn.
        keep_length (bool): crop / zero-pad the result to the input length.
        res_type (str): resampling type passed to ``librosa.resample``.
        seed: seed for the internal ``numpy.random.RandomState``.
    """

    def __init__(
            self,
            lower=0.9,
            upper=1.1,
            utt2ratio=None,
            keep_length=True,
            res_type="kaiser_best",
            seed=None, ):
        self.res_type = res_type
        self.keep_length = keep_length
        self.state = numpy.random.RandomState(seed)

        if utt2ratio is not None:
            self.utt2ratio = {}
            # Use the scheduled ratio for each utterances
            self.utt2ratio_file = utt2ratio
            self.lower = None
            self.upper = None
            self.accept_uttid = True

            with open(utt2ratio, "r") as f:
                for line in f:
                    utt, ratio = line.rstrip().split(None, 1)
                    ratio = float(ratio)
                    self.utt2ratio[utt] = ratio
        else:
            self.utt2ratio = None
            # The ratio is given on runtime randomly
            self.lower = lower
            self.upper = upper

    def __repr__(self):
        if self.utt2ratio is None:
            return "{}(lower={}, upper={}, " "keep_length={}, res_type={})".format(
                self.__class__.__name__,
                self.lower,
                self.upper,
                self.keep_length,
                self.res_type, )
        else:
            return "{}({}, res_type={})".format(
                self.__class__.__name__, self.utt2ratio_file, self.res_type)

    def __call__(self, x, uttid=None, train=True):
        """Return a speed-perturbed float32 copy of ``x`` (identity if not train)."""
        if not train:
            return x
        x = x.astype(numpy.float32)
        # `accept_uttid` only exists when a ratio table was loaded, so the
        # decision is made on the table itself; reading the attribute
        # raised AttributeError in the default random-ratio setup.
        if self.utt2ratio is not None:
            ratio = self.utt2ratio[uttid]
        else:
            ratio = self.state.uniform(self.lower, self.upper)

        # Note1: resample requires the sampling-rate of input and output,
        #        but actually only the ratio is used.
        y = librosa.resample(
            x, orig_sr=ratio, target_sr=1, res_type=self.res_type)

        if self.keep_length:
            diff = abs(len(x) - len(y))
            if len(y) > len(x):
                # Truncate noise
                y = y[diff // 2:-((diff + 1) // 2)]
            elif len(y) < len(x):
                # Assume the time-axis is the first: (Time, Channel)
                pad_width = [(diff // 2, (diff + 1) // 2)] + [
                    (0, 0) for _ in range(y.ndim - 1)
                ]
                y = numpy.pad(
                    y, pad_width=pad_width, constant_values=0, mode="constant")
        return y


class SpeedPerturbationSox():
    """SpeedPerturbationSox

    The speed perturbation in kaldi uses sox-speed instead of sox-tempo,
    and sox-speed just to resample the input,
    i.e pitch and tempo are changed both.

    To speed up or slow down the sound of a file,
    use speed to modify the pitch and the duration of the file.
    This raises the speed and reduces the time.
    The default factor is 1.0 which makes no change to the audio.
    2.0 doubles speed, thus time length is cut by a half and pitch is one interval higher.

    "Why use speed option instead of tempo -s in SoX for speed perturbation"
    https://groups.google.com/forum/#!topic/kaldi-help/8OOG7eE4sZ8

    tempo option:
    sox -t wav input.wav -t wav output.tempo0.9.wav tempo -s 0.9

    speed option:
    sox -t wav input.wav -t wav output.speed0.9.wav speed 0.9

    If we use speed option like above, the pitch of audio also will be changed,
    but the tempo option does not change the pitch.

    Args:
        lower (float): lower bound of the random speed ratio.
        upper (float): upper bound of the random speed ratio.
        utt2ratio (str): optional path to a "<utt> <ratio>" table; when
            given, the ratio is looked up per utterance instead of drawn.
        keep_length (bool): crop / zero-pad the result to the input length.
        sr (int): sample rate passed to the sox transformer.
        seed: seed for the internal ``numpy.random.RandomState``.

    Raises:
        RuntimeError: if ``soxbindings`` can neither be imported nor
            installed on the fly.
    """

    def __init__(
            self,
            lower=0.9,
            upper=1.1,
            utt2ratio=None,
            keep_length=True,
            sr=16000,
            seed=None, ):
        self.sr = sr
        self.keep_length = keep_length
        self.state = numpy.random.RandomState(seed)

        try:
            import soxbindings as sox
        except ImportError:
            # Best effort: try to install the missing packages, then retry.
            try:
                from paddlespeech.s2t.utils import dynamic_pip_install
                package = "sox"
                dynamic_pip_install.install(package)
                package = "soxbindings"
                if sys.platform != "win32":
                    dynamic_pip_install.install(package)
                import soxbindings as sox
            except Exception:
                raise RuntimeError(
                    "Can not install soxbindings on your system.")
        self.sox = sox

        if utt2ratio is not None:
            self.utt2ratio = {}
            # Use the scheduled ratio for each utterances
            self.utt2ratio_file = utt2ratio
            self.lower = None
            self.upper = None
            self.accept_uttid = True

            with open(utt2ratio, "r") as f:
                for line in f:
                    utt, ratio = line.rstrip().split(None, 1)
                    ratio = float(ratio)
                    self.utt2ratio[utt] = ratio
        else:
            self.utt2ratio = None
            # The ratio is given on runtime randomly
            self.lower = lower
            self.upper = upper

    def __repr__(self):
        if self.utt2ratio is None:
            return f"""{self.__class__.__name__}(
                lower={self.lower},
                upper={self.upper},
                keep_length={self.keep_length},
                sample_rate={self.sr})"""

        else:
            return f"""{self.__class__.__name__}(
                utt2ratio={self.utt2ratio_file},
                sample_rate={self.sr})"""

    def __call__(self, x, uttid=None, train=True):
        """Return a speed-perturbed float32 copy of ``x`` (identity if not train)."""
        if not train:
            return x

        x = x.astype(numpy.float32)
        # `accept_uttid` only exists when a ratio table was loaded, so the
        # decision is made on the table itself; reading the attribute
        # raised AttributeError in the default random-ratio setup.
        if self.utt2ratio is not None:
            ratio = self.utt2ratio[uttid]
        else:
            ratio = self.state.uniform(self.lower, self.upper)

        tfm = self.sox.Transformer()
        tfm.set_globals(multithread=False)
        tfm.speed(ratio)
        y = tfm.build_array(input_array=x, sample_rate_in=self.sr)

        if self.keep_length:
            diff = abs(len(x) - len(y))
            if len(y) > len(x):
                # Truncate noise
                y = y[diff // 2:-((diff + 1) // 2)]
            elif len(y) < len(x):
                # Assume the time-axis is the first: (Time, Channel)
                pad_width = [(diff // 2, (diff + 1) // 2)] + [
                    (0, 0) for _ in range(y.ndim - 1)
                ]
                y = numpy.pad(
                    y, pad_width=pad_width, constant_values=0, mode="constant")

        if y.ndim == 2 and x.ndim == 1:
            # (T, C) -> (T)
            # ndarray has no `sequence` method; drop the channel axis.
            y = y.squeeze(1)
        return y


class BandpassPerturbation():
    """BandpassPerturbation

    Randomly dropout along the frequency axis.

    The original idea comes from the following:
        "randomly-selected frequency band was cut off under the constraint of
         leaving at least 1,000 Hz band within the range of less than 4,000Hz."
        (The Hitachi/JHU CHiME-5 system: Advances in speech recognition for
         everyday home environments using multiple microphone arrays;
         http://spandh.dcs.shef.ac.uk/chime_workshop/papers/CHiME_2018_paper_kanda.pdf)

    Args:
        lower (float): lower bound of the random threshold.
        upper (float): upper bound of the random threshold.
        seed: seed for the internal ``numpy.random.RandomState``.
        axes (tuple): axes along which the mask varies (negative values
            count from the end); it is broadcast over all other axes.
    """

    def __init__(self, lower=0.0, upper=0.75, seed=None, axes=(-1, )):
        self.lower = lower
        self.upper = upper
        self.state = numpy.random.RandomState(seed)
        # x_stft: (Time, Channel, Freq)
        self.axes = axes

    def __repr__(self):
        return "{}(lower={}, upper={})".format(self.__class__.__name__,
                                               self.lower, self.upper)

    def __call__(self, x_stft, uttid=None, train=True):
        """Mask ``x_stft`` in place and return it (identity if not train).

        Raises:
            RuntimeError: if ``x_stft`` is one-dimensional.
        """
        if not train:
            return x_stft

        if x_stft.ndim == 1:
            raise RuntimeError("Input in time-freq domain: "
                               "(Time, Channel, Freq) or (Time, Freq)")

        ratio = self.state.uniform(self.lower, self.upper)
        # Normalize negative axes with `ndim + i`. The previous `ndim - i`
        # mapped -1 to ndim + 1, which matches no axis, so the mask
        # degenerated to a single value for the whole array.
        axes = [i if i >= 0 else x_stft.ndim + i for i in self.axes]
        shape = [s if i in axes else 1 for i, s in enumerate(x_stft.shape)]

        mask = self.state.randn(*shape) > ratio
        # In-place on purpose: the caller's array is modified.
        x_stft *= mask
        return x_stft


class VolumePerturbation():
    """Scale the signal by a random or per-utterance gain.

    Args:
        lower (float): lower bound of the random gain.
        upper (float): upper bound of the random gain.
        utt2ratio (str): optional path to a "<utt> <ratio>" table; when
            given, the gain is looked up per utterance instead of drawn.
        dbunit (bool): interpret the gain in dB (converted with
            ``10 ** (ratio / 20)``) instead of as a linear factor.
        seed: seed for the internal ``numpy.random.RandomState``.
    """

    def __init__(self,
                 lower=-1.6,
                 upper=1.6,
                 utt2ratio=None,
                 dbunit=True,
                 seed=None):
        self.dbunit = dbunit
        self.utt2ratio_file = utt2ratio
        self.lower = lower
        self.upper = upper
        self.state = numpy.random.RandomState(seed)

        if utt2ratio is not None:
            # Use the scheduled ratio for each utterances
            self.utt2ratio = {}
            self.lower = None
            self.upper = None
            self.accept_uttid = True

            with open(utt2ratio, "r") as f:
                for line in f:
                    utt, ratio = line.rstrip().split(None, 1)
                    ratio = float(ratio)
                    self.utt2ratio[utt] = ratio
        else:
            # The ratio is given on runtime randomly
            self.utt2ratio = None

    def __repr__(self):
        if self.utt2ratio is None:
            return "{}(lower={}, upper={}, dbunit={})".format(
                self.__class__.__name__, self.lower, self.upper, self.dbunit)
        else:
            return '{}("{}", dbunit={})'.format(
                self.__class__.__name__, self.utt2ratio_file, self.dbunit)

    def __call__(self, x, uttid=None, train=True):
        """Return ``x`` (cast to float32) times the gain; identity if not train."""
        if not train:
            return x

        x = x.astype(numpy.float32)

        # `accept_uttid` only exists when a ratio table was loaded, so the
        # decision is made on the table itself; reading the attribute
        # raised AttributeError in the default random-gain setup.
        if self.utt2ratio is not None:
            ratio = self.utt2ratio[uttid]
        else:
            ratio = self.state.uniform(self.lower, self.upper)
        if self.dbunit:
            ratio = 10**(ratio / 20)
        return x * ratio


class NoiseInjection():
    """Add noise to a signal at a random or per-utterance level.

    Without a noise source, white Gaussian noise is generated.

    Args:
        utt2noise (str): optional noise source; a "<utt> <wav path>" list
            when ``filetype`` is "list", or a sound HDF5 file when it is
            "sound.hdf5".
        lower (float): lower bound of the random noise-to-signal ratio.
        upper (float): upper bound of the random noise-to-signal ratio.
        utt2ratio (str): optional path to a "<utt> <ratio>" table.
        filetype (str): "list" or "sound.hdf5".
        dbunit (bool): interpret the ratio in dB (converted with
            ``10 ** (ratio / 20)``) instead of as a linear factor.
        seed: seed for the internal ``numpy.random.RandomState``.

    Raises:
        ValueError: for an unsupported ``filetype``.
        RuntimeError: if both tables are given and their ids differ.
    """

    def __init__(
            self,
            utt2noise=None,
            lower=-20,
            upper=-5,
            utt2ratio=None,
            filetype="list",
            dbunit=True,
            seed=None, ):
        self.utt2noise_file = utt2noise
        self.utt2ratio_file = utt2ratio
        self.filetype = filetype
        self.dbunit = dbunit
        self.lower = lower
        self.upper = upper
        self.state = numpy.random.RandomState(seed)

        if utt2ratio is not None:
            # Use the scheduled ratio for each utterances
            self.utt2ratio = {}
            # Read the ratio table itself (the noise list was opened here
            # before, which cannot be parsed as "<utt> <float>").
            with open(utt2ratio, "r") as f:
                for line in f:
                    utt, snr = line.rstrip().split(None, 1)
                    snr = float(snr)
                    self.utt2ratio[utt] = snr
        else:
            # The ratio is given on runtime randomly
            self.utt2ratio = None

        if utt2noise is not None:
            self.utt2noise = {}
            if filetype == "list":
                with open(utt2noise, "r") as f:
                    for line in f:
                        utt, filename = line.rstrip().split(None, 1)
                        signal, rate = soundfile.read(filename, dtype="int16")
                        # Load all files in memory
                        self.utt2noise[utt] = (signal, rate)

            elif filetype == "sound.hdf5":
                self.utt2noise = SoundHDF5File(utt2noise, "r")
            else:
                raise ValueError(filetype)
        else:
            self.utt2noise = None

        if utt2noise is not None and utt2ratio is not None:
            if set(self.utt2ratio) != set(self.utt2noise):
                raise RuntimeError("The uttids mismatch between {} and {}".
                                   format(utt2ratio, utt2noise))

    def __repr__(self):
        if self.utt2ratio is None:
            return "{}(lower={}, upper={}, dbunit={})".format(
                self.__class__.__name__, self.lower, self.upper, self.dbunit)
        else:
            return '{}("{}", dbunit={})'.format(
                self.__class__.__name__, self.utt2ratio_file, self.dbunit)

    def __call__(self, x, uttid=None, train=True):
        """Return ``x`` (as float32) plus scaled noise; identity if not train."""
        if not train:
            return x
        x = x.astype(numpy.float32)

        # 1. Get ratio of noise to signal in sound pressure level
        if uttid is not None and self.utt2ratio is not None:
            ratio = self.utt2ratio[uttid]
        else:
            ratio = self.state.uniform(self.lower, self.upper)

        if self.dbunit:
            ratio = 10**(ratio / 20)
        scale = ratio * numpy.sqrt((x**2).mean())

        # 2. Get noise
        if self.utt2noise is not None:
            # Get noise from the external source
            if uttid is not None:
                noise, rate = self.utt2noise[uttid]
            else:
                # Randomly select the noise source. Entries are
                # (signal, rate) pairs, so draw a key and unpack the entry.
                keys = list(self.utt2noise.keys())
                noise, rate = self.utt2noise[keys[self.state.randint(
                    len(keys))]]

            # Work on a float copy: the stored noise is int16, so in-place
            # division fails, squaring overflows, and the cached entry must
            # stay untouched.
            noise = numpy.asarray(noise).astype(numpy.float32)

            # Normalize the level (skip all-zero noise to avoid 0 / 0)
            rms = numpy.sqrt((noise**2).mean())
            if rms > 0:
                noise = noise / rms

            # Adjust the noise length
            diff = abs(len(x) - len(noise))
            # Upper bound is exclusive, so diff + 1 also covers diff == 0.
            offset = self.state.randint(0, diff + 1)
            if len(noise) > len(x):
                # Truncate noise
                noise = noise[offset:offset + len(x)]
            elif diff > 0:
                # Repeat the noise; only the first (time) axis is padded.
                pad_width = [(offset, diff - offset)] + [
                    (0, 0) for _ in range(noise.ndim - 1)
                ]
                noise = numpy.pad(noise, pad_width=pad_width, mode="wrap")

        else:
            # Generate white noise
            noise = self.state.normal(0, 1, x.shape)

        # 3. Add noise to signal
        return x + noise * scale


class RIRConvolve():
    """Convolve a single-channel signal with a room impulse response.

    Args:
        utt2rir (str): RIR source; a "<utt> <wav path>" list when
            ``filetype`` is "list", or a sound HDF5 file when it is
            "sound.hdf5".
        filetype (str): "list" or "sound.hdf5".

    Raises:
        NotImplementedError: for an unsupported ``filetype``.
    """

    def __init__(self, utt2rir, filetype="list"):
        self.utt2rir_file = utt2rir
        self.filetype = filetype

        self.utt2rir = {}
        if filetype == "list":
            with open(utt2rir, "r") as f:
                for line in f:
                    utt, filename = line.rstrip().split(None, 1)
                    signal, rate = soundfile.read(filename, dtype="int16")
                    self.utt2rir[utt] = (signal, rate)

        elif filetype == "sound.hdf5":
            self.utt2rir = SoundHDF5File(utt2rir, "r")
        else:
            raise NotImplementedError(filetype)

    def __repr__(self):
        return '{}("{}")'.format(self.__class__.__name__, self.utt2rir_file)

    def __call__(self, x, uttid=None, train=True):
        """Convolve ``x`` with the RIR stored for ``uttid`` (identity if not train).

        Returns:
            [Time] for a 1-D RIR, [Time, Channel] for a 2-D RIR.

        Raises:
            RuntimeError: if ``x`` is not one-dimensional.
        """
        if not train:
            return x

        x = x.astype(numpy.float32)

        if x.ndim != 1:
            # Must be single channel
            raise RuntimeError(
                "Input x must be one dimensional array, but got {}".format(
                    x.shape))

        rir, rate = self.utt2rir[uttid]
        # numpy.convolve replaces the `scipy.convolve` alias, which is
        # deprecated / removed in recent SciPy releases.
        if rir.ndim == 2:
            # FIXME(kamo): Use chainer.convolution_1d?
            # return [Time, Channel]
            # soundfile returns multi-channel audio as (frames, channels),
            # so iterate over the transpose to get one filter per channel.
            return numpy.stack(
                [numpy.convolve(x, r, mode="same") for r in rir.T], axis=-1)
        else:
            return numpy.convolve(x, rir, mode="same")


================================================
FILE: paddlespeech/audio/transform/spec_augment.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""Spec Augment module for preprocessing i.e., data augmentation"""
import random

import numpy
from PIL import Image

from .functional import FuncTrans


def time_warp(x, max_time_warp=80, inplace=False, mode="PIL"):
    """Time warping step of SpecAugment.

    A random center frame is shifted by a random amount within the window
    and both sides are resized to fill the original length.

    :param numpy.ndarray x: spectrogram (time, freq)
    :param int max_time_warp: maximum time frames to warp; 0 disables it
    :param bool inplace: overwrite x with the result
    :param str mode: "PIL" (default, fast, not differentiable) or "sparse_image_warp"
        (slow, differentiable)
    :returns numpy.ndarray: time warped spectrogram (time, freq)
    """
    window = max_time_warp
    if window == 0:
        return x

    if mode == "sparse_image_warp":
        import paddle

        from espnet.utils import spec_augment

        # TODO(karita): make this differentiable again
        return spec_augment.time_warp(paddle.to_tensor(x), window).numpy()

    if mode != "PIL":
        raise NotImplementedError("unknown resize mode: " + mode +
                                  ", choose one from (PIL, sparse_image_warp).")

    n_frames = x.shape[0]
    # Too short to pick a center with a full window on both sides
    if n_frames - window <= window:
        return x

    # NOTE: randrange(a, b) emits a, a + 1, ..., b - 1
    center = random.randrange(window, n_frames - window)
    # 1 ... n_frames - 1
    warped = random.randrange(center - window, center + window) + 1

    n_bins = x.shape[1]
    head = Image.fromarray(x[:center]).resize((n_bins, warped), Image.BICUBIC)
    tail = Image.fromarray(x[center:]).resize((n_bins, n_frames - warped),
                                              Image.BICUBIC)
    if not inplace:
        return numpy.concatenate((head, tail), 0)
    x[:warped] = head
    x[warped:] = tail
    return x


class TimeWarp(FuncTrans):
    # Transform-style wrapper around ``time_warp``. How ``_func`` is invoked
    # is defined by ``FuncTrans`` (imported from ``.functional``).
    _func = time_warp
    __doc__ = time_warp.__doc__

    def __call__(self, x, train):
        # Only augment in training mode; otherwise hand the input back as is.
        if train:
            return super().__call__(x)
        return x


def freq_mask(x, F=30, n_mask=2, replace_with_zero=True, inplace=False):
    """Frequency masking for SpecAugment.

    For every mask two integers are drawn from ``[0, F)``. The first one
    (``f``) limits where the mask may start (the start is drawn from
    ``[0, num_freq_bins - f)``) and a value of 0 skips the mask; the second
    one is the width of the masked band.

    :param numpy.ndarray x: (time, freq)
    :param int F: exclusive upper bound of the values drawn for each mask
    :param int n_mask: the number of masks
    :param bool replace_with_zero: pad zero on mask if true else use mean
    :param bool inplace: overwrite
    :returns numpy.ndarray: masked spectrogram (time, freq)
    """
    cloned = x if inplace else x.copy()

    num_mel_channels = cloned.shape[1]
    fs = numpy.random.randint(0, F, size=(n_mask, 2))

    for f, mask_end in fs:
        # avoid randrange range error when the drawn value is not smaller
        # than the number of frequency bins (same guard as in time_mask)
        if num_mel_channels - f <= 0:
            continue
        f_zero = random.randrange(0, num_mel_channels - f)
        mask_end += f_zero

        # a drawn value of zero means "no mask" for this round
        if f_zero == f_zero + f:
            continue

        if replace_with_zero:
            cloned[:, f_zero:mask_end] = 0
        else:
            # mean of the current (possibly already masked) array
            cloned[:, f_zero:mask_end] = cloned.mean()
    return cloned


class FreqMask(FuncTrans):
    # Transform-style wrapper around ``freq_mask``. How ``_func`` is invoked
    # is defined by ``FuncTrans`` (imported from ``.functional``).
    _func = freq_mask
    __doc__ = freq_mask.__doc__

    def __call__(self, x, train):
        # Only augment in training mode; otherwise hand the input back as is.
        if train:
            return super().__call__(x)
        return x


def time_mask(spec, T=40, n_mask=2, replace_with_zero=True, inplace=False):
    """Time masking for SpecAugment.

    For every mask two integers are drawn from ``[0, T)``. The first one
    (``t``) limits where the mask may start (the start is drawn from
    ``[0, num_frames - t)``) and a value of 0 skips the mask; the second one
    is the number of masked frames.

    :param numpy.ndarray spec: (time, freq)
    :param int T: exclusive upper bound of the values drawn for each mask
    :param int n_mask: the number of masks
    :param bool replace_with_zero: pad zero on mask if true else use mean
    :param bool inplace: overwrite
    :returns numpy.ndarray: masked spectrogram (time, freq)
    """
    cloned = spec if inplace else spec.copy()
    n_frames = cloned.shape[0]
    draws = numpy.random.randint(0, T, size=(n_mask, 2))

    for t, width in draws:
        room = n_frames - t
        # avoid randint range error
        if room <= 0:
            continue
        start = random.randrange(0, room)

        # a drawn value of zero means "no mask" for this round
        if start == start + t:
            continue

        stop = width + start
        fill = 0 if replace_with_zero else cloned.mean()
        cloned[start:stop] = fill
    return cloned


class TimeMask(FuncTrans):
    # Transform-style wrapper around ``time_mask``. How ``_func`` is invoked
    # is defined by ``FuncTrans`` (imported from ``.functional``).
    _func = time_mask
    __doc__ = time_mask.__doc__

    def __call__(self, x, train):
        # Only augment in training mode; otherwise hand the input back as is.
        if train:
            return super().__call__(x)
        return x


def spec_augment(
        x,
        resize_mode="PIL",
        max_time_warp=80,
        max_freq_width=27,
        n_freq_mask=2,
        max_time_width=100,
        n_time_mask=2,
        inplace=True,
        replace_with_zero=True, ):
    """SpecAugment: time warping followed by frequency and time masking.

    The three steps are applied in the order time_warp -> freq_mask ->
    time_mask. Default setting is based on LD (Librispeech double) in Table 2
        https://arxiv.org/pdf/1904.08779.pdf

    :param numpy.ndarray x: (time, freq)
    :param str resize_mode: "PIL" (fast, nondifferentiable) or "sparse_image_warp"
        (slow, differentiable)
    :param int max_time_warp: maximum frames to warp the center frame in spectrogram (W)
    :param int max_freq_width: maximum width of the random freq mask (F)
    :param int n_freq_mask: the number of the random freq mask (m_F)
    :param int max_time_width: maximum width of the random time mask (T)
    :param int n_time_mask: the number of the random time mask (m_T)
    :param bool inplace: overwrite intermediate array
    :param bool replace_with_zero: pad zero on mask if true else use mean
    :returns numpy.ndarray: augmented spectrogram (time, freq)
    """
    assert isinstance(x, numpy.ndarray)
    assert x.ndim == 2

    # Options shared by both masking steps.
    mask_opts = dict(inplace=inplace, replace_with_zero=replace_with_zero)

    warped = time_warp(x, max_time_warp, inplace=inplace, mode=resize_mode)
    freq_masked = freq_mask(warped, max_freq_width, n_freq_mask, **mask_opts)
    return time_mask(freq_masked, max_time_width, n_time_mask, **mask_opts)


class SpecAugment(FuncTrans):
    # Transform-style wrapper around ``spec_augment``. How ``_func`` is
    # invoked is defined by ``FuncTrans`` (imported from ``.functional``).
    _func = spec_augment
    __doc__ = spec_augment.__doc__

    def __call__(self, x, train):
        # Only augment in training mode; otherwise hand the input back as is.
        if train:
            return super().__call__(x)
        return x


================================================
FILE: paddlespeech/audio/transform/spectrogram.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
import librosa
import numpy as np
import paddle
from python_speech_features import logfbank

from paddlespeech.audio.compliance import kaldi


def stft(x,
         n_fft,
         n_shift,
         win_length=None,
         window="hann",
         center=True,
         pad_mode="reflect"):
    """Short-time Fourier transform computed channel by channel with librosa.

    Args:
        x (np.ndarray): waveform, [Time] or [Time, Channel]
        n_fft (int): FFT size passed to ``librosa.stft``
        n_shift (int): hop length passed to ``librosa.stft``
        win_length (int, optional): window length passed to ``librosa.stft``
        window (str): window type passed to ``librosa.stft``
        center (bool): passed to ``librosa.stft``
        pad_mode (str): passed to ``librosa.stft``

    Returns:
        np.ndarray: [Time, Freq] for 1-D input, else [Time, Channel, Freq]
    """
    single_channel = x.ndim == 1
    if single_channel:
        # x: [Time] -> [Time, Channel]
        x = x[:, None]
    x = x.astype(np.float32)

    # FIXME(kamo): librosa.stft can't use multi-channel?
    per_channel = []
    for ch in range(x.shape[1]):
        spec = librosa.stft(
            y=x[:, ch],
            n_fft=n_fft,
            hop_length=n_shift,
            win_length=win_length,
            window=window,
            center=center,
            pad_mode=pad_mode, )
        # librosa output is transposed so that time comes first
        per_channel.append(spec.T)

    # x: [Time, Channel, Freq]
    x = np.stack(per_channel, axis=1)

    # drop the channel axis again for single-channel input
    return x[:, 0] if single_channel else x


def istft(x, n_shift, win_length=None, window="hann", center=True):
    """Inverse short-time Fourier transform, channel by channel with librosa.

    Args:
        x (np.ndarray): STFT, [Time, Freq] or [Time, Channel, Freq]
        n_shift (int): hop length passed to ``librosa.istft``
        win_length (int, optional): window length passed to ``librosa.istft``
        window (str): window type passed to ``librosa.istft``
        center (bool): passed to ``librosa.istft``

    Returns:
        np.ndarray: [Time] for 2-D input, else [Time, Channel]
    """
    single_channel = x.ndim == 2
    if single_channel:
        # x: [Time, Freq] -> [Time, Channel, Freq]
        x = x[:, None, :]

    waves = []
    for ch in range(x.shape[1]):
        wave = librosa.istft(
            stft_matrix=x[:, ch].T,  # [Time, Freq] -> [Freq, Time]
            hop_length=n_shift,
            win_length=win_length,
            window=window,
            center=center, )
        waves.append(wave)

    # x: [Time, Channel]
    x = np.stack(waves, axis=1)

    # drop the channel axis again for single-channel input
    return x[:, 0] if single_channel else x


def stft2logmelspectrogram(x_stft,
                           fs,
                           n_mels,
                           n_fft,
                           fmin=None,
                           fmax=None,
                           eps=1e-10):
    """Convert an STFT into a log10 mel spectrogram.

    The magnitude of the STFT is projected onto a librosa mel filter bank,
    floored at ``eps`` and passed through ``log10``.

    Args:
        x_stft (np.ndarray): (Time, Channel, Freq) or (Time, Freq)
        fs (int): sampling rate used to build the mel filter bank
        n_mels (int): number of mel bins
        n_fft (int): FFT size used to build the mel filter bank
        fmin (float, optional): lowest mel frequency, 0 when None
        fmax (float, optional): highest mel frequency, ``fs / 2`` when None
        eps (float): floor applied before the logarithm

    Returns:
        np.ndarray: (Time, Channel, Mel_freq) or (Time, Mel_freq)
    """
    if fmin is None:
        fmin = 0
    if fmax is None:
        fmax = fs / 2

    # magnitude: (Time, Channel, Freq) or (Time, Freq)
    magnitude = np.abs(x_stft)
    # mel_basis: (Mel_freq, Freq)
    mel_basis = librosa.filters.mel(
        sr=fs, n_fft=n_fft, n_mels=n_mels, fmin=fmin, fmax=fmax)
    # mel_energy: (Time, Channel, Mel_freq) or (Time, Mel_freq)
    mel_energy = np.dot(magnitude, mel_basis.T)
    return np.log10(np.maximum(eps, mel_energy))


def spectrogram(x, n_fft, n_shift, win_length=None, window="hann"):
    """Magnitude spectrogram: absolute value of ``stft`` of ``x``.

    x: (Time, Channel) -> result: (Time, Channel, Freq)
    """
    complex_spec = stft(x, n_fft, n_shift, win_length, window=window)
    return np.abs(complex_spec)


def logmelspectrogram(
        x,
        fs,
        n_mels,
        n_fft,
        n_shift,
        win_length=None,
        window="hann",
        fmin=None,
        fmax=None,
        eps=1e-10,
        pad_mode="reflect", ):
    """Log mel spectrogram of a waveform: ``stft`` then ``stft2logmelspectrogram``.

    ``n_fft``, ``n_shift``, ``win_length``, ``window`` and ``pad_mode`` are
    forwarded to ``stft``; ``fs``, ``n_mels``, ``n_fft``, ``fmin``, ``fmax``
    and ``eps`` are forwarded to ``stft2logmelspectrogram``.
    """
    stft_opts = dict(
        n_fft=n_fft,
        n_shift=n_shift,
        win_length=win_length,
        window=window,
        pad_mode=pad_mode, )
    # x_stft: (Time, Channel, Freq) or (Time, Freq)
    x_stft = stft(x, **stft_opts)

    mel_opts = dict(
        fs=fs, n_mels=n_mels, n_fft=n_fft, fmin=fmin, fmax=fmax, eps=eps)
    return stft2logmelspectrogram(x_stft, **mel_opts)


class Spectrogram():
    """Callable that stores STFT settings and applies ``spectrogram`` to its input."""

    def __init__(self, n_fft, n_shift, win_length=None, window="hann"):
        self.n_fft, self.n_shift = n_fft, n_shift
        self.win_length, self.window = win_length, window

    def __repr__(self):
        cls_name = self.__class__.__name__
        return (f"{cls_name}(n_fft={self.n_fft}, n_shift={self.n_shift}, "
                f"win_length={self.win_length}, window={self.window})")

    def __call__(self, x):
        # Forward the stored settings to the module-level function.
        settings = dict(
            n_fft=self.n_fft,
            n_shift=self.n_shift,
            win_length=self.win_length,
            window=self.window, )
        return spectrogram(x, **settings)


class LogMelSpectrogram():
    """Callable that stores feature settings and applies ``logmelspectrogram``.

    All constructor arguments are kept as attributes and forwarded to
    ``logmelspectrogram`` on every call, including ``fmin``, ``fmax`` and
    ``eps`` (their defaults equal the defaults of ``logmelspectrogram``).
    """

    def __init__(
            self,
            fs,
            n_mels,
            n_fft,
            n_shift,
            win_length=None,
            window="hann",
            fmin=None,
            fmax=None,
            eps=1e-10, ):
        self.fs = fs
        self.n_mels = n_mels
        self.n_fft = n_fft
        self.n_shift = n_shift
        self.win_length = win_length
        self.window = window
        self.fmin = fmin
        self.fmax = fmax
        self.eps = eps

    def __repr__(self):
        return ("{name}(fs={fs}, n_mels={n_mels}, n_fft={n_fft}, "
                "n_shift={n_shift}, win_length={win_length}, window={window}, "
                "fmin={fmin}, fmax={fmax}, eps={eps}))".format(
                    name=self.__class__.__name__,
                    fs=self.fs,
                    n_mels=self.n_mels,
                    n_fft=self.n_fft,
                    n_shift=self.n_shift,
                    win_length=self.win_length,
                    window=self.window,
                    fmin=self.fmin,
                    fmax=self.fmax,
                    eps=self.eps, ))

    def __call__(self, x):
        # fmin / fmax / eps must be forwarded too, otherwise values given to
        # the constructor would be silently replaced by the function defaults.
        return logmelspectrogram(
            x,
            fs=self.fs,
            n_mels=self.n_mels,
            n_fft=self.n_fft,
            n_shift=self.n_shift,
            win_length=self.win_length,
            window=self.window,
            fmin=self.fmin,
            fmax=self.fmax,
            eps=self.eps, )


class Stft2LogMelSpectrogram():
    """Callable that stores mel settings and applies ``stft2logmelspectrogram``.

    All constructor arguments are kept as attributes and forwarded on every
    call, including ``eps`` (its default equals the default of
    ``stft2logmelspectrogram``).
    """

    def __init__(self, fs, n_mels, n_fft, fmin=None, fmax=None, eps=1e-10):
        self.fs = fs
        self.n_mels = n_mels
        self.n_fft = n_fft
        self.fmin = fmin
        self.fmax = fmax
        self.eps = eps

    def __repr__(self):
        return ("{name}(fs={fs}, n_mels={n_mels}, n_fft={n_fft}, "
                "fmin={fmin}, fmax={fmax}, eps={eps}))".format(
                    name=self.__class__.__name__,
                    fs=self.fs,
                    n_mels=self.n_mels,
                    n_fft=self.n_fft,
                    fmin=self.fmin,
                    fmax=self.fmax,
                    eps=self.eps, ))

    def __call__(self, x):
        # eps must be forwarded too, otherwise a value given to the
        # constructor would be silently replaced by the function default.
        return stft2logmelspectrogram(
            x,
            fs=self.fs,
            n_mels=self.n_mels,
            n_fft=self.n_fft,
            fmin=self.fmin,
            fmax=self.fmax,
            eps=self.eps, )


class Stft():
    """Callable that stores STFT settings and applies ``stft`` to its input."""

    def __init__(
            self,
            n_fft,
            n_shift,
            win_length=None,
            window="hann",
            center=True,
            pad_mode="reflect", ):
        self.n_fft, self.n_shift = n_fft, n_shift
        self.win_length, self.window = win_length, window
        self.center, self.pad_mode = center, pad_mode

    def __repr__(self):
        cls_name = self.__class__.__name__
        return (f"{cls_name}(n_fft={self.n_fft}, n_shift={self.n_shift}, "
                f"win_length={self.win_length}, window={self.window},"
                f"center={self.center}, pad_mode={self.pad_mode})")

    def __call__(self, x):
        # n_fft and n_shift are passed positionally, the rest by keyword.
        options = dict(
            win_length=self.win_length,
            window=self.window,
            center=self.center,
            pad_mode=self.pad_mode, )
        return stft(x, self.n_fft, self.n_shift, **options)


class IStft():
    """Callable that stores inverse-STFT settings and applies ``istft`` to its input."""

    def __init__(self, n_shift, win_length=None, window="hann", center=True):
        self.n_shift, self.win_length = n_shift, win_length
        self.window, self.center = window, center

    def __repr__(self):
        cls_name = self.__class__.__name__
        return (f"{cls_name}(n_shift={self.n_shift}, "
                f"win_length={self.win_length}, window={self.window},"
                f"center={self.center})")

    def __call__(self, x):
        # n_shift is passed positionally, the rest by keyword.
        options = dict(
            win_length=self.win_length,
            window=self.window,
            center=self.center, )
        return istft(x, self.n_shift, **options)


class LogMelSpectrogramKaldi():
    """Log mel filter bank features computed with ``kaldi.fbank``."""

    def __init__(
            self,
            fs=16000,
            n_mels=80,
            n_shift=160,  # unit:sample, 10ms
            win_length=400,  # unit:sample, 25ms
            energy_floor=0.0,
            dither=0.1):
        """
        The Kaldi implementation of LogMelSpectrogram
        Args:
            fs (int): sample rate of the audio
            n_mels (int): number of mel filter banks
            n_shift (int): number of points in a frame shift
            win_length (int): number of points in a frame windows
            energy_floor (float): Floor on energy in Spectrogram computation (absolute)
            dither (float): Dithering constant

        Returns:
            LogMelSpectrogramKaldi
        """
        self.fs = fs
        self.n_mels = n_mels
        # Sample counts are converted to milliseconds: samples / (samples per ms).
        samples_per_ms = fs / 1000
        self.n_frame_length = win_length / samples_per_ms
        self.n_frame_shift = n_shift / samples_per_ms
        self.energy_floor = energy_floor
        self.dither = dither

    def __repr__(self):
        cls_name = self.__class__.__name__
        return (
            f"{cls_name}(fs={self.fs}, n_mels={self.n_mels}, "
            f"n_frame_shift={self.n_frame_shift}, n_frame_length={self.n_frame_length}, "
            f"dither={self.dither}))")

    def __call__(self, x, train):
        """
        Args:
            x (np.ndarray): shape (Ti,)
            train (bool): True, train mode.

        Raises:
            ValueError: not support (Ti, C)

        Returns:
            np.ndarray: (T, D)
        """
        if x.ndim != 1:
            raise ValueError("Not support x: [Time, Channel]")
        # Dithering is only applied in train mode.
        dither = self.dither if train else 0.0

        # Add a leading axis of size 1 before handing the signal to kaldi.fbank.
        batched = np.expand_dims(x, 0)
        waveform = paddle.to_tensor(batched, dtype=paddle.float32)
        feats = kaldi.fbank(
            waveform,
            n_mels=self.n_mels,
            frame_length=self.n_frame_length,
            frame_shift=self.n_frame_shift,
            dither=dither,
            energy_floor=self.energy_floor,
            sr=self.fs)
        # np.squeeze drops every axis of size 1 from the result.
        return np.squeeze(feats.numpy())


class WavProcess():
    """Scale a 1-D waveform by 1/32768 and append a trailing axis of size 1."""

    def __init__(self):
        """Takes no arguments and stores no state."""

    def __call__(self, x):
        """
        Args:
            x (np.ndarray): shape (Ti,)

        Raises:
            ValueError: not support (Ti, C)

        Returns:
            np.ndarray: float32 array of shape (Ti, 1)
        """
        if x.ndim != 1:
            raise ValueError("Not support x: [Time, Channel]")
        # presumably maps 16-bit PCM sample values into [-1, 1) -- TODO confirm
        scaled = x.astype("float32") / 32768.0
        return np.expand_dims(scaled, -1)


class LogMelSpectrogramKaldi_decay():
    """Log mel filter bank features computed with ``logfbank``.

    ``n_shift`` and ``win_length`` are given in samples and stored divided by
    ``fs`` (i.e. in seconds); these values are passed to ``logfbank`` as
    ``winstep`` / ``winlen``.
    """

    def __init__(
            self,
            fs=16000,
            n_mels=80,
            n_fft=512,  # fft point
            n_shift=160,  # unit:sample, 10ms
            win_length=400,  # unit:sample, 25ms
            window="povey",
            fmin=20,
            fmax=None,
            eps=1e-10,
            dither=1.0):
        """
        Args:
            fs (int): sample rate of the audio
            n_mels (int): number of mel filter banks
            n_fft (int): number of FFT points
            n_shift (int): frame shift in samples
            win_length (int): frame length in samples
            window (str): window type forwarded to ``logfbank`` as ``wintype``
            fmin (float): lowest filter bank frequency
            fmax (float, optional): highest filter bank frequency; ``fs / 2``
                when None (or otherwise falsy)
            eps (float): stored on the instance; not used by ``__call__``
            dither (float): dithering constant, only applied in train mode

        Raises:
            ValueError: if ``n_shift > win_length`` or ``fmax > fs / 2``
        """
        self.fs = fs
        self.n_mels = n_mels
        self.n_fft = n_fft
        if n_shift > win_length:
            raise ValueError("Stride size must not be greater than "
                             "window size.")
        self.n_shift = n_shift / fs  # unit: second
        self.win_length = win_length / fs  # unit: second

        self.window = window
        self.fmin = fmin
        if not fmax:
            # no upper bound given: use the Nyquist frequency
            fmax_ = self.fs / 2
        elif fmax > int(self.fs / 2):
            raise ValueError("fmax must not be greater than half of "
                             "sample rate.")
        else:
            # a valid explicit upper bound is used as given
            fmax_ = fmax
        self.fmax = fmax_

        self.eps = eps
        self.remove_dc_offset = True
        self.preemph = 0.97
        self.dither = dither  # only work in train mode

    def __repr__(self):
        return (
            "{name}(fs={fs}, n_mels={n_mels}, n_fft={n_fft}, "
            "n_shift={n_shift}, win_length={win_length}, preemph={preemph}, window={window}, "
            "fmin={fmin}, fmax={fmax}, eps={eps}, dither={dither}))".format(
                name=self.__class__.__name__,
                fs=self.fs,
                n_mels=self.n_mels,
                n_fft=self.n_fft,
                n_shift=self.n_shift,
                preemph=self.preemph,
                win_length=self.win_length,
                window=self.window,
                fmin=self.fmin,
                fmax=self.fmax,
                eps=self.eps,
                dither=self.dither, ))

    def __call__(self, x, train):
        """

        Args:
            x (np.ndarray): shape (Ti,)
            train (bool): True, train mode.

        Raises:
            ValueError: not support (Ti, C)

        Returns:
            np.ndarray: (T, D)
        """
        dither = self.dither if train else 0.0
        if x.ndim != 1:
            raise ValueError("Not support x: [Time, Channel]")

        # np.sctypes was removed in NumPy 2.0; issubdtype covers the same
        # floating point dtypes.
        if np.issubdtype(x.dtype, np.floating):
            # float samples are scaled by 2**15 to the 16-bit PCM value range
            # (assumes float input is normalised to [-1, 1] -- TODO confirm)
            bits = np.iinfo(np.int16).bits
            x = x * 2**(bits - 1)

        # logfbank need PCM16 input
        y = logfbank(
            signal=x,
            samplerate=self.fs,
            winlen=self.win_length,  # unit: second
            winstep=self.n_shift,  # unit: second
            nfilt=self.n_mels,
            nfft=self.n_fft,
            lowfreq=self.fmin,
            highfreq=self.fmax,
            dither=dither,
            remove_dc_offset=self.remove_dc_offset,
            preemph=self.preemph,
            wintype=self.window)
        return y


================================================
FILE: paddlespeech/audio/transform/transform_interface.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)


class TransformInterface:
    """Base interface for transforms: subclasses are expected to override ``__call__``."""

    def __call__(self, x):
        # No default behaviour: the base class always raises.
        raise NotImplementedError("__call__ method is not implemented")

    @classmethod
    def add_arguments(cls, parser):
        # The base implementation hands the parser back unchanged.
        return parser

    def __repr__(self):
        return f"{self.__class__.__name__}()"


class Identity(TransformInterface):
    """Transform that returns its input unchanged."""

    def __call__(self, x):
        # Nothing is computed or copied; the very same object is returned.
        return x


================================================
FILE: paddlespeech/audio/transform/transformation.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""Transformation module."""
import copy
import io
import logging
from collections import OrderedDict
from collections.abc import Sequence
from inspect import signature

import yaml

from ..utils.dynamic_import import dynamic_import

# Short transform names mapped to "module.path:ClassName" strings; passed to
# ``dynamic_import`` by ``Transformation`` together with the process "type".
import_alias = {
    "identity": "paddlespeech.audio.transform.transform_interface:Identity",
    "time_warp": "paddlespeech.audio.transform.spec_augment:TimeWarp",
    "time_mask": "paddlespeech.audio.transform.spec_augment:TimeMask",
    "freq_mask": "paddlespeech.audio.transform.spec_augment:FreqMask",
    "spec_augment": "paddlespeech.audio.transform.spec_augment:SpecAugment",
    "speed_perturbation":
    "paddlespeech.audio.transform.perturb:SpeedPerturbation",
    "speed_perturbation_sox":
    "paddlespeech.audio.transform.perturb:SpeedPerturbationSox",
    "volume_perturbation":
    "paddlespeech.audio.transform.perturb:VolumePerturbation",
    "noise_injection": "paddlespeech.audio.transform.perturb:NoiseInjection",
    "bandpass_perturbation":
    "paddlespeech.audio.transform.perturb:BandpassPerturbation",
    "rir_convolve": "paddlespeech.audio.transform.perturb:RIRConvolve",
    "delta": "paddlespeech.audio.transform.add_deltas:AddDeltas",
    "cmvn": "paddlespeech.audio.transform.cmvn:CMVN",
    "utterance_cmvn": "paddlespeech.audio.transform.cmvn:UtteranceCMVN",
    "fbank": "paddlespeech.audio.transform.spectrogram:LogMelSpectrogram",
    "spectrogram": "paddlespeech.audio.transform.spectrogram:Spectrogram",
    "wav_process": "paddlespeech.audio.transform.spectrogram:WavProcess",
    "stft": "paddlespeech.audio.transform.spectrogram:Stft",
    "istft": "paddlespeech.audio.transform.spectrogram:IStft",
    "stft2fbank":
    "paddlespeech.audio.transform.spectrogram:Stft2LogMelSpectrogram",
    "wpe": "paddlespeech.audio.transform.wpe:WPE",
    "channel_selector":
    "paddlespeech.audio.transform.channel_selector:ChannelSelector",
    "fbank_kaldi":
    "paddlespeech.audio.transform.spectrogram:LogMelSpectrogramKaldi",
    "cmvn_json": "paddlespeech.audio.transform.cmvn:GlobalCMVN",
}


class Transformation():
    """Apply a configured sequence of functions to a mini-batch.

    The configuration is a dict (or a path to a YAML file holding one) with a
    "process" list; every entry is a dict whose "type" is resolved through
    ``dynamic_import`` and whose remaining items are passed to the resolved
    class as keyword arguments. Only mode "sequential" is supported.

    Examples:
        >>> kwargs = {"process": [{"type": "fbank",
        ...                        "n_mels": 80,
        ...                        "fs": 16000},
        ...                       {"type": "cmvn",
        ...                        "stats": "data/train/cmvn.ark",
        ...                        "norm_vars": True},
        ...                       {"type": "delta", "window": 2, "order": 2}]}
        >>> transform = Transformation(kwargs)
        >>> bs = 10
        >>> xs = [np.random.randn(100, 80).astype(np.float32)
        ...       for _ in range(bs)]
        >>> xs = transform(xs)
    """

    def __init__(self, conffile=None):
        if conffile is None:
            # No configuration: an empty sequential pipeline.
            self.conf = {"mode": "sequential", "process": []}
        elif isinstance(conffile, dict):
            # Copy so that the caller's dict is never shared with this object.
            self.conf = copy.deepcopy(conffile)
        else:
            with io.open(conffile, encoding="utf-8") as f:
                self.conf = yaml.safe_load(f)
                assert isinstance(self.conf, dict), type(self.conf)

        self.functions = OrderedDict()
        if self.conf.get("mode", "sequential") != "sequential":
            raise NotImplementedError(
                "Not supporting mode={}".format(self.conf["mode"]))

        for idx, process in enumerate(self.conf["process"]):
            assert isinstance(process, dict), type(process)
            opts = dict(process)
            process_type = opts.pop("type")
            class_obj = dynamic_import(process_type, import_alias)
            # TODO(karita): assert issubclass(class_obj, TransformInterface)
            try:
                self.functions[idx] = class_obj(**opts)
            except TypeError:
                # Log the expected signature (when it can be inspected) to
                # help diagnose bad options, then re-raise the TypeError.
                signa = None
                try:
                    signa = signature(class_obj)
                except ValueError:
                    # Some function, e.g. built-in function, are failed
                    pass
                if signa is not None:
                    logging.error("Expected signature: {}({})".format(
                        class_obj.__name__, signa))
                raise

    def __repr__(self):
        lines = ["    {}: {}".format(k, v) for k, v in self.functions.items()]
        rep = "\n" + "\n".join(lines)
        return "{}({})".format(self.__class__.__name__, rep)

    def __call__(self, xs, uttid_list=None, **kwargs):
        """Return new mini-batch

        :param Union[Sequence[np.ndarray], np.ndarray] xs:
        :param Union[Sequence[str], str] uttid_list:
        :return: batch:
        :rtype: List[np.ndarray]
        """
        # A non-sequence input is treated as a single item and unwrapped
        # again before returning.
        is_batch = isinstance(xs, Sequence)
        if not is_batch:
            xs = [xs]

        # A single utterance id is repeated for every item.
        if isinstance(uttid_list, str):
            uttid_list = [uttid_list] * len(xs)

        if self.conf.get("mode", "sequential") != "sequential":
            raise NotImplementedError(
                "Not supporting mode={}".format(self.conf["mode"]))

        for idx in range(len(self.conf["process"])):
            func = self.functions[idx]
            # TODO(karita): use TrainingTrans and UttTrans to check __call__ args
            # Derive only the args which the func has
            try:
                param = signature(func).parameters
            except ValueError:
                # Some function, e.g. built-in function, are failed
                param = {}
            _kwargs = {k: v for k, v in kwargs.items() if k in param}
            with_uttid = uttid_list is not None and "uttid" in param
            try:
                if with_uttid:
                    xs = [
                        func(x, u, **_kwargs) for x, u in zip(xs, uttid_list)
                    ]
                else:
                    xs = [func(x, **_kwargs) for x in xs]
            except Exception:
                logging.fatal("Catch a exception from {}th func: {}".format(
                    idx, func))
                raise

        return xs if is_batch else xs[0]


================================================
FILE: paddlespeech/audio/transform/wpe.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
from nara_wpe.wpe import wpe


class WPE(object):
    """Callable that stores settings and applies ``wpe`` (from ``nara_wpe.wpe``).

    The constructor arguments are kept as attributes and forwarded to ``wpe``
    as keyword arguments on every call.
    """

    def __init__(self,
                 taps=10,
                 delay=3,
                 iterations=3,
                 psd_context=0,
                 statistics_mode="full"):
        self.taps = taps
        self.delay = delay
        self.iterations = iterations
        self.psd_context = psd_context
        self.statistics_mode = statistics_mode

    def __repr__(self):
        # Every field is separated by ", " so that neighbouring values do not
        # run together in the output.
        return ("{name}(taps={taps}, delay={delay}, "
                "iterations={iterations}, psd_context={psd_context}, "
                "statistics_mode={statistics_mode})".format(
                    name=self.__class__.__name__,
                    taps=self.taps,
                    delay=self.delay,
                    iterations=self.iterations,
                    psd_context=self.psd_context,
                    statistics_mode=self.statistics_mode, ))

    def __call__(self, xs):
        """Return enhanced

        :param np.ndarray xs: (Time, Channel, Frequency)
        :return: enhanced_xs
        :rtype: np.ndarray

        """
        # nara_wpe.wpe: (F, C, T), so the axes are reversed before the call
        # and reversed back afterwards.
        xs = wpe(
            xs.transpose((2, 1, 0)),
            taps=self.taps,
            delay=self.delay,
            iterations=self.iterations,
            psd_context=self.psd_context,
            statistics_mode=self.statistics_mode, )
        return xs.transpose(2, 1, 0)


================================================
FILE: paddlespeech/audio/utils/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ...utils.env import DATA_HOME
from ...utils.env import MODEL_HOME
from .download import decompress
from .download import download_and_decompress
from .download import load_state_dict_from_url
from .error import ParameterError
from .log import Logger
from .log import logger
from .numeric import depth_convert
from .numeric import pcm16to32
from .time import seconds_to_hms
from .time import Timer


================================================
FILE: paddlespeech/audio/utils/check_kwargs.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
import inspect


def check_kwargs(func, kwargs, name=None):
    """Check that every key in ``kwargs`` is an acceptable keyword for ``func``.

    If a key is not accepted, a ``TypeError`` is raised with the same wording
    Python itself uses for an unexpected keyword argument.

    :param function func: callable to be validated against
    :param dict kwargs: keyword arguments intended for ``func``
    :param str name: name used in the TypeError message (default is the
        callable's ``__name__``, or its ``repr`` when it has none)
    :raises TypeError: if ``kwargs`` contains a key that ``func`` does not
        declare and ``func`` has no ``**kwargs`` catch-all
    """
    try:
        params = inspect.signature(func).parameters
    except ValueError:
        # inspect.signature could not provide a signature for this callable,
        # so there is nothing to validate against.
        return

    # A callable declaring ``**kwargs`` accepts any keyword, exactly as Python
    # would at call time, so no key can be "unexpected".
    if any(p.kind is inspect.Parameter.VAR_KEYWORD for p in params.values()):
        return

    if name is None:
        # Objects such as functools.partial have no ``__name__``.
        name = getattr(func, "__name__", repr(func))
    for k in kwargs.keys():
        if k not in params:
            raise TypeError(
                f"{name}() got an unexpected keyword argument '{k}'")


================================================
FILE: paddlespeech/audio/utils/download.py
================================================
# Copyright (c) 2021  PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from typing import Dict
from typing import List

from paddle.framework import load as load_state_dict
from paddle.utils import download

from .log import logger

# Replace the `logger` attribute of `paddle.utils.download` with this
# package's logger (presumably so download messages share the same
# formatting -- confirm against paddle.utils.download).
download.logger = logger

# Public API of this module.
__all__ = [
    'decompress',
    'download_and_decompress',
    'load_state_dict_from_url',
]


def decompress(file: str):
    """
    Unpack the archive located at `file`.

    Args:
        file (str): Path of the compressed file; it must be an existing
            regular file, otherwise the assertion below fails.
    """
    is_regular_file = os.path.isfile(file)
    assert is_regular_file, "File: {} not exists.".format(file)
    # Delegates the actual extraction to paddle's download helper.
    download._decompress(file)


def download_and_decompress(archives: List[Dict[str, str]],
                            path: str,
                            decompress: bool=True):
    """
    Download archives and (optionally) decompress them into `path`.

    Args:
        archives (List[Dict[str, str]]): One dict per archive; each must
            contain the keys "url" and "md5".
        path (str): Target directory. It is created if it does not exist.
        decompress (bool): Forwarded to `download.get_path_from_url`.
            Defaults to True.
    """
    # exist_ok avoids a failure if the directory appears between a check and
    # the creation (e.g. several workers starting at once).
    os.makedirs(path, exist_ok=True)

    for archive in archives:
        # The message is an f-string so the offending keys are actually shown.
        assert 'url' in archive and 'md5' in archive, \
            f'Dictionary keys of "url" and "md5" are required in the archive, but got: {list(archive.keys())}'
        download.get_path_from_url(
            archive['url'], path, archive['md5'], decompress=decompress)


def load_state_dict_from_url(url: str, path: str, md5: str=None):
    """
    Fetch a state dict file from `url` into `path` and load it.

    Args:
        url (str): Location of the state dict file.
        path (str): Directory the file is downloaded into; created when it
            is not already a directory.
        md5 (str): Checksum forwarded to `download.get_path_from_url`.

    Returns:
        The object returned by `paddle.framework.load` for the file
        `<path>/<basename of url>`.
    """
    target_dir_exists = os.path.isdir(path)
    if not target_dir_exists:
        os.makedirs(path)

    download.get_path_from_url(url, path, md5)

    # The local file name is derived from the last component of the url.
    local_file = os.path.join(path, os.path.basename(url))
    return load_state_dict(local_file)


================================================
FILE: paddlespeech/audio/utils/dynamic_import.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
import importlib

# Public API of this module.
__all__ = ["dynamic_import"]


def dynamic_import(import_path, alias=dict()):
    """Resolve a 'module:object' path (or a registered alias) to the object.

    :param str import_path: either 'module_name:class_name',
        e.g., 'paddlespeech.s2t.models.u2:U2Model', or a key of ``alias``
    :param dict alias: mapping from short names to 'module_name:class_name'
    :return: the attribute named after the colon, taken from the imported module
    :raises ValueError: if ``import_path`` has no ':' and is not in ``alias``
    """
    has_separator = ":" in import_path
    if not has_separator:
        # Without a colon the path must be a registered shortcut.
        if import_path not in alias:
            raise ValueError(
                "import_path should be one of {} or "
                'include ":", e.g. "paddlespeech.s2t.models.u2:U2Model" : '
                "{}".format(set(alias), import_path))
        import_path = alias[import_path]

    module_name, objname = import_path.split(":")
    module = importlib.import_module(module_name)
    return getattr(module, objname)


================================================
FILE: paddlespeech/audio/utils/error.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Public API of this module.
__all__ = ['ParameterError']


class ParameterError(Exception):
    """Exception type used to signal a failed parameter check."""


================================================
FILE: paddlespeech/audio/utils/log.py
================================================
# Copyright (c) 2021  PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import contextlib
import functools
import logging
import threading
import time

import colorlog

__all__ = [
    'Logger',
    'logger',
]

# Log level table: level name -> numeric level and the color used when
# rendering records of that level. Besides the standard logging levels it
# declares two extra ones, TRAIN (21) and EVAL (22), which sit between
# INFO (20) and WARNING (30).
log_config = {
    'DEBUG': {
        'level': 10,
        'color': 'purple'
    },
    'INFO': {
        'level': 20,
        'color': 'green'
    },
    'TRAIN': {
        'level': 21,
        'color': 'cyan'
    },
    'EVAL': {
        'level': 22,
        'color': 'blue'
    },
    'WARNING': {
        'level': 30,
        'color': 'yellow'
    },
    'ERROR': {
        'level': 40,
        'color': 'red'
    },
    'CRITICAL': {
        'level': 50,
        'color': 'bold_red'
    }
}


class Logger(object):
    '''
    Default logger in PaddleAudio.

    For every entry of `log_config` the instance gets two callables, named
    after the level in upper and lower case (e.g. `logger.INFO(msg)` and
    `logger.info(msg)`), which log `msg` at that level.

    Args:
        name(str) : Logger name, default is 'PaddleAudio'
    '''

    def __init__(self, name: str=None):
        name = 'PaddleAudio' if not name else name
        self.name = name
        self.logger = logging.getLogger(name)

        # Register each level name and expose per-level shortcuts that
        # forward to __call__ with the numeric level bound.
        for key, conf in log_config.items():
            logging.addLevelName(conf['level'], key)
            self.__dict__[key] = functools.partial(self.__call__, conf['level'])
            self.__dict__[key.lower()] = functools.partial(self.__call__,
                                                           conf['level'])

        self.format = colorlog.ColoredFormatter(
            '%(log_color)s[%(asctime)-15s] [%(levelname)8s]%(reset)s - %(message)s',
            log_colors={key: conf['color']
                        for key, conf in log_config.items()})

        # NOTE(review): a new handler is attached on every construction;
        # creating two Logger objects with the same name will emit each
        # record twice because logging.getLogger returns the same object.
        self.handler = logging.StreamHandler()
        self.handler.setFormatter(self.format)

        self.logger.addHandler(self.handler)
        self.logLevel = 'DEBUG'
        self.logger.setLevel(logging.DEBUG)
        # Keep records out of the root logger's handlers.
        self.logger.propagate = False
        self._is_enable = True

    def disable(self):
        '''Silence this logger: subsequent calls are dropped.'''
        self._is_enable = False

    def enable(self):
        '''Re-enable output after `disable`.'''
        self._is_enable = True

    @property
    def is_enable(self) -> bool:
        '''Whether messages are currently emitted.'''
        return self._is_enable

    def __call__(self, log_level: int, msg: str):
        '''
        Log `msg` at the numeric level `log_level`, prefixed by the logger name.
        Args:
            log_level(int): Numeric logging level (see `log_config`).
            msg(str): Message; non-string objects are formatted with `str.format`.
        '''
        if not self.is_enable:
            return

        # str.format instead of "+" so non-str messages (e.g. exceptions)
        # are logged rather than raising TypeError.
        self.logger.log(log_level, "{} | {}".format(self.name, msg))

    @contextlib.contextmanager
    def use_terminator(self, terminator: str):
        '''
        Temporarily replace the stream handler's line terminator.
        Args:
            terminator(str): Terminator used inside the `with` block.
        '''
        old_terminator = self.handler.terminator
        self.handler.terminator = terminator
        try:
            yield
        finally:
            # Restore even if the body raised, otherwise every later record
            # would keep the temporary terminator.
            self.handler.terminator = old_terminator

    @contextlib.contextmanager
    def processing(self, msg: str, interval: float=0.1):
        '''
        Continuously print a progress bar with rotating special effects.
        Args:
            msg(str): Message to be printed.
            interval(float): Rotation interval. Default to 0.1.
        '''
        stop = threading.Event()

        def _printer():
            index = 0
            flags = ['\\', '|', '/', '-']
            while not stop.is_set():
                flag = flags[index % len(flags)]
                with self.use_terminator('\r'):
                    self.info('{}: {}'.format(msg, flag))
                # Event.wait sleeps for `interval` but wakes up immediately
                # once the context manager exits.
                stop.wait(interval)
                index += 1

        t = threading.Thread(target=_printer, daemon=True)
        t.start()
        try:
            yield
        finally:
            # Always stop the spinner, including when the body raises;
            # joining guarantees the terminator has been restored before
            # the caller logs again.
            stop.set()
            t.join()


# Module-level default logger; with no name given it is named 'PaddleAudio'.
logger = Logger()


================================================
FILE: paddlespeech/audio/utils/numeric.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Union

import numpy as np

from .error import ParameterError

# Public API of this module.
__all__ = ["pcm16to32", "depth_convert"]


def pcm16to32(audio: np.ndarray) -> np.ndarray:
    """Scale an int16 PCM waveform to float32.

    Arrays whose dtype is not int16 are returned untouched.

    Args:
        audio (np.ndarray): Waveform, expected to have dtype int16.

    Returns:
        np.ndarray: float32 waveform (each sample divided by 2**15) for
            int16 input; otherwise the input array itself.
    """
    if audio.dtype != np.int16:
        return audio

    # 2 ** (number of bits - 1) is the magnitude of the most negative sample.
    full_scale = 2**(np.iinfo(np.int16).bits - 1)
    as_float = audio.astype("float32")
    return as_float / full_scale


def _safe_cast(y: np.ndarray, dtype: Union[type, str]) -> np.ndarray:
    """Data type casting in a safe way, i.e., prevent overflow or underflow.

    The values are clipped to the representable range of the *target* dtype
    before being cast.

    Args:
        y (np.ndarray): Input waveform array in 1D or 2D.
        dtype (Union[type, str]): Target data type of the waveform.

    Returns:
        np.ndarray: `y` after safe casting.
    """
    target = np.dtype(dtype)
    # The limits must come from the target dtype: np.finfo only accepts
    # floating types and np.iinfo only integer types, so choosing by the
    # source dtype fails for float <-> int conversions.
    if np.issubdtype(target, np.floating):
        limits = np.finfo(target)
    else:
        limits = np.iinfo(target)
    return np.clip(y, limits.min, limits.max).astype(dtype)


def depth_convert(y: np.ndarray, dtype: Union[type, str]) -> np.ndarray:
    """Convert audio array to target dtype safely.
    This function converts an audio waveform to a target dtype, with additional
    steps of preventing overflow/underflow and preserving audio range.

    Supported source and target dtypes are int8, int16, float32 and float64.
    Integer samples map to floats by dividing by the source type's maximum;
    float samples map to integers by multiplying by the target type's maximum
    and clipping.

    Args:
        y (np.ndarray): Input waveform array in 1D or 2D.
        dtype (Union[type, str]): Target data type, given as a name
            (e.g. 'int16') or as a type/dtype object (e.g. np.int16).

    Returns:
        np.ndarray: `y` after safe casting (the same object when the dtype
            is already the requested one).

    Raises:
        ParameterError: if the dtype of `y` or the target dtype is unsupported.
    """

    SUPPORT_DTYPE = ['int16', 'int8', 'float32', 'float64']
    # Small offset keeping the int8 -> int16 scale factor strictly below the
    # exact ratio. NOTE(review): the original referenced an undefined
    # module-level EPS; 1e-8 is assumed here -- confirm the intended value.
    eps = 1e-8

    # Accept type/dtype objects as promised by the annotation by reducing
    # them to their canonical name; unknown specs fall through to the
    # "unsupported" error below.
    try:
        dtype = np.dtype(dtype).name
    except TypeError:
        pass

    src = y.dtype.name
    if src not in SUPPORT_DTYPE:
        raise ParameterError(
            'Unsupported audio dtype, '
            f'y.dtype is {y.dtype}, supported dtypes are {SUPPORT_DTYPE}')

    if dtype not in SUPPORT_DTYPE:
        raise ParameterError(
            'Unsupported audio dtype, '
            f'target dtype  is {dtype}, supported dtypes are {SUPPORT_DTYPE}')

    if dtype == src:
        return y

    float_types = ('float32', 'float64')

    # float <-> float: plain clipped cast.
    if dtype in float_types and src in float_types:
        return _safe_cast(y, dtype)

    if dtype in ('int16', 'int8'):
        limits = np.iinfo(dtype)
        if src in float_types:
            # Scale [-1, 1] floats to the integer range, then clip.
            y = np.clip(y * limits.max, limits.min, limits.max).astype(dtype)
        elif dtype == 'int16':  # src == 'int8'
            factor = np.iinfo('int16').max / np.iinfo('int8').max - eps
            scaled = y.astype('float32') * factor
            # -128 * factor falls below the int16 minimum; clip instead of
            # letting the cast overflow.
            y = np.clip(scaled, limits.min, limits.max).astype('int16')
        else:  # dtype == 'int8' and src == 'int16'
            y = y.astype('int32') * np.iinfo('int8').max / \
                np.iinfo('int16').max
            y = y.astype('int8')
        return y

    # Remaining case: integer source, float target.
    return y.astype(dtype) / np.iinfo(src).max


================================================
FILE: paddlespeech/audio/utils/tensor_utils.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility functions for Transformer."""
from typing import List
from typing import Tuple

import paddle

from .log import Logger

# Public API of this module.
# NOTE(review): reverse_pad_list and st_reverse_pad_list are defined below
# but not listed here -- confirm whether that is intentional.
__all__ = ["pad_sequence", "add_sos_eos", "th_accuracy", "has_tensor"]

# Logger dedicated to this module (named after the module).
logger = Logger(__name__)


def has_tensor(val):
    """Tell whether `val` is, or (recursively) contains, a paddle Tensor.

    Lists, tuples and dict values are searched recursively; any other object
    is tested with `paddle.is_tensor`.

    Args:
        val: Arbitrary object, possibly a nested list/tuple/dict.

    Returns:
        bool: True if a Tensor was found, False otherwise (including for
            empty containers, which previously yielded None).
    """
    if isinstance(val, (list, tuple)):
        return any(has_tensor(item) for item in val)
    if isinstance(val, dict):
        return any(has_tensor(v) for v in val.values())
    return paddle.is_tensor(val)


def pad_sequence(sequences: List[paddle.Tensor],
                 batch_first: bool=False,
                 padding_value: float=0.0) -> paddle.Tensor:
    r"""Pad a list of variable length Tensors with ``padding_value``

    ``pad_sequence`` stacks a list of Tensors along a new dimension,
    and pads them to equal length. For example, if the input is a list of
    sequences with size ``L x *``, the output is of size ``T x B x *`` if
    batch_first is False, and ``B x T x *`` otherwise.

    `B` is batch size. It is equal to the number of elements in ``sequences``.
    `T` is length of the longest sequence.
    `L` is length of the sequence.
    `*` is any number of trailing dimensions, including none.

    Example:
        >>> a = paddle.ones([25, 300])
        >>> b = paddle.ones([22, 300])
        >>> c = paddle.ones([15, 300])
        >>> pad_sequence([a, b, c]).shape
        [25, 3, 300]

    Note:
        This function returns a Tensor of size ``T x B x *`` or ``B x T x *``
        where `T` is the length of the longest sequence. This function assumes
        trailing dimensions and type of all the Tensors in sequences are same.

    Args:
        sequences (list[Tensor]): list of variable length sequences.
        batch_first (bool, optional): output will be in ``B x T x *`` if True, or in
            ``T x B x *`` otherwise
        padding_value (float, optional): value for padded elements. Default: 0.

    Returns:
        Tensor of size ``T x B x *`` if :attr:`batch_first` is ``False``.
        Tensor of size ``B x T x *`` otherwise
    """

    # assuming trailing dimensions and type of all the Tensors
    # in sequences are same and fetching those from sequences[0]
    max_size = paddle.shape(sequences[0])
    # (TODO Hui Zhang): slice not support `end==start`
    # trailing_dims = max_size[1:]
    # For 1-D sequences there are no trailing dims, so the slice is skipped.
    trailing_dims = tuple(
        max_size[1:].numpy().tolist()) if sequences[0].ndim >= 2 else ()
    max_len = max([s.shape[0] for s in sequences])
    if batch_first:
        out_dims = (len(sequences), max_len) + trailing_dims
    else:
        out_dims = (max_len, len(sequences)) + trailing_dims
    # Start from a tensor filled with the padding value and copy each
    # sequence into its leading `length` positions.
    out_tensor = paddle.full(out_dims, padding_value, sequences[0].dtype)
    for i, tensor in enumerate(sequences):
        length = tensor.shape[0]
        # use index notation to prevent duplicate references to the tensor
        if batch_first:
            # TODO (Hui Zhang): set_value op not support `end==start`
            # TODO (Hui Zhang): set_value op not support int16
            # TODO (Hui Zhang): set_varbase 2 rank not support [0,0,...]
            # out_tensor[i, :length, ...] = tensor
            if length != 0:
                out_tensor[i, :length] = tensor
            else:
                # NOTE(review): for an empty sequence this assigns at index
                # `length` (== 0) instead of an empty slice, working around
                # the set_value limitation above -- confirm it is a no-op.
                out_tensor[i, length] = tensor
        else:
            # TODO (Hui Zhang): set_value op not support `end==start`
            # out_tensor[:length, i, ...] = tensor
            if length != 0:
                out_tensor[:length, i] = tensor
            else:
                # Same empty-sequence workaround as in the batch_first branch.
                out_tensor[length, i] = tensor

    return out_tensor


def add_sos_eos(ys_pad: paddle.Tensor, sos: int, eos: int,
                ignore_id: int) -> Tuple[paddle.Tensor, paddle.Tensor]:
    """Build decoder input/target pairs by adding <sos> and <eos> labels.

    Args:
        ys_pad (paddle.Tensor): batch of padded target sequences (B, Lmax)
        sos (int): index of <sos>
        eos (int): index of <eos>
        ignore_id (int): index of padding
    Returns:
        ys_in (paddle.Tensor) : (B, Lmax + 1), <sos> prepended and every
            padding position replaced by <eos>
        ys_out (paddle.Tensor) : (B, Lmax + 1), <eos> placed right after the
            last real token, remaining positions set to ``ignore_id``
    Examples:
        >>> sos_id = 10
        >>> eos_id = 11
        >>> ignore_id = -1
        >>> ys_pad
        tensor([[ 1,  2,  3,  4,  5],
                [ 4,  5,  6, -1, -1],
                [ 7,  8,  9, -1, -1]], dtype=paddle.int32)
        >>> ys_in,ys_out=add_sos_eos(ys_pad, sos_id , eos_id, ignore_id)
        >>> ys_in
        tensor([[10,  1,  2,  3,  4,  5],
                [10,  4,  5,  6, 11, 11],
                [10,  7,  8,  9, 11, 11]])
        >>> ys_out
        tensor([[ 1,  2,  3,  4,  5, 11],
                [ 4,  5,  6, 11, -1, -1],
                [ 7,  8,  9, 11, -1, -1]])
    """
    # TODO(Hui Zhang): using comment code,
    # _sos = paddle.to_tensor(
    #    [sos], dtype=ys_pad.dtype, stop_gradient=True, place=ys_pad.place)
    # _eos = paddle.to_tensor(
    #    [eos], dtype=ys_pad.dtype, stop_gradient=True, place=ys_pad.place)
    # ys = [y[y != ignore_id] for y in ys_pad]  # parse padded ys
    # ys_in = [paddle.concat([_sos, y], axis=0) for y in ys]
    # ys_out = [paddle.concat([y, _eos], axis=0) for y in ys]
    # return pad_sequence(ys_in, padding_value=eos).transpose([1,0]), pad_sequence(ys_out, padding_value=ignore_id).transpose([1,0])

    batch_size = ys_pad.shape[0]
    sos_column = paddle.full([batch_size, 1], sos, dtype=ys_pad.dtype)
    eos_column = paddle.full([batch_size, 1], eos, dtype=ys_pad.dtype)

    # Decoder input: <sos> + tokens, paddings turned into <eos>.
    shifted = paddle.cat([sos_column, ys_pad], dim=1)
    pad_in_shifted = (shifted == ignore_id)
    ys_in = shifted.masked_fill(pad_in_shifted, eos)

    # Decoder target: tokens + <eos>. Any padding still present after the
    # first fill is the slot right after the last real token, which becomes
    # <eos>; positions that were padding in the shifted input are finally
    # reset to ignore_id.
    appended = paddle.cat([ys_pad, eos_column], dim=1)
    appended = appended.masked_fill(pad_in_shifted, eos)
    remaining_pad = (appended == ignore_id)
    with_eos = appended.masked_fill(remaining_pad, eos)
    ys_out = with_eos.masked_fill(pad_in_shifted, ignore_id)
    return ys_in, ys_out


def th_accuracy(pad_outputs: paddle.Tensor,
                pad_targets: paddle.Tensor,
                ignore_label: int) -> float:
    """Compute token accuracy over the non-ignored target positions.

    Args:
        pad_outputs (Tensor): Prediction tensors (B * Lmax, D).
        pad_targets (LongTensor): Target label tensors; the first two
            dimensions (B, Lmax) are used to reshape the predictions.
        ignore_label (int): Ignore label id.
    Returns:
        float: Accuracy value (0.0 - 1.0).
    """
    batch_size = pad_targets.shape[0]
    max_len = pad_targets.shape[1]
    num_classes = pad_outputs.shape[1]

    # Highest-scoring class per position.
    predictions = pad_outputs.reshape(
        [batch_size, max_len, num_classes]).argmax(2)
    valid = pad_targets != ignore_label

    #TODO(Hui Zhang): sum not support bool type
    # The boolean tensors are cast to the target dtype before summing.
    hits = predictions.masked_select(valid) == pad_targets.masked_select(valid)
    correct = paddle.sum(hits.type_as(pad_targets))
    total = paddle.sum(valid.type_as(pad_targets))
    return float(correct) / float(total)


def reverse_pad_list(ys_pad: paddle.Tensor,
                     ys_lens: paddle.Tensor,
                     pad_value: float=-1.0) -> paddle.Tensor:
    """Reverse the valid prefix of every sequence and pad the result again.

    Args:
        ys_pad (tensor): The padded tensor (B, Tokenmax).
        ys_lens (tensor): The lens of token seqs (B)
        pad_value (int): Value for padding.
    Returns:
        Tensor: Padded tensor (B, Tokenmax).
    Examples:
        >>> x
        tensor([[1, 2, 3, 4], [5, 6, 7, 0], [8, 9, 0, 0]])
        >>> reverse_pad_list(x, paddle.to_tensor([4, 3, 2]), 0)
        tensor([[4, 3, 2, 1],
                [7, 6, 5, 0],
                [9, 8, 0, 0]])
    """
    flipped = []
    for y, i in zip(ys_pad, ys_lens):
        # Keep only the first `i` tokens (cast to int) and flip them.
        flipped.append(paddle.flip(y.int()[:i], [0]))
    # batch_first=True: output is (B, T).
    return pad_sequence(flipped, True, pad_value)


def st_reverse_pad_list(ys_pad: paddle.Tensor,
                        ys_lens: paddle.Tensor,
                        sos: float,
                        eos: float) -> paddle.Tensor:
    """Reverse each sequence, pad with <eos> and prepend <sos>.

    Args:
        ys_pad (tensor): The padded tensor (B, Tokenmax).
        ys_lens (tensor): The lens of token seqs (B)
        sos (float): Value placed in the first column of the result.
        eos (float): Value used for positions beyond each sequence length.
    Returns:
        Tensor: Tensor of shape (B, max(ys_lens) + 1).
    Examples:
        >>> x
        tensor([[1, 2, 3, 4], [5, 6, 7, 0], [8, 9, 0, 0]])
        >>> st_reverse_pad_list(x, paddle.to_tensor([4, 3, 2]), sos, eos)
        tensor([[sos, 4, 3, 2,   1],
                [sos, 7, 6, 5,   eos],
                [sos, 9, 8, eos, eos]])
    """
    # Equal to:
    #   >>> r_hyps = reverse_pad_list(r_hyps, r_hyps_lens, float(self.ignore_id))
    #   >>> r_hyps, _ = add_sos_eos(r_hyps, self.sos, self.eos, self.ignore_id)
    B = ys_pad.shape[0]
    _sos = paddle.full([B, 1], sos, dtype=ys_pad.dtype)
    max_len = paddle.max(ys_lens)
    index_range = paddle.arange(0, max_len, 1)
    seq_len_expand = ys_lens.unsqueeze(1)
    # True where the column index is inside the sequence.
    seq_mask = seq_len_expand > index_range  # (beam, max_len)

    # Source position of every output element: (len - 1) - column.
    index = (seq_len_expand - 1) - index_range  # (beam, max_len)
    #   For the docstring example (lens = [4, 3, 2]):
    #   >>> index
    #   >>> tensor([[ 3,  2,  1,  0],
    #   >>>         [ 2,  1,  0, -1],
    #   >>>         [ 1,  0, -1, -2]])
    # Zero out the negative (out-of-sequence) indices so the gather is valid.
    index = index * seq_mask.astype(index.dtype)

    #   >>> index
    #   >>> tensor([[3, 2, 1, 0],
    #   >>>         [2, 1, 0, 0],
    #   >>>         [1, 0, 0, 0]])
    def paddle_gather(x, dim, index):
        # Picks x[..., index, ...] along `dim` via gather_nd: for every output
        # element a full N-d coordinate is built, where `index` supplies the
        # coordinate along `dim` and an arange supplies every other axis.
        # (Presumably mirrors torch.gather -- confirm.)
        index_shape = index.shape
        index_flatten = index.flatten()
        if dim < 0:
            dim = len(x.shape) + dim
        nd_index = []
        for k in range(len(x.shape)):
            if k == dim:
                nd_index.append(index_flatten)
            else:
                reshape_shape = [1] * len(x.shape)
                reshape_shape[k] = x.shape[k]
                x_arange = paddle.arange(x.shape[k], dtype=index.dtype)
                x_arange = x_arange.reshape(reshape_shape)
                dim_index = paddle.expand(x_arange, index_shape).flatten()
                nd_index.append(dim_index)
        # (ndim, N) -> (N, ndim): one coordinate row per gathered element.
        ind2 = paddle.transpose(paddle.stack(nd_index), [1, 0]).astype("int64")
        paddle_out = paddle.gather_nd(x, ind2).reshape(index_shape)
        return paddle_out

    r_hyps = paddle_gather(ys_pad, 1, index)
    #   >>> r_hyps
    #   >>> tensor([[4, 3, 2, 1],
    #   >>>         [7, 6, 5, 5],
    #   >>>         [9, 8, 8, 8]])
    _eos = paddle.full([1], eos, dtype=r_hyps.dtype)
    # Replace the out-of-sequence positions by eos.
    r_hyps = paddle.where(seq_mask, r_hyps, _eos)
    #   >>> r_hyps
    #   >>> tensor([[4, 3, 2,   1],
    #   >>>         [7, 6, 5,   eos],
    #   >>>         [9, 8, eos, eos]])

    r_hyps = paddle.cat([_sos, r_hyps], dim=1)
    # r_hyps = paddle.concat([hyps[:, 0:1], r_hyps], axis=1)
    #   >>> r_hyps
    #   >>> tensor([[sos, 4, 3, 2,   1],
    #   >>>         [sos, 7, 6, 5,   eos],
    #   >>>         [sos, 9, 8, eos, eos]])
    return r_hyps


================================================
FILE: paddlespeech/audio/utils/time.py
================================================
# Copyright (c) 2021  PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import time

# Public API of this module.
__all__ = [
    'Timer',
    'seconds_to_hms',
]


class Timer(object):
    '''Calculate running speed and estimated time of arrival(ETA)

    Args:
        total_step(int): Number of steps the timed job consists of.
    '''

    def __init__(self, total_step: int):
        self.total_step = total_step
        self.last_start_step = 0
        self.current_step = 0
        self._is_running = True
        # Initialise the clocks here so `timing` and `eta` are usable even
        # when `start()` was never called; `start()` resets them.
        now = time.time()
        self.last_time = now
        self.start_time = now

    def start(self):
        '''(Re)set the reference time used by `timing` and `eta`.'''
        now = time.time()
        self.last_time = now
        self.start_time = now

    def stop(self):
        '''Mark the timer as finished and record the end time.'''
        self._is_running = False
        self.end_time = time.time()

    def count(self) -> int:
        '''Advance by one step (never beyond `total_step`) and return the step count.'''
        if self.current_step < self.total_step:
            self.current_step += 1
        return self.current_step

    @property
    def timing(self) -> float:
        '''Steps per second since the previous read of this property.

        Reading the property resets the measuring window. Returns 0.0 when no
        measurable time has elapsed.
        '''
        now = time.time()
        run_steps = self.current_step - self.last_start_step
        time_used = now - self.last_time
        self.last_start_step = self.current_step
        self.last_time = now
        if time_used <= 0:
            # Two reads within the clock resolution: avoid dividing by zero.
            return 0.0
        return run_steps / time_used

    @property
    def is_running(self) -> bool:
        '''False once `stop()` has been called.'''
        return self._is_running

    @property
    def eta(self) -> str:
        '''Estimated remaining time as hh:mm:ss.

        Returns '00:00:00' after `stop()`, and '--:--:--' while no step has
        been counted yet (no estimate is possible).
        '''
        if not self.is_running:
            return '00:00:00'
        if self.current_step <= 0:
            return '--:--:--'
        elapsed = time.time() - self.start_time
        # Remaining time = average time per finished step * steps still to
        # do. (Scaling by total_step alone would give the projected total
        # duration, not what is left.)
        remaining_steps = max(self.total_step - self.current_step, 0)
        remaining_time = elapsed * remaining_steps / self.current_step
        return seconds_to_hms(remaining_time)


def seconds_to_hms(seconds: int) -> str:
    '''Convert the number of seconds to hh:mm:ss'''
    h = math.floor(seconds / 3600)
    m = math.floor((seconds - h * 3600) / 60)
    # Fractional seconds are truncated.
    s = int(seconds - h * 3600 - m * 60)
    hms_str = '{:0>2}:{:0>2}:{:0>2}'.format(h, m, s)
    return hms_str


================================================
FILE: paddlespeech/audiotools/README.md
================================================
Audiotools is a comprehensive toolkit designed for audio processing and analysis, providing robust solutions for audio signal processing, data management, model training, and evaluation.

### Directory Structure

```
.
├── audiotools
│   ├── README.md
│   ├── __init__.py
│   ├── core
│   │   ├── __init__.py
│   │   ├── _julius.py
│   │   ├── audio_signal.py
│   │   ├── display.py
│   │   ├── dsp.py
│   │   ├── effects.py
│   │   ├── ffmpeg.py
│   │   ├── loudness.py
│   │   └── util.py
│   ├── data
│   │   ├── __init__.py
│   │   ├── datasets.py
│   │   ├── preprocess.py
│   │   └── transforms.py
│   ├── metrics
│   │   ├── __init__.py
│   │   └── quality.py
│   ├── ml
│   │   ├── __init__.py
│   │   ├── accelerator.py
│   │   ├── basemodel.py
│   │   └── decorators.py
│   ├── requirements.txt
│   └── post.py
├── tests
│   └── audiotools
│       ├── core
│       │   ├── test_audio_signal.py
│       │   ├── test_bands.py
│       │   ├── test_display.py
│       │   ├── test_dsp.py
│       │   ├── test_effects.py
│       │   ├── test_fftconv.py
│       │   ├── test_grad.py
│       │   ├── test_highpass.py
│       │   ├── test_loudness.py
│       │   ├── test_lowpass.py
│       │   └── test_util.py
│       ├── data
│       │   ├── test_datasets.py
│       │   ├── test_preprocess.py
│       │   └── test_transforms.py
│       ├── ml
│       │   ├── test_decorators.py
│       │   └── test_model.py
│       └── test_post.py

```

- **core**: Contains the core class AudioSignal, which is responsible for the fundamental representation and manipulation of audio signals.

- **data**: Primarily dedicated to storing and processing datasets, including classes and functions for data preprocessing, ensuring efficient loading and transformation of audio data.

- **metrics**: Implements functions for various audio evaluation metrics, enabling precise assessment of the performance of audio models and processing algorithms.

- **ml**: Comprises classes and methods related to model training, supporting the construction, training, and optimization of machine learning models in the context of audio.

This project aims to provide developers and researchers with an efficient and flexible framework to foster innovation and exploration across various domains of audio technology.


================================================
FILE: paddlespeech/audiotools/__init__.py
================================================
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import metrics
from . import ml
from . import post
from .core import AudioSignal
from .core import highpass_filter
from .core import highpass_filters
from .core import Meter
from .core import STFTParams
from .core import util
from .data import datasets
from .data import preprocess
from .data import transforms


================================================
FILE: paddlespeech/audiotools/core/__init__.py
================================================
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import util
from ...t2s.modules import fft_conv1d
from ...t2s.modules import FFTConv1D
from ._julius import highpass_filter
from ._julius import highpass_filters
from ._julius import lowpass_filter
from ._julius import LowPassFilter
from ._julius import LowPassFilters
from ._julius import pure_tone
from ._julius import resample_frac
from ._julius import split_bands
from ._julius import SplitBands
from .audio_signal import AudioSignal
from .audio_signal import STFTParams
from .loudness import Meter


================================================
FILE: paddlespeech/audiotools/core/_julius.py
================================================
# MIT License, Copyright (c) 2020 Alexandre Défossez.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
# 
# Modified from julius(https://github.com/adefossez/julius/tree/main/julius)
"""
Implementation of a FFT based 1D convolution in PaddlePaddle.
While FFT is used in some cases for small kernel sizes, it is not the default for long ones, e.g. 512.
This module implements efficient FFT based convolutions for such cases. A typical
application is for evaluating FIR filters with a long receptive field, typically
evaluated with a stride of 1.
"""
import inspect
import math
import sys
import typing
from typing import Optional
from typing import Sequence

import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from paddlespeech.utils import satisfy_paddle_version

# Names exported by a star-import of this module.
# NOTE(review): `lowpass_filters`, `HighPassFilters`, `HighPassFilter` and
# `ResampleFrac` are defined below but are not listed here — confirm whether
# that omission is intentional.
__all__ = [
    'highpass_filter', 'highpass_filters', 'lowpass_filter', 'LowPassFilter',
    'LowPassFilters', 'pure_tone', 'resample_frac', 'split_bands', 'SplitBands'
]


def simple_repr(obj, attrs: Optional[Sequence[str]]=None, overrides: dict={}):
    """
    Build a compact ``ClassName(attr=value,...)`` string for `obj`.

    Args:
        obj: instance to describe. The parameters of its class constructor
            are read with `inspect.signature`.
        attrs: names of the attributes to consider, in output order. When
            None, the constructor parameter names are used.
        overrides: values that take precedence over the attributes actually
            stored on `obj`.

    Returns:
        str: the representation. An attribute is skipped when it is neither
        overridden nor present on `obj`, or when it is a constructor
        parameter whose current value equals the declared default.
    """
    cls = obj.__class__
    ctor_params = inspect.signature(cls).parameters
    names = list(ctor_params.keys()) if attrs is None else attrs

    shown = []
    for name in names:
        # Resolve the value: explicit override first, then the live attribute.
        if name in overrides:
            current = overrides[name]
        elif hasattr(obj, name):
            current = getattr(obj, name)
        else:
            continue

        spec = ctor_params.get(name)
        if spec is not None:
            # Constructor parameters are only shown when they carry
            # information, i.e. no default exists or the value differs from it.
            if not (spec.default is inspect._empty or
                    current != spec.default):  # type: ignore
                continue

        shown.append(f"{name}={current}")
    return f"{cls.__name__}({','.join(shown)})"


def sinc(x: paddle.Tensor):
    """
    Unnormalized sinc, i.e. ``sin(x) / x`` with the value 1 where ``x == 0``.

    __Warning__: the input is not multiplied by `pi`!
    """
    # Value substituted where x is exactly zero; created with the dtype and
    # place of `x`.
    one = paddle.to_tensor(1.0, dtype=x.dtype, place=x.place)
    ratio = paddle.sin(x) / x
    return paddle.where(x == 0, one, ratio)


class ResampleFrac(paddle.nn.Layer):
    """
    Resampling from the sample rate `old_sr` to `new_sr`.
    """

    def __init__(self,
                 old_sr: int,
                 new_sr: int,
                 zeros: int=24,
                 rolloff: float=0.945):
        """
        Args:
            old_sr (int): sample rate of the input signal x.
            new_sr (int): sample rate of the output.
            zeros (int): number of zero crossing to keep in the sinc filter.
            rolloff (float): use a lowpass filter that is `rolloff * new_sr / 2`,
                to ensure sufficient margin due to the imperfection of the FIR filter used.
                Lowering this value leaves more margin against aliasing, at the
                cost of attenuating some of the highest frequencies.

        Raises:
            ValueError: if `old_sr` or `new_sr` is not an `int`.

        Shape:

            - Input: `[*, T]`
            - Output: `[*, T']` with `T' = int(new_sr * T / old_sr)`


        .. caution::
            After dividing `old_sr` and `new_sr` by their GCD, both should be small
            for this implementation to be fast.

        >>> import paddle
        >>> resample = ResampleFrac(4, 5)
        >>> x = paddle.randn([1000])
        >>> print(len(resample(x)))
        1250
        """
        super().__init__()
        if not isinstance(old_sr, int) or not isinstance(new_sr, int):
            raise ValueError("old_sr and new_sr should be integers")
        # Only the ratio matters, so both rates are stored in reduced form.
        gcd = math.gcd(old_sr, new_sr)
        self.old_sr = old_sr // gcd
        self.new_sr = new_sr // gcd
        self.zeros = zeros
        self.rolloff = rolloff

        self._init_kernels()

    def _init_kernels(self):
        """Build one windowed-sinc kernel per output phase (`new_sr` kernels).

        Nothing is created when the reduced rates are equal, because
        `forward` returns its input unchanged in that case.
        """
        if self.old_sr == self.new_sr:
            return

        kernels = []
        sr = min(self.new_sr, self.old_sr)
        sr *= self.rolloff

        self._width = math.ceil(self.zeros * self.old_sr / sr)
        idx = paddle.arange(
            -self._width, self._width + self.old_sr, dtype="float32")
        for i in range(self.new_sr):
            t = (-i / self.new_sr + idx / paddle.full(idx.shape, self.old_sr)
                 ) * sr
            t = paddle.clip(t, -self.zeros, self.zeros)
            t *= math.pi
            window = paddle.cos(t / self.zeros / 2)**2
            kernel = sinc(t) * window
            # Renormalize kernel to ensure a constant signal is preserved.
            kernel = kernel / kernel.sum()
            kernels.append(kernel)

        _kernel = paddle.stack(kernels).reshape([self.new_sr, 1, -1])
        # The kernel is a fixed FIR filter, not a learnable weight: mark it as
        # non-trainable (as `LowPassFilters` does for its filters) so it is not
        # updated by optimizers and does not require gradients.
        self.kernel = self.create_parameter(
            shape=_kernel.shape,
            dtype=_kernel.dtype,
            attr=paddle.ParamAttr(trainable=False), )
        self.kernel.set_value(_kernel)

    def forward(
            self,
            x: paddle.Tensor,
            output_length: Optional[int]=None,
            full: bool=False, ):
        """
        Resample x.
        Args:
            x (Tensor): signal to resample, time should be the last dimension
            output_length (None or int): This can be set to the desired output length
                (last dimension). Allowed values are between 0 and
                ceil(length * new_sr / old_sr). When None (default) is specified, the
                floored output length will be used. In order to select the largest possible
                size, use the `full` argument.
            full (bool): return the longest possible output from the input. This can be useful
                if you chain resampling operations, and want to give the `output_length` only
                for the last one, while passing `full=True` to all the other ones.

        Raises:
            ValueError: if `output_length` is out of range, or if both
                `output_length` and `full=True` are given.
        """
        if self.old_sr == self.new_sr:
            return x
        shape = x.shape
        _dtype = x.dtype
        length = x.shape[-1]
        # Flatten all leading dimensions so the signal can go through conv1d.
        x = x.reshape([-1, length])
        x = F.pad(
            x.unsqueeze(1),
            [self._width, self._width + self.old_sr],
            mode="replicate",
            data_format="NCL", ).astype(self.kernel.dtype)
        ys = F.conv1d(x, self.kernel, stride=self.old_sr, data_format="NCL")
        # Interleave the `new_sr` output phases along time and restore the
        # leading dimensions and the input dtype.
        y = ys.transpose(
            [0, 2, 1]).reshape(list(shape[:-1]) + [-1]).astype(_dtype)

        float_output_length = paddle.to_tensor(
            self.new_sr * length / self.old_sr, dtype="float32")
        max_output_length = paddle.ceil(float_output_length).astype("int64")
        default_output_length = paddle.floor(float_output_length).astype(
            "int64")

        if output_length is None:
            applied_output_length = (max_output_length
                                     if full else default_output_length)
        elif output_length < 0 or output_length > max_output_length:
            raise ValueError(
                f"output_length must be between 0 and {max_output_length.numpy()}"
            )
        else:
            applied_output_length = paddle.to_tensor(
                output_length, dtype="int64")
            if full:
                raise ValueError(
                    "You cannot pass both full=True and output_length")
        return y[..., :applied_output_length]

    def __repr__(self):
        # Note: `old_sr` / `new_sr` are reported in their GCD-reduced form.
        return simple_repr(self)


def resample_frac(
        x: paddle.Tensor,
        old_sr: int,
        new_sr: int,
        zeros: int=24,
        rolloff: float=0.945,
        output_length: Optional[int]=None,
        full: bool=False, ):
    """
    Functional version of `ResampleFrac`, refer to its documentation for more information.

    A new `ResampleFrac` is built from `old_sr`, `new_sr`, `zeros` and
    `rolloff`, then applied to `x` with `output_length` and `full`.

    ..warning::
        If you call this function repeatedly with the same sample rates, the
        resampling kernel is recomputed every time. For best performance, you
        should create and cache an instance of `ResampleFrac`.
    """
    resampler = ResampleFrac(
        old_sr=old_sr, new_sr=new_sr, zeros=zeros, rolloff=rolloff)
    return resampler(x, output_length=output_length, full=full)


def pad_to(tensor: paddle.Tensor,
           target_length: int,
           mode: str="constant",
           value: float=0.0):
    """
    Pad the last dimension of `tensor` on the right up to `target_length`.

    With the default arguments the padding is made of zeros; `mode` and
    `value` are forwarded to `F.pad` unchanged.

    NOTE(review): the call uses ``data_format="NCL"``, which presumably
    requires a 3-D input — confirm against callers.
    """
    missing = target_length - tensor.shape[-1]
    return F.pad(
        tensor, (0, missing), mode=mode, value=value, data_format="NCL")


def pure_tone(freq: float, sr: float=128, dur: float=4, device=None):
    """
    Return a pure tone, i.e. cosine.

    Args:
        freq (float): frequency (in Hz)
        sr (float): sample rate (in Hz)
        dur (float): duration (in seconds)
        device: accepted but not used by this implementation.

    Returns:
        Tensor: float32 tensor holding ``int(sr * dur)`` samples of
        ``cos(2 * pi * freq * t)``.
    """
    num_samples = int(sr * dur)
    timestamps = paddle.arange(num_samples, dtype="float32") / sr
    phase = 2 * math.pi * freq * timestamps
    return paddle.cos(phase)


class LowPassFilters(nn.Layer):
    """
    Bank of low pass filters.

    Each filter is a Hann-windowed sinc FIR filter. All filters share the same
    size, ``2 * half_size + 1``, which is derived from the lowest strictly
    positive cutoff.

    Args:
        cutoffs (list[float]): cutoff frequencies in [0, 0.5], expressed as
            `f/f_s` where `f_s` is the sample rate. A cutoff of 0 yields an
            all-zero filter.
        stride (int): stride of the convolution, i.e. how much to decimate
            the output.
        pad (bool): if True, the input is padded by `half_size` on both sides
            (replicating the edge samples) before filtering.
        zeros (float): number of zero crossings to keep; controls the filter
            size.
        fft (bool or None): if True, use `fft_conv1d` from
            `paddlespeech.t2s.modules`; if False, use `F.conv1d`. If None,
            the FFT path is chosen when `half_size > 32`.
        dtype: dtype used to build the Hann window.

    Raises:
        ValueError: if `cutoffs` is empty, contains a negative value or a
            value above 0.5, or contains no strictly positive value.

    Shape:

        - Input: `[*, T]`
        - Output: `[F, *, T']`, where `F` is the number of cutoffs.
    """

    def __init__(self,
                 cutoffs: Sequence[float],
                 stride: int=1,
                 pad: bool=True,
                 zeros: float=8,
                 fft: Optional[bool]=None,
                 dtype="float32"):
        super().__init__()
        # Materialize once: `cutoffs` may be a one-shot iterable, so every
        # later use goes through `self.cutoffs`.
        self.cutoffs = list(cutoffs)
        if not self.cutoffs:
            raise ValueError("At least one cutoff frequency must be provided.")
        if min(self.cutoffs) < 0:
            raise ValueError("Minimum cutoff must be larger than zero.")
        if max(self.cutoffs) > 0.5:
            raise ValueError("A cutoff above 0.5 does not make sense.")
        self.stride = stride
        self.pad = pad
        self.zeros = zeros
        positive_cutoffs = [c for c in self.cutoffs if c > 0]
        if not positive_cutoffs:
            raise ValueError(
                "At least one cutoff must be strictly positive to size the filters."
            )
        self.half_size = int(zeros / min(positive_cutoffs) / 2)
        if fft is None:
            fft = self.half_size > 32
        self.fft = fft

        # Create filters
        window = paddle.audio.functional.get_window(
            "hann", 2 * self.half_size + 1, fftbins=False, dtype=dtype)
        time = paddle.arange(
            -self.half_size, self.half_size + 1, dtype="float32")
        filters = []
        for cutoff in self.cutoffs:
            if cutoff == 0:
                filter_ = paddle.zeros_like(time)
            else:
                filter_ = 2 * cutoff * window * sinc(2 * cutoff * math.pi *
                                                     time)
                # Normalize filter
                filter_ /= paddle.sum(filter_)
            filters.append(filter_)
        filters = paddle.stack(filters)[:, None]
        # Fixed FIR coefficients: stored as a non-trainable parameter.
        self.filters = self.create_parameter(
            shape=filters.shape,
            default_initializer=nn.initializer.Constant(value=0.0),
            dtype="float32",
            is_bias=False,
            attr=paddle.ParamAttr(trainable=False), )
        self.filters.set_value(filters)

    def forward(self, _input):
        """Filter `_input` (`[*, T]`) with every filter of the bank.

        Returns a tensor of shape `[F, *, T']` with one leading entry per
        cutoff.
        """
        shape = list(_input.shape)
        # Collapse all leading dimensions into the batch axis.
        _input = _input.reshape([-1, 1, shape[-1]])
        if self.pad:
            _input = F.pad(
                _input, (self.half_size, self.half_size),
                mode="replicate",
                data_format="NCL")
        if self.fft:
            from paddlespeech.t2s.modules import fft_conv1d
            out = fft_conv1d(_input, self.filters, stride=self.stride)
        else:
            out = F.conv1d(_input, self.filters, stride=self.stride)

        # Move the filter axis to the front and restore the leading dims.
        shape.insert(0, len(self.cutoffs))
        shape[-1] = out.shape[-1]
        return out.transpose([1, 0, 2]).reshape(shape)


class LowPassFilter(nn.Layer):
    """
    Single low pass filter, implemented as a one-element `LowPassFilters` bank.

    The constructor arguments are forwarded to `LowPassFilters`, and
    `forward` returns the first (only) entry of the bank output, so no filter
    dimension is added in front.
    """

    def __init__(self,
                 cutoff: float,
                 stride: int=1,
                 pad: bool=True,
                 zeros: float=8,
                 fft: Optional[bool]=None):
        super().__init__()
        self._lowpasses = LowPassFilters(
            cutoffs=[cutoff], stride=stride, pad=pad, zeros=zeros, fft=fft)

    @property
    def cutoff(self):
        """Cutoff of the wrapped bank's single filter."""
        bank = self._lowpasses
        return bank.cutoffs[0]

    @property
    def stride(self):
        """Stride of the wrapped bank."""
        bank = self._lowpasses
        return bank.stride

    @property
    def pad(self):
        """Padding flag of the wrapped bank."""
        bank = self._lowpasses
        return bank.pad

    @property
    def zeros(self):
        """`zeros` setting of the wrapped bank."""
        bank = self._lowpasses
        return bank.zeros

    @property
    def fft(self):
        """Whether the wrapped bank uses the FFT convolution."""
        bank = self._lowpasses
        return bank.fft

    def forward(self, _input):
        bank_output = self._lowpasses(_input)
        return bank_output[0]


def lowpass_filters(
        _input: paddle.Tensor,
        cutoffs: Sequence[float],
        stride: int=1,
        pad: bool=True,
        zeros: float=8,
        fft: Optional[bool]=None, ):
    """
    Functional version of `LowPassFilters`, refer to this class for more information.

    A new filter bank is built on every call and applied to `_input`.
    """
    bank = LowPassFilters(
        cutoffs=cutoffs, stride=stride, pad=pad, zeros=zeros, fft=fft)
    return bank(_input)


def lowpass_filter(_input: paddle.Tensor,
                   cutoff: float,
                   stride: int=1,
                   pad: bool=True,
                   zeros: float=8,
                   fft: Optional[bool]=None):
    """
    Same as `lowpass_filters` but with a single cutoff frequency.
    Output will not have a dimension inserted in the front.
    """
    filtered = lowpass_filters(
        _input, [cutoff], stride=stride, pad=pad, zeros=zeros, fft=fft)
    # Drop the leading filter axis (there is exactly one filter).
    return filtered[0]


class HighPassFilters(paddle.nn.Layer):
    """
    Bank of high pass filters, computed as the input minus the output of a
    `LowPassFilters` bank. See `LowPassFilters` for more details on the
    implementation.

    Args:
        cutoffs (list[float]): list of cutoff frequencies, in [0, 0.5] expressed as `f/f_s` where
            f_s is the samplerate and `f` is the cutoff frequency.
            The upper limit is 0.5, because a signal sampled at `f_s` contains only
            frequencies under `f_s / 2`.
        stride (int): how much to decimate the output. Probably not a good idea
            to do so with a high pass filters though...
        pad (bool): if True, the _input is padded over the edges by the low pass bank
            (edge samples are replicated). If `stride=1`,
            the output will have the same length as the _input.
        zeros (float): Number of zero crossings to keep.
            Controls the receptive field of the Finite Impulse Response filter.
            For filters with low cutoff frequency, e.g. 40Hz at 44.1kHz,
            it is a bad idea to set this to a high value.
            The default is likely appropriate for most use. Lower values
            will result in a faster filter, but with a slower attenuation around the
            cutoff frequency.
        fft (bool or None): if True, uses the FFT based convolution (`fft_conv1d`) rather than
            the regular Paddle convolution. If False, uses the regular convolution. If None,
            either one will be chosen automatically depending on the effective filter size.


    ..warning::
        All the filters will use the same filter size, aligned on the lowest
        frequency provided. If you combine a lot of filters with very diverse frequencies, it might
        be more efficient to split them over multiple modules with similar frequencies.

    Shape:

        - Input: `[*, T]`
        - Output: `[F, *, T']`, with `T'=T` if `pad` is True and `stride` is 1, and
            `F` is the number of cutoff frequencies.

    >>> highpass = HighPassFilters([1/4])
    >>> x = paddle.randn([4, 12, 21, 1024])
    >>> list(highpass(x).shape)
    [1, 4, 12, 21, 1024]
    """

    def __init__(self,
                 cutoffs: Sequence[float],
                 stride: int=1,
                 pad: bool=True,
                 zeros: float=8,
                 fft: Optional[bool]=None):
        super().__init__()
        self._lowpasses = LowPassFilters(cutoffs, stride, pad, zeros, fft)

    @property
    def cutoffs(self):
        return self._lowpasses.cutoffs

    @property
    def stride(self):
        return self._lowpasses.stride

    @property
    def pad(self):
        return self._lowpasses.pad

    @property
    def zeros(self):
        return self._lowpasses.zeros

    @property
    def fft(self):
        return self._lowpasses.fft

    def forward(self, _input):
        """Return `_input` minus each low passed version, shape `[F, *, T']`."""
        lows = self._lowpasses(_input)

        # We need to extract the right portion of the _input in case
        # pad is False or stride > 1
        if self.pad:
            start, end = 0, _input.shape[-1]
        else:
            start = self._lowpasses.half_size
            # Use an explicit end index rather than `-start`: when
            # `half_size` is 0, `-start` would be 0 and select nothing.
            end = _input.shape[-1] - start
        _input = _input[..., start:end:self.stride]
        highs = _input - lows
        return highs


class HighPassFilter(paddle.nn.Layer):
    """
    Single high pass filter, implemented as a one-element `HighPassFilters`
    bank whose first (only) output is returned.

    Shape:

        - Input: `[*, T]`
        - Output: `[*, T']`, with `T'=T` if `pad` is True and `stride` is 1.

    >>> highpass = HighPassFilter(1/4, stride=1)
    >>> x = paddle.randn([4, 124])
    >>> list(highpass(x).shape)
    [4, 124]
    """

    def __init__(self,
                 cutoff: float,
                 stride: int=1,
                 pad: bool=True,
                 zeros: float=8,
                 fft: Optional[bool]=None):
        super().__init__()
        self._highpasses = HighPassFilters(
            cutoffs=[cutoff], stride=stride, pad=pad, zeros=zeros, fft=fft)

    @property
    def cutoff(self):
        """Cutoff of the wrapped bank's single filter."""
        bank = self._highpasses
        return bank.cutoffs[0]

    @property
    def stride(self):
        """Stride of the wrapped bank."""
        bank = self._highpasses
        return bank.stride

    @property
    def pad(self):
        """Padding flag of the wrapped bank."""
        bank = self._highpasses
        return bank.pad

    @property
    def zeros(self):
        """`zeros` setting of the wrapped bank."""
        bank = self._highpasses
        return bank.zeros

    @property
    def fft(self):
        """Whether the wrapped bank uses the FFT convolution."""
        bank = self._highpasses
        return bank.fft

    def forward(self, _input):
        bank_output = self._highpasses(_input)
        return bank_output[0]


def highpass_filters(
        _input: paddle.Tensor,
        cutoffs: Sequence[float],
        stride: int=1,
        pad: bool=True,
        zeros: float=8,
        fft: Optional[bool]=None, ):
    """
    Functional version of `HighPassFilters`, refer to this class for more information.

    A new filter bank is built on every call and applied to `_input`.
    """
    bank = HighPassFilters(
        cutoffs=cutoffs, stride=stride, pad=pad, zeros=zeros, fft=fft)
    return bank(_input)


def highpass_filter(_input: paddle.Tensor,
                    cutoff: float,
                    stride: int=1,
                    pad: bool=True,
                    zeros: float=8,
                    fft: Optional[bool]=None):
    """
    Same as `highpass_filters` but with a single cutoff frequency.
    Output will not have a dimension inserted in the front.
    """
    filtered = highpass_filters(
        _input, [cutoff], stride=stride, pad=pad, zeros=zeros, fft=fft)
    # Drop the leading filter axis (there is exactly one filter).
    return filtered[0]


class SplitBands(paddle.nn.Layer):
    """
    Decomposes a signal over the given frequency bands in the waveform domain using
    a cascade of low pass filters as implemented by `LowPassFilters`.
    You can either specify explicitly the frequency cutoffs, or just the number of bands,
    in which case the frequency cutoffs will be spread out evenly in mel scale.

    Args:
        sample_rate (float): Sample rate of the input signal in Hz.
        n_bands (int or None): number of bands, when not giving them explicitly with `cutoffs`.
            In that case, the cutoff frequencies will be evenly spaced in mel-space.
        cutoffs (list[float] or None): list of frequency cutoffs in Hz. An empty
            list yields a single band containing the whole signal.
        pad (bool): if True, the input is padded over the edges by the low pass bank
            (edge samples are replicated), and the output has the same length as the input.
            If False, the output is shorter by `2 * half_size` samples, where
            `half_size` is the half filter size of the low pass bank.
        zeros (float): Number of zero crossings to keep. See `LowPassFilters` for more informations.
        fft (bool or None): See `LowPassFilters` for more info.

    Raises:
        ValueError: if both or none of `n_bands` and `cutoffs` are given, if
            `n_bands < 1`, or if a cutoff is above `sample_rate / 2`.

    ..note::
        The sum of all the bands will always be the input signal
        (trimmed to the valid region when `pad` is False).

    ..warning::
        Unlike `LowPassFilters`, the cutoffs frequencies must be provided in Hz along
        with the sample rate.

    Shape:

        - Input: `[*, T]`
        - Output: `[B, *, T']`, with `T'=T` if `pad` is True.
            If `n_bands` was provided, `B = n_bands` otherwise `B = len(cutoffs) + 1`

    >>> bands = SplitBands(sample_rate=128, n_bands=10)
    >>> x = paddle.randn(shape=[6, 4, 1024])
    >>> list(bands(x).shape)
    [10, 6, 4, 1024]
    """

    def __init__(
            self,
            sample_rate: float,
            n_bands: Optional[int]=None,
            cutoffs: Optional[Sequence[float]]=None,
            pad: bool=True,
            zeros: float=8,
            fft: Optional[bool]=None, ):
        super().__init__()
        # Exactly one of `n_bands` / `cutoffs` must be given.
        if (cutoffs is None) + (n_bands is None) != 1:
            raise ValueError(
                "You must provide either n_bands, or cutoffs, but not both.")

        self.sample_rate = sample_rate
        self.n_bands = n_bands
        # Materialize once so that a one-shot iterable is not consumed twice.
        self._cutoffs = list(cutoffs) if cutoffs is not None else None
        self.pad = pad
        self.zeros = zeros
        self.fft = fft

        if self._cutoffs is None:
            if not n_bands >= 1:
                raise ValueError(
                    f"n_bands must be at least one (got {n_bands})")
            # Interior points of a mel-spaced grid between 0 and Nyquist.
            cutoffs = paddle.audio.functional.mel_frequencies(
                n_bands + 1, 0, sample_rate / 2)[1:-1]
        else:
            cutoffs = self._cutoffs
            # An empty list is valid (single band), so only check non-empty ones.
            if len(cutoffs) > 0 and max(cutoffs) > 0.5 * sample_rate:
                raise ValueError(
                    "A cutoff above sample_rate/2 does not make sense.")
        if len(cutoffs) > 0:
            self.lowpass = LowPassFilters(
                [c / sample_rate for c in cutoffs],
                pad=pad,
                zeros=zeros,
                fft=fft)
        else:
            self.lowpass = None  # type: ignore

    def forward(self, input):
        """Split `input` (`[*, T]`) into bands, returned as `[B, *, T']`."""
        if self.lowpass is None:
            return input[None]
        lows = self.lowpass(input)
        if not self.pad:
            # Without padding the low pass outputs only cover the valid
            # region; align the input with them before taking the residual.
            half_size = self.lowpass.half_size
            input = input[..., half_size:input.shape[-1] - half_size]
        low = lows[0]
        bands = [low]
        for low_and_band in lows[1:]:
            # Get a bandpass filter by subtracting lowpasses
            band = low_and_band - low
            bands.append(band)
            low = low_and_band
        # Last band is whatever is left in the signal
        bands.append(input - low)
        return paddle.stack(bands)

    @property
    def cutoffs(self):
        """Cutoff frequencies in Hz (empty list when there is a single band)."""
        if self._cutoffs is not None:
            return self._cutoffs
        elif self.lowpass is not None:
            return [c * self.sample_rate for c in self.lowpass.cutoffs]
        else:
            return []


def split_bands(
        signal: paddle.Tensor,
        sample_rate: float,
        n_bands: Optional[int]=None,
        cutoffs: Optional[Sequence[float]]=None,
        pad: bool=True,
        zeros: float=8,
        fft: Optional[bool]=None, ):
    """
    Functional version of `SplitBands`, refer to this class for more information.

    A new `SplitBands` module is built on every call and applied to `signal`.

    >>> x = paddle.randn(shape=[6, 4, 1024])
    >>> list(split_bands(x, sample_rate=64, cutoffs=[12, 24]).shape)
    [3, 6, 4, 1024]
    """
    splitter = SplitBands(
        sample_rate=sample_rate,
        n_bands=n_bands,
        cutoffs=cutoffs,
        pad=pad,
        zeros=zeros,
        fft=fft)
    return splitter(signal)


================================================
FILE: paddlespeech/audiotools/core/audio_signal.py
================================================
# MIT License, Copyright (c) 2023-Present, Descript.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
# 
# Modified from audiotools(https://github.com/descriptinc/audiotools/blob/master/audiotools/core/audio_signal.py)
import copy
import functools
import hashlib
import math
import pathlib
import tempfile
import typing
import warnings
from collections import namedtuple
from pathlib import Path
from typing import Optional

import librosa
import numpy as np
import paddle
import soundfile

from . import util
from ._julius import resample_frac
from .display import DisplayMixin
from .dsp import DSPMixin
from .effects import EffectMixin
from .effects import ImpulseResponseMixin
from .ffmpeg import FFMPEGMixin
from .loudness import LoudnessMixin

# Names exported by a star-import of this module.
__all__ = ['STFTParams', 'AudioSignal']


def create_dct(n_mfcc: int, n_mels: int, norm: Optional[str]) -> paddle.Tensor:
    r"""Build a DCT-II transformation matrix of shape (``n_mels``, ``n_mfcc``).

    Args:
        n_mfcc (int): Number of mfc coefficients to retain
        n_mels (int): Number of mel filterbanks
        norm (str or None): Norm to use (either "ortho" or None)

    Returns:
        paddle.Tensor: The transformation matrix of shape
        (``n_mels``, ``n_mfcc``), to be right-multiplied to row-wise data.

    Raises:
        ValueError: if ``norm`` is neither ``"ortho"`` nor None.
    """
    if not (norm is None or norm == "ortho"):
        raise ValueError('norm must be either "ortho" or None')

    # http://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II
    mel_index = paddle.arange(float(n_mels))
    coeff_index = paddle.arange(float(n_mfcc)).unsqueeze([1])
    # Broadcasting gives a basis of size (n_mfcc, n_mels).
    basis = paddle.cos(math.pi / float(n_mels) * (mel_index + 0.5) *
                       coeff_index)

    if norm is not None:
        # Orthonormal scaling: the first row gets an extra 1/sqrt(2).
        basis[0] *= 1.0 / math.sqrt(2.0)
        basis *= math.sqrt(2.0 / float(n_mels))
    else:
        basis *= 2.0
    return basis.transpose([1, 0])


# Every field defaults to None so that any subset of parameters can be given.
STFTParams = namedtuple(
    "STFTParams",
    [
        "window_length",
        "hop_length",
        "window_type",
        "match_stride",
        "padding_type",
    ],
    defaults=(None, None, None, None, None), )
r"""
STFTParams object is a container that holds STFT parameters - window_length,
hop_length, window_type, match_stride and padding_type. Not all parameters
need to be specified. Ones that are not specified are left as None and will be
inferred by the AudioSignal parameters.

Parameters
----------
window_length : int, optional
    Window length of STFT, by default ``0.032 * self.sample_rate``.
hop_length : int, optional
    Hop length of STFT, by default ``window_length // 4``.
window_type : str, optional
    Type of window to use, by default ``sqrt\_hann``.
match_stride : bool, optional
    Whether to match the stride of convolutional layers, by default False
padding_type : str, optional
    Type of padding to use, by default 'reflect'
"""


class AudioSignal(
        EffectMixin,
        LoudnessMixin,
        ImpulseResponseMixin,
        DSPMixin,
        DisplayMixin,
        FFMPEGMixin, ):
    """This is the core object of this library. Audio is always
    loaded into an AudioSignal, which then enables all the features
    of this library, including audio augmentations, I/O, playback,
    and more.

    The structure of this object is that the base functionality
    is defined in ``core/audio_signal.py``, while extensions to
    that functionality are defined in the other ``core/*.py``
    files. For example, all the display-based functionality
    (e.g. plot spectrograms, waveforms, write to tensorboard)
    are in ``core/display.py``.

    Parameters
    ----------
    audio_path_or_array : typing.Union[paddle.Tensor, str, Path, np.ndarray]
        Object to create AudioSignal from. Can be a tensor, numpy array,
        or a path to a file. The audio is always reshaped to
        ``(batch, channels, samples)``.
    sample_rate : int, optional
        Sample rate of the audio. If different from underlying file, resampling is
        performed. If passing in an array or tensor, this must be defined,
        by default None
    stft_params : STFTParams, optional
        Parameters of STFT to use. , by default None
    offset : float, optional
        Offset in seconds to read from file, by default 0
    duration : float, optional
        Duration in seconds to read from file, by default None
    device : str, optional
        Device to load audio onto, by default None

    Examples
    --------
    Loading an AudioSignal from an array, at a sample rate of
    44100.

    >>> signal = AudioSignal(paddle.randn([5*44100]), 44100)

    Note, the signal is reshaped to have a batch size, and one
    audio channel:

    >>> print(signal.shape)
    (1, 1, 220500)

    You can treat AudioSignals like tensors, and many of the same
    functions you might use on tensors are defined for AudioSignals
    as well:

    >>> signal.to("cuda")
    >>> signal.cuda()
    >>> signal.clone()
    >>> signal.detach()

    Indexing AudioSignals returns an AudioSignal:

    >>> signal[..., 3*44100:4*44100]

    The above signal is 1 second long, and is also an AudioSignal.
    """

    def __init__(
            self,
            audio_path_or_array: typing.Union[paddle.Tensor, str, Path,
                                              np.ndarray],
            sample_rate: int=None,
            stft_params: STFTParams=None,
            offset: float=0,
            duration: float=None,
            device: str=None, ):
        """Create the signal from a file path or from in-memory audio.

        A ``str`` or ``pathlib.Path`` is loaded with ``load_from_file`` using
        ``offset``, ``duration`` and ``device``. A numpy array or paddle
        tensor is loaded with ``load_from_array`` and requires
        ``sample_rate``. In this constructor ``sample_rate`` is only used on
        the array/tensor path.

        Raises
        ------
        ValueError
            If ``audio_path_or_array`` is none of the supported types.
        """
        # Classify the input first so that unsupported types fail before any
        # attribute is set.
        if isinstance(audio_path_or_array, (str, pathlib.Path)):
            from_file = True
        elif isinstance(audio_path_or_array,
                        np.ndarray) or paddle.is_tensor(audio_path_or_array):
            from_file = False
        else:
            raise ValueError("audio_path_or_array must be either a Path, "
                             "string, numpy array, or paddle Tensor!")

        self.path_to_file = None

        self.audio_data = None
        self.sources = None  # List of AudioSignal objects.
        self.stft_data = None

        if from_file:
            self.load_from_file(
                audio_path_or_array,
                offset=offset,
                duration=duration,
                device=device)
        else:
            assert sample_rate is not None, "Must set sample rate!"
            self.load_from_array(
                audio_path_or_array, sample_rate, device=device)

        self.window = None
        self.stft_params = stft_params

        self.metadata = {"offset": offset, "duration": duration}

    @property
    def path_to_input_file(self):
        """
        Backwards-compatible alias that returns ``path_to_file``
        (the path to the input file, if it exists).
        """
        input_path = self.path_to_file
        return input_path

    @classmethod
    def excerpt(
            cls,
            audio_path: typing.Union[str, Path],
            offset: float=None,
            duration: float=None,
            state: typing.Union[np.random.RandomState, int]=None,
            **kwargs, ):
        """Load a randomly positioned excerpt of ``duration`` seconds from
        the audio file at ``audio_path``. The start time is drawn uniformly
        between ``offset`` and ``max(total_duration - duration, 0)``;
        ``state`` can be used to seed the random draw.

        Parameters
        ----------
        audio_path : typing.Union[str, Path]
            Path to audio file to grab excerpt from.
        offset : float, optional
            Lower bound for the start time, in seconds drawn from
            the file, by default None (treated as 0).
        duration : float, optional
            Duration of excerpt, in seconds, by default None. Note that the
            upper bound is computed as ``total_duration - duration``, so a
            numeric value has to be passed.
        state : typing.Union[np.random.RandomState, int], optional
            RandomState or seed of random state, by default None
        kwargs : dict
            Extra keyword arguments forwarded to the constructor.

        Returns
        -------
        AudioSignal
            AudioSignal containing excerpt, with the drawn offset and the
            duration recorded in its ``metadata``.

        Examples
        --------
        >>> signal = AudioSignal.excerpt("path/to/audio", duration=5)
        """
        total_duration = util.info(audio_path).duration
        state = util.random_state(state)

        # Range of admissible start times.
        lower_bound = offset if offset is not None else 0
        upper_bound = max(total_duration - duration, 0)
        offset = state.uniform(lower_bound, upper_bound)

        signal = cls(audio_path, offset=offset, duration=duration, **kwargs)
        signal.metadata["offset"] = offset
        signal.metadata["duration"] = duration
        return signal

    @classmethod
    def salient_excerpt(
            cls,
            audio_path: typing.Union[str, Path],
            loudness_cutoff: float=None,
            num_tries: int=8,
            state: typing.Union[np.random.RandomState, int]=None,
            **kwargs, ):
        """Like ``AudioSignal.excerpt``, but keeps drawing excerpts until one
        is louder than ``loudness_cutoff`` (as measured by the excerpt's
        ``loudness()``) or until ``num_tries`` draws have been made.

        Parameters
        ----------
        audio_path : typing.Union[str, Path]
            Path to audio file to grab excerpt from.
        loudness_cutoff : float, optional
            Loudness threshold in dB. Typical values are ``-40, -60``,
            etc, by default None, in which case a single excerpt is drawn
            without any loudness check.
        num_tries : int, optional
            Number of tries to grab an excerpt above the threshold
            before giving up, by default 8.
        state : typing.Union[np.random.RandomState, int], optional
            RandomState or seed of random state, by default None
        kwargs : dict
            Keyword arguments to AudioSignal.excerpt

        Returns
        -------
        AudioSignal
            AudioSignal containing excerpt. When the tries run out, the last
            drawn excerpt is returned even if it is below the threshold.


        .. warning::
            if ``num_tries`` is set to None, ``salient_excerpt`` may try forever, which can
            result in an infinite loop if ``audio_path`` does not have
            any loud enough excerpts.

        Examples
        --------
        >>> signal = AudioSignal.salient_excerpt(
                "path/to/audio",
                loudness_cutoff=-40,
                duration=5
            )
        """
        state = util.random_state(state)
        if loudness_cutoff is not None:
            # Start below any threshold so that at least one draw happens.
            measured = -np.inf
            attempts = 0
            while measured <= loudness_cutoff:
                excerpt = cls.excerpt(audio_path, state=state, **kwargs)
                measured = excerpt.loudness()
                attempts += 1
                if num_tries is not None and attempts >= num_tries:
                    break
        else:
            excerpt = cls.excerpt(audio_path, state=state, **kwargs)
        return excerpt

    @classmethod
    def zeros(
            cls,
            duration: float,
            sample_rate: int,
            num_channels: int=1,
            batch_size: int=1,
            **kwargs, ):
        """Build an AudioSignal whose samples are all zero.

        Parameters
        ----------
        duration : float
            Duration of the signal; multiplied by ``sample_rate`` and
            truncated with ``int`` to get the number of samples.
        sample_rate : int
            Sample rate of AudioSignal
        num_channels : int, optional
            Number of channels, by default 1
        batch_size : int, optional
            Batch size, by default 1
        kwargs : dict
            Extra keyword arguments forwarded to the class constructor.

        Returns
        -------
        AudioSignal
            Signal wrapping a ``[batch_size, num_channels, n_samples]``
            tensor of zeros.

        Examples
        --------
        Generate 5 seconds of all zeros at a sample rate of 44100.

        >>> signal = AudioSignal.zeros(5.0, 44100)
        """
        num_samples = int(duration * sample_rate)
        silence = paddle.zeros([batch_size, num_channels, num_samples])
        return cls(silence, sample_rate, **kwargs)

    @classmethod
    def wave(
            cls,
            frequency: float,
            duration: float,
            sample_rate: int,
            num_channels: int=1,
            shape: str="sine",
            **kwargs, ):
        """Synthesize a periodic waveform of the requested shape.

        Parameters
        ----------
        frequency : float
            Frequency of the waveform
        duration : float
            Duration of the waveform
        sample_rate : int
            Sample rate of the waveform
        num_channels : int, optional
            Number of channels, by default 1. The same waveform is
            expanded across all channels.
        shape : str, optional
            Shape of the waveform, by default "sine".
            One of "sawtooth", "square", "sine", "triangle"
        kwargs : dict
            Keyword arguments to AudioSignal

        Returns
        -------
        AudioSignal
            Signal built from a float32 tensor of shape
            ``[1, num_channels, n_samples]``.

        Raises
        ------
        ValueError
            If ``shape`` is not one of the supported names.
        """
        n_samples = int(duration * sample_rate)
        # Time axis: n_samples points from 0 to duration, both ends included.
        times = np.linspace(0, duration, n_samples)

        if shape == "sine":
            samples = np.sin(2 * np.pi * frequency * times)
        elif shape == "square":
            from scipy.signal import square

            samples = square(2 * np.pi * frequency * times)
        elif shape in ("sawtooth", "triangle"):
            from scipy.signal import sawtooth

            if shape == "sawtooth":
                samples = sawtooth(2 * np.pi * frequency * times, 0.5)
            else:
                # frequency is doubled by the abs call, so omit the 2 in 2pi
                samples = sawtooth(np.pi * frequency * times, 0.5)
                samples = -np.abs(samples) * 2 + 1
        else:
            raise ValueError(f"Invalid shape {shape}")

        tensor = paddle.to_tensor(samples, dtype=paddle.float32)
        # Add batch and channel axes, then repeat over the channel axis.
        tensor = tensor[None, None].expand([1, num_channels, -1])
        return cls(tensor, sample_rate, **kwargs)

    @classmethod
    def batch(
            cls,
            audio_signals: list,
            pad_signals: bool=False,
            truncate_signals: bool=False,
            resample: bool=False,
            dim: int=0, ):
        """Creates a batched AudioSignal from a list of AudioSignals.

        The signals in ``audio_signals`` are modified in place when they
        are resampled, padded or truncated.

        Parameters
        ----------
        audio_signals : list[AudioSignal]
            List of AudioSignal objects
        pad_signals : bool, optional
            Whether to pad signals to length of the maximum length
            AudioSignal in the list, by default False
        truncate_signals : bool, optional
            Whether to truncate signals to length of shortest length
            AudioSignal in the list, by default False
        resample : bool, optional
            Whether to resample AudioSignal to the sample rate of
            the first AudioSignal in the list, by default False
        dim : int, optional
            Dimension along which to batch the signals, by default 0.

        Returns
        -------
        AudioSignal
            Batched AudioSignal. Its ``path_to_file`` is the list of the
            ``path_to_file`` values of the inputs.

        Raises
        ------
        RuntimeError
            If ``audio_signals`` is empty.
        RuntimeError
            If not all AudioSignals are the same sample rate, and
            ``resample=False``, an error is raised.
        RuntimeError
            If not all AudioSignals are the same the length, and
            both ``pad_signals=False`` and ``truncate_signals=False``,
            an error is raised.

        Examples
        --------
        Batching a bunch of random signals:

        >>> signal_list = [AudioSignal(paddle.randn([44100]), 44100) for _ in range(10)]
        >>> signal = AudioSignal.batch(signal_list)
        >>> print(signal.shape)
        (10, 1, 44100)

        """
        if len(audio_signals) == 0:
            raise RuntimeError("Cannot batch an empty list of AudioSignals.")

        sample_rates = [x.sample_rate for x in audio_signals]

        if len(set(sample_rates)) != 1:
            if resample:
                target_rate = sample_rates[0]
                for x in audio_signals:
                    x.resample(target_rate)
            else:
                raise RuntimeError(
                    f"Not all signals had the same sample rate! Got {sample_rates}. "
                    f"All signals must have the same sample rate, or resample must be True. "
                )

        # Lengths must be measured AFTER resampling: resampling changes the
        # number of samples, so lengths taken beforehand would make the
        # pad/truncate/raise decision below use stale values.
        signal_lengths = [x.signal_length for x in audio_signals]

        if len(set(signal_lengths)) != 1:
            if pad_signals:
                max_length = max(signal_lengths)
                for x in audio_signals:
                    pad_len = max_length - x.signal_length
                    x.zero_pad(0, pad_len)
            elif truncate_signals:
                min_length = min(signal_lengths)
                for x in audio_signals:
                    x.truncate_samples(min_length)
            else:
                raise RuntimeError(
                    f"Not all signals had the same length! Got {signal_lengths}. "
                    f"All signals must be the same length, or pad_signals/truncate_signals "
                    f"must be True. ")
        # Concatenate along the specified dimension (default 0)
        audio_data = paddle.concat(
            [x.audio_data for x in audio_signals], axis=dim)
        audio_paths = [x.path_to_file for x in audio_signals]

        batched_signal = cls(
            audio_data,
            sample_rate=audio_signals[0].sample_rate, )
        batched_signal.path_to_file = audio_paths
        return batched_signal

    # I/O
    def load_from_file(
            self,
            audio_path: typing.Union[str, Path],
            offset: float,
            duration: float,
            device: str="cpu", ):
        """Read audio from disk into this signal.

        Used internally when AudioSignal is instantiated with a path to a
        file. The file is read with ``librosa.load`` at its native sample
        rate (``sr=None``) and without down-mixing (``mono=False``).

        Parameters
        ----------
        audio_path : typing.Union[str, Path]
            Path to file
        offset : float
            Offset in seconds
        duration : float
            Duration in seconds
        device : str, optional
            Device to put AudioSignal on, by default "cpu"

        Returns
        -------
        AudioSignal
            This signal, after ``self.to(device)``.

        Raises
        ------
        RuntimeError
            If the loaded audio has zero samples.
        """
        # need `ffmpeg` (note kept from the original implementation)
        loaded, native_rate = librosa.load(
            audio_path,
            offset=offset,
            duration=duration,
            sr=None,
            mono=False, )
        tensor = util.ensure_tensor(loaded)
        if tensor.shape[-1] == 0:
            raise RuntimeError(
                f"Audio file {audio_path} with offset {offset} and duration {duration} is empty!"
            )

        # Prepend axes until the data is 3-dimensional.
        while tensor.ndim < 3:
            tensor = tensor.unsqueeze(0)
        self.audio_data = tensor

        # Remember the length as loaded, before any later edits.
        self.original_signal_length = self.signal_length

        self.sample_rate = native_rate
        self.path_to_file = audio_path
        return self.to(device)

    def load_from_array(
            self,
            audio_array: typing.Union[paddle.Tensor, np.ndarray],
            sample_rate: int,
            device: str="cpu", ):
        """Loads data from array, reshaping it to be exactly 3
        dimensions. Used internally when AudioSignal is called
        with a tensor or an array.

        Parameters
        ----------
        audio_array : typing.Union[paddle.Tensor, np.ndarray]
            Array/tensor of audio of samples. float64 input is cast to
            float32.
        sample_rate : int
            Sample rate of audio
        device : str, optional
            Accepted for signature compatibility; this method does not
            currently move the data, by default "cpu"

        Returns
        -------
        AudioSignal
            This signal, with ``audio_data``, ``original_signal_length``
            and ``sample_rate`` set.
        """
        audio_data = util.ensure_tensor(audio_array)

        # Compare dtype objects directly. The previous check compared
        # ``str(dtype)`` (a string) against ``paddle.float64`` (a dtype
        # object), so the float64 -> float32 cast was not applied.
        if audio_data.dtype == paddle.float64:
            audio_data = audio_data.astype("float32")

        # Prepend axes until the data is 3-dimensional.
        if audio_data.ndim < 2:
            audio_data = audio_data.unsqueeze(0)
        if audio_data.ndim < 3:
            audio_data = audio_data.unsqueeze(0)
        self.audio_data = audio_data

        self.original_signal_length = self.signal_length

        self.sample_rate = sample_rate

        return self

    def write(self, audio_path: typing.Union[str, Path]):
        """Save the first item of the batch to ``audio_path``.

        Only ``self.audio_data[0]`` is written; to write another item,
        index the signal along the batch dimension first. A warning is
        issued when the absolute peak of that item is above 1. Afterwards
        ``path_to_file`` is set to ``audio_path``.

        Parameters
        ----------
        audio_path : typing.Union[str, Path]
            Path to write audio to.

        Returns
        -------
        AudioSignal
            Returns original AudioSignal, so you can use this in a fluent
            interface.

        Examples
        --------
        Creating and writing a signal to disk:

        >>> signal = AudioSignal(paddle.randn([10, 1, 44100]), 44100)
        >>> signal.write("/tmp/out.wav")

        Writing a different element of the batch:

        >>> signal[5].write("/tmp/out.wav")

        Using this in a fluent interface:

        >>> signal.write("/tmp/original.wav").low_pass(4000).write("/tmp/lowpass.wav")

        """
        first_item = self.audio_data[0]
        if first_item.abs().max() > 1:
            warnings.warn("Audio amplitude > 1 clipped when saving")

        # Transposed so that the written array has samples on the first axis.
        frames = first_item.numpy().T
        soundfile.write(str(audio_path), frames, self.sample_rate)

        self.path_to_file = audio_path
        return self

    def deepcopy(self):
        """Return an independent deep copy made with ``copy.deepcopy``.

        Returns
        -------
        AudioSignal
            Deep copy of the audio signal.
        """
        duplicate = copy.deepcopy(self)
        return duplicate

    def copy(self):
        """Return a shallow copy made with ``copy.copy``.

        Returns
        -------
        AudioSignal
            Shallow copy of the audio signal.
        """
        shallow = copy.copy(self)
        return shallow

    def clone(self):
        """Build a new signal of the same type with every tensor cloned.

        The audio data is cloned and passed to the constructor together
        with the sample rate and STFT parameters. ``stft_data`` and the
        cached ``_loudness`` are cloned when present, and ``path_to_file``
        and ``metadata`` are deep-copied. Useful when using AudioSignal
        within autograd computation graphs.

        Returns
        -------
        AudioSignal
            Clone of AudioSignal.
        """
        duplicate = type(self)(
            self.audio_data.clone(),
            self.sample_rate,
            stft_params=self.stft_params, )

        spectrum = self.stft_data
        if spectrum is not None:
            duplicate.stft_data = spectrum.clone()

        cached_loudness = self._loudness
        if cached_loudness is not None:
            duplicate._loudness = cached_loudness.clone()

        duplicate.path_to_file = copy.deepcopy(self.path_to_file)
        duplicate.metadata = copy.deepcopy(self.metadata)
        return duplicate

    def detach(self):
        """Detaches tensors contained in AudioSignal.

        Relevant attributes are the stft data, the audio data,
        and the cached loudness of the file.

        Returns
        -------
        AudioSignal
            Same signal, but with all tensors detached.
        """
        # Grab the cached loudness first: assigning ``audio_data`` goes
        # through its setter, which resets ``_loudness`` to None.
        cached_loudness = self._loudness

        if self.stft_data is not None:
            self.stft_data = self.stft_data.detach()

        self.audio_data = self.audio_data.detach()

        # Restore the cache after the setter ran, so the detached value is
        # kept rather than thrown away (detaching does not change the audio).
        if cached_loudness is not None:
            self._loudness = cached_loudness.detach()
        return self

    def hash(self):
        """Writes the audio data to a temporary file, and then
        hashes it using hashlib (SHA-256). Useful for creating a file
        name based on the audio content.

        The signal's ``path_to_file`` is left as it was before the call.

        Returns
        -------
        str
            Hex digest of the written audio file.

        Examples
        --------
        Creating a signal, and writing it to a unique file name:

        >>> signal = AudioSignal(paddle.randn([44100]), 44100)
        >>> hash = signal.hash()
        >>> signal.write(f"{hash}.wav")

        """
        # ``write`` repoints ``path_to_file`` at the file it wrote; here that
        # is a temporary file that no longer exists once hashing is done.
        original_path = getattr(self, "path_to_file", None)

        digest = hashlib.sha256()
        buffer = bytearray(128 * 1024)
        view = memoryview(buffer)
        try:
            with tempfile.NamedTemporaryFile(suffix=".wav") as tmp:
                self.write(tmp.name)
                # Read back in fixed-size chunks; readinto returns 0 at EOF.
                with open(tmp.name, "rb", buffering=0) as wav_file:
                    for n in iter(lambda: wav_file.readinto(view), 0):
                        digest.update(view[:n])
        finally:
            self.path_to_file = original_path
        return digest.hexdigest()

    # Signal operations
    def to_mono(self):
        """Down-mix to a single channel by averaging over axis 1.

        The averaged axis is kept (``keepdim=True``), so the data stays
        3-dimensional.

        Returns
        -------
        AudioSignal
            AudioSignal with mean of channels.
        """
        channel_mean = self.audio_data.mean(1, keepdim=True)
        self.audio_data = channel_mean
        return self

    def resample(self, sample_rate: int):
        """Resample the audio in place with ``resample_frac``.

        Nothing happens when ``sample_rate`` already equals the current
        sample rate.

        Parameters
        ----------
        sample_rate : int
            Sample rate to resample to.

        Returns
        -------
        AudioSignal
            Resampled AudioSignal
        """
        if sample_rate != self.sample_rate:
            self.audio_data = resample_frac(self.audio_data, self.sample_rate,
                                            sample_rate)
            self.sample_rate = sample_rate
        return self

    # Tensor operations
    def to(self, device: str):
        """Moves all tensors contained in signal to the specified device.

        The tensors moved are the stft data, the audio data and the
        cached loudness, each only when it is not None.

        Parameters
        ----------
        device : str
            Device to move AudioSignal onto. Typical values are
            "gpu", "cpu", or "gpu:x" to specify the nth gpu.

        Returns
        -------
        AudioSignal
            AudioSignal with all tensors moved to specified device.
        """
        # Grab the cached loudness first: assigning ``audio_data`` goes
        # through its setter, which resets ``_loudness`` to None.
        cached_loudness = self._loudness

        if self.stft_data is not None:
            self.stft_data = util.move_to_device(self.stft_data, device)
        if self.audio_data is not None:
            self.audio_data = util.move_to_device(self.audio_data, device)

        # Restore the cache after the setter ran, so the moved value is kept
        # rather than thrown away (moving does not change the audio).
        if cached_loudness is not None:
            self._loudness = util.move_to_device(cached_loudness, device)
        return self

    def float(self):
        """Cast ``self.audio_data`` to float32 via ``astype("float32")``.

        Returns
        -------
        AudioSignal
            This signal, with the cast audio data.
        """
        as_float32 = self.audio_data.astype("float32")
        self.audio_data = as_float32
        return self

    def cpu(self):
        """Shorthand for ``self.to("cpu")``.

        Returns
        -------
        AudioSignal
            Whatever ``self.to("cpu")`` returns.
        """
        moved = self.to("cpu")
        return moved

    def cuda(self):
        """Shorthand for ``self.to("gpu")``.

        Returns
        -------
        AudioSignal
            Whatever ``self.to("gpu")`` returns.
        """
        moved = self.to("gpu")
        return moved

    def numpy(self):
        """Return the audio data as a numpy array.

        Calls ``detach()``, then ``cpu()``, then ``numpy()`` on
        ``self.audio_data``.

        Returns
        -------
        np.ndarray
            Audio data as a numpy array.
        """
        detached = self.audio_data.detach()
        on_host = detached.cpu()
        return on_host.numpy()

    def zero_pad(self, before: int, after: int):
        """Pad ``audio_data`` along its last axis, in place.

        Uses ``paddle.nn.functional.pad`` with its default mode and the
        "NCL" data format.

        Parameters
        ----------
        before : int
            How many zeros to prepend to audio.
        after : int
            How many zeros to append to audio.

        Returns
        -------
        AudioSignal
            AudioSignal with padding applied.
        """
        amounts = (before, after)
        padded = paddle.nn.functional.pad(
            self.audio_data, amounts, data_format="NCL")
        self.audio_data = padded
        return self

    def zero_pad_to(self, length: int, mode: str="after"):
        """Zero-pad the signal up to ``length`` samples.

        When the signal is already at least ``length`` samples long the
        padding amount is 0. Any ``mode`` other than "before" or "after"
        leaves the signal untouched.

        Parameters
        ----------
        length : int
            Length to pad to
        mode : str, optional
            Whether to prepend ("before") or append ("after") zeros to
            signal, by default "after"

        Returns
        -------
        AudioSignal
            AudioSignal with padding applied.
        """
        if mode in ("before", "after"):
            missing = max(length - self.signal_length, 0)
            amounts = (missing, 0) if mode == "before" else (0, missing)
            self.zero_pad(*amounts)
        return self

    def trim(self, before: int, after: int):
        """Drop samples from both ends of ``audio_data``, in place.

        Parameters
        ----------
        before : int
            How many samples to trim from beginning.
        after : int
            How many samples to trim from end.

        Returns
        -------
        AudioSignal
            AudioSignal with trimming applied.
        """
        # ``-0`` as a stop index would produce an empty slice, so an open
        # end is used when nothing is trimmed from the tail.
        stop = None if after == 0 else -after
        self.audio_data = self.audio_data[..., before:stop]
        return self

    def truncate_samples(self, length_in_samples: int):
        """Keep only the first ``length_in_samples`` samples, in place.

        Parameters
        ----------
        length_in_samples : int
            Truncate to this many samples.

        Returns
        -------
        AudioSignal
            AudioSignal with truncation applied.
        """
        kept = self.audio_data[..., :length_in_samples]
        self.audio_data = kept
        return self

    @property
    def device(self):
        """Get device that AudioSignal is on.

        The ``place`` of ``audio_data`` is used when audio data is set,
        otherwise the ``place`` of ``stft_data``.

        Returns
        -------
        paddle.device
            Device that AudioSignal is on, or ``None`` when the signal
            holds neither audio data nor stft data.
        """
        if self.audio_data is not None:
            return self.audio_data.place
        if self.stft_data is not None:
            return self.stft_data.place
        # Neither tensor is set: report "no device" explicitly instead of
        # failing on an unassigned local variable.
        return None

    # Properties
    @property
    def audio_data(self):
        """Audio samples held by this signal.

        The setter only accepts ``None`` or a 3-dimensional paddle tensor,
        documented there as (batch_size, num_channels, num_samples).

        Returns
        -------
        paddle.Tensor
            Audio samples (``None`` when no audio data is set).
        """
        samples = self._audio_data
        return samples

    @audio_data.setter
    def audio_data(self, data: typing.Union[paddle.Tensor, np.ndarray]):
        if data is not None:
            assert paddle.is_tensor(data), "audio_data should be paddle.Tensor"
            assert data.ndim == 3, "audio_data should be 3-dim (B, C, T)"
        self._audio_data = data
        # Old loudness value not guaranteed to be right, reset it.
        self._loudness = None
        return

    # alias for audio_data
    samples = audio_data

    @property
    def stft_data(self):
        """STFT data stored on the signal.

        Shape is documented as (batch, channels, frequencies, time).

        Returns
        -------
        paddle.Tensor
            Complex spectrogram data (``None`` when no STFT is stored).
        """
        spectrum = self._stft_data
        return spectrum

    @stft_data.setter
    def stft_data(self, data: typing.Union[paddle.Tensor, np.ndarray]):
        if data is not None:
            assert paddle.is_tensor(data) and paddle.is_complex(data)
            if self.stft_data is not None and self.stft_data.shape != data.shape:
                warnings.warn("stft_data changed shape")
        self._stft_data = data
        return

    @property
    def batch_size(self):
        """Size of the first axis of ``audio_data``.

        Returns
        -------
        int
            Batch size of signal.
        """
        dims = self.audio_data.shape
        return dims[0]

    @property
    def signal_length(self):
        """Size of the last axis of ``audio_data``.

        Returns
        -------
        int
            Length of signal in samples.
        """
        dims = self.audio_data.shape
        return dims[-1]

    # alias for signal_length
    length = signal_length

    @property
    def shape(self):
        """Shape of the audio data, exactly as ``audio_data.shape`` reports it.

        Returns
        -------
        tuple
            Shape of audio data.
        """
        dims = self.audio_data.shape
        return dims

    @property
    def signal_duration(self):
        """Duration of the signal: ``signal_length / sample_rate``.

        Returns
        -------
        float
            Length of signal in seconds.
        """
        num_samples = self.signal_length
        return num_samples / self.sample_rate

    # alias for signal_duration
    duration = signal_duration

    @property
    def num_channels(self):
        """Size of axis 1 of ``audio_data``.

        Returns
        -------
        int
            Number of audio channels.
        """
        dims = self.audio_data.shape
        return dims[1]

    # STFT
    @staticmethod
    @functools.lru_cache(None)
    def get_window(window_type: str, window_length: int, device: str=None):
        """Build an analysis window as a float32 tensor.

        Wraps ``scipy.signal.get_window`` and adds two extra window types:
        "average" (all ones divided by ``window_length``) and "sqrt_hann"
        (square root of the hann window). Results are cached with
        ``functools.lru_cache``.

        Parameters
        ----------
        window_type : str
            Type of window to get
        window_length : int
            Length of the window
        device : str
            Not used by the body; it only takes part in the cache key.

        Returns
        -------
        paddle.Tensor
            The window as a float32 tensor.
        """
        from scipy import signal

        if window_type == "average":
            values = np.ones(window_length) / window_length
        elif window_type == "sqrt_hann":
            values = np.sqrt(signal.get_window("hann", window_length))
        else:
            values = signal.get_window(window_type, window_length)
        return paddle.to_tensor(values).astype("float32")

    @property
    def stft_params(self):
        """STFTParams object in use, which can be re-used for other
        AudioSignals.

        This property can be set as well. Values left as ``None`` in the
        assigned STFTParams are filled in from the signal: the window
        length defaults to the next power of two of
        ``0.032 * sample_rate`` samples, the hop length to a quarter of
        that, and the window type to "hann".

        Returns
        -------
        STFTParams
            STFT parameters for the AudioSignal.

        Examples
        --------
        >>> stft_params = STFTParams(128, 32)
        >>> signal1 = AudioSignal(paddle.randn([44100]), 44100, stft_params=stft_params)
        >>> signal2 = AudioSignal(paddle.randn([44100]), 44100, stft_params=signal1.stft_params)
        >>> signal1.stft_params = STFTParams() # Defaults
        """
        params = self._stft_params
        return params

    @stft_params.setter
    def stft_params(self, value: STFTParams):
        # Default window: next power of two of a 32 ms window (in samples);
        # default hop: a quarter of that window.
        win_len = int(2**(np.ceil(np.log2(0.032 * self.sample_rate))))
        defaults = STFTParams(
            window_length=win_len,
            hop_length=win_len // 4,
            window_type="hann",
            match_stride=False,
            padding_type="reflect", )._asdict()

        # A falsy value (e.g. None) means "use the defaults throughout".
        params = value._asdict() if value else defaults

        # Fill every field that was left as None with its default.
        for key, fallback in defaults.items():
            if params[key] is None:
                params[key] = fallback

        self._stft_params = STFTParams(**params)
        # Stored STFT data was computed with the old parameters; clear it.
        self.stft_data = None

    def compute_stft_padding(self,
                             window_length: int,
                             hop_length: int,
                             match_stride: bool):
        """Work out the STFT padding amounts, based on match_stride.

        Parameters
        ----------
        window_length : int
            Window length of STFT.
        hop_length : int
            Hop length of STFT.
        match_stride : bool
            Whether or not to match stride, making the STFT have the same alignment as
            convolutional layers.

        Returns
        -------
        tuple
            ``(right_pad, pad)``. With ``match_stride`` False both are 0.
            Otherwise ``right_pad`` rounds the signal length up to a
            multiple of ``hop_length`` and ``pad`` is
            ``(window_length - hop_length) // 2``.

        Raises
        ------
        AssertionError
            If ``match_stride`` is set and ``hop_length`` is not
            ``window_length // 4``.
        """
        num_samples = self.signal_length

        if not match_stride:
            return 0, 0

        assert hop_length == window_length // 4, "For match_stride, hop must equal n_fft // 4"
        num_hops = math.ceil(num_samples / hop_length)
        right_pad = num_hops * hop_length - num_samples
        pad = (window_length - hop_length) // 2
        return right_pad, pad

    def stft(
            self,
            window_length: int=None,
            hop_length: int=None,
            window_type: str=None,
            match_stride: bool=None,
            padding_type: str=None, ):
        """Computes the short-time Fourier transform of the audio data,
        with specified STFT parameters.

        Every argument left as ``None`` falls back to the corresponding
        field of ``self.stft_params``. The result is also stored in
        ``self.stft_data``.

        Parameters
        ----------
        window_length : int, optional
            Window length of STFT, by default ``self.stft_params.window_length``.
        hop_length : int, optional
            Hop length of STFT, by default ``self.stft_params.hop_length``.
        window_type : str, optional
            Type of window to use, by default ``self.stft_params.window_type``.
        match_stride : bool, optional
            Whether to match the stride of convolutional layers, by
            default ``self.stft_params.match_stride``.
        padding_type : str, optional
            Padding mode used both for the explicit padding and for the
            centering padding of the STFT, by default
            ``self.stft_params.padding_type``.

        Returns
        -------
        paddle.Tensor
            STFT of audio data, reshaped to
            (batch, channels, frequencies, time).

        Examples
        --------
        Compute the STFT of an AudioSignal:

        >>> signal = AudioSignal(paddle.randn([44100]), 44100)
        >>> signal.stft()

        Vary the window and hop length:

        >>> stft_params_list = [STFTParams(128, 32), STFTParams(512, 128)]
        >>> for stft_params in stft_params_list:
        >>>     signal.stft_params = stft_params
        >>>     signal.stft()

        """
        window_length = self.stft_params.window_length if window_length is None else int(
            window_length)
        hop_length = self.stft_params.hop_length if hop_length is None else int(
            hop_length)
        window_type = self.stft_params.window_type if window_type is None else window_type
        match_stride = self.stft_params.match_stride if match_stride is None else match_stride
        padding_type = self.stft_params.padding_type if padding_type is None else padding_type

        window = self.get_window(window_type, window_length)

        audio_data = self.audio_data
        right_pad, pad = self.compute_stft_padding(window_length, hop_length,
                                                   match_stride)
        # Honour the resolved padding_type; it used to be resolved above and
        # then ignored in favour of a hard-coded "reflect".
        audio_data = paddle.nn.functional.pad(
            x=audio_data,
            pad=[pad, pad + right_pad],
            mode=padding_type,
            data_format="NCL", )
        # Fold batch and channel axes together for the STFT call.
        stft_data = paddle.signal.stft(
            audio_data.reshape([-1, audio_data.shape[-1]]).astype("float32"),
            n_fft=window_length,
            hop_length=hop_length,
            window=window,
            center=True,
            pad_mode=padding_type, )
        _, nf, nt = stft_data.shape
        stft_data = stft_data.reshape(
            [self.batch_size, self.num_channels, nf, nt])

        if match_stride:
            # Drop first two and last two frames, which are added
            # because of padding. Now num_frames * hop_length = num_samples.
            stft_data = stft_data[..., 2:-2]
        self.stft_data = stft_data

        return stft_data

    def istft(
            self,
            window_length: int=None,
            hop_length: int=None,
            window_type: str=None,
            match_stride: bool=None,
            length: int=None, ):
        """Invert ``self.stft_data`` and store the result as audio_data.

        Every STFT argument left as ``None`` falls back to the
        corresponding field of ``self.stft_params``.

        Parameters
        ----------
        window_length : int, optional
            Window length of STFT, by default ``self.stft_params.window_length``.
        hop_length : int, optional
            Hop length of STFT, by default ``self.stft_params.hop_length``.
        window_type : str, optional
            Type of window to use, by default ``self.stft_params.window_type``.
        match_stride : bool, optional
            Whether to match the stride of convolutional layers, by
            default ``self.stft_params.match_stride``.
        length : int, optional
            Output length passed to the inverse STFT. By default it is
            ``self.original_signal_length`` plus the padding computed by
            ``compute_stft_padding``.

        Returns
        -------
        AudioSignal
            AudioSignal with istft applied.

        Raises
        ------
        RuntimeError
            If ``stft_data`` is not set, e.g. because stft was not called
            before istft.
        """
        if self.stft_data is None:
            raise RuntimeError("Cannot do inverse STFT without self.stft_data!")

        params = self.stft_params
        if window_length is None:
            window_length = params.window_length
        else:
            window_length = int(window_length)
        if hop_length is None:
            hop_length = params.hop_length
        else:
            hop_length = int(hop_length)
        if window_type is None:
            window_type = params.window_type
        if match_stride is None:
            match_stride = params.match_stride

        window = self.get_window(window_type, window_length,
                                 self.stft_data.place)

        # Fold batch and channel axes together for the inverse transform.
        nb, nch, nf, nt = self.stft_data.shape
        spectrum = self.stft_data.reshape([nb * nch, nf, nt])
        right_pad, pad = self.compute_stft_padding(window_length, hop_length,
                                                   match_stride)

        if length is None:
            length = self.original_signal_length + 2 * pad + right_pad

        if match_stride:
            # Zero-pad the STFT on either side, putting back the frames that were
            # dropped in stft().
            spectrum = paddle.nn.functional.pad(
                spectrum, pad=(2, 2), data_format="NCL")

        waveform = paddle.signal.istft(
            spectrum,
            n_fft=window_length,
            hop_length=hop_length,
            window=window,
            length=length,
            center=True, )
        waveform = waveform.reshape([nb, nch, -1])
        if match_stride:
            # Strip the sample padding that stft() added around the audio.
            waveform = waveform[..., pad:-(pad + right_pad)]
        self.audio_data = waveform

        return self

    @staticmethod
    @functools.lru_cache(None)
    def get_mel_filters(sr: int,
                        n_fft: int,
                        n_mels: int,
                        fmin: float=0.0,
                        fmax: float=None):
        """Build a Mel filterbank matrix with ``librosa.filters.mel``.

        Results are cached with ``functools.lru_cache``.

        Parameters
        ----------
        sr : int
            Sample rate of audio
        n_fft : int
            Number of FFT bins
        n_mels : int
            Number of mels
        fmin : float, optional
            Lowest frequency, in Hz, by default 0.0
        fmax : float, optional
            Highest frequency, by default None

        Returns
        -------
        np.ndarray [shape=(n_mels, 1 + n_fft/2)]
            Mel transform matrix
        """
        from librosa.filters import mel as librosa_mel_fn

        filter_args = dict(
            sr=sr,
            n_fft=n_fft,
            n_mels=n_mels,
            fmin=fmin,
            fmax=fmax, )
        return librosa_mel_fn(**filter_args)

    def mel_spectrogram(
            self,
            n_mels: int=80,
            mel_fmin: float=0.0,
            mel_fmax: float=None,
            **kwargs, ):
        """Computes a Mel spectrogram.

        The STFT is (re)computed through ``self.stft(**kwargs)``, its magnitude
        is taken, and the frequency axis is projected onto a mel filterbank
        obtained from ``self.get_mel_filters``.

        Parameters
        ----------
        n_mels : int, optional
            Number of mels, by default 80
        mel_fmin : float, optional
            Lowest frequency, in Hz, by default 0.0
        mel_fmax : float, optional
            Highest frequency, by default None
        kwargs : dict, optional
            Keyword arguments to self.stft().

        Returns
        -------
        paddle.Tensor [shape=(batch, channels, mels, time)]
            Mel spectrogram.
        """
        spec_mag = paddle.abs(self.stft(**kwargs))

        # The filterbank is requested for n_fft = 2 * (bins - 1), i.e. the
        # frequency axis (dim 2) is treated as holding n_fft // 2 + 1 bins.
        num_bins = spec_mag.shape[2]
        filterbank = paddle.to_tensor(
            self.get_mel_filters(
                sr=self.sample_rate,
                n_fft=2 * (num_bins - 1),
                n_mels=n_mels,
                fmin=mel_fmin,
                fmax=mel_fmax, ))

        # Move frequency last, project it onto the mel basis, then move the
        # resulting mel axis back to position 2.
        time_major = spec_mag.transpose([0, 1, 3, 2]) @ filterbank.T
        return time_major.transpose([0, 1, 3, 2])

    @staticmethod
    @functools.lru_cache(None)
    def get_dct(n_mfcc: int, n_mels: int, norm: str="ortho", device: str=None):
        """Create a discrete cosine transform (DCT) transformation matrix with shape (``n_mels``, ``n_mfcc``),
        it can be normalized depending on norm. For more information about dct:
        http://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II

        The result is memoized with an unbounded ``functools.lru_cache`` keyed
        on all four arguments.

        Parameters
        ----------
        n_mfcc : int
            Number of mfccs
        n_mels : int
            Number of mels
        norm   : str
            Use "ortho" to get a orthogonal matrix or None, by default "ortho"
        device : str, optional
            Not forwarded to ``create_dct``; it only takes part in the cache
            key, by default None

        Returns
        -------
        paddle.Tensor [shape=(n_mels, n_mfcc)] T
            The dct transformation matrix.
        """

        # NOTE: ``device`` is intentionally not passed on; ``create_dct`` is
        # called with the three shape/normalization arguments only.
        return create_dct(n_mfcc, n_mels, norm)

    def mfcc(
            self,
            n_mfcc: int=40,
            n_mels: int=80,
            log_offset: float=1e-6,
            **kwargs, ):
        """Computes mel-frequency cepstral coefficients (MFCCs).

        The mel spectrogram is offset by ``log_offset``, passed through a
        natural logarithm and projected onto an orthonormal DCT basis.

        Parameters
        ----------
        n_mfcc : int, optional
            Number of mfccs, by default 40
        n_mels : int, optional
            Number of mels, by default 80
        log_offset: float, optional
            Small value to prevent numerical issues when trying to compute log(0), by default 1e-6
        kwargs : dict, optional
            Keyword arguments to self.mel_spectrogram(), note that some of them will be used for self.stft()

        Returns
        -------
        paddle.Tensor [shape=(batch, channels, mfccs, time)]
            MFCCs.
        """
        log_mel = paddle.log(
            self.mel_spectrogram(n_mels, **kwargs) + log_offset)
        dct_basis = self.get_dct(n_mfcc, n_mels, "ortho", self.device)

        # Put the mel axis last so it can be contracted with the DCT basis,
        # then restore the (batch, channels, coeffs, time) layout.
        coeffs = log_mel.transpose([0, 1, 3, 2]) @ dct_basis
        return coeffs.transpose([0, 1, 3, 2])

    @property
    def magnitude(self):
        """Absolute value (magnitude) of the STFT.

        Reading this property computes the STFT first when ``self.stft_data``
        is ``None``. Assigning to it rebuilds ``self.stft_data`` from the new
        magnitude combined with the current phase.

        Returns
        -------
        paddle.Tensor
            Magnitude of STFT.

        Examples
        --------
        >>> signal = AudioSignal(paddle.randn([44100]), 44100)
        >>> magnitude = signal.magnitude # Computes stft if not computed
        >>> magnitude[magnitude < magnitude.mean()] = 0
        >>> signal.magnitude = magnitude
        >>> signal.istft()
        """
        stft_missing = self.stft_data is None
        if stft_missing:
            self.stft()
        return paddle.abs(self.stft_data)

    @magnitude.setter
    def magnitude(self, value):
        # Recombine the supplied magnitude with the existing phase.
        phasor = util.exp_compat(1j * self.phase)
        self.stft_data = value * phasor

    def log_magnitude(self,
                      ref_value: float=1.0,
                      amin: float=1e-5,
                      top_db: float=80.0):
        """Computes the log-magnitude of the spectrogram.

        The computation is carried out in the power domain: the magnitude,
        ``ref_value`` and ``amin`` are all squared and ``10 * log10`` is
        applied, which equals ``20 * log10`` of the amplitude ratio.

        Parameters
        ----------
        ref_value : float, optional
            The magnitude is scaled relative to ``ref``: ``20 * log10(S / ref)``.
            Zeros in the output correspond to positions where ``S == ref``,
            by default 1.0. A tensor is accepted and converted with ``.item()``.
        amin : float, optional
            Minimum threshold for ``S`` and ``ref``, by default 1e-5
        top_db : float, optional
            Threshold the output at ``top_db`` below the peak:
            ``max(20 * log10(S/ref)) - top_db``, by default 80.0.
            Pass ``None`` to disable the threshold.

        Returns
        -------
        paddle.Tensor
            Log-magnitude spectrogram
        """
        magnitude = self.magnitude

        # Everything below lives in the power domain, hence the squares.
        amin = amin**2
        log_spec = 10.0 * paddle.log10(magnitude.pow(2).clip(min=amin))
        if paddle.is_tensor(ref_value):
            ref_value = ref_value.item()
        # The reference must be squared as well; otherwise an amplitude is
        # compared against (and subtracted from) power quantities and the
        # output is not 20 * log10(S / ref) unless ref == 1.
        ref_power = np.abs(ref_value)**2
        log_spec -= 10.0 * np.log10(np.maximum(amin, ref_power))

        if top_db is not None:
            log_spec = paddle.maximum(log_spec, log_spec.max() - top_db)
        return log_spec

    @property
    def phase(self):
        """Phase (angle) of the STFT.

        Reading this property computes the STFT first when ``self.stft_data``
        is ``None``. Assigning to it rebuilds ``self.stft_data`` so that its
        phase matches the assigned value, combined with the original magnitude.

        Returns
        -------
        paddle.Tensor
            Phase of STFT.

        Examples
        --------
        >>> signal = AudioSignal(paddle.randn([44100]), 44100)
        >>> phase = signal.phase # Computes stft if not computed
        >>> phase[phase < phase.mean()] = 0
        >>> signal.phase = phase
        >>> signal.istft()
        """
        stft_missing = self.stft_data is None
        if stft_missing:
            self.stft()
        return paddle.angle(self.stft_data)

    @phase.setter
    def phase(self, value):
        # Keep the current magnitude and attach the new phase to it.
        current_mag = self.magnitude
        self.stft_data = current_mag * util.exp_compat(1j * value)

    # Operator overloading
    def __add__(self, other):
        """Return a clone of this signal with ``other`` added to its audio data."""
        result = self.clone()
        # ``util._get_value`` turns ``other`` into the operand that is added.
        result.audio_data += util._get_value(other)
        return result

    def __iadd__(self, other):
        """Add ``other`` to this signal's audio data in place and return ``self``."""
        addend = util._get_value(other)
        self.audio_data += addend
        return self

    def __radd__(self, other):
        """Reflected addition (``other + self``); delegates to ``self + other``."""
        return self + other

    def __sub__(self, other):
        """Return a clone of this signal with ``other`` subtracted from its audio data."""
        result = self.clone()
        # ``util._get_value`` turns ``other`` into the operand that is subtracted.
        result.audio_data -= util._get_value(other)
        return result

    def __isub__(self, other):
        """Subtract ``other`` from this signal's audio data in place and return ``self``."""
        subtrahend = util._get_value(other)
        self.audio_data -= subtrahend
        return self

    def __mul__(self, other):
        """Return a clone of this signal with its audio data multiplied by ``other``."""
        result = self.clone()
        # ``util._get_value`` turns ``other`` into the multiplier.
        result.audio_data *= util._get_value(other)
        return result

    def __imul__(self, other):
        """Multiply this signal's audio data by ``other`` in place and return ``self``."""
        factor = util._get_value(other)
        self.audio_data *= factor
        return self

    def __rmul__(self, other):
        """Reflected multiplication (``other * self``); delegates to ``self * other``."""
        return self * other

    # Representation
    def _info(self):
        """Collect the summary fields used by the textual representations.

        Returns
        -------
        dict
            Mapping from field name to value, in display order. Falsy values
            for duration, path and channel count are replaced by placeholder
            strings.
        """
        if self.signal_duration:
            dur = f"{self.signal_duration:0.3f}"
        else:
            dur = "[unknown]"

        info = {}
        info["duration"] = f"{dur} seconds"
        info["batch_size"] = self.batch_size
        info["path"] = self.path_to_file or "path unknown"
        info["sample_rate"] = self.sample_rate
        info["num_channels"] = self.num_channels or "[unknown]"
        info["audio_data.shape"] = self.audio_data.shape
        info["stft_params"] = self.stft_params
        info["device"] = self.device
        return info

    def markdown(self):
        """Produces a markdown representation of AudioSignal, in a markdown table.

        One table row is emitted per entry of ``self._info()``, after a fixed
        two-line header.

        Returns
        -------
        str
            Markdown representation of AudioSignal.

        Examples
        --------
        >>> signal = AudioSignal(paddle.randn([44100]), 44100)
        >>> print(signal.markdown())
        | Key | Value
        |---|---
        | duration | 1.000 seconds |
        | batch_size | 1 |
        | path | path unknown |
        | sample_rate | 44100 |
        | num_channels | 1 |
        | audio_data.shape | paddle.Size([1, 1, 44100]) |
        | stft_params | STFTParams(window_length=2048, hop_length=512, window_type='sqrt_hann', match_stride=False) |
        | device | cpu |
        """
        header = "| Key | Value \n" "|---|--- \n"
        rows = [
            f"| {field} | {content} |\n"
            for field, content in self._info().items()
        ]
        return header + "".join(rows)

    def __str__(self):
        """Return one ``key: value`` line per entry of ``self._info()``."""
        return "".join(f"{field}: {content}\n"
                       for field, content in self._info().items())

    def __rich__(self):
        """Build a ``rich`` table with one Key/Value row per ``self._info()`` entry.

        ``rich`` is imported lazily so it is only needed when this hook is used.
        """
        from rich.table import Table

        summary = self._info()

        table = Table(title=f"{self.__class__.__name__}")
        for heading, colour in (("Key", "green"), ("Value", "cyan")):
            table.add_column(heading, style=colour)

        # Values are stringified; keys are passed through unchanged.
        for field, content in summary.items():
            table.add_row(field, str(content))
        return table

    # Comparison
    def __eq__(self, other):
        """Compare every tensor attribute of ``self`` with the same attribute of ``other``.

        Only entries of ``self.__dict__`` that are tensors take part in the
        comparison. Complex tensors are compared through NumPy, all others with
        ``paddle.allclose``; both use ``atol=1e-6``. On the first mismatch the
        maximum absolute error is printed and ``False`` is returned.

        ``False`` is also returned (instead of raising) when ``other`` has no
        instance ``__dict__``, lacks the attribute, or holds a non-tensor
        (e.g. ``None``) where ``self`` holds a tensor.

        Parameters
        ----------
        other : object
            Object to compare against, normally another signal.

        Returns
        -------
        bool
            Whether all tensor attributes are close.
        """
        other_attrs = getattr(other, "__dict__", None)
        if other_attrs is None:
            return False

        for k, v in list(self.__dict__.items()):
            if not paddle.is_tensor(v):
                continue

            other_v = other_attrs.get(k)
            if not paddle.is_tensor(other_v):
                # Nothing comparable on the other side: not equal.
                return False

            if paddle.is_complex(v):
                # Complex tensors are compared through NumPy.
                close = np.allclose(
                    v.cpu().numpy(), other_v.cpu().numpy(), atol=1e-6)
            else:
                close = paddle.allclose(v, other_v, atol=1e-6)

            if not close:
                max_error = (v - other_v).abs().max()
                print(f"Max abs error for {k}: {max_error}")
                return False
        return True

    # Indexing
    def __getitem__(self, key):
        """Return a new signal holding the items selected by ``key``.

        Two kinds of keys are supported:

        * a 0-dim tensor whose ``item()`` is ``True``: the whole signal is
          selected (the batch size must be 1);
        * ``bool``, ``int``, ``list``, ``slice``, ``tuple`` or a tensor with at
          most one dimension: ``key`` is applied to ``audio_data``, to
          ``_loudness`` (when set) and, through ``util.bool_index_compat``, to
          ``stft_data`` (when set).

        The result is built with ``type(self)`` from the selected audio, the
        same sample rate and the same STFT parameters; its ``sources`` is
        ``None``.

        Raises
        ------
        TypeError
            If ``key`` is of neither supported kind. Previously this surfaced
            as an ``UnboundLocalError`` from inside the method.
        """
        if paddle.is_tensor(key) and key.ndim == 0 and key.item() is True:
            assert self.batch_size == 1
            audio_data = self.audio_data
            _loudness = self._loudness
            stft_data = self.stft_data

        elif isinstance(key, (bool, int, list, slice, tuple)) or (
                paddle.is_tensor(key) and key.ndim <= 1):
            # Indexing only on the batch dimension.
            # Then let's copy over relevant stuff.
            # Future work: make this work for time-indexing
            # as well, using the hop length.
            audio_data = self.audio_data[key]
            _loudness = self._loudness[
                key] if self._loudness is not None else None
            stft_data = util.bool_index_compat(
                self.stft_data, key) if self.stft_data is not None else None

        else:
            # Fail with a clear message rather than on the first use of an
            # unassigned local below.
            raise TypeError(
                f"Unsupported index of type {type(key).__name__} for "
                f"{type(self).__name__}")

        sources = None

        indexed = type(self)(
            audio_data, self.sample_rate, stft_params=self.stft_params)
        indexed._loudness = _loudness
        indexed._stft_data = stft_data
        indexed.sources = sources

        return indexed

    def __setitem__(self, key, value):
        """Assign ``value`` to the items selected by ``key``.

        * If ``value`` is not an instance of ``type(self)`` it is written
          straight into ``self.audio_data[key]``.
        * If ``key`` is a 0-dim tensor whose ``item()`` is ``True``, the audio,
          loudness and STFT data of ``value`` replace those of ``self`` (the
          batch size must be 1).
        * If ``key`` is a ``bool``, ``int``, ``list``, ``slice``, ``tuple`` or
          a tensor with at most one dimension, each of audio, loudness and STFT
          data is updated when it is set on both ``self`` and ``value``.
        * Any other key is ignored.
        """
        if not isinstance(value, type(self)):
            self.audio_data[key] = value
            return

        key_is_tensor = paddle.is_tensor(key)

        if key_is_tensor and key.ndim == 0 and key.item() is True:
            assert self.batch_size == 1
            self.audio_data = value.audio_data
            self._loudness = value._loudness
            self.stft_data = value.stft_data
            return

        selects_items = isinstance(key, (bool, int, list, slice, tuple)) or (
            key_is_tensor and key.ndim <= 1)
        if not selects_items:
            return

        if self.audio_data is not None and value.audio_data is not None:
            self.audio_data[key] = value.audio_data

        if self._loudness is not None and value._loudness is not None:
            loudness_key = key
            if key_is_tensor and key.dtype == paddle.bool:
                # Boolean masks are converted to integer positions before
                # they are used to index the loudness tensor.
                loudness_key = paddle.nonzero(key).flatten()
            self._loudness[loudness_key] = value._loudness

        if self.stft_data is not None and value.stft_data is not None:
            # The STFT is updated out of place through the compat helper.
            self.stft_data = util.bool_setitem_compat(self.stft_data, key,
                                                      value.stft_data)

    def __ne__(self, other):
        """Logical negation of ``self == other``."""
        return not self == other


================================================
FILE: paddlespeech/audiotools/core/display.py
================================================
# MIT License, Copyright (c) 2023-Present, Descript.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Modified from audiotools(https://github.com/descriptinc/audiotools/blob/master/audiotools/core/display.py)
import inspect
import typing
from functools import wraps

from . import util


def format_figure(func):
    """Decorator that applies ``util.format_figure`` after a plotting call.

    Keyword arguments whose names match parameters of ``util.format_figure``
    are removed from the call to ``func`` and handed to ``util.format_figure``
    once ``func`` has run. The wrapper itself returns ``None``.

    Parameters
    ----------
    func : Callable
        Plotting function that is decorated by this function.

    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        accepted = inspect.signature(util.format_figure).parameters.keys()
        # Split off the formatting options, keeping their original order.
        figure_kwargs = {
            name: kwargs.pop(name)
            for name in list(kwargs) if name in accepted
        }
        func(*args, **kwargs)
        util.format_figure(**figure_kwargs)

    return wrapper


class DisplayMixin:
    """Plotting and logging helpers for audio signals.

    The methods rely on the host class for ``clone``, ``preemphasis``,
    ``magnitude``, ``log_magnitude``, ``mel_spectrogram``, ``stft_data``,
    ``audio_data`` and ``sample_rate``. ``librosa`` and ``matplotlib`` are
    imported lazily inside the methods that need them.
    """

    @format_figure
    def specshow(
            self,
            preemphasis: bool=False,
            x_axis: str="time",
            y_axis: str="linear",
            n_mels: int=128,
            **kwargs, ):
        """Displays a spectrogram, using ``librosa.display.specshow``.

        The first batch item is shown, averaged over its channels.

        Parameters
        ----------
        preemphasis : bool, optional
            Whether or not to apply preemphasis, which makes high
            frequency detail easier to see, by default False
        x_axis : str, optional
            How to label the x axis, by default "time"
        y_axis : str, optional
            How to label the y axis, by default "linear"
        n_mels : int, optional
            If displaying a mel spectrogram with ``y_axis = "mel"``,
            this controls the number of mels, by default 128.
        kwargs : dict, optional
            Keyword arguments to :py:func:`audiotools.core.util.format_figure`
            (consumed by the decorator); anything left over is passed to
            ``librosa.display.specshow``.
        """
        import librosa
        import librosa.display

        # Always re-compute the STFT data before showing it, in case
        # it changed.
        signal = self.clone()
        signal.stft_data = None

        if preemphasis:
            signal.preemphasis()

        if y_axis == "mel":
            log_mag = 20 * signal.mel_spectrogram(n_mels).clip(1e-5).log10()
            log_mag -= log_mag.max()
        else:
            # Only needed for non-mel axes; it used to be computed and then
            # thrown away when a mel spectrogram was requested.
            ref = signal.magnitude.max()
            log_mag = signal.log_magnitude(ref_value=ref)

        librosa.display.specshow(
            log_mag.numpy()[0].mean(axis=0),
            x_axis=x_axis,
            y_axis=y_axis,
            sr=signal.sample_rate,
            **kwargs, )

    @format_figure
    def waveplot(self, x_axis: str="time", **kwargs):
        """Displays a waveform plot, using ``librosa.display.waveshow``
        (or ``librosa.display.waveplot`` when ``waveshow`` is unavailable).

        The first batch item is shown, averaged over its channels.

        Parameters
        ----------
        x_axis : str, optional
            How to label the x axis, by default "time"
        kwargs : dict, optional
            Keyword arguments to :py:func:`audiotools.core.util.format_figure`
            (consumed by the decorator); anything left over is passed to the
            librosa plotting function.
        """
        import librosa
        import librosa.display

        audio_data = self.audio_data[0].mean(axis=0)
        audio_data = audio_data.cpu().numpy()

        # Pick whichever of the two function names this librosa provides.
        plot_fn = "waveshow" if hasattr(librosa.display,
                                        "waveshow") else "waveplot"
        wave_plot_fn = getattr(librosa.display, plot_fn)
        wave_plot_fn(audio_data, x_axis=x_axis, sr=self.sample_rate, **kwargs)

    @format_figure
    def wavespec(self, x_axis: str="time", **kwargs):
        """Displays a waveform plot stacked on top of a spectrogram.

        The figure is split into six rows: the waveform takes the first row
        and the spectrogram the remaining five.

        Parameters
        ----------
        x_axis : str, optional
            How to label the x axis, by default "time"
        kwargs : dict, optional
            Keyword arguments to :py:func:`audiotools.core.display.DisplayMixin.specshow`.
        """
        import matplotlib.pyplot as plt
        from matplotlib.gridspec import GridSpec

        gs = GridSpec(6, 1)
        plt.subplot(gs[0, :])
        self.waveplot(x_axis=x_axis)
        plt.subplot(gs[1:, :])
        self.specshow(x_axis=x_axis, **kwargs)

    def write_audio_to_tb(
            self,
            tag: str,
            writer,
            step: int=None,
            plot_fn: typing.Union[typing.Callable, str]="specshow",
            **kwargs, ):
        """Writes a signal and its spectrogram to Tensorboard. Will show up
        under the Audio and Images tab in Tensorboard.

        Only the first channel of the first batch item is written as audio.

        Parameters
        ----------
        tag : str
            Tag to write signal to (e.g. ``clean/sample_0.wav``). The image will be
            written to the corresponding ``.png`` file (e.g. ``clean/sample_0.png``).
        writer : SummaryWriter
            A SummaryWriter object from PyTorch library.
        step : int, optional
            The step to write the signal to, by default None
        plot_fn : typing.Union[typing.Callable, str], optional
            How to create the image. Set to ``None`` to avoid plotting, by default "specshow"
        kwargs : dict, optional
            Keyword arguments to :py:func:`audiotools.core.display.DisplayMixin.specshow` or
            whatever ``plot_fn`` is set to.
        """
        import matplotlib.pyplot as plt

        audio_data = self.audio_data[0, 0].detach().cpu().numpy()
        sample_rate = self.sample_rate
        writer.add_audio(tag, audio_data, step, sample_rate)

        if plot_fn is not None:
            if isinstance(plot_fn, str):
                plot_fn = getattr(self, plot_fn)
            fig = plt.figure()
            plt.clf()
            plot_fn(**kwargs)
            # NOTE: every occurrence of "wav" in the tag is replaced, not
            # just a trailing extension.
            writer.add_figure(tag.replace("wav", "png"), fig, step)

    def save_image(
            self,
            image_path: str,
            plot_fn: typing.Union[typing.Callable, str]="specshow",
            **kwargs, ):
        """Save AudioSignal spectrogram (or whatever ``plot_fn`` is set to) to
        a specified file.

        The current matplotlib figure is cleared, drawn into, saved and closed.

        Parameters
        ----------
        image_path : str
            Where to save the file to.
        plot_fn : typing.Union[typing.Callable, str], optional
            How to create the image. Set to ``None`` to avoid plotting (the
            cleared figure is then saved as is), by default "specshow"
        kwargs : dict, optional
            Keyword arguments to :py:func:`audiotools.core.display.DisplayMixin.specshow` or
            whatever ``plot_fn`` is set to.
        """
        import matplotlib.pyplot as plt

        if isinstance(plot_fn, str):
            plot_fn = getattr(self, plot_fn)

        plt.clf()
        # ``None`` is documented as "no plotting"; calling it used to raise
        # a TypeError instead.
        if plot_fn is not None:
            plot_fn(**kwargs)
        plt.savefig(image_path, bbox_inches="tight", pad_inches=0)
        plt.close()


================================================
FILE: paddlespeech/audiotools/core/dsp.py
================================================
# MIT License, Copyright (c) 2023-Present, Descript.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Modified from audiotools(https://github.com/descriptinc/audiotools/blob/master/audiotools/core/dsp.py)
import typing

import numpy as np
import paddle

from . import _julius
from . import util


def _unfold(x, kernel_sizes, strides):
    """Wrapper around ``paddle.nn.functional.unfold`` with a height-1 kernel workaround.

    When the kernel height is 1 the input is extended with zeros along axis 2
    and unfolded with a kernel height of 2 instead; the result is then cut
    back to its first ``kernel_sizes[1]`` entries along axis 1. See
    https://github.com/PaddlePaddle/Paddle/pull/70102 for background.

    The cut is applied only when the workaround was actually used. It used to
    be triggered by any kernel height of 2, which also truncated the output
    for callers that genuinely asked for a height-2 kernel.

    Parameters
    ----------
    x : paddle.Tensor
        Input passed to ``unfold``.
        NOTE(review): the slicing below looks like it assumes a single input
        channel and an input height of 1 - confirm against callers.
    kernel_sizes : sequence of two ints
        Kernel height and width.
    strides : sequence of two ints
        Strides passed to ``unfold``.

    Returns
    -------
    paddle.Tensor
        Unfolded tensor.
    """
    # https://github.com/PaddlePaddle/Paddle/pull/70102

    use_workaround = 1 == kernel_sizes[0]
    if use_workaround:
        x_zeros = paddle.zeros_like(x)
        x = paddle.concat([x, x_zeros], axis=2)

        kernel_sizes = [2, kernel_sizes[1]]
        strides = list(strides)

    unfolded = paddle.nn.functional.unfold(
        x,
        kernel_sizes=kernel_sizes,
        strides=strides, )
    if use_workaround:
        # Drop the entries that come from the zero rows added above.
        unfolded = unfolded[:, :kernel_sizes[1]]
    return unfolded


def _fold(x, output_sizes, kernel_sizes, strides):
    """Wrapper around ``paddle.nn.functional.fold`` with a height-1 workaround.

    When both the output height and the kernel height are 1, the input is
    extended with zeros along axis 1 and folded with output and kernel heights
    of 2 instead; only the first row of the result (axis 2) is kept. See
    https://github.com/PaddlePaddle/Paddle/pull/70102 for background.

    The final cut is applied only when the workaround was actually used. It
    used to be triggered by any kernel height of 2, which also truncated the
    output for callers that genuinely asked for a height-2 kernel.

    Parameters
    ----------
    x : paddle.Tensor
        Input passed to ``fold``.
    output_sizes : sequence of two ints
        Output height and width.
    kernel_sizes : sequence of two ints
        Kernel height and width.
    strides : sequence of two ints
        Strides passed to ``fold``.

    Returns
    -------
    paddle.Tensor
        Folded tensor.
    """
    # https://github.com/PaddlePaddle/Paddle/pull/70102

    use_workaround = 1 == output_sizes[0] and 1 == kernel_sizes[0]
    if use_workaround:
        x_zeros = paddle.zeros_like(x)
        x = paddle.concat([x, x_zeros], axis=1)

        output_sizes = (2, output_sizes[1])
        kernel_sizes = (2, kernel_sizes[1])

    fold = paddle.nn.functional.fold(
        x,
        output_sizes=output_sizes,
        kernel_sizes=kernel_sizes,
        strides=strides, )
    if use_workaround:
        # Keep only the first output row; the second one exists solely
        # because of the zero padding added above.
        fold = fold[:, :, :1]
    return fold


class DSPMixin:
    _original_batch_size = None
    _original_num_channels = None
    _padded_signal_length = None

    def _preprocess_signal_for_windowing(self, window_duration, hop_duration):
        """Prepare the signal for windowing and return the lengths in samples.

        Records the current batch size and channel count, converts both
        durations to sample counts, rounds the window length down to a whole
        number of hops, zero-pads the signal by one hop on each side and
        records the padded length.

        Parameters
        ----------
        window_duration : float
            Window duration in seconds.
        hop_duration : float
            Hop duration in seconds.

        Returns
        -------
        tuple
            ``(window_length, hop_length)`` in samples.
        """
        self._original_batch_size = self.batch_size
        self._original_num_channels = self.num_channels

        sample_rate = self.sample_rate
        window_length = int(window_duration * sample_rate)
        hop_length = int(hop_duration * sample_rate)

        # Removing the remainder rounds the window down to a multiple of the
        # hop (a no-op when it already is one).
        window_length -= window_length % hop_length

        self.zero_pad(hop_length, hop_length)
        self._padded_signal_length = self.signal_length

        return window_length, hop_length

    def windows(self,
                window_duration: float,
                hop_duration: float,
                preprocess: bool=True):
        """Generator which yields windows of specified duration from signal with a specified
        hop length.

        The signal is modified in place: its audio is reshaped to
        ``(-1, 1, signal_length)`` and, when ``preprocess`` is set, it is first
        zero-padded by one hop on each side.

        Parameters
        ----------
        window_duration : float
            Duration of every window in seconds.
        hop_duration : float
            Hop between windows in seconds.
        preprocess : bool, optional
            Whether to preprocess the signal, so that the first sample is in
            the middle of the first window, by default True. When False, the
            window and hop lengths are taken directly from the durations and
            the signal is not padded.

        Yields
        ------
        AudioSignal
            Each window is returned as an AudioSignal.

        Raises
        ------
        ValueError
            If the hop length is not positive (the iteration would never end).
        """
        if preprocess:
            window_length, hop_length = self._preprocess_signal_for_windowing(
                window_duration, hop_duration)
        else:
            # These used to be left undefined on this path, which made
            # ``preprocess=False`` fail with an unbound-variable error.
            window_length = int(window_duration * self.sample_rate)
            hop_length = int(hop_duration * self.sample_rate)

        if hop_length <= 0:
            raise ValueError(
                f"hop_duration must correspond to at least one sample, got {hop_duration}"
            )

        self.audio_data = self.audio_data.reshape([-1, 1, self.signal_length])

        for b in range(self.batch_size):
            start_idx = 0
            # Only complete windows are yielded.
            while start_idx + window_length <= self.signal_length:
                yield self[b, ..., start_idx:start_idx + window_length]
                start_idx += hop_length

    def collect_windows(self,
                        window_duration: float,
                        hop_duration: float,
                        preprocess: bool=True):
        """Reshapes signal into windows of specified duration from signal with a specified
        hop length. Window are placed along the batch dimension. Use with
        :py:func:`audiotools.core.dsp.DSPMixin.overlap_and_add` to reconstruct the
        original signal.

        Parameters
        ----------
        window_duration : float
            Duration of every window in seconds.
        hop_duration : float
            Hop between windows in seconds.
        preprocess : bool, optional
            Whether to preprocess the signal, so that the first sample is in
            the middle of the first window, by default True. When False, the
            window and hop lengths are taken directly from the durations, the
            signal is not padded, and the original batch size, channel count
            and padded length are not recorded by this call.

        Returns
        -------
        AudioSignal
            AudioSignal unfolded with shape ``(nb * nch * num_windows, 1, window_length)``
        """
        if preprocess:
            window_length, hop_length = self._preprocess_signal_for_windowing(
                window_duration, hop_duration)
        else:
            # These used to be left undefined on this path, which made
            # ``preprocess=False`` fail with an unbound-variable error.
            window_length = int(window_duration * self.sample_rate)
            hop_length = int(hop_duration * self.sample_rate)

        # self.audio_data: (nb, nch, nt).
        unfolded = _unfold(
            self.audio_data.reshape([-1, 1, 1, self.signal_length]),
            kernel_sizes=(1, window_length),
            strides=(1, hop_length), )
        # unfolded: (nb * nch, window_length, num_windows).
        # -> (nb * nch * num_windows, 1, window_length)
        unfolded = unfolded.transpose([0, 2, 1]).reshape([-1, 1, window_length])
        self.audio_data = unfolded
        return self

    def overlap_and_add(self, hop_duration: float):
        """Overlap-adds the windows stored along the batch dimension back into
        a signal with the batch size and channel count recorded before
        windowing.

        Overlapping samples are averaged: the summed windows are divided by
        the number of windows covering each sample. One hop is trimmed from
        each side afterwards.

        Parameters
        ----------
        hop_duration : float
            How much to shift for each window
            (overlap is window_duration - hop_duration) in seconds.

        Returns
        -------
        AudioSignal
            overlap-and-added signal.
        """
        hop_length = int(hop_duration * self.sample_rate)
        # Each batch item currently holds one window.
        window_length = self.signal_length

        nb = self._original_batch_size
        nch = self._original_num_channels

        fold_args = dict(
            output_sizes=(1, self._padded_signal_length),
            kernel_sizes=(1, window_length),
            strides=(1, hop_length), )

        # (nb * nch, window_length, num_windows), the layout ``_fold`` expects.
        stacked = self.audio_data.reshape([nb * nch, -1, window_length])
        stacked = stacked.transpose([0, 2, 1])

        summed = _fold(stacked, **fold_args)
        # Folding a tensor of ones counts the windows covering each sample.
        coverage = _fold(paddle.ones_like(stacked), **fold_args)

        self.audio_data = (summed / coverage).reshape([nb, nch, -1])
        self.trim(hop_length, hop_length)
        return self

    def low_pass(self,
                 cutoffs: typing.Union[paddle.Tensor, np.ndarray, float],
                 zeros: int=51):
        """Low-passes the signal in-place. Each item in the batch
        can have a different low-pass cutoff, if the input
        to this signal is an array or tensor. If a float, all
        items are given the same low-pass filter.

        The cached STFT is discarded afterwards.

        Parameters
        ----------
        cutoffs : typing.Union[paddle.Tensor, np.ndarray, float]
            Cutoff in Hz of low-pass filter.
        zeros : int, optional
            Number of taps to use in low-pass filter, by default 51

        Returns
        -------
        AudioSignal
            Low-passed AudioSignal.
        """
        # Cutoffs are expressed as a fraction of the sample rate.
        normalized = util.ensure_tensor(cutoffs, 2,
                                        self.batch_size) / self.sample_rate
        output = paddle.empty_like(self.audio_data)

        # One filter per batch item.
        for idx, cutoff in enumerate(normalized):
            item_filter = _julius.LowPassFilter(cutoff.cpu(), zeros=zeros)
            output[idx] = item_filter(self.audio_data[idx])

        self.audio_data = output
        self.stft_data = None
        return self

    def high_pass(self,
                  cutoffs: typing.Union[paddle.Tensor, np.ndarray, float],
                  zeros: int=51):
        """High-passes the signal in-place. Each item in the batch
        can have a different high-pass cutoff, if the input
        to this signal is an array or tensor. If a float, all
        items are given the same high-pass filter.

        The cached STFT is discarded afterwards.

        Parameters
        ----------
        cutoffs : typing.Union[paddle.Tensor, np.ndarray, float]
            Cutoff in Hz of high-pass filter.
        zeros : int, optional
            Number of taps to use in high-pass filter, by default 51

        Returns
        -------
        AudioSignal
            High-passed AudioSignal.
        """
        # Cutoffs are expressed as a fraction of the sample rate.
        normalized = util.ensure_tensor(cutoffs, 2,
                                        self.batch_size) / self.sample_rate
        output = paddle.empty_like(self.audio_data)

        # One filter per batch item.
        for idx, cutoff in enumerate(normalized):
            item_filter = _julius.HighPassFilter(cutoff.cpu(), zeros=zeros)
            output[idx] = item_filter(self.audio_data[idx])

        self.audio_data = output
        self.stft_data = None
        return self

    def mask_frequencies(
            self,
            fmin_hz: typing.Union[paddle.Tensor, np.ndarray, float],
            fmax_hz: typing.Union[paddle.Tensor, np.ndarray, float],
            val: float=0.0, ):
        """Masks frequencies between ``fmin_hz`` and ``fmax_hz``, and fills them
        with the value specified by ``val``. Useful for implementing SpecAug.
        The min and max can be different for every item in the batch.

        Both the magnitude and the phase of the masked bins are set to
        ``val``. The band is half-open: ``fmin_hz <= f < fmax_hz``.

        Parameters
        ----------
        fmin_hz : typing.Union[paddle.Tensor, np.ndarray, float]
            Lower end of band to mask out.
        fmax_hz : typing.Union[paddle.Tensor, np.ndarray, float]
            Upper end of band to mask out.
        val : float, optional
            Value to fill in, by default 0.0

        Returns
        -------
        AudioSignal
            Signal with ``stft_data`` manipulated. Apply ``.istft()`` to get the
            masked audio data.
        """
        # SpecAug
        mag, phase = self.magnitude, self.phase
        rank = mag.ndim
        fmin_hz = util.ensure_tensor(fmin_hz, ndim=rank)
        fmax_hz = util.ensure_tensor(fmax_hz, ndim=rank)
        assert paddle.all(fmin_hz < fmax_hz)

        # Centre frequency of every bin, from 0 up to half the sample rate,
        # tiled over the batch and time axes.
        num_bins, num_frames = mag.shape[-2], mag.shape[-1]
        freq_grid = paddle.linspace(0, self.sample_rate / 2, num_bins)
        freq_grid = freq_grid[None, None, :, None].tile(
            [self.batch_size, 1, 1, num_frames])

        # Compare in the grid's dtype.
        fmin_hz = fmin_hz.astype(freq_grid.dtype)
        fmax_hz = fmax_hz.astype(freq_grid.dtype)
        in_band = (fmin_hz <= freq_grid) & (freq_grid < fmax_hz)

        mag = paddle.where(in_band, paddle.full_like(mag, val), mag)
        phase = paddle.where(in_band, paddle.full_like(phase, val), phase)
        self.stft_data = mag * util.exp_compat(1j * phase)
        return self

    def mask_timesteps(
            self,
            tmin_s: typing.Union[paddle.Tensor, np.ndarray, float],
            tmax_s: typing.Union[paddle.Tensor, np.ndarray, float],
            val: float=0.0, ):
        """Masks timesteps between ``tmin_s`` and ``tmax_s``, and fills them
        with the value specified by ``val``. Useful for implementing SpecAug.
        The min and max can be different for every item in the batch.
        Both the magnitude and the phase of the selected frames are set to
        ``val``.

        Parameters
        ----------
        tmin_s : typing.Union[paddle.Tensor, np.ndarray, float]
            Lower end (inclusive) of timesteps to mask out, in seconds.
        tmax_s : typing.Union[paddle.Tensor, np.ndarray, float]
            Upper end (exclusive) of timesteps to mask out, in seconds.
            Must be strictly greater than ``tmin_s`` everywhere.
        val : float, optional
            Value to fill in, by default 0.0

        Returns
        -------
        AudioSignal
            Signal with ``stft_data`` manipulated. Apply ``.istft()`` to get the
            masked audio data.
        """
        # SpecAug
        mag, phase = self.magnitude, self.phase
        tmin_s = util.ensure_tensor(tmin_s, ndim=mag.ndim)
        tmax_s = util.ensure_tensor(tmax_s, ndim=mag.ndim)

        # Time stamp of every STFT frame: linearly spaced over the signal
        # duration, replicated over batch and frequency bins.
        nt = mag.shape[-1]
        bins_t = paddle.linspace(
            0,
            self.signal_duration,
            nt, )
        bins_t = bins_t[None, None, None, :].tile(
            [self.batch_size, 1, mag.shape[-2], 1])

        # Bring the bounds to the dtype of the time axis (as
        # ``mask_frequencies`` does for its bounds) so that integer or
        # float64 inputs do not lead to mixed-dtype comparisons.
        tmin_s, tmax_s = tmin_s.astype(bins_t.dtype), tmax_s.astype(
            bins_t.dtype)
        assert paddle.all(tmin_s < tmax_s)

        # build mask
        mask = (tmin_s <= bins_t) & (bins_t < tmax_s)

        mag = paddle.where(mask, paddle.full_like(mag, val), mag)
        phase = paddle.where(mask, paddle.full_like(phase, val), phase)

        self.stft_data = mag * util.exp_compat(1j * phase)
        return self

    def mask_low_magnitudes(
            self,
            db_cutoff: typing.Union[paddle.Tensor, np.ndarray, float],
            val: float=0.0):
        """Mask away magnitudes below a specified threshold, which
        can be different for every item in the batch.

        Parameters
        ----------
        db_cutoff : typing.Union[paddle.Tensor, np.ndarray, float]
            Decibel value for which things below it will be masked away.
            It is compared against ``self.log_magnitude()``.
        val : float, optional
            Value to fill in for masked portions, by default 0.0

        Returns
        -------
        AudioSignal
            Signal with its magnitude manipulated. Apply ``.istft()`` to get
            the masked audio data.
        """
        mag = self.magnitude
        log_mag = self.log_magnitude()

        db_cutoff = util.ensure_tensor(db_cutoff, ndim=mag.ndim)
        db_cutoff = db_cutoff.astype(log_mag.dtype)
        # True where the bin is quieter than the cutoff, i.e. where it has
        # to be replaced.
        mask = log_mag < db_cutoff
        # Equivalent of ``mag.masked_fill(mask, val)``: ``paddle.where``
        # takes its first value argument where the condition is True, so the
        # fill value must come first and the untouched magnitude second.
        mag = paddle.where(mask, paddle.full_like(mag, val), mag)

        self.magnitude = mag
        return self

    def shift_phase(self,
                    shift: typing.Union[paddle.Tensor, np.ndarray, float]):
        """Adds ``shift`` to the phase of the signal.

        Parameters
        ----------
        shift : typing.Union[paddle.Tensor, np.ndarray, float]
            Offset added to the phase. It is expanded to the rank of the
            phase and cast to the phase's dtype before the addition.

        Returns
        -------
        AudioSignal
            ``self``, with the phase replaced by the shifted phase. Apply
            ``.istft()`` to get the corresponding audio data.
        """
        current_phase = self.phase
        offset = util.ensure_tensor(shift, ndim=current_phase.ndim)
        offset = offset.astype(current_phase.dtype)
        self.phase = current_phase + offset
        return self

    def corrupt_phase(self,
                      scale: typing.Union[paddle.Tensor, np.ndarray, float]):
        """Corrupts the phase randomly by some scaled value.

        Noise drawn from ``paddle.randn`` (same shape and dtype as the phase)
        is multiplied by ``scale`` and added to the phase.

        Parameters
        ----------
        scale : typing.Union[paddle.Tensor, np.ndarray, float]
            Standard deviation of noise to add to the phase.

        Returns
        -------
        AudioSignal
            Signal with the phase manipulated. Apply ``.istft()`` to get the
            corrupted audio data.
        """
        # Read the phase once; every later use refers to the same tensor.
        phase = self.phase
        scale = util.ensure_tensor(scale, ndim=phase.ndim)
        # Match the phase dtype (as ``shift_phase`` does) so integer or
        # float64 scales neither fail on mixed dtypes nor silently promote
        # the phase.
        scale = scale.astype(phase.dtype)
        noise = paddle.randn(shape=phase.shape, dtype=phase.dtype)
        self.phase = phase + scale * noise
        return self

    def preemphasis(self, coef: float=0.85):
        """Filters the audio with the 3-tap kernel ``[1, -coef, 0]``.

        Every (batch, channel) row is treated as an independent
        single-channel sequence and passed through ``conv1d`` with a padding
        of 1, so the number of samples is unchanged.

        Parameters
        ----------
        coef : float, optional
            Pre-emphasis coefficient used as the (negated) middle tap of the
            kernel, by default 0.85

        Returns
        -------
        AudioSignal
            ``self``, with ``audio_data`` replaced by the filtered audio
            (in the dtype of the kernel tensor).
        """
        taps = paddle.to_tensor([1, -coef, 0]).reshape([1, 1, -1])

        # Fold batch and channel together so conv1d sees one input channel.
        rows = self.audio_data.reshape([-1, 1, self.signal_length])
        rows = rows.astype(taps.dtype)
        filtered = paddle.nn.functional.conv1d(rows, taps, padding=1)

        self.audio_data = filtered.reshape(self.audio_data.shape)
        return self


================================================
FILE: paddlespeech/audiotools/core/effects.py
================================================
# MIT License, Copyright (c) 2023-Present, Descript.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Modified from audiotools(https://github.com/descriptinc/audiotools/blob/master/audiotools/core/effects.py)
import typing

import numpy as np
import paddle

from . import util
from ._julius import SplitBands


class EffectMixin:
    # Converts decibels into the exponent of the matching linear gain:
    # exp(db * GAIN_FACTOR) == 10 ** (db / 20).
    GAIN_FACTOR = np.log(10) / 20
    # Named codec settings. The upstream audiotools project describes these
    # as presets for applying codecs via torchaudio; no method of this class
    # reads them.
    CODEC_PRESETS = {
        "8-bit": {"format": "wav", "encoding": "ULAW", "bits_per_sample": 8},
        "GSM-FR": {"format": "gsm"},
        "MP3": {"format": "mp3", "compression": -9},
        "Vorbis": {"format": "vorbis", "compression": -1},
        "Ogg": {"format": "ogg", "compression": -1},
        "Amr-nb": {"format": "amr-nb"},
    }

    def mix(
            self,
            other,
            snr: typing.Union[paddle.Tensor, np.ndarray, float]=10,
            other_eq: typing.Union[paddle.Tensor, np.ndarray]=None, ):
        """Adds ``other`` to this signal at a given signal-to-noise ratio.

        ``other`` is modified in place: it is zero-padded / truncated to the
        length of ``self``, optionally equalized, and normalized to the
        loudness of ``self`` minus ``snr``.

        Parameters
        ----------
        other : AudioSignal
            Signal to mix in.
        snr : typing.Union[paddle.Tensor, np.ndarray, float], optional
            Signal-to-noise ratio, by default 10
        other_eq : typing.Union[paddle.Tensor, np.ndarray], optional
            EQ curve applied to ``other`` before mixing, by default None

        Returns
        -------
        AudioSignal
            ``self``, with ``audio_data`` replaced by the mixture.
        """
        snr = util.ensure_tensor(snr)

        # Make ``other`` exactly as long as ``self``.
        shortfall = self.signal_length - other.signal_length
        other.zero_pad(0, shortfall if shortfall > 0 else 0)
        other.truncate_samples(self.signal_length)

        if other_eq is not None:
            other = other.equalizer(other_eq)

        # Place ``other`` ``snr`` below the loudness of ``self``.
        other = other.normalize(self.loudness() - snr)

        self.audio_data = self.audio_data + other.audio_data
        return self

    def convolve(self, other, start_at_max: bool=True):
        """Convolves this signal with ``other`` using FFTs.

        ``other`` is zero-padded or truncated (in place) to the length of
        ``self``. The result is rescaled by the peak of ``other`` convolved
        with a unit impulse.

        Parameters
        ----------
        other : AudioSignal
            Signal to convolve with.
        start_at_max : bool, optional
            If True, every item of ``other`` is rolled so that its largest
            absolute sample comes first, to avoid inducing delays,
            by default True

        Returns
        -------
        AudioSignal
            ``self``, with ``audio_data`` replaced by the convolved audio.
        """
        from . import AudioSignal

        extra = self.signal_length - other.signal_length
        if extra <= 0:
            other.truncate_samples(self.signal_length)
        else:
            other.zero_pad(0, extra)

        if start_at_max:
            # Rotate each item so its peak sits at index 0; this keeps the
            # impulse response from delaying the signal.
            peak_pos = paddle.argmax(paddle.abs(other.audio_data), axis=-1)
            rolled = paddle.zeros_like(other.audio_data)
            for item in range(other.batch_size):
                rolled[item] = paddle.roll(
                    other.audio_data[item],
                    shifts=-peak_pos[item].item(),
                    axis=-1)
            other = AudioSignal(rolled, other.sample_rate)

        impulse = paddle.zeros_like(other.audio_data)
        impulse[..., 0] = 1

        n_fft = self.signal_length
        impulse_spec = paddle.fft.rfft(impulse, n=n_fft)
        other_spec = paddle.fft.rfft(other.audio_data, n=n_fft)
        self_spec = paddle.fft.rfft(self.audio_data, n=n_fft)

        wet = paddle.fft.irfft(other_spec * self_spec, n=n_fft)
        impulse_out = paddle.fft.irfft(other_spec * impulse_spec, n=n_fft)

        # Rescale by the peak response to the unit impulse (floored to
        # avoid dividing by ~0).
        impulse_peak = paddle.max(
            paddle.abs(impulse_out), axis=-1, keepdim=True)
        gain = 1 / paddle.clip(impulse_peak, min=1e-5)

        self.audio_data = wet * gain
        return self

    def apply_ir(
            self,
            ir,
            drr: typing.Union[paddle.Tensor, np.ndarray, float]=None,
            ir_eq: typing.Union[paddle.Tensor, np.ndarray]=None,
            use_original_phase: bool=False, ):
        """Applies an impulse response to the signal. If ``ir_eq`` is
        specified, the impulse response is equalized with that curve first;
        if ``drr`` is specified, its direct-to-reverberant ratio is altered
        first.

        Parameters
        ----------
        ir : AudioSignal
            Impulse response to convolve with.
        drr : typing.Union[paddle.Tensor, np.ndarray, float], optional
            Direct-to-reverberant ratio the impulse response is altered to,
            by default None
        ir_eq : typing.Union[paddle.Tensor, np.ndarray], optional
            Equalization curve applied to the impulse response,
            by default None
        use_original_phase : bool, optional
            Whether to combine the convolved magnitude with the phase the
            signal had before convolution, by default False

        Returns
        -------
        AudioSignal
            The result of ``self * scale_factor``, i.e. the convolved signal
            rescaled to the peak amplitude of the input.
        """
        if ir_eq is not None:
            ir = ir.equalizer(ir_eq)
        if drr is not None:
            ir = ir.alter_drr(drr)

        # Peak of the dry signal, used to rescale afterwards.
        dry_peak = self.audio_data.abs().max(axis=-1, keepdim=True)

        # Phase of the dry signal, captured before convolving.
        dry_phase = self.phase
        self.convolve(ir)

        if use_original_phase:
            # Keep the convolved magnitude, restore the dry phase.
            self.stft()
            self.stft_data = self.magnitude * util.exp_compat(1j * dry_phase)
            self.istft()

        # Rescale to the input's amplitude.
        wet_peak = self.audio_data.abs().max(axis=-1, keepdim=True)
        scale_factor = dry_peak.clip(1e-8) / wet_peak.clip(1e-8)
        rescaled = self * scale_factor

        return rescaled

    def ensure_max_of_audio(self, _max: float=1.0):
        """Ensures that ``abs(audio_data) <= _max``.

        Rows whose peak exceeds ``_max`` are scaled down so that the peak
        equals ``_max``; all other rows are left unchanged.

        Parameters
        ----------
        _max : float, optional
            Max absolute value of signal, by default 1.0

        Returns
        -------
        AudioSignal
            ``self``, with values scaled between ``-_max`` and ``_max``.
        """
        peak = self.audio_data.abs().max(axis=-1, keepdim=True)
        unity = paddle.ones_like(peak)
        gain = paddle.where(peak > _max, _max / peak, unity)
        self.audio_data = self.audio_data * gain
        return self

    def normalize(self,
                  db: typing.Union[paddle.Tensor, np.ndarray, float]=-24.0):
        """Scales the signal so that its loudness, as reported by
        ``self.loudness()``, moves to ``db``.

        Parameters
        ----------
        db : typing.Union[paddle.Tensor, np.ndarray, float], optional
            Loudness to normalize to, by default -24.0

        Returns
        -------
        AudioSignal
            ``self``, with ``audio_data`` scaled.
        """
        target = util.ensure_tensor(db)
        current = self.loudness()
        delta_db = target.astype(current.dtype) - current
        linear_gain = util.exp_compat(delta_db * self.GAIN_FACTOR)

        self.audio_data = self.audio_data * linear_gain[:, None, None]
        return self

    def volume_change(self, db: typing.Union[paddle.Tensor, np.ndarray, float]):
        """Changes the volume of the signal by ``db`` decibels.

        Parameters
        ----------
        db : typing.Union[paddle.Tensor, np.ndarray, float]
            Amount to change volume by.

        Returns
        -------
        AudioSignal
            ``self``, at the new volume.
        """
        delta_db = util.ensure_tensor(db, ndim=1)
        linear_gain = util.exp_compat(delta_db * self.GAIN_FACTOR)
        self.audio_data = self.audio_data * linear_gain[:, None, None]
        return self

    def mel_filterbank(self, n_bands: int):
        """Splits the signal into ``n_bands`` bands with ``SplitBands``.

        Parameters
        ----------
        n_bands : int
            Number of bands to use.

        Returns
        -------
        paddle.Tensor
            Band-filtered audio, with the last axis being the band index.
        """
        splitter = SplitBands(self.sample_rate, n_bands)
        bands_first = splitter(self.audio_data)
        # Move the band axis from the front to the back.
        return bands_first.transpose([1, 2, 3, 0])

    def equalizer(self, db: typing.Union[paddle.Tensor, np.ndarray]):
        """Applies a per-band equalizer to the audio signal.

        The signal is split into as many bands as ``db`` has entries along
        its last axis, each band is weighted by ``10 ** db``, and the bands
        are summed back together.

        Parameters
        ----------
        db : typing.Union[paddle.Tensor, np.ndarray]
            EQ curve to apply.

        Returns
        -------
        AudioSignal
            ``self``, with equalization applied.
        """
        db = util.ensure_tensor(db)
        fbank = self.mel_filterbank(db.shape[-1])

        if db.ndim != 2:
            # No batch axis given: add one so the weights broadcast.
            db = db.unsqueeze(0)
        elif db.shape[0] != 1:
            # A real batch axis must match the batch of the filterbank.
            assert db.shape[0] == fbank.shape[0]

        band_gains = (10**db).astype("float32")
        weighted = fbank * band_gains[:, None, None, :]
        self.audio_data = weighted.sum(-1)
        return self

    def clip_distortion(
            self,
            clip_percentile: typing.Union[paddle.Tensor, np.ndarray, float]):
        """Clips the signal at a given percentile. The higher it is,
        the lower the threshold for clipping.

        The lower and upper thresholds are the ``clip_percentile / 2`` and
        ``1 - clip_percentile / 2`` quantiles along the last axis.

        Parameters
        ----------
        clip_percentile : typing.Union[paddle.Tensor, np.ndarray, float]
            Values are between 0.0 to 1.0. Typical values are 0.1 or below.

        Returns
        -------
        AudioSignal
            ``self``, with clipped audio data.
        """
        pct = util.ensure_tensor(clip_percentile, ndim=1)
        pct = pct.cpu().numpy()
        half = pct / 2

        lower = paddle.quantile(self.audio_data, half.tolist(), axis=-1)[None]
        upper = paddle.quantile(
            self.audio_data, (1 - half).tolist(), axis=-1)[None]

        n_channels = self.audio_data.shape[1]
        lower = lower[:, :n_channels, :]
        upper = upper[:, :n_channels, :]

        self.audio_data = self.audio_data.clip(lower, upper)

        return self

    def quantization(self,
                     quantization_channels: typing.Union[paddle.Tensor,
                                                         np.ndarray, int]):
        """Quantizes the waveform to evenly spaced levels.

        The quantized values are applied through a detached residual, so the
        residual itself carries no gradient.

        Parameters
        ----------
        quantization_channels : typing.Union[paddle.Tensor, np.ndarray, int]
            Number of evenly spaced quantization channels to quantize
            to.

        Returns
        -------
        AudioSignal
            ``self``, quantized.
        """
        levels = util.ensure_tensor(quantization_channels, ndim=3)

        wave = self.audio_data
        levels = levels.astype(wave.dtype)

        # Map to [0, 1], snap down to the level grid, map back to [-1, 1].
        unit = (wave + 1) / 2
        snapped = (unit * levels).floor() / levels
        quantized = 2 * snapped - 1

        residual = (self.audio_data - quantized).detach()
        self.audio_data = self.audio_data - residual
        return self

    def mulaw_quantization(self,
                           quantization_channels: typing.Union[
                               paddle.Tensor, np.ndarray, int]):
        """Quantizes the waveform with mu-law companding.

        The waveform is compressed, rounded to integer codes, and expanded
        again; the result is applied through a detached residual.

        Parameters
        ----------
        quantization_channels : typing.Union[paddle.Tensor, np.ndarray, int]
            Number of mu-law spaced quantization channels to quantize
            to.

        Returns
        -------
        AudioSignal
            ``self``, quantized.
        """
        mu = util.ensure_tensor(quantization_channels - 1.0, ndim=3)

        wave = self.audio_data

        # Compress and round to integer codes.
        compressed = paddle.sign(wave) * paddle.log1p(
            mu * paddle.abs(wave)) / paddle.log1p(mu)
        codes = ((compressed + 1) / 2 * mu + 0.5).astype("int64")

        # Expand the codes back to the waveform range.
        decoded = (codes.astype(mu.dtype) / mu) * 2 - 1.0
        expanded = paddle.sign(decoded) * (util.exp_compat(
            paddle.abs(decoded) * paddle.log1p(mu)) - 1.0) / mu

        residual = (self.audio_data - expanded).detach()
        self.audio_data = self.audio_data - residual
        return self

    def __matmul__(self, other):
        """``self @ other`` is shorthand for ``self.convolve(other)``."""
        return self.convolve(other)


class ImpulseResponseMixin:
    """These functions are generally only used with AudioSignals that are derived
    from impulse responses, not other sources like music or speech. These methods
    are used to replicate the data augmentation described in [1].

    1.  Bryan, Nicholas J. "Impulse response data augmentation and deep
        neural networks for blind room acoustic parameter estimation."
        ICASSP 2020-2020 IEEE International Conference on Acoustics,
        Speech and Signal Processing (ICASSP). IEEE, 2020.
    """

    def decompose_ir(self):
        """Decomposes an impulse response into early and late
        field responses.

        Returns
        -------
        tuple
            ``(early_response, late_field, window)``, each shaped like
            ``audio_data``. ``early_response`` keeps the samples within
            2.5 ms of the argmax of every row, ``late_field`` keeps the
            remaining samples, and ``window`` holds a Hann window placed on
            the early samples.
        """
        # Equations 1 and 2
        # -----------------
        # Breaking up into early
        # response + late field response.

        # Index of the largest sample of every row, and the half-width of
        # the early region (2.5 ms) in samples.
        td = paddle.argmax(self.audio_data, axis=-1, keepdim=True)
        t0 = int(self.sample_rate * 0.0025)

        idx = paddle.arange(self.audio_data.shape[-1])[None, None, :]
        idx = idx.expand([self.batch_size, -1, -1])
        early_idx = (idx >= td - t0) * (idx <= td + t0)

        early_response = paddle.zeros_like(self.audio_data)
        early_response = paddle.where(early_idx, self.audio_data,
                                      early_response)

        late_idx = ~early_idx
        late_field = paddle.zeros_like(self.audio_data)
        late_field = paddle.where(late_idx, self.audio_data, late_field)

        # Equation 4
        # ----------
        # Decompose early response into windowed
        # direct path and windowed residual.

        window = paddle.zeros_like(self.audio_data)
        # Coordinates of every True entry of ``early_idx``; the first column
        # is the batch index and the last column the sample index.
        window_idx = paddle.nonzero(early_idx)
        for item in range(self.batch_size):
            # Sample positions of the early region for this batch item.
            indices = window_idx[window_idx[:, 0] == item][:, -1]

            temp_window = self.get_window("hann", indices.shape[0])

            # NOTE(review): only channel 0 of ``window`` is written, while
            # ``indices`` is gathered over all channels of the item, so this
            # looks like it expects mono impulse responses -- confirm.
            window_slice = window[item, 0]
            updated_window_slice = paddle.scatter(
                window_slice, index=indices, updates=temp_window)

            window[item, 0] = updated_window_slice

        return early_response, late_field, window

    def measure_drr(self):
        """Measures the direct-to-reverberant ratio of the impulse
        response.

        Returns
        -------
        paddle.Tensor
            Direct-to-reverberant ratio, ``10 * log10`` of the early-response
            energy over the late-field energy, summed over the last axis.
        """
        early_response, late_field, _ = self.decompose_ir()
        num = (early_response**2).sum(axis=-1)
        den = (late_field**2).sum(axis=-1)
        drr = 10 * paddle.log10(num / den)
        return drr

    @staticmethod
    def solve_alpha(early_response, late_field, wd, target_drr):
        """Used to solve for the alpha value, which is used
        to alter the drr.

        Parameters
        ----------
        early_response : paddle.Tensor
            Early part of the impulse response.
        late_field : paddle.Tensor
            Late part of the impulse response.
        wd : paddle.Tensor
            Window applied to the direct path.
        target_drr : paddle.Tensor
            Direct-to-reverberant ratio to reach, in dB.

        Returns
        -------
        paddle.Tensor
            The larger of the two roots of the quadratic in alpha.
        """
        # Equation 5
        # ----------
        # Apply the good ol' quadratic formula.

        wd_sq = wd**2
        wd_sq_1 = (1 - wd)**2
        e_sq = early_response**2
        l_sq = late_field**2
        a = (wd_sq * e_sq).sum(axis=-1)
        b = (2 * (1 - wd) * wd * e_sq).sum(axis=-1)
        # 10 ** (target_drr / 10), computed in float32.
        target_ratio = paddle.pow(
            10 * paddle.ones_like(target_drr, dtype="float32"),
            target_drr.cast("float32") / 10)
        c = (wd_sq_1 * e_sq).sum(axis=-1) - target_ratio * l_sq.sum(axis=-1)

        expr = ((b**2) - 4 * a * c).sqrt()
        alpha = paddle.maximum(
            (-b - expr) / (2 * a),
            (-b + expr) / (2 * a), )
        return alpha

    def alter_drr(self, drr: typing.Union[paddle.Tensor, np.ndarray, float]):
        """Alters the direct-to-reverberant ratio of the impulse response.

        Parameters
        ----------
        drr : typing.Union[paddle.Tensor, np.ndarray, float]
            Direct-to-reverberant ratio that impulse response will be
            altered to.

        Returns
        -------
        AudioSignal
            Altered impulse response (``self``, modified in place).
        """
        drr = util.ensure_tensor(drr, 2, self.batch_size)

        early_response, late_field, window = self.decompose_ir()
        alpha = self.solve_alpha(early_response, late_field, window, drr)
        # Per-item lower bound for alpha. Paddle's ``max(axis=...)`` returns
        # the values tensor itself (not a ``(values, indices)`` pair as in
        # torch), so it must not be indexed with ``[0]``: that would pick
        # batch item 0 and apply its bound to the whole batch.
        min_alpha = late_field.abs().max(axis=-1) / early_response.abs().max(
            axis=-1)
        alpha = paddle.maximum(alpha, min_alpha)[..., None]

        aug_ir_data = alpha * window * early_response + (
            (1 - window) * early_response) + late_field
        self.audio_data = aug_ir_data
        self.ensure_max_of_audio()
        return self


================================================
FILE: paddlespeech/audiotools/core/ffmpeg.py
================================================
# MIT License, Copyright (c) 2023-Present, Descript.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Modified from audiotools(https://github.com/descriptinc/audiotools/blob/master/audiotools/core/ffmpeg.py)
import json
import shlex
import subprocess
import tempfile
from pathlib import Path
from typing import Tuple

import ffmpy
import numpy as np
import paddle


def r128stats(filepath: str, quiet: bool):
    """Takes a path to an audio file, returns a dict with the loudness
    stats computed by the ffmpeg ebur128 filter.

    Parameters
    ----------
    filepath : str
        Path to compute loudness stats on.
    quiet : bool
        Whether to hide the FFMPEG banner during computation.

    Returns
    -------
    dict
        Dictionary containing loudness stats, with the keys ``"I"``,
        ``"I Threshold"``, ``"LRA"``, ``"LRA Threshold"``, ``"LRA Low"``
        and ``"LRA High"``.

    Raises
    ------
    ValueError
        If the ffmpeg output contains no ebur128 ``Summary:`` section (for
        instance because ffmpeg could not read the file) or the section
        cannot be parsed.
    """
    ffargs = [
        "ffmpeg",
        "-nostats",
        "-i",
        filepath,
        "-filter_complex",
        "ebur128",
        "-f",
        "null",
        "-",
    ]
    if quiet:
        ffargs += ["-hide_banner"]
    proc = subprocess.Popen(
        ffargs, stderr=subprocess.PIPE, universal_newlines=True)
    # The stats are parsed from stderr.
    stats = proc.communicate()[1]
    summary_index = stats.rfind("Summary:")
    if summary_index < 0:
        # Without this check the slice below would silently start at the
        # last character and fail later with "'I:' is not in list".
        raise ValueError(
            "ffmpeg did not report an ebur128 summary for {!r} "
            "(exit code {}); end of ffmpeg output: {!r}".format(
                filepath, proc.returncode, stats[-500:]))

    summary_list = stats[summary_index:].split()

    def _number_after(label, offset):
        # Value located ``offset`` tokens after the first ``label`` token.
        return float(summary_list[summary_list.index(label) + offset])

    # In the summary each "Threshold:" value sits 4 tokens after its
    # "I:" / "LRA:" label (label, value, unit, "Threshold:", value).
    stats_dict = {
        "I": _number_after("I:", 1),
        "I Threshold": _number_after("I:", 4),
        "LRA": _number_after("LRA:", 1),
        "LRA Threshold": _number_after("LRA:", 4),
        "LRA Low": _number_after("low:", 1),
        "LRA High": _number_after("high:", 1),
    }

    return stats_dict


def ffprobe_offset_and_codec(path: str) -> Tuple[float, str]:
    """Probes ``path`` with ffprobe and reports the start-time offset and
    the codec name of the first audio stream.

    If the file has no audio stream, ``(0.0, None)`` is returned. An audio
    stream without a ``start_time`` entry yields an offset of 0.0.
    """
    probe = ffmpy.FFprobe(
        inputs={path: None},
        global_options="-show_entries format=start_time:stream=duration,start_time,codec_type,codec_name,start_pts,time_base -of json -v quiet",
    )
    probe_stdout = probe.run(stdout=subprocess.PIPE)[0]
    stream_infos = json.loads(probe_stdout)["streams"]

    # Lazily pick the first stream flagged as audio (streams after it are
    # never inspected).
    first_audio = next(
        (info for info in stream_infos if info["codec_type"] == "audio"),
        None)
    if first_audio is None:
        return float(0.0), None

    start_offset = first_audio.get("start_time", 0.0)
    codec_name = first_audio.get("codec_name")
    return float(start_offset), codec_name


class FFMPEGMixin:
    # Loudness values; overwritten by ``ffmpeg_loudness``.
    _loudness = None

    def ffmpeg_loudness(self, quiet: bool=True):
        """Measures the loudness of every batch item with FFMPEG.

        Each item is written to a temporary ``.wav`` file and analysed with
        ``r128stats``; the integrated loudness (``"I"``) of every item is
        stored on ``self._loudness`` as a float32 tensor.

        Parameters
        ----------
        quiet : bool, optional
            Forwarded to ``r128stats``; whether to hide the FFMPEG banner
            during computation, by default True

        Returns
        -------
        paddle.Tensor
            The result of ``self.loudness()`` after ``self._loudness`` has
            been set from the FFMPEG measurements.
        """
        integrated = []

        # One temporary file is reused for all items of the batch.
        with tempfile.NamedTemporaryFile(suffix=".wav") as tmp_wav:
            wav_path = tmp_wav.name
            for item_idx in range(self.batch_size):
                self[item_idx].write(wav_path)
                item_stats = r128stats(wav_path, quiet=quiet)
                integrated.append(item_stats["I"])

        measured = paddle.to_tensor(np.array(integrated))
        self._loudness = measured.astype("float32")
        return self.loudness()


================================================
FILE: paddlespeech/audiotools/core/loudness.py
================================================
# MIT License, Copyright (c) 2023-Present, Descript.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Modified from audiotools(https://github.com/descriptinc/audiotools/blob/master/audiotools/core/loudness.py)
import copy
import math
import typing

import numpy as np
import paddle
import paddle.nn.functional as F
import scipy

from . import _julius


def _unfold1d(x, kernel_size, stride):
    # https://github.com/PaddlePaddle/Paddle/pull/70102
    """Cuts the last axis of a 3-D tensor into (possibly overlapping) frames.

    The input is padded on the right so that every sample is covered by at
    least one frame. Frame ``i`` spans the samples
    ``i * stride : i * stride + kernel_size`` of the padded input.

    Args:
        x (Tensor): tensor of shape `[N, C, T]` to cut into frames.
        kernel_size (int): size of each frame.
        stride (int): stride between each frame.

    Shape:

        - Inputs: `x` is `[N, C, T]`
        - Output: `[N, C, F, kernel_size]` with
          `F = 1 + ceil((max(T, kernel_size) - kernel_size) / stride)`

    Raises:
        NotImplementedError: if `x` does not have exactly 3 dimensions.
    """

    if x.dim() != 3:
        raise NotImplementedError

    batch, channels, n_samples = x.shape
    # Treat every (batch, channel) row as its own single-channel sequence.
    rows = x.reshape([batch * channels, 1, n_samples])

    # Number of frames needed to cover the input, and the padded length
    # that makes the last frame complete.
    covered = max(n_samples, kernel_size)
    frame_count = 1 + math.ceil((covered - kernel_size) / stride)
    padded_len = kernel_size + (frame_count - 1) * stride
    rows = F.pad(rows, (0, padded_len - n_samples), data_format="NCL")

    # ``unfold`` works on 4-D input, so add a trailing axis of size 1 and
    # use a kernel / stride of 1 along it.
    frames = paddle.nn.functional.unfold(
        rows.unsqueeze(-1),
        kernel_sizes=[kernel_size, 1],
        strides=[stride, 1], )

    # Put the frame axis before the in-frame axis, then restore N and C.
    frames = frames.transpose([0, 2, 1])
    return frames.reshape([batch, channels, *frames.shape[1:]])


class Meter(paddle.nn.Layer):
    """Tensorized version of pyloudnorm.Meter. Works with batched audio tensors.

    Parameters
    ----------
    rate : int
        Sample rate of audio.
    filter_class : str, optional
        Class of weighting filter used.
        'K-weighting' (default), 'Fenton/Lee 1'
        'Fenton/Lee 2', 'Dash et al.'
        by default "K-weighting"
    block_size : float, optional
        Gating block size in seconds, by default 0.400
    zeros : int, optional
         Number of zeros to use in FIR approximation of
         IIR filters, by default 512
    use_fir : bool, optional
        Whether to use FIR approximation or exact IIR formulation,
        by default False. The flag is stored on the instance, but
        :py:meth:`apply_filter` currently always uses the exact IIR
        (CPU) formulation regardless of its value.
    """

    def __init__(
            self,
            rate: int,
            filter_class: str="K-weighting",
            block_size: float=0.400,
            zeros: int=512,
            use_fir: bool=False, ):
        super().__init__()

        # ``rate`` must be set before ``filter_class``: the property setter
        # builds a pyloudnorm meter from ``self.rate``.
        self.rate = rate
        self.filter_class = filter_class
        self.block_size = block_size
        self.use_fir = use_fir

        # Per-channel gains applied when summing channel energies.
        G = paddle.to_tensor(
            np.array([1.0, 1.0, 1.0, 1.41, 1.41]), stop_gradient=True)
        self.register_buffer("G", G)

        # Compute impulse responses so that filtering is fast via
        # a convolution at runtime, on GPU, unlike lfilter.
        impulse = np.zeros((zeros, ))
        impulse[..., 0] = 1.0

        firs = np.zeros((len(self._filters), 1, zeros))
        passband_gain = paddle.zeros([len(self._filters)], dtype="float32")

        for i, (_, filter_stage) in enumerate(self._filters.items()):
            firs[i] = scipy.signal.lfilter(filter_stage.b, filter_stage.a,
                                           impulse)
            passband_gain[i] = filter_stage.passband_gain

        # Time-reverse the impulse responses before storing them as kernels.
        firs = paddle.to_tensor(
            firs[..., ::-1].copy(), dtype="float32", stop_gradient=True)

        self.register_buffer("firs", firs)
        self.register_buffer("passband_gain", passband_gain)

    def apply_filter_gpu(self, data: paddle.Tensor):
        """Performs FIR approximation of loudness computation.

        Parameters
        ----------
        data : paddle.Tensor
            Audio data of shape (nb, nt, nch).

        Returns
        -------
        paddle.Tensor
            Filtered audio data of shape (nb, nt, nch).
        """
        # Data is of shape (nb, nt, nch)
        # Reshape to (nb*nch, 1, nt)
        nb, nt, nch = data.shape
        data = data.transpose([0, 2, 1])
        data = data.reshape([nb * nch, 1, nt])

        # Apply padding
        pad_length = self.firs.shape[-1]

        # Apply filtering in sequence
        for i in range(self.firs.shape[0]):
            data = F.pad(data, (pad_length, pad_length), data_format="NCL")
            data = _julius.fft_conv1d(data, self.firs[i, None, ...])
            data = self.passband_gain[i] * data
            data = data[..., 1:nt + 1]

        # Undo the (nb*nch, 1, nt) flattening before moving time back to
        # axis 1; otherwise multi-channel input would come back as
        # (nb*nch, nt, 1) instead of (nb, nt, nch).
        data = data.reshape([nb, nch, nt])
        data = data.transpose([0, 2, 1])
        data = data[:, :nt, :]
        return data

    @staticmethod
    def scipy_lfilter(waveform, a_coeffs, b_coeffs, clamp: bool=True):
        """Filters a 3-D array along axis 1 with ``scipy.signal.lfilter``.

        Parameters
        ----------
        waveform : np.ndarray
            Array indexed as (batch, time, channel).
        a_coeffs : array-like
            Denominator coefficients of the filter.
        b_coeffs : array-like
            Numerator coefficients of the filter.
        clamp : bool, optional
            Unused; kept for interface compatibility, by default True

        Returns
        -------
        np.ndarray
            Filtered array with the same shape and dtype as ``waveform``.
        """
        # Filter each (batch, channel) time series independently.
        output = np.zeros_like(waveform)
        for batch_idx in range(waveform.shape[0]):
            for channel_idx in range(waveform.shape[2]):
                output[batch_idx, :, channel_idx] = scipy.signal.lfilter(
                    b_coeffs, a_coeffs, waveform[batch_idx, :, channel_idx])
        return output

    def apply_filter_cpu(self, data: paddle.Tensor):
        """Performs IIR formulation of loudness computation.

        Parameters
        ----------
        data : paddle.Tensor
            Audio data of shape (nb, nt, nch).

        Returns
        -------
        paddle.Tensor
            Filtered audio data of shape (nb, nt, nch).
        """
        # Work on a NumPy copy so the caller's tensor is left untouched.
        _data = data.cpu().numpy().copy()
        for _, filter_stage in self._filters.items():
            passband_gain = filter_stage.passband_gain

            a_coeffs = filter_stage.a
            b_coeffs = filter_stage.b

            filtered = self.scipy_lfilter(_data, a_coeffs, b_coeffs)
            _data[:] = passband_gain * filtered
        data = paddle.to_tensor(_data)
        return data

    def apply_filter(self, data: paddle.Tensor):
        """Applies the frequency weighting filters to the audio.

        Currently this always uses the exact IIR formulation
        (:py:meth:`apply_filter_cpu`); the device/``use_fir`` based
        dispatch to :py:meth:`apply_filter_gpu` is disabled.

        Parameters
        ----------
        data : paddle.Tensor
            Audio data of shape (nb, nt, nch).

        Returns
        -------
        paddle.Tensor
            Filtered audio data.
        """
        # if data.place.is_gpu_place() or self.use_fir:
        #     data = self.apply_filter_gpu(data)
        # else:
        #     data = self.apply_filter_cpu(data)
        data = self.apply_filter_cpu(data)
        return data

    def forward(self, data: paddle.Tensor):
        """Computes integrated loudness of data.

        Parameters
        ----------
        data : paddle.Tensor
            Audio data of shape (nb, nt, nch).

        Returns
        -------
        paddle.Tensor
            Integrated loudness, one value per batch item.
        """
        return self.integrated_loudness(data)

    def _unfold(self, input_data):
        """Splits (nb, nt, nch) audio into 75%-overlapping gating blocks.

        Returns a tensor of shape (nb, nch, block_samples, n_blocks).
        """
        T_g = self.block_size
        overlap = 0.75  # overlap of 75% of the block duration
        step = 1.0 - overlap  # step size by percentage

        kernel_size = int(T_g * self.rate)
        stride = int(T_g * self.rate * step)
        unfolded = _unfold1d(
            input_data.transpose([0, 2, 1]), kernel_size, stride)
        unfolded = unfolded.transpose([0, 1, 3, 2])

        return unfolded

    def integrated_loudness(self, data: paddle.Tensor):
        """Computes integrated loudness of data.

        Parameters
        ----------
        data : paddle.Tensor
            Audio data of shape (nb, nt, nch). Inputs with fewer
            dimensions get a trailing channel axis and then a
            leading batch axis added.

        Returns
        -------
        paddle.Tensor
            Integrated loudness as float32, one value per batch item.
        """
        if not paddle.is_tensor(data):
            data = paddle.to_tensor(data, dtype="float32")
        else:
            data = data.astype("float32")

        input_data = data.clone()
        # Data always has a batch and channel dimension.
        # Is of shape (nb, nt, nch)
        if input_data.ndim < 2:
            input_data = input_data.unsqueeze(-1)
        if input_data.ndim < 3:
            input_data = input_data.unsqueeze(0)

        nb, nt, nch = input_data.shape

        # Apply frequency weighting filters - account
        # for the acoustic response of the head and auditory system
        input_data = self.apply_filter(input_data)

        G = self.G  # channel gains
        T_g = self.block_size  # 400 ms gating block standard
        Gamma_a = -70.0  # -70 LKFS = absolute loudness threshold

        unfolded = self._unfold(input_data)

        # Mean-square energy per channel and gating block.
        z = (1.0 / (T_g * self.rate)) * unfolded.square().sum(2)
        l = -0.691 + 10.0 * paddle.log10(
            (G[None, :nch, None] * z).sum(1, keepdim=True))
        l = l.expand_as(z)

        # find gating block indices above absolute threshold
        # NOTE: ``z_avg_gated`` aliases ``z``, so the masking below also
        # zeroes the corresponding entries of ``z`` in place.
        z_avg_gated = z
        z_avg_gated[l <= Gamma_a] = 0
        masked = l > Gamma_a
        z_avg_gated = z_avg_gated.sum(2) / masked.sum(2).astype("float32")

        # calculate the relative threshold value (see eq. 6)
        Gamma_r = -0.691 + 10.0 * paddle.log10(
            (z_avg_gated * G[None, :nch]).sum(-1)) - 10.0
        Gamma_r = Gamma_r[:, None, None]
        Gamma_r = Gamma_r.expand([nb, nch, l.shape[-1]])

        # find gating block indices above relative and absolute thresholds  (end of eq. 7)
        z_avg_gated = z
        z_avg_gated[l <= Gamma_a] = 0
        z_avg_gated[l <= Gamma_r] = 0
        masked = (l > Gamma_a) * (l > Gamma_r)
        # The small constant keeps the division finite when no block passes.
        z_avg_gated = z_avg_gated.sum(2) / (masked.sum(2) + 10e-6)

        # TODO Currently, paddle has a segmentation fault bug in this section of the code
        # z_avg_gated = paddle.nan_to_num(z_avg_gated)
        # z_avg_gated = paddle.where(
        #     paddle.isnan(z_avg_gated),
        #     paddle.zeros_like(z_avg_gated), z_avg_gated)
        z_avg_gated[z_avg_gated == float("inf")] = float(
            np.finfo(np.float32).max)
        z_avg_gated[z_avg_gated == -float("inf")] = float(
            np.finfo(np.float32).min)

        LUFS = -0.691 + 10.0 * paddle.log10(
            (G[None, :nch] * z_avg_gated).sum(1))
        return LUFS.astype("float32")

    @property
    def filter_class(self):
        """Name of the weighting filter class currently in use."""
        return self._filter_class

    @filter_class.setter
    def filter_class(self, value):
        # Delegate filter design to pyloudnorm and keep its filter stages.
        from pyloudnorm import Meter

        meter = Meter(self.rate)
        meter.filter_class = value
        self._filter_class = value
        self._filters = meter._filters


class LoudnessMixin:
    """Mixin that adds a cached integrated-loudness measurement.

    The host class is expected to provide ``signal_length``,
    ``signal_duration``, ``sample_rate``, ``audio_data``, ``zero_pad`` and
    ``truncate_samples``, all of which are used by :py:meth:`loudness`.
    """

    # Minimum loudness possible; measured values are floored to this.
    MIN_LOUDNESS = -70
    # Cached result of the first ``loudness()`` call (None until computed).
    _loudness = None

    def loudness(self,
                 filter_class: str="K-weighting",
                 block_size: float=0.400,
                 **kwargs):
        """Measures the integrated gated loudness of the signal following
        ITU-R BS.1770-4.

        The API is derived from PyLoudnorm; the measurement itself is done
        by :py:class:`Meter`, which is tensorized across batches. Signals
        shorter than 0.5 seconds are temporarily zero-padded up to that
        duration and restored to their original length afterwards. The
        result is cached: once a value has been computed, later calls
        return it as is, whatever arguments are passed.

        Parameters
        ----------
        filter_class : str, optional
            Class of weighting filter used.
            'K-weighting' (default), 'Fenton/Lee 1'
            'Fenton/Lee 2', 'Dash et al.'
            by default "K-weighting"
        block_size : float, optional
            Gating block size in seconds, by default 0.400
        kwargs : dict, optional
            Keyword arguments to :py:func:`audiotools.core.loudness.Meter`.

        Returns
        -------
        paddle.Tensor
            Loudness of audio data, floored at ``MIN_LOUDNESS``.
        """
        cached = self._loudness
        if cached is not None:
            return cached

        n_samples_before = self.signal_length
        if self.signal_duration < 0.5:
            missing = int((0.5 - self.signal_duration) * self.sample_rate)
            self.zero_pad(0, missing)

        # Build the BS.1770 meter and measure on (batch, time, channel) data.
        bs1770_meter = Meter(
            self.sample_rate,
            filter_class=filter_class,
            block_size=block_size,
            **kwargs)
        measured = bs1770_meter.integrated_loudness(
            self.audio_data.transpose([0, 2, 1]))

        # Drop the temporary padding again.
        self.truncate_samples(n_samples_before)

        floor = paddle.ones_like(measured) * self.MIN_LOUDNESS
        self._loudness = paddle.maximum(measured, floor)
        return self._loudness


================================================
FILE: paddlespeech/audiotools/core/util.py
================================================
# MIT License, Copyright (c) 2023-Present, Descript.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Modified from audiotools(https://github.com/descriptinc/audiotools/blob/master/audiotools/core/util.py)
import collections
import csv
import glob
import math
import numbers
import os
import random
import typing
from contextlib import contextmanager
from dataclasses import dataclass
from pathlib import Path
from typing import Any
from typing import Callable
from typing import Dict
from typing import Iterable
from typing import List
from typing import NamedTuple
from typing import Optional
from typing import Tuple
from typing import Type
from typing import Union

import ffmpeg
import librosa
import numpy as np
import paddle
import soundfile
from flatten_dict import flatten
from flatten_dict import unflatten

from paddlespeech.utils import satisfy_paddle_version
from paddlespeech.vector.training.seeding import seed_everything

# Names exported by ``from <this module> import *``.
__all__ = [
    "exp_compat",
    "bool_index_compat",
    "bool_setitem_compat",
    "Info",
    "info",
    "ensure_tensor",
    "random_state",
    "find_audio",
    "read_sources",
    "choose_from_list_of_lists",
    "chdir",
    "move_to_device",
    "prepare_batch",
    "sample_from_dist",
    "format_figure",
    "default_collate",
    "collate",
    "hz_to_bin",
    "generate_chord_dataset",
]


def exp_compat(x):
    """
    Element-wise exponential of `x` that also works on older PaddlePaddle.

    When `satisfy_paddle_version("2.6")` holds, `paddle.exp` is used directly.
    Otherwise (older releases lack `exp` for complex tensors) the tensor is
    moved to the CPU, converted to NumPy, exponentiated with `np.exp` and
    wrapped back into a tensor.

    Args:
        x (paddle.Tensor): The input tensor.

    Returns:
        paddle.Tensor: `exp(x)` as a PaddlePaddle tensor.
    """
    if not satisfy_paddle_version("2.6"):
        # Fallback path: compute in NumPy on the host.
        host_values = x.cpu().numpy()
        return paddle.to_tensor(np.exp(host_values))
    return paddle.exp(x)


def bool_index_compat(x, mask):
    """
    Index `x` with `mask`, falling back to NumPy on older PaddlePaddle.

    Native indexing (`x[mask]`) is used when `satisfy_paddle_version("2.6")`
    holds, or when `mask` is a plain `int`, `list` or `slice`. Otherwise both
    `x` and `mask` are converted to NumPy arrays on the CPU, indexed there,
    and the result is wrapped back into a tensor.

    Args:
        x (paddle.Tensor): The tensor to be indexed.
        mask (paddle.Tensor, int, list or slice): The index or boolean mask.

    Returns:
        paddle.Tensor: The selected elements.
    """
    use_native = satisfy_paddle_version("2.6") or isinstance(
        mask, (int, list, slice))
    if use_native:
        return x[mask]

    # Fallback path: index on the host with NumPy.
    selected = x.cpu().numpy()[mask.cpu().numpy()]
    return paddle.to_tensor(selected)


def bool_setitem_compat(x, mask, y):
    """
    Assign `y` to the elements of `x` selected by `mask`.

    When `satisfy_paddle_version("2.6")` holds, the assignment is done in
    place on `x` and `x` itself is returned. Otherwise the assignment is
    carried out on a NumPy copy on the CPU and a new tensor is returned, so
    callers should always use the return value.

    Args:
        x (paddle.Tensor): The tensor to be modified.
        mask (paddle.Tensor): The boolean mask selecting the target elements.
        y (paddle.Tensor): The values to write into the selected positions.

    Returns:
        paddle.Tensor: The tensor after assignment.
    """
    if not satisfy_paddle_version("2.6"):
        # Fallback path: do the masked assignment on the host with NumPy.
        host_values = x.cpu().numpy()
        host_values[mask.cpu().numpy()] = y.cpu().numpy()
        return paddle.to_tensor(host_values)

    x[mask] = y
    return x


@dataclass
class Info:
    """Basic metadata of an audio file: sample rate and frame count."""

    sample_rate: float
    num_frames: int

    @property
    def duration(self) -> float:
        """Length of the audio in seconds (``num_frames / sample_rate``)."""
        frames, rate = self.num_frames, self.sample_rate
        return frames / rate


def info_ffmpeg(audio_path: str):
    """Reads sample rate and frame count of an audio file via ``ffmpeg.probe``.

    The first audio stream reported by the probe is used. Its duration is
    taken from the stream entry and, when the stream does not carry one,
    from the container-level ``format`` entry.

    Parameters
    ----------
    audio_path : str
        Path to audio file.

    Returns
    -------
    Info
        Sample rate and number of frames of the first audio stream.

    Raises
    ------
    ValueError
        If the file has no audio stream, or no duration can be determined.
    """
    probe = ffmpeg.probe(audio_path)
    audio_stream = next(
        (stream for stream in probe['streams']
         if stream.get('codec_type') == 'audio'), None)
    if audio_stream is None:
        raise ValueError("No audio stream found in the file.")

    sample_rate = int(audio_stream['sample_rate'])

    # Not every container reports a per-stream duration; fall back to the
    # container-level value instead of failing with a KeyError.
    duration = audio_stream.get('duration')
    if duration is None:
        duration = probe.get('format', {}).get('duration')
    if duration is None:
        raise ValueError("Could not determine the audio duration of the file.")

    # The duration is a decimal string; round instead of truncating so that
    # tiny representation errors do not drop the last frame.
    num_frames = int(round(float(duration) * sample_rate))

    return Info(sample_rate=sample_rate, num_frames=num_frames)


def info(audio_path: str):
    """Returns sample rate and frame count of an audio file.

    ``soundfile`` is tried first; if it fails for any reason, the file is
    probed with ffmpeg instead (see :py:func:`info_ffmpeg`).

    Parameters
    ----------
    audio_path : str
        Path to audio file.

    Returns
    -------
    Info
        Sample rate and number of frames of the file.
    """
    try:
        sf_info = soundfile.info(str(audio_path))
        return Info(sample_rate=sf_info.samplerate, num_frames=sf_info.frames)
    except Exception:
        # Only ordinary errors trigger the fallback; KeyboardInterrupt and
        # SystemExit propagate instead of being swallowed.
        return info_ffmpeg(str(audio_path))


def ensure_tensor(
        x: typing.Union[np.ndarray, paddle.Tensor, float, int],
        ndim: int=None,
        batch_size: int=None, ):
    """Converts ``x`` to a tensor and optionally adjusts its rank and
    leading (batch) dimension.

    Parameters
    ----------
    x : typing.Union[np.ndarray, paddle.Tensor, float, int]
        Data to convert; tensors are passed through unchanged.
    ndim : int, optional
        Desired number of dimensions. Trailing singleton axes are appended
        until it is reached; ``x`` must not already have more dimensions.
        By default None (rank left as is).
    batch_size : int, optional
        Desired size of the first dimension. If it differs, the tensor is
        expanded along that axis. By default None (shape left as is).

    Returns
    -------
    paddle.Tensor
        Modified version of ``x`` as a tensor.
    """
    tensor = x if paddle.is_tensor(x) else paddle.to_tensor(x)

    if ndim is not None:
        assert tensor.ndim <= ndim
        for _ in range(ndim - tensor.ndim):
            tensor = tensor.unsqueeze(-1)

    if batch_size is not None and tensor.shape[0] != batch_size:
        target_shape = list(tensor.shape)
        target_shape[0] = batch_size
        tensor = paddle.expand(tensor, target_shape)

    return tensor


def _get_value(other):
    """Unwraps an ``AudioSignal`` to its ``audio_data``; any other value is
    returned unchanged."""
    # Imported inside the function rather than at module level.
    from .audio_signal import AudioSignal
    return other.audio_data if isinstance(other, AudioSignal) else other


def random_state(seed: typing.Union[int, np.random.RandomState]):
    """
    Turn seed into a np.random.RandomState instance.

    Parameters
    ----------
    seed : typing.Union[int, np.random.RandomState] or None
        If seed is None (or the ``np.random`` module itself), return the
        RandomState singleton used by np.random.
        If seed is an int, return a new RandomState instance seeded with seed.
        If seed is already a RandomState instance, return it.
        Otherwise raise ValueError.

    Returns
    -------
    np.random.RandomState
        Random state object.

    Raises
    ------
    ValueError
        If seed is not valid, an error is thrown.
    """
    if seed is None or seed is np.random:
        return np.random.mtrand._rand
    if isinstance(seed, (numbers.Integral, np.integer, int)):
        return np.random.RandomState(seed)
    if isinstance(seed, np.random.RandomState):
        return seed
    # Wrap the seed in a 1-tuple: formatting a tuple seed directly with
    # ``%`` would raise TypeError instead of the documented ValueError.
    raise ValueError("%r cannot be used to seed a numpy.random.RandomState"
                     " instance" % (seed, ))


@contextmanager
def _close_temp_files(tmpfiles: list):
    """Utility function for creating a context and closing all temporary files
    once the context is exited. For correct functionality, all temporary file
    handles created inside the context must be appended to the ```tmpfiles```
    list.

    This function is taken wholesale from Scaper.

    Parameters
    ----------
    tmpfiles : list
        List of temporary file handles
    """

    def _close():
        for t in tmpfiles:
            try:
                t.close()
                os.unlink(t.name)
            except:
                pass

    try:
        yield
    except:
        _close()
        raise
    _close()


# Extensions (including the leading dot) treated as audio by default.
AUDIO_EXTENSIONS = [".wav", ".flac", ".mp3"]


def find_audio(folder: str, ext: List[str]=AUDIO_EXTENSIONS):
    """Finds all audio files in a directory recursively.
    Returns a list.

    If ``folder`` itself ends with one of the extensions it is treated as a
    file (or, when it contains ``*``, as a glob pattern) rather than as a
    directory to search.

    Parameters
    ----------
    folder : str
        Folder to look for audio files in, recursively. May also be a path
        to a single audio file or a glob pattern ending in an extension.
    ext : List[str], optional
        Extensions to look for, including the leading ".", by default
        ``['.wav', '.flac', '.mp3']``.

    Returns
    -------
    list
        ``pathlib.Path`` objects for directory searches and single files;
        plain strings when ``folder`` is a glob pattern.
    """
    root = Path(folder)
    root_text = str(root)

    # The caller may have passed an audio file (or a glob matching audio
    # files) directly instead of a directory.
    if root_text.endswith(tuple(ext)):
        if "*" not in root_text:
            return [root]
        # A glob must be expanded rather than returned as a literal path.
        return glob.glob(root_text, recursive=("**" in root_text))

    found = []
    for suffix in ext:
        found.extend(root.glob(f"**/*{suffix}"))
    return found


def read_sources(
        sources: List[str],
        remove_empty: bool=True,
        relative_path: str="",
        ext: List[str]=AUDIO_EXTENSIONS, ):
    """Reads audio sources that can either be folders
    full of audio files, or CSV files that contain paths
    to audio files. CSV files that adhere to the expected
    format can be generated by
    :py:func:`audiotools.data.preprocess.create_csv`.

    Parameters
    ----------
    sources : List[str]
        List of audio sources to be converted into a
        list of lists of audio files.
    remove_empty : bool, optional
        Whether or not to remove rows with an empty "path"
        from each CSV file, by default True.
    relative_path : str, optional
        Path that every non-empty "path" entry is joined onto,
        by default "".
    ext : List[str], optional
        Extensions passed to :py:func:`find_audio` for sources that are
        not CSV files, by default ``AUDIO_EXTENSIONS``.

    Returns
    -------
    list
        One list per source, each holding row dictionaries (with at least
        a "path" key) sorted by their "path".
    """
    files = []
    relative_path = Path(relative_path)
    for source in sources:
        source = str(source)
        _files = []
        if source.endswith(".csv"):
            # newline="" lets the csv module do its own newline handling,
            # as its documentation requires for file objects.
            with open(source, "r", newline="") as f:
                reader = csv.DictReader(f)
                for x in reader:
                    if remove_empty and x["path"] == "":
                        continue
                    if x["path"] != "":
                        x["path"] = str(relative_path / x["path"])
                    _files.append(x)
        else:
            for x in find_audio(source, ext=ext):
                x = str(relative_path / x)
                _files.append({"path": x})
        files.append(sorted(_files, key=lambda x: x["path"]))
    return files


def choose_from_list_of_lists(state: np.random.RandomState,
                              list_of_lists: list,
                              p: float=None):
    """Draws one item from a list of lists in two steps: first a list is
    picked (optionally weighted by ``p``), then an item is picked uniformly
    from that list.

    Parameters
    ----------
    state : np.random.RandomState
        Random state to use when choosing an item.
    list_of_lists : list
        A list of lists from which items will be drawn.
    p : float, optional
        Probabilities of each list, by default None

    Returns
    -------
    tuple
        ``(item, source_idx, item_idx)``: the chosen item, the index of the
        list it came from, and its index within that list.
    """
    candidates = list(range(len(list_of_lists)))
    source_idx = state.choice(candidates, p=p)
    chosen_list = list_of_lists[source_idx]
    item_idx = state.randint(len(chosen_list))
    return chosen_list[item_idx], source_idx, item_idx


@contextmanager
def chdir(newdir: typing.Union[Path, str]):
    """
    Temporarily switch the working directory.

    The process moves into ``newdir`` for the duration of the ``with``
    block and is returned to the directory it started from afterwards,
    even if the block raises. Handy for resolving relative paths against
    different run directories.

    Parameters
    ----------
    newdir : typing.Union[Path, str]
        Directory to switch to.
    """
    previous_dir = os.getcwd()
    try:
        os.chdir(newdir)
        yield
    finally:
        # Always restore the original working directory.
        os.chdir(previous_dir)


def move_to_device(data, device):
    """Moves ``data`` to the requested device.

    Parameters
    ----------
    data : paddle.Tensor or array-like
        Data to move. For ``"cpu"``, ``"gpu"`` and ``"cuda"`` it is passed
        through ``paddle.to_tensor``; for any other device string it must
        provide a ``.to`` method.
    device : str or None
        ``None`` or ``""`` returns ``data`` unchanged. ``"cpu"`` targets
        the CPU, ``"gpu"``/``"cuda"`` target GPU 0, and any other string
        (e.g. ``"gpu:1"``/``"cuda:1"``) is forwarded to ``data.to`` with
        ``"cuda"`` rewritten to ``"gpu"``.

    Returns
    -------
    paddle.Tensor
        ``data`` on the requested device (or ``data`` itself when no
        device is given).
    """
    if device is None or device == "":
        return data
    if device == 'cpu':
        return paddle.to_tensor(data, place=paddle.CPUPlace())
    if device in ('gpu', 'cuda'):
        # CUDAPlace requires an explicit device id; a bare "gpu"/"cuda"
        # means the first GPU.
        return paddle.to_tensor(data, place=paddle.CUDAPlace(0))
    return data.to(device.replace("cuda", "gpu"))


def prepare_batch(batch: typing.Union[dict, list, paddle.Tensor],
                  device: str="cpu"):
    """Moves items in a batch (typically generated by a DataLoader as a list
    or a dict) to the specified device. This works even if dictionaries
    are nested.

    Moving is best-effort: entries of a dict or list that cannot be moved
    are left as they are.

    Parameters
    ----------
    batch : typing.Union[dict, list, paddle.Tensor]
        Batch, typically generated by a dataloader, that will be moved to
        the device.
    device : str, optional
        Device to move batch to, by default "cpu". ``"cuda"`` is accepted
        as an alias for ``"gpu"``.

    Returns
    -------
    typing.Union[dict, list, paddle.Tensor]
        Batch with all values moved to the specified device.
    """
    device = device.replace("cuda", "gpu")
    if isinstance(batch, dict):
        # Flatten so nested dictionaries are handled in a single pass.
        batch = flatten(batch)
        for key, val in batch.items():
            try:
                batch[key] = move_to_device(val, device)
            except Exception:
                # Values that cannot be moved stay untouched; interrupts
                # (KeyboardInterrupt, SystemExit) are not swallowed.
                pass
        batch = unflatten(batch)
    elif paddle.is_tensor(batch):
        batch = move_to_device(batch, device)
    elif isinstance(batch, list):
        for i in range(len(batch)):
            try:
                batch[i] = batch[i].to(device)
            except Exception:
                # Items without a usable ``.to`` are left in place.
                pass
    return batch


def sample_from_dist(dist_tuple: tuple, state: np.random.RandomState=None):
    """Draws a value from a distribution described by a tuple.

    The first entry names the distribution and the remaining entries are
    its arguments. ``"const"`` simply yields the second entry; any other
    name is looked up as a method on the ``np.random.RandomState`` obtained
    from ``state`` and called with the remaining entries.

    Parameters
    ----------
    dist_tuple : tuple
        Distribution tuple
    state : np.random.RandomState, optional
        Random state, or seed to use, by default None

    Returns
    -------
    typing.Union[float, int, str]
        Draw from the distribution.

    Examples
    --------
    Sample from a uniform distribution:

    >>> dist_tuple = ("uniform", 0, 1)
    >>> sample_from_dist(dist_tuple)

    Sample from a constant distribution:

    >>> dist_tuple = ("const", 0)
    >>> sample_from_dist(dist_tuple)

    Sample from a normal distribution:

    >>> dist_tuple = ("normal", 0, 0.5)
    >>> sample_from_dist(dist_tuple)

    """
    dist_name = dist_tuple[0]
    if dist_name == "const":
        # Constants need no random state at all.
        return dist_tuple[1]

    rng = random_state(state)
    sampler = getattr(rng, dist_name)
    return sampler(*dist_tuple[1:])


# Figure width in pixels at which fonts are drawn at their nominal size.
BASE_SIZE = 864
# Figure size (width, height) in inches used when none is given.
DEFAULT_FIG_SIZE = (9, 3)


def format_figure(
        fig_size: tuple=None,
        title: str=None,
        fig=None,
        format_axes: bool=True,
        format: bool=True,
        font_color: str="white", ):
    """Prettifies the spectrogram and waveform plots. A title
    can be inset into the top right corner, and the axes can be
    inset into the figure, allowing the data to take up the entire
    image. Used in

    - :py:func:`audiotools.core.display.DisplayMixin.specshow`
    - :py:func:`audiotools.core.display.DisplayMixin.waveplot`
    - :py:func:`audiotools.core.display.DisplayMixin.wavespec`

    Parameters
    ----------
    fig_size : tuple, optional
        Size of figure, by default (9, 3)
    title : str, optional
        Title to inset in top right, by default None
    fig : matplotlib.figure.Figure, optional
        Figure object, if None ``plt.gcf()`` will be used, by default None
    format_axes : bool, optional
        Format the axes to be inside the figure, by default True
    format : bool, optional
        This formatting can be skipped entirely by passing ``format=False``
        to any of the plotting functions that use this formater, by default True
    font_color : str, optional
        Color of font of axes, by default "white"
    """
    import matplotlib
    import matplotlib.pyplot as plt

    if fig_size is None:
        fig_size = DEFAULT_FIG_SIZE
    if not format:
        return
    if fig is None:
        fig = plt.gcf()
    fig.set_size_inches(*fig_size)
    axs = fig.axes

    # Scale fonts with the figure width, relative to BASE_SIZE pixels.
    width_px = (fig.get_size_inches() * fig.dpi)[0]
    font_scale = width_px / BASE_SIZE

    if format_axes:
        axs = fig.axes
        # Styling shared by every inset tick label.
        tick_style = dict(
            xycoords="data",
            textcoords="offset points",
            color=font_color,
            fontsize=12 * font_scale,
            alpha=0.75, )

        for ax in axs:
            ymin = ax.get_ylim()[0]
            xmin = ax.get_xlim()[0]

            # Inset y tick labels along the left edge (values shown in
            # thousands); all labels are drawn on the first axes.
            for y_tick in ax.get_yticks()[2:-1]:
                axs[0].annotate(
                    f"{(y_tick / 1000):2.1f}k",
                    xy=(xmin, y_tick),
                    xytext=(5, -5),
                    ha="left",
                    va="top",
                    **tick_style)

            # Inset x tick labels along the bottom edge (seconds).
            for x_tick in ax.get_xticks()[2:-1]:
                axs[0].annotate(
                    f"{x_tick:2.1f}s",
                    xy=(x_tick, ymin),
                    xytext=(5, 5),
                    ha="center",
                    va="bottom",
                    **tick_style)

            # Let the data fill the axes and hide the regular axis artists.
            ax.margins(0, 0)
            ax.set_axis_off()
            ax.xaxis.set_major_locator(plt.NullLocator())
            ax.yaxis.set_major_locator(plt.NullLocator())

        plt.subplots_adjust(
            top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)

    if title is not None:
        title_box = axs[0].annotate(
            title,
            xy=(1, 1),
            xycoords="axes fraction",
            fontsize=20 * font_scale,
            xytext=(-5, -5),
            textcoords="offset points",
            ha="right",
            va="top",
            color="white", )
        title_box.set_bbox(
            dict(facecolor="black", alpha=0.5, edgecolor="black"))


# Error message template for unsupported batch element types; the ``{}``
# placeholder is presumably filled with the offending type by
# ``default_collate`` -- confirm against its (not fully visible) body.
_default_collate_err_msg_format = (
    "default_collate: batch must contain tensors, numpy arrays, numbers, "
    "dicts or lists; found {}")


def collate_tensor_fn(
        batch,
        *,
        collate_fn_map: Optional[Dict[Union[type, Tuple[type, ...]],
                                      Callable]]=None, ):
    """Collates a sequence of tensors by stacking them along a new leading
    axis. ``collate_fn_map`` is accepted for interface uniformity and is
    not used."""
    return paddle.stack(batch, axis=0)


def collate_float_fn(
        batch,
        *,
        collate_fn_map: Optional[Dict[Union[Type, Tuple[Type, ...]],
                                      Callable]]=None, ):
    """Collates a sequence of Python floats into a ``float64`` tensor.
    ``collate_fn_map`` is accepted for interface uniformity and is not
    used."""
    as_tensor = paddle.to_tensor(batch, dtype=paddle.float64)
    return as_tensor


def collate_int_fn(
        batch,
        *,
        collate_fn_map: Optional[Dict[Union[Type, Tuple[Type, ...]],
                                      Callable]]=None, ):
    """Collates a sequence of Python ints into a tensor (dtype chosen by
    ``paddle.to_tensor``). ``collate_fn_map`` is accepted for interface
    uniformity and is not used."""
    as_tensor = paddle.to_tensor(batch)
    return as_tensor


def collate_str_fn(
        batch,
        *,
        collate_fn_map: Optional[Dict[Union[Type, Tuple[Type, ...]],
                                      Callable]]=None, ):
    """Collates strings/bytes by handing the batch back untouched.
    ``collate_fn_map`` is accepted for interface uniformity and is not
    used."""
    unchanged = batch
    return unchanged


# Registry mapping element types to their collate functions. Insertion
# order is kept as Tensor, float, int, str, bytes.
default_collate_fn_map: Dict[Union[Type, Tuple[Type, ...]], Callable] = {
    paddle.Tensor: collate_tensor_fn,
    float: collate_float_fn,
    int: collate_int_fn,
    str: collate_str_fn,
    bytes: collate_str_fn,
}


def default_collate(batch,
                    *,
                    collate_fn_map: Optional[Dict[Union[Type, Tuple[Type, ...]],
                                                  Callable]]=None):
    r"""
    Recursively collate a batch, dispatching on the type of its first element.

    Lookup order:

    1. If ``collate_fn_map`` is given and contains the exact element type,
       that function is used.
    2. Otherwise the keys of ``collate_fn_map`` are scanned in insertion
       order and the first key the element is an instance of is used.
    3. Mappings, namedtuples and sequences are collated structurally by
       recursing into their fields / positions.

    Args:
        batch: a single batch to be collated (indexable, non-empty).
        collate_fn_map: Optional dictionary mapping from element type (or
            tuple of types) to the corresponding collate function.
    Note:
        Each collate function requires a positional argument for batch and a
        keyword argument for the dictionary of collate functions as
        `collate_fn_map`.
    Raises:
        RuntimeError: if sequence elements of the batch differ in length.
        TypeError: if no rule applies to the element type.
    """
    first = batch[0]
    first_type = type(first)

    def _recurse(samples):
        # Recursive step shared by every structural branch below.
        return default_collate(samples, collate_fn_map=collate_fn_map)

    if collate_fn_map is not None:
        # Exact type match wins over the subclass scan.
        if first_type in collate_fn_map:
            return collate_fn_map[first_type](
                batch, collate_fn_map=collate_fn_map)

        for registered_type in collate_fn_map:
            if isinstance(first, registered_type):
                handler = collate_fn_map[registered_type]
                return handler(batch, collate_fn_map=collate_fn_map)

    if isinstance(first, collections.abc.Mapping):

        def _collate_fields():
            return {
                key: _recurse([sample[key] for sample in batch])
                for key in first
            }

        try:
            return first_type(_collate_fields())
        except TypeError:
            # The mapping type may not support `__init__(iterable)`.
            return _collate_fields()

    if isinstance(first, tuple) and hasattr(first, "_fields"):  # namedtuple
        return first_type(*(_recurse(column) for column in zip(*batch)))

    if isinstance(first, collections.abc.Sequence):
        # Every element must have the same length before transposing.
        remaining = iter(batch)
        expected_len = len(next(remaining))
        for sample in remaining:
            if len(sample) != expected_len:
                raise RuntimeError(
                    "each element in list of batch should be of equal size")

        # Materialised because the fallback path may walk it a second time.
        columns = list(zip(*batch))

        if isinstance(first, tuple):
            # Backwards compatibility: plain tuples come back as lists.
            return [_recurse(column) for column in columns]

        try:
            return first_type([_recurse(column) for column in columns])
        except TypeError:
            # The sequence type may not support `__init__(iterable)` (e.g., `range`).
            return [_recurse(column) for column in columns]

    raise TypeError(_default_collate_err_msg_format.format(first_type))


def collate(list_of_dicts: list, n_splits: int=None):
    """Turn a list of (possibly nested) dictionaries into batched dictionaries.

    Each dictionary is flattened, values that share a key are gathered, and
    every group is batched: groups made up entirely of ``AudioSignal``
    objects go through ``AudioSignal.batch(..., pad_signals=True)``, all
    other groups go through :py:func:`default_collate` with
    ``default_collate_fn_map``. The result is unflattened again.

    ``n_splits`` allows cutting the input into several sub-batches, e.g. for
    gradient accumulation.

    Parameters
    ----------
    list_of_dicts : list
        List of dictionaries to be collated.
    n_splits : int
        Number of splits to make when creating the batches (split into
        sub-batches). Useful for things like gradient accumulation.

    Returns
    -------
    dict
        Dictionary containing batched data when ``n_splits`` is None;
        otherwise a list of such dictionaries, one per sub-batch.
    """

    from .audio_signal import AudioSignal

    # A list is only returned when the caller explicitly asked for splits.
    wants_list = n_splits is not None
    if not wants_list:
        n_splits = 1

    total = len(list_of_dicts)
    chunk_size = int(math.ceil(total / n_splits))

    collated = []
    for start in range(0, total, chunk_size):
        chunk = list_of_dicts[start:start + chunk_size]
        # Flatten the dictionaries to avoid recursion.
        flat_chunk = [flatten(entry) for entry in chunk]
        grouped = {
            key: [flat[key] for flat in flat_chunk]
            for key in flat_chunk[0]
        }

        batch = {}
        for key, values in grouped.items():
            only_signals = all(isinstance(v, AudioSignal) for v in values)
            if only_signals:
                batch[key] = AudioSignal.batch(values, pad_signals=True)
            else:
                batch[key] = default_collate(
                    values, collate_fn_map=default_collate_fn_map)
        collated.append(unflatten(batch))

    if wants_list:
        return collated
    return collated[0]


def hz_to_bin(hz: paddle.Tensor, n_fft: int, sample_rate: int):
    """Map frequencies in Hz to the index of the nearest frequency bin.

    The bin centres are ``2 + n_fft // 2`` evenly spaced values between 0 and
    ``sample_rate / 2``; input frequencies above ``sample_rate / 2`` are
    clipped to it first.

    Parameters
    ----------
    hz : paddle.Tensor
       Tensor of frequencies in Hz.
    n_fft : int
        Number of FFT bins.
    sample_rate : int
        Sample rate of audio.

    Returns
    -------
    paddle.Tensor
        Closest bins to the data, with the same shape as ``hz``.
    """
    original_shape = hz.shape
    nyquist = sample_rate / 2

    flat_hz = hz.reshape([-1])
    bin_freqs = paddle.linspace(0, nyquist, 2 + n_fft // 2)
    flat_hz = paddle.clip(flat_hz, max=nyquist).astype(bin_freqs.dtype)

    # Rows index bins, columns index input frequencies.
    distances = (flat_hz[None, :] - bin_freqs[:, None]).abs()
    nearest = distances.argmin(axis=0)
    return nearest.reshape(original_shape)


def generate_chord_dataset(
        max_voices: int=8,
        sample_rate: int=44100,
        num_items: int=5,
        duration: float=1.0,
        min_note: str="C2",
        max_note: str="C6",
        output_dir: Path="chords", ):
    """
    Build a toy multitrack chord dataset out of sine waves and write it to disk.

    Every track gets a random number of voices (1..``max_voices``); each voice
    is a sine wave at a random MIDI note between ``min_note`` and ``max_note``
    lasting between 85% and 100% of ``duration``. Voices are written as
    ``<output_dir>/track_<i>/voice_<j>.wav`` and one CSV per voice name is
    created in ``output_dir`` (with an empty path for tracks lacking it).

    Parameters
    ----------
    max_voices : int, optional
        Maximum number of voices in a chord, by default 8
    sample_rate : int, optional
        Sample rate of audio, by default 44100
    num_items : int, optional
        Number of items to generate, by default 5
    duration : float, optional
        Duration of each item, by default 1.0
    min_note : str, optional
        Minimum note in the dataset, by default "C2"
    max_note : str, optional
        Maximum note in the dataset, by default "C6"
    output_dir : Path, optional
        Directory to save the dataset, by default "chords"

    Returns
    -------
    Path
        The output directory.
    """
    import librosa
    from .audio_signal import AudioSignal
    from ..data.preprocess import create_csv

    lowest_midi = librosa.note_to_midi(min_note)
    highest_midi = librosa.note_to_midi(max_note)

    def _random_track():
        # The voice count is drawn first, then (note, duration) per voice.
        voices = {}
        for voice_idx in range(random.randint(1, max_voices)):
            midinote = random.randint(lowest_midi, highest_midi)
            dur = random.uniform(0.85 * duration, duration)
            voices[f"voice_{voice_idx}"] = AudioSignal.wave(
                frequency=librosa.midi_to_hz(midinote),
                duration=dur,
                sample_rate=sample_rate,
                shape="sine", )
        return voices

    tracks = [_random_track() for _ in range(num_items)]

    # save the tracks to disk
    output_dir = Path(output_dir)
    output_dir.mkdir(exist_ok=True)
    for track_no, track in enumerate(tracks):
        track_dir = output_dir / f"track_{track_no}"
        track_dir.mkdir(exist_ok=True)
        for voice_name in track:
            track[voice_name].write(track_dir / f"{voice_name}.wav")

    # One column of paths per voice name; "" marks a track without that voice.
    all_voices = list(set([name for track in tracks for name in track.keys()]))
    voice_lists = {
        voice: [
            track[voice].path_to_file if voice in track else ""
            for track in tracks
        ]
        for voice in all_voices
    }

    for voice_name in voice_lists:
        create_csv(
            voice_lists[voice_name],
            output_dir / f"{voice_name}.csv",
            loudness=True)

    return output_dir


================================================
FILE: paddlespeech/audiotools/data/__init__.py
================================================
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import datasets
from . import preprocess
from . import transforms


================================================
FILE: paddlespeech/audiotools/data/datasets.py
================================================
# MIT License, Copyright (c) 2023-Present, Descript.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Modified from audiotools(https://github.com/descriptinc/audiotools/blob/master/audiotools/data/datasets.py)
from pathlib import Path
from typing import Callable
from typing import Dict
from typing import List
from typing import Union

import numpy as np
import paddle
from paddle.io import DistributedBatchSampler
from paddle.io import SequenceSampler

from ..core import AudioSignal
from ..core import util

# Public names exported by ``from <this module> import *``.
__all__ = [
    "AudioLoader", "AudioDataset", "ConcatDataset",
    "ResumableDistributedSampler", "ResumableSequentialSampler"
]


class AudioLoader:
    """Loads audio endlessly from a list of audio sources
    containing paths to audio files. Audio sources can be
    folders full of audio files (which are found via file
    extension) or by providing a CSV file which contains paths
    to audio files.

    Parameters
    ----------
    sources : List[str], optional
        Sources containing folders, or CSVs with
        paths to audio files, by default None
    weights : List[float], optional
        Weights to sample audio files from each source, by default None
    relative_path : str, optional
        Path audio should be loaded relative to, by default ""
    transform : Callable, optional
        Transform to instantiate alongside audio sample,
        by default None
    ext : List[str]
        List of extensions to find audio within each source by. Can
        also be a file name (e.g. "vocals.wav"). by default
        ``['.wav', '.flac', '.mp3', '.mp4']``.
    shuffle: bool
        Whether to shuffle the files within the dataloader. Defaults to True.
    shuffle_state: int
        State to use to seed the shuffle of the files.
    """

    def __init__(
            self,
            sources: List[str]=None,
            weights: List[float]=None,
            transform: Callable=None,
            relative_path: str="",
            ext: List[str]=util.AUDIO_EXTENSIONS,
            shuffle: bool=True,
            shuffle_state: int=0, ):
        # One list of audio-info dicts per source.
        self.audio_lists = util.read_sources(
            sources, relative_path=relative_path, ext=ext)

        # Flat (source index, item index) pairs, used for ``global_idx``
        # lookups in ``__call__``.
        self.audio_indices = [(src_idx, item_idx)
                              for src_idx, src in enumerate(self.audio_lists)
                              for item_idx in range(len(src))]
        if shuffle:
            state = util.random_state(shuffle_state)
            state.shuffle(self.audio_indices)

        self.sources = sources
        self.weights = weights
        self.transform = transform

    def __call__(
            self,
            state,
            sample_rate: int,
            duration: float,
            loudness_cutoff: float=-40,
            num_channels: int=1,
            offset: float=None,
            source_idx: int=None,
            item_idx: int=None,
            global_idx: int=None, ):
        """Draw one audio excerpt and return it with its bookkeeping info.

        The file is selected, in priority order, by

        1. ``source_idx`` and ``item_idx`` when both are given (a failed
           lookup yields the placeholder entry ``{"path": "none"}``),
        2. ``global_idx`` (wrapped modulo the number of known files),
        3. a random draw from the sources using ``state`` and
           ``self.weights``.

        Parameters
        ----------
        state
            Random state forwarded to the sampling helpers and to
            ``transform.instantiate``.
        sample_rate : int
            Sample rate the returned signal is resampled to.
        duration : float
            Duration of the excerpt in seconds; shorter signals are
            zero-padded to ``int(duration * sample_rate)`` samples.
        loudness_cutoff : float, optional
            Passed to ``AudioSignal.salient_excerpt``, by default -40
        num_channels : int, optional
            When 1, the signal is converted to mono, by default 1
        offset : float, optional
            When given, the excerpt is read at this offset instead of
            using ``AudioSignal.salient_excerpt``, by default None
        source_idx : int, optional
            Index of the source list to read from, by default None
        item_idx : int, optional
            Index of the item inside the source list, by default None
        global_idx : int, optional
            Index into the flat (shuffled) list of all items, by default None

        Returns
        -------
        dict
            Keys ``signal``, ``source_idx``, ``item_idx``, ``source`` and
            ``path``, plus ``transform_args`` when a transform is set.
        """
        if source_idx is not None and item_idx is not None:
            try:
                audio_info = self.audio_lists[source_idx][item_idx]
            except Exception:
                # Best-effort lookup: an entry that cannot be fetched becomes
                # a silent placeholder. ``Exception`` (rather than a bare
                # ``except``) keeps KeyboardInterrupt/SystemExit propagating.
                audio_info = {"path": "none"}
        elif global_idx is not None:
            source_idx, item_idx = self.audio_indices[global_idx %
                                                      len(self.audio_indices)]
            audio_info = self.audio_lists[source_idx][item_idx]
        else:
            audio_info, source_idx, item_idx = util.choose_from_list_of_lists(
                state, self.audio_lists, p=self.weights)

        path = audio_info["path"]
        # Default for placeholder entries: silence of the requested length.
        signal = AudioSignal.zeros(duration, sample_rate, num_channels)

        if path != "none":
            if offset is None:
                signal = AudioSignal.salient_excerpt(
                    path,
                    duration=duration,
                    state=state,
                    loudness_cutoff=loudness_cutoff, )
            else:
                signal = AudioSignal(
                    path,
                    offset=offset,
                    duration=duration, )

        if num_channels == 1:
            signal = signal.to_mono()
        signal = signal.resample(sample_rate)

        if signal.duration < duration:
            signal = signal.zero_pad_to(int(duration * sample_rate))

        # Carry every field of the audio-info entry along on the signal.
        for k, v in audio_info.items():
            signal.metadata[k] = v

        item = {
            "signal": signal,
            "source_idx": source_idx,
            "item_idx": item_idx,
            "source": str(self.sources[source_idx]),
            "path": str(path),
        }
        if self.transform is not None:
            item["transform_args"] = self.transform.instantiate(
                state, signal=signal)
        return item


def default_matcher(x, y):
    """Return True when the two paths live in the same parent directory."""
    parent_x = Path(x).parent
    parent_y = Path(y).parent
    return parent_x == parent_y


def align_lists(lists, matcher: Callable=default_matcher):
    """Align several lists of audio-info dicts in place.

    The longest list is walked position by position. Any list that is too
    short at a position gets a ``{"path": "none"}`` placeholder appended; any
    list whose entry at that position does not match (per ``matcher``) the
    longest list's entry gets a placeholder inserted there.

    Returns the same ``lists`` object (its members are modified in place).
    """
    lengths = [len(entries) for entries in lists]
    reference = lists[np.argmax(lengths)]

    for pos, ref_item in enumerate(reference):
        for entries in lists:
            if pos >= len(entries):
                entries.append({"path": "none"})
                continue
            if not matcher(entries[pos]["path"], ref_item["path"]):
                entries.insert(pos, {"path": "none"})
    return lists


class AudioDataset:
    """Loads audio from multiple loaders (with associated transforms)
    for a specified number of samples. Excerpts are drawn randomly
    of the specified duration, above a specified loudness threshold
    and are resampled on the fly to the desired sample rate
    (if it is different from the audio source sample rate).

    This takes either a single AudioLoader object,
    a list of AudioLoader objects, or a dictionary of AudioLoader
    objects. Each AudioLoader is called by the dataset, and the
    result is placed in the output dictionary. A transform can also be
    specified for the entire dataset, rather than for each specific
    loader. This transform can be applied to the output of all the
    loaders if desired.

    AudioLoader objects can be specified as aligned, which means the
    loaders correspond to multitrack audio (e.g. a vocals, bass,
    drums, and other loader for multitrack music mixtures).


    Parameters
    ----------
    loaders : Union[AudioLoader, List[AudioLoader], Dict[str, AudioLoader]]
        AudioLoaders to sample audio from.
    sample_rate : int
        Desired sample rate.
    n_examples : int, optional
        Number of examples (length of dataset), by default 1000
    duration : float, optional
        Duration of audio samples, by default 0.5
    offset : float, optional
        Stored on the dataset, by default None.
        NOTE(review): ``__getitem__`` does not forward this value to the
        loaders (aligned loaders receive the first loader's metadata
        offset instead) -- confirm whether that is intended.
    loudness_cutoff : float, optional
        Loudness cutoff threshold for audio samples, by default -40
    num_channels : int, optional
        Number of channels in output audio, by default 1
    transform : Callable, optional
        Transform to instantiate alongside each dataset item, by default None
    aligned : bool, optional
        Whether the loaders should be sampled in an aligned manner (e.g. same
        offset, duration, and matched file name), by default False
    shuffle_loaders : bool, optional
        Whether to shuffle the loaders before sampling from them, by default False
    matcher : Callable
        How to match files from adjacent audio lists (e.g. for a multitrack audio loader),
        by default uses the parent directory of each file.
    without_replacement : bool
        Whether to choose files with or without replacement, by default True.


    Examples
    --------
    >>> from paddlespeech.audiotools.data.datasets import AudioLoader
    >>> from paddlespeech.audiotools.data.datasets import AudioDataset
    >>> from paddlespeech.audiotools import transforms as tfm
    >>> import numpy as np
    >>>
    >>> loaders = [
    >>>     AudioLoader(
    >>>         sources=[f"tests/audiotools/audio/spk"],
    >>>         transform=tfm.Equalizer(),
    >>>         ext=["wav"],
    >>>     )
    >>>     for i in range(5)
    >>> ]
    >>>
    >>> dataset = AudioDataset(
    >>>     loaders = loaders,
    >>>     sample_rate = 44100,
    >>>     duration = 1.0,
    >>>     transform = tfm.RescaleAudio(),
    >>> )
    >>>
    >>> item = dataset[np.random.randint(len(dataset))]
    >>>
    >>> for i in range(len(loaders)):
    >>>     item[i]["signal"] = loaders[i].transform(
    >>>         item[i]["signal"], **item[i]["transform_args"]
    >>>     )
    >>>     item[i]["signal"].widget(i)
    >>>
    >>> mix = sum([item[i]["signal"] for i in range(len(loaders))])
    >>> mix = dataset.transform(mix, **item["transform_args"])
    >>> mix.widget("mix")

    Below is an example of how one could load MUSDB multitrack data:

    >>> from paddlespeech import audiotools as at
    >>> from pathlib import Path
    >>> from paddlespeech.audiotools import transforms as tfm
    >>> import numpy as np
    >>> import paddle
    >>>
    >>> def build_dataset(
    >>>     sample_rate: int = 44100,
    >>>     duration: float = 5.0,
    >>>     musdb_path: str = "~/.data/musdb/",
    >>> ):
    >>>     musdb_path = Path(musdb_path).expanduser()
    >>>     loaders = {
    >>>         src: at.datasets.AudioLoader(
    >>>             sources=[musdb_path],
    >>>             transform=tfm.Compose(
    >>>                 tfm.VolumeNorm(("uniform", -20, -10)),
    >>>                 tfm.Silence(prob=0.1),
    >>>             ),
    >>>             ext=[f"{src}.wav"],
    >>>         )
    >>>         for src in ["vocals", "bass", "drums", "other"]
    >>>     }
    >>>
    >>>     dataset = at.datasets.AudioDataset(
    >>>         loaders=loaders,
    >>>         sample_rate=sample_rate,
    >>>         duration=duration,
    >>>         num_channels=1,
    >>>         aligned=True,
    >>>         transform=tfm.RescaleAudio(),
    >>>         shuffle_loaders=True,
    >>>     )
    >>>     return dataset, list(loaders.keys())
    >>>
    >>> train_data, sources = build_dataset()
    >>> dataloader = paddle.io.DataLoader(
    >>>     train_data,
    >>>     batch_size=16,
    >>>     num_workers=0,
    >>>     collate_fn=train_data.collate,
    >>> )
    >>> batch = next(iter(dataloader))
    >>>
    >>> for k in sources:
    >>>     src = batch[k]
    >>>     src["transformed"] = train_data.loaders[k].transform(
    >>>         src["signal"].clone(), **src["transform_args"]
    >>>     )
    >>>
    >>> mixture = sum(batch[k]["transformed"] for k in sources)
    >>> mixture = train_data.transform(mixture, **batch["transform_args"])
    >>>
    >>> # Say a model takes the mix and gives back (n_batch, n_src, n_time).
    >>> # Construct the targets:
    >>> targets = at.AudioSignal.batch([batch[k]["transformed"] for k in sources], dim=1)

    Similarly, here's example code for loading Slakh data:

    >>> from paddlespeech import audiotools as at
    >>> from pathlib import Path
    >>> from paddlespeech.audiotools import transforms as tfm
    >>> import numpy as np
    >>> import paddle
    >>> import glob
    >>>
    >>> def build_dataset(
    >>>     sample_rate: int = 16000,
    >>>     duration: float = 10.0,
    >>>     slakh_path: str = "~/.data/slakh/",
    >>> ):
    >>>     slakh_path = Path(slakh_path).expanduser()
    >>>
    >>>     # Find the max number of sources in Slakh
    >>>     src_names = [x.name for x in list(slakh_path.glob("**/*.wav"))  if "S" in str(x.name)]
    >>>     n_sources = len(list(set(src_names)))
    >>>
    >>>     loaders = {
    >>>         f"S{i:02d}": at.datasets.AudioLoader(
    >>>             sources=[slakh_path],
    >>>             transform=tfm.Compose(
    >>>                 tfm.VolumeNorm(("uniform", -20, -10)),
    >>>                 tfm.Silence(prob=0.1),
    >>>             ),
    >>>             ext=[f"S{i:02d}.wav"],
    >>>         )
    >>>         for i in range(n_sources)
    >>>     }
    >>>     dataset = at.datasets.AudioDataset(
    >>>         loaders=loaders,
    >>>         sample_rate=sample_rate,
    >>>         duration=duration,
    >>>         num_channels=1,
    >>>         aligned=True,
    >>>         transform=tfm.RescaleAudio(),
    >>>         shuffle_loaders=False,
    >>>     )
    >>>
    >>>     return dataset, list(loaders.keys())
    >>>
    >>> train_data, sources = build_dataset()
    >>> dataloader = paddle.io.DataLoader(
    >>>     train_data,
    >>>     batch_size=16,
    >>>     num_workers=0,
    >>>     collate_fn=train_data.collate,
    >>> )
    >>> batch = next(iter(dataloader))
    >>>
    >>> for k in sources:
    >>>     src = batch[k]
    >>>     src["transformed"] = train_data.loaders[k].transform(
    >>>         src["signal"].clone(), **src["transform_args"]
    >>>     )
    >>>
    >>> mixture = sum(batch[k]["transformed"] for k in sources)
    >>> mixture = train_data.transform(mixture, **batch["transform_args"])

    """

    def __init__(
            self,
            loaders: Union[AudioLoader, List[AudioLoader], Dict[str,
                                                                AudioLoader]],
            sample_rate: int,
            n_examples: int=1000,
            duration: float=0.5,
            offset: float=None,
            loudness_cutoff: float=-40,
            num_channels: int=1,
            transform: Callable=None,
            aligned: bool=False,
            shuffle_loaders: bool=False,
            matcher: Callable=default_matcher,
            without_replacement: bool=True, ):
        """Store the configuration and, when ``aligned``, align the loaders'
        audio lists in place (see the class docstring for the parameters)."""
        # Internally we convert loaders to a dictionary
        if isinstance(loaders, list):
            loaders = {i: l for i, l in enumerate(loaders)}
        elif isinstance(loaders, AudioLoader):
            loaders = {0: loaders}

        self.loaders = loaders
        self.loudness_cutoff = loudness_cutoff
        self.num_channels = num_channels

        self.length = n_examples
        self.transform = transform
        self.sample_rate = sample_rate
        self.duration = duration
        self.offset = offset
        self.aligned = aligned
        self.shuffle_loaders = shuffle_loaders
        self.without_replacement = without_replacement

        if aligned:
            loaders_list = list(loaders.values())
            # The number of sources is taken from the first loader; the i-th
            # audio list of every loader is aligned against the others.
            for i in range(len(loaders_list[0].audio_lists)):
                input_lists = [l.audio_lists[i] for l in loaders_list]
                # Alignment happens in-place
                align_lists(input_lists, matcher)

    def __getitem__(self, idx):
        """Draw one item: a dict with one entry per loader key, plus ``idx``
        and (when a dataset transform is set) ``transform_args``. With a
        single loader, that loader's entry is merged into the top level."""
        # Random state derived from the index via util.random_state.
        state = util.random_state(idx)
        # NOTE(review): this local is only reassigned in the aligned branch
        # below; self.offset is never put into loader_kwargs -- confirm.
        offset = None if self.offset is None else self.offset
        item = {}

        keys = list(self.loaders.keys())
        if self.shuffle_loaders:
            state.shuffle(keys)

        loader_kwargs = {
            "state": state,
            "sample_rate": self.sample_rate,
            "duration": self.duration,
            "loudness_cutoff": self.loudness_cutoff,
            "num_channels": self.num_channels,
            "global_idx": idx if self.without_replacement else None,
        }

        # Draw item from first loader
        loader = self.loaders[keys[0]]
        item[keys[0]] = loader(**loader_kwargs)

        for key in keys[1:]:
            loader = self.loaders[key]
            if self.aligned:
                # Path mapper takes the current loader + everything
                # returned by the first loader.
                offset = item[keys[0]]["signal"].metadata["offset"]
                loader_kwargs.update({
                    "offset": offset,
                    "source_idx": item[keys[0]]["source_idx"],
                    "item_idx": item[keys[0]]["item_idx"],
                })
            item[key] = loader(**loader_kwargs)

        # Sort dictionary back into original order
        keys = list(self.loaders.keys())
        item = {k: item[k] for k in keys}

        item["idx"] = idx
        if self.transform is not None:
            # The dataset-level transform is instantiated against the signal
            # of the first loader (in original key order).
            item["transform_args"] = self.transform.instantiate(
                state=state, signal=item[keys[0]]["signal"])

        # If there's only one loader, pop it up
        # to the main dictionary, instead of keeping it
        # nested.
        if len(keys) == 1:
            item.update(item.pop(keys[0]))

        return item

    def __len__(self):
        """Return ``n_examples`` as passed to the constructor."""
        return self.length

    @staticmethod
    def collate(list_of_dicts: Union[list, dict], n_splits: int=None):
        """Collates items drawn from this dataset. Uses
        :py:func:`audiotools.core.util.collate`.

        Parameters
        ----------
        list_of_dicts : typing.Union[list, dict]
            Data drawn from each item.
        n_splits : int
            Number of splits to make when creating the batches (split into
            sub-batches). Useful for things like gradient accumulation.

        Returns
        -------
        dict
            Dictionary of batched data.
        """
        return util.collate(list_of_dicts, n_splits=n_splits)


class ConcatDataset(AudioDataset):
    """Interleaves several datasets behind a single index space.

    Index ``idx`` is served by dataset number ``idx % len(datasets)``, which
    is asked for its item ``idx // len(datasets)``. The reported length is
    the sum of the lengths of the wrapped datasets.
    """

    def __init__(self, datasets: list):
        # The parent initialiser is intentionally not invoked; only the list
        # of wrapped datasets is kept.
        self.datasets = datasets

    def __len__(self):
        total = 0
        for member in self.datasets:
            total += len(member)
        return total

    def __getitem__(self, idx):
        count = len(self.datasets)
        which = idx % count
        inner_idx = idx // count
        return self.datasets[which][inner_idx]


class ResumableDistributedSampler(DistributedBatchSampler):
    """Distributed sampler that can be resumed from a given start index.

    Wraps ``paddle.io.DistributedBatchSampler``: ``__iter__`` consumes the
    parent's batches, skips the leading part determined by ``start_idx`` and
    yields the remaining indices one at a time. After one full iteration the
    start index is reset to 0.
    """

    def __init__(self,
                 dataset,
                 batch_size,
                 start_idx: int=None,
                 num_replicas=None,
                 rank=None,
                 shuffle=False,
                 drop_last=False):
        super().__init__(
            dataset=dataset,
            batch_size=batch_size,
            num_replicas=num_replicas,
            rank=rank,
            shuffle=shuffle,
            drop_last=drop_last, )
        # Start index, allows to resume an experiment at the index it was
        # NOTE(review): this reads ``self.num_replicas``, which is not set in
        # this class -- confirm the parent class exposes an attribute with
        # that name (rather than e.g. ``nranks``).
        if start_idx is not None:
            self.start_idx = start_idx // self.num_replicas
        else:
            self.start_idx = 0
        # Recompute the total number of samples, because the ``__len__`` of
        # DistributedBatchSampler is computed from the post-shuffle sample count.
        # NOTE(review): ``self.indices`` is not set in this class either --
        # confirm the parent provides it when ``shuffle=True``.
        self.total_size = len(self.dataset) if not shuffle else len(
            self.indices)

    def __iter__(self):
        # Paddle's DistributedBatchSampler returns whole batches, so they are
        # expanded into individual indices here.
        indices_iter = iter(super().__iter__())
        # Skip the first ``start_idx`` batches.
        for _ in range(self.start_idx):
            next(indices_iter)

        # NOTE(review): ``current_idx`` starts counting after the batches
        # skipped above, so the check below drops a further
        # ``start_idx * batch_size`` indices -- confirm this double skip is
        # intended.
        current_idx = 0
        while True:
            batch_indices = next(indices_iter, None)
            if batch_indices is None:
                break
            for idx in batch_indices:
                if current_idx >= self.start_idx * self.batch_size:  # adjusted condition so that iteration starts from start_idx
                    yield idx
                current_idx += 1
        self.start_idx = 0  # set the index back to 0 so for the next epoch


class ResumableSequentialSampler(SequenceSampler):
    """Sequential sampler whose first pass can start at a given position.

    The first ``start_idx`` indices produced by the parent sampler are
    dropped. Once an iteration runs to completion, ``start_idx`` is reset to
    0 so later epochs cover everything.
    """

    def __init__(self, dataset, start_idx: int=None, **kwargs):
        super().__init__(dataset, **kwargs)
        # Start index, allows to resume an experiment at the index it was
        if start_idx is None:
            start_idx = 0
        self.start_idx = start_idx

    def __iter__(self):
        position = 0
        for idx in super().__iter__():
            if position >= self.start_idx:
                yield idx
            position += 1
        # Reset so the next epoch starts from the beginning.
        self.start_idx = 0


================================================
FILE: paddlespeech/audiotools/data/preprocess.py
================================================
# MIT License, Copyright (c) 2023-Present, Descript.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Modified from audiotools(https://github.com/descriptinc/audiotools/blob/master/audiotools/data/preprocess.py)
import csv
import os
from pathlib import Path

from tqdm import tqdm

from ..core import AudioSignal


def create_csv(audio_files: list,
               output_csv: Path,
               loudness: bool=False,
               data_path: str=None):
    """Converts a list of audio files to a CSV file. If ``loudness = True``,
    the output of this function will create a CSV file that looks something
    like:

    ..  csv-table::
        :header: path,loudness

        daps/produced/f1_script1_produced.wav,-16.299999237060547
        daps/produced/f1_script2_produced.wav,-16.600000381469727
        daps/produced/f1_script3_produced.wav,-17.299999237060547
        daps/produced/f1_script4_produced.wav,-16.100000381469727
        daps/produced/f1_script5_produced.wav,-16.700000762939453
        daps/produced/f3_script1_produced.wav,-16.5

    ..  note::
        Paths are written relative to ``data_path`` when it is given;
        otherwise they are written exactly as passed in ``audio_files``.

    You can produce a CSV file from a directory of audio files via:

    >>> from paddlespeech import audiotools
    >>> directory = ...
    >>> audio_files = audiotools.util.find_audio(directory)
    >>> output_csv = "train.csv"
    >>> audiotools.data.preprocess.create_csv(
    >>>     audio_files, output_csv, loudness=True
    >>> )

    Note that you can create empty rows in the CSV file by passing an empty
    string or None in the ``audio_files`` list. This is useful if you want to
    sync multiple CSV files in a multitrack setting. The loudness of these
    empty rows will be set to -inf.

    Parameters
    ----------
    audio_files : list
        List of audio files. May be empty, in which case only the header
        row is written.
    output_csv : Path
        Output CSV, with each row containing the relative path of every file
        to ``data_path``, if specified (defaults to None).
    loudness : bool
        Compute loudness of entire file and store alongside path.
    data_path : str, optional
        Directory the written paths are made relative to, by default None
        (paths are written unchanged).
    """
    # Columns are fixed up front (same order the rows are built in) so the
    # header can be written even when there are no rows at all.
    fieldnames = ["path", "loudness"] if loudness else ["path"]

    info = []
    pbar = tqdm(audio_files)
    for af in pbar:
        # ``None`` is documented as an empty placeholder row, just like "";
        # ``Path(None)`` would raise, so normalise it first.
        af = Path("" if af is None else af)
        pbar.set_description(f"Processing {af.name}")
        _info = {}
        if af.name == "":
            _info["path"] = ""
            if loudness:
                _info["loudness"] = -float("inf")
        else:
            _info["path"] = af.relative_to(
                data_path) if data_path is not None else af
            if loudness:
                _info["loudness"] = AudioSignal(af).ffmpeg_loudness().item()

        info.append(_info)

    # ``newline=""`` is what the csv module expects for files it writes to;
    # without it extra blank lines can appear on platforms using \r\n.
    with open(output_csv, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(info)


================================================
FILE: paddlespeech/audiotools/data/transforms.py
================================================
# MIT License, Copyright (c) 2023-Present, Descript.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Modified from audiotools(https://github.com/descriptinc/audiotools/blob/master/audiotools/data/transforms.py)
import copy
from contextlib import contextmanager
from inspect import signature
from typing import List

import numpy as np
import paddle
from flatten_dict import flatten
from flatten_dict import unflatten
from numpy.random import RandomState

from .. import ml
from ..core import AudioSignal
from ..core import util
from .datasets import AudioLoader
from paddlespeech.utils import satisfy_paddle_version

# Names exported by ``from <this module> import *``.
__all__ = [
    "Identity",
    "SpectralTransform",
    "Compose",
    "Choose",
    "Repeat",
    "RepeatUpTo",
    "ClippingDistortion",
    "Equalizer",
    "BackgroundNoise",
    "RoomImpulseResponse",
    "VolumeNorm",
    "GlobalVolumeNorm",
    "Silence",
    "LowPass",
    "HighPass",
    "FrequencyMask",
    "TimeMask",
    "Smoothing",
    "FrequencyNoise",
]


class BaseTransform:
    """This is the base class for all transforms that are implemented
    in this library. Transforms have two main operations: ``transform``
    and ``instantiate``.

    ``instantiate`` sets the parameters randomly
    from distribution tuples for each parameter. For example, for the
    ``BackgroundNoise`` transform, the signal-to-noise ratio (``snr``)
    is chosen randomly by instantiate. By default, it is chosen uniformly
    between 10.0 and 30.0 (the tuple is set to ``("uniform", 10.0, 30.0)``).

    ``transform`` applies the transform using the instantiated parameters.
    A simple example is as follows:

    >>> seed = 0
    >>> signal = ...
    >>> transform = transforms.NoiseFloor(db = ("uniform", -50.0, -30.0))
    >>> kwargs = transform.instantiate()
    >>> output = transform(signal.clone(), **kwargs)

    By breaking apart the instantiation of parameters from the actual audio
    processing of the transform, we can make things more reproducible, while
    also applying the transform on batches of data efficiently on GPU,
    rather than on individual audio samples.

    ..  note::
        We call ``signal.clone()`` for the input to the ``transform`` function
        because signals are modified in-place! If you don't clone the signal,
        you will lose the original data.

    Parameters
    ----------
    keys : list, optional
        Keys that the transform looks for when
        calling ``self.transform``, by default []. In general this is
        set automatically, and you won't need to manipulate this argument.
    name : str, optional
        Name of this transform, used to identify it in the dictionary
        produced by ``self.instantiate``, by default None (in which case the
        class name is used)
    prob : float, optional
        Probability of applying this transform, by default 1.0

    Examples
    --------

    >>> seed = 0
    >>>
    >>> audio_path = "tests/audio/spk/f10_script4_produced.wav"
    >>> signal = AudioSignal(audio_path, offset=10, duration=2)
    >>> transform = tfm.Compose(
    >>>     [
    >>>         tfm.RoomImpulseResponse(sources=["tests/audio/irs.csv"]),
    >>>         tfm.BackgroundNoise(sources=["tests/audio/noises.csv"]),
    >>>     ],
    >>> )
    >>>
    >>> kwargs = transform.instantiate(seed, signal)
    >>> output = transform(signal, **kwargs)

    """

    def __init__(self, keys: list=[], name: str=None, prob: float=1.0):
        """Record the keys this transform expects, its probability and name.

        The default ``keys`` list is never mutated here: it is only used on
        the right-hand side of a list concatenation.
        """
        # Get keys from the _transform signature.
        tfm_keys = list(signature(self._transform).parameters.keys())

        # Filter out signal and kwargs keys.
        ignore_keys = ["signal", "kwargs"]
        tfm_keys = [k for k in tfm_keys if k not in ignore_keys]

        # Combine keys specified by the child class, the keys found in
        # _transform signature, and the mask key.
        self.keys = keys + tfm_keys + ["mask"]

        self.prob = prob

        # Fall back to the class name when no explicit name is given.
        if name is None:
            name = self.__class__.__name__
        self.name = name

    def _prepare(self, batch: dict):
        """Return this transform's entry of ``batch`` (``batch[self.name]``).

        Raises ``AssertionError`` if any key listed in ``self.keys`` is
        missing from that entry.
        """
        sub_batch = batch[self.name]

        for k in self.keys:
            assert k in sub_batch.keys(), f"{k} not in batch"

        return sub_batch

    def _transform(self, signal):
        """Default processing step: return ``signal`` unchanged.

        Subclasses override this. The names of its parameters (other than
        ``signal`` and ``kwargs``) become required keys, see ``__init__``.
        """
        return signal

    def _instantiate(self, state: RandomState, signal: AudioSignal=None):
        """Default parameter sampling: no parameters, so an empty dict.

        Subclasses override this to draw their parameters from ``state``.
        """
        return {}

    @staticmethod
    def apply_mask(batch: dict, mask: paddle.Tensor):
        """Applies a mask to the batch.

        The batch is flattened, every leaf value is masked, and the result
        is unflattened again, so nested dictionaries keep their structure.

        Parameters
        ----------
        batch : dict
            Batch whose values will be masked in the ``transform`` pass.
        mask : paddle.Tensor
            Mask to apply to batch.

        Returns
        -------
        dict
            A dictionary that contains values only where ``mask = True``.
        """
        # Plain form of the logic below, before 0-d special cases:
        # masked_batch = {k: v[mask] for k, v in flatten(batch).items()}
        masked_batch = {}
        for k, v in flatten(batch).items():
            # `v` may be `Tensor` or `AudioSignal`
            if 0 == len(v.shape) and 0 == mask.dim():
                # Both the value and the mask are 0-d (a single, unbatched
                # item), so indexing is replaced by explicit handling.
                if mask:  # 0-d True
                    masked_batch[k] = v.unsqueeze(0)
                else:
                    # 0-d False: nothing selected, keep only the dtype.
                    masked_batch[k] = paddle.to_tensor([], dtype=v.dtype)
            else:
                # NOTE(review): presumably `satisfy_paddle_version('2.6')` is
                # true when the installed Paddle is at least 2.6 - confirm
                # against paddlespeech.utils.
                if not satisfy_paddle_version('2.6'):
                    # On this path a true 0-d mask applied to a tensor adds a
                    # leading axis instead of indexing with the mask.
                    if 0 == mask.dim() and bool(mask) and paddle.is_tensor(v):
                        masked_batch[k] = v.unsqueeze(0)
                    else:
                        masked_batch[k] = v[mask]
                else:
                    masked_batch[k] = v[mask]
        return unflatten(masked_batch)

    def transform(self, signal: AudioSignal, **kwargs):
        """Apply the transform to the audio signal,
        with given keyword arguments.

        Nothing is done when no element of the mask is true. Otherwise the
        parameters and the signal are both restricted to the masked items,
        ``self._transform`` is run on them, and the result is written back
        into ``signal[mask]``.

        Parameters
        ----------
        signal : AudioSignal
            Signal that will be modified by the transforms in-place.
        kwargs: dict
            Keyword arguments to the specific transforms ``self._transform``
            function, nested under this transform's name as produced by
            ``self.instantiate``.

        Returns
        -------
        AudioSignal
            Transformed AudioSignal.

        Examples
        --------

        >>> for seed in range(10):
        >>>     kwargs = transform.instantiate(seed, signal)
        >>>     output = transform(signal.clone(), **kwargs)

        """
        tfm_kwargs = self._prepare(kwargs)
        mask = tfm_kwargs["mask"]

        if paddle.any(mask):
            tfm_kwargs = self.apply_mask(tfm_kwargs, mask)
            # The mask itself is not an argument of `_transform`.
            tfm_kwargs = {k: v for k, v in tfm_kwargs.items() if k != "mask"}
            signal[mask] = self._transform(signal[mask], **tfm_kwargs)

        return signal

    def __call__(self, *args, **kwargs):
        """Calling the transform is shorthand for ``self.transform``."""
        return self.transform(*args, **kwargs)

    def instantiate(
            self,
            state: RandomState=None,
            signal: AudioSignal=None, ):
        """Instantiates parameters for the transform.

        Parameters
        ----------
        state : RandomState, optional
            Source of randomness; it is passed through ``util.random_state``
            before use (the examples pass an integer seed), by default None
        signal : AudioSignal, optional
            Signal forwarded to ``self._instantiate`` when that method
            declares a ``signal`` parameter, by default None

        Returns
        -------
        dict
            Dictionary containing instantiated arguments for every keyword
            argument to ``self._transform``, plus a ``"mask"`` entry, nested
            under ``self.name``.

        Examples
        --------

        >>> for seed in range(10):
        >>>     kwargs = transform.instantiate(seed, signal)
        >>>     output = transform(signal.clone(), **kwargs)

        """
        state = util.random_state(state)

        # Not all instantiates need the signal. Check if signal
        # is needed before passing it in, so that the end-user
        # doesn't need to have variables they're not using flowing
        # into their function.
        needs_signal = "signal" in set(
            signature(self._instantiate).parameters.keys())
        kwargs = {}
        if needs_signal:
            kwargs = {"signal": signal}

        # Instantiate the parameters for the transform.
        params = self._instantiate(state, **kwargs)
        for k in list(params.keys()):
            v = params[k]
            # Signals, tensors and nested dicts are kept as they are;
            # everything else is converted to a tensor.
            if isinstance(v, (AudioSignal, paddle.Tensor, dict)):
                params[k] = v
            else:
                params[k] = paddle.to_tensor(v)
        # The transform is applied when a uniform draw is at most `prob`.
        # This draw happens after the parameters have been sampled.
        mask = state.rand() <= self.prob
        params[f"mask"] = paddle.to_tensor(mask)

        # Put the params into a nested dictionary that will be
        # used later when calling the transform. This is to avoid
        # collisions in the dictionary.
        params = {self.name: params}

        return params

    def batch_instantiate(
            self,
            states: list=None,
            signal: AudioSignal=None, ):
        """Instantiates arguments for every item in a batch,
        given a list of states. Each state in the list
        corresponds to one item in the batch.

        Parameters
        ----------
        states : list, optional
            List of states, by default None. The list is iterated directly,
            so leaving it as None raises a ``TypeError``.
        signal : AudioSignal, optional
            AudioSignal to pass to the ``self.instantiate`` section
            if it is needed for this transform, by default None

        Returns
        -------
        dict
            Collated dictionary of arguments.

        Examples
        --------

        >>> batch_size = 4
        >>> signal = AudioSignal(audio_path, offset=10, duration=2)
        >>> signal_batch = AudioSignal.batch([signal.clone() for _ in range(batch_size)])
        >>>
        >>> states = [seed + idx for idx in list(range(batch_size))]
        >>> kwargs = transform.batch_instantiate(states, signal_batch)
        >>> batch_output = transform(signal_batch, **kwargs)
        """
        kwargs = []
        for state in states:
            kwargs.append(self.instantiate(state, signal))
        # Merge the per-item dictionaries into a single batched one.
        kwargs = util.collate(kwargs)
        return kwargs


class Identity(BaseTransform):
    """No-op transform: nothing is overridden, so the inherited
    ``_transform`` hands the signal back unchanged."""


class SpectralTransform(BaseTransform):
    """Base class for transforms that manipulate the spectrogram.

    Such transforms need STFT data to exist. ``transform`` therefore calls
    ``signal.stft()`` before the regular transform logic and
    ``signal.istft()`` afterwards, so that the audio data is written to
    after the spectral manipulation.
    """

    def transform(self, signal, **kwargs):
        """Run the inherited ``transform`` between an STFT and an inverse STFT.

        Parameters
        ----------
        signal : AudioSignal
            Signal to process; the same object is returned.
        kwargs : dict
            Instantiated parameters, forwarded unchanged to the parent
            ``transform``.

        Returns
        -------
        AudioSignal
            The signal that was passed in.
        """
        # Forward STFT first, so the spectral `_transform` has data to edit.
        signal.stft()

        # The parent's return value is not used; `signal` itself is returned.
        super().transform(signal, **kwargs)

        # Inverse STFT, so the audio data reflects the spectral changes.
        signal.istft()
        return signal


class Compose(BaseTransform):
    """Compose applies transforms in sequence, one after the other. The
    transforms are passed in as positional arguments or as a list like so:

    >>> transform = tfm.Compose(
    >>>     [
    >>>         tfm.RoomImpulseResponse(sources=["tests/audio/irs.csv"]),
    >>>         tfm.BackgroundNoise(sources=["tests/audio/noises.csv"]),
    >>>     ],
    >>> )

    This will convolve the signal with a room impulse response, and then
    add background noise to the signal. Instantiate instantiates
    all the parameters for every transform in the transform list so the
    interface for using the Compose transform is the same as everything
    else:

    >>> kwargs = transform.instantiate()
    >>> output = transform(signal.clone(), **kwargs)

    Under the hood, each transform is renamed to a unique name of the form
    ``{position}.{name}``, where ``position`` is the index of the transform
    in the list. Note that this renames the transform objects that were
    passed in. ``Compose`` can nest within other ``Compose`` transforms,
    like so:

    >>> preprocess = transforms.Compose(
    >>>     tfm.GlobalVolumeNorm(),
    >>>     tfm.CrossTalk(),
    >>>     name="preprocess",
    >>> )
    >>> augment = transforms.Compose(
    >>>     tfm.RoomImpulseResponse(),
    >>>     tfm.BackgroundNoise(),
    >>>     name="augment",
    >>> )
    >>> postprocess = transforms.Compose(
    >>>     tfm.VolumeChange(),
    >>>     tfm.RescaleAudio(),
    >>>     tfm.ShiftPhase(),
    >>>     name="postprocess",
    >>> )
    >>> transform = transforms.Compose(preprocess, augment, postprocess)

    This defines 3 composed transforms, and then composes them in sequence
    with one another.

    Parameters
    ----------
    *transforms : list
        List of transforms to apply. May be empty, in which case the
        composed transform leaves the signal unchanged.
    name : str, optional
        Name of this transform, used to identify it in the dictionary
        produced by ``self.instantiate``, by default None
    prob : float, optional
        Probability of applying this transform, by default 1.0
    """

    def __init__(self, *transforms: list, name: str=None, prob: float=1.0):
        # Accept both Compose(t1, t2) and Compose([t1, t2]). The emptiness
        # check keeps a bare Compose() from failing with an IndexError.
        if transforms and isinstance(transforms[0], list):
            transforms = transforms[0]

        # Prefix every child's name with its position so that the same
        # transform type can appear several times without key collisions.
        for i, tfm in enumerate(transforms):
            tfm.name = f"{i}.{tfm.name}"

        keys = [tfm.name for tfm in transforms]
        super().__init__(keys=keys, name=name, prob=prob)

        self.transforms = transforms
        # Names used by `_transform` to decide which children run; `filter`
        # swaps this out temporarily.
        self.transforms_to_apply = keys

    @contextmanager
    def filter(self, *names: list):
        """This can be used to skip transforms entirely when applying
        the sequence of transforms to a signal. For example, take
        the following transforms with the names ``preprocess, augment, postprocess``.

        >>> preprocess = transforms.Compose(
        >>>     tfm.GlobalVolumeNorm(),
        >>>     tfm.CrossTalk(),
        >>>     name="preprocess",
        >>> )
        >>> augment = transforms.Compose(
        >>>     tfm.RoomImpulseResponse(),
        >>>     tfm.BackgroundNoise(),
        >>>     name="augment",
        >>> )
        >>> postprocess = transforms.Compose(
        >>>     tfm.VolumeChange(),
        >>>     tfm.RescaleAudio(),
        >>>     tfm.ShiftPhase(),
        >>>     name="postprocess",
        >>> )
        >>> transform = transforms.Compose(preprocess, augment, postprocess)

        If we wanted to apply all 3 to a signal, we do:

        >>> kwargs = transform.instantiate()
        >>> output = transform(signal.clone(), **kwargs)

        But if we only wanted to apply the ``preprocess`` and ``postprocess``
        transforms to the signal, we do:

        >>> with transform.filter("preprocess", "postprocess"):
        >>>     output = transform(signal.clone(), **kwargs)

        A child transform is applied when any of the given names occurs
        inside its position-prefixed name. The previous selection is
        restored when the ``with`` block exits, including when the block
        raises an exception.

        Parameters
        ----------
        *names : list
            List of transforms, identified by name, to apply to signal.
        """
        old_transforms = self.transforms_to_apply
        self.transforms_to_apply = names
        try:
            yield
        finally:
            # Without the `finally`, an exception inside the `with` body
            # would leave the filter permanently active on this object.
            self.transforms_to_apply = old_transforms

    def _transform(self, signal, **kwargs):
        """Apply every selected child transform in order.

        The full ``kwargs`` dictionary is handed to each child, which picks
        out its own entry by name.
        """
        for transform in self.transforms:
            # Containment test on the name string, so "augment" also
            # matches the renamed "1.augment".
            if any(x in transform.name for x in self.transforms_to_apply):
                signal = transform(signal, **kwargs)
        return signal

    def _instantiate(self, state: RandomState, signal: AudioSignal=None):
        """Instantiate all children with the shared ``state`` and merge
        their (name-keyed) parameter dictionaries into one."""
        parameters = {}
        for transform in self.transforms:
            parameters.update(transform.instantiate(state, signal=signal))
        return parameters

    def __getitem__(self, idx):
        """Return the child transform(s) at ``idx``."""
        return self.transforms[idx]

    def __len__(self):
        """Number of child transforms."""
        return len(self.transforms)

    def __iter__(self):
        """Iterate over the child transforms in application order."""
        yield from self.transforms


class Choose(Compose):
    """Like :py:func:`audiotools.data.transforms.Compose`, except that only
    one of the given transforms is applied instead of all of them in
    sequence. The transform is chosen for each item in the batch.

    Parameters
    ----------
    *transforms : list
        List of transforms to choose from
    weights : list
        Probability of choosing any specific transform. When omitted, every
        transform is equally likely.
    name : str, optional
        Name of this transform, used to identify it in the dictionary
        produced by ``self.instantiate``, by default None
    prob : float, optional
        Probability of applying this transform, by default 1.0

    Examples
    --------

    >>> transforms.Choose(tfm.LowPass(), tfm.HighPass())
    """

    def __init__(
            self,
            *transforms: list,
            weights: list=None,
            name: str=None,
            prob: float=1.0, ):
        super().__init__(*transforms, name=name, prob=prob)

        if weights is None:
            # Uniform distribution over the available transforms.
            n_options = len(self.transforms)
            weights = [1 / n_options for _ in range(n_options)]
        self.weights = np.array(weights)

    def _instantiate(self, state: RandomState, signal: AudioSignal=None):
        """Instantiate every child, then keep the mask of only one of them.

        All children are instantiated first. A single index is then drawn
        according to ``self.weights``. Every child whose own mask came out
        true has that mask replaced by "is this the drawn child?"; masks
        that were already false are left as they are. The resulting masks
        are also returned together under the ``"one_hot"`` key.
        """
        params = super()._instantiate(state, signal)

        candidates = list(range(len(self.transforms)))
        chosen = state.choice(candidates, p=self.weights)

        one_hot = []
        for position, child in enumerate(self.transforms):
            child_params = params[child.name]
            if child_params["mask"].item():
                child_params["mask"] = paddle.to_tensor(position == chosen)
            one_hot.append(child_params["mask"])

        params["one_hot"] = one_hot
        return params


class Repeat(Compose):
    """Repeatedly applies a given transform ``n_repeat`` times.

    Implemented as a ``Compose`` of ``n_repeat`` shallow copies
    (``copy.copy``) of the transform.

    Parameters
    ----------
    transform : BaseTransform
        Transform to repeat.
    n_repeat : int, optional
        Number of times to repeat transform, by default 1
    name : str, optional
        Name of this transform, used to identify it in the dictionary
        produced by ``self.instantiate``, by default None
    prob : float, optional
        Probability of applying this transform, by default 1.0
    """

    def __init__(
            self,
            transform,
            n_repeat: int=1,
            name: str=None,
            prob: float=1.0, ):
        # One shallow copy per repetition, so each can carry its own name.
        copies = []
        for _ in range(n_repeat):
            copies.append(copy.copy(transform))

        super().__init__(copies, name=name, prob=prob)
        self.n_repeat = n_repeat


class RepeatUpTo(Choose):
    """Repeatedly applies a given transform a randomly chosen number of
    times, bounded by ``max_repeat``.

    Implemented as a ``Choose`` over ``Repeat(transform, n_repeat=n)`` for
    ``n`` in ``range(1, max_repeat)``. The candidate repeat counts are
    therefore ``1 .. max_repeat - 1``; ``max_repeat`` itself is not among
    them.

    Parameters
    ----------
    transform : BaseTransform
        Transform to repeat.
    max_repeat : int, optional
        Exclusive upper bound on the number of repetitions, by default 5
    weights : list
        Probability of choosing each candidate repeat count, one entry per
        candidate (``max_repeat - 1`` entries).
    name : str, optional
        Name of this transform, used to identify it in the dictionary
        produced by ``self.instantiate``, by default None
    prob : float, optional
        Probability of applying this transform, by default 1.0
    """

    def __init__(
            self,
            transform,
            max_repeat: int=5,
            weights: list=None,
            name: str=None,
            prob: float=1.0, ):
        # One Repeat per candidate count; Choose picks among them.
        options = [
            Repeat(transform, n_repeat=count)
            for count in range(1, max_repeat)
        ]
        super().__init__(options, name=name, prob=prob, weights=weights)

        self.max_repeat = max_repeat


class ClippingDistortion(BaseTransform):
    """Adds clipping distortion to signal. Corresponds
    to :py:func:`audiotools.core.effects.EffectMixin.clip_distortion`.

    Parameters
    ----------
    perc : tuple, optional
        Distribution of the clipping percentile. Values are between 0.0
        and 1.0; typical values are 0.1 or below,
        by default ("uniform", 0.0, 0.1)
    name : str, optional
        Name of this transform, used to identify it in the dictionary
        produced by ``self.instantiate``, by default None
    prob : float, optional
        Probability of applying this transform, by default 1.0
    """

    def __init__(
            self,
            perc: tuple=("uniform", 0.0, 0.1),
            name: str=None,
            prob: float=1.0, ):
        super().__init__(name=name, prob=prob)
        self.perc = perc

    def _instantiate(self, state: RandomState):
        """Sample the clipping percentile from ``self.perc``."""
        sampled_perc = util.sample_from_dist(self.perc, state)
        return {"perc": sampled_perc}

    def _transform(self, signal, perc):
        """Apply ``signal.clip_distortion`` with the sampled percentile."""
        distorted = signal.clip_distortion(perc)
        return distorted


class Equalizer(BaseTransform):
    """Applies an equalization curve to the audio signal. Corresponds
    to :py:func:`audiotools.core.effects.EffectMixin.equalizer`.

    Parameters
    ----------
    eq_amount : tuple, optional
        Distribution of the maximum dB cut to apply to the audio in any
        band, by default ("const", 1.0)
    n_bands : int, optional
        Number of bands in EQ, by default 6
    name : str, optional
        Name of this transform, used to identify it in the dictionary
        produced by ``self.instantiate``, by default None
    prob : float, optional
        Probability of applying this transform, by default 1.0
    """

    def __init__(
            self,
            eq_amount: tuple=("const", 1.0),
            n_bands: int=6,
            name: str=None,
            prob: float=1.0, ):
        super().__init__(name=name, prob=prob)
        self.n_bands = n_bands
        self.eq_amount = eq_amount

    def _instantiate(self, state: RandomState):
        """Sample one value per band, each between ``-eq_amount`` and 0."""
        # Draw order matters for reproducibility: the amount first, then
        # the per-band random factors.
        max_cut = util.sample_from_dist(self.eq_amount, state)
        band_factors = state.rand(self.n_bands)
        return {"eq": -max_cut * band_factors}

    def _transform(self, signal, eq):
        """Apply ``signal.equalizer`` with the sampled curve."""
        equalized = signal.equalizer(eq)
        return equalized


class BackgroundNoise(BaseTransform):
    """Adds background noise from audio specified by a set of CSV files.
    A valid CSV file looks like the following, and is typically generated
    by :py:func:`audiotools.data.preprocess.create_csv`:

    ..  csv-table::
        :header: path

        room_tone/m6_script2_clean.wav
        room_tone/m6_script2_cleanraw.wav
        room_tone/m6_script2_ipad_balcony1.wav
        room_tone/m6_script2_ipad_bedroom1.wav
        room_tone/m6_script2_ipad_confroom1.wav
        room_tone/m6_script2_ipad_confroom2.wav
        room_tone/m6_script2_ipad_livingroom1.wav
        room_tone/m6_script2_ipad_office1.wav

    ..  note::
        All paths are relative to an environment variable called ``PATH_TO_DATA``,
        so that CSV files are portable across machines where data may be
        located in different places.

    This transform calls :py:func:`audiotools.core.effects.EffectMixin.mix`
    and :py:func:`audiotools.core.effects.EffectMixin.equalizer` under the
    hood.

    Parameters
    ----------
    snr : tuple, optional
        Signal-to-noise ratio, by default ("uniform", 10.0, 30.0)
    sources : List[str], optional
        Sources containing folders, or CSVs with paths to audio files,
        by default None
    weights : List[float], optional
        Weights to sample audio files from each source, by default None
    eq_amount : tuple, optional
        Amount of equalization to apply, by default ("const", 1.0)
    n_bands : int, optional
        Number of bands in equalizer, by default 3
    name : str, optional
        Name of this transform, used to identify it in the dictionary
        produced by ``self.instantiate``, by default None
    prob : float, optional
        Probability of applying this transform, by default 1.0
    loudness_cutoff : float, optional
        Loudness cutoff when loading from audio files, by default None
    """

    def __init__(
            self,
            snr: tuple=("uniform", 10.0, 30.0),
            sources: List[str]=None,
            weights: List[float]=None,
            eq_amount: tuple=("const", 1.0),
            n_bands: int=3,
            name: str=None,
            prob: float=1.0,
            loudness_cutoff: float=None, ):
        super().__init__(name=name, prob=prob)

        # Sampling distributions and equalizer size.
        self.snr = snr
        self.eq_amount = eq_amount
        self.n_bands = n_bands

        # Loader that supplies the noise excerpts.
        self.loader = AudioLoader(sources, weights)
        self.loudness_cutoff = loudness_cutoff

    def _instantiate(self, state: RandomState, signal: AudioSignal):
        """Sample an EQ curve and an SNR, and load a noise excerpt that
        matches the sample rate, duration and channel count of ``signal``."""
        # Draw order (EQ amount, band factors, SNR, then the loader) is kept
        # fixed so that a given seed always yields the same parameters.
        max_cut = util.sample_from_dist(self.eq_amount, state)
        eq_curve = -max_cut * state.rand(self.n_bands)
        snr_value = util.sample_from_dist(self.snr, state)

        loaded = self.loader(
            state,
            signal.sample_rate,
            duration=signal.signal_duration,
            loudness_cutoff=self.loudness_cutoff,
            num_channels=signal.num_channels, )
        noise = loaded["signal"]

        return {"eq": eq_curve, "bg_signal": noise, "snr": snr_value}

    def _transform(self, signal, bg_signal, snr, eq):
        """Mix a copy of the noise into ``signal`` at the sampled SNR."""
        # Work on a clone so the same instantiated noise can be reused when
        # the transform is applied to several signals.
        noise_copy = bg_signal.clone()
        return signal.mix(noise_copy, snr, eq)


class RoomImpulseResponse(BaseTransform):
    """Convolves signal with a room impulse response, at a specified
    direct-to-reverberant ratio, with equalization applied. Room impulse
    response data is drawn from a CSV file that was produced via
    :py:func:`audiotools.data.preprocess.create_csv`.

    This transform calls :py:func:`audiotools.core.effects.EffectMixin.apply_ir`
    under the hood.

    Parameters
    ----------
    drr : tuple, optional
        Distribution of the direct-to-reverberant ratio,
        by default ("uniform", 0.0, 30.0)
    sources : List[str], optional
        Sources containing folders, or CSVs with paths to audio files,
        by default None
    weights : List[float], optional
        Weights to sample audio files from each source, by default None
    eq_amount : tuple, optional
        Amount of equalization to apply, by default ("const", 1.0)
    n_bands : int, optional
        Number of bands in equalizer, by default 6
    name : str, optional
        Name of this transform, used to identify it in the dictionary
        produced by ``self.instantiate``, by default None
    prob : float, optional
        Probability of applying this transform, by default 1.0
    use_original_phase : bool, optional
        Whether or not to use the original phase, by default False
    offset : float, optional
        Offset from each impulse response file to use, by default 0.0
    duration : float, optional
        Duration of each impulse response, by default 1.0
    """

    def __init__(
            self,
            drr: tuple=("uniform", 0.0, 30.0),
            sources: List[str]=None,
            weights: List[float]=None,
            eq_amount: tuple=("const", 1.0),
            n_bands: int=6,
            name: str=None,
            prob: float=1.0,
            use_original_phase: bool=False,
            offset: float=0.0,
            duration: float=1.0, ):
        super().__init__(name=name, prob=prob)

        # Sampling distributions and processing options.
        self.drr = drr
        self.eq_amount = eq_amount
        self.n_bands = n_bands
        self.use_original_phase = use_original_phase

        # Loader for the impulse responses and the excerpt taken from each.
        self.loader = AudioLoader(sources, weights)
        self.offset = offset
        self.duration = duration

    def _instantiate(self, state: RandomState, signal: AudioSignal=None):
        """Sample an EQ curve and a DRR, and load an impulse response that
        matches the sample rate and channel count of ``signal``.

        Although ``signal`` defaults to None, its ``sample_rate`` and
        ``num_channels`` are read, so a signal has to be supplied.
        """
        # Draw order (EQ amount, band factors, DRR, then the loader) is kept
        # fixed so that a given seed always yields the same parameters.
        max_cut = util.sample_from_dist(self.eq_amount, state)
        eq_curve = -max_cut * state.rand(self.n_bands)
        drr_value = util.sample_from_dist(self.drr, state)

        loaded = self.loader(
            state,
            signal.sample_rate,
            offset=self.offset,
            duration=self.duration,
            loudness_cutoff=None,
            num_channels=signal.num_channels, )
        impulse = loaded["signal"]
        # NOTE(review): assumes `zero_pad_to` takes a length in samples and
        # pads in place (its return value is unused) - confirm.
        impulse.zero_pad_to(signal.sample_rate)

        return {"eq": eq_curve, "ir_signal": impulse, "drr": drr_value}

    def _transform(self, signal, ir_signal, drr, eq):
        """Apply a copy of the impulse response to ``signal``."""
        # Work on a clone so the same instantiated impulse response can be
        # reused when the transform is applied to several signals.
        impulse_copy = ir_signal.clone()
        return signal.apply_ir(
            impulse_copy,
            drr,
            eq,
            use_original_phase=self.use_original_phase)


class VolumeNorm(BaseTransform):
    """Normalizes the volume of the excerpt to a specified decibel.

    Uses :py:func:`audiotools.core.effects.EffectMixin.normalize`.

    Parameters
    ----------
    db : tuple, optional
        Distribution of the dB level to normalize the signal to,
        by default ("const", -24)
    name : str, optional
        Name of this transform, used to identify it in the dictionary
        produced by ``self.instantiate``, by default None
    prob : float, optional
        Probability of applying this transform, by default 1.0
    """

    def __init__(
            self,
            db: tuple=("const", -24),
            name: str=None,
            prob: float=1.0, ):
        super().__init__(name=name, prob=prob)
        self.db = db

    def _instantiate(self, state: RandomState):
        """Sample the target level from ``self.db``."""
        target_db = util.sample_from_dist(self.db, state)
        return {"db": target_db}

    def _transform(self, signal, db):
        """Apply ``signal.normalize`` with the sampled level."""
        normalized = signal.normalize(db)
        return normalized


class GlobalVolumeNorm(BaseTransform):
    """Similar to :py:func:`audiotools.data.transforms.VolumeNorm`, this
    transform also normalizes the volume of a signal, but it uses
    the volume of the entire audio file the loaded excerpt comes from,
    rather than the volume of just the excerpt. The volume of the
    entire audio file is expected in ``signal.metadata["loudness"]``.
    If loading audio from a CSV generated by :py:func:`audiotools.data.preprocess.create_csv`
    with ``loudness = True``, like the following:

    ..  csv-table::
        :header: path,loudness

        daps/produced/f1_script1_produced.wav,-16.299999237060547
        daps/produced/f1_script2_produced.wav,-16.600000381469727
        daps/produced/f1_script3_produced.wav,-17.299999237060547
        daps/produced/f1_script4_produced.wav,-16.100000381469727
        daps/produced/f1_script5_produced.wav,-16.700000762939453
        daps/produced/f3_script1_produced.wav,-16.5

    The ``AudioLoader`` will automatically load the loudness column into
    the metadata of the signal.

    Uses :py:func:`audiotools.core.effects.EffectMixin.volume_change`.

    Parameters
    ----------
    db : tuple, optional
        dB to normalize signal to, by default ("const", -24)
    name : str, optional
        Name of this transform, used to identify it in the dictionary
        produced by ``self.instantiate``, by default None
    prob : float, optional
        Probability of applying this transform, by default 1.0
    """

    def __init__(
            self,
            db: tuple=("const", -24),
            name: str=None,
            prob: float=1.0, ):
        super().__init__(name=name, prob=prob)
        self.db = db

    def _instantiate(self, state: RandomState, signal: AudioSignal):
        """Compute the dB change that moves the file loudness to the target.

        The change is 0.0 when the metadata has no ``"loudness"`` entry or
        when that loudness is ``-inf``. In both cases nothing is drawn from
        ``state``. Otherwise a target is sampled from ``self.db`` and the
        change is the target minus the recorded loudness.
        """
        if "loudness" not in signal.metadata:
            return {"db": 0.0}
        if float(signal.metadata["loudness"]) == float("-inf"):
            return {"db": 0.0}

        target_db = util.sample_from_dist(self.db, state)
        file_loudness = float(signal.metadata["loudness"])
        return {"db": target_db - file_loudness}

    def _transform(self, signal, db):
        """Apply ``signal.volume_change`` with the computed dB change."""
        adjusted = signal.volume_change(db)
        return adjusted


class Silence(BaseTransform):
    """Zeros out the signal with some probability.

    Parameters
    ----------
    name : str, optional
        Name of this transform, used to identify it in the dictionary
        produced by ``self.instantiate``, by default None
    prob : float, optional
        Probability of applying this transform, by default 0.1
    """

    def __init__(self, name: str=None, prob: float=0.1):
        super().__init__(name=name, prob=prob)

    def _transform(self, signal):
        """Return a new all-zero signal with the same shape, sample rate
        and STFT parameters as ``signal``, carrying over its cached
        ``_loudness``."""
        cached_loudness = signal._loudness

        silent = AudioSignal(
            paddle.zeros_like(signal.audio_data),
            sample_rate=signal.sample_rate,
            stft_params=signal.stft_params, )

        # Keep the pre-silencing loudness so that the amount of noise added
        # later is as if the signal had not been silenced.
        # TODO: improve this hack
        silent._loudness = cached_loudness
        return silent


class LowPass(BaseTransform):
    """Applies a LowPass filter.

    Uses :py:func:`audiotools.core.dsp.DSPMixin.low_pass`.

    Parameters
    ----------
    cutoff : tuple, optional
        Cutoff frequency distribution,
        by default ``("choice", [4000, 8000, 16000])``
    zeros : int, optional
        Number of zero-crossings in filter, forwarded as the ``zeros``
        keyword of ``signal.low_pass``, by default 51
    name : str, optional
        Name of this transform, used to identify it in the dictionary
        produced by ``self.instantiate``, by default None
    prob : float, optional
        Probability of applying this transform, by default 1.0
    """

    def __init__(
            self,
            cutoff: tuple=("choice", [4000, 8000, 16000]),
            zeros: int=51,
            name: str=None,
            prob: float=1, ):
        super().__init__(name=name, prob=prob)
        self.zeros = zeros
        self.cutoff = cutoff

    def _instantiate(self, state: RandomState):
        """Sample the cutoff frequency from ``self.cutoff``."""
        sampled_cutoff = util.sample_from_dist(self.cutoff, state)
        return {"cutoff": sampled_cutoff}

    def _transform(self, signal, cutoff):
        """Apply ``signal.low_pass`` at the sampled cutoff."""
        filtered = signal.low_pass(cutoff, zeros=self.zeros)
        return filtered


class HighPass(BaseTransform):
    """High-pass filters the signal at a sampled cutoff frequency.

    Uses :py:func:`audiotools.core.dsp.DSPMixin.high_pass`.

    Parameters
    ----------
    cutoff : tuple, optional
        Distribution the cutoff frequency is sampled from,
        by default ``("choice", [50, 100, 250, 500, 1000])``
    zeros : int, optional
        Forwarded as the ``zeros`` argument of ``signal.high_pass``,
        by default 51
    name : str, optional
        Name of this transform, used to identify it in the dictionary
        produced by ``self.instantiate``, by default None
    prob : float, optional
        Probability of applying this transform, by default 1.0
    """

    def __init__(
            self,
            cutoff: tuple=("choice", [50, 100, 250, 500, 1000]),
            zeros: int=51,
            name: str=None,
            prob: float=1, ):
        super().__init__(name=name, prob=prob)

        self.zeros = zeros
        self.cutoff = cutoff

    def _instantiate(self, state: RandomState):
        # Draw one cutoff value from the configured distribution.
        sampled_cutoff = util.sample_from_dist(self.cutoff, state)
        return dict(cutoff=sampled_cutoff)

    def _transform(self, signal, cutoff):
        filtered = signal.high_pass(cutoff, zeros=self.zeros)
        return filtered


class FrequencyMask(SpectralTransform):
    """Masks out a frequency band around a sampled center frequency.

    Uses :py:func:`audiotools.core.dsp.DSPMixin.mask_frequencies`.

    Parameters
    ----------
    f_center : tuple, optional
        Distribution of the band center, expressed as a fraction of
        Nyquist (0.0 to 1.0), by default ("uniform", 0.0, 1.0)
    f_width : tuple, optional
        Distribution of the band width (same normalized units),
        by default ("const", 0.1)
    name : str, optional
        Name of this transform, used to identify it in the dictionary
        produced by ``self.instantiate``, by default None
    prob : float, optional
        Probability of applying this transform, by default 1.0
    """

    def __init__(
            self,
            f_center: tuple=("uniform", 0.0, 1.0),
            f_width: tuple=("const", 0.1),
            name: str=None,
            prob: float=1, ):
        super().__init__(name=name, prob=prob)
        self.f_width = f_width
        self.f_center = f_center

    def _instantiate(self, state: RandomState, signal: AudioSignal):
        # Sample center first, then width, so the random stream is consumed
        # in a fixed order.
        center = util.sample_from_dist(self.f_center, state)
        width = util.sample_from_dist(self.f_width, state)

        # Band edges in normalized frequency, clipped to [0, 1].
        half_width = width / 2
        band_lo = max(center - half_width, 0.0)
        band_hi = min(center + half_width, 1.0)

        # Convert the normalized edges to Hz using the Nyquist frequency.
        nyquist = signal.sample_rate / 2
        return {"fmin_hz": nyquist * band_lo, "fmax_hz": nyquist * band_hi}

    def _transform(self, signal, fmin_hz: float, fmax_hz: float):
        masked = signal.mask_frequencies(fmin_hz=fmin_hz, fmax_hz=fmax_hz)
        return masked


class TimeMask(SpectralTransform):
    """Masks out a contiguous span of time around a sampled center.

    Uses :py:func:`audiotools.core.dsp.DSPMixin.mask_timesteps`.

    Parameters
    ----------
    t_center : tuple, optional
        Distribution of the span center, expressed as a fraction of the
        signal duration (0.0 to 1.0), by default ("uniform", 0.0, 1.0)
    t_width : tuple, optional
        Distribution of the span width (same normalized units),
        by default ("const", 0.025)
    name : str, optional
        Name of this transform, used to identify it in the dictionary
        produced by ``self.instantiate``, by default None
    prob : float, optional
        Probability of applying this transform, by default 1.0
    """

    def __init__(
            self,
            t_center: tuple=("uniform", 0.0, 1.0),
            t_width: tuple=("const", 0.025),
            name: str=None,
            prob: float=1, ):
        super().__init__(name=name, prob=prob)
        self.t_width = t_width
        self.t_center = t_center

    def _instantiate(self, state: RandomState, signal: AudioSignal):
        # Sample center first, then width, so the random stream is consumed
        # in a fixed order.
        center = util.sample_from_dist(self.t_center, state)
        width = util.sample_from_dist(self.t_width, state)

        # Span edges as fractions of the duration, clipped to [0, 1].
        half_width = width / 2
        span_lo = max(center - half_width, 0.0)
        span_hi = min(center + half_width, 1.0)

        # Scale the fractions by the signal duration to get seconds.
        return {
            "tmin_s": signal.signal_duration * span_lo,
            "tmax_s": signal.signal_duration * span_hi,
        }

    def _transform(self, signal, tmin_s: float, tmax_s: float):
        masked = signal.mask_timesteps(tmin_s=tmin_s, tmax_s=tmax_s)
        return masked


class Smoothing(BaseTransform):
    """Convolves the signal with a smoothing window and rescales the
    result so its peak matches the peak of the input.

    Uses :py:func:`audiotools.core.effects.EffectMixin.convolve`.

    Parameters
    ----------
    window_type : tuple, optional
        Distribution of the window type, by default ("const", "average")
    window_length : tuple, optional
        Distribution of the window length, by
        default ("choice", [8, 16, 32, 64, 128, 256, 512])
    name : str, optional
        Name of this transform, used to identify it in the dictionary
        produced by ``self.instantiate``, by default None
    prob : float, optional
        Probability of applying this transform, by default 1.0
    """

    def __init__(
            self,
            window_type: tuple=("const", "average"),
            window_length: tuple=("choice", [8, 16, 32, 64, 128, 256, 512]),
            name: str=None,
            prob: float=1, ):
        super().__init__(name=name, prob=prob)
        self.window_length = window_length
        self.window_type = window_type

    def _instantiate(self, state: RandomState, signal: AudioSignal=None):
        # Sample the type first, then the length (fixed random-stream order).
        sampled_type = util.sample_from_dist(self.window_type, state)
        sampled_length = util.sample_from_dist(self.window_length, state)

        kernel = signal.get_window(
            window_type=sampled_type,
            window_length=sampled_length,
            device="cpu")
        # The window is wrapped as a signal at the input's sample rate.
        return {"window": AudioSignal(kernel, signal.sample_rate)}

    def _transform(self, signal, window):
        def _nonzero_peak(sig):
            # Absolute peak along the last axis; zeros are replaced by one
            # so the ratio below never divides by (or yields) zero.
            peak = sig.audio_data.abs().max(axis=-1, keepdim=True)
            peak[peak == 0.0] = 1.0
            return peak

        # The input peak must be measured before convolving.
        input_peak = _nonzero_peak(signal)

        smoothed = signal.convolve(window)
        output_peak = _nonzero_peak(smoothed)

        return smoothed * (input_peak / output_peak)


class FrequencyNoise(FrequencyMask):
    """Like :py:func:`audiotools.data.transforms.FrequencyMask`, except the
    masked band is filled with random noise rather than left at zero.

    Parameters
    ----------
    f_center : tuple, optional
        Distribution of the band center, expressed as a fraction of
        Nyquist (0.0 to 1.0), by default ("uniform", 0.0, 1.0)
    f_width : tuple, optional
        Distribution of the band width (same normalized units),
        by default ("const", 0.1)
    name : str, optional
        Name of this transform, used to identify it in the dictionary
        produced by ``self.instantiate``, by default None
    prob : float, optional
        Probability of applying this transform, by default 1.0
    """

    def __init__(
            self,
            f_center: tuple=("uniform", 0.0, 1.0),
            f_width: tuple=("const", 0.1),
            name: str=None,
            prob: float=1, ):
        super().__init__(
            f_center=f_center, f_width=f_width, name=name, prob=prob)

    def _transform(self, signal, fmin_hz: float, fmax_hz: float):
        # Zero out the band first, then read back magnitude and phase.
        signal = signal.mask_frequencies(fmin_hz=fmin_hz, fmax_hz=fmax_hz)
        magnitude = signal.magnitude
        phase = signal.phase

        # Draw replacement noise: magnitude noise first, then phase noise.
        noise_mag = paddle.randn(shape=magnitude.shape, dtype=magnitude.dtype)
        noise_phase = paddle.randn(shape=phase.shape, dtype=phase.dtype)

        # Bins where both magnitude and phase are exactly zero get noise.
        zeroed = (magnitude == 0.0) * (phase == 0.0)

        signal.magnitude = paddle.where(zeroed, noise_mag, magnitude)
        signal.phase = paddle.where(zeroed, noise_phase, phase)
        return signal


================================================
FILE: paddlespeech/audiotools/metrics/__init__.py
================================================
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Functions for comparing AudioSignal objects to one another.
"""
from . import quality


================================================
FILE: paddlespeech/audiotools/metrics/quality.py
================================================
# MIT License, Copyright (c) 2023-Present, Descript.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Modified from audiotools(https://github.com/descriptinc/audiotools/blob/master/audiotools/metrics/quality.py)
import os

import numpy as np
import paddle

from ..core import AudioSignal


def visqol(
        estimates: AudioSignal,
        references: AudioSignal,
        mode: str="audio", ):
    """Computes the ViSQOL score of each estimate against its reference.

    Both inputs are cloned, mixed down to mono and resampled to the sample
    rate required by the selected mode (48 kHz for ``"audio"``, 16 kHz for
    ``"speech"``) before being scored item by item.

    Parameters
    ----------
    estimates : AudioSignal
        Degraded AudioSignal
    references : AudioSignal
        Reference AudioSignal
    mode : str, optional
        'audio' or 'speech', by default 'audio'

    Returns
    -------
    Tensor[float]
        ViSQOL score (MOS-LQO), one entry per batch item

    Raises
    ------
    ValueError
        If ``mode`` is neither ``"audio"`` nor ``"speech"``.
    """
    # The bindings are imported lazily; ``pyvisqol`` is tried first and
    # ``visqol`` is the fallback package name.
    try:
        from pyvisqol import visqol_lib_py
        from pyvisqol.pb2 import visqol_config_pb2
        from pyvisqol.pb2 import similarity_result_pb2
    except ImportError:
        from visqol import visqol_lib_py
        from visqol.pb2 import visqol_config_pb2
        from visqol.pb2 import similarity_result_pb2

    config = visqol_config_pb2.VisqolConfig()
    if mode == "audio":
        target_sr = 48000
        config.options.use_speech_scoring = False
        svr_model_path = "libsvm_nu_svr_model.txt"
    elif mode == "speech":
        target_sr = 16000
        config.options.use_speech_scoring = True
        svr_model_path = "lattice_tcditugenmeetpackhref_ls2_nl60_lr12_bs2048_learn.005_ep2400_train1_7_raw.tflite"
    else:
        raise ValueError(f"Unrecognized mode: {mode}")

    # The model file is looked up in the "model" directory next to the
    # visqol library module.
    model_dir = os.path.join(os.path.dirname(visqol_lib_py.__file__), "model")
    config.audio.sample_rate = target_sr
    config.options.svr_model_path = os.path.join(model_dir, svr_model_path)

    api = visqol_lib_py.VisqolApi()
    api.Create(config)

    # Work on copies so the caller's signals are left untouched.
    estimates = estimates.clone().to_mono().resample(target_sr)
    references = references.clone().to_mono().resample(target_sr)

    def _as_float_array(sig, index):
        # Channel 0 of one batch item as a float numpy array.
        return sig.audio_data[index, 0].detach().cpu().numpy().astype(float)

    scores = [
        api.Measure(
            _as_float_array(references, idx),
            _as_float_array(estimates, idx), ).moslqo
        for idx in range(estimates.batch_size)
    ]
    return paddle.to_tensor(np.array(scores))


# Smoke check when the module is run as a script: score a random 44100-sample
# signal (sample rate 44100) against itself and print the result.
if __name__ == "__main__":
    signal = AudioSignal(paddle.randn([44100]), 44100)
    print(visqol(signal, signal))


================================================
FILE: paddlespeech/audiotools/ml/__init__.py
================================================
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import decorators
from .accelerator import Accelerator
from .basemodel import BaseModel


================================================
FILE: paddlespeech/audiotools/ml/accelerator.py
================================================
# MIT License, Copyright (c) 2023-Present, Descript.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Modified from audiotools(https://github.com/descriptinc/audiotools/blob/master/audiotools/ml/accelerator.py)
import os
import typing

import paddle
import paddle.distributed as dist
from paddle.io import DataLoader
from paddle.io import DistributedBatchSampler
from paddle.io import SequenceSampler


class ResumableDistributedSampler(DistributedBatchSampler):
    """Distributed batch sampler that can be resumed from a given start index.

    Parameters
    ----------
    dataset
        Dataset forwarded to ``DistributedBatchSampler``.
    start_idx : int, optional
        Global index to resume from. It is divided by the number of replicas
        to get this rank's offset; by default None (start from the beginning).
    **kwargs
        Forwarded to ``DistributedBatchSampler`` (e.g. ``batch_size``,
        ``num_replicas``, ``rank``, ``shuffle``, ``drop_last``).
    """

    def __init__(self, dataset, start_idx: int=None, **kwargs):
        super().__init__(dataset, **kwargs)
        # Start index, allows to resume an experiment at the index it was
        if start_idx is None:
            self.start_idx = 0
        else:
            # paddle's DistributedBatchSampler keeps the replica count in
            # ``nranks``; reading ``num_replicas`` directly raised
            # AttributeError whenever a start index was supplied.
            num_replicas = getattr(self, "num_replicas", None)
            if num_replicas is None:
                num_replicas = self.nranks
            self.start_idx = start_idx // num_replicas

    def __iter__(self):
        # NOTE(review): the parent yields whole batches, so ``start_idx``
        # presumably counts batches here rather than samples -- confirm
        # against callers.
        for i, idx in enumerate(super().__iter__()):
            if i >= self.start_idx:
                yield idx
        self.start_idx = 0  # set the index back to 0 so for the next epoch


class ResumableSequentialSampler(SequenceSampler):
    """Sequential sampler whose first pass can skip ahead to a start index."""

    def __init__(self, dataset, start_idx: int=None, **kwargs):
        super().__init__(dataset, **kwargs)
        # Offset to resume from; a missing value means "start at the top".
        if start_idx is None:
            start_idx = 0
        self.start_idx = start_idx

    def __iter__(self):
        for position, sample_index in enumerate(super().__iter__()):
            if position < self.start_idx:
                # Still inside the already-consumed prefix.
                continue
            yield sample_index
        # Only the first pass is shortened; later epochs start from 0.
        self.start_idx = 0


class Accelerator:
    """This class is used to prepare models and dataloaders for
    usage with DDP or DP. Use the functions prepare_model, prepare_dataloader to
    prepare the respective objects. In the case of models, they are wrapped
    for data-parallel execution. In the case of
    dataloaders, a sampler is created and the dataloader is initialized with
    that sampler.

    If the world size is 1, prepare_model returns the model unchanged. If the
    environment variable ``PADDLE_TRAINER_ID`` is not set, then the
    script was launched without ``paddle.distributed.launch``, and the DP
    branch is used instead of the DDP branch (not recommended), if
    the world size is greater than 1.

    Parameters
    ----------
    amp : bool, optional
        Whether or not to enable automatic mixed precision, by default False.
        When enabled, ``paddle.amp.GradScaler`` is used for loss scaling and
        ``autocast`` enables ``paddle.amp.auto_cast``; otherwise a no-op
        scaler is used.
    """

    def __init__(self, amp: bool=False):
        trainer_id = os.getenv("PADDLE_TRAINER_ID", None)
        self.world_size = paddle.distributed.get_world_size()

        self.use_ddp = self.world_size > 1 and trainer_id is not None
        self.use_dp = self.world_size > 1 and trainer_id is None
        # NOTE(review): this selects "cpu" only for a world size of 0 --
        # confirm whether get_world_size() can ever return 0 here.
        self.device = "cpu" if self.world_size == 0 else "cuda"

        if self.use_ddp:
            trainer_id = int(trainer_id)
            dist.init_parallel_env()

        self.local_rank = 0 if trainer_id is None else int(trainer_id)
        self.amp = amp

        class DummyScaler:
            """Stand-in with the scaler methods used below, minus scaling."""

            def __init__(self):
                pass

            def step(self, optimizer):
                optimizer.step()

            def scale(self, loss):
                return loss

            def unscale_(self, optimizer):
                return optimizer

            def update(self):
                pass

        self.scaler = paddle.amp.GradScaler() if self.amp else DummyScaler()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        pass

    def prepare_model(self, model: paddle.nn.Layer, **kwargs):
        """Prepares model for DDP or DP.

        Parameters
        ----------
        model : paddle.nn.Layer
            Model that is converted for DDP or DP.
        **kwargs
            Forwarded to ``paddle.DataParallel``.

        Returns
        -------
        paddle.nn.Layer
            Wrapped model, or original model if DDP and DP are turned off.
        """
        if self.use_ddp:
            model = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(model)
            model = paddle.DataParallel(model, **kwargs)
        elif self.use_dp:
            model = paddle.DataParallel(model, **kwargs)
        return model

    def autocast(self, *args, **kwargs):
        """Returns a ``paddle.amp.auto_cast`` context, enabled iff ``amp``."""
        return paddle.amp.auto_cast(self.amp, *args, **kwargs)

    def backward(self, loss: paddle.Tensor):
        """Backwards pass.

        Parameters
        ----------
        loss : paddle.Tensor
            Loss value.
        """
        scaled = self.scaler.scale(loss)  # scale the loss
        scaled.backward()

    def step(self, optimizer: paddle.optimizer.Optimizer):
        """Steps the optimizer.

        Parameters
        ----------
        optimizer : paddle.optimizer.Optimizer
            Optimizer to step forward.
        """
        self.scaler.step(optimizer)

    def update(self):
        """Updates the scaler (a no-op when AMP is disabled)."""
        # https://www.paddlepaddle.org.cn/documentation/docs/zh/2.6/api/paddle/amp/GradScaler_cn.html#step-optimizer
        self.scaler.update()

    def prepare_dataloader(self,
                           dataset: typing.Iterable,
                           start_idx: int=None,
                           **kwargs):
        """Wraps a dataset with a DataLoader, using the correct sampler if DDP is
        enabled.

        Parameters
        ----------
        dataset : typing.Iterable
            Dataset to build Dataloader around.
        start_idx : int, optional
            Start index of sampler, useful if resuming from some epoch,
            by default None
        **kwargs
            ``batch_size`` (default 1), ``shuffle`` and ``drop_last``
            (default False) configure the batch sampler; everything else is
            forwarded to ``paddle.io.DataLoader``. ``shuffle`` defaults to
            True under DDP and must be falsy otherwise.

        Returns
        -------
        DataLoader
            Wrapped DataLoader.

        Raises
        ------
        ValueError
            If ``shuffle`` is requested without DDP, since the resumable
            sampler used in that case is strictly sequential.
        """
        # ``paddle.io.DataLoader`` has no ``sampler`` argument and refuses
        # ``batch_size``/``shuffle``/``drop_last`` when ``batch_sampler`` is
        # given, so the batching options are consumed here and handed to the
        # batch sampler instead of being forwarded.
        batch_size = kwargs.pop("batch_size", 1)
        drop_last = kwargs.pop("drop_last", False)

        if self.use_ddp:
            batch_sampler = ResumableDistributedSampler(
                dataset,
                start_idx,
                batch_size=batch_size,
                shuffle=kwargs.pop("shuffle", True),
                drop_last=drop_last,
                num_replicas=self.world_size,
                rank=self.local_rank, )
            if "num_workers" in kwargs:
                # Split the requested workers across ranks, keeping >= 1.
                kwargs["num_workers"] = max(kwargs["num_workers"] //
                                            self.world_size, 1)
        else:
            if kwargs.pop("shuffle", False):
                raise ValueError(
                    "shuffle is not supported with the resumable sequential "
                    "sampler used when DDP is disabled")
            # Wrap the index sampler so it can be passed as ``batch_sampler``.
            batch_sampler = paddle.io.BatchSampler(
                sampler=ResumableSequentialSampler(dataset, start_idx),
                batch_size=batch_size,
                drop_last=drop_last, )

        dataloader = DataLoader(
            dataset,
            batch_sampler=batch_sampler,
            **kwargs, )
        return dataloader

    @staticmethod
    def unwrap(model):
        """Returns ``model`` unchanged."""
        return model


================================================
FILE: paddlespeech/audiotools/ml/basemodel.py
================================================
# MIT License, Copyright (c) 2023-Present, Descript.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Modified from audiotools(https://github.com/descriptinc/audiotools/blob/master/audiotools/ml/layers/base.py)
import inspect
import shutil
import tempfile
import typing
from pathlib import Path

import paddle
from paddle import nn


class BaseModel(nn.Layer):
    """This is a class that adds useful save/load functionality to a
    ``paddle.nn.Layer`` object. Models are saved as a weights file that also
    records the constructor keyword arguments, so ``load`` can rebuild the
    model without the caller repeating them. Saving as a ``package`` is not
    implemented in this port (``save(..., package=True)`` raises
    ``NotImplementedError``).

    >>> class Model(ml.BaseModel):
    >>>     def __init__(self, arg1: float = 1.0):
    >>>         super().__init__()
    >>>         self.arg1 = arg1
    >>>         self.linear = nn.Linear(1, 1)
    >>>
    >>>     def forward(self, x):
    >>>         return self.linear(x)
    >>>
    >>> model1 = Model()
    >>>
    >>> with tempfile.NamedTemporaryFile(suffix=".pth") as f:
    >>>     model1.save(f.name)
    >>>     model2 = Model.load(f.name)
    >>>
    >>> with tempfile.TemporaryDirectory() as d:
    >>>     model1.save_to_folder(d, {"data": 1.0})
    >>>     Model.load_from_folder(d)

    """

    def save(
            self,
            path: str,
            metadata: dict=None,
            package: bool=False,
            intern: list=[],
            extern: list=[],
            mock: list=[], ):
        """Saves the model weights alongside some specified metadata.

        The constructor's keyword arguments (those with defaults) are
        collected, overridden by same-named attributes on ``self``, and
        stored under ``metadata["kwargs"]``.

        Parameters
        ----------
        path : str
            Path to save model to.
        metadata : dict, optional
            Any metadata to save alongside the model,
            by default None. The dictionary passed in is not modified.
        package : bool, optional
            Whether to save the model as a portable ``package``,
            by default False. Not implemented: passing True raises.
        intern : list, optional
            Unused in this port (packaging only), by default []
        extern : list, optional
            Unused in this port (packaging only), by default []
        mock : list, optional
            Unused in this port (packaging only), by default []

        Returns
        -------
        str
            Path to saved model.

        Raises
        ------
        NotImplementedError
            If ``package`` is True.
        """
        sig = inspect.signature(self.__class__)
        args = {}

        # Start from the constructor defaults...
        for key, val in sig.parameters.items():
            arg_val = val.default
            if arg_val is not inspect.Parameter.empty:
                args[key] = arg_val

        # Look up attributes in self, and if any of them are in args,
        # overwrite them in args.
        for attribute in dir(self):
            if attribute in args:
                args[attribute] = getattr(self, attribute)

        # Work on a shallow copy so the caller's dictionary does not gain a
        # "kwargs" entry as a side effect of saving.
        metadata = {} if metadata is None else dict(metadata)
        metadata["kwargs"] = args
        if not hasattr(self, "metadata"):
            self.metadata = {}
        self.metadata.update(metadata)

        if not package:
            state_dict = {"state_dict": self.state_dict(), "metadata": metadata}
            paddle.save(state_dict, str(path))
        else:
            raise NotImplementedError(
                "Currently Paddle does not support packaging")

        return path

    @property
    def device(self):
        """Gets the device the model is on by looking at the device of
        the first parameter. May not be valid if model is split across
        multiple devices.
        """
        return list(self.parameters())[0].place

    @classmethod
    def load(
            cls,
            location: str,
            *args,
            package_name: str=None,
            strict: bool=False,
            **kwargs, ):
        """Load model from a weights file written by :py:meth:`save`. The
        arguments to the class are taken from the metadata stored in the
        file; stored keys that are not constructor parameters are dropped.

        Parameters
        ----------
        location : str
            Path to file.
        *args
            Positional arguments passed to the class constructor.
        package_name : str, optional
            Currently unused, by default None.
        strict : bool, optional
            Currently unused, by default False
        kwargs : dict
            Additional keyword arguments to the model instantiation; they
            override the values stored in the file.

        Returns
        -------
        BaseModel
            A model that inherits from BaseModel.
        """
        model_dict = paddle.load(location)
        metadata = model_dict["metadata"]
        metadata["kwargs"].update(kwargs)

        # Keep only keyword arguments the constructor actually accepts.
        sig = inspect.signature(cls)
        class_keys = list(sig.parameters.keys())
        for k in list(metadata["kwargs"].keys()):
            if k not in class_keys:
                metadata["kwargs"].pop(k)

        model = cls(*args, **metadata["kwargs"])
        model.set_state_dict(model_dict["state_dict"])
        model.metadata = metadata

        return model

    def save_to_folder(
            self,
            folder: typing.Union[str, Path],
            extra_data: dict=None,
            package: bool=False, ):
        """Dumps a model into a folder as ``weights.pth`` (and additionally
        as ``package.pth`` when ``package`` is True), as well as anything
        specified in ``extra_data``. ``extra_data`` is a dictionary of other
        objects saved with ``paddle.save``, with the keys being the file
        names to save them under. Everything is saved under a subfolder
        specified by the lower-cased name of the class
        (e.g. ``folder/generator/weights.pth``
        if the model name was ``Generator``).

        >>> with tempfile.TemporaryDirectory() as d:
        >>>     extra_data = {
        >>>         "optimizer.pth": optimizer.state_dict()
        >>>     }
        >>>     model.save_to_folder(d, extra_data)
        >>>     Model.load_from_folder(d)

        Parameters
        ----------
        folder : typing.Union[str, Path]
            Parent folder; the class-named subfolder is created inside it.
        extra_data : dict, optional
            Mapping of file name to object to save next to the model,
            by default None
        package : bool, optional
            Whether to also write ``package.pth``, by default False

        Returns
        -------
        Path
            Path to the subfolder the files were written to.
        """
        extra_data = {} if extra_data is None else extra_data
        model_name = type(self).__name__.lower()
        target_base = Path(f"{folder}/{model_name}/")
        target_base.mkdir(exist_ok=True, parents=True)

        if package:
            # NOTE(review): ``save`` defaults to ``package=False``, so this
            # writes the weights format under the package file name.
            package_path = target_base / "package.pth"
            self.save(package_path)

        weights_path = target_base / "weights.pth"
        self.save(weights_path, package=False)

        for path, obj in extra_data.items():
            paddle.save(obj, str(target_base / path))

        return target_base

    @classmethod
    def load_from_folder(
            cls,
            folder: typing.Union[str, Path],
            package: bool=False,
            strict: bool=False,
            **kwargs, ):
        """Loads the model from a folder generated by
        :py:meth:`save_to_folder`.
        Like that function, this one looks for a subfolder that has
        the lower-cased name of the class (e.g. ``folder/generator/weights.pth``
        if the model name was ``Generator``).

        Parameters
        ----------
        folder : typing.Union[str, Path]
            Parent folder that contains the class-named subfolder.
        package : bool, optional
            Whether to load the model from ``package.pth`` instead of
            ``weights.pth``, by default False.
        strict : bool, optional
            Currently unused, by default False
        **kwargs
            Forwarded to ``paddle.load`` when reading the extra data files
            (not to the model constructor).

        Returns
        -------
        tuple
            tuple of model and extra data as saved by
            :py:meth:`save_to_folder`, the extra data being keyed by
            file name.
        """
        folder = Path(folder) / cls.__name__.lower()
        model_pth = "package.pth" if package else "weights.pth"
        model_pth = folder / model_pth

        model = cls.load(str(model_pth))
        extra_data = {}
        # Every other regular file in the folder is treated as extra data.
        excluded = ["package.pth", "weights.pth"]
        files = [
            x for x in folder.glob("*")
            if x.is_file() and x.name not in excluded
        ]
        for f in files:
            extra_data[f.name] = paddle.load(str(f), **kwargs)

        return model, extra_data


================================================
FILE: paddlespeech/audiotools/ml/decorators.py
================================================
# MIT License, Copyright (c) 2023-Present, Descript.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Modified from audiotools(https://github.com/descriptinc/audiotools/blob/master/audiotools/ml/decorators.py)
import math
import os
import time
from collections import defaultdict
from functools import wraps

import paddle
import paddle.distributed as dist
from rich import box
from rich.console import Console
from rich.console import Group
from rich.live import Live
from rich.markdown import Markdown
from rich.padding import Padding
from rich.panel import Panel
from rich.progress import BarColumn
from rich.progress import Progress
from rich.progress import SpinnerColumn
from rich.progress import TimeElapsedColumn
from rich.progress import TimeRemainingColumn
from rich.rule import Rule
from rich.table import Table
from visualdl import LogWriter


# This is here so that the history can be pickled.
def default_list():
    """Return a fresh empty list (a module-level, picklable factory)."""
    return list()


class Mean:
    """Accumulates values and reports their running mean.

    Calling the instance returns ``total / max(count, 1)``, so an empty
    accumulator yields zero. Non-finite values are ignored by ``update``.
    """

    def __init__(self):
        self.reset()

    def __call__(self):
        # Guard against dividing by zero before any value was recorded.
        divisor = max(self.count, 1)
        return self.total / divisor

    def reset(self):
        self.count, self.total = 0, 0

    def update(self, val):
        # NaN and +/-inf would poison the sum, so they are skipped.
        if not math.isfinite(val):
            return
        self.count += 1
        self.total += val


def when(condition):
    """Decorator factory: the wrapped function only runs if ``condition()``
    is truthy at call time; otherwise the call returns ``None``.

    Parameters
    ----------
    condition : Callable
        Zero-argument callable evaluated on every call of the decorated
        function.

    Example
    -------
    Checkpoint only runs every 100 iterations, and only if the
    local rank is 0.

    >>> i = 0
    >>> rank = 0
    >>>
    >>> @when(lambda: i % 100 == 0 and rank == 0)
    >>> def checkpoint():
    >>>     print("Saving to /runs/exp1")
    >>>
    >>> for i in range(1000):
    >>>     checkpoint()

    """

    def decorator(fn):
        @wraps(fn)
        def decorated(*args, **kwargs):
            # Skip the call entirely when the gate is closed.
            if not condition():
                return None
            return fn(*args, **kwargs)

        return decorated

    return decorator


def timer(prefix: str="time"):
    """Decorator factory that records how long the wrapped function took.

    The wrapped function must return a dictionary; the elapsed time in
    seconds is stored in it under the key "[prefix]/[name_of_function]".

    Parameters
    ----------
    prefix : str, optional
        First part of the key the elapsed time is stored under,
        by default "time".
    """

    def decorator(fn):
        @wraps(fn)
        def decorated(*args, **kwargs):
            started = time.perf_counter()
            result = fn(*args, **kwargs)
            assert isinstance(result, dict)
            elapsed = time.perf_counter() - started
            result[f"{prefix}/{fn.__name__}"] = elapsed
            return result

        return decorated

    return decorator


class Tracker:
    """
    A tracker class that helps to monitor the progress of training and logging the metrics.

    Attributes
    ----------
    metrics : dict
        A dictionary containing the metrics for each label.
    history : dict
        A dictionary containing the history of metrics for each label.
    writer : LogWriter
        A LogWriter object for logging the metrics.
    rank : int
        The rank of the current process.
    step : int
        The current step of the training.
    tasks : dict
        A dictionary containing the progress bars and tables for each label.
    pbar : Progress
        A progress bar object for displaying the progress.
    consoles : list
        A list of console objects for logging.
    live : Live
        A Live object for updating the display live.

    Methods
    -------
    print(msg: str)
        Prints the given message to all consoles.
    update(label: str, fn_name: str)
        Updates the progress bar and table for the given label.
    done(label: str, title: str)
        Resets all running means and the progress bar of the given label, then prints the final result.
    track(label: str, length: int, completed: int = 0, op: dist.ReduceOp = dist.ReduceOp.AVG, ddp_active: bool = "LOCAL_RANK" in os.environ)
        A decorator for tracking the progress and metrics of a function.
    log(label: str, value_type: str = "value", history: bool = True)
        A decorator for logging the metrics of a function.
    is_best(label: str, key: str) -> bool
        Checks if the latest value of the given key in the label is the best so far.
    state_dict() -> dict
        Returns a dictionary containing the state of the tracker.
    load_state_dict(state_dict: dict) -> Tracker
        Loads the state of the tracker from the given state dictionary.
    """

    def __init__(
            self,
            writer: LogWriter=None,
            log_file: str=None,
            rank: int=0,
            console_width: int=100,
            step: int=0, ):
        """
        Initializes the Tracker object.

        Parameters
        ----------
        writer : LogWriter, optional
            A LogWriter object for logging the metrics, by default None.
        log_file : str, optional
            The path to the log file, by default None. When given, the file
            is opened in append mode and used as a second console.
        rank : int, optional
            The rank of the current process, by default 0.
        console_width : int, optional
            The width of the console, by default 100.
        step : int, optional
            The current step of the training, by default 0.
        """
        self.metrics = {}
        self.history = {}
        self.writer = writer
        self.rank = rank
        self.step = step

        # Create progress bars etc.
        self.tasks = {}
        self.pbar = Progress(
            SpinnerColumn(),
            "[progress.description]{task.description}",
            "{task.completed}/{task.total}",
            BarColumn(),
            TimeElapsedColumn(),
            "/",
            TimeRemainingColumn(), )
        self.consoles = [Console(width=console_width)]
        self.live = Live(console=self.consoles[0], refresh_per_second=10)
        if log_file is not None:
            # The file handle is owned by the Console and stays open for the
            # lifetime of the tracker.
            self.consoles.append(
                Console(width=console_width, file=open(log_file, "a")))

    def print(self, msg):
        """
        Prints the given message to all consoles.

        Only the process with rank 0 prints; other ranks do nothing.

        Parameters
        ----------
        msg : str
            The message to be printed.
        """
        if self.rank == 0:
            for c in self.consoles:
                c.log(msg)

    def update(self, label, fn_name):
        """
        Updates the progress bar and table for the given label.

        Only the process with rank 0 updates the display.

        Parameters
        ----------
        label : str
            The label of the progress bar and table to be updated.
        fn_name : str
            The name of the function associated with the label.
        """
        if self.rank == 0:
            self.pbar.advance(self.tasks[label]["pbar"])

            # Create table
            table = Table(title=label, expand=True, box=box.MINIMAL)
            table.add_column("key", style="cyan")
            table.add_column("value", style="bright_blue")
            table.add_column("mean", style="bright_green")

            keys = self.metrics[label]["value"].keys()
            for k in keys:
                value = self.metrics[label]["value"][k]
                mean = self.metrics[label]["mean"][k]()
                table.add_row(k, f"{value:10.6f}", f"{mean:10.6f}")

            self.tasks[label]["table"] = table
            tables = [t["table"] for t in self.tasks.values()]
            group = Group(*tables, self.pbar)
            self.live.update(
                Group(
                    Padding("", (0, 0)),
                    Rule(f"[italic]{fn_name}()", style="white"),
                    Padding("", (0, 0)),
                    Panel.fit(
                        group,
                        padding=(0, 5),
                        title="[b]Progress",
                        border_style="blue", ), ))

    def done(self, label: str, title: str):
        """
        Resets the running means of every tracked label and the progress
        bar of the given label, then prints the final result.

        Parameters
        ----------
        label : str
            The label whose progress bar is reset.
        title : str
            The title to be displayed when printing the final result.
        """
        # Iterate over the metric dictionaries directly so the `label`
        # argument is not shadowed: the progress bar reset below must target
        # the label passed by the caller, not the last tracked label.
        for label_metrics in self.metrics.values():
            for running_mean in label_metrics["mean"].values():
                running_mean.reset()

        if self.rank == 0:
            self.pbar.reset(self.tasks[label]["pbar"])
            tables = [t["table"] for t in self.tasks.values()]
            group = Group(Markdown(f"# {title}"), *tables, self.pbar)
            self.print(group)

    def track(
            self,
            label: str,
            length: int,
            completed: int=0,
            op: dist.ReduceOp=dist.ReduceOp.AVG,
            ddp_active: bool="LOCAL_RANK" in os.environ, ):
        """
        A decorator for tracking the progress and metrics of a function.

        Registers a progress-bar task and fresh metric containers for
        ``label`` immediately; the returned decorator then records the
        scalar entries of the decorated function's dictionary output on
        every call. Non-dictionary outputs only advance the progress bar.

        Parameters
        ----------
        label : str
            The label to be associated with the progress and metrics.
        length : int
            The total number of iterations to be completed.
        completed : int, optional
            The number of iterations already completed, by default 0.
        op : dist.ReduceOp, optional
            The reduce operation to be used, by default dist.ReduceOp.AVG.
        ddp_active : bool, optional
            Whether the DistributedDataParallel is active, by default "LOCAL_RANK" in os.environ.
        """
        self.tasks[label] = {
            "pbar":
            self.pbar.add_task(
                f"[white]Iteration ({label})",
                total=length,
                completed=completed),
            "table":
            Table(),
        }
        self.metrics[label] = {
            "value": defaultdict(),
            "mean": defaultdict(lambda: Mean()),
        }

        def decorator(fn):
            @wraps(fn)
            def decorated(*args, **kwargs):
                output = fn(*args, **kwargs)
                if not isinstance(output, dict):
                    self.update(label, fn.__name__)
                    return output
                # Collect across all DDP processes
                scalar_keys = []
                for k, v in output.items():
                    if isinstance(v, (int, float)):
                        v = paddle.to_tensor([v])
                    if not paddle.is_tensor(v):
                        continue
                    if ddp_active and v.is_cuda:
                        dist.all_reduce(v, op=op)
                    output[k] = v.detach()
                    if paddle.numel(v) == 1:
                        # Single-element tensors are unwrapped to Python
                        # scalars so they can be formatted and averaged.
                        scalar_keys.append(k)
                        output[k] = v.item()

                # Save the outputs to tracker
                for k, v in output.items():
                    if k not in scalar_keys:
                        continue
                    self.metrics[label]["value"][k] = v
                    # Update the running mean
                    self.metrics[label]["mean"][k].update(v)

                self.update(label, fn.__name__)
                return output

            return decorated

        return decorator

    def log(self, label: str, value_type: str="value", history: bool=True):
        """
        A decorator for logging the metrics of a function.

        After the decorated function returns, rank 0 writes every metric
        stored for ``label`` to the writer (when one is set) and appends it
        to the history (when history is enabled for ``label``).

        Parameters
        ----------
        label : str
            The label to be associated with the logging.
        value_type : str, optional
            The type of value to be logged, "mean" or "value", by default "value".
        history : bool, optional
            Whether to save the history of the metrics, by default True.
        """
        assert value_type in ["mean", "value"]
        if history:
            if label not in self.history:
                self.history[label] = defaultdict(default_list)

        def decorator(fn):
            @wraps(fn)
            def decorated(*args, **kwargs):
                output = fn(*args, **kwargs)
                if self.rank == 0:
                    nonlocal value_type, label
                    metrics = self.metrics[label][value_type]
                    for k, v in metrics.items():
                        # Running means are stored as Mean objects; call
                        # them to obtain the current average.
                        v = v() if isinstance(v, Mean) else v
                        if self.writer is not None:
                            self.writer.add_scalar(
                                tag=f"{k}/{label}", value=v, step=self.step)
                        if label in self.history:
                            self.history[label][k].append(v)

                    if label in self.history:
                        self.history[label]["step"].append(self.step)

                return output

            return decorated

        return decorator

    def is_best(self, label, key):
        """
        Checks if the latest value of the given key in the label is the best so far.

        "Best" means smallest: the latest entry is compared with the
        minimum of the recorded history.

        Parameters
        ----------
        label : str
            The label of the metrics to be checked.
        key : str
            The key of the metric to be checked.

        Returns
        -------
        bool
            True if the latest value is the best so far, otherwise False.
        """
        return self.history[label][key][-1] == min(self.history[label][key])

    def state_dict(self):
        """
        Returns a dictionary containing the state of the tracker.

        Returns
        -------
        dict
            A dictionary containing the history and step of the tracker.
        """
        return {"history": self.history, "step": self.step}

    def load_state_dict(self, state_dict):
        """
        Loads the state of the tracker from the given state dictionary.

        Parameters
        ----------
        state_dict : dict
            A dictionary containing the history and step of the tracker.

        Returns
        -------
        Tracker
            The tracker object with the loaded state.
        """
        self.history = state_dict["history"]
        self.step = state_dict["step"]
        return self


================================================
FILE: paddlespeech/audiotools/post.py
================================================
# MIT License, Copyright (c) 2023-Present, Descript.
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Modified from audiotools(https://github.com/descriptinc/audiotools/blob/master/audiotools/post.py)
import typing

import paddle

from paddlespeech.audiotools.core import AudioSignal


def audio_table(
        audio_dict: dict,
        first_column: str=None,
        format_fn: typing.Callable=None,
        **kwargs, ):
    """Builds a pipe-delimited (Markdown-style) table from a dictionary
    of audio data and returns it as a string.

    Each key of ``audio_dict`` becomes one row. The columns are taken
    from the keys of the first row; a later row that lacks one of those
    columns gets ``None`` for that cell, which the default formatter
    renders as ".".

    Parameters
    ----------
    audio_dict : dict
        Dictionary of data to embed. Each value is either a dictionary
        mapping column name to cell content, or a single item, which is
        placed in a column named "Audio".
    first_column : str, optional
        The label for the first column of the table, by default None
        (rendered as ".").
    format_fn : typing.Callable, optional
        How to format the data, by default None. Called as
        ``format_fn(column_name, cell, **kwargs)`` and must return a string.
    **kwargs
        Extra keyword arguments forwarded to ``format_fn``.

    Returns
    -------
    str
        Table as a string

    Examples
    --------

    >>> audio_dict = {}
    >>> for i in range(signal_batch.batch_size):
    >>>     audio_dict[i] = {
    >>>         "input": signal_batch[i],
    >>>         "output": output_batch[i]
    >>>     }
    >>> audiotools.post.audio_table(audio_dict)

    """

    output = []
    columns = None

    def _default_format_fn(label, x, **kwargs):
        if paddle.is_tensor(x):
            x = x.tolist()

        if x is None:
            return "."
        elif isinstance(x, AudioSignal):
            return x.embed(display=False, return_html=True, **kwargs)
        else:
            return str(x)

    if format_fn is None:
        format_fn = _default_format_fn

    if first_column is None:
        first_column = "."

    for k, v in audio_dict.items():
        if not isinstance(v, dict):
            v = {"Audio": v}

        if columns is None:
            # The first row fixes the column set and emits the header plus
            # the alignment row.
            v_keys = list(v.keys())
            columns = [first_column] + v_keys
            output.append(" | ".join(columns))
            output.append("|---" + len(v_keys) * "|:-:")

        # `.get` keeps ragged rows from raising KeyError: a missing cell is
        # handed to the formatter as None.
        formatted_audio = [
            format_fn(col, v.get(col), **kwargs) for col in columns[1:]
        ]
        output.append(f"| {k} | " + " | ".join(formatted_audio))

    return "\n" + "\n".join(output)


================================================
FILE: paddlespeech/cli/README.md
================================================
# PaddleSpeech Command Line

([简体中文](./README_cn.md)|English)

 The simplest way to use PaddleSpeech models.

 ## Help
 ```bash
 paddlespeech help
 ```
 ## Audio Classification
 ```bash
 paddlespeech cls --input input.wav
 ```

 ## Speaker Verification

 ```bash
 paddlespeech vector --task spk --input input_16k.wav
 ```

 ## Automatic Speech Recognition
 ```bash
 paddlespeech asr --lang zh --input input_16k.wav
 ```
 
 ## Speech Translation (English to Chinese)
 
 (Not supported on Windows yet.)
 ```bash
 paddlespeech st --input input_16k.wav
 ```
 
 ## Text-to-Speech
 ```bash
 paddlespeech tts --input "你好，欢迎使用百度飞桨深度学习框架！" --output output.wav
 ```
 
 ## Text Post-processing

- Punctuation Restoration
  ```bash
  paddlespeech text --task punc --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭
  ```
- Faster Punctuation Restoration
  ```bash
  paddlespeech text --task punc --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭 --model ernie_linear_p3_wudao_fast
  ```


================================================
FILE: paddlespeech/cli/README_cn.md
================================================
# PaddleSpeech 命令行工具

(简体中文|[English](./README.md))

`paddlespeech.cli` 模块是 PaddleSpeech 的命令行工具，它提供了最简便的方式调用 PaddleSpeech 提供的不同语音应用场景的预训练模型，用一行命令就可以进行模型预测：

 ## 命令行使用帮助
 ```bash
 paddlespeech help
 ```

 ## 声音分类
 ```bash
 paddlespeech cls --input input.wav
 ```

  ## 声纹识别

 ```bash
 paddlespeech vector --task spk --input input_16k.wav
 ```

 ## 语音识别
 ```bash
 paddlespeech asr --lang zh --input input_16k.wav
 ```
 
 ## 语音翻译（英-中）
 
 (暂不支持Windows系统)
 ```bash
 paddlespeech st --input input_16k.wav
 ```
 
 ## 语音合成
 ```bash
 paddlespeech tts --input "你好，欢迎使用百度飞桨深度学习框架！" --output output.wav
 ```
 
 ## 文本后处理

- 标点恢复
  ```bash
  paddlespeech text --task punc --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭
  ```
- 快速标点恢复
  ```bash
  paddlespeech text --task punc --input 今天的天气真不错啊你下午有空吗我想约你一起去吃饭 --model ernie_linear_p3_wudao_fast
  ```


================================================
FILE: paddlespeech/cli/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import _locale

from .base_commands import BaseCommand
from .base_commands import HelpCommand

# Replace the private `_locale._getdefaultlocale` hook with a stub that ignores
# its arguments and always reports ['en_US', 'utf8'].
# NOTE(review): presumably this forces a UTF-8 default locale on systems whose
# native locale uses a different encoding (e.g. Windows) — confirm.
_locale._getdefaultlocale = (lambda *args: ['en_US', 'utf8'])


================================================
FILE: paddlespeech/cli/asr/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .infer import ASRExecutor


================================================
FILE: paddlespeech/cli/asr/infer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import io
import os
import sys
import time
from collections import OrderedDict
from typing import List
from typing import Optional
from typing import Union

import librosa
import numpy as np
import paddle
import soundfile
from yacs.config import CfgNode

from ...utils.env import MODEL_HOME
from ..download import get_path_from_url
from ..executor import BaseExecutor
from ..log import logger
from ..utils import CLI_TIMER
from ..utils import stats_wrapper
from ..utils import timer_register
from paddlespeech.audio.transform.transformation import Transformation
from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
from paddlespeech.s2t.utils.utility import UpdateConfig

__all__ = ['ASRExecutor']


@timer_register
class ASRExecutor(BaseExecutor):
    def __init__(self):
        """
        Create the ASR executor and register its command-line arguments.

        Calls the base initializer with task 'asr' and inference type
        'offline', then builds ``self.parser`` with every option accepted
        by the `paddlespeech asr` command.
        """
        super().__init__(task='asr', inference_type='offline')

        def _str2bool(value):
            # argparse's `type=bool` calls bool() on the raw string, so every
            # non-empty value (including "False") would parse as True.
            # Interpret the text explicitly instead.
            if isinstance(value, bool):
                return value
            lowered = str(value).strip().lower()
            if lowered in ('true', 't', 'yes', 'y', '1'):
                return True
            if lowered in ('false', 'f', 'no', 'n', '0'):
                return False
            raise argparse.ArgumentTypeError(
                f"expected a boolean value, got {value!r}")

        self.parser = argparse.ArgumentParser(
            prog='paddlespeech.asr', add_help=True)
        self.parser.add_argument(
            '--input', type=str, default=None, help='Audio file to recognize.')
        self.parser.add_argument(
            '--model',
            type=str,
            default='conformer_u2pp_online_wenetspeech',
            choices=[
                # Resource tags look like "<model>-<lang>-<rate>"; only the
                # part before the first '-' is exposed as a model choice.
                tag[:tag.index('-')]
                for tag in self.task_resource.pretrained_models.keys()
            ],
            help='Choose model type of asr task.')
        self.parser.add_argument(
            '--lang',
            type=str,
            default='zh',
            help='Choose model language. [zh, en, zh_en], zh:[conformer_wenetspeech-zh-16k], en:[transformer_librispeech-en-16k], zh_en:[conformer_talcs-codeswitch_zh_en-16k]'
        )
        self.parser.add_argument(
            '--codeswitch',
            type=_str2bool,
            default=False,
            help='Choose whether use code-switch. True or False.')
        self.parser.add_argument(
            "--sample_rate",
            type=int,
            default=16000,
            choices=[8000, 16000],
            help='Choose the audio sample rate of the model. 8000 or 16000')
        self.parser.add_argument(
            '--config',
            type=str,
            default=None,
            help='Config of asr task. Use default config when it is None.')
        self.parser.add_argument(
            '--decode_method',
            type=str,
            default='attention_rescoring',
            choices=[
                'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention',
                'attention_rescoring'
            ],
            help='only support transformer and conformer model')
        self.parser.add_argument(
            '--num_decoding_left_chunks',
            '-num_left',
            # Parsed as int so the value can be range-checked numerically
            # (the default is the integer -1).
            type=int,
            default=-1,
            help='only support transformer and conformer online model')
        self.parser.add_argument(
            '--ckpt_path',
            type=str,
            default=None,
            help='Checkpoint file of model.')
        self.parser.add_argument(
            '--yes',
            '-y',
            action="store_true",
            default=False,
            help='No additional parameters required. \
            Once set this parameter, it means accepting the request of the program by default, \
            which includes transforming the audio sample rate')
        self.parser.add_argument(
            '--rtf',
            action="store_true",
            default=False,
            help='Show Real-time Factor(RTF).')
        self.parser.add_argument(
            '--device',
            type=str,
            default=paddle.get_device(),
            help='Choose device to execute model inference.')
        self.parser.add_argument(
            '-d',
            '--job_dump_result',
            action='store_true',
            help='Save job result into file.')
        self.parser.add_argument(
            '-v',
            '--verbose',
            action='store_true',
            help='Increase logger verbosity of current task.')

    def _init_from_path(self,
                        model_type: str='wenetspeech',
                        lang: str='zh',
                        codeswitch: bool=False,
                        sample_rate: int=16000,
                        cfg_path: Optional[os.PathLike]=None,
                        decode_method: str='attention_rescoring',
                        num_decoding_left_chunks: int=-1,
                        ckpt_path: Optional[os.PathLike]=None):
        """
        Init model and other resources from a specific path.

        Does nothing when ``self.model`` already exists. Otherwise resolves
        the config and checkpoint paths (from the pretrained-resource tag
        when ``cfg_path`` or ``ckpt_path`` is None, else from the given
        paths with ".pdparams" appended to ``ckpt_path``), loads the config,
        builds the text featurizer and the model, loads the weights and, for
        conformer/transformer models, derives ``self.max_len``.

        Raises:
            Exception: if ``lang`` and ``codeswitch`` are inconsistent, or
                if ``model_type`` contains none of "deepspeech2",
                "conformer" or "transformer".
            AssertionError: if ``num_decoding_left_chunks`` is below -1.
        """
        logger.debug("start to init the model")
        if hasattr(self, 'model'):
            # Return before touching `self.max_len`: resetting it to the
            # default here would overwrite the limit computed when the model
            # was first loaded.
            logger.debug('Model had been initialized.')
            if not hasattr(self, 'max_len'):
                self.max_len = 50
            return

        # default max_len: unit:second
        self.max_len = 50

        if cfg_path is None or ckpt_path is None:
            sample_rate_str = '16k' if sample_rate == 16000 else '8k'
            if lang == "zh_en" and codeswitch is True:
                tag = model_type + '-' + 'codeswitch_' + lang + '-' + sample_rate_str
            elif lang == "zh_en" or codeswitch is True:
                raise Exception("codeswitch is true only in zh_en model")
            else:
                tag = model_type + '-' + lang + '-' + sample_rate_str
            self.task_resource.set_task_model(tag, version=None)
            self.res_path = self.task_resource.res_dir

            self.cfg_path = os.path.join(
                self.res_path, self.task_resource.res_dict['cfg_path'])
            self.ckpt_path = os.path.join(
                self.res_path,
                self.task_resource.res_dict['ckpt_path'] + ".pdparams")
            logger.debug(self.res_path)

        else:
            self.cfg_path = os.path.abspath(cfg_path)
            self.ckpt_path = os.path.abspath(ckpt_path + ".pdparams")
            # The resource root is taken to be two directory levels above
            # the config file.
            self.res_path = os.path.dirname(
                os.path.dirname(os.path.abspath(self.cfg_path)))
        logger.debug(self.cfg_path)
        logger.debug(self.ckpt_path)

        #Init body.
        self.config = CfgNode(new_allowed=True)
        self.config.merge_from_file(self.cfg_path)

        with UpdateConfig(self.config):
            if self.config.spm_model_prefix:
                self.config.spm_model_prefix = os.path.join(
                    self.res_path, self.config.spm_model_prefix)
            self.text_feature = TextFeaturizer(
                unit_type=self.config.unit_type,
                vocab=self.config.vocab_filepath,
                spm_model_prefix=self.config.spm_model_prefix)
            if "deepspeech2" in model_type:
                self.config.decode.lang_model_path = os.path.join(
                    MODEL_HOME, 'language_model',
                    self.config.decode.lang_model_path)

                lm_url = self.task_resource.res_dict['lm_url']
                lm_md5 = self.task_resource.res_dict['lm_md5']
                self.download_lm(
                    lm_url,
                    os.path.dirname(self.config.decode.lang_model_path), lm_md5)

            elif "conformer" in model_type or "transformer" in model_type:
                self.config.decode.decoding_method = decode_method
                # `is not None` rather than truthiness so that the valid
                # value 0 is not silently skipped.
                if num_decoding_left_chunks is not None:
                    # Accept numeric strings as well as ints so the range
                    # check below always compares numbers.
                    num_decoding_left_chunks = int(num_decoding_left_chunks)
                    assert num_decoding_left_chunks == -1 or num_decoding_left_chunks >= 0, "num_decoding_left_chunks should be -1 or >=0"
                    # NOTE(review): infer() reads
                    # self.config.decode.num_decoding_left_chunks, whereas
                    # this stores the value on the config root — confirm
                    # which location is intended.
                    self.config.num_decoding_left_chunks = num_decoding_left_chunks

            else:
                raise Exception("wrong type")
        model_name = model_type[:model_type.rindex(
            '_')]  # model_type: {model_name}_{dataset}
        model_class = self.task_resource.get_model_class(model_name)
        model_conf = self.config
        model = model_class.from_config(model_conf)
        self.model = model
        self.model.eval()

        # load model
        model_dict = paddle.load(self.ckpt_path)
        self.model.set_state_dict(model_dict)

        # compute the max len limit
        if "conformer" in model_type or "transformer" in model_type:
            # in transformer like model, we may use the subsample rate cnn network
            subsample_rate = self.model.subsampling_rate()
            # n_shift / fs — presumably the frame hop expressed in seconds
            # (TODO confirm units of the preprocess config).
            frame_shift_ms = self.config.preprocess_config.process[0][
                'n_shift'] / self.config.preprocess_config.process[0]['fs']
            max_len = self.model.encoder.embed.pos_enc.max_len

            # An explicit encoder_conf.max_len overrides the positional
            # encoding's own limit.
            if self.config.encoder_conf.get("max_len", None):
                max_len = self.config.encoder_conf.max_len

            self.max_len = frame_shift_ms * max_len * subsample_rate
            logger.debug(
                f"The asr server limit max duration len: {self.max_len}")

    def preprocess(self, model_type: str, input: Union[str, os.PathLike]):
        """
        Input preprocess and return paddle.Tensor stored in self.input.
        Input content can be a text(tts), a file(asr, cls) or a streaming(not supported yet).

        Reads the audio as int16, optionally down-mixes and resamples it to
        ``self.sample_rate`` (when ``self.change_format`` is set), runs the
        configured feature transformation and stores the result in
        ``self._inputs["audio"]`` and ``self._inputs["audio_len"]``.

        Args:
            model_type: model identifier; must contain "deepspeech2",
                "conformer" or "transformer".
            input: path of the audio file, or an ``io.BytesIO`` holding it.

        Raises:
            Exception: if ``model_type`` matches none of the supported
                model families.
        """

        audio_file = input
        if isinstance(audio_file, (str, os.PathLike)):
            # Format instead of concatenating: `str + PathLike` raises
            # TypeError for non-str path objects.
            logger.debug(f"Preprocess audio_file:{audio_file}")
        elif isinstance(audio_file, io.BytesIO):
            # Rewind so the buffer is read from its beginning.
            audio_file.seek(0)

        # Get the object for feature extraction
        if "deepspeech2" in model_type or "conformer" in model_type or "transformer" in model_type:
            logger.debug("get the preprocess conf")
            preprocess_conf = self.config.preprocess_config
            preprocess_args = {"train": False}
            preprocessing = Transformation(preprocess_conf)
            logger.debug("read the audio file")
            audio, audio_sample_rate = soundfile.read(
                audio_file, dtype="int16", always_2d=True)
            if self.change_format:
                if audio.shape[1] >= 2:
                    # Average in floating point and cast afterwards:
                    # accumulating the channel sum in int16 overflows for
                    # loud multi-channel audio.
                    audio = audio.mean(axis=1).astype(np.int16)
                else:
                    audio = audio[:, 0]
                # pcm16 -> pcm 32
                audio = self._pcm16to32(audio)
                audio = librosa.resample(
                    audio,
                    orig_sr=audio_sample_rate,
                    target_sr=self.sample_rate)
                audio_sample_rate = self.sample_rate
                # pcm32 -> pcm 16
                audio = self._pcm32to16(audio)
            else:
                # No format change requested: keep only the first channel.
                audio = audio[:, 0]

            logger.debug(f"audio shape: {audio.shape}")
            # fbank
            audio = preprocessing(audio, **preprocess_args)

            audio_len = paddle.to_tensor(audio.shape[0]).unsqueeze(axis=0)
            audio = paddle.to_tensor(audio, dtype='float32').unsqueeze(axis=0)

            self._inputs["audio"] = audio
            self._inputs["audio_len"] = audio_len
            logger.debug(f"audio feat shape: {audio.shape}")

        else:
            raise Exception("wrong type")

        logger.debug("audio feat process success")

    @paddle.no_grad()
    def infer(self, model_type: str):
        """
        Run the loaded model on the prepared inputs.

        Reads ``self._inputs["audio"]`` and ``self._inputs["audio_len"]``
        and writes the first transcript to ``self._outputs["result"]``.
        For conformer/transformer models a decoding failure is logged and
        not re-raised; any other unsupported ``model_type`` raises.
        """
        logger.debug("start to infer the model to get the output")
        decode_cfg = self.config.decode
        feats = self._inputs["audio"]
        feats_len = self._inputs["audio_len"]

        if "deepspeech2" in model_type:
            batch_size = feats.shape[0]
            self.model.decoder.init_decoder(
                batch_size, self.text_feature.vocab_list,
                decode_cfg.decoding_method, decode_cfg.lang_model_path,
                decode_cfg.alpha, decode_cfg.beta, decode_cfg.beam_size,
                decode_cfg.cutoff_prob, decode_cfg.cutoff_top_n,
                decode_cfg.num_proc_bsearch)

            transcripts = self.model.decode(feats, feats_len)
            self.model.decoder.del_decoder()
            self._outputs["result"] = transcripts[0]
            return

        is_transformer_like = ("conformer" in model_type or
                               "transformer" in model_type)
        if not is_transformer_like:
            raise Exception("invalid model name")

        logger.debug(f"we will use the transformer like model : {model_type}")
        try:
            decode_kwargs = dict(
                text_feature=self.text_feature,
                decoding_method=decode_cfg.decoding_method,
                beam_size=decode_cfg.beam_size,
                ctc_weight=decode_cfg.ctc_weight,
                decoding_chunk_size=decode_cfg.decoding_chunk_size,
                num_decoding_left_chunks=decode_cfg.num_decoding_left_chunks,
                simulate_streaming=decode_cfg.simulate_streaming)
            transcripts = self.model.decode(feats, feats_len, **decode_kwargs)
            self._outputs["result"] = transcripts[0][0]
        except Exception as e:
            # Failures are logged only; the previous result (if any) is
            # left untouched.
            logger.exception(e)

    def postprocess(self) -> Union[str, os.PathLike]:
        """
            Return the value stored under the "result" key of ``self._outputs``.
        """
        outputs = self._outputs
        return outputs["result"]

    def download_lm(self, url, lm_dir, md5sum):
        """
        Fetch the language model at ``url`` into ``lm_dir``.

        Delegates to ``get_path_from_url`` with decompression disabled and
        forwards ``md5sum`` to it. The returned path is not needed here, so
        it is discarded.
        """
        get_path_from_url(
            url=url,
            root_dir=lm_dir,
            md5sum=md5sum,
            decompress=False, )

    def _pcm16to32(self, audio):
        assert (audio.dtype == np.int16)
        audio = audio.astype("float32")
        bits = np.iinfo(np.int16).bits
        audio = audio / (2**(bits - 1))
        return audio

    def _pcm32to16(self, audio):
        assert (audio.dtype == np.float32)
        bits = np.iinfo(np.int16).bits
        audio = audio * (2**(bits - 1))
        audio = np.round(audio).astype("int16")
        return audio

    def _check(self, audio_file: str, sample_rate: int, force_yes: bool=False):
        """
            Validate the requested sample rate and the input audio.

            Stores `sample_rate` in `self.sample_rate` and, when the checks
            pass, records in `self.change_format` whether the file's sample
            rate differs from the requested one.

            Args:
                audio_file: A path (str / os.PathLike) or an io.BytesIO object.
                sample_rate: Requested sample rate; only 8000 and 16000 pass.
                force_yes: When False and the sample rates differ, the user is
                    asked on stdin whether to continue.

            Returns:
                True when the audio can be used, False otherwise.
        """
        self.sample_rate = sample_rate
        if self.sample_rate not in (16000, 8000):
            logger.error(
                "invalid sample rate, please input --sr 8000 or --sr 16000")
            return False

        if isinstance(audio_file, (str, os.PathLike)):
            path_is_file = os.path.isfile(audio_file)
            if not path_is_file:
                logger.error("Please input the right audio file path")
                return False
        elif isinstance(audio_file, io.BytesIO):
            # Rewind the in-memory buffer before it is read below.
            audio_file.seek(0)

        logger.debug("checking the audio file format......")
        try:
            audio, audio_sample_rate = soundfile.read(
                audio_file, dtype="int16", always_2d=True)
            # Duration in seconds = number of frames / frames per second.
            audio_duration = audio.shape[0] / audio_sample_rate
            if audio_duration > self.max_len:
                logger.error(
                    f"Please input audio file less then {self.max_len} seconds.\n"
                )
                return False
        except Exception as e:
            logger.exception(e)
            logger.error(
                f"can not open the audio file, please check the audio file({audio_file}) format is 'wav'. \n \
                 you can try to use sox to change the file format.\n \
                 For example: \n \
                 sample rate: 16k \n \
                 sox input_audio.xx --rate 16k --bits 16 --channels 1 output_audio.wav \n \
                 sample rate: 8k \n \
                 sox input_audio.xx --rate 8k --bits 16 --channels 1 output_audio.wav \n \
                 ")
            return False
        logger.debug("The sample rate is %d" % audio_sample_rate)
        if audio_sample_rate != self.sample_rate:
            logger.warning("The sample rate of the input file is not {}.\n \
                            The program will resample the wav file to {}.\n \
                            If the result does not meet your expectations，\n \
                            Please input the 16k 16 bit 1 channel wav file. \
                        ".format(self.sample_rate, self.sample_rate))
            if force_yes is False:
                accepted_answers = ("Y", "y", "yes", "Yes")
                rejected_answers = ("N", "n", "no", "No")
                # Keep asking until one of the recognised answers is typed.
                while True:
                    logger.debug(
                        "Whether to change the sample rate and the channel. Y: change the sample. N: exit the prgream."
                    )
                    answer = input("Input(Y/N):").strip()
                    if answer in accepted_answers:
                        logger.debug(
                            "change the sampele rate, channel to 16k and 1 channel"
                        )
                        break
                    if answer in rejected_answers:
                        logger.debug("Exit the program")
                        return False
                    logger.warning("Not regular input, please input again")

            self.change_format = True
        else:
            logger.debug("The audio file format is right")
            self.change_format = False

        return True

    def execute(self, argv: List[str]) -> bool:
        """
            Command line entry.

            Parses `argv`, calls the executor once per input produced by
            `self.get_input_source`, and passes the collected results to
            `self.process_task_results`.

            Returns:
                True when every input was processed without raising an
                `Exception`, False otherwise.
        """
        parser_args = self.parser.parse_args(argv)

        # Options forwarded unchanged to `__call__` for every input.
        call_options = dict(
            model=parser_args.model,
            lang=parser_args.lang,
            codeswitch=parser_args.codeswitch,
            sample_rate=parser_args.sample_rate,
            config=parser_args.config,
            ckpt_path=parser_args.ckpt_path,
            decode_method=parser_args.decode_method,
            force_yes=parser_args.yes,
            rtf=parser_args.rtf,
            device=parser_args.device)

        if not parser_args.verbose:
            self.disable_task_loggers()

        task_source = self.get_input_source(parser_args.input)
        task_results = OrderedDict()
        has_exceptions = False

        for id_, input_ in task_source.items():
            try:
                task_results[id_] = self(audio_file=input_, **call_options)
            except Exception as e:
                # Record the failure as text and carry on with the next input.
                has_exceptions = True
                task_results[id_] = f'{e.__class__.__name__}: {e}'

        if call_options['rtf']:
            self.show_rtf(CLI_TIMER[self.__class__.__name__])

        self.process_task_results(parser_args.input, task_results,
                                  parser_args.job_dump_result)

        return not has_exceptions

    @stats_wrapper
    def __call__(self,
                 audio_file: os.PathLike,
                 model: str='conformer_u2pp_online_wenetspeech',
                 lang: str='zh',
                 codeswitch: bool=False,
                 sample_rate: int=16000,
                 config: os.PathLike=None,
                 ckpt_path: os.PathLike=None,
                 decode_method: str='attention_rescoring',
                 num_decoding_left_chunks: int=-1,
                 force_yes: bool=False,
                 rtf: bool=False,
                 device=paddle.get_device()):
        """
        Python API to call an executor.

        Runs `_init_from_path`, `_check`, `preprocess`, `infer` and
        `postprocess` in that order on one audio file.

        Args:
            audio_file: Path of the audio file; a leading "~" is expanded and
                the path is made absolute.
            model, lang, codeswitch, sample_rate, config, decode_method,
            num_decoding_left_chunks, ckpt_path: Forwarded to
                `_init_from_path`.
            force_yes: Forwarded to `_check`.
            rtf: When True, start/end timestamps and the audio duration are
                appended to `CLI_TIMER` under this class' name.
            device: Passed to `paddle.set_device`.

        Returns:
            The value returned by `postprocess`.

        Note:
            The process exits via `sys.exit(-1)` when `_check` returns False.
        """
        # Expand "~" as well as relative paths, the same way the cls executor
        # resolves its input path.
        audio_file = os.path.abspath(os.path.expanduser(audio_file))
        paddle.set_device(device)
        self._init_from_path(model, lang, codeswitch, sample_rate, config,
                             decode_method, num_decoding_left_chunks, ckpt_path)
        if not self._check(audio_file, sample_rate, force_yes):
            sys.exit(-1)

        timer_key = self.__class__.__name__
        if rtf:
            CLI_TIMER[timer_key]['start'].append(time.time())

        self.preprocess(model, audio_file)
        self.infer(model)
        res = self.postprocess()  # Retrieve result of asr.

        if rtf:
            CLI_TIMER[timer_key]['end'].append(time.time())
            # Re-read the file to record its duration (frames / sample rate).
            audio, audio_sample_rate = soundfile.read(
                audio_file, dtype="int16", always_2d=True)
            CLI_TIMER[timer_key]['extra'].append(
                audio.shape[0] / audio_sample_rate)

        return res


================================================
FILE: paddlespeech/cli/base_commands.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from typing import List

import numpy
from prettytable import PrettyTable

from ..resource import CommonTaskResource
from .entry import commands
from .utils import cli_register
from .utils import explicit_command_register
from .utils import get_command

__all__ = ['BaseCommand', 'HelpCommand', 'StatsCommand']


@cli_register(name='paddlespeech')
class BaseCommand:
    """Root `paddlespeech` command; it delegates to the help command."""

    def execute(self, argv: List[str]) -> bool:
        """Instantiate the command registered as `paddlespeech.help` and run it with `argv`."""
        help_cls = get_command('paddlespeech.help')
        help_command = help_cls()
        return help_command.execute(argv)


@cli_register(name='paddlespeech.help', description='Show help for commands.')
class HelpCommand:
    """Prints the usage line and every described `paddlespeech` sub-command."""

    def execute(self, argv: List[str]) -> bool:
        """Print the help text and return True; `argv` is not used."""
        parts = [
            'Usage:\n',
            '    paddlespeech <command> <options>\n\n',
            'Commands:\n',
        ]
        for command, detail in commands['paddlespeech'].items():
            # Skip underscore-prefixed keys and entries without a description.
            if command.startswith('_') or '_description' not in detail:
                continue
            parts.append('    {:<15}        {}\n'.format(
                command, detail['_description']))

        print(''.join(parts))
        return True


@cli_register(
    name='paddlespeech.version',
    description='Show version and commit id of current package.')
class VersionCommand:
    """Prints the package version and commit id, with fallbacks when absent."""

    def execute(self, argv: List[str]) -> bool:
        """Print version information and return True; `argv` is not used."""
        # Both attributes are looked up in the parent package; each falls back
        # to a placeholder text when the import fails.
        try:
            from .. import __version__ as version
        except ImportError:
            version = 'Not an official release'

        try:
            from .. import __commit__ as commit_id
        except ImportError:
            commit_id = 'Not found'

        template = 'Package Version:\n    {}\n\nCommit ID:\n    {}\n\n'
        print(template.format(version, commit_id))
        return True


# Column headers of the `stats` table for each task: the "-"-separated
# names label the "-"-separated parts of a pretrained-model tag.
model_name_format = {
    'asr': 'Model-Size-Code Switch-Multilingual-Language-Sample Rate',
    'cls': 'Model-Sample Rate',
    'st': 'Model-Source language-Target language',
    'text': 'Model-Task-Language',
    'tts': 'Model-Language',
    'vector': 'Model-Sample Rate',
    # 'kws' is an accepted `--task` value, so it needs an entry here;
    # without one the table lookup raises KeyError.
    # NOTE(review): assumes kws tags look like "<model>-<sample rate>"
    # (same layout as cls/vector) - confirm against the resource registry.
    'kws': 'Model-Sample Rate',
    'ssl': 'Model-Language-Sample Rate',
    'whisper': 'Model-Language-Sample Rate'
}


@cli_register(
    name='paddlespeech.stats',
    description='Get speech tasks support models list.')
class StatsCommand:
    """`paddlespeech stats`: print a table of the pretrained models of a task."""

    def __init__(self):
        self.parser = argparse.ArgumentParser(
            prog='paddlespeech.stats', add_help=True)
        self.task_choices = [
            'asr', 'cls', 'st', 'text', 'tts', 'vector', 'kws', 'ssl', 'whisper'
        ]
        # NOTE(review): with `required=True` argparse never falls back to
        # `default='asr'`; confirm which of the two is intended.
        self.parser.add_argument(
            '--task',
            type=str,
            default='asr',
            choices=self.task_choices,
            help='Choose speech task.',
            required=True)

    def show_support_models(self, pretrained_models: dict):
        """
        Print one table row per key of `pretrained_models`.

        Reads `self.task` (set by `execute`) to pick the column headers from
        `model_name_format`. Each key is split on "-" into cells.

        Args:
            pretrained_models: Mapping whose keys are model tags.
        """
        fields = model_name_format[self.task].split("-")
        table = PrettyTable(fields)
        for key in pretrained_models:
            line = key.split("-")
            if self.task == "asr" and len(line) < len(fields):
                # Pad short asr tags with "-" up to the number of columns.
                line.extend("-" for _ in range(len(fields) - len(line)))
                if "codeswitch" in key:
                    # Second tag part is "<marker>_<lang>_<lang>...": the
                    # marker goes to cell 3, the rest becomes "lang/lang".
                    marker, *languages = line[1].split("_")
                    line[3], line[1] = marker, "/".join(languages)
                elif "multilingual" in key:
                    # Same layout, but the marker goes to cell 4.
                    marker, *languages = line[1].split("_")
                    line[4], line[1] = marker, "/".join(languages)
                # Reorder the cells to match the header order.
                tmp = numpy.array(line)
                idx = [0, 5, 3, 4, 1, 2]
                line = tmp[idx]
            table.add_row(line)

        print(table)

    def execute(self, argv: List[str]) -> bool:
        """
        Command line entry.

        Returns:
            True when the table was printed, False when the task is not one
            of `self.task_choices` or building the table raised an exception.
        """
        parser_args = self.parser.parse_args(argv)
        self.task = parser_args.task
        if self.task not in self.task_choices:
            print("Please input correct speech task, choices = " + str(
                self.task_choices))
            # Report failure explicitly instead of falling through with None.
            return False

        pretrained_models = CommonTaskResource(task=self.task).pretrained_models

        try:
            print(
                "Here is the list of {} pretrained models released by PaddleSpeech that can be used by command line and python API"
                .format(self.task.upper()))
            self.show_support_models(pretrained_models)
            return True
        except Exception:
            # `Exception` rather than `BaseException`, so KeyboardInterrupt and
            # SystemExit are not swallowed and reported as a listing failure.
            print("Failed to get the list of {} pretrained models.".format(
                self.task.upper()))
            return False


# Dynamic import when running specific command.
# Each entry maps a sub-command name to [description, executor class name].
_commands = {
    'asr': ['Speech to text infer command.', 'ASRExecutor'],
    'cls': ['Audio classification infer command.', 'CLSExecutor'],
    'st': ['Speech translation infer command.', 'STExecutor'],
    'text': ['Text command.', 'TextExecutor'],
    'tts': ['Text to Speech infer command.', 'TTSExecutor'],
    'vector': ['Speech to vector embedding infer command.', 'VectorExecutor'],
    'kws': ['Keyword Spotting infer command.', 'KWSExecutor'],
    'ssl': [
        'Self-Supervised Learning Pretrained model infer command.',
        'SSLExecutor'
    ],
    'whisper': [
        'Whisper model for speech to text or translate speech to English.',
        'WhisperExecutor'
    ],
}

# Register every sub-command by the dotted path of its executor class.
for com, info in _commands.items():
    explicit_command_register(
        name=f'paddlespeech.{com}',
        description=info[0],
        cls=f'paddlespeech.cli.{com}.{info[1]}')


================================================
FILE: paddlespeech/cli/cls/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .infer import CLSExecutor


================================================
FILE: paddlespeech/cli/cls/infer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
from collections import OrderedDict
from typing import List
from typing import Optional
from typing import Union

import numpy as np
import paddle
import yaml
from paddle.audio.features import LogMelSpectrogram

from ..executor import BaseExecutor
from ..log import logger
from ..utils import stats_wrapper
from paddlespeech.audio.backends import soundfile_load as load

__all__ = ['CLSExecutor']


class CLSExecutor(BaseExecutor):
    """
        Audio classification executor behind the `paddlespeech cls` command.

        It can be driven from the command line through `execute` or from
        Python by calling the instance.
    """

    def __init__(self):
        super().__init__(task='cls')
        self.parser = argparse.ArgumentParser(
            prog='paddlespeech.cls', add_help=True)
        self.parser.add_argument(
            '--input', type=str, default=None, help='Audio file to classify.')
        self.parser.add_argument(
            '--model',
            type=str,
            default='panns_cnn14',
            # Model names are the part of each pretrained tag before "-".
            choices=[
                tag[:tag.index('-')]
                for tag in self.task_resource.pretrained_models.keys()
            ],
            help='Choose model type of cls task.')
        self.parser.add_argument(
            '--config',
            type=str,
            default=None,
            help='Config of cls task. Use default config when it is None.')
        self.parser.add_argument(
            '--ckpt_path',
            type=str,
            default=None,
            help='Checkpoint file of model.')
        self.parser.add_argument(
            '--label_file',
            type=str,
            default=None,
            help='Label file of cls task.')
        self.parser.add_argument(
            '--topk',
            type=int,
            default=1,
            help='Return topk scores of classification result.')
        self.parser.add_argument(
            '--device',
            type=str,
            default=paddle.get_device(),
            help='Choose device to execute model inference.')
        self.parser.add_argument(
            '-d',
            '--job_dump_result',
            action='store_true',
            help='Save job result into file.')
        self.parser.add_argument(
            '-v',
            '--verbose',
            action='store_true',
            help='Increase logger verbosity of current task.')

    def _init_from_path(self,
                        model_type: str='panns_cnn14',
                        cfg_path: Optional[os.PathLike]=None,
                        ckpt_path: Optional[os.PathLike]=None,
                        label_file: Optional[os.PathLike]=None):
        """
            Init model and other resources from a specific path.

            Does nothing when `self.model` already exists.

            Args:
                model_type: Model name; "<model_type>-32k" is the pretrained tag.
                cfg_path: Config file. When None, the config of the pretrained
                    tag is used.
                ckpt_path: Checkpoint file.
                label_file: Label file, one label per line.

            When `ckpt_path` or `label_file` is None, config, labels and
            checkpoint are all taken from the pretrained tag.
        """
        if hasattr(self, 'model'):
            logger.debug('Model had been initialized.')
            return

        use_pretrained = label_file is None or ckpt_path is None
        if use_pretrained or cfg_path is None:
            tag = model_type + '-' + '32k'  # panns_cnn14-32k
            self.task_resource.set_task_model(tag, version=None)

        if use_pretrained:
            self.cfg_path = os.path.join(
                self.task_resource.res_dir,
                self.task_resource.res_dict['cfg_path'])
            self.label_file = os.path.join(
                self.task_resource.res_dir,
                self.task_resource.res_dict['label_file'])
            self.ckpt_path = os.path.join(
                self.task_resource.res_dir,
                self.task_resource.res_dict['ckpt_path'])
        else:
            if cfg_path is None:
                # A custom checkpoint and label file without a config used to
                # fail in os.path.abspath(None); use the default config, as
                # the `--config` help text states.
                self.cfg_path = os.path.join(
                    self.task_resource.res_dir,
                    self.task_resource.res_dict['cfg_path'])
            else:
                self.cfg_path = os.path.abspath(cfg_path)
            self.label_file = os.path.abspath(label_file)
            self.ckpt_path = os.path.abspath(ckpt_path)

        # config
        with open(self.cfg_path, 'r') as f:
            self._conf = yaml.safe_load(f)

        # labels
        with open(self.label_file, 'r') as f:
            self._label_list = [line.strip() for line in f]

        # model
        model_class = self.task_resource.get_model_class(model_type)
        model_dict = paddle.load(self.ckpt_path)
        self.model = model_class(extract_embedding=False)
        self.model.set_state_dict(model_dict)
        self.model.eval()

    def preprocess(self, audio_file: Union[str, os.PathLike]):
        """
            Input preprocess and return paddle.Tensor stored in self.input.
            Input content can be a text(tts), a file(asr, cls) or a streaming(not supported yet).

            Loads the audio with the sample rate from the `feature` section
            of the config, extracts log-mel features and stores them in
            `self._inputs['feats']`.
        """
        feat_conf = self._conf['feature']
        logger.debug(feat_conf)
        waveform, _ = load(
            file=audio_file,
            sr=feat_conf['sample_rate'],
            mono=True,
            dtype='float32')
        if isinstance(audio_file, (str, os.PathLike)):
            # Formatting (rather than "+") also works for os.PathLike objects.
            logger.debug(f"Preprocessing audio_file:{audio_file}")

        # set 'pad_mode' be 'constant' when device is npu, otherwise be the default 'pad_mode' value
        if paddle.get_device().startswith('npu'):
            pad_mode_kwarg = {"pad_mode": "constant"}
        else:
            pad_mode_kwarg = {}

        # Feature extraction
        feature_extractor = LogMelSpectrogram(
            sr=feat_conf['sample_rate'],
            n_fft=feat_conf['n_fft'],
            hop_length=feat_conf['hop_length'],
            window=feat_conf['window'],
            win_length=feat_conf['window_length'],
            f_min=feat_conf['f_min'],
            f_max=feat_conf['f_max'],
            n_mels=feat_conf['n_mels'],
            **pad_mode_kwarg, )
        # Add a leading batch axis; one tensor conversion is enough.
        batched_waveform = paddle.to_tensor(waveform).unsqueeze(0)
        feats = feature_extractor(batched_waveform)
        self._inputs['feats'] = paddle.transpose(feats, [0, 2, 1]).unsqueeze(
            1)  # [B, N, T] -> [B, 1, T, N]

    @paddle.no_grad()
    def infer(self):
        """
            Model inference and result stored in self.output.

            Feeds `self._inputs['feats']` to the model and stores the result
            in `self._outputs['logits']`.
        """
        self._outputs['logits'] = self.model(self._inputs['feats'])

    def _generate_topk_label(self, result: np.ndarray, topk: int) -> str:
        """
            Format the `topk` highest-scoring labels as "label score " pairs.

            Args:
                result: Scores indexed like `self._label_list`.
                topk: Number of labels to report; must not exceed the number
                    of labels (asserted).

            Returns:
                The concatenated "label score " pairs, best score first.
        """
        assert topk <= len(
            self._label_list), 'Value of topk is larger than number of labels.'

        # Sorting the negated scores ascending yields descending score order.
        topk_idx = (-result).argsort()[:topk]
        return ''.join(f'{self._label_list[idx]} {result[idx]} '
                       for idx in topk_idx)

    def postprocess(self, topk: int) -> Union[str, os.PathLike]:
        """
            Output postprocess and return human-readable results such as texts and audio files.

            Returns the top-`topk` labels computed from
            `self._outputs['logits']`.
        """
        return self._generate_topk_label(
            result=self._outputs['logits'].squeeze(0).numpy(), topk=topk)

    def execute(self, argv: List[str]) -> bool:
        """
            Command line entry.

            Returns:
                True when every input was processed without raising an
                `Exception`, False otherwise.
        """
        parser_args = self.parser.parse_args(argv)

        model_type = parser_args.model
        label_file = parser_args.label_file
        cfg_path = parser_args.config
        ckpt_path = parser_args.ckpt_path
        topk = parser_args.topk
        device = parser_args.device

        if not parser_args.verbose:
            self.disable_task_loggers()

        task_source = self.get_input_source(parser_args.input)
        task_results = OrderedDict()
        has_exceptions = False

        for id_, input_ in task_source.items():
            try:
                res = self(input_, model_type, cfg_path, ckpt_path, label_file,
                           topk, device)
                task_results[id_] = res
            except Exception as e:
                # Record the failure as text and carry on with the next input.
                has_exceptions = True
                task_results[id_] = f'{e.__class__.__name__}: {e}'

        self.process_task_results(parser_args.input, task_results,
                                  parser_args.job_dump_result)

        return not has_exceptions

    @stats_wrapper
    def __call__(self,
                 audio_file: os.PathLike,
                 model: str='panns_cnn14',
                 config: Optional[os.PathLike]=None,
                 ckpt_path: Optional[os.PathLike]=None,
                 label_file: Optional[os.PathLike]=None,
                 topk: int=1,
                 device: str=paddle.get_device()):
        """
            Python API to call an executor.

            Args:
                audio_file: Audio file path; "~" is expanded and the path is
                    made absolute.
                model, config, ckpt_path, label_file: Forwarded to
                    `_init_from_path`.
                topk: Number of labels to return.
                device: Passed to `paddle.set_device`.

            Returns:
                The string produced by `postprocess`.
        """
        audio_file = os.path.abspath(os.path.expanduser(audio_file))
        paddle.set_device(device)
        self._init_from_path(model, config, ckpt_path, label_file)
        self.preprocess(audio_file)
        self.infer()
        res = self.postprocess(topk)  # Retrieve result of cls.

        return res


================================================
FILE: paddlespeech/cli/download.py
================================================
#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import hashlib
import os
import os.path as osp
import shutil
import subprocess
import tarfile
import time
import zipfile

import requests
from tqdm import tqdm

from .log import logger

__all__ = ['get_path_from_url']

DOWNLOAD_RETRY_LIMIT = 3


def _is_url(path):
    """
    Whether path is URL.
    Args:
        path (string): URL string or not.
    """
    return path.startswith('http://') or path.startswith('https://')


def _map_path(url, root_dir):
    # parse path after download under root_dir
    fname = osp.split(url)[-1]
    fpath = fname
    return osp.join(root_dir, fpath)


def _get_unique_endpoints(trainer_endpoints):
    """
    Keep one endpoint per IP address.
    Args:
        trainer_endpoints (list): "ip:port" strings; sorted in place.
    Returns:
        set: for each IP, the first endpoint in sorted order.
    """
    # Sorting is to avoid different environmental variables for each card
    trainer_endpoints.sort()
    seen_ips = set()
    unique_endpoints = set()
    for endpoint in trainer_endpoints:
        ip = endpoint.partition(":")[0]
        if ip not in seen_ips:
            seen_ips.add(ip)
            unique_endpoints.add(endpoint)
    logger.debug("unique_endpoints {}".format(unique_endpoints))
    return unique_endpoints


def get_path_from_url(url,
                      root_dir,
                      md5sum=None,
                      check_exist=True,
                      decompress=True,
                      method='get'):
    """ Download from given url to root_dir.
    If the file named by the url already exists under root_dir (and passes
    the md5 check), it is reused; otherwise it is downloaded. The result is
    optionally decompressed and its path returned.
    Args:
        url (str): download url
        root_dir (str): root dir for downloading, it should be
                        WEIGHTS_HOME or DATASET_HOME
        md5sum (str): md5 sum of download package
        check_exist (bool): when False, an existing file is not reused.
        decompress (bool): decompress zip or tar file. Default is `True`
        method (str): which download method to use. Support `wget` and `get`. Default is `get`.
    Returns:
        str: a local path to save downloaded models & weights & datasets.
    """

    from paddle.distributed import ParallelEnv

    assert _is_url(url), "downloading from {} not a url".format(url)
    # parse path after download to decompress under root_dir
    fullpath = _map_path(url, root_dir)
    # Mainly used to solve the problem of downloading data from different
    # machines in the case of multiple machines. Different ips will download
    # data, and the same ip will only download data once.
    unique_endpoints = _get_unique_endpoints(ParallelEnv().trainer_endpoints[:])

    already_present = (osp.exists(fullpath) and check_exist and
                       _md5check(fullpath, md5sum))
    if already_present:
        logger.debug("Found {}".format(fullpath))
    elif ParallelEnv().current_endpoint in unique_endpoints:
        fullpath = _download(url, root_dir, md5sum, method=method)
    else:
        # Another endpoint on this IP downloads; poll until the file appears.
        while not os.path.exists(fullpath):
            time.sleep(1)

    # The archive probes only run when decompression was requested.
    if (ParallelEnv().current_endpoint in unique_endpoints and decompress and
        (tarfile.is_tarfile(fullpath) or zipfile.is_zipfile(fullpath))):
        fullpath = _decompress(fullpath)

    return fullpath


def _get_download(url, fullname):
    """
    Download `url` to `fullname` with `requests.get` in streaming mode.
    Args:
        url (str): download url
        fullname (str): destination file path
    Returns:
        `fullname` on success, False when the request itself raised.
    Raises:
        RuntimeError: when the HTTP status code is not 200.
    """
    # using requests.get method
    fname = osp.basename(fullname)
    try:
        req = requests.get(url, stream=True)
    except Exception as e:  # requests.exceptions.ConnectionError
        logger.debug("Downloading {} from {} failed with exception {}".format(
            fname, url, str(e)))
        return False

    # A streamed response holds its connection until it is closed, so close
    # it on every exit path, including the error ones.
    try:
        if req.status_code != 200:
            raise RuntimeError("Downloading from {} failed with code "
                               "{}!".format(url, req.status_code))

        # For protecting download interupted, download to
        # tmp_fullname firstly, move tmp_fullname to fullname
        # after download finished
        tmp_fullname = fullname + "_tmp"
        total_size = req.headers.get('content-length')
        with open(tmp_fullname, 'wb') as f:
            if total_size:
                with tqdm(
                        total=(int(total_size)), unit='B',
                        unit_scale=True) as pbar:
                    for chunk in req.iter_content(chunk_size=1024):
                        f.write(chunk)
                        pbar.update(len(chunk))
            else:
                for chunk in req.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
    finally:
        req.close()

    shutil.move(tmp_fullname, fullname)

    return fullname


def _wget_download(url, fullname):
    """
    Download `url` to `fullname` by running the `wget` executable.
    Args:
        url (str): download url
        fullname (str): destination file path
    Returns:
        str: `fullname`.
    Raises:
        RuntimeError: when `wget` cannot be started or exits non-zero.
    """
    # using wget to download url
    tmp_fullname = fullname + "_tmp"
    # Pass the arguments as a list and avoid the shell: paths or urls with
    # spaces or shell metacharacters are then handed to wget unchanged.
    args = ['wget', '-O', tmp_fullname, '-t', str(DOWNLOAD_RETRY_LIMIT), url]
    # Only used for the error message below.
    command = ' '.join(args)
    try:
        completed = subprocess.run(
            args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        returncode = completed.returncode
    except OSError:
        # The executable could not be started (e.g. wget is not installed);
        # report it as RuntimeError, like a non-zero exit status.
        returncode = -1

    if returncode != 0:
        raise RuntimeError(
            '{} failed. Please make sure `wget` is installed or {} exists'.
            format(command, url))

    shutil.move(tmp_fullname, fullname)

    return fullname


# Maps a download method name to the function that implements it.
_download_methods = dict(
    get=_get_download,
    wget=_wget_download, )


def _download(url, path, md5sum=None, method='get'):
    """
    Download from url, save to path.
    url (str): download url
    path (str): download to given path
    md5sum (str): md5 sum of download package
    method (str): which download method to use. Support `wget` and `get`. Default is `get`.
    Returns:
        str: full path of the downloaded file.
    Raises:
        RuntimeError: when the file is still missing or fails the md5 check
            after DOWNLOAD_RETRY_LIMIT attempts.
    """
    assert method in _download_methods, 'make sure `{}` implemented'.format(
        method)

    # exist_ok avoids a failure when another process creates the directory
    # between an existence check and the creation.
    os.makedirs(path, exist_ok=True)

    fname = osp.split(url)[-1]
    fullname = osp.join(path, fname)
    retry_cnt = 0

    logger.debug("Downloading {} from {}".format(fname, url))
    while not (osp.exists(fullname) and _md5check(fullname, md5sum)):
        if retry_cnt >= DOWNLOAD_RETRY_LIMIT:
            raise RuntimeError("Download from {} failed. "
                               "Retry limit reached".format(url))
        retry_cnt += 1

        if not _download_methods[method](url, fullname):
            # The attempt reported failure; wait briefly before retrying.
            time.sleep(1)

    return fullname


def _md5check(fullname, md5sum=None):
    if md5sum is None:
        return True

    logger.debug("File {} md5 checking...".format(fullname))
    md5 = hashlib.md5()
    with open(fullname, 'rb') as f:
        for chunk in iter(lambda: f.read(4096), b""):
            md5.update(chunk)
    calc_md5sum = md5.hexdigest()

    if calc_md5sum != md5sum:
        logger.debug("File {} md5 check failed, {}(calc) != "
                     "{}(base)".format(fullname, calc_md5sum, md5sum))
        return False
    return True


def _decompress(fname):
    """
    Decompress for zip and tar file
    Args:
        fname (str): path of the archive
    Returns:
        The path returned by the tar or zip extraction helper.
    Raises:
        TypeError: when `fname` is neither a tar nor a zip file.
    """
    logger.debug("Decompressing {}...".format(fname))

    # Tar is probed first, then zip.
    if tarfile.is_tarfile(fname):
        return _uncompress_file_tar(fname)
    if zipfile.is_zipfile(fname):
        return _uncompress_file_zip(fname)
    raise TypeError("Unsupport compress file type {}".format(fname))


def _uncompress_file_zip(filepath):
    """
    Extract a zip archive next to itself.
    Args:
        filepath (str): path of the zip file
    Returns:
        str: path computed for the extracted content. For a single file or
            a single top-level directory it is derived from the first member
            name; otherwise it is a directory named after the archive.
    """
    file_dir = os.path.dirname(filepath)

    # The context manager closes the archive even when extraction raises.
    with zipfile.ZipFile(filepath, 'r') as files:
        file_list = files.namelist()

        if _is_a_single_file(file_list):
            rootpath = file_list[0]
            extract_dir = file_dir
        elif _is_a_single_dir(file_list):
            rootpath = os.path.splitext(file_list[0])[0].split(os.sep)[0]
            extract_dir = file_dir
        else:
            # Several top-level members: extract them into a directory named
            # after the archive (without its last extension).
            rootpath = os.path.splitext(filepath)[0].split(os.sep)[-1]
            extract_dir = os.path.join(file_dir, rootpath)
            os.makedirs(extract_dir, exist_ok=True)

        uncompressed_path = os.path.join(file_dir, rootpath)
        for item in file_list:
            files.extract(item, extract_dir)

    return uncompressed_path


def _uncompress_file_tar(filepath, mode="r:*"):
    """
    Extract a tar archive next to itself and return the extraction root.

    Three layouts are distinguished:
      * a single file: extracted into the archive's directory;
      * a single top-level directory: extracted into the archive's directory;
      * anything else: extracted into a new directory named after the archive
        (file name without its last extension).

    Args:
        filepath (str): Path of the tar archive.
        mode (str, optional): Mode passed to `tarfile.open`. Defaults to
            "r:*" (read with transparent compression detection).

    Returns:
        str: Path computed for the extracted file or directory.
    """
    file_dir = os.path.dirname(filepath)

    # The context manager releases the archive handle even if an extraction
    # step raises.
    # NOTE(review): member names are extracted as-is; if archives can come from
    # untrusted sources, consider validating them (or using tarfile extraction
    # filters where available) to prevent path traversal.
    with tarfile.open(filepath, mode) as files:
        file_list = files.getnames()

        if _is_a_single_file(file_list):
            rootpath = file_list[0]
            uncompressed_path = os.path.join(file_dir, rootpath)
            extract_dir = file_dir
        elif _is_a_single_dir(file_list):
            rootpath = os.path.splitext(file_list[0])[0].split(os.sep)[-1]
            uncompressed_path = os.path.join(file_dir, rootpath)
            extract_dir = file_dir
        else:
            # Several top-level entries: keep them together in a directory
            # named after the archive.
            rootpath = os.path.splitext(filepath)[0].split(os.sep)[-1]
            uncompressed_path = os.path.join(file_dir, rootpath)
            if not os.path.exists(uncompressed_path):
                os.makedirs(uncompressed_path)
            extract_dir = uncompressed_path

        for item in file_list:
            files.extract(item, extract_dir)

    return uncompressed_path


def _is_a_single_file(file_list):
    if len(file_list) == 1 and file_list[0].find(os.sep) < -1:
        return True
    return False


def _is_a_single_dir(file_list):
    new_file_list = []
    for file_path in file_list:
        if '/' in file_path:
            file_path = file_path.replace('/', os.sep)
        elif '\\' in file_path:
            file_path = file_path.replace('\\', os.sep)
        new_file_list.append(file_path)

    file_name = new_file_list[0].split(os.sep)[0]
    for i in range(1, len(new_file_list)):
        if file_name != new_file_list[i].split(os.sep)[0]:
            return False
    return True


================================================
FILE: paddlespeech/cli/entry.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from collections import defaultdict

__all__ = ['commands']


def _CommandDict():
    return defaultdict(_CommandDict)


def _execute():
    com = commands

    idx = 0
    for _argv in (['paddlespeech'] + sys.argv[1:]):
        if _argv not in com:
            break
        idx += 1
        com = com[_argv]

    # The method 'execute' of a command instance returns 'True' for a success
    # while 'False' for a failure. Here converts this result into a exit status
    # in bash: 0 for a success and 1 for a failure.
    if not callable(com['_entry']):
        i = com['_entry'].rindex('.')
        module, cls = com['_entry'][:i], com['_entry'][i + 1:]
        exec("from {} import {}".format(module, cls))
        com['_entry'] = locals()[cls]
    status = 0 if com['_entry']().execute(sys.argv[idx:]) else 1
    return status


# Global command registry: a nested defaultdict that _execute() walks token by
# token; the node reached is expected to hold its executor under '_entry'.
commands = _CommandDict()


================================================
FILE: paddlespeech/cli/executor.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
import sys
from abc import ABC
from abc import abstractmethod
from collections import OrderedDict
from typing import Any
from typing import Dict
from typing import List
from typing import Union

import paddle

from ..resource import CommonTaskResource
from .log import logger


class BaseExecutor(ABC):
    """
        An abstract executor of paddlespeech tasks.

        Subclasses implement the model life cycle (`_init_from_path`,
        `preprocess`, `infer`, `postprocess`) plus the two entry points
        (`execute` for the command line, `__call__` for Python). This base
        class provides helpers for collecting inputs and reporting results.
    """

    def __init__(self, task: str, **kwargs):
        # Tensors handed from preprocess() to infer().
        self._inputs = OrderedDict()
        # Tensors handed from infer() to postprocess().
        self._outputs = OrderedDict()
        self.task_resource = CommonTaskResource(task=task, **kwargs)

    @abstractmethod
    def _init_from_path(self, *args, **kwargs):
        """
        Init model and other resources from arguments. This method should be called by `__call__()`.
        """
        pass

    @abstractmethod
    def preprocess(self, input: Any, *args, **kwargs):
        """
        Input preprocess and return paddle.Tensor stored in self._inputs.
        Input content can be a text(tts), a file(asr, cls), a stream(not supported yet) or anything needed.

        Args:
            input (Any): Input text/file/stream or other content.
        """
        pass

    @paddle.no_grad()
    @abstractmethod
    def infer(self, *args, **kwargs):
        """
        Model inference and put results into self._outputs.
        This method get input tensors from self._inputs, and write output tensors into self._outputs.
        """
        pass

    @abstractmethod
    def postprocess(self, *args, **kwargs) -> Union[str, os.PathLike]:
        """
        Output postprocess and return results.
        This method get model output from self._outputs and convert it into human-readable results.

        Returns:
            Union[str, os.PathLike]: Human-readable results such as texts and audio files.
        """
        pass

    @abstractmethod
    def execute(self, argv: List[str]) -> bool:
        """
        Command line entry. This method can only be accessed by a command line such as `paddlespeech asr`.

        Args:
            argv (List[str]): Arguments from command line.

        Returns:
            bool: Result of the command execution. `True` for a success and `False` for a failure.
        """
        pass

    @abstractmethod
    def __call__(self, *arg, **kwargs):
        """
        Python API to call an executor.
        """
        pass

    def get_input_source(self, input_: Union[str, os.PathLike, None]
                         ) -> Dict[str, Union[str, os.PathLike]]:
        """
        Get task input source from command line input.

        Three sources are supported:
          * a job file (`.job`/`.scp`/`.txt`): parsed into id -> input pairs;
          * `None`: lines are read from stdin when stdin is not a terminal;
          * anything else: treated as one single input.

        Args:
            input_ (Union[str, os.PathLike, None]): Input from command line.

        Returns:
            Dict[str, Union[str, os.PathLike]]: A dict with ids and inputs.
        """
        if self._is_job_input(input_):
            # .job/.scp/.txt file
            return self._get_job_contents(input_)

        ret = OrderedDict()
        if input_ is not None:
            # NOTE(review): the key here is the int 1, while ids read from
            # stdin or job files are strings.
            ret[1] = input_
            return ret

        # Take input from stdin; skip when stdin is a terminal to avoid
        # getting stuck waiting for input that will never come.
        if not sys.stdin.isatty():
            for i, line in enumerate(sys.stdin):
                fields = line.strip().split()
                if len(fields) == 1:
                    # Only the input: use the 1-based line number as id.
                    ret[str(i + 1)] = fields[0]
                elif len(fields) == 2:
                    id_, info = fields
                    ret[id_] = info
                # Any other field count carries no valid input: ignored.
        return ret

    def process_task_results(self,
                             input_: Union[str, os.PathLike, None],
                             results: Dict[str, os.PathLike],
                             job_dump_result: bool=False):
        """
        Print task results to stdout and optionally dump them into a file.

        Args:
            input_ (Union[str, os.PathLike, None]): Input from command line.
            results (Dict[str, os.PathLike]): Task outputs.
            job_dump_result (bool, optional): if True and `input_` is a job
                file, also write the results to `<input_>.done`. Defaults to False.
        """
        is_job = self._is_job_input(input_)

        if not is_job and len(results) == 1:  # Only one input sample
            raw_text = list(results.values())[0]
        else:
            raw_text = self._format_task_results(results)

        print(raw_text, end='')  # Stdout

        if is_job and job_dump_result:  # Dump to *.job.done
            job_output_file = os.path.abspath(input_) + '.done'
            # Write through a dedicated handle instead of rebinding
            # sys.stdout, so the process-wide stdout stays open and usable.
            with open(job_output_file, 'w') as f:
                print(raw_text, end='', file=f)
            logger.info(f'Results had been saved to: {job_output_file}')

    def _is_job_input(self, input_: Union[str, os.PathLike]) -> bool:
        """
        Check if current input file is a job input or not.

        Args:
            input_ (Union[str, os.PathLike]): Input file of current task.

        Returns:
            bool: return `True` for job input (an existing file whose name
                ends with `.job`, `.txt` or `.scp`), `False` otherwise.
        """
        # bool() keeps the declared return type even for None / '' inputs.
        return bool(input_ and os.path.isfile(input_) and
                    input_.endswith(('.job', '.txt', '.scp')))

    def _get_job_contents(
            self, job_input: os.PathLike) -> Dict[str, Union[str, os.PathLike]]:
        """
        Read a job input file and return its contents in a dictionary.

        Each non-empty line must hold exactly two whitespace-separated
        fields: an id and the input for that id.

        Args:
            job_input (os.PathLike): The job input file.

        Returns:
            Dict[str, str]: Contents of job input.

        Raises:
            ValueError: If a non-empty line does not have exactly two fields.
        """
        job_contents = OrderedDict()
        with open(job_input) as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                k, v = line.split()  # space or \t
                job_contents[k] = v
        return job_contents

    def _format_task_results(
            self, results: Dict[str, Union[str, os.PathLike]]) -> str:
        """
        Convert task results to raw text.

        Args:
            results (Dict[str, str]): A dictionary of task results.

        Returns:
            str: One "<id> <result>" line per entry, each ending with a newline.
        """
        return ''.join(f'{k} {v}\n' for k, v in results.items())

    def disable_task_loggers(self):
        """
        Raise the level of every logger known to the logging module to ERROR.
        """
        for name in logging.root.manager.loggerDict:
            logging.getLogger(name).setLevel(logging.ERROR)

    def show_rtf(self, info: Dict[str, List[float]]):
        """
        Calculate RTF (real-time factor) of current task and show results.

        Args:
            info (Dict[str, List[float]]): Parallel lists under the keys
                'start', 'end' and 'extra'; the 'extra' values are summed as
                the audio duration and `end - start` as the processing time.
        """
        num_samples = 0
        task_duration = 0.0
        wav_duration = 0.0

        for start, end, dur in zip(info['start'], info['end'], info['extra']):
            num_samples += 1
            task_duration += end - start
            wav_duration += dur

        logger.info('Sample Count: {}'.format(num_samples))
        logger.info('Avg RTF: {}'.format(task_duration / wav_duration))


================================================
FILE: paddlespeech/cli/kws/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .infer import KWSExecutor


================================================
FILE: paddlespeech/cli/kws/infer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
from collections import OrderedDict
from typing import List
from typing import Optional
from typing import Union

import paddle
import yaml

from ..executor import BaseExecutor
from ..log import logger
from ..utils import stats_wrapper
from paddlespeech.audio.backends import soundfile_load as load_audio
from paddlespeech.audio.compliance.kaldi import fbank as kaldi_fbank

__all__ = ['KWSExecutor']


class KWSExecutor(BaseExecutor):
    """
        Executor of the keyword spotting (kws) task, usable from the command
        line through `execute` and from Python through `__call__`.
    """

    def __init__(self):
        super().__init__(task='kws')
        self.parser = argparse.ArgumentParser(
            prog='paddlespeech.kws', add_help=True)
        add_arg = self.parser.add_argument

        # Model names offered on the command line: the part of each
        # pretrained tag that precedes the first '-'.
        model_choices = [
            tag[:tag.index('-')]
            for tag in self.task_resource.pretrained_models.keys()
        ]

        add_arg(
            '--input',
            type=str,
            default=None,
            help='Audio file to keyword spotting.')
        add_arg(
            '--threshold',
            type=float,
            default=0.8,
            help='Score threshold for keyword spotting.')
        add_arg(
            '--model',
            type=str,
            default='mdtc_heysnips',
            choices=model_choices,
            help='Choose model type of kws task.')
        add_arg(
            '--config',
            type=str,
            default=None,
            help='Config of kws task. Use default config when it is None.')
        add_arg(
            '--ckpt_path',
            type=str,
            default=None,
            help='Checkpoint file of model.')
        add_arg(
            '--device',
            type=str,
            default=paddle.get_device(),
            help='Choose device to execute model inference.')
        add_arg(
            '-d',
            '--job_dump_result',
            action='store_true',
            help='Save job result into file.')
        add_arg(
            '-v',
            '--verbose',
            action='store_true',
            help='Increase logger verbosity of current task.')

    def _init_from_path(self,
                        model_type: str='mdtc_heysnips',
                        cfg_path: Optional[os.PathLike]=None,
                        ckpt_path: Optional[os.PathLike]=None):
        """
            Build the model and the feature extractor.

            When `ckpt_path` is None the config and checkpoint of the
            pretrained model tagged `<model_type>-16k` are used; otherwise the
            given `cfg_path` and `ckpt_path` are used. Does nothing if a model
            has already been set on this instance.
        """
        if hasattr(self, 'model'):
            logger.debug('Model had been initialized.')
            return

        if ckpt_path is not None:
            self.cfg_path = os.path.abspath(cfg_path)
            self.ckpt_path = os.path.abspath(ckpt_path)
        else:
            self.task_resource.set_task_model(model_type + '-' + '16k')
            resource_dir = self.task_resource.res_dir
            resource_files = self.task_resource.res_dict
            self.cfg_path = os.path.join(resource_dir,
                                         resource_files['cfg_path'])
            self.ckpt_path = os.path.join(
                resource_dir, resource_files['ckpt_path'] + '.pdparams')

        # config
        with open(self.cfg_path, 'r') as cfg_file:
            config = yaml.safe_load(cfg_file)

        # model: the part of model_type before the first '_' names the
        # backbone; '<name>_for_kws' names the full model class.
        arch = model_type.split('_')[0]
        backbone_class = self.task_resource.get_model_class(arch)
        model_class = self.task_resource.get_model_class(arch + '_for_kws')
        backbone = backbone_class(
            stack_num=config['stack_num'],
            stack_size=config['stack_size'],
            in_channels=config['in_channels'],
            res_channels=config['res_channels'],
            kernel_size=config['kernel_size'],
            causal=True, )
        self.model = model_class(
            backbone=backbone, num_keywords=config['num_keywords'])
        self.model.set_state_dict(paddle.load(self.ckpt_path))
        self.model.eval()

        def _extract_fbank(x):
            # Config values are looked up at call time.
            return kaldi_fbank(
                x,
                sr=config['sample_rate'],
                frame_shift=config['frame_shift'],
                frame_length=config['frame_length'],
                n_mels=config['n_mels'])

        self.feature_extractor = _extract_fbank

    def preprocess(self, audio_file: Union[str, os.PathLike]):
        """
            Load the audio file, extract its features and store them in
            self._inputs['feats'].
        """
        assert os.path.isfile(audio_file)
        samples, _ = load_audio(audio_file)
        if isinstance(audio_file, (str, os.PathLike)):
            logger.debug("Preprocessing audio_file:" + audio_file)

        # Feature extraction
        batch = paddle.to_tensor(samples).unsqueeze(0)
        feats = self.feature_extractor(batch)
        self._inputs['feats'] = feats.unsqueeze(0)

    @paddle.no_grad()
    def infer(self):
        """
            Run the model on self._inputs['feats'] and store the result in
            self._outputs['logits'].
        """
        feats = self._inputs['feats']
        self._outputs['logits'] = self.model(feats)

    def postprocess(self, threshold: float) -> Union[str, os.PathLike]:
        """
            Turn the model output into a one-line report holding the score,
            the threshold and whether the score exceeds the threshold.
        """
        scores = self._outputs['logits'][0, :, 0]
        kws_score = max(scores).item()
        is_keyword = kws_score > threshold
        return 'Score: {:.3f}, Threshold: {}, Is keyword: {}'.format(
            kws_score, threshold, is_keyword)

    def execute(self, argv: List[str]) -> bool:
        """
            Command line entry: parse `argv`, run every input and report the
            results. Returns False if any input raised an exception.
        """
        args = self.parser.parse_args(argv)

        if not args.verbose:
            self.disable_task_loggers()

        task_source = self.get_input_source(args.input)
        task_results = OrderedDict()
        has_exceptions = False

        for id_, input_ in task_source.items():
            try:
                task_results[id_] = self(input_, args.threshold, args.model,
                                         args.config, args.ckpt_path,
                                         args.device)
            except Exception as e:
                # Report the failure as this input's result and keep going.
                has_exceptions = True
                task_results[id_] = f'{e.__class__.__name__}: {e}'

        self.process_task_results(args.input, task_results,
                                  args.job_dump_result)

        return not has_exceptions

    @stats_wrapper
    def __call__(self,
                 audio_file: os.PathLike,
                 threshold: float=0.8,
                 model: str='mdtc_heysnips',
                 config: Optional[os.PathLike]=None,
                 ckpt_path: Optional[os.PathLike]=None,
                 device: str=paddle.get_device()):
        """
            Python API: run keyword spotting on one audio file and return the
            report produced by `postprocess`.
        """
        audio_file = os.path.abspath(os.path.expanduser(audio_file))
        paddle.set_device(device)
        self._init_from_path(model, config, ckpt_path)
        self.preprocess(audio_file)
        self.infer()
        return self.postprocess(threshold)


================================================
FILE: paddlespeech/cli/log.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import functools
import logging

__all__ = [
    'logger',
]


class Logger:
    """
    Thin wrapper around a `logging.Logger` with one method per level name.

    After construction the instance exposes `debug`, `info`, `train`, `eval`,
    `warning`, `error`, `critical` (each logging at the matching numeric
    level) and `exception` (bound to the wrapped logger's `exception`).
    """

    def __init__(self, name: str=None):
        self.logger = logging.getLogger(name if name else 'PaddleSpeech')

        # (level name, numeric level) pairs, registered in this order.
        level_table = (
            ('DEBUG', 10),
            ('INFO', 20),
            ('TRAIN', 21),
            ('EVAL', 22),
            ('WARNING', 30),
            ('ERROR', 40),
            ('CRITICAL', 50),
            ('EXCEPTION', 100), )
        for level_name, level_no in level_table:
            logging.addLevelName(level_no, level_name)
            if level_name != 'EXCEPTION':
                emit = functools.partial(self.__call__, level_no)
            else:
                emit = self.logger.exception
            # Stored on the instance so that e.g. `logger.info(msg)` works.
            self.__dict__[level_name.lower()] = emit

        self.format = logging.Formatter(
            fmt='[%(asctime)-15s] [%(levelname)8s] - %(message)s')

        self.handler = logging.StreamHandler()
        self.handler.setFormatter(self.format)

        self.logger.addHandler(self.handler)
        self.logger.setLevel(logging.INFO)
        # Records are not passed on to ancestor loggers' handlers.
        self.logger.propagate = False

    def __call__(self, log_level: str, msg: str):
        """Log `msg` on the wrapped logger at level `log_level`."""
        self.logger.log(log_level, msg)


# Module-level logger instance exported via __all__; it wraps the
# 'PaddleSpeech' logger because no name is passed.
logger = Logger()


================================================
FILE: paddlespeech/cli/ssl/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .infer import SSLExecutor


================================================
FILE: paddlespeech/cli/ssl/infer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import io
import os
import sys
import time
from collections import OrderedDict
from typing import List
from typing import Optional
from typing import Union

import librosa
import numpy as np
import paddle
import soundfile
from paddlenlp.transformers import AutoTokenizer
from yacs.config import CfgNode

from ..executor import BaseExecutor
from ..log import logger
from ..utils import CLI_TIMER
from ..utils import stats_wrapper
from ..utils import timer_register
from paddlespeech.audio.transform.transformation import Transformation
from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
from paddlespeech.s2t.utils.utility import UpdateConfig

__all__ = ['SSLExecutor']


@timer_register
class SSLExecutor(BaseExecutor):
    def __init__(self):
        """
        Create the executor for the 'ssl' task and declare its command line
        options.
        """
        super().__init__('ssl')
        self.parser = argparse.ArgumentParser(
            prog='paddlespeech.ssl', add_help=True)
        add_arg = self.parser.add_argument

        # Input and model selection.
        add_arg(
            '--input', type=str, default=None, help='Audio file to recognize.')
        add_arg(
            '--model',
            type=str,
            default='wav2vec2',
            choices=['wav2vec2', 'hubert', 'wavlm'],
            help='Choose model type of asr task.')
        add_arg(
            '--task',
            type=str,
            default='asr',
            choices=['asr', 'vector'],
            help='Choose output type for ssl task')
        add_arg(
            '--lang',
            type=str,
            default='en',
            help='Choose model language. zh or en, zh:[wav2vec2ASR_aishell1-zh-16k], en:[wav2vec2ASR_librispeech-en-16k, hubertASR_librispeech_100-en-16k]'
        )
        add_arg(
            '--sample_rate',
            type=int,
            default=16000,
            choices=[8000, 16000],
            help='Choose the audio sample rate of the model. 8000 or 16000')

        # Configuration, decoding and checkpoint.
        add_arg(
            '--config',
            type=str,
            default=None,
            help='Config of asr task. Use default config when it is None.')
        add_arg(
            '--decode_method',
            type=str,
            default='ctc_greedy_search',
            choices=['ctc_greedy_search', 'ctc_prefix_beam_search'],
            help='only support asr task')
        add_arg(
            '--ckpt_path',
            type=str,
            default=None,
            help='Checkpoint file of model.')

        # Runtime behaviour.
        add_arg(
            '--yes',
            '-y',
            action='store_true',
            default=False,
            help='No additional parameters required. \
            Once set this parameter, it means accepting the request of the program by default, \
            which includes transforming the audio sample rate')
        add_arg(
            '--rtf',
            action='store_true',
            default=False,
            help='Show Real-time Factor(RTF).')
        add_arg(
            '--device',
            type=str,
            default=paddle.get_device(),
            help='Choose device to execute model inference.')
        add_arg(
            '-d',
            '--job_dump_result',
            action='store_true',
            help='Save job result into file.')
        add_arg(
            '-v',
            '--verbose',
            action='store_true',
            help='Increase logger verbosity of current task.')

        self.last_call_params = None

    def _init_from_path(self,
                        model_type: str=None,
                        task: str='asr',
                        lang: str='en',
                        sample_rate: int=16000,
                        cfg_path: Optional[os.PathLike]=None,
                        decode_method: str='ctc_greedy_search',
                        ckpt_path: Optional[os.PathLike]=None):
        """
        Init model and other resources from a specific path.

        Does nothing (apart from resetting `self.max_len`) when a model has
        already been set on this instance.

        Args:
            model_type (str): Model family ('wav2vec2', 'hubert' or 'wavlm' for
                the asr task).
            task (str): 'asr' selects an ASR model; any other value loads the
                model named `model_type`.
            lang (str): Model language, 'en' or 'zh'.
            sample_rate (int): 16000 selects the '16k' resources, any other
                value the '8k' ones.
            cfg_path (Optional[os.PathLike]): Config file. The pretrained
                resources are used when this or `ckpt_path` is None.
            decode_method (str): Decoding method written into the config for
                the asr task.
            ckpt_path (Optional[os.PathLike]): Checkpoint path without the
                '.pdparams' suffix.

        Raises:
            ValueError: If `task` is 'asr' and the (model_type, lang) pair is
                not a supported combination.
        """
        logger.debug("start to init the model")

        if model_type is None:
            logger.debug(
                "Model type had not been specified, default {} was used.".
                format(model_type))
        # default max_len: unit:second
        self.max_len = 50
        if hasattr(self, 'model'):
            logger.debug('Model had been initialized.')
            return

        # Resolve the ASR model prefix up front: it is needed both to build
        # the pretrained tag and, later, to derive the model class name, also
        # when a custom cfg_path/ckpt_path pair is supplied.
        model_prefix = None
        if task == 'asr':
            asr_model_prefixes = {
                ('wav2vec2', 'en'): 'wav2vec2ASR_librispeech',
                ('wav2vec2', 'zh'): 'wav2vec2ASR_aishell1',
                ('hubert', 'en'): 'hubertASR_librispeech-100h',
                ('wavlm', 'en'): 'wavlmASR_librispeech',
            }
            model_prefix = asr_model_prefixes.get((model_type, lang))
            if model_prefix is None:
                if lang == 'zh' and model_type in ('hubert', 'wavlm'):
                    logger.error(
                        "zh {}ASR is not supported yet".format(model_type))
                # Fail with an explicit message instead of an unbound-variable
                # error further down.
                raise ValueError(
                    "Unsupported ssl asr model: model_type={}, lang={}".format(
                        model_type, lang))

        if cfg_path is None or ckpt_path is None:
            sample_rate_str = '16k' if sample_rate == 16000 else '8k'
            if task == 'asr':
                tag = model_prefix + '-' + lang + '-' + sample_rate_str
            else:
                tag = model_type + '-' + lang + '-' + sample_rate_str
            self.task_resource.set_task_model(tag, version=None)
            self.res_path = self.task_resource.res_dir

            self.cfg_path = os.path.join(
                self.res_path, self.task_resource.res_dict['cfg_path'])
            self.ckpt_path = os.path.join(
                self.res_path,
                self.task_resource.res_dict['ckpt_path'] + ".pdparams")
            logger.debug(self.res_path)
        else:
            self.cfg_path = os.path.abspath(cfg_path)
            self.ckpt_path = os.path.abspath(ckpt_path + ".pdparams")
            # Resource root is taken to be two levels above the config file.
            self.res_path = os.path.dirname(
                os.path.dirname(os.path.abspath(self.cfg_path)))
        logger.debug(self.cfg_path)
        logger.debug(self.ckpt_path)

        #Init body.
        self.config = CfgNode(new_allowed=True)
        self.config.merge_from_file(self.cfg_path)
        if task == 'asr':
            with UpdateConfig(self.config):
                if lang == 'en':
                    self.text_feature = TextFeaturizer(
                        unit_type=self.config.unit_type,
                        vocab=self.config.vocab_filepath)
                    # NOTE(review): this is the length of the vocab file
                    # *path*, not the vocabulary size; confirm this is
                    # intended before relying on output_dim.
                    self.config.output_dim = len(self.config.vocab_filepath)
                elif lang == 'zh':
                    self.text_feature = AutoTokenizer.from_pretrained(
                        self.config.tokenizer)
                    self.config.output_dim = self.text_feature.vocab_size
                self.config.decode.decoding_method = decode_method
            model_name = model_prefix[:model_prefix.rindex(
                '_')]  # model_type: {model_name}_{dataset}
        else:
            model_name = model_type
        model_class = self.task_resource.get_model_class(model_name)
        model_conf = self.config
        model = model_class.from_config(model_conf)
        self.model = model
        self.model.eval()

        # load model
        model_dict = paddle.load(self.ckpt_path)
        if task == 'asr':
            self.model.set_state_dict(model_dict)
        else:
            # For non-asr tasks the weights go into the sub-module named
            # after model_type.
            getattr(self.model, model_type).set_state_dict(model_dict)

    def preprocess(self, input: Union[str, os.PathLike]):
        """
        Read the audio, optionally convert its format, run the configured
        preprocessing and store the result in self._inputs under the keys
        "audio" and "audio_len".

        Args:
            input: Audio file path, or an `io.BytesIO` holding the audio
                (rewound to its start before reading).
        """
        source = input
        if isinstance(source, io.BytesIO):
            source.seek(0)
        elif isinstance(source, (str, os.PathLike)):
            logger.debug("Preprocess audio_file:" + source)

        # Get the object for feature extraction
        logger.debug("get the preprocess conf")
        transform_conf = self.config.preprocess_config
        transform_kwargs = {"train": False}
        transform = Transformation(transform_conf)

        logger.debug("read the audio file")
        samples, native_rate = soundfile.read(
            source, dtype="int16", always_2d=True)

        if not self.change_format:
            # Keep only the first channel.
            samples = samples[:, 0]
        else:
            # Down-mix to a single channel.
            if samples.shape[1] >= 2:
                samples = samples.mean(axis=1, dtype=np.int16)
            else:
                samples = samples[:, 0]
            # pcm16 -> pcm 32
            samples = self._pcm16to32(samples)
            samples = librosa.resample(
                samples, orig_sr=native_rate, target_sr=self.sample_rate)
            native_rate = self.sample_rate
            # pcm32 -> pcm 16
            samples = self._pcm32to16(samples)

        logger.debug(f"audio shape: {samples.shape}")
        # fbank
        feats = transform(samples, **transform_kwargs)

        feats_len = paddle.to_tensor(feats.shape[0]).unsqueeze(axis=0)
        feats = paddle.to_tensor(feats, dtype='float32').unsqueeze(axis=0)

        self._inputs["audio"] = feats
        self._inputs["audio_len"] = feats_len
        logger.debug(f"audio feat shape: {feats.shape}")

        logger.debug("audio feat process success")

    @paddle.no_grad()
    def infer(self, model_type: str, task: str):
        """
        Run the loaded model on the cached features and keep the outcome in self._outputs.

        For ``task == 'asr'`` the first hypothesis returned by ``self.model.decode``
        is stored; for any other task the model is called directly and the first
        item of its output is stored. Exceptions raised by the model are logged
        and not re-raised.
        """
        logger.debug("start to infer the model to get the output")
        feats = self._inputs["audio"]

        if task != 'asr':
            logger.debug(
                f"we will use the {model_type} like model to extract audio feature."
            )
            try:
                extracted = self.model(feats[:, :, 0])
                self._outputs["result"] = extracted[0]
            except Exception as e:
                logger.exception(e)
            return

        decode_cfg = self.config.decode
        logger.debug(f"we will use the {model_type}ASR like model.")
        try:
            # Options are gathered inside the try block so that a missing
            # config entry is logged the same way as a decoding failure.
            decode_kwargs = {
                "text_feature": self.text_feature,
                "decoding_method": decode_cfg.decoding_method,
                "beam_size": decode_cfg.beam_size,
                "tokenizer": getattr(self.config, 'tokenizer', None),
            }
            hypotheses = self.model.decode(feats, **decode_kwargs)
            self._outputs["result"] = hypotheses[0][0]
        except Exception as e:
            logger.exception(e)

    def postprocess(self) -> Union[str, os.PathLike]:
        """
            Hand back whatever ``infer`` cached under ``self._outputs["result"]``.

            Raises:
                KeyError: if no result has been stored yet.
        """
        outputs = self._outputs
        return outputs["result"]

    def _pcm16to32(self, audio):
        assert (audio.dtype == np.int16)
        audio = audio.astype("float32")
        bits = np.iinfo(np.int16).bits
        audio = audio / (2**(bits - 1))
        return audio

    def _pcm32to16(self, audio):
        assert (audio.dtype == np.float32)
        bits = np.iinfo(np.int16).bits
        audio = audio * (2**(bits - 1))
        audio = np.round(audio).astype("int16")
        return audio

    def _check(self, audio_file: str, sample_rate: int, force_yes: bool=False):
        """
        Validate the requested sample rate and the audio file before inference.

        Stores ``sample_rate`` in ``self.sample_rate`` and records in
        ``self.change_format`` whether the file's own rate differs from it. When
        it does and ``force_yes`` is False, the user is asked on stdin whether
        the conversion may go ahead.

        Args:
            audio_file: Path of the audio file, or an ``io.BytesIO`` stream.
            sample_rate: Requested rate; only 16000 and 8000 are accepted.
            force_yes: Skip the interactive confirmation.

        Returns:
            True when processing may continue, False otherwise.
        """
        self.sample_rate = sample_rate
        if self.sample_rate not in (16000, 8000):
            logger.error(
                "invalid sample rate, please input --sr 8000 or --sr 16000")
            return False

        given_as_path = isinstance(audio_file, (str, os.PathLike))
        if given_as_path and not os.path.isfile(audio_file):
            logger.error("Please input the right audio file path")
            return False
        if not given_as_path and isinstance(audio_file, io.BytesIO):
            audio_file.seek(0)

        logger.debug("checking the audio file format......")
        try:
            samples, audio_sample_rate = soundfile.read(
                audio_file, dtype="int16", always_2d=True)
            duration = samples.shape[0] / audio_sample_rate
            if duration > self.max_len:
                logger.error(
                    f"Please input audio file less then {self.max_len} seconds.\n"
                )
                return False
        except Exception as e:
            logger.exception(e)
            logger.error(
                f"can not open the audio file, please check the audio file({audio_file}) format is 'wav'. \n \
                 you can try to use sox to change the file format.\n \
                 For example: \n \
                 sample rate: 16k \n \
                 sox input_audio.xx --rate 16k --bits 16 --channels 1 output_audio.wav \n \
                 sample rate: 8k \n \
                 sox input_audio.xx --rate 8k --bits 16 --channels 1 output_audio.wav \n \
                 ")
            return False

        logger.debug("The sample rate is %d" % audio_sample_rate)
        if audio_sample_rate == self.sample_rate:
            logger.debug("The audio file format is right")
            self.change_format = False
            return True

        # NOTE: the whitespace at the start of the continuation lines below is
        # part of the logged message, so it must not be re-indented.
        logger.warning("The sample rate of the input file is not {}.\n \
                            The program will resample the wav file to {}.\n \
                            If the result does not meet your expectations，\n \
                            Please input the 16k 16 bit 1 channel wav file. \
                        ".format(self.sample_rate, self.sample_rate))
        if force_yes is False:
            # Keep asking until the answer is one of the accepted spellings.
            while True:
                logger.debug(
                    "Whether to change the sample rate and the channel. Y: change the sample. N: exit the prgream."
                )
                answer = input("Input(Y/N):").strip()
                if answer in ("Y", "y", "yes", "Yes"):
                    logger.debug(
                        "change the sampele rate, channel to 16k and 1 channel")
                    break
                if answer in ("N", "n", "no", "No"):
                    logger.debug("Exit the program")
                    return False
                logger.warning("Not regular input, please input again")

        self.change_format = True
        return True

    def execute(self, argv: List[str]) -> bool:
        """
            Command line entry.

            Parses ``argv``, runs the executor once per input item and passes the
            collected results to ``process_task_results``. An input that raises
            is recorded as ``"<ExceptionClass>: <message>"``.

            Returns:
                True when every input succeeded, False otherwise.
        """
        args = self.parser.parse_args(argv)

        # Options shared by every input of this invocation.
        shared_kwargs = dict(
            model=args.model,
            task=args.task,
            lang=args.lang,
            sample_rate=args.sample_rate,
            config=args.config,
            ckpt_path=args.ckpt_path,
            decode_method=args.decode_method,
            force_yes=args.yes,
            rtf=args.rtf,
            device=args.device)
        want_rtf = shared_kwargs["rtf"]

        if not args.verbose:
            self.disable_task_loggers()

        results = OrderedDict()
        failed = False
        for item_id, item in self.get_input_source(args.input).items():
            try:
                results[item_id] = self(audio_file=item, **shared_kwargs)
            except Exception as e:
                failed = True
                results[item_id] = f'{e.__class__.__name__}: {e}'

        if want_rtf:
            self.show_rtf(CLI_TIMER[self.__class__.__name__])
        self.process_task_results(args.input, results, args.job_dump_result)
        return not failed

    @stats_wrapper
    def __call__(self,
                 audio_file: os.PathLike,
                 model: str=None,
                 task: str='asr',
                 lang: str='en',
                 sample_rate: int=16000,
                 config: os.PathLike=None,
                 ckpt_path: os.PathLike=None,
                 decode_method: str='ctc_greedy_search',
                 force_yes: bool=False,
                 rtf: bool=False,
                 device=paddle.get_device()):
        """
        Python API to call an executor.

        Runs model initialisation, the audio check, preprocessing, inference and
        postprocessing for one audio file and returns the postprocessed result.
        The process exits with status -1 when ``_check`` rejects the input. With
        ``rtf`` set, start/end timestamps and the audio duration are appended to
        ``CLI_TIMER`` under this class's name.
        """

        call_params = dict(
            model=model,
            task=task,
            lang=lang,
            sample_rate=sample_rate,
            config=config,
            ckpt_path=ckpt_path,
            decode_method=decode_method,
            force_yes=force_yes,
            rtf=rtf,
            device=device)

        # Drop the cached model when this call's options differ from the last
        # call's, so that _init_from_path builds it again.
        previous_params = self.last_call_params
        options_changed = (previous_params is not None and
                           previous_params != call_params)
        if options_changed and hasattr(self, 'model'):
            del self.model
        self.last_call_params = call_params

        audio_file = os.path.abspath(audio_file)
        paddle.set_device(device)
        self._init_from_path(model, task, lang, sample_rate, config,
                             decode_method, ckpt_path)
        if not self._check(audio_file, sample_rate, force_yes):
            sys.exit(-1)

        if rtf:
            timer_key = self.__class__.__name__
            CLI_TIMER[timer_key]['start'].append(time.time())

        self.preprocess(audio_file)
        self.infer(model, task)
        result = self.postprocess()  # Retrieve result of asr.

        if rtf:
            CLI_TIMER[timer_key]['end'].append(time.time())
            # The file is read once more only to record its duration in seconds.
            samples, file_sample_rate = soundfile.read(
                audio_file, dtype="int16", always_2d=True)
            CLI_TIMER[timer_key]['extra'].append(
                samples.shape[0] / file_sample_rate)

        return result


================================================
FILE: paddlespeech/cli/st/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .infer import STExecutor


================================================
FILE: paddlespeech/cli/st/infer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import subprocess
import tempfile
from collections import OrderedDict
from typing import List
from typing import Optional
from typing import Union

import kaldiio
import numpy as np
import paddle
import soundfile
from kaldiio import WriteHelper
from yacs.config import CfgNode

from ...utils.env import MODEL_HOME
from ..executor import BaseExecutor
from ..log import logger
from ..utils import download_and_decompress
from ..utils import stats_wrapper
from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
from paddlespeech.s2t.utils.utility import UpdateConfig

# Public names exported by this module.
__all__ = ["STExecutor"]

# Download descriptor of the Kaldi binaries archive: "url" is the archive
# location and "md5" the checksum string that accompanies it. STExecutor passes
# this dict to download_and_decompress().
kaldi_bins = {
    "url":
    "https://paddlespeech.cdn.bcebos.com/s2t/ted_en_zh/st1/kaldi_bins.tar.gz",
    "md5":
    "c0682303b3f3393dbf6ed4c4e35a53eb",
}


class STExecutor(BaseExecutor):
    """
        Speech-translation executor with a command-line entry (``execute``) and
        a Python API (``__call__``).

        Acoustic features are produced by external Kaldi command-line tools
        (``compute-fbank-feats``, ``compute-kaldi-pitch-feats``,
        ``process-kaldi-pitch-feats``, ``apply-cmvn``, ``copy-feats``). The
        archive that ships them is downloaded during model initialisation, see
        ``_set_kaldi_bins``.
    """

    def __init__(self):
        super().__init__(task='st')
        self.kaldi_bins = kaldi_bins

        self.parser = argparse.ArgumentParser(
            prog="paddlespeech.st", add_help=True)
        self.parser.add_argument(
            "--input", type=str, default=None, help="Audio file to translate.")
        self.parser.add_argument(
            "--model",
            type=str,
            default="fat_st_ted",
            choices=[
                tag[:tag.index('-')]
                for tag in self.task_resource.pretrained_models.keys()
            ],
            help="Choose model type of st task.")
        self.parser.add_argument(
            "--src_lang",
            type=str,
            default="en",
            help="Choose model source language.")
        self.parser.add_argument(
            "--tgt_lang",
            type=str,
            default="zh",
            help="Choose model target language.")
        self.parser.add_argument(
            "--sample_rate",
            type=int,
            default=16000,
            choices=[16000],
            # The help text used to advertise 8000 as well although `choices`
            # rejects it.
            help='Choose the audio sample rate of the model. Only 16000 is supported.'
        )
        self.parser.add_argument(
            "--config",
            type=str,
            default=None,
            help="Config of st task. Use default config when it is None.")
        self.parser.add_argument(
            "--ckpt_path",
            type=str,
            default=None,
            help="Checkpoint file of model.")
        self.parser.add_argument(
            "--device",
            type=str,
            default=paddle.get_device(),
            help="Choose device to execute model inference.")
        self.parser.add_argument(
            '-d',
            '--job_dump_result',
            action='store_true',
            help='Save job result into file.')
        self.parser.add_argument(
            '-v',
            '--verbose',
            action='store_true',
            help='Increase logger verbosity of current task.')

    def _set_kaldi_bins(self) -> os.PathLike:
        """
            Download the kaldi_bins archive and expose it through the environment.

            The decompressed directory is appended to ``LD_LIBRARY_PATH`` and
            ``PATH`` so that the Kaldi tools started by ``preprocess`` can be
            found. A variable that is unset is created, and the directory is not
            appended again when it is already listed.

            Returns:
                Absolute path of the decompressed kaldi_bins directory.
        """
        decompressed_path = download_and_decompress(self.kaldi_bins, MODEL_HOME)
        decompressed_path = os.path.abspath(decompressed_path)
        logger.debug("Kaldi_bins stored in: {}".format(decompressed_path))
        for env_var in ("LD_LIBRARY_PATH", "PATH"):
            current = os.environ.get(env_var, "")
            if decompressed_path in current.split(os.pathsep):
                continue
            if current:
                os.environ[
                    env_var] = f"{current}{os.pathsep}{decompressed_path}"
            else:
                os.environ[env_var] = decompressed_path
        return decompressed_path

    def _init_from_path(self,
                        model_type: str="fat_st_ted",
                        src_lang: str="en",
                        tgt_lang: str="zh",
                        cfg_path: Optional[os.PathLike]=None,
                        ckpt_path: Optional[os.PathLike]=None):
        """
            Init model and other resources from a specific path.

            Does nothing when ``self.model`` already exists. When either
            ``cfg_path`` or ``ckpt_path`` is None, the pretrained resource tagged
            ``{model_type}-{src_lang}-{tgt_lang}`` is used; otherwise the given
            files are used and the resource root is taken to be two directory
            levels above the config file.

            Args:
                model_type: Model tag of the form ``{model_name}_{dataset}``.
                src_lang: Source language of the pretrained resource tag.
                tgt_lang: Target language of the pretrained resource tag.
                cfg_path: Optional path of a config file.
                ckpt_path: Optional path of a checkpoint file.
        """
        if hasattr(self, 'model'):
            logger.debug('Model had been initialized.')
            return

        if cfg_path is None or ckpt_path is None:
            tag = model_type + "-" + src_lang + "-" + tgt_lang
            self.task_resource.set_task_model(tag, version=None)
            self.cfg_path = os.path.join(
                self.task_resource.res_dir,
                self.task_resource.res_dict['cfg_path'])
            self.ckpt_path = os.path.join(
                self.task_resource.res_dir,
                self.task_resource.res_dict['ckpt_path'])
            logger.debug(self.cfg_path)
            logger.debug(self.ckpt_path)
            res_path = self.task_resource.res_dir
        else:
            self.cfg_path = os.path.abspath(cfg_path)
            self.ckpt_path = os.path.abspath(ckpt_path)
            res_path = os.path.dirname(
                os.path.dirname(os.path.abspath(self.cfg_path)))

        #Init body.
        self.config = CfgNode(new_allowed=True)
        self.config.merge_from_file(self.cfg_path)
        self.config.decode.decoding_method = "fullsentence"

        with UpdateConfig(self.config):
            # Paths in the config are relative to the resource root.
            self.config.cmvn_path = os.path.join(res_path,
                                                 self.config.cmvn_path)
            self.config.spm_model_prefix = os.path.join(
                res_path, self.config.spm_model_prefix)
            self.text_feature = TextFeaturizer(
                unit_type=self.config.unit_type,
                vocab=self.config.vocab_filepath,
                spm_model_prefix=self.config.spm_model_prefix)

        model_conf = self.config
        model_name = model_type[:model_type.rindex(
            '_')]  # model_type: {model_name}_{dataset}
        model_class = self.task_resource.get_model_class(model_name)
        self.model = model_class.from_config(model_conf)
        self.model.eval()

        # load model
        params_path = self.ckpt_path
        model_dict = paddle.load(params_path)
        self.model.set_state_dict(model_dict)

        # set kaldi bins
        self._set_kaldi_bins()

    def _check(self, audio_file: str, sample_rate: int):
        """
            Make sure the audio file's sample rate equals ``sample_rate``.

            Raises:
                Exception: when the two rates differ.
        """
        _, audio_sample_rate = soundfile.read(
            audio_file, dtype="int16", always_2d=True)
        if audio_sample_rate != sample_rate:
            # The former `sys.exit(-1)` after this raise could never run (and
            # `sys` is not imported by this module), so it was dropped.
            raise Exception("invalid sample rate")

    @staticmethod
    def _reap_processes(*processes):
        """
            Release the pipes of helper subprocesses and wait for them to exit.
        """
        # Close every pipe first so that no helper stays blocked on a pipe
        # nobody reads any more; only then collect the exit statuses.
        for proc in processes:
            for stream in (proc.stdin, proc.stdout):
                if stream is None or stream.closed:
                    continue
                try:
                    stream.close()
                except OSError:
                    # Best-effort cleanup, e.g. a broken pipe while flushing
                    # towards a helper that already exited.
                    pass
        for proc in processes:
            proc.wait()

    def preprocess(self, wav_file: Union[str, os.PathLike], model_type: str):
        """
            Extract features from a wav file and cache them in self._inputs.

            Filter-bank features (80 mel bins) and processed pitch features are
            computed by Kaldi tools, concatenated along axis 1, normalised with
            ``apply-cmvn`` using ``self.config.cmvn_path`` and stored as
            ``self._inputs["audio"]`` (with a leading axis added) together with
            ``self._inputs["audio_len"]``.

            The helpers' stderr is discarded instead of being piped: the pipe
            was never read, so a talkative helper could block once it filled
            up. Intermediate files are written to a temporary directory that is
            removed afterwards rather than being left in the working directory.

            Args:
                wav_file: Path of the input wav file.
                model_type: Model tag; must contain ``"fat_st"``.

            Raises:
                ValueError: for any other ``model_type``.
        """
        audio_file = os.path.abspath(wav_file)
        logger.debug("Preprocess audio_file:" + audio_file)

        if "fat_st" not in model_type:
            raise ValueError("Wrong model type.")

        cmvn = self.config.cmvn_path
        utt_name = "_tmp"
        # One-line table "<utterance id> <wav path>" that both extractors read
        # from stdin ("scp:-").
        wav_scp = f"{utt_name} {audio_file}".encode("utf8")

        # Get the object for feature extraction
        fbank_extract_command = [
            "compute-fbank-feats", "--num-mel-bins=80", "--verbose=2",
            "--sample-frequency=16000", "scp:-", "ark:-"
        ]
        fbank_extract_process = subprocess.Popen(
            fbank_extract_command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL)
        try:
            fbank_extract_process.stdin.write(wav_scp)
            fbank_extract_process.stdin.close()
            fbank_feat = dict(
                kaldiio.load_ark(fbank_extract_process.stdout))[utt_name]
        finally:
            self._reap_processes(fbank_extract_process)

        # Pitch: the extractor's output is piped into the post-processor.
        extract_command = ["compute-kaldi-pitch-feats", "scp:-", "ark:-"]
        pitch_extract_process = subprocess.Popen(
            extract_command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL)
        pitch_processes = [pitch_extract_process]
        try:
            pitch_extract_process.stdin.write(wav_scp)
            pitch_extract_process.stdin.close()
            process_command = ["process-kaldi-pitch-feats", "ark:", "ark:-"]
            pitch_process = subprocess.Popen(
                process_command,
                stdin=pitch_extract_process.stdout,
                stdout=subprocess.PIPE,
                stderr=subprocess.DEVNULL)
            pitch_processes.append(pitch_process)
            pitch_feat = dict(kaldiio.load_ark(pitch_process.stdout))[utt_name]
        finally:
            self._reap_processes(*pitch_processes)

        concated_feat = np.concatenate((fbank_feat, pitch_feat), axis=1)

        # apply-cmvn reads its input through an scp file, so the concatenated
        # features have to go through the file system.
        with tempfile.TemporaryDirectory() as tmp_dir:
            raw_feat = os.path.join(tmp_dir, f"{utt_name}.raw")
            with WriteHelper(
                    f"ark,scp:{raw_feat}.ark,{raw_feat}.scp") as writer:
                writer(utt_name, concated_feat)
            cmvn_command = [
                "apply-cmvn", "--norm-vars=true", cmvn, f"scp:{raw_feat}.scp",
                "ark:-"
            ]
            cmvn_process = subprocess.Popen(
                cmvn_command,
                stdout=subprocess.PIPE,
                stderr=subprocess.DEVNULL)
            cmvn_processes = [cmvn_process]
            try:
                process_command = [
                    "copy-feats", "--compress=true", "ark:-", "ark:-"
                ]
                process = subprocess.Popen(
                    process_command,
                    stdin=cmvn_process.stdout,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.DEVNULL)
                cmvn_processes.append(process)
                norm_feat = dict(kaldiio.load_ark(process.stdout))[utt_name]
            finally:
                # The helpers must be finished before the directory goes away.
                self._reap_processes(*cmvn_processes)

        self._inputs["audio"] = paddle.to_tensor(norm_feat).unsqueeze(0)
        self._inputs["audio_len"] = paddle.to_tensor(
            self._inputs["audio"].shape[1:2], dtype="int64")

    @paddle.no_grad()
    def infer(self, model_type: str):
        """
            Model inference and result stored in self._outputs["result"].

            Decoding options are read from ``self.config.decode``.

            Raises:
                ValueError: when ``model_type`` is not ``"fat_st_ted"``.
        """
        cfg = self.config.decode
        audio = self._inputs["audio"]
        audio_len = self._inputs["audio_len"]
        if model_type == "fat_st_ted":
            hyps = self.model.decode(
                audio,
                audio_len,
                text_feature=self.text_feature,
                decoding_method=cfg.decoding_method,
                beam_size=cfg.beam_size,
                word_reward=cfg.word_reward,
                decoding_chunk_size=cfg.decoding_chunk_size,
                num_decoding_left_chunks=cfg.num_decoding_left_chunks,
                simulate_streaming=cfg.simulate_streaming)
            self._outputs["result"] = hyps
        else:
            raise ValueError("Wrong model type.")

    def postprocess(self, model_type: str) -> Union[str, os.PathLike]:
        """
            Return the result that ``infer`` stored in self._outputs["result"].

            Raises:
                ValueError: when ``model_type`` is not ``"fat_st_ted"``.
        """
        if model_type == "fat_st_ted":
            return self._outputs["result"]
        else:
            raise ValueError("Wrong model type.")

    def execute(self, argv: List[str]) -> bool:
        """
            Command line entry.

            Runs the executor once per input item and passes the collected
            results to ``process_task_results``. An input that raises is
            recorded as ``"<ExceptionClass>: <message>"``.

            Returns:
                True when every input succeeded, False otherwise.
        """
        parser_args = self.parser.parse_args(argv)

        model = parser_args.model
        src_lang = parser_args.src_lang
        tgt_lang = parser_args.tgt_lang
        sample_rate = parser_args.sample_rate
        config = parser_args.config
        ckpt_path = parser_args.ckpt_path
        device = parser_args.device

        if not parser_args.verbose:
            self.disable_task_loggers()

        task_source = self.get_input_source(parser_args.input)
        task_results = OrderedDict()
        has_exceptions = False

        for id_, input_ in task_source.items():
            try:
                res = self(input_, model, src_lang, tgt_lang, sample_rate,
                           config, ckpt_path, device)
                task_results[id_] = res
            except Exception as e:
                has_exceptions = True
                task_results[id_] = f'{e.__class__.__name__}: {e}'

        self.process_task_results(parser_args.input, task_results,
                                  parser_args.job_dump_result)

        return not has_exceptions

    @stats_wrapper
    def __call__(self,
                 audio_file: os.PathLike,
                 model: str='fat_st_ted',
                 src_lang: str='en',
                 tgt_lang: str='zh',
                 sample_rate: int=16000,
                 config: Optional[os.PathLike]=None,
                 ckpt_path: Optional[os.PathLike]=None,
                 device: str=paddle.get_device()):
        """
            Python API to call an executor.

            Checks the file's sample rate, initialises the model if needed, then
            runs preprocess, infer and postprocess and returns the result.
        """
        audio_file = os.path.abspath(audio_file)
        self._check(audio_file, sample_rate)
        paddle.set_device(device)
        self._init_from_path(model, src_lang, tgt_lang, config, ckpt_path)
        self.preprocess(audio_file, model)
        self.infer(model)
        res = self.postprocess(model)

        return res


================================================
FILE: paddlespeech/cli/text/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .infer import TextExecutor


================================================
FILE: paddlespeech/cli/text/infer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import re
from collections import OrderedDict
from typing import List
from typing import Optional
from typing import Union

import paddle
import yaml
from yacs.config import CfgNode

from ..executor import BaseExecutor
from ..log import logger
from ..utils import stats_wrapper
from paddlespeech.text.models.ernie_linear import ErnieLinear

__all__ = ['TextExecutor']


class TextExecutor(BaseExecutor):
    def __init__(self):
        """
            Register the executor for the 'text' task and build its CLI parser.

            The accepted ``--model`` values are the keys of
            ``self.task_resource.pretrained_models`` cut at their first '-'.
        """
        super().__init__(task='text')
        self.parser = argparse.ArgumentParser(
            prog='paddlespeech.text', add_help=True)
        self.parser.add_argument(
            '--input', type=str, default=None, help='Input text.')
        self.parser.add_argument(
            '--task',
            type=str,
            default='punc',
            choices=['punc'],
            help='Choose text task.')
        self.parser.add_argument(
            '--model',
            type=str,
            default='ernie_linear_p7_wudao',
            choices=[
                tag[:tag.index('-')]
                for tag in self.task_resource.pretrained_models.keys()
            ],
            help='Choose model type of text task.')
        self.parser.add_argument(
            '--lang',
            type=str,
            default='zh',
            choices=['zh', 'en'],
            help='Choose model language.')
        self.parser.add_argument(
            '--config',
            type=str,
            default=None,
            # The help text used to say "cls task", left over from another
            # executor.
            help='Config of text task. Use default config when it is None.')
        self.parser.add_argument(
            '--ckpt_path',
            type=str,
            default=None,
            help='Checkpoint file of model.')
        self.parser.add_argument(
            '--punc_vocab',
            type=str,
            default=None,
            help='Vocabulary file of punctuation restoration task.')
        self.parser.add_argument(
            '--device',
            type=str,
            default=paddle.get_device(),
            help='Choose device to execute model inference.')
        self.parser.add_argument(
            '-d',
            '--job_dump_result',
            action='store_true',
            help='Save job result into file.')
        self.parser.add_argument(
            '-v',
            '--verbose',
            action='store_true',
            help='Increase logger verbosity of current task.')

    def _init_from_path(self,
                        task: str='punc',
                        model_type: str='ernie_linear_p7_wudao',
                        lang: str='zh',
                        cfg_path: Optional[os.PathLike]=None,
                        ckpt_path: Optional[os.PathLike]=None,
                        vocab_file: Optional[os.PathLike]=None):
        """
            Init model and other resources from a specific path.
        """
        if hasattr(self, 'model'):
            logger.debug('Model had been initialized.')
            return

        self.task = task

        if cfg_path is None or ckpt_path is None or vocab_file is None:
            tag = '-'.join([model_type, task, lang])
            self.task_resource.set_task_model(tag, version=None)
            self.cfg_path = os.path.join(
                self.task_resource.res_dir,
                self.task_resource.res_dict['cfg_path'])
            self.ckpt_path = os.path.join(
                self.task_resource.res_dir,
                self.task_resource.res_dict['ckpt_path'])
            self.vocab_file = os.path.join(
                self.task_resource.res_dir,
                self.task_resource.res_dict['vocab_file'])
        else:
            self.cfg_path = os.path.abspath(cfg_path)
            self.ckpt_path = os.path.abspath(ckpt_path)
            self.vocab_file = os.path.abspath(vocab_file)

        model_name = model_type[:model_type.rindex('_')]
        if self.task == 'punc':
            # punc list
            self._punc_list = []
            with open(self.vocab_file, 'r', encoding='utf-8') as f:
                for line in f:
                    self._punc_list.append(line.strip())

            # model
            model_class, tokenizer_class = self.task_resource.get_model_class(
                model_name)
            self.model = model_class(
                cfg_path=self.cfg_path, ckpt_path=self.ckpt_path)
            self.tokenizer = tokenizer_class.from_pretrained('ernie-1.0')
        else:
            raise NotImplementedError

        self.model.eval()

    #init new models
    def _init_from_path_new(self,
                            task: str='punc',
                            model_type: str='ernie_linear_p7_wudao',
                            lang: str='zh',
                            cfg_path: Optional[os.PathLike]=None,
                            ckpt_path: Optional[os.PathLike]=None,
                            vocab_file: Optional[os.PathLike]=None):
        """
            Variant of ``_init_from_path`` that builds an ``ErnieLinear`` model.

            The model is constructed from the "model" section of the YAML config
            and its weights come from the "main_params" entry of the checkpoint.
            The tokenizer is 'ernie-3.0-mini-zh' when ``model_type`` contains
            'fast' and 'ernie-1.0' otherwise.

            Returns immediately when ``self.model`` already exists.

            Raises:
                NotImplementedError: for any task other than 'punc'.
        """
        if hasattr(self, 'model'):
            logger.debug('Model had been initialized.')
            return

        self.task = task

        explicit_paths = (cfg_path, ckpt_path, vocab_file)
        if any(path is None for path in explicit_paths):
            self.task_resource.set_task_model(
                '-'.join([model_type, task, lang]), version=None)
            resource = self.task_resource
            self.cfg_path = os.path.join(resource.res_dir,
                                         resource.res_dict['cfg_path'])
            self.ckpt_path = os.path.join(resource.res_dir,
                                          resource.res_dict['ckpt_path'])
            self.vocab_file = os.path.join(resource.res_dir,
                                           resource.res_dict['vocab_file'])
        else:
            self.cfg_path, self.ckpt_path, self.vocab_file = (
                os.path.abspath(cfg_path), os.path.abspath(ckpt_path),
                os.path.abspath(vocab_file))

        # model_type minus its last '_'-separated part names the model class.
        model_name = model_type[:model_type.rindex('_')]

        if self.task != 'punc':
            raise NotImplementedError

        # punc list: one entry per line of the vocabulary file
        self._punc_list = []
        with open(self.vocab_file, 'r', encoding='utf-8') as vocab:
            self._punc_list.extend(entry.strip() for entry in vocab)

        # model
        with open(self.cfg_path, 'r', encoding='utf-8') as cfg_file:
            model_config = CfgNode(yaml.safe_load(cfg_file))
        self.model = ErnieLinear(**model_config["model"])

        tokenizer_class = self.task_resource.get_model_class(model_name)[1]
        checkpoint = paddle.load(self.ckpt_path)
        self.model.set_state_dict(checkpoint["main_params"])
        self.model.eval()

        #tokenizer: fast version: ernie-3.0-mini-zh   slow version:ernie-1.0
        if 'fast' in model_type:
            self.tokenizer = tokenizer_class.from_pretrained(
                'ernie-3.0-mini-zh')
        else:
            self.tokenizer = tokenizer_class.from_pretrained('ernie-1.0')

    def _clean_text(self, text):
        text = text.lower()
        text = re.sub('[^A-Za-z0-9\u4e00-\u9fa5]', '', text)
        text = re.sub(f'[{"".join([p for p in self._punc_list][1:])}]', '',
                      text)
        return text

    def preprocess(self, text: Union[str, os.PathLike]):
        """
            Clean the input text, tokenize it character by character and store
            the tokenizer outputs in self._inputs under the keys 'input_ids',
            'seg_ids' and 'seq_len'.

            Raises:
                NotImplementedError: if self.task is not 'punc'.
                AssertionError: if nothing is left of the text after cleaning.
        """
        if self.task != 'punc':
            raise NotImplementedError

        clean_text = self._clean_text(text)
        assert len(clean_text) > 0, f'Invalid input string: {text}'

        encoded = self.tokenizer(
            list(clean_text), return_length=True, is_split_into_words=True)

        # (key in self._inputs, key in the tokenizer output)
        key_map = (
            ('input_ids', 'input_ids'),
            ('seg_ids', 'token_type_ids'),
            ('seq_len', 'seq_len'), )
        for dst_key, src_key in key_map:
            self._inputs[dst_key] = encoded[src_key]

    @paddle.no_grad()
    def infer(self):
        """
            Run the model on the ids stored in self._inputs and store the
            arg-max label of every position in self._outputs['preds'].

            Raises:
                NotImplementedError: if self.task is not 'punc'.
        """
        if self.task != 'punc':
            raise NotImplementedError

        # unsqueeze(0) adds a leading axis before the tensors go to the model.
        batch_input_ids = paddle.to_tensor(
            self._inputs['input_ids']).unsqueeze(0)
        batch_seg_ids = paddle.to_tensor(self._inputs['seg_ids']).unsqueeze(0)

        logits, _ = self.model(batch_input_ids, batch_seg_ids)

        # Best label per position, with the leading axis removed again.
        self._outputs['preds'] = paddle.argmax(logits, axis=-1).squeeze(0)

    def postprocess(self, isNewTrainer: bool=False) -> Union[str, os.PathLike]:
        """
            Output postprocess and return human-readable results such as texts and audio files.

            Tokens between the first and the last position of the sequence are
            concatenated; after every token whose predicted label is non-zero
            the matching punctuation mark is inserted.

            Args:
                isNewTrainer (bool): when True, the label ids are looked up in
                    the punctuation list shifted by one position (a
                    placeholder is put in front for label 0).

            Returns:
                str: the input text with punctuation restored.

            Raises:
                NotImplementedError: if self.task is not 'punc'.
        """
        if self.task != 'punc':
            raise NotImplementedError

        input_ids = self._inputs['input_ids']
        seq_len = self._inputs['seq_len']
        preds = self._outputs['preds']

        # The first and the last position are left out on both sides.
        tokens = self.tokenizer.convert_ids_to_tokens(input_ids[1:seq_len - 1])
        labels = preds[1:seq_len - 1].tolist()
        assert len(tokens) == len(labels)

        # Build the shifted lookup table locally. Re-assigning
        # self._punc_list here would prepend another placeholder on every
        # call and break all calls after the first one.
        if isNewTrainer:
            punc_list = [0] + self._punc_list
        else:
            punc_list = self._punc_list

        pieces = []
        for token, label in zip(tokens, labels):
            pieces.append(token)
            if label != 0:  # Label 0 means "no punctuation".
                pieces.append(punc_list[label])
        return ''.join(pieces)

    def execute(self, argv: List[str]) -> bool:
        """
            Command line entry.

            Parses argv, runs the executor once per input item and hands the
            collected results to self.process_task_results. For a failing item
            the stored result is '<ExceptionClass>: <message>'.

            Returns:
                bool: True when no item raised an exception, False otherwise.
        """
        parser_args = self.parser.parse_args(argv)

        # Positional arguments passed to self(...) after the input text.
        call_args = (
            parser_args.task,
            parser_args.model,
            parser_args.lang,
            parser_args.config,
            parser_args.ckpt_path,
            parser_args.punc_vocab,
            parser_args.device, )

        if not parser_args.verbose:
            self.disable_task_loggers()

        task_source = self.get_input_source(parser_args.input)
        task_results = OrderedDict()
        failed = False

        for id_, input_ in task_source.items():
            try:
                task_results[id_] = self(input_, *call_args)
            except Exception as e:
                failed = True
                task_results[id_] = f'{e.__class__.__name__}: {e}'

        self.process_task_results(parser_args.input, task_results,
                                  parser_args.job_dump_result)

        return not failed

    @stats_wrapper
    def __call__(
            self,
            text: str,
            task: str='punc',
            model: str='ernie_linear_p7_wudao',
            lang: str='zh',
            config: Optional[os.PathLike]=None,
            ckpt_path: Optional[os.PathLike]=None,
            punc_vocab: Optional[os.PathLike]=None,
            device: str=paddle.get_device(), ):
        """
            Python API to call an executor.

            Selects the device, initializes the model (old-style or new-style
            initializer depending on the model name), then runs preprocess,
            infer and postprocess and returns the postprocess result.
        """
        # These two names are the old-version models; every other name goes
        # through the new initialization and label lookup.
        uses_old_models = model in [
            'ernie_linear_p7_wudao', 'ernie_linear_p3_wudao'
        ]

        paddle.set_device(device)
        if uses_old_models:
            self._init_from_path(task, model, lang, config, ckpt_path,
                                 punc_vocab)
        else:
            self._init_from_path_new(task, model, lang, config, ckpt_path,
                                     punc_vocab)

        self.preprocess(text)
        self.infer()

        if uses_old_models:
            return self.postprocess()  # Retrieve result of text task.
        return self.postprocess(isNewTrainer=True)


================================================
FILE: paddlespeech/cli/tts/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .infer import TTSExecutor


================================================
FILE: paddlespeech/cli/tts/infer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import time
from collections import OrderedDict
from typing import Any
from typing import List
from typing import Optional
from typing import Union

import numpy as np
import paddle
import soundfile as sf
import yaml
from yacs.config import CfgNode

from ..executor import BaseExecutor
from ..log import logger
from ..utils import stats_wrapper
from paddlespeech.resource import CommonTaskResource
from paddlespeech.t2s.exps.syn_utils import get_am_inference
from paddlespeech.t2s.exps.syn_utils import get_frontend
from paddlespeech.t2s.exps.syn_utils import get_sess
from paddlespeech.t2s.exps.syn_utils import get_voc_inference
from paddlespeech.t2s.exps.syn_utils import run_frontend
from paddlespeech.t2s.utils import str2bool

# Only the executor class is exported from this module.
__all__ = ['TTSExecutor']
# Acoustic-model and vocoder names that may be used with `use_onnx=True`.
# TTSExecutor.__call__ asserts that both the chosen `am` and `voc` are members
# of this set before loading the ONNX models.
ONNX_SUPPORT_SET = {
    'speedyspeech_csmsc',
    'fastspeech2_csmsc',
    'fastspeech2_ljspeech',
    'fastspeech2_aishell3',
    'fastspeech2_vctk',
    'fastspeech2_male',
    'fastspeech2_mix',
    'fastspeech2_canton',
    'pwgan_csmsc',
    'pwgan_ljspeech',
    'pwgan_aishell3',
    'pwgan_vctk',
    'pwgan_male',
    'mb_melgan_csmsc',
    'hifigan_csmsc',
    'hifigan_ljspeech',
    'hifigan_aishell3',
    'hifigan_vctk',
    'hifigan_male',
}


class TTSExecutor(BaseExecutor):
    def __init__(self):
        """
        Create the executor for the 'tts' task and build the
        `paddlespeech.tts` command line parser (acoustic model options,
        vocoder options and general runtime options).
        """
        super().__init__('tts')
        self.parser = argparse.ArgumentParser(
            prog='paddlespeech.tts', add_help=True)
        add_arg = self.parser.add_argument

        add_arg(
            '--input', type=str, default=None, help='Input text to generate.')

        # ---- acoustic model ----
        am_choices = [
            'speedyspeech_csmsc',
            'fastspeech2_csmsc',
            'fastspeech2_ljspeech',
            'fastspeech2_aishell3',
            'fastspeech2_vctk',
            'fastspeech2_mix',
            'tacotron2_csmsc',
            'tacotron2_ljspeech',
            'fastspeech2_male',
            'fastspeech2_canton',
        ]
        add_arg(
            '--am',
            type=str,
            default='fastspeech2_csmsc',
            choices=am_choices,
            help='Choose acoustic model type of tts task.')
        add_arg(
            '--am_config',
            type=str,
            default=None,
            help='Config of acoustic model. Use default config when it is None.')
        add_arg(
            '--am_ckpt',
            type=str,
            default=None,
            help='Checkpoint file of acoustic model.')
        add_arg(
            "--am_stat",
            type=str,
            default=None,
            help="mean and standard deviation used to normalize spectrogram when training acoustic model."
        )
        # The three vocabulary / mapping files share the same option shape.
        dict_options = (
            ("--phones_dict", "phone vocabulary file."),
            ("--tones_dict", "tone vocabulary file."),
            ("--speaker_dict", "speaker id map file."), )
        for flag, help_text in dict_options:
            add_arg(flag, type=str, default=None, help=help_text)
        add_arg(
            '--spk_id',
            type=int,
            default=0,
            help='spk id for multi speaker acoustic model')

        # ---- vocoder ----
        voc_choices = [
            'pwgan_csmsc',
            'pwgan_ljspeech',
            'pwgan_aishell3',
            'pwgan_vctk',
            'mb_melgan_csmsc',
            'style_melgan_csmsc',
            'hifigan_csmsc',
            'hifigan_ljspeech',
            'hifigan_aishell3',
            'hifigan_vctk',
            'wavernn_csmsc',
            'pwgan_male',
            'hifigan_male',
        ]
        add_arg(
            '--voc',
            type=str,
            default='hifigan_csmsc',
            choices=voc_choices,
            help='Choose vocoder type of tts task.')
        add_arg(
            '--voc_config',
            type=str,
            default=None,
            help='Config of voc. Use default config when it is None.')
        add_arg(
            '--voc_ckpt',
            type=str,
            default=None,
            help='Checkpoint file of voc.')
        add_arg(
            "--voc_stat",
            type=str,
            default=None,
            help="mean and standard deviation used to normalize spectrogram when training voc."
        )

        # ---- other ----
        add_arg(
            '--lang',
            type=str,
            default='zh',
            help='Choose model language. zh or en or mix')
        add_arg(
            '--device',
            type=str,
            default=paddle.get_device(),
            help='Choose device to execute model inference.')
        add_arg('--cpu_threads', type=int, default=2)
        add_arg(
            '--output', type=str, default='output.wav', help='output file name')
        add_arg(
            '-d',
            '--job_dump_result',
            action='store_true',
            help='Save job result into file.')
        add_arg(
            '-v',
            '--verbose',
            action='store_true',
            help='Increase logger verbosity of current task.')
        add_arg(
            "--use_onnx",
            type=str2bool,
            default=False,
            help="whether to usen onnxruntime inference.")
        add_arg(
            '--fs',
            type=int,
            default=24000,
            help='sample rate for onnx models when use specified model files.')

    def _init_from_path(
            self,
            am: str='fastspeech2_csmsc',
            am_config: Optional[os.PathLike]=None,
            am_ckpt: Optional[os.PathLike]=None,
            am_stat: Optional[os.PathLike]=None,
            phones_dict: Optional[os.PathLike]=None,
            tones_dict: Optional[os.PathLike]=None,
            speaker_dict: Optional[os.PathLike]=None,
            voc: str='hifigan_csmsc',
            voc_config: Optional[os.PathLike]=None,
            voc_ckpt: Optional[os.PathLike]=None,
            voc_stat: Optional[os.PathLike]=None,
            lang: str='zh', ):
        """
        Init model and other resources from a specific path.

        Pretrained acoustic-model resources are used unless `am_ckpt`,
        `am_config`, `am_stat` and `phones_dict` are all given; pretrained
        vocoder resources are used unless `voc_ckpt`, `voc_config` and
        `voc_stat` are all given. The call is a no-op once both
        `self.am_inference` and `self.voc_inference` exist.

        Args:
            am (str): acoustic model name, combined with `lang` into the
                resource tag.
            am_config, am_ckpt, am_stat, phones_dict: user supplied acoustic
                model files.
            tones_dict, speaker_dict: optional overrides; they are only
                honoured when the resource dict of the acoustic model has a
                'tones_dict' / 'speaker_dict' entry.
            voc (str): vocoder name.
            voc_config, voc_ckpt, voc_stat: user supplied vocoder files.
            lang (str): language of the acoustic model.
        """
        if hasattr(self, 'am_inference') and hasattr(self, 'voc_inference'):
            logger.debug('Models had been initialized.')
            return

        # am
        use_pretrained_am = (am_ckpt is None or am_config is None or
                             am_stat is None or phones_dict is None)

        am_tag = am + '-' + lang
        self.task_resource.set_task_model(
            model_tag=am_tag,
            model_type=0,  # am
            skip_download=not use_pretrained_am,
            version=None,  # default version
        )
        if use_pretrained_am:
            self.am_res_path = self.task_resource.res_dir
            self.am_config = os.path.join(self.am_res_path,
                                          self.task_resource.res_dict['config'])
            self.am_ckpt = os.path.join(self.am_res_path,
                                        self.task_resource.res_dict['ckpt'])
            self.am_stat = os.path.join(
                self.am_res_path, self.task_resource.res_dict['speech_stats'])
            # must have phones_dict in acoustic
            self.phones_dict = os.path.join(
                self.am_res_path, self.task_resource.res_dict['phones_dict'])
            logger.debug(self.am_res_path)
            logger.debug(self.am_config)
            logger.debug(self.am_ckpt)
        else:
            self.am_config = os.path.abspath(am_config)
            self.am_ckpt = os.path.abspath(am_ckpt)
            self.am_stat = os.path.abspath(am_stat)
            self.phones_dict = os.path.abspath(phones_dict)
            self.am_res_path = os.path.dirname(self.am_config)

        # for speedyspeech
        self.tones_dict = None
        if 'tones_dict' in self.task_resource.res_dict:
            self.tones_dict = os.path.join(
                self.am_res_path, self.task_resource.res_dict['tones_dict'])
            if tones_dict:
                self.tones_dict = tones_dict

        # for multi speaker fastspeech2
        self.speaker_dict = None
        if 'speaker_dict' in self.task_resource.res_dict:
            self.speaker_dict = os.path.join(
                self.am_res_path, self.task_resource.res_dict['speaker_dict'])
            if speaker_dict:
                self.speaker_dict = speaker_dict

        # voc
        use_pretrained_voc = (voc_ckpt is None or voc_config is None or
                              voc_stat is None)
        voc_lang = lang
        # When speaker is 174 (csmsc), use csmsc's vocoder is better than aishell3's
        if lang == 'mix' or lang == 'canton':
            # The vocoder language is derived from the dataset suffix of the
            # vocoder name.
            voc_dataset = voc[voc.rindex('_') + 1:]
            if voc_dataset in {"ljspeech", "vctk"}:
                voc_lang = 'en'
            else:
                voc_lang = 'zh'
        voc_tag = voc + '-' + voc_lang
        self.task_resource.set_task_model(
            model_tag=voc_tag,
            model_type=1,  # vocoder
            skip_download=not use_pretrained_voc,
            version=None,  # default version
        )
        if use_pretrained_voc:
            self.voc_res_path = self.task_resource.voc_res_dir
            self.voc_config = os.path.join(
                self.voc_res_path, self.task_resource.voc_res_dict['config'])
            self.voc_ckpt = os.path.join(
                self.voc_res_path, self.task_resource.voc_res_dict['ckpt'])
            self.voc_stat = os.path.join(
                self.voc_res_path,
                self.task_resource.voc_res_dict['speech_stats'])
            logger.debug(self.voc_res_path)
            logger.debug(self.voc_config)
            logger.debug(self.voc_ckpt)
        else:
            self.voc_config = os.path.abspath(voc_config)
            self.voc_ckpt = os.path.abspath(voc_ckpt)
            self.voc_stat = os.path.abspath(voc_stat)
            # self.voc_config is already absolute at this point.
            self.voc_res_path = os.path.dirname(self.voc_config)

        # Init body.
        # Read the YAML configs as UTF-8 explicitly so the result does not
        # depend on the locale's default encoding. After this the two
        # attributes hold the parsed configs instead of the file paths.
        with open(self.am_config, encoding='utf-8') as f:
            self.am_config = CfgNode(yaml.safe_load(f))
        with open(self.voc_config, encoding='utf-8') as f:
            self.voc_config = CfgNode(yaml.safe_load(f))

        # The phone / tone / speaker dictionaries are handed over as paths
        # below; their sizes were previously computed here but never used.

        # frontend
        self.frontend = get_frontend(
            lang=lang, phones_dict=self.phones_dict, tones_dict=self.tones_dict)

        # acoustic model
        self.am_inference = get_am_inference(
            am=am,
            am_config=self.am_config,
            am_ckpt=self.am_ckpt,
            am_stat=self.am_stat,
            phones_dict=self.phones_dict,
            tones_dict=self.tones_dict,
            speaker_dict=self.speaker_dict)

        # vocoder
        self.voc_inference = get_voc_inference(
            voc=voc,
            voc_config=self.voc_config,
            voc_ckpt=self.voc_ckpt,
            voc_stat=self.voc_stat)

    def _init_from_path_onnx(self,
                             am: str='fastspeech2_csmsc',
                             am_ckpt: Optional[os.PathLike]=None,
                             phones_dict: Optional[os.PathLike]=None,
                             tones_dict: Optional[os.PathLike]=None,
                             speaker_dict: Optional[os.PathLike]=None,
                             voc: str='hifigan_csmsc',
                             voc_ckpt: Optional[os.PathLike]=None,
                             lang: str='zh',
                             device: str='cpu',
                             cpu_threads: int=2,
                             fs: int=24000):
        """
        Create the frontend and the ONNX sessions of the acoustic model and
        the vocoder.

        Pretrained acoustic-model resources are used unless both `am_ckpt`
        and `phones_dict` are given; the pretrained vocoder is used unless
        `voc_ckpt` is given. `fs` is only used as the sample rate when the
        acoustic model files are user supplied. `speaker_dict` is accepted
        but not read by this method. The call is a no-op once both
        `self.am_sess` and `self.voc_sess` exist.
        """
        if hasattr(self, 'am_sess') and hasattr(self, 'voc_sess'):
            logger.debug('Models had been initialized.')
            return

        # ---- acoustic model ----
        use_pretrained_am = am_ckpt is None or phones_dict is None
        am_tag = am + '_onnx' + '-' + lang
        self.task_resource.set_task_model(
            model_tag=am_tag,
            model_type=0,  # am
            skip_download=not use_pretrained_am,
            version=None,  # default version
        )
        if not use_pretrained_am:
            self.am_ckpt = os.path.abspath(am_ckpt)
            self.phones_dict = os.path.abspath(phones_dict)
            self.am_res_path = os.path.dirname(self.am_ckpt)
            self.am_fs = fs
        else:
            self.am_res_path = self.task_resource.res_dir
            self.am_ckpt = os.path.join(self.am_res_path,
                                        self.task_resource.res_dict['ckpt'])
            # must have phones_dict in acoustic
            self.phones_dict = os.path.join(
                self.am_res_path, self.task_resource.res_dict['phones_dict'])
            self.am_fs = self.task_resource.res_dict['sample_rate']
            for resolved in (self.am_res_path, self.am_ckpt):
                logger.debug(resolved)

        # for speedyspeech
        self.tones_dict = None
        if 'tones_dict' in self.task_resource.res_dict:
            default_tones = os.path.join(
                self.am_res_path, self.task_resource.res_dict['tones_dict'])
            self.tones_dict = tones_dict if tones_dict else default_tones

        # ---- vocoder ----
        use_pretrained_voc = voc_ckpt is None
        voc_lang = lang
        if lang in ('mix', 'canton'):
            # Language taken from the dataset suffix of the vocoder name.
            voc_dataset = voc[voc.rindex('_') + 1:]
            voc_lang = 'en' if voc_dataset in {"ljspeech", "vctk"} else 'zh'
        voc_tag = voc + '_onnx' + '-' + voc_lang
        self.task_resource.set_task_model(
            model_tag=voc_tag,
            model_type=1,  # vocoder
            skip_download=not use_pretrained_voc,
            version=None,  # default version
        )
        if not use_pretrained_voc:
            self.voc_ckpt = os.path.abspath(voc_ckpt)
            self.voc_res_path = os.path.dirname(os.path.abspath(self.voc_ckpt))
        else:
            self.voc_res_path = self.task_resource.voc_res_dir
            self.voc_ckpt = os.path.join(
                self.voc_res_path, self.task_resource.voc_res_dict['ckpt'])
            for resolved in (self.voc_res_path, self.voc_ckpt):
                logger.debug(resolved)

        # ---- frontend and sessions ----
        self.frontend = get_frontend(
            lang=lang, phones_dict=self.phones_dict, tones_dict=self.tones_dict)
        session_options = dict(device=device, cpu_threads=cpu_threads)
        self.am_sess = get_sess(model_path=self.am_ckpt, **session_options)
        self.voc_sess = get_sess(model_path=self.voc_ckpt, **session_options)

    def preprocess(self, input: Any, *args, **kwargs):
        """
        Preprocessing hook of the executor interface.

        This executor performs no preprocessing: the method body is empty
        and all arguments are ignored.

        Args:
            input (Any): Input text/file/stream or other content.
        """
        return None

    @paddle.no_grad()
    def infer(self,
              text: str,
              lang: str='zh',
              am: str='fastspeech2_csmsc',
              spk_id: int=0):
        """
        Model inference and result stored in self._outputs['wav'].

        The frontend output is processed one phone sequence at a time: each
        one goes through the acoustic model and the vocoder, and the
        resulting waveforms are concatenated in order. The time spent in each
        stage is accumulated in self.frontend_time, self.am_time and
        self.voc_time.

        Args:
            text (str): text to synthesize.
            lang (str): language passed to the frontend.
            am (str): acoustic model name of the form '<name>_<dataset>'.
            spk_id (int): speaker id, used by the multi speaker datasets only.

        Raises:
            ValueError: if `am` contains no '_' or if the frontend returns no
                phone sequence for `text` (previously the latter surfaced as
                an UnboundLocalError).
        """
        sep = am.rindex('_')
        am_name, am_dataset = am[:sep], am[sep + 1:]
        merge_sentences = False
        # Only speedyspeech consumes tone ids.
        get_tone_ids = am_name == 'speedyspeech'

        frontend_st = time.time()
        frontend_dict = run_frontend(
            frontend=self.frontend,
            text=text,
            merge_sentences=merge_sentences,
            get_tone_ids=get_tone_ids,
            lang=lang)
        self.frontend_time = time.time() - frontend_st
        self.am_time = 0
        self.voc_time = 0

        phone_ids = frontend_dict['phone_ids']
        wavs = []
        for i in range(len(phone_ids)):
            am_st = time.time()
            part_phone_ids = phone_ids[i]
            # am
            if am_name == 'speedyspeech':
                part_tone_ids = frontend_dict['tone_ids'][i]
                mel = self.am_inference(part_phone_ids, part_tone_ids)
            # multi speaker fastspeech2
            elif am_dataset in {'aishell3', 'vctk', 'mix', 'canton'}:
                mel = self.am_inference(
                    part_phone_ids, spk_id=paddle.to_tensor([spk_id]))
            else:
                mel = self.am_inference(part_phone_ids)
            self.am_time += (time.time() - am_st)

            # voc
            voc_st = time.time()
            wavs.append(self.voc_inference(mel))
            self.voc_time += (time.time() - voc_st)

        if not wavs:
            raise ValueError(
                f'Nothing to synthesize: the frontend returned no phone '
                f'sequence for input text {text!r}')

        # Join all pieces with a single concat instead of growing the
        # waveform pairwise inside the loop; the joining time still counts
        # as vocoder time.
        concat_st = time.time()
        wav_all = wavs[0] if len(wavs) == 1 else paddle.concat(wavs)
        self.voc_time += (time.time() - concat_st)
        self._outputs['wav'] = wav_all

    def infer_onnx(self,
                   text: str,
                   lang: str='zh',
                   am: str='fastspeech2_csmsc',
                   spk_id: int=0):
        """
        ONNX inference; the result is stored in self._outputs['wav'].

        The frontend output is processed one phone sequence at a time: each
        one is fed to the acoustic model session and then to the vocoder
        session, and the resulting waveforms are concatenated in order. The
        time spent in each stage is accumulated in self.frontend_time,
        self.am_time and self.voc_time.

        Args:
            text (str): text to synthesize.
            lang (str): language passed to the frontend.
            am (str): acoustic model name of the form '<name>_<dataset>'.
            spk_id (int): speaker id, used by the multi speaker datasets only.

        Raises:
            ValueError: if `am` contains no '_' or if the frontend returns no
                phone sequence for `text` (previously the latter surfaced as
                an UnboundLocalError).
        """
        sep = am.rindex('_')
        am_name, am_dataset = am[:sep], am[sep + 1:]
        merge_sentences = False
        # Only speedyspeech consumes tone ids.
        get_tone_ids = am_name == 'speedyspeech'

        frontend_st = time.time()
        frontend_dict = run_frontend(
            frontend=self.frontend,
            text=text,
            merge_sentences=merge_sentences,
            get_tone_ids=get_tone_ids,
            lang=lang,
            to_tensor=False, )
        self.frontend_time = time.time() - frontend_st
        self.am_time = 0
        self.voc_time = 0

        phone_ids = frontend_dict['phone_ids']
        am_input_feed = {}
        wavs = []
        for i in range(len(phone_ids)):
            am_st = time.time()
            part_phone_ids = phone_ids[i]
            if am_name == 'fastspeech2':
                am_input_feed.update({'text': part_phone_ids})
                if am_dataset in {"aishell3", "vctk", "mix", "canton"}:
                    # NOTE: 'spk_id' should be List[int] rather than int here!!
                    am_input_feed.update({'spk_id': [spk_id]})
            elif am_name == 'speedyspeech':
                part_tone_ids = frontend_dict['tone_ids'][i]
                am_input_feed.update({
                    'phones': part_phone_ids,
                    'tones': part_tone_ids
                })
            # The first output of the session is used as the mel input of
            # the vocoder.
            mel = self.am_sess.run(
                output_names=None, input_feed=am_input_feed)[0]
            self.am_time += (time.time() - am_st)

            # voc
            voc_st = time.time()
            wav = self.voc_sess.run(
                output_names=None, input_feed={'logmel': mel})[0]
            wavs.append(wav)
            self.voc_time += (time.time() - voc_st)

        if not wavs:
            raise ValueError(
                f'Nothing to synthesize: the frontend returned no phone '
                f'sequence for input text {text!r}')

        # Join all pieces with a single concatenate instead of growing the
        # waveform pairwise inside the loop; the joining time still counts
        # as vocoder time.
        concat_st = time.time()
        wav_all = wavs[0] if len(wavs) == 1 else np.concatenate(wavs)
        self.voc_time += (time.time() - concat_st)

        self._outputs['wav'] = wav_all

    def postprocess(self, output: str='output.wav') -> Union[str, os.PathLike]:
        """
        Write the synthesized waveform to an audio file.

        The tensor in self._outputs['wav'] is converted with `.numpy()` and
        written with the sample rate `self.am_config.fs`.

        Args:
            output (str): target file name; '~' is expanded and the path is
                made absolute.

        Returns:
            Union[str, os.PathLike]: absolute path of the written file.
        """
        wav_path = os.path.abspath(os.path.expanduser(output))
        samples = self._outputs['wav'].numpy()
        sf.write(wav_path, samples, samplerate=self.am_config.fs)
        return wav_path

    def postprocess_onnx(self,
                         output: str='output.wav') -> Union[str, os.PathLike]:
        """
        Write the waveform produced by the ONNX pipeline to an audio file.

        self._outputs['wav'] is written as is, with the sample rate
        `self.am_fs`.

        Args:
            output (str): target file name; '~' is expanded and the path is
                made absolute.

        Returns:
            Union[str, os.PathLike]: absolute path of the written file.
        """
        wav_path = os.path.abspath(os.path.expanduser(output))
        samples = self._outputs['wav']
        sf.write(wav_path, samples, samplerate=self.am_fs)
        return wav_path

    # Entry point for the command line interface.
    def execute(self, argv: List[str]) -> bool:
        """
        Command line entry.

        Parses argv, synthesizes every input item and hands the collected
        results to self.process_task_results. With more than one input item
        the id of the item is inserted before the '.wav' suffix of the output
        file name (`out.wav` -> `out_<id>.wav`). For a failing item the
        stored result is '<ExceptionClass>: <message>'.

        Returns:
            bool: True when no item raised an exception, False otherwise.

        Raises:
            AssertionError: if there are several input items and the output
                name is not a string ending in '.wav'.
        """

        args = self.parser.parse_args(argv)

        # Keyword arguments shared by every call of self(...).
        call_kwargs = dict(
            # acoustic model related
            am=args.am,
            am_config=args.am_config,
            am_ckpt=args.am_ckpt,
            am_stat=args.am_stat,
            phones_dict=args.phones_dict,
            tones_dict=args.tones_dict,
            speaker_dict=args.speaker_dict,
            spk_id=args.spk_id,
            # vocoder related
            voc=args.voc,
            voc_config=args.voc_config,
            voc_ckpt=args.voc_ckpt,
            voc_stat=args.voc_stat,
            # other
            lang=args.lang,
            device=args.device,
            use_onnx=args.use_onnx,
            cpu_threads=args.cpu_threads,
            fs=args.fs)

        if not args.verbose:
            self.disable_task_loggers()

        task_source = self.get_input_source(args.input)
        task_results = OrderedDict()
        has_exceptions = False

        multiple_inputs = len(task_source) > 1
        if multiple_inputs:
            assert isinstance(args.output,
                              str) and args.output.endswith('.wav')
            # Strip only the trailing suffix: str.replace would also rewrite
            # any other '.wav' occurring earlier in the path.
            output_stem = args.output[:-len('.wav')]

        for id_, input_ in task_source.items():
            if multiple_inputs:
                output = f'{output_stem}_{id_}.wav'
            else:
                output = args.output

            try:
                task_results[id_] = self(
                    text=input_, output=output, **call_kwargs)
            except Exception as e:
                has_exceptions = True
                task_results[id_] = f'{e.__class__.__name__}: {e}'

        self.process_task_results(args.input, task_results,
                                  args.job_dump_result)

        return not has_exceptions

    # Entry point for the Python API.
    @stats_wrapper
    def __call__(self,
                 text: str,
                 am: str='fastspeech2_csmsc',
                 am_config: Optional[os.PathLike]=None,
                 am_ckpt: Optional[os.PathLike]=None,
                 am_stat: Optional[os.PathLike]=None,
                 spk_id: int=0,
                 phones_dict: Optional[os.PathLike]=None,
                 tones_dict: Optional[os.PathLike]=None,
                 speaker_dict: Optional[os.PathLike]=None,
                 voc: str='hifigan_csmsc',
                 voc_config: Optional[os.PathLike]=None,
                 voc_ckpt: Optional[os.PathLike]=None,
                 voc_stat: Optional[os.PathLike]=None,
                 lang: str='zh',
                 device: str=paddle.get_device(),
                 output: str='output.wav',
                 use_onnx: bool=False,
                 cpu_threads: int=2,
                 fs: int=24000):
        """
        Python API to call an executor.

        Initializes the models (paddle or ONNX flavour, depending on
        `use_onnx`), synthesizes `text` and writes the audio to `output`.

        Returns:
            The path returned by the matching postprocess method.

        Raises:
            AssertionError: with `use_onnx`, if `am` or `voc` is not in
                ONNX_SUPPORT_SET.
        """
        if use_onnx:
            # use onnx
            # we use `cpu` for onnxruntime by default
            # please see description in https://github.com/PaddlePaddle/PaddleSpeech/pull/2220
            self.task_resource = CommonTaskResource(
                task='tts', model_format='onnx')
            assert (
                am in ONNX_SUPPORT_SET and voc in ONNX_SUPPORT_SET
            ), f'the am and voc you choose, they should be in {ONNX_SUPPORT_SET}'
            self._init_from_path_onnx(
                am=am,
                am_ckpt=am_ckpt,
                phones_dict=phones_dict,
                tones_dict=tones_dict,
                speaker_dict=speaker_dict,
                voc=voc,
                voc_ckpt=voc_ckpt,
                lang=lang,
                device=device,
                cpu_threads=cpu_threads,
                fs=fs)
            self.infer_onnx(text=text, lang=lang, am=am, spk_id=spk_id)
            return self.postprocess_onnx(output=output)

        # paddle inference
        paddle.set_device(device)
        self._init_from_path(
            am=am,
            am_config=am_config,
            am_ckpt=am_ckpt,
            am_stat=am_stat,
            phones_dict=phones_dict,
            tones_dict=tones_dict,
            speaker_dict=speaker_dict,
            voc=voc,
            voc_config=voc_config,
            voc_ckpt=voc_ckpt,
            voc_stat=voc_stat,
            lang=lang)
        self.infer(text=text, lang=lang, am=am, spk_id=spk_id)
        return self.postprocess(output=output)


================================================
FILE: paddlespeech/cli/utils.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import hashlib
import inspect
import json
import os
import tarfile
import threading
import time
import uuid
import zipfile
from typing import Any
from typing import Dict

import paddle
import requests
import soundfile as sf
import yaml
from paddle.framework import load

from . import download
from ..utils.env import CONF_HOME
from .entry import commands
try:
    from .. import __version__
except ImportError:
    __version__ = "0.0.0"  # for develop branch

# Ask `requests` to retry failed connections up to three times.
# NOTE(review): `requests.adapters.HTTPAdapter.__init__` appears to capture
# DEFAULT_RETRIES as a default-argument value when `requests` is imported, so
# rebinding the constant here may have no effect on adapters created later -
# confirm, and mount an adapter with `max_retries` explicitly if retries matter.
requests.adapters.DEFAULT_RETRIES = 3

# Names exported by `from <this module> import *`.
__all__ = [
    'timer_register',
    'cli_register',
    'explicit_command_register',
    'get_command',
    'download_and_decompress',
    'load_state_dict_from_url',
    'stats_wrapper',
]

# Timing records, keyed by the `__name__` of each registered command.
CLI_TIMER = dict()


def timer_register(command):
    """Create (or reset) the timing record of ``command`` and return it.

    Usable as a decorator: the decorated object is returned untouched, and
    ``CLI_TIMER[command.__name__]`` is set to three fresh empty lists stored
    under the keys ``'start'``, ``'end'`` and ``'extra'``.

    Args:
        command: Any object exposing ``__name__`` (e.g. a class or function).

    Returns:
        The very same ``command`` object.
    """
    CLI_TIMER[command.__name__] = dict(start=[], end=[], extra=[])
    return command


def cli_register(name: str, description: str='') -> Any:
    """Build a decorator that registers a command under a dotted name.

    Args:
        name (str): Dot-separated command path, e.g. ``'a.b.c'``. Each segment
            is used as a key while descending through ``commands``.
        description (str, optional): Text stored under ``'_description'`` on
            the final node. Nothing is stored when it is empty.

    Returns:
        Any: A decorator that stores the decorated object under ``'_entry'``
        on the final node and returns that object unchanged.
    """

    def _register(command):
        node = commands
        for segment in name.split('.'):
            node = node[segment]

        node['_entry'] = command
        if description:
            node['_description'] = description
        return command

    return _register


def explicit_command_register(name: str, description: str='', cls: str=''):
    """Register ``cls`` under a dotted command name without using a decorator.

    Args:
        name (str): Dot-separated command path; each segment is a key used to
            descend through ``commands``.
        description (str, optional): Stored under ``'_description'`` on the
            final node when non-empty.
        cls (str, optional): Value stored under ``'_entry'`` on the final node.
    """
    node = commands
    for segment in name.split('.'):
        node = node[segment]

    node['_entry'] = cls
    if not description:
        return
    node['_description'] = description


def get_command(name: str) -> Any:
    """Look up the entry registered under a dotted command name.

    Args:
        name (str): Dot-separated command path; each segment is a key used to
            descend through ``commands``.

    Returns:
        Any: The value stored under ``'_entry'`` on the final node.
    """
    node = commands
    for segment in name.split('.'):
        node = node[segment]
    return node['_entry']


def _get_uncompress_path(filepath: os.PathLike) -> os.PathLike:
    file_dir = os.path.dirname(filepath)
    is_zip_file = False
    if tarfile.is_tarfile(filepath):
        files = tarfile.open(filepath, "r:*")
        file_list = files.getnames()
    elif zipfile.is_zipfile(filepath):
        files = zipfile.ZipFile(filepath, 'r')
        file_list = files.namelist()
        is_zip_file = True
    else:
        return file_dir

    if download._is_a_single_file(file_list):
        rootpath = file_list[0]
        uncompressed_path = os.path.join(file_dir, rootpath)
    elif download._is_a_single_dir(file_list):
        if is_zip_file:
            rootpath = os.path.splitext(file_list[0])[0].split(os.sep)[0]
        else:
            rootpath = os.path.splitext(file_list[0])[0].split(os.sep)[-1]
        uncompressed_path = os.path.join(file_dir, rootpath)
    else:
        rootpath = os.path.splitext(filepath)[0].split(os.sep)[-1]
        uncompressed_path = os.path.join(file_dir, rootpath)

    files.close()
    return uncompressed_path


def download_and_decompress(archive: Dict[str, str], path: str) -> os.PathLike:
    """Download an archive (if needed) and decompress it under ``path``.

    Args:
        archive (Dict[str, str]): Must contain the keys ``'url'`` and
            ``'md5'``.
        path (str): Target directory; it is created when missing.

    Returns:
        os.PathLike: The decompressed location. When a file named after the
        URL already exists in ``path`` and passes ``download._md5check``, this
        is the result of ``_get_uncompress_path`` (decompressing first if that
        directory does not exist yet); otherwise it is whatever
        ``download.get_path_from_url`` returns.

    Raises:
        AssertionError: If ``'url'`` or ``'md5'`` is missing from ``archive``.
    """
    # exist_ok=True removes the check-then-create race when several processes
    # prepare the same directory at the same time.
    os.makedirs(path, exist_ok=True)

    assert 'url' in archive and 'md5' in archive, \
        'Dictionary keys of "url" and "md5" are required in the archive, but got: {}'.format(list(archive.keys()))

    filepath = os.path.join(path, os.path.basename(archive['url']))
    if os.path.isfile(filepath) and download._md5check(filepath,
                                                       archive['md5']):
        # A verified copy is already on disk: only decompress if necessary.
        uncompress_path = _get_uncompress_path(filepath)
        if not os.path.isdir(uncompress_path):
            download._decompress(filepath)
    else:
        # Report the download in a background thread, then fetch the archive.
        StatsWorker(
            task='download',
            version=__version__,
            extra_info={
                'download_url': archive['url'],
                'paddle_version': paddle.__version__
            }).start()
        uncompress_path = download.get_path_from_url(archive['url'], path,
                                                     archive['md5'])

    return uncompress_path


def load_state_dict_from_url(url: str, path: str, md5: str=None) -> os.PathLike:
    """Download a checkpoint from ``url`` into ``path`` and load it.

    Args:
        url (str): Location of the file to download.
        path (str): Directory the file is stored in; created when missing.
        md5 (str, optional): Checksum forwarded to
            ``download.get_path_from_url``. Defaults to None.

    Returns:
        The object returned by ``paddle.framework.load`` for the file
        ``<path>/<basename of url>``.
    """
    # exist_ok=True removes the check-then-create race when several processes
    # prepare the same directory at the same time.
    os.makedirs(path, exist_ok=True)

    download.get_path_from_url(url, path, md5)
    return load(os.path.join(path, os.path.basename(url)))


def _md5(text: str):
    '''Calculate the md5 value of the input text.'''
    md5code = hashlib.md5(text.encode())
    return md5code.hexdigest()


class ConfigCache:
    """Small persistent key/value cache stored in ``<CONF_HOME>/cache.yaml``.

    On construction a fresh ``cache_info`` value is generated, then values
    found in the cache file (if any) override it. When the file is missing or
    cannot be used, the freshly generated values are written out instead.
    """

    def __init__(self):
        self._data = {}
        self._initialize()
        self.file = os.path.join(CONF_HOME, 'cache.yaml')
        if not os.path.exists(self.file):
            self.flush()
            return

        with open(self.file, 'r') as file:
            try:
                # NOTE(review): yaml.FullLoader can build more than plain
                # data; this file is written by flush() as plain JSON-like
                # types, so yaml.safe_load would probably do - confirm.
                cfg = yaml.load(file, Loader=yaml.FullLoader)
            except Exception:
                cfg = None

        # The read handle is closed at this point, so rewriting the file is
        # safe. Anything that is not a mapping (empty file, scalar, list) is
        # treated as a corrupted cache and replaced by the defaults.
        if isinstance(cfg, dict):
            self._data.update(cfg)
        else:
            self.flush()

    @property
    def cache_info(self):
        """The cached identifier string stored under ``'cache_info'``."""
        return self._data['cache_info']

    def _initialize(self):
        """Set default configuration values.

        ``cache_info`` is the MD5 of the last 12 characters of a ``uuid1``
        string, followed by ``-`` and the current Unix time in seconds.
        """
        cache_info = _md5(str(uuid.uuid1())[-12:]) + "-" + str(int(time.time()))
        self._data['cache_info'] = cache_info

    def flush(self):
        '''Flush the current configuration into the configuration file.'''
        with open(self.file, 'w') as file:
            # Round-trip through JSON so only plain types reach yaml.dump.
            cfg = json.loads(json.dumps(self._data))
            yaml.dump(cfg, file)


# Endpoint that StatsWorker.run() sends its usage-statistics GET request to.
# NOTE(review): plain HTTP - confirm whether an HTTPS endpoint should be used.
stats_api = "http://paddlepaddle.org.cn/paddlehub/stat"
# Identifier added to the "extra" payload of every statistics request.
# Building ConfigCache here reads (or creates) the cache file at import time.
cache_info = ConfigCache().cache_info


class StatsWorker(threading.Thread):
    """Thread that sends one usage-statistics record to ``stats_api``.

    Args:
        task (str, optional): Task name reported as ``task``. Defaults to "asr".
        model (optional): Model name; only reported when truthy.
        version (optional): Version string reported as ``version``.
        extra_info (dict, optional): Additional fields, JSON-encoded into the
            ``extra`` request parameter. The mapping is copied, so neither the
            caller's dict nor a shared default is modified. Defaults to None,
            meaning no additional fields.
    """

    def __init__(self,
                 task="asr",
                 model=None,
                 version=__version__,
                 extra_info=None):
        threading.Thread.__init__(self)
        self._task = task
        self._model = model
        self._version = version
        # Private copy: run() adds `cache_info`, which previously leaked into
        # the caller's dict and into the shared `{}` default argument.
        self._extra_info = dict(extra_info) if extra_info else {}

    def run(self):
        """Build the request parameters and send them; failures are ignored."""
        params = {
            'task': self._task,
            'version': self._version,
            'from': 'ppspeech'
        }
        if self._model:
            params['model'] = self._model

        extra = dict(self._extra_info)
        extra['cache_info'] = cache_info
        params['extra'] = json.dumps(extra)

        try:
            # Best effort: statistics must never break the actual task.
            # NOTE(review): no timeout is passed, so a stalled connection
            # keeps this (non-daemon) thread alive - consider adding one.
            requests.get(stats_api, params)
        except Exception:
            pass

        return


def _note_one_stat(cls_name, params={}):
    task = cls_name.replace('Executor', '').lower()  # XXExecutor
    extra_info = {
        'paddle_version': paddle.__version__,
    }

    if 'model' in params:
        model = params['model']
    else:
        model = None

    if 'audio_file' in params:
        try:
            # recursive import cased by: utils.DATA_HOME
            _, sr = sf.read(params['audio_file'])
        except Exception:
            sr = -1

    if task == 'asr':
        extra_info.update({
            'lang': params['lang'],
            'inp_sr': sr,
            'model_sr': params['sample_rate'],
        })
    elif task == 'st':
        extra_info.update({
            'lang':
            params['src_lang'] + '-' + params['tgt_lang'],
            'inp_sr':
            sr,
            'model_sr':
            params['sample_rate'],
        })
    elif task == 'tts':
        model = params['am']
        extra_info.update({
            'lang': params['lang'],
            'vocoder': params['voc'],
        })
    elif task == 'cls':
        extra_info.update({
            'inp_sr': sr,
        })
    elif task == 'text':
        extra_info.update({
            'sub_task': params['task'],
            'lang': params['lang'],
        })
    else:
        return

    StatsWorker(
        task=task,
        model=model,
        version=__version__,
        extra_info=extra_info, ).start()


def _parse_args(func, *args, **kwargs):
    # FullArgSpec(args, varargs, varkw, defaults, kwonlyargs, kwonlydefaults, annotations)
    argspec = inspect.getfullargspec(func)

    keys = argspec[0]
    if keys[0] == 'self':  # Remove self pointer.
        keys = keys[1:]

    default_values = argspec[3]
    values = [None] * (len(keys) - len(default_values))
    values.extend(list(default_values))
    params = dict(zip(keys, values))

    for idx, v in enumerate(args):
        params[keys[idx]] = v
    for k, v in kwargs.items():
        params[k] = v

    return params


def stats_wrapper(executor_func):
    """Decorate an executor method so each call is reported as a statistic.

    Reporting is best effort: any exception raised while collecting or
    sending the statistic is swallowed, and the wrapped method is always
    called with the original arguments.

    Args:
        executor_func: The method to wrap; its first parameter is the
            instance.

    Returns:
        A wrapper with the same name, docstring and call behaviour as
        ``executor_func``.
    """
    # Local import so this block does not rely on a file-level import.
    import functools

    # functools.wraps keeps __name__/__doc__ (and exposes __wrapped__), so
    # help() and introspection still describe the real method.
    @functools.wraps(executor_func)
    def _warpper(self, *args, **kwargs):
        try:
            _note_one_stat(
                type(self).__name__, _parse_args(executor_func, *args,
                                                 **kwargs))
        except Exception:
            # Statistics must never prevent the actual task from running.
            pass
        return executor_func(self, *args, **kwargs)

    return _warpper


================================================
FILE: paddlespeech/cli/vector/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .infer import VectorExecutor


================================================
FILE: paddlespeech/cli/vector/infer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import sys
from collections import OrderedDict
from typing import Dict
from typing import List
from typing import Optional
from typing import Union

import paddle
import soundfile
from yacs.config import CfgNode

from ..executor import BaseExecutor
from ..log import logger
from ..utils import stats_wrapper
from paddlespeech.audio.backends import soundfile_load as load_audio
from paddlespeech.audio.compliance.librosa import melspectrogram
from paddlespeech.vector.io.batch import feature_normalize
from paddlespeech.vector.modules.sid_model import SpeakerIdetification


class VectorExecutor(BaseExecutor):
    def __init__(self):
        """Create the executor for the 'vector' task and its CLI parser."""
        super().__init__('vector')
        self.parser = argparse.ArgumentParser(
            prog="paddlespeech.vector", add_help=True)

        self.parser.add_argument(
            "--model",
            type=str,
            default="ecapatdnn_voxceleb12",
            # Pretrained tags look like "<model>-<suffix>"; only the part
            # before the first '-' is offered as a choice.
            choices=[
                tag[:tag.index('-')]
                for tag in self.task_resource.pretrained_models.keys()
            ],
            help="Choose model type of vector task.")
        self.parser.add_argument(
            "--task",
            type=str,
            default="spk",
            choices=["spk", "score"],
            help="task type in vector domain")
        self.parser.add_argument(
            "--input",
            type=str,
            default=None,
            help="Audio file to extract embedding.")
        self.parser.add_argument(
            "--sample_rate",
            type=int,
            default=16000,
            choices=[16000],
            # The help text used to advertise 8000 as well, which `choices`
            # rejects.
            help="Choose the audio sample rate of the model. Only 16000 is supported now."
        )
        self.parser.add_argument(
            "--ckpt_path",
            type=str,
            default=None,
            help="Checkpoint file of model.")
        self.parser.add_argument(
            '--yes',
            '-y',
            action="store_true",
            default=False,
            help='No additional parameters required. \
            Once set this parameter, it means accepting the request of the program by default, \
            which includes transforming the audio sample rate')
        self.parser.add_argument(
            '--config',
            type=str,
            default=None,
            # Was "Config of asr task" (copied from another executor).
            help='Config of vector task. Use default config when it is None.')
        self.parser.add_argument(
            "--device",
            type=str,
            default=paddle.get_device(),
            help="Choose device to execute model inference.")
        self.parser.add_argument(
            '-d',
            '--job_dump_result',
            action='store_true',
            help='Save job result into file.')

        self.parser.add_argument(
            '-v',
            '--verbose',
            action='store_true',
            help='Increase logger verbosity of current task.')

    def execute(self, argv: List[str]) -> bool:
        """Command line entry for vector model

        Args:
            argv (List[str]): command line args list

        Returns:
            bool: 
                False: some audio occurs error
                True: all audio process success
        """
        # stage 0: parse the args and collect the ones shared by every call
        parser_args = self.parser.parse_args(argv)
        call_kwargs = dict(
            model=parser_args.model,
            sample_rate=parser_args.sample_rate,
            config=parser_args.config,
            ckpt_path=parser_args.ckpt_path,
            force_yes=parser_args.yes,
            device=parser_args.device)

        # stage 1: configurate the verbose flag
        if not parser_args.verbose:
            self.disable_task_loggers()

        # stage 2: read the input data and store them as a list
        task_source = self.get_input_source(parser_args.input)
        logger.debug(f"task source: {task_source}")

        # stage 3: process the audio one by one
        # we do action according the task type
        task_result = OrderedDict()
        has_exceptions = False
        for id_, input_ in task_source.items():
            try:
                # extract the speaker audio embedding
                if parser_args.task == "spk":
                    logger.debug("do vector spk task")
                    task_result[id_] = self(audio_file=input_, **call_kwargs)
                elif parser_args.task == "score":
                    logger.debug("do vector score task")
                    logger.debug(f"input content {input_}")
                    wav_paths = input_.split()
                    if len(wav_paths) != 2:
                        # Both parts must be f-strings, otherwise the count
                        # placeholder is printed literally.
                        logger.error(
                            f"vector score task input {input_} wav num is not two, "
                            f"that is {len(wav_paths)}")
                        # SystemExit is not an Exception, so this really
                        # terminates instead of being recorded below.
                        sys.exit(-1)

                    # get the enroll and test embedding
                    enroll_audio, test_audio = wav_paths
                    logger.debug(
                        f"score task, enroll audio: {enroll_audio}, test audio: {test_audio}"
                    )
                    enroll_embedding = self(
                        audio_file=enroll_audio, **call_kwargs)
                    test_embedding = self(audio_file=test_audio, **call_kwargs)

                    # get the score
                    task_result[id_] = self.get_embeddings_score(
                        enroll_embedding, test_embedding)
            except Exception as e:
                # Record the failure as the result and keep going.
                has_exceptions = True
                task_result[id_] = f'{e.__class__.__name__}: {e}'

        logger.debug("task result as follows: ")
        logger.debug(f"{task_result}")

        # stage 4: process the all the task results
        self.process_task_results(parser_args.input, task_result,
                                  parser_args.job_dump_result)

        # stage 5: return the exception flag
        #          False means that processing some audio raised an error
        return not has_exceptions

    def _get_job_contents(
            self, job_input: os.PathLike) -> Dict[str, Union[str, os.PathLike]]:
        """
        Read a job input file and return its contents in a dictionary.
        Refactor from the Executor._get_job_contents

        Args:
            job_input (os.PathLike): The job input file.

        Returns:
            Dict[str, str]: Contents of job input.
        """
        job_contents = OrderedDict()
        with open(job_input) as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                k = line.split(' ')[0]
                v = ' '.join(line.split(' ')[1:])
                job_contents[k] = v
        return job_contents

    def get_embeddings_score(self, enroll_embedding, test_embedding):
        """Score an enroll embedding against a test embedding.

        The scorer is a ``paddle.nn.CosineSimilarity(axis=0)`` instance that
        is created on first use and kept on ``self.score_func``.

        Args:
            enroll_embedding (numpy.array): shape: (emb_size), enroll audio embedding
            test_embedding (numpy.array): shape: (emb_size), test audio embedding

        Returns:
            score: the score between enroll embedding and test embedding,
                   obtained with ``.item()`` on the scorer's output
        """
        scorer_missing = not hasattr(self, "score_func")
        if scorer_missing:
            self.score_func = paddle.nn.CosineSimilarity(axis=0)
            logger.debug("create the cosine score function ")

        enroll_tensor = paddle.to_tensor(enroll_embedding)
        test_tensor = paddle.to_tensor(test_embedding)
        return self.score_func(enroll_tensor, test_tensor).item()

    @stats_wrapper
    def __call__(self,
                 audio_file: os.PathLike,
                 model: str='ecapatdnn_voxceleb12',
                 sample_rate: int=16000,
                 config: os.PathLike=None,
                 ckpt_path: os.PathLike=None,
                 force_yes: bool=False,
                 device=paddle.get_device()):
        """Run the whole pipeline on one audio file and return its embedding.

        Args:
            audio_file (os.PathLike): path of the audio file; it is made
                                      absolute before being checked
            model (str, optional): model type looked up in the pretrained model list.
                                   Defaults to 'ecapatdnn_voxceleb12'.
            sample_rate (int, optional): model sample rate. Defaults to 16000.
            config (os.PathLike, optional): yaml config. Defaults to None.
            ckpt_path (os.PathLike, optional): pretrained model path. Defaults to None.
            force_yes (bool, optional): forwarded to ``_check``. Defaults to False.
            device (optional): paddle running host device. Defaults to paddle.get_device(),
                               evaluated once when the class is defined.

        Returns:
            The value returned by ``postprocess()``, i.e. the embedding that
            ``infer`` stored in ``self._outputs``.

        Note:
            The process exits with status -1 when ``_check`` rejects the input.
        """
        # Validate the input first; a rejected file terminates the process.
        audio_file = os.path.abspath(audio_file)
        input_accepted = self._check(audio_file, sample_rate, force_yes)
        if not input_accepted:
            sys.exit(-1)

        # Select the device paddle runs on.
        logger.debug(f"device type: {device}")
        paddle.device.set_device(device)

        # Load the model (a no-op once a model is attached to this executor).
        self._init_from_path(model, sample_rate, config, ckpt_path)

        # Features -> embedding -> result.
        self.preprocess(model, audio_file)
        self.infer(model)
        return self.postprocess()

    def _init_from_path(self,
                        model_type: str='ecapatdnn_voxceleb12',
                        sample_rate: int=16000,
                        cfg_path: Optional[os.PathLike]=None,
                        ckpt_path: Optional[os.PathLike]=None,
                        task=None):
        """Build the speaker model and load its weights (first call only).

        Args:
            model_type (str, optional): model tag in the pretrained model list. 
                                        Defaults to 'ecapatdnn_voxceleb12'.
            sample_rate (int, optional): model sample rate. 
                                         Defaults to 16000.
            cfg_path (Optional[os.PathLike], optional): yaml config file path. 
                                                        Defaults to None.
            ckpt_path (Optional[os.PathLike], optional): checkpoint path on disk,
                                                         without the '.pdparams' suffix. 
                                                         Defaults to None.
            task (str, optional): the model task type; always stored on ``self.task``
        """
        # The task is recorded on every call, even when the model exists.
        self.task = task
        if hasattr(self, "model"):
            logger.debug("Model has been initialized")
            return

        # Locate the config and checkpoint. Both paths must be given to load
        # from disk; otherwise the pretrained resource is used.
        use_pretrained = cfg_path is None or ckpt_path is None
        if use_pretrained:
            if sample_rate == 16000:
                rate_suffix = "16k"
            else:
                rate_suffix = "8k"
            tag = model_type + "-" + rate_suffix
            self.task_resource.set_task_model(tag, version=None)
            logger.debug(f"load the pretrained model: {tag}")

            # The resource directory is where the pretrained files are kept.
            self.res_path = self.task_resource.res_dir
            self.cfg_path = os.path.join(
                self.task_resource.res_dir,
                self.task_resource.res_dict['cfg_path'])
            self.ckpt_path = os.path.join(
                self.task_resource.res_dir,
                self.task_resource.res_dict['ckpt_path'] + '.pdparams')
        else:
            self.cfg_path = os.path.abspath(cfg_path)
            self.ckpt_path = os.path.abspath(ckpt_path + ".pdparams")
            # Resource root: two directory levels above the config file.
            cfg_abs = os.path.abspath(self.cfg_path)
            self.res_path = os.path.dirname(os.path.dirname(cfg_abs))

        logger.debug(f"start to read the ckpt from {self.ckpt_path}")
        logger.debug(f"read the config from {self.cfg_path}")
        logger.debug(f"get the res path {self.res_path}")

        # Read the yaml config.
        self.config = CfgNode(new_allowed=True)
        self.config.merge_from_file(self.cfg_path)

        # The model class name is the model type without its last
        # '_'-separated part (rindex raises ValueError when there is no '_').
        logger.debug("start to dynamic import the model class")
        cut = model_type.rindex('_')
        model_name = model_type[:cut]
        model_class = self.task_resource.get_model_class(model_name)
        logger.debug(f"model name {model_name}")
        backbone = model_class(**self.config.model)
        self.model = SpeakerIdetification(
            backbone=backbone, num_class=self.config.num_speakers)
        self.model.eval()

        # Load the trained parameters into the network.
        logger.debug("start to set the model parameters to model")
        state = paddle.load(self.ckpt_path)
        self.model.set_state_dict(state)

        logger.debug("create the model instance success")

    @paddle.no_grad()
    def infer(self, model_type: str):
        """Run the backbone on the prepared features and store the embedding.

        Reads ``self._inputs["feats"]`` and ``self._inputs["lengths"]`` and
        writes the result to ``self._outputs["embedding"]``.

        Args:
            model_type (str): speaker verification model type (not used here)
        """
        inputs = self._inputs
        feats, lengths = inputs["feats"], inputs["lengths"]
        logger.debug("start to do backbone network model forward")
        logger.debug(
            f"feats shape:{feats.shape}, lengths shape: {lengths.shape}")

        # Drop the size-1 axes and convert to numpy; the original author's
        # note gives the shapes as (1, emb_size, 1) -> (emb_size).
        backbone_out = self.model.backbone(feats, lengths)
        embedding = backbone_out.squeeze().numpy()
        logger.debug(f"embedding size: {embedding.shape}")

        # Keep the numpy embedding for postprocess().
        self._outputs["embedding"] = embedding

    def postprocess(self) -> Union[str, os.PathLike]:
        """Hand back the embedding stored by ``infer``.

        Returns:
            Union[str, os.PathLike]: the object found under
            ``self._outputs["embedding"]``, returned as-is
        """
        return self._outputs["embedding"]

    def preprocess(self, model_type: str, input_file: Union[str, os.PathLike]):
        """Extract the audio feat and store it in ``self._inputs``

        After this call ``self._inputs["feats"]`` holds the mean-normalized
        mel spectrogram with a leading batch axis, and
        ``self._inputs["lengths"]`` holds ``paddle.ones([1])``.

        Args:
            model_type (str): speaker verification model type (not used here)
            input_file (Union[str, os.PathLike]): audio file path

        Note:
            The process exits with status -1 when the feature extraction fails.
        """
        audio_file = input_file
        if isinstance(audio_file, (str, os.PathLike)):
            logger.debug(f"Preprocess audio file: {audio_file}")

        # stage 1: load the audio sample points
        #    Note: this process must match the training process
        # NOTE(review): the file's own sample rate is discarded and no
        # resampling happens here, although _check() announces one when the
        # rates differ - confirm whether load_audio should be given a rate.
        waveform, _ = load_audio(audio_file)
        logger.debug(
            f"load the audio sample points, shape is: {waveform.shape}")

        # stage 2: get the audio feat
        # Note: Now we only support fbank feature
        try:
            feat = melspectrogram(
                x=waveform,
                sr=self.config.sr,
                n_mels=self.config.n_mels,
                window_size=self.config.window_size,
                hop_length=self.config.hop_size)
            logger.debug(f"extract the audio feat, shape is: {feat.shape}")
        except Exception as e:
            # This failure is fatal, so report it at error level (like the
            # failures in _check) instead of hiding it in the debug log.
            logger.error(f"feat occurs exception {e}")
            sys.exit(-1)

        feat = paddle.to_tensor(feat).unsqueeze(0)
        # in inference period, the lengths is all one without padding
        lengths = paddle.ones([1])

        # stage 3: we do feature normalize,
        #          Now we assume that the feat must do normalize
        feat = feature_normalize(feat, mean_norm=True, std_norm=False)

        # stage 4: store the feat and length in the _inputs,
        #          which will be used in other function
        logger.debug(f"feats shape: {feat.shape}")
        self._inputs["feats"] = feat
        self._inputs["lengths"] = lengths

        logger.debug("audio extract the feat success")

    def _check(self, audio_file: str, sample_rate: int, force_yes: bool=False):
        """Check the requested sample rate and that the audio file is readable

        Side effects: sets ``self.sample_rate``; when the file could be read,
        also sets ``self.change_format`` (True when the file's sample rate
        differs from the requested one).

        Args:
            audio_file (str): audio file path, which will be extracted the embedding
            sample_rate (int): the desired model sample rate 
            force_yes (bool, optional): when it is exactly ``False`` and the sample
                rates differ, the user is asked interactively whether to continue.
                Defaults to False.

        Returns:
            bool: False when the sample rate is not 8000/16000, the file does not
                  exist or cannot be read, or the user declines; True otherwise
        """
        self.sample_rate = sample_rate
        if self.sample_rate not in (8000, 16000):
            # The command line flag is --sample_rate (there is no --sr).
            logger.error(
                "invalid sample rate, please input --sample_rate 8000 or --sample_rate 16000"
            )
            logger.error(
                f"The requested model sample rate is: {self.sample_rate}")
            return False

        if isinstance(audio_file, (str, os.PathLike)):
            if not os.path.isfile(audio_file):
                logger.error("Please input the right audio file path")
                return False

        logger.debug("checking the aduio file format......")
        try:
            # Only the sample rate is needed; decoding doubles as a check
            # that the file can be read at all.
            _, audio_sample_rate = soundfile.read(
                audio_file, dtype="float32", always_2d=True)
        except Exception as e:
            logger.exception(e)
            logger.error(
                "can not open the audio file, please check the audio file format is 'wav'. \n \
                 you can try to use sox to change the file format.\n \
                 For example: \n \
                 sample rate: 16k \n \
                 sox input_audio.xx --rate 16k --bits 16 --channels 1 output_audio.wav \n \
                 sample rate: 8k \n \
                 sox input_audio.xx --rate 8k --bits 16 --channels 1 output_audio.wav \n \
                 ")
            return False

        logger.debug(f"The sample rate is {audio_sample_rate}")

        if audio_sample_rate != self.sample_rate:
            logger.debug("The sample rate of the input file is not {}.\n \
                            The program will resample the wav file to {}.\n \
                            If the result does not meet your expectations，\n \
                            Please input the 16k 16 bit 1 channel wav file. \
                        ".format(self.sample_rate, self.sample_rate))
            if force_yes is False:
                # Ask until a recognizable answer is given (case-insensitive).
                while True:
                    logger.debug(
                        "Whether to change the sample rate and the channel. Y: change the sample. N: exit the prgream."
                    )
                    answer = input("Input(Y/N):").strip().lower()
                    if answer in ("y", "yes"):
                        logger.debug(
                            "change the sampele rate, channel to 16k and 1 channel"
                        )
                        break
                    if answer in ("n", "no"):
                        logger.debug("Exit the program")
                        return False
                    logger.warning("Not regular input, please input again")
            self.change_format = True
        else:
            logger.debug("The audio file format is right")
            self.change_format = False

        return True


================================================
FILE: paddlespeech/cli/whisper/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .infer import WhisperExecutor


================================================
FILE: paddlespeech/cli/whisper/infer.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import io
import os
import sys
import time
from collections import OrderedDict
from typing import List
from typing import Optional
from typing import Union

import librosa
import numpy as np
import paddle
import soundfile
from yacs.config import CfgNode

from ...utils.env import DATA_HOME
from ..download import get_path_from_url
from ..executor import BaseExecutor
from ..log import logger
from ..utils import CLI_TIMER
from ..utils import stats_wrapper
from ..utils import timer_register
from paddlespeech.s2t.models.whisper import log_mel_spectrogram
from paddlespeech.s2t.models.whisper import ModelDimensions
from paddlespeech.s2t.models.whisper import Whisper
from paddlespeech.s2t.models.whisper.tokenizer import LANGUAGES
from paddlespeech.s2t.models.whisper.tokenizer import TO_LANGUAGE_CODE
from paddlespeech.s2t.utils.utility import UpdateConfig

__all__ = ['WhisperExecutor']


@timer_register
class WhisperExecutor(BaseExecutor):
    def __init__(self):
        """Set up the executor and declare the ``paddlespeech.whisper`` CLI options."""
        super().__init__('whisper')
        self.parser = argparse.ArgumentParser(
            prog='paddlespeech.whisper', add_help=True)
        add_option = self.parser.add_argument

        # --- input / model selection -------------------------------------
        add_option(
            '--input', type=str, default=None, help='Audio file to recognize.')
        add_option(
            '--model',
            type=str,
            default='whisper',
            choices=['whisper'],
            help='Choose model type of asr task.')
        add_option(
            '--lang',
            type=str,
            default='',
            choices=['', 'en'],
            help='Choose model language. Default is "", English-only model set [en].'
        )
        add_option(
            '--task',
            type=str,
            default='transcribe',
            choices=["transcribe", "translate"],
            help='Choose task tpye for transcribe or translate.')
        add_option(
            '--size',
            type=str,
            default='turbo',
            choices=['large', 'medium', 'base', 'small', 'tiny', 'turbo'],
            help='Choose model size.')

        # Accepted decode languages: the keys of LANGUAGES followed by the
        # title-cased keys of TO_LANGUAGE_CODE, each group sorted on its own.
        language_codes = sorted(LANGUAGES.keys())
        language_names = sorted([k.title() for k in TO_LANGUAGE_CODE.keys()])
        add_option(
            '--language',
            type=str,
            default='None',
            choices=language_codes + language_names,
            help='Choose model decode language. Default is None, recognized by model.'
        )
        add_option(
            "--sample_rate",
            type=int,
            default=16000,
            choices=[16000],
            help='Choose the audio sample rate of the model. only support 16000')

        # --- config / checkpoint -----------------------------------------
        add_option(
            '--config',
            type=str,
            default=None,
            help='Config of asr task. Use default config when it is None.')
        add_option(
            '--decode_method',
            type=str,
            default='ctc_prefix_beam_search',
            choices=['ctc_greedy_search', 'ctc_prefix_beam_search'],
            help='only support transformer and conformer model')
        add_option(
            '--ckpt_path',
            type=str,
            default=None,
            help='Checkpoint file of model.')

        # --- runtime behaviour -------------------------------------------
        add_option(
            '--yes',
            '-y',
            action="store_true",
            default=False,
            help='No additional parameters required. \
            Once set this parameter, it means accepting the request of the program by default, \
            which includes transforming the audio sample rate')
        add_option(
            '--rtf',
            action="store_true",
            default=False,
            help='Show Real-time Factor(RTF).')
        add_option(
            '--device',
            type=str,
            default=paddle.get_device(),
            help='Choose device to execute model inference.')
        add_option(
            '-d',
            '--job_dump_result',
            action='store_true',
            help='Save job result into file.')
        add_option(
            '-v',
            '--verbose',
            action='store_true',
            help='Increase logger verbosity of current task.')

    def _init_from_path(self,
                        model_type: str='whisper',
                        lang: str='',
                        task: str='transcribe',
                        size: str='turbo',
                        language: str='None',
                        sample_rate: int=16000,
                        cfg_path: Optional[os.PathLike]=None,
                        decode_method: str='ctc_prefix_beam_search',
                        num_decoding_left_chunks: int=-1,
                        ckpt_path: Optional[os.PathLike]=None):
        """
        Init model and other resources from a specific path.

        The method is a no-op when ``self.model`` already exists, so only the
        first call on an executor instance configures model, task and language.

        Args:
            model_type: Model family; must contain "whisper", otherwise an
                ``Exception("wrong type")`` is raised.
            lang: '' or a language suffix (e.g. 'en') that becomes part of the
                pretrained resource tag.
            task: Stored as ``self.task`` when not None.
            size: Model size, part of the pretrained resource tag.
            language: Decode language stored as ``self.language`` when not
                None; forced to 'en' when ``lang == 'en'``.
            sample_rate: 16000 maps to the '16k' tag suffix, anything else to '8k'.
            cfg_path: Config file; the pretrained resource is used when this
                or ``ckpt_path`` is None.
            decode_method: Accepted for signature compatibility; not used here.
            num_decoding_left_chunks: Accepted for signature compatibility;
                not used here.
            ckpt_path: Checkpoint path WITHOUT the ".pdparams" suffix (the
                suffix is appended here).
        """
        logger.debug("start to init the model")

        if hasattr(self, 'model'):
            logger.debug('Model had been initialized.')
            return

        # Resource tag such as "whisper-turbo-16k" or "whisper-base-en-16k".
        # Computed unconditionally so it is also available for the log
        # message below when a custom config/checkpoint is supplied.
        sample_rate_str = '16k' if sample_rate == 16000 else '8k'
        if lang == "":
            tag = model_type + '-' + size + '-' + sample_rate_str
        else:
            tag = model_type + '-' + size + '-' + lang + '-' + sample_rate_str

        if cfg_path is None or ckpt_path is None:
            self.task_resource.set_task_model(tag, version=None)
            self.res_path = self.task_resource.res_dir

            self.cfg_path = os.path.join(
                self.res_path, self.task_resource.res_dict['cfg_path'])
            self.ckpt_path = os.path.join(
                self.res_path,
                self.task_resource.res_dict['ckpt_path'] + ".pdparams")
            logger.debug(self.res_path)

        else:
            self.cfg_path = os.path.abspath(cfg_path)
            self.ckpt_path = os.path.abspath(ckpt_path + ".pdparams")
            self.res_path = os.path.dirname(
                os.path.dirname(os.path.abspath(self.cfg_path)))
        logger.debug(self.cfg_path)
        logger.debug(self.ckpt_path)

        #Init body.
        self.config = CfgNode(new_allowed=True)
        self.config.merge_from_file(self.cfg_path)

        with UpdateConfig(self.config):
            if "whisper" in model_type:
                # NOTE(review): res_dict / version are read from task_resource
                # even on the custom cfg/ckpt path, where set_task_model() was
                # not called above -- confirm task_resource is populated there.
                resource_url = self.task_resource.res_dict['resource_data']
                resource_md5 = self.task_resource.res_dict['resource_data_md5']

                self.resource_path = os.path.join(
                    DATA_HOME, self.task_resource.version, 'whisper')
                self.download_resource(resource_url, self.resource_path,
                                       resource_md5)
            else:
                raise Exception("wrong type")

        # load model
        model_dict = paddle.load(self.ckpt_path)
        dims = ModelDimensions(**model_dict["dims"])
        self.dims = dims
        self.model = Whisper(dims)
        self.model.load_dict(model_dict)
        self.model.eval()

        #set task
        if task is not None:
            self.task = task

        #set language
        if language is not None:
            if lang == 'en' and language != 'en':
                # f-string: the original literal printed "{tag}" verbatim.
                logger.info(
                    f"{tag} is an English-only model, set language=English .")
                self.language = 'en'
            else:
                self.language = language

    def preprocess(self, model_type: str, input: Union[str, os.PathLike]):
        """
        Input preprocess and return paddle.Tensor stored in self.input.
        Input content can be a text(tts), a file(asr, cls) or a streaming(not supported yet).

        Reads the audio as float32, optionally downmixes and resamples it to
        ``self.sample_rate`` (when ``self.change_format`` was set by
        ``_check``), computes the log-mel features and stores them in
        ``self._inputs["audio"]`` / ``self._inputs["audio_len"]``.

        Args:
            model_type: Not used by this method.
            input: Audio file path, or an ``io.BytesIO`` holding the audio.
        """

        audio_file = input
        if isinstance(audio_file, (str, os.PathLike)):
            # f-string so that non-str PathLike objects can be logged too.
            logger.debug(f"Preprocess audio_file:{audio_file}")
        elif isinstance(audio_file, io.BytesIO):
            audio_file.seek(0)

        # Get the object for feature extraction
        # whisper hard-coded audio hyperparameters, params in paddlespeech/s2t/models/whisper/whisper.py
        logger.debug("read the audio file")
        audio, audio_sample_rate = soundfile.read(
            audio_file, dtype="float32", always_2d=True)
        if self.change_format:
            # The samples are already float32, so downmix and resample
            # directly in float. The previous int16 round trip asserted on
            # float input (mono) or truncated the averaged samples to ~0
            # (stereo), and produced int16-scaled audio unlike the branch
            # below.
            if audio.shape[1] >= 2:
                audio = audio.mean(axis=1, dtype=np.float32)
            else:
                audio = audio[:, 0]
            audio = librosa.resample(
                audio, orig_sr=audio_sample_rate, target_sr=self.sample_rate)
            audio = np.asarray(audio, dtype=np.float32)
        else:
            audio = audio[:, 0]

        logger.debug(f"audio shape: {audio.shape}")
        # fbank
        audio = log_mel_spectrogram(
            audio,
            resource_path=self.resource_path,
            n_mels=self.dims.n_mels,
            padding=480000)
        audio_len = paddle.to_tensor(audio.shape[0]).unsqueeze(axis=0)

        self._inputs["audio"] = audio
        self._inputs["audio_len"] = audio_len
        logger.debug(f"audio feat shape: {audio.shape}")

        logger.debug("audio feat process success")

    @paddle.no_grad()
    def infer(self, model_type: str):
        """
        Run ``self.model.transcribe`` on the prepared features and keep the
        returned value in ``self._outputs["result"]``.

        ``model_type`` is accepted but not used by this method.
        """
        logger.debug("start to infer the model to get the output")
        cfg = self.config
        audio = self._inputs["audio"]

        # Temperature schedule: a single value, or a ramp from cfg.temperature
        # up to 1.0 in steps of cfg.temperature_increment_on_fallback.
        if cfg.temperature_increment_on_fallback is None:
            temperature = [cfg.temperature]
        else:
            ramp = np.arange(cfg.temperature, 1.0 + 1e-6,
                             cfg.temperature_increment_on_fallback)
            temperature = tuple(ramp)

        transcribe = self.model.transcribe
        decode_options = dict(
            verbose=cfg.verbose,
            task=self.task,
            language=self.language,
            resource_path=self.resource_path,
            temperature=temperature,
            compression_ratio_threshold=cfg.compression_ratio_threshold,
            logprob_threshold=cfg.logprob_threshold,
            best_of=cfg.best_of,
            beam_size=cfg.beam_size,
            patience=cfg.patience,
            length_penalty=cfg.length_penalty,
            initial_prompt=cfg.initial_prompt,
            condition_on_previous_text=cfg.condition_on_previous_text,
            no_speech_threshold=cfg.no_speech_threshold)
        self._outputs["result"] = transcribe(audio, **decode_options)

    def postprocess(self) -> Union[str, os.PathLike]:
        """
            Hand back, unchanged, the value stored under ``self._outputs["result"]``.
        """
        result = self._outputs["result"]
        return result

    def download_resource(self, url, lm_dir, md5sum):
        """Call ``get_path_from_url`` for ``url`` with ``lm_dir`` as root dir, the
        given md5 and ``decompress=True``; the returned path is discarded."""
        fetch_options = dict(
            url=url, root_dir=lm_dir, md5sum=md5sum, decompress=True)
        get_path_from_url(**fetch_options)

    def _pcm16to32(self, audio):
        assert (audio.dtype == np.int16)
        audio = audio.astype("float32")
        bits = np.iinfo(np.int16).bits
        audio = audio / (2**(bits - 1))
        return audio

    def _pcm32to16(self, audio):
        assert (audio.dtype == np.float32)
        bits = np.iinfo(np.int16).bits
        audio = audio * (2**(bits - 1))
        audio = np.round(audio).astype("int16")
        return audio

    def _check(self, audio_file: str, sample_rate: int, force_yes: bool=False):
        """
        Validate the requested sample rate and the input audio.

        Stores ``sample_rate`` in ``self.sample_rate`` and sets
        ``self.change_format`` to True when the file's sample rate differs
        from it (False otherwise). Unless ``force_yes`` is set, the user is
        asked on stdin to confirm the resampling.

        Args:
            audio_file: Path to the audio file, or an ``io.BytesIO``.
            sample_rate: Target sample rate; only 16000 and 8000 pass.
            force_yes: Skip the interactive confirmation.

        Returns:
            True when processing may continue, False when the sample rate is
            invalid, the file is missing/unreadable, or the user declined.
        """
        self.sample_rate = sample_rate
        if self.sample_rate != 16000 and self.sample_rate != 8000:
            # The CLI flag is --sample_rate; there is no --sr option.
            logger.error(
                "invalid sample rate, please input --sample_rate 8000 or --sample_rate 16000"
            )
            return False

        if isinstance(audio_file, (str, os.PathLike)):
            if not os.path.isfile(audio_file):
                logger.error("Please input the right audio file path")
                return False
        elif isinstance(audio_file, io.BytesIO):
            audio_file.seek(0)

        logger.debug("checking the audio file format......")
        try:
            # Only the sample rate is used below; the samples are discarded.
            audio, audio_sample_rate = soundfile.read(
                audio_file, dtype="int16", always_2d=True)
        except Exception as e:
            logger.exception(e)
            logger.error(
                f"can not open the audio file, please check the audio file({audio_file}) format is 'wav'. \n \
                 you can try to use sox to change the file format.\n \
                 For example: \n \
                 sample rate: 16k \n \
                 sox input_audio.xx --rate 16k --bits 16 --channels 1 output_audio.wav \n \
                 sample rate: 8k \n \
                 sox input_audio.xx --rate 8k --bits 16 --channels 1 output_audio.wav \n \
                 ")
            return False
        logger.debug("The sample rate is %d" % audio_sample_rate)
        if audio_sample_rate != self.sample_rate:
            logger.warning("The sample rate of the input file is not {}.\n \
                            The program will resample the wav file to {}.\n \
                            If the result does not meet your expectations，\n \
                            Please input the 16k 16 bit 1 channel wav file. \
                        ".format(self.sample_rate, self.sample_rate))
            if force_yes is False:
                # Keep asking until one of the accepted answers is typed.
                while (True):
                    logger.debug(
                        "Whether to change the sample rate and the channel. Y: change the sample. N: exit the prgream."
                    )
                    answer = input("Input(Y/N):").strip()
                    if answer in ("Y", "y", "yes", "Yes"):
                        logger.debug(
                            "change the sampele rate, channel to 16k and 1 channel"
                        )
                        break
                    elif answer in ("N", "n", "no", "No"):
                        logger.debug("Exit the program")
                        return False
                    else:
                        logger.warning("Not regular input, please input again")

            self.change_format = True
        else:
            logger.debug("The audio file format is right")
            self.change_format = False

        return True

    def execute(self, argv: List[str]) -> bool:
        """
            Command line entry.

            Parses ``argv``, runs the executor once per input item and passes
            the collected results to ``process_task_results``. Returns False
            if any item raised an exception, True otherwise.
        """
        parser_args = self.parser.parse_args(argv)

        # Options forwarded unchanged to every __call__ invocation.
        call_options = dict(
            model=parser_args.model,
            lang=parser_args.lang,
            task=parser_args.task,
            size=parser_args.size,
            language=parser_args.language,
            sample_rate=parser_args.sample_rate,
            config=parser_args.config,
            ckpt_path=parser_args.ckpt_path,
            decode_method=parser_args.decode_method,
            force_yes=parser_args.yes,
            rtf=parser_args.rtf,
            device=parser_args.device)
        show_rtf = call_options['rtf']

        if not parser_args.verbose:
            self.disable_task_loggers()

        task_source = self.get_input_source(parser_args.input)
        task_results = OrderedDict()
        failed = False

        for id_, input_ in task_source.items():
            try:
                outcome = self(audio_file=input_, **call_options)
            except Exception as e:
                # A failing item is reported as text; the loop keeps going.
                failed = True
                outcome = f'{e.__class__.__name__}: {e}'
            task_results[id_] = outcome

        if show_rtf:
            self.show_rtf(CLI_TIMER[self.__class__.__name__])

        self.process_task_results(parser_args.input, task_results,
                                  parser_args.job_dump_result)

        return not failed

    @stats_wrapper
    def __call__(self,
                 audio_file: os.PathLike,
                 model: str='whisper',
                 lang: str='',
                 task: str='transcribe',
                 size: str='large',
                 language: str='None',
                 sample_rate: int=16000,
                 config: os.PathLike=None,
                 ckpt_path: os.PathLike=None,
                 decode_method: str='attention_rescoring',
                 num_decoding_left_chunks: int=-1,
                 force_yes: bool=False,
                 rtf: bool=False,
                 device=paddle.get_device()):
        """
        Python API to call an executor.

        Initialises the model (first call only), validates the audio file,
        then runs preprocess -> infer -> postprocess and returns the result.
        The process exits with status -1 when ``_check`` rejects the input.
        With ``rtf`` set, start/end timestamps and the audio duration in
        seconds are appended to ``CLI_TIMER`` under this class' name.
        """
        audio_file = os.path.abspath(audio_file)
        paddle.set_device(device)
        self._init_from_path(
            model_type=model,
            lang=lang,
            task=task,
            size=size,
            language=language,
            sample_rate=sample_rate,
            cfg_path=config,
            decode_method=decode_method,
            num_decoding_left_chunks=num_decoding_left_chunks,
            ckpt_path=ckpt_path)

        input_ok = self._check(audio_file, sample_rate, force_yes)
        if not input_ok:
            sys.exit(-1)

        if rtf:
            timer_key = self.__class__.__name__
            CLI_TIMER[timer_key]['start'].append(time.time())

        self.preprocess(model, audio_file)
        self.infer(model)
        res = self.postprocess()  # Retrieve result of asr.

        if rtf:
            CLI_TIMER[timer_key]['end'].append(time.time())
            # Audio duration = number of frames / sample rate of the file.
            samples, file_sample_rate = soundfile.read(
                audio_file, dtype="int16", always_2d=True)
            duration = samples.shape[0] / file_sample_rate
            CLI_TIMER[timer_key]['extra'].append(duration)

        return res


================================================
FILE: paddlespeech/cls/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/cls/exps/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/cls/exps/panns/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/cls/exps/panns/deploy/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/cls/exps/panns/deploy/predict.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os

import numpy as np
import paddle
from paddle import inference
from paddle.audio.datasets import ESC50
from paddle.audio.features import LogMelSpectrogram
from scipy.special import softmax

import paddlespeech.utils
from paddlespeech.audio.backends import soundfile_load as load_audio

# yapf: disable
# Command-line options of the deploy predictor. Parsing happens at module
# level, i.e. as soon as this file is executed or imported.
parser = argparse.ArgumentParser()
# required=True means the default below is never used.
parser.add_argument("--model_dir", type=str, required=True, default="./export", help="The directory to static model.")
# NOTE(review): 'gcu' is accepted here, but Predictor in this file only
# configures the 'gpu', 'cpu' and 'xpu' cases.
parser.add_argument('--device', choices=['cpu', 'gpu', 'xpu', 'gcu'], default="gpu", help="Select which device to train model, defaults to gpu.")
parser.add_argument("--wav", type=str, required=True, help="Audio file to infer.")
parser.add_argument("--batch_size", type=int, default=1, help="Batch size per GPU/CPU for training.")
# SECURITY NOTE(review): type=eval evaluates the raw command-line string as a
# Python expression before the choices check; only pass trusted values
# ("True"/"False") or replace eval with an explicit boolean parser.
parser.add_argument('--use_tensorrt', type=eval, default=False, choices=[True, False], help='Enable to use tensorrt to speed up.')
parser.add_argument("--precision", type=str, default="fp32", choices=["fp32", "fp16"], help='The tensorrt precision.')
parser.add_argument('--cpu_threads', type=int, default=10, help='Number of threads to predict when using cpu.')
# SECURITY NOTE(review): same eval-on-CLI-input concern as --use_tensorrt.
parser.add_argument('--enable_mkldnn', type=eval, default=False, choices=[True, False], help='Enable to use mkldnn to speed up when using cpu.')
parser.add_argument("--log_dir", type=str, default="./log", help="The path to save log.")
args = parser.parse_args()
# yapf: enable


def extract_features(files: list, **kwargs):
    """Load several audio files and turn them into one stacked feature batch.

    Every waveform is zero-padded at the end to the length of the longest one,
    converted with ``LogMelSpectrogram`` (built with that file's own sample
    rate plus ``kwargs``), transposed and given a leading axis, and the
    per-file results are stacked with ``np.stack``.

    Args:
        files: Collection of audio file paths (iterated, so not a single str).
        **kwargs: Extra keyword arguments forwarded to ``LogMelSpectrogram``.

    Returns:
        The ``np.stack`` of the per-file features along a new first axis.
    """
    waveforms = []
    srs = []
    max_length = float('-inf')
    for file in files:
        waveform, sr = load_audio(file)
        max_length = max(max_length, len(waveform))
        waveforms.append(waveform)
        srs.append(sr)

    feats = []
    # Pair each waveform with ITS sample rate; previously the rate of the
    # last loaded file leaked out of the loop above and was used for all.
    for waveform, sr in zip(waveforms, srs):
        # padding
        if len(waveform) < max_length:
            pad_width = max_length - len(waveform)
            waveform = np.pad(waveform, pad_width=(0, pad_width))

        feature_extractor = LogMelSpectrogram(sr, **kwargs)
        feat = feature_extractor(paddle.to_tensor(waveform))
        feat = paddle.transpose(feat, perm=[1, 0]).unsqueeze(0)
        feats.append(feat)

    return np.stack(feats, axis=0)


class Predictor(object):
    """Builds a ``paddle.inference`` predictor from ``model_dir`` and uses it
    to classify audio files."""

    def __init__(self,
                 model_dir,
                 device="gpu",
                 batch_size=1,
                 use_tensorrt=False,
                 precision="fp32",
                 cpu_threads=10,
                 enable_mkldnn=False):
        self.batch_size = batch_size

        if not paddlespeech.utils.satisfy_paddle_version('3.0.0-beta'):
            # Older Paddle: point the config at the two exported files, which
            # must both exist.
            model_file = os.path.join(model_dir, 'inference.pdmodel')
            params_file = os.path.join(model_dir, "inference.pdiparams")
            files_present = os.path.isfile(model_file) and os.path.isfile(
                params_file)
            assert files_present, 'Please check model and parameter files.'
            config = inference.Config(model_file, params_file)
        else:
            # Paddle >= 3.0.0-beta: directory plus file prefix.
            config = inference.Config(model_dir, 'inference')
            config.disable_mkldnn()

        if device == "gpu":
            # GPU settings: enable the GPU and, on request, TensorRT.
            # NOTE(review): arguments presumably mean "100 MB initial memory
            # pool, device id 0" -- confirm against the paddle.inference docs.
            config.enable_use_gpu(100, 0)
            # Looked up even without TensorRT, so an unknown precision string
            # raises KeyError here.
            precision_mode = {
                "fp16": inference.PrecisionType.Half,
                "fp32": inference.PrecisionType.Float32,
            }[precision]

            if use_tensorrt:
                config.enable_tensorrt_engine(
                    max_batch_size=batch_size,
                    min_subgraph_size=30,
                    precision_mode=precision_mode)
        elif device == "cpu":
            # CPU settings: optional mkldnn and the math-library thread count.
            config.disable_gpu()
            if enable_mkldnn:
                # cache 10 different shapes for mkldnn to avoid memory leak
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
            config.set_cpu_math_library_num_threads(cpu_threads)
        elif device == "xpu":
            # XPU settings.
            config.enable_xpu(100)

        config.switch_use_feed_fetch_ops(False)
        self.predictor = inference.create_predictor(config)

        # One handle per model input; only the first model output is read.
        input_names = self.predictor.get_input_names()
        self.input_handles = [
            self.predictor.get_input_handle(name) for name in input_names
        ]
        first_output_name = self.predictor.get_output_names()[0]
        self.output_handle = self.predictor.get_output_handle(
            first_output_name)

    def predict(self, wavs):
        """Extract features for ``wavs``, run the predictor and return the
        argmax class index per row of the softmaxed output."""
        feats = extract_features(wavs)

        first_input = self.input_handles[0]
        first_input.copy_from_cpu(feats)
        self.predictor.run()

        probs = softmax(self.output_handle.copy_to_cpu(), axis=1)
        return np.argmax(probs, axis=1)


if __name__ == "__main__":
    # Build the predictor from the parsed command-line options.
    predictor = Predictor(
        model_dir=args.model_dir,
        device=args.device,
        batch_size=args.batch_size,
        use_tensorrt=args.use_tensorrt,
        precision=args.precision,
        cpu_threads=args.cpu_threads,
        enable_mkldnn=args.enable_mkldnn)

    # Normalise every input path and make sure it points at a file.
    wavs = [args.wav]
    for pos, raw_path in enumerate(wavs):
        wavs[pos] = os.path.abspath(os.path.expanduser(raw_path))
        assert os.path.isfile(
            wavs[pos]), f'Please check input wave file: {wavs[pos]}'

    results = predictor.predict(wavs)
    for pos, wav in enumerate(wavs):
        label = ESC50.label_list[results[pos]]
        print(f'Wav: {wav} \t Label: {label}')


================================================
FILE: paddlespeech/cls/exps/panns/export_model.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os

import paddle

from paddlespeech.audio.datasets import ESC50
from paddlespeech.cls.models import cnn14
from paddlespeech.cls.models import SoundClassifier

# yapf: disable
# Command-line options of the export script; parsed at module level.
# NOTE(review): the first positional parameter of ArgumentParser is `prog`,
# so __doc__ is used as the program name here -- confirm this is intended.
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--checkpoint", type=str, required=True, help="Checkpoint of model.")
parser.add_argument("--output_dir", type=str, default='./export', help="Path to save static model and its parameters.")
args = parser.parse_args()
# yapf: enable

if __name__ == '__main__':
    # Rebuild the classifier and restore the trained weights.
    backbone = cnn14(pretrained=False, extract_embedding=True)
    model = SoundClassifier(
        backbone=backbone, num_class=len(ESC50.label_list))
    state_dict = paddle.load(args.checkpoint)
    model.set_state_dict(state_dict)
    model.eval()

    # Convert to a static graph; the first two input dims are left dynamic
    # (None) and the last one is fixed to 64.
    feature_spec = paddle.static.InputSpec(
        shape=[None, None, 64], dtype=paddle.float32)
    model = paddle.jit.to_static(
        model, input_spec=[feature_spec], full_graph=True)

    # Save in static graph model.
    save_prefix = os.path.join(args.output_dir, "inference")
    paddle.jit.save(model, save_prefix)


================================================
FILE: paddlespeech/cls/exps/panns/predict.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os

import paddle
import paddle.nn.functional as F
import yaml
from paddle.audio.features import LogMelSpectrogram

from paddlespeech.audio.backends import soundfile_load as load_audio
from paddlespeech.audio.utils import logger
from paddlespeech.cls.models import SoundClassifier
from paddlespeech.utils.dynamic_import import dynamic_import

# yapf: disable
# Command-line options of the prediction script; parsed at module level.
# NOTE(review): the first positional parameter of ArgumentParser is `prog`,
# so __doc__ is used as the program name here -- confirm this is intended.
parser = argparse.ArgumentParser(__doc__)
# Path to the YAML config; the __main__ block reads its 'model', 'data',
# 'feature' and 'predicting' sections.
parser.add_argument("--cfg_path", type=str, required=True)
args = parser.parse_args()
# yapf: enable


def extract_features(file: str, **feat_conf) -> paddle.Tensor:
    """Load one audio file and return its log-mel features.

    The file is loaded with ``sr=feat_conf['sr']``, given a leading axis,
    passed through ``LogMelSpectrogram(**feat_conf)`` and finally has its
    last two axes swapped.
    """
    wav_path = os.path.abspath(os.path.expanduser(file))
    samples, _ = load_audio(wav_path, sr=feat_conf['sr'])
    extractor = LogMelSpectrogram(**feat_conf)
    batch = paddle.to_tensor(samples).unsqueeze(0)
    log_mel = extractor(batch)
    return paddle.transpose(log_mel, [0, 2, 1])


if __name__ == '__main__':

    # Load the YAML config and pick out the sections used below.
    args.cfg_path = os.path.abspath(os.path.expanduser(args.cfg_path))
    with open(args.cfg_path, 'r') as cfg_file:
        config = yaml.safe_load(cfg_file)

    model_conf = config['model']
    data_conf = config['data']
    feat_conf = config['feature']
    predicting_conf = config['predicting']

    # Resolve the dataset and backbone classes named in the config.
    ds_class = dynamic_import(data_conf['dataset'])
    backbone_class = dynamic_import(model_conf['backbone'])

    # Build the classifier and restore the checkpoint.
    backbone = backbone_class(pretrained=False, extract_embedding=True)
    model = SoundClassifier(
        backbone=backbone, num_class=len(ds_class.label_list))
    model.set_state_dict(paddle.load(predicting_conf['checkpoint']))
    model.eval()

    # Run the model on the single configured audio file.
    feat = extract_features(predicting_conf['audio_file'], **feat_conf)
    logits = model(feat)
    probs = F.softmax(logits, axis=1).numpy()

    # Class indices ordered from most to least probable.
    sorted_indices = (-probs[0]).argsort()

    # Report the top_k labels with their probabilities.
    report = [f"[{predicting_conf['audio_file']}]\n"]
    for idx in sorted_indices[:predicting_conf['top_k']]:
        report.append(f'{ds_class.label_list[idx]}: {probs[0][idx]}\n')
    msg = "".join(report)
    logger.info(msg)


================================================
FILE: paddlespeech/cls/exps/panns/train.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os

import paddle
import yaml
from paddle.audio.features import LogMelSpectrogram

from paddlespeech.audio.utils import logger
from paddlespeech.audio.utils import Timer
from paddlespeech.cls.models import SoundClassifier
from paddlespeech.utils.dynamic_import import dynamic_import

# yapf: disable
# Command-line interface: the only option is the path to the YAML config file.
# Arguments are parsed at module level, i.e. as soon as this file is executed
# or imported.
# NOTE(review): the first positional parameter of ArgumentParser is `prog`,
# not `description`, and no module docstring is visible in this file, so
# `__doc__` is likely None here — confirm whether `description=` was intended.
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--cfg_path", type=str, required=True)
args = parser.parse_args()
# yapf: enable

if __name__ == "__main__":
    # Set up the parallel environment only when more than one process was
    # launched; single-process runs skip the initialisation.
    nranks = paddle.distributed.get_world_size()
    if nranks > 1:
        paddle.distributed.init_parallel_env()
    local_rank = paddle.distributed.get_rank()

    # Load the YAML experiment config given on the command line.
    args.cfg_path = os.path.abspath(os.path.expanduser(args.cfg_path))
    with open(args.cfg_path, 'r') as f:
        config = yaml.safe_load(f)

    model_conf = config['model']
    data_conf = config['data']
    feat_conf = config['feature']
    training_conf = config['training']

    # Dataset
    ds_class = dynamic_import(data_conf['dataset'])
    train_ds = ds_class(**data_conf['train'])
    dev_ds = ds_class(**data_conf['dev'])
    train_sampler = paddle.io.DistributedBatchSampler(
        train_ds,
        batch_size=training_conf['batch_size'],
        shuffle=True,
        drop_last=False)
    train_loader = paddle.io.DataLoader(
        train_ds,
        batch_sampler=train_sampler,
        num_workers=training_conf['num_workers'],
        return_list=True,
        use_buffer_reader=True, )

    # Feature
    feature_extractor = LogMelSpectrogram(**feat_conf)

    # Model
    backbone_class = dynamic_import(model_conf['backbone'])
    backbone = backbone_class(pretrained=True, extract_embedding=True)
    model = SoundClassifier(backbone, num_class=data_conf['num_classes'])
    model = paddle.DataParallel(model)
    optimizer = paddle.optimizer.Adam(
        learning_rate=training_conf['learning_rate'],
        parameters=model.parameters())
    criterion = paddle.nn.loss.CrossEntropyLoss()

    # The timer is sized for the total number of optimisation steps so that it
    # can report throughput and an ETA in the training log.
    steps_per_epoch = len(train_sampler)
    timer = Timer(steps_per_epoch * training_conf['epochs'])
    timer.start()

    for epoch in range(1, training_conf['epochs'] + 1):
        model.train()

        # Running statistics; they are reset after every log message.
        avg_loss = 0
        num_corrects = 0
        num_samples = 0
        for batch_idx, batch in enumerate(train_loader):
            waveforms, labels = batch
            feats = feature_extractor(
                waveforms
            )  # Need a padding when lengths of waveforms differ in a batch.
            feats = paddle.transpose(feats, [0, 2, 1])  # To [N, length, n_mels]

            logits = model(feats)

            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()
            # Advance the schedule only when the configured learning rate is a
            # scheduler object rather than a plain number.
            if isinstance(optimizer._learning_rate,
                          paddle.optimizer.lr.LRScheduler):
                optimizer._learning_rate.step()
            optimizer.clear_grad()

            # Calculate loss
            avg_loss += float(loss)

            # Calculate metrics
            preds = paddle.argmax(logits, axis=1)
            num_corrects += (preds == labels).numpy().sum()
            num_samples += feats.shape[0]

            timer.count()

            # Only rank 0 writes the training log.
            if (batch_idx + 1
                ) % training_conf['log_freq'] == 0 and local_rank == 0:
                lr = optimizer.get_lr()
                avg_loss /= training_conf['log_freq']
                avg_acc = num_corrects / num_samples

                print_msg = 'Epoch={}/{}, Step={}/{}'.format(
                    epoch, training_conf['epochs'], batch_idx + 1,
                    steps_per_epoch)
                print_msg += ' loss={:.4f}'.format(avg_loss)
                print_msg += ' acc={:.4f}'.format(avg_acc)
                print_msg += ' lr={:.6f} step/sec={:.2f} | ETA {}'.format(
                    lr, timer.timing, timer.eta)
                logger.train(print_msg)

                avg_loss = 0
                num_corrects = 0
                num_samples = 0

        # Every `save_freq` epochs, once the epoch has run through all of its
        # steps, rank 0 evaluates on the dev set and writes a checkpoint.
        if epoch % training_conf[
                'save_freq'] == 0 and batch_idx + 1 == steps_per_epoch and local_rank == 0:
            dev_sampler = paddle.io.BatchSampler(
                dev_ds,
                batch_size=training_conf['batch_size'],
                shuffle=False,
                drop_last=False)
            dev_loader = paddle.io.DataLoader(
                dev_ds,
                batch_sampler=dev_sampler,
                num_workers=training_conf['num_workers'],
                return_list=True, )

            model.eval()
            num_corrects = 0
            num_samples = 0
            # Validation needs no gradients: running it under no_grad avoids
            # recording the autograd graph for every dev batch.
            with logger.processing('Evaluation on validation dataset'), \
                    paddle.no_grad():
                for batch in dev_loader:
                    waveforms, labels = batch
                    feats = feature_extractor(waveforms)
                    feats = paddle.transpose(feats, [0, 2, 1])

                    logits = model(feats)

                    preds = paddle.argmax(logits, axis=1)
                    num_corrects += (preds == labels).numpy().sum()
                    num_samples += feats.shape[0]

            print_msg = '[Evaluation result]'
            print_msg += ' dev_acc={:.4f}'.format(num_corrects / num_samples)

            logger.eval(print_msg)

            # Save model
            save_dir = os.path.join(training_conf['checkpoint_dir'],
                                    'epoch_{}'.format(epoch))
            logger.info('Saving model checkpoint to {}'.format(save_dir))
            paddle.save(model.state_dict(),
                        os.path.join(save_dir, 'model.pdparams'))
            paddle.save(optimizer.state_dict(),
                        os.path.join(save_dir, 'model.pdopt'))


================================================
FILE: paddlespeech/cls/models/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .panns import *


================================================
FILE: paddlespeech/cls/models/panns/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .classifier import *
from .panns import *


================================================
FILE: paddlespeech/cls/models/panns/classifier.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.nn as nn


class SoundClassifier(nn.Layer):
    """
    Sound classification model: a panns pretrained backbone turns the input
    features into embeddings, and a dropout + linear head maps every embedding
    to per-class logits.
    """

    def __init__(self, backbone, num_class, dropout=0.1):
        super().__init__()
        self.backbone = backbone
        self.dropout = nn.Dropout(dropout)
        # The head's input width is the embedding size advertised by the backbone.
        self.fc = nn.Linear(backbone.emb_size, num_class)

    def forward(self, x):
        # Insert a channel axis before calling the backbone:
        # (batch_size, num_frames, num_melbins) -> (batch_size, 1, num_frames, num_melbins)
        embeddings = self.backbone(x.unsqueeze(1))
        return self.fc(self.dropout(embeddings))


================================================
FILE: paddlespeech/cls/models/panns/panns.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import paddle.nn as nn
import paddle.nn.functional as F

from paddlespeech.audio.utils.download import load_state_dict_from_url
from paddlespeech.utils.env import MODEL_HOME

# Public API of this module: the three model classes and their lowercase
# factory functions.
__all__ = ['CNN14', 'CNN10', 'CNN6', 'cnn14', 'cnn10', 'cnn6']

# Locations of the pretrained weight files, keyed by factory-function name.
# A factory looks up its entry here when it is called with `pretrained=True`.
pretrained_model_urls = {
    'cnn14': 'https://bj.bcebos.com/paddleaudio/models/panns_cnn14.pdparams',
    'cnn10': 'https://bj.bcebos.com/paddleaudio/models/panns_cnn10.pdparams',
    'cnn6': 'https://bj.bcebos.com/paddleaudio/models/panns_cnn6.pdparams',
}


class ConvBlock(nn.Layer):
    """Two 3x3 convolutions, each followed by batch norm and ReLU, then one pooling step."""

    def __init__(self, in_channels, out_channels):
        super().__init__()

        # Options shared by both convolutions: 3x3 kernel, stride 1, padding 1, no bias.
        conv_opts = dict(
            kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias_attr=False)
        self.conv1 = nn.Conv2D(
            in_channels=in_channels, out_channels=out_channels, **conv_opts)
        self.conv2 = nn.Conv2D(
            in_channels=out_channels, out_channels=out_channels, **conv_opts)
        self.bn1 = nn.BatchNorm2D(out_channels)
        self.bn2 = nn.BatchNorm2D(out_channels)

    def forward(self, x, pool_size=(2, 2), pool_type='avg'):
        """Apply conv-bn-relu twice, then pool with `pool_type` ('max', 'avg' or 'avg+max').

        Raises:
            Exception: if `pool_type` is not one of the supported names.
        """
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))

        if pool_type == 'avg':
            return F.avg_pool2d(x, kernel_size=pool_size)
        if pool_type == 'max':
            return F.max_pool2d(x, kernel_size=pool_size)
        if pool_type == 'avg+max':
            averaged = F.avg_pool2d(x, kernel_size=pool_size)
            maximum = F.max_pool2d(x, kernel_size=pool_size)
            return averaged + maximum
        raise Exception(
            f'Pooling type of {pool_type} is not supported. It must be one of "max", "avg" and "avg+max".'
        )


class ConvBlock5x5(nn.Layer):
    """A single 5x5 convolution followed by batch norm and ReLU, then one pooling step."""

    def __init__(self, in_channels, out_channels):
        super().__init__()

        # 5x5 kernel, stride 1, padding 2, no bias.
        conv_opts = dict(
            kernel_size=(5, 5), stride=(1, 1), padding=(2, 2), bias_attr=False)
        self.conv1 = nn.Conv2D(
            in_channels=in_channels, out_channels=out_channels, **conv_opts)
        self.bn1 = nn.BatchNorm2D(out_channels)

    def forward(self, x, pool_size=(2, 2), pool_type='avg'):
        """Apply conv-bn-relu once, then pool with `pool_type` ('max', 'avg' or 'avg+max').

        Raises:
            Exception: if `pool_type` is not one of the supported names.
        """
        x = F.relu(self.bn1(self.conv1(x)))

        if pool_type == 'avg':
            return F.avg_pool2d(x, kernel_size=pool_size)
        if pool_type == 'max':
            return F.max_pool2d(x, kernel_size=pool_size)
        if pool_type == 'avg+max':
            averaged = F.avg_pool2d(x, kernel_size=pool_size)
            maximum = F.max_pool2d(x, kernel_size=pool_size)
            return averaged + maximum
        raise Exception(
            f'Pooling type of {pool_type} is not supported. It must be one of "max", "avg" and "avg+max".'
        )


class CNN14(nn.Layer):
    """
    CNN14 (14-layer CNN): 6 convolutional blocks, each made of 2 convolutional
    layers with a 3 × 3 kernel, followed by 2 fully connected layers.

    Reference:
        PANNs: Large-Scale Pretrained Audio Neural Networks for Audio Pattern Recognition
        https://arxiv.org/pdf/1912.10211.pdf
    """
    emb_size = 2048

    def __init__(self, extract_embedding: bool=True):
        super().__init__()
        self.bn0 = nn.BatchNorm2D(64)
        self.conv_block1 = ConvBlock(in_channels=1, out_channels=64)
        self.conv_block2 = ConvBlock(in_channels=64, out_channels=128)
        self.conv_block3 = ConvBlock(in_channels=128, out_channels=256)
        self.conv_block4 = ConvBlock(in_channels=256, out_channels=512)
        self.conv_block5 = ConvBlock(in_channels=512, out_channels=1024)
        self.conv_block6 = ConvBlock(in_channels=1024, out_channels=2048)

        self.fc1 = nn.Linear(2048, self.emb_size)
        self.fc_audioset = nn.Linear(self.emb_size, 527)
        # True: forward() returns the embedding; False: the 527-way sigmoid output.
        self.extract_embedding = extract_embedding

    def forward(self, x):
        x.stop_gradient = False
        # Swap axes 1 and 3 so that bn0 (64 features) acts on what was the
        # last axis of the input, then swap back.
        x = self.bn0(x.transpose([0, 3, 2, 1])).transpose([0, 3, 2, 1])

        # Every block is average-pooled and followed by dropout; only the last
        # block uses a 1x1 pooling window.
        stages = (
            (self.conv_block1, (2, 2)),
            (self.conv_block2, (2, 2)),
            (self.conv_block3, (2, 2)),
            (self.conv_block4, (2, 2)),
            (self.conv_block5, (2, 2)),
            (self.conv_block6, (1, 1)), )
        for block, pool_size in stages:
            x = block(x, pool_size=pool_size, pool_type='avg')
            x = F.dropout(x, p=0.2, training=self.training)

        # Collapse axis 3 by mean, then axis 2 by max + mean.
        x = x.mean(axis=3)
        x = x.max(axis=2) + x.mean(axis=2)

        x = F.dropout(x, p=0.5, training=self.training)
        embedding = F.relu(self.fc1(x))

        if not self.extract_embedding:
            return F.sigmoid(self.fc_audioset(embedding))
        return F.dropout(embedding, p=0.5, training=self.training)


class CNN10(nn.Layer):
    """
    CNN10 (10-layer CNN): 4 convolutional blocks, each made of 2 convolutional
    layers with a 3 × 3 kernel, followed by 2 fully connected layers.

    Reference:
        PANNs: Large-Scale Pretrained Audio Neural Networks for Audio Pattern Recognition
        https://arxiv.org/pdf/1912.10211.pdf
    """
    emb_size = 512

    def __init__(self, extract_embedding: bool=True):
        super().__init__()
        self.bn0 = nn.BatchNorm2D(64)
        self.conv_block1 = ConvBlock(in_channels=1, out_channels=64)
        self.conv_block2 = ConvBlock(in_channels=64, out_channels=128)
        self.conv_block3 = ConvBlock(in_channels=128, out_channels=256)
        self.conv_block4 = ConvBlock(in_channels=256, out_channels=512)

        self.fc1 = nn.Linear(512, self.emb_size)
        self.fc_audioset = nn.Linear(self.emb_size, 527)
        # True: forward() returns the embedding; False: the 527-way sigmoid output.
        self.extract_embedding = extract_embedding

    def forward(self, x):
        x.stop_gradient = False
        # Swap axes 1 and 3 so that bn0 (64 features) acts on what was the
        # last axis of the input, then swap back.
        x = self.bn0(x.transpose([0, 3, 2, 1])).transpose([0, 3, 2, 1])

        # Every block is average-pooled with a 2x2 window and followed by dropout.
        blocks = (self.conv_block1, self.conv_block2, self.conv_block3,
                  self.conv_block4)
        for block in blocks:
            x = block(x, pool_size=(2, 2), pool_type='avg')
            x = F.dropout(x, p=0.2, training=self.training)

        # Collapse axis 3 by mean, then axis 2 by max + mean.
        x = x.mean(axis=3)
        x = x.max(axis=2) + x.mean(axis=2)

        x = F.dropout(x, p=0.5, training=self.training)
        embedding = F.relu(self.fc1(x))

        if not self.extract_embedding:
            return F.sigmoid(self.fc_audioset(embedding))
        return F.dropout(embedding, p=0.5, training=self.training)


class CNN6(nn.Layer):
    """
    CNN6 (6-layer CNN): 4 convolutional blocks, each made of 1 convolutional
    layer with a 5 × 5 kernel, followed by 2 fully connected layers.

    Reference:
        PANNs: Large-Scale Pretrained Audio Neural Networks for Audio Pattern Recognition
        https://arxiv.org/pdf/1912.10211.pdf
    """
    emb_size = 512

    def __init__(self, extract_embedding: bool=True):
        super().__init__()
        self.bn0 = nn.BatchNorm2D(64)
        self.conv_block1 = ConvBlock5x5(in_channels=1, out_channels=64)
        self.conv_block2 = ConvBlock5x5(in_channels=64, out_channels=128)
        self.conv_block3 = ConvBlock5x5(in_channels=128, out_channels=256)
        self.conv_block4 = ConvBlock5x5(in_channels=256, out_channels=512)

        self.fc1 = nn.Linear(512, self.emb_size)
        self.fc_audioset = nn.Linear(self.emb_size, 527)
        # True: forward() returns the embedding; False: the 527-way sigmoid output.
        self.extract_embedding = extract_embedding

    def forward(self, x):
        x.stop_gradient = False
        # Swap axes 1 and 3 so that bn0 (64 features) acts on what was the
        # last axis of the input, then swap back.
        x = self.bn0(x.transpose([0, 3, 2, 1])).transpose([0, 3, 2, 1])

        # Every block is average-pooled with a 2x2 window and followed by dropout.
        blocks = (self.conv_block1, self.conv_block2, self.conv_block3,
                  self.conv_block4)
        for block in blocks:
            x = block(x, pool_size=(2, 2), pool_type='avg')
            x = F.dropout(x, p=0.2, training=self.training)

        # Collapse axis 3 by mean, then axis 2 by max + mean.
        x = x.mean(axis=3)
        x = x.max(axis=2) + x.mean(axis=2)

        x = F.dropout(x, p=0.5, training=self.training)
        embedding = F.relu(self.fc1(x))

        if not self.extract_embedding:
            return F.sigmoid(self.fc_audioset(embedding))
        return F.dropout(embedding, p=0.5, training=self.training)


def cnn14(pretrained: bool=False, extract_embedding: bool=True) -> CNN14:
    """Build a CNN14 model.

    Args:
        pretrained: when True, fetch the state dict registered under 'cnn14'
            in `pretrained_model_urls` and load it into the model.
        extract_embedding: forwarded unchanged to the CNN14 constructor.

    Returns:
        The constructed CNN14 instance.
    """
    model = CNN14(extract_embedding=extract_embedding)
    if not pretrained:
        return model

    weights = load_state_dict_from_url(
        url=pretrained_model_urls['cnn14'],
        path=os.path.join(MODEL_HOME, 'panns'))
    model.set_state_dict(weights)
    return model


def cnn10(pretrained: bool=False, extract_embedding: bool=True) -> CNN10:
    """Build a CNN10 model.

    Args:
        pretrained: when True, fetch the state dict registered under 'cnn10'
            in `pretrained_model_urls` and load it into the model.
        extract_embedding: forwarded unchanged to the CNN10 constructor.

    Returns:
        The constructed CNN10 instance.
    """
    model = CNN10(extract_embedding=extract_embedding)
    if not pretrained:
        return model

    weights = load_state_dict_from_url(
        url=pretrained_model_urls['cnn10'],
        path=os.path.join(MODEL_HOME, 'panns'))
    model.set_state_dict(weights)
    return model


def cnn6(pretrained: bool=False, extract_embedding: bool=True) -> CNN6:
    """Build a CNN6 model.

    Args:
        pretrained: when True, fetch the state dict registered under 'cnn6'
            in `pretrained_model_urls` and load it into the model.
        extract_embedding: forwarded unchanged to the CNN6 constructor.

    Returns:
        The constructed CNN6 instance.
    """
    model = CNN6(extract_embedding=extract_embedding)
    if not pretrained:
        return model

    weights = load_state_dict_from_url(
        url=pretrained_model_urls['cnn6'],
        path=os.path.join(MODEL_HOME, 'panns'))
    model.set_state_dict(weights)
    return model


================================================
FILE: paddlespeech/dataset/__init__.py
================================================


================================================
FILE: paddlespeech/dataset/aidatatang_200zh/README.md
================================================
# [Aidatatang_200zh](http://openslr.elda.org/62/)

Aidatatang_200zh is a free Chinese Mandarin speech corpus provided by Beijing DataTang Technology Co., Ltd under Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International Public License.
The contents and the corresponding descriptions of the corpus include:

* The corpus contains 200 hours of acoustic data, which is mostly mobile recorded data.
* 600 speakers from different accent areas in China are invited to participate in the recording.
* The transcription accuracy for each sentence is higher than 98%.
* Recordings are conducted in a quiet indoor environment.
* The database is divided into training set, validation set, and testing set in a ratio of 7: 1: 2.
* Detailed information such as speech data coding and speaker information is preserved in the metadata file.
* Segmented transcripts are also provided.

The corpus aims to support researchers in speech recognition, machine translation, voiceprint recognition, and other speech-related fields. Therefore, the corpus is totally free for academic use.


================================================
FILE: paddlespeech/dataset/aidatatang_200zh/__init__.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .aidatatang_200zh import main as aidatatang_200zh_main


================================================
FILE: paddlespeech/dataset/aidatatang_200zh/aidatatang_200zh.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Prepare aidatatang_200zh mandarin dataset

Download, unpack and create manifest files.
Manifest file is a json-format file with each line containing the
meta data (i.e. audio filepath, transcript and audio duration)
of each audio file in the data set.
"""
import argparse
import codecs
import json
import os
from pathlib import Path

import soundfile

from paddlespeech.dataset.download import download
from paddlespeech.dataset.download import unpack
from paddlespeech.utils.argparse import print_arguments

# Root directory under the user's home where speech datasets are stored by
# default; used below to build the default of --target_dir.
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')

# Download location of the corpus archive. The commented-out URL_ROOT is an
# alternative root kept for reference.
URL_ROOT = 'http://www.openslr.org/resources/62'
# URL_ROOT = 'https://openslr.magicdatatech.com/resources/62'
DATA_URL = URL_ROOT + '/aidatatang_200zh.tgz'
# Passed to download() as `md5sum` together with DATA_URL
# (presumably the expected MD5 checksum of the archive — confirm in download()).
MD5_DATA = '6e0f4f39cd5f667a7ee53c397c8d0949'

# Command-line options. Note that they are parsed at module level, i.e. as
# soon as this module is executed or imported.
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
    "--target_dir",
    default=DATA_HOME + "/aidatatang_200zh",
    type=str,
    help="Directory to save the dataset. (default: %(default)s)")
parser.add_argument(
    "--manifest_prefix",
    default="manifest",
    type=str,
    help="Filepath prefix for output manifests. (default: %(default)s)")
args = parser.parse_args()


def create_manifest(data_dir, manifest_path_prefix):
    """Write one JSON-lines manifest and one ``.meta`` summary per data split.

    For each of the 'train', 'dev' and 'test' splits, every ``.wav`` file found
    under ``<data_dir>/corpus/<split>`` becomes one JSON line holding its
    utterance id, the name of its parent directory (``utt2spk``), its absolute
    path, its duration in seconds and its transcript with all whitespace
    removed. The manifest is written to ``<manifest_path_prefix>.<split>`` and
    the summary to ``<split>.meta`` in the directory of the prefix.

    Args:
        data_dir: Dataset root containing ``transcript/`` and ``corpus/``.
        manifest_path_prefix: Path prefix of the manifest files to create.

    Raises:
        KeyError: if a ``.wav`` file has no entry in the transcript file.
    """
    print("Creating manifest %s ..." % manifest_path_prefix)
    transcript_path = os.path.join(data_dir, 'transcript',
                                   'aidatatang_200_zh_transcript.txt')
    transcript_dict = {}
    # Read through a context manager so the transcript file is always closed.
    with codecs.open(transcript_path, 'r', 'utf-8') as fin:
        for line in fin:
            line = line.strip()
            if line == '':
                continue
            audio_id, text = line.split(' ', 1)
            # remove whitespace, character text
            text = ''.join(text.split())
            transcript_dict[audio_id] = text

    data_types = ['train', 'dev', 'test']
    for dtype in data_types:
        json_lines = []
        total_sec = 0.0
        total_text = 0.0
        total_num = 0

        audio_dir = os.path.join(data_dir, 'corpus/', dtype)
        for subfolder, _, filelist in sorted(os.walk(audio_dir)):
            for fname in filelist:
                if not fname.endswith('.wav'):
                    continue

                audio_path = os.path.abspath(os.path.join(subfolder, fname))
                # Utterance id is the file name without the '.wav' suffix.
                audio_id = os.path.basename(fname)[:-4]
                utt2spk = Path(audio_path).parent.name

                audio_data, samplerate = soundfile.read(audio_path)
                duration = float(len(audio_data) / samplerate)
                text = transcript_dict[audio_id]
                json_lines.append(
                    json.dumps(
                        {
                            'utt': audio_id,
                            'utt2spk': str(utt2spk),
                            'feat': audio_path,
                            'feat_shape': (duration, ),  # second
                            'text': text,
                        },
                        ensure_ascii=False))

                total_sec += duration
                total_text += len(text)
                total_num += 1

        manifest_path = manifest_path_prefix + '.' + dtype
        with codecs.open(manifest_path, 'w', 'utf-8') as fout:
            for line in json_lines:
                fout.write(line + '\n')

        # A split without any audio must not abort the whole run with a
        # ZeroDivisionError; report zero rates for it instead.
        text_per_sec = total_text / total_sec if total_sec > 0 else 0.0
        sec_per_utt = total_sec / total_num if total_num > 0 else 0.0

        manifest_dir = os.path.dirname(manifest_path_prefix)
        meta_path = os.path.join(manifest_dir, dtype) + '.meta'
        with open(meta_path, 'w') as f:
            print(f"{dtype}:", file=f)
            print(f"{total_num} utts", file=f)
            print(f"{total_sec / (60*60)} h", file=f)
            print(f"{total_text} text", file=f)
            print(f"{text_per_sec} text/sec", file=f)
            print(f"{sec_per_utt} sec/utt", file=f)


def prepare_dataset(url, md5sum, target_dir, manifest_path, subset):
    """Fetch and unpack the corpus if it is not present yet, then build the manifests.

    The download/unpack step is skipped when ``<target_dir>/<subset>`` already
    exists; the manifests are (re)created in either case.
    """
    data_dir = os.path.join(target_dir, subset)
    if os.path.exists(data_dir):
        print("Skip downloading and unpacking. Data already exists in %s." %
              target_dir)
    else:
        archive_path = download(url, md5sum, target_dir)
        unpack(archive_path, target_dir)
        # unpack all audio tar files
        corpus_dir = os.path.join(data_dir, 'corpus')
        # sorted() materialises the whole walk up front, so directories that
        # appear while unpacking are not visited by this outer loop.
        for parent, child_dirs, _ in sorted(os.walk(corpus_dir)):
            for sub in child_dirs:
                print(f"unpack dir {sub}...")
                nested_walk = sorted(os.walk(os.path.join(parent, sub)))
                for folder, _, archives in nested_walk:
                    for ftar in archives:
                        unpack(os.path.join(folder, ftar), folder, True)

    create_manifest(data_dir, manifest_path)


def main():
    """Entry point: print the parsed arguments, then prepare the aidatatang_200zh dataset."""
    print_arguments(args, globals())

    # Expand a leading '~' so the target directory is a real filesystem path.
    target_dir = args.target_dir
    if target_dir.startswith('~'):
        args.target_dir = os.path.expanduser(target_dir)

    dataset_options = dict(
        url=DATA_URL,
        md5sum=MD5_DATA,
        target_dir=args.target_dir,
        manifest_path=args.manifest_prefix,
        subset='aidatatang_200zh')
    prepare_dataset(**dataset_options)

    print("Data download and manifest prepare done!")


if __name__ == '__main__':
    main()


================================================
FILE: paddlespeech/dataset/aishell/README.md
================================================
# [Aishell1](http://openslr.elda.org/33/)

This Open Source Mandarin Speech Corpus, AISHELL-ASR0009-OS1, is 178 hours long. It is a part of AISHELL-ASR0009, whose utterances cover 11 domains, including smart home, autonomous driving, and industrial production. The whole recording was made in a quiet indoor environment, using 3 different devices at the same time: a high fidelity microphone (44.1kHz, 16-bit), an Android-system mobile phone (16kHz, 16-bit), and an iOS-system mobile phone (16kHz, 16-bit). Audio recorded in high fidelity was re-sampled to 16kHz to build AISHELL-ASR0009-OS1. 400 speakers from different accent areas in China were invited to participate in the recording. The manual transcription accuracy rate is above 95%, achieved through professional speech annotation and strict quality inspection. The corpus is divided into training, development and testing sets. (This database is free for academic research; commercial use is not allowed without permission.)


## Dataset Architecture

```bash
data_aishell
├── transcript      # text 目录
└── wav             # wav 目录
    ├── dev         # dev 目录
    │   ├── S0724   # spk 目录
    │   ├── S0725
    │   ├── S0726
    ├── train
    │   ├── S0724
    │   ├── S0725
    │   ├── S0726
    ├── test
    │   ├── S0724
    │   ├── S0725
    │   ├── S0726
 

data_aishell
├── transcript
│   └── aishell_transcript_v0.8.txt   # 文本标注文件
└── wav
    ├── dev
    │   ├── S0724
    │   │   ├── BAC009S0724W0121.wav  # S0724 的音频
    │   │   ├── BAC009S0724W0122.wav
    │   │   ├── BAC009S0724W0123.wav
    ├── test
    │   ├── S0724
    │   │   ├── BAC009S0724W0121.wav
    │   │   ├── BAC009S0724W0122.wav
    │   │   ├── BAC009S0724W0123.wav
    ├── train
    │   ├── S0724
    │   │   ├── BAC009S0724W0121.wav
    │   │   ├── BAC009S0724W0122.wav
    │   │   ├── BAC009S0724W0123.wav
    
标注文件格式： <utt> <tokens>
> head data_aishell/transcript/aishell_transcript_v0.8.txt 
BAC009S0002W0122 而 对 楼市 成交 抑制 作用 最 大 的 限 购
BAC009S0002W0123 也 成为 地方 政府 的 眼中 钉
BAC009S0002W0124 自 六月 底 呼和浩特 市 率先 宣布 取消 限 购 后
BAC009S0002W0125 各地 政府 便 纷纷 跟进
BAC009S0002W0126 仅 一 个 多 月 的 时间 里
BAC009S0002W0127 除了 北京 上海 广州 深圳 四 个 一 线 城市 和 三亚 之外
BAC009S0002W0128 四十六 个 限 购 城市 当中
BAC009S0002W0129 四十一 个 已 正式 取消 或 变相 放松 了 限 购
BAC009S0002W0130 财政 金融 政策 紧随 其后 而来
BAC009S0002W0131 显示 出 了 极 强 的 威力
```


================================================
FILE: paddlespeech/dataset/aishell/__init__.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .aishell import check_dataset
from .aishell import create_manifest
from .aishell import download_dataset
from .aishell import main as aishell_main
from .aishell import prepare_dataset


================================================
FILE: paddlespeech/dataset/aishell/aishell.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Prepare Aishell mandarin dataset

Download, unpack and create manifest files.
Manifest file is a json-format file with each line containing the
meta data (i.e. audio filepath, transcript and audio duration)
of each audio file in the data set.
"""
import argparse
import codecs
import json
import os
from pathlib import Path

import soundfile

from paddlespeech.dataset.download import download
from paddlespeech.dataset.download import unpack
from paddlespeech.utils.argparse import print_arguments

# Default root directory under which speech datasets are stored.
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')

# Base URL of the Aishell archives; the commented-out line is an alternative host.
URL_ROOT = 'http://openslr.elda.org/resources/33'
# URL_ROOT = 'https://openslr.magicdatatech.com/resources/33'
# Data archive URL and the MD5 digest passed along with it when downloading.
DATA_URL = URL_ROOT + '/data_aishell.tgz'
MD5_DATA = '2f494334227864a8a8fec932999db9d8'
# Resource archive URL and the MD5 digest passed along with it when downloading.
RESOURCE_URL = URL_ROOT + '/resource_aishell.tgz'
MD5_RESOURCE = '957d480a0fcac85fc18e550756f624e5'

# Command-line interface; the module docstring doubles as the description.
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
    "--target_dir",
    default=DATA_HOME + "/Aishell",
    type=str,
    help="Directory to save the dataset. (default: %(default)s)")
parser.add_argument(
    "--manifest_prefix",
    default="manifest",
    type=str,
    help="Filepath prefix for output manifests. (default: %(default)s)")
# NOTE(review): arguments are parsed at import time, so importing this module
# from another program also parses that program's sys.argv.
args = parser.parse_args()


def create_manifest(data_dir, manifest_path_prefix):
    """Write one manifest and one meta file per split of the Aishell data.

    The transcript file ``<data_dir>/transcript/aishell_transcript_v0.8.txt``
    is read first. Then, for each of the ``train``/``dev``/``test`` splits,
    every file found under ``<data_dir>/wav/<split>`` that has a transcript is
    read with ``soundfile`` and emitted as one JSON line.

    Args:
        data_dir: Root directory of the unpacked dataset.
        manifest_path_prefix: Manifests are written to
            ``<manifest_path_prefix>.<split>``; meta files are written next to
            the prefix as ``<split>.meta``.

    Returns:
        dict: Maps split name to its statistics (``dtype``, ``utts``,
        ``hours``, ``text``, ``text/sec``, ``sec/utt``). The two ratios are
        ``0.0`` when the split contains no transcribed audio.
    """
    print("Creating manifest %s ..." % os.path.join(data_dir,
                                                    manifest_path_prefix))
    transcript_path = os.path.join(data_dir, 'transcript',
                                   'aishell_transcript_v0.8.txt')
    transcript_dict = {}
    # Context manager so the transcript handle is released even on errors.
    with codecs.open(transcript_path, 'r', 'utf-8') as transcript_file:
        for line in transcript_file:
            line = line.strip()
            if line == '':
                continue
            audio_id, text = line.split(' ', 1)
            # remove whitespace so the text is a plain character sequence
            text = ''.join(text.split())
            transcript_dict[audio_id] = text

    data_metas = dict()
    data_types = ['train', 'dev', 'test']
    for dtype in data_types:
        json_lines = []
        total_sec = 0.0
        total_text = 0.0
        total_num = 0

        audio_dir = os.path.join(data_dir, 'wav', dtype)
        for subfolder, _, filelist in sorted(os.walk(audio_dir)):
            for fname in filelist:
                audio_path = os.path.abspath(os.path.join(subfolder, fname))
                # utterance id is the file name without its 4-char extension
                audio_id = os.path.basename(fname)[:-4]
                # audio without a transcription is skipped
                if audio_id not in transcript_dict:
                    continue

                # the name of the directory holding the audio is used as speaker
                utt2spk = Path(audio_path).parent.name
                audio_data, samplerate = soundfile.read(audio_path)
                duration = float(len(audio_data) / samplerate)
                text = transcript_dict[audio_id]
                json_lines.append(
                    json.dumps(
                        {
                            'utt': audio_id,
                            'utt2spk': str(utt2spk),
                            'feat': audio_path,
                            'feat_shape': (duration, ),  # second
                            'text': text
                        },
                        ensure_ascii=False))

                total_sec += duration
                total_text += len(text)
                total_num += 1

        manifest_path = manifest_path_prefix + '.' + dtype
        with codecs.open(manifest_path, 'w', 'utf-8') as fout:
            for line in json_lines:
                fout.write(line + '\n')

        meta = dict()
        meta["dtype"] = dtype  # train, dev, test
        meta["utts"] = total_num
        meta["hours"] = total_sec / (60 * 60)
        meta["text"] = total_text
        # An empty split must not abort manifest creation with a
        # ZeroDivisionError; report neutral ratios instead.
        meta["text/sec"] = total_text / total_sec if total_sec > 0 else 0.0
        meta["sec/utt"] = total_sec / total_num if total_num > 0 else 0.0
        data_metas[dtype] = meta

        manifest_dir = os.path.dirname(manifest_path_prefix)
        meta_path = os.path.join(manifest_dir, dtype) + '.meta'
        with open(meta_path, 'w') as f:
            for key, val in meta.items():
                print(f"{key}: {val}", file=f)

    return data_metas


def download_dataset(url, md5sum, target_dir):
    """Download one archive and unpack it, unless it is already unpacked.

    The directory used for the "already unpacked" check is derived from the
    archive name in ``url`` (``data_aishell.tgz`` -> ``data_aishell``).
    Previously the check was hard-wired to ``data_aishell``, so any other
    archive, such as the resource archive, was silently skipped as soon as
    the data archive had been unpacked.

    Args:
        url: URL of the archive.
        md5sum: Expected MD5 digest of the archive.
        target_dir: Directory that receives the archive and its content.

    Returns:
        str: Absolute path of the directory associated with the archive.
    """
    # assumes the archive unpacks into a directory named after itself; the
    # original code relied on this for data_aishell.tgz -- TODO confirm for
    # other archives.
    archive_name = url.split("/")[-1]
    for suffix in ('.tar.gz', '.tgz', '.tar'):
        if archive_name.endswith(suffix):
            archive_name = archive_name[:-len(suffix)]
            break
    data_dir = os.path.join(target_dir, archive_name or 'data_aishell')

    if not os.path.exists(data_dir):
        filepath = download(url, md5sum, target_dir)
        unpack(filepath, target_dir)
        # unpack all audio tar files; the walk is materialised by sorted()
        # before unpacking, and each inner archive is deleted once extracted
        audio_dir = os.path.join(data_dir, 'wav')
        for subfolder, _, filelist in sorted(os.walk(audio_dir)):
            for ftar in filelist:
                unpack(os.path.join(subfolder, ftar), subfolder, True)
    else:
        print("Skip downloading and unpacking. Data already exists in %s." %
              os.path.abspath(target_dir))
    return os.path.abspath(data_dir)


def check_dataset(data_dir):
    """Sanity-check an unpacked Aishell dataset directory.

    Verifies that the transcript file and the ``train``/``dev``/``test`` audio
    directories exist, warns about audio files without a transcript and
    asserts that every transcribed audio file has a 16 kHz sample rate.

    Args:
        data_dir: Root directory of the unpacked dataset.

    Raises:
        FileNotFoundError: If the transcript file is missing.
        IOError: If the audio directory of a split is missing.
        AssertionError: If a transcribed audio file is not sampled at 16 kHz.
    """
    print(f"check dataset {os.path.abspath(data_dir)} ...")

    transcript_path = os.path.join(data_dir, 'transcript',
                                   'aishell_transcript_v0.8.txt')
    if not os.path.exists(transcript_path):
        raise FileNotFoundError(f"no transcript file found in {data_dir}.")

    transcript_dict = {}
    # Context manager so the transcript handle is always released.
    with codecs.open(transcript_path, 'r', 'utf-8') as transcript_file:
        for line in transcript_file:
            line = line.strip()
            if line == '':
                continue
            audio_id, text = line.split(' ', 1)
            # remove whitespace so the text is a plain character sequence
            text = ''.join(text.split())
            transcript_dict[audio_id] = text

    data_types = ['train', 'dev', 'test']
    for dtype in data_types:
        # The summary below is reported per split, so the counter has to
        # restart for every split instead of accumulating across them.
        no_label = 0
        audio_dir = os.path.join(data_dir, 'wav', dtype)
        if not os.path.exists(audio_dir):
            raise IOError(f"{audio_dir} does not exist.")

        for subfolder, _, filelist in sorted(os.walk(audio_dir)):
            for fname in filelist:
                audio_path = os.path.abspath(os.path.join(subfolder, fname))
                # utterance id is the file name without its 4-char extension
                audio_id = os.path.basename(fname)[:-4]
                # audio without a transcription is only counted, not decoded
                if audio_id not in transcript_dict:
                    print(f"Warning: {audio_id} not has transcript.")
                    no_label += 1
                    continue

                _, samplerate = soundfile.read(audio_path)
                assert samplerate == 16000, f"{audio_path} sample rate is {samplerate} not 16k, please check."

        print(f"Warning: {dtype} has {no_label} audio does not has transcript.")


def prepare_dataset(url, md5sum, target_dir, manifest_path=None, check=False):
    """Download and unpack an archive, optionally check it and build manifests.

    Args:
        url: URL of the archive.
        md5sum: Expected MD5 digest of the archive.
        target_dir: Directory that receives the archive and its content.
        manifest_path: Manifest path prefix; when falsy no manifest is created.
        check: When true, run ``check_dataset`` on the unpacked data first.

    Returns:
        tuple: ``(data_dir, meta)`` where ``meta`` is the value returned by
        ``create_manifest`` or ``None`` when no manifest was requested.

    Raises:
        ValueError: If ``check`` is true and the dataset check fails. The
            original failure is attached as ``__cause__``.
    """
    data_dir = download_dataset(url, md5sum, target_dir)

    if check:
        try:
            check_dataset(data_dir)
        except Exception as e:
            # Chain explicitly so the traceback shows why the check failed.
            raise ValueError(
                f"{data_dir} dataset format not right, please check it."
            ) from e

    meta = None
    if manifest_path:
        meta = create_manifest(data_dir, manifest_path)

    return data_dir, meta


def main():
    """Command-line entry point: fetch the data and resource archives.

    The data archive is checked and turned into manifests; the resource
    archive is requested without a manifest and without a check.
    """
    print_arguments(args, globals())

    target_dir = args.target_dir
    if target_dir.startswith('~'):
        # expand a literal leading '~' and keep the parsed arguments in sync
        target_dir = os.path.expanduser(target_dir)
        args.target_dir = target_dir

    # audio + transcripts: checked and converted into manifests
    prepare_dataset(
        url=DATA_URL,
        md5sum=MD5_DATA,
        target_dir=args.target_dir,
        manifest_path=args.manifest_prefix,
        check=True)

    # resource archive: no manifest is produced for it
    prepare_dataset(
        url=RESOURCE_URL,
        md5sum=MD5_RESOURCE,
        target_dir=args.target_dir,
        manifest_path=None)

    print("Data download and manifest prepare done!")


if __name__ == '__main__':
    main()


================================================
FILE: paddlespeech/dataset/download.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import hashlib
import os
import sys
import tarfile
import zipfile
from typing import Text

__all__ = [
    "check_md5sum",
    "getfile_insensitive",
    "download_multi",
    "download",
    "unpack",
    "unzip",
    "md5file",
]


def md5file(fname):
    """Return the hexadecimal MD5 digest of the file at ``fname``.

    The file is read in 4096-byte chunks, so large files are not loaded into
    memory at once. The handle is closed even when reading fails.

    Args:
        fname: Path of the file to hash.

    Returns:
        str: Hex digest of the file content.
    """
    hash_md5 = hashlib.md5()
    with open(fname, "rb") as f:
        # iter() with a sentinel stops at the first empty read (end of file)
        for chunk in iter(lambda: f.read(4096), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()


def getfile_insensitive(path):
    """Resolve ``path`` to an existing file, ignoring the case of its name.

    Only the last path component is matched case-insensitively; the directory
    part is used as given (``'.'`` when absent).

    Returns:
        The path of the first regular file in the directory whose lower-cased
        name equals the lower-cased requested name, or ``None`` if none does.
    """
    head, tail = os.path.split(path)
    folder = head or '.'
    wanted = tail.lower()
    candidates = (os.path.join(folder, entry) for entry in os.listdir(folder)
                  if entry.lower() == wanted)
    # directories with a matching name are ignored
    return next((found for found in candidates if os.path.isfile(found)),
                None)


def download_multi(url, target_dir, extra_args):
    """Fetch ``url`` into ``target_dir`` with ``wget``, passing extra options.

    Args:
        url: URL handed to ``wget``.
        target_dir: Download directory; created when it does not exist.
        extra_args: Additional ``wget`` command-line text inserted after the URL.

    Returns:
        The value returned by ``os.system`` for the ``wget`` command.
    """
    target_missing = not os.path.exists(target_dir)
    if target_missing:
        os.makedirs(target_dir)
    print("Downloading %s ..." % url)
    # '-c' lets wget continue a partially downloaded file
    command = "wget -c " + url + ' ' + extra_args + " -P " + target_dir
    return os.system(command)


def download(url, md5sum, target_dir):
    """Download ``url`` into ``target_dir`` and verify its MD5 checksum.

    The download is skipped when the file already exists with the expected
    checksum. ``wget`` is invoked with an argument list rather than through a
    shell command string, so URLs and directories containing spaces or shell
    metacharacters are passed through unchanged.

    Args:
        url: URL of the file; its last path component is the local file name.
        md5sum: Expected MD5 hex digest.
        target_dir: Download directory; created when it does not exist.

    Returns:
        str: Path of the downloaded (or already present) file.

    Raises:
        RuntimeError: If the checksum of the downloaded file does not match.
    """
    import subprocess

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)
    filepath = os.path.join(target_dir, url.split("/")[-1])
    if os.path.exists(filepath) and md5file(filepath) == md5sum:
        print("File exists, skip downloading. (%s)" % filepath)
        return filepath

    print("Downloading %s ..." % url)
    # '-c' continues a partial download; the exit status is not inspected
    # because the checksum below is the authoritative success criterion.
    subprocess.call(["wget", "-c", url, "-P", target_dir])
    print("\nMD5 Checksum %s ..." % filepath)
    if not md5file(filepath) == md5sum:
        raise RuntimeError("MD5 checksum failed.")
    return filepath


def check_md5sum(filepath: Text, md5sum: Text) -> bool:
    """Tell whether a file has the expected MD5 digest.

    Args:
        filepath (Text): Path of the file to hash.
        md5sum (Text): Digest the file is compared against.

    Returns:
        bool: True when the digest computed by ``md5file`` equals ``md5sum``.
    """
    actual_digest = md5file(filepath)
    return actual_digest == md5sum


def unpack(filepath, target_dir, rm_tar=False):
    """Extract the tar archive ``filepath`` into ``target_dir``.

    Args:
        filepath: Path of the tar archive (any compression ``tarfile`` detects).
        target_dir: Directory that receives the archive members.
        rm_tar: When true, delete the archive after a successful extraction.
    """
    print("Unpacking %s ..." % filepath)
    # Context manager closes the archive even when extraction fails.
    # NOTE(review): extractall() trusts member paths; only use this on
    # archives from trusted sources.
    with tarfile.open(filepath) as tar:
        tar.extractall(target_dir)
    if rm_tar:
        os.remove(filepath)


def unzip(filepath, target_dir, rm_tar=False):
    """Extract the zip archive ``filepath`` into ``target_dir``.

    Args:
        filepath: Path of the zip archive.
        target_dir: Directory that receives the archive members.
        rm_tar: When true, delete the archive after a successful extraction.
    """
    print("Unpacking %s ..." % filepath)
    # Context manager closes the archive even when extraction fails.
    with zipfile.ZipFile(filepath, 'r') as archive:
        archive.extractall(target_dir)
    if rm_tar:
        os.remove(filepath)


================================================
FILE: paddlespeech/dataset/s2t/__init__.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# s2t utils binaries.
from .avg_model import main as avg_ckpts_main
from .build_vocab import main as build_vocab_main
from .compute_mean_std import main as compute_mean_std_main
from .compute_wer import main as compute_wer_main
from .format_data import main as format_data_main
from .format_rsl import main as format_rsl_main


================================================
FILE: paddlespeech/dataset/s2t/avg_model.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import glob
import json
import os

import numpy as np
import paddle


def average_checkpoints(dst_model="",
                        ckpt_dir="",
                        val_best=True,
                        num=5,
                        min_epoch=0,
                        max_epoch=65536):
    """Average the parameters of several checkpoints and save the result.

    Checkpoint metadata is read from the ``*.json`` files in ``ckpt_dir``
    whose name does not start with one of the characters ``t, r, a, i, n``
    (that is what the glob class ``[!train]`` matches). Only entries whose
    ``epoch`` lies in ``[min_epoch, max_epoch]`` are considered.

    Args:
        dst_model: Path the averaged parameters are saved to. A sibling
            ``<stem>.avg.json`` file describing the average is written too.
        ckpt_dir: Directory holding ``<epoch>.pdparams`` and the json files.
        val_best: When true, order candidates by ascending ``val_loss``;
            otherwise keep the newest-first modification-time order.
        num: Number of checkpoints to average.
        min_epoch: Smallest epoch taken into account.
        max_epoch: Largest epoch taken into account.

    Raises:
        AssertionError: If no candidate is found, or fewer than ``num``
            candidates are available.
    """
    paddle.set_device('cpu')

    # newest metadata file first
    meta_files = sorted(
        glob.glob(f'{ckpt_dir}/[!train]*.json'),
        key=os.path.getmtime,
        reverse=True)

    val_scores = []
    for meta_file in meta_files:
        with open(meta_file, 'r') as fin:
            record = json.load(fin)
        loss = record['val_loss']
        epoch = record['epoch']
        if min_epoch <= epoch <= max_epoch:
            val_scores.append((epoch, loss))
    assert val_scores, f"Not find any valid checkpoints: {val_scores}"

    # column 0: epoch, column 1: validation loss
    score_table = np.array(val_scores)
    if val_best:
        score_table = score_table[np.argsort(score_table[:, 1])]

    chosen = score_table[:num]
    beat_val_scores = chosen[:, 1]
    selected_epochs = chosen[:, 0].astype(np.int64)
    avg_val_score = np.mean(beat_val_scores)
    print("selected val scores = " + str(beat_val_scores))
    print("selected epochs = " + str(selected_epochs))
    print("averaged val score = " + str(avg_val_score))

    path_list = [
        ckpt_dir + '/{}.pdparams'.format(int(epoch)) for epoch in chosen[:, 0]
    ]
    print(path_list)

    # every requested checkpoint must be available
    assert num == len(path_list)

    summed = None
    for ckpt_path in path_list:
        print(f'Processing {ckpt_path}')
        loaded = paddle.load(ckpt_path)
        if summed is None:
            # the first loaded state doubles as the accumulator
            summed = loaded
            continue
        for name in summed.keys():
            summed[name] += loaded[name]

    # turn the sums into means
    for name in summed.keys():
        if summed[name] is not None:
            summed[name] /= num

    paddle.save(summed, dst_model)
    print(f'Saving to {dst_model}')

    summary = {
        "mode": 'val_best' if val_best else 'latest',
        "avg_ckpt": dst_model,
        "val_loss_mean": avg_val_score,
        "ckpts": path_list,
        "epochs": selected_epochs.tolist(),
        "val_losses": beat_val_scores.tolist(),
    }
    meta_path = os.path.splitext(dst_model)[0] + '.avg.json'
    with open(meta_path, 'w') as fout:
        fout.write(json.dumps(summary) + "\n")


def define_argparse():
    """Parse the command line of the checkpoint-averaging tool.

    Returns:
        argparse.Namespace: With ``dst_model``, ``ckpt_dir``, ``val_best``,
        ``num``, ``min_epoch`` and ``max_epoch``. The namespace is also
        printed.
    """
    parser = argparse.ArgumentParser(description='average model')
    parser.add_argument('--dst_model', required=True, help='averaged model')
    parser.add_argument(
        '--ckpt_dir', required=True, help='ckpt model dir for average')
    # The help text used to be a copy of the --dst_model one.
    parser.add_argument(
        '--val_best',
        action="store_true",
        help='average the checkpoints with the lowest validation loss '
        'instead of the most recently modified ones')
    parser.add_argument(
        '--num', default=5, type=int, help='nums for averaged model')
    parser.add_argument(
        '--min_epoch',
        default=0,
        type=int,
        help='min epoch used for averaging model')
    parser.add_argument(
        '--max_epoch',
        default=65536,  # Big enough
        type=int,
        help='max epoch used for averaging model')

    args = parser.parse_args()
    print(args)
    return args


def main():
    """Entry point: parse the command line and average the checkpoints."""
    cli = define_argparse()
    # forward each parsed option under the keyword of the same name
    options = dict(
        dst_model=cli.dst_model,
        ckpt_dir=cli.ckpt_dir,
        val_best=cli.val_best,
        num=cli.num,
        min_epoch=cli.min_epoch,
        max_epoch=cli.max_epoch)
    average_checkpoints(**options)


if __name__ == '__main__':
    main()


================================================
FILE: paddlespeech/dataset/s2t/build_vocab.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Build vocabulary from manifest files.
Each item in vocabulary file is a character.
"""
import argparse
import functools
import os
import tempfile
from collections import Counter

import jsonlines

from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
from paddlespeech.s2t.frontend.utility import BLANK
from paddlespeech.s2t.frontend.utility import SOS
from paddlespeech.s2t.frontend.utility import SPACE
from paddlespeech.s2t.frontend.utility import UNK
from paddlespeech.utils.argparse import add_arguments
from paddlespeech.utils.argparse import print_arguments


def count_manifest(counter, text_feature, manifest_path):
    """Add the token counts of one manifest file to ``counter``.

    Args:
        counter: Object whose ``update`` method receives the tokens of every
            text (a ``collections.Counter`` in ``build_vocab``).
        text_feature: Object providing ``tokenize(text, replace_space=False)``.
        manifest_path: Path of a jsonlines manifest; the ``text`` field of
            each record is either a string or a list of strings.
    """
    # read the whole manifest before touching the counter
    with jsonlines.open(manifest_path, 'r') as reader:
        records = list(reader)

    for record in records:
        if isinstance(record['text'], str):
            texts = [record['text']]
        else:
            assert isinstance(record['text'], list)
            texts = record['text']
        for text in texts:
            counter.update(text_feature.tokenize(text, replace_space=False))


def dump_text_manifest(fileobj, manifest_path, key='text'):
    """Write the texts stored under ``key`` in a manifest, one per line.

    Args:
        fileobj: Writable text file object receiving the lines.
        manifest_path: Path of a jsonlines manifest.
        key: Record field to dump; its value is a string or a list of strings.
    """
    # read the whole manifest before writing anything
    with jsonlines.open(manifest_path, 'r') as reader:
        records = list(reader)

    for record in records:
        if isinstance(record[key], str):
            lines = [record[key]]
        else:
            assert isinstance(record[key], list)
            lines = record[key]
        for text in lines:
            fileobj.write(text + "\n")


def build_vocab(manifest_paths="",
                vocab_path="examples/librispeech/data/vocab.txt",
                unit_type="char",
                count_threshold=0,
                text_keys='text',
                spm_mode="unigram",
                spm_vocab_size=0,
                spm_model_prefix="",
                spm_character_coverage=0.9995):
    """Build a vocabulary file from one or more manifest files.

    The vocabulary starts with ``BLANK`` and ``UNK``, continues with the
    sorted tokens and ends with ``SOS``. Tokens are sorted by descending
    count and the scan stops at the first count below ``count_threshold``.

    Args:
        manifest_paths: A manifest path or a list of manifest paths.
        vocab_path: Output vocabulary file (written as UTF-8).
        unit_type: Token unit handed to ``TextFeaturizer``; ``'spm'``
            additionally trains a SentencePiece model first.
        count_threshold: Minimum count a token needs to be kept.
        text_keys: Manifest key, or list/tuple of keys, whose texts are used
            to train the SentencePiece model.
        spm_mode: SentencePiece model type.
        spm_vocab_size: SentencePiece vocabulary size.
        spm_model_prefix: Prefix of the SentencePiece model files.
        spm_character_coverage: SentencePiece character coverage.
    """
    manifest_paths = [manifest_paths] if isinstance(manifest_paths,
                                                    str) else manifest_paths

    # Context manager so the vocabulary file is closed (and flushed) even if
    # tokenisation or SentencePiece training fails.
    with open(vocab_path, 'w', encoding='utf-8') as fout:
        fout.write(BLANK + "\n")  # 0 will be used for "blank" in CTC
        fout.write(UNK + '\n')  # <unk> must be 1

        if unit_type == 'spm':
            # tools/spm_train --input=$wave_data/lang_char/input.txt
            # --vocab_size=${nbpe} --model_type=${bpemode}
            # --model_prefix=${bpemodel} --input_sentence_size=100000000
            import sentencepiece as spm

            _text_keys = text_keys if isinstance(text_keys,
                                                 (list, tuple)) else [text_keys]
            # Written explicitly as UTF-8 so the training text does not depend
            # on the locale's default encoding.
            fp = tempfile.NamedTemporaryFile(
                mode='w', encoding='utf-8', delete=False)
            try:
                with fp:
                    for manifest_path in manifest_paths:
                        for text_key in _text_keys:
                            dump_text_manifest(fp, manifest_path, key=text_key)
                # train
                spm.SentencePieceTrainer.Train(
                    input=fp.name,
                    vocab_size=spm_vocab_size,
                    model_type=spm_mode,
                    model_prefix=spm_model_prefix,
                    input_sentence_size=100000000,
                    character_coverage=spm_character_coverage)
            finally:
                # never leave the temporary training text behind
                os.unlink(fp.name)

        # encode
        text_feature = TextFeaturizer(unit_type, "", spm_model_prefix)
        counter = Counter()

        for manifest_path in manifest_paths:
            count_manifest(counter, text_feature, manifest_path)

        count_sorted = sorted(
            counter.items(), key=lambda x: x[1], reverse=True)
        tokens = []
        for token, count in count_sorted:
            if count < count_threshold:
                break
            # replace space by `<space>`
            tokens.append(SPACE if token == ' ' else token)

        for token in sorted(tokens):
            fout.write(token + '\n')

        fout.write(SOS + "\n")  # <sos/eos>


def define_argparse():
    """Declare and parse the command-line options of the vocabulary builder.

    Every option is registered through ``add_arguments`` with the parser
    bound as ``argparser``.

    Returns:
        The namespace produced by ``parser.parse_args()``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    add_arg = functools.partial(add_arguments, argparser=parser)

    # presumably the positional order is (name, type, default, help) -- the
    # helper is defined elsewhere; verify against its signature.
    # yapf: disable
    add_arg('unit_type', str, "char", "Unit type, e.g. char, word, spm")
    add_arg('count_threshold', int, 0,
            "Truncation threshold for char/word counts.Default 0, no truncate.")
    add_arg('vocab_path', str,
            'examples/librispeech/data/vocab.txt',
            "Filepath to write the vocabulary.")
    add_arg('manifest_paths', str,
            None,
            "Filepaths of manifests for building vocabulary. "
            "You can provide multiple manifest files.",
            nargs='+',
            required=True)
    add_arg('text_keys', str,
            'text',
            "keys of the text in manifest for building vocabulary. "
            "You can provide multiple k.",
            nargs='+')
    # SentencePiece (bpe) options
    add_arg('spm_vocab_size', int, 0, "Vocab size for spm.")
    add_arg('spm_mode', str, 'unigram', "spm model type, e.g. unigram, spm, char, word. only need when `unit_type` is spm")
    # NOTE(review): if this help text reaches argparse unchanged, the
    # `%(spm_mode)_` sequence is not a valid %-format and `--help` may fail -- confirm.
    add_arg('spm_model_prefix', str, "", "spm_model_%(spm_mode)_%(count_threshold), spm model prefix, only need when `unit_type` is spm")
    add_arg('spm_character_coverage', float, 0.9995, "character coverage to determine the minimum symbols")
    # yapf: enable

    args = parser.parse_args()
    return args

def main():
    """Entry point: parse the options, print them and build the vocabulary."""
    cli = define_argparse()
    print_arguments(cli, globals())
    # every parsed option maps onto the build_vocab keyword of the same name
    options = vars(cli)
    build_vocab(**options)

if __name__ == '__main__':
    main()


================================================
FILE: paddlespeech/dataset/s2t/compute_mean_std.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Compute mean and std for feature normalizer, and save to file."""
import argparse
import functools

from paddlespeech.s2t.frontend.augmentor.augmentation import AugmentationPipeline
from paddlespeech.s2t.frontend.featurizer.audio_featurizer import AudioFeaturizer
from paddlespeech.s2t.frontend.normalizer import FeatureNormalizer
from paddlespeech.utils.argparse import add_arguments
from paddlespeech.utils.argparse import print_arguments


def compute_cmvn(manifest_path="data/librispeech/manifest.train",
                 output_path="data/librispeech/mean_std.npz",
                 num_samples=2000,
                 num_workers=0,
                 spectrum_type="linear",
                 feat_dim=13,
                 delta_delta=False,
                 stride_ms=10,
                 window_ms=20,
                 sample_rate=16000,
                 use_dB_normalization=True,
                 target_dB=-20):
    """Build a ``FeatureNormalizer`` from a manifest and write it to a file.

    Args:
        manifest_path: Manifest handed to ``FeatureNormalizer``.
        output_path: File passed to the normalizer's ``write_to_file``.
        num_samples: Forwarded to ``FeatureNormalizer``.
        num_workers: Forwarded to ``FeatureNormalizer``.
        spectrum_type: Feature type forwarded to ``AudioFeaturizer``.
        feat_dim: Feature dimension forwarded to ``AudioFeaturizer``.
        delta_delta: Forwarded to ``AudioFeaturizer``.
        stride_ms: Stride in milliseconds (converted to ``float``).
        window_ms: Window in milliseconds (converted to ``float``).
        sample_rate: Passed as ``target_sample_rate``.
        use_dB_normalization: Forwarded to ``AudioFeaturizer``.
        target_dB: Forwarded to ``AudioFeaturizer``.
    """
    # pipeline built from an empty JSON config
    pipeline = AugmentationPipeline('{}')
    featurizer = AudioFeaturizer(
        spectrum_type=spectrum_type,
        feat_dim=feat_dim,
        delta_delta=delta_delta,
        stride_ms=float(stride_ms),
        window_ms=float(window_ms),
        n_fft=None,
        max_freq=None,
        target_sample_rate=sample_rate,
        use_dB_normalization=use_dB_normalization,
        target_dB=target_dB,
        dither=0.0)

    def augment_and_featurize(audio_segment):
        # the segment is handed to the pipeline first, then featurized
        pipeline.transform_audio(audio_segment)
        return featurizer.featurize(audio_segment)

    FeatureNormalizer(
        mean_std_filepath=None,
        manifest_path=manifest_path,
        featurize_func=augment_and_featurize,
        num_samples=num_samples,
        num_workers=num_workers).write_to_file(output_path)


def define_argparse():
    """Declare and parse the command-line options of the mean/std tool.

    Every option is registered through ``add_arguments`` with the parser
    bound as ``argparser``.

    Returns:
        The namespace produced by ``parser.parse_args()``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    add_arg = functools.partial(add_arguments, argparser=parser)

    # yapf: disable
    add_arg('manifest_path', str,
            'data/librispeech/manifest.train',
            "Filepath of manifest to compute normalizer's mean and stddev.")

    add_arg('output_path', str,
            'data/librispeech/mean_std.npz',
            "Filepath of write mean and stddev to (.npz).")
    add_arg('num_samples',  int,    2000,    "# of samples to for statistics.")
    add_arg('num_workers',
                            default=0,
                            type=int,
                            help='num of subprocess workers for processing')


    add_arg('spectrum_type', str,
            'linear',
            "Audio feature type. Options: linear, mfcc, fbank.",
            choices=['linear', 'mfcc', 'fbank'])
    add_arg('feat_dim', int, 13, "Audio feature dim.")
    # NOTE(review): `bool` as an argparse type treats any non-empty string as
    # True unless add_arguments converts it -- confirm in the helper.
    add_arg('delta_delta', bool,  False, "Audio feature with delta delta.")
    add_arg('stride_ms', int, 10,  "stride length in ms.")
    # help text used to be a copy of the stride_ms one
    add_arg('window_ms', int, 20,  "window length in ms.")
    add_arg('sample_rate',  int, 16000,  "target sample rate.")
    add_arg('use_dB_normalization', bool, True, "do dB normalization.")
    add_arg('target_dB',   int, -20,  "target dB.")
    # yapf: enable

    args = parser.parse_args()
    return args

def main():
    """Entry point: parse the options, print them and compute the statistics."""
    cli = define_argparse()
    print_arguments(cli, globals())
    # every parsed option maps onto the compute_cmvn keyword of the same name
    options = vars(cli)
    compute_cmvn(**options)

if __name__ == '__main__':
    main()


================================================
FILE: paddlespeech/dataset/s2t/compute_wer.py
================================================
# Copyright 2021 Mobvoi Inc. All Rights Reserved.
# flake8: noqa
import codecs
import re
import sys
import unicodedata

# When true, normalize() strips `<...>` tags from every token.
remove_tag = True
# Characters treated as whitespace by characterize().
spacelist = [' ', '\t', '\r', '\n']
# Punctuation marks (ASCII and full-width) that characterize() drops.
puncts = [
    '!', ',', '?', '、', '。', '！', '，', '；', '？', '：', '「', '」', '︰', '『', '』',
    '《', '》'
]


def characterize(string):
    """Split a string into scoring units.

    Punctuation listed in ``puncts``, whitespace and unassigned code points
    are dropped. Every character of Unicode category ``Lo`` becomes its own
    unit. Any other character starts a run that extends until a non-ASCII
    character, a whitespace character or the run terminator is met; a run
    opened by ``<`` is terminated by ``>`` (which is kept), any other run by
    a space.

    Args:
        string: Text to split.

    Returns:
        list: The units in order of appearance.
    """
    units = []
    total = len(string)
    pos = 0
    while pos < total:
        ch = string[pos]
        category = unicodedata.category(ch)
        #https://unicodebook.readthedocs.io/unicode.html#unicode-categories
        droppable = (ch in puncts or category in ('Zs', 'Cn') or
                     ch in spacelist)  # punctuation, space or not assigned
        if droppable:
            pos += 1
            continue
        if category == 'Lo':  # letter-other
            units.append(ch)
            pos += 1
            continue

        # some input looks like: <unk><noise>, we want to separate it to two words.
        terminator = '>' if ch == '<' else ' '
        end = total
        for idx in range(pos + 1, total):
            nxt = string[idx]
            if ord(nxt) >= 128 or nxt in spacelist or nxt == terminator:
                end = idx
                break
        # keep a closing '>' as part of the run
        if end < total and string[end] == '>':
            end += 1
        units.append(string[pos:end])
        pos = end
    return units


def stripoff_tags(x):
    """Remove every ``<...>`` tag from ``x``.

    A tag starts at ``<`` and ends at the next ``>``; when no ``>`` follows,
    everything from ``<`` to the end is removed.

    Args:
        x: Text to clean; a falsy value yields ``''``.

    Returns:
        str: ``x`` without its tags.
    """
    if not x:
        return ''
    kept = []
    inside_tag = False
    for ch in x:
        if inside_tag:
            # swallow everything up to and including the closing '>'
            if ch == '>':
                inside_tag = False
        elif ch == '<':
            inside_tag = True
        else:
            kept.append(ch)
    return ''.join(kept)


def normalize(sentence, ignore_words, cs, split=None):
    """Normalise the tokens of a sentence before scoring.

    ``sentence`` and ``ignore_words`` both hold unicode strings.

    Args:
        sentence: Iterable of tokens.
        ignore_words: Container of tokens to drop (compared after upper-casing
            when ``cs`` is false).
        cs: Case-sensitive flag; when false every token is upper-cased.
        split: Optional mapping from a token to the list of tokens replacing it.

    Returns:
        list: The normalised tokens.
    """
    normalized = []
    for token in sentence:
        word = token if cs else token.upper()
        if word in ignore_words:
            continue
        if remove_tag:
            word = stripoff_tags(word)
        # a token made only of tags disappears
        if not word:
            continue
        if split and word in split:
            normalized.extend(split[word])
        else:
            normalized.append(word)
    return normalized


class Calculator:
    """Edit-distance aligner that accumulates per-token error statistics.

    Attributes are plain containers:
      * ``data``  maps a token to its ``all/cor/sub/ins/del`` counters,
      * ``space`` is the dynamic-programming table, reused between calls,
        whose cells are ``{'dist': int, 'error': str}``,
      * ``cost``  holds the cost of each edit operation.
    """

    def __init__(self):
        self.data = {}
        self.space = []
        self.cost = {'cor': 0, 'sub': 1, 'del': 1, 'ins': 1}

    def calculate(self, lab, rec):
        """Align ``rec`` against ``lab`` and update the statistics.

        Both lists are modified in place: an empty string is inserted at
        index 0 of each.

        Args:
            lab: Reference tokens.
            rec: Recognised tokens.

        Returns:
            dict: ``lab`` and ``rec`` hold the aligned token sequences (an
            empty string marks a gap); ``all``, ``cor``, ``sub``, ``ins`` and
            ``del`` hold the counts for this pair.
        """
        # Initialization
        lab.insert(0, '')
        rec.insert(0, '')
        n_lab, n_rec = len(lab), len(rec)
        table = self.space

        # grow the table to the needed size and wipe what is left from before
        while len(table) < n_lab:
            table.append([])
        for row in table:
            for cell in row:
                cell['dist'] = 0
                cell['error'] = 'non'
            row.extend({
                'dist': 0,
                'error': 'non'
            } for _ in range(len(row), n_rec))

        # first column: only deletions; first row: only insertions
        for i in range(n_lab):
            table[i][0].update(dist=i, error='del')
        for j in range(n_rec):
            table[0][j].update(dist=j, error='ins')
        table[0][0]['error'] = 'non'

        # make sure every non-empty token has a statistics entry
        for tokens in (lab, rec):
            for token in tokens:
                if token not in self.data and len(token) > 0:
                    self.data[token] = {
                        'all': 0,
                        'cor': 0,
                        'sub': 0,
                        'ins': 0,
                        'del': 0
                    }

        # Computing edit distance
        cost = self.cost
        for i in range(1, n_lab):
            above, current = table[i - 1], table[i]
            for j in range(1, n_rec):
                diag_op = 'cor' if lab[i] == rec[j] else 'sub'
                # candidates in priority order: on ties the earlier one wins
                options = (
                    (above[j]['dist'] + cost['del'], 'del'),
                    (current[j - 1]['dist'] + cost['ins'], 'ins'),
                    (above[j - 1]['dist'] + cost[diag_op], diag_op), )
                best_dist, best_op = sys.maxsize, 'none'
                for dist, op in options:
                    if dist < best_dist:
                        best_dist, best_op = dist, op
                current[j]['dist'] = best_dist
                current[j]['error'] = best_op

        # Tracing back
        result = {
            'lab': [],
            'rec': [],
            'all': 0,
            'cor': 0,
            'sub': 0,
            'ins': 0,
            'del': 0
        }
        i, j = n_lab - 1, n_rec - 1
        while True:
            op = table[i][j]['error']
            if op == 'non':  # starting point
                break
            if op in ('cor', 'sub', 'del'):
                # operations that consume a reference token
                ref = lab[i]
                if len(ref) > 0:
                    stats = self.data[ref]
                    stats['all'] = stats['all'] + 1
                    stats[op] = stats[op] + 1
                    result['all'] = result['all'] + 1
                    result[op] = result[op] + 1
                result['lab'].insert(0, ref)
                if op == 'del':
                    result['rec'].insert(0, "")
                else:
                    result['rec'].insert(0, rec[j])
                    j = j - 1
                i = i - 1
            elif op == 'ins':  # insertion
                hyp = rec[j]
                if len(hyp) > 0:
                    self.data[hyp]['ins'] = self.data[hyp]['ins'] + 1
                    result['ins'] = result['ins'] + 1
                result['lab'].insert(0, "")
                result['rec'].insert(0, hyp)
                j = j - 1
            else:  # shouldn't reach here
                # NOTE(review): neither index moves in this branch, so the
                # loop would repeat if a cell ever held an unexpected label.
                print(
                    'this should not happen , i = {i} , j = {j} , error = {error}'.
                    format(i=i, j=j, error=op))
        return result

    def overall(self):
        """Return the counters summed over every token seen so far."""
        totals = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0}
        for stats in self.data.values():
            for name in totals:
                totals[name] = totals[name] + stats[name]
        return totals

    def cluster(self, data):
        """Return the counters summed over the tokens of ``data`` seen so far.

        Tokens without statistics are ignored; a token listed twice is
        counted twice.
        """
        totals = {'all': 0, 'cor': 0, 'sub': 0, 'ins': 0, 'del': 0}
        for token in data:
            if token not in self.data:
                continue
            stats = self.data[token]
            for name in totals:
                totals[name] = totals[name] + stats[name]
        return totals

    def keys(self):
        """Return the list of tokens that have statistics."""
        return list(self.data)


def width(string):
    """Return the display width of ``string`` in terminal columns.

    Characters whose East Asian width class is 'A', 'F' or 'W' count as two
    columns; every other character counts as one.
    """
    wide_classes = "AFW"
    columns = 0
    for ch in string:
        if unicodedata.east_asian_width(ch) in wide_classes:
            columns += 2
        else:
            columns += 1
    return columns


def default_cluster(word):
    """Classify ``word`` into a coarse script cluster from its Unicode names.

    Each character is mapped to 'Number', 'Mandarin', 'English' or 'Japanese'
    according to the prefix of its Unicode name. A fixed set of punctuation
    characters is ignored. The word gets a cluster only if every remaining
    character maps to the same one.

    Args:
        word (str): token to classify.

    Returns:
        str: 'Number', 'Mandarin', 'English', 'Japanese' or 'Other'.
        'Other' is returned when the word is empty, contains only ignored
        punctuation, mixes clusters, or holds any unrecognized character.
    """
    cjk_prefixes = ('CJK UNIFIED IDEOGRAPH', 'CJK COMPATIBILITY IDEOGRAPH')
    latin_prefixes = ('LATIN CAPITAL LETTER', 'LATIN SMALL LETTER')
    # & / ' / @ / ℃ / = / . / - / _ / # / + / ;
    ignored_prefixes = ('AMPERSAND', 'APOSTROPHE', 'COMMERCIAL AT',
                        'DEGREE CELSIUS', 'EQUALS SIGN', 'FULL STOP',
                        'HYPHEN-MINUS', 'LOW LINE', 'NUMBER SIGN', 'PLUS SIGN',
                        'SEMICOLON')

    clusters = set()
    for char in word:
        # Unnamed characters (e.g. control characters) make unicodedata.name
        # raise ValueError without a default; '' matches no prefix below and
        # therefore falls through to 'Other'.
        name = unicodedata.name(char, '')
        if name.startswith('DIGIT'):  # 1
            clusters.add('Number')
        elif name.startswith(cjk_prefixes):  # 明 / 郎
            clusters.add('Mandarin')
        elif name.startswith(latin_prefixes):  # A / a
            clusters.add('English')
        elif name.startswith('HIRAGANA LETTER'):  # は こ め
            clusters.add('Japanese')
        elif name.startswith(ignored_prefixes):
            continue
        else:
            return 'Other'

    # Exactly one cluster means all classified characters agree.
    if len(clusters) == 1:
        return next(iter(clusters))
    return 'Other'


def usage():
    """Print the two-line command-line help banner to stdout."""
    banner = (
        "compute-wer.py : compute word error rate (WER) and align recognition results and references.",
        "         usage : python compute-wer.py [--cs={0,1}] [--cluster=foo] [--ig=ignore_file] [--char={0,1}] [--v={0,1}] [--padding-symbol={space,underline}] test.ref test.hyp > test.wer",
    )
    for text in banner:
        print(text)


def _wer_percent(result):
    """Return the error rate in percent for a counter dict; 0.0 when 'all' is zero."""
    if result['all'] != 0:
        return float(result['ins'] + result['sub'] + result[
            'del']) * 100.0 / result['all']
    return 0.0


def main():
    """Command-line entry point: align reference and hypothesis files and print WER.

    Leading ``--name=value`` switches are consumed from ``sys.argv`` until only
    the program name and two positional paths (reference file, hypothesis
    file) remain. Recognized switches:

        --maxw=N            maximum number of tokens per printed alignment line
        --rt=, --cs=, --char=   boolean switches ('0' means false, anything else true)
        --cluster=FILE      file of <Tag> ... </Tag> word clusters to report on
        --splitfile=FILE    lines of ``word part1 part2 ...`` passed to normalize()
        --ig=FILE           one ignored word per line
        --v=N               verbosity level
        --padding-symbol=   'space' or 'underline'

    Any other leading argument is silently dropped. Each line of both files
    is ``utt_id token token ...``; only utterances present in both files are
    scored. Results are written to stdout.
    """
    # python utils/compute-wer.py --char=1 --v=1 ref hyp > rsl.error
    if len(sys.argv) == 1:
        usage()
        sys.exit(0)
    calculator = Calculator()
    cluster_file = ''
    ignore_words = set()
    tochar = False
    verbose = 1
    padding_symbol = ' '
    case_sensitive = False
    max_words_per_line = sys.maxsize
    split = None
    # Consume leading switches until only `prog ref hyp` is left.
    while len(sys.argv) > 3:
        a = '--maxw='
        if sys.argv[1].startswith(a):
            b = sys.argv[1][len(a):]
            del sys.argv[1]
            max_words_per_line = int(b)
            continue
        a = '--rt='
        if sys.argv[1].startswith(a):
            b = sys.argv[1][len(a):].lower()
            del sys.argv[1]
            # NOTE(review): this binds a local only and nothing in this
            # function reads it; confirm whether a module-level setting was
            # meant to be updated.
            remove_tag = (b == 'true') or (b != '0')
            continue
        a = '--cs='
        if sys.argv[1].startswith(a):
            b = sys.argv[1][len(a):].lower()
            del sys.argv[1]
            case_sensitive = (b == 'true') or (b != '0')
            continue
        a = '--cluster='
        if sys.argv[1].startswith(a):
            cluster_file = sys.argv[1][len(a):]
            del sys.argv[1]
            continue
        a = '--splitfile='
        if sys.argv[1].startswith(a):
            split_file = sys.argv[1][len(a):]
            del sys.argv[1]
            split = dict()
            with codecs.open(split_file, 'r', 'utf-8') as fh:
                for line in fh:  # line in unicode
                    words = line.strip().split()
                    if len(words) >= 2:
                        split[words[0]] = words[1:]
            continue
        a = '--ig='
        if sys.argv[1].startswith(a):
            ignore_file = sys.argv[1][len(a):]
            del sys.argv[1]
            with codecs.open(ignore_file, 'r', 'utf-8') as fh:
                for line in fh:  # line in unicode
                    line = line.strip()
                    if len(line) > 0:
                        ignore_words.add(line)
            continue
        a = '--char='
        if sys.argv[1].startswith(a):
            b = sys.argv[1][len(a):].lower()
            del sys.argv[1]
            tochar = (b == 'true') or (b != '0')
            continue
        a = '--v='
        if sys.argv[1].startswith(a):
            b = sys.argv[1][len(a):].lower()
            del sys.argv[1]
            verbose = 0
            try:
                verbose = int(b)
            except ValueError:
                # Non-numeric value: anything other than '0' enables output.
                if b == 'true' or b != '0':
                    verbose = 1
            continue
        a = '--padding-symbol='
        if sys.argv[1].startswith(a):
            b = sys.argv[1][len(a):].lower()
            del sys.argv[1]
            if b == 'space':
                padding_symbol = ' '
            elif b == 'underline':
                padding_symbol = '_'
            continue
        # ignore invalid switch (every unrecognized leading argument is dropped)
        del sys.argv[1]

    # Both positional files are required; fail with the usage text instead of
    # an IndexError when one is missing.
    if len(sys.argv) < 3:
        usage()
        sys.exit(1)

    if not case_sensitive:
        ignore_words = {w.upper() for w in ignore_words}

    default_clusters = {}
    default_words = {}

    ref_file = sys.argv[1]
    hyp_file = sys.argv[2]
    rec_set = {}
    if split and not case_sensitive:
        split = {
            w.upper(): [part.upper() for part in parts]
            for w, parts in split.items()
        }

    with codecs.open(hyp_file, 'r', 'utf-8') as fh:
        for line in fh:
            if tochar:
                array = characterize(line)
            else:
                array = line.strip().split()
            if len(array) == 0:
                continue
            fid = array[0]
            rec_set[fid] = normalize(array[1:], ignore_words, case_sensitive,
                                     split)

    # compute error rate on the interaction of reference file and hyp file
    with open(ref_file, 'r', encoding='utf-8') as ref_fh:
        for line in ref_fh:
            if tochar:
                array = characterize(line)
            else:
                array = line.rstrip('\n').split()
            if len(array) == 0:
                continue
            fid = array[0]
            if fid not in rec_set:
                continue
            lab = normalize(array[1:], ignore_words, case_sensitive, split)
            rec = rec_set[fid]
            if verbose:
                print('\nutt: %s' % fid)

            # Remember the default cluster of every word seen so far.
            for word in rec + lab:
                if word not in default_words:
                    default_cluster_name = default_cluster(word)
                    if default_cluster_name not in default_clusters:
                        default_clusters[default_cluster_name] = {}
                    if word not in default_clusters[default_cluster_name]:
                        default_clusters[default_cluster_name][word] = 1
                    default_words[word] = default_cluster_name

            result = calculator.calculate(lab, rec)
            if not verbose:
                continue

            print('WER: %4.2f %%' % _wer_percent(result), end=' ')
            print('N=%d C=%d S=%d D=%d I=%d' %
                  (result['all'], result['cor'], result['sub'], result['del'],
                   result['ins']))
            # Padding needed so each aligned lab/rec pair has equal width.
            space = {'lab': [], 'rec': []}
            for idx in range(len(result['lab'])):
                len_lab = width(result['lab'][idx])
                len_rec = width(result['rec'][idx])
                length = max(len_lab, len_rec)
                space['lab'].append(length - len_lab)
                space['rec'].append(length - len_rec)
            upper_lab = len(result['lab'])
            upper_rec = len(result['rec'])
            lab1, rec1 = 0, 0
            while lab1 < upper_lab or rec1 < upper_rec:
                if verbose > 1:
                    # fid is already text; encoding it would print a b'...' repr.
                    print('lab(%s):' % fid, end=' ')
                else:
                    print('lab:', end=' ')
                lab2 = min(upper_lab, lab1 + max_words_per_line)
                for idx in range(lab1, lab2):
                    token = result['lab'][idx]
                    print('{token}'.format(token=token), end='')
                    print(padding_symbol * space['lab'][idx], end='')
                    print(' ', end='')
                print()
                if verbose > 1:
                    print('rec(%s):' % fid, end=' ')
                else:
                    print('rec:', end=' ')
                rec2 = min(upper_rec, rec1 + max_words_per_line)
                for idx in range(rec1, rec2):
                    token = result['rec'][idx]
                    print('{token}'.format(token=token), end='')
                    print(padding_symbol * space['rec'][idx], end='')
                    print(' ', end='')
                print('\n', end='\n')
                lab1 = lab2
                rec1 = rec2

    if verbose:
        print(
            '==========================================================================='
        )
        print()

    result = calculator.overall()
    print('Overall -> %4.2f %%' % _wer_percent(result), end=' ')
    print('N=%d C=%d S=%d D=%d I=%d' %
          (result['all'], result['cor'], result['sub'], result['del'],
           result['ins']))
    if not verbose:
        print()

    if verbose:
        for cluster_id in default_clusters:
            result = calculator.cluster(
                [k for k in default_clusters[cluster_id]])
            print('%s -> %4.2f %%' % (cluster_id, _wer_percent(result)),
                  end=' ')
            print('N=%d C=%d S=%d D=%d I=%d' %
                  (result['all'], result['cor'], result['sub'], result['del'],
                   result['ins']))
        if len(cluster_file) > 0:  # compute separated WERs for word clusters
            cluster_id = ''
            cluster = []
            with open(cluster_file, 'r', encoding='utf-8') as cluster_fh:
                for line in cluster_fh:
                    # The file is opened in text mode, so lines are already
                    # str and must not be decoded again.
                    for token in line.rstrip('\n').split():
                        # end of cluster reached, like </Keyword>
                        if token[0:2] == '</' and token[len(token)-1] == '>' and \
                           token.lstrip('</').rstrip('>') == cluster_id :
                            result = calculator.cluster(cluster)
                            print('%s -> %4.2f %%' %
                                  (cluster_id, _wer_percent(result)),
                                  end=' ')
                            print('N=%d C=%d S=%d D=%d I=%d' %
                                  (result['all'], result['cor'], result['sub'],
                                   result['del'], result['ins']))
                            cluster_id = ''
                            cluster = []
                        # begin of cluster reached, like <Keyword>
                        elif token[0] == '<' and token[len(token)-1] == '>' and \
                             cluster_id == '' :
                            cluster_id = token.lstrip('<').rstrip('>')
                            cluster = []
                        # general terms, like WEATHER / CAR / ...
                        else:
                            cluster.append(token)
        print()
        print(
            '==========================================================================='
        )


# Run the WER computation only when this file is executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: paddlespeech/dataset/s2t/format_data.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""format manifest with more metadata."""
import argparse
import functools
import json

import jsonlines

from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
from paddlespeech.s2t.frontend.utility import load_cmvn
from paddlespeech.s2t.io.utility import feat_type
from paddlespeech.utils.argparse import add_arguments
from paddlespeech.utils.argparse import print_arguments


def define_argparse():
    """Build the command-line parser for manifest formatting and parse ``sys.argv``.

    Options are registered through ``add_arguments`` with this parser bound
    as ``argparser``.

    Returns:
        The namespace produced by ``parser.parse_args()``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    add_arg = functools.partial(add_arguments, argparser=parser)
    # yapf: disable
    add_arg('manifest_paths',   str,
            None,
            "Filepaths of manifests for building vocabulary. "
            "You can provide multiple manifest files.",
            nargs='+',
            required=True)
    add_arg('output_path',  str, None, "filepath of formated manifest.", required=True)
    add_arg('cmvn_path',       str,
            'examples/librispeech/data/mean_std.json',
            "Filepath of cmvn.")
    add_arg('unit_type', str, "char", "Unit type, e.g. char, word, spm")
    add_arg('vocab_path',       str,
            'examples/librispeech/data/vocab.txt',
            "Filepath of the vocabulary.")
    # bpe
    # argparse %-formats help strings, so literal percent signs are doubled;
    # otherwise rendering the help text fails on "%(bpe_mode)_".
    # NOTE(review): assumes add_arguments forwards `help` to argparse — confirm.
    add_arg('spm_model_prefix', str, None,
        "spm model prefix, spm_model_%%(bpe_mode)_%%(count_threshold), only need when `unit_type` is spm")

    # yapf: enable
    args = parser.parse_args()
    return args

def format_data(
    manifest_paths="",
    output_path="",
    cmvn_path="examples/librispeech/data/mean_std.json",
    unit_type="char",
    vocab_path="examples/librispeech/data/vocab.txt",
    spm_model_prefix=""):
    """Rewrite manifest entries into jsonlines records with input/output metadata.

    Args:
        manifest_paths: one manifest path (str) or a sequence of paths; each
            is read with ``jsonlines``.
        output_path: file that receives one JSON object per line (UTF-8).
        cmvn_path: CMVN statistics file; its extension is passed to
            ``load_cmvn`` as ``filetype``.
        unit_type: passed to ``TextFeaturizer``.
        vocab_path: passed to ``TextFeaturizer``.
        spm_model_prefix: passed to ``TextFeaturizer``.

    Raises:
        NotImplementedError: if an entry's ``feat`` is not a single string,
            or ``feat_type`` reports anything other than 'sound'.
    """
    manifest_paths = [manifest_paths] if isinstance(manifest_paths, str) else manifest_paths

    # get feat dim
    filetype = cmvn_path.split(".")[-1]
    mean, _ = load_cmvn(cmvn_path, filetype=filetype)
    feat_dim = mean.shape[0] #(D)
    print(f"Feature dim: {feat_dim}")

    text_feature = TextFeaturizer(unit_type, vocab_path, spm_model_prefix)
    vocab_size = text_feature.vocab_size
    print(f"Vocab size: {vocab_size}")

    def build_target(name, text):
        """Describe one transcript: its tokens, token ids and target shape."""
        tokens = text_feature.tokenize(text)
        tokenids = text_feature.featurize(text)
        return {
            'name': name,
            'shape': (len(tokenids), vocab_size),
            'text': text,
            'token': ' '.join(tokens),
            'tokenid': ' '.join(map(str, tokenids)),
        }

    # jsonline like this
    # {
    #   "input": [{"name": "input1", "shape": (100, 83), "feat": "xxx.ark:123"}],
    #   "output": [{"name":"target1", "shape": (40, 5002), "text": "a b c de"}],
    #   "utt2spk": "111-2222",
    #   "utt": "111-2222-333"
    # }
    count = 0
    # The context manager closes the output even if an entry raises. It is
    # opened only after cmvn/vocab loading succeeded, so a setup failure no
    # longer leaves a truncated output file behind.
    with open(output_path, 'w', encoding='utf-8') as fout:
        for manifest_path in manifest_paths:
            with jsonlines.open(str(manifest_path), 'r') as reader:
                manifest_jsons = list(reader)

            for line_json in manifest_jsons:
                output_json = {
                    "input": [],
                    "output": [],
                    'utt': line_json['utt'],
                    'utt2spk': line_json.get('utt2spk', 'global'),
                }

                # output
                line = line_json['text']
                if isinstance(line, str):
                    # only one target
                    output_json['output'].append(build_target('target1', line))
                else:
                    # isinstance(line, list), multi target in one vocab
                    for i, item in enumerate(line, 1):
                        output_json['output'].append(
                            build_target(f'target{i}', item))

                # input
                line = line_json['feat']
                if isinstance(line, str):
                    # only one input
                    feat_shape = line_json['feat_shape']
                    assert isinstance(feat_shape, (list, tuple)), type(feat_shape)
                    filetype = feat_type(line)
                    if filetype == 'sound':
                        # Build a new list: works for the tuple the assert
                        # allows and leaves the manifest entry unmodified.
                        feat_shape = list(feat_shape) + [feat_dim]
                    else: # kaldi
                        raise NotImplementedError('no support kaldi feat now!')

                    output_json['input'].append({
                        "name": "input1",
                        "shape": feat_shape,
                        "feat": line,
                        "filetype": filetype,
                    })
                else:
                    # isinstance(line, list), multi input
                    raise NotImplementedError("not support multi input now!")

                fout.write(json.dumps(output_json) + '\n')
                count += 1

    print(f"{manifest_paths} Examples number: {count}")

def main():
    """Parse the command line, echo the options, and format the manifests."""
    cli_args = define_argparse()
    print_arguments(cli_args, globals())
    options = vars(cli_args)
    format_data(**options)

# Format the manifests only when this file is executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: paddlespeech/dataset/s2t/format_rsl.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
format ref/hyp file for `utt text` format to compute CER/WER/MER.

norm:
BAC009S0764W0196 明确了发展目标和重点任务
BAC009S0764W0186 实现我国房地产市场的平稳运行


sclite:
加大对结构机械化环境和收集谈控机制力度(BAC009S0906W0240.wav)
河南省新乡市丰秋县刘光镇政府东五零左右(BAC009S0770W0441.wav)
"""
import argparse

import jsonlines

from paddlespeech.utils.argparse import print_arguments


def transform_hyp(origin, trans, trans_sclite):
    """Convert a jsonlines hypothesis file into plain-text scoring formats.

    For every record, the first entry of ``hyps`` is kept under the record's
    ``utt`` key. A later record with the same ``utt`` replaces an earlier one.

    Args:
        origin: The input json file which contains the model output
        trans: The output file for calculating CER/WER (``utt text`` per
            line); skipped when empty.
        trans_sclite: The output file for calculating CER/WER using sclite
            (``text(utt.wav)`` per line); skipped when empty.
    """
    input_dict = {}

    # Read-only access is enough; "r+" would needlessly require write
    # permission on the input file.
    with open(origin, "r", encoding="utf8") as f:
        for item in jsonlines.Reader(f):
            input_dict[item["utt"]] = item["hyps"][0]

    if trans:
        with open(trans, "w", encoding="utf8") as f:
            for key, text in input_dict.items():
                f.write(key + " " + text + "\n")
        print(f"transform_hyp output: {trans}")

    if trans_sclite:
        # Explicit UTF-8 so the result does not depend on the locale's
        # default encoding, consistent with the other files opened here.
        with open(trans_sclite, "w", encoding="utf8") as f:
            for key, text in input_dict.items():
                f.write(text + "(" + key + ".wav" + ")" + "\n")
        print(f"transform_hyp output: {trans_sclite}")


def transform_ref(origin, trans, trans_sclite):
    """Convert a jsonlines reference file into plain-text scoring formats.

    For every record, the ``text`` field is kept under the record's ``utt``
    key. A later record with the same ``utt`` replaces an earlier one.

    Args:
        origin: The input json file which contains the reference text
        trans: The output file for calculating CER/WER (``utt text`` per
            line); skipped when empty.
        trans_sclite: The output file for calculating CER/WER using sclite
            (``text(utt.wav)`` per line); skipped when empty.
    """
    input_dict = {}

    with open(origin, "r", encoding="utf8") as f:
        for item in jsonlines.Reader(f):
            input_dict[item["utt"]] = item["text"]

    if trans:
        with open(trans, "w", encoding="utf8") as f:
            for key, text in input_dict.items():
                f.write(key + " " + text + "\n")
        # The message names this function (it used to say transform_hyp).
        print(f"transform_ref output: {trans}")

    if trans_sclite:
        # Explicit UTF-8 so the result does not depend on the locale's
        # default encoding, consistent with the other files opened here.
        with open(trans_sclite, "w", encoding="utf8") as f:
            for key, text in input_dict.items():
                f.write(text + "(" + key + ".wav" + ")" + "\n")
        print(f"transform_ref output: {trans_sclite}")


def define_argparse():
    """Declare the six string options of this tool and parse ``sys.argv``.

    Every option defaults to the empty string.

    Returns:
        argparse.Namespace: the parsed options.
    """
    parser = argparse.ArgumentParser(
        prog='format ref/hyp file for compute CER/WER', add_help=True)
    # (flag, help text) pairs, registered in this order.
    option_table = (
        ('--origin_hyp', 'origin hyp file'),
        ('--trans_hyp', 'hyp file for caculating CER/WER'),
        ('--trans_hyp_sclite', 'hyp file for caculating CER/WER by sclite'),
        ('--origin_ref', 'origin ref file'),
        ('--trans_ref', 'ref file for caculating CER/WER'),
        ('--trans_ref_sclite', 'ref file for caculating CER/WER by sclite'),
    )
    for flag, help_text in option_table:
        parser.add_argument(flag, type=str, default="", help=help_text)
    return parser.parse_args()


def format_result(origin_hyp="",
                  trans_hyp="",
                  trans_hyp_sclite="",
                  origin_ref="",
                  trans_ref="",
                  trans_ref_sclite=""):
    """Convert the hypothesis and/or reference file, whichever is given.

    Each conversion runs only when its ``origin_*`` argument is non-empty;
    the matching ``trans_*`` arguments are forwarded as output targets.
    """
    hyp_requested = bool(origin_hyp)
    ref_requested = bool(origin_ref)

    if hyp_requested:
        transform_hyp(origin_hyp, trans_hyp, trans_sclite=trans_hyp_sclite)

    if ref_requested:
        transform_ref(origin_ref, trans_ref, trans_sclite=trans_ref_sclite)


def main():
    """Parse the command line, echo the options, and run the conversions."""
    cli_args = define_argparse()
    print_arguments(cli_args, globals())

    options = vars(cli_args)
    format_result(**options)


# Run the ref/hyp conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/kws/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .models.mdtc import MDTC


================================================
FILE: paddlespeech/kws/exps/__init__.py
================================================


================================================
FILE: paddlespeech/kws/exps/mdtc/__init__.py
================================================


================================================
FILE: paddlespeech/kws/exps/mdtc/collate.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time

import paddle


def collate_features(batch):
    """Collate ``(key, feat, label)`` samples into one right-padded batch.

    Every feature is padded along its first axis to the largest first-axis
    size in the batch, then the features are stacked.

    Args:
        batch: non-empty sequence of samples indexed as ``sample[0]`` (key),
            ``sample[1]`` (feature with a ``shape``) and ``sample[2]`` (label).
            NOTE(review): features are presumably (frames, dims) tensors —
            confirm against the dataset.

    Returns:
        tuple: ``(keys, feats, labels, lengths)`` where ``keys`` is a list,
        ``feats`` is the stacked padded tensor, and ``labels`` / ``lengths``
        are tensors; ``lengths`` holds the first-axis sizes before padding.
    """
    # (key, feat, label)
    keys = []
    feats = []
    labels = []
    lengths = []
    for sample in batch:
        keys.append(sample[0])
        feats.append(sample[1])
        labels.append(sample[2])
        lengths.append(sample[1].shape[0])

    max_length = max(lengths)
    padded = [
        paddle.nn.functional.pad(
            feat, [0, max_length - feat.shape[0], 0, 0], data_format='NLC')
        for feat in feats
    ]

    return keys, paddle.stack(padded), paddle.to_tensor(
        labels), paddle.to_tensor(lengths)


================================================
FILE: paddlespeech/kws/exps/mdtc/compute_det.py
================================================
# Copyright (c) 2021 Binbin Zhang(binbzha@qq.com)
#               2022 Shaoqing Yu(954793264@qq.com)
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from wekws(https://github.com/wenet-e2e/wekws)
import os

import paddle
from tqdm import tqdm
from yacs.config import CfgNode

from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.utils.dynamic_import import dynamic_import


def load_label_and_score(keyword_index: int,
                         ds: paddle.io.Dataset,
                         score_file: os.PathLike):
    """Split per-utterance score lists into keyword and filler tables.

    Each line of ``score_file`` is ``utt_id keyword_id score score ...``.
    Only lines whose keyword id equals ``keyword_index`` are used, and the
    first such line per utterance wins.

    Args:
        keyword_index: index of the keyword under evaluation.
        ds: dataset exposing parallel ``keys``, ``labels`` and ``durations``.
        score_file: path of the score file.

    Returns:
        tuple: ``(keyword_table, filler_table, filler_duration)``. The tables
        map utterance id to its list of float scores; ``filler_duration`` is
        the summed duration of utterances whose label is not the keyword.
    """
    utt_scores = {}  # {utt_id: scores_over_frames}
    with open(score_file, 'r', encoding='utf8') as fin:
        for raw in fin:
            fields = raw.strip().split()
            utt_id, keyword_id, score_strs = fields[0], fields[1], fields[2:]
            if int(keyword_id) != keyword_index:
                continue
            parsed = [float(s) for s in score_strs]
            # Keep the first score list seen for an utterance.
            utt_scores.setdefault(utt_id, parsed)

    keyword_table, filler_table = {}, {}
    filler_duration = 0.0
    for utt_id, label, duration in zip(ds.keys, ds.labels, ds.durations):
        assert utt_id in utt_scores
        if label == keyword_index:
            # scores of keyword utt_id
            keyword_table[utt_id] = utt_scores[utt_id]
            continue
        # scores of non-keyword utt_id
        filler_table[utt_id] = utt_scores[utt_id]
        filler_duration += duration

    return keyword_table, filler_table, filler_duration


# Script entry point: sweep the trigger threshold from 0 to 1 and write one
# "threshold false_alarm_per_hour false_reject_rate" line per step.
if __name__ == '__main__':
    parser = default_argument_parser()
    parser.add_argument(
        '--keyword_index', type=int, default=0, help='keyword index')
    parser.add_argument(
        '--step',
        type=float,
        default=0.01,
        help='threshold step of trigger score')
    parser.add_argument(
        '--window_shift',
        type=int,
        default=50,
        help='window_shift is used to skip the frames after triggered')
    parser.add_argument(
        "--score_file",
        type=str,
        required=True,
        help='output file of trigger scores')
    parser.add_argument(
        '--stats_file',
        type=str,
        default='./stats.0.txt',
        help='output file of detection error tradeoff')
    args = parser.parse_args()

    # https://yaml.org/type/float.html
    config = CfgNode(new_allowed=True)
    if args.config:
        config.merge_from_file(args.config)

    # Dataset
    ds_class = dynamic_import(config['dataset'])
    test_ds = ds_class(
        data_dir=config['data_dir'],
        mode='test',
        feat_type=config['feat_type'],
        sample_rate=config['sample_rate'],
        frame_shift=config['frame_shift'],
        frame_length=config['frame_length'],
        n_mels=config['n_mels'], )

    keyword_table, filler_table, filler_duration = load_label_and_score(
        args.keyword_index, test_ds, args.score_file)
    print('Filler total duration Hours: {}'.format(filler_duration / 3600.0))
    pbar = tqdm(total=int(1.0 / args.step))
    # A shift below 1 would never advance past a triggering frame.
    window_shift = max(args.window_shift, 1)
    with open(args.stats_file, 'w', encoding='utf8') as fout:
        threshold = 0.0
        while threshold <= 1.0:
            # A keyword utterance is rejected when even its best frame score
            # stays below the threshold.
            num_false_reject = 0
            for score_list in keyword_table.values():
                if float(max(score_list)) < threshold:
                    num_false_reject += 1

            # Count triggers on filler utterances, skipping `window_shift`
            # frames after each trigger.
            num_false_alarm = 0
            for score_list in filler_table.values():
                i = 0
                while i < len(score_list):
                    if score_list[i] >= threshold:
                        num_false_alarm += 1
                        i += window_shift
                    else:
                        i += 1

            # Both rates default to 0.0 so the write below cannot hit an
            # unbound name when there are no keyword utterances or no filler
            # audio.
            false_reject_rate = 0.0
            if len(keyword_table) != 0:
                false_reject_rate = num_false_reject / len(keyword_table)
            num_false_alarm = max(num_false_alarm, 1e-6)
            false_alarm_per_hour = 0.0
            if filler_duration != 0:
                false_alarm_per_hour = num_false_alarm / \
                    (filler_duration / 3600.0)
            fout.write('{:.6f} {:.6f} {:.6f}\n'.format(
                threshold, false_alarm_per_hour, false_reject_rate))
            threshold += args.step
            pbar.update(1)

    pbar.close()
    print('DET saved to: {}'.format(args.stats_file))


================================================
FILE: paddlespeech/kws/exps/mdtc/plot_det_curve.py
================================================
# Copyright (c) 2021 Binbin Zhang(binbzha@qq.com)
#                    Menglong Xu
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from wekws(https://github.com/wenet-e2e/wekws)
import argparse
import os

import matplotlib.pyplot as plt
import numpy as np

# Command-line options are defined and parsed at module level, so parsing
# happens as soon as this file is run or imported.
# yapf: disable
parser = argparse.ArgumentParser(__doc__)
parser.add_argument('--keyword_label', type=str, required=True, help='keyword string shown on image')
parser.add_argument('--stats_file', type=str, required=True, help='output file of detection error tradeoff')
parser.add_argument('--img_file', type=str, default='./det.png', help='output det image')
args = parser.parse_args()
# yapf: enable


def load_stats_file(stats_file):
    """Load DET statistics as an array of ``[false_alarm_per_hour, frr_percent]`` rows.

    Each line of ``stats_file`` must hold exactly three whitespace-separated
    fields: threshold, false alarms per hour, and false rejection rate. The
    threshold is dropped, the rejection rate is scaled by 100, and the rows
    are returned in reverse file order.
    """
    rows = []
    with open(stats_file, 'r', encoding='utf8') as fin:
        for raw in fin:
            _, fa_per_hour, frr = raw.strip().split()
            rows.append([float(fa_per_hour), float(frr) * 100])
    return np.array(rows[::-1])


def plot_det_curve(keywords, stats_file, figure_file, xlim, x_step, ylim,
                   y_step):
    """Draw one DET curve per keyword label and save the figure.

    Args:
        keywords: Iterable of legend labels; one curve is plotted per label.
        stats_file: Stats file loaded through ``load_stats_file`` for every
            label.
        figure_file: Destination path handed to ``plt.savefig``.
        xlim: Upper bound of the x axis (false alarms per hour).
        x_step: Spacing between x ticks.
        ylim: Upper bound of the y axis (false rejection rate).
        y_step: Spacing between y ticks.
    """
    plt.figure(dpi=200)
    for rc_key, rc_value in (('xtick.direction', 'in'),
                             ('ytick.direction', 'in'),
                             ('font.size', 12)):
        plt.rcParams[rc_key] = rc_value

    for label in keywords:
        curve = load_stats_file(stats_file)
        false_alarms = curve[:, 0]
        false_rejects = curve[:, 1]
        plt.plot(false_alarms, false_rejects, label=label)

    # Axis limits and ticks.
    plt.xlim([0, xlim])
    plt.ylim([0, ylim])
    x_ticks = range(0, xlim + x_step, x_step)
    y_ticks = range(0, ylim + y_step, y_step)
    plt.xticks(x_ticks)
    plt.yticks(y_ticks)

    # Labels, grid, legend and output.
    plt.xlabel('False Alarm Per Hour')
    plt.ylabel('False Rejection Rate (\\%)')
    plt.grid(linestyle='--')
    plt.legend(loc='best', fontsize=16)
    plt.savefig(figure_file)


if __name__ == '__main__':
    # Resolve both paths up front so the final message shows an absolute path.
    img_file = os.path.abspath(args.img_file)
    stats_file = os.path.abspath(args.stats_file)

    # A single curve labelled with the keyword; both axes span 0..10 with
    # ticks every 2 units.
    plot_det_curve(
        keywords=[args.keyword_label],
        stats_file=stats_file,
        figure_file=img_file,
        xlim=10,
        x_step=2,
        ylim=10,
        y_step=2)

    print('DET curve image saved to: {}'.format(img_file))


================================================
FILE: paddlespeech/kws/exps/mdtc/score.py
================================================
# Copyright (c) 2021 Binbin Zhang(binbzha@qq.com)
#               2022 Shaoqing Yu(954793264@qq.com)
#               2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from wekws(https://github.com/wenet-e2e/wekws)
import paddle
from tqdm import tqdm
from yacs.config import CfgNode

from paddlespeech.kws.exps.mdtc.collate import collate_features
from paddlespeech.kws.models.mdtc import KWSModel
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.utils.dynamic_import import dynamic_import

if __name__ == '__main__':
    # Shared options plus the checkpoint to score and the output path.
    parser = default_argument_parser()
    parser.add_argument(
        "--ckpt",
        type=str,
        required=True,
        help='model checkpoint for evaluation.')
    parser.add_argument(
        "--score_file",
        type=str,
        default='./scores.txt',
        help='output file of trigger scores')
    args = parser.parse_args()

    # https://yaml.org/type/float.html
    config = CfgNode(new_allowed=True)
    if args.config:
        config.merge_from_file(args.config)

    # Dataset: the class is resolved from the config, built in 'test' mode.
    ds_class = dynamic_import(config['dataset'])
    ds_kwargs = {'data_dir': config['data_dir'], 'mode': 'test'}
    for option in ('feat_type', 'sample_rate', 'frame_shift', 'frame_length',
                   'n_mels'):
        ds_kwargs[option] = config[option]
    test_ds = ds_class(**ds_kwargs)

    test_sampler = paddle.io.BatchSampler(
        test_ds, batch_size=config['batch_size'], drop_last=False)
    test_loader = paddle.io.DataLoader(
        test_ds,
        batch_sampler=test_sampler,
        num_workers=config['num_workers'],
        return_list=True,
        use_buffer_reader=True,
        collate_fn=collate_features, )

    # Model: backbone class from the config, wrapped by the KWS head.
    backbone_class = dynamic_import(config['backbone'])
    backbone_kwargs = {}
    for option in ('stack_num', 'stack_size', 'in_channels', 'res_channels',
                   'kernel_size'):
        backbone_kwargs[option] = config[option]
    backbone = backbone_class(**backbone_kwargs)
    model = KWSModel(backbone=backbone, num_keywords=config['num_keywords'])
    model.set_state_dict(paddle.load(args.ckpt))
    model.eval()

    # One output line per (utterance, keyword): "<key> <keyword index>
    # <space separated per-frame scores>", restricted to the valid frames.
    with paddle.no_grad(), open(args.score_file, 'w', encoding='utf8') as f:
        for batch in tqdm(test_loader, total=len(test_loader)):
            keys, feats, labels, lengths = batch
            logits = model(feats)
            num_keywords = logits.shape[2]
            for i, key in enumerate(keys):
                # Drop padded frames beyond this utterance's length.
                score = logits[i][:lengths[i]]
                for keyword_i in range(num_keywords):
                    frame_values = score[:, keyword_i].tolist()
                    score_frames = ' '.join(
                        '{:.6f}'.format(value) for value in frame_values)
                    f.write('{} {} {}\n'.format(key, keyword_i, score_frames))

    print('Result saved to: {}'.format(args.score_file))


================================================
FILE: paddlespeech/kws/exps/mdtc/train.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import paddle
from yacs.config import CfgNode

from paddlespeech.audio.utils import logger
from paddlespeech.audio.utils import Timer
from paddlespeech.kws.exps.mdtc.collate import collate_features
from paddlespeech.kws.models.loss import max_pooling_loss
from paddlespeech.kws.models.mdtc import KWSModel
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.utils.dynamic_import import dynamic_import

if __name__ == '__main__':
    # Training entry point: builds datasets and model from the YAML config,
    # trains for config['epochs'] epochs, and every config['save_freq']
    # epochs evaluates on the dev split and saves a checkpoint (rank 0 only).
    parser = default_argument_parser()
    args = parser.parse_args()

    # https://yaml.org/type/float.html
    config = CfgNode(new_allowed=True)
    if args.config:
        config.merge_from_file(args.config)

    # Initialise the parallel environment only for multi-process runs.
    nranks = paddle.distributed.get_world_size()
    if nranks > 1:
        paddle.distributed.init_parallel_env()
    local_rank = paddle.distributed.get_rank()

    # Dataset
    ds_class = dynamic_import(config['dataset'])
    train_ds = ds_class(
        data_dir=config['data_dir'],
        mode='train',
        feat_type=config['feat_type'],
        sample_rate=config['sample_rate'],
        frame_shift=config['frame_shift'],
        frame_length=config['frame_length'],
        n_mels=config['n_mels'], )
    dev_ds = ds_class(
        data_dir=config['data_dir'],
        mode='dev',
        feat_type=config['feat_type'],
        sample_rate=config['sample_rate'],
        frame_shift=config['frame_shift'],
        frame_length=config['frame_length'],
        n_mels=config['n_mels'], )

    train_sampler = paddle.io.DistributedBatchSampler(
        train_ds,
        batch_size=config['batch_size'],
        shuffle=True,
        drop_last=False)
    train_loader = paddle.io.DataLoader(
        train_ds,
        batch_sampler=train_sampler,
        num_workers=config['num_workers'],
        return_list=True,
        use_buffer_reader=True,
        collate_fn=collate_features, )

    # Model
    backbone_class = dynamic_import(config['backbone'])
    backbone = backbone_class(
        stack_num=config['stack_num'],
        stack_size=config['stack_size'],
        in_channels=config['in_channels'],
        res_channels=config['res_channels'],
        kernel_size=config['kernel_size'], )
    model = KWSModel(backbone=backbone, num_keywords=config['num_keywords'])
    model = paddle.DataParallel(model)
    clip = paddle.nn.ClipGradByGlobalNorm(config['grad_clip'])
    optimizer = paddle.optimizer.Adam(
        learning_rate=config['learning_rate'],
        weight_decay=config['weight_decay'],
        parameters=model.parameters(),
        grad_clip=clip)
    criterion = max_pooling_loss

    steps_per_epoch = len(train_sampler)
    timer = Timer(steps_per_epoch * config['epochs'])
    timer.start()

    for epoch in range(1, config['epochs'] + 1):
        model.train()

        # Running statistics, reset after every log line.
        avg_loss = 0
        num_corrects = 0
        num_samples = 0
        for batch_idx, batch in enumerate(train_loader):
            keys, feats, labels, lengths = batch
            logits = model(feats)
            loss, corrects, acc = criterion(logits, labels, lengths)
            loss.backward()
            optimizer.step()
            # Advance the schedule only when the optimizer was given an
            # LRScheduler rather than a plain float learning rate.
            if isinstance(optimizer._learning_rate,
                          paddle.optimizer.lr.LRScheduler):
                optimizer._learning_rate.step()
            optimizer.clear_grad()

            # Calculate loss
            avg_loss += float(loss)

            # Calculate metrics
            num_corrects += corrects
            num_samples += feats.shape[0]

            timer.count()

            if (batch_idx + 1) % config['log_freq'] == 0 and local_rank == 0:
                lr = optimizer.get_lr()
                avg_loss /= config['log_freq']
                avg_acc = num_corrects / num_samples

                print_msg = 'Epoch={}/{}, Step={}/{}'.format(
                    epoch, config['epochs'], batch_idx + 1, steps_per_epoch)
                print_msg += ' loss={:.4f}'.format(avg_loss)
                print_msg += ' acc={:.4f}'.format(avg_acc)
                print_msg += ' lr={:.6f} step/sec={:.2f} | ETA {}'.format(
                    lr, timer.timing, timer.eta)
                logger.train(print_msg)

                avg_loss = 0
                num_corrects = 0
                num_samples = 0

        if epoch % config[
                'save_freq'] == 0 and batch_idx + 1 == steps_per_epoch and local_rank == 0:
            dev_sampler = paddle.io.BatchSampler(
                dev_ds,
                batch_size=config['batch_size'],
                shuffle=False,
                drop_last=False)
            dev_loader = paddle.io.DataLoader(
                dev_ds,
                batch_sampler=dev_sampler,
                num_workers=config['num_workers'],
                return_list=True,
                use_buffer_reader=True,
                collate_fn=collate_features, )

            model.eval()
            num_corrects = 0
            num_samples = 0
            # Validation only needs forward passes: disable autograd so no
            # graph is recorded for the dev batches.
            with logger.processing(
                    'Evaluation on validation dataset'), paddle.no_grad():
                for batch_idx, batch in enumerate(dev_loader):
                    keys, feats, labels, lengths = batch
                    logits = model(feats)
                    loss, corrects, acc = criterion(logits, labels, lengths)
                    num_corrects += corrects
                    num_samples += feats.shape[0]

            eval_acc = num_corrects / num_samples
            print_msg = '[Evaluation result]'
            print_msg += ' dev_acc={:.4f}'.format(eval_acc)

            logger.eval(print_msg)

            # Save model
            save_dir = os.path.join(config['checkpoint_dir'],
                                    'epoch_{}'.format(epoch))
            logger.info('Saving model checkpoint to {}'.format(save_dir))
            paddle.save(model.state_dict(),
                        os.path.join(save_dir, 'model.pdparams'))
            paddle.save(optimizer.state_dict(),
                        os.path.join(save_dir, 'model.pdopt'))


================================================
FILE: paddlespeech/kws/models/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .mdtc import KWSModel
from .mdtc import MDTC


================================================
FILE: paddlespeech/kws/models/loss.py
================================================
# Copyright (c) 2021 Binbin Zhang
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from wekws(https://github.com/wenet-e2e/wekws)
import paddle


def padding_mask(lengths: paddle.Tensor) -> paddle.Tensor:
    """Build a boolean mask that is True at padded positions.

    Args:
        lengths: 1-D tensor with the valid length of every sequence.
            Assumed to be an integer tensor comparable with int64 positions
            (TODO confirm against the collate function).

    Returns:
        Boolean tensor of shape ``(len(lengths), max(lengths))`` where entry
        ``[b, t]`` is True when ``t >= lengths[b]``.
    """
    longest = int(lengths.max().item())
    num_seqs = lengths.shape[0]
    # Row of positions 0..longest-1, repeated for every sequence.
    positions = paddle.arange(longest, dtype=paddle.int64)
    positions = positions.expand((num_seqs, longest))
    per_seq_length = lengths.unsqueeze(1)
    return positions >= per_seq_length


def fill_mask_elements(condition: paddle.Tensor, value: float,
                       x: paddle.Tensor) -> paddle.Tensor:
    """Return a copy of ``x`` with ``value`` wherever ``condition`` is True.

    Args:
        condition: Boolean tensor; must have exactly the same shape as ``x``.
        value: Scalar written at the selected positions.
        x: Source tensor; it is not modified.

    Returns:
        Tensor equal to ``x`` where ``condition`` is False and to ``value``
        elsewhere.

    Raises:
        AssertionError: If the two shapes differ.
    """
    assert condition.shape == x.shape
    # Constant tensor shaped like x, used as the "True" branch of where().
    replacement = paddle.ones_like(x, dtype=x.dtype) * value
    filled = paddle.where(condition, replacement, x)
    return filled


def max_pooling_loss(logits: paddle.Tensor,
                     target: paddle.Tensor,
                     lengths: paddle.Tensor,
                     min_duration: int=0):
    """Max-pooling cross-entropy loss and accuracy for keyword spotting.

    For the target keyword of an utterance the loss is ``-log`` of the
    maximum probability over its valid frames; for every other keyword it is
    ``-log`` of the minimum of ``1 - probability`` over the valid frames.
    The summed loss is divided by the number of utterances.

    Args:
        logits: Rank-3 tensor indexed as ``[utterance, frame, keyword]``.
            The values are used as probabilities (they are clipped to
            ``[1e-8, 1]``), so they are expected to be post-sigmoid.
        target: Per-utterance keyword index; a negative value denotes the
            filler class.
        lengths: Per-utterance number of valid frames.
        min_duration: When positive, the first ``min_duration`` frames are
            excluded from the max-pooling of the target keyword.

    Returns:
        Tuple ``(loss, num_correct, acc)``: the averaged loss, the number of
        correctly classified utterances, and ``num_correct / num_utts``.
    """
    mask = padding_mask(lengths)
    num_utts = logits.shape[0]
    num_keywords = logits.shape[2]

    loss = 0.0
    for i in range(num_utts):
        for j in range(num_keywords):
            # Add entropy loss CE = -(t * log(p) + (1 - t) * log(1 - p))
            if target[i] == j:
                # For the keyword, do max-pooling
                prob = logits[i, :, j]
                m = mask[i]
                if min_duration > 0:
                    # Edit a copy: `mask` is reused below (min-pooling and
                    # accuracy), and an indexed tensor may share storage
                    # with it, so an in-place write could corrupt it.
                    m = m.clone()
                    m[:min_duration] = True
                prob = fill_mask_elements(m, 0.0, prob)
                prob = paddle.clip(prob, 1e-8, 1.0)
                max_prob = prob.max()
                loss += -paddle.log(max_prob)
            else:
                # For other keywords or filler, do min-pooling
                prob = 1 - logits[i, :, j]
                prob = fill_mask_elements(mask[i], 1.0, prob)
                prob = paddle.clip(prob, 1e-8, 1.0)
                min_prob = prob.min()
                loss += -paddle.log(min_prob)
    loss = loss / num_utts

    # Compute accuracy of current batch
    # fill_mask_elements requires identical shapes, so the (utt, frame) mask
    # is expanded over the keyword axis; otherwise it fails as soon as there
    # is more than one keyword.
    frame_mask = paddle.expand(mask.unsqueeze(-1), logits.shape)
    logits = fill_mask_elements(frame_mask, 0.0, logits)
    max_logits = logits.max(1)
    num_correct = 0
    for i in range(num_utts):
        max_p = max_logits[i].max(0).item()
        idx = max_logits[i].argmax(0).item()
        # Predict correct as the i'th keyword
        if max_p > 0.5 and idx == target[i].item():
            num_correct += 1
        # Predict correct as the filler, filler id < 0
        if max_p < 0.5 and target[i].item() < 0:
            num_correct += 1
    acc = num_correct / num_utts
    return loss, num_correct, acc


================================================
FILE: paddlespeech/kws/models/mdtc.py
================================================
# Copyright (c) 2021 Jingyong Hou (houjingyong@gmail.com)
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from wekws(https://github.com/wenet-e2e/wekws)
import paddle
import paddle.nn as nn
import paddle.nn.functional as F


class DSDilatedConv1d(nn.Layer):
    """Depthwise-separable dilated 1-D convolution.

    A grouped (one group per input channel) dilated convolution without
    padding is followed by batch normalization and a kernel-size-1
    convolution that maps ``in_channels`` to ``out_channels``.
    """

    def __init__(
            self,
            in_channels: int,
            out_channels: int,
            kernel_size: int,
            dilation: int=1,
            stride: int=1,
            bias: bool=True, ):
        super().__init__()
        # Span of the dilated kernel beyond its first tap.
        self.receptive_fields = dilation * (kernel_size - 1)
        # Depthwise stage: every channel is convolved on its own.
        self.conv = nn.Conv1D(
            in_channels=in_channels,
            out_channels=in_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=0,
            dilation=dilation,
            groups=in_channels,
            bias_attr=bias, )
        self.bn = nn.BatchNorm1D(in_channels)
        # Pointwise stage: mixes channels with a kernel of size 1.
        self.pointwise = nn.Conv1D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            padding=0,
            dilation=1,
            bias_attr=bias, )

    def forward(self, inputs: paddle.Tensor):
        """Apply depthwise conv, batch norm and pointwise conv in order."""
        depthwise = self.conv(inputs)
        normalized = self.bn(depthwise)
        return self.pointwise(normalized)


class TCNBlock(nn.Layer):
    """Residual block built on a depthwise-separable dilated convolution.

    The block applies ``conv1 -> bn1 -> relu1 -> conv2 -> bn2`` and, when the
    input and residual channel counts match, adds the (cropped) input before
    the final ReLU. The convolution is unpadded, so the input is cropped on
    the last axis to line up with the shorter output.
    """

    def __init__(
            self,
            in_channels: int,
            res_channels: int,
            kernel_size: int,
            dilation: int,
            causal: bool, ):
        super().__init__()
        self.in_channels = in_channels
        self.res_channels = res_channels
        self.kernel_size = kernel_size
        self.dilation = dilation
        self.causal = causal
        # Number of positions the unpadded dilated conv removes.
        self.receptive_fields = (kernel_size - 1) * dilation
        self.half_receptive_fields = self.receptive_fields // 2

        self.conv1 = DSDilatedConv1d(
            in_channels=in_channels,
            out_channels=res_channels,
            kernel_size=kernel_size,
            dilation=dilation, )
        self.bn1 = nn.BatchNorm1D(res_channels)
        self.relu1 = nn.ReLU()

        self.conv2 = nn.Conv1D(
            in_channels=res_channels, out_channels=res_channels, kernel_size=1)
        self.bn2 = nn.BatchNorm1D(res_channels)
        self.relu2 = nn.ReLU()

    def forward(self, inputs: paddle.Tensor):
        """Run the block; the last axis shrinks by ``receptive_fields``."""
        hidden = self.conv1(inputs)
        hidden = self.relu1(self.bn1(hidden))
        hidden = self.bn2(self.conv2(hidden))

        # Without matching channel counts there is no residual connection.
        if self.in_channels != self.res_channels:
            return self.relu2(hidden)

        if self.causal:
            # Causal: drop the leading positions only.
            shortcut = inputs[:, :, self.receptive_fields:]
        else:
            # Non-causal: drop the same amount on both ends.
            half = self.half_receptive_fields
            shortcut = inputs[:, :, half:-half]
        return self.relu2(hidden + shortcut)


class TCNStack(nn.Layer):
    """Sequence of ``TCNBlock`` layers with exponentially growing dilation.

    Dilations cycle through ``1, 2, ..., 2**(stack_num - 1)`` and that cycle
    is repeated ``stack_size`` times.
    """

    def __init__(
            self,
            in_channels: int,
            stack_num: int,
            stack_size: int,
            res_channels: int,
            kernel_size: int,
            causal: bool, ):
        super().__init__()
        self.in_channels = in_channels
        self.stack_num = stack_num
        self.stack_size = stack_size
        self.res_channels = res_channels
        self.kernel_size = kernel_size
        self.causal = causal
        # The blocks are first stored as a LayerList so that
        # calculate_receptive_fields can iterate them, then re-wrapped as a
        # Sequential for the forward pass.
        self.res_blocks = self.stack_tcn_blocks()
        self.receptive_fields = self.calculate_receptive_fields()
        self.res_blocks = nn.Sequential(*self.res_blocks)

    def calculate_receptive_fields(self):
        """Sum the receptive fields of all blocks in ``self.res_blocks``."""
        total = 0
        for tcn_block in self.res_blocks:
            total = total + tcn_block.receptive_fields
        return total

    def build_dilations(self):
        """Return the dilation of every block, in construction order."""
        return [
            2**exponent
            for _ in range(0, self.stack_size)
            for exponent in range(0, self.stack_num)
        ]

    def stack_tcn_blocks(self):
        """Create the blocks; only the first one sees ``in_channels``."""
        dilations = self.build_dilations()
        first_dilation, remaining = dilations[0], dilations[1:]

        blocks = nn.LayerList()
        blocks.append(
            TCNBlock(
                self.in_channels,
                self.res_channels,
                self.kernel_size,
                first_dilation,
                self.causal, ))
        # Later blocks map res_channels to res_channels.
        for dilation in remaining:
            blocks.append(
                TCNBlock(
                    self.res_channels,
                    self.res_channels,
                    self.kernel_size,
                    dilation,
                    self.causal, ))
        return blocks

    def forward(self, inputs: paddle.Tensor):
        """Pass ``inputs`` through all blocks in sequence."""
        return self.res_blocks(inputs)


class MDTC(nn.Layer):
    """Backbone made of a preprocessing ``TCNBlock`` and ``stack_num``
    ``TCNStack`` layers whose outputs are cropped to a common length and
    summed.

    The input is padded by the total receptive field before the unpadded
    convolutions run (all on the left when ``causal``, split on both sides
    otherwise).
    """

    def __init__(
            self,
            stack_num: int,
            stack_size: int,
            in_channels: int,
            res_channels: int,
            kernel_size: int,
            causal: bool=True, ):
        super().__init__()
        assert kernel_size % 2 == 1
        self.kernel_size = kernel_size
        self.causal = causal
        self.preprocessor = TCNBlock(
            in_channels, res_channels, kernel_size, dilation=1, causal=causal)
        self.relu = nn.ReLU()
        self.blocks = nn.LayerList()
        # Accumulate the receptive field of the preprocessor and every stack.
        self.receptive_fields = self.preprocessor.receptive_fields
        for _ in range(stack_num):
            # Positional order is (in_channels, stack_num, stack_size, ...):
            # each stack holds `stack_size` dilations repeated once.
            stack = TCNStack(res_channels, stack_size, 1, res_channels,
                             kernel_size, causal)
            self.blocks.append(stack)
            self.receptive_fields += stack.receptive_fields
        self.half_receptive_fields = self.receptive_fields // 2
        self.hidden_dim = res_channels

    def forward(self, x: paddle.Tensor):
        """Compute backbone features.

        Args:
            x: Rank-3 input; presumably ``(batch, time, feature)`` since it
                is transposed to channel-first before the convolutions —
                TODO confirm against the data pipeline.

        Returns:
            Tuple ``(outputs, None)`` where ``outputs`` has the last two axes
            transposed back to the input layout.
        """
        if self.causal:
            pad_before, pad_after = self.receptive_fields, 0
        else:
            pad_before = pad_after = self.half_receptive_fields
        padded = F.pad(x, (0, 0, pad_before, pad_after, 0, 0), 'constant')

        hidden = padded.transpose([0, 2, 1])
        hidden = self.relu(self.preprocessor(hidden))
        stack_outputs = []
        for stack in self.blocks:
            hidden = stack(hidden)
            stack_outputs.append(hidden)

        # Crop every stack output to the length of the last (shortest) one
        # and add them up.
        last_output = stack_outputs[-1]
        target_len = last_output.shape[-1]
        summed = paddle.zeros_like(last_output, dtype=last_output.dtype)
        for feature in stack_outputs:
            surplus = feature.shape[-1] - target_len
            if self.causal and surplus > 0:
                feature = feature[:, :, surplus:]
            elif not self.causal and surplus > 1:
                half_surplus = surplus // 2
                feature = feature[:, :, half_surplus:-half_surplus]
            summed += feature

        return summed.transpose([0, 2, 1]), None


class KWSModel(nn.Layer):
    """Keyword-spotting head: backbone, linear projection and sigmoid.

    Args:
        backbone: Feature extractor exposing a ``hidden_dim`` attribute.
            Its forward may return either a tensor or a tuple/list whose
            first element is the feature tensor.
        num_keywords: Output size of the linear projection.
    """

    def __init__(self, backbone, num_keywords):
        super(KWSModel, self).__init__()
        self.backbone = backbone
        self.linear = nn.Linear(self.backbone.hidden_dim, num_keywords)
        self.activation = nn.Sigmoid()

    def forward(self, x):
        """Return per-frame sigmoid scores for every keyword."""
        outputs = self.backbone(x)
        # MDTC.forward returns ``(features, None)``; feeding that tuple to
        # nn.Linear would fail, so keep only the feature tensor.
        if isinstance(outputs, (tuple, list)):
            outputs = outputs[0]
        outputs = self.linear(outputs)
        return self.activation(outputs)


================================================
FILE: paddlespeech/resource/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .resource import CommonTaskResource


================================================
FILE: paddlespeech/resource/model_alias.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Only the alias table is exported from this module.
__all__ = [
    'model_alias',
]

# Maps a short model name to the class(es) that implement it.
# Every value is a list of "package.module:ClassName" strings; the text
# entries list two classes (a model and a tokenizer).
# NOTE(review): the strings are presumably resolved by a dynamic-import
# helper elsewhere in the package — confirm against the consumer.
model_alias = {
    # ---------------------------------
    # -------------- SSL --------------
    # ---------------------------------
    "wav2vec2ASR": ["paddlespeech.s2t.models.wav2vec2:Wav2vec2ASR"],
    "wav2vec2": ["paddlespeech.s2t.models.wav2vec2:Wav2vec2Base"],
    "hubertASR": ["paddlespeech.s2t.models.hubert:HubertASR"],
    "hubert": ["paddlespeech.s2t.models.hubert:HubertBase"],
    "wavlmASR": ["paddlespeech.s2t.models.wavlm:WavLMASR"],

    # ---------------------------------
    # -------------- ASR --------------
    # ---------------------------------
    "deepspeech2offline": ["paddlespeech.s2t.models.ds2:DeepSpeech2Model"],
    "deepspeech2online": ["paddlespeech.s2t.models.ds2:DeepSpeech2Model"],
    "conformer": ["paddlespeech.s2t.models.u2:U2Model"],
    "conformer_online": ["paddlespeech.s2t.models.u2:U2Model"],
    "conformer_u2pp_online": ["paddlespeech.s2t.models.u2:U2Model"],
    "transformer": ["paddlespeech.s2t.models.u2:U2Model"],
    "wenetspeech": ["paddlespeech.s2t.models.u2:U2Model"],

    # ---------------------------------
    # ------------ Whisper ------------
    # ---------------------------------
    "whisper": ["paddlespeech.s2t.models.whisper:Whisper"],

    # ---------------------------------
    # -------------- CLS --------------
    # ---------------------------------
    "panns_cnn6": ["paddlespeech.cls.models.panns:CNN6"],
    "panns_cnn10": ["paddlespeech.cls.models.panns:CNN10"],
    "panns_cnn14": ["paddlespeech.cls.models.panns:CNN14"],

    # ---------------------------------
    # -------------- ST ---------------
    # ---------------------------------
    "fat_st": ["paddlespeech.s2t.models.u2_st:U2STModel"],

    # ---------------------------------
    # -------------- TEXT -------------
    # ---------------------------------
    # Each text entry pairs the model class with its tokenizer class.
    "ernie_linear_p7": [
        "paddlespeech.text.models:ErnieLinear",
        "paddlenlp.transformers:ErnieTokenizer"
    ],
    "ernie_linear_p3": [
        "paddlespeech.text.models:ErnieLinear",
        "paddlenlp.transformers:ErnieTokenizer"
    ],
    "ernie_linear_p3_wudao": [
        "paddlespeech.text.models:ErnieLinear",
        "paddlenlp.transformers:ErnieTokenizer"
    ],

    # ---------------------------------
    # -------------- TTS --------------
    # ---------------------------------
    # acoustic models
    "speedyspeech": ["paddlespeech.t2s.models.speedyspeech:SpeedySpeech"],
    "speedyspeech_inference":
    ["paddlespeech.t2s.models.speedyspeech:SpeedySpeechInference"],
    "fastspeech2": ["paddlespeech.t2s.models.fastspeech2:FastSpeech2"],
    "fastspeech2_inference":
    ["paddlespeech.t2s.models.fastspeech2:FastSpeech2Inference"],
    "tacotron2": ["paddlespeech.t2s.models.tacotron2:Tacotron2"],
    "tacotron2_inference":
    ["paddlespeech.t2s.models.tacotron2:Tacotron2Inference"],
    # vocoders
    "pwgan": ["paddlespeech.t2s.models.parallel_wavegan:PWGGenerator"],
    "pwgan_inference":
    ["paddlespeech.t2s.models.parallel_wavegan:PWGInference"],
    "mb_melgan": ["paddlespeech.t2s.models.melgan:MelGANGenerator"],
    "mb_melgan_inference": ["paddlespeech.t2s.models.melgan:MelGANInference"],
    "style_melgan": ["paddlespeech.t2s.models.melgan:StyleMelGANGenerator"],
    "style_melgan_inference":
    ["paddlespeech.t2s.models.melgan:StyleMelGANInference"],
    "hifigan": ["paddlespeech.t2s.models.hifigan:HiFiGANGenerator"],
    "hifigan_inference": ["paddlespeech.t2s.models.hifigan:HiFiGANInference"],
    "wavernn": ["paddlespeech.t2s.models.wavernn:WaveRNN"],
    "wavernn_inference": ["paddlespeech.t2s.models.wavernn:WaveRNNInference"],

    # ---------------------------------
    # ------------ Vector -------------
    # ---------------------------------
    "ecapatdnn": ["paddlespeech.vector.models.ecapa_tdnn:EcapaTdnn"],

    # ---------------------------------
    # -------------- kws --------------
    # ---------------------------------
    # "mdtc" is the bare backbone, "mdtc_for_kws" the full KWS model.
    "mdtc": ["paddlespeech.kws.models.mdtc:MDTC"],
    "mdtc_for_kws": ["paddlespeech.kws.models.mdtc:KWSModel"],
}


================================================
FILE: paddlespeech/resource/pretrained_models.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Names exported by `from ... import *`: one pretrained-model table per
# task/format combination, plus the Kaldi binaries entry used by ST.
# NOTE(review): keep this list in sync with the tables defined below.
__all__ = [
    'asr_dynamic_pretrained_models',
    'asr_static_pretrained_models',
    'asr_onnx_pretrained_models',
    'cls_dynamic_pretrained_models',
    'cls_static_pretrained_models',
    'st_dynamic_pretrained_models',
    'st_kaldi_bins',
    'text_dynamic_pretrained_models',
    'tts_dynamic_pretrained_models',
    'tts_static_pretrained_models',
    'tts_onnx_pretrained_models',
    'vector_dynamic_pretrained_models',
    'ssl_dynamic_pretrained_models',
    'whisper_dynamic_pretrained_models',
]

# Tags of pretrained models follow the pattern "{model_name}[_{dataset}][-{lang}][-...]".
# Code-switch and multilingual models use "{model_name}[_{dataset}]-[codeswitch/multilingual][_{lang}][-...]".
# e.g. "conformer_wenetspeech-zh-16k" and "panns_cnn6-32k".
# The command line and the Python API take "{model_name}[_{dataset}]" as --model, for example:
# "paddlespeech asr --model conformer_wenetspeech --lang zh --sr 16000 --input ./input.wav"

# ---------------------------------
# -------------- SSL --------------
# ---------------------------------
def _ssl_release(url, md5, ckpt_path):
    """Build the record for one released version of an SSL model.

    Args:
        url: Download location of the model archive.
        md5: Checksum string stored alongside the archive URL.
        ckpt_path: Checkpoint prefix stored under ``ckpt_path``.

    Returns:
        A new dict with the keys ``url``, ``md5``, ``cfg_path``,
        ``ckpt_path``, ``model`` and ``params`` (in that order). ``model``
        and ``params`` both name ``<ckpt_path>.pdparams`` and ``cfg_path``
        is always ``model.yaml``.
    """
    weights = ckpt_path + '.pdparams'
    return {
        'url': url,
        'md5': md5,
        'cfg_path': 'model.yaml',
        'ckpt_path': ckpt_path,
        'model': weights,
        'params': weights,
    }


# Registry layout: tag -> version string -> record built by _ssl_release().
ssl_dynamic_pretrained_models = {
    "wav2vec2-en-16k": {
        '1.3':
        _ssl_release(
            'https://paddlespeech.cdn.bcebos.com/s2t/librispeech/asr3/wav2vec2-large-960h-lv60-self_ckpt_1.3.0.model.tar.gz',
            'acc46900680e341e500437aa59193518',
            'wav2vec2-large-960h-lv60-self'),
    },
    "wav2vec2ASR_librispeech-en-16k": {
        '1.3':
        _ssl_release(
            'https://paddlespeech.cdn.bcebos.com/s2t/librispeech/asr3/wav2vec2ASR-large-960h-librispeech_ckpt_1.3.1.model.tar.gz',
            'cbe28d6c78f3dd2e189968402381f454',
            'exp/wav2vec2ASR/checkpoints/avg_1'),
    },
    "wav2vec2-zh-16k": {
        '1.3':
        _ssl_release(
            'https://paddlespeech.cdn.bcebos.com/s2t/aishell/asr3/wav2vec2-large-wenetspeech-self_ckpt_1.3.0.model.tar.gz',
            '00ea4975c05d1bb58181205674052fe1',
            'chinese-wav2vec2-large'),
    },
    "wav2vec2ASR_aishell1-zh-16k": {
        '1.3':
        _ssl_release(
            'https://paddlespeech.cdn.bcebos.com/s2t/aishell/asr3/wav2vec2ASR-large-aishell1_ckpt_1.3.0.model.tar.gz',
            'ac8fa0a6345e6a7535f6fabb5e59e218',
            'exp/wav2vec2ASR/checkpoints/avg_1'),
        '1.4':
        _ssl_release(
            'https://paddlespeech.cdn.bcebos.com/s2t/aishell/asr3/wav2vec2ASR-large-aishell1_ckpt_1.4.0.model.tar.gz',
            '150e51b8ea5d255ccce6b395de8d916a',
            'exp/wav2vec2ASR/checkpoints/avg_1'),
    },
    "hubert-en-16k": {
        '1.4':
        _ssl_release(
            'https://paddlespeech.cdn.bcebos.com/hubert/hubert-large-lv60_ckpt_1.4.0.model.tar.gz',
            'efecfb87a8718aa9253b7459c1fe9b54',
            'hubert-large-lv60'),
    },
    "hubertASR_librispeech-100h-en-16k": {
        '1.4':
        _ssl_release(
            'https://paddlespeech.cdn.bcebos.com/hubert/hubertASR-large-100h-librispeech_ckpt_1.4.0.model.tar.gz',
            '574cefd11aaef5737969ce22a7f33ea2',
            'exp/hubertASR/checkpoints/avg_1'),
    },
    "wavlmASR_librispeech-en-16k": {
        "1.0":
        _ssl_release(
            "https://paddlespeech.cdn.bcebos.com/wavlm/wavlm_baseplus_libriclean_100h.tar.gz",
            "f2238e982bb8bcf046e536201f5ea629",
            "exp/wavlmASR/checkpoints/46"),
    },
}

# ---------------------------------
# -------------- ASR --------------
# ---------------------------------
# Registry layout: tag -> version string -> record.
# Every record carries 'url', 'md5', 'cfg_path' and 'ckpt_path'. Some records
# additionally carry 'model'/'params' file paths, an 'onnx_model' path, and/or
# an external language model reference ('lm_url'/'lm_md5', which may be empty
# strings).
asr_dynamic_pretrained_models = {
    "conformer_wenetspeech-zh-16k": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/s2t/wenetspeech/asr1_conformer_wenetspeech_ckpt_0.1.1.model.tar.gz',
            'md5':
            '76cb19ed857e6623856b7cd7ebbfeda4',
            'cfg_path':
            'model.yaml',
            'ckpt_path':
            'exp/conformer/checkpoints/wenetspeech',
        },
    },
    "conformer_online_wenetspeech-zh-16k": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar.gz',
            'md5':
            'b8c02632b04da34aca88459835be54a6',
            'cfg_path':
            'model.yaml',
            'ckpt_path':
            'exp/chunk_conformer/checkpoints/avg_10',
            'model':
            'exp/chunk_conformer/checkpoints/avg_10.pdparams',
            'params':
            'exp/chunk_conformer/checkpoints/avg_10.pdparams',
            'lm_url':
            '',
            'lm_md5':
            '',
        },
    },
    "conformer_u2pp_online_wenetspeech-zh-16k": {
        '1.3': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_u2pp_wenetspeech_ckpt_1.3.0.model.tar.gz',
            'md5':
            '62d230c1bf27731192aa9d3b8deca300',
            'cfg_path':
            'model.yaml',
            'ckpt_path':
            'exp/chunk_conformer_u2pp/checkpoints/avg_10',
            'model':
            'exp/chunk_conformer_u2pp/checkpoints/avg_10.pdparams',
            'params':
            'exp/chunk_conformer_u2pp/checkpoints/avg_10.pdparams',
            'lm_url':
            '',
            'lm_md5':
            '',
        },
    },
    "conformer_online_multicn-zh-16k": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/s2t/multi_cn/asr1/asr1_chunk_conformer_multi_cn_ckpt_0.2.0.model.tar.gz',
            'md5':
            '7989b3248c898070904cf042fd656003',
            'cfg_path':
            'model.yaml',
            'ckpt_path':
            'exp/chunk_conformer/checkpoints/multi_cn',
        },
        '2.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/s2t/multi_cn/asr1/asr1_chunk_conformer_multi_cn_ckpt_0.2.3.model.tar.gz',
            'md5':
            '0ac93d390552336f2a906aec9e33c5fa',
            'cfg_path':
            'model.yaml',
            'ckpt_path':
            'exp/chunk_conformer/checkpoints/multi_cn',
            'model':
            'exp/chunk_conformer/checkpoints/multi_cn.pdparams',
            'params':
            'exp/chunk_conformer/checkpoints/multi_cn.pdparams',
            'lm_url':
            'https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm',
            'lm_md5':
            '29e02312deb2e59b3c8686c7966d4fe3',
        },
    },
    "conformer_aishell-zh-16k": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/s2t/aishell/asr1/asr1_conformer_aishell_ckpt_0.1.2.model.tar.gz',
            'md5':
            '3f073eccfa7bb14e0c6867d65fc0dc3a',
            'cfg_path':
            'model.yaml',
            'ckpt_path':
            'exp/conformer/checkpoints/avg_30',
        },
    },
    "conformer_online_aishell-zh-16k": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/s2t/aishell/asr1/asr1_chunk_conformer_aishell_ckpt_0.2.0.model.tar.gz',
            'md5':
            'b374cfb93537761270b6224fb0bfc26a',
            'cfg_path':
            'model.yaml',
            'ckpt_path':
            'exp/chunk_conformer/checkpoints/avg_30',
        },
        # The archive must be the chunk (streaming) conformer build so that it
        # matches this tag and the 'exp/chunk_conformer/...' ckpt_path below;
        # the offline 'asr1_conformer_aishell' archive does not belong here.
        # NOTE(review): confirm the md5 against the published chunk archive.
        '1.4': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/s2t/aishell/asr1/asr1_chunk_conformer_aishell_ckpt_1.5.0.model.tar.gz',
            'md5':
            '38924b8adc28ef458847c3571e87e3cb',
            'cfg_path':
            'model.yaml',
            'ckpt_path':
            'exp/chunk_conformer/checkpoints/avg_30',
        },
    },
    "transformer_librispeech-en-16k": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/s2t/librispeech/asr1/asr1_transformer_librispeech_ckpt_0.1.1.model.tar.gz',
            'md5':
            '2c667da24922aad391eacafe37bc1660',
            'cfg_path':
            'model.yaml',
            'ckpt_path':
            'exp/transformer/checkpoints/avg_10',
        },
    },
    "deepspeech2online_wenetspeech-zh-16k": {
        '1.0.3': {
            'url':
            'http://paddlespeech.cdn.bcebos.com/s2t/wenetspeech/asr0/asr0_deepspeech2_online_wenetspeech_ckpt_1.0.3.model.tar.gz',
            'md5':
            'cfe273793e68f790f742b411c98bc75e',
            'cfg_path':
            'model.yaml',
            'ckpt_path':
            'exp/deepspeech2_online/checkpoints/avg_10',
            'model':
            'exp/deepspeech2_online/checkpoints/avg_10.jit.pdmodel',
            'params':
            'exp/deepspeech2_online/checkpoints/avg_10.jit.pdiparams',
            'onnx_model':
            'onnx/model.onnx',
            'lm_url':
            'https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm',
            'lm_md5':
            '29e02312deb2e59b3c8686c7966d4fe3'
        },
        '1.0.4': {
            'url':
            'http://paddlespeech.cdn.bcebos.com/s2t/wenetspeech/asr0/asr0_deepspeech2_online_wenetspeech_ckpt_1.0.4.model.tar.gz',
            'md5':
            'c595cb76902b5a5d01409171375989f4',
            'cfg_path':
            'model.yaml',
            'ckpt_path':
            'exp/deepspeech2_online/checkpoints/avg_10',
            'model':
            'exp/deepspeech2_online/checkpoints/avg_10.jit.pdmodel',
            'params':
            'exp/deepspeech2_online/checkpoints/avg_10.jit.pdiparams',
            'onnx_model':
            'onnx/model.onnx',
            'lm_url':
            'https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm',
            'lm_md5':
            '29e02312deb2e59b3c8686c7966d4fe3'
        },
    },
    "deepspeech2offline_aishell-zh-16k": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_offline_aishell_ckpt_1.0.1.model.tar.gz',
            'md5':
            '4d26066c6f19f52087425dc722ae5b13',
            'cfg_path':
            'model.yaml',
            'ckpt_path':
            'exp/deepspeech2/checkpoints/avg_10',
            'lm_url':
            'https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm',
            'lm_md5':
            '29e02312deb2e59b3c8686c7966d4fe3'
        },
    },
    "deepspeech2online_aishell-zh-16k": {
        '1.0.2': {
            'url':
            'http://paddlespeech.cdn.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_fbank161_ckpt_1.0.2.model.tar.gz',
            'md5':
            '4dd42cfce9aaa54db0ec698da6c48ec5',
            'cfg_path':
            'model.yaml',
            'ckpt_path':
            'exp/deepspeech2_online/checkpoints/avg_1',
            'model':
            'exp/deepspeech2_online/checkpoints/avg_1.jit.pdmodel',
            'params':
            'exp/deepspeech2_online/checkpoints/avg_1.jit.pdiparams',
            'onnx_model':
            'onnx/model.onnx',
            'lm_url':
            'https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm',
            'lm_md5':
            '29e02312deb2e59b3c8686c7966d4fe3'
        },
    },
    "deepspeech2offline_librispeech-en-16k": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/s2t/librispeech/asr0/asr0_deepspeech2_offline_librispeech_ckpt_1.0.1.model.tar.gz',
            'md5':
            'ed9e2b008a65268b3484020281ab048c',
            'cfg_path':
            'model.yaml',
            'ckpt_path':
            'exp/deepspeech2/checkpoints/avg_5',
            'lm_url':
            'https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm',
            'lm_md5':
            '099a601759d467cd0a8523ff939819c5'
        },
    },
    "conformer_talcs-codeswitch_zh_en-16k": {
        '1.4': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/s2t/tal_cs/asr1/asr1_conformer_talcs_ckpt_1.4.0.model.tar.gz',
            'md5':
            '01962c5d0a70878fe41cacd4f61e14d1',
            'cfg_path':
            'model.yaml',
            'ckpt_path':
            'exp/conformer/checkpoints/avg_10'
        },
    },
    "conformer_online_talcs-codeswitch_zh_en-16k": {
        '1.6': {
            'url':
            'https://paddlespeech.bj.bcebos.com/s2t/tal_cs/asr1/asr1_chunk_conformer_talcs_ckpt_1.6.0.model.tar.gz',
            'md5':
            '3132daf1004fd76c185e14b7f0af01f9',
            'cfg_path':
            'model.yaml',
            'model':
            'exp/chunk_conformer/checkpoints/avg_10.pdparams',
            'params':
            'exp/chunk_conformer/checkpoints/avg_10.pdparams',
            'ckpt_path':
            'exp/chunk_conformer/checkpoints/avg_10',
        },
    },
}

def _ds2_static_release(url, md5, ckpt_path, onnx=False):
    """Build the record for one released static DeepSpeech2 model.

    Args:
        url: Download location of the model archive.
        md5: Checksum string stored alongside the archive URL.
        ckpt_path: Checkpoint prefix; ``.jit.pdmodel`` / ``.jit.pdiparams``
            are appended to form the ``model`` / ``params`` fields.
        onnx: When true, an ``onnx_model`` field is inserted before the
            language model fields.

    Returns:
        A new dict; every record references the same Chinese language model
        through ``lm_url`` / ``lm_md5``.
    """
    record = {
        'url': url,
        'md5': md5,
        'cfg_path': 'model.yaml',
        'ckpt_path': ckpt_path,
        'model': ckpt_path + '.jit.pdmodel',
        'params': ckpt_path + '.jit.pdiparams',
    }
    if onnx:
        record['onnx_model'] = 'onnx/model.onnx'
    record['lm_url'] = 'https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm'
    record['lm_md5'] = '29e02312deb2e59b3c8686c7966d4fe3'
    return record


# Registry layout: tag -> version string -> record built by
# _ds2_static_release().
asr_static_pretrained_models = {
    "deepspeech2offline_aishell-zh-16k": {
        '1.0':
        _ds2_static_release(
            'https://paddlespeech.cdn.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_offline_aishell_ckpt_1.0.1.model.tar.gz',
            '4d26066c6f19f52087425dc722ae5b13',
            'exp/deepspeech2/checkpoints/avg_10'),
    },
    "deepspeech2online_aishell-zh-16k": {
        '1.0.1':
        _ds2_static_release(
            'https://paddlespeech.cdn.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_fbank161_ckpt_1.0.1.model.tar.gz',
            'df5ddeac8b679a470176649ac4b78726',
            'exp/deepspeech2_online/checkpoints/avg_1'),
        '1.0.2':
        _ds2_static_release(
            'http://paddlespeech.cdn.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_fbank161_ckpt_1.0.2.model.tar.gz',
            '4dd42cfce9aaa54db0ec698da6c48ec5',
            'exp/deepspeech2_online/checkpoints/avg_1',
            onnx=True),
    },
    "deepspeech2online_wenetspeech-zh-16k": {
        '1.0.3':
        _ds2_static_release(
            'http://paddlespeech.cdn.bcebos.com/s2t/wenetspeech/asr0/asr0_deepspeech2_online_wenetspeech_ckpt_1.0.3.model.tar.gz',
            'cfe273793e68f790f742b411c98bc75e',
            'exp/deepspeech2_online/checkpoints/avg_10',
            onnx=True),
        '1.0.4':
        _ds2_static_release(
            'http://paddlespeech.cdn.bcebos.com/s2t/wenetspeech/asr0/asr0_deepspeech2_online_wenetspeech_ckpt_1.0.4.model.tar.gz',
            'c595cb76902b5a5d01409171375989f4',
            'exp/deepspeech2_online/checkpoints/avg_10',
            onnx=True),
    },
}

def _ds2_onnx_release(url, md5, ckpt_path):
    """Build the record for one released ONNX DeepSpeech2 model.

    Args:
        url: Download location of the model archive.
        md5: Checksum string stored alongside the archive URL.
        ckpt_path: Checkpoint prefix; ``.jit.pdmodel`` / ``.jit.pdiparams``
            are appended to form the ``model`` / ``params`` fields.

    Returns:
        A new dict that also carries the fixed ``onnx_model`` path and the
        shared Chinese language model reference (``lm_url`` / ``lm_md5``).
    """
    return {
        'url': url,
        'md5': md5,
        'cfg_path': 'model.yaml',
        'ckpt_path': ckpt_path,
        'model': ckpt_path + '.jit.pdmodel',
        'params': ckpt_path + '.jit.pdiparams',
        'onnx_model': 'onnx/model.onnx',
        'lm_url': 'https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm',
        'lm_md5': '29e02312deb2e59b3c8686c7966d4fe3',
    }


# Registry layout: tag -> version string -> record built by
# _ds2_onnx_release().
asr_onnx_pretrained_models = {
    "deepspeech2online_aishell-zh-16k": {
        '1.0.2':
        _ds2_onnx_release(
            'http://paddlespeech.cdn.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_fbank161_ckpt_1.0.2.model.tar.gz',
            '4dd42cfce9aaa54db0ec698da6c48ec5',
            'exp/deepspeech2_online/checkpoints/avg_1'),
    },
    "deepspeech2online_wenetspeech-zh-16k": {
        '1.0.3':
        _ds2_onnx_release(
            'http://paddlespeech.cdn.bcebos.com/s2t/wenetspeech/asr0/asr0_deepspeech2_online_wenetspeech_ckpt_1.0.3.model.tar.gz',
            'cfe273793e68f790f742b411c98bc75e',
            'exp/deepspeech2_online/checkpoints/avg_10'),
        '1.0.4':
        _ds2_onnx_release(
            'http://paddlespeech.cdn.bcebos.com/s2t/wenetspeech/asr0/asr0_deepspeech2_online_wenetspeech_ckpt_1.0.4.model.tar.gz',
            'c595cb76902b5a5d01409171375989f4',
            'exp/deepspeech2_online/checkpoints/avg_10'),
    },
}

# Download layout shared by every Whisper variant of a given version:
# version -> (base URL of the model archives, assets archive URL, assets md5).
_WHISPER_RELEASES = {
    '1.3': (
        'https://paddlespeech.cdn.bcebos.com/whisper/whisper_model_20221122/',
        'https://paddlespeech.cdn.bcebos.com/whisper/whisper_model_20221108/assets.tar',
        '37a0a8abdb3641a51194f79567a93b61', ),
    '1.5': (
        'https://paddlespeech.bj.bcebos.com/whisper/whisper_model_20250825/',
        'https://paddlespeech.bj.bcebos.com/whisper/whisper_model_20250825/assets.tar',
        'dd61d092d362f1fdbae6ede08282e177', ),
}

# Whisper variant -> ((version, md5 of that variant's model archive), ...).
# Order here defines the key order of the registry below.
_WHISPER_ARCHIVE_MD5 = (
    ('large', (('1.3', 'cf1557af9d8ffa493fefad9cb08ae189'),
               ('1.5', '9ebbd228fa07ca4557e5da863dac2982'), )),
    ('base-en', (('1.3', 'b156529aefde6beb7726d2ea98fd067a'),
                 ('1.5', '376617a9c5f36404f50dde3708bac0c6'), )),
    ('base', (('1.3', '6b012a5abd583db14398c3492e47120b'),
              ('1.5', '61836cb29c93048621f83364d83b532b'), )),
    ('medium-en', (('1.3', 'c7f57d270bd20c7b170ba9dcf6c16f74'),
                   ('1.5', 'ac01145c5de962f1416f3d98171be559'), )),
    ('medium', (('1.3', '4c7dcd0df25f408199db4a4548336786'),
                ('1.5', '07770819961d1fe795facd3666f8db17'), )),
    ('small-en', (('1.3', '2b24efcb2e93f3275af7c0c7f598ff1c'),
                  ('1.5', '67af14156b93f49ae738a17204189e46'), )),
    ('small', (('1.3', '5a57911dd41651dd6ed78c5763912825'),
               ('1.5', 'db53c4bf39a9ad46ef77e6f9a37200b6'), )),
    ('tiny-en', (('1.3', '14969164a3f713fd58e56978c34188f6'),
                 ('1.5', 'f91f8447d8b37ed13f4327ef6565b094'), )),
    ('tiny', (('1.3', 'a5b82a1f2067a2ca400f17fabd62b81b'),
              ('1.5', '6f2209ac656ff12de085c824363316e2'), )),
    ('turbo', (('1.5', 'fe2dd1a1d6eb8e6d017aafc7d5f62336'), )),
)


def _whisper_release(variant, version, md5):
    """Build the record for one (variant, version) Whisper release.

    Args:
        variant: Variant name such as ``'large'`` or ``'tiny-en'``.
        version: Key into ``_WHISPER_RELEASES`` (``'1.3'`` or ``'1.5'``).
        md5: Checksum string of the variant's model archive.

    Returns:
        A new dict whose file names are all derived from
        ``whisper-<variant>-model`` and whose ``resource_data`` fields come
        from the per-version release table.
    """
    archive_base, assets_url, assets_md5 = _WHISPER_RELEASES[version]
    stem = 'whisper-' + variant + '-model'
    return {
        'url': archive_base + stem + '.tar.gz',
        'md5': md5,
        'cfg_path': 'whisper.yaml',
        'ckpt_path': stem,
        'model': stem + '.pdparams',
        'params': stem + '.pdparams',
        'resource_data': assets_url,
        'resource_data_md5': assets_md5,
    }


# Registry layout: "whisper-<variant>-16k" -> version string -> record.
whisper_dynamic_pretrained_models = {
    'whisper-' + variant + '-16k': {
        version: _whisper_release(variant, version, md5)
        for version, md5 in archives
    }
    for variant, archives in _WHISPER_ARCHIVE_MD5
}

# ---------------------------------
# -------------- CLS --------------
# ---------------------------------
# Registry of dynamic CLS checkpoints (panns_cnn6 / cnn10 / cnn14 entries).
# Layout: model tag -> version string -> metadata dict holding the archive
# URL, its md5 and the config / checkpoint / label file names.
cls_dynamic_pretrained_models = {
    "panns_cnn6-32k": {
        '1.0':
        dict(
            url='https://paddlespeech.cdn.bcebos.com/cls/panns_cnn6.tar.gz',
            md5='4cf09194a95df024fd12f84712cf0f9c',
            cfg_path='panns.yaml',
            ckpt_path='cnn6.pdparams',
            label_file='audioset_labels.txt', ),
    },
    "panns_cnn10-32k": {
        '1.0':
        dict(
            url='https://paddlespeech.cdn.bcebos.com/cls/panns_cnn10.tar.gz',
            md5='cb8427b22176cc2116367d14847f5413',
            cfg_path='panns.yaml',
            ckpt_path='cnn10.pdparams',
            label_file='audioset_labels.txt', ),
    },
    "panns_cnn14-32k": {
        '1.0':
        dict(
            url='https://paddlespeech.cdn.bcebos.com/cls/panns_cnn14.tar.gz',
            md5='e3b9b5614a1595001161d0ab95edee97',
            cfg_path='panns.yaml',
            ckpt_path='cnn14.pdparams',
            label_file='audioset_labels.txt', ),
    },
}

# Registry of static CLS inference models (panns_cnn6 / cnn10 / cnn14 entries).
# Layout: model tag -> version string -> metadata dict. Every entry names the
# same model / params / label files; only the archive URL and md5 differ.
cls_static_pretrained_models = {
    "panns_cnn6-32k": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/cls/inference_model/panns_cnn6_static.tar.gz',
            'md5': 'da087c31046d23281d8ec5188c1967da',
            'cfg_path': 'panns.yaml',
            'model_path': 'inference.pdmodel',
            'params_path': 'inference.pdiparams',
            'label_file': 'audioset_labels.txt',
        },
    },
    "panns_cnn10-32k": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/cls/inference_model/panns_cnn10_static.tar.gz',
            'md5': '5460cc6eafbfaf0f261cc75b90284ae1',
            'cfg_path': 'panns.yaml',
            'model_path': 'inference.pdmodel',
            'params_path': 'inference.pdiparams',
            'label_file': 'audioset_labels.txt',
        },
    },
    "panns_cnn14-32k": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/cls/inference_model/panns_cnn14_static.tar.gz',
            'md5': 'ccc80b194821274da79466862b2ab00f',
            'cfg_path': 'panns.yaml',
            'model_path': 'inference.pdmodel',
            'params_path': 'inference.pdiparams',
            'label_file': 'audioset_labels.txt',
        },
    },
}

# ---------------------------------
# -------------- ST ---------------
# ---------------------------------
# Registry of dynamic ST checkpoints: model tag -> version -> metadata dict
# (archive URL, md5, config file name and checkpoint path).
st_dynamic_pretrained_models = {
    "fat_st_ted-en-zh": {
        '1.0': {
            "url": "https://paddlespeech.cdn.bcebos.com/s2t/ted_en_zh/st1/st1_transformer_mtl_noam_ted-en-zh_ckpt_0.1.1.model.tar.gz",
            "md5": "d62063f35a16d91210a71081bd2dd557",
            "cfg_path": "model.yaml",
            "ckpt_path": "exp/transformer_mtl_noam/checkpoints/fat_st_ted-en-zh.pdparams",
        },
    },
}

# Download descriptor (URL + md5) for the kaldi_bins archive stored next to
# the ST model files.
st_kaldi_bins = {
    "url": "https://paddlespeech.cdn.bcebos.com/s2t/ted_en_zh/st1/kaldi_bins.tar.gz",
    "md5": "c0682303b3f3393dbf6ed4c4e35a53eb",
}

# ---------------------------------
# -------------- TEXT -------------
# ---------------------------------
# Registry of dynamic TEXT checkpoints (ernie_linear punctuation entries).
# Layout: model tag -> version string -> metadata dict. All entries share the
# same config / checkpoint / vocab file names; only URL and md5 differ.
text_dynamic_pretrained_models = {
    "ernie_linear_p7_wudao-punc-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/text/ernie_linear_p7_wudao-punc-zh.tar.gz',
            'md5': '12283e2ddde1797c5d1e57036b512746',
            'cfg_path': 'ckpt/model_config.json',
            'ckpt_path': 'ckpt/model_state.pdparams',
            'vocab_file': 'punc_vocab.txt',
        }
    },
    "ernie_linear_p3_wudao-punc-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/text/ernie_linear_p3_wudao-punc-zh.tar.gz',
            'md5': '448eb2fdf85b6a997e7e652e80c51dd2',
            'cfg_path': 'ckpt/model_config.json',
            'ckpt_path': 'ckpt/model_state.pdparams',
            'vocab_file': 'punc_vocab.txt',
        }
    },
    "ernie_linear_p3_wudao_fast-punc-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/text/ernie_linear_p3_wudao_fast-punc-zh.tar.gz',
            'md5': 'c93f9594119541a5dbd763381a751d08',
            'cfg_path': 'ckpt/model_config.json',
            'ckpt_path': 'ckpt/model_state.pdparams',
            'vocab_file': 'punc_vocab.txt',
        }
    }
}

# ---------------------------------
# -------------- TTS --------------
# ---------------------------------
# Registry of dynamic TTS checkpoints.
# Layout: "<model>_<dataset>-<lang>" tag -> version string -> metadata dict.
# Each metadata dict carries the archive URL and md5 plus the config, ckpt and
# stats file names; the fastspeech2 / tacotron2 / speedyspeech entries also
# list dictionary files (phones, and where present tones / speakers).
tts_dynamic_pretrained_models = {
    # speedyspeech
    "speedyspeech_csmsc-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/speedyspeech/speedyspeech_csmsc_ckpt_0.2.0.zip',
            'md5': '6f6fa967b408454b6662c8c00c0027cb',
            'config': 'default.yaml',
            'ckpt': 'snapshot_iter_30600.pdz',
            'speech_stats': 'feats_stats.npy',
            'phones_dict': 'phone_id_map.txt',
            'tones_dict': 'tone_id_map.txt',
        },
    },
    # fastspeech2
    "fastspeech2_csmsc-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_ckpt_0.4.zip',
            'md5': '637d28a5e53aa60275612ba4393d5f22',
            'config': 'default.yaml',
            'ckpt': 'snapshot_iter_76000.pdz',
            'speech_stats': 'speech_stats.npy',
            'phones_dict': 'phone_id_map.txt',
        },
    },
    "fastspeech2_canton-canton": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_canton_ckpt_1.4.0.zip',
            'md5': '504560c082deba82120927627c900374',
            'config': 'default.yaml',
            'ckpt': 'snapshot_iter_140000.pdz',
            'speech_stats': 'speech_stats.npy',
            'phones_dict': 'phone_id_map.txt',
            'speaker_dict': 'speaker_id_map.txt',
        },
    },
    "fastspeech2_ljspeech-en": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_ljspeech_ckpt_0.5.zip',
            'md5': 'ffed800c93deaf16ca9b3af89bfcd747',
            'config': 'default.yaml',
            'ckpt': 'snapshot_iter_100000.pdz',
            'speech_stats': 'speech_stats.npy',
            'phones_dict': 'phone_id_map.txt',
        },
    },
    "fastspeech2_aishell3-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_aishell3_ckpt_0.4.zip',
            'md5': 'f4dd4a5f49a4552b77981f544ab3392e',
            'config': 'default.yaml',
            'ckpt': 'snapshot_iter_96400.pdz',
            'speech_stats': 'speech_stats.npy',
            'phones_dict': 'phone_id_map.txt',
            'speaker_dict': 'speaker_id_map.txt',
        },
    },
    "fastspeech2_vctk-en": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_vctk_ckpt_0.5.zip',
            'md5': '743e5024ca1e17a88c5c271db9779ba4',
            'config': 'default.yaml',
            'ckpt': 'snapshot_iter_66200.pdz',
            'speech_stats': 'speech_stats.npy',
            'phones_dict': 'phone_id_map.txt',
            'speaker_dict': 'speaker_id_map.txt',
        },
    },
    "fastspeech2_cnndecoder_csmsc-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_ckpt_1.0.0.zip',
            'md5': '6eb28e22ace73e0ebe7845f86478f89f',
            'config': 'cnndecoder.yaml',
            'ckpt': 'snapshot_iter_153000.pdz',
            'speech_stats': 'speech_stats.npy',
            'phones_dict': 'phone_id_map.txt',
        },
    },
    # The only tag in this table that ships two versions.
    "fastspeech2_mix-mix": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_csmscljspeech_add-zhen.zip',
            'md5': '77d9d4b5a79ed6203339ead7ef6c74f9',
            'config': 'default.yaml',
            'ckpt': 'snapshot_iter_94000.pdz',
            'speech_stats': 'speech_stats.npy',
            'phones_dict': 'phone_id_map.txt',
            'speaker_dict': 'speaker_id_map.txt',
        },
        '2.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_ckpt_0.2.0.zip',
            'md5': '1d938e104e972386c8bfcbcc98a91587',
            'config': 'default.yaml',
            'ckpt': 'snapshot_iter_99200.pdz',
            'speech_stats': 'speech_stats.npy',
            'phones_dict': 'phone_id_map.txt',
            'speaker_dict': 'speaker_id_map.txt',
        },
    },
    "fastspeech2_male-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_zh_ckpt_1.4.0.zip',
            'md5': '43a9f4bc48a91f5a6f53017474e6c788',
            'config': 'default.yaml',
            'ckpt': 'snapshot_iter_76000.pdz',
            'speech_stats': 'speech_stats.npy',
            'phones_dict': 'phone_id_map.txt',
        },
    },
    "fastspeech2_male-en": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_en_ckpt_1.4.0.zip',
            'md5': 'cc9f44f1f20a8173f63e2d1d41ef1a9c',
            'config': 'default.yaml',
            'ckpt': 'snapshot_iter_100000.pdz',
            'speech_stats': 'speech_stats.npy',
            'phones_dict': 'phone_id_map.txt',
        },
    },
    "fastspeech2_male-mix": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_mix_ckpt_1.4.0.zip',
            'md5': '6d48ad60ef0ab2cee89a5d8cfd93dd86',
            'config': 'default.yaml',
            'ckpt': 'snapshot_iter_177000.pdz',
            'speech_stats': 'speech_stats.npy',
            'phones_dict': 'phone_id_map.txt',
        },
    },
    # tacotron2
    "tacotron2_csmsc-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/tacotron2/tacotron2_csmsc_ckpt_0.2.0.zip',
            'md5': '0df4b6f0bcbe0d73c5ed6df8867ab91a',
            'config': 'default.yaml',
            'ckpt': 'snapshot_iter_30600.pdz',
            'speech_stats': 'speech_stats.npy',
            'phones_dict': 'phone_id_map.txt',
        },
    },
    "tacotron2_ljspeech-en": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/tacotron2/tacotron2_ljspeech_ckpt_0.2.0.zip',
            'md5': '6a5eddd81ae0e81d16959b97481135f3',
            'config': 'default.yaml',
            'ckpt': 'snapshot_iter_60300.pdz',
            'speech_stats': 'speech_stats.npy',
            'phones_dict': 'phone_id_map.txt',
        },
    },
    # pwgan
    "pwgan_csmsc-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_baker_ckpt_0.4.zip',
            'md5': '2e481633325b5bdf0a3823c714d2c117',
            'config': 'pwg_default.yaml',
            'ckpt': 'pwg_snapshot_iter_400000.pdz',
            'speech_stats': 'pwg_stats.npy',
        },
    },
    "pwgan_ljspeech-en": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_ljspeech_ckpt_0.5.zip',
            'md5': '53610ba9708fd3008ccaf8e99dacbaf0',
            'config': 'pwg_default.yaml',
            'ckpt': 'pwg_snapshot_iter_400000.pdz',
            'speech_stats': 'pwg_stats.npy',
        },
    },
    "pwgan_aishell3-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_aishell3_ckpt_0.5.zip',
            'md5': 'd7598fa41ad362d62f85ffc0f07e3d84',
            'config': 'default.yaml',
            'ckpt': 'snapshot_iter_1000000.pdz',
            'speech_stats': 'feats_stats.npy',
        },
    },
    "pwgan_vctk-en": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_vctk_ckpt_0.1.1.zip',
            'md5': 'b3da1defcde3e578be71eb284cb89f2c',
            'config': 'default.yaml',
            'ckpt': 'snapshot_iter_1500000.pdz',
            'speech_stats': 'feats_stats.npy',
        },
    },
    "pwgan_male-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_male_ckpt_1.4.0.zip',
            'md5': 'a443d6253bf9be377f27ae5972a03c65',
            'config': 'default.yaml',
            'ckpt': 'snapshot_iter_200000.pdz',
            'speech_stats': 'feats_stats.npy',
        },
    },
    # mb_melgan
    "mb_melgan_csmsc-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_csmsc_ckpt_0.1.1.zip',
            'md5': 'ee5f0604e20091f0d495b6ec4618b90d',
            'config': 'default.yaml',
            'ckpt': 'snapshot_iter_1000000.pdz',
            'speech_stats': 'feats_stats.npy',
        },
    },
    # style_melgan
    "style_melgan_csmsc-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/style_melgan/style_melgan_csmsc_ckpt_0.1.1.zip',
            'md5': '5de2d5348f396de0c966926b8c462755',
            'config': 'default.yaml',
            'ckpt': 'snapshot_iter_1500000.pdz',
            'speech_stats': 'feats_stats.npy',
        },
    },
    # hifigan
    "hifigan_csmsc-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_csmsc_ckpt_0.1.1.zip',
            'md5': 'dd40a3d88dfcf64513fba2f0f961ada6',
            'config': 'default.yaml',
            'ckpt': 'snapshot_iter_2500000.pdz',
            'speech_stats': 'feats_stats.npy',
        },
    },
    "hifigan_ljspeech-en": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_ljspeech_ckpt_0.2.0.zip',
            'md5': '70e9131695decbca06a65fe51ed38a72',
            'config': 'default.yaml',
            'ckpt': 'snapshot_iter_2500000.pdz',
            'speech_stats': 'feats_stats.npy',
        },
    },
    "hifigan_aishell3-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_ckpt_0.2.0.zip',
            'md5': '3bb49bc75032ed12f79c00c8cc79a09a',
            'config': 'default.yaml',
            'ckpt': 'snapshot_iter_2500000.pdz',
            'speech_stats': 'feats_stats.npy',
        },
    },
    "hifigan_vctk-en": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_ckpt_0.2.0.zip',
            'md5': '7da8f88359bca2457e705d924cf27bd4',
            'config': 'default.yaml',
            'ckpt': 'snapshot_iter_2500000.pdz',
            'speech_stats': 'feats_stats.npy',
        },
    },
    "hifigan_male-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_male_ckpt_1.4.0.zip',
            'md5': 'a709830596e102c2b83f8adc26d41d85',
            'config': 'default.yaml',
            'ckpt': 'snapshot_iter_630000.pdz',
            'speech_stats': 'feats_stats.npy',
        },
    },
    # wavernn
    "wavernn_csmsc-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/wavernn/wavernn_csmsc_ckpt_0.2.0.zip',
            'md5': 'ee37b752f09bcba8f2af3b777ca38e13',
            'config': 'default.yaml',
            'ckpt': 'snapshot_iter_400000.pdz',
            'speech_stats': 'feats_stats.npy',
        },
    },
}
# Alias tags for the dynamic TTS table. Each alias is bound to the SAME
# version dict object as its target (no copy is made), so a change made
# through one tag is visible through the others.
#
# The mixed fastspeech2 model is also reachable under the "-zh" and "-en" tags.
tts_dynamic_pretrained_models[
    "fastspeech2_mix-zh"] = tts_dynamic_pretrained_models[
        "fastspeech2_mix-en"] = tts_dynamic_pretrained_models[
            "fastspeech2_mix-mix"]
# The male vocoders are registered once under "-zh" and reused for "-en"/"-mix".
tts_dynamic_pretrained_models["pwgan_male-en"] = tts_dynamic_pretrained_models[
    "pwgan_male-mix"] = tts_dynamic_pretrained_models["pwgan_male-zh"]
tts_dynamic_pretrained_models[
    "hifigan_male-en"] = tts_dynamic_pretrained_models[
        "hifigan_male-mix"] = tts_dynamic_pretrained_models["hifigan_male-zh"]
# This table registers "fastspeech2_canton-canton" but no "-canton" vocoder
# tag. Expose the aishell3 PWGAN under "pwgan_aishell3-canton", exactly as the
# static table does, so the same vocoder tag resolves in both tables.
tts_dynamic_pretrained_models[
    "pwgan_aishell3-canton"] = tts_dynamic_pretrained_models[
        "pwgan_aishell3-zh"]

# Registry of static TTS inference models.
# Layout: "<model>_<dataset>-<lang>" tag -> version string -> metadata dict.
# Each metadata dict carries the archive URL and md5, the .pdmodel/.pdiparams
# file names and an integer 'sample_rate'; the acoustic-model entries also
# list dictionary files (phones, and where present tones / speakers).
tts_static_pretrained_models = {
    # speedyspeech
    "speedyspeech_csmsc-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/speedyspeech/speedyspeech_nosil_baker_static_0.5.zip',
            'md5': 'f10cbdedf47dc7a9668d2264494e1823',
            'model': 'speedyspeech_csmsc.pdmodel',
            'params': 'speedyspeech_csmsc.pdiparams',
            'phones_dict': 'phone_id_map.txt',
            'tones_dict': 'tone_id_map.txt',
            'sample_rate': 24000,
        },
    },
    # fastspeech2
    "fastspeech2_csmsc-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_static_0.4.zip',
            'md5': '9788cd9745e14c7a5d12d32670b2a5a7',
            'model': 'fastspeech2_csmsc.pdmodel',
            'params': 'fastspeech2_csmsc.pdiparams',
            'phones_dict': 'phone_id_map.txt',
            'sample_rate': 24000,
        },
    },
    "fastspeech2_ljspeech-en": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_ljspeech_static_1.1.0.zip',
            'md5': 'c49f70b52973423ec45aaa6184fb5bc6',
            'model': 'fastspeech2_ljspeech.pdmodel',
            'params': 'fastspeech2_ljspeech.pdiparams',
            'phones_dict': 'phone_id_map.txt',
            'sample_rate': 22050,
        },
    },
    "fastspeech2_aishell3-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_aishell3_static_1.1.0.zip',
            'md5': '695af44679f48eb4abc159977ddaee16',
            'model': 'fastspeech2_aishell3.pdmodel',
            'params': 'fastspeech2_aishell3.pdiparams',
            'phones_dict': 'phone_id_map.txt',
            'speaker_dict': 'speaker_id_map.txt',
            'sample_rate': 24000,
        },
    },
    "fastspeech2_vctk-en": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_static_1.1.0.zip',
            'md5': '92d8c082f180bda2fd05a534fb4a1b62',
            'model': 'fastspeech2_vctk.pdmodel',
            'params': 'fastspeech2_vctk.pdiparams',
            'phones_dict': 'phone_id_map.txt',
            'speaker_dict': 'speaker_id_map.txt',
            'sample_rate': 24000,
        },
    },
    # The only tag in this table that ships two versions.
    "fastspeech2_mix-mix": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_csmscljspeech_add-zhen_static.zip',
            'md5': 'b5001f66cccafdde07707e1b6269fa58',
            'model': 'fastspeech2_mix.pdmodel',
            'params': 'fastspeech2_mix.pdiparams',
            'phones_dict': 'phone_id_map.txt',
            'speaker_dict': 'speaker_id_map.txt',
            'sample_rate': 24000,
        },
        '2.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_static_0.2.0.zip',
            'md5': 'c6dd138fab3ba261299c0b2efee51d5a',
            'model': 'fastspeech2_mix.pdmodel',
            'params': 'fastspeech2_mix.pdiparams',
            'phones_dict': 'phone_id_map.txt',
            'speaker_dict': 'speaker_id_map.txt',
            'sample_rate': 24000,
        },
    },
    "fastspeech2_male-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_zh_static_1.4.0.zip',
            'md5': '9b7218829e7fa01aa33dbb2c5f6ef20f',
            'model': 'fastspeech2_male-zh.pdmodel',
            'params': 'fastspeech2_male-zh.pdiparams',
            'phones_dict': 'phone_id_map.txt',
            'sample_rate': 24000,
        },
    },
    "fastspeech2_male-en": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_en_static_1.4.0.zip',
            'md5': '33cea19b6821b371d242969ffd8b6cbf',
            'model': 'fastspeech2_male-en.pdmodel',
            'params': 'fastspeech2_male-en.pdiparams',
            'phones_dict': 'phone_id_map.txt',
            'sample_rate': 24000,
        },
    },
    "fastspeech2_male-mix": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_mix_static_1.4.0.zip',
            'md5': '66585b04c0ced72f3cb82ee85b814d80',
            'model': 'fastspeech2_male-mix.pdmodel',
            'params': 'fastspeech2_male-mix.pdiparams',
            'phones_dict': 'phone_id_map.txt',
            'sample_rate': 24000,
        },
    },
    "fastspeech2_canton-canton": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_canton_static_1.4.0.zip',
            'md5': '5da80931666503b9b6aed25e894d2ade',
            'model': 'fastspeech2_canton.pdmodel',
            'params': 'fastspeech2_canton.pdiparams',
            'phones_dict': 'phone_id_map.txt',
            'speaker_dict': 'speaker_id_map.txt',
            'sample_rate': 24000,
        },
    },
    # pwgan
    "pwgan_csmsc-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwg_baker_static_0.4.zip',
            'md5': 'e3504aed9c5a290be12d1347836d2742',
            'model': 'pwgan_csmsc.pdmodel',
            'params': 'pwgan_csmsc.pdiparams',
            'sample_rate': 24000,
        },
    },
    "pwgan_ljspeech-en": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_ljspeech_static_1.1.0.zip',
            'md5': '6f457a069da99c6814ac1fb4677281e4',
            'model': 'pwgan_ljspeech.pdmodel',
            'params': 'pwgan_ljspeech.pdiparams',
            'sample_rate': 22050,
        },
    },
    "pwgan_aishell3-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_aishell3_static_1.1.0.zip',
            'md5': '199f64010238275fbdacb326a5cf82d1',
            'model': 'pwgan_aishell3.pdmodel',
            'params': 'pwgan_aishell3.pdiparams',
            'sample_rate': 24000,
        },
    },
    "pwgan_vctk-en": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_vctk_static_1.1.0.zip',
            'md5': 'ee0fc571ad5a7fbe4ca20e49df22b819',
            'model': 'pwgan_vctk.pdmodel',
            'params': 'pwgan_vctk.pdiparams',
            'sample_rate': 24000,
        },
    },
    "pwgan_male-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_male_static_1.4.0.zip',
            'md5': '52a480ad35694b96603e0a92e9fb3f95',
            'model': 'pwgan_male.pdmodel',
            'params': 'pwgan_male.pdiparams',
            'sample_rate': 24000,
        },
    },
    # mb_melgan
    "mb_melgan_csmsc-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_csmsc_static_0.1.1.zip',
            'md5': 'ac6eee94ba483421d750433f4c3b8d36',
            'model': 'mb_melgan_csmsc.pdmodel',
            'params': 'mb_melgan_csmsc.pdiparams',
            'sample_rate': 24000,
        },
    },
    # hifigan
    "hifigan_csmsc-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_csmsc_static_0.1.1.zip',
            'md5': '7edd8c436b3a5546b3a7cb8cff9d5a0c',
            'model': 'hifigan_csmsc.pdmodel',
            'params': 'hifigan_csmsc.pdiparams',
            'sample_rate': 24000,
        },
    },
    "hifigan_ljspeech-en": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_ljspeech_static_1.1.0.zip',
            'md5': '8c674e79be7c45f6eda74825316438a0',
            'model': 'hifigan_ljspeech.pdmodel',
            'params': 'hifigan_ljspeech.pdiparams',
            'sample_rate': 22050,
        },
    },
    "hifigan_aishell3-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_static_1.1.0.zip',
            'md5': '7a10ec5d8d851e2000128f040d30cc01',
            'model': 'hifigan_aishell3.pdmodel',
            'params': 'hifigan_aishell3.pdiparams',
            'sample_rate': 24000,
        },
    },
    "hifigan_vctk-en": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_static_1.1.0.zip',
            'md5': '130f791dfac84ccdd44ccbdfb67bf08e',
            'model': 'hifigan_vctk.pdmodel',
            'params': 'hifigan_vctk.pdiparams',
            'sample_rate': 24000,
        },
    },
    "hifigan_male-zh": {
        '1.0': {
            'url': 'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_male_static_1.4.0.zip',
            'md5': '9011fa2738b501e909d1a61054bed29b',
            'model': 'hifigan_male.pdmodel',
            'params': 'hifigan_male.pdiparams',
            'sample_rate': 24000,
        },
    },
}

# Alias tags for the static TTS table. Every alias is bound to the very same
# version dict object as its target (no copy). The right-hand lookups are all
# evaluated before any alias key is inserted, and the keys are inserted in the
# order written below.
tts_static_pretrained_models.update({
    # mixed fastspeech2 model, also reachable under "-zh" and "-en"
    "fastspeech2_mix-zh": tts_static_pretrained_models["fastspeech2_mix-mix"],
    "fastspeech2_mix-en": tts_static_pretrained_models["fastspeech2_mix-mix"],
    # male vocoders registered under "-zh", reused for "-en" and "-mix"
    "pwgan_male-en": tts_static_pretrained_models["pwgan_male-zh"],
    "pwgan_male-mix": tts_static_pretrained_models["pwgan_male-zh"],
    "hifigan_male-en": tts_static_pretrained_models["hifigan_male-zh"],
    "hifigan_male-mix": tts_static_pretrained_models["hifigan_male-zh"],
    # aishell3 PWGAN exposed under a "-canton" tag
    "pwgan_aishell3-canton": tts_static_pretrained_models["pwgan_aishell3-zh"],
})

# ONNX-format TTS resources.
#
# Layout: model tag -> version string -> resource info dict.
# Every info dict below carries 'url', 'md5', 'ckpt' and 'sample_rate';
# acoustic models additionally list 'phones_dict' and, depending on the
# model, 'tones_dict', 'speaker_dict' or 'speech_stats'. Vocoder entries
# (pwgan / mb_melgan / hifigan) only carry the four common keys.
# 'ckpt' is a single file name, except for the streaming cnndecoder model
# where it is a list of three files.
# When a tag has several versions, CommonTaskResource._get_default_version
# (resource.py) picks the last one listed, so keep versions in ascending order.
tts_onnx_pretrained_models = {
    # speedyspeech
    "speedyspeech_csmsc_onnx-zh": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/speedyspeech/speedyspeech_csmsc_onnx_0.2.0.zip',
            'md5':
            '3e9c45af9ef70675fc1968ed5074fc88',
            'ckpt':
            'speedyspeech_csmsc.onnx',
            'phones_dict':
            'phone_id_map.txt',
            'tones_dict':
            'tone_id_map.txt',
            'sample_rate':
            24000,
        },
    },
    # fastspeech2
    "fastspeech2_csmsc_onnx-zh": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_csmsc_onnx_0.2.0.zip',
            'md5':
            'fd3ad38d83273ad51f0ea4f4abf3ab4e',
            'ckpt':
            'fastspeech2_csmsc.onnx',
            'phones_dict':
            'phone_id_map.txt',
            'sample_rate':
            24000,
        },
    },
    "fastspeech2_ljspeech_onnx-en": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_ljspeech_onnx_1.1.0.zip',
            'md5':
            '00754307636a48c972a5f3e65cda3d18',
            'ckpt':
            'fastspeech2_ljspeech.onnx',
            'phones_dict':
            'phone_id_map.txt',
            'sample_rate':
            22050,
        },
    },
    "fastspeech2_aishell3_onnx-zh": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_aishell3_onnx_1.1.0.zip',
            'md5':
            'a1d6ee21de897ce394f5469e2bb4df0d',
            'ckpt':
            'fastspeech2_aishell3.onnx',
            'phones_dict':
            'phone_id_map.txt',
            'speaker_dict':
            'speaker_id_map.txt',
            'sample_rate':
            24000,
        },
    },
    "fastspeech2_vctk_onnx-en": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_onnx_1.1.0.zip',
            'md5':
            'd9c3a9b02204a2070504dd99f5f959bf',
            'ckpt':
            'fastspeech2_vctk.onnx',
            'phones_dict':
            'phone_id_map.txt',
            'speaker_dict':
            'speaker_id_map.txt',
            'sample_rate':
            24000,
        },
    },
    "fastspeech2_cnndecoder_csmsc_onnx-zh": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0.zip',
            'md5':
            '5f70e1a6bcd29d72d54e7931aa86f266',
            'ckpt': [
                'fastspeech2_csmsc_am_encoder_infer.onnx',
                'fastspeech2_csmsc_am_decoder.onnx',
                'fastspeech2_csmsc_am_postnet.onnx',
            ],
            'speech_stats':
            'speech_stats.npy',
            'phones_dict':
            'phone_id_map.txt',
            'sample_rate':
            24000,
        },
    },
    "fastspeech2_mix_onnx-mix": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_csmscljspeech_add-zhen_onnx.zip',
            'md5':
            '73052520202957920cf54700980933d0',
            'ckpt':
            'fastspeech2_mix.onnx',
            'phones_dict':
            'phone_id_map.txt',
            'speaker_dict':
            'speaker_id_map.txt',
            'sample_rate':
            24000,
        },
        '2.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_onnx_0.2.0.zip',
            'md5':
            '43b8ca5f85709c503777f808eb02a39e',
            'ckpt':
            'fastspeech2_mix.onnx',
            'phones_dict':
            'phone_id_map.txt',
            'speaker_dict':
            'speaker_id_map.txt',
            'sample_rate':
            24000,
        },
    },
    "fastspeech2_male_onnx-zh": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_zh_onnx_1.4.0.zip',
            'md5':
            '46c66f5ab86f4fcb493d899d9901c863',
            'ckpt':
            'fastspeech2_male-zh.onnx',
            'phones_dict':
            'phone_id_map.txt',
            'sample_rate':
            24000,
        },
    },
    "fastspeech2_male_onnx-en": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_en_onnx_1.4.0.zip',
            'md5':
            '401fb5cc31fdb25e22e901c9acba79c8',
            'ckpt':
            'fastspeech2_male-en.onnx',
            'phones_dict':
            'phone_id_map.txt',
            'sample_rate':
            24000,
        },
    },
    "fastspeech2_male_onnx-mix": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_male_mix_onnx_1.4.0.zip',
            'md5':
            '07e51c5991c529b78603034547e9d0fa',
            'ckpt':
            'fastspeech2_male-mix.onnx',
            'phones_dict':
            'phone_id_map.txt',
            'sample_rate':
            24000,
        },
    },
    "fastspeech2_canton_onnx-canton": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_canton_onnx_1.4.0.zip',
            'md5':
            '1c8d51ceb2f9bdd168e23be575c2ccf8',
            'ckpt':
            'fastspeech2_canton.onnx',
            'phones_dict':
            'phone_id_map.txt',
            'speaker_dict':
            'speaker_id_map.txt',
            'sample_rate':
            24000,
        },
    },
    # pwgan
    "pwgan_csmsc_onnx-zh": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_csmsc_onnx_0.2.0.zip',
            'md5':
            '711d0ade33e73f3b721efc9f20669f9c',
            'ckpt':
            'pwgan_csmsc.onnx',
            'sample_rate':
            24000,
        },
    },
    "pwgan_ljspeech_onnx-en": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_ljspeech_onnx_1.1.0.zip',
            'md5':
            '73cdeeccb77f2ea6ed4d07e71d8ac8b8',
            'ckpt':
            'pwgan_ljspeech.onnx',
            'sample_rate':
            22050,
        },
    },
    "pwgan_aishell3_onnx-zh": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_aishell3_onnx_1.1.0.zip',
            'md5':
            '096ab64e152a4fa476aff79ebdadb01b',
            'ckpt':
            'pwgan_aishell3.onnx',
            'sample_rate':
            24000,
        },
    },
    "pwgan_vctk_onnx-en": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_vctk_onnx_1.1.0.zip',
            'md5':
            '4e754d42cf85f6428f0af887c923d86c',
            'ckpt':
            'pwgan_vctk.onnx',
            'sample_rate':
            24000,
        },
    },
    "pwgan_male_onnx-zh": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/pwgan/pwgan_male_onnx_1.4.0.zip',
            'md5':
            '13163fd1326f555650dc7141d31767c3',
            'ckpt':
            'pwgan_male.onnx',
            'sample_rate':
            24000,
        },
    },
    # mb_melgan
    "mb_melgan_csmsc_onnx-zh": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_csmsc_onnx_0.2.0.zip',
            'md5':
            '5b83ec746e8414bc29032d954ffd07ec',
            'ckpt':
            'mb_melgan_csmsc.onnx',
            'sample_rate':
            24000,
        },
    },
    # hifigan
    "hifigan_csmsc_onnx-zh": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_csmsc_onnx_0.2.0.zip',
            'md5':
            '1a7dc0385875889e46952e50c0994a6b',
            'ckpt':
            'hifigan_csmsc.onnx',
            'sample_rate':
            24000,
        },
    },
    "hifigan_ljspeech_onnx-en": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_ljspeech_onnx_1.1.0.zip',
            'md5':
            '062f54b79c1135a50adb5fc8406260b2',
            'ckpt':
            'hifigan_ljspeech.onnx',
            'sample_rate':
            22050,
        },
    },
    "hifigan_aishell3_onnx-zh": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_onnx_1.1.0.zip',
            'md5':
            'd6c0d684ad148583ca57837d5e870167',
            'ckpt':
            'hifigan_aishell3.onnx',
            'sample_rate':
            24000,
        },
    },
    "hifigan_vctk_onnx-en": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_onnx_1.1.0.zip',
            'md5':
            'fd714df3be283c0efbefc8510160ff6d',
            'ckpt':
            'hifigan_vctk.onnx',
            'sample_rate':
            24000,
        },
    },
    "hifigan_male_onnx-zh": {
        '1.0': {
            'url':
            'https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/hifigan/hifigan_male_onnx_1.4.0.zip',
            'md5':
            'ec6b35417b1fe811d3b1641d4b527769',
            'ckpt':
            'hifigan_male.onnx',
            'sample_rate':
            24000,
        },
    },
}

# Language-specific tags that reuse an existing ONNX entry. Each alias points
# at the very same version table object as its target (no copy is made).

# Mixed zh/en FastSpeech2 also serves the pure "zh" and "en" tags.
tts_onnx_pretrained_models[
    "fastspeech2_mix_onnx-zh"] = tts_onnx_pretrained_models[
        "fastspeech2_mix_onnx-mix"]
tts_onnx_pretrained_models[
    "fastspeech2_mix_onnx-en"] = tts_onnx_pretrained_models[
        "fastspeech2_mix_onnx-mix"]

# Male Parallel WaveGAN vocoder: "en" and "mix" reuse the "zh" entry.
tts_onnx_pretrained_models[
    "pwgan_male_onnx-en"] = tts_onnx_pretrained_models["pwgan_male_onnx-zh"]
tts_onnx_pretrained_models[
    "pwgan_male_onnx-mix"] = tts_onnx_pretrained_models["pwgan_male_onnx-zh"]

# Male HiFiGAN vocoder: "en" and "mix" reuse the "zh" entry.
tts_onnx_pretrained_models[
    "hifigan_male_onnx-en"] = tts_onnx_pretrained_models[
        "hifigan_male_onnx-zh"]
tts_onnx_pretrained_models[
    "hifigan_male_onnx-mix"] = tts_onnx_pretrained_models[
        "hifigan_male_onnx-zh"]

# The "canton" tag reuses the aishell3 Parallel WaveGAN entry.
tts_onnx_pretrained_models[
    "pwgan_aishell3_onnx-canton"] = tts_onnx_pretrained_models[
        "pwgan_aishell3_onnx-zh"]

# ---------------------------------
# ------------ Vector -------------
# ---------------------------------
# Layout: model tag -> version string -> resource info dict.
vector_dynamic_pretrained_models = {
    'ecapatdnn_voxceleb12-16k': {
        '1.0': dict(
            url='https://paddlespeech.cdn.bcebos.com/vector/voxceleb/sv0_ecapa_tdnn_voxceleb12_ckpt_0_2_0.tar.gz',
            md5='cc33023c54ab346cd318408f43fcaf95',
            # Path of the yaml config.
            cfg_path='conf/model.yaml',
            # Written as ${dir}/{model_name}: the first 'model' is the
            # directory and the second is the checkpoint name, i.e. the
            # weights are stored as model/model.pdparams.
            ckpt_path='model/model',
        ),
    },
}

# ---------------------------------
# ------------- KWS ---------------
# ---------------------------------
# Layout: model tag -> version string -> resource info dict.
kws_dynamic_pretrained_models = {
    'mdtc_heysnips-16k': {
        '1.0': dict(
            url='https://paddlespeech.cdn.bcebos.com/kws/heysnips/kws0_mdtc_heysnips_ckpt.tar.gz',
            md5='c0de0a9520d66c3c8d6679460893578f',
            cfg_path='conf/mdtc.yaml',
            ckpt_path='ckpt/model',
        ),
    },
}

# ---------------------------------
# ------------- G2PW ---------------
# ---------------------------------
# Layout: model name -> version string -> archive url and md5 checksum.
g2pw_onnx_models = {
    'G2PWModel': {
        '1.0': dict(
            url='https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/g2p/G2PWModel_1.0.zip',
            md5='7e049a55547da840502cf99e8a64f20e',
        ),
        '1.1': dict(
            url='https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/g2p/new/G2PWModel_1.1.zip',
            md5='f8b60501770bff92ed6ce90860a610e6',
        ),
    },
}

# ---------------------------------
# ---------- Rhy_frontend ---------
# ---------------------------------
# Layout: model name -> version string -> archive url and md5 checksum.
rhy_frontend_models = {
    'rhy_e2e': {
        '1.0': dict(
            url='https://paddlespeech.cdn.bcebos.com/Rhy_e2e/rhy_frontend.zip',
            md5='6624a77393de5925d5a84400b363d8ef',
        ),
    },
}

# ---------------------------------
# ---------- StarGANv2VC ----------
# ---------------------------------

# Unlike the tables above there is no model-tag level here:
# the mapping goes straight from version string to url/md5.
StarGANv2VC_source = {
    '1.0': dict(
        url='https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/starganv2vc/StarGANv2VC_source.zip',
        md5='195e169419163f5648030ba84c71f866',
    ),
}


================================================
FILE: paddlespeech/resource/resource.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from collections import OrderedDict
from typing import Dict
from typing import List
from typing import Optional

from ..cli.utils import download_and_decompress
from ..utils.dynamic_import import dynamic_import
from ..utils.env import MODEL_HOME
from .model_alias import model_alias

# Task names accepted by CommonTaskResource.
task_supported = [
    'asr',
    'cls',
    'st',
    'text',
    'tts',
    'vector',
    'kws',
    'ssl',
    'whisper',
]
# Model formats accepted by CommonTaskResource.
model_format_supported = [
    'dynamic',
    'static',
    'onnx',
]
# Values accepted for the optional ``inference_mode`` keyword.
inference_mode_supported = [
    'online',
    'offline',
]


class CommonTaskResource:
    """Look up and fetch pretrained model resources for a single task.

    An instance is bound to one ``task`` and one ``model_format``. The table
    named ``{task}_{model_format}_pretrained_models`` is read from the sibling
    ``pretrained_models`` module; ``set_task_model`` then selects one model
    tag / version from that table and, unless told otherwise, downloads it.
    """

    def __init__(self, task: str, model_format: str='dynamic', **kwargs):
        """Bind the resource to a task and model format.

        Args:
            task (str): One of ``task_supported``.
            model_format (str): One of ``model_format_supported``.
            **kwargs: Only ``inference_mode`` is read. When present it must be
                one of ``inference_mode_supported`` and the model table is
                filtered accordingly.
        """
        assert task in task_supported, 'Arg "task" must be one of {}.'.format(
            task_supported)
        assert model_format in model_format_supported, 'Arg "model_format" must be one of {}.'.format(
            model_format_supported)

        self.task = task
        self.model_format = model_format
        self.pretrained_models = self._get_pretrained_models()

        if 'inference_mode' in kwargs:
            assert kwargs[
                'inference_mode'] in inference_mode_supported, 'Arg "inference_mode" must be one of {}.'.format(
                    inference_mode_supported)
            self._inference_mode_filter(kwargs['inference_mode'])

        # Initialize after model and version had been set.
        self.model_tag = None
        self.version = None
        self.res_dict = None
        self.res_dir = None

        if self.task == 'tts':
            # For vocoder
            self.voc_model_tag = None
            self.voc_version = None
            self.voc_res_dict = None
            self.voc_res_dir = None

    def set_task_model(self,
                       model_tag: str,
                       model_type: int=0,
                       skip_download: bool=False,
                       version: Optional[str]=None):
        """Set model tag and version of current task.

        Args:
            model_tag (str): Model tag.
            model_type (int): 0 for acoustic model otherwise vocoder in tts task.
            skip_download (bool): When True only the tag, version and resource
                dict are recorded; nothing is fetched and the resource
                directory attribute is left untouched.
            version (Optional[str], optional): Version of pretrained model. Defaults to None,
                in which case the default (last listed) version is used.

        Raises:
            AssertionError: If the tag or version is unknown, or a vocoder is
                requested for a task other than tts.
        """
        assert model_tag in self.pretrained_models, \
            "Can't find \"{}\" in resource. Model name must be one of {}".format(model_tag, list(self.pretrained_models.keys()))

        if version is None:
            version = self._get_default_version(model_tag)

        # The list shown here holds versions, so the message says so.
        assert version in self.pretrained_models[model_tag], \
            "Can't find version \"{}\" in \"{}\". Version must be one of {}".format(
                version, model_tag, list(self.pretrained_models[model_tag].keys()))

        if model_type == 0:
            # Tag and version must be stored first: _get_model_dir reads them.
            self.model_tag = model_tag
            self.version = version
            self.res_dict = self.pretrained_models[model_tag][version]
            self._format_path(self.res_dict)
            if not skip_download:
                self.res_dir = self._fetch(self.res_dict,
                                           self._get_model_dir(model_type))
        else:
            assert self.task == 'tts', 'Vocoder will only be used in tts task.'
            self.voc_model_tag = model_tag
            self.voc_version = version
            self.voc_res_dict = self.pretrained_models[model_tag][version]
            self._format_path(self.voc_res_dict)
            if not skip_download:
                self.voc_res_dir = self._fetch(self.voc_res_dict,
                                               self._get_model_dir(model_type))

    @staticmethod
    def get_model_class(model_name) -> List[object]:
        """Dynamic import model class.
        Args:
            model_name (str): Model name.

        Returns:
            List[object]: The imported classes, one per import path registered
                for ``model_name`` in ``model_alias``. Note that when exactly
                one path is registered the class itself is returned, not a
                one-element list.
        """
        assert model_name in model_alias, 'No model classes found for "{}"'.format(
            model_name)
        ret = [
            dynamic_import(import_path)
            for import_path in model_alias[model_name]
        ]

        if len(ret) == 1:
            return ret[0]
        return ret

    def get_versions(self, model_tag: str) -> List[str]:
        """List all available versions.

        Args:
            model_tag (str): Model tag.

        Returns:
            List[str]: Version list of model.
        """
        return list(self.pretrained_models[model_tag].keys())

    def _get_default_version(self, model_tag: str) -> str:
        """Get default version of model.

        Args:
            model_tag (str): Model tag.

        Returns:
            str: Default version, i.e. the last version listed for the tag.
        """
        return self.get_versions(model_tag)[-1]  # get latest version

    def _get_model_dir(self, model_type: int=0) -> os.PathLike:
        """Get resource directory.

        Args:
            model_type (int): 0 for acoustic model otherwise vocoder in tts task.

        Returns:
            os.PathLike: Directory of model resource, built as
                ``MODEL_HOME/<model tag>/<version>``.
        """
        if model_type == 0:
            model_tag = self.model_tag
            version = self.version
        else:
            model_tag = self.voc_model_tag
            version = self.voc_version

        return os.path.join(MODEL_HOME, model_tag, version)

    def _get_pretrained_models(self) -> Dict[str, str]:
        """Get all available models for current task.

        Returns:
            Dict[str, str]: A dictionary with model tag and resources info.
                Empty when no ``{task}_{model_format}_pretrained_models``
                table exists.
        """
        import_models = '{}_{}_pretrained_models'.format(self.task,
                                                         self.model_format)
        # A plain import + getattr replaces the former exec()/locals() lookup,
        # which depended on exec() writing into the mapping returned by a later
        # locals() call; PEP 667 (Python 3.13) no longer guarantees that.
        try:
            from . import pretrained_models
            models = OrderedDict(getattr(pretrained_models, import_models))
        except (ImportError, AttributeError):
            models = OrderedDict()  # no models.
        return models

    def _inference_mode_filter(self, inference_mode: Optional[str]):
        """Filter models dict based on inference_mode.

        Entries of ``self.pretrained_models`` that do not match the mode are
        deleted in place.

        Args:
            inference_mode (Optional[str]): 'online', 'offline' or None.

        Raises:
            NotImplementedError: If the task is neither asr nor tts.
        """
        if inference_mode is None:
            return

        if self.task == 'asr':
            # An asr model counts as online when its tag contains 'online'.
            for model_tag in list(self.pretrained_models.keys()):
                is_online = 'online' in model_tag
                keep = (inference_mode == 'online' and is_online) or (
                    inference_mode == 'offline' and not is_online)
                if not keep:
                    del self.pretrained_models[model_tag]
        elif self.task == 'tts':
            # Hardcode for tts online models.
            # NOTE(review): these tags carry no format suffix, while the onnx
            # table uses tags such as 'fastspeech2_csmsc_onnx-zh'; with
            # model_format='onnx' the online filter therefore removes every
            # entry. Confirm whether that is intended.
            tts_online_models = [
                'fastspeech2_csmsc-zh', 'fastspeech2_cnndecoder_csmsc-zh',
                'mb_melgan_csmsc-zh', 'hifigan_csmsc-zh'
            ]
            for model_tag in list(self.pretrained_models.keys()):
                keep = inference_mode == 'offline' or (
                    inference_mode == 'online' and
                    model_tag in tts_online_models)
                if not keep:
                    del self.pretrained_models[model_tag]
        else:
            raise NotImplementedError('Only supports asr and tts task.')

    @staticmethod
    def _fetch(res_dict: Dict[str, str],
               target_dir: os.PathLike) -> os.PathLike:
        """Fetch archive from url.

        Args:
            res_dict (Dict[str, str]): Info dict of a resource.
            target_dir (os.PathLike): Directory to save archives.

        Returns:
            os.PathLike: Directory of model resource.
        """
        return download_and_decompress(res_dict, target_dir)

    @staticmethod
    def _format_path(res_dict: Dict[str, str]):
        """Rewrite '/'-separated relative paths with the OS path separator.

        The dict is modified in place. Only string values containing '/' are
        touched; values starting with 'http://' or 'https://' are left as is.

        Args:
            res_dict (Dict[str, str]): Info dict of a resource.
        """
        for k, v in res_dict.items():
            if not (isinstance(v, str) and '/' in v):
                continue
            if v.startswith('https://') or v.startswith('http://'):
                continue
            # Only existing keys are reassigned, so the dict size is stable
            # while iterating.
            res_dict[k] = os.path.join(*(v.split('/')))


================================================
FILE: paddlespeech/s2t/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any
from typing import List
from typing import Tuple
from typing import Union

import paddle
from paddle import nn
from paddle.nn import functional as F

from paddlespeech.s2t.utils.log import Log

logger = Log(__name__).getlog()

########### hack logging #############
# Make ``logger.warn`` an alias of ``logger.warning``.
setattr(logger, 'warn', logger.warning)

########### hack paddle #############
# Short dtype names on the ``paddle`` module, stored as dtype strings.
setattr(paddle, 'half', 'float16')
setattr(paddle, 'float', 'float32')
setattr(paddle, 'double', 'float64')
setattr(paddle, 'short', 'int16')
setattr(paddle, 'int', 'int32')
setattr(paddle, 'long', 'int64')
setattr(paddle, 'uint16', 'uint16')
setattr(paddle, 'cdouble', 'complex128')

# Expose a few functional ops at the top level of ``paddle``, but only when
# the installed paddle does not already provide them.
if not hasattr(paddle, 'sigmoid'):
    logger.debug("register user sigmoid to paddle, remove this when fixed!")
    paddle.sigmoid = paddle.nn.functional.sigmoid

if not hasattr(paddle, 'log_sigmoid'):
    logger.debug("register user log_sigmoid to paddle, remove this when fixed!")
    paddle.log_sigmoid = paddle.nn.functional.log_sigmoid

if not hasattr(paddle, 'relu'):
    logger.debug("register user relu to paddle, remove this when fixed!")
    paddle.relu = paddle.nn.functional.relu


def cat(xs, dim=0):
    """Concatenate ``xs`` along ``dim`` by delegating to ``paddle.concat``."""
    joined = paddle.concat(xs, axis=dim)
    return joined


# Registered only when paddle itself has no ``cat``.
if not hasattr(paddle, 'cat'):
    logger.debug(
        "override cat of paddle if exists or register, remove this when fixed!")
    setattr(paddle, 'cat', cat)


########### hack paddle.Tensor #############
def item(x: paddle.Tensor):
    """Return the value of ``x`` as a Python scalar via ``numpy().item()``."""
    as_array = x.numpy()
    return as_array.item()


# Registered only when paddle.Tensor has no ``item`` of its own.
if not hasattr(paddle.Tensor, 'item'):
    logger.debug(
        "override item of paddle.Tensor if exists or register, remove this when fixed!"
    )
    setattr(paddle.Tensor, 'item', item)


def func_long(x: paddle.Tensor):
    """Cast ``x`` to the dtype stored in ``paddle.long``."""
    casted = paddle.cast(x, paddle.long)
    return casted


# Registered on both Tensor and static Variable when Tensor has no ``long``.
if not hasattr(paddle.Tensor, 'long'):
    logger.debug(
        "override long of paddle.Tensor if exists or register, remove this when fixed!"
    )
    setattr(paddle.Tensor, 'long', func_long)
    setattr(paddle.static.Variable, 'long', func_long)

# Attach ``paddle.numel`` as a method on Tensor and static Variable when
# Tensor does not already have ``numel``.
if not hasattr(paddle.Tensor, 'numel'):
    logger.debug(
        "override numel of paddle.Tensor if exists or register, remove this when fixed!"
    )
    setattr(paddle.Tensor, 'numel', paddle.numel)
    setattr(paddle.static.Variable, 'numel', paddle.numel)


def new_full(x: paddle.Tensor,
             size: Union[List[int], Tuple[int], paddle.Tensor],
             fill_value: Union[float, int, bool, paddle.Tensor],
             dtype=None):
    """Create a tensor of shape ``size`` filled with ``fill_value``.

    Args:
        x (paddle.Tensor): Tensor that supplies the default dtype.
        size: Shape of the new tensor, passed straight to ``paddle.full``.
        fill_value: Value every element is set to.
        dtype: Dtype of the result. Defaults to None, meaning ``x.dtype``.
            Previously this argument was accepted but silently ignored.

    Returns:
        paddle.Tensor: The result of ``paddle.full``.
    """
    target_dtype = x.dtype if dtype is None else dtype
    return paddle.full(size, fill_value, dtype=target_dtype)


# Registered on both Tensor and static Variable when Tensor has no ``new_full``.
if not hasattr(paddle.Tensor, 'new_full'):
    logger.debug(
        "override new_full of paddle.Tensor if exists or register, remove this when fixed!"
    )
    paddle.Tensor.new_full = new_full
    paddle.static.Variable.new_full = new_full


def contiguous(xs: paddle.Tensor) -> paddle.Tensor:
    """No-op shim: hand back ``xs`` itself, unchanged."""
    same_tensor = xs
    return same_tensor


# Registered on both Tensor and static Variable when Tensor has no
# ``contiguous``.
if not hasattr(paddle.Tensor, 'contiguous'):
    logger.debug(
        "override contiguous of paddle.Tensor if exists or register, remove this when fixed!"
    )
    setattr(paddle.Tensor, 'contiguous', contiguous)
    setattr(paddle.static.Variable, 'contiguous', contiguous)


def view(xs: paddle.Tensor, *args: int) -> paddle.Tensor:
    """Reshape ``xs`` to the dimensions given as positional arguments."""
    new_shape = args  # the tuple of positional dims is passed on as the shape
    return xs.reshape(new_shape)


# Registered on both Tensor and static Variable when Tensor has no ``view``.
if not hasattr(paddle.Tensor, 'view'):
    logger.debug("register user view to paddle.Tensor, remove this when fixed!")
    setattr(paddle.Tensor, 'view', view)
    setattr(paddle.static.Variable, 'view', view)


def view_as(xs: paddle.Tensor, ys: paddle.Tensor) -> paddle.Tensor:
    """Reshape ``xs`` to the shape reported by ``paddle.shape(ys)``."""
    target_shape = paddle.shape(ys)
    return xs.reshape(target_shape)


# Registered on both Tensor and static Variable when Tensor has no ``view_as``.
if not hasattr(paddle.Tensor, 'view_as'):
    logger.debug(
        "register user view_as to paddle.Tensor, remove this when fixed!")
    setattr(paddle.Tensor, 'view_as', view_as)
    setattr(paddle.static.Variable, 'view_as', view_as)


def is_broadcastable(shp1, shp2):
    """Tell whether two shapes are broadcast-compatible.

    Dimensions are compared pairwise starting from the trailing one; a pair is
    compatible when the sizes are equal or either is 1. Leading dimensions
    that only one shape has are not checked.

    Returns:
        bool: True when every compared pair is compatible.
    """
    return all(a == 1 or b == 1 or a == b
               for a, b in zip(shp1[::-1], shp2[::-1]))


def broadcast_shape(shp1, shp2):
    """Compute the shape two shapes broadcast to.

    Dimensions are aligned from the trailing side and the larger size of each
    pair is taken. The shorter shape is padded with leading 1s, so the result
    always has the rank of the longer input (previously the extra leading
    dimensions were dropped because ``zip`` stops at the shorter shape).

    Compatibility of the two shapes is not checked here; see
    ``is_broadcastable``.

    Args:
        shp1: First shape, a sequence of ints.
        shp2: Second shape, a sequence of ints.

    Returns:
        list: The broadcast shape.
    """
    rev1 = list(shp1[::-1])
    rev2 = list(shp2[::-1])
    rank = max(len(rev1), len(rev2))
    # Pad on the (reversed) tail, i.e. with leading 1s in the original order.
    rev1 += [1] * (rank - len(rev1))
    rev2 += [1] * (rank - len(rev2))
    result = [max(a, b) for a, b in zip(rev1, rev2)]
    return result[::-1]


def masked_fill(xs: paddle.Tensor,
                mask: paddle.Tensor,
                value: Union[float, int]):
    """Return a tensor equal to ``xs`` with ``value`` where ``mask`` is true.

    ``mask.stop_gradient`` is set to True on the mask that was passed in.
    """
    # The arithmetic form
    #     mask = mask.astype(xs.dtype)
    #     xs * (1.0 - mask) + mask * value
    # is not used because it will be nan when value is `inf`.
    target_shape = broadcast_shape(xs.shape, mask.shape)
    mask.stop_gradient = True
    expanded_mask = mask.broadcast_to(target_shape)
    filler = paddle.full_like(xs, fill_value=value)
    return paddle.where(expanded_mask, filler, xs)


# Registered on both Tensor and static Variable when Tensor has no
# ``masked_fill``.
if not hasattr(paddle.Tensor, 'masked_fill'):
    logger.debug(
        "register user masked_fill to paddle.Tensor, remove this when fixed!")
    setattr(paddle.Tensor, 'masked_fill', masked_fill)
    setattr(paddle.static.Variable, 'masked_fill', masked_fill)


def masked_fill_(xs: paddle.Tensor,
                 mask: paddle.Tensor,
                 value: Union[float, int]) -> paddle.Tensor:
    """In-place variant of masked fill: write ``value`` into ``xs`` where
    ``mask`` is true.

    Args:
        xs (paddle.Tensor): Tensor to overwrite.
        mask (paddle.Tensor): Mask whose shape must be broadcast-compatible
            with ``xs``.
        value (Union[float, int]): Fill value.

    Returns:
        paddle.Tensor: ``xs``, after the result has been assigned into it.

    Raises:
        AssertionError: If the shapes are not broadcast-compatible.
    """
    assert is_broadcastable(xs.shape, mask.shape) is True
    bshape = paddle.broadcast_shape(xs.shape, mask.shape)
    mask = mask.broadcast_to(bshape)
    trues = paddle.ones_like(xs) * value
    ret = paddle.where(mask, trues, xs)
    # Copy the detached result back into xs.
    paddle.assign(ret.detach(), output=xs)
    return xs


if not hasattr(paddle.Tensor, 'masked_fill_'):
    logger.debug(
        "register user masked_fill_ to paddle.Tensor, remove this when fixed!")
    paddle.Tensor.masked_fill_ = masked_fill_
    # Fixed: the method used to be registered on Variable only under the
    # misspelled name ``maksed_fill_``, so ``Variable.masked_fill_`` was
    # never provided by this shim.
    paddle.static.Variable.masked_fill_ = masked_fill_
    # Keep the misspelled attribute so existing references keep working.
    paddle.static.Variable.maksed_fill_ = masked_fill_


def fill_(xs: paddle.Tensor, value: Union[float, int]) -> paddle.Tensor:
    """Overwrite every element of ``xs`` with ``value`` and return ``xs``."""
    filled = paddle.full_like(xs, value)
    # Copy the detached constant tensor back into xs.
    paddle.assign(filled.detach(), output=xs)
    return xs


# Registered on both Tensor and static Variable when Tensor has no ``fill_``.
if not hasattr(paddle.Tensor, 'fill_'):
    logger.debug(
        "register user fill_ to paddle.Tensor, remove this when fixed!")
    setattr(paddle.Tensor, 'fill_', fill_)
    setattr(paddle.static.Variable, 'fill_', fill_)


def repeat(xs: paddle.Tensor, *size: Any) -> paddle.Tensor:
    """Tile ``xs`` with ``paddle.tile``, using the positional arguments as
    the repeat counts."""
    repeat_times = size
    return paddle.tile(xs, repeat_times)


# Registered on both Tensor and static Variable when Tensor has no ``repeat``.
if not hasattr(paddle.Tensor, 'repeat'):
    logger.debug(
        "register user repeat to paddle.Tensor, remove this when fixed!")
    setattr(paddle.Tensor, 'repeat', repeat)
    setattr(paddle.static.Variable, 'repeat', repeat)

# Attach functional activations as Tensor methods, each only when the
# installed paddle.Tensor does not already provide it.
if not hasattr(paddle.Tensor, 'softmax'):
    logger.debug(
        "register user softmax to paddle.Tensor, remove this when fixed!")
    paddle.Tensor.softmax = paddle.nn.functional.softmax

if not hasattr(paddle.Tensor, 'sigmoid'):
    logger.debug(
        "register user sigmoid to paddle.Tensor, remove this when fixed!")
    paddle.Tensor.sigmoid = paddle.nn.functional.sigmoid

if not hasattr(paddle.Tensor, 'relu'):
    logger.debug("register user relu to paddle.Tensor, remove this when fixed!")
    paddle.Tensor.relu = paddle.nn.functional.relu


def type_as(x: paddle.Tensor, other: paddle.Tensor) -> paddle.Tensor:
    """Cast ``x`` to the dtype of ``other``."""
    wanted_dtype = other.dtype
    return x.astype(wanted_dtype)


# Registered on both Tensor and static Variable when Tensor has no ``type_as``.
if not hasattr(paddle.Tensor, 'type_as'):
    logger.debug(
        "register user type_as to paddle.Tensor, remove this when fixed!")
    paddle.Tensor.type_as = type_as
    paddle.static.Variable.type_as = type_as


def to(x: paddle.Tensor, *args, **kwargs) -> paddle.Tensor:
    """Minimal ``to`` shim taking exactly one positional argument.

    A string is used as a dtype and a tensor contributes its dtype; any other
    argument is treated as a device and ``x`` is returned unchanged. Keyword
    arguments are accepted but not used.
    """
    assert len(args) == 1
    target = args[0]
    if isinstance(target, str):  # dtype
        return x.astype(target)
    if isinstance(target, paddle.Tensor):  # Tensor
        return x.astype(target.dtype)
    # Device
    return x


# Registered on both Tensor and static Variable when Tensor has no ``to``.
if not hasattr(paddle.Tensor, 'to'):
    logger.debug("register user to paddle.Tensor, remove this when fixed!")
    paddle.Tensor.to = to
    paddle.static.Variable.to = to


def func_float(x: paddle.Tensor) -> paddle.Tensor:
    """Cast ``x`` to the dtype stored in ``paddle.float``."""
    float_dtype = paddle.float
    return x.astype(float_dtype)


# Registered on both Tensor and static Variable when Tensor has no ``float``.
if not hasattr(paddle.Tensor, 'float'):
    logger.debug(
        "register user float to paddle.Tensor, remove this when fixed!")
    paddle.Tensor.float = func_float
    paddle.static.Variable.float = func_float


def func_int(x: paddle.Tensor) -> paddle.Tensor:
    """Cast ``x`` to the dtype stored in ``paddle.int``."""
    int_dtype = paddle.int
    return x.astype(int_dtype)


# Registered on both Tensor and static Variable when Tensor has no ``int``.
if not hasattr(paddle.Tensor, 'int'):
    logger.debug("register user int to paddle.Tensor, remove this when fixed!")
    paddle.Tensor.int = func_int
    paddle.static.Variable.int = func_int


def tolist(x: paddle.Tensor) -> List[Any]:
    """Convert ``x`` to a Python list via ``numpy().tolist()``."""
    as_array = x.numpy()
    return as_array.tolist()


# Registered on both Tensor and static Variable when Tensor has no ``tolist``.
if not hasattr(paddle.Tensor, 'tolist'):
    logger.debug(
        "register user tolist to paddle.Tensor, remove this when fixed!")
    paddle.Tensor.tolist = tolist
    paddle.static.Variable.tolist = tolist

########### hack paddle.nn #############
from paddle.nn import Layer
from typing import Optional
from typing import Mapping
from typing import Iterable
from typing import Tuple
from typing import Iterator
from collections import OrderedDict, abc as container_abcs


class LayerDict(paddle.nn.Layer):
    r"""Holds sublayers in a dictionary.

    :class:`~paddle.nn.LayerDict` can be indexed like a regular Python dictionary,
    but the layers it contains are properly registered, and will be visible by all
    :class:`~paddle.nn.Layer` methods.

    :class:`~paddle.nn.LayerDict` is an **ordered** dictionary that respects

    * the order of insertion, and

    * in :meth:`~paddle.nn.LayerDict.update`, the order of the merged
      ``OrderedDict``, ``dict`` (started from Python 3.6) or another
      :class:`~paddle.nn.LayerDict` (the argument to
      :meth:`~paddle.nn.LayerDict.update`).

    Note that :meth:`~paddle.nn.LayerDict.update` with other unordered mapping
    types (e.g., Python's plain ``dict`` before Python version 3.6) does not
    preserve the order of the merged mapping.

    Implementation note: children are stored through
    :meth:`paddle.nn.Layer.add_sublayer` in the layer's ``_sub_layers``
    ordered dict. ``paddle.nn.Layer`` has no ``_modules`` / ``add_module``
    (those are the torch names), so using them would raise ``AttributeError``.

    Args:
        modules (iterable, optional): a mapping (dictionary) of (string: layer)
            or an iterable of key-value pairs of type (string, layer)

    Example::

        class MyModule(nn.Layer):
            def __init__(self):
                super(MyModule, self).__init__()
                self.choices = nn.LayerDict({
                        'conv': nn.Conv2D(10, 10, 3),
                        'pool': nn.MaxPool2D(3)
                })
                self.activations = nn.LayerDict([
                        ['lrelu', nn.LeakyReLU()],
                        ['prelu', nn.PReLU()]
                ])

            def forward(self, x, choice, act):
                x = self.choices[choice](x)
                x = self.activations[act](x)
                return x
    """

    def __init__(self, modules: Optional[Mapping[str, Layer]]=None) -> None:
        super(LayerDict, self).__init__()
        if modules is not None:
            self.update(modules)

    def __getitem__(self, key: str) -> Layer:
        return self._sub_layers[key]

    def __setitem__(self, key: str, module: Layer) -> None:
        # add_sublayer registers the child so parameters()/state_dict() see it
        self.add_sublayer(key, module)

    def __delitem__(self, key: str) -> None:
        del self._sub_layers[key]

    def __len__(self) -> int:
        return len(self._sub_layers)

    def __iter__(self) -> Iterator[str]:
        return iter(self._sub_layers)

    def __contains__(self, key: str) -> bool:
        return key in self._sub_layers

    def clear(self) -> None:
        """Remove all items from the LayerDict.
        """
        self._sub_layers.clear()

    def pop(self, key: str) -> Layer:
        r"""Remove key from the LayerDict and return its layer.

        Args:
            key (string): key to pop from the LayerDict

        Raises:
            KeyError: if ``key`` is not present.
        """
        v = self[key]
        del self[key]
        return v

    def keys(self) -> Iterable[str]:
        r"""Return an iterable of the LayerDict keys.
        """
        return self._sub_layers.keys()

    def items(self) -> Iterable[Tuple[str, Layer]]:
        r"""Return an iterable of the LayerDict key/value pairs.
        """
        return self._sub_layers.items()

    def values(self) -> Iterable[Layer]:
        r"""Return an iterable of the LayerDict values.
        """
        return self._sub_layers.values()

    def update(self, modules: Mapping[str, Layer]) -> None:
        r"""Update the :class:`~paddle.nn.LayerDict` with the key-value pairs from a
        mapping or an iterable, overwriting existing keys.

        .. note::
            If :attr:`modules` is an ``OrderedDict``, a :class:`~paddle.nn.LayerDict`, or
            an iterable of key-value pairs, the order of new elements in it is preserved.

        Args:
            modules (iterable): a mapping (dictionary) from string to :class:`~paddle.nn.Layer`,
                or an iterable of key-value pairs of type (string, :class:`~paddle.nn.Layer`)

        Raises:
            TypeError: if ``modules`` (or one of its elements, for a sequence
                of pairs) is not iterable.
            ValueError: if an element of a sequence of pairs does not have
                exactly two items.
        """
        if not isinstance(modules, container_abcs.Iterable):
            raise TypeError("LayerDict.update should be called with an "
                            "iterable of key/value pairs, but got " + type(
                                modules).__name__)

        if isinstance(modules,
                      (OrderedDict, LayerDict, container_abcs.Mapping)):
            for key, module in modules.items():
                self[key] = module
        else:
            # modules here can be a list with two items
            for j, m in enumerate(modules):
                if not isinstance(m, container_abcs.Iterable):
                    raise TypeError("LayerDict update sequence element "
                                    "#" + str(j) + " should be Iterable; is " +
                                    type(m).__name__)
                if not len(m) == 2:
                    raise ValueError("LayerDict update sequence element "
                                     "#" + str(j) + " has length " + str(
                                         len(m)) + "; 2 is required")
                # modules can be Mapping (what it's typed at), or a list: [(name1, module1), (name2, module2)]
                # that's too cumbersome to type correctly with overloads, so we add an ignore here
                self[m[0]] = m[1]  # type: ignore[assignment]

    # forward is intentionally not defined: the container itself is not
    # callable as a model and falls back on the base Layer's forward.


# Only install the fallback container when this Paddle build has no native
# ``paddle.nn.LayerDict``.
if not hasattr(paddle.nn, 'LayerDict'):
    logger.debug(
        "register user LayerDict to paddle.nn, remove this when fixed!")
    paddle.nn.LayerDict = LayerDict


================================================
FILE: paddlespeech/s2t/decoders/README.md
================================================
# Decoders
We borrow a lot of code from ESPnet, which is released under the Apache License 2.0 (http://www.apache.org/licenses/LICENSE-2.0).

## Reference
### CTC Prefix Beam Search
* [Sequence Modeling With CTC](https://distill.pub/2017/ctc/)
* [First-Pass Large Vocabulary Continuous Speech Recognition using Bi-Directional Recurrent DNNs](https://arxiv.org/pdf/1408.2873.pdf)

### CTC Prefix Score & Joint CTC/ATT One-pass Decoding
* [Hybrid CTC/Attention Architecture for End-to-End Speech Recognition](http://www.ifp.illinois.edu/speech/speech_web_lg/slides/2019/watanabe_hybridCTCAttention_2017.pdf)
* [Vectorized Beam Search for CTC-Attention-based Speech Recognition](https://www.isca-speech.org/archive/pdfs/interspeech_2019/seki19b_interspeech.pdf)

### Streaming Joint CTC/ATT Beam Search
* [STREAMING TRANSFORMER ASR WITH BLOCKWISE SYNCHRONOUS BEAM SEARCH](https://arxiv.org/abs/2006.14941)


================================================
FILE: paddlespeech/s2t/decoders/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/decoders/beam_search/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .batch_beam_search import BatchBeamSearch
from .beam_search import beam_search
from .beam_search import BeamSearch
from .beam_search import Hypothesis


================================================
FILE: paddlespeech/s2t/decoders/beam_search/batch_beam_search.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Reference espnet Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)


class BatchBeamSearch:
    """Placeholder for a batched beam search; no behaviour is implemented."""


================================================
FILE: paddlespeech/s2t/decoders/beam_search/beam_search.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""Beam search module."""
from itertools import chain
from typing import Any
from typing import Dict
from typing import List
from typing import NamedTuple
from typing import Tuple
from typing import Union

import paddle

from ..scorers.scorer_interface import PartialScorerInterface
from ..scorers.scorer_interface import ScorerInterface
from ..utils import end_detect
from paddlespeech.s2t.utils.log import Log

logger = Log(__name__).getlog()


class Hypothesis(NamedTuple):
    """One beam-search hypothesis: its tokens, scores and scorer states."""

    # token id sequence of the hypothesis, shape (T,)
    yseq: paddle.Tensor
    # total score of the hypothesis
    score: Union[float, paddle.Tensor] = 0
    # score per scorer name
    # NOTE: the default dict object is shared by every instance that does not
    # pass its own ``scores``.
    scores: Dict[str, Union[float, paddle.Tensor]] = dict()
    # state per scorer name (same sharing caveat as ``scores``)
    states: Dict[str, Any] = dict()

    def asdict(self) -> dict:
        """Return the hypothesis as a dict of JSON-friendly values.

        ``yseq`` becomes a Python list, ``score`` and every value of
        ``scores`` become ``float``; ``states`` is passed through unchanged.
        """
        token_ids = self.yseq.tolist()
        total = float(self.score)
        per_scorer = dict()
        for name, value in self.scores.items():
            per_scorer[name] = float(value)
        plain = self._replace(yseq=token_ids, score=total, scores=per_scorer)
        return plain._asdict()


class BeamSearch(paddle.nn.Layer):
    """Beam search implementation.

    Combines several scorers (full-vocabulary scorers and partial scorers that
    only score a pre-selected subset of tokens) with per-scorer weights and
    expands hypotheses one token at a time, keeping at most ``beam_size``
    running hypotheses per step.
    """

    def __init__(
            self,
            scorers: Dict[str, ScorerInterface],
            weights: Dict[str, float],
            beam_size: int,
            vocab_size: int,
            sos: int,
            eos: int,
            token_list: List[str]=None,
            pre_beam_ratio: float=1.5,
            pre_beam_score_key: str=None, ):
        """Initialize beam search.

        Args:
            scorers (dict[str, ScorerInterface]): Dict of decoder modules
                e.g., Decoder, CTCPrefixScorer, LM
                The scorer will be ignored if it is `None`
            weights (dict[str, float]): Dict of weights for each scorers
                The scorer will be ignored if its weight is 0
            beam_size (int): The number of hypotheses kept during search
            vocab_size (int): The number of vocabulary
            sos (int): Start of sequence id
            eos (int): End of sequence id
            token_list (list[str]): List of tokens for debug log
            pre_beam_score_key (str): key of scores to perform pre-beam search;
                either `"full"` (use the weighted sum of all full scorers) or
                the name of one of the kept full scorers
            pre_beam_ratio (float): beam size in the pre-beam search
                will be `int(pre_beam_ratio * beam_size)`

        Raises:
            KeyError: if `pre_beam_score_key` is neither `None`, `"full"` nor
                the name of a kept full scorer.

        """
        super().__init__()
        # set scorers
        self.weights = weights
        self.scorers = dict()  # all = full + partial
        self.full_scorers = dict()  # full tokens
        self.part_scorers = dict()  # partial tokens
        # this module dict is required for recursive cast
        # `self.to(device, dtype)` in `recog.py`
        self.nn_dict = paddle.nn.LayerDict()  # nn.Layer
        for k, v in scorers.items():
            # a scorer without an entry in `weights` gets weight 0 and is skipped
            w = weights.get(k, 0)
            if w == 0 or v is None:
                continue
            assert isinstance(
                v, ScorerInterface
            ), f"{k} ({type(v)}) does not implement ScorerInterface"
            self.scorers[k] = v
            if isinstance(v, PartialScorerInterface):
                self.part_scorers[k] = v
            else:
                self.full_scorers[k] = v
            # only scorers that are Layers are registered as sublayers
            if isinstance(v, paddle.nn.Layer):
                self.nn_dict[k] = v

        # set configurations
        self.sos = sos
        self.eos = eos
        self.token_list = token_list
        # pre_beam_size > beam_size (with the default pre_beam_ratio of 1.5)
        self.pre_beam_size = int(pre_beam_ratio * beam_size)
        self.beam_size = beam_size
        self.n_vocab = vocab_size
        if (pre_beam_score_key is not None and pre_beam_score_key != "full" and
                pre_beam_score_key not in self.full_scorers):
            raise KeyError(
                f"{pre_beam_score_key} is not found in {self.full_scorers}")
        # selected `key` scorer to do pre beam search
        self.pre_beam_score_key = pre_beam_score_key
        # do_pre_beam when need, valid and has part_scorers
        self.do_pre_beam = (self.pre_beam_score_key is not None and
                            self.pre_beam_size < self.n_vocab and
                            len(self.part_scorers) > 0)

    def init_hyp(self, x: paddle.Tensor) -> List[Hypothesis]:
        """Get an initial hypothesis data.

        Args:
            x (paddle.Tensor): The encoder output feature, (T, D)

        Returns:
            List[Hypothesis]: A single-element list holding the initial
                hypothesis: `yseq` is `[sos]`, every score is 0.0 and the
                states come from each scorer's `init_state(x)`.

        """
        init_states = dict()
        init_scores = dict()
        for k, d in self.scorers.items():
            init_states[k] = d.init_state(x)
            init_scores[k] = 0.0
        return [
            Hypothesis(
                yseq=paddle.to_tensor([self.sos], place=x.place),
                score=0.0,
                scores=init_scores,
                states=init_states, )
        ]

    @staticmethod
    def append_token(xs: paddle.Tensor,
                     x: Union[int, paddle.Tensor]) -> paddle.Tensor:
        """Append new token to prefix tokens.

        Args:
            xs (paddle.Tensor): The prefix token, (T,)
            x (int or paddle.Tensor): The new token to append. An `int` is
                wrapped into a one-element tensor of `xs.dtype`; a tensor is
                concatenated as it is.

        Returns:
            paddle.Tensor: (T+1,), New tensor contains: xs + [x]

        """
        x = paddle.to_tensor([x], dtype=xs.dtype) if isinstance(x, int) else x
        return paddle.concat((xs, x))

    def score_full(self, hyp: Hypothesis, x: paddle.Tensor
                   ) -> Tuple[Dict[str, paddle.Tensor], Dict[str, Any]]:
        """Score new hypothesis by `self.full_scorers`.

        Args:
            hyp (Hypothesis): Hypothesis with prefix tokens to score
            x (paddle.Tensor): Corresponding input feature, (T, D)

        Returns:
            Tuple[Dict[str, paddle.Tensor], Dict[str, Any]]: Tuple of
                score dict of `hyp` that has string keys of `self.full_scorers`
                and tensor score values of shape: `(self.n_vocab,)`,
                and state dict that has string keys
                and state values of `self.full_scorers`

        """
        scores = dict()
        states = dict()
        for k, d in self.full_scorers.items():
            # scores[k] shape (self.n_vocab,)
            scores[k], states[k] = d.score(hyp.yseq, hyp.states[k], x)
        return scores, states

    def score_partial(self,
                      hyp: Hypothesis,
                      ids: paddle.Tensor,
                      x: paddle.Tensor
                      ) -> Tuple[Dict[str, paddle.Tensor], Dict[str, Any]]:
        """Score new hypothesis by `self.part_scorers`.

        Args:
            hyp (Hypothesis): Hypothesis with prefix tokens to score
            ids (paddle.Tensor): 1D tensor of new partial tokens to score,
                len(ids) < n_vocab
            x (paddle.Tensor): Corresponding input feature, (T, D)

        Returns:
            Tuple[Dict[str, paddle.Tensor], Dict[str, Any]]: Tuple of
                score dict of `hyp` that has string keys of `self.part_scorers`
                and tensor score values of shape: `(len(ids),)`,
                and state dict that has string keys
                and state values of `self.part_scorers`

        """
        scores = dict()
        states = dict()
        for k, d in self.part_scorers.items():
            # scores[k] shape (len(ids),)
            scores[k], states[k] = d.score_partial(hyp.yseq, ids, hyp.states[k],
                                                   x)
        return scores, states

    def beam(self, weighted_scores: paddle.Tensor,
             ids: paddle.Tensor) -> Tuple[paddle.Tensor, paddle.Tensor]:
        """Compute topk full token ids and partial token ids.

        Note:
            When a pre-beam was performed (`ids` is shorter than
            `weighted_scores`), `weighted_scores` is modified in place: every
            entry not listed in `ids` is overwritten with `-inf`.

        Args:
            weighted_scores (paddle.Tensor): The weighted sum scores for each tokens.
                Its shape is `(self.n_vocab,)`.
            ids (paddle.Tensor): The partial token ids(Global) to compute topk.

        Returns:
            Tuple[paddle.Tensor, paddle.Tensor]:
                The topk full token ids and partial token ids.
                Their shapes are `(self.beam_size,)`.
                i.e. (global ids, global relative local ids).

        """
        # no pre beam performed, `ids` equal to `weighted_scores`
        if paddle.shape(weighted_scores)[0] == paddle.shape(ids)[0]:
            top_ids = weighted_scores.topk(
                self.beam_size)[1]  # index in n_vocab
            return top_ids, top_ids

        # mask pruned in pre-beam not to select in topk
        # (save the kept scores, blank everything to -inf, then restore them)
        tmp = weighted_scores[ids]
        weighted_scores[:] = -float("inf")
        weighted_scores[ids] = tmp
        # top_ids no equal to local_ids, since ids shape not same
        top_ids = weighted_scores.topk(self.beam_size)[1]  # index in n_vocab
        local_ids = weighted_scores[ids].topk(
            self.beam_size)[1]  # index in len(ids)
        return top_ids, local_ids

    @staticmethod
    def merge_scores(
            prev_scores: Dict[str, float],
            next_full_scores: Dict[str, paddle.Tensor],
            full_idx: int,
            next_part_scores: Dict[str, paddle.Tensor],
            part_idx: int, ) -> Dict[str, paddle.Tensor]:
        """Merge scores for new hypothesis.

        Args:
            prev_scores (Dict[str, float]):
                The previous hypothesis scores by `self.scorers`
            next_full_scores (Dict[str, paddle.Tensor]): scores by `self.full_scorers`
            full_idx (int): The next token id for `next_full_scores`
            next_part_scores (Dict[str, paddle.Tensor]):
                scores of partial tokens by `self.part_scorers`
            part_idx (int): The new token id for `next_part_scores`

        Returns:
            Dict[str, paddle.Tensor]: The new score dict.
                Its keys are names of `self.full_scorers` and `self.part_scorers`.
                Its values are scalar tensors by the scorers.

        """
        # a fresh dict is built, so `prev_scores` is left untouched
        new_scores = dict()
        for k, v in next_full_scores.items():
            new_scores[k] = prev_scores[k] + v[full_idx]
        for k, v in next_part_scores.items():
            new_scores[k] = prev_scores[k] + v[part_idx]
        return new_scores

    def merge_states(self, states: Any, part_states: Any, part_idx: int) -> Any:
        """Merge states for new hypothesis.

        Args:
            states: states of `self.full_scorers`
            part_states: states of `self.part_scorers`
            part_idx (int): The new token id for `part_scores`

        Returns:
            Dict[str, Any]: The new state dict.
                Its keys are names of `self.full_scorers` and `self.part_scorers`.
                Its values are states of the scorers; full-scorer states are
                copied over as they are, partial-scorer states are obtained
                with the scorer's `select_state(part_states[k], part_idx)`.

        """
        new_states = dict()
        for k, v in states.items():
            new_states[k] = v
        for k, d in self.part_scorers.items():
            new_states[k] = d.select_state(part_states[k], part_idx)
        return new_states

    def search(self, running_hyps: List[Hypothesis],
               x: paddle.Tensor) -> List[Hypothesis]:
        """Search new tokens for running hypotheses and encoded speech x.

        Args:
            running_hyps (List[Hypothesis]): Running hypotheses on beam
            x (paddle.Tensor): Encoded speech feature (T, D)

        Returns:
            List[Hypotheses]: Best sorted hypotheses, at most
                `self.beam_size` of them, in descending order of `score`

        """
        best_hyps = []
        part_ids = paddle.arange(self.n_vocab)  # no pre-beam
        for hyp in running_hyps:
            # scoring
            weighted_scores = paddle.zeros([self.n_vocab], dtype=x.dtype)
            scores, states = self.score_full(hyp, x)
            for k in self.full_scorers:
                weighted_scores += self.weights[k] * scores[k]
            # partial scoring
            if self.do_pre_beam:
                # choose which scores rank the candidates given to part scorers
                pre_beam_scores = (weighted_scores
                                   if self.pre_beam_score_key == "full" else
                                   scores[self.pre_beam_score_key])
                part_ids = paddle.topk(pre_beam_scores, self.pre_beam_size)[1]
            part_scores, part_states = self.score_partial(hyp, part_ids, x)
            for k in self.part_scorers:
                weighted_scores[part_ids] += self.weights[k] * part_scores[k]
            # add previous hyp score
            weighted_scores += hyp.score

            # update hyps
            for j, part_j in zip(*self.beam(weighted_scores, part_ids)):
                # `part_j` is `j` relative id in `part_scores`
                # will be (2 x beam at most)
                best_hyps.append(
                    Hypothesis(
                        score=weighted_scores[j],
                        yseq=self.append_token(hyp.yseq, j),
                        scores=self.merge_scores(hyp.scores, scores, j,
                                                 part_scores, part_j),
                        states=self.merge_states(states, part_states, part_j),
                    ))

            # sort and prune 2 x beam -> beam
            # (done after every running hypothesis, so the list never keeps
            # more than `beam_size` entries between iterations)
            best_hyps = sorted(
                best_hyps, key=lambda x: x.score,
                reverse=True)[:min(len(best_hyps), self.beam_size)]
        return best_hyps

    def forward(self,
                x: paddle.Tensor,
                maxlenratio: float=0.0,
                minlenratio: float=0.0) -> List[Hypothesis]:
        """Perform beam search.

        Args:
            x (paddle.Tensor): Encoded speech feature (T, D)
            maxlenratio (float): Input length ratio to obtain max output length.
                If maxlenratio=0.0 (default), the maximum length is the input
                    length and an end-detect function is used to stop early
                If maxlenratio<0.0, its absolute value is interpreted
                    as a constant max output length.
            minlenratio (float): Input length ratio to obtain min output length.
                The derived minimum length is only logged in this method; the
                ratio itself is lowered by 0.1 for the retry that happens when
                no hypothesis ended.

        Returns:
            list[Hypothesis]: N-best decoding results, sorted by descending
                score. An empty list is returned when no hypothesis ended and
                `minlenratio < 0.1`.

        """
        # set length bounds
        if maxlenratio == 0:
            maxlen = paddle.shape(x)[0]
        elif maxlenratio < 0:
            maxlen = -1 * int(maxlenratio)
        else:
            maxlen = max(1, int(maxlenratio * paddle.shape(x)[0]))
        minlen = int(minlenratio * paddle.shape(x)[0])
        logger.info("decoder input length: " + str(paddle.shape(x)[0]))
        logger.info("max output length: " + str(maxlen))
        logger.info("min output length: " + str(minlen))

        # main loop of prefix search
        running_hyps = self.init_hyp(x)
        ended_hyps = []
        for i in range(maxlen):
            logger.debug("position " + str(i))
            best = self.search(running_hyps, x)
            # post process of one iteration
            # (moves finished hypotheses from `best` into `ended_hyps`)
            running_hyps = self.post_process(i, maxlen, maxlenratio, best,
                                             ended_hyps)
            # end detection
            if maxlenratio == 0.0 and end_detect(
                [h.asdict() for h in ended_hyps], i):
                logger.info(f"end detected at {i}")
                break
            if len(running_hyps) == 0:
                logger.info("no hypothesis. Finish decoding.")
                break
            else:
                logger.debug(f"remained hypotheses: {len(running_hyps)}")

        nbest_hyps = sorted(ended_hyps, key=lambda x: x.score, reverse=True)
        # check the number of hypotheses reaching to eos
        if len(nbest_hyps) == 0:
            logger.warning("there is no N-best results, perform recognition "
                           "again with smaller minlenratio.")
            # retry recursively with a smaller minlenratio, or give up
            return ([] if minlenratio < 0.1 else
                    self.forward(x, maxlenratio, max(0.0, minlenratio - 0.1)))

        # report the best result
        best = nbest_hyps[0]
        for k, v in best.scores.items():
            logger.info(
                f"{float(v):6.2f} * {self.weights[k]:3} = {float(v) * self.weights[k]:6.2f} for {k}"
            )
        logger.info(f"total log probability: {float(best.score):.2f}")
        logger.info(
            f"normalized log probability: {float(best.score) / len(best.yseq):.2f}"
        )
        logger.info(f"total number of ended hypotheses: {len(nbest_hyps)}")
        if self.token_list is not None:
            # Earlier variant that dropped the last token, kept for reference:
            # logger.info(
            #     "best hypo: "
            #     + "".join([self.token_list[x] for x in best.yseq[1:-1]])
            #     + "\n"
            # )
            # the current log skips only the first token (sos)
            logger.info("best hypo: " + "".join(
                [self.token_list[x] for x in best.yseq[1:]]) + "\n")
        return nbest_hyps

    def post_process(
            self,
            i: int,
            maxlen: int,
            maxlenratio: float,
            running_hyps: List[Hypothesis],
            ended_hyps: List[Hypothesis], ) -> List[Hypothesis]:
        """Perform post-processing of beam search iterations.

        Hypotheses whose last token is `self.eos` get each scorer's
        `final_score` added and are appended to `ended_hyps` (which is
        modified in place); all others are returned to keep running.

        Args:
            i (int): The length of hypothesis tokens.
            maxlen (int): The maximum length of tokens in beam search.
            maxlenratio (float): The maximum length ratio in beam search.
                Not used inside this method.
            running_hyps (List[Hypothesis]): The running hypotheses in beam search.
            ended_hyps (List[Hypothesis]): The ended hypotheses in beam search.

        Returns:
            List[Hypothesis]: The new running hypotheses.

        """
        logger.debug(f"the number of running hypotheses: {len(running_hyps)}")
        if self.token_list is not None:
            # NOTE(review): indexes running_hyps[0], so this assumes the list
            # is non-empty whenever a token_list is configured.
            logger.debug("best hypo: " + "".join(
                [self.token_list[x] for x in running_hyps[0].yseq[1:]]))
        # add eos in the final loop to avoid that there are no ended hyps
        if i == maxlen - 1:
            logger.info("adding <eos> in the last position in the loop")
            running_hyps = [
                h._replace(yseq=self.append_token(h.yseq, self.eos))
                for h in running_hyps
            ]

        # add ended hypotheses to a final list, and removed them from current hypotheses
        # (this will be a problem, number of hyps < beam)
        remained_hyps = []
        for hyp in running_hyps:
            if hyp.yseq[-1] == self.eos:
                # e.g., Word LM needs to add final <eos> score
                for k, d in chain(self.full_scorers.items(),
                                  self.part_scorers.items()):
                    s = d.final_score(hyp.states[k])
                    # the per-scorer dict is updated in place, while the
                    # total score needs a new tuple via _replace
                    hyp.scores[k] += s
                    hyp = hyp._replace(score=hyp.score + self.weights[k] * s)
                ended_hyps.append(hyp)
            else:
                remained_hyps.append(hyp)
        return remained_hyps


def beam_search(
        x: paddle.Tensor,
        sos: int,
        eos: int,
        beam_size: int,
        vocab_size: int,
        scorers: Dict[str, ScorerInterface],
        weights: Dict[str, float],
        token_list: List[str]=None,
        maxlenratio: float=0.0,
        minlenratio: float=0.0,
        pre_beam_ratio: float=1.5,
        pre_beam_score_key: str="full", ) -> list:
    """Run a one-shot beam search and return plain-dict results.

    Builds a :class:`BeamSearch` from the given configuration, decodes ``x``
    with it and converts every resulting hypothesis with ``asdict()``.

    Args:
        x (paddle.Tensor): Encoded speech feature (T, D)
        sos (int): Start of sequence id
        eos (int): End of sequence id
        beam_size (int): The number of hypotheses kept during search
        vocab_size (int): The number of vocabulary
        scorers (dict[str, ScorerInterface]): Dict of decoder modules
            e.g., Decoder, CTCPrefixScorer, LM
            The scorer will be ignored if it is `None`
        weights (dict[str, float]): Dict of weights for each scorers
            The scorer will be ignored if its weight is 0
        token_list (list[str]): List of tokens for debug log
        maxlenratio (float): Input length ratio to obtain max output length.
            If maxlenratio=0.0 (default), it uses a end-detect function
            to automatically find maximum hypothesis lengths
        minlenratio (float): Input length ratio to obtain min output length.
        pre_beam_ratio (float): beam size in the pre-beam search
            will be `int(pre_beam_ratio * beam_size)`
        pre_beam_score_key (str): key of scores to perform pre-beam search

    Returns:
        List[Dict]: N-best decoding results

    """
    searcher = BeamSearch(
        scorers,
        weights,
        beam_size=beam_size,
        vocab_size=vocab_size,
        pre_beam_ratio=pre_beam_ratio,
        pre_beam_score_key=pre_beam_score_key,
        sos=sos,
        eos=eos,
        token_list=token_list, )
    # call forward directly, as the original one-liner did
    nbest = searcher.forward(
        x=x, maxlenratio=maxlenratio, minlenratio=minlenratio)
    results = []
    for hyp in nbest:
        results.append(hyp.asdict())
    return results


================================================
FILE: paddlespeech/s2t/decoders/ctcdecoder/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .swig_wrapper import ctc_beam_search_decoding
from .swig_wrapper import ctc_beam_search_decoding_batch
from .swig_wrapper import ctc_greedy_decoding
from .swig_wrapper import CTCBeamSearchDecoder
from .swig_wrapper import Scorer


================================================
FILE: paddlespeech/s2t/decoders/ctcdecoder/decoders_deprecated.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains various CTC decoders."""
import multiprocessing
from itertools import groupby
from math import log

import numpy as np


def ctc_greedy_decoder(probs_seq, vocabulary):
    """CTC greedy (best path) decoder.

    The most probable token is picked at every time step; the resulting path
    is then post-processed by merging consecutive repetitions and dropping
    all blanks. The blank symbol is the last index, i.e. ``len(vocabulary)``.

    :param probs_seq: 2-D list of probabilities over the vocabulary plus
                      blank. Each element is a list of float probabilities
                      for one time step.
    :type probs_seq: list
    :param vocabulary: Vocabulary list.
    :type vocabulary: list
    :return: Decoding result string; empty when probs_seq has no time steps.
    :rtype: str
    :raises ValueError: If a time step does not hold exactly
                        ``len(vocabulary) + 1`` probabilities.
    """
    # dimension verification
    for probs in probs_seq:
        if not len(probs) == len(vocabulary) + 1:
            raise ValueError("probs_seq dimension mismatchedd with vocabulary")
    # nothing to decode: an empty input becomes a 1-D array, on which
    # argmax(axis=1) would raise instead of yielding an empty path
    if len(probs_seq) == 0:
        return ''
    # argmax to get the best index for each time step
    max_index_list = list(np.array(probs_seq).argmax(axis=1))
    # remove consecutive duplicate indexes
    index_list = [index_group[0] for index_group in groupby(max_index_list)]
    # remove blank indexes
    blank_index = len(vocabulary)
    index_list = [index for index in index_list if index != blank_index]
    # convert index list to string
    return ''.join([vocabulary[index] for index in index_list])


def ctc_beam_search_decoder(probs_seq,
                            beam_size,
                            vocabulary,
                            cutoff_prob=1.0,
                            cutoff_top_n=40,
                            ext_scoring_func=None,
                            nproc=False):
    """CTC Beam search decoder.

    It utilizes beam search to approximately select top best decoding
    labels and returning results in the descending order.
    The implementation is based on Prefix Beam Search
    (https://arxiv.org/abs/1408.2873), and the unclear part is
    redesigned. Two important modifications: 1) in the iterative computation
    of probabilities, the assignment operation is changed to accumulation for
    one prefix may comes from different paths; 2) the if condition "if l^+ not
    in A_prev then" after probabilities' computation is deprecated for it is
    hard to understand and seems unnecessary.

    The blank symbol is the last index, i.e. ``len(vocabulary)``. Every
    prefix is internally stored with a leading ``'\\t'`` marker that is
    stripped before results are returned.

    :param probs_seq: 2-D list of probability distributions over each time
                      step, with each element being a list of normalized
                      probabilities over vocabulary and blank.
    :type probs_seq: 2-D list
    :param beam_size: Width for beam search.
    :type beam_size: int
    :param vocabulary: Vocabulary list.
    :type vocabulary: list
    :param cutoff_prob: Cutoff probability in pruning,
                        default 1.0, no pruning.
    :type cutoff_prob: float
    :param cutoff_top_n: Cutoff number in pruning; at most cutoff_top_n
                         of the most probable tokens are expanded per time
                         step, default 40.
    :type cutoff_top_n: int
    :param ext_scoring_func: External scoring function for
                            partially decoded sentence, e.g. word count
                            or language model.
    :type ext_scoring_func: callable
    :param nproc: Whether the decoder used in multiprocesses.
    :type nproc: bool
    :return: List of tuples of log probability and sentence as decoding
             results, in descending order of the probability. Hypotheses
             whose probability is not positive are reported with a log
             probability of ``-inf``.
    :rtype: list
    :raises ValueError: If a time step does not hold exactly
                        ``len(vocabulary) + 1`` probabilities.
    """
    # dimension check
    for prob_list in probs_seq:
        if not len(prob_list) == len(vocabulary) + 1:
            raise ValueError("The shape of prob_seq does not match with the "
                             "shape of the vocabulary.")

    # blank_id assign
    blank_id = len(vocabulary)

    # If the decoder called in the multiprocesses, then use the global scorer
    # instantiated in ctc_beam_search_decoder_batch().
    if nproc is True:
        global ext_nproc_scorer
        ext_scoring_func = ext_nproc_scorer

    # initialize
    # prefix_set_prev: the set containing selected prefixes
    # probs_b_prev: prefixes' probability ending with blank in previous step
    # probs_nb_prev: prefixes' probability ending with non-blank in previous step
    prefix_set_prev = {'\t': 1.0}
    probs_b_prev, probs_nb_prev = {'\t': 1.0}, {'\t': 0.0}

    # extend prefix in loop
    for time_step in range(len(probs_seq)):
        # prefix_set_next: the set containing candidate prefixes
        # probs_b_cur: prefixes' probability ending with blank in current step
        # probs_nb_cur: prefixes' probability ending with non-blank in current step
        prefix_set_next, probs_b_cur, probs_nb_cur = {}, {}, {}

        prob_idx = list(enumerate(probs_seq[time_step]))
        cutoff_len = len(prob_idx)
        # If pruning is enabled
        if cutoff_prob < 1.0 or cutoff_top_n < cutoff_len:
            # keep the most probable tokens until their cumulative mass
            # reaches cutoff_prob, capped at cutoff_top_n tokens
            prob_idx = sorted(prob_idx, key=lambda asd: asd[1], reverse=True)
            cutoff_len, cum_prob = 0, 0.0
            for i in range(len(prob_idx)):
                cum_prob += prob_idx[i][1]
                cutoff_len += 1
                if cum_prob >= cutoff_prob:
                    break
            cutoff_len = min(cutoff_len, cutoff_top_n)
            prob_idx = prob_idx[0:cutoff_len]

        for l in prefix_set_prev:
            # a prefix already reached as an extension of an earlier prefix
            # keeps its accumulated mass; otherwise start from zero
            if l not in prefix_set_next:
                probs_b_cur[l], probs_nb_cur[l] = 0.0, 0.0

            # extend prefix by travering prob_idx
            for index in range(cutoff_len):
                c, prob_c = prob_idx[index][0], prob_idx[index][1]

                if c == blank_id:
                    probs_b_cur[l] += prob_c * (
                        probs_b_prev[l] + probs_nb_prev[l])
                else:
                    last_char = l[-1]
                    new_char = vocabulary[c]
                    l_plus = l + new_char
                    if l_plus not in prefix_set_next:
                        probs_b_cur[l_plus], probs_nb_cur[l_plus] = 0.0, 0.0

                    if new_char == last_char:
                        # a repeated character only extends the prefix when
                        # separated by a blank; otherwise it collapses into l
                        probs_nb_cur[l_plus] += prob_c * probs_b_prev[l]
                        probs_nb_cur[l] += prob_c * probs_nb_prev[l]
                    elif new_char == ' ':
                        # a space closes a word: score the prefix (without
                        # the leading marker) with the external scorer
                        if (ext_scoring_func is None) or (len(l) == 1):
                            score = 1.0
                        else:
                            prefix = l[1:]
                            score = ext_scoring_func(prefix)
                        probs_nb_cur[l_plus] += score * prob_c * (
                            probs_b_prev[l] + probs_nb_prev[l])
                    else:
                        probs_nb_cur[l_plus] += prob_c * (
                            probs_b_prev[l] + probs_nb_prev[l])
                    # add l_plus into prefix_set_next
                    prefix_set_next[l_plus] = probs_nb_cur[
                        l_plus] + probs_b_cur[l_plus]
            # add l into prefix_set_next
            prefix_set_next[l] = probs_b_cur[l] + probs_nb_cur[l]
        # update probs
        probs_b_prev, probs_nb_prev = probs_b_cur, probs_nb_cur

        # store top beam_size prefixes
        prefix_set_prev = sorted(
            prefix_set_next.items(), key=lambda asd: asd[1], reverse=True)
        if beam_size < len(prefix_set_prev):
            prefix_set_prev = prefix_set_prev[:beam_size]
        prefix_set_prev = dict(prefix_set_prev)

    beam_result = []
    for seq, prob in prefix_set_prev.items():
        if prob > 0.0 and len(seq) > 1:
            result = seq[1:]
            # score last word by external scorer
            if (ext_scoring_func is not None) and (result[-1] != ' '):
                prob = prob * ext_scoring_func(result)
            # the external scorer may drive the probability down to zero;
            # log() would raise on it, so report such hypotheses as -inf
            log_prob = log(prob) if prob > 0.0 else float('-inf')
            beam_result.append((log_prob, result))
        else:
            beam_result.append((float('-inf'), ''))

    # output top beam_size decoding results
    beam_result = sorted(beam_result, key=lambda asd: asd[0], reverse=True)
    return beam_result


def ctc_beam_search_decoder_batch(probs_split,
                                  beam_size,
                                  vocabulary,
                                  num_processes,
                                  cutoff_prob=1.0,
                                  cutoff_top_n=40,
                                  ext_scoring_func=None):
    """Run ctc_beam_search_decoder() over a batch with a process pool.

    Each utterance in the batch is submitted as one asynchronous task; the
    results are collected in submission order once the pool has finished.

    :param probs_split: 3-D list with each element as an instance of 2-D list
                        of probabilities used by ctc_beam_search_decoder().
    :type probs_split: 3-D list
    :param beam_size: Width for beam search.
    :type beam_size: int
    :param vocabulary: Vocabulary list.
    :type vocabulary: list
    :param num_processes: Number of parallel processes, must be positive.
    :type num_processes: int
    :param cutoff_prob: Cutoff probability in pruning,
                        default 1.0, no pruning.
    :type cutoff_prob: float
    :param cutoff_top_n: Cutoff number in pruning, forwarded unchanged to
                         ctc_beam_search_decoder(), default 40.
    :type cutoff_top_n: int
    :param ext_scoring_func: External scoring function for
                            partially decoded sentence, e.g. word count
                            or language model.
    :type ext_scoring_func: callable
    :return: One entry per utterance, each a list of tuples of log
             probability and sentence in descending order of probability.
    :rtype: list
    :raises ValueError: If num_processes is not positive.
    """
    if not (num_processes > 0):
        raise ValueError("Number of processes must be positive!")

    # The scorer is not passed as a task argument; it is published through a
    # module-level global and every task is told (nproc=True) to read it.
    # NOTE(review): presumably relies on workers inheriting module state
    # (fork start method) -- confirm before using other start methods.
    global ext_nproc_scorer
    ext_nproc_scorer = ext_scoring_func

    worker_pool = multiprocessing.Pool(processes=num_processes)
    pending = [
        worker_pool.apply_async(ctc_beam_search_decoder,
                                (utt_probs, beam_size, vocabulary, cutoff_prob,
                                 cutoff_top_n, None, True))
        for utt_probs in probs_split
    ]

    # no more tasks: wait for every worker to drain before reading results
    worker_pool.close()
    worker_pool.join()
    return [job.get() for job in pending]


================================================
FILE: paddlespeech/s2t/decoders/ctcdecoder/scorer_deprecated.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""External Scorer for Beam Search Decoder."""
import os

import kenlm
import numpy as np


class Scorer(object):
    """External scorer used by beam search decoding.

    Rates a prefix or a whole sentence by combining the n-gram language
    model probability of its last word with a word-count term.

    :param alpha: Parameter associated with language model. Don't use
                  language model when alpha = 0.
    :type alpha: float
    :param beta: Parameter associated with word count. Don't use word
                count when beta = 0.
    :type beta: float
    :param model_path: Path to load language model.
    :type model_path: str
    :raises IOError: If model_path is not an existing file.
    """

    def __init__(self, alpha, beta, model_path):
        self._alpha, self._beta = alpha, beta
        if not os.path.isfile(model_path):
            raise IOError("Invaid language model path: %s" % model_path)
        self._language_model = kenlm.LanguageModel(model_path)

    def _language_model_score(self, sentence):
        """Return the language model probability of the last scored word."""
        per_word_scores = list(
            self._language_model.full_scores(sentence, eos=False))
        # the first field of each entry is a log10 probability
        last_log10_prob = per_word_scores[-1][0]
        return np.power(10, last_log10_prob)

    def _word_count(self, sentence):
        """Word insertion term: number of single-space separated pieces."""
        return len(sentence.strip().split(' '))

    def reset_params(self, alpha, beta):
        """Replace the language model and word count weights."""
        self._alpha, self._beta = alpha, beta

    def __call__(self, sentence, log=False):
        """Combine the language model and word count scores for a sentence.

        :param sentence: The input sentence for evaluation
        :type sentence: str
        :param log: Whether return the score in log representation.
        :type log: bool
        :return: Evaluation score, in the decimal or log.
        :rtype: float
        """
        lm_prob = self._language_model_score(sentence)
        n_words = self._word_count(sentence)
        # only the literal False selects the decimal form
        if log is not False:
            return self._alpha * np.log(lm_prob) + self._beta * np.log(n_words)
        return np.power(lm_prob, self._alpha) * np.power(n_words, self._beta)


================================================
FILE: paddlespeech/s2t/decoders/ctcdecoder/swig_wrapper.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Wrapper for various CTC decoders in SWIG."""
import paddlespeech_ctcdecoders


class Scorer(paddlespeech_ctcdecoders.Scorer):
    """Python-side subclass of the SWIG-exposed ``Scorer``.

    All arguments are forwarded unchanged to the base class constructor.

    :param alpha: Parameter associated with language model. Don't use
                  language model when alpha = 0.
    :type alpha: float
    :param beta: Parameter associated with word count. Don't use word
                 count when beta = 0.
    :type beta: float
    :param model_path: Path to load language model.
    :type model_path: str
    :param vocabulary: Vocabulary list.
    :type vocabulary: list
    """

    def __init__(self, alpha, beta, model_path, vocabulary):
        super().__init__(alpha, beta, model_path, vocabulary)


def ctc_greedy_decoding(probs_seq, vocabulary, blank_id):
    """Call the SWIG CTC best path (greedy) decoder.

    :param probs_seq: 2-D probability distributions over each time step,
                      with each row being normalized probabilities over
                      vocabulary and blank. Must provide ``tolist()``,
                      which is used to convert it before the call.
    :type probs_seq: 2-D array exposing ``tolist()``
    :param vocabulary: Vocabulary list.
    :type vocabulary: list
    :param blank_id: Index of the blank symbol, forwarded to the decoder.
    :type blank_id: int
    :return: Decoding result string.
    :rtype: str
    """
    frame_probs = probs_seq.tolist()
    return paddlespeech_ctcdecoders.ctc_greedy_decoding(
        frame_probs, vocabulary, blank_id)


def ctc_beam_search_decoding(probs_seq,
                             vocabulary,
                             beam_size,
                             cutoff_prob=1.0,
                             cutoff_top_n=40,
                             ext_scoring_func=None,
                             blank_id=0):
    """Wrapper for the CTC Beam Search Decoding function.

    :param probs_seq: 2-D probability distributions over each time step,
                      with each row being normalized probabilities over
                      vocabulary and blank. Must provide ``tolist()``,
                      which is used to convert it before the call.
    :type probs_seq: 2-D array exposing ``tolist()``
    :param vocabulary: Vocabulary list.
    :type vocabulary: list
    :param beam_size: Width for beam search.
    :type beam_size: int
    :param cutoff_prob: Cutoff probability in pruning,
                        default 1.0, no pruning.
    :type cutoff_prob: float
    :param cutoff_top_n: Cutoff number in pruning, only top cutoff_top_n
                         characters with highest probs in vocabulary will be
                         used in beam search, default 40.
    :type cutoff_top_n: int
    :param ext_scoring_func: External scoring function for
                             partially decoded sentence, e.g. word count
                             or language model.
    :type ext_scoring_func: callable
    :param blank_id: Index of the blank symbol, forwarded to the decoder,
                     default 0.
    :type blank_id: int
    :return: List of tuples of log probability and sentence as decoding
             results, in descending order of the probability.
    :rtype: list
    """
    beam_results = paddlespeech_ctcdecoders.ctc_beam_search_decoding(
        probs_seq.tolist(), vocabulary, beam_size, cutoff_prob, cutoff_top_n,
        ext_scoring_func, blank_id)
    # Depending on how the binding was generated the sentence may come back
    # as bytes or already as str. Decode only bytes, so a str result does not
    # fail on the missing .decode() and matches the batch wrapper, which
    # returns the text untouched.
    beam_results = [(res[0], res[1].decode('utf-8')
                     if isinstance(res[1], bytes) else res[1])
                    for res in beam_results]
    return beam_results


def ctc_beam_search_decoding_batch(probs_split,
                                   vocabulary,
                                   beam_size,
                                   num_processes,
                                   cutoff_prob=1.0,
                                   cutoff_top_n=40,
                                   ext_scoring_func=None,
                                   blank_id=0):
    """Call the SWIG batched CTC beam search decoder.

    :param probs_split: Sequence of 2-D probability matrices, one per
                        utterance. Each must provide ``tolist()``, which is
                        used to convert it before the call.
    :type probs_split: sequence of 2-D arrays exposing ``tolist()``
    :param vocabulary: Vocabulary list.
    :type vocabulary: list
    :param beam_size: Width for beam search.
    :type beam_size: int
    :param num_processes: Number of parallel processes.
    :type num_processes: int
    :param cutoff_prob: Cutoff probability in vocabulary pruning,
                        default 1.0, no pruning.
    :type cutoff_prob: float
    :param cutoff_top_n: Cutoff number in pruning, only top cutoff_top_n
                         characters with highest probs in vocabulary will be
                         used in beam search, default 40.
    :type cutoff_top_n: int
    :param ext_scoring_func: External scoring function for
                             partially decoded sentence, e.g. word count
                             or language model.
    :type ext_scoring_func: callable
    :param blank_id: Index of the blank symbol, forwarded to the decoder,
                     default 0.
    :type blank_id: int
    :return: One list per utterance, each holding tuples built from the
             first two fields of every decoder result.
    :rtype: list
    """
    utt_prob_lists = [utt_probs.tolist() for utt_probs in probs_split]

    raw_batch = paddlespeech_ctcdecoders.ctc_beam_search_decoding_batch(
        utt_prob_lists, vocabulary, beam_size, num_processes, cutoff_prob,
        cutoff_top_n, ext_scoring_func, blank_id)

    # re-pack every result as a plain 2-tuple
    repacked = []
    for utt_results in raw_batch:
        repacked.append([(res[0], res[1]) for res in utt_results])
    return repacked


class CTCBeamSearchDecoder(paddlespeech_ctcdecoders.CtcBeamSearchDecoderBatch):
    """Python-side subclass of the SWIG-exposed ``CtcBeamSearchDecoderBatch``.

    All arguments are forwarded unchanged to the base class constructor.

    Args:
        vocab_list (list): Vocabulary list.
        batch_size (int): Batch size, forwarded to the base decoder.
        beam_size (int): Width for beam search.
        num_processes (int): Number of parallel processes.
        cutoff_prob (float): Cutoff probability in vocabulary pruning,
                            1.0 means no pruning.
        cutoff_top_n (int): Cutoff number in pruning, only top cutoff_top_n
                            characters with highest probs in vocabulary will be
                            used in beam search.
        _ext_scorer (Scorer): External scorer for partially decoded sentence,
                            e.g. word count or language model.
        blank_id (int): Index of the blank symbol, forwarded to the base
                            decoder.
    """

    def __init__(self, vocab_list, batch_size, beam_size, num_processes,
                 cutoff_prob, cutoff_top_n, _ext_scorer, blank_id):
        super().__init__(vocab_list, batch_size, beam_size, num_processes,
                         cutoff_prob, cutoff_top_n, _ext_scorer, blank_id)


================================================
FILE: paddlespeech/s2t/decoders/ctcdecoder/tests/test_decoders.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Test decoders."""
import unittest

from paddlespeech.s2t.decoders import decoders_deprecated as decoder


class TestDecoders(unittest.TestCase):
    def setUp(self):
        self.vocab_list = ["\'", ' ', 'a', 'b', 'c', 'd']
        self.beam_size = 20
        self.probs_seq1 = [[
            0.06390443, 0.21124858, 0.27323887, 0.06870235, 0.0361254,
            0.18184413, 0.16493624
        ], [
            0.03309247, 0.22866108, 0.24390638, 0.09699597, 0.31895462,
            0.0094893, 0.06890021
        ], [
            0.218104, 0.19992557, 0.18245131, 0.08503348, 0.14903535,
            0.08424043, 0.08120984
        ], [
            0.12094152, 0.19162472, 0.01473646, 0.28045061, 0.24246305,
            0.05206269, 0.09772094
        ], [
            0.1333387, 0.00550838, 0.00301669, 0.21745861, 0.20803985,
            0.41317442, 0.01946335
        ], [
            0.16468227, 0.1980699, 0.1906545, 0.18963251, 0.19860937,
            0.04377724, 0.01457421
        ]]
        self.probs_seq2 = [[
            0.08034842, 0.22671944, 0.05799633, 0.36814645, 0.11307441,
            0.04468023, 0.10903471
        ], [
            0.09742457, 0.12959763, 0.09435383, 0.21889204, 0.15113123,
            0.10219457, 0.20640612
        ], [
            0.45033529, 0.09091417, 0.15333208, 0.07939558, 0.08649316,
            0.12298585, 0.01654384
        ], [
            0.02512238, 0.22079203, 0.19664364, 0.11906379, 0.07816055,
            0.22538587, 0.13483174
        ], [
            0.17928453, 0.06065261, 0.41153005, 0.1172041, 0.11880313,
            0.07113197, 0.04139363
        ], [
            0.15882358, 0.1235788, 0.23376776, 0.20510435, 0.00279306,
            0.05294827, 0.22298418
        ]]
        self.greedy_result = ["ac'bdc", "b'da"]
        self.beam_search_result = ['acdc', "b'a"]

    def test_greedy_decoder_1(self):
        bst_result = decoder.ctc_greedy_decoder(self.probs_seq1,
                                                self.vocab_list)
        self.assertEqual(bst_result, self.greedy_result[0])

    def test_greedy_decoder_2(self):
        bst_result = decoder.ctc_greedy_decoder(self.probs_seq2,
                                                self.vocab_list)
        self.assertEqual(bst_result, self.greedy_result[1])

    def test_beam_search_decoder_1(self):
        beam_result = decoder.ctc_beam_search_decoder(
            probs_seq=self.probs_seq1,
            beam_size=self.beam_size,
            vocabulary=self.vocab_list)
        self.assertEqual(beam_result[0][1], self.beam_search_result[0])

    def test_beam_search_decoder_2(self):
        beam_result = decoder.ctc_beam_search_decoder(
            probs_seq=self.probs_seq2,
            beam_size=self.beam_size,
            vocabulary=self.vocab_list)
        self.assertEqual(beam_result[0][1], self.beam_search_result[1])

    def test_beam_search_decoder_batch(self):
        beam_results = decoder.ctc_beam_search_decoder_batch(
            probs_split=[self.probs_seq1, self.probs_seq2],
            beam_size=self.beam_size,
            vocabulary=self.vocab_list,
            num_processes=24)
        self.assertEqual(beam_results[0][0][1], self.beam_search_result[0])
        self.assertEqual(beam_results[1][0][1], self.beam_search_result[1])


# Run the test cases of this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: paddlespeech/s2t/decoders/recog.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Reference espnet Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
# Modified from espnet(https://github.com/espnet/espnet)
"""V2 backend for `asr_recog.py` using py:class:`decoders.beam_search.BeamSearch`."""
import jsonlines
import paddle
from yacs.config import CfgNode

from .beam_search import BatchBeamSearch
from .beam_search import BeamSearch
from .scorers.length_bonus import LengthBonus
from .scorers.scorer_interface import BatchScorerInterface
from .utils import add_results_to_json
from paddlespeech.s2t.exps import dynamic_import_tester
from paddlespeech.s2t.io.reader import LoadInputsAndTargets
from paddlespeech.s2t.models.asr_interface import ASRInterface
from paddlespeech.s2t.models.lm_interface import dynamic_import_lm
from paddlespeech.s2t.utils.log import Log

# Module-level logger, obtained from the project's Log helper and named
# after this module.
logger = Log(__name__).getlog()

# NOTE: you need this func to generate our sphinx doc


def get_config(config_path):
    """Load a config file into a fresh ``CfgNode``.

    The node is created with ``new_allowed=True`` and the file is merged
    into it.

    Args:
        config_path (str): Path of the config file to merge.

    Returns:
        CfgNode: The populated configuration node.
    """
    cfg = CfgNode(new_allowed=True)
    cfg.merge_from_file(config_path)
    return cfg


def load_trained_model(args):
    """Build the tester experiment for a model and restore it.

    The tester class is looked up from ``args.model_name``, instantiated
    with the config read from ``args.model_conf``, then set up and restored
    inside its ``eval()`` context.

    Args:
        args (namespace): Program arguments; ``model_conf`` and
            ``model_name`` are read here and the whole namespace is passed
            on to the tester.

    Returns:
        tuple: ``(model, char_list, exp, confs)`` -- the experiment's model,
        its ``args.char_list``, the experiment itself and the loaded config.
    """
    model_cfg = get_config(args.model_conf)
    tester_cls = dynamic_import_tester(args.model_name)
    experiment = tester_cls(model_cfg, args)
    with experiment.eval():
        experiment.setup()
        experiment.restore()
    vocab = experiment.args.char_list
    restored_model = experiment.model
    return restored_model, vocab, experiment, model_cfg


def load_trained_lm(args):
    """Instantiate a language model and load its saved parameters.

    The LM class is resolved from the ``model_module`` entry of the config
    at ``args.rnnlm_conf`` and constructed with the config's ``model``
    section as keyword arguments; the state dict is read from ``args.rnnlm``.

    Args:
        args (namespace): Program arguments; ``rnnlm_conf`` and ``rnnlm``
            are read.

    Returns:
        The language model with its state dict applied.
    """
    lm_cfg = get_config(args.rnnlm_conf)
    lm_cls = dynamic_import_lm(lm_cfg.model_module)
    language_model = lm_cls(**lm_cfg.model)
    state = paddle.load(args.rnnlm)
    language_model.set_state_dict(state)
    return language_model


def recog_v2(args):
    """Decode with custom models that implements ScorerInterface.

    Loads the trained model (plus optional LM and n-gram scorers), builds a
    beam search over them, decodes every utterance listed in
    ``args.recog_json`` one at a time and writes one jsonlines record per
    utterance to ``args.result_label``.

    Args:
        args (namespace): The program arguments.
        See py:func:`bin.asr_recog.get_parser` for details

    Raises:
        NotImplementedError: For batch sizes above 1, streaming mode, a word
            LM, or more than one GPU.
    """
    logger.warning("experimental API for custom LMs is selected by --api v2")
    # unsupported modes are rejected before anything is loaded
    if args.batchsize > 1:
        raise NotImplementedError("multi-utt batch decoding is not implemented")
    if args.streaming_mode is not None:
        raise NotImplementedError("streaming mode is not implemented")
    if args.word_rnnlm:
        raise NotImplementedError("word LM is not implemented")

    # set_deterministic(args)
    model, char_list, exp, confs = load_trained_model(args)
    assert isinstance(model, ASRInterface)

    # a preprocess config given on the command line wins over the model's
    if args.preprocess_conf is None:
        preprocess_conf = confs.preprocess_config
    else:
        preprocess_conf = args.preprocess_conf
    load_inputs_and_targets = LoadInputsAndTargets(
        mode="asr",
        load_output=False,
        sort_in_input_length=False,
        preprocess_conf=preprocess_conf,
        preprocess_args={"train": False}, )

    # optional neural LM
    lm = None
    if args.rnnlm:
        lm = load_trained_lm(args)
        lm.eval()

    # optional n-gram scorer, imported lazily only when requested
    ngram = None
    if args.ngram_model:
        from .scorers.ngram import NgramFullScorer
        from .scorers.ngram import NgramPartScorer

        if args.ngram_scorer == "full":
            ngram_cls = NgramFullScorer
        else:
            ngram_cls = NgramPartScorer
        ngram = ngram_cls(args.ngram_model, char_list)

    vocab_size = len(char_list)
    scorers = model.scorers()  # decoder
    scorers["lm"] = lm
    scorers["ngram"] = ngram
    scorers["length_bonus"] = LengthBonus(vocab_size)
    weights = {
        "decoder": 1.0 - args.ctc_weight,
        "ctc": args.ctc_weight,
        "lm": args.lm_weight,
        "ngram": args.ngram_weight,
        "length_bonus": args.penalty,
    }
    if args.ctc_weight == 1.0:
        pre_beam_score_key = None
    else:
        pre_beam_score_key = "full"
    beam_search = BeamSearch(
        beam_size=args.beam_size,
        vocab_size=vocab_size,
        weights=weights,
        scorers=scorers,
        sos=model.sos,
        eos=model.eos,
        token_list=char_list,
        pre_beam_score_key=pre_beam_score_key, )

    # TODO(karita): make all scorers batchfied
    if args.batchsize == 1:
        non_batch = [
            key for key, scorer in beam_search.full_scorers.items()
            if not isinstance(scorer, BatchScorerInterface)
        ]
        if non_batch:
            logger.warning(f"As non-batch scorers {non_batch} are found, "
                           f"fall back to non-batch implementation.")
        else:
            # every full scorer is batch-capable: switch implementation by
            # re-classing the existing instance
            beam_search.__class__ = BatchBeamSearch
            logger.info("BatchBeamSearch implementation is selected.")

    if args.ngpu > 1:
        raise NotImplementedError("only single GPU decoding is supported")
    device = "gpu:0" if args.ngpu == 1 else "cpu"
    paddle.set_device(device)
    dtype = getattr(paddle, args.dtype)
    logger.info(f"Decoding device={device}, dtype={dtype}")
    model.to(device=device, dtype=dtype)
    model.eval()
    beam_search.to(device=device, dtype=dtype)
    beam_search.eval()

    # read json data: jsonlines to dict, key by 'utt', value by jsonline
    with jsonlines.open(args.recog_json, "r") as reader:
        js = {record['utt']: record for record in reader}

    new_js = {}
    total = len(js)
    with paddle.no_grad(), jsonlines.open(args.result_label, "w") as f:
        for idx, (name, entry) in enumerate(js.items(), 1):
            logger.info(f"({idx}/{total}) decoding " + name)
            batch = [(name, entry)]
            feat = load_inputs_and_targets(batch)[0][0]
            logger.info(f'feat: {feat.shape}')
            enc = model.encode(paddle.to_tensor(feat).to(dtype))
            logger.info(f'eout: {enc.shape}')
            hyps = beam_search(
                x=enc,
                maxlenratio=args.maxlenratio,
                minlenratio=args.minlenratio)
            # keep at most args.nbest hypotheses, as plain dicts
            keep = min(len(hyps), args.nbest)
            nbest_hyps = [hyp.asdict() for hyp in hyps[:keep]]
            new_js[name] = add_results_to_json(entry, nbest_hyps, char_list)

            best = new_js[name]['output'][0]  # 1-best
            ref = best['text']
            rec_text = best['rec_text'].replace('▁', ' ').replace(
                '<eos>', '').strip()
            rec_tokenid = [int(tok) for tok in best['rec_tokenid'].split()]
            f.write({
                "utt": name,
                "refs": [ref],
                "hyps": [rec_text],
                "hyps_tokenid": [rec_tokenid],
            })


================================================
FILE: paddlespeech/s2t/decoders/recog_bin.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Reference espnet Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
# Modified from espnet(https://github.com/espnet/espnet)
"""End-to-end speech recognition model decoding script."""
import logging
import os
import random
import sys

import configargparse
import numpy as np

from paddlespeech.utils.argparse import strtobool


def get_parser():
    """Get default arguments.

    Builds the command-line / YAML-config parser for the decoding script.
    Options are grouped as: general configuration, task related, model related,
    search related, transducer related, RNNLM related, n-gram related,
    streaming related, non-autoregressive (Mask CTC) related and quantization
    related.

    Returns:
        configargparse.ArgumentParser: the parser with all decoding options
            registered. Callers may register further options on it before
            calling ``parse_args``.
    """
    parser = configargparse.ArgumentParser(
        description="Transcribe text from speech using "
        "a speech recognition model on one CPU or GPU",
        config_file_parser_class=configargparse.YAMLConfigFileParser,
        formatter_class=configargparse.ArgumentDefaultsHelpFormatter, )
    parser.add(
        '--model-name',
        type=str,
        default='u2_kaldi',
        help='model name, e.g: deepspeech2, u2, u2_kaldi, u2_st')
    # general configuration
    parser.add("--config", is_config_file=True, help="Config file path")
    parser.add(
        "--config2",
        is_config_file=True,
        help="Second config file path that overwrites the settings in `--config`",
    )
    parser.add(
        "--config3",
        is_config_file=True,
        help="Third config file path that overwrites the settings "
        "in `--config` and `--config2`", )

    parser.add_argument("--ngpu", type=int, default=0, help="Number of GPUs")
    parser.add_argument(
        "--dtype",
        choices=("float16", "float32", "float64"),
        default="float32",
        help="Float precision (only available in --api v2)", )
    parser.add_argument("--debugmode", type=int, default=1, help="Debugmode")
    parser.add_argument("--seed", type=int, default=1, help="Random seed")
    parser.add_argument(
        "--verbose", "-V", type=int, default=2, help="Verbose option")
    parser.add_argument(
        "--batchsize",
        type=int,
        default=1,
        help="Batch size for beam search (0: means no batch processing)", )
    parser.add_argument(
        "--preprocess-conf",
        type=str,
        default=None,
        help="The configuration file for the pre-processing", )
    parser.add_argument(
        "--api",
        default="v2",
        choices=["v2"],
        help="Beam search APIs "
        "v2: Experimental API. It supports any models that implements ScorerInterface.",
    )
    # task related
    parser.add_argument(
        "--recog-json", type=str, help="Filename of recognition data (json)")
    parser.add_argument(
        "--result-label",
        type=str,
        required=True,
        help="Filename of result label data (json)", )
    # model (parameter) related
    parser.add_argument(
        "--model",
        type=str,
        required=True,
        help="Model file parameters to read")
    parser.add_argument(
        "--model-conf", type=str, default=None, help="Model config file")
    parser.add_argument(
        "--num-spkrs",
        type=int,
        default=1,
        choices=[1, 2],
        help="Number of speakers in the speech", )
    parser.add_argument(
        "--num-encs",
        default=1,
        type=int,
        help="Number of encoders in the model.")
    # search related
    parser.add_argument(
        "--nbest", type=int, default=1, help="Output N-best hypotheses")
    parser.add_argument("--beam-size", type=int, default=1, help="Beam size")
    parser.add_argument(
        "--penalty", type=float, default=0.0, help="Incertion penalty")
    parser.add_argument(
        "--maxlenratio",
        type=float,
        default=0.0,
        help="""Input length ratio to obtain max output length.
                        If maxlenratio=0.0 (default), it uses a end-detect function
                        to automatically find maximum hypothesis lengths.
                        If maxlenratio<0.0, its absolute value is interpreted
                        as a constant max output length""", )
    parser.add_argument(
        "--minlenratio",
        type=float,
        default=0.0,
        help="Input length ratio to obtain min output length", )
    parser.add_argument(
        "--ctc-weight",
        type=float,
        default=0.0,
        help="CTC weight in joint decoding")
    parser.add_argument(
        "--weights-ctc-dec",
        type=float,
        action="append",
        help="ctc weight assigned to each encoder during decoding."
        "[in multi-encoder mode only]", )
    parser.add_argument(
        "--ctc-window-margin",
        type=int,
        default=0,
        help="""Use CTC window with margin parameter to accelerate
                        CTC/attention decoding especially on GPU. Smaller magin
                        makes decoding faster, but may increase search errors.
                        If margin=0 (default), this function is disabled""", )
    # transducer related
    parser.add_argument(
        "--search-type",
        type=str,
        default="default",
        choices=["default", "nsc", "tsd", "alsd", "maes"],
        help="""Type of beam search implementation to use during inference.
        Can be either: default beam search ("default"),
        N-Step Constrained beam search ("nsc"), Time-Synchronous Decoding ("tsd"),
        Alignment-Length Synchronous Decoding ("alsd") or
        modified Adaptive Expansion Search ("maes").""", )
    parser.add_argument(
        "--nstep",
        type=int,
        default=1,
        help="""Number of expansion steps allowed in NSC beam search or mAES
        (nstep > 0 for NSC and nstep > 1 for mAES).""", )
    parser.add_argument(
        "--prefix-alpha",
        type=int,
        default=2,
        help="Length prefix difference allowed in NSC beam search or mAES.", )
    parser.add_argument(
        "--max-sym-exp",
        type=int,
        default=2,
        help="Number of symbol expansions allowed in TSD.", )
    parser.add_argument(
        "--u-max",
        type=int,
        default=400,
        help="Length prefix difference allowed in ALSD.", )
    parser.add_argument(
        "--expansion-gamma",
        type=float,
        default=2.3,
        help="Allowed logp difference for prune-by-value method in mAES.", )
    parser.add_argument(
        "--expansion-beta",
        type=int,
        default=2,
        help="""Number of additional candidates for expanded hypotheses
                selection in mAES.""", )
    parser.add_argument(
        "--score-norm",
        type=strtobool,
        nargs="?",
        default=True,
        help="Normalize final hypotheses' score by length", )
    parser.add_argument(
        "--softmax-temperature",
        type=float,
        default=1.0,
        help="Penalization term for softmax function.", )
    # rnnlm related
    parser.add_argument(
        "--rnnlm", type=str, default=None, help="RNNLM model file to read")
    parser.add_argument(
        "--rnnlm-conf",
        type=str,
        default=None,
        help="RNNLM model config file to read")
    parser.add_argument(
        "--word-rnnlm",
        type=str,
        default=None,
        help="Word RNNLM model file to read")
    parser.add_argument(
        "--word-rnnlm-conf",
        type=str,
        default=None,
        help="Word RNNLM model config file to read", )
    parser.add_argument(
        "--word-dict", type=str, default=None, help="Word list to read")
    parser.add_argument(
        "--lm-weight", type=float, default=0.1, help="RNNLM weight")
    # ngram related
    parser.add_argument(
        "--ngram-model",
        type=str,
        default=None,
        help="ngram model file to read")
    parser.add_argument(
        "--ngram-weight", type=float, default=0.1, help="ngram weight")
    parser.add_argument(
        "--ngram-scorer",
        type=str,
        default="part",
        choices=("full", "part"),
        help="""if the ngram is set as a part scorer, similar with CTC scorer,
                ngram scorer only scores topK hypethesis.
                if the ngram is set as full scorer, ngram scorer scores all hypthesis
                the decoding speed of part scorer is musch faster than full one""",
    )
    # streaming related
    parser.add_argument(
        "--streaming-mode",
        type=str,
        default=None,
        choices=["window", "segment"],
        help="""Use streaming recognizer for inference.
                        `--batchsize` must be set to 0 to enable this mode""", )
    parser.add_argument(
        "--streaming-window", type=int, default=10, help="Window size")
    parser.add_argument(
        "--streaming-min-blank-dur",
        type=int,
        default=10,
        help="Minimum blank duration threshold", )
    parser.add_argument(
        "--streaming-onset-margin", type=int, default=1, help="Onset margin")
    parser.add_argument(
        "--streaming-offset-margin", type=int, default=1, help="Offset margin")
    # non-autoregressive related
    # Mask CTC related. See https://arxiv.org/abs/2005.08700 for the detail.
    parser.add_argument(
        "--maskctc-n-iterations",
        type=int,
        default=10,
        help="Number of decoding iterations."
        "For Mask CTC, set 0 to predict 1 mask/iter.", )
    parser.add_argument(
        "--maskctc-probability-threshold",
        type=float,
        default=0.999,
        help="Threshold probability for CTC output", )
    # quantize model related
    parser.add_argument(
        "--quantize-config",
        nargs="*",
        help="Quantize config list. E.g.: --quantize-config=[Linear,LSTM,GRU]",
    )
    parser.add_argument(
        "--quantize-dtype",
        type=str,
        default="qint8",
        help="Dtype dynamic quantize")
    # NOTE: `type=bool` would turn every non-empty string (including "False")
    # into True, so the boolean flags below use strtobool, like --score-norm.
    parser.add_argument(
        "--quantize-asr-model",
        type=strtobool,
        default=False,
        help="Quantize asr model", )
    parser.add_argument(
        "--quantize-lm-model",
        type=strtobool,
        default=False,
        help="Quantize lm model", )
    return parser


def main(args):
    """Run the main decoding function.

    Parses the command line, configures logging from ``--verbose``, validates
    the GPU and language-model options, seeds the Python and NumPy random
    generators and finally dispatches to the recognition routine.

    Args:
        args (List[str]): command-line arguments without the program name
            (e.g. ``sys.argv[1:]``).

    Raises:
        ValueError: if ``--dtype float16`` is combined with ``--ngpu 0``, if
            ``--num-spkrs 2`` is requested, or if ``--api`` is not ``v2``.
        NotImplementedError: if ``--num-encs`` is not 1.
        SystemExit: raised through ``sys.exit(1)`` when the GPU settings are
            inconsistent or both ``--rnnlm`` and ``--word-rnnlm`` are given.
    """
    parser = get_parser()
    parser.add_argument(
        "--output", metavar="CKPT_DIR", help="path to save checkpoint.")
    parser.add_argument(
        "--checkpoint_path", type=str, help="path to load checkpoint")
    parser.add_argument(
        "--dict-path", type=str, help="path to load dictionary")
    args = parser.parse_args(args)

    if args.ngpu == 0 and args.dtype == "float16":
        raise ValueError(
            f"--dtype {args.dtype} does not support the CPU backend.")

    # logging info: 1 -> INFO, 2 -> DEBUG, anything else -> WARN
    log_format = "%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s"
    log_level = {
        1: logging.INFO,
        2: logging.DEBUG,
    }.get(args.verbose, logging.WARN)
    logging.basicConfig(level=log_level, format=log_format)
    if log_level == logging.WARN:
        logging.warning("Skip DEBUG/INFO messages")
    logging.info(args)

    # check CUDA_VISIBLE_DEVICES
    if args.ngpu > 0:
        cvd = os.environ.get("CUDA_VISIBLE_DEVICES")
        if cvd is None:
            logging.warning("CUDA_VISIBLE_DEVICES is not set.")
        elif args.ngpu != len(cvd.split(",")):
            logging.error("#gpus is not matched with CUDA_VISIBLE_DEVICES.")
            sys.exit(1)

        # TODO(mn5k): support of multiple GPUs
        if args.ngpu > 1:
            logging.error("The program only supports ngpu=1.")
            sys.exit(1)

    # display PYTHONPATH
    logging.info("python path = " + os.environ.get("PYTHONPATH", "(None)"))

    # seed setting
    random.seed(args.seed)
    np.random.seed(args.seed)
    logging.info("set random seed = %d" % args.seed)

    # validate rnn options
    if args.rnnlm is not None and args.word_rnnlm is not None:
        logging.error(
            "It seems that both --rnnlm and --word-rnnlm are specified. "
            "Please use either option.")
        sys.exit(1)

    # recog
    if args.num_spkrs == 2:
        raise ValueError("asr_mix not supported.")
    if args.num_spkrs == 1:
        # Every unsupported combination fails loudly instead of returning
        # without having decoded anything.
        if args.api != "v2":
            raise ValueError("Only support --api v2")
        if args.num_encs != 1:
            raise NotImplementedError(
                f"--num-encs {args.num_encs} > 1 is not supported in --api v2"
            )
        # Experimental API that supports custom LMs.
        # Imported lazily so that parsing/validation does not need the decoder.
        from paddlespeech.s2t.decoders.recog import recog_v2
        recog_v2(args)


# Script entry point: forward the command-line arguments (without the program
# name) to main().
if __name__ == "__main__":
    main(sys.argv[1:])


================================================
FILE: paddlespeech/s2t/decoders/scorers/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/decoders/scorers/ctc.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""ScorerInterface implementation for CTC."""
import numpy as np
import paddle

from .ctc_prefix_score import CTCPrefixScore
from .ctc_prefix_score import CTCPrefixScorePD
from .scorer_interface import BatchPartialScorerInterface


class CTCPrefixScorer(BatchPartialScorerInterface):
    """Adapter exposing the CTC prefix scorers through the decoder interface."""

    def __init__(self, ctc: paddle.nn.Layer, eos: int):
        """Store the CTC module and the end-of-sequence id.

        Args:
            ctc (paddle.nn.Layer): The CTC implementation.
                For example, :class:`paddlespeech.s2t.modules.ctc.CTC`
            eos (int): The end-of-sequence id.

        """
        self.ctc = ctc
        self.eos = eos
        # Concrete prefix scorer; created by init_state / batch_init_state.
        self.impl = None

    def init_state(self, x: paddle.Tensor):
        """Create the numpy-based prefix scorer and return its first state.

        Args:
            x (paddle.Tensor): The encoded feature tensor

        Returns: tuple of the initial previous score (0) and the initial
            state produced by the prefix scorer

        """
        batched = x.unsqueeze(0)
        logp = self.ctc.log_softmax(batched).squeeze(0).numpy()
        # TODO(karita): use CTCPrefixScorePD
        self.impl = CTCPrefixScore(logp, 0, self.eos, np)
        return 0, self.impl.initial_state()

    def select_state(self, state, i, new_id=None):
        """Pick the state that belongs to one surviving hypothesis.

        Args:
            state: Decoder state for prefix tokens
            i (int): Index to select a state in the main beam search
            new_id (int): New label id to select a state if necessary

        Returns:
            state: pruned state

        """
        if type(state) != tuple:
            return None if state is None else state[i]

        if len(state) == 2:  # for CTCPrefixScore
            prev_scores, ctc_states = state
            return prev_scores[i], ctc_states[i]

        # for CTCPrefixScorePD (need new_id > 0)
        r, log_psi, f_min, f_max, scoring_idmap = state
        s = log_psi[i, new_id].expand(paddle.shape(log_psi)[1])
        if scoring_idmap is not None:
            label_col = scoring_idmap[i, new_id]
        else:
            label_col = new_id
        return r[:, :, i, label_col], s, f_min, f_max

    def score_partial(self, y, ids, state, x):
        """Score the candidate next tokens of a single hypothesis.

        Args:
            y (paddle.Tensor): 1D prefix token
            ids (paddle.Tensor): paddle.int64 next token to score
            state: decoder state for prefix tokens
            x (paddle.Tensor): 2D encoder feature that generates ys

        Returns:
            tuple[paddle.Tensor, Any]:
                Tuple of a score tensor for y that has a shape `(len(ids),)`
                and next state for ys

        """
        prev_score, ctc_state = state
        presub_score, next_ctc_state = self.impl(y.cpu(), ids.cpu(), ctc_state)
        delta = presub_score - prev_score
        tscore = paddle.to_tensor(delta, place=x.place, dtype=x.dtype)
        return tscore, (presub_score, next_ctc_state)

    def batch_init_state(self, x: paddle.Tensor):
        """Create the batched prefix scorer; there is no state to return yet.

        Args:
            x (paddle.Tensor): The encoded feature tensor

        Returns: initial state (always None)

        """
        logp = self.ctc.log_softmax(x.unsqueeze(0))  # assuming batch_size = 1
        n_frames = paddle.shape(logp)[1]
        xlen = paddle.to_tensor([n_frames])
        self.impl = CTCPrefixScorePD(logp, xlen, 0, self.eos)
        return None

    def batch_score_partial(self, y, ids, state, x):
        """Score the candidate next tokens of all hypotheses at once.

        Args:
            y (paddle.Tensor): 1D prefix token
            ids (paddle.Tensor): paddle.int64 next token to score
            state: decoder state for prefix tokens
            x (paddle.Tensor): 2D encoder feature that generates ys

        Returns:
            tuple[paddle.Tensor, Any]:
                Tuple of a score tensor for y that has a shape `(len(next_tokens),)`
                and next state for ys

        """
        if state[0] is None:
            batch_state = None
        else:
            stacked_r = paddle.stack([s[0] for s in state], axis=2)
            stacked_s = paddle.stack([s[1] for s in state])
            # The last two entries are taken from the first hypothesis only.
            batch_state = (stacked_r, stacked_s, state[0][2], state[0][3])
        return self.impl(y, batch_state, ids)

    def extend_prob(self, x: paddle.Tensor):
        """Hand newly encoded frames to the underlying prefix scorer.

        This extension is for streaming decoding
        as in Eq (14) in https://arxiv.org/abs/2006.14941

        Args:
            x (paddle.Tensor): The encoded feature tensor

        """
        logp = self.ctc.log_softmax(x.unsqueeze(0))
        self.impl.extend_prob(logp)

    def extend_state(self, state):
        """Extend every hypothesis state via the underlying prefix scorer.

        This extension is for streaming decoding
        as in Eq (14) in https://arxiv.org/abs/2006.14941

        Args:
            state: The states of hyps

        Returns: extended state

        """
        return [self.impl.extend_state(hyp_state) for hyp_state in state]


================================================
FILE: paddlespeech/s2t/decoders/scorers/ctc_prefix_score.py
================================================
#!/usr/bin/env python3
# Copyright 2018 Mitsubishi Electric Research Labs (Takaaki Hori)
#  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
import numpy as np
import paddle
import six


class CTCPrefixScorePD():
    """Batch processing of CTCPrefixScore

    which is based on Algorithm 2 in WATANABE et al.
    "HYBRID CTC/ATTENTION ARCHITECTURE FOR END-TO-END SPEECH RECOGNITION,"
    but extended to efficiently compute the label probabilities for multiple
    hypotheses simultaneously
    See also Seki et al. "Vectorized Beam Search for CTC-Attention-Based
    Speech Recognition," In INTERSPEECH (pp. 3825-3829), 2019.

    State layout as used in this class: ``__call__`` accepts either ``None``
    (first step) or a 4-tuple ``(r_prev, s_prev, f_min_prev, f_max_prev)`` and
    returns a 5-tuple ``(r, log_psi, f_min, f_max, scoring_idmap)``;
    ``index_select_state`` turns such a 5-tuple back into a 4-tuple.

    NOTE(review): several tensor calls below use torch-style signatures
    (``expand(-1, -1, n)``, ``repeat(...)``, ``paddle.long``); presumably they
    rely on compatibility helpers installed elsewhere in the project — confirm.
    """

    def __init__(self, x, xlens, blank, eos, margin=0):
        """Construct CTC prefix scorer

        `margin` is M in eq.(22,23)

        Note that ``x`` is modified in place: frames at or beyond each
        utterance length are overwritten (see the padding loop below).

        :param paddle.Tensor x: input label posterior sequences (B, T, O)
        :param paddle.Tensor xlens: input lengths (B,)
        :param int blank: blank label id
        :param int eos: end-of-sequence id
        :param int margin: margin parameter for windowing (0 means no windowing)
        """
        # In the comment lines,
        # we assume T: input_length, B: batch size, W: beam width, O: output dim.
        self.logzero = -10000000000.0
        self.blank = blank
        self.eos = eos
        self.batch = paddle.shape(x)[0]
        self.input_length = paddle.shape(x)[1]
        self.odim = paddle.shape(x)[2]
        self.dtype = x.dtype

        # Pad the rest of posteriors in the batch
        # TODO(takaaki-hori): need a better way without for-loops
        for i, l in enumerate(xlens):
            if l < self.input_length:
                # padded frames: every label gets logzero, except blank gets 0
                x[i, l:, :] = self.logzero
                x[i, l:, blank] = 0
        # Reshape input x
        xn = x.transpose([1, 0, 2])  # (B, T, O) -> (T, B, O)
        # blank column repeated over the output dimension
        xb = xn[:, :, self.blank].unsqueeze(2).expand(-1, -1,
                                                      self.odim)  # (T,B,O)
        # index 0: per-label scores, index 1: blank scores
        self.x = paddle.stack([xn, xb])  # (2, T, B, O)
        self.end_frames = paddle.to_tensor(xlens) - 1  # (B,)

        # Setup CTC windowing
        self.margin = margin
        if margin > 0:
            # frame indices, only needed (and only defined) when windowing
            self.frame_ids = paddle.arange(self.input_length, dtype=self.dtype)
        # Base indices for index conversion
        # B idx, hyp idx. shape (B*W, 1)
        # (created lazily in __call__ once the number of hypotheses is known)
        self.idx_bh = None
        # B idx. shape (B,)
        self.idx_b = paddle.arange(self.batch)
        # B idx, O idx. shape (B, 1)
        self.idx_bo = (self.idx_b * self.odim).unsqueeze(1)

    def __call__(self, y, state, scoring_ids=None, att_w=None):
        """Compute CTC prefix scores for next labels

        :param list y: prefix label sequences
        :param tuple state: previous CTC state
            ``(r_prev, s_prev, f_min_prev, f_max_prev)``, or None at the
            first step
        :param paddle.Tensor scoring_ids: selected next ids to score (BW, O'), O' <= O
        :param paddle.Tensor att_w: attention weights to decide CTC window
        :return: tuple ``(ctc_local_scores, new_state)`` where
            ``ctc_local_scores`` is ``log_psi - s_prev`` (BW, O) and
            ``new_state`` is ``(r, log_psi, f_min, f_max, scoring_idmap)``
        """
        output_length = len(y[0]) - 1  # ignore sos
        last_ids = [yi[-1] for yi in y]  # last output label ids
        n_bh = len(last_ids)  # batch * hyps
        n_hyps = n_bh // self.batch  # assuming each utterance has the same # of hyps
        # stored on the instance because index_select_state reads it later
        self.scoring_num = paddle.shape(scoring_ids)[
            -1] if scoring_ids is not None else 0
        # prepare state info
        if state is None:
            # first step: build the previous forward variables from scratch
            r_prev = paddle.full(
                (self.input_length, 2, self.batch, n_hyps),
                self.logzero,
                dtype=self.dtype, )  # (T, 2, B, W)
            # blank-ending paths: running sum over frames of the blank scores
            r_prev[:, 1] = paddle.cumsum(self.x[0, :, :, self.blank],
                                         0).unsqueeze(2)
            r_prev = r_prev.reshape([-1, 2, n_bh])  # (T, 2, BW)
            s_prev = 0.0  # score
            f_min_prev = 0  # eq. 22-23
            f_max_prev = 1  # eq. 22-23
        else:
            r_prev, s_prev, f_min_prev, f_max_prev = state

        # select input dimensions for scoring
        if self.scoring_num > 0:
            # (BW, O)
            # scoring_idmap[h, label] is the column of `label` inside
            # scoring_ids[h], or -1 when that label is not scored
            scoring_idmap = paddle.full(
                (n_bh, self.odim), -1, dtype=paddle.long)
            snum = self.scoring_num
            if self.idx_bh is None or n_bh > len(self.idx_bh):
                self.idx_bh = paddle.arange(n_bh).reshape([-1, 1])  # (BW, 1)
            scoring_idmap[self.idx_bh[:n_bh], scoring_ids] = paddle.arange(snum)
            # flat indices of the selected labels in the (B*O) space
            scoring_idx = (
                scoring_ids + self.idx_bo.repeat(1, n_hyps).reshape(
                    [-1, 1])  # (BW,1)
            ).reshape([-1])  # (BWO)
            # x_ shape (2, T, B*W, O)
            x_ = paddle.index_select(
                self.x.reshape([2, -1, self.batch * self.odim]), scoring_idx,
                2).reshape([2, -1, n_bh, snum])
        else:
            # no pre-selection: score every output label
            scoring_ids = None
            scoring_idmap = None
            snum = self.odim
            # x_ shape (2, T, B*W, O)
            x_ = self.x.unsqueeze(3).repeat(1, 1, 1, n_hyps, 1).reshape(
                [2, -1, n_bh, snum])

        # new CTC forward probs are prepared as a (T x 2 x BW x S) tensor
        # that corresponds to r_t^n(h) and r_t^b(h) in a batch.
        r = paddle.full(
            (self.input_length, 2, n_bh, snum),
            self.logzero,
            dtype=self.dtype, )
        if output_length == 0:
            # empty prefix: a label can be emitted at the very first frame
            r[0, 0] = x_[0, 0]

        r_sum = paddle.logsumexp(r_prev, 1)  #(T,BW)
        log_phi = r_sum.unsqueeze(2).repeat(1, 1, snum)  # (T, BW, O)
        # for the label equal to the hypothesis' last label, only the
        # blank-ending path r_prev[:, 1] is used instead of the full sum
        if scoring_ids is not None:
            for idx in range(n_bh):
                pos = scoring_idmap[idx, last_ids[idx]]
                if pos >= 0:
                    log_phi[:, idx, pos] = r_prev[:, 1, idx]
        else:
            for idx in range(n_bh):
                log_phi[:, idx, last_ids[idx]] = r_prev[:, 1, idx]

        # decide start and end frames based on attention weights
        if att_w is not None and self.margin > 0:
            f_arg = paddle.matmul(att_w, self.frame_ids)
            f_min = max(int(f_arg.min().cpu()), f_min_prev)
            f_max = max(int(f_arg.max().cpu()), f_max_prev)
            start = min(f_max_prev, max(f_min - self.margin, output_length, 1))
            end = min(f_max + self.margin, self.input_length)
        else:
            # windowing disabled: process all frames from `start` onwards
            f_min = f_max = 0
            # if one frame one out, the output_length is the eating frame num now.
            start = max(output_length, 1)
            end = self.input_length

        # compute forward probabilities log(r_t^n(h)) and log(r_t^b(h))
        for t in range(start, end):
            rp = r[t - 1]  # (2 x BW x O')
            # row 0 combines rp[0] with log_phi[t - 1] (non-blank),
            # row 1 combines rp[0] with rp[1] (blank)
            rr = paddle.stack([rp[0], log_phi[t - 1], rp[0], rp[1]]).reshape(
                [2, 2, n_bh, snum])  # (2,2,BW,O')
            r[t] = paddle.logsumexp(rr, 1) + x_[:, t]

        # compute log prefix probabilities log(psi)
        # log_phi shifted by one frame (frame 0 duplicated) plus label scores
        log_phi_x = paddle.concat(
            (log_phi[0].unsqueeze(0), log_phi[:-1]), axis=0) + x_[0]
        if scoring_ids is not None:
            # scatter the scores of the selected labels back into the full
            # output dimension; unselected labels stay at logzero
            log_psi = paddle.full(
                (n_bh, self.odim), self.logzero, dtype=self.dtype)
            log_psi_ = paddle.logsumexp(
                paddle.concat(
                    (log_phi_x[start:end], r[start - 1, 0].unsqueeze(0)),
                    axis=0),
                axis=0, )
            for si in range(n_bh):
                log_psi[si, scoring_ids[si]] = log_psi_[si]
        else:
            log_psi = paddle.logsumexp(
                paddle.concat(
                    (log_phi_x[start:end], r[start - 1, 0].unsqueeze(0)),
                    axis=0),
                axis=0, )

        # eos score: r_sum at the last valid frame of the hypothesis' utterance
        for si in range(n_bh):
            log_psi[si, self.eos] = r_sum[self.end_frames[si // n_hyps], si]

        # exclude blank probs
        log_psi[:, self.blank] = self.logzero

        return (log_psi - s_prev), (r, log_psi, f_min, f_max, scoring_idmap)

    def index_select_state(self, state, best_ids):
        """Select CTC states according to best ids

        Reads ``self.scoring_num`` as set by the most recent ``__call__`` when
        the state carries a ``scoring_idmap``.

        :param state    : CTC state ``(r, s, f_min, f_max, scoring_idmap)``
        :param best_ids : index numbers selected by beam pruning (B, W)
        :return selected_state ``(r_new, s_new, f_min, f_max)``
        """
        r, s, f_min, f_max, scoring_idmap = state
        # convert ids to BHO space
        n_bh = len(s)
        n_hyps = n_bh // self.batch
        vidx = (best_ids +
                (self.idx_b *
                 (n_hyps * self.odim)).reshape([-1, 1])).reshape([-1])
        # select hypothesis scores
        s_new = paddle.index_select(s.reshape([-1]), vidx, 0)
        # broadcast each selected score over the output dimension
        s_new = s_new.reshape([-1, 1]).repeat(1, self.odim).reshape(
            [n_bh, self.odim])
        # convert ids to BHS space (S: scoring_num)
        if scoring_idmap is not None:
            snum = self.scoring_num
            hyp_idx = (best_ids // self.odim +
                       (self.idx_b * n_hyps).reshape([-1, 1])).reshape([-1])
            label_ids = paddle.fmod(best_ids, self.odim).reshape([-1])
            score_idx = scoring_idmap[hyp_idx, label_ids]
            # labels that were not scored (-1) fall back to column 0
            score_idx[score_idx == -1] = 0
            vidx = score_idx + hyp_idx * snum
        else:
            snum = self.odim
        # select forward probabilities
        r_new = paddle.index_select(r.reshape([-1, 2, n_bh * snum]), vidx,
                                    2).reshape([-1, 2, n_bh])
        return r_new, s_new, f_min, f_max

    def extend_prob(self, x):
        """Extend CTC prob.

        Does nothing unless ``x`` has more frames than the stored posteriors.
        Otherwise the stored tensor is rebuilt from ``x`` and the previously
        stored frames are copied back over its leading part.

        :param paddle.Tensor x: input label posterior sequences (B, T, O)
        """

        if self.x.shape[1] < x.shape[1]:  # self.x (2,T,B,O); x (B,T,O)
            # Pad the rest of posteriors in the batch
            # TODO(takaaki-hori): need a better way without for-loops
            # single-element list, so only batch entry 0 is handled here
            # NOTE(review): presumably assumes batch size 1 — confirm
            xlens = [paddle.shape(x)[1]]
            for i, l in enumerate(xlens):
                # self.input_length still holds the length before extension
                if l < self.input_length:
                    x[i, l:, :] = self.logzero
                    x[i, l:, self.blank] = 0
            tmp_x = self.x
            xn = x.transpose([1, 0, 2])  # (B, T, O) -> (T, B, O)
            xb = xn[:, :, self.blank].unsqueeze(2).expand(-1, -1, self.odim)
            self.x = paddle.stack([xn, xb])  # (2, T, B, O)
            # keep the already stored frames unchanged
            self.x[:, :tmp_x.shape[1], :, :] = tmp_x
            self.input_length = paddle.shape(x)[1]
            self.end_frames = paddle.to_tensor(xlens) - 1

    def extend_state(self, state):
        """Compute CTC prefix state.

        Pads the forward variables of a state up to the current
        ``self.input_length``; ``None`` is returned unchanged.

        :param state    : CTC state ``(r_prev, s_prev, f_min_prev, f_max_prev)``
        :return ctc_state with ``r_prev`` replaced by its extended version
        """

        if state is None:
            # nothing to do
            return state
        else:
            r_prev, s_prev, f_min_prev, f_max_prev = state

            r_prev_new = paddle.full(
                (self.input_length, 2),
                self.logzero,
                dtype=self.dtype, )
            start = max(r_prev.shape[0], 1)
            # copy the existing rows, then continue only the blank column
            r_prev_new[0:start] = r_prev
            for t in range(start, self.input_length):
                # blank path: previous blank value plus this frame's blank score
                r_prev_new[t, 1] = r_prev_new[t - 1, 1] + self.x[0, t, :,
                                                                 self.blank]

            return (r_prev_new, s_prev, f_min_prev, f_max_prev)


class CTCPrefixScore():
    """Compute CTC label sequence scores

    which is based on Algorithm 2 in WATANABE et al.
    "HYBRID CTC/ATTENTION ARCHITECTURE FOR END-TO-END SPEECH RECOGNITION,"
    but extended to efficiently compute the probabilities of multiple labels
    simultaneously
    """

    def __init__(self, x, blank, eos, xp):
        """Store the posteriors and the array backend.

        :param x    : log posteriors, indexed as x[frame, label]  (T, O)
        :param blank: index of the blank label
        :param eos  : index of the end-of-sequence label
        :param xp   : array module providing full/empty/logaddexp/where/
                      rollaxis (e.g. numpy)
        """
        self.xp = xp
        self.logzero = -10000000000.0
        self.blank = blank
        self.eos = eos
        self.input_length = len(x)
        self.x = x  # (T, O)

    def initial_state(self):
        """Obtain an initial CTC state

        :return: CTC state
        """
        # initial CTC state is made of a frame x 2 tensor that corresponds to
        # r_t^n(<sos>) and r_t^b(<sos>), where 0 and 1 of axis=1 represent
        # superscripts n and b (non-blank and blank), respectively.
        # r shape (T, 2)
        r = self.xp.full((self.input_length, 2), self.logzero, dtype=np.float32)
        r[0, 1] = self.x[0, self.blank]
        for i in range(1, self.input_length):
            r[i, 1] = r[i - 1, 1] + self.x[i, self.blank]
        return r

    def __call__(self, y, cs, r_prev):
        """Compute CTC prefix scores for next labels

        :param y     : prefix label sequence
        :param cs    : array of next labels
        :param r_prev: previous CTC state
        :return ctc_scores, ctc_states
        """
        # initialize CTC states
        output_length = len(y) - 1  # ignore sos
        # new CTC states are prepared as a frame x (n or b) x n_labels tensor
        # that corresponds to r_t^n(h) and r_t^b(h).
        # r shape (T, 2, n_labels)
        # The whole tensor starts at logzero so that the frames which are
        # never written below (t < |h| - 1) hold a defined value instead of
        # uninitialised memory.
        r = self.xp.full(
            (self.input_length, 2, len(cs)), self.logzero, dtype=np.float32)
        xs = self.x[:, cs]
        if output_length == 0:
            r[0, 0] = xs[0]
            r[0, 1] = self.logzero
        else:
            # Although the code does not exactly follow Algorithm 2,
            # we don't have to change it because we can assume
            # r_t(h)=0 for t < |h| in CTC forward computation
            # (Note: we assume here that index t starts with 0).
            # The purpose of this difference is to reduce the number of for-loops.
            # https://github.com/espnet/espnet/pull/3655
            # where we start to accumulate r_t(h) from t=|h|
            # and iterate r_t(h) = (r_{t-1}(h) + ...) to T-1,
            # avoiding accumulating zeros for t=1~|h|-1.
            # Thus, we need to set r_{|h|-1}(h) = 0,
            # i.e., r[output_length-1] = logzero, for initialization.
            # This is just for reducing the computation.
            r[output_length - 1] = self.logzero

        # prepare forward probabilities for the last label
        r_sum = self.xp.logaddexp(r_prev[:, 0],
                                  r_prev[:, 1])  # log(r_t^n(g) + r_t^b(g))
        last = y[-1]
        if output_length > 0 and last in cs:
            # every column is assigned in the loop below
            log_phi = self.xp.empty(
                (self.input_length, len(cs)), dtype=np.float32)
            for i in range(len(cs)):
                log_phi[:, i] = r_sum if cs[i] != last else r_prev[:, 1]
        else:
            log_phi = r_sum

        # compute forward probabilities log(r_t^n(h)), log(r_t^b(h)),
        # and log prefix probabilities log(psi)
        start = max(output_length, 1)
        # Copy: without it log_psi is a view into r, and when the loop below
        # runs zero times the eos/blank overwrites would corrupt the state.
        log_psi = r[start - 1, 0].copy()
        for t in range(start, self.input_length):
            r[t, 0] = self.xp.logaddexp(r[t - 1, 0], log_phi[t - 1]) + xs[t]
            r[t, 1] = (self.xp.logaddexp(r[t - 1, 0], r[t - 1, 1]) +
                       self.x[t, self.blank])
            log_psi = self.xp.logaddexp(log_psi, log_phi[t - 1] + xs[t])

        # get P(...eos|X) that ends with the prefix itself
        eos_pos = self.xp.where(cs == self.eos)[0]
        if len(eos_pos) > 0:
            log_psi[eos_pos] = r_sum[-1]  # log(r_T^n(g) + r_T^b(g))

        # exclude blank probs
        blank_pos = self.xp.where(cs == self.blank)[0]
        if len(blank_pos) > 0:
            log_psi[blank_pos] = self.logzero

        # return the log prefix probability and CTC states, where the label axis
        # of the CTC states is moved to the first axis to slice it easily
        # log_psi shape (n_labels,), state shape (n_labels, T, 2)
        return log_psi, self.xp.rollaxis(r, 2)


================================================
FILE: paddlespeech/s2t/decoders/scorers/length_bonus.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""Length bonus module."""
from typing import Any
from typing import List
from typing import Tuple

import paddle

from .scorer_interface import BatchScorerInterface


class LengthBonus(BatchScorerInterface):
    """Scorer that gives every candidate token the same constant bonus."""

    def __init__(self, n_vocab: int):
        """Remember the vocabulary size.

        Args:
            n_vocab (int): The number of tokens in vocabulary for beam search

        """
        self.n = n_vocab

    def score(self, y, state, x):
        """Return a bonus of 1.0 for each of the ``n_vocab`` tokens.

        Args:
            y (paddle.Tensor): 1D paddle.int64 prefix tokens (not used).
            state: Scorer state for prefix tokens (not used).
            x (paddle.Tensor): 2D encoder feature that generates ys; only its
                ``place`` and ``dtype`` are read.

        Returns:
            tuple[paddle.Tensor, Any]: Tuple of
                scores for next token (n_vocab)
                and None

        """
        bonus = paddle.to_tensor([1.0], place=x.place, dtype=x.dtype)
        return bonus.expand(self.n), None

    def batch_score(self,
                    ys: paddle.Tensor,
                    states: List[Any],
                    xs: paddle.Tensor) -> Tuple[paddle.Tensor, List[Any]]:
        """Return a bonus of 1.0 for every (hypothesis, token) pair.

        Args:
            ys (paddle.Tensor): paddle.int64 prefix tokens (n_batch, ylen);
                only the batch size ``ys.shape[0]`` is read.
            states (List[Any]): Scorer states for prefix tokens (not used).
            xs (paddle.Tensor):
                The encoder feature that generates ys (n_batch, xlen, n_feat);
                only its ``place`` and ``dtype`` are read.

        Returns:
            tuple[paddle.Tensor, List[Any]]: Tuple of
                batchfied scores for next token with shape of `(n_batch, n_vocab)`
                and None in place of a state list.

        """
        bonus = paddle.to_tensor([1.0], place=xs.place, dtype=xs.dtype)
        batch_bonus = bonus.expand(ys.shape[0], self.n)
        return (batch_bonus, None)


================================================
FILE: paddlespeech/s2t/decoders/scorers/ngram.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""Ngram lm implement."""
from abc import ABC

import kenlm
import paddle

from .scorer_interface import BatchScorerInterface
from .scorer_interface import PartialScorerInterface


class Ngrambase(ABC):
    """Shared kenlm-backed scoring logic for the n-gram scorers."""

    def __init__(self, ngram_model, token_list):
        """Load the language model and build the token table.

        Args:
            ngram_model: ngram model path
            token_list: token list from dict or model.json

        """
        # "<eos>" in the token list is stored as "</s>"
        self.chardict = [
            "</s>" if token == "<eos>" else token for token in token_list
        ]
        self.charlen = len(self.chardict)
        self.lm = kenlm.LanguageModel(ngram_model)
        # scratch state reused as the output slot while scoring candidates
        self.tmpkenlmstate = kenlm.State()

    def init_state(self, x):
        """Create a fresh kenlm state via ``NullContextWrite``."""
        null_state = kenlm.State()
        self.lm.NullContextWrite(null_state)
        return null_state

    def score_partial_(self, y, next_token, state, x):
        """Score the given candidate tokens after the last token of ``y``.

        Args:
            y: prefix tokens; only the last one is used
            next_token: candidate token ids to be scored
            state: previous kenlm state
            x: encoded feature (only its dtype is used)

        Returns:
            tuple: a score tensor shaped like ``next_token`` with dtype
                ``x.dtype``, and the kenlm state after consuming the last
                token of ``y``.

        """
        out_state = kenlm.State()
        # a prefix of length 1 is fed to the LM as "<s>"
        if y.shape[0] > 1:
            prev_word = self.chardict[y[-1]]
        else:
            prev_word = "<s>"
        self.lm.BaseScore(state, prev_word, out_state)

        scores = paddle.empty_like(next_token, dtype=x.dtype)
        for pos, token_id in enumerate(next_token):
            candidate = self.chardict[token_id]
            scores[pos] = self.lm.BaseScore(out_state, candidate,
                                            self.tmpkenlmstate)
        return scores, out_state


class NgramFullScorer(Ngrambase, BatchScorerInterface):
    """N-gram scorer that scores every token in the vocabulary."""

    def score(self, y, state, x):
        """Score all ``self.charlen`` tokens as continuations of ``y``.

        Args:
            y: previous char
            state: previous state
            x: encoded feature

        Returns:
            tuple: whatever ``score_partial_`` returns when asked to score
                the token ids ``0 .. self.charlen - 1``.

        """
        every_token = paddle.to_tensor(range(self.charlen))
        return self.score_partial_(y, every_token, state, x)


class NgramPartScorer(Ngrambase, PartialScorerInterface):
    """Partialscorer for ngram."""

    def score_partial(self, y, next_token, state, x):
        """Score a pre-selected subset of next tokens.

        Args:
            y: previous char
            next_token: next token need to be score
            state: previous state
            x: encoded feature

        Returns:
            tuple: the score tensor and next state produced by
                ``score_partial_`` for ``next_token``.

        """
        return self.score_partial_(y, next_token, state, x)

    def select_state(self, state, i, new_id=None):
        """Return ``state`` unchanged.

        ``new_id`` is accepted (and ignored) so that the signature matches
        ``ScorerInterface.select_state(state, i, new_id=None)``; callers that
        pass the third argument no longer raise ``TypeError``.
        """
        return state


================================================
FILE: paddlespeech/s2t/decoders/scorers/scorer_interface.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""Scorer interface module."""
import warnings
from typing import Any
from typing import List
from typing import Tuple

import paddle


class ScorerInterface:
    """Base interface for scorers used in beam search.

    A scorer assigns a score to every token in the vocabulary.

    Examples:
        * Search heuristics
            * :class:`scorers.length_bonus.LengthBonus`
        * Decoder networks of the sequence-to-sequence models
            * :class:`transformer.decoder.Decoder`
            * :class:`rnn.decoders.Decoder`
        * Neural language models
            * :class:`lm.transformer.TransformerLM`
            * :class:`lm.default.DefaultRNNLM`
            * :class:`lm.seq_rnn.SequentialRNNLM`

    """

    def init_state(self, x: paddle.Tensor) -> Any:
        """Build the state used before any token is decoded (optional).

        Args:
            x (paddle.Tensor): The encoded feature tensor

        Returns: initial state; this default implementation returns None

        """
        return None

    def select_state(self, state: Any, i: int, new_id: int=None) -> Any:
        """Pick the state that belongs to one hypothesis of the beam.

        Args:
            state: Decoder state for prefix tokens
            i (int): Index to select a state in the main beam search
            new_id (int): New label index to select a state if necessary
                (ignored by this default implementation)

        Returns:
            state: ``state[i]``, or None when ``state`` is None

        """
        if state is None:
            return None
        return state[i]

    def score(self, y: paddle.Tensor, state: Any,
              x: paddle.Tensor) -> Tuple[paddle.Tensor, Any]:
        """Score the next token (must be overridden).

        Args:
            y (paddle.Tensor): 1D paddle.int64 prefix tokens.
            state: Scorer state for prefix tokens
            x (paddle.Tensor): The encoder feature that generates ys.

        Returns:
            tuple[paddle.Tensor, Any]: Tuple of
                scores for next token that has a shape of `(n_vocab)`
                and next state for ys

        Raises:
            NotImplementedError: always, in this base class.

        """
        raise NotImplementedError

    def final_score(self, state: Any) -> float:
        """Score the end of a hypothesis (optional).

        Args:
            state: Scorer state for prefix tokens

        Returns:
            float: final score; this default implementation returns 0.0

        """
        return 0.0


class BatchScorerInterface(ScorerInterface):
    """Batch scorer interface."""

    def batch_init_state(self, x: paddle.Tensor) -> Any:
        """Get an initial state for decoding (optional).

        Delegates to :meth:`init_state`.

        Args:
            x (paddle.Tensor): The encoded feature tensor

        Returns: initial state

        """
        return self.init_state(x)

    def batch_score(self,
                    ys: paddle.Tensor,
                    states: List[Any],
                    xs: paddle.Tensor) -> Tuple[paddle.Tensor, List[Any]]:
        """Score new token batch (required).

        This fallback calls :meth:`score` once per hypothesis and emits a
        warning, because it is not parallelized.

        Args:
            ys (paddle.Tensor): paddle.int64 prefix tokens (n_batch, ylen).
            states (List[Any]): Scorer states for prefix tokens.
            xs (paddle.Tensor):
                The encoder feature that generates ys (n_batch, xlen, n_feat).

        Returns:
            tuple[paddle.Tensor, List[Any]]: Tuple of
                batchfied scores for next token with shape of `(n_batch, n_vocab)`
                and next state list for ys.

        """
        warnings.warn(
            "{} batch score is implemented through for loop not parallelized".
            format(self.__class__.__name__))
        scores = list()
        outstates = list()
        for y, state, x in zip(ys, states, xs):
            score, outstate = self.score(y, state, x)
            outstates.append(outstate)
            scores.append(score)
        # paddle.concat is the documented Paddle API for joining tensors;
        # paddle.cat only exists when a torch-style alias has been patched in.
        scores = paddle.concat(scores, 0).reshape([ys.shape[0], -1])
        return scores, outstates


class PartialScorerInterface(ScorerInterface):
    """Interface for scorers that only score a subset of the vocabulary.

    A partial scorer runs after the non-partial scorers have finished and is
    handed the next tokens that survived their pruning, because scoring every
    token would be too expensive.

    Score sub-set of tokens, not all.

    Examples:
         * Prefix search for connectionist-temporal-classification models
             * :class:`decoders.scorers.ctc.CTCPrefixScorer`

    """

    def score_partial(self,
                      y: paddle.Tensor,
                      next_tokens: paddle.Tensor,
                      state: Any,
                      x: paddle.Tensor) -> Tuple[paddle.Tensor, Any]:
        """Score the given next tokens (must be overridden).

        Args:
            y (paddle.Tensor): 1D prefix token
            next_tokens (paddle.Tensor): paddle.int64 next token to score
            state: decoder state for prefix tokens
            x (paddle.Tensor): The encoder feature that generates ys

        Returns:
            tuple[paddle.Tensor, Any]:
                Tuple of a score tensor for y that has a shape `(len(next_tokens),)`
                and next state for ys

        Raises:
            NotImplementedError: always, in this base class.

        """
        raise NotImplementedError


class BatchPartialScorerInterface(BatchScorerInterface, PartialScorerInterface):
    """Interface for partial scorers that handle a whole batch at once."""

    def batch_score_partial(
            self,
            ys: paddle.Tensor,
            next_tokens: paddle.Tensor,
            states: List[Any],
            xs: paddle.Tensor, ) -> Tuple[paddle.Tensor, Any]:
        """Score the given next tokens for a batch (must be overridden).

        Args:
            ys (paddle.Tensor): paddle.int64 prefix tokens (n_batch, ylen).
            next_tokens (paddle.Tensor): paddle.int64 tokens to score (n_batch, n_token).
            states (List[Any]): Scorer states for prefix tokens.
            xs (paddle.Tensor):
                The encoder feature that generates ys (n_batch, xlen, n_feat).

        Returns:
            tuple[paddle.Tensor, Any]:
                Tuple of a score tensor for ys that has a shape `(n_batch, n_vocab)`
                and next states for ys

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError


================================================
FILE: paddlespeech/s2t/decoders/utils.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
import numpy as np

from paddlespeech.s2t.utils.log import Log
logger = Log(__name__).getlog()

__all__ = ["end_detect", "parse_hypothesis", "add_results_to_json"]


def end_detect(ended_hyps, i, M=3, D_end=np.log(1 * np.exp(-10))):
    """Decide whether beam search can stop.

    described in Eq. (50) of S. Watanabe et al
    "Hybrid CTC/Attention Architecture for End-to-End Speech Recognition"

    For each of the lengths ``i, i-1, ..., i-M+1`` the best ended hypothesis
    of that length is compared with the best ended hypothesis overall. The
    search ends only when every one of those ``M`` lengths has a hypothesis
    and its score is below the overall best by more than ``|D_end|``.

    :param ended_hyps: sequence of hypothesis dicts with "score" and "yseq"
    :param i: int, current output length
    :param M: int, number of most recent lengths to inspect
    :param D_end: float, score-difference threshold (log domain)
    :return: bool
    """
    if len(ended_hyps) == 0:
        return False

    def _score(hyp):
        return hyp["score"]

    best_score = max(ended_hyps, key=_score)["score"]

    count = 0
    # walk the lengths i, i-1, ..., i-M+1
    for hyp_length in range(i, i - M, -1):
        same_length = [
            hyp for hyp in ended_hyps if len(hyp["yseq"]) == hyp_length
        ]
        if len(same_length) == 0:
            continue
        best_same_length = max(same_length, key=_score)
        if best_same_length["score"] - best_score < D_end:
            count += 1

    return count == M


# * ------------------ recognition related ------------------ *
def parse_hypothesis(hyp, char_list):
    """Turn one hypothesis into printable strings.

    The first entry of ``hyp["yseq"]`` (sos) is dropped; the remaining ids
    are mapped through ``char_list``.

    Args:
        hyp (dict[str, Any]): Recognition hypothesis with "yseq" and "score".
        char_list (list[str]): List of characters.

    Returns:
        tuple(str, str, str, float): the text (tokens concatenated, with
            "<space>" replaced by " "), the space-joined tokens, the
            space-joined token ids, and the score.

    """
    # skip the leading sos symbol
    token_ids = [int(raw_id) for raw_id in hyp["yseq"][1:]]
    tokens = [char_list[token_id] for token_id in token_ids]
    score = float(hyp["score"])

    tokenid_str = " ".join(map(str, token_ids))
    token_str = " ".join(tokens)
    text = "".join(tokens).replace("<space>", " ")

    return text, token_str, tokenid_str, score


def add_results_to_json(js, nbest_hyps, char_list):
    """Build a new utterance dict holding the N-best recognition results.

    Args:
        js (dict[str, Any]): Groundtruth utterance dict.
        nbest_hyps (list[dict[str, Any]]): List of hypotheses, best first.
        char_list (list[str]): List of characters.

    Returns:
        dict[str, Any]: N-best results added utterance dict.

    """
    # carry over the speaker info and start with no outputs
    new_js = {"utt2spk": js["utt2spk"], "output": []}

    for rank, hyp in enumerate(nbest_hyps, 1):
        rec_text, rec_token, rec_tokenid, score = parse_hypothesis(hyp,
                                                                   char_list)

        if len(js["output"]) == 0:
            # for no reference case (e.g., speech translation)
            out_dic = {"name": ""}
        else:
            # start from a shallow copy of the first ground-truth entry
            out_dic = {key: value for key, value in js["output"][0].items()}

        # tag the entry with its rank in the N-best list
        out_dic["name"] += "[%d]" % rank

        # attach the recognition results
        out_dic.update(
            rec_text=rec_text,
            rec_token=rec_token,
            rec_tokenid=rec_tokenid,
            score=score)

        new_js["output"].append(out_dic)

        # log only the 1-best result
        if rank == 1:
            if "text" in out_dic:
                logger.info("groundtruth: %s" % out_dic["text"])
            logger.info("prediction : %s" % out_dic["rec_text"])

    return new_js


================================================
FILE: paddlespeech/s2t/exps/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddlespeech.s2t.training.trainer import Trainer
from paddlespeech.s2t.utils.dynamic_import import dynamic_import

# Short trainer names mapped to "module.path:ClassName" strings, passed to
# dynamic_import() as its alias table.
# The "ds2" entry must live under the `exps` package like the others; the
# former `paddlespeech.s2t.exp...` path does not match the package layout.
model_trainer_alias = {
    "ds2": "paddlespeech.s2t.exps.deepspeech2.model:DeepSpeech2Trainer",
    "u2": "paddlespeech.s2t.exps.u2.model:U2Trainer",
    "u2_kaldi": "paddlespeech.s2t.exps.u2_kaldi.model:U2Trainer",
    "u2_st": "paddlespeech.s2t.exps.u2_st.model:U2STTrainer",
}


def dynamic_import_trainer(module):
    """Resolve a trainer name to its class.

    The lookup is delegated to ``dynamic_import`` with
    ``model_trainer_alias`` as the alias table.

    Args:
        module (str): trainer name. e.g., ds2, u2, u2_kaldi

    Returns:
        type: Trainer class

    Raises:
        AssertionError: if the resolved class is not a subclass of Trainer.

    """
    trainer_cls = dynamic_import(module, model_trainer_alias)
    is_trainer = issubclass(trainer_cls, Trainer)
    assert is_trainer, f"{module} does not implement Trainer"
    return trainer_cls


# Short tester names mapped to "module.path:ClassName" strings, passed to
# dynamic_import() as its alias table.
# The "ds2" entry must live under the `exps` package like the others; the
# former `paddlespeech.s2t.exp...` path does not match the package layout.
model_tester_alias = {
    "ds2": "paddlespeech.s2t.exps.deepspeech2.model:DeepSpeech2Tester",
    "u2": "paddlespeech.s2t.exps.u2.model:U2Tester",
    "u2_kaldi": "paddlespeech.s2t.exps.u2_kaldi.model:U2Tester",
    "u2_st": "paddlespeech.s2t.exps.u2_st.model:U2STTester",
}


def dynamic_import_tester(module):
    """Resolve a tester name to its class.

    The lookup is delegated to ``dynamic_import`` with
    ``model_tester_alias`` as the alias table.

    Args:
        module (str): tester name. e.g., ds2, u2, u2_kaldi

    Returns:
        type: Tester class

    Raises:
        AssertionError: if the resolved class is not a subclass of Trainer
            (the check is made against ``Trainer``, not a separate tester
            base class).

    """
    tester_cls = dynamic_import(module, model_tester_alias)
    is_trainer_subclass = issubclass(tester_cls, Trainer)
    assert is_trainer_subclass, f"{module} does not implement Tester"
    return tester_cls


================================================
FILE: paddlespeech/s2t/exps/deepspeech2/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/exps/deepspeech2/bin/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/exps/deepspeech2/bin/deploy/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/exps/deepspeech2/bin/deploy/client.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Client-end for the ASR demo."""
import argparse
import sys

import keyboard
import pyaudio

from paddlespeech.s2t.utils.socket_server import socket_send

# Command-line options: where the ASR server listens.
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
    "--host_ip",
    type=str,
    default="localhost",
    help="Server IP address. (default: %(default)s)")
parser.add_argument(
    "--host_port",
    type=int,
    default=8086,
    help="Server Port. (default: %(default)s)")
# Parsed at import time; the audio callback reads host_ip / host_port from it.
args = parser.parse_args()

# Shared flags, mutated by the keyboard and audio callbacks below.
is_recording = False  # True while audio buffers are being collected
enable_trigger_record = True  # gates whether a key press may start recording


def on_press_release(x):
    """Keyboard hook: start recording on space-down, stop on space-up.

    Args:
        x: keyboard event; its ``event_type`` and ``name`` are inspected.
    """
    global is_recording, enable_trigger_record
    # reference events, used only for their key name
    press = keyboard.KeyboardEvent('down', 28, 'space')
    release = keyboard.KeyboardEvent('up', 28, 'space')

    # key pressed: begin recording unless already recording or disabled
    if (x.event_type == 'down' and x.name == press.name and
            (not is_recording) and enable_trigger_record):
        sys.stdout.write("Start Recording ... ")
        sys.stdout.flush()
        is_recording = True

    # key released: stop an ongoing recording
    if x.event_type == 'up' and x.name == release.name and is_recording:
        is_recording = False


# Audio buffers collected while recording; flushed to the server afterwards.
data_list = []


def callback(in_data, frame_count, time_info, status):
    """Audio recorder's stream callback function.

    While ``is_recording`` is set, each incoming buffer is appended to
    ``data_list``. On the first callback after recording stopped, the
    collected buffers are concatenated, sent with ``socket_send`` and the
    list is cleared.

    Args:
        in_data: recorded audio buffer for this callback (bytes).
        frame_count, time_info, status: supplied by PyAudio; not used.

    Returns:
        tuple: ``(in_data, pyaudio.paContinue)`` so the stream keeps running.
    """
    global data_list, is_recording, enable_trigger_record
    if is_recording:
        data_list.append(in_data)
        enable_trigger_record = False
    elif len(data_list) > 0:
        # The buffers are bytes objects, so they must be joined with a bytes
        # separator; ''.join(...) raises TypeError on Python 3.
        socket_send(args.host_ip, args.host_port, b''.join(data_list))
        data_list = []
    # Runs on every callback, so the flag is True again on return.
    enable_trigger_record = True
    return (in_data, pyaudio.paContinue)


def main():
    """Open the microphone stream and listen for keys until 'esc'."""
    # set up the audio recorder; buffers are delivered to `callback`
    audio = pyaudio.PyAudio()
    stream = audio.open(
        format=pyaudio.paInt16,
        channels=1,
        rate=16000,
        input=True,
        stream_callback=callback)
    stream.start_stream()

    # install the keyboard hook and wait for 'esc'
    while True:
        keyboard.hook(on_press_release)
        if keyboard.record('esc'):
            break

    # shut the recorder down
    stream.stop_stream()
    stream.close()
    audio.terminate()


if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/s2t/exps/deepspeech2/bin/deploy/record.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Record wav from Microphone"""
# http://people.csail.mit.edu/hubert/pyaudio/
import wave

import pyaudio

# Recording parameters.
CHUNK = 1024  # frames read per stream.read() call
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"

p = pyaudio.PyAudio()

# Open the input stream.
stream = p.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    frames_per_buffer=CHUNK)

print("* recording")

# Read enough CHUNK-sized buffers to cover RECORD_SECONDS of audio.
frames = []
for i in range(int(RATE / CHUNK * RECORD_SECONDS)):
    frames.append(stream.read(CHUNK))

print("* done recording")

# Release the audio device before writing the file.
stream.stop_stream()
stream.close()
p.terminate()

# Write the collected buffers out as a wav file.
wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(b''.join(frames))
wf.close()


================================================
FILE: paddlespeech/s2t/exps/deepspeech2/bin/deploy/runtime.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Server-end for the ASR demo."""
import functools

import numpy as np
import paddle
from paddle.inference import Config
from paddle.inference import create_predictor
from paddle.io import DataLoader
from yacs.config import CfgNode

from paddlespeech.s2t.io.collator import SpeechCollator
from paddlespeech.s2t.io.dataset import ManifestDataset
from paddlespeech.s2t.models.ds2 import DeepSpeech2Model
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.utils.socket_server import AsrRequestHandler
from paddlespeech.s2t.utils.socket_server import AsrTCPServer
from paddlespeech.s2t.utils.socket_server import warm_up_test
from paddlespeech.utils.argparse import add_arguments
from paddlespeech.utils.argparse import print_arguments


def init_predictor(args):
    """Create a Paddle inference predictor from the command-line arguments.

    Args:
        args: namespace providing ``model_dir``, ``model_file``,
            ``params_file`` and ``use_gpu``.

    Returns:
        The object returned by ``create_predictor`` for the built config.
    """
    # A model directory takes precedence over an explicit model/params pair.
    config = (Config(args.model_dir) if args.model_dir is not None else
              Config(args.model_file, args.params_file))

    config.enable_memory_optim()
    if not args.use_gpu:
        # CPU path: fixed math-library thread count plus MKL-DNN.
        # The thread num should not be greater than the number of CPU cores.
        config.set_cpu_math_library_num_threads(4)
        config.enable_mkldnn()
    else:
        config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0)

    return create_predictor(config)


def run(predictor, img):
    """Feed inputs to a predictor, execute it and collect every output.

    Args:
        predictor: object exposing ``get_input_names``, ``get_input_handle``,
            ``run``, ``get_output_names`` and ``get_output_handle``.
        img: sequence of arrays, one per predictor input and in the order of
            ``predictor.get_input_names()``. Pass ``None`` to run the
            predictor without copying any input (the inputs must then have
            been filled by the caller).

    Returns:
        list: the result of ``copy_to_cpu()`` for each output, in the order
        of ``predictor.get_output_names()``.
    """
    # copy img data to input tensor
    input_names = predictor.get_input_names()
    for i, name in enumerate(input_names):
        input_tensor = predictor.get_input_handle(name)
        if img is not None:
            input_tensor.reshape(img[i].shape)
            # Copy so the predictor never aliases the caller's buffer.
            input_tensor.copy_from_cpu(img[i].copy())

    # do the inference
    predictor.run()

    # get out data from output tensor
    results = []
    for name in predictor.get_output_names():
        output_tensor = predictor.get_output_handle(name)
        results.append(output_tensor.copy_to_cpu())

    return results


def inference(config, args):
    """Build the predictor for ``args``.

    Only the predictor is constructed; no inference is run and nothing is
    returned. ``config`` is currently unused.
    """
    init_predictor(args)


def start_server(config, args):
    """Load a DeepSpeech2 checkpoint, warm it up and serve ASR over TCP.

    Args:
        config: configuration node; it is defrosted and modified in place.
        args: parsed arguments providing ``checkpoint_path``,
            ``warmup_manifest``, ``host_ip``, ``host_port`` and
            ``speech_save_dir``.

    This call blocks in ``serve_forever()``.
    """
    config.defrost()
    # The dataset is created from the test manifest before the
    # inference-only overrides below are applied.
    config.manifest = config.test_manifest
    dataset = ManifestDataset.from_config(config)

    config.augmentation_config = ""
    config.keep_transcription_text = True
    config.batch_size = 1
    config.num_workers = 0
    test_loader = DataLoader(
        dataset,
        collate_fn=SpeechCollator.from_config(config),
        num_workers=0)

    model = DeepSpeech2Model.from_pretrained(test_loader, config,
                                             args.checkpoint_path)
    model.eval()

    def file_to_transcript(filename):
        """Transcribe a single audio file with the loaded model."""
        collator = test_loader.collate_fn
        feats = collator.process_utterance(filename, "")[0]
        batch = np.array([feats]).astype('float32')  # [1, T, D]
        batch_len = np.array([feats.shape[0]]).astype('int64')  # [1]

        decode_cfg = config.decode
        transcripts = model.decode(
            paddle.to_tensor(batch),
            paddle.to_tensor(batch_len),
            vocab_list=collator.vocab_list,
            decoding_method=decode_cfg.decoding_method,
            lang_model_path=decode_cfg.lang_model_path,
            beam_alpha=decode_cfg.alpha,
            beam_beta=decode_cfg.beta,
            beam_size=decode_cfg.beam_size,
            cutoff_prob=decode_cfg.cutoff_prob,
            cutoff_top_n=decode_cfg.cutoff_top_n,
            num_processes=decode_cfg.num_proc_bsearch)
        return transcripts[0]

    # Warm up with a few utterances taken from the warm-up manifest.
    separator = '-----------------------------------------------------------'
    print(separator)
    print('Warming up ...')
    warm_up_test(
        audio_process_handler=file_to_transcript,
        manifest_path=args.warmup_manifest,
        num_test_cases=3)
    print(separator)

    # Start serving; every request is handled by `file_to_transcript`.
    server = AsrTCPServer(
        server_address=(args.host_ip, args.host_port),
        RequestHandlerClass=AsrRequestHandler,
        speech_save_dir=args.speech_save_dir,
        audio_process_handler=file_to_transcript)
    print("ASR Server Started.")
    server.serve_forever()


def main(config, args):
    """Script entry point: start the ASR server with ``config`` and ``args``."""
    start_server(config, args)


if __name__ == "__main__":
    parser = default_argument_parser()
    add_arg = functools.partial(add_arguments, argparser=parser)
    # Every option is registered with a bare name, matching the first four
    # registrations. Assumes `add_arguments` prepends "--" itself, so a name
    # that already starts with "--" would end up as "----name" -- TODO confirm.
    # yapf: disable
    add_arg('host_ip',          str,
            'localhost',
            "Server's IP address.")
    add_arg('host_port',        int,    8089,    "Server's IP port.")
    add_arg('speech_save_dir',  str,
            'demo_cache',
            "Directory to save demo audios.")
    add_arg('warmup_manifest',  str, None, "Filepath of manifest to warm up.")
    add_arg('model_file',       str,
            "",
            "Model filename, Specify this when your model is a combined model.")
    add_arg('params_file',      str,
            "",
            "Parameter filename, Specify this when your model is a combined model.")
    add_arg('model_dir',        str,
            None,
            "Model dir, If you load a non-combined model, specify the directory of the model.")
    add_arg('use_gpu',          bool,   False,   "Whether use gpu.")
    # yapf: enable
    args = parser.parse_args()
    print_arguments(args, globals())

    # https://yaml.org/type/float.html
    # Layer the configuration: main YAML, decode YAML, then CLI overrides.
    config = CfgNode(new_allowed=True)
    if args.config:
        config.merge_from_file(args.config)
    if args.decode_cfg:
        decode_confs = CfgNode(new_allowed=True)
        decode_confs.merge_from_file(args.decode_cfg)
        config.decode = decode_confs
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()
    print(config)

    # Fall back to the test manifest only when no warm-up manifest was given,
    # so an explicit --warmup_manifest is no longer silently discarded.
    if args.warmup_manifest is None:
        args.warmup_manifest = config.test_manifest
    print_arguments(args, globals())

    if args.dump_config:
        with open(args.dump_config, 'w') as f:
            print(config, file=f)

    main(config, args)


================================================
FILE: paddlespeech/s2t/exps/deepspeech2/bin/deploy/send.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Socket client to send wav to ASR server."""
import argparse
import wave

from paddlespeech.s2t.utils.socket_server import socket_send

# Command-line options: address of the ASR server to send the recording to.
# NOTE(review): the default port here (8086) differs from the defaults used by
# server.py (8088) and runtime.py (8089) -- confirm which one is intended.
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
    "--host_ip",
    type=str,
    default="localhost",
    help="Server IP address. (default: %(default)s)")
parser.add_argument(
    "--host_port",
    type=int,
    default=8086,
    help="Server Port. (default: %(default)s)")
args = parser.parse_args()

# Wav file that `main` reads and sends.
WAVE_OUTPUT_FILENAME = "output.wav"


def main():
    """Read ``WAVE_OUTPUT_FILENAME`` and send its raw frames to the ASR server.

    The wav header fields are printed for inspection; only the frame bytes
    are sent to ``args.host_ip``:``args.host_port`` through ``socket_send``.
    """
    # The context manager guarantees the wav file is closed, including when
    # reading fails.
    with wave.open(WAVE_OUTPUT_FILENAME, 'rb') as wf:
        nframe = wf.getnframes()
        data = wf.readframes(nframe)
        nchannels = wf.getnchannels()
        framerate = wf.getframerate()
        sampwidth = wf.getsampwidth()

    print(f"Wave: {WAVE_OUTPUT_FILENAME}")
    print(f"Wave samples: {nframe}")
    print(f"Wave channels: {nchannels}")
    print(f"Wave sample rate: {framerate}")
    print(f"Wave sample width: {sampwidth}")
    assert isinstance(data, bytes)
    socket_send(args.host_ip, args.host_port, data)


if __name__ == "__main__":
    # Send the recording only when this file is executed as a script.
    main()


================================================
FILE: paddlespeech/s2t/exps/deepspeech2/bin/deploy/server.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Server-end for the ASR demo."""
import functools

import numpy as np
import paddle
from paddle.io import DataLoader
from yacs.config import CfgNode

from paddlespeech.s2t.io.collator import SpeechCollator
from paddlespeech.s2t.io.dataset import ManifestDataset
from paddlespeech.s2t.models.ds2 import DeepSpeech2Model
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.utils.socket_server import AsrRequestHandler
from paddlespeech.s2t.utils.socket_server import AsrTCPServer
from paddlespeech.s2t.utils.socket_server import warm_up_test
from paddlespeech.utils.argparse import add_arguments
from paddlespeech.utils.argparse import print_arguments


def start_server(config, args):
    """Load a DeepSpeech2 checkpoint, warm it up and serve ASR over TCP.

    Args:
        config: configuration node; it is defrosted and modified in place.
        args: parsed arguments providing ``checkpoint_path``,
            ``warmup_manifest``, ``host_ip``, ``host_port`` and
            ``speech_save_dir``.

    This call blocks in ``serve_forever()``.
    """
    config.defrost()
    # The dataset is created from the test manifest before the
    # inference-only overrides below are applied.
    config.manifest = config.test_manifest
    dataset = ManifestDataset.from_config(config)

    config.augmentation_config = ""
    config.keep_transcription_text = True
    config.batch_size = 1
    config.num_workers = 0
    test_loader = DataLoader(
        dataset,
        collate_fn=SpeechCollator.from_config(config),
        num_workers=0)

    model = DeepSpeech2Model.from_pretrained(test_loader, config,
                                             args.checkpoint_path)
    model.eval()

    def file_to_transcript(filename):
        """Transcribe a single audio file, printing the feature shape/length."""
        collator = test_loader.collate_fn
        feats = collator.process_utterance(filename, "")[0]
        batch = np.array([feats]).astype('float32')  # [1, T, D]
        print('---file_to_transcript feature----')
        print(batch.shape)
        num_frames = feats.shape[0]
        print(num_frames)
        batch_len = np.array([num_frames]).astype('int64')  # [1]

        decode_cfg = config.decode
        transcripts = model.decode(
            paddle.to_tensor(batch),
            paddle.to_tensor(batch_len),
            vocab_list=collator.vocab_list,
            decoding_method=decode_cfg.decoding_method,
            lang_model_path=decode_cfg.lang_model_path,
            beam_alpha=decode_cfg.alpha,
            beam_beta=decode_cfg.beta,
            beam_size=decode_cfg.beam_size,
            cutoff_prob=decode_cfg.cutoff_prob,
            cutoff_top_n=decode_cfg.cutoff_top_n,
            num_processes=decode_cfg.num_proc_bsearch)
        return transcripts[0]

    # Warm up with a few utterances taken from the warm-up manifest.
    separator = '-----------------------------------------------------------'
    print(separator)
    print('Warming up ...')
    warm_up_test(
        audio_process_handler=file_to_transcript,
        manifest_path=args.warmup_manifest,
        num_test_cases=3)
    print(separator)

    # Start serving; every request is handled by `file_to_transcript`.
    server = AsrTCPServer(
        server_address=(args.host_ip, args.host_port),
        RequestHandlerClass=AsrRequestHandler,
        speech_save_dir=args.speech_save_dir,
        audio_process_handler=file_to_transcript)
    print("ASR Server Started.")
    server.serve_forever()


def main(config, args):
    """Script entry point: start the ASR server with ``config`` and ``args``."""
    start_server(config, args)


if __name__ == "__main__":
    parser = default_argument_parser()
    add_arg = functools.partial(add_arguments, argparser=parser)
    # yapf: disable
    add_arg('host_ip',          str,
            'localhost',
            "Server's IP address.")
    add_arg('host_port',        int,    8088,    "Server's IP port.")
    add_arg('speech_save_dir',  str,
            'demo_cache',
            "Directory to save demo audios.")
    add_arg('warmup_manifest', str, None, "Filepath of manifest to warm up.")
    # yapf: enable
    args = parser.parse_args()
    print_arguments(args, globals())

    # https://yaml.org/type/float.html
    # Layer the configuration: main YAML, decode YAML, then CLI overrides.
    config = CfgNode(new_allowed=True)
    if args.config:
        config.merge_from_file(args.config)
    if args.decode_cfg:
        decode_confs = CfgNode(new_allowed=True)
        decode_confs.merge_from_file(args.decode_cfg)
        config.decode = decode_confs
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()
    print(config)

    # Fall back to the test manifest only when no warm-up manifest was given,
    # so an explicit --warmup_manifest is no longer silently discarded.
    if args.warmup_manifest is None:
        args.warmup_manifest = config.test_manifest
    print_arguments(args, globals())

    if args.dump_config:
        with open(args.dump_config, 'w') as f:
            print(config, file=f)

    main(config, args)


================================================
FILE: paddlespeech/s2t/exps/deepspeech2/bin/export.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Export for DeepSpeech2 model."""
from yacs.config import CfgNode

from paddlespeech.s2t.exps.deepspeech2.model import DeepSpeech2Tester as Tester
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.utils.argparse import print_arguments


def main_sp(config, args):
    """Build a ``Tester``, set it up and export the model in its eval context."""
    tester = Tester(config, args)
    with tester.eval():
        tester.setup()
        tester.run_export()


def main(config, args):
    """Script entry point: delegate to ``main_sp``."""
    main_sp(config, args)


if __name__ == "__main__":
    parser = default_argument_parser()
    args = parser.parse_args()
    print_arguments(args)

    # Build the configuration: YAML file first, then command-line overrides.
    # https://yaml.org/type/float.html
    config = CfgNode(new_allowed=True)
    if args.config:
        config.merge_from_file(args.config)
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()
    print(config)

    # Optionally write the resolved configuration to a file.
    if args.dump_config:
        with open(args.dump_config, 'w') as dump_file:
            print(config, file=dump_file)

    main(config, args)


================================================
FILE: paddlespeech/s2t/exps/deepspeech2/bin/test.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Evaluation for DeepSpeech2 model."""
from yacs.config import CfgNode

from paddlespeech.s2t.exps.deepspeech2.model import DeepSpeech2Tester as Tester
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.utils.argparse import print_arguments


def main_sp(config, args):
    """Build a ``Tester``, set it up and run the test pass in its eval context."""
    tester = Tester(config, args)
    with tester.eval():
        tester.setup()
        tester.run_test()


def main(config, args):
    """Script entry point: delegate to ``main_sp``."""
    main_sp(config, args)


if __name__ == "__main__":
    parser = default_argument_parser()
    args = parser.parse_args()
    print_arguments(args, globals())

    # Build the configuration: main YAML, decode YAML, then CLI overrides.
    # https://yaml.org/type/float.html
    config = CfgNode(new_allowed=True)
    if args.config:
        config.merge_from_file(args.config)
    if args.decode_cfg:
        decode_node = CfgNode(new_allowed=True)
        decode_node.merge_from_file(args.decode_cfg)
        config.decode = decode_node
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()
    print(config)

    # Optionally write the resolved configuration to a file.
    if args.dump_config:
        with open(args.dump_config, 'w') as dump_file:
            print(config, file=dump_file)

    main(config, args)


================================================
FILE: paddlespeech/s2t/exps/deepspeech2/bin/test_export.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Evaluation for DeepSpeech2 model."""
from yacs.config import CfgNode

from paddlespeech.s2t.exps.deepspeech2.model import DeepSpeech2ExportTester as ExportTester
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.utils.argparse import print_arguments


def main_sp(config, args):
    """Build an ``ExportTester``, set it up and run its test pass in eval context."""
    tester = ExportTester(config, args)
    with tester.eval():
        tester.setup()
        tester.run_test()


def main(config, args):
    """Script entry point: delegate to ``main_sp``."""
    main_sp(config, args)


if __name__ == "__main__":
    parser = default_argument_parser()
    # Extra flag specific to this script.
    parser.add_argument(
        "--enable-auto-log", action="store_true", help="use auto log")
    args = parser.parse_args()
    print_arguments(args, globals())

    # Build the configuration: main YAML, decode YAML, then CLI overrides.
    # https://yaml.org/type/float.html
    config = CfgNode(new_allowed=True)
    if args.config:
        config.merge_from_file(args.config)
    if args.decode_cfg:
        decode_node = CfgNode(new_allowed=True)
        decode_node.merge_from_file(args.decode_cfg)
        config.decode = decode_node
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()
    print(config)

    # Optionally write the resolved configuration to a file.
    if args.dump_config:
        with open(args.dump_config, 'w') as dump_file:
            print(config, file=dump_file)

    main(config, args)


================================================
FILE: paddlespeech/s2t/exps/deepspeech2/bin/test_wav.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Evaluation for DeepSpeech2 model."""
import os
import sys
from pathlib import Path

import paddle
import soundfile
from yacs.config import CfgNode

from paddlespeech.audio.transform.transformation import Transformation
from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
from paddlespeech.s2t.models.ds2 import DeepSpeech2Model
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.utils import mp_tools
from paddlespeech.s2t.utils.checkpoint import Checkpoint
from paddlespeech.s2t.utils.log import Log
from paddlespeech.s2t.utils.utility import UpdateConfig
from paddlespeech.utils.argparse import print_arguments

logger = Log(__name__).getlog()


class DeepSpeech2Tester_hub():
    """Decode a single audio file with a DeepSpeech2 model.

    Typical use is ``setup()`` followed by ``run_test()``: ``setup`` prepares
    the output directory, the checkpoint helper and the model; ``run_test``
    loads the parameters and logs the transcript of ``args.audio_file``.
    """

    def __init__(self, config, args):
        """Store config/args and build the feature and text front ends.

        Args:
            config: configuration node providing ``preprocess_config``,
                ``unit_type``, ``vocab_filepath`` and ``spm_model_prefix``.
            args: parsed arguments providing ``audio_file`` and ``ngpu``.
        """
        self.args = args
        self.config = config
        self.audio_file = args.audio_file

        self.preprocess_conf = config.preprocess_config
        # Keyword arguments passed to the preprocessing pipeline on each call.
        self.preprocess_args = {"train": False}
        self.preprocessing = Transformation(self.preprocess_conf)

        self.text_feature = TextFeaturizer(
            unit_type=config.unit_type,
            vocab=config.vocab_filepath,
            spm_model_prefix=config.spm_model_prefix)
        paddle.set_device('gpu' if self.args.ngpu > 0 else 'cpu')

    def compute_result_transcripts(self, audio, audio_len, vocab_list, cfg):
        """Initialise the decoder from ``cfg`` and decode one batch.

        Args:
            audio: feature tensor passed to ``model.decode``.
            audio_len: length tensor passed to ``model.decode``.
            vocab_list: vocabulary handed to the decoder.
            cfg: decode configuration node.

        Returns:
            Whatever ``self.model.decode`` returns for the batch.
        """
        decode_batch_size = cfg.decode_batch_size
        self.model.decoder.init_decoder(
            decode_batch_size, vocab_list, cfg.decoding_method,
            cfg.lang_model_path, cfg.alpha, cfg.beta, cfg.beam_size,
            cfg.cutoff_prob, cfg.cutoff_top_n, cfg.num_proc_bsearch)
        result_transcripts = self.model.decode(audio, audio_len)
        return result_transcripts

    @mp_tools.rank_zero_only
    @paddle.no_grad()
    def test(self):
        """Read the audio file, extract features, decode and log the result."""
        self.model.eval()
        cfg = self.config

        # always_2d yields a 2-D array; only the first channel is kept.
        audio, _ = soundfile.read(
            self.audio_file, dtype="int16", always_2d=True)

        audio = audio[:, 0]
        logger.info(f"audio shape: {audio.shape}")

        # fbank
        feat = self.preprocessing(audio, **self.preprocess_args)
        logger.info(f"feat shape: {feat.shape}")

        # Add a leading batch axis of size one to both tensors.
        audio_len = paddle.to_tensor(feat.shape[0]).unsqueeze(0)
        audio = paddle.to_tensor(feat, dtype='float32').unsqueeze(axis=0)

        result_transcripts = self.compute_result_transcripts(
            audio, audio_len, self.text_feature.vocab_list, cfg.decode)

        logger.info("result_transcripts: " + result_transcripts[0])

    def run_test(self):
        """Load the model parameters and run ``test``.

        A ``KeyboardInterrupt`` during the test terminates the process with
        exit status -1.
        """
        self.resume()
        try:
            self.test()
        except KeyboardInterrupt:
            # sys.exit rather than the `exit` helper injected by `site`,
            # which is not guaranteed to exist in every interpreter setup.
            sys.exit(-1)

    def setup(self):
        """Setup the experiment.
        """
        paddle.set_device('gpu' if self.args.ngpu > 0 else 'cpu')

        self.setup_output_dir()
        self.setup_checkpointer()

        self.setup_model()

    def setup_output_dir(self):
        """Create a directory used for output.

        Uses ``args.output`` when given; otherwise the grandparent directory
        of ``args.checkpoint_path``.
        """
        if self.args.output:
            output_dir = Path(self.args.output).expanduser()
        else:
            output_dir = Path(
                self.args.checkpoint_path).expanduser().parent.parent
        output_dir.mkdir(parents=True, exist_ok=True)
        self.output_dir = output_dir

    def setup_model(self):
        """Build the DeepSpeech2 model from a clone of the configuration.

        ``input_dim`` is taken from ``feat_dim`` and ``output_dim`` from the
        vocabulary size of the text featurizer.
        """
        config = self.config.clone()
        with UpdateConfig(config):
            config.input_dim = config.feat_dim
            config.output_dim = self.text_feature.vocab_size
        model = DeepSpeech2Model.from_config(config)
        self.model = model

    def setup_checkpointer(self):
        """Create a directory used to save checkpoints into.

        It is "checkpoints" inside the output directory.
        """
        # checkpoint dir
        checkpoint_dir = self.output_dir / "checkpoints"
        checkpoint_dir.mkdir(exist_ok=True)

        self.checkpoint_dir = checkpoint_dir

        self.checkpoint = Checkpoint(
            kbest_n=self.config.checkpoint.kbest_n,
            latest_n=self.config.checkpoint.latest_n)

    def resume(self):
        """Load model parameters from ``<args.checkpoint_path>.pdparams``."""
        params_path = self.args.checkpoint_path + ".pdparams"
        model_dict = paddle.load(params_path)
        self.model.set_state_dict(model_dict)


def check(audio_file):
    """Verify that ``audio_file`` is readable and sampled at 16 kHz.

    Exits the process with status -1 when the file cannot be read; raises
    ``AssertionError`` when the sample rate is not 16000.
    """
    logger.info("checking the audio file format......")
    try:
        _, sample_rate = soundfile.read(audio_file)
    except Exception as err:
        logger.error(str(err))
        logger.error(
            "can not open the wav file, please check the audio file format")
        sys.exit(-1)
    else:
        logger.info("The sample rate is %d" % sample_rate)
        assert (sample_rate == 16000)
        logger.info("The audio file format is right")


def main_sp(config, args):
    """Build a ``DeepSpeech2Tester_hub``, set it up and decode the audio file."""
    tester = DeepSpeech2Tester_hub(config, args)
    tester.setup()
    tester.run_test()


def main(config, args):
    """Script entry point: delegate to ``main_sp``."""
    main_sp(config, args)


if __name__ == "__main__":
    parser = default_argument_parser()
    args = parser.parse_args()
    print_arguments(args, globals())
    # Check for a missing value first: os.path.isfile(None) raises TypeError,
    # which would hide the message below when no audio file is supplied.
    if not args.audio_file or not os.path.isfile(args.audio_file):
        print("Please input the audio file path")
        sys.exit(-1)
    check(args.audio_file)

    # Build the configuration: main YAML, decode YAML, then CLI overrides.
    # https://yaml.org/type/float.html
    config = CfgNode(new_allowed=True)
    if args.config:
        config.merge_from_file(args.config)
    if args.decode_cfg:
        decode_confs = CfgNode(new_allowed=True)
        decode_confs.merge_from_file(args.decode_cfg)
        config.decode = decode_confs
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()
    print(config)

    # Optionally write the resolved configuration to a file.
    if args.dump_config:
        with open(args.dump_config, 'w') as f:
            print(config, file=f)

    main(config, args)


================================================
FILE: paddlespeech/s2t/exps/deepspeech2/bin/train.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Trainer for DeepSpeech2 model."""
from yacs.config import CfgNode

from paddlespeech.s2t.exps.deepspeech2.model import DeepSpeech2Trainer as Trainer
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.utils.argparse import print_arguments


def main_sp(config, args):
    """Build a ``Trainer``, set it up and run training."""
    trainer = Trainer(config, args)
    trainer.setup()
    trainer.run()


def main(config, args):
    """Script entry point: delegate to ``main_sp``."""
    main_sp(config, args)


if __name__ == "__main__":
    parser = default_argument_parser()
    args = parser.parse_args()
    print_arguments(args, globals())

    # Build the configuration: YAML file first, then command-line overrides.
    # https://yaml.org/type/float.html
    config = CfgNode(new_allowed=True)
    if args.config:
        config.merge_from_file(args.config)
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()
    print(config)

    # Optionally write the resolved configuration to a file.
    if args.dump_config:
        with open(args.dump_config, 'w') as dump_file:
            print(config, file=dump_file)

    main(config, args)


================================================
FILE: paddlespeech/s2t/exps/deepspeech2/model.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains DeepSpeech2 and DeepSpeech2Online model."""
import os
import time
from collections import defaultdict
from contextlib import nullcontext

import jsonlines
import numpy as np
import paddle
from paddle import distributed as dist
from paddle import inference

import paddlespeech.utils
from paddlespeech.audio.text.text_featurizer import TextFeaturizer
from paddlespeech.s2t.io.dataloader import BatchDataLoader
from paddlespeech.s2t.models.ds2 import DeepSpeech2InferModel
from paddlespeech.s2t.models.ds2 import DeepSpeech2Model
from paddlespeech.s2t.training.reporter import report
from paddlespeech.s2t.training.timer import Timer
from paddlespeech.s2t.training.trainer import Trainer
from paddlespeech.s2t.utils import error_rate
from paddlespeech.s2t.utils import layer_tools
from paddlespeech.s2t.utils import mp_tools
from paddlespeech.s2t.utils.log import Log
from paddlespeech.s2t.utils.utility import UpdateConfig

logger = Log(__name__).getlog()


class DeepSpeech2Trainer(Trainer):
    def __init__(self, config, args):
        """Build the trainer; all set-up is delegated to the base ``Trainer``."""
        super(DeepSpeech2Trainer, self).__init__(config, args)

    def train_batch(self, batch_index, batch_data, msg):
        """Run forward/backward on one batch with gradient accumulation.

        The optimizer is stepped (and ``self.iteration`` incremented) only
        when ``(batch_index + 1)`` is a multiple of ``config.accum_grad``.

        Args:
            batch_index: index of the current batch.
            batch_data: ``(utt, audio, audio_len, text, text_len)``.
            msg: not used by this implementation.
        """
        cfg = self.config
        batch_size = cfg.batch_size
        accum_grad = cfg.accum_grad

        tic = time.time()

        # Forward pass; the utterance ids are not needed for the loss.
        _, audio, audio_len, text, text_len = batch_data
        loss = self.model(audio, audio_len, text, text_len)
        scalars = {'train_loss': float(loss)}

        is_update_step = (batch_index + 1) % accum_grad == 0

        # On pure accumulation steps a parallel model skips gradient
        # synchronization (via `no_sync`); the gradients are synchronized
        # later, on the step that actually updates the parameters.
        if (not is_update_step and hasattr(self.model, "no_sync") and
                self.parallel):
            grad_context = self.model.no_sync
        else:
            grad_context = nullcontext

        with grad_context():
            loss.backward()
            layer_tools.print_grads(self.model, print_func=None)

        if is_update_step:
            self.optimizer.step()
            self.optimizer.clear_grad()
            self.iteration += 1

        elapsed = time.time() - tic

        for name, value in scalars.items():
            report(name, value)
        report("batch_size", batch_size)
        report("accum", accum_grad)
        report("step_cost", elapsed)

        if dist.get_rank() == 0 and self.visualizer:
            # `self.iteration` was already advanced after optimizer.step(),
            # hence the `- 1`.
            logged_step = self.iteration - 1
            for name, value in scalars.items():
                self.visualizer.add_scalar("train/{}".format(name), value,
                                           logged_step)

    @paddle.no_grad()
    def valid(self):
        """Evaluate the model on the validation loader.

        Batches whose loss is not finite are ignored.

        Returns:
            tuple: ``(total_loss, num_seen_utts)`` where ``total_loss`` is the
            sum of ``loss * batch_utterances`` and ``num_seen_utts`` is the
            utterance counter, which starts at 1.
        """
        logger.info(f"Valid Total Examples: {len(self.valid_loader.dataset)}")
        self.model.eval()

        history = defaultdict(list)
        # The counter starts at 1, so the averages below never divide by zero.
        num_seen_utts = 1
        total_loss = 0.0

        for step, batch in enumerate(self.valid_loader, start=1):
            _, audio, audio_len, text, text_len = batch
            loss = self.model(audio, audio_len, text, text_len)

            if paddle.isfinite(loss):
                loss_value = float(loss)
                num_utts = batch[1].shape[0]
                num_seen_utts += num_utts
                total_loss += loss_value * num_utts
                history['val_loss'].append(loss_value)

            if step % self.config.log_interval != 0:
                continue

            summary = {key: np.mean(vals) for key, vals in history.items()}
            summary['val_history_loss'] = total_loss / num_seen_utts

            # logging
            pieces = [
                f"Valid: Rank: {dist.get_rank()}, ",
                "epoch: {}, ".format(self.epoch),
                "step: {}, ".format(self.iteration),
                "batch : {}/{}, ".format(step, len(self.valid_loader)),
                ', '.join('{}: {:>.6f}'.format(key, val)
                          for key, val in summary.items()),
            ]
            logger.info("".join(pieces))

        logger.info('Rank {} Val info val_loss {}'.format(
            dist.get_rank(), total_loss / num_seen_utts))
        return total_loss, num_seen_utts

    def setup_model(self):
        """Create the DeepSpeech2 model and, when training, its optimizer.

        The model input/output dimensions are taken from the train loader in
        training mode and from the test loader otherwise. Sets ``self.model``
        and, in training mode, ``self.optimizer`` and ``self.lr_scheduler``.
        """
        config = self.config.clone()
        loader = self.train_loader if self.train else self.test_loader
        with UpdateConfig(config):
            config.input_dim = loader.feat_dim
            config.output_dim = loader.vocab_size

        net = DeepSpeech2Model.from_config(config)
        if self.parallel:
            net = paddle.DataParallel(net)

        logger.info(f"{net}")
        layer_tools.print_params(net, logger.info)
        self.model = net
        logger.info("Setup model!")

        if self.train:
            # Optimizer pieces are only needed for training.
            clipper = paddle.nn.ClipGradByGlobalNorm(config.global_grad_clip)
            scheduler = paddle.optimizer.lr.ExponentialDecay(
                learning_rate=config.lr, gamma=config.lr_decay, verbose=True)
            self.optimizer = paddle.optimizer.Adam(
                learning_rate=scheduler,
                parameters=net.parameters(),
                weight_decay=paddle.regularizer.L2Decay(config.weight_decay),
                grad_clip=clipper)
            self.lr_scheduler = scheduler
            logger.info("Setup optimizer/lr_scheduler!")

    def setup_dataloader(self):
        """Create the data loaders needed for the current mode.

        Training mode builds ``self.train_loader`` and ``self.valid_loader``;
        otherwise only ``self.test_loader`` is built.
        """
        config = self.config.clone()
        config.defrost()

        # Settings shared by every non-training loader (valid and test).
        eval_kwargs = dict(
            train_mode=False,
            sortagrad=False,
            maxlen_in=float('inf'),
            maxlen_out=float('inf'),
            minibatches=0,
            batch_count='auto',
            batch_bins=0,
            batch_frames_in=0,
            batch_frames_out=0,
            batch_frames_inout=0,
            preprocess_conf=config.preprocess_config,
            subsampling_factor=1,
            num_encs=1)

        if not self.train:
            decode_batch_size = config.get('decode', dict()).get(
                'decode_batch_size', 1)
            # test dataset, return raw text
            self.test_loader = BatchDataLoader(
                json_file=config.test_manifest,
                batch_size=decode_batch_size,
                mini_batch_size=1,
                n_iter_processes=1,
                **eval_kwargs)
            logger.info("Setup test/align Dataloader!")
            return

        use_dist_sampler = config.get('dist_sampler', False)

        # train/valid dataset, return token ids
        self.train_loader = BatchDataLoader(
            json_file=config.train_manifest,
            train_mode=True,
            sortagrad=config.sortagrad,
            batch_size=config.batch_size,
            maxlen_in=config.maxlen_in,
            maxlen_out=config.maxlen_out,
            minibatches=config.minibatches,
            mini_batch_size=self.args.ngpu,
            batch_count=config.batch_count,
            batch_bins=config.batch_bins,
            batch_frames_in=config.batch_frames_in,
            batch_frames_out=config.batch_frames_out,
            batch_frames_inout=config.batch_frames_inout,
            preprocess_conf=config.preprocess_config,
            n_iter_processes=config.num_workers,
            subsampling_factor=1,
            num_encs=1,
            dist_sampler=use_dist_sampler,
            shortest_first=False)

        self.valid_loader = BatchDataLoader(
            json_file=config.dev_manifest,
            batch_size=config.batch_size,
            mini_batch_size=self.args.ngpu,
            n_iter_processes=config.num_workers,
            dist_sampler=use_dist_sampler,
            shortest_first=False,
            **eval_kwargs)
        logger.info("Setup train/valid Dataloader!")


class DeepSpeech2Tester(DeepSpeech2Trainer):
    def __init__(self, config, args):
        """Build the tester and the text featurizer used to decode token ids.

        ``config.unit_type`` and ``config.vocab_filepath`` configure the
        featurizer; its vocabulary is exposed as ``self.vocab_list``.
        """
        super(DeepSpeech2Tester, self).__init__(config, args)
        featurizer = TextFeaturizer(
            unit_type=config.unit_type, vocab=config.vocab_filepath)
        self._text_featurizer = featurizer
        self.vocab_list = featurizer.vocab_list

    def ordid2token(self, texts, texts_len):
        """Convert padded token-id sequences back to text.

        Each row of ``texts`` is cut to its length from ``texts_len`` and
        handed to the text featurizer's ``defeaturize``.

        Returns:
            list: one decoded transcript per row.
        """

        def _row_to_text(row, row_len):
            valid_ids = row[:row_len.numpy().item()]
            return self._text_featurizer.defeaturize(
                valid_ids.numpy().tolist())

        return [
            _row_to_text(row, row_len)
            for row, row_len in zip(texts, texts_len)
        ]

    def compute_metrics(self,
                        utts,
                        audio,
                        audio_len,
                        texts,
                        texts_len,
                        fout=None):
        """Decode one batch and accumulate its error statistics.

        Args:
            utts: utterance ids of the batch.
            audio, audio_len: model inputs forwarded to
                ``compute_result_transcripts``.
            texts, texts_len: reference token ids and their lengths.
            fout: optional writer; when given, one record with ``utt``,
                ``refs`` and ``hyps`` is written per utterance.

        Returns:
            dict: ``errors_sum``, ``len_refs``, ``num_ins``, ``error_rate``
            and ``error_rate_type`` for this batch.
        """
        decode_cfg = self.config.decode
        if decode_cfg.error_rate_type == 'cer':
            count_errors = error_rate.char_errors
            rate_of = error_rate.cer
        else:
            count_errors = error_rate.word_errors
            rate_of = error_rate.wer

        references = self.ordid2token(texts, texts_len)
        hypotheses = self.compute_result_transcripts(audio, audio_len)

        total_errors, total_ref_len, n_utts = 0.0, 0, 0
        for utt, ref, hyp in zip(utts, references, hypotheses):
            n_errors, ref_len = count_errors(ref, hyp)
            total_errors += n_errors
            total_ref_len += ref_len
            n_utts += 1
            if fout:
                fout.write({"utt": utt, "refs": [ref], "hyps": [hyp]})
            logger.info(f"Utt: {utt}")
            logger.info(f"Ref: {ref}")
            logger.info(f"Hyp: {hyp}")
            logger.info("Current error rate [%s] = %f" %
                        (decode_cfg.error_rate_type, rate_of(ref, hyp)))

        return {
            'errors_sum': total_errors,
            'len_refs': total_ref_len,
            'num_ins': n_utts,
            'error_rate': total_errors / total_ref_len,
            'error_rate_type': decode_cfg.error_rate_type,
        }

    def compute_result_transcripts(self, audio, audio_len):
        """Return the transcripts produced by ``self.model.decode`` for a batch."""
        return self.model.decode(audio, audio_len)

    @mp_tools.rank_zero_only
    @paddle.no_grad()
    def test(self):
        """Decode the test set, write results and log the error rate.

        One record per utterance is written to ``self.args.result_file``
        (through ``compute_metrics``). The decoder created by
        ``init_decoder`` is always released with ``del_decoder``, even if
        decoding raises. When no reference tokens were seen (e.g. an empty
        test set) the error rate is reported as NaN instead of raising
        ``ZeroDivisionError``.
        """
        logger.info(f"Test Total Examples: {len(self.test_loader.dataset)}")
        self.model.eval()
        error_rate_type = None
        errors_sum, len_refs, num_ins = 0.0, 0, 0

        # Initialized the decoder in model
        decode_cfg = self.config.decode
        vocab_list = self.vocab_list
        decode_batch_size = decode_cfg.decode_batch_size
        self.model.decoder.init_decoder(
            decode_batch_size, vocab_list, decode_cfg.decoding_method,
            decode_cfg.lang_model_path, decode_cfg.alpha, decode_cfg.beta,
            decode_cfg.beam_size, decode_cfg.cutoff_prob,
            decode_cfg.cutoff_top_n, decode_cfg.num_proc_bsearch)

        try:
            with jsonlines.open(self.args.result_file, 'w') as fout:
                for batch in self.test_loader:
                    utts, audio, audio_len, texts, texts_len = batch
                    metrics = self.compute_metrics(utts, audio, audio_len,
                                                   texts, texts_len, fout)
                    errors_sum += metrics['errors_sum']
                    len_refs += metrics['len_refs']
                    num_ins += metrics['num_ins']
                    error_rate_type = metrics['error_rate_type']
                    running_rate = (errors_sum / len_refs
                                    if len_refs else float('nan'))
                    logger.info("Error rate [%s] (%d/?) = %f" %
                                (error_rate_type, num_ins, running_rate))

            # Guard against an empty test set: no references, no rate.
            final_rate = errors_sum / len_refs if len_refs else float('nan')

            # logging
            msg = "Test: "
            msg += "epoch: {}, ".format(self.epoch)
            msg += "step: {}, ".format(self.iteration)
            msg += "Final error rate [%s] (%d/%d) = %f" % (
                error_rate_type, num_ins, num_ins, final_rate)
            logger.info(msg)
        finally:
            # Release the decoder whether or not decoding succeeded.
            self.model.decoder.del_decoder()

    @paddle.no_grad()
    def export(self):
        """Export the checkpoint as a static inference model.

        Loads a ``DeepSpeech2InferModel`` from ``self.args.checkpoint_path``,
        converts it with its ``export()`` method and saves the result to
        ``self.args.export_path`` via ``paddle.jit.save``.
        """
        infer_model = DeepSpeech2InferModel.from_pretrained(
            self.test_loader, self.config, self.args.checkpoint_path)
        infer_model.eval()
        static_model = infer_model.export()
        try:
            logger.info(f"Export code: {static_model.forward.code}")
        except Exception:
            # Printing the generated code is best-effort only. Catch
            # `Exception` rather than using a bare `except` so that
            # KeyboardInterrupt/SystemExit still propagate.
            logger.info(
                "Fail to print Export code, static_model.forward.code can not be run."
            )
        paddle.jit.save(static_model, self.args.export_path)


class DeepSpeech2ExportTester(DeepSpeech2Tester):
    def __init__(self, config, args):
        """Build a tester that runs the exported (static) model."""
        super(DeepSpeech2ExportTester, self).__init__(config, args)
        self.args = args
        self.apply_static = True

    @mp_tools.rank_zero_only
    @paddle.no_grad()
    def test(self):
        """Decode the test set with the exported model and log the error rate.

        The decoder batch size is 1 for ``rnn_direction == "forward"`` and
        the test loader's batch size for ``"bidirect"``. The decoder is
        always released with ``del_decoder``, even if decoding raises. When
        no reference tokens were seen the error rate is reported as NaN
        instead of raising ``ZeroDivisionError``.

        Raises:
            Exception: if ``config.rnn_direction`` is neither "forward" nor
                "bidirect".
        """
        logger.info(f"Test Total Examples: {len(self.test_loader.dataset)}")
        if self.args.enable_auto_log is True:
            from paddlespeech.s2t.utils.log import Autolog
            self.autolog = Autolog(
                batch_size=self.config.decode.decode_batch_size,
                model_name="deepspeech2",
                model_precision="fp32").getlog()
        self.model.eval()
        error_rate_type = None
        errors_sum, len_refs, num_ins = 0.0, 0, 0

        # Initialized the decoder in model
        decode_cfg = self.config.decode
        vocab_list = self.vocab_list
        if self.config.rnn_direction == "forward":
            decode_batch_size = 1
        elif self.config.rnn_direction == "bidirect":
            decode_batch_size = self.test_loader.batch_size
        else:
            raise Exception("wrong model type")
        self.model.decoder.init_decoder(
            decode_batch_size, vocab_list, decode_cfg.decoding_method,
            decode_cfg.lang_model_path, decode_cfg.alpha, decode_cfg.beta,
            decode_cfg.beam_size, decode_cfg.cutoff_prob,
            decode_cfg.cutoff_top_n, decode_cfg.num_proc_bsearch)

        try:
            with jsonlines.open(self.args.result_file, 'w') as fout:
                for batch in self.test_loader:
                    utts, audio, audio_len, texts, texts_len = batch
                    metrics = self.compute_metrics(utts, audio, audio_len,
                                                   texts, texts_len, fout)
                    errors_sum += metrics['errors_sum']
                    len_refs += metrics['len_refs']
                    num_ins += metrics['num_ins']
                    error_rate_type = metrics['error_rate_type']
                    running_rate = (errors_sum / len_refs
                                    if len_refs else float('nan'))
                    logger.info("Error rate [%s] (%d/?) = %f" %
                                (error_rate_type, num_ins, running_rate))

            # Guard against an empty test set: no references, no rate.
            final_rate = errors_sum / len_refs if len_refs else float('nan')

            # logging
            msg = "Test: "
            msg += "epoch: {}, ".format(self.epoch)
            msg += "step: {}, ".format(self.iteration)
            msg += "Final error rate [%s] (%d/%d) = %f" % (
                error_rate_type, num_ins, num_ins, final_rate)
            logger.info(msg)
            if self.args.enable_auto_log is True:
                self.autolog.report()
        finally:
            # Release the decoder whether or not decoding succeeded.
            self.model.decoder.del_decoder()

    def compute_result_transcripts(self, audio, audio_len):
        """Decode a batch with the exported model and detokenize the result.

        ``rnn_direction == "forward"`` uses the chunk-by-chunk online path
        and keeps the transcript obtained after the last chunk;
        ``"bidirect"`` runs the whole batch once and decodes the returned
        probabilities.

        Raises:
            Exception: for any other ``rnn_direction``.
        """
        direction = self.config.rnn_direction

        if direction == "forward":
            _, _, trans_batch = self.static_forward_online(
                audio, audio_len, decoder_chunk_size=1)
            best_transcripts = [per_chunk[-1] for per_chunk in trans_batch]
        elif direction == "bidirect":
            probs, lens = self.static_forward_offline(audio, audio_len)
            decoder = self.model.decoder
            decoder.reset_decoder(batch_size=probs.shape[0])
            decoder.next(probs, lens)
            best_transcripts, _ = decoder.decode()
        else:
            raise Exception("wrong model type")

        self.predictor.try_shrink_memory()

        # replace the <space> with ' '
        detokenize = self._text_featurizer.detokenize
        return [detokenize(sentence) for sentence in best_transcripts]

    def run_test(self):
        """Do Test/Decode.

        Runs ``self.test()`` inside the ``self.eval()`` context and a
        ``Timer``. A ``KeyboardInterrupt`` ends the process with exit
        status -1.
        """
        # Local import: `sys.exit` is used instead of the `exit` helper,
        # which is injected by the `site` module for interactive use and is
        # not guaranteed to exist in every interpreter configuration.
        import sys

        try:
            with Timer("Test/Decode Done: {}"):
                with self.eval():
                    self.test()
        except KeyboardInterrupt:
            sys.exit(-1)

    def static_forward_online(self, audio, audio_len,
                              decoder_chunk_size: int=1):
        """
        Run the exported model chunk by chunk (streaming), one utterance at
        a time, feeding the decoder after every chunk.

        Parameters
        ----------
            audio (Tensor): shape[B, T, D]
            audio_len (Tensor): shape[B]
            decoder_chunk_size(int): scales the chunk stride
                (``subsampling_rate * decoder_chunk_size``) and the chunk
                size used to slice the input.
        Returns
        -------
            output_probs(numpy.array): per-utterance probabilities stacked
                along axis 0; each one is zero-padded along axis 1 up to
                ``Tmax + padding_len_batch`` entries.
            output_lens(numpy.array): per-utterance sums of the output
                lengths reported for each chunk.
            trans(list(list(str))): for each utterance, the best transcript
                returned by the decoder after every processed chunk.
        """
        output_probs_list = []
        output_lens_list = []
        subsampling_rate = self.model.encoder.conv.subsampling_rate
        receptive_field_length = self.model.encoder.conv.receptive_field_length
        # Consecutive chunks start `chunk_stride` input frames apart and
        # each spans `chunk_size` input frames.
        chunk_stride = subsampling_rate * decoder_chunk_size
        chunk_size = (decoder_chunk_size - 1
                      ) * subsampling_rate + receptive_field_length

        x_batch = audio.numpy()
        batch_size, Tmax, x_dim = x_batch.shape
        x_len_batch = audio_len.numpy().astype(np.int64)
        if (Tmax - chunk_size) % chunk_stride != 0:
            # The length of padding for the batch
            padding_len_batch = chunk_stride - (Tmax - chunk_size
                                                ) % chunk_stride
        else:
            padding_len_batch = 0
        # Split into single-utterance arrays; each keeps a leading axis of 1.
        x_list = np.split(x_batch, batch_size, axis=0)
        x_len_list = np.split(x_len_batch, batch_size, axis=0)

        trans_batch = []
        for x, x_len in zip(x_list, x_len_list):
            if self.args.enable_auto_log is True:
                self.autolog.times.start()
            x_len = x_len[0]
            # The utterance must hold at least one full chunk.
            assert (chunk_size <= x_len)

            # Pad so that (x_len - chunk_size) becomes a multiple of the
            # chunk stride.
            if (x_len - chunk_size) % chunk_stride != 0:
                padding_len_x = chunk_stride - (x_len - chunk_size
                                                ) % chunk_stride
            else:
                padding_len_x = 0

            padding = np.zeros(
                (x.shape[0], padding_len_x, x.shape[2]), dtype=x.dtype)
            padded_x = np.concatenate([x, padding], axis=1)

            num_chunk = (x_len + padding_len_x - chunk_size) / chunk_stride + 1
            num_chunk = int(num_chunk)

            # Recurrent state carried from chunk to chunk; starts at zero
            # for every utterance.
            chunk_state_h_box = np.zeros(
                (self.config.num_rnn_layers, 1, self.config.rnn_layer_size),
                dtype=x.dtype)
            chunk_state_c_box = np.zeros(
                (self.config.num_rnn_layers, 1, self.config.rnn_layer_size),
                dtype=x.dtype)

            # NOTE(review): assumes the exported model's inputs are ordered
            # (audio, audio_len, state_h, state_c) — confirm against export.
            input_names = self.predictor.get_input_names()
            audio_handle = self.predictor.get_input_handle(input_names[0])
            audio_len_handle = self.predictor.get_input_handle(input_names[1])
            h_box_handle = self.predictor.get_input_handle(input_names[2])
            c_box_handle = self.predictor.get_input_handle(input_names[3])

            trans = []
            probs_chunk_list = []
            probs_chunk_lens_list = []
            if self.args.enable_auto_log is True:
                # record the model preprocessing time
                self.autolog.times.stamp()

            # A fresh decoder state for this single utterance.
            self.model.decoder.reset_decoder(batch_size=1)
            for i in range(0, num_chunk):
                start = i * chunk_stride
                end = start + chunk_size
                x_chunk = padded_x[:, start:end, :]
                # Number of real (non-padding) frames inside this chunk.
                if x_len < i * chunk_stride:
                    x_chunk_lens = 0
                else:
                    x_chunk_lens = min(x_len - i * chunk_stride, chunk_size)
                # Stop when the chunk holds too few input frames to predict
                # even one probability.
                if (x_chunk_lens < receptive_field_length):
                    break
                x_chunk_lens = np.array([x_chunk_lens])
                audio_handle.reshape(x_chunk.shape)
                audio_handle.copy_from_cpu(x_chunk)

                audio_len_handle.reshape(x_chunk_lens.shape)
                audio_len_handle.copy_from_cpu(x_chunk_lens)

                h_box_handle.reshape(chunk_state_h_box.shape)
                h_box_handle.copy_from_cpu(chunk_state_h_box)

                c_box_handle.reshape(chunk_state_c_box.shape)
                c_box_handle.copy_from_cpu(chunk_state_c_box)

                # NOTE(review): assumes outputs are ordered
                # (probs, lens, state_h, state_c) — confirm against export.
                output_names = self.predictor.get_output_names()
                output_handle = self.predictor.get_output_handle(
                    output_names[0])
                output_lens_handle = self.predictor.get_output_handle(
                    output_names[1])
                output_state_h_handle = self.predictor.get_output_handle(
                    output_names[2])
                output_state_c_handle = self.predictor.get_output_handle(
                    output_names[3])
                self.predictor.run()
                output_chunk_probs = output_handle.copy_to_cpu()
                output_chunk_lens = output_lens_handle.copy_to_cpu()
                # The returned states are fed back in for the next chunk.
                chunk_state_h_box = output_state_h_handle.copy_to_cpu()
                chunk_state_c_box = output_state_c_handle.copy_to_cpu()
                self.model.decoder.next(output_chunk_probs, output_chunk_lens)
                probs_chunk_list.append(output_chunk_probs)
                probs_chunk_lens_list.append(output_chunk_lens)
                # Decode after every chunk and keep the best transcript so far.
                trans_best, trans_beam = self.model.decoder.decode()
                trans.append(trans_best[0])
            trans_batch.append(trans)
            # Stitch the per-chunk outputs back together for this utterance.
            output_probs = np.concatenate(probs_chunk_list, axis=1)
            output_lens = np.sum(probs_chunk_lens_list, axis=0)
            vocab_size = output_probs.shape[2]
            output_probs_padding_len = Tmax + padding_len_batch - output_probs.shape[
                1]
            output_probs_padding = np.zeros(
                (1, output_probs_padding_len, vocab_size),
                dtype=output_probs.
                dtype)  # The prob padding for a piece of utterance
            output_probs = np.concatenate(
                [output_probs, output_probs_padding], axis=1)
            output_probs_list.append(output_probs)
            output_lens_list.append(output_lens)
            if self.args.enable_auto_log is True:
                # record the model inference time
                self.autolog.times.stamp()
                # record the post processing time
                self.autolog.times.stamp()
                self.autolog.times.end()
        # Stack the per-utterance results back into batch-shaped arrays.
        output_probs = np.concatenate(output_probs_list, axis=0)
        output_lens = np.concatenate(output_lens_list, axis=0)
        return output_probs, output_lens, trans_batch

    def static_forward_offline(self, audio, audio_len):
        """
        Run the exported model once on a whole batch.

        Parameters
        ----------
            audio (Tensor): shape[B, T, D]
            audio_len (Tensor): shape[B]

        Returns
        -------
            output_probs(numpy.array): shape[B, T, vocab_size]
            output_lens(numpy.array): shape[B]
        """
        predictor = self.predictor
        timing_enabled = self.args.enable_auto_log is True

        feats = audio.numpy()
        feat_lens = audio_len.numpy().astype(np.int64)

        # Bind the two model inputs and upload the batch.
        in_names = predictor.get_input_names()
        feats_in = predictor.get_input_handle(in_names[0])
        lens_in = predictor.get_input_handle(in_names[1])

        feats_in.reshape(feats.shape)
        feats_in.copy_from_cpu(feats)

        lens_in.reshape(feat_lens.shape)
        lens_in.copy_from_cpu(feat_lens)

        if timing_enabled:
            self.autolog.times.start()
            # record the prefix processing time
            self.autolog.times.stamp()
        predictor.run()
        if timing_enabled:
            # record the model inference time
            self.autolog.times.stamp()
            # record the post processing time
            self.autolog.times.stamp()
            self.autolog.times.end()

        # Fetch both outputs back to host memory.
        out_names = predictor.get_output_names()
        probs_out = predictor.get_output_handle(out_names[0])
        lens_out = predictor.get_output_handle(out_names[1])
        probs = probs_out.copy_to_cpu()
        lens = lens_out.copy_to_cpu()
        return probs, lens

    def setup_model(self):
        """Set up the dynamic model and the inference predictor.

        After the parent set-up, an ``inference.Config`` is built from
        ``self.args.export_path`` and the resulting predictor is stored in
        ``self.predictor``. The GPU is enabled only when the
        ``CUDA_VISIBLE_DEVICES`` environment variable is set to a non-blank
        value; an unset variable is treated the same as an empty one instead
        of raising ``KeyError``.
        """
        super().setup_model()

        # after paddle 3.0, support new inference interface
        if paddlespeech.utils.satisfy_paddle_version('3.0.0-beta'):
            model_dir = os.path.dirname(self.args.export_path)
            model_prefix = os.path.basename(self.args.export_path)
            deepspeech_config = inference.Config(model_dir, model_prefix)
        else:
            deepspeech_config = inference.Config(
                self.args.export_path + ".pdmodel",
                self.args.export_path + ".pdiparams")

        deepspeech_config.disable_mkldnn()

        # `.get` with a default keeps CPU-only environments, where the
        # variable is usually not defined at all, from crashing here.
        visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES', '')
        if visible_devices.strip() != '':
            deepspeech_config.enable_use_gpu(100, 0)
            deepspeech_config.enable_memory_optim()
        deepspeech_predictor = inference.create_predictor(deepspeech_config)
        self.predictor = deepspeech_predictor


================================================
FILE: paddlespeech/s2t/exps/hubert/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/exps/hubert/bin/__init__.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/exps/hubert/bin/test.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Evaluation for hubert model."""
import cProfile

from yacs.config import CfgNode

from paddlespeech.s2t.exps.hubert.model import HubertASRTester as Tester
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.utils.argparse import print_arguments


def main_sp(config, args):
    """Single-process entry: set up the tester and run the test in eval mode."""
    tester = Tester(config, args)
    with tester.eval():
        tester.setup()
        tester.run_test()


def main(config, args):
    """Entry point; forwards to the single-process runner ``main_sp``."""
    main_sp(config=config, args=args)


if __name__ == "__main__":
    # Command-line entry point: parse arguments, assemble the config and run
    # `main` under cProfile.
    parser = default_argument_parser()
    # Extra option on top of the default parser: path to the dict file.
    parser.add_argument(
        '--dict-path', type=str, default=None, help='dict path.')
    args = parser.parse_args()
    print_arguments(args, globals())

    # https://yaml.org/type/float.html
    # Config sources are applied in order: main config file, decode config
    # (stored under `config.decode`), then command-line overrides.
    config = CfgNode(new_allowed=True)
    if args.config:
        config.merge_from_file(args.config)
    if args.decode_cfg:
        decode_confs = CfgNode(new_allowed=True)
        decode_confs.merge_from_file(args.decode_cfg)
        config.decode = decode_confs
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()
    print(config)
    # Optionally save the final, merged config to a file.
    if args.dump_config:
        with open(args.dump_config, 'w') as f:
            print(config, file=f)

    # Setting for profiling
    # The profile statistics are written to 'test.profile'.
    pr = cProfile.Profile()
    pr.runcall(main, config, args)
    pr.dump_stats('test.profile')


================================================
FILE: paddlespeech/s2t/exps/hubert/bin/test_wav.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Evaluation for hubert model."""
import os
import sys
from pathlib import Path

import paddle
import soundfile
from yacs.config import CfgNode

from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
from paddlespeech.s2t.models.hubert.hubert_ASR import HubertASR
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.utils.log import Log
from paddlespeech.s2t.utils.utility import UpdateConfig
logger = Log(__name__).getlog()


class HubertInfer():
    """Transcribe a single audio file with a Hubert ASR checkpoint."""

    def __init__(self, config, args):
        """Build the text featurizer and the model, then load the weights.

        Args:
            config: configuration node; ``unit_type``, ``vocab_filepath`` and
                (later, in ``run``) ``decode`` are read from it.
            args: parsed arguments; ``audio_file``, ``ngpu`` and
                ``checkpoint_path`` are read from it.
        """
        self.args = args
        self.config = config
        self.audio_file = args.audio_file

        self.text_feature = TextFeaturizer(
            unit_type=config.unit_type, vocab=config.vocab_filepath)
        paddle.set_device('gpu' if self.args.ngpu > 0 else 'cpu')

        # model
        model_conf = config
        with UpdateConfig(model_conf):
            model_conf.output_dim = self.text_feature.vocab_size
        model = HubertASR.from_config(model_conf)
        self.model = model
        self.model.eval()

        # load model
        params_path = self.args.checkpoint_path + ".pdparams"
        model_dict = paddle.load(params_path)
        self.model.set_state_dict(model_dict)

    def run(self):
        """Validate the audio file, decode it and return the transcript.

        Returns:
            The first transcript returned by ``self.model.decode``.
        """
        # Use the path stored on the instance. The module-level `args` only
        # exists when this file is executed as a script, so relying on it
        # raises NameError whenever the class is used from another module.
        check(self.audio_file)

        with paddle.no_grad():
            # read
            audio, _ = soundfile.read(
                self.audio_file, dtype="int16", always_2d=True)
            logger.info(f"audio shape: {audio.shape}")

            # Add a leading batch axis before decoding.
            xs = paddle.to_tensor(audio, dtype='float32').unsqueeze(axis=0)
            decode_config = self.config.decode
            result_transcripts, result_tokenids = self.model.decode(
                xs,
                text_feature=self.text_feature,
                decoding_method=decode_config.decoding_method,
                beam_size=decode_config.beam_size)
            rsl = result_transcripts[0]
            utt = Path(self.audio_file).name
            logger.info(f"hyp: {utt} {rsl}")
            return rsl


def check(audio_file):
    """Validate that ``audio_file`` exists, is readable and is 16 kHz.

    Exits the process with status -1 when the path is not a file or the
    file cannot be read by ``soundfile``; asserts that the sample rate is
    16000.
    """
    path_is_file = os.path.isfile(audio_file)
    if not path_is_file:
        print("Please input the right audio file path")
        sys.exit(-1)

    logger.info("checking the audio file format......")
    try:
        _, rate = soundfile.read(audio_file)
    except Exception as err:
        logger.error(str(err))
        logger.error(
            "can not open the wav file, please check the audio file format")
        sys.exit(-1)

    logger.info("The sample rate is %d" % rate)
    assert rate == 16000
    logger.info("The audio file format is right")


def main(config, args):
    """Build a ``HubertInfer`` from ``config`` and ``args`` and run it once.

    The transcript returned by ``run`` is not propagated to the caller.
    """
    engine = HubertInfer(config, args)
    engine.run()


if __name__ == "__main__":
    # Script entry point: parse the command line, assemble the configuration
    # and run a single inference.
    parser = default_argument_parser()
    args = parser.parse_args()

    # Start from an empty node and layer the configuration sources in order:
    # main config file, decode config file, then command-line overrides.
    config = CfgNode(new_allowed=True)

    if args.config:
        config.merge_from_file(args.config)
    if args.decode_cfg:
        # the decode settings live in their own file and are attached under
        # the ``decode`` key of the main config
        decode_confs = CfgNode(new_allowed=True)
        decode_confs.merge_from_file(args.decode_cfg)
        config.decode = decode_confs
    if args.opts:
        config.merge_from_list(args.opts)
    # freeze once every source has been merged
    config.freeze()
    main(config, args)


================================================
FILE: paddlespeech/s2t/exps/hubert/bin/train.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Trainer for hubert model."""
import cProfile
import os

from yacs.config import CfgNode

from paddlespeech.s2t.exps.hubert.model import HubertASRTrainer as Trainer
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.utils.argparse import print_arguments


def main_sp(config, args):
    """Single-process entry: construct the trainer, set it up, then run it."""
    trainer = Trainer(config, args)
    trainer.setup()
    trainer.run()


def main(config, args):
    """Training entry point; forwards both arguments to ``main_sp``."""
    main_sp(config=config, args=args)


if __name__ == "__main__":
    # Script entry point: parse the command line, build the frozen config,
    # optionally dump it, then run training under the profiler.
    parser = default_argument_parser()
    # extra option on top of the default parser: checkpoint tag to resume from
    parser.add_argument(
        '--resume', type=str, default="", nargs="?", help='resume ckpt path.')
    args = parser.parse_args()
    print_arguments(args, globals())
    # https://yaml.org/type/float.html
    config = CfgNode(new_allowed=True)
    # layer the configuration: config file first, then command-line overrides
    if args.config:
        config.merge_from_file(args.config)
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()
    if args.dump_config:
        # write the final merged configuration to the requested file
        with open(args.dump_config, 'w') as f:
            print(config, file=f)

    # Setting for profiling
    # The whole training run is executed under cProfile and the statistics
    # are written to ``train.profile`` inside ``args.output``.
    pr = cProfile.Profile()
    pr.runcall(main, config, args)
    pr.dump_stats(os.path.join(args.output, 'train.profile'))


================================================
FILE: paddlespeech/s2t/exps/hubert/model.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains hubert model."""
import json
import math
import os
import re
import time
from collections import OrderedDict
from contextlib import nullcontext

import jsonlines
import numpy as np
import paddle
from hyperpyyaml import load_hyperpyyaml
from paddle import distributed as dist
from paddlenlp.transformers import AutoTokenizer

from paddlespeech.s2t.frontend.featurizer import TextFeaturizer
from paddlespeech.s2t.io.dataloader import DataLoaderFactory
from paddlespeech.s2t.io.speechbrain import data_pipeline
from paddlespeech.s2t.io.speechbrain import dataio
from paddlespeech.s2t.io.speechbrain import dataset
from paddlespeech.s2t.io.speechbrain.dataloader import make_dataloader
from paddlespeech.s2t.models.hubert.hubert_ASR import HubertASR
from paddlespeech.s2t.models.wav2vec2.processing.speech_augmentation import TimeDomainSpecAugment
from paddlespeech.s2t.training.optimizer import OptimizerFactory
from paddlespeech.s2t.training.reporter import ObsScope
from paddlespeech.s2t.training.reporter import report
from paddlespeech.s2t.training.scheduler import LRSchedulerFactory
from paddlespeech.s2t.training.timer import Timer
from paddlespeech.s2t.training.trainer import Trainer
from paddlespeech.s2t.utils import error_rate
from paddlespeech.s2t.utils import layer_tools
from paddlespeech.s2t.utils import mp_tools
from paddlespeech.s2t.utils.log import Log
from paddlespeech.s2t.utils.utility import UpdateConfig

logger = Log(__name__).getlog()


# Todo: change this when paddle supports this api
def clip_grad_norm_(
        parameters,
        max_norm,
        norm_type=2.0,
        error_if_nonfinite=False, ):
    r"""Clips gradient norm of the iterable parameters.

    Norms are calculated together on all gradients, just as they are
    connected into one vector. The gradient will be modified in place.

    This API can only run in dynamic graph mode, not static graph mode.

    Args:
        parameters (Iterable[paddle.Tensor] or paddle.Tensor): Tensors or a single Tensor
            that will be normalized gradients. Any iterable is accepted,
            including one-shot iterators such as generators.
        max_norm (float or int): max norm of the gradients
        norm_type (float or int): type of the used p-norm. Can be `inf` for
            infinity norm.
        error_if_nonfinite (bool): if True, throw an error if the total
            norm of the gradients from :attr:`parameters` is `nan`,
            `inf`, or `-inf`.

    Returns:
        Total norm of the parameter gradients (treated as a single vector).
        When no parameter has a gradient, a tensor holding ``0.0``.

    Raises:
        RuntimeError: when not running in dynamic mode, or when
            ``error_if_nonfinite`` is True and the total norm is non-finite.
        ValueError: when ``norm_type`` is not one of ``inf``, 0, 1 or 2.

    Example:
        .. code-block:: python
            import paddle

            x = paddle.uniform([10, 10], min=-1.0, max=1.0, dtype='float32')
            max_norm = float(5.0)
            linear = paddle.nn.Linear(in_features=10, out_features=10)
            out = linear(x)
            loss = paddle.mean(out)
            loss.backward()

            clip_grad_norm_(linear.parameters(), max_norm)

            sdg = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters())
            sdg.step()
    """
    if not paddle.in_dynamic_mode():
        raise RuntimeError('this API can only run in dynamic mode.')

    if isinstance(parameters, paddle.Tensor):
        parameters = [parameters]
    else:
        # The parameters are walked twice below (once to collect the
        # gradients, once to rescale them). Materialise them so a generator
        # or other one-shot iterable is not exhausted by the first pass,
        # which would silently skip the clipping.
        parameters = list(parameters)

    support_norm_type = [float("inf"), 0, 1, 2]
    if norm_type not in support_norm_type:
        raise ValueError(f'norm_type only support {support_norm_type}')

    grads = [p.grad for p in parameters if p.grad is not None]
    max_norm = float(max_norm)
    norm_type = float(norm_type)
    if len(grads) == 0:
        return paddle.to_tensor(0.0)
    if norm_type == float("inf"):
        # infinity norm: the largest absolute gradient entry overall
        norms = [g.detach().abs().max() for g in grads]
        total_norm = (norms[0]
                      if len(norms) == 1 else paddle.max(paddle.stack(norms)))
    else:
        # p-norm of the per-tensor p-norms
        total_norm = paddle.linalg.norm(
            paddle.stack(
                [paddle.linalg.norm(g.detach(), norm_type) for g in grads]),
            norm_type, )

    if error_if_nonfinite and paddle.logical_or(total_norm.isnan(),
                                                total_norm.isinf()):
        raise RuntimeError(
            f'The total norm of {norm_type} order of the gradients from '
            '`parameters` is non-finite, so it cannot be clipped. In any case, '
            'disable this error and scale the gradient by non-finite norm, '
            'set `error_if_nonfinite=False`')
    # small epsilon keeps the division defined when the total norm is zero
    clip_coef = max_norm / (total_norm + 1e-6)
    # Note: when the coef is clamped to 1, it is redundant to multiply the clamped coef, but this
    # avoids the `if clip_coef < 1:` condition.
    clip_coef_clamped = paddle.clip(clip_coef, max=1.0)
    with paddle.no_grad():
        for p in parameters:
            g = p.grad
            if g is not None:
                p.grad = paddle.multiply(x=g, y=clip_coef_clamped)
    return total_norm


class HubertASRTrainer(Trainer):
    def __init__(self, config, args):
        """Initialise the base trainer and the loss-tracking state."""
        super().__init__(config, args)
        # Whether the speech brain dataloader is used; setup_dataloader()
        # overwrites this from the ``use_sb_pipeline`` config entry.
        self.use_sb = True
        # Set to False by update_average() once a loss is NaN or inf, in
        # which case that loss cannot be averaged.
        self.loss_isfinite = True
        # Running average of the training loss.
        self.avg_train_loss = 0.0

    def update_average(self, batch_index, loss):
        """Fold one batch loss into the running average ``avg_train_loss``.

        Arguments
        ---------
        batch_index : int
            zero-based index of the current batch; the new loss is weighted
            by ``1 / (batch_index + 1)``.
        loss : float
            detached loss of the current batch as a plain number.

        A NaN or infinite loss leaves the average untouched, clears
        ``loss_isfinite`` and is logged.
        """
        if not math.isfinite(loss):
            self.loss_isfinite = False
            logger.info('loss:{} in Nan or inf, error'.format(loss))
            return

        seen = batch_index + 1
        # incremental mean: drop 1/seen of the old value, add 1/seen of the new
        self.avg_train_loss = self.avg_train_loss - self.avg_train_loss / seen
        self.avg_train_loss = self.avg_train_loss + loss / seen

    def before_train(self):
        """Prepare trainer state right before the epoch loop starts.

        When resuming, the epoch counter is advanced so training continues
        with the epoch after the restored one. When starting from scratch,
        the untrained model is saved under the tag ``init``.
        """
        resumed = not self.resume_or_scratch()
        if resumed:
            # resume: train next_epoch and next_iteration
            self.epoch += 1
            logger.info(
                f"Resume train: epoch {self.epoch }, step {self.iteration}!")
        else:
            # scratch: save init model, i.e. 0 epoch
            self.save(tag='init', infos=None)

        self.maybe_batch_sampler_step()

    def train_batch(self, batch_index, batch, msg):
        """Run forward and backward on one batch; step the optimizers when due.

        Args:
            batch_index (int): zero-based index of the batch in this epoch;
                drives gradient accumulation and the running loss average.
            batch: one item from ``self.train_loader``. With the sb pipeline
                it is indexed by key (``'id'``, ``'sig'``, ``'tokens'``);
                otherwise it unpacks to
                ``(utt, wav, wavs_lens, target, target_lens)``.
            msg (str): not used by this method.

        The optimizers and per-step schedulers advance only on every
        ``accum_grad``-th batch. Loss, batch size, accumulation factor and
        step time are published through ``report``.
        """
        train_conf = self.config
        start = time.time()

        # forward
        ## sb data pipeline
        if self.use_sb:
            # ``utt`` used to be bound only in the other branch, so the
            # logging below raised UnboundLocalError with the sb pipeline.
            # 'id' is one of the output keys configured in dataio_prepare().
            utt = batch['id']
            wav, wavs_lens_rate = batch['sig']
            target, target_lens_rate = batch['tokens']
            # convert the relative target lengths into integer lengths
            target_lens = (target_lens_rate *
                           target.shape[1]).round().astype(paddle.int64)
        else:
            utt, wav, wavs_lens, target, target_lens = batch
            wavs_lens_rate = wavs_lens / wav.shape[1]
            wav = wav[:, :, 0]
        logger.info('training utt ids: {}'.format(utt))
        if hasattr(train_conf, 'audio_augment'):
            wav = self.speech_augmentation(wav, wavs_lens_rate)

        loss = self.model(wav, wavs_lens_rate, target, target_lens)

        # loss div by `batch_size * accum_grad`
        loss /= train_conf.accum_grad
        # update self.avg_train_loss
        self.update_average(batch_index, float(loss))

        # loss backward
        if (batch_index + 1) % train_conf.accum_grad != 0:
            # Disable gradient synchronizations across DDP processes.
            # Within this context, gradients will be accumulated on module
            # variables, which will later be synchronized.
            # When using cpu w/o DDP, model does not have `no_sync`
            context = self.model.no_sync if (hasattr(self.model, "no_sync") and
                                             self.parallel) else nullcontext
        else:
            # Used for single gpu training and DDP gradient synchronization
            # processes.
            context = nullcontext
        with context():
            loss.backward()

            layer_tools.print_grads(self.model, print_func=None)

        # optimizer step old
        if (batch_index + 1) % train_conf.accum_grad == 0:
            #do global grad clip
            if train_conf.global_grad_clip != 0:
                clip_grad_norm_(self.model.parameters(),
                                train_conf.global_grad_clip)
            self.model_optimizer.step()
            self.model_optimizer.clear_grad()
            if not train_conf.freeze_hubert:
                self.hubert_optimizer.step()
                self.hubert_optimizer.clear_grad()
            # 'newbobscheduler' schedulers are stepped once per epoch with
            # the validation loss (see do_train), not per optimizer step
            if self.config.model_scheduler != 'newbobscheduler':
                self.model_lr_scheduler.step()
            if self.config.hubert_scheduler != 'newbobscheduler':
                if not train_conf.freeze_hubert:
                    self.hubert_lr_scheduler.step()
            self.iteration += 1

        # undo the accum_grad scaling so the reported loss is per batch
        losses_np = {'loss': self.avg_train_loss * train_conf.accum_grad}
        iteration_time = time.time() - start
        for k, v in losses_np.items():
            report(k, v)
        report("loss_whitoutavg", float(loss))
        report("batch_size", self.config.batch_size)
        report("accum", train_conf.accum_grad)
        report("step_cost", iteration_time)

        if (batch_index + 1) % train_conf.accum_grad == 0:
            if dist.get_rank() == 0 and self.visualizer:
                losses_np_v = losses_np.copy()
                losses_np_v.update({
                    "model_lr": self.model_lr_scheduler(),
                    "hubert_lr": self.hubert_lr_scheduler()
                })
                for key, val in losses_np_v.items():
                    # iteration was already incremented above, hence the -1
                    self.visualizer.add_scalar(
                        tag='train/' + key, value=val, step=self.iteration - 1)

    @paddle.no_grad()
    def valid(self):
        """Evaluate the model on ``self.valid_loader``.

        Keeps a running average of the loss over the batches whose loss is
        finite. Batches with a NaN or infinite loss are logged and excluded
        from the average.

        Returns:
            tuple: ``(total_loss, num_seen_utts)`` where ``total_loss`` is the
            running average of the finite batch losses and ``num_seen_utts``
            is always ``1`` (the loss is already averaged).
        """
        self.model.eval()
        if not self.use_streamdata:
            logger.info(
                f"Valid Total Examples: {len(self.valid_loader.dataset)}")
        valid_losses = {}
        step = 0  # number of finite-loss batches folded into total_loss
        total_loss = 0.0
        num_seen_utts = 1  # use update_average and no need for num_seen_utts here
        for i, batch in enumerate(self.valid_loader):
            if self.use_sb:
                wav, wavs_lens_rate = batch['sig']
                target, target_lens_rate = batch['tokens']
                # convert the relative target lengths into integer lengths
                target_lens = (target_lens_rate *
                               target.shape[1]).round().astype(paddle.int64)
            else:
                _, wav, wavs_lens, target, target_lens = batch
                wavs_lens_rate = wavs_lens / wav.shape[1]
                wav = wav[:, :, 0]

            loss = self.model(wav, wavs_lens_rate, target, target_lens)

            if math.isfinite(float(loss)):
                # Update the running average only for finite losses. Doing it
                # before this check let a single NaN/inf batch poison the
                # average for the rest of the epoch.
                total_loss -= total_loss / (step + 1)
                total_loss += loss / (step + 1)
                step += 1
                valid_losses['val_loss'] = float(loss)
            else:
                logger.info('loss:{} in Nan or inf, error'.format(float(loss)))

            if (i + 1) % self.config.log_interval == 0:
                valid_losses['val_history_loss'] = float(total_loss)

                # logging
                msg = f"Valid: Rank: {dist.get_rank()}, "
                msg += "epoch: {}, ".format(self.epoch)
                msg += "step: {}, ".format(self.iteration)
                if not self.use_streamdata:
                    msg += "batch: {}/{}, ".format(i + 1,
                                                   len(self.valid_loader))
                msg += ', '.join('{}: {:>.6f}'.format(k, v)
                                 for k, v in valid_losses.items())
                logger.info(msg)

        logger.info(
            'Rank {} Val info val_loss {}'.format(dist.get_rank(), total_loss))
        return total_loss, num_seen_utts

    @mp_tools.rank_zero_only
    def save(self, tag=None, infos: dict=None):
        """Save checkpoint (model parameters and optimizer states).

        Files are written next to each other in ``self.checkpoint_dir`` and
        share the prefix ``<tag or iteration>``:

        * ``.pdparams`` - model state dict
        * ``.pdopt``    - ``{'model': ..., 'hubert': ...}`` optimizer states
        * ``.pdlrs``    - state of each scheduler configured as
          ``'newbobscheduler'`` (written only when there is at least one)
        * ``.json``     - ``infos`` plus the epoch and both learning rates

        Args:
            tag (int or str, optional): None for step, else using tag, e.g epoch. Defaults to None.
            infos (dict, optional): meta data to save. Defaults to None.
                The dict passed in is not modified.
        """

        # work on a copy so the caller's dict is not mutated by the update
        infos = dict(infos) if infos else dict()
        infos.update({
            "epoch": self.epoch,
            "model_lr": self.model_optimizer.get_lr(),
            "hubert_lr": self.hubert_optimizer.get_lr()
        })

        checkpoint_path = os.path.join(
            self.checkpoint_dir,
            "{}".format(self.iteration if tag is None else tag))

        model_dict = self.model.state_dict()
        params_path = checkpoint_path + ".pdparams"
        paddle.save(model_dict, params_path)
        logger.info("Saved model to {}".format(params_path))

        model_opt_dict = self.model_optimizer.state_dict()
        hubert_opt_dict = self.hubert_optimizer.state_dict()

        opt_dict = {'model': model_opt_dict, 'hubert': hubert_opt_dict}

        optimizer_path = checkpoint_path + ".pdopt"
        paddle.save(opt_dict, optimizer_path)
        logger.info("Saved optimzier state to {}".format(optimizer_path))

        scheduler_dict = {}

        if self.config.model_scheduler == 'newbobscheduler':
            scheduler_dict['model'] = self.model_lr_scheduler.save()
        if self.config.hubert_scheduler == 'newbobscheduler':
            scheduler_dict['hubert'] = self.hubert_lr_scheduler.save()
        if scheduler_dict:
            scheduler_path = checkpoint_path + ".pdlrs"
            paddle.save(scheduler_dict, scheduler_path)
            logger.info("Saved scheduler state to {}".format(scheduler_path))

        # Same prefix as the other files. This replaces a regex substitution
        # on params_path whose pattern left the dot unescaped.
        info_path = checkpoint_path + ".json"
        with open(info_path, 'w', encoding='utf8') as fout:
            data = json.dumps(infos)
            fout.write(data)

    def resume_or_scratch(self):
        """Restore training state from a checkpoint or start from scratch.

        If ``args.resume`` is set, ``<checkpoint_dir>/<args.resume>.json`` is
        read to obtain the epoch, and the model, optimizer and (if present)
        scheduler files named after that epoch are loaded. Otherwise the
        epoch and iteration counters are reset to zero.

        Returns:
            bool: ``True`` when training starts from scratch, ``False`` when
            a checkpoint was restored.
        """
        if not self.args.resume:
            self.iteration = 0
            self.epoch = 0
            logger.info("Init from scratch!")
            return True

        # just restore ckpt
        # lr will restore from optimizer ckpt
        meta_path = os.path.join(self.checkpoint_dir,
                                 self.args.resume + '.json')
        with open(meta_path, 'r', encoding='utf8') as f:
            meta = json.load(f)
        self.iteration = 0
        self.epoch = meta["epoch"]

        # every checkpoint file shares the prefix <checkpoint_dir>/<epoch>
        ckpt_prefix = os.path.join(self.checkpoint_dir,
                                   "{}".format(self.epoch))

        # restore model from *.pdparams
        self.model.set_state_dict(paddle.load(ckpt_prefix + '.pdparams'))

        # restore optimizer from *.pdopt
        opt_states = paddle.load(ckpt_prefix + '.pdopt')
        self.model_optimizer.set_state_dict(opt_states['model'])
        self.hubert_optimizer.set_state_dict(opt_states['hubert'])

        # restore lr_scheduler from *.pdlrs
        lrs_path = ckpt_prefix + '.pdlrs'
        if os.path.isfile(lrs_path):
            lrs_states = paddle.load(lrs_path)
            if self.config.model_scheduler == 'newbobscheduler':
                self.model_lr_scheduler.load(lrs_states['model'])
            if self.config.hubert_scheduler == 'newbobscheduler':
                self.hubert_lr_scheduler.load(lrs_states['hubert'])
        logger.info(
            f"Restore ckpt: epoch {self.epoch }, step {self.iteration}!")
        return False

    def do_train(self):
        """Run the epoch loop: train, validate, step schedulers, checkpoint.

        For every epoch until ``config.n_epoch`` is reached, each training
        batch is passed to ``train_batch`` and a log line is assembled from
        the values published through ``report``. After the epoch the model
        is validated, the validation loss is logged and visualised, any
        ``'newbobscheduler'`` scheduler is stepped with that loss, and a
        checkpoint tagged with the epoch number is saved.
        """
        # !!!IMPORTANT!!!
        # Try to export the model by script, if fails, we should refine
        # the code to satisfy the script export requirements
        # script_model = paddle.jit.to_static(self.model)
        # script_model_path = str(self.checkpoint_dir / 'init')
        # paddle.jit.save(script_model, script_model_path)

        self.before_train()
        if not self.use_streamdata:
            logger.info(
                f"Train Total Examples: {len(self.train_loader.dataset)}")
        while self.epoch < self.config.n_epoch:
            with Timer("Epoch-Train Time Cost: {}"):
                self.model.train()
                try:
                    data_start_time = time.time()
                    for batch_index, batch in enumerate(self.train_loader):
                        # time spent waiting for the dataloader
                        dataload_time = time.time() - data_start_time
                        msg = "Train:"
                        # values passed to report() inside this scope are
                        # read back from ``observation`` below
                        observation = OrderedDict()
                        with ObsScope(observation):
                            report("Rank", dist.get_rank())
                            report("epoch", self.epoch)
                            report('step', self.iteration)
                            report("model_lr", self.model_optimizer.get_lr())
                            report("hubert_lr", self.hubert_optimizer.get_lr())
                            self.train_batch(batch_index, batch, msg)
                            self.after_train_batch()
                            report('iter', batch_index + 1)
                            if not self.use_streamdata:
                                report('total', len(self.train_loader))
                            report('reader_cost', dataload_time)
                        # derived throughput figures
                        observation['batch_cost'] = observation[
                            'reader_cost'] + observation['step_cost']
                        observation['samples'] = observation['batch_size']
                        observation['ips,samples/s'] = observation[
                            'batch_size'] / observation['batch_cost']
                        # a key of the form "name,unit" is printed as
                        # " name: value unit"
                        for k, v in observation.items():
                            msg += f" {k.split(',')[0]}: "
                            msg += f"{v:>.8f}" if isinstance(v,
                                                             float) else f"{v}"
                            msg += f" {k.split(',')[1]}" if len(
                                k.split(',')) == 2 else ""
                            msg += ","
                        msg = msg[:-1]  # remove the last ","
                        if (batch_index + 1) % self.config.log_interval == 0:
                            logger.info(msg)
                        data_start_time = time.time()
                except Exception as e:
                    # log the failure, then let it propagate
                    logger.error(e)
                    raise e
            with Timer("Eval Time Cost: {}"):
                total_loss, num_seen_utts = self.valid()
                if dist.get_world_size() > 1:
                    # combine the loss and the count over all ranks before
                    # dividing
                    num_seen_utts = paddle.to_tensor(num_seen_utts)
                    dist.all_reduce(num_seen_utts)
                    total_loss = paddle.to_tensor(total_loss)
                    dist.all_reduce(total_loss)
                    cv_loss = total_loss / num_seen_utts
                    cv_loss = float(cv_loss)
                else:
                    cv_loss = float(total_loss)
            logger.info(
                'Epoch {} Val info val_loss {}'.format(self.epoch, cv_loss))
            if self.visualizer:
                self.visualizer.add_scalar(
                    tag='eval/cv_loss', value=cv_loss, step=self.epoch)
                self.visualizer.add_scalar(
                    tag='eval/model_lr',
                    value=self.model_lr_scheduler(),
                    step=self.epoch)
                self.visualizer.add_scalar(
                    tag='eval/hubert_lr',
                    value=self.hubert_lr_scheduler(),
                    step=self.epoch)

            # 'newbobscheduler' schedulers are stepped once per epoch with
            # the validation loss
            if self.config.model_scheduler == 'newbobscheduler':
                self.model_lr_scheduler.step(cv_loss)
            if self.config.hubert_scheduler == 'newbobscheduler':
                if not self.config.freeze_hubert:
                    self.hubert_lr_scheduler.step(cv_loss)
            self.save(tag=self.epoch, infos={'val_loss': cv_loss})
            # restart the running training-loss average for the next epoch
            self.avg_train_loss = 0.0
            self.new_epoch()

    def dataio_prepare(self, hparams):
        """Build the train/valid/test datasets and their processing pipelines.

        The three splits are loaded from the CSV files named in ``hparams``.
        The training split is optionally sorted by duration. An audio
        pipeline and a text pipeline are attached to every split, and the
        tokenizer is stored on ``self.tokenizer``.

        Args:
            hparams (dict): pipeline settings; the keys read here are
                ``data_folder``, ``train_data``, ``valid_data``,
                ``test_data``, ``sorting``, ``train_dataloader_opts``,
                ``dynamic_batching`` and, when dynamic batching is enabled,
                ``dynamic_batch_sampler``.

        Returns:
            tuple: ``(train_data, valid_data, test_data, tokenizer,
            train_batch_sampler, valid_batch_sampler)``; both samplers are
            ``None`` unless dynamic batching is enabled.

        Raises:
            NotImplementedError: if ``hparams["sorting"]`` is not one of
                ``random``, ``ascending`` or ``descending``.
        """
        data_root = hparams["data_folder"]

        def load_split(csv_key):
            # one dataset per CSV file, with the data root substituted in
            return dataset.DynamicItemDataset.from_csv(
                csv_path=hparams[csv_key],
                replacements={"data_root": data_root}, )

        train_data = load_split("train_data")

        sorting = hparams["sorting"]
        if sorting == "ascending":
            # we sort training data to speed up training and get better results.
            train_data = train_data.filtered_sorted(sort_key="duration")
            # when sorting do not shuffle in dataloader ! otherwise is pointless
            hparams["train_dataloader_opts"]["shuffle"] = False
        elif sorting == "descending":
            train_data = train_data.filtered_sorted(
                sort_key="duration", reverse=True)
            # when sorting do not shuffle in dataloader ! otherwise is pointless
            hparams["train_dataloader_opts"]["shuffle"] = False
        elif sorting != "random":
            raise NotImplementedError(
                "sorting must be random, ascending or descending")

        # validation and test splits are always sorted by duration
        valid_data = load_split("valid_data").filtered_sorted(
            sort_key="duration")
        test_data = load_split("test_data").filtered_sorted(
            sort_key="duration")

        datasets = [train_data, valid_data, test_data]

        # Defining tokenizer and loading it
        tokenizer = AutoTokenizer.from_pretrained('bert-base-chinese')
        self.tokenizer = tokenizer

        # 2. Define audio pipeline:
        @data_pipeline.takes("wav")
        @data_pipeline.provides("sig")
        def audio_pipeline(wav):
            return dataio.read_audio(wav)

        dataset.add_dynamic_item(datasets, audio_pipeline)

        # 3. Define text pipeline:
        @data_pipeline.takes("transcript")
        @data_pipeline.provides("wrd", "tokens_list", "tokens")
        def text_pipeline(wrd):
            # drop the spaces between the words of the transcript
            wrd = "".join(wrd.split(" "))
            yield wrd
            tokens_list = tokenizer(wrd)["input_ids"]
            yield tokens_list
            yield np.array(tokens_list, dtype="int64")

        dataset.add_dynamic_item(datasets, text_pipeline)

        # 4. Set output:
        dataset.set_output_keys(
            datasets,
            ["id", "sig", "wrd", "tokens"], )

        # 5. If Dynamic Batching is used, we instantiate the needed samplers.
        train_batch_sampler = None
        valid_batch_sampler = None
        if hparams["dynamic_batching"]:
            from sampler import DynamicBatchSampler  # noqa

            sampler_conf = hparams["dynamic_batch_sampler"]
            bucket_count = sampler_conf["num_buckets"]

            def build_sampler(data):
                # identical settings for both splits; only the data differs
                return DynamicBatchSampler(
                    data,
                    sampler_conf["max_batch_len"],
                    num_buckets=bucket_count,
                    length_func=lambda x: x["duration"],
                    shuffle=sampler_conf["shuffle_ex"],
                    batch_ordering=sampler_conf["batch_ordering"], )

            train_batch_sampler = build_sampler(train_data)
            valid_batch_sampler = build_sampler(valid_data)

        return (train_data, valid_data, test_data, tokenizer,
                train_batch_sampler, valid_batch_sampler, )

    def setup_dataloader(self):
        """Create the dataloaders needed for the current mode.

        Reads ``use_stream_data`` and ``use_sb_pipeline`` from the config
        (both default to ``False``) and stores them on the instance. With
        the sb pipeline, the hyperpyyaml file named by
        ``config.sb_pipeline_conf`` is loaded and the loaders are built with
        ``make_dataloader``; otherwise ``DataLoaderFactory`` is used.

        In training mode ``train_loader`` and ``valid_loader`` are set.
        Otherwise ``test_loader`` is set, plus ``align_loader`` on the
        non-sb path.
        """
        config = self.config.clone()
        self.use_streamdata = config.get("use_stream_data", False)
        self.use_sb = config.get("use_sb_pipeline", False)
        if self.use_sb:
            hparams_file = config.sb_pipeline_conf
            with open(hparams_file, 'r', encoding='utf8') as fin:
                hparams = load_hyperpyyaml(fin, None)

            (train_data, valid_data, test_data, tokenizer, train_bsampler,
             valid_bsampler, ) = self.dataio_prepare(hparams)

            train_dataloader_opts = hparams["train_dataloader_opts"]
            valid_dataloader_opts = hparams["valid_dataloader_opts"]

            # a batch sampler replaces the configured dataloader options
            if train_bsampler is not None:
                train_dataloader_opts = {
                    "batch_sampler": train_bsampler,
                    "num_workers": hparams["num_workers"],
                }

            if valid_bsampler is not None:
                valid_dataloader_opts = {"batch_sampler": valid_bsampler}

            if self.train:
                self.train_loader = make_dataloader(
                    train_data, stage='train', **train_dataloader_opts)
                self.valid_loader = make_dataloader(
                    valid_data,
                    stage='val',
                    **valid_dataloader_opts, )
                logger.info("Setup train/valid Dataloader!")
            else:
                self.test_loader = make_dataloader(
                    test_data, stage='test', **hparams["test_dataloader_opts"])
        else:
            if self.train:
                self.train_loader = DataLoaderFactory.get_dataloader(
                    'train', config, self.args)
                self.valid_loader = DataLoaderFactory.get_dataloader(
                    'valid', config, self.args)
                logger.info("Setup train/valid Dataloader!")
            else:
                # The decode batch size used to be looked up here into a
                # local that was never read; that dead lookup was removed.
                self.test_loader = DataLoaderFactory.get_dataloader(
                    'test', config, self.args)
                self.align_loader = DataLoaderFactory.get_dataloader(
                    'align', config, self.args)
                logger.info("Setup test/align Dataloader!")

    def setup_model(self):
        """Build the HubertASR model and, in training mode, its optimizers.

        The model is created from the config after ``output_dim`` (and, on
        the non-sb path, ``input_dim``) is filled in, then initialised from
        ``config.hubert_params_path`` and wrapped in ``paddle.DataParallel``
        when ``self.parallel`` is set.

        In training mode two optimizer/scheduler pairs are created: one for
        the ``enc`` and ``ctc`` sub-layers (``model_*``) and one for the
        ``hubert`` sub-layer (``hubert_*``).
        """
        config = self.config
        model_conf = config

        with UpdateConfig(model_conf):
            if self.use_sb:
                model_conf.output_dim = self.tokenizer.vocab_size
            else:
                if self.train:
                    model_conf.input_dim = self.train_loader.feat_dim
                    model_conf.output_dim = self.train_loader.vocab_size
                else:
                    model_conf.input_dim = self.test_loader.feat_dim
                    model_conf.output_dim = self.test_loader.vocab_size

        model = HubertASR.from_config(model_conf)

        model_dict = paddle.load(config.hubert_params_path)
        model.set_state_dict(model_dict)

        if self.parallel:
            model = paddle.DataParallel(model, find_unused_parameters=True)

        layer_tools.print_params(model, logger.info)
        self.model = model
        logger.info("Setup model!")

        # setup speech augmentation for hubert
        if hasattr(config, 'audio_augment') and self.train:
            self.speech_augmentation = TimeDomainSpecAugment(
                **config.audio_augment)

        if not self.train:
            return

        train_config = config
        model_optim_type = train_config.model_optim
        model_optim_conf = train_config.model_optim_conf
        # NOTE(review): the positional arguments assume the project logger
        # applies brace-style formatting - confirm.
        logger.info("optim_model:{},{}", model_optim_type, model_optim_conf)
        hubert_optim_type = train_config.hubert_optim
        hubert_optim_conf = train_config.hubert_optim_conf
        # This line logs the hubert optimizer settings; it was previously
        # labelled "optim_model" like the line above.
        logger.info("optim_hubert:{},{}", hubert_optim_type, hubert_optim_conf)

        model_scheduler_type = train_config.model_scheduler
        model_scheduler_conf = train_config.model_scheduler_conf
        hubert_scheduler_type = train_config.hubert_scheduler
        hubert_scheduler_conf = train_config.hubert_scheduler_conf

        # the scheduler starts from the learning rate of its optimizer config
        model_scheduler_args = dict(
            **{"learning_rate": model_optim_conf.lr,
               "verbose": False}, **(dict(model_scheduler_conf)))

        hubert_scheduler_args = dict(
            **{"learning_rate": hubert_optim_conf.lr,
               "verbose": False}, **(dict(hubert_scheduler_conf)))

        model_lr_scheduler = LRSchedulerFactory.from_args(model_scheduler_type,
                                                          model_scheduler_args)
        hubert_lr_scheduler = LRSchedulerFactory.from_args(
            hubert_scheduler_type, hubert_scheduler_args)

        def optimizer_args(
                config,
                optim_type,
                optim_conf,
                parameters,
                lr_scheduler=None, ):
            # optimizer kwargs: the optimizer config plus the parameters and
            # either the scheduler or the plain learning rate
            optim_arg = dict(optim_conf)
            optim_arg.update({
                "learning_rate":
                lr_scheduler if lr_scheduler else optim_conf.lr,
                "parameters":
                parameters
            })
            return optim_arg

        # when wrapped in DataParallel the sub-layers are reached through
        # ``_layers``
        core = model._layers if self.parallel else model

        model_optimizer_args = optimizer_args(
            config, model_optim_type, model_optim_conf, [{
                'params': core.enc.parameters()
            }, {
                'params': core.ctc.parameters()
            }], model_lr_scheduler)

        hubert_optimizer_args = optimizer_args(
            config, hubert_optim_type, hubert_optim_conf,
            core.hubert.parameters(), hubert_lr_scheduler)

        model_optimizer = OptimizerFactory.from_args(model_optim_type,
                                                     model_optimizer_args)
        hubert_optimizer = OptimizerFactory.from_args(hubert_optim_type,
                                                      hubert_optimizer_args)

        self.model_optimizer = model_optimizer
        self.hubert_optimizer = hubert_optimizer
        self.model_lr_scheduler = model_lr_scheduler
        self.hubert_lr_scheduler = hubert_lr_scheduler
        logger.info("Setup optimizer/lr_scheduler!")


class HubertASRTester(HubertASRTrainer):
    """Test-time driver for the Hubert ASR experiment.

    Decodes every batch of ``self.test_loader``, logs per-utterance and running
    error rates, streams the hypotheses to ``args.result_file`` (jsonlines) and
    writes a JSON summary next to it with a ``.err`` extension.
    """

    def __init__(self, config, args):
        super().__init__(config, args)
        # Used by ``id2token`` to turn reference token ids back into text and
        # passed to ``model.decode`` in ``compute_metrics``.
        self.text_featurizer = TextFeaturizer(
            unit_type=config.unit_type, vocab=config.vocab_filepath)
        self.vocab_list = self.text_featurizer.vocab_list

    def id2token(self, texts, texts_len):
        """Convert padded token-id rows back into transcripts.

        Args:
            texts: iterable of token-id tensors, one row per utterance.
            texts_len: iterable of tensors holding the valid length of each row.

        Returns:
            list: one defeaturized transcript per utterance, built from the
            first ``n`` ids of each row.
        """
        trans = []
        for text, n in zip(texts, texts_len):
            n = n.numpy().item()
            ids = text[:n]
            trans.append(self.text_featurizer.defeaturize(ids.numpy().tolist()))
        return trans

    @staticmethod
    def _ratio(numerator, denominator):
        """Return ``numerator / denominator``, or 0.0 when the denominator is zero."""
        return numerator / denominator if denominator else 0.0

    def _score_batch(self,
                     utts,
                     targets,
                     results,
                     tokenids,
                     num_frames,
                     decode_time,
                     fout=None):
        """Score one decoded batch and build the metrics dict shared by both pipelines.

        Args:
            utts: utterance ids of the batch.
            targets: reference transcripts.
            results: decoded transcripts.
            tokenids: decoded token ids, written out as ``hyps_tokenid``.
            num_frames: value reported under ``num_frames``.
            decode_time: value reported under ``decode_time``.
            fout: optional writer; one record per utterance is written to it.

        Returns:
            dict: errors_sum, len_refs, num_ins, error_rate, error_rate_type,
            num_frames and decode_time for this batch.
        """
        decode_cfg = self.config.decode
        use_cer = decode_cfg.error_rate_type == 'cer'
        errors_func = error_rate.char_errors if use_cer else error_rate.word_errors
        error_rate_func = error_rate.cer if use_cer else error_rate.wer

        errors_sum, len_refs, num_ins = 0.0, 0, 0
        for utt, target, result, rec_tids in zip(utts, targets, results,
                                                 tokenids):
            errors, len_ref = errors_func(target, result)
            errors_sum += errors
            len_refs += len_ref
            num_ins += 1
            if fout:
                fout.write({
                    "utt": utt,
                    "refs": [target],
                    "hyps": [result],
                    "hyps_tokenid": [rec_tids],
                })
            logger.info(f"Utt: {utt}")
            logger.info(f"Ref: {target}")
            logger.info(f"Hyp: {result}")
            logger.info("One example error rate [%s] = %f" % (
                decode_cfg.error_rate_type, error_rate_func(target, result)))

        return dict(
            errors_sum=errors_sum,
            len_refs=len_refs,
            num_ins=num_ins,  # num examples
            # Guarded so a batch without any reference tokens does not raise.
            error_rate=self._ratio(errors_sum, len_refs),
            error_rate_type=decode_cfg.error_rate_type,
            num_frames=num_frames,
            decode_time=decode_time)

    def compute_metrics(self, id, audio, audio_len, texts, texts_len,
                        fout=None):
        """Decode one batch of the default pipeline and score it.

        Args:
            id: utterance ids of the batch.
            audio: model input passed to ``self.model.decode``.
            audio_len: tensor of input lengths; its sum is reported as
                ``num_frames``.
            texts: padded reference token ids.
            texts_len: valid length of each reference row.
            fout: optional writer receiving one record per utterance.

        Returns:
            dict: see ``_score_batch``.
        """
        decode_cfg = self.config.decode

        # The timed region covers reference detokenization and decoding,
        # exactly as before.
        start_time = time.time()
        target_transcripts = self.id2token(texts, texts_len)
        result_transcripts, result_tokenids = self.model.decode(
            audio,
            text_feature=self.text_featurizer,
            decoding_method=decode_cfg.decoding_method,
            beam_size=decode_cfg.beam_size)
        decode_time = time.time() - start_time

        return self._score_batch(
            id,
            target_transcripts,
            result_transcripts,
            result_tokenids,
            num_frames=audio_len.sum().numpy().item(),
            decode_time=decode_time,
            fout=fout)

    def sb_compute_metrics(self, id, sig, wrd, tokens, fout=None):
        """Decode one batch of the ``use_sb`` pipeline and score it.

        Args:
            id: utterance ids of the batch.
            sig: pair-like object; ``sig[0]`` is passed to the decoder and the
                sum of ``sig[1]`` is reported as ``num_frames``.
            wrd: reference transcripts, used as-is.
            tokens: accepted for interface compatibility; not used here.
            fout: optional writer receiving one record per utterance.

        Returns:
            dict: see ``_score_batch``.
        """
        decode_cfg = self.config.decode

        start_time = time.time()
        target_transcripts = wrd
        result_transcripts, result_tokenids = self.model.decode(
            sig[0],
            text_feature=self.tokenizer,
            decoding_method=decode_cfg.decoding_method,
            beam_size=decode_cfg.beam_size,
            sb_pipeline=True)
        decode_time = time.time() - start_time

        return self._score_batch(
            id,
            target_transcripts,
            result_transcripts,
            result_tokenids,
            num_frames=sig[1].sum().numpy().item(),
            decode_time=decode_time,
            fout=fout)

    @mp_tools.rank_zero_only
    @paddle.no_grad()
    def test(self):
        """Run the whole test set and write the results and the summary file.

        Hypotheses are written to ``self.args.result_file``; the summary goes
        to the same path with its extension replaced by ``.err``.
        """
        logger.info(f"Test Total Examples: {len(self.test_loader.dataset)}")
        self.model.eval()

        error_rate_type = None
        errors_sum, len_refs, num_ins = 0.0, 0, 0
        num_frames = 0.0
        num_time = 0.0
        # Pre-initialised so the summary below is well defined even when the
        # loader yields no batch at all.
        rtf = 0.0

        with jsonlines.open(self.args.result_file, 'w') as fout:
            for batch in self.test_loader:
                if self.use_sb:
                    metrics = self.sb_compute_metrics(**batch, fout=fout)
                else:
                    metrics = self.compute_metrics(*batch, fout=fout)
                num_frames += metrics['num_frames']
                num_time += metrics["decode_time"]
                errors_sum += metrics['errors_sum']
                len_refs += metrics['len_refs']
                num_ins += metrics['num_ins']
                error_rate_type = metrics['error_rate_type']
                rtf = self._ratio(num_time, num_frames)
                logger.info("RTF: %f, Error rate [%s] (%d/?) = %f" %
                            (rtf, error_rate_type, num_ins,
                             self._ratio(errors_sum, len_refs)))

        if num_ins == 0:
            logger.warning(
                "No example was decoded; reported rates default to 0.")
        final_error_rate = self._ratio(errors_sum, len_refs)

        # logging
        msg = "Test: "
        msg += "epoch: {}, ".format(self.epoch)
        msg += "step: {}, ".format(self.iteration)
        msg += "Final error rate [%s] (%d/%d) = %f" % (
            error_rate_type, num_ins, num_ins, final_error_rate)
        logger.info(msg)

        err_meta_path = os.path.splitext(self.args.result_file)[0] + '.err'
        with open(err_meta_path, 'w', encoding='utf8') as f:
            # NOTE(review): ``num_time`` accumulates ``time.time()`` deltas
            # (seconds), yet both hour fields divide by 1000 first -- confirm
            # the intended units before relying on these two values.
            data = json.dumps({
                "epoch":
                self.epoch,
                "step":
                self.iteration,
                "rtf":
                rtf,
                error_rate_type:
                final_error_rate,
                "dataset_hour": (num_frames) / 1000.0 / 3600.0,
                "process_hour":
                num_time / 1000.0 / 3600.0,
                "num_examples":
                num_ins,
                "err_sum":
                errors_sum,
                "ref_len":
                len_refs,
                "decode_method":
                self.config.decode.decoding_method,
            })
            f.write(data + '\n')


================================================
FILE: paddlespeech/s2t/exps/lm/transformer/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/exps/lm/transformer/bin/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/exps/lm/transformer/bin/cacu_perplexity.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys

import configargparse


def get_parser():
    """Build the argument parser of the perplexity command-line tool.

    Returns:
        configargparse.ArgumentParser: parser exposing the LM checkpoint and
        config options, the vocab / BPE / text paths, the device and dtype
        switches and the output directory.
    """
    parser = configargparse.ArgumentParser(
        description="The parser for caculating the perplexity of transformer language model ",
        config_file_parser_class=configargparse.YAMLConfigFileParser,
        formatter_class=configargparse.ArgumentDefaultsHelpFormatter, )

    # (flag, add_argument keyword arguments), registered in this order.
    option_table = (
        ("--rnnlm",
         dict(type=str, default=None, help="RNNLM model file to read")),
        ("--rnnlm-conf",
         dict(type=str, default=None,
              help="RNNLM model config file to read")),
        ("--vocab_path",
         dict(type=str, default=None, help="vocab path to for token2id")),
        ("--bpeprefix",
         dict(type=str, default=None,
              help="The path of bpeprefix for loading")),
        ("--text_path",
         dict(type=str, default=None,
              help="The path of text file for testing ")),
        ("--ngpu",
         dict(type=int, default=0,
              help="The number of gpu to use, 0 for using cpu instead")),
        ("--dtype",
         dict(choices=("float16", "float32", "float64"),
              default="float32",
              help="Float precision (only available in --api v2)")),
        ("--output_dir",
         dict(type=str, default=".",
              help="The output directory to store the sentence PPL")),
    )
    for flag, options in option_table:
        parser.add_argument(flag, **options)

    return parser


def main(args):
    """Parse ``args`` (a list of CLI tokens) and run the perplexity computation."""
    parsed_args = get_parser().parse_args(args)
    # Imported lazily, after argument parsing has succeeded.
    from paddlespeech.s2t.exps.lm.transformer.lm_cacu_perplexity import run_get_perplexity
    run_get_perplexity(parsed_args)


if __name__ == "__main__":
    # Script entry point: forward the command-line tokens (program name
    # stripped) to main().
    main(sys.argv[1:])


================================================
FILE: paddlespeech/s2t/exps/lm/transformer/lm_cacu_perplexity.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Calculating the perplexity (PPL) of the LM model
import os

import numpy as np
import paddle
from paddle.io import DataLoader
from yacs.config import CfgNode

from paddlespeech.s2t.models.lm.dataset import TextCollatorSpm
from paddlespeech.s2t.models.lm.dataset import TextDataset
from paddlespeech.s2t.models.lm_interface import dynamic_import_lm
from paddlespeech.s2t.utils.log import Log

# Module-level logger, obtained from the project's Log helper.
logger = Log(__name__).getlog()


def get_config(config_path):
    """Load the config file at ``config_path`` into a new ``CfgNode``.

    The node is created with ``new_allowed=True`` before the file is merged
    into it.
    """
    node = CfgNode(new_allowed=True)
    node.merge_from_file(config_path)
    return node


def load_trained_lm(args):
    """Instantiate the language model described by ``args`` and load its weights.

    Args:
        args: namespace providing ``rnnlm_conf`` (config path) and ``rnnlm``
            (checkpoint path).

    Returns:
        tuple: ``(lm, lm_config)`` - the model with its state dict set, and the
        config it was built from.
    """
    lm_config = get_config(args.rnnlm_conf)
    # The model class is resolved from the ``model_module`` entry of the config.
    model_cls = dynamic_import_lm(lm_config.model_module)
    lm = model_cls(**lm_config.model)
    state = paddle.load(args.rnnlm)
    lm.set_state_dict(state)
    return lm, lm_config


def write_dict_into_file(ppl_dict, name):
    """Write a mapping to ``name`` as one ``"<key> <value>"`` line per entry.

    The file is written as UTF-8 so that non-ASCII keys (e.g. utterance ids)
    do not depend on the platform's default encoding.

    Args:
        ppl_dict: mapping to dump; entries are written in iteration order.
        name: path of the file to create or overwrite.
    """
    with open(name, "w", encoding="utf8") as f:
        for key, value in ppl_dict.items():
            f.write(f"{key} {value}\n")


def cacu_perplexity(
        lm_model,
        lm_config,
        args,
        log_base=None, ):
    """Compute the corpus perplexity of ``lm_model`` on ``args.text_path``.

    Per-utterance perplexities and token counts are also written to the files
    ``uttPPL`` and ``uttLEN`` inside ``args.output_dir`` (created if missing).

    Args:
        lm_model: model whose ``forward(ys_in, ys_out)`` returns a 5-tuple whose
            last two items are the per-utterance negative log-likelihood and
            the per-utterance token count.
        lm_config: config providing ``data.unit_type``, ``decoding.batch_size``
            and ``decoding.num_workers``.
        args: namespace providing ``text_path``, ``vocab_path``, ``bpeprefix``
            and ``output_dir``.
        log_base: optional base used to express the exponentiation; ``None``
            means the natural base.

    Returns:
        tuple: ``(ppl, log_base)`` where ``log_base`` is ``np.e`` when the
        argument was ``None``.
    """
    unit_type = lm_config.data.unit_type
    batch_size = lm_config.decoding.batch_size
    num_workers = lm_config.decoding.num_workers
    text_file_path = args.text_path

    def _perplexity(nll_sum, ntokens):
        # exp(nll / n), optionally rewritten as base ** (nll / n / ln(base)).
        if log_base is None:
            return np.exp(nll_sum / ntokens)
        return log_base**(nll_sum / ntokens / np.log(log_base))

    # Fail early (before any model work) if the output location is unusable.
    os.makedirs(args.output_dir, exist_ok=True)

    total_nll = 0.0
    total_ntokens = 0
    ppl_dict = {}
    len_dict = {}
    text_dataset = TextDataset.from_file(text_file_path)
    collate_fn_text = TextCollatorSpm(
        unit_type=unit_type,
        vocab_filepath=args.vocab_path,
        spm_model_prefix=args.bpeprefix)
    text_loader = DataLoader(
        text_dataset,
        batch_size=batch_size,
        collate_fn=collate_fn_text,
        num_workers=num_workers)

    logger.info("start caculating PPL......")
    # Pure evaluation: no gradients are needed for the forward passes.
    with paddle.no_grad():
        for keys, ys_input_pad, ys_output_pad, _ in text_loader():
            ys_input_pad = paddle.to_tensor(ys_input_pad)
            ys_output_pad = paddle.to_tensor(ys_output_pad)
            _, _, _, nll, nll_count = lm_model.forward(ys_input_pad,
                                                       ys_output_pad)
            nll = nll.numpy()
            nll_count = nll_count.numpy()
            for key, utt_nll, ntoken in zip(keys, nll, nll_count):
                # Write PPL of each utts for debugging or analysis
                ppl_dict[key] = str(_perplexity(utt_nll, ntoken))
                len_dict[key] = str(ntoken)

            total_nll += nll.sum()
            total_ntokens += nll_count.sum()
            logger.info("Current total nll: " + str(total_nll))
            logger.info("Current total tokens: " + str(total_ntokens))

    write_dict_into_file(ppl_dict, os.path.join(args.output_dir, "uttPPL"))
    write_dict_into_file(len_dict, os.path.join(args.output_dir, "uttLEN"))
    ppl = _perplexity(total_nll, total_ntokens)

    if log_base is None:
        log_base = np.e

    return ppl, log_base


def run_get_perplexity(args):
    """Load the language model named by ``args`` and log its perplexity.

    Args:
        args: namespace providing ``ngpu`` (0 selects the CPU, 1 selects
            ``gpu:0``), ``dtype`` (name of a paddle dtype attribute) and the
            fields consumed by ``load_trained_lm`` / ``cacu_perplexity``.

    Raises:
        NotImplementedError: if ``args.ngpu`` is greater than 1.
    """
    if args.ngpu > 1:
        raise NotImplementedError("only single GPU decoding is supported")

    device = "gpu:0" if args.ngpu == 1 else "cpu"
    paddle.set_device(device)
    dtype = getattr(paddle, args.dtype)
    logger.info(f"Decoding device={device}, dtype={dtype}")

    model, model_config = load_trained_lm(args)
    model.to(device=device, dtype=dtype)
    model.eval()

    perplexity, base = cacu_perplexity(model, model_config, args, None)
    logger.info("Final PPL: " + str(perplexity))
    logger.info("The log base is:" + "%.2f" % base)


================================================
FILE: paddlespeech/s2t/exps/u2/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/exps/u2/bin/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/exps/u2/bin/alignment.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Alignment for U2 model."""
from paddlespeech.s2t.exps.u2.model import U2Tester as Tester
from paddlespeech.s2t.training.cli import config_from_args
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.training.cli import maybe_dump_config
from paddlespeech.utils.argparse import print_arguments


def main_sp(config, args):
    """Single-process entry: build the tester and run alignment in eval mode."""
    tester = Tester(config, args)
    # setup() and run_align() both execute inside the tester's eval() context.
    with tester.eval():
        tester.setup()
        tester.run_align()


def main(config, args):
    """Entry point of the alignment script; delegates to ``main_sp``."""
    main_sp(config, args)


if __name__ == "__main__":
    # Script entry point: parse the shared command-line options and echo them.
    parser = default_argument_parser()
    args = parser.parse_args()
    print_arguments(args, globals())

    # Build the experiment config from the parsed arguments, print it, pass it
    # to maybe_dump_config with the --dump_config value, then run alignment.
    config = config_from_args(args)
    print(config)
    maybe_dump_config(args.dump_config, config)
    main(config, args)


================================================
FILE: paddlespeech/s2t/exps/u2/bin/export.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Export for U2 model."""
from paddlespeech.s2t.exps.u2.model import U2Tester as Tester
from paddlespeech.s2t.training.cli import config_from_args
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.training.cli import maybe_dump_config
from paddlespeech.utils.argparse import print_arguments


def main_sp(config, args):
    """Single-process entry: build the tester and run the export in eval mode."""
    tester = Tester(config, args)
    # setup() and run_export() both execute inside the tester's eval() context.
    with tester.eval():
        tester.setup()
        tester.run_export()


def main(config, args):
    """Entry point of the export script; delegates to ``main_sp``."""
    main_sp(config, args)


if __name__ == "__main__":
    # Script entry point: parse the shared command-line options and echo them.
    parser = default_argument_parser()
    args = parser.parse_args()
    print_arguments(args, globals())

    # Build the experiment config from the parsed arguments, print it, pass it
    # to maybe_dump_config with the --dump_config value, then run the export.
    config = config_from_args(args)
    print(config)
    maybe_dump_config(args.dump_config, config)
    main(config, args)


================================================
FILE: paddlespeech/s2t/exps/u2/bin/quant.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Quantzation U2 model."""
import paddle
from kaldiio import ReadHelper
from paddleslim import PTQ

from paddlespeech.audio.transform.transformation import Transformation
from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
from paddlespeech.s2t.models.u2 import U2Model
from paddlespeech.s2t.training.cli import config_from_args
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.utils.log import Log
from paddlespeech.s2t.utils.utility import UpdateConfig

# Module-level logger, obtained from the project's Log helper.
logger = Log(__name__).getlog()


class U2Infer():
    """Post-training quantization driver for a U2 model.

    ``__init__`` builds the model from the config, wraps it with paddleslim's
    ``PTQ`` and loads the checkpoint. ``run`` decodes up to ``args.num_utts``
    utterances read from ``args.audio_scp`` and then converts and saves the
    model with ``paddle.jit.save``.
    """

    def __init__(self, config, args):
        self.args = args
        self.config = config
        self.audio_scp = args.audio_scp

        self.preprocess_conf = config.preprocess_config
        self.preprocess_args = {"train": False}
        self.preprocessing = Transformation(self.preprocess_conf)
        self.text_feature = TextFeaturizer(
            unit_type=config.unit_type,
            vocab=config.vocab_filepath,
            spm_model_prefix=config.spm_model_prefix)

        paddle.set_device('gpu' if self.args.ngpu > 0 else 'cpu')

        # model
        model_conf = config
        with UpdateConfig(model_conf):
            model_conf.input_dim = config.feat_dim
            model_conf.output_dim = self.text_feature.vocab_size
        model = U2Model.from_config(model_conf)
        self.model = model
        self.model.eval()
        # The model is quantized first; the checkpoint is loaded into the
        # quantized wrapper below.
        self.ptq = PTQ()
        self.model = self.ptq.quantize(model)

        # load model
        params_path = self.args.checkpoint_path + ".pdparams"
        model_dict = paddle.load(params_path)
        self.model.set_state_dict(model_dict)

    def run(self):
        """Decode the first ``args.num_utts`` utterances, then export the model.

        Raises:
            AssertionError: if an utterance is not sampled at 16 kHz.
        """
        cnt = 0
        with ReadHelper(f"scp:{self.audio_scp}") as reader:
            for key, (rate, audio) in reader:
                assert rate == 16000
                cnt += 1
                # Use the arguments stored on the instance: a module-level
                # ``args`` only exists when this file is executed as a script.
                if cnt > self.args.num_utts:
                    break

                with paddle.no_grad():
                    logger.info(f"audio shape: {audio.shape}")

                    # fbank
                    feat = self.preprocessing(audio, **self.preprocess_args)
                    logger.info(f"feat shape: {feat.shape}")

                    ilen = paddle.to_tensor(feat.shape[0]).unsqueeze(0)
                    xs = paddle.to_tensor(feat, dtype='float32').unsqueeze(0)
                    decode_config = self.config.decode
                    logger.info(f"decode cfg: {decode_config}")
                    # Configs without ``reverse_weight`` fall back to 0.0, the
                    # same default the single-wav test script uses.
                    decode_reverse_weight = getattr(decode_config,
                                                    'reverse_weight', 0.0)
                    result_transcripts = self.model.decode(
                        xs,
                        ilen,
                        text_feature=self.text_feature,
                        decoding_method=decode_config.decoding_method,
                        beam_size=decode_config.beam_size,
                        ctc_weight=decode_config.ctc_weight,
                        decoding_chunk_size=decode_config.decoding_chunk_size,
                        num_decoding_left_chunks=decode_config.
                        num_decoding_left_chunks,
                        simulate_streaming=decode_config.simulate_streaming,
                        reverse_weight=decode_reverse_weight)
                    rsl = result_transcripts[0][0]
                    utt = key
                    logger.info(f"hyp: {utt} {rsl}")
                    # print(self.model)
                    # print(self.model.forward_encoder_chunk)

        logger.info("-------------start quant ----------------------")
        # Export-time constants used to build the static input specs below.
        batch_size = 1
        feat_dim = 80
        model_size = 512
        num_left_chunks = -1
        reverse_weight = 0.3
        logger.info(
            f"U2 Export Model Params: batch_size {batch_size}, feat_dim {feat_dim}, model_size {model_size}, num_left_chunks {num_left_chunks}, reverse_weight {reverse_weight}"
        )

        # ######################## self.model.forward_encoder_chunk ############
        # input_spec = [
        #     # (T,), int16
        #     paddle.static.InputSpec(shape=[None], dtype='int16'),
        # ]
        # self.model.forward_feature = paddle.jit.to_static(
        #     self.model.forward_feature, input_spec=input_spec)

        ######################### self.model.forward_encoder_chunk ############
        input_spec = [
            # xs, (B, T, D)
            paddle.static.InputSpec(
                shape=[batch_size, None, feat_dim], dtype='float32'),
            # offset, int, but need be tensor
            paddle.static.InputSpec(shape=[1], dtype='int32'),
            # required_cache_size, int
            num_left_chunks,
            # att_cache
            paddle.static.InputSpec(
                shape=[None, None, None, None], dtype='float32'),
            # cnn_cache
            paddle.static.InputSpec(
                shape=[None, None, None, None], dtype='float32')
        ]
        self.model.forward_encoder_chunk = paddle.jit.to_static(
            self.model.forward_encoder_chunk, input_spec=input_spec)

        ######################### self.model.ctc_activation ########################
        input_spec = [
            # encoder_out, (B,T,D)
            paddle.static.InputSpec(
                shape=[batch_size, None, model_size], dtype='float32')
        ]
        self.model.ctc_activation = paddle.jit.to_static(
            self.model.ctc_activation, input_spec=input_spec)

        ######################### self.model.forward_attention_decoder ########################
        input_spec = [
            # hyps, (B, U)
            paddle.static.InputSpec(shape=[None, None], dtype='int64'),
            # hyps_lens, (B,)
            paddle.static.InputSpec(shape=[None], dtype='int64'),
            # encoder_out, (B,T,D)
            paddle.static.InputSpec(
                shape=[batch_size, None, model_size], dtype='float32'),
            reverse_weight
        ]
        self.model.forward_attention_decoder = paddle.jit.to_static(
            self.model.forward_attention_decoder, input_spec=input_spec)
        ################################################################################

        # jit save
        logger.info(f"export save: {self.args.export_path}")
        self.ptq.ptq._convert(self.model)
        paddle.jit.save(
            self.model,
            self.args.export_path,
            combine_params=True,
            skip_forward=True)


def main(config, args):
    """Build a ``U2Infer`` from ``config`` / ``args`` and run it."""
    runner = U2Infer(config, args)
    runner.run()


if __name__ == "__main__":
    # Script entry point: parse the shared command-line options.
    parser = default_argument_parser()
    args = parser.parse_args()

    # Build the experiment config from the parsed arguments and run.
    config = config_from_args(args)
    main(config, args)


================================================
FILE: paddlespeech/s2t/exps/u2/bin/test.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Evaluation for U2 model."""
import cProfile

from paddlespeech.s2t.exps.u2.model import U2Tester as Tester
from paddlespeech.s2t.training.cli import config_from_args
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.training.cli import maybe_dump_config
from paddlespeech.utils.argparse import print_arguments


def main_sp(config, args):
    """Single-process entry: build the tester and run the test in eval mode."""
    tester = Tester(config, args)
    # setup() and run_test() both execute inside the tester's eval() context.
    with tester.eval():
        tester.setup()
        tester.run_test()


def main(config, args):
    """Entry point of the evaluation script; delegates to ``main_sp``."""
    main_sp(config, args)


if __name__ == "__main__":
    # Script entry point: parse the shared command-line options and echo them.
    parser = default_argument_parser()
    args = parser.parse_args()
    print_arguments(args, globals())

    # Build the experiment config from the parsed arguments, print it and pass
    # it to maybe_dump_config with the --dump_config value.
    config = config_from_args(args)
    print(config)
    maybe_dump_config(args.dump_config, config)

    # Run main() under cProfile and dump the collected statistics to the file
    # 'test.profile' (relative to the current working directory).
    pr = cProfile.Profile()
    pr.runcall(main, config, args)
    pr.dump_stats('test.profile')


================================================
FILE: paddlespeech/s2t/exps/u2/bin/test_wav.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Evaluation for U2 model."""
import os
import sys
from pathlib import Path

import numpy as np
import paddle
import soundfile

from paddlespeech.audio.transform.transformation import Transformation
from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
from paddlespeech.s2t.models.u2 import U2Model
from paddlespeech.s2t.training.cli import config_from_args
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.utils.log import Log
from paddlespeech.s2t.utils.utility import UpdateConfig
# Module-level logger, obtained from the project's Log helper.
logger = Log(__name__).getlog()

# TODO(hui zhang): dynamic load


class U2Infer():
    def __init__(self, config, args):
        self.args = args
        self.config = config
        self.audio_file = args.audio_file

        self.preprocess_conf = config.preprocess_config
        self.preprocess_args = {"train": False}
        self.preprocessing = Transformation(self.preprocess_conf)
        self.text_feature = TextFeaturizer(
            unit_type=config.unit_type,
            vocab=config.vocab_filepath,
            spm_model_prefix=config.spm_model_prefix)

        paddle.set_device('gpu' if self.args.ngpu > 0 else 'cpu')

        # model
        model_conf = config
        with UpdateConfig(model_conf):
            model_conf.input_dim = config.feat_dim
            model_conf.output_dim = self.text_feature.vocab_size
        model = U2Model.from_config(model_conf)
        self.model = model
        self.model.eval()

        # load model
        params_path = self.args.checkpoint_path + ".pdparams"
        model_dict = paddle.load(params_path)
        self.model.set_state_dict(model_dict)

    def run(self):
        check(args.audio_file)

        with paddle.no_grad():
            # read
            audio, sample_rate = soundfile.read(
                self.audio_file, dtype="int16", always_2d=True)
            audio = audio[:, 0]
            logger.info(f"audio shape: {audio.shape}")

            # fbank
            feat = self.preprocessing(audio, **self.preprocess_args)
            logger.info(f"feat shape: {feat.shape}")
            if self.args.debug:
                np.savetxt("feat.transform.txt", feat)

            ilen = paddle.to_tensor(feat.shape[0]).unsqueeze(0)
            xs = paddle.to_tensor(feat, dtype='float32').unsqueeze(0)
            decode_config = self.config.decode
            logger.info(f"decode cfg: {decode_config}")
            reverse_weight = getattr(decode_config, 'reverse_weight', 0.0)
            result_transcripts = self.model.decode(
                xs,
                ilen,
                text_feature=self.text_feature,
                decoding_method=decode_config.decoding_method,
                beam_size=decode_config.beam_size,
                ctc_weight=decode_config.ctc_weight,
                decoding_chunk_size=decode_config.decoding_chunk_size,
                num_decoding_left_chunks=decode_config.num_decoding_left_chunks,
                simulate_streaming=decode_config.simulate_streaming,
                reverse_weight=reverse_weight)
            rsl = result_transcripts[0][0]
            utt = Path(self.audio_file).name
            logger.info(f"hyp: {utt} {result_transcripts[0][0]}")
            return rsl


def check(audio_file):
    """Validate that ``audio_file`` is a readable 16 kHz audio file.

    Args:
        audio_file: path of the audio file to validate.

    Raises:
        SystemExit: with code -1 when the path is not a regular file or when
            ``soundfile.read`` fails to open it.
        AssertionError: when the sample rate reported by ``soundfile.read`` is
            not 16000.
    """
    if not os.path.isfile(audio_file):
        print("Please input the right audio file path")
        sys.exit(-1)

    logger.info("checking the audio file format......")
    try:
        # only the sample rate is needed; the samples are discarded
        _, sample_rate = soundfile.read(audio_file)
    except Exception as e:
        logger.error(str(e))
        logger.error(
            "can not open the wav file, please check the audio file format")
        sys.exit(-1)
    logger.info("The sample rate is %d" % sample_rate)
    if sample_rate != 16000:
        # Raised explicitly (instead of a bare ``assert``) so the validation is
        # not removed under ``python -O``; the exception type is unchanged.
        raise AssertionError(
            f"The sample rate must be 16000, but got {sample_rate}")
    logger.info("The audio file format is right")


def main(config, args):
    """Build a ``U2Infer`` from ``config`` and ``args`` and run it once."""
    infer = U2Infer(config, args)
    infer.run()


if __name__ == "__main__":
    # Script entry point: parse the command line, build the config from the
    # parsed arguments and run inference once.
    # `args` and `config` become module-level names when run as a script.
    parser = default_argument_parser()
    args = parser.parse_args()

    config = config_from_args(args)
    main(config, args)


================================================
FILE: paddlespeech/s2t/exps/u2/bin/train.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Trainer for U2 model."""
import cProfile
import os

from paddlespeech.s2t.exps.u2.model import U2Trainer as Trainer
from paddlespeech.s2t.training.cli import config_from_args
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.training.cli import maybe_dump_config
from paddlespeech.utils.argparse import print_arguments


def main_sp(config, args):
    """Create the U2 ``Trainer``, set it up and run it."""
    trainer = Trainer(config, args)
    trainer.setup()
    trainer.run()


def main(config, args):
    """Training entry point; delegates to ``main_sp`` with the same arguments."""
    main_sp(config, args)


if __name__ == "__main__":
    # Script entry point: parse the command line and print the arguments
    # (the helper also receives this module's globals).
    parser = default_argument_parser()
    args = parser.parse_args()
    print_arguments(args, globals())

    # Build the config from the parsed arguments and echo it.
    config = config_from_args(args)
    print(config)
    # NOTE(review): presumably writes the config to `args.dump_path` when one
    # is given -- confirm in maybe_dump_config.
    maybe_dump_config(args.dump_path, config)

    # Setting for profiling
    # Training runs under cProfile; the stats are written to
    # `<args.output>/train.profile`. They are only dumped when `main` returns
    # normally, because an exception from `runcall` skips `dump_stats`.
    pr = cProfile.Profile()
    pr.runcall(main, config, args)
    pr.dump_stats(os.path.join(args.output, 'train.profile'))


================================================
FILE: paddlespeech/s2t/exps/u2/model.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains U2 model."""
import json
import os
import time
from collections import defaultdict
from collections import OrderedDict
from contextlib import nullcontext

import jsonlines
import numpy as np
import paddle
from paddle import distributed as dist
from paddle.nn.utils import clip_grad_norm_

from paddlespeech.s2t.frontend.featurizer import TextFeaturizer
from paddlespeech.s2t.io.dataloader import DataLoaderFactory
from paddlespeech.s2t.models.u2 import U2Model
from paddlespeech.s2t.training.optimizer import OptimizerFactory
from paddlespeech.s2t.training.reporter import ObsScope
from paddlespeech.s2t.training.reporter import report
from paddlespeech.s2t.training.scheduler import LRSchedulerFactory
from paddlespeech.s2t.training.timer import Timer
from paddlespeech.s2t.training.trainer import Trainer
from paddlespeech.s2t.utils import ctc_utils
from paddlespeech.s2t.utils import error_rate
from paddlespeech.s2t.utils import layer_tools
from paddlespeech.s2t.utils import mp_tools
from paddlespeech.s2t.utils.log import Log
from paddlespeech.s2t.utils.utility import UpdateConfig

logger = Log(__name__).getlog()


class U2Trainer(Trainer):
    """Trainer for the U2 model.

    Implements the per-batch training step (optional AMP and gradient
    accumulation), the validation loop, the epoch loop, and the construction
    of the dataloaders, model, optimizer and learning-rate scheduler from
    ``self.config``.
    """

    def __init__(self, config, args):
        super().__init__(config, args)

    def train_batch(self, batch_index, batch_data, scaler, msg):
        """Run forward/backward on one batch; step the optimizer when due.

        The optimizer, LR scheduler and ``self.iteration`` only advance on
        every ``accum_grad``-th batch; gradients are accumulated in between.
        Losses and timing are published through ``report``.

        Args:
            batch_index: zero-based index of the batch inside the epoch.
            batch_data: tuple ``(utt, audio, audio_len, text, text_len)``.
            scaler: AMP gradient scaler; a falsy value disables AMP.
            msg: log prefix supplied by the caller (not used here).
        """
        train_conf = self.config
        start = time.time()
        # True on the batch that closes a gradient-accumulation window.
        is_update_step = (batch_index + 1) % train_conf.accum_grad == 0

        # forward
        utt, audio, audio_len, text, text_len = batch_data
        with paddle.amp.auto_cast(level=self.amp_level, enable=bool(scaler)):
            loss, attention_loss, ctc_loss = self.model(audio, audio_len, text,
                                                        text_len)

        # Divide the loss by `accum_grad` so the gradients accumulated over
        # the window are averaged rather than summed.
        loss /= train_conf.accum_grad
        # Report the loss on its original (undivided) scale.
        losses_np = {'loss': float(loss) * train_conf.accum_grad}
        if attention_loss:
            losses_np['att_loss'] = float(attention_loss)
        if ctc_loss:
            losses_np['ctc_loss'] = float(ctc_loss)

        # loss backward
        if not is_update_step:
            # Disable gradient synchronizations across DDP processes.
            # Within this context, gradients will be accumulated on module
            # variables, which will later be synchronized.
            # When using cpu w/o DDP, model does not have `no_sync`
            context = self.model.no_sync if (hasattr(self.model, "no_sync") and
                                             self.parallel) else nullcontext
        else:
            # Used for single gpu training and DDP gradient synchronization
            # processes.
            context = nullcontext
        with context():
            if scaler:
                scaler.scale(loss).backward()
            else:
                loss.backward()
            layer_tools.print_grads(self.model, print_func=None)

        # optimizer step
        if is_update_step:
            # do global grad clip
            if train_conf.global_grad_clip != 0:
                if scaler:
                    # gradients must be unscaled before clipping by norm
                    scaler.unscale_(self.optimizer)
                # need paddlepaddle==develop or paddlepaddle>=2.5
                clip_grad_norm_(self.model.parameters(),
                                train_conf.global_grad_clip)
            if scaler:
                scaler.step(self.optimizer)
                scaler.update()
            else:
                self.optimizer.step()
            self.optimizer.clear_grad()
            self.lr_scheduler.step()
            self.iteration += 1

        iteration_time = time.time() - start

        for k, v in losses_np.items():
            report(k, v)
        report("batch_size", self.config.batch_size)
        report("accum", train_conf.accum_grad)
        report("step_cost", iteration_time)

        if is_update_step:
            if dist.get_rank() == 0 and self.visualizer:
                losses_np_v = losses_np.copy()
                losses_np_v.update({"lr": self.lr_scheduler()})
                for key, val in losses_np_v.items():
                    # `self.iteration` was already incremented above, hence -1
                    self.visualizer.add_scalar(
                        tag='train/' + key, value=val, step=self.iteration - 1)

    @paddle.no_grad()
    def valid(self):
        """Evaluate the model on ``self.valid_loader``.

        Batches whose loss is not finite are ignored.

        Returns:
            Tuple ``(total_loss, num_seen_utts)``: the sum over batches of
            ``loss * batch_size`` and the utterance counter. The counter
            starts at 1, so it is one larger than the number of utterances
            actually seen (NOTE(review): presumably to avoid a division by
            zero in the callers -- confirm).
        """
        self.model.eval()
        if not self.use_streamdata:
            logger.info(
                f"Valid Total Examples: {len(self.valid_loader.dataset)}")
        valid_losses = defaultdict(list)
        num_seen_utts = 1
        total_loss = 0.0
        for i, batch in enumerate(self.valid_loader):
            utt, audio, audio_len, text, text_len = batch
            loss, attention_loss, ctc_loss = self.model(audio, audio_len, text,
                                                        text_len)
            if paddle.isfinite(loss):
                # batch size is the leading dimension of the audio tensor
                num_utts = batch[1].shape[0]
                num_seen_utts += num_utts
                total_loss += float(loss) * num_utts
                valid_losses['val_loss'].append(float(loss))
                if attention_loss:
                    valid_losses['val_att_loss'].append(float(attention_loss))
                if ctc_loss:
                    valid_losses['val_ctc_loss'].append(float(ctc_loss))

            if (i + 1) % self.config.log_interval == 0:
                valid_dump = {k: np.mean(v) for k, v in valid_losses.items()}
                valid_dump['val_history_loss'] = total_loss / num_seen_utts

                # logging
                msg = f"Valid: Rank: {dist.get_rank()}, "
                msg += "epoch: {}, ".format(self.epoch)
                msg += "step: {}, ".format(self.iteration)
                if not self.use_streamdata:
                    msg += "batch: {}/{}, ".format(i + 1,
                                                   len(self.valid_loader))
                msg += ', '.join('{}: {:>.6f}'.format(k, v)
                                 for k, v in valid_dump.items())
                logger.info(msg)

        logger.info('Rank {} Val info val_loss {}'.format(
            dist.get_rank(), total_loss / num_seen_utts))
        return total_loss, num_seen_utts

    def do_train(self):
        """Run the epoch loop until ``self.config.n_epoch`` is reached.

        Each epoch trains over ``self.train_loader``, validates with
        ``valid`` (reducing the result across ranks when the world size is
        greater than one), logs the validation loss, saves a checkpoint
        tagged with the epoch and then calls ``new_epoch``.
        """
        # !!!IMPORTANT!!!
        # Try to export the model by script, if fails, we should refine
        # the code to satisfy the script export requirements
        # script_model = paddle.jit.to_static(self.model)
        # script_model_path = str(self.checkpoint_dir / 'init')
        # paddle.jit.save(script_model, script_model_path)

        self.before_train()

        if not self.use_streamdata:
            logger.info(
                f"Train Total Examples: {len(self.train_loader.dataset)}")
        while self.epoch < self.config.n_epoch:
            with Timer("Epoch-Train Time Cost: {}"):
                self.model.train()
                try:
                    data_start_time = time.time()
                    for batch_index, batch in enumerate(self.train_loader):
                        dataload_time = time.time() - data_start_time
                        msg = "Train:"
                        observation = OrderedDict()
                        # values passed to `report` inside this scope are
                        # collected into `observation`
                        with ObsScope(observation):
                            report("Rank", dist.get_rank())
                            report("epoch", self.epoch)
                            report('step', self.iteration)
                            report("lr", self.lr_scheduler())
                            self.train_batch(batch_index, batch, self.scaler,
                                             msg)
                            self.after_train_batch()
                            report('iter', batch_index + 1)
                            if not self.use_streamdata:
                                report('total', len(self.train_loader))
                            report('reader_cost', dataload_time)
                        observation['batch_cost'] = observation[
                            'reader_cost'] + observation['step_cost']
                        observation['samples'] = observation['batch_size']
                        observation['ips,samples/s'] = observation[
                            'batch_size'] / observation['batch_cost']
                        # A key of the form "name,unit" is rendered as
                        # " name: value unit".
                        for k, v in observation.items():
                            key_parts = k.split(',')
                            msg += f" {key_parts[0]}: "
                            msg += f"{v:>.8f}" if isinstance(v,
                                                             float) else f"{v}"
                            msg += f" {key_parts[1]}" if len(
                                key_parts) == 2 else ""
                            msg += ","
                        msg = msg[:-1]  # remove the last ","
                        if (batch_index + 1) % self.config.log_interval == 0:
                            logger.info(msg)
                        data_start_time = time.time()
                except Exception as e:
                    logger.error(e)
                    raise
            with Timer("Eval Time Cost: {}"):
                total_loss, num_seen_utts = self.valid()
                if dist.get_world_size() > 1:
                    num_seen_utts = paddle.to_tensor(num_seen_utts)
                    # the default operator in all_reduce function is sum.
                    dist.all_reduce(num_seen_utts)
                    total_loss = paddle.to_tensor(total_loss)
                    dist.all_reduce(total_loss)
                    cv_loss = total_loss / num_seen_utts
                    cv_loss = float(cv_loss)
                else:
                    cv_loss = total_loss / num_seen_utts

            logger.info(
                'Epoch {} Val info val_loss {}'.format(self.epoch, cv_loss))
            if self.visualizer:
                self.visualizer.add_scalar(
                    tag='eval/cv_loss', value=cv_loss, step=self.epoch)
                self.visualizer.add_scalar(
                    tag='eval/lr', value=self.lr_scheduler(), step=self.epoch)

            self.save(tag=self.epoch, infos={'val_loss': cv_loss})
            self.new_epoch()

    def setup_dataloader(self):
        """Create the dataloaders through ``DataLoaderFactory``.

        When ``self.train`` is truthy the train/valid loaders are built,
        otherwise the test/align loaders. Also sets ``self.use_streamdata``
        from the ``use_stream_data`` config entry (default ``False``).
        """
        config = self.config.clone()
        self.use_streamdata = config.get("use_stream_data", False)
        if self.train:
            self.train_loader = DataLoaderFactory.get_dataloader(
                'train', config, self.args)
            self.valid_loader = DataLoaderFactory.get_dataloader(
                'valid', config, self.args)
            logger.info("Setup train/valid Dataloader!")
        else:
            self.test_loader = DataLoaderFactory.get_dataloader('test', config,
                                                                self.args)
            self.align_loader = DataLoaderFactory.get_dataloader(
                'align', config, self.args)
            logger.info("Setup test/align Dataloader!")

    def setup_model(self):
        """Build the model and, when training, the optimizer and scheduler.

        ``input_dim``/``output_dim`` of the model config are taken from the
        train loader (training) or the test loader (otherwise). Sets
        ``self.model``, ``self.use_amp``, ``self.amp_level`` and
        ``self.scaler``; when training also ``self.optimizer`` and
        ``self.lr_scheduler``.
        """
        config = self.config
        model_conf = config

        with UpdateConfig(model_conf):
            if self.train:
                model_conf.input_dim = self.train_loader.feat_dim
                model_conf.output_dim = self.train_loader.vocab_size
            else:
                model_conf.input_dim = self.test_loader.feat_dim
                model_conf.output_dim = self.test_loader.vocab_size

        model = U2Model.from_config(model_conf)

        # For Mixed Precision Training
        self.use_amp = self.config.get("use_amp", True)
        self.amp_level = self.config.get("amp_level", "O1")
        if self.train and self.use_amp:
            self.scaler = paddle.amp.GradScaler(
                init_loss_scaling=self.config.get(
                    "scale_loss", 32768.0))  #amp default num 32768.0
            #Set amp_level
            if self.amp_level == 'O2':
                model = paddle.amp.decorate(models=model, level=self.amp_level)
        else:
            self.scaler = None
        if self.parallel:
            model = paddle.DataParallel(model)

        logger.info(f"{model}")
        layer_tools.print_params(model, logger.info)
        self.model = model
        logger.info("Setup model!")

        # optimizer and scheduler are only needed for training
        if not self.train:
            return

        train_config = config
        optim_type = train_config.optim
        optim_conf = train_config.optim_conf
        scheduler_type = train_config.scheduler
        scheduler_conf = train_config.scheduler_conf

        scheduler_args = {
            "learning_rate": optim_conf.lr,
            "verbose": False,
            "warmup_steps": scheduler_conf.warmup_steps,
            "gamma": scheduler_conf.lr_decay,
            "d_model": model_conf.encoder_conf.output_size,
        }
        lr_scheduler = LRSchedulerFactory.from_args(scheduler_type,
                                                    scheduler_args)

        def optimizer_args(
                config,
                parameters,
                lr_scheduler=None, ):
            """Assemble the keyword arguments handed to ``OptimizerFactory``."""
            optim_type = config.optim
            optim_conf = config.optim_conf
            return {
                "weight_decay": optim_conf.weight_decay,
                "learning_rate": lr_scheduler
                if lr_scheduler else optim_conf.lr,
                "parameters": parameters,
                "epsilon": 1e-9 if optim_type == 'noam' else None,
                "beta1": 0.9 if optim_type == 'noam' else None,
                # key was misspelled "beat2", which is not an optimizer argument
                "beta2": 0.98 if optim_type == 'noam' else None,
            }

        optim_args = optimizer_args(config, model.parameters(), lr_scheduler)
        optimizer = OptimizerFactory.from_args(optim_type, optim_args)

        self.optimizer = optimizer
        self.lr_scheduler = lr_scheduler
        logger.info("Setup optimizer/lr_scheduler!")


class U2Tester(U2Trainer):
    """Evaluation, alignment and export driver for the U2 model.

    Adds a ``TextFeaturizer`` (built from ``unit_type``, ``vocab_filepath``
    and ``spm_model_prefix`` of the config) on top of ``U2Trainer``.
    """

    def __init__(self, config, args):
        super().__init__(config, args)
        self.text_feature = TextFeaturizer(
            unit_type=self.config.unit_type,
            vocab=self.config.vocab_filepath,
            spm_model_prefix=self.config.spm_model_prefix)
        self.vocab_list = self.text_feature.vocab_list

    def id2token(self, texts, texts_len, text_feature):
        """Convert padded token-id sequences back to text.

        Args:
            texts: iterable of token-id tensors, one per utterance.
            texts_len: iterable of tensors holding the valid length of each
                entry of ``texts``.
            text_feature: object whose ``defeaturize`` maps a list of ids to
                text.

        Returns:
            List with one ``defeaturize`` result per utterance.
        """
        trans = []
        for text, n in zip(texts, texts_len):
            n = n.numpy().item()
            # drop the padding beyond the valid length
            ids = text[:n]
            trans.append(text_feature.defeaturize(ids.numpy().tolist()))
        return trans

    def compute_metrics(self,
                        utts,
                        audio,
                        audio_len,
                        texts,
                        texts_len,
                        fout=None):
        """Decode one batch and compute its error statistics.

        Args:
            utts: utterance ids of the batch.
            audio: batch of input features.
            audio_len: tensor with the length of each entry of ``audio``.
            texts: reference token ids.
            texts_len: valid length of each entry of ``texts``.
            fout: optional writer; when given, one record with the keys
                ``utt``, ``refs``, ``hyps`` and ``hyps_tokenid`` is written
                per utterance.

        Returns:
            dict with ``errors_sum``, ``len_refs``, ``num_ins``,
            ``error_rate``, ``error_rate_type``, ``num_frames`` (sum of
            ``audio_len``) and ``decode_time`` (seconds, measured with
            ``time.time()`` around reference conversion and decoding).
        """
        decode_config = self.config.decode
        errors_sum, len_refs, num_ins = 0.0, 0, 0
        errors_func = error_rate.char_errors if decode_config.error_rate_type == 'cer' else error_rate.word_errors
        error_rate_func = error_rate.cer if decode_config.error_rate_type == 'cer' else error_rate.wer
        # reverse_weight is optional in the decode config; default to 0.0
        reverse_weight = getattr(decode_config, 'reverse_weight', 0.0)

        start_time = time.time()
        target_transcripts = self.id2token(texts, texts_len, self.text_feature)

        result_transcripts, result_tokenids = self.model.decode(
            audio,
            audio_len,
            text_feature=self.text_feature,
            decoding_method=decode_config.decoding_method,
            beam_size=decode_config.beam_size,
            ctc_weight=decode_config.ctc_weight,
            decoding_chunk_size=decode_config.decoding_chunk_size,
            num_decoding_left_chunks=decode_config.num_decoding_left_chunks,
            simulate_streaming=decode_config.simulate_streaming,
            reverse_weight=reverse_weight)
        decode_time = time.time() - start_time

        for utt, target, result, rec_tids in zip(
                utts, target_transcripts, result_transcripts, result_tokenids):
            errors, len_ref = errors_func(target, result)
            errors_sum += errors
            len_refs += len_ref
            num_ins += 1
            if fout:
                fout.write({
                    "utt": utt,
                    "refs": [target],
                    "hyps": [result],
                    "hyps_tokenid": [rec_tids],
                })
            logger.info(f"Utt: {utt}")
            logger.info(f"Ref: {target}")
            logger.info(f"Hyp: {result}")
            logger.info("One example error rate [%s] = %f" % (
                decode_config.error_rate_type, error_rate_func(target, result)))

        return dict(
            errors_sum=errors_sum,
            len_refs=len_refs,
            num_ins=num_ins,  # num examples
            error_rate=errors_sum / len_refs,
            error_rate_type=decode_config.error_rate_type,
            num_frames=audio_len.sum().numpy().item(),
            decode_time=decode_time)

    @mp_tools.rank_zero_only
    @paddle.no_grad()
    def test(self):
        """Decode ``self.test_loader`` and write the results.

        Per-utterance records go to ``self.args.result_file`` (jsonlines);
        a one-line JSON summary goes to the same path with the extension
        replaced by ``.err``.
        """
        assert self.args.result_file
        self.model.eval()
        if not self.use_streamdata:
            logger.info(f"Test Total Examples: {len(self.test_loader.dataset)}")

        stride_ms = self.config.stride_ms
        error_rate_type = None
        errors_sum, len_refs, num_ins = 0.0, 0, 0
        num_frames = 0.0
        # accumulated decoding time in milliseconds
        num_time = 0.0
        with jsonlines.open(self.args.result_file, 'w') as fout:
            for batch in self.test_loader:
                metrics = self.compute_metrics(*batch, fout=fout)
                num_frames += metrics['num_frames']
                # `decode_time` is a time.time() delta, i.e. seconds. Convert
                # it to milliseconds so that it shares the unit of
                # `num_frames * stride_ms` (RTF) and of the `/ 1000.0 / 3600.0`
                # conversion used for "process_hour" below.
                num_time += metrics["decode_time"] * 1000.0
                errors_sum += metrics['errors_sum']
                len_refs += metrics['len_refs']
                num_ins += metrics['num_ins']
                error_rate_type = metrics['error_rate_type']
                rtf = num_time / (num_frames * stride_ms)
                logger.info(
                    "RTF: %f, Error rate [%s] (%d/?) = %f" %
                    (rtf, error_rate_type, num_ins, errors_sum / len_refs))

        rtf = num_time / (num_frames * stride_ms)
        msg = "Test: "
        msg += "epoch: {}, ".format(self.epoch)
        msg += "step: {}, ".format(self.iteration)
        msg += "RTF: {}, ".format(rtf)
        msg += "Final error rate [%s] (%d/%d) = %f" % (
            error_rate_type, num_ins, num_ins, errors_sum / len_refs)
        logger.info(msg)

        # test meta results
        err_meta_path = os.path.splitext(self.args.result_file)[0] + '.err'
        with open(err_meta_path, 'w') as f:
            data = json.dumps({
                "epoch":
                self.epoch,
                "step":
                self.iteration,
                "rtf":
                rtf,
                error_rate_type:
                errors_sum / len_refs,
                "dataset_hour": (num_frames * stride_ms) / 1000.0 / 3600.0,
                "process_hour":
                num_time / 1000.0 / 3600.0,
                "num_examples":
                num_ins,
                "err_sum":
                errors_sum,
                "ref_len":
                len_refs,
                "decode_method":
                self.config.decode.decoding_method,
            })
            f.write(data + '\n')

    @paddle.no_grad()
    def align(self):
        """Run ``ctc_utils.ctc_align`` over ``self.align_loader``.

        The decode batch size, ``stride_ms``, the vocabulary list and
        ``self.args.result_file`` are forwarded to the helper.
        """
        ctc_utils.ctc_align(self.config, self.model, self.align_loader,
                            self.config.decode.decode_batch_size,
                            self.config.stride_ms, self.vocab_list,
                            self.args.result_file)

    def load_inferspec(self):
        """infer model and input spec.

        Returns:
            nn.Layer: inference model
            Tuple[int, int, int, int]: ``(batch_size, feat_dim, model_size,
                num_left_chunks)`` used by ``export`` to build the input specs.
        """
        from paddlespeech.s2t.models.u2 import U2InferModel
        infer_model = U2InferModel.from_pretrained(self.test_loader,
                                                   self.config.clone(),
                                                   self.args.checkpoint_path)
        batch_size = 1
        feat_dim = self.test_loader.feat_dim
        model_size = self.config.encoder_conf.output_size
        num_left_chunks = -1
        logger.info(
            f"U2 Export Model Params: batch_size {batch_size}, feat_dim {feat_dim}, model_size {model_size}, num_left_chunks {num_left_chunks}"
        )

        return infer_model, (batch_size, feat_dim, model_size, num_left_chunks)

    @paddle.no_grad()
    def export(self):
        """Convert the inference model to static graph and save it.

        ``forward_feature``, ``forward_encoder_chunk``, ``ctc_activation``
        and ``forward_attention_decoder`` are wrapped with
        ``paddle.jit.to_static`` and saved to ``self.args.export_path``.
        The saved model is then reloaded and the output of
        ``forward_encoder_chunk`` is compared with the dygraph output.
        """
        infer_model, input_spec = self.load_inferspec()
        infer_model.eval()
        paddle.set_device('cpu')

        assert isinstance(input_spec, (list, tuple)), type(input_spec)
        batch_size, feat_dim, model_size, num_left_chunks = input_spec

        ######################## infer_model.forward_feature ############
        input_spec = [
            # (T,), int16
            paddle.static.InputSpec(shape=[None], dtype='int16'),
        ]
        infer_model.forward_feature = paddle.jit.to_static(
            infer_model.forward_feature, input_spec=input_spec)

        ######################### infer_model.forward_encoder_chunk ############
        input_spec = [
            # xs, (B, T, D)
            paddle.static.InputSpec(
                shape=[batch_size, None, feat_dim], dtype='float32'),
            # offset, int, but need be tensor
            paddle.static.InputSpec(shape=[1], dtype='int32'),
            # required_cache_size, int
            num_left_chunks,
            # att_cache
            paddle.static.InputSpec(
                shape=[None, None, None, None], dtype='float32'),
            # cnn_cache
            paddle.static.InputSpec(
                shape=[None, None, None, None], dtype='float32')
        ]
        infer_model.forward_encoder_chunk = paddle.jit.to_static(
            infer_model.forward_encoder_chunk, input_spec=input_spec)

        ######################### infer_model.ctc_activation ########################
        input_spec = [
            # encoder_out, (B,T,D)
            paddle.static.InputSpec(
                shape=[batch_size, None, model_size], dtype='float32')
        ]
        infer_model.ctc_activation = paddle.jit.to_static(
            infer_model.ctc_activation, input_spec=input_spec)

        ######################### infer_model.forward_attention_decoder ########################
        reverse_weight = 0.3
        input_spec = [
            # hyps, (B, U)
            paddle.static.InputSpec(shape=[None, None], dtype='int64'),
            # hyps_lens, (B,)
            paddle.static.InputSpec(shape=[None], dtype='int64'),
            # encoder_out, (B,T,D)
            paddle.static.InputSpec(
                shape=[batch_size, None, model_size], dtype='float32'),
            reverse_weight
        ]
        infer_model.forward_attention_decoder = paddle.jit.to_static(
            infer_model.forward_attention_decoder, input_spec=input_spec)

        # jit save
        logger.info(f"export save: {self.args.export_path}")
        paddle.jit.save(
            infer_model,
            self.args.export_path,
            combine_params=True,
            skip_forward=True)

        # test dy2static
        # The probe input follows the exported input spec (batch_size and
        # feat_dim) instead of a fixed [1, 67, 80] shape, so the check also
        # works for models whose feature dimension is not 80.
        probe_shape = [batch_size, 67, feat_dim]

        # forward_encoder_chunk dygraph
        xs1 = paddle.full(probe_shape, 0.1, dtype='float32')
        offset = paddle.to_tensor([0], dtype='int32')
        required_cache_size = num_left_chunks
        att_cache = paddle.zeros([0, 0, 0, 0])
        cnn_cache = paddle.zeros([0, 0, 0, 0])
        xs_d, att_cache_d, cnn_cache_d = infer_model.forward_encoder_chunk(
            xs1, offset, required_cache_size, att_cache, cnn_cache)

        # load static model
        from paddle.jit.layer import Layer
        layer = Layer()
        logger.info(f"load export model: {self.args.export_path}")
        layer.load(self.args.export_path, paddle.CPUPlace())

        # forward_encoder_chunk static
        xs1 = paddle.full(probe_shape, 0.1, dtype='float32')
        offset = paddle.to_tensor([0], dtype='int32')
        att_cache = paddle.zeros([0, 0, 0, 0])
        cnn_cache = paddle.zeros([0, 0, 0, 0])
        func = getattr(layer, 'forward_encoder_chunk')
        # required_cache_size is not passed here: it was given to the input
        # spec as a plain value rather than an InputSpec
        xs_s, att_cache_s, cnn_cache_s = func(xs1, offset, att_cache, cnn_cache)
        np.testing.assert_allclose(xs_d, xs_s, atol=1e-5)
        np.testing.assert_allclose(att_cache_d, att_cache_s, atol=1e-4)
        np.testing.assert_allclose(cnn_cache_d, cnn_cache_s, atol=1e-4)
        # logger.info(f"forward_encoder_chunk output: {xs_s}")
        # logger.info(f"forward_encoder_chunk output: {xs_s}")


================================================
FILE: paddlespeech/s2t/exps/u2/trainer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains U2 model."""
import paddle
from paddle import distributed as dist
from paddle.io import DataLoader

from paddlespeech.s2t.io.collator import SpeechCollator
from paddlespeech.s2t.io.dataset import ManifestDataset
from paddlespeech.s2t.io.sampler import SortagradBatchSampler
from paddlespeech.s2t.io.sampler import SortagradDistributedBatchSampler
from paddlespeech.s2t.models.u2 import U2Evaluator
from paddlespeech.s2t.models.u2 import U2Model
from paddlespeech.s2t.models.u2 import U2Updater
from paddlespeech.s2t.training.extensions.snapshot import Snapshot
from paddlespeech.s2t.training.extensions.visualizer import VisualDL
from paddlespeech.s2t.training.optimizer import OptimizerFactory
from paddlespeech.s2t.training.scheduler import LRSchedulerFactory
from paddlespeech.s2t.training.timer import Timer
from paddlespeech.s2t.training.trainer import Trainer
from paddlespeech.s2t.training.updaters.trainer import Trainer as NewTrainer
from paddlespeech.s2t.utils import layer_tools
from paddlespeech.s2t.utils.log import Log
from paddlespeech.s2t.utils.utility import UpdateConfig

logger = Log(__name__).getlog()


class U2Trainer(Trainer):
    """U2 experiment built on the updater/extension style training loop.

    ``setup_dataloader`` and ``setup_model`` create the data loaders, the
    model, the optimizer and the LR scheduler. ``run`` then wraps them in a
    ``U2Updater``/``U2Evaluator`` pair and hands control to ``NewTrainer``.
    """

    def __init__(self, config, args):
        super().__init__(config, args)

    def setup_dataloader(self):
        """Create the train/valid/test/align data loaders.

        Works on a defrosted clone of ``self.config`` so the manifest path and
        the filtering bounds can be rewritten per split without touching the
        experiment config. Sets ``self.train_loader``, ``self.valid_loader``,
        ``self.test_loader`` and ``self.align_loader``.
        """
        config = self.config.clone()
        config.defrost()
        config.keep_transcription_text = False

        # train/valid dataset, return token ids
        config.manifest = config.train_manifest
        train_dataset = ManifestDataset.from_config(config)

        config.manifest = config.dev_manifest
        dev_dataset = ManifestDataset.from_config(config)

        collate_fn_train = SpeechCollator.from_config(config)

        collate_fn_dev = SpeechCollator.from_config(config)

        # The distributed sampler is used whenever the experiment runs in
        # parallel mode; both samplers share the same sortagrad settings.
        if self.parallel:
            batch_sampler = SortagradDistributedBatchSampler(
                train_dataset,
                batch_size=config.batch_size,
                num_replicas=None,
                rank=None,
                shuffle=True,
                drop_last=True,
                sortagrad=config.sortagrad,
                shuffle_method=config.shuffle_method)
        else:
            batch_sampler = SortagradBatchSampler(
                train_dataset,
                shuffle=True,
                batch_size=config.batch_size,
                drop_last=True,
                sortagrad=config.sortagrad,
                shuffle_method=config.shuffle_method)
        self.train_loader = DataLoader(
            train_dataset,
            batch_sampler=batch_sampler,
            collate_fn=collate_fn_train,
            num_workers=config.num_workers, )
        self.valid_loader = DataLoader(
            dev_dataset,
            batch_size=config.batch_size,
            shuffle=False,
            drop_last=False,
            collate_fn=collate_fn_dev,
            num_workers=config.num_workers, )

        # test dataset, return raw text
        config.manifest = config.test_manifest
        # Widen every length/ratio bound to [0, inf) before building the test
        # dataset, i.e. the bounds configured for training are not applied to
        # the test manifest.
        config.min_input_len = 0.0  # second
        config.max_input_len = float('inf')  # second
        config.min_output_len = 0.0  # tokens
        config.max_output_len = float('inf')  # tokens
        config.min_output_input_ratio = 0.00
        config.max_output_input_ratio = float('inf')

        test_dataset = ManifestDataset.from_config(config)
        # return text ord id
        config.keep_transcription_text = True
        self.test_loader = DataLoader(
            test_dataset,
            batch_size=config.decode.batch_size,
            shuffle=False,
            drop_last=False,
            collate_fn=SpeechCollator.from_config(config))
        # return text token id
        config.keep_transcription_text = False
        self.align_loader = DataLoader(
            test_dataset,
            batch_size=config.decode.batch_size,
            shuffle=False,
            drop_last=False,
            collate_fn=SpeechCollator.from_config(config))
        logger.info("Setup train/valid/test/align Dataloader!")

    def setup_model(self):
        """Build the U2 model, its LR scheduler and its optimizer.

        The input/output dimensions are taken from the training collator.
        Sets ``self.model``, ``self.optimizer`` and ``self.lr_scheduler``.
        """
        config = self.config
        model_conf = config
        with UpdateConfig(model_conf):
            model_conf.input_dim = self.train_loader.collate_fn.feature_size
            model_conf.output_dim = self.train_loader.collate_fn.vocab_size

        model = U2Model.from_config(model_conf)

        if self.parallel:
            model = paddle.DataParallel(model)

        model.train()
        logger.info(f"{model}")
        layer_tools.print_params(model, logger.info)

        train_config = config
        optim_type = train_config.optim
        optim_conf = train_config.optim_conf
        scheduler_type = train_config.scheduler
        scheduler_conf = train_config.scheduler_conf

        scheduler_args = {
            "learning_rate": optim_conf.lr,
            "verbose": False,
            "warmup_steps": scheduler_conf.warmup_steps,
            "gamma": scheduler_conf.lr_decay,
            "d_model": model_conf.encoder_conf.output_size,
        }
        lr_scheduler = LRSchedulerFactory.from_args(scheduler_type,
                                                    scheduler_args)

        def optimizer_args(
                config,
                parameters,
                lr_scheduler=None, ):
            """Assemble the keyword arguments handed to the optimizer factory.

            Falls back to the plain ``optim_conf.lr`` value when no scheduler
            is supplied.
            """
            train_config = config
            optim_type = train_config.optim
            optim_conf = train_config.optim_conf
            return {
                "grad_clip": train_config.global_grad_clip,
                "weight_decay": optim_conf.weight_decay,
                "learning_rate": lr_scheduler
                if lr_scheduler else optim_conf.lr,
                "parameters": parameters,
                "epsilon": 1e-9 if optim_type == 'noam' else None,
                "beta1": 0.9 if optim_type == 'noam' else None,
                # The key was previously misspelled "beat2", so the value
                # could never reach an optimizer parameter named ``beta2``.
                "beta2": 0.98 if optim_type == 'noam' else None,
            }

        optimizer_kwargs = optimizer_args(config,
                                          model.parameters(), lr_scheduler)
        optimizer = OptimizerFactory.from_args(optim_type, optimizer_kwargs)

        self.model = model
        self.optimizer = optimizer
        self.lr_scheduler = lr_scheduler
        logger.info("Setup model/optimizer/lr_scheduler!")

    def setup_updater(self):
        """Create the updater-based trainer and register its extensions.

        The evaluator runs once per epoch on every rank. The VisualDL writer
        and the k-best snapshot extension are registered on rank 0 only. The
        resulting trainer is stored in ``self.trainer``.
        """
        output_dir = self.output_dir
        config = self.config

        updater = U2Updater(
            model=self.model,
            optimizer=self.optimizer,
            scheduler=self.lr_scheduler,
            dataloader=self.train_loader,
            output_dir=output_dir,
            accum_grad=config.accum_grad)

        trainer = NewTrainer(updater, (config.n_epoch, 'epoch'), output_dir)

        evaluator = U2Evaluator(self.model, self.valid_loader)

        trainer.extend(evaluator, trigger=(1, "epoch"))

        if dist.get_rank() == 0:
            trainer.extend(VisualDL(output_dir), trigger=(1, "iteration"))
            num_snapshots = config.checkpoint.kbest_n
            trainer.extend(
                Snapshot(
                    mode='kbest',
                    max_size=num_snapshots,
                    indicator='VALID/LOSS',
                    less_better=True),
                trigger=(1, 'epoch'))
        # print(trainer.extensions)
        # trainer.run()
        self.trainer = trainer

    def run(self):
        """The routine of the experiment after setup. This method is intended
        to be used by the user.
        """
        self.setup_updater()
        with Timer("Training Done: {}"):
            self.trainer.run()


================================================
FILE: paddlespeech/s2t/exps/u2_kaldi/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/exps/u2_kaldi/bin/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/exps/u2_kaldi/bin/recog.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys

from paddlespeech.s2t.decoders.recog_bin import main

if __name__ == "__main__":
    # Script entry point: forward every command-line token after the program
    # name to the shared recognition driver.
    cli_tokens = sys.argv[1:]
    main(cli_tokens)


================================================
FILE: paddlespeech/s2t/exps/u2_kaldi/bin/test.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Evaluation for U2 model."""
import cProfile

from yacs.config import CfgNode

from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.utils.dynamic_import import dynamic_import
from paddlespeech.utils.argparse import print_arguments

# Short model names accepted by --model-name, mapped to the
# "module.path:ClassName" of the tester class to import dynamically.
model_test_alias = dict(
    u2="paddlespeech.s2t.exps.u2.model:U2Tester",
    u2_kaldi="paddlespeech.s2t.exps.u2_kaldi.model:U2Tester", )


def main_sp(config, args):
    """Run one evaluation-side job (test, export or align) in this process.

    Args:
        config: experiment configuration passed through to the tester class.
        args: parsed command-line arguments; ``args.model_name`` selects the
            tester class via ``model_test_alias`` and ``args.run_mode``
            selects which ``run_*`` method is invoked.

    Raises:
        ValueError: if ``args.run_mode`` is not ``test``, ``export`` or
            ``align``. Previously an unknown mode built the whole experiment
            and then returned without doing anything.
    """
    # Map each supported run mode to the tester method that implements it.
    mode_to_method = {
        'test': 'run_test',
        'export': 'run_export',
        'align': 'run_align',
    }
    if args.run_mode not in mode_to_method:
        # Fail before the (potentially expensive) experiment setup.
        raise ValueError(
            "Unsupported run mode: {!r}, expected one of {}".format(
                args.run_mode, sorted(mode_to_method)))

    class_obj = dynamic_import(args.model_name, model_test_alias)
    exp = class_obj(config, args)
    with exp.eval():
        exp.setup()
        getattr(exp, mode_to_method[args.run_mode])()


def main(config, args):
    """Entry point used by the profiler; delegates to the single-process runner."""
    main_sp(config=config, args=args)


if __name__ == "__main__":
    # Start from the shared parser and add the options specific to this
    # evaluation script.
    # NOTE(review): args.config, args.decode_cfg, args.opts and
    # args.dump_config are not declared here; presumably they come from
    # default_argument_parser() -- confirm.
    parser = default_argument_parser()
    parser.add_argument(
        '--model-name',
        type=str,
        default='u2_kaldi',
        help='model name, e.g: deepspeech2, u2, u2_kaldi, u2_st')
    parser.add_argument(
        '--run-mode',
        type=str,
        default='test',
        help='run mode, e.g. test, align, export')
    parser.add_argument(
        '--dict-path', type=str, default=None, help='dict path.')
    # Path of the file the ASR results are saved to.
    parser.add_argument(
        "--result-file", type=str, help="path of save the asr result")
    # Path the exported jit model is saved to.
    parser.add_argument(
        "--export-path", type=str, help="path of the jit model to save")
    args = parser.parse_args()
    print_arguments(args, globals())

    # Build the experiment config: main YAML first, then the optional decode
    # YAML stored under config.decode, then command-line overrides.
    config = CfgNode()
    config.set_new_allowed(True)
    config.merge_from_file(args.config)
    if args.decode_cfg:
        decode_confs = CfgNode(new_allowed=True)
        decode_confs.merge_from_file(args.decode_cfg)
        config.decode = decode_confs
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()
    print(config)
    # Optionally write the merged config to a file as well.
    if args.dump_config:
        with open(args.dump_config, 'w') as f:
            print(config, file=f)

    # Run the job under cProfile; the stats are dumped to the relative path
    # 'test.profile', i.e. into the current working directory.
    pr = cProfile.Profile()
    pr.runcall(main, config, args)
    pr.dump_stats('test.profile')


================================================
FILE: paddlespeech/s2t/exps/u2_kaldi/bin/train.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Trainer for U2 model."""
import cProfile
import os

from yacs.config import CfgNode

from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.utils.dynamic_import import dynamic_import
from paddlespeech.utils.argparse import print_arguments

# Short model names accepted by --model-name, mapped to the
# "module.path:ClassName" of the trainer class to import dynamically.
model_train_alias = dict(
    u2="paddlespeech.s2t.exps.u2.model:U2Trainer",
    u2_kaldi="paddlespeech.s2t.exps.u2_kaldi.model:U2Trainer", )


def main_sp(config, args):
    """Resolve the trainer class for ``args.model_name``, set it up and run it.

    Args:
        config: experiment configuration passed through to the trainer class.
        args: parsed command-line arguments; ``args.model_name`` is looked up
            in ``model_train_alias``.
    """
    trainer_cls = dynamic_import(args.model_name, model_train_alias)
    experiment = trainer_cls(config, args)
    experiment.setup()
    experiment.run()


def main(config, args):
    """Entry point used by the profiler; delegates to the single-process runner."""
    main_sp(config=config, args=args)


if __name__ == "__main__":
    # Start from the shared parser and add the model selector.
    # NOTE(review): args.config, args.opts, args.dump_config and args.output
    # are not declared here; presumably they come from
    # default_argument_parser() -- confirm.
    parser = default_argument_parser()
    parser.add_argument(
        '--model-name',
        type=str,
        default='u2_kaldi',
        help='model name, e.g: deepspeech2, u2, u2_kaldi, u2_st')
    args = parser.parse_args()
    print_arguments(args, globals())

    # Build the experiment config: YAML file first, then command-line
    # overrides, then freeze it.
    config = CfgNode()
    config.set_new_allowed(True)
    config.merge_from_file(args.config)
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()
    print(config)
    # Optionally write the merged config to a file as well.
    if args.dump_config:
        with open(args.dump_config, 'w') as f:
            print(config, file=f)

    # Run training under cProfile and dump the stats to
    # <args.output>/train.profile.
    pr = cProfile.Profile()
    pr.runcall(main, config, args)
    pr.dump_stats(os.path.join(args.output, 'train.profile'))


================================================
FILE: paddlespeech/s2t/exps/u2_kaldi/model.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains U2 model."""
import json
import os
import time
from collections import defaultdict
from contextlib import nullcontext

import jsonlines
import numpy as np
import paddle
from paddle import distributed as dist

from paddlespeech.s2t.frontend.featurizer import TextFeaturizer
from paddlespeech.s2t.frontend.utility import load_dict
from paddlespeech.s2t.io.dataloader import DataLoaderFactory
from paddlespeech.s2t.models.u2 import U2Model
from paddlespeech.s2t.training.optimizer import OptimizerFactory
from paddlespeech.s2t.training.scheduler import LRSchedulerFactory
from paddlespeech.s2t.training.timer import Timer
from paddlespeech.s2t.training.trainer import Trainer
from paddlespeech.s2t.utils import ctc_utils
from paddlespeech.s2t.utils import error_rate
from paddlespeech.s2t.utils import layer_tools
from paddlespeech.s2t.utils import mp_tools
from paddlespeech.s2t.utils.log import Log
from paddlespeech.s2t.utils.utility import UpdateConfig

# Module-level logger, named after this module and obtained through the
# project's ``Log`` wrapper.
logger = Log(__name__).getlog()


class U2Trainer(Trainer):
    """Step-driven training experiment for the U2 model (kaldi-style recipe).

    Implements the per-batch update (``train_batch``), the validation pass
    (``valid``), the epoch loop (``do_train``) and the construction of the
    data loaders, model, LR scheduler and optimizer.
    """

    def __init__(self, config, args):
        super().__init__(config, args)

    def train_batch(self, batch_index, batch_data, msg):
        """Run forward/backward on one batch and step the optimizer when due.

        Args:
            batch_index: zero-based index of the batch within the epoch; used
                to decide when gradient accumulation is complete and when to
                log.
            batch_data: 5-tuple ``(utt, audio, audio_len, text, text_len)``.
            msg: log-line prefix prepared by the caller; training statistics
                are appended to it before it is logged.
        """
        train_conf = self.config
        start = time.time()

        # forward
        utt, audio, audio_len, text, text_len = batch_data
        loss, attention_loss, ctc_loss = self.model(audio, audio_len, text,
                                                    text_len)

        # loss div by `batch_size * accum_grad`
        loss /= train_conf.accum_grad
        # Report the un-scaled loss so logged values do not depend on
        # accum_grad.
        losses_np = {'loss': float(loss) * train_conf.accum_grad}
        if attention_loss:
            losses_np['att_loss'] = float(attention_loss)
        if ctc_loss:
            losses_np['ctc_loss'] = float(ctc_loss)

        # loss backward
        if (batch_index + 1) % train_conf.accum_grad != 0:
            # Disable gradient synchronizations across DDP processes.
            # Within this context, gradients will be accumulated on module
            # variables, which will later be synchronized.
            context = self.model.no_sync if (hasattr(self.model, "no_sync") and
                                             self.parallel) else nullcontext
        else:
            # Used for single gpu training and DDP gradient synchronization
            # processes.
            context = nullcontext
        with context():
            loss.backward()
            layer_tools.print_grads(self.model, print_func=None)

        # optimizer step
        if (batch_index + 1) % train_conf.accum_grad == 0:
            self.optimizer.step()
            self.optimizer.clear_grad()
            self.lr_scheduler.step()
            self.iteration += 1

        iteration_time = time.time() - start

        if (batch_index + 1) % train_conf.log_interval == 0:
            msg += "train time: {:>.3f}s, ".format(iteration_time)
            msg += "batch size: {}, ".format(self.config.batch_size)
            msg += "accum: {}, ".format(train_conf.accum_grad)
            msg += ', '.join('{}: {:>.6f}'.format(k, v)
                             for k, v in losses_np.items())
            logger.info(msg)

            if dist.get_rank() == 0 and self.visualizer:
                losses_np_v = losses_np.copy()
                losses_np_v.update({"lr": self.lr_scheduler()})
                for key, val in losses_np_v.items():
                    self.visualizer.add_scalar(
                        tag="train/" + key, value=val, step=self.iteration - 1)

    @paddle.no_grad()
    def valid(self):
        """Evaluate the model on ``self.valid_loader``.

        Batches whose loss is not finite are skipped.

        Returns:
            tuple: ``(total_loss, num_seen_utts)`` where ``total_loss`` is the
            sum of ``loss * batch_utterances`` over the counted batches and
            ``num_seen_utts`` is the utterance counter (which starts at 1).
        """
        self.model.eval()
        if not self.use_streamdata:
            logger.info(
                f"Valid Total Examples: {len(self.valid_loader.dataset)}")
        valid_losses = defaultdict(list)
        # The counter starts at 1, so the divisions below never divide by
        # zero even when no batch is counted.
        num_seen_utts = 1
        total_loss = 0.0

        for i, batch in enumerate(self.valid_loader):
            utt, audio, audio_len, text, text_len = batch
            loss, attention_loss, ctc_loss = self.model(audio, audio_len, text,
                                                        text_len)
            if paddle.isfinite(loss):
                num_utts = batch[1].shape[0]
                num_seen_utts += num_utts
                total_loss += float(loss) * num_utts
                valid_losses['val_loss'].append(float(loss))
                if attention_loss:
                    valid_losses['val_att_loss'].append(float(attention_loss))
                if ctc_loss:
                    valid_losses['val_ctc_loss'].append(float(ctc_loss))

            if (i + 1) % self.config.log_interval == 0:
                valid_dump = {k: np.mean(v) for k, v in valid_losses.items()}
                valid_dump['val_history_loss'] = total_loss / num_seen_utts

                # logging
                msg = f"Valid: Rank: {dist.get_rank()}, "
                msg += "epoch: {}, ".format(self.epoch)
                msg += "step: {}, ".format(self.iteration)
                if not self.use_streamdata:
                    msg += "batch: {}/{}, ".format(i + 1,
                                                   len(self.valid_loader))
                msg += ', '.join('{}: {:>.6f}'.format(k, v)
                                 for k, v in valid_dump.items())
                logger.info(msg)

        logger.info('Rank {} Val info val_loss {}'.format(
            dist.get_rank(), total_loss / num_seen_utts))
        return total_loss, num_seen_utts

    def do_train(self):
        """The training process control by step."""
        # !!!IMPORTANT!!!
        # Try to export the model by script, if fails, we should refine
        # the code to satisfy the script export requirements
        # script_model = paddle.jit.to_static(self.model)
        # script_model_path = str(self.checkpoint_dir / 'init')
        # paddle.jit.save(script_model, script_model_path)

        self.before_train()
        if not self.use_streamdata:
            logger.info(
                f"Train Total Examples: {len(self.train_loader.dataset)}")
        while self.epoch < self.config.n_epoch:
            with Timer("Epoch-Train Time Cost: {}"):
                self.model.train()
                try:
                    data_start_time = time.time()
                    for batch_index, batch in enumerate(self.train_loader):
                        dataload_time = time.time() - data_start_time
                        msg = "Train: Rank: {}, ".format(dist.get_rank())
                        msg += "epoch: {}, ".format(self.epoch)
                        msg += "step: {}, ".format(self.iteration)
                        if not self.use_streamdata:
                            msg += "batch : {}/{}, ".format(
                                batch_index + 1, len(self.train_loader))
                        msg += "lr: {:>.8f}, ".format(self.lr_scheduler())
                        msg += "data time: {:>.3f}s, ".format(dataload_time)
                        self.train_batch(batch_index, batch, msg)
                        self.after_train_batch()
                        data_start_time = time.time()
                except Exception as e:
                    # Log the failure, then re-raise it unchanged.
                    logger.error(e)
                    raise

            with Timer("Eval Time Cost: {}"):
                total_loss, num_seen_utts = self.valid()
                if dist.get_world_size() > 1:
                    # Sum the per-rank totals so every rank computes the same
                    # cross-validation loss.
                    num_seen_utts = paddle.to_tensor(num_seen_utts)
                    # the default operator in all_reduce function is sum.
                    dist.all_reduce(num_seen_utts)
                    total_loss = paddle.to_tensor(total_loss)
                    dist.all_reduce(total_loss)
                    cv_loss = total_loss / num_seen_utts
                    cv_loss = float(cv_loss)
                else:
                    cv_loss = total_loss / num_seen_utts

            logger.info(
                'Epoch {} Val info val_loss {}'.format(self.epoch, cv_loss))
            if self.visualizer:
                self.visualizer.add_scalar(
                    tag='eval/cv_loss', value=cv_loss, step=self.epoch)
                self.visualizer.add_scalar(
                    tag='eval/lr', value=self.lr_scheduler(), step=self.epoch)

            self.save(tag=self.epoch, infos={'val_loss': cv_loss})
            self.new_epoch()

    def setup_dataloader(self):
        """Create the data loaders for the current mode.

        In train mode (``self.train`` truthy) this sets ``train_loader`` and
        ``valid_loader``; otherwise it sets ``test_loader`` and
        ``align_loader``. ``self.use_streamdata`` is always set from the
        ``use_stream_data`` config entry (default ``False``).
        """
        # Read from self.config: the name `config` is a local that is only
        # bound further down, so reading it here raised UnboundLocalError.
        self.use_streamdata = self.config.get("use_stream_data", False)
        if self.train:
            config = self.config.clone()
            self.train_loader = DataLoaderFactory.get_dataloader(
                'train', config, self.args)
            config = self.config.clone()
            config['preprocess_config'] = None
            self.valid_loader = DataLoaderFactory.get_dataloader(
                'valid', config, self.args)
            logger.info("Setup train/valid Dataloader!")
        else:
            config = self.config.clone()
            config['preprocess_config'] = None
            self.test_loader = DataLoaderFactory.get_dataloader('test', config,
                                                                self.args)
            config = self.config.clone()
            config['preprocess_config'] = None
            self.align_loader = DataLoaderFactory.get_dataloader(
                'align', config, self.args)
            logger.info("Setup test/align Dataloader!")

    def setup_model(self):
        """Build the U2 model, its LR scheduler and its optimizer.

        Sets ``self.model``, ``self.lr_scheduler`` and ``self.optimizer``.
        """
        config = self.config

        # model
        model_conf = config
        # setup_dataloader only creates train_loader in train mode, so take
        # the feature/vocab sizes from the loader that exists for this mode.
        # NOTE(review): assumes test_loader exposes vocab_size like
        # train_loader does (both come from DataLoaderFactory) -- confirm.
        dim_source = self.train_loader if self.train else self.test_loader
        with UpdateConfig(model_conf):
            model_conf.input_dim = dim_source.feat_dim
            model_conf.output_dim = dim_source.vocab_size
        model = U2Model.from_config(model_conf)
        if self.parallel:
            model = paddle.DataParallel(model)
        layer_tools.print_params(model, logger.info)

        # lr
        scheduler_conf = config.scheduler_conf
        scheduler_args = {
            "learning_rate": scheduler_conf.lr,
            "warmup_steps": scheduler_conf.warmup_steps,
            "gamma": scheduler_conf.lr_decay,
            "d_model": model_conf.encoder_conf.output_size,
            "verbose": False,
        }
        lr_scheduler = LRSchedulerFactory.from_args(config.scheduler,
                                                    scheduler_args)

        # opt
        def optimizer_args(
                config,
                parameters,
                lr_scheduler=None, ):
            """Assemble the keyword arguments handed to the optimizer factory."""
            optim_conf = config.optim_conf
            return {
                "grad_clip": optim_conf.global_grad_clip,
                "weight_decay": optim_conf.weight_decay,
                "learning_rate": lr_scheduler,
                "parameters": parameters,
            }

        optimizer_kwargs = optimizer_args(config,
                                          model.parameters(), lr_scheduler)
        optimizer = OptimizerFactory.from_args(config.optim, optimizer_kwargs)

        self.model = model
        self.lr_scheduler = lr_scheduler
        self.optimizer = optimizer
        logger.info("Setup model/optimizer/lr_scheduler!")


class U2Tester(U2Trainer):
    """Evaluation-side experiment: decoding/scoring, CTC alignment and export.

    Reuses the data loader and model setup of ``U2Trainer`` and adds a text
    featurizer used to turn token ids back into transcripts.
    """

    def __init__(self, config, args):
        super().__init__(config, args)
        cfg = self.config
        self.text_feature = TextFeaturizer(
            unit_type=cfg.unit_type,
            vocab=cfg.vocab_filepath,
            spm_model_prefix=cfg.spm_model_prefix)
        self.vocab_list = self.text_feature.vocab_list

    def id2token(self, texts, texts_len, text_feature):
        """Convert a batch of token-id sequences into transcripts.

        Each sequence in ``texts`` is cut to its length from ``texts_len`` and
        passed through ``text_feature.defeaturize``.

        Returns:
            list: one defeaturized transcript per input sequence.
        """
        return [
            text_feature.defeaturize(
                seq[:seq_len.numpy().item()].numpy().tolist())
            for seq, seq_len in zip(texts, texts_len)
        ]

    def compute_metrics(self,
                        utts,
                        audio,
                        audio_len,
                        texts,
                        texts_len,
                        fout=None):
        """Decode one batch and score it against the reference transcripts.

        Args:
            utts: utterance ids of the batch.
            audio, audio_len: model inputs and their lengths.
            texts, texts_len: reference token ids and their lengths.
            fout: optional writer; when given, one record per utterance is
                written with the reference, hypothesis and hypothesis ids.

        Returns:
            dict: accumulated errors, reference length, number of examples,
            batch error rate, error-rate type, total frame count and the
            time spent decoding.
        """
        decode_cfg = self.config.decode
        if decode_cfg.error_rate_type == 'cer':
            errors_func = error_rate.char_errors
        else:
            errors_func = error_rate.word_errors
        if decode_cfg.error_rate_type == 'cer':
            error_rate_func = error_rate.cer
        else:
            error_rate_func = error_rate.wer

        batch_errors, batch_ref_len, batch_count = 0.0, 0, 0

        # The timer covers both the reference conversion and the decoding.
        tic = time.time()
        references = self.id2token(texts, texts_len, self.text_feature)
        hypotheses, hypothesis_ids = self.model.decode(
            audio,
            audio_len,
            text_feature=self.text_feature,
            decoding_method=decode_cfg.decoding_method,
            beam_size=decode_cfg.beam_size,
            ctc_weight=decode_cfg.ctc_weight,
            decoding_chunk_size=decode_cfg.decoding_chunk_size,
            num_decoding_left_chunks=decode_cfg.num_decoding_left_chunks,
            simulate_streaming=decode_cfg.simulate_streaming)
        elapsed = time.time() - tic

        for utt, target, result, rec_tids in zip(utts, references, hypotheses,
                                                 hypothesis_ids):
            errors, len_ref = errors_func(target, result)
            batch_errors += errors
            batch_ref_len += len_ref
            batch_count += 1
            if fout:
                record = {
                    "utt": utt,
                    "refs": [target],
                    "hyps": [result],
                    "hyps_tokenid": [rec_tids],
                }
                fout.write(record)
            logger.info(f"Utt: {utt}")
            logger.info(f"Ref: {target}")
            logger.info(f"Hyp: {result}")
            logger.info(
                "One example error rate [%s] = %f" %
                (decode_cfg.error_rate_type, error_rate_func(target, result)))

        return {
            'errors_sum': batch_errors,
            'len_refs': batch_ref_len,
            'num_ins': batch_count,  # num examples
            'error_rate': batch_errors / batch_ref_len,
            'error_rate_type': decode_cfg.error_rate_type,
            'num_frames': audio_len.sum().numpy().item(),
            'decode_time': elapsed,
        }

    @mp_tools.rank_zero_only
    @paddle.no_grad()
    def test(self):
        """Decode the whole test set, log running metrics and write results.

        Per-utterance results go to ``self.args.result_file`` (jsonlines) and
        a one-line JSON summary goes to the same path with a ``.err``
        extension.
        """
        assert self.args.result_file
        self.model.eval()
        if not self.use_streamdata:
            logger.info(f"Test Total Examples: {len(self.test_loader.dataset)}")

        frame_shift = self.config.stride_ms
        err_type = None
        total_errors, total_ref_len, total_utts = 0.0, 0, 0
        total_frames = 0.0
        total_time = 0.0
        with jsonlines.open(self.args.result_file, 'w') as writer:
            for batch in self.test_loader:
                stats = self.compute_metrics(*batch, fout=writer)
                total_frames += stats['num_frames']
                total_time += stats["decode_time"]
                total_errors += stats['errors_sum']
                total_ref_len += stats['len_refs']
                total_utts += stats['num_ins']
                err_type = stats['error_rate_type']
                rtf = total_time / (total_frames * frame_shift)
                logger.info(
                    "RTF: %f, Error rate [%s] (%d/?) = %f" %
                    (rtf, err_type, total_utts, total_errors / total_ref_len))

        rtf = total_time / (total_frames * frame_shift)
        summary_line = "Test: epoch: {}, step: {}, RTF: {}, ".format(
            self.epoch, self.iteration, rtf)
        summary_line += "Final error rate [%s] (%d/%d) = %f" % (
            err_type, total_utts, total_utts, total_errors / total_ref_len)
        logger.info(summary_line)

        # test meta results
        meta_path = os.path.splitext(self.args.result_file)[0] + '.err'
        with open(meta_path, 'w') as meta_file:
            summary = {
                "epoch": self.epoch,
                "step": self.iteration,
                "rtf": rtf,
                err_type: total_errors / total_ref_len,
                "dataset_hour": (total_frames * frame_shift) / 1000.0 / 3600.0,
                "process_hour": total_time / 1000.0 / 3600.0,
                "num_examples": total_utts,
                "err_sum": total_errors,
                "ref_len": total_ref_len,
                "decode_method": self.config.decode.decoding_method,
            }
            meta_file.write(json.dumps(summary) + '\n')

    @paddle.no_grad()
    def align(self):
        """Run CTC alignment over ``self.align_loader`` via ``ctc_utils``."""
        ctc_utils.ctc_align(
            self.config,
            self.model,
            self.align_loader,
            self.config.decode.decode_batch_size,
            self.config.stride_ms,
            self.vocab_list,
            self.args.result_file, )

    def load_inferspec(self):
        """infer model and input spec.

        Returns:
            nn.Layer: inference model
            List[paddle.static.InputSpec]: input spec.
        """
        from paddlespeech.s2t.models.u2 import U2InferModel
        infer_model = U2InferModel.from_pretrained(
            self.test_loader, self.config.clone(), self.args.checkpoint_path)
        feat_dim = self.test_loader.feat_dim
        # audio, [B,T,D]
        audio_spec = paddle.static.InputSpec(
            shape=[1, None, feat_dim], dtype='float32')
        # audio_length, [B]
        length_spec = paddle.static.InputSpec(shape=[1], dtype='int64')
        return infer_model, [audio_spec, length_spec]

    @paddle.no_grad()
    def export(self):
        """Convert the inference model to static graph and save it.

        The model is saved to ``self.args.export_path``.
        """
        infer_model, input_spec = self.load_inferspec()
        assert isinstance(input_spec, list), type(input_spec)
        infer_model.eval()
        static_model = paddle.jit.to_static(infer_model, input_spec=input_spec)
        logger.info(f"Export code: {static_model.forward.code}")
        paddle.jit.save(static_model, self.args.export_path)

    def setup_dict(self):
        """Load the token dictionary into ``self.args.char_list``."""
        # load dictionary for debug log
        is_maskctc = "maskctc" in self.args.model_name
        self.args.char_list = load_dict(self.args.dict_path, is_maskctc)

    def setup(self):
        """Run the inherited setup, then load the dictionary."""
        super().setup()
        self.setup_dict()


================================================
FILE: paddlespeech/s2t/exps/u2_st/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/exps/u2_st/bin/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/exps/u2_st/bin/export.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Export for U2 model."""
from yacs.config import CfgNode

from paddlespeech.s2t.exps.u2_st.model import U2STTester as Tester
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.utils.argparse import print_arguments


def main_sp(config, args):
    """Single-process entry: build the tester and export the model.

    Args:
        config: experiment configuration handed to the tester.
        args: parsed command-line arguments handed to the tester.
    """
    tester = Tester(config, args)
    # Both setup and export happen inside the tester's eval context.
    with tester.eval():
        tester.setup()
        tester.run_export()


def main(config, args):
    """Entry point of the export script; delegates to ``main_sp``."""
    main_sp(config=config, args=args)


if __name__ == "__main__":
    # Script entry: parse CLI arguments, assemble the config, run the export.
    parser = default_argument_parser()
    # Extra option of this script: destination of the exported jit model.
    parser.add_argument(
        "--export_path", type=str, help="path of the jit model to save")
    args = parser.parse_args()
    print_arguments(args, globals())

    # https://yaml.org/type/float.html
    # Start from an empty node that accepts new keys, then layer the config
    # file and finally the command-line overrides on top of it.
    config = CfgNode(new_allowed=True)
    if args.config:
        config.merge_from_file(args.config)
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()
    print(config)
    # Optionally write the final (merged, frozen) config to a file.
    if args.dump_config:
        with open(args.dump_config, 'w') as f:
            print(config, file=f)

    main(config, args)


================================================
FILE: paddlespeech/s2t/exps/u2_st/bin/test.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Evaluation for U2 model."""
import cProfile

from yacs.config import CfgNode

from paddlespeech.s2t.exps.u2_st.model import U2STTester as Tester
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.utils.argparse import print_arguments


def main_sp(config, args):
    """Single-process entry: build the tester and run the test pass.

    Args:
        config: experiment configuration handed to the tester.
        args: parsed command-line arguments handed to the tester.
    """
    tester = Tester(config, args)
    # Both setup and testing happen inside the tester's eval context.
    with tester.eval():
        tester.setup()
        tester.run_test()


def main(config, args):
    """Entry point of the test script; delegates to ``main_sp``."""
    main_sp(config=config, args=args)


if __name__ == "__main__":
    # Script entry: parse CLI arguments, assemble the config, run the test
    # under the profiler.
    parser = default_argument_parser()
    args = parser.parse_args()
    print_arguments(args, globals())

    # https://yaml.org/type/float.html
    # Start from an empty node that accepts new keys, then layer the config
    # file, the decode config and finally the command-line overrides.
    config = CfgNode(new_allowed=True)
    if args.config:
        config.merge_from_file(args.config)
    if args.decode_cfg:
        # The decode config file is loaded separately and attached as
        # ``config.decode``.
        decode_conf = CfgNode(new_allowed=True)
        decode_conf.merge_from_file(args.decode_cfg)
        config.decode = decode_conf
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()
    print(config)
    # Optionally write the final (merged, frozen) config to a file.
    if args.dump_config:
        with open(args.dump_config, 'w') as f:
            print(config, file=f)

    # Setting for profiling
    # The whole run is executed under cProfile; stats are written to
    # 'test.profile' relative to the current working directory.
    pr = cProfile.Profile()
    pr.runcall(main, config, args)
    pr.dump_stats('test.profile')


================================================
FILE: paddlespeech/s2t/exps/u2_st/bin/train.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Trainer for U2 model."""
import cProfile
import os

from yacs.config import CfgNode

from paddlespeech.s2t.exps.u2_st.model import U2STTrainer as Trainer
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.utils.argparse import print_arguments


def main_sp(config, args):
    """Single-process entry: build the trainer, set it up and run it.

    Args:
        config: experiment configuration handed to the trainer.
        args: parsed command-line arguments handed to the trainer.
    """
    trainer = Trainer(config, args)
    trainer.setup()
    trainer.run()


def main(config, args):
    """Entry point of the training script; delegates to ``main_sp``."""
    main_sp(config=config, args=args)


if __name__ == "__main__":
    # Script entry: parse CLI arguments, assemble the config, run training
    # under the profiler.
    parser = default_argument_parser()
    args = parser.parse_args()
    print_arguments(args, globals())

    # https://yaml.org/type/float.html
    # Start from an empty node that accepts new keys, then layer the config
    # file and finally the command-line overrides on top of it.
    config = CfgNode(new_allowed=True)
    if args.config:
        config.merge_from_file(args.config)
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()
    print(config)
    # Optionally write the final (merged, frozen) config to a file.
    if args.dump_config:
        with open(args.dump_config, 'w') as f:
            print(config, file=f)

    # Setting for profiling
    # The whole run is executed under cProfile; stats are written to
    # 'train.profile' inside the directory given by ``args.output``.
    pr = cProfile.Profile()
    pr.runcall(main, config, args)
    pr.dump_stats(os.path.join(args.output, 'train.profile'))


================================================
FILE: paddlespeech/s2t/exps/u2_st/model.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains U2 model."""
import json
import os
import time
from collections import defaultdict
from collections import OrderedDict
from contextlib import nullcontext

import jsonlines
import numpy as np
import paddle
from paddle import distributed as dist

from paddlespeech.s2t.frontend.featurizer import TextFeaturizer
from paddlespeech.s2t.io.dataloader import DataLoaderFactory
from paddlespeech.s2t.models.u2_st import U2STModel
from paddlespeech.s2t.training.optimizer import OptimizerFactory
from paddlespeech.s2t.training.reporter import ObsScope
from paddlespeech.s2t.training.reporter import report
from paddlespeech.s2t.training.scheduler import LRSchedulerFactory
from paddlespeech.s2t.training.timer import Timer
from paddlespeech.s2t.training.trainer import Trainer
from paddlespeech.s2t.utils import bleu_score
from paddlespeech.s2t.utils import layer_tools
from paddlespeech.s2t.utils import mp_tools
from paddlespeech.s2t.utils.log import Log
from paddlespeech.s2t.utils.utility import UpdateConfig

logger = Log(__name__).getlog()


class U2STTrainer(Trainer):
    def __init__(self, config, args):
        """Forward ``config`` and ``args`` unchanged to the base ``Trainer``."""
        super().__init__(config, args)

    def train_batch(self, batch_index, batch_data, msg):
        """Run one forward/backward pass and step the optimizer when due.

        Args:
            batch_index: zero-based batch index inside the current epoch; it
                drives the gradient-accumulation and logging schedules.
            batch_data: ``(utt, audio, audio_len, text, text_len)``. When both
                ``text`` and ``text_len`` are lists they are unpacked as
                ``[translation, transcription]`` for joint training with ASR.
            msg: log prefix; extended and logged every ``log_interval``
                batches.
        """
        train_conf = self.config
        start = time.time()
        # forward (the utterance ids are not needed to compute the loss)
        _, audio, audio_len, text, text_len = batch_data
        if isinstance(text, list) and isinstance(text_len, list):
            # joint training with ASR. Two decoding texts [translation, transcription]
            text, text_transcript = text
            text_len, text_transcript_len = text_len
            loss, st_loss, attention_loss, ctc_loss = self.model(
                audio, audio_len, text, text_len, text_transcript,
                text_transcript_len)
        else:
            loss, st_loss, attention_loss, ctc_loss = self.model(
                audio, audio_len, text, text_len)

        # loss div by `batch_size * accum_grad`
        loss /= train_conf.accum_grad
        # Multiply back so the reported total loss is the un-scaled value.
        losses_np = {'loss': float(loss) * train_conf.accum_grad}
        # Compare against None instead of relying on truthiness: a loss whose
        # value is exactly zero is still a valid loss and must be reported.
        optional_losses = (
            ('st_loss', st_loss),
            ('att_loss', attention_loss),
            ('ctc_loss', ctc_loss), )
        for name, value in optional_losses:
            if value is not None:
                losses_np[name] = float(value)

        # loss backward
        if (batch_index + 1) % train_conf.accum_grad != 0:
            # Disable gradient synchronizations across DDP processes.
            # Within this context, gradients will be accumulated on module
            # variables, which will later be synchronized.
            context = self.model.no_sync if (hasattr(self.model, "no_sync") and
                                             self.parallel) else nullcontext
        else:
            # Used for single gpu training and DDP gradient synchronization
            # processes.
            context = nullcontext
        with context():
            loss.backward()
            layer_tools.print_grads(self.model, print_func=None)

        # optimizer step (only on the last micro-batch of an accumulation
        # window)
        if (batch_index + 1) % train_conf.accum_grad == 0:
            self.optimizer.step()
            self.optimizer.clear_grad()
            self.lr_scheduler.step()
            self.iteration += 1

        iteration_time = time.time() - start

        for k, v in losses_np.items():
            report(k, v)
        report("batch_size", self.config.batch_size)
        report("accum", train_conf.accum_grad)
        report("step_cost", iteration_time)

        if (batch_index + 1) % train_conf.log_interval == 0:
            msg += "train time: {:>.3f}s, ".format(iteration_time)
            msg += "batch size: {}, ".format(self.config.batch_size)
            msg += "accum: {}, ".format(train_conf.accum_grad)
            msg += ', '.join('{}: {:>.6f}'.format(k, v)
                             for k, v in losses_np.items())
            logger.info(msg)

            # Only rank 0 writes scalars to the visualizer.
            if dist.get_rank() == 0 and self.visualizer:
                losses_np_v = losses_np.copy()
                losses_np_v.update({"lr": self.lr_scheduler()})
                for key, val in losses_np_v.items():
                    self.visualizer.add_scalar(
                        tag="train/" + key, value=val, step=self.iteration - 1)

    @paddle.no_grad()
    def valid(self):
        """Evaluate the model on the validation loader.

        Batches whose total loss is not finite are skipped.

        Returns:
            tuple: ``(total_loss, num_seen_utts)`` where ``total_loss`` is the
            utterance-weighted sum of the ST losses and ``num_seen_utts`` is
            the utterance counter (which starts at 1, see below).
        """
        self.model.eval()
        if not self.use_streamdata:
            logger.info(
                f"Valid Total Examples: {len(self.valid_loader.dataset)}")
        valid_losses = defaultdict(list)
        # The counter starts at 1, so the divisions below stay defined even
        # when no batch is counted.
        # NOTE(review): this also offsets the reported mean slightly; confirm
        # that is intended before changing it (callers divide by this value).
        num_seen_utts = 1
        total_loss = 0.0
        for i, batch in enumerate(self.valid_loader):
            _, audio, audio_len, text, text_len = batch
            if isinstance(text, list) and isinstance(text_len, list):
                # Two target texts: [translation, transcription].
                text, text_transcript = text
                text_len, text_transcript_len = text_len
                loss, st_loss, attention_loss, ctc_loss = self.model(
                    audio, audio_len, text, text_len, text_transcript,
                    text_transcript_len)
            else:
                loss, st_loss, attention_loss, ctc_loss = self.model(
                    audio, audio_len, text, text_len)
            if paddle.isfinite(loss):
                num_utts = batch[1].shape[0]
                num_seen_utts += num_utts
                total_loss += float(st_loss) * num_utts
                valid_losses['val_loss'].append(float(st_loss))
                # Compare against None instead of relying on truthiness so a
                # loss that is exactly zero is still recorded.
                if attention_loss is not None:
                    valid_losses['val_att_loss'].append(float(attention_loss))
                if ctc_loss is not None:
                    valid_losses['val_ctc_loss'].append(float(ctc_loss))

            if (i + 1) % self.config.log_interval == 0:
                valid_dump = {k: np.mean(v) for k, v in valid_losses.items()}
                valid_dump['val_history_st_loss'] = total_loss / num_seen_utts

                # logging
                msg = f"Valid: Rank: {dist.get_rank()}, "
                msg += "epoch: {}, ".format(self.epoch)
                msg += "step: {}, ".format(self.iteration)
                if not self.use_streamdata:
                    msg += "batch: {}/{}, ".format(i + 1,
                                                   len(self.valid_loader))
                msg += ', '.join('{}: {:>.6f}'.format(k, v)
                                 for k, v in valid_dump.items())
                logger.info(msg)

        logger.info('Rank {} Val info st_val_loss {}'.format(
            dist.get_rank(), total_loss / num_seen_utts))
        return total_loss, num_seen_utts

    def do_train(self):
        """Run the epoch loop: train, validate, log and checkpoint.

        Each epoch iterates over ``self.train_loader``, calling
        ``train_batch`` per batch and building a log line from the values
        reported inside the observation scope. It then runs ``valid()``,
        reduces the validation statistics across ranks when the world size is
        greater than one, and saves a checkpoint tagged with the epoch.

        Raises:
            Exception: any exception raised while iterating the training
                loader is logged and re-raised unchanged.
        """
        # !!!IMPORTANT!!!
        # Try to export the model by script, if fails, we should refine
        # the code to satisfy the script export requirements
        # script_model = paddle.jit.to_static(self.model)
        # script_model_path = str(self.checkpoint_dir / 'init')
        # paddle.jit.save(script_model, script_model_path)

        self.before_train()
        if not self.use_streamdata:
            logger.info(
                f"Train Total Examples: {len(self.train_loader.dataset)}")
        while self.epoch < self.config.n_epoch:
            with Timer("Epoch-Train Time Cost: {}"):
                self.model.train()
                try:
                    data_start_time = time.time()
                    for batch_index, batch in enumerate(self.train_loader):
                        # Time spent waiting for this batch only.
                        dataload_time = time.time() - data_start_time
                        msg = "Train:"
                        observation = OrderedDict()
                        with ObsScope(observation):
                            report("Rank", dist.get_rank())
                            report("epoch", self.epoch)
                            report('step', self.iteration)
                            report("lr", self.lr_scheduler())
                            self.train_batch(batch_index, batch, msg)
                            self.after_train_batch()
                            report('iter', batch_index + 1)
                            if not self.use_streamdata:
                                report('total', len(self.train_loader))
                            report('reader_cost', dataload_time)
                        observation['batch_cost'] = observation[
                            'reader_cost'] + observation['step_cost']
                        observation['samples'] = observation['batch_size']
                        observation['ips,sent./sec'] = observation[
                            'batch_size'] / observation['batch_cost']
                        # A key of the form "name,unit" is printed as
                        # "name: value unit".
                        for k, v in observation.items():
                            msg += f" {k.split(',')[0]}: "
                            msg += f"{v:>.8f}" if isinstance(v,
                                                             float) else f"{v}"
                            msg += f" {k.split(',')[1]}" if len(
                                k.split(',')) == 2 else ""
                            msg += ","
                        msg = msg[:-1]  # remove the last ","
                        if (batch_index + 1) % self.config.log_interval == 0:
                            logger.info(msg)
                        # Restart the reader timer for the next batch;
                        # without this, reader_cost (and therefore batch_cost
                        # and ips) would keep growing over the whole epoch.
                        data_start_time = time.time()
                except Exception as e:
                    logger.error(e)
                    raise

            with Timer("Eval Time Cost: {}"):
                total_loss, num_seen_utts = self.valid()
                if dist.get_world_size() > 1:
                    num_seen_utts = paddle.to_tensor(num_seen_utts)
                    # the default operator in all_reduce function is sum.
                    dist.all_reduce(num_seen_utts)
                    total_loss = paddle.to_tensor(total_loss)
                    dist.all_reduce(total_loss)
                    cv_loss = total_loss / num_seen_utts
                    cv_loss = float(cv_loss)
                else:
                    cv_loss = total_loss / num_seen_utts

            logger.info(
                'Epoch {} Val info val_loss {}'.format(self.epoch, cv_loss))
            if self.visualizer:
                self.visualizer.add_scalar(
                    tag='eval/cv_loss', value=cv_loss, step=self.epoch)
                self.visualizer.add_scalar(
                    tag='eval/lr', value=self.lr_scheduler(), step=self.epoch)

            self.save(tag=self.epoch, infos={'val_loss': cv_loss})
            self.new_epoch()

    def setup_dataloader(self):
        """Create the data loaders needed for the current mode.

        In training mode ``self.train_loader`` and ``self.valid_loader`` are
        created; otherwise only ``self.test_loader``. The loaders receive a
        clone of ``self.config`` extended with a ``load_transcript`` flag,
        and ``self.use_streamdata`` is read from the ``use_stream_data`` key.
        """
        # The flag is on only when ``model_conf.asr_weight`` is positive.
        needs_transcript = bool(self.config.model_conf.asr_weight > 0)

        loader_conf = self.config.clone()
        loader_conf['load_transcript'] = needs_transcript
        self.use_streamdata = loader_conf.get("use_stream_data", False)

        if not self.train:
            self.test_loader = DataLoaderFactory.get_dataloader(
                'test', loader_conf, self.args)
            logger.info("Setup test Dataloader!")
            return

        self.train_loader = DataLoaderFactory.get_dataloader(
            'train', loader_conf, self.args)
        self.valid_loader = DataLoaderFactory.get_dataloader(
            'valid', loader_conf, self.args)
        logger.info("Setup train/valid Dataloader!")

    def setup_model(self):
        """Build the model, the LR scheduler and the optimizer.

        ``input_dim`` and ``output_dim`` are written into the config from the
        active data loader (train loader in training mode, test loader
        otherwise) before the model is created. The results are stored on
        ``self.model``, ``self.optimizer`` and ``self.lr_scheduler``.
        """
        config = self.config
        model_conf = config
        # Feature and vocabulary sizes come from whichever loader this mode
        # created.
        loader = self.train_loader if self.train else self.test_loader
        with UpdateConfig(model_conf):
            model_conf.input_dim = loader.feat_dim
            model_conf.output_dim = loader.vocab_size

        model = U2STModel.from_config(model_conf)

        if self.parallel:
            model = paddle.DataParallel(model)

        logger.info(f"{model}")
        layer_tools.print_params(model, logger.info)

        optim_type = config.optim
        optim_conf = config.optim_conf
        scheduler_type = config.scheduler
        scheduler_conf = config.scheduler_conf

        scheduler_args = {
            "learning_rate": optim_conf.lr,
            "verbose": False,
            "warmup_steps": scheduler_conf.warmup_steps,
            "gamma": scheduler_conf.lr_decay,
            "d_model": model_conf.encoder_conf.output_size,
        }
        lr_scheduler = LRSchedulerFactory.from_args(scheduler_type,
                                                    scheduler_args)

        # The Adam-style hyper-parameters are only given values for the
        # 'noam' optimizer type; otherwise they are passed as None.
        is_noam = optim_type == 'noam'
        optimizer_args = {
            "grad_clip": config.global_grad_clip,
            "weight_decay": optim_conf.weight_decay,
            "learning_rate": lr_scheduler if lr_scheduler else optim_conf.lr,
            "parameters": model.parameters(),
            "epsilon": 1e-9 if is_noam else None,
            "beta1": 0.9 if is_noam else None,
            # This key used to be misspelled "beat2", so the value could
            # never reach an optimizer under its real parameter name.
            "beta2": 0.98 if is_noam else None,
        }
        optimizer = OptimizerFactory.from_args(optim_type, optimizer_args)

        self.model = model
        self.optimizer = optimizer
        self.lr_scheduler = lr_scheduler
        logger.info("Setup model/optimizer/lr_scheduler!")


class U2STTester(U2STTrainer):
    def __init__(self, config, args):
        """Initialise the trainer base and build the text featurizer.

        The featurizer is created from the ``unit_type``, ``vocab_filepath``
        and ``spm_model_prefix`` entries of the config; its vocabulary list
        is kept on ``self.vocab_list``.
        """
        super().__init__(config, args)
        conf = self.config
        featurizer = TextFeaturizer(
            unit_type=conf.unit_type,
            vocab=conf.vocab_filepath,
            spm_model_prefix=conf.spm_model_prefix)
        self.text_feature = featurizer
        self.vocab_list = featurizer.vocab_list

    def id2token(self, texts, texts_len, text_feature):
        """Convert batches of token ids back to text.

        Each entry of ``texts`` is cut to the length given by the matching
        entry of ``texts_len`` and passed, as a plain list, to
        ``text_feature.defeaturize``.

        Returns:
            list: one defeaturized result per ``(text, length)`` pair.
        """
        return [
            text_feature.defeaturize(
                ids[:length.numpy().item()].numpy().tolist())
            for ids, length in zip(texts, texts_len)
        ]

    def translate(self, audio, audio_len):
        """End-to-end translation from extracted audio features.

        Puts the model in eval mode and calls ``self.model.decode`` with the
        options taken from ``self.config.decode``.

        Returns:
            Whatever ``self.model.decode`` returns (the hypotheses).
        """
        decode_cfg = self.config.decode
        self.model.eval()

        decode_options = dict(
            text_feature=self.text_feature,
            decoding_method=decode_cfg.decoding_method,
            beam_size=decode_cfg.beam_size,
            word_reward=decode_cfg.word_reward,
            maxlenratio=decode_cfg.maxlenratio,
            decoding_chunk_size=decode_cfg.decoding_chunk_size,
            num_decoding_left_chunks=decode_cfg.num_decoding_left_chunks,
            simulate_streaming=decode_cfg.simulate_streaming)
        return self.model.decode(audio, audio_len, **decode_options)

    def compute_translation_metrics(self,
                                    utts,
                                    audio,
                                    audio_len,
                                    texts,
                                    texts_len,
                                    bleu_func,
                                    fout=None):
        """Decode one batch and score it against its references.

        Args:
            utts: utterance ids of the batch.
            audio: audio features passed to ``self.model.decode``.
            audio_len: lengths of ``audio``; their sum is reported as
                ``num_frames``.
            texts: reference token ids.
            texts_len: lengths of ``texts``.
            bleu_func: callable scoring ``(hyps, [refs])``.
            fout: optional writer; when truthy, one
                ``{"utt", "ref", "hyp"}`` record is written per utterance.

        Returns:
            dict: ``hyps``, ``refs``, ``bleu``, ``len_refs``, ``num_ins``,
            ``num_frames`` and ``decode_time``.
        """
        decode_cfg = self.config.decode

        # The timed span covers reference detokenisation and decoding.
        tic = time.time()
        refs = self.id2token(texts, texts_len, self.text_feature)
        decode_options = dict(
            text_feature=self.text_feature,
            decoding_method=decode_cfg.decoding_method,
            beam_size=decode_cfg.beam_size,
            word_reward=decode_cfg.word_reward,
            maxlenratio=decode_cfg.maxlenratio,
            decoding_chunk_size=decode_cfg.decoding_chunk_size,
            num_decoding_left_chunks=decode_cfg.num_decoding_left_chunks,
            simulate_streaming=decode_cfg.simulate_streaming)
        hyps = self.model.decode(audio, audio_len, **decode_options)
        decode_time = time.time() - tic

        ref_word_count = 0
        example_count = 0
        for utt, target, result in zip(utts, refs, hyps):
            ref_word_count += len(target.split())
            example_count += 1
            if fout:
                fout.write({"utt": utt, "ref": target, "hyp": result})
            logger.info(f"Utt: {utt}")
            logger.info(f"Ref: {target}")
            logger.info(f"Hyp: {result}")
            logger.info("One example BLEU = %s" %
                        (bleu_func([result], [[target]]).prec_str))

        return dict(
            hyps=hyps,
            refs=refs,
            bleu=bleu_func(hyps, [refs]).score,
            len_refs=ref_word_count,
            num_ins=example_count,  # num examples
            num_frames=audio_len.sum().numpy().item(),
            decode_time=decode_time)

    @mp_tools.rank_zero_only
    @paddle.no_grad()
    def test(self):
        """Decode the test set, log BLEU/RTF and write the result files.

        Per-utterance results are written as JSON lines to
        ``self.args.result_file``; a one-line JSON summary is written next to
        it with the extension replaced by ``.bleu``.

        Raises:
            AssertionError: if ``self.args.result_file`` is not set.
        """
        assert self.args.result_file
        self.model.eval()
        if not self.use_streamdata:
            logger.info(f"Test Total Examples: {len(self.test_loader.dataset)}")

        decode_cfg = self.config.decode
        bleu_func = bleu_score.char_bleu if decode_cfg.error_rate_type == 'char-bleu' else bleu_score.bleu

        stride_ms = self.config.stride_ms

        def real_time_factor(decode_sec, frames):
            # Decode time is accumulated from time.time() differences, i.e.
            # seconds, whereas frames * stride_ms is in milliseconds. Convert
            # the audio duration to seconds so the ratio is unit-consistent,
            # and avoid dividing by zero when no audio has been seen.
            audio_sec = frames * stride_ms / 1000.0
            return decode_sec / audio_sec if audio_sec > 0 else 0.0

        hyps, refs = [], []
        num_ins = 0
        num_frames = 0.0
        num_time = 0.0  # accumulated decode wall time, in seconds
        with jsonlines.open(self.args.result_file, 'w') as fout:
            for batch in self.test_loader:
                metrics = self.compute_translation_metrics(
                    *batch, bleu_func=bleu_func, fout=fout)
                hyps += metrics['hyps']
                refs += metrics['refs']
                bleu = metrics['bleu']
                num_frames += metrics['num_frames']
                num_time += metrics["decode_time"]
                num_ins += metrics['num_ins']
                rtf = real_time_factor(num_time, num_frames)
                logger.info("RTF: %f, instance (%d), batch BELU   = %f" %
                            (rtf, num_ins, bleu))

        rtf = real_time_factor(num_time, num_frames)
        # Score the whole test set once and reuse it for the log line and the
        # summary file.
        corpus_bleu = bleu_func(hyps, [refs])
        msg = "Test: "
        msg += "epoch: {}, ".format(self.epoch)
        msg += "step: {}, ".format(self.iteration)
        msg += "RTF: {}, ".format(rtf)
        msg += "Test set [%s]: %s" % (len(hyps), str(corpus_bleu))
        logger.info(msg)
        bleu_meta_path = os.path.splitext(self.args.result_file)[0] + '.bleu'
        err_type_str = "BLEU"
        with open(bleu_meta_path, 'w') as f:
            data = json.dumps({
                "epoch":
                self.epoch,
                "step":
                self.iteration,
                "rtf":
                rtf,
                err_type_str:
                corpus_bleu.score,
                # milliseconds -> hours
                "dataset_hour": (num_frames * stride_ms) / 1000.0 / 3600.0,
                # seconds -> hours
                "process_hour":
                num_time / 3600.0,
                "num_examples":
                num_ins,
                "decode_method":
                self.config.decode.decoding_method,
            })
            f.write(data + '\n')

    def load_inferspec(self):
        """Load the inference model and describe its inputs.

        The model is obtained from ``U2STInferModel.from_pretrained`` using
        the test loader, a clone of the config and
        ``self.args.checkpoint_path``.

        Returns:
            nn.Layer: inference model
            List[paddle.static.InputSpec]: input spec.
        """
        from paddlespeech.s2t.models.u2_st import U2STInferModel

        loader = self.test_loader
        infer_model = U2STInferModel.from_pretrained(
            loader, self.config.clone(), self.args.checkpoint_path)

        # audio, [B,T,D]
        audio_spec = paddle.static.InputSpec(
            shape=[1, None, loader.feat_dim], dtype='float32')
        # audio_length, [B]
        audio_len_spec = paddle.static.InputSpec(shape=[1], dtype='int64')
        return infer_model, [audio_spec, audio_len_spec]

    @paddle.no_grad()
    def export(self):
        """Convert the inference model to static form and save it.

        The model and input spec come from ``load_inferspec``; the result is
        saved to ``self.args.export_path``.

        Raises:
            AssertionError: if the input spec is not a list.
        """
        net, specs = self.load_inferspec()
        assert isinstance(specs, list), type(specs)
        net.eval()
        exported = paddle.jit.to_static(net, input_spec=specs)
        logger.info(f"Export code: {exported.forward.code}")
        paddle.jit.save(exported, self.args.export_path)


================================================
FILE: paddlespeech/s2t/exps/wav2vec2/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/exps/wav2vec2/bin/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/exps/wav2vec2/bin/test.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Evaluation for wav2vec2.0 model."""
import cProfile

from yacs.config import CfgNode

from paddlespeech.s2t.exps.wav2vec2.model import Wav2Vec2ASRTester as Tester
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.utils.argparse import print_arguments


def main_sp(config, args):
    """Single-process entry: build the tester and run the test pass.

    Args:
        config: experiment configuration handed to the tester.
        args: parsed command-line arguments handed to the tester.
    """
    tester = Tester(config, args)
    # Both setup and testing happen inside the tester's eval context.
    with tester.eval():
        tester.setup()
        tester.run_test()


def main(config, args):
    """Entry point of the test script; delegates to ``main_sp``."""
    main_sp(config=config, args=args)


if __name__ == "__main__":
    # Script entry: parse CLI arguments, assemble the config, run the test
    # under the profiler.
    parser = default_argument_parser()
    # Extra option of this script: path of the dictionary file.
    parser.add_argument(
        '--dict-path', type=str, default=None, help='dict path.')
    args = parser.parse_args()
    print_arguments(args, globals())

    # https://yaml.org/type/float.html
    # Start from an empty node that accepts new keys, then layer the config
    # file, the decode config and finally the command-line overrides.
    config = CfgNode(new_allowed=True)
    if args.config:
        config.merge_from_file(args.config)
    if args.decode_cfg:
        # The decode config file is loaded separately and attached as
        # ``config.decode``.
        decode_confs = CfgNode(new_allowed=True)
        decode_confs.merge_from_file(args.decode_cfg)
        config.decode = decode_confs
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()
    print(config)
    # Optionally write the final (merged, frozen) config to a file.
    if args.dump_config:
        with open(args.dump_config, 'w') as f:
            print(config, file=f)

    # Setting for profiling
    # The whole run is executed under cProfile; stats are written to
    # 'test.profile' relative to the current working directory.
    pr = cProfile.Profile()
    pr.runcall(main, config, args)
    pr.dump_stats('test.profile')


================================================
FILE: paddlespeech/s2t/exps/wav2vec2/bin/test_wav.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Evaluation for wav2vec2.0 model."""
import os
import sys
from pathlib import Path

import paddle
import soundfile
from paddlenlp.transformers import AutoTokenizer
from yacs.config import CfgNode

from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
from paddlespeech.s2t.models.wav2vec2.wav2vec2_ASR import Wav2vec2ASR
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.utils.log import Log
from paddlespeech.s2t.utils.utility import UpdateConfig
logger = Log(__name__).getlog()


class Wav2vec2Infer():
    """Run wav2vec2 ASR inference on a single audio file.

    The constructor builds the text featurizer/tokenizer, selects the device,
    creates the model from ``config`` and loads its weights from
    ``args.checkpoint_path + ".pdparams"``.
    """

    def __init__(self, config, args):
        """Prepare tokenizer, device and model.

        Args:
            config: configuration node; ``tokenizer`` is optional, and
                ``unit_type``/``vocab_filepath`` are read when it is absent.
            args: parsed arguments; ``audio_file``, ``ngpu`` and
                ``checkpoint_path`` are read here.
        """
        self.args = args
        self.config = config
        self.audio_file = args.audio_file
        self.tokenizer = config.get("tokenizer", None)

        # A configured tokenizer name selects a pretrained tokenizer;
        # otherwise a vocabulary-file based text featurizer is used.
        if self.tokenizer:
            self.text_feature = AutoTokenizer.from_pretrained(
                self.config.tokenizer)
        else:
            self.text_feature = TextFeaturizer(
                unit_type=config.unit_type, vocab=config.vocab_filepath)

        paddle.set_device('gpu' if self.args.ngpu > 0 else 'cpu')

        # model
        model_conf = config
        with UpdateConfig(model_conf):
            model_conf.output_dim = self.text_feature.vocab_size
        model = Wav2vec2ASR.from_config(model_conf)
        self.model = model
        self.model.eval()

        # load model
        params_path = self.args.checkpoint_path + ".pdparams"
        model_dict = paddle.load(params_path)
        self.model.set_state_dict(model_dict)

    def run(self):
        """Validate the audio file, decode it and return the transcript.

        Returns:
            The first entry of the transcripts returned by
            ``self.model.decode``.
        """
        # Use the path stored on the instance. Reading a module-level
        # ``args`` here only works when this file is executed as a script
        # and fails with NameError when the class is imported.
        check(self.audio_file)

        with paddle.no_grad():
            # read
            audio, _ = soundfile.read(
                self.audio_file, dtype="int16", always_2d=True)
            logger.info(f"audio shape: {audio.shape}")
            # Add a leading batch axis before decoding.
            xs = paddle.to_tensor(audio, dtype='float32').unsqueeze(axis=0)
            decode_config = self.config.decode
            result_transcripts, result_tokenids = self.model.decode(
                xs,
                text_feature=self.text_feature,
                decoding_method=decode_config.decoding_method,
                beam_size=decode_config.beam_size,
                tokenizer=self.tokenizer, )
            rsl = result_transcripts[0]
            utt = Path(self.audio_file).name
            logger.info(f"hyp: {utt} {rsl}")
            return rsl


def check(audio_file):
    """Validate that ``audio_file`` exists, is readable and sampled at 16 kHz.

    The process exits with status -1 when the path is not a regular file or
    when ``soundfile`` cannot read it.

    Args:
        audio_file (str): path of the audio file to validate.

    Raises:
        AssertionError: if the sample rate of the file is not 16000.
    """
    if not os.path.isfile(audio_file):
        print("Please input the right audio file path")
        sys.exit(-1)

    logger.info("checking the audio file format......")
    try:
        # only the sample rate is needed; the decoded samples are discarded
        _, sample_rate = soundfile.read(audio_file)
    except Exception as e:
        logger.error(str(e))
        logger.error(
            "can not open the wav file, please check the audio file format")
        sys.exit(-1)
    logger.info("The sample rate is %d" % sample_rate)
    # Raise explicitly rather than with a bare `assert`: the check then also
    # runs under `python -O` and reports what was wrong. The exception type
    # is unchanged for existing callers.
    if sample_rate != 16000:
        raise AssertionError(
            f"The sample rate of the audio file must be 16000, but got {sample_rate}"
        )
    logger.info("The audio file format is right")


def main(config, args):
    """Build a ``Wav2vec2Infer`` from ``config`` and ``args`` and run it once."""
    infer = Wav2vec2Infer(config, args)
    infer.run()


if __name__ == "__main__":
    # Script entry point: parse the command line, assemble the configuration
    # from its sources, then run inference.
    parser = default_argument_parser()
    args = parser.parse_args()

    config = CfgNode(new_allowed=True)

    # Sources are merged in this order, so later ones override earlier ones:
    # main config file, decode config file, then command-line overrides.
    if args.config:
        config.merge_from_file(args.config)
    if args.decode_cfg:
        # decoding options come from their own file and are attached under
        # `config.decode`
        decode_confs = CfgNode(new_allowed=True)
        decode_confs.merge_from_file(args.decode_cfg)
        config.decode = decode_confs
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()
    main(config, args)


================================================
FILE: paddlespeech/s2t/exps/wav2vec2/bin/train.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Trainer for wav2vec2.0 model."""
import cProfile
import os

from yacs.config import CfgNode

from paddlespeech.s2t.exps.wav2vec2.model import Wav2Vec2ASRTrainer as Trainer
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.utils.argparse import print_arguments


def main_sp(config, args):
    """Create the trainer for ``config``/``args``, set it up and run it."""
    trainer = Trainer(config, args)
    trainer.setup()
    trainer.run()


def main(config, args):
    """Training entry point; delegates to ``main_sp`` with the same arguments."""
    main_sp(config, args)


if __name__ == "__main__":
    # Script entry point: parse the command line, build the frozen config,
    # optionally dump it, then run training under the profiler.
    parser = default_argument_parser()
    parser.add_argument(
        '--resume', type=str, default="", nargs="?", help='resume ckpt path.')
    args = parser.parse_args()
    print_arguments(args, globals())
    # https://yaml.org/type/float.html
    config = CfgNode(new_allowed=True)
    # the config file is merged first, command-line overrides second
    if args.config:
        config.merge_from_file(args.config)
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()
    if args.dump_config:
        # write the final, merged configuration to the requested file
        with open(args.dump_config, 'w') as f:
            print(config, file=f)

    # Setting for profiling
    # `main` runs under cProfile; the statistics are written to
    # `<args.output>/train.profile` once it returns.
    pr = cProfile.Profile()
    pr.runcall(main, config, args)
    pr.dump_stats(os.path.join(args.output, 'train.profile'))


================================================
FILE: paddlespeech/s2t/exps/wav2vec2/model.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains wav2vec2 model."""
import json
import math
import os
import re
import time
from collections import OrderedDict
from contextlib import nullcontext

import jsonlines
import numpy as np
import paddle
from hyperpyyaml import load_hyperpyyaml
from paddle import distributed as dist
from paddlenlp.transformers import AutoTokenizer

from paddlespeech.s2t.frontend.featurizer import TextFeaturizer
from paddlespeech.s2t.io.dataloader import DataLoaderFactory
from paddlespeech.s2t.io.speechbrain import data_pipeline
from paddlespeech.s2t.io.speechbrain import dataio
from paddlespeech.s2t.io.speechbrain import dataset
from paddlespeech.s2t.io.speechbrain.dataloader import make_dataloader
from paddlespeech.s2t.models.wav2vec2.processing.speech_augmentation import TimeDomainSpecAugment
from paddlespeech.s2t.models.wav2vec2.wav2vec2_ASR import Wav2vec2ASR
from paddlespeech.s2t.training.optimizer import OptimizerFactory
from paddlespeech.s2t.training.reporter import ObsScope
from paddlespeech.s2t.training.reporter import report
from paddlespeech.s2t.training.scheduler import LRSchedulerFactory
from paddlespeech.s2t.training.timer import Timer
from paddlespeech.s2t.training.trainer import Trainer
from paddlespeech.s2t.utils import error_rate
from paddlespeech.s2t.utils import layer_tools
from paddlespeech.s2t.utils import mp_tools
from paddlespeech.s2t.utils.log import Log
from paddlespeech.s2t.utils.utility import UpdateConfig

logger = Log(__name__).getlog()


def clip_grad_norm_(
        parameters,
        max_norm,
        norm_type=2.0,
        error_if_nonfinite=False, ):
    r"""Clips gradient norm of the iterable parameters.

    Norms are calculated together on all gradients, just as they are
    connected into one vector. The gradient will be modified in place.

    This API can only run in dynamic graph mode, not static graph mode.

    Args:
        parameters (Iterable[paddle.Tensor] or paddle.Tensor): Tensors or a single Tensor
            that will be normalized gradients. Any iterable is accepted,
            including one-shot iterators such as generators.
        max_norm (float or int): max norm of the gradients
        norm_type (float or int): type of the used p-norm. Can be `inf` for
            infinity norm.
        error_if_nonfinite (bool): if True, throw an error if the total
            norm of the gradients from :attr:`parameters` is `nan`,
            `inf`, or `-inf`.

    Returns:
        Total norm of the parameter gradients (treated as a single vector).

    Raises:
        RuntimeError: when not running in dynamic mode, or when
            ``error_if_nonfinite`` is True and the total norm is non-finite.
        ValueError: when ``norm_type`` is not one of ``inf``, 0, 1, 2.

    Example:
        .. code-block:: python
            import paddle

            x = paddle.uniform([10, 10], min=-1.0, max=1.0, dtype='float32')
            max_norm = float(5.0)
            linear = paddle.nn.Linear(in_features=10, out_features=10)
            out = linear(x)
            loss = paddle.mean(out)
            loss.backward()

            paddle.nn.utils.clip_grad_norm_(linear.parameters(), max_norm)

            sdg = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters())
            sdg.step()
    """
    if not paddle.in_dynamic_mode():
        raise RuntimeError('this API can only run in dynamic mode.')

    if isinstance(parameters, paddle.Tensor):
        parameters = [parameters]
    else:
        # `parameters` is traversed twice below (once to collect the
        # gradients, once to rescale them). Materialize it so that a one-shot
        # iterator is not exhausted by the first pass, which would silently
        # leave every gradient unclipped.
        parameters = list(parameters)

    support_norm_type = [float("inf"), 0, 1, 2]
    if norm_type not in support_norm_type:
        raise ValueError(f'norm_type only support {support_norm_type}')

    grads = [p.grad for p in parameters if p.grad is not None]
    max_norm = float(max_norm)
    norm_type = float(norm_type)
    if len(grads) == 0:
        return paddle.to_tensor(0.0)
    if norm_type == float("inf"):
        norms = [g.detach().abs().max() for g in grads]
        total_norm = (norms[0]
                      if len(norms) == 1 else paddle.max(paddle.stack(norms)))
    else:
        # norm of the per-gradient norms == norm of all gradients concatenated
        total_norm = paddle.linalg.norm(
            paddle.stack(
                [paddle.linalg.norm(g.detach(), norm_type) for g in grads]),
            norm_type, )

    if error_if_nonfinite and paddle.logical_or(total_norm.isnan(),
                                                total_norm.isinf()):
        raise RuntimeError(
            f'The total norm of {norm_type} order of the gradients from '
            '`parameters` is non-finite, so it cannot be clipped. In any case, '
            'disable this error and scale the gradient by non-finite norm, '
            'set `error_if_nonfinite=False`')
    clip_coef = max_norm / (total_norm + 1e-6)
    # Note: when the coef is clamped to 1, it is redundant to multiply the clamped coef, but this
    # avoids the `if clip_coef < 1:` condition.
    clip_coef_clamped = paddle.clip(clip_coef, max=1.0)
    with paddle.no_grad():
        for p in parameters:
            g = p.grad
            if g is not None:
                p.grad = paddle.multiply(x=g, y=clip_coef_clamped)
    return total_norm


class Wav2Vec2ASRTrainer(Trainer):
    def __init__(self, config, args):
        """Initialise the base trainer and the wav2vec2-specific state."""
        super().__init__(config, args)
        # whether the SpeechBrain-style dataloader is used; overwritten from
        # the config by `setup_dataloader`
        self.use_sb = True
        # set to False by `update_average` once a NaN/inf training loss is
        # seen (such a loss cannot be averaged)
        self.loss_isfinite = True
        # running mean of the training loss, reset at the end of every epoch
        self.avg_train_loss = 0.0

    def update_average(self, batch_index, loss):
        """Fold one batch loss into the running mean ``self.avg_train_loss``.

        A non-finite loss is not averaged; it clears ``self.loss_isfinite``
        and is logged instead.

        Arguments
        ---------
        batch_index : int
            zero-based index of the current batch
        loss : float
            detached loss of the current batch, a single value.
        """
        if not math.isfinite(loss):
            self.loss_isfinite = False
            logger.info('loss:{} in Nan or inf, error'.format(loss))
            return

        # incremental mean: avg <- avg - avg / n + loss / n, with n batches seen
        seen = batch_index + 1
        self.avg_train_loss -= self.avg_train_loss / seen
        self.avg_train_loss += loss / seen

    def before_train(self):
        """Prepare the trainer state right before the epoch loop starts.

        Either restores a checkpoint or starts from scratch (see
        ``resume_or_scratch``), then calls ``maybe_batch_sampler_step``.
        """
        resumed = not self.resume_or_scratch()
        if resumed:
            # resume: continue with the epoch after the restored one
            self.epoch += 1
            logger.info(
                f"Resume train: epoch {self.epoch }, step {self.iteration}!")
        else:
            # scratch: keep a copy of the initial model, i.e. epoch 0
            self.save(tag='init', infos=None)

        self.maybe_batch_sampler_step()

    def train_batch(self, batch_index, batch, msg):
        """Run forward/backward on one batch and step the optimizers when due.

        Gradients are accumulated over ``config.accum_grad`` batches; the
        optimizers, the per-step LR schedulers and ``self.iteration`` advance
        only on the batch that completes an accumulation window.

        Args:
            batch_index (int): zero-based index of the batch in the epoch.
            batch: a mapping with ``'sig'`` and ``'tokens'`` entries when
                ``self.use_sb`` is set, otherwise a 5-tuple
                ``(utt, wav, wavs_lens, target, target_lens)``.
            msg: not used by this method.
        """
        conf = self.config
        tic = time.time()

        # forward
        if self.use_sb:
            # sb data pipeline: each entry is a (data, relative length) pair
            wav, wavs_lens_rate = batch['sig']
            target, target_lens_rate = batch['tokens']
            # turn the relative target lengths back into absolute ones
            target_lens = (target_lens_rate *
                           target.shape[1]).round().astype(paddle.int64)
        else:
            _utt, wav, wavs_lens, target, target_lens = batch
            wavs_lens_rate = wavs_lens / wav.shape[1]
            wav = wav[:, :, 0]

        if hasattr(conf, 'audio_augment'):
            wav = self.speech_augmentation(wav, wavs_lens_rate)

        loss = self.model(wav, wavs_lens_rate, target, target_lens)

        # loss div by `batch_size * accum_grad`
        loss /= conf.accum_grad
        # update self.avg_train_loss
        self.update_average(batch_index, float(loss))

        # True on the batch that closes a gradient-accumulation window
        is_update_step = (batch_index + 1) % conf.accum_grad == 0

        # loss backward
        if not is_update_step and hasattr(self.model,
                                          "no_sync") and self.parallel:
            # Disable gradient synchronizations across DDP processes.
            # Within this context, gradients will be accumulated on module
            # variables, which will later be synchronized.
            # When using cpu w/o DDP, model does not have `no_sync`
            grad_sync_ctx = self.model.no_sync
        else:
            # Used for single gpu training and DDP gradient synchronization
            # processes.
            grad_sync_ctx = nullcontext
        with grad_sync_ctx():
            loss.backward()

            layer_tools.print_grads(self.model, print_func=None)

        # optimizer step
        if is_update_step:
            # do global grad clip
            if conf.global_grad_clip != 0:
                clip_grad_norm_(self.model.parameters(), conf.global_grad_clip)
            self.model_optimizer.step()
            self.model_optimizer.clear_grad()
            wav2vec2_trainable = not conf.freeze_wav2vec2
            if wav2vec2_trainable:
                self.wav2vec2_optimizer.step()
                self.wav2vec2_optimizer.clear_grad()
            # 'newbobscheduler' schedulers are stepped once per epoch in
            # `do_train`, not here
            if conf.model_scheduler != 'newbobscheduler':
                self.model_lr_scheduler.step()
            if conf.wav2vec2_scheduler != 'newbobscheduler' and wav2vec2_trainable:
                self.wav2vec2_lr_scheduler.step()
            self.iteration += 1

        # undo the accum_grad scaling for reporting
        losses_np = {'loss': self.avg_train_loss * conf.accum_grad}
        iteration_time = time.time() - tic
        for name, value in losses_np.items():
            report(name, value)
        report("loss_whitoutavg", float(loss))
        report("batch_size", self.config.batch_size)
        report("accum", conf.accum_grad)
        report("step_cost", iteration_time)

        if is_update_step and dist.get_rank() == 0 and self.visualizer:
            scalars = dict(losses_np)
            scalars["model_lr"] = self.model_lr_scheduler()
            scalars["wav2vec2_lr"] = self.wav2vec2_lr_scheduler()
            for key, val in scalars.items():
                # `self.iteration` was already advanced above
                self.visualizer.add_scalar(
                    tag='train/' + key, value=val, step=self.iteration - 1)

    @paddle.no_grad()
    def valid(self):
        """Evaluate the model on ``self.valid_loader`` and return the mean loss.

        The mean is a running average over the batches whose loss is finite.
        A NaN/inf batch is logged and left out of the average, which is the
        same policy ``update_average`` applies to the training loss; folding
        it in would turn the average into NaN for the rest of the epoch.

        Returns:
            tuple: ``(total_loss, num_seen_utts)``. ``total_loss`` is the
            running mean of the finite batch losses (``0.0`` when there was
            none). ``num_seen_utts`` is always 1 because the loss is already
            averaged.
        """
        self.model.eval()
        if not self.use_streamdata:
            logger.info(
                f"Valid Total Examples: {len(self.valid_loader.dataset)}")
        valid_losses = {}
        step = 0  # number of finite batches folded into `total_loss` so far
        total_loss = 0.0
        num_seen_utts = 1  # use update_average and no need for num_seen_utts here
        for i, batch in enumerate(self.valid_loader):
            if self.use_sb:
                wav, wavs_lens_rate = batch['sig']
                target, target_lens_rate = batch['tokens']
                # turn the relative target lengths back into absolute ones
                target_lens = (target_lens_rate *
                               target.shape[1]).round().astype(paddle.int64)
            else:
                utt, wav, wavs_lens, target, target_lens = batch
                wavs_lens_rate = wavs_lens / wav.shape[1]
                wav = wav[:, :, 0]

            loss = self.model(wav, wavs_lens_rate, target, target_lens)
            loss_value = float(loss)

            if math.isfinite(loss_value):
                # use update_average: avg <- avg - avg / n + loss / n
                total_loss -= total_loss / (step + 1)
                total_loss += loss / (step + 1)
                step += 1
                valid_losses['val_loss'] = loss_value
            else:
                logger.info('loss:{} in Nan or inf, error'.format(loss_value))

            if (i + 1) % self.config.log_interval == 0:
                valid_losses['val_history_loss'] = float(total_loss)

                # logging
                msg = f"Valid: Rank: {dist.get_rank()}, "
                msg += "epoch: {}, ".format(self.epoch)
                msg += "step: {}, ".format(self.iteration)
                if not self.use_streamdata:
                    msg += "batch: {}/{}, ".format(i + 1,
                                                   len(self.valid_loader))
                msg += ', '.join('{}: {:>.6f}'.format(k, v)
                                 for k, v in valid_losses.items())
                logger.info(msg)

        logger.info(
            'Rank {} Val info val_loss {}'.format(dist.get_rank(), total_loss))
        return total_loss, num_seen_utts

    @mp_tools.rank_zero_only
    def save(self, tag=None, infos: dict=None):
        """Save checkpoint (model parameters and optimizer states).

        Writes, next to each other under ``self.checkpoint_dir``:
        ``<name>.pdparams`` (model), ``<name>.pdopt`` (both optimizers),
        ``<name>.pdlrs`` (only when a 'newbobscheduler' is configured) and
        ``<name>.json`` (meta data), where ``<name>`` is ``tag`` or, when
        ``tag`` is None, the current iteration.

        Args:
            tag (int or str, optional): None for step, else using tag, e.g epoch. Defaults to None.
            infos (dict, optional): meta data to save. Defaults to None.
                The dict is not modified; ``epoch``, ``model_lr`` and
                ``wav2vec2_lr`` are added to a copy.
        """
        # Work on a copy so the caller's dict is never mutated.
        infos = dict(infos) if infos else {}
        infos.update({
            "epoch": self.epoch,
            "model_lr": self.model_optimizer.get_lr(),
            "wav2vec2_lr": self.wav2vec2_optimizer.get_lr()
        })

        checkpoint_path = os.path.join(
            self.checkpoint_dir,
            "{}".format(self.iteration if tag is None else tag))

        model_dict = self.model.state_dict()
        params_path = checkpoint_path + ".pdparams"
        paddle.save(model_dict, params_path)
        logger.info("Saved model to {}".format(params_path))

        model_opt_dict = self.model_optimizer.state_dict()
        wav2vec2_opt_dict = self.wav2vec2_optimizer.state_dict()

        opt_dict = {'model': model_opt_dict, 'wav2vec2': wav2vec2_opt_dict}

        optimizer_path = checkpoint_path + ".pdopt"
        paddle.save(opt_dict, optimizer_path)
        logger.info("Saved optimzier state to {}".format(optimizer_path))

        # only 'newbobscheduler' schedulers have their state saved here
        scheduler_dict = {}

        if self.config.model_scheduler == 'newbobscheduler':
            scheduler_dict['model'] = self.model_lr_scheduler.save()
        if self.config.wav2vec2_scheduler == 'newbobscheduler':
            scheduler_dict['wav2vec2'] = self.wav2vec2_lr_scheduler.save()
        if scheduler_dict:
            scheduler_path = checkpoint_path + ".pdlrs"
            paddle.save(scheduler_dict, scheduler_path)
            logger.info("Saved scheduler state to {}".format(scheduler_path))

        # Build the meta-data path from the common prefix instead of
        # rewriting `params_path` with a regex whose dot was not escaped.
        info_path = checkpoint_path + ".json"
        with open(info_path, 'w', encoding='utf8') as fout:
            fout.write(json.dumps(infos))

    def resume_or_scratch(self):
        """Restore the training state from a checkpoint, or start from scratch.

        If ``args.resume`` is set, ``<checkpoint_dir>/<args.resume>.json`` is
        read to obtain the epoch, and the model, both optimizers and (when
        the file exists) the 'newbobscheduler' states are restored from the
        ``<checkpoint_dir>/<epoch>.*`` files. The iteration counter is reset
        to 0 in both cases.

        Returns:
            bool: True when training starts from scratch, False when a
            checkpoint was restored.
        """
        if not self.args.resume:
            self.iteration = 0
            self.epoch = 0
            logger.info("Init from scratch!")
            return True

        # just restore ckpt
        # lr will restore from optimizer ckpt
        meta_path = os.path.join(self.checkpoint_dir,
                                 self.args.resume + '.json')
        with open(meta_path, 'r', encoding='utf8') as f:
            meta = json.load(f)
        self.iteration = 0
        self.epoch = meta["epoch"]

        # every artefact of a checkpoint shares the "<checkpoint_dir>/<epoch>" prefix
        ckpt_prefix = os.path.join(self.checkpoint_dir,
                                   "{}".format(self.epoch))

        # restore model from *.pdparams
        self.model.set_state_dict(paddle.load(ckpt_prefix + '.pdparams'))

        # restore optimizer from *.pdopt
        optimizer_dict = paddle.load(ckpt_prefix + '.pdopt')
        self.model_optimizer.set_state_dict(optimizer_dict['model'])
        self.wav2vec2_optimizer.set_state_dict(optimizer_dict['wav2vec2'])

        # restore lr_scheduler from *.pdlrs
        scheduler_path = ckpt_prefix + '.pdlrs'
        if os.path.isfile(scheduler_path):
            scheduler_dict = paddle.load(scheduler_path)
            if self.config.model_scheduler == 'newbobscheduler':
                self.model_lr_scheduler.load(scheduler_dict['model'])
            if self.config.wav2vec2_scheduler == 'newbobscheduler':
                self.wav2vec2_lr_scheduler.load(scheduler_dict['wav2vec2'])

        logger.info(
            f"Restore ckpt: epoch {self.epoch }, step {self.iteration}!")
        return False

    def do_train(self):
        """Run the epoch loop until ``config.n_epoch`` is reached.

        Each epoch trains on every batch of ``self.train_loader``, runs
        ``valid``, logs the validation loss, steps the epoch-level
        ('newbobscheduler') LR schedulers, saves a checkpoint tagged with the
        epoch and finally calls ``new_epoch``.
        """
        # !!!IMPORTANT!!!
        # Try to export the model by script, if fails, we should refine
        # the code to satisfy the script export requirements
        # script_model = paddle.jit.to_static(self.model)
        # script_model_path = str(self.checkpoint_dir / 'init')
        # paddle.jit.save(script_model, script_model_path)

        self.before_train()
        if not self.use_streamdata:
            logger.info(
                f"Train Total Examples: {len(self.train_loader.dataset)}")
        while self.epoch < self.config.n_epoch:
            with Timer("Epoch-Train Time Cost: {}"):
                self.model.train()
                try:
                    data_start_time = time.time()
                    for batch_index, batch in enumerate(self.train_loader):
                        # time spent waiting for this batch
                        dataload_time = time.time() - data_start_time
                        msg = "Train:"
                        # values passed to `report` inside the scope below are
                        # read back from `observation` afterwards
                        observation = OrderedDict()
                        with ObsScope(observation):
                            report("Rank", dist.get_rank())
                            report("epoch", self.epoch)
                            report('step', self.iteration)
                            report("model_lr", self.model_optimizer.get_lr())
                            report("wav2vec2_lr",
                                   self.wav2vec2_optimizer.get_lr())
                            self.train_batch(batch_index, batch, msg)
                            self.after_train_batch()
                            report('iter', batch_index + 1)
                            if not self.use_streamdata:
                                report('total', len(self.train_loader))
                            report('reader_cost', dataload_time)
                        # 'step_cost' and 'batch_size' are reported by `train_batch`
                        observation['batch_cost'] = observation[
                            'reader_cost'] + observation['step_cost']
                        observation['samples'] = observation['batch_size']
                        observation['ips,samples/s'] = observation[
                            'batch_size'] / observation['batch_cost']
                        # Build the log line; a key of the form "name,unit"
                        # is rendered as " name: value unit".
                        for k, v in observation.items():
                            msg += f" {k.split(',')[0]}: "
                            msg += f"{v:>.8f}" if isinstance(v,
                                                             float) else f"{v}"
                            msg += f" {k.split(',')[1]}" if len(
                                k.split(',')) == 2 else ""
                            msg += ","
                        msg = msg[:-1]  # remove the last ","
                        if (batch_index + 1) % self.config.log_interval == 0:
                            logger.info(msg)
                        data_start_time = time.time()
                except Exception as e:
                    # log the failure, then let it propagate
                    logger.error(e)
                    raise e
            with Timer("Eval Time Cost: {}"):
                total_loss, num_seen_utts = self.valid()
                if dist.get_world_size() > 1:
                    # sum the per-rank loss and count over all ranks, then
                    # divide to get the validation loss
                    num_seen_utts = paddle.to_tensor(num_seen_utts)
                    dist.all_reduce(num_seen_utts)
                    total_loss = paddle.to_tensor(total_loss)
                    dist.all_reduce(total_loss)
                    cv_loss = total_loss / num_seen_utts
                    cv_loss = float(cv_loss)
                else:
                    cv_loss = float(total_loss)
            logger.info(
                'Epoch {} Val info val_loss {}'.format(self.epoch, cv_loss))
            if self.visualizer:
                self.visualizer.add_scalar(
                    tag='eval/cv_loss', value=cv_loss, step=self.epoch)
                self.visualizer.add_scalar(
                    tag='eval/model_lr',
                    value=self.model_lr_scheduler(),
                    step=self.epoch)
                self.visualizer.add_scalar(
                    tag='eval/wav2vec2_lr',
                    value=self.wav2vec2_lr_scheduler(),
                    step=self.epoch)

            # 'newbobscheduler' schedulers are stepped once per epoch with
            # the validation loss; other schedulers are stepped in `train_batch`
            if self.config.model_scheduler == 'newbobscheduler':
                self.model_lr_scheduler.step(cv_loss)
            if self.config.wav2vec2_scheduler == 'newbobscheduler':
                if not self.config.freeze_wav2vec2:
                    self.wav2vec2_lr_scheduler.step(cv_loss)
            self.save(tag=self.epoch, infos={'val_loss': cv_loss})
            # restart the running training-loss average for the next epoch
            self.avg_train_loss = 0.0
            self.new_epoch()

    def dataio_prepare(self, hparams):
        """Build the train/valid/test datasets and their processing pipelines.

        The datasets are read from the CSV files named in ``hparams``, the
        training set is optionally sorted by duration, and audio/text
        pipelines are attached to all three datasets. The tokenizer is also
        stored on ``self.tokenizer``.

        Args:
            hparams (dict): data pipeline hyper-parameters. The keys read
                here are ``data_folder``, ``train_data``, ``valid_data``,
                ``test_data``, ``sorting``, ``train_dataloader_opts``,
                ``dynamic_batching`` and, when dynamic batching is enabled,
                ``dynamic_batch_sampler``.

        Returns:
            tuple: ``(train_data, valid_data, test_data, tokenizer,
            train_batch_sampler, valid_batch_sampler)``; both samplers are
            None unless dynamic batching is enabled.

        Raises:
            NotImplementedError: if ``hparams["sorting"]`` is not one of
                "random", "ascending" or "descending".
        """
        data_folder = hparams["data_folder"]

        def load_csv(key):
            # every split is described by a CSV whose paths use `data_root`
            return dataset.DynamicItemDataset.from_csv(
                csv_path=hparams[key],
                replacements={"data_root": data_folder}, )

        train_data = load_csv("train_data")

        sorting = hparams["sorting"]
        if sorting in ("ascending", "descending"):
            # we sort training data to speed up training and get better results.
            if sorting == "ascending":
                train_data = train_data.filtered_sorted(sort_key="duration")
            else:
                train_data = train_data.filtered_sorted(
                    sort_key="duration", reverse=True)
            # when sorting do not shuffle in dataloader ! otherwise is pointless
            hparams["train_dataloader_opts"]["shuffle"] = False
        elif sorting != "random":
            raise NotImplementedError(
                "sorting must be random, ascending or descending")

        valid_data = load_csv("valid_data").filtered_sorted(
            sort_key="duration")
        test_data = load_csv("test_data").filtered_sorted(sort_key="duration")

        datasets = [train_data, valid_data, test_data]

        # Defining tokenizer and loading it
        tokenizer = AutoTokenizer.from_pretrained('bert-base-chinese')
        self.tokenizer = tokenizer

        # 2. Define audio pipeline:
        @data_pipeline.takes("wav")
        @data_pipeline.provides("sig")
        def audio_pipeline(wav):
            return dataio.read_audio(wav)

        dataset.add_dynamic_item(datasets, audio_pipeline)

        # 3. Define text pipeline:
        @data_pipeline.takes("transcript")
        @data_pipeline.provides("wrd", "tokens_list", "tokens")
        def text_pipeline(wrd):
            # drop the spaces of the transcript before tokenizing
            wrd = wrd.replace(" ", "")
            yield wrd
            tokens_list = tokenizer(wrd)["input_ids"]
            yield tokens_list
            yield np.array(tokens_list, dtype="int64")

        dataset.add_dynamic_item(datasets, text_pipeline)

        # 4. Set output:
        dataset.set_output_keys(
            datasets,
            ["id", "sig", "wrd", "tokens"], )

        # 5. If Dynamic Batching is used, we instantiate the needed samplers.
        train_batch_sampler = None
        valid_batch_sampler = None
        if hparams["dynamic_batching"]:
            # NOTE(review): this resolves a top-level `sampler` module on
            # sys.path, not a paddlespeech module. Confirm it is importable
            # wherever dynamic batching is enabled.
            from sampler import DynamicBatchSampler  # noqa

            dynamic_hparams = hparams["dynamic_batch_sampler"]
            num_buckets = dynamic_hparams["num_buckets"]

            def build_sampler(data):
                # train and valid samplers share every option but the data
                return DynamicBatchSampler(
                    data,
                    dynamic_hparams["max_batch_len"],
                    num_buckets=num_buckets,
                    length_func=lambda x: x["duration"],
                    shuffle=dynamic_hparams["shuffle_ex"],
                    batch_ordering=dynamic_hparams["batch_ordering"], )

            train_batch_sampler = build_sampler(train_data)
            valid_batch_sampler = build_sampler(valid_data)

        return (train_data, valid_data, test_data, tokenizer,
                train_batch_sampler, valid_batch_sampler, )

    def setup_dataloader(self):
        """Create the dataloaders required by the current mode.

        ``config.use_sb_pipeline`` selects between the SpeechBrain-style
        pipeline (configured by the hyperpyyaml file named in
        ``config.sb_pipeline_conf``) and ``DataLoaderFactory``. In training
        mode ``train_loader`` and ``valid_loader`` are set; otherwise
        ``test_loader`` (and, without the SpeechBrain pipeline,
        ``align_loader``) is set.
        """
        config = self.config.clone()
        self.use_streamdata = config.get("use_stream_data", False)
        self.use_sb = config.get("use_sb_pipeline", False)

        if not self.use_sb:
            if self.train:
                self.train_loader = DataLoaderFactory.get_dataloader(
                    'train', config, self.args)
                self.valid_loader = DataLoaderFactory.get_dataloader(
                    'valid', config, self.args)
                logger.info("Setup train/valid Dataloader!")
                return

            # read from the config but not used further in this method
            decode_batch_size = config.get('decode', dict()).get(
                'decode_batch_size', 1)
            self.test_loader = DataLoaderFactory.get_dataloader('test', config,
                                                                self.args)
            self.align_loader = DataLoaderFactory.get_dataloader(
                'align', config, self.args)
            logger.info("Setup test/align Dataloader!")
            return

        with open(config.sb_pipeline_conf, 'r', encoding='utf8') as fin:
            hparams = load_hyperpyyaml(fin, None)

        (train_data, valid_data, test_data, _tokenizer, train_bsampler,
         valid_bsampler, ) = self.dataio_prepare(hparams)

        train_opts = hparams["train_dataloader_opts"]
        valid_opts = hparams["valid_dataloader_opts"]

        # a dynamic batch sampler replaces the configured dataloader options
        if train_bsampler is not None:
            train_opts = {
                "batch_sampler": train_bsampler,
                "num_workers": hparams["num_workers"],
            }
        if valid_bsampler is not None:
            valid_opts = {"batch_sampler": valid_bsampler}

        if not self.train:
            self.test_loader = make_dataloader(
                test_data, stage='test', **hparams["test_dataloader_opts"])
            return

        self.train_loader = make_dataloader(
            train_data, stage='train', **train_opts)
        self.valid_loader = make_dataloader(
            valid_data, stage='val', **valid_opts)
        logger.info("Setup train/valid Dataloader!")

    def setup_model(self):
        """Build the model and, in training mode, its optimizers and schedulers.

        The model is created from the config, the wav2vec2 sub-module is
        initialised from ``config.wav2vec2_params_path`` and the model is
        wrapped in ``paddle.DataParallel`` when ``self.parallel`` is set.
        In training mode two optimizer/scheduler pairs are created: one for
        the ``enc`` and ``ctc`` parameters and one for the ``wav2vec2``
        parameters.
        """
        config = self.config
        model_conf = config

        with UpdateConfig(model_conf):
            if self.use_sb:
                model_conf.output_dim = self.tokenizer.vocab_size
            else:
                # the dimensions come from the loader of the current mode
                loader = self.train_loader if self.train else self.test_loader
                model_conf.input_dim = loader.feat_dim
                model_conf.output_dim = loader.vocab_size

        model = Wav2vec2ASR.from_config(model_conf)

        pretrained_dict = paddle.load(config.wav2vec2_params_path)
        model.wav2vec2.set_state_dict(pretrained_dict)

        if self.parallel:
            model = paddle.DataParallel(model, find_unused_parameters=True)

        layer_tools.print_params(model, logger.info)
        self.model = model
        logger.info("Setup model!")

        # setup speech augmentation for wav2vec2
        if hasattr(config, 'audio_augment') and self.train:
            self.speech_augmentation = TimeDomainSpecAugment(
                **config.audio_augment)

        if not self.train:
            return

        train_config = config
        model_optim_type = train_config.model_optim
        model_optim_conf = train_config.model_optim_conf
        logger.info("optim_model:{},{}", model_optim_type, model_optim_conf)
        wav2vec2_optim_type = train_config.wav2vec2_optim
        wav2vec2_optim_conf = train_config.wav2vec2_optim_conf
        logger.info("optim_model:{},{}", wav2vec2_optim_type,
                    wav2vec2_optim_conf)

        model_scheduler_type = train_config.model_scheduler
        model_scheduler_conf = train_config.model_scheduler_conf
        wav2vec2_scheduler_type = train_config.wav2vec2_scheduler
        wav2vec2_scheduler_conf = train_config.wav2vec2_scheduler_conf

        def scheduler_kwargs(optim_conf, scheduler_conf):
            # the scheduler starts from the learning rate of its optimizer
            return dict(
                **{"learning_rate": optim_conf.lr,
                   "verbose": False}, **(dict(scheduler_conf)))

        model_scheduler_args = scheduler_kwargs(model_optim_conf,
                                                model_scheduler_conf)
        wav2vec2_scheduler_args = scheduler_kwargs(wav2vec2_optim_conf,
                                                   wav2vec2_scheduler_conf)

        model_lr_scheduler = LRSchedulerFactory.from_args(model_scheduler_type,
                                                          model_scheduler_args)
        wav2vec2_lr_scheduler = LRSchedulerFactory.from_args(
            wav2vec2_scheduler_type, wav2vec2_scheduler_args)

        def optimizer_kwargs(optim_conf, parameters, lr_scheduler):
            # fall back to the fixed learning rate when no scheduler is given
            kwargs = dict(optim_conf)
            kwargs["learning_rate"] = (lr_scheduler
                                       if lr_scheduler else optim_conf.lr)
            kwargs["parameters"] = parameters
            return kwargs

        # under DataParallel the sub-modules are reached through `_layers`
        core = model._layers if self.parallel else model

        model_optimizer_args = optimizer_kwargs(model_optim_conf, [
            {
                'params': core.enc.parameters()
            },
            {
                'params': core.ctc.parameters()
            },
        ], model_lr_scheduler)

        wav2vec2_optimizer_args = optimizer_kwargs(
            wav2vec2_optim_conf,
            core.wav2vec2.parameters(), wav2vec2_lr_scheduler)

        self.model_optimizer = OptimizerFactory.from_args(model_optim_type,
                                                          model_optimizer_args)
        self.wav2vec2_optimizer = OptimizerFactory.from_args(
            wav2vec2_optim_type, wav2vec2_optimizer_args)
        self.model_lr_scheduler = model_lr_scheduler
        self.wav2vec2_lr_scheduler = wav2vec2_lr_scheduler
        logger.info("Setup optimizer/lr_scheduler!")


class Wav2Vec2ASRTester(Wav2Vec2ASRTrainer):
    """Evaluation driver for the wav2vec2 ASR model.

    Decodes every batch of ``self.test_loader``, scores the hypotheses against
    the references with character or word error counts (selected by
    ``config.decode.error_rate_type``), writes one JSON line per utterance to
    ``args.result_file`` and a summary next to it with the ``.err`` suffix.
    """

    def __init__(self, config, args):
        """Build the tester and the text featurizer used to turn ids into text.

        Args:
            config: experiment config; ``unit_type`` and ``vocab_filepath``
                are read here.
            args: parsed command-line arguments, forwarded to the base class.
        """
        super().__init__(config, args)
        self.text_featurizer = TextFeaturizer(
            unit_type=config.unit_type, vocab=config.vocab_filepath)
        self.vocab_list = self.text_featurizer.vocab_list

    @staticmethod
    def _ratio(numerator, denominator):
        """Return ``numerator / denominator``; NaN when the denominator is 0.

        NaN marks an undefined rate (nothing was scored) instead of raising
        ``ZeroDivisionError`` in the middle of an evaluation run.
        """
        return numerator / denominator if denominator else float("nan")

    def id2token(self, texts, texts_len):
        """Convert padded token-id rows back into transcripts.

        Args:
            texts: iterable of token-id rows (each row is sliced and converted
                with ``.numpy().tolist()``).
            texts_len: iterable with the valid length of each row (each item
                is read with ``.numpy().item()``).

        Returns:
            list: one defeaturized transcript per row.
        """
        trans = []
        for text, n in zip(texts, texts_len):
            n = n.numpy().item()
            # Drop the padding before mapping the ids back to text.
            ids = text[:n]
            trans.append(self.text_featurizer.defeaturize(ids.numpy().tolist()))
        return trans

    def _score_batch(self, utts, refs, hyps, hyp_tokenids, fout=None):
        """Score one decoded batch and optionally dump each utterance.

        Args:
            utts: utterance ids.
            refs: reference transcripts.
            hyps: hypothesis transcripts.
            hyp_tokenids: hypothesis token ids, one entry per utterance.
            fout: optional writer with a ``write(dict)`` method.

        Returns:
            tuple: ``(errors_sum, len_refs, num_ins)`` accumulated over the
            batch.
        """
        decode_cfg = self.config.decode
        use_cer = decode_cfg.error_rate_type == 'cer'
        errors_func = error_rate.char_errors if use_cer else error_rate.word_errors
        error_rate_func = error_rate.cer if use_cer else error_rate.wer

        errors_sum, len_refs, num_ins = 0.0, 0, 0
        for utt, target, result, rec_tids in zip(utts, refs, hyps,
                                                 hyp_tokenids):
            errors, len_ref = errors_func(target, result)
            errors_sum += errors
            len_refs += len_ref
            num_ins += 1
            if fout:
                fout.write({
                    "utt": utt,
                    "refs": [target],
                    "hyps": [result],
                    "hyps_tokenid": [rec_tids],
                })
            logger.info(f"Utt: {utt}")
            logger.info(f"Ref: {target}")
            logger.info(f"Hyp: {result}")
            logger.info("One example error rate [%s] = %f" % (
                decode_cfg.error_rate_type, error_rate_func(target, result)))
        return errors_sum, len_refs, num_ins

    def compute_metrics(self, id, audio, audio_len, texts, texts_len,
                        fout=None):
        """Decode one batch from the regular dataloader and score it.

        Args:
            id: utterance ids of the batch.
            audio: batched audio passed to ``self.model.decode``.
            audio_len: per-utterance audio lengths; their sum is reported as
                ``num_frames``.
            texts: padded reference token ids.
            texts_len: valid lengths of ``texts``.
            fout: optional writer receiving one record per utterance.

        Returns:
            dict: ``errors_sum``, ``len_refs``, ``num_ins``, ``error_rate``
            (NaN when the batch has no reference tokens), ``error_rate_type``,
            ``num_frames`` and ``decode_time`` (seconds, includes the
            id-to-text conversion of the references).
        """
        decode_cfg = self.config.decode

        start_time = time.time()
        target_transcripts = self.id2token(texts, texts_len)
        result_transcripts, result_tokenids = self.model.decode(
            audio,
            text_feature=self.text_featurizer,
            decoding_method=decode_cfg.decoding_method,
            beam_size=decode_cfg.beam_size)
        decode_time = time.time() - start_time

        errors_sum, len_refs, num_ins = self._score_batch(
            id, target_transcripts, result_transcripts, result_tokenids, fout)

        return dict(
            errors_sum=errors_sum,
            len_refs=len_refs,
            num_ins=num_ins,  # num examples
            error_rate=self._ratio(errors_sum, len_refs),
            error_rate_type=decode_cfg.error_rate_type,
            num_frames=audio_len.sum().numpy().item(),
            decode_time=decode_time)

    def sb_compute_metrics(self, id, sig, wrd, tokens, fout=None):
        """Decode one batch from the SpeechBrain-style dataloader and score it.

        The parameter names match the batch keys because ``test`` calls this
        method with ``**batch``.

        Args:
            id: utterance ids of the batch.
            sig: pair whose first item is the batched audio and whose second
                item is summed to obtain ``num_frames``.
            wrd: reference transcripts.
            tokens: unused here; accepted because it is part of the batch.
            fout: optional writer receiving one record per utterance.

        Returns:
            dict: same keys as :meth:`compute_metrics`.
        """
        decode_cfg = self.config.decode

        start_time = time.time()
        target_transcripts = wrd
        result_transcripts, result_tokenids = self.model.decode(
            sig[0],
            text_feature=self.tokenizer,
            decoding_method=decode_cfg.decoding_method,
            beam_size=decode_cfg.beam_size,
            sb_pipeline=True)
        decode_time = time.time() - start_time

        errors_sum, len_refs, num_ins = self._score_batch(
            id, target_transcripts, result_transcripts, result_tokenids, fout)

        return dict(
            errors_sum=errors_sum,
            len_refs=len_refs,
            num_ins=num_ins,  # num examples
            error_rate=self._ratio(errors_sum, len_refs),
            error_rate_type=decode_cfg.error_rate_type,
            num_frames=sig[1].sum().numpy().item(),
            decode_time=decode_time)

    @mp_tools.rank_zero_only
    @paddle.no_grad()
    def test(self):
        """Evaluate the whole test set and write the result files.

        Per-utterance records go to ``args.result_file`` (JSON lines); the
        aggregate summary goes to the same path with the extension replaced
        by ``.err``. When the loader yields nothing to score, the rates are
        reported as NaN instead of aborting with a division error.
        """
        logger.info(f"Test Total Examples: {len(self.test_loader.dataset)}")
        self.model.eval()

        decode_cfg = self.config.decode
        # Every batch reports this same value; reading it up front keeps the
        # summary key meaningful even when no batch is produced.
        error_rate_type = decode_cfg.error_rate_type
        errors_sum, len_refs, num_ins = 0.0, 0, 0
        num_frames = 0.0
        num_time = 0.0
        # Bound before the loop so the summary below never hits an unbound
        # name when the loader is empty.
        rtf = float("nan")

        with jsonlines.open(self.args.result_file, 'w') as fout:
            for batch in self.test_loader:
                if self.use_sb:
                    metrics = self.sb_compute_metrics(**batch, fout=fout)
                else:
                    metrics = self.compute_metrics(*batch, fout=fout)
                num_frames += metrics['num_frames']
                num_time += metrics["decode_time"]
                errors_sum += metrics['errors_sum']
                len_refs += metrics['len_refs']
                num_ins += metrics['num_ins']
                error_rate_type = metrics['error_rate_type']
                rtf = self._ratio(num_time, num_frames)
                logger.info("RTF: %f, Error rate [%s] (%d/?) = %f" %
                            (rtf, error_rate_type, num_ins,
                             self._ratio(errors_sum, len_refs)))

        final_error_rate = self._ratio(errors_sum, len_refs)
        if num_ins == 0:
            logger.info(
                "No test examples were scored; the error rate is undefined.")

        # logging
        msg = "Test: "
        msg += "epoch: {}, ".format(self.epoch)
        msg += "step: {}, ".format(self.iteration)
        msg += "Final error rate [%s] (%d/%d) = %f" % (
            error_rate_type, num_ins, num_ins, final_error_rate)
        logger.info(msg)

        err_meta_path = os.path.splitext(self.args.result_file)[0] + '.err'
        with open(err_meta_path, 'w', encoding='utf8') as f:
            data = json.dumps({
                "epoch":
                self.epoch,
                "step":
                self.iteration,
                "rtf":
                rtf,
                error_rate_type:
                final_error_rate,
                "dataset_hour": (num_frames) / 1000.0 / 3600.0,
                "process_hour":
                num_time / 1000.0 / 3600.0,
                "num_examples":
                num_ins,
                "err_sum":
                errors_sum,
                "ref_len":
                len_refs,
                "decode_method":
                self.config.decode.decoding_method,
            })
            f.write(data + '\n')


================================================
FILE: paddlespeech/s2t/exps/wavlm/__init__.py
================================================


================================================
FILE: paddlespeech/s2t/exps/wavlm/bin/__init__.py
================================================


================================================
FILE: paddlespeech/s2t/exps/wavlm/bin/test.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Evaluation for WavLM model."""
import cProfile

from yacs.config import CfgNode

from paddlespeech.s2t.exps.wavlm.model import WavLMASRTester as Tester
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.utils.argparse import add_arguments
from paddlespeech.utils.argparse import print_arguments


def main_sp(config, args):
    """Build the WavLM tester, then set it up and run the test in eval mode.

    Args:
        config: experiment configuration passed to the tester.
        args: parsed command-line arguments passed to the tester.
    """
    tester = Tester(config, args)
    # setup() and run_test() both execute inside the tester's eval context.
    with tester.eval():
        tester.setup()
        tester.run_test()


def main(config, args):
    """Entry point handed to the profiler; delegates to ``main_sp``."""
    main_sp(config, args)


# Script entry: parse the CLI, assemble the frozen config, then run the
# evaluation under cProfile.
if __name__ == "__main__":
    parser = default_argument_parser()
    # Extra option on top of the default parser: path to a dict file.
    parser.add_argument(
        '--dict-path', type=str, default=None, help='dict path.')
    args = parser.parse_args()
    # NOTE: the module globals are handed to print_arguments, so the names
    # defined at this level are visible to it.
    print_arguments(args, globals())

    # https://yaml.org/type/float.html
    config = CfgNode(new_allowed=True)
    # Merge order matters: config file first, then the decode config (stored
    # under `config.decode`), then command-line overrides.
    if args.config:
        config.merge_from_file(args.config)
    if args.decode_cfg:
        decode_confs = CfgNode(new_allowed=True)
        decode_confs.merge_from_file(args.decode_cfg)
        config.decode = decode_confs
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()
    print(config)
    # Optionally write the final, merged config to a file.
    if args.dump_config:
        with open(args.dump_config, 'w') as f:
            print(config, file=f)

    # Setting for profiling
    pr = cProfile.Profile()
    pr.runcall(main, config, args)
    # The profile is written relative to the current working directory.
    pr.dump_stats('test.profile')


================================================
FILE: paddlespeech/s2t/exps/wavlm/bin/test_wav.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Evaluation for wavlm model."""
import os
import sys
from pathlib import Path

import paddle
import soundfile
from paddlenlp.transformers import AutoTokenizer
from yacs.config import CfgNode

from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
from paddlespeech.s2t.models.wavlm.wavlm_asr import WavLMASR
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.utils.log import Log
from paddlespeech.s2t.utils.utility import UpdateConfig
logger = Log(__name__).getlog()


class WavLMInfer():
    """Transcribe a single audio file with a WavLM ASR checkpoint.

    The constructor builds the text front end and the model and loads the
    parameters from ``args.checkpoint_path + ".pdparams"``; :meth:`run`
    validates the audio file, decodes it and returns the transcript.
    """

    def __init__(self, config, args):
        """Create the text feature, the model, and load the checkpoint.

        Args:
            config: experiment config. Reads the optional ``tokenizer`` entry,
                ``unit_type`` / ``vocab_filepath`` when no tokenizer is set,
                and is updated in place with ``output_dim``.
            args: parsed command-line arguments. Reads ``audio_file``,
                ``ngpu`` and ``checkpoint_path``.
        """
        self.args = args
        self.config = config
        self.audio_file = args.audio_file
        self.tokenizer = config.get("tokenizer", None)

        # A configured tokenizer name selects a pretrained tokenizer;
        # otherwise the vocabulary-file based featurizer is used.
        if self.tokenizer:
            self.text_feature = AutoTokenizer.from_pretrained(
                self.config.tokenizer)
        else:
            self.text_feature = TextFeaturizer(
                unit_type=config.unit_type, vocab=config.vocab_filepath)

        paddle.set_device('gpu' if self.args.ngpu > 0 else 'cpu')

        # model
        model_conf = config
        with UpdateConfig(model_conf):
            # The output layer size follows the vocabulary of the text feature.
            model_conf.output_dim = self.text_feature.vocab_size
        model = WavLMASR.from_config(model_conf)
        self.model = model
        self.model.eval()

        # load model
        params_path = self.args.checkpoint_path + ".pdparams"
        model_dict = paddle.load(params_path)
        self.model.set_state_dict(model_dict)

    def run(self):
        """Validate, read and decode ``self.audio_file``.

        Returns:
            The first transcript returned by ``self.model.decode``.
        """
        # Use the path stored on the instance: a module-level ``args`` only
        # exists when this file is executed as a script, so relying on it
        # raised NameError when the class was used from another module.
        check(self.audio_file)

        with paddle.no_grad():
            # read
            audio, _ = soundfile.read(
                self.audio_file, dtype="int16", always_2d=True)
            logger.info(f"audio shape: {audio.shape}")
            # Add a leading batch axis for the single utterance.
            xs = paddle.to_tensor(audio, dtype='float32').unsqueeze(axis=0)
            decode_config = self.config.decode
            result_transcripts, result_tokenids = self.model.decode(
                xs,
                text_feature=self.text_feature,
                decoding_method=decode_config.decoding_method,
                beam_size=decode_config.beam_size,
                tokenizer=self.tokenizer, )
            rsl = result_transcripts[0]
            utt = Path(self.audio_file).name
            logger.info(f"hyp: {utt} {rsl}")
            return rsl


def check(audio_file):
    """Validate that ``audio_file`` exists, is readable and sampled at 16 kHz.

    Exits the process with status -1 when the path is not a file or when the
    file cannot be read; raises ``AssertionError`` when the sample rate is not
    16000.

    Args:
        audio_file: path of the audio file to validate.
    """
    file_exists = os.path.isfile(audio_file)
    if not file_exists:
        print("Please input the right audio file path")
        sys.exit(-1)

    logger.info("checking the audio file format......")
    try:
        # Only the sample rate is needed; the decoded samples are discarded.
        _, sample_rate = soundfile.read(audio_file)
    except Exception as err:
        for message in (
                str(err),
                "can not open the wav file, please check the audio file format"
        ):
            logger.error(message)
        sys.exit(-1)

    logger.info("The sample rate is %d" % sample_rate)
    assert sample_rate == 16000
    logger.info("The audio file format is right")


def main(config, args):
    """Run single-file WavLM inference; the transcript is logged, not returned."""
    infer = WavLMInfer(config, args)
    infer.run()


# Script entry: parse the CLI, assemble the frozen config and transcribe the
# requested audio file.
if __name__ == "__main__":
    parser = default_argument_parser()
    # No extra options are registered here; everything read later
    # (audio_file, ngpu, checkpoint_path, config, decode_cfg, opts) is
    # expected to come from the default parser -- TODO confirm.
    args = parser.parse_args()

    config = CfgNode(new_allowed=True)

    # Merge order matters: config file first, then the decode config (stored
    # under `config.decode`), then command-line overrides.
    if args.config:
        config.merge_from_file(args.config)
    if args.decode_cfg:
        decode_confs = CfgNode(new_allowed=True)
        decode_confs.merge_from_file(args.decode_cfg)
        config.decode = decode_confs
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()
    main(config, args)


================================================
FILE: paddlespeech/s2t/exps/wavlm/bin/train.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Trainer for wavlm model."""
import cProfile
import os

from yacs.config import CfgNode

from paddlespeech.s2t.exps.wavlm.model import WavLMASRTrainer as Trainer
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.utils.argparse import print_arguments, add_arguments


def main_sp(config, args):
    """Build the WavLM trainer, set it up and run training.

    Args:
        config: experiment configuration passed to the trainer.
        args: parsed command-line arguments passed to the trainer.
    """
    trainer = Trainer(config, args)
    trainer.setup()
    trainer.run()


def main(config, args):
    """Entry point handed to the profiler; delegates to ``main_sp``."""
    main_sp(config, args)


# Script entry: parse the CLI, assemble the frozen config, then run training
# under cProfile.
if __name__ == "__main__":
    parser = default_argument_parser()
    # `--resume` may be given without a value (nargs="?"); it defaults to "".
    parser.add_argument(
        '--resume', type=str, default="", nargs="?", help='resume ckpt path.')
    args = parser.parse_args()
    # NOTE: the module globals are handed to print_arguments, so the names
    # defined at this level are visible to it.
    print_arguments(args, globals())
    # https://yaml.org/type/float.html
    config = CfgNode(new_allowed=True)
    # Config file first, then command-line overrides.
    if args.config:
        config.merge_from_file(args.config)
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()
    # Optionally write the final, merged config to a file.
    if args.dump_config:
        with open(args.dump_config, 'w') as f:
            print(config, file=f)

    # Setting for profiling
    pr = cProfile.Profile()
    pr.runcall(main, config, args)
    # The profile is stored inside the directory given by `args.output`.
    pr.dump_stats(os.path.join(args.output, 'train.profile'))


================================================
FILE: paddlespeech/s2t/exps/wavlm/model.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains wavlm model."""
import json
import math
import os
import re
import time
from collections import OrderedDict
from contextlib import nullcontext

import jsonlines
import numpy as np
import paddle
from hyperpyyaml import load_hyperpyyaml
from paddle import distributed as dist
from paddlenlp.transformers import AutoTokenizer

from paddlespeech.s2t.frontend.featurizer import TextFeaturizer
from paddlespeech.s2t.io.dataloader import DataLoaderFactory
from paddlespeech.s2t.io.speechbrain import data_pipeline
from paddlespeech.s2t.io.speechbrain import dataio
from paddlespeech.s2t.io.speechbrain import dataset
from paddlespeech.s2t.io.speechbrain.dataloader import make_dataloader
from paddlespeech.s2t.models.wav2vec2.processing.speech_augmentation import TimeDomainSpecAugment
from paddlespeech.s2t.models.wavlm.wavlm_asr import WavLMASR
from paddlespeech.s2t.training.optimizer import OptimizerFactory
from paddlespeech.s2t.training.reporter import ObsScope
from paddlespeech.s2t.training.reporter import report
from paddlespeech.s2t.training.scheduler import LRSchedulerFactory
from paddlespeech.s2t.training.timer import Timer
from paddlespeech.s2t.training.trainer import Trainer
from paddlespeech.s2t.utils import error_rate
from paddlespeech.s2t.utils import layer_tools
from paddlespeech.s2t.utils import mp_tools
from paddlespeech.s2t.utils.log import Log
from paddlespeech.s2t.utils.utility import UpdateConfig

logger = Log(__name__).getlog()


def clip_grad_norm_(
        parameters,
        max_norm,
        norm_type=2.0,
        error_if_nonfinite=False, ):
    r"""Clip the global gradient norm of ``parameters`` in place.

    The norm is computed over all gradients together, as if they were
    concatenated into a single vector. Parameters without a gradient are
    ignored. Gradients are replaced by their scaled version.

    This API can only run in dynamic graph mode, not static graph mode.

    Args:
        parameters (Iterable[paddle.Tensor] or paddle.Tensor): parameters
            whose gradients are clipped. Any iterable is accepted, including
            one-shot iterators such as generators.
        max_norm (float or int): max norm of the gradients
        norm_type (float or int): type of the used p-norm; one of
            ``inf``, 0, 1, 2.
        error_if_nonfinite (bool): if True, throw an error if the total
            norm of the gradients from :attr:`parameters` is `nan`,
            `inf`, or `-inf`.

    Returns:
        Total norm of the parameter gradients (treated as a single vector);
        a zero tensor when no parameter has a gradient.

    Raises:
        RuntimeError: when not in dynamic mode, or when
            ``error_if_nonfinite`` is set and the total norm is non-finite.
        ValueError: when ``norm_type`` is not supported.

    Example:
        .. code-block:: python
            import paddle

            x = paddle.uniform([10, 10], min=-1.0, max=1.0, dtype='float32')
            max_norm = float(5.0)
            linear = paddle.nn.Linear(in_features=10, out_features=10)
            out = linear(x)
            loss = paddle.mean(out)
            loss.backward()

            clip_grad_norm_(linear.parameters(), max_norm)

            sdg = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters())
            sdg.step()
    """
    if not paddle.in_dynamic_mode():
        raise RuntimeError('this API can only run in dynamic mode.')

    if isinstance(parameters, paddle.Tensor):
        parameters = [parameters]
    else:
        # The parameters are walked twice (norm computation, then scaling).
        # Materialize them once so a generator is not exhausted by the first
        # pass, which would silently leave every gradient unclipped.
        parameters = list(parameters)

    support_norm_type = [float("inf"), 0, 1, 2]
    if norm_type not in support_norm_type:
        raise ValueError(f'norm_type only support {support_norm_type}')

    grads = [p.grad for p in parameters if p.grad is not None]
    max_norm = float(max_norm)
    norm_type = float(norm_type)
    if len(grads) == 0:
        return paddle.to_tensor(0.0)
    if norm_type == float("inf"):
        norms = [g.detach().abs().max() for g in grads]
        total_norm = (norms[0]
                      if len(norms) == 1 else paddle.max(paddle.stack(norms)))
    else:
        # Norm of the per-gradient norms equals the norm of the concatenation.
        total_norm = paddle.linalg.norm(
            paddle.stack(
                [paddle.linalg.norm(g.detach(), norm_type) for g in grads]),
            norm_type, )

    if error_if_nonfinite and paddle.logical_or(total_norm.isnan(),
                                                total_norm.isinf()):
        raise RuntimeError(
            f'The total norm of {norm_type} order of the gradients from '
            '`parameters` is non-finite, so it cannot be clipped. In any case, '
            'disable this error and scale the gradient by non-finite norm, '
            'set `error_if_nonfinite=False`')
    clip_coef = max_norm / (total_norm + 1e-6)
    # Note: when the coef is clamped to 1, it is redundant to multiply the clamped coef, but this
    # avoids the `if clip_coef < 1:` condition.
    clip_coef_clamped = paddle.clip(clip_coef, max=1.0)
    with paddle.no_grad():
        for p in parameters:
            g = p.grad
            if g is not None:
                p.grad = paddle.multiply(x=g, y=clip_coef_clamped)
    return total_norm


class WavLMASRTrainer(Trainer):
    def __init__(self, config, args):
        """Initialise the base trainer and the WavLM training bookkeeping.

        Args:
            config: experiment configuration, forwarded to the base class.
            args: parsed command-line arguments, forwarded to the base class.
        """
        super().__init__(config, args)
        # Selects the speech brain dataloader code paths.
        self.use_sb = True
        # Set to False by `update_average` once a NaN/inf loss is seen, since
        # such a loss cannot be averaged.
        self.loss_isfinite = True
        # Running mean of the training loss.
        self.avg_train_loss = 0.0

    def update_average(self, batch_index, loss):
        """Fold ``loss`` into the running mean ``self.avg_train_loss``.

        A non-finite loss is not averaged; it clears ``self.loss_isfinite``
        and is logged instead.

        Args:
            batch_index (int): zero-based index of the current batch.
            loss (float): detached loss value of the current batch.
        """
        if not math.isfinite(loss):
            self.loss_isfinite = False
            logger.info('loss:{} in Nan or inf, error'.format(loss))
            return

        seen = batch_index + 1
        # Incremental mean: avg += (loss - avg) / seen, split in two steps.
        self.avg_train_loss -= self.avg_train_loss / seen
        self.avg_train_loss += loss / seen

    def before_train(self):
        """Prepare training: restore or initialise state, then step the sampler.

        A fresh run saves the initial model under the ``init`` tag; a resumed
        run continues with the epoch after the restored one.
        """
        if self.resume_or_scratch():
            # scratch: save init model, i.e. 0 epoch
            self.save(tag='init', infos=None)
        else:
            # resume: train next_epoch and next_iteration
            self.epoch += 1
            logger.info(
                f"Resume train: epoch {self.epoch }, step {self.iteration}!")

        self.maybe_batch_sampler_step()

    def train_batch(self, batch_index, batch, msg):
        """Run forward/backward for one batch and step on accumulation boundaries.

        The loss is divided by ``accum_grad``; optimizers and (non-newbob)
        schedulers are stepped only every ``accum_grad`` batches, which is
        also when ``self.iteration`` advances and scalars are sent to the
        visualizer.

        Args:
            batch_index (int): zero-based index of the batch inside the epoch.
            batch: a dict with ``sig`` and ``tokens`` entries when
                ``self.use_sb`` is set, otherwise a 5-tuple
                ``(utt, wav, wavs_lens, target, target_lens)``.
            msg: unused in this method.
        """
        cfg = self.config
        tic = time.time()

        # forward
        if not self.use_sb:
            utt, wav, wavs_lens, target, target_lens = batch
            wavs_lens_rate = wavs_lens / wav.shape[1]
            wav = wav[:, :, 0]
        else:
            ## sb data pipeline: lengths arrive as ratios of the padded length
            wav, wavs_lens_rate = batch['sig']
            target, target_lens_rate = batch['tokens']
            scaled_lens = target_lens_rate * target.shape[1]
            target_lens = scaled_lens.round().astype(paddle.int64)

        if hasattr(cfg, 'audio_augment'):
            wav = self.speech_augmentation(wav, wavs_lens_rate)
        loss = self.model(wav, wavs_lens_rate, target, target_lens)

        # loss div by `batch_size * accum_grad`
        loss /= cfg.accum_grad
        # update self.avg_train_loss
        self.update_average(batch_index, float(loss))

        # True on the batch that closes an accumulation window.
        is_update_step = (batch_index + 1) % cfg.accum_grad == 0

        # loss backward
        if is_update_step:
            # Used for single gpu training and DDP gradient synchronization
            # processes.
            context = nullcontext
        else:
            # Disable gradient synchronizations across DDP processes while
            # gradients are still being accumulated.
            # When using cpu w/o DDP, model does not have `no_sync`.
            can_skip_sync = hasattr(self.model, "no_sync") and self.parallel
            context = self.model.no_sync if can_skip_sync else nullcontext
        with context():
            loss.backward()

            layer_tools.print_grads(self.model, print_func=None)

        # NOTE (kept from the original author): backward() was suspected to be
        # problematic here as more steps are accumulated.
        if is_update_step:
            # do global grad clip
            if cfg.global_grad_clip != 0:
                clip_grad_norm_(self.model.parameters(), cfg.global_grad_clip)
            self.model_optimizer.step()
            self.model_optimizer.clear_grad()
            wavlm_trainable = not cfg.freeze_wavlm
            if wavlm_trainable:
                self.wavlm_optimizer.step()
                self.wavlm_optimizer.clear_grad()
            if self.config.model_scheduler != 'newbobscheduler':
                self.model_lr_scheduler.step()
            if self.config.wavlm_scheduler != 'newbobscheduler' and wavlm_trainable:
                self.wavlm_lr_scheduler.step()
            self.iteration += 1

        # Undo the accum_grad scaling for the reported running loss.
        avg_losses = {'loss': self.avg_train_loss * cfg.accum_grad}
        elapsed = time.time() - tic
        for name, value in avg_losses.items():
            report(name, value)
        report("loss_whitoutavg", float(loss))
        report("batch_size", self.config.batch_size)
        report("accum", cfg.accum_grad)
        report("step_cost", elapsed)

        if is_update_step and dist.get_rank() == 0 and self.visualizer:
            scalars = dict(avg_losses)
            scalars["model_lr"] = self.model_lr_scheduler()
            scalars["wavlm_lr"] = self.wavlm_lr_scheduler()
            for key, val in scalars.items():
                # iteration was already advanced above, hence the -1.
                self.visualizer.add_scalar(
                    tag='train/' + key, value=val, step=self.iteration - 1)

    @paddle.no_grad()
    def valid(self):
        """Run one pass over the validation set.

        Batches whose loss is NaN/inf are logged and skipped: they do not
        enter the running mean, so one bad batch can no longer turn the whole
        validation loss into NaN.

        Returns:
            tuple: ``(total_loss, num_seen_utts)``. ``total_loss`` is the
            running mean of the finite batch losses (``0.0`` when there is
            none); ``num_seen_utts`` is always 1 because the mean is already
            normalised.
        """
        self.model.eval()
        if not self.use_streamdata:
            logger.info(
                f"Valid Total Examples: {len(self.valid_loader.dataset)}")
        valid_losses = {}
        step = 0  # number of finite losses folded into the mean so far
        total_loss = 0.0
        num_seen_utts = 1  # use update_average and no need for num_seen_utts here
        for i, batch in enumerate(self.valid_loader):
            if self.use_sb:
                wav, wavs_lens_rate = batch['sig']
                target, target_lens_rate = batch['tokens']
                target_lens = (target_lens_rate *
                               target.shape[1]).round().astype(paddle.int64)
            else:
                utt, wav, wavs_lens, target, target_lens = batch
                wavs_lens_rate = wavs_lens / wav.shape[1]
                wav = wav[:, :, 0]

            loss = self.model(wav, wavs_lens_rate, target, target_lens)
            loss_value = float(loss)

            if math.isfinite(loss_value):
                # Incremental mean over the finite losses only.
                total_loss -= total_loss / (step + 1)
                total_loss += loss / (step + 1)
                step += 1
                valid_losses['val_loss'] = loss_value
            else:
                logger.info('loss:{} in Nan or inf, error'.format(loss_value))

            if (i + 1) % self.config.log_interval == 0:
                valid_losses['val_history_loss'] = float(total_loss)

                # logging
                msg = f"Valid: Rank: {dist.get_rank()}, "
                msg += "epoch: {}, ".format(self.epoch)
                msg += "step: {}, ".format(self.iteration)
                if not self.use_streamdata:
                    msg += "batch: {}/{}, ".format(i + 1,
                                                   len(self.valid_loader))
                msg += ', '.join('{}: {:>.6f}'.format(k, v)
                                 for k, v in valid_losses.items())
                logger.info(msg)

        logger.info(
            'Rank {} Val info val_loss {}'.format(dist.get_rank(), total_loss))
        return total_loss, num_seen_utts

    @mp_tools.rank_zero_only
    def save(self, tag=None, infos: dict=None):
        """Save checkpoint (model parameters, optimizer and scheduler states).

        Files are written next to each other in ``self.checkpoint_dir`` with
        the checkpoint name as stem: ``.pdparams`` (model), ``.pdopt`` (both
        optimizers), ``.pdlrs`` (only for ``newbobscheduler`` schedulers) and
        ``.json`` (meta data).

        Args:
            tag (int or str, optional): None for step, else using tag, e.g epoch. Defaults to None.
            infos (dict, optional): meta data to save. Defaults to None. The
                dict passed in is not modified.
        """
        # Work on a copy so the caller's dict is not mutated by the update.
        infos = dict(infos) if infos else dict()
        infos.update({
            "epoch": self.epoch,
            "model_lr": self.model_optimizer.get_lr(),
            "wavlm_lr": self.wavlm_optimizer.get_lr()
        })

        checkpoint_path = os.path.join(
            self.checkpoint_dir,
            "{}".format(self.iteration if tag is None else tag))

        model_dict = self.model.state_dict()
        params_path = checkpoint_path + ".pdparams"
        paddle.save(model_dict, params_path)
        logger.info("Saved model to {}".format(params_path))

        model_opt_dict = self.model_optimizer.state_dict()
        wavlm_opt_dict = self.wavlm_optimizer.state_dict()

        opt_dict = {'model': model_opt_dict, 'wavlm': wavlm_opt_dict}

        optimizer_path = checkpoint_path + ".pdopt"
        paddle.save(opt_dict, optimizer_path)
        logger.info("Saved optimzier state to {}".format(optimizer_path))

        scheduler_dict = {}

        if self.config.model_scheduler == 'newbobscheduler':
            scheduler_dict['model'] = self.model_lr_scheduler.save()
        if self.config.wavlm_scheduler == 'newbobscheduler':
            scheduler_dict['wavlm'] = self.wavlm_lr_scheduler.save()
        if scheduler_dict:
            scheduler_path = checkpoint_path + ".pdlrs"
            paddle.save(scheduler_dict, scheduler_path)
            logger.info("Saved scheduler state to {}".format(scheduler_path))

        # Same stem as the other files; built directly instead of rewriting
        # the ".pdparams" suffix with a regex whose "." matched any character.
        info_path = checkpoint_path + ".json"
        with open(info_path, 'w', encoding='utf8') as fout:
            data = json.dumps(infos)
            fout.write(data)

    def resume_or_scratch(self):
        """Restore training state from a checkpoint or start from scratch.

        When ``args.resume`` is set, ``<resume>.json`` in the checkpoint
        directory is read to obtain the epoch, and the model, both optimizers
        and (if a ``.pdlrs`` file exists) the ``newbobscheduler`` schedulers
        are restored from the files named after that epoch. The iteration
        counter is reset to 0 in both cases.

        Returns:
            bool: True when training starts from scratch, False when a
            checkpoint was restored.
        """
        if not self.args.resume:
            self.iteration = 0
            self.epoch = 0
            logger.info("Init from scratch!")
            return True

        # just restore ckpt
        # lr will restore from optimizer ckpt
        meta_path = os.path.join(self.checkpoint_dir,
                                 self.args.resume + '.json')
        with open(meta_path, 'r', encoding='utf8') as fin:
            meta = json.load(fin)
        self.iteration = 0
        self.epoch = meta["epoch"]

        def ckpt_file(suffix):
            # Checkpoint files share the "<checkpoint_dir>/<epoch>" stem.
            stem = os.path.join(self.checkpoint_dir, "{}".format(self.epoch))
            return stem + suffix

        # restore model from *.pdparams
        self.model.set_state_dict(paddle.load(ckpt_file('.pdparams')))

        # restore optimizer from *.pdopt
        opt_states = paddle.load(ckpt_file('.pdopt'))
        self.model_optimizer.set_state_dict(opt_states['model'])
        self.wavlm_optimizer.set_state_dict(opt_states['wavlm'])

        # restore lr_scheduler from *.pdlrs
        lrs_path = ckpt_file('.pdlrs')
        if os.path.isfile(lrs_path):
            lrs_states = paddle.load(lrs_path)
            if self.config.model_scheduler == 'newbobscheduler':
                self.model_lr_scheduler.load(lrs_states['model'])
            if self.config.wavlm_scheduler == 'newbobscheduler':
                self.wavlm_lr_scheduler.load(lrs_states['wavlm'])

        logger.info(
            f"Restore ckpt: epoch {self.epoch }, step {self.iteration}!")
        return False

    def do_train(self):
        """Run the epoch loop: train every batch, validate, then checkpoint.

        For each epoch below ``self.config.n_epoch`` this method:

        1. iterates ``self.train_loader``, calling ``train_batch`` and
           ``after_train_batch`` for every batch while collecting per-batch
           statistics that are logged every ``config.log_interval`` batches;
        2. calls ``valid()`` and, when more than one process is running,
           all-reduces the returned loss and utterance count;
        3. records scalars on ``self.visualizer`` (if set), steps the
           ``newbobscheduler`` schedulers, saves a checkpoint tagged with the
           epoch and calls ``new_epoch()``.
        """
        # !!!IMPORTANT!!!
        # Try to export the model by script, if fails, we should refine
        # the code to satisfy the script export requirements
        # script_model = paddle.jit.to_static(self.model)
        # script_model_path = str(self.checkpoint_dir / 'init')
        # paddle.jit.save(script_model, script_model_path)

        self.before_train()
        if not self.use_streamdata:
            logger.info(
                f"Train Total Examples: {len(self.train_loader.dataset)}")

        while self.epoch < self.config.n_epoch:
            with Timer("Epoch-Train Time Cost: {}"):
                self.model.train()
                try:
                    tick = time.time()
                    for batch_index, batch in enumerate(self.train_loader):
                        # Time spent waiting for the loader to yield the batch.
                        dataload_time = time.time() - tick
                        msg = "Train:"
                        stats = OrderedDict()
                        with ObsScope(stats):
                            report("Rank", dist.get_rank())
                            report("epoch", self.epoch)
                            report('step', self.iteration)
                            report("model_lr", self.model_optimizer.get_lr())
                            report("wavlm_lr", self.wavlm_optimizer.get_lr())
                            self.train_batch(batch_index, batch, msg)
                            self.after_train_batch()
                            report('iter', batch_index + 1)
                            if not self.use_streamdata:
                                report('total', len(self.train_loader))
                            report('reader_cost', dataload_time)

                        # 'step_cost' and 'batch_size' are not reported in this
                        # method; presumably train_batch reports them - TODO
                        # confirm.
                        stats['batch_cost'] = (
                            stats['reader_cost'] + stats['step_cost'])
                        stats['samples'] = stats['batch_size']
                        stats['ips,samples/s'] = (
                            stats['batch_size'] / stats['batch_cost'])

                        # A key of the form "name,unit" is rendered as
                        # " name: value unit"; other keys as " name: value".
                        fields = []
                        for key, value in stats.items():
                            pieces = key.split(',')
                            if isinstance(value, float):
                                shown = f"{value:>.8f}"
                            else:
                                shown = f"{value}"
                            unit = f" {pieces[1]}" if len(pieces) == 2 else ""
                            fields.append(f" {pieces[0]}: {shown}{unit}")
                        msg = msg + ",".join(fields)

                        if (batch_index + 1) % self.config.log_interval == 0:
                            logger.info(msg)
                        tick = time.time()
                except Exception as err:
                    logger.error(err)
                    raise err

            with Timer("Eval Time Cost: {}"):
                total_loss, num_seen_utts = self.valid()
                if dist.get_world_size() > 1:
                    # Sum loss and utterance count over all processes first.
                    num_seen_utts = paddle.to_tensor(num_seen_utts)
                    dist.all_reduce(num_seen_utts)
                    total_loss = paddle.to_tensor(total_loss)
                    dist.all_reduce(total_loss)
                    cv_loss = float(total_loss / num_seen_utts)
                else:
                    cv_loss = float(total_loss)

            logger.info(
                'Epoch {} Val info val_loss {}'.format(self.epoch, cv_loss))

            board = self.visualizer
            if board:
                board.add_scalar(
                    tag='eval/cv_loss', value=cv_loss, step=self.epoch)
                board.add_scalar(
                    tag='eval/model_lr',
                    value=self.model_lr_scheduler(),
                    step=self.epoch)
                board.add_scalar(
                    tag='eval/wavlm_lr',
                    value=self.wavlm_lr_scheduler(),
                    step=self.epoch)

            if self.config.model_scheduler == 'newbobscheduler':
                self.model_lr_scheduler.step(cv_loss)
            # The WavLM schedule is left untouched while WavLM is frozen.
            if (self.config.wavlm_scheduler == 'newbobscheduler' and
                    not self.config.freeze_wavlm):
                self.wavlm_lr_scheduler.step(cv_loss)

            self.save(tag=self.epoch, infos={'val_loss': cv_loss})
            self.avg_train_loss = 0.0
            self.new_epoch()

    def dataio_prepare(self, hparams):
        """Create the train/valid/test datasets and attach their data pipelines.

        Args:
            hparams (dict): Settings read here: ``data_folder``, ``train_data``,
                ``valid_data``, ``test_data``, ``sorting``, ``dynamic_batching``
                and, when dynamic batching is enabled,
                ``dynamic_batch_sampler``. When ``sorting`` is ``ascending`` or
                ``descending``, ``hparams["train_dataloader_opts"]["shuffle"]``
                is set to False as a side effect.

        Returns:
            tuple: ``(train_data, valid_data, test_data, tokenizer,
            train_batch_sampler, valid_batch_sampler)``. Both samplers are None
            unless ``hparams["dynamic_batching"]`` is truthy.

        Raises:
            NotImplementedError: If ``hparams["sorting"]`` is not ``random``,
                ``ascending`` or ``descending``.
        """
        data_folder = hparams["data_folder"]

        def load_split(csv_key):
            # Every split is read from CSV with the same data_root replacement.
            return dataset.DynamicItemDataset.from_csv(
                csv_path=hparams[csv_key],
                replacements={"data_root": data_folder}, )

        train_data = load_split("train_data")

        sorting = hparams["sorting"]
        if sorting in ("ascending", "descending"):
            # Sorting by duration speeds up training; shuffling in the
            # dataloader afterwards would undo it, so it is switched off.
            if sorting == "ascending":
                train_data = train_data.filtered_sorted(sort_key="duration")
            else:
                train_data = train_data.filtered_sorted(
                    sort_key="duration", reverse=True)
            hparams["train_dataloader_opts"]["shuffle"] = False
        elif sorting != "random":
            raise NotImplementedError(
                "sorting must be random, ascending or descending")

        valid_data = load_split("valid_data").filtered_sorted(
            sort_key="duration")
        test_data = load_split("test_data").filtered_sorted(
            sort_key="duration")

        datasets = [train_data, valid_data, test_data]

        # The tokenizer is also kept on the instance for later use.
        tokenizer = AutoTokenizer.from_pretrained('bert-base-chinese')
        self.tokenizer = tokenizer

        # Audio pipeline: "wav" entry -> "sig" read through dataio.read_audio.
        @data_pipeline.takes("wav")
        @data_pipeline.provides("sig")
        def audio_pipeline(wav):
            return dataio.read_audio(wav)

        dataset.add_dynamic_item(datasets, audio_pipeline)

        # Text pipeline: yields, in order, the transcript with spaces removed,
        # its token ids as a list, and the same ids as an int64 array.
        @data_pipeline.takes("transcript")
        @data_pipeline.provides("wrd", "tokens_list", "tokens")
        def text_pipeline(wrd):
            compact = wrd.replace(" ", "")
            yield compact
            token_ids = tokenizer(compact)["input_ids"]
            yield token_ids
            yield np.array(token_ids, dtype="int64")

        dataset.add_dynamic_item(datasets, text_pipeline)

        # Keys exposed by every dataset item.
        dataset.set_output_keys(
            datasets,
            ["id", "sig", "wrd", "tokens"], )

        # Batch samplers exist only when dynamic batching is requested.
        train_batch_sampler = None
        valid_batch_sampler = None
        if hparams["dynamic_batching"]:
            # NOTE(review): this resolves a top-level module named `sampler`;
            # confirm it is importable in the deployment layout.
            from sampler import DynamicBatchSampler  # noqa

            dynamic_hparams = hparams["dynamic_batch_sampler"]
            num_buckets = dynamic_hparams["num_buckets"]

            def build_sampler(split):
                # Train and valid samplers share every setting but the data.
                return DynamicBatchSampler(
                    split,
                    dynamic_hparams["max_batch_len"],
                    num_buckets=num_buckets,
                    length_func=lambda x: x["duration"],
                    shuffle=dynamic_hparams["shuffle_ex"],
                    batch_ordering=dynamic_hparams["batch_ordering"], )

            train_batch_sampler = build_sampler(train_data)
            valid_batch_sampler = build_sampler(valid_data)

        return (train_data, valid_data, test_data, tokenizer,
                train_batch_sampler, valid_batch_sampler, )

    def setup_dataloader(self):
        """Create the data loaders needed for the current run mode.

        Reads ``use_stream_data`` and ``use_sb_pipeline`` from a clone of
        ``self.config`` and stores them on ``self.use_streamdata`` and
        ``self.use_sb``.

        * With ``use_sb_pipeline`` set, the hyperparameter file named by
          ``config.sb_pipeline_conf`` is loaded and ``dataio_prepare`` builds
          the datasets; loaders are then created with ``make_dataloader``.
          A batch sampler returned by ``dataio_prepare`` replaces the
          corresponding dataloader options.
        * Otherwise loaders come from ``DataLoaderFactory``.

        When ``self.train`` is truthy, ``self.train_loader`` and
        ``self.valid_loader`` are set; otherwise ``self.test_loader`` is set
        (plus ``self.align_loader`` on the ``DataLoaderFactory`` path).
        """
        config = self.config.clone()
        self.use_streamdata = config.get("use_stream_data", False)
        self.use_sb = config.get("use_sb_pipeline", False)
        if self.use_sb:
            hparams_file = config.sb_pipeline_conf
            with open(hparams_file, 'r', encoding='utf8') as fin:
                hparams = load_hyperpyyaml(fin, None)

            # The tokenizer is also stored on self by dataio_prepare, so the
            # returned copy is not needed here.
            (train_data, valid_data, test_data, _tokenizer, train_bsampler,
             valid_bsampler, ) = self.dataio_prepare(hparams)

            train_dataloader_opts = hparams["train_dataloader_opts"]
            valid_dataloader_opts = hparams["valid_dataloader_opts"]

            # A dynamic batch sampler overrides the configured loader options.
            if train_bsampler is not None:
                train_dataloader_opts = {
                    "batch_sampler": train_bsampler,
                    "num_workers": hparams["num_workers"],
                }

            if valid_bsampler is not None:
                valid_dataloader_opts = {"batch_sampler": valid_bsampler}

            if self.train:
                self.train_loader = make_dataloader(
                    train_data, stage='train', **train_dataloader_opts)
                self.valid_loader = make_dataloader(
                    valid_data,
                    stage='val',
                    **valid_dataloader_opts, )
                logger.info("Setup train/valid Dataloader!")
            else:
                self.test_loader = make_dataloader(
                    test_data, stage='test', **hparams["test_dataloader_opts"])
        else:
            if self.train:
                self.train_loader = DataLoaderFactory.get_dataloader(
                    'train', config, self.args)
                self.valid_loader = DataLoaderFactory.get_dataloader(
                    'valid', config, self.args)
                logger.info("Setup train/valid Dataloader!")
            else:
                self.test_loader = DataLoaderFactory.get_dataloader(
                    'test', config, self.args)
                self.align_loader = DataLoaderFactory.get_dataloader(
                    'align', config, self.args)
                logger.info("Setup test/align Dataloader!")

    def setup_model(self):
        """Build the WavLM ASR model and, for training, its optimizers.

        Steps performed:

        1. Fill ``output_dim`` (and ``input_dim`` when the speechbrain-style
           pipeline is not used) on the config from the tokenizer or the
           active data loader.
        2. Create ``WavLMASR`` from the config and load the weights stored at
           ``config.wavlm_params_path`` into ``model.wavlm``.
        3. Wrap the model in ``paddle.DataParallel`` when ``self.parallel``.
        4. When training and ``config`` has ``audio_augment``, create
           ``self.speech_augmentation``.
        5. When training, create one LR scheduler and one optimizer for the
           ``enc``/``ctc`` parameters and another pair for the ``wavlm``
           parameters, and store them on ``self``.

        Nothing beyond step 4 happens when ``self.train`` is falsy.
        """
        config = self.config
        model_conf = config

        with UpdateConfig(model_conf):
            if self.use_sb:
                model_conf.output_dim = self.tokenizer.vocab_size
            else:
                loader = self.train_loader if self.train else self.test_loader
                model_conf.input_dim = loader.feat_dim
                model_conf.output_dim = loader.vocab_size

        model = WavLMASR.from_config(model_conf)

        model_dict = paddle.load(config.wavlm_params_path)
        model.wavlm.set_state_dict(model_dict)

        if self.parallel:
            model = paddle.DataParallel(model, find_unused_parameters=True)

        layer_tools.print_params(model, logger.info)
        self.model = model
        logger.info("Setup model!")

        # setup speech augmentation for wavlm
        if hasattr(config, 'audio_augment') and self.train:
            self.speech_augmentation = TimeDomainSpecAugment(
                **config.audio_augment)

        if not self.train:
            return

        model_optim_type = config.model_optim
        model_optim_conf = config.model_optim_conf
        logger.info("optim_model:{},{}", model_optim_type, model_optim_conf)
        wavlm_optim_type = config.wavlm_optim
        wavlm_optim_conf = config.wavlm_optim_conf
        # Labelled separately so the two optimizer configurations can be told
        # apart in the log.
        logger.info("optim_wavlm:{},{}", wavlm_optim_type, wavlm_optim_conf)

        model_scheduler_type = config.model_scheduler
        model_scheduler_conf = config.model_scheduler_conf
        wavlm_scheduler_type = config.wavlm_scheduler
        wavlm_scheduler_conf = config.wavlm_scheduler_conf

        # dict(**a, **b) is kept on purpose: it raises if the scheduler config
        # itself defines ``learning_rate`` or ``verbose``.
        model_scheduler_args = dict(
            **{"learning_rate": model_optim_conf.lr,
               "verbose": False}, **(dict(model_scheduler_conf)))

        wavlm_scheduler_args = dict(
            **{"learning_rate": wavlm_optim_conf.lr,
               "verbose": False}, **(dict(wavlm_scheduler_conf)))

        model_lr_scheduler = LRSchedulerFactory.from_args(model_scheduler_type,
                                                          model_scheduler_args)
        wavlm_lr_scheduler = LRSchedulerFactory.from_args(wavlm_scheduler_type,
                                                          wavlm_scheduler_args)

        def optimizer_args(optim_conf, parameters, lr_scheduler=None):
            # Optimizer kwargs: the configured options plus the parameters and
            # either the scheduler or, when none is given, the plain lr.
            optim_arg = dict(optim_conf)
            optim_arg.update({
                "learning_rate":
                lr_scheduler if lr_scheduler else optim_conf.lr,
                "parameters":
                parameters
            })
            return optim_arg

        # DataParallel keeps the wrapped network under ``_layers``.
        core = model._layers if self.parallel else model

        model_optimizer_args = optimizer_args(
            model_optim_conf,
            [{
                'params': core.enc.parameters()
            }, {
                'params': core.ctc.parameters()
            }],
            model_lr_scheduler)

        wavlm_optimizer_args = optimizer_args(
            wavlm_optim_conf, core.wavlm.parameters(), wavlm_lr_scheduler)

        model_optimizer = OptimizerFactory.from_args(model_optim_type,
                                                     model_optimizer_args)
        wavlm_optimizer = OptimizerFactory.from_args(wavlm_optim_type,
                                                     wavlm_optimizer_args)

        self.model_optimizer = model_optimizer
        self.wavlm_optimizer = wavlm_optimizer
        self.model_lr_scheduler = model_lr_scheduler
        self.wavlm_lr_scheduler = wavlm_lr_scheduler
        logger.info("Setup optimizer/lr_scheduler!")


class WavLMASRTester(WavLMASRTrainer):
    """Tester that decodes the test loader and reports error-rate statistics.

    Extends ``WavLMASRTrainer`` with a ``TextFeaturizer`` (built from
    ``config.unit_type`` and ``config.vocab_filepath``) that is used to turn
    reference token ids back into text.
    """

    def __init__(self, config, args):
        super().__init__(config, args)
        self.text_featurizer = TextFeaturizer(
            unit_type=config.unit_type, vocab=config.vocab_filepath)
        self.vocab_list = self.text_featurizer.vocab_list

    @staticmethod
    def _ratio(numerator, denominator):
        """Return ``numerator / denominator``, or NaN for a zero denominator."""
        return numerator / denominator if denominator else float('nan')

    def id2token(self, texts, texts_len):
        """Convert padded token-id sequences into transcripts.

        Args:
            texts: Iterable of per-utterance id sequences exposing ``.numpy()``
                (presumably paddle tensors - TODO confirm).
            texts_len: Iterable of objects exposing ``.numpy()`` that hold the
                valid length of the matching entry in ``texts``.

        Returns:
            list: One ``text_featurizer.defeaturize`` result per utterance.
        """
        trans = []
        for text, n in zip(texts, texts_len):
            n = n.numpy().item()
            # Drop the padding beyond the valid length before decoding.
            ids = text[:n]
            trans.append(self.text_featurizer.defeaturize(ids.numpy().tolist()))
        return trans

    def _collect_metrics(self,
                         utt_ids,
                         target_transcripts,
                         result_transcripts,
                         result_tokenids,
                         num_frames,
                         decode_time,
                         fout=None):
        """Score one decoded batch; shared by both ``*compute_metrics`` methods.

        Accumulates the error count and reference length over the batch,
        optionally writes one record per utterance to ``fout`` and logs each
        reference/hypothesis pair.

        Returns:
            dict: ``errors_sum``, ``len_refs``, ``num_ins``, ``error_rate``
            (NaN when the batch has no reference tokens), ``error_rate_type``,
            ``num_frames`` and ``decode_time``.
        """
        decode_cfg = self.config.decode
        use_cer = decode_cfg.error_rate_type == 'cer'
        errors_func = error_rate.char_errors if use_cer else error_rate.word_errors
        error_rate_func = error_rate.cer if use_cer else error_rate.wer

        errors_sum, len_refs, num_ins = 0.0, 0, 0
        for utt, target, result, rec_tids in zip(
                utt_ids, target_transcripts, result_transcripts,
                result_tokenids):
            errors, len_ref = errors_func(target, result)
            errors_sum += errors
            len_refs += len_ref
            num_ins += 1
            if fout:
                fout.write({
                    "utt": utt,
                    "refs": [target],
                    "hyps": [result],
                    "hyps_tokenid": [rec_tids],
                })
            logger.info(f"Utt: {utt}")
            logger.info(f"Ref: {target}")
            logger.info(f"Hyp: {result}")
            logger.info("One example error rate [%s] = %f" % (
                decode_cfg.error_rate_type, error_rate_func(target, result)))

        return dict(
            errors_sum=errors_sum,
            len_refs=len_refs,
            num_ins=num_ins,  # num examples
            error_rate=self._ratio(errors_sum, len_refs),
            error_rate_type=decode_cfg.error_rate_type,
            num_frames=num_frames,
            decode_time=decode_time)

    def compute_metrics(self, id, audio, audio_len, texts, texts_len,
                        fout=None):
        """Decode one batch and score it against the reference token ids.

        Args:
            id: Utterance identifiers of the batch.
            audio: Model input passed to ``self.model.decode``.
            audio_len: Per-utterance lengths; their sum is reported as
                ``num_frames``.
            texts: Padded reference token ids.
            texts_len: Valid length of each entry in ``texts``.
            fout: Optional writer with a ``write(dict)`` method.

        Returns:
            dict: See ``_collect_metrics``.
        """
        decode_cfg = self.config.decode

        # The timed window covers reference detokenization and decoding.
        start_time = time.time()
        target_transcripts = self.id2token(texts, texts_len)
        result_transcripts, result_tokenids = self.model.decode(
            audio,
            text_feature=self.text_featurizer,
            decoding_method=decode_cfg.decoding_method,
            beam_size=decode_cfg.beam_size)
        decode_time = time.time() - start_time

        return self._collect_metrics(
            id,
            target_transcripts,
            result_transcripts,
            result_tokenids,
            num_frames=audio_len.sum().numpy().item(),
            decode_time=decode_time,
            fout=fout)

    def sb_compute_metrics(self, id, sig, wrd, tokens, fout=None):
        """Decode and score one batch produced by the ``use_sb`` data pipeline.

        Args:
            id: Utterance identifiers of the batch.
            sig: Pair-like object; ``sig[0]`` is passed to the decoder and the
                sum of ``sig[1]`` is reported as ``num_frames``.
            wrd: Reference transcripts, used as they are.
            tokens: Unused here; accepted because the batch is expanded with
                ``**batch`` by ``test``.
            fout: Optional writer with a ``write(dict)`` method.

        Returns:
            dict: See ``_collect_metrics``.
        """
        decode_cfg = self.config.decode

        start_time = time.time()
        result_transcripts, result_tokenids = self.model.decode(
            sig[0],
            text_feature=self.tokenizer,
            decoding_method=decode_cfg.decoding_method,
            beam_size=decode_cfg.beam_size,
            sb_pipeline=True)
        decode_time = time.time() - start_time

        return self._collect_metrics(
            id,
            wrd,
            result_transcripts,
            result_tokenids,
            num_frames=sig[1].sum().numpy().item(),
            decode_time=decode_time,
            fout=fout)

    @mp_tools.rank_zero_only
    @paddle.no_grad()
    def test(self):
        """Decode the whole test loader and write results and a summary.

        Per-utterance records go to ``self.args.result_file`` (JSON lines).
        A one-line JSON summary is written next to it with the extension
        replaced by ``.err``. With an empty test loader the error rate and
        RTF are reported as NaN instead of raising.
        """
        logger.info(f"Test Total Examples: {len(self.test_loader.dataset)}")
        self.model.eval()

        # Same value every batch reports; set up front so the summary is
        # well-formed even when the loader yields nothing.
        error_rate_type = self.config.decode.error_rate_type
        errors_sum, len_refs, num_ins = 0.0, 0, 0
        num_frames = 0.0
        num_time = 0.0
        rtf = float('nan')

        with jsonlines.open(self.args.result_file, 'w') as fout:
            for i, batch in enumerate(self.test_loader):
                if self.use_sb:
                    metrics = self.sb_compute_metrics(**batch, fout=fout)
                else:
                    metrics = self.compute_metrics(*batch, fout=fout)
                num_frames += metrics['num_frames']
                num_time += metrics["decode_time"]
                errors_sum += metrics['errors_sum']
                len_refs += metrics['len_refs']
                num_ins += metrics['num_ins']
                error_rate_type = metrics['error_rate_type']
                rtf = self._ratio(num_time, num_frames)
                logger.info("RTF: %f, Error rate [%s] (%d/?) = %f" %
                            (rtf, error_rate_type, num_ins,
                             self._ratio(errors_sum, len_refs)))

        if num_ins == 0:
            logger.warning(
                "No test examples were decoded; error rate and RTF are undefined."
            )
        final_error_rate = self._ratio(errors_sum, len_refs)

        # logging
        msg = "Test: "
        msg += "epoch: {}, ".format(self.epoch)
        msg += "step: {}, ".format(self.iteration)
        msg += "Final error rate [%s] (%d/%d) = %f" % (
            error_rate_type, num_ins, num_ins, final_error_rate)
        logger.info(msg)

        err_meta_path = os.path.splitext(self.args.result_file)[0] + '.err'
        with open(err_meta_path, 'w', encoding='utf8') as f:
            data = json.dumps({
                "epoch":
                self.epoch,
                "step":
                self.iteration,
                "rtf":
                rtf,
                error_rate_type:
                final_error_rate,
                "dataset_hour": (num_frames) / 1000.0 / 3600.0,
                "process_hour":
                num_time / 1000.0 / 3600.0,
                "num_examples":
                num_ins,
                "err_sum":
                errors_sum,
                "ref_len":
                len_refs,
                "decode_method":
                self.config.decode.decoding_method,
            })
            f.write(data + '\n')


================================================
FILE: paddlespeech/s2t/exps/whisper/test_wav.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from Whisper (https://github.com/openai/whisper/whisper/)
import os.path
import sys

import distutils
import numpy as np
import paddle
import soundfile
from yacs.config import CfgNode

from paddlespeech.s2t.models.whisper import log_mel_spectrogram
from paddlespeech.s2t.models.whisper import ModelDimensions
from paddlespeech.s2t.models.whisper import transcribe
from paddlespeech.s2t.models.whisper import Whisper
from paddlespeech.s2t.training.cli import default_argument_parser
from paddlespeech.s2t.utils.log import Log
from paddlespeech.utils.argparse import strtobool

logger = Log(__name__).getlog()


class WhisperInfer():
    """Load a Whisper checkpoint and transcribe a single audio file.

    Args:
        config: Mapping-like configuration (``pop`` and attribute access are
            used). Must contain ``ngpu`` and ``model_file``; both keys are
            removed from it during construction. The remaining entries are
            used by ``run``.
        args: Parsed command-line arguments; ``audio_file``, ``ngpu`` and
            ``result_file`` are read.
    """

    def __init__(self, config, args):
        self.args = args
        self.config = config
        self.audio_file = args.audio_file

        paddle.set_device('gpu' if self.args.ngpu > 0 else 'cpu')
        config.pop("ngpu")

        #load_model
        model_dict = paddle.load(self.config.model_file)
        config.pop("model_file")
        dims = ModelDimensions(**model_dict["dims"])
        self.dims = dims
        self.model = Whisper(dims)
        self.model.load_dict(model_dict)

    def run(self):
        """Validate the audio file, transcribe it and optionally save the text.

        Only instance state (``self.args``, ``self.config``,
        ``self.audio_file``) is used, so the method also works when this
        module is imported rather than executed as a script. The decoding
        options are taken from a shallow copy of ``self.config``, which leaves
        the configuration untouched and lets ``run`` be called repeatedly.

        Returns:
            The value returned by ``transcribe``. When
            ``self.args.result_file`` is not None, ``str(result)`` is also
            written to that file as UTF-8.
        """
        check(self.audio_file)

        # Work on a copy: the temperature keys are stripped before the rest is
        # forwarded to transcribe() as keyword arguments.
        decode_opts = dict(self.config)

        with paddle.no_grad():
            temperature = decode_opts.pop("temperature")
            temperature_increment_on_fallback = decode_opts.pop(
                "temperature_increment_on_fallback")
            if temperature_increment_on_fallback is not None:
                # Schedule from the start temperature up to 1.0 inclusive.
                temperature = tuple(
                    np.arange(temperature, 1.0 + 1e-6,
                              temperature_increment_on_fallback))
            else:
                temperature = [temperature]

            #load audio
            # NOTE(review): padding=480000 would be 30 s at the 16 kHz rate
            # enforced by check() if it counts samples - TODO confirm.
            mel = log_mel_spectrogram(
                self.audio_file,
                resource_path=self.config.resource_path,
                n_mels=self.dims.n_mels,
                padding=480000)
            result = transcribe(
                self.model, mel, temperature=temperature, **decode_opts)
            if self.args.result_file is not None:
                # Explicit encoding: transcripts may contain non-ASCII text.
                with open(self.args.result_file, 'w', encoding='utf8') as f:
                    f.write(str(result))
            return result


def check(audio_file: str):
    """Validate that ``audio_file`` exists, is readable and is sampled at 16 kHz.

    Args:
        audio_file (str): Path of the audio file to validate.

    Raises:
        SystemExit: With exit code -1 when the path is not an existing file or
            when ``soundfile.read`` fails on it.
        AssertionError: When the sample rate is not 16000.
    """
    if not os.path.isfile(audio_file):
        print("Please input the right audio file path")
        sys.exit(-1)

    logger.info("checking the audio file format......")
    try:
        _, sample_rate = soundfile.read(audio_file)
    except Exception as e:
        logger.error(str(e))
        logger.error(
            "can not open the wav file, please check the audio file format")
        sys.exit(-1)
    logger.info("The sample rate is %d" % sample_rate)
    # Raised explicitly instead of using `assert`, which is removed when Python
    # runs with -O and would let unsupported sample rates through.
    if sample_rate != 16000:
        raise AssertionError(
            "The sample rate must be 16000, but got %d" % sample_rate)
    logger.info("The audio file format is right")


def main(config, args):
    """Build a ``WhisperInfer`` from ``config`` and ``args`` and run it once.

    The transcription result is not returned.
    """
    runner = WhisperInfer(config, args)
    runner.run()


if __name__ == "__main__":
    # Extend the parser from default_argument_parser() with the options that
    # are specific to this script.
    cli = default_argument_parser()
    # save asr result to
    cli.add_argument(
        "--result_file", type=str, help="path of save the asr result")
    cli.add_argument(
        "--audio_file", type=str, help="path of the input audio file")
    cli.add_argument(
        "--debug", type=strtobool, default=False, help="for debug.")
    args = cli.parse_args()

    # Configuration layers, applied in order: main config file, decode config
    # file (stored under ``config.decode``), then command-line overrides.
    config = CfgNode(new_allowed=True)
    if args.config:
        config.merge_from_file(args.config)
    if args.decode_cfg:
        decode_node = CfgNode(new_allowed=True)
        decode_node.merge_from_file(args.decode_cfg)
        config.decode = decode_node
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()

    main(config, args)


================================================
FILE: paddlespeech/s2t/frontend/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/frontend/audio.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the audio segment class."""
import copy
import io
import random
import re
import struct

import numpy as np
import resampy
import soundfile
from scipy import signal

from .utility import convert_samples_from_float32
from .utility import convert_samples_to_float32
from .utility import subfile_from_tar


class AudioSegment():
    """Monaural audio segment abstraction.

    Samples are stored internally as a 1-D array; multi-channel input is
    averaged over the channel axis at construction time.

    :param samples: Audio samples [num_samples x num_channels].
    :type samples: ndarray.float32
    :param sample_rate: Audio sample rate.
    :type sample_rate: int
    :raises TypeError: If the sample data type is not float or int.
    """

    def __init__(self, samples, sample_rate):
        """Create audio segment from samples.

        Samples are converted to float32 internally, with int scaled to
        [-1, 1]. Input with two or more dimensions is reduced to mono by
        averaging over axis 1.
        """
        self._samples = self._convert_samples_to_float32(samples)
        self._sample_rate = sample_rate
        if self._samples.ndim >= 2:
            self._samples = np.mean(self._samples, 1)

    def __eq__(self, other):
        """Return whether two objects are equal.

        Two segments are equal when they have exactly the same type, the
        same sample rate and element-wise identical samples.
        """
        if type(other) is not type(self):
            return False
        if self._sample_rate != other._sample_rate:
            return False
        if self._samples.shape != other._samples.shape:
            return False
        if np.any(self._samples != other._samples):
            return False
        return True

    def __ne__(self, other):
        """Return whether two objects are unequal."""
        return not self.__eq__(other)

    def __str__(self):
        """Return human-readable representation of segment."""
        return ("%s: num_samples=%d, sample_rate=%d, duration=%.2fsec, "
                "rms=%.2fdB" % (type(self), self.num_samples, self.sample_rate,
                                self.duration, self.rms_db))

    @classmethod
    def from_file(cls, file, infos=None):
        """Create audio segment from audio file.

        Dispatches on the form of ``file``:

        * a string ending in ``.seqbin_<N>`` is read as utterance ``N`` of
          a sequence file (see :meth:`from_sequence_file`);
        * a string starting with ``tar:`` is resolved through
          ``subfile_from_tar`` and then read recursively;
        * anything else is handed to ``soundfile.read``.

        Args:
            file (str|file): Filepath or file object to audio file.
            infos (TarLocalData, optional): tar2obj and tar2infos. Defaults to None.

        Returns:
            AudioSegment: Audio segment instance.
        """
        # The dot is escaped so that only a literal ".seqbin_<N>" suffix is
        # routed to the sequence-file reader.
        if isinstance(file, str) and re.search(r"\.seqbin_\d+$", file):
            return cls.from_sequence_file(file)
        elif isinstance(file, str) and file.startswith('tar:'):
            return cls.from_file(subfile_from_tar(file, infos))
        else:
            samples, sample_rate = soundfile.read(file, dtype='float32')
            return cls(samples, sample_rate)

    @classmethod
    def slice_from_file(cls, file, start=None, end=None):
        """Loads a small section of an audio without having to load
        the entire file into the memory which can be incredibly wasteful.

        :param file: Input audio filepath or file object.
        :type file: str|file
        :param start: Start time in seconds. If start is negative, it wraps
                      around from the end. If not provided, this function
                      reads from the very beginning.
        :type start: float
        :param end: End time in seconds. If end is negative, it wraps around
                    from the end. If not provided, the default behavior is
                    to read to the end of the file.
        :type end: float
        :return: AudioSegment instance of the specified slice of the input
                 audio file.
        :rtype: AudioSegment
        :raise ValueError: If start or end is incorrectly set, e.g. out of
                           bounds in time.
        """
        # The context manager guarantees the sound file handle is released
        # even when one of the range checks below raises.
        with soundfile.SoundFile(file) as sndfile:
            sample_rate = sndfile.samplerate
            duration = float(len(sndfile)) / sample_rate
            start = 0. if start is None else start
            end = duration if end is None else end
            # Negative positions are measured from the end of the file.
            if start < 0.0:
                start += duration
            if end < 0.0:
                end += duration
            if start < 0.0:
                raise ValueError("The slice start position (%f s) is out of "
                                 "bounds." % start)
            if end < 0.0:
                raise ValueError(
                    "The slice end position (%f s) is out of bounds." % end)
            if start > end:
                raise ValueError(
                    "The slice start position (%f s) is later than "
                    "the slice end position (%f s)." % (start, end))
            if end > duration:
                raise ValueError(
                    "The slice end position (%f s) is out of bounds "
                    "(> %f s)" % (end, duration))
            start_frame = int(start * sample_rate)
            end_frame = int(end * sample_rate)
            sndfile.seek(start_frame)
            data = sndfile.read(
                frames=end_frame - start_frame, dtype='float32')
        return cls(data, sample_rate)

    @classmethod
    def from_sequence_file(cls, filepath):
        """Create audio segment from sequence file. Sequence file is a binary
        file containing a collection of multiple audio files, with several
        header bytes in the head indicating the offsets of each audio byte data
        chunk.

        The format is:

            4 bytes (int, version),
            4 bytes (int, num of utterance),
            4 bytes (int, bytes per header),
            [bytes_per_header*(num_utterance+1)] bytes (offsets for each audio),
            audio_bytes_data_of_1st_utterance,
            audio_bytes_data_of_2nd_utterance,
            ......

        Sequence file name must end with ".seqbin". And the filename of the 5th
        utterance's audio file in sequence file "xxx.seqbin" must be
        "xxx.seqbin_5", with "5" indicating the utterance index within this
        sequence file (starting from 1).

        If the extracted bytes cannot be decoded by :meth:`from_bytes`, they
        are interpreted as raw int16 samples at 8000 Hz.

        :param filepath: Filepath of sequence file.
        :type filepath: str
        :return: Audio segment instance.
        :rtype: AudioSegment
        :raises IOError: If the path does not look like "xxx.seqbin_N" or
                         the utterance index is outside the file's range.
        """
        # parse filepath
        matches = re.match(r"(.+\.seqbin)_(\d+)", filepath)
        if matches is None:
            raise IOError("File type of %s is not supported" % filepath)
        filename = matches.group(1)
        fileno = int(matches.group(2))

        # Binary mode must not be given an encoding (open() rejects it), and
        # the with-block closes the file even if parsing fails.
        with open(filename, mode='rb') as f:
            # read headers
            f.read(4)  # version field, not used
            num_utterances = struct.unpack("i", f.read(4))[0]
            bytes_per_header = struct.unpack("i", f.read(4))[0]
            if not 1 <= fileno <= num_utterances:
                raise IOError(
                    "Utterance index %d is out of range [1, %d] in %s" %
                    (fileno, num_utterances, filename))
            header_bytes = f.read(bytes_per_header * (num_utterances + 1))
            header = [
                struct.unpack("i", header_bytes[bytes_per_header * i:
                                                bytes_per_header * (i + 1)])[0]
                for i in range(num_utterances + 1)
            ]

            # read audio bytes: utterance N spans offsets [N-1, N)
            f.seek(header[fileno - 1])
            audio_bytes = f.read(header[fileno] - header[fileno - 1])

        # create audio segment
        try:
            return cls.from_bytes(audio_bytes)
        except Exception:
            # Fallback: treat the payload as headerless int16 PCM at 8 kHz.
            samples = np.frombuffer(audio_bytes, dtype='int16')
            return cls(samples=samples, sample_rate=8000)

    @classmethod
    def from_bytes(cls, bytes):
        """Create audio segment from a byte string containing audio samples.

        The bytes are decoded with ``soundfile.read`` through an in-memory
        buffer.

        :param bytes: Byte string containing audio samples.
        :type bytes: str
        :return: Audio segment instance.
        :rtype: AudioSegment
        """
        samples, sample_rate = soundfile.read(
            io.BytesIO(bytes), dtype='float32')
        return cls(samples, sample_rate)

    @classmethod
    def from_pcm(cls, samples, sample_rate):
        """Create audio segment from an array of PCM samples.

        :param samples: Audio samples [num_samples x num_channels].
        :type samples: numpy.ndarray
        :param sample_rate: Audio sample rate.
        :type sample_rate: int
        :return: Audio segment instance.
        :rtype: AudioSegment
        """
        return cls(samples, sample_rate)

    @classmethod
    def concatenate(cls, *segments):
        """Concatenate an arbitrary number of audio segments together.

        :param *segments: Input audio segments to be concatenated.
        :type *segments: tuple of AudioSegment
        :return: Audio segment instance as concatenating results.
        :rtype: AudioSegment
        :raises ValueError: If the number of segments is zero, or if the
                            sample_rate of any segments does not match.
        :raises TypeError: If any segment is not AudioSegment instance.
        """
        # Perform basic sanity-checks.
        if len(segments) == 0:
            raise ValueError("No audio segments are given to concatenate.")
        sample_rate = None
        for seg in segments:
            # Check the type before touching private attributes so that a
            # foreign object yields the documented TypeError rather than an
            # AttributeError.
            if type(seg) is not cls:
                raise TypeError("Only audio segments of the same type "
                                "can be concatenated.")
            if sample_rate is None:
                sample_rate = seg._sample_rate
            elif sample_rate != seg._sample_rate:
                raise ValueError("Can't concatenate segments with "
                                 "different sample rates")
        samples = np.concatenate([seg._samples for seg in segments])
        return cls(samples, sample_rate)

    @classmethod
    def make_silence(cls, duration, sample_rate):
        """Creates a silent audio segment of the given duration and sample rate.

        :param duration: Length of silence in seconds.
        :type duration: float
        :param sample_rate: Sample rate.
        :type sample_rate: float
        :return: Silent AudioSegment instance of the given duration.
        :rtype: AudioSegment
        """
        samples = np.zeros(int(duration * sample_rate))
        return cls(samples, sample_rate)

    def to_wav_file(self, filepath, dtype='float32'):
        """Save audio segment to disk as wav file.

        :param filepath: WAV filepath or file object to save the
                         audio segment.
        :type filepath: str|file
        :param dtype: Subtype for audio file. Options: 'int16', 'int32',
                      'float32', 'float64'. Default is 'float32'.
        :type dtype: str
        :raises TypeError: If dtype is not supported.
        """
        samples = self._convert_samples_from_float32(self._samples, dtype)
        subtype_map = {
            'int16': 'PCM_16',
            'int32': 'PCM_32',
            'float32': 'FLOAT',
            'float64': 'DOUBLE'
        }
        soundfile.write(
            filepath,
            samples,
            self._sample_rate,
            format='WAV',
            subtype=subtype_map[dtype])

    def superimpose(self, other):
        """Add samples from another segment to those of this segment
        (sample-wise addition, not segment concatenation).

        Note that this is an in-place transformation.

        :param other: Segment containing samples to be added in.
        :type other: AudioSegments
        :raise TypeError: If ``other`` is not an audio segment.
        :raise ValueError: If the sample rates of the two segments are not
                           equal, or if the lengths of segments don't match.
        """
        # Any AudioSegment (including subclasses on either side) can be
        # mixed in; only non-segment objects are rejected.
        if not isinstance(other, AudioSegment):
            raise TypeError("Cannot add segments of different types: %s "
                            "and %s." % (type(self), type(other)))
        if self._sample_rate != other._sample_rate:
            raise ValueError("Sample rates must match to add segments.")
        if len(self._samples) != len(other._samples):
            raise ValueError("Segment lengths must match to add segments.")
        self._samples += other._samples

    def to_bytes(self, dtype='float32'):
        """Create a byte string containing the audio content.

        :param dtype: Data type for export samples. Options: 'int16', 'int32',
                      'float32', 'float64'. Default is 'float32'.
        :type dtype: str
        :return: Byte string containing audio content.
        :rtype: bytes
        """
        samples = self._convert_samples_from_float32(self._samples, dtype)
        # ndarray.tostring() is a deprecated alias that newer NumPy removed;
        # tobytes() returns the same raw buffer.
        return samples.tobytes()

    def to(self, dtype='int16'):
        """Create a `dtype` audio content.

        :param dtype: Data type for export samples. Options: 'int16', 'int32',
                      'float32', 'float64'. Default is 'int16'.
        :type dtype: str
        :return: np.ndarray containing `dtype` audio content.
        :rtype: ndarray
        """
        samples = self._convert_samples_from_float32(self._samples, dtype)
        return samples

    def gain_db(self, gain):
        """Apply gain in decibels to samples.

        Note that this is an in-place transformation.

        :param gain: Gain in decibels to apply to samples.
        :type gain: float|1darray
        """
        # dB -> linear amplitude factor.
        self._samples *= 10.**(gain / 20.)

    def change_speed(self, speed_rate):
        """Change the audio speed using the sox ``speed`` effect
        (through ``soxbindings``, installed on demand if missing).

        Note that this is an in-place transformation.

        :param speed_rate: Rate of speed change:
                           speed_rate > 1.0, speed up the audio;
                           speed_rate = 1.0, unchanged;
                           speed_rate < 1.0, slow down the audio;
                           speed_rate <= 0.0, not allowed, raise ValueError.
        :type speed_rate: float
        :raises ValueError: If speed_rate <= 0.0.
        :raises RuntimeError: If soxbindings is missing and cannot be
                              installed.
        """
        if speed_rate == 1.0:
            return
        if speed_rate <= 0:
            raise ValueError("speed_rate should be greater than zero.")

        # numpy
        # old_length = self._samples.shape[0]
        # new_length = int(old_length / speed_rate)
        # old_indices = np.arange(old_length)
        # new_indices = np.linspace(start=0, stop=old_length, num=new_length)
        # self._samples = np.interp(new_indices, old_indices, self._samples)

        # sox, slow
        try:
            import soxbindings as sox
        except ImportError:
            try:
                from paddlespeech.s2t.utils import dynamic_pip_install
                package = "sox"
                dynamic_pip_install.install(package)
                package = "soxbindings"
                dynamic_pip_install.install(package)
                import soxbindings as sox
            except Exception:
                raise RuntimeError(
                    "Can not install soxbindings on your system.")

        tfm = sox.Transformer()
        tfm.set_globals(multithread=False)
        tfm.speed(speed_rate)
        self._samples = tfm.build_array(
            input_array=self._samples,
            sample_rate_in=self._sample_rate).squeeze(-1).astype(
                np.float32).copy()

    def normalize(self, target_db=-20, max_gain_db=300.0):
        """Normalize audio to be of the desired RMS value in decibels.

        Note that this is an in-place transformation.

        :param target_db: Target RMS value in decibels. This value should be
                          less than 0.0 as 0.0 is full-scale audio.
        :type target_db: float
        :param max_gain_db: Max amount of gain in dB that can be applied for
                            normalization. This is to prevent nans when
                            attempting to normalize a signal consisting of
                            all zeros.
        :type max_gain_db: float
        :raises ValueError: If the required gain to normalize the segment to
                            the target_db value exceeds max_gain_db.
        """
        gain = target_db - self.rms_db
        if gain > max_gain_db:
            raise ValueError(
                "Unable to normalize segment to %f dB because the "
                "the probable gain have exceeds max_gain_db (%f dB)" %
                (target_db, max_gain_db))
        self.gain_db(min(max_gain_db, gain))

    def normalize_online_bayesian(self,
                                  target_db,
                                  prior_db,
                                  prior_samples,
                                  startup_delay=0.0):
        """Normalize audio using a production-compatible online/causal
        algorithm. This uses an exponential likelihood and gamma prior to
        make online estimates of the RMS even when there are very few samples.

        Note that this is an in-place transformation.

        :param target_db: Target RMS value in decibels.
        :type target_db: float
        :param prior_db: Prior RMS estimate in decibels.
        :type prior_db: float
        :param prior_samples: Prior strength in number of samples.
        :type prior_samples: float
        :param startup_delay: Default 0.0s. If provided, this function will
                              accrue statistics for the first startup_delay
                              seconds before applying online normalization.
        :type startup_delay: float
        """
        # Estimate total RMS online.
        startup_sample_idx = min(self.num_samples - 1,
                                 int(self.sample_rate * startup_delay))
        prior_mean_squared = 10.**(prior_db / 10.)
        prior_sum_of_squares = prior_mean_squared * prior_samples
        cumsum_of_squares = np.cumsum(self.samples**2)
        sample_count = np.arange(self.num_samples) + 1
        if startup_sample_idx > 0:
            # During the startup window every sample uses the statistics
            # accumulated at the end of that window.
            cumsum_of_squares[:startup_sample_idx] = \
                cumsum_of_squares[startup_sample_idx]
            sample_count[:startup_sample_idx] = \
                sample_count[startup_sample_idx]
        mean_squared_estimate = ((cumsum_of_squares + prior_sum_of_squares) /
                                 (sample_count + prior_samples))
        rms_estimate_db = 10 * np.log10(mean_squared_estimate)
        # Compute required time-varying gain.
        gain_db = target_db - rms_estimate_db
        self.gain_db(gain_db)

    def resample(self, target_sample_rate, filter='kaiser_best'):
        """Resample the audio to a target sample rate.

        Note that this is an in-place transformation; nothing is returned.

        :param target_sample_rate: Target sample rate.
        :type target_sample_rate: int
        :param filter: The resampling filter to use one of {'kaiser_best',
                       'kaiser_fast'}.
        :type filter: str
        """
        self._samples = resampy.resample(
            self.samples, self.sample_rate, target_sample_rate, filter=filter)
        self._sample_rate = target_sample_rate

    def pad_silence(self, duration, sides='both'):
        """Pad this audio sample with a period of silence.

        Note that this is an in-place transformation.

        :param duration: Length of silence in seconds to pad.
        :type duration: float
        :param sides: Position for padding:
                     'beginning' - adds silence in the beginning;
                     'end' - adds silence in the end;
                     'both' - adds silence in both the beginning and the end.
        :type sides: str
        :raises ValueError: If sides is not supported.
        """
        if duration == 0.0:
            return self
        cls = type(self)
        silence = self.make_silence(duration, self._sample_rate)
        if sides == "beginning":
            padded = cls.concatenate(silence, self)
        elif sides == "end":
            padded = cls.concatenate(self, silence)
        elif sides == "both":
            padded = cls.concatenate(silence, self, silence)
        else:
            raise ValueError("Unknown value for the sides %s" % sides)
        self._samples = padded._samples

    def shift(self, shift_ms):
        """Shift the audio in time. If `shift_ms` is positive, shift with time
        advance; if negative, shift with time delay. Silence are padded to
        keep the duration unchanged.

        Note that this is an in-place transformation.

        :param shift_ms: Shift time in milliseconds. If positive, shift with
                         time advance; if negative; shift with time delay.
        :type shift_ms: float
        :raises ValueError: If shift_ms is longer than audio duration.
        """
        if abs(shift_ms) / 1000.0 > self.duration:
            raise ValueError("Absolute value of shift_ms should be smaller "
                             "than audio duration.")
        shift_samples = int(shift_ms * self._sample_rate / 1000)
        if shift_samples > 0:
            # time advance
            self._samples[:-shift_samples] = self._samples[shift_samples:]
            self._samples[-shift_samples:] = 0
        elif shift_samples < 0:
            # time delay
            self._samples[-shift_samples:] = self._samples[:shift_samples]
            self._samples[:-shift_samples] = 0

    def subsegment(self, start_sec=None, end_sec=None):
        """Cut the AudioSegment between given boundaries.

        Note that this is an in-place transformation.

        :param start_sec: Beginning of subsegment in seconds.
        :type start_sec: float
        :param end_sec: End of subsegment in seconds.
        :type end_sec: float
        :raise ValueError: If start_sec or end_sec is incorrectly set, e.g. out
                           of bounds in time.
        """
        start_sec = 0.0 if start_sec is None else start_sec
        end_sec = self.duration if end_sec is None else end_sec
        # Negative positions are measured from the end of the segment.
        if start_sec < 0.0:
            start_sec = self.duration + start_sec
        if end_sec < 0.0:
            end_sec = self.duration + end_sec
        if start_sec < 0.0:
            raise ValueError("The slice start position (%f s) is out of "
                             "bounds." % start_sec)
        if end_sec < 0.0:
            raise ValueError("The slice end position (%f s) is out of bounds." %
                             end_sec)
        if start_sec > end_sec:
            raise ValueError("The slice start position (%f s) is later than "
                             "the end position (%f s)." % (start_sec, end_sec))
        if end_sec > self.duration:
            raise ValueError("The slice end position (%f s) is out of bounds "
                             "(> %f s)" % (end_sec, self.duration))
        start_sample = int(round(start_sec * self._sample_rate))
        end_sample = int(round(end_sec * self._sample_rate))
        self._samples = self._samples[start_sample:end_sample]

    def random_subsegment(self, subsegment_length, rng=None):
        """Cut the specified length of the audiosegment randomly.

        Note that this is an in-place transformation.

        :param subsegment_length: Subsegment length in seconds.
        :type subsegment_length: float
        :param rng: Random number generator state.
        :type rng: random.Random
        :raises ValueError: If the length of subsegment is greater than
                            the original segment.
        """
        rng = random.Random() if rng is None else rng
        if subsegment_length > self.duration:
            raise ValueError("Length of subsegment must not be greater "
                             "than original segment.")
        start_time = rng.uniform(0.0, self.duration - subsegment_length)
        self.subsegment(start_time, start_time + subsegment_length)

    def convolve(self, impulse_segment, allow_resample=False):
        """Convolve this audio segment with the given impulse segment.

        Note that this is an in-place transformation. When resampling is
        allowed and needed, ``impulse_segment`` itself is resampled in place.

        :param impulse_segment: Impulse response segments.
        :type impulse_segment: AudioSegment
        :param allow_resample: Indicates whether resampling is allowed when
                               the impulse_segment has a different sample
                               rate from this signal.
        :type allow_resample: bool
        :raises ValueError: If the sample rate is not match between two
                            audio segments when resample is not allowed.
        """
        if allow_resample and self.sample_rate != impulse_segment.sample_rate:
            impulse_segment.resample(self.sample_rate)
        if self.sample_rate != impulse_segment.sample_rate:
            raise ValueError("Impulse segment's sample rate (%d Hz) is not "
                             "equal to base signal sample rate (%d Hz)." %
                             (impulse_segment.sample_rate, self.sample_rate))
        samples = signal.fftconvolve(self.samples, impulse_segment.samples,
                                     "full")
        self._samples = samples

    def convolve_and_normalize(self, impulse_segment, allow_resample=False):
        """Convolve and normalize the resulting audio segment so that it
        has the same average power as the input signal.

        Note that this is an in-place transformation.

        :param impulse_segment: Impulse response segments.
        :type impulse_segment: AudioSegment
        :param allow_resample: Indicates whether resampling is allowed when
                               the impulse_segment has a different sample
                               rate from this signal.
        :type allow_resample: bool
        """
        # Remember the level before convolution so it can be restored.
        target_db = self.rms_db
        self.convolve(impulse_segment, allow_resample=allow_resample)
        self.normalize(target_db)

    def add_noise(self,
                  noise,
                  snr_dB,
                  allow_downsampling=False,
                  max_gain_db=300.0,
                  rng=None):
        """Add the given noise segment at a specific signal-to-noise ratio.
        If the noise segment is longer than this segment, a random subsegment
        of matching length is sampled from it and used instead.

        Note that this is an in-place transformation of this segment; the
        ``noise`` argument is left untouched (work is done on copies).

        :param noise: Noise signal to add.
        :type noise: AudioSegment
        :param snr_dB: Signal-to-Noise Ratio, in decibels.
        :type snr_dB: float
        :param allow_downsampling: Whether to allow the noise signal to be
                                   downsampled to match the base signal sample
                                   rate.
        :type allow_downsampling: bool
        :param max_gain_db: Maximum amount of gain to apply to noise signal
                            before adding it in. This is to prevent attempting
                            to apply infinite gain to a zero signal.
        :type max_gain_db: float
        :param rng: Random number generator state.
        :type rng: None|random.Random
        :raises ValueError: If the sample rate does not match between the two
                            audio segments when downsampling is not allowed, or
                            if the duration of noise segments is shorter than
                            original audio segments.
        """
        rng = random.Random() if rng is None else rng
        if allow_downsampling and noise.sample_rate > self.sample_rate:
            # resample() works in place and returns None, so it must not be
            # used as an expression; resample a private copy instead so the
            # caller's noise segment keeps its original rate.
            noise = copy.deepcopy(noise)
            noise.resample(self.sample_rate)
        if noise.sample_rate != self.sample_rate:
            raise ValueError("Noise sample rate (%d Hz) is not equal to base "
                             "signal sample rate (%d Hz)." % (noise.sample_rate,
                                                              self.sample_rate))
        if noise.duration < self.duration:
            raise ValueError("Noise signal (%f sec) must be at least as long as"
                             " base signal (%f sec)." %
                             (noise.duration, self.duration))
        # Gain that puts the noise snr_dB below the signal level, capped.
        noise_gain_db = min(self.rms_db - noise.rms_db - snr_dB, max_gain_db)
        noise_new = copy.deepcopy(noise)
        noise_new.random_subsegment(self.duration, rng=rng)
        noise_new.gain_db(noise_gain_db)
        self.superimpose(noise_new)

    @property
    def samples(self):
        """Return audio samples.

        A copy is returned, so callers cannot modify the segment through it.

        :return: Audio samples.
        :rtype: ndarray
        """
        return self._samples.copy()

    @property
    def sample_rate(self):
        """Return audio sample rate.

        :return: Audio sample rate.
        :rtype: int
        """
        return self._sample_rate

    @property
    def num_samples(self):
        """Return number of samples.

        :return: Number of samples.
        :rtype: int
        """
        return self._samples.shape[0]

    @property
    def duration(self):
        """Return audio duration.

        :return: Audio duration in seconds.
        :rtype: float
        """
        return self._samples.shape[0] / float(self._sample_rate)

    @property
    def rms_db(self):
        """Return root mean square energy of the audio in decibels.

        :return: Root mean square energy in decibels.
        :rtype: float
        """
        # square root => multiply by 10 instead of 20 for dBs
        mean_square = np.mean(self._samples**2)
        return 10 * np.log10(mean_square)

    def _convert_samples_to_float32(self, samples):
        """Convert sample type to float32.

        Audio sample type is usually integer or float-point.
        Integers will be scaled to [-1, 1] in float32.
        """
        return convert_samples_to_float32(samples)

    def _convert_samples_from_float32(self, samples, dtype):
        """Convert sample type from float32 to dtype.

        Audio sample type is usually integer or float-point. For integer
        type, float32 will be rescaled from [-1, 1] to the maximum range
        supported by the integer type.

        This is for writing a audio file.
        """
        return convert_samples_from_float32(samples, dtype)

================================================
FILE: paddlespeech/s2t/frontend/augmentor/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/frontend/augmentor/augmentation.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the data augmentation pipeline."""
import json
import os
from collections.abc import Sequence
from inspect import signature
from pprint import pformat

import numpy as np

from paddlespeech.s2t.frontend.augmentor.base import AugmentorBase
from paddlespeech.s2t.utils.dynamic_import import dynamic_import
from paddlespeech.s2t.utils.log import Log

# Module-level logger obtained from the project's Log helper, keyed by this
# module's name.
logger = Log(__name__).getlog()

# Names exported by `from ... import *`.
__all__ = ["AugmentationPipeline"]

# Short augmentor type names mapped to "module.path:ClassName" import targets.
# Every alias points at the module named after its augmentor; "volume"
# previously pointed at the impulse_response module, which is the home of
# ImpulseResponseAugmentor (see the "impulse" entry), not of the volume
# augmentor.
import_alias = dict(
    volume="paddlespeech.s2t.frontend.augmentor.volume_perturb:VolumePerturbAugmentor",
    shift="paddlespeech.s2t.frontend.augmentor.shift_perturb:ShiftPerturbAugmentor",
    speed="paddlespeech.s2t.frontend.augmentor.speed_perturb:SpeedPerturbAugmentor",
    resample="paddlespeech.s2t.frontend.augmentor.resample:ResampleAugmentor",
    bayesian_normal="paddlespeech.s2t.frontend.augmentor.online_bayesian_normalization:OnlineBayesianNormalizationAugmentor",
    noise="paddlespeech.s2t.frontend.augmentor.noise_perturb:NoisePerturbAugmentor",
    impulse="paddlespeech.s2t.frontend.augmentor.impulse_response:ImpulseResponseAugmentor",
    specaug="paddlespeech.s2t.frontend.augmentor.spec_augment:SpecAugmentor", )


class AugmentationPipeline():
    """Build a pre-processing pipeline with various augmentation models. Such a
    data augmentation pipeline is often leveraged to augment the training
    samples to make the model invariant to certain types of perturbations in the
    real world, improving model's generalization ability.

    The pipeline is built according to the augmentation configuration, given
    as a json list of objects, e.g.

    .. code-block::

        [ {
                "type": "noise",
                "params": {"min_snr_dB": 10,
                           "max_snr_dB": 20,
                           "noise_manifest_path": "datasets/manifest.noise"},
                "prob": 0.0
            },
            {
                "type": "speed",
                "params": {"min_speed_rate": 0.9,
                           "max_speed_rate": 1.1},
                "prob": 1.0
            },
            {
                "type": "shift",
                "params": {"min_shift_ms": -5,
                           "max_shift_ms": 5},
                "prob": 1.0
            },
            {
                "type": "volume",
                "params": {"min_gain_dBFS": -10,
                           "max_gain_dBFS": 10},
                "prob": 0.0
            },
            {
                "type": "bayesian_normal",
                "params": {"target_db": -20,
                           "prior_db": -20,
                           "prior_samples": 100},
                "prob": 0.0
            }
        ]

    Every entry needs three keys. "type" names the augmentor and is resolved
    through `dynamic_import` with `import_alias`. "params" holds the keyword
    arguments passed to the augmentor constructor (after the shared random
    generator). "prob" is the probability of the augmentor to take effect; if
    "prob" is zero, the augmentor never takes effect. The configuration above
    therefore declares five augmentors, of which only the speed and the shift
    perturbation are active.

    Entries whose type is listed in `SPEC_TYPES` are feature augmentors and
    are used by `transform_feature`; all other entries are audio augmentors
    and are used by `transform_audio`. `__call__` runs every entry, in
    configuration order.

    Params:
        preprocess_conf(str): Augmentation configuration in `json file` or `json string`.
            An empty value builds an empty pipeline.
        random_seed(int): Random seed of the generator shared by all augmentors.

    Raises:
        ValueError: If the augmentation json config is in incorrect format,
            or if an augmentor cannot be constructed from its "params".
    """

    # Augmentor types that operate on features instead of on audio.
    SPEC_TYPES = {'specaug'}

    def __init__(self, preprocess_conf: str, random_seed: int=0):
        self._rng = np.random.RandomState(random_seed)
        self.conf = {'mode': 'sequential', 'process': []}
        if preprocess_conf:
            if os.path.isfile(preprocess_conf):
                # json file
                with open(preprocess_conf, 'r') as fin:
                    json_string = fin.read()
            else:
                # json string
                json_string = preprocess_conf
            # json.JSONDecodeError is a subclass of ValueError.
            process = json.loads(json_string)
            # `list += dict` would silently splice the dict keys into the
            # pipeline and fail much later with an unrelated error, so the
            # top-level shape is checked here.
            if not isinstance(process, list):
                raise ValueError(
                    "Augmentation config must be a json list, but got: {}".
                    format(type(process).__name__))
            for config in process:
                if not isinstance(config, dict):
                    raise ValueError(
                        "Each augmentation config entry must be a json "
                        "object, but got: {}".format(type(config).__name__))
            self.conf['process'] += process

        # Each view of the pipeline builds its own augmentor instances.
        self._augmentors, self._rates = self._parse_pipeline_from('all')
        self._audio_augmentors, self._audio_rates = self._parse_pipeline_from(
            'audio')
        self._spec_augmentors, self._spec_rates = self._parse_pipeline_from(
            'feature')
        logger.info(
            f"Augmentation: {pformat(list(zip(self._augmentors, self._rates)))}")

    def __call__(self, xs, uttid_list=None, **kwargs):
        """Run every configured augmentor, in order, on one sample or a batch.

        Args:
            xs: A single sample, or a `Sequence` of samples (a batch).
            uttid_list (str|Sequence, optional): Utterance ids, one per
                sample. A single str is repeated for every sample. The ids
                are only passed to augmentors whose call signature has an
                `uttid` parameter.
            **kwargs: Extra arguments; each augmentor only receives the ones
                named in its call signature.

        Returns:
            The processed batch as a list if `xs` was a `Sequence`, otherwise
            the single processed sample.

        Raises:
            NotImplementedError: If the configured mode is not "sequential".
        """
        if not isinstance(xs, Sequence):
            is_batch = False
            xs = [xs]
        else:
            is_batch = True

        if isinstance(uttid_list, str):
            uttid_list = [uttid_list for _ in range(len(xs))]

        if self.conf.get("mode", "sequential") == "sequential":
            for idx, (func, rate) in enumerate(
                    zip(self._augmentors, self._rates), 0):
                # One draw per augmentor decides for the whole batch.
                if self._rng.uniform(0., 1.) >= rate:
                    continue

                # Derive only the args which the func has
                try:
                    param = signature(func).parameters
                except ValueError:
                    # Some function, e.g. built-in function, are failed
                    param = {}
                _kwargs = {k: v for k, v in kwargs.items() if k in param}

                try:
                    if uttid_list is not None and "uttid" in param:
                        xs = [
                            func(x, u, **_kwargs)
                            for x, u in zip(xs, uttid_list)
                        ]
                    else:
                        xs = [func(x, **_kwargs) for x in xs]
                except Exception:
                    logger.fatal("Catch a exception from {}th func: {}".format(
                        idx, func))
                    raise
        else:
            raise NotImplementedError(
                "Not supporting mode={}".format(self.conf["mode"]))

        if is_batch:
            return xs
        else:
            return xs[0]

    def transform_audio(self, audio_segment):
        """Run the pre-processing pipeline for data augmentation.

        Only the audio augmentors (types not in `SPEC_TYPES`) are applied,
        each with its configured probability.

        Note that this is an in-place transformation.

        :param audio_segment: Audio segment to process.
        :type audio_segment: AudioSegment|SpeechSegment
        """
        for augmentor, rate in zip(self._audio_augmentors, self._audio_rates):
            if self._rng.uniform(0., 1.) < rate:
                augmentor.transform_audio(audio_segment)

    def transform_feature(self, spec_segment):
        """Spectrogram augmentation.

        Only the feature augmentors (types in `SPEC_TYPES`) are applied, each
        with its configured probability.

        Args:
            spec_segment (np.ndarray): audio feature, (D, T).

        Returns:
            The feature returned by the last applied augmentor, or the input
            itself if none was applied.
        """
        for augmentor, rate in zip(self._spec_augmentors, self._spec_rates):
            if self._rng.uniform(0., 1.) < rate:
                spec_segment = augmentor.transform_feature(spec_segment)
        return spec_segment

    def _parse_pipeline_from(self, aug_type='all'):
        """Parse the config json to build an augmentation pipeline.

        Args:
            aug_type (str): 'audio' selects the entries whose type is not in
                `SPEC_TYPES`, 'feature' the entries whose type is, and 'all'
                every entry.

        Returns:
            tuple: (augmentors, rates), two lists in configuration order.
        """
        assert aug_type in ('audio', 'feature', 'all'), aug_type
        audio_confs = []
        feature_confs = []
        all_confs = []
        for config in self.conf['process']:
            all_confs.append(config)
            if config["type"] in self.SPEC_TYPES:
                feature_confs.append(config)
            else:
                audio_confs.append(config)

        if aug_type == 'audio':
            aug_confs = audio_confs
        elif aug_type == 'feature':
            aug_confs = feature_confs
        elif aug_type == 'all':
            aug_confs = all_confs
        else:
            # Still reachable when assertions are disabled (python -O).
            raise ValueError(f"Not support: {aug_type}")

        augmentors = [
            self._get_augmentor(config["type"], config["params"])
            for config in aug_confs
        ]
        rates = [config["prob"] for config in aug_confs]
        return augmentors, rates

    def _get_augmentor(self, augmentor_type, params):
        """Return an augmentation model by the type name, and pass in params.

        Raises:
            ValueError: If the augmentor class cannot be instantiated with
                `params`. The original exception is chained as the cause.
        """
        class_obj = dynamic_import(augmentor_type, import_alias)
        assert issubclass(class_obj, AugmentorBase)
        try:
            obj = class_obj(self._rng, **params)
        except Exception as err:
            # The type was resolved above, so a failure here comes from the
            # constructor (bad params, unreadable manifest, ...). Report the
            # real cause rather than an "unknown type".
            raise ValueError("Failed to build augmentor [%s] with params %s: %r"
                             % (augmentor_type, params, err)) from err
        return obj


================================================
FILE: paddlespeech/s2t/frontend/augmentor/base.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the abstract base class for augmentation models."""
from abc import ABCMeta
from abc import abstractmethod


class AugmentorBase():
    """Common interface of the augmentation models (augmentors).

    Every augmentor class is expected to derive from this class and to
    override the methods it needs; the defaults below raise
    `NotImplementedError`.
    """

    # NOTE: this is the Python 2 way of declaring a metaclass. On Python 3 it
    # is an ordinary class attribute, so `@abstractmethod` is not enforced
    # and the class (and partially implemented subclasses) can be
    # instantiated.
    __metaclass__ = ABCMeta

    @abstractmethod
    def __init__(self):
        """The base class keeps no state."""

    @abstractmethod
    def __call__(self, xs):
        """Apply the augmentor to `xs`; must be provided by subclasses."""
        raise NotImplementedError("AugmentorBase: Not impl __call__")

    @abstractmethod
    def transform_audio(self, audio_segment):
        """Apply an effect to the given audio segment.

        Such effects augment the training data so that the model becomes
        invariant to certain perturbations of the real world, which improves
        its generalization ability.

        Note that this is an in-place transformation.

        :param audio_segment: Audio segment to add effects to.
        :type audio_segment: AudioSegment|SpeechSegment
        """
        raise NotImplementedError("AugmentorBase: Not impl transform_audio")

    @abstractmethod
    def transform_feature(self, spec_segment):
        """Apply an effect to the given audio feature segment.

        Such effects (e.g. time or frequency masks) augment the training data
        so that the model generalizes better.

        Args:
            spec_segment (Spectrogram): Spectrogram segment to add effects to.
        """
        raise NotImplementedError("AugmentorBase: Not impl transform_feature")


================================================
FILE: paddlespeech/s2t/frontend/augmentor/impulse_response.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the impulse response augmentation model."""
import jsonlines

from paddlespeech.s2t.frontend.audio import AudioSegment
from paddlespeech.s2t.frontend.augmentor.base import AugmentorBase


class ImpulseResponseAugmentor(AugmentorBase):
    """Augmentor that convolves the audio with a random impulse response.

    :param rng: Random generator object. It is used through the numpy-style
                call ``choice(a, size, replace=...)``; `AugmentationPipeline`
                passes a ``np.random.RandomState``.
    :type rng: np.random.RandomState
    :param impulse_manifest_path: Manifest path for impulse audio data
                                  (json-lines; every record needs an
                                  'audio_filepath' field).
    :type impulse_manifest_path: str
    """

    def __init__(self, rng, impulse_manifest_path):
        self._rng = rng
        # The whole manifest is kept in memory.
        with jsonlines.open(impulse_manifest_path, 'r') as manifest_reader:
            records = list(manifest_reader)
        self._impulse_manifest = records

    def __call__(self, x, uttid=None, train=True):
        """Augment `x` in place when `train` is true; always return `x`."""
        if train:
            self.transform_audio(x)
        return x

    def transform_audio(self, audio_segment):
        """Convolve the segment with one randomly drawn impulse response.

        Note that this is an in-place transformation.

        :param audio_segment: Audio segment to add effects to.
        :type audio_segment: AudioSegment|SpeechSegment
        """
        drawn = self._rng.choice(self._impulse_manifest, 1, replace=False)
        impulse_path = drawn[0]['audio_filepath']
        impulse_segment = AudioSegment.from_file(impulse_path)
        audio_segment.convolve(impulse_segment, allow_resample=True)


================================================
FILE: paddlespeech/s2t/frontend/augmentor/noise_perturb.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the noise perturb augmentation model."""
import jsonlines

from paddlespeech.s2t.frontend.audio import AudioSegment
from paddlespeech.s2t.frontend.augmentor.base import AugmentorBase


class NoisePerturbAugmentor(AugmentorBase):
    """Augmentor that mixes background noise into the audio.

    :param rng: Random generator object. It is used through the numpy-style
                call ``choice(a, size, replace=...)``; `AugmentationPipeline`
                passes a ``np.random.RandomState``.
    :type rng: np.random.RandomState
    :param min_snr_dB: Minimal signal noise ratio, in decibels.
    :type min_snr_dB: float
    :param max_snr_dB: Maximal signal noise ratio, in decibels.
    :type max_snr_dB: float
    :param noise_manifest_path: Manifest path for noise audio data
                                (json-lines; every record needs 'duration'
                                and 'audio_filepath' fields).
    :type noise_manifest_path: str
    """

    def __init__(self, rng, min_snr_dB, max_snr_dB, noise_manifest_path):
        self._rng = rng
        self._min_snr_dB, self._max_snr_dB = min_snr_dB, max_snr_dB
        # The whole manifest is kept in memory.
        with jsonlines.open(noise_manifest_path, 'r') as manifest_reader:
            self._noise_manifest = list(manifest_reader)

    def __call__(self, x, uttid=None, train=True):
        """Augment `x` in place when `train` is true; always return `x`."""
        if train:
            self.transform_audio(x)
        return x

    def transform_audio(self, audio_segment):
        """Add a random slice of a randomly drawn noise recording.

        Note that this is an in-place transformation.

        :param audio_segment: Audio segment to add effects to.
        :type audio_segment: AudioSegment|SpeechSegment
        :raises RuntimeError: If the drawn noise recording is shorter than
                              the audio segment.
        """
        noise_entry = self._rng.choice(
            self._noise_manifest, 1, replace=False)[0]
        if noise_entry['duration'] < audio_segment.duration:
            raise RuntimeError("The duration of sampled noise audio is smaller "
                               "than the audio segment to add effects to.")
        # Random window of the segment's length inside the noise recording.
        slack = noise_entry['duration'] - audio_segment.duration
        begin = self._rng.uniform(0, slack)
        finish = begin + audio_segment.duration
        noise_segment = AudioSegment.slice_from_file(
            noise_entry['audio_filepath'], start=begin, end=finish)
        # The SNR is drawn after the noise slice has been loaded.
        snr_dB = self._rng.uniform(self._min_snr_dB, self._max_snr_dB)
        audio_segment.add_noise(
            noise_segment, snr_dB, allow_downsampling=True, rng=self._rng)


================================================
FILE: paddlespeech/s2t/frontend/augmentor/online_bayesian_normalization.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contain the online bayesian normalization augmentation model."""
from paddlespeech.s2t.frontend.augmentor.base import AugmentorBase


class OnlineBayesianNormalizationAugmentor(AugmentorBase):
    """Augmentor that applies online bayesian normalization.

    :param rng: Random generator object (stored, but not used by this class).
    :type rng: np.random.RandomState
    :param target_db: Target RMS value in decibels.
    :type target_db: float
    :param prior_db: Prior RMS estimate in decibels.
    :type prior_db: float
    :param prior_samples: Prior strength in number of samples.
    :type prior_samples: int
    :param startup_delay: Default 0.0s. If provided, this function will
                          accrue statistics for the first startup_delay
                          seconds before applying online normalization.
    :type startup_delay: float
    """

    def __init__(self,
                 rng,
                 target_db,
                 prior_db,
                 prior_samples,
                 startup_delay=0.0):
        self._rng = rng
        self._target_db, self._prior_db = target_db, prior_db
        self._prior_samples = prior_samples
        self._startup_delay = startup_delay

    def __call__(self, x, uttid=None, train=True):
        """Normalize `x` in place when `train` is true; always return `x`."""
        if train:
            self.transform_audio(x)
        return x

    def transform_audio(self, audio_segment):
        """Normalize the input audio with the online Bayesian approach.

        Note that this is an in-place transformation.

        :param audio_segment: Audio segment to add effects to.
        :type audio_segment: AudioSegment|SpeechSegment
        """
        # Arguments are passed positionally, in this exact order.
        audio_segment.normalize_online_bayesian(
            self._target_db,
            self._prior_db,
            self._prior_samples,
            self._startup_delay)


================================================
FILE: paddlespeech/s2t/frontend/augmentor/resample.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contain the resample augmentation model."""
from paddlespeech.s2t.frontend.augmentor.base import AugmentorBase


class ResampleAugmentor(AugmentorBase):
    """Augmentor that resamples the audio.

    See more info here:
    https://ccrma.stanford.edu/~jos/resample/index.html

    :param rng: Random generator object (stored, but not used by this class).
    :type rng: np.random.RandomState
    :param new_sample_rate: New sample rate in Hz.
    :type new_sample_rate: int
    """

    def __init__(self, rng, new_sample_rate):
        self._rng = rng
        self._new_sample_rate = new_sample_rate

    def __call__(self, x, uttid=None, train=True):
        """Resample `x` in place when `train` is true; always return `x`."""
        if train:
            self.transform_audio(x)
        return x

    def transform_audio(self, audio_segment):
        """Resample the input audio to the configured sample rate.

        Note that this is an in-place transformation.

        :param audio_segment: Audio segment to add effects to.
        :type audio_segment: AudioSegment|SpeechSegment
        """
        target_rate = self._new_sample_rate
        audio_segment.resample(target_rate)


================================================
FILE: paddlespeech/s2t/frontend/augmentor/shift_perturb.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the shift perturb augmentation model."""
from paddlespeech.s2t.frontend.augmentor.base import AugmentorBase


class ShiftPerturbAugmentor(AugmentorBase):
    """Augmentor that shifts the audio by a random amount of time.

    :param rng: Random generator object; `AugmentationPipeline` passes a
                ``np.random.RandomState``.
    :type rng: np.random.RandomState
    :param min_shift_ms: Minimal shift in milliseconds.
    :type min_shift_ms: float
    :param max_shift_ms: Maximal shift in milliseconds.
    :type max_shift_ms: float
    """

    def __init__(self, rng, min_shift_ms, max_shift_ms):
        self._rng = rng
        self._min_shift_ms, self._max_shift_ms = min_shift_ms, max_shift_ms

    def __call__(self, x, uttid=None, train=True):
        """Shift `x` in place when `train` is true; always return `x`."""
        if train:
            self.transform_audio(x)
        return x

    def transform_audio(self, audio_segment):
        """Shift the audio by a uniformly drawn number of milliseconds.

        Note that this is an in-place transformation.

        :param audio_segment: Audio segment to add effects to.
        :type audio_segment: AudioSegment|SpeechSegment
        """
        lower, upper = self._min_shift_ms, self._max_shift_ms
        audio_segment.shift(self._rng.uniform(lower, upper))


================================================
FILE: paddlespeech/s2t/frontend/augmentor/spec_augment.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the SpecAugment augmentation model."""
import random

import numpy as np
from PIL import Image

from paddlespeech.s2t.frontend.augmentor.base import AugmentorBase
from paddlespeech.s2t.utils.log import Log

logger = Log(__name__).getlog()


class SpecAugmentor(AugmentorBase):
    """Augmentation model for Time warping, Frequency masking, Time masking.

    SpecAugment: A Simple Data Augmentation Method for Automatic Speech Recognition
        https://arxiv.org/abs/1904.08779

    SpecAugment on Large Scale Datasets
        https://arxiv.org/abs/1912.05533

    """

    def __init__(self,
                 rng,
                 F,
                 T,
                 n_freq_masks,
                 n_time_masks,
                 p=1.0,
                 W=40,
                 adaptive_number_ratio=0,
                 adaptive_size_ratio=0,
                 max_n_time_masks=20,
                 replace_with_zero=True,
                 warp_mode='PIL'):
        """SpecAugment class.
        Args:
            rng (np.random.RandomState): random generator object; it is called
                as ``uniform(low=..., high=...)``, i.e. with the numpy
                signature.
            F (int): parameter for frequency masking
            T (int): parameter for time masking
            n_freq_masks (int): number of frequency masks
            n_time_masks (int): number of time masks
            p (float): parameter for upperbound of the time mask
            W (int): parameter for time warping
            adaptive_number_ratio (float): adaptive multiplicity ratio for time masking
            adaptive_size_ratio (float): adaptive size ratio for time masking
            max_n_time_masks (int): maximum number of time masking
            replace_with_zero (bool): pad zero on mask if true else use mean
            warp_mode (str):  "PIL" (default, fast, not differentiable)
                 or "sparse_image_warp" (slow, differentiable)
        """
        super().__init__()
        self._rng = rng
        # time_warp writes its result back into the input array.
        self.inplace = True
        self.replace_with_zero = replace_with_zero

        self.mode = warp_mode
        self.W = W
        self.F = F
        self.T = T
        self.n_freq_masks = n_freq_masks
        self.n_time_masks = n_time_masks
        self.p = p

        # adaptive SpecAugment
        self.adaptive_number_ratio = adaptive_number_ratio
        self.adaptive_size_ratio = adaptive_size_ratio
        self.max_n_time_masks = max_n_time_masks

        # With an adaptive ratio, mask_time derives the value from the number
        # of frames, so the fixed setting is zeroed.
        if adaptive_number_ratio > 0:
            self.n_time_masks = 0
            logger.info('n_time_masks is set ot zero for adaptive SpecAugment.')
        if adaptive_size_ratio > 0:
            self.T = 0
            logger.info('T is set to zero for adaptive SpecAugment.')

        # (start, end) of the most recently applied masks; None until a mask
        # has been applied.
        self._freq_mask = None
        self._time_mask = None

    def librispeech_basic(self):
        """Switch to the preset W=80, F=27, T=100, one mask each, p=1.0."""
        self.W = 80
        self.F = 27
        self.T = 100
        self.n_freq_masks = 1
        self.n_time_masks = 1
        self.p = 1.0

    def librispeech_double(self):
        """Switch to the preset W=80, F=27, T=100, two masks each, p=1.0."""
        self.W = 80
        self.F = 27
        self.T = 100
        self.n_freq_masks = 2
        self.n_time_masks = 2
        self.p = 1.0

    def switchboard_mild(self):
        """Switch to the preset W=40, F=15, T=70, two masks each, p=0.2."""
        self.W = 40
        self.F = 15
        self.T = 70
        self.n_freq_masks = 2
        self.n_time_masks = 2
        self.p = 0.2

    def switchboard_strong(self):
        """Switch to the preset W=40, F=27, T=70, two masks each, p=0.2."""
        self.W = 40
        self.F = 27
        self.T = 70
        self.n_freq_masks = 2
        self.n_time_masks = 2
        self.p = 0.2

    @property
    def freq_mask(self):
        """(start, end) bins of the last frequency mask, or None."""
        return self._freq_mask

    @property
    def time_mask(self):
        """(start, end) frames of the last time mask, or None."""
        return self._time_mask

    def __repr__(self):
        return f"specaug: F-{self.F}, T-{self.T}, F-n-{self.n_freq_masks}, T-n-{self.n_time_masks}"

    def time_warp(self, x, mode='PIL'):
        """time warp for spec augment
        move random center frame by the random width ~ uniform(-window, window)

        Args:
            x (np.ndarray): spectrogram (time, freq)
            mode (str): PIL or sparse_image_warp

        Raises:
            NotImplementedError: if mode is "sparse_image_warp", which is not
                implemented.
            NotImplementedError: if mode is neither "PIL" nor
                "sparse_image_warp".

        Returns:
            np.ndarray: time warped spectrogram (time, freq). `x` is returned
                unchanged when W is 0 or when there are no more than 2 * W
                frames.
        """
        window = self.W
        if window == 0:
            return x

        if mode == "PIL":
            t = x.shape[0]
            if t - window <= window:
                return x
            # NOTE(review): the warp position is drawn from the global
            # `random` module, not from self._rng, so it is not controlled by
            # the generator handed to the constructor. Left unchanged here
            # because switching generators would alter existing results.
            # NOTE: randrange(a, b) emits a, a + 1, ..., b - 1
            center = random.randrange(window, t - window)
            warped = random.randrange(center - window, center +
                                      window) + 1  # 1 ... t - 1

            # Stretch/squeeze both sides of `center` so that it lands on
            # `warped`; PIL sizes are (width, height) = (freq, time).
            left = Image.fromarray(x[:center]).resize((x.shape[1], warped),
                                                      Image.BICUBIC)
            right = Image.fromarray(x[center:]).resize((x.shape[1], t - warped),
                                                       Image.BICUBIC)
            if self.inplace:
                x[:warped] = left
                x[warped:] = right
                return x
            return np.concatenate((left, right), 0)
        elif mode == "sparse_image_warp":
            raise NotImplementedError('sparse_image_warp')
        else:
            raise NotImplementedError(
                "unknown resize mode: " + mode +
                ", choose one from (PIL, sparse_image_warp).")

    def mask_freq(self, x, replace_with_zero=False):
        """freq mask

        Applies `n_freq_masks` masks in place, each of a random width below F
        (and never wider than the number of bins).

        Args:
            x (np.ndarray): spectrogram (time, freq)
            replace_with_zero (bool, optional): fill with 0 if true, else
                with the current mean of x. Defaults to False.

        Returns:
            np.ndarray: freq mask spectrogram (time, freq)
        """
        n_bins = x.shape[1]
        for _ in range(0, self.n_freq_masks):
            f = int(self._rng.uniform(low=0, high=self.F))
            # A width above n_bins would make the upper bound below negative,
            # giving a negative start index and a mask that wraps to the last
            # bins.
            f = min(f, n_bins)
            f_0 = int(self._rng.uniform(low=0, high=n_bins - f))
            assert f_0 <= f_0 + f
            if replace_with_zero:
                x[:, f_0:f_0 + f] = 0
            else:
                x[:, f_0:f_0 + f] = x.mean()
            self._freq_mask = (f_0, f_0 + f)
        return x

    def mask_time(self, x, replace_with_zero=False):
        """time mask

        Applies the time masks in place. The number of masks is
        `n_time_masks`, or `n_frames * adaptive_number_ratio` capped at
        `max_n_time_masks` in adaptive mode. Each mask has a random width
        below T (or below `adaptive_size_ratio * n_frames` in adaptive mode),
        limited to `n_frames * p` and to the number of frames.

        Args:
            x (np.ndarray): spectrogram (time, freq)
            replace_with_zero (bool, optional): fill with 0 if true, else
                with the current mean of x. Defaults to False.

        Returns:
            np.ndarray: time mask spectrogram (time, freq)
        """
        n_frames = x.shape[0]

        if self.adaptive_number_ratio > 0:
            n_masks = int(n_frames * self.adaptive_number_ratio)
            n_masks = min(n_masks, self.max_n_time_masks)
        else:
            n_masks = self.n_time_masks

        if self.adaptive_size_ratio > 0:
            T = self.adaptive_size_ratio * n_frames
        else:
            T = self.T

        for _ in range(n_masks):
            t = int(self._rng.uniform(low=0, high=T))
            # The extra n_frames bound only matters for p > 1: it keeps the
            # upper bound below from going negative (see mask_freq).
            t = min(t, int(n_frames * self.p), n_frames)
            t_0 = int(self._rng.uniform(low=0, high=n_frames - t))
            assert t_0 <= t_0 + t
            if replace_with_zero:
                x[t_0:t_0 + t, :] = 0
            else:
                x[t_0:t_0 + t, :] = x.mean()
            self._time_mask = (t_0, t_0 + t)
        return x

    def __call__(self, x, train=True):
        """Augment `x` when `train` is true, otherwise return it untouched."""
        if not train:
            return x
        return self.transform_feature(x)

    def transform_feature(self, x: np.ndarray):
        """Apply time warping, frequency masking and time masking, in this
        order. The masks are written into `x` itself.

        Args:
            x (np.ndarray): `[T, F]`
        Returns:
            x (np.ndarray): `[T, F]`
        """
        assert isinstance(x, np.ndarray)
        assert x.ndim == 2
        x = self.time_warp(x, self.mode)
        x = self.mask_freq(x, self.replace_with_zero)
        x = self.mask_time(x, self.replace_with_zero)
        return x


================================================
FILE: paddlespeech/s2t/frontend/augmentor/speed_perturb.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the speed perturbation augmentation model."""
import numpy as np

from paddlespeech.s2t.frontend.augmentor.base import AugmentorBase


class SpeedPerturbAugmentor(AugmentorBase):
    """Augmentation model for adding speed perturbation."""

    def __init__(self, rng, min_speed_rate=0.9, max_speed_rate=1.1,
                 num_rates=3):
        """speed perturbation.

        The speed perturbation in kaldi uses sox-speed instead of sox-tempo,
        and sox-speed just resamples the input,
        i.e. pitch and tempo are both changed.

        "Why use speed option instead of tempo -s in SoX for speed perturbation"
        https://groups.google.com/forum/#!topic/kaldi-help/8OOG7eE4sZ8

        Sox speed:
        https://pysox.readthedocs.io/en/latest/api.html#sox.transform.Transformer

        See reference paper here:
        http://www.danielpovey.com/files/2015_interspeech_augmentation.pdf

        Espnet:
        https://espnet.github.io/espnet/_modules/espnet/transform/perturb.html

        Nemo:
        https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/asr/parts/perturb.py#L92

        Args:
            rng: Random generator object; must provide `uniform(low, high)`
                and `choice(seq)`.
            min_speed_rate (float): Lower bound of new speed rate to sample and should
                not be smaller than 0.9.
            max_speed_rate (float): Upper bound of new speed rate to sample and should
                not be larger than 1.1.
            num_rates (int, optional): Number of discrete rates to allow.
                Can be a positive or negative integer. Defaults to 3.
                If a positive integer is provided, the range of speed rates
                is discretized into `num_rates` evenly spaced values
                (both bounds included).
                If a negative integer or 0 is provided, the full range of speed rates
                is sampled uniformly.
                Note: If a positive integer is provided and the resultant discretized
                range of rates contains the value '1.0', then samples that draw
                rate=1.0 are not augmented at all and are simply skipped. This
                avoids unnecessary augmentation and computation. The effective
                augmentation chance in such a case is
                `prob * ((num_rates - 1) / num_rates) * 100`%,
                where `prob` is the global probability of a sample being augmented.

        Raises:
            ValueError: when `min_speed_rate` < 0.9 or `max_speed_rate` > 1.1.
        """
        if min_speed_rate < 0.9:
            raise ValueError(
                "Sampling speed below 0.9 can cause unnatural effects")
        if max_speed_rate > 1.1:
            raise ValueError(
                "Sampling speed above 1.1 can cause unnatural effects")
        self._min_rate = min_speed_rate
        self._max_rate = max_speed_rate
        self._rng = rng
        self._num_rates = num_rates
        # The discrete rate table only exists in discrete mode (num_rates > 0).
        if num_rates > 0:
            self._rates = np.linspace(
                self._min_rate, self._max_rate, self._num_rates, endpoint=True)

    def __call__(self, x, uttid=None, train=True):
        """Perturb `x` in place when training and return it.

        Args:
            x: audio segment object exposing `change_speed(rate)`.
            uttid: unused here.
            train (bool): when falsy, `x` is returned untouched.
        """
        if not train:
            return x
        self.transform_audio(x)
        return x

    def transform_audio(self, audio_segment):
        """Sample a new speed rate from the given range and
        changes the speed of the given audio clip.

        Note that this is an in-place transformation.

        :param audio_segment: Audio segment to add effects to.
        :type audio_segment: AudioSegment|SpeechSegment
        """
        # `num_rates <= 0` selects continuous sampling. Zero must be included
        # here because `__init__` only builds `self._rates` for num_rates > 0.
        if self._num_rates <= 0:
            speed_rate = self._rng.uniform(self._min_rate, self._max_rate)
        else:
            speed_rate = self._rng.choice(self._rates)

        # Skip perturbation in case of identity speed rate
        if speed_rate == 1.0:
            return

        audio_segment.change_speed(speed_rate)


================================================
FILE: paddlespeech/s2t/frontend/augmentor/volume_perturb.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the volume perturb augmentation model."""
from paddlespeech.s2t.frontend.augmentor.base import AugmentorBase


class VolumePerturbAugmentor(AugmentorBase):
    """Augmentation model that applies a random gain to the audio.

    This is used for multi-loudness training of PCEN. See

    https://arxiv.org/pdf/1607.05666v1.pdf

    for more details.

    :param rng: Random generator object; must provide ``uniform(low, high)``.
    :type rng: random.Random
    :param min_gain_dBFS: Minimal gain in dBFS.
    :type min_gain_dBFS: float
    :param max_gain_dBFS: Maximal gain in dBFS.
    :type max_gain_dBFS: float
    """

    def __init__(self, rng, min_gain_dBFS, max_gain_dBFS):
        self._rng = rng
        self._min_gain_dBFS = min_gain_dBFS
        self._max_gain_dBFS = max_gain_dBFS

    def __call__(self, x, uttid=None, train=True):
        """Perturb ``x`` in place when ``train`` is truthy; always return ``x``."""
        if train:
            self.transform_audio(x)
        return x

    def transform_audio(self, audio_segment):
        """Change audio loudness.

        Note that this is an in-place transformation.

        :param audio_segment: Audio segment to add effects to.
        :type audio_segment: AudioSegment|SpeechSegment
        """
        lower, upper = self._min_gain_dBFS, self._max_gain_dBFS
        audio_segment.gain_db(self._rng.uniform(lower, upper))


================================================
FILE: paddlespeech/s2t/frontend/featurizer/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .audio_featurizer import AudioFeaturizer  #noqa: F401
from .speech_featurizer import SpeechFeaturizer
from .text_featurizer import TextFeaturizer


================================================
FILE: paddlespeech/s2t/frontend/featurizer/audio_featurizer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the audio featurizer class."""
import numpy as np
import paddle
from python_speech_features import delta
from python_speech_features import mfcc

import paddlespeech.audio.compliance.kaldi as kaldi


class AudioFeaturizer():
    """Audio featurizer, for extracting features from audio contents of
    AudioSegment or SpeechSegment.

    Currently, it supports feature types of linear spectrogram, mfcc and
    fbank.

    :param spectrum_type: Specgram feature type. Options: 'linear', 'mfcc',
                          'fbank'.
    :type spectrum_type: str
    :param stride_ms: Striding size (in milliseconds) for generating frames.
    :type stride_ms: float
    :param window_ms: Window size (in milliseconds) for generating frames.
    :type window_ms: float
    :param max_freq: When spectrum_type is 'linear', only FFT bins
                     corresponding to frequencies between [0, max_freq] are
                     returned; when spectrum_type is 'mfcc', max_freq is the
                     highest band edge of mel filters.
    :type max_freq: None|float
    :param target_sample_rate: Audio are resampled (if upsampling or
                               downsampling is allowed) to this before
                               extracting spectrogram features.
    :type target_sample_rate: float
    :param use_dB_normalization: Whether to normalize the audio to a certain
                                 decibels before extracting the features.
    :type use_dB_normalization: bool
    :param target_dB: Target audio decibels for normalization.
    :type target_dB: float
    """

    def __init__(self,
                 spectrum_type: str='linear',
                 feat_dim: int=None,
                 delta_delta: bool=False,
                 stride_ms=10.0,
                 window_ms=20.0,
                 n_fft=None,
                 max_freq=None,
                 target_sample_rate=16000,
                 use_dB_normalization=True,
                 target_dB=-20,
                 dither=1.0):
        self._spectrum_type = spectrum_type
        # mfcc and fbank using `feat_dim`
        self._feat_dim = feat_dim
        # mfcc and fbank using `delta-delta`
        self._delta_delta = delta_delta
        self._stride_ms = stride_ms
        self._window_ms = window_ms
        self._max_freq = max_freq
        self._target_sample_rate = target_sample_rate
        self._use_dB_normalization = use_dB_normalization
        self._target_dB = target_dB
        self._fft_point = n_fft
        self._dither = dither

    def featurize(self,
                  audio_segment,
                  allow_downsampling=True,
                  allow_upsampling=True):
        """Extract audio features from AudioSegment or SpeechSegment.

        :param audio_segment: Audio/speech segment to extract features from.
        :type audio_segment: AudioSegment|SpeechSegment
        :param allow_downsampling: Whether to allow audio downsampling before
                                   featurizing.
        :type allow_downsampling: bool
        :param allow_upsampling: Whether to allow audio upsampling before
                                 featurizing.
        :type allow_upsampling: bool
        :return: Spectrogram audio feature in 2darray.
        :rtype: ndarray
        :raises ValueError: If audio sample rate is not supported.
        """
        # upsampling or downsampling
        if ((audio_segment.sample_rate > self._target_sample_rate and
             allow_downsampling) or
            (audio_segment.sample_rate < self._target_sample_rate and
             allow_upsampling)):
            audio_segment.resample(self._target_sample_rate)
        if audio_segment.sample_rate != self._target_sample_rate:
            raise ValueError("Audio sample rate is not supported. "
                             "Turn allow_downsampling or allow up_sampling on.")
        # decibel normalization
        if self._use_dB_normalization:
            audio_segment.normalize(target_db=self._target_dB)
        # extract spectrogram
        return self._compute_specgram(audio_segment)

    @property
    def stride_ms(self):
        """Frame stride in milliseconds, as given to the constructor."""
        return self._stride_ms

    @property
    def feature_size(self):
        """audio feature size"""
        feat_dim = 0
        if self._spectrum_type == 'linear':
            fft_point = self._window_ms if self._fft_point is None else self._fft_point
            feat_dim = int(fft_point * (self._target_sample_rate / 1000) / 2 +
                           1)
        elif self._spectrum_type == 'mfcc':
            # mfcc, delta, delta-delta
            feat_dim = int(self._feat_dim *
                           3) if self._delta_delta else int(self._feat_dim)
        elif self._spectrum_type == 'fbank':
            # fbank, delta, delta-delta
            feat_dim = int(self._feat_dim *
                           3) if self._delta_delta else int(self._feat_dim)
        else:
            raise ValueError("Unknown spectrum_type %s. "
                             "Supported values: linear." % self._spectrum_type)
        return feat_dim

    def _compute_specgram(self, audio_segment):
        """Extract various audio features."""
        sample_rate = audio_segment.sample_rate
        if self._spectrum_type == 'linear':
            samples = audio_segment.samples
            return self._compute_linear_specgram(
                samples,
                sample_rate,
                stride_ms=self._stride_ms,
                window_ms=self._window_ms,
                max_freq=self._max_freq)
        elif self._spectrum_type == 'mfcc':
            samples = audio_segment.to('int16')
            return self._compute_mfcc(
                samples,
                sample_rate,
                feat_dim=self._feat_dim,
                stride_ms=self._stride_ms,
                window_ms=self._window_ms,
                max_freq=self._max_freq,
                dither=self._dither,
                delta_delta=self._delta_delta)
        elif self._spectrum_type == 'fbank':
            samples = audio_segment.to('int16')
            return self._compute_fbank(
                samples,
                sample_rate,
                feat_dim=self._feat_dim,
                stride_ms=self._stride_ms,
                window_ms=self._window_ms,
                max_freq=self._max_freq,
                dither=self._dither,
                delta_delta=self._delta_delta)
        else:
            raise ValueError("Unknown spectrum_type %s. "
                             "Supported values: linear." % self._spectrum_type)

    def _specgram_real(self, samples, window_size, stride_size, sample_rate):
        """Compute the spectrogram for samples from a real signal."""
        # extract strided windows
        truncate_size = (len(samples) - window_size) % stride_size
        samples = samples[:len(samples) - truncate_size]
        nshape = (window_size, (len(samples) - window_size) // stride_size + 1)
        nstrides = (samples.strides[0], samples.strides[0] * stride_size)
        windows = np.lib.stride_tricks.as_strided(
            samples, shape=nshape, strides=nstrides)
        assert np.all(
            windows[:, 1] == samples[stride_size:(stride_size + window_size)])
        # window weighting, squared Fast Fourier Transform (fft), scaling
        weighting = np.hanning(window_size)[:, None]
        # https://numpy.org/doc/stable/reference/generated/numpy.fft.rfft.html
        fft = np.fft.rfft(windows * weighting, n=None, axis=0)
        fft = np.absolute(fft)
        fft = fft**2
        scale = np.sum(weighting**2) * sample_rate
        fft[1:-1, :] *= (2.0 / scale)
        fft[(0, -1), :] /= scale
        # prepare fft frequency list
        freqs = float(sample_rate) / window_size * np.arange(fft.shape[0])
        return fft, freqs

    def _compute_linear_specgram(self,
                                 samples,
                                 sample_rate,
                                 stride_ms=10.0,
                                 window_ms=20.0,
                                 max_freq=None,
                                 eps=1e-14):
        """Compute the linear spectrogram from FFT energy.

        Args:
            samples ([type]): [description]
            sample_rate ([type]): [description]
            stride_ms (float, optional): [description]. Defaults to 10.0.
            window_ms (float, optional): [description]. Defaults to 20.0.
            max_freq ([type], optional): [description]. Defaults to None.
            eps ([type], optional): [description]. Defaults to 1e-14.

        Raises:
            ValueError: [description]
            ValueError: [description]

        Returns:
            np.ndarray: log spectrogram, (time, freq)
        """
        if max_freq is None:
            max_freq = sample_rate / 2
        if max_freq > sample_rate / 2:
            raise ValueError("max_freq must not be greater than half of "
                             "sample rate.")
        if stride_ms > window_ms:
            raise ValueError("Stride size must not be greater than "
                             "window size.")
        stride_size = int(0.001 * sample_rate * stride_ms)
        window_size = int(0.001 * sample_rate * window_ms)
        specgram, freqs = self._specgram_real(
            samples,
            window_size=window_size,
            stride_size=stride_size,
            sample_rate=sample_rate)
        ind = np.where(freqs <= max_freq)[0][-1] + 1
        # (freq, time)
        spec = np.log(specgram[:ind, :] + eps)
        return np.transpose(spec)

    def _concat_delta_delta(self, feat):
        """append delat, delta-delta feature.

        Args:
            feat (np.ndarray): (T, D)

        Returns:
            np.ndarray: feat with delta-delta, (T, 3*D)
        """
        # Deltas
        d_feat = delta(feat, 2)
        # Deltas-Deltas
        dd_feat = delta(feat, 2)
        # concat above three features
        concat_feat = np.concatenate((feat, d_feat, dd_feat), axis=1)
        return concat_feat

    def _compute_mfcc(self,
                      samples,
                      sample_rate,
                      feat_dim=13,
                      stride_ms=10.0,
                      window_ms=25.0,
                      max_freq=None,
                      dither=1.0,
                      delta_delta=True):
        """Compute mfcc from samples.

        Args:
            samples (np.ndarray, np.int16): the audio signal from which to compute features.
            sample_rate (float): the sample rate of the signal we are working with, in Hz.
            feat_dim (int): the number of cepstrum to return, default 13.
            stride_ms (float, optional): stride length in ms. Defaults to 10.0.
            window_ms (float, optional): window length in ms. Defaults to 25.0.
            max_freq ([type], optional): highest band edge of mel filters. In Hz, default is samplerate/2. Defaults to None.
            delta_delta (bool, optional): Whether with delta delta. Defaults to False.

        Raises:
            ValueError: max_freq > samplerate/2
            ValueError: stride_ms > window_ms

        Returns:
            np.ndarray: mfcc feature, (D, T).
        """
        if max_freq is None:
            max_freq = sample_rate / 2
        if max_freq > sample_rate / 2:
            raise ValueError("max_freq must not be greater than half of "
                             "sample rate.")
        if stride_ms > window_ms:
            raise ValueError("Stride size must not be greater than "
                             "window size.")
        # compute the 13 cepstral coefficients, and the first one is replaced
        # by log(frame energy), (T, D)
        mfcc_feat = mfcc(
            signal=samples,
            samplerate=sample_rate,
            winlen=0.001 * window_ms,
            winstep=0.001 * stride_ms,
            numcep=feat_dim,
            nfilt=23,
            nfft=512,
            lowfreq=20,
            highfreq=max_freq,
            dither=dither,
            remove_dc_offset=True,
            preemph=0.97,
            ceplifter=22,
            useEnergy=True,
            winfunc='povey')
        if delta_delta:
            mfcc_feat = self._concat_delta_delta(mfcc_feat)
        return mfcc_feat

    def _compute_fbank(self,
                       samples,
                       sample_rate,
                       feat_dim=40,
                       stride_ms=10.0,
                       window_ms=25.0,
                       max_freq=None,
                       dither=1.0,
                       delta_delta=False):
        """Compute logfbank from samples.
        
        Args:
            samples (np.ndarray, np.int16): the audio signal from which to compute features. Should be an N*1 array
            sample_rate (float): the sample rate of the signal we are working with, in Hz.
            feat_dim (int): the number of cepstrum to return, default 13.
            stride_ms (float, optional): stride length in ms. Defaults to 10.0.
            window_ms (float, optional): window length in ms. Defaults to 20.0.
            max_freq (float, optional): highest band edge of mel filters. In Hz, default is samplerate/2. Defaults to None.
            delta_delta (bool, optional): Whether with delta delta. Defaults to False.

        Raises:
            ValueError: max_freq > samplerate/2
            ValueError: stride_ms > window_ms

        Returns:
            np.ndarray: mfcc feature, (D, T).
        """
        if max_freq is None:
            max_freq = sample_rate / 2
        if max_freq > sample_rate / 2:
            raise ValueError("max_freq must not be greater than half of "
                             "sample rate.")
        if stride_ms > window_ms:
            raise ValueError("Stride size must not be greater than "
                             "window size.")
        # (T, D)
        waveform = paddle.to_tensor(
            np.expand_dims(samples, 0), dtype=paddle.float32)
        mat = kaldi.fbank(
            waveform,
            n_mels=feat_dim,
            frame_length=window_ms,  # default : 25
            frame_shift=stride_ms,  # default : 10
            dither=dither,
            energy_floor=0.0,
            sr=sample_rate)
        fbank_feat = np.squeeze(mat.numpy())
        if delta_delta:
            fbank_feat = self._concat_delta_delta(fbank_feat)
        return fbank_feat


================================================
FILE: paddlespeech/s2t/frontend/featurizer/speech_featurizer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the speech featurizer class."""
from paddlespeech.s2t.frontend.featurizer.audio_featurizer import AudioFeaturizer
from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer


class SpeechFeaturizer():
    """Speech and Text feature extraction.

    Bundles an ``AudioFeaturizer`` (``self.audio_feature``) and a
    ``TextFeaturizer`` (``self.text_feature``) and exposes their
    ``feature_size`` and ``vocab_size``.
    """

    def __init__(self,
                 unit_type,
                 vocab_filepath,
                 spm_model_prefix=None,
                 spectrum_type='linear',
                 feat_dim=None,
                 delta_delta=False,
                 stride_ms=10.0,
                 window_ms=20.0,
                 n_fft=None,
                 max_freq=None,
                 target_sample_rate=16000,
                 use_dB_normalization=True,
                 target_dB=-20,
                 dither=1.0,
                 maskctc=False):
        """Build the audio and text featurizers.

        ``unit_type``, ``vocab_filepath``, ``spm_model_prefix`` and
        ``maskctc`` go to the text featurizer; every other argument goes to
        the audio featurizer unchanged.
        """
        self.stride_ms = stride_ms
        self.window_ms = window_ms

        # audio side
        audio_feature = AudioFeaturizer(
            spectrum_type=spectrum_type,
            feat_dim=feat_dim,
            delta_delta=delta_delta,
            stride_ms=stride_ms,
            window_ms=window_ms,
            n_fft=n_fft,
            max_freq=max_freq,
            target_sample_rate=target_sample_rate,
            use_dB_normalization=use_dB_normalization,
            target_dB=target_dB,
            dither=dither)
        self.audio_feature = audio_feature
        self.feature_size = audio_feature.feature_size

        # text side
        text_feature = TextFeaturizer(
            unit_type=unit_type,
            vocab=vocab_filepath,
            spm_model_prefix=spm_model_prefix,
            maskctc=maskctc)
        self.text_feature = text_feature
        self.vocab_size = text_feature.vocab_size

    def featurize(self, speech_segment, keep_transcription_text):
        """Extract features for speech segment.

        1. For audio parts, extract the audio features.
        2. For transcript parts, keep the original text, reuse the token ids
           already attached to the segment, or convert the transcript to
           token indices with the text featurizer.

        Args:
            speech_segment (SpeechSegment): Speech segment to extract features from.
            keep_transcription_text (bool): True, keep transcript text, False, token ids

        Returns:
            tuple: 1) spectrogram audio feature in 2darray, 2) transcript
            text or list of token indices.
        """
        spec_feature = self.audio_feature.featurize(speech_segment)

        if keep_transcription_text:
            target = speech_segment.transcript
        elif speech_segment.has_token:
            # the segment already carries token ids; no need to re-tokenize
            target = speech_segment.token_ids
        else:
            target = self.text_feature.featurize(speech_segment.transcript)
        return spec_feature, target

    def text_featurize(self, text, keep_transcription_text):
        """Extract features for a piece of text only.

        Args:
            text (str): text.
            keep_transcription_text (bool): True, keep transcript text, False, token ids

        Returns:
            (str|List[int]): text, or list of token indices.
        """
        if not keep_transcription_text:
            return self.text_feature.featurize(text)
        return text


================================================
FILE: paddlespeech/s2t/frontend/featurizer/text_featurizer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the text featurizer class."""
from pprint import pformat
from typing import Union

import sentencepiece as spm

from ..utility import BLANK
from ..utility import EOS
from ..utility import load_dict
from ..utility import MASKCTC
from ..utility import SOS
from ..utility import SPACE
from ..utility import UNK
from paddlespeech.s2t.utils.log import Log

logger = Log(__name__).getlog()

__all__ = ["TextFeaturizer"]


class TextFeaturizer():
    def __init__(self, unit_type, vocab, spm_model_prefix=None, maskctc=False):
        """Text featurizer, for processing or extracting features from text.

        Currently, it supports char/word/sentence-piece level tokenizing and conversion into
        a list of token indices. Note that the token indexing order follows the
        given vocabulary file.

        Args:
            unit_type (str): unit type, e.g. char, word, spm
            vocab Option[str, list]: Filepath to load vocabulary for token indices conversion, or vocab list.
            spm_model_prefix (str, optional): spm model prefix. Defaults to None.
        """
        assert unit_type in ('char', 'spm', 'word')
        self.unit_type = unit_type
        self.unk = UNK
        self.maskctc = maskctc
        self.vocab_path_or_list = vocab

        if self.vocab_path_or_list:
            self.vocab_dict, self._id2token, self.vocab_list, self.unk_id, self.eos_id, self.blank_id = self._load_vocabulary_from_file(
                vocab, maskctc)
            self.vocab_size = len(self.vocab_list)
        else:
            logger.warning(
                "TextFeaturizer: not have vocab file or vocab list. Only Tokenizer can use, can not convert to token idx"
            )

        if unit_type == 'spm':
            spm_model = spm_model_prefix + '.model'
            self.sp = spm.SentencePieceProcessor()
            self.sp.Load(spm_model)

    def tokenize(self, text, replace_space=True):
        """tokenizer split text into text tokens"""
        if self.unit_type == 'char':
            tokens = self.char_tokenize(text, replace_space)
        elif self.unit_type == 'word':
            tokens = self.word_tokenize(text)
        else:  # spm
            tokens = self.spm_tokenize(text)
        return tokens

    def detokenize(self, tokens):
        """tokenizer convert text tokens back to text"""
        if self.unit_type == 'char':
            text = self.char_detokenize(tokens)
        elif self.unit_type == 'word':
            text = self.word_detokenize(tokens)
        else:  # spm
            text = self.spm_detokenize(tokens)
        return text

    def featurize(self, text):
        """Convert text string to a list of token indices.

        Args:
            text (str): Text to process.

        Returns:
            List[int]: List of token indices.
        """
        assert self.vocab_path_or_list, "toidx need vocab path or vocab list"
        tokens = self.tokenize(text)
        ids = []
        for token in tokens:
            if token not in self.vocab_dict:
                logger.debug(f"Text Token: {token} -> {self.unk}")
                token = self.unk
            ids.append(self.vocab_dict[token])
        return ids

    def defeaturize(self, idxs):
        """Convert a list of token indices to text string,
        ignore index after eos_id.

        Args:
            idxs (List[int]): List of token indices.

        Returns:
            str: Text.
        """
        assert self.vocab_path_or_list, "toidx need vocab path or vocab list"
        tokens = []
        # unwrap `idxs`` like `[[1,2,3]]`
        if idxs and isinstance(idxs[0], (list, tuple)) and len(idxs) == 1:
            idxs = idxs[0]

        for idx in idxs:
            if idx == self.eos_id:
                break
            tokens.append(self._id2token[idx])
        text = self.detokenize(tokens)
        return text

    def char_tokenize(self, text, replace_space=True):
        """Character tokenizer.

        Args:
            text (str): text string.
            replace_space (bool): False only used by build_vocab.py.

        Returns:
            List[str]: tokens.
        """
        text = text.strip()
        if replace_space:
            tokens = [SPACE if item == " " else item for item in list(text)]
        else:
            tokens = list(text)
        return tokens

    def char_detokenize(self, tokens):
        """Character detokenizer.

        Args:
            tokens (List[str]): tokens.

        Returns:
           str: text string.
        """
        tokens = [t.replace(SPACE, " ") for t in tokens]
        return "".join(tokens)

    def word_tokenize(self, text):
        """Word tokenizer, separate by <space>."""
        return text.strip().split()

    def word_detokenize(self, tokens):
        """Word detokenizer, separate by <space>."""
        return " ".join(tokens)

    def spm_tokenize(self, text):
        """spm tokenize.

        Args:
            text (str): text string.

        Returns:
            List[str]: sentence pieces str code
        """
        stats = {"num_empty": 0, "num_filtered": 0}

        def valid(line):
            return True

        def encode(l):
            return self.sp.EncodeAsPieces(l)

        def encode_line(line):
            line = line.strip()
            if len(line) > 0:
                line = encode(line)
                if valid(line):
                    return line
                else:
                    stats["num_filtered"] += 1
            else:
                stats["num_empty"] += 1
            return None

        enc_line = encode_line(text)
        return enc_line

    def spm_detokenize(self, tokens, input_format='piece'):
        """spm detokenize.

        Args:
            ids (List[str]): tokens.

        Returns:
            str: text
        """
        if input_format == "piece":

            def decode(l):
                return "".join(self.sp.DecodePieces(l))
        elif input_format == "id":

            def decode(l):
                return "".join(self.sp.DecodeIds(l))

        return decode(tokens)

    def _load_vocabulary_from_file(self, vocab: Union[str, list],
                                   maskctc: bool):
        """Load vocabulary from file."""
        if isinstance(vocab, list):
            vocab_list = vocab
        else:
            vocab_list = load_dict(vocab, maskctc)
        assert vocab_list is not None
        logger.debug(f"Vocab: {pformat(vocab_list)}")

        id2token = dict(
            [(idx, token) for (idx, token) in enumerate(vocab_list)])
        token2id = dict(
            [(token, idx) for (idx, token) in enumerate(vocab_list)])

        blank_id = vocab_list.index(BLANK) if BLANK in vocab_list else -1
        maskctc_id = vocab_list.index(MASKCTC) if MASKCTC in vocab_list else -1
        unk_id = vocab_list.index(UNK) if UNK in vocab_list else -1
        eos_id = vocab_list.index(EOS) if EOS in vocab_list else -1
        sos_id = vocab_list.index(SOS) if SOS in vocab_list else -1
        space_id = vocab_list.index(SPACE) if SPACE in vocab_list else -1

        logger.debug(f"BLANK id: {blank_id}")
        logger.debug(f"UNK id: {unk_id}")
        logger.debug(f"EOS id: {eos_id}")
        logger.debug(f"SOS id: {sos_id}")
        logger.debug(f"SPACE id: {space_id}")
        logger.debug(f"MASKCTC id: {maskctc_id}")
        return token2id, id2token, vocab_list, unk_id, eos_id, blank_id


================================================
FILE: paddlespeech/s2t/frontend/normalizer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains feature normalizers."""
import json

import jsonlines
import numpy as np
import paddle
from paddle.io import DataLoader
from paddle.io import Dataset

from paddlespeech.s2t.frontend.audio import AudioSegment
from paddlespeech.s2t.frontend.utility import load_cmvn
from paddlespeech.s2t.utils.log import Log

__all__ = ["FeatureNormalizer"]

logger = Log(__name__).getlog()


# https://github.com/PaddlePaddle/Paddle/pull/31481
class CollateFunc(object):
    """Collate function that reduces a batch of manifest items to CMVN sums.

    Each item is loaded from ``item['feat']``, featurized with
    ``feature_func`` and folded into per-dimension running sums, so the
    loader hands back statistics rather than the features themselves.
    """

    def __init__(self, feature_func):
        # Callable invoked as ``feature_func(audio_segment)``.
        self.feature_func = feature_func

    def __call__(self, batch):
        """Accumulate frame count, feature sums and squared-feature sums.

        :param batch: Iterable of manifest items, each with a ``'feat'`` key.
        :return: ``(number, mean_stat, var_stat)``; the two stats stay
                 ``None`` when ``batch`` is empty.
        """
        frame_total = 0
        feat_sum = None
        feat_sq_sum = None
        for entry in batch:
            segment = AudioSegment.from_file(entry['feat'])
            frames = self.feature_func(segment)  #(T, D)

            # Reduce over the first axis to get one value per feature column.
            col_sum = np.sum(frames, axis=0)
            col_sq_sum = np.sum(np.square(frames), axis=0)

            if feat_sum is None:
                # First item of the batch seeds both accumulators.
                feat_sum, feat_sq_sum = col_sum, col_sq_sum
            else:
                feat_sum += col_sum
                feat_sq_sum += col_sq_sum

            frame_total += frames.shape[0]
        return frame_total, feat_sum, feat_sq_sum


class AudioDataset(Dataset):
    """Dataset over the entries of a jsonlines manifest, optionally subsampled.

    :param manifest_path: Path of the jsonlines manifest to read.
    :param num_samples: Number of entries to draw without replacement;
                        ``-1`` keeps the whole manifest.
    :param rng: Random state used for sampling; a new
                ``np.random.RandomState(random_seed)`` is created when falsy.
    :param random_seed: Seed for the fallback random state.
    """

    def __init__(self, manifest_path, num_samples=-1, rng=None, random_seed=0):
        self._rng = rng or np.random.RandomState(random_seed)

        with jsonlines.open(manifest_path, 'r') as reader:
            entries = [record for record in reader]

        # -1 is the sentinel for "use every entry".
        if num_samples != -1:
            entries = self._rng.choice(entries, num_samples, replace=False)
        self.items = entries

    def __len__(self):
        """Return the number of retained manifest entries."""
        return len(self.items)

    def __getitem__(self, idx):
        """Return the manifest entry stored at position ``idx``."""
        return self.items[idx]


class FeatureNormalizer(object):
    """Feature normalizer. Normalize features to be of zero mean and unit
    stddev.

    If mean_std_filepath is provided (not None), the normalizer is directly
    initialized from it. Otherwise, both manifest_path and featurize_func
    should be given for on-the-fly mean and stddev computing.

    :param mean_std_filepath: File containing the pre-computed mean and stddev,
                              or a list whose first element holds them under
                              ``['cmvn_stats']['mean']`` / ``['istd']``.
    :type mean_std_filepath: None|str|list
    :param manifest_path: Manifest of instances for computing mean and stddev.
    :type manifest_path: None|str
    :param featurize_func: Function to extract features. It should be callable
                           with ``featurize_func(audio_segment)``.
    :type featurize_func: None|callable
    :param num_samples: Number of random samples for computing mean and stddev.
    :type num_samples: int
    :param num_workers: Number of worker processes given to the data loader.
    :type num_workers: int
    :param random_seed: Random seed for sampling instances.
    :type random_seed: int
    :raises ValueError: If both mean_std_filepath and manifest_path
                        (or both mean_std_filepath and featurize_func) are None.
    """

    def __init__(self,
                 mean_std_filepath,
                 manifest_path=None,
                 featurize_func=None,
                 num_samples=500,
                 num_workers=0,
                 random_seed=0):
        if not mean_std_filepath:
            if not (manifest_path and featurize_func):
                raise ValueError("If mean_std_filepath is None, meanifest_path "
                                 "and featurize_func should not be None.")
            self._rng = np.random.RandomState(random_seed)
            self._compute_mean_std(manifest_path, featurize_func, num_samples,
                                   num_workers)
        else:
            mean_std = mean_std_filepath
            self._read_mean_std_from_file(mean_std)

    def apply(self, features):
        """Normalize features to be of zero mean and unit stddev.

        :param features: Input features to be normalized.
        :type features: ndarray, shape (T, D)
        :return: Normalized features.
        :rtype: ndarray
        """
        return (features - self._mean) * self._istd

    def _read_mean_std_from_file(self, mean_std, eps=1e-20):
        """Load mean and inverse stddev from a file path or an in-memory list.

        :param mean_std: Path whose extension selects the loader, or a list
                         whose first element carries ``cmvn_stats``.
        :type mean_std: str|list
        :param eps: Unused; kept for interface compatibility.
        :type eps: float
        """
        if isinstance(mean_std, list):
            mean = mean_std[0]['cmvn_stats']['mean']
            istd = mean_std[0]['cmvn_stats']['istd']
        else:
            # The file extension doubles as the cmvn file type.
            filetype = mean_std.split(".")[-1]
            mean, istd = load_cmvn(mean_std, filetype=filetype)
        self._mean = np.expand_dims(mean, axis=0)
        self._istd = np.expand_dims(istd, axis=0)

    def _set_mean_istd_from_stats(self, mean_stat, var_stat, frame_num,
                                  eps=1e-20):
        """Derive ``_mean`` and ``_istd`` from accumulated sums.

        :param mean_stat: Per-dimension sum of features.
        :param var_stat: Per-dimension sum of squared features.
        :param frame_num: Number of frames the sums were accumulated over.
        :param eps: Lower bound applied to the variance before inversion.
        """
        mean = np.asarray(mean_stat, dtype=np.float64) / frame_num
        variance = np.asarray(
            var_stat, dtype=np.float64) / frame_num - np.square(mean)
        # Floor the variance so constant dimensions do not divide by zero.
        istd = 1.0 / np.sqrt(np.maximum(variance, eps))
        self._mean = np.expand_dims(mean, axis=0)
        self._istd = np.expand_dims(istd, axis=0)

    def write_to_file(self, filepath):
        """Write the accumulated statistics to the file as JSON.

        Only available after on-the-fly computation, which is what fills
        ``self.cmvn_info``.

        :param filepath: File to write the statistics to.
        :type filepath: str
        """
        with open(filepath, 'w') as fout:
            fout.write(json.dumps(self.cmvn_info))

    def _compute_mean_std(self,
                          manifest_path,
                          featurize_func,
                          num_samples,
                          num_workers,
                          batch_size=64,
                          eps=1e-20):
        """Compute mean and std from randomly sampled instances.

        Stores the raw sums in ``self.cmvn_info`` and the derived mean and
        inverse stddev in ``self._mean`` / ``self._istd`` so that ``apply``
        works on a normalizer built from a manifest.

        :param eps: Variance floor used when deriving the inverse stddev.
        :return: Dict with ``mean_stat``, ``var_stat`` and ``frame_num``.
        :raises ValueError: If no frames were produced by the sampled manifest.
        """
        paddle.set_device('cpu')

        collate_func = CollateFunc(featurize_func)
        dataset = AudioDataset(manifest_path, num_samples, self._rng)
        data_loader = DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=num_workers,
            collate_fn=collate_func)

        with paddle.no_grad():
            all_mean_stat = None
            all_var_stat = None
            all_number = 0
            wav_number = 0
            for i, batch in enumerate(data_loader):
                number, mean_stat, var_stat = batch
                if i == 0:
                    all_mean_stat = mean_stat
                    all_var_stat = var_stat
                else:
                    all_mean_stat += mean_stat
                    all_var_stat += var_stat
                all_number += number
                # Counts nominal batch sizes, so the last batch may overcount.
                wav_number += batch_size

                if wav_number % 1000 == 0:
                    logger.info(
                        f'process {wav_number} wavs,{all_number} frames.')

        if all_mean_stat is None or all_number == 0:
            raise ValueError(
                f"No frames found in {manifest_path}; cannot compute mean/std.")

        self.cmvn_info = {
            'mean_stat': list(all_mean_stat.tolist()),
            'var_stat': list(all_var_stat.tolist()),
            'frame_num': all_number,
        }
        self._set_mean_istd_from_stats(self.cmvn_info['mean_stat'],
                                       self.cmvn_info['var_stat'], all_number,
                                       eps)

        return self.cmvn_info


================================================
FILE: paddlespeech/s2t/frontend/speech.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains the speech segment class."""
import numpy as np

from paddlespeech.s2t.frontend.audio import AudioSegment


class SpeechSegment(AudioSegment):
    """Speech Segment with Text

    Args:
        AudioSegment (AudioSegment): Audio Segment
    """

    def __init__(self,
                 samples,
                 sample_rate,
                 transcript,
                 tokens=None,
                 token_ids=None):
        """Speech segment abstraction, a subclass of AudioSegment,
            with an additional transcript.

        Args:
            samples (ndarray.float32): Audio samples [num_samples x num_channels].
            sample_rate (int): Audio sample rate.
            transcript (str): Transcript text for the speech.
            tokens (List[str], optional): Transcript tokens for the speech.
            token_ids (List[int], optional): Transcript token ids for the speech.
        """
        AudioSegment.__init__(self, samples, sample_rate)
        self._transcript = transcript
        # must init `tokens` with `token_ids` at the same time
        self._tokens = tokens
        self._token_ids = token_ids

    def __eq__(self, other):
        """Return whether two objects are equal.

        Tokens and token ids are only compared when both segments have them.

        Returns:
            bool: True, when equal to other
        """
        if not AudioSegment.__eq__(self, other):
            return False
        if self._transcript != other._transcript:
            return False
        if self.has_token and other.has_token:
            if self._tokens != other._tokens:
                return False
            if self._token_ids != other._token_ids:
                return False
        return True

    def __ne__(self, other):
        """Return whether two objects are unequal."""
        return not self.__eq__(other)

    @classmethod
    def from_file(cls,
                  filepath,
                  transcript,
                  tokens=None,
                  token_ids=None,
                  infos=None):
        """Create speech segment from audio file and corresponding transcript.

        Args:
            filepath (str|file): Filepath or file object to audio file.
            transcript (str): Transcript text for the speech.
            tokens (List[str], optional): text tokens. Defaults to None.
            token_ids (List[int], optional): text token ids. Defaults to None.
            infos (TarLocalData, optional): tar2obj and tar2infos. Defaults to None.

        Returns:
            SpeechSegment: Speech segment instance.
        """
        audio = AudioSegment.from_file(filepath, infos)
        return cls(audio.samples, audio.sample_rate, transcript, tokens,
                   token_ids)

    @classmethod
    def from_bytes(cls, bytes, transcript, tokens=None, token_ids=None):
        """Create speech segment from a byte string and corresponding transcript.

        Args:
            bytes (bytes): Byte string holding the audio data.
            transcript (str): Transcript text for the speech.
            tokens (List[str], optional): text tokens. Defaults to None.
            token_ids (List[int], optional): text token ids. Defaults to None.

        Returns:
            SpeechSegment: Speech segment instance.
        """
        audio = AudioSegment.from_bytes(bytes)
        return cls(audio.samples, audio.sample_rate, transcript, tokens,
                   token_ids)

    @classmethod
    def from_pcm(cls,
                 samples,
                 sample_rate,
                 transcript,
                 tokens=None,
                 token_ids=None):
        """Create speech segment from pcm on online mode

        Args:
            samples (numpy.ndarray): Audio samples [num_samples x num_channels].
            sample_rate (int): Audio sample rate.
            transcript (str): Transcript text for the speech.
            tokens (List[str], optional): text tokens. Defaults to None.
            token_ids (List[int], optional): text token ids. Defaults to None.

        Returns:
            SpeechSegment: Speech segment instance.
        """
        audio = AudioSegment.from_pcm(samples, sample_rate)
        return cls(audio.samples, audio.sample_rate, transcript, tokens,
                   token_ids)

    @classmethod
    def concatenate(cls, *segments):
        """Concatenate an arbitrary number of speech segments together, both
        audio and transcript will be concatenated.

        Tokens and token ids are appended only for segments that carry them.

        :param *segments: Input speech segments to be concatenated.
        :type *segments: tuple of SpeechSegment
        :return: Speech segment instance.
        :rtype: SpeechSegment
        :raises ValueError: If the number of segments is zero, or if the
                            sample_rate of any two segments does not match.
        :raises TypeError: If any segment is not SpeechSegment instance.
        """
        if len(segments) == 0:
            raise ValueError("No speech segments are given to concatenate.")
        sample_rate = segments[0]._sample_rate
        transcripts = ""
        tokens = []
        token_ids = []
        for seg in segments:
            if sample_rate != seg._sample_rate:
                raise ValueError("Can't concatenate segments with "
                                 "different sample rates")
            if type(seg) is not cls:
                raise TypeError("Only speech segments of the same type "
                                "instance can be concatenated.")
            transcripts += seg._transcript
            # A classmethod has no `self`; the check belongs to each segment.
            if seg.has_token:
                tokens += seg._tokens
                token_ids += seg._token_ids
        samples = np.concatenate([seg.samples for seg in segments])
        return cls(samples, sample_rate, transcripts, tokens, token_ids)

    @classmethod
    def slice_from_file(cls,
                        filepath,
                        transcript,
                        tokens=None,
                        token_ids=None,
                        start=None,
                        end=None):
        """Loads a small section of a speech file without having to load
        the entire file into the memory which can be incredibly wasteful.

        :param filepath: Filepath or file object to audio file.
        :type filepath: str|file
        :param transcript: Transcript text for the speech.
        :type transcript: str
        :param tokens: Text tokens. Defaults to None.
        :type tokens: List[str]
        :param token_ids: Text token ids. Defaults to None.
        :type token_ids: List[int]
        :param start: Start time in seconds. If start is negative, it wraps
                      around from the end. If not provided, this function
                      reads from the very beginning.
        :type start: float
        :param end: End time in seconds. If end is negative, it wraps around
                    from the end. If not provided, the default behavior is
                    to read to the end of the file.
        :type end: float
        :return: SpeechSegment instance of the specified slice of the input
                 speech file.
        :rtype: SpeechSegment
        """
        audio = AudioSegment.slice_from_file(filepath, start, end)
        return cls(audio.samples, audio.sample_rate, transcript, tokens,
                   token_ids)

    @classmethod
    def make_silence(cls, duration, sample_rate):
        """Creates a silent speech segment of the given duration and
        sample rate, transcript will be an empty string.

        Args:
            duration (float): Length of silence in seconds.
            sample_rate (float): Sample rate.

        Returns:
            SpeechSegment: Silence of the given duration.
        """
        audio = AudioSegment.make_silence(duration, sample_rate)
        return cls(audio.samples, audio.sample_rate, "")

    @property
    def has_token(self):
        """Return True only when both tokens and token ids are non-empty."""
        if self._tokens and self._token_ids:
            return True
        return False

    @property
    def transcript(self):
        """Return the transcript text.

        Returns:
            str: Transcript text for the speech.
        """

        return self._transcript

    @property
    def tokens(self):
        """Return the transcript text tokens.

        Returns:
            List[str]: text tokens.
        """
        return self._tokens

    @property
    def token_ids(self):
        """Return the transcript text token ids.

        Returns:
            List[int]: text token ids.
        """
        return self._token_ids


================================================
FILE: paddlespeech/s2t/frontend/utility.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains data helper functions."""
import json
import math
import tarfile
from collections import namedtuple
from typing import List
from typing import Optional
from typing import Text

import jsonlines
import numpy as np

from paddlespeech.s2t.utils.log import Log

logger = Log(__name__).getlog()

__all__ = [
    "load_dict", "load_cmvn", "read_manifest", "rms_to_db", "rms_to_dbfs",
    "max_dbfs", "mean_dbfs", "gain_db_to_ratio", "normalize_audio", "SOS",
    "EOS", "UNK", "BLANK", "MASKCTC", "SPACE", "convert_samples_to_float32",
    "convert_samples_from_float32"
]

# NOTE(review): presumably the label id marking positions to ignore (e.g.
# padding); it is not referenced by the code visible here - confirm with callers.
IGNORE_ID = -1
# `sos` and `eos` using same token
SOS = "<eos>"
EOS = SOS
# NOTE(review): presumably the unknown / out-of-vocabulary token - confirm.
UNK = "<unk>"
# `load_dict` inserts this token at index 0 when the dictionary lacks it.
BLANK = "<blank>"
# `load_dict` appends this token when called with `maskctc=True`.
MASKCTC = "<mask>"
# Token that stands in for the space character in dictionary files.
SPACE = "<space>"


def load_dict(dict_path: Optional[Text], maskctc=False) -> Optional[List[Text]]:
    """Load a token dictionary file into a list of tokens.

    Each line contributes its first space-separated field, so both
    ``<blank> 0`` and bare ``<blank>`` lines are accepted. ``<blank>`` is
    inserted at the front and ``<eos>`` appended when missing.

    Args:
        dict_path (Optional[Text]): path of the dictionary file, or None.
        maskctc (bool, optional): also append ``<mask>`` when missing
            (for the non-autoregressive maskctc model). Defaults to False.

    Returns:
        Optional[List[Text]]: token list, or None when ``dict_path`` is None.
    """
    if dict_path is None:
        return None

    with open(dict_path, "r") as f:
        dictionary = f.readlines()
    # first token is `<blank>`
    # multi line: `<blank> 0\n`
    # one line: `<blank>`
    # space is replaced with <space>
    # Strip only the newline: slicing off the last character would truncate
    # the final token of a file that does not end with a newline.
    char_list = [entry.rstrip("\n").split(" ")[0] for entry in dictionary]
    if BLANK not in char_list:
        char_list.insert(0, BLANK)
    if EOS not in char_list:
        char_list.append(EOS)
    # for non-autoregressive maskctc model
    if maskctc and MASKCTC not in char_list:
        char_list.append(MASKCTC)
    return char_list


def read_manifest(
        manifest_path,
        max_input_len=float('inf'),
        min_input_len=0.0,
        max_output_len=float('inf'),
        min_output_len=0.0,
        max_output_input_ratio=float('inf'),
        min_output_input_ratio=0.0, ):
    """Read a jsonlines manifest and keep the entries that pass length filters.

    The input length is ``entry["input"][0]["shape"][0]`` and the output
    length is ``entry["output"][0]["shape"][0]``; either falls back to 1.0
    when the key or its ``shape`` is absent.

    Args:
        manifest_path (str): Manifest file to load and parse.
        max_input_len (float, optional): maximum input seq length.
            Defaults to float('inf').
        min_input_len (float, optional): minimum input seq length.
            Defaults to 0.0.
        max_output_len (float, optional): maximum output seq length.
            Defaults to float('inf').
        min_output_len (float, optional): minimum output seq length.
            Defaults to 0.0.
        max_output_input_ratio (float, optional):
            maximum output/input seq length ratio. Defaults to float('inf').
        min_output_input_ratio (float, optional):
            minimum output/input seq length ratio. Defaults to 0.0.

    Returns:
        List[dict]: Manifest entries that satisfy every bound.
    """

    def _first_dim(entry, key):
        # Length of the first stream under `key`, or 1.0 when unavailable.
        if key in entry and "shape" in entry[key][0]:
            return entry[key][0]["shape"][0]
        return 1.0

    kept = []
    with jsonlines.open(manifest_path, 'r') as reader:
        for entry in reader:
            feat_len = _first_dim(entry, "input")
            token_len = _first_dim(entry, "output")
            # Computed unconditionally, exactly as each bound is evaluated.
            ratio = token_len / feat_len
            in_bounds = (feat_len >= min_input_len,
                         feat_len <= max_input_len,
                         token_len >= min_output_len,
                         token_len <= max_output_len,
                         ratio >= min_output_input_ratio,
                         ratio <= max_output_input_ratio)
            if all(in_bounds):
                kept.append(entry)
    return kept


# Tar File read
# Cache pairing, per tar path, the parsed member table (`tar2info`) with the
# open tarfile object (`tar2object`). Both fields are dicts mutated in place.
TarLocalData = namedtuple('TarLocalData', ['tar2info', 'tar2object'])


def parse_tar(file):
    """Parse a tar file to get a tarfile object
    and a map containing tarinfoes.

    The tarfile object is returned open; the caller owns it.

    Args:
        file (str): path of the tar archive.

    Returns:
        Tuple[tarfile.TarFile, dict]: the open archive and a mapping from
        member name to its TarInfo.
    """
    f = tarfile.open(file)
    result = {tarinfo.name: tarinfo for tarinfo in f.getmembers()}
    return f, result


def subfile_from_tar(file, local_data=None):
    """Get subfile object from tar.

    tar:tarpath#filename

    It will return a subfile object from tar file
    and cached tar file info for next reading request.

    Args:
        file (str): spec of the form ``tar:<tarpath>#<filename>``.
        local_data (TarLocalData, optional): cache of already opened
            archives. When None, a throwaway cache is created, so the archive
            is re-opened on every call.

    Returns:
        file-like object for the requested member, as returned by
        ``TarFile.extractfile``.
    """
    tarpath, filename = file.split(':', 1)[1].split('#', 1)

    if local_data is None:
        local_data = TarLocalData(tar2info={}, tar2object={})

    assert isinstance(local_data, TarLocalData)

    # A namedtuple always carries both fields and has no `__dict__`, so no
    # per-field existence check is needed (or possible) here.
    if tarpath not in local_data.tar2info:
        fobj, infos = parse_tar(tarpath)
        local_data.tar2info[tarpath] = infos
        local_data.tar2object[tarpath] = fobj
    else:
        fobj = local_data.tar2object[tarpath]
        infos = local_data.tar2info[tarpath]
    return fobj.extractfile(infos[filename])


def rms_to_db(rms: float):
    """Convert a root-mean-square amplitude to decibels.

    Args:
        rms (float): root mean square amplitude.

    Returns:
        float: ``20 * log10(rms)``, with ``rms`` floored at 1e-16.
    """
    # The floor keeps log10 defined for silent (zero) input.
    floor = 1e-16
    clamped = max(floor, rms)
    return 20.0 * math.log10(clamped)


def rms_to_dbfs(rms: float):
    """Convert a root-mean-square amplitude to dBFS.

    https://fireattack.wordpress.com/2017/02/06/replaygain-loudness-normalization-and-applications/
    Audio is a mix of sine waves; a sine wave of amplitude 1 has an RMS of
    0.7071, i.e. -3.0103 dB.

    dB = dBFS + 3.0103
    dBFS = dB - 3.0103
    e.g. 0 dB = -3.0103 dBFS

    Args:
        rms (float): root mean square amplitude.

    Returns:
        float: level in dBFS.
    """
    level_db = rms_to_db(rms)
    return level_db - 3.0103


def max_dbfs(sample_data: np.ndarray):
    """Peak dBFS, taken from the sample with the largest magnitude.

    Using the peak prevents overdrive when the value drives normalization.

    Args:
        sample_data (np.ndarray): float array, [-1, 1].

    Returns:
        float: dBFS
    """
    lowest = abs(np.min(sample_data))
    highest = abs(np.max(sample_data))
    peak = max(lowest, highest)
    return rms_to_dbfs(peak)


def mean_dbfs(sample_data):
    """dBFS of the RMS energy of the samples.

    Args:
        sample_data (np.ndarray): float array, [-1, 1].

    Returns:
        float: dBFS
    """
    # Squares are taken in float64 before averaging.
    energy = np.square(sample_data, dtype=np.float64)
    rms = math.sqrt(np.mean(energy))
    return rms_to_dbfs(rms)


def gain_db_to_ratio(gain_db: float):
    """Convert a gain in dB to an amplitude ratio.

    Args:
        gain_db (float): gain in dB.

    Returns:
        float: amplitude scale, ``10 ** (gain_db / 20)``.
    """
    exponent = gain_db / 20.0
    return math.pow(10.0, exponent)


def normalize_audio(sample_data: np.ndarray, dbfs: float=-3.0103):
    """Scale audio so that its peak reaches the target dBFS.

    Args:
        sample_data (np.ndarray): input wave samples, [-1, 1].
        dbfs (float, optional): target dBFS. Defaults to -3.0103.

    Returns:
        np.ndarray: normalized wave, limited to [-1, 1].
    """
    # Gain needed to move the current peak level to the target level.
    gain = gain_db_to_ratio(dbfs - max_dbfs(sample_data))
    scaled = sample_data * gain
    upper_limited = np.minimum(scaled, 1.0)
    return np.maximum(upper_limited, -1.0)


def _load_json_cmvn(json_cmvn_file):
    """ Load the json format cmvn stats file and calculate cmvn

    Args:
        json_cmvn_file: cmvn stats file in json format

    Returns:
        a numpy array of [means, vars]
    """
    with open(json_cmvn_file) as f:
        cmvn_stats = json.load(f)

    means = cmvn_stats['mean_stat']
    variance = cmvn_stats['var_stat']
    count = cmvn_stats['frame_num']
    for i in range(len(means)):
        means[i] /= count
        variance[i] = variance[i] / count - means[i] * means[i]
        if variance[i] < 1.0e-20:
            variance[i] = 1.0e-20
        variance[i] = 1.0 / math.sqrt(variance[i])
    cmvn = np.array([means, variance])
    return cmvn


def _load_kaldi_cmvn(kaldi_cmvn_file):
    """ Load the kaldi format cmvn stats file and calculate cmvn

    Args:
        kaldi_cmvn_file:  kaldi text style global cmvn file, which
           is generated by:
           compute-cmvn-stats --binary=false scp:feats.scp global_cmvn

    Returns:
        a numpy array of [means, vars]
    """
    means = []
    variance = []
    with open(kaldi_cmvn_file, 'r') as fid:
        # kaldi binary file start with '\0B'
        if fid.read(2) == '\0B':
            logger.error('kaldi cmvn binary file is not supported, please '
                         'recompute it by: compute-cmvn-stats --binary=false '
                         ' scp:feats.scp global_cmvn')
            sys.exit(1)
        fid.seek(0)
        arr = fid.read().split()
        assert (arr[0] == '[')
        assert (arr[-2] == '0')
        assert (arr[-1] == ']')
        feat_dim = int((len(arr) - 2 - 2) / 2)
        for i in range(1, feat_dim + 1):
            means.append(float(arr[i]))
        count = float(arr[feat_dim + 1])
        for i in range(feat_dim + 2, 2 * feat_dim + 2):
            variance.append(float(arr[i]))

    for i in range(len(means)):
        means[i] /= count
        variance[i] = variance[i] / count - means[i] * means[i]
        if variance[i] < 1.0e-20:
            variance[i] = 1.0e-20
        variance[i] = 1.0 / math.sqrt(variance[i])
    cmvn = np.array([means, variance])
    return cmvn


def load_cmvn(cmvn_file: str, filetype: str):
    """load cmvn from file.

    Args:
        cmvn_file (str): cmvn path.
        filetype (str): file type, one of [npz, json, kaldi]
            (case-insensitive).

    Raises:
        ValueError: file type not support.

    Returns:
        Tuple[np.ndarray, np.ndarray]: mean, istd
    """
    # Normalize case before dispatching so "JSON" / "NPZ" are accepted.
    filetype = filetype.lower()
    if filetype == "json":
        cmvn = _load_json_cmvn(cmvn_file)
    elif filetype == "kaldi":
        cmvn = _load_kaldi_cmvn(cmvn_file)
    elif filetype == "npz":
        # eps keeps the inverse finite when a stored std is zero.
        eps = 1e-14
        # Close the archive once both arrays have been read out.
        with np.load(cmvn_file) as npzfile:
            mean = np.squeeze(npzfile["mean"])
            std = np.squeeze(npzfile["std"])
        istd = 1 / (std + eps)
        cmvn = [mean, istd]
    else:
        raise ValueError(f"cmvn file type no support: {filetype}")
    return cmvn[0], cmvn[1]


def convert_samples_to_float32(samples):
    """Convert sample type to float32.

    Audio sample type is usually integer or float-point.
    Signed integers will be scaled to [-1, 1] in float32; floating-point
    input is only cast.

    PCM16 -> PCM32

    Args:
        samples (np.ndarray): signed-integer or floating-point samples.

    Returns:
        np.ndarray: float32 copy of the samples.

    Raises:
        TypeError: if the sample dtype is neither signed integer nor float.
    """
    src_dtype = samples.dtype
    float32_samples = samples.astype('float32')
    # np.issubdtype replaces the np.sctypes lookup, which NumPy 2.0 removed.
    if np.issubdtype(src_dtype, np.signedinteger):
        bits = np.iinfo(src_dtype).bits
        # Divide by the magnitude of the most negative representable value.
        float32_samples *= (1. / 2**(bits - 1))
    elif np.issubdtype(src_dtype, np.floating):
        pass
    else:
        raise TypeError("Unsupported sample type: %s." % samples.dtype)
    return float32_samples


def convert_samples_from_float32(samples, dtype):
    """Convert sample type from float32 to dtype.

    Audio sample type is usually integer or float-point. For integer
    type, float32 will be rescaled from [-1, 1] to the maximum range
    supported by the integer type, saturating at its limits.

    PCM32 -> PCM16

    Args:
        samples (np.ndarray): floating-point samples.
        dtype: target dtype, signed integer or floating point.

    Returns:
        np.ndarray: converted copy of the samples.

    Raises:
        TypeError: if the target dtype is neither signed integer nor float.
    """
    dtype = np.dtype(dtype)
    # np.issubdtype replaces the np.sctypes lookup, which NumPy 2.0 removed.
    if np.issubdtype(dtype, np.signedinteger):
        limits = np.iinfo(dtype)
        # Scale in float64: float32 cannot represent the int32 maximum, so
        # saturating there would overflow on the final cast.
        scaled = samples.astype(np.float64) * float(2**(limits.bits - 1))
        output_samples = np.clip(scaled, limits.min, limits.max)
    elif np.issubdtype(dtype, np.floating):
        # The decision is about the *target* dtype, not the input's.
        limits = np.finfo(dtype)
        output_samples = np.clip(samples, limits.min, limits.max)
    else:
        raise TypeError("Unsupported sample type: %s." % dtype)
    return output_samples.astype(dtype)


================================================
FILE: paddlespeech/s2t/io/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/io/batchfy.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
import itertools

import numpy as np

from paddlespeech.s2t.utils.log import Log

__all__ = ["make_batchset"]

logger = Log(__name__).getlog()


def batchfy_by_seq(
        sorted_data,
        batch_size,
        max_length_in,
        max_length_out,
        min_batch_size=1,
        shortest_first=False,
        ikey="input",
        iaxis=0,
        okey="output",
        oaxis=0, ):
    """Split sorted utterances into batches whose size shrinks with length.

    :param List[(str, Dict[str, Any])] sorted_data: dictionary loaded from data.json
    :param int batch_size: nominal batch size
    :param int max_length_in: input length at which the batch size starts to shrink
    :param int max_length_out: output length at which the batch size starts to shrink
    :param int min_batch_size: minimum batch size (for multi-gpu)
    :param bool shortest_first: reverse the order of samples inside each batch
    :param str ikey: key to access input
        (for ASR ikey="input", for TTS, MT ikey="output".)
    :param int iaxis: index of the input stream to measure
    :param str okey: key to access output
        (for ASR, MT okey="output". for TTS okey="input".)
    :param int oaxis: index of the output stream to measure;
        a negative value takes the longest of all output streams
    :return: List[List[Tuple[str, dict]]] list of batches
    :raises ValueError: if batch_size is not positive or there are fewer
        utterances than min_batch_size
    """
    if batch_size <= 0:
        raise ValueError(f"Invalid batch_size={batch_size}")

    # check #utts is more than min_batch_size
    total = len(sorted_data)
    if total < min_batch_size:
        raise ValueError(
            f"#utts({total}) is less than min_batch_size({min_batch_size}).")

    batches = []
    begin = 0
    while True:
        # The first utterance of the window decides the batch size.
        _uttid, head = sorted_data[begin]
        in_len = int(head[ikey][iaxis]["shape"][0])
        if oaxis >= 0:
            out_len = int(head[okey][oaxis]["shape"][0])
        else:
            out_len = max(int(stream["shape"][0]) for stream in head[okey])

        # e.g. in_len = 1000 with max_length_in = 800 halves the batch size;
        # max(min_batch_size, .) keeps it from reaching zero.
        shrink = max(int(in_len / max_length_in), int(out_len / max_length_out))
        cur_size = max(min_batch_size, int(batch_size / (1 + shrink)))
        stop = min(total, begin + cur_size)

        chunk = sorted_data[begin:stop]
        if shortest_first:
            chunk.reverse()

        # Pad a short trailing batch with random earlier utterances.
        if len(chunk) < min_batch_size:
            missing = min_batch_size - len(chunk) % min_batch_size
            filler = [
                sorted_data[pick]
                for pick in np.random.randint(0, begin, missing)
            ]
            if shortest_first:
                filler.reverse()
            chunk.extend(filler)
        batches.append(chunk)

        if stop == total:
            break
        begin = stop

    # batch: List[List[Tuple[str, dict]]]
    return batches


def batchfy_by_bin(
        sorted_data,
        batch_bins,
        num_batches=0,
        min_batch_size=1,
        shortest_first=False,
        ikey="input",
        okey="output", ):
    """Make variably sized batches, each filled with bins up to `batch_bins`.

    For every sample, ``ilen``/``olen`` are ``shape[0]`` of the first
    ``ikey``/``okey`` entry multiplied by the feature dimension (``shape[1]``,
    read once from the first sample). A batch keeps growing while
    ``(max_olen + ilen) * num_samples <= batch_bins``, where ``max_olen`` is
    the largest ``olen`` seen so far in the batch and ``ilen`` belongs to the
    sample currently being considered.

    :param List[(str, Dict[str, Any])] sorted_data: dictionary loaded from data.json
    :param int batch_bins: Maximum number of bins of a batch
    :param int num_batches: # number of batches to use (for debug)
    :param int min_batch_size: minimum batch size (for multi-gpu)
    :param bool shortest_first: Sort from batch with shortest samples
        to longest if true, otherwise reverse

    :param str ikey: key to access input (for ASR ikey="input", for TTS ikey="output".)
    :param str okey: key to access output (for ASR okey="output". for TTS okey="input".)

    :return: List[List[Tuple[str, dict]]] list of batches
    :raises ValueError: if `batch_bins` is not positive, if `sorted_data` is
        empty, or if it holds fewer than `min_batch_size` utterances.
    """
    if batch_bins <= 0:
        raise ValueError(f"invalid batch_bins={batch_bins}")
    length = len(sorted_data)
    if length == 0:
        raise ValueError("sorted_data is empty, cannot make any batch.")
    # Same guard as batchfy_by_seq: without it the fix-up loop below would
    # discard the only batch and the summary log would fail on min([]).
    if length < min_batch_size:
        raise ValueError(
            f"#utts({length}) is less than min_batch_size({min_batch_size}).")
    idim = int(sorted_data[0][1][ikey][0]["shape"][1])
    odim = int(sorted_data[0][1][okey][0]["shape"][1])
    logger.info("# utts: " + str(len(sorted_data)))
    minibatches = []
    start = 0
    while True:
        # Dynamic batch size depending on size of samples
        b = 0
        next_size = 0
        max_olen = 0
        while next_size < batch_bins and (start + b) < length:
            ilen = int(sorted_data[start + b][1][ikey][0]["shape"][0]) * idim
            olen = int(sorted_data[start + b][1][okey][0]["shape"][0]) * odim
            if olen > max_olen:
                max_olen = olen
            next_size = (max_olen + ilen) * (b + 1)
            if next_size <= batch_bins:
                b += 1
            elif next_size == 0:
                # NOTE(review): unreachable as written, because batch_bins > 0
                # makes next_size == 0 satisfy the branch above. A sample that
                # alone exceeds batch_bins is therefore not rejected; it is
                # still emitted (see `end` below).
                raise ValueError(
                    f"Can't fit one sample in batch_bins ({batch_bins}): "
                    f"Please increase the value")
        # Always advance by at least one sample so the outer loop terminates
        # even when b == 0 and min_batch_size <= 0.
        end = min(length, start + max(min_batch_size, b, 1))
        batch = sorted_data[start:end]
        if shortest_first:
            batch.reverse()
        minibatches.append(batch)
        # Check for min_batch_size and fixes the batches if needed:
        # a short batch borrows samples from the batch before it, and the
        # shortage is propagated backwards until every batch is large enough.
        i = -1
        while len(minibatches[i]) < min_batch_size:
            missing = min_batch_size - len(minibatches[i])
            if -i == len(minibatches):
                # reached the first batch: merge it into the following one
                minibatches[i + 1].extend(minibatches[i])
                minibatches = minibatches[1:]
                break
            else:
                minibatches[i].extend(minibatches[i - 1][:missing])
                minibatches[i - 1] = minibatches[i - 1][missing:]
                i -= 1
        if end == length:
            break
        start = end
    if num_batches > 0:
        minibatches = minibatches[:num_batches]
    lengths = [len(x) for x in minibatches]
    logger.info(
        str(len(minibatches)) + " batches containing from " + str(min(lengths))
        + " to " + str(max(lengths)) + " samples " + "(avg " + str(
            int(np.mean(lengths))) + " samples).")
    return minibatches


def batchfy_by_frame(
        sorted_data,
        max_frames_in,
        max_frames_out,
        max_frames_inout,
        num_batches=0,
        min_batch_size=1,
        shortest_first=False,
        ikey="input",
        okey="output", ):
    """Make variably sized batches bounded by per-batch frame budgets.

    A batch keeps growing while ``max_len * num_samples`` stays within each
    enabled budget (a budget equal to 0 is disabled). Lengths are ``shape[0]``
    of the first ``ikey``/``okey`` entry of every sample.

    After the frame-based split, batches shorter than `min_batch_size` are
    topped up from neighbouring batches, so `min_batch_size` takes precedence
    over the frame budgets.

    :param List[(str, Dict[str, Any])] sorted_data: dictionary loaded from data.json
    :param int max_frames_in: Maximum input frames of a batch
    :param int max_frames_out: Maximum output frames of a batch
    :param int max_frames_inout: Maximum input+output frames of a batch
    :param int num_batches: # number of batches to use (for debug)
    :param int min_batch_size: minimum batch size (for multi-gpu)
    :param bool shortest_first: Sort from batch with shortest samples
        to longest if true, otherwise reverse

    :param str ikey: key to access input (for ASR ikey="input", for TTS ikey="output".)
    :param str okey: key to access output (for ASR okey="output". for TTS okey="input".)

    :return: List[List[Tuple[str, dict]]] list of batches
    :raises ValueError: if no budget is positive, if a single sample exceeds
        an enabled budget, if `sorted_data` is empty, or if it holds fewer
        than `min_batch_size` utterances.
    """
    if max_frames_in <= 0 and max_frames_out <= 0 and max_frames_inout <= 0:
        raise ValueError(
            "At least, one of `--batch-frames-in`, `--batch-frames-out` or "
            "`--batch-frames-inout` should be > 0")
    length = len(sorted_data)
    if length == 0:
        raise ValueError("sorted_data is empty, cannot make any batch.")
    # Same guard as batchfy_by_seq; min_batch_size can never be satisfied.
    if length < min_batch_size:
        raise ValueError(
            f"#utts({length}) is less than min_batch_size({min_batch_size}).")
    minibatches = []
    start = 0
    end = 0
    while end != length:
        # Dynamic batch size depending on size of samples
        b = 0
        max_olen = 0
        max_ilen = 0
        while (start + b) < length:
            ilen = int(sorted_data[start + b][1][ikey][0]["shape"][0])
            if ilen > max_frames_in and max_frames_in != 0:
                raise ValueError(
                    f"Can't fit one sample in --batch-frames-in ({max_frames_in}): "
                    f"Please increase the value")
            olen = int(sorted_data[start + b][1][okey][0]["shape"][0])
            if olen > max_frames_out and max_frames_out != 0:
                raise ValueError(
                    f"Can't fit one sample in --batch-frames-out ({max_frames_out}): "
                    f"Please increase the value")
            if ilen + olen > max_frames_inout and max_frames_inout != 0:
                raise ValueError(
                    f"Can't fit one sample in --batch-frames-inout ({max_frames_inout}): "
                    f"Please increase the value")
            max_olen = max(max_olen, olen)
            max_ilen = max(max_ilen, ilen)
            in_ok = max_ilen * (b + 1) <= max_frames_in or max_frames_in == 0
            out_ok = max_olen * (b + 1) <= max_frames_out or max_frames_out == 0
            inout_ok = (max_ilen + max_olen) * (
                b + 1) <= max_frames_inout or max_frames_inout == 0
            if in_ok and out_ok and inout_ok:
                # add more seq in the minibatch
                b += 1
            else:
                # no more seq in the minibatch
                break
        end = min(length, start + b)
        batch = sorted_data[start:end]
        if shortest_first:
            batch.reverse()
        minibatches.append(batch)
        # A short first batch kept from the previous iteration had no
        # predecessor to borrow from; now that a successor exists, fold it in.
        if len(minibatches) == 2 and len(minibatches[0]) < min_batch_size:
            minibatches[1].extend(minibatches[0])
            minibatches = minibatches[1:]
        # Check for min_batch_size and fixes the batches if needed:
        # a short batch borrows samples from the batch before it, and the
        # shortage is propagated backwards until every batch is large enough.
        i = -1
        while len(minibatches[i]) < min_batch_size:
            missing = min_batch_size - len(minibatches[i])
            if -i == len(minibatches):
                # Reached the first batch. Merge it into the following batch
                # when there is one; a lone batch is kept as is (extending it
                # with itself and slicing it away would drop its utterances).
                if len(minibatches) > 1:
                    minibatches[i + 1].extend(minibatches[i])
                    minibatches = minibatches[1:]
                break
            else:
                minibatches[i].extend(minibatches[i - 1][:missing])
                minibatches[i - 1] = minibatches[i - 1][missing:]
                i -= 1
        start = end
    if num_batches > 0:
        minibatches = minibatches[:num_batches]
    lengths = [len(x) for x in minibatches]
    logger.info(
        str(len(minibatches)) + " batches containing from " + str(min(lengths))
        + " to " + str(max(lengths)) + " samples" + "(avg " + str(
            int(np.mean(lengths))) + " samples).")

    return minibatches


def batchfy_shuffle(data, batch_size, min_batch_size, num_batches,
                    shortest_first):
    """Split randomly shuffled utterances into fixed-size batches.

    :param Dict[str, dict] data: mapping from utterance id to its info dict
    :param int batch_size: number of utterances per batch
    :param int min_batch_size: minimum batch size; a shorter trailing batch
        is padded with utterances drawn at random from the earlier batches
    :param int num_batches: # number of batches to use (for debug)
    :param bool shortest_first: reverse the order inside every batch if true
    :return: List[List[Tuple[str, dict]]] list of batches
    :raises ValueError: if `batch_size` is not positive or `data` holds fewer
        than `min_batch_size` utterances.
    """
    import random

    if batch_size <= 0:
        # a non-positive size never advances `start` (endless loop)
        raise ValueError(f"Invalid batch_size={batch_size}")

    logger.info("use shuffled batch.")
    # random.sample needs a real sequence: dict views are rejected with
    # TypeError since Python 3.11.
    items = list(data.items())
    if len(items) < min_batch_size:
        raise ValueError(
            f"#utts({len(items)}) is less than min_batch_size({min_batch_size})."
        )
    sorted_data = random.sample(items, len(items))
    logger.info("# utts: " + str(len(sorted_data)))
    # make list of minibatches
    minibatches = []
    start = 0
    while True:
        end = min(len(sorted_data), start + batch_size)
        # check each batch is more than minimum batchsize
        minibatch = sorted_data[start:end]
        if shortest_first:
            minibatch.reverse()
        if len(minibatch) < min_batch_size:
            mod = min_batch_size - len(minibatch) % min_batch_size
            # pad with utterances already used in previous batches
            additional_minibatch = [
                sorted_data[i] for i in np.random.randint(0, start, mod)
            ]
            if shortest_first:
                additional_minibatch.reverse()
            minibatch.extend(additional_minibatch)
        minibatches.append(minibatch)
        if end == len(sorted_data):
            break
        start = end

    # for debugging
    if num_batches > 0:
        minibatches = minibatches[:num_batches]
        logger.info("# minibatches: " + str(len(minibatches)))
    return minibatches


BATCH_COUNT_CHOICES = ["auto", "seq", "bin", "frame"]
BATCH_SORT_KEY_CHOICES = ["input", "output", "shuffle"]


def make_batchset(
        data,
        batch_size=0,
        max_length_in=float("inf"),
        max_length_out=float("inf"),
        num_batches=0,
        min_batch_size=1,
        shortest_first=False,
        batch_sort_key="input",
        count="auto",
        batch_bins=0,
        batch_frames_in=0,
        batch_frames_out=0,
        batch_frames_inout=0,
        iaxis=0,
        oaxis=0, ):
    """Group utterances into minibatches.

    Utterances are first grouped by their optional "category" value and
    every group is batched on its own, so a batch never mixes categories:

        >>> data = [{'category': 'A', 'input': ..., 'utt':'utt1'},
        ...         {'category': 'B', 'input': ..., 'utt':'utt2'},
        ...         {'category': 'B', 'input': ..., 'utt':'utt3'},
        ...         {'category': 'A', 'input': ..., 'utt':'utt4'}]
        >>> make_batchset(data, batch_size=2, ...)
        [[('utt1', ...), ('utt4', ...)], [('utt2', ...), ('utt3': ...)]]

    Utterances without "category" all fall into one group, in which case
    the result is the same as calling batchfy_by_{count} directly.

    :param List[Dict[str, Any]] data: utterance dicts loaded from data.json;
        each one must carry an 'utt' key
    :param int batch_size: maximum number of sequences in a minibatch.
    :param int max_length_in: maximum length of input to decide adaptive batch size
    :param int max_length_out: maximum length of output to decide adaptive batch size
    :param int num_batches: # number of batches to use (for debug)
    :param int min_batch_size: minimum batch size (for multi-gpu)
    :param bool shortest_first: Sort from batch with shortest samples
        to longest if true, otherwise reverse
    :param str batch_sort_key: how to sort data before creating minibatches
        ["input", "output", "shuffle"]
    :param str count: strategy to count maximum size of batch.
        For choices, see io.batchfy.BATCH_COUNT_CHOICES; "auto" picks the
        first of seq/bin/frame whose size argument is non-zero.
    :param int batch_bins: maximum number of bins (frames x dim) in a minibatch.
    :param int batch_frames_in:  maximum number of input frames in a minibatch.
    :param int batch_frames_out: maximum number of output frames in a minibatch.
    :param int batch_frames_inout: maximum number of input+output frames in a minibatch.
    :param int iaxis: dimension to access input
        (for ASR, TTS iaxis=0, for MT iaxis="1".)
    :param int oaxis: dimension to access output (for ASR, TTS, MT oaxis=0,
        reserved for future research, -1 means all axis.)
    :return: List[List[Tuple[str, dict]]] list of batches
    """
    # reject unknown strategy names up front
    if count not in BATCH_COUNT_CHOICES:
        raise ValueError(
            f"arg 'count' ({count}) should be one of {BATCH_COUNT_CHOICES}")
    if batch_sort_key not in BATCH_SORT_KEY_CHOICES:
        raise ValueError(f"arg 'batch_sort_key' ({batch_sort_key}) should be "
                         f"one of {BATCH_SORT_KEY_CHOICES}")

    ikey, okey = "input", "output"
    # entry of the input/output list whose length drives the sort
    batch_sort_axis = 0

    if count == "auto":
        # first non-zero size argument decides the counting strategy
        if batch_size != 0:
            count = "seq"
        elif batch_bins != 0:
            count = "bin"
        elif batch_frames_in != 0 or batch_frames_out != 0 or batch_frames_inout != 0:
            count = "frame"
        else:
            raise ValueError(
                f"cannot detect `count` manually set one of {BATCH_COUNT_CHOICES}"
            )
        logger.info(f"count is auto detected as {count}")

    if batch_sort_key == "shuffle" and count != "seq":
        raise ValueError(
            "batch_sort_key=shuffle is only available if batch_count=seq")

    # category -> {utt id -> utterance dict}
    category2data = {}
    for entry in data:
        utt_id = entry['utt']
        group = category2data.setdefault(entry.get("category"), {})
        group[utt_id] = entry

    def _sort_length(pair):
        # pair is (utt id, utterance dict)
        return float(pair[1][batch_sort_key][batch_sort_axis]["shape"][0])

    # one list of batches per category
    batches_list = []
    for group in category2data.values():
        if batch_sort_key == "shuffle":
            batches_list.append(
                batchfy_shuffle(group, batch_size, min_batch_size,
                                num_batches, shortest_first))
            continue

        # long to short unless shortest_first is requested
        sorted_data = sorted(
            group.items(), key=_sort_length, reverse=not shortest_first)
        logger.info("# utts: " + str(len(sorted_data)))

        if count == "seq":
            batches = batchfy_by_seq(
                sorted_data,
                batch_size=batch_size,
                max_length_in=max_length_in,
                max_length_out=max_length_out,
                min_batch_size=min_batch_size,
                shortest_first=shortest_first,
                ikey=ikey,
                iaxis=iaxis,
                okey=okey,
                oaxis=oaxis, )
        elif count == "bin":
            batches = batchfy_by_bin(
                sorted_data,
                batch_bins=batch_bins,
                min_batch_size=min_batch_size,
                shortest_first=shortest_first,
                ikey=ikey,
                okey=okey, )
        elif count == "frame":
            batches = batchfy_by_frame(
                sorted_data,
                max_frames_in=batch_frames_in,
                max_frames_out=batch_frames_out,
                max_frames_inout=batch_frames_inout,
                min_batch_size=min_batch_size,
                shortest_first=shortest_first,
                ikey=ikey,
                okey=okey, )
        batches_list.append(batches)

    if len(batches_list) == 1:
        batches = batches_list[0]
    else:
        # flatten the per-category lists into a single list of batches
        batches = list(itertools.chain.from_iterable(batches_list))

    # for debugging
    if num_batches > 0:
        batches = batches[:num_batches]
    logger.info("# minibatches: " + str(len(batches)))

    # batch: List[List[Tuple[str, dict]]]
    return batches


================================================
FILE: paddlespeech/s2t/io/collator.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io

import numpy as np

from paddlespeech.s2t.frontend.augmentor.augmentation import AugmentationPipeline
from paddlespeech.s2t.frontend.featurizer.speech_featurizer import SpeechFeaturizer
from paddlespeech.s2t.frontend.normalizer import FeatureNormalizer
from paddlespeech.s2t.frontend.speech import SpeechSegment
from paddlespeech.s2t.frontend.utility import IGNORE_ID
from paddlespeech.s2t.frontend.utility import TarLocalData
from paddlespeech.s2t.io.reader import LoadInputsAndTargets
from paddlespeech.s2t.io.utility import pad_list
from paddlespeech.s2t.utils.log import Log

__all__ = ["SpeechCollator", "TripletSpeechCollator"]

logger = Log(__name__).getlog()


def _tokenids(text, keep_transcription_text):
    # for training text is token ids
    tokens = text  # token ids

    if keep_transcription_text:
        # text is string, convert to unicode ord
        assert isinstance(text, str), (type(text), text)
        tokens = [ord(t) for t in text]

    tokens = np.array(tokens, dtype=np.int64)
    return tokens


class SpeechCollatorBase():
    def __init__(
            self,
            aug_file,
            mean_std_filepath,
            vocab_filepath,
            spm_model_prefix,
            random_seed=0,
            unit_type="char",
            spectrum_type='linear',  # 'linear', 'mfcc', 'fbank'
            feat_dim=0,  # 'mfcc', 'fbank'
            delta_delta=False,  # 'mfcc', 'fbank'
            stride_ms=10.0,  # ms
            window_ms=20.0,  # ms
            n_fft=None,  # fft points
            max_freq=None,  # None for samplerate/2
            target_sample_rate=16000,  # target sample rate
            use_dB_normalization=True,
            target_dB=-20,
            dither=1.0,
            keep_transcription_text=True):
        """SpeechCollator Collator

        Args:
            aug_file (file-like): object whose ``read()`` returns the augmentation
                config text; it is handed to ``AugmentationPipeline`` as ``preprocess_conf``.
            mean_std_filepath (str): mean and std file path, which suffix is *.npy.
                When empty/None no ``FeatureNormalizer`` is created.
            vocab_filepath (str): vocab file path.
            spm_model_prefix (str): spm model prefix, need if `unit_type` is spm.
            random_seed (int, optional): for random generator. Defaults to 0.
            unit_type(str): token unit type, e.g. char, word, spm
            spectrum_type (str, optional): 'linear', 'mfcc' or 'fbank'. Defaults to 'linear'.
            feat_dim (int, optional): audio feature dim, using by 'mfcc' or 'fbank'. Defaults to 0.
            delta_delta (bool, optional): audio feature with delta-delta, using by 'fbank' or 'mfcc'. Defaults to False.
            stride_ms (float, optional): stride size in ms. Defaults to 10.0.
            window_ms (float, optional): window size in ms. Defaults to 20.0.
            n_fft (int, optional): fft points for rfft. Defaults to None.
            max_freq (int, optional): max cut freq. Defaults to None.
            target_sample_rate (int, optional): target sample rate which used for training. Defaults to 16000.
            use_dB_normalization (bool, optional): do dB normalization. Defaults to True.
            target_dB (int, optional): target dB. Defaults to -20.
            dither (float, optional): forwarded unchanged to ``SpeechFeaturizer``. Defaults to 1.0.
            keep_transcription_text (bool, optional): True, when not in training mode, will not do tokenizer; Defaults to True.
            if ``keep_transcription_text`` is False, text is token ids else is raw string.

        Do augmentations
        Padding audio features with zeros to make them have the same shape (or
        a user-defined shape) within one batch.
        """
        self.keep_transcription_text = keep_transcription_text
        # training mode is defined as "transcripts are tokenized"
        self.train_mode = not keep_transcription_text

        self.stride_ms = stride_ms
        self.window_ms = window_ms
        self.feat_dim = feat_dim

        self.loader = LoadInputsAndTargets()

        # only for tar filetype
        self._local_data = TarLocalData(tar2info={}, tar2object={})

        # NOTE: aug_file is only read here, never closed; the caller owns it.
        self.augmentation = AugmentationPipeline(
            preprocess_conf=aug_file.read(), random_seed=random_seed)

        self._normalizer = FeatureNormalizer(
            mean_std_filepath) if mean_std_filepath else None

        self._speech_featurizer = SpeechFeaturizer(
            unit_type=unit_type,
            vocab_filepath=vocab_filepath,
            spm_model_prefix=spm_model_prefix,
            spectrum_type=spectrum_type,
            feat_dim=feat_dim,
            delta_delta=delta_delta,
            stride_ms=stride_ms,
            window_ms=window_ms,
            n_fft=n_fft,
            max_freq=max_freq,
            target_sample_rate=target_sample_rate,
            use_dB_normalization=use_dB_normalization,
            target_dB=target_dB,
            dither=dither)

        # shortcuts to featurizer attributes
        self.feature_size = self._speech_featurizer.audio_feature.feature_size
        self.text_feature = self._speech_featurizer.text_feature
        self.vocab_dict = self.text_feature.vocab_dict
        self.vocab_list = self.text_feature.vocab_list
        self.vocab_size = self.text_feature.vocab_size

    def process_utterance(self, audio_file, transcript):
        """Load, augment, featurize and normalize for speech data.

        Inputs whose file type (as reported by the loader) is not 'sound' are
        loaded directly as a spectrum and only their feature dimension is
        checked; 'sound' inputs are read, audio-augmented, featurized and, if
        a normalizer is configured, normalized. Feature-level augmentation is
        applied in both cases.

        :param audio_file: Filepath or file object of audio file.
        :type audio_file: str | file
        :param transcript: Transcription text.
        :type transcript: str
        :return: Tuple of audio feature tensor and data of transcription part,
                 where transcription part could be token ids or text.
        :rtype: tuple of (2darray, list)
        """
        filetype = self.loader.file_type(audio_file)

        if filetype != 'sound':
            # precomputed features: no audio augmentation, no CMVN here
            spectrum = self.loader._get_from_loader(audio_file, filetype)
            feat_dim = spectrum.shape[1]
            assert feat_dim == self.feat_dim, f"expect feat dim {self.feat_dim}, but got {feat_dim}"

            if self.keep_transcription_text:
                transcript_part = transcript
            else:
                text_ids = self.text_feature.featurize(transcript)
                transcript_part = text_ids
        else:
            # read audio
            speech_segment = SpeechSegment.from_file(
                audio_file, transcript, infos=self._local_data)
            # audio augment
            self.augmentation.transform_audio(speech_segment)

            # extract speech feature
            spectrum, transcript_part = self._speech_featurizer.featurize(
                speech_segment, self.keep_transcription_text)
            # CMVN spectrum
            if self._normalizer:
                spectrum = self._normalizer.apply(spectrum)

        # spectrum augment
        spectrum = self.augmentation.transform_feature(spectrum)
        return spectrum, transcript_part

    def __call__(self, batch):
        """batch examples

        Args:
            batch (List[Dict]): batch is [dict(utt, input, output, ...)];
                ``item['input'][0]['feat']`` and ``item['output'][0]['text']``
                are passed to ``process_utterance``.

        Returns:
            tuple(utts, xs_pad, ilens, ys_pad, olens): batched data.
                utts: (B,)
                xs_pad : (B, Tmax, D)
                ilens: (B,)
                ys_pad : (B, Umax)
                olens: (B,)
        """
        audios = []
        audio_lens = []
        texts = []
        text_lens = []
        utts = []

        for item in batch:
            utts.append(item['utt'])

            audio = item['input'][0]['feat']
            text = item['output'][0]['text']
            audio, text = self.process_utterance(audio, text)

            audios.append(audio)  # [T, D]
            audio_lens.append(audio.shape[0])

            tokens = _tokenids(text, self.keep_transcription_text)
            texts.append(tokens)
            text_lens.append(tokens.shape[0])

        #[B, T, D]
        xs_pad = pad_list(audios, 0.0).astype(np.float32)
        ilens = np.array(audio_lens).astype(np.int64)
        # targets are padded with IGNORE_ID
        ys_pad = pad_list(texts, IGNORE_ID).astype(np.int64)
        olens = np.array(text_lens).astype(np.int64)
        return utts, xs_pad, ilens, ys_pad, olens


class SpeechCollator(SpeechCollatorBase):
    @classmethod
    def from_config(cls, config):
        """Build a SpeechCollator object from a config.

        ``config.augmentation_config`` may be a path (str/bytes) to the
        augmentation config, an empty string (meaning "no augmentation",
        i.e. ``'{}'``), or an ``io.StringIO`` already holding the config text.

        Args:
            config (yacs.config.CfgNode): configs object.

        Returns:
            SpeechCollator: collator object.
        """
        assert 'augmentation_config' in config
        assert 'keep_transcription_text' in config
        assert 'mean_std_filepath' in config
        assert 'vocab_filepath' in config
        assert 'spectrum_type' in config
        assert 'n_fft' in config
        assert config

        if isinstance(config.augmentation_config, (str, bytes)):
            if config.augmentation_config:
                # Read the file eagerly and close it right away: the collator
                # only needs the text through ``read()``, so handing over an
                # in-memory buffer avoids leaking the file handle.
                with io.open(
                        config.augmentation_config, mode='r',
                        encoding='utf8') as fin:
                    aug_file = io.StringIO(fin.read())
            else:
                aug_file = io.StringIO(initial_value='{}', newline='')
        else:
            aug_file = config.augmentation_config
            assert isinstance(aug_file, io.StringIO)

        speech_collator = cls(
            aug_file=aug_file,
            random_seed=0,
            mean_std_filepath=config.mean_std_filepath,
            unit_type=config.unit_type,
            vocab_filepath=config.vocab_filepath,
            spm_model_prefix=config.spm_model_prefix,
            spectrum_type=config.spectrum_type,
            feat_dim=config.feat_dim,
            delta_delta=config.delta_delta,
            stride_ms=config.stride_ms,
            window_ms=config.window_ms,
            n_fft=config.n_fft,
            max_freq=config.max_freq,
            target_sample_rate=config.target_sample_rate,
            use_dB_normalization=config.use_dB_normalization,
            target_dB=config.target_dB,
            dither=config.dither,
            keep_transcription_text=config.keep_transcription_text)
        return speech_collator


class TripletSpeechCollator(SpeechCollator):
    def process_utterance(self, audio_file, translation, transcript):
        """Featurize one utterance together with its translation and transcript.

        The audio and the translation go through the parent implementation;
        the transcript is handled by the speech featurizer's
        ``text_featurize``.

        :param audio_file: Filepath or file object of audio file.
        :type audio_file: str | file
        :param translation: translation text.
        :type translation: str
        :param transcript: transcription text.
        :type transcript: str
        :return: Tuple of audio feature tensor, translation part and
                    transcript part, where the text parts could be token ids
                    or text.
        :rtype: tuple of (2darray, list, list)
        """
        base_result = super().process_utterance(audio_file, translation)
        spectrum, translation_part = base_result
        transcript_part = self._speech_featurizer.text_featurize(
            transcript, self.keep_transcription_text)
        return spectrum, translation_part, transcript_part

    def __call__(self, batch):
        """Collate examples that carry two text targets.

        Args:
            batch (List[Dict]): batch is [dict(utt, input, output, ...)];
                ``output[0]['text']`` is the translation and
                ``output[1]['text']`` the transcription.

        Returns:
            tuple(utts, xs_pad, ilens, ys_pad, olens): batched data.
                utts: (B,)
                xs_pad : (B, Tmax, D)
                ilens: (B,)
                ys_pad : [(B, Umax), (B, Umax)]
                olens: [(B,), (B,)]
        """
        utts = []
        feats, feat_lens = [], []
        trans_ids, trans_lens = [], []
        script_ids, script_lens = [], []
        keep_text = self.keep_transcription_text

        for item in batch:
            utts.append(item['utt'])

            raw_audio = item['input'][0]['feat']
            raw_translation = item['output'][0]['text']
            raw_transcription = item['output'][1]['text']

            feat, translation, transcription = self.process_utterance(
                raw_audio, raw_translation, raw_transcription)

            feats.append(feat)  # [T, D]
            feat_lens.append(feat.shape[0])

            # translation first, transcription second
            translation_tokens = _tokenids(translation, keep_text)
            transcription_tokens = _tokenids(transcription, keep_text)

            trans_ids.append(translation_tokens)
            trans_lens.append(translation_tokens.shape[0])
            script_ids.append(transcription_tokens)
            script_lens.append(transcription_tokens.shape[0])

        # [B, T, D]
        xs_pad = pad_list(feats, 0.0).astype(np.float32)
        ilens = np.array(feat_lens).astype(np.int64)

        padded_translation = pad_list(trans_ids, IGNORE_ID).astype(np.int64)
        translation_lens = np.array(trans_lens).astype(np.int64)

        padded_transcription = pad_list(script_ids, IGNORE_ID).astype(np.int64)
        transcription_lens = np.array(script_lens).astype(np.int64)

        return (utts, xs_pad, ilens,
                (padded_translation, padded_transcription),
                (translation_lens, transcription_lens))


================================================
FILE: paddlespeech/s2t/io/converter.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
import numpy as np

from paddlespeech.s2t.io.utility import pad_list
from paddlespeech.s2t.utils.log import Log

__all__ = ["CustomConverter"]

logger = Log(__name__).getlog()


class CustomConverter():
    """Batch converter that pads loaded features and targets.

    Args:
        subsampling_factor (int): keep every ``subsampling_factor``-th frame
            of the inputs when greater than 1.
        dtype (np.dtype): data type the padded inputs are cast to.
        load_aux_input (bool): return every input stream as a list instead
            of the first one only.
        load_aux_output (bool): return every output stream as a list instead
            of the first one only.

    """

    def __init__(self,
                 subsampling_factor=1,
                 dtype=np.float32,
                 load_aux_input=False,
                 load_aux_output=False):
        """Construct a CustomConverter object."""
        self.subsampling_factor = subsampling_factor
        self.ignore_id = -1
        self.dtype = dtype
        self.load_aux_input = load_aux_input
        self.load_aux_output = load_aux_output

    def __call__(self, batch):
        """Pad one pre-built minibatch.

        Args:
            batch (list): one-element list holding ``(data, utts)``, where
                ``data`` is a list of streams and each stream is a list with
                one item per utterance.

        Returns:
            tuple: ``(utts, xs_pad, ilens, ys_pad, olens)``; the padded
            inputs/outputs and their lengths are single arrays, or lists of
            arrays when the corresponding ``load_aux_*`` flag is set.

        """
        # batch should be located in list
        assert len(batch) == 1
        data, utts = batch[0]

        # Streams whose first item has more than one dimension are inputs
        # (speech_len, feat_dim); the others are outputs (text_len, ).
        speech_streams, text_streams = [], []
        for stream in data:
            target = speech_streams if stream[0].ndim > 1 else text_streams
            target.append(stream)

        assert speech_streams[0][
            0] is not None, "please check Reader and Augmentation impl."

        step = self.subsampling_factor
        xs_pad, ilens = [], []
        for feats in speech_streams:
            if step > 1:
                # perform subsampling along the frame axis
                feats = [feat[::step, :] for feat in feats]

            # lengths are taken before padding
            lengths = np.array([feat.shape[0] for feat in feats])
            # currently only support real number
            padded = pad_list(feats, 0).astype(self.dtype)

            if not self.load_aux_input:
                # only the first input stream is wanted
                xs_pad, ilens = padded, lengths
                break
            xs_pad.append(padded)
            ilens.append(lengths)

        # NOTE: this is for multi-output (e.g., speech translation)
        ys_pad, olens = [], []
        for labels in text_streams:
            padded = pad_list([
                np.array(label[0][:]) if isinstance(label, tuple) else label
                for label in labels
            ], self.ignore_id)
            lengths = np.array([
                label[0].shape[0]
                if isinstance(label, tuple) else label.shape[0]
                for label in labels
            ])

            if not self.load_aux_output:
                # only the first output stream is wanted
                ys_pad, olens = padded, lengths
                break
            ys_pad.append(padded)
            olens.append(lengths)

        return utts, xs_pad, ilens, ys_pad, olens


================================================
FILE: paddlespeech/s2t/io/dataloader.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any
from typing import Dict
from typing import List
from typing import Text

import jsonlines
import numpy as np
import paddle
from paddle.io import BatchSampler
from paddle.io import DataLoader
from paddle.io import DistributedBatchSampler
from yacs.config import CfgNode

import paddlespeech.audio.streamdata as streamdata
from paddlespeech.audio.text.text_featurizer import TextFeaturizer
from paddlespeech.s2t.io.batchfy import make_batchset
from paddlespeech.s2t.io.converter import CustomConverter
from paddlespeech.s2t.io.dataset import TransformDataset
from paddlespeech.s2t.io.reader import LoadInputsAndTargets
from paddlespeech.s2t.utils.log import Log

__all__ = ["BatchDataLoader", "StreamDataLoader"]

logger = Log(__name__).getlog()


def feat_dim_and_vocab_size(data_json: List[Dict[Text, Any]],
                            mode: Text="asr",
                            iaxis=0,
                            oaxis=0):
    """Read the feature dimension and vocabulary size from the first sample.

    Args:
        data_json (List[Dict[Text, Any]]): manifest entries; only the first
            entry is inspected. Each entry is expected to carry ``'input'``
            and ``'output'`` lists whose items have a ``'shape'`` sequence.
        mode (Text, optional): task mode, only ``'asr'`` is supported.
            Defaults to "asr".
        iaxis (int, optional): index into the ``'input'`` list. Defaults to 0.
        oaxis (int, optional): index into the ``'output'`` list. Defaults to 0.

    Raises:
        ValueError: if ``mode`` is not ``'asr'``.

    Returns:
        Tuple: ``(feat_dim, vocab_size)``, i.e. the second element of the
            selected input shape and of the selected output shape.
    """
    if mode != 'asr':
        raise ValueError(f"{mode} mode not support!")

    first = data_json[0]
    # The input stream must be selected with `iaxis`; indexing it with
    # `oaxis` would silently ignore the `iaxis` argument.
    feat_dim = first['input'][iaxis]['shape'][1]
    vocab_size = first['output'][oaxis]['shape'][1]
    return feat_dim, vocab_size


def batch_collate(x):
    """Undo the outer batching layer added by the loader.

    The caller composes the minibatch itself, so the sequence handed to this
    collate function wraps exactly the item we want; it is returned as-is.

    Args:
        x (List[Tuple]): [(utts, xs, ilens, ys, olens)]

    Returns:
        Tuple: (utts, xs, ilens, ys, olens), the first element of ``x``.
    """
    composed_batch = x[0]
    return composed_batch


def read_preprocess_cfg(preprocess_conf_file):
    """Extract spec-augment settings from a preprocessing config file.

    The file is loaded with ``CfgNode.merge_from_file`` and its ``process``
    list is scanned for ``time_warp``, ``freq_mask`` and ``time_mask``
    entries; other entry types are ignored.

    Args:
        preprocess_conf_file (str): path of the preprocessing config. When it
            is ``None`` or empty no file is read and an empty dict is
            returned.

    Raises:
        KeyError: if a process entry has no ``type`` field or lacks one of
            the options read for its type.

    Returns:
        dict: augmentation options keyed as ``max_w``/``w_inplace``/``w_mode``
            (time warp), ``max_f``/``num_f_mask``/``f_inplace``/
            ``f_replace_with_zero`` (frequency mask) and ``max_t``/
            ``num_t_mask``/``t_inplace``/``t_replace_with_zero`` (time mask).
    """
    augment_conf = dict()
    # Nothing to parse: avoid handing None to the config loader.
    if not preprocess_conf_file:
        return augment_conf

    preprocess_cfg = CfgNode(new_allowed=True)
    preprocess_cfg.merge_from_file(preprocess_conf_file)
    for process in preprocess_cfg["process"]:
        process_type = process["type"]
        if process_type == 'time_warp':
            augment_conf['max_w'] = process['max_time_warp']
            augment_conf['w_inplace'] = process['inplace']
            augment_conf['w_mode'] = process['mode']
        elif process_type == 'freq_mask':
            augment_conf['max_f'] = process['F']
            augment_conf['num_f_mask'] = process['n_mask']
            augment_conf['f_inplace'] = process['inplace']
            augment_conf['f_replace_with_zero'] = process['replace_with_zero']
        elif process_type == 'time_mask':
            augment_conf['max_t'] = process['T']
            augment_conf['num_t_mask'] = process['n_mask']
            augment_conf['t_inplace'] = process['inplace']
            augment_conf['t_replace_with_zero'] = process['replace_with_zero']
    return augment_conf


class StreamDataLoader():
    """Iterable data loader backed by a ``streamdata`` shard pipeline.

    The manifest file lists one shard per line. The shards are fed through a
    ``streamdata.DataPipeline`` (tokenize, filter, resample, fbank, optional
    spec-augment, shuffle, sort, batch, pad, cmvn) and served by a
    ``streamdata.WebLoader``.

    Args:
        manifest_file (str): text file with one shard entry per line.
        train_mode (bool): enables spec-augment and, together with
            ``dist_sampler``, node-level shard splitting.
        unit_type (str): unit type passed to ``TextFeaturizer``.
        batch_size (int): passed to ``streamdata.batched``.
        preprocess_conf: preprocessing config path passed to
            ``read_preprocess_cfg``.
        num_mel_bins, frame_length, frame_shift, dither: fbank options.
        minlen_in, maxlen_in, minlen_out, maxlen_out: filter limits passed to
            ``streamdata.audio_data_filter``.
        resample_rate (int): passed to ``streamdata.audio_resample``.
        shuffle_size (int): passed to ``streamdata.shuffle``.
        sort_size (int): passed to ``streamdata.sort``.
        n_iter_processes (int): requested number of loader workers; it is
            capped by the number of shards per rank (see ``__init__``).
        prefetch_factor (int): passed to the loader when paddle >= 2.3.2.
        dist_sampler (bool): whether to insert the node-splitting stage.
        cmvn_file: passed to ``streamdata.audio_cmvn``.
        vocab_filepath: vocabulary path passed to ``TextFeaturizer``.
    """

    def __init__(self,
                 manifest_file: str,
                 train_mode: bool,
                 unit_type: str='char',
                 batch_size: int=0,
                 preprocess_conf=None,
                 num_mel_bins=80,
                 frame_length=25,
                 frame_shift=10,
                 dither=0.0,
                 minlen_in: float=0.0,
                 maxlen_in: float=float('inf'),
                 minlen_out: float=0.0,
                 maxlen_out: float=float('inf'),
                 resample_rate: int=16000,
                 shuffle_size: int=10000,
                 sort_size: int=1000,
                 n_iter_processes: int=1,
                 prefetch_factor: int=2,
                 dist_sampler: bool=False,
                 cmvn_file="data/mean_std.json",
                 vocab_filepath='data/lang_char/vocab.txt'):
        self.manifest_file = manifest_file
        self.train_mode = train_mode
        # Misspelled attribute name kept as an alias so existing readers of
        # `train_model` keep working.
        self.train_model = train_mode
        self.batch_size = batch_size
        self.prefetch_factor = prefetch_factor
        self.dist_sampler = dist_sampler
        self.n_iter_processes = n_iter_processes

        text_featurizer = TextFeaturizer(unit_type, vocab_filepath)
        symbol_table = text_featurizer.vocab_dict
        self.feat_dim = num_mel_bins
        self.vocab_size = text_featurizer.vocab_size

        augment_conf = read_preprocess_cfg(preprocess_conf)

        # The list of shard; blank lines would otherwise become empty
        # shard entries.
        with open(manifest_file, "r") as f:
            shardlist = [line.strip() for line in f if line.strip()]

        world_size = 1
        try:
            world_size = paddle.distributed.get_world_size()
        except Exception as e:
            logger.warning(e)
            logger.warning(
                "can not get world_size using paddle.distributed.get_world_size(), use world_size=1"
            )
        assert len(shardlist) >= world_size, \
            "the length of shard list should >= number of gpus/xpus/..."

        # Cap the worker count at (shards per rank - 1), never below zero.
        update_n_iter_processes = int(
            max(min(len(shardlist) / world_size - 1, self.n_iter_processes), 0))
        logger.info(f"update_n_iter_processes {update_n_iter_processes}")
        if update_n_iter_processes != self.n_iter_processes:
            self.n_iter_processes = update_n_iter_processes
            logger.info(f"change nun_workers to {self.n_iter_processes}")

        if self.dist_sampler:
            base_dataset = streamdata.DataPipeline(
                streamdata.SimpleShardList(shardlist),
                # Node-level splitting only applies while training.
                streamdata.split_by_node
                if train_mode else streamdata.placeholder(),
                streamdata.split_by_worker,
                streamdata.tarfile_to_samples(streamdata.reraise_exception))
        else:
            base_dataset = streamdata.DataPipeline(
                streamdata.SimpleShardList(shardlist),
                streamdata.split_by_worker,
                streamdata.tarfile_to_samples(streamdata.reraise_exception))

        self.dataset = base_dataset.append_list(
            streamdata.audio_tokenize(symbol_table),
            streamdata.audio_data_filter(
                frame_shift=frame_shift,
                max_length=maxlen_in,
                min_length=minlen_in,
                token_max_length=maxlen_out,
                token_min_length=minlen_out),
            streamdata.audio_resample(resample_rate=resample_rate),
            streamdata.audio_compute_fbank(
                num_mel_bins=num_mel_bins,
                frame_length=frame_length,
                frame_shift=frame_shift,
                dither=dither),
            # Spec-augment is applied in training mode only.
            streamdata.audio_spec_aug(**augment_conf)
            if train_mode else streamdata.placeholder(),
            streamdata.shuffle(shuffle_size),
            streamdata.sort(sort_size=sort_size),
            streamdata.batched(batch_size),
            streamdata.audio_padding(),
            streamdata.audio_cmvn(cmvn_file))

        loader_kwargs = dict(num_workers=self.n_iter_processes, batch_size=None)
        # Compare versions numerically: as plain strings '2.10.0' sorts
        # before '2.3.2'.
        if self._version_tuple(paddle.__version__) >= (2, 3, 2):
            loader_kwargs['prefetch_factor'] = self.prefetch_factor
        self.loader = streamdata.WebLoader(self.dataset, **loader_kwargs)

    @staticmethod
    def _version_tuple(version):
        """Turn a version string such as '2.4.0-rc0' into ``(2, 4, 0)``.

        Only the leading decimal digits of the first three dot-separated
        fields are used; missing or non-numeric fields count as 0.
        """
        numbers = []
        for field in str(version).split('.')[:3]:
            digits = ''
            for ch in field:
                if ch not in '0123456789':
                    break
                digits += ch
            numbers.append(int(digits) if digits else 0)
        while len(numbers) < 3:
            numbers.append(0)
        return tuple(numbers)

    def __iter__(self):
        """Return a fresh iterator over the underlying loader."""
        return self.loader.__iter__()

    def __call__(self):
        """Calling the loader is the same as iterating it."""
        return self.__iter__()

    def __len__(self):
        """Log that the length is unknown and return -1.

        NOTE(review): the builtin ``len()`` rejects negative results, so only
        a direct ``__len__()`` call actually yields -1.
        """
        logger.info(
            "Stream dataloader does not support calculate the length of the dataset"
        )
        return -1


class BatchDataLoader():
    """Loader that serves minibatches pre-composed by ``make_batchset``.

    The JSON-lines manifest is read fully into memory, grouped into
    variable-length minibatches, and wrapped in a ``TransformDataset`` whose
    items are already complete batches. The paddle ``DataLoader`` therefore
    runs with ``batch_size=1`` and ``batch_collate`` to unwrap each item.

    Args:
        json_file (str): JSON-lines manifest path.
        train_mode (bool): forwarded to the preprocessing as ``train`` and
            used to decide whether batches are shuffled.
        sortagrad (int): sortagrad is considered enabled when this is -1 or
            greater than 0; enabling it disables shuffling.
        batch_size, maxlen_in, maxlen_out, minibatches, mini_batch_size,
        batch_count, batch_bins, batch_frames_in, batch_frames_out,
        batch_frames_inout: forwarded to ``make_batchset``.
        preprocess_conf: forwarded to ``LoadInputsAndTargets``.
        n_iter_processes (int): number of ``DataLoader`` workers.
        subsampling_factor, load_aux_input, load_aux_output: forwarded to
            ``CustomConverter``.
        num_encs (int): only 1 is supported.
        dist_sampler (bool): use ``DistributedBatchSampler`` instead of
            ``BatchSampler``.
        shortest_first (bool): forwarded to ``make_batchset`` (also implied
            by sortagrad).

    Raises:
        NotImplementedError: if ``num_encs`` is not 1.
    """

    def __init__(self,
                 json_file: str,
                 train_mode: bool,
                 sortagrad: int=0,
                 batch_size: int=0,
                 maxlen_in: float=float('inf'),
                 maxlen_out: float=float('inf'),
                 minibatches: int=0,
                 mini_batch_size: int=1,
                 batch_count: str='auto',
                 batch_bins: int=0,
                 batch_frames_in: int=0,
                 batch_frames_out: int=0,
                 batch_frames_inout: int=0,
                 preprocess_conf=None,
                 n_iter_processes: int=1,
                 subsampling_factor: int=1,
                 load_aux_input: bool=False,
                 load_aux_output: bool=False,
                 num_encs: int=1,
                 dist_sampler: bool=False,
                 shortest_first: bool=False):
        self.json_file = json_file
        self.train_mode = train_mode
        self.use_sortagrad = sortagrad == -1 or sortagrad > 0
        self.batch_size = batch_size
        self.maxlen_in = maxlen_in
        self.maxlen_out = maxlen_out
        self.batch_count = batch_count
        self.batch_bins = batch_bins
        self.batch_frames_in = batch_frames_in
        self.batch_frames_out = batch_frames_out
        self.batch_frames_inout = batch_frames_inout
        self.subsampling_factor = subsampling_factor
        self.num_encs = num_encs
        self.preprocess_conf = preprocess_conf
        self.n_iter_processes = n_iter_processes
        self.load_aux_input = load_aux_input
        self.load_aux_output = load_aux_output
        self.dist_sampler = dist_sampler
        self.shortest_first = shortest_first

        # read json data
        with jsonlines.open(json_file, 'r') as reader:
            self.data_json = list(reader)

        self.feat_dim, self.vocab_size = feat_dim_and_vocab_size(
            self.data_json, mode='asr')

        # make minibatch list (variable length)
        # (attribute name `minibaches` is kept as-is for existing users)
        self.minibaches = make_batchset(
            self.data_json,
            batch_size,
            maxlen_in,
            maxlen_out,
            minibatches,  # for debug
            min_batch_size=mini_batch_size,
            shortest_first=self.shortest_first or self.use_sortagrad,
            count=batch_count,
            batch_bins=batch_bins,
            batch_frames_in=batch_frames_in,
            batch_frames_out=batch_frames_out,
            batch_frames_inout=batch_frames_inout,
            iaxis=0,
            oaxis=0, )

        # data reader
        self.reader = LoadInputsAndTargets(
            mode="asr",
            load_output=True,
            preprocess_conf=preprocess_conf,
            preprocess_args={"train":
                             train_mode},  # Switch the mode of preprocessing
        )

        # Setup a converter
        if num_encs == 1:
            self.converter = CustomConverter(
                subsampling_factor=subsampling_factor,
                dtype=np.float32,
                load_aux_input=load_aux_input,
                load_aux_output=load_aux_output)
        else:
            # `assert <exception instance>` never fails because the instance
            # is truthy; raise so an unsupported setup stops here instead of
            # failing later on the missing `self.converter`.
            raise NotImplementedError("not impl CustomConverterMulEnc.")

        # hack to make batchsize argument as 1
        # actual batchsize is included in a list
        # default collate function converts numpy array to paddle tensor
        # we used an empty collate function instead which returns list
        self.dataset = TransformDataset(self.minibaches, self.converter,
                                        self.reader)

        # Shuffle only while training and only when sortagrad is off.
        shuffle = (not self.use_sortagrad) if self.train_mode else False
        sampler_cls = DistributedBatchSampler if self.dist_sampler else BatchSampler
        self.batch_sampler = sampler_cls(
            dataset=self.dataset,
            batch_size=1,
            shuffle=shuffle,
            drop_last=False, )

        self.dataloader = DataLoader(
            dataset=self.dataset,
            batch_sampler=self.batch_sampler,
            collate_fn=batch_collate,
            num_workers=self.n_iter_processes, )

    def __len__(self):
        """Length of the wrapped paddle ``DataLoader``."""
        return len(self.dataloader)

    def __iter__(self):
        """Return a fresh iterator over the wrapped ``DataLoader``."""
        return self.dataloader.__iter__()

    def __call__(self):
        """Calling the loader is the same as iterating it."""
        return self.__iter__()

    def __repr__(self):
        """One-line summary of the loader configuration."""
        header = f"<{self.__class__.__module__}.{self.__class__.__name__} object at {hex(id(self))}> "
        fields = [
            f"train_mode: {self.train_mode}",
            f"sortagrad: {self.use_sortagrad}",
            f"batch_size: {self.batch_size}",
            f"maxlen_in: {self.maxlen_in}",
            f"maxlen_out: {self.maxlen_out}",
            f"batch_count: {self.batch_count}",
            f"batch_bins: {self.batch_bins}",
            f"batch_frames_in: {self.batch_frames_in}",
            f"batch_frames_out: {self.batch_frames_out}",
            f"batch_frames_inout: {self.batch_frames_inout}",
            f"subsampling_factor: {self.subsampling_factor}",
            f"num_encs: {self.num_encs}",
            f"num_workers: {self.n_iter_processes}",
            f"load_aux_input: {self.load_aux_input}",
            f"load_aux_output: {self.load_aux_output}",
            f"dist_sampler: {self.dist_sampler}",
            f"shortest_first: {self.shortest_first}",
            f"file: {self.json_file}",
        ]
        return header + ", ".join(fields)


class DataLoaderFactory():
    """Builds a ``StreamDataLoader`` or ``BatchDataLoader`` from a config."""

    @staticmethod
    def get_dataloader(mode: str, config, args):
        """Create the data loader for one of the run modes.

        Works on a clone of ``config``: mode-specific values are written into
        the clone and the loader is then constructed from it.

        Args:
            mode (str): one of 'train', 'valid', 'test', 'align'.
            config: config object supporting ``clone()``, ``get()``, item
                assignment and attribute access. ``use_stream_data`` selects
                the stream loader; otherwise the batch loader is used.
            args: object providing ``ngpu`` (read for the batch loader in
                'train' and 'valid' modes).

        Raises:
            KeyError: if ``mode`` is not one of the supported values.

        Returns:
            StreamDataLoader or BatchDataLoader.
        """
        config = config.clone()

        if config.get("use_stream_data", False):
            if mode == 'train':
                overrides = {
                    'manifest': config.train_manifest,
                    'train_mode': True,
                }
            elif mode == 'valid':
                overrides = {
                    'manifest': config.dev_manifest,
                    'train_mode': False,
                }
            elif mode in ('test', 'align'):
                # Decoding: no dither, no length filtering, no node split.
                overrides = {
                    'manifest': config.test_manifest,
                    'train_mode': False,
                    'dither': 0.0,
                    'minlen_in': 0.0,
                    'maxlen_in': float('inf'),
                    'minlen_out': 0,
                    'maxlen_out': float('inf'),
                    'dist_sampler': False,
                }
            else:
                raise KeyError(
                    "not valid mode type!!, please input one of 'train, valid, test, align'"
                )
            for key, value in overrides.items():
                config[key] = value

            return StreamDataLoader(
                manifest_file=config.manifest,
                train_mode=config.train_mode,
                unit_type=config.unit_type,
                preprocess_conf=config.preprocess_config,
                batch_size=config.batch_size,
                num_mel_bins=config.feat_dim,
                frame_length=config.window_ms,
                frame_shift=config.stride_ms,
                dither=config.dither,
                minlen_in=config.minlen_in,
                maxlen_in=config.maxlen_in,
                minlen_out=config.minlen_out,
                maxlen_out=config.maxlen_out,
                resample_rate=config.resample_rate,
                shuffle_size=config.shuffle_size,
                sort_size=config.sort_size,
                n_iter_processes=config.num_workers,
                prefetch_factor=config.prefetch_factor,
                dist_sampler=config.dist_sampler,
                cmvn_file=config.cmvn_file,
                vocab_filepath=config.vocab_filepath, )

        # Settings that every mode of the batch loader pins to the same value.
        shared = {
            'minibatches': 0,
            'batch_count': 'auto',
            'batch_bins': 0,
            'batch_frames_in': 0,
            'batch_frames_out': 0,
            'batch_frames_inout': 0,
            'subsampling_factor': 1,
            'num_encs': 1,
            'shortest_first': False,
        }
        if mode == 'train':
            specific = {
                'manifest': config.train_manifest,
                'train_mode': True,
                'mini_batch_size': args.ngpu,
            }
        elif mode == 'valid':
            specific = {
                'manifest': config.dev_manifest,
                'train_mode': False,
                'sortagrad': False,
                'maxlen_in': float('inf'),
                'maxlen_out': float('inf'),
                'mini_batch_size': args.ngpu,
            }
        elif mode in ('test', 'align'):
            specific = {
                'manifest': config.test_manifest,
                'train_mode': False,
                'sortagrad': False,
                'batch_size': config.get('decode', dict()).get(
                    'decode_batch_size', 1),
                'maxlen_in': float('inf'),
                'maxlen_out': float('inf'),
                'mini_batch_size': 1,
                'num_workers': 1,
                'dist_sampler': False,
            }
        else:
            raise KeyError(
                "not valid mode type!!, please input one of 'train, valid, test, align'"
            )
        for table in (specific, shared):
            for key, value in table.items():
                config[key] = value

        return BatchDataLoader(
            json_file=config.manifest,
            train_mode=config.train_mode,
            sortagrad=config.sortagrad,
            batch_size=config.batch_size,
            maxlen_in=config.maxlen_in,
            maxlen_out=config.maxlen_out,
            minibatches=config.minibatches,
            mini_batch_size=config.mini_batch_size,
            batch_count=config.batch_count,
            batch_bins=config.batch_bins,
            batch_frames_in=config.batch_frames_in,
            batch_frames_out=config.batch_frames_out,
            batch_frames_inout=config.batch_frames_inout,
            preprocess_conf=config.preprocess_config,
            n_iter_processes=config.num_workers,
            subsampling_factor=config.subsampling_factor,
            load_aux_output=config.get('load_transcript', None),
            num_encs=config.num_encs,
            dist_sampler=config.get('dist_sampler', None),
            shortest_first=config.shortest_first)


================================================
FILE: paddlespeech/s2t/io/dataset.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# Copyright 2021 Mobvoi Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
# Modified from wenet(https://github.com/wenet-e2e/wenet)
import jsonlines
from paddle.io import Dataset

from paddlespeech.s2t.frontend.utility import read_manifest
from paddlespeech.s2t.utils.log import Log

__all__ = ["ManifestDataset", "TransformDataset"]

logger = Log(__name__).getlog()


class ManifestDataset(Dataset):
    """Dataset over the entries of a manifest, ordered by input length."""

    @classmethod
    def from_config(cls, config):
        """Alternative constructor taking all arguments from a config.

        Args:
            config (yacs.config.CfgNode): must contain a non-empty
                ``manifest`` plus the six length/ratio limits accepted by
                ``__init__``.

        Returns:
            ManifestDataset: dataset object.
        """
        assert 'manifest' in config
        assert config.manifest

        limit_names = (
            'max_input_len',
            'min_input_len',
            'max_output_len',
            'min_output_len',
            'max_output_input_ratio',
            'min_output_input_ratio', )
        limits = {name: getattr(config, name) for name in limit_names}
        return cls(manifest_path=config.manifest, **limits)

    def __init__(self,
                 manifest_path,
                 max_input_len=float('inf'),
                 min_input_len=0.0,
                 max_output_len=float('inf'),
                 min_output_len=0.0,
                 max_output_input_ratio=float('inf'),
                 min_output_input_ratio=0.0):
        """Load the manifest through ``read_manifest`` and sort it.

        All limits are forwarded unchanged to ``read_manifest``; the
        filtering itself is done there.

        Args:
            manifest_path (str): manifest json file path.
            max_input_len (float, optional): maximum input seq length,
                in seconds for raw wav, in frame numbers for feature data.
                Defaults to float('inf').
            min_input_len (float, optional): minimum input seq length,
                in seconds for raw wav, in frame numbers for feature data.
                Defaults to 0.0.
            max_output_len (float, optional): maximum output seq length,
                in modeling units. Defaults to float('inf').
            min_output_len (float, optional): minimum output seq length,
                in modeling units. Defaults to 0.0.
            max_output_input_ratio (float, optional): upper bound on the
                output/input length ratio. Defaults to float('inf').
            min_output_input_ratio (float, optional): lower bound on the
                output/input length ratio. Defaults to 0.0.
        """
        super().__init__()

        def first_input_length(entry):
            # First dimension of the first input's shape.
            return entry["input"][0]["shape"][0]

        # read manifest
        entries = read_manifest(
            manifest_path=manifest_path,
            max_input_len=max_input_len,
            min_input_len=min_input_len,
            max_output_len=max_output_len,
            min_output_len=min_output_len,
            max_output_input_ratio=max_output_input_ratio,
            min_output_input_ratio=min_output_input_ratio)
        self._manifest = entries
        # In-place ascending sort by input length.
        self._manifest.sort(key=first_input_length)

    def __len__(self):
        """Number of manifest entries kept."""
        return len(self._manifest)

    def __getitem__(self, idx):
        """Manifest entry at position ``idx`` of the sorted list."""
        return self._manifest[idx]


class TransformDataset(Dataset):
    """Dataset that loads and converts one minibatch description per item.

    Args:
        data: list object from make_batchset
        converter: batch function applied to the loaded minibatch
        reader: callable that reads the data of a minibatch description
    """

    def __init__(self, data, converter, reader):
        """Store the minibatch list and the two callables."""
        super().__init__()
        self.data, self.converter, self.reader = data, converter, reader

    def __len__(self):
        """Number of minibatches."""
        return len(self.data)

    def __getitem__(self, idx):
        """Read minibatch ``idx`` (with utterance ids) and convert it."""
        minibatch = self.data[idx]
        loaded = self.reader(minibatch, return_uttid=True)
        # The converter receives the loaded minibatch wrapped in a list.
        return self.converter([loaded])


class AudioDataset(Dataset):
    def __init__(self,
                 data_file,
                 max_length=10240,
                 min_length=0,
                 token_max_length=200,
                 token_min_length=1,
                 batch_type='static',
                 batch_size=1,
                 max_frames_in_batch=0,
                 sort=True,
                 raw_wav=True,
                 stride_ms=10):
        """Dataset for loading audio data, pre-grouped into minibatches.

        Args:
            data_file: JSON-lines manifest. Every entry is read for the keys
                ``feat`` (path, optionally followed by ``:offset``),
                ``feat_shape`` (first item is the length, second the feature
                dimension) and ``token_shape`` (first item is the number of
                tokens, second the vocab size).
            max_length: drop utterance whose length (in frames) is greater
                than max_length.
            min_length: drop utterance whose length (in frames) is less than
                min_length.
            token_max_length: drop utterance which is greater than
                token_max_length, especially when use char unit for english
                modeling.
            token_min_length: drop utterance which is less than
                token_min_length.
            batch_type: static or dynamic, see max_frames_in_batch(dynamic).
            batch_size: number of utterances in a batch,
                it's for static batch size.
            max_frames_in_batch: max feature frames in a batch,
                when batch_type is dynamic, it's for dynamic batch size.
                Then batch_size is ignored, we will keep filling the
                batch until the total frames in batch up to
                max_frames_in_batch.
            sort: whether to sort all data, so the utterance with the same
                length could be filled in a same batch.
            raw_wav: use raw wave or extracted feature. When True the first
                item of ``feat_shape`` is taken to be a duration in seconds
                and is converted to a frame count using ``stride_ms``.
            stride_ms: frame shift in milliseconds used for that conversion.
        """
        # dataset.py does not import os at module level.
        import os

        assert batch_type in ['static', 'dynamic']
        # read manifest
        with jsonlines.open(data_file, 'r') as reader:
            data = list(reader)
        if sort:
            data = sorted(data, key=lambda x: x["feat_shape"][0])
        if raw_wav:
            # `str` has no `splitext`; the extension comes from os.path.
            feat_path = data[0]['feat'].split(':')[0]
            path_suffix = os.path.splitext(feat_path)[-1]
            assert path_suffix not in ('.ark', '.scp')
            # m second to n frame; update each entry in place so `data`
            # remains a list of manifest dicts for the code below.
            for entry in data:
                shape = list(entry['feat_shape'])
                shape[0] = float(shape[0]) * 1000 / stride_ms
                entry['feat_shape'] = shape

        self.input_dim = data[0]['feat_shape'][1]
        self.output_dim = data[0]['token_shape'][1]

        valid_data = []
        for entry in data:
            length = entry['feat_shape'][0]
            token_length = entry['token_shape'][0]
            # remove too long or too short utt for both input and output
            # to prevent from out of memory
            if length > max_length or length < min_length:
                continue
            if token_length > token_max_length or token_length < token_min_length:
                continue
            valid_data.append(entry)
        logger.info(f"raw dataset len: {len(data)}")
        data = valid_data
        num_data = len(data)
        logger.info(f"dataset len after filter: {num_data}")

        self.minibatch = []
        # Dynamic batch size
        if batch_type == 'dynamic':
            assert (max_frames_in_batch > 0)
            current, num_frames_in_batch = [], 0
            for entry in data:
                length = entry['feat_shape'][0]
                # Close the running batch only if it already holds data, so
                # an oversized first utterance cannot leave an empty batch.
                if current and num_frames_in_batch + length > max_frames_in_batch:
                    self.minibatch.append(current)
                    current, num_frames_in_batch = [], 0
                current.append(entry)
                num_frames_in_batch += length
            if current:
                self.minibatch.append(current)
        # Static batch size
        else:
            # A non-positive step would never advance through the data.
            assert (batch_size > 0)
            for start in range(0, num_data, batch_size):
                self.minibatch.append(data[start:start + batch_size])

    def __len__(self):
        """number of example(batch)"""
        return len(self.minibatch)

    def __getitem__(self, idx):
        """batch example of idx"""
        return self.minibatch[idx]


================================================
FILE: paddlespeech/s2t/io/reader.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
from collections import OrderedDict

import io
import os
import kaldiio
import numpy as np
import soundfile
import h5py

from .utility import feat_type
from paddlespeech.audio.transform.transformation import Transformation
from paddlespeech.s2t.utils.log import Log
# from paddlespeech.s2t.frontend.augmentor.augmentation import AugmentationPipeline as Transformation

__all__ = ["LoadInputsAndTargets"]

logger = Log(__name__).getlog()


class LoadInputsAndTargets():
    """Create a mini-batch from a list of dicts

    >>> batch = [('utt1',
    ...           dict(input=[dict(feat='some.ark:123',
    ...                            filetype='mat',
    ...                            name='input1',
    ...                            shape=[100, 80])],
    ...                output=[dict(tokenid='1 2 3 4',
    ...                             name='target1',
    ...                             shape=[4, 31])]))]
    >>> l = LoadInputsAndTargets()
    >>> feat, target = l(batch)

    :param: str mode: Specify the task mode; only "asr" is accepted
    :param: preprocess_conf: Configuration handed to ``Transformation``;
        when it is falsy no pre-processing is applied
    :param: bool load_input: If False, not to load the input data
    :param: bool load_output: If False, not to load the output data
    :param: bool sort_in_input_length: Sort the mini-batch in descending order
        of the input length
    :param: Optional[dict] preprocess_args: Extra keyword arguments forwarded
        to the pre-processing callable on every call
    :param: bool keep_all_data_on_mem: If True, arrays loaded for the
        "sound", "npy", "mat" and "vec" file types are cached in memory and
        reused on later requests for the same path
    """

    def __init__(
            self,
            mode="asr",
            preprocess_conf=None,
            load_input=True,
            load_output=True,
            sort_in_input_length=True,
            preprocess_args=None,
            keep_all_data_on_mem=False, ):
        # Cache of opened loaders / loaded arrays, keyed by file path.
        self._loaders = {}

        if mode not in ["asr"]:
            raise ValueError("Only asr are allowed: mode={}".format(mode))

        if preprocess_conf:
            self.preprocessing = Transformation(preprocess_conf)
            logger.warning(
                "[Experimental feature] Some preprocessing will be done "
                "for the mini-batch creation using {}".format(
                    self.preprocessing))
        else:
            # If conf doesn't exist, this function doesn't touch anything.
            self.preprocessing = None

        self.mode = mode
        self.load_output = load_output
        self.load_input = load_input
        self.sort_in_input_length = sort_in_input_length
        if preprocess_args:
            assert isinstance(preprocess_args, dict), type(preprocess_args)
            # Copy so that later changes by the caller do not leak in.
            self.preprocess_args = dict(preprocess_args)
        else:
            self.preprocess_args = {}
        self.keep_all_data_on_mem = keep_all_data_on_mem

    def __call__(self, batch, return_uttid=False):
        """Function to load inputs and targets from list of dicts

        :param List[Tuple[str, dict]] batch: list of ``(uttid, info)`` pairs;
            ``info["input"]`` / ``info["output"]`` are lists of dicts
            describing each feature (subset of the loaded data.json)
        :param bool return_uttid: also return the utterance IDs, ordered like
            the returned mini-batch
        :return: tuple with one list of arrays per input name followed by one
            list of arrays per target name, e.g.
            ([(T_1, D), ..., (T_B, D)], [(L_1), ..., (L_B)]);
            when ``return_uttid`` is True, ``(that_tuple, uttid_list)``
        :rtype: tuple
        """
        x_feats_dict = OrderedDict()  # OrderedDict[str, List[np.ndarray]]
        y_feats_dict = OrderedDict()  # OrderedDict[str, List[np.ndarray]]
        uttid_list = []  # List[str]

        for uttid, info in batch:
            uttid_list.append(uttid)

            if self.load_input:
                # Note(kamo): This for-loop is for multiple inputs
                for inp in info["input"]:
                    # {"input":
                    #  [{"feat": "some/path.h5:F01_050C0101_PED_REAL",
                    #    "filetype": "hdf5",
                    #    "name": "input1", ...}], ...}
                    x = self._get_from_loader(
                        filepath=inp["feat"],
                        filetype=inp.get("filetype", "mat"))
                    x_feats_dict.setdefault(inp["name"], []).append(x)

            if self.load_output:
                for inp in info["output"]:
                    if "tokenid" in inp:
                        # ======= Legacy format for output =======
                        # {"output": [{"tokenid": "1 2 3 4"}])
                        x = np.fromiter(
                            map(int, inp["tokenid"].split()), dtype=np.int64)
                    else:
                        # ======= New format =======
                        # {"output":
                        #  [{"feat": "some/path.h5:F01_050C0101_PED_REAL",
                        #    "filetype": "hdf5",
                        #    "name": "target1", ...}], ...}
                        x = self._get_from_loader(
                            filepath=inp["feat"],
                            filetype=inp.get("filetype", "mat"))

                    y_feats_dict.setdefault(inp["name"], []).append(x)

        if self.mode == "asr":
            return_batch, uttid_list = self._create_batch_asr(
                x_feats_dict, y_feats_dict, uttid_list)
        else:
            raise NotImplementedError(self.mode)

        if self.preprocessing is not None:
            # Apply pre-processing to every entry whose name starts with
            # "input"; targets are left untouched.
            for x_name in return_batch.keys():
                if x_name.startswith("input"):
                    return_batch[x_name] = self.preprocessing(
                        return_batch[x_name], uttid_list,
                        **self.preprocess_args)

        if return_uttid:
            return tuple(return_batch.values()), uttid_list

        # Doesn't return the names now.
        return tuple(return_batch.values())

    def _create_batch_asr(self, x_feats_dict, y_feats_dict, uttid_list):
        """Create a OrderedDict for the mini-batch

        Samples for which any target sequence is empty are dropped (a warning
        is logged), and the remaining samples are optionally sorted by
        descending length of the first input.

        :param OrderedDict x_feats_dict:
            e.g. {"input1": [ndarray, ndarray, ...],
                  "input2": [ndarray, ndarray, ...]}
        :param OrderedDict y_feats_dict:
            e.g. {"target1": [ndarray, ndarray, ...],
                  "target2": [ndarray, ndarray, ...]}
        :param: List[str] uttid_list:
            Give uttid_list to sort in the same order as the mini-batch
        :return: batch, uttid_list
        :rtype: Tuple[OrderedDict, List[str]]
        """
        # handle single-input and multi-input (parallel) asr mode
        xs = list(x_feats_dict.values())

        if self.load_output:
            ys = list(y_feats_dict.values())
            assert len(xs[0]) == len(ys[0]), (len(xs[0]), len(ys[0]))

            # Indices of samples whose targets are ALL non-empty.
            # This must be a concrete list: len() is taken on it below, and a
            # chain of lazy filter() objects with a lambda closing over the
            # loop variable would only ever test the last target.
            nonzero_idx = [
                i for i in range(len(ys[0])) if all(len(y[i]) > 0 for y in ys)
            ]
        else:
            # Note(kamo): Be careful not to make nonzero_idx to a generator
            nonzero_idx = list(range(len(xs[0])))

        if self.sort_in_input_length:
            # sort in input lengths based on the first input
            nonzero_sorted_idx = sorted(
                nonzero_idx, key=lambda i: -len(xs[0][i]))
        else:
            nonzero_sorted_idx = nonzero_idx

        if len(nonzero_sorted_idx) != len(xs[0]):
            logger.warning(
                "Target sequences include empty tokenid (batch {} -> {}).".
                format(len(xs[0]), len(nonzero_sorted_idx)))

        # remove zero-length samples
        xs = [[x[i] for i in nonzero_sorted_idx] for x in xs]
        uttid_list = [uttid_list[i] for i in nonzero_sorted_idx]

        # Keeping x_name and y_name, e.g. input1, for future extension
        return_batch = OrderedDict(zip(x_feats_dict.keys(), xs))
        if self.load_output:
            ys = [[y[i] for i in nonzero_sorted_idx] for y in ys]
            return_batch.update(zip(y_feats_dict.keys(), ys))
        return return_batch, uttid_list

    def _get_from_loader(self, filepath, filetype):
        """Return ndarray

        In order to make the fds to be opened only at the first referring,
        the loader are stored in self._loaders

        >>> ndarray = loader._get_from_loader(
        ...     'some/path.h5:F01_050C0101_PED_REAL', filetype='hdf5')

        :param: str filepath: path of the file; for the "hdf5", "sound.hdf5",
            "npz" and "scp" types it has the form "<path>:<key>"
        :param: str filetype: one of "hdf5", "sound.hdf5", "sound", "npz",
            "npy", "mat", "vec", "scp"
        :return: the loaded array
        :rtype: np.ndarray
        :raises NotImplementedError: for any other ``filetype``
        """
        if filetype == "hdf5":
            # e.g.
            #    {"input": [{"feat": "some/path.h5:F01_050C0101_PED_REAL",
            #                "filetype": "hdf5",
            # -> filepath = "some/path.h5", key = "F01_050C0101_PED_REAL"
            filepath, key = filepath.split(":", 1)

            loader = self._loaders.get(filepath)
            if loader is None:
                # To avoid disk access, create loader only for the first time
                loader = h5py.File(filepath, "r")
                self._loaders[filepath] = loader
            return loader[key][()]
        elif filetype == "sound.hdf5":
            # e.g.
            #    {"input": [{"feat": "some/path.h5:F01_050C0101_PED_REAL",
            #                "filetype": "sound.hdf5",
            # -> filepath = "some/path.h5", key = "F01_050C0101_PED_REAL"
            filepath, key = filepath.split(":", 1)

            loader = self._loaders.get(filepath)
            if loader is None:
                # To avoid disk access, create loader only for the first time
                loader = SoundHDF5File(filepath, "r", dtype="int16")
                self._loaders[filepath] = loader
            # The sampling rate stored with the audio is discarded here.
            array, rate = loader[key]
            return array
        elif filetype == "sound":
            # e.g.
            #    {"input": [{"feat": "some/path.wav",
            #                "filetype": "sound"},
            # Assume PCM16
            if not self.keep_all_data_on_mem:
                array, _ = soundfile.read(filepath, dtype="int16")
                return array
            if filepath not in self._loaders:
                array, _ = soundfile.read(filepath, dtype="int16")
                self._loaders[filepath] = array
            return self._loaders[filepath]
        elif filetype == "npz":
            # e.g.
            #    {"input": [{"feat": "some/path.npz:F01_050C0101_PED_REAL",
            #                "filetype": "npz",
            filepath, key = filepath.split(":", 1)

            loader = self._loaders.get(filepath)
            if loader is None:
                # To avoid disk access, create loader only for the first time
                loader = np.load(filepath)
                self._loaders[filepath] = loader
            return loader[key]
        elif filetype == "npy":
            # e.g.
            #    {"input": [{"feat": "some/path.npy",
            #                "filetype": "npy"},
            if not self.keep_all_data_on_mem:
                return np.load(filepath)
            if filepath not in self._loaders:
                self._loaders[filepath] = np.load(filepath)
            return self._loaders[filepath]
        elif filetype in ["mat", "vec"]:
            # e.g.
            #    {"input": [{"feat": "some/path.ark:123",
            #                "filetype": "mat"}]},
            # In this case, "123" indicates the starting points of the matrix
            # load_mat can load both matrix and vector
            if not self.keep_all_data_on_mem:
                return kaldiio.load_mat(filepath)
            if filepath not in self._loaders:
                self._loaders[filepath] = kaldiio.load_mat(filepath)
            return self._loaders[filepath]
        elif filetype == "scp":
            # e.g.
            #    {"input": [{"feat": "some/path.scp:F01_050C0101_PED_REAL",
            #                "filetype": "scp",
            filepath, key = filepath.split(":", 1)
            loader = self._loaders.get(filepath)
            if loader is None:
                # To avoid disk access, create loader only for the first time
                loader = kaldiio.load_scp(filepath)
                self._loaders[filepath] = loader
            return loader[key]
        else:
            raise NotImplementedError(
                "Not supported: loader_type={}".format(filetype))

    def file_type(self, filepath):
        """Return whatever the module-level ``feat_type`` helper reports for ``filepath``."""
        return feat_type(filepath)


class SoundHDF5File():
    """Mapping-like wrapper that keeps encoded sound files inside one HDF5 file

    Each entry is written as the encoded bytes produced by ``soundfile.write``
    and decoded again with ``soundfile.read`` when it is accessed.

    >>> f = SoundHDF5File('a.flac.h5', mode='a')
    >>> array = np.random.randint(0, 100, 100, dtype=np.int16)
    >>> f['id'] = (array, 16000)
    >>> array, rate = f['id']


    :param: str filepath: path of the HDF5 file
    :param: str mode: mode passed to ``h5py.File``
    :param: str format: audio format used when saving (flac, nist, htk, ...);
        when None it is taken from the inner extension of ``filepath`` and
        falls back to "flac" if soundfile does not list it
    :param: str dtype: dtype requested from ``soundfile.read`` when loading

    """

    def __init__(self,
                 filepath,
                 mode="r+",
                 format=None,
                 dtype="int16",
                 **kwargs):
        self.filepath = filepath
        self.mode = mode
        self.dtype = dtype

        self.file = h5py.File(filepath, mode, **kwargs)
        if format is None:
            # "a.flac.h5" -> stem "a.flac" -> inner extension ".flac" -> "flac"
            stem, _outer_ext = os.path.splitext(filepath)
            _base, inner_ext = os.path.splitext(stem)
            format = inner_ext[1:]
            known_formats = soundfile.available_formats()
            if format.upper() not in known_formats:
                # Unknown inner extension: default to flac
                format = "flac"

        # Only used when writing; reading detects the format from the bytes
        self.format = format

    def __repr__(self):
        fields = (self.filepath, self.mode, self.format, self.dtype)
        return '<SoundHDF5 file "{}" (mode {}, format {}, type {})>'.format(
            *fields)

    def create_dataset(self, name, shape=None, data=None, **kwds):
        """Encode ``data = (array, rate)`` and store the bytes under ``name``."""
        samples, sample_rate = data
        buffer = io.BytesIO()
        soundfile.write(buffer, samples, sample_rate, format=self.format)
        encoded = np.void(buffer.getvalue())
        self.file.create_dataset(name, shape=shape, data=encoded, **kwds)

    def __setitem__(self, name, data):
        self.create_dataset(name, data=data)

    def __getitem__(self, key):
        """Decode the entry stored under ``key`` and return ``(array, rate)``."""
        raw = self.file[key][()]
        stream = io.BytesIO(raw.tobytes())
        samples, sample_rate = soundfile.read(stream, dtype=self.dtype)
        return samples, sample_rate

    def keys(self):
        return self.file.keys()

    def values(self):
        """Yield the decoded ``(array, rate)`` of every entry."""
        for name in self.file:
            yield self[name]

    def items(self):
        """Yield ``(key, (array, rate))`` for every entry."""
        for name in self.file:
            yield name, self[name]

    def __iter__(self):
        return iter(self.file)

    def __contains__(self, item):
        return item in self.file

    def __len__(self):
        return len(self.file)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Leaving the ``with`` block closes the underlying HDF5 file
        self.file.close()

    def close(self):
        self.file.close()


================================================
FILE: paddlespeech/s2t/io/sampler.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math

import numpy as np
from paddle import distributed as dist
from paddle.io import BatchSampler
from paddle.io import DistributedBatchSampler

from paddlespeech.s2t.utils.log import Log

# Module-level logger obtained through the project's ``Log`` wrapper.
logger = Log(__name__).getlog()

# Names exported by ``from ... import *``.
__all__ = [
    "SortagradDistributedBatchSampler",
    "SortagradBatchSampler",
]


def _batch_shuffle(indices, batch_size, epoch, clipped=False):
    """Put similarly-sized instances into minibatches for better efficiency
    and make a batch-wise shuffle.

    1. Sort the audio clips by duration.
    2. Generate a random number `k`, k in [0, batch_size).
    3. Randomly shift `k` instances in order to create different batches
        for different epochs. Create minibatches.
    4. Shuffle the minibatches.

    :param indices: indexes. List of int.
    :type indices: list
    :param batch_size: Batch size. This size is also used for generate
                        a random number for batch shuffle.
    :type batch_size: int
    :param clipped: Whether to clip the heading (small shift) and trailing
                    (incomplete batch) instances.
    :type clipped: bool
    :return: Batch shuffled mainifest.
    :rtype: list
    """
    rng = np.random.RandomState(epoch)
    shift_len = rng.randint(0, batch_size - 1)
    batch_indices = list(zip(* [iter(indices[shift_len:])] * batch_size))
    rng.shuffle(batch_indices)
    batch_indices = [item for batch in batch_indices for item in batch]
    assert clipped is False
    if not clipped:
        res_len = len(indices) - shift_len - len(batch_indices)
        # when res_len is 0, will return whole list, len(List[-0:]) = len(List[:])
        if res_len != 0:
            batch_indices.extend(indices[-res_len:])
        batch_indices.extend(indices[0:shift_len])
        assert len(indices) == len(
            batch_indices
        ), f"_batch_shuffle: {len(indices)} : {len(batch_indices)} : {res_len} - {shift_len}"
    return batch_indices


class SortagradDistributedBatchSampler(DistributedBatchSampler):
    def __init__(self,
                 dataset,
                 batch_size,
                 num_replicas=None,
                 rank=None,
                 shuffle=False,
                 drop_last=False,
                 sortagrad=False,
                 shuffle_method="batch_shuffle"):
        """Batch sampler with sortagrad support for multi-gpu training.

        Args:
            dataset (paddle.io.Dataset): dataset to draw indices from.
            batch_size (int): batch size of a single gpu.
            num_replicas (int, optional): world size (number of gpus). Defaults to None.
            rank (int, optional): id of this rank. Defaults to None.
            shuffle (bool, optional): whether to shuffle. Defaults to False.
            drop_last (bool, optional): drop the final batch when it is smaller than batch size. Defaults to False.
            sortagrad (bool, optional): if True, epoch 0 keeps the dataset order and later epochs are shuffled. Defaults to False.
            shuffle_method (str, optional): "instance_shuffle" or "batch_shuffle". Defaults to "batch_shuffle".
        """
        super().__init__(dataset, batch_size, num_replicas, rank, shuffle,
                         drop_last)
        self._shuffle_method = shuffle_method
        self._sortagrad = sortagrad

    def __iter__(self):
        dataset_len = len(self.dataset)
        indices = np.arange(dataset_len).tolist()
        # Wrap around to the head of the list until total_size is reached
        indices += indices[:(self.total_size - len(indices))]
        assert len(indices) == self.total_size

        # Either keep the dataset order (sortagrad, epoch 0) or shuffle it
        if self.shuffle:
            keep_order = self.epoch == 0 and self._sortagrad
            if keep_order:
                logger.info(
                    f'rank: {dist.get_rank()} dataset sortagrad! epoch {self.epoch}'
                )
            else:
                logger.info(
                    f'rank: {dist.get_rank()} dataset shuffle! epoch {self.epoch}'
                )
                if self._shuffle_method == "batch_shuffle":
                    # Shuffle in chunks of `batch_size * nranks`: every rank
                    # then sees batches of comparable length at the same
                    # step. Otherwise the loss gets unstable (nan / inf
                    # grad), e.g. rank0 max length 20, rank3 max length 1000.
                    global_batch = self.batch_size * self.nranks
                    indices = _batch_shuffle(
                        indices, global_batch, self.epoch, clipped=False)
                elif self._shuffle_method == "instance_shuffle":
                    np.random.RandomState(self.epoch).shuffle(indices)
                else:
                    raise ValueError("Unknown shuffle method %s." %
                                     self._shuffle_method)
        assert len(
            indices
        ) == self.total_size, f"batch shuffle examples error: {len(indices)} : {self.total_size}"

        if self.nranks > 1:
            # Give this rank its `batch_size` slice out of every global batch
            stride = self.batch_size * self.nranks
            tail_size = self.total_size % stride
            assert tail_size % self.nranks == 0
            tail_per_rank = tail_size // self.nranks

            body_end = len(indices) - tail_size
            first = self.local_rank * self.batch_size
            mine = []
            for start in range(first, body_end, stride):
                mine.extend(indices[start:start + self.batch_size])

            # The incomplete last global batch is split evenly across ranks
            tail = indices[body_end:]
            tail_start = self.local_rank * tail_per_rank
            mine.extend(tail[tail_start:tail_start + tail_per_rank])
            indices = mine

        assert len(indices) == self.num_samples

        current = []
        for sample_idx in indices:
            current.append(sample_idx)
            if len(current) == self.batch_size:
                logger.debug(
                    f"rank: {dist.get_rank()} batch index: {current} ")
                yield current
                current = []
        if not self.drop_last and len(current) > 0:
            yield current

    def __len__(self):
        # Round up unless the incomplete last batch is dropped
        extra = 0 if self.drop_last else self.batch_size - 1
        return (self.num_samples + extra) // self.batch_size


class SortagradBatchSampler(BatchSampler):
    def __init__(self,
                 dataset,
                 batch_size,
                 shuffle=False,
                 drop_last=False,
                 sortagrad=False,
                 shuffle_method="batch_shuffle"):
        """Batch sampler with sortagrad support for a single gpu.

        Args:
            dataset (paddle.io.Dataset): dataset to draw indices from.
            batch_size (int): batch size of the gpu.
            shuffle (bool, optional): whether to shuffle. Defaults to False.
            drop_last (bool, optional): drop the final batch when it is smaller than batch size. Defaults to False.
            sortagrad (bool, optional): if True, epoch 0 keeps the dataset order and later epochs are shuffled. Defaults to False.
            shuffle_method (str, optional): "instance_shuffle" or "batch_shuffle". Defaults to "batch_shuffle".
        """
        self.dataset = dataset

        batch_size_ok = isinstance(batch_size, int) and batch_size > 0
        assert batch_size_ok, \
            "batch_size should be a positive integer"
        self.batch_size = batch_size

        assert isinstance(shuffle, bool), \
            "shuffle should be a boolean value"
        self.shuffle = shuffle

        assert isinstance(drop_last, bool), \
            "drop_last should be a boolean number"
        self.drop_last = drop_last

        # Starts at 0 and is advanced at the end of every full iteration
        self.epoch = 0
        self.num_samples = int(math.ceil(len(self.dataset) * 1.0))
        self.total_size = self.num_samples
        self._shuffle_method = shuffle_method
        self._sortagrad = sortagrad

    def __iter__(self):
        dataset_len = len(self.dataset)
        indices = np.arange(dataset_len).tolist()
        # Wrap around to the head of the list until total_size is reached
        indices += indices[:(self.total_size - len(indices))]
        assert len(indices) == self.total_size

        # Either keep the dataset order (sortagrad, epoch 0) or shuffle it
        if self.shuffle:
            keep_order = self.epoch == 0 and self._sortagrad
            if keep_order:
                logger.info(f'dataset sortagrad! epoch {self.epoch}')
            else:
                logger.info(f'dataset shuffle! epoch {self.epoch}')
                if self._shuffle_method == "batch_shuffle":
                    indices = _batch_shuffle(
                        indices, self.batch_size, self.epoch, clipped=False)
                elif self._shuffle_method == "instance_shuffle":
                    np.random.RandomState(self.epoch).shuffle(indices)
                else:
                    raise ValueError("Unknown shuffle method %s." %
                                     self._shuffle_method)
        assert len(
            indices
        ) == self.total_size, f"batch shuffle examples error: {len(indices)} : {self.total_size}"

        assert len(indices) == self.num_samples

        current = []
        for sample_idx in indices:
            current.append(sample_idx)
            if len(current) == self.batch_size:
                logger.debug(
                    f"rank: {dist.get_rank()} batch index: {current} ")
                yield current
                current = []
        if not self.drop_last and len(current) > 0:
            yield current

        # Only reached once the iterator has been fully consumed
        self.epoch += 1

    def __len__(self):
        # Round up unless the incomplete last batch is dropped
        extra = 0 if self.drop_last else self.batch_size - 1
        return (self.num_samples + extra) // self.batch_size


================================================
FILE: paddlespeech/s2t/io/speechbrain/__init__.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/io/speechbrain/batch.py
================================================
# Copyright (c) 2023 speechbrain Authors. All Rights Reserved.
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# 
# Modified from speechbrain 2023 (https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/dataio/batch.py)
"""Batch collation

Authors
  * Aku Rouhe 2020
"""
import collections

import paddle

from paddlespeech.s2t.io.speechbrain.data_utils import batch_pad_right
from paddlespeech.s2t.io.speechbrain.data_utils import mod_default_collate

# Lightweight record with two fields, ``data`` and ``lengths``.
PaddedData = collections.namedtuple("PaddedData", ["data", "lengths"])


class PaddedBatch:
    """Collate_fn for dict examples that contain variable-length sequences.

    The examples are regrouped by key: for every key of the first example the
    values of all examples are gathered and passed to ``paddle.to_tensor``.
    Keys selected for padding are stored as ``PaddedData(data, lengths)``
    attributes; every other key is stored as is (optionally collated).
    Each key is reachable both as an attribute and through ``batch[key]``.

    Arguments
    ---------
    examples : list
        List of example dicts, as produced by Dataloader.
    padded_keys : list, None
        (Optional) List of keys to pad on. If None, a key is padded when the
        first element of its converted values is a paddle.Tensor.
    device_prep_keys : list, None
        (Optional) Keys recorded for device preparation.
        If None, a key is recorded when the first element of its values is a
        paddle.Tensor.
    padding_func : callable, optional
        Called with the values to be padded together. Its result is unpacked
        into ``PaddedData``: the padded data and the data lengths.
    padding_kwargs : dict
        (Optional) Extra kwargs to pass to padding_func. E.G. mode, value
    nonpadded_stack : bool
        Whether to run ``mod_default_collate`` on values that didn't get
        padded. Default: True.
    """

    def __init__(
            self,
            examples,
            padded_keys=None,
            device_prep_keys=None,
            padding_func=batch_pad_right,
            padding_kwargs={},
            nonpadded_stack=True, ):
        self.__length = len(examples)
        self.__keys = list(examples[0].keys())
        self.__padded_keys = []
        self.__device_prep_keys = []
        for name in self.__keys:
            # Gather this key from every example and convert in one go
            column = paddle.to_tensor([sample[name] for sample in examples])

            # An explicit key list wins; otherwise decide by the value type
            if padded_keys is None:
                pad_this = isinstance(column[0], paddle.Tensor)
            else:
                pad_this = name in padded_keys

            if pad_this:
                self.__padded_keys.append(name)
                padded = PaddedData(*padding_func(column, **padding_kwargs))
                setattr(self, name, padded)
            else:
                if nonpadded_stack:
                    column = mod_default_collate(column)
                setattr(self, name, column)

            # Same rule for device preparation, using the current values
            if device_prep_keys is None:
                prep_this = isinstance(column[0], paddle.Tensor)
            else:
                prep_this = name in device_prep_keys
            if prep_this:
                self.__device_prep_keys.append(name)

    def __len__(self):
        return self.__length

    def __getitem__(self, key):
        if key not in self.__keys:
            raise KeyError(f"Batch doesn't have key: {key}")
        return getattr(self, key)

    def __iter__(self):
        """Iterates over the different elements of the batch, in key order.
        """
        return (getattr(self, name) for name in self.__keys)


================================================
FILE: paddlespeech/s2t/io/speechbrain/data_pipeline.py
================================================
# Copyright (c) 2023 speechbrain Authors. All Rights Reserved.
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# 
# Modified from speechbrain 2023 (https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/utils/data_pipeline.py)
"""A pipeline for data transformations.

Author:
    * Aku Rouhe
"""
import inspect
from dataclasses import dataclass

from paddlespeech.s2t.io.speechbrain.depgraph import DependencyGraph


@dataclass
class StaticItem:
    """Data class that represents a static item.

    Static items are in-memory items so they don't need to be computed
    dynamically.
    """

    # Name under which the static item is identified.
    key: str


class DynamicItem:
    """Essentially represents a data transformation function.

    A DynamicItem takes some arguments and computes its value dynamically when
    called. A straight-forward use-case is to load something from disk
    dynamically; take the path and provide the loaded data.

    Instances of this class are often created implicitly via the
    @takes and @provides decorators or otherwise from specifying the taken and
    provided arguments and the function.

    A counterpart is the GeneratorDynamicItem, which should be used for
    generator functions.

    Arguments
    ---------
    takes : list, optional
        The keys of the items that this needs to compute its output.
        Defaults to a fresh empty list.
    func : callable
        The function that is used to compute the output.
    provides : list, optional
        The keys that this provides. Defaults to a fresh empty list.
    """

    def __init__(self, takes=None, func=None, provides=None):
        # Each instance gets its own list: a shared mutable default would let
        # an in-place change on one item leak into every other item.
        self.takes = [] if takes is None else takes
        self.func = func
        self.provides = [] if provides is None else provides

    def __call__(self, *args):
        """Calls the wrapped function with the given positional arguments."""
        return self.func(*args)

    # The next methods are more about supporting GeneratorDynamicItems
    def next_takes(self):
        """The next argkeys to provide to this, when called."""
        # Regular function DynamicItems always just need the same set of args
        return self.takes

    def next_provides(self):
        """The next keys that this provides, when called."""
        # Regular function DynamicItems always just provide the same set of keys
        return self.provides

    def provided_in_order(self):
        """Assuming that this may need to be called multiple times; which keys
        does it provide at that call. Returns a list, with len equal to the
        number of times that this may be called."""
        # Regular function DynamicItems are only called once:
        return [self.provides]

    def reset(self):
        """Signals that this will not be called any more times on this pipeline
        call."""
        # Regular function DynamicItems don't need special resets.
        pass


class GeneratorDynamicItem(DynamicItem):
    """A multi-step data transformation backed by a generator function.

    Where DynamicItem wraps a regular function, this class wraps a generator
    function: the arguments are passed once, when the generator is created,
    and every further call advances the generator by one ``yield``.

    This suits chains of transformations where later stages build on earlier
    ones, e.g. yielding tokenized text first and an integer-encoded version on
    the second call, even if the encoder has to be fitted on the first output.
    The whole chain can then be written as a single, readable function.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # The generator currently being stepped through (None until the first
        # call of a pipeline run) and how many values it has handed out.
        self.current_generator = None
        self.num_provided_items = 0

    def __call__(self, *args):
        """Advances the generator by one step and returns the yielded value.

        The generator is created from ``func(*args)`` on the first call;
        sending new values into a running generator is not supported.
        """
        exhausted = self.num_provided_items == len(self.provides)
        if exhausted:
            raise RuntimeError("DynamicItemPipeline called too many times!")
        if not self.current_generator:
            self.current_generator = self.func(*args)
        produced = next(self.current_generator)
        self.num_provided_items += 1
        return produced

    def next_takes(self):
        """The argkeys needed by the next call.

        Arguments are only consumed when the generator is created, so once it
        exists nothing more is needed.
        """
        return [] if self.current_generator else self.takes

    def next_provides(self):
        """The keys that the next call provides, always as a list-like."""
        upcoming = self.provides[self.num_provided_items]
        # A bare string is one key; anything else is a group of keys yielded
        # together, e.g. @provides("wav_read", ["left_ch", "right_ch"]).
        return [upcoming] if isinstance(upcoming, str) else upcoming

    def provided_in_order(self):
        """The keys provided at each call, one entry per call, in call order."""
        return [[keys] if isinstance(keys, str) else keys
                for keys in self.provides]

    def reset(self):
        """Closes any running generator and rewinds the call counter, since
        no further calls will follow in this pipeline run."""
        running = self.current_generator
        if running is not None:
            running.close()
        self.current_generator = None
        self.num_provided_items = 0


def takes(*argkeys):
    """Decorator that builds a DynamicItem and sets the keys it takes.

    Depending on the decorated object:

    * an existing DynamicItem only gets its ``takes`` assigned (and it is an
      error if it already has some);
    * a generator function is wrapped in a GeneratorDynamicItem;
    * anything else is wrapped in a regular DynamicItem.

    Arguments are always handed to the function at the start. Generators are
    not sent new arguments later on; create another dynamic item for that.
    GeneratorDynamicItem is meant for pipelines that transform one input in
    several steps whose intermediate results are needed too, e.g. for fitting
    a BPE segmenter.

    Example
    -------
    >>> @takes("text")
    ... def tokenize(text):
    ...     return text.strip().lower().split()
    >>> tokenize.provides = ["tokenized"]
    >>> tokenize('\tThis Example gets tokenized')
    ['this', 'example', 'gets', 'tokenized']
    """

    def decorator(obj):
        """Applies ``argkeys`` to ``obj``, wrapping it when necessary."""
        if not isinstance(obj, DynamicItem):
            generator_like = inspect.isgeneratorfunction(obj)
            item_cls = GeneratorDynamicItem if generator_like else DynamicItem
            return item_cls(takes=argkeys, func=obj)
        if obj.takes:
            raise ValueError("Can't overwrite DynamicItem.takes")
        obj.takes = argkeys
        return obj

    return decorator


takes_decorator = takes  # Just for DataPipeline.add_dynamic_item


def provides(*output_keys):
    """Decorator that builds a DynamicItem and sets the keys it provides.

    Depending on the decorated object:

    * an existing DynamicItem only gets its ``provides`` assigned (and it is
      an error if it already has some);
    * a generator function is wrapped in a GeneratorDynamicItem;
    * anything else is wrapped in a regular DynamicItem.

    NOTE
    ----
    With several output keys, e.g. @provides("signal", "mfcc"), regular
    functions and generators behave slightly differently: a regular function
    returns one tuple whose length equals len(output_keys), while a generator
    yields the items one at a time.

    >>> @provides("signal", "feat")
    ... def read_feat():
    ...     wav = [.1,.2,-.1]
    ...     feat = [s**2 for s in wav]
    ...     return wav, feat
    >>> @provides("signal", "feat")
    ... def read_feat():
    ...     wav = [.1,.2,-.1]
    ...     yield wav
    ...     feat = [s**2 for s in wav]
    ...     yield feat

    To yield several keys in a single step, group them in a list:

    >>> @provides("wav_read", ["left_channel", "right_channel"])
    ... def read_multi_channel():
    ...     wav = [[.1,.2,-.1],[.2,.1,-.1]]
    ...     yield wav
    ...     yield wav[0], wav[1]

    """

    def decorator(obj):
        """Applies ``output_keys`` to ``obj``, wrapping it when necessary."""
        if not isinstance(obj, DynamicItem):
            generator_like = inspect.isgeneratorfunction(obj)
            item_cls = GeneratorDynamicItem if generator_like else DynamicItem
            return item_cls(func=obj, provides=output_keys)
        if obj.provides:
            raise ValueError("Can't overwrite DynamicItem provides-list.")
        obj.provides = output_keys
        return obj

    return decorator


provides_decorator = provides  # Just for DataPipeline.add_dynamic_item


class DataPipeline:
    """Organises data transformations into a pipeline.

    Static keys name entries that are expected in the ``data`` dict passed to
    the pipeline; dynamic items compute further keys from static entries or
    from the outputs of other dynamic items. A dependency graph determines
    which items have to run, and in which order, for the requested outputs.

    Example
    -------
    >>> pipeline = DataPipeline(
    ...     static_data_keys=["text"],
    ...     dynamic_items=[
    ...     {"func": lambda x: x.lower(), "takes": "text", "provides": "foo"},
    ...     {"func": lambda x: x[::-1], "takes": "foo", "provides": "bar"},
    ...     ],
    ...     output_keys=["bar"],
    ... )
    >>> pipeline({"text": "Test"})
    {'bar': 'tset'}
    """

    def __init__(self, static_data_keys, dynamic_items=(), output_keys=()):
        # Immutable defaults: the arguments are only iterated, never stored.
        self.dg = DependencyGraph()
        # Cached evaluation order; None means it must be recomputed.
        self._exec_order = None
        # Maps every known key to the graph node that produces it.
        self.key_to_node = {}
        # Maps a key nobody provides yet to the keys that are waiting on it.
        self.unaccounted_keys = {}
        self.dynamic_items = []
        self.output_mapping = {}
        self.add_static_keys(static_data_keys)
        self.add_dynamic_items(dynamic_items)
        self.set_output_keys(output_keys)

    def add_static_keys(self, static_keys):
        """Informs the pipeline about static items.

        Static items are the ones provided to __call__ as data.
        """
        for key in static_keys:
            node_id = self.dg.add_node(data=StaticItem(key=key))
            self.key_to_node[key] = node_id

    def add_dynamic_items(self, dynamic_items):
        """Add multiple dynamic items at once.

        Each entry is either a DynamicItem object or a mapping with the
        keyword arguments of ``add_dynamic_item`` (func, takes, provides).
        """
        for item in dynamic_items:
            # Dispatch on the type instead of catching TypeError: a blanket
            # except would also swallow genuine TypeErrors raised while
            # adding the item (e.g. a misspelled keyword in the mapping) and
            # replace them with an unrelated follow-up error.
            if isinstance(item, DynamicItem):
                self.add_dynamic_item(item)
            else:
                self.add_dynamic_item(**item)

    def add_dynamic_item(self, func, takes=None, provides=None):
        """Adds a dynamic item to the Pipeline.

        Two calling conventions. For DynamicItem objects, just use:
        add_dynamic_item(dynamic_item)
        But otherwise, should use:
        add_dynamic_item(func, takes, provides)

        Arguments
        ---------
        func : callable, DynamicItem
            If a DynamicItem is given, adds that directly. Otherwise a
            DynamicItem is created, and this specifies the callable to use. If
            a generator function is given, then create a GeneratorDynamicItem.
            Otherwise creates a normal DynamicItem.
        takes : list, str
            List of keys. When func is called, each key is resolved to
            either an entry in the data or the output of another dynamic_item.
            The func is then called with these as positional arguments,
            in the same order as specified here.
            A single key can be given as a bare string.
        provides : str, list
            For regular functions, the key or list of keys that it provides.
            If you give a generator function, key or list of keys that it
            yields, in order. Also see the provides decorator.
            A single key can be given as a bare string.

        Raises
        ------
        ValueError
            If a DynamicItem is given together with takes or provides.
        """
        if isinstance(func, DynamicItem):
            if takes is not None or provides is not None:
                raise ValueError("If providing a DynamicItem directly, don't "
                                 "specify takes or provides")
            else:
                self._add_dynamic_item_object(func)
                return
        if isinstance(takes, str):
            takes = [takes]
        if isinstance(provides, str):
            provides = [provides]
        di = takes_decorator(*takes)(provides_decorator(*provides)(func))
        self._add_dynamic_item_object(di)

    def _add_dynamic_item_object(self, obj):
        """Internally adds the object.

        There is a node in the dependency graph for each call of the
        DynamicItem. Each call may return multiple keys and depend on multiple
        keys. An internal dict maps key to the id of the node that produces it.
        """
        if not obj.provides:
            raise ValueError("Won't add redundant dynamic item which doesn't "
                             "provide anything.")
        depended = []
        for key in obj.takes:
            # Might not be accounted for, yet:
            if key not in self.key_to_node:
                dependee_keys = self.unaccounted_keys.setdefault(key, [])
                dependee_keys.extend(obj.next_provides())
            else:
                depended.append(self.key_to_node[key])
        for provided in obj.provided_in_order():
            node_id = self.dg.add_node(data=obj)
            for key in provided:
                self.key_to_node[key] = node_id
                # This key may also be unaccounted for, so account for it now:
                if key in self.unaccounted_keys:
                    for dependee_key in self.unaccounted_keys[key]:
                        dependee_node = self.key_to_node[dependee_key]
                        self.dg.add_edge(dependee_node, node_id)
                    del self.unaccounted_keys[key]  # Now accounted for!
            for dep_id in depended:
                self.dg.add_edge(node_id, dep_id)
            # Next call will depend on this call:
            depended = [node_id]
        # Keep a reference to the item in this object, as well:
        self.dynamic_items.append(obj)

    def set_output_keys(self, keys):
        """Use this to change the output keys.

        Also re-evaluates execution order.
        So if you request different outputs, some parts of the
        data pipeline may be skipped.

        Arguments
        ---------
        keys : dict, list, None
            List of keys (str) to produce in output.

            If a dict is given; it is used to map internal keys to output keys.
            From the output_keys dict key:value pairs the key appears outside,
            and value is the internal key.
        """
        self.output_mapping = self._output_keys_to_mapping(keys)
        # Invalidate the cached order; it is rebuilt on the next call.
        self._exec_order = None

    @staticmethod
    def _output_keys_to_mapping(keys):
        """Normalises ``keys`` to an {outside key: internal key} dict."""
        # Ensure a mapping (accept a list for convenience, too)
        if keys is None:
            output_mapping = {}
        elif isinstance(keys, dict):
            output_mapping = keys
        else:
            output_mapping = {key: key for key in keys}
        return output_mapping

    def compute_outputs(self, data):
        """
        Arguments
        ---------
        data : dict
            Dictionary with data entries by key.

        Returns
        -------
        dict
            With the keys that were set.
        """
        if self._exec_order is None:
            self._prepare_run(data)
        return self._compute(data, self._exec_order, self.output_mapping)

    def compute_specific(self, keys, data):
        """Compute output of specific item, without changing output_keys.

        ``keys`` accepts the same forms as ``set_output_keys``: a list of keys
        or a dict mapping outside keys to internal keys.
        """
        output_mapping = self._output_keys_to_mapping(keys)
        # The graph is indexed by internal keys, i.e. the mapping's values.
        # Selecting by ``keys`` directly would look up the outside names when
        # a dict is given, unlike _prepare_run.
        order = self.dg.get_evaluation_order(
            selected_keys=self.get_selected_node_ids(output_mapping.values()))
        return self._compute(data, order, output_mapping)

    def _compute(self, data, order, output_mapping):
        """Runs the items in ``order`` on ``data`` and collects the outputs.

        Raises
        ------
        RuntimeError
            If some taken key is provided by neither a static key nor a
            dynamic item.
        KeyError
            If a required static key is missing from ``data``.
        """
        if self.unaccounted_keys:
            MSG = "These keys are still unaccounted for in the data pipeline: "
            MSG += ", ".join(self.unaccounted_keys)
            raise RuntimeError(MSG)
        intermediate = {}
        try:
            for node_id, edges, item in order:
                if isinstance(item, StaticItem):
                    # Static item in data.
                    # Just check that key is found.
                    try:
                        data[item.key]
                        continue
                    except KeyError:
                        raise KeyError(f"Expected key {item.key} in data!")
                # A dynamic item, which we should compute:
                args = [
                    data[argkey] if argkey in data else intermediate[argkey]
                    for argkey in item.next_takes()
                ]
                # This needs to be called BEFORE the dynamic item is called.
                provided_keys = item.next_provides()
                values = item(*args)  # Call the DynamicItem to produce output
                # If there is just one output value, wrap in a list so that
                # it can be zipped as well:
                if len(provided_keys) == 1:
                    values = [values]
                intermediate.update(zip(provided_keys, values))
        finally:
            # Reset even when an item raised: otherwise a generator item
            # keeps its half-consumed generator and the next pipeline call
            # would continue it with the previous call's data.
            for dynamic_item in self.dynamic_items:
                dynamic_item.reset()
        return {
            outkey: data[inkey] if inkey in data else intermediate[inkey]
            for outkey, inkey in output_mapping.items()
        }

    def get_selected_node_ids(self, selected_keys):
        """Translates selected keys to dependency graph keys."""
        return [self.key_to_node[key] for key in selected_keys]

    def __call__(self, data):
        return self.compute_outputs(data)

    def _prepare_run(self, data):
        """Caches the evaluation order needed for the current output keys."""
        self._exec_order = list(
            self.dg.get_evaluation_order(
                self.get_selected_node_ids(self.output_mapping.values())))


================================================
FILE: paddlespeech/s2t/io/speechbrain/data_utils.py
================================================
# Copyright (c) 2023 speechbrain Authors. All Rights Reserved.
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# 
# Modified from speechbrain 2023 (https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/utils/data_utils.py)
import collections.abc
import csv
import os
import pathlib
import re
import shutil
import urllib.request

import numpy as np
import paddle
import tqdm


def batch_pad_right(array: list, mode="constant", value=0):
    """Batches a list of arrays by right-padding them to a common shape.

    Only the first dimension may differ between the arrays; all other
    dimensions have to match.

    Parameters
    ----------
    array : list
        List of arrays we wish to pad together.
    mode : str
        Padding mode see numpy.pad documentation.
    value : float
        Padding value see numpy.pad documentation.

    Returns
    -------
    batched : numpy array
        Padded numpy array with a new leading batch dimension.
    valid_vals : numpy array
        float32 array holding, for each input, the proportion of its first
        dimension that is original (non-padded) data.

    Raises
    ------
    IndexError
        If the list is empty or the arrays differ in number of dimensions.
    EnvironmentError
        If the arrays differ in a dimension other than the first.
    """

    if not len(array):
        raise IndexError("Tensors list must not be empty")

    if len(array) == 1:
        # if there is only one tensor in the batch we simply unsqueeze it.
        return np.expand_dims(array[0], 0), np.array([1.0], dtype="float32")

    # Every array must match the first one's rank. (`any` would accept the
    # batch as soon as a single array matched.)
    ndim = array[0].ndim
    if not all(x.ndim == ndim for x in array[1:]):
        raise IndexError("All array must have same number of dimensions")

    # FIXME we limit the support here: we allow padding of only the first dimension
    # need to remove this when feat extraction is updated to handle multichannel.
    max_shape = []
    for dim in range(ndim):
        sizes = [x.shape[dim] for x in array]
        if dim != 0 and any(size != sizes[0] for size in sizes[1:]):
            raise EnvironmentError(
                "Tensors should have same dimensions except for the first one"
            )
        max_shape.append(max(sizes))

    batched = []
    valid = []
    for t in array:
        # for each tensor we apply pad_right_to
        padded, valid_percent = pad_right_to(
            t, max_shape, mode=mode, value=value)
        batched.append(padded)
        valid.append(valid_percent[0])

    batched = np.stack(batched)

    return batched, np.array(valid, dtype="float32")


# Matched against ``dtype.str`` in mod_default_collate to detect arrays of
# string classes and objects, which are returned without conversion.
np_str_obj_array_pattern = re.compile(r"[SaUO]")


def pad_right_to(
        array: np.ndarray,
        target_shape: (list, tuple),
        mode="constant",
        value=0, ):
    """
    This function takes a numpy array of arbitrary shape and pads it to target
    shape by appending values on the right (at the end) of every dimension.

    Parameters
    ----------
    array : input numpy array
        Input array whose dimensions we need to pad.
    target_shape : (list, tuple)
        Target shape we want for the output, its len must be equal to
        array.ndim.
    mode : str
        Pad mode, please refer to numpy.pad documentation.
    value : float
        Pad value, only used when mode is "constant".

    Returns
    -------
    array : numpy array
        Padded numpy array.
    valid_vals : list
        List containing proportion for each dimension of original, non-padded values.
    """
    assert len(target_shape) == array.ndim
    pad_width = []  # (before, after) padding per dimension, in axis order.
    valid_vals = []  # this contains the relative lengths for each dimension.
    for current, target in zip(array.shape, target_shape):
        assert (target >= current
                ), "Target shape must be >= original shape for every dim"
        # numpy.pad wants one (before, after) pair per axis, first axis
        # first. A flat, last-axis-first list only works for 1-D input and
        # cannot be broadcast for higher ranks.
        pad_width.append((0, target - current))
        valid_vals.append(current / target)
    # numpy.pad rejects ``constant_values`` for every mode but "constant".
    pad_kwargs = {"constant_values": (value, value)} if mode == "constant" else {}
    array = np.pad(array, pad_width, mode, **pad_kwargs)

    return array, valid_vals


def mod_default_collate(batch):
    """Makes a tensor from list of batch values.

    Note that this doesn't need to zip(*) values together
    as PaddedBatch connects them already (by key).

    Here the idea is not to error out: whenever the values cannot be turned
    into a single tensor, the batch is returned unchanged.

    Arguments
    ---------
    batch : list
        Non-empty list of values of one kind (paddle tensors, numpy arrays or
        scalars, Python floats or ints, or anything else).

    Returns
    -------
    paddle.Tensor or list
        The stacked/converted values, or ``batch`` itself as a fallback.
    """
    elem = batch[0]
    elem_type = type(elem)
    if isinstance(elem, paddle.Tensor):
        try:
            # No shared-memory fast path here: the torch-only
            # ``storage()._new_shared`` / ``new`` calls do not exist on paddle
            # tensors and would fail inside DataLoader workers.
            return paddle.stack(batch, 0)
        # Unequal size. NOTE(review): paddle appears to report shape
        # mismatches as ValueError rather than RuntimeError, so both are
        # treated as "cannot stack" - confirm against the paddle version used.
        except (RuntimeError, ValueError):
            return batch
    elif (elem_type.__module__ == "numpy" and elem_type.__name__ != "str_" and
          elem_type.__name__ != "string_"):
        try:
            if (elem_type.__name__ == "ndarray" or
                    elem_type.__name__ == "memmap"):
                # array of string classes and object
                if np_str_obj_array_pattern.search(elem.dtype.str) is not None:
                    return batch
                return mod_default_collate(
                    [paddle.to_tensor(b, dtype=b.dtype) for b in batch])
            elif elem.shape == ():  # scalars
                # ``batch`` is a plain list without a dtype; take the dtype
                # from its first element.
                return paddle.to_tensor(batch, dtype=elem.dtype)
        except (RuntimeError, ValueError):  # Unequal size
            return batch
        # Any other numpy value: leave untouched instead of returning None.
        return batch
    elif isinstance(elem, float):
        return paddle.to_tensor(batch, dtype=paddle.float64)
    elif isinstance(elem, int):
        return paddle.to_tensor(batch, dtype=paddle.int64)
    else:
        return batch


================================================
FILE: paddlespeech/s2t/io/speechbrain/dataio.py
================================================
# Copyright (c) 2023 speechbrain Authors. All Rights Reserved.
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# 
# Modified from speechbrain 2023 (https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/dataio/dataio.py)
"""
Data reading and writing.

Authors
 * Mirco Ravanelli 2020
 * Aku Rouhe 2020
 * Ju-Chieh Chou 2020
 * Samuele Cornell 2020
 * Abdel HEBA 2020
"""
import csv
import hashlib
import json
import logging
import os
import pickle
import re
import time

import numpy as np
import soundfile
logger = logging.getLogger(__name__)
import paddle


def load_data_json(json_path, replacements={}):
    """Reads a JSON file and formats every string value found in it.

    Arguments
    ----------
    json_path : str
        Path to JSON file.
    replacements : dict
        (Optional dict), e.g., {"data_folder": "/home/PaddleSpeech/data"}.
        Used as the format mapping for all string values, at every nesting
        level of the loaded data.

    Returns
    -------
    dict
        JSON data with replacements applied.
    """
    with open(json_path, "r") as json_file:
        loaded = json.load(json_file)
    # Formatting happens in place on the freshly loaded structure.
    _recursive_format(loaded, replacements)
    return loaded


def _recursive_format(data, replacements):
    # Data: dict or list, replacements : dict
    # Replaces string keys in replacements by their values
    # at all levels of data (in str values)
    # Works in-place.
    if isinstance(data, dict):
        for key, item in data.items():
            if isinstance(item, dict) or isinstance(item, list):
                _recursive_format(item, replacements)
            elif isinstance(item, str):
                data[key] = item.format_map(replacements)
            # If not dict, list or str, do nothing
    if isinstance(data, list):
        for i, item in enumerate(data):
            if isinstance(item, dict) or isinstance(item, list):
                _recursive_format(item, replacements)
            elif isinstance(item, str):
                data[i] = item.format_map(replacements)
            # If not dict, list or str, do nothing


def load_data_csv(csv_path, replacements={}):
    """Reads a CSV file into a dict of rows keyed by each row's 'ID'.

    This is the legacy CSV data format: an 'ID' column is mandatory and its
    values must be unique. A column named duration is converted to float;
    every other column is kept as text (legacy _format and _opts columns get
    no special treatment).

    Bash-like variables of the form $to_replace inside the values are
    substituted from ``replacements``.

    Arguments
    ----------
    csv_path : str
        Path to CSV file.
    replacements : dict
        (Optional dict), e.g., {"data_folder": "/home/PaddleSpeech/data"}
        Maps variable names to the values substituted for them.

    Returns
    -------
    dict
        CSV data with replacements applied.

    Raises
    ------
    KeyError
        If the 'ID' column is missing, or a value refers to a variable that
        is not in ``replacements``.
    ValueError
        If an ID occurs more than once.
    """
    variable_finder = re.compile(r"\$([\w.]+)")

    def substitute(match):
        # Group 1 is the variable name without the leading "$".
        return str(replacements[match[1]])

    result = {}
    with open(csv_path, newline="") as csvfile:
        for row in csv.DictReader(csvfile, skipinitialspace=True):
            # The ID becomes the key in result, so it leaves the row itself.
            try:
                data_id = row.pop("ID")
            except KeyError:
                raise KeyError("CSV has to have an 'ID' field, with unique ids"
                               " for all data points")
            if data_id in result:
                raise ValueError(f"Duplicate id: {data_id}")
            for key in list(row):
                value = row[key]
                try:
                    row[key] = variable_finder.sub(substitute, value)
                except KeyError:
                    raise KeyError(f"The item {value} requires replacements "
                                   "which were not supplied.")
            if "duration" in row:
                row["duration"] = float(row["duration"])
            result[data_id] = row
    return result


def read_audio(waveforms_obj):
    """General audio loading, based on a custom notation.

    Expected use case is in conjunction with Datasets
    specified by JSON.

    The custom notation:

    The annotation can be just a path to a file:
    "/path/to/wav1.wav"

    Or can specify more options in a dict:
    {"file": "/path/to/wav2.wav",
    "start": 8000,
    "stop": 16000
    }

    "start" defaults to 0 and "stop" defaults to the end of the file.

    Arguments
    ----------
    waveforms_obj : str, dict
        Audio reading annotation, see above for format.

    Returns
    -------
    numpy.ndarray
        float32 audio samples as returned by ``soundfile.read``.
    """
    if isinstance(waveforms_obj, str):
        audio, _ = soundfile.read(waveforms_obj, dtype="float32")
        return audio

    path = waveforms_obj["file"]
    start = waveforms_obj.get("start", 0)
    # A missing "stop" must stay None so that soundfile reads up to the end
    # of the file; defaulting it to ``start`` would request zero frames.
    stop = waveforms_obj.get("stop")
    audio, _ = soundfile.read(path, start=start, stop=stop, dtype="float32")
    return audio


def read_audio_multichannel(waveforms_obj):
    """General audio loading, based on a custom notation.

    Expected use case is in conjunction with Datasets
    specified by JSON.

    The custom notation:

    The annotation can be just a path to a file:
    "/path/to/wav1.wav"

    Multiple (possibly multi-channel) files can be specified, as long as they
    have the same length:
    {"files": [
        "/path/to/wav1.wav",
        "/path/to/wav2.wav"
        ]
    }

    Or you can specify a single file more succinctly:
    {"files": "/path/to/wav2.wav"}

    Offset number samples and stop number samples also can be specified to read
    only a segment within the files.
    {"files": [
        "/path/to/wav1.wav",
        "/path/to/wav2.wav"
        ]
    "start": 8000
    "stop": 16000
    }

    "start" defaults to 0 and "stop" defaults to the end of the file.

    Arguments
    ----------
    waveforms_obj : str, dict
        Audio reading annotation, see above for format.

    Returns
    -------
    paddle.Tensor
        For a plain path: the samples in the shape ``soundfile.read`` returns
        them. For the dict notation: a (samples, channels) tensor in which the
        channels of all listed files are placed side by side.
    """
    if isinstance(waveforms_obj, str):
        audio, _ = soundfile.read(waveforms_obj, dtype="float32")
        audio = paddle.to_tensor(audio)
        return audio

    files = waveforms_obj["files"]
    if not isinstance(files, list):
        files = [files]

    waveforms = []
    start = waveforms_obj.get("start", 0)
    # A missing "stop" must stay None so that soundfile reads up to the end
    # of the file; a negative default would be counted from the end.
    stop = waveforms_obj.get("stop")
    for f in files:
        # always_2d gives (samples, channels) for mono files as well, so all
        # files can be joined along the channel axis.
        audio, _ = soundfile.read(
            f, start=start, stop=stop, dtype="float32", always_2d=True)
        waveforms.append(paddle.to_tensor(audio))

    # Stack the files as additional channels; this is why they must have the
    # same length.
    out = paddle.concat(waveforms, 1)
    return out


def write_audio(filepath, audio, samplerate):
    """Write audio on disk. It is basically a wrapper to support saving
    audio signals in format (audio, channels).

    Arguments
    ---------
    filepath: path
        Path where to save the audio file.
    audio : paddle.Tensor
        Audio in the format (signal, channels), or a 1-D (signal, ) mono
        signal. Array-likes are accepted as well.
    samplerate: int
        Sample rate (e.g., 16000).

    """
    # soundfile works on numpy data, so leave the tensor world first.
    if hasattr(audio, "numpy"):
        audio = audio.numpy()
    # soundfile.write already expects (frames, channels) or a 1-D mono
    # signal. Transposing or adding a leading axis would make it read the
    # samples as channels.
    soundfile.write(filepath, np.asarray(audio), samplerate)


def load_pickle(pickle_path):
    """Read a pickled Python object back from a .pkl file.

    Arguments
    ---------
    pickle_path : str
        Path to pickle file.

    Returns
    -------
    object
        Whatever object was stored in the pickle.
    """
    with open(pickle_path, "rb") as handle:
        return pickle.load(handle)


def to_floatTensor(x: (list, tuple, np.ndarray)):
    """Build a float32 paddle tensor from array-like data.

    Arguments
    ---------
    x : (list, tuple, np.ndarray)
        Input data to convert.

    Returns
    -------
    paddle.Tensor
        The data as a tensor created with dtype 'float32'.
    """
    as_float32 = paddle.to_tensor(x, dtype='float32')
    return as_float32


def to_doubleTensor(x: (list, tuple, np.ndarray)):
    """Build a float64 paddle tensor from array-like data.

    Arguments
    ---------
    x : (list, tuple, np.ndarray)
        Input data to convert.

    Returns
    -------
    paddle.Tensor
        The data as a tensor created with dtype 'float64'.
    """
    as_float64 = paddle.to_tensor(x, dtype='float64')
    return as_float64


def to_longTensor(x: (list, tuple, np.ndarray)):
    """Build an int64 paddle tensor from array-like data.

    Arguments
    ---------
    x : (list, tuple, np.ndarray)
        Input data to convert.

    Returns
    -------
    paddle.Tensor
        The data as a tensor created with dtype 'int64'.
    """
    as_int64 = paddle.to_tensor(x, dtype='int64')
    return as_int64


def convert_index_to_lab(batch, ind2lab):
    """Map every integer ID in a batch of sequences to its string label.

    Arguments
    ---------
    batch : list
        List of lists, a batch of sequences.
    ind2lab : dict
        Mapping from integer IDs to labels.

    Returns
    -------
    list
        List of lists with the same layout as batch, holding the labels
        looked up in ind2lab.

    """
    labelled_batch = []
    for seq in batch:
        # Each entry is coerced with int() before the lookup.
        labelled_batch.append([ind2lab[int(index)] for index in seq])
    return labelled_batch


def relative_time_to_absolute(batch, relative_lens, rate):
    """Turn relative sequence lengths into absolute durations (batch level).

    Arguments
    ---------
    batch : paddle.tensor
        Sequences to determine the duration for; its second dimension is
        taken as the maximum length.
    relative_lens : paddle.tensor
        The relative length of each sequence in batch. The longest sequence in
        the batch needs to have relative length 1.0.
    rate : float
        The rate at which sequence elements occur in real-world time. Sample
        rate, if batch is raw wavs (recommended) or 1/frame_shift if batch is
        features. This has to have 1/s as the unit.

    Returns
    -------
    paddle.tensor
        Duration of each sequence in seconds.

    """
    longest = batch.shape[1]
    # Relative length -> (rounded) number of elements -> seconds.
    element_counts = paddle.round(relative_lens * longest)
    return element_counts / rate


class IterativeCSVWriter:
    """Write CSV files a line at a time.

    The header line is written to the stream on construction. Every data row
    is preceded by a newline, so the output never ends with a trailing
    newline.

    Arguments
    ---------
    outstream : file-object
        A writeable stream
    data_fields : list
        List of the optional keys to write. Each key will be expanded,
        producing three fields: key, key_format, key_opts.
    defaults : dict, optional
        Mapping from field name to the value used when a keyword-style row
        does not supply that field. When omitted, each writer gets its own
        empty dict.
    """

    def __init__(self, outstream, data_fields, defaults=None):
        self._outstream = outstream
        self.fields = ["ID", "duration"] + self._expand_data_fields(data_fields)
        # A literal {} default would be shared by every writer and mutated by
        # set_default(); create a private dict per instance instead.
        self.defaults = {} if defaults is None else defaults
        self._outstream.write(",".join(self.fields))

    def set_default(self, field, value):
        """Sets a default value for the given CSV field.

        Arguments
        ---------
        field : str
            A field in the CSV.
        value
            The default value.

        Raises
        ------
        ValueError
            If field is not one of the CSV fields.
        """
        if field not in self.fields:
            raise ValueError(f"{field} is not a field in this CSV!")
        self.defaults[field] = value

    def write(self, *args, **kwargs):
        """Writes one data line into the CSV.

        Arguments
        ---------
        *args
            Supply every field with a value in positional form OR.
        **kwargs
            Supply certain fields by key. The ID field is mandatory for all
            lines, but others can be left empty.

        Raises
        ------
        ValueError
            If positional and named fields are mixed, if the number of
            positional values does not match the number of fields, or if no
            ID is supplied.
        """
        if args and kwargs:
            raise ValueError(
                "Use either positional fields or named fields, but not both.")
        if args:
            if len(args) != len(self.fields):
                raise ValueError("Need consistent fields")
            to_write = [str(arg) for arg in args]
        elif kwargs:
            if "ID" not in kwargs:
                raise ValueError("I'll need to see some ID")
            full_vals = self.defaults.copy()
            full_vals.update(kwargs)
            to_write = [str(full_vals.get(field, "")) for field in self.fields]
        else:
            # No values at all: fail before anything is written, instead of
            # emitting a stray newline and hitting an unbound local.
            raise ValueError("I'll need to see some ID")
        self._outstream.write("\n")
        self._outstream.write(",".join(to_write))

    def write_batch(self, *args, **kwargs):
        """Writes a batch of lines into the CSV.

        Here each argument should be a list with the same length.

        Arguments
        ---------
        *args
            Supply every field with a value in positional form OR.
        **kwargs
            Supply certain fields by key. The ID field is mandatory for all
            lines, but others can be left empty.

        Raises
        ------
        ValueError
            If positional and named fields are mixed, if the number of
            positional columns does not match the number of fields, or if
            named columns do not include ID.
        """
        if args and kwargs:
            raise ValueError(
                "Use either positional fields or named fields, but not both.")
        if args:
            if len(args) != len(self.fields):
                raise ValueError("Need consistent fields")
            # Transpose the columns into rows.
            for arg_row in zip(*args):
                self.write(*arg_row)
        if kwargs:
            if "ID" not in kwargs:
                raise ValueError("I'll need to see some ID")
            keys = kwargs.keys()
            for value_row in zip(*kwargs.values()):
                kwarg_row = dict(zip(keys, value_row))
                self.write(**kwarg_row)

    @staticmethod
    def _expand_data_fields(data_fields):
        """Expand each data field into field, field_format and field_opts."""
        expanded = []
        for data_field in data_fields:
            expanded.append(data_field)
            expanded.append(data_field + "_format")
            expanded.append(data_field + "_opts")
        return expanded


def write_txt_file(data, filename, sampling_rate=None):
    """Write data in text format.

    Tensors and numpy arrays are converted with ``tolist()``; a list is
    written one element per line and a string is written as a single line.

    Arguments
    ---------
    data : str, list, paddle.tensor, numpy.ndarray
        The data to write in the text file.
    filename : str
        Path to file where to write the data.
    sampling_rate : None
        Not used, just here for interface compatibility.

    Returns
    -------
    None

    """
    del sampling_rate  # Not used.
    # Create the parent directory when the path has one. For a bare file name
    # os.path.dirname() returns "", and os.makedirs("") would raise.
    parent_dir = os.path.dirname(filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(filename, "w") as fout:
        if isinstance(data, paddle.Tensor):
            data = data.tolist()
        if isinstance(data, np.ndarray):
            data = data.tolist()
        if isinstance(data, list):
            for line in data:
                print(line, file=fout)
        if isinstance(data, str):
            print(data, file=fout)


def write_stdout(data, filename=None, sampling_rate=None):
    """Print data to standard output.

    Tensors and numpy arrays are first converted with ``tolist()``. A list
    is printed one element per line; a string is printed as is.

    Arguments
    ---------
    data : str, list, paddle.Tensor, numpy.ndarray
        The data to print.
    filename : None
        Not used, just here for compatibility.
    sampling_rate : None
        Not used, just here for compatibility.

    Returns
    -------
    None

    """
    # paddle.Tensor and np.ndarray are both flattened to plain Python data.
    if isinstance(data, paddle.Tensor):
        data = data.tolist()
    elif isinstance(data, np.ndarray):
        data = data.tolist()

    if isinstance(data, list):
        for entry in data:
            print(entry)
    elif isinstance(data, str):
        print(data)


def length_to_mask(length, max_len=None, dtype=None, device=None):
    """Build a per-sequence mask marking positions below each length.

    Arguments
    ---------
    length : LongTensor
        Containing the length of each sequence in the batch. Must be 1D.
    max_len : int
        Max length for the mask, also the size of the second dimension.
        Defaults to the largest value in length.
    dtype : dtype, default: None
        The dtype of the generated mask. Defaults to the dtype of length.
    device: device, default: None
        The device to put the mask variable. Only resolved here; it is not
        passed on when the mask tensor is created.

    Returns
    -------
    mask : tensor
        The binary mask.

    """
    assert len(length.shape) == 1

    if max_len is None:
        max_len = length.max().long().item()

    # Row of positions 0..max_len-1, repeated for every sequence, compared
    # against that sequence's length.
    positions = paddle.arange(max_len, dtype=length.dtype)
    positions = positions.expand([len(length), max_len])
    mask = positions < length.unsqueeze(1)

    if dtype is None:
        dtype = length.dtype

    if device is None:
        device = length.device

    return paddle.to_tensor(mask, dtype=dtype)


def read_kaldi_lab(kaldi_ali, kaldi_lab_opts):
    """Read labels in kaldi format through kaldi_io.

    Arguments
    ---------
    kaldi_ali : str
        Path to directory where kaldi alignments are stored.
    kaldi_lab_opts : str
        A string that contains the options for reading the kaldi alignments.

    Returns
    -------
    lab : dict
        A dictionary containing the labels.

    Note
    ----
    This depends on kaldi-io-for-python. Install it separately.
    See: https://github.com/vesis84/kaldi-io-for-python
    """
    # Optional dependency, imported only when this reader is used.
    try:
        import kaldi_io
    except ImportError:
        raise ImportError("Could not import kaldi_io. Install it to use this.")

    # Piped command handed to kaldi_io: decompress the alignments and run
    # them through the tool given in kaldi_lab_opts with the final model.
    command = ("gunzip -c " + kaldi_ali + "/ali*.gz | " + kaldi_lab_opts +
               " " + kaldi_ali + "/final.mdl ark:- ark:-|")
    return dict(kaldi_io.read_vec_int_ark(command))


def get_md5(file):
    """Compute the md5 checksum of a file.

    Arguments
    ---------
    file : str
        Path to file for which compute the checksum.

    Returns
    -------
    md5
        Hexadecimal checksum string for the given filepath.
    """
    chunk_size = 65536  # read in 64kb pieces
    digest = hashlib.md5()
    with open(file, "rb") as stream:
        # An empty read marks the end of the file.
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def save_md5(files, out_file):
    """Store the md5 checksums of several files as a pickled dict.

    Arguments
    ---------
    files : list
        List of input files from which we will compute the md5.
    out_file : str
        The path where to store the output pkl file.

    Returns
    -------
    None
    """
    # Map each input path to its checksum, then pickle the mapping.
    checksums = {path: get_md5(path) for path in files}
    save_pkl(checksums, out_file)


def save_pkl(obj, file):
    """Pickle an object into a file.

    Arguments
    ---------
    obj : object
        Object to save in pkl format
    file : str
        Path to the output file

    """
    with open(file, "wb") as sink:
        pickle.dump(obj, sink)


def load_pkl(file):
    """Load an object from a pkl file, guarded by a ``.lock`` marker file.

    For an example, see `save_pkl`.

    Arguments
    ---------
    file : str
        Path to the input pkl file.

    Returns
    -------
    The loaded object.
    """
    lock_path = file + ".lock"

    # Another process may be reading the same file: wait up to 100 one-second
    # rounds for its lock file to go away, then proceed regardless.
    for _ in range(100):
        if not os.path.isfile(lock_path):
            break
        time.sleep(1)

    try:
        # Create our own lock marker, then read the pickle.
        open(lock_path, "w").close()
        with open(file, "rb") as stream:
            return pickle.load(stream)
    finally:
        # Always clean up the lock marker, even when loading failed.
        if os.path.isfile(lock_path):
            os.remove(lock_path)


def prepend_bos_token(label, bos_index):
    """Create labels with <bos> token at the beginning.

    Arguments
    ---------
    label : IntTensor
        Containing the original labels. Must be of size: [batch_size, max_length].
    bos_index : int
        The index for <bos> token.

    Returns
    -------
    new_label : tensor
        The new int64 label tensor with <bos> at the beginning, of size
        [batch_size, max_length + 1].

    """
    # Built from paddle functions only (cast / full / concat) rather than
    # torch-style tensor methods such as long() or new_zeros().
    new_label = paddle.cast(label, "int64")
    batch_size = label.shape[0]

    # One <bos> column, same dtype as the labels it is concatenated with.
    bos = paddle.full([batch_size, 1], bos_index, dtype=new_label.dtype)
    new_label = paddle.concat([bos, new_label], axis=1)
    return new_label


def append_eos_token(label, length, eos_index):
    """Create labels with <eos> token appended.

    Arguments
    ---------
    label : IntTensor
        Containing the original labels. Must be of size: [batch_size, max_length]
    length : LongTensor
        Containing the original length of each label sequences. Must be 1D.
    eos_index : int
        The index for <eos> token.

    Returns
    -------
    new_label : tensor
        The new int32 label tensor with <eos> appended, of size
        [batch_size, max_length + 1].

    """
    new_label = paddle.to_tensor(label, dtype="int32").clone()
    batch_size = label.shape[0]

    # Extra zero column so that a full-length sequence still has room for
    # its <eos> token.
    pad = paddle.zeros([batch_size, 1], dtype=new_label.dtype)

    # paddle.concat takes the concatenation dimension as `axis`.
    new_label = paddle.concat([new_label, pad], axis=1)
    # For every row, write <eos> at the position given by that row's length.
    new_label[paddle.arange(batch_size), paddle.to_tensor(
        length, dtype="int64")] = eos_index
    return new_label


def merge_char(sequences, space="_"):
    """Join character sequences into word sequences.

    Arguments
    ---------
    sequences : list
        Each item contains a list, and this list contains a character sequence.
    space : string
        The token represents space. Default: _

    Returns
    -------
    The list contains word sequences for each sentence.

    """
    # Glue the characters of a sentence together, then cut at the space token.
    return ["".join(seq).split(space) for seq in sequences]


def merge_csvs(data_folder, csv_lst, merged_csv):
    """Merging several csv files into one file.

    All input files must share the same header line; it is written once at
    the top of the merged file, followed by the data lines of every input in
    the order given. If the merged file already exists, nothing is done.

    Arguments
    ---------
    data_folder : string
        The folder to store csv files to be merged and after merging.
    csv_lst : list
        Filenames of csv file to be merged.
    merged_csv : string
        The filename to write the merged csv file.

    Raises
    ------
    ValueError
        If one of the files has a header different from the first file's.

    """
    write_path = os.path.join(data_folder, merged_csv)
    if os.path.isfile(write_path):
        logger.info("Skipping merging. Completed in previous run.")
        # Actually skip, as the message says.
        return
    with open(os.path.join(data_folder, csv_lst[0])) as f:
        header = f.readline()
    lines = []
    for csv_file in csv_lst:
        with open(os.path.join(data_folder, csv_file)) as f:
            for i, line in enumerate(f):
                if i == 0:
                    # Checking header
                    if line != header:
                        raise ValueError("Different header for "
                                         f"{csv_lst[0]} and {csv_file}.")
                    continue
                lines.append(line)
    with open(write_path, "w") as f:
        f.write(header)
        f.writelines(lines)
    logger.info(f"{write_path} is created.")


def split_word(sequences, space="_"):
    """Break word sequences into character sequences.

    Arguments
    ---------
    sequences : list
        Each item contains a list, and this list contains a words sequence.
    space : string
        The token represents space. Default: _

    Returns
    -------
    The list contains character sequences for each sentence.

    """
    # Join the words of a sentence with the space token, then explode the
    # resulting string into single characters.
    return [list(space.join(seq)) for seq in sequences]


================================================
FILE: paddlespeech/s2t/io/speechbrain/dataloader.py
================================================
# Copyright (c) 2023 speechbrain Authors. All Rights Reserved.
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# 
# Modified from speechbrain 2023 (https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/dataio/dataloader.py)
"""Paddle compatible DataLoaders

Essentially we extend Paddle DataLoader by adding the ability to save the
data loading state, so that a checkpoint may be saved in the middle of an
epoch.

Authors:
  * Aku Rouhe 2020
"""
import collections
import functools
import logging
import warnings

import paddle
from paddle.io import DataLoader

from paddlespeech.s2t.io.speechbrain.data_utils import batch_pad_right
from paddlespeech.s2t.io.speechbrain.data_utils import mod_default_collate
from paddlespeech.s2t.io.speechbrain.dataset import DynamicItemDataset
from paddlespeech.s2t.io.speechbrain.sampler import ReproducibleRandomSampler
PaddedData = collections.namedtuple("PaddedData", ["data", "lengths"])
import numpy


class Wav2vec2DataLoader(DataLoader):
    """Paddle DataLoader with a keyword set resembling other DataLoader APIs.

    The first dataset item is inspected to decide ``return_list``: it is True
    when that item is a tuple or list. ``pin_memory``,
    ``multiprocessing_context`` and ``generator`` are accepted but not
    forwarded to the base class. When a ``sampler`` is given, it replaces the
    sampler of the batch sampler after base-class construction.
    """

    def __init__(self,
                 dataset,
                 batch_size=1,
                 shuffle=False,
                 sampler=None,
                 batch_sampler=None,
                 num_workers=0,
                 collate_fn=None,
                 pin_memory=False,
                 drop_last=False,
                 timeout=0,
                 worker_init_fn=None,
                 multiprocessing_context=None,
                 generator=None):
        # Peek at the first example to pick the return format.
        first_example = dataset[0]
        items_are_sequences = isinstance(first_example, (tuple, list))

        super().__init__(
            dataset,
            feed_list=None,
            places=None,
            return_list=items_are_sequences,
            batch_sampler=batch_sampler,
            batch_size=batch_size,
            shuffle=shuffle,
            drop_last=drop_last,
            collate_fn=collate_fn,
            num_workers=num_workers,
            use_buffer_reader=True,
            use_shared_memory=False,
            timeout=timeout,
            worker_init_fn=worker_init_fn)

        # Swap in the caller's sampler once the batch sampler exists.
        if sampler is not None:
            self.batch_sampler.sampler = sampler


def PaddedBatch(
        examples,
        padded_keys=None,
        device_prep_keys=None,
        padding_func=batch_pad_right,
        padding_kwargs={},
        nonpadded_stack=True, ):
    """Collate a list of example dicts into one batch dict.

    Arguments
    ---------
    examples : list
        List of example dicts; the keys of the first example define the keys
        of the batch.
    padded_keys : list, None
        Keys to pad. If None, a key is padded when its first value is a
        numpy.ndarray.
    device_prep_keys : list, None
        Keys tracked for device preparation. If None, a key is tracked when
        its first (possibly collated) value is a paddle.Tensor. The tracked
        list is only collected locally; it is not part of the return value.
    padding_func : callable
        Called as padding_func(values, **padding_kwargs); its result is
        unpacked into a PaddedData(data, lengths).
    padding_kwargs : dict
        Extra keyword arguments for padding_func.
    nonpadded_stack : bool
        Whether non-padded values are passed through mod_default_collate.

    Returns
    -------
    dict
        Maps each key to either a PaddedData or the (collated) values.
    """
    num_examples = len(examples)  # computed as before; not used further
    example_keys = list(examples[0].keys())
    padded_names = []
    device_prep_names = []
    batch = {}
    for key in example_keys:
        values = [example[key] for example in examples]

        # Decide whether this key goes through padding.
        if padded_keys is None:
            needs_padding = isinstance(values[0], numpy.ndarray)
        else:
            needs_padding = key in padded_keys

        if needs_padding:
            padded_names.append(key)
            batch[key] = PaddedData(*padding_func(values, **padding_kwargs))
        else:
            # Default collate (lists of equal sized tensors to batch tensors,
            # etc.) unless stacking was switched off.
            if nonpadded_stack:
                values = mod_default_collate(values)
            batch[key] = values

        # Same explicit-list-or-type-based rule for device preparation keys.
        if device_prep_keys is None:
            is_device_key = isinstance(values[0], paddle.Tensor)
        else:
            is_device_key = key in device_prep_keys
        if is_device_key:
            device_prep_names.append(key)
    return batch


def make_dataloader(dataset, stage, **loader_kwargs):
    """Create a Wav2vec2DataLoader for a dataset.

    For DynamicItemDatasets (which return dicts), PaddedBatch is used as the
    default collate_fn when none is given.

    When shuffle is requested, it is implemented with a
    ReproducibleRandomSampler passed as the sampler, and the shuffle option
    itself is dropped.

    Arguments
    ---------
    dataset : Dataset
        The dataset to make a DataLoader for.
    stage
        Not used by this function.
    **loader_kwargs : dict
        Keyword args forwarded to Wav2vec2DataLoader.

    Returns
    -------
    Wav2vec2DataLoader
        The configured data loader.

    Raises
    ------
    ValueError
        If both a truthy shuffle and a sampler are given.
    """
    # Dict-producing datasets get padding-aware collation by default.
    if isinstance(dataset,
                  DynamicItemDataset) and "collate_fn" not in loader_kwargs:
        loader_kwargs["collate_fn"] = PaddedBatch

    # Reproducible random sampling stands in for plain shuffling.
    shuffle_requested = loader_kwargs.get("shuffle", False)
    if shuffle_requested:
        if loader_kwargs.get("sampler") is not None:
            raise ValueError("Cannot specify both shuffle=True and a "
                             "sampler in loader_kwargs")
        loader_kwargs["sampler"] = ReproducibleRandomSampler(dataset)
        # A sampler and shuffle can't both be set. Removing the key is safe
        # for the caller: loader_kwargs is this call's own **kwargs dict.
        loader_kwargs.pop("shuffle")

    return Wav2vec2DataLoader(dataset, **loader_kwargs)


================================================
FILE: paddlespeech/s2t/io/speechbrain/dataset.py
================================================
# Copyright (c) 2023 speechbrain Authors. All Rights Reserved.
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# 
# Modified from speechbrain 2023 (https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/dataio/dataset.py)
import contextlib
import copy
import logging
from types import MethodType

from paddle.io import Dataset

from paddlespeech.s2t.io.speechbrain.data_pipeline import DataPipeline
from paddlespeech.s2t.io.speechbrain.dataio import load_data_csv
from paddlespeech.s2t.io.speechbrain.dataio import load_data_json

logger = logging.getLogger(__name__)


class DynamicItemDataset(Dataset):
    """Dataset that reads, wrangles, and produces dicts.

    Each data point dict provides some items (by key), for example, a path to a
    wavefile with the key "wav_file". When a data point is fetched from this
    Dataset, more items are produced dynamically, based on pre-existing items
    and other dynamic created items. For example, a dynamic item could take the
    wavfile path and load the audio from the disk.

    The dynamic items can depend on other dynamic items: a suitable evaluation
    order is used automatically,  as long as there are no circular dependencies.

    A specified list of keys is collected in the output dict. These can be items
    in the original data or dynamic items. If some dynamic items are not
    requested, nor depended on by other requested items, they won't be computed.
    So for example if a user simply wants to iterate over the text, the
    time-consuming audio loading can be skipped.

    About the format:
    Takes a dict of dicts as the collection of data points to read/wrangle.
    The top level keys are data point IDs.
    Each data point (example) dict should have the same keys, corresponding to
    different items in that data point.

    Altogether the data collection could look like this:

    >>> data = {
    ...  "spk1utt1": {
    ...      "wav_file": "/path/to/spk1utt1.wav",
    ...      "text": "hello world",
    ...      "speaker": "spk1",
    ...      },
    ...  "spk1utt2": {
    ...      "wav_file": "/path/to/spk1utt2.wav",
    ...      "text": "how are you world",
    ...      "speaker": "spk1",
    ...      }
    ... }

    NOTE
    ----
        The top-level key, the data point id, is implicitly added as an item
        in the data point, with the key "id"

    Each dynamic item is configured by three things: a key, a func, and a list
    of argkeys. The key should be unique among all the items (dynamic or not) in
    each data point. The func is any callable, and it returns the dynamic item's
    value. The callable is called with the values of other items as specified
    by the argkeys list (as positional args, passed in the order specified by
    argkeys).

    Arguments
    ---------
    data : dict
        Dictionary containing single data points (e.g. utterances).
    dynamic_items : list, optional
        Configuration for the dynamic items produced when fetching an example.
        List of DynamicItems or dicts with the format::
            func: <callable> # To be called
            takes: <list> # key or list of keys of args this takes
            provides: key # key or list of keys that this provides
    output_keys : dict, list, optional
        List of keys (either directly available in data or dynamic items)
        to include in the output dict when data points are fetched.

        If a dict is given; it is used to map internal keys to output keys.
        From the output_keys dict key:value pairs the key appears outside,
        and value is the internal key.
    """

    def __init__(
            self,
            data,
            dynamic_items=[],
            output_keys=[], ):
        self.data = data
        self.data_ids = list(self.data.keys())
        # The keys of the first data point define the static keys.
        static_keys = list(self.data[self.data_ids[0]].keys())
        if "id" in static_keys:
            raise ValueError("The key 'id' is reserved for the data point id.")
        else:
            static_keys.append("id")
        self.pipeline = DataPipeline(static_keys, dynamic_items)
        self.set_output_keys(output_keys)

    def __len__(self):
        return len(self.data_ids)

    def __getitem__(self, index):
        data_id = self.data_ids[index]
        data_point = self.data[data_id]
        # The id is added on a fresh dict; the stored data point is untouched.
        return self.pipeline.compute_outputs({"id": data_id, **data_point})

    def add_dynamic_item(self, func, takes=None, provides=None):
        """Makes a new dynamic item available on the dataset.

        Two calling conventions. For DynamicItem objects, just use:
        add_dynamic_item(dynamic_item).
        But otherwise, should use:
        add_dynamic_item(func, takes, provides).

        Arguments
        ---------
        func : callable, DynamicItem
            If a DynamicItem is given, adds that directly. Otherwise a
            DynamicItem is created, and this specifies the callable to use. If
            a generator function is given, then create a GeneratorDynamicItem.
            Otherwise creates a normal DynamicItem.
        takes : list, str
            List of keys. When func is called, each key is resolved to
            either an entry in the data or the output of another dynamic_item.
            The func is then called with these as positional arguments,
            in the same order as specified here.
            A single arg can be given directly.
        provides : str
            Unique key or keys that this provides.
        """
        self.pipeline.add_dynamic_item(func, takes, provides)

    def set_output_keys(self, keys):
        """Use this to change the output keys.

        These are the keys that are actually evaluated when a data point
        is fetched from the dataset.

        Arguments
        ---------
        keys : dict, list
            List of keys (str) to produce in output.

            If a dict is given; it is used to map internal keys to output keys.
            From the output_keys dict key:value pairs the key appears outside,
            and value is the internal key.
        """
        self.pipeline.set_output_keys(keys)

    @contextlib.contextmanager
    def output_keys_as(self, keys):
        """Context manager to temporarily set output keys.

        The previous output keys are restored when the block exits, also when
        it exits through an exception.

        NOTE
        ----
        Not thread-safe. While in this context manager, the output keys
        are affected for any call.
        """
        saved_output = self.pipeline.output_mapping
        self.pipeline.set_output_keys(keys)
        try:
            yield self
        finally:
            # Without the finally, an error inside the block would leave the
            # temporary keys in place.
            self.pipeline.set_output_keys(saved_output)

    def filtered_sorted(
            self,
            key_min_value={},
            key_max_value={},
            key_test={},
            sort_key=None,
            reverse=False,
            select_n=None, ):
        """Get a filtered and/or sorted version of this, shares static data.

        The reason to implement these operations in the same method is that
        computing some dynamic items may be expensive, and this way the
        filtering and sorting steps don't need to compute the dynamic items
        twice.

        Arguments
        ---------
        key_min_value : dict
            Map from key (in data or in dynamic items) to limit, will only keep
            data_point if data_point[key] >= limit
        key_max_value : dict
            Map from key (in data or in dynamic items) to limit, will only keep
            data_point if data_point[key] <= limit
        key_test : dict
            Map from key (in data or in dynamic items) to func, will only keep
            data_point if bool(func(data_point[key])) == True
        sort_key : None, str
            If not None, sort by data_point[sort_key]. Default is ascending
            order.
        reverse : bool
            If True, sort in descending order.
        select_n : None, int
            If not None, only keep (at most) the first n filtered data_points.
            The possible sorting is applied, but only on the first n data
            points found. Meant for debugging.

        Returns
        -------
        FilteredSortedDynamicItemDataset
            Shares the static data, but has its own output keys and
            dynamic items (initially deep copied from this, so they have the
            same dynamic items available)

        NOTE
        ----
        Temporarily changes the output keys!
        """
        filtered_sorted_ids = self._filtered_sorted_ids(
            key_min_value,
            key_max_value,
            key_test,
            sort_key,
            reverse,
            select_n, )
        return FilteredSortedDynamicItemDataset(
            self, filtered_sorted_ids)  # NOTE: defined below

    def _filtered_sorted_ids(
            self,
            key_min_value={},
            key_max_value={},
            key_test={},
            sort_key=None,
            reverse=False,
            select_n=None, ):
        """Returns a list of data ids, fulfilling the sorting and filtering."""

        def combined_filter(computed):
            """Applies filter."""
            for key, limit in key_min_value.items():
                # NOTE: docstring promises >= so using that.
                # Mathematically could also use < for nicer syntax, but
                # maybe with some super special weird edge case some one can
                # depend on the >= operator
                if computed[key] >= limit:
                    continue
                return False
            for key, limit in key_max_value.items():
                if computed[key] <= limit:
                    continue
                return False
            for key, func in key_test.items():
                if bool(func(computed[key])):
                    continue
                return False
            return True

        # Only the keys needed for filtering and sorting are computed.
        temp_keys = (set(key_min_value.keys()) | set(key_max_value.keys()) |
                     set(key_test.keys()) |
                     set([] if sort_key is None else [sort_key]))
        filtered_ids = []
        with self.output_keys_as(temp_keys):
            for i, data_id in enumerate(self.data_ids):
                if select_n is not None and len(filtered_ids) == select_n:
                    break
                # Add the id on a copy: writing "id" into the stored data
                # point would modify the static data shared with other
                # datasets built from the same dict.
                data_point = {**self.data[data_id], "id": data_id}
                computed = self.pipeline.compute_outputs(data_point)
                if combined_filter(computed):
                    if sort_key is not None:
                        # Add (main sorting index, current index, data_id)
                        # So that we maintain current sorting and don't compare
                        # data_id values ever.
                        filtered_ids.append((computed[sort_key], i, data_id))
                    else:
                        filtered_ids.append(data_id)
        if sort_key is not None:
            filtered_sorted_ids = [
                tup[2] for tup in sorted(filtered_ids, reverse=reverse)
            ]
        else:
            filtered_sorted_ids = filtered_ids
        return filtered_sorted_ids

    @classmethod
    def from_json(cls,
                  json_path,
                  replacements={},
                  dynamic_items=[],
                  output_keys=[]):
        """Load a data prep JSON file and create a Dataset based on it."""
        data = load_data_json(json_path, replacements)
        return cls(data, dynamic_items, output_keys)

    @classmethod
    def from_csv(cls,
                 csv_path,
                 replacements={},
                 dynamic_items=[],
                 output_keys=[]):
        """Load a data prep CSV file and create a Dataset based on it."""
        data = load_data_csv(csv_path, replacements)
        return cls(data, dynamic_items, output_keys)

    @classmethod
    def from_arrow_dataset(cls,
                           dataset,
                           replacements={},
                           dynamic_items=[],
                           output_keys=[]):
        """Loading a prepared huggingface dataset.

        A ``keys`` method returning the integer indices 0..len(dataset)-1 is
        bound onto the given dataset object, which is then used as the data.
        ``replacements`` is accepted but not used here.
        """

        # define an unbound method to generate pseudo keys
        def keys(self):
            "Returns the keys."
            return [i for i in range(dataset.__len__())]

        # bind this method to arrow dataset
        dataset.keys = MethodType(keys, dataset)
        return cls(dataset, dynamic_items, output_keys)


class FilteredSortedDynamicItemDataset(DynamicItemDataset):
    """Possibly filtered, possibly sorted DynamicItemDataset.

    Shares the static data (reference).
    Has its own dynamic_items and output_keys (deepcopy).

    Instances are derived from an existing dataset and a list of data ids,
    so the ``from_*`` alternative constructors inherited from
    DynamicItemDataset are disabled here and raise ``TypeError``.

    Arguments
    ---------
    from_dataset : DynamicItemDataset
        The dataset whose ``data`` is shared and whose ``pipeline`` is copied.
    data_ids : list
        The data ids stored as ``self.data_ids`` for this dataset.
    """

    def __init__(self, from_dataset, data_ids):
        # Static data is shared by reference with the source dataset.
        self.data = from_dataset.data
        self.data_ids = data_ids
        # The pipeline is deep-copied so that changes made on this dataset do
        # not affect the source dataset (and vice versa).
        self.pipeline = copy.deepcopy(from_dataset.pipeline)

    @classmethod
    def from_json(cls,
                  json_path,
                  replacements={},
                  dynamic_items=None,
                  output_keys=None):
        """Disabled: always raises ``TypeError``."""
        raise TypeError(
            "Cannot create {} directly!".format(cls.__name__))

    @classmethod
    def from_csv(cls,
                 csv_path,
                 replacements={},
                 dynamic_items=None,
                 output_keys=None):
        """Disabled: always raises ``TypeError``."""
        raise TypeError(
            "Cannot create {} directly!".format(cls.__name__))

    @classmethod
    def from_arrow_dataset(cls,
                           dataset,
                           replacements={},
                           dynamic_items=None,
                           output_keys=None):
        """Disabled: always raises ``TypeError``.

        Without this override the inherited constructor would first attach a
        ``keys`` method to ``dataset`` and only then fail inside ``__init__``
        with an unrelated argument-count error.
        """
        raise TypeError(
            "Cannot create {} directly!".format(cls.__name__))


def add_dynamic_item(datasets, func, takes=None, provides=None):
    """Register one dynamic item on each of several datasets.

    Calls ``add_dynamic_item(func, takes, provides)`` on every element of
    ``datasets``, in iteration order.
    """
    for ds in datasets:
        ds.add_dynamic_item(func, takes, provides)


def set_output_keys(datasets, output_keys):
    """Apply the same output keys to each of several datasets.

    Calls ``set_output_keys(output_keys)`` on every element of ``datasets``,
    in iteration order.
    """
    for ds in datasets:
        ds.set_output_keys(output_keys)


================================================
FILE: paddlespeech/s2t/io/speechbrain/depgraph.py
================================================
# Copyright (c) 2023 speechbrain Authors. All Rights Reserved.
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# 
# Modified from speechbrain 2023 (https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/utils/depgraph.py)
"""A dependency graph for finding evaluation order.

Authors:
    * Aku Rouhe 2020
"""
import collections
import uuid


class CircularDependencyError(ValueError):
    """Raised when circular dependencies are encountered while looking for an
    evaluation order in a DependencyGraph."""


# A node in DependencyGraph.
#   key   -- the hashable identifier of the node
#   edges -- list of positions (in DependencyGraph.digraph) of the nodes that
#            this node depends on
#   data  -- whatever was attached through DependencyGraph.add_node; None for
#            nodes that were only created implicitly by add_edge
DGNode = collections.namedtuple("DGNode", ["key", "edges", "data"])


class DependencyGraph:
    """General-purpose dependency graph.

    Essentially a directed acyclic graph.
    Usually used to find an evaluation order for e.g. variable substitution
    The relation that an edge between A and B represents is:
    "A depends on B, i.e. B should be evaluated before A"

    Nodes can be added explicitly or they can be created implicitly
    while adding edges.
    Nodes have keys, which should be some hashable value that identifies
    the elements the graph represents in your use case. E.G. they can just
    be the variable name you want to substitute.
    However, if needed, more generally you can attach any data to a node
    (e.g. a path in your tree), and if so desired, a unique key can be
    created for you. You'll only need to know that key while adding edges
    to/from it.
    Implicit keys and explicit keys can also be mixed.
    """

    def __init__(self):
        # Nodes in insertion order; edges refer to positions in this list.
        self.digraph = []
        # Maps each node key to its position in self.digraph.
        self.key2ind = {}
        # Guard for manual duplicates (but not implicitly added ones).
        # Keys are hashable (they index key2ind), so a set is sufficient and
        # gives constant-time duplicate checks.
        self._manually_added_keys = set()

    @staticmethod
    def get_unique_key():
        """Returns a unique hashable identifier."""
        return uuid.uuid4()

    def add_node(self, key=None, data=None):
        """Adds a node explicitly.

        Arguments
        ---------
        key : hashable, optional
            If not given, a key is created for you.
        data : Any, optional
            Any additional data you wish to attach to this node.

        Returns
        -------
        hashable
            The key that was used (either yours or generated).

        Raises
        ------
        ValueError
            If node with the given key has already been added explicitly
            (with this method, not "add_edge").
        """
        if key is None:
            key = self.get_unique_key()
        elif key in self._manually_added_keys:
            raise ValueError("Adding duplicate node: {key}".format(key=key))
        else:
            self._manually_added_keys.add(key)
        if key in self.key2ind:  # Implicitly added already; don't add again.
            ind = self.key2ind[key]
            node = self.digraph[ind]
            # All that this operation can do is add data:
            self.digraph[ind] = DGNode(node.key, node.edges, data)
            return key
        self.key2ind[key] = len(self.digraph)
        self.digraph.append(DGNode(key, [], data))
        return key

    def add_edge(self, from_key, to_key):
        """Adds an edge, and implicitly also creates nodes for keys which have
        not been seen before. This will not let you add data to your nodes.
        The relation encodes: "from_key depends on to_key"
        (to_key must be evaluated before from_key).

        Adding the same edge more than once has no further effect.

        Arguments
        ---------
        from_key : hashable
            The key which depends on.
        to_key : hashable
            The key which is depended on.

        Returns
        -------
        None
        """
        from_ind = self._get_ind_and_add_if_new(from_key)
        to_ind = self._get_ind_and_add_if_new(to_key)
        edges_list = self.digraph[from_ind].edges
        if to_ind not in edges_list:
            edges_list.append(to_ind)

    def _get_ind_and_add_if_new(self, key):
        """Returns the position of ``key`` in the graph, implicitly creating
        a data-less node for it when the key has not been seen before."""
        if key not in self.key2ind:
            self.key2ind[key] = len(self.digraph)
            self.digraph.append(DGNode(key, [], None))
        return self.key2ind[key]

    def is_valid(self):
        """Checks if an evaluation order can be found.

        A dependency graph is evaluatable if there are no circular
        dependencies, i.e., the graph is acyclic.

        Returns
        -------
        bool
            Indicating if the graph is evaluatable.
        """
        return not self._find_first_cycle()

    def get_evaluation_order(self, selected_keys=None):
        """Finds one valid evaluation order.

        There can be many different valid
        orders.
        NOTE: Generates output one DGNode at a time. May generate DGNodes
        before it finds a circular dependency. If you really need to know
        whether an order can be found, check is_valid() first. However,
        the algorithm for finding cycles is essentially the same as the one
        used for finding an evaluation order, so for very large graphs...
        Ah well, but maybe then you should be using some other solution
        anyway.

        Arguments
        ---------
        selected_keys : list, None
            List of keys. If not None, only the selected keys are guaranteed
            in the evaluation order (along with the keys they depend on).

        Yields
        ------
        DGNode
            The added DGNodes in a valid evaluation order.
            See the DGNode namedtuple above.

        Raises
        ------
        CircularDependencyError
            If a circular dependency is found.
        """
        # Positions already emitted (or being emitted); shared by all walks so
        # that every node is yielded at most once.
        seen_ever = set()

        def toposort(root_ind, visited):
            """Depth-first post-order walk: yields the positions that
            ``root_ind`` depends on before ``root_ind`` itself."""
            here = visited + [root_ind]
            # ``visited`` is the current path; meeting a node again on the
            # same path means there is a cycle.
            if root_ind in visited:
                raise CircularDependencyError("{cycle}".format(
                    cycle=" -> ".join(str(self.digraph[i].key) for i in here)))
            if root_ind in seen_ever:
                return  # Yield nothing
            seen_ever.add(root_ind)
            for to_ind in self.digraph[root_ind].edges:
                yield from toposort(to_ind, visited=here)
            yield root_ind

        if selected_keys is None:
            start_inds = range(len(self.digraph))
        else:
            start_inds = [self.key2ind[key] for key in selected_keys]

        for start_ind in start_inds:
            for ind in toposort(start_ind, []):
                yield self.digraph[ind]

    def _find_first_cycle(self):
        """Depth-first search based algorithm for finding cycles in the graph.

        Returns
        -------
        list
            The positions along the first cycle found (the repeated position
            appears at both ends), or an empty list if the graph is acyclic.
        """
        # Positions that have already been fully explored or are in progress.
        seen_ever = set()

        def cycle_dfs(root_ind, visited):
            """Returns the path that closes a cycle reachable from
            ``root_ind``, or an empty list when there is none."""
            here = visited + [root_ind]
            if root_ind in visited:
                return here
            if root_ind in seen_ever:
                return []
            seen_ever.add(root_ind)
            for to_ind in self.digraph[root_ind].edges:
                cycle = cycle_dfs(to_ind, here)
                if cycle:
                    return cycle
            return []

        for ind in range(len(self.digraph)):
            if ind not in seen_ever:
                cycle = cycle_dfs(ind, [])
                if cycle:
                    return cycle
        return []

    def __contains__(self, key):
        # Allows the syntax:
        # 'key' in dependency_graph
        return key in self.key2ind


================================================
FILE: paddlespeech/s2t/io/speechbrain/make_dataloader.py
================================================
# Copyright (c) 2023 speechbrain Authors. All Rights Reserved.
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Modified from speechbrain 2023 (https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/core.py)
import paddlespeech.s2t.io.speechbrain.dataloader


def _train_loader_specifics(self, dataset, loader_kwargs):
    sampler = loader_kwargs.get("sampler", None)
    # Shuffling should really only matter for the train stage. Shuffling
    # will also lead to more padding in batches if the order was otherwise
    # sorted by length.
    shuffle = loader_kwargs.get("shuffle", False)
    if shuffle and not self.distributed_launch:
        if sampler is not None:
            raise ValueError("Cannot specify both shuffle=True"
                             "and a sampler in loader_kwargs")
        sampler = ReproducibleRandomSampler(dataset)
        self.train_sampler = sampler
        loader_kwargs["sampler"] = self.train_sampler
        # Delete the shuffle flag, since you cannot specify both a sampler and
        # shuffling:
        del loader_kwargs["shuffle"]

    # Possibly make a DistributedSampler or a wrapper for some other sampler
    if self.distributed_launch and not isinstance(dataset, IterableDataset):
        drop_last = loader_kwargs.get("drop_last", False)
        # num_replicas arg is equal to world_size
        # and retrieved automatically within
        # DistributedSampler obj.
        if sampler is not None:
            self.train_sampler = DistributedSamplerWrapper(
                sampler,
                rank=self.rank,
                drop_last=drop_last,
                shuffle=shuffle, )

            # with DistributedSamplerWrapper, one must disable shuffling for dataloader
            loader_kwargs["shuffle"] = False
            loader_kwargs["sampler"] = self.train_sampler
        elif loader_kwargs.get("batch_sampler") is None:
            # no sampler and batch-sampler
            self.train_sampler = DistributedSampler(
                dataset, rank=self.rank, shuffle=True, drop_last=drop_last)

            # with DistributedSamplerWrapper, one must disable shuffling for dataloader
            loader_kwargs["shuffle"] = False
            loader_kwargs["sampler"] = self.train_sampler
        else:  # batch_sampler was specified
            self.train_sampler = DistributedSamplerWrapper(
                loader_kwargs.get("batch_sampler", None),
                rank=self.rank,
                shuffle=True, )
            loader_kwargs["batch_sampler"] = self.train_sampler
    elif self.distributed_launch and isinstance(dataset, IterableDataset):
        logger.warning("Cannot automatically solve distributed sampling "
                       "for IterableDataset.")
    return loader_kwargs


def make_dataloader(self, dataset, stage, **loader_kwargs):
    """Creates DataLoaders for Datasets.

    Thin wrapper that forwards ``dataset`` and ``loader_kwargs`` to
    ``paddlespeech.s2t.io.speechbrain.dataloader.make_dataloader``.

    NOTE
    ----
    Some important DataLoader arguments are passed via **loader_kwargs,
    e.g., batch_size, num_workers, pin_memory.

    Arguments
    ---------
    self : object
        Accepted for signature compatibility; not used here.
    dataset : Dataset
        A set of data to use to create data loader.
    stage : Stage
        The stage of the experiment: Stage.TRAIN, Stage.VALID, Stage.TEST.
        Accepted for signature compatibility; not used here.
    **loader_kwargs : dict
        Additional keyword arguments to the DataLoader.
        E.g., batch_size, num_workers, pin_memory.

    Returns
    -------
    The object returned by the underlying ``make_dataloader``.
    """
    # ``import paddlespeech.s2t.io.speechbrain.dataloader`` binds only the
    # top-level name ``paddlespeech`` in this module, so the submodule has to
    # be reached through its fully qualified path (a bare ``dataloader`` would
    # raise NameError).
    return paddlespeech.s2t.io.speechbrain.dataloader.make_dataloader(
        dataset, **loader_kwargs)


================================================
FILE: paddlespeech/s2t/io/speechbrain/sampler.py
================================================
# Copyright (c) 2023 speechbrain Authors. All Rights Reserved.
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# 
# Modified from speechbrain 2023 (https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/dataio/sampler.py)
"""compatible samplers.

These determine the order of iteration through a dataset.

Authors:
  * Aku Rouhe 2020
  * Samuele Cornell 2020
  * Ralf Leibold 2020
  * Artem Ploujnikov 2021
  * Andreas Nautsch 2021
"""
import logging
from collections import Counter
from typing import List

import numpy as np
import paddle
from paddle.io import RandomSampler
from paddle.io import Sampler
from paddle.io import WeightedRandomSampler
from scipy.stats import lognorm

from paddlespeech.s2t.io.speechbrain.dataset import DynamicItemDataset

logger = logging.getLogger(__name__)


class ReproducibleRandomSampler(RandomSampler):
    """RandomSampler variant driven by an explicit seed and epoch counter.

    Takes the same arguments as `paddle.io.RandomSampler`, plus 'seed' and
    'epoch'; passing 'generator' is rejected. At the start of every
    iteration the generator obtained from ``paddle.seed`` is re-seeded with
    ``seed + epoch``.

    NOTE(review): whether `paddle.io.RandomSampler` actually draws its
    indices from that generator should be confirmed.

    Note
    ----
    Call `set_epoch` before every epoch. Otherwise, the same seed is used
    for every epoch.

    Arguments
    ---------
    data_source : Dataset
        The data source to sample indices for.
    seed : int
        The base seed to use for the random number generator. It is recommended
        to use a value which has a good mix of 0 and 1 bits.
    epoch : int
        The epoch to start at.

    """

    def __init__(self, data_source, seed=563375142, epoch=0, **kwargs):
        if "generator" in kwargs:
            raise ValueError("Cannot give a separate generator when using "
                             "ReproducibleRandomSampler")
        super().__init__(data_source, **kwargs)
        self.seed = int(seed)
        self.epoch = epoch
        self.gen = paddle.seed(1)

    def set_epoch(self, epoch):
        """
        Stores the epoch used for seeding. Plain attribute access to
        self.epoch works as well; the setter exists to mirror
        paddle.io.DistributedBatchSampler.
        """
        self.epoch = epoch

    def __iter__(self):
        epoch_seed = self.seed + self.epoch
        self.gen.manual_seed(epoch_seed)
        return super().__iter__()


class ReproducibleWeightedRandomSampler(WeightedRandomSampler):
    """A reproducible modification of WeightedRandomSampler.

    Also look at `paddle.io.WeightedRandomSampler`. This has
    the same behaviour and arguments, except for adding 'seed' and 'epoch' and
    not supporting 'generator'.

    Note
    ----
    Call `set_epoch` before every epoch. Otherwise, the sampler will produce the
    same sequence of indices every epoch.

    Arguments
    ---------
    weights : sequence of float
        Weights for each index. Doesn't need to sum to one.
    num_samples : int
        Number of samples to draw
    replacement : bool
        To draw with replacement or not (within an epoch of num_samples).
    seed : int
        The base seed to use for the random number generator. It is recommended
        to use a value which has a good mix of 0 and 1 bits.
    epoch : int
        The epoch to start at.

    Raises
    ------
    ValueError
        If a 'generator' keyword argument is given.
    """

    def __init__(
            self,
            weights,
            num_samples,
            replacement,
            seed=129491412,
            epoch=0,
            **kwargs, ):
        if "generator" in kwargs:
            # Name the class actually being built (this one or a subclass)
            # rather than the unrelated ReproducibleRandomSampler.
            MSG = ("Cannot give a separate generator when using " +
                   type(self).__name__)
            raise ValueError(MSG)
        super().__init__(weights, num_samples, replacement, **kwargs)
        self.seed = int(seed)
        self.epoch = epoch
        self.gen = paddle.seed(1)

    def set_epoch(self, epoch):
        """
        You can also just access self.epoch, but we maintain this interface
        to mirror paddle.io.DistributedBatchSampler
        """
        self.epoch = epoch

    def __iter__(self):
        # Re-seed with seed + epoch so each epoch starts from a known state.
        self.gen.manual_seed(self.seed + self.epoch)
        return super().__iter__()


class DynamicBatchSampler(Sampler):
    """This BatchSampler batches examples together by grouping them by their length.

    Every example in the batch have approximately the same length and
    thus padding is minimized.
    This enables faster training on datasets
    where length of examples can vary significantly (e.g Librispeech).
    Inspired by: https://www.tensorflow.org/api_docs/python/tf/data/experimental/bucket_by_sequence_length

    Dynamic batching is performed by specifying a max_batch_length which is the
    upper limit for the sum of the length of examples in a batch:
    e.g., if ex1 has length 4, ex2 length 5 and if max_batch_length is set to 6
    ex1 and ex2 will be placed, alone, in two distinct batches.

    Length for each example can be obtained in two manners.
    If the input dataset is a DynamicItemDataset it can be obtained by specifying a
    length_func. Default assumes a "duration" entry is in the annotation.
    Length for each example can also be passed to this class upon instantiation
    by specifying a list containing the length for each example and passing it to
    lengths_list.

    Examples are grouped together by defining a set of possible discrete intervals
    (buckets). Examples whose length fall into these intervals can be batched together.

    The number of buckets can be specified by using the arg num_buckets.
    There is usually an optimal range for the value of this argument.

    If num_buckets == 1, all examples can be batched together. You have maximum randomization
    but your training speed will be slower due to the fact that a large amount of the values will be padding
    as long and short examples can be batched together.
    As the number of buckets grows only examples with similar
    length can be grouped together.
    This trades-off speed with randomization.
    TLDR: Low number -> better randomization, High number -> faster training.
    NOTE THAT: if set too high the training speed will decrease. If num_buckets -> number of examples in the
    dataset the batch size will be small impacting training speed and possibly performance.

    The buckets can also be specified by passing a list to the bucket_boundaries
    argument instead of specifying a left_bucket_length and a bucket_length_multiplier.

    Arguments
    ---------
    dataset : Dataset
        The dataset to batch. It must be a DynamicItemDataset unless
        lengths_list is given.
    max_batch_length : int
        Length budget of a batch; each bucket's batch size is how many times
        its boundary fits into this value (at least 1).
    num_buckets : int
        Number of buckets to derive when bucket_boundaries is empty.
    length_func : callable
        Applied to ``dataset.data[data_id]`` to obtain the length of an
        example when lengths_list is not given.
    shuffle : bool
        Whether examples are shuffled (seeded by seed + epoch) before being
        assigned to buckets; otherwise they are taken in dataset order.
    batch_ordering : str
        "random", "ascending" or "descending" (by the longest example of
        each batch).
    max_batch_ex : int
        Maximum number of examples in a batch; None means no limit.
    bucket_boundaries : list
        Explicit bucket boundaries: unique, non-negative and ascending.
    lengths_list : list
        Length of every example, indexed by position in the dataset.
    seed : int
        Base seed for the shuffles.
    epoch : int
        The epoch to start at.
    drop_last : bool
        If True, partially filled batches left over at the end are dropped.
    verbose : bool
        If True, per-batch padding statistics are logged at epoch 0.
    """

    def __init__(
            self,
            dataset,
            max_batch_length: int,
            num_buckets: int=None,
            length_func=lambda x: x["duration"],
            shuffle: bool=True,
            batch_ordering: str="random",
            max_batch_ex: int=None,
            bucket_boundaries: List[int]=[],
            lengths_list: List[int]=None,
            seed: int=42,
            epoch: int=0,
            drop_last: bool=False,
            verbose: bool=False, ):
        self._dataset = dataset
        # Maps str(position in dataset) -> length of that example.
        self._ex_lengths = {}
        self.verbose = verbose

        # We do not put a default on num_buckets to encourage users to play with this parameter
        if num_buckets is None and len(bucket_boundaries) == 0:
            raise RuntimeError(
                "Please specify either num_buckets or bucket boundaries. "
                "Check the docs, and/or the tutorial !")

        if lengths_list is not None:
            # take length of examples from this argument and bypass length_key
            for indx in range(len(lengths_list)):
                self._ex_lengths[str(indx)] = lengths_list[indx]
        else:
            # use length func
            if not isinstance(dataset, DynamicItemDataset):
                raise NotImplementedError(
                    "Dataset should be a DynamicItemDataset when using length function"
                )
            # data_ids is only needed (and only guaranteed to exist) on this
            # path, so it is looked up after the type check rather than
            # unconditionally at the top of the constructor.
            ex_ids = self._dataset.data_ids
            for indx in range(len(self._dataset)):
                self._ex_lengths[str(indx)] = length_func(
                    self._dataset.data[ex_ids[indx]])

        if len(bucket_boundaries) > 0:
            if not all([x >= 0 for x in bucket_boundaries]):
                raise ValueError(
                    "All elements in bucket boundaries should be non-negative (>= 0)."
                )
            if not len(set(bucket_boundaries)) == len(bucket_boundaries):
                raise ValueError(
                    "Bucket_boundaries should not contain duplicates.")
            np.testing.assert_array_equal(
                np.array(bucket_boundaries),
                np.array(sorted(bucket_boundaries)),
                err_msg="The arg bucket_boundaries should be an ascending sorted list of non negative values values!",
            )
            self._bucket_boundaries = np.array(sorted(bucket_boundaries))
        else:
            # use num_buckets
            self._bucket_boundaries = np.array(
                self._get_boundaries_through_warping(
                    max_batch_length=max_batch_length,
                    num_quantiles=num_buckets, ))

        self._max_batch_length = max_batch_length
        self._shuffle_ex = shuffle
        self._batch_ordering = batch_ordering
        self._seed = seed
        self._drop_last = drop_last
        if max_batch_ex is None:
            max_batch_ex = np.inf
        self._max_batch_ex = max_batch_ex
        # Calculate bucket lengths - how often does one bucket boundary fit into max_batch_length?
        # The trailing [1] is the overflow bucket for examples longer than
        # the last boundary; those are batched one at a time.
        self._bucket_lens = [
            max(1, int(max_batch_length / self._bucket_boundaries[i]))
            for i in range(len(self._bucket_boundaries))
        ] + [1]
        self._epoch = epoch
        self._generate_batches()

    def get_durations(self, batch):
        """Gets durations of the elements in the batch."""
        return [self._ex_lengths[str(idx)] for idx in batch]

    def _get_boundaries_through_warping(
            self,
            max_batch_length: int,
            num_quantiles: int, ) -> List[int]:
        """Derives ``num_quantiles`` bucket boundaries from the quantiles of a
        log-normal distribution, scaled so that the largest boundary equals
        ``max_batch_length``. Returns them as an ascending list."""

        # NOTE: the following lines do not cover that there is only one example in the dataset
        # warp frames (duration) distribution of train data
        logger.info("Batch quantisation in latent space")
        # linspace set-up
        num_boundaries = num_quantiles + 1
        # create latent linearly equal spaced buckets
        latent_boundaries = np.linspace(
            1 / num_boundaries,
            num_quantiles / num_boundaries,
            num_quantiles, )
        # get quantiles using lognormal distribution
        quantiles = lognorm.ppf(latent_boundaries, 1)
        # scale up to max_batch_length
        bucket_boundaries = quantiles * max_batch_length / quantiles[-1]
        # compute resulting bucket length multipliers
        length_multipliers = [
            bucket_boundaries[x + 1] / bucket_boundaries[x]
            for x in range(num_quantiles - 1)
        ]
        # logging
        logger.info(
            "Latent bucket boundary - buckets: {} - length multipliers: {}".
            format(
                list(map("{:.2f}".format, bucket_boundaries)),
                list(map("{:.2f}".format, length_multipliers)), ))
        return list(sorted(bucket_boundaries))

    def _permute_batches(self):
        """Reorders ``self._batches`` according to ``batch_ordering``.

        Raises NotImplementedError for an unknown ordering.
        """

        if self._batch_ordering == "random":
            # deterministically shuffle based on epoch and seed
            gen = paddle.seed(1)
            gen.manual_seed(self._seed + self._epoch)
            sampler = paddle.randperm(
                len(self._batches)).tolist()  # type: ignore
            tmp = []
            for idx in sampler:
                tmp.append(self._batches[idx])
            self._batches = tmp

        elif self._batch_ordering == "ascending":
            self._batches = sorted(
                self._batches,
                key=lambda x: max([self._ex_lengths[str(idx)] for idx in x]), )
        elif self._batch_ordering == "descending":
            self._batches = sorted(
                self._batches,
                key=lambda x: max([self._ex_lengths[str(idx)] for idx in x]),
                reverse=True, )
        else:
            raise NotImplementedError

    def _generate_batches(self):
        """Assigns every example to its bucket, cuts full batches, stores the
        result in ``self._batches`` and (at epoch 0) logs bucket statistics."""
        logger.info("DynamicBatchSampler: Generating dynamic batches")
        if self._shuffle_ex:
            # deterministically shuffle based on epoch and seed
            gen = paddle.seed(1)
            gen.manual_seed(self._seed + self._epoch)
            sampler = paddle.randperm(
                len(self._dataset)).tolist()  # type: ignore
        else:
            # take examples as they are: e.g. they have been sorted
            sampler = range(len(self._dataset))  # type: ignore

        self._batches = []
        # One batch under construction per bucket.
        bucket_batches = [[] for i in self._bucket_lens]

        stats_tracker = [{
            "min": np.inf,
            "max": -np.inf,
            "tot": 0,
            "n_ex": 0
        } for i in self._bucket_lens]

        for idx in sampler:
            # length of pre-sampled audio
            item_len = self._ex_lengths[str(idx)]
            # bucket to fill up most padding
            bucket_id = np.searchsorted(self._bucket_boundaries, item_len)
            # fill audio's duration into that bucket
            bucket_batches[bucket_id].append(idx)

            stats_tracker[bucket_id]["min"] = min(
                stats_tracker[bucket_id]["min"], item_len)
            stats_tracker[bucket_id]["max"] = max(
                stats_tracker[bucket_id]["max"], item_len)
            stats_tracker[bucket_id]["tot"] += item_len
            stats_tracker[bucket_id]["n_ex"] += 1
            # track #samples - why not duration/#frames; rounded up?
            # keep track of durations, if necessary

            # Emit the batch once the bucket reached its batch size or the
            # global per-batch example limit.
            if (len(bucket_batches[bucket_id]) >= self._bucket_lens[bucket_id]
                    or len(bucket_batches[bucket_id]) >= self._max_batch_ex):
                self._batches.append(bucket_batches[bucket_id])
                bucket_batches[bucket_id] = []
                # keep track of durations

        # Dump remaining (partially filled) batches
        if not self._drop_last:
            for batch in bucket_batches:
                if batch:
                    self._batches.append(batch)

        self._permute_batches()  # possibly reorder batches

        if self._epoch == 0:  # only log at first epoch
            # frames per batch & their padding remaining
            boundaries = [0] + self._bucket_boundaries.tolist()

            for bucket_indx in range(len(self._bucket_boundaries)):
                try:
                    num_batches = stats_tracker[bucket_indx]["tot"] // (
                        self._max_batch_length)
                    pad_factor = (stats_tracker[bucket_indx]["max"] -
                                  stats_tracker[bucket_indx]["min"]) / (
                                      stats_tracker[bucket_indx]["tot"] /
                                      stats_tracker[bucket_indx]["n_ex"])
                except ZeroDivisionError:
                    # Empty bucket: nothing to report.
                    num_batches = 0
                    pad_factor = 0

                logger.info((
                    "DynamicBatchSampler: Bucket {} with boundary {:.1f}-{:.1f} and "
                    +
                    "batch_size {}: Num Examples {:.1f}, Num Full Batches {:.3f}, Pad Factor {:.3f}."
                ).format(
                    bucket_indx,
                    boundaries[bucket_indx],
                    boundaries[bucket_indx + 1],
                    self._bucket_lens[bucket_indx],
                    stats_tracker[bucket_indx]["n_ex"],
                    num_batches,
                    pad_factor * 100, ))

            if self.verbose:
                batch_stats = {
                    "tot_frames": [],
                    "tot_pad_frames": [],
                    "pad_%": [],
                }
                for batch in self._batches:
                    tot_frames = sum(
                        [self._ex_lengths[str(idx)] for idx in batch])
                    batch_stats["tot_frames"].append(tot_frames)
                    max_frames = max(
                        [self._ex_lengths[str(idx)] for idx in batch])
                    tot_pad = sum([
                        max_frames - self._ex_lengths[str(idx)] for idx in batch
                    ])
                    batch_stats["tot_pad_frames"].append(tot_pad)
                    batch_stats["pad_%"].append(tot_pad / tot_frames * 100)

                padding_details = "Batch {} with {:.1f} frames with {} files - {:.1f} padding, {:.2f} (%) of total."
                padding_details = "DynamicBatchSampler: " + padding_details
                for i in range(len(self._batches)):
                    logger.info(
                        padding_details.format(
                            i,
                            batch_stats["tot_frames"][i],
                            len(self._batches[i]),
                            batch_stats["tot_pad_frames"][i],
                            batch_stats["pad_%"][i], ))

    def __iter__(self):
        """Yields the prepared batches, then prepares the next pass."""
        for batch in self._batches:
            yield batch
        if self._shuffle_ex:  # re-generate examples if ex_ordering == "random"
            self._generate_batches()
        if self._batch_ordering == "random":
            # we randomly permute the batches only --> faster
            self._permute_batches()

    def set_epoch(self, epoch):
        """
        You can also just access self.epoch, but we maintain this interface
        to mirror paddle.io.DistributedBatchSampler
        """
        self._epoch = epoch
        self._generate_batches()

    def __len__(self):
        """Number of batches currently prepared."""
        return len(self._batches)


class BalancingDataSampler(ReproducibleWeightedRandomSampler):
    """Weighted sampler that balances the draws over the values of one key.

    Every item is weighted by the inverse of how often its value for ``key``
    occurs in the dataset, so each distinct value is drawn with approximately
    equal probability.

    Arguments
    ---------
    dataset: DynamicItemDataset
        the dataset from which samples will be drawn
    key: str
        the key from which samples will be taken
    num_samples : int
        Number of samples to draw. Falls back to ``len(dataset)`` when it is
        ``None`` (or otherwise falsy).
    replacement : bool
        To draw with replacement or not (within an epoch of num_samples).
    seed : int
        The base seed to use for the random number generator. It is recommended
        to use a value which has a good mix of 0 and 1 bits.
    epoch : int
        The epoch to start at.

    """

    def __init__(
            self,
            dataset,
            key,
            num_samples=None,
            replacement=True,
            seed=563375142,
            epoch=0,
            **kwargs, ):
        self.dataset = dataset
        self.key = key
        # A falsy num_samples means "one pass worth of samples".
        num_samples = num_samples or len(dataset)
        # The weights need self.dataset / self.key, so compute them before
        # handing control to the parent constructor.
        super().__init__(self._compute_weights(), num_samples, replacement,
                         seed, epoch, **kwargs)

    def _compute_weights(self):
        """Return one weight per item: ``1 / count(value of key)``."""
        # Restrict the dataset output to the balancing key while iterating.
        with self.dataset.output_keys_as([self.key]):
            labels = [sample[self.key] for sample in self.dataset]
        occurrences = Counter(labels)
        per_item_counts = [occurrences[label] for label in labels]
        return 1 / paddle.to_tensor(per_item_counts)


================================================
FILE: paddlespeech/s2t/io/speechbrain/sb_pipeline.py
================================================
# Copyright (c) 2023 speechbrain Authors. All Rights Reserved.
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# 
# Modified from speechbrain 2023 (https://github.com/speechbrain/speechbrain/blob/develop/recipes/AISHELL-1/ASR/CTC/train_with_wav2vec.py)
import data_pipeline
import dataio
import numpy
import paddle
import tqdm
import transformers
from dataloader import make_dataloader
from hyperpyyaml import load_hyperpyyaml

import dataset


def dataio_prepare(hparams):
    """Prepare the datasets, their processing pipelines and batch samplers.

    Args:
        hparams (dict): hyper-parameters. The keys read here are
            ``data_folder``, ``train_data``, ``valid_data``, ``test_data``,
            ``sorting``, ``dynamic_batching`` and, when dynamic batching is
            enabled, ``dynamic_batch_sampler``. When the training data is
            sorted, ``hparams["train_dataloader_opts"]["shuffle"]`` is set to
            ``False`` in place.

    Returns:
        tuple: ``(train_data, valid_data, test_data, tokenizer,
        train_batch_sampler, valid_batch_sampler)``; both samplers are
        ``None`` unless ``hparams["dynamic_batching"]`` is truthy.

    Raises:
        NotImplementedError: if ``hparams["sorting"]`` is not one of
            ``"random"``, ``"ascending"`` or ``"descending"``.
    """
    data_root = hparams["data_folder"]

    def _load_split(csv_key):
        # Every split is read the same way; only the CSV path differs.
        return dataset.DynamicItemDataset.from_csv(
            csv_path=hparams[csv_key],
            replacements={"data_root": data_root}, )

    # 1. Load the splits.
    train_data = _load_split("train_data")

    sort_mode = hparams["sorting"]
    if sort_mode not in ("ascending", "descending", "random"):
        raise NotImplementedError(
            "sorting must be random, ascending or descending")
    if sort_mode != "random":
        # we sort training data to speed up training and get better results.
        if sort_mode == "ascending":
            train_data = train_data.filtered_sorted(sort_key="duration")
        else:
            train_data = train_data.filtered_sorted(
                sort_key="duration", reverse=True)
        # when sorting do not shuffle in dataloader ! otherwise is pointless
        hparams["train_dataloader_opts"]["shuffle"] = False

    valid_data = _load_split("valid_data").filtered_sorted(sort_key="duration")
    test_data = _load_split("test_data").filtered_sorted(sort_key="duration")

    all_splits = [train_data, valid_data, test_data]

    # Defining tokenizer and loading it
    tokenizer = transformers.BertTokenizer.from_pretrained('bert-base-chinese')

    # 2. Define audio pipeline:
    @data_pipeline.takes("wav")
    @data_pipeline.provides("sig")
    def audio_pipeline(wav):
        return dataio.read_audio(wav)

    dataset.add_dynamic_item(all_splits, audio_pipeline)

    # 3. Define text pipeline:
    @data_pipeline.takes("transcript")
    @data_pipeline.provides("wrd", "tokens_list", "tokens")
    def text_pipeline(wrd):
        # Drop the spaces that separate the tokens of the transcript.
        wrd = wrd.replace(" ", "")
        yield wrd
        tokens_list = tokenizer(wrd)["input_ids"]
        yield tokens_list
        yield numpy.array(tokens_list, dtype="int64")

    dataset.add_dynamic_item(all_splits, text_pipeline)

    # 4. Set output:
    dataset.set_output_keys(
        all_splits,
        ["id", "sig", "wrd", "tokens"], )

    # 5. If Dynamic Batching is used, we instantiate the needed samplers.
    train_batch_sampler = valid_batch_sampler = None
    if hparams["dynamic_batching"]:
        from sampler import DynamicBatchSampler  # noqa

        sampler_cfg = hparams["dynamic_batch_sampler"]
        num_buckets = sampler_cfg["num_buckets"]

        def _build_sampler(split):
            # Train and valid samplers share one configuration.
            return DynamicBatchSampler(
                split,
                sampler_cfg["max_batch_len"],
                num_buckets=num_buckets,
                length_func=lambda x: x["duration"],
                shuffle=sampler_cfg["shuffle_ex"],
                batch_ordering=sampler_cfg["batch_ordering"], )

        train_batch_sampler = _build_sampler(train_data)
        valid_batch_sampler = _build_sampler(valid_data)

    return (train_data, valid_data, test_data, tokenizer, train_batch_sampler,
            valid_batch_sampler, )


# Script section: everything below runs at import time. It loads the
# hyper-parameters, builds the datasets/samplers and prints every batch of the
# validation loader.
hparams_file = 'train_with_wav2vec.yaml'
with open(hparams_file) as fin:
    hparams = load_hyperpyyaml(fin, None)

(train_data, valid_data, test_data, tokenizer, train_bsampler,
 valid_bsampler, ) = dataio_prepare(hparams)

# Default loader options come straight from the hyper-parameters ...
train_dataloader_opts = hparams["train_dataloader_opts"]
valid_dataloader_opts = hparams["valid_dataloader_opts"]

# ... and are replaced entirely when a dynamic batch sampler was created.
if train_bsampler is not None:
    train_dataloader_opts = {
        "batch_sampler": train_bsampler,
        "num_workers": hparams["num_workers"],
    }

if valid_bsampler is not None:
    valid_dataloader_opts = {"batch_sampler": valid_bsampler}

train_set = make_dataloader(train_data, stage='train', **train_dataloader_opts)

# NOTE(review): the validation loader is also built with stage='train' --
# confirm this is intended.
valid_set = make_dataloader(
    valid_data,
    stage='train',
    **valid_dataloader_opts, )

# Smoke test: iterate once over the validation loader and dump each batch.
for batch in valid_set:
    print(batch)
print('done')  # exit()


================================================
FILE: paddlespeech/s2t/io/utility.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from io import BytesIO
from typing import List

import numpy as np

from paddlespeech.s2t.utils.log import Log

__all__ = ["pad_list", "pad_sequence", "feat_type"]

logger = Log(__name__).getlog()


def pad_list(sequences: List[np.ndarray],
             padding_value: float=0.0) -> np.ndarray:
    """Pad ``sequences`` into a single batch-first array.

    Convenience wrapper that forwards to ``pad_sequence`` with
    ``batch_first`` fixed to ``True``.

    Args:
        sequences (list[np.ndarray]): list of variable length sequences.
        padding_value (float, optional): value for padded elements. Default: 0.

    Returns:
        np.ndarray: whatever ``pad_sequence`` returns for these arguments.
    """
    return pad_sequence(
        sequences, batch_first=True, padding_value=padding_value)


def pad_sequence(sequences: List[np.ndarray],
                 batch_first: bool=True,
                 padding_value: float=0.0) -> np.ndarray:
    r"""Pad a list of variable length arrays with ``padding_value``.

    ``pad_sequence`` stacks the arrays along a new batch dimension and pads
    them to the length of the longest one. Each input has size ``L x *``; the
    output has size ``B x T x *`` if ``batch_first`` is True and
    ``T x B x *`` otherwise.

    `B` is batch size. It is equal to the number of elements in ``sequences``.
    `T` is length of the longest sequence.
    `L` is length of the sequence.
    `*` is any number of trailing dimensions, including none.

    Example:
        >>> a = np.ones([25, 300])
        >>> b = np.ones([22, 300])
        >>> c = np.ones([15, 300])
        >>> pad_sequence([a, b, c]).shape
        (3, 25, 300)
        >>> pad_sequence([a, b, c], batch_first=False).shape
        (25, 3, 300)

    Note:
        The trailing dimensions and the dtype are taken from ``sequences[0]``;
        all arrays in ``sequences`` are assumed to share them.

    Args:
        sequences (list[np.ndarray]): list of variable length sequences.
        batch_first (bool, optional): output will be in ``B x T x *`` if True, or in
            ``T x B x *`` otherwise
        padding_value (float, optional): value for padded elements. Default: 0.

    Returns:
        np.ndarray of size ``T x B x *`` if :attr:`batch_first` is ``False``.
        np.ndarray of size ``B x T x *`` otherwise
    """
    reference = sequences[0]
    feature_dims = reference.shape[1:]
    longest = max(seq.shape[0] for seq in sequences)
    batch = len(sequences)

    leading_dims = (batch, longest) if batch_first else (longest, batch)
    padded = np.full(
        leading_dims + feature_dims, padding_value, dtype=reference.dtype)

    for row, seq in enumerate(sequences):
        valid = slice(0, seq.shape[0])
        # Index layout follows the output layout; trailing dims are implicit.
        target = (row, valid) if batch_first else (valid, row)
        padded[target] = seq

    return padded


def feat_type(filepath):
    """Infer the feature/file kind from a path's extension.

    Args:
        filepath (str | BytesIO): a file path, optionally followed by a
            ``:suffix`` part (e.g. ``feats.ark:1234``), or an in-memory
            ``BytesIO`` object (used by the paddlespeech server).

    Returns:
        str: ``'mat'`` for ``.ark``, ``'scp'``, ``'npy'``, ``'npz'`` for the
        matching extensions, ``'sound'`` for ``.wav`` / ``.flac`` and for
        ``BytesIO`` input.

    Raises:
        ValueError: if the extension is not one of the supported ones.
    """
    # deal with Byteio type for paddlespeech server
    if isinstance(filepath, BytesIO):
        return 'sound'

    path = filepath
    # A Windows drive prefix ("C:\..." or "C:/...") contains a colon that
    # would otherwise be mistaken for the ":suffix" separator below, making
    # every absolute Windows path fail. Skip the drive letter and its colon.
    if (len(path) > 2 and path[1] == ":" and path[0].isalpha() and
            path[2] in "\\/"):
        path = path[2:]

    # Keep what precedes the first ':' and take its (case-insensitive)
    # extension.
    suffix = path.split(":")[0].split('.')[-1].lower()

    kinds = {
        'ark': 'mat',
        'scp': 'scp',
        'npy': 'npy',
        'npz': 'npz',
        # PCM16
        'wav': 'sound',
        'flac': 'sound',
    }
    kind = kinds.get(suffix)
    if kind is None:
        raise ValueError(f"Not support filetype: {suffix}")
    return kind


================================================
FILE: paddlespeech/s2t/models/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/models/asr_interface.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""ASR Interface module."""
import argparse

from paddlespeech.s2t.utils.dynamic_import import dynamic_import


class ASRInterface:
    """ASR Interface model implementation.

    Base class listing the methods an ASR model is expected to provide. Every
    method except ``add_arguments``, ``build`` and the two plot-class
    properties raises ``NotImplementedError`` and must be overridden.
    """

    @staticmethod
    def add_arguments(parser):
        """Add arguments to parser.

        The base implementation adds nothing and returns ``parser`` unchanged.
        """
        return parser

    @classmethod
    def build(cls, idim: int, odim: int, **kwargs):
        """Initialize this class with python-level args.

        The keyword arguments are wrapped in an ``argparse.Namespace`` and
        passed to the constructor as its third positional argument.

        Args:
            idim (int): The number of an input feature dim.
            odim (int): The number of output vocab.

        Returns:
            ASRInterface: A new instance of ``cls``.

        """
        args = argparse.Namespace(**kwargs)
        return cls(idim, odim, args)

    def forward(self, xs, ilens, ys, olens):
        """Compute loss for training.

        :param xs: batch of padded source sequences paddle.Tensor (B, Tmax, idim)
        :param ilens: batch of lengths of source sequences (B), paddle.Tensor
        :param ys: batch of padded target sequences paddle.Tensor (B, Lmax)
        :param olens: batch of lengths of target sequences (B), paddle.Tensor
        :return: loss value
        :rtype: paddle.Tensor
        """
        raise NotImplementedError("forward method is not implemented")

    def recognize(self, x, recog_args, char_list=None, rnnlm=None):
        """Recognize x for evaluation.

        :param ndarray x: input acoustic feature (B, T, D) or (T, D)
        :param namespace recog_args: argument namespace containing options
        :param list char_list: list of characters
        :param paddle.nn.Layer rnnlm: language model module
        :return: N-best decoding results
        :rtype: list
        """
        raise NotImplementedError("recognize method is not implemented")

    def recognize_batch(self, x, recog_args, char_list=None, rnnlm=None):
        """Beam search implementation for batch.

        :param paddle.Tensor x: encoder hidden state sequences (B, Tmax, Henc)
        :param namespace recog_args: argument namespace containing options
        :param list char_list: list of characters
        :param paddle.nn.Layer rnnlm: language model module
        :return: N-best decoding results
        :rtype: list
        """
        raise NotImplementedError("Batch decoding is not supported yet.")

    def calculate_all_attentions(self, xs, ilens, ys):
        """Calculate attention.

        :param list xs: list of padded input sequences [(T1, idim), (T2, idim), ...]
        :param ndarray ilens: batch of lengths of input sequences (B)
        :param list ys: list of character id sequence tensor [(L1), (L2), (L3), ...]
        :return: attention weights (B, Lmax, Tmax)
        :rtype: float ndarray
        """
        raise NotImplementedError(
            "calculate_all_attentions method is not implemented")

    def calculate_all_ctc_probs(self, xs, ilens, ys):
        """Calculate CTC probability.

        :param list xs: list of padded input sequences [(T1, idim), (T2, idim), ...]
        :param ndarray ilens: batch of lengths of input sequences (B)
        :param list ys: list of character id sequence tensor [(L1), (L2), (L3), ...]
        :return: CTC probabilities (B, Tmax, vocab)
        :rtype: float ndarray
        """
        raise NotImplementedError(
            "calculate_all_ctc_probs method is not implemented")

    @property
    def attention_plot_class(self):
        """Get attention plot class."""
        # Imported lazily, only when the property is actually read.
        from paddlespeech.s2t.training.extensions.plot import PlotAttentionReport

        return PlotAttentionReport

    @property
    def ctc_plot_class(self):
        """Get CTC plot class."""
        # Imported lazily, only when the property is actually read.
        from paddlespeech.s2t.training.extensions.plot import PlotCTCReport

        return PlotCTCReport

    def get_total_subsampling_factor(self):
        """Get total subsampling factor."""
        raise NotImplementedError(
            "get_total_subsampling_factor method is not implemented")

    def encode(self, feat):
        """Encode feature in `beam_search` (optional).

        Args:
            feat (numpy.ndarray): input feature (T, D)
        Returns:
            paddle.Tensor: encoded feature (T, D)
        """
        raise NotImplementedError("encode method is not implemented")

    def scorers(self):
        """Get scorers for `beam_search` (optional).

        Returns:
            dict[str, ScorerInterface]: dict of `ScorerInterface` objects

        """
        # The message names this method (it used to say "decoders").
        raise NotImplementedError("scorers method is not implemented")


# Short model names mapped to their "module.path:ClassName" import targets;
# passed to dynamic_import() by dynamic_import_asr(). Both names currently
# point at the same class.
predefined_asr = {
    "transformer": "paddlespeech.s2t.models.u2:U2Model",
    "conformer": "paddlespeech.s2t.models.u2:U2Model",
}


def dynamic_import_asr(module):
    """Import ASR models dynamically.

    Args:
        module (str): asr name. e.g., transformer, conformer
            (the keys of ``predefined_asr``).

    Returns:
        type: ASR class

    Raises:
        AssertionError: if the imported class is not a subclass of
            ``ASRInterface`` (the check is an ``assert`` statement, so it is
            skipped when Python runs with ``-O``).

    """
    # presumably resolves the alias through predefined_asr -- see dynamic_import
    model_class = dynamic_import(module, predefined_asr)
    assert issubclass(model_class,
                      ASRInterface), f"{module} does not implement ASRInterface"
    return model_class


================================================
FILE: paddlespeech/s2t/models/ds2/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys

from .deepspeech2 import DeepSpeech2InferModel
from .deepspeech2 import DeepSpeech2Model
from paddlespeech.s2t.utils import dynamic_pip_install

try:
    import paddlespeech_ctcdecoders
except ImportError:
    # The decoders package is missing: try to install it on demand.
    # The on-demand install is not attempted when sys.platform is "win32".
    try:
        package_name = 'paddlespeech_ctcdecoders'
        if sys.platform != "win32":
            dynamic_pip_install.install(package_name)
    except Exception as err:
        # Adjacent string literals keep the message on one line without the
        # run of indentation spaces a backslash continuation would embed;
        # "from err" keeps the original failure as the explicit cause.
        raise RuntimeError(
            "Can not install package paddlespeech_ctcdecoders on your system. "
            "The DeepSpeech2 model is not supported for your system") from err

__all__ = ['DeepSpeech2Model', 'DeepSpeech2InferModel']


================================================
FILE: paddlespeech/s2t/models/ds2/conv.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle

from paddlespeech.s2t.modules.subsampling import Conv2dSubsampling4


class Conv2dSubsampling4Pure(Conv2dSubsampling4):
    """``Conv2dSubsampling4`` variant whose ``forward`` only runs ``self.conv``.

    The convolution output is returned with its channel and feature axes
    merged, and the lengths are mapped through ``((len - 1) // 2 - 1) // 2``.

    Attributes set here:
        output_dim: ``((idim - 1) // 2 - 1) // 2 * odim``.
        receptive_field_length: ``2 * (3 - 1) + 3``.
    """

    def __init__(self, idim: int, odim: int, dropout_rate: float):
        # NOTE(review): the parent's 4th positional argument is passed as
        # None -- confirm its meaning against Conv2dSubsampling4.
        super().__init__(idim, odim, dropout_rate, None)
        reduced_feat_dim = ((idim - 1) // 2 - 1) // 2
        self.output_dim = reduced_feat_dim * odim
        # stride_1 * (kernel_size_2 - 1) + kernel_size_1
        stride_1, kernel_size_1, kernel_size_2 = 2, 3, 3
        self.receptive_field_length = stride_1 * (
            kernel_size_2 - 1) + kernel_size_1

    def forward(self, x: paddle.Tensor,
                x_len: paddle.Tensor) -> [paddle.Tensor, paddle.Tensor]:
        """Apply the convolution and flatten its output.

        Args:
            x (paddle.Tensor): input; a channel axis is inserted at position 1
                before the convolution, giving (b, c=1, t, f).
            x_len (paddle.Tensor): input lengths.

        Returns:
            The flattened convolution output and the reduced lengths.
        """
        conv_in = x.unsqueeze(1)  # (b, c=1, t, f)
        conv_out = self.conv(conv_in)
        # Unpacking paddle.shape(x) into b, c, t, f does not work under jit,
        # so the reshape is written with 0 / -1 placeholders instead.
        time_major = conv_out.transpose([0, 2, 1, 3])
        flattened = time_major.reshape([0, 0, -1])
        reduced_len = ((x_len - 1) // 2 - 1) // 2
        return flattened, reduced_len


================================================
FILE: paddlespeech/s2t/models/ds2/deepspeech2.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Deepspeech2 ASR Model"""
import paddle
import paddle.nn.functional as F
from paddle import nn

from paddlespeech.s2t.models.ds2.conv import Conv2dSubsampling4Pure
from paddlespeech.s2t.modules.ctc import CTCDecoder
from paddlespeech.s2t.utils import layer_tools
from paddlespeech.s2t.utils.checkpoint import Checkpoint
from paddlespeech.s2t.utils.log import Log
logger = Log(__name__).getlog()

__all__ = ['DeepSpeech2Model', 'DeepSpeech2InferModel']


class CRNNEncoder(nn.Layer):
    """Convolutional subsampling followed by stacked RNN and FC layers.

    Layout built in ``__init__``: one ``Conv2dSubsampling4Pure`` front-end,
    ``num_rnn_layers`` single-layer GRU/LSTM modules each followed by a
    ``LayerNorm``, then ``num_fc_layers`` ``Linear`` layers (a ReLU is applied
    after each one in ``forward``).

    Args:
        feat_size (int): input feature size (e.g. 161 for linear features).
        dict_size (int): dictionary size; only stored on the instance here.
        num_conv_layers (int): accepted for interface compatibility; not used
            by this class.
        num_rnn_layers (int): number of stacked RNN layers.
        rnn_size (int): hidden size of every RNN layer.
        rnn_direction (str): ``'forward'``, ``'bidirect'`` or
            ``'bidirectional'``.
        num_fc_layers (int): number of FC layers; must not exceed
            ``len(fc_layers_size_list)``.
        fc_layers_size_list (list[int]): output size of each FC layer.
        use_gru (bool): use GRU layers if True, LSTM layers otherwise.

    Raises:
        ValueError: if ``rnn_direction`` is not one of the supported values.
    """

    def __init__(self,
                 feat_size,
                 dict_size,
                 num_conv_layers=2,
                 num_rnn_layers=4,
                 rnn_size=1024,
                 rnn_direction='forward',
                 num_fc_layers=2,
                 fc_layers_size_list=[512, 256],
                 use_gru=False):
        super().__init__()
        self.rnn_size = rnn_size
        self.feat_size = feat_size  # 161 for linear
        self.dict_size = dict_size
        self.num_rnn_layers = num_rnn_layers
        self.num_fc_layers = num_fc_layers
        self.rnn_direction = rnn_direction
        # Store a copy so the instance never aliases the shared default list
        # (or a list the caller may mutate later).
        self.fc_layers_size_list = list(fc_layers_size_list)
        self.use_gru = use_gru
        self.conv = Conv2dSubsampling4Pure(feat_size, 32, dropout_rate=0.0)

        self.output_dim = self.conv.output_dim

        i_size = self.conv.output_dim
        self.rnn = nn.LayerList()
        self.layernorm_list = nn.LayerList()
        self.fc_layers_list = nn.LayerList()
        if rnn_direction == 'bidirect' or rnn_direction == 'bidirectional':
            # both directions are concatenated on the feature axis
            layernorm_size = 2 * rnn_size
        elif rnn_direction == 'forward':
            layernorm_size = rnn_size
        else:
            # ValueError is still an Exception, so existing handlers keep working.
            raise ValueError("Wrong rnn direction")

        rnn_cls = nn.GRU if use_gru is True else nn.LSTM
        for i in range(0, num_rnn_layers):
            # The first layer consumes the conv output, later layers consume
            # the (layer-normalised) output of the previous RNN.
            rnn_input_size = i_size if i == 0 else layernorm_size
            self.rnn.append(
                rnn_cls(
                    input_size=rnn_input_size,
                    hidden_size=rnn_size,
                    num_layers=1,
                    direction=rnn_direction))
            self.layernorm_list.append(nn.LayerNorm(layernorm_size))
            self.output_dim = layernorm_size

        fc_input_size = layernorm_size
        for i in range(self.num_fc_layers):
            fc_output_size = self.fc_layers_size_list[i]
            self.fc_layers_list.append(
                nn.Linear(fc_input_size, fc_output_size))
            fc_input_size = fc_output_size
            self.output_dim = fc_output_size

    @property
    def output_size(self):
        """Feature size of the encoder output (size of the last layer built)."""
        return self.output_dim

    def forward(self, x, x_lens, init_state_h_box=None, init_state_c_box=None):
        """Compute Encoder outputs

        Args:
            x (Tensor): [B, T, D]
            x_lens (Tensor): [B]
            init_state_h_box(Tensor): init_states h for RNN layers: [num_rnn_layers * num_directions, batch_size, hidden_size]
            init_state_c_box(Tensor): init_states c for RNN layers: [num_rnn_layers * num_directions, batch_size, hidden_size]
                Only read for LSTM layers, and only when ``init_state_h_box``
                is given.
        Return:
            x (Tensor): encoder outputs, [B, T, D]
            x_lens (Tensor): encoder length, [B]
            final_state_h_box(Tensor): final_states h for RNN layers: [num_rnn_layers * num_directions, batch_size, hidden_size]
            final_state_c_box(Tensor): final_states c for RNN layers: [num_rnn_layers * num_directions, batch_size, hidden_size]
                For GRU layers this is ``init_state_c_box`` passed through
                unchanged.
        """
        if init_state_h_box is None:
            init_state_list = [None] * self.num_rnn_layers
        else:
            # One slice of the stacked state box per RNN layer.
            init_state_h_list = paddle.split(
                init_state_h_box, self.num_rnn_layers, axis=0)
            if self.use_gru is True:
                init_state_list = init_state_h_list
            else:
                init_state_c_list = paddle.split(
                    init_state_c_box, self.num_rnn_layers, axis=0)
                init_state_list = [(init_state_h_list[i], init_state_c_list[i])
                                   for i in range(self.num_rnn_layers)]

        x, x_lens = self.conv(x, x_lens)
        final_chunk_state_list = []
        for i in range(0, self.num_rnn_layers):
            x, final_state = self.rnn[i](x, init_state_list[i],
                                         x_lens)  #[B, T, D]
            final_chunk_state_list.append(final_state)
            x = self.layernorm_list[i](x)

        for i in range(self.num_fc_layers):
            x = self.fc_layers_list[i](x)
            x = F.relu(x)

        if self.use_gru is True:
            # GRU has no cell state: hand back whatever c box came in.
            final_chunk_state_h_box = paddle.concat(
                final_chunk_state_list, axis=0)
            final_chunk_state_c_box = init_state_c_box
        else:
            # LSTM final states are (h, c) pairs; stack each part separately.
            final_chunk_state_h_box = paddle.concat(
                [state[0] for state in final_chunk_state_list], axis=0)
            final_chunk_state_c_box = paddle.concat(
                [state[1] for state in final_chunk_state_list], axis=0)

        return x, x_lens, final_chunk_state_h_box, final_chunk_state_c_box

    def forward_chunk_by_chunk(self, x, x_lens, decoder_chunk_size=8):
        """Compute Encoder outputs

        The input is zero-padded on the time axis so that it splits into
        windows of ``(decoder_chunk_size - 1) * subsampling_rate +
        receptive_field_length`` frames taken every ``subsampling_rate *
        decoder_chunk_size`` frames. Each window is run through ``forward``,
        and the RNN states returned for one window seed the next.

        Args:
            x (Tensor): [B, T, D]
            x_lens (Tensor): [B]
            decoder_chunk_size: The chunk size of decoder
        Returns:
            eouts_list (List of Tensor): The list of encoder outputs in chunk_size: [B, chunk_size, D] * num_chunks
            eouts_lens_list (List of Tensor): The list of  encoder length in chunk_size: [B] * num_chunks
            final_state_h_box(Tensor): final_states h for RNN layers: [num_rnn_layers * num_directions, batch_size, hidden_size]
            final_state_c_box(Tensor): final_states c for RNN layers: [num_rnn_layers * num_directions, batch_size, hidden_size]
        """
        subsampling_rate = self.conv.subsampling_rate
        receptive_field_length = self.conv.receptive_field_length
        chunk_size = (decoder_chunk_size - 1
                      ) * subsampling_rate + receptive_field_length
        chunk_stride = subsampling_rate * decoder_chunk_size
        max_len = x.shape[1]
        assert (chunk_size <= max_len)

        eouts_chunk_list = []
        eouts_chunk_lens_list = []
        # Pad so that (padded_len - chunk_size) is a multiple of chunk_stride.
        remainder = (max_len - chunk_size) % chunk_stride
        padding_len = chunk_stride - remainder if remainder != 0 else 0
        # Build the padding with the dtype of x so the concat below does not
        # mix dtypes when x is not in the default floating point type.
        padding = paddle.zeros(
            (x.shape[0], padding_len, x.shape[2]), dtype=x.dtype)
        padded_x = paddle.concat([x, padding], axis=1)
        # Exact by construction of padding_len, so integer division suffices.
        num_chunk = int((max_len + padding_len - chunk_size) // chunk_stride +
                        1)
        chunk_state_h_box = None
        chunk_state_c_box = None
        for i in range(0, num_chunk):
            start = i * chunk_stride
            end = start + chunk_size
            x_chunk = padded_x[:, start:end, :]

            # Valid frames remaining from this chunk's start, floored at 0 ...
            x_len_left = paddle.where(x_lens - i * chunk_stride < 0,
                                      paddle.zeros_like(x_lens),
                                      x_lens - i * chunk_stride)
            # ... and capped at the chunk size.
            x_chunk_len_tmp = paddle.ones_like(x_lens) * chunk_size
            x_chunk_lens = paddle.where(x_len_left < x_chunk_len_tmp,
                                        x_len_left, x_chunk_len_tmp)

            eouts_chunk, eouts_chunk_lens, chunk_state_h_box, chunk_state_c_box = self.forward(
                x_chunk, x_chunk_lens, chunk_state_h_box, chunk_state_c_box)

            eouts_chunk_list.append(eouts_chunk)
            eouts_chunk_lens_list.append(eouts_chunk_lens)
        final_state_h_box = chunk_state_h_box
        final_state_c_box = chunk_state_c_box
        return eouts_chunk_list, eouts_chunk_lens_list, final_state_h_box, final_state_c_box


class DeepSpeech2Model(nn.Layer):
    """DeepSpeech2 acoustic model: a ``CRNNEncoder`` followed by a ``CTCDecoder``.

    Args:
        feat_size (int): Feature size of the input audio.
        dict_size (int): Dictionary size for tokenized transcription
            (``<blank>`` is part of the vocabulary).
        num_conv_layers (int): Number of stacking convolution layers.
        num_rnn_layers (int): Number of stacking RNN layers.
        rnn_size (int): RNN layer size (dimension of RNN cells).
        rnn_direction (str): Forwarded to ``CRNNEncoder``.
        num_fc_layers (int): Number of stacking FC layers.
        fc_layers_size_list (List[int], optional): The list of FC layer
            sizes. ``None`` (the default) means ``[512, 256]``.
        use_gru (bool): Forwarded to ``CRNNEncoder`` to select GRU cells.
        blank_id (int): Id of the CTC blank token, forwarded to
            ``CTCDecoder``.
        ctc_grad_norm_type: Forwarded to ``CTCDecoder`` as
            ``grad_norm_type``.
    """

    def __init__(
            self,
            feat_size,
            dict_size,
            num_conv_layers=2,
            num_rnn_layers=4,
            rnn_size=1024,
            rnn_direction='forward',
            num_fc_layers=2,
            fc_layers_size_list=None,
            use_gru=False,
            blank_id=0,
            ctc_grad_norm_type=None, ):
        super().__init__()
        # A list literal used as a default value is created once and shared
        # by every call; build a fresh one per instance instead.
        if fc_layers_size_list is None:
            fc_layers_size_list = [512, 256]

        self.encoder = CRNNEncoder(
            feat_size=feat_size,
            dict_size=dict_size,
            num_conv_layers=num_conv_layers,
            num_rnn_layers=num_rnn_layers,
            rnn_direction=rnn_direction,
            num_fc_layers=num_fc_layers,
            fc_layers_size_list=fc_layers_size_list,
            rnn_size=rnn_size,
            use_gru=use_gru)

        self.decoder = CTCDecoder(
            odim=dict_size,  # <blank> is in  vocab
            enc_n_units=self.encoder.output_size,
            blank_id=blank_id,
            dropout_rate=0.0,
            reduction=True,  # sum
            batch_average=True,  # sum / batch_size
            grad_norm_type=ctc_grad_norm_type)

    def forward(self, audio, audio_len, text, text_len):
        """Compute Model loss

        Args:
            audio (Tensor): [B, T, D]
            audio_len (Tensor): [B]
            text (Tensor): [B, U]
            text_len (Tensor): [B]

        Returns:
            loss (Tensor): [1]
        """
        # The encoder also returns its final recurrent states; training does
        # not need them, and it starts from empty (None) initial states.
        eouts, eouts_len, _, _ = self.encoder(audio, audio_len, None, None)
        loss = self.decoder(eouts, eouts_len, text, text_len)
        return loss

    @paddle.no_grad()
    def decode(self, audio, audio_len):
        """Transcribe a batch of utterances with the CTC decoder.

        The decoder must have been initialized by the caller beforehand.

        Args:
            audio (Tensor): [B, T, D]
            audio_len (Tensor): [B]

        Returns:
            The best transcription per utterance, i.e. the first element of
            what ``self.decoder.decode()`` returns.
        """
        eouts, eouts_len, _, _ = self.encoder(audio, audio_len, None, None)
        probs = self.decoder.softmax(eouts)
        batch_size = probs.shape[0]
        self.decoder.reset_decoder(batch_size=batch_size)
        self.decoder.next(probs, eouts_len)
        # decode() also returns the beam results, which are not used here.
        trans_best, _ = self.decoder.decode()
        return trans_best

    @classmethod
    def from_pretrained(cls, dataloader, config, checkpoint_path):
        """Build a DeepSpeech2Model model from a pretrained model.
        Parameters
        ----------
        dataloader: paddle.io.DataLoader
            provides ``feat_dim`` and ``vocab_size`` for the model sizes

        config: yacs.config.CfgNode
            model configs

        checkpoint_path: Path or str
            the path of pretrained model checkpoint, without extension name

        Returns
        -------
        DeepSpeech2Model
            The model built from pretrained result.
        """
        model = cls(
            feat_size=dataloader.feat_dim,
            dict_size=dataloader.vocab_size,
            num_conv_layers=config.num_conv_layers,
            num_rnn_layers=config.num_rnn_layers,
            rnn_size=config.rnn_layer_size,
            rnn_direction=config.rnn_direction,
            num_fc_layers=config.num_fc_layers,
            fc_layers_size_list=config.fc_layers_size_list,
            use_gru=config.use_gru,
            blank_id=config.blank_id,
            ctc_grad_norm_type=config.get('ctc_grad_norm_type', None), )
        infos = Checkpoint().load_parameters(
            model, checkpoint_path=checkpoint_path)
        logger.info(f"checkpoint info: {infos}")
        layer_tools.summary(model)
        return model

    @classmethod
    def from_config(cls, config):
        """Build a DeepSpeech2Model from config
        Parameters
        ----------
        config: yacs.config.CfgNode
            config; ``input_dim`` and ``output_dim`` give the feature and
            dictionary sizes
        Returns
        -------
        DeepSpeech2Model
            The model built from config.
        """
        model = cls(
            feat_size=config.input_dim,
            dict_size=config.output_dim,
            num_conv_layers=config.num_conv_layers,
            num_rnn_layers=config.num_rnn_layers,
            rnn_size=config.rnn_layer_size,
            rnn_direction=config.rnn_direction,
            num_fc_layers=config.num_fc_layers,
            fc_layers_size_list=config.fc_layers_size_list,
            use_gru=config.use_gru,
            blank_id=config.blank_id,
            ctc_grad_norm_type=config.get('ctc_grad_norm_type', None), )
        return model


class DeepSpeech2InferModel(DeepSpeech2Model):
    """Inference variant of ``DeepSpeech2Model`` that outputs CTC posteriors."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def forward(self,
                audio_chunk,
                audio_chunk_lens,
                chunk_state_h_box=None,
                chunk_state_c_box=None):
        """Run the encoder and apply the decoder softmax.

        Returns:
            For a ``"forward"`` encoder: ``(probs, lens, state_h, state_c)``,
            where the states are the ones returned by the encoder.
            For a ``"bidirect"`` encoder: ``(probs, lens)``; the state
            arguments are ignored.

        Raises:
            Exception: if the encoder direction is neither of the two.
        """
        direction = self.encoder.rnn_direction
        if direction == "forward":
            enc_out, enc_lens, state_h, state_c = self.encoder(
                audio_chunk, audio_chunk_lens, chunk_state_h_box,
                chunk_state_c_box)
            chunk_probs = self.decoder.softmax(enc_out)
            return chunk_probs, enc_lens, state_h, state_c
        if direction == "bidirect":
            enc_out, enc_lens, _, _ = self.encoder(audio_chunk,
                                                   audio_chunk_lens)
            full_probs = self.decoder.softmax(enc_out)
            return full_probs, enc_lens
        raise Exception("wrong model type")

    def export(self):
        """Convert this layer to a static graph with ``paddle.jit.to_static``.

        The input signature has the audio and length tensors, plus the two
        recurrent-state tensors when the encoder direction is ``"forward"``.

        Raises:
            Exception: if the encoder direction is neither ``"forward"`` nor
                ``"bidirect"``.
        """
        direction = self.encoder.rnn_direction
        if direction not in ("forward", "bidirect"):
            raise Exception("wrong model type")

        specs = [
            # audio, [B, T (or chunk_size), feat_dim]
            paddle.static.InputSpec(
                shape=[None, None, self.encoder.feat_size], dtype='float32'),
            # audio_length, [B]
            paddle.static.InputSpec(shape=[None], dtype='int64'),
        ]
        if direction == "forward":
            # One spec each for chunk_state_h_box and chunk_state_c_box.
            for _ in range(2):
                specs.append(
                    paddle.static.InputSpec(
                        shape=[None, None, None], dtype='float32'))

        static_model = paddle.jit.to_static(
            self, input_spec=specs, full_graph=True)
        return static_model


================================================
FILE: paddlespeech/s2t/models/hubert/__init__.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .hubert_ASR import HubertASR
from .hubert_ASR import HubertBase

__all__ = ["HubertASR", "HubertBase"]


================================================
FILE: paddlespeech/s2t/models/hubert/hubert_ASR.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""HubertASR model."""
from collections import defaultdict
from copy import deepcopy
from dataclasses import dataclass
from dataclasses import is_dataclass
from typing import Dict
from typing import List
from typing import Tuple

import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from paddlespeech.s2t.models.hubert.modules.hubert_model import HubertConfig
from paddlespeech.s2t.models.hubert.modules.hubert_model import HubertModel
from paddlespeech.s2t.models.hubert.modules.hubert_model import HubertPretrainingConfig
from paddlespeech.s2t.models.wav2vec2.modules.VanillaNN import VanillaNN
from paddlespeech.s2t.models.wav2vec2.processing.speech_augmentation import SpecAugment
from paddlespeech.s2t.modules.ctc import CTCDecoderBase as CTC
from paddlespeech.s2t.modules.initializer import DefaultInitializerContext
from paddlespeech.s2t.utils.ctc_utils import remove_duplicates_and_blank
from paddlespeech.s2t.utils.log import Log
from paddlespeech.s2t.utils.utility import log_add

logger = Log(__name__).getlog()


class HubertASR(nn.Layer):
    """CTC speech recognizer on top of a HuBERT feature encoder.

    Waveforms go through ``HubertModel.extract_features``, then a
    ``VanillaNN`` head, then a CTC layer.
    """

    def __init__(self, config: dict):
        """Build the HuBERT encoder, the ``VanillaNN`` head and the CTC layer.

        Args:
            config: Model configuration, read both via ``get`` and via
                attribute access. Required entries: ``task_cfg``,
                ``model_cfg``, ``normalize_wav``, ``output_norm``,
                ``freeze_hubert``, ``enc``, ``ctc`` and ``output_dim``.
                Optional entries: ``init_type`` and ``spec_augment``.
        """
        super().__init__()
        init_type = config.get("init_type", None)
        with DefaultInitializerContext(init_type):
            self.config = config
            task_cfg = self.merge_with_parent(HubertPretrainingConfig,
                                              dict(self.config.task_cfg))
            model_cfg = self.merge_with_parent(HubertConfig,
                                               dict(self.config.model_cfg))
            hubert = HubertModel(model_cfg, task_cfg, [None])

            self.normalize_wav = config.normalize_wav
            self.output_norm = config.output_norm
            if hasattr(config, 'spec_augment'):
                self.spec_augment = SpecAugment(**config.spec_augment)

            if config.freeze_hubert:
                # Frozen encoder: eval mode and no trainable parameters.
                hubert.eval()
                for parm in hubert.parameters():
                    parm.trainable = False
            self.hubert = hubert
            self.enc = VanillaNN(**config.enc)
            self.ctc = CTC(**config.ctc,
                           odim=config.output_dim,
                           batch_average=False,
                           reduction='mean')

    def merge_with_parent(self, dc: dataclass, cfg: dict):
        """Instantiate dataclass ``dc`` from the entries of ``cfg`` it declares.

        Keys of ``cfg`` that are not fields of ``dc`` are ignored. ``cfg`` is
        deep-copied, so the caller's dict is left untouched.

        Args:
            dc: A dataclass type.
            cfg: A plain ``dict`` of candidate field values.

        Returns:
            An instance of ``dc``.

        Raises:
            AssertionError: if ``dc`` is not a dataclass, ``cfg`` is not
                exactly a ``dict``, or no key of ``cfg`` is a field of ``dc``.
        """
        assert is_dataclass(dc)
        assert type(cfg) == dict
        known_keys = set(dc.__dataclass_fields__.keys())
        kept = {k: v for k, v in deepcopy(cfg).items() if k in known_keys}
        assert len(kept) > 0
        return dc(**kept)

    def _encode_wav(self, wav):
        """Return HuBERT features for ``wav``, with the configured layer norms."""
        if self.normalize_wav:
            wav = F.layer_norm(wav, wav.shape[1:])
        # extract_features returns a sequence; only its first item is used.
        out = self.hubert.extract_features(wav)[0]
        if self.output_norm:
            out = F.layer_norm(out, out.shape[1:])
        return out

    def forward(self, wav, wavs_lens_rate, target, target_lens):
        """Compute the CTC loss for a batch.

        Args:
            wav (paddle.Tensor): Input waveforms.
            wavs_lens_rate (paddle.Tensor): Multiplied by the encoder output
                length to get per-utterance lengths (presumably the relative
                length of each utterance in the padded batch; confirm with
                the data loader).
            target (paddle.Tensor): Target token ids.
            target_lens (paddle.Tensor): Target lengths.

        Returns:
            The value returned by the CTC layer.
        """
        out = self._encode_wav(wav)

        # SpecAugment is only applied in training, and only when configured.
        if self.training and hasattr(self.config, 'spec_augment'):
            feats = self.spec_augment(out)
        else:
            feats = out

        x = self.enc(feats)

        x_lens = (wavs_lens_rate * x.shape[1]).round().astype(paddle.int64)

        ctc_loss = self.ctc(x, x_lens, target, target_lens)

        return ctc_loss

    @staticmethod
    def _strip_special_tokens(text_feature, token_ids):
        """Convert ids to text, skipping ``[CLS]`` and stopping at ``[SEP]``/``[PAD]``.

        Returns:
            Tuple of the joined text and the vocabulary ids of the kept tokens.
        """
        kept_tokens = []
        kept_ids = []
        for token in text_feature.convert_ids_to_tokens(token_ids):
            if token == "[CLS]":
                continue
            if token == "[SEP]" or token == "[PAD]":
                break
            kept_tokens.append(token)
            kept_ids.append(text_feature.vocab[token])
        return ''.join(kept_tokens), kept_ids

    @paddle.no_grad()
    def decode(self,
               feats: paddle.Tensor,
               text_feature: Dict[str, int],
               decoding_method: str,
               beam_size: int,
               tokenizer: str=None,
               sb_pipeline=False):
        """Decode a batch of waveforms into text and token ids.

        Args:
            feats: Input waveforms. With ``sb_pipeline=True`` a trailing axis
                is appended before searching, because the search routines
                read ``wav[:, :, 0]``.
            text_feature: Object used to turn ids into text. It needs
                ``defeaturize`` when neither ``tokenizer`` nor ``sb_pipeline``
                is set, otherwise ``convert_ids_to_tokens`` and ``vocab``.
            decoding_method: ``'ctc_greedy_search'`` or
                ``'ctc_prefix_beam_search'``.
            beam_size: Beam size, used by the prefix beam search only.
            tokenizer: When not ``None``, selects the tokenizer-style
                post-processing (special tokens are stripped).
            sb_pipeline: See ``feats``; also selects the tokenizer-style
                post-processing.

        Returns:
            ``(res, res_tokenids)``: one text and one id sequence per
            hypothesis.

        Raises:
            ValueError: if ``decoding_method`` is not supported.
            AssertionError: if prefix beam search is requested with a batch
                size other than 1.
        """
        batch_size = feats.shape[0]

        if decoding_method == 'ctc_prefix_beam_search' and batch_size > 1:
            logger.error(
                f"decoding mode {decoding_method} must be running with batch_size == 1"
            )
            logger.error(f"current batch_size is {batch_size}")

        if decoding_method not in ('ctc_greedy_search',
                                   'ctc_prefix_beam_search'):
            raise ValueError(
                f"HubertASR does not support decoding method: {decoding_method}"
            )

        plain_output = tokenizer is None and sb_pipeline is False
        search_input = feats.unsqueeze(-1) if sb_pipeline is True else feats

        if decoding_method == 'ctc_greedy_search':
            hyps = self.ctc_greedy_search(search_input)
        else:
            # ctc_prefix_beam_search only returns one result; wrap it in a
            # list to be compatible with the batch decoding mode.
            assert feats.shape[0] == 1
            hyps = [self.ctc_prefix_beam_search(search_input, beam_size)]

        if plain_output:
            res = [text_feature.defeaturize(hyp) for hyp in hyps]
            res_tokenids = list(hyps)
        else:
            res = []
            res_tokenids = []
            # Every hypothesis of the batch is post-processed, not only the
            # last one.
            for hyp in hyps:
                text, token_ids = self._strip_special_tokens(text_feature,
                                                             hyp)
                res.append(text)
                res_tokenids.append(token_ids)

        return res, res_tokenids

    @classmethod
    def from_config(cls, config):
        """Build a ``HubertASR`` from ``config``."""
        model = cls(config)
        return model

    def ctc_greedy_search(self, wav) -> List[List[int]]:
        """ Apply CTC greedy search
        Args:
            wav (paddle.Tensor): waveforms with a trailing axis of which only
                index 0 is used, i.e. ``wav[:, :, 0]``
        Returns:
            List[List[int]]: best path result
        """
        batch_size = wav.shape[0]
        wav = wav[:, :, 0]
        x = self.enc(self._encode_wav(wav))
        x_lens = x.shape[1]
        ctc_probs = self.ctc.log_softmax(x)  # (B, maxlen, vocab_size)
        _, topk_index = ctc_probs.topk(1, axis=2)  # (B, maxlen, 1)
        topk_index = topk_index.reshape([batch_size, x_lens])  # (B, maxlen)

        hyps = [hyp.tolist() for hyp in topk_index]
        hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps]
        return hyps

    def _ctc_prefix_beam_search(
            self,
            wav,
            beam_size,
            blank_id: int=0, ) -> List[Tuple[Tuple[int, ...], float]]:
        """ CTC prefix beam search inner implementation
        Args:
            wav (paddle.Tensor): waveforms with a trailing axis of which only
                index 0 is used; the batch axis is squeezed, so the batch
                size is expected to be 1
            beam_size (int): beam size for beam search
            blank_id (int): id of the CTC blank token
        Returns:
            List[Tuple[Tuple[int, ...], float]]: nbest results, best first,
                each a (prefix, log likelihood) pair
        """
        wav = wav[:, :, 0]

        x = self.enc(self._encode_wav(wav))
        maxlen = x.shape[1]
        ctc_probs = self.ctc.log_softmax(x)  # (1, maxlen, vocab_size)
        ctc_probs = ctc_probs.squeeze(0)

        # cur_hyps: (prefix, (blank_ending_score, none_blank_ending_score))
        # blank_ending_score and  none_blank_ending_score in ln domain
        cur_hyps = [(tuple(), (0.0, -float('inf')))]
        # 2. CTC beam search step by step
        for t in range(0, maxlen):
            logp = ctc_probs[t]  # (vocab_size,)
            # key: prefix, value (pb, pnb), default value(-inf, -inf)
            next_hyps = defaultdict(lambda: (-float('inf'), -float('inf')))
            # 2.1 First beam prune: select topk best
            _, top_k_index = logp.topk(beam_size)  # (beam_size,)
            for s in top_k_index:
                s = s.item()
                ps = logp[s].item()
                for prefix, (pb, pnb) in cur_hyps:
                    last = prefix[-1] if len(prefix) > 0 else None
                    if s == blank_id:  # blank
                        n_pb, n_pnb = next_hyps[prefix]
                        n_pb = log_add([n_pb, pb + ps, pnb + ps])
                        next_hyps[prefix] = (n_pb, n_pnb)
                    elif s == last:
                        #  Update *ss -> *s;
                        n_pb, n_pnb = next_hyps[prefix]
                        n_pnb = log_add([n_pnb, pnb + ps])
                        next_hyps[prefix] = (n_pb, n_pnb)
                        # Update *s-s -> *ss, - is for blank
                        n_prefix = prefix + (s, )
                        n_pb, n_pnb = next_hyps[n_prefix]
                        n_pnb = log_add([n_pnb, pb + ps])
                        next_hyps[n_prefix] = (n_pb, n_pnb)
                    else:
                        n_prefix = prefix + (s, )
                        n_pb, n_pnb = next_hyps[n_prefix]
                        n_pnb = log_add([n_pnb, pb + ps, pnb + ps])
                        next_hyps[n_prefix] = (n_pb, n_pnb)

            # 2.2 Second beam prune
            next_hyps = sorted(
                next_hyps.items(),
                key=lambda x: log_add(list(x[1])),
                reverse=True)
            cur_hyps = next_hyps[:beam_size]

        hyps = [(y[0], log_add([y[1][0], y[1][1]])) for y in cur_hyps]
        return hyps

    def ctc_prefix_beam_search(self, wav, beam_size) -> Tuple[int, ...]:
        """ Apply CTC prefix beam search
        Args:
            wav (paddle.Tensor): waveforms, see ``_ctc_prefix_beam_search``
            beam_size (int): beam size for beam search
        Returns:
            Tuple[int, ...]: token ids of the best CTC prefix beam search
                hypothesis
        """
        hyps = self._ctc_prefix_beam_search(wav, beam_size)
        return hyps[0][0]


class HubertBase(nn.Layer):
    """Bare HuBERT encoder: wraps a ``HubertModel`` and returns its features."""

    def __init__(self, config: dict):
        """Build the wrapped ``HubertModel`` from ``config``.

        Args:
            config: Configuration exposing ``task_cfg`` and ``model_cfg`` as
                attributes; both must be convertible with ``dict()``.
        """
        super().__init__()
        self.config = config
        pretraining_cfg = self.merge_with_parent(HubertPretrainingConfig,
                                                 dict(self.config.task_cfg))
        architecture_cfg = self.merge_with_parent(HubertConfig,
                                                  dict(self.config.model_cfg))
        self.hubert = HubertModel(architecture_cfg, pretraining_cfg, [None])

    @classmethod
    def from_config(cls, configs: dict):
        """Create the model from a configuration.
        Args:
            configs (dict): config dict.
        Returns:
            nn.Layer: HubertBase
        """
        return cls(configs)

    def merge_with_parent(self, dc: dataclass, cfg: dict):
        """Instantiate dataclass ``dc`` from the entries of ``cfg`` it declares.

        Unknown keys are ignored and the values are deep-copied, so the
        caller's dict is not modified.

        Raises:
            AssertionError: if ``dc`` is not a dataclass, ``cfg`` is not
                exactly a ``dict``, or no key of ``cfg`` is a field of ``dc``.
        """
        assert is_dataclass(dc)
        assert type(cfg) == dict
        declared = set(dc.__dataclass_fields__.keys())
        selected = {
            key: value
            for key, value in deepcopy(cfg).items() if key in declared
        }
        assert len(selected) > 0
        return dc(**selected)

    def forward(self, wav):
        """Return ``self.hubert.extract_features(wav)`` unchanged."""
        return self.hubert.extract_features(wav)


================================================
FILE: paddlespeech/s2t/models/hubert/modules/__init__.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/models/hubert/modules/hubert_model.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates.
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Paddle Hubert model."""
from dataclasses import dataclass
from dataclasses import field
from typing import Any
from typing import Dict
from typing import List
from typing import Optional
from typing import Tuple

import numpy as np
import paddle
import paddle.nn as nn

from paddlespeech.s2t.models.wav2vec2.modules.wav2vec2_model import ChoiceEnum
from paddlespeech.s2t.models.wav2vec2.modules.wav2vec2_model import compute_mask_indices
from paddlespeech.s2t.models.wav2vec2.modules.wav2vec2_model import ConvFeatureExtractionModel
from paddlespeech.s2t.models.wav2vec2.modules.wav2vec2_model import EXTRACTOR_MODE_CHOICES
from paddlespeech.s2t.models.wav2vec2.modules.wav2vec2_model import get_available_activation_fns
from paddlespeech.s2t.models.wav2vec2.modules.wav2vec2_model import GLU
from paddlespeech.s2t.models.wav2vec2.modules.wav2vec2_model import GradMultiply
from paddlespeech.s2t.models.wav2vec2.modules.wav2vec2_model import LAYER_TYPE_CHOICES
from paddlespeech.s2t.models.wav2vec2.modules.wav2vec2_model import MASKING_DISTRIBUTION_CHOICES
from paddlespeech.s2t.models.wav2vec2.modules.wav2vec2_model import TransformerEncoder
from paddlespeech.s2t.modules.align import LayerNorm
from paddlespeech.s2t.modules.align import Linear
from paddlespeech.s2t.utils.log import Log

logger = Log(__name__).getlog()


@dataclass
class HubertPretrainingConfig:
    """Data/task options of HuBERT pretraining.

    Every field has a default and carries its description in
    ``metadata["help"]``.
    """

    # Labels and audio format.
    label_rate: float = field(
        default=-1.0,
        metadata=dict(help="label frame rate. -1.0 for sequence label"))
    sample_rate: int = field(
        default=16_000,
        metadata=dict(
            help="target sample rate. audio files will be up/down "
            "sampled to this rate"))
    normalize: bool = field(
        default=False,
        metadata=dict(
            help="if set, normalizes input to have 0 mean and unit variance"))

    # Sample length handling.
    enable_padding: bool = field(
        default=False,
        metadata=dict(help="pad shorter samples instead of cropping"))
    max_keep_size: Optional[int] = field(
        default=None, metadata=dict(help="exclude sample longer than this"))
    max_sample_size: Optional[int] = field(
        default=None,
        metadata=dict(help="max sample size to crop to for batching"))
    min_sample_size: Optional[int] = field(
        default=None,
        metadata=dict(help="min sample size to crop to for batching"))
    random_crop: Optional[bool] = field(
        default=True,
        metadata=dict(help="always crop from the beginning if false"))
    pad_audio: Optional[bool] = field(
        default=False,
        metadata=dict(
            help="pad audio to the longest one in the batch if true"))


@dataclass
class HubertConfig:
    """Architecture, masking and loss options of the HuBERT model.

    ``label_rate`` is the only field without a default. Every other field
    carries its description in ``metadata["help"]``.
    """

    label_rate: float

    # ---- feature extractor / transformer encoder ----
    extractor_mode: EXTRACTOR_MODE_CHOICES = field(
        default="default",
        metadata=dict(
            help="mode for feature extractor. default has a single group "
            "norm with d groups in the first conv block, whereas layer_norm "
            "has layer norms in every block (meant to use with normalize=True)"
        ))
    encoder_layers: int = field(
        default=12,
        metadata=dict(help="num encoder layers in the transformer"))
    encoder_embed_dim: int = field(
        default=768, metadata=dict(help="encoder embedding dimension"))
    encoder_ffn_embed_dim: int = field(
        default=3072,
        metadata=dict(help="encoder embedding dimension for FFN"))
    encoder_attention_heads: int = field(
        default=12, metadata=dict(help="num encoder attention heads"))
    activation_fn: ChoiceEnum(get_available_activation_fns()) = field(
        default="gelu", metadata=dict(help="activation function to use"))
    layer_type: LAYER_TYPE_CHOICES = field(
        default="transformer", metadata=dict(help="layer type in encoder"))

    # ---- dropouts ----
    dropout: float = field(
        default=0.1,
        metadata=dict(help="dropout probability for the transformer"))
    attention_dropout: float = field(
        default=0.1,
        metadata=dict(help="dropout probability for attention weights"))
    activation_dropout: float = field(
        default=0.0,
        metadata=dict(help="dropout probability after activation in FFN"))
    encoder_layerdrop: float = field(
        default=0.0,
        metadata=dict(help="probability of dropping a tarnsformer layer"))
    dropout_input: float = field(
        default=0.0,
        metadata=dict(help="dropout to apply to the input (after feat extr)"))
    dropout_features: float = field(
        default=0.0,
        metadata=dict(
            help="dropout to apply to the features (after feat extr)"))

    # ---- projections and convolutional front end ----
    final_dim: int = field(
        default=0,
        metadata=dict(
            help="project final representations and targets to this many "
            "dimensions. set to encoder_embed_dim is <= 0"))
    untie_final_proj: bool = field(
        default=False,
        metadata=dict(help="use separate projection for each target"))
    layer_norm_first: bool = field(
        default=False,
        metadata=dict(help="apply layernorm first in the transformer"))
    conv_feature_layers: str = field(
        default="[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2",
        metadata=dict(
            help="string describing convolutional feature extraction "
            "layers in form of a python list that contains "
            "[(dim, kernel_size, stride), ...]"))
    conv_bias: bool = field(
        default=False, metadata=dict(help="include bias in conv encoder"))
    logit_temp: float = field(
        default=0.1, metadata=dict(help="temperature to divide logits by"))
    target_glu: bool = field(
        default=False,
        metadata=dict(help="adds projection + glu to targets"))
    feature_grad_mult: float = field(
        default=1.0,
        metadata=dict(help="multiply feature extractor var grads by this"))

    # ---- time masking ----
    mask_length: int = field(default=10, metadata=dict(help="mask length"))
    mask_prob: float = field(
        default=0.65,
        metadata=dict(help="probability of replacing a token with mask"))
    mask_selection: MASKING_DISTRIBUTION_CHOICES = field(
        default="static", metadata=dict(help="how to choose mask length"))
    mask_other: float = field(
        default=0,
        metadata=dict(
            help="secondary mask argument "
            "(used for more complex distributions), "
            "see help in compute_mask_indicesh"))
    no_mask_overlap: bool = field(
        default=False,
        metadata=dict(help="whether to allow masks to overlap"))
    mask_min_space: int = field(
        default=1,
        metadata=dict(
            help="min space between spans (if no overlap is enabled)"))

    # ---- channel masking ----
    mask_channel_length: int = field(
        default=10,
        metadata=dict(help="length of the mask for features (channels)"))
    mask_channel_prob: float = field(
        default=0.0,
        metadata=dict(help="probability of replacing a feature with 0"))
    mask_channel_selection: MASKING_DISTRIBUTION_CHOICES = field(
        default="static",
        metadata=dict(help="how to choose mask length for channel masking"))
    mask_channel_other: float = field(
        default=0,
        metadata=dict(
            help="secondary mask argument "
            "(used for more complex distributions), "
            "see help in compute_mask_indicesh"))
    no_mask_channel_overlap: bool = field(
        default=False,
        metadata=dict(help="whether to allow channel masks to overlap"))
    mask_channel_min_space: int = field(
        default=1,
        metadata=dict(
            help="min space between spans (if no overlap is enabled)"))

    # ---- positional embeddings ----
    conv_pos: int = field(
        default=128,
        metadata=dict(
            help="number of filters for convolutional positional embeddings"))
    conv_pos_groups: int = field(
        default=16,
        metadata=dict(
            help="number of groups for convolutional positional embedding"))

    latent_temp: Tuple[float, float, float] = field(
        default=(2, 0.5, 0.999995),
        metadata=dict(help="legacy (to be removed)"))

    # ---- loss computation ----
    skip_masked: bool = field(
        default=False,
        metadata=dict(help="skip computing losses over masked frames"))
    skip_nomask: bool = field(
        default=False,
        metadata=dict(help="skip computing losses over unmasked frames"))

    checkpoint_activations: bool = field(
        default=False,
        metadata=dict(
            help="recompute activations and save memory for extra compute"))

    # ---- FP16 optimization ----
    required_seq_len_multiple: int = field(
        default=2,
        metadata=dict(
            help="pad the input to encoder such that the sequence length is divisible by multiple"
        ))

    # ---- Conformer ----
    depthwise_conv_kernel_size: int = field(
        default=31,
        metadata=dict(
            help="depthwise-conv-kernel-size for convolution in conformer layer"
        ))
    attn_type: str = field(
        default="", metadata=dict(help="if espnet use ESPNET MHA"))
    pos_enc_type: str = field(
        default="abs",
        metadata=dict(help="Positional encoding type to use in conformer"))
    fp16: bool = field(
        default=False, metadata=dict(help="If fp16 is being used"))


class HubertModel(nn.Layer):
    """HuBERT model: convolutional feature extractor, Transformer encoder and
    a masked-prediction head scored against learned label embeddings.
    """

    def __init__(
            self,
            cfg: HubertConfig,
            task_cfg: HubertPretrainingConfig,
            dictionaries: List[Any], ) -> None:
        """Build the model from its configuration.

        Args:
            cfg (HubertConfig): model hyper-parameters.
            task_cfg (HubertPretrainingConfig): task configuration; only
                ``sample_rate`` is read here.
            dictionaries (List[Any]): one entry per label set; only ``len()``
                of each entry is used. If any entry is ``None`` the label
                embeddings are not created (fine-tuning use case).
        """
        super().__init__()
        logger.info(f"HubertModel Config: {cfg}")

        # NOTE(security): `conv_feature_layers` is a Python expression stored
        # as a string and is evaluated with eval(); it must only ever come
        # from a trusted configuration file.
        feature_enc_layers = eval(cfg.conv_feature_layers)  # noqa
        # First element of the last layer spec is used as the extractor's
        # output dimension.
        self.embed = feature_enc_layers[-1][0]

        self.feature_extractor = ConvFeatureExtractionModel(
            conv_layers=feature_enc_layers,
            dropout=0.0,
            mode=cfg.extractor_mode,
            conv_bias=cfg.conv_bias, )
        # Product of the third element of every layer spec (total
        # down-sampling rate of the extractor).
        feature_ds_rate = np.prod([s for _, _, s in feature_enc_layers])
        # Number of label frames per extracted feature frame.
        self.feat2tar_ratio = cfg.label_rate * feature_ds_rate / task_cfg.sample_rate

        # Projection is only needed when extractor and encoder widths differ.
        self.post_extract_proj = (Linear(self.embed, cfg.encoder_embed_dim) if
                                  self.embed != cfg.encoder_embed_dim else None)

        self.mask_prob = cfg.mask_prob
        self.mask_selection = cfg.mask_selection
        self.mask_other = cfg.mask_other
        self.mask_length = cfg.mask_length
        self.no_mask_overlap = cfg.no_mask_overlap
        self.mask_min_space = cfg.mask_min_space

        self.mask_channel_prob = cfg.mask_channel_prob
        self.mask_channel_selection = cfg.mask_channel_selection
        self.mask_channel_other = cfg.mask_channel_other
        self.mask_channel_length = cfg.mask_channel_length
        self.no_mask_channel_overlap = cfg.no_mask_channel_overlap
        self.mask_channel_min_space = cfg.mask_channel_min_space

        self.dropout_input = nn.Dropout(cfg.dropout_input)
        self.dropout_features = nn.Dropout(cfg.dropout_features)

        self.feature_grad_mult = cfg.feature_grad_mult
        self.logit_temp = cfg.logit_temp
        self.skip_masked = cfg.skip_masked
        self.skip_nomask = cfg.skip_nomask

        final_dim = cfg.final_dim if cfg.final_dim > 0 else cfg.encoder_embed_dim

        # Learned embedding written into masked time steps.
        self.mask_emb = paddle.create_parameter(
            shape=[cfg.encoder_embed_dim],
            dtype='float32',
            default_initializer=paddle.nn.initializer.Uniform(low=0), )

        self.encoder = TransformerEncoder(cfg)
        self.layer_norm = LayerNorm(self.embed)

        self.target_glu = None
        if cfg.target_glu:
            self.target_glu = nn.Sequential(
                Linear(final_dim, final_dim * 2), GLU())

        self.untie_final_proj = cfg.untie_final_proj
        if self.untie_final_proj:
            # One projection slice per label set, split again in forward().
            self.final_proj = Linear(cfg.encoder_embed_dim,
                                     final_dim * len(dictionaries))
        else:
            self.final_proj = Linear(cfg.encoder_embed_dim, final_dim)

        # modules below are not needed during fine-tuning
        if any([d is None for d in dictionaries]):
            logger.info(
                "cannot find dictionary. assume will be used for fine-tuning")
        else:
            self.num_classes = [len(d) for d in dictionaries]
            # Embeddings of all label sets stacked along axis 0; forward()
            # splits them back using `num_classes`.
            self.label_embs_concat = paddle.create_parameter(
                shape=[sum(self.num_classes), final_dim],
                dtype='float32',
                default_initializer=paddle.nn.initializer.Uniform(low=0), )

    @classmethod
    def build_model(cls, cfg: HubertConfig, task):
        """Build a new model instance from ``cfg`` and a task object exposing
        ``cfg`` and ``dictionaries`` attributes."""

        model = HubertModel(cfg, task.cfg, task.dictionaries)
        return model

    def apply_mask(self, x, padding_mask, target_list):
        """Apply time masking and channel masking to ``x`` in place.

        Args:
            x: features of shape (B, T, C); modified in place.
            padding_mask: forwarded to ``compute_mask_indices`` for the time
                mask.
            target_list: unused; kept for interface compatibility.

        Returns:
            tuple: ``(x, mask_indices)`` where ``mask_indices`` is a boolean
            (B, T) tensor of masked time steps, or ``None`` when
            ``mask_prob`` is not positive.
        """
        B, T, C = x.shape
        if self.mask_prob > 0:
            mask_indices = compute_mask_indices(
                (B, T),
                padding_mask,
                self.mask_prob,
                self.mask_length,
                self.mask_selection,
                self.mask_other,
                min_masks=2,
                no_overlap=self.no_mask_overlap,
                min_space=self.mask_min_space, )

            # The mask must be boolean: an int64 tensor of 0/1 would be
            # interpreted as integer indices into the batch axis, and `~`
            # on it in forward() would be a bitwise not.
            mask_indices = paddle.to_tensor(
                mask_indices, dtype='bool', place=x.place)
            x[mask_indices] = self.mask_emb
        else:
            mask_indices = None

        if self.mask_channel_prob > 0:
            mask_channel_indices = compute_mask_indices(
                (B, C),
                None,
                self.mask_channel_prob,
                self.mask_channel_length,
                self.mask_channel_selection,
                self.mask_channel_other,
                no_overlap=self.no_mask_channel_overlap,
                min_space=self.mask_channel_min_space, )
            # Boolean (B, C) mask broadcast over time to (B, T, C).
            mask_channel_indices = (paddle.to_tensor(
                mask_channel_indices, dtype='bool', place=x.place).unsqueeze(1)
                                    .expand([-1, T, -1]))
            x[mask_channel_indices] = 0

        return x, mask_indices

    def compute_nce(self, x, pos, negs):
        """Compute temperature-scaled cosine-similarity logits.

        Args:
            x: projected encoder outputs, (S, D).
            pos: positive targets, (S, D).
            negs: candidate targets, (Neg, S, D).

        Returns:
            paddle.Tensor: logits of shape (S, Neg + 1); column 0 is the
            positive. Candidates identical to the positive are set to -inf.
        """
        neg_is_pos = (pos == negs).all(-1)
        pos = pos.unsqueeze(0)
        targets = paddle.concat([pos, negs], axis=0)

        logits = paddle.nn.functional.cosine_similarity(
            x.astype('float32'), targets.astype('float32'), axis=-1)
        logits /= self.logit_temp
        if paddle.any(neg_is_pos):
            # Build the masked logits explicitly instead of assigning into
            # `logits[1:]`: a slice is not guaranteed to alias `logits`, in
            # which case the assignment would be silently lost.
            neg_logits = logits[1:]
            neg_logits = paddle.where(
                neg_is_pos,
                paddle.full_like(neg_logits, float("-inf")), neg_logits)
            logits = paddle.concat([logits[:1], neg_logits], axis=0)
        logits = logits.transpose([1, 0])  # (num_x, num_cls+1)
        return logits

    def forward_features(self, source: paddle.Tensor) -> paddle.Tensor:
        """Run the convolutional feature extractor on ``source``.

        Gradients are scaled by ``feature_grad_mult`` when it is positive and
        different from 1, and disabled entirely when it is not positive.
        """
        if self.feature_grad_mult > 0:
            features = self.feature_extractor(source)
            if self.feature_grad_mult != 1.0:
                features = GradMultiply.apply(features, self.feature_grad_mult)
        else:
            with paddle.no_grad():
                features = self.feature_extractor(source)
        return features

    def forward_targets(
            self,
            features: paddle.Tensor,
            target_list: List[paddle.Tensor],
    ) -> Tuple[paddle.Tensor, paddle.Tensor]:
        """Trim features so every frame has a label and align the labels.

        Args:
            features: extractor output with time on axis 2.
            target_list: label tensors with time on axis 1.

        Returns:
            tuple: the (possibly trimmed) features and the list of targets
            re-sampled to one label per feature frame.
        """
        # Trim features to ensure labels exist and then get aligned labels
        feat_tsz = features.shape[2]
        targ_tsz = min([t.shape[1] for t in target_list])
        if self.feat2tar_ratio * feat_tsz > targ_tsz:
            feat_tsz = int(targ_tsz / self.feat2tar_ratio)
            features = features[:, :, :feat_tsz]
        # Label index for feature frame i is floor(i * feat2tar_ratio).
        target_inds = paddle.arange(feat_tsz).astype(
            'float32') * self.feat2tar_ratio
        target_list = [t[:, target_inds.astype('int64')] for t in target_list]
        return features, target_list

    def forward_padding_mask(
            self,
            features: paddle.Tensor,
            padding_mask: paddle.Tensor, ) -> paddle.Tensor:
        """Down-sample a sample-level padding mask to frame level.

        The mask is trimmed to a multiple of the number of frames
        (``features.shape[1]``), grouped per frame, and a frame is marked as
        padding only if all of its grouped entries are padding.
        """
        extra = padding_mask.shape[1] % features.shape[1]
        if extra > 0:
            padding_mask = padding_mask[:, :-extra]
        padding_mask = paddle.reshape(
            padding_mask, [padding_mask.shape[0], features.shape[1], -1])
        padding_mask = paddle.all(padding_mask, axis=-1)
        return padding_mask

    def forward(
            self,
            source: paddle.Tensor,
            target_list: Optional[List[paddle.Tensor]]=None,
            padding_mask: Optional[paddle.Tensor]=None,
            mask: bool=True,
            features_only: bool=False,
            output_layer: Optional[int]=None, ) -> Dict[str, paddle.Tensor]:
        """Encode ``source`` and optionally compute masked-prediction logits.

        Args:
            source: input passed to the feature extractor.
            target_list: optional label tensors, one per label set.
            padding_mask: optional sample-level padding mask.
            mask: whether to apply masking before the encoder.
            features_only: if True, return encoder outputs without logits.
            output_layer: 1-based index of the encoder layer to return;
                ``None`` means the final layer.

        Returns:
            dict: with ``features_only`` the keys are ``x``, ``padding_mask``
            and ``features``; otherwise ``logit_m_list``, ``logit_u_list``,
            ``padding_mask`` and ``features_pen``.
        """
        features = self.forward_features(source)
        if target_list is not None:
            features, target_list = self.forward_targets(features, target_list)

        # Mean squared activation of the extractor output (extra loss term).
        features_pen = features.pow(2).mean()

        features = features.transpose([0, 2, 1])
        features = self.layer_norm(features)
        unmasked_features = features.clone()

        if padding_mask is not None:
            padding_mask = self.forward_padding_mask(features, padding_mask)

        if self.post_extract_proj is not None:
            features = self.post_extract_proj(features)

        features = self.dropout_input(features)
        unmasked_features = self.dropout_features(unmasked_features)

        if mask:
            x, mask_indices = self.apply_mask(features, padding_mask,
                                              target_list)
        else:
            x = features
            mask_indices = None

        # feature: (B, T, D), float
        # target: (B, T), long
        # x: (B, T, D), float
        # padding_mask: (B, T), bool
        # mask_indices: (B, T), bool
        x, _ = self.encoder(
            x,
            padding_mask=padding_mask,
            layer=None if output_layer is None else output_layer - 1, )

        if features_only:
            return {"x": x, "padding_mask": padding_mask, "features": features}

        # Closure over `self`: it is called below with exactly three
        # positional arguments, so it must not declare its own `self`.
        def compute_pred(proj_x, target, label_embs):
            # compute logits for the i-th label set
            y = paddle.index_select(
                label_embs, index=target.astype('int64'), axis=0)
            negs = paddle.expand(
                label_embs.unsqueeze(1),
                [label_embs.shape[0], proj_x.shape[0], label_embs.shape[-1]])
            if self.target_glu:
                y = self.target_glu(y)
                negs = self.target_glu(negs)
            # proj_x: (S, D)
            # y: (S, D)
            # negs: (Neg, S, D)
            return self.compute_nce(proj_x, y, negs)

        label_embs_list = self.label_embs_concat.split(self.num_classes, 0)

        if not self.skip_masked:
            masked_indices = paddle.logical_and(~padding_mask, mask_indices)
            proj_x_m = self.final_proj(x[masked_indices])
            if self.untie_final_proj:
                # Paddle's chunk() takes `axis`, not `dim`.
                proj_x_m_list = proj_x_m.chunk(len(target_list), axis=-1)
            else:
                proj_x_m_list = [proj_x_m for _ in range(len(target_list))]
            logit_m_list = [
                compute_pred(proj_x_m, t[masked_indices], label_embs_list[i])
                for i, (proj_x_m, t
                        ) in enumerate(zip(proj_x_m_list, target_list))
            ]
        else:
            logit_m_list = [None for _ in target_list]

        if not self.skip_nomask:
            nomask_indices = paddle.logical_and(~padding_mask, ~mask_indices)
            proj_x_u = self.final_proj(x[nomask_indices])
            if self.untie_final_proj:
                proj_x_u_list = proj_x_u.chunk(len(target_list), axis=-1)
            else:
                proj_x_u_list = [proj_x_u for _ in range(len(target_list))]

            logit_u_list = [
                compute_pred(proj_x_u, t[nomask_indices], label_embs_list[i])
                for i, (proj_x_u, t
                        ) in enumerate(zip(proj_x_u_list, target_list))
            ]
        else:
            logit_u_list = [None for _ in target_list]

        result = {
            "logit_m_list": logit_m_list,
            "logit_u_list": logit_u_list,
            "padding_mask": padding_mask,
            "features_pen": features_pen,
        }
        return result

    def extract_features(
            self,
            source: paddle.Tensor,
            padding_mask: Optional[paddle.Tensor]=None,
            mask: bool=False,
            ret_conv: bool=False,
            output_layer: Optional[int]=None,
    ) -> Tuple[paddle.Tensor, paddle.Tensor]:
        """Run ``forward`` in ``features_only`` mode.

        Returns:
            tuple: ``(feature, padding_mask)`` where ``feature`` is the
            ``"features"`` entry when ``ret_conv`` is True and the encoder
            output ``"x"`` otherwise.
        """
        res = self.forward(
            source,
            padding_mask=padding_mask,
            mask=mask,
            features_only=True,
            output_layer=output_layer, )
        feature = res["features"] if ret_conv else res["x"]
        return feature, res["padding_mask"]

    def get_logits(self, net_output, is_masked=True):
        """Return the non-``None`` logits from ``net_output`` cast to float32.

        ``is_masked`` selects ``"logit_m_list"`` (True) or ``"logit_u_list"``
        (False).
        """
        if is_masked:
            logits_list = net_output["logit_m_list"]
        else:
            logits_list = net_output["logit_u_list"]
        logits_list = [
            paddle.cast(x, 'float32') for x in logits_list if x is not None
        ]
        return logits_list

    def get_targets(self, net_output, is_masked=True):
        """Return int64 zero tensors, one per logits tensor.

        NOTE(review): each target has the same shape as its logits tensor;
        if the loss expects one class index per row (the positive is column
        0 of the logits) this should probably be shape (num_x,) - confirm
        against the criterion that consumes it.
        """
        logits_list = self.get_logits(net_output, is_masked)
        targets_list = [
            paddle.zeros_like(x, dtype='int64') for x in logits_list
        ]
        return targets_list

    def get_extra_losses(self, net_output):
        """Collect auxiliary losses present in ``net_output``.

        Returns:
            tuple: ``(extra_losses, names)``; currently only
            ``"features_pen"`` is looked up.
        """
        extra_losses = []
        names = []

        if "features_pen" in net_output:
            extra_losses.append(net_output["features_pen"])
            names.append("features_pen")

        return extra_losses, names

    def remove_pretraining_modules(self):
        """Drop the modules that are only used by the pretraining head."""
        self.target_glu = None
        self.final_proj = None


================================================
FILE: paddlespeech/s2t/models/lm/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/models/lm/dataset.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from paddle.io import Dataset

from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
from paddlespeech.s2t.io.utility import pad_list


class TextDataset(Dataset):
    """Dataset whose items are the stripped lines of a text file."""

    def __init__(self, file_path):
        """Read ``file_path`` and keep every line with surrounding whitespace
        removed."""
        self._manifest = []
        with open(file_path) as manifest_file:
            self._manifest.extend(
                raw_line.strip() for raw_line in manifest_file)

    @classmethod
    def from_file(cls, file_path):
        """Alternative constructor; equivalent to ``cls(file_path)``."""
        return cls(file_path)

    def __getitem__(self, idx):
        """Return the stored line at position ``idx``."""
        return self._manifest[idx]

    def __len__(self):
        """Number of lines that were read."""
        return len(self._manifest)


class TextCollatorSpm():
    """Collate lines of the form ``"<key> <text>"`` into padded id arrays
    for language-model training."""

    def __init__(self, unit_type, vocab_filepath, spm_model_prefix):
        """Create the text featurizer and cache its eos and blank ids."""
        assert (vocab_filepath is not None)
        self.text_featurizer = TextFeaturizer(
            unit_type=unit_type,
            vocab=vocab_filepath,
            spm_model_prefix=spm_model_prefix)
        self.eos_id = self.text_featurizer.eos_id
        self.blank_id = self.text_featurizer.blank_id

    def __call__(self, batch):
        """
        Turn a batch of text lines into model inputs and targets.

        The first space-separated field of each line is the key and the
        rest is the text. Inputs are the token ids prefixed with eos,
        outputs are the token ids followed by eos; both are padded with
        blank_id by pad_list.

        return type  [List, np.array [B, T], np.array [B, T], np.array[B]]
        """
        utt_keys = []
        input_seqs = []
        output_seqs = []
        seq_lens = []

        for line in batch:
            fields = line.split(" ")
            utt_keys.append(fields[0].strip())
            token_ids = self.text_featurizer.featurize(" ".join(fields[1:]))

            shifted_in = np.array([self.eos_id] + token_ids)
            shifted_out = np.array(token_ids + [self.eos_id])
            input_seqs.append(shifted_in.astype(np.int64))
            output_seqs.append(shifted_out.astype(np.int64))
            # +1 accounts for the eos token added to each sequence
            seq_lens.append(len(token_ids) + 1)

        ys_input_pad = pad_list(input_seqs, self.blank_id).astype(np.int64)
        ys_output_pad = pad_list(output_seqs, self.blank_id).astype(np.int64)
        y_lens = np.array(seq_lens).astype(np.int64)
        return utt_keys, ys_input_pad, ys_output_pad, y_lens


================================================
FILE: paddlespeech/s2t/models/lm/transformer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
from typing import Any
from typing import List
from typing import Tuple

import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from paddlespeech.s2t.decoders.scorers.scorer_interface import BatchScorerInterface
from paddlespeech.s2t.models.lm_interface import LMInterface
from paddlespeech.s2t.modules.encoder import TransformerEncoder
from paddlespeech.s2t.modules.mask import subsequent_mask
from paddlespeech.s2t.utils.log import Log

logger = Log(__name__).getlog()


class TransformerLM(nn.Layer, LMInterface, BatchScorerInterface):
    """Transformer language model usable as a (batch) beam-search scorer."""

    def __init__(self,
                 n_vocab: int,
                 pos_enc: str=None,
                 embed_unit: int=128,
                 att_unit: int=256,
                 head: int=2,
                 unit: int=1024,
                 layer: int=4,
                 dropout_rate: float=0.5,
                 emb_dropout_rate: float=0.0,
                 att_dropout_rate: float=0.0,
                 tie_weights: bool=False,
                 **kwargs):
        """Build the language model.

        Args:
            n_vocab (int): vocabulary size.
            pos_enc (str): ``"sinusoidal"`` or ``None`` (no positional
                encoding).
            embed_unit (int): token embedding dimension.
            att_unit (int): encoder output dimension.
            head (int): number of attention heads.
            unit (int): number of linear units in the encoder.
            layer (int): number of encoder blocks.
            dropout_rate (float): encoder dropout rate.
            emb_dropout_rate (float): dropout applied to embeddings; 0.0
                disables the dropout layer.
            att_dropout_rate (float): attention dropout rate.
            tie_weights (bool): share the embedding matrix with the output
                projection; requires ``att_unit == embed_unit``.
            **kwargs: accepted and ignored.

        Raises:
            ValueError: if ``pos_enc`` is not a supported option.
        """
        nn.Layer.__init__(self)

        if pos_enc == "sinusoidal":
            pos_enc_layer_type = "abs_pos"
        elif pos_enc is None:
            pos_enc_layer_type = "no_pos"
        else:
            raise ValueError(f"unknown pos-enc option: {pos_enc}")

        self.embed = nn.Embedding(n_vocab, embed_unit)

        if emb_dropout_rate == 0.0:
            self.embed_drop = None
        else:
            self.embed_drop = nn.Dropout(emb_dropout_rate)

        self.encoder = TransformerEncoder(
            input_size=embed_unit,
            output_size=att_unit,
            attention_heads=head,
            linear_units=unit,
            num_blocks=layer,
            dropout_rate=dropout_rate,
            attention_dropout_rate=att_dropout_rate,
            input_layer="linear",
            pos_enc_layer_type=pos_enc_layer_type,
            concat_after=False,
            static_chunk_size=1,
            use_dynamic_chunk=False,
            use_dynamic_left_chunk=False)

        self.decoder = nn.Linear(att_unit, n_vocab)

        logger.info("Tie weights set to {}".format(tie_weights))
        logger.info("Dropout set to {}".format(dropout_rate))
        logger.info("Emb Dropout set to {}".format(emb_dropout_rate))
        logger.info("Att Dropout set to {}".format(att_dropout_rate))

        self._tie_weights = tie_weights
        if tie_weights:
            assert (
                att_unit == embed_unit
            ), "Tie Weights: True need embedding and final dimensions to match"
            # paddle.nn.Linear stores its weight as [in_features,
            # out_features] while the embedding matrix is [n_vocab,
            # embed_unit], so the embedding cannot simply be assigned as the
            # decoder weight. The tied projection is done in
            # `_output_logits` with the transposed embedding matrix; the
            # decoder's own weight is unused and therefore frozen.
            self.decoder.weight.stop_gradient = True

    def _output_logits(self, h: paddle.Tensor) -> paddle.Tensor:
        """Project encoder outputs (..., att_unit) to vocabulary logits."""
        if self._tie_weights:
            return paddle.matmul(
                h, self.embed.weight, transpose_y=True) + self.decoder.bias
        return self.decoder(h)

    def _target_mask(self, ys_in_pad):
        """Combine the non-padding mask (token id 0 is treated as padding)
        with the subsequent (causal) mask."""
        ys_mask = ys_in_pad != 0
        m = subsequent_mask(paddle.shape(ys_mask)[-1]).unsqueeze(0)
        return ys_mask.unsqueeze(-2) & m

    def forward(
            self, x: paddle.Tensor, t: paddle.Tensor
    ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor, paddle.Tensor,
               paddle.Tensor]:
        """Compute LM loss value from buffer sequences.

        Positions where ``x`` equals 0 are treated as padding and excluded
        from all sums.

        Args:
            x (paddle.Tensor): Input ids. (batch, len)
            t (paddle.Tensor): Target ids. (batch, len)

        Returns:
            tuple: Tuple of
                loss to backward (scalar): total NLL divided by the number
                    of non-padding elements,
                negative log-likelihood of t: -log p(t) (scalar),
                the number of non-padding elements in x (scalar),
                per-sequence negative log-likelihood (batch,) and
                per-sequence number of non-padding elements (batch,)

        Notes:
            The second and third return values are used
            in perplexity: p(t)^{-n} = exp(-log p(t) / n)

        """
        batch_size = paddle.shape(x)[0]
        xm = x != 0
        xlen = xm.sum(axis=1)
        if self.embed_drop is not None:
            emb = self.embed_drop(self.embed(x))
        else:
            emb = self.embed(x)
        h, _ = self.encoder(emb, xlen)
        y = self._output_logits(h)
        loss = F.cross_entropy(
            y.reshape([-1, paddle.shape(y)[-1]]),
            t.reshape([-1]),
            reduction="none")
        mask = xm.to(loss.dtype)
        logp = loss * mask.reshape([-1])
        nll = logp.reshape([batch_size, -1]).sum(-1)
        nll_count = mask.sum(-1)
        logp = logp.sum()
        count = mask.sum()
        return logp / count, logp, count, nll, nll_count

    # beam search API (see ScorerInterface)
    def score(self, y: paddle.Tensor, state: Any,
              x: paddle.Tensor) -> Tuple[paddle.Tensor, Any]:
        """Score new token.

        Args:
            y (paddle.Tensor): 1D paddle.int64 prefix tokens.
            state: Scorer state for prefix tokens
            x (paddle.Tensor): encoder feature that generates ys (unused).

        Returns:
            tuple[paddle.Tensor, Any]: Tuple of
                paddle.float32 scores for next token (n_vocab)
                and next state for ys

        """
        y = y.unsqueeze(0)

        if self.embed_drop is not None:
            emb = self.embed_drop(self.embed(y))
        else:
            emb = self.embed(y)

        h, _, cache = self.encoder.forward_one_step(
            emb, self._target_mask(y), cache=state)
        # Only the last position predicts the next token.
        h = self._output_logits(h[:, -1])
        logp = F.log_softmax(h).squeeze(0)
        return logp, cache

    # batch beam search API (see BatchScorerInterface)
    def batch_score(self,
                    ys: paddle.Tensor,
                    states: List[Any],
                    xs: paddle.Tensor) -> Tuple[paddle.Tensor, List[Any]]:
        """Score new token batch (required).

        Args:
            ys (paddle.Tensor): paddle.int64 prefix tokens (n_batch, ylen).
            states (List[Any]): Scorer states for prefix tokens.
            xs (paddle.Tensor):
                The encoder feature that generates ys (n_batch, xlen, n_feat).
                Unused.

        Returns:
            tuple[paddle.Tensor, List[Any]]: Tuple of
                batchfied scores for next token with shape of `(n_batch, n_vocab)`
                and next state list for ys.

        """
        # merge states
        n_batch = len(ys)
        n_layers = len(self.encoder.encoders)
        if states[0] is None:
            batch_state = None
        else:
            # transpose state of [batch, layer] into [layer, batch]
            batch_state = [
                paddle.stack([states[b][i] for b in range(n_batch)])
                for i in range(n_layers)
            ]

        if self.embed_drop is not None:
            emb = self.embed_drop(self.embed(ys))
        else:
            emb = self.embed(ys)

        # batch decoding
        h, _, states = self.encoder.forward_one_step(
            emb, self._target_mask(ys), cache=batch_state)
        h = self._output_logits(h[:, -1])
        logp = F.log_softmax(h)

        # transpose state of [layer, batch] into [batch, layer]
        state_list = [[states[i][b] for i in range(n_layers)]
                      for b in range(n_batch)]
        return logp, state_list


if __name__ == "__main__":
    # Manual smoke test: build a TransformerLM, load local weights and score
    # a growing token prefix one step at a time.
    # Constructor arguments left at their defaults here:
    #   emb_dropout_rate=0.0, att_dropout_rate=0.0, tie_weights=False
    tlm = TransformerLM(
        n_vocab=5002,
        pos_enc=None,
        embed_unit=128,
        att_unit=512,
        head=8,
        unit=2048,
        layer=16,
        dropout_rate=0.5, )

    paddle.set_device("cpu")
    tlm.set_state_dict(paddle.load("transformerLM.pdparams"))
    tlm.eval()

    # Test the score: feed each prefix together with the state returned for
    # the previous one.
    state = None
    for prefix in ([5], [5, 10], [5, 10, 0]):
        prefix_ids = paddle.to_tensor(np.array(prefix))
        output, state = tlm.score(prefix_ids, state, None)
    print("output", output)
    """
    #Test the batch score
    batch_size = 2
    inp2 = np.array([[5], [10]])
    inp2 = paddle.to_tensor(inp2)
    output, states = tlm.batch_score(
        inp2, [(None,None,0)] * batch_size)
    inp3 = np.array([[100], [30]])
    inp3 = paddle.to_tensor(inp3)
    output, states = tlm.batch_score(
        inp3, states)
    print("output", output)
    #print("cache", cache)
    #np.save("output_pd.npy", output)
    """


================================================
FILE: paddlespeech/s2t/models/lm_interface.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""Language model interface."""
import argparse

from paddlespeech.s2t.decoders.scorers.scorer_interface import ScorerInterface
from paddlespeech.s2t.utils.dynamic_import import dynamic_import


class LMInterface(ScorerInterface):
    """Interface that language models must implement."""

    @staticmethod
    def add_arguments(parser):
        """Register model-specific command line options.

        The base implementation adds nothing and returns ``parser`` as is.
        """
        return parser

    @classmethod
    def build(cls, n_vocab: int, **kwargs):
        """Instantiate this class from python-level keyword options.

        The keyword options are wrapped in an ``argparse.Namespace`` which is
        passed to the constructor as its second positional argument.

        Args:
            n_vocab (int): The number of vocabulary.

        Returns:
            LMInterface: A new instance of this class.

        """
        options = argparse.Namespace(**kwargs)
        instance = cls(n_vocab, options)
        return instance

    def forward(self, x, t):
        """Compute LM loss value from buffer sequences.

        Must be overridden by subclasses; the base implementation raises.

        Args:
            x (paddle.Tensor): Input ids. (batch, len)
            t (paddle.Tensor): Target ids. (batch, len)

        Returns:
            tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor]: Tuple of
                loss to backward (scalar),
                negative log-likelihood of t: -log p(t) (scalar) and
                the number of elements in x (scalar)

        Raises:
            NotImplementedError: always, in this base class.

        Notes:
            The last two return values are used
            in perplexity: p(t)^{-n} = exp(-log p(t) / n)

        """
        raise NotImplementedError("forward method is not implemented")


# Short aliases accepted by `dynamic_import_lm`, each mapped to a
# "module_name:class_name" import path.
predefined_lms = dict(
    transformer="paddlespeech.s2t.models.lm.transformer:TransformerLM", )


def dynamic_import_lm(module):
    """Resolve and import a language-model class.

    Args:
        module (str): module_name:class_name or alias in `predefined_lms`

    Returns:
        type: LM class; asserted to be a subclass of `LMInterface`

    """
    lm_class = dynamic_import(module, predefined_lms)
    implements_interface = issubclass(lm_class, LMInterface)
    assert implements_interface, f"{module} does not implement LMInterface"
    return lm_class


================================================
FILE: paddlespeech/s2t/models/st_interface.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""ST Interface module."""
from .asr_interface import ASRInterface
from paddlespeech.s2t.utils.dynamic_import import dynamic_import


class STInterface(ASRInterface):
    """Interface that speech-translation (ST) models must implement.

    NOTE: This class derives from ASRInterface so that translation and
    recognition can be performed jointly when doing multi-task learning
    with the ASR task.

    """

    def translate(self,
                  x,
                  trans_args,
                  char_list=None,
                  rnnlm=None,
                  ensemble_models=[]):
        """Translate x for evaluation.

        Must be overridden by subclasses; the base implementation raises
        NotImplementedError.

        :param ndarray x: input acoustic feature (B, T, D) or (T, D)
        :param namespace trans_args: argument namespace containing options
        :param list char_list: list of characters
        :param paddle.nn.Layer rnnlm: language model module
        :param list ensemble_models: presumably additional models to
            ensemble with - not used by the base class
        :return: N-best decoding results
        :rtype: list
        """
        raise NotImplementedError("translate method is not implemented")

    def translate_batch(self, x, trans_args, char_list=None, rnnlm=None):
        """Beam search implementation for batch.

        Must be overridden by subclasses; the base implementation raises
        NotImplementedError.

        :param paddle.Tensor x: encoder hidden state sequences (B, Tmax, Henc)
        :param namespace trans_args: argument namespace containing options
        :param list char_list: list of characters
        :param paddle.nn.Layer rnnlm: language model module
        :return: N-best decoding results
        :rtype: list
        """
        raise NotImplementedError("Batch decoding is not supported yet.")


# Short aliases accepted by `dynamic_import_st`, each mapped to a
# "module_name:class_name" import path.
predefined_st = dict(transformer="paddlespeech.s2t.models.u2_st:U2STModel")


def dynamic_import_st(module):
    """Resolve and import a speech-translation model class.

    Args:
        module (str): module_name:class_name or alias in `predefined_st`

    Returns:
        type: ST class; asserted to be a subclass of `STInterface`

    """
    st_class = dynamic_import(module, predefined_st)
    implements_interface = issubclass(st_class, STInterface)
    assert implements_interface, f"{module} does not implement STInterface"
    return st_class


================================================
FILE: paddlespeech/s2t/models/u2/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .u2 import U2InferModel
from .u2 import U2Model
from .updater import U2Evaluator
from .updater import U2Updater

__all__ = ["U2Model", "U2InferModel", "U2Evaluator", "U2Updater"]


================================================
FILE: paddlespeech/s2t/models/u2/u2.py
================================================
# Copyright 2021 Mobvoi Inc. All Rights Reserved.
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from wenet(https://github.com/wenet-e2e/wenet)
"""U2 ASR Model
Unified Streaming and Non-streaming Two-pass End-to-end Model for Speech Recognition
(https://arxiv.org/pdf/2012.05481.pdf)
"""
import sys
import time
from collections import defaultdict
from typing import Dict
from typing import List
from typing import Optional
from typing import Tuple

import paddle
from paddle import jit
from paddle import nn

from paddlespeech.audio.utils.tensor_utils import add_sos_eos
from paddlespeech.audio.utils.tensor_utils import pad_sequence
from paddlespeech.audio.utils.tensor_utils import reverse_pad_list
from paddlespeech.audio.utils.tensor_utils import st_reverse_pad_list
from paddlespeech.audio.utils.tensor_utils import th_accuracy
from paddlespeech.s2t.decoders.scorers.ctc import CTCPrefixScorer
from paddlespeech.s2t.frontend.utility import IGNORE_ID
from paddlespeech.s2t.frontend.utility import load_cmvn
from paddlespeech.s2t.models.asr_interface import ASRInterface
from paddlespeech.s2t.modules.cmvn import GlobalCMVN
from paddlespeech.s2t.modules.ctc import CTCDecoderBase
from paddlespeech.s2t.modules.decoder import BiTransformerDecoder
from paddlespeech.s2t.modules.decoder import TransformerDecoder
from paddlespeech.s2t.modules.encoder import ConformerEncoder
from paddlespeech.s2t.modules.encoder import SqueezeformerEncoder
from paddlespeech.s2t.modules.encoder import TransformerEncoder
from paddlespeech.s2t.modules.initializer import DefaultInitializerContext
from paddlespeech.s2t.modules.loss import LabelSmoothingLoss
from paddlespeech.s2t.modules.mask import make_pad_mask
from paddlespeech.s2t.modules.mask import mask_finished_preds
from paddlespeech.s2t.modules.mask import mask_finished_scores
from paddlespeech.s2t.modules.mask import subsequent_mask
from paddlespeech.s2t.utils import checkpoint
from paddlespeech.s2t.utils import layer_tools
from paddlespeech.s2t.utils.ctc_utils import remove_duplicates_and_blank
from paddlespeech.s2t.utils.log import Log
from paddlespeech.s2t.utils.utility import log_add
from paddlespeech.s2t.utils.utility import UpdateConfig

__all__ = ["U2Model", "U2InferModel"]

logger = Log(__name__).getlog()


class U2BaseModel(ASRInterface, nn.Layer):
    """CTC-Attention hybrid Encoder-Decoder model"""

    def __init__(self,
                 vocab_size: int,
                 encoder: TransformerEncoder,
                 decoder: TransformerDecoder,
                 ctc: CTCDecoderBase,
                 ctc_weight: float=0.5,
                 ignore_id: int=IGNORE_ID,
                 reverse_weight: float=0.0,
                 lsm_weight: float=0.0,
                 length_normalized_loss: bool=False,
                 **kwargs):
        """Wire already-built sub-modules into one hybrid model.

        Args:
            vocab_size (int): size of the output vocabulary; the last id
                (`vocab_size - 1`) is used for both <sos> and <eos>.
            encoder: acoustic encoder layer.
            decoder: attention decoder layer.
            ctc: CTC head.
            ctc_weight (float): weight of the CTC loss in `forward`, must lie
                in [0, 1].
            ignore_id (int): label id treated as padding by the attention loss.
            reverse_weight (float): weight passed to the decoder / used to mix
                the right-to-left loss.
            lsm_weight (float): label smoothing factor of the attention loss.
            length_normalized_loss (bool): forwarded to `LabelSmoothingLoss`
                as `normalize_length`.
            **kwargs: accepted and ignored, so that a whole config dict can be
                splatted into this constructor.
        """
        assert 0.0 <= ctc_weight <= 1.0, ctc_weight

        # The paddle Layer machinery has to be ready before attributes are set.
        nn.Layer.__init__(self)

        # Plain hyper-parameters.
        self.vocab_size = vocab_size
        self.ignore_id = ignore_id
        self.ctc_weight = ctc_weight
        self.reverse_weight = reverse_weight

        # <sos> and <eos> deliberately share the same (last) vocabulary id.
        boundary_id = vocab_size - 1
        self.sos = boundary_id
        self.eos = boundary_id

        # Sub-layers, registered in encoder -> decoder -> ctc -> loss order.
        self.encoder = encoder
        self.decoder = decoder
        self.ctc = ctc
        att_criterion = LabelSmoothingLoss(
            size=vocab_size,
            padding_idx=ignore_id,
            smoothing=lsm_weight,
            normalize_length=length_normalized_loss, )
        self.criterion_att = att_criterion

    def forward(
            self,
            speech: paddle.Tensor,
            speech_lengths: paddle.Tensor,
            text: paddle.Tensor,
            text_lengths: paddle.Tensor,
    ) -> Tuple[Optional[paddle.Tensor], Optional[paddle.Tensor], Optional[
            paddle.Tensor]]:
        """Frontend + Encoder + Decoder + Calc loss
        Args:
            speech: (Batch, Length, ...)
            speech_lengths: (Batch, )
            text: (Batch, Length)
            text_lengths: (Batch,)
        Returns:
            total_loss, attention_loss, ctc_loss.
            attention_loss is None when `ctc_weight == 1.0`, ctc_loss is None
            when `ctc_weight == 0.0`; total_loss is the weighted sum of the
            branches that were actually computed.
        """
        assert text_lengths.dim() == 1, text_lengths.shape
        # Check that batch_size is unified
        assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] ==
                text_lengths.shape[0]), (speech.shape, speech_lengths.shape,
                                         text.shape, text_lengths.shape)
        # 1. Encoder
        encoder_out, encoder_mask = self.encoder(speech, speech_lengths)
        encoder_out_lens = encoder_mask.squeeze(1).sum(1)  #[B, 1, T] -> [B]

        # 2a. Attention-decoder branch (skipped for a pure-CTC setup).
        # The accuracy returned alongside the loss is not used here.
        loss_att = None
        if self.ctc_weight != 1.0:
            loss_att, _ = self._calc_att_loss(encoder_out, encoder_mask, text,
                                              text_lengths,
                                              self.reverse_weight)

        # 2b. CTC branch (skipped for a pure-attention setup).
        loss_ctc = None
        if self.ctc_weight != 0.0:
            loss_ctc = self.ctc(encoder_out, encoder_out_lens, text,
                                text_lengths)

        # 3. Combine whichever branches were computed.
        if loss_ctc is None:
            loss = loss_att
        elif loss_att is None:
            loss = loss_ctc
        else:
            loss = self.ctc_weight * loss_ctc + (1 - self.ctc_weight) * loss_att
        return loss, loss_att, loss_ctc

    def _calc_att_loss(self,
                       encoder_out: paddle.Tensor,
                       encoder_mask: paddle.Tensor,
                       ys_pad: paddle.Tensor,
                       ys_pad_lens: paddle.Tensor,
                       reverse_weight: float) -> Tuple[paddle.Tensor, float]:
        """Attention-decoder loss (optionally mixed with a reversed-target loss).

        Args:
            encoder_out (paddle.Tensor): [B, Tmax, D]
            encoder_mask (paddle.Tensor): [B, 1, Tmax]
            ys_pad (paddle.Tensor): [B, Umax]
            ys_pad_lens (paddle.Tensor): [B]
            reverse_weight (float): weight of the right-to-left loss; the
                left-to-right loss is weighted by `1 - reverse_weight`.

        Returns:
            Tuple[paddle.Tensor, float]: the mixed attention loss and the
                accuracy computed on the left-to-right decoder output.
        """
        # Left-to-right decoder input / target.
        l2r_in, l2r_target = add_sos_eos(ys_pad, self.sos, self.eos,
                                         self.ignore_id)
        l2r_in_lens = ys_pad_lens + 1

        # Right-to-left decoder input / target, built from the reversed labels.
        reversed_ys = reverse_pad_list(ys_pad, ys_pad_lens,
                                       float(self.ignore_id))
        r2l_in, r2l_target = add_sos_eos(reversed_ys, self.sos, self.eos,
                                         self.ignore_id)

        # One decoder call yields both directions.
        l2r_out, r2l_out, _ = self.decoder(encoder_out, encoder_mask, l2r_in,
                                           l2r_in_lens, r2l_in, reverse_weight)

        l2r_loss = self.criterion_att(l2r_out, l2r_target)
        if reverse_weight > 0.0:
            r2l_loss = self.criterion_att(r2l_out, r2l_target)
        else:
            # No reverse branch requested: contribute a zero term.
            r2l_loss = paddle.to_tensor(0.0)
        mixed_loss = l2r_loss * (1 - reverse_weight) + r2l_loss * reverse_weight

        flat_logits = l2r_out.reshape([-1, self.vocab_size])
        accuracy = th_accuracy(
            flat_logits,
            l2r_target,
            ignore_label=self.ignore_id, )
        return mixed_loss, accuracy

    def _forward_encoder(
            self,
            speech: paddle.Tensor,
            speech_lengths: paddle.Tensor,
            decoding_chunk_size: int=-1,
            num_decoding_left_chunks: int=-1,
            simulate_streaming: bool=False,
    ) -> Tuple[paddle.Tensor, paddle.Tensor]:
        """Run the encoder, either in one shot or chunk by chunk.

        Args:
            speech (paddle.Tensor): [B, Tmax, D]
            speech_lengths (paddle.Tensor): [B]
            decoding_chunk_size (int, optional): chunk size. Defaults to -1.
            num_decoding_left_chunks (int, optional): number of left chunks.
                Defaults to -1.
            simulate_streaming (bool, optional): streaming or not.
                Defaults to False.

        Returns:
            Tuple[paddle.Tensor, paddle.Tensor]:
                encoder hiddens (B, Tmax, D),
                encoder hiddens mask (B, 1, Tmax).
        """
        # Both code paths receive the same chunking options.
        chunk_opts = dict(
            decoding_chunk_size=decoding_chunk_size,
            num_decoding_left_chunks=num_decoding_left_chunks)

        # Chunk-by-chunk encoding needs a positive chunk size; note that this
        # path does not take `speech_lengths`.
        use_chunked = simulate_streaming and decoding_chunk_size > 0
        if use_chunked:
            hidden, mask = self.encoder.forward_chunk_by_chunk(speech,
                                                               **chunk_opts)
        else:
            hidden, mask = self.encoder(speech, speech_lengths, **chunk_opts)
        # hidden: (B, maxlen, encoder_dim)
        return hidden, mask

    def recognize(
            self,
            speech: paddle.Tensor,
            speech_lengths: paddle.Tensor,
            beam_size: int=10,
            decoding_chunk_size: int=-1,
            num_decoding_left_chunks: int=-1,
            simulate_streaming: bool=False, ) -> paddle.Tensor:
        """ Apply batched beam search on the attention decoder.

        Every utterance keeps `beam_size` running hypotheses; at each step the
        decoder proposes `beam_size` continuations per hypothesis and the best
        `beam_size` of the resulting N*N candidates are kept per utterance.

        Args:
            speech (paddle.Tensor): (batch, max_len, feat_dim)
            speech_length (paddle.Tensor): (batch, )
            beam_size (int): beam size for beam search
            decoding_chunk_size (int): decoding chunk for dynamic chunk
                trained model.
                <0: for decoding, use full chunk.
                >0: for decoding, use fixed chunk size as set.
                0: used for training, it's prohibited here
            num_decoding_left_chunks (int): forwarded to the encoder.
            simulate_streaming (bool): whether do encoder forward in a
                streaming fashion
        Returns:
            paddle.Tensor: decoding result, (batch, max_result_len); the
                leading <sos> column is stripped.
        """
        assert speech.shape[0] == speech_lengths.shape[0]
        assert decoding_chunk_size != 0
        device = speech.place
        batch_size = speech.shape[0]

        # NOTE(review): `repeat`, `paddle.long`, `paddle.float`, `paddle.cat`,
        # `.to(...)` and `.long()` used below are torch-style names; they are
        # presumably provided by project-level patches of paddle — confirm.

        # Let's assume B = batch_size and N = beam_size
        # 1. Encoder
        encoder_out, encoder_mask = self._forward_encoder(
            speech, speech_lengths, decoding_chunk_size,
            num_decoding_left_chunks,
            simulate_streaming)  # (B, maxlen, encoder_dim)
        maxlen = encoder_out.shape[1]
        encoder_dim = encoder_out.shape[2]
        running_size = batch_size * beam_size
        # Duplicate every utterance N times so each beam has its own row.
        encoder_out = encoder_out.unsqueeze(1).repeat(
            1, beam_size, 1, 1).reshape(
                [running_size, maxlen,
                 encoder_dim])  # (B*N, maxlen, encoder_dim)
        encoder_mask = encoder_mask.unsqueeze(1).repeat(
            1, beam_size, 1, 1).reshape([running_size, 1,
                                         maxlen])  # (B*N, 1, max_len)

        # Every hypothesis starts with a single <sos> token.
        hyps = paddle.ones(
            [running_size, 1], dtype=paddle.long).fill_(self.sos)  # (B*N, 1)
        # log scale score
        # Only the first beam of each utterance starts at 0; the others start
        # at -inf so identical initial beams do not all survive the first prune.
        scores = paddle.to_tensor(
            [0.0] + [-float('inf')] * (beam_size - 1), dtype=paddle.float)
        scores = scores.to(device).repeat(batch_size).unsqueeze(1).to(
            device)  # (B*N, 1)
        end_flag = paddle.zeros_like(scores, dtype=paddle.bool)  # (B*N, 1)
        cache: Optional[List[paddle.Tensor]] = None
        # 2. Decoder forward step by step
        for i in range(1, maxlen + 1):
            # Stop if all batch and all beam produce eos
            if end_flag.sum() == running_size:
                break

            # 2.1 Forward decoder step
            hyps_mask = subsequent_mask(i).unsqueeze(0).repeat(
                running_size, 1, 1).to(device)  # (B*N, i, i)
            # logp: (B*N, vocab)
            logp, cache = self.decoder.forward_one_step(
                encoder_out, encoder_mask, hyps, hyps_mask, cache)
            # 2.2 First beam prune: select topk best prob at current time
            top_k_logp, top_k_index = logp.topk(beam_size)  # (B*N, N)
            # Rows flagged in end_flag get their scores / predictions
            # overridden by the mask helpers (predictions with self.eos).
            top_k_logp = mask_finished_scores(top_k_logp, end_flag)
            top_k_index = mask_finished_preds(top_k_index, end_flag, self.eos)

            # 2.3 Second beam prune: select topk score with history
            scores = scores + top_k_logp  # (B*N, N), broadcast add
            scores = scores.reshape(
                [batch_size, beam_size * beam_size])  # (B, N*N)
            scores, offset_k_index = scores.topk(k=beam_size)  # (B, N)
            scores = scores.reshape([-1, 1])  # (B*N, 1)

            # 2.4. Compute base index in top_k_index,
            # regard top_k_index as (B*N*N),regard offset_k_index as (B*N),
            # then find offset_k_index in top_k_index
            base_k_index = paddle.arange(batch_size).reshape([-1, 1]).repeat(
                1, beam_size)  # (B, N)
            base_k_index = base_k_index * beam_size * beam_size
            best_k_index = base_k_index.reshape([-1]) + offset_k_index.reshape(
                [-1])  # (B*N)

            # 2.5 Update best hyps
            best_k_pred = paddle.index_select(
                top_k_index.reshape([-1]), index=best_k_index, axis=0)  # (B*N)
            # Flat candidate index // N gives the row of the parent hypothesis.
            best_hyps_index = best_k_index // beam_size
            last_best_k_hyps = paddle.index_select(
                hyps, index=best_hyps_index, axis=0)  # (B*N, i)
            hyps = paddle.cat(
                (last_best_k_hyps, best_k_pred.reshape([-1, 1])),
                dim=1)  # (B*N, i+1)

            # 2.6 Update end flag
            end_flag = paddle.equal(hyps[:, -1], self.eos).reshape([-1, 1])

        # 3. Select best of best
        scores = scores.reshape([batch_size, beam_size])
        # TODO: length normalization
        best_index = paddle.argmax(scores, axis=-1).long()  # (B)
        # Convert the per-utterance beam index into a row index of `hyps`.
        best_hyps_index = best_index + paddle.arange(
            batch_size, dtype=paddle.long) * beam_size
        best_hyps = paddle.index_select(hyps, index=best_hyps_index, axis=0)
        # Drop the leading <sos> column.
        best_hyps = best_hyps[:, 1:]
        return best_hyps

    def ctc_greedy_search(
            self,
            speech: paddle.Tensor,
            speech_lengths: paddle.Tensor,
            decoding_chunk_size: int=-1,
            num_decoding_left_chunks: int=-1,
            simulate_streaming: bool=False, ) -> List[List[int]]:
        """ Apply CTC greedy (best path) search
        Args:
            speech (paddle.Tensor): (batch, max_len, feat_dim)
            speech_length (paddle.Tensor): (batch, )
            decoding_chunk_size (int): decoding chunk for dynamic chunk
                trained model.
                <0: for decoding, use full chunk.
                >0: for decoding, use fixed chunk size as set.
                0: used for training, it's prohibited here
            num_decoding_left_chunks (int): forwarded to the encoder.
            simulate_streaming (bool): whether do encoder forward in a
                streaming fashion
        Returns:
            List[List[int]]: best path result, one token-id list per utterance
        """
        assert speech.shape[0] == speech_lengths.shape[0]
        assert decoding_chunk_size != 0
        num_utts = speech.shape[0]

        # enc_hidden: (B, maxlen, encoder_dim), enc_mask: (B, 1, Tmax)
        enc_hidden, enc_mask = self._forward_encoder(
            speech, speech_lengths, decoding_chunk_size,
            num_decoding_left_chunks, simulate_streaming)
        num_frames = enc_hidden.shape[1]
        valid_lens = enc_mask.squeeze(1).sum(1)

        # Frame-wise arg-max over the CTC posteriors.
        log_probs = self.ctc.log_softmax(enc_hidden)  # (B, maxlen, vocab_size)
        _, frame_best = log_probs.topk(1, axis=2)  # (B, maxlen, 1)
        frame_best = frame_best.reshape([num_utts, num_frames])  # (B, maxlen)

        # Padded frames are overwritten (in place) with the eos id.
        frame_best = frame_best.masked_fill_(
            make_pad_mask(valid_lens), self.eos)  # (B, maxlen)

        # Collapse each frame-level path into a label sequence.
        return [
            remove_duplicates_and_blank(path.tolist()) for path in frame_best
        ]

    def _ctc_prefix_beam_search(
            self,
            speech: paddle.Tensor,
            speech_lengths: paddle.Tensor,
            beam_size: int,
            decoding_chunk_size: int=-1,
            num_decoding_left_chunks: int=-1,
            simulate_streaming: bool=False,
            blank_id: int=0, ) -> Tuple[List[Tuple[Tuple[int, ...], float]],
                                        paddle.Tensor]:
        """ CTC prefix beam search inner implementation
        Args:
            speech (paddle.Tensor): (batch, max_len, feat_dim)
            speech_length (paddle.Tensor): (batch, )
            beam_size (int): beam size for beam search
            decoding_chunk_size (int): decoding chunk for dynamic chunk
                trained model.
                <0: for decoding, use full chunk.
                >0: for decoding, use fixed chunk size as set.
                0: used for training, it's prohibited here
            num_decoding_left_chunks (int): forwarded to the encoder.
            simulate_streaming (bool): whether do encoder forward in a
                streaming fashion
            blank_id (int): id of the CTC blank symbol. Defaults to 0.
        Returns:
            List[Tuple[Tuple[int, ...], float]]: nbest results sorted from best
                to worst, each entry is (token-id prefix, log likelihood)
            paddle.Tensor: encoder output, (1, max_len, encoder_dim),
                it will be used for rescoring in attention rescoring mode
        """
        assert speech.shape[0] == speech_lengths.shape[0]
        assert decoding_chunk_size != 0
        batch_size = speech.shape[0]
        # For CTC prefix beam search, we only support batch_size=1
        assert batch_size == 1

        # Let's assume B = batch_size and N = beam_size
        # 1. Encoder forward and get CTC score
        encoder_out, _ = self._forward_encoder(
            speech, speech_lengths, decoding_chunk_size,
            num_decoding_left_chunks,
            simulate_streaming)  # (B, maxlen, encoder_dim)
        maxlen = encoder_out.shape[1]
        ctc_probs = self.ctc.log_softmax(encoder_out)  # (1, maxlen, vocab_size)
        ctc_probs = ctc_probs.squeeze(0)

        neg_inf = -float('inf')
        # cur_hyps: (prefix, (blank_ending_score, none_blank_ending_score))
        # blank_ending_score and  none_blank_ending_score in ln domain
        cur_hyps = [(tuple(), (0.0, neg_inf))]
        # 2. CTC beam search step by step
        for t in range(0, maxlen):
            logp = ctc_probs[t]  # (vocab_size,)
            # key: prefix, value (pb, pnb), default value(-inf, -inf)
            next_hyps = defaultdict(lambda: (neg_inf, neg_inf))
            # 2.1 First beam prune: select topk best
            top_k_logp, top_k_index = logp.topk(beam_size)  # (beam_size,)
            # Convert both top-k tensors to Python lists once per frame: the
            # top-k values are exactly logp[index], so there is no need to
            # index the tensor and call `.item()` again for every candidate.
            for s, ps in zip(top_k_index.tolist(), top_k_logp.tolist()):
                for prefix, (pb, pnb) in cur_hyps:
                    last = prefix[-1] if len(prefix) > 0 else None
                    if s == blank_id:  # blank
                        # Blank keeps the prefix and ends it with a blank.
                        n_pb, n_pnb = next_hyps[prefix]
                        n_pb = log_add([n_pb, pb + ps, pnb + ps])
                        next_hyps[prefix] = (n_pb, n_pnb)
                    elif s == last:
                        #  Update *ss -> *s;
                        n_pb, n_pnb = next_hyps[prefix]
                        n_pnb = log_add([n_pnb, pnb + ps])
                        next_hyps[prefix] = (n_pb, n_pnb)
                        # Update *s-s -> *ss, - is for blank
                        n_prefix = prefix + (s, )
                        n_pb, n_pnb = next_hyps[n_prefix]
                        n_pnb = log_add([n_pnb, pb + ps])
                        next_hyps[n_prefix] = (n_pb, n_pnb)
                    else:
                        # A new, different token always extends the prefix.
                        n_prefix = prefix + (s, )
                        n_pb, n_pnb = next_hyps[n_prefix]
                        n_pnb = log_add([n_pnb, pb + ps, pnb + ps])
                        next_hyps[n_prefix] = (n_pb, n_pnb)

            # 2.2 Second beam prune
            next_hyps = sorted(
                next_hyps.items(),
                key=lambda x: log_add(list(x[1])),
                reverse=True)
            cur_hyps = next_hyps[:beam_size]

        # Merge the blank / non-blank ending scores into one score per prefix.
        hyps = [(y[0], log_add([y[1][0], y[1][1]])) for y in cur_hyps]
        return hyps, encoder_out

    def ctc_prefix_beam_search(
            self,
            speech: paddle.Tensor,
            speech_lengths: paddle.Tensor,
            beam_size: int,
            decoding_chunk_size: int=-1,
            num_decoding_left_chunks: int=-1,
            simulate_streaming: bool=False, ) -> List[int]:
        """ Apply CTC prefix beam search and keep only the top hypothesis
        Args:
            speech (paddle.Tensor): (batch, max_len, feat_dim)
            speech_length (paddle.Tensor): (batch, )
            beam_size (int): beam size for beam search
            decoding_chunk_size (int): decoding chunk for dynamic chunk
                trained model.
                <0: for decoding, use full chunk.
                >0: for decoding, use fixed chunk size as set.
                0: used for training, it's prohibited here
            num_decoding_left_chunks (int): forwarded to the encoder.
            simulate_streaming (bool): whether do encoder forward in a
                streaming fashion
        Returns:
            List[int]: token ids of the first (best) entry of the nbest list
        """
        # The encoder output returned alongside the nbest list is not needed.
        nbest, _ = self._ctc_prefix_beam_search(
            speech, speech_lengths, beam_size, decoding_chunk_size,
            num_decoding_left_chunks, simulate_streaming)
        top_entry = nbest[0]  # (prefix, score)
        return top_entry[0]

    def attention_rescoring(self,
                            speech: paddle.Tensor,
                            speech_lengths: paddle.Tensor,
                            beam_size: int,
                            decoding_chunk_size: int=-1,
                            num_decoding_left_chunks: int=-1,
                            ctc_weight: float=0.0,
                            simulate_streaming: bool=False,
                            reverse_weight: float=0.0) -> List[int]:
        """ Apply attention rescoring decoding, CTC prefix beam search
            is applied first to get nbest, then we rescore the nbest on
            attention decoder with corresponding encoder out
        Args:
            speech (paddle.Tensor): (batch, max_len, feat_dim)
            speech_length (paddle.Tensor): (batch, )
            beam_size (int): beam size for beam search
            decoding_chunk_size (int): decoding chunk for dynamic chunk
                trained model.
                <0: for decoding, use full chunk.
                >0: for decoding, use fixed chunk size as set.
                0: used for training, it's prohibited here
            num_decoding_left_chunks (int): forwarded to the encoder.
            ctc_weight (float): weight of the CTC prefix score that is added
                to the decoder score of each hypothesis.
            simulate_streaming (bool): whether do encoder forward in a
                streaming fashion
            reverse_weight (float): reverse decoder weight.
        Returns:
            List[int]: Attention rescoring result, i.e. the token ids of the
                hypothesis with the highest combined score
        """
        assert speech.shape[0] == speech_lengths.shape[0]
        assert decoding_chunk_size != 0
        if reverse_weight > 0.0:
            # decoder should be a bitransformer decoder if reverse_weight > 0.0
            assert hasattr(self.decoder, 'right_decoder')
        device = speech.place
        batch_size = speech.shape[0]
        # For attention rescoring we only support batch_size=1
        assert batch_size == 1

        # len(hyps) = beam_size, encoder_out: (1, maxlen, encoder_dim)
        hyps, encoder_out = self._ctc_prefix_beam_search(
            speech, speech_lengths, beam_size, decoding_chunk_size,
            num_decoding_left_chunks, simulate_streaming)
        assert len(hyps) == beam_size

        # Turn every hypothesis prefix into a tensor so they can be padded
        # into one batch for the decoder.
        hyp_list = []
        for hyp in hyps:
            hyp_content = hyp[0]
            # Guard against an empty hyp: substitute a single blank token so
            # a non-empty tensor can be built.
            if len(hyp_content) == 0:
                hyp_content = (self.ctc.blank_id, )
            hyp_content = paddle.to_tensor(
                hyp_content, place=device, dtype=paddle.long)
            hyp_list.append(hyp_content)
        hyps_pad = pad_sequence(hyp_list, True, self.ignore_id)
        # Lengths come from the original prefixes, so an empty hyp keeps
        # length 0 here even though its padded row holds the blank placeholder.
        hyps_lens = paddle.to_tensor(
            [len(hyp[0]) for hyp in hyps], place=device,
            dtype=paddle.long)  # (beam_size,)
        hyps_pad, _ = add_sos_eos(hyps_pad, self.sos, self.eos, self.ignore_id)
        hyps_lens = hyps_lens + 1  # Add <sos> at beginning
        logger.debug(
            f"hyps pad: {hyps_pad} {self.sos} {self.eos} {self.ignore_id}")

        # ctc score in ln domain
        # (beam_size, max_hyps_len, vocab_size)
        decoder_out, r_decoder_out = self.forward_attention_decoder(
            hyps_pad, hyps_lens, encoder_out, reverse_weight)

        decoder_out = decoder_out.numpy()
        # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a
        # conventional transformer decoder.
        r_decoder_out = r_decoder_out.numpy()

        # Only use decoder score for rescoring
        best_score = -float('inf')
        best_index = 0
        # hyps is List[(Text=List[int], Score=float)], len(hyps)=beam_size
        for i, hyp in enumerate(hyps):
            # Left-to-right score: sum of the log-probs the decoder assigns to
            # each token of the hypothesis at its position.
            score = 0.0
            for j, w in enumerate(hyp[0]):
                score += decoder_out[i][j][w]
            # last decoder output token is `eos`, for last decoder input token.
            score += decoder_out[i][len(hyp[0])][self.eos]

            logger.debug(
                f"hyp {i} len {len(hyp[0])} l2r score: {score} ctc_score: {hyp[1]} reverse_weight: {reverse_weight}"
            )

            if reverse_weight > 0:
                # Right-to-left score: same tokens, read from the reverse
                # decoder output at mirrored positions.
                r_score = 0.0
                for j, w in enumerate(hyp[0]):
                    r_score += r_decoder_out[i][len(hyp[0]) - j - 1][w]
                r_score += r_decoder_out[i][len(hyp[0])][self.eos]

                logger.debug(
                    f"hyp {i} len {len(hyp[0])} r2l score: {r_score} ctc_score: {hyp[1]} reverse_weight: {reverse_weight}"
                )

                score = score * (1 - reverse_weight) + r_score * reverse_weight
            # add ctc score (which in ln domain)
            score += hyp[1] * ctc_weight
            # Strict '>' keeps the earliest hypothesis on ties.
            if score > best_score:
                best_score = score
                best_index = i

        logger.debug(f"result: {hyps[best_index]}")
        return hyps[best_index][0]

    @jit.to_static(property=True)
    def subsampling_rate(self) -> int:
        """ Export interface for c++ call, return subsampling_rate of the
            model

        Returns:
            int: the `subsampling_rate` attribute of the encoder's `embed`
                sub-module.
        """
        return self.encoder.embed.subsampling_rate

    @jit.to_static(property=True)
    def right_context(self) -> int:
        """ Export interface for c++ call, return right_context of the model

        Returns:
            int: the `right_context` attribute of the encoder's `embed`
                sub-module.
        """
        return self.encoder.embed.right_context

    @jit.to_static(property=True)
    def sos_symbol(self) -> int:
        """ Export interface for c++ call, return sos symbol id of the model

        Returns:
            int: `self.sos`, which the constructor sets to `vocab_size - 1`.
        """
        return self.sos

    @jit.to_static(property=True)
    def eos_symbol(self) -> int:
        """ Export interface for c++ call, return eos symbol id of the model

        Returns:
            int: `self.eos`, which the constructor sets to `vocab_size - 1`
                (the same id as sos).
        """
        return self.eos

    @jit.to_static(property=True)
    def is_bidirectional_decoder(self) -> bool:
        """ Export interface for c++ call, report whether the decoder has a
            right-to-left branch.

        Returns:
            bool: True when `self.decoder` exposes a `right_decoder`
                attribute, False otherwise.
        """
        return hasattr(self.decoder, 'right_decoder')

    # @jit.to_static
    def forward_encoder_chunk(
            self,
            xs: paddle.Tensor,
            offset: int,
            required_cache_size: int,
            att_cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0]),
            cnn_cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0])
    ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor]:
        """ Export interface for c++ call, give input chunk xs, and return
            output from time 0 to current chunk.

        This is a direct pass-through to `self.encoder.forward_chunk`.

        Args:
            xs (paddle.Tensor): chunk input, with shape (b=1, time, mel-dim),
                where `time == (chunk_size - 1) * subsample_rate +
                subsample.right_context + 1`
            offset (int): current offset in encoder output time stamp
            required_cache_size (int): cache size required for next chunk
                computation
                >=0: actual cache size
                <0: means all history cache is required
            att_cache (paddle.Tensor): cache tensor for KEY & VALUE in
                transformer/conformer attention, with shape
                (elayers, head, cache_t1, d_k * 2), where
                `head * d_k == hidden-dim` and
                `cache_t1 == chunk_size * num_decoding_left_chunks`.
                `d_k * 2` for att key & value.
                Defaults to an empty (0, 0, 0, 0) tensor.
            cnn_cache (paddle.Tensor): cache tensor for cnn_module in conformer,
                (elayers, b=1, hidden-dim, cache_t2), where
                `cache_t2 == cnn.lorder - 1`.
                Defaults to an empty (0, 0, 0, 0) tensor.

        Returns:
            paddle.Tensor: output of current input xs,
                with shape (b=1, chunk_size, hidden-dim).
            paddle.Tensor: new attention cache required for next chunk, with
                dynamic shape (elayers, head, T(?), d_k * 2)
                depending on required_cache_size.
            paddle.Tensor: new conformer cnn cache required for next chunk, with
                same shape as the original cnn_cache.
        """
        # NOTE(review): the default cache tensors are created once, when the
        # function is defined, and shared by all calls that omit them.
        return self.encoder.forward_chunk(xs, offset, required_cache_size,
                                          att_cache, cnn_cache)

    # @jit.to_static
    def ctc_activation(self, xs: paddle.Tensor) -> paddle.Tensor:
        """ Export interface for c++ call, apply linear transform and log
            softmax before ctc

        This is a direct pass-through to `self.ctc.log_softmax`.

        Args:
            xs (paddle.Tensor): encoder output, (B, T, D)
        Returns:
            paddle.Tensor: activation before ctc. (B, Tmax, odim)
        """
        return self.ctc.log_softmax(xs)

    # @jit.to_static
    def forward_attention_decoder(self,
                                  hyps: paddle.Tensor,
                                  hyps_lens: paddle.Tensor,
                                  encoder_out: paddle.Tensor,
                                  reverse_weight: float=0.0
                                  ) -> Tuple[paddle.Tensor, paddle.Tensor]:
        """ Export interface for c++ call, forward decoder with multiple
            hypothesis from ctc prefix beam search and one encoder output
        Args:
            hyps (paddle.Tensor): hyps from ctc prefix beam search, already
                pad sos at the beginning, (B, T)
            hyps_lens (paddle.Tensor): length of each hyp in hyps, (B),
                counting the leading <sos>
            encoder_out (paddle.Tensor): corresponding encoder output, (B=1, T, D)
            reverse_weight (float): forwarded to the decoder; used by
                bidirectional decoders to enable the right-to-left branch.
                Defaults to 0.0.
        Returns:
            Tuple[paddle.Tensor, paddle.Tensor]:
                log-softmax of the left-to-right decoder output and
                log-softmax of the right-to-left decoder output.
                The first is expected to be (num_hyps, max_hyps_len,
                vocab_size); the content of the second for a decoder
                without a reverse branch depends on the decoder
                implementation.
        """
        assert encoder_out.shape[0] == 1
        num_hyps = hyps.shape[0]
        assert hyps_lens.shape[0] == num_hyps
        # Share the single encoder output across all hypotheses.
        encoder_out = encoder_out.repeat(num_hyps, 1, 1)
        # (B, 1, T): every encoder frame is valid for every hypothesis.
        encoder_mask = paddle.ones(
            [num_hyps, 1, encoder_out.shape[1]], dtype=paddle.bool)

        # input for right to left decoder
        # this hyps_lens has count <sos> token, we need minus it.
        r_hyps_lens = hyps_lens - 1
        # this hyps has included <sos> token, so it should be
        # convert the original hyps.
        r_hyps = hyps[:, 1:]

        r_hyps = st_reverse_pad_list(r_hyps, r_hyps_lens, self.sos, self.eos)

        # decoder outputs: (num_hyps, max_hyps_len, vocab_size)
        decoder_out, r_decoder_out, _ = self.decoder(
            encoder_out, encoder_mask, hyps, hyps_lens, r_hyps, reverse_weight)
        decoder_out = paddle.nn.functional.log_softmax(decoder_out, axis=-1)
        r_decoder_out = paddle.nn.functional.log_softmax(r_decoder_out, axis=-1)
        return decoder_out, r_decoder_out

    @paddle.no_grad()
    def decode(self,
               feats: paddle.Tensor,
               feats_lengths: paddle.Tensor,
               text_feature: Dict[str, int],
               decoding_method: str,
               beam_size: int,
               ctc_weight: float=0.0,
               decoding_chunk_size: int=-1,
               num_decoding_left_chunks: int=-1,
               simulate_streaming: bool=False,
               reverse_weight: float=0.0):
        """Dispatch to one of the u2 decoding strategies.

        Args:
            feats (Tensor): audio features, (B, T, D)
            feats_lengths (Tensor): (B)
            text_feature: object whose `defeaturize` method maps a list of
                token ids to text.
            decoding_method (str): decoding mode, one of
                    'attention', 'ctc_greedy_search',
                    'ctc_prefix_beam_search', 'attention_rescoring'
            beam_size (int): beam size for search
            ctc_weight (float, optional): ctc weight for attention rescoring
                    decode mode. Defaults to 0.0.
            decoding_chunk_size (int, optional): decoding chunk size.
                    Defaults to -1.
                    <0: for decoding, use full chunk.
                    >0: for decoding, use fixed chunk size as set.
                    0: used for training, it's prohibited here.
            num_decoding_left_chunks (int, optional):
                    number of left chunks for decoding. Defaults to -1.
            simulate_streaming (bool, optional): simulate streaming inference.
                    Defaults to False.
            reverse_weight (float, optional): reverse decoder weight, used by
                    `attention_rescoring`.

        Raises:
            ValueError: when `decoding_method` is not supported.
            SystemExit: when a single-utterance decoding method is requested
                with a batch larger than one.

        Returns:
            Tuple[list, List[List[int]]]: the defeaturized transcripts and the
                corresponding token-id sequences, one entry per utterance.
        """
        single_utt_methods = [
            'ctc_prefix_beam_search', 'attention_rescoring'
        ]
        batch_size = feats.shape[0]
        if batch_size > 1 and decoding_method in single_utt_methods:
            logger.error(
                f'decoding mode {decoding_method} must be running with batch_size == 1'
            )
            logger.error(f"current batch_size is {batch_size}")
            sys.exit(1)

        # Options shared by every decoding strategy.
        chunk_opts = dict(
            decoding_chunk_size=decoding_chunk_size,
            num_decoding_left_chunks=num_decoding_left_chunks,
            simulate_streaming=simulate_streaming)

        if decoding_method == 'attention':
            best_paths = self.recognize(
                feats, feats_lengths, beam_size=beam_size, **chunk_opts)
            hyps = [path.tolist() for path in best_paths]
        elif decoding_method == 'ctc_greedy_search':
            hyps = self.ctc_greedy_search(feats, feats_lengths, **chunk_opts)
        elif decoding_method == 'ctc_prefix_beam_search':
            # Single-utterance mode: wrap the one result into a list so the
            # output shape matches the batch decoding modes.
            assert feats.shape[0] == 1
            single = self.ctc_prefix_beam_search(feats, feats_lengths,
                                                 beam_size, **chunk_opts)
            hyps = [single]
        elif decoding_method == 'attention_rescoring':
            # Single-utterance mode as well, see above.
            assert feats.shape[0] == 1
            single = self.attention_rescoring(
                feats,
                feats_lengths,
                beam_size,
                ctc_weight=ctc_weight,
                reverse_weight=reverse_weight,
                **chunk_opts)
            hyps = [single]
        else:
            raise ValueError(f"Not support decoding method: {decoding_method}")

        res = [text_feature.defeaturize(token_ids) for token_ids in hyps]
        res_tokenids = list(hyps)
        return res, res_tokenids


class U2DecodeModel(U2BaseModel):
    """U2 model extended with the `scorers` and `encode` decoding helpers."""

    def scorers(self):
        """Build the name -> scorer mapping.

        Returns:
            dict: ``decoder`` maps to the attention decoder and ``ctc`` to a
                `CTCPrefixScorer` built from the CTC module and the eos id.
        """
        ctc_scorer = CTCPrefixScorer(self.ctc, self.eos)
        return {"decoder": self.decoder, "ctc": ctc_scorer}

    def encode(self, x):
        """Run the encoder on a single utterance.

        The model is switched to eval mode as a side effect.

        :param ndarray x: source acoustic feature (T, D)
        :return: encoder outputs with the batch axis removed
        :rtype: paddle.Tensor
        """
        self.eval()
        # Add a leading batch axis of size one before calling the encoder.
        batched = paddle.to_tensor(x).unsqueeze(0)
        num_frames = paddle.shape(batched)[1]
        hidden, _mask = self._forward_encoder(batched, num_frames)
        return hidden.squeeze(0)


class U2Model(U2DecodeModel):
    """U2 model whose encoder, decoder and CTC head are built from a config."""

    def __init__(self, configs: dict):
        """Create the sub-modules from ``configs`` and initialise the model.

        Args:
            configs (dict): model config. The optional ``model_conf`` entry is
                forwarded as keyword arguments to the parent constructor.
        """
        model_conf = configs.get('model_conf', dict())
        init_type = model_conf.get("init_type", None)
        with DefaultInitializerContext(init_type):
            vocab_size, encoder, decoder, ctc = U2Model._init_from_config(
                configs)
        # When `configs` has a 'model_conf' entry, `model_conf` is that same
        # object, so keys popped by `_init_from_config` are not forwarded here.
        super().__init__(
            vocab_size=vocab_size,
            encoder=encoder,
            decoder=decoder,
            ctc=ctc,
            **model_conf)

    @classmethod
    def _init_from_config(cls, configs: dict):
        """init sub module for model.

        Note:
            For the 'transformer' decoder this mutates ``configs`` in place:
            ``reverse_weight`` is removed from ``model_conf`` and
            ``r_num_blocks`` from ``decoder_conf``.

        Args:
            configs (dict): config dict.

        Raises:
            ValueError: raise when using not support encoder or decoder type.

        Returns:
            int, nn.Layer, nn.Layer, nn.Layer: vocab size, encoder, decoder, ctc
        """
        # cmvn
        if 'cmvn_file' in configs and configs['cmvn_file']:
            mean, istd = load_cmvn(configs['cmvn_file'],
                                   configs['cmvn_file_type'])
            global_cmvn = GlobalCMVN(
                paddle.to_tensor(mean, dtype=paddle.float),
                paddle.to_tensor(istd, dtype=paddle.float))
        else:
            global_cmvn = None

        # input & output dim
        input_dim = configs['input_dim']
        vocab_size = configs['output_dim']
        assert input_dim != 0, input_dim
        assert vocab_size != 0, vocab_size

        # encoder
        encoder_type = configs.get('encoder', 'transformer')
        logger.debug(f"U2 Encoder type: {encoder_type}")
        if encoder_type == 'transformer':
            encoder = TransformerEncoder(
                input_dim, global_cmvn=global_cmvn, **configs['encoder_conf'])
        elif encoder_type == 'conformer':
            encoder = ConformerEncoder(
                input_dim, global_cmvn=global_cmvn, **configs['encoder_conf'])
        elif encoder_type == 'squeezeformer':
            encoder = SqueezeformerEncoder(
                input_dim, global_cmvn=global_cmvn, **configs['encoder_conf'])
        else:
            raise ValueError(f"not support encoder type:{encoder_type}")

        # decoder
        decoder_type = configs.get('decoder', 'transformer')
        logger.debug(f"U2 Decoder type: {decoder_type}")
        if decoder_type == 'transformer':
            # These two options are only checked by the 'bitransformer' branch
            # below, so they are dropped for the plain transformer decoder.
            # `model_conf` is optional everywhere else in this class, so a
            # missing entry must not raise KeyError here either.
            configs.get('model_conf', dict()).pop('reverse_weight', None)
            configs['decoder_conf'].pop('r_num_blocks', None)
            decoder = TransformerDecoder(vocab_size,
                                         encoder.output_size(),
                                         **configs['decoder_conf'])
        elif decoder_type == 'bitransformer':
            assert 0.0 < configs['model_conf']['reverse_weight'] < 1.0
            assert configs['decoder_conf']['r_num_blocks'] > 0
            decoder = BiTransformerDecoder(vocab_size,
                                           encoder.output_size(),
                                           **configs['decoder_conf'])
        else:
            raise ValueError(f"not support decoder type:{decoder_type}")
        # ctc decoder and ctc loss
        model_conf = configs.get('model_conf', dict())
        dropout_rate = model_conf.get('ctc_dropout_rate', 0.0)
        grad_norm_type = model_conf.get('ctc_grad_norm_type', None)
        ctc = CTCDecoderBase(
            odim=vocab_size,
            enc_n_units=encoder.output_size(),
            blank_id=0,
            dropout_rate=dropout_rate,
            reduction=True,  # sum
            batch_average=True,  # sum / batch_size
            grad_norm_type=grad_norm_type)

        return vocab_size, encoder, decoder, ctc

    @classmethod
    def from_config(cls, configs: dict):
        """init model.

        Args:
            configs (dict): config dict.

        Raises:
            ValueError: raise when using not support encoder or decoder type.

        Returns:
            nn.Layer: U2Model
        """
        model = cls(configs)
        return model

    @classmethod
    def from_pretrained(cls, dataloader, config, checkpoint_path):
        """Build a U2Model from a pretrained checkpoint.

        Args:
            dataloader (paddle.io.DataLoader): only its ``feat_dim`` and
                ``vocab_size`` attributes are read, to fill the input and
                output dimensions of ``config``.
            config (yacs.config.CfgNode):  model configs, updated in place
                with ``input_dim`` and ``output_dim``.
            checkpoint_path (Path or str): the path of pretrained model checkpoint, without extension name.
                When falsy, no parameters are loaded.

        Returns:
            U2Model: The model built from pretrained result.
        """
        with UpdateConfig(config):
            config.input_dim = dataloader.feat_dim
            config.output_dim = dataloader.vocab_size

        model = cls.from_config(config)

        if checkpoint_path:
            infos = checkpoint.Checkpoint().load_parameters(
                model, checkpoint_path=checkpoint_path)
            logger.debug(f"checkpoint info: {infos}")
        layer_tools.summary(model)
        return model


class U2InferModel(U2Model):
    """U2 model for export, with the feature pipeline built from a preprocess config."""

    def __init__(self, configs: dict):
        """Build the model, then the fbank / CMVN layers listed in the preprocess config.

        Args:
            configs (dict): model config. In addition to what `U2Model`
                needs, ``input_dim`` and ``preprocess_config`` (path to a
                YAML file with a ``process`` list) are read here.
        """
        super().__init__(configs)

        from paddlespeech.s2t.modules.fbank import KaldiFbank
        import yaml
        import json
        import numpy as np

        input_dim = configs['input_dim']
        preprocess_path = configs['preprocess_config']
        with open(preprocess_path, encoding="utf-8") as f:
            conf = yaml.safe_load(f)
        # Report the type of the loaded object; `self.conf` is never set, so
        # referencing it in the message would raise AttributeError instead.
        assert isinstance(conf, dict), type(conf)

        for step in conf['process']:
            assert isinstance(step, dict), type(step)
            # Copy so that popping "type" does not modify the loaded config.
            opts = dict(step)
            process_type = opts.pop("type")

            if process_type == 'fbank_kaldi':
                opts.update({'n_mels': input_dim})
                # Dither is forced to zero for the exported feature extractor.
                opts['dither'] = 0.0
                self.fbank = KaldiFbank(**opts)
                logger.info(f"{self.__class__.__name__} export: {self.fbank}")
            elif process_type == 'cmvn_json':
                # align with paddlespeech.audio.transform.cmvn:GlobalCMVN
                std_floor = 1.0e-20

                cmvn = opts['cmvn_path']
                if isinstance(cmvn, dict):
                    # Statistics were given inline rather than as a file path.
                    cmvn_stats = cmvn
                else:
                    with open(cmvn, encoding="utf-8") as f:
                        cmvn_stats = json.load(f)
                count = cmvn_stats['frame_num']
                mean = np.array(cmvn_stats['mean_stat']) / count
                square_sums = np.array(cmvn_stats['var_stat'])
                # var = E[x^2] - E[x]^2
                var = square_sums / count - mean**2
                std = np.maximum(np.sqrt(var), std_floor)
                istd = 1.0 / std
                self.global_cmvn = GlobalCMVN(
                    paddle.to_tensor(mean, dtype=paddle.float),
                    paddle.to_tensor(istd, dtype=paddle.float))
                logger.info(
                    f"{self.__class__.__name__} export: {self.global_cmvn}")

    def forward(self,
                feats,
                feats_lengths,
                decoding_chunk_size=-1,
                num_decoding_left_chunks=-1,
                simulate_streaming=False):
        """export model function

        This is currently a placeholder: the inputs are returned unchanged and
        the decoding arguments are ignored (see the commented-out call below).

        Args:
            feats (Tensor): [B, T, D]
            feats_lengths (Tensor): [B]
            decoding_chunk_size (int): unused.
            num_decoding_left_chunks (int): unused.
            simulate_streaming (bool): unused.

        Returns:
            Tuple[Tensor, Tensor]: ``feats`` and ``feats_lengths`` as given.
        """
        # dummy code for dy2st
        # return self.ctc_greedy_search(
        #     feats,
        #     feats_lengths,
        #     decoding_chunk_size=decoding_chunk_size,
        #     num_decoding_left_chunks=num_decoding_left_chunks,
        #     simulate_streaming=simulate_streaming)
        return feats, feats_lengths

    def forward_feature(self, x):
        """feature pipeline.

        Requires that the preprocess config contained both a ``fbank_kaldi``
        and a ``cmvn_json`` step, since `self.fbank` and `self.global_cmvn`
        are only created for those steps.

        Args:
            x (paddle.Tensor): waveform (T,).

        Return:
            feat (paddle.Tensor): feature (T, D)
        """
        x = paddle.cast(x, paddle.float32)
        feat = self.fbank(x)
        feat = self.global_cmvn(feat)
        return feat


================================================
FILE: paddlespeech/s2t/models/u2/updater.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from contextlib import nullcontext

import paddle
from paddle import distributed as dist

from paddlespeech.s2t.training.extensions.evaluator import StandardEvaluator
from paddlespeech.s2t.training.reporter import report
from paddlespeech.s2t.training.timer import Timer
from paddlespeech.s2t.training.updaters.standard_updater import StandardUpdater
from paddlespeech.s2t.utils import layer_tools
from paddlespeech.s2t.utils.log import Log

logger = Log(__name__).getlog()


class U2Evaluator(StandardEvaluator):
    """Evaluator that accumulates the utterance-weighted loss of a U2 model."""

    def __init__(self, model, dataloader):
        super().__init__(model, dataloader)
        # Running sums over all batches whose loss was finite.
        self.total_loss = 0.0
        self.num_seen_utts = 0
        self.msg = ""

    def evaluate_core(self, batch):
        """Evaluate one batch and update the running totals.

        Args:
            batch (List[Object]): the first element is skipped; the rest is
                passed to the model, and ``batch[1].shape[0]`` is taken as the
                number of utterances.

        Returns:
            Tuple[float, int]: accumulated loss and number of utterances seen.
        """
        rank = dist.get_rank()
        self.msg = "Valid: Rank: {}, ".format(rank)
        losses_dict = dict()

        loss, attention_loss, ctc_loss = self.model(*batch[1:])
        # Batches with a non-finite loss are left out of totals and reports.
        if paddle.isfinite(loss):
            batch_utts = batch[1].shape[0]
            self.num_seen_utts += batch_utts
            self.total_loss += float(loss) * batch_utts

            losses_dict['loss'] = float(loss)
            optional_losses = (('att_loss', attention_loss),
                               ('ctc_loss', ctc_loss))
            for name, value in optional_losses:
                if value:
                    losses_dict[name] = float(value)

            for name in losses_dict:
                report("eval/" + name, losses_dict[name])

        summary = ', '.join('{}: {:>.6f}'.format(*item)
                            for item in losses_dict.items())
        self.msg += summary
        logger.info(self.msg)
        return self.total_loss, self.num_seen_utts


class U2Updater(StandardUpdater):
    """Updater for U2 models with optional gradient accumulation."""

    def __init__(self,
                 model,
                 optimizer,
                 scheduler,
                 dataloader,
                 init_state=None,
                 accum_grad=1,
                 **kwargs):
        """
        Args:
            model: model to train; called as ``model(*batch[1:])``.
            optimizer: optimizer stepped once per accumulation window.
            scheduler: learning rate scheduler stepped with the optimizer.
            dataloader: training data source.
            init_state: forwarded to the parent updater.
            accum_grad (int): number of micro-batches per optimizer step.
            **kwargs: accepted and ignored.
        """
        super().__init__(
            model, optimizer, scheduler, dataloader, init_state=init_state)
        self.accum_grad = accum_grad
        # Number of micro-batches processed in the current accumulation window.
        self.forward_count = 0
        self.msg = ""

    def update_core(self, batch):
        """One Step

        Runs forward/backward on one micro-batch and, once ``accum_grad``
        micro-batches have been accumulated, steps the optimizer and scheduler.

        Args:
            batch (List[Object]): utts, xs, xlens, ys, ylens
        """
        losses_dict = {}
        self.msg = "Rank: {}, ".format(dist.get_rank())

        # forward
        batch_size = batch[1].shape[0]
        loss, attention_loss, ctc_loss = self.model(*batch[1:])
        # loss div by `batch_size * accum_grad`
        loss /= self.accum_grad

        # loss backward
        if (self.forward_count + 1) != self.accum_grad:
            # Disable gradient synchronizations across DDP processes.
            # Within this context, gradients will be accumulated on module
            # variables, which will later be synchronized.
            # A model that is not wrapped for data parallel training has no
            # `no_sync`; fall back to a no-op context instead of failing.
            context = getattr(self.model, "no_sync", nullcontext)
        else:
            # Used for single gpu training and DDP gradient synchronization
            # processes.
            context = nullcontext

        with context():
            loss.backward()
            layer_tools.print_grads(self.model, print_func=None)

        # loss info (undo the division above so the reported value is per batch)
        losses_dict['loss'] = float(loss) * self.accum_grad
        if attention_loss:
            losses_dict['att_loss'] = float(attention_loss)
        if ctc_loss:
            losses_dict['ctc_loss'] = float(ctc_loss)
        # report loss
        for k, v in losses_dict.items():
            report("train/" + k, v)
        # loss msg
        self.msg += "batch size: {}, ".format(batch_size)
        self.msg += "accum: {}, ".format(self.accum_grad)
        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())

        # Truncate the graph
        # NOTE(review): the result of `detach()` is discarded, so this line
        # probably has no effect - confirm before removing.
        loss.detach()

        # update parameters
        self.forward_count += 1
        if self.forward_count != self.accum_grad:
            return
        self.forward_count = 0

        self.optimizer.step()
        self.optimizer.clear_grad()
        self.scheduler.step()

    def update(self):
        """Read one batch, run `update_core`, and advance iteration/epoch counters."""
        # model is default in train mode

        # training for a step is implemented here
        with Timer("data time cost:{}"):
            batch = self.read_batch()
        with Timer("step time cost:{}"):
            self.update_core(batch)

        # #iterations with accum_grad > 1
        # Ref.: https://github.com/espnet/espnet/issues/777
        if self.forward_count == 0:
            self.state.iteration += 1
            # Only test the epoch boundary when `iteration` actually advanced.
            # Otherwise, with accum_grad > 1, every micro-batch seen while
            # `iteration` sits on a multiple of `updates_per_epoch` (including
            # 0 at start) would increment `epoch` again.
            # NOTE(review): assumes `updates_per_epoch` counts optimizer
            # steps rather than micro-batches - confirm against the parent.
            if self.updates_per_epoch is not None:
                if self.state.iteration % self.updates_per_epoch == 0:
                    self.state.epoch += 1


================================================
FILE: paddlespeech/s2t/models/u2_st/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .u2_st import U2STInferModel
from .u2_st import U2STModel


================================================
FILE: paddlespeech/s2t/models/u2_st/u2_st.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from wenet(https://github.com/wenet-e2e/wenet)
"""U2 ASR Model
Unified Streaming and Non-streaming Two-pass End-to-end Model for Speech Recognition
(https://arxiv.org/pdf/2012.05481.pdf)
"""
import time
from typing import Dict
from typing import Optional
from typing import Tuple

import paddle
from paddle import jit
from paddle import nn

from paddlespeech.audio.utils.tensor_utils import add_sos_eos
from paddlespeech.audio.utils.tensor_utils import th_accuracy
from paddlespeech.s2t.frontend.utility import IGNORE_ID
from paddlespeech.s2t.frontend.utility import load_cmvn
from paddlespeech.s2t.modules.cmvn import GlobalCMVN
from paddlespeech.s2t.modules.ctc import CTCDecoderBase
from paddlespeech.s2t.modules.decoder import TransformerDecoder
from paddlespeech.s2t.modules.encoder import ConformerEncoder
from paddlespeech.s2t.modules.encoder import TransformerEncoder
from paddlespeech.s2t.modules.loss import LabelSmoothingLoss
from paddlespeech.s2t.modules.mask import subsequent_mask
from paddlespeech.s2t.utils import checkpoint
from paddlespeech.s2t.utils import layer_tools
from paddlespeech.s2t.utils.log import Log
from paddlespeech.s2t.utils.utility import UpdateConfig

__all__ = ["U2STModel", "U2STInferModel"]

logger = Log(__name__).getlog()


class U2STBaseModel(nn.Layer):
    """CTC-Attention hybrid Encoder-Decoder model"""

    def __init__(self,
                 vocab_size: int,
                 encoder: TransformerEncoder,
                 st_decoder: TransformerDecoder,
                 decoder: TransformerDecoder=None,
                 ctc: CTCDecoderBase=None,
                 ctc_weight: float=0.0,
                 asr_weight: float=0.0,
                 ignore_id: int=IGNORE_ID,
                 lsm_weight: float=0.0,
                 length_normalized_loss: bool=False,
                 **kwargs):
        """Store the sub-modules and loss hyper-parameters.

        Args:
            vocab_size (int): vocabulary size; the last id
                (``vocab_size - 1``) is used for both sos and eos.
            encoder (TransformerEncoder): speech encoder.
            st_decoder (TransformerDecoder): decoder used by the translation
                branch (`_calc_st_loss`, `translate`).
            decoder (TransformerDecoder, optional): decoder used by the ASR
                attention branch (`_calc_att_loss`).
            ctc (CTCDecoderBase, optional): CTC module used by the ASR CTC
                branch.
            ctc_weight (float): weight of the CTC loss inside the ASR loss,
                must be in [0, 1].
            asr_weight (float): weight of the ASR loss in the total loss; the
                ASR branches only run when it is > 0.
            ignore_id (int): padding label passed to the loss and accuracy.
            lsm_weight (float): label smoothing factor.
            length_normalized_loss (bool): passed to `LabelSmoothingLoss` as
                ``normalize_length``.
            **kwargs: accepted and ignored.
        """
        assert 0.0 <= ctc_weight <= 1.0, ctc_weight

        super().__init__()
        # note that eos is the same as sos (equivalent ID)
        self.sos = vocab_size - 1
        self.eos = vocab_size - 1
        self.vocab_size = vocab_size
        self.ignore_id = ignore_id
        self.ctc_weight = ctc_weight
        self.asr_weight = asr_weight

        # Sub-modules; `decoder` and `ctc` may be None.
        self.encoder = encoder
        self.st_decoder = st_decoder
        self.decoder = decoder
        self.ctc = ctc
        # The same criterion is shared by the ST and ASR attention branches.
        self.criterion_att = LabelSmoothingLoss(
            size=vocab_size,
            padding_idx=ignore_id,
            smoothing=lsm_weight,
            normalize_length=length_normalized_loss, )

    def forward(
            self,
            speech: paddle.Tensor,
            speech_lengths: paddle.Tensor,
            text: paddle.Tensor,
            text_lengths: paddle.Tensor,
            asr_text: paddle.Tensor=None,
            asr_text_lengths: paddle.Tensor=None,
    ) -> Tuple[paddle.Tensor, paddle.Tensor, Optional[paddle.Tensor],
               Optional[paddle.Tensor]]:
        """Frontend + Encoder + Decoder + Calc loss
        Args:
            speech: (Batch, Length, ...)
            speech_lengths: (Batch, )
            text: (Batch, Length), translation targets
            text_lengths: (Batch,)
            asr_text: ASR targets, required when ``asr_weight > 0``
            asr_text_lengths: lengths of ``asr_text``, required when
                ``asr_weight > 0``
        Returns:
            total_loss, st_loss, asr_attention_loss, asr_ctc_loss.
            The two ASR losses are None when the corresponding branch is
            disabled (``asr_weight <= 0``, ``ctc_weight == 1.0`` for the
            attention branch, ``ctc_weight == 0.0`` for the CTC branch).
        """
        assert text_lengths.dim() == 1, text_lengths.shape
        # Check that batch_size is unified
        assert (speech.shape[0] == speech_lengths.shape[0] == text.shape[0] ==
                text_lengths.shape[0]), (speech.shape, speech_lengths.shape,
                                         text.shape, text_lengths.shape)
        # 1. Encoder
        encoder_out, encoder_mask = self.encoder(speech, speech_lengths)
        encoder_out_lens = encoder_mask.squeeze(1).sum(1)  #[B, 1, T] -> [B]

        # 2a. ST-decoder branch (the accuracy is not used for the loss)
        loss_st, _ = self._calc_st_loss(encoder_out, encoder_mask, text,
                                        text_lengths)

        loss_asr_att = None
        loss_asr_ctc = None
        if self.asr_weight > 0.:
            # At least one ASR branch runs below, and both need the targets.
            assert asr_text is not None and asr_text_lengths is not None, (
                "asr_text and asr_text_lengths are required when asr_weight > 0"
            )

            # 2b. ASR Attention-decoder branch
            if self.ctc_weight != 1.0:
                loss_asr_att, _ = self._calc_att_loss(
                    encoder_out, encoder_mask, asr_text, asr_text_lengths)

            # 2c. CTC branch
            if self.ctc_weight != 0.0:
                loss_asr_ctc = self.ctc(encoder_out, encoder_out_lens, asr_text,
                                        asr_text_lengths)

            if loss_asr_ctc is None:
                loss_asr = loss_asr_att
            elif loss_asr_att is None:
                loss_asr = loss_asr_ctc
            else:
                loss_asr = self.ctc_weight * loss_asr_ctc + (1 - self.ctc_weight
                                                             ) * loss_asr_att
            loss = self.asr_weight * loss_asr + (1 - self.asr_weight) * loss_st
        else:
            loss = loss_st
        return loss, loss_st, loss_asr_att, loss_asr_ctc

    def _calc_st_loss(
            self,
            encoder_out: paddle.Tensor,
            encoder_mask: paddle.Tensor,
            ys_pad: paddle.Tensor,
            ys_pad_lens: paddle.Tensor, ) -> Tuple[paddle.Tensor, float]:
        """Loss and accuracy of the translation (ST) decoder.

        Args:
            encoder_out (paddle.Tensor): [B, Tmax, D]
            encoder_mask (paddle.Tensor): [B, 1, Tmax]
            ys_pad (paddle.Tensor): [B, Umax]
            ys_pad_lens (paddle.Tensor): [B]

        Returns:
            Tuple[paddle.Tensor, float]: attention_loss, accuracy rate
        """
        # `add_sos_eos` yields the decoder input and the loss target; the
        # decoder input is one token longer than the raw target.
        decoder_input, decoder_target = add_sos_eos(
            ys_pad, self.sos, self.eos, self.ignore_id)
        decoder_input_lens = ys_pad_lens + 1

        # Only the first value returned by the ST decoder is needed.
        logits, *_extras = self.st_decoder(encoder_out, encoder_mask,
                                           decoder_input, decoder_input_lens)

        st_loss = self.criterion_att(logits, decoder_target)
        flat_logits = logits.reshape([-1, self.vocab_size])
        st_acc = th_accuracy(
            flat_logits, decoder_target, ignore_label=self.ignore_id)
        return st_loss, st_acc

    def _calc_att_loss(
            self,
            encoder_out: paddle.Tensor,
            encoder_mask: paddle.Tensor,
            ys_pad: paddle.Tensor,
            ys_pad_lens: paddle.Tensor, ) -> Tuple[paddle.Tensor, float]:
        """Loss and accuracy of the ASR attention decoder.

        Args:
            encoder_out (paddle.Tensor): [B, Tmax, D]
            encoder_mask (paddle.Tensor): [B, 1, Tmax]
            ys_pad (paddle.Tensor): [B, Umax]
            ys_pad_lens (paddle.Tensor): [B]

        Returns:
            Tuple[paddle.Tensor, float]: attention_loss, accuracy rate
        """
        # `add_sos_eos` yields the decoder input and the loss target; the
        # decoder input is one token longer than the raw target.
        decoder_input, decoder_target = add_sos_eos(
            ys_pad, self.sos, self.eos, self.ignore_id)
        decoder_input_lens = ys_pad_lens + 1

        # Only the first value returned by the ASR decoder is needed.
        logits, *_extras = self.decoder(encoder_out, encoder_mask,
                                        decoder_input, decoder_input_lens)

        att_loss = self.criterion_att(logits, decoder_target)
        flat_logits = logits.reshape([-1, self.vocab_size])
        att_acc = th_accuracy(
            flat_logits, decoder_target, ignore_label=self.ignore_id)
        return att_loss, att_acc

    def _forward_encoder(
            self,
            speech: paddle.Tensor,
            speech_lengths: paddle.Tensor,
            decoding_chunk_size: int=-1,
            num_decoding_left_chunks: int=-1,
            simulate_streaming: bool=False,
    ) -> Tuple[paddle.Tensor, paddle.Tensor]:
        """Run the encoder, either on the whole input or chunk by chunk.

        Args:
            speech (paddle.Tensor): [B, Tmax, D]
            speech_lengths (paddle.Tensor): [B]; not used on the simulated
                streaming path.
            decoding_chunk_size (int, optional): chunk size. Defaults to -1.
            num_decoding_left_chunks (int, optional): nums chunks. Defaults to -1.
            simulate_streaming (bool, optional): streaming or not. Defaults to False.

        Returns:
            Tuple[paddle.Tensor, paddle.Tensor]:
                encoder hiddens (B, Tmax, D),
                encoder hiddens mask (B, 1, Tmax).
        """
        # Streaming is only simulated when a positive chunk size is given.
        chunked = simulate_streaming and decoding_chunk_size > 0
        if not chunked:
            hidden, hidden_mask = self.encoder(
                speech,
                speech_lengths,
                decoding_chunk_size=decoding_chunk_size,
                num_decoding_left_chunks=num_decoding_left_chunks)
            return hidden, hidden_mask

        hidden, hidden_mask = self.encoder.forward_chunk_by_chunk(
            speech,
            decoding_chunk_size=decoding_chunk_size,
            num_decoding_left_chunks=num_decoding_left_chunks)
        return hidden, hidden_mask

    def translate(
            self,
            speech: paddle.Tensor,
            speech_lengths: paddle.Tensor,
            beam_size: int=10,
            word_reward: float=0.0,
            maxlenratio: float=0.5,
            decoding_chunk_size: int=-1,
            num_decoding_left_chunks: int=-1,
            simulate_streaming: bool=False, ) -> paddle.Tensor:
        """ Apply beam search on the ST decoder with a per-token word reward.

        Only a batch size of 1 is supported (asserted below).

        Args:
            speech (paddle.Tensor): (batch, max_len, feat_dim)
            speech_lengths (paddle.Tensor): (batch, )
            beam_size (int): beam size for beam search
            word_reward (float): word reward used in beam search
            maxlenratio (float): max length ratio to bound the length of translated text;
                the step limit is max(int(encoder_len * maxlenratio), 5)
            decoding_chunk_size (int): decoding chunk for dynamic chunk
                trained model.
                <0: for decoding, use full chunk.
                >0: for decoding, use fixed chunk size as set.
                0: used for training, it's prohibited here
            num_decoding_left_chunks (int): forwarded to the encoder
            simulate_streaming (bool): whether do encoder forward in a
                streaming fashion
        Returns:
            paddle.Tensor: best hypothesis with the leading sos removed and
                the trailing eos kept, shape (1, result_len)
        """
        assert speech.shape[0] == speech_lengths.shape[0]
        assert decoding_chunk_size != 0
        assert speech.shape[0] == 1
        device = speech.place

        # Let's assume B = batch_size and N = beam_size
        # 1. Encoder and init hypothesis
        encoder_out, encoder_mask = self._forward_encoder(
            speech, speech_lengths, decoding_chunk_size,
            num_decoding_left_chunks,
            simulate_streaming)  # (B, maxlen, encoder_dim)

        # Upper bound on the number of decoding steps (at least 5).
        maxlen = max(int(encoder_out.shape[1] * maxlenratio), 5)

        # Each hypothesis keeps its score, token ids and decoder cache.
        hyp = {"score": 0.0, "yseq": [self.sos], "cache": None}
        hyps = [hyp]
        ended_hyps = []
        cur_best_score = -float("inf")
        cache = None

        # 2. Decoder forward step by step
        for i in range(1, maxlen + 1):
            # At step i every live hypothesis holds exactly i tokens.
            ys = paddle.ones((len(hyps), i), dtype=paddle.long)

            # Allocate one batched cache tensor per entry of the per-hypothesis
            # cache list, then fill it row by row below.
            if hyps[0]["cache"] is not None:
                cache = [
                    paddle.ones(
                        (len(hyps), i - 1, hyp_cache.shape[-1]),
                        dtype=paddle.float32) for hyp_cache in hyps[0]["cache"]
                ]
            for j, hyp in enumerate(hyps):
                ys[j, :] = paddle.to_tensor(hyp["yseq"])
                if hyps[0]["cache"] is not None:
                    for k in range(len(cache)):
                        cache[k][j] = hyps[j]["cache"][k]
            ys_mask = subsequent_mask(i).unsqueeze(0).to(device)

            # The single encoder output is repeated once per live hypothesis.
            logp, cache = self.st_decoder.forward_one_step(
                encoder_out.repeat(len(hyps), 1, 1),
                encoder_mask.repeat(len(hyps), 1, 1), ys, ys_mask, cache)

            hyps_best_kept = []
            for j, hyp in enumerate(hyps):
                top_k_logp, top_k_index = logp[j:j + 1].topk(beam_size)

                for b in range(beam_size):
                    # Extend hypothesis j with its b-th best next token.
                    new_hyp = {}
                    new_hyp["score"] = hyp["score"] + float(top_k_logp[0, b])
                    new_hyp["yseq"] = [0] * (1 + len(hyp["yseq"]))
                    new_hyp["yseq"][:len(hyp["yseq"])] = hyp["yseq"]
                    new_hyp["yseq"][len(hyp["yseq"])] = int(top_k_index[0, b])
                    new_hyp["cache"] = [cache_[j] for cache_ in cache]
                    # will be (2 x beam) hyps at most
                    hyps_best_kept.append(new_hyp)

                # Prune to the beam_size highest-scoring candidates after each
                # source hypothesis has been expanded.
                hyps_best_kept = sorted(
                    hyps_best_kept, key=lambda x: -x["score"])[:beam_size]

            # sort and get nbest
            hyps = hyps_best_kept
            # On the last allowed step, force every hypothesis to end with eos.
            if i == maxlen:
                for hyp in hyps:
                    hyp["yseq"].append(self.eos)

            # finalize the ended hypotheses with word reward (by length)
            remained_hyps = []
            for hyp in hyps:
                if hyp["yseq"][-1] == self.eos:
                    hyp["score"] += (i - 1) * word_reward
                    cur_best_score = max(cur_best_score, hyp["score"])
                    ended_hyps.append(hyp)
                else:
                    # stop while guarantee the optimality: keep a hypothesis
                    # only if its score plus the maximum possible word reward
                    # can still beat the best finished one
                    if hyp["score"] + maxlen * word_reward > cur_best_score:
                        remained_hyps.append(hyp)

            # stop prediction when there is no unended hypothesis
            if not remained_hyps:
                break
            hyps = remained_hyps

        # 3. Select best of best
        best_hyp = max(ended_hyps, key=lambda x: x["score"])

        # Drop the leading sos; the result keeps a batch axis of size one.
        return paddle.to_tensor([best_hyp["yseq"][1:]])

    # @jit.to_static
    def subsampling_rate(self) -> int:
        """Export interface for c++ call.

        Returns:
            int: ``subsampling_rate`` of the encoder's ``embed`` module.
        """
        embed = self.encoder.embed
        return embed.subsampling_rate

    # @jit.to_static
    def right_context(self) -> int:
        """Export interface for c++ call.

        Returns:
            int: ``right_context`` of the encoder's ``embed`` module.
        """
        embed = self.encoder.embed
        return embed.right_context

    # @jit.to_static
    def sos_symbol(self) -> int:
        """Export interface for c++ call.

        Returns:
            int: the sos symbol id stored on the model.
        """
        sos_id = self.sos
        return sos_id

    # @jit.to_static
    def eos_symbol(self) -> int:
        """Export interface for c++ call.

        Returns:
            int: the eos symbol id stored on the model.
        """
        eos_id = self.eos
        return eos_id

    @jit.to_static
    def forward_encoder_chunk(
            self,
            xs: paddle.Tensor,
            offset: int,
            required_cache_size: int,
            att_cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0]),
            cnn_cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0]),
    ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor]:
        """ Export interface for c++ call: encode one input chunk.

        This is a thin wrapper that forwards all arguments to
        ``self.encoder.forward_chunk``.

        Args:
            xs (paddle.Tensor): chunk input, with shape (b=1, time, mel-dim),
                where `time == (chunk_size - 1) * subsample_rate + \
                        subsample.right_context + 1`
            offset (int): current offset in encoder output time stamp
            required_cache_size (int): cache size required for next chunk
                computation
                >=0: actual cache size
                <0: means all history cache is required
            att_cache (paddle.Tensor): cache tensor for KEY & VALUE in
                transformer/conformer attention, with shape
                (elayers, head, cache_t1, d_k * 2), where
                `head * d_k == hidden-dim` and
                `cache_t1 == chunk_size * num_decoding_left_chunks`.
                `d_k * 2` for att key & value.
            cnn_cache (paddle.Tensor): cache tensor for cnn_module in conformer,
                (elayers, b=1, hidden-dim, cache_t2), where
                `cache_t2 == cnn.lorder - 1`

        Returns:
            paddle.Tensor: output of current input xs,
                with shape (b=1, chunk_size, hidden-dim).
            paddle.Tensor: new attention cache required for next chunk, with
                dynamic shape (elayers, head, T(?), d_k * 2)
                depending on required_cache_size.
            paddle.Tensor: new conformer cnn cache required for next chunk, with
                same shape as the original cnn_cache.
        """
        chunk_outputs = self.encoder.forward_chunk(
            xs, offset, required_cache_size, att_cache, cnn_cache)
        return chunk_outputs

    # @jit.to_static
    def ctc_activation(self, xs: paddle.Tensor) -> paddle.Tensor:
        """ Export interface for c++ call: pass the encoder output through
            the ``log_softmax`` method of the model's ctc module.
        Args:
            xs (paddle.Tensor): encoder output
        Returns:
            paddle.Tensor: the value returned by ``self.ctc.log_softmax(xs)``
        """
        ctc_head = self.ctc
        return ctc_head.log_softmax(xs)

    @jit.to_static
    def forward_attention_decoder(
            self,
            hyps: paddle.Tensor,
            hyps_lens: paddle.Tensor,
            encoder_out: paddle.Tensor, ) -> paddle.Tensor:
        """ Export interface for c++ call, forward decoder with multiple
            hypothesis from ctc prefix beam search and one encoder output
        Args:
            hyps (paddle.Tensor): hyps from ctc prefix beam search, already
                pad sos at the beginning, (B, T)
            hyps_lens (paddle.Tensor): length of each hyp in hyps, (B)
            encoder_out (paddle.Tensor): corresponding encoder output, (B=1, T, D)
        Returns:
            paddle.Tensor: log-softmax (over the last axis) of the decoder
                output; per the shape note below this is expected to be
                (num_hyps, max_hyps_len, vocab_size).
        Raises:
            AssertionError: if `encoder_out` does not have batch size 1 or
                `hyps_lens` does not have one entry per hypothesis.
        """
        assert encoder_out.shape[0] == 1
        num_hyps = hyps.shape[0]
        assert hyps_lens.shape[0] == num_hyps
        # Share the single encoder output across every hypothesis.
        # `paddle.tile` is the native Paddle spelling of a torch-style
        # `Tensor.repeat`, so this does not depend on a monkey patch.
        encoder_out = paddle.tile(encoder_out, [num_hyps, 1, 1])
        # (B, 1, T)
        encoder_mask = paddle.ones(
            [num_hyps, 1, encoder_out.shape[1]], dtype=paddle.bool)
        # (num_hyps, max_hyps_len, vocab_size)
        decoder_out, _ = self.decoder(encoder_out, encoder_mask, hyps,
                                      hyps_lens)
        # Paddle names the reduction axis `axis`; the torch-style `dim`
        # keyword is not accepted by `log_softmax`.
        decoder_out = paddle.nn.functional.log_softmax(decoder_out, axis=-1)
        return decoder_out

    @paddle.no_grad()
    def decode(self,
               feats: paddle.Tensor,
               feats_lengths: paddle.Tensor,
               text_feature: Dict[str, int],
               decoding_method: str,
               beam_size: int,
               word_reward: float=0.0,
               maxlenratio: float=0.5,
               decoding_chunk_size: int=-1,
               num_decoding_left_chunks: int=-1,
               simulate_streaming: bool=False):
        """u2 decoding.

        Args:
            feats (Tensor): audio features, (B, T, D)
            feats_lengths (Tensor): (B)
            text_feature (TextFeaturizer): text feature object; only its
                    ``defeaturize`` method is used here.
            decoding_method (str): decoding mode. Only 'fullsentence' is
                    handled by this method.
            beam_size (int): beam size for search
            word_reward (float, optional): forwarded unchanged to
                    ``self.translate``. Defaults to 0.0.
            maxlenratio (float, optional): forwarded unchanged to
                    ``self.translate``. Defaults to 0.5.
            decoding_chunk_size (int, optional): decoding chunk size. Defaults to -1.
                    <0: for decoding, use full chunk.
                    >0: for decoding, use fixed chunk size as set.
                    0: used for training, it's prohibited here.
            num_decoding_left_chunks (int, optional):
                    number of left chunks for decoding. Defaults to -1.
            simulate_streaming (bool, optional): simulate streaming inference. Defaults to False.

        Raises:
            ValueError: when not support decoding_method.

        Returns:
            list: one ``text_feature.defeaturize(...)`` result per hypothesis
                returned by ``self.translate``.
        """
        # Reject unsupported modes up front, before any decoding work.
        if decoding_method != 'fullsentence':
            raise ValueError(f"Not support decoding method: {decoding_method}")

        hyps = self.translate(
            feats,
            feats_lengths,
            beam_size=beam_size,
            word_reward=word_reward,
            maxlenratio=maxlenratio,
            decoding_chunk_size=decoding_chunk_size,
            num_decoding_left_chunks=num_decoding_left_chunks,
            simulate_streaming=simulate_streaming)
        # Each hypothesis is converted to a plain Python list of ids before
        # being turned back into text.
        token_ids = [hyp.tolist() for hyp in hyps]
        return [text_feature.defeaturize(ids) for ids in token_ids]


class U2STModel(U2STBaseModel):
    """U2 speech-translation model whose sub-modules are built from a
    configuration dict."""

    def __init__(self, configs: dict):
        vocab_size, encoder, decoder = U2STModel._init_from_config(configs)

        # `_init_from_config` hands back a (st_decoder, asr_decoder, ctc)
        # tuple when joint ASR training is enabled, and the bare ST decoder
        # otherwise.
        if isinstance(decoder, tuple):
            st_decoder, asr_decoder, ctc = decoder
            submodules = dict(
                st_decoder=st_decoder, decoder=asr_decoder, ctc=ctc)
        else:
            submodules = dict(st_decoder=decoder)

        super().__init__(
            vocab_size=vocab_size,
            encoder=encoder,
            **submodules,
            **configs['model_conf'])

    @classmethod
    def _init_from_config(cls, configs: dict):
        """Build the sub modules of the model from the config.

        Args:
            configs (dict): config dict.

        Raises:
            ValueError: raise when using not support encoder type.

        Returns:
            tuple: ``(vocab_size, encoder, decoders)``. ``decoders`` is the
                ST decoder alone when ``model_conf.asr_weight`` is not
                positive, otherwise the tuple
                ``(st_decoder, asr_decoder, ctc)``.
        """
        cmvn_file = configs['cmvn_file']
        if cmvn_file is None:
            global_cmvn = None
        else:
            mean, istd = load_cmvn(cmvn_file, configs['cmvn_file_type'])
            global_cmvn = GlobalCMVN(
                paddle.to_tensor(mean, dtype=paddle.float),
                paddle.to_tensor(istd, dtype=paddle.float))

        input_dim, vocab_size = configs['input_dim'], configs['output_dim']
        assert input_dim != 0, input_dim
        assert vocab_size != 0, vocab_size

        encoder_type = configs.get('encoder', 'transformer')
        logger.info(f"U2 Encoder type: {encoder_type}")
        if encoder_type == 'transformer':
            encoder_cls = TransformerEncoder
        elif encoder_type == 'conformer':
            encoder_cls = ConformerEncoder
        else:
            raise ValueError(f"not support encoder type:{encoder_type}")
        encoder = encoder_cls(
            input_dim, global_cmvn=global_cmvn, **configs['encoder_conf'])

        def _new_decoder():
            # The ST and ASR decoders are built from the same settings.
            return TransformerDecoder(vocab_size,
                                      encoder.output_size(),
                                      **configs['decoder_conf'])

        st_decoder = _new_decoder()

        model_conf = configs['model_conf']
        asr_weight = model_conf['asr_weight']
        logger.info(f"ASR Joint Training Weight: {asr_weight}")

        if not asr_weight > 0.:
            return vocab_size, encoder, st_decoder

        decoder = _new_decoder()
        # ctc decoder and ctc loss
        ctc = CTCDecoderBase(
            odim=vocab_size,
            enc_n_units=encoder.output_size(),
            blank_id=0,
            dropout_rate=model_conf.get('ctc_dropout_rate', 0.0),
            reduction=True,  # sum
            batch_average=True,  # sum / batch_size
            grad_norm_type=model_conf.get('ctc_grad_norm_type', None))

        return vocab_size, encoder, (st_decoder, decoder, ctc)

    @classmethod
    def from_config(cls, configs: dict):
        """Instantiate the model from a config.

        Args:
            configs (dict): config dict.

        Raises:
            ValueError: raise when using not support encoder type.

        Returns:
            nn.Layer: U2STModel
        """
        return cls(configs)

    @classmethod
    def from_pretrained(cls, dataloader, config, checkpoint_path):
        """Build the model and optionally load parameters from a checkpoint.

        Args:
            dataloader (paddle.io.DataLoader): its ``collate_fn.feature_size``
                and ``collate_fn.vocab_size`` are written into ``config`` as
                ``input_dim`` and ``output_dim``.
            config (yacs.config.CfgNode):  model configs
            checkpoint_path (Path or str): the path of pretrained model checkpoint, without extension name.
                When falsy, no parameters are loaded.

        Returns:
            The model instance created by ``cls.from_config(config)``.
        """
        with UpdateConfig(config):
            collate_fn = dataloader.collate_fn
            config.input_dim = collate_fn.feature_size
            config.output_dim = collate_fn.vocab_size

        model = cls.from_config(config)

        if checkpoint_path:
            infos = checkpoint.load_parameters(
                model, checkpoint_path=checkpoint_path)
            logger.info(f"checkpoint info: {infos}")
        layer_tools.summary(model)
        return model


class U2STInferModel(U2STModel):
    """Export-oriented variant of ``U2STModel`` whose ``forward`` simply
    delegates to ``translate``."""

    def __init__(self, configs: dict):
        super().__init__(configs)

    def forward(self,
                feats,
                feats_lengths,
                decoding_chunk_size=-1,
                num_decoding_left_chunks=-1,
                simulate_streaming=False):
        """export model function

        Args:
            feats (Tensor): [B, T, D]
            feats_lengths (Tensor): [B]
            decoding_chunk_size (int, optional): forwarded to ``translate``.
                Defaults to -1.
            num_decoding_left_chunks (int, optional): forwarded to
                ``translate``. Defaults to -1.
            simulate_streaming (bool, optional): forwarded to ``translate``.
                Defaults to False.

        Returns:
            The value returned by ``self.translate`` for these arguments.
        """
        translation = self.translate(
            feats,
            feats_lengths,
            decoding_chunk_size=decoding_chunk_size,
            num_decoding_left_chunks=num_decoding_left_chunks,
            simulate_streaming=simulate_streaming)
        return translation


================================================
FILE: paddlespeech/s2t/models/wav2vec2/__init__.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .wav2vec2_ASR import Wav2vec2ASR
from .wav2vec2_ASR import Wav2vec2Base

__all__ = ["Wav2vec2ASR", "Wav2vec2Base"]


================================================
FILE: paddlespeech/s2t/models/wav2vec2/modules/VanillaNN.py
================================================
# Authors
# * Elena Rastorgueva 2020
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from speechbrain(https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/lobes/models/VanillaNN.py).
import paddle

from paddlespeech.s2t.models.wav2vec2.modules import containers
from paddlespeech.s2t.models.wav2vec2.modules import linear
from paddlespeech.s2t.models.wav2vec2.modules.normalization import BatchNorm1d


class VanillaNN(containers.Sequential):
    """A simple vanilla Deep Neural Network: a stack of
    linear -> [batch norm] -> [LeakyReLU] -> dropout blocks.
    Arguments
    ---------
    input_shape : int
        Size of the last (feature) axis of the input. It is passed to the
        ``Sequential`` base class as ``[None, None, input_shape]``.
    dnn_blocks : int
        The number of linear neural blocks to include.
    dnn_neurons : int
        The number of neurons in the linear layers.
    activation : bool
        If truthy, a ``paddle.nn.LeakyReLU`` layer is appended in each block.
    normalization : bool
        If truthy, a ``BatchNorm1d`` layer is appended in each block.
    dropout_rate : float or list
        Dropout probability. A non-list value is used for every block; a
        list must contain exactly ``dnn_blocks`` entries, one per block.
    Example
    -------
    >>> inputs = paddle.rand([10, 120, 60])
    >>> model = VanillaNN(input_shape=inputs.shape[-1])
    >>> outputs = model(inputs)
    >>> outputs.shape
    [10, 120, 512]
    """

    def __init__(self,
                 input_shape,
                 dnn_blocks=2,
                 dnn_neurons=512,
                 activation=True,
                 normalization=False,
                 dropout_rate=0.5):
        super().__init__(input_shape=[None, None, input_shape])

        # Normalize `dropout_rate` to one probability per block.
        if isinstance(dropout_rate, list):
            assert len(
                dropout_rate
            ) == dnn_blocks, "len(dropout_rate) must equal to dnn_blocks"
            block_dropouts = dropout_rate
        else:
            block_dropouts = [dropout_rate] * dnn_blocks

        for block_id in range(dnn_blocks):
            # The linear layer is passed as a class so that `append` can
            # construct it with the inferred input shape.
            self.append(
                linear.Linear,
                n_neurons=dnn_neurons,
                bias_attr=None,
                layer_name="linear", )
            if normalization:
                self.append(
                    BatchNorm1d, input_size=dnn_neurons, layer_name='bn')
            if activation:
                leaky_relu = paddle.nn.LeakyReLU()
                self.append(leaky_relu, layer_name="act")
            dropout = paddle.nn.Dropout(p=block_dropouts[block_id])
            self.append(dropout, layer_name='dropout')

================================================
FILE: paddlespeech/s2t/models/wav2vec2/modules/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/models/wav2vec2/modules/activations.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math

import paddle
from paddle import nn
from paddle import Tensor

from paddlespeech.s2t.utils.log import Log
logger = Log(__name__).getlog()


class NewGELUActivation(nn.Layer):
    """
    Tanh-based GELU approximation, as used in the Google BERT repo (identical to OpenAI GPT):
    0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3))). Also see
    the Gaussian Error Linear Units paper: https://arxiv.org/abs/1606.08415
    """

    def forward(self, input: Tensor) -> Tensor:
        cubic_term = input + 0.044715 * paddle.pow(input, 3.0)
        gate = 1.0 + paddle.tanh(math.sqrt(2.0 / math.pi) * cubic_term)
        return 0.5 * input * gate


class GELUActivation(nn.Layer):
    """
    Erf-based GELU activation: x * 0.5 * (1 + erf(x / sqrt(2))).

    By default the computation is delegated to ``nn.functional.gelu``. With
    ``use_gelu_python=True`` the formula above is evaluated with Paddle ops
    in Python (``_gelu_python``) instead.

    For information: OpenAI GPT's GELU is slightly different (and gives slightly different results): 0.5 * x * (1 +
    paddle.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * paddle.pow(x, 3)))).
    Also see the Gaussian Error Linear Units paper: https://arxiv.org/abs/1606.08415

    Args:
        use_gelu_python (bool, optional): select the Python implementation
            instead of ``nn.functional.gelu``. Defaults to False.
    """

    def __init__(self, use_gelu_python: bool=False):
        super().__init__()
        # Honor the flag; previously it was accepted but ignored, which left
        # `_gelu_python` unreachable.
        if use_gelu_python:
            self.act = self._gelu_python
        else:
            self.act = nn.functional.gelu

    def _gelu_python(self, input: Tensor) -> Tensor:
        """Evaluate x * 0.5 * (1 + erf(x / sqrt(2))) with Paddle ops."""
        return input * 0.5 * (1.0 + paddle.erf(input / math.sqrt(2.0)))

    def forward(self, input: Tensor) -> Tensor:
        """Apply the implementation selected in ``__init__`` to ``input``."""
        return self.act(input)


class FastGELUActivation(nn.Layer):
    """
    Tanh-based GELU approximation, slower than QuickGELU but more accurate. See: https://github.com/hendrycks/GELUs
    """

    def forward(self, input: Tensor) -> Tensor:
        # 0.7978845608 is the hard-coded scale applied to the tanh argument.
        tanh_arg = input * 0.7978845608 * (1.0 + 0.044715 * input * input)
        return 0.5 * input * (1.0 + paddle.tanh(tanh_arg))


class QuickGELUActivation(nn.Layer):
    """
    Sigmoid-based GELU approximation, x * sigmoid(1.702 * x): fast but somewhat inaccurate.
    See: https://github.com/hendrycks/GELUs
    """

    def forward(self, input: Tensor) -> Tensor:
        gate = paddle.sigmoid(1.702 * input)
        return input * gate


class ClippedGELUActivation(nn.Layer):
    """
    GELU whose output is clipped to the range [min, max]. This is especially useful for quantization purpose, as
    it allows mapping negatives values in the GeLU spectrum. For more information on this trick, please refer to
    https://arxiv.org/abs/2004.09602.

    The un-clipped value comes from the module-level ``gelu`` activation.

    For information: OpenAI GPT's gelu is slightly different (and gives slightly different results): 0.5 * x * (1 +
    paddle.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * paddle.pow(x, 3)))). See https://arxiv.org/abs/1606.08415

    Args:
        min (float): lower clipping bound.
        max (float): upper clipping bound.

    Raises:
        ValueError: if ``min`` is greater than ``max``.
    """

    def __init__(self, min: float, max: float):
        # Validate before the base class is initialised.
        bounds_inverted = min > max
        if bounds_inverted:
            raise ValueError(
                f"min should be < max (got min: {min}, max: {max})")

        super().__init__()
        self.min, self.max = min, max

    def forward(self, x: Tensor) -> Tensor:
        activated = gelu(x)
        return paddle.clip(activated, self.min, self.max)


class SiLUActivation(nn.Layer):
    """
    SiLU (Sigmoid Linear Unit) activation, x * sigmoid(x), computed through ``nn.functional.silu``.

    See Gaussian Error Linear Units (Hendrycks et al., https://arxiv.org/abs/1606.08415) where the SiLU was
    originally introduced and coined, and see Sigmoid-Weighted Linear Units for Neural Network Function
    Approximation in Reinforcement Learning (Elfwing et al., https://arxiv.org/abs/1702.03118) and Swish: a Self-Gated
    Activation Function (Ramachandran et al., https://arxiv.org/abs/1710.05941v1) where the SiLU was experimented with
    later.
    """

    def __init__(self):
        super().__init__()
        self.act = nn.functional.silu

    def _silu_python(self, input: Tensor) -> Tensor:
        """Reference formula written with Paddle ops; not used by ``forward``."""
        gate = paddle.sigmoid(input)
        return input * gate

    def forward(self, input: Tensor) -> Tensor:
        activated = self.act(input)
        return activated


class MishActivation(nn.Layer):
    """
    Mish activation, x * tanh(softplus(x)), computed through ``nn.functional.mish``.

    See Mish: A Self-Regularized Non-Monotonic Activation Function (Misra., https://arxiv.org/abs/1908.08681). Also
    visit the official repository for the paper: https://github.com/digantamisra98/Mish
    """

    def __init__(self):
        super().__init__()
        self.act = nn.functional.mish

    def _mish_python(self, input: Tensor) -> Tensor:
        """Reference formula written with Paddle ops; not used by ``forward``."""
        soft = nn.functional.softplus(input)
        return input * paddle.tanh(soft)

    def forward(self, input: Tensor) -> Tensor:
        activated = self.act(input)
        return activated


class LinearActivation(nn.Layer):
    """
    Identity activation: ``forward`` returns its input object unchanged.
    """

    def forward(self, input: Tensor) -> Tensor:
        output = input
        return output


# Registry mapping activation names to layer instances. Every value is
# constructed once, when this module is imported, so all callers of
# `get_activation` receive the same instance for a given name.
ACT2FN = {
    "gelu": GELUActivation(),
    # GELU with its output clipped to the range [-10, 10].
    "gelu_10": ClippedGELUActivation(-10, 10),
    "gelu_fast": FastGELUActivation(),
    "gelu_new": NewGELUActivation(),
    "gelu_python": GELUActivation(use_gelu_python=True),
    "linear": LinearActivation(),
    "mish": MishActivation(),
    "quick_gelu": QuickGELUActivation(),
    "relu": nn.ReLU(),
    "sigmoid": nn.Sigmoid(),
    # "silu" and "swish" use the same activation class, each with its own
    # instance.
    "silu": SiLUActivation(),
    "swish": SiLUActivation(),
    "tanh": nn.Tanh(),
}


def get_activation(activation_string):
    """Look up an activation layer by name.

    Args:
        activation_string: key into the ``ACT2FN`` mapping.

    Returns:
        The object stored in ``ACT2FN`` under ``activation_string``.

    Raises:
        KeyError: if ``activation_string`` is not a key of ``ACT2FN``; the
            message lists the available keys.
    """
    if activation_string not in ACT2FN:
        raise KeyError(
            f"function {activation_string} not found in ACT2FN mapping {list(ACT2FN.keys())}"
        )
    return ACT2FN[activation_string]


# For backwards compatibility with: from activations import gelu_python
# Each name below is bound to the instance that ACT2FN stores under the
# corresponding key (`linear_act` is the "linear" entry). `gelu` is also the
# callable that ClippedGELUActivation.forward looks up at call time.
gelu_python = get_activation("gelu_python")
gelu_new = get_activation("gelu_new")
gelu = get_activation("gelu")
gelu_fast = get_activation("gelu_fast")
quick_gelu = get_activation("quick_gelu")
silu = get_activation("silu")
mish = get_activation("mish")
linear_act = get_activation("linear")


================================================
FILE: paddlespeech/s2t/models/wav2vec2/modules/containers.py
================================================
# Authors
#  * Peter Plantinga 2020
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from speechbrain(https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/nnet/containers.py).
import inspect

import paddle


class Sequential(paddle.nn.LayerDict):
    """A sequence of modules with potentially inferring shape on construction.
    If layers are passed with names, these can be referenced with dot notation.
    Arguments
    ---------
    input_shape : iterable
        A list or tuple of ints or None, representing the expected shape of an
        input tensor. None represents a variable-length dimension. If no
        ``input_shape`` is passed, no shape inference will be performed.
    *layers, **named_layers
        The inputs are treated as a list of layers to be
        applied in sequence. The output shape of each layer is used to
        infer the shape of the following layer. If a tuple is returned,
        only the shape of the first element is used to determine input
        shape of the next layer (e.g. RNN returns output, hidden).
    Example
    -------
    >>> inputs = paddle.rand([10, 40, 50])
    >>> model = Sequential(input_shape=inputs.shape)
    >>> model.append(Linear, n_neurons=100, layer_name="layer1")
    >>> model.append(Linear, n_neurons=200, layer_name="layer2")
    >>> outputs = model(inputs)
    >>> outputs.shape
    [10, 40, 200]
    >>> outputs = model.layer1(inputs)
    >>> outputs.shape
    [10, 40, 100]
    """

    def __init__(self, *layers, input_shape=None, **named_layers):
        super().__init__()

        # Make sure either layers or input_shape is passed
        if not layers and input_shape is None and not named_layers:
            raise ValueError("Must pass either layers or input shape")

        # Keep track of what layers need "lengths" passed
        # (nothing in this class adds entries to the list).
        self.length_layers = []

        # Replace None dimensions with arbitrary value
        self.input_shape = input_shape
        if input_shape and None in input_shape:
            # Work on a copy so the caller's sequence is not modified.
            self.input_shape = list(input_shape)
            for i, dim in enumerate(self.input_shape):

                # To reduce size of dummy tensors, use 1 for batch dim
                if i == 0 and dim is None:
                    dim = 1

                # Use 256 as nice round arbitrary value, big enough that
                # halving this dimension a few times doesn't reach 1
                self.input_shape[i] = dim or 256

        # Append non-named layers
        for layer in layers:
            self.append(layer)

        # Append named layers
        for name, layer in named_layers.items():
            self.append(layer, layer_name=name)

    def append(self, layer, *args, layer_name=None, **kwargs):
        """Add a layer to the list of layers, inferring shape if necessary.
        Arguments
        ---------
        layer : A paddle.nn.Layer class or object
            If the layer is a class, it should accept an argument called
            ``input_shape`` which will be inferred and passed. If the layer
            is a layer object, it is added as-is.
        layer_name : str
            The name of the layer, for reference. If the name is in use,
            ``_{count}`` will be appended.
        *args, **kwargs
            These are passed to the layer if it is constructed.
        Raises
        ------
        ValueError
            If the object cannot be registered as a sub-layer, e.g. a class
            was passed but could not be constructed because no
            ``input_shape`` is available.
        """

        # Compute layer_name
        if layer_name is None:
            layer_name = str(len(self))
        elif layer_name in self:
            index = 0
            while f"{layer_name}_{index}" in self:
                index += 1
            layer_name = f"{layer_name}_{index}"
        # Check if it needs to be constructed with input shape
        if self.input_shape:
            argspec = inspect.getfullargspec(layer)
            if "input_shape" in argspec.args + argspec.kwonlyargs:
                input_shape = self.get_output_shape()
                layer = layer(*args, input_shape=input_shape, **kwargs)

        # Finally, append the layer.
        try:
            self[layer_name] = layer
        # Paddle's `add_sublayer` rejects non-Layer objects with an `assert`,
        # so AssertionError has to be caught alongside TypeError for the
        # explanatory message below to be reachable.
        except (TypeError, AssertionError) as err:
            raise ValueError(
                "Must pass `input_shape` at initialization and use "
                "modules that take `input_shape` to infer shape when "
                "using `append()`.") from err

    def get_output_shape(self):
        """Returns expected shape of the output.
        Computed by passing dummy input constructed with the
        ``self.input_shape`` attribute.
        """
        with paddle.no_grad():
            dummy_input = paddle.zeros(self.input_shape)
            dummy_output = self(dummy_input)
        return dummy_output.shape

    def forward(self, x):
        """Applies layers in sequence, passing only the first element of tuples.
        Arguments
        ---------
        x : paddle.Tensor
            The input tensor to run through the network.
        """
        for layer in self.values():
            x = layer(x)
            if isinstance(x, tuple):
                x = x[0]
        return x


================================================
FILE: paddlespeech/s2t/models/wav2vec2/modules/linear.py
================================================
# Authors
#  * Mirco Ravanelli 2020
#  * Davide Borra 2021
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from speechbrain(https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/nnet/linear.py).
import logging

import paddle

from paddlespeech.s2t.modules import align

logger = logging.getLogger(__name__)


class Linear(paddle.nn.Layer):
    """Computes a linear transformation y = wx + b.
    Arguments
    ---------
    n_neurons : int
        It is the number of output neurons (i.e, the dimensionality of the
        output).
    input_shape: tuple
        It is the shape of the input tensor. Only used when ``input_size``
        is not given.
    input_size: int
        Size of the input tensor. Takes precedence over ``input_shape``.
    bias_attr :
        Forwarded unchanged as ``bias_attr`` to the underlying
        ``align.Linear`` layer.
    combine_dims : bool
        If True and the input is 4D, combine 3rd and 4th dimensions of input.
    Example
    -------
    >>> inputs = paddle.rand([10, 50, 40])
    >>> lin_t = Linear(input_shape=(10, 50, 40), n_neurons=100)
    >>> output = lin_t(inputs)
    >>> output.shape
    [10, 50, 100]
    """

    def __init__(
            self,
            n_neurons,
            input_shape=None,
            input_size=None,
            bias_attr=None,
            combine_dims=False, ):
        super().__init__()
        self.combine_dims = combine_dims

        if input_shape is None and input_size is None:
            raise ValueError("Expected one of input_shape or input_size")

        if input_size is None:
            input_size = input_shape[-1]
            if len(input_shape) == 4 and self.combine_dims:
                input_size = input_shape[2] * input_shape[3]

        # The actual projection is delegated to the project's `align.Linear`.
        self.w = align.Linear(input_size, n_neurons, bias_attr=bias_attr)

    def forward(self, x):
        """Returns the linear transformation of input tensor.
        Arguments
        ---------
        x : paddle.Tensor
            Input to transform linearly.
        """
        # `ndim` is the integer number of dimensions. (`Tensor.rank` is a
        # method in Paddle, so comparing it with 4 could never be true.)
        if self.combine_dims and x.ndim == 4:
            # Paddle's reshape takes the target shape as a single list.
            x = x.reshape([x.shape[0], x.shape[1], x.shape[2] * x.shape[3]])

        return self.w(x)


================================================
FILE: paddlespeech/s2t/models/wav2vec2/modules/modeling_outputs.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import OrderedDict
from dataclasses import dataclass
from dataclasses import fields
from typing import Optional
from typing import Tuple

import paddle


class ModelOutput(OrderedDict):
    """
    Base class for all model outputs declared as dataclasses.

    Instances can be indexed by integer or slice (like a tuple) or by string (like a dictionary). Fields whose value
    is `None` are not stored in the mapping, so both kinds of indexing skip them. Otherwise an instance behaves like
    a regular python dictionary, except that `del`, `setdefault`, `pop` and `update` are disabled.

    A `ModelOutput` cannot be unpacked directly (iterating it yields its keys); call `to_tuple` first to get the
    values.
    """

    def __post_init__(self):
        """
        Validate the dataclass fields and mirror every non-`None` field into the mapping.

        When only the first field is set and it is not a tensor, it may be a dict or an iterable of `(str, value)`
        pairs; in that case each pair is assigned to the attribute/key of the same name. Any other value is stored
        under the first field's own name.

        Raises:
            ValueError: If the subclass declares no fields, or if a field other than the first one has a default
                that is not `None`.
        """
        class_fields = fields(self)

        # Safety and consistency checks
        if not len(class_fields):
            raise ValueError(f"{self.__class__.__name__} has no fields.")
        if not all(field.default is None for field in class_fields[1:]):
            raise ValueError(
                f"{self.__class__.__name__} should not have more than one required field."
            )

        first_name = class_fields[0].name
        first_field = getattr(self, first_name)
        other_fields_are_none = all(
            getattr(self, field.name) is None for field in class_fields[1:])

        if other_fields_are_none and not paddle.is_tensor(first_field):
            if isinstance(first_field, dict):
                iterator = first_field.items()
                first_field_iterator = True
            else:
                try:
                    iterator = iter(first_field)
                    first_field_iterator = True
                except TypeError:
                    first_field_iterator = False

            if first_field_iterator:
                for idx, element in enumerate(iterator):
                    is_key_value = (isinstance(element, (list, tuple)) and
                                    len(element) == 2 and
                                    isinstance(element[0], str))
                    if not is_key_value:
                        if idx == 0:
                            # Not a (key, value) stream at all (e.g. a tuple of tensors or a string):
                            # keep the whole object as the value of the first field instead of dropping it.
                            self[first_name] = first_field
                        break
                    setattr(self, element[0], element[1])
                    if element[1] is not None:
                        self[element[0]] = element[1]
            elif first_field is not None:
                self[first_name] = first_field
        else:
            for field in class_fields:
                v = getattr(self, field.name)
                if v is not None:
                    self[field.name] = v

    def __delitem__(self, *args, **kwargs):
        """Deleting entries is not supported; always raises `Exception`."""
        raise Exception(
            f"You cannot use ``__delitem__`` on a {self.__class__.__name__} instance."
        )

    def setdefault(self, *args, **kwargs):
        """`setdefault` is not supported; always raises `Exception`."""
        raise Exception(
            f"You cannot use ``setdefault`` on a {self.__class__.__name__} instance."
        )

    def pop(self, *args, **kwargs):
        """`pop` is not supported; always raises `Exception`."""
        raise Exception(
            f"You cannot use ``pop`` on a {self.__class__.__name__} instance.")

    def update(self, *args, **kwargs):
        """`update` is not supported; always raises `Exception`."""
        raise Exception(
            f"You cannot use ``update`` on a {self.__class__.__name__} instance."
        )

    def __getitem__(self, k):
        """
        Return the entry for `k`: a string is looked up as a key, anything else indexes `to_tuple()`.
        """
        if isinstance(k, str):
            # Direct dictionary lookup; no need to rebuild a plain dict on every access.
            return super().__getitem__(k)
        return self.to_tuple()[k]

    def __setattr__(self, name, value):
        """Set an attribute and, if `name` is already a key and `value` is not `None`, update the key too."""
        if name in self.keys() and value is not None:
            # Don't call self.__setitem__ to avoid recursion errors
            super().__setitem__(name, value)
        super().__setattr__(name, value)

    def __setitem__(self, key, value):
        """Set a key and keep the attribute of the same name in sync."""
        # Will raise a KeyException if needed
        super().__setitem__(key, value)
        # Don't call self.__setattr__ to avoid recursion errors
        super().__setattr__(key, value)

    def to_tuple(self) -> Tuple:
        """
        Convert self to a tuple containing all the attributes/keys that are not `None`.
        """
        return tuple(self.values())


@dataclass
class BaseModelOutput(ModelOutput):
    """
    Base class for model outputs, with optional hidden states and attentions.

    Every field defaults to `None`; arguments are listed in field (positional) order.

    Args:
        last_hidden_state (`paddle.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.
        hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    last_hidden_state: paddle.Tensor = None
    hidden_states: Optional[Tuple[paddle.Tensor]] = None
    attentions: Optional[Tuple[paddle.Tensor]] = None


@dataclass
class BaseModelOutputWithNoAttention(ModelOutput):
    """
    Base class for model outputs, with optional hidden states and no attentions.

    Every field defaults to `None`; arguments are listed in field (positional) order.

    Args:
        last_hidden_state (`paddle.Tensor` of shape `(batch_size, num_channels, height, width)`):
            Sequence of hidden-states at the output of the last layer of the model.
        hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, num_channels, height, width)`.

            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
    """

    # Annotated as a tensor (the original annotation was the `paddle` module itself).
    last_hidden_state: paddle.Tensor = None
    hidden_states: Optional[Tuple[paddle.Tensor]] = None


@dataclass
class BaseModelOutputWithPooling(ModelOutput):
    """
    Base class for model outputs that also contain a pooling of the last hidden states.

    Every field defaults to `None`; arguments are listed in field (positional) order.

    Args:
        last_hidden_state (`paddle.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.
        pooler_output (`paddle.Tensor` of shape `(batch_size, hidden_size)`):
            Last layer hidden-state of the first token of the sequence (classification token) after further processing
            through the layers used for the auxiliary pretraining task. E.g. for BERT-family of models, this returns
            the classification token after processing through a linear layer and a tanh activation function. The linear
            layer weights are trained from the next sentence prediction (classification) objective during pretraining.
        hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    last_hidden_state: paddle.Tensor = None
    pooler_output: paddle.Tensor = None
    hidden_states: Optional[Tuple[paddle.Tensor]] = None
    attentions: Optional[Tuple[paddle.Tensor]] = None


@dataclass
class BaseModelOutputWithPoolingAndNoAttention(ModelOutput):
    """
    Base class for model outputs that also contain a pooling of the last hidden states, without attentions.

    Every field defaults to `None`; arguments are listed in field (positional) order.

    Args:
        last_hidden_state (`paddle.Tensor` of shape `(batch_size, num_channels, height, width)`):
            Sequence of hidden-states at the output of the last layer of the model.
        pooler_output (`paddle.Tensor` of shape `(batch_size, hidden_size)`):
            Last layer hidden-state after a pooling operation on the spatial dimensions.
        hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, num_channels, height, width)`.

            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
    """

    last_hidden_state: paddle.Tensor = None
    pooler_output: paddle.Tensor = None
    hidden_states: Optional[Tuple[paddle.Tensor]] = None


@dataclass
class BaseModelOutputWithPast(ModelOutput):
    """
    Base class for model outputs that may also contain past key/values (to speed up sequential decoding).

    Every field defaults to `None`; arguments are listed in field (positional) order.

    Args:
        last_hidden_state (`paddle.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.

            If `past_key_values` is used, only the last hidden-state of the sequences, of shape `(batch_size, 1,
            hidden_size)`, is output.
        past_key_values (`tuple(tuple(paddle.Tensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
            Tuple of `tuple(paddle.Tensor)` of length `config.n_layers`, with each tuple having 2 tensors of shape
            `(batch_size, num_heads, sequence_length, embed_size_per_head)` and optionally, if
            `config.is_encoder_decoder=True`, 2 additional tensors of shape `(batch_size, num_heads,
            encoder_sequence_length, embed_size_per_head)`.

            Contains pre-computed hidden-states (key and values in the self-attention blocks and optionally, if
            `config.is_encoder_decoder=True`, in the cross-attention blocks) that can be used (see `past_key_values`
            input) to speed up sequential decoding.
        hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    last_hidden_state: paddle.Tensor = None
    past_key_values: Optional[Tuple[Tuple[paddle.Tensor]]] = None
    hidden_states: Optional[Tuple[paddle.Tensor]] = None
    attentions: Optional[Tuple[paddle.Tensor]] = None


@dataclass
class BaseModelOutputWithCrossAttentions(ModelOutput):
    """
    Base class for model outputs, with optional hidden states, self-attentions and cross-attentions.

    Every field defaults to `None`; arguments are listed in field (positional) order.

    Args:
        last_hidden_state (`paddle.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.
        hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
        cross_attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` and `config.add_cross_attention=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights of the decoder's cross-attention layer, after the attention softmax, used to compute
            the weighted average in the cross-attention heads.
    """

    last_hidden_state: paddle.Tensor = None
    hidden_states: Optional[Tuple[paddle.Tensor]] = None
    attentions: Optional[Tuple[paddle.Tensor]] = None
    cross_attentions: Optional[Tuple[paddle.Tensor]] = None


@dataclass
class BaseModelOutputWithPoolingAndCrossAttentions(ModelOutput):
    """
    Base class for model outputs that also contain a pooling of the last hidden states, plus optional past
    key/values and cross-attentions.

    Every field defaults to `None`; arguments are listed in field (positional) order.

    Args:
        last_hidden_state (`paddle.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.
        pooler_output (`paddle.Tensor` of shape `(batch_size, hidden_size)`):
            Last layer hidden-state of the first token of the sequence (classification token) after further processing
            through the layers used for the auxiliary pretraining task. E.g. for BERT-family of models, this returns
            the classification token after processing through a linear layer and a tanh activation function. The linear
            layer weights are trained from the next sentence prediction (classification) objective during pretraining.
        hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        past_key_values (`tuple(tuple(paddle.Tensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
            Tuple of `tuple(paddle.Tensor)` of length `config.n_layers`, with each tuple having 2 tensors of shape
            `(batch_size, num_heads, sequence_length, embed_size_per_head)` and optionally, if
            `config.is_encoder_decoder=True`, 2 additional tensors of shape `(batch_size, num_heads,
            encoder_sequence_length, embed_size_per_head)`.

            Contains pre-computed hidden-states (key and values in the self-attention blocks and optionally, if
            `config.is_encoder_decoder=True`, in the cross-attention blocks) that can be used (see `past_key_values`
            input) to speed up sequential decoding.
        attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
        cross_attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` and `config.add_cross_attention=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights of the decoder's cross-attention layer, after the attention softmax, used to compute
            the weighted average in the cross-attention heads.
    """

    last_hidden_state: paddle.Tensor = None
    pooler_output: paddle.Tensor = None
    hidden_states: Optional[Tuple[paddle.Tensor]] = None
    past_key_values: Optional[Tuple[Tuple[paddle.Tensor]]] = None
    attentions: Optional[Tuple[paddle.Tensor]] = None
    cross_attentions: Optional[Tuple[paddle.Tensor]] = None


@dataclass
class BaseModelOutputWithPastAndCrossAttentions(ModelOutput):
    """
    Base class for model outputs that may also contain past key/values (to speed up sequential decoding) and
    cross-attentions.

    Every field defaults to `None`; arguments are listed in field (positional) order.

    Args:
        last_hidden_state (`paddle.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.

            If `past_key_values` is used, only the last hidden-state of the sequences, of shape `(batch_size, 1,
            hidden_size)`, is output.
        past_key_values (`tuple(tuple(paddle.Tensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
            Tuple of `tuple(paddle.Tensor)` of length `config.n_layers`, with each tuple having 2 tensors of shape
            `(batch_size, num_heads, sequence_length, embed_size_per_head)` and optionally, if
            `config.is_encoder_decoder=True`, 2 additional tensors of shape `(batch_size, num_heads,
            encoder_sequence_length, embed_size_per_head)`.

            Contains pre-computed hidden-states (key and values in the self-attention blocks and optionally, if
            `config.is_encoder_decoder=True`, in the cross-attention blocks) that can be used (see `past_key_values`
            input) to speed up sequential decoding.
        hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
        cross_attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` and `config.add_cross_attention=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights of the decoder's cross-attention layer, after the attention softmax, used to compute
            the weighted average in the cross-attention heads.
    """

    last_hidden_state: paddle.Tensor = None
    past_key_values: Optional[Tuple[Tuple[paddle.Tensor]]] = None
    hidden_states: Optional[Tuple[paddle.Tensor]] = None
    attentions: Optional[Tuple[paddle.Tensor]] = None
    cross_attentions: Optional[Tuple[paddle.Tensor]] = None


@dataclass
class Seq2SeqModelOutput(ModelOutput):
    """
    Base class for encoder-decoder model outputs that also contain pre-computed hidden states which can speed up
    sequential decoding.

    Every field defaults to `None`; arguments are listed in field (positional) order.

    Args:
        last_hidden_state (`paddle.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the decoder of the model.

            If `past_key_values` is used, only the last hidden-state of the sequences, of shape `(batch_size, 1,
            hidden_size)`, is output.
        past_key_values (`tuple(tuple(paddle.Tensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
            Tuple of `tuple(paddle.Tensor)` of length `config.n_layers`, with each tuple having 2 tensors of shape
            `(batch_size, num_heads, sequence_length, embed_size_per_head)` and 2 additional tensors of shape
            `(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)`.

            Contains pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention
            blocks) that can be used (see `past_key_values` input) to speed up sequential decoding.
        decoder_hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the decoder at the output of each layer plus the optional initial embedding outputs.
        decoder_attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights of the decoder, after the attention softmax, used to compute the weighted average in
            the self-attention heads.
        cross_attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights of the decoder's cross-attention layer, after the attention softmax, used to compute
            the weighted average in the cross-attention heads.
        encoder_last_hidden_state (`paddle.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
            Sequence of hidden-states at the output of the last layer of the encoder of the model.
        encoder_hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the encoder at the output of each layer plus the optional initial embedding outputs.
        encoder_attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights of the encoder, after the attention softmax, used to compute the weighted average in
            the self-attention heads.
    """

    last_hidden_state: paddle.Tensor = None
    past_key_values: Optional[Tuple[Tuple[paddle.Tensor]]] = None
    decoder_hidden_states: Optional[Tuple[paddle.Tensor]] = None
    decoder_attentions: Optional[Tuple[paddle.Tensor]] = None
    cross_attentions: Optional[Tuple[paddle.Tensor]] = None
    encoder_last_hidden_state: Optional[paddle.Tensor] = None
    encoder_hidden_states: Optional[Tuple[paddle.Tensor]] = None
    encoder_attentions: Optional[Tuple[paddle.Tensor]] = None


@dataclass
class CausalLMOutput(ModelOutput):
    """
    Base class for causal language model (or autoregressive) outputs.

    Every field defaults to `None`; arguments are listed in field (positional) order, so `loss` comes before
    `logits`.

    Args:
        loss (`paddle.Tensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
            Language modeling loss (for next-token prediction).
        logits (`paddle.Tensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
            Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
        hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    loss: Optional[paddle.Tensor] = None
    logits: paddle.Tensor = None
    hidden_states: Optional[Tuple[paddle.Tensor]] = None
    attentions: Optional[Tuple[paddle.Tensor]] = None


@dataclass
class CausalLMOutputWithPast(ModelOutput):
    """
    Base class for causal language model (or autoregressive) outputs that may also contain past key/values.

    Every field defaults to `None`; arguments are listed in field (positional) order.

    Args:
        loss (`paddle.Tensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
            Language modeling loss (for next-token prediction).
        logits (`paddle.Tensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
            Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
        past_key_values (`tuple(tuple(paddle.Tensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
            Tuple of `tuple(paddle.Tensor)` of length `config.n_layers`, with each tuple having 2 tensors of shape
            `(batch_size, num_heads, sequence_length, embed_size_per_head)`.

            Contains pre-computed hidden-states (key and values in the self-attention blocks) that can be used (see
            `past_key_values` input) to speed up sequential decoding.
        hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    loss: Optional[paddle.Tensor] = None
    logits: paddle.Tensor = None
    past_key_values: Optional[Tuple[Tuple[paddle.Tensor]]] = None
    hidden_states: Optional[Tuple[paddle.Tensor]] = None
    attentions: Optional[Tuple[paddle.Tensor]] = None


@dataclass
class CausalLMOutputWithCrossAttentions(ModelOutput):
    """
    Base class for causal language model (or autoregressive) outputs that may also contain past key/values and
    cross-attentions.

    Every field defaults to `None`; arguments are listed in field (positional) order.

    Args:
        loss (`paddle.Tensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
            Language modeling loss (for next-token prediction).
        logits (`paddle.Tensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
            Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
        past_key_values (`tuple(tuple(paddle.Tensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
            Tuple of `paddle.Tensor` tuples of length `config.n_layers`, with each tuple containing the cached key,
            value states of the self-attention and the cross-attention layers if model is used in encoder-decoder
            setting. Only relevant if `config.is_decoder = True`.

            Contains pre-computed hidden-states (key and values in the attention blocks) that can be used (see
            `past_key_values` input) to speed up sequential decoding.
        hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
        cross_attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Cross-attention weights after the attention softmax, used to compute the weighted average in the
            cross-attention heads.
    """

    loss: Optional[paddle.Tensor] = None
    logits: paddle.Tensor = None
    past_key_values: Optional[Tuple[Tuple[paddle.Tensor]]] = None
    hidden_states: Optional[Tuple[paddle.Tensor]] = None
    attentions: Optional[Tuple[paddle.Tensor]] = None
    cross_attentions: Optional[Tuple[paddle.Tensor]] = None


@dataclass
class SequenceClassifierOutputWithPast(ModelOutput):
    """
    Base class for outputs of sentence classification models that can also return cached key/value states
    (`past_key_values`). Every field defaults to `None`.

    Args:
        loss (`paddle.Tensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
            Classification (or regression if config.num_labels==1) loss.
        logits (`paddle.Tensor` of shape `(batch_size, config.num_labels)`):
            Classification (or regression if config.num_labels==1) scores (before SoftMax).
        past_key_values (`tuple(tuple(paddle.Tensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
            Tuple of `tuple(paddle.Tensor)` of length `config.n_layers`, with each tuple having 2 tensors of shape
            `(batch_size, num_heads, sequence_length, embed_size_per_head)`.

            Contains pre-computed hidden-states (key and values in the self-attention blocks) that can be used (see
            `past_key_values` input) to speed up sequential decoding.
        hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    """

    loss: Optional[paddle.Tensor] = None
    logits: paddle.Tensor = None
    past_key_values: Optional[Tuple[Tuple[paddle.Tensor]]] = None
    hidden_states: Optional[Tuple[paddle.Tensor]] = None
    attentions: Optional[Tuple[paddle.Tensor]] = None


@dataclass
class MaskedLMOutput(ModelOutput):
    """
    Base class for outputs of masked language models. Every field defaults to `None`.

    Args:
        loss (`paddle.Tensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
            Masked language modeling (MLM) loss.
        logits (`paddle.Tensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
            Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
        hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    """

    loss: Optional[paddle.Tensor] = None
    logits: paddle.Tensor = None
    hidden_states: Optional[Tuple[paddle.Tensor]] = None
    attentions: Optional[Tuple[paddle.Tensor]] = None


@dataclass
class Seq2SeqLMOutput(ModelOutput):
    """
    Base class for outputs of sequence-to-sequence language models. Every field defaults to `None`.

    Args:
        loss (`paddle.Tensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
            Language modeling loss.
        logits (`paddle.Tensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
            Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
        past_key_values (`tuple(tuple(paddle.Tensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
            Tuple of `tuple(paddle.Tensor)` of length `config.n_layers`, with each tuple having 2 tensors of shape
            `(batch_size, num_heads, sequence_length, embed_size_per_head)` and 2 additional tensors of shape
            `(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)`.

            Contains pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention
            blocks) that can be used (see `past_key_values` input) to speed up sequential decoding.
        decoder_hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the decoder at the output of each layer plus the initial embedding outputs.
        decoder_attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights of the decoder, after the attention softmax, used to compute the weighted average in the
            self-attention heads.
        cross_attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights of the decoder's cross-attention layer, after the attention softmax, used to compute the
            weighted average in the cross-attention heads.
        encoder_last_hidden_state (`paddle.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
            Sequence of hidden-states at the output of the last layer of the encoder of the model.
        encoder_hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the encoder at the output of each layer plus the initial embedding outputs.
        encoder_attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights of the encoder, after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    loss: Optional[paddle.Tensor] = None
    logits: paddle.Tensor = None
    past_key_values: Optional[Tuple[Tuple[paddle.Tensor]]] = None
    decoder_hidden_states: Optional[Tuple[paddle.Tensor]] = None
    decoder_attentions: Optional[Tuple[paddle.Tensor]] = None
    cross_attentions: Optional[Tuple[paddle.Tensor]] = None
    encoder_last_hidden_state: Optional[paddle.Tensor] = None
    encoder_hidden_states: Optional[Tuple[paddle.Tensor]] = None
    encoder_attentions: Optional[Tuple[paddle.Tensor]] = None


@dataclass
class NextSentencePredictorOutput(ModelOutput):
    """
    Base class for outputs of models predicting whether two sentences are consecutive. Every field defaults to
    `None`.

    Args:
        loss (`paddle.Tensor` of shape `(1,)`, *optional*, returned when `next_sentence_label` is provided):
            Next sequence prediction (classification) loss.
        logits (`paddle.Tensor` of shape `(batch_size, 2)`):
            Prediction scores of the next sequence prediction (classification) head (scores of True/False continuation
            before SoftMax).
        hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    """

    loss: Optional[paddle.Tensor] = None
    logits: paddle.Tensor = None
    hidden_states: Optional[Tuple[paddle.Tensor]] = None
    attentions: Optional[Tuple[paddle.Tensor]] = None


@dataclass
class SequenceClassifierOutput(ModelOutput):
    """
    Base class for outputs of sentence classification models. Every field defaults to `None`.

    Args:
        loss (`paddle.Tensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
            Classification (or regression if config.num_labels==1) loss.
        logits (`paddle.Tensor` of shape `(batch_size, config.num_labels)`):
            Classification (or regression if config.num_labels==1) scores (before SoftMax).
        hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    """

    loss: Optional[paddle.Tensor] = None
    logits: paddle.Tensor = None
    hidden_states: Optional[Tuple[paddle.Tensor]] = None
    attentions: Optional[Tuple[paddle.Tensor]] = None


@dataclass
class Seq2SeqSequenceClassifierOutput(ModelOutput):
    """
    Base class for outputs of sequence-to-sequence sentence classification models. Every field defaults to `None`.

    Args:
        loss (`paddle.Tensor` of shape `(1,)`, *optional*, returned when `label` is provided):
            Classification (or regression if config.num_labels==1) loss.
        logits (`paddle.Tensor` of shape `(batch_size, config.num_labels)`):
            Classification (or regression if config.num_labels==1) scores (before SoftMax).
        past_key_values (`tuple(tuple(paddle.Tensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
            Tuple of `tuple(paddle.Tensor)` of length `config.n_layers`, with each tuple having 2 tensors of shape
            `(batch_size, num_heads, sequence_length, embed_size_per_head)` and 2 additional tensors of shape
            `(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)`.

            Contains pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention
            blocks) that can be used (see `past_key_values` input) to speed up sequential decoding.
        decoder_hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the decoder at the output of each layer plus the initial embedding outputs.
        decoder_attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights of the decoder, after the attention softmax, used to compute the weighted average in the
            self-attention heads.
        cross_attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights of the decoder's cross-attention layer, after the attention softmax, used to compute the
            weighted average in the cross-attention heads.
        encoder_last_hidden_state (`paddle.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
            Sequence of hidden-states at the output of the last layer of the encoder of the model.
        encoder_hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the encoder at the output of each layer plus the initial embedding outputs.
        encoder_attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights of the encoder, after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    loss: Optional[paddle.Tensor] = None
    logits: paddle.Tensor = None
    past_key_values: Optional[Tuple[Tuple[paddle.Tensor]]] = None
    decoder_hidden_states: Optional[Tuple[paddle.Tensor]] = None
    decoder_attentions: Optional[Tuple[paddle.Tensor]] = None
    cross_attentions: Optional[Tuple[paddle.Tensor]] = None
    encoder_last_hidden_state: Optional[paddle.Tensor] = None
    encoder_hidden_states: Optional[Tuple[paddle.Tensor]] = None
    encoder_attentions: Optional[Tuple[paddle.Tensor]] = None


@dataclass
class MultipleChoiceModelOutput(ModelOutput):
    """
    Base class for outputs of multiple choice models. Every field defaults to `None`.

    Args:
        loss (`paddle.Tensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
            Classification loss.
        logits (`paddle.Tensor` of shape `(batch_size, num_choices)`):
            Classification scores (before SoftMax).

            `num_choices` is the second dimension of the input tensors (see the model's `input_ids` input).
        hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    """

    loss: Optional[paddle.Tensor] = None
    logits: paddle.Tensor = None
    hidden_states: Optional[Tuple[paddle.Tensor]] = None
    attentions: Optional[Tuple[paddle.Tensor]] = None


@dataclass
class TokenClassifierOutput(ModelOutput):
    """
    Base class for outputs of token classification models. Every field defaults to `None`.

    Args:
        loss (`paddle.Tensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
            Classification loss.
        logits (`paddle.Tensor` of shape `(batch_size, sequence_length, config.num_labels)`):
            Classification scores (before SoftMax).
        hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    """

    loss: Optional[paddle.Tensor] = None
    logits: paddle.Tensor = None
    hidden_states: Optional[Tuple[paddle.Tensor]] = None
    attentions: Optional[Tuple[paddle.Tensor]] = None


@dataclass
class QuestionAnsweringModelOutput(ModelOutput):
    """
    Base class for outputs of question answering models. Every field defaults to `None`.

    Args:
        loss (`paddle.Tensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
            Total span extraction loss is the sum of a Cross-Entropy for the start and end positions.
        start_logits (`paddle.Tensor` of shape `(batch_size, sequence_length)`):
            Span-start scores (before SoftMax).
        end_logits (`paddle.Tensor` of shape `(batch_size, sequence_length)`):
            Span-end scores (before SoftMax).
        hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    """

    loss: Optional[paddle.Tensor] = None
    start_logits: paddle.Tensor = None
    end_logits: paddle.Tensor = None
    hidden_states: Optional[Tuple[paddle.Tensor]] = None
    attentions: Optional[Tuple[paddle.Tensor]] = None


@dataclass
class Seq2SeqQuestionAnsweringModelOutput(ModelOutput):
    """
    Base class for outputs of sequence-to-sequence question answering models. Every field defaults to `None`.

    Args:
        loss (`paddle.Tensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
            Total span extraction loss is the sum of a Cross-Entropy for the start and end positions.
        start_logits (`paddle.Tensor` of shape `(batch_size, sequence_length)`):
            Span-start scores (before SoftMax).
        end_logits (`paddle.Tensor` of shape `(batch_size, sequence_length)`):
            Span-end scores (before SoftMax).
        past_key_values (`tuple(tuple(paddle.Tensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
            Tuple of `tuple(paddle.Tensor)` of length `config.n_layers`, with each tuple having 2 tensors of shape
            `(batch_size, num_heads, sequence_length, embed_size_per_head)` and 2 additional tensors of shape
            `(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)`.

            Contains pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention
            blocks) that can be used (see `past_key_values` input) to speed up sequential decoding.
        decoder_hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the decoder at the output of each layer plus the initial embedding outputs.
        decoder_attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights of the decoder, after the attention softmax, used to compute the weighted average in the
            self-attention heads.
        cross_attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights of the decoder's cross-attention layer, after the attention softmax, used to compute the
            weighted average in the cross-attention heads.
        encoder_last_hidden_state (`paddle.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
            Sequence of hidden-states at the output of the last layer of the encoder of the model.
        encoder_hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the encoder at the output of each layer plus the initial embedding outputs.
        encoder_attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights of the encoder, after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    loss: Optional[paddle.Tensor] = None
    start_logits: paddle.Tensor = None
    end_logits: paddle.Tensor = None
    past_key_values: Optional[Tuple[Tuple[paddle.Tensor]]] = None
    decoder_hidden_states: Optional[Tuple[paddle.Tensor]] = None
    decoder_attentions: Optional[Tuple[paddle.Tensor]] = None
    cross_attentions: Optional[Tuple[paddle.Tensor]] = None
    encoder_last_hidden_state: Optional[paddle.Tensor] = None
    encoder_hidden_states: Optional[Tuple[paddle.Tensor]] = None
    encoder_attentions: Optional[Tuple[paddle.Tensor]] = None


@dataclass
class SemanticSegmenterOutput(ModelOutput):
    """
    Base class for outputs of semantic segmentation models. Every field defaults to `None`.

    Args:
        loss (`paddle.Tensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
            Classification (or regression if config.num_labels==1) loss.
        logits (`paddle.Tensor` of shape `(batch_size, config.num_labels, logits_height, logits_width)`):
            Classification scores for each pixel.

            Warning:
                The logits returned do not necessarily have the same size as the `pixel_values` passed as inputs.
                This is to avoid doing two interpolations and losing some quality when a user needs to resize the
                logits to the original image size as post-processing. You should always check your logits shape and
                resize as needed.

        hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, patch_size, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, patch_size,
            sequence_length)`.

            Attention weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    """

    loss: Optional[paddle.Tensor] = None
    logits: paddle.Tensor = None
    hidden_states: Optional[Tuple[paddle.Tensor]] = None
    attentions: Optional[Tuple[paddle.Tensor]] = None


@dataclass
class ImageClassifierOutput(ModelOutput):
    """
    Base class for outputs of image classification models. Every field defaults to `None`.

    Args:
        loss (`paddle.Tensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
            Classification (or regression if config.num_labels==1) loss.
        logits (`paddle.Tensor` of shape `(batch_size, config.num_labels)`):
            Classification (or regression if config.num_labels==1) scores (before SoftMax).
        hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each stage) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states (also called feature maps) of the model at the output of each stage.
        attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, patch_size,
            sequence_length)`.

            Attention weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    """

    loss: Optional[paddle.Tensor] = None
    logits: paddle.Tensor = None
    hidden_states: Optional[Tuple[paddle.Tensor]] = None
    attentions: Optional[Tuple[paddle.Tensor]] = None


@dataclass
class ImageClassifierOutputWithNoAttention(ModelOutput):
    """
    Base class for outputs of image classification models that do not return attention weights (this class has no
    `attentions` field). Every field defaults to `None`.

    Args:
        loss (`paddle.Tensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
            Classification (or regression if config.num_labels==1) loss.
        logits (`paddle.Tensor` of shape `(batch_size, config.num_labels)`):
            Classification (or regression if config.num_labels==1) scores (before SoftMax).
        hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each stage) of shape `(batch_size, num_channels, height, width)`.

            Hidden-states (also called feature maps) of the model at the output of each stage.
    """

    loss: Optional[paddle.Tensor] = None
    logits: paddle.Tensor = None
    hidden_states: Optional[Tuple[paddle.Tensor]] = None


@dataclass
class DepthEstimatorOutput(ModelOutput):
    """
    Base class for outputs of depth estimation models. Every field defaults to `None`.

    Args:
        loss (`paddle.Tensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
            Classification (or regression if config.num_labels==1) loss.
        predicted_depth (`paddle.Tensor` of shape `(batch_size, height, width)`):
            Predicted depth for each pixel.
        hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, num_channels, height, width)`.

            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, patch_size,
            sequence_length)`.

            Attention weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    """

    # NOTE(review): the `loss` description above reads like it was copied from the classifier outputs;
    # confirm which loss depth-estimation models actually return here.
    loss: Optional[paddle.Tensor] = None
    predicted_depth: paddle.Tensor = None
    hidden_states: Optional[Tuple[paddle.Tensor]] = None
    attentions: Optional[Tuple[paddle.Tensor]] = None


@dataclass
class Wav2Vec2BaseModelOutput(ModelOutput):
    """
    Base class for outputs of models that have been trained with the Wav2Vec2 loss objective. Every field defaults
    to `None`.

    Args:
        last_hidden_state (`paddle.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.
        extract_features (`paddle.Tensor` of shape `(batch_size, sequence_length, conv_dim[-1])`):
            Sequence of extracted feature vectors of the last convolutional layer of the model.
        hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings + one for the output of each layer) of
            shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    """

    last_hidden_state: paddle.Tensor = None
    extract_features: paddle.Tensor = None
    hidden_states: Optional[Tuple[paddle.Tensor]] = None
    attentions: Optional[Tuple[paddle.Tensor]] = None


@dataclass
class XVectorOutput(ModelOutput):
    """
    Output type of [`Wav2Vec2ForXVector`].

    Carries the classification logits and the utterance embeddings, plus the optional loss, per-layer hidden
    states and attention maps. Every field defaults to `None`.

    Args:
        loss (`paddle.Tensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
            Classification loss.
        logits (`paddle.Tensor` of shape `(batch_size, config.xvector_output_dim)`):
            Classification hidden states before AMSoftmax.
        embeddings (`paddle.Tensor` of shape `(batch_size, config.xvector_output_dim)`):
            Utterance embeddings used for vector similarity-based retrieval.
        hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings + one for the output of each layer) of
            shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    """

    # NOTE: field order is part of the interface of a dataclass; do not reorder.
    loss: Optional[paddle.Tensor] = None
    logits: paddle.Tensor = None
    embeddings: paddle.Tensor = None
    hidden_states: Optional[Tuple[paddle.Tensor]] = None
    attentions: Optional[Tuple[paddle.Tensor]] = None


================================================
FILE: paddlespeech/s2t/models/wav2vec2/modules/modeling_wav2vec2.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# Copyright 2021 The Fairseq Authors and the HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Paddle Wav2Vec2 model."""
from dataclasses import dataclass
from typing import Optional
from typing import Tuple
from typing import Union

import numpy as np
import paddle
from paddle import nn

from paddlespeech.s2t.models.wav2vec2.modules.activations import ACT2FN
from paddlespeech.s2t.models.wav2vec2.modules.modeling_outputs import BaseModelOutput
from paddlespeech.s2t.models.wav2vec2.modules.modeling_outputs import ModelOutput
from paddlespeech.s2t.models.wav2vec2.modules.modeling_outputs import Wav2Vec2BaseModelOutput
from paddlespeech.s2t.utils.log import Log
logger = Log(__name__).getlog()


@dataclass
class Wav2Vec2ForPreTrainingOutput(ModelOutput):
    """
    Output type of [`Wav2Vec2ForPreTraining`], with potential hidden states and attentions.

    Every field defaults to `None`. Note that the fields are declared in a different order than they are
    documented below; the declaration order is the one that matters for positional/tuple access.

    Args:
        loss (*optional*, returned when `sample_negative_indices` are passed, `paddle.Tensor` of shape `(1,)`):
            Total loss as the sum of the contrastive loss (L_m) and the diversity loss (L_d) as stated in the [official
            paper](https://arxiv.org/pdf/2006.11477.pdf).
        projected_states (`paddle.Tensor` of shape `(batch_size, sequence_length, config.proj_codevector_dim)`):
            Hidden-states of the model projected to *config.proj_codevector_dim* that can be used to predict the masked
            projected quantized states.
        projected_quantized_states (`paddle.Tensor` of shape `(batch_size, sequence_length, config.proj_codevector_dim)`):
            Quantized extracted feature vectors projected to *config.proj_codevector_dim* representing the positive
            target vectors for contrastive loss.
        codevector_perplexity (`paddle.Tensor`):
            Not described by the original documentation. Presumably the perplexity of the quantizer's codevector
            distribution -- TODO confirm against the code that fills this field.
        hidden_states (`tuple(paddle.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `paddle.Tensor` (one for the output of the embeddings + one for the output of each layer) of
            shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(paddle.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `paddle.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attention weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
        contrastive_loss (*optional*, returned when `sample_negative_indices` are passed, `paddle.Tensor` of shape `(1,)`):
            The contrastive loss (L_m) as stated in the [official paper](https://arxiv.org/pdf/2006.11477.pdf) .
        diversity_loss (*optional*, returned when `sample_negative_indices` are passed, `paddle.Tensor` of shape `(1,)`):
            The diversity loss (L_d) as stated in the [official paper](https://arxiv.org/pdf/2006.11477.pdf) .
    """

    # NOTE: field order is part of the interface of a dataclass; do not reorder.
    loss: Optional[paddle.Tensor] = None
    projected_states: paddle.Tensor = None
    projected_quantized_states: paddle.Tensor = None
    codevector_perplexity: paddle.Tensor = None
    hidden_states: Optional[Tuple[paddle.Tensor]] = None
    attentions: Optional[Tuple[paddle.Tensor]] = None
    contrastive_loss: Optional[paddle.Tensor] = None
    diversity_loss: Optional[paddle.Tensor] = None


def _compute_mask_indices(
        shape: Tuple[int, int],
        mask_prob: float,
        mask_length: int,
        attention_mask: Optional[paddle.Tensor]=None,
        min_masks: int=0, ) -> np.ndarray:
    """
    Computes random mask spans for a given shape. Used to implement [SpecAugment: A Simple Data Augmentation Method for
    ASR](https://arxiv.org/abs/1904.08779). Note that this method is not optimized to run on TPU and should be run on
    CPU as part of the preprocessing during training.

    Args:
        shape: The shape for which to compute masks. This should be of a tuple of size 2 where
               the first element is the batch size and the second element is the length of the axis to span.
        mask_prob:  The percentage of the whole axis (between 0 and 1) which will be masked. The number of
                    independently generated mask spans of length `mask_length` is computed by
                    `mask_prob*shape[1]/mask_length`. Note that due to overlaps, `mask_prob` is an upper bound and the
                    actual percentage will be smaller.
        mask_length: size of the mask
        min_masks: minimum number of masked spans
        attention_mask: A (right-padded) attention mask which independently shortens the feature axis of
                        each batch dimension.
    """
    batch_size, sequence_length = shape

    if mask_length < 1:
        raise ValueError("`mask_length` has to be bigger than 0.")

    if mask_length > sequence_length:
        raise ValueError(
            f"`mask_length` has to be smaller than `sequence_length`, but got `mask_length`: {mask_length}"
            f" and `sequence_length`: {sequence_length}`")

    # epsilon is used for probabilistic rounding
    epsilon = np.random.rand(1).item()

    def compute_num_masked_span(input_length):
        """Given input length, compute how many spans should be masked"""
        num_masked_span = int(mask_prob * input_length / mask_length + epsilon)
        num_masked_span = max(num_masked_span, min_masks)

        # make sure num masked span <= sequence_length
        if num_masked_span * mask_length > sequence_length:
            num_masked_span = sequence_length // mask_length

        # make sure num_masked span is also <= input_length - (mask_length - 1)
        if input_length - (mask_length - 1) < num_masked_span:
            num_masked_span = max(input_length - (mask_length - 1), 0)

        return num_masked_span

    # compute number of masked spans in batch
    input_lengths = (attention_mask.sum(-1).detach().tolist()
                     if attention_mask is not None else
                     [sequence_length for _ in range(batch_size)])

    # SpecAugment mask to fill
    spec_aug_mask = np.zeros((batch_size, sequence_length), dtype=np.bool_)
    spec_aug_mask_idxs = []

    max_num_masked_span = compute_num_masked_span(sequence_length)

    if max_num_masked_span == 0:
        return spec_aug_mask

    for input_length in input_lengths:
        # compute num of masked spans for this input
        num_masked_span = compute_num_masked_span(input_length)

        # get random indices to mask
        spec_aug_mask_idx = np.random.choice(
            np.arange(input_length - (mask_length - 1)),
            num_masked_span,
            replace=False)

        # pick first sampled index that will serve as a dummy index to pad vector
        # to ensure same dimension for all batches due to probabilistic rounding
        # Picking first sample just pads those vectors twice.
        if len(spec_aug_mask_idx) == 0:
            # this case can only happen if `input_length` is strictly smaller then
            # `sequence_length` in which case the last token has to be a padding
            # token which we can use as a dummy mask id
            dummy_mask_idx = sequence_length - 1
        else:
            dummy_mask_idx = spec_aug_mask_idx[0]

        spec_aug_mask_idx = np.concatenate([
            spec_aug_mask_idx,
            np.ones(max_num_masked_span - num_masked_span, dtype=np.int32) *
            dummy_mask_idx
        ])
        spec_aug_mask_idxs.append(spec_aug_mask_idx)

    spec_aug_mask_idxs = np.array(spec_aug_mask_idxs)

    # expand masked indices to masked spans
    spec_aug_mask_idxs = np.broadcast_to(
        spec_aug_mask_idxs[:, :, None],
        (batch_size, max_num_masked_span, mask_length))
    spec_aug_mask_idxs = spec_aug_mask_idxs.reshape(
        (batch_size, max_num_masked_span * mask_length))

    # add offset to the starting indexes so that indexes now create a span
    offsets = np.arange(mask_length)[None, None, :]
    offsets = np.broadcast_to(offsets, (
        batch_size, max_num_masked_span, mask_length)).reshape(
            (batch_size, max_num_masked_span * mask_length))
    spec_aug_mask_idxs = spec_aug_mask_idxs + offsets

    # ensure that we cannot have indices larger than sequence_length
    if spec_aug_mask_idxs.max() > sequence_length - 1:
        spec_aug_mask_idxs[spec_aug_mask_idxs >
                           sequence_length - 1] = sequence_length - 1

    # scatter indices to mask
    np.put_along_axis(spec_aug_mask, spec_aug_mask_idxs, 1, -1)

    return spec_aug_mask


def _sample_negative_indices(features_shape: Tuple,
                             num_negatives: int,
                             mask_time_indices: Optional[np.ndarray]=None):
    """
    Sample `num_negatives` vectors from feature vectors.
    """
    batch_size, sequence_length = features_shape

    # generate indices of the positive vectors themselves, repeat them `num_negatives` times
    sequence_length_range = np.arange(sequence_length)

    # get `num_negatives` random vector indices from the same utterance
    sampled_negative_indices = np.zeros(
        shape=(batch_size, sequence_length, num_negatives), dtype=np.int32)

    mask_time_indices = (mask_time_indices.astype(np.bool_)
                         if mask_time_indices is not None else
                         np.ones(features_shape, dtype=np.bool_))

    for batch_idx in range(batch_size):
        high = mask_time_indices[batch_idx].sum() - 1
        mapped_masked_indices = sequence_length_range[mask_time_indices[
            batch_idx]]

        feature_indices = np.broadcast_to(
            np.arange(high + 1)[:, None], (high + 1, num_negatives))
        sampled_indices = np.random.randint(
            0, high, size=(high + 1, num_negatives))
        # avoid sampling the same positive vector, but keep the distribution uniform
        sampled_indices[sampled_indices >= feature_indices] += 1

        # remap to actual indices
        sampled_negative_indices[batch_idx][mask_time_indices[
            batch_idx]] = mapped_masked_indices[sampled_indices]

        # correct for batch size
        sampled_negative_indices[batch_idx] += batch_idx * sequence_length

    return sampled_negative_indices


class Wav2Vec2NoLayerNormConvLayer(nn.Layer):
    """One feature-extractor stage: a 1-D convolution followed directly by the activation.

    Args:
        config: Model configuration; `conv_dim`, `conv_kernel`, `conv_stride`, `conv_bias` and
            `feat_extract_activation` are read.
        layer_id: Index of this stage. Stage 0 (or any non-positive index) gets a single input channel,
            later stages take the output channels of the previous stage.
    """

    def __init__(self, config, layer_id=0):
        super().__init__()
        if layer_id > 0:
            self.in_conv_dim = config.conv_dim[layer_id - 1]
        else:
            self.in_conv_dim = 1
        self.out_conv_dim = config.conv_dim[layer_id]

        conv_options = dict(
            kernel_size=config.conv_kernel[layer_id],
            stride=config.conv_stride[layer_id],
            bias_attr=config.conv_bias)
        self.conv = nn.Conv1D(self.in_conv_dim, self.out_conv_dim,
                              **conv_options)
        self.activation = ACT2FN[config.feat_extract_activation]

    def forward(self, hidden_states):
        """Apply convolution, then activation."""
        return self.activation(self.conv(hidden_states))


class Wav2Vec2LayerNormConvLayer(nn.Layer):
    """One feature-extractor stage: 1-D convolution, layer normalization over the channels, activation.

    Args:
        config: Model configuration; `conv_dim`, `conv_kernel`, `conv_stride`, `conv_bias` and
            `feat_extract_activation` are read.
        layer_id: Index of this stage. Stage 0 (or any non-positive index) gets a single input channel,
            later stages take the output channels of the previous stage.
    """

    def __init__(self, config, layer_id=0):
        super().__init__()
        if layer_id > 0:
            self.in_conv_dim = config.conv_dim[layer_id - 1]
        else:
            self.in_conv_dim = 1
        self.out_conv_dim = config.conv_dim[layer_id]

        conv_options = dict(
            kernel_size=config.conv_kernel[layer_id],
            stride=config.conv_stride[layer_id],
            bias_attr=config.conv_bias)
        self.conv = nn.Conv1D(self.in_conv_dim, self.out_conv_dim,
                              **conv_options)
        self.layer_norm = nn.LayerNorm(self.out_conv_dim)
        self.activation = ACT2FN[config.feat_extract_activation]

    def forward(self, hidden_states):
        """Apply convolution, channel-wise layer norm and activation."""
        swap_last_two = [0, 2, 1]
        conv_out = self.conv(hidden_states)

        # The layer norm is built for `out_conv_dim` features, so the last two
        # axes are swapped before it and swapped back afterwards.
        normed = self.layer_norm(conv_out.transpose(swap_last_two))
        normed = normed.transpose(swap_last_two)

        return self.activation(normed)


class Wav2Vec2GroupNormConvLayer(nn.Layer):
    """One feature-extractor stage: 1-D convolution, group normalization, activation.

    The group norm uses as many groups as channels (`num_groups == num_channels`).

    Args:
        config: Model configuration; `conv_dim`, `conv_kernel`, `conv_stride`, `conv_bias` and
            `feat_extract_activation` are read.
        layer_id: Index of this stage. Stage 0 (or any non-positive index) gets a single input channel,
            later stages take the output channels of the previous stage.
    """

    def __init__(self, config, layer_id=0):
        super().__init__()
        if layer_id > 0:
            self.in_conv_dim = config.conv_dim[layer_id - 1]
        else:
            self.in_conv_dim = 1
        self.out_conv_dim = config.conv_dim[layer_id]

        conv_options = dict(
            kernel_size=config.conv_kernel[layer_id],
            stride=config.conv_stride[layer_id],
            bias_attr=config.conv_bias)
        self.conv = nn.Conv1D(self.in_conv_dim, self.out_conv_dim,
                              **conv_options)
        self.activation = ACT2FN[config.feat_extract_activation]

        # The attribute is called `layer_norm` although it holds a GroupNorm.
        channels = self.out_conv_dim
        self.layer_norm = nn.GroupNorm(num_groups=channels, num_channels=channels)

    def forward(self, hidden_states):
        """Apply convolution, group norm and activation."""
        normed = self.layer_norm(self.conv(hidden_states))
        return self.activation(normed)


class Wav2Vec2PositionalConvEmbedding(nn.Layer):
    """Positional embedding computed by a weight-normalized, grouped 1-D convolution.

    Args:
        config: Model configuration; `hidden_size`, `num_conv_pos_embeddings` (kernel size),
            `num_conv_pos_embedding_groups` and `feat_extract_activation` are read.
    """

    def __init__(self, config):
        super().__init__()
        grouped_conv = nn.Conv1D(
            config.hidden_size,
            config.hidden_size,
            kernel_size=config.num_conv_pos_embeddings,
            padding=config.num_conv_pos_embeddings // 2,
            groups=config.num_conv_pos_embedding_groups, )
        self.conv = nn.utils.weight_norm(grouped_conv, name="weight", dim=2)

        # drops the trailing frame for even kernel sizes (see Wav2Vec2SamePadLayer)
        self.padding = Wav2Vec2SamePadLayer(config.num_conv_pos_embeddings)
        self.activation = ACT2FN[config.feat_extract_activation]

    def forward(self, hidden_states):
        """Return the positional embedding for `hidden_states`.

        The last two axes are swapped before the convolution and swapped back afterwards.
        """
        swap_last_two = [0, 2, 1]
        channels_first = hidden_states.transpose(swap_last_two)

        embedded = self.activation(self.padding(self.conv(channels_first)))

        return embedded.transpose(swap_last_two)


class Wav2Vec2SamePadLayer(nn.Layer):
    """Cuts the last element off the final axis when the kernel size is even.

    Args:
        num_conv_pos_embeddings: Kernel size of the positional convolution; one trailing element is removed
            for even values, nothing is removed for odd values.
    """

    def __init__(self, num_conv_pos_embeddings):
        super().__init__()
        kernel_is_even = num_conv_pos_embeddings % 2 == 0
        self.num_pad_remove = 1 if kernel_is_even else 0

    def forward(self, hidden_states):
        """Return `hidden_states` without its last `num_pad_remove` entries along the third axis."""
        if not self.num_pad_remove > 0:
            return hidden_states
        return hidden_states[:, :, :-self.num_pad_remove]


class Wav2Vec2FeatureEncoder(nn.Layer):
    """Construct the features from raw audio waveform.

    A stack of convolutional stages selected by `config.feat_extract_norm`:

    * `"group"`: one group-norm stage followed by stages without normalization;
    * `"layer"`: layer-norm stages only.

    Raises:
        ValueError: If `config.feat_extract_norm` is neither `"group"` nor `"layer"`.
    """

    def __init__(self, config):
        super().__init__()

        if config.feat_extract_norm == "group":
            # only the first stage is normalized
            conv_layers = [Wav2Vec2GroupNormConvLayer(config, layer_id=0)]
            conv_layers += [
                Wav2Vec2NoLayerNormConvLayer(config, layer_id=stage)
                for stage in range(1, config.num_feat_extract_layers)
            ]
        elif config.feat_extract_norm == "layer":
            conv_layers = []
            for stage in range(config.num_feat_extract_layers):
                conv_layers.append(
                    Wav2Vec2LayerNormConvLayer(config, layer_id=stage))
        else:
            raise ValueError(
                f"`config.feat_extract_norm` is {config.feat_extract_norm}, but has to be one of ['group', 'layer']"
            )
        self.conv_layers = nn.LayerList(conv_layers)
        self.gradient_checkpointing = False

    def _freeze_parameters(self):
        """Mark every parameter of the feature encoder as not trainable."""
        for parameter in self.parameters():
            parameter.trainable = False

    def forward(self, input_values):
        """Insert an axis after the first one and run the input through all stages in order."""
        features = input_values[:, None]
        for stage in self.conv_layers:
            features = stage(features)

        return features


class Wav2Vec2FeatureProjection(nn.Layer):
    """Layer-normalizes the extracted features and projects them to `config.hidden_size`.

    Args:
        config: Model configuration; `conv_dim[-1]`, `layer_norm_eps`, `hidden_size` and
            `feat_proj_dropout` are read.
    """

    def __init__(self, config):
        super().__init__()
        feature_dim = config.conv_dim[-1]
        self.layer_norm = nn.LayerNorm(
            feature_dim, epsilon=config.layer_norm_eps)
        self.projection = nn.Linear(config.conv_dim[-1], config.hidden_size)
        self.dropout = nn.Dropout(config.feat_proj_dropout)

    def forward(self, hidden_states):
        """Return `(projected_states, normalized_states)`.

        The normalized but not yet projected states are returned as well because they are needed for
        quantization.
        """
        norm_hidden_states = self.layer_norm(hidden_states)
        projected = self.dropout(self.projection(norm_hidden_states))
        return projected, norm_hidden_states


# Copied from transformers.models.bart.modeling_bart.BartAttention with Bart->Wav2Vec2
class Wav2Vec2Attention(nn.Layer):
    """Multi-headed attention from 'Attention Is All You Need' paper.

    Works as self-attention, as cross-attention (when `key_value_states` is passed to `forward`) and can
    re-use previously projected key/value states (when `past_key_value` is passed).

    Args:
        embed_dim: Size of the feature axis of the input and of the output.
        num_heads: Number of attention heads; has to divide `embed_dim`.
        dropout: Dropout probability applied to the attention probabilities.
        is_decoder: If True, `forward` returns the key/value states it used as third result so that the
            caller can cache them.
        bias: Passed as `bias_attr` to the four linear projections.

    Raises:
        ValueError: If `embed_dim` is not divisible by `num_heads`.
    """

    def __init__(
            self,
            embed_dim: int,
            num_heads: int,
            dropout: float=0.0,
            is_decoder: bool=False,
            bias: bool=True, ):
        super().__init__()
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.dropout = dropout
        self.head_dim = embed_dim // num_heads

        if (self.head_dim * num_heads) != self.embed_dim:
            raise ValueError(
                f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}"
                f" and `num_heads`: {num_heads}).")
        # queries are scaled by 1 / sqrt(head_dim)
        self.scaling = self.head_dim**-0.5
        self.is_decoder = is_decoder

        self.k_proj = nn.Linear(embed_dim, embed_dim, bias_attr=bias)
        self.v_proj = nn.Linear(embed_dim, embed_dim, bias_attr=bias)
        self.q_proj = nn.Linear(embed_dim, embed_dim, bias_attr=bias)
        self.out_proj = nn.Linear(embed_dim, embed_dim, bias_attr=bias)

    def _shape(self, tensor: paddle.Tensor, seq_len: int, bsz: int):
        """Reshape `(bsz, seq_len, embed_dim)` to `(bsz, num_heads, seq_len, head_dim)`."""
        return paddle.reshape(tensor, (bsz, seq_len, self.num_heads,
                                       self.head_dim)).transpose([0, 2, 1, 3])

    def forward(
            self,
            hidden_states: paddle.Tensor,
            key_value_states: Optional[paddle.Tensor]=None,
            past_key_value: Optional[Tuple[paddle.Tensor]]=None,
            attention_mask: Optional[paddle.Tensor]=None,
            layer_head_mask: Optional[paddle.Tensor]=None,
            output_attentions: bool=False, ) -> Tuple[paddle.Tensor, Optional[
                paddle.Tensor], Optional[Tuple[paddle.Tensor]]]:
        """Input shape: Batch x Time x Channel

        Args:
            hidden_states: Query input of shape `(bsz, tgt_len, embed_dim)`.
            key_value_states: If given, keys and values are projected from it (cross-attention) instead of
                from `hidden_states`.
            past_key_value: Previously computed `(key_states, value_states)`. For cross-attention they are
                used as they are, for self-attention the new states are appended along the time axis.
            attention_mask: Additive mask of shape `[bsz, 1, tgt_len, src_len]`, added to the attention
                scores before the softmax.
            layer_head_mask: Multiplicative mask of shape `[num_heads]`, applied after the softmax.
            output_attentions: Whether to return the attention probabilities.

        Returns:
            Tuple `(attn_output, attn_weights, past_key_value)`. `attn_output` has shape
            `(bsz, tgt_len, embed_dim)`. `attn_weights` has shape `(bsz, num_heads, tgt_len, src_len)` or is
            `None` when `output_attentions` is False. `past_key_value` is the `(key_states, value_states)`
            pair when `is_decoder` is set, otherwise the argument is passed through unchanged.

        Raises:
            ValueError: If the attention scores, `attention_mask`, `layer_head_mask` or the attention output
                do not have the expected shape.
        """

        # if key_value_states are provided this layer is used as a cross-attention layer
        # for the decoder
        is_cross_attention = key_value_states is not None

        bsz, tgt_len, _ = hidden_states.shape

        # get query proj
        query_states = self.q_proj(hidden_states) * self.scaling
        # get key, value proj
        if is_cross_attention and past_key_value is not None:
            # reuse k,v, cross_attentions
            key_states = past_key_value[0]
            value_states = past_key_value[1]
        elif is_cross_attention:
            # cross_attentions
            key_states = self._shape(self.k_proj(key_value_states), -1, bsz)
            value_states = self._shape(self.v_proj(key_value_states), -1, bsz)
        elif past_key_value is not None:
            # reuse k, v, self_attention
            key_states = self._shape(self.k_proj(hidden_states), -1, bsz)
            value_states = self._shape(self.v_proj(hidden_states), -1, bsz)
            key_states = paddle.concat([past_key_value[0], key_states], axis=2)
            value_states = paddle.concat(
                [past_key_value[1], value_states], axis=2)
        else:
            # self_attention
            key_states = self._shape(self.k_proj(hidden_states), -1, bsz)
            value_states = self._shape(self.v_proj(hidden_states), -1, bsz)

        if self.is_decoder:
            # if cross_attention save Tuple(paddle.Tensor, paddle.Tensor) of all cross attention key/value_states.
            # Further calls to cross_attention layer can then reuse all cross-attention
            # key/value_states (first "if" case)
            # if uni-directional self-attention (decoder) save Tuple(paddle.Tensor, paddle.Tensor) of
            # all previous decoder key/value_states. Further calls to uni-directional self-attention
            # can concat previous decoder key/value_states to current projected key/value_states (third "elif" case)
            # if encoder bi-directional self-attention `past_key_value` is always `None`
            past_key_value = (key_states, value_states)

        # fold the heads into the batch axis so that a plain batched matmul can be used
        proj_shape = (bsz * self.num_heads, -1, self.head_dim)
        query_states = self._shape(query_states, tgt_len,
                                   bsz).reshape(proj_shape)
        key_states = key_states.reshape(proj_shape)
        value_states = value_states.reshape(proj_shape)

        src_len = key_states.shape[1]
        attn_weights = paddle.bmm(query_states, key_states.transpose([0, 2, 1]))

        if attn_weights.shape != [bsz * self.num_heads, tgt_len, src_len]:
            raise ValueError(
                f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is"
                f" {attn_weights.shape}")

        if attention_mask is not None:
            if attention_mask.shape != [bsz, 1, tgt_len, src_len]:
                raise ValueError(
                    f"Attention mask should be of size {[bsz, 1, tgt_len, src_len]}, but is {attention_mask.shape}"
                )
            # The target shape is passed as one tuple, like every other reshape
            # in this class; `reshape` does not take the sizes as separate
            # positional arguments.
            attn_weights = attn_weights.reshape(
                (bsz, self.num_heads, tgt_len, src_len)) + attention_mask
            attn_weights = attn_weights.reshape(
                (bsz * self.num_heads, tgt_len, src_len))

        attn_weights = nn.functional.softmax(attn_weights, axis=-1)

        if layer_head_mask is not None:
            if layer_head_mask.shape != [
                    self.num_heads,
            ]:
                raise ValueError(
                    f"Head mask for a single layer should be of size {[self.num_heads,]}, but is"
                    f" {layer_head_mask.shape}")
            attn_weights = layer_head_mask.reshape(
                (1, -1, 1, 1)) * attn_weights.reshape(
                    (bsz, self.num_heads, tgt_len, src_len))
            attn_weights = attn_weights.reshape(
                (bsz * self.num_heads, tgt_len, src_len))

        if output_attentions:
            # this operation is a bit awkward, but it's required to
            # make sure that attn_weights keeps its gradient.
            # In order to do so, attn_weights have to be reshaped
            # twice and have to be reused in the following
            attn_weights_reshaped = attn_weights.reshape(
                (bsz, self.num_heads, tgt_len, src_len))
            attn_weights = attn_weights_reshaped.reshape(
                (bsz * self.num_heads, tgt_len, src_len))
        else:
            attn_weights_reshaped = None

        attn_probs = nn.functional.dropout(
            attn_weights, p=self.dropout, training=self.training)

        attn_output = paddle.bmm(attn_probs, value_states)

        if attn_output.shape != [bsz * self.num_heads, tgt_len, self.head_dim]:
            # report the shape that is actually checked (heads folded into the batch axis)
            raise ValueError(
                f"`attn_output` should be of size {[bsz * self.num_heads, tgt_len, self.head_dim]}, but is"
                f" {attn_output.shape}")

        attn_output = attn_output.reshape(
            (bsz, self.num_heads, tgt_len, self.head_dim))
        attn_output = attn_output.transpose([0, 2, 1, 3])

        # Use the `embed_dim` from the config (stored in the class) rather than `hidden_state` because `attn_output` can be
        # partitioned across GPUs when using tensor-parallelism.
        attn_output = attn_output.reshape((bsz, tgt_len, self.embed_dim))

        attn_output = self.out_proj(attn_output)

        return attn_output, attn_weights_reshaped, past_key_value


class Wav2Vec2FeedForward(nn.Layer):
    """Position-wise feed-forward block: linear -> activation -> dropout -> linear -> dropout.

    Args:
        config: Model configuration; `activation_dropout`, `hidden_size`, `intermediate_size`,
            `hidden_act` (a key of `ACT2FN` or a callable) and `hidden_dropout` are read.
    """

    def __init__(self, config):
        super().__init__()
        self.intermediate_dropout = nn.Dropout(config.activation_dropout)

        self.intermediate_dense = nn.Linear(config.hidden_size,
                                            config.intermediate_size)
        # a string names an entry of ACT2FN, anything else is used as the activation itself
        uses_named_activation = isinstance(config.hidden_act, str)
        self.intermediate_act_fn = (ACT2FN[config.hidden_act]
                                    if uses_named_activation else
                                    config.hidden_act)

        self.output_dense = nn.Linear(config.intermediate_size,
                                      config.hidden_size)
        self.output_dropout = nn.Dropout(config.hidden_dropout)

    def forward(self, hidden_states):
        """Run the feed-forward block on `hidden_states`."""
        expanded = self.intermediate_dense(hidden_states)
        expanded = self.intermediate_dropout(self.intermediate_act_fn(expanded))

        return self.output_dropout(self.output_dense(expanded))


class Wav2Vec2EncoderLayer(nn.Layer):
    """Transformer encoder layer that normalizes *after* each residual connection.

    Args:
        config: Model configuration; `hidden_size`, `num_attention_heads`, `attention_dropout`,
            `hidden_dropout` and `layer_norm_eps` are read here, further fields by `Wav2Vec2FeedForward`.
    """

    def __init__(self, config):
        super().__init__()
        norm_options = dict(epsilon=config.layer_norm_eps)

        self.attention = Wav2Vec2Attention(
            embed_dim=config.hidden_size,
            num_heads=config.num_attention_heads,
            dropout=config.attention_dropout,
            is_decoder=False, )
        self.dropout = nn.Dropout(config.hidden_dropout)
        self.layer_norm = nn.LayerNorm(config.hidden_size, **norm_options)
        self.feed_forward = Wav2Vec2FeedForward(config)
        self.final_layer_norm = nn.LayerNorm(config.hidden_size, **norm_options)

    def forward(self,
                hidden_states,
                attention_mask=None,
                output_attentions=False):
        """Apply self-attention and feed-forward, each followed by residual add and layer norm.

        Returns:
            `(hidden_states,)`, or `(hidden_states, attn_weights)` when `output_attentions` is truthy.
        """
        # self-attention sub-block
        residual = hidden_states
        attended, attn_weights, _ = self.attention(
            hidden_states,
            attention_mask=attention_mask,
            output_attentions=output_attentions)
        hidden_states = self.layer_norm(residual + self.dropout(attended))

        # feed-forward sub-block
        hidden_states = self.final_layer_norm(
            hidden_states + self.feed_forward(hidden_states))

        if output_attentions:
            return (hidden_states, attn_weights)
        return (hidden_states, )


class Wav2Vec2EncoderLayerStableLayerNorm(nn.Layer):
    """Transformer encoder layer that normalizes *before* attention and before the feed-forward block.

    Args:
        config: Model configuration; `hidden_size`, `num_attention_heads`, `attention_dropout`,
            `hidden_dropout` and `layer_norm_eps` are read here, further fields by `Wav2Vec2FeedForward`.
    """

    def __init__(self, config):
        super().__init__()
        norm_options = dict(epsilon=config.layer_norm_eps)

        self.attention = Wav2Vec2Attention(
            embed_dim=config.hidden_size,
            num_heads=config.num_attention_heads,
            dropout=config.attention_dropout,
            is_decoder=False, )
        self.dropout = nn.Dropout(config.hidden_dropout)
        self.layer_norm = nn.LayerNorm(config.hidden_size, **norm_options)
        self.feed_forward = Wav2Vec2FeedForward(config)
        self.final_layer_norm = nn.LayerNorm(config.hidden_size, **norm_options)

    def forward(self,
                hidden_states,
                attention_mask=None,
                output_attentions=False):
        """Apply pre-norm self-attention and pre-norm feed-forward, each with a residual connection.

        Returns:
            `(hidden_states,)`, or `(hidden_states, attn_weights)` when `output_attentions` is truthy.
        """
        # self-attention sub-block; the residual branch carries the un-normalized input
        residual = hidden_states
        attended, attn_weights, _ = self.attention(
            self.layer_norm(hidden_states),
            attention_mask=attention_mask,
            output_attentions=output_attentions)
        hidden_states = residual + self.dropout(attended)

        # feed-forward sub-block
        ff_input = self.final_layer_norm(hidden_states)
        hidden_states = hidden_states + self.feed_forward(ff_input)

        if output_attentions:
            return (hidden_states, attn_weights)
        return (hidden_states, )


class Wav2Vec2Encoder(nn.Layer):
    """Transformer encoder: convolutional positional embedding, layer norm, dropout and a stack of
    `Wav2Vec2EncoderLayer`s with LayerDrop.

    Args:
        config: Model configuration; `hidden_size`, `layer_norm_eps`, `hidden_dropout`,
            `num_hidden_layers` and `layerdrop` are read here, further fields by the sub-layers.
    """

    def __init__(self, config):
        super().__init__()
        self.config = config
        self.pos_conv_embed = Wav2Vec2PositionalConvEmbedding(config)
        self.layer_norm = nn.LayerNorm(
            config.hidden_size, epsilon=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout)
        self.layers = nn.LayerList([
            Wav2Vec2EncoderLayer(config)
            for _ in range(config.num_hidden_layers)
        ])
        # NOTE: activation recomputation is not implemented in this port. The
        # flag is kept for interface compatibility; `forward` runs the layers
        # normally whatever its value.
        self.gradient_checkpointing = False

    def forward(
            self,
            hidden_states,
            attention_mask=None,
            output_attentions=False,
            output_hidden_states=False,
            return_dict=True, ):
        """Encode `hidden_states`.

        Args:
            hidden_states: Input features. Positions that are `False` in `attention_mask` are set to 0
                in place.
            attention_mask: Optional mask, one entry per position of `hidden_states`. It is inverted with
                `~`, so it is assumed to be a boolean tensor -- TODO confirm with callers.
            output_attentions: Whether to collect the attention probabilities of every layer.
            output_hidden_states: Whether to collect the input of every layer plus the final output.
            return_dict: Whether to return a `BaseModelOutput` instead of a plain tuple.

        Returns:
            `BaseModelOutput` with `last_hidden_state`, `hidden_states` and `attentions`, or - when
            `return_dict` is falsy - a tuple of those of the three values that are not `None`. The attention
            entry of a layer skipped by LayerDrop is `None`.
        """
        all_hidden_states = () if output_hidden_states else None
        all_self_attentions = () if output_attentions else None

        if attention_mask is not None:
            # make sure padded tokens output 0
            expand_attention_mask = attention_mask.unsqueeze(
                -1).repeat_interleave(
                    hidden_states.shape[2], axis=2)
            hidden_states[~expand_attention_mask] = 0

            # Turn the mask into an additive bias: 0 for positions to keep, the
            # most negative float32 for padded positions. `np.finfo` is
            # required here, `np.iinfo` rejects floating point types.
            attention_mask = 1.0 - attention_mask[:, None, None, :].astype(
                hidden_states.dtype)
            attention_mask = attention_mask * float(np.finfo(np.float32).min)
            attention_mask = attention_mask.expand([
                attention_mask.shape[0], 1, attention_mask.shape[-1],
                attention_mask.shape[-1]
            ])

        position_embeddings = self.pos_conv_embed(hidden_states)
        hidden_states = hidden_states + position_embeddings
        hidden_states = self.layer_norm(hidden_states)
        hidden_states = self.dropout(hidden_states)

        for layer in self.layers:
            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states, )

            # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
            dropout_probability = np.random.uniform(0, 1)
            skip_the_layer = bool(self.training and (
                dropout_probability < self.config.layerdrop))

            if skip_the_layer:
                layer_outputs = (None, None)
            else:
                # Always run the layer. Previously the gradient-checkpointing
                # branch only defined a wrapper function and never called the
                # layer, leaving `layer_outputs` unset (or stale from the
                # previous iteration).
                layer_outputs = layer(
                    hidden_states,
                    attention_mask=attention_mask,
                    output_attentions=output_attentions)
                hidden_states = layer_outputs[0]

            if output_attentions:
                all_self_attentions = all_self_attentions + (layer_outputs[1], )

        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states, )

        if not return_dict:
            return tuple(
                v
                for v in
                [hidden_states, all_hidden_states, all_self_attentions]
                if v is not None)
        return BaseModelOutput(
            last_hidden_state=hidden_states,
            hidden_states=all_hidden_states,
            attentions=all_self_attentions, )


class Wav2Vec2EncoderStableLayerNorm(nn.Layer):
    """Transformer encoder stack that applies its LayerNorm after the layers.

    The forward pass adds convolutional positional embeddings, applies dropout,
    runs every ``Wav2Vec2EncoderLayerStableLayerNorm`` (with LayerDrop while
    training) and finally normalizes the result with ``self.layer_norm``.
    """

    def __init__(self, config):
        super().__init__()
        self.config = config
        self.pos_conv_embed = Wav2Vec2PositionalConvEmbedding(config)
        self.layer_norm = nn.LayerNorm(
            config.hidden_size, epsilon=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout)
        self.layers = nn.LayerList([
            Wav2Vec2EncoderLayerStableLayerNorm(config)
            for _ in range(config.num_hidden_layers)
        ])
        # Kept for interface compatibility; recomputation is not wired up in
        # this port, so the flag currently has no effect on the forward pass.
        self.gradient_checkpointing = False

    def forward(
            self,
            hidden_states,
            attention_mask=None,
            output_attentions=False,
            output_hidden_states=False,
            return_dict=True, ):
        """Run the encoder stack.

        Args:
            hidden_states: input features; indexed below as a 3-D tensor whose
                last axis is the feature axis.
            attention_mask: optional mask over the time axis. It is inverted
                with ``~`` below, so it is presumably boolean with True on
                valid frames -- TODO confirm against callers.
            output_attentions: collect the second element of every layer output.
            output_hidden_states: collect the hidden states before each layer
                and after the final LayerNorm.
            return_dict: return a ``BaseModelOutput`` instead of a tuple.

        Returns:
            ``BaseModelOutput`` or a tuple of the non-None values among
            (last hidden state, all hidden states, all attentions).
        """
        all_hidden_states = () if output_hidden_states else None
        all_self_attentions = () if output_attentions else None

        if attention_mask is not None:
            # make sure padded tokens are not attended to
            expand_attention_mask = attention_mask.unsqueeze(
                -1).repeat_interleave(
                    hidden_states.shape[2], axis=2)
            hidden_states[~expand_attention_mask] = 0

            # extend attention_mask into an additive bias: 0 on valid frames,
            # the most negative float32 on padded frames
            attention_mask = 1.0 - attention_mask[:, None, None, :].astype(
                hidden_states.dtype)
            # np.iinfo only accepts integer dtypes; float limits come from finfo
            attention_mask = attention_mask * float(np.finfo(np.float32).min)
            # Paddle's expand takes the target shape as one list argument
            attention_mask = attention_mask.expand([
                attention_mask.shape[0], 1, attention_mask.shape[-1],
                attention_mask.shape[-1]
            ])

        position_embeddings = self.pos_conv_embed(hidden_states)
        hidden_states = hidden_states + position_embeddings
        hidden_states = self.dropout(hidden_states)

        for layer in self.layers:
            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states, )

            # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
            dropout_probability = np.random.uniform(0, 1)
            skip_the_layer = bool(
                self.training and
                dropout_probability < self.config.layerdrop)

            if skip_the_layer:
                layer_outputs = (None, None)
            else:
                # The former gradient-checkpointing branch only defined a
                # closure and never produced `layer_outputs`; always run the
                # layer directly so the result is defined in every case.
                layer_outputs = layer(
                    hidden_states,
                    attention_mask=attention_mask,
                    output_attentions=output_attentions)
                hidden_states = layer_outputs[0]

            if output_attentions:
                all_self_attentions = all_self_attentions + (layer_outputs[1], )

        hidden_states = self.layer_norm(hidden_states)

        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states, )

        if not return_dict:
            return tuple(
                v
                for v in
                [hidden_states, all_hidden_states, all_self_attentions]
                if v is not None)
        return BaseModelOutput(
            last_hidden_state=hidden_states,
            hidden_states=all_hidden_states,
            attentions=all_self_attentions, )


class Wav2Vec2GumbelVectorQuantizer(nn.Layer):
    """
    Vector quantization using gumbel softmax. See [CATEGORICAL REPARAMETERIZATION WITH
    GUMBEL-SOFTMAX](https://arxiv.org/pdf/1611.01144.pdf) for more information.
    """

    def __init__(self, config):
        super().__init__()
        self.num_groups = config.num_codevector_groups
        self.num_vars = config.num_codevectors_per_group

        if config.codevector_dim % self.num_groups != 0:
            raise ValueError(
                f"`config.codevector_dim {config.codevector_dim} must be divisible "
                f"by `config.num_codevector_groups` {self.num_groups} for concatenation"
            )

        # storage for codebook variables (codewords)
        self.codevectors = paddle.static.create_parameter(
            shape=[
                1, self.num_groups * self.num_vars,
                config.codevector_dim // self.num_groups
            ],
            dtype='float32')
        self.weight_proj = nn.Linear(config.conv_dim[-1],
                                     self.num_groups * self.num_vars)

        # can be decayed for training
        self.temperature = 2

    @staticmethod
    def _compute_perplexity(probs, mask=None):
        """Return the summed per-group perplexity of the code distribution.

        Args:
            probs: tensor reduced over axis 0 (samples) and axis -1 (codes).
            mask: optional mask; it is flattened and broadcast against
                ``probs`` so that only selected rows contribute to the mean.
        """
        if mask is not None:
            # paddle.where needs a boolean condition
            mask_extended = mask.flatten().astype('bool')[:, None, None].expand(
                probs.shape)
            probs = paddle.where(mask_extended, probs, paddle.zeros_like(probs))
            # cast before summing so the division stays in floating point
            marginal_probs = probs.sum(axis=0) / mask.astype(probs.dtype).sum()
        else:
            marginal_probs = probs.mean(axis=0)

        perplexity = paddle.exp(-paddle.sum(
            marginal_probs * paddle.log(marginal_probs + 1e-7), axis=-1)).sum()
        return perplexity

    def forward(self, hidden_states, mask_time_indices=None):
        """Quantize ``hidden_states`` and return ``(codevectors, perplexity)``.

        ``hidden_states`` is unpacked as (batch, sequence, hidden). In training
        mode codes are sampled with a hard gumbel softmax; otherwise the argmax
        code of every group is selected.
        """
        batch_size, sequence_length, hidden_size = hidden_states.shape

        # project to codevector dim
        hidden_states = self.weight_proj(hidden_states)
        hidden_states = hidden_states.reshape(
            (batch_size * sequence_length * self.num_groups, -1))

        if self.training:
            # sample code vector probs via gumbel in differentiable way;
            # Paddle names the temperature argument `temperature`, not `tau`
            codevector_probs = nn.functional.gumbel_softmax(
                hidden_states.astype('float32'),
                temperature=self.temperature,
                hard=True).astype(hidden_states.dtype)

            # compute perplexity
            codevector_soft_dist = paddle.nn.functional.softmax(
                hidden_states.reshape((batch_size * sequence_length,
                                       self.num_groups, -1)).astype('float32'),
                axis=-1)
            perplexity = self._compute_perplexity(codevector_soft_dist,
                                                  mask_time_indices)
        else:
            # take argmax in non-differentiable way
            # compute hard codevector distribution (one hot)
            codevector_idx = hidden_states.argmax(axis=-1)
            codevector_probs = nn.functional.one_hot(
                codevector_idx,
                num_classes=hidden_states.shape[-1]).astype(hidden_states.dtype)
            codevector_probs = codevector_probs.reshape(
                (batch_size * sequence_length, self.num_groups, -1))

            perplexity = self._compute_perplexity(codevector_probs,
                                                  mask_time_indices)

        codevector_probs = codevector_probs.reshape(
            (batch_size * sequence_length, -1))
        # use probs to retrieve codevectors
        codevectors_per_group = codevector_probs.unsqueeze(
            -1) * self.codevectors
        codevectors = codevectors_per_group.reshape(
            (batch_size * sequence_length, self.num_groups, self.num_vars, -1))
        codevectors = codevectors.sum(-2).reshape(
            (batch_size, sequence_length, -1))

        return codevectors, perplexity


class Wav2Vec2Adapter(nn.Layer):
    """Optional projection followed by a stack of ``Wav2Vec2AdapterLayer``s.

    While training, each adapter layer is only applied when a freshly drawn
    random number exceeds ``config.layerdrop``.
    """

    def __init__(self, config):
        super().__init__()

        # a projection is only built when the output width differs
        needs_projection = config.output_hidden_size != config.hidden_size
        if needs_projection:
            self.proj = nn.Linear(config.hidden_size, config.output_hidden_size)
            self.proj_layer_norm = nn.LayerNorm(config.output_hidden_size)
        else:
            self.proj = None
            self.proj_layer_norm = None

        adapter_layers = (Wav2Vec2AdapterLayer(config)
                          for _ in range(config.num_adapter_layers))
        self.layers = nn.LayerList(adapter_layers)
        self.layerdrop = config.layerdrop

    def forward(self, hidden_states):
        """Project (if configured), run the adapter layers and return the result.

        The tensor is transposed with ``[0, 2, 1]`` before and after the layer
        stack.
        """
        has_projection = (self.proj is not None and
                          self.proj_layer_norm is not None)
        if has_projection:
            hidden_states = self.proj_layer_norm(self.proj(hidden_states))

        swap_last_two = [0, 2, 1]
        hidden_states = hidden_states.transpose(swap_last_two)

        for adapter_layer in self.layers:
            # one draw per layer, also outside of training
            draw = np.random.random()
            keep_layer = (not self.training) or (draw > self.layerdrop)
            if keep_layer:
                hidden_states = adapter_layer(hidden_states)

        return hidden_states.transpose(swap_last_two)


class Wav2Vec2AdapterLayer(nn.Layer):
    """A strided 1-D convolution that doubles the channels, gated by GLU."""

    def __init__(self, config):
        super().__init__()
        channels = config.output_hidden_size
        # twice the channels are produced because GLU halves axis 1 again
        self.conv = nn.Conv1D(
            channels,
            2 * channels,
            config.adapter_kernel_size,
            stride=config.adapter_stride,
            padding=1, )

    def forward(self, hidden_states):
        """Apply the convolution and gate its output along axis 1."""
        convolved = self.conv(hidden_states)
        return nn.functional.glu(convolved, axis=1)


class Wav2Vec2Model(nn.Layer):
    """Bare wav2vec2 model: feature encoder, projection, encoder and adapter.

    The encoder variant is chosen by ``config.do_stable_layer_norm`` and the
    adapter is only built when ``config.add_adapter`` is truthy.
    """

    def __init__(self, config):
        super().__init__()
        self.config = config
        self.feature_extractor = Wav2Vec2FeatureEncoder(config)
        self.feature_projection = Wav2Vec2FeatureProjection(config)

        # model only needs masking vector if mask prob is > 0.0
        if config.mask_time_prob > 0.0 or config.mask_feature_prob > 0.0:
            self.masked_spec_embed = paddle.static.create_parameter(
                shape=[config.hidden_size],
                dtype='float32',
                default_initializer=paddle.nn.initializer.Uniform(
                    low=0, high=1.0))
        if config.do_stable_layer_norm:
            self.encoder = Wav2Vec2EncoderStableLayerNorm(config)
        else:
            self.encoder = Wav2Vec2Encoder(config)

        self.adapter = Wav2Vec2Adapter(config) if config.add_adapter else None

        # Initialize weights and apply final processing
        self.post_init()

    def freeze_feature_encoder(self):
        """
        Calling this function will disable the gradient computation for the feature encoder so that its parameter will
        not be updated during training.
        """
        self.feature_extractor._freeze_parameters()

    def _get_feat_extract_output_lengths(self, input_lengths, add_adapter=None):
        """Map raw input lengths to the number of frames after the conv stack.

        Uses ``config.conv_kernel`` / ``config.conv_stride`` and, when
        ``add_adapter`` is truthy, ``config.num_adapter_layers`` strided
        adapter layers.
        NOTE(review): assumes un-padded, un-dilated convolutions as in the
        reference Hugging Face implementation -- confirm against
        Wav2Vec2FeatureEncoder.
        """
        if add_adapter is None:
            add_adapter = self.config.add_adapter

        def _conv_out_length(length, kernel_size, stride):
            return (length - kernel_size) // stride + 1

        for kernel_size, stride in zip(self.config.conv_kernel,
                                       self.config.conv_stride):
            input_lengths = _conv_out_length(input_lengths, kernel_size, stride)

        if add_adapter:
            for _ in range(self.config.num_adapter_layers):
                input_lengths = _conv_out_length(input_lengths, 1,
                                                 self.config.adapter_stride)
        return input_lengths

    def _get_feature_vector_attention_mask(self,
                                           feature_vector_length,
                                           attention_mask,
                                           add_adapter=None):
        """Reduce a sample-level mask to a boolean frame-level mask.

        ``forward`` calls this method, but the class did not define it, so any
        call with an attention mask failed with ``AttributeError``.

        Args:
            feature_vector_length: number of frames in the extracted features.
            attention_mask: (batch, samples) mask; non-zero marks valid samples.
            add_adapter: forwarded to ``_get_feat_extract_output_lengths``.

        Returns:
            Boolean tensor of shape (batch, feature_vector_length) that is True
            for the first ``output_length`` frames of every row.
        """
        non_padded_lengths = attention_mask.astype('int64').sum(axis=-1)
        output_lengths = self._get_feat_extract_output_lengths(
            non_padded_lengths, add_adapter=add_adapter).astype('int64')
        frame_positions = paddle.arange(
            feature_vector_length, dtype='int64').unsqueeze(0)
        return frame_positions < output_lengths.unsqueeze(1)

    def _mask_hidden_states(
            self,
            hidden_states: paddle.Tensor,
            mask_time_indices: Optional[paddle.Tensor]=None,
            attention_mask: Optional[paddle.Tensor]=None, ):
        """
        Masks extracted features along time axis and/or along feature axis according to
        [SpecAugment](https://arxiv.org/abs/1904.08779).
        """
        # `config.apply_spec_augment` can set masking to False
        if not getattr(self.config, "apply_spec_augment", True):
            return hidden_states

        # generate indices & apply SpecAugment along time axis
        batch_size, sequence_length, hidden_size = hidden_states.shape
        if mask_time_indices is not None:
            # apply SpecAugment along time axis with given mask_time_indices
            hidden_states[mask_time_indices] = self.masked_spec_embed.astype(
                hidden_states.dtype)
        elif self.config.mask_time_prob > 0 and self.training:
            mask_time_indices = _compute_mask_indices(
                (batch_size, sequence_length),
                mask_prob=self.config.mask_time_prob,
                mask_length=self.config.mask_time_length,
                attention_mask=attention_mask,
                min_masks=self.config.mask_time_min_masks, )
            mask_time_indices = paddle.to_tensor(
                mask_time_indices, dtype=paddle.bool)
            hidden_states[mask_time_indices] = self.masked_spec_embed.astype(
                hidden_states.dtype)

        if self.config.mask_feature_prob > 0 and self.training:
            # generate indices & apply SpecAugment along feature axis
            mask_feature_indices = _compute_mask_indices(
                (batch_size, hidden_size),
                mask_prob=self.config.mask_feature_prob,
                mask_length=self.config.mask_feature_length,
                min_masks=self.config.mask_feature_min_masks, )
            mask_feature_indices = paddle.to_tensor(
                mask_feature_indices, dtype=paddle.bool)
            # Paddle's expand takes the target shape as one list; -1 keeps a dim
            mask_feature_indices = mask_feature_indices[:, None].expand(
                [-1, sequence_length, -1])
            hidden_states[mask_feature_indices] = 0

        return hidden_states

    def forward(
            self,
            input_values: Optional[paddle.Tensor],
            attention_mask: Optional[paddle.Tensor]=None,
            mask_time_indices: Optional[paddle.Tensor]=None,
            output_attentions: Optional[bool]=None,
            output_hidden_states: Optional[bool]=None,
            return_dict: Optional[bool]=None,
    ) -> Union[Tuple, Wav2Vec2BaseModelOutput]:
        """Extract features, mask them, encode them and optionally adapt them.

        Flags left as ``None`` fall back to the corresponding ``config`` values.

        Returns:
            ``Wav2Vec2BaseModelOutput`` when ``return_dict`` is truthy,
            otherwise ``(hidden_states, extract_features) + encoder_outputs[1:]``.
        """
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (output_hidden_states
                                if output_hidden_states is not None else
                                self.config.output_hidden_states)
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        extract_features = self.feature_extractor(input_values)
        extract_features = extract_features.transpose([0, 2, 1])

        if attention_mask is not None:
            # compute reduced attention_mask corresponding to feature vectors
            attention_mask = self._get_feature_vector_attention_mask(
                extract_features.shape[1], attention_mask, add_adapter=False)
        hidden_states, extract_features = self.feature_projection(
            extract_features)
        hidden_states = self._mask_hidden_states(
            hidden_states,
            mask_time_indices=mask_time_indices,
            attention_mask=attention_mask)

        encoder_outputs = self.encoder(
            hidden_states,
            attention_mask=attention_mask,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict, )

        hidden_states = encoder_outputs[0]

        if self.adapter is not None:
            hidden_states = self.adapter(hidden_states)

        if not return_dict:
            return (hidden_states, extract_features) + encoder_outputs[1:]

        return Wav2Vec2BaseModelOutput(
            last_hidden_state=hidden_states,
            extract_features=extract_features,
            hidden_states=encoder_outputs.hidden_states,
            attentions=encoder_outputs.attentions, )

    def post_init(self):
        """
        A method executed at the end of each Transformer model initialization, to execute code that needs the model's
        modules properly initialized (such as weight initialization).
        """
        # Intentionally a no-op in this port: weight initialization and the
        # gradient-checkpointing compatibility hook are not performed here.
        pass


class Wav2Vec2ConfigPure():
    """Plain attribute container mirroring the wav2vec2 fields of ``config``.

    Every value is read from the given ``config`` object by attribute access;
    the three output flags are fixed constants. A ``ValueError`` is raised when
    the convolution kernel/stride/dim sequences differ in length.
    """
    model_type = "wav2vec2"

    def __init__(self, config):
        def _mirror(*field_names):
            # copy the named attributes from `config` onto this instance, in order
            for field_name in field_names:
                setattr(self, field_name, getattr(config, field_name))

        # output behaviour is fixed rather than read from `config`
        self.output_attentions = False
        self.output_hidden_states = False
        self.use_return_dict = True

        _mirror(
            "hidden_size",
            "feat_extract_norm",
            "feat_extract_activation",
            "conv_dim",
            "conv_stride",
            "conv_kernel",
            "conv_bias",
            "num_conv_pos_embeddings",
            "num_conv_pos_embedding_groups", )
        # derived from the number of conv channel entries
        self.num_feat_extract_layers = len(self.conv_dim)
        _mirror(
            "num_hidden_layers",
            "intermediate_size",
            "hidden_act",
            "num_attention_heads",
            "hidden_dropout",
            "attention_dropout",
            "activation_dropout",
            "feat_proj_dropout",
            "final_dropout",
            "layerdrop",
            "layer_norm_eps",
            "initializer_range",
            "do_stable_layer_norm",
            "use_weighted_layer_sum", )

        expected_layers = self.num_feat_extract_layers
        per_layer_settings = (self.conv_stride, self.conv_kernel, self.conv_dim)
        if any(len(setting) != expected_layers
               for setting in per_layer_settings):
            raise ValueError(
                "Configuration for convolutional layers is incorrect. It is required that `len(config.conv_dim)` =="
                " `len(config.conv_stride)` == `len(config.conv_kernel)`, but is `len(config.conv_dim) ="
                f" {len(self.conv_dim)}`, `len(config.conv_stride) = {len(self.conv_stride)}`,"
                f" `len(config.conv_kernel) = {len(self.conv_kernel)}`.")

        # fine-tuning config parameters for SpecAugment: https://arxiv.org/abs/1904.08779
        _mirror(
            "apply_spec_augment",
            "mask_time_prob",
            "mask_time_length",
            "mask_time_min_masks",
            "mask_feature_prob",
            "mask_feature_length",
            "mask_feature_min_masks", )

        # parameters for pretraining with codevector quantized representations
        _mirror(
            "num_codevectors_per_group",
            "num_codevector_groups",
            "contrastive_logits_temperature",
            "feat_quantizer_dropout",
            "num_negatives",
            "codevector_dim",
            "proj_codevector_dim",
            "diversity_loss_weight", )

        # adapter
        _mirror(
            "add_adapter",
            "adapter_kernel_size",
            "adapter_stride",
            "num_adapter_layers", )
        # a falsy output size falls back to the hidden size
        self.output_hidden_size = config.output_hidden_size or config.hidden_size

    @property
    def inputs_to_logits_ratio(self):
        """Product of all entries of ``conv_stride`` (1 for an empty sequence)."""
        ratio = 1
        for stride in self.conv_stride:
            ratio = ratio * stride
        return ratio


================================================
FILE: paddlespeech/s2t/models/wav2vec2/modules/normalization.py
================================================
# Authors
#  * Mirco Ravanelli 2020
#  * Guillermo Cámbara 2021
#  * Sarthak Yadav 2022
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from speechbrain(https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/nnet/normalization.py)
import paddle.nn as nn

from paddlespeech.s2t.modules.align import BatchNorm1D


class BatchNorm1d(nn.Layer):
    """Applies 1d batch normalization to the input tensor.
    Arguments
    ---------
    input_shape : tuple
        The expected shape of the input. Alternatively, use ``input_size``.
    input_size : int
        The expected size of the input. Alternatively, use ``input_shape``.
    eps : float
        Forwarded as ``epsilon`` to the underlying ``BatchNorm1D``.
    momentum : float
        Forwarded as ``momentum`` to the underlying ``BatchNorm1D``.
    combine_batch_time : bool
        When true, it combines batch and time axis before normalizing.
    skip_transpose : bool
        When true, the input is normalized as given (channels are expected on
        axis 1); otherwise axis 1 and the last axis are swapped around the
        normalization.
    Example
    -------
    >>> input = paddle.randn([100, 10])
    >>> norm = BatchNorm1d(input_shape=input.shape)
    >>> output = norm(input)
    >>> output.shape
    [100, 10]
    """

    def __init__(
            self,
            input_shape=None,
            input_size=None,
            eps=1e-05,
            momentum=0.9,
            combine_batch_time=False,
            skip_transpose=False, ):
        super().__init__()
        self.combine_batch_time = combine_batch_time
        self.skip_transpose = skip_transpose

        # the channel count comes from axis 1 when no transpose is applied,
        # from the last axis otherwise
        if input_size is None and skip_transpose:
            input_size = input_shape[1]
        elif input_size is None:
            input_size = input_shape[-1]

        self.norm = BatchNorm1D(input_size, momentum=momentum, epsilon=eps)

    @staticmethod
    def _swap_axis1_with_last(ndim):
        """Return the permutation exchanging axis 1 with the last axis."""
        perm = list(range(ndim))
        perm[1], perm[-1] = perm[-1], perm[1]
        return perm

    def forward(self, x):
        """Returns the normalized input tensor.
        Arguments
        ---------
        x : paddle.Tensor (batch, time, [channels])
            input to normalize. 2d or 3d tensors are expected in input
            4d tensors can be used when combine_batch_time=True.
        """
        shape_or = x.shape
        perm = None
        if self.combine_batch_time:
            # Paddle's reshape takes the target shape as a single list
            if x.ndim == 3:
                x = x.reshape([shape_or[0] * shape_or[1], shape_or[2]])
            else:
                x = x.reshape(
                    [shape_or[0] * shape_or[1], shape_or[3], shape_or[2]])

        elif not self.skip_transpose:
            # identity for 2-D input, [0, 2, 1] for 3-D input
            perm = self._swap_axis1_with_last(x.ndim)
            x = x.transpose(perm)

        x_n = self.norm(x)
        if self.combine_batch_time:
            x_n = x_n.reshape(shape_or)
        elif not self.skip_transpose:
            # the swap is its own inverse
            x_n = x_n.transpose(perm)

        return x_n


================================================
FILE: paddlespeech/s2t/models/wav2vec2/modules/wav2vec2_model.py
================================================
# Copyright (c) Facebook, Inc. and its affiliates.
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Paddle Wav2Vec2 model."""
import math
import uuid
from dataclasses import dataclass
from dataclasses import field
from enum import Enum
from enum import EnumMeta
from typing import Callable
from typing import Dict
from typing import List
from typing import Optional
from typing import Tuple

import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import Tensor

from paddlespeech.s2t.modules.align import Conv1D
from paddlespeech.s2t.modules.align import Conv2D
from paddlespeech.s2t.modules.align import Embedding
from paddlespeech.s2t.modules.align import LayerNorm
from paddlespeech.s2t.modules.align import Linear
from paddlespeech.s2t.utils.log import Log

# Module-level logger obtained from the project's `Log` helper, keyed by this module's name.
logger = Log(__name__).getlog()


class GLU(nn.Layer):
    r"""Layer wrapper around the gated linear unit.

    Computes :math:`{GLU}(a, b)= a \otimes \sigma(b)`, where :math:`a` and
    :math:`b` are the first and second half of the input along ``axis``.

    Args:
        axis (int): the dimension on which to split the input. Default: -1

    Shape:
        - Input: :math:`(\ast_1, N, \ast_2)` where `*` means, any number of additional
          dimensions
        - Output: :math:`(\ast_1, M, \ast_2)` where :math:`M=N/2`

    Examples::

        >>> m = GLU()
        >>> input = paddle.randn([4, 2])
        >>> output = m(input)
    """

    def __init__(self, axis: int=-1) -> None:
        super().__init__()
        # axis handed to the functional GLU on every call
        self.axis = axis

    def forward(self, input: Tensor) -> Tensor:
        """Gate ``input`` along the configured axis."""
        split_axis = self.axis
        gated = F.glu(input, split_axis)
        return gated


class FairseqIncrementalState(object):
    """Mixin that namespaces entries of a shared incremental-state dict.

    Each instance draws a random UUID; keys are stored as ``"<uuid>.<key>"`` so
    several instances can share one dictionary without clashing.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.init_incremental_state()

    def init_incremental_state(self):
        """Assign a fresh random identifier to this instance."""
        fresh_id = uuid.uuid4()
        self._incremental_state_id = str(fresh_id)

    def _get_full_incremental_state_key(self, key: str) -> str:
        """Prefix ``key`` with this instance's identifier."""
        prefix = self._incremental_state_id
        return "{}.{}".format(prefix, key)

    def get_incremental_state(
            self,
            incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]],
            key: str, ) -> Optional[Dict[str, Optional[Tensor]]]:
        """Helper for getting incremental state for an nn.Layer."""
        namespaced = self._get_full_incremental_state_key(key)
        if incremental_state is not None and namespaced in incremental_state:
            return incremental_state[namespaced]
        return None

    def set_incremental_state(
            self,
            incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]],
            key: str,
            value: Dict[str, Optional[Tensor]],
    ) -> Optional[Dict[str, Dict[str, Optional[Tensor]]]]:
        """Helper for setting incremental state for an nn.Layer."""
        if incremental_state is None:
            # nothing to store into; hand the None straight back
            return incremental_state
        namespaced = self._get_full_incremental_state_key(key)
        incremental_state[namespaced] = value
        return incremental_state


def with_incremental_state(cls):
    """Class decorator that makes ``FairseqIncrementalState`` the first base.

    Existing bases keep their relative order; an already present
    ``FairseqIncrementalState`` entry is not duplicated. Returns ``cls``.
    """
    other_bases = [
        base for base in cls.__bases__ if base != FairseqIncrementalState
    ]
    cls.__bases__ = (FairseqIncrementalState, *other_bases)
    return cls


class FairseqDropout(paddle.nn.Layer):
    """Dropout layer that can optionally stay active outside of training.

    Dropout is applied when ``p > 0`` and either the layer is in training mode
    or ``apply_during_inference`` has been switched on through
    ``make_generation_fast_``.
    """

    def __init__(self, p, module_name=None):
        super().__init__()
        self.p = p
        self.module_name = module_name
        self.apply_during_inference = False

    def forward(self, x):
        """Return ``x`` with dropout applied, or ``x`` itself when inactive."""
        if not self.p > 0:
            return x
        if not (self.training or self.apply_during_inference):
            return x
        # training=True forces dropout even when the layer is in eval mode
        return F.dropout(x, p=self.p, training=True)

    def make_generation_fast_(
            self,
            name: str,
            retain_dropout: bool=False,
            retain_dropout_modules: Optional[List[str]]=None,
            **kwargs, ):
        """Decide whether dropout stays enabled at inference time.

        Nothing happens unless ``retain_dropout`` is truthy. A module list of
        ``None`` selects every module; otherwise this layer's ``module_name``
        has to be contained in ``retain_dropout_modules``.
        """
        if not retain_dropout:
            return

        restricted = retain_dropout_modules is not None
        if restricted and self.module_name is None:
            logger.warning(
                "Cannot enable dropout during inference for module {} "
                "because module_name was not set".format(name))
            return

        if not restricted or self.module_name in retain_dropout_modules:
            logger.info("Enabling dropout during inference for module: {}".
                        format(name))
            self.apply_during_inference = True
            return

        logger.info("Disabling dropout for module: {}".format(name))


def quant_noise(module, p, block_size):
    """
    Wraps modules and applies quantization noise to the weights for
    subsequent quantization with Iterative Product Quantization as
    described in "Training with Quantization Noise for Extreme Model Compression"

    Args:
        - module: nn.Layer
        - p: amount of Quantization Noise (probability of dropping a block)
        - block_size: size of the blocks for subsequent quantization with iPQ

    Returns:
        The same ``module``; when ``p > 0`` a forward pre-hook has been
        registered on it that overwrites ``module.weight`` in training mode.

    Remarks:
        - Layer weights must have the right sizes wrt the block size
        - Only Linear, Embedding and Conv2D modules are supported for the moment
        - For more detail on how to quantize by blocks with convolutional weights,
          see "And the Bit Goes Down: Revisiting the Quantization of Neural Networks"
        - We implement the simplest form of noise here as stated in the paper
          which consists in randomly dropping blocks
    """

    # if no quantization noise, don't register hook
    if p <= 0:
        return module

    # supported modules
    assert isinstance(module, (Linear, Embedding, Conv2D))

    weight_shape = list(module.weight.shape)
    is_linear = isinstance(module, Linear)

    # test whether module.weight has the right sizes wrt block_size
    is_conv = len(weight_shape) == 4
    # Paddle shapes are lists, so compare as tuples; a list never equals (1, 1)
    is_pointwise_conv = is_conv and tuple(weight_shape[2:]) == (1, 1)

    # 2D matrix
    if not is_conv:
        # this code reads the input size from axis 0 for Linear weights and
        # from axis 1 otherwise
        features_weight = weight_shape[0] if is_linear else weight_shape[1]
        assert (
            features_weight %
            block_size == 0), "Input features must be a multiple of block sizes"

    # 4D matrix
    elif is_pointwise_conv:
        # 1x1 convolutions
        assert (weight_shape[1] % block_size == 0
                ), "Input channels must be a multiple of block sizes"
    else:
        # regular convolutions
        k = weight_shape[2] * weight_shape[3]
        assert k % block_size == 0, "Kernel size must be a multiple of block size"

    def _sample_drop_mask(shape):
        # 1.0 with probability p (drop), 0.0 otherwise
        return paddle.bernoulli(paddle.full(shape, p, dtype='float32'))

    def _forward_pre_hook(mod, input):
        # no noise for evaluation
        if not mod.training:
            return

        weight = mod.weight
        if not is_conv:
            # gather sizes
            if is_linear:
                in_features, out_features = weight.shape[0], weight.shape[1]
            else:
                in_features, out_features = weight.shape[1], weight.shape[0]

            # split weight matrix into blocks and randomly drop selected blocks
            mask = _sample_drop_mask(
                [in_features // block_size * out_features])
            mask = mask.unsqueeze(1).tile([1, block_size]).reshape(
                [-1, in_features])
            if is_linear:
                # mask was built as [out, in]; Linear weights are [in, out]
                mask = mask.transpose([1, 0])
        else:
            in_channels = weight.shape[1]
            out_channels = weight.shape[0]

            if is_pointwise_conv:
                # blocks run along the input channels
                mask = _sample_drop_mask(
                    [in_channels // block_size * out_channels])
                mask = mask.unsqueeze(1).tile([1, block_size]).reshape(
                    [out_channels, in_channels, 1, 1])
            else:
                # one decision per (out, in) pair, shared by the whole kernel
                mask = _sample_drop_mask([out_channels, in_channels])
                mask = mask.unsqueeze(2).unsqueeze(3).tile(
                    [1, 1, weight.shape[2], weight.shape[3]])

        # scale weights and apply mask
        drop = mask.astype('bool')
        s = 1 / (1 - p)
        with paddle.no_grad():
            noised = s * paddle.where(drop, paddle.zeros_like(weight), weight)
            mod.weight.set_value(noised)

    module.register_forward_pre_hook(_forward_pre_hook)
    return module


@with_incremental_state
class MultiheadAttention(nn.Layer):
    """Multi-headed attention.

    See "Attention Is All You Need" for more details.
    """

    def __init__(
            self,
            embed_dim,
            num_heads,
            kdim=None,
            vdim=None,
            dropout=0.0,
            bias=True,
            add_bias_kv=False,
            add_zero_attn=False,
            self_attention=False,
            encoder_decoder_attention=False,
            q_noise=0.0,
            qn_block_size=8,
            # TODO: pass in config rather than string.
            # config defined in xformers.components.attention.AttentionConfig
            xformers_att_config: Optional[str]=None,
            xformers_blocksparse_layout: Optional[
                paddle.Tensor]=None,  # This should be part of the config
            xformers_blocksparse_blocksize: Optional[
                int]=16,  # This should be part of the config
    ):
        """Build the Q/K/V/output projections and attention bookkeeping.

        Args:
            embed_dim: Model width; must be divisible by ``num_heads``.
            num_heads: Number of attention heads.
            kdim: Input feature size of the key projection
                (defaults to ``embed_dim``).
            vdim: Input feature size of the value projection
                (defaults to ``embed_dim``).
            dropout: Probability handed to ``FairseqDropout`` for the
                attention probabilities.
            bias: Whether the four linear projections carry a bias.
            add_bias_kv: If true, create learnable ``bias_k`` / ``bias_v``
                parameters of shape ``[1, 1, embed_dim]``.
            add_zero_attn: If true, ``forward`` appends an all-zero key/value
                position.
            self_attention: Q, K and V are all projected from ``query``.
            encoder_decoder_attention: K and V are both projected from
                ``key``.
            q_noise: Forwarded to ``quant_noise`` for every projection.
            qn_block_size: Forwarded to ``quant_noise`` for every projection.
            xformers_att_config: Must be ``None``; xformers is not supported
                and any other value trips an assertion.
            xformers_blocksparse_layout: Unused.
            xformers_blocksparse_blocksize: Unused.
        """
        super().__init__()

        def eval_str_dict(x, type=dict):
            if x is None:
                return None
            if isinstance(x, str):
                # SECURITY NOTE(review): ``eval`` executes arbitrary code.
                # Any non-None config is rejected by the assertion below, so
                # never pass an untrusted string here; consider dropping the
                # eval altogether.
                x = eval(x)
            return x

        xformers_att_config = eval_str_dict(xformers_att_config)
        self.use_xformers = xformers_att_config is not None
        assert not self.use_xformers, "Do not use xformers in PaddleSpeech"

        self.embed_dim = embed_dim
        self.kdim = kdim if kdim is not None else embed_dim
        self.vdim = vdim if vdim is not None else embed_dim
        self.qkv_same_dim = self.kdim == embed_dim and self.vdim == embed_dim

        self.num_heads = num_heads
        self.dropout_module = FairseqDropout(
            dropout, module_name=self.__class__.__name__)

        self.head_dim = embed_dim // num_heads
        assert (self.head_dim * num_heads == self.embed_dim
                ), "embed_dim must be divisible by num_heads"
        self.scaling = self.head_dim**-0.5

        self.self_attention = self_attention
        self.encoder_decoder_attention = encoder_decoder_attention

        assert not self.self_attention or self.qkv_same_dim, (
            "Self-attention requires query, key and "
            "value to be of the same size")

        # TODO: scaled initialization (gain = 1 / sqrt(2)) when q/k/v share a
        # dimension; empirically observed to converge much better.
        weight_attr = nn.initializer.XavierUniform()
        kv_proj_bias_attr = nn.initializer.XavierUniform()
        out_proj_bias_attr = nn.initializer.Constant(0)

        # ``bias_attr`` is the initializer when a bias is requested, otherwise
        # the (falsy) ``bias`` value itself is passed through to nn.Linear.
        # The creation order k, v, q, out is kept so auto-generated parameter
        # names and RNG consumption stay the same.
        self.k_proj = quant_noise(
            nn.Linear(
                self.kdim,
                embed_dim,
                weight_attr=weight_attr,
                bias_attr=kv_proj_bias_attr if bias else bias),
            q_noise,
            qn_block_size)
        self.v_proj = quant_noise(
            nn.Linear(
                self.vdim,
                embed_dim,
                weight_attr=weight_attr,
                bias_attr=kv_proj_bias_attr if bias else bias),
            q_noise,
            qn_block_size)
        self.q_proj = quant_noise(
            nn.Linear(
                embed_dim, embed_dim, weight_attr=weight_attr, bias_attr=bias),
            q_noise, qn_block_size)

        self.out_proj = quant_noise(
            nn.Linear(
                embed_dim,
                embed_dim,
                weight_attr=weight_attr,
                bias_attr=out_proj_bias_attr if bias else bias),
            q_noise,
            qn_block_size)

        if add_bias_kv:
            # paddle.create_parameter takes the initializer through the
            # ``default_initializer`` keyword and expects an *instance*;
            # passing ``initializer=<class>`` raises TypeError.
            self.bias_k = paddle.create_parameter(
                shape=[1, 1, embed_dim],
                dtype='float32',
                default_initializer=nn.initializer.XavierUniform())
            self.bias_v = paddle.create_parameter(
                shape=[1, 1, embed_dim],
                dtype='float32',
                default_initializer=nn.initializer.XavierUniform())
        else:
            self.bias_k = self.bias_v = None

        self.add_zero_attn = add_zero_attn
        self.beam_size = 1
        # Parameters are initialised through the attrs above;
        # reset_parameters() is intentionally not invoked here.

        self.onnx_trace = False
        self.skip_embed_dim_check = False

    def prepare_for_onnx_export_(self):
        """Switch the layer into ONNX-trace mode.

        ``forward`` reads ``self.onnx_trace`` to choose the softmax variant,
        to tile ``attn_mask`` and to skip a transpose for single-step outputs.
        """
        self.onnx_trace = True

    def reset_parameters(self):
        """Re-initialise the projection weights and optional biases in place.

        Q/K/V weights get Xavier-uniform initialisation, scaled by
        ``1 / sqrt(2)`` when query, key and value share the same dimension
        (empirically observed to converge much better). The output projection
        weight gets plain Xavier-uniform, its bias is zeroed, and
        ``bias_k`` / ``bias_v`` (when present) get Xavier-normal.
        """
        # Paddle initializers are callables: build the initializer first, then
        # apply it to the parameter. Passing the parameter to the constructor
        # only builds an unused object and leaves the weights untouched.
        if self.qkv_same_dim:
            qkv_init = nn.initializer.XavierUniform(gain=1 / math.sqrt(2))
        else:
            qkv_init = nn.initializer.XavierUniform()
        for proj in (self.k_proj, self.v_proj, self.q_proj):
            qkv_init(proj.weight)

        # The output projection is initialised in both cases.
        nn.initializer.XavierUniform()(self.out_proj.weight)
        if self.out_proj.bias is not None:
            nn.initializer.Constant(0.0)(self.out_proj.bias)
        if self.bias_k is not None:
            nn.initializer.XavierNormal()(self.bias_k)
        if self.bias_v is not None:
            nn.initializer.XavierNormal()(self.bias_v)

    def _get_reserve_head_index(self, num_heads_to_keep: int):
        """Pick the heads with the largest summed-absolute-value projections.

        For every head the score is the sum of absolute values of its slice
        of the K, Q and V projection weights and biases. Heads are ranked by
        descending score and the first ``num_heads_to_keep`` are returned.

        Args:
            num_heads_to_keep: How many heads to retain.

        Returns:
            A list of ``(start, end)`` index pairs into the projected
            dimension, one per retained head, in descending score order.
        """

        def _slice_magnitude(proj, lo, hi):
            # Weight columns lo:hi plus bias entries lo:hi of one projection.
            weight_part = paddle.sum(paddle.abs(proj.weight[:, lo:hi])).tolist()
            bias_part = paddle.sum(paddle.abs(proj.bias[lo:hi])).tolist()
            return weight_part + bias_part

        head_scores = []
        for head in range(self.num_heads):
            lo = head * self.head_dim
            hi = (head + 1) * self.head_dim
            k_score = _slice_magnitude(self.k_proj, lo, hi)
            q_score = _slice_magnitude(self.q_proj, lo, hi)
            v_score = _slice_magnitude(self.v_proj, lo, hi)
            head_scores.append(k_score + q_score + v_score)

        ranking = sorted(
            range(self.num_heads), key=lambda h: head_scores[h], reverse=True)

        kept_ranges = []
        for rank in range(num_heads_to_keep):
            head = ranking[rank]
            kept_ranges.append(
                (head * self.head_dim, (head + 1) * self.head_dim))
        return kept_ranges

    def _adaptive_prune_heads(self, reserve_head_index: List[Tuple[int, int]]):
        """Shrink the projections so only the listed heads remain.

        The Q/K/V weights keep the columns, and their biases the entries,
        inside each ``(start, end)`` range; the output projection keeps the
        matching rows. New parameters are created from the gathered slices
        and ``num_heads`` / ``embed_dim`` / ``out_features`` are updated.

        Args:
            reserve_head_index: ``(start, end)`` ranges of the heads to keep,
                e.g. as produced by ``_get_reserve_head_index``.
        """

        def _merge(pieces, **concat_kwargs):
            # Concatenate the kept slices into a fresh trainable tensor that
            # is detached from the old graph.
            merged = paddle.concat(pieces, **concat_kwargs).detach()
            merged.stop_gradient = False
            return merged

        def _to_parameter(tensor):
            return paddle.create_parameter(
                shape=tensor.shape,
                dtype=tensor.dtype,
                default_initializer=paddle.nn.initializer.Assign(tensor))

        q_w = _merge(
            [self.q_proj.weight[:, lo:hi] for lo, hi in reserve_head_index],
            axis=-1)
        k_w = _merge(
            [self.k_proj.weight[:, lo:hi] for lo, hi in reserve_head_index],
            axis=-1)
        v_w = _merge(
            [self.v_proj.weight[:, lo:hi] for lo, hi in reserve_head_index],
            axis=-1)
        out_w = _merge(
            [self.out_proj.weight[lo:hi, ] for lo, hi in reserve_head_index])

        q_b = _merge([self.q_proj.bias[lo:hi] for lo, hi in reserve_head_index])
        k_b = _merge([self.k_proj.bias[lo:hi] for lo, hi in reserve_head_index])
        v_b = _merge([self.v_proj.bias[lo:hi] for lo, hi in reserve_head_index])

        # Parameters are created in the order q, k, v (weight then bias),
        # followed by the output projection weight.
        self.q_proj.weight = _to_parameter(q_w)
        self.q_proj.bias = _to_parameter(q_b)

        self.k_proj.weight = _to_parameter(k_w)
        self.k_proj.bias = _to_parameter(k_b)

        self.v_proj.weight = _to_parameter(v_w)
        self.v_proj.bias = _to_parameter(v_b)

        self.out_proj.weight = _to_parameter(out_w)

        self.num_heads = len(reserve_head_index)
        self.embed_dim = self.head_dim * self.num_heads
        for proj in (self.q_proj, self.k_proj, self.v_proj):
            proj.out_features = self.embed_dim

    def _set_skip_embed_dim_check(self):
        """Disable the ``query`` width assertion performed in ``forward``.

        NOTE(review): presumably intended for use after head pruning, when
        ``self.embed_dim`` no longer matches the input width - confirm with
        callers.
        """
        self.skip_embed_dim_check = True

    def _pad_masks(
            self,
            key_padding_mask: Optional[Tensor],
            attn_mask: Optional[Tensor],
    ) -> Tuple[Optional[Tensor], Optional[Tensor]]:
        """Append one zero-filled entry along the last axis of each mask.

        Masks that are ``None`` are passed through untouched.

        Returns:
            ``(key_padding_mask, attn_mask)`` after padding.
        """

        def _extend_last_axis(mask):
            # Same leading dims as the mask, a single slot on the last axis.
            pad_shape = mask.shape[:-1] + [
                1,
            ]
            zeros = paddle.zeros(pad_shape, dtype=mask.dtype)
            return paddle.concat([mask, zeros], axis=-1)

        if attn_mask is not None:
            attn_mask = _extend_last_axis(attn_mask)
        if key_padding_mask is not None:
            key_padding_mask = _extend_last_axis(key_padding_mask)
        return key_padding_mask, attn_mask

    def _add_bias(
            self,
            k: Tensor,
            v: Tensor,
            key_padding_mask: Optional[Tensor],
            attn_mask: Optional[Tensor],
            bsz: int,
    ) -> Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]:
        """Append the learned ``bias_k`` / ``bias_v`` as one extra position.

        ``k`` and ``v`` are laid out as ``(time, batch, channel)`` (the input
        layout documented on ``forward``), and the biases have shape
        ``[1, 1, embed_dim]``, so after tiling over the batch they are
        appended along the time axis. Both masks get one extra zero entry on
        their last axis to cover the new position.

        Args:
            k: Projected keys.
            v: Projected values.
            key_padding_mask: Optional mask, padded via ``_pad_masks``.
            attn_mask: Optional mask, padded via ``_pad_masks``.
            bsz: Batch size used to tile the bias.

        Returns:
            ``(k, v, key_padding_mask, attn_mask)`` with the extra position.
        """
        assert self.bias_k is not None
        assert self.bias_v is not None
        # Concatenate along axis 0 (time). Concatenating along the channel
        # axis would require k's time length to be 1 and would double the
        # channel width instead of adding a key/value position.
        k = paddle.concat([k, self.bias_k.tile([1, bsz, 1])], axis=0)
        v = paddle.concat([v, self.bias_v.tile([1, bsz, 1])], axis=0)
        key_padding_mask, attn_mask = self._pad_masks(
            key_padding_mask=key_padding_mask, attn_mask=attn_mask)
        return k, v, key_padding_mask, attn_mask

    def _append_zero_attn(
            self,
            k: Tensor,
            v: Tensor,
            key_padding_mask: Optional[Tensor],
            attn_mask: Optional[Tensor],
    ) -> Tuple[Tensor, Tensor, Optional[Tensor], Optional[Tensor]]:
        """Add one all-zero position to ``k`` and ``v`` on the second-to-last axis.

        Both masks are extended by one zero entry on their last axis via
        ``_pad_masks`` so they keep matching the new source length.

        Returns:
            ``(k, v, key_padding_mask, attn_mask)`` after extension.
        """
        # Same shape as k, except a single slot on the second-to-last axis.
        pad_shape = k.shape[:-2] + [1] + k.shape[-1:]
        zero_key = paddle.zeros(pad_shape, dtype=k.dtype)
        zero_value = paddle.zeros(pad_shape, dtype=v.dtype)

        k = paddle.concat([k, zero_key], axis=-2)
        v = paddle.concat([v, zero_value], axis=-2)

        padded_kpm, padded_attn = self._pad_masks(
            key_padding_mask=key_padding_mask, attn_mask=attn_mask)
        return k, v, padded_kpm, padded_attn

    def forward(
            self,
            query,
            key: Optional[Tensor],
            value: Optional[Tensor],
            key_padding_mask: Optional[Tensor]=None,
            incremental_state: Optional[Dict[str, Dict[str, Optional[
                Tensor]]]]=None,
            need_weights: bool=True,
            static_kv: bool=False,
            attn_mask: Optional[Tensor]=None,
            before_softmax: bool=False,
            need_head_weights: bool=False, ) -> Tuple[Tensor, Optional[Tensor]]:
        """Input shape: Time x Batch x Channel

        Args:
            query: tensor of shape `(tgt_len, batch, embed_dim)`.
            key: source of the keys; ignored for self-attention, may be
                None when cached static keys are reused.
            value: source of the values; only projected when the layer is
                neither self-attention nor encoder-decoder attention.
            key_padding_mask (ByteTensor, optional): mask to exclude
                keys that are pads, of shape `(batch, src_len)`, where
                padding elements are indicated by 1s.
            incremental_state (dict, optional): cache holding the keys,
                values and padding mask of previous decoding steps.
            need_weights (bool, optional): return the attention weights,
                averaged over heads (default: True).
            static_kv (bool, optional): reuse the cached keys/values
                instead of recomputing them (encoder-decoder attention
                only).
            attn_mask (ByteTensor, optional): typically used to
                implement causal attention, where the mask prevents the
                attention from looking forward in time (default: None).
                It is added to the attention scores.
            before_softmax (bool, optional): return the raw attention
                weights and values before the attention softmax.
            need_head_weights (bool, optional): return the attention
                weights for each head. Implies *need_weights*. Default:
                return the average attention weights over all heads.

        Returns:
            `(attn, attn_weights)`: the attended output of shape
            `(tgt_len, batch, embed_dim)` and, if requested, the attention
            weights (otherwise None). With `before_softmax=True` the raw
            scores and the reshaped values are returned instead.
        """
        if need_head_weights:
            need_weights = True

        # NOTE(review): kept from the original port; a Paddle place
        # presumably never compares equal to "xla", so the TPU branch below
        # looks unreachable - confirm before removing.
        is_tpu = query.place == "xla"

        tgt_len, bsz, embed_dim = query.shape
        src_len = tgt_len
        if not self.skip_embed_dim_check:
            assert (embed_dim == self.embed_dim
                    ), f"query dim {embed_dim} != {self.embed_dim}"
        assert list(query.shape) == [tgt_len, bsz, embed_dim]
        if key is not None:
            src_len, key_bsz, _ = key.shape

        # The fused multi-head-attention fast path and the xformers path of
        # the original implementation are not ported; attention is always
        # computed explicitly below.

        if incremental_state is not None:
            saved_state = self._get_input_buffer(incremental_state)
            if saved_state is not None and "prev_key" in saved_state:
                # previous time steps are cached - no need to recompute
                # key and value if they are static
                if static_kv:
                    assert self.encoder_decoder_attention and not self.self_attention
                    key = value = None
        else:
            saved_state = None

        if self.self_attention:
            q = self.q_proj(query)
            k = self.k_proj(query)
            v = self.v_proj(query)
        elif self.encoder_decoder_attention:
            # encoder-decoder attention
            q = self.q_proj(query)
            if key is None:
                assert value is None
                k = v = None
            else:
                # Tensor.size is an integer attribute in Paddle, not a
                # method, so per-axis extents are read through ``shape``.
                if self.beam_size > 1 and bsz == key.shape[1]:
                    # key is [T, bsz*beam_size, C], reduce to [T, bsz, C]
                    key = key.reshape(
                        [key.shape[0], -1, self.beam_size,
                         key.shape[2]])[:, :, 0, :]
                    if key_padding_mask is not None:
                        key_padding_mask = key_padding_mask.reshape(
                            [-1, self.beam_size,
                             key_padding_mask.shape[1]])[:, 0, :]
                k = self.k_proj(key)
                v = self.v_proj(key)

        else:
            assert key is not None and value is not None
            q = self.q_proj(query)
            k = self.k_proj(key)
            v = self.v_proj(value)
        q *= self.scaling

        if self.bias_k is not None:
            assert self.bias_v is not None
            # The two masks are passed (and received back) in swapped
            # positions; _add_bias pads both the same way, so each name ends
            # up bound to its own padded mask.
            k, v, attn_mask, key_padding_mask = self._add_bias(
                k, v, attn_mask, key_padding_mask, bsz)
            # One key/value position was appended, mirroring the
            # add_zero_attn handling further down.
            src_len += 1

        q = paddle.reshape(
            q, [tgt_len, bsz * self.num_heads, self.head_dim]).transpose(
                [1, 0, 2])
        kv_bsz = bsz  # need default value for scripting
        if k is not None:
            kv_bsz = k.shape[1]
            k = paddle.reshape(
                k, [-1, kv_bsz * self.num_heads, self.head_dim]).transpose(
                    [1, 0, 2])
        if v is not None:
            v = paddle.reshape(
                v, [-1, kv_bsz * self.num_heads, self.head_dim]).transpose(
                    [1, 0, 2])

        if saved_state is not None:
            # saved states are stored with shape (bsz, num_heads, seq_len, head_dim)
            if "prev_key" in saved_state:
                _prev_key = saved_state["prev_key"]
                assert _prev_key is not None
                kv_bsz = _prev_key.shape[0]
                prev_key = _prev_key.reshape(
                    [kv_bsz * self.num_heads, -1, self.head_dim])
                if static_kv:
                    k = prev_key
                else:
                    assert k is not None
                    k = paddle.concat([prev_key, k], axis=1)
                src_len = k.shape[1]
            if "prev_value" in saved_state:
                _prev_value = saved_state["prev_value"]
                assert _prev_value is not None
                assert kv_bsz == _prev_value.shape[0]
                prev_value = _prev_value.reshape(
                    [kv_bsz * self.num_heads, -1, self.head_dim])
                if static_kv:
                    v = prev_value
                else:
                    assert v is not None
                    v = paddle.concat([prev_value, v], axis=1)
            prev_key_padding_mask: Optional[Tensor] = None
            if "prev_key_padding_mask" in saved_state:
                prev_key_padding_mask = saved_state["prev_key_padding_mask"]
            assert k is not None and v is not None
            key_padding_mask = MultiheadAttention._append_prev_key_padding_mask(
                key_padding_mask=key_padding_mask,
                prev_key_padding_mask=prev_key_padding_mask,
                batch_size=kv_bsz,
                src_len=k.shape[1],
                static_kv=static_kv, )

            saved_state["prev_key"] = k.reshape(
                [kv_bsz, self.num_heads, -1, self.head_dim])
            saved_state["prev_value"] = v.reshape(
                [kv_bsz, self.num_heads, -1, self.head_dim])
            saved_state["prev_key_padding_mask"] = key_padding_mask
            # In this branch incremental_state is never None
            assert incremental_state is not None
            incremental_state = self._set_input_buffer(incremental_state,
                                                       saved_state)
        assert k is not None
        assert k.shape[1] == src_len

        # This is part of a workaround to get around fork/join parallelism
        # not supporting Optional types.
        if key_padding_mask is not None and key_padding_mask.dim() == 0:
            key_padding_mask = None

        if key_padding_mask is not None:
            assert key_padding_mask.shape[0] == kv_bsz
            assert key_padding_mask.shape[1] == src_len

        if self.add_zero_attn:
            assert v is not None
            src_len += 1
            k, v, key_padding_mask, attn_mask = self._append_zero_attn(
                k=k,
                v=v,
                key_padding_mask=key_padding_mask,
                attn_mask=attn_mask)

        if self.encoder_decoder_attention and bsz != kv_bsz:
            # Keys are stored once per sentence while queries exist once per
            # beam entry; the einsum broadcasts keys over the beam axis "x".
            attn_weights = paddle.einsum(
                "bxhtd,bhsd->bxhts",
                q.reshape([kv_bsz, -1, self.num_heads] + q.shape[1:]),
                k.reshape([kv_bsz, self.num_heads] + k.shape[1:]), )
            attn_weights = attn_weights.reshape([
                -1,
            ] + attn_weights.shape[-2:])
        else:
            attn_weights = paddle.bmm(q, k.transpose([0, 2, 1]))
        attn_weights = self.apply_sparse_mask(attn_weights, tgt_len, src_len,
                                              bsz)

        assert list(
            attn_weights.shape) == [bsz * self.num_heads, tgt_len, src_len]

        if attn_mask is not None:
            attn_mask = attn_mask.unsqueeze(0)
            if self.onnx_trace:
                attn_mask = attn_mask.tile([attn_weights.shape[0], 1, 1])
            attn_weights += attn_mask

        if key_padding_mask is not None:
            # don't attend to padding symbols
            attn_weights = attn_weights.reshape(
                [bsz, self.num_heads, tgt_len, src_len])
            if not is_tpu:
                attn_weights = attn_weights.reshape(
                    [kv_bsz, -1, self.num_heads, tgt_len, src_len])
                attn_weights = paddle.where(
                    key_padding_mask.unsqueeze(1).unsqueeze(2).unsqueeze(3)
                    .astype('bool'),
                    float('-inf') * paddle.ones_like(attn_weights),
                    attn_weights)
            else:
                attn_weights = attn_weights.transpose([2, 1, 0])
                attn_weights = paddle.where(key_padding_mask,
                                            float('-inf') *
                                            paddle.ones_like(attn_weights),
                                            attn_weights)
                attn_weights = attn_weights.transpose([2, 1, 0])
            attn_weights = attn_weights.reshape(
                [bsz * self.num_heads, tgt_len, src_len])

        if before_softmax:
            return attn_weights, v

        def softmax_supporting_onnx_trace(x, dim: int, onnx_trace: bool=False):
            # Outside ONNX tracing the softmax is computed in float32.
            if onnx_trace:
                return F.softmax(x, axis=dim)
            else:
                return F.softmax(x, axis=dim, dtype='float32')

        attn_weights_float = softmax_supporting_onnx_trace(
            attn_weights, dim=-1, onnx_trace=self.onnx_trace)
        attn_weights = paddle.cast(attn_weights_float, attn_weights.dtype)
        attn_probs = self.dropout_module(attn_weights)

        assert v is not None
        if self.encoder_decoder_attention and bsz != kv_bsz:
            attn = paddle.einsum(
                "bxhts,bhsd->bxhtd",
                attn_probs.reshape([kv_bsz, -1, self.num_heads] +
                                   attn_probs.shape[1:]),
                v.reshape([kv_bsz, self.num_heads] + v.shape[1:]), )
            attn = attn.reshape([
                -1,
            ] + attn.shape[-2:])
        else:
            attn = paddle.bmm(attn_probs, v)
        assert list(
            attn.shape) == [bsz * self.num_heads, tgt_len, self.head_dim]
        if self.onnx_trace and attn.shape[1] == 1:
            # when ONNX tracing a single decoder step (sequence length == 1)
            # the transpose is a no-op copy before view, thus unnecessary
            attn = attn.reshape([tgt_len, bsz, self.embed_dim])
        else:
            attn = attn.transpose([1, 0, 2]).reshape(
                [tgt_len, bsz, self.embed_dim])
        attn = self.out_proj(attn)
        attn_weights: Optional[Tensor] = None
        if need_weights:
            attn_weights = attn_weights_float.reshape(
                [bsz, self.num_heads, tgt_len, src_len]).transpose([1, 0, 2, 3])
            if not need_head_weights:
                # average attention weights over heads
                attn_weights = attn_weights.mean(axis=0)

        return attn, attn_weights

    @staticmethod
    def _append_prev_key_padding_mask(
            key_padding_mask: Optional[Tensor],
            prev_key_padding_mask: Optional[Tensor],
            batch_size: int,
            src_len: int,
            static_kv: bool, ) -> Optional[Tensor]:
        """Combine the cached padding mask with the current step's mask.

        Saved key padding masks have shape (bsz, seq_len). Whenever a new
        tensor is built, its pieces are cast to float32 and joined along
        axis 1. When only one of the two masks exists, the missing part is
        filled with zeros up to ``src_len``.

        Returns:
            The merged mask, the cached mask as-is (static keys or nothing
            to add), or None when neither mask is available.
        """
        has_prev = prev_key_padding_mask is not None
        has_current = key_padding_mask is not None

        # Static keys/values: the cached mask is already complete.
        if has_prev and static_kv:
            return prev_key_padding_mask

        if has_prev and has_current:
            return paddle.concat(
                [
                    paddle.cast(prev_key_padding_mask, 'float32'),
                    paddle.cast(key_padding_mask, 'float32')
                ],
                axis=1)

        # During incremental decoding, as the padding token enters and
        # leaves the frame, there will be a time when prev or current
        # is None
        if has_prev:
            cached_len = prev_key_padding_mask.shape[1]
            if src_len > cached_len:
                # Zeros for the new positions go after the cached mask.
                filler = paddle.zeros([batch_size, src_len - cached_len], )
                return paddle.concat(
                    [
                        paddle.cast(prev_key_padding_mask, 'float32'),
                        paddle.cast(filler, 'float32')
                    ],
                    axis=1)
            return prev_key_padding_mask

        if has_current:
            current_len = key_padding_mask.shape[1]
            if src_len > current_len:
                # Zeros for the earlier positions go before the current mask.
                filler = paddle.zeros([batch_size, src_len - current_len], )
                return paddle.concat(
                    [
                        paddle.cast(filler, 'float32'),
                        paddle.cast(key_padding_mask, 'float32')
                    ],
                    axis=1)
            return paddle.cast(key_padding_mask, 'float32')

        # Neither mask is present: hand back the (None) cached mask.
        return prev_key_padding_mask

    @paddle.jit.to_static
    def reorder_incremental_state(
            self,
            incremental_state: Dict[str, Dict[str, Optional[Tensor]]],
            new_order: Tensor, ):
        """Reorder buffered internal state (for incremental generation).

        Every non-None tensor in this layer's cached buffer is re-indexed
        along axis 0 with ``new_order`` and the buffer is written back.

        Args:
            incremental_state: cache dictionary holding this layer's buffer.
            new_order: indices selecting the new ordering along axis 0.

        Returns:
            The (updated) ``incremental_state``.
        """
        input_buffer = self._get_input_buffer(incremental_state)
        if input_buffer is not None:
            for k in input_buffer.keys():
                input_buffer_k = input_buffer[k]
                if input_buffer_k is not None:
                    if self.encoder_decoder_attention:
                        # The buffer holds one entry per sentence rather than
                        # per beam entry: return without reordering anything.
                        if input_buffer_k.shape[
                                0] * self.beam_size == new_order.shape[0]:
                            return incremental_state
                        elif self.beam_size > 1:
                            # Take the first index of every beam group and
                            # floor-divide it by beam_size before selecting.
                            input_buffer[k] = paddle.index_select(
                                input_buffer_k,
                                index=new_order.reshape(
                                    [-1, self.beam_size])[:, 0] //
                                self.beam_size,
                                axis=0, )
                        else:
                            input_buffer[k] = paddle.index_select(
                                input_buffer_k, index=new_order, axis=0)
                    else:
                        input_buffer[k] = paddle.index_select(
                            input_buffer_k, index=new_order, axis=0)
            incremental_state = self._set_input_buffer(incremental_state,
                                                       input_buffer)
        return incremental_state

    def set_beam_size(self, beam_size):
        """Used for efficient beamable enc-dec attention.

        Stores ``beam_size``; ``forward`` and ``reorder_incremental_state``
        read it to handle keys shared across the entries of a beam.
        """
        self.beam_size = beam_size

    def _get_input_buffer(
            self,
            incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]]
    ) -> Dict[str, Optional[Tensor]]:
        """Fetch this layer's "attn_state" buffer, or an empty dict if unset."""
        cached = self.get_incremental_state(incremental_state, "attn_state")
        if cached is None:
            # Nothing stored yet: hand back a fresh, empty buffer.
            fresh_buffer: Dict[str, Optional[Tensor]] = {}
            return fresh_buffer
        return cached

    def _set_input_buffer(
            self,
            incremental_state: Dict[str, Dict[str, Optional[Tensor]]],
            buffer: Dict[str, Optional[Tensor]], ):
        """Store ``buffer`` under the "attn_state" key.

        Delegates to ``self.set_incremental_state`` and returns its result.
        NOTE(review): ``set_incremental_state`` is presumably supplied by the
        ``with_incremental_state`` class decorator - confirm.
        """
        return self.set_incremental_state(incremental_state, "attn_state",
                                          buffer)

    def apply_sparse_mask(self,
                          attn_weights,
                          tgt_len: int,
                          src_len: int,
                          bsz: int):
        """Return ``attn_weights`` unchanged.

        ``forward`` calls this on the raw attention scores; this
        implementation ignores ``tgt_len``, ``src_len`` and ``bsz``.
        NOTE(review): presumably a hook for subclasses that apply sparse
        attention patterns - confirm.
        """
        return attn_weights

    def upgrade_state_dict_named(self, state_dict, name):
        """Split fused ``in_proj_*`` entries into per-projection entries.

        Every key ending in ``<name>.in_proj_weight`` is cut into three equal
        chunks along axis 0 and stored as ``q_proj.weight``, ``k_proj.weight``
        and ``v_proj.weight`` under the same prefix; ``in_proj_bias`` is split
        likewise when present. The fused entries are removed. ``state_dict``
        is modified in place.

        Args:
            state_dict: mapping from parameter names to tensors.
            name: name of this module inside the state dict ("" for none).
        """
        prefix = name + "." if name != "" else ""
        fused_weight_suffix = prefix + "in_proj_weight"
        fused_bias_key = prefix + "in_proj_bias"

        split_entries = {}
        obsolete_keys = []

        for old_key in state_dict.keys():
            if not old_key.endswith(fused_weight_suffix):
                continue

            # in_proj_weight used to be q + k + v with same dimensions
            fused_weight = state_dict[old_key]
            dim = int(fused_weight.shape[0] / 3)
            split_entries[prefix + "q_proj.weight"] = fused_weight[:dim]
            split_entries[prefix + "k_proj.weight"] = fused_weight[dim:2 * dim]
            split_entries[prefix + "v_proj.weight"] = fused_weight[2 * dim:]
            obsolete_keys.append(old_key)

            if fused_bias_key in state_dict.keys():
                fused_bias = state_dict[fused_bias_key]
                split_entries[prefix + "q_proj.bias"] = fused_bias[:dim]
                split_entries[prefix + "k_proj.bias"] = fused_bias[dim:2 * dim]
                split_entries[prefix + "v_proj.bias"] = fused_bias[2 * dim:]
                obsolete_keys.append(fused_bias_key)

        # Mutate only after iteration so the dict is not resized mid-loop.
        for stale in obsolete_keys:
            del state_dict[stale]

        for new_key, tensor in split_entries.items():
            state_dict[new_key] = tensor


class GumbelVectorQuantizer(nn.Layer):
    """Product-quantization layer that selects codebook entries via Gumbel softmax."""

    def __init__(
            self,
            dim,
            num_vars,
            temp,
            groups,
            combine_groups,
            vq_dim,
            time_first,
            activation=nn.GELU(),
            weight_proj_depth=1,
            weight_proj_factor=1, ):
        """Vector quantization using gumbel softmax

        Args:
            dim: input dimension (channels)
            num_vars: number of quantized vectors per group
            temp: temperature for training. this should be a tuple of 3 elements: (start, stop, decay factor)
            groups: number of groups for vector quantization
            combine_groups: whether to use the vectors for all groups
            vq_dim: dimensionality of the resulting quantized vector
            time_first: if true, expect input in BxTxC format, otherwise in BxCxT
            activation: what activation to use (should be a module). this is only used if weight_proj_depth is > 1
            weight_proj_depth: number of layers (with activation in between) to project input before computing logits
            weight_proj_factor: this is used only if weight_proj_depth is > 1. scales the inner dimensionality of
                                projections by this factor
        """
        super().__init__()

        self.groups = groups
        self.combine_groups = combine_groups
        self.input_dim = dim
        self.num_vars = num_vars
        self.time_first = time_first

        assert (
            vq_dim % groups == 0
        ), f"dim {vq_dim} must be divisible by groups {groups} for concatenation"

        var_dim = vq_dim // groups
        num_groups = groups if not combine_groups else 1

        # Codebook: one row per (group, variable) pair, shared when combine_groups.
        self.vars = self.create_parameter(
            (1, num_groups * num_vars, var_dim),
            default_initializer=nn.initializer.Uniform())

        if weight_proj_depth > 1:

            def block(input_dim, output_dim):
                return nn.Sequential(Linear(input_dim, output_dim), activation)

            inner_dim = self.input_dim * weight_proj_factor
            self.weight_proj = nn.Sequential(
                *[
                    block(self.input_dim if i == 0 else inner_dim, inner_dim)
                    for i in range(weight_proj_depth - 1)
                ],
                Linear(inner_dim, groups * num_vars), )
        else:
            self.weight_proj = Linear(
                self.input_dim,
                groups * num_vars,
                weight_attr=nn.initializer.Normal(mean=0, std=1),
                bias_attr=nn.initializer.Zero())

        if isinstance(temp, str):
            import ast

            # literal_eval only accepts Python literals, so a config string
            # such as "(2, 0.5, 0.999995)" is parsed without executing code.
            temp = ast.literal_eval(temp)
        assert len(temp) == 3, f"{temp}, {len(temp)}"

        self.max_temp, self.min_temp, self.temp_decay = temp
        self.curr_temp = self.max_temp
        self.codebook_indices = None

    def set_num_updates(self, num_updates):
        """Anneal the temperature: max_temp * decay**num_updates, floored at min_temp."""
        self.curr_temp = max(self.max_temp * self.temp_decay**num_updates,
                             self.min_temp)

    def get_codebook_indices(self):
        """Return (and cache) the flattened row indices into ``self.vars`` for
        every combination of per-group variable choices."""
        if self.codebook_indices is None:
            from itertools import product

            p = [range(self.num_vars)] * self.groups
            inds = list(product(*p))
            indices = paddle.to_tensor(
                inds, dtype='int64', place=self.vars.place).flatten()

            if not self.combine_groups:
                # paddle's reshape takes the target shape as a single list.
                indices = indices.reshape([self.num_vars**self.groups, -1])
                # Group b owns rows [b * num_vars, (b + 1) * num_vars) of vars.
                for b in range(1, self.groups):
                    indices[:, b] += self.num_vars * b
                indices = indices.flatten()
            self.codebook_indices = indices
        return self.codebook_indices

    def codebook(self):
        """Gather codebook rows for all index combinations, reshaped to
        ``[num_vars ** groups, -1]``."""
        indices = self.get_codebook_indices()
        # paddle signature is index_select(index, axis), not (dim, index).
        return (self.vars.squeeze(0).index_select(indices, axis=0)
                .reshape([self.num_vars**self.groups, -1]))

    def sample_from_codebook(self, b, n):
        """Draw ``b * n`` random codebook combinations and return them with
        shape ``[b, n, -1]``."""
        indices = self.get_codebook_indices()
        indices = indices.reshape([-1, self.groups])
        cb_size = indices.shape[0]
        assert (n < cb_size
                ), f"sample size {n} is greater than size of codebook {cb_size}"
        sample_idx = paddle.randint(low=0, high=cb_size, shape=(b * n, ))
        indices = indices[sample_idx]

        z = self.vars.squeeze(0).index_select(
            indices.flatten(), axis=0).reshape([b, n, -1])
        return z

    def to_codebook_index(self, indices):
        """Fold per-group indices (last axis) into one base-``num_vars`` index."""
        res = paddle.full(indices.shape[:-1], 0, dtype=indices.dtype)
        for i in range(self.groups):
            exponent = self.groups - i - 1
            res += indices[..., i] * (self.num_vars**exponent)
        return res

    def forward_idx(self, x):
        """Run ``forward`` with targets enabled and return ``(x, targets)``."""
        res = self.forward(x, produce_targets=True)
        return res["x"], res["targets"]

    def forward(self, x, produce_targets=False):
        """Quantize ``x``.

        Args:
            x: 3-D input; transposed to put the channel axis last when
                ``time_first`` is false.
            produce_targets: also return the selected index per group.

        Returns:
            dict with keys ``num_vars``, ``code_perplexity``,
            ``prob_perplexity``, ``temp``, ``x`` and, when requested,
            ``targets``.
        """
        result = {"num_vars": self.num_vars * self.groups}

        if not self.time_first:
            x = x.transpose([0, 2, 1])

        bsz, tsz, fsz = x.shape
        x = x.reshape([-1, fsz])
        x = self.weight_proj(x)
        x = x.reshape([bsz * tsz * self.groups, -1])

        # Hard (arg-max) one-hot assignment.  paddle's Tensor.max returns only
        # the values, so the winning index comes from argmax, and the one-hot
        # is built with F.one_hot rather than a torch-style scatter_.
        k = x.argmax(axis=-1)
        hard_x = F.one_hot(k, num_classes=x.shape[-1]).astype(x.dtype)
        hard_x = hard_x.reshape([bsz * tsz, self.groups, -1])
        hard_probs = paddle.mean(hard_x.astype('float32'), axis=0)
        result["code_perplexity"] = paddle.exp(-paddle.sum(
            hard_probs * paddle.log(hard_probs + 1e-7), axis=-1)).sum()

        avg_probs = F.softmax(
            x.reshape([bsz * tsz, self.groups, -1]).astype('float32'),
            axis=-1).mean(axis=0)
        result["prob_perplexity"] = paddle.exp(-paddle.sum(
            avg_probs * paddle.log(avg_probs + 1e-7), axis=-1)).sum()

        result["temp"] = self.curr_temp

        if self.training:
            x = F.gumbel_softmax(
                x.astype('float32'), temperature=self.curr_temp,
                hard=True).astype(x.dtype)
        else:
            x = hard_x

        x = x.reshape([bsz * tsz, -1])

        codebook_vars = self.vars
        if self.combine_groups:
            # One shared set of vectors is repeated for every group.
            codebook_vars = codebook_vars.tile([1, self.groups, 1])

        if produce_targets:
            result["targets"] = (x.reshape([bsz * tsz * self.groups, -1])
                                 .argmax(axis=-1)
                                 .reshape([bsz, tsz, self.groups]).detach())

        # Weight every codebook row by its (one-hot) selection and sum within
        # each group, which picks one vector per group.
        x = x.unsqueeze(-1) * codebook_vars
        x = x.reshape([bsz * tsz, self.groups, self.num_vars, -1])
        x = x.sum(axis=-2)
        x = x.reshape([bsz, tsz, -1])

        if not self.time_first:
            x = x.transpose([0, 2, 1])

        result["x"] = x

        return result


class GradMultiply(paddle.autograd.PyLayer):
    """PyLayer whose forward copies its input and whose backward multiplies
    the incoming gradient by ``scale``."""

    @staticmethod
    def forward(ctx, x, scale):
        # Remember the factor for the backward pass.
        ctx.scale = scale
        # The output is a fresh tensor built from a copy of x's values.
        return paddle.to_tensor(x.numpy().copy(), dtype=x.dtype)

    @staticmethod
    def backward(ctx, grad):
        scaled_grad = grad * ctx.scale
        return scaled_grad, None


class SamePad(nn.Layer):
    """Drop trailing entries of the last axis of a 3-D input.

    The number removed is ``kernel_size - 1`` when ``causal`` is true,
    otherwise 1 for an even ``kernel_size`` and 0 for an odd one.
    """

    def __init__(self, kernel_size, causal=False):
        super().__init__()
        if causal:
            trim = kernel_size - 1
        elif kernel_size % 2 == 0:
            trim = 1
        else:
            trim = 0
        self.remove = trim

    def forward(self, x):
        if not self.remove > 0:
            return x
        return x[:, :, :-self.remove]


class TransposeLast(nn.Layer):
    """Swap the last two axes of the input.

    Args:
        deconstruct_idx: when not None, the input is first indexed with this
            value (``x[deconstruct_idx]``) and the result is transposed.
    """

    def __init__(self, deconstruct_idx=None):
        super().__init__()
        self.deconstruct_idx = deconstruct_idx

    def forward(self, x):
        if self.deconstruct_idx is not None:
            x = x[self.deconstruct_idx]
        # paddle's transpose documents `perm` as a list/tuple of ints, so build
        # the permutation as a plain Python list instead of a numpy array.
        perm = list(range(x.dim()))
        perm[-1], perm[-2] = perm[-2], perm[-1]
        return x.transpose(perm)


class Fp32LayerNorm(LayerNorm):
    """LayerNorm that computes in float32 and casts the result back to the
    input's dtype."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def forward(self, input):
        # Scale and shift are optional; cast them only when present.
        weight32 = None if self.weight is None else self.weight.astype(
            'float32')
        bias32 = None if self.bias is None else self.bias.astype('float32')
        normed = F.layer_norm(
            input.astype('float32'),
            self._normalized_shape,
            weight32,
            bias32,
            self._epsilon, )
        return normed.astype(input.dtype)


# Todo: change this when paddle supports F.group_norm
class Fp32GroupNorm(nn.Layer):
    """GroupNorm wrapper that runs in float32 and casts the result back to the
    input's dtype.

    The wrapped ``paddle.nn.GroupNorm`` has its ``weight`` and ``bias``
    replaced by float32 parameters initialised from the original values.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()
        self.group_norm = paddle.nn.GroupNorm(*args, **kwargs)

        def as_fp32_parameter(source):
            # New float32 parameter with the same shape, filled from `source`.
            return paddle.create_parameter(
                shape=source.shape,
                dtype='float32',
                default_initializer=paddle.nn.initializer.Assign(source))

        weight32 = as_fp32_parameter(self.group_norm.weight)
        bias32 = as_fp32_parameter(self.group_norm.bias)
        self.group_norm.weight = weight32
        self.group_norm.bias = bias32

    def forward(self, input):
        normed = self.group_norm(input.astype('float32'))
        return normed.astype(input.dtype)


class StrEnumMeta(EnumMeta):
    """Enum metaclass with a relaxed instance check."""

    # this is workaround for submitit pickling leading to instance checks failing in hydra for StrEnum, see
    # https://github.com/facebookresearch/hydra/issues/1156
    @classmethod
    def __instancecheck__(cls, other):
        # Any object whose type's string form mentions "enum" is accepted.
        type_repr = str(type(other))
        return "enum" in type_repr


class StrEnum(Enum, metaclass=StrEnumMeta):
    """Enum whose members print, compare and hash through their ``value``."""

    def __str__(self):
        return self.value

    def __eq__(self, other: str):
        # Compare on the underlying value rather than on identity.
        return self.value == other

    def __repr__(self):
        return self.value

    def __hash__(self):
        # Hash follows the string form of the member.
        text = str(self)
        return hash(text)


def ChoiceEnum(choices: List[str]):
    """return the Enum class used to enforce list of choices"""
    # Each choice becomes a member whose name and value are the same string.
    members = {choice: choice for choice in choices}
    return StrEnum("Choices", members)


def relu_squared(x: paddle.Tensor):
    """Apply ReLU to ``x`` and square the result element-wise."""
    rectified = F.relu(x)
    return rectified.pow(2)


def get_activation_fn(activation: str) -> Callable:
    """Returns the activation function corresponding to `activation`

    Supported names: ``relu``, ``relu_squared``, ``gelu``, ``gelu_fast``
    (alias of ``gelu_accurate``), ``gelu_accurate``, ``tanh``, ``linear`` and
    ``swish``.

    Raises:
        RuntimeError: if ``activation`` is not one of the supported names.
    """
    sqrt_2_over_pi = math.sqrt(2 / math.pi)

    def gelu_accurate(x):
        # tanh-based approximation of GELU
        return (0.5 * x * (1 + paddle.tanh(sqrt_2_over_pi *
                                           (x + 0.044715 * paddle.pow(x, 3)))))

    def gelu(x: "paddle.Tensor") -> "paddle.Tensor":
        # Evaluate in float32, then cast back to the input dtype.
        return paddle.nn.functional.gelu(x.astype('float32')).astype(x.dtype)

    if activation == "relu":
        return F.relu
    elif activation == "relu_squared":
        return relu_squared
    elif activation == "gelu":
        return gelu
    elif activation == "gelu_fast":
        return gelu_accurate
    elif activation == "gelu_accurate":
        return gelu_accurate
    elif activation == "tanh":
        return paddle.tanh
    elif activation == "linear":
        return lambda x: x
    elif activation == "swish":
        # Return the functional form: the nn.Swish *class* is not an
        # activation function (calling it with a tensor would try to
        # construct a layer instead of activating the tensor).
        return F.swish
    else:
        raise RuntimeError(
            "--activation-fn {} not supported".format(activation))


def get_available_activation_fns() -> List:
    """Return a new list with the activation names offered as config choices."""
    names = (
        "relu",
        "gelu",
        "gelu_fast",  # deprecated
        "gelu_accurate",
        "tanh",
        "linear", )
    return list(names)


def compute_mask_indices(
        shape: Tuple[int, int],
        padding_mask: Optional["paddle.Tensor"],
        mask_prob: float,
        mask_length: int,
        mask_type: str="static",
        mask_other: float=0.0,
        min_masks: int=0,
        no_overlap: bool=False,
        min_space: int=0,
        require_same_masks: bool=True,
        mask_dropout: float=0.0, ) -> np.ndarray:
    """
    Computes random mask spans for a given shape

    Args:
        shape: the shape for which to compute masks.
            should be of size 2 where first element is batch size and 2nd is timesteps
        padding_mask: optional padding mask of the same size as shape, which will prevent masking padded elements
        mask_prob: probability for each token to be chosen as start of the span to be masked. this will be multiplied by
            number of timesteps divided by length of mask span to mask approximately this percentage of all elements.
            however due to overlaps, the actual number will be smaller (unless no_overlap is True)
        mask_length: length of each span for the "static" type; used as the scale of the
            sampled lengths for the other types
        mask_type: how to compute mask lengths
            static = fixed size
            uniform = sample from uniform distribution [mask_other, mask_length*2]
            normal = sample from normal distribution with mean mask_length and stdev mask_other. mask is min 1 element
            poisson = sample from poisson distribution with lambda = mask length
        mask_other: secondary parameter of the "uniform" and "normal" types (see mask_type)
        min_masks: minimum number of masked spans
        no_overlap: if true, will switch to an alternative recursive algorithm that prevents spans from overlapping
        min_space: only used if no_overlap is True, this is how many elements to keep unmasked between spans
        require_same_masks: if true, will randomly drop out masks until same amount of masks remains in each sample
        mask_dropout: randomly dropout this percentage of masks in each example

    Returns:
        boolean array of shape ``shape``; True marks a masked position.

    Raises:
        Exception: if ``mask_type`` is not one of the supported names.
    """

    bsz, all_sz = shape
    mask = np.full((bsz, all_sz), False)

    all_num_mask = int(
        # add a random number for probabilistic rounding
        mask_prob * all_sz / float(mask_length) + np.random.rand())

    all_num_mask = max(min_masks, all_num_mask)

    mask_idcs = []
    for i in range(bsz):
        if padding_mask is not None:
            # astype works for both paddle tensors and numpy arrays
            # (paddle tensors have no torch-style `.long()`).
            num_padded = int(padding_mask[i].astype('int64').sum().item())
            sz = all_sz - num_padded
            num_mask = int(
                # add a random number for probabilistic rounding
                mask_prob * sz / float(mask_length) + np.random.rand())
            num_mask = max(min_masks, num_mask)
        else:
            sz = all_sz
            num_mask = all_num_mask

        if num_mask <= 0:
            # No span requested for this sample: mask nothing instead of
            # failing on `lengths[0]` / `min(lengths)` with empty lengths.
            mask_idcs.append(np.empty(0, dtype=np.int64))
            continue

        if mask_type == "static":
            lengths = np.full(num_mask, mask_length)
        elif mask_type == "uniform":
            lengths = np.random.randint(
                mask_other, mask_length * 2 + 1, size=num_mask)
        elif mask_type == "normal":
            lengths = np.random.normal(mask_length, mask_other, size=num_mask)
            lengths = [max(1, int(round(x))) for x in lengths]
        elif mask_type == "poisson":
            lengths = np.random.poisson(mask_length, size=num_mask)
            lengths = [int(round(x)) for x in lengths]
        else:
            raise Exception("unknown mask selection " + mask_type)

        if sum(lengths) == 0:
            # All sampled spans are empty: force one non-empty span.
            lengths[0] = min(mask_length, sz - 1)

        if no_overlap:
            mask_idc = []

            def arrange(s, e, length, keep_length):
                # Place one span of `length` inside [s, e) and return the
                # left/right leftovers that may still host another span.
                if e - length <= s:
                    # The span exactly fills the segment; randint(s, s)
                    # would raise, and s is the only valid start.
                    span_start = s
                else:
                    span_start = np.random.randint(s, e - length)
                mask_idc.extend(span_start + i for i in range(length))

                new_parts = []
                if span_start - s - min_space >= keep_length:
                    new_parts.append((s, span_start - min_space + 1))
                if e - span_start - length - min_space > keep_length:
                    new_parts.append((span_start + length + min_space, e))
                return new_parts

            parts = [(0, sz)]
            min_length = min(lengths)
            for length in sorted(lengths, reverse=True):
                # Weight each free segment by its size; segments too short
                # for this span get weight 0.
                lens = np.fromiter(
                    (e - s if e - s >= length + min_space else 0
                     for s, e in parts),
                    np.int_, )
                l_sum = np.sum(lens)
                if l_sum == 0:
                    break
                probs = lens / l_sum
                c = np.random.choice(len(parts), p=probs)
                s, e = parts.pop(c)
                parts.extend(arrange(s, e, length, min_length))
            # Explicit integer dtype: an empty list would otherwise become a
            # float array, which cannot be used to index `mask`.
            mask_idc = np.asarray(mask_idc, dtype=np.int64)
        else:
            min_len = min(lengths)
            if sz - min_len <= num_mask:
                min_len = sz - num_mask - 1

            mask_idc = np.random.choice(sz - min_len, num_mask, replace=False)

            # Expand every span start into the indices it covers.
            mask_idc = np.asarray(
                [
                    mask_idc[j] + offset
                    for j in range(len(mask_idc))
                    for offset in range(lengths[j])
                ],
                dtype=np.int64)

        mask_idcs.append(np.unique(mask_idc[mask_idc < sz]))

    # default=0 keeps an empty batch from raising on min([]).
    min_len = min([len(m) for m in mask_idcs], default=0)
    for i, mask_idc in enumerate(mask_idcs):
        if len(mask_idc) > min_len and require_same_masks:
            mask_idc = np.random.choice(mask_idc, min_len, replace=False)
        if mask_dropout > 0:
            num_holes = np.rint(len(mask_idc) * mask_dropout).astype(int)
            mask_idc = np.random.choice(
                mask_idc, len(mask_idc) - num_holes, replace=False)

        mask[i, mask_idc] = True

    return mask


def index_put(tensor, indices, value):
    """Assign ``value`` at ``tensor[indices]`` in place and return ``tensor``."""
    tensor[indices] = value
    return tensor


# ToDo if faster?
def buffered_arange(max):
    """Return ``arange(max)`` (int64), served from a buffer cached on the function.

    The buffer is (re)built only when none exists yet or when it is shorter
    than ``max``; otherwise a slice of the cached buffer is returned.
    """
    buf = getattr(buffered_arange, "buf", None)
    # The first call must fill the buffer as well: allocating it with
    # `paddle.empty([max])` made `max > numel` false, so the uninitialized
    # memory was returned as-is.
    if buf is None or max > buf.shape[0]:
        buf = paddle.arange(max, dtype='int64')
        buffered_arange.buf = buf
    return buf[:max]


def pad_to_multiple(x, multiple, dim=-1, value=0):
    """Pad ``x`` so that ``x.shape[dim]`` becomes a multiple of ``multiple``.

    Returns:
        ``(padded, remainder)`` where ``remainder`` is the number of padded
        entries; ``(x, 0)`` when no padding is needed and ``(None, 0)`` when
        ``x`` is None.
    """
    # Inspired from https://github.com/lucidrains/local-attention/blob/master/local_attention/local_attention.py#L41
    if x is None:
        return None, 0

    length = x.shape[dim]
    ratio = length / multiple
    if ratio.is_integer():
        return x, 0

    extra = math.ceil(ratio) * multiple - length
    # Two zero entries for every axis between `dim` and the last axis.
    zeros = (0, ) * (-1 - dim) * 2
    padded = F.pad(
        x,
        pad=[*zeros, 0, extra, *zeros],
        value=value,
        data_format='NLC')
    return padded, extra


# String-choice enums referenced by the Wav2Vec2Config field annotations.
EXTRACTOR_MODE_CHOICES = ChoiceEnum([
    "default",
    "layer_norm",
])
MASKING_DISTRIBUTION_CHOICES = ChoiceEnum([
    "static",
    "uniform",
    "normal",
    "poisson",
])
LAYER_TYPE_CHOICES = ChoiceEnum([
    "transformer",
])  # ToDo: conformer


@dataclass
class Wav2Vec2Config:
    """Configuration consumed by ``Wav2Vec2Model``.

    Every field carries its own description in ``metadata["help"]``; the
    comments below only group the fields by topic.
    """

    # ---- feature extractor / transformer encoder architecture ----
    extractor_mode: EXTRACTOR_MODE_CHOICES = field(
        default="default",
        metadata={
            "help":
            "mode for feature extractor. default has a single group norm with d "
            "groups in the first conv block, whereas layer_norm has layer norms in "
            "every block (meant to use with normalize=True)"
        }, )
    encoder_layers: int = field(
        default=12, metadata={"help": "num encoder layers in the transformer"})
    encoder_embed_dim: int = field(
        default=768, metadata={"help": "encoder embedding dimension"})
    encoder_ffn_embed_dim: int = field(
        default=3072, metadata={"help": "encoder embedding dimension for FFN"})
    encoder_attention_heads: int = field(
        default=12, metadata={"help": "num encoder attention heads"})
    activation_fn: ChoiceEnum(get_available_activation_fns()) = field(
        default="gelu", metadata={"help": "activation function to use"})
    layer_type: LAYER_TYPE_CHOICES = field(
        default="transformer", metadata={"help": "layer type in encoder"})
    # dropouts
    dropout: float = field(
        default=0.1,
        metadata={"help": "dropout probability for the transformer"})
    attention_dropout: float = field(
        default=0.1,
        metadata={"help": "dropout probability for attention weights"})
    activation_dropout: float = field(
        default=0.0,
        metadata={"help": "dropout probability after activation in FFN"})
    encoder_layerdrop: float = field(
        default=0.0,
        metadata={"help": "probability of dropping a tarnsformer layer"})
    dropout_input: float = field(
        default=0.0,
        metadata={"help": "dropout to apply to the input (after feat extr)"}, )
    dropout_features: float = field(
        default=0.0,
        metadata={"help": "dropout to apply to the features (after feat extr)"},
    )

    # ---- projections, conv front-end and quantizer ----
    final_dim: int = field(
        default=0,
        metadata={
            "help":
            "project final representations and targets to this many dimensions."
            "set to encoder_embed_dim is <= 0"
        }, )
    layer_norm_first: bool = field(
        default=False,
        metadata={"help": "apply layernorm first in the transformer"})
    # NOTE: Wav2Vec2Model.__init__ passes this string to eval().
    conv_feature_layers: str = field(
        default="[(512, 10, 5)] + [(512, 3, 2)] * 4 + [(512,2,2)] + [(512,2,2)]",
        metadata={
            "help":
            "string describing convolutional feature extraction layers in form of a python list that contains "
            "[(dim, kernel_size, stride), ...]"
        }, )
    conv_bias: bool = field(
        default=False, metadata={"help": "include bias in conv encoder"})
    logit_temp: float = field(
        default=0.1, metadata={"help": "temperature to divide logits by"})
    quantize_targets: bool = field(
        default=False, metadata={"help": "use quantized targets"})
    quantize_input: bool = field(
        default=False, metadata={"help": "use quantized inputs"})
    same_quantizer: bool = field(
        default=False,
        metadata={"help": "use same quantizer for inputs and targets"})
    target_glu: bool = field(
        default=False, metadata={"help": "adds projection + glu to targets"})
    feature_grad_mult: float = field(
        default=1.0,
        metadata={"help": "multiply feature extractor var grads by this"})
    quantizer_depth: int = field(
        default=1,
        metadata={"help": "number of quantizer layers"}, )
    quantizer_factor: int = field(
        default=3,
        metadata={
            "help":
            "dimensionality increase for inner quantizer layers (if depth > 1)"
        }, )
    latent_vars: int = field(
        default=320,
        metadata={
            "help": "number of latent variables V in each group of the codebook"
        }, )
    latent_groups: int = field(
        default=2,
        metadata={
            "help": "number of groups G of latent variables in the codebook"
        }, )
    latent_dim: int = field(
        default=0,
        metadata={
            "help":
            "if > 0, uses this dimensionality for latent variables. "
            "otherwise uses final_dim / latent_groups"
        }, )

    # masking
    mask_length: int = field(default=10, metadata={"help": "mask length"})
    mask_prob: float = field(
        default=0.65,
        metadata={"help": "probability of replacing a token with mask"})
    mask_selection: MASKING_DISTRIBUTION_CHOICES = field(
        default="static", metadata={"help": "how to choose mask length"})
    mask_other: float = field(
        default=0,
        metadata={
            "help":
            "secondary mask argument (used for more complex distributions), "
            "see help in compute_mask_indices"
        }, )
    # NOTE(review): presumably forwarded as `no_overlap` to
    # compute_mask_indices, where True *prevents* overlap - confirm at call site.
    no_mask_overlap: bool = field(
        default=False, metadata={"help": "whether to allow masks to overlap"})
    mask_min_space: int = field(
        default=1,
        metadata={"help": "min space between spans (if no overlap is enabled)"},
    )
    require_same_masks: bool = field(
        default=True,
        metadata={
            "help":
            "whether to number of masked timesteps must be the same across all "
            "examples in a batch"
        }, )
    mask_dropout: float = field(
        default=0.0,
        metadata={"help": "percent of masks to unmask for each sample"}, )

    # channel masking
    mask_channel_length: int = field(
        default=10,
        metadata={"help": "length of the mask for features (channels)"})
    mask_channel_prob: float = field(
        default=0.0,
        metadata={"help": "probability of replacing a feature with 0"})
    # Plain default without help metadata.
    mask_channel_before: bool = False
    mask_channel_selection: MASKING_DISTRIBUTION_CHOICES = field(
        default="static",
        metadata={"help": "how to choose mask length for channel masking"}, )
    mask_channel_other: float = field(
        default=0,
        metadata={
            "help":
            "secondary mask argument (used for more complex distributions), "
            "see help in compute_mask_indicesh"
        }, )
    no_mask_channel_overlap: bool = field(
        default=False,
        metadata={"help": "whether to allow channel masks to overlap"})
    mask_channel_min_space: int = field(
        default=1,
        metadata={"help": "min space between spans (if no overlap is enabled)"},
    )

    # negative selection
    num_negatives: int = field(
        default=100,
        metadata={"help": "number of negative examples from the same sample"}, )
    negatives_from_everywhere: bool = field(
        default=False,
        metadata={
            "help": "sample negatives from everywhere, not just masked states"
        }, )
    cross_sample_negatives: int = field(
        default=0,
        metadata={"help": "number of negative examples from the any sample"})
    codebook_negatives: int = field(
        default=0, metadata={"help": "number of negative examples codebook"})

    # positional embeddings
    conv_pos: int = field(
        default=128,
        metadata={
            "help": "number of filters for convolutional positional embeddings"
        }, )
    conv_pos_groups: int = field(
        default=16,
        metadata={
            "help": "number of groups for convolutional positional embedding"
        }, )
    pos_conv_depth: int = field(
        default=1,
        metadata={"help": "depth of positional encoder network"}, )

    # (start, end, decay) temperature schedule, see the help text.
    latent_temp: Tuple[float, float, float] = field(
        default=(2, 0.5, 0.999995),
        metadata={
            "help":
            "temperature for latent variable sampling. "
            "can be tuple of 3 values (start, end, decay)"
        }, )
    max_positions: int = field(
        default=100000, metadata={"help": "Max positions"})
    checkpoint_activations: bool = field(
        default=False,
        metadata={
            "help": "recompute activations and save memory for extra compute"
        }, )

    # FP16 optimization
    required_seq_len_multiple: int = field(
        default=2,
        metadata={
            "help":
            "pad the input to encoder such that the sequence length is divisible by multiple"
        }, )
    crop_seq_to_multiple: int = field(
        default=1,
        metadata={
            "help":
            "crop convolutional feature extractor output such that the sequence length is divisible by multiple"
        }, )

    # Conformer
    depthwise_conv_kernel_size: int = field(
        default=31,
        metadata={
            "help":
            "depthwise-conv-kernel-size for convolution in conformer layer"
        }, )
    attn_type: str = field(
        default="",
        metadata={"help": "if espnet use ESPNET MHA"}, )
    pos_enc_type: str = field(
        default="abs",
        metadata={"help": "Positional encoding type to use in conformer"}, )
    fp16: bool = field(
        default=False, metadata={"help": "If fp16 is being used"})

class Wav2Vec2Model(nn.Layer):
    """wav2vec 2.0 model.

    The model chains a convolutional feature extractor, optional Gumbel
    vector quantizers (for the targets and/or the encoder input), a masking
    step and a Transformer context encoder. `forward` either returns encoder
    features (`features_only=True`) or the contrastive logits used for
    pre-training.

    Args:
        cfg (Wav2Vec2Config): model configuration. It is kept on the instance
            as `self.cfg` and read again by several methods.
    """

    def __init__(self, cfg: Wav2Vec2Config):
        super().__init__()
        self.cfg = cfg

        # NOTE(security): `conv_feature_layers` is a Python expression stored
        # as a string and evaluated with eval(); only load trusted configs.
        feature_enc_layers = eval(cfg.conv_feature_layers)
        # Channel count produced by the last convolutional layer.
        self.embed = feature_enc_layers[-1][0]

        self.feature_extractor = ConvFeatureExtractionModel(
            conv_layers=feature_enc_layers,
            dropout=0.0,
            mode=cfg.extractor_mode,
            conv_bias=cfg.conv_bias, )

        # Only project when the extractor width differs from the encoder
        # width and the input is not going through the input quantizer.
        self.post_extract_proj = (Linear(self.embed, cfg.encoder_embed_dim)
                                  if self.embed != cfg.encoder_embed_dim and
                                  not cfg.quantize_input else None)

        self.crop_seq_to_multiple = cfg.crop_seq_to_multiple

        # Time-axis masking settings.
        self.mask_prob = cfg.mask_prob
        self.mask_selection = cfg.mask_selection
        self.mask_other = cfg.mask_other
        self.mask_length = cfg.mask_length
        self.no_mask_overlap = cfg.no_mask_overlap
        self.mask_min_space = cfg.mask_min_space

        # Channel-axis masking settings.
        self.mask_channel_prob = cfg.mask_channel_prob
        self.mask_channel_before = cfg.mask_channel_before
        self.mask_channel_selection = cfg.mask_channel_selection
        self.mask_channel_other = cfg.mask_channel_other
        self.mask_channel_length = cfg.mask_channel_length
        self.no_mask_channel_overlap = cfg.no_mask_channel_overlap
        self.mask_channel_min_space = cfg.mask_channel_min_space

        self.dropout_input = nn.Dropout(cfg.dropout_input)
        self.dropout_features = nn.Dropout(cfg.dropout_features)

        self.feature_grad_mult = cfg.feature_grad_mult

        self.quantizer = None
        self.input_quantizer = None

        self.n_negatives = cfg.num_negatives
        self.cross_sample_negatives = cfg.cross_sample_negatives
        self.codebook_negatives = cfg.codebook_negatives
        self.negatives_from_everywhere = cfg.negatives_from_everywhere

        self.logit_temp = cfg.logit_temp

        final_dim = cfg.final_dim if cfg.final_dim > 0 else cfg.encoder_embed_dim

        if cfg.quantize_targets:
            vq_dim = cfg.latent_dim if cfg.latent_dim > 0 else final_dim
            self.quantizer = GumbelVectorQuantizer(
                dim=self.embed,
                num_vars=cfg.latent_vars,
                temp=cfg.latent_temp,
                groups=cfg.latent_groups,
                combine_groups=False,
                vq_dim=vq_dim,
                time_first=True,
                weight_proj_depth=cfg.quantizer_depth,
                weight_proj_factor=cfg.quantizer_factor, )
            self.project_q = Linear(vq_dim, final_dim)
        else:
            self.project_q = Linear(self.embed, final_dim)

        if cfg.quantize_input:
            if cfg.same_quantizer and self.quantizer is not None:
                # Share the target quantizer with the input path.
                vq_dim = final_dim
                self.input_quantizer = self.quantizer
            else:
                vq_dim = cfg.latent_dim if cfg.latent_dim > 0 else cfg.encoder_embed_dim
                self.input_quantizer = GumbelVectorQuantizer(
                    dim=self.embed,
                    num_vars=cfg.latent_vars,
                    temp=cfg.latent_temp,
                    groups=cfg.latent_groups,
                    combine_groups=False,
                    vq_dim=vq_dim,
                    time_first=True,
                    weight_proj_depth=cfg.quantizer_depth,
                    weight_proj_factor=cfg.quantizer_factor, )
            self.project_inp = Linear(vq_dim, cfg.encoder_embed_dim)

        # Learned vector written into masked time steps.
        self.mask_emb = self.create_parameter(
            shape=[cfg.encoder_embed_dim],
            default_initializer=paddle.nn.initializer.Uniform(),
            dtype='float32', )

        encoder_cls = TransformerEncoder

        self.encoder = encoder_cls(cfg)
        self.layer_norm = LayerNorm(self.embed)

        self.target_glu = None
        if cfg.target_glu:
            self.target_glu = nn.Sequential(
                Linear(final_dim, final_dim * 2), GLU())

        self.final_proj = Linear(cfg.encoder_embed_dim, final_dim)

    def upgrade_state_dict_named(self, state_dict, name):
        """Upgrade a (possibly old) state dict for new versions of fairseq.

        This is a no-op kept for interface compatibility: `state_dict` is
        returned unchanged and `name` is ignored. The parent class is not
        consulted because `nn.Layer` does not define this hook.
        """
        return state_dict

    @classmethod
    def build_model(cls, cfg: Wav2Vec2Config, task=None):
        """Build a new model instance from `cfg`; `task` is unused."""
        return cls(cfg)

    def apply_mask(
            self,
            x,
            padding_mask,
            mask_indices=None,
            mask_channel_indices=None, ):
        """Mask time steps and/or channels of `x`.

        Args:
            x: 3-D tensor unpacked as (B, T, C).
            padding_mask: forwarded to `compute_mask_indices` for the time
                mask.
            mask_indices: optional precomputed time mask; computed here when
                None and `mask_prob > 0`.
            mask_channel_indices: optional precomputed channel mask, only
                honoured on the "mask channels after" path.

        Returns:
            Tuple `(x, mask_indices)`. `mask_indices` is None when
            `mask_prob` is not positive.
        """
        B, T, C = x.shape

        if self.mask_channel_prob > 0 and self.mask_channel_before:
            mask_channel_indices = compute_mask_indices(
                (B, C),
                None,
                self.mask_channel_prob,
                self.mask_channel_length,
                self.mask_channel_selection,
                self.mask_channel_other,
                no_overlap=self.no_mask_channel_overlap,
                min_space=self.mask_channel_min_space, )
            # Broadcast the (B, C) channel mask over the time axis.
            mask_channel_indices = (
                paddle.to_tensor(mask_channel_indices, place=x.place)
                .unsqueeze(1).expand([-1, T, -1]))
            x[mask_channel_indices] = 0

        if self.mask_prob > 0:
            if mask_indices is None:
                mask_indices = compute_mask_indices(
                    (B, T),
                    padding_mask,
                    self.mask_prob,
                    self.mask_length,
                    self.mask_selection,
                    self.mask_other,
                    min_masks=2,
                    no_overlap=self.no_mask_overlap,
                    min_space=self.mask_min_space,
                    require_same_masks=self.cfg.require_same_masks,
                    mask_dropout=self.cfg.mask_dropout, )
                mask_indices = paddle.to_tensor(mask_indices, place=x.place)
            x = index_put(x, mask_indices, self.mask_emb)
        else:
            mask_indices = None

        if self.mask_channel_prob > 0 and not self.mask_channel_before:
            if mask_channel_indices is None:
                mask_channel_indices = compute_mask_indices(
                    (B, C),
                    None,
                    self.mask_channel_prob,
                    self.mask_channel_length,
                    self.mask_channel_selection,
                    self.mask_channel_other,
                    no_overlap=self.no_mask_channel_overlap,
                    min_space=self.mask_channel_min_space, )
                mask_channel_indices = (
                    paddle.to_tensor(mask_channel_indices, place=x.place)
                    .unsqueeze(1).expand([-1, T, -1]))
            x = index_put(x, mask_channel_indices, 0)

        return x, mask_indices

    def sample_negatives(self, y, num, padding_count=None):
        """Sample distractor vectors from `y` for the contrastive loss.

        Args:
            y: 3-D tensor unpacked as (bsz, tsz, fsz).
            num: number of target positions per sample to draw negatives for.
            padding_count: optional count subtracted from `tsz` to bound the
                within-sample index range.

        Returns:
            Tuple `(negs, neg_idxs)`; `negs` is reshaped and transposed to
            (n_negatives + cross_sample_negatives, bsz, num, fsz).
        """

        if self.n_negatives == 0 and self.cross_sample_negatives == 0:
            # NOTE(review): this early exit returns a single tensor while the
            # normal path returns a 2-tuple; callers in `forward` unpack two
            # values. Behaviour kept as-is.
            return paddle.empty([0], dtype=y.dtype)

        bsz, tsz, fsz = y.shape
        y = y.reshape([-1, fsz])  # BTC => (BxT)C

        # FIXME: what happens if padding_count is specified?
        cross_high = tsz * bsz
        high = tsz - (padding_count or 0)
        with paddle.no_grad():
            assert high > 1, f"{bsz,tsz,fsz}"

            if self.n_negatives > 0:
                tszs = (buffered_arange(num).unsqueeze(-1)
                        .expand([-1, self.n_negatives]).flatten())

                neg_idxs = paddle.randint(
                    low=0, high=high - 1, shape=[bsz, self.n_negatives * num])
                # Shift indices at or past the positive's own position by one
                # so the positive itself is never drawn.
                neg_idxs[neg_idxs >= tszs] += 1

            if self.cross_sample_negatives > 0:
                tszs = (buffered_arange(num).unsqueeze(-1)
                        .expand([-1, self.cross_sample_negatives]).flatten())

                cross_neg_idxs = paddle.randint(
                    low=0,
                    high=cross_high - 1,
                    shape=[bsz, self.cross_sample_negatives * num], )
                cross_neg_idxs[cross_neg_idxs >= tszs] += 1

        if self.n_negatives > 0:
            # Offset per-sample indices into the flattened (BxT) axis.
            neg_idxs = neg_idxs + (paddle.arange(bsz).unsqueeze(1) * high)
        else:
            neg_idxs = cross_neg_idxs

        if self.cross_sample_negatives > 0 and self.n_negatives > 0:
            neg_idxs = paddle.concat([neg_idxs, cross_neg_idxs], axis=1)

        negs = y[neg_idxs.reshape([-1])]
        negs = negs.reshape(
            [bsz, num, self.n_negatives + self.cross_sample_negatives,
             fsz]).transpose([2, 0, 1, 3])  # to NxBxTxC
        return negs, neg_idxs

    def compute_preds(self, x, y, negatives):
        """Cosine-similarity logits of `x` against the target and negatives.

        The positive `y` is stacked in front of `negatives` along axis 0, so
        index 0 of the result's first axis belongs to the true target. The
        similarities are divided by `self.logit_temp` and cast to `x.dtype`.

        NOTE(review): negatives that are identical to the positive are not
        masked out here.
        """
        y = y.unsqueeze(0)
        targets = paddle.concat([y, negatives], axis=0)

        logits = paddle.nn.functional.cosine_similarity(x, targets, axis=-1)
        logits = logits / self.logit_temp
        logits = logits.astype(x.dtype)

        return logits

    def _get_feat_extract_output_lengths(self, input_lengths: paddle.Tensor):
        """
        Computes the output length of the convolutional layers

        Applies `floor((length - kernel_size) / stride + 1)` once per layer
        listed in `cfg.conv_feature_layers` and returns an int64 tensor.
        """

        def _conv_out_length(input_length, kernel_size, stride):
            return paddle.floor((input_length - kernel_size) / stride + 1)

        # NOTE(security): eval() of a config string; trusted configs only.
        conv_cfg_list = eval(self.cfg.conv_feature_layers)

        for _, kernel_size, stride in conv_cfg_list:
            input_lengths = _conv_out_length(input_lengths, kernel_size,
                                             stride)

        return paddle.cast(input_lengths, 'int64')

    def forward(
            self,
            source,
            padding_mask=None,
            mask=True,
            features_only=False,
            layer=None,
            mask_indices=None,
            mask_channel_indices=None,
            padding_count=None, ):
        """Run the model.

        Args:
            source: input passed to the convolutional feature extractor.
            padding_mask: optional mask over `source`; non-zero entries are
                treated as padding when recomputing the frame-level mask.
            mask: whether to apply `apply_mask` to the encoder input.
            features_only: return encoder outputs without computing the
                contrastive logits.
            layer: forwarded to the encoder as `layer`.
            mask_indices: optional precomputed time mask.
            mask_channel_indices: optional precomputed channel mask.
            padding_count: forwarded to `sample_negatives`.

        Returns:
            dict. With `features_only=True` the keys are "x", "padding_mask",
            "features" and "layer_results". Otherwise "x" (logits),
            "padding_mask" and "features_pen", plus "prob_perplexity",
            "code_perplexity", "num_vars" and "temp" when a quantizer ran.
        """

        if self.feature_grad_mult > 0:
            features = self.feature_extractor(source)
            if self.feature_grad_mult != 1.0:
                features = GradMultiply.apply(features, self.feature_grad_mult)
        else:
            # Frozen feature extractor.
            with paddle.no_grad():
                features = self.feature_extractor(source)

        features_pen = features.pow(2).mean()

        features = features.transpose([0, 2, 1])
        features = self.layer_norm(features)
        unmasked_features = features.clone()

        if padding_mask is not None and padding_mask.any():
            input_lengths = (1 - paddle.cast(padding_mask, 'int64')).sum(-1)
            # apply conv formula to get real output_lengths
            output_lengths = self._get_feat_extract_output_lengths(
                input_lengths)

            padding_mask = paddle.zeros(
                features.shape[:2], dtype=features.dtype)

            # these two operations makes sure that all values
            # before the output lengths indices are attended to
            padding_mask[(paddle.arange(padding_mask.shape[0]),
                          output_lengths - 1, )] = 1
            padding_mask = paddle.cast(
                (1 - padding_mask.flip([-1]).cumsum(-1).flip([-1])), 'bool')
        else:
            padding_mask = None

        time_steps_to_drop = features.shape[1] % self.crop_seq_to_multiple
        if time_steps_to_drop != 0:
            features = features[:, :-time_steps_to_drop]
            unmasked_features = unmasked_features[:, :-time_steps_to_drop]
            if padding_mask is not None:
                padding_mask = padding_mask[:, :-time_steps_to_drop]

        if self.post_extract_proj is not None:
            features = self.post_extract_proj(features)

        features = self.dropout_input(features)
        unmasked_features = self.dropout_features(unmasked_features)

        num_vars = None
        code_ppl = None
        prob_ppl = None
        curr_temp = None

        if self.input_quantizer is not None:
            q = self.input_quantizer(features, produce_targets=False)
            features = q["x"]
            num_vars = q["num_vars"]
            code_ppl = q["code_perplexity"]
            prob_ppl = q["prob_perplexity"]
            curr_temp = q["temp"]
            features = self.project_inp(features)

        if mask:
            x, mask_indices = self.apply_mask(
                features,
                padding_mask,
                mask_indices=mask_indices,
                mask_channel_indices=mask_channel_indices, )
            if mask_indices is not None:
                y = unmasked_features[mask_indices].reshape([
                    unmasked_features.shape[0], -1, unmasked_features.shape[-1]
                ])
            else:
                # No time mask was produced (mask_prob <= 0): fall back to
                # the full unmasked features so `y` is always defined.
                y = unmasked_features
        else:
            x = features
            y = unmasked_features
            mask_indices = None

        x, layer_results = self.encoder(
            x, padding_mask=padding_mask, layer=layer)

        if features_only:
            return {
                "x": x,
                "padding_mask": padding_mask,
                "features": unmasked_features,
                "layer_results": layer_results,
            }

        if self.quantizer is not None:
            if self.negatives_from_everywhere:
                q = self.quantizer(unmasked_features, produce_targets=False)
                y = q["x"]
                num_vars = q["num_vars"]
                code_ppl = q["code_perplexity"]
                prob_ppl = q["prob_perplexity"]
                curr_temp = q["temp"]
                y = self.project_q(y)

                negs, _ = self.sample_negatives(
                    y,
                    mask_indices[0].sum(),
                    padding_count=padding_count, )
                y = y[mask_indices].reshape([y.shape[0], -1, y.shape[-1]])

            else:
                q = self.quantizer(y, produce_targets=False)
                y = q["x"]
                num_vars = q["num_vars"]
                code_ppl = q["code_perplexity"]
                prob_ppl = q["prob_perplexity"]
                curr_temp = q["temp"]

                y = self.project_q(y)

                negs, _ = self.sample_negatives(
                    y,
                    y.shape[1],
                    padding_count=padding_count, )

            if self.codebook_negatives > 0:
                cb_negs = self.quantizer.sample_from_codebook(
                    y.shape[0] * y.shape[1], self.codebook_negatives)
                cb_negs = cb_negs.reshape(
                    [self.codebook_negatives, y.shape[0], y.shape[1],
                     -1])  # order doesnt matter
                cb_negs = self.project_q(cb_negs)
                negs = paddle.concat([negs, cb_negs], axis=0)
        else:
            y = self.project_q(y)

            if self.negatives_from_everywhere:
                negs, _ = self.sample_negatives(
                    unmasked_features,
                    y.shape[1],
                    padding_count=padding_count, )
                negs = self.project_q(negs)
            else:
                negs, _ = self.sample_negatives(
                    y,
                    y.shape[1],
                    padding_count=padding_count, )

        # Keep only the encoder outputs at masked positions.
        x = x[mask_indices].reshape([x.shape[0], -1, x.shape[-1]])

        if self.target_glu is not None:
            y = self.target_glu(y)
            negs = self.target_glu(negs)

        x = self.final_proj(x)
        x = self.compute_preds(x, y, negs)

        result = {
            "x": x,
            "padding_mask": padding_mask,
            "features_pen": features_pen,
        }

        if prob_ppl is not None:
            result["prob_perplexity"] = prob_ppl
            result["code_perplexity"] = code_ppl
            result["num_vars"] = num_vars
            result["temp"] = curr_temp

        return result

    def quantize(self, x):
        """Return `self.quantizer.forward_idx` of the normalized conv
        features of `x`; requires a target quantizer to be present."""
        assert self.quantizer is not None
        x = self.feature_extractor(x)
        x = x.transpose([0, 2, 1])
        x = self.layer_norm(x)
        return self.quantizer.forward_idx(x)

    def extract_features(self, source, padding_mask, mask=False, layer=None):
        """Shortcut for `forward(..., features_only=True)`; returns its dict."""
        res = self.forward(
            source, padding_mask, mask=mask, features_only=True, layer=layer)
        return res

    def get_logits(self, net_output):
        """Flatten `net_output["x"]` to 2-D with the candidate axis last."""
        logits = net_output["x"]
        logits = logits.transpose([2, 1, 0])
        logits = logits.reshape([-1, logits.shape[-1]])
        return logits

    def get_targets(self, sample, net_output, expand_steps=True):
        """Return all-zero int64 targets, one per logit row.

        The length is `x.shape[1] * x.shape[2]` of `net_output["x"]`; the
        target is always 0 because `compute_preds` puts the positive first.
        `sample` and `expand_steps` are unused.
        """
        x = net_output["x"]
        # paddle.zeros documents `shape` as a list/tuple/Tensor, so wrap the
        # scalar length in a list.
        return paddle.zeros([x.shape[1] * x.shape[2]], dtype='int64')

    def get_extra_losses(self, net_output):
        """Collect auxiliary penalties present in `net_output`.

        Returns a list holding, when available, the codebook diversity term
        `(num_vars - prob_perplexity) / num_vars` and the feature penalty.
        """
        pen = []

        if "prob_perplexity" in net_output:
            pen.append((net_output["num_vars"] - net_output["prob_perplexity"])
                       / net_output["num_vars"])

        if "features_pen" in net_output:
            pen.append(net_output["features_pen"])

        return pen

    def remove_pretraining_modules(self, last_layer=None):
        """Drop the modules only needed for pre-training.

        Args:
            last_layer: when given, encoder layers with index greater than
                `last_layer` are discarded as well.
        """
        self.quantizer = None
        self.project_q = None
        self.target_glu = None
        self.final_proj = None

        if last_layer is not None:
            self.encoder.layers = nn.LayerList([
                enc_layer for idx, enc_layer in enumerate(self.encoder.layers)
                if idx <= last_layer
            ])


class ConvFeatureExtractionModel(nn.Layer):
    """Stack of 1-D convolution blocks applied to the raw input.

    Args:
        conv_layers: list of `(dim, kernel_size, stride)` triples, one per
            convolution block.
        dropout: dropout probability used after every convolution.
        mode: "default" puts a group norm on the first block only;
            "layer_norm" puts a layer norm on every block.
        conv_bias: whether the convolutions carry a bias term.
    """

    def __init__(
            self,
            conv_layers: List[Tuple[int, int, int]],
            dropout: float=0.0,
            mode: str="default",
            conv_bias: bool=False, ):
        super().__init__()

        assert mode in {"default", "layer_norm"}

        def block(
                n_in,
                n_out,
                k,
                stride,
                is_layer_norm=False,
                is_group_norm=False,
                conv_bias=False, ):
            """Build one conv -> dropout -> [norm] -> GELU block."""

            def make_conv():
                conv = Conv1D(
                    n_in,
                    n_out,
                    k,
                    stride=stride,
                    # A truthy flag requests a default bias attribute; a
                    # falsy flag is forwarded unchanged.
                    bias_attr=paddle.ParamAttr() if conv_bias else conv_bias)
                # nn.initializer.KaimingNormal()(conv.weight)
                return conv

            assert not (is_layer_norm and is_group_norm
                        ), "layer norm and group norm are exclusive"

            # The norm width is the block's own output width (`n_out`); do
            # not rely on the enclosing loop variable being in scope.
            if is_layer_norm:
                return nn.Sequential(
                    make_conv(),
                    nn.Dropout(p=dropout),
                    nn.Sequential(
                        TransposeLast(),
                        Fp32LayerNorm(n_out),
                        TransposeLast(), ),
                    nn.GELU(), )
            elif is_group_norm:
                return nn.Sequential(
                    make_conv(),
                    nn.Dropout(p=dropout),
                    Fp32GroupNorm(n_out, n_out),
                    nn.GELU(), )
            else:
                return nn.Sequential(
                    make_conv(), nn.Dropout(p=dropout), nn.GELU())

        # The first block consumes a single input channel.
        in_d = 1
        self.conv_layers = nn.LayerList()
        for i, cl in enumerate(conv_layers):
            assert len(cl) == 3, "invalid conv definition: " + str(cl)
            (dim, k, stride) = cl

            self.conv_layers.append(
                block(
                    in_d,
                    dim,
                    k,
                    stride,
                    is_layer_norm=mode == "layer_norm",
                    is_group_norm=mode == "default" and i == 0,
                    conv_bias=conv_bias, ))
            in_d = dim

    def forward(self, x):
        """Insert a channel axis at position 1 and run every conv block."""

        # BxT -> BxCxT
        x = x.unsqueeze(1)
        for conv in self.conv_layers:
            x = conv(x)

        return x


def make_conv_pos(e, k, g):
    """Build the convolutional positional-embedding module.

    Args:
        e: number of input and output channels of the convolution.
        k: kernel size; also passed to `SamePad`.
        g: number of convolution groups.

    Returns:
        `nn.Sequential` of the weight-normalized convolution, `SamePad(k)`
        and a GELU activation.
    """
    # Weights start from a zero-mean normal with std sqrt(4 / (k * e));
    # the bias starts at zero.
    init_std = math.sqrt(4.0 / (k * e))
    weight_init = nn.initializer.Normal(mean=0, std=init_std)
    bias_init = nn.initializer.Constant(0)

    conv = Conv1D(
        e,
        e,
        kernel_size=k,
        padding=k // 2,
        groups=g,
        weight_attr=weight_init,
        bias_attr=bias_init)
    normed_conv = nn.utils.weight_norm(conv, name="weight", dim=2)

    return nn.Sequential(normed_conv, SamePad(k), nn.GELU())


class TransformerEncoder(nn.Layer):
    """Transformer context network with convolutional positional embedding.

    Args:
        args (Wav2Vec2Config): configuration; kept on the instance as
            `self.args` so that `max_positions` can read it.
    """

    def build_encoder_layer(self, args: Wav2Vec2Config):
        """Create one encoder layer.

        Reads `self.embedding_dim` and `self.dropout`, so it must be called
        after `__init__` has set them.
        """
        layer = TransformerSentenceEncoderLayer(
            embedding_dim=self.embedding_dim,
            ffn_embedding_dim=args.encoder_ffn_embed_dim,
            num_attention_heads=args.encoder_attention_heads,
            dropout=self.dropout,
            attention_dropout=args.attention_dropout,
            activation_dropout=args.activation_dropout,
            activation_fn=args.activation_fn,
            layer_norm_first=args.layer_norm_first, )
        return layer

    def __init__(self, args: Wav2Vec2Config):
        super().__init__()

        # `max_positions()` reads the config back from here.
        self.args = args

        self.dropout = args.dropout
        self.embedding_dim = args.encoder_embed_dim
        self.required_seq_len_multiple = args.required_seq_len_multiple

        pos_conv_depth = getattr(args, "pos_conv_depth", 1)
        if pos_conv_depth > 1:
            num_layers = args.pos_conv_depth
            # Split the kernel budget across layers, never below 3.
            k = max(3, args.conv_pos // num_layers)

            def make_conv_block(e, k, g, l):
                return nn.Sequential(*[
                    nn.Sequential(
                        Conv1D(
                            e,
                            e,
                            kernel_size=k,
                            padding=k // 2,
                            groups=g, ),
                        SamePad(k),
                        TransposeLast(),
                        LayerNorm(e, elementwise_affine=False),
                        TransposeLast(),
                        nn.GELU(), ) for _ in range(l)
                ])

            self.pos_conv = make_conv_block(self.embedding_dim, k,
                                            args.conv_pos_groups, num_layers)

        else:
            self.pos_conv = make_conv_pos(
                self.embedding_dim,
                args.conv_pos,
                args.conv_pos_groups, )

        self.layers = nn.LayerList([
            self.build_encoder_layer(args) for _ in range(args.encoder_layers)
        ])
        self.layer_norm_first = args.layer_norm_first
        self.layer_norm = LayerNorm(self.embedding_dim)
        self.layerdrop = args.encoder_layerdrop

    def forward(self, x, padding_mask=None, layer=None):
        """Encode `x`; returns `(x, layer_results)`.

        The final layer norm is applied only in pre-norm mode and only when
        no intermediate `layer` was requested.
        """
        x, layer_results = self.extract_features(x, padding_mask, layer)
        if self.layer_norm_first and layer is None:
            x = self.layer_norm(x)

        return x, layer_results

    def extract_features(
            self,
            x,
            padding_mask=None,
            tgt_layer=None,
            min_layer=0, ):
        """Run the positional convolution and the encoder layers.

        Args:
            x: input, transposed as a 3-D B x T x C tensor.
            padding_mask: optional mask; masked positions of `x` are zeroed.
            tgt_layer: index of the layer after which to stop, or None to
                run all layers.
            min_layer: layers with a smaller index are left out of
                `layer_results`.

        Returns:
            Tuple `(x, layer_results)` where `layer_results` is a list of
            `(x, z, lr)` tuples, one per executed layer.
        """
        if padding_mask is not None:
            x = index_put(x, padding_mask, 0)

        x_conv = self.pos_conv(x.transpose([0, 2, 1]))
        x_conv = x_conv.transpose([0, 2, 1])
        x = x + x_conv

        if not self.layer_norm_first:
            x = self.layer_norm(x)

        # pad to the sequence length dimension
        x, pad_length = pad_to_multiple(
            x, self.required_seq_len_multiple, dim=-2, value=0)
        if pad_length > 0 and padding_mask is None:
            # No mask was given: create one marking only the new padding.
            padding_mask = paddle.zeros([x.shape[0], x.shape[1]], dtype='bool')
            padding_mask[:, -pad_length:] = True
        else:
            # NOTE(review): this branch is also taken with padding_mask=None
            # and pad_length == 0, so pad_to_multiple presumably tolerates a
            # None input - confirm.
            padding_mask, _ = pad_to_multiple(
                padding_mask,
                self.required_seq_len_multiple,
                dim=-1,
                value=True)
        x = F.dropout(x, p=self.dropout, training=self.training)

        # B x T x C -> T x B x C
        x = x.transpose([1, 0, 2])

        layer_results = []
        r = None
        for i, layer in enumerate(self.layers):
            # LayerDrop: during training a layer is skipped when the drawn
            # number does not exceed `self.layerdrop`.
            dropout_probability = np.random.random() if self.layerdrop > 0 else 1
            if not self.training or (dropout_probability > self.layerdrop):
                x, (z, lr) = layer(
                    x, self_attn_padding_mask=padding_mask, need_weights=False)
                if i >= min_layer:
                    layer_results.append((x, z, lr))
            if i == tgt_layer:
                r = x
                break

        if r is not None:
            x = r

        # T x B x C -> B x T x C
        x = x.transpose([1, 0, 2])

        # undo padding
        if pad_length > 0:
            x = x[:, :-pad_length]

            def undo_pad(a, b, c):
                return (a[:-pad_length], b[:-pad_length]
                        if b is not None else b, c[:-pad_length], )

            layer_results = [undo_pad(*u) for u in layer_results]

        return x, layer_results

    def max_positions(self):
        """Maximum output length supported by the encoder."""
        return self.args.max_positions

    def upgrade_state_dict_named(self, state_dict, name):
        """Upgrade a (possibly old) state dict for new versions of fairseq."""
        return state_dict


class TransformerSentenceEncoderLayer(nn.Layer):
    """
    Implements a Transformer Encoder Layer used in BERT/XLM style pre-trained
    models.

    Args:
        embedding_dim: width of the layer input and output.
        ffn_embedding_dim: hidden width of the feed-forward block.
        num_attention_heads: number of self-attention heads.
        dropout: dropout after the attention and feed-forward outputs.
        attention_dropout: dropout passed to the attention module.
        activation_dropout: dropout after the feed-forward activation.
        activation_fn: activation name resolved with `get_activation_fn`.
        layer_norm_first: apply layer norm before (True) or after (False)
            each sub-block.
    """

    def __init__(
            self,
            embedding_dim: int=768,
            ffn_embedding_dim: int=3072,
            num_attention_heads: int=8,
            dropout: float=0.1,
            attention_dropout: float=0.1,
            activation_dropout: float=0.1,
            activation_fn: str="relu",
            layer_norm_first: bool=False, ) -> None:

        super().__init__()
        # Initialize parameters
        self.embedding_dim = embedding_dim
        self.dropout = dropout
        self.activation_dropout = activation_dropout

        # Initialize blocks
        self.activation_fn = get_activation_fn(activation_fn)
        self.self_attn = MultiheadAttention(
            self.embedding_dim,
            num_attention_heads,
            dropout=attention_dropout,
            self_attention=True, )

        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(self.activation_dropout)
        self.dropout3 = nn.Dropout(dropout)

        self.layer_norm_first = layer_norm_first

        # layer norm associated with the self attention layer
        self.self_attn_layer_norm = LayerNorm(self.embedding_dim)
        self.fc1 = Linear(self.embedding_dim, ffn_embedding_dim)
        self.fc2 = Linear(ffn_embedding_dim, self.embedding_dim)

        # layer norm associated with the position wise feed-forward NN
        self.final_layer_norm = LayerNorm(self.embedding_dim)

    def forward(
            self,
            x: paddle.Tensor,
            self_attn_mask: paddle.Tensor=None,
            self_attn_padding_mask: paddle.Tensor=None,
            need_weights: bool=False,
            att_args=None, ):
        """
        LayerNorm is applied either before or after the self-attention/ffn
        modules similar to the original Transformer implementation.

        Args:
            x: layer input.
            self_attn_mask: optional attention mask forwarded to the
                attention module as `attn_mask` in both norm modes.
            self_attn_padding_mask: optional key padding mask.
            need_weights: accepted for interface compatibility; attention is
                always called with `need_weights=False`.
            att_args: unused.

        Returns:
            Tuple `(x, (attn, layer_result))` where `layer_result` is the
            feed-forward output before its dropout and residual connection.
        """
        residual = x

        if self.layer_norm_first:
            x = self.self_attn_layer_norm(x)
            x, attn = self.self_attn(
                query=x,
                key=x,
                value=x,
                key_padding_mask=self_attn_padding_mask,
                attn_mask=self_attn_mask,
                need_weights=False, )
            x = self.dropout1(x)
            x = residual + x

            residual = x
            x = self.final_layer_norm(x)
            x = self.activation_fn(self.fc1(x))
            x = self.dropout2(x)
            x = self.fc2(x)

            layer_result = x

            x = self.dropout3(x)
            x = residual + x
        else:
            x, attn = self.self_attn(
                query=x,
                key=x,
                value=x,
                key_padding_mask=self_attn_padding_mask,
                # Honour the caller's mask here as well instead of silently
                # dropping it; None keeps the previous behaviour.
                attn_mask=self_attn_mask,
                need_weights=False, )

            x = self.dropout1(x)
            x = residual + x

            x = self.self_attn_layer_norm(x)

            residual = x
            x = self.activation_fn(self.fc1(x))
            x = self.dropout2(x)
            x = self.fc2(x)

            layer_result = x

            x = self.dropout3(x)
            x = residual + x
            x = self.final_layer_norm(x)

        return x, (attn, layer_result)


@dataclass
class AudioPretrainingConfig:
    """Audio data settings: sample rate, normalization and size limits.

    Each field keeps its description under the "help" key of its metadata.
    """

    # Audio sampling rate in Hz.
    sample_rate: int = field(
        default=16000,
        metadata=dict(
            help="target sample rate. audio files will be up/down sampled to this rate"
        ), )

    # Zero-mean / unit-variance normalization switch.
    normalize: bool = field(
        default=False,
        metadata=dict(
            help="if set, normalizes input to have 0 mean and unit variance"),
    )

    # Pad short samples rather than cropping.
    enable_padding: bool = field(
        default=False,
        metadata=dict(help="pad shorter samples instead of cropping"), )

    # Upper and lower sample-size limits; None leaves the limit unset.
    max_sample_size: Optional[int] = field(
        default=None,
        metadata=dict(help="max sample size to crop to for batching"), )
    min_sample_size: Optional[int] = field(
        default=None,
        metadata=dict(help="min sample size to skip small examples"), )

================================================
FILE: paddlespeech/s2t/models/wav2vec2/processing/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/models/wav2vec2/processing/signal_processing.py
================================================
# Copyright (c) 2023 speechbrain Authors. All Rights Reserved.
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from speechbrain 2023 (https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/processing/signal_processing.py)
"""
Low level signal processing utilities
Authors
 * Peter Plantinga 2020
 * Francois Grondin 2020
 * William Aris 2020
 * Samuele Cornell 2020
 * Sarthak Yadav 2022
"""
import numpy as np
import paddle


def blackman_window(window_length, periodic=True):
    """Blackman window function.

    Arguments
    ---------
    window_length : int
        Number of points in the returned window.
    periodic : bool
        When True, the symmetric window of `window_length + 1` points is
        computed and its last point is dropped.

    Returns
    -------
    A 1-D tensor of size (window_length) containing the window. A length of
    0 yields an empty Python list and a length of 1 yields a single one.
    """
    # Degenerate sizes are answered directly.
    if window_length == 0:
        return []
    if window_length == 1:
        return paddle.ones([1])

    n_points = window_length + 1 if periodic else window_length

    # Evenly spaced phase from 0 to pi across the symmetric window.
    phase = paddle.arange(n_points) * (np.pi / (n_points - 1))
    second_harmonic = paddle.cos(phase * 4)
    first_harmonic = paddle.cos(phase * 2)
    window = 0.08 * second_harmonic - 0.5 * first_harmonic + 0.42

    if periodic:
        return window[:-1]
    return window


def compute_amplitude(waveforms, lengths=None, amp_type="avg", scale="linear"):
    """Compute amplitude of a batch of waveforms.
    Arguments
    ---------
    waveforms : tensor
        The waveforms used for computing amplitude.
        Shape should be `[time]` or `[batch, time]` or
        `[batch, time, channels]`. A 1-D input is treated as a batch of one.
    lengths : tensor
        The lengths of the waveforms excluding the padding. When given, the
        sum of absolute values over the time axis (kept as a size-1 axis) is
        divided by this tensor, so it must broadcast against that sum,
        e.g. shape `[batch, 1]` for `[batch, time]` input. Only used when
        `amp_type` is "avg".
    amp_type : str
        Whether to compute "avg" average or "peak" amplitude.
        Choose between ["avg", "peak"].
    scale : str
        Whether to compute amplitude in "dB" or "linear" scale.
        Choose between ["linear", "dB"].
    Returns
    -------
    The amplitude of each waveform, with the time axis reduced to size 1
    (e.g. `[batch, 1]` for `[batch, time]` input). In "dB" scale the result
    is clipped from below at -80.
    Example
    -------
    >>> signal = paddle.sin(paddle.arange(16000.0)).unsqueeze(0)
    >>> amp = compute_amplitude(signal, paddle.to_tensor([[16000.0]]))
    >>> # amp has shape [1, 1] and is approximately 0.6366
    """
    if len(waveforms.shape) == 1:
        waveforms = waveforms.unsqueeze(0)

    assert amp_type in ["avg", "peak"]
    assert scale in ["linear", "dB"]

    if amp_type == "avg":
        if lengths is None:
            out = paddle.mean(paddle.abs(waveforms), axis=1, keepdim=True)
        else:
            # Average over the true (unpadded) length instead of the padded one
            wav_sum = paddle.sum(paddle.abs(waveforms), axis=1, keepdim=True)
            out = wav_sum / lengths.astype(wav_sum.dtype)
    elif amp_type == "peak":
        # paddle.max returns only the reduced values (there is no
        # (values, indices) pair as in torch), so the result must not be
        # indexed with [0]: doing so would keep only the first batch item.
        out = paddle.max(paddle.abs(waveforms), axis=1, keepdim=True)
    else:
        raise NotImplementedError

    if scale == "linear":
        return out
    elif scale == "dB":
        return paddle.clip(20 * paddle.log10(out), min=-80)  # clamp zeros
    else:
        raise NotImplementedError


def convolve1d(
        waveform,
        kernel,
        padding=0,
        pad_type="constant",
        stride=1,
        groups=1,
        use_fft=False,
        rotation_index=0, ):
    """Pad and convolve a batch of signals along the time axis.

    Arguments
    ---------
    waveform : tensor
        The tensor to perform operations on. Must be 3-dimensional; the
        time axis is expected second (it is moved last internally).
    kernel : tensor
        The filter to apply during convolution, laid out like `waveform`
        (time axis second).
    padding : int or tuple
        A tuple (pad_left, pad_right) is applied explicitly with
        `paddle.nn.functional.pad` using `pad_type`. An integer is instead
        handed to `conv1d` as its `padding` argument, in which case
        `pad_type` is ignored (and the FFT path applies no padding).
    pad_type : str
        The type of padding to use. Passed directly to
        `paddle.nn.functional.pad` as `mode`.
    stride : int
        The number of units to move each time convolution is applied.
        Passed to conv1d. Has no effect if `use_fft` is True.
    groups : int
        Passed to `conv1d` to split the input into groups for convolution.
        Has no effect if `use_fft` is True.
    use_fft : bool
        When True, the convolution is computed in the spectral domain as a
        product of real FFTs. WARNING: without padding, circular
        convolution occurs.
    rotation_index : int
        Only applies if `use_fft` is True. The kernel is rolled by this
        amount before convolution to shift the output location.

    Returns
    -------
    The convolved waveform, with the time axis back in second position.

    Raises
    ------
    ValueError
        If `waveform` is not 3-dimensional.

    Example
    -------
    >>> signal = paddle.rand([1, 16000, 1])
    >>> kernel = paddle.rand([1, 10, 1])
    >>> signal = convolve1d(signal, kernel, padding=(9, 0))
    """
    if len(waveform.shape) != 3:
        raise ValueError("Convolve1D expects a 3-dimensional tensor")

    explicit_pad = isinstance(padding, tuple)

    # pad, fft and conv all want the time axis last
    signal = waveform.transpose([0, 2, 1])
    taps = kernel.transpose([0, 2, 1])

    # A (left, right) tuple is applied here; an int is left to conv1d
    if explicit_pad:
        signal = paddle.nn.functional.pad(
            x=signal, pad=padding, mode=pad_type, data_format='NCL')

    if not use_fft:
        # Direct convolution, as implemented by paddle
        conv_padding = 0 if explicit_pad else padding
        convolved = paddle.nn.functional.conv1d(
            x=signal,
            weight=taps,
            stride=stride,
            groups=groups,
            padding=conv_padding, )
        return convolved.transpose([0, 2, 1])

    # Spectral path: bring the kernel to the signal length first
    gap = signal.shape[-1] - taps.shape[-1]
    if gap < 0:
        # Kernel longer than the signal: cut off the excess tail
        taps = taps[..., :gap]
        gap = 0

    # Zeros go between the rotated halves so the kernel stays aligned
    filler = paddle.zeros([taps.shape[0], taps.shape[1], gap], dtype=taps.dtype)
    head = taps[..., rotation_index:]
    tail = taps[..., :rotation_index]
    taps = paddle.concat((head, filler, tail), axis=-1)

    # Product of spectra == (circular) convolution in time
    import paddle.fft as fft

    spectrum = fft.rfft(signal) * fft.rfft(taps)
    convolved = fft.irfft(spectrum, n=signal.shape[-1])

    # Return time dimension to the second dimension.
    return convolved.transpose([0, 2, 1])


def notch_filter(notch_freq, filter_width=101, notch_width=0.05):
    """Build a notch (band-reject) filter as the sum of a low-pass and a
    high-pass filter.
    (from https://tomroelandts.com/articles/
    how-to-create-simple-band-pass-and-band-reject-filters)

    Arguments
    ---------
    notch_freq : float
        Frequency to put notch as a fraction of the
        sampling rate / 2. Must satisfy 0 < notch_freq <= 1.
    filter_width : int
        Filter width in samples; must be odd. Longer filters have
        smaller transition bands, but are more inefficient.
    notch_width : float
        Width of the notch, as a fraction of the sampling_rate / 2.

    Returns
    -------
    The filter taps as a tensor reshaped to `[1, filter_width, 1]`.
    """

    # Validate the arguments before building anything
    assert 0 < notch_freq <= 1
    assert filter_width % 2 != 0

    half_width = filter_width // 2
    # Tap positions centred on zero: -half_width ... +half_width
    taps = paddle.arange(filter_width) - half_width

    # Avoid frequencies that are too low
    notch_freq += notch_width

    def _windowed_sinc(cutoff):
        "Blackman-windowed sin(x) / x evaluated at 3 * cutoff * taps."
        arg = 3 * cutoff * taps
        below = arg[:half_width]
        above = arg[half_width + 1:]
        # The centre tap sits at x == 0; its value (1) is inserted directly
        # so that no division by zero takes place.
        response = paddle.concat([
            paddle.sin(below) / below,
            paddle.ones([1]),
            paddle.sin(above) / above,
        ])
        response *= blackman_window(filter_width)
        return response

    # Low-pass branch, normalised so its taps sum to one
    low_pass = _windowed_sinc(notch_freq - notch_width)
    low_pass /= paddle.sum(low_pass)

    # High-pass branch: negated, normalised low-pass plus a unit impulse
    high_pass = _windowed_sinc(notch_freq + notch_width)
    high_pass /= -paddle.sum(high_pass)
    high_pass[half_width] += 1

    # Adding filters creates notch filter
    return (low_pass + high_pass).reshape([1, -1, 1])


================================================
FILE: paddlespeech/s2t/models/wav2vec2/processing/speech_augmentation.py
================================================
# Copyright (c) 2023 speechbrain Authors. All Rights Reserved.
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from speechbrain(https://github.com/speechbrain/speechbrain/blob/develop/speechbrain/processing/speech_augmentation.py)
"""Classes for mutating speech data for data augmentation.
This module provides classes that produce realistic distortions of speech
data for the purpose of training speech processing models. The list of
distortions includes adding noise, adding reverberation, changing speed,
and more. All the classes are of type `paddle.nn.Layer`. This gives the
possibility to have end-to-end differentiability and
backpropagate the gradient through them. In addition, all operations
are expected to be performed on the GPU (where available) for efficiency.

Authors
 * Peter Plantinga 2020
"""
import math

import paddle
import paddle.nn as nn

from paddlespeech.s2t.models.wav2vec2.processing.signal_processing import compute_amplitude
from paddlespeech.s2t.models.wav2vec2.processing.signal_processing import convolve1d
from paddlespeech.s2t.models.wav2vec2.processing.signal_processing import notch_filter


class SpeedPerturb(nn.Layer):
    """Slightly speed up or slow down an audio signal.

    The signal is resampled to a rate close to the original one, which
    makes it slightly slower or faster. This technique is outlined in the
    paper: "Audio Augmentation for Speech Recognition"

    Arguments
    ---------
    orig_freq : int
        The frequency of the original signal.
    speeds : list
        The speeds that the signal should be changed to, as a percentage of the
        original signal (i.e. `speeds` is divided by 100 to get a ratio).
    perturb_prob : float
        The chance that the batch will be speed-
        perturbed. By default, every batch is perturbed.

    Example
    -------
    >>> signal = paddle.rand([52173])
    >>> perturbator = SpeedPerturb(orig_freq=16000, speeds=[90])
    >>> clean = signal.unsqueeze(0)
    >>> perturbed = perturbator(clean)
    >>> clean.shape
    [1, 52173]
    >>> perturbed.shape
    [1, 46956]
    """

    def __init__(
            self,
            orig_freq,
            speeds=[90, 100, 110],
            perturb_prob=1.0, ):
        super().__init__()
        self.orig_freq = orig_freq
        self.speeds = speeds
        self.perturb_prob = perturb_prob

        # Index of the most recently chosen speed
        self.samp_index = 0

        # One resampler per candidate speed, in the same order as `speeds`
        self.resamplers = [
            Resample(
                orig_freq=self.orig_freq,
                new_freq=self.orig_freq * speed // 100)
            for speed in self.speeds
        ]

    def forward(self, waveform):
        """Resample the batch with one randomly chosen speed.

        Arguments
        ---------
        waveform : tensor
            Shape should be `[batch, time]` or `[batch, time, channels]`.

        Returns
        -------
        Tensor of shape `[batch, time]` or `[batch, time, channels]`.
        """

        # With probability 1 - `perturb_prob` the batch is only copied
        skip = paddle.rand([1]) > self.perturb_prob
        if skip:
            return waveform.clone()

        # Draw which resampler to use and remember the choice
        self.samp_index = paddle.randint(len(self.speeds), shape=(1, ))[0]
        resampler = self.resamplers[self.samp_index]
        return resampler(waveform)


class Resample(nn.Layer):
    """This class resamples an audio signal using sinc-based interpolation.

    It is a modification of the `resample` function from torchaudio
    (https://pytorch.org/audio/stable/tutorials/audio_resampling_tutorial.html)

    The interpolation filters are built lazily on the first call to
    `forward` and cached on the instance as `first_indices` and `weights`.

    Arguments
    ---------
    orig_freq : int
        the sampling frequency of the input signal.
    new_freq : int
        the new sampling frequency after this operation is performed.
    lowpass_filter_width : int
        Controls the sharpness of the filter, larger numbers result in a
        sharper filter, but they are less efficient. Values from 4 to 10 are
        the intended range (the value is not validated by this class).
    """

    def __init__(
            self,
            orig_freq=16000,
            new_freq=16000,
            lowpass_filter_width=6, ):
        super().__init__()
        self.orig_freq = orig_freq
        self.new_freq = new_freq
        self.lowpass_filter_width = lowpass_filter_width

        # Compute rate for striding
        self._compute_strides()
        # Both strides are the frequencies divided by their gcd, so each
        # frequency must be a whole multiple of its stride.
        assert self.orig_freq % self.conv_stride == 0
        assert self.new_freq % self.conv_transpose_stride == 0

    def _compute_strides(self):
        """Compute the phases in polyphase filter.

        Sets `output_samples` (= new_freq / gcd), `conv_stride`
        (= orig_freq / gcd) and `conv_transpose_stride` (= output_samples).

        (almost directly from torchaudio.compliance.kaldi)
        """

        # Compute new unit based on ratio of in/out frequencies
        base_freq = math.gcd(self.orig_freq, self.new_freq)
        input_samples_in_unit = self.orig_freq // base_freq
        self.output_samples = self.new_freq // base_freq

        # Store the appropriate stride based on the new units
        self.conv_stride = input_samples_in_unit
        self.conv_transpose_stride = self.output_samples

    def forward(self, waveforms):
        """Resample a batch of waveforms from `orig_freq` to `new_freq`.

        Arguments
        ---------
        waveforms : tensor
            Shape should be `[batch, time]` or `[batch, time, channels]`.

        Returns
        -------
        Tensor of shape `[batch, time]` or `[batch, time, channels]`.
        When `orig_freq == new_freq` the input tensor itself is returned.

        Raises
        ------
        ValueError
            If the input is neither 2- nor 3-dimensional (only checked when
            the two frequencies differ).
        """

        # Build and cache the filters on first use. Note that this runs
        # before the equal-frequency shortcut below.
        if not hasattr(self, "first_indices"):
            self._indices_and_weights(waveforms)

        # Don't do anything if the frequencies are the same
        if self.orig_freq == self.new_freq:
            return waveforms
        # Bring the input to [batch, channels, time], remembering whether a
        # channel axis had to be inserted so it can be removed again.
        unsqueezed = False
        if len(waveforms.shape) == 2:
            waveforms = waveforms.unsqueeze(1)
            unsqueezed = True
        elif len(waveforms.shape) == 3:
            waveforms = waveforms.transpose([0, 2, 1])
        else:
            raise ValueError("Input must be 2 or 3 dimensions")

        # Do resampling
        resampled_waveform = self._perform_resample(waveforms)

        # Restore the caller's layout
        if unsqueezed:
            resampled_waveform = resampled_waveform.squeeze(1)
        else:
            resampled_waveform = resampled_waveform.transpose([0, 2, 1])

        return resampled_waveform

    def _perform_resample(self, waveforms):
        """Resamples the waveform at the new frequency.

        This matches Kaldi's OfflineFeatureTpl ResampleWaveform which uses a
        LinearResample (resample a signal at linearly spaced intervals to
        up/downsample a signal). LinearResample (LR) means that the output
        signal is at linearly spaced intervals (i.e the output signal has a
        frequency of `new_freq`). It uses sinc/bandlimited interpolation to
        upsample/downsample the signal.

        (almost directly from torchaudio.compliance.kaldi)

        https://ccrma.stanford.edu/~jos/resample/
        Theory_Ideal_Bandlimited_Interpolation.html

        https://github.com/kaldi-asr/kaldi/blob/master/src/feat/resample.h#L56

        Requires `self.weights` and `self.first_indices` to have been set by
        `_indices_and_weights`.

        Arguments
        ---------
        waveforms : tensor
            The batch of audio signals to resample, unpacked here as
            `[batch, channels, time]`.

        Returns
        -------
        The waveforms at the new frequency, shape
        `[batch, channels, output_time]`.
        """

        # Compute output size and initialize
        batch_size, num_channels, wave_len = waveforms.shape
        window_size = self.weights.shape[1]
        tot_output_samp = self._output_samples(wave_len)
        # Accumulator: every phase adds its samples into this tensor
        resampled_waveform = paddle.zeros(
            (batch_size, num_channels, tot_output_samp))
        # (device handling from the original implementation, disabled here)
        # self.weights = self.weights.to(waveforms.device)

        # Check weights are on correct device
        # if waveforms.device != self.weights.device:
        #     self.weights = self.weights.to(waveforms.device)

        # eye size: (num_channels, num_channels, 1)
        eye = paddle.eye(num_channels).unsqueeze(2)

        # Iterate over the phases in the polyphase filter
        for i in range(self.first_indices.shape[0]):
            wave_to_conv = waveforms
            first_index = int(self.first_indices[i].item())
            if first_index >= 0:
                # trim the signal as the filter will not be applied
                # before the first_index
                wave_to_conv = wave_to_conv[..., first_index:]

            # pad the right of the signal to allow partial convolutions
            # meaning compute values for partial windows (e.g. end of the
            # window is outside the signal length)
            max_index = (tot_output_samp - 1) // self.output_samples
            end_index = max_index * self.conv_stride + window_size
            current_wave_len = wave_len - first_index
            right_padding = max(0, end_index + 1 - current_wave_len)
            # a negative first_index means the window starts before the
            # signal, which is compensated by padding on the left
            left_padding = max(0, -first_index)
            wave_to_conv = paddle.nn.functional.pad(
                wave_to_conv, (left_padding, right_padding), data_format='NCL')
            # grouped convolution: the same phase filter is applied to each
            # channel independently
            conv_wave = paddle.nn.functional.conv1d(
                x=wave_to_conv,
                weight=self.weights[i].repeat(num_channels, 1, 1),
                stride=self.conv_stride,
                groups=num_channels, )

            # we want conv_wave[:, i] to be at
            # output[:, i + n*conv_transpose_stride]
            dilated_conv_wave = paddle.nn.functional.conv1d_transpose(
                conv_wave, eye, stride=self.conv_transpose_stride)

            # pad dilated_conv_wave so it reaches the output length if needed.
            # The left padding of i shifts this phase to its output offset.
            left_padding = i
            previous_padding = left_padding + dilated_conv_wave.shape[-1]
            right_padding = max(0, tot_output_samp - previous_padding)
            dilated_conv_wave = paddle.nn.functional.pad(
                dilated_conv_wave, (left_padding, right_padding),
                data_format='NCL')
            # cut off anything beyond the expected output length
            dilated_conv_wave = dilated_conv_wave[..., :tot_output_samp]

            resampled_waveform += dilated_conv_wave

        return resampled_waveform

    def _output_samples(self, input_num_samp):
        """Based on LinearResample::GetNumOutputSamples.

        LinearResample (LR) means that the output signal is at
        linearly spaced intervals (i.e the output signal has a
        frequency of ``new_freq``). It uses sinc/bandlimited
        interpolation to upsample/downsample the signal.

        (almost directly from torchaudio.compliance.kaldi)

        Arguments
        ---------
        input_num_samp : int
            The number of samples in each example in the batch.

        Returns
        -------
        Number of samples in the output waveform (0 if `input_num_samp`
        is not positive).
        """

        # For exact computation, we measure time in "ticks" of 1.0 / tick_freq,
        # where tick_freq is the least common multiple of samp_in and
        # samp_out.
        samp_in = int(self.orig_freq)
        samp_out = int(self.new_freq)

        tick_freq = abs(samp_in * samp_out) // math.gcd(samp_in, samp_out)
        ticks_per_input_period = tick_freq // samp_in

        # work out the number of ticks in the time interval
        # [ 0, input_num_samp/samp_in ).
        interval_length = input_num_samp * ticks_per_input_period
        if interval_length <= 0:
            return 0
        ticks_per_output_period = tick_freq // samp_out

        # Get the last output-sample in the closed interval,
        # i.e. replacing [ ) with [ ]. Note: integer division rounds down.
        # See http://en.wikipedia.org/wiki/Interval_(mathematics) for an
        # explanation of the notation.
        last_output_samp = interval_length // ticks_per_output_period

        # We need the last output-sample in the open interval, so if it
        # takes us to the end of the interval exactly, subtract one.
        if last_output_samp * ticks_per_output_period == interval_length:
            last_output_samp -= 1

        # First output-sample index is zero, so the number of output samples
        # is the last output-sample plus one.
        num_output_samp = last_output_samp + 1

        return num_output_samp

    def _indices_and_weights(self, waveforms):
        """Based on LinearResample::SetIndexesAndWeights

        Computes the weights for resampling as well as the indices in which
        they are valid. LinearResample (LR) means that the output signal is at
        linearly spaced intervals (i.e the output signal has a frequency
        of ``new_freq``). It uses sinc/bandlimited interpolation to
        upsample/downsample the signal.

        Arguments
        ---------
        waveforms : tensor
            Not used by this method.

        Returns
        -------
        Nothing. The results are stored on the instance:
        - ``self.first_indices``: the place where each filter should start
          being applied
        - ``self.weights``: the filters to be applied to the signal for
          resampling, size (output_samples, max_weight_width)
        """

        # Lowpass filter frequency depends on smaller of two frequencies
        min_freq = min(self.orig_freq, self.new_freq)
        lowpass_cutoff = 0.99 * 0.5 * min_freq

        assert lowpass_cutoff * 2 <= min_freq
        # half-width of the filter window, in seconds
        window_width = self.lowpass_filter_width / (2.0 * lowpass_cutoff)

        assert lowpass_cutoff < min(self.orig_freq, self.new_freq) / 2
        # time stamp of each output sample within one unit (one per phase)
        output_t = paddle.arange(
            start=0.0, end=self.output_samples, dtype='int64')
        output_t /= self.new_freq
        min_t = output_t - window_width
        max_t = output_t + window_width

        # range of input sample indices falling inside each phase's window
        min_input_index = paddle.ceil(min_t * self.orig_freq)
        max_input_index = paddle.floor(max_t * self.orig_freq)
        num_indices = max_input_index - min_input_index + 1

        # all phases share the width of the widest window
        max_weight_width = num_indices.max()
        j = paddle.arange(max_weight_width)
        input_index = min_input_index.unsqueeze(1) + j.unsqueeze(0)
        # time offset between every input sample and its output sample
        delta_t = (input_index / self.orig_freq) - output_t.unsqueeze(1)

        weights = paddle.zeros_like(delta_t)

        inside_window_indices = delta_t.abs() < (window_width)
        # raised-cosine (Hanning) window with width `window_width`
        weights[inside_window_indices] = 0.5 * (1 + paddle.cos(
            2 * math.pi * lowpass_cutoff / self.lowpass_filter_width *
            delta_t[inside_window_indices]))
        t_eq_zero_indices = delta_t == 0.0
        t_not_eq_zero_indices = ~t_eq_zero_indices

        # sinc filter function
        weights[t_not_eq_zero_indices] *= paddle.sin(
            2 * math.pi * lowpass_cutoff * delta_t[t_not_eq_zero_indices]) / (
                math.pi * delta_t[t_not_eq_zero_indices])

        # limit of the function at t = 0
        weights[t_eq_zero_indices] *= 2 * lowpass_cutoff

        # size (output_samples, max_weight_width)
        weights /= self.orig_freq

        self.first_indices = min_input_index
        self.weights = weights


class DropFreq(nn.Layer):
    """This class drops a random frequency from the signal.
    The purpose of this class is to teach models to learn to rely on all parts
    of the signal, not just a few frequency bands.
    Arguments
    ---------
    drop_freq_low : float
        The low end of frequencies that can be dropped,
        as a fraction of the sampling rate / 2.
    drop_freq_high : float
        The high end of frequencies that can be
        dropped, as a fraction of the sampling rate / 2.
    drop_count_low : int
        The low end of number of frequencies that could be dropped.
    drop_count_high : int
        The high end of number of frequencies that could be dropped.
    drop_width : float
        The width of the frequency band to drop, as
        a fraction of the sampling_rate / 2.
    drop_prob : float
        The probability that the batch of signals will have a frequency
        dropped. By default, every batch has frequencies dropped.
    Example
    -------
    >>> dropper = DropFreq()
    >>> signal = paddle.rand([16000])
    >>> dropped_signal = dropper(signal.unsqueeze(0))
    """

    def __init__(
            self,
            drop_freq_low=1e-14,
            drop_freq_high=1,
            drop_count_low=1,
            drop_count_high=2,
            drop_width=0.05,
            drop_prob=1, ):
        super().__init__()
        self.drop_freq_low = drop_freq_low
        self.drop_freq_high = drop_freq_high
        self.drop_count_low = drop_count_low
        self.drop_count_high = drop_count_high
        self.drop_width = drop_width
        self.drop_prob = drop_prob

    def forward(self, waveforms):
        """Filter the batch with a cascade of randomly placed notch filters.

        Arguments
        ---------
        waveforms : tensor
            Shape should be `[batch, time]` or `[batch, time, channels]`.
        Returns
        -------
        Tensor of shape `[batch, time]` or `[batch, time, channels]`.
        """

        # Don't drop (return early) 1-`drop_prob` portion of the batches
        dropped_waveform = waveforms.clone()
        if paddle.rand([1]) > self.drop_prob:
            return dropped_waveform

        # Add channels dimension
        if len(waveforms.shape) == 2:
            dropped_waveform = dropped_waveform.unsqueeze(-1)

        # Pick number of frequencies to drop
        drop_count = paddle.randint(
            low=self.drop_count_low,
            high=self.drop_count_high + 1,
            shape=(1, ), )
        # Work with a plain int from here on. The previous check,
        # `drop_count.shape == 0`, compared a shape list with an integer and
        # was therefore never true, so no notch filter was ever applied.
        num_drops = int(drop_count.item())

        # Filter parameters
        filter_length = 101
        pad = filter_length // 2

        # Start with delta function
        drop_filter = paddle.zeros([1, filter_length, 1])
        drop_filter[0, pad, 0] = 1

        if num_drops > 0:
            # Pick the frequencies to drop
            drop_range = self.drop_freq_high - self.drop_freq_low
            drop_frequency = (
                paddle.rand([num_drops]) * drop_range + self.drop_freq_low)
            # Subtract each frequency; Python floats are handed over so the
            # range assertions inside notch_filter act on scalars
            for frequency in drop_frequency.tolist():
                notch_kernel = notch_filter(
                    frequency,
                    filter_length,
                    self.drop_width, )
                drop_filter = convolve1d(drop_filter, notch_kernel, pad)

        # Apply filter
        dropped_waveform = convolve1d(dropped_waveform, drop_filter, pad)

        # Remove channels dimension if added
        return dropped_waveform.squeeze(-1)


class DropChunk(nn.Layer):
    """This class drops portions of the input signal.

    Using `DropChunk` as an augmentation strategy helps a model learn to rely
    on all parts of the signal, since it can't expect a given part to be
    present.

    Arguments
    ---------
    drop_length_low : int
        The low end of lengths for which to set the
        signal to zero, in samples.
    drop_length_high : int
        The high end of lengths for which to set the
        signal to zero, in samples.
    drop_count_low : int
        The low end of number of times that the signal
        can be dropped to zero.
    drop_count_high : int
        The high end of number of times that the signal
        can be dropped to zero.
    drop_start : int
        The first index for which dropping will be allowed.
    drop_end : int
        The last index for which dropping will be allowed.
    drop_prob : float
        The probability that the batch of signals will
        have a portion dropped. By default, every batch
        has portions dropped.
    noise_factor : float
        The factor relative to average amplitude of an utterance
        to use for scaling the white noise inserted. 1 keeps
        the average amplitude the same, while 0 inserts all 0's.

    Example
    -------
    >>> dropper = DropChunk(drop_start=100, drop_end=200, noise_factor=0.)
    >>> signal = paddle.rand([16000])
    >>> signal = signal.unsqueeze(0) # [batch, time]
    >>> length = paddle.ones([1])
    >>> dropped_signal = dropper(signal, length)
    >>> float(dropped_signal[:, 150])
    0.0
    """

    def __init__(
            self,
            drop_length_low=100,
            drop_length_high=1000,
            drop_count_low=1,
            drop_count_high=10,
            drop_start=0,
            drop_end=None,
            drop_prob=1,
            noise_factor=0.0, ):
        super().__init__()

        # Every (low, high) pair has to be ordered
        if drop_length_low > drop_length_high:
            raise ValueError("Low limit must not be more than high limit")
        if drop_count_low > drop_count_high:
            raise ValueError("Low limit must not be more than high limit")

        # A non-negative end fixes the droppable range up front
        bounded = drop_end is not None and drop_end >= 0
        if bounded and drop_start > drop_end:
            raise ValueError("Low limit must not be more than high limit")
        if bounded:
            # Chunks may not be longer than the range they must fit in
            span = drop_end - drop_start
            drop_length_low = min(drop_length_low, span)
            drop_length_high = min(drop_length_high, span)

        self.drop_length_low = drop_length_low
        self.drop_length_high = drop_length_high
        self.drop_count_low = drop_count_low
        self.drop_count_high = drop_count_high
        self.drop_start = drop_start
        self.drop_end = drop_end
        self.drop_prob = drop_prob
        self.noise_factor = noise_factor

    def forward(self, waveforms, lengths):
        """Overwrite random chunks of every utterance with zeros or noise.

        Arguments
        ---------
        waveforms : tensor
            Shape should be `[batch, time]` or `[batch, time, channels]`.
        lengths : tensor
            Shape should be a single dimension, `[batch]`. The values are
            multiplied by the size of the time axis to obtain lengths in
            samples.
        Returns
        -------
        Tensor of shape `[batch, time]` or
            `[batch, time, channels]`
        """

        # Relative lengths -> lengths in samples
        lengths = (lengths * waveforms.shape[1]).long()
        num_utts = waveforms.shape[0]
        dropped_waveform = waveforms.clone()

        # With probability 1 - `drop_prob` the batch is only copied
        if paddle.rand([1]) > self.drop_prob:
            return dropped_waveform

        # Reference amplitude of the untouched signal, used to scale noise
        clean_amplitude = compute_amplitude(waveforms, lengths.unsqueeze(1))

        # Number of chunks to drop for every utterance
        drop_times = paddle.randint(
            low=self.drop_count_low,
            high=self.drop_count_high + 1,
            shape=(num_utts, ), )

        for utt in range(num_utts):
            n_chunks = drop_times[utt]
            if n_chunks == 0:
                continue

            # Length of every chunk
            length = paddle.randint(
                low=self.drop_length_low,
                high=self.drop_length_high + 1,
                shape=(n_chunks, ), )

            # Lowest admissible start; negative values count from the end
            start_min = self.drop_start
            if start_min < 0:
                start_min += lengths[utt]

            # Highest admissible start, leaving room for the longest chunk
            start_max = self.drop_end
            if start_max is None:
                start_max = lengths[utt]
            if start_max < 0:
                start_max += lengths[utt]
            start_max = max(0, start_max - length.max())

            # Start and end of every chunk
            start = paddle.randint(
                low=start_min,
                high=start_max + 1,
                shape=(n_chunks, ), )
            end = start + length

            if self.noise_factor:
                # Uniform distribution of -2 to +2 * avg amplitude should
                # preserve the average for normalization
                noise_max = 2 * clean_amplitude[utt] * self.noise_factor
                for k in range(n_chunks):
                    # zero-center the noise distribution
                    noise_vec = paddle.rand([length[k]])
                    noise_vec = 2 * noise_max * noise_vec - noise_max
                    dropped_waveform[utt, start[k]:end[k]] = noise_vec
            else:
                # No noise requested: silence the chunks
                for k in range(n_chunks):
                    dropped_waveform[utt, start[k]:end[k]] = 0.0

        return dropped_waveform


class SpecAugment(paddle.nn.Layer):
    """SpecAugment-style augmentation (https://arxiv.org/abs/1904.08779).

    Up to three transforms are applied in order: time warping, frequency
    masking (along axis 2) and time masking (along axis 1).

    Arguments
    ---------
    time_warp : bool
        Enable the time-warping step.
    time_warp_window : int
        Half-width of the window in which the warp center is displaced.
    time_warp_mode : str
        Interpolation mode forwarded to ``paddle.nn.functional.interpolate``
        (default "bicubic").
    freq_mask : bool
        Enable masking along the feature axis.
    freq_mask_width : int or tuple
        Range the mask widths are drawn from; an int ``w`` means ``(0, w)``.
    n_freq_mask : int
        Number of frequency masks drawn per batch item.
    time_mask : bool
        Enable masking along the time axis.
    time_mask_width : int or tuple
        Range the mask widths are drawn from; an int ``w`` means ``(0, w)``.
    n_time_mask : int
        Number of time masks drawn per batch item.
    replace_with_zero : bool
        Masked positions are set to 0 when True, otherwise to the mean of the
        whole input tensor.

    Example
    -------
    >>> aug = SpecAugment()
    >>> a = paddle.rand([8, 120, 80])
    >>> a = aug(a)
    >>> print(a.shape)
    [8, 120, 80]
    """

    def __init__(
            self,
            time_warp=True,
            time_warp_window=5,
            time_warp_mode="bicubic",
            freq_mask=True,
            freq_mask_width=(0, 20),
            n_freq_mask=2,
            time_mask=True,
            time_mask_width=(0, 100),
            n_time_mask=2,
            replace_with_zero=True, ):
        super().__init__()
        assert (
            time_warp or freq_mask or time_mask
        ), "at least one of time_warp, time_mask, or freq_mask should be applied"

        # A bare int width is shorthand for the range (0, width).
        if isinstance(freq_mask_width, int):
            freq_mask_width = (0, freq_mask_width)
        if isinstance(time_mask_width, int):
            time_mask_width = (0, time_mask_width)

        # Time-warp settings.
        self.apply_time_warp = time_warp
        self.time_warp_window = time_warp_window
        self.time_warp_mode = time_warp_mode

        # Frequency-mask settings.
        self.freq_mask = freq_mask
        self.freq_mask_width = freq_mask_width
        self.n_freq_mask = n_freq_mask

        # Time-mask settings.
        self.time_mask = time_mask
        self.time_mask_width = time_mask_width
        self.n_time_mask = n_time_mask

        self.replace_with_zero = replace_with_zero

    def forward(self, x):
        """Apply the enabled augmentations to ``x`` and return the result."""
        if self.apply_time_warp:
            x = self.time_warp(x)
        if self.freq_mask:
            x = self.mask_along_axis(x, dim=2)
        if self.time_mask:
            x = self.mask_along_axis(x, dim=1)
        return x

    def time_warp(self, x):
        """Warp the time axis around a random center using interpolation.

        A 3-D input is temporarily given a channel axis because the 2-D
        interpolation needs a 4-D tensor; the result has the input's shape.
        Inputs with at most ``2 * time_warp_window`` frames are returned
        unchanged.
        """
        input_shape = x.shape
        half_window = self.time_warp_window

        # (Batch, Time, Freq) -> (Batch, 1, Time, Freq)
        if x.dim() == 3:
            x = x.unsqueeze(1)

        n_frames = x.shape[2]
        if n_frames - half_window <= half_window:
            # Too short to place a warp center with room on both sides.
            return x.reshape([*input_shape])

        n_feats = x.shape[3]

        # Source split point, then the position it is moved to.
        center = paddle.randint(half_window, n_frames - half_window, (1, ))[0]
        target = paddle.randint(center - half_window, center + half_window,
                                (1, ))[0] + 1

        interp_options = dict(mode=self.time_warp_mode, align_corners=True)
        head = paddle.nn.functional.interpolate(
            x[:, :, :center], (target, n_feats), **interp_options)
        tail = paddle.nn.functional.interpolate(
            x[:, :, center:], (n_frames - target, n_feats), **interp_options)

        # Both halves are computed before x is overwritten.
        x[:, :, :target] = head
        x[:, :, target:] = tail
        return x.reshape([*input_shape])

    def mask_along_axis(self, x, dim):
        """Mask random contiguous spans along the time or feature axis.

        Arguments
        ---------
        x : tensor
            Input tensor; a 4-D input is flattened to 3-D for masking and
            restored afterwards.
        dim : int
            1 masks along time (axis 1); any other value masks along the
            feature axis (axis 2).
        """
        input_shape = x.shape
        if x.dim() == 4:
            x = x.reshape([-1, x.shape[2], x.shape[3]])

        n_batch, n_time, n_feat = x.shape
        along_time = dim == 1

        if along_time:
            axis_len = n_time
            n_mask = self.n_time_mask
            width_range = self.time_mask_width
        else:
            axis_len = n_feat
            n_mask = self.n_freq_mask
            width_range = self.freq_mask_width

        # One width and one start per (batch item, mask).
        widths = paddle.randint(width_range[0], width_range[1],
                                (n_batch, n_mask)).unsqueeze(2)
        starts = paddle.randint(0, max(1, axis_len - widths.max()),
                                (n_batch, n_mask)).unsqueeze(2)

        # A position is masked if it falls inside any of the item's spans.
        positions = paddle.arange(end=axis_len).reshape([1, 1, -1])
        covered = (starts <= positions) * (positions < (starts + widths))
        covered = covered.any(axis=1)

        # Broadcast the per-position mask over the other axis.
        covered = covered.unsqueeze(2 if along_time else 1)

        fill_value = 0.0 if self.replace_with_zero else x.mean()
        # Equivalent to x.masked_fill_(covered, fill_value).
        filler = paddle.full(x.shape, fill_value, x.dtype)
        x = paddle.where(covered, filler, x)
        return x.reshape([*input_shape])


class TimeDomainSpecAugment(nn.Layer):
    """A time-domain approximation of the SpecAugment algorithm.
    This augmentation module implements three augmentations in
    the time-domain.
     1. Drop chunks of the audio (zero amplitude or white noise)
     2. Drop frequency bands (with band-drop filters)
     3. Speed perturbation (via resampling to slightly different rate)
    Arguments
    ---------
    perturb_prob : float from 0 to 1
        The probability that a batch will have speed perturbation applied.
    drop_freq_prob : float from 0 to 1
        The probability that a batch will have frequencies dropped.
    drop_chunk_prob : float from 0 to 1
        The probability that a batch will have chunks dropped.
    speeds : list of ints, optional
        A set of different speeds to use to perturb each batch.
        Defaults to ``[95, 100, 105]`` when left as ``None``.
        See ``speechbrain.processing.speech_augmentation.SpeedPerturb``
    sample_rate : int
        Sampling rate of the input waveforms.
    drop_freq_count_low : int
        Lowest number of frequencies that could be dropped.
    drop_freq_count_high : int
        Highest number of frequencies that could be dropped.
    drop_chunk_count_low : int
        Lowest number of chunks that could be dropped.
    drop_chunk_count_high : int
        Highest number of chunks that could be dropped.
    drop_chunk_length_low : int
        Lowest length of chunks that could be dropped.
    drop_chunk_length_high : int
        Highest length of chunks that could be dropped.
    drop_chunk_noise_factor : float
        The noise factor used to scale the white noise inserted, relative to
        the average amplitude of the utterance. Default 0 (no noise inserted).
    Example
    -------
    >>> inputs = paddle.randn([10, 16000])
    >>> feature_maker = TimeDomainSpecAugment(speeds=[80])
    >>> feats = feature_maker(inputs, paddle.ones(10))
    >>> feats.shape
    [10, 12800]
    """

    def __init__(
            self,
            perturb_prob=1.0,
            drop_freq_prob=1.0,
            drop_chunk_prob=1.0,
            speeds=None,
            sample_rate=16000,
            drop_freq_count_low=0,
            drop_freq_count_high=3,
            drop_chunk_count_low=0,
            drop_chunk_count_high=5,
            drop_chunk_length_low=1000,
            drop_chunk_length_high=2000,
            drop_chunk_noise_factor=0, ):
        super().__init__()
        # Build the default per instance: a list literal in the signature
        # would be one shared object handed to every SpeedPerturb created.
        if speeds is None:
            speeds = [95, 100, 105]
        self.speed_perturb = SpeedPerturb(
            perturb_prob=perturb_prob, orig_freq=sample_rate, speeds=speeds)
        self.drop_freq = DropFreq(
            drop_prob=drop_freq_prob,
            drop_count_low=drop_freq_count_low,
            drop_count_high=drop_freq_count_high, )
        self.drop_chunk = DropChunk(
            drop_prob=drop_chunk_prob,
            drop_count_low=drop_chunk_count_low,
            drop_count_high=drop_chunk_count_high,
            drop_length_low=drop_chunk_length_low,
            drop_length_high=drop_chunk_length_high,
            noise_factor=drop_chunk_noise_factor, )

    def forward(self, waveforms, lengths):
        """Returns the distorted waveforms.
        Arguments
        ---------
        waveforms : tensor
            The waveforms to distort
        lengths : tensor
            Per-utterance lengths, forwarded unchanged to the chunk-dropping
            step (the class example passes ``paddle.ones(batch)``).
        """
        # Augmentation runs without gradient tracking, in the fixed order
        # speed perturbation -> frequency dropping -> chunk dropping.
        with paddle.no_grad():
            waveforms = self.speed_perturb(waveforms)
            waveforms = self.drop_freq(waveforms)
            waveforms = self.drop_chunk(waveforms, lengths)
        return waveforms


================================================
FILE: paddlespeech/s2t/models/wav2vec2/wav2vec2_ASR.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import defaultdict
from typing import Dict
from typing import List
from typing import Tuple

import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from paddlespeech.s2t.models.wav2vec2.modules.modeling_wav2vec2 import Wav2Vec2ConfigPure
from paddlespeech.s2t.models.wav2vec2.modules.modeling_wav2vec2 import Wav2Vec2Model
from paddlespeech.s2t.models.wav2vec2.modules.VanillaNN import VanillaNN
from paddlespeech.s2t.models.wav2vec2.processing.speech_augmentation import SpecAugment
from paddlespeech.s2t.modules.ctc import CTCDecoderBase as CTC
from paddlespeech.s2t.modules.initializer import DefaultInitializerContext
from paddlespeech.s2t.utils.ctc_utils import remove_duplicates_and_blank
from paddlespeech.s2t.utils.log import Log
from paddlespeech.s2t.utils.utility import log_add

logger = Log(__name__).getlog()


class Wav2vec2ASR(nn.Layer):
    """wav2vec 2.0 encoder followed by a ``VanillaNN`` head and a CTC layer.

    ``config`` is read both with ``config.get("init_type", None)`` and with
    attribute access (``normalize_wav``, ``output_norm``, ``freeze_wav2vec2``,
    ``enc``, ``ctc``, ``output_dim`` and the optional ``spec_augment``).
    """

    def __init__(self, config: dict):
        super().__init__()
        init_type = config.get("init_type", None)
        with DefaultInitializerContext(init_type):
            self.config = config
            wav2vec2_config = Wav2Vec2ConfigPure(config)
            wav2vec2 = Wav2Vec2Model(wav2vec2_config)
            self.normalize_wav = config.normalize_wav
            self.output_norm = config.output_norm
            # SpecAugment is only built when the config asks for it.
            if hasattr(config, 'spec_augment'):
                self.spec_augment = SpecAugment(**config.spec_augment)

            if config.freeze_wav2vec2:
                # Frozen encoder: eval mode and no trainable parameters.
                wav2vec2.eval()
                for parm in wav2vec2.parameters():
                    parm.trainable = False
            self.wav2vec2 = wav2vec2
            self.enc = VanillaNN(**config.enc)
            self.ctc = CTC(**config.ctc,
                           odim=config.output_dim,
                           batch_average=False,
                           reduction='mean')

    def forward(self, wav, wavs_lens_rate, target, target_lens):
        """Compute the CTC loss for a batch.

        Args:
            wav: batch of waveforms fed to the wav2vec2 encoder.
            wavs_lens_rate: per-utterance length ratio; multiplied by the
                encoder output length and rounded to get frame counts.
            target: target token ids passed to the CTC layer.
            target_lens: target lengths passed to the CTC layer.
        Returns:
            The value returned by the CTC layer.
        """
        if self.normalize_wav:
            wav = F.layer_norm(wav, wav.shape[1:])

        # Extract wav2vec output
        out = self.wav2vec2(wav)[0]
        # We normalize the output if required
        if self.output_norm:
            out = F.layer_norm(out, out.shape[1:])

        # SpecAugment is a training-only transform.
        if self.training and hasattr(self.config, 'spec_augment'):
            feats = self.spec_augment(out)
        else:
            feats = out

        x = self.enc(feats)

        x_lens = (wavs_lens_rate * x.shape[1]).round().astype(paddle.int64)

        ctc_loss = self.ctc(x, x_lens, target, target_lens)

        return ctc_loss

    @staticmethod
    def _strip_special_tokens(text_feature, predicted_tokens):
        """Turn tokenizer tokens into (text, token ids) for one hypothesis.

        "[CLS]" is skipped, the first "[SEP]" or "[PAD]" ends the sequence,
        and every remaining token is looked up in ``text_feature.vocab``.
        """
        kept_tokens = []
        kept_token_ids = []
        for c in predicted_tokens:
            if c == "[CLS]":
                continue
            elif c == "[SEP]" or c == "[PAD]":
                break
            else:
                kept_tokens.append(c)
                kept_token_ids.append(text_feature.vocab[c])
        return ''.join(kept_tokens), kept_token_ids

    @paddle.no_grad()
    def decode(self,
               feats: paddle.Tensor,
               text_feature: Dict[str, int],
               decoding_method: str,
               beam_size: int,
               tokenizer: str=None,
               sb_pipeline=False):
        """Decode a batch of inputs into text and token ids.

        Args:
            feats: model input; ``feats.unsqueeze(-1)`` is applied first when
                ``sb_pipeline`` is True.
            text_feature: object providing ``defeaturize`` (plain path) or
                ``convert_ids_to_tokens`` and ``vocab`` (tokenizer path).
            decoding_method: 'ctc_greedy_search' or 'ctc_prefix_beam_search'.
            beam_size: beam width, used by prefix beam search only.
            tokenizer: only tested against ``None``; together with
                ``sb_pipeline`` it selects the tokenizer path.
            sb_pipeline: see ``feats`` and ``tokenizer``.
        Returns:
            ``(res, res_tokenids)``: decoded strings and their token ids, one
            entry per hypothesis.
        Raises:
            ValueError: for any other ``decoding_method``.
        """
        batch_size = feats.shape[0]

        if decoding_method == 'ctc_prefix_beam_search' and batch_size > 1:
            logger.error(
                f"decoding mode {decoding_method} must be running with batch_size == 1"
            )
            logger.error(f"current batch_size is {batch_size}")

        if decoding_method == 'ctc_greedy_search':
            if tokenizer is None and sb_pipeline is False:
                hyps = self.ctc_greedy_search(feats)
                res = [text_feature.defeaturize(hyp) for hyp in hyps]
                res_tokenids = [hyp for hyp in hyps]
            else:
                if sb_pipeline is True:
                    hyps = self.ctc_greedy_search(feats.unsqueeze(-1))
                else:
                    hyps = self.ctc_greedy_search(feats)
                res = []
                res_tokenids = []
                # Every hypothesis is post-processed inside the loop, so the
                # outputs hold one entry per utterance of the batch.
                for sequence in hyps:
                    # Decode token terms to words
                    predicted_tokens = text_feature.convert_ids_to_tokens(
                        sequence)
                    text, token_ids = self._strip_special_tokens(
                        text_feature, predicted_tokens)
                    res.append(text)
                    res_tokenids.append(token_ids)

        # ctc_prefix_beam_search and attention_rescoring only return one
        # result in List[int], change it to List[List[int]] for compatible
        # with other batch decoding mode
        elif decoding_method == 'ctc_prefix_beam_search':
            assert feats.shape[0] == 1
            if tokenizer is None and sb_pipeline is False:
                hyp = self.ctc_prefix_beam_search(feats, beam_size)
                res = [text_feature.defeaturize(hyp)]
                res_tokenids = [hyp]
            else:
                if sb_pipeline is True:
                    hyp = self.ctc_prefix_beam_search(
                        feats.unsqueeze(-1), beam_size)
                else:
                    hyp = self.ctc_prefix_beam_search(feats, beam_size)
                predicted_tokens = text_feature.convert_ids_to_tokens(hyp)
                text, token_ids = self._strip_special_tokens(text_feature,
                                                             predicted_tokens)
                res = [text]
                res_tokenids = [token_ids]
        else:
            raise ValueError(
                f"wav2vec2 not support decoding method: {decoding_method}")

        return res, res_tokenids

    @classmethod
    def from_config(cls, config):
        """Build a model from ``config``; equivalent to ``cls(config)``."""
        model = cls(config)
        return model

    def ctc_greedy_search(self, wav) -> List[List[int]]:
        """ Apply CTC greedy search
        Args:
            wav (paddle.Tensor): 3-D input; only ``wav[:, :, 0]`` is fed to
                the encoder.
        Returns:
            List[List[int]]: best path result
        """
        batch_size = wav.shape[0]
        wav = wav[:, :, 0]
        if self.normalize_wav:
            wav = F.layer_norm(wav, wav.shape[1:])
        # Extract wav2vec output
        out = self.wav2vec2(wav)[0]
        # We normalize the output if required
        if self.output_norm:
            out = F.layer_norm(out, out.shape[1:])
        feats = out
        x = self.enc(feats)
        x_lens = x.shape[1]
        ctc_probs = self.ctc.log_softmax(x)  # (B, maxlen, vocab_size)
        topk_prob, topk_index = ctc_probs.topk(1, axis=2)  # (B, maxlen, 1)
        topk_index = topk_index.reshape([batch_size, x_lens])  # (B, maxlen)

        hyps = [hyp.tolist() for hyp in topk_index]
        hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps]
        return hyps

    def _ctc_prefix_beam_search(
            self,
            wav,
            beam_size,
            blank_id: int=0, ) -> List[Tuple[Tuple[int, ...], float]]:
        """ CTC prefix beam search inner implementation
        Args:
            wav (paddle.Tensor): 3-D input; only ``wav[:, :, 0]`` is fed to
                the encoder. The batch axis of the CTC output is squeezed, so
                a batch of one is expected.
            beam_size (int): beam size for beam search
            blank_id (int): id of the CTC blank token
        Returns:
            List[Tuple[Tuple[int, ...], float]]: up to ``beam_size`` entries
            of (prefix, log score), best first.
        """
        wav = wav[:, :, 0]

        if self.normalize_wav:
            wav = F.layer_norm(wav, wav.shape[1:])
        # Extract wav2vec output
        out = self.wav2vec2(wav)[0]
        # We normalize the output if required
        if self.output_norm:
            out = F.layer_norm(out, out.shape[1:])
        feats = out

        x = self.enc(feats)
        maxlen = x.shape[1]
        ctc_probs = self.ctc.log_softmax(x)  # (1, maxlen, vocab_size)
        ctc_probs = ctc_probs.squeeze(0)

        # cur_hyps: (prefix, (blank_ending_score, none_blank_ending_score))
        # blank_ending_score and  none_blank_ending_score in ln domain
        cur_hyps = [(tuple(), (0.0, -float('inf')))]
        # 2. CTC beam search step by step
        for t in range(0, maxlen):
            logp = ctc_probs[t]  # (vocab_size,)
            # key: prefix, value (pb, pnb), default value(-inf, -inf)
            next_hyps = defaultdict(lambda: (-float('inf'), -float('inf')))
            # 2.1 First beam prune: select topk best
            top_k_logp, top_k_index = logp.topk(beam_size)  # (beam_size,)
            for s in top_k_index:
                s = s.item()
                ps = logp[s].item()
                for prefix, (pb, pnb) in cur_hyps:
                    last = prefix[-1] if len(prefix) > 0 else None
                    if s == blank_id:  # blank
                        n_pb, n_pnb = next_hyps[prefix]
                        n_pb = log_add([n_pb, pb + ps, pnb + ps])
                        next_hyps[prefix] = (n_pb, n_pnb)
                    elif s == last:
                        #  Update *ss -> *s;
                        n_pb, n_pnb = next_hyps[prefix]
                        n_pnb = log_add([n_pnb, pnb + ps])
                        next_hyps[prefix] = (n_pb, n_pnb)
                        # Update *s-s -> *ss, - is for blank
                        n_prefix = prefix + (s, )
                        n_pb, n_pnb = next_hyps[n_prefix]
                        n_pnb = log_add([n_pnb, pb + ps])
                        next_hyps[n_prefix] = (n_pb, n_pnb)
                    else:
                        n_prefix = prefix + (s, )
                        n_pb, n_pnb = next_hyps[n_prefix]
                        n_pnb = log_add([n_pnb, pb + ps, pnb + ps])
                        next_hyps[n_prefix] = (n_pb, n_pnb)

            # 2.2 Second beam prune
            next_hyps = sorted(
                next_hyps.items(),
                key=lambda x: log_add(list(x[1])),
                reverse=True)
            cur_hyps = next_hyps[:beam_size]

        hyps = [(y[0], log_add([y[1][0], y[1][1]])) for y in cur_hyps]
        return hyps

    def ctc_prefix_beam_search(self, wav, beam_size) -> List[int]:
        """ Apply CTC prefix beam search
        Args:
            wav (paddle.Tensor): input forwarded to
                ``_ctc_prefix_beam_search``.
            beam_size (int): beam size for beam search
        Returns:
            The token-id prefix of the best-scoring hypothesis (stored as a
            tuple by the inner search).
        """
        hyps = self._ctc_prefix_beam_search(wav, beam_size)
        return hyps[0][0]


class Wav2vec2Base(nn.Layer):
    """Thin wrapper exposing a bare ``Wav2Vec2Model`` built from a config."""

    def __init__(self, config: dict):
        super().__init__()
        # The raw config is first converted to the model's own config type.
        self.wav2vec2 = Wav2Vec2Model(Wav2Vec2ConfigPure(config))

    @classmethod
    def from_config(cls, configs: dict):
        """Create the model from a config.
        Args:
            configs (dict): config passed straight to the constructor.
        Returns:
            nn.Layer: Wav2vec2Base
        """
        return cls(configs)

    def forward(self, wav):
        """Run the wrapped wav2vec2 model on ``wav`` and return its output."""
        return self.wav2vec2(wav)


================================================
FILE: paddlespeech/s2t/models/wavlm/__init__.py
================================================
from .wavlm_paddle import WavLM, WavLMConfig
from .wavlm_asr import WavLMASR, WavLMBase

================================================
FILE: paddlespeech/s2t/models/wavlm/modules/__init__.py
================================================


================================================
FILE: paddlespeech/s2t/models/wavlm/modules/activations.py
================================================
# Copyright 2020 The HuggingFace Team. All rights reserved.
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math

import paddle
import paddle.nn.functional as F


def _gelu_python(x):
    """
    Original Implementation of the GELU activation function in Google BERT repo when initially created. For
    information: OpenAI GPT's GELU is slightly different (and gives slightly different results): 0.5 * x * (1 +
    torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))) This is now written in C in
    torch.nn.functional Also see the Gaussian Error Linear Units paper: https://arxiv.org/abs/1606.08415
    """
    return x * 0.5 * (1.0 + paddle.erf(x / math.sqrt(2.0)))


def gelu_new(x):
    """
    Tanh approximation of GELU: ``0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3)))``.

    Matches the variant in the current Google BERT repo (identical to OpenAI GPT). See the Gaussian Error Linear
    Units paper: https://arxiv.org/abs/1606.08415
    """
    cubic_term = x + 0.044715 * paddle.pow(x, 3.0)
    gate = paddle.tanh(math.sqrt(2.0 / math.pi) * cubic_term)
    return 0.5 * x * (1.0 + gate)


def gelu_fast(x):
    """Tanh approximation of GELU with ``sqrt(2 / pi)`` written as the constant 0.7978845608."""
    inner = x * 0.7978845608 * (1.0 + 0.044715 * x * x)
    return 0.5 * x * (1.0 + paddle.tanh(inner))

# The plain "gelu" name is bound to the tanh-based `gelu_fast`, not to the
# erf-based `_gelu_python`.
gelu = gelu_fast

def _silu_python(x):
    """
    See Gaussian Error Linear Units (Hendrycks et al., https://arxiv.org/abs/1606.08415) where the SiLU (Sigmoid Linear
    Unit) was originally introduced and coined, and see Sigmoid-Weighted Linear Units for Neural Network Function
    Approximation in Reinforcement Learning (Elfwing et al., https://arxiv.org/abs/1702.03118) and Swish: a Self-Gated
    Activation Function (Ramachandran et al., https://arxiv.org/abs/1710.05941v1) where the SiLU was experimented with
    later.
    """
    return x * paddle.nn.functional.sigmoid(x)


def mish(x):
    """Mish activation: ``x * tanh(softplus(x))``."""
    smoothed = paddle.nn.functional.softplus(x)
    return x * paddle.tanh(smoothed)


def linear_act(x):
    """Identity activation: hand the input back untouched."""
    return x


# Lookup table from activation name to callable, consumed by `get_activation`.
# "silu" and "swish" share one implementation; "gelu" and "gelu_fast" both
# resolve to the tanh-based `gelu_fast`.
ACT2FN = {
    "relu": F.relu,
    "silu": _silu_python,
    "swish": _silu_python,
    "gelu": gelu,
    "tanh": paddle.tanh,
    "gelu_new": gelu_new,
    "gelu_fast": gelu_fast,
    "mish": mish,
    "linear": linear_act,
    "sigmoid": paddle.nn.functional.sigmoid,
}


def get_activation(activation_string):
    """Return the activation callable registered under ``activation_string``.

    Raises:
        KeyError: if the name is not a key of ``ACT2FN``; the message lists
            the known names.
    """
    if activation_string not in ACT2FN:
        raise KeyError(
            f"function {activation_string} not found in ACT2FN mapping {list(ACT2FN.keys())}"
        )
    return ACT2FN[activation_string]

================================================
FILE: paddlespeech/s2t/models/wavlm/modules/functional.py
================================================
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from typing import Optional, List, Tuple
import math

def _mha_shape_check(query: paddle.Tensor, key: paddle.Tensor, value: paddle.Tensor,
                     key_padding_mask: Optional[paddle.Tensor], attn_mask: Optional[paddle.Tensor], num_heads: int):
    # Verifies the expected shape for `query, `key`, `value`, `key_padding_mask` and `attn_mask`
    # and returns if the input is batched or not.
    # Raises an error if `query` is not 2-D (unbatched) or 3-D (batched) tensor.

    # Shape check.
    if query.dim() == 3:
        # Batched Inputs
        is_batched = True
        assert key.dim() == 3 and value.dim() == 3, \
            ("For batched (3-D) `query`, expected `key` and `value` to be 3-D"
             f" but found {key.dim()}-D and {value.dim()}-D tensors respectively")
        if key_padding_mask is not None:
            assert key_padding_mask.dim() == 2, \
                ("For batched (3-D) `query`, expected `key_padding_mask` to be `None` or 2-D"
                 f" but found {key_padding_mask.dim()}-D tensor instead")
        if attn_mask is not None:
            assert attn_mask.dim() in (2, 3), \
                ("For batched (3-D) `query`, expected `attn_mask` to be `None`, 2-D or 3-D"
                 f" but found {attn_mask.dim()}-D tensor instead")
    elif query.dim() == 2:
        # Unbatched Inputs
        is_batched = False
        assert key.dim() == 2 and value.dim() == 2, \
            ("For unbatched (2-D) `query`, expected `key` and `value` to be 2-D"
             f" but found {key.dim()}-D and {value.dim()}-D tensors respectively")

        if key_padding_mask is not None:
            assert key_padding_mask.dim() == 1, \
                ("For unbatched (2-D) `query`, expected `key_padding_mask` to be `None` or 1-D"
                 f" but found {key_padding_mask.dim()}-D tensor instead")

        if attn_mask is not None:
            assert attn_mask.dim() in (2, 3), \
                ("For unbatched (2-D) `query`, expected `attn_mask` to be `None`, 2-D or 3-D"
                 f" but found {attn_mask.dim()}-D tensor instead")
            if attn_mask.dim() == 3:
                expected_shape = (num_heads, query.shape[0], key.shape[0])
                assert attn_mask.shape == expected_shape, \
                    (f"Expected `attn_mask` shape to be {expected_shape} but got {attn_mask.shape}")
    else:
        raise AssertionError(
            f"query should be unbatched 2D or batched 3D tensor but received {query.dim()}-D query tensor")


def scaled_dot_product_attention(q, k, v, attn_mask, dropout_p, is_causal):
    """
    Scaled Dot-Product Attention

    Computes ``softmax((q * d_key ** -0.5) @ k^T + attn_mask) @ v`` where
    ``d_key`` is the last dimension of ``k``.

    Args:
        q, k, v: query, key and value tensors.
        attn_mask: additive mask added to the scores before the softmax, or
            ``None`` for no masking.
        dropout_p: dropout probability for the attention weights; any falsy
            value disables dropout. When set, dropout is always applied
            (``training=True``) in "upscale_in_train" mode.
        is_causal: accepted for signature compatibility; not used here.
    Returns:
        The attention output ``weights @ v``.
    """
    d_key = k.shape[-1]
    scaled_q = paddle.scale(x=q, scale=d_key ** -0.5)
    product = paddle.matmul(x=scaled_q, y=k, transpose_y=True)
    # Adding None to a tensor raises, so the mask is only added when given.
    if attn_mask is not None:
        product = product + attn_mask
    weights = F.softmax(x=product)
    if dropout_p:
        weights = F.dropout(
            weights,
            p=dropout_p,
            training=True,
            mode="upscale_in_train"
        )
    out = paddle.matmul(x=weights, y=v)
    return out

    
def addr(input, vec1, vec2, beta=1, alpha=1, out=None):
    """
    A helper function to calculate alpha*(vec1*vec2^T) + beta*input

    Args:
        input: tensor added to the outer product; must broadcast against a
            ``[len(vec1), len(vec2)]`` result.
        vec1: 1-D tensor giving the rows of the outer product.
        vec2: 1-D tensor giving the columns of the outer product.
        beta: multiplier applied to ``input``.
        alpha: multiplier applied to the outer product.
        out: accepted for signature compatibility; it is not written to.
    Returns:
        The tensor ``beta * input + alpha * outer(vec1, vec2)``.
    """
    # [row, 1] * [1, column] broadcasts to the outer product directly; no
    # zero-padded square matrix or matmul is needed.
    outer = paddle.unsqueeze(vec1, 1) * paddle.unsqueeze(vec2, 0)
    return beta * input + alpha * outer

def multi_head_attention_forward(
    x: paddle.Tensor,
    num_heads: int,
    q_proj: nn.Linear,
    k_proj: nn.Linear,
    v_proj: nn.Linear,
    c_proj: nn.Linear,
    attn_mask: Optional[paddle.Tensor] = None,
):
    """Multi-head self-attention over ``x`` with separate q/k/v projections.

    Args:
        x: input of shape ``(max_len, batch_size, emb_dim)``.
        num_heads: number of heads; ``emb_dim // num_heads`` is the head size.
        q_proj, k_proj, v_proj: projections applied to ``x`` for query, key
            and value.
        c_proj: output projection applied to the concatenated heads.
        attn_mask: optional additive mask of shape ``(max_len, max_len)`` or
            ``(1, max_len, max_len)``. The caller's tensor is left unchanged.
    Returns:
        Tensor of shape ``(max_len, batch_size, emb_dim)`` after ``c_proj``.
    """
    max_len, batch_size, emb_dim = x.shape
    head_dim = emb_dim // num_heads
    scaling = float(head_dim) ** -0.5
    q = q_proj(x)  # L, N, E
    k = k_proj(x)  # L, N, E
    v = v_proj(x)  # L, N, E

    # (L, N, E) -> (N * heads, L, head_dim)
    v = v.reshape((-1, batch_size * num_heads, head_dim)).transpose((1, 0, 2))
    k = k.reshape((-1, batch_size * num_heads, head_dim)).transpose((1, 0, 2))
    q = q.reshape((-1, batch_size * num_heads, head_dim)).transpose((1, 0, 2))

    q = q * scaling
    qk = paddle.matmul(q, k, transpose_y=True)
    if attn_mask is not None:
        if attn_mask.ndim == 2:
            # Non in-place: an in-place unsqueeze would reshape the tensor
            # the caller still holds.
            attn_mask = attn_mask.unsqueeze(0)
        assert attn_mask.shape[0] == 1 and attn_mask.shape[1] == max_len and attn_mask.shape[2] == max_len
        qk = qk + attn_mask

    qk = F.softmax(qk, axis=-1)
    atten = paddle.bmm(qk, v)
    # (N * heads, L, head_dim) -> (L, N, E)
    atten = atten.transpose((1, 0, 2))
    atten = atten.reshape((max_len, batch_size, emb_dim))
    atten = c_proj(atten)
    return atten

def linear(input, weight, bias=None):
    """Affine map ``input @ weight (+ bias)``.

    ``weight`` is multiplied as given, without a transpose, so its leading
    dimension has to match the last dimension of ``input``. ``bias`` is
    optional and is added to the product when provided.
    """
    if bias is None:
        return paddle.matmul(input, weight)
    if input.dim() == 2:
        # fused multiply-add for the plain 2-D case
        return paddle.addmm(bias, input, weight)
    output = paddle.matmul(input, weight)
    output += bias
    return output


def _in_projection_packed(
    q: paddle.Tensor,
    k: paddle.Tensor,
    v: paddle.Tensor,
    w: paddle.Tensor,
    b: Optional[paddle.Tensor] = None,
) -> List[paddle.Tensor]:
    r"""
    Performs the in-projection step of the attention operation, using packed weights.
    Output is a triple containing projection tensors for query, key and value.

    The packed weight (and bias, if any) is cut into three equal pieces along
    axis 0, in q, k, v order, and every piece is applied with ``F.linear``.
    The same path is used for self-attention, encoder-decoder attention and
    the general case. The former fused shortcuts for ``q is k is v`` and
    ``k is v`` permuted 4-D/5-D tensors with a three-entry ``perm`` and handed
    un-split ``(3E, E)`` / ``(2E, E)`` weights to ``F.linear``, so they could
    not execute; the results here are what the general branch always produced.

    Args:
        q, k, v: query, key and value tensors to be projected. They must
            share a common embedding dimension; otherwise their shapes may vary.
        w: projection weights for q, k and v, packed into a single tensor
            along dimension 0, in q, k, v order.
        b: optional projection biases for q, k and v, packed into a single
            tensor in q, k, v order.
    Shape:
        Inputs:
        - q: :math:`(..., E)` where E is the embedding dimension
        - k: :math:`(..., E)` where E is the embedding dimension
        - v: :math:`(..., E)` where E is the embedding dimension
        - w: :math:`(E * 3, E)` where E is the embedding dimension
        - b: :math:`E * 3` where E is the embedding dimension
        Output:
        - a tuple :math:`(q', k', v')`, each output tensor will have the
            same shape as the corresponding input tensor.

    NOTE(review): each :math:`(E, E)` slice is passed to ``F.linear`` as is,
    i.e. without a transpose. Confirm that callers pack the slices in the
    orientation ``F.linear`` expects.
    """
    w_q, w_k, w_v = w.chunk(3)
    if b is None:
        b_q = b_k = b_v = None
    else:
        b_q, b_k, b_v = b.chunk(3)
    return F.linear(q, w_q, b_q), F.linear(k, w_k, b_k), F.linear(v, w_v, b_v)
    
def _in_projection(
    q: paddle.Tensor,
    k: paddle.Tensor,
    v: paddle.Tensor,
    w_q: paddle.Tensor,
    w_k: paddle.Tensor,
    w_v: paddle.Tensor,
    b_q: Optional[paddle.Tensor] = None,
    b_k: Optional[paddle.Tensor] = None,
    b_v: Optional[paddle.Tensor] = None,
) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor]:
    """Project query, key and value with separate weights and optional biases.

    Each input is passed through ``F.linear`` together with its own weight
    and bias; the three results are returned in q, k, v order.
    """
    projected_q = F.linear(q, w_q, b_q)
    projected_k = F.linear(k, w_k, b_k)
    projected_v = F.linear(v, w_v, b_v)
    return projected_q, projected_k, projected_v
    
def multi_head_attention_forward_paddle(
    query: paddle.Tensor,
    key: paddle.Tensor,
    value: paddle.Tensor,
    embed_dim_to_check: int,
    num_heads: int,
    in_proj_weight: Optional[paddle.Tensor],
    in_proj_bias: Optional[paddle.Tensor],
    bias_k: Optional[paddle.Tensor],
    bias_v: Optional[paddle.Tensor],
    add_zero_attn: bool,
    dropout_p: float,
    out_proj_weight: paddle.Tensor,
    out_proj_bias: Optional[paddle.Tensor],
    training: bool = True,
    key_padding_mask: Optional[paddle.Tensor] = None,
    need_weights: bool = True,
    attn_mask: Optional[paddle.Tensor] = None,
    use_separate_proj_weight: bool = False,
    q_proj_weight: Optional[paddle.Tensor] = None,
    k_proj_weight: Optional[paddle.Tensor] = None,
    v_proj_weight: Optional[paddle.Tensor] = None,
    static_k: Optional[paddle.Tensor] = None,
    static_v: Optional[paddle.Tensor] = None,
    average_attn_weights: bool = True,
    is_causal: bool = False,
) -> Tuple[paddle.Tensor, Optional[paddle.Tensor]]:
    r"""
    Multi-head attention: in-projection, mask preparation, attention and
    out-projection.

    Args:
        query, key, value: map a query and a set of key-value pairs to an output.
            See "Attention Is All You Need" for more details.
        embed_dim_to_check: total dimension of the model; asserted to equal
            the last dimension of ``query``.
        num_heads: parallel attention heads.
        in_proj_weight, in_proj_bias: packed input projection weight and bias.
            ``in_proj_bias`` is also the (packed) bias source when separate
            projection weights are used.
        bias_k, bias_v: extra key/value row appended along the sequence axis
            (dim 0 of the time-major key/value). Both or neither must be given.
        add_zero_attn: append one all-zero position to the per-head key and
            value sequences (dim 1 once heads are folded into the batch).
        dropout_p: probability of an element to be zeroed.
        out_proj_weight, out_proj_bias: the output projection weight and bias.
        training: dropout is disabled (``dropout_p = 0``) when ``False``.
        key_padding_mask: optional ``(N, S)`` mask over key positions. It is
            expanded per head and merged into ``attn_mask`` by addition.
        need_weights: also compute and return the attention weights.
        attn_mask: 2D or 3D mask. A 2D mask is broadcast over all batches,
            a 3D mask specifies a different mask for each batch*head entry.
        is_causal: when set, ``attn_mask`` is discarded and the flag is
            forwarded to ``scaled_dot_product_attention`` (only reached when
            ``need_weights`` is ``False``).
        use_separate_proj_weight: use ``q_proj_weight``, ``k_proj_weight`` and
            ``v_proj_weight`` instead of the packed ``in_proj_weight``.
        q_proj_weight, k_proj_weight, v_proj_weight: separate projection weights.
        static_k, static_v: pre-computed per-head key and value that replace
            the projected ones.
        average_attn_weights: average the returned weights over heads. Only
            has an effect when ``need_weights=True``. Default: True
    Shape:
        Inputs:
        - query: :math:`(L, N, E)` where L is the target sequence length, N is the batch size, E is
          the embedding dimension. The shape is unpacked into three values, so a 3-D tensor is required.
        - key: :math:`(S, N, E)`, where S is the source sequence length.
        - value: :math:`(S, N, E)`.
        - key_padding_mask: :math:`(N, S)`.
        - attn_mask: 2D mask :math:`(L, S)` or 3D mask :math:`(N*num_heads, L, S)`.
        - static_k: :math:`(N*num_heads, S, E/num_heads)`.
        - static_v: :math:`(N*num_heads, S, E/num_heads)`.
        Outputs:
        - attn_output: :math:`(L, N, E)`.
        - attn_output_weights: ``None`` unless ``need_weights=True``. With
          ``average_attn_weights=True`` the shape is :math:`(N, L, S)`,
          otherwise :math:`(N, num_heads, L, S)`.

    NOTE(review): masks are merged and applied by plain addition in this
    function; boolean masks are not converted to additive form here. Confirm
    that callers pass additive float masks or that the downstream helper
    handles booleans.
    """

    is_batched = _mha_shape_check(query, key, value, key_padding_mask, attn_mask, num_heads)
    tgt_len, bsz, embed_dim = query.shape
    src_len, _, _ = key.shape

    if is_causal:
        attn_mask = None

    assert embed_dim == embed_dim_to_check, \
        f"was expecting embedding dimension of {embed_dim_to_check}, but got {embed_dim}"
    if isinstance(embed_dim, paddle.Tensor):
        # embed_dim can be a tensor when JIT tracing
        # NOTE(review): `.div(..., rounding_mode=...)` follows the PyTorch
        # signature; confirm it is available before relying on this branch.
        head_dim = embed_dim.div(num_heads, rounding_mode='trunc')
    else:
        head_dim = embed_dim // num_heads
    assert head_dim * num_heads == embed_dim, f"embed_dim {embed_dim} not divisible by num_heads {num_heads}"
    if use_separate_proj_weight:
        # allow MHA to have different embedding dimensions when separate projection weights are used
        assert key.shape[:2] == value.shape[:2], \
            f"key's sequence and batch dims {key.shape[:2]} do not match value's {value.shape[:2]}"
    else:
        assert key.shape == value.shape, f"key shape {key.shape} does not match value shape {value.shape}"

    #
    # compute in-projection
    #
    if not use_separate_proj_weight:
        assert in_proj_weight is not None, "use_separate_proj_weight is False but in_proj_weight is None"
        q, k, v = _in_projection_packed(query, key, value, in_proj_weight, in_proj_bias)

    else:
        assert q_proj_weight is not None, "use_separate_proj_weight is True but q_proj_weight is None"
        assert k_proj_weight is not None, "use_separate_proj_weight is True but k_proj_weight is None"
        assert v_proj_weight is not None, "use_separate_proj_weight is True but v_proj_weight is None"
        if in_proj_bias is None:
            b_q = b_k = b_v = None
        else:
            b_q, b_k, b_v = in_proj_bias.chunk(3)

        q, k, v = _in_projection(query, key, value, q_proj_weight, k_proj_weight, v_proj_weight, b_q, b_k, b_v)

    # prep attention mask

    if attn_mask is not None:
        # ensure attn_mask's dim is 3
        if attn_mask.dim() == 2:
            correct_2d_size = (tgt_len, src_len)
            # Tensor.shape is a list; normalise to a tuple so a correctly
            # shaped mask does not compare unequal to the expected tuple.
            if tuple(attn_mask.shape) != correct_2d_size:
                raise RuntimeError(f"The shape of the 2D attn_mask is {attn_mask.shape}, but should be {correct_2d_size}.")
            attn_mask = attn_mask.unsqueeze(0)
        elif attn_mask.dim() == 3:
            correct_3d_size = (bsz * num_heads, tgt_len, src_len)
            if tuple(attn_mask.shape) != correct_3d_size:
                raise RuntimeError(f"The shape of the 3D attn_mask is {attn_mask.shape}, but should be {correct_3d_size}.")
        else:
            raise RuntimeError(f"attn_mask's dimension {attn_mask.dim()} is not supported")

    # append the learned bias row to key/value
    if bias_k is not None and bias_v is not None:
        assert static_k is None, "bias cannot be added to static key."
        assert static_v is None, "bias cannot be added to static value."
        # k and v are still time-major (S, N, E) here, so the extra row goes
        # on axis 0 (the sequence axis); the masks below grow by one column
        # to match.
        k = paddle.concat([k, paddle.tile(bias_k, [1, bsz, 1])], axis=0)
        v = paddle.concat([v, paddle.tile(bias_v, [1, bsz, 1])], axis=0)
        if attn_mask is not None:
            # pad the last dim with one zero column
            attn_mask = paddle.concat([attn_mask, paddle.zeros_like(attn_mask[:, :, -1:])], axis=2)
        if key_padding_mask is not None:
            # pad the last dim with one zero column
            key_padding_mask = paddle.concat([key_padding_mask, paddle.zeros_like(key_padding_mask[:, -1:])], axis=1)
    else:
        assert bias_k is None
        assert bias_v is None

    #
    # reshape q, k, v for multihead attention and make em batch first
    #
    q = q.reshape([tgt_len, bsz * num_heads, head_dim]).transpose([1, 0, 2])

    if static_k is None:
        k = k.reshape([k.shape[0], bsz * num_heads, head_dim]).transpose([1, 0, 2])
    else:
        assert static_k.shape[0] == bsz * num_heads, \
            f"expecting static_k.size(0) of {bsz * num_heads}, but got {static_k.shape[0]}"
        assert static_k.shape[2] == head_dim, \
            f"expecting static_k.size(2) of {head_dim}, but got {static_k.shape[2]}"
        k = static_k
    if static_v is None:
        v = v.reshape([v.shape[0], bsz * num_heads, head_dim]).transpose([1, 0, 2])
    else:
        # TODO finish disentangling control flow so we don't do in-projections when statics are passed
        assert static_v.shape[0] == bsz * num_heads, \
            f"expecting static_v.size(0) of {bsz * num_heads}, but got {static_v.shape[0]}"
        assert static_v.shape[2] == head_dim, \
            f"expecting static_v.size(2) of {head_dim}, but got {static_v.shape[2]}"
        v = static_v

    # add zero attention along the sequence dimension (now second)
    if add_zero_attn:
        zero_attn_shape = [bsz * num_heads, 1, head_dim]
        k = paddle.concat([k, paddle.zeros(zero_attn_shape, dtype=k.dtype)], axis=1)
        v = paddle.concat([v, paddle.zeros(zero_attn_shape, dtype=v.dtype)], axis=1)
        if attn_mask is not None:
            attn_mask = paddle.concat([attn_mask, paddle.zeros_like(attn_mask[:, :, -1:])], axis=2)
        if key_padding_mask is not None:
            key_padding_mask = paddle.concat([key_padding_mask, paddle.zeros_like(key_padding_mask[:, -1:])], axis=1)

    # update source sequence length after adjustments
    src_len = k.shape[1]

    # merge key padding and attention masks
    if key_padding_mask is not None:
        # compare as tuples: Tensor.shape is a list and never equals a tuple
        assert tuple(key_padding_mask.shape) == (bsz, src_len), \
            f"expecting key_padding_mask shape of {(bsz, src_len)}, but got {key_padding_mask.shape}"
        key_padding_mask = key_padding_mask.reshape([bsz, 1, 1, src_len]).expand([-1, num_heads, -1, -1]).reshape([bsz * num_heads, 1, src_len])
        if attn_mask is None:
            attn_mask = key_padding_mask
        else:
            attn_mask = attn_mask + key_padding_mask

    # adjust dropout probability
    if not training:
        dropout_p = 0.0

    #
    # (deep breath) calculate attention and out projection
    #
    if need_weights:
        q_scaled = q / math.sqrt(q.shape[-1])
        # scores = q_scaled @ k^T, plus the additive mask when one is present
        attn_output_weights = paddle.bmm(q_scaled, k.transpose([0, 2, 1]))
        if attn_mask is not None:
            attn_output_weights = attn_output_weights + attn_mask
        attn_output_weights = F.softmax(attn_output_weights, axis=-1)
        if dropout_p > 0.0:
            attn_output_weights = F.dropout(attn_output_weights, p=dropout_p)

        attn_output = paddle.bmm(attn_output_weights, v)
        attn_output = attn_output.transpose([1, 0, 2]).reshape([tgt_len * bsz, embed_dim])
        attn_output = F.linear(attn_output, out_proj_weight, out_proj_bias)
        attn_output = attn_output.reshape([tgt_len, bsz, attn_output.shape[1]])

        # optionally average attention weights over heads
        attn_output_weights = attn_output_weights.reshape([bsz, num_heads, tgt_len, src_len])
        if average_attn_weights:
            attn_output_weights = attn_output_weights.mean(axis=1)

        if not is_batched:
            # squeeze the output if input was unbatched
            attn_output = attn_output.squeeze(1)
            attn_output_weights = attn_output_weights.squeeze(0)
        return attn_output, attn_output_weights
    else:
        # attn_mask can be either (L,S) or (N*num_heads, L, S)
        # if attn_mask's shape is (1, L, S) we need to unsqueeze to (1, 1, L, S)
        # in order to match the input for SDPA of (N, num_heads, L, S)
        if attn_mask is not None:
            if attn_mask.shape[0] == 1 and attn_mask.dim() == 3:
                attn_mask = attn_mask.unsqueeze(0)
            else:
                attn_mask = attn_mask.reshape([bsz, num_heads, -1, src_len])

        q = q.reshape([bsz, num_heads, tgt_len, head_dim])
        k = k.reshape([bsz, num_heads, src_len, head_dim])
        v = v.reshape([bsz, num_heads, src_len, head_dim])
        attn_output = scaled_dot_product_attention(q, k, v, attn_mask, dropout_p, is_causal)
        attn_output = attn_output.transpose(perm=[2, 0, 1, 3]).reshape([bsz * tgt_len, embed_dim])
        attn_output = F.linear(attn_output, out_proj_weight, out_proj_bias)
        attn_output = attn_output.reshape([tgt_len, bsz, attn_output.shape[1]])
        return attn_output, None

================================================
FILE: paddlespeech/s2t/models/wavlm/modules/modules.py
================================================
# --------------------------------------------------------
# WavLM: Large-Scale Self-Supervised Pre-Training for Full Stack Speech Processing (https://arxiv.org/abs/2110.13900)
# Github source: https://github.com/microsoft/unilm/tree/master/wavlm
# Copyright (c) 2021 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Based on fairseq code bases
# https://github.com/pytorch/fairseq
# --------------------------------------------------------
import math
import warnings
from typing import Dict
from typing import Optional
from typing import Tuple

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import Tensor

from .functional import multi_head_attention_forward_paddle


class TransposeLast(nn.Layer):
    """Swap the last two axes of a tensor.

    Args:
        deconstruct_idx: when not ``None``, the forward input is first
            indexed with this value (``x[deconstruct_idx]``) and the result
            is transposed.
    """

    def __init__(self, deconstruct_idx=None):
        super().__init__()
        self.deconstruct_idx = deconstruct_idx

    def forward(self, x):
        if self.deconstruct_idx is not None:
            x = x[self.deconstruct_idx]
        # Build the permutation from the actual rank so tensors of any rank
        # >= 2 are accepted; for 3-D input this is exactly [0, 2, 1].
        perm = list(range(len(x.shape)))
        perm[-2], perm[-1] = perm[-1], perm[-2]
        return paddle.transpose(x, perm=perm)


class Fp32LayerNorm(nn.LayerNorm):
    """Layer normalisation evaluated in float32.

    Input, weight and bias are cast to float32 for the computation and the
    result is cast back to the dtype of the input.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def forward(self, input):
        # paddle.nn.LayerNorm keeps its configuration in `_normalized_shape`
        # and `_epsilon`; casts use `astype`, the same idiom as `gelu` in
        # this module.
        weight = self.weight.astype("float32") if self.weight is not None else None
        bias = self.bias.astype("float32") if self.bias is not None else None
        output = F.layer_norm(
            input.astype("float32"),
            self._normalized_shape,
            weight=weight,
            bias=bias,
            epsilon=self._epsilon, )
        return output.astype(input.dtype)


class Fp32GroupNorm(nn.GroupNorm):
    """Group normalisation evaluated in float32.

    Input, weight and bias are cast to float32 for the computation and the
    result is cast back to the dtype of the input.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def forward(self, input):
        # Keyword arguments are required here: in F.group_norm the third
        # positional parameter is `epsilon`, so passing weight/bias/eps
        # positionally would bind the weight tensor to epsilon.
        # paddle.nn.GroupNorm keeps its configuration in `_num_groups` and
        # `_epsilon`.
        weight = self.weight.astype("float32") if self.weight is not None else None
        bias = self.bias.astype("float32") if self.bias is not None else None
        output = F.group_norm(
            input.astype("float32"),
            self._num_groups,
            epsilon=self._epsilon,
            weight=weight,
            bias=bias, )
        return output.astype(input.dtype)


class SamePad(nn.Layer):
    """Drop trailing entries along the last axis of a 3-D input.

    The number of dropped entries is ``kernel_size - 1`` when ``causal`` is
    set; otherwise it is 1 for an even ``kernel_size`` and 0 for an odd one.
    """

    def __init__(self, kernel_size, causal=False):
        super().__init__()
        self.remove = kernel_size - 1 if causal else int(kernel_size % 2 == 0)

    def forward(self, x):
        # nothing to trim: hand the input back untouched
        if not self.remove > 0:
            return x
        return x[:, :, :-self.remove]


class Swish(nn.Layer):
    """Swish activation: ``x * sigmoid(x)``."""

    def __init__(self):
        """Create the sigmoid sub-layer used as the gate."""
        super().__init__()
        self.act = nn.Sigmoid()

    def forward(self, x):
        gate = self.act(x)
        return x * gate


class GLU_Linear(nn.Layer):
    """Linear layer followed by a gated linear unit.

    The input is projected to ``2 * output_dim`` features; the first half is
    multiplied by the (optionally activated) second half.

    Args:
        input_dim: size of the last input dimension.
        output_dim: size of the last output dimension.
        glu_type: gate activation, one of ``"sigmoid"``, ``"swish"``,
            ``"relu"``, ``"gelu"``; ``"bilinear"`` multiplies the two halves
            without an activation.
        bias_in_glu: whether the projection has a bias term.
    """

    def __init__(self,
                 input_dim,
                 output_dim,
                 glu_type="sigmoid",
                 bias_in_glu=True):
        super(GLU_Linear, self).__init__()

        self.glu_type = glu_type
        self.output_dim = output_dim

        if glu_type == "sigmoid":
            self.glu_act = nn.Sigmoid()
        elif glu_type == "swish":
            self.glu_act = Swish()
        elif glu_type == "relu":
            self.glu_act = nn.ReLU()
        elif glu_type == "gelu":
            self.glu_act = nn.GELU()

        # The bias switch must be passed as `bias_attr`: the third positional
        # parameter of paddle.nn.Linear is `weight_attr`, so a positional
        # True/False would configure the weight instead of the bias.
        self.linear = nn.Linear(
            input_dim, output_dim * 2, bias_attr=bool(bias_in_glu))

    def forward(self, x):
        # the channel (feature) dimension is expected to be the last of three
        # dimensions; it is split into a value half and a gate half
        x = self.linear(x)

        if self.glu_type == "bilinear":
            x = (x[:, :, 0:self.output_dim] *
                 x[:, :, self.output_dim:self.output_dim * 2])
        else:
            x = (x[:, :, 0:self.output_dim] *
                 self.glu_act(x[:, :, self.output_dim:self.output_dim * 2]))

        return x


def gelu_accurate(x):
    """Tanh-based GELU: ``0.5 * x * (1 + tanh(a * (x + 0.044715 * x^3)))``.

    The constant ``a = sqrt(2 / pi)`` is computed on first use and cached on
    the function object as ``gelu_accurate._a``.
    """
    try:
        coeff = gelu_accurate._a
    except AttributeError:
        coeff = gelu_accurate._a = math.sqrt(2 / math.pi)
    inner = coeff * (x + 0.044715 * paddle.pow(x, 3))
    return 0.5 * x * (1 + paddle.tanh(inner))


def gelu(x: Tensor) -> Tensor:
    """GELU computed on a float32 copy of ``x``, cast back to ``x``'s dtype."""
    as_fp32 = x.astype("float32")
    activated = nn.functional.gelu(as_fp32)
    return activated.astype(x.dtype)


def get_activation_fn(activation: str):
    """Return the callable registered under the name `activation`.

    ``"linear"`` and ``"glu"`` both map to a pass-through function.
    ``"gelu_fast"`` is accepted as a deprecated alias of ``"gelu_accurate"``
    and emits a warning.

    Raises:
        RuntimeError: if the name is not one of the supported activations.
    """
    # pass-through cases
    if activation in ("linear", "glu"):
        return lambda x: x

    if activation == "relu":
        return F.relu
    if activation == "gelu":
        return gelu
    if activation == "tanh":
        return paddle.tanh

    if activation in ("gelu_fast", "gelu_accurate"):
        if activation == "gelu_fast":
            warnings.warn(
                "--activation-fn=gelu_fast has been renamed to gelu_accurate")
        return gelu_accurate

    raise RuntimeError("--activation-fn {} not supported".format(activation))


def quant_noise(module, p, block_size):
    """
    Wraps modules and applies quantization noise to the weights for
    subsequent quantization with Iterative Product Quantization as
    described in "Training with Quantization Noise for Extreme Model Compression"

    Args:
        - module: nn.Layer
        - p: amount of Quantization Noise
        - block_size: size of the blocks for subsequent quantization with iPQ

    Returns:
        The same `module` object. When `p <= 0` it is returned untouched;
        otherwise a forward pre-hook has been registered on it first.

    Remarks:
        - Module weights must have the right sizes wrt the block size
        - Only Linear, Embedding and Conv2d modules are supported for the moment
        - For more detail on how to quantize by blocks with convolutional weights,
          see "And the Bit Goes Down: Revisiting the Quantization of Neural Networks"
        - We implement the simplest form of noise here as stated in the paper
          which consists in randomly dropping blocks
        - NOTE(review): everything past the `p <= 0` early return uses
          PyTorch-style spellings (`nn.Conv2d`, `weight.size(i)`,
          `device=...`, `bernoulli_`, `repeat`, `to(...)`, `weight.data`,
          `module.kernel_size` / `in_channels`). Confirm that these resolve
          on Paddle layers and tensors before enabling `p > 0`.
    """

    # if no quantization noise, don't register hook
    if p <= 0:
        return module

    # supported modules
    assert isinstance(module, (nn.Linear, nn.Embedding, nn.Conv2d))

    # test whether module.weight has the right sizes wrt block_size
    is_conv = module.weight.ndim == 4

    # 2D matrix
    if not is_conv:
        # NOTE(review): dimension 1 of the weight is treated as the input
        # feature axis here; confirm this matches the weight layout of the
        # wrapped layer.
        assert (
            module.weight.size(1) %
            block_size == 0), "Input features must be a multiple of block sizes"

    # 4D matrix
    else:
        # 1x1 convolutions
        if module.kernel_size == (1, 1):
            assert (module.in_channels % block_size == 0
                    ), "Input channels must be a multiple of block sizes"
        # regular convolutions
        else:
            k = module.kernel_size[0] * module.kernel_size[1]
            assert k % block_size == 0, "Kernel size must be a multiple of block size"

    # Hook invoked before each forward call of `module`: in training mode it
    # draws a fresh block mask and overwrites the weight with a masked,
    # rescaled copy.
    def _forward_pre_hook(mod, input):
        # no noise for evaluation
        if mod.training:
            if not is_conv:
                # gather weight and sizes
                weight = mod.weight
                in_features = weight.size(1)
                out_features = weight.size(0)

                # split weight matrix into blocks and randomly drop selected blocks
                # one Bernoulli(p) draw per block, then repeated block_size
                # times so that a whole block shares the same decision
                mask = paddle.zeros(
                    in_features // block_size * out_features,
                    device=weight.device)
                mask.bernoulli_(p)
                mask = mask.repeat_interleave(block_size, -1).reshape(
                    [-1, in_features])

            else:
                # gather weight and sizes
                weight = mod.weight
                in_channels = mod.in_channels
                out_channels = mod.out_channels

                # split weight matrix into blocks and randomly drop selected blocks
                if mod.kernel_size == (1, 1):
                    mask = paddle.zeros(
                        int(in_channels // block_size * out_channels),
                        device=weight.device, )
                    mask.bernoulli_(p)
                    mask = mask.repeat_interleave(block_size, -1).reshape(
                        [-1, in_channels])
                else:
                    # one draw per (dim 0, dim 1) pair of the weight,
                    # repeated over the kernel's two spatial dimensions
                    mask = paddle.zeros(
                        weight.size(0), weight.size(1), device=weight.device)

                    mask.bernoulli_(p)
                    mask = (
                        mask.unsqueeze(2).unsqueeze(3)
                        .repeat(1, 1, mod.kernel_size[0], mod.kernel_size[1]))

            # scale weights and apply mask
            # masked entries are set to 0 and the result is multiplied by
            # 1 / (1 - p)
            mask = mask.to(paddle.bool)
            s = 1 / (1 - p)
            mod.weight.data = s * weight.masked_fill(mask, 0)

    module.register_forward_pre_hook(_forward_pre_hook)
    return module


class MultiheadAttention(nn.Layer):
    """Multi-headed attention.

    See "Attention Is All You Need" for more details.
    """

    def __init__(
            self,
            embed_dim,
            num_heads,
            kdim=None,
            vdim=None,
            dropout=0.0,
            bias=True,
            add_bias_kv=False,
            add_zero_attn=False,
            self_attention=False,
            encoder_decoder_attention=False,
            q_noise=0.0,
            qn_block_size=8,
            has_relative_attention_bias=True,
            num_buckets=32,
            max_distance=128,
            gru_rel_pos=True,
            rescale_init=False, ):
        """Create the projections and the optional relative-position parts.

        Args:
            embed_dim: model dimension; must be divisible by `num_heads`.
            num_heads: number of attention heads.
            kdim, vdim: key / value input sizes, defaulting to `embed_dim`.
            dropout: probability handed to the dropout sub-layer.
            bias: whether the q, v and output projections have a bias.
            add_bias_kv: create `bias_k` / `bias_v` parameters of shape
                `[1, 1, embed_dim]`; otherwise both are `None`.
            add_zero_attn: stored on the instance as given.
            self_attention, encoder_decoder_attention: stored flags;
                self-attention requires `kdim == vdim == embed_dim`.
            q_noise, qn_block_size: forwarded to `quant_noise` for every
                projection.
            has_relative_attention_bias: create the
                `nn.Embedding(num_buckets, num_heads)` bias table.
            num_buckets, max_distance: relative-position bucketing settings.
            gru_rel_pos: create `grep_linear` and `grep_a`.
            rescale_init: when truthy, the key projection gets no bias.
        """
        super().__init__()
        self.embed_dim = embed_dim
        self.kdim = embed_dim if kdim is None else kdim
        self.vdim = embed_dim if vdim is None else vdim
        self.qkv_same_dim = self.kdim == embed_dim and self.vdim == embed_dim

        self.num_heads = num_heads
        self.dropout_module = nn.Dropout(dropout)

        self.has_relative_attention_bias = has_relative_attention_bias
        self.num_buckets = num_buckets
        self.max_distance = max_distance
        if self.has_relative_attention_bias:
            self.relative_attention_bias = nn.Embedding(num_buckets, num_heads)

        per_head = embed_dim // num_heads
        self.head_dim = per_head
        self.q_head_dim = per_head
        self.k_head_dim = per_head
        assert (self.head_dim * num_heads == self.embed_dim
                ), "embed_dim must be divisible by num_heads"
        self.scaling = self.head_dim**-0.5

        self.self_attention = self_attention
        self.encoder_decoder_attention = encoder_decoder_attention

        assert not self.self_attention or self.qkv_same_dim, (
            "Self-attention requires query, key and "
            "value to be of the same size")

        def _projection(in_dim, use_bias):
            # every projection maps onto embed_dim and is wrapped by quant_noise
            return quant_noise(
                nn.Linear(in_dim, embed_dim, bias_attr=use_bias), q_noise,
                qn_block_size)

        # the key projection drops its bias when rescale_init is requested
        k_bias = not rescale_init

        # creation order (k, v, q, out) is kept on purpose
        self.k_proj = _projection(self.kdim, k_bias)
        self.v_proj = _projection(self.vdim, bias)
        self.q_proj = _projection(embed_dim, bias)

        self.out_proj = _projection(embed_dim, bias)

        if add_bias_kv:
            self.bias_k = self.create_parameter(
                shape=[1, 1, embed_dim], dtype="float32")
            self.bias_v = self.create_parameter(
                shape=[1, 1, embed_dim], dtype="float32")
        else:
            self.bias_k = None
            self.bias_v = None

        self.add_zero_attn = add_zero_attn

        self.gru_rel_pos = gru_rel_pos
        if self.gru_rel_pos:
            self.grep_linear = nn.Linear(self.q_head_dim, 8)
            self.grep_a = self.create_parameter(
                shape=[1, num_heads, 1, 1], dtype="float32")

        self.reset_parameters()

    def reset_parameters(self):
        """Placeholder called at the end of ``__init__``; does nothing."""
        return None

    def _relative_positions_bucket(self, relative_positions,
                                   bidirectional=True):
        """Map signed relative positions to bucket indices.

        With `bidirectional` the buckets are split in two halves and positive
        offsets are shifted into the upper half; otherwise positive offsets
        are clamped to zero and only the magnitude of negative ones is used.
        Distances below half of the (remaining) bucket count keep their own
        bucket; larger ones are placed on a logarithmic scale up to
        `self.max_distance` and capped at the last bucket.
        """
        bucket_count = self.num_buckets

        if bidirectional:
            bucket_count = bucket_count // 2
            sign_offset = (
                relative_positions > 0).astype("int64") * bucket_count
            distance = paddle.abs(relative_positions)
        else:
            sign_offset = 0
            distance = -paddle.minimum(
                relative_positions, paddle.zeros_like(relative_positions))

        exact_limit = bucket_count // 2

        # logarithmic placement for distances at or beyond exact_limit
        log_ratio = paddle.log(
            distance.astype("float32") / exact_limit) / math.log(
                self.max_distance / exact_limit)
        coarse = exact_limit + (
            log_ratio * (bucket_count - exact_limit)).astype("int64")
        coarse = paddle.minimum(coarse,
                                paddle.full_like(coarse, bucket_count - 1))

        return sign_offset + paddle.where(distance < exact_limit, distance,
                                          coarse)

    def compute_bias(self, query_length, key_length):
        """Look up the relative-position bias for every query/key pair.

        Builds the ``(query_length, key_length)`` grid of key index minus
        query index, buckets it bidirectionally, embeds the buckets with
        `self.relative_attention_bias` and moves the embedding axis to the
        front.
        """
        query_idx = paddle.arange(query_length, dtype="int64")[:, None]
        key_idx = paddle.arange(key_length, dtype="int64")[None, :]
        buckets = self._relative_positions_bucket(
            key_idx - query_idx, bidirectional=True)
        bias = self.relative_attention_bias(buckets)
        return bias.transpose([2, 0, 1])

    def _gru_rel_pos_gate(self, query_layer):
        """Compute the gated relative-position scaling factor.

        Args:
            query_layer: tensor that is unpacked as
                ``(batch, heads, length, head_dim)`` and fed to
                ``self.grep_linear``.

        Returns:
            Tensor reshaped to ``[batch * heads, -1, 1]`` that multiplies the
            position bias.
        """
        _B, _H, _L, __ = query_layer.shape
        gate_a, gate_b = paddle.nn.functional.sigmoid(
            self.grep_linear(query_layer).reshape([_B, _H, _L, 2, 4]).sum(
                -1, keepdim=False)).chunk(
                    2, axis=-1)
        gate_a_1 = gate_a * (gate_b * self.grep_a - 1.0) + 2.0
        return gate_a_1.reshape([_B * _H, -1, 1])

    def forward(self,
                query,
                key: Optional[Tensor],
                value: Optional[Tensor],
                key_padding_mask: Optional[Tensor]=None,
                incremental_state: Optional[Dict[str, Dict[str, Optional[
                    Tensor]]]]=None,
                need_weights: bool=True,
                static_kv: bool=False,
                attn_mask: Optional[Tensor]=None,
                before_softmax: bool=False,
                need_head_weights: bool=False,
                position_bias: Optional[Tensor]=None
                ) -> Tuple[Tensor, Optional[Tensor], Optional[Tensor]]:
        """Input shape: Time x Batch x Channel

        Args:
            query: query tensor, unpacked as `(tgt_len, bsz, embed_dim)`.
            key (Tensor, optional): key tensor, unpacked as
                `(src_len, bsz, ...)`.
            value (Tensor, optional): value tensor.
            key_padding_mask (ByteTensor, optional): mask to exclude
                keys that are pads, of shape `(batch, src_len)`, where
                padding elements are indicated by 1s.
            incremental_state (dict, optional): cache of previous keys/values
                used for incremental decoding.
            need_weights (bool, optional): return the attention weights,
                averaged over heads (default: True).
            static_kv (bool, optional): reuse cached key/value instead of
                appending to them (default: False).
            attn_mask (ByteTensor, optional): typically used to
                implement causal attention, where the mask prevents the
                attention from looking forward in time (default: None).
            before_softmax (bool, optional): return the raw attention
                weights and values before the attention softmax.
            need_head_weights (bool, optional): return the attention
                weights for each head. Implies *need_weights*. Default:
                return the average attention weights over all heads.
            position_bias (Tensor, optional): precomputed relative position
                bias; computed here when the layer owns the bias table and
                none is given.

        Returns:
            Tuple of `(attn, attn_weights, position_bias)`. When
            `before_softmax` is set, the tuple is
            `(raw_attn_weights, v, position_bias)` instead.
        """
        if need_head_weights:
            need_weights = True

        tgt_len, bsz, embed_dim = query.shape
        src_len = tgt_len
        assert embed_dim == self.embed_dim
        assert list(query.shape) == [tgt_len, bsz, embed_dim]
        if key is not None:
            src_len, _, _ = key.shape

        if self.has_relative_attention_bias and position_bias is None:
            position_bias = self.compute_bias(tgt_len, src_len)
            # replicate the per-head bias for every batch element
            position_bias = paddle.tile(
                position_bias.unsqueeze(0), [bsz, 1, 1, 1])
            position_bias = position_bias.reshape(
                [bsz * self.num_heads, tgt_len, src_len])

        # Fast path: delegate to the fused multi-head attention helper.
        if (incremental_state is None and not static_kv and
                self.q_head_dim == self.head_dim):
            assert key is not None and value is not None
            assert attn_mask is None

            attn_mask_rel_pos = None
            if position_bias is not None:
                attn_mask_rel_pos = position_bias
                if self.gru_rel_pos:
                    query_layer = query.transpose([1, 0, 2])
                    new_x_shape = list(
                        query_layer.shape[:-1]) + [self.num_heads, -1]
                    query_layer = query_layer.reshape(new_x_shape)
                    query_layer = query_layer.transpose([0, 2, 1, 3])
                    attn_mask_rel_pos = self._gru_rel_pos_gate(
                        query_layer) * position_bias

                # bias is (bsz * heads, tgt_len, src_len); identical to the
                # former (tgt_len, tgt_len) reshape whenever both are equal
                attn_mask_rel_pos = attn_mask_rel_pos.reshape(
                    [-1, tgt_len, src_len])

            # k_proj may be built without a bias; substitute zeros so the
            # packed in-projection bias can still be concatenated.
            k_proj_bias = self.k_proj.bias
            if k_proj_bias is None:
                k_proj_bias = paddle.zeros_like(self.q_proj.bias)

            x, attn = multi_head_attention_forward_paddle(
                query,
                key,
                value,
                self.embed_dim,
                self.num_heads,
                paddle.empty([0]),
                paddle.concat(
                    (self.q_proj.bias, k_proj_bias, self.v_proj.bias), axis=0),
                self.bias_k,
                self.bias_v,
                self.add_zero_attn,
                self.dropout_module.p,
                self.out_proj.weight,
                self.out_proj.bias,
                self.training,
                key_padding_mask,
                need_weights,
                attn_mask_rel_pos,
                use_separate_proj_weight=True,
                q_proj_weight=self.q_proj.weight,
                k_proj_weight=self.k_proj.weight,
                v_proj_weight=self.v_proj.weight, )

            return x, attn, position_bias

        if incremental_state is not None:
            saved_state = self._get_input_buffer(incremental_state)
            if saved_state is not None and "prev_key" in saved_state:
                # previous time steps are cached - no need to recompute
                # key and value if they are static
                if static_kv:
                    assert self.encoder_decoder_attention and not self.self_attention
                    key = value = None
        else:
            saved_state = None

        if self.self_attention:
            q = self.q_proj(query)
            k = self.k_proj(query)
            v = self.v_proj(query)
        elif self.encoder_decoder_attention:
            # encoder-decoder attention
            q = self.q_proj(query)
            if key is None:
                assert value is None
                k = v = None
            else:
                k = self.k_proj(key)
                v = self.v_proj(key)

        else:
            assert key is not None and value is not None
            q = self.q_proj(query)
            k = self.k_proj(key)
            v = self.v_proj(value)
        q *= self.scaling

        if self.bias_k is not None:
            assert self.bias_v is not None
            k = paddle.concat(
                [k, paddle.tile(self.bias_k, [1, bsz, 1])], axis=0)
            v = paddle.concat(
                [v, paddle.tile(self.bias_v, [1, bsz, 1])], axis=0)
            if attn_mask is not None:
                attn_mask = paddle.concat(
                    [
                        attn_mask,
                        paddle.zeros(
                            [attn_mask.shape[0], 1], dtype=attn_mask.dtype),
                    ],
                    axis=1)

            if key_padding_mask is not None:
                key_padding_mask = paddle.concat(
                    [
                        key_padding_mask,
                        paddle.zeros(
                            [key_padding_mask.shape[0], 1],
                            dtype=key_padding_mask.dtype),
                    ],
                    axis=1, )

        q = (q.reshape([tgt_len, bsz * self.num_heads, self.q_head_dim])
             .transpose([1, 0, 2]))
        if k is not None:
            k = (k.reshape([-1, bsz * self.num_heads, self.k_head_dim])
                 .transpose([1, 0, 2]))
        if v is not None:
            v = (v.reshape([-1, bsz * self.num_heads, self.head_dim])
                 .transpose([1, 0, 2]))

        if saved_state is not None:
            # saved states are stored with shape (bsz, num_heads, seq_len, head_dim)
            if "prev_key" in saved_state:
                _prev_key = saved_state["prev_key"]
                assert _prev_key is not None
                prev_key = _prev_key.reshape(
                    [bsz * self.num_heads, -1, self.head_dim])
                if static_kv:
                    k = prev_key
                else:
                    assert k is not None
                    k = paddle.concat([prev_key, k], axis=1)
                src_len = k.shape[1]
            if "prev_value" in saved_state:
                _prev_value = saved_state["prev_value"]
                assert _prev_value is not None
                prev_value = _prev_value.reshape(
                    [bsz * self.num_heads, -1, self.head_dim])
                if static_kv:
                    v = prev_value
                else:
                    assert v is not None
                    v = paddle.concat([prev_value, v], axis=1)
            prev_key_padding_mask: Optional[Tensor] = None
            if "prev_key_padding_mask" in saved_state:
                prev_key_padding_mask = saved_state["prev_key_padding_mask"]
            assert k is not None and v is not None
            key_padding_mask = MultiheadAttention._append_prev_key_padding_mask(
                key_padding_mask=key_padding_mask,
                prev_key_padding_mask=prev_key_padding_mask,
                batch_size=bsz,
                src_len=k.shape[1],
                static_kv=static_kv, )

            saved_state["prev_key"] = k.reshape(
                [bsz, self.num_heads, -1, self.head_dim])
            saved_state["prev_value"] = v.reshape(
                [bsz, self.num_heads, -1, self.head_dim])
            saved_state["prev_key_padding_mask"] = key_padding_mask
            # In this branch incremental_state is never None
            assert incremental_state is not None
            incremental_state = self._set_input_buffer(incremental_state,
                                                       saved_state)
        assert k is not None
        assert k.shape[1] == src_len

        # This is part of a workaround to get around fork/join parallelism
        # not supporting Optional types.
        if key_padding_mask is not None and key_padding_mask.dim() == 0:
            key_padding_mask = None

        if key_padding_mask is not None:
            assert key_padding_mask.shape[0] == bsz
            assert key_padding_mask.shape[1] == src_len

        if self.add_zero_attn:
            assert v is not None
            src_len += 1
            k = paddle.concat(
                [
                    k,
                    paddle.zeros(
                        [k.shape[0], 1] + list(k.shape[2:]), dtype=k.dtype),
                ],
                axis=1)
            v = paddle.concat(
                [
                    v,
                    paddle.zeros(
                        [v.shape[0], 1] + list(v.shape[2:]), dtype=v.dtype),
                ],
                axis=1)
            if attn_mask is not None:
                attn_mask = paddle.concat(
                    [
                        attn_mask,
                        paddle.zeros(
                            [attn_mask.shape[0], 1], dtype=attn_mask.dtype),
                    ],
                    axis=1)

            if key_padding_mask is not None:
                key_padding_mask = paddle.concat(
                    [
                        key_padding_mask,
                        paddle.zeros(
                            [key_padding_mask.shape[0], 1],
                            dtype=key_padding_mask.dtype),
                    ],
                    axis=1, )

        attn_weights = paddle.matmul(q, k.transpose([0, 2, 1]))

        attn_weights = self.apply_sparse_mask(attn_weights, tgt_len, src_len,
                                              bsz)

        assert list(
            attn_weights.shape) == [bsz * self.num_heads, tgt_len, src_len]

        if attn_mask is not None:
            attn_mask = attn_mask.unsqueeze(0)
            attn_weights += attn_mask

        if key_padding_mask is not None:
            # don't attend to padding symbols
            attn_weights = attn_weights.reshape(
                [bsz, self.num_heads, tgt_len, src_len])
            pad_mask = key_padding_mask.unsqueeze(1).unsqueeze(2).astype(
                "bool")
            pad_mask = paddle.broadcast_to(pad_mask, attn_weights.shape)
            attn_weights = paddle.where(
                pad_mask,
                paddle.full_like(attn_weights, float("-inf")), attn_weights)
            attn_weights = attn_weights.reshape(
                [bsz * self.num_heads, tgt_len, src_len])

        if before_softmax:
            return attn_weights, v, position_bias

        if position_bias is not None:
            if self.gru_rel_pos == 1:
                query_layer = q.reshape(
                    [bsz, self.num_heads, tgt_len, self.q_head_dim])
                position_bias = self._gru_rel_pos_gate(
                    query_layer) * position_bias

            position_bias = position_bias.reshape(attn_weights.shape)

            attn_weights = attn_weights + position_bias

        attn_weights_float = F.softmax(attn_weights, axis=-1)
        attn_weights = attn_weights_float.astype(attn_weights.dtype)
        attn_probs = self.dropout_module(attn_weights)

        assert v is not None
        attn = paddle.bmm(attn_probs, v)
        assert list(
            attn.shape) == [bsz * self.num_heads, tgt_len, self.head_dim]
        attn = attn.transpose([1, 0, 2]).reshape([tgt_len, bsz, embed_dim])
        attn = self.out_proj(attn)
        attn_weights = None
        if need_weights:
            attn_weights = attn_weights_float.reshape(
                [bsz, self.num_heads, tgt_len, src_len]).transpose([1, 0, 2, 3])
            if not need_head_weights:
                # average attention weights over heads
                attn_weights = attn_weights.mean(axis=0)

        return attn, attn_weights, position_bias

    @staticmethod
    def _append_prev_key_padding_mask(
            key_padding_mask: Optional[Tensor],
            prev_key_padding_mask: Optional[Tensor],
            batch_size: int,
            src_len: int,
            static_kv: bool, ) -> Optional[Tensor]:
        """Merge the cached key padding mask with the current step's mask.

        Args:
            key_padding_mask: mask for the current keys, or None.
            prev_key_padding_mask: cached mask from earlier steps, or None.
                Saved key padding masks have shape (bsz, seq_len).
            batch_size: batch size used when a zero filler has to be built.
            src_len: total key length the merged mask must cover.
            static_kv: when True and a cached mask exists, it is returned
                unchanged.

        Returns:
            The merged mask (cast to float32 whenever it is rebuilt), the
            cached mask as-is for static keys, or None when neither mask is
            available.
        """
        if prev_key_padding_mask is not None and static_kv:
            return prev_key_padding_mask

        if prev_key_padding_mask is not None and key_padding_mask is not None:
            return paddle.concat(
                [
                    prev_key_padding_mask.astype("float32"),
                    key_padding_mask.astype("float32"),
                ],
                axis=1)

        # During incremental decoding, as the padding token enters and
        # leaves the frame, there will be a time when prev or current
        # is None
        if prev_key_padding_mask is not None:
            prev_mask = prev_key_padding_mask.astype("float32")
            missing = src_len - prev_key_padding_mask.shape[1]
            if missing > 0:
                # new (unpadded) positions are appended after the cached ones
                filler = paddle.zeros([batch_size, missing], dtype="float32")
                return paddle.concat([prev_mask, filler], axis=1)
            return prev_mask

        if key_padding_mask is not None:
            cur_mask = key_padding_mask.astype("float32")
            missing = src_len - key_padding_mask.shape[1]
            if missing > 0:
                # earlier positions had no mask, so they count as unpadded
                filler = paddle.zeros([batch_size, missing], dtype="float32")
                return paddle.concat([filler, cur_mask], axis=1)
            return cur_mask

        # neither a cached nor a current mask exists
        return None

    def _get_input_buffer(
            self,
            incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]]
    ) -> Dict[str, Optional[Tensor]]:
        """Fetch the cached "attn_state" buffer for this layer.

        Returns the value obtained from ``self.get_incremental_state`` or,
        when that value is None, a new empty dict.
        """
        cached = self.get_incremental_state(incremental_state, "attn_state")
        if cached is None:
            fresh: Dict[str, Optional[Tensor]] = {}
            return fresh
        return cached

    def _set_input_buffer(
            self,
            incremental_state: Dict[str, Dict[str, Optional[Tensor]]],
            buffer: Dict[str, Optional[Tensor]], ):
        """Store ``buffer`` under the "attn_state" key of the incremental state.

        Args:
            incremental_state: the incremental decoding state to update.
            buffer: the key/value/padding-mask cache to store.

        Returns:
            Whatever ``self.set_incremental_state`` returns.
        """
        return self.set_incremental_state(incremental_state, "attn_state",
                                          buffer)

    def apply_sparse_mask(self,
                          attn_weights,
                          tgt_len: int,
                          src_len: int,
                          bsz: int):
        """Return ``attn_weights`` unchanged.

        ``tgt_len``, ``src_len`` and ``bsz`` are accepted but not used by
        this implementation.
        """
        return attn_weights


================================================
FILE: paddlespeech/s2t/models/wavlm/wavlm_asr.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import defaultdict
from typing import Dict
from typing import List
from typing import Tuple

import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from .wavlm_paddle import WavLM
from .wavlm_paddle import WavLMConfig
from paddlespeech.s2t.models.wav2vec2.modules.VanillaNN import VanillaNN
from paddlespeech.s2t.models.wav2vec2.processing.speech_augmentation import SpecAugment
from paddlespeech.s2t.modules.ctc import CTCDecoderBase as CTC
from paddlespeech.s2t.modules.initializer import DefaultInitializerContext
from paddlespeech.s2t.utils.ctc_utils import remove_duplicates_and_blank
from paddlespeech.s2t.utils.utility import log_add


class WavLMASR(nn.Layer):
    """WavLM encoder followed by a VanillaNN head and a CTC layer."""

    def __init__(self, config: dict):
        """Build the model from ``config``.

        ``config`` is read both with ``.get`` and by attribute access
        (``normalize_wav``, ``output_norm``, ``freeze_wavlm``, ``enc``,
        ``ctc``, ``output_dim`` and the optional ``spec_augment``).
        """
        super().__init__()
        init_type = config.get("init_type", None)
        with DefaultInitializerContext(init_type):
            self.config = config
            wavlm_config = WavLMConfig(config)
            wavlm = WavLM(wavlm_config)

            self.normalize_wav = config.normalize_wav
            self.output_norm = config.output_norm
            if hasattr(config, 'spec_augment'):
                self.spec_augment = SpecAugment(**config.spec_augment)

            if config.freeze_wavlm:
                wavlm.eval()
                for parm in wavlm.parameters():
                    parm.trainable = False
            self.wavlm = wavlm
            self.enc = VanillaNN(**config.enc)
            self.ctc = CTC(**config.ctc,
                           odim=config.output_dim,
                           batch_average=False,
                           reduction='mean')

    def _wavlm_features(self, wav):
        """Run WavLM on ``wav`` with the configured input/output layer norms."""
        if self.normalize_wav:
            wav = F.layer_norm(wav, wav.shape[1:])
        # Extract wavlm output
        out = self.wavlm(wav)
        # We normalize the output if required
        if self.output_norm:
            out = F.layer_norm(out, out.shape[1:])
        return out

    @staticmethod
    def _tokens_to_text(text_feature, token_ids):
        """Convert token ids to text, dropping special tokens.

        "[CLS]" is skipped; decoding stops at the first "[SEP]" or "[PAD]".

        Returns:
            Tuple of (joined text, list of kept token ids).
        """
        kept_tokens = []
        kept_ids = []
        for c in text_feature.convert_ids_to_tokens(token_ids):
            if c == "[CLS]":
                continue
            if c == "[SEP]" or c == "[PAD]":
                break
            kept_tokens.append(c)
            kept_ids.append(text_feature.vocab[c])
        return ''.join(kept_tokens), kept_ids

    def forward(self, wav, wavs_lens_rate, target, target_lens):
        """Compute the CTC loss for a batch.

        Args:
            wav: input waveform batch fed to WavLM.
            wavs_lens_rate: per-utterance length ratio; multiplied by the
                encoder output length to obtain the CTC input lengths.
            target: target token ids.
            target_lens: target lengths.

        Returns:
            The CTC loss.
        """
        out = self._wavlm_features(wav)

        # SpecAugment is only applied while training
        if self.training and hasattr(self.config, 'spec_augment'):
            feats = self.spec_augment(out)
        else:
            feats = out

        x = self.enc(feats)

        x_lens = (wavs_lens_rate * x.shape[1]).round().astype(paddle.int64)
        target_lens = target_lens.astype(paddle.int64)
        ctc_loss = self.ctc(x, x_lens, target, target_lens)

        return ctc_loss

    @paddle.no_grad()
    def decode(self,
               feats: paddle.Tensor,
               text_feature: Dict[str, int],
               decoding_method: str,
               beam_size: int,
               tokenizer: str=None,
               sb_pipeline=False):
        """Decode a batch with CTC greedy search or CTC prefix beam search.

        Args:
            feats: input batch; its first dimension is the batch size.
            text_feature: object used to turn token ids into text
                (``defeaturize`` or ``convert_ids_to_tokens`` + ``vocab``).
            decoding_method: 'ctc_greedy_search' or 'ctc_prefix_beam_search'.
            beam_size: beam size for prefix beam search.
            tokenizer: when not None, the tokenizer-style conversion
                (special tokens filtered) is used.
            sb_pipeline: when True, a trailing axis is added to ``feats``
                before searching and the tokenizer-style conversion is used.

        Returns:
            Tuple ``(res, res_tokenids)``: decoded strings and token id lists,
            one entry per hypothesis.

        Raises:
            ValueError: if ``decoding_method`` is not supported.
        """
        batch_size = feats.shape[0]

        if decoding_method == 'ctc_prefix_beam_search' and batch_size > 1:
            print(
                f"decoding mode {decoding_method} must be running with batch_size == 1"
            )
            print(f"current batch_size is {batch_size}")

        plain_decoding = tokenizer is None and sb_pipeline is False
        search_input = feats.unsqueeze(-1) if sb_pipeline is True else feats

        if decoding_method == 'ctc_greedy_search':
            if plain_decoding:
                hyps = self.ctc_greedy_search(feats)
                res = [text_feature.defeaturize(hyp) for hyp in hyps]
                res_tokenids = [hyp for hyp in hyps]
            else:
                hyps = self.ctc_greedy_search(search_input)
                res = []
                res_tokenids = []
                # Every hypothesis of the batch is converted, not just the
                # last one.
                for sequence in hyps:
                    text, token_ids = self._tokens_to_text(text_feature,
                                                           sequence)
                    res.append(text)
                    res_tokenids.append(token_ids)

        # ctc_prefix_beam_search and attention_rescoring only return one
        # result in List[int], change it to List[List[int]] for compatible
        # with other batch decoding mode
        elif decoding_method == 'ctc_prefix_beam_search':
            assert feats.shape[0] == 1
            if plain_decoding:
                hyp = self.ctc_prefix_beam_search(feats, beam_size)
                res = [text_feature.defeaturize(hyp)]
                res_tokenids = [hyp]
            else:
                hyp = self.ctc_prefix_beam_search(search_input, beam_size)
                text, token_ids = self._tokens_to_text(text_feature, hyp)
                res = [text]
                res_tokenids = [token_ids]
        else:
            raise ValueError(
                f"WavLM not support decoding method: {decoding_method}")

        return res, res_tokenids

    @classmethod
    def from_config(cls, config):
        """Instantiate the model from ``config``."""
        model = cls(config)
        return model

    def ctc_greedy_search(self, wav) -> List[List[int]]:
        """ Apply CTC greedy search
        Args:
            wav (paddle.Tensor): 3-D input; only index 0 of the last axis
                is used.
        Returns:
            List[List[int]]: best path result
        """
        batch_size = wav.shape[0]
        x = self.enc(self._wavlm_features(wav[:, :, 0]))
        x_lens = x.shape[1]
        ctc_probs = self.ctc.log_softmax(x)  # (B, maxlen, vocab_size)
        _, topk_index = ctc_probs.topk(1, axis=2)  # (B, maxlen, 1)
        topk_index = topk_index.reshape([batch_size, x_lens])  # (B, maxlen)

        hyps = [hyp.tolist() for hyp in topk_index]
        hyps = [remove_duplicates_and_blank(hyp) for hyp in hyps]
        return hyps

    def _ctc_prefix_beam_search(
            self,
            wav,
            beam_size,
            blank_id: int=0, ) -> List[Tuple[Tuple[int, ...], float]]:
        """ CTC prefix beam search inner implementation
        Args:
            wav (paddle.Tensor): 3-D input with batch size 1; only index 0
                of the last axis is used.
            beam_size (int): beam size for beam search
            blank_id (int): id of the CTC blank token
        Returns:
            List[Tuple[Tuple[int, ...], float]]: nbest results as
                (prefix, log likelihood), best first
        """
        x = self.enc(self._wavlm_features(wav[:, :, 0]))
        maxlen = x.shape[1]
        ctc_probs = self.ctc.log_softmax(x)  # (1, maxlen, vocab_size)
        ctc_probs = ctc_probs.squeeze(0)

        # cur_hyps: (prefix, (blank_ending_score, none_blank_ending_score))
        # blank_ending_score and  none_blank_ending_score in ln domain
        cur_hyps = [(tuple(), (0.0, -float('inf')))]
        # 2. CTC beam search step by step
        for t in range(0, maxlen):
            logp = ctc_probs[t]  # (vocab_size,)
            # key: prefix, value (pb, pnb), default value(-inf, -inf)
            next_hyps = defaultdict(lambda: (-float('inf'), -float('inf')))
            # 2.1 First beam prune: select topk best
            _, top_k_index = logp.topk(beam_size)  # (beam_size,)
            for s in top_k_index:
                s = s.item()
                ps = logp[s].item()
                for prefix, (pb, pnb) in cur_hyps:
                    last = prefix[-1] if len(prefix) > 0 else None
                    if s == blank_id:  # blank
                        n_pb, n_pnb = next_hyps[prefix]
                        n_pb = log_add([n_pb, pb + ps, pnb + ps])
                        next_hyps[prefix] = (n_pb, n_pnb)
                    elif s == last:
                        #  Update *ss -> *s;
                        n_pb, n_pnb = next_hyps[prefix]
                        n_pnb = log_add([n_pnb, pnb + ps])
                        next_hyps[prefix] = (n_pb, n_pnb)
                        # Update *s-s -> *ss, - is for blank
                        n_prefix = prefix + (s, )
                        n_pb, n_pnb = next_hyps[n_prefix]
                        n_pnb = log_add([n_pnb, pb + ps])
                        next_hyps[n_prefix] = (n_pb, n_pnb)
                    else:
                        n_prefix = prefix + (s, )
                        n_pb, n_pnb = next_hyps[n_prefix]
                        n_pnb = log_add([n_pnb, pb + ps, pnb + ps])
                        next_hyps[n_prefix] = (n_pb, n_pnb)

            # 2.2 Second beam prune
            next_hyps = sorted(
                next_hyps.items(),
                key=lambda x: log_add(list(x[1])),
                reverse=True)
            cur_hyps = next_hyps[:beam_size]

        hyps = [(y[0], log_add([y[1][0], y[1][1]])) for y in cur_hyps]
        return hyps

    def ctc_prefix_beam_search(self, wav, beam_size) -> List[int]:
        """ Apply CTC prefix beam search
        Args:
            wav (paddle.Tensor): 3-D input with batch size 1; only index 0
                of the last axis is used.
            beam_size (int): beam size for beam search
        Returns:
            List[int]: token ids of the best CTC prefix beam search result
        """
        hyps = self._ctc_prefix_beam_search(wav, beam_size)
        return hyps[0][0]


class WavLMBase(nn.Layer):
    """Thin layer exposing a bare WavLM network."""

    def __init__(self, config: dict):
        super().__init__()
        self.wavlm = WavLM(WavLMConfig(config))

    @classmethod
    def from_config(cls, configs: dict):
        """Create a model instance from a config.

        Args:
            configs (dict): config dict.
        Returns:
            nn.Layer: WavLMBase
        """
        return cls(configs)

    def forward(self, wav):
        """Return the WavLM output for ``wav``."""
        return self.wavlm(wav)


================================================
FILE: paddlespeech/s2t/models/wavlm/wavlm_paddle.py
================================================
# --------------------------------------------------------
# WavLM: Large-Scale Self-Supervised  Pre-training  for Full Stack Speech Processing (https://arxiv.org/abs/2110.13900.pdf)
# Github source: https://github.com/microsoft/unilm/tree/master/wavlm
# Copyright (c) 2021 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Based on fairseq code bases
# https://github.com/pytorch/fairseq
# --------------------------------------------------------
import logging
import math
from typing import List
from typing import Optional
from typing import Tuple

import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import Tensor
from paddle.nn import LayerNorm

from .modules.modules import get_activation_fn
from .modules.modules import GLU_Linear
from .modules.modules import MultiheadAttention
from .modules.modules import SamePad
from .modules.modules import TransposeLast

logger = logging.getLogger(__name__)


def compute_mask_indices(
        shape: Tuple[int, int],
        padding_mask: Optional[Tensor],
        mask_prob: float,
        mask_length: int,
        mask_type: str="static",
        mask_other: float=0.0,
        min_masks: int=0,
        no_overlap: bool=False,
        min_space: int=0, ) -> np.ndarray:
    """
    Computes random mask spans for a given shape

    Args:
        shape: the shape for which to compute masks.
            should be of size 2 where first element is batch size and 2nd is timesteps
        padding_mask: optional padding mask of the same size as shape, which will prevent masking padded elements
        mask_prob: probability for each token to be chosen as start of the span to be masked. this will be multiplied by
            number of timesteps divided by length of mask span to mask approximately this percentage of all elements.
            however due to overlaps, the actual number will be smaller (unless no_overlap is True)
        mask_length: length of each span for the "static" type; also the mean / lambda / upper-bound parameter
            of the other types (see mask_type)
        mask_type: how to compute mask lengths
            static = fixed size
            uniform = sample from uniform distribution [mask_other, mask_length*2]
            normal = sample from normal distribution with mean mask_length and stdev mask_other. mask is min 1 element
            poisson = sample from poisson distribution with lambda = mask length
        mask_other: secondary parameter of the "uniform" and "normal" types (see mask_type)
        min_masks: minimum number of masked spans
        no_overlap: if true, will switch to an alternative recursive algorithm that prevents spans from overlapping
        min_space: only used if no_overlap is True, this is how many elements to keep unmasked between spans

    Returns:
        Boolean ``np.ndarray`` of shape ``(batch, timesteps)``; True marks a masked
        position. Every row has the same number of True entries (rows with more
        candidates are randomly sub-sampled down to the smallest row).

    Raises:
        Exception: if ``mask_type`` is not one of the supported names.
    """

    bsz, all_sz = shape
    mask = np.full((bsz, all_sz), False)

    all_num_mask = int(
        # add a random number for probabilistic rounding
        mask_prob * all_sz / float(mask_length) + np.random.rand())

    all_num_mask = max(min_masks, all_num_mask)

    mask_idcs = []
    for i in range(bsz):
        if padding_mask is not None:
            # Only the non-padded part of the row is eligible for masking.
            sz = all_sz - padding_mask[i].long().sum().item()
            num_mask = int(
                # add a random number for probabilistic rounding
                mask_prob * sz / float(mask_length) + np.random.rand())
            num_mask = max(min_masks, num_mask)
        else:
            sz = all_sz
            num_mask = all_num_mask

        if mask_type == "static":
            lengths = np.full(num_mask, mask_length)
        elif mask_type == "uniform":
            lengths = np.random.randint(
                mask_other, mask_length * 2 + 1, size=num_mask)
        elif mask_type == "normal":
            lengths = np.random.normal(mask_length, mask_other, size=num_mask)
            lengths = [max(1, int(round(x))) for x in lengths]
        elif mask_type == "poisson":
            lengths = np.random.poisson(mask_length, size=num_mask)
            lengths = [int(round(x)) for x in lengths]
        else:
            raise Exception("unknown mask selection " + mask_type)

        if len(lengths) == 0:
            # Zero spans were requested for this row (min_masks == 0 and the
            # probabilistic rounding landed on 0). There is nothing to place,
            # and the code below would index / take min() of an empty sequence,
            # so record an empty (integer-typed) index set and move on.
            mask_idcs.append(np.empty(0, dtype=np.int64))
            continue

        if sum(lengths) == 0:
            # All sampled lengths are zero: force one non-empty span.
            lengths[0] = min(mask_length, sz - 1)

        if no_overlap:
            mask_idc = []

            def arrange(s, e, length, keep_length):
                # Place one span of `length` inside [s, e) and return the
                # leftover sub-intervals that are still large enough to be used.
                span_start = np.random.randint(s, e - length)
                mask_idc.extend(span_start + i for i in range(length))

                new_parts = []
                if span_start - s - min_space >= keep_length:
                    new_parts.append((s, span_start - min_space + 1))
                if e - span_start - keep_length - min_space > keep_length:
                    new_parts.append((span_start + length + min_space, e))
                return new_parts

            parts = [(0, sz)]
            min_length = min(lengths)
            # Longest spans first, so they get placed while large gaps still exist.
            for length in sorted(lengths, reverse=True):
                lens = np.fromiter(
                    (e - s if e - s >= length + min_space else 0
                     for s, e in parts),
                    np.int_, )
                l_sum = np.sum(lens)
                if l_sum == 0:
                    # No remaining interval can host this span.
                    break
                # Pick an interval with probability proportional to its size.
                probs = lens / np.sum(lens)
                c = np.random.choice(len(parts), p=probs)
                s, e = parts.pop(c)
                parts.extend(arrange(s, e, length, min_length))
            mask_idc = np.asarray(mask_idc)
        else:
            min_len = min(lengths)
            if sz - min_len <= num_mask:
                min_len = sz - num_mask - 1

            # Distinct span starts; spans themselves may overlap.
            mask_idc = np.random.choice(sz - min_len, num_mask, replace=False)

            mask_idc = np.asarray([
                mask_idc[j] + offset
                for j in range(len(mask_idc)) for offset in range(lengths[j])
            ])

        # Drop indices that ran past the valid length and de-duplicate overlaps.
        mask_idcs.append(np.unique(mask_idc[mask_idc < sz]))

    # Equalise the number of masked positions across the batch.
    min_len = min([len(m) for m in mask_idcs])
    for i, mask_idc in enumerate(mask_idcs):
        if len(mask_idc) > min_len:
            mask_idc = np.random.choice(mask_idc, min_len, replace=False)
        mask[i, mask_idc] = True

    return mask


class WavLMConfig:
    """Plain attribute bag holding WavLM hyper-parameters.

    Every default below becomes an instance attribute; entries of the optional
    ``cfg`` dict are then written on top (overriding or adding attributes).
    """

    def __init__(self, cfg=None):
        defaults = {
            # Feature-extractor mode: "default" puts a single group norm in the
            # first conv block, "layer_norm" puts a layer norm in every block
            # (meant to be used with normalize=True).
            "extractor_mode": "default",
            # Number of transformer encoder layers.
            "encoder_layers": 12,
            # Encoder embedding dimension.
            "encoder_embed_dim": 768,
            # Hidden dimension of the encoder feed-forward network.
            "encoder_ffn_embed_dim": 3072,
            # Number of encoder attention heads.
            "encoder_attention_heads": 12,
            # Activation function name.
            "activation_fn": "gelu",
            # Apply layer norm first in the transformer.
            "layer_norm_first": False,
            # Python expression (as a string) describing the conv feature
            # extractor: a list of (dim, kernel_size, stride) tuples.
            "conv_feature_layers":
            "[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2",
            # Include a bias term in the conv encoder.
            "conv_bias": False,
            # Multiplier for the feature-extractor gradients.
            "feature_grad_mult": 1.0,
            # Normalize input to zero mean / unit variance during training.
            "normalize": False,

            # --- dropouts ---
            # Dropout probability for the transformer.
            "dropout": 0.1,
            # Dropout probability for attention weights.
            "attention_dropout": 0.1,
            # Dropout probability after the FFN activation.
            "activation_dropout": 0.0,
            # Probability of dropping a whole transformer layer.
            "encoder_layerdrop": 0.0,
            # Dropout applied to the input (after feature extraction).
            "dropout_input": 0.0,
            # Dropout applied to the features (after feature extraction).
            "dropout_features": 0.0,

            # --- time masking ---
            # Mask span length.
            "mask_length": 10,
            # Probability of replacing a token with the mask embedding.
            "mask_prob": 0.65,
            # How mask lengths are chosen.
            "mask_selection": "static",
            # Secondary mask argument for the non-static selections
            # (see compute_mask_indices).
            "mask_other": 0,
            # Forbid overlapping mask spans.
            "no_mask_overlap": False,
            # Minimum gap between spans (when overlap is forbidden).
            "mask_min_space": 1,

            # --- channel masking ---
            # Mask span length over feature channels.
            "mask_channel_length": 10,
            # Probability of replacing a feature with 0.
            "mask_channel_prob": 0.0,
            # How channel mask lengths are chosen.
            "mask_channel_selection": "static",
            # Secondary channel-mask argument (see compute_mask_indices).
            "mask_channel_other": 0,
            # Forbid overlapping channel mask spans.
            "no_mask_channel_overlap": False,
            # Minimum gap between channel spans (when overlap is forbidden).
            "mask_channel_min_space": 1,

            # --- convolutional positional embedding ---
            # Number of filters.
            "conv_pos": 128,
            # Number of groups.
            "conv_pos_groups": 16,

            # --- relative position embedding ---
            # Enable relative position embedding.
            "relative_position_embedding": True,
            # Number of buckets.
            "num_buckets": 320,
            # Maximum distance.
            "max_distance": 1280,
            # Enable gated relative position embedding.
            "gru_rel_pos": True,
        }
        # Insertion order of the table is the attribute order of the instance.
        for attr_name, attr_value in defaults.items():
            setattr(self, attr_name, attr_value)

        if cfg is None:
            return
        self.update(cfg)

    def update(self, cfg: dict):
        """Write every key/value of ``cfg`` onto this object as an attribute."""
        vars(self).update(cfg)


class WavLM(nn.Layer):
    """WavLM backbone: conv feature extractor -> layer norm -> optional linear
    projection -> (optional masking) -> Transformer encoder.

    Args:
        cfg (WavLMConfig): hyper-parameters; see ``WavLMConfig`` for the fields
            read here (conv layout, masking settings, dropouts, encoder sizes).
    """

    def __init__(
            self,
            cfg: WavLMConfig, ) -> None:
        super().__init__()
        logger.info(f"WavLM Config: {cfg.__dict__}")

        self.cfg = cfg
        # NOTE(review): eval() executes the config string as Python code. The
        # default value uses list `+` / `*` arithmetic, so ast.literal_eval is
        # not a drop-in replacement; only load configs from trusted sources.
        feature_enc_layers = eval(cfg.conv_feature_layers)
        # Output channel count of the last conv layer.
        self.embed = feature_enc_layers[-1][0]

        self.feature_extractor = ConvFeatureExtractionModel(
            conv_layers=feature_enc_layers,
            dropout=0.0,
            mode=cfg.extractor_mode,
            conv_bias=cfg.conv_bias, )

        # Project conv features to the encoder width only when they differ.
        self.post_extract_proj = (nn.Linear(self.embed, cfg.encoder_embed_dim)
                                  if self.embed != cfg.encoder_embed_dim else
                                  None)

        self.mask_prob = cfg.mask_prob
        self.mask_selection = cfg.mask_selection
        self.mask_other = cfg.mask_other
        self.mask_length = cfg.mask_length
        self.no_mask_overlap = cfg.no_mask_overlap
        self.mask_min_space = cfg.mask_min_space

        self.mask_channel_prob = cfg.mask_channel_prob
        self.mask_channel_selection = cfg.mask_channel_selection
        self.mask_channel_other = cfg.mask_channel_other
        self.mask_channel_length = cfg.mask_channel_length
        self.no_mask_channel_overlap = cfg.no_mask_channel_overlap
        self.mask_channel_min_space = cfg.mask_channel_min_space

        self.dropout_input = nn.Dropout(cfg.dropout_input)
        self.dropout_features = nn.Dropout(cfg.dropout_features)

        self.feature_grad_mult = cfg.feature_grad_mult

        # Learned embedding written into masked time steps.
        self.mask_emb = self.create_parameter(
            shape=[cfg.encoder_embed_dim],
            default_initializer=nn.initializer.Uniform(), )

        self.encoder = TransformerEncoder(cfg)
        self.layer_norm = LayerNorm(self.embed)

    def apply_mask(self, x, padding_mask):
        """Apply time masking and channel masking to ``x`` in place.

        Args:
            x: tensor unpacked as (B, T, C).
            padding_mask: passed to ``compute_mask_indices`` for the time mask;
                may be None.

        Returns:
            Tuple ``(x, mask_indices)`` where ``mask_indices`` is the boolean
            (B, T) time mask, or None when ``mask_prob`` is not positive.
        """
        B, T, C = x.shape
        if self.mask_prob > 0:
            mask_indices = compute_mask_indices(
                (B, T),
                padding_mask,
                self.mask_prob,
                self.mask_length,
                self.mask_selection,
                self.mask_other,
                min_masks=2,
                no_overlap=self.no_mask_overlap,
                min_space=self.mask_min_space, )
            # Keep the mask boolean: an int64 0/1 tensor would be interpreted
            # as gather indices along axis 0 rather than as a selection mask.
            mask_indices = paddle.to_tensor(mask_indices, dtype='bool')
            x[mask_indices] = self.mask_emb
        else:
            mask_indices = None

        if self.mask_channel_prob > 0:
            mask_channel_indices = compute_mask_indices(
                (B, C),
                None,
                self.mask_channel_prob,
                self.mask_channel_length,
                self.mask_channel_selection,
                self.mask_channel_other,
                no_overlap=self.no_mask_channel_overlap,
                min_space=self.mask_channel_min_space, )
            # (B, C) -> (B, 1, C) -> (B, T, C): the same channels are zeroed at
            # every time step. Paddle's expand takes the target shape as a list.
            mask_channel_indices = paddle.to_tensor(
                mask_channel_indices,
                dtype='bool').unsqueeze(1).expand([-1, T, -1])
            x[mask_channel_indices] = 0

        return x, mask_indices

    def forward_padding_mask(
            self,
            features: Tensor,
            padding_mask: Tensor, ) -> Tensor:
        """Down-sample a sample-level padding mask to the feature frame rate.

        The mask is trimmed so its length is a multiple of the number of
        feature frames, grouped per frame, and a frame counts as padding only
        if every entry in its group is set.
        """
        # `.shape[...]` is used instead of torch-style `.size(...)` so this
        # does not depend on a `size` method being available on the tensor.
        num_frames = features.shape[1]
        extra = padding_mask.shape[1] % num_frames
        if extra > 0:
            padding_mask = padding_mask[:, :-extra]
        padding_mask = padding_mask.reshape(
            [padding_mask.shape[0], num_frames, -1])
        padding_mask = padding_mask.all(-1)
        return padding_mask

    def extract_features(
            self,
            source: Tensor,
            padding_mask: Optional[Tensor]=None,
            mask: bool=False,
            ret_conv: bool=False,
            output_layer: Optional[int]=None,
            ret_layer_results: bool=False, ):
        """Run the full pipeline and return the selected representation.

        Args:
            source: input passed to the conv feature extractor.
            padding_mask: optional sample-level padding mask.
            mask: whether to apply ``apply_mask`` before the encoder.
            ret_conv: return the pre-encoder features instead of the encoder
                output.
            output_layer: 1-based encoder layer to stop at; None runs all
                layers.
            ret_layer_results: also return the encoder's per-layer results.

        Returns:
            Tuple ``(feature, padding_mask)``. ``feature`` is the encoder output
            (or the conv features when ``ret_conv``); when
            ``ret_layer_results`` it is ``(feature, layer_results)``.
            ``padding_mask`` is the frame-level mask (or None).
        """

        if self.feature_grad_mult > 0:
            features = self.feature_extractor(source)
            # if self.feature_grad_mult != 1.0:
            #     features = GradMultiply.apply(features, self.feature_grad_mult)
        else:
            # Feature extractor is frozen: no gradient is tracked through it.
            with paddle.no_grad():
                features = self.feature_extractor(source)

        # (B, C, T) -> (B, T, C), e.g. [1, 49, 512]
        features = features.transpose([0, 2, 1])
        features = self.layer_norm(features)

        if padding_mask is not None:
            padding_mask = self.forward_padding_mask(features, padding_mask)

        if self.post_extract_proj is not None:
            features = self.post_extract_proj(features)
        # e.g. [1, 49, 768]
        features = self.dropout_input(features)

        if mask:
            x, mask_indices = self.apply_mask(features, padding_mask)
        else:
            x = features

        # feature: (B, T, D), float
        # target: (B, T), long
        # x: (B, T, D), float
        # padding_mask: (B, T), bool
        # mask_indices: (B, T), bool

        x, layer_results = self.encoder(
            x,
            padding_mask=padding_mask,
            layer=None if output_layer is None else output_layer - 1)
        res = {
            "x": x,
            "padding_mask": padding_mask,
            "features": features,
            "layer_results": layer_results
        }

        feature = res["features"] if ret_conv else res["x"]
        if ret_layer_results:
            feature = (feature, res["layer_results"])
        return feature, res["padding_mask"]

    def forward(self, x):
        """Return the encoder output of ``extract_features`` with default options."""
        return self.extract_features(x)[0]


class ConvFeatureExtractionModel(nn.Layer):
    """Stack of convolutional layers used as the WavLM front end.

    Args:
        conv_layers: one ``(dim, kernel_size, stride)`` tuple per layer.
        dropout: dropout probability after each conv (``conv_type="default"``).
        mode: ``"default"`` puts a group norm in the first block only;
            ``"layer_norm"`` puts a layer norm in every block.
        conv_bias: whether the 1-D convolutions carry a bias
            (``conv_type="default"``).
        conv_type: ``"default"`` builds Conv1D blocks, ``"conv2d"`` builds
            Conv2D + ReLU pairs, ``"custom"`` builds Conv2D + LayerNorm + ReLU
            with a max-pool after every second layer. Any other value builds
            no layers at all.
    """

    def __init__(self,
                 conv_layers: List[Tuple[int, int, int]],
                 dropout: float=0.0,
                 mode: str="default",
                 conv_bias: bool=False,
                 conv_type: str="default"):
        super().__init__()

        assert mode in {"default", "layer_norm"}

        def block(
                n_in,
                n_out,
                k,
                stride,
                is_layer_norm=False,
                is_group_norm=False,
                conv_bias=False, ):
            # Build one Conv1D block: conv -> dropout -> [norm] -> GELU.
            def make_conv():
                conv = nn.Conv1D(
                    n_in,
                    n_out,
                    k,
                    stride=stride,
                    bias_attr=conv_bias,
                    weight_attr=nn.initializer.KaimingNormal())
                return conv

            assert not (is_layer_norm and is_group_norm
                        ), "layer norm and group norm are exclusive"

            # Norm sizes use this block's own output width (n_out) rather than
            # reaching for the caller's loop variable through the closure.
            if is_layer_norm:
                return nn.Sequential(
                    make_conv(),
                    nn.Dropout(p=dropout),
                    nn.Sequential(
                        # LayerNorm acts on the last axis, so swap channels
                        # there and back.
                        TransposeLast(),
                        nn.LayerNorm(normalized_shape=n_out, epsilon=1e-5),
                        TransposeLast(), ),
                    nn.GELU(), )
            elif is_group_norm:
                return nn.Sequential(
                    make_conv(),
                    nn.Dropout(p=dropout),
                    nn.GroupNorm(
                        num_groups=n_out, num_channels=n_out, epsilon=1e-5),
                    nn.GELU(), )
            else:
                return nn.Sequential(
                    make_conv(), nn.Dropout(p=dropout), nn.GELU())

        self.conv_type = conv_type
        if self.conv_type == "default":
            in_d = 1
            self.conv_layers = nn.LayerList()
            for i, cl in enumerate(conv_layers):
                assert len(cl) == 3, "invalid conv definition: " + str(cl)
                (dim, k, stride) = cl

                self.conv_layers.append(
                    block(
                        in_d,
                        dim,
                        k,
                        stride,
                        is_layer_norm=mode == "layer_norm",
                        is_group_norm=mode == "default" and i == 0,
                        conv_bias=conv_bias, ))
                in_d = dim
        elif self.conv_type == "conv2d":
            in_d = 1
            self.conv_layers = nn.LayerList()
            for i, cl in enumerate(conv_layers):
                assert len(cl) == 3
                (dim, k, stride) = cl

                self.conv_layers.append(paddle.nn.Conv2D(in_d, dim, k, stride))
                self.conv_layers.append(paddle.nn.ReLU())
                in_d = dim
        elif self.conv_type == "custom":
            in_d = 1
            # Size of the last (feature) axis seen by LayerNorm; halved by each
            # max-pool below. NOTE(review): 80 looks like an input feature
            # dimension (e.g. filterbank bins) - confirm against callers.
            idim = 80
            self.conv_layers = nn.LayerList()
            for i, cl in enumerate(conv_layers):
                assert len(cl) == 3
                (dim, k, stride) = cl
                self.conv_layers.append(
                    paddle.nn.Conv2D(in_d, dim, k, stride, padding=1))
                self.conv_layers.append(paddle.nn.LayerNorm([dim, idim]))
                self.conv_layers.append(paddle.nn.ReLU())
                in_d = dim
                if (i + 1) % 2 == 0:
                    self.conv_layers.append(
                        paddle.nn.MaxPool2D(2, stride=2, ceil_mode=True))
                    idim = int(math.ceil(idim / 2))
        else:
            # Unknown conv_type: no layers are created (forward will then fail
            # when it looks up self.conv_layers).
            pass

    def forward(self, x, mask=None):
        """Apply the conv stack.

        Args:
            x: input tensor; a channel axis is inserted at position 1.
            mask: unused.

        Returns:
            The conv features; for the 2-D conv types the channel and feature
            axes are merged so the result is 3-D.
        """

        # BxT -> BxCxT
        x = x.unsqueeze(1)
        if self.conv_type == "custom":
            for conv in self.conv_layers:
                if isinstance(conv, nn.LayerNorm):
                    # Activations here are 4-D (Conv2D output), so the
                    # permutation needs four axes: swap axes 1 and 2 so that
                    # LayerNorm([dim, idim]) sees (channels, features) last.
                    x = x.transpose([0, 2, 1, 3])
                    x = conv(x).transpose([0, 2, 1, 3])
                else:
                    x = conv(x)
            x = x.transpose([0, 1, 3, 2]).contiguous()
            # `.shape[...]` instead of torch-style `.size(...)`.
            x = x.reshape([x.shape[0], -1, x.shape[-1]])
        else:
            for conv in self.conv_layers:
                x = conv(x)
            if self.conv_type == "conv2d":
                b, c, t, f = x.shape
                # (B, C, T, F) -> (B, C, F, T) -> (B, C*F, T)
                x = x.transpose([0, 1, 3, 2]).contiguous().reshape(
                    [b, c * f, t])
        return x


class TransformerEncoder(nn.Layer):
    """Convolutional positional embedding followed by a stack of
    ``TransformerSentenceEncoderLayer`` blocks."""

    def __init__(self, args):
        super().__init__()

        self.dropout = args.dropout
        self.embedding_dim = args.encoder_embed_dim

        # Standard deviation for the positional-conv weight initialiser.
        init_dropout = 0
        init_std = math.sqrt((4 * (1.0 - init_dropout)) /
                             (args.conv_pos * self.embedding_dim))

        pos_conv = nn.Conv1D(
            self.embedding_dim,
            self.embedding_dim,
            kernel_size=args.conv_pos,
            padding=args.conv_pos // 2,
            groups=args.conv_pos_groups,
            weight_attr=nn.initializer.Normal(mean=0, std=init_std),
            bias_attr=True)
        pos_conv = nn.utils.weight_norm(pos_conv, name="weight", dim=2)
        self.pos_conv = nn.Sequential(pos_conv,
                                      SamePad(args.conv_pos), nn.GELU())

        # Relative position settings are optional on the config object.
        if not hasattr(args, "relative_position_embedding"):
            self.relative_position_embedding = False
            self.num_buckets = 0
            self.max_distance = 0
        else:
            self.relative_position_embedding = args.relative_position_embedding
            self.num_buckets = args.num_buckets
            self.max_distance = args.max_distance

        stack = []
        for layer_idx in range(args.encoder_layers):
            # Only the first layer owns the relative attention bias.
            owns_rel_bias = (self.relative_position_embedding and
                             layer_idx == 0)
            stack.append(
                TransformerSentenceEncoderLayer(
                    embedding_dim=self.embedding_dim,
                    ffn_embedding_dim=args.encoder_ffn_embed_dim,
                    num_attention_heads=args.encoder_attention_heads,
                    dropout=self.dropout,
                    attention_dropout=args.attention_dropout,
                    activation_dropout=args.activation_dropout,
                    activation_fn=args.activation_fn,
                    layer_norm_first=args.layer_norm_first,
                    has_relative_attention_bias=owns_rel_bias,
                    num_buckets=self.num_buckets,
                    max_distance=self.max_distance,
                    gru_rel_pos=args.gru_rel_pos, ))
        self.layers = nn.LayerList(stack)

        self.layer_norm_first = args.layer_norm_first
        self.layer_norm = LayerNorm(self.embedding_dim)
        self.layerdrop = args.encoder_layerdrop

    def forward(self, x, padding_mask=None, streaming_mask=None, layer=None):
        """Encode ``x``; returns ``(x, layer_results)``.

        In the pre-norm configuration the final layer norm is applied here,
        but only when all layers were run (``layer`` is None).
        """
        x, layer_results = self.extract_features(x, padding_mask,
                                                 streaming_mask, layer)
        apply_final_norm = self.layer_norm_first and layer is None
        if apply_final_norm:
            x = self.layer_norm(x)

        return x, layer_results

    def extract_features(self,
                         x,
                         padding_mask=None,
                         streaming_mask=None,
                         tgt_layer=None):
        """Add the positional embedding and run the layer stack.

        Args:
            x: input laid out as B x T x C.
            padding_mask: optional mask; selected positions are zeroed in place
                and the mask is forwarded to every layer.
            streaming_mask: optional attention mask forwarded to every layer.
            tgt_layer: 0-based index of the last layer to run; None runs all.

        Returns:
            ``(x, layer_results)`` with ``x`` back in B x T x C layout.
            ``layer_results`` holds ``(x, z)`` pairs (input plus one per layer
            visited) when ``tgt_layer`` is given, otherwise it is empty.
        """

        if padding_mask is not None:
            x[padding_mask] = 0

        # The conv runs over B x C x T; convert there and back.
        pos_emb = self.pos_conv(x.transpose([0, 2, 1])).transpose([0, 2, 1])
        x += pos_emb
        if not self.layer_norm_first:
            x = self.layer_norm(x)

        x = F.dropout(x, p=self.dropout, training=self.training)

        # B x T x C -> T x B x C
        x = x.transpose([1, 0, 2])

        collect = tgt_layer is not None
        z = None
        layer_results = [(x, z)] if collect else []
        pos_bias = None
        for idx, enc_layer in enumerate(self.layers):
            # One random draw per layer, regardless of mode.
            draw = np.random.random()
            dropped = self.training and not (draw > self.layerdrop)
            if not dropped:
                x, z, pos_bias = enc_layer(
                    x,
                    self_attn_padding_mask=padding_mask,
                    need_weights=False,
                    self_attn_mask=streaming_mask,
                    pos_bias=pos_bias)
            if collect:
                layer_results.append((x, z))
            if idx == tgt_layer:
                # Requested layer reached; x already holds its output.
                break

        # T x B x C -> B x T x C
        x = x.transpose([1, 0, 2])

        return x, layer_results


class TransformerSentenceEncoderLayer(nn.Layer):
    """
    One Transformer encoder layer (self-attention + position-wise feed-forward),
    in the style used by BERT/XLM pre-trained models.
    """

    def __init__(
            self,
            embedding_dim: float=768,
            ffn_embedding_dim: float=3072,
            num_attention_heads: float=8,
            dropout: float=0.1,
            attention_dropout: float=0.1,
            activation_dropout: float=0.1,
            activation_fn: str="relu",
            layer_norm_first: bool=False,
            has_relative_attention_bias: bool=True,
            num_buckets: int=0,
            max_distance: int=0,
            rescale_init: bool=False,
            gru_rel_pos: bool=True, ) -> None:

        super().__init__()
        # Hyper-parameters kept on the instance
        self.embedding_dim = embedding_dim
        self.dropout = dropout
        self.activation_dropout = activation_dropout

        # Sub-modules
        self.activation_name = activation_fn
        self.activation_fn = get_activation_fn(activation_fn)
        self.self_attn = MultiheadAttention(
            self.embedding_dim,
            num_attention_heads,
            dropout=attention_dropout,
            self_attention=True,
            has_relative_attention_bias=has_relative_attention_bias,
            num_buckets=num_buckets,
            max_distance=max_distance,
            rescale_init=rescale_init,
            gru_rel_pos=gru_rel_pos, )

        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(self.activation_dropout)
        self.dropout3 = nn.Dropout(dropout)

        self.layer_norm_first = layer_norm_first

        # Layer norm paired with the self-attention sub-block
        self.self_attn_layer_norm = LayerNorm(self.embedding_dim)

        # With "glu" the first projection is a GLU_Linear and no separate
        # activation is applied afterwards.
        uses_glu = self.activation_name == "glu"
        self.fc1 = (GLU_Linear(self.embedding_dim, ffn_embedding_dim, "swish")
                    if uses_glu else
                    nn.Linear(self.embedding_dim, ffn_embedding_dim))
        self.fc2 = nn.Linear(ffn_embedding_dim, self.embedding_dim)

        # Layer norm paired with the feed-forward sub-block
        self.final_layer_norm = LayerNorm(self.embedding_dim)

    def _feed_forward(self, x):
        """fc1 -> (activation unless GLU) -> dropout2 -> fc2 -> dropout3."""
        hidden = self.fc1(x)
        if self.activation_name != "glu":
            hidden = self.activation_fn(hidden)
        hidden = self.dropout2(hidden)
        hidden = self.fc2(hidden)
        return self.dropout3(hidden)

    def forward(self,
                x: Tensor,
                self_attn_mask: Tensor=None,
                self_attn_padding_mask: Tensor=None,
                need_weights: bool=False,
                pos_bias=None):
        """
        Run the layer. Depending on ``layer_norm_first`` the layer norms are
        applied before (pre-norm) or after (post-norm) each sub-block.

        Returns:
            ``(x, attn, pos_bias)`` where ``attn`` and ``pos_bias`` are whatever
            the attention module returned.
        """
        shortcut = x
        if not self.layer_norm_first:
            # Post-norm: sub-block, residual add, then normalise.
            x, attn, pos_bias = self.self_attn(
                query=x,
                key=x,
                value=x,
                key_padding_mask=self_attn_padding_mask,
                need_weights=need_weights,
                attn_mask=self_attn_mask,
                position_bias=pos_bias)
            x = shortcut + self.dropout1(x)
            x = self.self_attn_layer_norm(x)

            shortcut = x
            x = shortcut + self._feed_forward(x)
            x = self.final_layer_norm(x)
        else:
            # Pre-norm: normalise, sub-block, then residual add.
            x = self.self_attn_layer_norm(x)
            x, attn, pos_bias = self.self_attn(
                query=x,
                key=x,
                value=x,
                key_padding_mask=self_attn_padding_mask,
                need_weights=False,
                attn_mask=self_attn_mask,
                position_bias=pos_bias)
            x = shortcut + self.dropout1(x)

            shortcut = x
            x = shortcut + self._feed_forward(self.final_layer_norm(x))

        return x, attn, pos_bias


================================================
FILE: paddlespeech/s2t/models/whisper/__init__.py
================================================
# MIT License, Copyright (c) 2022 OpenAI.
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# 
# Modified from OpenAI Whisper 2022 (https://github.com/openai/whisper/whisper/__init__.py)
from paddlespeech.s2t.models.whisper.whisper import decode
from paddlespeech.s2t.models.whisper.whisper import DecodingOptions
from paddlespeech.s2t.models.whisper.whisper import DecodingResult
from paddlespeech.s2t.models.whisper.whisper import detect_language
from paddlespeech.s2t.models.whisper.whisper import log_mel_spectrogram
from paddlespeech.s2t.models.whisper.whisper import ModelDimensions
from paddlespeech.s2t.models.whisper.whisper import transcribe
from paddlespeech.s2t.models.whisper.whisper import Whisper


================================================
FILE: paddlespeech/s2t/models/whisper/tokenizer.py
================================================
# MIT License, Copyright (c) 2022 OpenAI.
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# 
# Modified from OpenAI Whisper 2022 (https://github.com/openai/whisper/whisper/tokenizer.py)
import base64
import os
import string
from dataclasses import dataclass
from dataclasses import field
from functools import cached_property
from functools import lru_cache
from typing import Dict
from typing import List
from typing import Optional
from typing import Tuple

import tiktoken

# Mapping from language code to lowercase English language name.
# The insertion order is significant: Tokenizer.__post_init__ takes the first
# `num_languages` keys and uses a code's position in that sequence to compute
# its language token id (sot + 1 + index). Append new entries at the end only.
LANGUAGES = {
    "en": "english",
    "zh": "chinese",
    "de": "german",
    "es": "spanish",
    "ru": "russian",
    "ko": "korean",
    "fr": "french",
    "ja": "japanese",
    "pt": "portuguese",
    "tr": "turkish",
    "pl": "polish",
    "ca": "catalan",
    "nl": "dutch",
    "ar": "arabic",
    "sv": "swedish",
    "it": "italian",
    "id": "indonesian",
    "hi": "hindi",
    "fi": "finnish",
    "vi": "vietnamese",
    "he": "hebrew",
    "uk": "ukrainian",
    "el": "greek",
    "ms": "malay",
    "cs": "czech",
    "ro": "romanian",
    "da": "danish",
    "hu": "hungarian",
    "ta": "tamil",
    "no": "norwegian",
    "th": "thai",
    "ur": "urdu",
    "hr": "croatian",
    "bg": "bulgarian",
    "lt": "lithuanian",
    "la": "latin",
    "mi": "maori",
    "ml": "malayalam",
    "cy": "welsh",
    "sk": "slovak",
    "te": "telugu",
    "fa": "persian",
    "lv": "latvian",
    "bn": "bengali",
    "sr": "serbian",
    "az": "azerbaijani",
    "sl": "slovenian",
    "kn": "kannada",
    "et": "estonian",
    "mk": "macedonian",
    "br": "breton",
    "eu": "basque",
    "is": "icelandic",
    "hy": "armenian",
    "ne": "nepali",
    "mn": "mongolian",
    "bs": "bosnian",
    "kk": "kazakh",
    "sq": "albanian",
    "sw": "swahili",
    "gl": "galician",
    "mr": "marathi",
    "pa": "punjabi",
    "si": "sinhala",
    "km": "khmer",
    "sn": "shona",
    "yo": "yoruba",
    "so": "somali",
    "af": "afrikaans",
    "oc": "occitan",
    "ka": "georgian",
    "be": "belarusian",
    "tg": "tajik",
    "sd": "sindhi",
    "gu": "gujarati",
    "am": "amharic",
    "yi": "yiddish",
    "lo": "lao",
    "uz": "uzbek",
    "fo": "faroese",
    "ht": "haitian creole",
    "ps": "pashto",
    "tk": "turkmen",
    "nn": "nynorsk",
    "mt": "maltese",
    "sa": "sanskrit",
    "lb": "luxembourgish",
    "my": "myanmar",
    "bo": "tibetan",
    "tl": "tagalog",
    "mg": "malagasy",
    "as": "assamese",
    "tt": "tatar",
    "haw": "hawaiian",
    "ln": "lingala",
    "ha": "hausa",
    "ba": "bashkir",
    "jw": "javanese",
    "su": "sundanese",
    "yue": "cantonese",
}

# Reverse lookup: language name -> language code. Built by inverting LANGUAGES,
# then extended with a few alternative names (aliases) that map onto codes
# already present in LANGUAGES.
TO_LANGUAGE_CODE = {
    **{language: code for code, language in LANGUAGES.items()},
    "burmese": "my",
    "valencian": "ca",
    "flemish": "nl",
    "haitian": "ht",
    "letzeburgesch": "lb",
    "pushto": "ps",
    "panjabi": "pa",
    "moldavian": "ro",
    "moldovan": "ro",
    "sinhalese": "si",
    "castilian": "es",
    "mandarin": "zh",
}


@dataclass
class Tokenizer:
    """A thin wrapper around `tiktoken` providing quick access to special tokens

    After construction, ``special_tokens`` maps every special token string of
    ``encoding`` to its id, and ``sot_sequence`` holds the start-of-transcript
    id optionally followed by a language id and a task id.
    """

    # underlying tiktoken encoding used for all encode/decode calls
    encoding: tiktoken.Encoding
    # how many leading entries of LANGUAGES are considered available
    num_languages: int
    # language code used for the language token in `sot_sequence`, if any
    language: Optional[str] = None
    # "transcribe" selects <|transcribe|>; any other non-None value selects <|translate|>
    task: Optional[str] = None
    # overwritten in __post_init__
    sot_sequence: Tuple[int] = ()
    # filled in __post_init__: special token string -> token id
    special_tokens: Dict[str, int] = field(default_factory=dict)

    def __post_init__(self):
        """Populate `special_tokens` and derive `sot_sequence` from `language` and `task`."""
        for special in self.encoding.special_tokens_set:
            special_token = self.encoding.encode_single_token(special)
            self.special_tokens[special] = special_token

        sot: int = self.special_tokens["<|startoftranscript|>"]
        translate: int = self.special_tokens["<|translate|>"]
        transcribe: int = self.special_tokens["<|transcribe|>"]
        langs = tuple(LANGUAGES.keys())[:self.num_languages]
        sot_sequence = [sot]
        if self.language is not None:
            # language ids are taken to follow <|startoftranscript|> directly, in
            # LANGUAGES key order; `index` raises ValueError for a language that
            # is not among the first `num_languages` entries
            sot_sequence.append(sot + 1 + langs.index(self.language))
        if self.task is not None:
            task_token: int = transcribe if self.task == "transcribe" else translate
            sot_sequence.append(task_token)

        self.sot_sequence = tuple(sot_sequence)

    def encode(self, text, **kwargs):
        """Encode `text` with the underlying encoding; `kwargs` are forwarded unchanged."""
        return self.encoding.encode(text, **kwargs)

    def decode(self, token_ids: List[int], **kwargs) -> str:
        """Decode `token_ids` to text after dropping every id >= `timestamp_begin`."""
        token_ids = [t for t in token_ids if t < self.timestamp_begin]
        return self.encoding.decode(token_ids, **kwargs)

    def decode_with_timestamps(self, token_ids: List[int], **kwargs) -> str:
        """
        Timestamp tokens are above other special tokens' id range and are ignored by `decode()`.
        This method decodes given tokens with timestamps tokens annotated, e.g. "<|1.08|>".
        """
        return self.encoding.decode(token_ids, **kwargs)

    @cached_property
    def eot(self) -> int:
        """End-of-text token id, as reported by the encoding."""
        return self.encoding.eot_token

    @cached_property
    def transcribe(self) -> int:
        """Id of the "<|transcribe|>" special token."""
        return self.special_tokens["<|transcribe|>"]

    @cached_property
    def translate(self) -> int:
        """Id of the "<|translate|>" special token."""
        return self.special_tokens["<|translate|>"]

    @cached_property
    def sot(self) -> int:
        """Id of the "<|startoftranscript|>" special token."""
        return self.special_tokens["<|startoftranscript|>"]

    @cached_property
    def sot_lm(self) -> int:
        """Id of the "<|startoflm|>" special token."""
        return self.special_tokens["<|startoflm|>"]

    @cached_property
    def sot_prev(self) -> int:
        """Id of the "<|startofprev|>" special token."""
        return self.special_tokens["<|startofprev|>"]

    @cached_property
    def no_speech(self) -> int:
        """Id of the "<|nospeech|>" special token."""
        return self.special_tokens["<|nospeech|>"]

    @cached_property
    def no_timestamps(self) -> int:
        """Id of the "<|notimestamps|>" special token."""
        return self.special_tokens["<|notimestamps|>"]

    @cached_property
    def timestamp_begin(self) -> int:
        """Id of the first timestamp token, "<|0.00|>"."""
        return self.special_tokens["<|0.00|>"]

    @cached_property
    def language_token(self) -> int:
        """Returns the token id corresponding to the value of the `language` field"""
        if self.language is None:
            raise ValueError(
                "This tokenizer does not have language token configured")

        return self.to_language_token(self.language)

    def to_language_token(self, language):
        """Return the id of the special token "<|{language}|>".

        Raises KeyError when no such special token exists (a falsy id is
        treated as missing as well).
        """
        if token := self.special_tokens.get(f"<|{language}|>", None):
            return token

        raise KeyError(f"Language {language} not found in tokenizer.")

    @cached_property
    def all_language_tokens(self) -> Tuple[int]:
        """Ids of the special tokens whose name (without "<|" and "|>") is a LANGUAGES key.

        The result follows `special_tokens` iteration order and is truncated to
        the first `num_languages` ids.
        """
        result = []
        for token, token_id in self.special_tokens.items():
            if token.strip("<|>") in LANGUAGES:
                result.append(token_id)
        return tuple(result)[:self.num_languages]

    @cached_property
    def all_language_codes(self) -> Tuple[str]:
        """Language codes obtained by decoding each id in `all_language_tokens`."""
        return tuple(
            self.decode([_l]).strip("<|>") for _l in self.all_language_tokens)

    @cached_property
    def sot_sequence_including_notimestamps(self) -> Tuple[int]:
        """`sot_sequence` with the "<|notimestamps|>" id appended."""
        return tuple(list(self.sot_sequence) + [self.no_timestamps])

    @cached_property
    def non_speech_tokens(self) -> Tuple[int]:
        """
        Returns the list of tokens to suppress in order to avoid any speaker tags or non-speech
        annotations, to prevent sampling texts that are not actually spoken in the audio, e.g.

        - ♪♪♪
        - ( SPEAKING FOREIGN LANGUAGE )
        - [DAVID] Hey there,

        keeping basic punctuations like commas, periods, question marks, exclamation points, etc.

        The result is a sorted tuple of unique token ids.
        """
        symbols = list('"#()*+/:;<=>@[\\]^_`{|}~「」『』')
        symbols += (
            "<< >> <<< >>> -- --- -( -[ (' (\" (( )) ((( ))) [[ ]] {{ }} ♪♪ ♪♪♪".
            split())

        # symbols that may be a single token or multiple tokens depending on the tokenizer.
        # In case they're multiple tokens, suppress the first token, which is safe because:
        # These are between U+2640 and U+267F miscellaneous symbols that are okay to suppress
        # in generations, and in the 3-byte UTF-8 representation they share the first two bytes.
        miscellaneous = set("♩♪♫♬♭♮♯")
        assert all(0x2640 <= ord(c) <= 0x267F for c in miscellaneous)

        # allow hyphens "-" and single quotes "'" between words, but not at the beginning of a word
        result = {self.encoding.encode(" -")[0], self.encoding.encode(" '")[0]}
        for symbol in symbols + list(miscellaneous):
            # consider the symbol both bare and preceded by a space
            for tokens in [
                    self.encoding.encode(symbol),
                    self.encoding.encode(" " + symbol),
            ]:
                # multi-token encodings are only suppressed (via their first
                # token) for the miscellaneous symbols above
                if len(tokens) == 1 or symbol in miscellaneous:
                    result.add(tokens[0])

        return tuple(sorted(result))

    def split_to_word_tokens(self, tokens: List[int]):
        """Split `tokens` into words, returning `(words, word_tokens)`.

        Uses unicode-boundary splitting for the languages listed below and
        space-based splitting for everything else.
        """
        if self.language in {"zh", "ja", "th", "lo", "my", "yue"}:
            # These languages don't typically use spaces, so it is difficult to split words
            # without morpheme analysis. Here, we instead split words at any
            # position where the tokens are decoded as valid unicode points
            return self.split_tokens_on_unicode(tokens)

        return self.split_tokens_on_spaces(tokens)

    def split_tokens_on_unicode(self, tokens: List[int]):
        """Group `tokens` into the shortest runs that decode without a dangling U+FFFD.

        Returns `(words, word_tokens)`: the decoded text of each run and the
        list of token ids that produced it.
        """
        decoded_full = self.decode_with_timestamps(tokens)
        replacement_char = "\ufffd"

        words = []
        word_tokens = []
        current_tokens = []
        # number of characters of `decoded_full` already emitted as words
        unicode_offset = 0

        for token in tokens:
            current_tokens.append(token)
            decoded = self.decode_with_timestamps(current_tokens)

            # Close the run when it decodes cleanly, or when its replacement
            # character is also present at the same position of the full
            # decoding (i.e. it is not an artifact of cutting mid-character).
            if (replacement_char not in decoded or
                    decoded_full[unicode_offset + decoded.index(
                        replacement_char)] == replacement_char):
                words.append(decoded)
                word_tokens.append(current_tokens)
                current_tokens = []
                unicode_offset += len(decoded)

        return words, word_tokens

    def split_tokens_on_spaces(self, tokens: List[int]):
        """Merge the unicode-level pieces of `tokens` into space-delimited words.

        A piece starts a new word when it is the very first piece, begins with
        a space, is punctuation, or its first token id is >= `eot`; otherwise
        it is appended to the previous word. Returns `(words, word_tokens)`.
        """
        subwords, subword_tokens_list = self.split_tokens_on_unicode(tokens)
        words = []
        word_tokens = []

        for subword, subword_tokens in zip(subwords, subword_tokens_list):
            special = subword_tokens[0] >= self.eot
            with_space = subword.startswith(" ")
            punctuation = subword.strip() in string.punctuation
            if special or with_space or punctuation or len(words) == 0:
                words.append(subword)
                word_tokens.append(subword_tokens)
            else:
                words[-1] = words[-1] + subword
                word_tokens[-1].extend(subword_tokens)

        return words, word_tokens


@lru_cache(maxsize=None)
def get_encoding(resource_path: str, name: str="gpt2", num_languages: int=99):
    """Build (and cache) the `tiktoken.Encoding` stored under `resource_path`.

    Parameters
    ----------
    resource_path: str
        Directory containing ``assets/{name}.tiktoken``.
    name: str
        Base name of the vocabulary file.
    num_languages: int
        Number of leading LANGUAGES entries that get a "<|code|>" special token.

    Returns
    -------
    tiktoken.Encoding
        Encoding whose special tokens receive consecutive ids directly after
        the ranks read from the vocabulary file.
    """
    vocab_path = os.path.join(resource_path, "assets", f"{name}.tiktoken")
    # Use a context manager so the file handle is closed deterministically, and
    # skip whitespace-only lines (a bare truthiness test never rejects them
    # because each line still carries its newline).
    with open(vocab_path) as vocab_file:
        ranks = {
            base64.b64decode(token): int(rank)
            for token, rank in (line.split() for line in vocab_file
                                if line.strip())
        }

    n_vocab = len(ranks)
    special_tokens = {}
    specials = [
        "<|endoftext|>",
        "<|startoftranscript|>",
        *[f"<|{lang}|>" for lang in list(LANGUAGES.keys())[:num_languages]],
        "<|translate|>",
        "<|transcribe|>",
        "<|startoflm|>",
        "<|startofprev|>",
        "<|nospeech|>",
        "<|notimestamps|>",
        # timestamp tokens from <|0.00|> to <|30.00|> in 0.02 s steps
        *[f"<|{i * 0.02:.2f}|>" for i in range(1501)],
    ]
    # special tokens are numbered consecutively after the mergeable ranks
    for token in specials:
        special_tokens[token] = n_vocab
        n_vocab += 1
    return tiktoken.Encoding(
        name=os.path.basename(vocab_path),
        explicit_n_vocab=n_vocab,
        pat_str=r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""",
        mergeable_ranks=ranks,
        special_tokens=special_tokens, )


@lru_cache(maxsize=None)
def get_tokenizer(
        multilingual: bool,
        resource_path: str,
        *,
        num_languages: int=99,
        language: Optional[str]=None,
        task: Optional[str]=None,  # Literal["transcribe", "translate", None]
) -> Tokenizer:
    """Create (and cache) a `Tokenizer` for the requested configuration.

    `language` may be a LANGUAGES code or any name/alias known to
    TO_LANGUAGE_CODE (case-insensitive); anything else raises ValueError.
    Multilingual tokenizers fall back to language "en" and task "transcribe";
    otherwise the "gpt2" vocabulary is used and language/task are cleared.
    """
    if language is not None:
        language = language.lower()
        if language not in LANGUAGES:
            # not a code: try to resolve it as a language name or alias
            if language not in TO_LANGUAGE_CODE:
                raise ValueError(f"Unsupported language: {language}")
            language = TO_LANGUAGE_CODE[language]

    if not multilingual:
        encoding_name, language, task = "gpt2", None, None
    else:
        encoding_name = "multilingual"
        language = language or "en"
        task = task or "transcribe"

    tokenizer_encoding = get_encoding(
        resource_path=resource_path,
        name=encoding_name,
        num_languages=num_languages)

    return Tokenizer(
        encoding=tokenizer_encoding,
        num_languages=num_languages,
        language=language,
        task=task)


================================================
FILE: paddlespeech/s2t/models/whisper/utils.py
================================================
# MIT License, Copyright (c) 2022 OpenAI.
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# 
# Modified from OpenAI Whisper 2022 (https://github.com/openai/whisper/whisper/utils.py)
import zlib
from typing import Iterator
from typing import TextIO


def exact_div(x, y):
    """Return ``x // y`` after asserting that ``y`` divides ``x`` without remainder."""
    remainder = x % y
    assert remainder == 0
    return x // y


def str2bool(string):
    """Convert the exact strings "True"/"False" to booleans; raise ValueError otherwise."""
    choices = {"True": True, "False": False}
    if string not in choices:
        raise ValueError(f"Expected one of {set(choices.keys())}, got {string}")
    return choices[string]


def optional_int(string):
    """Return None for the literal string "None", otherwise ``int(string)``."""
    if string == "None":
        return None
    return int(string)


def optional_float(string):
    """Return None for the literal string "None", otherwise ``float(string)``."""
    if string == "None":
        return None
    return float(string)


def compression_ratio(text) -> float:
    """Return how well `text` compresses: UTF-8 byte length / zlib-compressed length.

    Both sides of the ratio are measured in bytes. Dividing a character count
    by a compressed byte count understates the ratio for multi-byte (non-ASCII)
    text; for pure-ASCII text the two definitions coincide.
    """
    text_bytes = text.encode("utf-8")
    return len(text_bytes) / len(zlib.compress(text_bytes))


def format_timestamp(seconds: float,
                     always_include_hours: bool=False,
                     decimal_marker: str='.'):
    """Format a non-negative number of seconds as ``[HH:]MM:SS<marker>mmm``.

    The value is rounded to whole milliseconds first. The hours field is
    emitted only when it is non-zero or `always_include_hours` is set;
    `decimal_marker` separates seconds from milliseconds.
    """
    assert seconds >= 0, "non-negative timestamp expected"
    remaining_ms = round(seconds * 1000.0)

    # peel off hours, minutes and seconds, leaving the millisecond remainder
    hours, remaining_ms = divmod(remaining_ms, 3_600_000)
    minutes, remaining_ms = divmod(remaining_ms, 60_000)
    whole_seconds, millis = divmod(remaining_ms, 1_000)

    if always_include_hours or hours > 0:
        hours_marker = f"{hours:02d}:"
    else:
        hours_marker = ""
    return f"{hours_marker}{minutes:02d}:{whole_seconds:02d}{decimal_marker}{millis:03d}"


def write_txt(transcript: Iterator[dict], file: TextIO):
    """Write the stripped ``'text'`` of every segment to `file`, one per line, flushing each."""
    for segment in transcript:
        line = segment['text'].strip()
        print(line, file=file, flush=True)


def write_vtt(transcript: Iterator[dict], file: TextIO):
    """Write `transcript` to `file` as WebVTT.

    Emits the "WEBVTT" header followed by one cue per segment, built from the
    segment's ``'start'``, ``'end'`` and ``'text'`` entries. Any "-->" inside
    the text is rewritten to "->" so it cannot be confused with the cue arrow.
    """
    print("WEBVTT\n", file=file)
    for segment in transcript:
        cue_start = format_timestamp(segment['start'])
        cue_end = format_timestamp(segment['end'])
        cue_text = segment['text'].strip().replace('-->', '->')
        print(
            f"{cue_start} --> {cue_end}\n"
            f"{cue_text}\n",
            file=file,
            flush=True, )


def write_srt(transcript: Iterator[dict], file: TextIO):
    """
    Write a transcript to a file in SRT format.

    Each segment becomes a block made of a 1-based index, a
    ``HH:MM:SS,mmm --> HH:MM:SS,mmm`` line and the stripped text (with any
    "-->" replaced by "->"), followed by a blank line.

    Example usage:
        from pathlib import Path
        from whisper.utils import write_srt

        result = transcribe(model, audio_path, temperature=temperature, **args)

        # save SRT
        audio_basename = Path(audio_path).stem
        with open(Path(output_dir) / (audio_basename + ".srt"), "w", encoding="utf-8") as srt:
            write_srt(result["segments"], file=srt)
    """
    for index, segment in enumerate(transcript, start=1):
        cue_start = format_timestamp(
            segment['start'], always_include_hours=True, decimal_marker=',')
        cue_end = format_timestamp(
            segment['end'], always_include_hours=True, decimal_marker=',')
        cue_text = segment['text'].strip().replace('-->', '->')
        # write srt lines
        print(
            f"{index}\n"
            f"{cue_start} --> "
            f"{cue_end}\n"
            f"{cue_text}\n",
            file=file,
            flush=True, )


================================================
FILE: paddlespeech/s2t/models/whisper/whisper.py
================================================
# MIT License, Copyright (c) 2022 OpenAI.
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Modified from OpenAI Whisper 2022 (https://github.com/openai/whisper/whisper)
import os
from dataclasses import dataclass
from dataclasses import field
from functools import lru_cache
from typing import Dict
from typing import Iterable
from typing import List
from typing import Optional
from typing import Sequence
from typing import Tuple
from typing import Union

import numpy as np
import paddle
import paddle.nn.functional as F
import soundfile
import tqdm
from paddle import nn
from paddle.distribution import Categorical

import paddlespeech.s2t.modules.align as paddlespeech_nn
from paddlespeech.s2t.models.whisper import utils
from paddlespeech.s2t.models.whisper.tokenizer import get_tokenizer
from paddlespeech.s2t.models.whisper.tokenizer import LANGUAGES
from paddlespeech.s2t.models.whisper.tokenizer import Tokenizer
from paddlespeech.s2t.utils.log import Log
logger = Log(__name__).getlog()

_MODELS = ["large"]

# Audio constants; the derived values below are computed from these four.
SAMPLE_RATE = 16000
N_FFT = 400
HOP_LENGTH = 160
CHUNK_LENGTH = 30

# 480000: number of samples in a chunk
N_SAMPLES = CHUNK_LENGTH * SAMPLE_RATE
# 3000: number of frames in a mel spectrogram input
N_FRAMES = utils.exact_div(N_SAMPLES, HOP_LENGTH)

# the initial convolutions has stride 2
N_SAMPLES_PER_TOKEN = HOP_LENGTH * 2
# 10ms per audio frame
FRAMES_PER_SECOND = utils.exact_div(SAMPLE_RATE, HOP_LENGTH)
# 20ms per audio token
TOKENS_PER_SECOND = utils.exact_div(SAMPLE_RATE, N_SAMPLES_PER_TOKEN)


@dataclass
class ModelDimensions:
    """Integer hyper-parameters describing the size of a Whisper model.

    NOTE(review): presumably the ``n_audio_*``/``n_mels`` fields configure the
    audio encoder and ``n_vocab``/``n_text_*`` the text decoder; the code that
    consumes them is outside this view, so confirm against the model class.
    """
    n_mels: int
    # (n_audio_ctx, n_audio_state) is the trailing shape that
    # `detect_language` treats as already-encoded audio features
    n_audio_ctx: int
    n_audio_state: int
    n_audio_head: int
    n_audio_layer: int
    n_vocab: int
    n_text_ctx: int
    n_text_state: int
    n_text_head: int
    n_text_layer: int


class LayerNorm(paddlespeech_nn.LayerNorm):
    """Layer normalization that defers entirely to the parent implementation."""

    def forward(self, x: paddle.Tensor) -> paddle.Tensor:
        normalized = super().forward(x)
        return normalized


class Linear(paddlespeech_nn.Linear):
    """Linear layer whose forward applies `F.linear` with its own weight and optional bias."""

    def forward(self, x: paddle.Tensor) -> paddle.Tensor:
        bias = self.bias
        if bias is None:
            return F.linear(x, self.weight, None)
        return F.linear(x, self.weight, bias)


class Conv1d(paddlespeech_nn.Conv1D):
    """1-D convolution that defers entirely to the parent implementation."""

    def forward(self, x: paddle.Tensor) -> paddle.Tensor:
        convolved = super().forward(x)
        return convolved


class MultiHeadAttention(nn.Layer):
    """Multi-head attention usable as self-attention (``xa`` is None) or cross-attention."""

    def __init__(self, n_state: int, n_head: int):
        super().__init__()
        self.n_head = n_head
        # the key projection is the only one created without a bias
        self.query = Linear(n_state, n_state, bias_attr=True)
        self.key = Linear(n_state, n_state, bias_attr=False)
        self.value = Linear(n_state, n_state, bias_attr=True)
        self.out = Linear(n_state, n_state, bias_attr=True)

    def forward(
            self,
            x: paddle.Tensor,
            xa: Optional[paddle.Tensor]=None,
            mask: Optional[paddle.Tensor]=None,
            kv_cache: Optional[dict]=None, ):
        """Attend from `x` over `xa` (or over `x` itself when `xa` is None).

        When a `kv_cache` already holds entries for this layer's key projection
        and `xa` is given, the cached keys/values are reused instead of being
        recomputed.
        """
        q = self.query(x)

        reuse_cached = (kv_cache is not None and xa is not None and
                        self.key in kv_cache)
        if reuse_cached:
            # for cross-attention, calculate keys and values once and reuse in subsequent calls.
            k = kv_cache[self.key]
            v = kv_cache[self.value]
        else:
            # hooks, if installed (i.e. kv_cache is not None), will prepend the cached kv tensors;
            # otherwise, perform key/value projections for self- or cross-attention as usual.
            source = x if xa is None else xa
            k = self.key(source)
            v = self.value(source)

        attended = self.qkv_attention(q, k, v, mask)
        return self.out(attended)

    def qkv_attention(self,
                      q: paddle.Tensor,
                      k: paddle.Tensor,
                      v: paddle.Tensor,
                      mask: Optional[paddle.Tensor]=None):
        """Softmax attention over heads; returns the heads merged back into the last axis."""
        _, n_ctx, n_state = q.shape
        # applied to both q and k, so their product is scaled by head_dim ** -0.5
        scale = (n_state // self.n_head)**-0.25

        def split_heads(tensor, perm):
            # (batch, ctx, state) -> (batch, ctx, head, head_dim), then permute
            return paddle.transpose(
                tensor.reshape([*tensor.shape[:2], self.n_head, -1]), perm)

        q = split_heads(q, (0, 2, 1, 3)) * scale
        k = split_heads(k, (0, 2, 3, 1)) * scale
        v = split_heads(v, (0, 2, 1, 3))

        scores = q @ k
        if mask is not None:
            scores = scores + mask[:n_ctx, :n_ctx]

        weights = F.softmax(scores.float(), axis=-1).to(q.dtype)
        merged = paddle.transpose((weights @ v), (0, 2, 1, 3))
        return merged.flatten(start_axis=2)


class ResidualAttentionBlock(nn.Layer):
    """Pre-norm residual block: self-attention, optional cross-attention, then an MLP."""

    def __init__(self, n_state: int, n_head: int, cross_attention: bool=False):
        super().__init__()

        self.attn = MultiHeadAttention(n_state, n_head)
        self.attn_ln = LayerNorm(n_state)

        if cross_attention:
            self.cross_attn = MultiHeadAttention(n_state, n_head)
            self.cross_attn_ln = LayerNorm(n_state)
        else:
            self.cross_attn = None
            self.cross_attn_ln = None

        # feed-forward part: expand to 4x the width, GELU, project back
        n_mlp = n_state * 4
        self.mlp = nn.Sequential(
            Linear(n_state, n_mlp, bias_attr=True),
            nn.GELU(), Linear(n_mlp, n_state, bias_attr=True))
        self.mlp_ln = LayerNorm(n_state)

    def forward(
            self,
            x: paddle.Tensor,
            xa: Optional[paddle.Tensor]=None,
            mask: Optional[paddle.Tensor]=None,
            kv_cache: Optional[dict]=None, ):
        """Apply the block to `x`; `mask` is used by self-attention only."""
        self_attended = self.attn(
            self.attn_ln(x), mask=mask, kv_cache=kv_cache)
        x = x + self_attended
        if self.cross_attn:
            cross_attended = self.cross_attn(
                self.cross_attn_ln(x), xa, kv_cache=kv_cache)
            x = x + cross_attended
        return x + self.mlp(self.mlp_ln(x))


def sinusoids(length, channels, max_timescale=10000):
    """Returns sinusoids for positional embedding

    The result has `length` rows and `channels` columns: the first half of the
    columns holds sines and the second half cosines of the scaled positions.
    `channels` must be even.
    """
    assert channels % 2 == 0
    half_channels = channels // 2
    log_timescale_increment = np.log(max_timescale) / (half_channels - 1)
    inv_timescales = paddle.exp(-log_timescale_increment * paddle.arange(
        half_channels, dtype=paddle.float32))
    positions = paddle.arange(length, dtype=paddle.float32)
    scaled_time = positions[:, np.newaxis] * inv_timescales[np.newaxis, :]
    table = paddle.concat(
        [paddle.sin(scaled_time), paddle.cos(scaled_time)], axis=1)
    return paddle.to_tensor(table)


class AudioEncoder(nn.Layer):
    """Two GELU-activated convolutions, sinusoidal positions and a stack of attention blocks."""

    def __init__(self,
                 n_mels: int,
                 n_ctx: int,
                 n_state: int,
                 n_head: int,
                 n_layer: int):
        super().__init__()
        self.conv1 = Conv1d(
            n_mels, n_state, kernel_size=3, stride=1, padding=1, bias_attr=True)
        # the second convolution uses stride 2
        self.conv2 = Conv1d(
            n_state,
            n_state,
            kernel_size=3,
            stride=2,
            padding=1,
            bias_attr=True)
        self.register_buffer("positional_embedding", sinusoids(n_ctx, n_state))

        encoder_blocks = [
            ResidualAttentionBlock(n_state, n_head) for _ in range(n_layer)
        ]
        self.blocks: Iterable[ResidualAttentionBlock] = nn.LayerList(
            encoder_blocks)
        self.ln_post = LayerNorm(n_state)

    def forward(self, x: paddle.Tensor):
        """
        x : paddle.Tensor, shape = (batch_size, n_mels, n_ctx)
            the mel spectrogram of the audio

        Returns the encoded features after the final layer norm.
        """
        for conv in (self.conv1, self.conv2):
            x = F.gelu(conv(x))
        # move the channel axis last before adding positions
        x = paddle.transpose(x, (0, 2, 1))

        assert x.shape[
            1:] == self.positional_embedding.shape, "incorrect audio shape"
        x = x + self.positional_embedding

        for block in self.blocks:
            x = block(x)

        return self.ln_post(x)


class TextDecoder(nn.Layer):
    """Token decoder: embeddings, cross-attending residual blocks and tied output projection."""

    def __init__(self,
                 n_vocab: int,
                 n_ctx: int,
                 n_state: int,
                 n_head: int,
                 n_layer: int):
        super().__init__()

        self.token_embedding = nn.Embedding(n_vocab, n_state)
        self.positional_embedding = paddle.create_parameter(
            shape=[n_ctx, n_state], dtype='float32')

        self.blocks: Iterable[ResidualAttentionBlock] = nn.LayerList([
            ResidualAttentionBlock(n_state, n_head, cross_attention=True)
            for _ in range(n_layer)
        ])
        self.ln = LayerNorm(n_state)

        # Causal mask over token positions: -inf strictly above the diagonal,
        # 0 elsewhere. It must be square in n_ctx because attention slices it
        # as mask[:n, :n]; a [n_ctx, n_state] mask gets truncated (and fails to
        # broadcast) as soon as a sequence is longer than n_state.
        mask = paddle.full(
            shape=[n_ctx, n_ctx], fill_value=-np.inf, dtype='float32')
        mask = paddle.triu(mask, diagonal=1)
        self.register_buffer("mask", mask, persistable=False)

    def forward(self,
                x: paddle.Tensor,
                xa: paddle.Tensor,
                kv_cache: Optional[dict]=None):
        """
        x : paddle.LongTensor, shape = (batch_size, <= n_ctx)
            the text tokens
        xa : paddle.Tensor, shape = (batch_size, n_audio_ctx, n_audio_state)
            the encoded audio features to be attended on
        kv_cache : Optional[dict]
            cached key/value tensors; when non-empty, the length of its first
            entry along axis 1 is used as the position offset of `x`

        Returns the logits over the vocabulary for every position of `x`.
        """
        offset = next(iter(kv_cache.values())).shape[1] if kv_cache else 0
        x = self.token_embedding(x) + self.positional_embedding[offset:offset +
                                                                x.shape[-1]]
        x = x.to(xa.dtype)

        for block in self.blocks:
            x = block(x, xa, mask=self.mask, kv_cache=kv_cache)

        x = self.ln(x)
        # output projection reuses (the transpose of) the token embedding matrix
        logits = (x @ paddle.transpose(self.token_embedding.weight, (1, 0)))

        return logits


@dataclass(frozen=True)
class DecodingOptions:
    """Immutable bundle of options controlling a single decoding run."""

    # whether to perform X->X "transcribe" or X->English "translate"
    task: str = "transcribe"
    # language that the audio is in; uses detected language if None
    language: Optional[str] = None

    # sampling-related options
    temperature: float = 0.0
    # maximum number of tokens to sample
    sample_len: Optional[int] = None
    # number of independent samples to collect, when t > 0
    best_of: Optional[int] = None
    # number of beams in beam search, when t == 0
    beam_size: Optional[int] = None
    # patience in beam search (https://arxiv.org/abs/2204.05424)
    patience: Optional[float] = None

    # options for ranking generations (either beams or best-of-N samples)
    # "alpha" in Google NMT, None defaults to length norm
    length_penalty: Optional[float] = None

    # prompt, prefix, and token suppression
    # text or tokens for the previous context
    prompt: Optional[Union[str, List[int]]] = None
    # text or tokens to prefix the current context
    prefix: Optional[Union[str, List[int]]] = None
    # this will suppress blank outputs
    suppress_blank: bool = True

    # list of tokens ids (or comma-separated token ids) to suppress
    # "-1" will suppress a set of symbols as defined in `tokenizer.non_speech_tokens()`
    suppress_tokens: Optional[Union[str, Iterable[int]]] = "-1"

    # timestamp sampling options
    # use <|notimestamps|> to sample text tokens only
    without_timestamps: bool = False
    # the initial timestamp cannot be later than this
    max_initial_timestamp: Optional[float] = 1.0

    # implementation details
    # use fp16 for most of the calculation
    fp16: bool = False


@dataclass(frozen=True)
class DecodingResult:
    """Immutable record describing the outcome of decoding one audio input.

    Only `audio_features` and `language` are required; the numeric metrics
    default to NaN and the token list to empty.
    NOTE(review): the code that fills these fields is outside this view, so
    the per-field meanings below follow the field names — confirm against it.
    """
    audio_features: paddle.Tensor
    language: str
    # presumably language code -> probability, when language detection ran
    language_probs: Optional[Dict[str, float]] = None
    tokens: List[int] = field(default_factory=list)
    text: str = ""
    avg_logprob: float = np.nan
    no_speech_prob: float = np.nan
    temperature: float = np.nan
    compression_ratio: float = np.nan


class Inference:
    """Interface for the decoder forward pass and its key/value cache handling."""

    def logits(self, tokens: paddle.Tensor,
               audio_features: paddle.Tensor) -> paddle.Tensor:
        """Perform a forward pass on the decoder and return per-token logits"""
        raise NotImplementedError

    def rearrange_kv_cache(self, source_indices) -> None:
        """Update the key-value cache according to the updated beams"""
        raise NotImplementedError

    def cleanup_caching(self) -> None:
        """Clean up any resources or hooks after decoding is finished"""
        # nothing to release by default
        return None


class WhisperInference(Inference):
    """`Inference` backed by a Whisper model's decoder with a lazily installed kv-cache."""

    def __init__(self, model: "Whisper", initial_token_length: int):
        self.model: "Whisper" = model
        self.initial_token_length = initial_token_length
        self.kv_cache = {}
        self.hooks = []

    def logits(self, tokens: paddle.Tensor,
               audio_features: paddle.Tensor) -> paddle.Tensor:
        """Run the decoder, feeding only the newest token once past the initial pass."""
        cache_is_empty = not self.kv_cache
        if cache_is_empty:
            self.kv_cache, self.hooks = self.model.install_kv_cache_hooks()

        beyond_initial = tokens.shape[-1] > self.initial_token_length
        # only need to use the last token except in the first forward pass
        step_tokens = tokens[:, -1:] if beyond_initial else tokens

        return self.model.decoder(
            step_tokens, audio_features, kv_cache=self.kv_cache)

    def cleanup_caching(self):
        """Remove every installed hook and reset the cache and hook list."""
        for installed in self.hooks:
            installed.remove()

        self.kv_cache = {}
        self.hooks = []

    def rearrange_kv_cache(self, source_indices):
        """Re-index every cached tensor by `source_indices`, updating the cache dict in place."""
        # The same dict object is kept and only its values are replaced;
        # presumably the installed hooks hold a reference to it (not visible here).
        for module in list(self.kv_cache):
            # update the key/value cache to contain the selected sequences
            selected = self.kv_cache[module][source_indices]
            self.kv_cache[module] = selected.detach()


@paddle.no_grad()
def detect_language(
        model: "Whisper",
        mel: paddle.Tensor,
        resource_path: str,
        tokenizer: Tokenizer=None) -> Tuple[paddle.Tensor, List[dict]]:
    """
    Detect the spoken language in the audio, and return them as list of strings, along with the ids
    of the most probable language tokens and the probability distribution over all language tokens.
    This is performed outside the main decode loop in order to not interfere with kv-caching.

    Parameters
    ----------
    model : Whisper
        model providing `encoder`, `logits`, `dims`, `is_multilingual` and `num_languages`
    mel : Tensor
        a single (2-D) or batched (3-D) input; either mel features to be encoded, or
        already-encoded audio features whose last two dimensions are
        (n_audio_ctx, n_audio_state)
    resource_path : str
        resource directory used to build a tokenizer when `tokenizer` is None
    tokenizer : Tokenizer, optional
        tokenizer to use; built from the model's settings when omitted

    Returns
    -------
    language_tokens : Tensor, shape = (batch_size,)
        ids of the most probable language tokens, which appears after the startoftranscript token.
    language_probs : List[Dict[str, float]], length = batch_size
        list of dictionaries containing the probability distribution over all languages.

    Raises
    ------
    ValueError
        if the tokenizer has no language token in its start-of-transcript sequence
    """
    if tokenizer is None:
        tokenizer = get_tokenizer(
            multilingual=model.is_multilingual,
            resource_path=resource_path,
            num_languages=model.num_languages)
    if tokenizer.language is None or tokenizer.language_token not in tokenizer.sot_sequence:
        raise ValueError(
            "This model doesn't have language tokens so it can't perform lang id"
        )

    single = mel.ndim == 2
    if single:
        mel = mel.unsqueeze(0)

    # skip encoder forward pass if already-encoded audio features were given.
    # The shape slice is normalised to a tuple first: a list-valued shape never
    # compares equal to a tuple, which would make this test always true and
    # push encoded features through the encoder a second time.
    encoded_shape = (model.dims.n_audio_ctx, model.dims.n_audio_state)
    if tuple(mel.shape[-2:]) != encoded_shape:
        mel = model.encoder(mel)

    # forward pass using a single token, startoftranscript
    batch_size = mel.shape[0]
    x = paddle.to_tensor([[tokenizer.sot]] * batch_size)  # [batch_size, 1]
    logits = model.logits(x, mel)[:, 0]

    # collect detected languages; suppress all non-language tokens
    mask = paddle.ones(paddle.to_tensor(logits.shape[-1]), dtype=bool)
    mask[list(tokenizer.all_language_tokens)] = False
    # NOTE(review): the return value is discarded; kept as in the original
    logits.contiguous()
    logits[:, mask] = -np.inf
    language_tokens = paddle.argmax(logits, axis=-1)
    language_token_probs = F.softmax(logits, axis=-1)
    language_probs = [{
        c: language_token_probs[i, j].tolist()
        for j, c in zip(tokenizer.all_language_tokens,
                        tokenizer.all_language_codes)
    } for i in range(batch_size)]

    # undo the batch dimension added for a single input
    if single:
        language_tokens = language_tokens[0]
        language_probs = language_probs[0]

    return language_tokens, language_probs


def transcribe(
        model: "Whisper",
        mel: paddle.Tensor,
        resource_path: str,
        *,
        verbose: Optional[bool]=None,
        temperature: Union[float, Tuple[float, ...]]=(0.0, 0.2, 0.4, 0.6, 0.8,
                                                      1.0),
        compression_ratio_threshold: Optional[float]=2.4,
        logprob_threshold: Optional[float]=-1.0,
        no_speech_threshold: Optional[float]=0.6,
        condition_on_previous_text: bool=True,
        initial_prompt: Optional[str]=None,
        carry_initial_prompt: bool=False,
        word_timestamps: bool=False,
        prepend_punctuations: str="\"'“¿([{-",
        append_punctuations: str="\"'.。,，!！?？:：”)]}、",
        clip_timestamps: Union[str, List[float]]="0",
        hallucination_silence_threshold: Optional[float]=None,
        **decode_options, ):
    """
    Transcribe audio features with Whisper, decoding one window of at most
    `N_FRAMES` mel frames at a time and advancing a frame cursor (`seek`)
    according to the timestamps predicted for each window.

    Parameters
    ----------
    model: Whisper
        The Whisper model instance

    mel: paddle.Tensor
        The audio feature. It is sliced as `mel[:, start:end]` and the number of
        content frames is computed as `mel.shape[-1] - N_FRAMES`
        (NOTE(review): this suggests the caller appends `N_FRAMES` of padding - confirm).

    resource_path: str
        Forwarded unchanged to `get_tokenizer`, `model.detect_language` and `model.decode`.

    verbose: bool
        Whether to display the text being decoded to the console. If True, displays all the details,
        If False, displays minimal details (a progress bar). If None, does not display anything

    temperature: Union[float, Tuple[float, ...]]
        Temperature for sampling. It can be a tuple of temperatures, which will be successively used
        upon failures according to either `compression_ratio_threshold` or `logprob_threshold`.

    compression_ratio_threshold: float
        If the gzip compression ratio is above this value, treat as failed

    logprob_threshold: float
        If the average log probability over sampled tokens is below this value, treat as failed

    no_speech_threshold: float
        If the no_speech probability is higher than this value AND the average log probability
        over sampled tokens is below `logprob_threshold`, consider the segment as silent

    condition_on_previous_text: bool
        if True, the previous output of the model is provided as a prompt for the next window;
        disabling may make the text inconsistent across windows, but the model becomes less prone to
        getting stuck in a failure loop, such as repetition looping or timestamps going out of sync.

    initial_prompt: Optional[str]
        Optional text that is stripped, prefixed with a space, tokenized and placed at the
        head of the token history used for prompting. It is excluded from the returned text.

    carry_initial_prompt: bool
        If True, every window's prompt is the initial prompt tokens followed by the most recent
        history tokens (limited by `model.dims.n_text_ctx // 2 - 1` minus the initial prompt
        length). If False, the prompt is the history since the last prompt reset.

    word_timestamps: bool
        In this function it only triggers a warning when the task is "translate"; no word-level
        timing is computed in the code below.

    prepend_punctuations: str
    append_punctuations: str
    hallucination_silence_threshold: Optional[float]
        Accepted for interface compatibility; not referenced anywhere in this function body.

    clip_timestamps: Union[str, List[float]]
        Comma-separated string or list of times in seconds, read as start,end,start,end,...
        An odd number of entries means the last clip runs to the end of the content.

    decode_options: dict
        Keyword arguments to construct `DecodingOptions` instances. The keys "language" and
        "task" are also read here, "fp16" is forced to False and "prompt" is overwritten for
        every window.

    Returns
    -------
    A dictionary containing the resulting text ("text") and segment-level details ("segments"), and
    the spoken language ("language"), which is detected when `decode_options["language"]` is None.
    """
    dtype = np.float32  # paddle only supports float32

    if dtype == np.float32:
        decode_options["fp16"] = False

    # frames that carry content, i.e. everything except the last N_FRAMES of `mel`
    content_frames = mel.shape[-1] - N_FRAMES
    # NOTE(review): content_duration is computed but not used below
    content_duration = float(content_frames * HOP_LENGTH / SAMPLE_RATE)
    # language is missing (None or the literal string "None"): fall back to "en" for
    # English-only models, otherwise detect it from the first window
    if decode_options.get("language", None) in {None, "None"}:
        if not model.is_multilingual:
            decode_options["language"] = "en"
        else:
            if verbose:
                print(
                    "Detecting language using up to the first 30 seconds. Use `--language` to specify the language"
                )
            mel_segment = pad_or_trim(mel,
                                      N_FRAMES).to(model.device).astype(dtype)
            _, probs = model.detect_language(mel_segment, resource_path)
            # pick the language code with the highest probability
            decode_options["language"] = max(probs, key=probs.get)
            if verbose is not None:
                print(
                    f"Detected language: {LANGUAGES[decode_options['language']].title()}"
                )

    language: str = decode_options["language"]
    task: str = decode_options.get("task", "transcribe")
    tokenizer = get_tokenizer(
        multilingual=model.is_multilingual,
        resource_path=resource_path,
        num_languages=model.num_languages,
        language=language,
        task=task, )

    # turn clip_timestamps (seconds) into (start_frame, end_frame) pairs
    if isinstance(clip_timestamps, str):
        clip_timestamps = [
            float(ts)
            for ts in (clip_timestamps.split(",") if clip_timestamps else [])
        ]
    seek_points: List[
        int] = [round(ts * FRAMES_PER_SECOND) for ts in clip_timestamps]
    if len(seek_points) == 0:
        seek_points.append(0)
    # an unpaired start point gets the end of the content as its end point
    if len(seek_points) % 2 == 1:
        seek_points.append(content_frames)
    seek_clips: List[Tuple[int, int]] = list(
        zip(seek_points[::2], seek_points[1::2]))

    # only read by the nested helper `is_segment_anomaly` defined inside the loop
    punctuation = "\"'“¿([{-\"'.。,，!！?？:：”)]}、"

    if word_timestamps and task == "translate":
        warnings.warn(
            "Word-level timestamps on translations may not be reliable.")

    def decode_with_fallback(segment: paddle.Tensor) -> DecodingResult:
        """Decode one window, retrying with the next temperature while the result looks bad."""
        temperatures = ([temperature] if isinstance(temperature, (int, float))
                        else temperature)
        decode_result = None

        for t in temperatures:
            # copy so that popping keys does not modify the caller's decode_options
            kwargs = {**decode_options}
            if t > 0:
                # disable beam_size and patience when t > 0
                kwargs.pop("beam_size", None)
                kwargs.pop("patience", None)
            else:
                # disable best_of when t == 0
                kwargs.pop("best_of", None)

            options = DecodingOptions(**kwargs, temperature=t)

            decode_result = model.decode(segment, options, resource_path)

            needs_fallback = False
            if (compression_ratio_threshold is not None and
                    decode_result.compression_ratio >
                    compression_ratio_threshold):
                needs_fallback = True  # too repetitive
            if (logprob_threshold is not None and
                    decode_result.avg_logprob < logprob_threshold):
                needs_fallback = True  # average log probability is too low
            # a window judged silent is accepted as-is; this overrides both checks above
            if (no_speech_threshold is not None and
                    decode_result.no_speech_prob > no_speech_threshold and
                    logprob_threshold is not None and
                    decode_result.avg_logprob < logprob_threshold):
                needs_fallback = False  # silence
            if not needs_fallback:
                break

        # result of the last temperature tried (None only if `temperature` is an empty tuple)
        return decode_result

    clip_idx = 0
    seek = seek_clips[clip_idx][0]
    input_stride = utils.exact_div(
        N_FRAMES, model.dims.n_audio_ctx)  # mel frames per output token: 2
    time_precision = (input_stride * HOP_LENGTH /
                      SAMPLE_RATE)  # time per output token: 0.02 (seconds)
    all_tokens = []  # token history: initial prompt tokens followed by all kept segment tokens
    all_segments = []
    prompt_reset_since = 0  # index into all_tokens from which the prompt may be taken

    # history budget used only when carry_initial_prompt is True
    remaining_prompt_length = model.dims.n_text_ctx // 2 - 1
    if initial_prompt is not None:
        initial_prompt_tokens = tokenizer.encode(" " + initial_prompt.strip())
        all_tokens.extend(initial_prompt_tokens)
        remaining_prompt_length -= len(initial_prompt_tokens)
    else:
        initial_prompt_tokens = []

    def new_segment(*,
                    start: float,
                    end: float,
                    tokens: paddle.Tensor,
                    result: DecodingResult):
        """Build the segment dict for one token slice; reads the enclosing `seek` at call time."""
        tokens = tokens.tolist()
        # keep only ids below eot for the text; the full id list is still stored under "tokens"
        text_tokens = [token for token in tokens if token < tokenizer.eot]
        return {
            "seek": seek,
            "start": start,
            "end": end,
            "text": tokenizer.decode(text_tokens),
            "tokens": tokens,
            "temperature": result.temperature,
            "avg_logprob": result.avg_logprob,
            "compression_ratio": result.compression_ratio,
            "no_speech_prob": result.no_speech_prob,
        }

    # show the progress bar when verbose is False (if True, transcribed text will be printed)
    with tqdm.tqdm(
            total=content_frames, unit="frames",
            disable=verbose is not False) as pbar:
        # NOTE(review): last_speech_timestamp is assigned but not used below
        last_speech_timestamp = 0.0
        # NOTE: This loop is obscurely flattened to make the diff readable.
        # A later commit should turn this into a simpler nested loop.
        # for seek_clip_start, seek_clip_end in seek_clips:
        #     while seek < seek_clip_end
        while clip_idx < len(seek_clips):
            seek_clip_start, seek_clip_end = seek_clips[clip_idx]
            if seek < seek_clip_start:
                seek = seek_clip_start
            if seek >= seek_clip_end:
                # current clip is exhausted: move on to the next one (if any)
                clip_idx += 1
                if clip_idx < len(seek_clips):
                    seek = seek_clips[clip_idx][0]
                continue
            time_offset = float(seek * HOP_LENGTH / SAMPLE_RATE)
            # NOTE(review): window_end_time is computed but not used below
            window_end_time = float(
                (seek + N_FRAMES) * HOP_LENGTH / SAMPLE_RATE)
            # the window never extends past the content or past the end of the current clip
            segment_size = min(N_FRAMES, content_frames - seek,
                               seek_clip_end - seek)
            mel_segment = mel[:, seek:seek + segment_size]
            segment_duration = segment_size * HOP_LENGTH / SAMPLE_RATE
            mel_segment = pad_or_trim(mel_segment,
                                      N_FRAMES).to(model.device).astype(dtype)

            if carry_initial_prompt:
                # skip the initial prompt stored at the head of all_tokens (it is re-added
                # explicitly) and everything before the last prompt reset
                nignored = max(len(initial_prompt_tokens), prompt_reset_since)
                # NOTE(review): if remaining_prompt_length is 0 the slice [-0:] keeps the
                # whole list rather than nothing - confirm this cannot occur
                remaining_prompt = all_tokens[nignored:][
                    -remaining_prompt_length:]
                decode_options[
                    "prompt"] = initial_prompt_tokens + remaining_prompt
            else:
                decode_options["prompt"] = all_tokens[prompt_reset_since:]
            result: DecodingResult = decode_with_fallback(mel_segment)
            tokens = paddle.to_tensor(result.tokens)

            if no_speech_threshold is not None:
                # no voice activity check
                should_skip = result.no_speech_prob > no_speech_threshold
                if (logprob_threshold is not None and
                        result.avg_logprob > logprob_threshold):
                    # don't skip if the logprob is high enough, despite the no_speech_prob
                    should_skip = False

                if should_skip:
                    # note: the progress bar is not updated on this path
                    seek += segment_size  # fast-forward to the next segment boundary
                    continue

            previous_seek = seek
            current_segments = []

            # NOTE(review): the three helpers below are re-created on every iteration and
            # are not called anywhere in this function.
            # anomalous words are very long/short/improbable
            def word_anomaly_score(word: dict) -> float:
                probability = word.get("probability", 0.0)
                duration = word["end"] - word["start"]
                score = 0.0
                if probability < 0.15:
                    score += 1.0
                if duration < 0.133:
                    score += (0.133 - duration) * 15
                if duration > 2.0:
                    score += duration - 2.0
                return score

            def is_segment_anomaly(segment: Optional[dict]) -> bool:
                if segment is None or not segment["words"]:
                    return False
                words = [
                    w for w in segment["words"] if w["word"] not in punctuation
                ]
                words = words[:8]
                score = sum(word_anomaly_score(w) for w in words)
                return score >= 3 or score + 0.01 >= len(words)

            def next_words_segment(segments: List[dict]) -> Optional[dict]:
                return next((s for s in segments if s["words"]), None)

            # boolean mask: True where the token id is a timestamp token
            timestamp_tokens: paddle.Tensor = tokens.greater_equal(
                paddle.to_tensor(tokenizer.timestamp_begin))
            # True when the output ends with exactly one timestamp token
            single_timestamp_ending = timestamp_tokens[
                -2:].tolist() == [False, True]

            # positions where two timestamp tokens are adjacent, shifted by one so that each
            # value indexes the second token of the pair
            consecutive = paddle.where(timestamp_tokens[:-1] & timestamp_tokens[
                1:])[0]
            if consecutive.numel() != 0:
                consecutive = paddle.add(consecutive, paddle.to_tensor(1))
            if len(consecutive) > 0:
                # if the output contains two consecutive timestamp tokens
                slices = consecutive.tolist()
                if single_timestamp_ending:
                    slices.append(len(tokens))

                last_slice = 0
                for current_slice in slices:
                    sliced_tokens = tokens[last_slice:current_slice]
                    # first and last token of the slice are read as its start/end timestamps
                    start_timestamp_pos = (
                        sliced_tokens[0].item() - tokenizer.timestamp_begin)
                    end_timestamp_pos = (
                        sliced_tokens[-1].item() - tokenizer.timestamp_begin)
                    current_segments.append(
                        new_segment(
                            start=time_offset + start_timestamp_pos *
                            time_precision,
                            end=time_offset + end_timestamp_pos *
                            time_precision,
                            tokens=sliced_tokens,
                            result=result, ))
                    last_slice = current_slice

                if single_timestamp_ending:
                    # single timestamp at the end means no speech after the last timestamp.
                    seek += segment_size
                else:
                    # otherwise, ignore the unfinished segment and seek to the last timestamp
                    # NOTE(review): if this timestamp is position 0, seek does not advance
                    last_timestamp_pos = (tokens[last_slice - 1].item() -
                                          tokenizer.timestamp_begin)
                    seek += last_timestamp_pos * input_stride
            else:
                # no timestamp pair: emit the whole window as a single segment
                duration = segment_duration
                timestamps = tokens[timestamp_tokens.nonzero().flatten()]
                if (len(timestamps) > 0 and
                        timestamps[-1].item() != tokenizer.timestamp_begin):
                    # no consecutive timestamps but it has a timestamp; use the last one.
                    last_timestamp_pos = (
                        timestamps[-1].item() - tokenizer.timestamp_begin)
                    duration = last_timestamp_pos * time_precision

                current_segments.append(
                    new_segment(
                        start=time_offset,
                        end=time_offset + duration,
                        tokens=tokens,
                        result=result, ))
                seek += segment_size

            if verbose:
                for segment in current_segments:
                    start, end, text = segment["start"], segment[
                        "end"], segment["text"]
                    line = f"[{utils.format_timestamp(start)} --> {utils.format_timestamp(end)}] {text}"
                    print(line)

            # if a segment is instantaneous or does not contain text, clear it
            # (the "words" key is only ever set here, on cleared segments)
            for i, segment in enumerate(current_segments):
                if segment["start"] == segment["end"] or segment[
                        "text"].strip() == "":
                    segment["text"] = ""
                    segment["tokens"] = []
                    segment["words"] = []

            # assign running ids that continue from the segments collected so far
            all_segments.extend(
                [{
                    "id": i,
                    **
                    segment
                }
                 for i, segment in enumerate(
                     current_segments, start=len(all_segments))])
            all_tokens.extend([
                token
                for segment in current_segments for token in segment["tokens"]
            ])

            if not condition_on_previous_text or result.temperature > 0.5:
                # do not feed the prompt tokens if a high temperature was used
                prompt_reset_since = len(all_tokens)

            # update progress bar
            pbar.update(min(content_frames, seek) - previous_seek)

    # the initial prompt sits at the head of all_tokens and is not part of the transcript
    return dict(
        text=tokenizer.decode(all_tokens[len(initial_prompt_tokens):]),
        segments=all_segments,
        language=language, )


class SequenceRanker:
    """Interface for picking one winner out of each group of decoded samples."""

    def rank(self,
             tokens: List[List[paddle.Tensor]],
             sum_logprobs: List[List[float]]) -> List[int]:
        """
        Given a list of groups of samples and their cumulative log probabilities,
        return the indices of the samples in each group to select as the final result
        """
        raise NotImplementedError


class MaximumLikelihoodRanker(SequenceRanker):
    """
    Select the sample with the highest log probabilities, penalized using either
    a simple length normalization or Google NMT paper's length penalty
    """

    def __init__(self, length_penalty: Optional[float]):
        # None selects plain length normalization; a number selects the NMT penalty exponent
        self.length_penalty = length_penalty

    def rank(self,
             tokens: List[List[paddle.Tensor]],
             sum_logprobs: List[List[float]]) -> List[int]:
        """
        Return, for every group, the index of the candidate with the best
        length-penalized cumulative log probability.

        Parameters
        ----------
        tokens : one list of candidate token sequences per group; only `len()` of
            each candidate is used
        sum_logprobs : cumulative log probability of each candidate, same nesting as `tokens`

        Returns
        -------
        List[int] : one plain Python int per group
        """

        def scores(logprobs, lengths):
            result = []
            for logprob, length in zip(logprobs, lengths):
                if self.length_penalty is None:
                    # an empty candidate (length 0) would otherwise divide by zero;
                    # score it as if it had a single token
                    penalty = max(length, 1)
                else:
                    # from the Google NMT paper
                    penalty = ((5 + length) / 6)**self.length_penalty
                result.append(logprob / penalty)
            return result

        # get the sequence with the highest score
        lengths = [[len(t) for t in s] for s in tokens]
        # int(...) so callers receive the declared List[int] rather than numpy integers
        return [
            int(np.argmax(scores(p, l))) for p, l in zip(sum_logprobs, lengths)
        ]


class TokenDecoder:
    """Interface of a next-token selection strategy; subclasses supply the behavior."""

    def reset(self):
        """Initialize any stateful variables for decoding a new sequence"""
        return None

    def update(self,
               tokens: paddle.Tensor,
               logits: paddle.Tensor,
               sum_logprobs: paddle.Tensor) -> Tuple[paddle.Tensor, bool]:
        """Choose the next token for every sequence from the current trace and logits

        Parameters
        ----------
        tokens : Tensor, shape = (n_batch, current_sequence_length)
            the context decoded so far, prefix and sot_sequence tokens included

        logits : Tensor, shape = (n_batch, vocab_size)
            logits of the distribution over the vocabulary at the current step

        sum_logprobs : Tensor, shape = (n_batch)
            running total of log probabilities of each sequence

        Returns
        -------
        tokens : Tensor, shape = (n_batch, current_sequence_length + 1)
            the input tokens with the newly selected token appended

        completed : bool
            True once every sequence has reached the end of text

        """
        raise NotImplementedError()

    def finalize(
            self, tokens: paddle.Tensor, sum_logprobs: paddle.Tensor
    ) -> Tuple[Sequence[Sequence[paddle.Tensor]], List[List[float]]]:
        """Close the search and hand back the final candidate sequences

        Parameters
        ----------
        tokens : Tensor, shape = (batch_size, beam_size, current_sequence_length)
            the context decoded so far, prefix and sot_sequence included

        sum_logprobs : Tensor, shape = (batch_size, beam_size)
            running total of log probabilities of each sequence

        Returns
        -------
        tokens : Sequence[Sequence[Tensor]], length = batch_size
            for each audio input, the Tensors holding its candidate token sequences

        sum_logprobs : List[List[float]], length = batch_size
            the cumulative log probabilities matching the candidates above

        """
        raise NotImplementedError()


class GreedyDecoder(TokenDecoder):
    """Takes the argmax token at temperature 0, otherwise samples from the scaled logits."""

    def __init__(self, temperature: float, eot: int):
        self.eot = eot
        self.temperature = temperature

    def update(self,
               tokens: paddle.Tensor,
               logits: paddle.Tensor,
               sum_logprobs: paddle.Tensor) -> Tuple[paddle.Tensor, bool]:
        """Append one token per row; `sum_logprobs` is updated in place."""
        previous = tokens[:, -1]

        if self.temperature == 0:
            chosen = paddle.argmax(logits, axis=-1)
        else:
            distribution = Categorical(logits=logits / self.temperature)
            drawn = distribution.sample([1])
            # collapse the leading sample axis into a flat vector of token ids
            chosen = paddle.reshape(drawn, [drawn.shape[0] * drawn.shape[1]])

        logprobs = F.log_softmax(logits, axis=-1, dtype="float32")
        row_ids = paddle.arange(logprobs.shape[0])
        step_logprobs = logprobs[row_ids, chosen]
        # rows whose previous token was already EOT contribute nothing more
        still_open = paddle.to_tensor((previous != self.eot), dtype="float32")
        sum_logprobs += step_logprobs * still_open

        # rows that already ended keep emitting EOT
        chosen[previous == self.eot] = self.eot
        tokens = paddle.concat([tokens, chosen[:, None]], axis=-1)

        return tokens, paddle.all((tokens[:, -1] == self.eot))

    def finalize(self, tokens: paddle.Tensor, sum_logprobs: paddle.Tensor):
        """Pad one EOT onto every sequence and convert the log probabilities to a list."""
        # guarantees that each sequence ends with at least one EOT token
        padded = F.pad(tokens, (0, 1), value=self.eot, data_format="NCL")
        return padded, sum_logprobs.tolist()


class BeamSearchDecoder(TokenDecoder):
    """Beam search that keeps `beam_size` live hypotheses per audio input."""

    def __init__(self,
                 beam_size: int,
                 eot: int,
                 inference: Inference,
                 patience: Optional[float]=None):
        self.beam_size = beam_size
        self.eot = eot
        self.inference = inference
        # a missing (or falsy) patience falls back to 1.0
        self.patience = patience or 1.0
        # number of finished hypotheses to gather per audio before stopping
        self.max_candidates: int = round(beam_size * self.patience)
        self.finished_sequences = None

        assert self.max_candidates > 0, f"Invalid beam size ({beam_size}) or patience ({patience})"

    def reset(self):
        """Forget the finished hypotheses gathered for the previous input."""
        self.finished_sequences = None

    def update(self,
               tokens: paddle.Tensor,
               logits: paddle.Tensor,
               sum_logprobs: paddle.Tensor) -> Tuple[paddle.Tensor, bool]:
        """Expand every beam by one token and keep the best `beam_size` per audio.

        `sum_logprobs` is overwritten in place with the scores of the kept beams,
        and the kv cache of `self.inference` is reordered to match them.
        """
        if tokens.shape[0] % self.beam_size != 0:
            raise ValueError(f"{tokens.shape}[0] % {self.beam_size} != 0")

        n_audio = tokens.shape[0] // self.beam_size
        if self.finished_sequences is None:  # very first call after reset
            self.finished_sequences = [{} for _ in range(n_audio)]

        logprobs = F.log_softmax(logits, axis=-1, dtype='float32')
        kept_sequences, kept_sources, finished_per_audio = [], [], []

        for audio_idx in range(n_audio):
            candidate_scores, candidate_sources, ended = {}, {}, {}

            # STEP 1: score every one-token extension of every beam of this audio
            first_row = audio_idx * self.beam_size
            for row in range(first_row, first_row + self.beam_size):
                prefix = tokens[row].tolist()
                top_logprobs, top_tokens = paddle.topk(
                    logprobs[row], k=self.beam_size + 1)
                for step_logprob, step_token in zip(top_logprobs, top_tokens):
                    # after Paddle 3.0, tolist on a 0-D tensor returns a scalar, not a list
                    total = (sum_logprobs[row] + step_logprob).tolist()
                    if not isinstance(total, float):
                        total = total[0]
                    token_id = step_token.tolist()
                    if not isinstance(token_id, int):
                        token_id = token_id[0]

                    candidate = tuple(prefix + [token_id])
                    candidate_scores[candidate] = total
                    candidate_sources[candidate] = row

            # STEP 2: walk the candidates best-first; finished ones are set aside,
            # the first beam_size unfinished ones become the new beams
            n_kept = 0
            best_first = sorted(
                candidate_scores.items(),
                key=lambda item: item[1],
                reverse=True)
            for candidate, score in best_first:
                if candidate[-1] == self.eot:
                    ended[candidate] = score
                    continue

                sum_logprobs[len(kept_sequences)] = score
                kept_sequences.append(candidate)
                kept_sources.append(candidate_sources[candidate])
                n_kept += 1
                if n_kept == self.beam_size:
                    break

            finished_per_audio.append(ended)

        tokens = paddle.to_tensor(kept_sequences)
        self.inference.rearrange_kv_cache(kept_sources)

        # merge the newly finished hypotheses, best first, until the quota is reached
        assert len(self.finished_sequences) == len(finished_per_audio)
        for accumulated, fresh in zip(self.finished_sequences,
                                      finished_per_audio):
            fresh_best_first = sorted(
                fresh.items(), key=lambda item: item[1], reverse=True)
            for seq, score in fresh_best_first:
                if len(accumulated) >= self.max_candidates:
                    break  # quota for this audio is already filled
                accumulated[seq] = score

        # done once every audio has gathered enough finished hypotheses
        completed = all(
            len(done) >= self.max_candidates
            for done in self.finished_sequences)
        return tokens, completed

    def finalize(self,
                 preceding_tokens: paddle.Tensor,
                 sum_logprobs: paddle.Tensor):
        """Return the finished hypotheses, topped up with unfinished beams when short."""
        sum_logprobs = sum_logprobs.cpu()
        for i, sequences in enumerate(self.finished_sequences):
            if len(sequences) >= self.beam_size:
                continue
            # too few finished: add live beams, best score first, closed with EOT
            for j in list(np.argsort(sum_logprobs[i]))[::-1]:
                sequence = preceding_tokens[i, j].tolist() + [self.eot]
                sequences[tuple(sequence)] = sum_logprobs[i][j].item()
                if len(sequences) >= self.beam_size:
                    break

        tokens: List[List[paddle.Tensor]] = []
        scores: List[List[float]] = []
        for sequences in self.finished_sequences:
            tokens.append([paddle.to_tensor(seq) for seq in sequences.keys()])
        for sequences in self.finished_sequences:
            scores.append(list(sequences.values()))
        return tokens, scores


class LogitFilter:
    """Interface of an in-place logit filter; subclasses implement `apply`."""

    def apply(self, logits: paddle.Tensor, tokens: paddle.Tensor) -> None:
        """Filter or mask the given logits in-place

        Parameters
        ----------
        logits : Tensor, shape = (n_batch, vocab_size)
            logits of the distribution over the vocabulary at the current step

        tokens : Tensor, shape = (n_batch, current_sequence_length)
            the context decoded so far, prefix and sot_sequence tokens included

        """
        raise NotImplementedError()


class SuppressBlank(LogitFilter):
    """At the first sampled position, masks the tokens of " " and the EOT token."""

    def __init__(self, tokenizer: Tokenizer, sample_begin: int):
        self.sample_begin = sample_begin
        self.tokenizer = tokenizer

    def apply(self, logits: paddle.Tensor, tokens: paddle.Tensor):
        # only the very first sampled position is affected
        if tokens.shape[1] != self.sample_begin:
            return

        # NOTE(review): the result of contiguous() is discarded; presumably it is
        # relied on for an in-place effect before the indexed write - confirm
        logits.contiguous()
        blocked = self.tokenizer.encode(" ") + [self.tokenizer.eot]
        logits[:, blocked] = -np.inf


class SuppressTokens(LogitFilter):
    """Masks a fixed set of token ids at every decoding step."""

    def __init__(self, suppress_tokens: Sequence[int]):
        # stored as a list copy of the given sequence
        self.suppress_tokens = list(suppress_tokens)

    def apply(self, logits: paddle.Tensor, tokens: paddle.Tensor):
        # NOTE(review): the result of contiguous() is discarded; presumably it is
        # relied on for an in-place effect before the indexed write - confirm
        logits.contiguous()
        blocked = self.suppress_tokens
        logits[:, blocked] = -np.inf


class ApplyTimestampRules(LogitFilter):
    """Masks logits in place so that timestamp tokens follow the pairing rules below."""

    def __init__(self,
                 tokenizer: Tokenizer,
                 sample_begin: int,
                 max_initial_timestamp_index: Optional[int]):
        self.max_initial_timestamp_index = max_initial_timestamp_index
        self.sample_begin = sample_begin
        self.tokenizer = tokenizer

    def apply(self, logits: paddle.Tensor, tokens: paddle.Tensor):
        tokenizer = self.tokenizer
        ts_begin = tokenizer.timestamp_begin
        n_batch = tokens.shape[0]

        # <|notimestamps|> is handled by without_timestamps, so it is never sampled here
        if tokenizer.no_timestamps is not None:
            logits.contiguous()
            logits[:, tokenizer.no_timestamps] = -np.inf

        # timestamps come in pairs, except right before EOT; mask each row accordingly
        for row in range(n_batch):
            sampled = tokens[row, self.sample_begin:].tolist()
            last_is_ts = len(sampled) >= 1 and sampled[-1] >= ts_begin
            if not last_is_ts:
                continue

            prev_is_ts = len(sampled) < 2 or sampled[-2] >= ts_begin
            if prev_is_ts:
                # a pair was just completed (or this is the opening timestamp):
                # the next token has to be non-timestamp
                logits[row, ts_begin:] = -np.inf
            else:
                # a lone timestamp after text: normal text tokens are not allowed next
                logits[row, :tokenizer.eot] = -np.inf

        # the `max_initial_timestamp` option applies at the first sampled position only
        at_first_step = tokens.shape[1] == self.sample_begin
        if at_first_step and self.max_initial_timestamp_index is not None:
            last_allowed = ts_begin + self.max_initial_timestamp_index
            logits.contiguous()
            logits[:, last_allowed + 1:] = -np.inf

        # when the total probability of all timestamps beats every single text token,
        # force a timestamp to be sampled
        logprobs = F.log_softmax(logits, axis=-1, dtype='float32')
        for row in range(n_batch):
            # When using paddle.logsumexp on a 32GB Tesla-V100 GPU, we encountered CUDA error 700.
            # To bypass this issue in CI, the operation is decomposed into exp / sum / log.
            # This introduces a precision difference of about 2e-6.
            # TODO: revert this after logsumexp has been fixed.
            ts_probs = paddle.exp(logprobs[row, ts_begin:])
            ts_logprob = paddle.log(paddle.sum(ts_probs, axis=-1))
            best_text_logprob = paddle.max(logprobs[row, :ts_begin])
            if ts_logprob > best_text_logprob:
                logits[row, :ts_begin] = -np.inf


class DecodingTask:
    """Runs Whisper decoding for a batch of audio segments.

    The constructor wires together the tokenizer, the inference wrapper, the
    token decoder (greedy or beam search), the sequence ranker and the logit
    filters according to ``options``; ``run`` executes the whole pipeline.
    """
    inference: Inference
    sequence_ranker: SequenceRanker
    decoder: TokenDecoder
    logit_filters: List[LogitFilter]

    def __init__(self,
                 model: "Whisper",
                 options: DecodingOptions,
                 resource_path: str):
        """Build every component needed for decoding.

        Args:
            model: the Whisper model whose encoder/decoder are used.
            options: decoding options; validated by ``_verify_options``.
            resource_path: forwarded to ``get_tokenizer`` and to language
                detection.
        """
        self.model = model

        # Fall back to English when no language was requested.
        language = options.language or "en"
        tokenizer = get_tokenizer(
            multilingual=model.is_multilingual,
            resource_path=resource_path,
            language=language,
            task=options.task,
            num_languages=model.num_languages)
        self.tokenizer: Tokenizer = tokenizer
        self.options: DecodingOptions = self._verify_options(options)
        self.resource_path: str = resource_path

        # Number of candidate sequences kept per audio segment.
        self.n_group: int = options.beam_size or options.best_of or 1
        self.n_ctx: int = model.dims.n_text_ctx
        self.sample_len: int = options.sample_len or model.dims.n_text_ctx // 2

        self.sot_sequence: Tuple[int] = tokenizer.sot_sequence
        if self.options.without_timestamps:
            self.sot_sequence = tokenizer.sot_sequence_including_notimestamps

        # NOTE: _get_initial_tokens reads self.sample_len / self.n_ctx, so it
        # has to be called after those attributes are set.
        self.initial_tokens: Tuple[int] = self._get_initial_tokens()
        self.sample_begin: int = len(self.initial_tokens)
        self.sot_index: int = self.initial_tokens.index(tokenizer.sot)

        # inference: implements the forward pass through the decoder, including kv caching
        self.inference = WhisperInference(model, len(self.initial_tokens))

        # sequence ranker: implements how to rank a group of sampled sequences
        self.sequence_ranker = MaximumLikelihoodRanker(options.length_penalty)

        # decoder: implements how to select the next tokens, given the autoregressive distribution
        if options.beam_size is not None:
            self.decoder = BeamSearchDecoder(options.beam_size, tokenizer.eot,
                                             self.inference, options.patience)
        else:
            self.decoder = GreedyDecoder(options.temperature, tokenizer.eot)

        # logit filters: applies various rules to suppress or penalize certain tokens
        self.logit_filters = []
        if self.options.suppress_blank:
            self.logit_filters.append(
                SuppressBlank(self.tokenizer, self.sample_begin))
        if self.options.suppress_tokens:
            self.logit_filters.append(
                SuppressTokens(self._get_suppress_tokens()))
        if not options.without_timestamps:
            precision = CHUNK_LENGTH / model.dims.n_audio_ctx  # usually 0.02 seconds
            max_initial_timestamp_index = None
            if options.max_initial_timestamp:
                max_initial_timestamp_index = round(
                    self.options.max_initial_timestamp / precision)
            self.logit_filters.append(
                ApplyTimestampRules(tokenizer, self.sample_begin,
                                    max_initial_timestamp_index))

    def _verify_options(self, options: DecodingOptions) -> DecodingOptions:
        """Reject mutually incompatible option combinations.

        Returns:
            The same ``options`` object when it is valid.

        Raises:
            ValueError: if the combination of ``beam_size``, ``best_of``,
                ``temperature``, ``patience`` or ``length_penalty`` is invalid.
        """
        if options.beam_size is not None and options.best_of is not None:
            raise ValueError("beam_size and best_of can't be given together")
        if options.temperature == 0:
            if options.best_of is not None:
                raise ValueError(
                    "best_of with greedy sampling (T=0) is not compatible")
        if options.patience is not None and options.beam_size is None:
            raise ValueError("patience requires beam_size to be given")
        if options.length_penalty is not None and not (
                0 <= options.length_penalty <= 1):
            raise ValueError(
                "length_penalty (alpha) should be a value between 0 and 1")

        return options

    def _get_initial_tokens(self) -> Tuple[int]:
        """Assemble the token sequence every decoded sample starts with.

        The result is ``[sot_prev] + prompt + sot_sequence + prefix`` where the
        prompt and prefix parts are only present when the matching option is
        set. String prompts/prefixes are tokenized; anything else is assumed
        to already be a sequence of token ids.
        """
        tokens = list(self.sot_sequence)
        prefix = self.options.prefix
        prompt = self.options.prompt

        if prefix:
            # `.input_ids` must be read from the encode() result, not from the
            # string being encoded.
            prefix_tokens = (
                self.tokenizer.encode(" " + prefix.strip()).input_ids
                if isinstance(prefix, str) else prefix)
            if self.sample_len is not None:
                # NOTE(review): when this evaluates to 0 the slice `[-0:]`
                # keeps the whole prefix — confirm that is intended.
                max_prefix_len = self.n_ctx // 2 - self.sample_len
                prefix_tokens = prefix_tokens[-max_prefix_len:]
            tokens = tokens + list(prefix_tokens)

        if prompt:
            prompt_tokens = (
                self.tokenizer.encode(" " + prompt.strip()).input_ids
                if isinstance(prompt, str) else prompt)
            # Keep only the most recent prompt tokens so that the prompt never
            # takes more than half of the text context.
            tokens = [self.tokenizer.sot_prev] + list(
                prompt_tokens[-(self.n_ctx // 2 - 1):]) + tokens

        return tuple(tokens)

    def _get_suppress_tokens(self) -> Tuple[int]:
        """Resolve ``options.suppress_tokens`` into a sorted tuple of token ids.

        Accepts a comma separated string or a list of ids. A ``-1`` entry is
        replaced by the tokenizer's ``non_speech_tokens``. The special tokens
        ``sot``, ``sot_prev``, ``sot_lm`` (and ``no_speech`` when defined) are
        always added.
        """
        suppress_tokens = self.options.suppress_tokens

        if isinstance(suppress_tokens, str):
            # Skip blank fields so that an empty string yields an empty list.
            suppress_tokens = [
                int(t) for t in suppress_tokens.split(",") if t.strip()
            ]

        # The None/empty check comes first: `-1 in None` would raise TypeError.
        if suppress_tokens is None or len(suppress_tokens) == 0:
            suppress_tokens = []  # interpret empty string as an empty list
        elif -1 in suppress_tokens:
            suppress_tokens = [t for t in suppress_tokens if t >= 0]
            suppress_tokens.extend(self.tokenizer.non_speech_tokens)
        else:
            assert isinstance(suppress_tokens,
                              list), "suppress_tokens must be a list"
            # Work on a copy so the list stored in the options is not mutated.
            suppress_tokens = list(suppress_tokens)

        suppress_tokens.extend([
            self.tokenizer.sot, self.tokenizer.sot_prev, self.tokenizer.sot_lm
        ])
        if self.tokenizer.no_speech is not None:
            # no-speech probability is collected separately
            suppress_tokens.append(self.tokenizer.no_speech)

        return tuple(sorted(set(suppress_tokens)))

    def _get_audio_features(self, mel: paddle.Tensor):
        """Return encoder output for ``mel``.

        If the trailing two dimensions of ``mel`` already equal
        ``(n_audio_ctx, n_audio_state)`` it is treated as pre-computed encoder
        output and returned unchanged; otherwise the encoder is run.
        """
        #if self.options.fp16:
        #    mel = mel.half()

        if mel.shape[-2:] == (self.model.dims.n_audio_ctx,
                              self.model.dims.n_audio_state):
            # encoded audio features are given; skip audio encoding
            audio_features = mel
        else:
            audio_features = self.model.encoder(mel)

        #if audio_features.dtype != (np.float16 if self.options.fp16 else np.float32):
        #    return TypeError(f"audio_features has an incorrect dtype: {audio_features.dtype}")

        return audio_features

    def _detect_language(self,
                         audio_features: paddle.Tensor,
                         tokens: paddle.Tensor,
                         resource_path: str):
        """Detect the spoken language when needed.

        Detection runs when no language was given or the task is ``lang_id``.
        When no language was given, the detected language token is written
        in place into ``tokens`` right after the SOT position.

        Note: the ``resource_path`` argument is not used; ``self.resource_path``
        is passed to the model instead.

        Returns:
            ``(languages, lang_probs)``; ``lang_probs`` is ``None`` when no
            detection was performed.
        """
        languages = [self.options.language] * audio_features.shape[0]
        lang_probs = None

        if self.options.language is None or self.options.task == "lang_id":
            lang_tokens, lang_probs = self.model.detect_language(
                audio_features, self.tokenizer, self.resource_path)
            languages = [max(probs, key=probs.get) for probs in lang_probs]
            if self.options.language is None:
                tokens[:, self.sot_index +
                       1] = lang_tokens  # write language tokens

        return languages, lang_probs

    def _main_loop(self, audio_features: paddle.Tensor, tokens: paddle.Tensor):
        """Autoregressive sampling loop.

        Runs at most ``self.sample_len`` steps, stopping early when the decoder
        reports completion or the sequence exceeds ``self.n_ctx``. The kv cache
        of the inference wrapper is always cleaned up afterwards.

        Returns:
            ``(tokens, sum_logprobs, no_speech_probs)``.
        """
        assert audio_features.shape[0] == tokens.shape[0]
        n_batch = tokens.shape[0]
        sum_logprobs: paddle.Tensor = paddle.zeros(
            paddle.to_tensor(n_batch), dtype=paddle.float32)
        no_speech_probs = [np.nan] * n_batch
        try:
            for i in range(self.sample_len):
                logits = self.inference.logits(tokens, audio_features)
                logits.contiguous()

                if i == 0 and self.tokenizer.no_speech is not None:  # save no_speech_probs
                    probs_at_sot = F.softmax(
                        logits[:, self.sot_index],
                        axis=-1,
                        dtype=paddle.float32)
                    no_speech_probs = probs_at_sot[:, self.tokenizer.
                                                   no_speech].tolist()

                # now we need to consider the logits at the last token only
                logits = logits[:, -1]

                # apply the logit filters, e.g. for suppressing or applying penalty to
                for logit_filter in self.logit_filters:
                    logit_filter.apply(logits, tokens)

                # expand the tokens tensor with the selected next tokens
                tokens, completed = self.decoder.update(tokens, logits,
                                                        sum_logprobs)
                if completed or tokens.shape[-1] > self.n_ctx:
                    break
        finally:
            self.inference.cleanup_caching()

        return tokens, sum_logprobs, no_speech_probs

    @paddle.no_grad()
    def run(self, mel: paddle.Tensor) -> List[DecodingResult]:
        """Decode a batch of Mel spectrograms (or pre-computed audio features).

        Args:
            mel: batched input; its first dimension is the batch size.

        Returns:
            One ``DecodingResult`` per batch entry. For the ``lang_id`` task
            only the language fields are filled in.

        Raises:
            RuntimeError: if the per-sample result lists end up with
                different lengths.
        """
        self.decoder.reset()
        tokenizer: Tokenizer = self.tokenizer
        batch_size: int = mel.shape[0]

        audio_features: paddle.Tensor = self._get_audio_features(
            mel)  # encoder forward pass

        tokens: Tensor = paddle.to_tensor([self.initial_tokens]).repeat(
            batch_size, 1)

        # detect language if requested, overwriting the language token.
        # `tokens` must be passed as-is: wrapping it in paddle.to_tensor()
        # would hand over a copy and the in-place language-token write inside
        # _detect_language would be lost.
        languages, language_probs = self._detect_language(
            audio_features, tokens, self.resource_path)

        if self.options.task == "lang_id":
            return [
                DecodingResult(
                    audio_features=features,
                    language=language,
                    language_probs=probs)
                for features, language, probs in zip(audio_features, languages,
                                                     language_probs)
            ]

        # repeat the audio & text tensors by the group size, for beam search or
        # best-of-n sampling. Both must be repeated: _main_loop asserts equal
        # batch sizes and the `[::self.n_group]` slicing below undoes the
        # repetition of the audio features.
        audio_features = audio_features.repeat_interleave(self.n_group, axis=0)
        tokens = tokens.repeat_interleave(self.n_group, axis=0)

        # call the main sampling loop
        tokens, sum_logprobs, no_speech_probs = self._main_loop(audio_features,
                                                                tokens)

        # reshape the tensors to have (batch_size, n_group) as the first two dimensions
        audio_features = audio_features[::self.n_group]
        no_speech_probs = no_speech_probs[::self.n_group]
        assert audio_features.shape[0] == len(no_speech_probs) == batch_size

        tokens = tokens.reshape([batch_size, self.n_group, -1])
        sum_logprobs = sum_logprobs.reshape([batch_size, self.n_group])

        # get the final candidates for each group, and slice between the first sampled token and EOT
        tokens, sum_logprobs = self.decoder.finalize(tokens, sum_logprobs)
        tokens: List[List[Tensor]] = [[
            t[self.sample_begin:(t == tokenizer.eot).nonzero()[0, 0]] for t in s
        ] for s in tokens]
        # select the top-ranked sample in each group
        selected = self.sequence_ranker.rank(tokens, sum_logprobs)
        tokens: List[List[
            int]] = [t[i].tolist() for i, t in zip(selected, tokens)]
        texts: List[str] = [tokenizer.decode(t).strip() for t in tokens]

        sum_logprobs: List[
            float] = [lp[i] for i, lp in zip(selected, sum_logprobs)]
        # +1 accounts for the EOT token that was sliced off above
        avg_logprobs: List[float] = [
            lp / (len(t) + 1) for t, lp in zip(tokens, sum_logprobs)
        ]

        fields = (texts, languages, tokens, audio_features, avg_logprobs,
                  no_speech_probs, )
        if len(set(map(len, fields))) != 1:
            raise RuntimeError(
                f"inconsistent result lengths: {list(map(len, fields))}")

        return [
            DecodingResult(
                audio_features=features,
                language=language,
                tokens=tokens,
                text=text,
                avg_logprob=avg_logprob,
                no_speech_prob=no_speech_prob,
                temperature=self.options.temperature,
                compression_ratio=utils.compression_ratio(text), )
            for text, language, tokens, features, avg_logprob, no_speech_prob in
            zip(*fields)
        ]


@paddle.no_grad()
def decode(
        model: "Whisper",
        mel: paddle.Tensor,
        options: DecodingOptions=DecodingOptions(),
        resource_path: Optional[str]=None,
) -> Union[DecodingResult, List[DecodingResult]]:
    """
    Performs decoding of 30-second audio segment(s), provided as Mel spectrogram(s).

    Parameters
    ----------
    model: Whisper
        the Whisper model instance

    mel: paddle.Tensor, shape = (80, 3000) or (*, 80, 3000) or (128, 3000) or (*, 128, 3000)
        A tensor containing the Mel spectrogram(s)

    options: DecodingOptions
        A dataclass that contains all necessary options for decoding 30-second segments

    resource_path: Optional[str]
        Path forwarded to `DecodingTask` (used there for the tokenizer and
        language detection resources)

    Returns
    -------
    result: Union[DecodingResult, List[DecodingResult]]
        The result(s) of decoding contained in `DecodingResult` dataclass instance(s)
    """
    # A 2-D input is a single spectrogram: add a batch axis, and strip it again
    # from the result below.
    single = mel.ndim == 2
    if single:
        mel = mel.unsqueeze(0)

    result = DecodingTask(model, options, resource_path).run(mel)

    if single:
        result = result[0]

    return result


class Whisper(nn.Layer):
    """Whisper model: an ``AudioEncoder`` paired with a ``TextDecoder``."""

    def __init__(self, dims: ModelDimensions):
        """Create the encoder and decoder from the sizes stored in ``dims``."""
        super().__init__()
        self.dims = dims

        # Audio side.
        self.encoder = AudioEncoder(
            dims.n_mels,
            dims.n_audio_ctx,
            dims.n_audio_state,
            dims.n_audio_head,
            dims.n_audio_layer, )
        # Text side.
        self.decoder = TextDecoder(
            dims.n_vocab,
            dims.n_text_ctx,
            dims.n_text_state,
            dims.n_text_head,
            dims.n_text_layer, )

    def embed_audio(self, mel: paddle.Tensor):
        """Run the encoder's ``forward`` on ``mel`` and return its output."""
        encoded = self.encoder.forward(mel)
        return encoded

    def logits(self, tokens: paddle.Tensor, audio_features: paddle.Tensor):
        """Run the decoder's ``forward`` on ``tokens`` and ``audio_features``."""
        decoded = self.decoder.forward(tokens, audio_features)
        return decoded

    def forward(self, mel: paddle.Tensor,
                tokens: paddle.Tensor) -> Dict[str, paddle.Tensor]:
        """Encode ``mel`` and feed the result, with ``tokens``, to the decoder."""
        audio_features = self.encoder(mel)
        return self.decoder(tokens, audio_features)

    @property
    def device(self):
        """Device string reported by ``paddle.device.get_device()``."""
        return paddle.device.get_device()

    @property
    def is_multilingual(self):
        """True when the vocabulary has at least 51865 entries."""
        vocab_size = self.dims.n_vocab
        return vocab_size >= 51865

    @property
    def num_languages(self):
        """Number of languages derived from the vocabulary size."""
        multilingual_offset = int(self.is_multilingual)
        return self.dims.n_vocab - 51765 - multilingual_offset

    def install_kv_cache_hooks(self, cache: Optional[dict]=None):
        """
        Register forward-post hooks on the key and value projections of every
        `MultiHeadAttention` layer in the decoder, so that their outputs are
        stored in (and served from) a cache dictionary that can be reused for
        later positions.

        Returns
        -------
        cache : Dict[nn.Layer, paddle.Tensor]
            A dictionary object mapping the key/value projection modules to its cache
        hooks : List[RemovableHandle]
            List of Paddle RemovableHandle objects to stop the hooks to be called
        """
        # Always work on a fresh dict so a caller-supplied one is not modified.
        cache = dict(cache) if cache is not None else {}
        hooks = []

        def save_to_cache(module, _, output):
            if module in cache and not (
                    output.shape[1] >
                    self.decoder.positional_embedding.shape[0]):
                # Later tokens: append along axis 1 to what is already cached.
                cache[module] = paddle.concat(
                    [cache[module], output], axis=1).detach()
            else:
                # save as-is, for the first token or cross attention
                cache[module] = output
            return cache[module]

        def install_hooks(layer: nn.Layer):
            if not isinstance(layer, MultiHeadAttention):
                return
            for projection in (layer.key, layer.value):
                hooks.append(
                    projection.register_forward_post_hook(save_to_cache))

        self.decoder.apply(install_hooks)
        return cache, hooks

    # Module-level functions exposed as methods.
    detect_language = detect_language
    transcribe = transcribe
    decode = decode


def pad_or_trim(array, length: int=N_SAMPLES, *, axis: int=-1):
    """
    Pad or trim the audio array to N_SAMPLES, as expected by the encoder.

    Args:
        array: a paddle Tensor or a NumPy array.
        length: target size along ``axis``.
        axis: the axis to trim or zero-pad (padding is appended at the end).

    Returns:
        The trimmed or padded array, of the same kind as the input.
    """
    if paddle.is_tensor(array):
        if array.shape[axis] > length:
            array = array.index_select(axis=axis, index=paddle.arange(length))

        if array.shape[axis] < length:
            pad_widths = [(0, 0)] * array.ndim
            pad_widths[axis] = (0, length - array.shape[axis])
            # NOTE(review): the (1, 0) transposes mean this branch only works
            # for 2-D tensors — confirm no caller passes another rank.
            array = paddle.transpose(array, (1, 0))
            array = F.pad(
                array, [pad for sizes in pad_widths[::-1] for pad in sizes],
                data_format='NLC')
            array = paddle.transpose(array, (1, 0))
    else:
        if array.shape[axis] > length:
            array = array.take(indices=range(length), axis=axis)

        if array.shape[axis] < length:
            pad_widths = [(0, 0)] * array.ndim
            pad_widths[axis] = (0, length - array.shape[axis])
            # pad_widths already targets the requested axis of the NumPy
            # array, so no paddle transpose is needed (or valid) here.
            array = np.pad(array, pad_widths)

    return array


def hann_window(n_fft: int=N_FFT):
    """
    Build a Hann window as a float32 paddle Tensor.

    n_fft:  The number of frequency components of the discrete Fourier transform;
            also the number of window samples returned.
    """
    samples = []
    for n in range(n_fft):
        # w[n] = 0.5 - 0.5 * cos(2 * pi * n / n_fft)
        samples.append(0.5 - 0.5 * np.cos(2 * np.pi * n / n_fft))
    return paddle.to_tensor(samples, dtype="float32")


@lru_cache(maxsize=None)
def mel_filters(resource_path: str, n_mels: int) -> paddle.Tensor:
    """
    Read the Mel filterbank matrix used to project an STFT onto a Mel spectrogram.

    The matrix is loaded from ``<resource_path>/assets/mel_filters.npz`` under
    the key ``mel_<n_mels>``; results are memoized per argument pair.
    Keeping the matrix in a file avoids a librosa dependency; it was saved using:

        np.savez_compressed(
            "mel_filters.npz",
            mel_80=librosa.filters.mel(sr=16000, n_fft=400, n_mels=80),
        )
    """
    assert n_mels in {80, 128}, f"Unsupported n_mels: {n_mels}"

    archive_path = os.path.join(resource_path, "assets", "mel_filters.npz")
    with np.load(archive_path, allow_pickle=False) as archive:
        filterbank = paddle.to_tensor(archive[f"mel_{n_mels}"])
    return filterbank


def log_mel_spectrogram(audio: Union[str, np.ndarray, paddle.Tensor],
                        n_mels: int=80,
                        padding: int=0,
                        resource_path: str=None):
    """
    Compute the log-Mel spectrogram of an audio waveform.

    Parameters
    ----------
    audio: Union[str, np.ndarray, paddle.Tensor], shape = (*)
        The path to audio or either a NumPy array or Tensor containing the audio waveform in 16 kHz

    n_mels: int
        The number of Mel-frequency filters, only 80 and 128 is supported

    padding: int
        Number of zero samples to append to the waveform before the STFT

    resource_path: str
        Directory passed to `mel_filters` to locate the Mel filterbank file

    Returns
    -------
    paddle.Tensor, shape = (n_mels, n_frames)
        A Tensor that contains the Mel spectrogram
    """
    if not paddle.is_tensor(audio):
        if isinstance(audio, str):
            # Read the file and keep only the first channel.
            audio, _ = soundfile.read(audio, dtype="float32", always_2d=True)
            audio = audio[:, 0]
            logger.info(f"audio shape: {audio.shape}")
        audio = paddle.to_tensor(audio)
    if padding > 0:
        audio = F.pad(audio, (0, padding), data_format="NLC")
    window = hann_window(N_FFT)
    stft = paddle.signal.stft(audio, N_FFT, HOP_LENGTH, window=window)

    # Drop the last frame. The ellipsis keeps the slice on the trailing (frame)
    # axis even when `audio` carries leading batch dimensions.
    magnitudes = stft[..., :-1].abs()**2

    filters = mel_filters(resource_path, n_mels)
    mel_spec = paddle.to_tensor(
        filters.numpy()
        @ magnitudes.numpy())  # Use numpy to reduce precision difference
    mel_spec = paddle.to_tensor(mel_spec.numpy().tolist())

    # Floor at 1e-10 before log10, clamp to within 8 of the maximum, then
    # shift and scale.
    log_spec = paddle.clip(mel_spec, min=1e-10).log10()
    log_spec = paddle.maximum(log_spec, log_spec.max() - 8.0)
    log_spec = (log_spec + 4.0) / 4.0
    return log_spec


================================================
FILE: paddlespeech/s2t/models/whisper/whisper_LICENSE
================================================
MIT License

Copyright (c) 2022 OpenAI

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

================================================
FILE: paddlespeech/s2t/modules/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/modules/activation.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import OrderedDict

import paddle
from paddle import nn
from paddle.nn import functional as F

from paddlespeech.s2t.modules.align import Conv2D
from paddlespeech.s2t.modules.align import Linear
from paddlespeech.s2t.utils.log import Log

logger = Log(__name__).getlog()

__all__ = ["get_activation", "brelu", "LinearGLUBlock", "ConvGLUBlock", "GLU"]


def brelu(x, t_min=0.0, t_max=24.0, name=None):
    """Clamp ``x`` element-wise into ``[t_min, t_max]``.

    ``name`` is accepted but not used.
    """
    # paddle.to_tensor is dygraph_only can not work under JIT
    lower_bound = paddle.full(shape=[1], fill_value=t_min, dtype='float32')
    upper_bound = paddle.full(shape=[1], fill_value=t_max, dtype='float32')
    floored = x.maximum(lower_bound)
    return floored.minimum(upper_bound)


class GLU(nn.Layer):
    """Layer wrapper around ``paddle.nn.functional.glu`` (Gated Linear Unit)."""

    def __init__(self, dim: int=-1):
        """Remember the axis that ``forward`` hands to ``F.glu``."""
        super().__init__()
        self.dim = dim

    def forward(self, xs):
        """Apply ``F.glu`` to ``xs`` along ``self.dim``."""
        gated = F.glu(xs, axis=self.dim)
        return gated


class LinearGLUBlock(nn.Layer):
    """A linear Gated Linear Units (GLU) block."""

    def __init__(self, idim: int):
        """ GLU.
        Args:
            idim (int): input and output dimension
        """
        super().__init__()
        # Project to twice the width; the GLU in forward() halves it again.
        self.fc = Linear(idim, idim * 2)

    def forward(self, xs):
        """Project ``xs`` to ``2 * idim`` features and gate along the last axis.

        Uses ``F.glu`` because no bare ``glu`` function is defined or imported
        in this module.
        """
        return F.glu(self.fc(xs), axis=-1)


class ConvGLUBlock(nn.Layer):
    """A convolutional Gated Linear Units (GLU) block with a residual path."""

    def __init__(self, kernel_size, in_ch, out_ch, bottlececk_dim=0,
                 dropout=0.):
        """A convolutional Gated Linear Units (GLU) block.

        Args:
            kernel_size (int): kernel size
            in_ch (int): number of input channels
            out_ch (int): number of output channels
            bottlececk_dim (int): dimension of the bottleneck layers for computational efficiency. Defaults to 0.
            dropout (float): dropout probability. Defaults to 0..
        """

        super().__init__()

        # A 1x1 convolution adapts the residual only when the channel counts
        # differ; otherwise the input is added back unchanged.
        self.conv_residual = None
        if in_ch != out_ch:
            self.conv_residual = nn.utils.weight_norm(
                Conv2D(
                    in_channels=in_ch, out_channels=out_ch, kernel_size=(1, 1)),
                name='weight',
                dim=0)
            self.dropout_residual = nn.Dropout(p=dropout)

        # Paddle's layer is `nn.Pad2D` (there is no `nn.Pad2d`); its second
        # positional argument is `mode`, so the fill value is passed by
        # keyword. Padding order is [left, right, top, bottom]: only the start
        # of the third input axis is padded, by kernel_size - 1.
        self.pad_left = nn.Pad2D(
            [0, 0, kernel_size - 1, 0], mode='constant', value=0.0)

        # NOTE(review): GLU() gates along its default axis (-1), while the
        # convolutions below double the channel axis (out_ch * 2) — confirm
        # the intended gating axis.
        # NOTE(review): in the bottleneck branch 'conv_in' uses nn.Conv2D
        # while every other convolution uses the aligned Conv2D wrapper, and
        # 'conv_out' follows the GLU yet expects `bottlececk_dim` input
        # channels — verify shapes before relying on this branch.
        layers = OrderedDict()
        if bottlececk_dim == 0:
            layers['conv'] = nn.utils.weight_norm(
                Conv2D(
                    in_channels=in_ch,
                    out_channels=out_ch * 2,
                    kernel_size=(kernel_size, 1)),
                name='weight',
                dim=0)
            # TODO(hirofumi0810): padding?
            layers['dropout'] = nn.Dropout(p=dropout)
            layers['glu'] = GLU()

        elif bottlececk_dim > 0:
            layers['conv_in'] = nn.utils.weight_norm(
                nn.Conv2D(
                    in_channels=in_ch,
                    out_channels=bottlececk_dim,
                    kernel_size=(1, 1)),
                name='weight',
                dim=0)
            layers['dropout_in'] = nn.Dropout(p=dropout)
            layers['conv_bottleneck'] = nn.utils.weight_norm(
                Conv2D(
                    in_channels=bottlececk_dim,
                    out_channels=bottlececk_dim,
                    kernel_size=(kernel_size, 1)),
                name='weight',
                dim=0)
            layers['dropout'] = nn.Dropout(p=dropout)
            layers['glu'] = GLU()
            layers['conv_out'] = nn.utils.weight_norm(
                Conv2D(
                    in_channels=bottlececk_dim,
                    out_channels=out_ch * 2,
                    kernel_size=(1, 1)),
                name='weight',
                dim=0)
            layers['dropout_out'] = nn.Dropout(p=dropout)

        self.layers = nn.Sequential(layers)

    def forward(self, xs):
        """Forward pass.
        Args:
            xs (FloatTensor): `[B, in_ch, T, feat_dim]`
        Returns:
            out (FloatTensor): `[B, out_ch, T, feat_dim]`
        """
        residual = xs
        if self.conv_residual is not None:
            residual = self.dropout_residual(self.conv_residual(residual))
        xs = self.pad_left(xs)  # `[B, embed_dim, T+kernel-1, 1]`
        xs = self.layers(xs)  # `[B, out_ch * 2, T ,1]`
        xs = xs + residual
        return xs


def get_activation(act):
    """Instantiate the activation layer registered under the name ``act``.

    The layer class is looked up by name and constructed with no arguments;
    an unknown name raises ``KeyError``.
    """
    # Lazy load to avoid unused import
    registry = dict(
        hardshrink=paddle.nn.Hardshrink,
        hardswish=paddle.nn.Hardswish,
        hardtanh=paddle.nn.Hardtanh,
        tanh=paddle.nn.Tanh,
        relu=paddle.nn.ReLU,
        relu6=paddle.nn.ReLU6,
        leakyrelu=paddle.nn.LeakyReLU,
        selu=paddle.nn.SELU,
        swish=paddle.nn.Swish,
        gelu=paddle.nn.GELU,
        glu=GLU,
        elu=paddle.nn.ELU, )

    layer_cls = registry[act]
    return layer_cls()


================================================
FILE: paddlespeech/s2t/modules/align.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math

import paddle
from paddle import nn
"""
    To align the initializer between paddle and torch, 
    the API below are set default initializer with priority higger than global initializer.
"""
global_init_type = None


class LayerNorm(nn.LayerNorm):
    """``nn.LayerNorm`` whose weight defaults to constant 1.0 and bias to constant 0.0."""

    def __init__(self,
                 normalized_shape,
                 epsilon=1e-05,
                 weight_attr=None,
                 bias_attr=None,
                 name=None):
        # Caller-supplied attributes win; the constants are only a fallback.
        weight_attr = weight_attr if weight_attr is not None else paddle.ParamAttr(
            initializer=nn.initializer.Constant(1.0))
        bias_attr = bias_attr if bias_attr is not None else paddle.ParamAttr(
            initializer=nn.initializer.Constant(0.0))
        super().__init__(
            normalized_shape=normalized_shape,
            epsilon=epsilon,
            weight_attr=weight_attr,
            bias_attr=bias_attr,
            name=name)


class BatchNorm1D(nn.BatchNorm1D):
    """``nn.BatchNorm1D`` whose weight defaults to constant 1.0 and bias to constant 0.0."""

    def __init__(self,
                 num_features,
                 momentum=0.9,
                 epsilon=1e-05,
                 weight_attr=None,
                 bias_attr=None,
                 data_format='NCL',
                 name=None):
        # Caller-supplied attributes win; the constants are only a fallback.
        weight_attr = weight_attr if weight_attr is not None else paddle.ParamAttr(
            initializer=nn.initializer.Constant(1.0))
        bias_attr = bias_attr if bias_attr is not None else paddle.ParamAttr(
            initializer=nn.initializer.Constant(0.0))
        super().__init__(
            num_features=num_features,
            momentum=momentum,
            epsilon=epsilon,
            weight_attr=weight_attr,
            bias_attr=bias_attr,
            data_format=data_format,
            name=name)


class Embedding(nn.Embedding):
    """``nn.Embedding`` whose weight defaults to ``nn.initializer.Normal()``."""

    def __init__(self,
                 num_embeddings,
                 embedding_dim,
                 padding_idx=None,
                 sparse=False,
                 weight_attr=None,
                 name=None):
        # Only fill in the Normal initializer when the caller gave none.
        weight_attr = weight_attr if weight_attr is not None else paddle.ParamAttr(
            initializer=nn.initializer.Normal())
        super().__init__(
            num_embeddings=num_embeddings,
            embedding_dim=embedding_dim,
            padding_idx=padding_idx,
            sparse=sparse,
            weight_attr=weight_attr,
            name=name)


class Linear(nn.Linear):
    """``nn.Linear`` that applies a Kaiming-uniform default initializer.

    The default is used for the weight and/or bias only when the matching
    ``*_attr`` argument is ``None`` and the module-level ``global_init_type``
    equals ``"kaiming_uniform"``.
    """

    def __init__(self,
                 in_features,
                 out_features,
                 weight_attr=None,
                 bias_attr=None,
                 name=None):
        def _kaiming_uniform_attr():
            # Fresh ParamAttr per parameter.
            return paddle.ParamAttr(
                initializer=nn.initializer.KaimingUniform(
                    fan_in=None,
                    negative_slope=math.sqrt(5),
                    nonlinearity='leaky_relu'))

        use_kaiming = global_init_type == "kaiming_uniform"
        if use_kaiming and weight_attr is None:
            weight_attr = _kaiming_uniform_attr()
        if use_kaiming and bias_attr is None:
            bias_attr = _kaiming_uniform_attr()

        super().__init__(
            in_features=in_features,
            out_features=out_features,
            weight_attr=weight_attr,
            bias_attr=bias_attr,
            name=name)


class Conv1D(nn.Conv1D):
    """`nn.Conv1D` with an optional Kaiming-uniform default initialization.

    If the module-level `global_init_type` equals "kaiming_uniform", any of
    `weight_attr` / `bias_attr` left as None is replaced by a `ParamAttr`
    carrying a `KaimingUniform` initializer. Otherwise the arguments are
    forwarded to `nn.Conv1D` untouched.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 padding_mode='zeros',
                 weight_attr=None,
                 bias_attr=None,
                 data_format='NCL'):
        def _kaiming_uniform_attr():
            # A fresh ParamAttr per parameter, so weight and bias never share
            # the same attribute object.
            return paddle.ParamAttr(
                initializer=nn.initializer.KaimingUniform(
                    fan_in=None,
                    negative_slope=math.sqrt(5),
                    nonlinearity='leaky_relu'))

        if weight_attr is None and global_init_type == "kaiming_uniform":
            weight_attr = _kaiming_uniform_attr()
        if bias_attr is None and global_init_type == "kaiming_uniform":
            bias_attr = _kaiming_uniform_attr()

        super().__init__(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            padding_mode=padding_mode,
            weight_attr=weight_attr,
            bias_attr=bias_attr,
            data_format=data_format)


class Conv2D(nn.Conv2D):
    """`nn.Conv2D` with an optional Kaiming-uniform default initialization.

    If the module-level `global_init_type` equals "kaiming_uniform", any of
    `weight_attr` / `bias_attr` left as None is replaced by a `ParamAttr`
    carrying a `KaimingUniform` initializer. Otherwise the arguments are
    forwarded to `nn.Conv2D` untouched.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 padding_mode='zeros',
                 weight_attr=None,
                 bias_attr=None,
                 data_format='NCHW'):
        def _kaiming_uniform_attr():
            # A fresh ParamAttr per parameter, so weight and bias never share
            # the same attribute object.
            return paddle.ParamAttr(
                initializer=nn.initializer.KaimingUniform(
                    fan_in=None,
                    negative_slope=math.sqrt(5),
                    nonlinearity='leaky_relu'))

        if weight_attr is None and global_init_type == "kaiming_uniform":
            weight_attr = _kaiming_uniform_attr()
        if bias_attr is None and global_init_type == "kaiming_uniform":
            bias_attr = _kaiming_uniform_attr()

        super().__init__(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            padding_mode=padding_mode,
            weight_attr=weight_attr,
            bias_attr=bias_attr,
            data_format=data_format)


================================================
FILE: paddlespeech/s2t/modules/attention.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# Copyright 2019 Mobvoi Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from wenet(https://github.com/wenet-e2e/wenet)
"""Multi-Head Attention layer definition."""
import math
from typing import List
from typing import Tuple

import paddle
from paddle import nn
from paddle.nn import initializer as I

from paddlespeech.s2t.modules.align import Linear
from paddlespeech.s2t.utils.log import Log

# Module-level logger obtained through the project's `Log` helper.
logger = Log(__name__).getlog()

# Public names exported by this module.
__all__ = [
    "MultiHeadedAttention", "RelPositionMultiHeadedAttention",
    "RoPERelPositionMultiHeadedAttention"
]

# Relative Positional Encodings
# https://www.jianshu.com/p/c0608efcc26f
# https://zhuanlan.zhihu.com/p/344604604


class MultiHeadedAttention(nn.Layer):
    """Scaled dot-product attention computed over several heads."""

    def __init__(self, n_head: int, n_feat: int, dropout_rate: float):
        """Create the projection layers used by the attention.

        Args:
            n_head (int): Number of attention heads.
            n_feat (int): Feature dimension; must be divisible by `n_head`.
            dropout_rate (float): Dropout probability applied to the
                attention weights.
        """
        super().__init__()
        assert n_feat % n_head == 0
        self.n_feat = n_feat
        self.h = n_head
        # d_v is assumed to always equal d_k.
        self.d_k = n_feat // n_head
        self.linear_q = Linear(n_feat, n_feat)
        self.linear_k = Linear(n_feat, n_feat)
        self.linear_v = Linear(n_feat, n_feat)
        self.linear_out = Linear(n_feat, n_feat)
        self.dropout = nn.Dropout(p=dropout_rate)

    def forward_qkv(self,
                    query: paddle.Tensor,
                    key: paddle.Tensor,
                    value: paddle.Tensor
                    ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor]:
        """Project query/key/value and split them into heads.

        Args:
            query (paddle.Tensor): Query tensor (#batch, time1, size).
            key (paddle.Tensor): Key tensor (#batch, time2, size).
            value (paddle.Tensor): Value tensor (#batch, time2, size).
        Returns:
            paddle.Tensor: Projected query, (#batch, n_head, time1, d_k).
            paddle.Tensor: Projected key, (#batch, n_head, time2, d_k).
            paddle.Tensor: Projected value, (#batch, n_head, time2, d_k).
        """
        batch_size = query.shape[0]
        # (batch, time, size) -> (batch, time, head, d_k)
        per_head_shape = [batch_size, -1, self.h, self.d_k]
        # (batch, time, head, d_k) -> (batch, head, time, d_k)
        head_first = [0, 2, 1, 3]

        q = self.linear_q(query).reshape(per_head_shape).transpose(head_first)
        k = self.linear_k(key).reshape(per_head_shape).transpose(head_first)
        v = self.linear_v(value).reshape(per_head_shape).transpose(head_first)

        return q, k, v

    def forward_attention(
            self,
            value: paddle.Tensor,
            scores: paddle.Tensor,
            mask: paddle.Tensor=paddle.ones([0, 0, 0], dtype=paddle.bool)
    ) -> paddle.Tensor:
        """Turn attention scores into the attended context.

        Args:
            value (paddle.Tensor): Transformed value, size
                (#batch, n_head, time2, d_k).
            scores (paddle.Tensor): Attention score, size
                (#batch, n_head, time1, time2).
            mask (paddle.Tensor): Mask, size (#batch, 1, time2) or
                (#batch, time1, time2); a (0, 0, 0) tensor means "no mask".
        Returns:
            paddle.Tensor: Transformed value (#batch, time1, d_model)
                weighted by the attention score (#batch, time1, time2).
        """
        batch_size = value.shape[0]

        # `mask.shape[2] > 0` is True:
        #   1. during training;
        #   2. for onnx (16/4, chunk_size/history_size), where a real cache
        #      and a real mask are fed for the 1st chunk.
        # `mask.shape[2] > 0` is False:
        #   1. for onnx (16/-1, -1/-1, 16/0);
        #   2. for jit (16/-1, -1/-1, 16/0, 16/4).
        if mask.shape[2] > 0:  # time2 > 0
            # (batch, 1, *, time2); True marks positions to suppress.
            mask = mask.unsqueeze(1).equal(0)
            # For the last chunk, time2 may exceed scores.shape[-1].
            mask = mask[:, :, :, :scores.shape[-1]]
            scores = scores.masked_fill(mask, -float('inf'))
            attn = paddle.nn.functional.softmax(scores, axis=-1)
            # Zero the masked positions again after the softmax.
            attn = attn.masked_fill(mask, 0.0)  # (batch, head, time1, time2)
        else:
            # (batch, head, time1, time2)
            attn = paddle.nn.functional.softmax(scores, axis=-1)

        dropped_attn = self.dropout(attn)
        context = paddle.matmul(dropped_attn, value)  # (batch, head, time1, d_k)
        # Merge the heads back: (batch, time1, d_model)
        context = context.transpose([0, 2, 1, 3])
        context = context.reshape([batch_size, -1, self.h * self.d_k])

        return self.linear_out(context)  # (batch, time1, d_model)

    def forward(self,
                query: paddle.Tensor,
                key: paddle.Tensor,
                value: paddle.Tensor,
                mask: paddle.Tensor=paddle.ones([0, 0, 0], dtype=paddle.bool),
                pos_emb: paddle.Tensor=paddle.empty([0]),
                cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0])
                ) -> Tuple[paddle.Tensor, paddle.Tensor]:
        """Compute scaled dot product attention.

        Args:
            query (paddle.Tensor): Query tensor (#batch, time1, size).
            key (paddle.Tensor): Key tensor (#batch, time2, size).
            value (paddle.Tensor): Value tensor (#batch, time2, size).
            mask (paddle.Tensor): Mask tensor (#batch, 1, time2) or
                (#batch, time1, time2).
                1. For cross attention between decoder and encoder, the
                   batch padding mask for the input has shape (#batch, 1, T).
                2. For encoder self attention the mask has shape
                   (#batch, T, T).
                3. For decoder self attention the mask has shape
                   (#batch, L, L).
                4. If different decoder positions see different encoder
                   blocks (e.g. Mocha), the mask could have shape
                   (#batch, L, T). There is no such case in current Wenet.
            pos_emb (paddle.Tensor): Not read by this method; present so the
                signature matches the positional-encoding subclasses.
            cache (paddle.Tensor): Cache tensor (1, head, cache_t, d_k * 2),
                where `cache_t == chunk_size * num_decoding_left_chunks`
                and `head * d_k == size`
        Returns:
            paddle.Tensor: Output tensor (#batch, time1, d_model).
            paddle.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2)
                where `cache_t == chunk_size * num_decoding_left_chunks`
                and `head * d_k == size`
        """
        # (B, T, D) -> (B, H, T, D/H)
        q, k, v = self.forward_qkv(query, key, value)

        # ONNX export: for the 1st chunk we feed
        #     cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode)
        #     or cache(1, head, real_cache_t, d_k * 2) (16/4 mode).
        #   In all modes `cache.shape[0] > 0` is always True, so splitting
        #   and concatenation always happen (this simplifies ONNX export).
        #   Concatenating and splitting zero-sized tensors is fine:
        #     >>> a = torch.ones((1, 2, 0, 4))
        #     >>> b = torch.ones((1, 2, 3, 4))
        #     >>> c = torch.cat((a, b), dim=2)
        #     >>> torch.equal(b, c)        # True
        #     >>> d = torch.split(a, 2, dim=-1)
        #     >>> torch.equal(d[0], d[1])  # True
        # JIT export: for the 1st chunk we always feed cache(0, 0, 0, 0),
        #   since jit supports a dynamic if-branch.
        if cache.shape[0] > 0:
            # The last dim holds `d_k * 2` values: key half, then value half.
            cached_k, cached_v = paddle.split(cache, 2, axis=-1)
            k = paddle.concat([cached_k, k], axis=2)
            v = paddle.concat([cached_v, v], axis=2)
        # Cache slicing is done in encoder.forward_chunk, since computing
        #   `next_cache_start` here is non-trivial.
        new_cache = paddle.concat((k, v), axis=-1)

        # q @ k^T / sqrt(d_k)
        scores = paddle.matmul(q, k, transpose_y=True) / math.sqrt(self.d_k)
        output = self.forward_attention(v, scores, mask)
        return output, new_cache


class RelPositionMultiHeadedAttention(MultiHeadedAttention):
    """Multi-Head Attention layer with relative position encoding."""

    def __init__(self,
                 n_head,
                 n_feat,
                 dropout_rate,
                 adaptive_scale=False,
                 init_weights=False):
        """Construct an RelPositionMultiHeadedAttention object.
        Paper: https://arxiv.org/abs/1901.02860
        Args:
            n_head (int): The number of heads.
            n_feat (int): The number of features.
            dropout_rate (float): Dropout rate.
            adaptive_scale (bool): If True, create learnable `ada_scale` /
                `ada_bias` parameters of shape (1, 1, n_feat) that are
                applied to query, key and value in `forward`.
            init_weights (bool): If True, call `init_weights()` at the end
                of construction.
        """
        super().__init__(n_head, n_feat, dropout_rate)
        # linear transformation for positional encoding
        self.linear_pos = Linear(n_feat, n_feat, bias_attr=False)
        # these two learnable bias are used in matrix c and matrix d
        # as described in https://arxiv.org/abs/1901.02860 Section 3.3
        #self.pos_bias_u = nn.Parameter(torch.Tensor(self.h, self.d_k))
        #self.pos_bias_v = nn.Parameter(torch.Tensor(self.h, self.d_k))
        #torch.nn.init.xavier_uniform_(self.pos_bias_u)
        #torch.nn.init.xavier_uniform_(self.pos_bias_v)
        pos_bias_u = self.create_parameter(
            [self.h, self.d_k], default_initializer=I.XavierUniform())
        self.add_parameter('pos_bias_u', pos_bias_u)
        pos_bias_v = self.create_parameter(
            (self.h, self.d_k), default_initializer=I.XavierUniform())
        self.add_parameter('pos_bias_v', pos_bias_v)
        self.adaptive_scale = adaptive_scale
        if self.adaptive_scale:
            ada_scale = self.create_parameter(
                [1, 1, n_feat], default_initializer=I.Constant(1.0))
            self.add_parameter('ada_scale', ada_scale)
            ada_bias = self.create_parameter(
                [1, 1, n_feat], default_initializer=I.Constant(0.0))
            self.add_parameter('ada_bias', ada_bias)
        if init_weights:
            self.init_weights()

    def init_weights(self):
        """Attach Uniform(-a, a) initializers, a = (h * d_k) ** -0.5, to the
        q/k/v/pos/out projection layers via `_param_attr` / `_bias_attr`.
        """
        # NOTE(review): the projection layers are already constructed when
        # this runs, so assigning `_param_attr` / `_bias_attr` here appears
        # not to re-initialize their existing weight/bias tensors. Confirm
        # whether an in-place re-initialization was intended.
        input_max = (self.h * self.d_k)**-0.5
        projections = (self.linear_q, self.linear_k, self.linear_v,
                       self.linear_pos, self.linear_out)
        for layer in projections:
            layer._param_attr = paddle.nn.initializer.Uniform(
                low=-input_max, high=input_max)
            layer._bias_attr = paddle.nn.initializer.Uniform(
                low=-input_max, high=input_max)

    def rel_shift(self, x, zero_triu: bool=False):
        """Compute relative positional encoding.
        Args:
            x (paddle.Tensor): Input tensor (batch, head, time1, time2).
            zero_triu (bool): If true, return the lower triangular part of
                the matrix.
        Returns:
            paddle.Tensor: Output tensor with the same shape as `x`.
        """
        # Prepend one zero column along the last axis.
        zero_pad = paddle.zeros(
            (x.shape[0], x.shape[1], x.shape[2], 1), dtype=x.dtype)
        # Native Paddle API (`concat` / `axis`), matching the rest of this
        # module, instead of the torch-style `cat` / `dim` alias.
        x_padded = paddle.concat([zero_pad, x], axis=-1)

        x_padded = x_padded.reshape(
            [x.shape[0], x.shape[1], x.shape[3] + 1, x.shape[2]])
        # Drop the first row and restore the input shape; `reshape` replaces
        # the torch-style `view_as`.
        x = x_padded[:, :, 1:].reshape(x.shape)  # [B, H, T1, T2]

        if zero_triu:
            ones = paddle.ones((x.shape[2], x.shape[3]))
            x = x * paddle.tril(ones, x.shape[3] - x.shape[2])[None, None, :, :]

        return x

    def forward(self,
                query: paddle.Tensor,
                key: paddle.Tensor,
                value: paddle.Tensor,
                mask: paddle.Tensor=paddle.ones([0, 0, 0], dtype=paddle.bool),
                pos_emb: paddle.Tensor=paddle.empty([0]),
                cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0])
                ) -> Tuple[paddle.Tensor, paddle.Tensor]:
        """Compute 'Scaled Dot Product Attention' with rel. positional encoding.
        Args:
            query (paddle.Tensor): Query tensor (#batch, time1, size).
            key (paddle.Tensor): Key tensor (#batch, time2, size).
            value (paddle.Tensor): Value tensor (#batch, time2, size).
            mask (paddle.Tensor): Mask tensor (#batch, 1, time2) or
                (#batch, time1, time2), (0, 0, 0) means fake mask.
            pos_emb (paddle.Tensor): Positional embedding tensor
                (#batch, time2, size).
            cache (paddle.Tensor): Cache tensor (1, head, cache_t, d_k * 2),
                where `cache_t == chunk_size * num_decoding_left_chunks`
                and `head * d_k == size`
        Returns:
            paddle.Tensor: Output tensor (#batch, time1, d_model).
            paddle.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2)
                where `cache_t == chunk_size * num_decoding_left_chunks`
                and `head * d_k == size`
        """
        if self.adaptive_scale:
            # Learnable affine transform applied before the projections.
            query = self.ada_scale * query + self.ada_bias
            key = self.ada_scale * key + self.ada_bias
            value = self.ada_scale * value + self.ada_bias

        q, k, v = self.forward_qkv(query, key, value)
        # q = q.transpose([0, 2, 1, 3])  # (batch, time1, head, d_k)

        #   when exporting an onnx model, for the 1st chunk, we feed
        #       cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode)
        #       or cache(1, head, real_cache_t, d_k * 2) (16/4 mode).
        #       In all modes, `if cache.shape[0] > 0` will always be `True`
        #       and we will always do splitting and
        #       concatenation (this simplifies onnx export). Note that
        #       it's OK to concat & split zero-shaped tensors (see below).
        #   when exporting a jit model, for the 1st chunk, we always feed
        #       cache(0, 0, 0, 0) since jit supports dynamic if-branch.
        # >>> a = torch.ones((1, 2, 0, 4))
        # >>> b = torch.ones((1, 2, 3, 4))
        # >>> c = torch.cat((a, b), dim=2)
        # >>> torch.equal(b, c)        # True
        # >>> d = torch.split(a, 2, dim=-1)
        # >>> torch.equal(d[0], d[1])  # True
        if cache.shape[0] > 0:
            # last dim `d_k * 2` for (key, val)
            key_cache, value_cache = paddle.split(cache, 2, axis=-1)
            k = paddle.concat([key_cache, k], axis=2)
            v = paddle.concat([value_cache, v], axis=2)
        # We do cache slicing in encoder.forward_chunk, since it's
        #   non-trivial to calculate `next_cache_start` here.
        new_cache = paddle.concat((k, v), axis=-1)

        n_batch_pos = pos_emb.shape[0]
        p = self.linear_pos(pos_emb).reshape(
            [n_batch_pos, -1, self.h, self.d_k])
        p = p.transpose([0, 2, 1, 3])  # (batch, head, time1, d_k)

        # pos_bias_{u,v} are (head, d_k); unsqueeze(1) makes them
        # (head, 1, d_k) so they broadcast over q (batch, head, time1, d_k).
        # (batch, head, time1, d_k)
        q_with_bias_u = q + self.pos_bias_u.unsqueeze(1)
        # (batch, head, time1, d_k)
        q_with_bias_v = q + self.pos_bias_v.unsqueeze(1)

        # compute attention score
        # first compute matrix a and matrix c
        # as described in https://arxiv.org/abs/1901.02860 Section 3.3
        # (batch, head, time1, time2)
        matrix_ac = paddle.matmul(q_with_bias_u, k, transpose_y=True)

        # compute matrix b and matrix d
        # (batch, head, time1, time2)
        matrix_bd = paddle.matmul(q_with_bias_v, p, transpose_y=True)
        # Remove rel_shift since it is useless in speech recognition,
        # and it requires special attention for streaming.
        # matrix_bd = self.rel_shift(matrix_bd)

        scores = (matrix_ac + matrix_bd) / math.sqrt(
            self.d_k)  # (batch, head, time1, time2)

        return self.forward_attention(v, scores, mask), new_cache


class RoPERelPositionMultiHeadedAttention(MultiHeadedAttention):
    """Multi-Head Attention layer with RoPE relative position encoding."""

    def __init__(self,
                 n_head,
                 n_feat,
                 dropout_rate,
                 adaptive_scale=False,
                 init_weights=False):
        """Construct a RoPERelPositionMultiHeadedAttention object.

        RoPE is introduced in RoFormer, https://arxiv.org/abs/2104.09864

        Args:
            n_head (int): The number of heads.
            n_feat (int): The number of features.
            dropout_rate (float): Dropout rate.
            adaptive_scale (bool): Accepted but not used by this constructor.
            init_weights (bool): Accepted but not used by this constructor.
        """
        super().__init__(n_head, n_feat, dropout_rate)

    def align(self, tensor: paddle.Tensor, axes: List[int], ndim=None):
        """Re-align a tensor (a batched version of expand_dims).

        Args:
            tensor (paddle.Tensor): Tensor to re-align.
            axes (List[int]): The i-th dimension of `tensor` is aligned to
                dimension `axes[i]` of the result.
            ndim (int, optional): Number of dimensions of the result; when
                omitted it is `max(axes) + 1`.
        """
        assert len(axes) == tensor.dim()
        assert ndim or min(axes) >= 0

        ndim = ndim or max(axes) + 1

        # Start with a new axis everywhere: a[0, None, 1] == a[0, np.newaxis, 1]
        selector = [None] * ndim
        for target_axis in axes:
            # Keep the whole axis: a[0, slice(None), 1] == a[0, :, 1]
            selector[target_axis] = slice(None)

        # NOTE(review): indexes with a list rather than a tuple; confirm this
        # is accepted by the Paddle version in use.
        return tensor[selector]

    def apply_rotary_position_embeddings(self, sinusoidal, *tensors):
        """Apply RoPE to every tensor in `tensors`.

        `sinusoidal.shape` is [B, T, D]; `tensors` is a list of tensors, each
        of shape [B, T, ..., D] or (B, H, T, D/H).

        Returns a single tensor when exactly one tensor was passed, otherwise
        a list with one rotated tensor per input.
        """
        assert len(tensors) > 0, 'at least one input tensor'
        assert all(
            [tensor.shape == tensors[0].shape
             for tensor in tensors[1:]]), 'all tensors must have the same shape'

        # (B, H, T, D)
        ndim = tensors[0].dim()
        _, n_head, n_time, head_dim = tensors[0].shape

        # Give sinusoidal the same layout as tensors[0]:
        # [B, T, D] -> [B, T, H, D/H] -> (B, H, T, D/H)
        # sinusoidal = self.align(sinusoidal, [0, 1, -1], ndim)
        sinusoidal = sinusoidal.reshape((1, n_time, n_head, head_dim))
        sinusoidal = sinusoidal.transpose([0, 2, 1, 3])

        # http://man.hubwiz.com/docset/TensorFlow.docset/Contents/Resources/Documents/api_docs/python/tf/keras/backend/repeat_elements.html
        # Like np.repeat: x (s1, s2, s3), axis 1 -> (s1, s2 * rep, s3)
        # [b, T, ..., d/2] -> [b, T, ..., d]
        odd_part = sinusoidal[..., 1::2]
        even_part = sinusoidal[..., 0::2]
        cos_pos = paddle.repeat_interleave(odd_part, 2, axis=-1)
        sin_pos = paddle.repeat_interleave(even_part, 2, axis=-1)

        outputs = []
        for tensor in tensors:
            # rotated = [-x2, x1, -x4, x3, ..., -x_d, x_{d-1}]
            rotated = paddle.stack(
                [-tensor[..., 1::2], tensor[..., ::2]], axis=ndim)
            rotated = paddle.reshape(rotated, paddle.shape(tensor))

            # Equation 34: out = x * cos_pos + x2 * sin_pos
            outputs.append(tensor * cos_pos + rotated * sin_pos)

        if len(outputs) == 1:
            return outputs[0]
        return outputs

    def forward(self,
                query: paddle.Tensor,
                key: paddle.Tensor,
                value: paddle.Tensor,
                mask: paddle.Tensor=paddle.ones([0, 0, 0], dtype=paddle.bool),
                pos_emb: paddle.Tensor=paddle.empty([0]),
                cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0])
                ) -> Tuple[paddle.Tensor, paddle.Tensor]:
        """Compute 'Scaled Dot Product Attention' with RoPE positional encoding.
        Ref: https://github.com/facebookresearch/llama/blob/main/llama/model.py
        Args:
            query (paddle.Tensor): Query tensor (#batch, time1, size).
            key (paddle.Tensor): Key tensor (#batch, time2, size).
            value (paddle.Tensor): Value tensor (#batch, time2, size).
            mask (paddle.Tensor): Mask tensor (#batch, 1, time2) or
                (#batch, time1, time2), (0, 0, 0) means fake mask.
            pos_emb (paddle.Tensor): Positional embedding tensor
                (#batch, time2, size).
            cache (paddle.Tensor): Cache tensor (1, head, cache_t, d_k * 2),
                where `cache_t == chunk_size * num_decoding_left_chunks`
                and `head * d_k == size`
        Returns:
            paddle.Tensor: Output tensor (#batch, time1, d_model).
            paddle.Tensor: Cache tensor (1, head, cache_t + time1, d_k * 2)
                where `cache_t == chunk_size * num_decoding_left_chunks`
                and `head * d_k == size`
        """
        q, k, v = self.forward_qkv(query, key, value)
        # q = q.transpose([0, 2, 1, 3])  # (batch, time1, head, d_k)

        # f{q,k}(x_m, m) = R^d_{\theta, m} W_{q,k} x_m, m is position index
        # q_t is always chunk_size
        q_t = q.shape[2]
        # Only the last q_t positions of pos_emb are used for the rotation.
        q = self.apply_rotary_position_embeddings(pos_emb[:, -q_t:, :], q)
        # k grows during streaming decoding; it is rotated here, before the
        # cached keys are prepended below.
        k = self.apply_rotary_position_embeddings(pos_emb[:, -q_t:, :], k)

        # ONNX export: for the 1st chunk we feed
        #     cache(1, head, 0, d_k * 2) (16/-1, -1/-1, 16/0 mode)
        #     or cache(1, head, real_cache_t, d_k * 2) (16/4 mode).
        #   In all modes `cache.shape[0] > 0` is always True, so splitting
        #   and concatenation always happen (this simplifies ONNX export).
        #   Concatenating and splitting zero-sized tensors is fine:
        #     >>> a = torch.ones((1, 2, 0, 4))
        #     >>> b = torch.ones((1, 2, 3, 4))
        #     >>> c = torch.cat((a, b), dim=2)
        #     >>> torch.equal(b, c)        # True
        #     >>> d = torch.split(a, 2, dim=-1)
        #     >>> torch.equal(d[0], d[1])  # True
        # JIT export: for the 1st chunk we always feed cache(0, 0, 0, 0),
        #   since jit supports a dynamic if-branch.
        if cache.shape[0] > 0:
            # The last dim holds `d_k * 2` values: key half, then value half.
            cached_k, cached_v = paddle.split(cache, 2, axis=-1)
            k = paddle.concat([cached_k, k], axis=2)
            v = paddle.concat([cached_v, v], axis=2)
        # Cache slicing is done in encoder.forward_chunk, since computing
        #   `next_cache_start` here is non-trivial.
        new_cache = paddle.concat((k, v), axis=-1)

        # dot(q, k)
        scores = paddle.matmul(q, k, transpose_y=True) / math.sqrt(self.d_k)
        output = self.forward_attention(v, scores, mask)
        return output, new_cache


================================================
FILE: paddlespeech/s2t/modules/cmvn.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# Copyright 2019 Mobvoi Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from wenet(https://github.com/wenet-e2e/wenet)
import paddle
from paddle import nn

from paddlespeech.s2t.utils.log import Log

# Module-level logger obtained through the project's `Log` helper.
logger = Log(__name__).getlog()

# Public names exported by this module.
__all__ = ['GlobalCMVN']


class GlobalCMVN(nn.Layer):
    """Normalize features with fixed mean / inverse-std statistics."""

    def __init__(self,
                 mean: paddle.Tensor,
                 istd: paddle.Tensor,
                 norm_var: bool=True):
        """Store the normalization statistics as buffers.

        Args:
            mean (paddle.Tensor): Mean statistics.
            istd (paddle.Tensor): Inverse standard deviation, i.e. 1.0 / std.
                Must have the same shape as `mean`.
            norm_var (bool): If True, `forward` also multiplies by `istd`
                after subtracting the mean.
        """
        super().__init__()
        assert mean.shape == istd.shape
        self.norm_var = norm_var
        # Registered as buffers, so they are reachable as self.mean and
        # self.istd.
        self.register_buffer("mean", mean)
        self.register_buffer("istd", istd)

    def __repr__(self):
        template = "{name}(mean={mean}, istd={istd}, norm_var={norm_var})"
        return template.format(
            name=self.__class__.__name__,
            mean=self.mean,
            istd=self.istd,
            norm_var=self.norm_var)

    def forward(self, x: paddle.Tensor):
        """Subtract the mean and, when `norm_var` is set, scale by `istd`.

        Args:
            x (paddle.Tensor): (batch, max_len, feat_dim)
        Returns:
            (paddle.Tensor): normalized feature
        """
        centered = x - self.mean
        if not self.norm_var:
            return centered
        return centered * self.istd


================================================
FILE: paddlespeech/s2t/modules/conformer_convolution.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# Copyright 2019 Mobvoi Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from wenet(https://github.com/wenet-e2e/wenet)
"""ConvolutionModule definition."""
from typing import Tuple

import paddle
from paddle import nn
from paddle.nn import initializer as I
from typeguard import typechecked

from paddlespeech.s2t.modules.align import BatchNorm1D
from paddlespeech.s2t.modules.align import Conv1D
from paddlespeech.s2t.modules.align import LayerNorm
from paddlespeech.s2t.utils.log import Log

# Module-level logger obtained through the project's `Log` helper.
logger = Log(__name__).getlog()

# Public names exported by this module.
__all__ = ['ConvolutionModule']


class ConvolutionModule(nn.Layer):
    """ConvolutionModule in Conformer model.

    ``forward`` applies, in order: an optional learnable per-channel
    scale/bias, a pointwise conv followed by GLU, a depthwise conv, a
    normalization + activation, and a final pointwise conv.
    """

    @typechecked
    def __init__(self,
                 channels: int,
                 kernel_size: int=15,
                 activation: nn.Layer=nn.ReLU(),
                 norm: str="batch_norm",
                 causal: bool=False,
                 bias: bool=True,
                 adaptive_scale: bool=False,
                 init_weights: bool=False):
        """Construct an ConvolutionModule object.
        Args:
            channels (int): The number of channels of conv layers.
            kernel_size (int): Kernel size of conv layers.
            activation (nn.Layer): Activation Layer.
            norm (str): Normalization type, 'batch_norm' or 'layer_norm'
            causal (bool): Whether use causal convolution or not
            bias (bool): Whether Conv with bias or not
            adaptive_scale (bool): Whether to create the learnable
                ``ada_scale`` / ``ada_bias`` parameters (shape [1, 1, channels])
                that are applied to the input in ``forward``.
            init_weights (bool): Whether to call ``init_weights()`` at the end
                of construction.
        """
        super().__init__()
        self.bias = bias
        self.channels = channels
        self.kernel_size = kernel_size
        self.adaptive_scale = adaptive_scale
        if self.adaptive_scale:
            # Start as the identity transform: scale 1, bias 0.
            ada_scale = self.create_parameter(
                [1, 1, channels], default_initializer=I.Constant(1.0))
            self.add_parameter('ada_scale', ada_scale)
            ada_bias = self.create_parameter(
                [1, 1, channels], default_initializer=I.Constant(0.0))
            self.add_parameter('ada_bias', ada_bias)

        # Doubles the channel count so that GLU can halve it again.
        self.pointwise_conv1 = Conv1D(
            channels,
            2 * channels,
            kernel_size=1,
            stride=1,
            padding=0,
            bias_attr=None
            if bias else False,  # None for True, using bias as default config
        )

        # self.lorder is used to distinguish if it's a causal convolution,
        # if self.lorder > 0:
        #    it's a causal convolution, the input will be padded with
        #    `self.lorder` frames on the left in forward (causal conv impl).
        # else: it's a symmetrical convolution
        if causal:
            padding = 0
            self.lorder = kernel_size - 1
        else:
            # kernel_size should be an odd number for non-causal convolution
            assert (kernel_size - 1) % 2 == 0
            padding = (kernel_size - 1) // 2
            self.lorder = 0

        self.depthwise_conv = Conv1D(
            channels,
            channels,
            kernel_size,
            stride=1,
            padding=padding,
            groups=channels,
            bias_attr=None
            if bias else False,  # None for True, using bias as default config
        )

        assert norm in ['batch_norm', 'layer_norm']
        if norm == "batch_norm":
            self.use_layer_norm = False
            self.norm = BatchNorm1D(channels)
        else:
            self.use_layer_norm = True
            self.norm = LayerNorm(channels)

        self.pointwise_conv2 = Conv1D(
            channels,
            channels,
            kernel_size=1,
            stride=1,
            padding=0,
            bias_attr=None
            if bias else False,  # None for True, using bias as default config
        )
        self.activation = activation

        if init_weights:
            self.init_weights()

    @staticmethod
    def _uniform_init(param: paddle.Tensor, bound: float):
        """Overwrite ``param`` in place with samples from U(-bound, bound)."""
        param.set_value(
            paddle.uniform(
                param.shape, dtype=param.dtype, min=-bound, max=bound))

    def init_weights(self):
        """Re-initialize the three conv layers from uniform distributions.

        The pointwise convs use the bound ``channels ** -0.5`` and the
        depthwise conv uses ``kernel_size ** -0.5``; biases (when enabled) use
        the same bound as their layer's weight.

        The parameters already exist when this runs, so the new values are
        written into them directly. Only assigning initializer objects to the
        layers' ``_param_attr`` / ``_bias_attr`` would not change the
        already-created tensors.
        """
        pw_max = self.channels**-0.5
        dw_max = self.kernel_size**-0.5
        for conv, bound in ((self.pointwise_conv1, pw_max),
                            (self.depthwise_conv, dw_max),
                            (self.pointwise_conv2, pw_max)):
            self._uniform_init(conv.weight, bound)
            if self.bias:
                self._uniform_init(conv.bias, bound)

    def forward(
            self,
            x: paddle.Tensor,
            mask_pad: paddle.Tensor=paddle.ones([0, 0, 0], dtype=paddle.bool),
            cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0])
    ) -> Tuple[paddle.Tensor, paddle.Tensor]:
        """Compute convolution module.
        Args:
            x (paddle.Tensor): Input tensor (#batch, time, channels).
            mask_pad (paddle.Tensor): used for batch padding (#batch, 1, time).
                Positions where it is True are filled with 0. A tensor whose
                axis 2 is empty (such as the default) means fake mask and
                disables masking.
            cache (paddle.Tensor): left context cache, it is only
                used in causal convolution (#batch, channels, cache_t).
                A tensor whose axis 2 is empty (such as the default) means
                fake cache; the input is then zero-padded on the left instead.
        Returns:
            paddle.Tensor: Output tensor (#batch, time, channels).
            paddle.Tensor: Output cache tensor (#batch, channels, time'),
                or an empty [0, 0, 0] tensor for non-causal convolution.
        """
        if self.adaptive_scale:
            x = self.ada_scale * x + self.ada_bias

        # exchange the temporal dimension and the feature dimension
        x = x.transpose([0, 2, 1])  # [B, C, T]

        # mask batch padding
        if mask_pad.shape[2] > 0:  # time > 0
            x = x.masked_fill(mask_pad, 0.0)

        if self.lorder > 0:
            if cache.shape[2] == 0:  # cache_t == 0
                # No history available: pad `lorder` zero frames on the left.
                x = nn.functional.pad(
                    x, [self.lorder, 0], 'constant', 0.0, data_format='NCL')
            else:
                assert cache.shape[0] == x.shape[0]  # B
                assert cache.shape[1] == x.shape[1]  # C
                x = paddle.concat((cache, x), axis=2)

            assert (x.shape[2] > self.lorder)
            # Keep the last `lorder` frames as left context for the next call.
            new_cache = x[:, :, -self.lorder:]  #[B, C, T]
        else:
            # It's better we just return None if no cache is required,
            # However, for JIT export, here we just fake one tensor instead of
            # None.
            new_cache = paddle.zeros([0, 0, 0], dtype=x.dtype)

        # GLU mechanism
        x = self.pointwise_conv1(x)  # (batch, 2*channel, dim)
        x = nn.functional.glu(x, axis=1)  # (batch, channel, dim)

        # 1D Depthwise Conv
        x = self.depthwise_conv(x)
        # LayerNorm is applied with channels last, so swap axes around it.
        if self.use_layer_norm:
            x = x.transpose([0, 2, 1])  # [B, T, C]
        x = self.activation(self.norm(x))
        if self.use_layer_norm:
            x = x.transpose([0, 2, 1])  # [B, C, T]
        x = self.pointwise_conv2(x)

        # mask batch padding
        if mask_pad.shape[2] > 0:  # time > 0
            x = x.masked_fill(mask_pad, 0.0)

        x = x.transpose([0, 2, 1])  # [B, T, C]
        return x, new_cache


================================================
FILE: paddlespeech/s2t/modules/conv2d.py
================================================
from typing import Optional
from typing import Union

import paddle
import paddle.nn.functional as F
from paddle.nn.layer.conv import _ConvNd

__all__ = ['Conv2DValid']


class Conv2DValid(_ConvNd):
    """
    Conv2d operator for VALID mode padding.

    The padding handed to ``F.conv2d`` is computed in ``_conv_forward`` from
    the ``valid_trigx`` / ``valid_trigy`` flags (0 on an axis whose flag is
    off) rather than taken from the ``padding`` constructor argument.
    """

    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 kernel_size: int,
                 stride: int=1,
                 padding: Union[str, int]=0,
                 dilation: int=1,
                 groups: int=1,
                 padding_mode: str='zeros',
                 weight_attr=None,
                 bias_attr=None,
                 data_format="NCHW",
                 valid_trigx: bool=False,
                 valid_trigy: bool=False) -> None:
        # The two positional values after kernel_size are passed through to
        # _ConvNd unchanged (False, 2).
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            False,
            2,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            padding_mode=padding_mode,
            weight_attr=weight_attr,
            bias_attr=bias_attr,
            data_format=data_format)
        self.valid_trigy = valid_trigy
        self.valid_trigx = valid_trigx

    def _conv_forward(self,
                      input: paddle.Tensor,
                      weight: paddle.Tensor,
                      bias: Optional[paddle.Tensor]):
        """Run ``F.conv2d`` with per-axis padding derived from the trigger flags."""

        def _axis_padding(axis: int) -> int:
            # (size * (stride - 1) - 1 + kernel) // 2 along the given axis.
            extent = input.shape[axis] * (self._stride[axis] - 1)
            return (extent - 1 + self._kernel_size[axis]) // 2

        pad_x = _axis_padding(-2) if self.valid_trigx else 0
        pad_y = _axis_padding(-1) if self.valid_trigy else 0
        return F.conv2d(input, weight, bias, self._stride, (pad_x, pad_y),
                        self._dilation, self._groups)

    def forward(self, input: paddle.Tensor) -> paddle.Tensor:
        """Convolve ``input`` with this layer's own weight and bias."""
        return self._conv_forward(input, self.weight, self.bias)


================================================
FILE: paddlespeech/s2t/modules/crf.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle import nn

from paddlespeech.s2t.utils.log import Log

logger = Log(__name__).getlog()

__all__ = ['CRF']


class CRF(nn.Layer):
    """
    Linear-chain Conditional Random Field (CRF).

    Args:
        nb_labels (int): number of labels in your tagset, including special symbols.
        bos_tag_id (int): integer representing the beginning of sentence symbol in
            your tagset.
        eos_tag_id (int): integer representing the end of sentence symbol in your tagset.
        pad_tag_id (int, optional): integer representing the pad symbol in your tagset.
            If None, the model will treat the PAD as a normal tag. Otherwise, the model
            will apply constraints for PAD transitions.
        batch_first (bool): Whether the first dimension represents the batch dimension.
    """

    def __init__(self,
                 nb_labels: int,
                 bos_tag_id: int,
                 eos_tag_id: int,
                 pad_tag_id: int=None,
                 batch_first: bool=True):
        super().__init__()

        self.nb_labels = nb_labels
        self.BOS_TAG_ID = bos_tag_id
        self.EOS_TAG_ID = eos_tag_id
        self.PAD_TAG_ID = pad_tag_id
        self.batch_first = batch_first

        # initialize transitions from a random uniform distribution between -0.1 and 0.1
        self.transitions = self.create_parameter(
            [self.nb_labels, self.nb_labels],
            default_initializer=nn.initializer.Uniform(-0.1, 0.1))
        self.init_weights()

    def init_weights(self):
        """Write the hard transition constraints into ``self.transitions``."""
        # enforce constraints (rows=from, columns=to) with a big negative number
        # so exp(-10000) will tend to zero

        # no transitions allowed to the beginning of sentence
        self.transitions[:, self.BOS_TAG_ID] = -10000.0
        # no transition allowed from the end of sentence
        self.transitions[self.EOS_TAG_ID, :] = -10000.0

        if self.PAD_TAG_ID is not None:
            # no transitions from padding
            self.transitions[self.PAD_TAG_ID, :] = -10000.0
            # no transitions to padding
            self.transitions[:, self.PAD_TAG_ID] = -10000.0
            # except if the end of sentence is reached
            # or we are already in a pad position
            self.transitions[self.PAD_TAG_ID, self.EOS_TAG_ID] = 0.0
            self.transitions[self.PAD_TAG_ID, self.PAD_TAG_ID] = 0.0

    def forward(self,
                emissions: paddle.Tensor,
                tags: paddle.Tensor,
                mask: paddle.Tensor=None) -> paddle.Tensor:
        """Compute the negative log-likelihood. See `log_likelihood` method."""
        nll = -self.log_likelihood(emissions, tags, mask=mask)
        return nll

    def log_likelihood(self, emissions, tags, mask=None):
        """Compute the probability of a sequence of tags given a sequence of
        emissions scores.

        Args:
            emissions (paddle.Tensor): Sequence of emissions for each label.
                Shape of (batch_size, seq_len, nb_labels) if batch_first is True,
                (seq_len, batch_size, nb_labels) otherwise.
            tags (paddle.LongTensor): Sequence of labels.
                Shape of (batch_size, seq_len) if batch_first is True,
                (seq_len, batch_size) otherwise.
            mask (paddle.FloatTensor, optional): Tensor representing valid positions.
                If None, all positions are considered valid.
                Shape of (batch_size, seq_len) if batch_first is True,
                (seq_len, batch_size) otherwise.

        Returns:
            paddle.Tensor: sum of the log-likelihoods for each sequence in the batch.
                Shape of ()
        """
        # fix tensors order by setting batch as the first dimension
        if not self.batch_first:
            emissions = emissions.transpose([1, 0, 2])
            tags = tags.transpose([1, 0])
            # A caller-supplied mask shares the (seq_len, batch_size) layout
            # and is indexed as batch-first below, so it is swapped as well.
            if mask is not None:
                mask = mask.transpose([1, 0])

        if mask is None:
            mask = paddle.ones(emissions.shape[:2], dtype=paddle.float32)

        scores = self._compute_scores(emissions, tags, mask=mask)
        partition = self._compute_log_partition(emissions, mask=mask)
        return paddle.sum(scores - partition)

    def decode(self, emissions, mask=None):
        """Find the most probable sequence of labels given the emissions using
        the Viterbi algorithm.

        Args:
            emissions (paddle.Tensor): Sequence of emissions for each label.
                Shape (batch_size, seq_len, nb_labels) if batch_first is True,
                (seq_len, batch_size, nb_labels) otherwise.
            mask (paddle.FloatTensor, optional): Tensor representing valid positions.
                If None, all positions are considered valid.
                Shape (batch_size, seq_len) if batch_first is True,
                (seq_len, batch_size) otherwise.

        Returns:
            paddle.Tensor: the viterbi score for each batch.
                Shape of (batch_size,)
            list of lists: the best viterbi sequence of labels for each batch. [B, T]
        """
        # fix tensors order by setting batch as the first dimension
        # (decoding has no `tags` input, only emissions and mask are swapped)
        if not self.batch_first:
            emissions = emissions.transpose([1, 0, 2])
            if mask is not None:
                mask = mask.transpose([1, 0])

        if mask is None:
            mask = paddle.ones(emissions.shape[:2], dtype=paddle.float32)

        scores, sequences = self._viterbi_decode(emissions, mask)
        return scores, sequences

    def _compute_scores(self, emissions, tags, mask):
        """Compute the scores for a given batch of emissions with their tags.

        Args:
            emissions (paddle.Tensor): (batch_size, seq_len, nb_labels)
            tags (Paddle.LongTensor): (batch_size, seq_len)
            mask (Paddle.FloatTensor): (batch_size, seq_len)

        Returns:
            paddle.Tensor: Scores for each batch.
                Shape of (batch_size,)
        """
        batch_size, seq_length = tags.shape
        scores = paddle.zeros([batch_size])

        # save first and last tags to be used later
        first_tags = tags[:, 0]
        last_valid_idx = mask.int().sum(1) - 1

        # TODO(Hui Zhang): not support fancy index.
        # last_tags = tags.gather(last_valid_idx.unsqueeze(1), axis=1).squeeze()
        # (batch index, position) pairs emulate the fancy index via gather_nd.
        batch_idx = paddle.arange(batch_size, dtype=last_valid_idx.dtype)
        gather_last_valid_idx = paddle.stack(
            [batch_idx, last_valid_idx], axis=-1)
        last_tags = tags.gather_nd(gather_last_valid_idx)

        # add the transition from BOS to the first tags for each batch
        # t_scores = self.transitions[self.BOS_TAG_ID, first_tags]
        t_scores = self.transitions[self.BOS_TAG_ID].gather(first_tags)

        # add the [unary] emission scores for the first tags for each batch
        # for all batches, the first word, see the correspondent emissions
        # for the first tags (which is a list of ids):
        # emissions[:, 0, [tag_1, tag_2, ..., tag_nblabels]]
        # e_scores = emissions[:, 0].gather(1, first_tags.unsqueeze(1)).squeeze()
        gather_first_tags_idx = paddle.stack([batch_idx, first_tags], axis=-1)
        e_scores = emissions[:, 0].gather_nd(gather_first_tags_idx)

        # the scores for a word is just the sum of both scores
        scores += e_scores + t_scores

        # now lets do this for each remaining word
        for i in range(1, seq_length):

            # we could: iterate over batches, check if we reached a mask symbol
            # and stop the iteration, but vectorizing is faster due to gpu,
            # so instead we perform an element-wise multiplication
            is_valid = mask[:, i]

            previous_tags = tags[:, i - 1]
            current_tags = tags[:, i]

            # calculate emission and transition scores as we did before
            # e_scores = emissions[:, i].gather(1, current_tags.unsqueeze(1)).squeeze()
            gather_current_tags_idx = paddle.stack(
                [batch_idx, current_tags], axis=-1)
            e_scores = emissions[:, i].gather_nd(gather_current_tags_idx)
            # t_scores = self.transitions[previous_tags, current_tags]
            gather_transitions_idx = paddle.stack(
                [previous_tags, current_tags], axis=-1)
            t_scores = self.transitions.gather_nd(gather_transitions_idx)

            # apply the mask
            e_scores = e_scores * is_valid
            t_scores = t_scores * is_valid

            scores += e_scores + t_scores

        # add the transition from the end tag to the EOS tag for each batch
        # scores += self.transitions[last_tags, self.EOS_TAG_ID]
        scores += self.transitions.gather(last_tags)[:, self.EOS_TAG_ID]

        return scores

    def _compute_log_partition(self, emissions, mask):
        """Compute the partition function in log-space using the forward-algorithm.

        Args:
            emissions (paddle.Tensor): (batch_size, seq_len, nb_labels)
            mask (Paddle.FloatTensor): (batch_size, seq_len)

        Returns:
            paddle.Tensor: the partition scores for each batch.
                Shape of (batch_size,)
        """
        batch_size, seq_length, nb_labels = emissions.shape

        # in the first iteration, BOS will have all the scores
        alphas = self.transitions[self.BOS_TAG_ID, :].unsqueeze(
            0) + emissions[:, 0]

        for i in range(1, seq_length):
            # (bs, nb_labels) -> (bs, 1, nb_labels)
            e_scores = emissions[:, i].unsqueeze(1)

            # (nb_labels, nb_labels) -> (bs, nb_labels, nb_labels)
            t_scores = self.transitions.unsqueeze(0)

            # (bs, nb_labels)  -> (bs, nb_labels, 1)
            a_scores = alphas.unsqueeze(2)

            scores = e_scores + t_scores + a_scores
            new_alphas = paddle.logsumexp(scores, axis=1)

            # set alphas if the mask is valid, otherwise keep the current values
            is_valid = mask[:, i].unsqueeze(-1)
            alphas = is_valid * new_alphas + (1 - is_valid) * alphas

        # add the scores for the final transition
        last_transition = self.transitions[:, self.EOS_TAG_ID]
        end_scores = alphas + last_transition.unsqueeze(0)

        # return a *log* of sums of exps
        return paddle.logsumexp(end_scores, axis=1)

    def _viterbi_decode(self, emissions, mask):
        """Compute the viterbi algorithm to find the most probable sequence of labels
        given a sequence of emissions.

        Args:
            emissions (paddle.Tensor): (batch_size, seq_len, nb_labels)
            mask (Paddle.FloatTensor): (batch_size, seq_len)

        Returns:
            paddle.Tensor: the viterbi score for each batch.
                Shape of (batch_size,)
            list of lists of ints: the best viterbi sequence of labels for each batch
        """
        batch_size, seq_length, nb_labels = emissions.shape

        # in the first iteration, BOS will have all the scores and then, the max
        alphas = self.transitions[self.BOS_TAG_ID, :].unsqueeze(
            0) + emissions[:, 0]

        backpointers = []

        for i in range(1, seq_length):
            # (bs, nb_labels) -> (bs, 1, nb_labels)
            e_scores = emissions[:, i].unsqueeze(1)

            # (nb_labels, nb_labels) -> (bs, nb_labels, nb_labels)
            t_scores = self.transitions.unsqueeze(0)

            # (bs, nb_labels)  -> (bs, nb_labels, 1)
            a_scores = alphas.unsqueeze(2)

            # combine current scores with previous alphas
            scores = e_scores + t_scores + a_scores

            # so far is exactly like the forward algorithm,
            # but now, instead of calculating the logsumexp,
            # we will find the highest score and the tag associated with it
            # max_scores, max_score_tags = paddle.max(scores, axis=1)
            max_scores = paddle.max(scores, axis=1)
            max_score_tags = paddle.argmax(scores, axis=1)

            # set alphas if the mask is valid, otherwise keep the current values
            is_valid = mask[:, i].unsqueeze(-1)
            alphas = is_valid * max_scores + (1 - is_valid) * alphas

            # add the max_score_tags for our list of backpointers
            # max_scores has shape (batch_size, nb_labels) so we transpose it to
            # be compatible with our previous loopy version of viterbi
            backpointers.append(max_score_tags.t())

        # add the scores for the final transition
        last_transition = self.transitions[:, self.EOS_TAG_ID]
        end_scores = alphas + last_transition.unsqueeze(0)

        # get the final most probable score and the final most probable tag
        # max_final_scores, max_final_tags = paddle.max(end_scores, axis=1)
        max_final_scores = paddle.max(end_scores, axis=1)
        max_final_tags = paddle.argmax(end_scores, axis=1)

        # find the best sequence of labels for each sample in the batch
        best_sequences = []
        emission_lengths = mask.int().sum(axis=1)
        for i in range(batch_size):

            # recover the original sentence length for the i-th sample in the batch
            sample_length = emission_lengths[i].item()

            # recover the max tag for the last timestep
            sample_final_tag = max_final_tags[i].item()

            # limit the backpointers until the last but one
            # since the last corresponds to the sample_final_tag
            sample_backpointers = backpointers[:sample_length - 1]

            # follow the backpointers to build the sequence of labels
            sample_path = self._find_best_path(i, sample_final_tag,
                                               sample_backpointers)

            # add this path to the list of best sequences
            best_sequences.append(sample_path)

        return max_final_scores, best_sequences

    def _find_best_path(self, sample_id, best_tag, backpointers):
        """Auxiliary function to find the best path sequence for a specific sample.

            Args:
                sample_id (int): sample index in the range [0, batch_size)
                best_tag (int): tag which maximizes the final score
                backpointers (list of lists of tensors): list of pointers with
                shape (seq_len_i-1, nb_labels, batch_size) where seq_len_i
                represents the length of the ith sample in the batch

            Returns:
                list of ints: a list of tag indexes representing the best path
        """
        # add the final best_tag to our best path
        best_path = [best_tag]

        # traverse the backpointers in backwards
        for backpointers_t in reversed(backpointers):

            # recover the best_tag at this timestep
            best_tag = backpointers_t[best_tag][sample_id].item()

            # append to the beginning of the list so we don't need to reverse it later
            best_path.insert(0, best_tag)

        return best_path


================================================
FILE: paddlespeech/s2t/modules/ctc.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from typing import Union

import paddle
from paddle import nn
from paddle.nn import functional as F
from typeguard import typechecked

from paddlespeech.s2t.modules.align import Linear
from paddlespeech.s2t.modules.loss import CTCLoss
from paddlespeech.s2t.utils import ctc_utils
from paddlespeech.s2t.utils.log import Log

logger = Log(__name__).getlog()

# The CTC decoder helpers are optional: try a plain import first and only fall
# back to the install-then-retry path when that import fails.
try:
    from paddlespeech.s2t.decoders.ctcdecoder import ctc_beam_search_decoding_batch  # noqa: F401
    from paddlespeech.s2t.decoders.ctcdecoder import ctc_greedy_decoding  # noqa: F401
    from paddlespeech.s2t.decoders.ctcdecoder import Scorer  # noqa: F401
    from paddlespeech.s2t.decoders.ctcdecoder import CTCBeamSearchDecoder  # noqa: F401
except ImportError:
    try:
        from paddlespeech.s2t.utils import dynamic_pip_install
        package_name = 'paddlespeech_ctcdecoders'
        # The install step is skipped when sys.platform is "win32"; the imports
        # below are retried either way.
        # NOTE(review): presumably `install` pip-installs the package at
        # runtime; confirm against paddlespeech.s2t.utils.dynamic_pip_install.
        if sys.platform != "win32":
            dynamic_pip_install.install(package_name)
        from paddlespeech.s2t.decoders.ctcdecoder import ctc_beam_search_decoding_batch  # noqa: F401
        from paddlespeech.s2t.decoders.ctcdecoder import ctc_greedy_decoding  # noqa: F401
        from paddlespeech.s2t.decoders.ctcdecoder import Scorer  # noqa: F401
        from paddlespeech.s2t.decoders.ctcdecoder import CTCBeamSearchDecoder  # noqa: F401
    except Exception as e:
        # Best effort: any failure in the fallback is only logged at info
        # level, so the four names above may be undefined in this module.
        logger.info("paddlespeech_ctcdecoders not installed!")

__all__ = ['CTCDecoder']


class CTCDecoderBase(nn.Layer):
    """Linear projection to the vocabulary plus a CTC loss criterion.

    Besides the loss (``forward``), it exposes the projected frame activations
    as softmax / log-softmax / argmax and a forced-alignment helper.
    """

    @typechecked
    def __init__(self,
                 odim,
                 enc_n_units,
                 blank_id=0,
                 dropout_rate: float=0.0,
                 reduction: Union[str, bool]=True,
                 batch_average: bool=True,
                 grad_norm_type: Union[str, None]=None):
        """CTC decoder

        Args:
            odim ([int]): text vocabulary size
            enc_n_units ([int]): encoder output dimension
            blank_id (int): index of the CTC blank symbol, passed to the loss.
            dropout_rate (float): dropout rate (0.0 ~ 1.0)
            reduction (bool|str): reduce the CTC loss into a scalar. A bool is
                mapped to 'sum' (True) or 'none' (False); a str is passed to
                the loss unchanged.
            batch_average (bool): do batch dim wise average.
            grad_norm_type (str): Default, None. one of 'instance', 'batch', 'frame', None.
        """
        super().__init__()

        self.blank_id = blank_id
        self.odim = odim
        self.dropout = nn.Dropout(dropout_rate)
        self.ctc_lo = Linear(enc_n_units, self.odim)
        if isinstance(reduction, bool):
            reduction_type = "sum" if reduction else "none"
        else:
            reduction_type = reduction
        self.criterion = CTCLoss(
            blank=self.blank_id,
            reduction=reduction_type,
            batch_average=batch_average,
            grad_norm_type=grad_norm_type)

    def forward(self, hs_pad, hlens, ys_pad, ys_lens):
        """Calculate CTC loss.

        Args:
            hs_pad (Tensor): batch of padded hidden state sequences (B, Tmax, D)
            hlens (Tensor): batch of lengths of hidden state sequences (B)
            ys_pad (Tensor): batch of padded character id sequence tensor (B, Lmax)
            ys_lens (Tensor): batch of lengths of character sequence (B)
        Returns:
            loss (Tensor): ctc loss value, scalar.
        """
        # Dropout is applied only on this loss path, not in the
        # softmax/log_softmax/argmax helpers below.
        logits = self.ctc_lo(self.dropout(hs_pad))
        loss = self.criterion(logits, ys_pad, hlens, ys_lens)
        return loss

    def softmax(self, eouts: paddle.Tensor, temperature: float=1.0):
        """Get CTC probabilities.

        The result is also stored on ``self.probs``.

        Args:
            eouts (FloatTensor): `[B, T, enc_units]`
            temperature (float): divisor applied to the logits before softmax.
        Returns:
            probs (FloatTensor): `[B, T, odim]`
        """
        self.probs = F.softmax(self.ctc_lo(eouts) / temperature, axis=2)
        return self.probs

    def log_softmax(self, hs_pad: paddle.Tensor,
                    temperature: float=1.0) -> paddle.Tensor:
        """log_softmax of frame activations
        Args:
            Tensor hs_pad: 3d tensor (B, Tmax, eprojs)
            temperature (float): divisor applied to the logits before
                log-softmax.
        Returns:
            paddle.Tensor: log softmax applied 3d tensor (B, Tmax, odim)
        """
        return F.log_softmax(self.ctc_lo(hs_pad) / temperature, axis=2)

    def argmax(self, hs_pad: paddle.Tensor) -> paddle.Tensor:
        """argmax of frame activations
        Args:
            paddle.Tensor hs_pad: 3d tensor (B, Tmax, eprojs)
        Returns:
            paddle.Tensor: argmax applied 2d tensor (B, Tmax)
        """
        # Reduce over the vocabulary axis, using the same `axis` keyword as
        # softmax/log_softmax above.
        return paddle.argmax(self.ctc_lo(hs_pad), axis=2)

    def forced_align(self,
                     ctc_probs: paddle.Tensor,
                     y: paddle.Tensor,
                     blank_id=0) -> list:
        """ctc forced alignment.

        Delegates to ``ctc_utils.forced_align``. The ``blank_id`` argument
        (default 0) is used as given; ``self.blank_id`` is not consulted.

        Args:
            ctc_probs (paddle.Tensor): hidden state sequence, 2d tensor (T, D)
            y (paddle.Tensor): label id sequence tensor, 1d tensor (L)
            blank_id (int): blank symbol index
        Returns:
            paddle.Tensor: best alignment result, (T).
        """
        return ctc_utils.forced_align(ctc_probs, y, blank_id)


class CTCDecoder(CTCDecoderBase):
    """CTC decoder offering greedy and beam search decoding.

    On top of ``CTCDecoderBase`` this class provides:

    * offline batch helpers (``decode_probs_offline``), and
    * a stateful beam search decoder that is created by ``init_decoder``,
      fed with ``next``, read with ``decode``, and managed with
      ``reset_decoder`` / ``del_decoder``.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to the base class and clear decoder handles."""
        super().__init__(*args, **kwargs)
        # CTCDecoder LM Score handle
        self._ext_scorer = None
        self.beam_search_decoder = None

    def _decode_batch_greedy_offline(self, probs_split, vocab_list):
        """This function will be deprecated in future.
        Decode by best path for a batch of probs matrix input.
        :param probs_split: List of 2-D probability matrix, and each consists
                            of prob vectors for one speech utterance.
        :type probs_split: List of matrix
        :param vocab_list: List of tokens in the vocabulary, for decoding.
        :type vocab_list: list
        :return: List of transcription texts.
        :rtype: List of str
        """
        return [
            ctc_greedy_decoding(
                probs_seq=probs, vocabulary=vocab_list, blank_id=self.blank_id)
            for probs in probs_split
        ]

    def _init_ext_scorer(self, beam_alpha, beam_beta, language_model_path,
                         vocab_list):
        """Initialize the external scorer.
        :param beam_alpha: Parameter associated with language model.
        :type beam_alpha: float
        :param beam_beta: Parameter associated with word count.
        :type beam_beta: float
        :param language_model_path: Filepath for language model. If it is
                                    empty or None, the external scorer will be
                                    set to None, and the decoding method will
                                    be pure beam search without scorer.
        :type language_model_path: str|None
        :param vocab_list: List of tokens in the vocabulary, for decoding.
        :type vocab_list: list
        """
        # init once
        if self._ext_scorer is not None:
            return

        # Truthiness check so that both '' and None mean "no language model".
        if language_model_path:
            logger.info("begin to initialize the external scorer "
                        "for decoding")
            self._ext_scorer = Scorer(beam_alpha, beam_beta,
                                      language_model_path, vocab_list)
            lm_char_based = self._ext_scorer.is_character_based()
            lm_max_order = self._ext_scorer.get_max_order()
            lm_dict_size = self._ext_scorer.get_dict_size()
            logger.info("language model: "
                        "is_character_based = %d," % lm_char_based +
                        " max_order = %d," % lm_max_order + " dict_size = %d" %
                        lm_dict_size)
            logger.info("end initializing scorer")
        else:
            self._ext_scorer = None
            logger.info("no language model provided, "
                        "decoding by pure beam search without scorer.")

    def _decode_batch_beam_search_offline(
            self, probs_split, beam_alpha, beam_beta, beam_size, cutoff_prob,
            cutoff_top_n, vocab_list, num_processes):
        """
        This function will be deprecated in future.
        Decode by beam search for a batch of probs matrix input.
        :param probs_split: List of 2-D probability matrix, and each consists
                            of prob vectors for one speech utterance.
        :type probs_split: List of matrix
        :param beam_alpha: Parameter associated with language model.
        :type beam_alpha: float
        :param beam_beta: Parameter associated with word count.
        :type beam_beta: float
        :param beam_size: Width for Beam search.
        :type beam_size: int
        :param cutoff_prob: Cutoff probability in pruning,
                            default 1.0, no pruning.
        :type cutoff_prob: float
        :param cutoff_top_n: Cutoff number in pruning, only top cutoff_top_n
                        characters with highest probs in vocabulary will be
                        used in beam search, default 40.
        :type cutoff_top_n: int
        :param vocab_list: List of tokens in the vocabulary, for decoding.
        :type vocab_list: list
        :param num_processes: Number of processes (CPU) for decoder.
        :type num_processes: int
        :return: List of transcription texts.
        :rtype: List of str
        """
        if self._ext_scorer is not None:
            self._ext_scorer.reset_params(beam_alpha, beam_beta)

        # beam search decode; never use more processes than utterances
        num_processes = min(num_processes, len(probs_split))
        beam_search_results = ctc_beam_search_decoding_batch(
            probs_split=probs_split,
            vocabulary=vocab_list,
            beam_size=beam_size,
            num_processes=num_processes,
            ext_scoring_func=self._ext_scorer,
            cutoff_prob=cutoff_prob,
            cutoff_top_n=cutoff_top_n,
            blank_id=self.blank_id)

        # keep element [1] of the first entry of every utterance's beam list
        results = [result[0][1] for result in beam_search_results]
        return results

    def init_decoder(self, batch_size, vocab_list, decoding_method,
                     lang_model_path, beam_alpha, beam_beta, beam_size,
                     cutoff_prob, cutoff_top_n, num_processes):
        """
        init ctc decoders
        Args:
            batch_size(int): Batch size for input data
            vocab_list (list): List of tokens in the vocabulary, for decoding
            decoding_method (str): "ctc_beam_search" or "ctc_greedy"
            lang_model_path (str): language model path
            beam_alpha (float): beam_alpha
            beam_beta (float): beam_beta
            beam_size (int): beam_size
            cutoff_prob (float): cutoff probability in beam search
            cutoff_top_n (int): cutoff_top_n
            num_processes (int): num_processes

        Raises:
            ValueError: when decoding_method not support.

        Returns:
            CTCBeamSearchDecoder for "ctc_beam_search"; None for "ctc_greedy".
        """
        self.batch_size = batch_size
        self.vocab_list = vocab_list
        self.decoding_method = decoding_method
        self.beam_size = beam_size
        self.cutoff_prob = cutoff_prob
        self.cutoff_top_n = cutoff_top_n
        self.num_processes = num_processes
        if decoding_method == "ctc_beam_search":
            self._init_ext_scorer(beam_alpha, beam_beta, lang_model_path,
                                  vocab_list)
            # the beam search decoder is created only once and then reused
            if self.beam_search_decoder is None:
                self.beam_search_decoder = self.get_decoder(
                    vocab_list, batch_size, beam_alpha, beam_beta, beam_size,
                    num_processes, cutoff_prob, cutoff_top_n)
            return self.beam_search_decoder
        elif decoding_method == "ctc_greedy":
            self._init_ext_scorer(beam_alpha, beam_beta, lang_model_path,
                                  vocab_list)
        else:
            raise ValueError(f"Not support: {decoding_method}")

    def decode_probs_offline(self, probs, logits_lens, vocab_list,
                             decoding_method, lang_model_path, beam_alpha,
                             beam_beta, beam_size, cutoff_prob, cutoff_top_n,
                             num_processes):
        """
        This function will be deprecated in future.
        ctc decoding with probs.
        Args:
            probs (Tensor): activation after softmax
            logits_lens (Tensor): audio output lens
            vocab_list (list): List of tokens in the vocabulary, for decoding
            decoding_method (str): "ctc_greedy" or "ctc_beam_search"
            lang_model_path (str): language model path (not used here)
            beam_alpha (float): beam_alpha
            beam_beta (float): beam_beta
            beam_size (int): beam_size
            cutoff_prob (float): cutoff probability in beam search
            cutoff_top_n (int): cutoff_top_n
            num_processes (int): num_processes

        Raises:
            ValueError: when decoding_method not support.

        Returns:
            List[str]: transcripts.
        """
        logger.warning(
            "This function will be deprecated in future: decode_probs_offline")
        # trim every utterance to its own length
        probs_split = [probs[i, :l, :] for i, l in enumerate(logits_lens)]
        if decoding_method == "ctc_greedy":
            result_transcripts = self._decode_batch_greedy_offline(
                probs_split=probs_split, vocab_list=vocab_list)
        elif decoding_method == "ctc_beam_search":
            result_transcripts = self._decode_batch_beam_search_offline(
                probs_split=probs_split,
                beam_alpha=beam_alpha,
                beam_beta=beam_beta,
                beam_size=beam_size,
                cutoff_prob=cutoff_prob,
                cutoff_top_n=cutoff_top_n,
                vocab_list=vocab_list,
                num_processes=num_processes)
        else:
            raise ValueError(f"Not support: {decoding_method}")
        return result_transcripts

    def get_decoder(self, vocab_list, batch_size, beam_alpha, beam_beta,
                    beam_size, num_processes, cutoff_prob, cutoff_top_n):
        """
        init get ctc decoder
        Args:
            vocab_list (list): List of tokens in the vocabulary, for decoding.
            batch_size(int): Batch size for input data
            beam_alpha (float): beam_alpha
            beam_beta (float): beam_beta
            beam_size (int): beam_size
            num_processes (int): num_processes
            cutoff_prob (float): cutoff probability in beam search
            cutoff_top_n (int): cutoff_top_n

        Raises:
            ValueError: when ``self.decoding_method`` is not supported.

        Returns:
            CTCBeamSearchDecoder
        """
        num_processes = min(num_processes, batch_size)
        if self._ext_scorer is not None:
            self._ext_scorer.reset_params(beam_alpha, beam_beta)
        if self.decoding_method == "ctc_beam_search":
            beam_search_decoder = CTCBeamSearchDecoder(
                vocab_list, batch_size, beam_size, num_processes, cutoff_prob,
                cutoff_top_n, self._ext_scorer, self.blank_id)
        else:
            # the method name lives on the instance; there is no local of
            # that name in this scope
            raise ValueError(f"Not support: {self.decoding_method}")
        return beam_search_decoder

    def next(self, probs, logits_lens):
        """
        Input probs into ctc decoder
        Args:
            probs: probs for a batch of data; indexed as ``probs[i, :l, :]``
                and converted with ``.tolist()``
            logits_lens: logits lens for a batch of data; must support
                ``> 0`` comparison, ``.tolist()`` and iteration
        Raises:
            Exception: when the ctc decoder is not initialized
            ValueError: when decoding_method not support.
        """

        if self.beam_search_decoder is None:
            raise Exception(
                "You need to initialize the beam_search_decoder firstly")
        beam_search_decoder = self.beam_search_decoder

        # Keep the boolean flags for choosing the slice below; the decoder
        # itself receives them as the strings "true" / "false". Testing the
        # strings would always be truthy and make the fallback unreachable.
        non_empty = (logits_lens > 0).tolist()
        has_value = ["true" if flag is True else "false" for flag in non_empty]
        probs_split = [
            probs[i, :l, :].tolist() if non_empty[i] else probs[i].tolist()
            for i, l in enumerate(logits_lens)
        ]
        if self.decoding_method == "ctc_beam_search":
            beam_search_decoder.next(probs_split, has_value)
        else:
            raise ValueError(f"Not support: {self.decoding_method}")

        return

    def decode(self):
        """
        Get the decoding result
        Raises:
            Exception: when the ctc decoder is not initialized
            ValueError: when decoding_method not support.
        Returns:
            results_best (list(str)): The best result for a batch of data
            results_beam (list(list(str))): The beam search result for a batch of data
        """
        if self.beam_search_decoder is None:
            raise Exception(
                "You need to initialize the beam_search_decoder firstly")

        beam_search_decoder = self.beam_search_decoder
        if self.decoding_method == "ctc_beam_search":
            batch_beam_results = beam_search_decoder.decode()
            # normalize every beam entry to a plain 2-tuple of its first two
            # elements
            batch_beam_results = [[(res[0], res[1]) for res in beam_results]
                                  for beam_results in batch_beam_results]
            results_best = [result[0][1] for result in batch_beam_results]
            results_beam = [[trans[1] for trans in result]
                            for result in batch_beam_results]

        else:
            raise ValueError(f"Not support: {self.decoding_method}")

        return results_best, results_beam

    def reset_decoder(self,
                      batch_size=-1,
                      beam_size=-1,
                      num_processes=-1,
                      cutoff_prob=-1.0,
                      cutoff_top_n=-1):
        """
        Reset the decoder state

        Every argument greater than 0 replaces the stored setting of the same
        name (the stored setting is updated before the initialization check);
        other values keep the current setting.

        Args:
            batch_size(int): Batch size for input data
            beam_size (int): beam_size
            num_processes (int): num_processes
            cutoff_prob (float): cutoff probability in beam search
            cutoff_top_n (int): cutoff_top_n
        Raises:
            Exception: when the ctc decoder is not initialized
        """
        if batch_size > 0:
            self.batch_size = batch_size
        if beam_size > 0:
            self.beam_size = beam_size
        if num_processes > 0:
            self.num_processes = num_processes
        if cutoff_prob > 0:
            self.cutoff_prob = cutoff_prob
        if cutoff_top_n > 0:
            self.cutoff_top_n = cutoff_top_n
        if self.beam_search_decoder is None:
            raise Exception(
                "You need to initialize the beam_search_decoder firstly")
        self.beam_search_decoder.reset_state(
            self.batch_size, self.beam_size, self.num_processes,
            self.cutoff_prob, self.cutoff_top_n)

    def del_decoder(self):
        """
        Delete the decoder
        """
        if self.beam_search_decoder is not None:
            del self.beam_search_decoder
            self.beam_search_decoder = None


================================================
FILE: paddlespeech/s2t/modules/decoder.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# Copyright 2019 Mobvoi Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from wenet(https://github.com/wenet-e2e/wenet)
"""Decoder definition."""
from typing import Any
from typing import List
from typing import Optional
from typing import Tuple

import paddle
from paddle import nn
from typeguard import typechecked

from paddlespeech.s2t.decoders.scorers.scorer_interface import BatchScorerInterface
from paddlespeech.s2t.modules.align import Embedding
from paddlespeech.s2t.modules.align import LayerNorm
from paddlespeech.s2t.modules.align import Linear
from paddlespeech.s2t.modules.attention import MultiHeadedAttention
from paddlespeech.s2t.modules.decoder_layer import DecoderLayer
from paddlespeech.s2t.modules.embedding import PositionalEncoding
from paddlespeech.s2t.modules.mask import make_non_pad_mask
from paddlespeech.s2t.modules.mask import make_xs_mask
from paddlespeech.s2t.modules.mask import subsequent_mask
from paddlespeech.s2t.modules.positionwise_feed_forward import PositionwiseFeedForward
from paddlespeech.s2t.utils.log import Log
# Module-level logger obtained from the project's Log helper.
logger = Log(__name__).getlog()

# Both decoder classes defined in this module are part of its public API.
__all__ = ["TransformerDecoder", "BiTransformerDecoder"]


class TransformerDecoder(BatchScorerInterface, nn.Layer):
    """Base class of Transformer decoder module.
    Args:
        vocab_size: output dim
        encoder_output_size: dimension of attention
        attention_heads: the number of heads of multi head attention
        linear_units: the hidden units number of position-wise feedforward
        num_blocks: the number of decoder blocks
        dropout_rate: dropout rate
        positional_dropout_rate: dropout rate passed to PositionalEncoding
        self_attention_dropout_rate: dropout rate for self attention
        src_attention_dropout_rate: dropout rate for source attention
        input_layer: input layer type, only `embed` is supported
        use_output_layer: whether to use output layer
        normalize_before:
            True: use layer_norm before each sub-block of a layer.
            False: use layer_norm after each sub-block of a layer.
        concat_after: whether to concat attention layer's input and output
            True: x -> x + linear(concat(x, att(x)))
            False: x -> x + att(x)
        max_len: `max_len` passed to PositionalEncoding
    Raises:
        ValueError: when `input_layer` is not `embed`.
    """

    @typechecked
    def __init__(self,
                 vocab_size: int,
                 encoder_output_size: int,
                 attention_heads: int=4,
                 linear_units: int=2048,
                 num_blocks: int=6,
                 dropout_rate: float=0.1,
                 positional_dropout_rate: float=0.1,
                 self_attention_dropout_rate: float=0.0,
                 src_attention_dropout_rate: float=0.0,
                 input_layer: str="embed",
                 use_output_layer: bool=True,
                 normalize_before: bool=True,
                 concat_after: bool=False,
                 max_len: int=5000):

        nn.Layer.__init__(self)
        # read by `score` to decide whether cached decoding is supported
        self.selfattention_layer_type = 'selfattn'
        attention_dim = encoder_output_size

        if input_layer == "embed":
            self.embed = nn.Sequential(
                Embedding(vocab_size, attention_dim),
                PositionalEncoding(
                    attention_dim, positional_dropout_rate, max_len=max_len), )
        else:
            raise ValueError(f"only 'embed' is supported: {input_layer}")

        self.normalize_before = normalize_before
        # after_norm and output_layer are always created; whether they are
        # applied is decided by normalize_before / use_output_layer
        self.after_norm = LayerNorm(attention_dim, epsilon=1e-12)
        self.use_output_layer = use_output_layer
        self.output_layer = Linear(attention_dim, vocab_size)

        self.decoders = nn.LayerList([
            DecoderLayer(
                size=attention_dim,
                self_attn=MultiHeadedAttention(attention_heads, attention_dim,
                                               self_attention_dropout_rate),
                src_attn=MultiHeadedAttention(attention_heads, attention_dim,
                                              src_attention_dropout_rate),
                feed_forward=PositionwiseFeedForward(
                    attention_dim, linear_units, dropout_rate),
                dropout_rate=dropout_rate,
                normalize_before=normalize_before,
                concat_after=concat_after, ) for _ in range(num_blocks)
        ])

    def forward(self,
                memory: paddle.Tensor,
                memory_mask: paddle.Tensor,
                ys_in_pad: paddle.Tensor,
                ys_in_lens: paddle.Tensor,
                r_ys_in_pad: paddle.Tensor=paddle.empty([0]),
                reverse_weight: float=0.0
                ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor]:
        """Forward decoder.
        Args:
            memory: encoded memory, float32  (batch, maxlen_in, feat)
            memory_mask: encoder memory mask, (batch, 1, maxlen_in)
            ys_in_pad: padded input token ids, int64 (batch, maxlen_out)
            ys_in_lens: input lengths of this batch (batch)
            r_ys_in_pad: not used in transformer decoder, in order to unify api
                with bidirectional decoder
            reverse_weight: not used in transformer decoder, in order to unify
                api with bidirectional decoder
        Returns:
            (tuple): tuple containing:
                x: decoded token score before softmax (batch, maxlen_out, vocab_size)
                    if use_output_layer is True,
                a constant scalar tensor 0.0, placeholder for the right to
                    left output of the bidirectional decoder,
                olens: the target mask summed over axis 1
        """
        tgt = ys_in_pad
        # tgt_mask: (B, 1, L)
        tgt_mask = (make_non_pad_mask(ys_in_lens).unsqueeze(1))
        # m: (1, L, L)
        m = subsequent_mask(tgt_mask.shape[-1]).unsqueeze(0)
        # tgt_mask: (B, L, L)
        tgt_mask = tgt_mask & m

        x, _ = self.embed(tgt)
        for layer in self.decoders:
            x, tgt_mask, memory, memory_mask = layer(x, tgt_mask, memory,
                                                     memory_mask)
        if self.normalize_before:
            x = self.after_norm(x)
        if self.use_output_layer:
            x = self.output_layer(x)

        olens = tgt_mask.sum(1)
        return x, paddle.to_tensor(0.0), olens

    def forward_one_step(
            self,
            memory: paddle.Tensor,
            memory_mask: paddle.Tensor,
            tgt: paddle.Tensor,
            tgt_mask: paddle.Tensor,
            cache: Optional[List[paddle.Tensor]]=None,
    ) -> Tuple[paddle.Tensor, List[paddle.Tensor]]:
        """Forward one step.
            This is only used for decoding.
        Args:
            memory: encoded memory, float32  (batch, maxlen_in, feat)
            memory_mask: encoded memory mask, (batch, 1, maxlen_in)
            tgt: input token ids, int64 (batch, maxlen_out)
            tgt_mask: input token mask,  (batch, maxlen_out, maxlen_out)
                      dtype=paddle.bool
            cache: cached output list of (batch, max_time_out-1, size)
        Returns:
            y, cache: NN output value and cache per `self.decoders`.
                `y.shape` is (batch, token); `y` is a log-softmax over the
                output layer when `use_output_layer` is True.
        """
        x, _ = self.embed(tgt)
        new_cache = []
        for i, decoder in enumerate(self.decoders):
            # each layer gets its own cache entry, or None on the first step
            if cache is None:
                c = None
            else:
                c = cache[i]
            x, tgt_mask, memory, memory_mask = decoder(
                x, tgt_mask, memory, memory_mask, cache=c)
            new_cache.append(x)
        # only the last position is scored
        if self.normalize_before:
            y = self.after_norm(x[:, -1])
        else:
            y = x[:, -1]
        if self.use_output_layer:
            y = paddle.nn.functional.log_softmax(self.output_layer(y), axis=-1)
        return y, new_cache

    # beam search API (see ScorerInterface)
    def score(self, ys, state, x):
        """Score the next token for a single hypothesis.

        Args:
            ys: prefix tokens, (ylen,)
            state: per-layer cache for the prefix, or None
            x: encoder feature, (xlen, n_feat)
        Returns:
            tuple of the next-token scores from `forward_one_step` with the
            batch axis squeezed, and the new per-layer cache.
        """
        ys_mask = subsequent_mask(len(ys)).unsqueeze(0)  # (B,L,L)
        x_mask = make_xs_mask(x.unsqueeze(0)).unsqueeze(1)  # (B,1,T)
        if self.selfattention_layer_type != "selfattn":
            # TODO(karita): implement cache
            # use the module logger: `logging` is not imported in this file
            logger.warning(
                f"{self.selfattention_layer_type} does not support cached decoding."
            )
            state = None
        logp, state = self.forward_one_step(
            x.unsqueeze(0), x_mask, ys.unsqueeze(0), ys_mask, cache=state)
        return logp.squeeze(0), state

    # batch beam search API (see BatchScorerInterface)
    def batch_score(self,
                    ys: paddle.Tensor,
                    states: List[Any],
                    xs: paddle.Tensor) -> Tuple[paddle.Tensor, List[Any]]:
        """Score new token batch (required).

        Args:
            ys (paddle.Tensor): paddle.int64 prefix tokens (n_batch, ylen).
            states (List[Any]): Scorer states for prefix tokens.
            xs (paddle.Tensor):
                The encoder feature that generates ys (n_batch, xlen, n_feat).

        Returns:
            tuple[paddle.Tensor, List[Any]]: Tuple of
                batchfied scores for next token with shape of `(n_batch, n_vocab)`
                and next state list for ys.

        """
        # merge states
        n_batch = len(ys)
        n_layers = len(self.decoders)
        # only the first state is inspected to decide whether a cache exists
        if states[0] is None:
            batch_state = None
        else:
            # transpose state of [batch, layer] into [layer, batch]
            batch_state = [
                paddle.stack([states[b][i] for b in range(n_batch)])
                for i in range(n_layers)
            ]

        # batch decoding
        ys_mask = subsequent_mask(ys.shape[-1]).unsqueeze(0)  # (B,L,L)
        xs_mask = make_xs_mask(xs).unsqueeze(1)  # (B,1,T)
        logp, states = self.forward_one_step(
            xs, xs_mask, ys, ys_mask, cache=batch_state)

        # transpose state of [layer, batch] into [batch, layer]
        state_list = [[states[i][b] for i in range(n_layers)]
                      for b in range(n_batch)]
        return logp, state_list


class BiTransformerDecoder(BatchScorerInterface, nn.Layer):
    """Bidirectional Transformer decoder built from two TransformerDecoder.

    A left to right decoder with `num_blocks` blocks and a right to left
    decoder with `r_num_blocks` blocks are created with otherwise identical
    settings.

    Args:
        vocab_size: output dim
        encoder_output_size: dimension of attention
        attention_heads: the number of heads of multi head attention
        linear_units: the hidden units number of position-wise feedforward
        num_blocks: the number of decoder blocks
        r_num_blocks: the number of right to left decoder blocks
        dropout_rate: dropout rate
        positional_dropout_rate: forwarded to both TransformerDecoder
        self_attention_dropout_rate: dropout rate for attention
        src_attention_dropout_rate: forwarded to both TransformerDecoder
        input_layer: input layer type
        use_output_layer: whether to use output layer
        normalize_before:
            True: use layer_norm before each sub-block of a layer.
            False: use layer_norm after each sub-block of a layer.
        concat_after: whether to concat attention layer's input and output
            True: x -> x + linear(concat(x, att(x)))
            False: x -> x + att(x)
        max_len: forwarded to both TransformerDecoder
    """

    @typechecked
    def __init__(self,
                 vocab_size: int,
                 encoder_output_size: int,
                 attention_heads: int=4,
                 linear_units: int=2048,
                 num_blocks: int=6,
                 r_num_blocks: int=0,
                 dropout_rate: float=0.1,
                 positional_dropout_rate: float=0.1,
                 self_attention_dropout_rate: float=0.0,
                 src_attention_dropout_rate: float=0.0,
                 input_layer: str="embed",
                 use_output_layer: bool=True,
                 normalize_before: bool=True,
                 concat_after: bool=False,
                 max_len: int=5000):

        nn.Layer.__init__(self)
        # Everything except the number of blocks is shared by both directions.
        shared_config = dict(
            vocab_size=vocab_size,
            encoder_output_size=encoder_output_size,
            attention_heads=attention_heads,
            linear_units=linear_units,
            dropout_rate=dropout_rate,
            positional_dropout_rate=positional_dropout_rate,
            self_attention_dropout_rate=self_attention_dropout_rate,
            src_attention_dropout_rate=src_attention_dropout_rate,
            input_layer=input_layer,
            use_output_layer=use_output_layer,
            normalize_before=normalize_before,
            concat_after=concat_after,
            max_len=max_len)

        # The left decoder is registered first, then the right one.
        self.left_decoder = TransformerDecoder(
            num_blocks=num_blocks, **shared_config)
        self.right_decoder = TransformerDecoder(
            num_blocks=r_num_blocks, **shared_config)

    def forward(
            self,
            memory: paddle.Tensor,
            memory_mask: paddle.Tensor,
            ys_in_pad: paddle.Tensor,
            ys_in_lens: paddle.Tensor,
            r_ys_in_pad: paddle.Tensor,
            reverse_weight: float=0.0,
    ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor]:
        """Run the left decoder and, when requested, the right decoder.

        Args:
            memory: encoded memory, float32  (batch, maxlen_in, feat)
            memory_mask: encoder memory mask, (batch, 1, maxlen_in)
            ys_in_pad: padded input token ids, int64 (batch, maxlen_out)
            ys_in_lens: input lengths of this batch (batch)
            r_ys_in_pad: padded input token ids, int64 (batch, maxlen_out),
                used for right to left decoder
            reverse_weight: the right to left decoder only runs when this is
                greater than 0.0
        Returns:
            (tuple): tuple containing:
                x: decoded token score before softmax (batch, maxlen_out,
                    vocab_size) if use_output_layer is True,
                r_x: decoded token score (right to left decoder)
                    before softmax (batch, maxlen_out, vocab_size)
                    if use_output_layer is True; a `paddle.zeros([1])`
                    placeholder when the right decoder is not run,
                olens: as returned by the last decoder that was run
        """
        left_out, _, out_lens = self.left_decoder(
            memory, memory_mask, ys_in_pad, ys_in_lens)
        # placeholder, replaced below when the reverse pass is enabled
        right_out = paddle.zeros([1])
        if reverse_weight > 0.0:
            right_out, _, out_lens = self.right_decoder(
                memory, memory_mask, r_ys_in_pad, ys_in_lens)
        return left_out, right_out, out_lens

    def forward_one_step(
            self,
            memory: paddle.Tensor,
            memory_mask: paddle.Tensor,
            tgt: paddle.Tensor,
            tgt_mask: paddle.Tensor,
            cache: Optional[List[paddle.Tensor]]=None,
    ) -> Tuple[paddle.Tensor, List[paddle.Tensor]]:
        """Forward one step using the left to right decoder only.
            This is only used for decoding.
        Args:
            memory: encoded memory, float32  (batch, maxlen_in, feat)
            memory_mask: encoded memory mask, (batch, 1, maxlen_in)
            tgt: input token ids, int64 (batch, maxlen_out)
            tgt_mask: input token mask,  (batch, maxlen_out, maxlen_out)
                      dtype=paddle.bool
            cache: cached output list of (batch, max_time_out-1, size)
        Returns:
            y, cache: the result of `self.left_decoder.forward_one_step`,
            unchanged.
        """
        step_result = self.left_decoder.forward_one_step(
            memory, memory_mask, tgt, tgt_mask, cache)
        return step_result


================================================
FILE: paddlespeech/s2t/modules/decoder_layer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# Copyright 2019 Mobvoi Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from wenet(https://github.com/wenet-e2e/wenet)
"""Decoder self-attention layer definition."""
from typing import Optional
from typing import Tuple

import paddle
from paddle import nn

from paddlespeech.s2t.modules.align import LayerNorm
from paddlespeech.s2t.modules.align import Linear
from paddlespeech.s2t.utils.log import Log

# Module-level logger obtained from the project's Log helper.
logger = Log(__name__).getlog()

# Names exported by a star-import of this module.
__all__ = ["DecoderLayer"]


class DecoderLayer(nn.Layer):
    """One decoder block: self-attention, source attention, feed-forward.

    Args:
        size (int): Feature dimension of the layer input and output.
        self_attn (nn.Layer): Attention module applied over the target
            sequence. A `MultiHeadedAttention` instance can be used.
        src_attn (nn.Layer): Attention module whose queries come from the
            target sequence and whose keys/values come from `memory`.
            A `MultiHeadedAttention` instance can be used.
        feed_forward (nn.Layer): Feed-forward module.
            A `PositionwiseFeedForward` instance can be used.
        dropout_rate (float): Dropout rate.
        normalize_before (bool):
            True: layer_norm is applied before each sub-block.
            False: layer_norm is applied after each sub-block.
        concat_after (bool): Whether the attention input and output are
            concatenated and projected back to `size`.
            True: x -> x + linear(concat(x, att(x)))  (no dropout on this path)
            False: x -> x + dropout(att(x))
    """

    def __init__(
            self,
            size: int,
            self_attn: nn.Layer,
            src_attn: nn.Layer,
            feed_forward: nn.Layer,
            dropout_rate: float,
            normalize_before: bool=True,
            concat_after: bool=False, ):
        """Store the given sub-modules and build norms, dropout and projections."""
        super().__init__()
        # Plain configuration values.
        self.size = size
        self.normalize_before = normalize_before
        self.concat_after = concat_after

        # Sub-layers; they are created in the same relative order as before so
        # that parameter registration order is unchanged.
        self.self_attn = self_attn
        self.src_attn = src_attn
        self.feed_forward = feed_forward
        self.norm1 = LayerNorm(size, epsilon=1e-12)
        self.norm2 = LayerNorm(size, epsilon=1e-12)
        self.norm3 = LayerNorm(size, epsilon=1e-12)
        self.dropout = nn.Dropout(dropout_rate)
        # Both projections are always built, even when `concat_after` is False.
        self.concat_linear1 = Linear(size + size, size)
        self.concat_linear2 = Linear(size + size, size)

    def forward(
            self,
            tgt: paddle.Tensor,
            tgt_mask: paddle.Tensor,
            memory: paddle.Tensor,
            memory_mask: paddle.Tensor,
            cache: Optional[paddle.Tensor]=None
    ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor, paddle.Tensor]:
        """Run the decoder block.

        Args:
            tgt (paddle.Tensor): Input tensor (#batch, maxlen_out, size).
            tgt_mask (paddle.Tensor): Mask for `tgt`. It is indexed on three
                axes here; presumably (#batch, maxlen_out, maxlen_out).
            memory (paddle.Tensor): Encoded memory (#batch, maxlen_in, size).
            memory_mask (paddle.Tensor): Mask for `memory`, handed to
                `src_attn` unchanged.
            cache (paddle.Tensor): Previously computed outputs of this layer,
                (#batch, maxlen_out - 1, size). When given, only the last
                target frame is computed and appended to `cache`.
        Returns:
            paddle.Tensor: Output tensor (#batch, maxlen_out, size).
            paddle.Tensor: `tgt_mask`, returned as received.
            paddle.Tensor: `memory`, returned as received.
            paddle.Tensor: `memory_mask`, returned as received.
        """
        shortcut = tgt
        if self.normalize_before:
            tgt = self.norm1(tgt)

        if cache is not None:
            # compute only the last frame query keeping dim: max_time_out -> 1
            assert cache.shape == [
                tgt.shape[0],
                tgt.shape[1] - 1,
                self.size,
            ], f"{cache.shape} == {[tgt.shape[0], tgt.shape[1] - 1, self.size]}"
            tgt_q = tgt[:, -1:, :]
            shortcut = shortcut[:, -1:, :]
            tgt_q_mask = tgt_mask[:, -1:, :]
        else:
            tgt_q = tgt
            tgt_q_mask = tgt_mask

        # --- self-attention sub-block ---
        self_att_out = self.self_attn(tgt_q, tgt, tgt, tgt_q_mask)[0]
        if self.concat_after:
            merged = paddle.cat((tgt_q, self_att_out), dim=-1)
            x = shortcut + self.concat_linear1(merged)
        else:
            x = shortcut + self.dropout(self_att_out)
        if not self.normalize_before:
            x = self.norm1(x)

        # --- source-attention sub-block ---
        shortcut = x
        if self.normalize_before:
            x = self.norm2(x)
        src_att_out = self.src_attn(x, memory, memory, memory_mask)[0]
        if self.concat_after:
            merged = paddle.cat((x, src_att_out), dim=-1)
            x = shortcut + self.concat_linear2(merged)
        else:
            x = shortcut + self.dropout(src_att_out)
        if not self.normalize_before:
            x = self.norm2(x)

        # --- feed-forward sub-block ---
        shortcut = x
        if self.normalize_before:
            x = self.norm3(x)
        x = shortcut + self.dropout(self.feed_forward(x))
        if not self.normalize_before:
            x = self.norm3(x)

        # In cached mode `x` holds a single frame; prepend the earlier frames.
        if cache is not None:
            x = paddle.cat([cache, x], dim=1)

        return x, tgt_mask, memory, memory_mask


================================================
FILE: paddlespeech/s2t/modules/embedding.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# Copyright 2019 Mobvoi Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from wenet(https://github.com/wenet-e2e/wenet)
"""Positonal Encoding Module."""
import math
from typing import Tuple

import paddle
from paddle import nn

from paddlespeech.s2t.utils.log import Log

logger = Log(__name__).getlog()

__all__ = [
    "PositionalEncodingInterface", "NoPositionalEncoding", "PositionalEncoding",
    "RelPositionalEncoding"
]


class PositionalEncodingInterface:
    """Method contract implemented by the positional-encoding modules below.

    Both methods raise `NotImplementedError` here and are meant to be
    overridden.
    """

    def forward(self, x: paddle.Tensor,
                offset: int=0) -> Tuple[paddle.Tensor, paddle.Tensor]:
        """Compute positional encoding.
        Args:
            x (paddle.Tensor): Input tensor (batch, time, `*`).
            offset (int): Position offset of the first frame of `x`.
        Returns:
            paddle.Tensor: Encoded tensor (batch, time, `*`).
            paddle.Tensor: Positional embedding tensor (1, time, `*`).
        Raises:
            NotImplementedError: always, in this base interface.
        """
        message = "forward method is not implemented"
        raise NotImplementedError(message)

    def position_encoding(self, offset: int, size: int) -> paddle.Tensor:
        """Return the positional encoding for a window, for streaming use.
        Args:
            offset (int): start offset
            size (int): required size of position encoding
        Returns:
            paddle.Tensor: Corresponding position encoding
        Raises:
            NotImplementedError: always, in this base interface.
        """
        message = "position_encoding method is not implemented"
        raise NotImplementedError(message)


class NoPositionalEncoding(nn.Layer, PositionalEncodingInterface):
    """Pass-through encoding: the input is returned untouched and no
    positional embedding is produced.

    The constructor accepts the same arguments as the other encodings so the
    classes are interchangeable, but none of them is stored or used.
    """

    def __init__(self,
                 d_model: int,
                 dropout_rate: float,
                 max_len: int=5000,
                 reverse: bool=False):
        super().__init__()

    def forward(self, x: paddle.Tensor,
                offset: int=0) -> Tuple[paddle.Tensor, paddle.Tensor]:
        """Return `x` unchanged together with `None` as the embedding."""
        pos_emb = None
        return x, pos_emb

    def position_encoding(self, offset: int, size: int) -> paddle.Tensor:
        """Return `None`: there is no encoding for any window."""
        pos_emb = None
        return pos_emb


class PositionalEncoding(nn.Layer, PositionalEncodingInterface):
    """Absolute sinusoidal positional encoding backed by a precomputed table.

    The table `self.pe` has `max_len` rows (positions 0 .. max_len - 1), so a
    window `[offset, offset + T)` is valid exactly when
    `offset + T <= max_len`.
    """

    def __init__(self,
                 d_model: int,
                 dropout_rate: float,
                 max_len: int=5000,
                 reverse: bool=False):
        """Positional encoding.
            PE(pos, 2i)   = sin(pos/(10000^(2i/dmodel)))
            PE(pos, 2i+1) = cos(pos/(10000^(2i/dmodel)))
        Args:
            d_model (int): embedding dim.
            dropout_rate (float): dropout rate.
            max_len (int, optional): maximum input length. Defaults to 5000.
            reverse (bool, optional): Not used. Defaults to False.
        """
        nn.Layer.__init__(self)
        self.d_model = paddle.to_tensor(d_model)
        self.max_len = max_len
        # Inputs are multiplied by sqrt(d_model) before the encoding is added.
        self.xscale = paddle.to_tensor(math.sqrt(self.d_model))
        self.dropout = nn.Dropout(p=dropout_rate)
        self.base = paddle.to_tensor(10000.0)
        self.pe = paddle.zeros([1, self.max_len, self.d_model])  #[B=1,T,D]

        position = paddle.arange(
            0, self.max_len, dtype=paddle.float32).unsqueeze(1)  #[T, 1]
        # base^{-2(i-1)/d)}, i \in (1,2...,d/2)
        div_term = paddle.exp(
            -paddle.arange(0, self.d_model, 2, dtype=paddle.float32) *
            (paddle.log(self.base) / self.d_model))

        # [B,T,D]: even channels hold the sine, odd channels the cosine.
        self.pe[:, :, 0::2] = paddle.sin(position * div_term)
        self.pe[:, :, 1::2] = paddle.cos(position * div_term)

    def forward(self, x: paddle.Tensor,
                offset: int=0) -> Tuple[paddle.Tensor, paddle.Tensor]:
        """Add positional encoding.
        Args:
            x (paddle.Tensor): Input. Its shape is (batch, time, ...)
            offset (int): position offset
        Returns:
            paddle.Tensor: Encoded tensor. Its shape is (batch, time, ...)
            paddle.Tensor: for compatibility to RelPositionalEncoding, (batch=1, time, ...)
        Raises:
            AssertionError: if the window `[offset, offset + time)` does not
                fit inside the `max_len`-row table.
        """
        # `<=`: a window that ends exactly at the last table row is valid.
        assert offset + x.shape[
            1] <= self.max_len, "offset: {} + x.shape[1]: {} is larger than the max_len: {}".format(
                offset, x.shape[1], self.max_len)
        pos_emb = self.pe[:, offset:offset + x.shape[1]]
        x = x * self.xscale + pos_emb
        return self.dropout(x), self.dropout(pos_emb)

    def position_encoding(self, offset: int, size: int) -> paddle.Tensor:
        """ For getting encoding in a streaming fashion
        Attention!!!!!
        we apply dropout only once at the whole utterance level in a none
        streaming way, but will call this function several times with
        increasing input size in a streaming scenario, so the dropout will
        be applied several times.
        Args:
            offset (int): start offset
            size (int): required size of position encoding
        Returns:
            paddle.Tensor: Corresponding position encoding, #[1, T, D].
        Raises:
            AssertionError: if `[offset, offset + size)` does not fit inside
                the `max_len`-row table.
        """
        # `<=`: a window that ends exactly at the last table row is valid.
        assert offset + size <= self.max_len
        return self.dropout(self.pe[:, offset:offset + size])


class RelPositionalEncoding(PositionalEncoding):
    """Relative positional encoding module.
    See : Appendix B in https://arxiv.org/abs/1901.02860

    Unlike `PositionalEncoding.forward`, the embedding is not added to the
    input here; the scaled input and the embedding are returned separately.
    """

    def __init__(self, d_model: int, dropout_rate: float, max_len: int=5000):
        """
        Args:
            d_model (int): Embedding dimension.
            dropout_rate (float): Dropout rate.
            max_len (int, optional): [Maximum input length.]. Defaults to 5000.
        """
        super().__init__(d_model, dropout_rate, max_len, reverse=True)
        logger.info(f"max len: {max_len}")

    def forward(self, x: paddle.Tensor,
                offset: int=0) -> Tuple[paddle.Tensor, paddle.Tensor]:
        """Compute positional encoding.
        Args:
            x (paddle.Tensor): Input tensor (batch, time, `*`).
            offset (int): position offset of the first frame of `x`.
        Returns:
            paddle.Tensor: Encoded tensor (batch, time, `*`).
            paddle.Tensor: Positional embedding tensor (1, time, `*`).
        Raises:
            AssertionError: if the window `[offset, offset + time)` does not
                fit inside the `max_len`-row table.
        """
        # The table has `max_len` rows, so a window ending exactly at the last
        # row (offset + time == max_len) is valid; hence `<=`.
        assert offset + x.shape[
            1] <= self.max_len, "offset: {} + x.shape[1]: {} is larger than the max_len: {}".format(
                offset, x.shape[1], self.max_len)

        x = x * self.xscale
        pos_emb = self.pe[:, offset:offset + x.shape[1]]
        return self.dropout(x), self.dropout(pos_emb)


# RotaryRelPositionalEncoding is the same as RelPositionalEncoding
class ScaledRotaryRelPositionalEncoding(RelPositionalEncoding):
    """Scaled Rotary Relative positional encoding module.
    POSITION INTERPOLATION: https://arxiv.org/pdf/2306.15595v2.pdf

    Positions are multiplied by `1 / scale` before the sinusoids are
    evaluated, and the embedding is computed on the fly instead of being read
    from the precomputed table.
    """

    def __init__(self,
                 d_model: int,
                 dropout_rate: float,
                 max_len: int=5000,
                 scale=1):
        """
        Args:
            d_model (int): Embedding dimension.
            dropout_rate (float): Dropout rate.
            max_len (int, optional): [Maximum input length.]. Defaults to 5000.
            scale (int): Interpolation max input length to `scale * max_len` positions.
        """
        # `RelPositionalEncoding.__init__` takes no `reverse` argument, so it
        # must not be forwarded here (doing so raises TypeError).
        super().__init__(d_model, dropout_rate, max_len)
        self.pscale = paddle.to_tensor(scale)
        self.max_len = max_len * scale

    def sinusoidal_embeddings(self,
                              pos: paddle.Tensor,
                              dim: paddle.Tensor,
                              base=10000) -> paddle.Tensor:
        """Compute the `dim`-dimensional sinusoidal encoding of positions `pos`.
        Args:
            pos (paddle.Tensor): positions, (1, T).
            dim (paddle.Tensor): embedding dimension, must be even.
            base: base of the geometric frequency progression.
                NOTE(review): it is passed to `paddle.cast`; callers in this
                class always pass the tensor `self.base` — confirm a plain
                number is accepted before relying on the default.
        Returns:
            paddle.Tensor: (1, T, dim), sine and cosine interleaved on the
                last axis.
        """
        assert dim % 2 == 0
        # (d/2,)
        indices = paddle.arange(0, dim // 2, dtype=pos.dtype)
        indices = paddle.pow(paddle.cast(base, pos.dtype), -2 * indices / dim)
        # pos (1, T), indices (d/2,) -> (1, T, d/2)
        embeddings = paddle.einsum('...,d->...d', pos, indices)
        # (1, T, d/2, 2)
        embeddings = paddle.stack(
            [paddle.sin(embeddings), paddle.cos(embeddings)], axis=-1)
        # (1, T, d)
        embeddings = paddle.flatten(embeddings, start_axis=-2, stop_axis=-1)
        return embeddings

    def forward(self, x: paddle.Tensor,
                offset: int=0) -> Tuple[paddle.Tensor, paddle.Tensor]:
        """Compute positional encoding.
        Args:
            x (paddle.Tensor): Input tensor (batch, time, `*`).
            offset (int): position offset of the first frame of `x`.
        Returns:
            paddle.Tensor: Encoded tensor (batch, time, `*`).
            paddle.Tensor: Positional embedding tensor (1, time, `*`).
        """
        x = x * self.xscale

        B, T, D = x.shape
        assert D == self.d_model

        # position interpolation
        assert T * self.pscale <= self.max_len
        # Build exactly the T positions offset .. offset + T - 1. Generating
        # positions 0 .. T * scale - 1 and slicing [offset:offset + T]
        # afterwards yields the same values but runs off the end (returning
        # fewer than T rows) as soon as offset > T * (scale - 1).
        position = paddle.arange(
            offset, offset + T, dtype=x.dtype).unsqueeze(0)
        position *= 1.0 / self.pscale
        pos_emb = self.sinusoidal_embeddings(
            position, self.d_model, base=self.base)

        return self.dropout(x), self.dropout(pos_emb)

    def position_encoding(self, offset: int, size: int) -> paddle.Tensor:
        """ For getting encoding in a streaming fashion
        Attention!!!!!
        we apply dropout only once at the whole utterance level in a none
        streaming way, but will call this function several times with
        increasing input size in a streaming scenario, so the dropout will
        be applied several times.
        Args:
            offset (int): start offset
            size (int): required size of position encoding
        Returns:
            paddle.Tensor: Corresponding position encoding, #[1, T, D],
                with T == size.
        """
        # position interpolation
        assert (offset + size) * self.pscale <= self.max_len
        # Exactly `size` positions starting at `offset`. Using
        # (offset + size) * scale as the upper bound returned extra rows
        # whenever scale > 1; the rows kept here are the leading `size` rows
        # of that longer result.
        position = paddle.arange(
            offset, offset + size,
            dtype=paddle.get_default_dtype()).unsqueeze(0)
        position *= 1.0 / self.pscale

        pe = self.sinusoidal_embeddings(position, self.d_model, base=self.base)

        return self.dropout(pe)


================================================
FILE: paddlespeech/s2t/modules/encoder.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# Copyright 2019 Mobvoi Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from wenet(https://github.com/wenet-e2e/wenet)
"""Encoder definition."""
from typing import List
from typing import Optional
from typing import Tuple
from typing import Union

import paddle
from paddle import nn
from typeguard import typechecked

from paddlespeech.s2t.modules.activation import get_activation
from paddlespeech.s2t.modules.align import LayerNorm
from paddlespeech.s2t.modules.align import Linear
from paddlespeech.s2t.modules.attention import MultiHeadedAttention
from paddlespeech.s2t.modules.attention import RelPositionMultiHeadedAttention
from paddlespeech.s2t.modules.attention import RoPERelPositionMultiHeadedAttention
from paddlespeech.s2t.modules.conformer_convolution import ConvolutionModule
from paddlespeech.s2t.modules.embedding import NoPositionalEncoding
from paddlespeech.s2t.modules.embedding import PositionalEncoding
from paddlespeech.s2t.modules.embedding import RelPositionalEncoding
from paddlespeech.s2t.modules.encoder_layer import ConformerEncoderLayer
from paddlespeech.s2t.modules.encoder_layer import SqueezeformerEncoderLayer
from paddlespeech.s2t.modules.encoder_layer import TransformerEncoderLayer
from paddlespeech.s2t.modules.mask import add_optional_chunk_mask
from paddlespeech.s2t.modules.mask import make_non_pad_mask
from paddlespeech.s2t.modules.positionwise_feed_forward import PositionwiseFeedForward
from paddlespeech.s2t.modules.subsampling import Conv2dSubsampling4
from paddlespeech.s2t.modules.subsampling import Conv2dSubsampling6
from paddlespeech.s2t.modules.subsampling import Conv2dSubsampling8
from paddlespeech.s2t.modules.subsampling import DepthwiseConv2DSubsampling4
from paddlespeech.s2t.modules.subsampling import LinearNoSubsampling
from paddlespeech.s2t.modules.time_reduction import TimeReductionLayer1D
from paddlespeech.s2t.modules.time_reduction import TimeReductionLayer2D
from paddlespeech.s2t.modules.time_reduction import TimeReductionLayerStream
from paddlespeech.s2t.utils.log import Log

logger = Log(__name__).getlog()

__all__ = [
    "BaseEncoder", 'TransformerEncoder', "ConformerEncoder",
    "SqueezeformerEncoder"
]


class BaseEncoder(nn.Layer):
    """Shared front end and forward logic for the encoders in this module.

    This class builds the input (subsampling + positional encoding) layer and
    the final layer norm, and implements full-utterance and chunk-wise
    forward passes. It iterates over `self.encoders` but never assigns it;
    subclasses are expected to create that layer list.
    """

    @typechecked
    def __init__(self,
                 input_size: int,
                 output_size: int=256,
                 attention_heads: int=4,
                 linear_units: int=2048,
                 num_blocks: int=6,
                 dropout_rate: float=0.1,
                 positional_dropout_rate: float=0.1,
                 attention_dropout_rate: float=0.0,
                 input_layer: str="conv2d",
                 pos_enc_layer_type: str="abs_pos",
                 normalize_before: bool=True,
                 concat_after: bool=False,
                 static_chunk_size: int=0,
                 use_dynamic_chunk: bool=False,
                 global_cmvn: Optional[nn.Layer]=None,
                 use_dynamic_left_chunk: bool=False,
                 max_len: int=5000):
        """
        Args:
            input_size (int): input dim, d_feature
            output_size (int): dimension of attention, d_model
            attention_heads (int): the number of heads of multi head attention
                (not used in this base class)
            linear_units (int): the hidden units number of position-wise feed
                forward (not used in this base class)
            num_blocks (int): the number of encoder blocks
                (not used in this base class)
            dropout_rate (float): dropout rate
            attention_dropout_rate (float): dropout rate in attention
                (not used in this base class)
            positional_dropout_rate (float): dropout rate after adding
                positional encoding
            input_layer (str): input layer type.
                optional [linear, conv2d, conv2d6, conv2d8]
            pos_enc_layer_type (str): Encoder positional encoding layer type.
                optional [abs_pos, rel_pos, rope_pos, no_pos];
                `rope_pos` uses the same class as `rel_pos`.
            normalize_before (bool):
                True: use layer_norm before each sub-block of a layer.
                False: use layer_norm after each sub-block of a layer.
            concat_after (bool): whether to concat attention layer's input
                and output. (not used in this base class)
                True: x -> x + linear(concat(x, att(x)))
                False: x -> x + att(x)
            static_chunk_size (int): chunk size for static chunk training and
                decoding
            use_dynamic_chunk (bool): whether use dynamic chunk size for
                training or not, You can only use fixed chunk(chunk_size > 0)
                or dynamic chunk size(use_dynamic_chunk = True)
            global_cmvn (Optional[paddle.nn.Layer]): Optional GlobalCMVN layer
            use_dynamic_left_chunk (bool): whether use dynamic left chunk in
                dynamic chunk training
            max_len (int): `max_len` handed to the positional encoding class.
        Raises:
            ValueError: if `pos_enc_layer_type` or `input_layer` is not one
                of the supported names.
        """
        super().__init__()
        self._output_size = output_size

        # Map the configuration string to a positional encoding class.
        if pos_enc_layer_type == "abs_pos":
            pos_enc_class = PositionalEncoding
        elif pos_enc_layer_type == "rel_pos":
            pos_enc_class = RelPositionalEncoding
        elif pos_enc_layer_type == "rope_pos":
            pos_enc_class = RelPositionalEncoding
        elif pos_enc_layer_type == "no_pos":
            pos_enc_class = NoPositionalEncoding
        else:
            raise ValueError("unknown pos_enc_layer: " + pos_enc_layer_type)

        # Map the configuration string to an input (subsampling) layer class.
        if input_layer == "linear":
            subsampling_class = LinearNoSubsampling
        elif input_layer == "conv2d":
            subsampling_class = Conv2dSubsampling4
        elif input_layer == "conv2d6":
            subsampling_class = Conv2dSubsampling6
        elif input_layer == "conv2d8":
            subsampling_class = Conv2dSubsampling8
        else:
            raise ValueError("unknown input_layer: " + input_layer)

        self.global_cmvn = global_cmvn
        # The input layer owns the positional encoding instance.
        self.embed = subsampling_class(
            idim=input_size,
            odim=output_size,
            dropout_rate=dropout_rate,
            pos_enc_class=pos_enc_class(
                d_model=output_size,
                dropout_rate=positional_dropout_rate,
                max_len=max_len), )

        self.normalize_before = normalize_before
        # Created unconditionally; only applied when `normalize_before` is True.
        self.after_norm = LayerNorm(output_size, epsilon=1e-12)
        self.static_chunk_size = static_chunk_size
        self.use_dynamic_chunk = use_dynamic_chunk
        self.use_dynamic_left_chunk = use_dynamic_left_chunk

    def output_size(self) -> int:
        """Return the `output_size` given at construction."""
        return self._output_size

    def forward(
            self,
            xs: paddle.Tensor,
            xs_lens: paddle.Tensor,
            decoding_chunk_size: int=0,
            num_decoding_left_chunks: int=-1,
    ) -> Tuple[paddle.Tensor, paddle.Tensor]:
        """Encode a padded batch of utterances.
        Args:
            xs: padded input tensor (B, L, D)
            xs_lens: input length (B)
            decoding_chunk_size: decoding chunk size for dynamic chunk
                0: default for training, use random dynamic chunk.
                <0: for decoding, use full chunk.
                >0: for decoding, use fixed chunk size as set.
            num_decoding_left_chunks: number of left chunks, this is for decoding,
                the chunk size is decoding_chunk_size.
                >=0: use num_decoding_left_chunks
                <0: use all left chunks
        Returns:
            paddle.Tensor: encoder output.
            paddle.Tensor: the padding mask as returned by the input layer
                (i.e. before the encoder layers).
        """
        masks = make_non_pad_mask(xs_lens).unsqueeze(1)  # (B, 1, L)

        if self.global_cmvn is not None:
            xs = self.global_cmvn(xs)
        # The input layer returns the embedded input, the positional
        # embedding and an updated mask.
        xs, pos_emb, masks = self.embed(xs, masks, offset=0)
        mask_pad = ~masks
        chunk_masks = add_optional_chunk_mask(
            xs, masks, self.use_dynamic_chunk, self.use_dynamic_left_chunk,
            decoding_chunk_size, self.static_chunk_size,
            num_decoding_left_chunks)
        # `self.encoders` is expected to be defined by the subclass.
        for layer in self.encoders:
            xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad)
        if self.normalize_before:
            xs = self.after_norm(xs)
        # Here we assume the mask is not changed in encoder layers, so just
        # return the masks before encoder layers, and the masks will be used
        # for cross attention with decoder later
        return xs, masks

    def forward_chunk(
            self,
            xs: paddle.Tensor,
            offset: int,
            required_cache_size: int,
            att_cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0]),
            cnn_cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0]),
            att_mask: paddle.Tensor=paddle.ones([0, 0, 0], dtype=paddle.bool)
    ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor]:
        """ Forward just one chunk
        Args:
            xs (paddle.Tensor): chunk audio feat input, [B=1, T, D], where
                `T==(chunk_size-1)*subsampling_rate + subsample.right_context + 1`
            offset (int): current offset in encoder output time stamp
            required_cache_size (int): cache size required for next chunk
                computation
                >=0: actual cache size
                <0: means all history cache is required
            att_cache(paddle.Tensor): cache tensor for key & val in
                transformer/conformer attention. Shape is
                (elayers, head, cache_t1, d_k * 2), where`head * d_k == hidden-dim`
                and `cache_t1 == chunk_size * num_decoding_left_chunks`.
            cnn_cache (paddle.Tensor): cache tensor for cnn_module in conformer,
                (elayers, B=1, hidden-dim, cache_t2), where `cache_t2 == cnn.lorder - 1`
            att_mask (paddle.Tensor): mask passed unchanged to every encoder
                layer; defaults to an empty bool tensor.
        Returns:
            paddle.Tensor: output of current input xs, (B=1, chunk_size, hidden-dim)
            paddle.Tensor: new attention cache required for next chunk, dynamic shape
                (elayers, head, T, d_k*2) depending on required_cache_size
            paddle.Tensor: new conformer cnn cache required for next chunk, with
                same shape as the original cnn_cache
        """
        # NOTE: the tensor defaults in the signature are created once, when
        # the function is defined.
        assert xs.shape[0] == 1  # batch size must be one
        # tmp_masks is just for interface compatibility, [B=1, C=1, T]
        tmp_masks = paddle.ones([1, 1, xs.shape[1]], dtype=paddle.bool)

        if self.global_cmvn is not None:
            xs = self.global_cmvn(xs)

        # before embed, xs=(B, T, D1), pos_emb=(B=1, T, D)
        # The positional embedding returned here is discarded; it is rebuilt
        # below so that it also covers the cached frames.
        xs, _, _ = self.embed(xs, tmp_masks, offset=offset)
        # after embed, xs=(B=1, chunk_size, hidden-dim)

        elayers, _, cache_t1, _ = att_cache.shape
        chunk_size = xs.shape[1]
        # Attention keys span the cached frames plus the current chunk.
        attention_key_size = cache_t1 + chunk_size

        # only used when using `RelPositionMultiHeadedAttention` and `RoPERelPositionMultiHeadedAttention`
        pos_emb = self.embed.position_encoding(
            offset=offset - cache_t1, size=attention_key_size)

        # First key frame to keep in the cache handed to the next chunk:
        #   <0  -> keep everything,
        #   ==0 -> keep nothing (the slice below is empty),
        #   >0  -> keep at most the last `required_cache_size` frames.
        if required_cache_size < 0:
            next_cache_start = 0
        elif required_cache_size == 0:
            next_cache_start = attention_key_size
        else:
            next_cache_start = max(attention_key_size - required_cache_size, 0)

        r_att_cache = []
        r_cnn_cache = []
        for i, layer in enumerate(self.encoders):
            # att_cache[i:i+1] = (1, head, cache_t1, d_k*2)
            # cnn_cache[i:i+1] = (1, B=1, hidden-dim, cache_t2)

            # WARNING: eliminate if-else cond op in graph
            # tensor zeros([0,0,0,0]) support [i:i+1] slice, will return zeros([0,0,0,0]) tensor
            # raw code as below:
            #   att_cache=att_cache[i:i+1] if elayers > 0 else att_cache,
            #   cnn_cache=cnn_cache[i:i+1] if cnn_cache.shape[0] > 0 else cnn_cache,
            xs, _, new_att_cache, new_cnn_cache = layer(
                xs,
                att_mask,
                pos_emb,
                att_cache=att_cache[i:i + 1],
                cnn_cache=cnn_cache[i:i + 1], )
            # new_att_cache = (1, head, attention_key_size, d_k*2)
            # new_cnn_cache = (B=1, hidden-dim, cache_t2)
            r_att_cache.append(new_att_cache[:, :, next_cache_start:, :])
            r_cnn_cache.append(new_cnn_cache)  # add elayer dim

        if self.normalize_before:
            xs = self.after_norm(xs)

        # r_att_cache (elayers, head, T, d_k*2)
        # r_cnn_cache (elayers, B=1, hidden-dim, cache_t2)
        # The attention caches already carry a leading axis of size 1 and are
        # concatenated; the cnn caches get a new leading axis via stack.
        r_att_cache = paddle.concat(r_att_cache, axis=0)
        r_cnn_cache = paddle.stack(r_cnn_cache, axis=0)
        return xs, r_att_cache, r_cnn_cache

    def forward_chunk_by_chunk(
            self,
            xs: paddle.Tensor,
            decoding_chunk_size: int,
            num_decoding_left_chunks: int=-1,
    ) -> Tuple[paddle.Tensor, paddle.Tensor]:
        """ Forward input chunk by chunk with chunk_size like a streaming
            fashion
        Here we should pay special attention to computation cache in the
        streaming style forward chunk by chunk. Three things should be taken
        into account for computation in the current network:
            1. transformer/conformer encoder layers output cache
            2. convolution in conformer
            3. convolution in subsampling
        However, we don't implement subsampling cache for:
            1. We can control subsampling module to output the right result by
               overlapping input instead of cache left context, even though it
               wastes some computation, but subsampling only takes a very
               small fraction of computation in the whole model.
            2. Typically, there are several convolution layers with subsampling
               in subsampling module, it is tricky and complicated to do cache
               with different convolution layers with different subsampling
               rate.
            3. Currently, nn.Sequential is used to stack all the convolution
               layers in subsampling, we need to rewrite it to make it work
               with cache, which is not preferred.
        Args:
            xs (paddle.Tensor): (1, max_len, dim)
            decoding_chunk_size (int): decoding chunk size, must be > 0.
            num_decoding_left_chunks (int): decoding with this many left
                chunks; a negative value keeps all history.
        Returns:
            paddle.Tensor: concatenation of all chunk outputs along axis 1.
            paddle.Tensor: all-ones bool mask of shape (1, 1, output length).
        """
        assert decoding_chunk_size > 0
        # The model is trained by static or dynamic chunk
        assert self.static_chunk_size > 0 or self.use_dynamic_chunk

        # feature stride and window for `subsampling` module
        subsampling = self.embed.subsampling_rate
        context = self.embed.right_context + 1  # Add current frame
        stride = subsampling * decoding_chunk_size
        decoding_window = (decoding_chunk_size - 1) * subsampling + context

        num_frames = xs.shape[1]
        # Negative when `num_decoding_left_chunks` is negative, which
        # `forward_chunk` treats as "keep all history".
        required_cache_size = decoding_chunk_size * num_decoding_left_chunks

        # Empty caches for the first chunk.
        att_cache: paddle.Tensor = paddle.zeros([0, 0, 0, 0])
        cnn_cache: paddle.Tensor = paddle.zeros([0, 0, 0, 0])

        outputs = []
        offset = 0
        # Feed forward overlap input step by step
        for cur in range(0, num_frames - context + 1, stride):
            end = min(cur + decoding_window, num_frames)
            chunk_xs = xs[:, cur:end, :]

            (y, att_cache, cnn_cache) = self.forward_chunk(
                chunk_xs, offset, required_cache_size, att_cache, cnn_cache)

            outputs.append(y)
            # Advance by the number of output frames produced so far.
            offset += y.shape[1]
        ys = paddle.cat(outputs, 1)
        masks = paddle.ones([1, 1, ys.shape[1]], dtype=paddle.bool)
        return ys, masks


class TransformerEncoder(BaseEncoder):
    """Transformer encoder module.

    Stacks ``num_blocks`` `TransformerEncoderLayer` blocks on top of the
    front-end (CMVN, subsampling/embedding, final norm) provided by
    `BaseEncoder`.
    """

    @typechecked
    def __init__(
            self,
            input_size: int,
            output_size: int=256,
            attention_heads: int=4,
            linear_units: int=2048,
            num_blocks: int=6,
            dropout_rate: float=0.1,
            positional_dropout_rate: float=0.1,
            attention_dropout_rate: float=0.0,
            input_layer: str="conv2d",
            pos_enc_layer_type: str="abs_pos",
            normalize_before: bool=True,
            concat_after: bool=False,
            static_chunk_size: int=0,
            use_dynamic_chunk: bool=False,
            global_cmvn: Optional[nn.Layer]=None,
            use_dynamic_left_chunk: bool=False, ):
        """ Construct TransformerEncoder
        See Encoder for the meaning of each parameter.

        All arguments are forwarded unchanged to ``BaseEncoder.__init__``;
        ``output_size``, ``attention_heads``, ``linear_units``,
        ``dropout_rate``, ``attention_dropout_rate``, ``normalize_before``
        and ``concat_after`` are additionally used to build the encoder
        layers below.
        """
        super().__init__(input_size, output_size, attention_heads, linear_units,
                         num_blocks, dropout_rate, positional_dropout_rate,
                         attention_dropout_rate, input_layer,
                         pos_enc_layer_type, normalize_before, concat_after,
                         static_chunk_size, use_dynamic_chunk, global_cmvn,
                         use_dynamic_left_chunk)
        # Every block gets its own attention and feed-forward instances.
        self.encoders = nn.LayerList([
            TransformerEncoderLayer(
                size=output_size,
                self_attn=MultiHeadedAttention(attention_heads, output_size,
                                               attention_dropout_rate),
                feed_forward=PositionwiseFeedForward(output_size, linear_units,
                                                     dropout_rate),
                dropout_rate=dropout_rate,
                normalize_before=normalize_before,
                concat_after=concat_after) for _ in range(num_blocks)
        ])

    def forward_one_step(
            self,
            xs: paddle.Tensor,
            masks: paddle.Tensor,
            cache=None,
    ) -> Tuple[paddle.Tensor, paddle.Tensor, List[paddle.Tensor]]:
        """Encode input frame.

        The encoder layers are invoked with the
        ``layer(x, mask, pos_emb, ...)`` calling convention, which returns
        ``(x, mask, att_cache, cnn_cache)``.  The previous implementation
        passed an ``output_cache=`` keyword that the layer does not accept and
        unpacked only three return values, so the call could not succeed.

        The layers no longer offer a "previous layer output" cache, so the
        whole prefix ``xs`` is re-encoded on every call. ``cache`` is still
        accepted so that existing callers keep working, but its contents are
        not consumed.

        Args:
            xs (paddle.Tensor): (Prefix) Input tensor. (B, T, D)
            masks (paddle.Tensor): Mask tensor. (B, T, T)
            cache (List[paddle.Tensor]): List of cache tensors. Kept for
                interface compatibility only; ignored.

        Returns:
            paddle.Tensor: Output tensor.
            paddle.Tensor: Mask tensor.
            List[paddle.Tensor]: List of new cache tensors, one per encoder
                layer, each holding that layer's output for the prefix.
        """
        if self.global_cmvn is not None:
            xs = self.global_cmvn(xs)

        xs, pos_emb, masks = self.embed(xs, masks, offset=0)
        new_cache = []
        for layer in self.encoders:
            # The attention / cnn caches returned by the layer are not needed
            # here: the full prefix is recomputed each step.
            xs, masks, _, _ = layer(xs, masks, pos_emb)
            new_cache.append(xs)
        if self.normalize_before:
            xs = self.after_norm(xs)
        return xs, masks, new_cache


class ConformerEncoder(BaseEncoder):
    """Conformer encoder: the `BaseEncoder` front-end followed by a stack of
    `ConformerEncoderLayer` blocks."""

    @typechecked
    def __init__(self,
                 input_size: int,
                 output_size: int=256,
                 attention_heads: int=4,
                 linear_units: int=2048,
                 num_blocks: int=6,
                 dropout_rate: float=0.1,
                 positional_dropout_rate: float=0.1,
                 attention_dropout_rate: float=0.0,
                 input_layer: str="conv2d",
                 pos_enc_layer_type: str="rel_pos",
                 normalize_before: bool=True,
                 concat_after: bool=False,
                 static_chunk_size: int=0,
                 use_dynamic_chunk: bool=False,
                 global_cmvn: Optional[nn.Layer]=None,
                 use_dynamic_left_chunk: bool=False,
                 positionwise_conv_kernel_size: int=1,
                 macaron_style: bool=True,
                 selfattention_layer_type: str="rel_selfattn",
                 activation_type: str="swish",
                 use_cnn_module: bool=True,
                 cnn_module_kernel: int=15,
                 causal: bool=False,
                 cnn_module_norm: str="batch_norm",
                 max_len: int=5000):
        """Build a ConformerEncoder.

        Args:
            input_size ... use_dynamic_left_chunk: forwarded to BaseEncoder,
                see there for their meaning.
            positionwise_conv_kernel_size (int): Kernel size of positionwise
                conv1d layer. Not referenced in this constructor.
            macaron_style (bool): If True every block gets an additional
                (macaron) feed-forward module; otherwise it is ``None``.
            selfattention_layer_type (str): Has no effect; the attention
                class is selected by ``pos_enc_layer_type``. Kept for
                configuration compatibility.
            activation_type (str): Name resolved through ``get_activation``.
            use_cnn_module (bool): If True every block gets a convolution
                module; otherwise it is ``None``.
            cnn_module_kernel (int): Kernel size of the convolution module.
            causal (bool): Whether the convolution module is causal.
            cnn_module_norm (str): Norm type of the convolution module,
                'batch_norm' or 'layer_norm'.
            max_len (int): Forwarded to BaseEncoder as its last positional
                argument.

        Raises:
            ValueError: if ``pos_enc_layer_type`` is not one of "abs_pos",
                "rel_pos" or "rope_pos".
        """
        super().__init__(input_size, output_size, attention_heads, linear_units,
                         num_blocks, dropout_rate, positional_dropout_rate,
                         attention_dropout_rate, input_layer,
                         pos_enc_layer_type, normalize_before, concat_after,
                         static_chunk_size, use_dynamic_chunk, global_cmvn,
                         use_dynamic_left_chunk, max_len)
        # One activation object, shared by all feed-forward / conv modules.
        act = get_activation(activation_type)

        # Positional-encoding type -> matching self-attention class.
        attention_by_pos_enc = {
            "abs_pos": MultiHeadedAttention,
            "rel_pos": RelPositionMultiHeadedAttention,
            "rope_pos": RoPERelPositionMultiHeadedAttention,
        }
        if pos_enc_layer_type not in attention_by_pos_enc:
            raise ValueError(
                f"pos_enc_layer_type {pos_enc_layer_type} not supported.")
        attention_cls = attention_by_pos_enc[pos_enc_layer_type]

        # Constructor arguments reused for every block.
        attention_args = (attention_heads, output_size, attention_dropout_rate)
        feed_forward_args = (output_size, linear_units, dropout_rate, act)
        conv_args = (output_size, cnn_module_kernel, act, cnn_module_norm,
                     causal)

        blocks = []
        for _ in range(num_blocks):
            # Sub-modules are instantiated in the order attention,
            # feed-forward, macaron feed-forward, convolution.
            attention = attention_cls(*attention_args)
            feed_forward = PositionwiseFeedForward(*feed_forward_args)
            if macaron_style:
                macaron = PositionwiseFeedForward(*feed_forward_args)
            else:
                macaron = None
            if use_cnn_module:
                conv = ConvolutionModule(*conv_args)
            else:
                conv = None
            blocks.append(
                ConformerEncoderLayer(
                    size=output_size,
                    self_attn=attention,
                    feed_forward=feed_forward,
                    feed_forward_macaron=macaron,
                    conv_module=conv,
                    dropout_rate=dropout_rate,
                    normalize_before=normalize_before,
                    concat_after=concat_after))
        self.encoders = nn.LayerList(blocks)


class SqueezeformerEncoder(nn.Layer):
    """Squeezeformer encoder.

    Pipeline: optional global CMVN -> depthwise conv2d subsampling with
    relative positional encoding -> LayerNorm -> ``num_blocks``
    `SqueezeformerEncoderLayer` blocks, with optional time reduction (stride
    2) at the layers listed in ``reduce_idx`` and time recovery at the layers
    listed in ``recover_idx`` -> optional linear projection to
    ``output_size``.
    """

    @typechecked
    def __init__(self,
                 input_size: int,
                 encoder_dim: int=256,
                 output_size: int=256,
                 attention_heads: int=4,
                 num_blocks: int=12,
                 reduce_idx: Optional[Union[int, List[int]]]=5,
                 recover_idx: Optional[Union[int, List[int]]]=11,
                 feed_forward_expansion_factor: int=4,
                 dw_stride: bool=False,
                 input_dropout_rate: float=0.1,
                 pos_enc_layer_type: str="rel_pos",
                 time_reduction_layer_type: str="conv1d",
                 feed_forward_dropout_rate: float=0.1,
                 attention_dropout_rate: float=0.1,
                 cnn_module_kernel: int=31,
                 cnn_norm_type: str="layer_norm",
                 dropout: float=0.1,
                 causal: bool=False,
                 adaptive_scale: bool=True,
                 activation_type: str="swish",
                 init_weights: bool=True,
                 global_cmvn: Optional[nn.Layer]=None,
                 normalize_before: bool=False,
                 use_dynamic_chunk: bool=False,
                 concat_after: bool=False,
                 static_chunk_size: int=0,
                 use_dynamic_left_chunk: bool=False):
        """Construct SqueezeformerEncoder

        Args:
            input_size (int): Input feature dimension, passed to the
                subsampling module.
            encoder_dim (int): The hidden dimension of encoder layer.
            output_size (int): The output dimension of final projection layer.
                A projection is only created when it differs from
                ``encoder_dim``.
            attention_heads (int): Num of attention head in attention module.
            num_blocks (int): Num of encoder layers.
            reduce_idx Optional[Union[int, List[int]]]:
                reduce layer index, from 40ms to 80ms per frame.
                An int is wrapped into a one-element list; a list must be
                ascending. ``None`` disables time reduction.
            recover_idx Optional[Union[int, List[int]]]:
                recover layer index, from 80ms to 40ms per frame.
                Same int/list handling as ``reduce_idx``. When both are given
                they must have the same length.
            feed_forward_expansion_factor (int): Enlarge coefficient of FFN.
            dw_stride (bool): Whether do depthwise convolution
                              on subsampling module.
            input_dropout_rate (float): Dropout rate of input projection layer.
            pos_enc_layer_type (str): Self attention type, one of "abs_pos",
                "rel_pos" or "rope_pos".
            time_reduction_layer_type (str): 'conv1d' selects
                TimeReductionLayer1D, 'stream' selects
                TimeReductionLayerStream, anything else selects
                TimeReductionLayer2D.
            feed_forward_dropout_rate (float): Dropout rate passed to the
                feed-forward modules.
            attention_dropout_rate (float): Dropout rate passed to the
                self-attention modules.
            cnn_module_kernel (int): Kernel size of convolution module.
            cnn_norm_type (str): Norm type passed to the convolution module.
            dropout (float): Dropout rate passed to each encoder layer.
            causal (bool): whether to use causal convolution or not.
            adaptive_scale (bool): Whether to use adaptive scale.
            activation_type (str): Encoder activation function type.
            init_weights (bool): Whether to initialize weights.
            global_cmvn (Optional[nn.Layer]): Layer applied to the raw input
                before subsampling when not None.
            normalize_before (bool): Passed to each encoder layer.
            use_dynamic_chunk (bool): Passed to ``add_optional_chunk_mask``.
            concat_after (bool): Passed to each encoder layer.
            static_chunk_size (int): Passed to ``add_optional_chunk_mask``.
            use_dynamic_left_chunk (bool): Passed to
                ``add_optional_chunk_mask``.

        Raises:
            ValueError: if ``pos_enc_layer_type`` is not supported.
            AssertionError: if an index list is not ascending, or the reduce
                and recover lists differ in length.
        """
        super().__init__()
        self.global_cmvn = global_cmvn
        # Normalise a single int index into a one-element list.
        self.reduce_idx: Optional[Union[int, List[int]]] = [reduce_idx] \
            if type(reduce_idx) == int else reduce_idx
        self.recover_idx: Optional[Union[int, List[int]]] = [recover_idx] \
            if type(recover_idx) == int else recover_idx
        self.check_ascending_list()
        if reduce_idx is None:
            self.time_reduce = None
        else:
            if recover_idx is None:
                self.time_reduce = 'normal'  # no recovery at the end
            else:
                self.time_reduce = 'recover'  # recovery at the end
                assert len(self.reduce_idx) == len(self.recover_idx)
            self.reduce_stride = 2
        self._output_size = output_size
        self.normalize_before = normalize_before
        self.static_chunk_size = static_chunk_size
        self.use_dynamic_chunk = use_dynamic_chunk
        self.use_dynamic_left_chunk = use_dynamic_left_chunk
        activation = get_activation(activation_type)

        # self-attention module definition
        if pos_enc_layer_type == "abs_pos":
            encoder_selfattn_layer = MultiHeadedAttention
            # The attention operates on the encoder hidden states, whose width
            # is encoder_dim (same as the other two branches), not on the
            # projected output of width output_size.
            encoder_selfattn_layer_args = (attention_heads, encoder_dim,
                                           attention_dropout_rate)
        elif pos_enc_layer_type == "rel_pos":
            encoder_selfattn_layer = RelPositionMultiHeadedAttention
            encoder_selfattn_layer_args = (attention_heads, encoder_dim,
                                           attention_dropout_rate,
                                           adaptive_scale, init_weights)
        elif pos_enc_layer_type == "rope_pos":
            encoder_selfattn_layer = RoPERelPositionMultiHeadedAttention
            encoder_selfattn_layer_args = (attention_heads, encoder_dim,
                                           attention_dropout_rate,
                                           adaptive_scale, init_weights)
        else:
            raise ValueError(
                f"pos_enc_layer_type {pos_enc_layer_type} not supported.")

        # feed-forward module definition
        positionwise_layer = PositionwiseFeedForward
        positionwise_layer_args = (
            encoder_dim, encoder_dim * feed_forward_expansion_factor,
            feed_forward_dropout_rate, activation, adaptive_scale, init_weights)

        # convolution module definition
        # NOTE(review): the sixth positional argument (True) is presumably the
        # conv `bias` flag -- confirm against ConvolutionModule.
        convolution_layer = ConvolutionModule
        convolution_layer_args = (encoder_dim, cnn_module_kernel, activation,
                                  cnn_norm_type, causal, True, adaptive_scale,
                                  init_weights)

        # The positional-encoding dropout is fixed at 0.1 here; it is not
        # driven by any constructor argument.
        self.embed = DepthwiseConv2DSubsampling4(
            1, encoder_dim,
            RelPositionalEncoding(encoder_dim, dropout_rate=0.1), dw_stride,
            input_size, input_dropout_rate, init_weights)

        self.preln = LayerNorm(encoder_dim)
        # Each block owns its attention, two feed-forward and one conv module.
        self.encoders = paddle.nn.LayerList([
            SqueezeformerEncoderLayer(
                encoder_dim,
                encoder_selfattn_layer(*encoder_selfattn_layer_args),
                positionwise_layer(*positionwise_layer_args),
                convolution_layer(*convolution_layer_args),
                positionwise_layer(*positionwise_layer_args), normalize_before,
                dropout, concat_after) for _ in range(num_blocks)
        ])
        if time_reduction_layer_type == 'conv1d':
            time_reduction_layer = TimeReductionLayer1D
            time_reduction_layer_args = {
                'channel': encoder_dim,
                'out_dim': encoder_dim,
            }
        elif time_reduction_layer_type == 'stream':
            time_reduction_layer = TimeReductionLayerStream
            time_reduction_layer_args = {
                'channel': encoder_dim,
                'out_dim': encoder_dim,
            }
        else:
            time_reduction_layer = TimeReductionLayer2D
            time_reduction_layer_args = {'encoder_dim': encoder_dim}

        # A single reduction layer and a single recovery projection are shared
        # by all reduce / recover positions.
        self.time_reduction_layer = time_reduction_layer(
            **time_reduction_layer_args)
        self.time_recover_layer = Linear(encoder_dim, encoder_dim)
        self.final_proj = None
        if output_size != encoder_dim:
            self.final_proj = Linear(encoder_dim, output_size)

    def output_size(self) -> int:
        """Return the ``output_size`` value given at construction."""
        return self._output_size

    def forward(
            self,
            xs: paddle.Tensor,
            xs_lens: paddle.Tensor,
            decoding_chunk_size: int=0,
            num_decoding_left_chunks: int=-1,
    ) -> Tuple[paddle.Tensor, paddle.Tensor]:
        """Encode a padded batch of features.

        Args:
            xs: padded input tensor (B, L, D)
            xs_lens: input length (B)
            decoding_chunk_size: decoding chunk size for dynamic chunk
                0: default for training, use random dynamic chunk.
                <0: for decoding, use full chunk.
                >0: for decoding, use fixed chunk size as set.
            num_decoding_left_chunks: number of left chunks, this is for decoding,
                the chunk size is decoding_chunk_size.
                >=0: use num_decoding_left_chunks
                <0: use all left chunks
        Returns:
            paddle.Tensor: encoder output, projected by ``final_proj`` when
                that layer exists.
            paddle.Tensor: the mask returned by the subsampling module
                (the padding mask after subsampling).
        """
        masks = make_non_pad_mask(xs_lens).unsqueeze(1)  # (B, 1, L)

        if self.global_cmvn is not None:
            xs = self.global_cmvn(xs)
        xs, pos_emb, masks = self.embed(xs, masks)
        mask_pad = masks
        chunk_masks = add_optional_chunk_mask(
            xs, masks, self.use_dynamic_chunk, self.use_dynamic_left_chunk,
            decoding_chunk_size, self.static_chunk_size,
            num_decoding_left_chunks)
        xs_lens = chunk_masks.squeeze(1).sum(1)
        xs = self.preln(xs)
        # Stack of (xs, chunk_masks, pos_emb, mask_pad) saved just before each
        # time reduction; `index` is the current stack depth.
        recover_activations: \
            List[Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor, paddle.Tensor]] = []
        index = 0
        for i, layer in enumerate(self.encoders):
            if self.reduce_idx is not None:
                if self.time_reduce is not None and i in self.reduce_idx:
                    recover_activations.append(
                        (xs, chunk_masks, pos_emb, mask_pad))
                    xs, xs_lens, chunk_masks, mask_pad = self.time_reduction_layer(
                        xs, xs_lens, chunk_masks, mask_pad)
                    # Keep every second position to follow the reduced rate.
                    pos_emb = pos_emb[:, ::2, :]
                    index += 1

            if self.recover_idx is not None:
                if self.time_reduce == 'recover' and i in self.recover_idx:
                    index -= 1
                    recover_tensor, recover_chunk_masks, recover_pos_emb, recover_mask_pad = recover_activations[
                        index]
                    # recover output length for ctc decode
                    xs = paddle.repeat_interleave(xs, repeats=2, axis=1)
                    xs = self.time_recover_layer(xs)
                    # Upsampling may overshoot by one frame; crop to the saved
                    # length before adding the skip connection.
                    recoverd_t = recover_tensor.shape[1]
                    xs = recover_tensor + xs[:, :recoverd_t, :]
                    chunk_masks = recover_chunk_masks
                    pos_emb = recover_pos_emb
                    mask_pad = recover_mask_pad

            xs, chunk_masks, _, _ = layer(xs, chunk_masks, pos_emb, mask_pad)

        if self.final_proj is not None:
            xs = self.final_proj(xs)
        return xs, masks

    def check_ascending_list(self):
        """Assert that ``reduce_idx`` and ``recover_idx`` (when not None) are
        sorted in ascending order."""
        if self.reduce_idx is not None:
            assert self.reduce_idx == sorted(self.reduce_idx), \
                "reduce_idx should be int or ascending list"
        if self.recover_idx is not None:
            assert self.recover_idx == sorted(self.recover_idx), \
                "recover_idx should be int or ascending list"

    def calculate_downsampling_factor(self, i: int) -> int:
        """Return the time downsampling factor in effect at layer ``i``.

        The factor is ``2 ** (r - c)`` where ``r`` / ``c`` are derived from the
        last entry of ``reduce_idx`` / ``recover_idx`` that is ``<= i``
        (for ascending lists: the number of such entries). It is 1 when no
        reduction is configured.
        """
        if self.reduce_idx is None:
            return 1
        else:
            reduce_exp, recover_exp = 0, 0
            for exp, rd_idx in enumerate(self.reduce_idx):
                if i >= rd_idx:
                    reduce_exp = exp + 1
            if self.recover_idx is not None:
                for exp, rc_idx in enumerate(self.recover_idx):
                    if i >= rc_idx:
                        recover_exp = exp + 1
            return int(2**(reduce_exp - recover_exp))

    def forward_chunk(
            self,
            xs: paddle.Tensor,
            offset: int,
            required_cache_size: int,
            att_cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0]),
            cnn_cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0]),
            att_mask: paddle.Tensor=paddle.ones([0, 0, 0], dtype=paddle.bool),
    ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor]:
        """ Forward just one chunk

        Args:
            xs (paddle.Tensor): chunk input, with shape (b=1, time, mel-dim),
                where `time == (chunk_size - 1) * subsample_rate + \
                        subsample.right_context + 1`
            offset (int): current offset in encoder output time stamp
            required_cache_size (int): cache size required for next chunk
                computation
                >=0: actual cache size
                <0: means all history cache is required
            att_cache (paddle.Tensor): cache tensor for KEY & VALUE in
                transformer/conformer attention, with shape
                (elayers, head, cache_t1, d_k * 2), where
                `head * d_k == hidden-dim` and
                `cache_t1 == chunk_size * num_decoding_left_chunks`.
            cnn_cache (paddle.Tensor): cache tensor for cnn_module in conformer,
                (elayers, b=1, hidden-dim, cache_t2), where
                `cache_t2 == cnn.lorder - 1`
            att_mask (paddle.Tensor): attention mask handed to every encoder
                layer (and to the time reduction layer); the default is an
                empty (0, 0, 0) tensor.

        Returns:
            paddle.Tensor: output of current input xs,
                with shape (b=1, chunk_size, hidden-dim).
            paddle.Tensor: new attention cache required for next chunk, with
                dynamic shape (elayers, head, ?, d_k * 2)
                depending on required_cache_size.
            paddle.Tensor: new conformer cnn cache required for next chunk, with
                same shape as the original cnn_cache.
        """
        assert xs.shape[0] == 1  # batch size must be one

        if self.global_cmvn is not None:
            xs = self.global_cmvn(xs)

        # tmp_masks is just for interface compatibility, [B=1, C=1, T]
        tmp_masks = paddle.ones([1, 1, xs.shape[1]], dtype=paddle.bool)
        # before embed, xs=(B, T, D1), pos_emb=(B=1, T, D)
        xs, pos_emb, _ = self.embed(xs, tmp_masks, offset=offset)

        # NOTE(xcsong): After  embed, shape(xs) is (b=1, chunk_size, hidden-dim)
        cache_t1 = att_cache.shape[2]
        chunk_size = xs.shape[1]
        attention_key_size = cache_t1 + chunk_size
        # Re-create the positional encoding so it also covers the cached keys.
        pos_emb = self.embed.position_encoding(
            offset=offset - cache_t1, size=attention_key_size)
        # First key position (cache + chunk) to keep for the next call.
        if required_cache_size < 0:
            next_cache_start = 0
        elif required_cache_size == 0:
            next_cache_start = attention_key_size
        else:
            next_cache_start = max(attention_key_size - required_cache_size, 0)

        r_att_cache = []
        r_cnn_cache = []

        mask_pad = paddle.ones([1, xs.shape[1]], dtype=paddle.bool)
        mask_pad = mask_pad.unsqueeze(1)
        max_att_len: int = 0
        # Stack of (xs, att_mask, pos_emb, mask_pad) saved just before each
        # time reduction; `index` is the current stack depth.
        recover_activations: \
            List[Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor, paddle.Tensor]] = []
        index = 0
        xs_lens = paddle.to_tensor([xs.shape[1]], dtype=paddle.int32)
        xs = self.preln(xs)
        for i, layer in enumerate(self.encoders):
            # NOTE(xcsong): Before layer.forward
            #   shape(att_cache[i:i + 1]) is (1, head, cache_t1, d_k * 2),
            #   shape(cnn_cache[i])       is (b=1, hidden-dim, cache_t2)
            if self.reduce_idx is not None:
                if self.time_reduce is not None and i in self.reduce_idx:
                    recover_activations.append(
                        (xs, att_mask, pos_emb, mask_pad))
                    xs, xs_lens, att_mask, mask_pad = self.time_reduction_layer(
                        xs, xs_lens, att_mask, mask_pad)
                    pos_emb = pos_emb[:, ::2, :]
                    index += 1

            if self.recover_idx is not None:
                if self.time_reduce == 'recover' and i in self.recover_idx:
                    index -= 1
                    recover_tensor, recover_att_mask, recover_pos_emb, recover_mask_pad = recover_activations[
                        index]
                    # recover output length for ctc decode
                    xs = paddle.repeat_interleave(xs, repeats=2, axis=1)
                    xs = self.time_recover_layer(xs)
                    recoverd_t = recover_tensor.shape[1]
                    xs = recover_tensor + xs[:, :recoverd_t, :]
                    att_mask = recover_att_mask
                    pos_emb = recover_pos_emb
                    mask_pad = recover_mask_pad

            factor = self.calculate_downsampling_factor(i)
            # The stored cache is kept at the full frame rate; subsample it by
            # `factor` for time-reduced layers and trim it so that cache length
            # plus current length equals the positional-encoding length.
            att_cache1 = att_cache[
                i:i + 1][:, :, ::factor, :][:, :, :pos_emb.shape[1] - xs.shape[
                    1], :]
            cnn_cache1 = cnn_cache[i] if cnn_cache.shape[0] > 0 else cnn_cache
            xs, _, new_att_cache, new_cnn_cache = layer(
                xs,
                att_mask,
                pos_emb,
                att_cache=att_cache1,
                cnn_cache=cnn_cache1)
            # NOTE(xcsong): After layer.forward
            #   shape(new_att_cache) is (1, head, attention_key_size, d_k * 2),
            #   shape(new_cnn_cache) is (b=1, hidden-dim, cache_t2)
            cached_att = new_att_cache[:, :, next_cache_start // factor:, :]
            cached_cnn = new_cnn_cache.unsqueeze(0)
            # Expand reduced-rate caches back by `factor` so that all layers
            # can be concatenated into one tensor below.
            cached_att = cached_att.repeat_interleave(repeats=factor, axis=2)
            if i == 0:
                # record length for the first block as max length
                max_att_len = cached_att.shape[2]
            r_att_cache.append(cached_att[:, :, :max_att_len, :])
            r_cnn_cache.append(cached_cnn)
        # NOTE(xcsong): shape(r_att_cache) is (elayers, head, ?, d_k * 2),
        #   ? may be larger than cache_t1, it depends on required_cache_size
        r_att_cache = paddle.concat(r_att_cache, axis=0)
        # NOTE(xcsong): shape(r_cnn_cache) is (e, b=1, hidden-dim, cache_t2)
        r_cnn_cache = paddle.concat(r_cnn_cache, axis=0)

        if self.final_proj is not None:
            xs = self.final_proj(xs)
        return xs, r_att_cache, r_cnn_cache


================================================
FILE: paddlespeech/s2t/modules/encoder_layer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# Copyright 2019 Mobvoi Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from wenet(https://github.com/wenet-e2e/wenet)
"""Encoder self-attention layer definition."""
from typing import Optional
from typing import Tuple

import paddle
from paddle import nn

from paddlespeech.s2t.modules.align import LayerNorm
from paddlespeech.s2t.modules.align import Linear
from paddlespeech.s2t.utils.log import Log

# Module-level logger obtained through the project's Log helper.
logger = Log(__name__).getlog()

# Names exported by `from paddlespeech.s2t.modules.encoder_layer import *`.
__all__ = [
    "TransformerEncoderLayer", "ConformerEncoderLayer",
    "SqueezeformerEncoderLayer"
]


class TransformerEncoderLayer(nn.Layer):
    """One Transformer encoder block: self-attention followed by a
    feed-forward module, each wrapped in a residual connection."""

    def __init__(
            self,
            size: int,
            self_attn: nn.Layer,
            feed_forward: nn.Layer,
            dropout_rate: float,
            normalize_before: bool=True,
            concat_after: bool=False, ):
        """Build the encoder block.

        Args:
            size (int): Feature dimension of the block input.
            self_attn (nn.Layer): Self-attention module instance, e.g.
                `MultiHeadedAttention`, `RelPositionMultiHeadedAttention` or
                `RoPERelPositionMultiHeadedAttention`.
            feed_forward (nn.Layer): Feed-forward module instance, e.g.
                `PositionwiseFeedForward`.
            dropout_rate (float): Dropout rate applied to both sub-block
                outputs.
            normalize_before (bool):
                True: layer_norm is applied before each sub-block.
                False: layer_norm is applied after each sub-block.
            concat_after (bool): How the attention output is merged.
                True: x -> x + linear(concat(x, att(x)))
                False: x -> x + att(x)
        """
        super().__init__()
        # Plain configuration flags.
        self.size = size
        self.normalize_before = normalize_before
        self.concat_after = concat_after
        # Sub-layers, registered in the same order as before.
        self.self_attn = self_attn
        self.feed_forward = feed_forward
        self.norm1 = LayerNorm(size, epsilon=1e-12)
        self.norm2 = LayerNorm(size, epsilon=1e-12)
        self.dropout = nn.Dropout(dropout_rate)
        # concat_linear is always created, even when concat_after is False
        # and forward() never uses it, so it is always part of the saved
        # parameters.
        self.concat_linear = Linear(size + size, size)

    def forward(
            self,
            x: paddle.Tensor,
            mask: paddle.Tensor,
            pos_emb: paddle.Tensor,
            mask_pad: paddle.Tensor=paddle.ones([0, 0, 0], dtype=paddle.bool),
            att_cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0]),
            cnn_cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0])
    ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor, paddle.Tensor]:
        """Run the block on ``x``.

        Args:
            x (paddle.Tensor): (#batch, time, size)
            mask (paddle.Tensor): Mask tensor for the input
                (#batch, time, time); (0, 0, 0) means fake mask.
            pos_emb (paddle.Tensor): unused, present for interface
                compatibility with ConformerEncoderLayer.
            mask_pad (paddle.Tensor): unused, present for a unified API with
                the conformer layer.
            att_cache (paddle.Tensor): Cache tensor of the KEY & VALUE
                (#batch=1, head, cache_t1, d_k * 2), head * d_k == size.
            cnn_cache (paddle.Tensor): unused convolution cache
                (#batch=1, size, cache_t2), present for interface
                compatibility with ConformerEncoderLayer.
        Returns:
            paddle.Tensor: Output tensor (#batch, time, size).
            paddle.Tensor: Mask tensor (#batch, time, time), returned as given.
            paddle.Tensor: att_cache tensor,
                (#batch=1, head, cache_t1 + time, d_k * 2).
            paddle.Tensor: cnn_cache placeholder, an empty (0, 0, 0) tensor.
        """
        pre_norm = self.normalize_before

        # ---- self-attention sub-block ----
        shortcut = x
        attn_in = self.norm1(x) if pre_norm else x
        attn_out, new_att_cache = self.self_attn(
            attn_in, attn_in, attn_in, mask, cache=att_cache)
        if self.concat_after:
            merged = paddle.concat((attn_in, attn_out), axis=-1)
            x = shortcut + self.concat_linear(merged)
        else:
            x = shortcut + self.dropout(attn_out)
        if not pre_norm:
            x = self.norm1(x)

        # ---- feed-forward sub-block ----
        shortcut = x
        ff_in = self.norm2(x) if pre_norm else x
        x = shortcut + self.dropout(self.feed_forward(ff_in))
        if not pre_norm:
            x = self.norm2(x)

        # This layer has no convolution module; return an empty placeholder.
        empty_cnn_cache = paddle.zeros([0, 0, 0], dtype=x.dtype)
        return x, mask, new_att_cache, empty_cnn_cache


class ConformerEncoderLayer(nn.Layer):
    """Conformer encoder layer.

    The forward pass chains the following sub-blocks, each wrapped in a
    residual connection: optional macaron feed-forward -> self-attention ->
    optional convolution module -> feed-forward. When a convolution module is
    configured, a final layer norm is applied to the output.
    """

    def __init__(
            self,
            size: int,
            self_attn: nn.Layer,
            feed_forward: Optional[nn.Layer]=None,
            feed_forward_macaron: Optional[nn.Layer]=None,
            conv_module: Optional[nn.Layer]=None,
            dropout_rate: float=0.1,
            normalize_before: bool=True,
            concat_after: bool=False, ):
        """Construct a ConformerEncoderLayer object.

        Args:
            size (int): Input dimension.
            self_attn (nn.Layer): Self-attention module instance.
                `MultiHeadedAttention`, `RelPositionMultiHeadedAttention` or `RoPERelPositionMultiHeadedAttention`
                instance can be used as the argument.
            feed_forward (nn.Layer): Feed-forward module instance.
                `PositionwiseFeedForward` instance can be used as the argument.
                Although the default is None, `forward` calls this module
                unconditionally, so it has to be provided in practice.
            feed_forward_macaron (nn.Layer): Additional (macaron style)
                feed-forward module instance, applied before self-attention.
                `PositionwiseFeedForward` instance can be used as the argument.
                None disables the macaron block.
            conv_module (nn.Layer): Convolution module instance.
                `ConvolutionModule` instance can be used as the argument.
                None disables the convolution block and the final layer norm.
            dropout_rate (float): Dropout rate.
            normalize_before (bool):
                True: use layer_norm before each sub-block.
                False: use layer_norm after each sub-block.
            concat_after (bool): Whether to concat attention layer's input and
                output.
                True: x -> x + linear(concat(x, att(x)))
                False: x -> x + att(x)
        """
        super().__init__()
        self.self_attn = self_attn
        self.feed_forward = feed_forward
        self.feed_forward_macaron = feed_forward_macaron
        self.conv_module = conv_module
        self.norm_ff = LayerNorm(size, epsilon=1e-12)  # for the FFN module
        self.norm_mha = LayerNorm(size, epsilon=1e-12)  # for the MHA module
        # With a macaron block both feed-forward outputs are scaled by 0.5
        # before being added to the residual; otherwise the scale is 1.0.
        if feed_forward_macaron is not None:
            self.norm_ff_macaron = LayerNorm(size, epsilon=1e-12)
            self.ff_scale = 0.5
        else:
            self.ff_scale = 1.0
        # norm_conv and norm_final only exist when a conv module is given.
        if self.conv_module is not None:
            self.norm_conv = LayerNorm(
                size, epsilon=1e-12)  # for the CNN module
            self.norm_final = LayerNorm(
                size, epsilon=1e-12)  # for the final output of the block
        self.dropout = nn.Dropout(dropout_rate)
        self.size = size
        self.normalize_before = normalize_before
        self.concat_after = concat_after
        if self.concat_after:
            self.concat_linear = Linear(size + size, size)
        else:
            self.concat_linear = nn.Identity()

    def forward(
            self,
            x: paddle.Tensor,
            mask: paddle.Tensor,
            pos_emb: paddle.Tensor,
            mask_pad: paddle.Tensor=paddle.ones([0, 0, 0], dtype=paddle.bool),
            att_cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0]),
            cnn_cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0])
    ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor, paddle.Tensor]:
        """Compute encoded features.
        Args:
            x (paddle.Tensor): Input tensor (#batch, time, size).
            mask (paddle.Tensor): Mask tensor for the input (#batch, time, time).
                (0,0,0) means fake mask.
            pos_emb (paddle.Tensor): positional encoding, must not be None
                for ConformerEncoderLayer
            mask_pad (paddle.Tensor): batch padding mask used for conv module.
               (#batch, 1, time), (0, 0, 0) means fake mask.
            att_cache (paddle.Tensor): Cache tensor of the KEY & VALUE
                (#batch=1, head, cache_t1, d_k * 2), head * d_k == size.
            cnn_cache (paddle.Tensor): Convolution cache in conformer layer
                (1, #batch=1, size, cache_t2). First dim will not be used, just
                for dy2st; it is squeezed away before use.
        Returns:
           paddle.Tensor: Output tensor (#batch, time, size).
           paddle.Tensor: Mask tensor (#batch, time, time), returned unchanged.
           paddle.Tensor: att_cache tensor,
                (#batch=1, head, cache_t1 + time, d_k * 2).
           paddle.Tensor: cnn_cache tensor (#batch, size, cache_t2). An empty
                (0, 0, 0) tensor when no convolution module is configured.
        """
        # (1, #batch=1, size, cache_t2) -> (#batch=1, size, cache_t2)
        cnn_cache = paddle.squeeze(cnn_cache, axis=0)

        # whether to use macaron style FFN
        if self.feed_forward_macaron is not None:
            residual = x
            if self.normalize_before:
                x = self.norm_ff_macaron(x)
            x = residual + self.ff_scale * self.dropout(
                self.feed_forward_macaron(x))
            if not self.normalize_before:
                x = self.norm_ff_macaron(x)

        # multi-headed self-attention module
        residual = x
        if self.normalize_before:
            x = self.norm_mha(x)

        x_att, new_att_cache = self.self_attn(
            x, x, x, mask, pos_emb, cache=att_cache)

        # Note: dropout is only applied to the attention output on the
        # non-concat path.
        if self.concat_after:
            x_concat = paddle.concat((x, x_att), axis=-1)
            x = residual + self.concat_linear(x_concat)
        else:
            x = residual + self.dropout(x_att)

        if not self.normalize_before:
            x = self.norm_mha(x)

        # convolution module
        # Fake new cnn cache here, and then change it in conv_module
        new_cnn_cache = paddle.zeros([0, 0, 0], dtype=x.dtype)
        if self.conv_module is not None:
            residual = x
            if self.normalize_before:
                x = self.norm_conv(x)

            x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache)
            x = residual + self.dropout(x)

            if not self.normalize_before:
                x = self.norm_conv(x)

        # feed forward module
        residual = x
        if self.normalize_before:
            x = self.norm_ff(x)

        x = residual + self.ff_scale * self.dropout(self.feed_forward(x))

        if not self.normalize_before:
            x = self.norm_ff(x)

        # final layer norm, only present together with the conv module
        if self.conv_module is not None:
            x = self.norm_final(x)

        return x, mask, new_att_cache, new_cnn_cache


class SqueezeformerEncoderLayer(nn.Layer):
    """Squeezeformer encoder layer.

    The forward pass chains four sub-blocks, each wrapped in a residual
    connection with its own layer norm:
    self-attention -> feed-forward 1 -> convolution module -> feed-forward 2.
    """

    def __init__(self,
                 size: int,
                 self_attn: paddle.nn.Layer,
                 feed_forward1: Optional[nn.Layer]=None,
                 conv_module: Optional[nn.Layer]=None,
                 feed_forward2: Optional[nn.Layer]=None,
                 normalize_before: bool=False,
                 dropout_rate: float=0.1,
                 concat_after: bool=False):
        """Construct a SqueezeformerEncoderLayer object.

        Args:
            size (int): Input dimension.
            self_attn (paddle.nn.Layer): Self-attention module instance.
                `MultiHeadedAttention`, `RelPositionMultiHeadedAttention` or `RoPERelPositionMultiHeadedAttention`
                instance can be used as the argument.
            feed_forward1 (paddle.nn.Layer): Feed-forward module instance.
                `PositionwiseFeedForward` instance can be used as the argument.
            conv_module (paddle.nn.Layer): Convolution module instance.
                `ConvolutionLayer` instance can be used as the argument.
            feed_forward2 (paddle.nn.Layer): Feed-forward module instance.
                `PositionwiseFeedForward` instance can be used as the argument.
            normalize_before (bool):
                True: use layer_norm before each sub-block.
                False: use layer_norm after each sub-block.
            dropout_rate (float): Dropout rate.
            concat_after (bool): Whether to concat attention layer's input and
                output.
                True: x -> x + linear(concat(x, att(x)))
                False: x -> x + att(x)

        Note:
            `feed_forward1`, `conv_module` and `feed_forward2` default to
            None, but `forward` calls all three unconditionally, so they have
            to be provided in practice.
        """
        super().__init__()
        self.size = size
        self.self_attn = self_attn
        self.layer_norm1 = LayerNorm(size)  # for the self-attention block
        self.ffn1 = feed_forward1
        self.layer_norm2 = LayerNorm(size)  # for the first FFN block
        self.conv_module = conv_module
        self.layer_norm3 = LayerNorm(size)  # for the convolution block
        self.ffn2 = feed_forward2
        self.layer_norm4 = LayerNorm(size)  # for the second FFN block
        self.normalize_before = normalize_before
        self.dropout = nn.Dropout(dropout_rate)
        self.concat_after = concat_after
        if concat_after:
            self.concat_linear = Linear(size + size, size)
        else:
            self.concat_linear = nn.Identity()

    def forward(
            self,
            x: paddle.Tensor,
            mask: paddle.Tensor,
            pos_emb: paddle.Tensor,
            mask_pad: paddle.Tensor=paddle.ones([0, 0, 0], dtype=paddle.bool),
            att_cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0]),
            cnn_cache: paddle.Tensor=paddle.zeros([0, 0, 0, 0]),
    ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor, paddle.Tensor]:
        """Compute encoded features.
        Args:
            x (paddle.Tensor): Input tensor (#batch, time, size).
            mask (paddle.Tensor): Mask tensor for the input (#batch, time, time).
                (0,0,0) means fake mask.
            pos_emb (paddle.Tensor): positional encoding, passed to the
                self-attention module.
            mask_pad (paddle.Tensor): batch padding mask used for conv module.
               (#batch, 1, time), (0, 0, 0) means fake mask.
            att_cache (paddle.Tensor): Cache tensor of the KEY & VALUE
                (#batch=1, head, cache_t1, d_k * 2), head * d_k == size.
            cnn_cache (paddle.Tensor): Convolution cache, handed to
                `conv_module` as-is.
                NOTE(review): this was documented as
                (1, #batch=1, size, cache_t2) with an unused leading dim for
                dy2st, but this method does not squeeze that dim before
                calling `conv_module` -- confirm the expected shape against
                the caller.
        Returns:
           paddle.Tensor: Output tensor (#batch, time, size).
           paddle.Tensor: Mask tensor (#batch, time, time), returned unchanged.
           paddle.Tensor: att_cache tensor,
                (#batch=1, head, cache_t1 + time, d_k * 2).
           paddle.Tensor: cnn_cache tensor (#batch, size, cache_t2).
        """
        # self attention module
        residual = x
        if self.normalize_before:
            x = self.layer_norm1(x)
        # att_cache is passed positionally, right after pos_emb
        x_att, new_att_cache = self.self_attn(x, x, x, mask, pos_emb, att_cache)
        if self.concat_after:
            x_concat = paddle.concat((x, x_att), axis=-1)
            x = residual + self.concat_linear(x_concat)
        else:
            x = residual + self.dropout(x_att)
        if not self.normalize_before:
            x = self.layer_norm1(x)

        # first ffn module
        residual = x
        if self.normalize_before:
            x = self.layer_norm2(x)
        x = self.ffn1(x)
        x = residual + self.dropout(x)
        if not self.normalize_before:
            x = self.layer_norm2(x)

        # conv module
        residual = x
        if self.normalize_before:
            x = self.layer_norm3(x)
        x, new_cnn_cache = self.conv_module(x, mask_pad, cnn_cache)
        x = residual + self.dropout(x)
        if not self.normalize_before:
            x = self.layer_norm3(x)

        # second ffn module
        residual = x
        if self.normalize_before:
            x = self.layer_norm4(x)
        x = self.ffn2(x)
        # NOTE: the layer-level dropout is applied to the ffn2 output here,
        # exactly like for the other sub-blocks.
        x = residual + self.dropout(x)
        if not self.normalize_before:
            x = self.layer_norm4(x)

        return x, mask, new_att_cache, new_cnn_cache


================================================
FILE: paddlespeech/s2t/modules/fbank.py
================================================
import paddle
from paddle import nn

from paddlespeech.audio.compliance import kaldi
from paddlespeech.s2t.utils.log import Log

# Module-level logger obtained from the project's Log helper.
logger = Log(__name__).getlog()

# Names exported by `from ... import *`.
__all__ = ['KaldiFbank']


class KaldiFbank(nn.Layer):
    """Filter-bank feature layer that wraps `kaldi.fbank` for one waveform."""

    def __init__(
            self,
            fs=16000,
            n_mels=80,
            n_shift=160,  # unit:sample, 10ms
            win_length=400,  # unit:sample, 25ms
            energy_floor=0.0,
            dither=0.0):
        """
        Args:
            fs (int): sample rate of the audio
            n_mels (int): number of mel filter banks
            n_shift (int): number of points in a frame shift
            win_length (int): number of points in a frame windows
            energy_floor (float): Floor on energy in Spectrogram computation (absolute)
            dither (float): Dithering constant. Default 0.0
        """
        super().__init__()
        self.fs = fs
        self.n_mels = n_mels
        # `kaldi.fbank` is given frame length/shift in milliseconds, so the
        # sample counts are converted using the number of samples per ms.
        num_point_ms = fs / 1000
        self.n_frame_length = win_length / num_point_ms
        self.n_frame_shift = n_shift / num_point_ms
        self.energy_floor = energy_floor
        self.dither = dither

    def __repr__(self):
        """Return a short description of the configured extractor."""
        # The template previously ended with "))", producing an unbalanced
        # closing parenthesis in the output.
        return (
            "{name}(fs={fs}, n_mels={n_mels}, "
            "n_frame_shift={n_frame_shift}, n_frame_length={n_frame_length}, "
            "dither={dither})".format(
                name=self.__class__.__name__,
                fs=self.fs,
                n_mels=self.n_mels,
                n_frame_shift=self.n_frame_shift,
                n_frame_length=self.n_frame_length,
                dither=self.dither, ))

    def forward(self, x: paddle.Tensor):
        """Compute filter-bank features for a single mono waveform.

        Args:
            x (paddle.Tensor): shape (Ti).
                Not support: [Time, Channel] and Batch mode.

        Returns:
            paddle.Tensor: (T, D)
        """
        assert x.ndim == 1

        feat = kaldi.fbank(
            x.unsqueeze(0),  # add a leading channel dim, (C, Ti)
            n_mels=self.n_mels,
            frame_length=self.n_frame_length,
            frame_shift=self.n_frame_shift,
            dither=self.dither,
            energy_floor=self.energy_floor,
            sr=self.fs)

        assert feat.ndim == 2  # (T,D)
        return feat


================================================
FILE: paddlespeech/s2t/modules/initializer.py
================================================
#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


class DefaultInitializerContext(object):
    """Context manager that temporarily overrides `align.global_init_type`.

    On entry the current value of `align.global_init_type` is remembered and,
    when `init_type` is not None, replaced by `init_type`. On exit the
    remembered value is put back, so nested contexts (and contexts created
    with `init_type=None`) no longer wipe the setting of an enclosing context.

        egs:
        with DefaultInitializerContext("kaiming_uniform"):
            code for setup_model
    """

    def __init__(self, init_type=None):
        """
        Args:
            init_type: value to install as `align.global_init_type` while the
                context is active; None leaves the current value untouched.
        """
        self.init_type = init_type
        # Stack of values to restore; a stack keeps re-entrant use of the
        # same instance correct.
        self._saved_init_types = []

    def __enter__(self):
        # Imported lazily, as in the rest of this class.
        from paddlespeech.s2t.modules import align
        self._saved_init_types.append(
            getattr(align, "global_init_type", None))
        if self.init_type is not None:
            align.global_init_type = self.init_type
        return

    def __exit__(self, exc_type, exc_val, exc_tb):
        from paddlespeech.s2t.modules import align
        # Fall back to None if __exit__ is called without a matching
        # __enter__ (the historical behavior).
        previous = self._saved_init_types.pop(
        ) if self._saved_init_types else None
        align.global_init_type = previous


================================================
FILE: paddlespeech/s2t/modules/loss.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# Copyright 2019 Mobvoi Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from wenet(https://github.com/wenet-e2e/wenet)
import inspect

import paddle
from paddle import nn
from paddle.nn import functional as F

from paddlespeech.s2t.utils.log import Log

# Module-level logger obtained from the project's Log helper.
logger = Log(__name__).getlog()

# Names exported by `from ... import *`.
__all__ = ['CTCLoss', "LabelSmoothingLoss"]


class CTCLoss(nn.Layer):
    """CTC loss layer built on top of `paddle.nn.CTCLoss`.

    Besides forwarding to the wrapped loss it can divide the result by the
    batch size and pass gradient-normalization flags to the wrapped loss when
    its `forward` signature accepts them.
    """

    def __init__(self,
                 blank=0,
                 reduction='sum',
                 batch_average=False,
                 grad_norm_type=None):
        """
        Args:
            blank (int): blank label id, forwarded to `nn.CTCLoss`.
            reduction (str): reduction mode, forwarded to `nn.CTCLoss`.
            batch_average (bool): divide the loss by the batch size.
            grad_norm_type (str or None): one of 'instance', 'batch',
                'frame' or None (no gradient normalization).
        """
        super().__init__()
        # `blank` is handed to the wrapped loss unchanged
        self.loss = nn.CTCLoss(blank=blank, reduction=reduction)
        self.batch_average = batch_average

        logger.debug(
            f"CTCLoss Loss reduction: {reduction}, div-bs: {batch_average}")
        logger.debug(f"CTCLoss Grad Norm Type: {grad_norm_type}")

        assert grad_norm_type in ('instance', 'batch', 'frame', None)
        if grad_norm_type not in (None, 'instance', 'batch', 'frame'):
            # only reachable when assertions are disabled
            raise ValueError(f"CTCLoss Grad Norm no support {grad_norm_type}")
        # at most one of the three flags ends up True
        self.norm_by_times = grad_norm_type == 'instance'
        self.norm_by_batchsize = grad_norm_type == 'batch'
        self.norm_by_total_logits_len = grad_norm_type == 'frame'

        requested = {
            "norm_by_times": self.norm_by_times,
            "norm_by_batchsize": self.norm_by_batchsize,
            "norm_by_total_logits_len": self.norm_by_total_logits_len,
        }

        # Keep only the flags that the wrapped forward() actually declares
        try:
            accepted = inspect.signature(self.loss.forward).parameters
        except ValueError:
            # signature inspection can fail, e.g. for built-in functions
            accepted = {}
        supported = {}
        unsupported = {}
        for key, value in requested.items():
            if key in accepted:
                supported[key] = value
            else:
                unsupported[key] = value
        self._kwargs = supported
        logger.debug(
            f"{self.loss} kwargs:{self._kwargs}, not support: {unsupported}")

    def forward(self, logits, ys_pad, hlens, ys_lens):
        """Compute CTC loss.

        Args:
            logits ([paddle.Tensor]): [B, Tmax, D]
            ys_pad ([paddle.Tensor]): [B, Tmax]
            hlens ([paddle.Tensor]): [B]
            ys_lens ([paddle.Tensor]): [B]

        Returns:
            [paddle.Tensor]: scalar. If reduction is 'none', then (N),
                where N is the batch size.
        """
        batch_size = logits.shape[0]
        # the wrapped loss takes raw logits in time-major layout:
        # (B, L, D) -> (L, B, D)
        time_major_logits = logits.transpose([1, 0, 2])
        labels = ys_pad.astype(paddle.int32)
        loss = self.loss(time_major_logits, labels, hlens, ys_lens,
                         **self._kwargs)
        if not self.batch_average:
            return loss
        # Batch-size average
        return loss / batch_size


class LabelSmoothingLoss(nn.Layer):
    """KL-divergence loss against label-smoothed targets.

    A plain cross-entropy target is one-hot, e.g. for labels [0,1,2]:
        [
            [1.0, 0.0, 0.0],
            [0.0, 1.0, 0.0],
            [0.0, 0.0, 1.0],
        ]
    With smoothing, part of the probability mass of the true label is spread
    evenly over the other classes, e.g. smoothing=0.1 gives:
        [
            [0.9, 0.05, 0.05],
            [0.05, 0.9, 0.05],
            [0.05, 0.05, 0.9],
        ]
    """

    def __init__(self,
                 size: int,
                 padding_idx: int,
                 smoothing: float,
                 normalize_length: bool=False):
        """Set up the loss.

        Args:
            size (int): the number of class
            padding_idx (int): padding class id which will be ignored for loss
            smoothing (float): smoothing rate (0.0 means the conventional CE)
            normalize_length (bool):
                True, normalize loss by the number of non-padding targets;
                False, normalize loss by batch size.
                Defaults to False.
        """
        super().__init__()
        self.size = size
        self.padding_idx = padding_idx
        self.smoothing = smoothing
        self.confidence = 1.0 - smoothing
        self.normalize_length = normalize_length
        self.criterion = nn.KLDivLoss(reduction="none")

    def forward(self, x: paddle.Tensor, target: paddle.Tensor) -> paddle.Tensor:
        """Compute loss between x and target.

        Predictions and labels are flattened to (batch*seqlen, class) and
        (batch*seqlen,), and positions whose label equals `padding_idx` are
        excluded from the sum.

        Args:
            x (paddle.Tensor): prediction (batch, seqlen, class)
            target (paddle.Tensor):
                target signal masked with self.padding_idx (batch, seqlen)
        Returns:
            loss (paddle.Tensor) : the summed KL loss divided by either the
                number of non-padding targets or the batch size.
        """
        batch_size, _, num_class = x.shape
        assert num_class == self.size
        flat_pred = x.reshape((-1, self.size))
        flat_target = target.reshape([-1])

        # every class starts with the "off" probability
        smoothed = paddle.full_like(flat_pred,
                                    self.smoothing / (self.size - 1))
        is_pad = flat_target == self.padding_idx  # (batch*seqlen,)

        # padding labels may be invalid class ids (e.g. -1); map them to 0,
        # they are masked out of the loss below anyway
        flat_target = flat_target.masked_fill(is_pad, 0)
        hit = F.one_hot(flat_target, self.size)
        # equivalent of scattering `confidence` at the label position
        smoothed *= (1 - hit)
        smoothed += hit * self.confidence

        log_prob = F.log_softmax(flat_pred, axis=1)
        kl = self.criterion(log_prob, smoothed)

        # cast the bool mask before summing to count padded positions
        num_valid = len(flat_target) - int(
            is_pad.type_as(flat_target).sum())
        if self.normalize_length:
            denom = num_valid
        else:
            denom = batch_size
        masked_sum = kl.masked_fill(is_pad.unsqueeze(1), 0).sum()
        return masked_sum / denom


================================================
FILE: paddlespeech/s2t/modules/mask.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# Copyright 2019 Mobvoi Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from wenet(https://github.com/wenet-e2e/wenet)
import paddle

from paddlespeech.s2t.utils.log import Log

# Module-level logger obtained from the project's Log helper.
logger = Log(__name__).getlog()

# Names exported by `from ... import *`.
__all__ = [
    "make_xs_mask", "make_pad_mask", "make_non_pad_mask", "subsequent_mask",
    "subsequent_chunk_mask", "add_optional_chunk_mask", "mask_finished_scores",
    "mask_finished_preds"
]


def make_xs_mask(xs: paddle.Tensor, pad_value=0.0) -> paddle.Tensor:
    """Make mask tensor marking the non-padded frames of `xs`.

    A frame is marked True only when every one of its features differs from
    `pad_value`.
    NOTE(review): a real frame containing a single feature equal to
    `pad_value` is therefore reported as padding -- confirm this is intended.

    Args:
        xs (paddle.Tensor): (B, T, D), `pad_value` for pad.
        pad_value (float): value that padded frames are filled with.
    Returns:
        paddle.Tensor: Mask Tensor indices of non-padded part. (B, T)
    """
    feat_dim = xs.shape[-1]
    # one reference padding frame, broadcast against (B, T, D)
    reference = paddle.full([1, 1, feat_dim], pad_value, dtype=xs.dtype)
    differs = xs != reference
    return differs.all(axis=-1)


def make_pad_mask(lengths: paddle.Tensor) -> paddle.Tensor:
    """Make mask tensor that is True at padded positions.

    The mask width T is the largest value in `lengths`.
    See description of make_non_pad_mask.

    Args:
        lengths (paddle.Tensor): Batch of lengths (B,).
    Returns:
        paddle.Tensor: Mask tensor containing indices of padded part.
        (B, T)
    Examples:
        >>> lengths = [5, 3, 2]
        >>> make_pad_mask(lengths)
        masks = [[0, 0, 0, 0 ,0],
                 [0, 0, 0, 1, 1],
                 [0, 0, 1, 1, 1]]
    """
    # shape/max are converted to python ints (jit does not support
    # Tensor.dim() / Tensor.ndim, so no rank assertion is made here)
    num_seqs = int(lengths.shape[0])
    longest = int(lengths.max())
    positions = paddle.arange(0, longest, dtype=paddle.int64)
    position_grid = positions.unsqueeze(0).expand([num_seqs, longest])
    # a position is padding once it reaches the sequence length
    return position_grid >= lengths.unsqueeze(-1)


def make_non_pad_mask(lengths: paddle.Tensor) -> paddle.Tensor:
    """Make mask tensor that is True at non-padded positions.

    Sequences in a batch may have different lengths and are padded to a
    common size for batch computing. The padded part must not feed values
    into context dependent blocks such as attention or convolution, so it is
    masked out. This mask is used in both encoder and decoder.
    1 for non-padded part and 0 for padded part.

    Args:
        lengths (paddle.Tensor): Batch of lengths (B,).
    Returns:
        paddle.Tensor: mask tensor containing indices of non-padded part.
        (B, T)
    Examples:
        >>> lengths = [5, 3, 2]
        >>> make_non_pad_mask(lengths)
        masks = [[1, 1, 1, 1 ,1],
                 [1, 1, 1, 0, 0],
                 [1, 1, 0, 0, 0]]
    """
    # logical_not() is used instead of the `~` operator
    pad_mask = make_pad_mask(lengths)
    return pad_mask.logical_not()


def subsequent_mask(size: int) -> paddle.Tensor:
    """Create a lower-triangular (causal) boolean mask of shape (size, size).

    This mask is used only in the decoder, which works auto-regressively:
    each step may attend only to itself and the steps to its left.
    The encoder uses full attention when streaming is not required, and a
    chunk-based mask otherwise; see subsequent_chunk_mask.

    Args:
        size (int): size of mask
    Returns:
        paddle.Tensor: mask, [size, size]
    Examples:
        >>> subsequent_mask(3)
        [[1, 0, 0],
         [1, 1, 0],
         [1, 1, 1]]
    """
    all_true = paddle.ones([size, size], dtype=paddle.bool)
    lower_triangle = paddle.tril(all_true)
    return lower_triangle


def subsequent_chunk_mask(
        size: int,
        chunk_size: int,
        num_left_chunks: int=-1, ) -> paddle.Tensor:
    """Create a chunk-wise attention mask (size, size) for the streaming
       encoder.

    Row i may attend to every position of its own chunk plus a number of
    chunks to its left.

    Args:
        size (int): size of mask
        chunk_size (int): size of chunk
        num_left_chunks (int): number of left chunks
            <0: use all left chunks
            >=0: use num_left_chunks
    Returns:
        paddle.Tensor: mask, [size, size]
    Examples:
        >>> subsequent_chunk_mask(4, 2)
        [[1, 1, 0, 0],
         [1, 1, 0, 0],
         [1, 1, 1, 1],
         [1, 1, 1, 1]]
    """
    mask = paddle.zeros([size, size], dtype=paddle.bool)
    for row in range(size):
        chunk_idx = row // chunk_size
        if num_left_chunks >= 0:
            # first visible column: start of the oldest allowed left chunk
            left = max(0, (chunk_idx - num_left_chunks) * chunk_size)
        else:
            left = 0
        # one past the last column of the row's own chunk
        right = min(size, (chunk_idx + 1) * chunk_size)
        mask[row, left:right] = True
    return mask


def add_optional_chunk_mask(xs: paddle.Tensor,
                            masks: paddle.Tensor,
                            use_dynamic_chunk: bool,
                            use_dynamic_left_chunk: bool,
                            decoding_chunk_size: int,
                            static_chunk_size: int,
                            num_decoding_left_chunks: int):
    """ Apply optional mask for encoder.
    Args:
        xs (paddle.Tensor): padded input, (B, L, D), L for max length
        masks (paddle.Tensor): mask for xs, (B, 1, L)
        use_dynamic_chunk (bool): whether to use dynamic chunk or not
        use_dynamic_left_chunk (bool): whether to use dynamic left chunk for
            training.
        decoding_chunk_size (int): decoding chunk size for dynamic chunk, it's
            0: default for training, use random dynamic chunk.
            <0: for decoding, use full chunk.
            >0: for decoding, use fixed chunk size as set.
        static_chunk_size (int): chunk size for static chunk training/decoding
            if it's greater than 0, if use_dynamic_chunk is true,
            this parameter will be ignored
        num_decoding_left_chunks (int): number of left chunks, this is for decoding,
            the chunk size is decoding_chunk_size.
            >=0: use num_decoding_left_chunks
            <0: use all left chunks
    Returns:
        paddle.Tensor: chunk mask of the input xs, (B, L, L) when a chunk
            mask is applied; `masks` itself when neither dynamic nor static
            chunking is enabled.
    """
    # Step 1: decide chunk size and number of left chunks (or bail out).
    if use_dynamic_chunk:
        max_len = xs.shape[1]
        if decoding_chunk_size < 0:
            chunk_size = max_len
            num_left_chunks = -1
        elif decoding_chunk_size > 0:
            chunk_size = decoding_chunk_size
            num_left_chunks = num_decoding_left_chunks
        else:
            # chunk size is either [1, 25] or full context(max_len).
            # Since we use 4 times subsampling and allow up to 1s(100 frames)
            # delay, the maximum frame is 100 / 4 = 25.
            num_left_chunks = -1
            if max_len <= 1:
                # paddle.randint needs low < high, so there is nothing to
                # sample from for such short inputs: use the full context.
                chunk_size = max_len
            else:
                chunk_size = int(paddle.randint(1, max_len, (1, )))
                if chunk_size > max_len // 2:
                    chunk_size = max_len
                else:
                    chunk_size = chunk_size % 25 + 1
                    if use_dynamic_left_chunk:
                        max_left_chunks = (max_len - 1) // chunk_size
                        # With max_left_chunks == 0 a single chunk already
                        # covers the input; keep "all left chunks" (-1)
                        # instead of calling randint(0, 0), which fails.
                        if max_left_chunks > 0:
                            num_left_chunks = int(
                                paddle.randint(0, max_left_chunks, (1, )))
    elif static_chunk_size > 0:
        chunk_size = static_chunk_size
        num_left_chunks = num_decoding_left_chunks
    else:
        # no chunking requested
        return masks

    # Step 2: build the chunk mask and combine it with the padding mask.
    chunk_masks = subsequent_chunk_mask(xs.shape[1], chunk_size,
                                        num_left_chunks)  # (L, L)
    chunk_masks = chunk_masks.unsqueeze(0)  # (1, L, L)
    # logical_and() is used instead of the `&` operator
    chunk_masks = masks.logical_and(chunk_masks)  # (B, L, L)
    return chunk_masks


def mask_finished_scores(score: paddle.Tensor,
                         flag: paddle.Tensor) -> paddle.Tensor:
    """
    Keep a single alive branch for every finished sequence: its first branch
    gets score zero and all its other branches get -inf. Unfinished rows are
    left untouched. `score` is updated with `masked_fill_` and returned.
    Args:
        score (paddle.Tensor): A real value array with shape
            (batch_size * beam_size, beam_size).
        flag (paddle.Tensor): A bool array with shape
            (batch_size * beam_size, 1).
    Returns:
        paddle.Tensor: (batch_size * beam_size, beam_size).
    Examples:
        flag: tensor([[ True],
                      [False]])
        score: tensor([[-0.3666, -0.6664,  0.6019],
                       [-1.1490, -0.2948,  0.7460]])
        unfinished: tensor([[False,  True,  True],
                            [False, False, False]])
        finished: tensor([[ True, False, False],
                          [False, False, False]])
        return: tensor([[ 0.0000,    -inf,    -inf],
                        [-1.1490, -0.2948,  0.7460]])
    """
    beam_size = score.shape[-1]
    all_false = paddle.zeros_like(flag, dtype=paddle.bool)
    if beam_size <= 1:
        # single branch: nothing to prune, only zero the finished rows
        unfinished = all_false
        finished = flag
    else:
        tail_width = beam_size - 1
        # columns 1.. of finished rows are pruned
        unfinished = paddle.concat(
            (all_false, flag.tile([1, tail_width])), axis=1)
        # column 0 of finished rows is the surviving branch
        finished = paddle.concat(
            (flag, all_false.tile([1, tail_width])), axis=1)

    score.masked_fill_(unfinished, -float('inf'))
    score.masked_fill_(finished, 0)
    return score


def mask_finished_preds(pred: paddle.Tensor, flag: paddle.Tensor,
                        eos: int) -> paddle.Tensor:
    """
    If a sequence is finished, all of its branches should be <eos>.

    ``pred`` is modified in place and returned.

    Args:
        pred (paddle.Tensor): A int array with shape
            (batch_size * beam_size, beam_size).
        flag (paddle.Tensor): A bool array with shape
            (batch_size * beam_size, 1).
        eos (int): Token id written into every branch of a finished row.
    Returns:
        paddle.Tensor: (batch_size * beam_size, beam_size).
    """
    beam_size = pred.shape[-1]
    # Broadcast the per-row flag over all branches. ``tile`` is the native
    # Paddle API (already used by mask_finished_scores); ``Tensor.repeat`` is
    # a torch-style method that only exists if it has been monkey-patched in.
    finished = flag.tile([1, beam_size])
    return pred.masked_fill_(finished, eos)


================================================
FILE: paddlespeech/s2t/modules/positionwise_feed_forward.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# Copyright 2019 Mobvoi Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from wenet(https://github.com/wenet-e2e/wenet)
"""Positionwise feed forward layer definition."""
import paddle
from paddle import nn
from paddle.nn import initializer as I

from paddlespeech.s2t.modules.align import Linear
from paddlespeech.s2t.utils.log import Log

# Module-level logger obtained from the project's Log wrapper.
logger = Log(__name__).getlog()

# Names exported by ``from ... import *``.
__all__ = ["PositionwiseFeedForward"]


class PositionwiseFeedForward(nn.Layer):
    """Positionwise feed forward layer.

    Computes ``w_2(dropout(activation(w_1(x))))``; the output dimension is the
    same as the input dimension.
    """

    def __init__(self,
                 idim: int,
                 hidden_units: int,
                 dropout_rate: float,
                 activation: nn.Layer=nn.ReLU(),
                 adaptive_scale: bool=False,
                 init_weights: bool=False):
        """Construct a PositionwiseFeedForward object.

        FeedForward is applied on each position of the sequence.
        The output dim is the same as the input dim.

        Args:
            idim (int): Input dimension.
            hidden_units (int): The number of hidden units.
            dropout_rate (float): Dropout rate.
            activation (paddle.nn.Layer): Activation function.
            adaptive_scale (bool): If True, create learnable ``ada_scale`` and
                ``ada_bias`` parameters of shape (1, 1, idim) that are applied
                to the input before the first linear layer.
            init_weights (bool): If True, call :meth:`init_weights` after the
                sub-layers have been created.
        """
        super().__init__()
        self.idim = idim
        self.hidden_units = hidden_units
        self.w_1 = Linear(idim, hidden_units)
        self.activation = activation
        self.dropout = nn.Dropout(dropout_rate)
        self.w_2 = Linear(hidden_units, idim)
        self.adaptive_scale = adaptive_scale
        if self.adaptive_scale:
            ada_scale = self.create_parameter(
                [1, 1, idim], default_initializer=I.XavierUniform())
            self.add_parameter('ada_scale', ada_scale)
            ada_bias = self.create_parameter(
                [1, 1, idim], default_initializer=I.XavierUniform())
            self.add_parameter('ada_bias', ada_bias)

        if init_weights:
            self.init_weights()

    @staticmethod
    def _uniform_(layer: nn.Layer, bound: float) -> None:
        """Overwrite ``layer``'s weight and bias with U(-bound, bound) samples."""
        for name in ('weight', 'bias'):
            param = getattr(layer, name, None)
            if param is None:
                continue
            param.set_value(
                paddle.uniform(
                    param.shape, dtype=param.dtype, min=-bound, max=bound))

    def init_weights(self):
        """Re-initialise both linear layers uniformly in +-fan_in**-0.5.

        The parameters already exist when this runs, so their values are
        overwritten directly. Assigning an initializer to ``_param_attr`` /
        ``_bias_attr`` at this point would not touch the created parameters.
        """
        ffn1_max = self.idim**-0.5
        ffn2_max = self.hidden_units**-0.5
        self._uniform_(self.w_1, ffn1_max)
        self._uniform_(self.w_2, ffn2_max)

    def forward(self, xs: paddle.Tensor) -> paddle.Tensor:
        """Forward function.
        Args:
            xs: input tensor (B, Lmax, D)
        Returns:
            output tensor, (B, Lmax, D)
        """
        if self.adaptive_scale:
            # Learned affine transform, broadcast over batch and time.
            xs = self.ada_scale * xs + self.ada_bias
        return self.w_2(self.dropout(self.activation(self.w_1(xs))))


================================================
FILE: paddlespeech/s2t/modules/subsampling.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# Copyright 2019 Mobvoi Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from wenet(https://github.com/wenet-e2e/wenet)
"""Subsampling layer definition."""
from typing import Tuple

import paddle
from paddle import nn

from paddlespeech.s2t.modules.align import Conv2D
from paddlespeech.s2t.modules.align import LayerNorm
from paddlespeech.s2t.modules.align import Linear
from paddlespeech.s2t.modules.embedding import PositionalEncoding
from paddlespeech.s2t.utils.log import Log

# Module-level logger obtained from the project's Log wrapper.
logger = Log(__name__).getlog()

# Names exported by ``from ... import *``; the BaseSubsampling and
# Conv2dSubsampling base classes defined below are not listed here.
__all__ = [
    "LinearNoSubsampling", "Conv2dSubsampling4", "Conv2dSubsampling6",
    "Conv2dSubsampling8", "DepthwiseConv2DSubsampling4"
]


class BaseSubsampling(nn.Layer):
    """Shared base of the input front-ends defined in this module.

    Stores the positional-encoding object and the two bookkeeping attributes
    (``right_context`` and ``subsampling_rate``) that subclasses overwrite.
    """

    def __init__(self, pos_enc_class: nn.Layer=PositionalEncoding):
        super(BaseSubsampling, self).__init__()
        # Stored exactly as given; subclasses invoke it as
        # ``self.pos_enc(x, offset)``.
        self.pos_enc = pos_enc_class
        # stride = subsampling_rate * chunk_size
        self.subsampling_rate = 1
        # window size = (1 + right_context) + (chunk_size -1) * subsampling_rate
        self.right_context = 0

    def position_encoding(self, offset: int, size: int) -> paddle.Tensor:
        """Delegate to ``self.pos_enc.position_encoding(offset, size)``."""
        encoding = self.pos_enc.position_encoding(offset, size)
        return encoding


class LinearNoSubsampling(BaseSubsampling):
    """Project the input with a linear layer; the time axis is not reduced."""

    def __init__(self,
                 idim: int,
                 odim: int,
                 dropout_rate: float,
                 pos_enc_class: nn.Layer=PositionalEncoding):
        """Build the projection stack.

        Args:
            idim (int): Input dimension.
            odim (int): Output dimension.
            dropout_rate (float): Dropout rate.
            pos_enc_class (PositionalEncoding): position encoding object
                applied to the projected features.
        """
        super().__init__(pos_enc_class)
        # Linear -> LayerNorm -> Dropout -> ReLU, in this order.
        projection = Linear(idim, odim)
        norm = LayerNorm(odim, epsilon=1e-12)
        drop = nn.Dropout(dropout_rate)
        relu = nn.ReLU()
        self.out = nn.Sequential(projection, norm, drop, relu)
        self.subsampling_rate = 1
        self.right_context = 0

    def forward(self, x: paddle.Tensor, x_mask: paddle.Tensor, offset: int=0
                ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor]:
        """Project ``x`` and add positional information.

        Args:
            x (paddle.Tensor): Input tensor (#batch, time, idim).
            x_mask (paddle.Tensor): Input mask (#batch, 1, time).
            offset (int): position encoding offset.
        Returns:
            paddle.Tensor: projected tensor (#batch, time', odim),
                where time' = time .
            paddle.Tensor: positional encoding
            paddle.Tensor: the input mask, returned unchanged
                (#batch, 1, time'), where time' = time .
        """
        projected = self.out(x)
        encoded, pos_emb = self.pos_enc(projected, offset)
        return encoded, pos_emb, x_mask


class Conv2dSubsampling(BaseSubsampling):
    """Common parent of the Conv2dSubsampling4/6/8 front-ends.

    Adds no behaviour of its own; every constructor argument is forwarded to
    BaseSubsampling.
    """

    def __init__(self, *args, **kwargs):
        super(Conv2dSubsampling, self).__init__(*args, **kwargs)


class Conv2dSubsampling4(Conv2dSubsampling):
    """Convolutional 2D subsampling (to 1/4 length)."""

    def __init__(self,
                 idim: int,
                 odim: int,
                 dropout_rate: float,
                 pos_enc_class: nn.Layer=PositionalEncoding):
        """Construct a Conv2dSubsampling4 object.

        Args:
            idim (int): Input dimension.
            odim (int): Output dimension.
            dropout_rate (float): Dropout rate (accepted but not used by this
                class).
            pos_enc_class (PositionalEncoding): position encoding object.
        """
        super().__init__(pos_enc_class)
        # Two kernel-3 / stride-2 convolutions, each followed by ReLU.
        first_conv = Conv2D(1, odim, 3, 2)
        first_act = nn.ReLU()
        second_conv = Conv2D(odim, odim, 3, 2)
        second_act = nn.ReLU()
        self.conv = nn.Sequential(first_conv, first_act, second_conv,
                                  second_act)
        # Feature bins remaining after the two unpadded convolutions.
        freq_bins = ((idim - 1) // 2 - 1) // 2
        self.out = nn.Sequential(Linear(odim * freq_bins, odim))
        # The right context for every conv layer is computed by:
        # (kernel_size - 1) * frame_rate_of_this_layer
        # 6 = (3 - 1) * 1 + (3 - 1) * 2
        self.right_context = 6
        self.subsampling_rate = 4

    def forward(self, x: paddle.Tensor, x_mask: paddle.Tensor, offset: int=0
                ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor]:
        """Subsample x.

        Args:
            x (paddle.Tensor): Input tensor (#batch, time, idim).
            x_mask (paddle.Tensor): Input mask (#batch, 1, time).
            offset (int): position encoding offset.
        Returns:
            paddle.Tensor: Subsampled tensor (#batch, time', odim),
                where time' = time // 4.
            paddle.Tensor: positional encoding
            paddle.Tensor: Subsampled mask (#batch, 1, time'),
                where time' = time // 4.
        """
        feats = self.conv(x.unsqueeze(1))  # (b, c, t', f')
        batch, channels, _, freq = feats.shape
        # Fold channels and frequency into one feature axis per frame.
        flat = feats.transpose([0, 2, 1, 3]).reshape(
            [batch, -1, channels * freq])
        projected = self.out(flat)
        encoded, pos_emb = self.pos_enc(projected, offset)
        # Mirror the two kernel-3 / stride-2 convolutions on the mask.
        sub_mask = x_mask[:, :, :-2:2][:, :, :-2:2]
        return encoded, pos_emb, sub_mask


class Conv2dSubsampling6(Conv2dSubsampling):
    """Convolutional 2D subsampling (to 1/6 length)."""

    def __init__(self,
                 idim: int,
                 odim: int,
                 dropout_rate: float,
                 pos_enc_class: nn.Layer=PositionalEncoding):
        """Construct a Conv2dSubsampling6 object.

        Args:
            idim (int): Input dimension.
            odim (int): Output dimension.
            dropout_rate (float): Dropout rate (accepted but not used by this
                class).
            pos_enc_class (PositionalEncoding): Custom position encoding
                layer.
        """
        super().__init__(pos_enc_class)
        # kernel 3 / stride 2 followed by kernel 5 / stride 3.
        first_conv = Conv2D(1, odim, 3, 2)
        first_act = nn.ReLU()
        second_conv = Conv2D(odim, odim, 5, 3)
        second_act = nn.ReLU()
        self.conv = nn.Sequential(first_conv, first_act, second_conv,
                                  second_act)
        # Unpadded conv output size: O = (I - F) // S + 1, i.e.
        # (I - 1) // 2 for the first layer and (I' - 2) // 3 for the second.
        freq_bins = ((idim - 1) // 2 - 2) // 3
        self.linear = Linear(odim * freq_bins, odim)
        # The right context for every conv layer is computed by:
        # (kernel_size - 1) * frame_rate_of_this_layer
        # 10 = (3 - 1) * 1 + (5 - 1) * 2
        self.right_context = 10
        self.subsampling_rate = 6

    def forward(self, x: paddle.Tensor, x_mask: paddle.Tensor, offset: int=0
                ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor]:
        """Subsample x.

        Args:
            x (paddle.Tensor): Input tensor (#batch, time, idim).
            x_mask (paddle.Tensor): Input mask (#batch, 1, time).
            offset (int): position encoding offset.
        Returns:
            paddle.Tensor: Subsampled tensor (#batch, time', odim),
                where time' = time // 6.
            paddle.Tensor: positional encoding
            paddle.Tensor: Subsampled mask (#batch, 1, time'),
                where time' = time // 6.
        """
        feats = self.conv(x.unsqueeze(1))  # (b, c, t', f')
        batch, channels, _, freq = feats.shape
        # Fold channels and frequency into one feature axis per frame.
        flat = feats.transpose([0, 2, 1, 3]).reshape(
            [batch, -1, channels * freq])
        projected = self.linear(flat)
        encoded, pos_emb = self.pos_enc(projected, offset)
        # Mirror kernel-3/stride-2 then kernel-5/stride-3 on the mask.
        sub_mask = x_mask[:, :, :-2:2][:, :, :-4:3]
        return encoded, pos_emb, sub_mask

class Conv2dSubsampling8(Conv2dSubsampling):
    """Convolutional 2D subsampling (to 1/8 length)."""

    def __init__(self,
                 idim: int,
                 odim: int,
                 dropout_rate: float,
                 pos_enc_class: nn.Layer=PositionalEncoding):
        """Construct a Conv2dSubsampling8 object.

        Args:
            idim (int): Input dimension.
            odim (int): Output dimension.
            dropout_rate (float): Dropout rate (accepted but not used by this
                class).
            pos_enc_class (PositionalEncoding): position encoding object.
        """
        super().__init__(pos_enc_class)
        # Three kernel-3 / stride-2 convolutions, each followed by ReLU.
        conv_a = Conv2D(1, odim, 3, 2)
        act_a = nn.ReLU()
        conv_b = Conv2D(odim, odim, 3, 2)
        act_b = nn.ReLU()
        conv_c = Conv2D(odim, odim, 3, 2)
        act_c = nn.ReLU()
        self.conv = nn.Sequential(conv_a, act_a, conv_b, act_b, conv_c, act_c)
        # Feature bins remaining after the three unpadded convolutions.
        freq_bins = (((idim - 1) // 2 - 1) // 2 - 1) // 2
        self.linear = Linear(odim * freq_bins, odim)
        # The right context for every conv layer is computed by:
        # (kernel_size - 1) * frame_rate_of_this_layer
        # 14 = (3 - 1) * 1 + (3 - 1) * 2 + (3 - 1) * 4
        self.right_context = 14
        self.subsampling_rate = 8

    def forward(self, x: paddle.Tensor, x_mask: paddle.Tensor, offset: int=0
                ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor]:
        """Subsample x.

        Args:
            x (paddle.Tensor): Input tensor (#batch, time, idim).
            x_mask (paddle.Tensor): Input mask (#batch, 1, time).
            offset (int): position encoding offset.
        Returns:
            paddle.Tensor: Subsampled tensor (#batch, time', odim),
                where time' = time // 8.
            paddle.Tensor: positional encoding
            paddle.Tensor: Subsampled mask (#batch, 1, time'),
                where time' = time // 8.
        """
        feats = self.conv(x.unsqueeze(1))  # (b, c, t', f')
        batch, channels, _, freq = feats.shape
        # Fold channels and frequency into one feature axis per frame.
        flat = feats.transpose([0, 2, 1, 3]).reshape(
            [batch, -1, channels * freq])
        projected = self.linear(flat)
        encoded, pos_emb = self.pos_enc(projected, offset)
        # Mirror the three kernel-3 / stride-2 convolutions on the mask.
        sub_mask = x_mask[:, :, :-2:2][:, :, :-2:2][:, :, :-2:2]
        return encoded, pos_emb, sub_mask


class DepthwiseConv2DSubsampling4(BaseSubsampling):
    """Depthwise Convolutional 2D subsampling (to 1/4 length).

        Args:
            idim (int): Number of input channels of the first convolution.
                NOTE(review): forward() feeds a single-channel tensor
                (``x.unsqueeze(1)``) into that convolution, so this presumably
                has to be 1 -- confirm against callers.
            odim (int): Output dimension.
            pos_enc_class (nn.Layer): position encoding object, called as
                ``pos_enc_class(x, offset)``.
            dw_stride (bool): Whether the second convolution is depthwise
                (``groups=odim``) instead of a regular convolution.
            input_size (int): filter bank dimension.
            input_dropout_rate (float): Dropout rate applied after the input
                projection.
            init_weights (bool): If True, re-draw the weight and bias of the
                input projection from U(-b, b) with
                b = (odim * input_size / 4) ** -0.5.

        """

    def __init__(self,
                 idim: int,
                 odim: int,
                 pos_enc_class: nn.Layer,
                 dw_stride: bool=False,
                 input_size: int=80,
                 input_dropout_rate: float=0.1,
                 init_weights: bool=True):
        super(DepthwiseConv2DSubsampling4, self).__init__()
        self.idim = idim
        self.odim = odim
        self.pw_conv = Conv2D(
            in_channels=idim, out_channels=odim, kernel_size=3, stride=2)
        self.act1 = nn.ReLU()
        self.dw_conv = Conv2D(
            in_channels=odim,
            out_channels=odim,
            kernel_size=3,
            stride=2,
            groups=odim if dw_stride else 1)
        self.act2 = nn.ReLU()
        self.pos_enc = pos_enc_class
        self.input_proj = nn.Sequential(
            Linear(odim * (((input_size - 1) // 2 - 1) // 2), odim),
            nn.Dropout(p=input_dropout_rate))
        if init_weights:
            linear_max = (odim * input_size / 4)**-0.5
            # Overwrite the values of the parameters that already exist.
            # Assigning an initializer object into the dict returned by
            # ``state_dict()`` only changes that temporary dict and leaves the
            # layer untouched.
            proj = self.input_proj[0]
            for param in (proj.weight, proj.bias):
                if param is None:
                    continue
                param.set_value(
                    paddle.uniform(
                        param.shape,
                        dtype=param.dtype,
                        min=-linear_max,
                        max=linear_max))

        self.subsampling_rate = 4
        # 6 = (3 - 1) * 1 + (3 - 1) * 2
        self.right_context = 6

    def forward(self, x: paddle.Tensor, x_mask: paddle.Tensor, offset: int=0
                ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor]:
        """Subsample x.

        Args:
            x (paddle.Tensor): Input tensor; a channel axis is inserted at
                position 1 before the convolutions.
            x_mask (paddle.Tensor): Input mask with time on the last axis.
            offset (int): position encoding offset.
        Returns:
            paddle.Tensor: Subsampled and projected tensor.
            paddle.Tensor: positional encoding
            paddle.Tensor: Mask subsampled twice with ``[:-2:2]`` on the last
                axis, mirroring the two kernel-3 / stride-2 convolutions.
        """
        x = x.unsqueeze(1)  # (b, c=1, t, f)
        x = self.pw_conv(x)
        x = self.act1(x)
        x = self.dw_conv(x)
        x = self.act2(x)
        b, c, t, f = x.shape
        x = x.transpose([0, 2, 1, 3]).reshape([b, -1, c * f])
        # Unlike the Conv2dSubsampling* classes, the positional encoding is
        # applied before the linear projection.
        x, pos_emb = self.pos_enc(x, offset)
        x = self.input_proj(x)
        return x, pos_emb, x_mask[:, :, :-2:2][:, :, :-2:2]


================================================
FILE: paddlespeech/s2t/modules/time_reduction.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# Copyright 2019 Mobvoi Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from wenet(https://github.com/wenet-e2e/wenet)
"""Time reduction layer definition."""
from typing import Tuple

import paddle
import paddle.nn.functional as F
from paddle import nn

from paddlespeech.s2t import masked_fill
from paddlespeech.s2t.modules.align import Conv1D
from paddlespeech.s2t.modules.conv2d import Conv2DValid
from paddlespeech.s2t.utils.log import Log

# Module-level logger obtained from the project's Log wrapper.
logger = Log(__name__).getlog()

# Names exported by ``from ... import *``.
__all__ = [
    "TimeReductionLayerStream", "TimeReductionLayer1D", "TimeReductionLayer2D"
]


class TimeReductionLayer1D(nn.Layer):
    """
    Modified NeMo,
    Squeezeformer Time Reduction procedure.
    Downsamples the audio by `stride` in the time dimension.
    Args:
        channel (int): input dimension of
                       MultiheadAttentionMechanism and PositionwiseFeedForward
        out_dim (int): Output dimension of the module.
        kernel_size (int): Conv kernel size for
                           depthwise convolution in convolution module
        stride (int): Downsampling factor in time dimension.
    """

    def __init__(self,
                 channel: int,
                 out_dim: int,
                 kernel_size: int=5,
                 stride: int=2):
        super(TimeReductionLayer1D, self).__init__()

        self.channel = channel
        self.out_dim = out_dim
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = max(0, self.kernel_size - self.stride)

        # Depthwise convolution performs the actual time reduction.
        self.dw_conv = Conv1D(
            in_channels=channel,
            out_channels=channel,
            kernel_size=kernel_size,
            stride=stride,
            padding=self.padding,
            groups=channel, )

        # Pointwise convolution maps ``channel`` to ``out_dim``.
        self.pw_conv = Conv1D(
            in_channels=channel,
            out_channels=out_dim,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=1, )

        self.init_weights()

    @staticmethod
    def _uniform_(layer: nn.Layer, bound: float) -> None:
        """Overwrite ``layer``'s weight and bias with U(-bound, bound) samples."""
        for name in ('weight', 'bias'):
            param = getattr(layer, name, None)
            if param is None:
                continue
            param.set_value(
                paddle.uniform(
                    param.shape, dtype=param.dtype, min=-bound, max=bound))

    def init_weights(self):
        """Re-initialise both convolutions uniformly.

        The bound is ``kernel_size**-0.5`` for the depthwise convolution and
        ``channel**-0.5`` for the pointwise one. The parameters already exist
        when this runs, so their values are overwritten directly; assigning an
        initializer to ``_param_attr`` / ``_bias_attr`` here would not touch
        them.
        """
        dw_max = self.kernel_size**-0.5
        pw_max = self.channel**-0.5
        self._uniform_(self.dw_conv, dw_max)
        self._uniform_(self.pw_conv, pw_max)

    def forward(
            self,
            xs,
            xs_lens: paddle.Tensor,
            mask: paddle.Tensor=paddle.ones((0, 0, 0), dtype=paddle.bool),
            mask_pad: paddle.Tensor=paddle.ones((0, 0, 0),
                                                dtype=paddle.bool), ):
        """Reduce the time resolution of ``xs`` by ``self.stride``.

        Args:
            xs: input tensor [B, T, C].
            xs_lens (paddle.Tensor): valid length of every sequence.
            mask (paddle.Tensor): 3-D mask, subsampled on its last two axes.
            mask_pad (paddle.Tensor): 3-D padding mask with time on the last
                axis; positions equal to 0 are zeroed in ``xs`` before the
                convolutions.
        Returns:
            Tuple of the reduced tensor [B, T', out_dim] (T' is the length of
            the subsampled ``mask_pad``), the reduced lengths, the subsampled
            ``mask`` and the subsampled ``mask_pad``.
        """
        xs = xs.transpose([0, 2, 1])  # [B, C, T]
        xs = masked_fill(xs, mask_pad.equal(0), 0.0)

        xs = self.dw_conv(xs)
        xs = self.pw_conv(xs)

        xs = xs.transpose([0, 2, 1])  # [B, T, C]

        B, T, D = xs.shape
        mask = mask[:, ::self.stride, ::self.stride]
        mask_pad = mask_pad[:, :, ::self.stride]
        L = mask_pad.shape[-1]
        # For JIT exporting, we remove F.pad operator.
        if L - T < 0:
            # Convolution produced more frames than the mask covers: trim.
            xs = xs[:, :L - T, :]
        else:
            # Pad with zeros of the same dtype as xs so that concat also works
            # when the model does not run in float32.
            dummy_pad = paddle.zeros([B, L - T, D], dtype=xs.dtype)
            xs = paddle.concat([xs, dummy_pad], axis=1)

        # ceil(len / stride), matching the ``::stride`` slicing of the masks;
        # equal to ``(len + 1) // 2`` for the default stride of 2.
        xs_lens = (xs_lens + self.stride - 1) // self.stride
        return xs, xs_lens, mask, mask_pad


class TimeReductionLayer2D(nn.Layer):
    """Time reduction built from two Conv2DValid layers.

    Downsamples the input by ``stride`` in the time dimension.
    Args:
        kernel_size (int): Kernel size of the first convolution along time.
        stride (int): Downsampling factor in time dimension.
        encoder_dim (int): Number of input and output channels of both
            convolutions.
    """

    def __init__(self, kernel_size: int=5, stride: int=2, encoder_dim: int=256):
        super(TimeReductionLayer2D, self).__init__()
        self.encoder_dim = encoder_dim
        self.kernel_size = kernel_size
        self.dw_conv = Conv2DValid(
            in_channels=encoder_dim,
            out_channels=encoder_dim,
            kernel_size=(kernel_size, 1),
            stride=stride,
            valid_trigy=True)
        self.pw_conv = Conv2DValid(
            in_channels=encoder_dim,
            out_channels=encoder_dim,
            kernel_size=1,
            stride=1,
            valid_trigx=False,
            valid_trigy=False)

        self.stride = stride
        self.init_weights()

    @staticmethod
    def _uniform_(layer: nn.Layer, bound: float) -> None:
        """Overwrite ``layer``'s weight and bias with U(-bound, bound) samples."""
        for name in ('weight', 'bias'):
            param = getattr(layer, name, None)
            if param is None:
                continue
            param.set_value(
                paddle.uniform(
                    param.shape, dtype=param.dtype, min=-bound, max=bound))

    def init_weights(self):
        """Re-initialise both convolutions uniformly.

        The bound is ``kernel_size**-0.5`` for ``dw_conv`` and
        ``encoder_dim**-0.5`` for ``pw_conv``. The parameters already exist
        when this runs, so their values are overwritten directly; assigning an
        initializer to ``_param_attr`` / ``_bias_attr`` here would not touch
        them.
        """
        dw_max = self.kernel_size**-0.5
        pw_max = self.encoder_dim**-0.5
        self._uniform_(self.dw_conv, dw_max)
        self._uniform_(self.pw_conv, pw_max)

    def forward(
            self,
            xs: paddle.Tensor,
            xs_lens: paddle.Tensor,
            mask: paddle.Tensor=paddle.ones((0, 0, 0), dtype=paddle.bool),
            mask_pad: paddle.Tensor=paddle.ones((0, 0, 0), dtype=paddle.bool),
    ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor, paddle.Tensor]:
        """Reduce the time resolution of ``xs`` by ``self.stride``.

        Args:
            xs (paddle.Tensor): input tensor, presumably [B, T, D] -- the
                time axis is axis 1 (it is the one padded and concatenated).
            xs_lens (paddle.Tensor): valid length of every sequence.
            mask (paddle.Tensor): 3-D mask, subsampled on its last two axes.
            mask_pad (paddle.Tensor): 3-D padding mask with time on the last
                axis; positions equal to 0 are zeroed in ``xs``.
        Returns:
            Tuple of the reduced tensor, the reduced lengths, the subsampled
            ``mask`` and the subsampled ``mask_pad``.
        """
        xs = masked_fill(xs, mask_pad.transpose([0, 2, 1]).equal(0), 0.0)
        xs = xs.unsqueeze(1)
        # Zero-pad the end of the time axis before the strided convolution.
        padding1 = self.kernel_size - self.stride
        xs = F.pad(
            xs, (0, 0, 0, 0, 0, padding1, 0, 0), mode='constant', value=0.)
        xs = self.dw_conv(xs.transpose([0, 3, 2, 1]))
        xs = self.pw_conv(xs).transpose([0, 3, 2, 1]).squeeze(1)
        tmp_length = xs.shape[1]
        # ceil(len / stride); equal to ``(len + 1) // 2`` for the default
        # stride of 2.
        xs_lens = (xs_lens + self.stride - 1) // self.stride
        # Pad up to the longest reduced length when the conv came up short.
        padding2 = max(0, (xs_lens.max() - tmp_length).item())
        batch_size, hidden = xs.shape[0], xs.shape[-1]
        # Same dtype as xs so that concat also works outside float32.
        dummy_pad = paddle.zeros(
            [batch_size, padding2, hidden], dtype=xs.dtype)
        xs = paddle.concat([xs, dummy_pad], axis=1)
        # Subsample the masks with the same factor as the convolution.
        mask = mask[:, ::self.stride, ::self.stride]
        mask_pad = mask_pad[:, :, ::self.stride]
        return xs, xs_lens, mask, mask_pad


class TimeReductionLayerStream(nn.Layer):
    """
    Squeezeformer Time Reduction procedure.
    Downsamples the audio by `stride` in the time dimension.
    Args:
        channel (int): input dimension of
            MultiheadAttentionMechanism and PositionwiseFeedForward
        out_dim (int): Output dimension of the module.
        kernel_size (int): Conv kernel size for
            depthwise convolution in convolution module
        stride (int): Downsampling factor in time dimension.
    """

    def __init__(self,
                 channel: int,
                 out_dim: int,
                 kernel_size: int=1,
                 stride: int=2):
        super(TimeReductionLayerStream, self).__init__()

        self.channel = channel
        self.out_dim = out_dim
        self.kernel_size = kernel_size
        self.stride = stride

        # Depthwise convolution without padding performs the time reduction.
        self.dw_conv = Conv1D(
            in_channels=channel,
            out_channels=channel,
            kernel_size=kernel_size,
            stride=stride,
            padding=0,
            groups=channel)

        # Pointwise convolution maps ``channel`` to ``out_dim``.
        self.pw_conv = Conv1D(
            in_channels=channel,
            out_channels=out_dim,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=1)
        self.init_weights()

    @staticmethod
    def _uniform_(layer: nn.Layer, bound: float) -> None:
        """Overwrite ``layer``'s weight and bias with U(-bound, bound) samples."""
        for name in ('weight', 'bias'):
            param = getattr(layer, name, None)
            if param is None:
                continue
            param.set_value(
                paddle.uniform(
                    param.shape, dtype=param.dtype, min=-bound, max=bound))

    def init_weights(self):
        """Re-initialise both convolutions uniformly.

        The bound is ``kernel_size**-0.5`` for the depthwise convolution and
        ``channel**-0.5`` for the pointwise one. The parameters already exist
        when this runs, so their values are overwritten directly; assigning an
        initializer to ``_param_attr`` / ``_bias_attr`` here would not touch
        them.
        """
        dw_max = self.kernel_size**-0.5
        pw_max = self.channel**-0.5
        self._uniform_(self.dw_conv, dw_max)
        self._uniform_(self.pw_conv, pw_max)

    def forward(
            self,
            xs,
            xs_lens: paddle.Tensor,
            mask: paddle.Tensor=paddle.ones([0, 0, 0], dtype=paddle.bool),
            mask_pad: paddle.Tensor=paddle.ones([0, 0, 0], dtype=paddle.bool)):
        """Reduce the time resolution of ``xs`` by ``self.stride``.

        Args:
            xs: input tensor [B, T, C].
            xs_lens (paddle.Tensor): valid length of every sequence.
            mask (paddle.Tensor): 3-D mask, subsampled on its last two axes.
            mask_pad (paddle.Tensor): 3-D padding mask with time on the last
                axis; positions equal to 0 are zeroed in ``xs`` before the
                convolutions.
        Returns:
            Tuple of the reduced tensor [B, T', out_dim] (T' is the length of
            the subsampled ``mask_pad``), the reduced lengths, the subsampled
            ``mask`` and the subsampled ``mask_pad``.
        """
        xs = xs.transpose([0, 2, 1])  # [B, C, T]
        xs = masked_fill(xs, mask_pad.equal(0), 0.0)

        xs = self.dw_conv(xs)
        xs = self.pw_conv(xs)

        xs = xs.transpose([0, 2, 1])  # [B, T, C]

        B, T, D = xs.shape
        mask = mask[:, ::self.stride, ::self.stride]
        mask_pad = mask_pad[:, :, ::self.stride]
        L = mask_pad.shape[-1]
        # For JIT exporting, we remove F.pad operator.
        if L - T < 0:
            # Convolution produced more frames than the mask covers: trim.
            xs = xs[:, :L - T, :]
        else:
            # Pad with zeros of the same dtype as xs so that concat also works
            # when the model does not run in float32.
            dummy_pad = paddle.zeros([B, L - T, D], dtype=xs.dtype)
            xs = paddle.concat([xs, dummy_pad], axis=1)

        # ceil(len / stride), matching the ``::stride`` slicing of the masks;
        # equal to ``(len + 1) // 2`` for the default stride of 2.
        xs_lens = (xs_lens + self.stride - 1) // self.stride
        return xs, xs_lens, mask, mask_pad


================================================
FILE: paddlespeech/s2t/training/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/training/cli.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse

import distutils
from yacs.config import CfgNode

from paddlespeech.utils.argparse import strtobool


class ExtendAction(argparse.Action):
    """Argparse action that extends the destination list with the parsed values.

    Back-port of the ``extend`` action that the standard library provides
    since Python 3.8
    (https://docs.python.org/3.8/library/argparse.html#action).
    If only Python 3.8+ has to be supported, this class is no longer required
    and the built-in ``action='extend'`` can be used the same way.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        current = getattr(namespace, self.dest, None)
        # Work on a copy: the object stored on the namespace may be the
        # option's ``default`` list, which argparse shares between parses and
        # which must therefore not be mutated in place.
        items = list(current) if current else []
        items.extend(values)
        setattr(namespace, self.dest, items)


class LoadFromFile(argparse.Action):
    """Argparse action that reads further command-line arguments from a file.

    ``values`` must be an open file object (e.g. produced by ``type=open``).
    Its content is split on whitespace and parsed by the same parser into the
    same namespace; the file is closed afterwards.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        with values as arg_file:
            file_args = arg_file.read().split()
            # Parse the file's arguments straight into the target namespace.
            parser.parse_args(file_args, namespace)


def default_argument_parser(parser=None):
    r"""Build (or extend) the common argument parser for experiments.

    The options are organised in a few groups:

    * general: ``--conf`` (read further arguments from a file), ``--debug``
      and ``--dump_path``;
    * Train Options: ``--seed``, ``--ngpu``, ``--nxpu``, ``--config``,
      ``--output``, ``--checkpoint_path``, ``--opts`` and ``--dump-config``;
    * Test Options: ``--decode_cfg``, ``--result_file`` and ``--audio_file``;
    * Quant Options: ``--audio_scp``, ``--num_utts`` and ``--export_path``;
    * Benchmark Options: ``--profiler-options``, ``--benchmark-batch-size``
      and ``--benchmark-max-step``.

    ``--config`` and ``--opts`` are used to overwrite the default
    configuration; ``--opts`` takes (KEY VALUE) pairs and may be repeated.

    Parameters
    ----------
    parser : argparse.ArgumentParser, optional
        Parser to add the options to. A new one is created when omitted.

    Returns
    -------
    argparse.ArgumentParser
        the parser
    """
    parser = argparse.ArgumentParser() if parser is None else parser

    # Make ``action='extend'`` resolve to the local implementation.
    parser.register('action', 'extend', ExtendAction)

    # general options
    parser.add_argument(
        '--conf', type=open, action=LoadFromFile, help="config file.")
    parser.add_argument(
        "--debug",
        type=strtobool,
        default=False,
        help="logging with debug mode.")
    parser.add_argument(
        "--dump_path", type=str, default=None, help="path to dump config file.")

    # train group
    train_opts = parser.add_argument_group(
        title='Train Options', description=None)
    add_train = train_opts.add_argument
    add_train(
        "--seed",
        type=int,
        default=None,
        help="seed to use for paddle, np and random. None or 0 for random, else set seed."
    )
    add_train(
        "--ngpu",
        type=int,
        default=1,
        help="number of parallel processes. 0 for cpu.")
    add_train(
        '--nxpu',
        type=int,
        default=0,
        choices=[0, 1],
        help="if nxpu == 0 and ngpu == 0, use cpu.")
    add_train("--config", metavar="CONFIG_FILE", help="config file.")
    add_train("--output", metavar="CKPT_DIR", help="path to save checkpoint.")
    add_train("--checkpoint_path", type=str, help="path to load checkpoint")
    add_train(
        "--opts",
        action='extend',
        nargs=2,
        metavar=('key', 'val'),
        help="overwrite --config field, passing (KEY VALUE) pairs")
    add_train(
        "--dump-config", metavar="FILE", help="dump config to `this` file.")

    # test group
    test_opts = parser.add_argument_group(
        title='Test Options', description=None)
    add_test = test_opts.add_argument
    add_test(
        "--decode_cfg",
        metavar="DECODE_CONFIG_FILE",
        help="decode config file.")
    add_test("--result_file", type=str, help="path of save the asr result")
    add_test("--audio_file", type=str, help="path of the input audio file")

    # quant & export
    quant_opts = parser.add_argument_group(
        title='Quant Options', description=None)
    add_quant = quant_opts.add_argument
    add_quant("--audio_scp", type=str, help="path of the input audio scp file")
    add_quant(
        "--num_utts",
        type=int,
        default=200,
        help="num utts for quant calibrition.")
    add_quant(
        "--export_path",
        type=str,
        default='export.jit.quant',
        help="path of the jit model to save")

    # profile group
    bench_opts = parser.add_argument_group(
        title='Benchmark Options', description=None)
    add_bench = bench_opts.add_argument
    add_bench(
        '--profiler-options',
        type=str,
        default=None,
        help='The option of profiler, which should be in format \"key1=value1;key2=value2;key3=value3\".'
    )
    add_bench(
        '--benchmark-batch-size',
        type=int,
        default=None,
        help='batch size for benchmark.')
    add_bench(
        '--benchmark-max-step',
        type=int,
        default=None,
        help='max iteration for benchmark.')

    return parser


def config_from_args(args):
    """Assemble a frozen config node from parsed command line arguments.

    Args:
        args: Parsed arguments. The attributes ``config``, ``decode_cfg`` and
            ``opts`` are read; each one is skipped when falsy.

    Returns:
        CfgNode: The merged configuration, frozen before it is returned. When
            ``args.decode_cfg`` is set, its content is attached as
            ``config.decode``; ``args.opts`` is merged last.
    """
    # https://yaml.org/type/float.html
    cfg = CfgNode(new_allowed=True)

    main_file = args.config
    if main_file:
        cfg.merge_from_file(main_file)

    decode_file = args.decode_cfg
    if decode_file:
        # the decode config lives in its own sub-node
        decode_node = CfgNode(new_allowed=True)
        decode_node.merge_from_file(decode_file)
        cfg.decode = decode_node

    overrides = args.opts
    if overrides:
        cfg.merge_from_list(overrides)

    cfg.freeze()
    return cfg


def maybe_dump_config(dump_path, config):
    """Write the printed form of ``config`` to ``dump_path`` when a path is given.

    Args:
        dump_path: Destination file path. Nothing happens when it is falsy.
        config: Object to dump; it is written with ``print``.
    """
    if not dump_path:
        return

    with open(dump_path, 'w') as sink:
        print(config, file=sink)
    print(f"save config to {dump_path}")


================================================
FILE: paddlespeech/s2t/training/extensions/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Callable

from .extension import Extension


def make_extension(trigger: Callable=None,
                   default_name: str=None,
                   priority: int=None,
                   finalizer: Callable=None,
                   initializer: Callable=None,
                   on_error: Callable=None):
    """Make an Extension-like object by injecting required attributes to it.

    Args:
        trigger: Trigger attached to the extension. Defaults to
            ``Extension.trigger``.
        default_name (str): Name attached as ``default_name``. Falls back to
            the ``__name__`` of the decorated object when falsy.
        priority (int): Priority attached to the extension. Defaults to
            ``Extension.priority``.
        finalizer (Callable): Attached as ``finalize``.
        initializer (Callable): Attached as ``initialize``.
        on_error (Callable): Attached as ``on_error``.

    Returns:
        Callable: A decorator that sets the attributes on the object it
            receives and returns that same object.
    """
    if trigger is None:
        trigger = Extension.trigger
    if priority is None:
        priority = Extension.priority

    def _noop(*args, **kwargs):
        """Default hook: accept any arguments and do nothing."""
        return None

    def decorator(ext):
        ext.trigger = trigger
        ext.default_name = default_name or ext.__name__
        ext.priority = priority
        # A hook that was not supplied must still be callable, like the
        # no-op hooks of `Extension`; storing None here would make
        # `ext.finalize(trainer)` fail with "'NoneType' object is not callable".
        ext.finalize = finalizer if finalizer is not None else _noop
        ext.on_error = on_error if on_error is not None else _noop
        ext.initialize = initializer if initializer is not None else _noop
        return ext

    return decorator


================================================
FILE: paddlespeech/s2t/training/extensions/evaluator.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from chainer(https://github.com/chainer/chainer)
from typing import Dict

import paddle
from paddle import distributed as dist
from paddle.io import DataLoader
from paddle.nn import Layer

from . import extension
from ..reporter import DictSummary
from ..reporter import ObsScope
from ..reporter import report
from ..timer import Timer
from paddlespeech.s2t.utils.log import Log
logger = Log(__name__).getlog()


class StandardEvaluator(extension.Extension):
    """Extension that runs the model over a dataloader and reports the
    averaged observations.

    Subclasses customize the per-batch computation in `evaluate_core`; the
    value it returns is handed to `evaluate_sync`.
    """

    trigger = (1, 'epoch')
    default_name = 'validation'
    priority = extension.PRIORITY_WRITER

    name = None

    def __init__(self, model: Layer, dataloader: DataLoader):
        # dataloaders
        self.dataloader = dataloader

        # it is designed to hold multiple models
        self.model = model
        self.models: Dict[str, Layer] = {"main": model}

    def evaluate_core(self, batch):
        """Run the model on one batch. You may report metrics here."""
        self.model(batch)
        return

    def evaluate_sync(self, data):
        """Combine the output of `evaluate_core` across processes and report it.

        Args:
            data: ``None`` (nothing is reported) or a
                ``(numerator, denominator)`` pair.
        """
        if data is None:
            return

        numerator, denominator = data
        if dist.get_world_size() <= 1:
            # single process: plain division, no communication
            value = numerator / denominator
        else:
            numerator = paddle.to_tensor(numerator)
            denominator = paddle.to_tensor(denominator)
            # the default operator in all_reduce function is sum.
            dist.all_reduce(numerator)
            dist.all_reduce(denominator)
            value = float(numerator / denominator)

        # used by the snapshot extension to do kbest save.
        report("VALID/LOSS", value)
        logger.info(f"Valid: all-reduce loss {value}")

    def evaluate(self):
        """Evaluate every batch of the dataloader and return the mean summary."""
        # switch to eval mode
        for net in self.models.values():
            net.eval()

        # to average evaluation metrics
        accumulator = DictSummary()
        for batch in self.dataloader:
            observation = {}
            # main evaluation computation here, without gradients.
            with ObsScope(observation), paddle.no_grad():
                self.evaluate_sync(self.evaluate_core(batch))
            accumulator.add(observation)
        averaged = accumulator.compute_mean()

        # switch to train mode
        for net in self.models.values():
            net.train()
        return averaged

    def __call__(self, trainer=None):
        """Evaluate and report the averaged metrics to the current observation.

        If it is used to extend a trainer, the metrics are reported to the
        observation of the trainer; otherwise you can use your own observation.
        """
        with Timer("Eval Time Cost: {}"):
            averaged = self.evaluate()
        for key, value in averaged.items():
            report(key, value)


================================================
FILE: paddlespeech/s2t/training/extensions/extension.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from chainer(https://github.com/chainer/chainer)
PRIORITY_WRITER = 300
PRIORITY_EDITOR = 200
PRIORITY_READER = 100


class Extension:
    """Base class of the extensions that customize the behavior of Trainer.

    Class attributes:
        trigger: Default trigger, ``(1, 'iteration')``.
        priority: Default priority, ``PRIORITY_READER``.
        name: Name of the extension, ``None`` by default.
    """
    trigger = (1, 'iteration')
    priority = PRIORITY_READER
    name = None

    @property
    def default_name(self):
        """Default name of the extension: the name of its class."""
        return self.__class__.__name__

    def __call__(self, trainer):
        """Main action of the extension. After each update, it is executed
        when the trigger fires.

        Raises:
            NotImplementedError: Always; subclasses must override this method.
        """
        raise NotImplementedError(
            'Extension implementation must override __call__.')

    def initialize(self, trainer):
        """Action that is executed once to get the correct trainer state.

        It is normally called before training, but if the trainer restores
        its state with a Snapshot extension, this method should also be
        called. The base implementation does nothing.
        """
        return None

    def on_error(self, trainer, exc, tb):
        """Handle an error raised during training, before finalization.

        The base implementation does nothing.
        """
        return None

    def finalize(self, trainer):
        """Action that is executed when training is done.

        For example, visualizers would need to be closed. The base
        implementation does nothing.
        """
        return None


================================================
FILE: paddlespeech/s2t/training/extensions/plot.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from chainer(https://github.com/chainer/chainer)
import copy
import os

import numpy as np

from . import extension


class PlotAttentionReport(extension.Extension):
    """Plot attention reporter.

    When called, it computes the attention weights of the stored utterances
    with ``att_vis_fn``, trims each matrix to the lengths recorded in ``data``
    and saves it under ``outdir`` both as a ``.npy`` array and a ``.png``
    figure.

    Args:
        att_vis_fn (Callable): Function of attention visualization. It
            receives the converted batch, unpacked positionally when the
            batch is a tuple and by keyword otherwise.
        data (list[tuple(str, dict[str, list[Any]])]): List json utt key items.
        outdir (str): Directory to save figures.
        converter (Callable): Function to convert data, called as
            ``converter([batch], device)``.
        transform (Callable): Called as ``transform(data, return_uttid=True)``
            and expected to return ``(batch, uttid_list)``.
        device: Device handle forwarded to ``converter``.
        reverse (bool): If True, input and output length are reversed.
        ikey (str): Key to access input
            (for ASR/ST ikey="input", for MT ikey="output".)
        iaxis (int): Dimension to access input
            (for ASR/ST iaxis=0, for MT iaxis=1.)
        okey (str): Key to access output
            (for ASR/ST okey="input", MT okey="output".)
        oaxis (int): Dimension to access output
            (for ASR/ST oaxis=0, for MT oaxis=0.)
        subsampling_factor (int): subsampling factor in encoder

    """

    def __init__(
            self,
            att_vis_fn,
            data,
            outdir,
            converter,
            transform,
            device,
            reverse=False,
            ikey="input",
            iaxis=0,
            okey="output",
            oaxis=0,
            subsampling_factor=1, ):
        self.att_vis_fn = att_vis_fn
        self.data = copy.deepcopy(data)
        self.data_dict = {k: v for k, v in copy.deepcopy(data)}
        # key is utterance ID
        self.outdir = outdir
        self.converter = converter
        self.transform = transform
        self.device = device
        self.reverse = reverse
        self.ikey = ikey
        self.iaxis = iaxis
        self.okey = okey
        self.oaxis = oaxis
        self.factor = subsampling_factor
        if not os.path.exists(self.outdir):
            # exist_ok: another process may create the directory between
            # the check above and this call.
            os.makedirs(self.outdir, exist_ok=True)

    @staticmethod
    def _current_epoch(trainer):
        """Return the epoch counter held by the updater of ``trainer``.

        ``trainer.updater.state.epoch`` is preferred; ``trainer.updater.epoch``
        is used as a fallback for updaters that expose the counter directly.
        """
        updater = trainer.updater
        state = getattr(updater, "state", None)
        if state is not None and hasattr(state, "epoch"):
            return state.epoch
        return updater.epoch

    def _save_attention(self, trainer, uttid, att_w, suffix, han_mode=False):
        """Trim one attention matrix and save it as ``.npy`` and ``.png``.

        The files are named ``<outdir>/<uttid>.ep.<epoch><suffix>.{npy,png}``.
        """
        # Build the name in one step: outdir and uttid are inserted verbatim,
        # so braces inside them cannot be misread as format fields.
        epoch = self._current_epoch(trainer)
        stem = f"{self.outdir}/{uttid}.ep.{epoch}{suffix}"
        att_w = self.trim_attention_weight(uttid, att_w)
        np.save(stem + ".npy", att_w)
        self._plot_and_save_attention(att_w, stem + ".png", han_mode=han_mode)

    def __call__(self, trainer):
        """Plot and save image file of att_ws matrix."""
        att_ws, uttid_list = self.get_attention_weights()
        if isinstance(att_ws, list):  # multi-encoder case
            # the last entry holds the hierarchical attention (han)
            num_encs = len(att_ws) - 1
            # atts
            for i in range(num_encs):
                for idx, att_w in enumerate(att_ws[i]):
                    self._save_attention(trainer, uttid_list[idx], att_w,
                                         ".att%d" % (i + 1))
            # han
            for idx, att_w in enumerate(att_ws[num_encs]):
                self._save_attention(
                    trainer, uttid_list[idx], att_w, ".han", han_mode=True)
        else:
            for idx, att_w in enumerate(att_ws):
                self._save_attention(trainer, uttid_list[idx], att_w, "")

    def log_attentions(self, logger, step):
        """Add image files of att_ws matrix to the tensorboard.

        Args:
            logger: Object providing ``add_figure(tag, figure, step)``.
            step: Step value forwarded to ``logger.add_figure``.
        """
        att_ws, uttid_list = self.get_attention_weights()
        if isinstance(att_ws, list):  # multi-encoder case
            num_encs = len(att_ws) - 1
            # atts
            for i in range(num_encs):
                for idx, att_w in enumerate(att_ws[i]):
                    att_w = self.trim_attention_weight(uttid_list[idx], att_w)
                    plot = self.draw_attention_plot(att_w)
                    logger.add_figure(
                        "%s_att%d" % (uttid_list[idx], i + 1),
                        plot.gcf(),
                        step, )
            # han
            for idx, att_w in enumerate(att_ws[num_encs]):
                att_w = self.trim_attention_weight(uttid_list[idx], att_w)
                plot = self.draw_han_plot(att_w)
                logger.add_figure(
                    "%s_han" % (uttid_list[idx]),
                    plot.gcf(),
                    step, )
        else:
            for idx, att_w in enumerate(att_ws):
                att_w = self.trim_attention_weight(uttid_list[idx], att_w)
                plot = self.draw_attention_plot(att_w)
                logger.add_figure("%s" % (uttid_list[idx]), plot.gcf(), step)

    def get_attention_weights(self):
        """Compute the attention weights of the stored utterances.

        Returns:
            tuple: ``(att_ws, uttid_list)``. ``att_ws`` is whatever
                ``att_vis_fn`` returns: a list in the multi-encoder case,
                otherwise a batch of matrices whose items are 3-D (leading
                head axis) or 2-D (decoder length, encoder length), as
                handled by `trim_attention_weight`. ``uttid_list`` holds the
                utterance ids returned by ``transform``.

        """
        return_batch, uttid_list = self.transform(self.data, return_uttid=True)
        batch = self.converter([return_batch], self.device)
        if isinstance(batch, tuple):
            att_ws = self.att_vis_fn(*batch)
        else:
            att_ws = self.att_vis_fn(**batch)
        return att_ws, uttid_list

    def trim_attention_weight(self, uttid, att_w):
        """Transform attention matrix with regard to self.reverse.

        The matrix is cut to the decoder and encoder lengths stored for
        ``uttid``; the encoder length is divided by the subsampling factor
        when that factor is larger than 1.
        """
        if self.reverse:
            enc_key, enc_axis = self.okey, self.oaxis
            dec_key, dec_axis = self.ikey, self.iaxis
        else:
            enc_key, enc_axis = self.ikey, self.iaxis
            dec_key, dec_axis = self.okey, self.oaxis
        dec_len = int(self.data_dict[uttid][dec_key][dec_axis]["shape"][0])
        enc_len = int(self.data_dict[uttid][enc_key][enc_axis]["shape"][0])
        if self.factor > 1:
            enc_len //= self.factor
        if len(att_w.shape) == 3:
            # leading axis is kept untouched, only the two lengths are cut
            att_w = att_w[:, :dec_len, :enc_len]
        else:
            att_w = att_w[:dec_len, :enc_len]
        return att_w

    def draw_attention_plot(self, att_w):
        """Plot the att_w matrix.

        Returns:
            matplotlib.pyplot: pyplot object with attention matrix image.

        """
        import matplotlib

        matplotlib.use("Agg")
        import matplotlib.pyplot as plt

        plt.clf()
        att_w = att_w.astype(np.float32)
        if len(att_w.shape) == 3:
            # one subplot per slice along the first axis
            for h, aw in enumerate(att_w, 1):
                plt.subplot(1, len(att_w), h)
                plt.imshow(aw, aspect="auto")
                plt.xlabel("Encoder Index")
                plt.ylabel("Decoder Index")
        else:
            plt.imshow(att_w, aspect="auto")
            plt.xlabel("Encoder Index")
            plt.ylabel("Decoder Index")
        plt.tight_layout()
        return plt

    def draw_han_plot(self, att_w):
        """Plot the att_w matrix for hierarchical attention.

        Returns:
            matplotlib.pyplot: pyplot object with attention matrix image.

        """
        import matplotlib

        matplotlib.use("Agg")
        import matplotlib.pyplot as plt

        plt.clf()
        if len(att_w.shape) == 3:
            for h, aw in enumerate(att_w, 1):
                legends = []
                plt.subplot(1, len(att_w), h)
                # one curve per column of the matrix
                for i in range(aw.shape[1]):
                    plt.plot(aw[:, i])
                    legends.append("Att{}".format(i))
                plt.ylim([0, 1.0])
                plt.xlim([0, aw.shape[0]])
                plt.grid(True)
                plt.ylabel("Attention Weight")
                plt.xlabel("Decoder Index")
                plt.legend(legends)
        else:
            legends = []
            for i in range(att_w.shape[1]):
                plt.plot(att_w[:, i])
                legends.append("Att{}".format(i))
            plt.ylim([0, 1.0])
            plt.xlim([0, att_w.shape[0]])
            plt.grid(True)
            plt.ylabel("Attention Weight")
            plt.xlabel("Decoder Index")
            plt.legend(legends)
        plt.tight_layout()
        return plt

    def _plot_and_save_attention(self, att_w, filename, han_mode=False):
        """Draw ``att_w`` (han or regular plot) and write it to ``filename``."""
        if han_mode:
            plt = self.draw_han_plot(att_w)
        else:
            plt = self.draw_attention_plot(att_w)
        plt.savefig(filename)
        plt.close()


class PlotCTCReport(extension.Extension):
    """Plot CTC reporter.

    When called, it computes the CTC posteriors of the stored utterances with
    ``ctc_vis_fn``, trims each of them to the input length recorded in
    ``data`` and saves it under ``outdir`` both as a ``.npy`` array and a
    ``.png`` figure.

    Args:
        ctc_vis_fn (Callable): Function of CTC visualization. It receives the
            converted batch, unpacked positionally when the batch is a tuple
            and by keyword otherwise.
        data (list[tuple(str, dict[str, list[Any]])]): List json utt key items.
        outdir (str): Directory to save figures.
        converter (Callable): Function to convert data, called as
            ``converter([batch], device)``.
        transform (Callable): Called as ``transform(data, return_uttid=True)``
            and expected to return ``(batch, uttid_list)``.
        device: Device handle forwarded to ``converter``.
        reverse (bool): Stored on the instance; not read by the methods of
            this class.
        ikey (str): Key to access input
            (for ASR/ST ikey="input", for MT ikey="output".)
        iaxis (int): Dimension to access input
            (for ASR/ST iaxis=0, for MT iaxis=1.)
        okey (str): Key to access output. Stored on the instance; not read by
            the methods of this class.
        oaxis (int): Dimension to access output. Stored on the instance; not
            read by the methods of this class.
        subsampling_factor (int): subsampling factor in encoder

    """

    def __init__(
            self,
            ctc_vis_fn,
            data,
            outdir,
            converter,
            transform,
            device,
            reverse=False,
            ikey="input",
            iaxis=0,
            okey="output",
            oaxis=0,
            subsampling_factor=1, ):
        self.ctc_vis_fn = ctc_vis_fn
        self.data = copy.deepcopy(data)
        self.data_dict = {k: v for k, v in copy.deepcopy(data)}
        # key is utterance ID
        self.outdir = outdir
        self.converter = converter
        self.transform = transform
        self.device = device
        self.reverse = reverse
        self.ikey = ikey
        self.iaxis = iaxis
        self.okey = okey
        self.oaxis = oaxis
        self.factor = subsampling_factor
        if not os.path.exists(self.outdir):
            # exist_ok: another process may create the directory between
            # the check above and this call.
            os.makedirs(self.outdir, exist_ok=True)

    @staticmethod
    def _current_epoch(trainer):
        """Return the epoch counter held by the updater of ``trainer``.

        ``trainer.updater.state.epoch`` is preferred; ``trainer.updater.epoch``
        is used as a fallback for updaters that expose the counter directly.
        """
        updater = trainer.updater
        state = getattr(updater, "state", None)
        if state is not None and hasattr(state, "epoch"):
            return state.epoch
        return updater.epoch

    def _save_ctc_prob(self, trainer, uttid, ctc_prob, suffix):
        """Trim one CTC posterior matrix and save it as ``.npy`` and ``.png``.

        The files are named ``<outdir>/<uttid>.ep.<epoch><suffix>.{npy,png}``.
        """
        # Build the name in one step: outdir and uttid are inserted verbatim,
        # so braces inside them cannot be misread as format fields.
        epoch = self._current_epoch(trainer)
        stem = f"{self.outdir}/{uttid}.ep.{epoch}{suffix}"
        ctc_prob = self.trim_ctc_prob(uttid, ctc_prob)
        np.save(stem + ".npy", ctc_prob)
        self._plot_and_save_ctc(ctc_prob, stem + ".png")

    def __call__(self, trainer):
        """Plot and save image file of ctc prob."""
        ctc_probs, uttid_list = self.get_ctc_probs()
        if isinstance(ctc_probs, list):  # multi-encoder case
            # NOTE: the last entry of the list is not plotted
            num_encs = len(ctc_probs) - 1
            for i in range(num_encs):
                for idx, ctc_prob in enumerate(ctc_probs[i]):
                    self._save_ctc_prob(trainer, uttid_list[idx], ctc_prob,
                                        ".ctc%d" % (i + 1))
        else:
            for idx, ctc_prob in enumerate(ctc_probs):
                self._save_ctc_prob(trainer, uttid_list[idx], ctc_prob, "")

    def log_ctc_probs(self, logger, step):
        """Add image files of ctc probs to the tensorboard.

        Args:
            logger: Object providing ``add_figure(tag, figure, step)``.
            step: Step value forwarded to ``logger.add_figure``.
        """
        ctc_probs, uttid_list = self.get_ctc_probs()
        if isinstance(ctc_probs, list):  # multi-encoder case
            num_encs = len(ctc_probs) - 1
            for i in range(num_encs):
                for idx, ctc_prob in enumerate(ctc_probs[i]):
                    ctc_prob = self.trim_ctc_prob(uttid_list[idx], ctc_prob)
                    plot = self.draw_ctc_plot(ctc_prob)
                    logger.add_figure(
                        "%s_ctc%d" % (uttid_list[idx], i + 1),
                        plot.gcf(),
                        step, )
        else:
            for idx, ctc_prob in enumerate(ctc_probs):
                ctc_prob = self.trim_ctc_prob(uttid_list[idx], ctc_prob)
                plot = self.draw_ctc_plot(ctc_prob)
                logger.add_figure("%s" % (uttid_list[idx]), plot.gcf(), step)

    def get_ctc_probs(self):
        """Compute the CTC probs of the stored utterances.

        Returns:
            tuple: ``(probs, uttid_list)``. ``probs`` is whatever
                ``ctc_vis_fn`` returns: a list in the multi-encoder case,
                otherwise a batch whose items are 2-D (frames, vocab)
                matrices, as unpacked by `draw_ctc_plot`. ``uttid_list``
                holds the utterance ids returned by ``transform``.

        """
        return_batch, uttid_list = self.transform(self.data, return_uttid=True)
        batch = self.converter([return_batch], self.device)
        if isinstance(batch, tuple):
            probs = self.ctc_vis_fn(*batch)
        else:
            probs = self.ctc_vis_fn(**batch)
        return probs, uttid_list

    def trim_ctc_prob(self, uttid, prob):
        """Trim CTC posteriors according to input lengths.

        The input length stored for ``uttid`` is divided by the subsampling
        factor when that factor is larger than 1.
        """
        enc_len = int(self.data_dict[uttid][self.ikey][self.iaxis]["shape"][0])
        if self.factor > 1:
            enc_len //= self.factor
        prob = prob[:enc_len]
        return prob

    def draw_ctc_plot(self, ctc_prob):
        """Plot the ctc_prob matrix.

        Returns:
            matplotlib.pyplot: pyplot object with CTC prob matrix image.

        """
        import matplotlib

        matplotlib.use("Agg")
        import matplotlib.pyplot as plt

        ctc_prob = ctc_prob.astype(np.float32)

        plt.clf()
        topk_ids = np.argsort(ctc_prob, axis=1)
        n_frames, vocab = ctc_prob.shape
        times_probs = np.arange(n_frames)

        plt.figure(figsize=(20, 8))

        # NOTE: index 0 is reserved for blank
        for idx in set(topk_ids.reshape(-1).tolist()):
            if idx == 0:
                plt.plot(
                    times_probs,
                    ctc_prob[:, 0],
                    ":",
                    label="<blank>",
                    color="grey")
            else:
                plt.plot(times_probs, ctc_prob[:, idx])
        plt.xlabel(u"Input [frame]", fontsize=12)
        plt.ylabel("Posteriors", fontsize=12)
        plt.xticks(list(range(0, int(n_frames) + 1, 10)))
        plt.yticks(list(range(0, 2, 1)))
        plt.tight_layout()
        return plt

    def _plot_and_save_ctc(self, ctc_prob, filename):
        """Draw ``ctc_prob`` and write the figure to ``filename``."""
        plt = self.draw_ctc_plot(ctc_prob)
        plt.savefig(filename)
        plt.close()


================================================
FILE: paddlespeech/s2t/training/extensions/snapshot.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from chainer(https://github.com/chainer/chainer)
import os
from datetime import datetime
from pathlib import Path
from typing import Any
from typing import Dict
from typing import List

import jsonlines

from . import extension
from ..reporter import get_observations
from ..updaters.trainer import Trainer
from paddlespeech.s2t.utils.log import Log
from paddlespeech.s2t.utils.mp_tools import rank_zero_only

logger = Log(__name__).getlog()


def load_records(records_fp):
    """Read every entry of a json lines record file into a list.

    Args:
        records_fp: Path of the record file, opened in read mode.

    Returns:
        list: The entries in file order.
    """
    with jsonlines.open(records_fp, 'r') as source:
        return [entry for entry in source]


class Snapshot(extension.Extension):
    """An extension to make snapshot of the updater object inside
    the trainer. It is done by calling the updater's `save` method.

    Checkpoints are written to ``<trainer.out>/checkpoints`` and tracked in
    ``records.jsonl`` inside that directory.

    Parameters
    ----------
    mode : str
        ``'latest'`` drops the record at the front of the list when the
        limit is exceeded; ``'kbest'`` drops the record with the worst
        indicator value.
    max_size : int
        Number of snapshots to keep. ``-1`` keeps all of them.
    indicator : Optional[str]
        Key looked up in the current observations and stored with every
        record. Required when ``mode`` is ``'kbest'``.
    less_better : bool
        Whether a smaller indicator value is better.
    snapshot_on_error : bool
        Whether to save a snapshot (in ``'latest'`` mode) from `on_error`.
    """

    trigger = (1, 'epoch')
    priority = -100
    default_name = "snapshot"

    def __init__(self,
                 mode='latest',
                 max_size: int=5,
                 indicator=None,
                 less_better=True,
                 snapshot_on_error: bool=False):
        self.records: List[Dict[str, Any]] = []
        assert mode in ('latest', 'kbest'), mode
        if mode == 'kbest':
            assert indicator is not None
        self.mode = mode
        self.indicator = indicator
        self.less_is_better = less_better
        self.max_size = max_size
        self._snapshot_on_error = snapshot_on_error
        self._save_all = (max_size == -1)
        self.checkpoint_dir = None

    def initialize(self, trainer: Trainer):
        """Setting up this extension.

        If a record file already exists, the checkpoint of its last record is
        loaded into the updater.
        """
        self.checkpoint_dir = trainer.out / "checkpoints"

        # load existing records
        record_path: Path = self.checkpoint_dir / "records.jsonl"
        if record_path.exists():
            self.records = load_records(record_path)
            if not self.records:
                # an empty record file has nothing to resume from
                return
            ckpt_path = self.records[-1]['path']
            logger.info(f"Loading from an existing checkpoint {ckpt_path}")
            trainer.updater.load(ckpt_path)

    def on_error(self, trainer, exc, tb):
        """Save a 'latest' snapshot on error when enabled."""
        if self._snapshot_on_error:
            self.save_checkpoint_and_update(trainer, 'latest')

    def __call__(self, trainer: Trainer):
        self.save_checkpoint_and_update(trainer, self.mode)

    def full(self):
        """Whether the number of snapshots it keeps track of is greater
        than the max_size."""
        return (not self._save_all) and len(self.records) > self.max_size

    @rank_zero_only
    def save_checkpoint_and_update(self, trainer: Trainer, mode: str):
        """Saving new snapshot and remove one old snapshot if needed."""
        iteration = trainer.updater.state.iteration
        epoch = trainer.updater.state.epoch
        num = epoch if self.trigger[1] == 'epoch' else iteration
        path = self.checkpoint_dir / f"{num}.np"

        # add the new one
        trainer.updater.save(path)
        if self.indicator is None:
            # no indicator configured (allowed in 'latest' mode): there is
            # nothing to look up in the observations
            indicator_value = None
        else:
            indicator_value = get_observations()[self.indicator]
        record = {
            "time": str(datetime.now()),
            'path': str(path.resolve()),  # use absolute path
            'iteration': iteration,
            'epoch': epoch,
            'indicator': indicator_value
        }
        self.records.append(record)

        # remove one record when over the limit
        if self.full():
            if mode == 'kbest':
                # Order from worst to best so that the record dropped below
                # is the worst one: descending when less is better,
                # ascending otherwise.
                self.records = sorted(
                    self.records,
                    key=lambda record: record['indicator'],
                    reverse=self.less_is_better)
            evicted_record = self.records[0]
            os.remove(evicted_record["path"])
            self.records.pop(0)

        # update the record file
        record_path = self.checkpoint_dir / "records.jsonl"
        with jsonlines.open(record_path, 'w') as writer:
            for record in self.records:
                # jsonlines.open may return a Writer or a Reader
                writer.write(record)  # pylint: disable=no-member


================================================
FILE: paddlespeech/s2t/training/extensions/visualizer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from visualdl import LogWriter

from . import extension
from ..updaters.trainer import Trainer


class VisualDL(extension.Extension):
    """Extension that forwards the trainer's observation to a visualdl writer.

    Every item of the `.observation` dictionary of the trainer is written
    with `add_scalar`, using the `iteration` of the updater state as the
    step.
    """
    trigger = (1, 'iteration')
    default_name = 'visualdl'
    priority = extension.PRIORITY_READER

    def __init__(self, output_dir):
        log_dir = str(output_dir)
        self.writer = LogWriter(log_dir)

    def __call__(self, trainer: Trainer):
        """Record each observed item at the current iteration."""
        for tag, scalar in trainer.observation.items():
            self.writer.add_scalar(
                tag, scalar, step=trainer.updater.state.iteration)

    def finalize(self, trainer):
        """Close the underlying log writer."""
        self.writer.close()


================================================
FILE: paddlespeech/s2t/training/optimizer/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
from typing import Any
from typing import Dict
from typing import Text

import paddle
from paddle.optimizer import Optimizer
from paddle.regularizer import L2Decay

from paddlespeech.s2t.utils.dynamic_import import dynamic_import
from paddlespeech.s2t.utils.dynamic_import import instance_class
from paddlespeech.s2t.utils.log import Log

# Names exported by `from <this module> import *`.
__all__ = ["OptimizerFactory"]

# Module-level logger used by the factory below.
logger = Log(__name__).getlog()

# Registry of optimizers: lower-case alias -> "module.path:ClassName".
# `register_optimizer` adds entries to it at class-definition time.
OPTIMIZER_DICT = {
    "sgd": "paddle.optimizer:SGD",
    "momentum": "paddle.optimizer:Momentum",
    "adadelta": "paddle.optimizer:Adadelta",
    "adam": "paddle.optimizer:Adam",
    "adamw": "paddle.optimizer:AdamW",
}


def register_optimizer(cls):
    """Class decorator that records ``cls`` in ``OPTIMIZER_DICT``.

    The alias is the lower-cased class name and the stored value is
    ``"<cls.__module__>:<cls.__name__>"``. An existing entry with the same
    alias is overwritten.

    Args:
        cls (type): class to register.

    Returns:
        type: ``cls`` itself, so the function can be used as a decorator.
    """
    alias = cls.__name__.lower()
    OPTIMIZER_DICT[alias] = f"{cls.__module__}:{cls.__name__}"
    return cls


@register_optimizer
class Noam(paddle.optimizer.Adam):
    """Adam optimizer registered under the alias ``noam``.

    All constructor arguments are forwarded unchanged to
    ``paddle.optimizer.Adam``; only the default values declared here differ
    from call to call. The class also provides a readable ``__repr__``.

    Similar to: espnet/nets/pytorch_backend/transformer/optimizer.py
    """

    def __init__(self,
                 learning_rate=0,
                 beta1=0.9,
                 beta2=0.98,
                 epsilon=1e-9,
                 parameters=None,
                 weight_decay=None,
                 grad_clip=None,
                 lazy_mode=False,
                 multi_precision=False,
                 name=None):
        super().__init__(
            learning_rate=learning_rate,
            beta1=beta1,
            beta2=beta2,
            epsilon=epsilon,
            parameters=parameters,
            weight_decay=weight_decay,
            grad_clip=grad_clip,
            lazy_mode=lazy_mode,
            multi_precision=multi_precision,
            name=name)

    def __repr__(self):
        """Return a one-line summary of the optimizer's hyper-parameters."""
        echo = f"<{self.__class__.__module__}.{self.__class__.__name__} object at {hex(id(self))}> "
        echo += f"learning_rate: {self._learning_rate}, "
        echo += f"(beta1: {self._beta1} beta2: {self._beta2}), "
        echo += f"epsilon: {self._epsilon}"
        # __repr__ must return the string; falling off the end returns None,
        # which makes repr()/str()/f-strings raise TypeError.
        return echo


def dynamic_import_optimizer(module):
    """Import Optimizer class dynamically.

    Args:
        module (str): module_name:class_name or alias in `OPTIMIZER_DICT`

    Returns:
        type: Optimizer class

    Raises:
        AssertionError: if the imported class is not a subclass of
            ``paddle.optimizer.Optimizer``.

    """
    module_class = dynamic_import(module, OPTIMIZER_DICT)
    assert issubclass(module_class,
                      Optimizer), f"{module} does not implement Optimizer"
    return module_class


class OptimizerFactory():
    """Builds an optimizer from an alias (or import path) and an argument dict."""

    @classmethod
    def from_args(cls, name: str, args: Dict[Text, Any]):
        """Create the optimizer called ``name`` from ``args``.

        Args:
            name (str): alias in ``OPTIMIZER_DICT`` or ``module:class`` path;
                it is lower-cased before the lookup.
            args (Dict[Text, Any]): constructor arguments. ``parameters`` and
                ``learning_rate`` are required. ``grad_clip`` (optional) is the
                clip norm handed to ``paddle.nn.ClipGradByGlobalNorm``; a
                missing key or ``None`` disables clipping. ``weight_decay`` is
                optional. The dict is not modified.

        Returns:
            The optimizer created by ``instance_class``.

        Raises:
            AssertionError: if a required key is missing or the resolved class
                is not an ``Optimizer``.
        """
        assert "parameters" in args, "parameters not in args."
        assert "learning_rate" in args, "learning_rate not in args."

        # An explicit None means "no clipping", exactly like a missing key;
        # it must not be handed to ClipGradByGlobalNorm.
        clip_norm = args.get("grad_clip", None)
        grad_clip = None
        if clip_norm is not None:
            grad_clip = paddle.nn.ClipGradByGlobalNorm(clip_norm)
        weight_decay = args.get("weight_decay", None)
        if weight_decay:
            logger.info(f'<WeightDecay - {weight_decay}>')
        if grad_clip is not None:
            logger.info(f'<GradClip - {grad_clip}>')

        module_class = dynamic_import_optimizer(name.lower())
        # Work on a shallow copy so the caller's dict keeps its raw values
        # (e.g. the numeric clip norm) and can be reused.
        opt_args = dict(args)
        opt_args.update({"grad_clip": grad_clip, "weight_decay": weight_decay})
        opt = instance_class(module_class, opt_args)
        # A custom __repr__ is stored on the class, never in the instance
        # __dict__, so it has to be looked up on type(opt).
        if "__repr__" in vars(type(opt)):
            logger.info(f"{opt}")
        else:
            logger.info(
                f"<Optimizer {module_class.__module__}.{module_class.__name__}> LR: {args['learning_rate']}"
            )
        return opt


================================================
FILE: paddlespeech/s2t/training/optimizer/adadelta.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle import framework
from paddle.optimizer import Optimizer

# Empty on purpose or by omission: `from <this module> import *` exports nothing,
# so SimpleAdadelta has to be imported by name.
__all__ = []


class SimpleAdadelta(Optimizer):
    r"""
    **Notes: This API does not support sparse parameter optimization.**

    Adadelta Optimizer. Please refer to this for details:
    `ADADELTA: AN ADAPTIVE LEARNING RATE METHOD <https://arxiv.org/abs/1212.5701>`_.

    The update is done as follows:

    .. math::

        E(g_t^2) &= \rho * E(g_{t-1}^2) + (1-\rho) * g^2

        learning\_rate &= \sqrt{ ( E(dx_{t-1}^2) + \epsilon ) / ( E(g_t^2) + \epsilon ) }

        E(dx_t^2) &= \rho * E(dx_{t-1}^2) + (1-\rho) * (-g*learning\_rate)^2

    Args:
        learning_rate (float|Tensor|LearningRateDecay, optional): The learning rate used to update ``Parameter``.
            It can be a float value, a ``Tensor`` with a float type or a LearningRateDecay. The default value is 0.001.
        epsilon (float): a small float number for numeric stability. Default 1.0e-6.
        rho (float): a floating point value indicating the decay rate. Default 0.95.
        parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. \
            This parameter is required in dygraph mode. And you can specify different options for \
            different parameter groups such as the learning rate, weight decay, etc, \
            then the parameters are list of dict. Note that the learning_rate in parameter groups \
            represents the scale of base learning_rate. \
            The default value is None in static mode, at this time all parameters will be updated.
        weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \
            It can be a float value as coeff of L2 regularization or \
            :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`.
            If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \
            the regularization setting here in optimizer will be ignored for this parameter. \
            Otherwise, the regularization setting here in optimizer will take effect. \
            Default None, meaning there is no regularization.
        foreach (bool, optional): whether foreach implementation of optimizer is used. The default value is None.
        maximize (bool, optional): maximize the params based on the objective, instead of minimizing.
            The default value is False.
        name (str, optional): The default value is None. Normally there is no need for user
                to set this property. For more information, please refer to
                :ref:`api_guide_Name` .

    Examples:
        .. code-block:: python

            import paddle
            from paddlespeech.s2t.training.optimizer.adadelta import SimpleAdadelta

            inp = paddle.uniform([10, 10], dtype="float32", min=-0.1, max=0.1)
            linear = paddle.nn.Linear(10, 10)
            out = linear(inp)
            loss = paddle.mean(out)
            adadelta = SimpleAdadelta(learning_rate=0.1, parameters=linear.parameters(), weight_decay=0.01)
            out.backward()
            adadelta.step()
            adadelta.clear_grad()

    """

    def __init__(
            self,
            learning_rate=0.001,
            epsilon=1.0e-6,
            rho=0.95,
            parameters=None,
            weight_decay=0.0,
            foreach=None,
            maximize=False,
            name=None, ):
        if learning_rate is None:
            raise ValueError("learning_rate is not set.")
        if epsilon is None:
            raise ValueError("epsilon is not set.")
        if rho is None:
            raise ValueError("rho is not set.")
        super(SimpleAdadelta, self).__init__(
            learning_rate=learning_rate,
            parameters=parameters,
            weight_decay=weight_decay,
            name=name, )

        self._epsilon = epsilon
        self._rho = rho

        # 0 until the first call to step(), 1 afterwards. It no longer drives
        # state creation (see _state_slots) and is kept for compatibility.
        self.state = 0
        self._weight_decay = weight_decay
        self._learning_rate = learning_rate
        self._foreach = foreach
        self._maximize = maximize
        # Running averages, one entry per parameter that has been updated.
        self.square_avgs = []
        self.acc_deltas = []
        # id(param) -> index into square_avgs / acc_deltas, so every parameter
        # always gets its own state regardless of which parameters have a
        # gradient in a given step.
        self._state_slots = {}

    def _trainable_params(self):
        """Yield all parameters of the optimizer, flattening parameter groups."""
        if isinstance(self._parameter_list[0], dict):
            # parameters were given as groups
            for param_group in self._param_groups:
                yield from param_group['params']
        else:
            yield from self._parameter_list

    @paddle.no_grad()
    @framework.dygraph_only
    def step(self):
        """Performs a single optimization step.

        Every parameter that is trainable (``stop_gradient`` is False) and
        currently has a gradient is updated by :func:`adadelta`. The running
        averages of a parameter are created, zero-initialised, the first time
        that parameter is updated.
        """
        params_grads = []
        square_avgs = []
        acc_deltas = []
        for param in self._trainable_params():
            if param.stop_gradient:
                continue
            grad_var = param._grad_ivar()
            if grad_var is None:
                continue

            slot = self._state_slots.get(id(param))
            if slot is None:
                # first update of this parameter: allocate its state
                slot = len(self.square_avgs)
                self._state_slots[id(param)] = slot
                self.square_avgs.append(paddle.zeros_like(param))
                self.acc_deltas.append(paddle.zeros_like(param))

            # The three lists are built together so that index i always refers
            # to the same parameter in each of them.
            params_grads.append((param, grad_var))
            square_avgs.append(self.square_avgs[slot])
            acc_deltas.append(self.acc_deltas[slot])

        self.state = 1
        adadelta(
            params_grads,
            square_avgs=square_avgs,
            acc_deltas=acc_deltas,
            # get_lr() resolves both a plain float and a learning-rate
            # scheduler to the current float value.
            learning_rate=self.get_lr(),
            rho=self._rho,
            epsilon=self._epsilon,
            weight_decay=self._weight_decay,
            foreach=self._foreach,
            maximize=self._maximize)


def adadelta(params_grads,
             square_avgs,
             acc_deltas,
             foreach=None,
             *,
             learning_rate: float,
             rho: float,
             epsilon: float,
             weight_decay: float,
             maximize: bool):
    """Run one Adadelta update over ``params_grads``.

    Args:
        params_grads: sequence of ``(param, grad)`` pairs.
        square_avgs: running averages of squared gradients, aligned with
            ``params_grads``.
        acc_deltas: running averages of squared updates, aligned with
            ``params_grads``.
        foreach (bool, optional): requested implementation. Only the
            single-tensor implementation exists in this module, so it is used
            for ``None``, ``False`` and ``True`` alike.
        learning_rate, rho, epsilon, weight_decay, maximize: forwarded
            unchanged to ``_single_tensor_adadelta``.
    """
    # Previously `foreach=True` left the implementation unselected and raised
    # UnboundLocalError. There is no multi-tensor variant here, so every
    # setting is routed to the single-tensor implementation.
    _single_tensor_adadelta(
        params_grads,
        square_avgs,
        acc_deltas,
        learning_rate=learning_rate,
        rho=rho,
        epsilon=epsilon,
        weight_decay=weight_decay,
        maximize=maximize)


def _single_tensor_adadelta(params_grads,
                            square_avgs,
                            acc_deltas,
                            *,
                            learning_rate: float,
                            rho: float,
                            epsilon: float,
                            weight_decay: float,
                            maximize: bool):
    """
    Calculate variables(square_avgs, acc_deltas) and update parameters.

    ``square_avgs[i]`` and ``acc_deltas[i]`` are the running averages that
    belong to ``params_grads[i]``. The averages and the parameters are updated
    in place through ``set_value``; the gradients are only read. Iteration
    stops at the shortest of the three sequences.

    Args:
        params_grads: sequence of ``(param, grad)`` pairs.
        square_avgs: running averages of the squared gradients.
        acc_deltas: running averages of the squared updates.
        learning_rate (float): factor applied to each update.
        rho (float): decay rate of both running averages.
        epsilon (float): constant added before taking square roots.
        weight_decay (float): when non-zero, ``weight_decay * param`` is added
            to the gradient before the update.
        maximize (bool): when True the gradient is negated.
    """
    # Loop-invariant scalars are wrapped once instead of once per parameter.
    rho_t = paddle.to_tensor(rho)
    one_minus_rho_t = paddle.to_tensor(1 - rho)
    eps_t = paddle.to_tensor(epsilon)
    neg_lr_t = paddle.to_tensor(-learning_rate)

    for (param, grad), square_avg, acc_delta in zip(params_grads, square_avgs,
                                                    acc_deltas):
        if maximize:
            grad = -grad
        if weight_decay != 0:
            # Build a new tensor instead of writing into `grad`: `grad` is the
            # gradient stored on the parameter and must not be altered here.
            grad = grad.add(
                paddle.multiply(param, paddle.to_tensor(weight_decay)))

        if paddle.is_complex(param):
            # NOTE(review): the in-place updates below act on the results of
            # as_real; whether they reach the stored state depends on as_real
            # returning a view - confirm before relying on complex parameters.
            square_avg = paddle.as_real(square_avg)
            acc_delta = paddle.as_real(acc_delta)
            grad = paddle.as_real(grad)
        # square_avg = square_avg * rho + (1-rho) * grad * grad
        square_avg.set_value(
            paddle.multiply(square_avg, rho_t).add(
                paddle.multiply(one_minus_rho_t, grad.square())))
        # std = (square_avg + eps).sqrt()
        std = square_avg.add(eps_t).sqrt_()
        # delta = (acc_delta + eps).sqrt() / std * grad
        delta = paddle.multiply(
            paddle.divide(acc_delta.add(eps_t).sqrt_(), std), grad)
        # acc_delta = acc_delta * rho + (1-rho) * delta * delta
        acc_delta.set_value(
            paddle.multiply(acc_delta, rho_t).add(
                paddle.multiply(one_minus_rho_t, delta.square())))
        if paddle.is_complex(param):
            # NOTE(review): delta is already real-valued at this point;
            # presumably it should be converted back to complex here
            # (paddle.as_complex) - left unchanged, please verify.
            delta = paddle.as_real(delta)
        # param = param - delta*learning_rate
        param.set_value(
            param.add(paddle.multiply(delta.astype('float32'), neg_lr_t)))


================================================
FILE: paddlespeech/s2t/training/reporter.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from chainer(https://github.com/chainer/chainer)
import contextlib
import math
from collections import defaultdict

# Dictionary that `report` currently writes to; None means nothing is recorded.
OBSERVATIONS = None


@contextlib.contextmanager
def ObsScope(observations):
    """Make ``observations`` the reporting target inside the ``with`` block.

    ``observations`` is a dictionary that collects the values passed to
    :func:`report`. The previous target is restored on exit, also when the
    block raises, so scopes can be nested.
    """
    global OBSERVATIONS
    previous, OBSERVATIONS = OBSERVATIONS, observations

    try:
        yield
    finally:
        OBSERVATIONS = previous


def get_observations():
    """Return the current reporting target (``None`` outside any scope)."""
    return OBSERVATIONS


def report(name, value):
    """Store ``value`` under ``name`` in the current reporting target.

    It can be called from anywhere, much like writing to standard output;
    without an active target the call does nothing.
    """
    target = get_observations()
    if target is not None:
        target[name] = value


class Summary():
    """Online summarization of a sequence of scalars.
    Summary computes the statistics of given scalars online.

    Only three accumulators are kept: the weighted sum of the values, the
    weighted sum of their squares and the sum of the weights.
    """

    def __init__(self):
        self._x = 0.0  # weighted sum of values
        self._x2 = 0.0  # weighted sum of squared values
        self._n = 0  # sum of weights

    def add(self, value, weight=1):
        """Adds a scalar value.
        Args:
            value: Scalar value to accumulate. It is either a NumPy scalar or
                a zero-dimensional array (on CPU or GPU).
            weight: An optional weight for the value. It is a NumPy scalar or
                a zero-dimensional array (on CPU or GPU).
                Default is 1 (integer).
        """
        self._x += weight * value
        self._x2 += weight * value * value
        self._n += weight

    def compute_mean(self):
        """Computes the mean.

        Divides by the accumulated weight, so at least one value with a
        non-zero weight must have been added.
        """
        x, n = self._x, self._n
        return x / n

    def make_statistics(self):
        """Computes and returns the mean and standard deviation values.
        Returns:
            tuple: Mean and standard deviation values.
        """
        x, n = self._x, self._n
        mean = x / n
        var = self._x2 / n - mean * mean
        # E[x^2] - E[x]^2 can come out as a tiny negative number because of
        # floating point rounding (e.g. for constant data); math.sqrt would
        # then raise ValueError. NaN is left untouched by this comparison.
        if var < 0:
            var = 0.0
        std = math.sqrt(var)
        return mean, std


class DictSummary():
    """Online summarization of a sequence of dictionaries.
    ``DictSummary`` computes the statistics of a given set of scalars online.
    It only computes the statistics for scalar values and variables of scalar
    values in the dictionaries.

    One ``Summary`` is kept per key and created on first use.
    """

    def __init__(self):
        self._summaries = defaultdict(Summary)

    def add(self, d):
        """Adds a dictionary of scalars.
        Args:
            d (dict): Dictionary of scalars to accumulate. Only elements of
               scalars, zero-dimensional arrays, and variables of
               zero-dimensional arrays are accumulated. When the value
               is a tuple, the second element is interpreted as a weight.
        """
        summaries = self._summaries
        for k, v in d.items():
            w = 1
            if isinstance(v, tuple):
                # Read both elements before rebinding `v`; assigning
                # `v = v[0]` first would make `v[1]` index the value itself.
                v, w = v[0], v[1]
            summaries[k].add(v, weight=w)

    def compute_mean(self):
        """Creates a dictionary of mean values.
        It returns a single dictionary that holds a mean value for each entry
        added to the summary.
        Returns:
            dict: Dictionary of mean values.
        """
        return {
            name: summary.compute_mean()
            for name, summary in self._summaries.items()
        }

    def make_statistics(self):
        """Creates a dictionary of statistics.
        It returns a single dictionary that holds mean and standard deviation
        values for every entry added to the summary. For an entry of name
        ``'key'``, these values are added to the dictionary by names ``'key'``
        and ``'key.std'``, respectively.
        Returns:
            dict: Dictionary of statistics of all entries.
        """
        stats = {}
        for name, summary in self._summaries.items():
            mean, std = summary.make_statistics()
            stats[name] = mean
            stats[name + '.std'] = std

        return stats


================================================
FILE: paddlespeech/s2t/training/scheduler.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
from typing import Any
from typing import Dict
from typing import Text
from typing import Union

import paddle
from paddle.optimizer.lr import LRScheduler
from typeguard import typechecked

from paddlespeech.s2t.utils.dynamic_import import dynamic_import
from paddlespeech.s2t.utils.dynamic_import import instance_class
from paddlespeech.s2t.utils.log import Log

# Names exported by `from <this module> import *`.
__all__ = ["WarmupLR", "LRSchedulerFactory"]

# Module-level logger.
logger = Log(__name__).getlog()

# Registry of schedulers: lower-case alias -> "module.path:ClassName".
# `register_scheduler` adds entries to it at class-definition time.
SCHEDULER_DICT = {
    "noam": "paddle.optimizer.lr:NoamDecay",
    "expdecaylr": "paddle.optimizer.lr:ExponentialDecay",
    "piecewisedecay": "paddle.optimizer.lr:PiecewiseDecay",
}


def register_scheduler(cls):
    """Class decorator that records ``cls`` in ``SCHEDULER_DICT``.

    The alias is the lower-cased class name and the stored value is
    ``"<cls.__module__>:<cls.__name__>"``. An existing entry with the same
    alias is overwritten.

    Args:
        cls (type): class to register.

    Returns:
        type: ``cls`` itself, so the function can be used as a decorator.
    """
    alias = cls.__name__.lower()
    SCHEDULER_DICT[alias] = f"{cls.__module__}:{cls.__name__}"
    return cls


@register_scheduler
class WarmupLR(LRScheduler):
    """Learning-rate schedule with a warm-up phase.

    It is almost the same as the NoamLR scheduler; the two differ only in the
    constant factor:

    NoamLR:
        lr = optimizer.lr * model_size ** -0.5
             * min(step ** -0.5, step * warmup_step ** -1.5)
    WarmupLR:
        lr = optimizer.lr * warmup_step ** 0.5
             * min(step ** -0.5, step * warmup_step ** -1.5)

    With this scheduler the maximum lr equals optimizer.lr.
    """

    @typechecked
    def __init__(self,
                 warmup_steps: Union[int, float]=25000,
                 learning_rate=1.0,
                 last_epoch=-1,
                 verbose=False,
                 **kwargs):
        # Assigned before the base constructor is called (order kept from the
        # original; presumably the base class needs it during construction).
        # Extra keyword arguments are accepted and ignored.
        self.warmup_steps = warmup_steps
        super().__init__(learning_rate, last_epoch, verbose)

    def __repr__(self):
        return f"{self.__class__.__name__}(warmup_steps={self.warmup_steps}, lr={self.base_lr}, last_epoch={self.last_epoch})"

    def get_lr(self):
        """Return the learning rate for the current ``last_epoch``."""
        # last_epoch starts from zero, the formula counts steps from one
        step = self.last_epoch + 1
        warmup = self.warmup_steps
        peak_scale = self.base_lr * warmup**0.5
        return peak_scale * min(step**-0.5, step * warmup**-1.5)

    def set_step(self, step: int=None):
        '''
        Move the scheduler to ``step`` by delegating to ``self.step(epoch=step)``.
        The new learning rate will take effect on next ``optimizer.step`` .

        Args:
            step (int, None): specify current epoch. Default: None. Auto-increment from last_epoch=-1.
        Returns:
            None
        '''
        self.step(epoch=step)


@register_scheduler
class ConstantLR(LRScheduler):
    """Scheduler whose ``get_lr`` always returns the base learning rate.

    Args:
        learning_rate (float): The initial learning rate. It is a python float number.
        last_epoch (int, optional):  The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
        verbose (bool, optional): If ``True``, prints a message to stdout for each update. Default: ``False`` .

    Returns:
        ``ConstantLR`` instance to schedule learning rate.
    """

    def __init__(self, learning_rate, last_epoch=-1, verbose=False):
        super().__init__(
            learning_rate=learning_rate,
            last_epoch=last_epoch,
            verbose=verbose)

    def get_lr(self):
        """Return ``self.base_lr`` unchanged."""
        constant_lr = self.base_lr
        return constant_lr


@register_scheduler
class NewBobScheduler(LRScheduler):
    """Scheduler with new-bob technique, used for LR annealing.

    The learning rate is annealed based on the validation performance.
    In particular: if (past_loss-current_loss)/past_loss< impr_threshold:
    lr=lr * annealing_factor.

    Arguments
    ---------
    learning_rate : float
        The initial learning rate.
    last_epoch : int
        Forwarded to ``LRScheduler``. Default: -1.
    verbose : bool
        If ``True``, prints a message for each call to ``step``.
    annealing_factor : float
        It is annealing factor used in new_bob strategy.
    improvement_threshold : float
        It is the improvement rate between losses used to perform learning
        annealing in new_bob strategy.
    patient : int
        Number of insufficient improvements that are tolerated before the
        learning rate is reduced: the reduction happens on the next
        insufficient improvement after the counter has reached zero, and the
        counter is then reset to ``patient``.

    Example
    -------
    >>> scheduler = NewBobScheduler(learning_rate=1.0)
    >>> scheduler.step(metric_value=10.0)
    >>> scheduler.last_lr
    1.0
    >>> scheduler.step(metric_value=2.0)
    >>> scheduler.last_lr
    1.0
    >>> scheduler.step(metric_value=2.5)
    >>> scheduler.last_lr
    0.5
    """

    def __init__(
            self,
            learning_rate,
            last_epoch=-1,
            verbose=False,
            annealing_factor=0.5,
            improvement_threshold=0.0025,
            patient=0, ):
        # All attributes are assigned before the base constructor is called
        # (order kept from the original).
        self.hyperparam_value = learning_rate
        self.annealing_factor = annealing_factor
        self.improvement_threshold = improvement_threshold
        self.patient = patient
        self.metric_values = []
        self.current_patient = self.patient
        super().__init__(learning_rate, last_epoch, verbose)

    def step(self, metric_value=None):
        """Advance the scheduler by one epoch.

        ``step`` should be called after ``optimizer.step`` .
        The new learning rate will take effect on next ``optimizer.step`` .

        Args:
            metric_value (float, None): validation metric of the finished
                epoch. When given, the learning rate is re-evaluated with
                ``get_lr``; when ``None`` the current value is kept.

        Returns:
            None
        """
        self.last_epoch += 1
        if metric_value is None:
            self.last_lr = self.hyperparam_value
        else:
            self.last_lr = self.get_lr(metric_value)

        if self.verbose:
            print('Epoch {}: {} set learning rate to {}.'.format(
                self.last_epoch, self.__class__.__name__, self.last_lr))

    def get_lr(self, metric_value):
        """Records ``metric_value`` and returns the new learning rate.

        This method has side effects: the metric is appended to
        ``metric_values`` and ``hyperparam_value`` / ``current_patient`` are
        updated.

        Arguments
        ---------
        metric_value : float
            A number for determining whether to change the hyperparameter value.
        """
        new_value = self.hyperparam_value
        if len(self.metric_values) > 0:
            prev_metric = self.metric_values[-1]
            # Update value if improvement too small and patience is 0
            if prev_metric == 0:  # Prevent division by zero
                improvement = 0
            else:
                improvement = (prev_metric - metric_value) / prev_metric
            if improvement < self.improvement_threshold:
                if self.current_patient == 0:
                    new_value *= self.annealing_factor
                    self.current_patient = self.patient
                else:
                    self.current_patient -= 1

        # Store relevant info
        self.metric_values.append(metric_value)
        self.hyperparam_value = new_value

        return new_value

    def save(self):
        """Returns the scheduler state as a dictionary (nothing is written to disk)."""
        data = {
            "current_epoch_index": self.last_epoch,
            "hyperparam_value": self.hyperparam_value,
            "metric_values": self.metric_values,
            "current_patient": self.current_patient
        }
        return data

    def load(self, data):
        """Restores the state produced by ``save``."""
        self.last_epoch = data["current_epoch_index"]
        self.hyperparam_value = data["hyperparam_value"]
        self.metric_values = data["metric_values"]
        self.current_patient = data["current_patient"]
        # Keep the reported learning rate in sync with the restored value;
        # otherwise `last_lr` stays at the constructor value until the next
        # call to step().
        self.last_lr = self.hyperparam_value


def dynamic_import_scheduler(module):
    """Resolve a scheduler class from its alias or import path.

    Args:
        module (str): module_name:class_name or alias in `SCHEDULER_DICT`

    Returns:
        type: Scheduler class

    Raises:
        AssertionError: if the resolved class is not an ``LRScheduler``
            subclass.

    """
    scheduler_cls = dynamic_import(module, SCHEDULER_DICT)
    is_scheduler = issubclass(scheduler_cls, LRScheduler)
    assert is_scheduler, f"{module} does not implement LRScheduler"
    return scheduler_cls


class LRSchedulerFactory():
    """Builds a learning-rate scheduler from an alias and an argument dict."""

    @classmethod
    def from_args(cls, name: str, args: Dict[Text, Any]):
        """Resolve ``name`` (lower-cased) to a scheduler class and instantiate
        it from ``args`` through ``instance_class``."""
        scheduler_cls = dynamic_import_scheduler(name.lower())
        scheduler = instance_class(scheduler_cls, args)
        return scheduler


================================================
FILE: paddlespeech/s2t/training/timer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
import time

from paddlespeech.s2t.utils.log import Log

# Names exported by `from <this module> import *`.
__all__ = ["Timer"]

# Module-level logger; Timer.__exit__ writes the elapsed time to it.
logger = Log(__name__).getlog()


class Timer():
    """Measures elapsed wall-clock time.

    To be used like this:
        with Timer("Message: {}") as value:
            do some thing

    On leaving the ``with`` block, ``message.format(<elapsed time>)`` is
    logged when a message was given, so the message should contain a ``{}``
    placeholder. The timer can also be queried without a ``with`` block; it
    then counts from its construction.
    """

    def __init__(self, message=None):
        self.message = message
        # Start counting right away so duration(), calling the timer and
        # str() work even when the timer was never entered; __enter__
        # restarts the measurement.
        self.start = time.time()

    def duration(self) -> str:
        """Return the elapsed time formatted by ``datetime.timedelta``."""
        elapsed_time = time.time() - self.start
        time_str = str(datetime.timedelta(seconds=elapsed_time))
        return time_str

    def __enter__(self):
        self.start = time.time()
        return self

    def __exit__(self, type, value, traceback):
        # Exceptions are not suppressed (nothing truthy is returned).
        if self.message:
            logger.info(self.message.format(self.duration()))

    def __call__(self) -> float:
        """Return the elapsed time in seconds."""
        return time.time() - self.start

    def __str__(self):
        return self.duration()


================================================
FILE: paddlespeech/s2t/training/trainer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import time
from collections import OrderedDict
from contextlib import contextmanager
from pathlib import Path

import paddle
from paddle import distributed as dist
# Runs at import time, before the remaining imports below: when more than one
# process takes part in the job, the parallel environment is initialised.
world_size = dist.get_world_size()
if world_size > 1:
    dist.init_parallel_env()

from visualdl import LogWriter

from paddlespeech.s2t.training.reporter import ObsScope
from paddlespeech.s2t.training.reporter import report
from paddlespeech.s2t.training.timer import Timer
from paddlespeech.s2t.utils import mp_tools
from paddlespeech.s2t.utils import profiler
from paddlespeech.s2t.utils.checkpoint import Checkpoint
from paddlespeech.s2t.utils.log import Log
from paddlespeech.s2t.utils.utility import all_version
from paddlespeech.s2t.utils.utility import seed_all
from paddlespeech.s2t.utils.utility import UpdateConfig

# Names exported by `from <this module> import *`.
__all__ = ["Trainer"]

# Module-level logger.
logger = Log(__name__).getlog()


class Trainer():
    """
    An experiment template in order to structure the training code and take
    care of saving, loading, logging, visualization stuffs. It's intended to
    be flexible and simple.

    So it only handles output directory (create directory for the output,
    create a checkpoint directory, dump the config in use and create
    visualizer and logger) in a standard way without enforcing any
    input-output protocols to the model and dataloader. It leaves the main
    part for the user to implement their own (setup the model, criterion,
    optimizer, define a training step, define a validation function and
    customize all the text and visual logs).
    It does not save too much boilerplate code. The users still have to write
    the forward/backward/update manually, but they are free to add
    non-standard behaviors if needed.
    We have some conventions to follow.
    1. Experiment should have ``model``, ``optimizer``, ``train_loader`` and
    ``valid_loader``, ``config`` and ``args`` attributes.
    2. The config should have a ``training`` field, which has
    ``valid_interval``, ``save_interval`` and ``max_iteration`` keys. It is
    used as the trigger to invoke validation, checkpointing and stop of the
    experiment.
    3. There are four methods, namely ``train_batch``, ``valid``,
    ``setup_model`` and ``setup_dataloader`` that should be implemented.
    Feel free to add/overwrite other methods and standalone functions if you
    need.

    Parameters
    ----------
    config: yacs.config.CfgNode
        The configuration used for the experiment.

    args: argparse.Namespace
        The parsed command line arguments.
    Examples
    --------
    >>> def main_sp(config, args):
    >>>     exp = Trainer(config, args)
    >>>     exp.setup()
    >>>     exp.run()
    >>>
    >>> config = get_cfg_defaults()
    >>> parser = default_argument_parser()
    >>> args = parser.parse_args()
    >>> if args.config:
    >>>     config.merge_from_file(args.config)
    >>> if args.opts:
    >>>     config.merge_from_list(args.opts)
    >>> config.freeze()
    >>>
    >>> if args.ngpu > 1:
    >>>     dist.spawn(main_sp, args=(config, args), nprocs=args.ngpu)
    >>> else:
    >>>     main_sp(config, args)
    """

    def __init__(self, config, args):
        """Store the experiment inputs, select the device and build the
        checkpoint helper.

        Args:
            config: experiment configuration. ``checkpoint.kbest_n`` and
                ``checkpoint.latest_n`` are read here.
            args: parsed command line arguments. ``ngpu``, ``nxpu`` and
                ``seed`` are read here, plus ``benchmark_batch_size`` when
                the attribute exists.

        Raises:
            Exception: if ``args.ngpu`` is neither zero nor positive.
        """
        self.config = config
        self.args = args

        # filled in later by `setup()` / the subclass hooks
        self.optimizer = None
        self.visualizer = None
        self.output_dir = None
        self.checkpoint_dir = None
        self.scaler = None

        # training progress
        self.iteration = 0
        self.epoch = 0
        self._train = True

        # position of this process in the (possibly distributed) job
        self.rank = dist.get_rank()
        self.world_size = dist.get_world_size()

        # print deps version
        all_version()
        logger.info(f"Rank: {self.rank}/{self.world_size}")

        # set device: gpu when requested, otherwise xpu when requested,
        # otherwise cpu
        ngpu = self.args.ngpu
        if ngpu == 0:
            device = 'cpu' if self.args.nxpu == 0 else 'xpu'
        elif ngpu > 0:
            device = "gpu"
        else:
            raise Exception("invalid device")
        paddle.set_device(device)

        ckpt_conf = self.config.checkpoint
        self.checkpoint = Checkpoint(
            kbest_n=ckpt_conf.kbest_n, latest_n=ckpt_conf.latest_n)

        # set random seed if needed (a falsy seed, e.g. None or 0, is skipped)
        if args.seed:
            seed_all(args.seed)
            logger.info(f"Set seed {args.seed}")

        # profiler and benchmark options
        benchmark_batch_size = getattr(self.args, "benchmark_batch_size", None)
        if benchmark_batch_size:
            with UpdateConfig(self.config):
                self.config.batch_size = self.args.benchmark_batch_size
                self.config.log_interval = 1
            logger.info(
                f"Benchmark reset batch-size: {self.args.benchmark_batch_size}")

    @property
    def train(self):
        """Whether the experiment is currently in training mode.

        ``True`` after construction; ``False`` while inside a
        ``with self.eval():`` block.
        """
        in_training = self._train
        return in_training

    @contextmanager
    def eval(self):
        """Context manager that switches the experiment to evaluation mode.

        ``self.train`` is ``False`` inside the ``with`` block and is set
        back to ``True`` on exit.  The reset is done in a ``finally`` clause
        so that an exception raised inside the block (a failed decode, a
        ``KeyboardInterrupt``, ...) cannot leave the trainer stuck in
        evaluation mode.
        """
        self._train = False
        try:
            yield
        finally:
            self._train = True

    def setup(self):
        """Prepare everything the experiment needs before running.

        In order: create the output directories, dump the config, create
        the visualizer, build the dataloaders, build the model, and finally
        reset the ``iteration`` and ``epoch`` counters to zero.
        """
        # output side: directories, config dump, visual log
        self.setup_output_dir()
        self.dump_config()
        self.setup_visualizer()

        # input side and model (implemented by subclasses)
        self.setup_dataloader()
        self.setup_model()

        # start counting from zero
        self.iteration = self.epoch = 0

    @property
    def parallel(self):
        """``True`` when more than one GPU was requested (``args.ngpu > 1``),
        i.e. the experiment should run with multiprocessing.
        """
        ngpu = self.args.ngpu
        return ngpu > 1

    @mp_tools.rank_zero_only
    def save(self, tag=None, infos: dict=None):
        """Save checkpoint (model parameters and optimizer states).

        When ``self.scaler`` is set, its state dict is additionally saved to
        ``<checkpoint_dir>/<epoch>.scaler``.

        Args:
            tag (int or str, optional): None for step, else using tag, e.g epoch. Defaults to None.
            infos (dict, optional): meta data to save. Defaults to None.
                The dict passed in is not modified; ``step``, ``epoch`` and
                ``lr`` are added to a copy.
        """
        # Work on a copy: updating `infos` in place would leak the
        # bookkeeping keys into the caller's dict.
        meta = dict(infos) if infos else dict()
        meta.update({
            "step": self.iteration,
            "epoch": self.epoch,
            "lr": self.optimizer.get_lr(),
        })

        if self.scaler:
            # the scaler file is always named after the epoch, which is the
            # name `resume_or_scratch` looks for
            scaler_path = os.path.join(self.checkpoint_dir,
                                       "{}".format(self.epoch)) + '.scaler'
            paddle.save(self.scaler.state_dict(), scaler_path)

        ckpt_tag = self.iteration if tag is None else tag
        self.checkpoint.save_parameters(self.checkpoint_dir, ckpt_tag,
                                        self.model, self.optimizer, meta)

    def resume_or_scratch(self):
        """Resume from latest checkpoint at checkpoints in the output
        directory or load a specified checkpoint.

        ``args.checkpoint_path`` and ``self.checkpoint_dir`` are both handed
        to ``Checkpoint.load_latest_parameters``.  When it returns checkpoint
        infos, ``iteration`` and ``epoch`` are restored from them and, if a
        ``<epoch>.scaler`` file exists and a scaler is configured, the scaler
        state is restored too.  Otherwise both counters are reset to zero.

        Returns:
            bool: ``True`` if training starts from scratch, ``False`` if a
            checkpoint was restored.
        """
        infos = self.checkpoint.load_latest_parameters(
            self.model,
            self.optimizer,
            checkpoint_dir=self.checkpoint_dir,
            checkpoint_path=self.args.checkpoint_path)

        if not infos:
            self.iteration = 0
            self.epoch = 0
            logger.info("Init from scratch!")
            return True

        # just restore ckpt
        # lr will restore from optimizer ckpt
        self.iteration = infos["step"]
        self.epoch = infos["epoch"]

        scaler_path = os.path.join(self.checkpoint_dir,
                                   "{}".format(self.epoch)) + '.scaler'
        if os.path.exists(scaler_path):
            if self.scaler is not None:
                scaler_state_dict = paddle.load(scaler_path)
                self.scaler.load_state_dict(scaler_state_dict)
            else:
                # A scaler file left by an earlier run, but no scaler in
                # this run: skip it instead of failing on `None`.
                logger.warning(
                    f"Found scaler state {scaler_path} but no scaler is "
                    "configured, skip restoring it.")

        logger.info(
            f"Restore ckpt: epoch {self.epoch }, step {self.iteration}!")
        return False

    def maybe_batch_sampler_step(self):
        """Pass the current epoch to the train loader's batch sampler.

        Only acts when ``self.train_loader`` has a ``batch_sampler`` that is
        a ``paddle.io.DistributedBatchSampler``; otherwise it is a no-op.
        """
        if not hasattr(self.train_loader, "batch_sampler"):
            return

        sampler = self.train_loader.batch_sampler
        if not isinstance(sampler, paddle.io.DistributedBatchSampler):
            return

        logger.debug(f"train_loader.batch_sample.set_epoch: {self.epoch}")
        sampler.set_epoch(self.epoch)

    def before_train(self):
        """Restore or initialize the training state before the first epoch.

        From scratch, the initial model is saved under the tag ``'init'``.
        On resume, ``epoch`` and ``iteration`` are advanced by one so that
        training continues after the restored checkpoint.  In both cases the
        batch sampler is then told the current epoch.
        """
        if self.resume_or_scratch():
            # scratch: save init model, i.e. 0 epoch
            self.save(tag='init', infos=None)
        else:
            # resume: train next_epoch and next_iteration
            self.epoch = self.epoch + 1
            self.iteration = self.iteration + 1
            logger.info(
                f"Resume train: epoch {self.epoch }, step {self.iteration}!")

        self.maybe_batch_sampler_step()

    def new_epoch(self):
        """Advance ``epoch`` by one and pass the new epoch to the batch
        sampler.

        ``iteration`` is not touched here.
        """
        self.epoch = self.epoch + 1
        self.maybe_batch_sampler_step()

    def after_train_batch(self):
        """Benchmark hook run after every training batch.

        Does nothing unless ``args.benchmark_max_step`` is set.  When it is,
        a profiler step is recorded and the process exits with status 0 once
        ``iteration`` exceeds that limit.
        """
        max_step = self.args.benchmark_max_step
        if not max_step:
            return

        profiler.add_profiler_step(self.args.profiler_options)
        if self.iteration > max_step:
            logger.info(f"Reach benchmark-max-step: {max_step}")
            sys.exit(0)

    def do_train(self):
        """The training process control by epoch.

        Calls ``before_train`` once, then repeats until ``self.epoch``
        reaches ``self.config.n_epoch``:

        1. iterate over ``self.train_loader`` and call ``train_batch`` and
           ``after_train_batch`` for every batch, logging a summary line
           every ``config.log_interval`` batches;
        2. run ``valid`` and compute the validation loss, summed over all
           processes when running distributed;
        3. write the validation loss and lr to the visualizer, if any;
        4. step the lr scheduler, save a checkpoint tagged with the epoch
           and start the next epoch with ``new_epoch``.

        Raises:
            Exception: anything raised while iterating over the training
                batches is logged and re-raised.
        """
        self.before_train()

        logger.info(f"Train Total Examples: {len(self.train_loader.dataset)}")
        while self.epoch < self.config.n_epoch:
            with Timer("Epoch-Train Time Cost: {}"):
                self.model.train()
                try:
                    # measures how long the loader takes to yield a batch
                    data_start_time = time.time()
                    for batch_index, batch in enumerate(self.train_loader):
                        dataload_time = time.time() - data_start_time
                        msg = "Train:"
                        # values passed to `report` inside the scope are
                        # read back from `observation` below
                        observation = OrderedDict()
                        with ObsScope(observation):
                            report("Rank", dist.get_rank())
                            report("epoch", self.epoch)
                            report('step', self.iteration)
                            report("lr", self.lr_scheduler())
                            self.train_batch(batch_index, batch, msg)
                            self.after_train_batch()
                            report('iter', batch_index + 1)
                            report('total', len(self.train_loader))
                            report('reader_cost', dataload_time)
                        # 'step_cost' and 'batch_size' are not reported in
                        # this method; presumably `train_batch` reports them
                        # -- a KeyError here means it did not.
                        observation['batch_cost'] = observation[
                            'reader_cost'] + observation['step_cost']
                        observation['samples'] = observation['batch_size']
                        observation['ips samples/s'] = observation[
                            'batch_size'] / observation['batch_cost']
                        # build "Train: key: value, key: value, ..."
                        for k, v in observation.items():
                            msg += f" {k}: "
                            msg += f"{v:>.8f}" if isinstance(v,
                                                             float) else f"{v}"
                            msg += ","
                        msg = msg[:-1]  # remove the last ","
                        if (batch_index + 1) % self.config.log_interval == 0:
                            logger.info(msg)
                        # restart the data-loading clock for the next batch
                        data_start_time = time.time()
                except Exception as e:
                    logger.error(e)
                    raise e

            with Timer("Eval Time Cost: {}"):
                total_loss, num_seen_utts = self.valid()
                # NOTE(review): neither branch guards against
                # `num_seen_utts` being zero.
                if dist.get_world_size() > 1:
                    num_seen_utts = paddle.to_tensor(num_seen_utts)
                    # the default operator in all_reduce function is sum.
                    dist.all_reduce(num_seen_utts)
                    total_loss = paddle.to_tensor(total_loss)
                    dist.all_reduce(total_loss)
                    cv_loss = total_loss / num_seen_utts
                    cv_loss = float(cv_loss)
                else:
                    cv_loss = total_loss / num_seen_utts

            logger.info(
                'Epoch {} Val info val_loss {}'.format(self.epoch, cv_loss))
            # `self.visualizer` stays None unless `setup_visualizer` set it
            if self.visualizer:
                self.visualizer.add_scalar(
                    tag='eval/cv_loss', value=cv_loss, step=self.epoch)
                self.visualizer.add_scalar(
                    tag='eval/lr', value=self.lr_scheduler(), step=self.epoch)

            # step lr every epoch
            self.lr_scheduler.step()
            # after epoch
            self.save(tag=self.epoch, infos={'val_loss': cv_loss})
            self.new_epoch()

    def run(self):
        """The routine of the experiment after setup. This method is intended
        to be used by the user.

        Runs ``do_train`` under a timer.  A ``KeyboardInterrupt`` ends the
        process with exit status -1.  ``destory`` is called in every case,
        including on error.
        """
        try:
            with Timer("Training Done: {}"):
                self.do_train()
        except KeyboardInterrupt:
            # `sys.exit` rather than the builtin `exit`: the latter is
            # injected by the `site` module for interactive use and is not
            # guaranteed to exist; `run_align` already uses `sys.exit`.
            sys.exit(-1)
        finally:
            self.destory()

    def restore(self):
        """Load model parameters from ``args.checkpoint_path``.

        Unlike ``resume_or_scratch``, no optimizer and no checkpoint
        directory are passed to the loader, and ``args.checkpoint_path``
        must be set (checked with an ``assert``).

        Returns:
            Whatever ``Checkpoint.load_latest_parameters`` returns for that
            checkpoint.
        """
        assert self.args.checkpoint_path
        return self.checkpoint.load_latest_parameters(
            self.model, checkpoint_path=self.args.checkpoint_path)

    def run_test(self):
        """Do Test/Decode.

        Restores the checkpoint given by ``args.checkpoint_path`` and calls
        ``test`` in evaluation mode, under a timer.  A ``KeyboardInterrupt``
        ends the process with exit status -1.
        """
        try:
            with Timer("Test/Decode Done: {}"):
                with self.eval():
                    self.restore()
                    self.test()
        except KeyboardInterrupt:
            # `sys.exit` instead of the `site`-provided builtin `exit`,
            # consistent with `run_align`.
            sys.exit(-1)

    def run_export(self):
        """Do Model Export.

        Restores the checkpoint given by ``args.checkpoint_path`` and calls
        ``export`` in evaluation mode, under a timer.  A
        ``KeyboardInterrupt`` ends the process with exit status -1.
        """
        try:
            with Timer("Export Done: {}"):
                with self.eval():
                    self.restore()
                    self.export()
        except KeyboardInterrupt:
            # `sys.exit` instead of the `site`-provided builtin `exit`,
            # consistent with `run_align`.
            sys.exit(-1)

    def run_align(self):
        """Do CTC alignment.

        Restores the checkpoint given by ``args.checkpoint_path`` and calls
        ``align`` in evaluation mode, under a timer.  A
        ``KeyboardInterrupt`` ends the process with exit status -1.
        """
        try:
            with Timer("Align Done: {}"), self.eval():
                self.restore()
                self.align()
        except KeyboardInterrupt:
            sys.exit(-1)

    def setup_output_dir(self):
        """Create a directory used for output.

        The output directory is ``args.output`` if set, otherwise the
        grandparent directory of ``args.checkpoint_path``, otherwise the
        grandparent directory of ``args.export_path``.  Below it the
        sub-directories ``checkpoints``, ``log``, ``test``, ``decode``,
        ``export``, ``visual`` and ``conf`` are created and stored as
        ``self.checkpoint_dir``, ``self.log_dir``, ``self.test_dir``,
        ``self.decode_dir``, ``self.export_dir``, ``self.visual_dir`` and
        ``self.config_dir``.

        Raises:
            ValueError: if none of ``args.output``, ``args.checkpoint_path``
                and ``args.export_path`` is set.
        """
        if self.args.output:
            output_dir = Path(self.args.output).expanduser()
        elif self.args.checkpoint_path:
            output_dir = Path(
                self.args.checkpoint_path).expanduser().parent.parent
        elif self.args.export_path:
            output_dir = Path(self.args.export_path).expanduser().parent.parent
        else:
            # Without this branch `output_dir` would be unbound below and
            # fail with an UnboundLocalError.
            raise ValueError(
                "Cannot determine the output directory: one of `output`, "
                "`checkpoint_path` or `export_path` must be set.")
        self.output_dir = output_dir
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # (attribute name, sub-directory name), created in this order
        sub_dirs = (
            ("checkpoint_dir", "checkpoints"),
            ("log_dir", "log"),
            ("test_dir", "test"),
            ("decode_dir", "decode"),
            ("export_dir", "export"),
            ("visual_dir", "visual"),
            ("config_dir", "conf"), )
        for attr_name, dir_name in sub_dirs:
            sub_dir = output_dir / dir_name
            setattr(self, attr_name, sub_dir)
            sub_dir.mkdir(parents=True, exist_ok=True)

    @mp_tools.rank_zero_only
    def destory(self):
        """Close visualizer to avoid hanging after training"""
        # https://github.com/pytorch/fairseq/issues/2357
        visualizer = self.visualizer
        if visualizer:
            visualizer.close()

    @mp_tools.rank_zero_only
    def setup_visualizer(self):
        """Create the ``LogWriter`` used for visual logs.

        The log is written to ``self.visual_dir`` and the writer is stored
        as ``self.visualizer``.

        Notes
        ------
        Only the main process has a visualizer with it. Use multiple
        visualizers in multiprocess to write to a same log file may cause
        unexpected behaviors.
        """
        log_dir = str(self.visual_dir)
        self.visualizer = LogWriter(logdir=log_dir)

    @mp_tools.rank_zero_only
    def dump_config(self):
        """Save the configuration used for this experiment.

        It is written to ``config.yaml`` in ``self.config_dir``.  In
        training mode an already existing ``config.yaml`` is first renamed
        to ``<timestamp>.config.yaml`` (UTC timestamp) so that it is not
        overwritten.
        """
        config_file = self.config_dir / "config.yaml"
        if self.train and config_file.exists():
            # `%S` is the standard "seconds" directive.  The lowercase `%s`
            # used before is a platform-specific extension, not one of the
            # documented `time.strftime` directives.
            time_stamp = time.strftime("%Y_%m_%d_%H_%M_%S", time.gmtime())
            target_path = self.config_dir / ".".join(
                [time_stamp, "config.yaml"])
            config_file.rename(target_path)

        with open(config_file, 'wt') as f:
            print(self.config, file=f)

    def train_batch(self, batch_index=None, batch_data=None, msg=None):
        """Train on one batch. A subclass should implement this method.

        ``do_train`` calls this hook as
        ``self.train_batch(batch_index, batch, msg)``, so the base signature
        accepts the same three positional arguments; a subclass that forgot
        to override it then gets the ``NotImplementedError`` below instead
        of a ``TypeError`` about the argument count.

        Args:
            batch_index: index of the batch within the current epoch.
            batch_data: the batch yielded by the train loader.
            msg: log message prefix for this step.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError("train_batch should be implemented.")

    @paddle.no_grad()
    def valid(self):
        """Validation hook; a subclass has to override it.

        ``do_train`` unpacks the result as ``(total_loss, num_seen_utts)``.

        Raises:
            NotImplementedError: always.
        """
        message = "valid should be implemented."
        raise NotImplementedError(message)

    @paddle.no_grad()
    def test(self):
        """Test/decode hook called by ``run_test``; a subclass has to
        override it.

        Raises:
            NotImplementedError: always.
        """
        message = "test should be implemented."
        raise NotImplementedError(message)

    @paddle.no_grad()
    def export(self):
        """Model export hook called by ``run_export``; a subclass has to
        override it.

        Raises:
            NotImplementedError: always.
        """
        message = "export should be implemented."
        raise NotImplementedError(message)

    @paddle.no_grad()
    def align(self):
        """Alignment hook called by ``run_align``; a subclass has to
        override it.

        Raises:
            NotImplementedError: always.
        """
        message = "align should be implemented."
        raise NotImplementedError(message)

    def setup_model(self):
        """Hook called by ``setup`` to build the model, criterion,
        optimizer, etc.; a subclass has to override it.

        Raises:
            NotImplementedError: always.
        """
        message = "setup_model should be implemented."
        raise NotImplementedError(message)

    def setup_dataloader(self):
        """Hook called by ``setup`` to build the training and validation
        dataloaders; a subclass has to override it.

        Raises:
            NotImplementedError: always.
        """
        message = "setup_dataloader should be implemented."
        raise NotImplementedError(message)


================================================
FILE: paddlespeech/s2t/training/triggers/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/training/triggers/compare_value_trigger.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from chainer(https://github.com/chainer/chainer)
from ..reporter import DictSummary
from .utils import get_trigger


class CompareValueTrigger():
    """Trigger that fires when an observed value compares "worse" than the
    value remembered from an earlier interval.

    Every call collects ``trainer.observation[key]`` (if present).  When
    the interval trigger fires, the mean of the collected values is
    compared with the remembered value through ``compare_fn``.

    Args:
        key (str) : Key of value.
        compare_fn ((float, float) -> bool) : Called as
            ``compare_fn(remembered_value, new_value)``; the trigger fires
            when it returns true.
        trigger (tuple(int, str)) : Trigger that decide the comparison interval.

    """

    def __init__(self, key, compare_fn, trigger=(1, "epoch")):
        self._key = key
        self._compare_fn = compare_fn
        self._best_value = None
        self._interval_trigger = get_trigger(trigger)
        self._init_summary()

    def __call__(self, trainer):
        """Collect the current value and, at the end of an interval, compare
        the interval mean with the remembered value.

        Returns:
            bool: ``True`` only when the interval ended, a value was
            already remembered and ``compare_fn`` returned true.  In every
            other end-of-interval case the new mean is remembered.
        """
        key = self._key
        summary = self._summary
        observed = trainer.observation
        if key in observed:
            summary.add({key: observed[key]})

        # still inside the interval: keep collecting
        if not self._interval_trigger(trainer):
            return False

        # interval ended: take the mean and start a fresh summary
        mean_value = float(summary.compute_mean()[key])  # copy to CPU
        self._init_summary()

        remembered = self._best_value
        if remembered is not None and self._compare_fn(remembered, mean_value):
            return True

        # first value, or the comparison did not fire: remember the new mean
        self._best_value = mean_value
        return False

    def _init_summary(self):
        """Start a new, empty summary for the next interval."""
        self._summary = DictSummary()


================================================
FILE: paddlespeech/s2t/training/triggers/interval_trigger.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Reference chainer MIT (https://opensource.org/licenses/MIT)


class IntervalTrigger():
    """A predicate that fires once every ``period`` iterations or epochs.

    Args:
        period (int): length of the cycle; must be positive.
        unit (str): ``"iteration"`` or ``"epoch"`` -- the attribute of
            ``trainer.updater.state`` that is watched.

    Raises:
        ValueError: on an unknown ``unit`` or a non-positive ``period``.
    """

    def __init__(self, period: int, unit: str):
        if unit not in ("iteration", "epoch"):
            raise ValueError("unit should be 'iteration' or 'epoch'")
        if period <= 0:
            raise ValueError("period should be a positive integer.")
        self.period = period
        self.unit = unit
        self.last_index = None

    def __call__(self, trainer):
        """Return ``True`` when the watched counter has moved into a new
        ``period``-sized bucket since the previous call.

        The first call only records the counter and never fires.
        """
        current = getattr(trainer.updater.state, self.unit)
        previous = current if self.last_index is None else self.last_index

        crossed = current // self.period != previous // self.period
        self.last_index = current
        return crossed


================================================
FILE: paddlespeech/s2t/training/triggers/limit_trigger.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Reference chainer MIT (https://opensource.org/licenses/MIT)


class LimitTrigger():
    """A predicate that fires once a counter has reached a limit; used to
    decide whether to stop.

    Args:
        limit (int): threshold; must be positive.
        unit (str): ``"iteration"`` or ``"epoch"`` -- the attribute of
            ``trainer.updater.state`` that is compared with ``limit``.

    Raises:
        ValueError: on an unknown ``unit`` or a non-positive ``limit``.
    """

    def __init__(self, limit: int, unit: str):
        if unit not in ("iteration", "epoch"):
            raise ValueError("unit should be 'iteration' or 'epoch'")
        if limit <= 0:
            raise ValueError("limit should be a positive integer.")
        self.limit = limit
        self.unit = unit

    def __call__(self, trainer):
        """Return whether the watched counter is at or beyond ``limit``."""
        current = getattr(trainer.updater.state, self.unit)
        return current >= self.limit


================================================
FILE: paddlespeech/s2t/training/triggers/time_trigger.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Reference chainer MIT (https://opensource.org/licenses/MIT)


class TimeTrigger():
    """Trigger based on a fixed time interval.

    The trigger fires when ``trainer.elapsed_time`` has passed the next
    scheduled time, which then moves forward by one period.

    Args:
        period (float): Interval time. It is given in seconds.
    """

    def __init__(self, period):
        self._period = period
        # the first firing is due one full period after the start
        self._next_time = period

    def __call__(self, trainer):
        """Return ``True`` if the scheduled time is strictly earlier than
        ``trainer.elapsed_time``, and schedule the next firing."""
        if not self._next_time < trainer.elapsed_time:
            return False
        self._next_time += self._period
        return True

    def state_dict(self):
        """Return the schedule as ``{"next_time": ...}``."""
        return {"next_time": self._next_time}

    def set_state_dict(self, state_dict):
        """Restore the schedule from a dict produced by ``state_dict``."""
        self._next_time = state_dict['next_time']


================================================
FILE: paddlespeech/s2t/training/triggers/utils.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .interval_trigger import IntervalTrigger


def never_fail_trigger(trainer):
    """Trigger that never fires: returns ``False`` for any ``trainer``."""
    fire = False
    return fire


def get_trigger(trigger):
    """Turn a trigger specification into a callable trigger.

    Args:
        trigger: ``None``, a callable, or an iterable of arguments for
            ``IntervalTrigger`` such as ``(1, "epoch")``.

    Returns:
        ``never_fail_trigger`` for ``None``, the callable itself if one was
        given, otherwise ``IntervalTrigger(*trigger)``.
    """
    if trigger is None:
        return never_fail_trigger
    return trigger if callable(trigger) else IntervalTrigger(*trigger)


================================================
FILE: paddlespeech/s2t/training/updaters/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/training/updaters/standard_updater.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from chainer(https://github.com/chainer/chainer)
from typing import Dict
from typing import Optional

import paddle
from paddle.io import DataLoader
from paddle.io import DistributedBatchSampler
from paddle.nn import Layer
from paddle.optimizer import Optimizer
from paddle.optimizer.lr import LRScheduler

from paddlespeech.s2t.training.reporter import report
from paddlespeech.s2t.training.updaters.updater import UpdaterBase
from paddlespeech.s2t.training.updaters.updater import UpdaterState
from paddlespeech.s2t.utils.log import Log

__all__ = ["StandardUpdater"]

logger = Log(__name__).getlog()


class StandardUpdater(UpdaterBase):
    """A deliberately simple updater: one model, one optimizer, one
    learning-rate scheduler and one dataloader.

    Things may not be that simple in practice; subclass it and override
    `update_core` (or `update`) to fit your need.

    Args:
        model (Layer): the model to train; it is called as ``model(*batch)``.
        optimizer (Optimizer): optimizer stepped once per batch.
        scheduler (LRScheduler): scheduler stepped once per batch.
        dataloader (DataLoader): source of the training batches.
        init_state (UpdaterState, optional): initial counters, forwarded to
            `UpdaterBase`.
    """

    def __init__(self,
                 model: Layer,
                 optimizer: Optimizer,
                 scheduler: LRScheduler,
                 dataloader: DataLoader,
                 init_state: Optional[UpdaterState]=None):
        super().__init__(init_state)
        # it is designed to hold multiple models
        models = {"main": model}
        self.models: Dict[str, Layer] = models
        self.model = model

        # it is designed to hold multiple optimizers
        optimizers = {"main": optimizer}
        self.optimizer = optimizer
        self.optimizers: Dict[str, Optimizer] = optimizers

        # it is designed to hold multiple schedulers
        schedulers = {"main": scheduler}
        self.scheduler = scheduler
        self.schedulers: Dict[str, LRScheduler] = schedulers

        # dataloaders
        self.dataloader = dataloader

        self.train_iterator = iter(dataloader)

    def update(self):
        """Run one training step, then advance the iteration/epoch counters.

        The time spent reading the batch and the time spent in `update_core`
        are logged at debug level.
        """
        # Local import: keeps this method self-contained (the module does not
        # import `time` at file level).
        import time

        # We increase the iteration index after updating and before extension.
        # Here are the reasons.

        # 1. Snapshotting (as well as other extensions, like visualizer) is
        #    executed after a step of updating;
        # 2. We decide to increase the iteration index after updating and
        #    before any extension is executed.
        # 3. We do not increase the iteration after extension because we
        #    prefer a consistent resume behavior: when loading from a
        #    `snapshot_iter_100.pdz` the next step to train is `101`,
        #    naturally. But if the iteration were increased after the
        #    extensions (including snapshot), a `snapshot_iter_99` would be
        #    loaded. You would need an extra increase of the iteration index
        #    before training to avoid another iteration `99`, which has been
        #    done before snapshotting.
        # 4. Thus the iteration index represents "how many iterations have
        #    been done so far".
        # NOTE: use report to capture the correct value. If you want to
        # report the learning rate used for a step, you must report it before
        # the learning rate scheduler's step() has been called. In paddle's
        # convention, we do not use an extension to change the learning rate,
        # so if you want to report it, do it in the updater.

        # Then here comes the next question. When is the proper time to
        # increase the epoch index? Since all extensions are executed after
        # updating, right after updating is the proper time to increase the
        # epoch index.
        # 1. If we increased the epoch index before updating, an extension
        #    based on epoch would miss the correct timing. It could only be
        #    triggered after an extra update.
        # 2. Theoretically, when an epoch is done, the epoch index should be
        #    increased. So it is increased after updating.
        # 3. Thus, the epoch index represents "how many epochs have been done
        #    so far". So it starts from 0.

        # switch to training mode
        for model in self.models.values():
            model.train()

        # training for a step is implemented here
        tic = time.perf_counter()
        batch = self.read_batch()
        logger.debug("data time cost:{}".format(time.perf_counter() - tic))

        tic = time.perf_counter()
        self.update_core(batch)
        logger.debug("step time cost:{}".format(time.perf_counter() - tic))

        self.state.iteration += 1
        # read the property once; it may be None for unsized dataloaders
        updates_per_epoch = self.updates_per_epoch
        if updates_per_epoch is not None:
            if self.state.iteration % updates_per_epoch == 0:
                self.state.epoch += 1

    def update_core(self, batch):
        """A simple case for a training step. Basic assumptions are:
        Single model;
        Single optimizer;
        Single scheduler, and update learning rate each step;
        A batch from the dataloader is just the input of the model;
        The model returns a single loss, or a dict containing several losses.
        Parameters are updated at every batch, no gradient accumulation.

        When the model returns a dict without a "main" entry, the sum of all
        its values is used as the "main" loss. Every loss is reported under
        its name before the backward pass.
        """
        loss = self.model(*batch)

        if isinstance(loss, paddle.Tensor):
            loss_dict = {"main": loss}
        else:
            # Dict[str, Tensor]; work on a shallow copy so that the dict
            # returned by the model is not modified behind its back.
            loss_dict = dict(loss)
            if "main" not in loss_dict:
                main_loss = 0
                for loss_item in loss.values():
                    main_loss += loss_item
                loss_dict["main"] = main_loss

        for name, loss_item in loss_dict.items():
            report(name, float(loss_item))

        self.optimizer.clear_grad()
        loss_dict["main"].backward()
        self.optimizer.step()
        self.scheduler.step()

    @property
    def updates_per_epoch(self):
        """Number of steps per epoch, determined by the length of the
        dataloader.

        Returns:
            The result of ``len(self.dataloader)``, or None when the length
            cannot be determined.
        """
        try:
            return len(self.dataloader)
        except TypeError:
            logger.debug("This dataloader has no __len__.")
        except Exception as e:
            # Stay best-effort for dataloaders whose __len__ raises something
            # else, but no longer swallow KeyboardInterrupt/SystemExit as the
            # former `return` inside `finally` did.
            logger.debug(f"Can not get the length of the dataloader: {e}")
        return None

    def new_epoch(self):
        """Start a new epoch: re-seed the distributed sampler (if any) with
        the current epoch index and recreate the batch iterator."""
        # NOTE: all batch sampler for distributed training should
        # subclass DistributedBatchSampler and implement `set_epoch` method
        if hasattr(self.dataloader, "batch_sampler"):
            batch_sampler = self.dataloader.batch_sampler
            if isinstance(batch_sampler, DistributedBatchSampler):
                batch_sampler.set_epoch(self.state.epoch)
        self.train_iterator = iter(self.dataloader)

    def read_batch(self):
        """Read a batch from the data loader, auto renew when data is exhausted."""
        try:
            batch = next(self.train_iterator)
        except StopIteration:
            self.new_epoch()
            batch = next(self.train_iterator)
        return batch

    def state_dict(self):
        """State dict of a Updater: the updater state plus, for every held
        model/optimizer, its state dict under `{name}_params` /
        `{name}_optimizer`. Schedulers are not stored separately here."""
        state_dict = super().state_dict()
        for name, model in self.models.items():
            state_dict[f"{name}_params"] = model.state_dict()
        for name, optim in self.optimizers.items():
            state_dict[f"{name}_optimizer"] = optim.state_dict()
        return state_dict

    def set_state_dict(self, state_dict):
        """Set state dict for a Updater. Parameters of models, states of
        optimizers and the UpdaterState counters are restored from the keys
        written by `state_dict`."""
        for name, model in self.models.items():
            model.set_state_dict(state_dict[f"{name}_params"])
        for name, optim in self.optimizers.items():
            optim.set_state_dict(state_dict[f"{name}_optimizer"])
        super().set_state_dict(state_dict)


================================================
FILE: paddlespeech/s2t/training/updaters/trainer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from chainer(https://github.com/chainer/chainer)
import sys
import traceback
from collections import OrderedDict
from pathlib import Path
from typing import Callable
from typing import List
from typing import Union

import six
import tqdm

from paddlespeech.s2t.training.extensions.extension import Extension
from paddlespeech.s2t.training.extensions.extension import PRIORITY_READER
from paddlespeech.s2t.training.reporter import ObsScope
from paddlespeech.s2t.training.triggers import get_trigger
from paddlespeech.s2t.training.triggers.limit_trigger import LimitTrigger
from paddlespeech.s2t.training.updaters.updater import UpdaterBase


class _ExtensionEntry():
    """Plain record tying a registered extension to the trigger that decides
    when it runs and to the priority used to order it."""

    def __init__(self, extension, trigger, priority):
        (self.extension, self.trigger,
         self.priority) = (extension, trigger, priority)


class Trainer():
    """Run an updater in a loop until the stop trigger fires, executing the
    registered extensions after every update.

    Args:
        updater (UpdaterBase): object whose `update()` performs one training
            step and whose `state.iteration` counts the finished steps.
        stop_trigger: either a callable taking the trainer and returning
            whether training should stop, or a sequence of arguments that is
            unpacked into `LimitTrigger` (presumably ``(limit, unit)`` —
            confirm against `LimitTrigger`). It is required despite the
            `None` default kept for signature compatibility.
        out (str or Path): output directory, created when `run` starts.
        extensions (List[Extension], optional): extensions registered through
            `extend` with their default name, trigger and priority.

    Raises:
        TypeError: if `stop_trigger` is None.
    """

    def __init__(self,
                 updater: UpdaterBase,
                 stop_trigger: Callable=None,
                 out: Union[str, Path]='result',
                 extensions: List[Extension]=None):
        self.updater = updater
        self.extensions = OrderedDict()
        if stop_trigger is None:
            # Fail with an explicit message instead of the cryptic
            # "argument after * must be an iterable" error.
            raise TypeError(
                "stop_trigger is required: pass the arguments of LimitTrigger "
                "as a sequence, or a callable that takes the trainer.")
        if callable(stop_trigger):
            # an already constructed trigger is used as it is
            self.stop_trigger = stop_trigger
        else:
            self.stop_trigger = LimitTrigger(*stop_trigger)
        self.out = Path(out)
        self.observation = None

        self._done = False
        if extensions:
            for ext in extensions:
                self.extend(ext)

    @property
    def is_before_training(self):
        """True while the updater has not finished any iteration."""
        return self.updater.state.iteration == 0

    def extend(self, extension, name=None, trigger=None, priority=None):
        """Register an extension.

        Args:
            extension: callable invoked as ``extension(trainer)``.
            name (str, optional): name of the entry. Falls back, in order, to
                the extension's `name`, `default_name` and `__name__`.
            trigger (optional): anything accepted by `get_trigger`; falls back
                to the extension's `trigger` attribute, then to
                ``(1, 'iteration')``.
            priority (optional): ordering key, higher runs first; falls back
                to the extension's `priority`, then to `PRIORITY_READER`.

        Raises:
            ValueError: if no name can be determined or the name is the
                reserved word 'training'.
        """
        # get name for the extension
        # argument \
        # -> extension's name \
        # -> default_name (class name, when it is an object) \
        # -> function name when it is a function \
        # -> error
        if name is None:
            for attr in ('name', 'default_name', '__name__'):
                name = getattr(extension, attr, None)
                if name is not None:
                    break
            else:
                raise ValueError("Name is not given for the extension.")
        if name == 'training':
            raise ValueError("training is a reserved name.")

        if trigger is None:
            trigger = getattr(extension, 'trigger', (1, 'iteration'))
        trigger = get_trigger(trigger)

        if priority is None:
            priority = getattr(extension, 'priority', PRIORITY_READER)

        # add suffix to avoid naming conflict
        ordinal = 0
        modified_name = name
        while modified_name in self.extensions:
            ordinal += 1
            modified_name = f"{name}_{ordinal}"
        extension.name = modified_name

        self.extensions[modified_name] = _ExtensionEntry(extension, trigger,
                                                         priority)

    def get_extension(self, name):
        """Get a registered extension by name.

        Raises:
            ValueError: if no extension is registered under `name`.
        """
        extensions = self.extensions
        if name in extensions:
            return extensions[name].extension
        else:
            raise ValueError(f'extension {name} not found')

    def run(self):
        """Run the training loop.

        Extensions are initialized once, executed after every update when
        their trigger fires (highest priority first), given a chance to react
        through `on_error` when the loop raises, and finalized in every case.
        An exception raised in the loop is re-raised after the handlers ran.

        Raises:
            RuntimeError: if a previous call already completed the training.
        """
        if self._done:
            raise RuntimeError("Training is already done!.")

        self.out.mkdir(parents=True, exist_ok=True)

        # sort extensions by priorities once
        extension_order = sorted(
            self.extensions.keys(),
            key=lambda name: self.extensions[name].priority,
            reverse=True)
        extensions = [(name, self.extensions[name]) for name in extension_order]

        # initializing all extensions
        for name, entry in extensions:
            if hasattr(entry.extension, "initialize"):
                entry.extension.initialize(self)

        update = self.updater.update  # training step
        stop_trigger = self.stop_trigger

        # display only one progress bar
        max_iteration = None
        if isinstance(stop_trigger, LimitTrigger):
            if stop_trigger.unit == 'epoch':
                max_epoch = stop_trigger.limit
                updates_per_epoch = getattr(self.updater, "updates_per_epoch",
                                            None)
                # total is unknown when the dataloader has no length
                max_iteration = max_epoch * updates_per_epoch if updates_per_epoch else None
            else:
                max_iteration = stop_trigger.limit

        p = tqdm.tqdm(initial=self.updater.state.iteration, total=max_iteration)

        try:
            while not stop_trigger(self):
                self.observation = {}
                # set observation as the `report` target
                # you can use `report` freely in Updater.update()

                # updating parameters and state
                with ObsScope(self.observation):
                    update()
                    p.update()

                    # execute extension when necessary
                    for name, entry in extensions:
                        if entry.trigger(self):
                            entry.extension(self)
        except Exception as e:
            f = sys.stderr
            f.write(f"Exception in main training loop: {e}\n")
            f.write("Traceback (most recent call last):\n")
            traceback.print_tb(e.__traceback__)
            f.write(
                "Trainer extensions will try to handle the exception. Then all extensions will finalize.\n"
            )

            # try to handle it
            for name, entry in extensions:
                if hasattr(entry.extension, "on_error"):
                    try:
                        entry.extension.on_error(self, e, e.__traceback__)
                    except Exception as ee:
                        f.write(f"Exception in error handler: {ee}\n")
                        f.write('Traceback (most recent call last):\n')
                        traceback.print_tb(ee.__traceback__)

            # re-raise the exception of the main training loop; the inner
            # handlers above have completed, so a bare raise refers to `e`
            raise
        finally:
            # release the progress bar before extensions write their output
            p.close()
            for name, entry in extensions:
                if hasattr(entry.extension, "finalize"):
                    entry.extension.finalize(self)

        # only reached when the loop ended without an exception
        self._done = True


================================================
FILE: paddlespeech/s2t/training/updaters/updater.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from chainer(https://github.com/chainer/chainer)
from dataclasses import dataclass

import paddle

from paddlespeech.s2t.utils.log import Log

__all__ = ["UpdaterBase", "UpdaterState"]

logger = Log(__name__).getlog()


@dataclass
class UpdaterState:
    """Progress counters of an updater; both are written to and restored
    from the updater's state dict."""
    # number of finished iterations
    iteration: int = 0
    # number of finished epochs
    epoch: int = 0


class UpdaterBase():
    """An updater is the abstraction of how a model is trained given the
    dataloader and the optimizer.
    The `update_core` method is a step in the training loop with only necessary
    operations (get a batch, forward and backward, update the parameters).
    Other stuffs are made extensions. Visualization, saving, loading and
    periodical validation and evaluation are not considered here.
    But even in such simplest case, things are not that simple. There is an
    attempt to standardize this process and requires only the model and
    dataset and do all the stuffs automatically. But this may hurt flexibility.
    If we assume a batch yield from the dataloader is just the input to the
    model, we will find that some model requires more arguments, or just some
    keyword arguments. But this prevents us from over-simplifying it.
    From another perspective, the batch may includes not just the input, but
    also the target. But the model's forward method may just need the input.
    We can pass a dict or a super-long tuple to the model and let it pick what
    it really needs. But this is an abuse of lazy interface.
    After all, we care about how a model is trained, not just how the model is
    used for inference. We want to control how a model is trained. We just
    don't want to be messed up with other auxiliary code.
    So the best practice is to define a model and define a updater for it.

    Args:
        init_state (UpdaterState, optional): counters to start from; a fresh
            `UpdaterState` is created when omitted.
    """

    def __init__(self, init_state=None):
        # init state
        if init_state is None:
            self.state = UpdaterState()
        else:
            self.state = init_state

    def update(self, batch=None):
        """Train for one step; must be overridden by subclasses.

        `batch` is optional so that calling `update()` without arguments on
        an updater that forgot to override this method reports the missing
        override instead of a missing argument.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError(
            "Implement your own `update` method for training a step.")

    def state_dict(self):
        """Return the epoch and iteration counters as a dict."""
        state_dict = {
            "epoch": self.state.epoch,
            "iteration": self.state.iteration,
        }
        return state_dict

    def set_state_dict(self, state_dict):
        """Restore the counters from the "epoch" and "iteration" entries."""
        self.state.epoch = state_dict["epoch"]
        self.state.iteration = state_dict["iteration"]

    def save(self, path):
        """Save `state_dict()` to `path` with `paddle.save`."""
        logger.debug(f"Saving to {path}.")
        archive = self.state_dict()
        paddle.save(archive, str(path))

    def load(self, path):
        """Load an archive from `path` with `paddle.load` and apply it with
        `set_state_dict`."""
        logger.debug(f"Loading from {path}.")
        archive = paddle.load(str(path))
        self.set_state_dict(archive)


================================================
FILE: paddlespeech/s2t/utils/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/utils/asr_utils.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Reference espnet Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
import json

import numpy as np

__all__ = ["label_smoothing_dist"]


def label_smoothing_dist(odim, lsm_type, transcript=None, blank=0):
    """Obtain label distribution for loss smoothing.

    For the "unigram" type the occurrences of every token id are counted over
    all utterances of the transcript, the last index (``odim - 1``) receives
    one count per utterance as <eos>, ids that never occur are floored to one
    count, the count of `blank` is removed and the counts are normalized by
    their sum.

    :param int odim: size of the returned distribution
    :param str lsm_type: smoothing type; only "unigram" is supported
    :param str transcript: path of a JSON file holding an "utts" mapping whose
        values store space separated token ids under
        ``["output"][0]["tokenid"]``
    :param int blank: index whose count is set to zero
    :return: float32 array of length `odim`
    :rtype: numpy.ndarray
    :raises AssertionError: if `lsm_type` is "unigram" and no transcript is given
    :raises SystemExit: if `lsm_type` is not supported (after logging an error)
    """
    # Local imports: these modules are not imported at file level.
    import logging
    import sys

    if transcript is not None:
        with open(transcript, "rb") as f:
            trans_json = json.load(f)["utts"]

    if lsm_type == "unigram":
        if transcript is None:
            # raised explicitly so that the check also holds under `python -O`
            raise AssertionError(
                "transcript is required for %s label smoothing" % lsm_type)
        labelcount = np.zeros(odim)
        for k, v in trans_json.items():
            ids = np.array([int(n) for n in v["output"][0]["tokenid"].split()])
            # to avoid an error when there is no text in an utterance
            if len(ids) > 0:
                # `labelcount[ids] += 1` would count a repeated id only once;
                # the unbuffered form accumulates every occurrence.
                np.add.at(labelcount, ids, 1)
        # count <eos>: one per utterance (not the length of the path string)
        labelcount[odim - 1] = len(trans_json)
        labelcount[labelcount == 0] = 1  # flooring
        labelcount[blank] = 0  # remove counts for blank
        labeldist = labelcount.astype(np.float32) / np.sum(labelcount)
    else:
        logging.error("Error: unexpected label smoothing type: %s" % lsm_type)
        sys.exit()

    return labeldist


================================================
FILE: paddlespeech/s2t/utils/bleu_score.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""This module provides functions to calculate bleu score in different level.
e.g. `bleu` for word-level, `char_bleu` for char-level.
"""
import numpy as np
import sacrebleu

__all__ = ['bleu', 'char_bleu', "ErrorCalculator"]


def bleu(hypothesis, reference):
    """Word-level corpus BLEU of hypothesis text against reference text,
    computed by sacrebleu.

    :param hypothesis: The hypothesis sentences.
    :type hypothesis: list[str]
    :param reference: The reference sentences.
    :type reference: list[list[str]]
    :return: the result of ``sacrebleu.corpus_bleu`` for the two arguments.
    """
    corpus_score = sacrebleu.corpus_bleu(hypothesis, reference)
    return corpus_score


def char_bleu(hypothesis, reference):
    """Char-level corpus BLEU of hypothesis text against reference text,
    computed by sacrebleu.

    Blanks are removed from every sentence and its characters are then
    separated by single blanks, so that each character is scored as a token.

    :param hypothesis: The hypothesis sentences.
    :type hypothesis: list[str]
    :param reference: The reference sentences.
    :type reference: list[list[str]]
    :return: the result of ``sacrebleu.corpus_bleu`` for the converted text.
    """

    def _spaced_chars(text):
        # strip the existing blanks, then put one blank between characters
        return ' '.join(text.replace(' ', ''))

    char_hyps = list(map(_spaced_chars, hypothesis))
    char_refs = [list(map(_spaced_chars, stream)) for stream in reference]
    return sacrebleu.corpus_bleu(char_hyps, char_refs)


class ErrorCalculator():
    """Calculate BLEU for ST and MT models during training.

    :param char_list: vocabulary list, indexed by token id
    :param sym_space: space symbol, rendered as a blank in the text
    :param sym_pad: pad symbol, removed from the predicted text
    :param report_bleu: report BLEU score if True
    """

    def __init__(self, char_list, sym_space, sym_pad, report_bleu=False):
        """Construct an ErrorCalculator object."""
        super().__init__()
        self.char_list = char_list
        self.space = sym_space
        self.pad = sym_pad
        self.report_bleu = report_bleu
        if self.space in self.char_list:
            self.idx_space = self.char_list.index(self.space)
        else:
            self.idx_space = None

    def __call__(self, ys_hat, ys_pad):
        """Calculate corpus-level BLEU score.

        :param ys_hat: predicted token ids (batch, seqlen)
        :param ys_pad: reference token ids (batch, seqlen), padded with -1
        :return: corpus-level BLEU score of the mini-batch, or None when
            `report_bleu` is False
        :rtype: float or None
        """
        if not self.report_bleu:
            return None
        return self.calculate_corpus_bleu(ys_hat, ys_pad)

    def calculate_corpus_bleu(self, ys_hat, ys_pad):
        """Calculate corpus-level BLEU score in a mini-batch.

        :param ys_hat: predicted token ids (batch, seqlen)
        :param ys_pad: reference token ids (batch, seqlen), padded with -1
        :return: corpus-level BLEU score as reported by sacrebleu (its
            `score` attribute, which is already on a 0-100 scale)
        :rtype: float
        """
        seqs_hat, seqs_true = [], []
        for i, y_hat in enumerate(ys_hat):
            y_true = ys_pad[i]
            eos_true = np.where(y_true == -1)[0]
            ymax = eos_true[0] if len(eos_true) > 0 else len(y_true)
            # NOTE: padding index (-1) in y_true is used to pad y_hat
            # because y_hats is not padded with -1
            seq_hat = [self.char_list[int(idx)] for idx in y_hat[:ymax]]
            seq_true = [
                self.char_list[int(idx)] for idx in y_true if int(idx) != -1
            ]
            seq_hat_text = "".join(seq_hat).replace(self.space, " ")
            seq_hat_text = seq_hat_text.replace(self.pad, "")
            seq_true_text = "".join(seq_true).replace(self.space, " ")
            seqs_hat.append(seq_hat_text)
            seqs_true.append(seq_true_text)
        # sacrebleu expects a list of reference streams, each as long as the
        # hypothesis list; there is a single reference per sentence here, so
        # all references form one stream.
        bleu = sacrebleu.corpus_bleu(seqs_hat, [seqs_true])
        # BLEUScore.score is a percentage already; do not scale it again.
        return bleu.score


================================================
FILE: paddlespeech/s2t/utils/check_kwargs.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
import inspect


def check_kwargs(func, kwargs, name=None):
    """check kwargs are valid for func

    If kwargs are invalid, raise TypeError as same as python default.
    Nothing is checked when the signature of `func` can not be inspected, and
    every keyword is accepted when `func` declares a ``**kwargs`` catch-all.

    :param function func: function to be validated
    :param dict kwargs: keyword arguments for func
    :param str name: name used in TypeError (default is func name)
    :raises TypeError: if a key of kwargs can not be passed to func by keyword
    """
    try:
        params = inspect.signature(func).parameters
    except ValueError:
        # no signature available (e.g. some builtins): nothing to validate
        return

    kinds = inspect.Parameter
    # A `**kwargs` parameter swallows every keyword, so none can be invalid.
    if any(p.kind is kinds.VAR_KEYWORD for p in params.values()):
        return

    # Only these kinds can be bound by keyword; positional-only parameters
    # and `*args` can not, although their names appear in the signature.
    by_keyword = {
        param_name
        for param_name, p in params.items()
        if p.kind in (kinds.POSITIONAL_OR_KEYWORD, kinds.KEYWORD_ONLY)
    }

    if name is None:
        # callables such as functools.partial objects have no __name__
        name = getattr(func, "__name__", repr(func))
    for k in kwargs.keys():
        if k not in by_keyword:
            raise TypeError(
                f"{name}() got an unexpected keyword argument '{k}'")


================================================
FILE: paddlespeech/s2t/utils/checkpoint.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import glob
import json
import os
import re
from pathlib import Path
from typing import Text
from typing import Union

import paddle
from paddle import distributed as dist
from paddle.optimizer import Optimizer

from paddlespeech.s2t.utils import mp_tools
from paddlespeech.s2t.utils.log import Log

logger = Log(__name__).getlog()

__all__ = ["Checkpoint"]


class Checkpoint():
    """Save and restore checkpoints, keeping the k best and the n latest.

    A checkpoint is a group of files sharing the prefix
    ``<checkpoint_dir>/<tag_or_iteration>``:

    * ``.pdparams``: model state dict,
    * ``.pdopt``: optimizer state dict (only when an optimizer is given),
    * ``.json``: the user supplied ``infos``.

    Args:
        kbest_n (int): number of best checkpoints to keep. ``-1`` keeps
            every checkpoint that carries the metric.
        latest_n (int): number of most recent checkpoints to keep.
    """

    def __init__(self, kbest_n: int=5, latest_n: int=1):
        # tag_or_iteration -> metric value of the checkpoints kept as "best".
        # A lower metric is treated as better (see _should_save_best).
        self.best_records: dict = {}
        # Tags of the most recent checkpoints, oldest first.
        self.latest_records = []
        self.kbest_n = kbest_n
        self.latest_n = latest_n
        self._save_all = (kbest_n == -1)

    def save_parameters(self,
                        checkpoint_dir,
                        tag_or_iteration: Union[int, Text],
                        model: paddle.nn.Layer,
                        optimizer: Optimizer=None,
                        infos: dict=None,
                        metric_type="val_loss"):
        """Save checkpoint in best_n and latest_n.

        When ``infos`` is None or does not contain ``metric_type`` the
        checkpoint is simply written, without any best/latest bookkeeping.

        Args:
            checkpoint_dir (str): the directory where checkpoint is saved.
            tag_or_iteration (int or str): the latest iteration(step or epoch) number or tag.
            model (Layer):  model to be checkpointed.
            optimizer (Optimizer, optional): optimizer to be checkpointed.
            infos (dict or None)):  any info you want to save.
            metric_type (str, optional): metric type. Defaults to "val_loss".
        """
        # ``infos`` defaults to None, so guard before looking the metric up.
        if infos is None or metric_type not in infos.keys():
            self._save_parameters(checkpoint_dir, tag_or_iteration, model,
                                  optimizer, infos)
            return

        # save best
        if self._should_save_best(infos[metric_type]):
            self._save_best_checkpoint_and_update(
                infos[metric_type], checkpoint_dir, tag_or_iteration, model,
                optimizer, infos)
        # save latest
        # NOTE: this writes the checkpoint files again when the checkpoint
        # was already stored as one of the best just above.
        self._save_latest_checkpoint_and_update(
            checkpoint_dir, tag_or_iteration, model, optimizer, infos)

        # Record files are only refreshed for integer iterations.
        if isinstance(tag_or_iteration, int):
            self._save_checkpoint_record(checkpoint_dir, tag_or_iteration)

    def load_parameters(self,
                        model,
                        optimizer=None,
                        checkpoint_dir=None,
                        checkpoint_path=None,
                        record_file="checkpoint_latest"):
        """Load a model checkpoint from disk.

        Args:
            model (Layer): model to load parameters.
            optimizer (Optimizer, optional): optimizer to load states if needed.
                Defaults to None.
            checkpoint_dir (str, optional): the directory where checkpoint is saved.
            checkpoint_path (str, optional): if specified, load the checkpoint
                stored in the checkpoint_path(prefix) and the argument 'checkpoint_dir' will
                be ignored. Defaults to None.
            record_file (str, optional): name of the record file inside
                ``checkpoint_dir``, "checkpoint_latest" or "checkpoint_best".
        Returns:
            configs (dict): content of the ``.json`` info file saved with the
                checkpoint; empty when there is no checkpoint to load or no
                info file.
        Raises:
            ValueError: if neither ``checkpoint_path`` nor ``checkpoint_dir``
                (with a ``record_file``) is given.
        """
        configs = {}

        if not checkpoint_path:
            if checkpoint_dir is None or record_file is None:
                raise ValueError(
                    "At least one of 'checkpoint_path' or 'checkpoint_dir' should be specified!"
                )
            # load checkpoint from record file
            checkpoint_record = os.path.join(checkpoint_dir, record_file)
            iteration = self._load_checkpoint_idx(checkpoint_record)
            if iteration == -1:
                return configs
            checkpoint_path = os.path.join(checkpoint_dir,
                                           "{}".format(iteration))

        rank = dist.get_rank()

        params_path = checkpoint_path + ".pdparams"
        model_dict = paddle.load(params_path)
        model.set_state_dict(model_dict)
        logger.info("Rank {}: Restore model from {}".format(rank, params_path))

        optimizer_path = checkpoint_path + ".pdopt"
        if optimizer and os.path.isfile(optimizer_path):
            optimizer_dict = paddle.load(optimizer_path)
            optimizer.set_state_dict(optimizer_dict)
            logger.info("Rank {}: Restore optimizer state from {}".format(
                rank, optimizer_path))

        # The info file shares the checkpoint prefix.
        info_path = checkpoint_path + ".json"
        if os.path.exists(info_path):
            with open(info_path, 'r') as fin:
                configs = json.load(fin)
        return configs

    def load_latest_parameters(self,
                               model,
                               optimizer=None,
                               checkpoint_dir=None,
                               checkpoint_path=None):
        """Load the checkpoint named by the "checkpoint_latest" record file.

        Args:
            model (Layer): model to load parameters.
            optimizer (Optimizer, optional): optimizer to load states if needed.
                Defaults to None.
            checkpoint_dir (str, optional): the directory where checkpoint is saved.
            checkpoint_path (str, optional): if specified, load the checkpoint
                stored in the checkpoint_path(prefix) and the argument 'checkpoint_dir' will
                be ignored. Defaults to None.
        Returns:
            configs (dict): epoch or step, lr and other meta info should be saved.
        """
        return self.load_parameters(model, optimizer, checkpoint_dir,
                                    checkpoint_path, "checkpoint_latest")

    def load_best_parameters(self,
                             model,
                             optimizer=None,
                             checkpoint_dir=None,
                             checkpoint_path=None):
        """Load the checkpoint named by the "checkpoint_best" record file.

        NOTE(review): the record file is read from its last line, which is the
        most recently added best record and not necessarily the one with the
        lowest metric -- confirm this is the intended checkpoint.

        Args:
            model (Layer): model to load parameters.
            optimizer (Optimizer, optional): optimizer to load states if needed.
                Defaults to None.
            checkpoint_dir (str, optional): the directory where checkpoint is saved.
            checkpoint_path (str, optional): if specified, load the checkpoint
                stored in the checkpoint_path(prefix) and the argument 'checkpoint_dir' will
                be ignored. Defaults to None.
        Returns:
            configs (dict): epoch or step, lr and other meta info should be saved.
        """
        return self.load_parameters(model, optimizer, checkpoint_dir,
                                    checkpoint_path, "checkpoint_best")

    def _should_save_best(self, metric: float) -> bool:
        """Return True if ``metric`` deserves a slot among the best records."""
        if not self._best_full():
            return True

        # already full: only accept a metric lower than the current worst one
        worst_record_path = max(self.best_records, key=self.best_records.get)
        worst_metric = self.best_records[worst_record_path]
        return metric < worst_metric

    def _best_full(self):
        """Return True when the best records hold exactly ``kbest_n`` entries."""
        return (not self._save_all) and len(self.best_records) == self.kbest_n

    def _latest_full(self):
        """Return True when the latest records hold exactly ``latest_n`` entries."""
        return len(self.latest_records) == self.latest_n

    def _save_best_checkpoint_and_update(self, metric, checkpoint_dir,
                                         tag_or_iteration, model, optimizer,
                                         infos):
        """Write the checkpoint and register it as best, evicting the worst if full."""
        # remove the worst
        if self._best_full():
            worst_record_path = max(self.best_records,
                                    key=self.best_records.get)
            self.best_records.pop(worst_record_path)
            # Keep the files when the checkpoint is still one of the latest.
            if (worst_record_path not in self.latest_records):
                logger.info(
                    "remove the worst checkpoint: {}".format(worst_record_path))
                self._del_checkpoint(checkpoint_dir, worst_record_path)

        # add the new one
        self._save_parameters(checkpoint_dir, tag_or_iteration, model,
                              optimizer, infos)
        self.best_records[tag_or_iteration] = metric

    def _save_latest_checkpoint_and_update(
            self, checkpoint_dir, tag_or_iteration, model, optimizer, infos):
        """Write the checkpoint and register it as latest, evicting the oldest if full."""
        # remove the old
        if self._latest_full():
            to_del_fn = self.latest_records.pop(0)
            # Keep the files when the checkpoint is still one of the best.
            if (to_del_fn not in self.best_records.keys()):
                logger.info(
                    "remove the latest checkpoint: {}".format(to_del_fn))
                self._del_checkpoint(checkpoint_dir, to_del_fn)
        self.latest_records.append(tag_or_iteration)

        self._save_parameters(checkpoint_dir, tag_or_iteration, model,
                              optimizer, infos)

    def _del_checkpoint(self, checkpoint_dir, tag_or_iteration):
        """Delete every file whose name is ``<checkpoint_dir>/<tag>.<anything>``."""
        checkpoint_path = os.path.join(checkpoint_dir,
                                       "{}".format(tag_or_iteration))
        # Escape the prefix so glob metacharacters in the directory or tag
        # are matched literally.
        for filename in glob.glob(glob.escape(checkpoint_path) + ".*"):
            os.remove(filename)
            logger.info("delete file: {}".format(filename))

    def _load_checkpoint_idx(self, checkpoint_record: str) -> int:
        """Get the iteration number stored on the last line of a record file.

        Args:
            checkpoint_record (str): path of the record file.
        Returns:
            int: the iteration number. -1 when the record file is missing or
                holds no entry.
        Raises:
            ValueError: if the last entry is not an integer.
        """
        if not os.path.isfile(checkpoint_record):
            return -1

        # Fetch the last non-blank entry; blank lines carry no checkpoint.
        with open(checkpoint_record, "rt") as handle:
            entries = [line.strip() for line in handle if line.strip()]
        if not entries:
            return -1
        return int(entries[-1].split(":")[-1])

    def _save_checkpoint_record(self, checkpoint_dir: str, iteration: int):
        """Rewrite the "checkpoint_best" and "checkpoint_latest" record files.

        Each file lists the currently tracked tags, one
        ``model_checkpoint_path:<tag>`` line per record.

        Args:
            checkpoint_dir (str): the directory where checkpoint is saved.
            iteration (int): the latest iteration number (not written itself;
                the tracked records are).
        Returns:
            None
        """
        checkpoint_record_latest = os.path.join(checkpoint_dir,
                                                "checkpoint_latest")
        checkpoint_record_best = os.path.join(checkpoint_dir, "checkpoint_best")

        with open(checkpoint_record_best, "w") as handle:
            for i in self.best_records.keys():
                handle.write("model_checkpoint_path:{}\n".format(i))
        with open(checkpoint_record_latest, "w") as handle:
            for i in self.latest_records:
                handle.write("model_checkpoint_path:{}\n".format(i))

    @mp_tools.rank_zero_only
    def _save_parameters(self,
                         checkpoint_dir: str,
                         tag_or_iteration: Union[int, str],
                         model: paddle.nn.Layer,
                         optimizer: Optimizer=None,
                         infos: dict=None):
        """Write the model, optimizer and info files of one checkpoint.

        Args:
            checkpoint_dir (str): the directory where checkpoint is saved.
            tag_or_iteration (int or str): the latest iteration(step or epoch) number.
            model (Layer): model to be checkpointed.
            optimizer (Optimizer, optional): optimizer to be checkpointed.
                Defaults to None.
            infos (dict or None): any info you want to save.
        Returns:
            None
        """
        checkpoint_path = os.path.join(checkpoint_dir,
                                       "{}".format(tag_or_iteration))

        model_dict = model.state_dict()
        params_path = checkpoint_path + ".pdparams"
        paddle.save(model_dict, params_path)
        logger.info("Saved model to {}".format(params_path))

        if optimizer:
            opt_dict = optimizer.state_dict()
            optimizer_path = checkpoint_path + ".pdopt"
            paddle.save(opt_dict, optimizer_path)
            logger.info("Saved optimzier state to {}".format(optimizer_path))

        # The info file shares the checkpoint prefix; None is stored as {}.
        info_path = checkpoint_path + ".json"
        infos = {} if infos is None else infos
        with open(info_path, 'w') as fout:
            data = json.dumps(infos)
            fout.write(data)


================================================
FILE: paddlespeech/s2t/utils/cli_readers.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
import io
import logging
import sys

import h5py
import kaldiio
import soundfile

from paddlespeech.s2t.io.reader import SoundHDF5File


def file_reader_helper(
        rspecifier: str,
        filetype: str="mat",
        return_shape: bool=False,
        segments: str=None, ):
    """Build a reader that iterates over (uttid, array) pairs, kaldi style.

    The "ark" prefix is also used for HDF5 files, imitating the
    "kaldi-rspecifier" syntax, which can look confusing at first.

    Args:
        rspecifier: Give as "ark:feats.ark" or "scp:feats.scp"
        filetype: "mat" is kaldi-matrix, "hdf5": HDF5, "sound.hdf5": sound
            stored in HDF5, "sound": plain sound files
        return_shape: Return the shape of the matrix,
            instead of the matrix. This can reduce IO cost for HDF5.
        segments (str): Only forwarded to the "mat" reader. The file format is
            "<segment-id> <recording-id> <start-time> <end-time>",
            e.g. "call-861225-A-0050-0065 call-861225-A 5.0 6.5"
    Returns:
        An iterable reader object for the requested file type.
    Raises:
        NotImplementedError: if ``filetype`` is not one of the above.

    Examples:
        Read from kaldi-matrix ark file:

        >>> for u, array in file_reader_helper('ark:feats.ark', 'mat'):
        ...     array

        Read from HDF5 file:

        >>> for u, array in file_reader_helper('ark:feats.h5', 'hdf5'):
        ...     array

    """
    # One guard clause per supported file type; anything else is rejected.
    if filetype == "mat":
        return KaldiReader(
            rspecifier, return_shape=return_shape, segments=segments)

    if filetype == "hdf5":
        return HDF5Reader(rspecifier, return_shape=return_shape)

    if filetype == "sound.hdf5":
        return SoundHDF5Reader(rspecifier, return_shape=return_shape)

    if filetype == "sound":
        return SoundReader(rspecifier, return_shape=return_shape)

    raise NotImplementedError(f"filetype={filetype}")


class KaldiReader:
    """Iterate over the entries of a kaldi rspecifier through ``kaldiio``.

    Args:
        rspecifier: specifier handed to ``kaldiio.ReadHelper``.
        return_shape: yield ``array.shape`` instead of the array itself.
        segments: forwarded to ``kaldiio.ReadHelper`` as ``segments``.
    """

    def __init__(self, rspecifier, return_shape=False, segments=None):
        self.rspecifier = rspecifier
        self.return_shape = return_shape
        self.segments = segments

    def __iter__(self):
        """Yield ``(key, array)`` or ``(key, shape)`` for every entry."""
        helper = kaldiio.ReadHelper(self.rspecifier, segments=self.segments)
        with helper as entries:
            for utt_id, matrix in entries:
                yield utt_id, (matrix.shape if self.return_shape else matrix)


class HDF5Reader:
    """Iterate over the datasets of HDF5 files addressed kaldi-style.

    Args:
        rspecifier: "ark:<file.h5>" to read every key of one HDF5 file
            ("ark:-" reads the file from stdin), or "scp:<file.scp>" where
            each scp line is "<uttid> <filepath.h5>:<key>".
        return_shape: yield the dataset shape instead of its content.

    Raises:
        ValueError: if ``rspecifier`` has no ":" or its prefix is neither
            "ark" nor "scp".
    """

    def __init__(self, rspecifier, return_shape=False):
        if ":" not in rspecifier:
            # Format with the argument: ``self.rspecifier`` is not set yet.
            raise ValueError('Give "rspecifier" such as "ark:some.ark: {}"'.
                             format(rspecifier))
        self.rspecifier = rspecifier
        self.ark_or_scp, self.filepath = self.rspecifier.split(":", 1)
        if self.ark_or_scp not in ["ark", "scp"]:
            raise ValueError(f"Must be scp or ark: {self.ark_or_scp}")

        self.return_shape = return_shape

    def __iter__(self):
        """Yield ``(key, array)`` or ``(key, shape)`` for every entry.

        Raises:
            RuntimeError: in scp mode, if a line has no "<path>:<key>" value.
        """
        if self.ark_or_scp == "scp":
            # Cache of opened HDF5 files, keyed by path, shared across lines.
            hdf5_dict = {}
            try:
                with open(self.filepath, "r", encoding="utf-8") as f:
                    for line in f:
                        key, value = line.rstrip().split(None, 1)

                        if ":" not in value:
                            raise RuntimeError(
                                "scp file for hdf5 should be like: "
                                '"uttid filepath.h5:key": {}({})'.format(
                                    line, self.filepath))
                        path, h5_key = value.split(":", 1)

                        hdf5_file = hdf5_dict.get(path)
                        if hdf5_file is None:
                            try:
                                hdf5_file = h5py.File(path, "r")
                            except Exception:
                                logging.error(
                                    "Error when loading {}".format(path))
                                raise
                            hdf5_dict[path] = hdf5_file

                        try:
                            data = hdf5_file[h5_key]
                        except Exception:
                            logging.error("Error when loading {} with key={}".
                                          format(path, h5_key))
                            raise

                        if self.return_shape:
                            yield key, data.shape
                        else:
                            yield key, data[()]
            finally:
                # Close all files, also when iteration fails or the generator
                # is abandoned before exhaustion. Closing is best-effort.
                for opened in hdf5_dict.values():
                    try:
                        opened.close()
                    except Exception:
                        pass

        else:
            if self.filepath == "-":
                # Required h5py>=2.9
                filepath = io.BytesIO(sys.stdin.buffer.read())
            else:
                filepath = self.filepath
            with h5py.File(filepath, "r") as f:
                for key in f:
                    if self.return_shape:
                        yield key, f[key].shape
                    else:
                        yield key, f[key][()]


class SoundHDF5Reader:
    """Iterate over sounds stored in ``SoundHDF5File`` containers.

    Args:
        rspecifier: "ark:<file.h5>" to read every key of one file
            ("ark:-" reads the file from stdin), or "scp:<file.scp>" where
            each scp line is "<uttid> <filepath.h5>:<key>".
        return_shape: yield the shape of the signal instead of the signal.

    Raises:
        ValueError: if ``rspecifier`` has no ":" or its prefix is neither
            "ark" nor "scp".
    """

    def __init__(self, rspecifier, return_shape=False):
        if ":" not in rspecifier:
            raise ValueError('Give "rspecifier" such as "ark:some.ark: {}"'.
                             format(rspecifier))
        self.ark_or_scp, self.filepath = rspecifier.split(":", 1)
        if self.ark_or_scp not in ["ark", "scp"]:
            raise ValueError(f"Must be scp or ark: {self.ark_or_scp}")
        self.return_shape = return_shape

    @staticmethod
    def _close_quietly(handle):
        """Close ``handle`` on a best-effort basis, ignoring any failure."""
        try:
            handle.close()
        except Exception:
            pass

    def __iter__(self):
        """Yield ``(key, (rate, array))`` or ``(key, (rate, shape))``.

        Raises:
            RuntimeError: in scp mode, if a line has no "<path>:<key>" value.
        """
        if self.ark_or_scp == "scp":
            # Cache of opened files, keyed by path, shared across scp lines.
            hdf5_dict = {}
            try:
                with open(self.filepath, "r", encoding="utf-8") as f:
                    for line in f:
                        key, value = line.rstrip().split(None, 1)

                        if ":" not in value:
                            raise RuntimeError(
                                "scp file for hdf5 should be like: "
                                '"uttid filepath.h5:key": {}({})'.format(
                                    line, self.filepath))
                        path, h5_key = value.split(":", 1)

                        hdf5_file = hdf5_dict.get(path)
                        if hdf5_file is None:
                            try:
                                hdf5_file = SoundHDF5File(path, "r")
                            except Exception:
                                logging.error(
                                    "Error when loading {}".format(path))
                                raise
                            hdf5_dict[path] = hdf5_file

                        try:
                            data = hdf5_file[h5_key]
                        except Exception:
                            logging.error("Error when loading {} with key={}".
                                          format(path, h5_key))
                            raise

                        # Change Tuple[ndarray, int] -> Tuple[int, ndarray]
                        # (soundfile style -> scipy style)
                        array, rate = data
                        if self.return_shape:
                            array = array.shape
                        yield key, (rate, array)
            finally:
                # Close all files, also when iteration fails or the generator
                # is abandoned before exhaustion.
                for opened in hdf5_dict.values():
                    self._close_quietly(opened)

        else:
            if self.filepath == "-":
                # Required h5py>=2.9
                filepath = io.BytesIO(sys.stdin.buffer.read())
            else:
                filepath = self.filepath
            sound_file = SoundHDF5File(filepath, "r")
            try:
                for key, (a, r) in sound_file.items():
                    if self.return_shape:
                        a = a.shape
                    yield key, (r, a)
            finally:
                # The container was previously left open after iteration.
                self._close_quietly(sound_file)


class SoundReader:
    """Iterate over the sound files listed in an scp file.

    Args:
        rspecifier: "scp:<file.scp>"; each scp line is "<uttid> <sound path>".
        return_shape: yield the shape of the signal instead of the signal.

    Raises:
        ValueError: if ``rspecifier`` has no ":" or its prefix is not "scp".
    """

    def __init__(self, rspecifier, return_shape=False):
        if ":" not in rspecifier:
            raise ValueError('Give "rspecifier" such as "scp:some.scp: {}"'.
                             format(rspecifier))
        prefix, path = rspecifier.split(":", 1)
        self.ark_or_scp = prefix
        self.filepath = path
        if prefix != "scp":
            raise ValueError(
                'Only supporting "scp" for sound file: {}'.format(prefix))
        self.return_shape = return_shape

    def __iter__(self):
        """Yield ``(key, (rate, array))`` or ``(key, (rate, shape))``."""
        with open(self.filepath, "r", encoding="utf-8") as scp:
            for entry in scp:
                utt_id, wav_path = entry.rstrip().split(None, 1)
                # Samples are read as PCM16.
                samples, sample_rate = soundfile.read(wav_path, dtype="int16")
                # soundfile returns (data, rate); emit scipy order (rate, data).
                payload = samples.shape if self.return_shape else samples
                yield utt_id, (sample_rate, payload)


================================================
FILE: paddlespeech/s2t/utils/cli_utils.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
import sys
from collections.abc import Sequence

import numpy

from paddlespeech.utils.argparse import strtobool as dist_strtobool


def strtobool(x):
    """Convert ``x`` with ``dist_strtobool`` and return the result as a bool."""
    # The wrapped helper's result is coerced with bool() so callers always
    # receive a real bool.
    flag = dist_strtobool(x)
    return True if flag else False


def get_commandline_args():
    """Return the current command line as a single shell-escaped string.

    The string is ``sys.executable`` followed by every item of ``sys.argv``.
    Single quotes inside an argument are always escaped; an argument is
    additionally wrapped in single quotes when it contains one of the
    characters listed below.
    """
    # Characters that force an argument to be wrapped in single quotes.
    quote_triggers = frozenset(" ;&()|^<>?*[]$`\"\\!{}")

    def _escape(arg):
        # Close the quote, emit an escaped quote, reopen: ' -> '\''
        body = arg.replace("'", "'\\''")
        if quote_triggers.isdisjoint(arg):
            return body
        return "'" + body + "'"

    escaped_args = [_escape(arg) for arg in sys.argv]
    return sys.executable + " " + " ".join(escaped_args)


def is_scipy_wav_style(value):
    """Tell whether ``value`` looks like ``Tuple[int, numpy.ndarray]``.

    Any ``Sequence`` of length two whose first item is an ``int`` and whose
    second item is a ``numpy.ndarray`` qualifies.
    """
    if not isinstance(value, Sequence):
        return False
    if len(value) != 2:
        return False
    if not isinstance(value[0], int):
        return False
    return isinstance(value[1], numpy.ndarray)


def assert_scipy_wav_style(value):
    """Assert that ``value`` passes ``is_scipy_wav_style``.

    The failure message names the type of ``value`` and, for sequences, the
    type of each of its items.
    """

    def _describe():
        # Only evaluated when the assertion fails.
        if not isinstance(value, Sequence):
            return type(value)
        item_types = ", ".join(str(type(item)) for item in value)
        return "{}[{}]".format(type(value), item_types)

    assert is_scipy_wav_style(
        value), "Must be Tuple[int, numpy.ndarray], but got {}".format(
            _describe())


================================================
FILE: paddlespeech/s2t/utils/cli_writers.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
from pathlib import Path
from typing import Dict

import h5py
import kaldiio
import numpy
import soundfile

from paddlespeech.s2t.io.reader import SoundHDF5File
from paddlespeech.s2t.utils.cli_utils import assert_scipy_wav_style


def file_writer_helper(
        wspecifier: str,
        filetype: str="mat",
        write_num_frames: str=None,
        compress: bool=False,
        compression_method: int=2,
        pcm_format: str="wav", ):
    """Build a writer that stores matrices or sounds in kaldi style.

    Args:
        wspecifier: e.g. ark,scp:out.ark,out.scp
        filetype: "mat" is kaldi-matrix, "hdf5": HDF5, "sound.hdf5": sound
            stored in HDF5, "sound": plain sound files
        write_num_frames: e.g. 'ark,t:num_frames.txt'
        compress: Compress or not (used by "mat" and "hdf5")
        compression_method: Specify compression level (used by "mat")
        pcm_format: sound format (used by "sound.hdf5" and "sound")

    Returns:
        A writer object for the requested file type.

    Raises:
        NotImplementedError: if ``filetype`` is not one of the above.

    Write in kaldi-matrix-ark with "kaldi-scp" file:

    >>> with file_writer_helper('ark,scp:out.ark,out.scp') as f:
    >>>     f['uttid'] = array

    This "scp" has the following format:

        uttidA out.ark:1234
        uttidB out.ark:2222

    where 1234 and 2222 point to the starting byte address of the matrix.
    (For detail, see official documentation of Kaldi)

    Write in HDF5 with "scp" file:

    >>> with file_writer_helper('ark,scp:out.h5,out.scp', 'hdf5') as f:
    >>>     f['uttid'] = array

    This "scp" file is created as:

        uttidA out.h5:uttidA
        uttidB out.h5:uttidB

    Unlike "kaldi-ark", HDF5 gives access to any key, so "scp" is not
    strictly required for random reading. It is still created for HDF5
    because it is useful for some use-cases, e.g. concatenation, splitting.

    """
    # One guard clause per supported file type; anything else is rejected.
    if filetype == "mat":
        return KaldiWriter(
            wspecifier,
            write_num_frames=write_num_frames,
            compress=compress,
            compression_method=compression_method, )

    if filetype == "hdf5":
        return HDF5Writer(
            wspecifier, write_num_frames=write_num_frames, compress=compress)

    if filetype == "sound.hdf5":
        return SoundHDF5Writer(
            wspecifier,
            write_num_frames=write_num_frames,
            pcm_format=pcm_format)

    if filetype == "sound":
        return SoundWriter(
            wspecifier,
            write_num_frames=write_num_frames,
            pcm_format=pcm_format)

    raise NotImplementedError(f"filetype={filetype}")


class BaseWriter:
    """Common context-manager and closing logic shared by the writers.

    Subclasses are expected to set ``writer``, ``writer_scp`` and
    ``writer_nframe`` and to implement ``__setitem__``.
    """

    def __setitem__(self, key, value):
        raise NotImplementedError

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def close(self):
        """Close the main, scp and num-frames writers, ignoring close errors."""
        # The main writer may be None or lack close(); any error is ignored.
        try:
            self.writer.close()
        except Exception:
            pass

        # The optional side writers are only closed when they are set.
        for attr_name in ("writer_scp", "writer_nframe"):
            handle = getattr(self, attr_name)
            if handle is None:
                continue
            try:
                handle.close()
            except Exception:
                pass


def get_num_frames_writer(write_num_frames: str):
    """Open the text file that receives "<key> <num_frames>" lines.

    Args:
        write_num_frames: specifier of the form "ark,t:<file>", or None.

    Returns:
        A text file opened for writing (utf-8), or None when
        ``write_num_frames`` is None.

    Raises:
        ValueError: if the specifier has no ":" or its type is not "ark,t".

    Examples:
        >>> get_num_frames_writer('ark,t:num_frames.txt')
    """
    # Without a specifier there is nothing to open; the original fell through
    # to ``open(nframes_file, ...)`` with ``nframes_file`` unbound.
    if write_num_frames is None:
        return None

    if ":" not in write_num_frames:
        raise ValueError('Must include ":", write_num_frames={}'.format(
            write_num_frames))

    nframes_type, nframes_file = write_num_frames.split(":", 1)
    if nframes_type != "ark,t":
        raise ValueError("Only supporting text mode. "
                         "e.g. --write-num-frames=ark,t:foo.txt :"
                         "{}".format(nframes_type))

    return open(nframes_file, "w", encoding="utf-8")


class KaldiWriter(BaseWriter):
    """Write matrices through ``kaldiio.WriteHelper``.

    Args:
        wspecifier: specifier handed to ``kaldiio.WriteHelper``.
        write_num_frames: optional "ark,t:<file>" specifier; when given, a
            "<key> <len(value)>" line is written for every stored item.
        compress: when true, ``compression_method`` is forwarded to kaldiio.
        compression_method: value forwarded as ``compression_method``.
    """

    def __init__(self,
                 wspecifier,
                 write_num_frames=None,
                 compress=False,
                 compression_method=2):
        # The compression option is only passed on when compression is asked.
        helper_options = {}
        if compress:
            helper_options["compression_method"] = compression_method
        self.writer = kaldiio.WriteHelper(wspecifier, **helper_options)
        # kaldiio handles the scp output itself, so no separate scp writer.
        self.writer_scp = None
        self.writer_nframe = (None if write_num_frames is None else
                              get_num_frames_writer(write_num_frames))

    def __setitem__(self, key, value):
        self.writer[key] = value
        nframe_out = self.writer_nframe
        if nframe_out is None:
            return
        nframe_out.write(f"{key} {len(value)}\n")


def parse_wspecifier(wspecifier: str) -> Dict[str, str]:
    """Split a wspecifier into a mapping from output kind to file path.

    Raises:
        ValueError: if the kind part is not "ark", "scp,ark" or "ark,scp",
            or if the number of kinds and of paths differ.

    Examples:
        >>> parse_wspecifier('ark,scp:out.ark,out.scp')
        {'ark': 'out.ark', 'scp': 'out.scp'}

    """
    allowed_kinds = ("ark", "scp,ark", "ark,scp")

    ark_scp, filepath = wspecifier.split(":", 1)
    if ark_scp not in allowed_kinds:
        raise ValueError("{} is not allowed: {}".format(ark_scp, wspecifier))

    kinds = ark_scp.split(",")
    paths = filepath.split(",")
    if len(kinds) != len(paths):
        raise ValueError("Mismatch: {} and {}".format(ark_scp, filepath))

    return {kind: path for kind, path in zip(kinds, paths)}


class HDF5Writer(BaseWriter):
    """Store arrays as datasets of one HDF5 file, with optional scp output.

    Args:
        wspecifier: "ark:<file.h5>" or "ark,scp:<file.h5>,<file.scp>".
        write_num_frames: optional "ark,t:<file>" specifier; when given, a
            "<key> <len(value)>" line is written for every stored item.
        compress: when true, datasets are created with gzip compression.

    Examples:
        >>> with HDF5Writer('ark:out.h5', compress=True) as f:
        ...     f['key'] = array
    """

    def __init__(self, wspecifier, write_num_frames=None, compress=False):
        spec_dict = parse_wspecifier(wspecifier)
        self.filename = spec_dict["ark"]

        # Extra keyword arguments passed to every create_dataset call.
        self.kwargs = {"compression": "gzip"} if compress else {}
        self.writer = h5py.File(spec_dict["ark"], "w")
        self.writer_scp = (open(spec_dict["scp"], "w", encoding="utf-8")
                           if "scp" in spec_dict else None)
        self.writer_nframe = (None if write_num_frames is None else
                              get_num_frames_writer(write_num_frames))

    def __setitem__(self, key, value):
        self.writer.create_dataset(key, data=value, **self.kwargs)

        scp_out, nframe_out = self.writer_scp, self.writer_nframe
        if scp_out is not None:
            # scp entries point back into the HDF5 file: "<key> <file>:<key>"
            scp_out.write(f"{key} {self.filename}:{key}\n")
        if nframe_out is not None:
            nframe_out.write(f"{key} {len(value)}\n")


class SoundHDF5Writer(BaseWriter):
    """Store sounds in a ``SoundHDF5File``, with optional scp output.

    Values are given scipy style, ``(rate, array)``.

    Args:
        wspecifier: "ark:<file.h5>" or "ark,scp:<file.h5>,<file.scp>".
        write_num_frames: optional "ark,t:<file>" specifier; when given, a
            "<key> <len(array)>" line is written for every stored item.
        pcm_format: forwarded to ``SoundHDF5File`` as ``format``.

    Examples:
        >>> fs = 16000
        >>> with SoundHDF5Writer('ark:out.h5') as f:
        ...     f['key'] = fs, array
    """

    def __init__(self, wspecifier, write_num_frames=None, pcm_format="wav"):
        self.pcm_format = pcm_format
        spec_dict = parse_wspecifier(wspecifier)
        self.filename = spec_dict["ark"]
        self.writer = SoundHDF5File(
            spec_dict["ark"], "w", format=self.pcm_format)
        self.writer_scp = (open(spec_dict["scp"], "w", encoding="utf-8")
                           if "scp" in spec_dict else None)
        self.writer_nframe = (None if write_num_frames is None else
                              get_num_frames_writer(write_num_frames))

    def __setitem__(self, key, value):
        assert_scipy_wav_style(value)
        # scipy style (rate, array) -> soundfile style (array, rate)
        rate, signal = value[0], value[1]
        value = (signal, rate)
        self.writer.create_dataset(key, data=value)

        scp_out, nframe_out = self.writer_scp, self.writer_nframe
        if scp_out is not None:
            scp_out.write(f"{key} {self.filename}:{key}\n")
        if nframe_out is not None:
            nframe_out.write(f"{key} {len(value[0])}\n")


class SoundWriter(BaseWriter):
    """Writer that stores every entry as its own sound file in a directory.

    Values are assigned scipy style, i.e. ``(rate, signal)``.

    Examples:
        >>> fs = 16000
        >>> with SoundWriter('ark,scp:outdir,out.scp') as f:
        ...     f['key'] = fs, array
    """

    def __init__(self, wspecifier, write_num_frames=None, pcm_format="wav"):
        """Create the output directory and the optional side writers.

        Args:
            wspecifier (str): write specifier, parsed by ``parse_wspecifier``,
                e.g. ``ark,scp:dirname,wav.scp``; the sound files are then
                written to ``dirname/<key>.<pcm_format>``.
            write_num_frames (optional): specifier handed to
                ``get_num_frames_writer``; ``None`` disables that output.
            pcm_format (str): file extension used for the written files.
        """
        self.pcm_format = pcm_format
        spec = parse_wspecifier(wspecifier)

        # The "ark" part of the specifier is a directory, not a single file.
        self.dirname = spec["ark"]
        Path(self.dirname).mkdir(parents=True, exist_ok=True)
        self.writer = None

        # Optional side files; ``None`` means "do not write".
        self.writer_scp = (open(spec["scp"], "w", encoding="utf-8")
                           if "scp" in spec else None)
        self.writer_nframe = (get_num_frames_writer(write_num_frames)
                              if write_num_frames is not None else None)

    def __setitem__(self, key, value):
        """Write one ``(rate, signal)`` entry to ``<dirname>/<key>.<pcm_format>``."""
        assert_scipy_wav_style(value)
        rate, signal = value
        file_name = key + "." + self.pcm_format
        wavfile = Path(self.dirname) / file_name
        # Samples are cast to 16-bit integers before being handed to soundfile.
        pcm = signal.astype(numpy.int16)
        soundfile.write(wavfile, pcm, rate)

        scp_out = self.writer_scp
        if scp_out is not None:
            scp_out.write(f"{key} {wavfile}\n")

        nframe_out = self.writer_nframe
        if nframe_out is not None:
            nframe_out.write(f"{key} {len(signal)}\n")


================================================
FILE: paddlespeech/s2t/utils/ctc_utils.py
================================================
# Copyright 2021 Mobvoi Inc. All Rights Reserved.
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from wenet(https://github.com/wenet-e2e/wenet)
from pathlib import Path
from typing import List

import numpy as np
import paddle

from paddlespeech.s2t.utils import text_grid
from paddlespeech.s2t.utils import utility
from paddlespeech.s2t.utils.log import Log

logger = Log(__name__).getlog()

__all__ = ["forced_align", "remove_duplicates_and_blank", "insert_blank"]


def remove_duplicates_and_blank(hyp: List[int], blank_id=0) -> List[int]:
    """Collapse a frame-level CTC alignment into its label id sequence.

    Every run of identical consecutive ids is reduced to one id, and runs of
    the blank id are dropped entirely.

    "abaa-acee-" -> "abaace"

    Args:
        hyp (List[int]): hypothesis ids, (L)
        blank_id (int, optional): blank id. Defaults to 0.

    Returns:
        List[int]: ids with duplicates merged and blanks removed.
    """
    labels: List[int] = []
    in_run = False
    run_token = None
    for token in hyp:
        # still inside the current run of repeated ids: nothing to emit
        if in_run and token == run_token:
            continue
        # a new run starts here; emit its id unless it is the blank
        in_run, run_token = True, token
        if token != blank_id:
            labels.append(token)
    return labels


def insert_blank(label: np.ndarray, blank_id: int=0) -> np.ndarray:
    """Insert blank token between every two label token.

    A blank is also placed before the first and after the last label, so an
    empty label yields a single blank.

    "abcdefg" -> "-a-b-c-d-e-f-g-"

    Args:
        label ([np.ndarray]): label ids, List[int], (L).
        blank_id (int, optional): blank id. Defaults to 0.

    Returns:
        [np.ndarray]: (2L+1).
    """
    label = np.expand_dims(label, 1)  # [L, 1]
    blanks = np.zeros((label.shape[0], 1), dtype=np.int64) + blank_id
    label = np.concatenate([blanks, label], axis=1)  # [L, 2]
    label = label.reshape(-1)  # [2L], -l-l-l
    # Append the closing blank explicitly instead of reading it back from
    # label[0]: that element does not exist when the label is empty.
    tail = np.full((1, ), blank_id, dtype=label.dtype)
    label = np.concatenate([label, tail])  # [2L + 1], -l-l-l-
    return label


def forced_align(ctc_probs: paddle.Tensor, y: paddle.Tensor,
                 blank_id=0) -> List[int]:
    """ctc forced alignment.

    Viterbi search over the blank-extended label sequence: the per-frame
    scores of a path are summed and the best scoring path is traced back.

    https://distill.pub/2017/ctc/

    Args:
        ctc_probs (paddle.Tensor): per-frame scores, 2d tensor (T, D).
            ``ctc_align`` passes the output of ``model.ctc.log_softmax``.
        y (paddle.Tensor): label id sequence tensor, 1d tensor (L).
            NOTE(review): the initialisation reads extended states 0 and 1,
            so at least one label is assumed - confirm with callers.
        blank_id (int): blank symbol index
    Returns:
        List[int]: best alignment result, (T).
    """
    y_insert_blank = insert_blank(y, blank_id)  #(2L+1)

    log_alpha = paddle.zeros(
        (ctc_probs.shape[0], len(y_insert_blank)))  #(T, 2L+1)
    log_alpha = log_alpha - float('inf')  # log of zero

    # TODO(Hui Zhang): zeros not support paddle.int16
    # self.__setitem_varbase__(item, value) When assign a value to a paddle.Tensor, the data type of the paddle.Tensor not support int16
    state_path = (paddle.zeros(
        (ctc_probs.shape[0], len(y_insert_blank)), dtype=paddle.int32) - 1
                  )  # state path, Tuple((T, 2L+1))

    # init start state
    # TODO(Hui Zhang): VarBase.__getitem__() not support np.int64
    log_alpha[0, 0] = ctc_probs[0][int(y_insert_blank[0])]  # State-b, Sb
    log_alpha[0, 1] = ctc_probs[0][int(y_insert_blank[1])]  # State-nb, Snb

    for t in range(1, ctc_probs.shape[0]):  # T
        for s in range(len(y_insert_blank)):  # 2L+1
            if s == 0:
                # State 0 has no left neighbour. Using column ``s - 1`` here
                # would index column -1, i.e. the LAST state, and allow an
                # invalid wrap-around transition (and a -1 back-pointer).
                # Only the self-transition is legal.
                candidates = paddle.to_tensor([log_alpha[t - 1, s]])
                prev_state = [s]
            elif y_insert_blank[s] == blank_id or s < 2 or y_insert_blank[
                    s] == y_insert_blank[s - 2]:
                # blank, or same label as two states back: no skip allowed
                candidates = paddle.to_tensor(
                    [log_alpha[t - 1, s], log_alpha[t - 1, s - 1]])
                prev_state = [s, s - 1]
            else:
                # different labels: the blank in between may be skipped
                candidates = paddle.to_tensor([
                    log_alpha[t - 1, s],
                    log_alpha[t - 1, s - 1],
                    log_alpha[t - 1, s - 2],
                ])
                prev_state = [s, s - 1, s - 2]
            # TODO(Hui Zhang): VarBase.__getitem__() not support np.int64
            log_alpha[t, s] = paddle.max(candidates) + ctc_probs[t][int(
                y_insert_blank[s])]
            state_path[t, s] = prev_state[paddle.argmax(candidates)]
    # TODO(Hui Zhang): zeros not support paddle.int16
    # self.__setitem_varbase__(item, value) When assign a value to a paddle.Tensor, the data type of the paddle.Tensor not support int16
    state_seq = -1 * paddle.ones((ctc_probs.shape[0], 1), dtype=paddle.int32)

    # A valid path ends in the final blank or in the final label.
    candidates = paddle.to_tensor([
        log_alpha[-1, len(y_insert_blank) - 1],  # Sb
        log_alpha[-1, len(y_insert_blank) - 2]  # Snb
    ])
    prev_state = [len(y_insert_blank) - 1, len(y_insert_blank) - 2]
    state_seq[-1] = prev_state[paddle.argmax(candidates)]
    # back-trace the stored predecessors from the last frame to the first
    for t in range(ctc_probs.shape[0] - 2, -1, -1):
        state_seq[t] = state_path[t + 1, state_seq[t + 1, 0]]

    # map every visited state back to the token id it stands for
    output_alignment = []
    for t in range(0, ctc_probs.shape[0]):
        output_alignment.append(y_insert_blank[state_seq[t, 0]])

    return output_alignment


def ctc_align(config, model, dataloader, batch_size, stride_ms, token_dict,
              result_file):
    """ctc alignment.

    For every example the encoder is run, the CTC scores are force-aligned
    against the target, and three outputs are written: one line in
    ``result_file``, and a ``<key>.tier`` plus a ``<key>.TextGrid`` file in
    an ``align`` directory next to ``result_file``.

    Args:
        config (cfgNode): config
        model (nn.Layer): U2 Model.
        dataloader (io.DataLoader): dataloader.
        batch_size (int): decoding batchsize, must be 1.
        stride_ms (int): audio feature stride in ms unit.
        token_dict (List[str]): vocab list, e.g. ['blank', 'unk', 'a', 'b', '<eos>'].
        result_file (str): alignment output file, e.g. /path/to/xxx.align.
    """
    # The module does not import ``sys`` at top level; import it here so the
    # early exit below cannot fail with a NameError.
    import sys

    if batch_size > 1:
        logger.fatal('alignment mode must be running with batch_size == 1')
        sys.exit(1)
    assert result_file and result_file.endswith('.align')

    model.eval()
    # conv subsampling rate
    subsample = utility.get_subsample(config)
    logger.info(f"Align Total Examples: {len(dataloader.dataset)}")

    with open(result_file, 'w') as fout:
        # Per-example tier / TextGrid files go next to the result file.
        align_output_path = Path(result_file).parent / "align"
        align_output_path.mkdir(parents=True, exist_ok=True)

        # one example in batch
        for batch in dataloader:
            key, feat, feats_length, target, target_length = batch

            # 1. Encoder
            encoder_out, _ = model._forward_encoder(
                feat, feats_length)  # (B, maxlen, encoder_dim)
            ctc_probs = model.ctc.log_softmax(
                encoder_out)  # (1, maxlen, vocab_size)

            # 2. alignment
            ctc_probs = ctc_probs.squeeze(0)
            target = target.squeeze(0)
            alignment = forced_align(ctc_probs, target)

            logger.info(f"align ids: {key[0]} {alignment}")
            fout.write('{} {}\n'.format(key[0], alignment))

            # 3. gen praat
            # segment alignment
            align_segs = text_grid.segment_alignment(alignment)
            logger.info(f"align tokens: {key[0]}, {align_segs}")

            # IntervalTier, List["start end token\n"]
            tierformat = text_grid.align_to_tierformat(align_segs, subsample,
                                                       token_dict)

            # write tier
            tier_path = align_output_path / (key[0] + ".tier")
            with tier_path.open('w') as f:
                f.writelines(tierformat)

            # write textgrid
            textgrid_path = align_output_path / (key[0] + ".TextGrid")
            second_per_frame = 1. / (1000. /
                                     stride_ms)  # 25ms window, 10ms stride
            second_per_example = (
                len(alignment) + 1) * subsample * second_per_frame
            text_grid.generate_textgrid(
                maxtime=second_per_example,
                intervals=tierformat,
                output=str(textgrid_path))


================================================
FILE: paddlespeech/s2t/utils/dynamic_import.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
import importlib
import inspect
from typing import Any
from typing import Dict
from typing import List
from typing import Text

from paddlespeech.s2t.utils.log import Log
from paddlespeech.s2t.utils.tensor_utils import has_tensor

logger = Log(__name__).getlog()

__all__ = ["dynamic_import", "instance_class"]


def dynamic_import(import_path, alias=dict()):
    """Import a module and return one attribute of it.

    :param str import_path: either 'module_name:class_name',
        e.g., 'paddlespeech.s2t.models.u2:U2Model', or a key of ``alias``
    :param dict alias: shortcut names mapped to 'module_name:class_name'
    :return: imported class
    :raises ValueError: if ``import_path`` has no ':' and is not in ``alias``
    """
    if ":" not in import_path:
        # No explicit module path given: it must be a registered shortcut.
        if import_path not in alias:
            raise ValueError(
                "import_path should be one of {} or "
                'include ":", e.g. "paddlespeech.s2t.models.u2:U2Model" : '
                "{}".format(set(alias), import_path))
        import_path = alias[import_path]

    module_name, objname = import_path.split(":")
    module = importlib.import_module(module_name)
    return getattr(module, objname)


def filter_valid_args(args: Dict[Text, Any], valid_keys: List[Text]):
    """Return the entries of ``args`` that are usable as keyword arguments.

    An entry is kept when its key is contained in ``valid_keys`` and its
    value is not ``None``.
    """
    kept = {}
    for name, value in args.items():
        if name not in valid_keys or value is None:
            continue
        kept[name] = value
    return kept


def filter_out_tensor(args: Dict[Text, Any]):
    """Return a copy of ``args`` without the values ``has_tensor`` flags."""
    kept = {}
    for name, value in args.items():
        if has_tensor(value):
            continue
        kept[name] = value
    return kept


def instance_class(module_class, args: Dict[Text, Any]):
    """Instantiate ``module_class`` with the subset of ``args`` it accepts.

    Only keys that appear in the signature of ``module_class`` and whose
    value is not ``None`` are passed on.  The arguments used are logged,
    with entries flagged by ``has_tensor`` left out of the log message.
    """
    accepted = inspect.signature(module_class).parameters.keys()
    init_kwargs = filter_valid_args(args, accepted)
    message = f"Instance: {module_class.__name__} {filter_out_tensor(init_kwargs)}."
    logger.info(message)
    return module_class(**init_kwargs)


================================================
FILE: paddlespeech/s2t/utils/dynamic_pip_install.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pip


def install(package_name):
    """Run ``pip install <package_name>`` inside the current interpreter.

    pip's ``main`` entry point is imported from ``pip._internal`` when the
    major version of pip is greater than 9, and from ``pip`` otherwise.
    """
    pip_major = int(pip.__version__.split('.')[0])
    if pip_major <= 9:
        from pip import main as pip_main
    else:
        from pip._internal import main as pip_main
    pip_main(['install', package_name])


================================================
FILE: paddlespeech/s2t/utils/error_rate.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This module provides functions to calculate error rate in different level.
e.g. wer for word-level, cer for char-level.
"""
from itertools import groupby

import editdistance
import numpy as np

__all__ = ['word_errors', 'char_errors', 'wer', 'cer', "ErrorCalculator"]


def _levenshtein_distance(ref, hyp):
    """Levenshtein distance is a string metric for measuring the difference
    between two sequences. Informally, the levenshtein disctance is defined as
    the minimum number of single-character edits (substitutions, insertions or
    deletions) required to change one word into the other. We can naturally
    extend the edits to word level when calculate levenshtein disctance for
    two sentences.
    """
    m = len(ref)
    n = len(hyp)

    # special case
    if ref == hyp:
        return 0
    if m == 0:
        return n
    if n == 0:
        return m

    if m < n:
        ref, hyp = hyp, ref
        m, n = n, m

    # use O(min(m, n)) space
    distance = np.zeros((2, n + 1), dtype=np.int32)

    # initialize distance matrix
    for j in range(n + 1):
        distance[0][j] = j

    # calculate levenshtein distance
    for i in range(1, m + 1):
        prev_row_idx = (i - 1) % 2
        cur_row_idx = i % 2
        distance[cur_row_idx][0] = i
        for j in range(1, n + 1):
            if ref[i - 1] == hyp[j - 1]:
                distance[cur_row_idx][j] = distance[prev_row_idx][j - 1]
            else:
                s_num = distance[prev_row_idx][j - 1] + 1
                i_num = distance[cur_row_idx][j - 1] + 1
                d_num = distance[prev_row_idx][j] + 1
                distance[cur_row_idx][j] = min(s_num, i_num, d_num)

    return distance[m % 2][n]


def word_errors(reference, hypothesis, ignore_case=False, delimiter=' '):
    """Compute the word-level levenshtein distance between the reference
    and the hypothesis sentence.

    Empty items produced by splitting on ``delimiter`` are discarded.

    :param reference: The reference sentence.
    :type reference: str
    :param hypothesis: The hypothesis sentence.
    :type hypothesis: str
    :param ignore_case: Whether to compare case-insensitively.
    :type ignore_case: bool
    :param delimiter: Delimiter of input sentences.
    :type delimiter: char
    :return: Levenshtein distance and word number of reference sentence.
    :rtype: tuple
    """
    if ignore_case:
        reference, hypothesis = reference.lower(), hypothesis.lower()

    ref_words = [word for word in reference.split(delimiter) if word]
    hyp_words = [word for word in hypothesis.split(delimiter) if word]

    # NOTE: `editdistance.eval(ref_words, hyp_words)` was left disabled by the
    # original authors in favour of `_levenshtein_distance`.
    distance = _levenshtein_distance(ref_words, hyp_words)
    return float(distance), len(ref_words)


def char_errors(reference, hypothesis, ignore_case=False, remove_space=False):
    """Compute the char-level levenshtein distance between the reference
    and the hypothesis sentence.

    Both sentences are first split on single spaces and re-joined, which
    drops leading/trailing spaces and collapses repeated spaces (or removes
    all spaces when ``remove_space`` is set).

    :param reference: The reference sentence.
    :type reference: str
    :param hypothesis: The hypothesis sentence.
    :type hypothesis: str
    :param ignore_case: Whether to compare case-insensitively.
    :type ignore_case: bool
    :param remove_space: Whether remove internal space characters
    :type remove_space: bool
    :return: Levenshtein distance and length of reference sentence.
    :rtype: tuple
    """
    if ignore_case:
        reference, hypothesis = reference.lower(), hypothesis.lower()

    join_char = '' if remove_space else ' '
    reference = join_char.join(tok for tok in reference.split(' ') if tok)
    hypothesis = join_char.join(tok for tok in hypothesis.split(' ') if tok)

    # NOTE: `editdistance.eval(reference, hypothesis)` was left disabled by the
    # original authors in favour of `_levenshtein_distance`.
    distance = _levenshtein_distance(reference, hypothesis)
    return float(distance), len(reference)


def wer(reference, hypothesis, ignore_case=False, delimiter=' '):
    """Calculate word error rate (WER), which compares the reference text
    and the hypothesis text at word level. WER is defined as:

    .. math::
        WER = (Sw + Dw + Iw) / Nw

    where

    .. code-block:: text

        Sw is the number of words substituted,
        Dw is the number of words deleted,
        Iw is the number of words inserted,
        Nw is the number of words in the reference

    The numerator is obtained as a levenshtein distance. Note that empty
    items are removed when the sentences are split by the delimiter.

    :param reference: The reference sentence.
    :type reference: str
    :param hypothesis: The hypothesis sentence.
    :type hypothesis: str
    :param ignore_case: Whether to compare case-insensitively.
    :type ignore_case: bool
    :param delimiter: Delimiter of input sentences.
    :type delimiter: char
    :return: Word error rate.
    :rtype: float
    :raises ValueError: If word number of reference is zero.
    """
    num_errors, num_ref_words = word_errors(reference, hypothesis, ignore_case,
                                            delimiter)

    # an empty reference would make the rate undefined
    if num_ref_words == 0:
        raise ValueError("Reference's word number should be greater than 0.")

    return float(num_errors) / num_ref_words


def cer(reference, hypothesis, ignore_case=False, remove_space=False):
    """Calculate character error rate (CER), which compares the reference
    text and the hypothesis text at char level. CER is defined as:

    .. math::
        CER = (Sc + Dc + Ic) / Nc

    where

    .. code-block:: text

        Sc is the number of characters substituted,
        Dc is the number of characters deleted,
        Ic is the number of characters inserted
        Nc is the number of characters in the reference

    The numerator is obtained as a levenshtein distance. Chinese input
    should be encoded to unicode. Note that leading and trailing spaces are
    dropped and consecutive spaces inside a sentence are replaced by a
    single space.

    :param reference: The reference sentence.
    :type reference: str
    :param hypothesis: The hypothesis sentence.
    :type hypothesis: str
    :param ignore_case: Whether to compare case-insensitively.
    :type ignore_case: bool
    :param remove_space: Whether remove internal space characters
    :type remove_space: bool
    :return: Character error rate.
    :rtype: float
    :raises ValueError: If the reference length is zero.
    """
    num_errors, num_ref_chars = char_errors(reference, hypothesis, ignore_case,
                                            remove_space)

    # an empty reference would make the rate undefined
    if num_ref_chars == 0:
        raise ValueError("Length of reference should be greater than 0.")

    return float(num_errors) / num_ref_chars


class ErrorCalculator():
    """Calculate CER and WER for E2E_ASR and CTC models during training.

    :param char_list: List[str], token table indexed by token id
    :param sym_space: the space symbol, e.g. <space>
    :param sym_blank: the blank symbol, e.g. <blank>
    :param report_cer: whether ``__call__`` computes the CER
    :param report_wer: whether ``__call__`` computes the WER
    """

    def __init__(self,
                 char_list,
                 sym_space,
                 sym_blank,
                 report_cer=False,
                 report_wer=False):
        """Construct an ErrorCalculator object.

        :raises ValueError: if ``sym_blank`` is not contained in ``char_list``
        """
        super().__init__()

        self.report_cer = report_cer
        self.report_wer = report_wer

        self.char_list = char_list
        self.space = sym_space
        self.blank = sym_blank
        self.idx_blank = self.char_list.index(self.blank)
        # The space symbol is optional in the vocabulary.
        if self.space in self.char_list:
            self.idx_space = self.char_list.index(self.space)
        else:
            self.idx_space = None

    def __call__(self, ys_hat, ys_pad, is_ctc=False):
        """Calculate sentence-level CER/WER scores.

        :param paddle.Tensor ys_hat: prediction (batch, seqlen)
        :param paddle.Tensor ys_pad: reference (batch, seqlen)
        :param bool is_ctc: calculate CER score for CTC
        :return: if ``is_ctc`` is set, the single CTC CER value; otherwise
            the tuple ``(cer, wer)``, where a score is ``None`` when its
            report flag is off or when it cannot be computed
        :rtype: float or tuple
        """
        cer, wer = None, None
        if is_ctc:
            return self.calculate_cer_ctc(ys_hat, ys_pad)
        elif not self.report_cer and not self.report_wer:
            return cer, wer

        seqs_hat, seqs_true = self.convert_to_char(ys_hat, ys_pad)
        if self.report_cer:
            cer = self.calculate_cer(seqs_hat, seqs_true)

        if self.report_wer:
            wer = self.calculate_wer(seqs_hat, seqs_true)
        return cer, wer

    def calculate_cer_ctc(self, ys_hat, ys_pad):
        """Calculate sentence-level CER score for CTC.

        :param paddle.Tensor ys_hat: prediction (batch, seqlen)
        :param paddle.Tensor ys_pad: reference (batch, seqlen)
        :return: average sentence-level CER score, ``None`` if no reference
            contains any character
        :rtype float
        """
        cers, char_ref_lens = [], []
        for i, y in enumerate(ys_hat):
            # merge runs of repeated ids (CTC collapse)
            y_hat = [x[0] for x in groupby(y)]
            y_true = ys_pad[i]
            seq_hat, seq_true = [], []
            # drop padding (-1), blank and space ids on both sides
            for idx in y_hat:
                idx = int(idx)
                if idx != -1 and idx != self.idx_blank and idx != self.idx_space:
                    seq_hat.append(self.char_list[idx])

            for idx in y_true:
                idx = int(idx)
                if idx != -1 and idx != self.idx_blank and idx != self.idx_space:
                    seq_true.append(self.char_list[idx])

            hyp_chars = "".join(seq_hat)
            ref_chars = "".join(seq_true)
            # sentences with an empty reference do not count
            if len(ref_chars) > 0:
                cers.append(editdistance.eval(hyp_chars, ref_chars))
                char_ref_lens.append(len(ref_chars))

        cer_ctc = float(sum(cers)) / sum(char_ref_lens) if cers else None
        return cer_ctc

    def convert_to_char(self, ys_hat, ys_pad):
        """Convert index to character.

        :param paddle.Tensor ys_hat: prediction (batch, seqlen)
        :param paddle.Tensor ys_pad: reference (batch, seqlen)
        :return: token list of prediction
        :rtype list
        :return: token list of reference
        :rtype list
        """
        seqs_hat, seqs_true = [], []
        for i, y_hat in enumerate(ys_hat):
            y_true = ys_pad[i]
            eos_true = np.where(y_true == -1)[0]
            ymax = eos_true[0] if len(eos_true) > 0 else len(y_true)
            # NOTE: padding index (-1) in y_true is used to pad y_hat
            seq_hat = [self.char_list[int(idx)] for idx in y_hat[:ymax]]
            seq_true = [
                self.char_list[int(idx)] for idx in y_true if int(idx) != -1
            ]
            seq_hat_text = "".join(seq_hat).replace(self.space, " ")
            seq_hat_text = seq_hat_text.replace(self.blank, "")
            seq_true_text = "".join(seq_true).replace(self.space, " ")
            seqs_hat.append(seq_hat_text)
            seqs_true.append(seq_true_text)
        return seqs_hat, seqs_true

    def calculate_cer(self, seqs_hat, seqs_true):
        """Calculate sentence-level CER score.

        :param list seqs_hat: prediction
        :param list seqs_true: reference
        :return: average sentence-level CER score, ``None`` if the
            references contain no characters
        :rtype float
        """
        char_eds, char_ref_lens = [], []
        for i, seq_hat_text in enumerate(seqs_hat):
            seq_true_text = seqs_true[i]
            hyp_chars = seq_hat_text.replace(" ", "")
            ref_chars = seq_true_text.replace(" ", "")
            char_eds.append(editdistance.eval(hyp_chars, ref_chars))
            char_ref_lens.append(len(ref_chars))
        total_ref_len = sum(char_ref_lens)
        # Same convention as calculate_cer_ctc: the rate is undefined without
        # reference characters, so report None instead of dividing by zero.
        if total_ref_len == 0:
            return None
        return float(sum(char_eds)) / total_ref_len

    def calculate_wer(self, seqs_hat, seqs_true):
        """Calculate sentence-level WER score.

        :param list seqs_hat: prediction
        :param list seqs_true: reference
        :return: average sentence-level WER score, ``None`` if the
            references contain no words
        :rtype float
        """
        word_eds, word_ref_lens = [], []
        for i, seq_hat_text in enumerate(seqs_hat):
            seq_true_text = seqs_true[i]
            hyp_words = seq_hat_text.split()
            ref_words = seq_true_text.split()
            word_eds.append(editdistance.eval(hyp_words, ref_words))
            word_ref_lens.append(len(ref_words))
        total_ref_len = sum(word_ref_lens)
        # Same convention as calculate_cer_ctc: the rate is undefined without
        # reference words, so report None instead of dividing by zero.
        if total_ref_len == 0:
            return None
        return float(sum(word_eds)) / total_ref_len


================================================
FILE: paddlespeech/s2t/utils/layer_tools.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from paddle import nn

__all__ = [
    "summary", "gradient_norm", "freeze", "unfreeze", "print_grads",
    "print_params"
]


def summary(layer: nn.Layer, print_func=print):
    """Print one line per entry of ``layer.state_dict()`` plus a total.

    Each line shows the entry name, its shape and its number of elements.
    The total reports the number of entries and the element count divided
    by 1024**2.  Nothing is done when ``print_func`` is ``None``.
    """
    if print_func is None:
        return

    entry_count = 0
    element_count = 0
    for name, param in layer.state_dict().items():
        size = np.prod(param.shape)
        if print_func:
            print_func("{} | {} | {}".format(name, param.shape, size))
        element_count += size
        entry_count += 1

    if print_func:
        scaled = element_count / 1024**2
        print_func(
            f"Total parameters: {entry_count}, {scaled:.2f}M elements.")


def print_grads(model, print_func=print):
    """Report name, shape and ``grad`` of every named parameter of ``model``.

    One message per parameter is passed to ``print_func``; nothing is done
    when ``print_func`` is ``None``.
    """
    if print_func is None:
        return
    for name, param in model.named_parameters():
        print_func(
            f"param grad: {name}: shape: {param.shape} grad: {param.grad}")


def print_params(model, print_func=print):
    """Print one line per named parameter of ``model`` plus a total.

    Each line shows the parameter name, its shape, its number of elements
    and whether it takes part in gradient computation
    (``not stop_gradient``).  The total reports the number of parameters
    and the element count divided by 1024**2.  Nothing is done when
    ``print_func`` is ``None``.
    """
    if print_func is None:
        return
    total = 0.0
    # Keep the counter an integer so the total reads "2", not "2.0"
    # (the same convention ``summary`` uses).
    num_params = 0
    for n, p in model.named_parameters():
        size = np.prod(p.shape)
        msg = f"{n} | {p.shape} | {size} | {not p.stop_gradient}"
        total += size
        num_params += 1
        if print_func:
            print_func(msg)
    if print_func:
        total = total / 1024**2
        print_func(f"Total parameters: {num_params}, {total:.2f}M elements.")


def gradient_norm(layer: nn.Layer):
    """Map each trainable ``state_dict`` entry to its scaled gradient norm.

    The value stored per entry is ``np.linalg.norm(grad) / grad.size``.
    """
    norms = {}
    for name, param in layer.state_dict().items():
        if not param.trainable:
            continue
        # per the original author's note this is a numpy.ndarray
        grad = param.gradient()
        norms[name] = np.linalg.norm(grad) / grad.size
    return norms


def recursively_remove_weight_norm(layer: nn.Layer):
    """Call ``nn.utils.remove_weight_norm`` on every sublayer of ``layer``.

    A ``ValueError`` raised for a sublayer is ignored on purpose; the
    original author's note says this happens when the sublayer has no
    weight norm hook.
    """
    for layer in layer.sublayers():
        try:
            nn.utils.remove_weight_norm(layer)
        except ValueError:
            # best effort: this sublayer is left untouched
            continue


def freeze(layer: nn.Layer):
    """Set ``trainable = False`` on every parameter of ``layer``."""
    for parameter in layer.parameters():
        setattr(parameter, "trainable", False)


def unfreeze(layer: nn.Layer):
    """Set ``trainable = True`` on every parameter of ``layer``."""
    for parameter in layer.parameters():
        setattr(parameter, "trainable", True)


================================================
FILE: paddlespeech/s2t/utils/log.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import getpass
import inspect
import os
import socket
import sys

from loguru import logger
from paddle import inference


def find_log_dir(log_dir=None):
    """Return the first existing, writable directory to put log files into.

    Args:
        log_dir: str|None, if specified, it is the only directory that is
            considered.  Otherwise '/tmp/' and './' are tried in that order.
    Returns:
        str: the chosen directory, exactly as it was given / listed.
    Raises:
        FileNotFoundError: raised when no candidate is a writable directory.
    """
    # An explicit log_dir is used alone; there is no fallback for it.
    candidates = [log_dir] if log_dir else ['/tmp/', './']

    usable = (path for path in candidates
              if os.path.isdir(path) and os.access(path, os.W_OK))
    chosen = next(usable, None)
    if chosen is None:
        raise FileNotFoundError(
            "Can't find a writable directory for logs, tried %s" % candidates)
    return chosen


def find_log_dir_and_names(program_name=None, log_dir=None):
    """Computes the directory and filename prefix for log file.
    Args:
        program_name: str|None, the filename part of the path to the program
            that is running, without its extension. e.g. if your program is
            called 'usr/bin/foobar.py' this would normally be 'foobar'. Any
            string is accepted and is used as part of the log filename. When
            omitted, the name is derived from ``sys.argv[0]`` and prefixed
            with ``py_``.
        log_dir: str|None, the desired log directory.
    Returns:
        (log_dir, file_prefix, program_name), where file_prefix has the form
        '<program_name>.<hostname>.<username>.log'.
    Raises:
        FileNotFoundError: raised when it cannot find a log directory.
    """
    if not program_name:
        # Strip the extension (foobar.par becomes foobar, and
        # fubar.py becomes fubar). We do this so that the log
        # file names are similar to C++ log file names.
        program_name = os.path.splitext(os.path.basename(sys.argv[0]))[0]

        # Prepend py_ to files so that python code gets a unique file, and
        # so that C++ libraries do not try to write to the same log files as us.
        program_name = 'py_%s' % program_name

    actual_log_dir = find_log_dir(log_dir=log_dir)

    try:
        username = getpass.getuser()
    except (KeyError, OSError, ImportError):
        # This can happen, e.g. when running under docker w/o passwd file.
        # Older Pythons surface it as KeyError (or ImportError where the pwd
        # module is missing); Python 3.13+ raises OSError instead.
        if hasattr(os, 'getuid'):
            # Windows doesn't have os.getuid
            username = str(os.getuid())
        else:
            username = 'unknown'
    hostname = socket.gethostname()
    file_prefix = '%s.%s.%s.log' % (program_name, hostname, username)

    return actual_log_dir, file_prefix, program_name


class Log():
    """Default Logger for all.

    The class body below runs once, when the class statement is executed, and
    configures the shared loguru ``logger`` as a side effect. Instances carry
    no state of their own; ``getlog()`` always returns that shared ``logger``.
    """
    # Start from a clean slate: called without arguments, loguru's remove()
    # drops the handlers registered so far.
    logger.remove()

    # Walk up the call stack from this frame and check whether the CLI entry
    # point or any file under paddlespeech/t2s appears in it.
    # NOTE(review): the match is a plain substring test using '/' separators,
    # so it presumably never matches Windows-style paths - confirm.
    _call_from_cli = False
    _frame = inspect.currentframe()
    while _frame:
        if 'paddlespeech/cli/entry.py' in _frame.f_code.co_filename or 'paddlespeech/t2s' in _frame.f_code.co_filename:
            _call_from_cli = True
            break
        _frame = _frame.f_back

    if _call_from_cli:
        # CLI / t2s callers: stdout sink at ERROR level, no log file.
        # The filter (level number >= 20, i.e. loguru's INFO) looks redundant
        # next to level='ERROR'.
        logger.add(
            sys.stdout,
            level='ERROR',
            enqueue=True,
            filter=lambda record: record['level'].no >= 20)
    else:
        # Other callers: stdout sink at INFO level plus a DEBUG-level file sink.
        logger.add(
            sys.stdout,
            level='INFO',
            enqueue=True,
            filter=lambda record: record['level'].no >= 20)
        # Only the file prefix is used; the directory and program name
        # returned by find_log_dir_and_names() are discarded.
        _, file_prefix, _ = find_log_dir_and_names()
        # The file sink always lives under the relative path "exp/log".
        sink_prefix = os.path.join("exp/log", file_prefix)
        # file_prefix ends with ".log"; drop the trailing "log" (keeping the
        # dot) so the result is "<prefix>.{time}.log", where "{time}" is a
        # placeholder left for loguru to fill in.
        sink_path = sink_prefix[:-3] + "{time}.log"
        logger.add(sink_path, level='DEBUG', enqueue=True, rotation="500 MB")

    def __init__(self, name=None):
        """Accept an optional ``name`` for call-site symmetry; it is not used."""
        pass

    def getlog(self):
        """Return the shared loguru ``logger`` configured by the class body."""
        return logger


class Autolog:
    """Builds and holds an ``auto_log.AutoLogger``; just used by fullchain project."""

    def __init__(self,
                 batch_size,
                 model_name="DeepSpeech",
                 model_precision="fp32"):
        """Create the underlying AutoLogger.

        Args:
            batch_size: forwarded to ``AutoLogger`` as ``batch_size``.
            model_name: forwarded to ``AutoLogger`` as ``model_name``.
            model_precision: forwarded to ``AutoLogger`` as ``model_precision``.
        """
        # Imported here so auto_log is only needed when this class is used.
        import auto_log

        visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES', None)
        if visible_devices:
            # Use the first entry listed in CUDA_VISIBLE_DEVICES.
            gpu_id = int(visible_devices.split(',')[0])
            infer_config = inference.Config()
            infer_config.enable_use_gpu(100, gpu_id)
        else:
            gpu_id = None
            infer_config = inference.Config()

        logger_kwargs = dict(
            model_name=model_name,
            model_precision=model_precision,
            batch_size=batch_size,
            data_shape="dynamic",
            save_path="./output/auto_log.lpg",
            inference_config=infer_config,
            pids=os.getpid(),
            process_name=None,
            gpu_ids=gpu_id,
            time_keys=['preprocess_time', 'inference_time', 'postprocess_time'],
            warmup=0)
        self.autolog = auto_log.AutoLogger(**logger_kwargs)

    def getlog(self):
        """Return the wrapped ``AutoLogger`` instance."""
        return self.autolog


================================================
FILE: paddlespeech/s2t/utils/mp_tools.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from functools import wraps

from paddle import distributed as dist

# Names exported by a star-import of this module.
__all__ = ["rank_zero_only"]


def rank_zero_only(func):
    """Decorator that runs ``func`` only on the process whose rank is 0.

    On every other rank the wrapped call does nothing and returns ``None``.
    """

    @wraps(func)
    def guarded(*args, **kwargs):
        if dist.get_rank() == 0:
            return func(*args, **kwargs)
        return None

    return guarded


================================================
FILE: paddlespeech/s2t/utils/profiler.py
================================================
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys

import paddle

from paddlespeech.s2t.utils.log import Log

# Shared project logger; the name passed to Log is not used by it.
logger = Log(__name__).getlog()

# Number of profiler steps seen so far. add_profiler_step() compares it with
# the configured batch_range to decide when to start/stop tracing and then
# increments it.
_profiler_step_id = 0

# Parsed ProfilerOptions, created on the first add_profiler_step() call that
# receives an options string, so the string is not re-parsed on every step.
_profiler_options = None


class ProfilerOptions(object):
    '''
    Use a string to initialize a ProfilerOptions.
    The string should be in the format: "key1=value1;key2=value;key3=value3".
    Spaces are ignored and empty segments (e.g. a trailing ';') are skipped.
    For example:
      "profile_path=model.profile"
      "batch_range=[50, 60]; profile_path=model.profile"
      "batch_range=[50, 60]; tracer_option=OpDetail; profile_path=model.profile"
    ProfilerOptions supports following key-value pair:
      batch_range      - a integer list, e.g. [100, 110].
      state            - a string, the optional values are 'CPU', 'GPU' or 'All'.
      sorted_key       - a string, the optional values are 'calls', 'total',
                         'max', 'min' or 'ave'.
      tracer_option    - a string, the optional values are 'Default', 'OpDetail',
                         'AllOpDetail'.
      profile_path     - a string, the path to save the serialized profile data,
                         which can be used to generate a timeline.
      exit_on_finished - a boolean.
    Unknown keys are ignored. A segment without '=' raises ValueError.
    '''

    def __init__(self, options_str):
        assert isinstance(options_str, str)

        # Defaults; overridden by whatever the options string provides.
        self._options = {
            'batch_range': [10, 20],
            'state': 'All',
            'sorted_key': 'total',
            'tracer_option': 'Default',
            'profile_path': '/tmp/profile',
            'exit_on_finished': True
        }
        self._parse_from_string(options_str)

    def _parse_from_string(self, options_str):
        """Update ``self._options`` from a "key=value;key=value" string."""
        if not options_str:
            return

        for kv in options_str.replace(' ', '').split(';'):
            if not kv:
                # Tolerate empty segments such as the one after a trailing ';'.
                continue
            # Split on the first '=' only so that values may contain '='.
            key, sep, value = kv.partition('=')
            if not sep:
                raise ValueError(
                    "Invalid profiler option '%s', expected 'key=value'." % kv)
            if key == 'batch_range':
                value_list = value.replace('[', '').replace(']', '').split(',')
                value_list = list(map(int, value_list))
                # Only accept a non-negative start followed by a larger end;
                # anything else keeps the current value.
                if len(value_list) >= 2 and value_list[0] >= 0 and value_list[
                        1] > value_list[0]:
                    self._options[key] = value_list
            elif key == 'exit_on_finished':
                self._options[key] = value.lower() in ("yes", "true", "t", "1")
            elif key in [
                    'state', 'sorted_key', 'tracer_option', 'profile_path'
            ]:
                self._options[key] = value

    def __getitem__(self, name):
        """Return the option called ``name``; raise ValueError if unknown."""
        if self._options.get(name, None) is None:
            raise ValueError(
                "ProfilerOptions does not have an option named %s." % name)
        return self._options[name]


def add_profiler_step(options_str=None):
    '''
    Advance the profiler step counter and start/stop PaddlePaddle's
    operator-level profiler at the configured steps.
    The counter is independent of the training loop: each call with a
    non-None ``options_str`` counts as one profiler step.

    Args:
      options_str - a string to initialize the ProfilerOptions.
                    Default is None, and the profiler is disabled.
    '''
    global _profiler_step_id
    global _profiler_options

    # Profiling is off: do not touch any state.
    if options_str is None:
        return

    # Parse the option string once and reuse the result afterwards.
    if _profiler_options is None:
        _profiler_options = ProfilerOptions(options_str)
        logger.info(f"Profiler: {options_str}")
        logger.info(f"Profiler: {_profiler_options._options}")

    opts = _profiler_options
    batch_range = opts['batch_range']
    if _profiler_step_id == batch_range[0]:
        paddle.utils.profiler.start_profiler(opts['state'],
                                             opts['tracer_option'])
    elif _profiler_step_id == batch_range[1]:
        paddle.utils.profiler.stop_profiler(opts['sorted_key'],
                                            opts['profile_path'])
        if opts['exit_on_finished']:
            sys.exit(0)

    _profiler_step_id += 1


================================================
FILE: paddlespeech/s2t/utils/socket_server.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import random
import socket
import socketserver
import struct
import time
import wave
from time import gmtime
from time import strftime

import jsonlines

# Names exported by a star-import of this module.
__all__ = ["socket_send", "warm_up_test", "AsrTCPServer", "AsrRequestHandler"]


def socket_send(server_ip: str, server_port: str, data: bytes):
    """Send one utterance to the ASR TCP server and print the reply.

    The payload is framed as a 4-byte big-endian signed length followed by the
    raw bytes. A single reply of up to 1024 bytes is read back and decoded as
    UTF-8.

    Args:
        server_ip (str): server host name or IP address.
        server_port (str|int): server port; converted with ``int()`` because
            ``socket.connect`` requires an integer port.
        data (bytes): audio bytes to send.
    """
    # The context manager closes the socket even if connect/send/recv raises.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        # Connect to server and send data
        sock.connect((server_ip, int(server_port)))
        sock.sendall(struct.pack('>i', len(data)) + data)
        print('Speech[length=%d] Sent.' % len(data))
        # Receive data from the server and shut down
        received = sock.recv(1024)
    print("Recognition Results: {}".format(received.decode('utf8')))


def warm_up_test(audio_process_handler,
                 manifest_path,
                 num_test_cases,
                 random_seed=0):
    """Warm up the handler on a seeded random sample of the manifest.

    Args:
        audio_process_handler: callable invoked with each sample's 'feat'
            value; its result is printed as the transcript.
        manifest_path: path of the jsonlines manifest to sample from.
        num_test_cases: how many manifest entries to sample.
        random_seed: seed of the private random generator used for sampling.
    """
    with jsonlines.open(manifest_path) as reader:
        entries = list(reader)

    # A dedicated generator keeps the sample reproducible for a given seed.
    picker = random.Random(random_seed)
    chosen = picker.sample(entries, num_test_cases)

    case_no = 0
    for case in chosen:
        print("Warm-up Test Case %d: %s" % (case_no, case['feat']))
        began = time.time()
        transcript = audio_process_handler(case['feat'])
        ended = time.time()
        print("Response Time: %f, Transcript: %s" % (ended - began,
                                                     transcript))
        case_no += 1


class AsrTCPServer(socketserver.TCPServer):
    """The ASR TCP Server.

    Extends ``socketserver.TCPServer`` with two attributes that request
    handlers read through ``self.server``:

    Attributes set in ``__init__``:
        speech_save_dir: directory where received utterances are saved.
        audio_process_handler: callable that receives a saved file path and
            returns the transcript.
    """

    def __init__(self,
                 server_address,
                 RequestHandlerClass,
                 speech_save_dir,
                 audio_process_handler,
                 bind_and_activate=True):
        # Set the extra attributes first so they exist as soon as the base
        # class starts serving.
        self.speech_save_dir = speech_save_dir
        self.audio_process_handler = audio_process_handler
        # Forward the caller's choice instead of always binding.
        socketserver.TCPServer.__init__(
            self,
            server_address,
            RequestHandlerClass,
            bind_and_activate=bind_and_activate)


class AsrRequestHandler(socketserver.BaseRequestHandler):
    """The ASR request handler.

    Protocol: the client sends a 4-byte big-endian signed length followed by
    that many bytes of audio. The audio is saved as a wav file, passed to
    ``self.server.audio_process_handler`` and the returned transcript is sent
    back UTF-8 encoded.
    """

    def handle(self):
        """Receive one utterance, transcribe it and send the transcript back.

        If the peer closes the connection before the header or the announced
        payload has fully arrived, a message is printed and nothing is saved
        or sent.
        """
        # receive data through TCP socket
        header = self._recv_exact(4)
        if header is None:
            print("Connection from %s closed before the length header arrived."
                  % self.client_address[0])
            return
        target_len = struct.unpack('>i', header)[0]
        data = self._recv_exact(target_len) if target_len > 0 else b''
        if data is None:
            print("Connection from %s closed before %d bytes arrived." %
                  (self.client_address[0], target_len))
            return
        # write to file
        filename = self._write_to_file(data)

        print("Received utterance[length=%d] from %s, saved to %s." %
              (len(data), self.client_address[0], filename))
        start_time = time.time()
        transcript = self.server.audio_process_handler(filename)
        finish_time = time.time()
        print("Response Time: %f, Transcript: %s" %
              (finish_time - start_time, transcript))
        self.request.sendall(transcript.encode('utf-8'))

    def _recv_exact(self, num_bytes):
        """Read exactly ``num_bytes`` from the socket.

        Returns:
            bytes of length ``num_bytes``, or None if the peer closed the
            connection first (``recv`` returned an empty chunk).
        """
        buf = bytearray()
        while len(buf) < num_bytes:
            chunk = self.request.recv(min(1024, num_bytes - len(buf)))
            if not chunk:
                # An empty read means EOF; stop instead of spinning forever.
                return None
            buf.extend(chunk)
        return bytes(buf)

    def _write_to_file(self, data):
        """Save ``data`` as a mono, 16-bit, 16 kHz wav file and return its path.

        The file is named '<UTC timestamp>_<client ip>.wav' inside
        ``self.server.speech_save_dir``, which is created if missing.
        """
        # prepare save dir and filename
        os.makedirs(self.server.speech_save_dir, exist_ok=True)
        timestamp = strftime("%Y%m%d%H%M%S", gmtime())
        out_filename = os.path.join(
            self.server.speech_save_dir,
            timestamp + "_" + self.client_address[0] + ".wav")
        # write to wav file; the context manager closes it even on error
        with wave.open(out_filename, 'wb') as wav_file:
            wav_file.setnchannels(1)
            wav_file.setsampwidth(2)
            wav_file.setframerate(16000)
            wav_file.writeframes(data)
        return out_filename


================================================
FILE: paddlespeech/s2t/utils/spec_augment.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/s2t/utils/tensor_utils.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Unility functions for Transformer."""
from typing import List
from typing import Tuple

import paddle

from paddlespeech.s2t.utils.log import Log

# Names exported by a star-import of this module.
__all__ = ["pad_sequence", "add_sos_eos", "th_accuracy", "has_tensor"]

# Shared project logger; the name passed to Log is not used by it.
logger = Log(__name__).getlog()


def has_tensor(val):
    """Tell whether ``val`` is, or contains, a paddle Tensor.

    Lists, tuples and dict values are searched recursively; any other object
    is checked with ``paddle.is_tensor``.

    Args:
        val: arbitrary object, possibly a nested list/tuple/dict.

    Returns:
        bool: True if a tensor is found, False otherwise (including for
        empty containers).
    """
    if isinstance(val, (list, tuple)):
        return any(has_tensor(item) for item in val)
    if isinstance(val, dict):
        return any(has_tensor(item) for item in val.values())
    return paddle.is_tensor(val)


def pad_sequence(sequences: List[paddle.Tensor],
                 batch_first: bool=False,
                 padding_value: float=0.0) -> paddle.Tensor:
    r"""Pad a list of variable length Tensors with ``padding_value``

    ``pad_sequence`` stacks a list of Tensors along a new dimension,
    and pads them to equal length. For example, if the input is a list of
    sequences with size ``L x *``, the output has size ``T x B x *`` when
    batch_first is False, and ``B x T x *`` otherwise.

    `B` is batch size. It is equal to the number of elements in ``sequences``.
    `T` is length of the longest sequence.
    `L` is length of the sequence.
    `*` is any number of trailing dimensions, including none.

    Example:
        >>> from paddlespeech.s2t.utils.tensor_utils import pad_sequence
        >>> a = paddle.ones([25, 300])
        >>> b = paddle.ones([22, 300])
        >>> c = paddle.ones([15, 300])
        >>> pad_sequence([a, b, c]).shape
        [25, 3, 300]

    Note:
        This function returns a Tensor of size ``T x B x *`` or ``B x T x *``
        where `T` is the length of the longest sequence. This function assumes
        trailing dimensions and type of all the Tensors in sequences are same.

    Args:
        sequences (list[Tensor]): list of variable length sequences.
        batch_first (bool, optional): output will be in ``B x T x *`` if True, or in
            ``T x B x *`` otherwise
        padding_value (float, optional): value for padded elements. Default: 0.

    Returns:
        Tensor of size ``T x B x *`` if :attr:`batch_first` is ``False``.
        Tensor of size ``B x T x *`` otherwise
    """

    # assuming trailing dimensions and type of all the Tensors
    # in sequences are same and fetching those from sequences[0]
    max_size = paddle.shape(sequences[0])
    # (TODO Hui Zhang): slice not support `end==start`
    # trailing_dims = max_size[1:]
    # For 1-D inputs there are no trailing dims, so the slice is skipped.
    trailing_dims = tuple(
        max_size[1:].numpy().tolist()) if sequences[0].ndim >= 2 else ()
    # Longest first dimension among all sequences.
    max_len = max([s.shape[0] for s in sequences])
    if batch_first:
        out_dims = (len(sequences), max_len) + trailing_dims
    else:
        out_dims = (max_len, len(sequences)) + trailing_dims

    # NOTE(review): `new_full` is presumably provided by a project-level
    # Tensor patch and returns a tensor of shape out_dims filled with
    # padding_value - confirm where it is defined.
    out_tensor = sequences[0].new_full(out_dims, padding_value)
    for i, tensor in enumerate(sequences):
        length = tensor.shape[0]
        # use index notation to prevent duplicate references to the tensor
        logger.debug(
            f"length {length}, out_tensor {out_tensor.shape}, tensor {tensor.shape}"
        )
        if batch_first:
            # TODO (Hui Zhang): set_value op not support `end==start`
            # TODO (Hui Zhang): set_value op not support int16
            # TODO (Hui Zhang): set_varbase 2 rank not support [0,0,...]
            # out_tensor[i, :length, ...] = tensor
            if length != 0:
                out_tensor[i, :length] = tensor
            else:
                # NOTE(review): for an empty sequence this assigns at time
                # index 0 instead of using an empty slice; presumably a
                # workaround for the set_value limitation above - confirm it
                # leaves the padding intact.
                out_tensor[i, length] = tensor
        else:
            # TODO (Hui Zhang): set_value op not support `end==start`
            # out_tensor[:length, i, ...] = tensor
            if length != 0:
                out_tensor[:length, i] = tensor
            else:
                # Same empty-sequence workaround as in the batch_first branch.
                out_tensor[length, i] = tensor

    return out_tensor


def add_sos_eos(ys_pad: paddle.Tensor, sos: int, eos: int,
                ignore_id: int) -> Tuple[paddle.Tensor, paddle.Tensor]:
    """Add <sos> and <eos> labels.

    ``ys_in`` is ``ys_pad`` with <sos> prepended and padding replaced by
    <eos>; ``ys_out`` is ``ys_pad`` with <eos> placed right after the last
    real label and padding kept as ``ignore_id``.

    Args:
        ys_pad (paddle.Tensor): batch of padded target sequences (B, Lmax)
        sos (int): index of <sos>
        eos (int): index of <eos>
        ignore_id (int): index of padding
    Returns:
        ys_in (paddle.Tensor) : (B, Lmax + 1)
        ys_out (paddle.Tensor) : (B, Lmax + 1)
    Examples:
        >>> sos_id = 10
        >>> eos_id = 11
        >>> ignore_id = -1
        >>> ys_pad
        tensor([[ 1,  2,  3,  4,  5],
                [ 4,  5,  6, -1, -1],
                [ 7,  8,  9, -1, -1]], dtype=paddle.int32)
        >>> ys_in,ys_out=add_sos_eos(ys_pad, sos_id , eos_id, ignore_id)
        >>> ys_in
        tensor([[10,  1,  2,  3,  4,  5],
                [10,  4,  5,  6, 11, 11],
                [10,  7,  8,  9, 11, 11]])
        >>> ys_out
        tensor([[ 1,  2,  3,  4,  5, 11],
                [ 4,  5,  6, 11, -1, -1],
                [ 7,  8,  9, 11, -1, -1]])
    """
    # TODO(Hui Zhang): using comment code,
    #_sos = paddle.to_tensor(
    #    [sos], dtype=paddle.long, stop_gradient=True, place=ys_pad.place)
    #_eos = paddle.to_tensor(
    #    [eos], dtype=paddle.long, stop_gradient=True, place=ys_pad.place)
    #ys = [y[y != ignore_id] for y in ys_pad]  # parse padded ys
    #ys_in = [paddle.cat([_sos, y], dim=0) for y in ys]
    #ys_out = [paddle.cat([y, _eos], dim=0) for y in ys]
    #return pad_sequence(ys_in, padding_value=eos), pad_sequence(ys_out, padding_value=ignore_id)
    # NOTE(review): `paddle.cat` and `Tensor.masked_fill` are presumably
    # provided by a project-level paddle patch - confirm.
    B = ys_pad.shape[0]
    # One <sos>/<eos> column per batch row, in the dtype of ys_pad.
    _sos = paddle.ones([B, 1], dtype=ys_pad.dtype) * sos
    _eos = paddle.ones([B, 1], dtype=ys_pad.dtype) * eos
    ys_in = paddle.cat([_sos, ys_pad], dim=1)
    # Padding positions of ys_in, i.e. the padding of ys_pad shifted right
    # by one because of the prepended <sos>.
    mask_pad = (ys_in == ignore_id)
    ys_in = ys_in.masked_fill(mask_pad, eos)

    ys_out = paddle.cat([ys_pad, _eos], dim=1)
    # After this fill, the only ignore_id left in a padded row is the first
    # padding slot (the shifted mask does not cover it).
    ys_out = ys_out.masked_fill(mask_pad, eos)
    # That remaining slot is where <eos> belongs: right after the last label.
    mask_eos = (ys_out == ignore_id)
    ys_out = ys_out.masked_fill(mask_eos, eos)
    # Finally restore ignore_id on the positions covered by the shifted mask.
    ys_out = ys_out.masked_fill(mask_pad, ignore_id)
    return ys_in, ys_out


def th_accuracy(pad_outputs: paddle.Tensor,
                pad_targets: paddle.Tensor,
                ignore_label: int) -> float:
    """Compute the accuracy over the non-ignored target positions.

    Args:
        pad_outputs (Tensor): Prediction tensors (B * Lmax, D).
        pad_targets (LongTensor): Target label tensors; the first two
            dimensions (B, Lmax) are used to reshape the predictions.
        ignore_label (int): Ignore label id.
    Returns:
        float: Accuracy value (0.0 - 1.0).
    """
    batch_size, max_len = pad_targets.shape[0], pad_targets.shape[1]
    num_classes = pad_outputs.shape[1]

    # Most likely class per position, laid out like the targets.
    predictions = pad_outputs.reshape(
        [batch_size, max_len, num_classes]).argmax(2)

    # Only positions whose target is not the ignore label are scored.
    valid = pad_targets != ignore_label
    hits = predictions.masked_select(valid) == pad_targets.masked_select(valid)

    correct = paddle.sum(hits)
    total = paddle.sum(valid)
    return float(correct) / float(total)


================================================
FILE: paddlespeech/s2t/utils/text_grid.py
================================================
# Copyright 2021 Mobvoi Inc. All Rights Reserved.
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from wenet(https://github.com/wenet-e2e/wenet)
from typing import Dict
from typing import List
from typing import Text

import textgrid


def segment_alignment(alignment: List[int], blank_id=0) -> List[List[int]]:
    """segment ctc alignment ids by continuous blank and repeat label.

    Each segment holds the blanks that precede a token followed by the
    repeated frames of that token. Trailing blanks are attached to the last
    segment. An alignment made only of blanks yields a single segment that
    contains all of them; an empty alignment yields an empty list.

    Args:
        alignment (List[int]): ctc alignment id sequence.
            e.g. [0, 0, 0, 1, 1, 1, 2, 0, 0, 3]
        blank_id (int, optional): blank id. Defaults to 0.

    Returns:
        List[List[int]]: token align, segment alignment id sequence.
            e.g. [[0, 0, 0, 1, 1, 1], [2], [0, 0, 3]]
    """
    # The segments are later converted to Praat TextGrid intervals, which
    # makes the alignment easy to inspect.
    align_segs = []
    # get frames level duration for each token
    start = 0
    end = 0
    total = len(alignment)
    while end < total:
        while end < total and alignment[end] == blank_id:  # blank
            end += 1
        if end == total:
            # Only blanks are left: attach them to the previous segment, or
            # keep them as their own segment when no token was seen at all.
            if align_segs:
                align_segs[-1].extend(alignment[start:])
            else:
                align_segs.append(alignment[start:])
            break
        end += 1
        while end < total and alignment[end - 1] == alignment[
                end]:  # repeat label
            end += 1
        align_segs.append(alignment[start:end])
        start = end
    return align_segs


def align_to_tierformat(align_segs: List[List[int]],
                        subsample: int,
                        token_dict: Dict[int, Text],
                        blank_id=0) -> List[Text]:
    """Turn alignment segments into "start end text" interval lines.

    Every segment becomes one line whose duration is
    ``len(segment) * subsample`` frames of 10 ms each. Each line is also
    printed. The label is the last id of the segment, except for the final
    segment, where the first non-blank id is used if there is one.

    Args:
        align_segs (List[List[int]]): segmented ctc alignment ids.
        subsample (int): 25ms frame_length, 10ms hop_length, 1/subsample
        token_dict (Dict[int, Text]): int -> str map.
        blank_id (int, optional): blank id. Defaults to 0.

    Returns:
        List[Text]: list of textgrid.Interval text, str(start, end, text).
    """
    hop_length = 10  # ms
    second_ms = 1000  # ms
    frames_per_second = second_ms / hop_length  # 25ms frame_length, 10ms hop_length
    seconds_per_frame = 1.0 / frames_per_second

    lines = []
    cursor = 0
    last_idx = len(align_segs) - 1

    for idx, segment in enumerate(align_segs):
        label = segment[-1]
        if idx >= last_idx:
            # The final segment may end with blanks, so look for its first
            # non-blank id and fall back to the last id otherwise.
            label = next((tok for tok in segment if tok != blank_id), label)
        # time duration in second
        span = len(segment) * subsample * seconds_per_frame
        entry = f"{cursor:.2f} {cursor + span:.2f} {token_dict[label]}"
        print(entry)
        lines.append(entry + "\n")
        cursor = cursor + span

    return lines


def generate_textgrid(maxtime: float,
                      intervals: List[Text],
                      output: Text,
                      name: Text='ali') -> None:
    """Write the alignment intervals to a TextGrid file.

    Args:
        maxtime (float): audio duration.
        intervals (List[Text]): ctc output alignment, one
            "start-time end-time word" string per item.
        output (Text): textgrid filepath.
        name (Text, optional): tier or layer name. Defaults to 'ali'.
    """
    # Download Praat: https://www.fon.hum.uva.nl/praat/
    avg_interval = maxtime / (len(intervals) + 1)
    print(f"average second/token: {avg_interval}")

    # Small offset added to every interval start.
    margin = 0.0001

    grid = textgrid.TextGrid(maxTime=maxtime)
    tier = textgrid.IntervalTier(name=name, maxTime=maxtime)

    for line in intervals:
        begin, finish, mark = line.split()
        tier.add(minTime=float(begin) + margin, maxTime=float(finish), mark=mark)

    grid.append(tier)
    grid.write(output)
    print("successfully generator textgrid {}.".format(output))


================================================
FILE: paddlespeech/s2t/utils/utility.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains common utility functions."""
import math
import os
import random
import sys
from contextlib import contextmanager
from pprint import pformat
from typing import List

import distutils.util
import numpy as np
import paddle
import soundfile

from paddlespeech.s2t.utils.log import Log

# Shared project logger; the name passed to Log is not used by it.
logger = Log(__name__).getlog()

# Names exported by a star-import of this module.
# NOTE(review): get_subsample is defined in this module but not listed here -
# confirm whether that is intentional.
__all__ = ["all_version", "UpdateConfig", "seed_all", "log_add"]


def all_version():
    """Log the versions of Python, paddle (incl. commit) and soundfile."""
    versions = dict(
        python=sys.version,
        paddle=paddle.__version__,
        paddle_commit=paddle.version.commit,
        soundfile=soundfile.__version__)
    logger.info(f"Deps Module Version:{pformat(list(versions.items()))}")


@contextmanager
def UpdateConfig(config):
    """Update yacs config

    Defrosts ``config`` for the duration of the ``with`` block and freezes it
    again afterwards, even when the block raises.

    Args:
        config: object exposing ``defrost()`` and ``freeze()``.
    """
    config.defrost()
    try:
        yield
    finally:
        # Always re-freeze so an exception cannot leave the config mutable.
        config.freeze()


def seed_all(seed: int=20210329):
    """Seed the random generators of ``random``, numpy and paddle.

    Args:
        seed (int): value passed to all three generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    paddle.seed(seed)


def log_add(args: List[int]) -> float:
    """Numerically stable log-sum-exp of a list of log scores.

    Args:
        args (List[int]): log scores

    Returns:
        float: log of the sum of the exponentiated scores; ``-inf`` when
        every score is ``-inf`` (or the list is empty).
    """
    neg_inf = -float('inf')
    # Nothing but -inf: the sum of probabilities is zero.
    if not any(score != neg_inf for score in args):
        return neg_inf

    # Shift by the largest score before exponentiating to avoid overflow.
    peak = max(args)
    shifted_total = sum(math.exp(score - peak) for score in args)
    return peak + math.log(shifted_total)


def get_subsample(config):
    """Look up the encoder's subsampling rate in a model config.

    Args:
        config (yacs.config.CfgNode): yaml config providing ``encoder`` and,
            for non-squeezeformer encoders, ``encoder_conf.input_layer``.

    Returns:
        int: 4 for a ``squeezeformer`` encoder; otherwise 4, 6 or 8 for the
            input layers ``conv2d``, ``conv2d6`` and ``conv2d8``.
    """
    if config['encoder'] == 'squeezeformer':
        return 4

    rate_by_layer = (("conv2d", 4), ("conv2d6", 6), ("conv2d8", 8))
    input_layer = config["encoder_conf"]["input_layer"]
    assert input_layer in [layer for layer, _ in rate_by_layer]
    for layer, rate in rate_by_layer:
        if input_layer == layer:
            return rate


================================================
FILE: paddlespeech/server/README.md
================================================
# PaddleSpeech Server Command Line

([简体中文](./README_cn.md)|English)

 The simplest way to use PaddleSpeech Server, covering both the server and the client.

 ## PaddleSpeech Server
 ### Help
 ```bash
 paddlespeech_server help
 ```
 ### Start the server
 First set the service-related configuration parameters, similar to `./conf/application.yaml`. Set `engine_list`, which represents the speech tasks included in the service to be started.
 **Note:** If the service can be started normally in the container, but the client access IP is unreachable, you can try to replace the `host` address in the configuration file with the local IP address.

 Then start the service:
 ```bash
 paddlespeech_server start --config_file ./conf/application.yaml
 ```

 ## PaddleSpeech Client
 ### Help
 ```bash
 paddlespeech_client help
 ```
 ### Access speech recognition services 
 ```
 paddlespeech_client asr --server_ip 127.0.0.1 --port 8090 --input input_16k.wav
 ```
 
 ### Access text to speech services
 ```bash
 paddlespeech_client tts --server_ip 127.0.0.1 --port 8090 --input "你好，欢迎使用百度飞桨深度学习框架！" --output output.wav
 ```
 
 ### Access audio classification services
 ```bash
 paddlespeech_client cls --server_ip 127.0.0.1 --port 8090 --input input.wav
 ```

 ## Online ASR Server

### Launch online asr server
```
paddlespeech_server start --config_file conf/ws_conformer_application.yaml
```

### Access online asr server

```
paddlespeech_client asr_online  --server_ip 127.0.0.1 --port 8090 --input input_16k.wav
```

## Online TTS Server

### Launch online tts server
```
paddlespeech_server start --config_file conf/tts_online_application.yaml
```

### Access online tts server

```
paddlespeech_client tts_online  --server_ip 127.0.0.1 --port 8092 --input "您好，欢迎使用百度飞桨深度学习框架！" --output output.wav
```


## Speaker Verification

### Launch speaker verification server

```
paddlespeech_server start --config_file conf/vector_application.yaml
```

### Extract speaker embedding from audio

```
paddlespeech_client vector --task spk  --server_ip 127.0.0.1 --port 8090 --input 85236145389.wav
```

### Get score with speaker audio embedding

```
paddlespeech_client vector --task score  --server_ip 127.0.0.1 --port 8090 --enroll 123456789.wav --test 85236145389.wav
```


================================================
FILE: paddlespeech/server/README_cn.md
================================================
# PaddleSpeech Server 命令行工具

(简体中文|[English](./README.md))

它提供了最简便的方式调用 PaddleSpeech 语音服务，用一行命令就可以轻松启动服务和调用服务。

 ## 服务端命令行使用
 ### 帮助
 ```bash
 paddlespeech_server help
 ```
 ### 启动服务
 首先设置服务相关配置文件，类似于 `./conf/application.yaml`，设置 `engine_list`，该值表示即将启动的服务中包含的语音任务。
 **注意：** 如果在容器里可正常启动服务，但客户端访问 ip 不可达，可尝试将配置文件中 `host` 地址换成本地 ip 地址。
 然后启动服务：
 ```bash
 paddlespeech_server start --config_file ./conf/application.yaml
 ```

 ## 客户端命令行使用
 ### 帮助
 ```bash
 paddlespeech_client help
 ```
 ### 访问语音识别服务 
 ```
 paddlespeech_client asr --server_ip 127.0.0.1 --port 8090 --input input_16k.wav
 ```
 
 ### 访问语音合成服务
 ```bash
 paddlespeech_client tts --server_ip 127.0.0.1 --port 8090 --input "你好，欢迎使用百度飞桨深度学习框架！" --output output.wav
 ```

 ### 访问音频分类服务
 ```bash
 paddlespeech_client cls --server_ip 127.0.0.1 --port 8090 --input input.wav
 ```

## 流式ASR

### 启动流式语音识别服务

```
paddlespeech_server start --config_file conf/ws_conformer_application.yaml
```

### 访问流式语音识别服务

```
paddlespeech_client asr_online  --server_ip 127.0.0.1 --port 8090 --input zh.wav
```

## 流式TTS

### 启动流式语音合成服务

```
paddlespeech_server start --config_file conf/tts_online_application.yaml
```

### 访问流式语音合成服务

```
paddlespeech_client tts_online  --server_ip 127.0.0.1 --port 8092 --input "您好，欢迎使用百度飞桨深度学习框架！" --output output.wav
```

## 声纹识别

### 启动声纹识别服务

```
paddlespeech_server start --config_file conf/vector_application.yaml
```

### 获取说话人音频声纹

```
paddlespeech_client vector --task spk  --server_ip 127.0.0.1 --port 8090 --input 85236145389.wav
```

### 两个说话人音频声纹打分

```
paddlespeech_client vector --task score  --server_ip 127.0.0.1 --port 8090 --enroll 123456789.wav --test 85236145389.wav
```


================================================
FILE: paddlespeech/server/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import _locale

from .base_commands import ClientBaseCommand
from .base_commands import ClientHelpCommand
from .base_commands import ServerBaseCommand
from .base_commands import ServerHelpCommand
from .bin.paddlespeech_client import ASRClientExecutor
from .bin.paddlespeech_client import CLSClientExecutor
from .bin.paddlespeech_client import TTSClientExecutor
from .bin.paddlespeech_server import ServerExecutor

# Replace the private `_locale._getdefaultlocale` hook with a function that
# always reports ['en_US', 'utf8'], whatever arguments it receives.
# NOTE(review): presumably this forces UTF-8 as the default text encoding on
# hosts with a different locale — confirm the motivation.
_locale._getdefaultlocale = (lambda *args: ['en_US', 'utf8'])


================================================
FILE: paddlespeech/server/base_commands.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List

from .entry import client_commands
from .entry import server_commands
from .util import cli_client_register
from .util import cli_server_register
from .util import get_client_command
from .util import get_server_command

# Command classes exported by this module.
__all__ = [
    'ServerBaseCommand',
    'ServerHelpCommand',
    'ClientBaseCommand',
    'ClientHelpCommand',
]


@cli_server_register(name='paddlespeech_server')
class ServerBaseCommand:
    """Root `paddlespeech_server` command; it delegates to the help command."""

    def execute(self, argv: List[str]) -> bool:
        """Run `paddlespeech_server.help` with `argv` and return its result."""
        help_command_cls = get_server_command('paddlespeech_server.help')
        help_command = help_command_cls()
        return help_command.execute(argv)


@cli_server_register(
    name='paddlespeech_server.help', description='Show help for commands.')
class ServerHelpCommand:
    """Print the usage text listing the registered server sub-commands."""

    def execute(self, argv: List[str]) -> bool:
        """Print one line per described sub-command and return True.

        Entries whose name starts with an underscore, or that carry no
        `_description`, are left out of the listing. `argv` is not used.
        """
        parts = [
            'Usage:\n',
            '    paddlespeech_server <command> <options>\n\n',
            'Commands:\n',
        ]
        registry = server_commands['paddlespeech_server']
        for name, info in registry.items():
            if name.startswith('_') or '_description' not in info:
                continue
            parts.append('    {:<15}        {}\n'.format(
                name, info['_description']))

        print(''.join(parts))
        return True


@cli_client_register(name='paddlespeech_client')
class ClientBaseCommand:
    """Root `paddlespeech_client` command; it delegates to the help command."""

    def execute(self, argv: List[str]) -> bool:
        """Run `paddlespeech_client.help` with `argv` and return its result."""
        help_command_cls = get_client_command('paddlespeech_client.help')
        help_command = help_command_cls()
        return help_command.execute(argv)


@cli_client_register(
    name='paddlespeech_client.help', description='Show help for commands.')
class ClientHelpCommand:
    """Print the usage text listing the registered client sub-commands."""

    def execute(self, argv: List[str]) -> bool:
        """Print one line per described sub-command and return True.

        Entries whose name starts with an underscore, or that carry no
        `_description`, are left out of the listing. `argv` is not used.
        """
        parts = [
            'Usage:\n',
            '    paddlespeech_client <command> <options>\n\n',
            'Commands:\n',
        ]
        registry = client_commands['paddlespeech_client']
        for name, info in registry.items():
            if name.startswith('_') or '_description' not in info:
                continue
            parts.append('    {:<15}        {}\n'.format(
                name, info['_description']))

        print(''.join(parts))
        return True


================================================
FILE: paddlespeech/server/bin/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .paddlespeech_client import ASRClientExecutor
from .paddlespeech_client import TTSClientExecutor
from .paddlespeech_server import ServerExecutor
from .paddlespeech_server import ServerStatsExecutor


================================================
FILE: paddlespeech/server/bin/paddlespeech_client.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import asyncio
import base64
import io
import json
import os
import random
import sys
import time
import warnings
from typing import List

import numpy as np
import requests
import soundfile

from ..executor import BaseExecutor
from ..util import cli_client_register
from ..util import stats_wrapper
from paddlespeech.cli.log import logger
from paddlespeech.server.utils.audio_handler import ASRWsAudioHandler
from paddlespeech.server.utils.audio_process import wav2pcm
from paddlespeech.server.utils.util import compute_delay
from paddlespeech.server.utils.util import wav2base64
# Install a global "ignore" filter so warnings are not shown by the client.
warnings.filterwarnings("ignore")

# Public names of this module. `TextClientExecutor` is defined below but is
# not listed here.
__all__ = [
    'TTSClientExecutor', 'TTSOnlineClientExecutor', 'ASRClientExecutor',
    'ASROnlineClientExecutor', 'CLSClientExecutor', 'VectorClientExecutor'
]


@cli_client_register(
    name='paddlespeech_client.tts', description='visit tts service')
class TTSClientExecutor(BaseExecutor):
    """Client for the `/paddlespeech/tts` HTTP endpoint (CLI and Python API)."""

    def __init__(self):
        super(TTSClientExecutor, self).__init__()
        self.parser = argparse.ArgumentParser(
            prog='paddlespeech_client.tts', add_help=True)
        self.parser.add_argument(
            '--server_ip', type=str, default='127.0.0.1', help='server ip')
        self.parser.add_argument(
            '--port', type=int, default=8090, help='server port')
        self.parser.add_argument(
            '--input',
            type=str,
            default=None,
            help='Text to be synthesized.',
            required=True)
        self.parser.add_argument(
            '--spk_id', type=int, default=0, help='Speaker id')
        self.parser.add_argument(
            '--speed',
            type=float,
            default=1.0,
            help='Audio speed, the value should be set between 0 and 3')
        self.parser.add_argument(
            '--volume',
            type=float,
            default=1.0,
            help='Audio volume, the value should be set between 0 and 3')
        self.parser.add_argument(
            '--sample_rate',
            type=int,
            default=0,
            choices=[0, 8000, 16000],
            help='Sampling rate, the default is the same as the model')
        self.parser.add_argument(
            '--output', type=str, default=None, help='Synthesized audio file')

    def postprocess(self, wav_base64: str, outfile: str) -> None:
        """Decode the base64 audio returned by the server and save it.

        Args:
            wav_base64 (str): base64-encoded audio file content.
            outfile (str): destination path; must end with ``.wav`` or
                ``.pcm``. Any other suffix only logs an error.
        """
        audio_data_byte = base64.b64decode(wav_base64)
        # from byte
        samples, sample_rate = soundfile.read(
            io.BytesIO(audio_data_byte), dtype='float32')

        # transform audio
        if outfile.endswith(".wav"):
            soundfile.write(outfile, samples, sample_rate)
        elif outfile.endswith(".pcm"):
            import tempfile

            # wav2pcm works on files, so go through a scratch wav. Create it
            # in the system temp directory and always remove it, even when
            # writing or converting fails.
            fd, temp_wav = tempfile.mkstemp(suffix=".wav")
            os.close(fd)
            try:
                soundfile.write(temp_wav, samples, sample_rate)
                wav2pcm(temp_wav, outfile, data_type=np.int16)
            finally:
                os.remove(temp_wav)
        else:
            logger.error("The format for saving audio only supports wav or pcm")

    def execute(self, argv: List[str]) -> bool:
        """Command-line entry point.

        Args:
            argv (List[str]): command-line arguments for this sub-command.

        Returns:
            bool: True if the request succeeded, False if any exception was
                raised (the exception is logged).
        """
        args = self.parser.parse_args(argv)
        input_ = args.input
        server_ip = args.server_ip
        port = args.port
        spk_id = args.spk_id
        speed = args.speed
        volume = args.volume
        sample_rate = args.sample_rate
        output = args.output

        try:
            time_start = time.time()
            res = self(
                input=input_,
                server_ip=server_ip,
                port=port,
                spk_id=spk_id,
                speed=speed,
                volume=volume,
                sample_rate=sample_rate,
                output=output)
            time_end = time.time()
            time_consume = time_end - time_start
            response_dict = res.json()
            # Only claim the file was saved when postprocess really wrote it:
            # an output path was given and has a supported suffix.
            if output is not None and output.endswith((".wav", ".pcm")):
                logger.info(
                    "Save synthesized audio successfully on %s." % (output))
            logger.info("Audio duration: %f s." %
                        (response_dict['result']['duration']))
            logger.info("Response time: %f s." % (time_consume))
            return True
        except Exception as e:
            logger.error("Failed to synthesized audio.")
            logger.error(e)
            return False

    @stats_wrapper
    def __call__(self,
                 input: str,
                 server_ip: str="127.0.0.1",
                 port: int=8090,
                 spk_id: int=0,
                 speed: float=1.0,
                 volume: float=1.0,
                 sample_rate: int=0,
                 output: str=None):
        """Python API: request synthesis from the TTS server.

        Args:
            input (str): text to be synthesized.
            server_ip (str, optional): server ip. Defaults to "127.0.0.1".
            port (int, optional): server port. Defaults to 8090.
            spk_id (int, optional): speaker id. Defaults to 0.
            speed (float, optional): audio speed. Defaults to 1.0.
            volume (float, optional): audio volume. Defaults to 1.0.
            sample_rate (int, optional): sampling rate. Defaults to 0.
            output (str, optional): where to save the returned audio; it is
                also sent to the server as ``save_path``. Defaults to None
                (nothing is saved by the client).

        Returns:
            requests.Response: the raw HTTP response.
        """

        url = 'http://' + server_ip + ":" + str(port) + '/paddlespeech/tts'
        request = {
            "text": input,
            "spk_id": spk_id,
            "speed": speed,
            "volume": volume,
            "sample_rate": sample_rate,
            "save_path": output
        }

        res = requests.post(url, json.dumps(request))
        response_dict = res.json()
        if output is not None:
            self.postprocess(response_dict["result"]["audio"], output)
        return res


@cli_client_register(
    name='paddlespeech_client.tts_online',
    description='visit tts online service')
class TTSOnlineClientExecutor(BaseExecutor):
    """Client for the streaming TTS service over HTTP or websocket."""

    def __init__(self):
        super(TTSOnlineClientExecutor, self).__init__()
        self.parser = argparse.ArgumentParser(
            prog='paddlespeech_client.tts_online', add_help=True)
        self.parser.add_argument(
            '--server_ip', type=str, default='127.0.0.1', help='server ip')
        self.parser.add_argument(
            '--port', type=int, default=8092, help='server port')
        self.parser.add_argument(
            '--protocol',
            type=str,
            default="http",
            choices=["http", "websocket"],
            help='server protocol')
        self.parser.add_argument(
            '--input',
            type=str,
            default=None,
            help='Text to be synthesized.',
            required=True)
        self.parser.add_argument(
            '--spk_id', type=int, default=0, help='Speaker id')
        self.parser.add_argument(
            '--output',
            type=str,
            default=None,
            help='Client saves synthesized audio')
        # `type=bool` would turn every non-empty string (including "False")
        # into True, so parse the textual value explicitly.
        self.parser.add_argument(
            "--play",
            type=self._parse_bool,
            help="whether to play audio",
            default=False)

    @staticmethod
    def _parse_bool(value: str) -> bool:
        """Interpret a command-line string as a boolean.

        Empty and explicitly negative spellings are False; anything else
        stays True, as it was with ``type=bool``.
        """
        negatives = ("", "false", "0", "no", "n", "f", "off")
        return str(value).strip().lower() not in negatives

    def execute(self, argv: List[str]) -> bool:
        """Command-line entry point.

        Args:
            argv (List[str]): command-line arguments for this sub-command.

        Returns:
            bool: True on success, False if any exception was raised (the
                exception is logged).
        """
        args = self.parser.parse_args(argv)
        input_ = args.input
        server_ip = args.server_ip
        port = args.port
        protocol = args.protocol
        spk_id = args.spk_id
        output = args.output
        play = args.play

        try:
            self(
                input=input_,
                server_ip=server_ip,
                port=port,
                protocol=protocol,
                spk_id=spk_id,
                output=output,
                play=play)
            return True
        except Exception as e:
            logger.error("Failed to synthesized audio.")
            logger.error(e)
            return False

    @stats_wrapper
    def __call__(self,
                 input: str,
                 server_ip: str="127.0.0.1",
                 port: int=8092,
                 protocol: str="http",
                 spk_id: int=0,
                 output: str=None,
                 play: bool=False):
        """Python API: run streaming synthesis and log timing statistics.

        Args:
            input (str): text to be synthesized.
            server_ip (str, optional): server ip. Defaults to "127.0.0.1".
            port (int, optional): server port. Defaults to 8092.
            protocol (str, optional): "http" or "websocket"; any other value
                logs an error and exits the process. Defaults to "http".
            spk_id (int, optional): speaker id. Defaults to 0.
            output (str, optional): path passed to the handler for saving the
                audio. Defaults to None.
            play (bool, optional): passed to the handler as its play flag.
                Defaults to False.
        """

        if protocol == "http":
            logger.info("tts http client start")
            from paddlespeech.server.utils.audio_handler import TTSHttpHandler
            handler = TTSHttpHandler(server_ip, port, play)
            results = handler.run(input, spk_id, output)

        elif protocol == "websocket":
            from paddlespeech.server.utils.audio_handler import TTSWsHandler
            logger.info("tts websocket client start")
            handler = TTSWsHandler(server_ip, port, play)
            loop = asyncio.get_event_loop()
            results = loop.run_until_complete(
                handler.run(input, spk_id, output))

        else:
            logger.error("Please set correct protocol, http or websocket")
            sys.exit(-1)

        # Both handlers return the same 6-tuple; unpack and post-process once.
        (first_response, final_response, duration, save_audio_success,
         receive_time_list, chunk_duration_list) = results
        delay_time_list = compute_delay(receive_time_list, chunk_duration_list)

        logger.info(f"sentence: {input}")
        logger.info(f"duration: {duration} s")
        logger.info(f"first response: {first_response} s")
        logger.info(f"final response: {final_response} s")
        logger.info(f"RTF: {final_response/duration}")
        if output is not None:
            if save_audio_success:
                logger.info(f"Audio successfully saved in {output}")
            else:
                logger.error("Audio save failed.")

        if delay_time_list != []:
            logger.info(
                f"Delay situation: total number of packages: {len(receive_time_list)}, "
                f"the number of delayed packets: {len(delay_time_list)}, "
                f"minimum delay time: {min(delay_time_list)} s, "
                f"maximum delay time: {max(delay_time_list)} s, "
                f"average delay time: {sum(delay_time_list)/len(delay_time_list)} s, "
                f"delay rate:{len(delay_time_list)/len(receive_time_list)}")
        else:
            logger.info("The sentence has no delay in streaming synthesis.")


@cli_client_register(
    name='paddlespeech_client.asr', description='visit asr service')
class ASRClientExecutor(BaseExecutor):
    """Client for the non-streaming ASR service (HTTP only)."""

    def __init__(self):
        super(ASRClientExecutor, self).__init__()
        self.parser = argparse.ArgumentParser(
            prog='paddlespeech_client.asr', add_help=True)
        self.parser.add_argument(
            '--server_ip', type=str, default='127.0.0.1', help='server ip')
        self.parser.add_argument(
            '--port', type=int, default=8090, help='server port')
        self.parser.add_argument(
            '--input',
            type=str,
            default=None,
            help='Audio file to be recognized',
            required=True)
        self.parser.add_argument(
            '--protocol',
            type=str,
            default="http",
            choices=["http", "websocket"],
            help='server protocol')
        self.parser.add_argument(
            '--sample_rate', type=int, default=16000, help='audio sample rate')
        self.parser.add_argument(
            '--lang', type=str, default="zh_cn", help='language')
        self.parser.add_argument(
            '--audio_format', type=str, default="wav", help='audio format')

        self.parser.add_argument(
            '--punc.server_ip',
            type=str,
            default=None,
            dest="punc_server_ip",
            help='Punctuation server ip')
        self.parser.add_argument(
            '--punc.port',
            type=int,
            default=8091,
            dest="punc_server_port",
            help='Punctuation server port')

    def execute(self, argv: List[str]) -> bool:
        """Command-line entry point.

        Args:
            argv (List[str]): command-line arguments for this sub-command.

        Returns:
            bool: True on success, False if any exception was raised (the
                exception is logged).
        """
        args = self.parser.parse_args(argv)
        input_ = args.input
        server_ip = args.server_ip
        port = args.port
        sample_rate = args.sample_rate
        lang = args.lang
        audio_format = args.audio_format
        protocol = args.protocol

        try:
            time_start = time.time()
            res = self(
                input=input_,
                server_ip=server_ip,
                port=port,
                sample_rate=sample_rate,
                lang=lang,
                audio_format=audio_format,
                protocol=protocol,
                punc_server_ip=args.punc_server_ip,
                punc_server_port=args.punc_server_port)
            time_end = time.time()
            logger.info(f"ASR result: {res}")
            logger.info("Response time %f s." % (time_end - time_start))
            return True
        except Exception as e:
            logger.error("Failed to speech recognition.")
            logger.error(e)
            return False

    @stats_wrapper
    def __call__(self,
                 input: str,
                 server_ip: str="127.0.0.1",
                 port: int=8090,
                 sample_rate: int=16000,
                 lang: str="zh_cn",
                 audio_format: str="wav",
                 protocol: str="http",
                 punc_server_ip: str=None,
                 punc_server_port: int=None):
        """Python API to call an executor.

        Args:
            input (str): The input audio file path
            server_ip (str, optional): The ASR server ip. Defaults to "127.0.0.1".
            port (int, optional): The ASR server port. Defaults to 8090.
            sample_rate (int, optional): The audio sample rate. Defaults to 16000.
            lang (str, optional): The audio language type. Defaults to "zh_cn".
            audio_format (str, optional): The audio format information. Defaults to "wav".
            protocol (str, optional): Accepted for compatibility; the request
                is always sent over HTTP. Defaults to "http".
            punc_server_ip (str, optional): Accepted for compatibility; not
                used by this method. Defaults to None.
            punc_server_port (int, optional): Accepted for compatibility; not
                used by this method. Defaults to None.

        Returns:
            str: The ASR results
        """
        # we use the asr server to recognize the audio text content
        # and paddlespeech_client asr only support http protocol.
        # Say so instead of silently ignoring a different request.
        if protocol != "http":
            logger.info(
                f"protocol {protocol} is ignored: paddlespeech_client asr "
                "only supports the http protocol")

        from paddlespeech.server.utils.audio_handler import ASRHttpHandler
        logger.info("asr http client start")
        handler = ASRHttpHandler(server_ip=server_ip, port=port)
        res = handler.run(input, audio_format, sample_rate, lang)
        res = res['result']['transcription']
        logger.info("asr http client finished")

        return res


@cli_client_register(
    name='paddlespeech_client.asr_online',
    description='visit asr online service')
class ASROnlineClientExecutor(BaseExecutor):
    """Client for the streaming (websocket) ASR service."""

    def __init__(self):
        super(ASROnlineClientExecutor, self).__init__()
        self.parser = argparse.ArgumentParser(
            prog='paddlespeech_client.asr_online', add_help=True)
        self.parser.add_argument(
            '--server_ip', type=str, default='127.0.0.1', help='server ip')
        self.parser.add_argument(
            '--port', type=int, default=8091, help='server port')
        self.parser.add_argument(
            '--input',
            type=str,
            default=None,
            help='Audio file to be recognized',
            required=True)
        self.parser.add_argument(
            '--sample_rate', type=int, default=16000, help='audio sample rate')
        self.parser.add_argument(
            '--lang', type=str, default="zh_cn", help='language')
        self.parser.add_argument(
            '--audio_format', type=str, default="wav", help='audio format')
        self.parser.add_argument(
            '--punc.server_ip',
            type=str,
            default=None,
            dest="punc_server_ip",
            help='Punctuation server ip')
        self.parser.add_argument(
            '--punc.port',
            type=int,
            default=8190,
            dest="punc_server_port",
            help='Punctuation server port')

    def execute(self, argv: List[str]) -> bool:
        """Command-line entry point.

        Args:
            argv (List[str]): command-line arguments for this sub-command.

        Returns:
            bool: True on success, False if any exception was raised (the
                exception is logged).
        """
        args = self.parser.parse_args(argv)
        input_ = args.input
        server_ip = args.server_ip
        port = args.port
        sample_rate = args.sample_rate
        lang = args.lang
        audio_format = args.audio_format
        try:
            time_start = time.time()
            res = self(
                input=input_,
                server_ip=server_ip,
                port=port,
                sample_rate=sample_rate,
                lang=lang,
                audio_format=audio_format,
                punc_server_ip=args.punc_server_ip,
                punc_server_port=args.punc_server_port)
            time_end = time.time()
            logger.info(res)
            logger.info("Response time %f s." % (time_end - time_start))
            return True
        except Exception as e:
            logger.error("Failed to speech recognition.")
            logger.error(e)
            return False

    @stats_wrapper
    def __call__(self,
                 input: str,
                 server_ip: str="127.0.0.1",
                 port: int=8091,
                 sample_rate: int=16000,
                 lang: str="zh_cn",
                 audio_format: str="wav",
                 punc_server_ip: str=None,
                 punc_server_port: int=None):
        """Python API to call asr online executor.

        Args:
            input (str): the audio file to be send to streaming asr service.
            server_ip (str, optional): streaming asr server ip. Defaults to "127.0.0.1".
            port (int, optional): streaming asr server port. Defaults to 8091.
            sample_rate (int, optional): audio sample rate; accepted but not
                forwarded to the handler. Defaults to 16000.
            lang (str, optional): audio language type; accepted but not
                forwarded to the handler. Defaults to "zh_cn".
            audio_format (str, optional): audio format; accepted but not
                forwarded to the handler. Defaults to "wav".
            punc_server_ip (str, optional): punctuation server ip. Defaults to None.
            punc_server_port (int, optional): punctuation server port. Defaults to None.

        Returns:
            the ``'result'`` entry of the handler's response (the audio text)
        """

        logger.info("asr websocket client start")
        handler = ASRWsAudioHandler(
            server_ip,
            port,
            punc_server_ip=punc_server_ip,
            punc_server_port=punc_server_port)
        loop = asyncio.get_event_loop()
        res = loop.run_until_complete(handler.run(input))
        logger.info("asr websocket client finished")

        return res['result']


@cli_client_register(
    name='paddlespeech_client.cls', description='visit cls service')
class CLSClientExecutor(BaseExecutor):
    """Client for the `/paddlespeech/cls` audio classification endpoint."""

    def __init__(self):
        super(CLSClientExecutor, self).__init__()
        parser = argparse.ArgumentParser(
            prog='paddlespeech_client.cls', add_help=True)
        parser.add_argument(
            '--server_ip', type=str, default='127.0.0.1', help='server ip')
        parser.add_argument(
            '--port', type=int, default=8090, help='server port')
        parser.add_argument(
            '--input',
            type=str,
            default=None,
            help='Audio file to classify.',
            required=True)
        parser.add_argument(
            '--topk',
            type=int,
            default=1,
            help='Return topk scores of classification result.')
        self.parser = parser

    def execute(self, argv: List[str]) -> bool:
        """Command-line entry point.

        Args:
            argv (List[str]): command-line arguments for this sub-command.

        Returns:
            bool: True on success, False if any exception was raised (the
                exception is logged).
        """
        args = self.parser.parse_args(argv)
        call_kwargs = dict(
            input=args.input,
            server_ip=args.server_ip,
            port=args.port,
            topk=args.topk)

        try:
            started_at = time.time()
            response = self(**call_kwargs)
            finished_at = time.time()
            logger.info(response.json())
            logger.info("Response time %f s." % (finished_at - started_at))
        except Exception as err:
            logger.error("Failed to speech classification.")
            logger.error(err)
            return False
        return True

    @stats_wrapper
    def __call__(self,
                 input: str,
                 server_ip: str="127.0.0.1",
                 port: int=8090,
                 topk: int=1):
        """Python API: post a base64-encoded audio file for classification.

        Args:
            input (str): path of the audio file to classify.
            server_ip (str, optional): server ip. Defaults to "127.0.0.1".
            port (int, optional): server port. Defaults to 8090.
            topk (int, optional): number of top scores requested. Defaults to 1.

        Returns:
            requests.Response: the raw HTTP response.
        """
        endpoint = "".join(
            ['http://', server_ip, ":", str(port), '/paddlespeech/cls'])
        payload = json.dumps({"audio": wav2base64(input), "topk": topk})
        return requests.post(url=endpoint, data=payload)


@cli_client_register(
    name='paddlespeech_client.text', description='visit the text service')
class TextClientExecutor(BaseExecutor):
    """Client for the `/paddlespeech/text` punctuation endpoint."""

    def __init__(self):
        super(TextClientExecutor, self).__init__()
        self.parser = argparse.ArgumentParser(
            prog='paddlespeech_client.text', add_help=True)
        self.parser.add_argument(
            '--server_ip', type=str, default='127.0.0.1', help='server ip')
        self.parser.add_argument(
            '--port', type=int, default=8090, help='server port')
        self.parser.add_argument(
            '--input',
            type=str,
            default=None,
            help='sentence to be process by text server.',
            required=True)

    def execute(self, argv: List[str]) -> bool:
        """Execute the request from the argv.

        Args:
            argv (List): the request arguments

        Returns:
            bool: True on success, False if any exception was raised (the
                exception is logged).
        """
        args = self.parser.parse_args(argv)
        input_ = args.input
        server_ip = args.server_ip
        port = args.port

        try:
            time_start = time.time()
            res = self(input=input_, server_ip=server_ip, port=port)
            time_end = time.time()
            logger.info(f"The punc text: {res}")
            logger.info("Response time %f s." % (time_end - time_start))
            return True
        except Exception as e:
            logger.error("Failed to Text punctuation.")
            # Report the cause as the other client executors do, instead of
            # discarding it.
            logger.error(e)
            return False

    @stats_wrapper
    def __call__(self, input: str, server_ip: str="127.0.0.1", port: int=8090):
        """
        Python API to call text executor.

        Args:
            input (str): the request sentence text
            server_ip (str, optional): the server ip. Defaults to "127.0.0.1".
            port (int, optional): the server port. Defaults to 8090.

        Returns:
            str: the punctuation text
        """

        url = 'http://' + server_ip + ":" + str(port) + '/paddlespeech/text'
        request = {
            "text": input,
        }

        res = requests.post(url=url, data=json.dumps(request))
        response_dict = res.json()
        punc_text = response_dict["result"]["punc_text"]
        return punc_text


@cli_client_register(
    name='paddlespeech_client.vector', description='visit the vector service')
class VectorClientExecutor(BaseExecutor):
    """Client of the vector service.

    Two tasks are available:

    * ``spk``: sends ``--input`` through ``VectorHttpHandler``.
    * ``score``: sends the ``--enroll`` / ``--test`` pair through
      ``VectorScoreHttpHandler``.
    """

    def __init__(self):
        super(VectorClientExecutor, self).__init__()
        self.parser = argparse.ArgumentParser(
            prog='paddlespeech_client.vector', add_help=True)
        self.parser.add_argument(
            '--server_ip', type=str, default='127.0.0.1', help='server ip')
        self.parser.add_argument(
            '--port', type=int, default=8090, help='server port')
        # The help text used to be copied from the text client; this option
        # carries the audio used by the "spk" task.
        self.parser.add_argument(
            '--input',
            type=str,
            default=None,
            help='audio to be processed by the vector server (spk task).')
        self.parser.add_argument(
            '--task',
            type=str,
            default="spk",
            choices=["spk", "score"],
            help="The vector service task")
        self.parser.add_argument(
            "--enroll", type=str, default=None, help="The enroll audio")
        self.parser.add_argument(
            "--test", type=str, default=None, help="The test audio")

    def execute(self, argv: List[str]) -> bool:
        """Command line entry: run one vector request described by ``argv``.

        Args:
            argv (List[str]): the command line arguments, parsed by
                ``self.parser``

        Returns:
            bool: True when the request completed, False when the arguments
                required by the chosen task are missing or the request failed
        """
        args = self.parser.parse_args(argv)
        input_ = args.input
        server_ip = args.server_ip
        port = args.port
        task = args.task

        # All audio options default to None, so reject a request that lacks
        # the ones its task uses with a clear message before contacting the
        # server.
        if task == "spk" and input_ is None:
            logger.error("The spk task requires --input.")
            return False
        if task == "score" and (args.enroll is None or args.test is None):
            logger.error("The score task requires both --enroll and --test.")
            return False

        try:
            time_start = time.time()
            res = self(
                input=input_,
                server_ip=server_ip,
                port=port,
                enroll_audio=args.enroll,
                test_audio=args.test,
                task=task)
            time_end = time.time()
            logger.info(res.json())
            logger.info("Response time %f s." % (time_end - time_start))
            return True
        except Exception as e:
            logger.error("Failed to extract vector.")
            logger.error(e)
            return False

    @stats_wrapper
    def __call__(self,
                 input: str,
                 server_ip: str="127.0.0.1",
                 port: int=8090,
                 audio_format: str="wav",
                 sample_rate: int=16000,
                 enroll_audio: str=None,
                 test_audio: str=None,
                 task="spk"):
        """
        Python API to call the vector executor.

        Args:
            input (str): the request audio data (used by the "spk" task)
            server_ip (str, optional): the server ip. Defaults to "127.0.0.1".
            port (int, optional): the server port. Defaults to 8090.
            audio_format (str, optional): audio format. Defaults to "wav".
            sample_rate (int, optional): audio sample rate. Defaults to 16000.
            enroll_audio (str, optional): enroll audio data (used by the
                "score" task). Defaults to None.
            test_audio (str, optional): test audio data (used by the "score"
                task). Defaults to None.
            task (str, optional): the task type, "spk" or "score".
                Defaults to "spk".

        Returns:
            The value returned by the handler's ``run`` for the selected
            task, or None when ``task`` is not supported.
        """

        if task == "spk":
            from paddlespeech.server.utils.audio_handler import VectorHttpHandler
            logger.info("vector http client start")
            logger.info(f"the input audio: {input}")
            handler = VectorHttpHandler(server_ip=server_ip, port=port)
            return handler.run(input, audio_format, sample_rate)

        if task == "score":
            from paddlespeech.server.utils.audio_handler import VectorScoreHttpHandler
            logger.info("vector score http client start")
            logger.info(
                f"enroll audio: {enroll_audio}, test audio: {test_audio}")
            handler = VectorScoreHttpHandler(server_ip=server_ip, port=port)
            return handler.run(enroll_audio, test_audio, audio_format,
                               sample_rate)

        # Unknown task: report it and make the "no result" outcome explicit.
        logger.error(f"Sorry, we have not support such task {task}")
        return None


@cli_client_register(
    name='paddlespeech_client.acs', description='visit acs service')
class ACSClientExecutor(BaseExecutor):
    """Client of the ACS service exposed at ``/paddlespeech/asr/search``."""

    def __init__(self):
        super(ACSClientExecutor, self).__init__()
        parser = argparse.ArgumentParser(
            prog='paddlespeech_client.acs', add_help=True)
        parser.add_argument(
            '--server_ip', type=str, default='127.0.0.1', help='server ip')
        parser.add_argument(
            '--port', type=int, default=8090, help='server port')
        parser.add_argument(
            '--input',
            type=str,
            default=None,
            help='Audio file to be recognized',
            required=True)
        parser.add_argument(
            '--sample_rate', type=int, default=16000, help='audio sample rate')
        parser.add_argument(
            '--lang', type=str, default="zh_cn", help='language')
        parser.add_argument(
            '--audio_format', type=str, default="wav", help='audio format')
        self.parser = parser

    def execute(self, argv: List[str]) -> bool:
        """Command line entry: run one ACS request described by ``argv``.

        Args:
            argv (List[str]): the command line arguments, parsed by
                ``self.parser``

        Returns:
            bool: True when the request completed, False when an exception
                was raised
        """
        opts = self.parser.parse_args(argv)
        # Collect the call arguments up front so that only the request itself
        # runs inside the try block.
        call_kwargs = dict(
            input=opts.input,
            server_ip=opts.server_ip,
            port=opts.port,
            sample_rate=opts.sample_rate,
            lang=opts.lang,
            audio_format=opts.audio_format, )

        try:
            began = time.time()
            outcome = self(**call_kwargs)
            finished = time.time()
            logger.info(f"ACS result: {outcome}")
            logger.info("Response time %f s." % (finished - began))
            return True
        except Exception as err:
            logger.error("Failed to speech recognition.")
            logger.error(err)
            return False

    @stats_wrapper
    def __call__(
            self,
            input: str,
            server_ip: str="127.0.0.1",
            port: int=8090,
            sample_rate: int=16000,
            lang: str="zh_cn",
            audio_format: str="wav", ):
        """Python API: send an audio to the ACS endpoint and return the result.

        Args:
            input (str): The input audio file path
            server_ip (str, optional): The server ip. Defaults to "127.0.0.1".
            port (int, optional): The server port. Defaults to 8090.
            sample_rate (int, optional): The audio sample rate. Defaults to 16000.
            lang (str, optional): The audio language type. Defaults to "zh_cn".
            audio_format (str, optional): The audio format information. Defaults to "wav".

        Returns:
            The ``result`` entry of the response returned by the handler.
        """
        # The request goes through the ASR http handler, pointed at the
        # search endpoint instead of its default one.
        logger.info("acs http client start")
        from paddlespeech.server.utils.audio_handler import ASRHttpHandler
        client = ASRHttpHandler(
            server_ip=server_ip, port=port, endpoint="/paddlespeech/asr/search")
        reply = client.run(input, audio_format, sample_rate, lang)
        found = reply['result']
        logger.info("acs http client finished")

        return found


================================================
FILE: paddlespeech/server/bin/paddlespeech_server.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import sys
import warnings
from typing import List

import numpy
import uvicorn
from fastapi import FastAPI
from paddlespeech.cli.log import logger
from paddlespeech.resource import CommonTaskResource
from paddlespeech.server.engine.engine_pool import init_engine_pool
from paddlespeech.server.engine.engine_warmup import warm_up
from paddlespeech.server.restful.api import setup_router as setup_http_router
from paddlespeech.server.utils.config import get_config
from paddlespeech.server.ws.api import setup_router as setup_ws_router
from prettytable import PrettyTable
from starlette.middleware.cors import CORSMiddleware

from ..executor import BaseExecutor
from ..util import cli_server_register
from ..util import stats_wrapper
warnings.filterwarnings("ignore")

__all__ = ['ServerExecutor', 'ServerStatsExecutor']

# Application object shared by this module: ServerExecutor.init() attaches the
# task routers to it and ServerExecutor.__call__() hands it to uvicorn.
app = FastAPI(
    version="0.0.1", description="Api", title="PaddleSpeech Serving API")

# CORS is configured with wildcards for origins, methods and headers, and
# with credentials allowed.
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive -- confirm this is intended for non-local deployments.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_credentials=True,
    allow_origins=["*"])


@cli_server_register(
    name='paddlespeech_server.start', description='Start the service')
class ServerExecutor(BaseExecutor):
    """Starts the serving application described by a yaml config file."""

    def __init__(self):
        super(ServerExecutor, self).__init__()
        self.parser = argparse.ArgumentParser(
            prog='paddlespeech_server.start', add_help=True)
        self.parser.add_argument(
            "--config_file",
            action="store",
            help="yaml file of the app",
            default=None,
            required=True)

        # NOTE(review): --log_file is parsed and forwarded to __call__, but
        # nothing in this class reads it -- confirm whether it is still needed.
        self.parser.add_argument(
            "--log_file",
            action="store",
            help="log file",
            default="./log/paddlespeech.log")

    def init(self, config) -> bool:
        """System initialization: register routers, build and warm up engines.

        Args:
            config (CfgNode): config object; ``protocol`` and ``engine_list``
                are read here

        Returns:
            bool: True when the engine pool was initialized and every engine
                in ``config.engine_list`` warmed up, False otherwise

        Raises:
            Exception: when ``config.protocol`` is neither "websocket" nor
                "http"
        """
        # init api: each engine entry is "<speech task>_<engine type>", the
        # router setup only needs the task part.
        api_list = [engine.split("_")[0] for engine in config.engine_list]
        if config.protocol == "websocket":
            api_router = setup_ws_router(api_list)
        elif config.protocol == "http":
            api_router = setup_http_router(api_list)
        else:
            raise Exception("unsupported protocol")
        app.include_router(api_router)
        logger.info("start to init the engine")
        if not init_engine_pool(config):
            return False

        # warm up
        for engine_and_type in config.engine_list:
            if not warm_up(engine_and_type):
                return False

        return True

    def execute(self, argv: List[str]) -> bool:
        """Command line entry: start the server described by ``--config_file``.

        Args:
            argv (List[str]): the command line arguments, parsed by
                ``self.parser``

        Returns:
            bool: the value produced by ``__call__`` -- True after the server
                ran and stopped, False when initialization failed. The
                process exits with status -1 when an exception is raised.
        """
        args = self.parser.parse_args(argv)
        try:
            # Previously the result was dropped, so this method returned None
            # on every non-exception path despite the ``-> bool`` annotation.
            return self(args.config_file, args.log_file)
        except Exception as e:
            logger.error("Failed to start server.")
            logger.error(e)
            sys.exit(-1)

    @stats_wrapper
    def __call__(self,
                 config_file: str="./conf/application.yaml",
                 log_file: str="./log/paddlespeech.log"):
        """
        Python API to call an executor.

        Args:
            config_file (str, optional): yaml file of the app.
                Defaults to "./conf/application.yaml".
            log_file (str, optional): log file path; currently unused here.
                Defaults to "./log/paddlespeech.log".

        Returns:
            bool: False when ``init`` failed and the server was not started,
                True once ``uvicorn.run`` has returned
        """
        config = get_config(config_file)
        if not self.init(config):
            # A failed initialization used to be completely silent.
            logger.error(
                "Failed to initialize the engines, the server is not started.")
            return False

        uvicorn.run(app, host=config.host, port=config.port)
        return True


@cli_server_register(
    name='paddlespeech_server.stats',
    description='Get the models supported by each speech task in the service.')
class ServerStatsExecutor():
    """Prints the tables of pretrained models available for a speech task."""

    def __init__(self):
        super(ServerStatsExecutor, self).__init__()

        # Single source of truth for the accepted tasks; used both by the
        # parser and by the check in execute().
        self.task_choices = ['asr', 'tts', 'cls', 'text', 'vector']
        self.parser = argparse.ArgumentParser(
            prog='paddlespeech_server.stats', add_help=True)
        self.parser.add_argument(
            '--task',
            type=str,
            default=None,
            choices=self.task_choices,
            help='Choose speech task.',
            required=True)
        # Column names of the printed table per task, joined with "-".
        self.model_name_format = {
            'asr': 'Model-Size-Code Switch-Multilingual-Language-Sample Rate',
            'tts': 'Model-Language',
            'cls': 'Model-Sample Rate',
            'text': 'Model-Task-Language',
            'vector': 'Model-Sample Rate'
        }

    def show_support_models(self, pretrained_models: dict):
        """Print one table row per key of ``pretrained_models``.

        Keys are split on "-" into columns. ASR keys that have fewer parts
        than the ASR header are padded with "-" and rearranged to match the
        header order.

        Args:
            pretrained_models (dict): mapping whose keys are model tags;
                ``self.task`` must already be set
        """
        fields = self.model_name_format[self.task].split("-")
        table = PrettyTable(fields)
        for key in pretrained_models:
            line = key.split("-")
            if self.task == "asr" and len(line) < len(fields):
                line.extend(["-"] * (len(fields) - len(line)))
                # For "codeswitch_<langs>" / "multilingual_<langs>" the marker
                # goes to its own column and the rest stays as the language.
                # The language parts are re-joined into one string: leaving
                # them as a list made the row ragged, which numpy.array()
                # rejects with ValueError on recent NumPy versions.
                if "codeswitch" in key:
                    marker, *langs = line[1].split("_")
                    line[3], line[1] = marker, "_".join(langs)
                elif "multilingual" in key:
                    marker, *langs = line[1].split("_")
                    line[4], line[1] = marker, "_".join(langs)
                # Reorder the padded parts into the header's column order.
                line = [line[i] for i in (0, 5, 3, 4, 1, 2)]
            table.add_row(line)
        print(table)

    def execute(self, argv: List[str]) -> bool:
        """
            Command line entry.

        Args:
            argv (List[str]): the command line arguments, parsed by
                ``self.parser``

        Returns:
            bool: True when the tables were printed, False when the task is
                not supported or the model lists could not be obtained
        """
        parser_args = self.parser.parse_args(argv)
        self.task = parser_args.task
        if self.task not in self.task_choices:
            logger.error("Please input correct speech task, choices = {}".
                         format(self.task_choices))
            return False

        try:
            # Dynamic models
            dynamic_pretrained_models = CommonTaskResource(
                task=self.task, model_format='dynamic').pretrained_models

            if len(dynamic_pretrained_models) > 0:
                logger.info(
                    "Here is the table of {} pretrained models supported in the service.".
                    format(self.task.upper()))
                self.show_support_models(dynamic_pretrained_models)

            # Static models
            static_pretrained_models = CommonTaskResource(
                task=self.task, model_format='static').pretrained_models
            if len(static_pretrained_models) > 0:
                logger.info(
                    "Here is the table of {} static pretrained models supported in the service.".
                    format(self.task.upper()))
                self.show_support_models(static_pretrained_models)

            return True

        # Catch Exception rather than BaseException so KeyboardInterrupt and
        # SystemExit still propagate, and log the cause of the failure.
        except Exception as e:
            logger.error(
                "Failed to get the table of {} pretrained models supported in the service.".
                format(self.task.upper()))
            logger.error(e)
            return False


================================================
FILE: paddlespeech/server/conf/application.yaml
================================================
# This is the parameter configuration file for PaddleSpeech Offline Serving.

#################################################################################
#                             SERVER SETTING                                    #
#################################################################################
host: 0.0.0.0
port: 8090

# The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['asr_python', 'asr_inference', 'tts_python', 'tts_inference', 'cls_python', 'cls_inference', 'text_python', 'vector_python']
protocol: 'http'
engine_list: ['asr_python', 'tts_python', 'cls_python', 'text_python', 'vector_python']


#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### ASR #########################################
################### speech task: asr; engine_type: python #######################
asr_python:
    model: 'conformer_wenetspeech'
    lang: 'zh'
    sample_rate: 16000
    cfg_path: # [optional]
    ckpt_path: # [optional]
    decode_method: 'attention_rescoring'
    num_decoding_left_chunks: -1
    force_yes: True
    device:  # set 'gpu:id' or 'cpu'


################### speech task: asr; engine_type: inference #######################
asr_inference:
    # model_type choices=['deepspeech2offline_aishell']
    model_type: 'deepspeech2offline_aishell'
    am_model: # the pdmodel file of am static model [optional]
    am_params:  # the pdiparams file of am static model [optional]
    lang: 'zh'
    sample_rate: 16000
    cfg_path: 
    num_decoding_left_chunks: -1
    decode_method: 
    force_yes: True

    am_predictor_conf:
        device:  # set 'gpu:id' or 'cpu'
        switch_ir_optim: True
        glog_info: False  # True -> print glog
        summary: True  # False -> do not show predictor config


################################### TTS #########################################
################### speech task: tts; engine_type: python #######################
tts_python: 
    # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc', 
    #                              'fastspeech2_ljspeech', 'fastspeech2_aishell3',
    #                              'fastspeech2_vctk']        
    am: 'fastspeech2_csmsc'   
    am_config: 
    am_ckpt: 
    am_stat: 
    phones_dict: 
    tones_dict: 
    speaker_dict: 
    spk_id: 0

    # voc (vocoder) choices=['pwgan_csmsc', 'pwgan_ljspeech', 'pwgan_aishell3',
    #                        'pwgan_vctk', 'mb_melgan_csmsc']
    voc: 'pwgan_csmsc'
    voc_config: 
    voc_ckpt: 
    voc_stat: 

    # others
    lang: 'zh'
    device:  # set 'gpu:id' or 'cpu'


################### speech task: tts; engine_type: inference #######################
tts_inference:
    # am (acoustic model) choices=['speedyspeech_csmsc', 'fastspeech2_csmsc']
    am: 'fastspeech2_csmsc'   
    am_model: # the pdmodel file of your am static model (XX.pdmodel)
    am_params: # the pdiparams file of your am static model (XX.pdiparams)
    am_sample_rate: 24000
    phones_dict: 
    tones_dict: 
    speaker_dict: 
    spk_id: 0

    am_predictor_conf:
        device:  # set 'gpu:id' or 'cpu'
        switch_ir_optim: True
        glog_info: False # True -> print glog
        summary: True  # False -> do not show predictor config

    # voc (vocoder) choices=['pwgan_csmsc', 'mb_melgan_csmsc','hifigan_csmsc']
    voc: 'pwgan_csmsc'
    voc_model: # the pdmodel file of your vocoder static model (XX.pdmodel)
    voc_params: # the pdiparams file of your vocoder static model (XX.pdiparams)
    voc_sample_rate: 24000

    voc_predictor_conf:
        device:  # set 'gpu:id' or 'cpu'  
        switch_ir_optim: True  
        glog_info: False # True -> print glog
        summary: True  # False -> do not show predictor config

    # others
    lang: 'zh'


################################### CLS #########################################
################### speech task: cls; engine_type: python #######################
cls_python:
    # model choices=['panns_cnn14', 'panns_cnn10', 'panns_cnn6']
    model: 'panns_cnn14'
    cfg_path: # [optional] Config of cls task.
    ckpt_path: # [optional] Checkpoint file of model.
    label_file: # [optional] Label file of cls task.
    device:  # set 'gpu:id' or 'cpu'


################### speech task: cls; engine_type: inference #######################
cls_inference:
    # model_type choices=['panns_cnn14', 'panns_cnn10', 'panns_cnn6']
    model_type: 'panns_cnn14' 
    cfg_path: 
    model_path:  # the pdmodel file of am static model [optional]
    params_path:  # the pdiparams file of am static model [optional]
    label_file:  # [optional] Label file of cls task.

    predictor_conf:
        device:  # set 'gpu:id' or 'cpu'
        switch_ir_optim: True
        glog_info: False  # True -> print glog
        summary: True  # False -> do not show predictor config


################################### Text #########################################
################### text task: punc; engine_type: python #######################
text_python:
    task: punc
    model_type: 'ernie_linear_p3_wudao'
    lang: 'zh'
    sample_rate: 16000
    cfg_path: # [optional]
    ckpt_path: # [optional]
    vocab_file: # [optional]
    device:  # set 'gpu:id' or 'cpu'


################################### Vector ######################################
################### Vector task: spk; engine_type: python #######################
vector_python:
    task: spk
    model_type: 'ecapatdnn_voxceleb12'
    sample_rate: 16000
    cfg_path: # [optional]
    ckpt_path: # [optional]
    device:  # set 'gpu:id' or 'cpu'


================================================
FILE: paddlespeech/server/conf/tts_online_application.yaml
================================================
# This is the parameter configuration file for streaming tts server.

#################################################################################
#                             SERVER SETTING                                    #
#################################################################################
host: 0.0.0.0
port: 8092

# The task format in the engine_list is: <speech task>_<engine type>
# engine_list choices = ['tts_online', 'tts_online-onnx'], the inference speed of tts_online-onnx is faster than tts_online.
# protocol choices = ['websocket', 'http'] 
protocol: 'http'
engine_list: ['tts_online-onnx']


#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### TTS #########################################
################### speech task: tts; engine_type: online #######################
tts_online: 
    # am (acoustic model) choices=['fastspeech2_csmsc', 'fastspeech2_cnndecoder_csmsc']   
    # fastspeech2_cnndecoder_csmsc support streaming am infer.     
    am: 'fastspeech2_csmsc'   
    am_config: 
    am_ckpt: 
    am_stat: 
    phones_dict: 
    tones_dict: 
    speaker_dict: 
    spk_id: 0

    # voc (vocoder) choices=['mb_melgan_csmsc', 'hifigan_csmsc']
    # Both mb_melgan_csmsc and hifigan_csmsc support streaming voc inference
    voc: 'mb_melgan_csmsc'
    voc_config: 
    voc_ckpt: 
    voc_stat: 

    # others
    lang: 'zh'
    device: 'cpu' # set 'gpu:id' or 'cpu'
    # am_block and am_pad only for fastspeech2_cnndecoder_onnx model to streaming am infer,
    # when am_pad set 12, streaming synthetic audio is the same as non-streaming synthetic audio
    am_block: 72
    am_pad: 12
    # voc_pad and voc_block are used by the voc model for streaming voc inference,
    # when voc model is mb_melgan_csmsc, voc_pad set 14, streaming synthetic audio is the same as non-streaming synthetic audio; The minimum value of pad can be set to 7, streaming synthetic audio sounds normal
    # when voc model is hifigan_csmsc, voc_pad set 19, streaming synthetic audio is the same as non-streaming synthetic audio; voc_pad set 14, streaming synthetic audio sounds normal
    voc_block: 36
    voc_pad: 14
    

#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### TTS #########################################
################### speech task: tts; engine_type: online-onnx #######################
tts_online-onnx: 
    # am (acoustic model) choices=['fastspeech2_csmsc_onnx', 'fastspeech2_cnndecoder_csmsc_onnx']
    # fastspeech2_cnndecoder_csmsc_onnx support streaming am infer.        
    am: 'fastspeech2_cnndecoder_csmsc_onnx' 
    # am_ckpt is a list, if am is fastspeech2_cnndecoder_csmsc_onnx, am_ckpt = [encoder model, decoder model, postnet model];
    # if am is fastspeech2_csmsc_onnx, am_ckpt = [ckpt model];
    am_ckpt:   # list
    am_stat: 
    phones_dict: 
    tones_dict: 
    speaker_dict: 
    spk_id: 0
    am_sample_rate: 24000
    am_sess_conf:
        device: "cpu" # set 'gpu:id' or 'cpu'
        use_trt: False
        cpu_threads: 4

    # voc (vocoder) choices=['mb_melgan_csmsc_onnx', 'hifigan_csmsc_onnx']
    # Both mb_melgan_csmsc_onnx and hifigan_csmsc_onnx support streaming voc inference
    voc: 'hifigan_csmsc_onnx'
    voc_ckpt: 
    voc_sample_rate: 24000
    voc_sess_conf:
        device: "cpu" # set 'gpu:id' or 'cpu'
        use_trt: False
        cpu_threads: 4

    # others
    lang: 'zh'
    # am_block and am_pad only for fastspeech2_cnndecoder_onnx model to streaming am infer,
    # when am_pad set 12, streaming synthetic audio is the same as non-streaming synthetic audio
    am_block: 72
    am_pad: 12
    # voc_pad and voc_block are used by the voc model for streaming voc inference,
    # when voc model is mb_melgan_csmsc_onnx, voc_pad set 14, streaming synthetic audio is the same as non-streaming synthetic audio; The minimum value of pad can be set to 7, streaming synthetic audio sounds normal
    # when voc model is hifigan_csmsc_onnx, voc_pad set 19, streaming synthetic audio is the same as non-streaming synthetic audio; voc_pad set 14, streaming synthetic audio sounds normal
    voc_block: 36
    voc_pad: 14
    # voc_upsample should be same as n_shift on voc config.
    voc_upsample: 300
    

================================================
FILE: paddlespeech/server/conf/vector_application.yaml
================================================
# This is the parameter configuration file for PaddleSpeech Serving.

#################################################################################
#                             SERVER SETTING                                    #
#################################################################################
host: 0.0.0.0
port: 8090

# The task format in the engine_list is: <speech task>_<engine type>
# protocol = ['http'] (only one can be selected). 
# http only support offline engine type.
protocol: 'http'
engine_list: ['vector_python']


#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### Vector ######################################
################### Vector task: spk; engine_type: python #######################
vector_python:
    task: spk
    model_type: 'ecapatdnn_voxceleb12'
    sample_rate: 16000
    cfg_path: # [optional]
    ckpt_path: # [optional]
    device: # set 'gpu:id' or 'cpu'


================================================
FILE: paddlespeech/server/conf/ws_conformer_application.yaml
================================================
# This is the parameter configuration file for PaddleSpeech Serving.

#################################################################################
#                             SERVER SETTING                                    #
#################################################################################
host: 0.0.0.0
port: 8090

# The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['asr_online', 'tts_online']
# protocol = ['websocket', 'http'] (only one can be selected).
# websocket only support online engine type.
protocol: 'websocket'
engine_list: ['asr_online']


#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### ASR #########################################
################### speech task: asr; engine_type: online #######################
asr_online:
    model_type: 'conformer_online_multicn'
    am_model: # the pdmodel file of am static model [optional]
    am_params:  # the pdiparams file of am static model [optional]
    lang: 'zh'
    sample_rate: 16000
    cfg_path: 
    decode_method: 
    num_decoding_left_chunks: -1
    force_yes: True
    device: cpu # cpu or gpu:id
    continuous_decoding: True # enable continuous decoding when endpoint detected

    am_predictor_conf:
        device:  # set 'gpu:id' or 'cpu'
        switch_ir_optim: True
        glog_info: False  # True -> print glog
        summary: True  # False -> do not show predictor config

    chunk_buffer_conf:
        window_n: 7     # frame
        shift_n: 4      # frame
        window_ms: 25   # ms
        shift_ms: 10    # ms
        sample_rate: 16000
        sample_width: 2


================================================
FILE: paddlespeech/server/conf/ws_conformer_wenetspeech_application_faster.yaml
================================================
# This is the parameter configuration file for PaddleSpeech Serving.

#################################################################################
#                             SERVER SETTING                                    #
#################################################################################
host: 0.0.0.0
port: 8090

# The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['asr_online']
# protocol = ['websocket'] (only one can be selected).
# websocket only support online engine type.
protocol: 'websocket'
engine_list: ['asr_online']


#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################

################################### ASR #########################################
################### speech task: asr; engine_type: online #######################
asr_online:
    model_type: 'conformer_online_wenetspeech'
    am_model: # the pdmodel file of am static model [optional]
    am_params:  # the pdiparams file of am static model [optional]
    lang: 'zh'
    sample_rate: 16000
    cfg_path: 
    force_yes: True
    device: 'cpu' # cpu or gpu:id
    # decode_method must appear only once in this mapping: duplicate keys are
    # invalid YAML and only one of the values would be kept by the loader.
    decode_method: "attention_rescoring"
    continuous_decoding: True # enable continuous decoding when endpoint detected
    num_decoding_left_chunks: 16
    am_predictor_conf:
        device:  # set 'gpu:id' or 'cpu'
        switch_ir_optim: True
        glog_info: False  # True -> print glog
        summary: True  # False -> do not show predictor config

    chunk_buffer_conf:
        window_n: 7     # frame
        shift_n: 4      # frame
        window_ms: 25   # ms
        shift_ms: 10    # ms
        sample_rate: 16000
        sample_width: 2


================================================
FILE: paddlespeech/server/conf/ws_ds2_application.yaml
================================================
# This is the parameter configuration file for PaddleSpeech Serving.

#################################################################################
#                             SERVER SETTING                                    #
#################################################################################
host: 0.0.0.0
port: 8090

# The task format in the engine_list is: <speech task>_<engine type>
# task choices = ['asr_online-inference', 'asr_online-onnx']
# protocol = ['websocket'] (only one can be selected).
# websocket only support online engine type.
protocol: 'websocket'
engine_list: ['asr_online-onnx']


#################################################################################
#                                ENGINE CONFIG                                  #
#################################################################################


################################### ASR #########################################
################### speech task: asr; engine_type: online-onnx #######################
asr_online-onnx:
    model_type: 'deepspeech2online_wenetspeech'
    am_model:  # the pdmodel file of onnx am static model [optional]
    am_params:  # the pdiparams file of am static model [optional]
    lang: 'zh'
    sample_rate: 16000
    cfg_path: 
    decode_method: 
    num_decoding_left_chunks: 
    force_yes: True
    device: 'cpu' # cpu or gpu:id

    # https://onnxruntime.ai/docs/api/python/api_summary.html#inferencesession
    am_predictor_conf:
        device: 'cpu' # set 'gpu:id' or 'cpu'
        graph_optimization_level: 0 
        intra_op_num_threads: 0 # Sets the number of threads used to parallelize the execution within nodes.
        inter_op_num_threads: 0 # Sets the number of threads used to parallelize the execution of the graph (across nodes).
        log_severity_level: 2   # Log severity level. Applies to session load, initialization, etc. 0:Verbose, 1:Info, 2:Warning. 3:Error, 4:Fatal. Default is 2.
        log_verbosity_level: 0  # VLOG level if DEBUG build and session_log_severity_level is 0. Applies to session load, initialization, etc. Default is 0.

    chunk_buffer_conf:
        frame_duration_ms: 85
        # shift_ms was defined twice in this mapping (40 here, 10 below).
        # Duplicate keys are invalid YAML; the first value is kept only as a
        # comment because loaders that accept duplicates typically keep the
        # last one.
        # NOTE(review): confirm that 10 is the intended value.
        # shift_ms: 40
        sample_rate: 16000
        sample_width: 2
        window_n: 7     # frame
        shift_n: 4      # frame
        window_ms: 25   # ms
        shift_ms: 10    # ms


################################### ASR #########################################
################### speech task: asr; engine_type: online-inference #######################
asr_online-inference:
    model_type: 'deepspeech2online_wenetspeech'
    am_model:    # the pdmodel file of am static model [optional]
    am_params:   # the pdiparams file of am static model [optional]
    lang: 'zh'
    sample_rate: 16000
    cfg_path: 
    decode_method: 
    num_decoding_left_chunks: 
    force_yes: True
    device: 'cpu' # cpu or gpu:id

    am_predictor_conf:
        device:  # set 'gpu:id' or 'cpu'
        switch_ir_optim: True
        glog_info: False  # True -> print glog
        summary: True  # False -> do not show predictor config

    chunk_buffer_conf:
        frame_duration_ms: 85
        # NOTE(review): `shift_ms` appears twice in this mapping (40 here, 10 at
        # the end). YAML requires unique keys; loaders that tolerate duplicates
        # typically keep the last value - confirm which one is intended.
        shift_ms: 40
        sample_rate: 16000
        sample_width: 2
        window_n: 7     # frame
        shift_n: 4      # frame
        window_ms: 25   # ms
        shift_ms: 10    # ms

================================================
FILE: paddlespeech/server/engine/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/engine/acs/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/engine/acs/python/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/engine/acs/python/acs_engine.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import json
import os
import re

import numpy as np
import paddle
import soundfile
import websocket

from paddlespeech.cli.log import logger
from paddlespeech.server.engine.base_engine import BaseEngine


class ACSEngine(BaseEngine):
    """Audio content search (ACS) engine.

    The engine streams the audio to an ASR websocket server, then searches the
    returned transcript for every word of the configured word list and reports
    the time span of each hit.
    """

    def __init__(self):
        """The ACSEngine Engine
        """
        super(ACSEngine, self).__init__()
        logger.debug("Create the ACSEngine Instance")
        self.word_list = []

    def init(self, config: dict):
        """Init the ACSEngine Engine

        Args:
            config (dict): The server configuation

        Returns:
            bool: The engine instance flag
        """
        logger.debug("Init the acs engine")
        try:
            self.config = config
            self.device = self.config.get("device", paddle.get_device())

            # websocket default ping timeout is 20 seconds
            self.ping_timeout = self.config.get("ping_timeout", 20)
            paddle.set_device(self.device)
            logger.debug(f"ACS Engine set the device: {self.device}")

        except Exception as e:
            # Only catch ordinary errors: KeyboardInterrupt / SystemExit must
            # still be able to stop the server during start-up.
            logger.error(
                "Set device failed, please check if device is already used and the parameter 'device' in the yaml file"
            )
            logger.error(e)
            # self.device may not have been assigned yet when the failure
            # happened while reading the config.
            logger.error("Initialize acs server engine Failed on device: %s." %
                         (getattr(self, "device", None)))
            return False

        self.read_search_words()

        # init the asr url
        self.url = "ws://" + self.config.asr_server_ip + ":" + str(
            self.config.asr_server_port) + "/paddlespeech/asr/streaming"

        logger.info("Initialize acs server engine successfully on device: %s." %
                    (self.device))

        return True

    def read_search_words(self):
        """Load the search words from the file named by `config.word_list`.

        One word is read per line. Blank lines are skipped: an empty word would
        be used as an empty regular expression, which matches at every position
        of the transcript.
        """
        word_list = self.config.word_list
        if word_list is None:
            logger.error(
                "No word list file in config, please set the word list parameter"
            )
            return

        if not os.path.exists(word_list):
            logger.error("Please input correct word list file")
            return

        with open(word_list, 'r') as fp:
            stripped_lines = (line.strip() for line in fp)
            self.word_list = [word for word in stripped_lines if word]

        logger.info(f"word list: {self.word_list}")

    @staticmethod
    def _signal_message(signal: str) -> str:
        """Build the JSON control message ("start" / "end") sent to the ASR server."""
        return json.dumps(
            {
                "name": "test.wav",
                "signal": signal,
                "nbest": 1
            },
            sort_keys=True,
            indent=4,
            separators=(',', ': '))

    def get_asr_content(self, audio_data):
        """Get the streaming asr result

        Args:
            audio_data: a path or file-like object readable by `soundfile.read`

        Returns:
            dict: the decoded JSON reply to the "end" signal, or "" when no
                asr server url is configured
        """
        logger.debug("send a message to the server")
        if self.url is None:
            logger.error("No asr server, please input valid ip and port")
            return ""
        ws = websocket.WebSocket()
        logger.debug(f"set the ping timeout: {self.ping_timeout} seconds")
        try:
            ws.connect(self.url, ping_timeout=self.ping_timeout)

            # 1. send the start signal
            ws.send(self._signal_message("start"))
            msg = ws.recv()
            logger.info("client receive msg={}".format(msg))

            # 2. send the total audio data, chunk by chunk
            for chunk_data in self.read_wave(audio_data):
                ws.send_binary(chunk_data.tobytes())
                msg = ws.recv()
                msg = json.loads(msg)
                logger.info(f"audio result: {msg}")

            # 3. send the end signal and read the final result
            logger.debug("send the end signal")
            ws.send(self._signal_message("end"))
            msg = ws.recv()
            msg = json.loads(msg)

            logger.info(f"the final result: {msg}")
        finally:
            # always release the connection, also when send/recv/decoding fails
            ws.close()

        return msg

    def read_wave(self, audio_data: str):
        """read the audio file from specific wavfile path

        Args:
            audio_data (str): the audio data, 
                                 we assume that audio sample rate matches the model

        Yields:
            numpy.array: the small package audio pcm data
        """
        samples, sample_rate = soundfile.read(audio_data, dtype='int16')
        x_len = len(samples)
        assert sample_rate == 16000

        chunk_size = int(85 * sample_rate / 1000)  # 85ms, sample_rate = 16kHz

        # zero-pad the tail so that the audio splits into whole chunks
        padding_len_x = -x_len % chunk_size
        padding = np.zeros((padding_len_x), dtype=samples.dtype)
        padded_x = np.concatenate([samples, padding], axis=0)

        assert (x_len + padding_len_x) % chunk_size == 0
        num_chunk = (x_len + padding_len_x) // chunk_size
        for i in range(0, num_chunk):
            start = i * chunk_size
            end = start + chunk_size
            yield padded_x[start:end]

    def get_macthed_word(self, msg):
        """Get the matched info in msg

        Args:
            msg (dict): the asr info, including the asr result and time stamp

        Returns:
            acs_result, asr_result: the acs result and the asr result
        """
        asr_result = msg['result']
        time_stamp = msg['times']
        acs_result = []

        # nothing can be located without time stamps (e.g. empty transcript)
        if not time_stamp:
            return acs_result, asr_result

        # search for each word in self.word_list
        offset = self.config.offset
        # last time in time_stamp
        max_ed = time_stamp[-1]['ed']
        for w in self.word_list:
            # search the w in asr_result and the index in asr_result
            # https://docs.python.org/3/library/re.html#re.finditer
            for m in re.finditer(w, asr_result):
                # a zero-length match covers no character, so it has no time
                # span and may even start past the last time stamp
                if m.end(0) == m.start(0):
                    continue
                # match start and end char index in timestamp
                # https://docs.python.org/3/library/re.html#re.Match.start
                start = max(time_stamp[m.start(0)]['bg'] - offset, 0)
                end = min(time_stamp[m.end(0) - 1]['ed'] + offset, max_ed)
                logger.debug(f'start: {start}, end: {end}')
                acs_result.append({'w': w, 'bg': start, 'ed': end})

        return acs_result, asr_result

    def run(self, audio_data):
        """process the audio data in acs engine
           the engine does not store any data, so all the request use the self.run api

        Args:
            audio_data (bytes): the audio file content; it is wrapped in
                `io.BytesIO` before being read

        Returns:
            acs_result, asr_result: the acs result and the asr result
        """
        logger.debug("start to process the audio content search")
        msg = self.get_asr_content(io.BytesIO(audio_data))

        acs_result, asr_result = self.get_macthed_word(msg)
        logger.info(f'the asr result {asr_result}')
        logger.info(f'the acs result: {acs_result}')
        return acs_result, asr_result


================================================
FILE: paddlespeech/server/engine/asr/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/engine/asr/online/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/engine/asr/online/ctc_endpoint.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from dataclasses import dataclass
from dataclasses import field

import numpy as np

from paddlespeech.cli.log import logger


@dataclass
class OnlineCTCEndpointRule:
    """One endpointing rule.

    `OnlineCTCEndpoint.rule_activated` reports the rule as activated when all
    three conditions described by the fields below hold at the same time.
    """
    # when True, the rule can only fire if something has been decoded
    must_contain_nonsilence: bool = True
    # lower bound on the trailing silence; it is compared against
    # trailing_silence_frames * frame_shift_in_ms
    min_trailing_silence: int = 1000
    # lower bound on the utterance length; it is compared against
    # num_frames_decoded * frame_shift_in_ms
    min_utterance_length: int = 0


@dataclass
class OnlineCTCEndpoingOpt:
    """Options of `OnlineCTCEndpoint`: frame shift, blank settings and the three rules."""
    # duration represented by one decoded frame; frame counts are multiplied by
    # this value before being compared with the rule thresholds
    frame_shift_in_ms: int = 10

    blank: int = 0  # blank id, that we consider as silence for purposes of endpointing.
    blank_threshold: float = 0.8  # above blank threshold is silence

    # We support three rules.  We terminate decoding if ANY of these rules
    # evaluates to "true". If you want to add more rules, do it by changing this
    # code.  If you want to disable a rule, you can set the silence-timeout for
    # that rule to a very large number.

    # rule1 times out after 5 seconds of silence, even if we decoded nothing.
    rule1: OnlineCTCEndpointRule = field(
        default_factory=lambda: OnlineCTCEndpointRule(False, 5000, 0))
    # rule2 times out after 1.0 seconds of silence after decoding something,
    # even if we did not reach a final-state at all.
    rule2: OnlineCTCEndpointRule = field(
        default_factory=lambda: OnlineCTCEndpointRule(True, 1000, 0))
    # rule3 times out after the utterance is 20 seconds long, regardless of
    # anything else.
    rule3: OnlineCTCEndpointRule = field(
        default_factory=lambda: OnlineCTCEndpointRule(False, 0, 20000))


class OnlineCTCEndpoint:
    """CTC based endpoint detector.

    Frames whose blank probability exceeds the configured threshold count as
    silence; the endpoint rules are evaluated on the accumulated frame counts.

    Reference:
    [END-TO-END AUTOMATIC SPEECH RECOGNITION INTEGRATED WITH CTC-BASED VOICE ACTIVITY DETECTION](https://arxiv.org/pdf/2002.00551.pdf)
    """

    def __init__(self, opts: OnlineCTCEndpoingOpt):
        self.opts = opts
        logger.info(f"Endpont Opts: {opts}")
        self.frame_shift_in_ms = opts.frame_shift_in_ms

        # creates the frame counters and sets them to zero
        self.reset()

    def reset(self):
        """Clear the frame counters."""
        self.num_frames_decoded = 0
        self.trailing_silence_frames = 0

    def rule_activated(self,
                       rule: OnlineCTCEndpointRule,
                       rule_name: str,
                       decoding_something: bool,
                       trailine_silence: int,
                       utterance_length: int) -> bool:
        """Check one rule against the current silence / utterance lengths.

        The activation is logged under `rule_name` when the rule fires.
        """
        nonsilence_ok = decoding_something or (
            not rule.must_contain_nonsilence)
        activated = (nonsilence_ok and
                     trailine_silence >= rule.min_trailing_silence and
                     utterance_length >= rule.min_utterance_length)
        if activated:
            logger.info(f"Endpoint Rule: {rule_name} activated: {rule}")
        return activated

    def endpoint_detected(self,
                          ctc_log_probs: np.ndarray,
                          decoding_something: bool) -> bool:
        """detect endpoint.

        Args:
            ctc_log_probs (np.ndarray): (T, D)
            decoding_something (bool): contain nonsilince.

        Returns:
            bool: whether endpoint detected.
        """
        for frame_logprob in ctc_log_probs:
            is_silence = np.exp(
                frame_logprob[self.opts.blank]) > self.opts.blank_threshold

            self.num_frames_decoded += 1
            # a non-silent frame restarts the trailing silence count
            self.trailing_silence_frames = (
                self.trailing_silence_frames + 1 if is_silence else 0)

        assert self.num_frames_decoded >= self.trailing_silence_frames
        assert self.frame_shift_in_ms > 0

        has_nonsilence = (
            self.num_frames_decoded > self.trailing_silence_frames
        ) and decoding_something
        shift = self.frame_shift_in_ms
        utterance_length = self.num_frames_decoded * shift
        trailing_silence = self.trailing_silence_frames * shift

        # the rules are tried in order, the first activated one wins
        for rule_name in ('rule1', 'rule2', 'rule3'):
            if self.rule_activated(
                    getattr(self.opts, rule_name), rule_name, has_nonsilence,
                    trailing_silence, utterance_length):
                return True
        return False


================================================
FILE: paddlespeech/server/engine/asr/online/ctc_search.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
from collections import defaultdict

import paddle

from paddlespeech.cli.log import logger
from paddlespeech.s2t.utils.utility import log_add

__all__ = ['CTCPrefixBeamSearch']


class CTCPrefixBeamSearch:
    """Chunk-wise CTC prefix beam search.

    The hypotheses and the absolute time step are kept on the instance between
    calls of `search`, so consecutive chunks continue the same search until
    `reset` is called.
    """

    def __init__(self, config):
        """Implement the ctc prefix beam search

        Args:
            config (yacs.config.CfgNode): the ctc prefix beam search configuration
        """
        self.config = config

        # beam size
        self.first_beam_size = self.config.beam_size
        # TODO(support second beam size)
        self.second_beam_size = int(self.first_beam_size * 1.0)
        logger.info(
            f"first and second beam size: {self.first_beam_size}, {self.second_beam_size}"
        )

        # state
        self.cur_hyps = None
        self.hyps = None
        self.abs_time_step = 0

        self.reset()

    def reset(self):
        """Reset the search cache value
        """
        self.cur_hyps = None
        self.hyps = None
        self.abs_time_step = 0

    @paddle.no_grad()
    def search(self, ctc_probs, device, blank_id=0):
        """ctc prefix beam search method decode a chunk feature

        Args:
            ctc_probs (paddle.Tensor): the ctc scores of all the tokens for one
                chunk, a 2-D tensor indexed as [time step, token id]
            device (paddle.fluid.core_avx.Place): the feature host device, such as CUDAPlace(0).
                It is not used by this method.
            blank_id (int, optional): the blank id in the vocab. Defaults to 0.

        Returns:
            list: the search result, one tuple per kept hypothesis:
                (prefix, total score, viterbi blank score,
                 viterbi non-blank score, current token prob,
                 blank-ending times, non-blank-ending times)
        """
        # decode 
        logger.info("start to ctc prefix search")
        assert len(ctc_probs.shape) == 2

        vocab_size = ctc_probs.shape[1]
        # a beam can never be wider than the vocabulary
        first_beam_size = min(self.first_beam_size, vocab_size)
        second_beam_size = min(self.second_beam_size, vocab_size)
        # report the beam sizes that are really used below
        logger.info(
            f"effect first and second beam size: {first_beam_size}, {second_beam_size}"
        )

        maxlen = ctc_probs.shape[0]

        # cur_hyps: (prefix, (blank_ending_score, none_blank_ending_score))
        # 0. blank_ending_score,
        # 1. none_blank_ending_score, 
        # 2. viterbi_blank ending score, 
        # 3. viterbi_non_blank score, 
        # 4. current_token_prob, 
        # 5. times_viterbi_blank, times_b
        # 6. times_titerbi_non_blank, times_nb
        if self.cur_hyps is None:
            self.cur_hyps = [(tuple(), (0.0, -float('inf'), 0.0, 0.0,
                                        -float('inf'), [], []))]
            # self.cur_hyps = [(tuple(), (0.0, -float('inf')))]
        # 2. CTC beam search step by step
        for t in range(0, maxlen):
            logp = ctc_probs[t]  # (vocab_size,)
            # next_hyps = defaultdict(lambda: (-float('inf'), -float('inf')))
            next_hyps = defaultdict(
                        lambda: (-float('inf'), -float('inf'), -float('inf'), -float('inf'), -float('inf'), [], []))

            # 2.1 First beam prune: select topk best
            #     do token passing process
            top_k_logp, top_k_index = logp.topk(
                first_beam_size)  # (first_beam_size,)
            for s in top_k_index:
                s = s.item()
                ps = logp[s].item()
                for prefix, (pb, pnb, v_b_s, v_nb_s, cur_token_prob, times_b,
                             times_nb) in self.cur_hyps:
                    last = prefix[-1] if len(prefix) > 0 else None
                    if s == blank_id:  # blank
                        n_pb, n_pnb, n_v_b, n_v_nb, n_cur_token_prob, n_times_b, n_times_nb = next_hyps[
                            prefix]
                        n_pb = log_add([n_pb, pb + ps, pnb + ps])

                        # continue from the better of the two viterbi paths
                        pre_times = times_b if v_b_s > v_nb_s else times_nb
                        n_times_b = copy.deepcopy(pre_times)
                        viterbi_score = v_b_s if v_b_s > v_nb_s else v_nb_s
                        n_v_b = viterbi_score + ps
                        next_hyps[prefix] = (n_pb, n_pnb, n_v_b, n_v_nb,
                                             n_cur_token_prob, n_times_b,
                                             n_times_nb)
                    elif s == last:
                        #  Update *ss -> *s;
                        # case1: *a + a => *a
                        n_pb, n_pnb, n_v_b, n_v_nb, n_cur_token_prob, n_times_b, n_times_nb = next_hyps[
                            prefix]
                        n_pnb = log_add([n_pnb, pnb + ps])
                        if n_v_nb < v_nb_s + ps:
                            n_v_nb = v_nb_s + ps
                            if n_cur_token_prob < ps:
                                n_cur_token_prob = ps
                                n_times_nb = copy.deepcopy(times_nb)
                                # note: the absolute time step must be used
                                # here again (not the index inside the chunk)
                                n_times_nb[-1] = self.abs_time_step
                        next_hyps[prefix] = (n_pb, n_pnb, n_v_b, n_v_nb,
                                             n_cur_token_prob, n_times_b,
                                             n_times_nb)

                        # Update *s-s -> *ss, - is for blank
                        # Case 2: *aε + a => *aa
                        n_prefix = prefix + (s, )
                        n_pb, n_pnb, n_v_b, n_v_nb, n_cur_token_prob, n_times_b, n_times_nb = next_hyps[
                            n_prefix]
                        if n_v_nb < v_b_s + ps:
                            n_v_nb = v_b_s + ps
                            n_cur_token_prob = ps
                            n_times_nb = copy.deepcopy(times_b)
                            n_times_nb.append(self.abs_time_step)
                        n_pnb = log_add([n_pnb, pb + ps])
                        next_hyps[n_prefix] = (n_pb, n_pnb, n_v_b, n_v_nb,
                                               n_cur_token_prob, n_times_b,
                                               n_times_nb)
                    else:
                        # Case 3: *a + b => *ab, *aε + b => *ab
                        n_prefix = prefix + (s, )
                        n_pb, n_pnb, n_v_b, n_v_nb, n_cur_token_prob, n_times_b, n_times_nb = next_hyps[
                            n_prefix]
                        viterbi_score = v_b_s if v_b_s > v_nb_s else v_nb_s
                        pre_times = times_b if v_b_s > v_nb_s else times_nb
                        if n_v_nb < viterbi_score + ps:
                            n_v_nb = viterbi_score + ps
                            n_cur_token_prob = ps
                            n_times_nb = copy.deepcopy(pre_times)
                            n_times_nb.append(self.abs_time_step)

                        n_pnb = log_add([n_pnb, pb + ps, pnb + ps])
                        next_hyps[n_prefix] = (n_pb, n_pnb, n_v_b, n_v_nb,
                                               n_cur_token_prob, n_times_b,
                                               n_times_nb)

            # 2.2 Second beam prune
            next_hyps = sorted(
                next_hyps.items(),
                key=lambda x: log_add([x[1][0], x[1][1]]),
                reverse=True)
            self.cur_hyps = next_hyps[:second_beam_size]

            # 2.3 update the absolute time step
            self.abs_time_step += 1

        # merge the blank / non-blank ending scores into one score per prefix
        self.hyps = [(y[0], log_add([y[1][0], y[1][1]]), y[1][2], y[1][3],
                      y[1][4], y[1][5], y[1][6]) for y in self.cur_hyps]

        logger.info("ctc prefix search success")
        return self.hyps

    def get_one_best_hyps(self):
        """Return the one best result

        Returns:
            list: a one-element list holding the prefix (tuple of token ids)
                of the first hypothesis in `self.hyps`
        """
        return [self.hyps[0][0]]

    def get_hyps(self):
        """Return the search hyps

        Returns:
            list: the hypotheses stored by the last `search` call
                (None before the first call or after `reset`)
        """
        return self.hyps

    def finalize_search(self):
        """do nothing in ctc_prefix_beam_search
        """
        pass


================================================
FILE: paddlespeech/server/engine/asr/online/onnx/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/engine/asr/online/onnx/asr_engine.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
from typing import ByteString
from typing import Optional

import numpy as np
import paddle
from numpy import float32
from yacs.config import CfgNode

from paddlespeech.audio.transform.transformation import Transformation
from paddlespeech.cli.asr.infer import ASRExecutor
from paddlespeech.cli.log import logger
from paddlespeech.resource import CommonTaskResource
from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
from paddlespeech.s2t.modules.ctc import CTCDecoder
from paddlespeech.s2t.utils.utility import UpdateConfig
from paddlespeech.server.engine.base_engine import BaseEngine
from paddlespeech.server.utils import onnx_infer
from paddlespeech.utils.env import MODEL_HOME

__all__ = ['PaddleASRConnectionHanddler', 'ASRServerExecutor', 'ASREngine']


# ASR server connection process class
class PaddleASRConnectionHanddler:
    """Per-connection streaming ASR state for the ONNX deepspeech2 engine.

    One handler is created per connection (see ``ASREngine.new_handler``).
    It buffers incoming PCM samples, converts them to features, forwards the
    features chunk by chunk through the predictor owned by the engine's
    executor, and keeps the latest best transcript.

    Only model types containing ``"deepspeech2"`` are supported; any other
    model type makes ``init_decoder`` raise ``ValueError``.
    """

    def __init__(self, asr_engine):
        """Init a Paddle ASR Connection Handler instance

        Args:
            asr_engine (ASREngine): the global asr engine
        """
        super().__init__()
        logger.debug(
            "create an paddle asr connection handler to process the websocket connection"
        )
        self.config = asr_engine.config  # server config
        self.model_config = asr_engine.executor.config
        self.asr_engine = asr_engine

        # model_type, sample_rate and text_feature are taken from the executor
        self.model_type = self.asr_engine.executor.model_type
        self.sample_rate = self.asr_engine.executor.sample_rate
        # tokens to text
        self.text_feature = self.asr_engine.executor.text_feature

        # feature extraction pipeline built from the model's preprocess config
        self.preprocess_conf = self.model_config.preprocess_config
        self.preprocess_args = {"train": False}
        self.preprocessing = Transformation(self.preprocess_conf)

        # frame window and frame shift, in samples unit; both are read from
        # the first entry of the preprocess pipeline
        self.win_length = self.preprocess_conf.process[0]['win_length']
        self.n_shift = self.preprocess_conf.process[0]['n_shift']

        # the feature extractor and the server must agree on the sample rate
        assert self.preprocess_conf.process[0]['fs'] == self.sample_rate, (
            self.sample_rate, self.preprocess_conf.process[0]['fs'])
        self.frame_shift_in_ms = int(
            self.n_shift / self.preprocess_conf.process[0]['fs'] * 1000)

        self.continuous_decoding = self.config.get("continuous_decoding", False)
        self.init_decoder()
        self.reset()

    def init_decoder(self):
        """Bind the shared predictor and build the CTC decoder.

        Raises:
            ValueError: if the model type does not contain ``"deepspeech2"``.
            AssertionError: if continuous decoding is enabled in the server
                config, which is rejected for deepspeech2.
        """
        if "deepspeech2" in self.model_type:
            assert self.continuous_decoding is False, "ds2 model not support endpoint"
            self.am_predictor = self.asr_engine.executor.am_predictor

            self.decoder = CTCDecoder(
                odim=self.model_config.output_dim,  # <blank> is in  vocab
                enc_n_units=self.model_config.rnn_layer_size * 2,
                blank_id=self.model_config.blank_id,
                dropout_rate=0.0,
                reduction=True,  # sum
                batch_average=True,  # sum / batch_size
                grad_norm_type=self.model_config.get('ctc_grad_norm_type',
                                                     None))

            cfg = self.model_config.decode
            decode_batch_size = 1  # for online
            self.decoder.init_decoder(
                decode_batch_size, self.text_feature.vocab_list,
                cfg.decoding_method, cfg.lang_model_path, cfg.alpha, cfg.beta,
                cfg.beam_size, cfg.cutoff_prob, cfg.cutoff_top_n,
                cfg.num_proc_bsearch)
        else:
            raise ValueError(f"Not supported: {self.model_type}")

    def model_reset(self):
        """Drop the buffered audio samples and the cached features."""
        # cache for audio and feat
        self.remained_wav = None
        self.cached_feat = None

    def output_reset(self):
        """Reset the stored transcript to a single empty string."""
        ## outputs
        # partial/ending decoding results
        self.result_transcripts = ['']

    def reset_continuous_decoding(self):
        """
        when in continuous decoding, reset for next utterance.

        Stores the current frame count as the global frame offset and clears
        the audio/feature caches.
        """
        self.global_frame_offset = self.num_frames
        self.model_reset()

    def reset(self):
        """Reset all per-connection state: RNN states, counters, caches, output.

        Raises:
            NotImplementedError: if the model type does not contain
                ``"deepspeech2"``.
        """
        if "deepspeech2" in self.model_type:
            # for deepspeech2
            # init state: zero h/c boxes of shape
            # (num_rnn_layers, 1, rnn_layer_size)
            self.chunk_state_h_box = np.zeros(
                (self.model_config.num_rnn_layers, 1,
                 self.model_config.rnn_layer_size),
                dtype=float32)
            self.chunk_state_c_box = np.zeros(
                (self.model_config.num_rnn_layers, 1,
                 self.model_config.rnn_layer_size),
                dtype=float32)
            self.decoder.reset_decoder(batch_size=1)
        else:
            raise NotImplementedError(f"{self.model_type} not support.")

        # filled in by extract_feat from the first feature tensor
        self.device = None

        ## common
        # global sample and frame step
        self.num_samples = 0
        self.global_frame_offset = 0
        # frame step of cur utterance
        self.num_frames = 0

        ## endpoint
        self.endpoint_state = False  # True for detect endpoint

        ## audio/feature caches
        self.model_reset()

        ## outputs
        self.output_reset()

    def extract_feat(self, samples: ByteString):
        """Append a PCM package to the buffer and extract features from it.

        Args:
            samples (ByteString): raw bytes, interpreted as int16 samples.

        Returns:
            ``0`` when the buffered audio is still shorter than one feature
            window (nothing is extracted); otherwise ``None`` after the
            feature cache has been updated. The two different values are
            kept as they are for backward compatibility.
        """
        logger.debug("Online ASR extract the feat")
        samples = np.frombuffer(samples, dtype=np.int16)
        assert samples.ndim == 1

        self.num_samples += samples.shape[0]
        logger.debug(
            f"This package receive {samples.shape[0]} pcm data. Global samples:{self.num_samples}"
        )

        # self.remained_wav stores all the samples,
        # include the original remained_wav and this package samples
        if self.remained_wav is None:
            self.remained_wav = samples
        else:
            assert self.remained_wav.ndim == 1  # (T,)
            self.remained_wav = np.concatenate([self.remained_wav, samples])
        logger.debug(
            f"The concatenation of remain and now audio samples length is: {self.remained_wav.shape}"
        )

        if len(self.remained_wav) < self.win_length:
            # samples not enough for feature window
            return 0

        # run the preprocessing pipeline and add a leading batch axis
        x_chunk = self.preprocessing(self.remained_wav, **self.preprocess_args)
        x_chunk = paddle.to_tensor(x_chunk, dtype="float32").unsqueeze(axis=0)

        # feature cache: concatenate along the time axis
        if self.cached_feat is None:
            self.cached_feat = x_chunk
        else:
            assert (len(x_chunk.shape) == 3)  # (B,T,D)
            assert (len(self.cached_feat.shape) == 3)  # (B,T,D)
            self.cached_feat = paddle.concat(
                [self.cached_feat, x_chunk], axis=1)

        # set the feat device
        if self.device is None:
            self.device = self.cached_feat.place

        # cur frame step
        num_frames = x_chunk.shape[1]

        # global frame step
        self.num_frames += num_frames

        # update remained wav: drop the samples consumed by the new frames
        self.remained_wav = self.remained_wav[self.n_shift * num_frames:]

        logger.debug(
            f"process the audio feature success, the cached feat shape: {self.cached_feat.shape}"
        )
        logger.debug(
            f"After extract feat, the cached remain the audio samples: {self.remained_wav.shape}"
        )
        logger.debug(f"global samples: {self.num_samples}")
        logger.debug(f"global frames: {self.num_frames}")

    def decode(self, is_finished=False):
        """advance decoding

        Slides a window over ``self.cached_feat``, forwards every window with
        ``decode_one_chunk`` and stores the newest best transcript in
        ``self.result_transcripts``. Afterwards only the frames starting at
        ``end - (context - subsampling)`` are kept in the feature cache.

        Args:
            is_finished (bool, optional): Is last frame or not. Defaults to False.

        Returns:
            ``None`` after a decoding pass and when no features are cached;
            the tuple ``(None, None)`` when too few frames are cached to run
            the model. The mixed return values are kept for backward
            compatibility; the result is read through ``get_result``.

        Raises:
            Exception: if the model type does not contain ``"deepspeech2"``.
        """
        if "deepspeech2" in self.model_type:
            decoding_chunk_size = 1  # decoding chunk size = 1. int decoding frame unit

            context = 7  # context=7, in audio frame unit
            subsampling = 4  # subsampling=4, in audio frame unit

            cached_feature_num = context - subsampling
            # decoding window for model, in audio frame unit
            decoding_window = (decoding_chunk_size - 1) * subsampling + context
            # decoding stride for model, in audio frame unit
            stride = subsampling * decoding_chunk_size

            if self.cached_feat is None:
                logger.info("no audio feat, please input more pcm data")
                return

            num_frames = self.cached_feat.shape[1]
            logger.debug(
                f"Required decoding window {decoding_window} frames, and the connection has {num_frames} frames"
            )

            # the cached feat must be larger decoding_window
            if num_frames < decoding_window and not is_finished:
                logger.info(
                    f"frame feat num is less than {decoding_window}, please input more pcm data"
                )
                return None, None

            # if is_finished=True, we need at least context frames
            if num_frames < context:
                # fixed: the "f" prefix used to sit inside the string
                # ("flast ..."), so the placeholders were logged literally
                logger.info(
                    f"last {num_frames} is less than context {context} frames, and we cannot do model forward"
                )
                return None, None

            logger.info("start to do model forward")
            # num_frames - context + 1 ensure that current frame can get context window
            if is_finished:
                # if get the finished chunk, we need process the last context
                left_frames = context
            else:
                # we only process decoding_window frames for one chunk
                left_frames = decoding_window

            # The two guards above guarantee num_frames >= left_frames, so
            # the loop body runs at least once and both `end` and
            # `trans_best` are bound when the loop exits.
            end = None
            for cur in range(0, num_frames - left_frames + 1, stride):
                end = min(cur + decoding_window, num_frames)

                # extract the audio
                x_chunk = self.cached_feat[:, cur:end, :].numpy()
                x_chunk_lens = np.array([x_chunk.shape[1]])

                trans_best = self.decode_one_chunk(x_chunk, x_chunk_lens)

            self.result_transcripts = [trans_best]

            # update feat cache
            self.cached_feat = self.cached_feat[:, end - cached_feature_num:, :]

            # return trans_best[0]
        else:
            # fixed: this is the ONNX engine, the message used to say
            # "paddleinference"
            raise Exception(f"{self.model_type} not support onnx.")

    @paddle.no_grad()
    def decode_one_chunk(self, x_chunk, x_chunk_lens):
        """forward one chunk frames

        Feeds the chunk together with the current RNN states to the
        predictor, stores the returned states for the next chunk and advances
        the CTC decoder.

        Args:
            x_chunk (np.ndarray): (B,T,D), audio frames.
            x_chunk_lens (np.ndarray): (B,), audio frame lens

        Returns:
            The first entry of the decoder's best results after this chunk
            (the value that ``decode`` stores as the transcript).
        """
        logger.info("start to decoce one chunk for deepspeech2")
        # state_c, state_h, audio_lens, audio
        # 'chunk_state_c_box', 'chunk_state_h_box', 'audio_chunk_lens', 'audio_chunk'
        input_names = [n.name for n in self.am_predictor.get_inputs()]
        logger.info(f"ort inputs: {input_names}")
        # 'softmax_0.tmp_0', 'tmp_5', 'concat_0.tmp_0', 'concat_1.tmp_0'
        # audio, audio_lens, state_h, state_c
        output_names = [n.name for n in self.am_predictor.get_outputs()]
        logger.info(f"ort outpus: {output_names}")
        assert (len(input_names) == len(output_names))
        assert isinstance(input_names[0], str)

        # the data order must match the order of the model inputs noted above
        input_datas = [
            self.chunk_state_c_box, self.chunk_state_h_box, x_chunk_lens,
            x_chunk
        ]
        feeds = dict(zip(input_names, input_datas))

        outputs = self.am_predictor.run(output_names, feeds)

        # carry the RNN states over to the next chunk
        output_chunk_probs, output_chunk_lens, self.chunk_state_h_box, self.chunk_state_c_box = outputs
        self.decoder.next(output_chunk_probs, output_chunk_lens)
        trans_best, trans_beam = self.decoder.decode()
        logger.info(f"decode one best result for deepspeech2: {trans_best[0]}")
        return trans_best[0]

    def get_result(self):
        """return partial/ending asr result.

        Returns:
            str: one best result of partial/ending, or an empty string when
            no transcript is stored.
        """
        if len(self.result_transcripts) > 0:
            return self.result_transcripts[0]
        else:
            return ''

    def get_word_time_stamp(self):
        """Return word time stamps; this handler always returns an empty list."""
        return []

    @paddle.no_grad()
    def rescoring(self):
        """No-op placeholder; this handler performs no rescoring."""
        ...


class ASRServerExecutor(ASRExecutor):
    """Executor that loads the online ASR model in ONNX format.

    Only model types containing ``"deepspeech2"`` are supported.
    """

    def __init__(self):
        super().__init__()
        # resource lookup for the online ASR task with ONNX model files
        self.task_resource = CommonTaskResource(
            task='asr', model_format='onnx', inference_mode='online')

    def update_config(self) -> None:
        """Resolve the language model path and download the language model.

        The configured ``decode.lang_model_path`` is re-rooted under
        ``MODEL_HOME/language_model``; the language model is then fetched
        into that directory with ``self.download_lm`` (not defined in this
        class; presumably inherited from ``ASRExecutor``).

        Raises:
            NotImplementedError: if the model type does not contain
                ``"deepspeech2"``.
        """
        if "deepspeech2" in self.model_type:
            with UpdateConfig(self.config):
                # download lm
                self.config.decode.lang_model_path = os.path.join(
                    MODEL_HOME, 'language_model',
                    self.config.decode.lang_model_path)

            lm_url = self.task_resource.res_dict['lm_url']
            lm_md5 = self.task_resource.res_dict['lm_md5']
            logger.debug(f"Start to load language model {lm_url}")
            self.download_lm(
                lm_url,
                os.path.dirname(self.config.decode.lang_model_path), lm_md5)
        else:
            # fixed: this is the ONNX executor, the message used to say
            # "paddleinference"
            raise NotImplementedError(f"{self.model_type} not support onnx.")

    def init_model(self) -> None:
        """Create the acoustic-model session from ``self.am_model``.

        Raises:
            NotImplementedError: if the model type does not contain
                ``"deepspeech2"``.
        """

        if "deepspeech2" in self.model_type:
            # AM predictor
            logger.debug("ASR engine start to init the am predictor")
            self.am_predictor = onnx_infer.get_sess(
                model_path=self.am_model, sess_conf=self.am_predictor_conf)
        else:
            # fixed: this is the ONNX executor, the message used to say
            # "paddleinference"
            raise NotImplementedError(f"{self.model_type} not support onnx.")

    def _init_from_path(self,
                        model_type: str=None,
                        am_model: Optional[os.PathLike]=None,
                        am_params: Optional[os.PathLike]=None,
                        lang: str='zh',
                        sample_rate: int=16000,
                        cfg_path: Optional[os.PathLike]=None,
                        decode_method: str='attention_rescoring',
                        num_decoding_left_chunks: int=-1,
                        am_predictor_conf: dict=None):
        """
        Init model and other resources from a specific path.

        Args:
            model_type (str): model name; combined with ``lang`` and the
                sample rate into the resource tag.
            am_model (Optional[os.PathLike]): path of the ONNX model. When
                None, the ``onnx_model`` entry of the task resource is used.
            am_params (Optional[os.PathLike]): must be None for this engine.
            lang (str): language part of the resource tag.
            sample_rate (int): 16000 maps to the tag suffix ``16k``; every
                other value maps to ``8k``.
            cfg_path (Optional[os.PathLike]): model config file. When None,
                the ``cfg_path`` entry of the task resource is used.
            decode_method (str): stored on the executor; not used further in
                this method.
            num_decoding_left_chunks (int): stored on the executor; not used
                further in this method.
            am_predictor_conf (dict): passed to the session factory as
                ``sess_conf`` by ``init_model``.

        Returns:
            bool: False when ``model_type``, ``lang`` or ``sample_rate`` is
            empty, True once the model has been created.

        Raises:
            AssertionError: if ``am_params`` is not None.
        """
        if not model_type or not lang or not sample_rate:
            logger.error(
                "The model type or lang or sample rate is None, please input an valid server parameter yaml"
            )
            return False
        assert am_params is None, "am_params not used in onnx engine"

        self.model_type = model_type
        self.sample_rate = sample_rate
        self.decode_method = decode_method
        self.num_decoding_left_chunks = num_decoding_left_chunks
        # conf for paddleinference predictor or onnx
        self.am_predictor_conf = am_predictor_conf
        logger.debug(f"model_type: {self.model_type}")

        sample_rate_str = '16k' if sample_rate == 16000 else '8k'
        tag = model_type + '-' + lang + '-' + sample_rate_str
        self.task_resource.set_task_model(model_tag=tag)

        if cfg_path is None:
            self.res_path = self.task_resource.res_dir
            self.cfg_path = os.path.join(
                self.res_path, self.task_resource.res_dict['cfg_path'])
        else:
            self.cfg_path = os.path.abspath(cfg_path)
            # the resource root is taken to be two levels above the config
            self.res_path = os.path.dirname(
                os.path.dirname(os.path.abspath(self.cfg_path)))

        self.am_model = os.path.join(self.res_path, self.task_resource.res_dict[
            'onnx_model']) if am_model is None else os.path.abspath(am_model)

        # self.am_params = os.path.join(
        #     self.res_path, self.task_resource.res_dict[
        #         'params']) if am_params is None else os.path.abspath(am_params)

        logger.debug("Load the pretrained model:")
        logger.debug(f"  tag = {tag}")
        logger.debug(f"  res_path: {self.res_path}")
        logger.debug(f"  cfg path: {self.cfg_path}")
        logger.debug(f"  am_model path: {self.am_model}")

        #Init body.
        self.config = CfgNode(new_allowed=True)
        self.config.merge_from_file(self.cfg_path)

        # a relative sentencepiece prefix is resolved against the resource root
        if self.config.spm_model_prefix:
            self.config.spm_model_prefix = os.path.join(
                self.res_path, self.config.spm_model_prefix)
            logger.debug(f"spm model path: {self.config.spm_model_prefix}")

        self.vocab = self.config.vocab_filepath

        self.text_feature = TextFeaturizer(
            unit_type=self.config.unit_type,
            vocab=self.config.vocab_filepath,
            spm_model_prefix=self.config.spm_model_prefix)

        self.update_config()

        # AM predictor
        self.init_model()

        logger.debug(f"create the {model_type} model success")
        return True


class ASREngine(BaseEngine):
    """ASR model resource for the ONNX online engine.

    Owns the server config and an ``ASRServerExecutor``, and creates one
    ``PaddleASRConnectionHanddler`` per connection through ``new_handler``.

    NOTE(review): the previous docstring mentioned a Singleton metaclass;
    that would come from ``BaseEngine``, which is not visible here - confirm.
    """

    def __init__(self):
        super(ASREngine, self).__init__()

    def init_model(self) -> bool:
        """Initialize the executor from the values in ``self.config``.

        Returns:
            bool: True when the executor reports success, False otherwise.
        """
        return bool(
            self.executor._init_from_path(
                model_type=self.config.model_type,
                am_model=self.config.am_model,
                am_params=self.config.am_params,
                lang=self.config.lang,
                sample_rate=self.config.sample_rate,
                cfg_path=self.config.cfg_path,
                decode_method=self.config.decode_method,
                num_decoding_left_chunks=self.config.num_decoding_left_chunks,
                am_predictor_conf=self.config.am_predictor_conf))

    def init(self, config: dict) -> bool:
        """init engine resource

        Args:
            config (dict): engine configuration; must support ``get`` and
                attribute access to the model settings.

        Returns:
            bool: init failed or success

        The process exits with status -1 when the device cannot be set.
        """
        self.config = config
        self.executor = ASRServerExecutor()

        # Resolve the device into a local first: the error path below must be
        # able to name it even when the lookup itself failed before
        # `self.device` was assigned.
        device = None
        try:
            device = self.config.get("device", paddle.get_device())
            self.device = device
            paddle.set_device(self.device)
        except BaseException as e:
            logger.error(
                f"Set device failed, please check if device '{device}' is already used and the parameter 'device' in the yaml file"
            )
            logger.error(
                "If all GPU or XPU is used, you can set the server to 'cpu'")
            # report the underlying cause instead of dropping it
            logger.error(f"Set device error detail: {e}")
            sys.exit(-1)

        logger.debug(f"paddlespeech_server set the device: {self.device}")

        if not self.init_model():
            logger.error(
                "Init the ASR server occurs error, please check the server configuration yaml"
            )
            return False

        logger.info("Initialize ASR server engine successfully on device: %s." %
                    (self.device))
        return True

    def new_handler(self):
        """New handler from model.

        Returns:
            PaddleASRConnectionHanddler: asr handler instance
        """
        return PaddleASRConnectionHanddler(self)

    def preprocess(self, *args, **kwargs):
        """Not used by the online engine; always raises NotImplementedError."""
        raise NotImplementedError("Online not using this.")

    def run(self, *args, **kwargs):
        """Not used by the online engine; always raises NotImplementedError."""
        raise NotImplementedError("Online not using this.")

    def postprocess(self):
        """Not used by the online engine; always raises NotImplementedError."""
        raise NotImplementedError("Online not using this.")


================================================
FILE: paddlespeech/server/engine/asr/online/paddleinference/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/engine/asr/online/paddleinference/asr_engine.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
from typing import ByteString
from typing import Optional

import numpy as np
import paddle
from numpy import float32
from yacs.config import CfgNode

from paddlespeech.audio.transform.transformation import Transformation
from paddlespeech.cli.asr.infer import ASRExecutor
from paddlespeech.cli.log import logger
from paddlespeech.resource import CommonTaskResource
from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
from paddlespeech.s2t.modules.ctc import CTCDecoder
from paddlespeech.s2t.utils.utility import UpdateConfig
from paddlespeech.server.engine.base_engine import BaseEngine
from paddlespeech.server.utils.paddle_predictor import init_predictor
from paddlespeech.utils.env import MODEL_HOME

# Names exported by `from <this module> import *`.
# The spelling "Handdler" matches the class defined below; it is the public
# name, so it must not be "corrected" here without renaming the class too.
__all__ = ['PaddleASRConnectionHanddler', 'ASRServerExecutor', 'ASREngine']


# ASR server connection process class
class PaddleASRConnectionHanddler:
    """Per-connection streaming ASR state for the Paddle Inference deepspeech2 engine.

    One handler is created per connection. It buffers incoming PCM samples,
    converts them to features, forwards the features chunk by chunk through
    the predictor owned by the engine's executor, and keeps the latest best
    transcript.

    Only model types containing ``"deepspeech2"`` are supported; any other
    model type makes ``init_decoder`` raise ``ValueError``.
    """

    def __init__(self, asr_engine):
        """Init a Paddle ASR Connection Handler instance

        Args:
            asr_engine (ASREngine): the global asr engine
        """
        super().__init__()
        logger.debug(
            "create an paddle asr connection handler to process the websocket connection"
        )
        self.config = asr_engine.config  # server config
        self.model_config = asr_engine.executor.config
        self.asr_engine = asr_engine

        # model_type, sample_rate and text_feature are taken from the executor
        self.model_type = self.asr_engine.executor.model_type
        self.sample_rate = self.asr_engine.executor.sample_rate
        # tokens to text
        self.text_feature = self.asr_engine.executor.text_feature

        # feature extraction pipeline built from the model's preprocess config
        self.preprocess_conf = self.model_config.preprocess_config
        self.preprocess_args = {"train": False}
        self.preprocessing = Transformation(self.preprocess_conf)

        # frame window and frame shift, in samples unit; both are read from
        # the first entry of the preprocess pipeline
        self.win_length = self.preprocess_conf.process[0]['win_length']
        self.n_shift = self.preprocess_conf.process[0]['n_shift']

        # the feature extractor and the server must agree on the sample rate
        assert self.preprocess_conf.process[0]['fs'] == self.sample_rate, (
            self.sample_rate, self.preprocess_conf.process[0]['fs'])
        self.frame_shift_in_ms = int(
            self.n_shift / self.preprocess_conf.process[0]['fs'] * 1000)

        self.continuous_decoding = self.config.get("continuous_decoding", False)
        self.init_decoder()
        self.reset()

    def init_decoder(self):
        """Bind the shared predictor and build the CTC decoder.

        Raises:
            ValueError: if the model type does not contain ``"deepspeech2"``.
            AssertionError: if continuous decoding is enabled in the server
                config, which is rejected for deepspeech2.
        """
        if "deepspeech2" in self.model_type:
            assert self.continuous_decoding is False, "ds2 model not support endpoint"
            self.am_predictor = self.asr_engine.executor.am_predictor

            self.decoder = CTCDecoder(
                odim=self.model_config.output_dim,  # <blank> is in  vocab
                enc_n_units=self.model_config.rnn_layer_size * 2,
                blank_id=self.model_config.blank_id,
                dropout_rate=0.0,
                reduction=True,  # sum
                batch_average=True,  # sum / batch_size
                grad_norm_type=self.model_config.get('ctc_grad_norm_type',
                                                     None))

            cfg = self.model_config.decode
            decode_batch_size = 1  # for online
            self.decoder.init_decoder(
                decode_batch_size, self.text_feature.vocab_list,
                cfg.decoding_method, cfg.lang_model_path, cfg.alpha, cfg.beta,
                cfg.beam_size, cfg.cutoff_prob, cfg.cutoff_top_n,
                cfg.num_proc_bsearch)
        else:
            raise ValueError(f"Not supported: {self.model_type}")

    def model_reset(self):
        """Drop the buffered audio samples and the cached features."""
        # cache for audio and feat
        self.remained_wav = None
        self.cached_feat = None

    def output_reset(self):
        """Reset the stored transcript to a single empty string."""
        ## outputs
        # partial/ending decoding results
        self.result_transcripts = ['']

    def reset_continuous_decoding(self):
        """
        when in continuous decoding, reset for next utterance.

        Stores the current frame count as the global frame offset and clears
        the audio/feature caches.
        """
        self.global_frame_offset = self.num_frames
        self.model_reset()

    def reset(self):
        """Reset all per-connection state: RNN states, counters, caches, output.

        Raises:
            NotImplementedError: if the model type does not contain
                ``"deepspeech2"``.
        """
        if "deepspeech2" in self.model_type:
            # for deepspeech2
            # init state: zero h/c boxes of shape
            # (num_rnn_layers, 1, rnn_layer_size)
            self.chunk_state_h_box = np.zeros(
                (self.model_config.num_rnn_layers, 1,
                 self.model_config.rnn_layer_size),
                dtype=float32)
            self.chunk_state_c_box = np.zeros(
                (self.model_config.num_rnn_layers, 1,
                 self.model_config.rnn_layer_size),
                dtype=float32)
            self.decoder.reset_decoder(batch_size=1)
        else:
            raise NotImplementedError(f"{self.model_type} not support.")

        # filled in by extract_feat from the first feature tensor
        self.device = None

        ## common
        # global sample and frame step
        self.num_samples = 0
        self.global_frame_offset = 0
        # frame step of cur utterance
        self.num_frames = 0

        ## endpoint
        self.endpoint_state = False  # True for detect endpoint

        ## audio/feature caches
        self.model_reset()

        ## outputs
        self.output_reset()

    def extract_feat(self, samples: ByteString):
        """Append a PCM package to the buffer and extract features from it.

        Args:
            samples (ByteString): raw bytes, interpreted as int16 samples.

        Returns:
            ``0`` when the buffered audio is still shorter than one feature
            window (nothing is extracted); otherwise ``None`` after the
            feature cache has been updated. The two different values are
            kept as they are for backward compatibility.
        """
        logger.info("Online ASR extract the feat")
        samples = np.frombuffer(samples, dtype=np.int16)
        assert samples.ndim == 1

        self.num_samples += samples.shape[0]
        logger.debug(
            f"This package receive {samples.shape[0]} pcm data. Global samples:{self.num_samples}"
        )

        # self.remained_wav stores all the samples,
        # include the original remained_wav and this package samples
        if self.remained_wav is None:
            self.remained_wav = samples
        else:
            assert self.remained_wav.ndim == 1  # (T,)
            self.remained_wav = np.concatenate([self.remained_wav, samples])
        logger.debug(
            f"The concatenation of remain and now audio samples length is: {self.remained_wav.shape}"
        )

        if len(self.remained_wav) < self.win_length:
            # samples not enough for feature window
            return 0

        # run the preprocessing pipeline and add a leading batch axis
        x_chunk = self.preprocessing(self.remained_wav, **self.preprocess_args)
        x_chunk = paddle.to_tensor(x_chunk, dtype="float32").unsqueeze(axis=0)

        # feature cache: concatenate along the time axis
        if self.cached_feat is None:
            self.cached_feat = x_chunk
        else:
            assert (len(x_chunk.shape) == 3)  # (B,T,D)
            assert (len(self.cached_feat.shape) == 3)  # (B,T,D)
            self.cached_feat = paddle.concat(
                [self.cached_feat, x_chunk], axis=1)

        # set the feat device
        if self.device is None:
            self.device = self.cached_feat.place

        # cur frame step
        num_frames = x_chunk.shape[1]

        # global frame step
        self.num_frames += num_frames

        # update remained wav: drop the samples consumed by the new frames
        self.remained_wav = self.remained_wav[self.n_shift * num_frames:]

        logger.debug(
            f"process the audio feature success, the cached feat shape: {self.cached_feat.shape}"
        )
        logger.debug(
            f"After extract feat, the cached remain the audio samples: {self.remained_wav.shape}"
        )
        logger.debug(f"global samples: {self.num_samples}")
        logger.debug(f"global frames: {self.num_frames}")

    def decode(self, is_finished=False):
        """advance decoding

        Slides a window over ``self.cached_feat``, forwards every window with
        ``decode_one_chunk`` and stores the newest best transcript in
        ``self.result_transcripts``. Afterwards only the frames starting at
        ``end - (context - subsampling)`` are kept in the feature cache.

        Args:
            is_finished (bool, optional): Is last frame or not. Defaults to False.

        Returns:
            ``None`` after a decoding pass and when no features are cached;
            the tuple ``(None, None)`` when too few frames are cached to run
            the model. The mixed return values are kept for backward
            compatibility; the result is read through ``get_result``.

        Raises:
            Exception: if the model type does not contain ``"deepspeech2"``.
        """
        if "deepspeech2" in self.model_type:
            decoding_chunk_size = 1  # decoding chunk size = 1. int decoding frame unit

            context = 7  # context=7, in audio frame unit
            subsampling = 4  # subsampling=4, in audio frame unit

            cached_feature_num = context - subsampling
            # decoding window for model, in audio frame unit
            decoding_window = (decoding_chunk_size - 1) * subsampling + context
            # decoding stride for model, in audio frame unit
            stride = subsampling * decoding_chunk_size

            if self.cached_feat is None:
                logger.info("no audio feat, please input more pcm data")
                return

            num_frames = self.cached_feat.shape[1]
            logger.debug(
                f"Required decoding window {decoding_window} frames, and the connection has {num_frames} frames"
            )

            # the cached feat must be larger decoding_window
            if num_frames < decoding_window and not is_finished:
                logger.debug(
                    f"frame feat num is less than {decoding_window}, please input more pcm data"
                )
                return None, None

            # if is_finished=True, we need at least context frames
            if num_frames < context:
                # fixed: the "f" prefix used to sit inside the string
                # ("flast ..."), so the placeholders were logged literally
                logger.info(
                    f"last {num_frames} is less than context {context} frames, and we cannot do model forward"
                )
                return None, None

            logger.info("start to do model forward")
            # num_frames - context + 1 ensure that current frame can get context window
            if is_finished:
                # if get the finished chunk, we need process the last context
                left_frames = context
            else:
                # we only process decoding_window frames for one chunk
                left_frames = decoding_window

            # The two guards above guarantee num_frames >= left_frames, so
            # the loop body runs at least once and both `end` and
            # `trans_best` are bound when the loop exits.
            end = None
            for cur in range(0, num_frames - left_frames + 1, stride):
                end = min(cur + decoding_window, num_frames)

                # extract the audio
                x_chunk = self.cached_feat[:, cur:end, :].numpy()
                x_chunk_lens = np.array([x_chunk.shape[1]])

                trans_best = self.decode_one_chunk(x_chunk, x_chunk_lens)

            self.result_transcripts = [trans_best]

            # update feat cache
            self.cached_feat = self.cached_feat[:, end - cached_feature_num:, :]

            # return trans_best[0]
        else:
            raise Exception(f"{self.model_type} not support paddleinference.")

    @paddle.no_grad()
    def decode_one_chunk(self, x_chunk, x_chunk_lens):
        """forward one chunk frames

        Copies the chunk and the current RNN states into the predictor's
        input handles, runs the predictor, stores the returned states for the
        next chunk and advances the CTC decoder.

        Args:
            x_chunk (np.ndarray): (B,T,D), audio frames.
            x_chunk_lens (np.ndarray): (B,), audio frame lens

        Returns:
            The first entry of the decoder's best results after this chunk
            (the value that ``decode`` stores as the transcript).
        """
        logger.debug("start to decoce one chunk for deepspeech2")
        # inputs are addressed by position: audio, audio lens, state h, state c
        input_names = self.am_predictor.get_input_names()
        audio_handle = self.am_predictor.get_input_handle(input_names[0])
        audio_len_handle = self.am_predictor.get_input_handle(input_names[1])
        h_box_handle = self.am_predictor.get_input_handle(input_names[2])
        c_box_handle = self.am_predictor.get_input_handle(input_names[3])

        audio_handle.reshape(x_chunk.shape)
        audio_handle.copy_from_cpu(x_chunk)

        audio_len_handle.reshape(x_chunk_lens.shape)
        audio_len_handle.copy_from_cpu(x_chunk_lens)

        h_box_handle.reshape(self.chunk_state_h_box.shape)
        h_box_handle.copy_from_cpu(self.chunk_state_h_box)

        c_box_handle.reshape(self.chunk_state_c_box.shape)
        c_box_handle.copy_from_cpu(self.chunk_state_c_box)

        # outputs are addressed by position: probs, lens, state h, state c
        output_names = self.am_predictor.get_output_names()
        output_handle = self.am_predictor.get_output_handle(output_names[0])
        output_lens_handle = self.am_predictor.get_output_handle(
            output_names[1])
        output_state_h_handle = self.am_predictor.get_output_handle(
            output_names[2])
        output_state_c_handle = self.am_predictor.get_output_handle(
            output_names[3])

        self.am_predictor.run()

        output_chunk_probs = output_handle.copy_to_cpu()
        output_chunk_lens = output_lens_handle.copy_to_cpu()
        # carry the RNN states over to the next chunk
        self.chunk_state_h_box = output_state_h_handle.copy_to_cpu()
        self.chunk_state_c_box = output_state_c_handle.copy_to_cpu()

        self.decoder.next(output_chunk_probs, output_chunk_lens)
        trans_best, trans_beam = self.decoder.decode()
        logger.info(f"decode one best result for deepspeech2: {trans_best[0]}")
        return trans_best[0]

    def get_result(self):
        """return partial/ending asr result.

        Returns:
            str: one best result of partial/ending, or an empty string when
            no transcript is stored.
        """
        if len(self.result_transcripts) > 0:
            return self.result_transcripts[0]
        else:
            return ''

    def get_word_time_stamp(self):
        """Return word time stamps; this handler always returns an empty list."""
        return []

    @paddle.no_grad()
    def rescoring(self):
        """No-op placeholder; this handler performs no rescoring."""
        ...


class ASRServerExecutor(ASRExecutor):
    """Server-side ASR executor bound to the static, online model resources."""

    def __init__(self):
        super().__init__()
        self.task_resource = CommonTaskResource(
            task='asr', model_format='static', inference_mode='online')

    def update_config(self) -> None:
        """Re-root the language model path in the decode config and fetch the LM.

        Raises:
            NotImplementedError: if ``self.model_type`` does not contain
                "deepspeech2".
        """
        if "deepspeech2" not in self.model_type:
            raise NotImplementedError(
                f"{self.model_type} not support paddleinference.")

        with UpdateConfig(self.config):
            # the configured LM path is placed under MODEL_HOME/language_model
            decode_cfg = self.config.decode
            decode_cfg.lang_model_path = os.path.join(
                MODEL_HOME, 'language_model', decode_cfg.lang_model_path)

        lm_url = self.task_resource.res_dict['lm_url']
        lm_md5 = self.task_resource.res_dict['lm_md5']
        logger.debug(f"Start to load language model {lm_url}")
        lm_dir = os.path.dirname(self.config.decode.lang_model_path)
        self.download_lm(lm_url, lm_dir, lm_md5)

    def init_model(self) -> None:
        """Create ``self.am_predictor`` from the model/params files.

        Raises:
            NotImplementedError: if ``self.model_type`` does not contain
                "deepspeech2".
        """
        if "deepspeech2" not in self.model_type:
            raise NotImplementedError(
                f"{self.model_type} not support paddleinference.")

        # AM predictor
        logger.debug("ASR engine start to init the am predictor")
        self.am_predictor = init_predictor(
            model_file=self.am_model,
            params_file=self.am_params,
            predictor_conf=self.am_predictor_conf)

    def _init_from_path(self,
                        model_type: str=None,
                        am_model: Optional[os.PathLike]=None,
                        am_params: Optional[os.PathLike]=None,
                        lang: str='zh',
                        sample_rate: int=16000,
                        cfg_path: Optional[os.PathLike]=None,
                        decode_method: str='attention_rescoring',
                        num_decoding_left_chunks: int=-1,
                        am_predictor_conf: dict=None):
        """Load the config, text featurizer and acoustic model for the server.

        When any of ``cfg_path``, ``am_model`` or ``am_params`` is None, all
        three are taken from the task resource selected by the model tag;
        otherwise the given paths are used.

        Returns:
            bool: False when ``model_type``, ``lang`` or ``sample_rate`` is
                empty, True once initialisation has finished.
        """
        if not (model_type and lang and sample_rate):
            logger.error(
                "The model type or lang or sample rate is None, please input an valid server parameter yaml"
            )
            return False

        self.model_type = model_type
        self.sample_rate = sample_rate
        self.decode_method = decode_method
        self.num_decoding_left_chunks = num_decoding_left_chunks
        # conf for paddleinference predictor or onnx
        self.am_predictor_conf = am_predictor_conf
        logger.debug(f"model_type: {self.model_type}")

        # resource tag looks like "<model_type>-<lang>-<16k|8k>"
        if sample_rate == 16000:
            sample_rate_str = '16k'
        else:
            sample_rate_str = '8k'
        tag = '-'.join([model_type, lang, sample_rate_str])
        self.task_resource.set_task_model(model_tag=tag)

        use_task_resource = any(
            item is None for item in (cfg_path, am_model, am_params))
        if use_task_resource:
            resources = self.task_resource
            self.res_path = resources.res_dir
            self.cfg_path = os.path.join(self.res_path,
                                         resources.res_dict['cfg_path'])
            self.am_model = os.path.join(self.res_path,
                                         resources.res_dict['model'])
            self.am_params = os.path.join(self.res_path,
                                          resources.res_dict['params'])
        else:
            self.cfg_path = os.path.abspath(cfg_path)
            self.am_model = os.path.abspath(am_model)
            self.am_params = os.path.abspath(am_params)
            # resource root is two directory levels above the config file
            cfg_dir = os.path.dirname(os.path.abspath(self.cfg_path))
            self.res_path = os.path.dirname(cfg_dir)

        logger.debug("Load the pretrained model:")
        logger.debug(f"  tag = {tag}")
        logger.debug(f"  res_path: {self.res_path}")
        logger.debug(f"  cfg path: {self.cfg_path}")
        logger.debug(f"  am_model path: {self.am_model}")
        logger.debug(f"  am_params path: {self.am_params}")

        # Init body.
        self.config = CfgNode(new_allowed=True)
        self.config.merge_from_file(self.cfg_path)

        if self.config.spm_model_prefix:
            self.config.spm_model_prefix = os.path.join(
                self.res_path, self.config.spm_model_prefix)
            logger.debug(f"spm model path: {self.config.spm_model_prefix}")

        self.vocab = self.config.vocab_filepath

        self.text_feature = TextFeaturizer(
            unit_type=self.config.unit_type,
            vocab=self.config.vocab_filepath,
            spm_model_prefix=self.config.spm_model_prefix)

        self.update_config()

        # AM predictor
        self.init_model()

        logger.debug(f"create the {model_type} model success")
        return True


class ASREngine(BaseEngine):
    """ASR model resource

    Args:
        metaclass: Defaults to Singleton.
    """

    def __init__(self):
        super(ASREngine, self).__init__()

    def init_model(self) -> bool:
        """Initialise the executor from the values in ``self.config``.

        Returns:
            bool: True when the executor reports success, False otherwise.
        """
        return bool(
            self.executor._init_from_path(
                model_type=self.config.model_type,
                am_model=self.config.am_model,
                am_params=self.config.am_params,
                lang=self.config.lang,
                sample_rate=self.config.sample_rate,
                cfg_path=self.config.cfg_path,
                decode_method=self.config.decode_method,
                num_decoding_left_chunks=self.config.num_decoding_left_chunks,
                am_predictor_conf=self.config.am_predictor_conf))

    def init(self, config: dict) -> bool:
        """init engine resource

        Args:
            config (dict): server configuration for this engine; it is read
                with ``.get`` and by attribute access.

        Returns:
            bool: init failed or success
        """
        self.config = config
        self.executor = ASRServerExecutor()

        # Give `device` a value before anything can fail, so the error
        # handler below can always format it instead of raising
        # AttributeError and hiding the real problem.
        self.device = None
        try:
            self.device = self.config.get("device", paddle.get_device())
            paddle.set_device(self.device)
        except Exception as e:
            logger.error(
                f"Set device failed, please check if device '{self.device}' is already used and the parameter 'device' in the yaml file"
            )
            logger.error(
                "If all GPU or XPU is used, you can set the server to 'cpu'")
            logger.error(f"Set device error: {e}")
            sys.exit(-1)

        logger.debug(f"paddlespeech_server set the device: {self.device}")

        if not self.init_model():
            logger.error(
                "Init the ASR server occurs error, please check the server configuration yaml"
            )
            return False

        logger.info("Initialize ASR server engine successfully on device: %s." %
                    (self.device))

        return True

    def new_handler(self):
        """New handler from model.

        Returns:
            PaddleASRConnectionHanddler: asr handler instance
        """
        return PaddleASRConnectionHanddler(self)

    def preprocess(self, *args, **kwargs):
        """Not used by the online engine; always raises NotImplementedError."""
        raise NotImplementedError("Online not using this.")

    def run(self, *args, **kwargs):
        """Not used by the online engine; always raises NotImplementedError."""
        raise NotImplementedError("Online not using this.")

    def postprocess(self):
        """Not used by the online engine; always raises NotImplementedError."""
        raise NotImplementedError("Online not using this.")


================================================
FILE: paddlespeech/server/engine/asr/online/python/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/engine/asr/online/python/asr_engine.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
from typing import ByteString
from typing import Optional

import numpy as np
import paddle
from numpy import float32
from yacs.config import CfgNode

from paddlespeech.audio.transform.transformation import Transformation
from paddlespeech.cli.asr.infer import ASRExecutor
from paddlespeech.cli.log import logger
from paddlespeech.resource import CommonTaskResource
from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
from paddlespeech.s2t.modules.ctc import CTCDecoder
from paddlespeech.s2t.utils.tensor_utils import add_sos_eos
from paddlespeech.s2t.utils.tensor_utils import pad_sequence
from paddlespeech.s2t.utils.utility import UpdateConfig
from paddlespeech.server.engine.asr.online.ctc_endpoint import OnlineCTCEndpoingOpt
from paddlespeech.server.engine.asr.online.ctc_endpoint import OnlineCTCEndpoint
from paddlespeech.server.engine.asr.online.ctc_search import CTCPrefixBeamSearch
from paddlespeech.server.engine.base_engine import BaseEngine
from paddlespeech.server.utils.paddle_predictor import init_predictor
from paddlespeech.utils.env import MODEL_HOME

# Public names exported by this module.
__all__ = ['PaddleASRConnectionHanddler', 'ASRServerExecutor', 'ASREngine']


# ASR server connection process class
class PaddleASRConnectionHanddler:
    def __init__(self, asr_engine):
        """Build a per-connection handler on top of the shared ASR engine.

        Args:
            asr_engine (ASREngine): the global asr engine
        """
        super().__init__()
        logger.debug(
            "create an paddle asr connection handler to process the websocket connection"
        )
        executor = asr_engine.executor

        self.config = asr_engine.config  # server config
        self.model_config = executor.config
        self.asr_engine = asr_engine

        # shared by the deepspeech2 and conformer code paths
        self.model_type = executor.model_type
        self.sample_rate = executor.sample_rate
        # converts token ids back to text
        self.text_feature = executor.text_feature

        # feature extraction pipeline built from the model's preprocess config
        self.preprocess_conf = self.model_config.preprocess_config
        self.preprocess_args = {"train": False}
        self.preprocessing = Transformation(self.preprocess_conf)

        # frame window and frame shift (in samples) come from the first
        # preprocessing step
        first_step = self.preprocess_conf.process[0]
        self.win_length = first_step['win_length']
        self.n_shift = first_step['n_shift']

        feat_fs = first_step['fs']
        assert feat_fs == self.sample_rate, (self.sample_rate, feat_fs)
        self.frame_shift_in_ms = int(self.n_shift / feat_fs * 1000)

        self.continuous_decoding = self.config.get("continuous_decoding", False)
        self.init_decoder()
        self.reset()

    def init_decoder(self):
        """Create the decoding components that match ``self.model_type``.

        * deepspeech2: reuses the engine's predictor and builds a
          ``CTCDecoder`` initialised from ``model_config.decode``.
        * conformer / transformer: reuses the engine's model and builds a
          ``CTCPrefixBeamSearch`` searcher plus an ``OnlineCTCEndpoint``.

        Raises:
            ValueError: if the model type is none of the above.
        """
        if "deepspeech2" in self.model_type:
            assert self.continuous_decoding is False, "ds2 model not support endpoint"
            self.am_predictor = self.asr_engine.executor.am_predictor

            self.decoder = CTCDecoder(
                odim=self.model_config.output_dim,  # <blank> is in  vocab
                enc_n_units=self.model_config.rnn_layer_size * 2,
                blank_id=self.model_config.blank_id,
                dropout_rate=0.0,
                reduction=True,  # sum
                batch_average=True,  # sum / batch_size
                grad_norm_type=self.model_config.get('ctc_grad_norm_type',
                                                     None))

            cfg = self.model_config.decode
            decode_batch_size = 1  # for online
            self.decoder.init_decoder(
                decode_batch_size, self.text_feature.vocab_list,
                cfg.decoding_method, cfg.lang_model_path, cfg.alpha, cfg.beta,
                cfg.beam_size, cfg.cutoff_prob, cfg.cutoff_top_n,
                cfg.num_proc_bsearch)

        elif "conformer" in self.model_type or "transformer" in self.model_type:
            # acoustic model
            self.model = self.asr_engine.executor.model
            # Read the flag with a default, exactly as __init__ does, so a
            # server config without the key does not fail here.
            self.continuous_decoding = self.config.get("continuous_decoding",
                                                       False)
            logger.debug(f"continue decoding: {self.continuous_decoding}")

            # ctc decoding config
            self.ctc_decode_config = self.asr_engine.executor.config.decode
            self.searcher = CTCPrefixBeamSearch(self.ctc_decode_config)

            # ctc endpoint
            self.endpoint_opt = OnlineCTCEndpoingOpt(
                frame_shift_in_ms=self.frame_shift_in_ms, blank=0)
            self.endpointer = OnlineCTCEndpoint(self.endpoint_opt)
        else:
            raise ValueError(f"Not supported: {self.model_type}")

    def model_reset(self):
        """Clear the audio/feature caches and, for non-deepspeech2 models,
        the streaming encoder state.
        """
        # cache for audio and feat
        self.remained_wav, self.cached_feat = None, None

        if "deepspeech2" not in self.model_type:
            ## conformer
            # empty attention / convolution caches for online decoding
            self.att_cache = paddle.zeros([0, 0, 0, 0])
            self.cnn_cache = paddle.zeros([0, 0, 0, 0])

            self.encoder_out = None
            # global offset, in decoding frame unit
            self.offset = 0

            ## just for record info
            # global decoding chunk num, not used
            self.chunk_num = 0

    def output_reset(self):
        """Reset every output field to its empty initial value."""
        # partial/ending decoding results
        self.result_transcripts = ['']

        # token timestamp result, record-only hyps, and the one-best
        # timestamps all start out as separate empty lists
        self.word_time_stamp = list()
        self.hyps = list()
        self.time_stamp = list()

    def reset_continuous_decoding(self):
        """Prepare for the next utterance while in continuous decoding.

        The frames consumed so far become the new global frame offset, then
        the model caches, the searcher and the endpointer are reset.
        """
        frames_so_far = self.num_frames
        self.global_frame_offset = frames_so_far

        self.model_reset()
        self.searcher.reset()
        self.endpointer.reset()

        # output_reset() is intentionally not called here: resetting the hyps
        # would truncate the history transcripts.

    def reset(self):
        """Put the handler back into its initial state."""
        if "deepspeech2" in self.model_type:
            # zero-filled h/c states of shape (num_rnn_layers, 1, rnn_layer_size)
            state_shape = (self.model_config.num_rnn_layers, 1,
                           self.model_config.rnn_layer_size)
            self.chunk_state_h_box = np.zeros(state_shape, dtype=float32)
            self.chunk_state_c_box = np.zeros(state_shape, dtype=float32)
            self.decoder.reset_decoder(batch_size=1)

        is_attention_model = ("conformer" in self.model_type or
                              "transformer" in self.model_type)
        if is_attention_model:
            self.searcher.reset()
            self.endpointer.reset()

        self.device = None

        ## common
        # global sample and frame counters
        self.num_samples = 0
        self.global_frame_offset = 0
        # frame counter of the current utterance
        self.num_frames = 0

        ## endpoint
        # becomes True once an endpoint is detected
        self.endpoint_state = False

        ## model caches, then outputs
        self.model_reset()
        self.output_reset()

    def extract_feat(self, samples: ByteString):
        """Convert one packet of raw PCM bytes into features and cache them.

        The bytes are read as int16 samples and appended to the samples left
        over from earlier packets. Once at least ``self.win_length`` samples
        are buffered they go through ``self.preprocessing``; the result is
        concatenated onto ``self.cached_feat`` along axis 1 and the consumed
        samples (``n_shift`` per produced frame) are dropped from the buffer.

        Args:
            samples (ByteString): raw audio bytes, interpreted as int16.

        Returns:
            0 when too few samples are buffered to extract anything,
            otherwise None.
        """
        logger.info("Online ASR extract the feat")
        samples = np.frombuffer(samples, dtype=np.int16)
        assert samples.ndim == 1

        self.num_samples += samples.shape[0]
        logger.debug(
            f"This package receive {samples.shape[0]} pcm data. Global samples:{self.num_samples}"
        )

        # remained_wav holds the unconsumed samples from earlier packets
        # followed by the samples of this packet
        if self.remained_wav is not None:
            assert self.remained_wav.ndim == 1  # (T,)
            self.remained_wav = np.concatenate([self.remained_wav, samples])
        else:
            self.remained_wav = samples
        logger.debug(
            f"The concatenation of remain and now audio samples length is: {self.remained_wav.shape}"
        )

        # not enough samples for a single feature window yet
        if len(self.remained_wav) < self.win_length:
            return 0

        # run the feature pipeline and add a leading batch axis
        feats = self.preprocessing(self.remained_wav, **self.preprocess_args)
        feats = paddle.to_tensor(feats, dtype="float32").unsqueeze(axis=0)

        # feature cache
        if self.cached_feat is not None:
            assert (len(feats.shape) == 3)  # (B,T,D)
            assert (len(self.cached_feat.shape) == 3)  # (B,T,D)
            self.cached_feat = paddle.concat(
                [self.cached_feat, feats], axis=1)
        else:
            self.cached_feat = feats

        # remember where the features live the first time we see them
        if self.device is None:
            self.device = self.cached_feat.place

        # frames produced by this call, added to the utterance total
        new_frames = feats.shape[1]
        self.num_frames += new_frames

        # drop the samples that have been turned into frames
        consumed = self.n_shift * new_frames
        self.remained_wav = self.remained_wav[consumed:]

        logger.debug(
            f"process the audio feature success, the cached feat shape: {self.cached_feat.shape}"
        )
        logger.debug(
            f"After extract feat, the cached remain the audio samples: {self.remained_wav.shape}"
        )
        logger.debug(f"global samples: {self.num_samples}")
        logger.debug(f"global frames: {self.num_frames}")

    def decode(self, is_finished=False):
        """Advance decoding with the features cached so far.

        For deepspeech2 models the cached features are fed to
        ``decode_one_chunk`` window by window and the last transcript is
        stored in ``self.result_transcripts``. For conformer/transformer
        models the work is delegated to ``advance_decoding`` followed by
        ``update_result``; exceptions raised there are logged, not
        propagated.

        Args:
            is_finished (bool, optional): Is last frame or not. Defaults to False.

        Returns:
            None in the normal case and when no features are cached yet.
            The deepspeech2 branch returns ``(None, None)`` when too few
            frames are cached to run the model.

        Raises:
            Exception: if ``self.model_type`` is not a supported model name.
        """
        if "deepspeech2" in self.model_type:
            decoding_chunk_size = 1  # decoding chunk size = 1. int decoding frame unit

            context = 7  # context=7, in audio frame unit
            subsampling = 4  # subsampling=4, in audio frame unit

            cached_feature_num = context - subsampling
            # decoding window for model, in audio frame unit
            decoding_window = (decoding_chunk_size - 1) * subsampling + context
            # decoding stride for model, in audio frame unit
            stride = subsampling * decoding_chunk_size

            if self.cached_feat is None:
                logger.debug("no audio feat, please input more pcm data")
                return

            num_frames = self.cached_feat.shape[1]
            logger.debug(
                f"Required decoding window {decoding_window} frames, and the connection has {num_frames} frames"
            )

            # the cached feat must be larger decoding_window
            if num_frames < decoding_window and not is_finished:
                logger.debug(
                    f"frame feat num is less than {decoding_window}, please input more pcm data"
                )
                return None, None

            # if is_finished=True, we need at least context frames
            if num_frames < context:
                # the f prefix was missing here, so the placeholders were
                # logged literally
                logger.debug(
                    f"last {num_frames} is less than context {context} frames, and we cannot do model forward"
                )
                return None, None

            logger.info("start to do model forward")
            # num_frames - context + 1 ensure that current frame can get context window
            if is_finished:
                # if get the finished chunk, we need process the last context
                left_frames = context
            else:
                # we only process decoding_window frames for one chunk
                left_frames = decoding_window

            end = None
            for cur in range(0, num_frames - left_frames + 1, stride):
                end = min(cur + decoding_window, num_frames)

                # extract the audio
                x_chunk = self.cached_feat[:, cur:end, :].numpy()
                x_chunk_lens = np.array([x_chunk.shape[1]])

                trans_best = self.decode_one_chunk(x_chunk, x_chunk_lens)

            self.result_transcripts = [trans_best]

            # keep the trailing frames the next window still needs
            self.cached_feat = self.cached_feat[:, end - cached_feature_num:, :]

        elif "conformer" in self.model_type or "transformer" in self.model_type:
            try:
                logger.info(
                    f"we will use the transformer like model : {self.model_type}"
                )
                self.advance_decoding(is_finished)
                self.update_result()

            except Exception as e:
                logger.exception(e)
        else:
            raise Exception("invalid model name")

    @paddle.no_grad()
    def decode_one_chunk(self, x_chunk, x_chunk_lens):
        """Run the acoustic-model predictor on one chunk and decode it.

        The chunk, its lengths and the current h/c states are copied into the
        predictor's first four inputs. After the run, the h/c states are
        replaced by the predictor's outputs and the probabilities are pushed
        into ``self.decoder``.

        Args:
            x_chunk (np.ndarray): (B,T,D), audio frames.
            x_chunk_lens (np.ndarray): (B,), audio frame lens

        Returns:
            The first entry of the best transcripts returned by
            ``self.decoder.decode()``.
        """
        logger.debug("start to decoce one chunk for deepspeech2")
        predictor = self.am_predictor

        # inputs in predictor order: audio, audio lengths, h state, c state
        input_names = predictor.get_input_names()
        input_handles = [
            predictor.get_input_handle(input_names[idx]) for idx in range(4)
        ]
        feeds = (x_chunk, x_chunk_lens, self.chunk_state_h_box,
                 self.chunk_state_c_box)
        for handle, array in zip(input_handles, feeds):
            handle.reshape(array.shape)
            handle.copy_from_cpu(array)

        # outputs in predictor order: probs, lengths, new h state, new c state
        output_names = predictor.get_output_names()
        probs_handle, lens_handle, state_h_handle, state_c_handle = [
            predictor.get_output_handle(output_names[idx]) for idx in range(4)
        ]

        predictor.run()

        output_chunk_probs = probs_handle.copy_to_cpu()
        output_chunk_lens = lens_handle.copy_to_cpu()
        # carry the recurrent state over to the next chunk
        self.chunk_state_h_box = state_h_handle.copy_to_cpu()
        self.chunk_state_c_box = state_c_handle.copy_to_cpu()

        self.decoder.next(output_chunk_probs, output_chunk_lens)
        trans_best, trans_beam = self.decoder.decode()
        logger.debug(f"decode one best result for deepspeech2: {trans_best[0]}")
        return trans_best[0]

    @paddle.no_grad()
    def advance_decoding(self, is_finished=False):
        """Streaming forward pass plus CTC prefix beam search for
        conformer/transformer models.

        The cached features are cut into decoding windows and each window is
        passed through ``self.model.encoder.forward_chunk``. The chunk outputs
        are appended to ``self.encoder_out`` and their CTC log-probabilities
        are fed to ``self.searcher``. Unless ``is_finished`` is set, the
        endpointer is consulted and ``self.endpoint_state`` updated. Finally
        the processed frames are dropped from ``self.cached_feat``.

        Args:
            is_finished (bool, optional): whether this is the last call for
                the utterance. Defaults to False.

        Returns:
            None in the normal case, for deepspeech models, and when no
            features are cached. ``(None, None)`` is returned when too few
            frames are cached to run the model.
        """
        if "deepspeech" in self.model_type:
            return

        # reset endpoint state
        self.endpoint_state = False

        logger.debug(
            "Conformer/Transformer: start to decode with advanced_decoding method"
        )
        cfg = self.ctc_decode_config

        # cur chunk size, in decoding frame unit, e.g. 16
        decoding_chunk_size = cfg.decoding_chunk_size
        # using num of history chunks, e.g -1
        num_decoding_left_chunks = cfg.num_decoding_left_chunks
        assert decoding_chunk_size > 0

        # e.g. 4
        subsampling = self.model.encoder.embed.subsampling_rate
        # e.g. 7
        context = self.model.encoder.embed.right_context + 1

        # processed chunk feature cached for next chunk, e.g. 3
        cached_feature_num = context - subsampling

        # decoding window, in audio frame unit
        decoding_window = (decoding_chunk_size - 1) * subsampling + context
        # decoding stride, in audio frame unit
        stride = subsampling * decoding_chunk_size

        if self.cached_feat is None:
            logger.debug("no audio feat, please input more pcm data")
            return

        # (B=1,T,D)
        num_frames = self.cached_feat.shape[1]
        logger.debug(
            f"Required decoding window {decoding_window} frames, and the connection has {num_frames} frames"
        )

        # the cached feat must be larger decoding_window
        if num_frames < decoding_window and not is_finished:
            logger.debug(
                f"frame feat num is less than {decoding_window}, please input more pcm data"
            )
            return None, None

        # if is_finished=True, we need at least context frames
        if num_frames < context:
            # the f prefix was missing here, so the placeholders were logged
            # literally
            logger.debug(
                f"last {num_frames} is less than context {context} frames, and we cannot do model forward"
            )
            return None, None

        logger.info("start to do model forward")

        # num_frames - context + 1 ensure that current frame can get context window
        if is_finished:
            # if get the finished chunk, we need process the last context
            left_frames = context
        else:
            # we only process decoding_window frames for one chunk
            left_frames = decoding_window

        # history of chunks, in decoding frame unit
        required_cache_size = decoding_chunk_size * num_decoding_left_chunks

        # record the end for removing the processed feat
        outputs = []
        end = None
        for cur in range(0, num_frames - left_frames + 1, stride):
            end = min(cur + decoding_window, num_frames)

            # global chunk_num
            self.chunk_num += 1
            # cur chunk
            chunk_xs = self.cached_feat[:, cur:end, :]
            # forward chunk
            (y, self.att_cache,
             self.cnn_cache) = self.model.encoder.forward_chunk(
                 chunk_xs,
                 self.offset,
                 required_cache_size,
                 att_cache=self.att_cache,
                 cnn_cache=self.cnn_cache)
            outputs.append(y)

            # update the global offset, in decoding frame unit
            self.offset += y.shape[1]

        # paddle.concat is the documented API and is what the rest of this
        # method already uses
        ys = paddle.concat(outputs, axis=1)
        if self.encoder_out is None:
            self.encoder_out = ys
        else:
            self.encoder_out = paddle.concat([self.encoder_out, ys], axis=1)
        logger.debug(
            f"This connection handler encoder out shape: {self.encoder_out.shape}"
        )

        # get the ctc probs
        ctc_probs = self.model.ctc.log_softmax(ys)  # (1, maxlen, vocab_size)
        ctc_probs = ctc_probs.squeeze(0)

        ## decoding
        # advance decoding
        self.searcher.search(ctc_probs, self.cached_feat.place)
        # get one best hyps
        self.hyps = self.searcher.get_one_best_hyps()

        # endpoint
        if not is_finished:

            def contain_nonsilence():
                return len(self.hyps) > 0 and len(self.hyps[0]) > 0

            decoding_something = contain_nonsilence()
            if self.endpointer.endpoint_detected(ctc_probs.numpy(),
                                                 decoding_something):
                self.endpoint_state = True
                logger.debug(
                    f"Endpoint is detected at {self.num_frames} frame.")

        # advance cache of feat
        assert self.cached_feat.shape[0] == 1  #(B=1,T,D)
        assert end >= cached_feature_num
        self.cached_feat = self.cached_feat[:, end - cached_feature_num:, :]
        assert len(
            self.cached_feat.shape
        ) == 3, f"current cache feat shape is: {self.cached_feat.shape}"

    def update_result(self):
        """Turn the current hyps into transcripts (conformer/transformer).

        Each hyp in ``self.hyps`` is converted to text with
        ``self.text_feature.defeaturize``; the token ids themselves are kept
        in ``self.result_tokenids``.
        """
        logger.debug("update the final result")
        token_id_seqs = self.hyps

        transcripts = []
        for token_ids in token_id_seqs:
            transcripts.append(self.text_feature.defeaturize(token_ids))

        # output results and tokenids
        self.result_transcripts = transcripts
        self.result_tokenids = list(token_id_seqs)

    def get_result(self):
        """Fetch the current one-best transcript (partial or final).

        Returns:
            str: the first entry of ``self.result_transcripts``, or an empty
                string when that list is empty.
        """
        transcripts = self.result_transcripts
        return transcripts[0] if len(transcripts) > 0 else ''

    def get_word_time_stamp(self):
        """Expose the token timestamps stored on the handler.

        Returns:
            list: ``self.word_time_stamp``, a list of
                ('w':token, 'bg':time, 'ed':time) entries.
        """
        timestamps = self.word_time_stamp
        return timestamps

    @paddle.no_grad()
    def rescoring(self):
        """Second-Pass Decoding,
        only for conformer and transformer model.

        Finalises the beam search, rescores every beam hypothesis with the
        attention decoder (optionally mixed with the reverse decoder and the
        weighted CTC score), keeps the best one in ``self.hyps``, refreshes
        the transcripts and builds ``self.word_time_stamp``.

        Returns early (None) for deepspeech2 models, when the configured
        decoding method is not "attention_rescoring", or when the searcher
        has no hypotheses.
        """
        if "deepspeech2" in self.model_type:
            logger.debug("deepspeech2 not support rescoring decoding.")
            return

        if "attention_rescoring" != self.ctc_decode_config.decoding_method:
            logger.debug(
                f"decoding method not match: {self.ctc_decode_config.decoding_method}, need attention_rescoring"
            )
            return

        logger.debug("rescoring the final result")

        # last decoding for last audio
        self.searcher.finalize_search()
        # update beam search results
        self.update_result()

        reverse_weight = getattr(self.ctc_decode_config, 'reverse_weight', 0.0)
        hyps = self.searcher.get_hyps()
        if hyps is None or len(hyps) == 0:
            logger.info("No Hyps!")
            return

        # rescore by decoder post probability

        # list of Tensor
        hyp_list = []
        for hyp in hyps:
            hyp_content = hyp[0]
            # Prevent the hyp is empty
            if len(hyp_content) == 0:
                hyp_content = (self.model.ctc.blank_id, )

            # paddle.int64 is the documented dtype name
            hyp_content = paddle.to_tensor(
                hyp_content, place=self.device, dtype=paddle.int64)
            hyp_list.append(hyp_content)

        hyps_pad = pad_sequence(
            hyp_list, batch_first=True, padding_value=self.model.ignore_id)
        hyps_lens = paddle.to_tensor(
            [len(hyp[0]) for hyp in hyps], place=self.device,
            dtype=paddle.int64)  # (beam_size,)
        hyps_pad, _ = add_sos_eos(hyps_pad, self.model.sos, self.model.eos,
                                  self.model.ignore_id)
        hyps_lens = hyps_lens + 1  # Add <sos> at beginning

        # ctc score in ln domain
        # (beam_size, max_hyps_len, vocab_size)
        decoder_out, r_decoder_out = self.model.forward_attention_decoder(
            hyps_pad, hyps_lens, self.encoder_out, reverse_weight)

        decoder_out = decoder_out.numpy()
        # r_decoder_out will be 0.0, if reverse_weight is 0.0 or decoder is a
        # conventional transformer decoder.
        r_decoder_out = r_decoder_out.numpy()

        # Only use decoder score for rescoring
        best_score = -float('inf')
        best_index = 0
        # hyps is List[(Text=List[int], Score=float)], len(hyps)=beam_size
        for i, hyp in enumerate(hyps):
            score = 0.0
            for j, w in enumerate(hyp[0]):
                score += decoder_out[i][j][w]

            # last decoder output token is `eos`, for last decoder input token.
            score += decoder_out[i][len(hyp[0])][self.model.eos]
            if reverse_weight > 0:
                r_score = 0.0
                for j, w in enumerate(hyp[0]):
                    r_score += r_decoder_out[i][len(hyp[0]) - j - 1][w]
                r_score += r_decoder_out[i][len(hyp[0])][self.model.eos]
                score = score * (1 - reverse_weight) + r_score * reverse_weight
            # add ctc score (which in ln domain)
            score += hyp[1] * self.ctc_decode_config.ctc_weight

            if score > best_score:
                best_score = score
                best_index = i

        # update the one best result
        logger.info(f"best hyp index: {best_index}")
        # the fields of each hyp are:
        ## asr results
        # hyps[0][0]: the sentence word-id in the vocab with a tuple
        # hyps[0][1]: the sentence decoding probability with all paths
        ## timestamp
        # hyps[0][2]: viterbi_blank ending probability
        # hyps[0][3]: viterbi_non_blank ending probability
        # hyps[0][4]: current_token_prob,
        # hyps[0][5]: times_viterbi_blank ending timestamp,
        # hyps[0][6]: times_viterbi_non_blank ending timestamp.
        self.hyps = [hyps[best_index][0]]
        logger.info(f"best hyp ids: {self.hyps}")

        # take the timestamps of whichever viterbi ending (blank / non-blank)
        # has the larger probability
        self.time_stamp = hyps[best_index][5] if hyps[best_index][2] > hyps[
            best_index][3] else hyps[best_index][6]
        logger.info(f"time stamp: {self.time_stamp}")

        # update one best result
        self.update_result()

        # update each word start and end time stamp
        # decoding frame to audio frame
        decode_frame_shift = self.model.encoder.embed.subsampling_rate
        decode_frame_shift_in_sec = decode_frame_shift * (self.n_shift /
                                                          self.sample_rate)
        logger.info(f"decode frame shift in sec: {decode_frame_shift_in_sec}")

        global_offset_in_sec = self.global_frame_offset * self.frame_shift_in_ms / 1000.0
        logger.info(f"global offset: {global_offset_in_sec} sec.")

        word_time_stamp = []
        for idx, _ in enumerate(self.time_stamp):
            # a token starts halfway between the previous timestamp and its
            # own; the first token starts at 0
            start = (self.time_stamp[idx - 1] + self.time_stamp[idx]
                     ) / 2.0 if idx > 0 else 0
            start = start * decode_frame_shift_in_sec

            # a token ends halfway to the next timestamp; the last token ends
            # at the current decoding offset
            end = (self.time_stamp[idx] + self.time_stamp[idx + 1]
                   ) / 2.0 if idx < len(self.time_stamp) - 1 else self.offset

            end = end * decode_frame_shift_in_sec
            word_time_stamp.append({
                "w": self.result_transcripts[0][idx],
                "bg": global_offset_in_sec + start,
                "ed": global_offset_in_sec + end
            })

        self.word_time_stamp = word_time_stamp
        logger.info(f"word time stamp: {self.word_time_stamp}")


class ASRServerExecutor(ASRExecutor):
    """Executor that loads the model and decoding config for the online ASR server.

    The executor resolves the model resources (pretrained ones looked up through
    ``self.task_resource`` or files supplied by the caller), loads the yaml
    config, builds the text featurizer and finally creates either a
    deepspeech2 predictor or a conformer/transformer model.
    """

    def __init__(self):
        super().__init__()
        # resources are looked up with model_format='dynamic' and inference_mode='online'
        self.task_resource = CommonTaskResource(
            task='asr', model_format='dynamic', inference_mode='online')

    def update_config(self) -> None:
        """Patch ``self.config`` according to the model type and server options.

        * deepspeech2: rewrite the language model path to live under
          ``MODEL_HOME/language_model`` and download the language model.
        * conformer / transformer: apply ``self.decode_method`` and
          ``self.num_decoding_left_chunks`` and force the decoding method to be
          one of ``ctc_prefix_beam_search`` / ``attention_rescoring``.

        Raises:
            AssertionError: if ``num_decoding_left_chunks`` is neither -1 nor >= 0.
            Exception: if the model type is not supported.
        """
        if "deepspeech2" in self.model_type:
            with UpdateConfig(self.config):
                # download lm
                self.config.decode.lang_model_path = os.path.join(
                    MODEL_HOME, 'language_model',
                    self.config.decode.lang_model_path)

            lm_url = self.task_resource.res_dict['lm_url']
            lm_md5 = self.task_resource.res_dict['lm_md5']
            logger.debug(f"Start to load language model {lm_url}")
            self.download_lm(
                lm_url,
                os.path.dirname(self.config.decode.lang_model_path), lm_md5)
        elif "conformer" in self.model_type or "transformer" in self.model_type:
            with UpdateConfig(self.config):
                logger.debug("start to create the stream conformer asr engine")
                # update the decoding method
                if self.decode_method:
                    self.config.decode.decoding_method = self.decode_method
                # update num_decoding_left_chunks
                # NOTE: compare against None instead of relying on truthiness,
                # otherwise an explicit value of 0 (which the assert below
                # accepts) would be silently ignored.
                if self.num_decoding_left_chunks is not None:
                    assert self.num_decoding_left_chunks == -1 or self.num_decoding_left_chunks >= 0, "num_decoding_left_chunks should be -1 or >=0"
                    self.config.decode.num_decoding_left_chunks = self.num_decoding_left_chunks
                # we only support ctc_prefix_beam_search and attention_rescoring decoding method
                # Generally we set the decoding_method to attention_rescoring
                if self.config.decode.decoding_method not in [
                        "ctc_prefix_beam_search", "attention_rescoring"
                ]:
                    logger.debug(
                        "we set the decoding_method to attention_rescoring")
                    self.config.decode.decoding_method = "attention_rescoring"

                assert self.config.decode.decoding_method in [
                    "ctc_prefix_beam_search", "attention_rescoring"
                ], f"we only support ctc_prefix_beam_search and attention_rescoring dedoding method, current decoding method is {self.config.decode.decoding_method}"
        else:
            raise Exception(f"not support: {self.model_type}")

    def init_model(self) -> None:
        """Create the acoustic model that matches ``self.model_type``.

        * deepspeech2: build ``self.am_predictor`` from the model/params files.
        * conformer / transformer: build ``self.model`` from the config, load
          the weights stored at ``self.am_model`` and switch to eval mode.

        Raises:
            Exception: if the model type is not supported.
        """
        if "deepspeech2" in self.model_type:
            # AM predictor
            logger.debug("ASR engine start to init the am predictor")
            self.am_predictor = init_predictor(
                model_file=self.am_model,
                params_file=self.am_params,
                predictor_conf=self.am_predictor_conf)
        elif "conformer" in self.model_type or "transformer" in self.model_type:
            # load model
            # model_type: {model_name}_{dataset}
            model_name = self.model_type[:self.model_type.rindex('_')]
            logger.debug(f"model name: {model_name}")
            model_class = self.task_resource.get_model_class(model_name)
            model = model_class.from_config(self.config)
            self.model = model
            self.model.set_state_dict(paddle.load(self.am_model))
            self.model.eval()
        else:
            raise Exception(f"not support: {self.model_type}")

    def _init_from_path(self,
                        model_type: str=None,
                        am_model: Optional[os.PathLike]=None,
                        am_params: Optional[os.PathLike]=None,
                        lang: str='zh',
                        codeswitch: Optional[bool]=False,
                        sample_rate: int=16000,
                        cfg_path: Optional[os.PathLike]=None,
                        decode_method: str='attention_rescoring',
                        num_decoding_left_chunks: int=-1,
                        am_predictor_conf: dict=None):
        """Init model and other resources from a specific path.

        Args:
            model_type (str): model tag prefix, e.g. ``{model_name}_{dataset}``.
            am_model (os.PathLike, optional): acoustic model file.
            am_params (os.PathLike, optional): acoustic model params file.
            lang (str): language of the model.
            codeswitch (bool, optional): must be True exactly when ``lang`` is ``zh_en``.
            sample_rate (int): 16000 selects the ``16k`` tag, anything else ``8k``.
            cfg_path (os.PathLike, optional): yaml config of the model.
            decode_method (str): decoding method applied in ``update_config``.
            num_decoding_left_chunks (int): left chunks applied in ``update_config``.
            am_predictor_conf (dict, optional): conf for the predictor.

        Returns:
            bool: False when ``model_type``, ``lang`` or ``sample_rate`` is
            missing, True once the model has been created.

        Raises:
            Exception: if ``lang`` and ``codeswitch`` are inconsistent.
        """
        if not model_type or not lang or not sample_rate:
            logger.error(
                "The model type or lang or sample rate is None, please input an valid server parameter yaml"
            )
            return False

        self.model_type = model_type
        self.sample_rate = sample_rate
        self.decode_method = decode_method
        self.num_decoding_left_chunks = num_decoding_left_chunks
        # conf for paddleinference predictor or onnx
        self.am_predictor_conf = am_predictor_conf
        logger.debug(f"model_type: {self.model_type}")

        sample_rate_str = '16k' if sample_rate == 16000 else '8k'
        if lang == "zh_en" and codeswitch is True:
            tag = model_type + '-' + 'codeswitch_' + lang + '-' + sample_rate_str
        elif lang == "zh_en" or codeswitch is True:
            # exactly one of the two conditions holds here, which is inconsistent
            raise Exception("codeswitch is true only in zh_en model")
        else:
            tag = model_type + '-' + lang + '-' + sample_rate_str
        self.task_resource.set_task_model(model_tag=tag)

        # Fall back to the resources registered for `tag` unless every path
        # (config, model and params) was supplied by the caller.
        if cfg_path is None or am_model is None or am_params is None:
            self.res_path = self.task_resource.res_dir
            self.cfg_path = os.path.join(
                self.res_path, self.task_resource.res_dict['cfg_path'])

            self.am_model = os.path.join(self.res_path,
                                         self.task_resource.res_dict['model'])
            self.am_params = os.path.join(self.res_path,
                                          self.task_resource.res_dict['params'])
        else:
            self.cfg_path = os.path.abspath(cfg_path)
            self.am_model = os.path.abspath(am_model)
            self.am_params = os.path.abspath(am_params)
            # the resource root is taken to be two directory levels above the config file
            self.res_path = os.path.dirname(
                os.path.dirname(os.path.abspath(self.cfg_path)))

        logger.debug("Load the pretrained model:")
        logger.debug(f"  tag = {tag}")
        logger.debug(f"  res_path: {self.res_path}")
        logger.debug(f"  cfg path: {self.cfg_path}")
        logger.debug(f"  am_model path: {self.am_model}")
        logger.debug(f"  am_params path: {self.am_params}")

        #Init body.
        self.config = CfgNode(new_allowed=True)
        self.config.merge_from_file(self.cfg_path)

        if self.config.spm_model_prefix:
            # the prefix stored in the config is relative to the resource root
            self.config.spm_model_prefix = os.path.join(
                self.res_path, self.config.spm_model_prefix)
            logger.debug(f"spm model path: {self.config.spm_model_prefix}")

        self.vocab = self.config.vocab_filepath

        self.text_feature = TextFeaturizer(
            unit_type=self.config.unit_type,
            vocab=self.config.vocab_filepath,
            spm_model_prefix=self.config.spm_model_prefix)

        self.update_config()

        # AM predictor
        self.init_model()

        logger.debug(f"create the {model_type} model success")
        return True


class ASREngine(BaseEngine):
    """ASR server resource

    Holds the server configuration and the shared ``ASRServerExecutor``; each
    connection gets its own handler through ``new_handler``.

    Args:
        metaclass: Defaults to Singleton.
    """

    def __init__(self):
        super(ASREngine, self).__init__()

    def init_model(self) -> bool:
        """Initialize the executor from the values stored in ``self.config``.

        Returns:
            bool: True when the executor reports success, False otherwise.
        """
        if not self.executor._init_from_path(
                model_type=self.config.model_type,
                am_model=self.config.am_model,
                am_params=self.config.am_params,
                lang=self.config.lang,
                codeswitch=self.config.get("codeswitch", False),
                sample_rate=self.config.sample_rate,
                cfg_path=self.config.cfg_path,
                decode_method=self.config.decode_method,
                num_decoding_left_chunks=self.config.num_decoding_left_chunks,
                am_predictor_conf=self.config.am_predictor_conf):
            return False
        return True

    def init(self, config: dict) -> bool:
        """init engine resource

        Args:
            config (dict): engine configuration; it is read both with ``.get``
                and with attribute access.

        Returns:
            bool: init failed or success
        """
        self.config = config
        self.executor = ASRServerExecutor()

        # Bind the attribute before the try-block: the error message below
        # formats `self.device`, which would itself raise AttributeError if the
        # device lookup failed before the assignment happened.
        self.device = None
        try:
            self.device = self.config.get("device", paddle.get_device())
            paddle.set_device(self.device)
        except BaseException as e:
            logger.error(
                f"Set device failed, please check if device '{self.device}' is already used and the parameter 'device' in the yaml file"
            )
            logger.error(
                "If all GPU or XPU is used, you can set the server to 'cpu'")
            # keep the root cause visible instead of dropping it
            logger.error(e)
            sys.exit(-1)

        logger.debug(f"paddlespeech_server set the device: {self.device}")

        if not self.init_model():
            logger.error(
                "Init the ASR server occurs error, please check the server configuration yaml"
            )
            return False

        logger.info("Initialize ASR server engine successfully on device: %s." %
                    (self.device))

        return True

    def new_handler(self):
        """New handler from model.

        Returns:
            PaddleASRConnectionHanddler: asr handler instance
        """
        return PaddleASRConnectionHanddler(self)

    def preprocess(self, *args, **kwargs):
        """Not used by the online engine; always raises NotImplementedError."""
        raise NotImplementedError("Online not using this.")

    def run(self, *args, **kwargs):
        """Not used by the online engine; always raises NotImplementedError."""
        raise NotImplementedError("Online not using this.")

    def postprocess(self):
        """Not used by the online engine; always raises NotImplementedError."""
        raise NotImplementedError("Online not using this.")


================================================
FILE: paddlespeech/server/engine/asr/paddleinference/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/engine/asr/paddleinference/asr_engine.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import os
import time
from typing import Optional

import paddle
from yacs.config import CfgNode

from paddlespeech.cli.asr.infer import ASRExecutor
from paddlespeech.cli.log import logger
from paddlespeech.resource import CommonTaskResource
from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer
from paddlespeech.s2t.modules.ctc import CTCDecoder
from paddlespeech.s2t.utils.utility import UpdateConfig
from paddlespeech.server.engine.base_engine import BaseEngine
from paddlespeech.server.utils.paddle_predictor import init_predictor
from paddlespeech.server.utils.paddle_predictor import run_model
from paddlespeech.utils.env import MODEL_HOME

# Names exported by `from <this module> import *`.
__all__ = ['ASREngine', 'PaddleASRConnectionHandler']


class ASRServerExecutor(ASRExecutor):
    """ASR executor backed by a Paddle Inference predictor (deepspeech2 only)."""

    def __init__(self):
        super().__init__()
        self.task_resource = CommonTaskResource(
            task='asr', model_format='static')

    def _init_from_path(self,
                        model_type: str='wenetspeech',
                        am_model: Optional[os.PathLike]=None,
                        am_params: Optional[os.PathLike]=None,
                        lang: str='zh',
                        sample_rate: int=16000,
                        cfg_path: Optional[os.PathLike]=None,
                        decode_method: str='attention_rescoring',
                        am_predictor_conf: dict=None):
        """
        Resolve the resource paths, load the config and build predictor and decoder.

        Only model types containing "deepspeech2" are accepted; every other
        model type raises ``Exception("wrong type")``.
        """
        self.max_len = 50
        rate_tag = '8k' if sample_rate != 16000 else '16k'
        tag = '-'.join([model_type, lang, rate_tag])
        self.task_resource.set_task_model(model_tag=tag)

        use_pretrained = (cfg_path is None or am_model is None or
                          am_params is None)
        if use_pretrained:
            # take every file from the resource registered for `tag`
            resources = self.task_resource.res_dict
            self.res_path = self.task_resource.res_dir
            self.cfg_path = os.path.join(self.res_path, resources['cfg_path'])

            self.am_model = os.path.join(self.res_path, resources['model'])
            self.am_params = os.path.join(self.res_path, resources['params'])
            for resolved in (self.res_path, self.cfg_path, self.am_model,
                             self.am_params):
                logger.debug(resolved)
        else:
            self.cfg_path = os.path.abspath(cfg_path)
            self.am_model = os.path.abspath(am_model)
            self.am_params = os.path.abspath(am_params)
            cfg_dir = os.path.dirname(os.path.abspath(self.cfg_path))
            self.res_path = os.path.dirname(cfg_dir)

        # Init body.
        self.config = CfgNode(new_allowed=True)
        self.config.merge_from_file(self.cfg_path)

        with UpdateConfig(self.config):
            # conformer/transformer and unknown types are rejected alike
            if "deepspeech2" not in model_type:
                raise Exception("wrong type")

            self.vocab = self.config.vocab_filepath
            if self.config.spm_model_prefix:
                self.config.spm_model_prefix = os.path.join(
                    self.res_path, self.config.spm_model_prefix)
            self.text_feature = TextFeaturizer(
                unit_type=self.config.unit_type,
                vocab=self.vocab,
                spm_model_prefix=self.config.spm_model_prefix)
            self.config.decode.lang_model_path = os.path.join(
                MODEL_HOME, 'language_model',
                self.config.decode.lang_model_path)

            lm_url = self.task_resource.res_dict['lm_url']
            lm_md5 = self.task_resource.res_dict['lm_md5']
            lm_dir = os.path.dirname(self.config.decode.lang_model_path)
            self.download_lm(lm_url, lm_dir, lm_md5)

        # AM predictor
        self.am_predictor_conf = am_predictor_conf
        self.am_predictor = init_predictor(
            model_file=self.am_model,
            params_file=self.am_params,
            predictor_conf=self.am_predictor_conf)

        # decoder
        decoder_kwargs = dict(
            odim=self.config.output_dim,  # <blank> is in  vocab
            enc_n_units=self.config.rnn_layer_size * 2,
            blank_id=self.config.blank_id,
            dropout_rate=0.0,
            reduction=True,  # sum
            batch_average=True,  # sum / batch_size
            grad_norm_type=self.config.get('ctc_grad_norm_type', None))
        self.decoder = CTCDecoder(**decoder_kwargs)

    @paddle.no_grad()
    def infer(self, model_type: str):
        """
        Run the predictor on ``self._inputs`` and store the best transcript
        in ``self._outputs["result"]``.
        """
        decode_cfg = self.config.decode
        feats = self._inputs["audio"]
        feats_len = self._inputs["audio_len"]

        # conformer/transformer and unknown types are rejected alike
        if "deepspeech2" not in model_type:
            raise Exception("invalid model name")

        # init once
        self.decoder.init_decoder(
            feats.shape[0], self.text_feature.vocab_list,
            decode_cfg.decoding_method, decode_cfg.lang_model_path,
            decode_cfg.alpha, decode_cfg.beta, decode_cfg.beam_size,
            decode_cfg.cutoff_prob, decode_cfg.cutoff_top_n,
            decode_cfg.num_proc_bsearch)

        predictor_out = run_model(self.am_predictor,
                                  [feats.numpy(), feats_len.numpy()])
        probs, eouts_len = predictor_out[0], predictor_out[1]

        self.decoder.reset_decoder(batch_size=probs.shape[0])
        self.decoder.next(probs, eouts_len)
        best_hyps, _ = self.decoder.decode()

        # self.model.decoder.del_decoder()
        self._outputs["result"] = best_hyps[0]


class ASREngine(BaseEngine):
    """ASR server engine

    Args:
        metaclass: Defaults to Singleton.
    """

    def __init__(self):
        super(ASREngine, self).__init__()

    def init(self, config: dict) -> bool:
        """init engine resource

        Args:
            config (dict): engine configuration; read through attribute access.

        Returns:
            bool: init failed or success
        """
        self.executor = ASRServerExecutor()
        self.config = config
        self.engine_type = "inference"

        try:
            # prefer the device configured for the predictor, else paddle's current one
            if self.config.am_predictor_conf.device is not None:
                self.device = self.config.am_predictor_conf.device
            else:
                self.device = paddle.get_device()

            paddle.set_device(self.device)
        except Exception as e:
            logger.error(
                "Set device failed, please check if device is already used and the parameter 'device' in the yaml file"
            )
            logger.error(e)
            return False

        # Report a failed model initialization through the boolean return
        # value (as documented) instead of letting the exception escape.
        try:
            self.executor._init_from_path(
                model_type=self.config.model_type,
                am_model=self.config.am_model,
                am_params=self.config.am_params,
                lang=self.config.lang,
                sample_rate=self.config.sample_rate,
                cfg_path=self.config.cfg_path,
                decode_method=self.config.decode_method,
                am_predictor_conf=self.config.am_predictor_conf)
        except Exception as e:
            logger.error("Initialize ASR server engine Failed.")
            logger.error(e)
            return False

        logger.info("Initialize ASR server engine successfully.")
        return True


class PaddleASRConnectionHandler(ASRServerExecutor):
    def __init__(self, asr_engine):
        """The PaddleSpeech ASR Server Connection Handler
           This connection process every asr server request
        Args:
            asr_engine (ASREngine): The ASR engine
        """
        super().__init__()
        self.input = None
        self.output = None
        self.asr_engine = asr_engine
        # reuse the objects already built by the engine's executor
        self.executor = self.asr_engine.executor
        self.config = self.executor.config
        self.max_len = self.executor.max_len
        self.decoder = self.executor.decoder
        self.am_predictor = self.executor.am_predictor
        self.text_feature = self.executor.text_feature

    def run(self, audio_data):
        """engine run

        The result is stored in ``self.output``; it is None when the audio
        does not pass the file check.

        Args:
            audio_data (bytes): base64.b64decode
        """
        if self._check(
                io.BytesIO(audio_data), self.asr_engine.config.sample_rate,
                self.asr_engine.config.force_yes):
            logger.debug("start running asr engine")
            self.preprocess(self.asr_engine.config.model_type,
                            io.BytesIO(audio_data))
            st = time.time()
            self.infer(self.asr_engine.config.model_type)
            infer_time = time.time() - st
            self.output = self.postprocess()  # Retrieve result of asr.
            logger.debug("end inferring asr engine")
            # `infer_time` only exists on this path, so the timing is logged
            # here; logging it after the if/else raised UnboundLocalError
            # whenever the file check failed.
            logger.info("inference time: {}".format(infer_time))
        else:
            logger.error("file check failed!")
            self.output = None

        logger.info("asr engine type: paddle inference")


================================================
FILE: paddlespeech/server/engine/asr/python/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/engine/asr/python/asr_engine.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import sys
import time

import paddle

from paddlespeech.cli.asr.infer import ASRExecutor
from paddlespeech.cli.log import logger
from paddlespeech.server.engine.base_engine import BaseEngine

# Names exported by `from <this module> import *`.
__all__ = ['ASREngine', 'PaddleASRConnectionHandler']


class ASRServerExecutor(ASRExecutor):
    """Server-side ASR executor; adds nothing on top of ``ASRExecutor``."""

    def __init__(self):
        super().__init__()


class ASREngine(BaseEngine):
    """ASR server engine

    Args:
        metaclass: Defaults to Singleton.
    """

    def __init__(self):
        super(ASREngine, self).__init__()

    def init(self, config: dict) -> bool:
        """init engine resource

        Args:
            config (dict): engine configuration; read through attribute access.

        Returns:
            bool: init failed or success
        """
        self.executor = ASRServerExecutor()
        self.config = config
        self.engine_type = "python"

        try:
            # prefer the configured device, else paddle's current one
            if self.config.device is not None:
                self.device = self.config.device
            else:
                self.device = paddle.get_device()

            paddle.set_device(self.device)
        except Exception as e:
            logger.error(
                "Set device failed, please check if device is already used and the parameter 'device' in the yaml file"
            )
            logger.error(e)
            return False

        # the code-switch flag is enabled exactly for the zh_en language
        cs = self.config.lang == "zh_en"

        # Report a failed model initialization through the boolean return
        # value (as documented) instead of letting the exception escape.
        try:
            self.executor._init_from_path(
                model_type=self.config.model,
                lang=self.config.lang,
                sample_rate=self.config.sample_rate,
                cfg_path=self.config.cfg_path,
                decode_method=self.config.decode_method,
                ckpt_path=self.config.ckpt_path,
                codeswitch=cs)
        except Exception as e:
            logger.error("Initialize ASR server engine Failed.")
            logger.error(e)
            return False

        logger.info("Initialize ASR server engine successfully on device: %s." %
                    (self.device))
        return True


class PaddleASRConnectionHandler(ASRServerExecutor):
    def __init__(self, asr_engine):
        """The PaddleSpeech ASR Server Connection Handler
           This connection process every asr server request
        Args:
            asr_engine (ASREngine): The ASR engine
        """
        super().__init__()
        self.input = None
        self.output = None
        self.asr_engine = asr_engine
        # reuse the objects already built by the engine's executor
        self.executor = self.asr_engine.executor
        self.max_len = self.executor.max_len
        self.text_feature = self.executor.text_feature
        self.model = self.executor.model
        self.config = self.executor.config

    def run(self, audio_data):
        """engine run

        The result is stored in ``self.output``; it is None when the audio
        does not pass the file check. Any exception raised while processing
        is logged and terminates the process with exit code -1.

        Args:
            audio_data (bytes): base64.b64decode
        """
        try:
            if self._check(
                    io.BytesIO(audio_data), self.asr_engine.config.sample_rate,
                    self.asr_engine.config.force_yes):
                logger.debug("start run asr engine")
                self.preprocess(self.asr_engine.config.model,
                                io.BytesIO(audio_data))
                st = time.time()
                self.infer(self.asr_engine.config.model)
                infer_time = time.time() - st
                self.output = self.postprocess()  # Retrieve result of asr.
                # `infer_time` only exists on this path. Logging it after the
                # if/else raised UnboundLocalError on a failed file check,
                # which the handler below turned into sys.exit(-1).
                logger.info("inference time: {}".format(infer_time))
            else:
                logger.error("file check failed!")
                self.output = None

            logger.info("asr engine type: python")
        except Exception as e:
            logger.info(e)
            sys.exit(-1)


================================================
FILE: paddlespeech/server/engine/base_engine.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from typing import Union

from pattern_singleton import Singleton

# Names exported by `from <this module> import *`.
__all__ = ['BaseEngine']


class BaseEngine(metaclass=Singleton):
    """
        Common base class of the server engines.

        It only provides the ``_inputs`` / ``_outputs`` dictionaries and
        placeholder hooks; the hooks do nothing here and return None.
    """

    def __init__(self):
        self._inputs = {}
        self._outputs = {}

    def init(self, *args, **kwargs):
        """
        Initialize the engine (placeholder, does nothing in the base class).

        Returns:
            bool: true or false
        """
        return None

    def postprocess(self, *args, **kwargs) -> Union[str, os.PathLike]:
        """
        Turn the model output held in self._outputs into human-readable
        results (placeholder, does nothing in the base class).

        Returns:
            Union[str, os.PathLike]: Human-readable results such as texts and audio files.
        """
        return None

    def run(self, *args, **kwargs) -> Union[str, os.PathLike]:
        """
        Run the engine (placeholder, does nothing in the base class).

        Returns:
            Union[str, os.PathLike]: Human-readable results such as texts and audio files.
        """
        return None


================================================
FILE: paddlespeech/server/engine/cls/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/engine/cls/paddleinference/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/engine/cls/paddleinference/cls_engine.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import os
import time
from collections import OrderedDict
from typing import Optional

import numpy as np
import paddle
import yaml

from paddlespeech.cli.cls.infer import CLSExecutor
from paddlespeech.cli.log import logger
from paddlespeech.resource import CommonTaskResource
from paddlespeech.server.engine.base_engine import BaseEngine
from paddlespeech.server.utils.paddle_predictor import init_predictor
from paddlespeech.server.utils.paddle_predictor import run_model

# Names exported by `from <this module> import *`.
__all__ = ['CLSEngine', 'PaddleCLSConnectionHandler']


class CLSServerExecutor(CLSExecutor):
    """CLS executor backed by a Paddle Inference predictor."""

    def __init__(self):
        super().__init__()
        self.task_resource = CommonTaskResource(
            task='cls', model_format='static')

    def _init_from_path(
            self,
            model_type: str='panns_cnn14_audioset',
            cfg_path: Optional[os.PathLike]=None,
            model_path: Optional[os.PathLike]=None,
            params_path: Optional[os.PathLike]=None,
            label_file: Optional[os.PathLike]=None,
            predictor_conf: dict=None, ):
        """
        Resolve the resource files, read config and labels, create the predictor.

        When any of the four paths is missing, all of them are taken from the
        resource registered under ``{model_type}-32k``.
        """
        supplied = (cfg_path, model_path, params_path, label_file)
        if any(path is None for path in supplied):
            self.task_resource.set_task_model(model_tag=model_type + '-' + '32k')
            self.res_path = self.task_resource.res_dir
            resources = self.task_resource.res_dict
            self.cfg_path = os.path.join(self.res_path, resources['cfg_path'])
            self.model_path = os.path.join(self.res_path,
                                           resources['model_path'])
            self.params_path = os.path.join(self.res_path,
                                            resources['params_path'])
            self.label_file = os.path.join(self.res_path,
                                           resources['label_file'])
        else:
            self.cfg_path = os.path.abspath(cfg_path)
            self.model_path = os.path.abspath(model_path)
            self.params_path = os.path.abspath(params_path)
            self.label_file = os.path.abspath(label_file)

        for resolved in (self.cfg_path, self.model_path, self.params_path,
                         self.label_file):
            logger.debug(resolved)

        # config
        with open(self.cfg_path, 'r') as cfg_fp:
            self._conf = yaml.safe_load(cfg_fp)
        logger.debug("Read cfg file successfully.")

        # labels: one label per line, surrounding whitespace removed
        self._label_list = []
        with open(self.label_file, 'r') as label_fp:
            self._label_list.extend(raw.strip() for raw in label_fp)
        logger.debug("Read label file successfully.")

        # Create predictor
        self.predictor_conf = predictor_conf
        self.predictor = init_predictor(
            model_file=self.model_path,
            params_file=self.params_path,
            predictor_conf=self.predictor_conf)
        logger.debug("Create predictor successfully.")

    @paddle.no_grad()
    def infer(self):
        """
        Run the predictor on ``self._inputs['feats']`` and store the first
        output in ``self._outputs['logits']``.
        """
        feats = self._inputs['feats'].numpy()
        predictor_out = run_model(self.predictor, [feats])
        self._outputs['logits'] = predictor_out[0]


class CLSEngine(BaseEngine):
    """CLS server engine whose ``engine_type`` is ``"inference"``.

    The engine owns a single ``CLSServerExecutor`` that is created and loaded
    inside :meth:`init`.
    """

    def __init__(self):
        super().__init__()

    def init(self, config: dict) -> bool:
        """Select the device and load the model resources.

        Args:
            config: Engine configuration, accessed by attribute. The fields
                read here are ``predictor_conf`` (and its ``device``),
                ``model_type``, ``cfg_path``, ``model_path``, ``params_path``
                and ``label_file``.

        Returns:
            bool: ``True`` when the device was set and the executor was
            initialized, ``False`` if either step raised.
        """
        self.executor = CLSServerExecutor()
        self.config = config
        self.engine_type = "inference"

        # Step 1: device selection. Use the configured device if one is given,
        # otherwise whatever paddle currently reports.
        try:
            requested = self.config.predictor_conf.device
            self.device = (paddle.get_device()
                           if requested is None else requested)
            paddle.set_device(self.device)
        except Exception as err:
            logger.error(
                "Set device failed, please check if device is already used and the parameter 'device' in the yaml file"
            )
            logger.error(err)
            return False

        # Step 2: load config, labels and predictor through the executor.
        try:
            cfg = self.config
            self.executor._init_from_path(cfg.model_type, cfg.cfg_path,
                                          cfg.model_path, cfg.params_path,
                                          cfg.label_file, cfg.predictor_conf)
        except Exception as err:
            logger.error("Initialize CLS server engine Failed.")
            logger.error(err)
            return False

        logger.info("Initialize CLS server engine successfully on device: %s." %
                    (self.device))
        return True


class PaddleCLSConnectionHandler(CLSServerExecutor):
    def __init__(self, cls_engine):
        """Per-request handler for the CLS server.

        The handler keeps its own input/output dictionaries and takes the
        config, label list and predictor from the engine's executor.

        Args:
            cls_engine (CLSEngine): The CLS engine
        """
        super().__init__()
        logger.debug(
            "Create PaddleCLSConnectionHandler to process the cls request")

        self.cls_engine = cls_engine
        self.executor = cls_engine.executor

        # Request-local buffers.
        self._inputs = OrderedDict()
        self._outputs = OrderedDict()

        # Resources taken from the engine's executor.
        shared = self.executor
        self._conf = shared._conf
        self._label_list = shared._label_list
        self.predictor = shared.predictor

    def run(self, audio_data):
        """Preprocess the audio, run inference and log the inference time.

        Args:
            audio_data (bytes): audio bytes; they are wrapped in
                ``io.BytesIO`` before being passed to ``preprocess``.
        """
        stream = io.BytesIO(audio_data)
        self.preprocess(stream)

        started = time.time()
        self.infer()
        elapsed = time.time() - started

        logger.debug("inference time: {}".format(elapsed))
        logger.info("cls engine type: inference")

    def postprocess(self, topk: int):
        """Return the ``topk`` highest-scoring labels.

        Args:
            topk (int): number of entries to return; must not exceed the
                number of labels.

        Returns:
            list: dicts with keys ``class_name`` and ``prob``, ordered from
            the highest score to the lowest.
        """
        assert topk <= len(
            self._label_list), 'Value of topk is larger than number of labels.'

        scores = np.squeeze(self._outputs['logits'], axis=0)
        # Sorting the negated scores ascending yields a descending ranking.
        ranked = (-scores).argsort()[:topk]
        return [{
            'class_name': self._label_list[idx],
            'prob': scores[idx]
        } for idx in ranked]


================================================
FILE: paddlespeech/server/engine/cls/python/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/engine/cls/python/cls_engine.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import time
from collections import OrderedDict

import paddle

from paddlespeech.cli.cls.infer import CLSExecutor
from paddlespeech.cli.log import logger
from paddlespeech.server.engine.base_engine import BaseEngine

__all__ = ['CLSEngine', 'PaddleCLSConnectionHandler']


class CLSServerExecutor(CLSExecutor):
    """Server-side subclass of ``CLSExecutor`` that adds no behavior."""

    def __init__(self):
        super().__init__()


class CLSEngine(BaseEngine):
    """CLS server engine whose ``engine_type`` is ``"python"``.

    The engine owns a single ``CLSServerExecutor`` that is created and loaded
    inside :meth:`init`.
    """

    def __init__(self):
        super().__init__()

    def init(self, config: dict) -> bool:
        """Select the device and load the model resources.

        Args:
            config: Engine configuration, accessed by attribute. The fields
                read here are ``device``, ``model``, ``cfg_path``,
                ``ckpt_path`` and ``label_file``.

        Returns:
            bool: ``True`` when the device was set and the executor was
            initialized, ``False`` if either step raised.
        """
        self.executor = CLSServerExecutor()
        self.config = config
        self.engine_type = "python"

        # Step 1: device selection. Use the configured device if one is given,
        # otherwise whatever paddle currently reports.
        try:
            requested = self.config.device
            self.device = (paddle.get_device()
                           if requested is None else requested)
            paddle.set_device(self.device)
        except Exception as err:
            logger.error(
                "Set device failed, please check if device is already used and the parameter 'device' in the yaml file"
            )
            logger.error(err)
            return False

        # Step 2: load the model through the executor.
        try:
            cfg = self.config
            self.executor._init_from_path(cfg.model, cfg.cfg_path,
                                          cfg.ckpt_path, cfg.label_file)
        except Exception as err:
            logger.error("Initialize CLS server engine Failed.")
            logger.error(err)
            return False

        logger.info("Initialize CLS server engine successfully on device: %s." %
                    (self.device))
        return True


class PaddleCLSConnectionHandler(CLSServerExecutor):
    def __init__(self, cls_engine):
        """Per-request handler for the CLS server.

        The handler keeps its own input/output dictionaries and takes the
        config, label list and model from the engine's executor.

        Args:
            cls_engine (CLSEngine): The CLS engine
        """
        super().__init__()
        logger.debug(
            "Create PaddleCLSConnectionHandler to process the cls request")

        self.cls_engine = cls_engine
        self.executor = cls_engine.executor

        # Request-local buffers.
        self._inputs = OrderedDict()
        self._outputs = OrderedDict()

        # Resources taken from the engine's executor.
        shared = self.executor
        self._conf = shared._conf
        self._label_list = shared._label_list
        self.model = shared.model

    def run(self, audio_data):
        """Preprocess the audio, run inference and log the inference time.

        Args:
            audio_data (bytes): audio bytes; they are wrapped in
                ``io.BytesIO`` before being passed to ``preprocess``.
        """
        stream = io.BytesIO(audio_data)
        self.preprocess(stream)

        started = time.time()
        self.infer()
        elapsed = time.time() - started

        logger.debug("inference time: {}".format(elapsed))
        logger.info("cls engine type: python")

    def postprocess(self, topk: int):
        """Return the ``topk`` highest-scoring labels.

        Args:
            topk (int): number of entries to return; must not exceed the
                number of labels.

        Returns:
            list: dicts with keys ``class_name`` and ``prob``, ordered from
            the highest score to the lowest.
        """
        assert topk <= len(
            self._label_list), 'Value of topk is larger than number of labels.'

        scores = self._outputs['logits'].squeeze(0).numpy()
        # Sorting the negated scores ascending yields a descending ranking.
        ranked = (-scores).argsort()[:topk]
        return [{
            'class_name': self._label_list[idx],
            'prob': scores[idx]
        } for idx in ranked]


================================================
FILE: paddlespeech/server/engine/engine_factory.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Text

from paddlespeech.cli.log import logger

__all__ = ['EngineFactory']


class EngineFactory(object):
    """Factory that turns an (engine name, engine type) pair into an engine.

    Engine modules are imported lazily, only when the matching engine is
    requested, so unused engines are never imported.
    """

    # Common package prefix of every engine module listed below.
    _PKG = 'paddlespeech.server.engine'

    # (engine_name, engine_type) -> (module path relative to _PKG, class name)
    _ENGINES = {
        ('asr', 'inference'): ('asr.paddleinference.asr_engine', 'ASREngine'),
        ('asr', 'python'): ('asr.python.asr_engine', 'ASREngine'),
        ('asr', 'online'): ('asr.online.python.asr_engine', 'ASREngine'),
        ('asr', 'online-inference'):
        ('asr.online.paddleinference.asr_engine', 'ASREngine'),
        ('asr', 'online-onnx'): ('asr.online.onnx.asr_engine', 'ASREngine'),
        ('tts', 'inference'): ('tts.paddleinference.tts_engine', 'TTSEngine'),
        ('tts', 'python'): ('tts.python.tts_engine', 'TTSEngine'),
        ('tts', 'online'): ('tts.online.python.tts_engine', 'TTSEngine'),
        ('tts', 'online-onnx'): ('tts.online.onnx.tts_engine', 'TTSEngine'),
        ('cls', 'inference'): ('cls.paddleinference.cls_engine', 'CLSEngine'),
        ('cls', 'python'): ('cls.python.cls_engine', 'CLSEngine'),
        ('text', 'python'): ('text.python.text_engine', 'TextEngine'),
        ('vector', 'python'): ('vector.python.vector_engine', 'VectorEngine'),
        ('acs', 'python'): ('acs.python.acs_engine', 'ACSEngine'),
    }

    @staticmethod
    def get_engine(engine_name: Text, engine_type: Text):
        """Create the engine registered for ``engine_name``/``engine_type``.

        Matching is case-insensitive for every engine. Previously only the
        ``text``, ``vector`` and ``acs`` engines were matched that way.

        Args:
            engine_name (Text): engine family, e.g. ``'asr'``, ``'tts'``,
                ``'cls'``, ``'text'``, ``'vector'`` or ``'acs'``.
            engine_type (Text): implementation flavour, e.g. ``'python'``,
                ``'inference'``, ``'online'``, ``'online-inference'`` or
                ``'online-onnx'``.

        Returns:
            A new engine instance, or ``None`` when the combination is not
            registered (an error is logged in that case).
        """
        # Local import keeps this block self-contained.
        from importlib import import_module

        logger.info(f"{engine_name} : {engine_type} engine.")

        key = (engine_name.lower(), engine_type.lower())
        target = EngineFactory._ENGINES.get(key)
        if target is None:
            logger.error(
                f"Unsupported engine: name={engine_name}, type={engine_type}.")
            return None

        module_suffix, class_name = target
        module = import_module(f"{EngineFactory._PKG}.{module_suffix}")
        return getattr(module, class_name)()


================================================
FILE: paddlespeech/server/engine/engine_pool.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddlespeech.server.engine.engine_factory import EngineFactory

# Module-level registry of engine instances; init_engine_pool() fills it,
# keyed by the engine name.
ENGINE_POOL = {}


def get_engine_pool() -> dict:
    """Return the module-level engine registry.

    The dict itself is returned (not a copy), so changes made by the caller
    are visible to every other user of the pool.
    """
    return ENGINE_POOL


def init_engine_pool(config) -> bool:
    """Create and initialize every engine named in ``config.engine_list``.

    Each entry is expected to look like ``"<engine>_<type>"`` (for example
    ``"asr_python"``). The part before the first underscore is the pool key
    and engine name; the part after it is the engine type. The engine's own
    configuration is read from ``config[<entry>]``.

    Args:
        config: server configuration exposing ``engine_list`` and supporting
            item access by the entry string.

    Returns:
        bool: ``True`` when every engine was created and its ``init``
        returned a truthy value. ``False`` as soon as an entry is malformed,
        is not known to ``EngineFactory``, or fails to initialize.
    """
    # Local import: this module has no logger of its own.
    import logging

    global ENGINE_POOL
    log = logging.getLogger(__name__)

    for engine_and_type in config.engine_list:
        parts = engine_and_type.split("_")
        if len(parts) < 2:
            # Without an underscore there is no engine type to look up;
            # indexing parts[1] would raise IndexError.
            log.error("Invalid engine entry '%s', expected '<engine>_<type>'.",
                      engine_and_type)
            return False
        engine, engine_type = parts[0], parts[1]

        instance = EngineFactory.get_engine(
            engine_name=engine, engine_type=engine_type)
        if instance is None:
            # The factory returns None for unknown combinations; calling
            # .init on it would raise AttributeError.
            log.error("No engine available for '%s'.", engine_and_type)
            return False

        ENGINE_POOL[engine] = instance
        if not instance.init(config=config[engine_and_type]):
            return False

    return True


================================================
FILE: paddlespeech/server/engine/engine_warmup.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time

from paddlespeech.cli.log import logger
from paddlespeech.server.engine.engine_pool import get_engine_pool


def warm_up(engine_and_type: str, warm_up_time: int=3) -> bool:
    """Warm up an engine by running a few throw-away inferences.

    Only TTS engines are warmed up; for any ``engine_and_type`` that does not
    contain ``"tts"`` the function returns ``True`` without doing anything.

    Args:
        engine_and_type (str): engine identifier such as ``"tts_python"``,
            ``"tts_inference"``, ``"tts_online"`` or ``"tts_online-onnx"``.
        warm_up_time (int): number of warm-up inferences to run.

    Returns:
        bool: ``True`` on success or when nothing had to be done. ``False``
        when the TTS engine type is unknown or a warm-up inference raised.

    Raises:
        SystemExit: when the TTS engine language is not ``zh``, ``en`` or
            ``mix``.
    """
    # `sys` is not imported at module level in this file, so the sys.exit
    # call below raised NameError. Import it here.
    import sys

    if "tts" not in engine_and_type:
        return True

    engine_pool = get_engine_pool()
    tts_engine = engine_pool['tts']
    flag_online = False

    if tts_engine.lang == 'zh':
        sentence = "您好，欢迎使用语音合成服务。"
    elif tts_engine.lang == 'en':
        sentence = "Hello and welcome to the speech synthesis service."
    elif tts_engine.lang == 'mix':
        sentence = "您好，欢迎使用TTS多语种服务。"
    else:
        logger.error("tts engine only support lang: zh or en or mix.")
        sys.exit(-1)

    if engine_and_type == "tts_python":
        from paddlespeech.server.engine.tts.python.tts_engine import PaddleTTSConnectionHandler
    elif engine_and_type == "tts_inference":
        from paddlespeech.server.engine.tts.paddleinference.tts_engine import PaddleTTSConnectionHandler
    elif engine_and_type == "tts_online":
        from paddlespeech.server.engine.tts.online.python.tts_engine import PaddleTTSConnectionHandler
        flag_online = True
    elif engine_and_type == "tts_online-onnx":
        from paddlespeech.server.engine.tts.online.onnx.tts_engine import PaddleTTSConnectionHandler
        flag_online = True
    else:
        # No handler class is available for this type. Fail here instead of
        # relying on the unbound name being caught further down.
        logger.error("Please check tts engine type.")
        return False

    try:
        logger.debug("Start to warm up tts engine.")
        for i in range(warm_up_time):
            connection_handler = PaddleTTSConnectionHandler(tts_engine)
            if flag_online:
                # Online engines are iterated; only the first yielded item is
                # consumed before the loop is left.
                for wav in connection_handler.infer(
                        text=sentence,
                        lang=tts_engine.lang,
                        am=tts_engine.config.am):
                    logger.debug(
                        f"The first response time of the {i} warm up: {connection_handler.first_response_time} s"
                    )
                    break
            else:
                st = time.time()
                connection_handler.infer(
                    text=sentence,
                    lang=tts_engine.lang,
                    am=tts_engine.config.am)
                et = time.time()
                logger.debug(
                    f"The response time of the {i} warm up: {et - st} s")
    except Exception as e:
        logger.error("Failed to warm up on tts engine.")
        logger.error(e)
        return False

    return True


================================================
FILE: paddlespeech/server/engine/text/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/engine/text/python/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/engine/text/python/text_engine.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import OrderedDict

import paddle

from paddlespeech.cli.log import logger
from paddlespeech.cli.text.infer import TextExecutor
from paddlespeech.server.engine.base_engine import BaseEngine


class PaddleTextConnectionHandler:
    def __init__(self, text_engine):
        """Per-request handler for the Text server.

        Takes the task name, model, tokenizer and punctuation list from the
        engine's executor and keeps its own input/output dictionaries.

        Args:
            text_engine (TextEngine): The Text engine
        """
        super().__init__()
        logger.debug(
            "Create PaddleTextConnectionHandler to process the text request")
        self.text_engine = text_engine

        executor = self.text_engine.executor
        self.task = executor.task
        self.model = executor.model
        self.tokenizer = executor.tokenizer
        self._punc_list = executor._punc_list

        self._inputs = OrderedDict()
        self._outputs = OrderedDict()

    @paddle.no_grad()
    def run(self, text):
        """Process one request: preprocess, infer, postprocess.

        Args:
            text (str): the request text

        Returns:
            str: the punctuation text
        """
        self.preprocess(text)
        self.infer()
        return self.postprocess()

    @paddle.no_grad()
    def preprocess(self, text):
        """Clean and tokenize ``text`` and store the result in ``self._inputs``.

        Args:
            text (str): the request text

        Raises:
            NotImplementedError: if the task is not ``'punc'``.
        """
        if self.task != 'punc':
            raise NotImplementedError

        clean_text = self.text_engine.executor._clean_text(text)
        assert len(clean_text) > 0, f'Invalid input string: {text}'

        # The cleaned text is passed to the tokenizer as a list of characters.
        encoded = self.tokenizer(
            list(clean_text), return_length=True, is_split_into_words=True)

        self._inputs['input_ids'] = encoded['input_ids']
        self._inputs['seg_ids'] = encoded['token_type_ids']
        self._inputs['seq_len'] = encoded['seq_len']

    @paddle.no_grad()
    def infer(self):
        """Run the model and store the argmax labels in ``self._outputs``.

        Raises:
            NotImplementedError: if the task is not ``'punc'``.
        """
        if self.task != 'punc':
            raise NotImplementedError

        # unsqueeze(0) adds a leading axis before the model call.
        input_ids = paddle.to_tensor(self._inputs['input_ids']).unsqueeze(0)
        seg_ids = paddle.to_tensor(self._inputs['seg_ids']).unsqueeze(0)
        logits, _ = self.model(input_ids, seg_ids)

        self._outputs['preds'] = paddle.argmax(logits, axis=-1).squeeze(0)

    def postprocess(self):
        """Rebuild the text, inserting punctuation after labelled tokens.

        Returns:
            str: The punctuation text

        Raises:
            NotImplementedError: if the task is not ``'punc'``.
        """
        if self.task != 'punc':
            raise NotImplementedError

        input_ids = self._inputs['input_ids']
        seq_len = self._inputs['seq_len']
        preds = self._outputs['preds']

        # The first position and everything from index seq_len - 1 onward
        # are excluded on both sides.
        tokens = self.tokenizer.convert_ids_to_tokens(input_ids[1:seq_len - 1])
        labels = preds[1:seq_len - 1].tolist()
        assert len(tokens) == len(labels)

        # Models whose type contains 'fast' index the punctuation list with
        # an offset of one.
        offset = 1 if 'fast' in self.text_engine.config.model_type else 0

        pieces = []
        for token, label in zip(tokens, labels):
            pieces.append(token)
            if label != 0:  # label 0 means: no punctuation after this token
                pieces.append(self._punc_list[label - offset])
        return ''.join(pieces)


class TextServerExecutor(TextExecutor):
    def __init__(self):
        """Server-side subclass of ``TextExecutor`` that adds no behavior."""
        super().__init__()


class TextEngine(BaseEngine):
    def __init__(self):
        """The Text Engine
        """
        super(TextEngine, self).__init__()
        logger.debug("Create the TextEngine Instance")

    def init(self, config: dict):
        """Select the device and load the text model.

        Args:
            config (dict): engine configuration, accessed by attribute. The
                fields read here are ``device``, ``model_type``, ``task``,
                ``lang``, ``cfg_path``, ``ckpt_path`` and ``vocab_file``.

        Returns:
            bool: ``True`` when the device was set and the model was loaded,
            ``False`` if either step raised.
        """
        logger.debug("Init the text engine")
        try:
            self.config = config
            if self.config.device:
                self.device = self.config.device
            else:
                self.device = paddle.get_device()

            paddle.set_device(self.device)
            logger.debug(f"Text Engine set the device: {self.device}")
        except Exception as e:
            # Catch Exception rather than BaseException so KeyboardInterrupt
            # and SystemExit still propagate.
            logger.error(
                "Set device failed, please check if device is already used and the parameter 'device' in the yaml file"
            )
            # self.device may not exist yet if the failure happened before it
            # was assigned, so read it defensively.
            logger.error("Initialize Text server engine Failed on device: %s." %
                         (getattr(self, 'device', None)))
            logger.error(e)
            return False

        self.executor = TextServerExecutor()
        try:
            # Models whose type contains 'fast' are loaded through a separate
            # executor entry point.
            if 'fast' in config.model_type:
                load_model = self.executor._init_from_path_new
            else:
                load_model = self.executor._init_from_path
            load_model(
                task=config.task,
                model_type=config.model_type,
                lang=config.lang,
                cfg_path=config.cfg_path,
                ckpt_path=config.ckpt_path,
                vocab_file=config.vocab_file)
        except Exception as e:
            # Report load failures through the return value, like the CLS
            # engines do, instead of letting them escape.
            logger.error("Initialize Text server engine Failed.")
            logger.error(e)
            return False

        logger.info("Using model: %s." % (config.model_type))
        logger.info("Initialize Text server engine successfully on device: %s."
                    % (self.device))
        return True


================================================
FILE: paddlespeech/server/engine/tts/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/engine/tts/online/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/engine/tts/online/onnx/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/engine/tts/online/onnx/tts_engine.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import base64
import math
import os
import time
from typing import Optional

import numpy as np
import paddle

from paddlespeech.cli.log import logger
from paddlespeech.cli.tts.infer import TTSExecutor
from paddlespeech.resource import CommonTaskResource
from paddlespeech.server.engine.base_engine import BaseEngine
from paddlespeech.server.utils.audio_process import float2pcm
from paddlespeech.server.utils.onnx_infer import get_sess
from paddlespeech.server.utils.util import denorm
from paddlespeech.server.utils.util import get_chunks
from paddlespeech.t2s.frontend.en_frontend import English
from paddlespeech.t2s.frontend.zh_frontend import Frontend

__all__ = ['TTSEngine', 'PaddleTTSConnectionHandler']


class TTSServerExecutor(TTSExecutor):
    """TTS executor for the streaming server that runs ONNX sessions."""

    def __init__(self):
        super().__init__()
        self.task_resource = CommonTaskResource(task='tts', model_format='onnx')

    def _init_from_path(
            self,
            am: str='fastspeech2_csmsc_onnx',
            am_ckpt: Optional[list]=None,
            am_stat: Optional[os.PathLike]=None,
            phones_dict: Optional[os.PathLike]=None,
            tones_dict: Optional[os.PathLike]=None,
            speaker_dict: Optional[os.PathLike]=None,
            am_sample_rate: int=24000,
            am_sess_conf: dict=None,
            voc: str='mb_melgan_csmsc_onnx',
            voc_ckpt: Optional[os.PathLike]=None,
            voc_sample_rate: int=24000,
            voc_sess_conf: dict=None,
            lang: str='zh', ):
        """Create the acoustic-model and vocoder sessions and the frontend.

        When the required paths are not all given, the resources are resolved
        through ``self.task_resource`` using the tag ``"<model>-<lang>"``.

        Args:
            am: acoustic model name; ``fastspeech2_csmsc_onnx`` and
                ``fastspeech2_cnndecoder_csmsc_onnx`` are handled.
            am_ckpt: list of acoustic model files. One entry is used for
                ``fastspeech2_csmsc_onnx``; three (encoder, decoder, postnet)
                for ``fastspeech2_cnndecoder_csmsc_onnx``.
            am_stat: statistics file loaded with ``np.load`` (cnndecoder only).
            phones_dict: phone vocabulary file.
            tones_dict: accepted but not read by this method.
            speaker_dict: accepted but not read by this method.
            am_sample_rate: accepted but not read by this method.
            am_sess_conf: configuration passed to ``get_sess`` for the
                acoustic model session(s).
            voc: vocoder name.
            voc_ckpt: vocoder model file.
            voc_sample_rate: accepted but not read by this method.
            voc_sess_conf: configuration passed to ``get_sess`` for the
                vocoder session.
            lang: ``'zh'`` or ``'en'``; selects the frontend.
        """
        am_ready = hasattr(self, 'am_sess') or (
            hasattr(self, 'am_encoder_infer_sess') and
            hasattr(self, 'am_decoder_sess') and
            hasattr(self, 'am_postnet_sess'))
        # This class stores the vocoder session as `voc_sess`. The previous
        # check looked for `voc_inference`, which is never set here, so the
        # guard could not detect an already-initialized executor.
        if am_ready and hasattr(self, 'voc_sess'):
            logger.debug('Models had been initialized.')
            return

        # am
        am_tag = am + '-' + lang
        if am == "fastspeech2_csmsc_onnx":
            # get model info
            if am_ckpt is None or phones_dict is None:
                self.task_resource.set_task_model(
                    model_tag=am_tag,
                    model_type=0,  # am
                    version=None,  # default version
                )
                self.am_res_path = self.task_resource.res_dir
                self.am_ckpt = os.path.join(self.am_res_path,
                                            self.task_resource.res_dict['ckpt'])
                # must have phones_dict in acoustic
                self.phones_dict = os.path.join(
                    self.am_res_path,
                    self.task_resource.res_dict['phones_dict'])

            else:
                self.am_ckpt = os.path.abspath(am_ckpt[0])
                self.phones_dict = os.path.abspath(phones_dict)
                # am_ckpt is a list; use its first entry. Passing the list
                # itself to os.path.abspath raises TypeError.
                self.am_res_path = os.path.dirname(
                    os.path.abspath(am_ckpt[0]))

            # create am sess
            self.am_sess = get_sess(self.am_ckpt, am_sess_conf)

        elif am == "fastspeech2_cnndecoder_csmsc_onnx":
            if am_ckpt is None or am_stat is None or phones_dict is None:
                self.task_resource.set_task_model(
                    model_tag=am_tag,
                    model_type=0,  # am
                    version=None,  # default version
                )
                self.am_res_path = self.task_resource.res_dir
                self.am_encoder_infer = os.path.join(
                    self.am_res_path, self.task_resource.res_dict['ckpt'][0])
                self.am_decoder = os.path.join(
                    self.am_res_path, self.task_resource.res_dict['ckpt'][1])
                self.am_postnet = os.path.join(
                    self.am_res_path, self.task_resource.res_dict['ckpt'][2])
                # must have phones_dict in acoustic
                self.phones_dict = os.path.join(
                    self.am_res_path,
                    self.task_resource.res_dict['phones_dict'])
                self.am_stat = os.path.join(
                    self.am_res_path,
                    self.task_resource.res_dict['speech_stats'])

            else:
                self.am_encoder_infer = os.path.abspath(am_ckpt[0])
                self.am_decoder = os.path.abspath(am_ckpt[1])
                self.am_postnet = os.path.abspath(am_ckpt[2])
                self.phones_dict = os.path.abspath(phones_dict)
                self.am_stat = os.path.abspath(am_stat)
                self.am_res_path = os.path.dirname(os.path.abspath(am_ckpt[0]))

            # create am sess
            self.am_encoder_infer_sess = get_sess(self.am_encoder_infer,
                                                  am_sess_conf)
            self.am_decoder_sess = get_sess(self.am_decoder, am_sess_conf)
            self.am_postnet_sess = get_sess(self.am_postnet, am_sess_conf)

            # The stats file unpacks into two values, stored as mu and std.
            self.am_mu, self.am_std = np.load(self.am_stat)

        logger.debug(f"self.phones_dict: {self.phones_dict}")
        logger.debug(f"am model dir: {self.am_res_path}")
        logger.debug("Create am sess successfully.")

        # voc model info
        voc_tag = voc + '-' + lang

        if voc_ckpt is None:
            self.task_resource.set_task_model(
                model_tag=voc_tag,
                model_type=1,  # vocoder
                version=None,  # default version
            )
            self.voc_res_path = self.task_resource.voc_res_dir
            self.voc_ckpt = os.path.join(
                self.voc_res_path, self.task_resource.voc_res_dict['ckpt'])
        else:
            self.voc_ckpt = os.path.abspath(voc_ckpt)
            self.voc_res_path = os.path.dirname(os.path.abspath(self.voc_ckpt))
        logger.debug(self.voc_res_path)

        # create voc sess
        self.voc_sess = get_sess(self.voc_ckpt, voc_sess_conf)
        logger.debug("Create voc sess successfully.")

        # The vocabulary size is the number of lines in the phones dict.
        with open(self.phones_dict, "r", encoding='utf-8') as f:
            phn_id = [line.strip().split() for line in f]
        self.vocab_size = len(phn_id)
        logger.debug(f"vocab_size: {self.vocab_size}")

        # frontend
        self.tones_dict = None
        if lang == 'zh':
            self.frontend = Frontend(
                phone_vocab_path=self.phones_dict,
                tone_vocab_path=self.tones_dict)

        elif lang == 'en':
            self.frontend = English(phone_vocab_path=self.phones_dict)
        logger.debug("frontend done!")


class TTSEngine(BaseEngine):
    """Streaming TTS server engine that drives ONNX AM / vocoder sessions.

    The engine keeps the configuration-derived streaming parameters
    (block / pad / upsample sizes, sample rate, device); the sessions and the
    text frontend are owned by ``self.executor``.
    """

    def __init__(self, name=None):
        """Initialize TTS server engine

        Args:
            name: Not used by this class; kept so callers may still pass it.
        """
        super().__init__()

    def init(self, config: dict) -> bool:
        """Validate the configuration, pick the device and load the models.

        Args:
            config: Server configuration. It is read through attribute access
                (``config.lang``, ``config.am``, ``config.voc_sess_conf`` ...).

        Returns:
            bool: True when the device was set and the executor loaded its
            models; False when either step failed (the failure is logged).

        Raises:
            AssertionError: If the am / voc names are not supported, if
                ``voc_block`` or ``voc_pad`` is not positive, or if the AM
                and vocoder sample rates differ.
        """
        self.executor = TTSServerExecutor()
        self.config = config
        self.lang = self.config.lang
        self.engine_type = "online-onnx"

        self.am_block = self.config.am_block
        self.am_pad = self.config.am_pad
        self.voc_block = self.config.voc_block
        self.voc_pad = self.config.voc_pad
        self.am_upsample = 1
        self.voc_upsample = self.config.voc_upsample

        assert (
            self.config.am == "fastspeech2_csmsc_onnx" or
            self.config.am == "fastspeech2_cnndecoder_csmsc_onnx"
        ) and (
            self.config.voc == "hifigan_csmsc_onnx" or
            self.config.voc == "mb_melgan_csmsc_onnx"
        ), 'Please check config, am support: fastspeech2, voc support: hifigan_csmsc-zh or mb_melgan_csmsc.'

        assert (
            self.config.voc_block > 0 and self.config.voc_pad > 0
        ), "Please set correct voc_block and voc_pad, they should be more than 0."

        assert (
            self.config.voc_sample_rate == self.config.am_sample_rate
        ), "The sample rate of AM and Vocoder model are different, please check model."

        self.sample_rate = self.config.voc_sample_rate

        # Give the attribute a value up front: the except branch below formats
        # self.device, and the failure may happen before it is assigned
        # (e.g. while reading the session config).
        self.device = None
        try:
            # Device priority: AM session config, then vocoder session
            # config, then whatever paddle currently reports.
            if self.config.am_sess_conf.device is not None:
                self.device = self.config.am_sess_conf.device
            elif self.config.voc_sess_conf.device is not None:
                self.device = self.config.voc_sess_conf.device
            else:
                self.device = paddle.get_device()
            paddle.set_device(self.device)
        except Exception as e:
            logger.error(
                "Set device failed, please check if device is already used and the parameter 'device' in the yaml file"
            )
            logger.error("Initialize TTS server engine Failed on device: %s." %
                         (self.device))
            logger.error(e)
            return False

        try:
            self.executor._init_from_path(
                am=self.config.am,
                am_ckpt=self.config.am_ckpt,
                am_stat=self.config.am_stat,
                phones_dict=self.config.phones_dict,
                tones_dict=self.config.tones_dict,
                speaker_dict=self.config.speaker_dict,
                am_sample_rate=self.config.am_sample_rate,
                am_sess_conf=self.config.am_sess_conf,
                voc=self.config.voc,
                voc_ckpt=self.config.voc_ckpt,
                voc_sample_rate=self.config.voc_sample_rate,
                voc_sess_conf=self.config.voc_sess_conf,
                lang=self.config.lang)

        except Exception as e:
            logger.error("Failed to get model related files.")
            logger.error("Initialize TTS server engine Failed on device: %s." %
                         (self.config.voc_sess_conf.device))
            # Log through logger.error like the device branch above; the
            # logger object itself is not called with a bare message
            # anywhere else in this module.
            logger.error(e)
            return False

        logger.info("Initialize TTS server engine successfully on device: %s." %
                    (self.config.voc_sess_conf.device))

        return True


class PaddleTTSConnectionHandler:
    """Per-request handler that streams synthesized audio from a TTSEngine.

    The handler copies the streaming parameters from the engine and uses the
    engine's executor (frontend + ONNX sessions) to synthesize audio chunk by
    chunk.
    """

    def __init__(self, tts_engine):
        """The PaddleSpeech TTS Server Connection Handler
           This connection process every tts server request
        Args:
            tts_engine (TTSEngine): The TTS engine
        """
        super().__init__()
        logger.debug(
            "Create PaddleTTSConnectionHandler to process the tts request")

        self.tts_engine = tts_engine
        self.executor = self.tts_engine.executor
        self.config = self.tts_engine.config
        self.am_block = self.tts_engine.am_block
        self.am_pad = self.tts_engine.am_pad
        self.voc_block = self.tts_engine.voc_block
        self.voc_pad = self.tts_engine.voc_pad
        self.am_upsample = self.tts_engine.am_upsample
        self.voc_upsample = self.tts_engine.voc_upsample

    def depadding(self, data, chunk_num, chunk_id, block, pad, upsample):
        """Cut the padded context off the output of one streaming chunk.

        Args:
            data: Output of the chunk; it is sliced along its first axis.
            chunk_num (int): Total number of chunks.
            chunk_id (int): Zero-based index of this chunk.
            block (int): Chunk size without padding, in input frames.
            pad (int): Padding added on each side of a chunk, in input frames.
            upsample (int): Number of output items produced per input frame.

        Returns:
            The slice of ``data`` that belongs to the un-padded block.
        """
        # Early chunks may have fewer than `pad` frames in front of them.
        front_pad = min(chunk_id * block, pad)
        # first chunk
        if chunk_id == 0:
            data = data[:block * upsample]
        # last chunk
        elif chunk_id == chunk_num - 1:
            data = data[front_pad * upsample:]
        # middle chunk
        else:
            data = data[front_pad * upsample:(front_pad + block) * upsample]

        return data

    @paddle.no_grad()
    def infer(
            self,
            text: str,
            lang: str='zh',
            am: str='fastspeech2_csmsc_onnx',
            spk_id: int=0, ):
        """Synthesize ``text`` and yield the audio chunk by chunk.

        Timing statistics are stored on the handler: ``frontend_time``,
        ``first_am_infer``, ``first_voc_infer``, ``first_response_time`` (set
        when the first chunk is produced) and ``final_response_time`` (set
        once the generator is exhausted).

        Args:
            text (str): Text to synthesize.
            lang (str): 'zh' or 'en'.
            am (str): Acoustic model name; "fastspeech2_csmsc_onnx" or
                "fastspeech2_cnndecoder_csmsc_onnx". Any other value is
                logged as an error and produces no audio.
            spk_id (int): Not used by this method.

        Yields:
            The de-padded vocoder output for each vocoder chunk.

        Raises:
            ValueError: If ``lang`` is neither 'zh' nor 'en'.
        """

        # first_flag marks that the first packet has not been produced yet
        first_flag = 1
        get_tone_ids = False
        merge_sentences = False

        # front
        frontend_st = time.time()
        if lang == 'zh':
            input_ids = self.executor.frontend.get_input_ids(
                text,
                merge_sentences=merge_sentences,
                get_tone_ids=get_tone_ids)
        elif lang == 'en':
            input_ids = self.executor.frontend.get_input_ids(
                text, merge_sentences=merge_sentences)
        else:
            logger.error("lang should in {'zh', 'en'}!")
            # Fail here with a meaningful error instead of running into an
            # unbound `phone_ids` a few lines further down.
            raise ValueError(
                "Unsupported lang: %r, lang should in {'zh', 'en'}!" % lang)
        phone_ids = input_ids["phone_ids"]
        frontend_et = time.time()
        self.frontend_time = frontend_et - frontend_st

        for sent_id in range(len(phone_ids)):
            part_phone_ids = phone_ids[sent_id].numpy()
            voc_chunk_id = 0

            # fastspeech2_csmsc
            if am == "fastspeech2_csmsc_onnx":
                # am: the whole mel is produced in one call
                mel = self.executor.am_sess.run(
                    output_names=None, input_feed={'text': part_phone_ids})
                mel = mel[0]
                if first_flag == 1:
                    first_am_et = time.time()
                    self.first_am_infer = first_am_et - frontend_et

                # voc streaming
                mel_chunks = get_chunks(mel, self.voc_block, self.voc_pad,
                                        "voc")
                voc_chunk_num = len(mel_chunks)
                for chunk_id, mel_chunk in enumerate(mel_chunks):
                    sub_wav = self.executor.voc_sess.run(
                        output_names=None, input_feed={'logmel': mel_chunk})
                    sub_wav = self.depadding(sub_wav[0], voc_chunk_num,
                                             chunk_id, self.voc_block,
                                             self.voc_pad, self.voc_upsample)
                    if first_flag == 1:
                        first_voc_et = time.time()
                        self.first_voc_infer = first_voc_et - first_am_et
                        self.first_response_time = first_voc_et - frontend_st
                        first_flag = 0

                    yield sub_wav

            # fastspeech2_cnndecoder_csmsc
            elif am == "fastspeech2_cnndecoder_csmsc_onnx":
                # am
                orig_hs = self.executor.am_encoder_infer_sess.run(
                    None, input_feed={'text': part_phone_ids})
                orig_hs = orig_hs[0]

                # streaming voc chunk info
                mel_len = orig_hs.shape[1]
                voc_chunk_num = math.ceil(mel_len / self.voc_block)
                start = 0
                end = min(self.voc_block + self.voc_pad, mel_len)

                # streaming am
                hss = get_chunks(orig_hs, self.am_block, self.am_pad, "am")
                am_chunk_num = len(hss)
                for am_chunk_id, hs in enumerate(hss):
                    am_decoder_output = self.executor.am_decoder_sess.run(
                        None, input_feed={'xs': hs})
                    am_postnet_output = self.executor.am_postnet_sess.run(
                        None,
                        input_feed={
                            'xs': np.transpose(am_decoder_output[0], (0, 2, 1))
                        })
                    # NOTE(review): the whole run() result (not its first
                    # element) is added to the postnet array here; the
                    # [0][0] below presumably strips the extra leading axis
                    # that this creates -- confirm before simplifying.
                    am_output_data = am_decoder_output + np.transpose(
                        am_postnet_output[0], (0, 2, 1))
                    normalized_mel = am_output_data[0][0]

                    sub_mel = denorm(normalized_mel, self.executor.am_mu,
                                     self.executor.am_std)
                    sub_mel = self.depadding(sub_mel, am_chunk_num,
                                             am_chunk_id, self.am_block,
                                             self.am_pad, self.am_upsample)

                    # accumulate the mel frames produced so far
                    if am_chunk_id == 0:
                        mel_streaming = sub_mel
                    else:
                        mel_streaming = np.concatenate(
                            (mel_streaming, sub_mel), axis=0)

                    # streaming voc
                    # Once the streaming AM has produced at least as many mel
                    # frames as the next vocoder chunk needs, run the
                    # streaming vocoder on that chunk.
                    while (mel_streaming.shape[0] >= end and
                           voc_chunk_id < voc_chunk_num):
                        if first_flag == 1:
                            first_am_et = time.time()
                            self.first_am_infer = first_am_et - frontend_et
                        voc_chunk = mel_streaming[start:end, :]

                        sub_wav = self.executor.voc_sess.run(
                            output_names=None, input_feed={'logmel': voc_chunk})
                        sub_wav = self.depadding(
                            sub_wav[0], voc_chunk_num, voc_chunk_id,
                            self.voc_block, self.voc_pad, self.voc_upsample)
                        if first_flag == 1:
                            first_voc_et = time.time()
                            self.first_voc_infer = first_voc_et - first_am_et
                            self.first_response_time = first_voc_et - frontend_st
                            first_flag = 0

                        yield sub_wav

                        # window of the next vocoder chunk, including padding
                        voc_chunk_id += 1
                        start = max(
                            0, voc_chunk_id * self.voc_block - self.voc_pad)
                        end = min(
                            (voc_chunk_id + 1) * self.voc_block + self.voc_pad,
                            mel_len)

            else:
                logger.error(
                    "Only support fastspeech2_csmsc or fastspeech2_cnndecoder_csmsc on streaming tts."
                )

        self.final_response_time = time.time() - frontend_st

    def run(self, sentence: str, spk_id: int=0):
        """ run include inference and postprocess.

        Args:
            sentence (str): text to be synthesized
            spk_id (int, optional): speaker id for multi-speaker speech synthesis. Defaults to 0.

        Yields:
            str: base64 text of each synthesized chunk after conversion with
            ``float2pcm``.
        """
        wav_list = []

        for wav in self.infer(
                text=sentence,
                lang=self.config.lang,
                am=self.config.am,
                spk_id=spk_id, ):

            # convert the chunk with float2pcm, then encode its bytes as base64
            wav = float2pcm(wav)
            wav_bytes = wav.tobytes()  # to bytes
            wav_base64 = base64.b64encode(wav_bytes).decode('utf8')  # to base64
            wav_list.append(wav)

            yield wav_base64

        # Same value as len(np.concatenate(wav_list, axis=0)), but it also
        # works when nothing was synthesized.
        total_samples = sum(len(wav) for wav in wav_list)
        if total_samples == 0:
            # No audio: the timing attributes of the first packet were never
            # set and the RTF would divide by zero, so skip the statistics.
            logger.error(f"No audio was synthesized for sentence: {sentence}")
            return

        duration = total_samples / self.tts_engine.sample_rate
        logger.info(f"sentence: {sentence}")
        logger.info(f"The durations of audio is: {duration} s")
        logger.info(f"first response time: {self.first_response_time} s")
        logger.info(f"final response time: {self.final_response_time} s")
        logger.info(f"RTF: {self.final_response_time / duration}")
        logger.info(
            f"Other info: front time: {self.frontend_time} s, first am infer time: {self.first_am_infer} s, first voc infer time: {self.first_voc_infer} s,"
        )


================================================
FILE: paddlespeech/server/engine/tts/online/python/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/engine/tts/online/python/tts_engine.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import base64
import math
import os
import time
from typing import Optional

import numpy as np
import paddle
import yaml
from yacs.config import CfgNode

from paddlespeech.cli.log import logger
from paddlespeech.cli.tts.infer import TTSExecutor
from paddlespeech.resource import CommonTaskResource
from paddlespeech.server.engine.base_engine import BaseEngine
from paddlespeech.server.utils.audio_process import float2pcm
from paddlespeech.server.utils.util import denorm
from paddlespeech.server.utils.util import get_chunks
from paddlespeech.t2s.frontend.en_frontend import English
from paddlespeech.t2s.frontend.zh_frontend import Frontend
from paddlespeech.t2s.modules.normalizer import ZScore

__all__ = ['TTSEngine', 'PaddleTTSConnectionHandler']


class TTSServerExecutor(TTSExecutor):
    """Executor that loads the AM / vocoder models for the streaming server."""

    def __init__(self):
        super().__init__()
        self.task_resource = CommonTaskResource(
            task='tts', model_format='dynamic', inference_mode='online')

    def get_model_info(self,
                       field: str,
                       model_name: str,
                       ckpt: Optional[os.PathLike],
                       stat: Optional[os.PathLike]):
        """get model information

        Args:
            field (str): am or voc
            model_name (str): model type, support fastspeech2, hifigan, mb_melgan
            ckpt (Optional[os.PathLike]): ckpt file
            stat (Optional[os.PathLike]): stat file, including mean and standard deviation

        Returns:
            [module]: model module
            [Tensor]: mean
            [Tensor]: standard deviation

        Raises:
            ValueError: If ``field`` is neither "am" nor "voc".
        """

        model_class = self.task_resource.get_model_class(model_name)

        if field == "am":
            odim = self.am_config.n_mels
            model = model_class(
                idim=self.vocab_size, odim=odim, **self.am_config["model"])
            model.set_state_dict(paddle.load(ckpt)["main_params"])

        elif field == "voc":
            model = model_class(**self.voc_config["generator_params"])
            model.set_state_dict(paddle.load(ckpt)["generator_params"])
            model.remove_weight_norm()

        else:
            logger.error("Please set correct field, am or voc")
            # Without a model there is nothing to return; stop here rather
            # than failing on the unbound `model` below.
            raise ValueError(
                "Unsupported field: %r, please set am or voc" % field)

        model.eval()
        # the stat file holds the mean and the standard deviation
        model_mu, model_std = np.load(stat)
        model_mu = paddle.to_tensor(model_mu)
        model_std = paddle.to_tensor(model_std)

        return model, model_mu, model_std

    def _init_from_path(
            self,
            am: str='fastspeech2_csmsc',
            am_config: Optional[os.PathLike]=None,
            am_ckpt: Optional[os.PathLike]=None,
            am_stat: Optional[os.PathLike]=None,
            phones_dict: Optional[os.PathLike]=None,
            tones_dict: Optional[os.PathLike]=None,
            speaker_dict: Optional[os.PathLike]=None,
            voc: str='mb_melgan_csmsc',
            voc_config: Optional[os.PathLike]=None,
            voc_ckpt: Optional[os.PathLike]=None,
            voc_stat: Optional[os.PathLike]=None,
            lang: str='zh', ):
        """
        Init model and other resources from a specific path.

        When any of the AM files (config / ckpt / stat / phones_dict) is
        missing, the paths are taken from ``self.task_resource`` instead; the
        same holds for the vocoder files (config / ckpt / stat). The call is a
        no-op when both ``am_inference`` and ``voc_inference`` already exist.

        ``tones_dict`` and ``speaker_dict`` are accepted but not used: the
        corresponding attributes are always set to None.
        """
        if hasattr(self, 'am_inference') and hasattr(self, 'voc_inference'):
            logger.debug('Models had been initialized.')
            return
        # am model info
        if am_ckpt is None or am_config is None or am_stat is None or phones_dict is None:
            use_pretrained_am = True
        else:
            use_pretrained_am = False

        am_tag = am + '-' + lang
        self.task_resource.set_task_model(
            model_tag=am_tag,
            model_type=0,  # am
            skip_download=not use_pretrained_am,
            version=None,  # default version
        )
        if use_pretrained_am:
            self.am_res_path = self.task_resource.res_dir
            self.am_config = os.path.join(self.am_res_path,
                                          self.task_resource.res_dict['config'])
            self.am_ckpt = os.path.join(self.am_res_path,
                                        self.task_resource.res_dict['ckpt'])
            self.am_stat = os.path.join(
                self.am_res_path, self.task_resource.res_dict['speech_stats'])
            # must have phones_dict in acoustic
            self.phones_dict = os.path.join(
                self.am_res_path, self.task_resource.res_dict['phones_dict'])
            logger.debug(self.am_res_path)
            logger.debug(self.am_config)
            logger.debug(self.am_ckpt)
        else:
            self.am_config = os.path.abspath(am_config)
            self.am_ckpt = os.path.abspath(am_ckpt)
            self.am_stat = os.path.abspath(am_stat)
            self.phones_dict = os.path.abspath(phones_dict)
            self.am_res_path = os.path.dirname(os.path.abspath(self.am_config))

        self.tones_dict = None
        self.speaker_dict = None

        # voc model info
        if voc_ckpt is None or voc_config is None or voc_stat is None:
            use_pretrained_voc = True
        else:
            use_pretrained_voc = False

        voc_tag = voc + '-' + lang
        self.task_resource.set_task_model(
            model_tag=voc_tag,
            model_type=1,  # vocoder
            skip_download=not use_pretrained_voc,
            version=None,  # default version
        )
        if use_pretrained_voc:
            self.voc_res_path = self.task_resource.voc_res_dir
            self.voc_config = os.path.join(
                self.voc_res_path, self.task_resource.voc_res_dict['config'])
            self.voc_ckpt = os.path.join(
                self.voc_res_path, self.task_resource.voc_res_dict['ckpt'])
            self.voc_stat = os.path.join(
                self.voc_res_path,
                self.task_resource.voc_res_dict['speech_stats'])
            logger.debug(self.voc_res_path)
            logger.debug(self.voc_config)
            logger.debug(self.voc_ckpt)
        else:
            self.voc_config = os.path.abspath(voc_config)
            self.voc_ckpt = os.path.abspath(voc_ckpt)
            self.voc_stat = os.path.abspath(voc_stat)
            self.voc_res_path = os.path.dirname(
                os.path.abspath(self.voc_config))

        # Init body.
        # The attributes hold file paths up to here and are replaced by the
        # parsed configs. Read the text files as UTF-8 explicitly so the
        # result does not depend on the platform's default encoding.
        with open(self.am_config, encoding='utf-8') as f:
            self.am_config = CfgNode(yaml.safe_load(f))
        with open(self.voc_config, encoding='utf-8') as f:
            self.voc_config = CfgNode(yaml.safe_load(f))

        with open(self.phones_dict, "r", encoding='utf-8') as f:
            phn_id = [line.strip().split() for line in f.readlines()]
        self.vocab_size = len(phn_id)

        # frontend
        if lang == 'zh':
            self.frontend = Frontend(
                phone_vocab_path=self.phones_dict,
                tone_vocab_path=self.tones_dict)

        elif lang == 'en':
            self.frontend = English(phone_vocab_path=self.phones_dict)

        # am infer info
        # model name = tag without its last "_<suffix>" part
        self.am_name = am[:am.rindex('_')]
        if self.am_name == "fastspeech2_cnndecoder":
            # This branch keeps the raw model plus mean / std on the executor
            # instead of wrapping the model in an inference class.
            self.am_inference, self.am_mu, self.am_std = self.get_model_info(
                "am", "fastspeech2", self.am_ckpt, self.am_stat)
        else:
            am_model, am_mu, am_std = self.get_model_info(
                "am", self.am_name, self.am_ckpt, self.am_stat)
            am_normalizer = ZScore(am_mu, am_std)
            am_inference_class = self.task_resource.get_model_class(
                self.am_name + '_inference')
            self.am_inference = am_inference_class(am_normalizer, am_model)
            self.am_inference.eval()

        # voc infer info
        self.voc_name = voc[:voc.rindex('_')]
        voc_model, voc_mu, voc_std = self.get_model_info(
            "voc", self.voc_name, self.voc_ckpt, self.voc_stat)
        voc_normalizer = ZScore(voc_mu, voc_std)
        voc_inference_class = self.task_resource.get_model_class(self.voc_name +
                                                                 '_inference')
        self.voc_inference = voc_inference_class(voc_normalizer, voc_model)
        self.voc_inference.eval()


class TTSEngine(BaseEngine):
    """Streaming TTS server engine that runs the models through paddle.

    The engine keeps the configuration-derived streaming parameters
    (block / pad / upsample sizes, sample rate, device); the models and the
    text frontend are owned by ``self.executor``.
    """

    def __init__(self, name=None):
        """Initialize TTS server engine

        Args:
            name: Not used by this class; kept so callers may still pass it.
        """
        super().__init__()

    def init(self, config: dict) -> bool:
        """Validate the configuration, pick the device and load the models.

        Args:
            config: Server configuration. It is read through attribute access
                (``config.lang``, ``config.am``, ``config.device`` ...).

        Returns:
            bool: True when the device was set and the executor loaded its
            models; False when either step failed (the failure is logged).

        Raises:
            AssertionError: If the am / voc names are not supported, if
                ``voc_block`` or ``voc_pad`` is not positive, or if the
                loaded AM and vocoder configs have different ``fs`` values.
        """
        self.executor = TTSServerExecutor()
        self.config = config
        self.lang = self.config.lang
        self.engine_type = "online"

        assert (
            config.am == "fastspeech2_csmsc" or
            config.am == "fastspeech2_cnndecoder_csmsc"
        ) and (
            config.voc == "hifigan_csmsc" or config.voc == "mb_melgan_csmsc"
        ), 'Please check config, am support: fastspeech2, voc support: hifigan_csmsc-zh or mb_melgan_csmsc.'

        assert (
            config.voc_block > 0 and config.voc_pad > 0
        ), "Please set correct voc_block and voc_pad, they should be more than 0."

        # Give the attribute a value up front: the except branch below formats
        # self.device, and the failure may happen before it is assigned
        # (e.g. while reading config.device).
        self.device = None
        try:
            # use the configured device, else whatever paddle reports
            if self.config.device is not None:
                self.device = self.config.device
            else:
                self.device = paddle.get_device()
            paddle.set_device(self.device)
        except Exception as e:
            logger.error(
                "Set device failed, please check if device is already used and the parameter 'device' in the yaml file"
            )
            logger.error("Initialize TTS server engine Failed on device: %s." %
                         (self.device))
            logger.error(e)
            return False

        try:
            self.executor._init_from_path(
                am=self.config.am,
                am_config=self.config.am_config,
                am_ckpt=self.config.am_ckpt,
                am_stat=self.config.am_stat,
                phones_dict=self.config.phones_dict,
                tones_dict=self.config.tones_dict,
                speaker_dict=self.config.speaker_dict,
                voc=self.config.voc,
                voc_config=self.config.voc_config,
                voc_ckpt=self.config.voc_ckpt,
                voc_stat=self.config.voc_stat,
                lang=self.config.lang)
        except Exception as e:
            logger.error("Failed to get model related files.")
            logger.error("Initialize TTS server engine Failed on device: %s." %
                         (self.device))
            logger.error(e)
            return False

        # The model configs are only available after the executor loaded them.
        assert (
            self.executor.am_config.fs == self.executor.voc_config.fs
        ), "The sample rate of AM and Vocoder model are different, please check model."

        self.sample_rate = self.executor.am_config.fs

        self.am_block = self.config.am_block
        self.am_pad = self.config.am_pad
        self.voc_block = self.config.voc_block
        self.voc_pad = self.config.voc_pad
        self.am_upsample = 1
        # the vocoder's upsample factor is read from n_shift of its config
        self.voc_upsample = self.executor.voc_config.n_shift

        logger.info("Initialize TTS server engine successfully on device: %s." %
                    (self.device))

        return True


class PaddleTTSConnectionHandler:
    """Per-request handler that streams synthesized audio from a TTSEngine.

    The handler copies the streaming parameters from the engine and uses the
    engine's executor (frontend + models) to synthesize audio chunk by chunk.
    """

    def __init__(self, tts_engine):
        """The PaddleSpeech TTS Server Connection Handler
           This connection process every tts server request
        Args:
            tts_engine (TTSEngine): The TTS engine
        """
        super().__init__()
        logger.debug(
            "Create PaddleTTSConnectionHandler to process the tts request")

        self.tts_engine = tts_engine
        self.executor = self.tts_engine.executor
        self.config = self.tts_engine.config
        self.am_block = self.tts_engine.am_block
        self.am_pad = self.tts_engine.am_pad
        self.voc_block = self.tts_engine.voc_block
        self.voc_pad = self.tts_engine.voc_pad
        self.am_upsample = self.tts_engine.am_upsample
        self.voc_upsample = self.tts_engine.voc_upsample

    def depadding(self, data, chunk_num, chunk_id, block, pad, upsample):
        """Cut the padded context off the output of one streaming chunk.

        Args:
            data: Output of the chunk; it is sliced along its first axis.
            chunk_num (int): Total number of chunks.
            chunk_id (int): Zero-based index of this chunk.
            block (int): Chunk size without padding, in input frames.
            pad (int): Padding added on each side of a chunk, in input frames.
            upsample (int): Number of output items produced per input frame.

        Returns:
            The slice of ``data`` that belongs to the un-padded block.
        """
        # Early chunks may have fewer than `pad` frames in front of them.
        front_pad = min(chunk_id * block, pad)
        # first chunk
        if chunk_id == 0:
            data = data[:block * upsample]
        # last chunk
        elif chunk_id == chunk_num - 1:
            data = data[front_pad * upsample:]
        # middle chunk
        else:
            data = data[front_pad * upsample:(front_pad + block) * upsample]

        return data

    @paddle.no_grad()
    def infer(
            self,
            text: str,
            lang: str='zh',
            am: str='fastspeech2_csmsc',
            spk_id: int=0, ):
        """Synthesize ``text`` and yield the audio chunk by chunk.

        Timing statistics are stored on the handler: ``frontend_time``,
        ``first_am_infer``, ``first_voc_infer``, ``first_response_time`` (set
        when the first chunk is produced) and ``final_response_time`` (set
        once the generator is exhausted).

        Args:
            text (str): Text to synthesize.
            lang (str): 'zh' or 'en'.
            am (str): Acoustic model name; "fastspeech2_csmsc" or
                "fastspeech2_cnndecoder_csmsc". Any other value is logged as
                an error and produces no audio.
            spk_id (int): Not used by this method.

        Yields:
            The de-padded vocoder output for each vocoder chunk.

        Raises:
            ValueError: If ``lang`` is neither 'zh' nor 'en'.
        """

        # first_flag marks that the first packet has not been produced yet
        first_flag = 1

        get_tone_ids = False
        merge_sentences = False
        frontend_st = time.time()
        if lang == 'zh':
            input_ids = self.executor.frontend.get_input_ids(
                text,
                merge_sentences=merge_sentences,
                get_tone_ids=get_tone_ids)
        elif lang == 'en':
            input_ids = self.executor.frontend.get_input_ids(
                text, merge_sentences=merge_sentences)
        else:
            logger.error("lang should in {'zh', 'en'}!")
            # Fail here with a meaningful error instead of running into an
            # unbound `phone_ids` a few lines further down.
            raise ValueError(
                "Unsupported lang: %r, lang should in {'zh', 'en'}!" % lang)
        phone_ids = input_ids["phone_ids"]
        frontend_et = time.time()
        self.frontend_time = frontend_et - frontend_st

        for sent_id in range(len(phone_ids)):
            part_phone_ids = phone_ids[sent_id]
            voc_chunk_id = 0

            # fastspeech2_csmsc
            if am == "fastspeech2_csmsc":
                # am: the whole mel is produced in one call
                mel = self.executor.am_inference(part_phone_ids)
                if first_flag == 1:
                    first_am_et = time.time()
                    self.first_am_infer = first_am_et - frontend_et

                # voc streaming
                mel_chunks = get_chunks(mel, self.voc_block, self.voc_pad,
                                        "voc")
                voc_chunk_num = len(mel_chunks)
                for chunk_id, mel_chunk in enumerate(mel_chunks):
                    sub_wav = self.executor.voc_inference(mel_chunk)
                    sub_wav = self.depadding(sub_wav, voc_chunk_num, chunk_id,
                                             self.voc_block, self.voc_pad,
                                             self.voc_upsample)
                    if first_flag == 1:
                        first_voc_et = time.time()
                        self.first_voc_infer = first_voc_et - first_am_et
                        self.first_response_time = first_voc_et - frontend_st
                        first_flag = 0

                    yield sub_wav

            # fastspeech2_cnndecoder_csmsc
            elif am == "fastspeech2_cnndecoder_csmsc":
                # am
                orig_hs = self.executor.am_inference.encoder_infer(
                    part_phone_ids)

                # streaming voc chunk info
                mel_len = orig_hs.shape[1]
                voc_chunk_num = math.ceil(mel_len / self.voc_block)
                start = 0
                end = min(self.voc_block + self.voc_pad, mel_len)

                # streaming am
                hss = get_chunks(orig_hs, self.am_block, self.am_pad, "am")
                am_chunk_num = len(hss)
                for am_chunk_id, hs in enumerate(hss):
                    before_outs = self.executor.am_inference.decoder(hs)
                    # add the postnet output to the decoder output
                    after_outs = before_outs + self.executor.am_inference.postnet(
                        before_outs.transpose((0, 2, 1))).transpose((0, 2, 1))
                    normalized_mel = after_outs[0]
                    sub_mel = denorm(normalized_mel, self.executor.am_mu,
                                     self.executor.am_std)
                    sub_mel = self.depadding(sub_mel, am_chunk_num,
                                             am_chunk_id, self.am_block,
                                             self.am_pad, self.am_upsample)

                    # accumulate the mel frames produced so far
                    if am_chunk_id == 0:
                        mel_streaming = sub_mel
                    else:
                        mel_streaming = np.concatenate(
                            (mel_streaming, sub_mel), axis=0)

                    # streaming voc
                    # Once the streaming AM has produced at least as many mel
                    # frames as the next vocoder chunk needs, run the
                    # streaming vocoder on that chunk.
                    while (mel_streaming.shape[0] >= end and
                           voc_chunk_id < voc_chunk_num):
                        if first_flag == 1:
                            first_am_et = time.time()
                            self.first_am_infer = first_am_et - frontend_et
                        voc_chunk = mel_streaming[start:end, :]
                        voc_chunk = paddle.to_tensor(voc_chunk)
                        sub_wav = self.executor.voc_inference(voc_chunk)

                        sub_wav = self.depadding(
                            sub_wav, voc_chunk_num, voc_chunk_id,
                            self.voc_block, self.voc_pad, self.voc_upsample)
                        if first_flag == 1:
                            first_voc_et = time.time()
                            self.first_voc_infer = first_voc_et - first_am_et
                            self.first_response_time = first_voc_et - frontend_st
                            first_flag = 0

                        yield sub_wav

                        # window of the next vocoder chunk, including padding
                        voc_chunk_id += 1
                        start = max(
                            0, voc_chunk_id * self.voc_block - self.voc_pad)
                        end = min(
                            (voc_chunk_id + 1) * self.voc_block + self.voc_pad,
                            mel_len)

            else:
                logger.error(
                    "Only support fastspeech2_csmsc or fastspeech2_cnndecoder_csmsc on streaming tts."
                )

        self.final_response_time = time.time() - frontend_st

    def run(
            self,
            sentence: str,
            spk_id: int=0, ):
        """ run include inference and postprocess.

        Args:
            sentence (str): text to be synthesized
            spk_id (int, optional): speaker id for multi-speaker speech synthesis. Defaults to 0.

        Yields:
            str: base64 text of each synthesized chunk after conversion with
            ``float2pcm``.
        """

        wav_list = []

        for wav in self.infer(
                text=sentence,
                lang=self.config.lang,
                am=self.config.am,
                spk_id=spk_id, ):

            # convert the chunk with float2pcm, then encode its bytes as base64
            wav = float2pcm(wav)
            wav_bytes = wav.tobytes()  # to bytes
            wav_base64 = base64.b64encode(wav_bytes).decode('utf8')  # to base64
            wav_list.append(wav)

            yield wav_base64

        # Same value as len(np.concatenate(wav_list, axis=0)), but it also
        # works when nothing was synthesized.
        total_samples = sum(len(wav) for wav in wav_list)
        if total_samples == 0:
            # No audio: the timing attributes of the first packet were never
            # set and the RTF would divide by zero, so skip the statistics.
            logger.error(f"No audio was synthesized for sentence: {sentence}")
            return

        duration = total_samples / self.tts_engine.sample_rate

        logger.info(f"sentence: {sentence}")
        logger.info(f"The durations of audio is: {duration} s")
        logger.info(f"first response time: {self.first_response_time} s")
        logger.info(f"final response time: {self.final_response_time} s")
        logger.info(f"RTF: {self.final_response_time / duration}")
        logger.info(
            f"Other info: front time: {self.frontend_time} s, first am infer time: {self.first_am_infer} s, first voc infer time: {self.first_voc_infer} s,"
        )


================================================
FILE: paddlespeech/server/engine/tts/paddleinference/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/engine/tts/paddleinference/tts_engine.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import base64
import io
import os
import sys
import time
from typing import Optional

import librosa
import numpy as np
import paddle
import soundfile as sf

from paddlespeech.cli.log import logger
from paddlespeech.cli.tts.infer import TTSExecutor
from paddlespeech.resource import CommonTaskResource
from paddlespeech.server.engine.base_engine import BaseEngine
from paddlespeech.server.utils.audio_process import change_speed
from paddlespeech.server.utils.errors import ErrorCode
from paddlespeech.server.utils.exception import ServerBaseException
from paddlespeech.server.utils.paddle_predictor import init_predictor
from paddlespeech.server.utils.paddle_predictor import run_model
from paddlespeech.t2s.frontend.en_frontend import English
from paddlespeech.t2s.frontend.zh_frontend import Frontend

__all__ = ['TTSEngine', 'PaddleTTSConnectionHandler']


class TTSServerExecutor(TTSExecutor):
    def __init__(self):
        """Set up the base executor and attach the TTS task resource.

        The resource helper is created for the ``'tts'`` task with the
        ``'static'`` model format; it is later used by ``_init_from_path``
        to resolve model files.
        """
        super().__init__()
        static_tts_resource = CommonTaskResource(
            task='tts', model_format='static')
        self.task_resource = static_tts_resource

    def _init_from_path(
            self,
            am: str='fastspeech2_csmsc',
            am_model: Optional[os.PathLike]=None,
            am_params: Optional[os.PathLike]=None,
            am_sample_rate: int=24000,
            phones_dict: Optional[os.PathLike]=None,
            tones_dict: Optional[os.PathLike]=None,
            speaker_dict: Optional[os.PathLike]=None,
            voc: str='pwgan_csmsc',
            voc_model: Optional[os.PathLike]=None,
            voc_params: Optional[os.PathLike]=None,
            voc_sample_rate: int=24000,
            lang: str='zh',
            am_predictor_conf: dict=None,
            voc_predictor_conf: dict=None, ):
        """
        Init model and other resources from a specific path.

        For the acoustic model (AM) and the vocoder independently, the
        resource registered under the tag ``"<name>-<lang>"`` is used unless
        all of the corresponding local files are given. Calling this method
        again once both predictors exist is a no-op.

        Args:
            am (str): acoustic model name, e.g. ``'fastspeech2_csmsc'``.
            am_model (os.PathLike, optional): local AM model file.
            am_params (os.PathLike, optional): local AM parameter file.
            am_sample_rate (int): sample rate used with a local AM.
            phones_dict (os.PathLike, optional): local phone vocabulary file.
            tones_dict (os.PathLike, optional): tone vocabulary override; only
                applied when the resource entry declares a ``tones_dict``.
            speaker_dict (os.PathLike, optional): speaker vocabulary override;
                only applied when the resource entry declares a
                ``speaker_dict``.
            voc (str): vocoder name, e.g. ``'pwgan_csmsc'``.
            voc_model (os.PathLike, optional): local vocoder model file.
            voc_params (os.PathLike, optional): local vocoder parameter file.
            voc_sample_rate (int): sample rate used with a local vocoder.
            lang (str): ``'zh'`` or ``'en'``.
            am_predictor_conf (dict, optional): passed to ``init_predictor``
                for the AM.
            voc_predictor_conf (dict, optional): passed to ``init_predictor``
                for the vocoder.

        Raises:
            AssertionError: if the AM and vocoder sample rates differ.
            ValueError: if ``lang`` is neither ``'zh'`` nor ``'en'``.
        """
        if hasattr(self, 'am_predictor') and hasattr(self, 'voc_predictor'):
            logger.debug('Models had been initialized.')
            return

        # am: fall back to the pretrained resource unless every local file
        # needed by the acoustic model was supplied.
        use_pretrained_am = (am_model is None or am_params is None or
                             phones_dict is None)

        am_tag = am + '-' + lang
        self.task_resource.set_task_model(
            model_tag=am_tag,
            model_type=0,  # am
            skip_download=not use_pretrained_am,
            version=None,  # default version
        )
        if use_pretrained_am:
            self.am_res_path = self.task_resource.res_dir
            self.am_model = os.path.join(self.am_res_path,
                                         self.task_resource.res_dict['model'])
            self.am_params = os.path.join(self.am_res_path,
                                          self.task_resource.res_dict['params'])
            # must have phones_dict in acoustic
            self.phones_dict = os.path.join(
                self.am_res_path, self.task_resource.res_dict['phones_dict'])
            self.am_sample_rate = self.task_resource.res_dict['sample_rate']

            logger.debug(self.am_res_path)
            logger.debug(self.am_model)
            logger.debug(self.am_params)
        else:
            self.am_model = os.path.abspath(am_model)
            self.am_params = os.path.abspath(am_params)
            self.phones_dict = os.path.abspath(phones_dict)
            self.am_sample_rate = am_sample_rate
            self.am_res_path = os.path.dirname(os.path.abspath(self.am_model))
        logger.debug("self.phones_dict: {}".format(self.phones_dict))

        # for speedyspeech
        self.tones_dict = None
        if 'tones_dict' in self.task_resource.res_dict:
            self.tones_dict = os.path.join(
                self.am_res_path, self.task_resource.res_dict['tones_dict'])
            if tones_dict:
                self.tones_dict = tones_dict

        # for multi speaker fastspeech2
        self.speaker_dict = None
        if 'speaker_dict' in self.task_resource.res_dict:
            self.speaker_dict = os.path.join(
                self.am_res_path, self.task_resource.res_dict['speaker_dict'])
            if speaker_dict:
                self.speaker_dict = speaker_dict

        # voc: same rule as the acoustic model.
        use_pretrained_voc = voc_model is None or voc_params is None

        voc_tag = voc + '-' + lang
        self.task_resource.set_task_model(
            model_tag=voc_tag,
            model_type=1,  # vocoder
            skip_download=not use_pretrained_voc,
            version=None,  # default version
        )
        if use_pretrained_voc:
            self.voc_res_path = self.task_resource.voc_res_dir
            self.voc_model = os.path.join(
                self.voc_res_path, self.task_resource.voc_res_dict['model'])
            self.voc_params = os.path.join(
                self.voc_res_path, self.task_resource.voc_res_dict['params'])
            self.voc_sample_rate = self.task_resource.voc_res_dict[
                'sample_rate']
            logger.debug(self.voc_res_path)
            logger.debug(self.voc_model)
            logger.debug(self.voc_params)
        else:
            self.voc_model = os.path.abspath(voc_model)
            self.voc_params = os.path.abspath(voc_params)
            self.voc_sample_rate = voc_sample_rate
            self.voc_res_path = os.path.dirname(os.path.abspath(self.voc_model))

        assert (
            self.voc_sample_rate == self.am_sample_rate
        ), "The sample rate of AM and Vocoder model are different, please check model."

        # Init body.
        # The dictionaries are only read here to log their sizes. They are
        # opened explicitly as UTF-8 so the result does not depend on the
        # platform's default text encoding.
        with open(self.phones_dict, "r", encoding='utf-8') as f:
            vocab_size = sum(1 for _ in f)
        logger.debug("vocab_size: {}".format(vocab_size))

        if self.tones_dict:
            with open(self.tones_dict, "r", encoding='utf-8') as f:
                tone_size = sum(1 for _ in f)
            logger.debug("tone_size: {}".format(tone_size))

        if self.speaker_dict:
            with open(self.speaker_dict, 'rt', encoding='utf-8') as f:
                spk_num = sum(1 for _ in f)
            logger.debug("spk_num: {}".format(spk_num))

        # frontend
        if lang == 'zh':
            self.frontend = Frontend(
                phone_vocab_path=self.phones_dict,
                tone_vocab_path=self.tones_dict)
        elif lang == 'en':
            self.frontend = English(phone_vocab_path=self.phones_dict)
        else:
            # Without a frontend every later request would fail with an
            # AttributeError; report the real cause at initialization time.
            raise ValueError(
                "Unsupported lang '%s': lang should be 'zh' or 'en'." % lang)
        logger.debug("frontend done!")

        # Create am predictor
        self.am_predictor_conf = am_predictor_conf
        self.am_predictor = init_predictor(
            model_file=self.am_model,
            params_file=self.am_params,
            predictor_conf=self.am_predictor_conf)
        logger.debug("Create AM predictor successfully.")

        # Create voc predictor
        self.voc_predictor_conf = voc_predictor_conf
        self.voc_predictor = init_predictor(
            model_file=self.voc_model,
            params_file=self.voc_params,
            predictor_conf=self.voc_predictor_conf)
        logger.debug("Create Vocoder predictor successfully.")

    @paddle.no_grad()
    def infer(self,
              text: str,
              lang: str='zh',
              am: str='fastspeech2_csmsc',
              spk_id: int=0):
        """
        Model inference; the synthesized waveform is stored in
        ``self._outputs["wav"]``.

        The text is converted to phone ids (and tone ids for speedyspeech) by
        the frontend, then each returned piece is run through the acoustic
        model and the vocoder. Timing statistics are stored in
        ``self.frontend_time``, ``self.am_time`` and ``self.voc_time``.

        Args:
            text (str): text to synthesize.
            lang (str): ``'zh'`` or ``'en'``.
            am (str): acoustic model name of the form ``<model>_<dataset>``.
            spk_id (int): speaker id, only fed to the model for the
                ``aishell3`` and ``vctk`` datasets.

        Raises:
            ValueError: if ``am`` contains no ``'_'``.
            ServerBaseException: if ``lang`` is unsupported or the frontend
                produced nothing to synthesize.
        """
        # Split on the last underscore: "<model>_<dataset>".
        am_name, am_dataset = am.rsplit('_', 1)
        get_tone_ids = am_name == 'speedyspeech'
        merge_sentences = False

        frontend_st = time.time()
        if lang == 'zh':
            input_ids = self.frontend.get_input_ids(
                text,
                merge_sentences=merge_sentences,
                get_tone_ids=get_tone_ids)
            phone_ids = input_ids["phone_ids"]
            if get_tone_ids:
                tone_ids = input_ids["tone_ids"]
        elif lang == 'en':
            input_ids = self.frontend.get_input_ids(
                text, merge_sentences=merge_sentences)
            phone_ids = input_ids["phone_ids"]
        else:
            # Previously execution continued and crashed on an undefined
            # `phone_ids`; fail with an explicit server error instead.
            logger.error("lang should in {'zh', 'en'}!")
            raise ServerBaseException(ErrorCode.SERVER_INTERNAL_ERR,
                                      "lang should in {'zh', 'en'}!")
        self.frontend_time = time.time() - frontend_st

        if len(phone_ids) == 0:
            # Nothing to synthesize (e.g. the frontend dropped all input);
            # the loop below would otherwise leave the output undefined.
            logger.error("The frontend returned no phone ids for the text.")
            raise ServerBaseException(
                ErrorCode.SERVER_INTERNAL_ERR,
                "The frontend returned no phone ids for the text.")

        self.am_time = 0
        self.voc_time = 0
        wav_parts = []
        for i, part_phone_ids in enumerate(phone_ids):
            # am
            am_st = time.time()
            if am_name == 'speedyspeech':
                am_inputs = [part_phone_ids.numpy(), tone_ids[i].numpy()]
            elif am_dataset in {"aishell3", "vctk"}:
                # multi speaker models additionally take the speaker id
                am_inputs = [part_phone_ids.numpy(), np.array([spk_id])]
            else:
                am_inputs = [part_phone_ids.numpy()]
            mel = run_model(self.am_predictor, am_inputs)[0]
            self.am_time += (time.time() - am_st)

            # voc
            voc_st = time.time()
            wav = run_model(self.voc_predictor, [mel])[0]
            wav_parts.append(paddle.to_tensor(wav))
            self.voc_time += (time.time() - voc_st)

        # Join all pieces once instead of re-concatenating the growing
        # result on every iteration.
        concat_st = time.time()
        if len(wav_parts) == 1:
            wav_all = wav_parts[0]
        else:
            wav_all = paddle.concat(wav_parts)
        self.voc_time += (time.time() - concat_st)
        self._outputs["wav"] = wav_all


class TTSEngine(BaseEngine):
    """TTS server engine that runs the models through Paddle Inference
    predictors (``engine_type`` is ``"inference"``).
    """

    def __init__(self):
        """Initialize TTS server engine
        """
        super(TTSEngine, self).__init__()

    def init(self, config: dict) -> bool:
        """Select the device and load the models described by ``config``.

        Args:
            config: engine configuration; it is read through attribute
                access (``config.lang``, ``config.am``, ...).

        Returns:
            bool: True on success, False if selecting the device or loading
            the models failed (the cause is logged).
        """
        self.executor = TTSServerExecutor()
        self.config = config
        self.lang = self.config.lang
        self.engine_type = "inference"
        # Defined before the try blocks so the error handlers can always
        # format it, even when reading the device from the config is the
        # step that raised.
        self.device = None

        try:
            # Device priority: AM predictor conf, then vocoder predictor
            # conf, then whatever paddle currently uses.
            if self.config.am_predictor_conf.device is not None:
                self.device = self.config.am_predictor_conf.device
            elif self.config.voc_predictor_conf.device is not None:
                self.device = self.config.voc_predictor_conf.device
            else:
                self.device = paddle.get_device()
            paddle.set_device(self.device)
        except Exception as e:
            logger.error(
                "Set device failed, please check if device is already used and the parameter 'device' in the yaml file"
            )
            logger.error("Initialize TTS server engine Failed on device: %s." %
                         (self.device))
            logger.error(e)
            return False

        try:
            self.executor._init_from_path(
                am=self.config.am,
                am_model=self.config.am_model,
                am_params=self.config.am_params,
                am_sample_rate=self.config.am_sample_rate,
                phones_dict=self.config.phones_dict,
                tones_dict=self.config.tones_dict,
                speaker_dict=self.config.speaker_dict,
                voc=self.config.voc,
                voc_model=self.config.voc_model,
                voc_params=self.config.voc_params,
                voc_sample_rate=self.config.voc_sample_rate,
                lang=self.config.lang,
                am_predictor_conf=self.config.am_predictor_conf,
                voc_predictor_conf=self.config.voc_predictor_conf, )
        except Exception as e:
            logger.error("Failed to get model related files.")
            logger.error("Initialize TTS server engine Failed on device: %s." %
                         (self.device))
            logger.error(e)
            return False

        logger.info("Initialize TTS server engine successfully on device: %s." %
                    (self.device))
        return True


class PaddleTTSConnectionHandler(TTSServerExecutor):
    def __init__(self, tts_engine):
        """Build a per-request handler on top of an initialized TTS engine.

        The handler keeps references to the engine's executor, config,
        frontend and both predictors instead of creating its own.

        Args:
            tts_engine (TTSEngine): The TTS engine
        """
        super().__init__()
        logger.debug(
            "Create PaddleTTSConnectionHandler to process the tts request")

        self.tts_engine = tts_engine
        engine_executor = self.tts_engine.executor
        self.executor = engine_executor
        self.config = self.tts_engine.config

        # Objects borrowed from the engine's executor.
        self.frontend = engine_executor.frontend
        self.am_predictor = engine_executor.am_predictor
        self.voc_predictor = engine_executor.voc_predictor

    def postprocess(self,
                    wav,
                    original_fs: int,
                    target_fs: int=0,
                    volume: float=1.0,
                    speed: float=1.0,
                    audio_path: str=None):
        """Post-processing operations, including speed, volume, sample rate, save audio file

        Args:
            wav (numpy(float)): Synthesized audio sample points
            original_fs (int): original audio sample rate
            target_fs (int): target audio sample rate; 0 or a value above
                ``original_fs`` keeps the original sample rate
            volume (float): target volume
            speed (float): target speed
            audio_path (str, optional): where to save the audio; only paths
                ending in ``.wav`` or ``.pcm`` are written. None skips saving.

        Raises:
            ServerBaseException: Throws an exception if the change speed unsuccessfully.

        Returns:
            target_fs: target sample rate for synthesized audio.
            wav_base64: The base64 format of the synthesized audio.
        """

        # transform sample_rate
        if target_fs == 0 or target_fs > original_fs:
            target_fs = original_fs
            wav_tar_fs = wav
            logger.debug(
                "The sample rate of synthesized audio is the same as model, which is {}Hz".
                format(original_fs))
        else:
            # The sample rates must be passed by keyword: recent librosa
            # releases reject them as positional arguments.
            wav_tar_fs = librosa.resample(
                np.squeeze(wav), orig_sr=original_fs, target_sr=target_fs)
            logger.debug(
                "The sample rate of model is {}Hz and the target sample rate is {}Hz. Converting the sample rate of the synthesized audio successfully.".
                format(original_fs, target_fs))
        # transform volume
        wav_vol = wav_tar_fs * volume
        logger.debug("Transform the volume of the audio successfully.")

        # transform speed
        try:  # windows not support soxbindings
            wav_speed = change_speed(wav_vol, speed, target_fs)
            logger.debug("Transform the speed of the audio successfully.")
        except ServerBaseException:
            raise ServerBaseException(
                ErrorCode.SERVER_INTERNAL_ERR,
                "Failed to transform speed. Can not install soxbindings on your system. \
                 You need to set speed value 1.0.")
        except Exception as e:
            logger.error("Failed to transform speed.")
            logger.error(e)
            sys.exit(-1)

        # wav to base64
        buf = io.BytesIO()
        sf.write(buf, wav_speed, target_fs, format="wav")
        buf.seek(0)
        base64_bytes = base64.b64encode(buf.read())
        wav_base64 = base64_bytes.decode('utf-8')
        logger.debug("Audio to string successfully.")

        # save audio
        if audio_path is not None:
            if audio_path.endswith(".wav"):
                sf.write(audio_path, wav_speed, target_fs)
                logger.info("Save audio to {} successfully.".format(audio_path))
            elif audio_path.endswith(".pcm"):
                # Peak-normalize to the int16 range; the 0.001 floor avoids
                # dividing by zero for silent audio.
                wav_norm = wav_speed * (32767 / max(0.001,
                                                    np.max(np.abs(wav_speed))))
                with open(audio_path, "wb") as f:
                    f.write(wav_norm.astype(np.int16))
                logger.info("Save audio to {} successfully.".format(audio_path))
            else:
                # Nothing was written, so do not report a successful save.
                logger.error(
                    "Audio is not saved to {}: only .wav and .pcm are supported.".
                    format(audio_path))
        else:
            logger.info("There is no need to save audio.")

        return target_fs, wav_base64

    def run(self,
            sentence: str,
            spk_id: int=0,
            speed: float=1.0,
            volume: float=1.0,
            sample_rate: int=0,
            save_path: str=None):
        """get the result of the server response

        Args:
            sentence (str): sentence to be synthesized
            spk_id (int, optional): speaker id. Defaults to 0.
            speed (float, optional): audio speed, 0 < speed <=3.0. Defaults to 1.0.
            volume (float, optional): The volume relative to the audio synthesized by the model,
            0 < volume <=3.0. Defaults to 1.0.
            sample_rate (int, optional): Set the sample rate of the synthesized audio.
            0 represents the sample rate for model synthesis. Defaults to 0.
            save_path (str, optional): The save path of the synthesized audio. Defaults to None.

        Raises:
            ServerBaseException: Throws an exception if tts inference unsuccessfully.
            ServerBaseException: Throws an exception if postprocess unsuccessfully.

        Returns:
            lang: model language
            target_sample_rate: target sample rate for synthesized audio.
            duration: length of the synthesized audio in seconds, measured
            at the model sample rate before post-processing.
            wav_base64: The base64 format of the synthesized audio.
        """

        lang = self.config.lang

        try:
            infer_st = time.time()
            self.infer(
                text=sentence, lang=lang, am=self.config.am, spk_id=spk_id)
            infer_time = time.time() - infer_st

        except ServerBaseException:
            raise ServerBaseException(ErrorCode.SERVER_INTERNAL_ERR,
                                      "tts infer failed.")
        except Exception as e:
            logger.error("tts infer failed.")
            logger.error(e)
            sys.exit(-1)

        try:
            postprocess_st = time.time()
            # Convert the output tensor once and reuse it for both the
            # post-processing and the duration computation.
            wav = self._outputs["wav"].numpy()
            target_sample_rate, wav_base64 = self.postprocess(
                wav=wav,
                original_fs=self.executor.am_sample_rate,
                target_fs=sample_rate,
                volume=volume,
                speed=speed,
                audio_path=save_path)
            postprocess_time = time.time() - postprocess_st
            duration = len(wav) / self.executor.am_sample_rate
            rtf = infer_time / duration

        except ServerBaseException:
            raise ServerBaseException(ErrorCode.SERVER_INTERNAL_ERR,
                                      "tts postprocess failed.")
        except Exception as e:
            logger.error("tts postprocess failed.")
            logger.error(e)
            sys.exit(-1)

        logger.debug("AM model: {}".format(self.config.am))
        logger.debug("Vocoder model: {}".format(self.config.voc))
        logger.debug("Language: {}".format(lang))
        # Report the engine's real type instead of a hard-coded "python".
        logger.info("tts engine type: {}".format(self.tts_engine.engine_type))

        logger.info("audio duration: {}".format(duration))
        logger.debug("frontend inference time: {}".format(self.frontend_time))
        logger.debug("AM inference time: {}".format(self.am_time))
        logger.debug("Vocoder inference time: {}".format(self.voc_time))
        logger.info("total inference time: {}".format(infer_time))
        logger.info(
            "postprocess (change speed, volume, target sample rate) time: {}".
            format(postprocess_time))
        logger.info("total generate audio time: {}".format(infer_time +
                                                           postprocess_time))
        logger.info("RTF: {}".format(rtf))
        logger.debug("device: {}".format(self.tts_engine.device))

        return lang, target_sample_rate, duration, wav_base64


================================================
FILE: paddlespeech/server/engine/tts/python/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/engine/tts/python/tts_engine.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import base64
import io
import sys
import time

import librosa
import numpy as np
import paddle
import soundfile as sf

from paddlespeech.cli.log import logger
from paddlespeech.cli.tts.infer import TTSExecutor
from paddlespeech.server.engine.base_engine import BaseEngine
from paddlespeech.server.utils.audio_process import change_speed
from paddlespeech.server.utils.errors import ErrorCode
from paddlespeech.server.utils.exception import ServerBaseException

__all__ = ['TTSEngine', 'PaddleTTSConnectionHandler']


class TTSServerExecutor(TTSExecutor):
    """Server-side TTS executor.

    Adds nothing on top of ``TTSExecutor``; the constructor only delegates
    to the parent class.
    """

    def __init__(self):
        super().__init__()


class TTSEngine(BaseEngine):
    """TTS server engine that runs the models through ``TTSExecutor``
    (``engine_type`` is ``"python"``).
    """

    def __init__(self, name=None):
        """Initialize TTS server engine

        Args:
            name: accepted but not used by this constructor.
        """
        super(TTSEngine, self).__init__()

    def init(self, config: dict) -> bool:
        """Select the device and load the models described by ``config``.

        Args:
            config: engine configuration; it is read through attribute
                access (``config.lang``, ``config.am``, ...).

        Returns:
            bool: True on success, False if selecting the device or loading
            the models failed (the cause is logged).
        """
        self.executor = TTSServerExecutor()
        self.config = config
        self.lang = self.config.lang
        self.engine_type = "python"
        # Defined before the try blocks so the error handlers can always
        # format it, even when reading the device from the config is the
        # step that raised.
        self.device = None

        try:
            if self.config.device is not None:
                self.device = self.config.device
            else:
                self.device = paddle.get_device()
            paddle.set_device(self.device)
        except Exception as e:
            logger.error(
                "Set device failed, please check if device is already used and the parameter 'device' in the yaml file"
            )
            logger.error("Initialize TTS server engine Failed on device: %s." %
                         (self.device))
            logger.error(e)
            return False

        try:
            self.executor._init_from_path(
                am=self.config.am,
                am_config=self.config.am_config,
                am_ckpt=self.config.am_ckpt,
                am_stat=self.config.am_stat,
                phones_dict=self.config.phones_dict,
                tones_dict=self.config.tones_dict,
                speaker_dict=self.config.speaker_dict,
                voc=self.config.voc,
                voc_config=self.config.voc_config,
                voc_ckpt=self.config.voc_ckpt,
                voc_stat=self.config.voc_stat,
                lang=self.config.lang)
        except Exception as e:
            logger.error("Failed to get model related files.")
            logger.error("Initialize TTS server engine Failed on device: %s." %
                         (self.device))
            logger.error(e)
            return False

        logger.info("Initialize TTS server engine successfully on device: %s." %
                    (self.device))
        return True


class PaddleTTSConnectionHandler(TTSServerExecutor):
    def __init__(self, tts_engine):
        """Build a per-request handler on top of an initialized TTS engine.

        The handler keeps references to the engine's executor, config,
        frontend and both inference modules instead of creating its own.

        Args:
            tts_engine (TTSEngine): The TTS engine
        """
        super().__init__()
        logger.debug(
            "Create PaddleTTSConnectionHandler to process the tts request")

        self.tts_engine = tts_engine
        engine_executor = self.tts_engine.executor
        self.executor = engine_executor
        self.config = self.tts_engine.config

        # Objects borrowed from the engine's executor.
        self.frontend = engine_executor.frontend
        self.am_inference = engine_executor.am_inference
        self.voc_inference = engine_executor.voc_inference

    def postprocess(self,
                    wav,
                    original_fs: int,
                    target_fs: int=0,
                    volume: float=1.0,
                    speed: float=1.0,
                    audio_path: str=None):
        """Post-processing operations, including speed, volume, sample rate, save audio file

        Args:
            wav (numpy(float)): Synthesized audio sample points
            original_fs (int): original audio sample rate
            target_fs (int): target audio sample rate; 0 or a value above
                ``original_fs`` keeps the original sample rate
            volume (float): target volume
            speed (float): target speed
            audio_path (str, optional): where to save the audio; only paths
                ending in ``.wav`` or ``.pcm`` are written. None skips saving.

        Raises:
            ServerBaseException: Throws an exception if the change speed unsuccessfully.

        Returns:
            target_fs: target sample rate for synthesized audio.
            wav_base64: The base64 format of the synthesized audio.
        """

        # transform sample_rate
        if target_fs == 0 or target_fs > original_fs:
            target_fs = original_fs
            wav_tar_fs = wav
            logger.debug(
                "The sample rate of synthesized audio is the same as model, which is {}Hz".
                format(original_fs))
        else:
            wav_tar_fs = librosa.resample(
                np.squeeze(wav), orig_sr=original_fs, target_sr=target_fs)
            logger.debug(
                "The sample rate of model is {}Hz and the target sample rate is {}Hz. Converting the sample rate of the synthesized audio successfully.".
                format(original_fs, target_fs))
        # transform volume
        wav_vol = wav_tar_fs * volume
        logger.debug("Transform the volume of the audio successfully.")

        # transform speed
        try:  # windows not support soxbindings
            wav_speed = change_speed(wav_vol, speed, target_fs)
            logger.debug("Transform the speed of the audio successfully.")
        except ServerBaseException:
            raise ServerBaseException(
                ErrorCode.SERVER_INTERNAL_ERR,
                "Failed to transform speed. Can not install soxbindings on your system. \
                 You need to set speed value 1.0.")
        except Exception as e:
            logger.error("Failed to transform speed.")
            logger.error(e)
            sys.exit(-1)

        # wav to base64
        buf = io.BytesIO()
        sf.write(buf, wav_speed, target_fs, format="wav")
        buf.seek(0)

        base64_bytes = base64.b64encode(buf.read())
        wav_base64 = base64_bytes.decode('utf-8')
        logger.debug("Audio to string successfully.")

        # save audio
        if audio_path is not None:
            if audio_path.endswith(".wav"):
                sf.write(audio_path, wav_speed, target_fs)
                logger.info("Save audio to {} successfully.".format(audio_path))
            elif audio_path.endswith(".pcm"):
                # Peak-normalize to the int16 range; the 0.001 floor avoids
                # dividing by zero for silent audio.
                wav_norm = wav_speed * (32767 / max(0.001,
                                                    np.max(np.abs(wav_speed))))
                with open(audio_path, "wb") as f:
                    f.write(wav_norm.astype(np.int16))
                logger.info("Save audio to {} successfully.".format(audio_path))
            else:
                # Nothing was written, so do not report a successful save.
                logger.error(
                    "Audio is not saved to {}: only .wav and .pcm are supported.".
                    format(audio_path))
        else:
            logger.info("There is no need to save audio.")

        return target_fs, wav_base64

    def run(self,
            sentence: str,
            spk_id: int=0,
            speed: float=1.0,
            volume: float=1.0,
            sample_rate: int=0,
            save_path: str=None):
        """ run include inference and postprocess.

        Args:
            sentence (str): text to be synthesized
            spk_id (int, optional): speaker id for multi-speaker speech synthesis. Defaults to 0.
            speed (float, optional): speed. Defaults to 1.0.
            volume (float, optional): volume. Defaults to 1.0.
            sample_rate (int, optional): target sample rate for synthesized audio,
            0 means the same as the model sampling rate. Defaults to 0.
            save_path (str, optional): The save path of the synthesized audio.
            None means do not save audio. Defaults to None.

        Raises:
            ServerBaseException: Throws an exception if tts inference unsuccessfully.
            ServerBaseException: Throws an exception if postprocess unsuccessfully.

        Returns:
            lang: model language
            target_sample_rate: target sample rate for synthesized audio.
            duration: length of the synthesized audio in seconds, measured
            at the model sample rate before post-processing.
            wav_base64: The base64 format of the synthesized audio.
        """

        lang = self.config.lang

        try:
            infer_st = time.time()
            self.infer(
                text=sentence, lang=lang, am=self.config.am, spk_id=spk_id)
            infer_time = time.time() - infer_st
            # Convert the output tensor once; it is reused by postprocess.
            wav = self._outputs["wav"].numpy()
            duration = len(wav) / self.executor.am_config.fs
            rtf = infer_time / duration

        except ServerBaseException:
            raise ServerBaseException(ErrorCode.SERVER_INTERNAL_ERR,
                                      "tts infer failed.")
        except Exception as e:
            logger.error("tts infer failed.")
            logger.error(e)
            sys.exit(-1)

        try:
            postprocess_st = time.time()
            target_sample_rate, wav_base64 = self.postprocess(
                wav=wav,
                original_fs=self.executor.am_config.fs,
                target_fs=sample_rate,
                volume=volume,
                speed=speed,
                audio_path=save_path)
            postprocess_time = time.time() - postprocess_st

        except ServerBaseException:
            raise ServerBaseException(ErrorCode.SERVER_INTERNAL_ERR,
                                      "tts postprocess failed.")
        except Exception as e:
            logger.error("tts postprocess failed.")
            logger.error(e)
            sys.exit(-1)

        logger.debug("AM model: {}".format(self.config.am))
        logger.debug("Vocoder model: {}".format(self.config.voc))
        logger.debug("Language: {}".format(lang))
        logger.info("tts engine type: python")

        logger.info("audio duration: {}".format(duration))
        logger.debug("frontend inference time: {}".format(self.frontend_time))
        logger.debug("AM inference time: {}".format(self.am_time))
        logger.debug("Vocoder inference time: {}".format(self.voc_time))
        logger.info("total inference time: {}".format(infer_time))
        logger.info(
            "postprocess (change speed, volume, target sample rate) time: {}".
            format(postprocess_time))
        logger.info("total generate audio time: {}".format(infer_time +
                                                           postprocess_time))
        logger.info("RTF: {}".format(rtf))
        logger.debug("device: {}".format(self.tts_engine.device))

        return lang, target_sample_rate, duration, wav_base64


================================================
FILE: paddlespeech/server/engine/vector/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/engine/vector/python/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/engine/vector/python/vector_engine.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io
from collections import OrderedDict

import numpy as np
import paddle

from paddlespeech.audio.backends import soundfile_load as load_audio
from paddlespeech.audio.compliance.librosa import melspectrogram
from paddlespeech.cli.log import logger
from paddlespeech.cli.vector.infer import VectorExecutor
from paddlespeech.server.engine.base_engine import BaseEngine
from paddlespeech.vector.io.batch import feature_normalize


class PaddleVectorConnectionHandler:
    """Per-request helper that extracts speaker embeddings with a VectorEngine.

    The handler borrows the executor, task, model and config from the engine
    passed to the constructor and uses them to turn raw audio bytes into an
    embedding, or to score two audios against each other.
    """

    def __init__(self, vector_engine):
        """The PaddleSpeech Vector Server Connection Handler
           This connection process every server request
        Args:
            vector_engine (VectorEngine): The Vector engine
        """
        super().__init__()
        logger.debug(
            "Create PaddleVectorConnectionHandler to process the vector request")
        self.vector_engine = vector_engine
        self.executor = self.vector_engine.executor
        self.task = self.vector_engine.executor.task
        self.model = self.vector_engine.executor.model
        self.config = self.vector_engine.executor.config

        self._inputs = OrderedDict()
        self._outputs = OrderedDict()

    @paddle.no_grad()
    def run(self, audio_data, task="spk"):
        """Process the audio carried by an http request.

        Args:
            audio_data (bytes): the raw audio bytes (already base64-decoded)
            task (str, optional): the task asked by the request. Defaults to "spk".

        Returns:
            numpy.ndarray: the audio embedding when both the server model task
                and the request task are "spk"; otherwise ``np.array([0.0])``.
        """
        logger.debug(
            f"start to extract the do vector {self.task} from the http request")
        if self.task == "spk" and task == "spk":
            embedding = self.extract_audio_embedding(audio_data)
            return embedding
        else:
            logger.error(
                "The request task is not matched with server model task")
            logger.error(
                f"The server model task is: {self.task}, but the request task is: {task}"
            )

        return np.array([
            0.0,
        ])

    @paddle.no_grad()
    def get_enroll_test_score(self, enroll_audio, test_audio):
        """Get the enroll and test audio score

        Args:
            enroll_audio (bytes): the enroll audio bytes; it is handed to
                ``extract_audio_embedding`` unchanged
            test_audio (bytes): the test audio bytes; it is handed to
                ``extract_audio_embedding`` unchanged

        Returns:
            the value returned by ``executor.get_embeddings_score`` for the
            two embeddings (the score between enroll and test audio)
        """
        logger.debug("start to extract the enroll audio embedding")
        enroll_emb = self.extract_audio_embedding(enroll_audio)

        logger.debug("start to extract the test audio embedding")
        test_emb = self.extract_audio_embedding(test_audio)

        logger.debug(
            "start to get the score between the enroll and test embedding")
        score = self.executor.get_embeddings_score(enroll_emb, test_emb)

        logger.debug(f"get the enroll vs test score: {score}")
        return score

    @paddle.no_grad()
    def extract_audio_embedding(self, audio: bytes, sample_rate: int=16000):
        """extract the audio embedding

        Args:
            audio (bytes): the audio data; it is wrapped in ``io.BytesIO``,
                so it must be a bytes-like object
            sample_rate (int, optional): the audio sample rate. Defaults to 16000.

        Returns:
            numpy.ndarray: the embedding produced by the model backbone, or
                ``np.array([0.0])`` when the executor's audio check fails.

        Raises:
            Exception: whatever the feature extraction raised; it is logged
                and re-raised so the caller can turn it into an error response.
        """
        # we can not reuse the cache io.BytesIO(audio) data,
        # because the soundfile will change the io.BytesIO(audio) to the end
        # thus we should convert the base64 string to io.BytesIO when we need the audio data
        if not self.executor._check(
                io.BytesIO(audio), sample_rate, force_yes=True):
            logger.debug("check the audio sample rate occurs error")
            return np.array([0.0])

        # The sample rate reported by the loader is not used below; the
        # feature extraction relies on self.config.sr instead.
        waveform, _ = load_audio(io.BytesIO(audio))
        logger.debug(
            f"load the audio sample points, shape is: {waveform.shape}")

        # stage 2: get the audio feat
        # Note: Now we only support fbank feature
        try:
            feats = melspectrogram(
                x=waveform,
                sr=self.config.sr,
                n_mels=self.config.n_mels,
                window_size=self.config.window_size,
                hop_length=self.config.hop_size)
            logger.debug(f"extract the audio feats, shape is: {feats.shape}")
        except Exception as e:
            logger.error(f"feats occurs exception {e}")
            # Propagate the failure instead of calling sys.exit: `sys` is not
            # imported in this module, and one bad request must not try to
            # terminate the whole server process.
            raise

        feats = paddle.to_tensor(feats).unsqueeze(0)
        # in inference period, the lengths is all one without padding
        lengths = paddle.ones([1])

        # stage 3: we do feature normalize,
        #          Now we assume that the feats must do normalize
        feats = feature_normalize(feats, mean_norm=True, std_norm=False)

        # stage 4: run the model backbone on the normalized feats
        logger.info(f"feats shape: {feats.shape}")
        logger.info("audio extract the feats success")

        logger.info("start to extract the audio embedding")
        embedding = self.model.backbone(feats, lengths).squeeze().numpy()
        logger.info(f"embedding size: {embedding.shape}")

        return embedding


class VectorServerExecutor(VectorExecutor):
    """Server-side subclass of ``VectorExecutor`` that adds nothing of its own."""

    def __init__(self):
        """Construct the executor by delegating entirely to ``VectorExecutor``."""
        super(VectorServerExecutor, self).__init__()


class VectorEngine(BaseEngine):
    """Server engine that owns the vector executor used by request handlers."""

    def __init__(self):
        """The Vector Engine
        """
        super(VectorEngine, self).__init__()
        logger.debug("Create the VectorEngine Instance")

    def init(self, config: dict):
        """Init the Vector Engine

        Args:
            config (dict): The server configuration. It is read through
                attribute access (``config.device``, ``config.model_type``,
                ``config.cfg_path``, ``config.ckpt_path``, ``config.task``),
                so it must support that in addition to being dict-like.

        Returns:
            bool: True when the device was set and the executor was loaded,
                False when selecting the device failed.
        """
        logger.debug("Init the vector engine")
        try:
            self.config = config
            if self.config.device:
                self.device = self.config.device
            else:
                self.device = paddle.get_device()

            paddle.set_device(self.device)
            logger.debug(f"Vector Engine set the device: {self.device}")
        except BaseException as e:
            # The failure may happen before self.device is assigned, so read
            # it defensively; otherwise the handler itself would raise.
            failed_device = getattr(self, "device", None)
            logger.error(
                "Set device failed, please check if device is already used and the parameter 'device' in the yaml file"
            )
            logger.error("Initialize Vector server engine Failed on device: %s."
                         % (failed_device))
            # Keep the root cause visible in the log.
            logger.error(e)
            return False

        self.executor = VectorServerExecutor()

        self.executor._init_from_path(
            model_type=config.model_type,
            cfg_path=config.cfg_path,
            ckpt_path=config.ckpt_path,
            task=config.task)

        logger.info(
            "Initialize Vector server engine successfully on device: %s." %
            (self.device))
        return True


================================================
FILE: paddlespeech/server/entry.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from collections import defaultdict

# Names exported by a star-import of this module: the two command registries
# that are created at the bottom of the file.
__all__ = ['server_commands', 'client_commands']


def _CommandDict():
    return defaultdict(_CommandDict)


def server_execute():
    """Dispatch the ``paddlespeech_server`` command line to its command class.

    Walks ``server_commands`` along ``'paddlespeech_server'`` followed by the
    command-line arguments for as long as each token is a registered key,
    then instantiates the ``'_entry'`` of the node reached and runs its
    ``execute`` with the remaining arguments.

    Returns:
        int: 0 when ``execute`` returns a truthy value, 1 otherwise, so the
            result can be used as a shell exit status.
    """
    node = server_commands
    tokens = ['paddlespeech_server'] + sys.argv[1:]
    consumed = 0
    # Descend while the next token names a registered (sub)command.
    while consumed < len(tokens) and tokens[consumed] in node:
        node = node[tokens[consumed]]
        consumed += 1

    # 'execute' reports success as True and failure as False.
    succeeded = node['_entry']().execute(sys.argv[consumed:])
    if succeeded:
        return 0
    return 1


def client_execute():
    """Dispatch the ``paddlespeech_client`` command line to its command class.

    Walks ``client_commands`` along ``'paddlespeech_client'`` followed by the
    command-line arguments for as long as each token is a registered key,
    then instantiates the ``'_entry'`` of the node reached and runs its
    ``execute`` with the remaining arguments.

    Returns:
        int: 0 when ``execute`` returns a truthy value, 1 otherwise, so the
            result can be used as a shell exit status.
    """
    node = client_commands
    tokens = ['paddlespeech_client'] + sys.argv[1:]
    consumed = 0
    # Descend while the next token names a registered (sub)command.
    while consumed < len(tokens) and tokens[consumed] in node:
        node = node[tokens[consumed]]
        consumed += 1

    # 'execute' reports success as True and failure as False.
    succeeded = node['_entry']().execute(sys.argv[consumed:])
    if succeeded:
        return 0
    return 1


# Module-level command registries, created empty here. server_execute and
# client_execute look up sub-commands and their '_entry' in these trees.
server_commands = _CommandDict()
client_commands = _CommandDict()


================================================
FILE: paddlespeech/server/executor.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from abc import ABC
from abc import abstractmethod
from typing import List


class BaseExecutor(ABC):
    """Abstract base for the executors of paddlespeech server tasks.

    Every instance gets its own ``argparse.ArgumentParser`` stored in
    ``self.parser``. Subclasses must implement ``execute`` and ``__call__``.
    """

    def __init__(self):
        parser = argparse.ArgumentParser()
        self.parser = parser

    @abstractmethod
    def execute(self, argv: List[str]) -> bool:
        """Entry point used from the command line (e.g. `paddlespeech asr`).

        Args:
            argv (List[str]): Arguments from command line.

        Returns:
            bool: `True` for a success and `False` for a failure.
        """

    @abstractmethod
    def __call__(self, *arg, **kwargs):
        """Entry point used from Python code to call an executor."""


================================================
FILE: paddlespeech/server/restful/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/restful/acs_api.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import base64
from typing import Union

from fastapi import APIRouter

from paddlespeech.cli.log import logger
from paddlespeech.server.engine.engine_pool import get_engine_pool
from paddlespeech.server.restful.request import ASRRequest
from paddlespeech.server.restful.response import ACSResponse
from paddlespeech.server.restful.response import ErrorResponse
from paddlespeech.server.utils.errors import ErrorCode
from paddlespeech.server.utils.errors import failed_response
from paddlespeech.server.utils.exception import ServerBaseException

# Router that collects the audio content search (acs) endpoints defined below.
router = APIRouter()


@router.get('/paddlespeech/asr/search/help')
def help():
    """Describe the audio content search service.

    Returns:
        json: a static description of the acs server, its input and an
            example of its output
    """
    # Example entry of the search result list shown to the user.
    sample_hit = {'w': '你', 'bg': 0.0, 'ed': 1.2}
    sample_output = {"asr_result": "你好", "acs_result": [sample_hit]}
    service_info = {
        "description": "acs server",
        "input": "base64 string of wavfile",
        "output": sample_output
    }
    return {
        "success": "True",
        "code": 200,
        "message": {
            "global": "success"
        },
        "result": service_info
    }


@router.post(
    "/paddlespeech/asr/search",
    response_model=Union[ACSResponse, ErrorResponse])
def acs(request_body: ASRRequest):
    """Audio content search endpoint.

    Args:
        request_body (ASRRequest): the acs request; the http ASRRequest model
            is reused and only its ``audio`` field is read here

    Returns:
        json: the transcription and acs result on success, otherwise the
            structure built by ``failed_response``
    """
    try:
        # The request carries the audio as a base64 string.
        raw_audio = base64.b64decode(request_body.audio)

        # The engine pool holds the single acs engine instance.
        engine = get_engine_pool()['acs']

        # The engine returns the search result first, the transcription second.
        acs_result, asr_result = engine.run(raw_audio)

        return {
            "success": True,
            "code": 200,
            "message": {
                "description": "success"
            },
            "result": {
                "transcription": asr_result,
                "acs": acs_result
            }
        }
    except ServerBaseException as e:
        return failed_response(e.error_code, e.msg)
    except BaseException as e:
        response = failed_response(ErrorCode.SERVER_UNKOWN_ERR)
        logger.error(e)
        return response


================================================
FILE: paddlespeech/server/restful/api.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from typing import List

from fastapi import APIRouter

from paddlespeech.cli.log import logger
from paddlespeech.server.restful.acs_api import router as acs_router
from paddlespeech.server.restful.asr_api import router as asr_router
from paddlespeech.server.restful.cls_api import router as cls_router
from paddlespeech.server.restful.text_api import router as text_router
from paddlespeech.server.restful.tts_api import router as tts_router
from paddlespeech.server.restful.vector_api import router as vec_router
# Module-level router that setup_router fills with the per-service routers.
_router = APIRouter()


def setup_router(api_list: List):
    """Attach the routers of the requested services to the shared router.

    Args:
        api_list (List): service names, matched case-insensitively against
            asr, tts, cls, text, vector and acs

    Returns:
        APIRouter: the module-level router with the requested services included
    """
    # Service name -> router implementing that service.
    routers_by_name = {
        'asr': asr_router,
        'tts': tts_router,
        'cls': cls_router,
        'text': text_router,
        'vector': vec_router,
        'acs': acs_router,
    }

    for api_name in api_list:
        service_router = routers_by_name.get(api_name.lower())
        if service_router is not None:
            _router.include_router(service_router)
        else:
            # An unknown service name aborts the start-up.
            logger.error(
                f"PaddleSpeech has not support such service: {api_name}")
            sys.exit(-1)

    return _router


================================================
FILE: paddlespeech/server/restful/asr_api.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import base64
import sys
import traceback
from typing import Union

from fastapi import APIRouter

from paddlespeech.cli.log import logger
from paddlespeech.server.engine.engine_pool import get_engine_pool
from paddlespeech.server.restful.request import ASRRequest
from paddlespeech.server.restful.response import ASRResponse
from paddlespeech.server.restful.response import ErrorResponse
from paddlespeech.server.utils.errors import ErrorCode
from paddlespeech.server.utils.errors import failed_response
from paddlespeech.server.utils.exception import ServerBaseException

# Router that collects the asr endpoints defined below.
router = APIRouter()


@router.get('/paddlespeech/asr/help')
def help():
    """Describe the asr service.

    Returns:
        json: a static description of the asr server, its input and its output
    """
    service_info = {
        "description": "asr server",
        "input": "base64 string of wavfile",
        "output": "transcription"
    }
    return {
        "success": "True",
        "code": 200,
        "message": {
            "global": "success"
        },
        "result": service_info
    }


@router.post(
    "/paddlespeech/asr", response_model=Union[ASRResponse, ErrorResponse])
def asr(request_body: ASRRequest):
    """Speech recognition endpoint.

    Args:
        request_body (ASRRequest): the request; only its base64 ``audio``
            field is read here

    Returns:
        json: the transcription on success, otherwise the structure built by
            ``failed_response``
    """
    try:
        # The request carries the audio as a base64 string.
        audio_data = base64.b64decode(request_body.audio)

        # The engine pool holds the single asr engine instance.
        asr_engine = get_engine_pool()['asr']

        # The connection handler class depends on the configured engine type.
        engine_type = asr_engine.engine_type
        if engine_type == "python":
            from paddlespeech.server.engine.asr.python.asr_engine import PaddleASRConnectionHandler
        elif engine_type == "inference":
            from paddlespeech.server.engine.asr.paddleinference.asr_engine import PaddleASRConnectionHandler
        else:
            logger.error("Offline asr engine only support python or inference.")
            sys.exit(-1)

        # A fresh handler is created for every request.
        handler = PaddleASRConnectionHandler(asr_engine)
        handler.run(audio_data)
        transcription = handler.postprocess()

        response = {
            "success": True,
            "code": 200,
            "message": {
                "description": "success"
            },
            "result": {
                "transcription": transcription
            }
        }
    except ServerBaseException as e:
        response = failed_response(e.error_code, e.msg)
    except BaseException:
        response = failed_response(ErrorCode.SERVER_UNKOWN_ERR)
        traceback.print_exc()

    return response


================================================
FILE: paddlespeech/server/restful/cls_api.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import base64
import sys
import traceback
from typing import Union

from fastapi import APIRouter

from paddlespeech.cli.log import logger
from paddlespeech.server.engine.engine_pool import get_engine_pool
from paddlespeech.server.restful.request import CLSRequest
from paddlespeech.server.restful.response import CLSResponse
from paddlespeech.server.restful.response import ErrorResponse
from paddlespeech.server.utils.errors import ErrorCode
from paddlespeech.server.utils.errors import failed_response
from paddlespeech.server.utils.exception import ServerBaseException

# Router that collects the cls endpoints defined below.
router = APIRouter()


@router.get('/paddlespeech/cls/help')
def help():
    """Describe the cls service.

    Returns:
        json: a static description of the cls server, its input and its output
    """
    service_info = {
        "description": "cls server",
        "input": "base64 string of wavfile",
        "output": "classification result"
    }
    return {
        "success": "True",
        "code": 200,
        "message": {
            "global": "success"
        },
        "result": service_info
    }


@router.post(
    "/paddlespeech/cls", response_model=Union[CLSResponse, ErrorResponse])
def cls(request_body: CLSRequest):
    """Audio classification endpoint.

    Args:
        request_body (CLSRequest): the request; its base64 ``audio`` is
            decoded and its ``topk`` is passed to the handler's postprocess

    Returns:
        json: the top-k classification results on success, otherwise the
            structure built by ``failed_response``
    """
    try:
        audio_data = base64.b64decode(request_body.audio)

        # get single engine from engine pool
        engine_pool = get_engine_pool()
        cls_engine = engine_pool['cls']

        # The connection handler class depends on the configured engine type.
        if cls_engine.engine_type == "python":
            from paddlespeech.server.engine.cls.python.cls_engine import PaddleCLSConnectionHandler
        elif cls_engine.engine_type == "inference":
            from paddlespeech.server.engine.cls.paddleinference.cls_engine import PaddleCLSConnectionHandler
        else:
            logger.error("Offline cls engine only support python or inference.")
            sys.exit(-1)

        # A fresh handler is created for every request.
        connection_handler = PaddleCLSConnectionHandler(cls_engine)

        connection_handler.run(audio_data)
        cls_results = connection_handler.postprocess(request_body.topk)

        response = {
            "success": True,
            "code": 200,
            "message": {
                "description": "success"
            },
            "result": {
                "topk": request_body.topk,
                "results": cls_results
            }
        }

    except ServerBaseException as e:
        # A known server error is reported to the client as an error
        # response. Exiting here would discard the response that was just
        # built and let SystemExit escape the request handler.
        response = failed_response(e.error_code, e.msg)
        logger.error(e)
    except Exception as e:
        response = failed_response(ErrorCode.SERVER_UNKOWN_ERR)
        logger.error(e)
        traceback.print_exc()

    return response


================================================
FILE: paddlespeech/server/restful/request.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Optional

from pydantic import BaseModel

# Names exported by a star-import: every request model defined in this module.
__all__ = [
    'ASRRequest', 'TTSRequest', 'CLSRequest', 'TextRequest', 'VectorRequest',
    'VectorScoreRequest'
]


#****************************************************************************************/
#************************************ ASR request ***************************************/
#****************************************************************************************/
class ASRRequest(BaseModel):
    """Request body of the speech recognition API.

    request body example
    {
        "audio": "exSI6ICJlbiIsCgkgICAgInBvc2l0aW9uIjogImZhbHNlIgoJf...",
        "audio_format": "wav",
        "sample_rate": 16000,
        "lang": "zh_cn",
        "punc": false
    }
    """
    # base64 string of the audio; the asr endpoint decodes it with base64.b64decode
    audio: str
    audio_format: str
    sample_rate: int
    lang: str
    # optional; None when the client does not send it
    punc: Optional[bool] = None


#****************************************************************************************/
#************************************ TTS request ***************************************/
#****************************************************************************************/
class TTSRequest(BaseModel):
    """Request body of the speech synthesis API.

    Only ``text`` is required; every other field has a default.

    request body example
    {
        "text": "你好，欢迎使用百度飞桨语音合成服务。",
        "spk_id": 0,
        "speed": 1.0,
        "volume": 1.0,
        "sample_rate": 0,
        "save_path": "./tts.wav"
    }

    """

    text: str
    spk_id: int = 0
    speed: float = 1.0
    volume: float = 1.0
    # NOTE(review): 0 presumably means "keep the model's sample rate" - confirm against the tts engine
    sample_rate: int = 0
    # optional; None when the client does not send it
    save_path: Optional[str] = None


#****************************************************************************************/
#************************************ CLS request ***************************************/
#****************************************************************************************/
class CLSRequest(BaseModel):
    """Request body of the audio classification API.

    request body example
    {
        "audio": "exSI6ICJlbiIsCgkgICAgInBvc2l0aW9uIjogImZhbHNlIgoJf...",
        "topk": 1
    }
    """
    # base64 string of the audio; the cls endpoint decodes it with base64.b64decode
    audio: str
    # passed to the connection handler's postprocess and echoed in the response
    topk: int = 1


#****************************************************************************************/
#************************************ Text request **************************************/
#****************************************************************************************/
class TextRequest(BaseModel):
    """Request body carrying a single text string."""
    text: str


#****************************************************************************************/
#************************************ Vector request ************************************/
#****************************************************************************************/
class VectorRequest(BaseModel):
    """Request body for extracting the vector of one audio.

    request body example
    {
        "audio": "exSI6ICJlbiIsCgkgICAgInBvc2l0aW9uIjogImZhbHNlIgoJf...",
        "task": "spk",
        "audio_format": "wav",
        "sample_rate": 16000
    }
    """
    # presumably a base64 string like in the other audio requests - confirm against the vector endpoint
    audio: str
    task: str
    audio_format: str
    sample_rate: int


class VectorScoreRequest(BaseModel):
    """Request body for scoring an enroll audio against a test audio.

    request body example
    {
        "enroll_audio": "exSI6ICJlbiIsCgkgICAgInBvc2l0aW9uIjogImZhbHNlIgoJf...",
        "test_audio": "exSI6ICJlbiIsCgkgICAgInBvc2l0aW9uIjogImZhbHNlIgoJf...",
        "task": "score",
        "audio_format": "wav",
        "sample_rate": 16000
    }
    """
    # presumably base64 strings like in the other audio requests - confirm against the vector endpoint
    enroll_audio: str
    test_audio: str
    task: str
    audio_format: str
    sample_rate: int


================================================
FILE: paddlespeech/server/restful/response.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List
from typing import Optional

from pydantic import BaseModel

# Names exported by a star-import: the top-level response models of this
# module, including the error response shared by all endpoints.
__all__ = [
    'ASRResponse', 'TTSResponse', 'CLSResponse', 'TextResponse',
    'VectorResponse', 'VectorScoreResponse', 'ACSResponse', 'ErrorResponse'
]


class Message(BaseModel):
    """Message part shared by all response models in this module."""
    description: str


#****************************************************************************************/
#************************************ ASR response **************************************/
#****************************************************************************************/
class AsrResult(BaseModel):
    """Result part of ASRResponse."""
    transcription: str


class ASRResponse(BaseModel):
    """Response of the speech recognition API.

    response example
    {
        "success": true,
        "code": 200,
        "message": {
            "description": "success"
        },
        "result": {
            "transcription": "你好，飞桨"
        }
    }
    """
    success: bool
    code: int
    message: Message
    result: AsrResult


#****************************************************************************************/
#************************************ TTS response **************************************/
#****************************************************************************************/
class TTSResult(BaseModel):
    """Result part of TTSResponse.

    ``sample_rate``, ``duration`` and ``audio`` are required; the remaining
    fields have defaults.
    """
    lang: str = "zh"
    spk_id: int = 0
    speed: float = 1.0
    volume: float = 1.0
    sample_rate: int
    duration: float
    # optional; None when no path is reported
    save_path: Optional[str] = None
    # presumably the base64 string of the synthesized audio - confirm against the tts endpoint
    audio: str


class TTSResponse(BaseModel):
    """Response of the speech synthesis API.

    response example
    {
        "success": true,
        "code": 200,
        "message": {
            "description": "success"
        },
        "result": {
            "lang": "zh",
            "spk_id": 0,
            "speed": 1.0,
            "volume": 1.0,
            "sample_rate": 24000,
            "duration": 3.6125,
            "audio": "LTI1OTIuNjI1OTUwMzQsOTk2OS41NDk4...",
            "save_path": "./tts.wav"
        }
    }
    """
    success: bool
    code: int
    message: Message
    result: TTSResult


#****************************************************************************************/
#************************************ CLS response **************************************/
#****************************************************************************************/
class CLSResults(BaseModel):
    """One classification entry: a class name and its probability."""
    class_name: str
    prob: float


class CLSResult(BaseModel):
    """Result part of CLSResponse: the requested topk and the list of entries."""
    topk: int
    results: List[CLSResults]


class CLSResponse(BaseModel):
    """Response of the audio classification API.

    response example
    {
        "success": true,
        "code": 200,
        "message": {
            "description": "success"
        },
        "result": {
            "topk": 1,
            "results": [
                {
                    "class_name": "Speech",
                    "prob": 0.9027184844017029
                }
            ]
        }
    }
    """
    success: bool
    code: int
    message: Message
    result: CLSResult


#****************************************************************************************/
#************************************ Text response **************************************/
#****************************************************************************************/


class TextResult(BaseModel):
    """Result part of TextResponse."""
    punc_text: str


class TextResponse(BaseModel):
    """Response of the text API.

    response example
    {
        "success": true,
        "code": 0,
        "message": {
            "description": "success"
        },
        "result": {
            "punc_text": "你好，飞桨"
        }
    }
    """
    success: bool
    code: int
    message: Message
    result: TextResult


#****************************************************************************************/
#************************************ Vector response **************************************/
#****************************************************************************************/


class VectorResult(BaseModel):
    """Result part of VectorResponse."""
    # untyped list; the response example of VectorResponse shows a list of floats
    vec: list


class VectorResponse(BaseModel):
    """Response carrying the vector extracted from one audio.

    response example
    {
        "success": true,
        "code": 0,
        "message": {
            "description": "success"
        },
        "result": {
            "vec": [1.0, 1.0]
        }
    }
    """
    success: bool
    code: int
    message: Message
    result: VectorResult


class VectorScoreResult(BaseModel):
    """The `result` payload of a VectorScoreResponse."""
    # Score computed between the enroll audio and the test audio.
    score: float


class VectorScoreResponse(BaseModel):
    """Response body of the vector score service.

    response example
    {
        "success": true,
        "code": 200,
        "message": {
            "description": "success"
        },
        "result": {
            "score": 1.0
        }
    }
    """
    success: bool
    code: int
    message: Message
    result: VectorScoreResult


#****************************************************************************************/
#********************************** Error response **************************************/
#****************************************************************************************/
class ErrorResponse(BaseModel):
    """Response body used when a request fails; it carries no `result` field.

    All three fields are required, so the model cannot be built without arguments.

    response example
    {
        "success": false,
        "code": 0,
        "message": {
            "description": "Unknown error occurred."
        }
    }
    """
    success: bool
    code: int
    message: Message


#****************************************************************************************/
#************************************ ACS response **************************************/
#****************************************************************************************/
class AcsResult(BaseModel):
    """The `result` payload of an ACSResponse."""
    # The transcription text.
    transcription: str
    # Untyped list of ACS entries; the element layout is decided by the ACS engine (not visible here).
    acs: list


class ACSResponse(BaseModel):
    """Response body of the ACS service.

    response example (the `acs` element shown is illustrative only)
    {
        "success": true,
        "code": 0,
        "message": {
            "description": "success"
        },
        "result": {
            "transcription": "你好，飞桨",
            "acs": [(你好, 0.0, 0.45)]
        }
    }
    """
    success: bool
    code: int
    message: Message
    result: AcsResult


================================================
FILE: paddlespeech/server/restful/text_api.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import traceback
from typing import Union

from fastapi import APIRouter

from paddlespeech.cli.log import logger
from paddlespeech.server.engine.engine_pool import get_engine_pool
from paddlespeech.server.engine.text.python.text_engine import PaddleTextConnectionHandler
from paddlespeech.server.restful.request import TextRequest
from paddlespeech.server.restful.response import ErrorResponse
from paddlespeech.server.restful.response import TextResponse
from paddlespeech.server.utils.errors import ErrorCode
from paddlespeech.server.utils.errors import failed_response
from paddlespeech.server.utils.exception import ServerBaseException
router = APIRouter()


@router.get('/paddlespeech/text/help')
def help():
    """Return a static sample describing the /paddlespeech/text response.

    Returns:
        dict: a fixed example payload, used as the endpoint's response body
    """
    message = {"global": "success"}
    result = {"punc_text": "The punctuation text content"}
    return {
        "success": "True",
        "code": 200,
        "message": message,
        "result": result,
    }


@router.post(
    "/paddlespeech/text", response_model=Union[TextResponse, ErrorResponse])
def asr(request_body: TextRequest):
    """Text service endpoint (the handler is named `asr` but serves /paddlespeech/text).

    Args:
        request_body (TextRequest): request carrying the input `text`

    Returns:
        the success payload with `result.punc_text`, or the value produced by
        `failed_response` when an exception is raised
    """
    try:
        # The raw sentence sent by the client.
        text = request_body.text
        logger.info(f"Text service receive the {text}")

        # Each request wraps the shared 'text' engine in its own connection handler.
        text_engine = get_engine_pool()['text']
        handler = PaddleTextConnectionHandler(text_engine)
        punc_text = handler.run(text)
        logger.info(f"Get the Text Connection result {punc_text}")

        # When the handler yields nothing, echo the input text back.
        result = {"punc_text": text if punc_text is None else punc_text}
        response = {
            "success": True,
            "code": 200,
            "message": {
                "description": "success"
            },
            "result": result,
        }

        logger.info(f"The Text Service final response: {response}")
    except ServerBaseException as e:
        response = failed_response(e.error_code, e.msg)
    except BaseException:
        response = failed_response(ErrorCode.SERVER_UNKOWN_ERR)
        traceback.print_exc()

    return response


================================================
FILE: paddlespeech/server/restful/tts_api.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import traceback
from typing import Union

from fastapi import APIRouter
from fastapi.responses import StreamingResponse

from paddlespeech.cli.log import logger
from paddlespeech.server.engine.engine_pool import get_engine_pool
from paddlespeech.server.restful.request import TTSRequest
from paddlespeech.server.restful.response import ErrorResponse
from paddlespeech.server.restful.response import TTSResponse
from paddlespeech.server.utils.errors import ErrorCode
from paddlespeech.server.utils.errors import failed_response
from paddlespeech.server.utils.exception import ServerBaseException

router = APIRouter()


@router.get('/paddlespeech/tts/help')
def help():
    """Return a static sample describing the /paddlespeech/tts response.

    Returns:
        dict: a fixed example payload, used as the endpoint's response body
    """
    message = {"global": "success"}
    result = {
        "description": "tts server",
        "text": "sentence to be synthesized",
        "audio": "the base64 of audio",
    }
    return {
        "success": "True",
        "code": 200,
        "message": message,
        "result": result,
    }


@router.post(
    "/paddlespeech/tts", response_model=Union[TTSResponse, ErrorResponse])
def tts(request_body: TTSRequest):
    """Offline (non-streaming) TTS endpoint.

    Validates the request parameters, runs the 'tts' engine from the engine
    pool through a per-request connection handler and returns the audio.

    Args:
        request_body (TTSRequest): text, spk_id, speed, volume, sample_rate
            and save_path of the synthesis request

    Returns:
        the success payload (lang, spk_id, speed, volume, sample_rate,
        duration, save_path and the base64 `audio`), or the value produced by
        `failed_response` for invalid parameters, an unsupported engine type
        or any raised exception
    """

    logger.info("request: {}".format(request_body))

    # get params
    text = request_body.text
    spk_id = request_body.spk_id
    speed = request_body.speed
    volume = request_body.volume
    sample_rate = request_body.sample_rate
    # NOTE(review): save_path is client-supplied and forwarded to the handler;
    # confirm the engine restricts where it may write.
    save_path = request_body.save_path

    # Check parameters
    if speed <= 0 or speed > 3:
        return failed_response(
            ErrorCode.SERVER_PARAM_ERR,
            "invalid speed value, the value should be between 0 and 3.")
    if volume <= 0 or volume > 3:
        return failed_response(
            ErrorCode.SERVER_PARAM_ERR,
            "invalid volume value, the value should be between 0 and 3.")
    if sample_rate not in [0, 16000, 8000]:
        return failed_response(
            ErrorCode.SERVER_PARAM_ERR,
            "invalid sample_rate value, the choice of value is 0, 8000, 16000.")
    if save_path is not None and not save_path.endswith(
            "pcm") and not save_path.endswith("wav"):
        return failed_response(
            ErrorCode.SERVER_PARAM_ERR,
            "invalid save_path, saved audio formats support pcm and wav")

    # run
    try:
        # get single engine from engine pool
        engine_pool = get_engine_pool()
        tts_engine = engine_pool['tts']
        logger.info("Get tts engine successfully.")

        # The handler implementation depends on the configured engine type.
        if tts_engine.engine_type == "python":
            from paddlespeech.server.engine.tts.python.tts_engine import PaddleTTSConnectionHandler
        elif tts_engine.engine_type == "inference":
            from paddlespeech.server.engine.tts.paddleinference.tts_engine import PaddleTTSConnectionHandler
        else:
            # A request handler must not call sys.exit(): the SystemExit was
            # swallowed by the `except BaseException` below anyway, so answer
            # with an explicit error (same error code) instead.
            unsupported_msg = "Offline tts engine only support python or inference."
            logger.error(unsupported_msg)
            return failed_response(ErrorCode.SERVER_UNKOWN_ERR, unsupported_msg)

        connection_handler = PaddleTTSConnectionHandler(tts_engine)
        lang, target_sample_rate, duration, wav_base64 = connection_handler.run(
            text, spk_id, speed, volume, sample_rate, save_path)

        response = {
            "success": True,
            "code": 200,
            "message": {
                "description": "success."
            },
            "result": {
                "lang": lang,
                "spk_id": spk_id,
                "speed": speed,
                "volume": volume,
                "sample_rate": target_sample_rate,
                "duration": duration,
                "save_path": save_path,
                "audio": wav_base64
            }
        }
    except ServerBaseException as e:
        response = failed_response(e.error_code, e.msg)
    except BaseException:
        response = failed_response(ErrorCode.SERVER_UNKOWN_ERR)
        traceback.print_exc()

    return response


@router.post("/paddlespeech/tts/streaming")
async def stream_tts(request_body: TTSRequest):
    """Streaming TTS endpoint.

    Args:
        request_body (TTSRequest): only `text` and `spk_id` are read here

    Returns:
        StreamingResponse wrapping the handler's `run(sentence=..., spk_id=...)`
        output, or the value produced by `failed_response` when the configured
        engine is not a streaming ("online" / "online-onnx") one
    """
    # get params
    text = request_body.text
    spk_id = request_body.spk_id

    engine_pool = get_engine_pool()
    tts_engine = engine_pool['tts']
    logger.info("Get tts engine successfully.")

    # The handler implementation depends on the configured engine type.
    if tts_engine.engine_type == "online":
        from paddlespeech.server.engine.tts.online.python.tts_engine import PaddleTTSConnectionHandler
    elif tts_engine.engine_type == "online-onnx":
        from paddlespeech.server.engine.tts.online.onnx.tts_engine import PaddleTTSConnectionHandler
    else:
        # Do not call sys.exit() from a request handler: a single request to
        # this route would raise SystemExit inside the server process.
        unsupported_msg = "Online tts engine only support online or online-onnx."
        logger.error(unsupported_msg)
        return failed_response(ErrorCode.SERVER_UNKOWN_ERR, unsupported_msg)

    connection_handler = PaddleTTSConnectionHandler(tts_engine)

    return StreamingResponse(
        connection_handler.run(sentence=text, spk_id=spk_id))


@router.get("/paddlespeech/tts/streaming/samplerate")
def get_samplerate():
    """Report the `sample_rate` attribute of the 'tts' engine.

    Returns:
        {"sample_rate": <value>} on success, otherwise the value produced by
        `failed_response`
    """
    try:
        tts_engine = get_engine_pool()['tts']
        logger.info("Get tts engine successfully.")
        response = {"sample_rate": tts_engine.sample_rate}
    except ServerBaseException as e:
        response = failed_response(e.error_code, e.msg)
    except BaseException:
        response = failed_response(ErrorCode.SERVER_UNKOWN_ERR)
        traceback.print_exc()

    return response


================================================
FILE: paddlespeech/server/restful/vector_api.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import base64
import traceback
from typing import Union

import numpy as np
from fastapi import APIRouter

from paddlespeech.cli.log import logger
from paddlespeech.server.engine.engine_pool import get_engine_pool
from paddlespeech.server.engine.vector.python.vector_engine import PaddleVectorConnectionHandler
from paddlespeech.server.restful.request import VectorRequest
from paddlespeech.server.restful.request import VectorScoreRequest
from paddlespeech.server.restful.response import ErrorResponse
from paddlespeech.server.restful.response import VectorResponse
from paddlespeech.server.restful.response import VectorScoreResponse
from paddlespeech.server.utils.errors import ErrorCode
from paddlespeech.server.utils.errors import failed_response
from paddlespeech.server.utils.exception import ServerBaseException
router = APIRouter()


@router.get('/paddlespeech/vector/help')
def help():
    """Return a static sample describing the /paddlespeech/vector response.

    Returns:
        dict: a fixed example payload, used as the endpoint's response body
    """
    message = {"global": "success"}
    sample_vector = [2.3, 3.5, 5.5, 6.2, 2.8, 1.2, 0.3, 3.6]
    return {
        "success": "True",
        "code": 200,
        "message": message,
        "vector": sample_vector,
    }


@router.post(
    "/paddlespeech/vector", response_model=Union[VectorResponse, ErrorResponse])
def vector(request_body: VectorRequest):
    """Vector endpoint: run the vector engine on base64-encoded audio.

    Args:
        request_body (VectorRequest): carries the base64 `audio` and the `task`

    Returns:
        the success payload with `result.vec` (the vector as a list), or the
        value produced by `failed_response` on any failure
    """
    try:
        # 1. get the audio data
        #    the audio must be base64 format
        audio_data = base64.b64decode(request_body.audio)

        # 2. get single engine from engine pool
        #    and we use the vector_engine to create an connection handler to process the request
        engine_pool = get_engine_pool()
        vector_engine = engine_pool['vector']
        connection_handler = PaddleVectorConnectionHandler(vector_engine)

        # 3. we use the connection handler to process the audio
        audio_vec = connection_handler.run(audio_data, request_body.task)

        # 4. we need the result of the vector instance be numpy.ndarray
        if not isinstance(audio_vec, np.ndarray):
            type_err_msg = f"the vector type is not numpy.array, that is: {type(audio_vec)}"
            logger.error(type_err_msg)
            # ErrorResponse has only required fields, so `ErrorResponse()`
            # raised instead of producing a reply; build the error with the
            # shared helper so the description actually reaches the client.
            return failed_response(ErrorCode.SERVER_UNKOWN_ERR, type_err_msg)

        response = {
            "success": True,
            "code": 200,
            "message": {
                "description": "success"
            },
            "result": {
                "vec": audio_vec.tolist()
            }
        }

    except ServerBaseException as e:
        response = failed_response(e.error_code, e.msg)
    except BaseException:
        response = failed_response(ErrorCode.SERVER_UNKOWN_ERR)
        traceback.print_exc()

    return response


@router.post(
    "/paddlespeech/vector/score",
    response_model=Union[VectorScoreResponse, ErrorResponse])
def score(request_body: VectorScoreRequest):
    """Vector score endpoint: score an enroll audio against a test audio.

    Args:
        request_body (VectorScoreRequest): carries base64 `enroll_audio` and
            `test_audio`

    Returns:
        the success payload with `result.score`, or the value produced by
        `failed_response` on any failure
    """
    try:
        # Both clips arrive base64-encoded.
        enroll_data = base64.b64decode(request_body.enroll_audio)
        test_data = base64.b64decode(request_body.test_audio)

        # Wrap the shared 'vector' engine in a per-request connection handler.
        vector_engine = get_engine_pool()['vector']
        handler = PaddleVectorConnectionHandler(vector_engine)

        # Let the handler score the enroll clip against the test clip.
        similarity = handler.get_enroll_test_score(enroll_data, test_data)

        response = {
            "success": True,
            "code": 200,
            "message": {
                "description": "success"
            },
            "result": {
                "score": similarity
            }
        }

    except ServerBaseException as e:
        response = failed_response(e.error_code, e.msg)
    except BaseException:
        response = failed_response(ErrorCode.SERVER_UNKOWN_ERR)
        traceback.print_exc()

    return response


================================================
FILE: paddlespeech/server/tests/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/tests/asr/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/tests/asr/offline/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/tests/asr/offline/http_client.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import base64
import json
import time

import requests


def readwav2base64(wav_file):
    """
    Read a file in binary mode and return its content as a base64 string.

    Args:
        wav_file: path of the file to read

    Returns:
        str: the base64 encoding of the file bytes, decoded as UTF-8 text
    """
    with open(wav_file, 'rb') as wav_fp:
        raw_bytes = wav_fp.read()
    return base64.b64encode(raw_bytes).decode('utf-8')


def main():
    """
    Post ./16_audio.wav to the local ASR endpoint and print timing and the reply.

    The timed window covers reading and encoding the file, building the
    request body and the HTTP round trip.
    """
    endpoint = "http://127.0.0.1:8090/paddlespeech/asr"

    # start Timestamp
    started = time.time()

    payload = json.dumps({
        "audio": readwav2base64("./16_audio.wav"),
        "audio_format": "wav",
        "sample_rate": 16000,
        "lang": "zh_cn",
    })
    reply = requests.post(url=endpoint, data=payload)

    # ending Timestamp
    elapsed = time.time() - started
    print('time cost', elapsed, 's')

    print(reply.json())


# Run the demo request only when this file is executed as a script.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/server/tests/asr/online/README.md
================================================
([简体中文](./README_cn.md)|English)

# Speech Service

## Introduction

This document introduces a client for streaming asr service: microphone


## Usage
### 1. Install
Refer to [Install](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

It is recommended to use **paddlepaddle 2.4rc** or above.
You can choose either the medium or the hard way to install paddlespeech.


### 2. Prepare Test File


The input of the ASR client demo should be a WAV file (`.wav`), and the sample rate must be the same as the model's.

A sample file for this ASR client demo can be downloaded:
```bash
wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav
```

### 3. Streaming ASR Client Usage

- microphone
   ```
   python microphone_client.py

   ```


================================================
FILE: paddlespeech/server/tests/asr/online/README_cn.md
================================================
([English](./README.md)|中文)

# 语音服务

## 介绍
本文档介绍如何使用流式ASR的一种不同客户端:麦克风。 


## 使用方法
### 1. 安装
请看 [安装文档](https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/docs/source/install.md).

推荐使用 **paddlepaddle 2.4rc** 或以上版本。
你可以从 medium、hard 两种方式中选择一种方式安装 PaddleSpeech。


### 2. 准备测试文件

这个 ASR client 的输入应该是一个 WAV 文件（`.wav`），并且采样率必须与模型的采样率相同。

可以下载此 ASR client的示例音频：
```bash
wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav
```

### 3. 流式 ASR 客户端使用方法

- Python模拟流式服务命令行
   ```

   # 流式ASR
   paddlespeech_client asr_online --server_ip 127.0.0.1 --port 8091 --input ./zh.wav

   ```


- 麦克风
   ```
   # 直接调用麦克风设备
   python microphone_client.py

   ```


================================================
FILE: paddlespeech/server/tests/asr/online/microphone_client.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
record wave from the mic
"""
import asyncio
import json
import logging
import threading
import wave
from signal import SIGINT
from signal import SIGTERM

import pyaudio
import websockets


class ASRWsAudioHandler(threading.Thread):
    """Microphone client for the streaming ASR websocket service.

    A background thread records audio chunks from the microphone while the
    `run` coroutine forwards them to ``ws://<url>:<port>/ws/asr`` and logs
    every server reply. When the coroutine is cancelled it sends the "end"
    signal, stops recording and saves the captured audio to `fileName`.

    Note: `run` is a coroutine and therefore replaces `threading.Thread.run`;
    the instance is meant to be awaited, not started with `start()`.
    """

    def __init__(self, url="127.0.0.1", port=8091):
        """
        Args:
            url: host name or address of the ASR server
            port: port of the ASR server
        """
        threading.Thread.__init__(self)
        self.port = port
        # The bare host is expanded into the full websocket endpoint.
        self.url = "ws://" + url + ":" + str(self.port) + "/ws/asr"
        self.fileName = "./output.wav"
        self.chunk = 5120
        self.format = pyaudio.paInt16
        self.channels = 1
        self.rate = 16000
        self._running = True
        # Chunks still waiting to be sent to the server.
        self._frames = []
        # Every recorded chunk, kept so the whole take can be saved afterwards.
        self.data_backup = []

    def startrecord(self):
        """
        start a new thread to record wave
        """
        # Public Thread API instead of the private threading._start_new_thread;
        # daemon=True so the recorder never keeps the interpreter alive.
        threading.Thread(target=self.recording, daemon=True).start()

    def recording(self):
        """
        Record chunks from the microphone until `stoprecord` is called.

        Each chunk is appended both to the send queue and to the backup list.
        """
        self._running = True
        self._frames = []
        p = pyaudio.PyAudio()
        try:
            stream = p.open(
                format=self.format,
                channels=self.channels,
                rate=self.rate,
                input=True,
                frames_per_buffer=self.chunk)
            try:
                while self._running:
                    data = stream.read(self.chunk)
                    self._frames.append(data)
                    self.data_backup.append(data)
            finally:
                # Release the device even when reading fails.
                stream.stop_stream()
                stream.close()
        finally:
            p.terminate()

    def save(self):
        """
        Write every recorded chunk to `fileName` as a wav file.
        """
        p = pyaudio.PyAudio()
        try:
            sample_width = p.get_sample_size(self.format)
        finally:
            p.terminate()
        with wave.open(self.fileName, 'wb') as wf:
            wf.setnchannels(self.channels)
            wf.setsampwidth(sample_width)
            wf.setframerate(self.rate)
            wf.writeframes(b''.join(self.data_backup))

    def stoprecord(self):
        """
        stop recording
        """
        self._running = False

    async def run(self):
        """
        Ask for confirmation, then stream microphone audio to the ASR server.

        Sends a "start" message, forwards recorded chunks one by one (logging
        the reply to each) and, once cancelled, sends the "end" message,
        stops the recorder and saves the audio. Any answer other than "y"
        exits the program.
        """
        aa = input("是否开始录音？   (y/n)")
        if aa.strip() == "y":
            self.startrecord()
            logging.info("*" * 10 + "开始录音，请输入语音")

            async with websockets.connect(self.url) as ws:
                # send the start command
                audio_info = json.dumps(
                    {
                        "name": "test.wav",
                        "signal": "start",
                        "nbest": 5
                    },
                    sort_keys=True,
                    indent=4,
                    separators=(',', ': '))
                await ws.send(audio_info)
                msg = await ws.recv()
                logging.info("receive msg={}".format(msg))

                # send bytes data
                logging.info("结束录音请: Ctrl + c。继续请按回车。")
                try:
                    while True:
                        if self._frames:
                            await ws.send(self._frames.pop(0))
                            msg = await ws.recv()
                            logging.info("receive msg={}".format(msg))
                        else:
                            # Nothing recorded yet: yield to the event loop
                            # instead of spinning, so signal handlers and
                            # cancellation are processed promptly.
                            await asyncio.sleep(0.01)
                except asyncio.CancelledError:
                    # quit
                    # send finished
                    audio_info = json.dumps(
                        {
                            "name": "test.wav",
                            "signal": "end",
                            "nbest": 5
                        },
                        sort_keys=True,
                        indent=4,
                        separators=(',', ': '))
                    await ws.send(audio_info)
                    msg = await ws.recv()
                    logging.info("receive msg={}".format(msg))

                    self.stoprecord()
                    logging.info("*" * 10 + "录音结束")
                    self.save()
        elif aa.strip() == "n":
            exit()
        else:
            print("无效输入!")
            exit()


# Script entry point: run the microphone client until SIGINT/SIGTERM cancels it.
if __name__ == "__main__":

    logging.basicConfig(level=logging.INFO)
    logging.info("asr websocket client start")

    handler = ASRWsAudioHandler("127.0.0.1", 8091)
    # Create the loop explicitly: asyncio.get_event_loop()/ensure_future()
    # without a running loop are deprecated in recent Python versions.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    main_task = loop.create_task(handler.run())
    # Cancelling the task lets handler.run() send the "end" signal and save.
    for sig in (SIGINT, SIGTERM):
        loop.add_signal_handler(sig, main_task.cancel)
    try:
        loop.run_until_complete(main_task)
    finally:
        loop.close()

    logging.info("asr websocket client finished")


================================================
FILE: paddlespeech/server/tests/text/http_client.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import json
import time

import requests

from paddlespeech.cli.log import logger


# Request and response
def text_client(args):
    """ Send a sentence to the PaddleSpeech text server and return the reply text.

    Args:
        args: parsed arguments providing `server`, `port`, `text` and `output`;
            when `output` is non-empty the returned text is also written there

    Returns:
        str: the `punc_text` field of the server response
    """
    url = "http://" + str(args.server) + ":" + str(
        args.port) + "/paddlespeech/text"
    request = {
        "text": args.text,
    }

    response = requests.post(url, json.dumps(request))
    response_dict = response.json()
    punc_text = response_dict["result"]["punc_text"]

    # save the punctuated text
    outfile = args.output
    if outfile:
        # Explicit UTF-8: the text is typically Chinese and the platform
        # default encoding may not be able to represent it.
        with open(outfile, 'w', encoding='utf-8') as w:
            w.write(punc_text + "\n")

    logger.info(f"The punc text is: {punc_text}")
    return punc_text


# Script entry point: parse the CLI options, call the text server, report timing.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--text',
        type=str,
        default="今天的天气真不错啊你下午有空吗我想约你一起去吃饭",
        help='A sentence to be punctuated')
    parser.add_argument(
        '--output', type=str, default="./punc_text", help='Punc text file')
    parser.add_argument(
        "--server", type=str, help="server ip", default="127.0.0.1")
    parser.add_argument("--port", type=int, help="server port", default=8090)
    args = parser.parse_args()

    st = time.time()
    try:
        punc_text = text_client(args)
        time_consume = time.time() - st
        text_len = len(args.text)
        print("Text Process successfully.")
        print("Inference time: %f" % (time_consume))
        # The length is an integer, so print it with %d rather than %f.
        print("The text length: %d" % (text_len))
        # Skip the per-word figure for empty input instead of dividing by zero
        # (which used to be reported as a processing failure).
        if text_len > 0:
            print("The time per word is: %f" % (time_consume / text_len))
    except BaseException as e:
        logger.info("Failed to Process text.")
        logger.info(e)


================================================
FILE: paddlespeech/server/tests/tts/offline/http_client.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import base64
import io
import json
import os
import random
import time

import numpy as np
import requests
import soundfile

from paddlespeech.server.utils.audio_process import wav2pcm


# Request and response
def tts_client(args):
    """Send one synthesis request to the offline TTS HTTP server and save the audio.

    Args:
        args: namespace providing ``server``, ``port``, ``text``, ``spk_id``,
            ``speed``, ``volume``, ``sample_rate`` and ``output``.
            ``output`` must end with ``.wav`` or ``.pcm``; any other suffix
            only prints a message and nothing is written.

    Returns:
        tuple: (number of decoded samples, sample rate reported by soundfile).
    """
    url = "http://" + str(args.server) + ":" + str(
        args.port) + "/paddlespeech/tts"
    request = {
        "text": args.text,
        "spk_id": args.spk_id,
        "speed": args.speed,
        "volume": args.volume,
        "sample_rate": args.sample_rate,
        "save_path": args.output
    }

    response = requests.post(url, json.dumps(request))
    response_dict = response.json()
    wav_base64 = response_dict["result"]["audio"]

    audio_data_byte = base64.b64decode(wav_base64)
    # Decode the wav container held in memory.
    samples, sample_rate = soundfile.read(
        io.BytesIO(audio_data_byte), dtype='float32')

    # Write the audio in the format selected by the output suffix.
    outfile = args.output
    if outfile.endswith(".wav"):
        soundfile.write(outfile, samples, sample_rate)
    elif outfile.endswith(".pcm"):
        # pcm output goes through an intermediate wav file in the current
        # directory; make sure it is removed even when the conversion fails.
        temp_wav = str(random.getrandbits(128)) + ".wav"
        soundfile.write(temp_wav, samples, sample_rate)
        try:
            wav2pcm(temp_wav, outfile, data_type=np.int16)
        finally:
            os.remove(temp_wav)
    else:
        print("The format for saving audio only supports wav or pcm")

    return len(samples), sample_rate


if __name__ == "__main__":
    # Command-line entry point: request one synthesis from the offline TTS
    # server through tts_client() and print timing statistics.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--text',
        type=str,
        default="您好，欢迎使用语音合成服务。",
        help='A sentence to be synthesized')
    parser.add_argument('--spk_id', type=int, default=0, help='Speaker id')
    parser.add_argument('--speed', type=float, default=1.0, help='Audio speed')
    parser.add_argument(
        '--volume', type=float, default=1.0, help='Audio volume')
    parser.add_argument(
        '--sample_rate',
        type=int,
        default=0,
        help='Sampling rate, the default is the same as the model')
    parser.add_argument(
        '--output',
        type=str,
        default="./out.wav",
        help='Synthesized audio file')
    parser.add_argument(
        "--server", type=str, help="server ip", default="127.0.0.1")
    parser.add_argument("--port", type=int, help="server port", default=8090)
    args = parser.parse_args()

    st = time.time()
    try:
        samples_length, sample_rate = tts_client(args)
        time_consume = time.time() - st
        duration = samples_length / sample_rate
        print("Synthesized audio successfully.")
        print("Inference time: %f" % (time_consume))
        print("The duration of synthesized audio: %f" % (duration))
        # The real-time factor is undefined for empty audio.
        if duration > 0:
            print("The RTF is: %f" % (time_consume / duration))
    except Exception as e:
        # Report the cause instead of hiding it, and let Ctrl-C / SystemExit
        # propagate (they are not subclasses of Exception).
        print("Failed to synthesized audio.")
        print(e)


================================================
FILE: paddlespeech/server/tests/tts/online/http_client.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse

from paddlespeech.server.utils.audio_handler import TTSHttpHandler
from paddlespeech.server.utils.util import compute_delay

if __name__ == "__main__":
    # Command-line entry point for the streaming TTS HTTP client.

    def _str2bool(value):
        """Parse a command-line boolean.

        ``type=bool`` would turn every non-empty string (including "False")
        into True, so the accepted spellings are matched explicitly.
        """
        if isinstance(value, bool):
            return value
        lowered = value.strip().lower()
        if lowered in ("true", "t", "yes", "y", "1"):
            return True
        if lowered in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError(
            "expected a boolean value, got {!r}".format(value))

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--text",
        type=str,
        help="A sentence to be synthesized",
        default="您好，欢迎使用语音合成服务。")
    parser.add_argument(
        "--server", type=str, help="server ip", default="127.0.0.1")
    parser.add_argument("--port", type=int, help="server port", default=8092)
    parser.add_argument('--spk_id', type=int, default=0, help='Speaker id')
    parser.add_argument('--speed', type=float, default=1.0, help='Audio speed')
    parser.add_argument(
        '--volume', type=float, default=1.0, help='Audio volume')
    parser.add_argument(
        '--sample_rate',
        type=int,
        default=0,
        choices=[0, 8000, 16000],
        help='Sampling rate, the default is the same as the model')
    parser.add_argument(
        "--output", type=str, help="save audio path", default=None)
    parser.add_argument(
        "--play",
        type=_str2bool,
        help="whether to play audio",
        default=False)
    args = parser.parse_args()

    print("tts http client start")
    handler = TTSHttpHandler(args.server, args.port, args.play)
    # TTSHttpHandler.run() is declared as run(text, spk_id=0, output=None);
    # passing speed/volume/sample_rate positionally raises TypeError, so those
    # options are still accepted on the command line but are not forwarded.
    first_response, final_response, duration, save_audio_success, receive_time_list, chunk_duration_list = handler.run(
        args.text, args.spk_id, args.output)
    delay_time_list = compute_delay(receive_time_list, chunk_duration_list)

    print(f"sentence: {args.text}")
    print(f"duration: {duration} s")
    print(f"first response: {first_response} s")
    print(f"final response: {final_response} s")
    # The real-time factor is undefined when no audio was received.
    if duration:
        print(f"RTF: {final_response/duration}")
    if args.output is not None:
        if save_audio_success:
            print(f"Audio successfully saved in {args.output}")
        else:
            print("Audio save failed.")

    if delay_time_list != []:
        print(
            f"Delay situation: total number of packages: {len(receive_time_list)}, the number of delayed packets: {len(delay_time_list)}, minimum delay time: {min(delay_time_list)} s, maximum delay time: {max(delay_time_list)} s, average delay time: {sum(delay_time_list)/len(delay_time_list)} s, delay rate:{len(delay_time_list)/len(receive_time_list)}"
        )
    else:
        print("The sentence has no delay in streaming synthesis.")

================================================
FILE: paddlespeech/server/tests/tts/online/ws_client.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import asyncio

from paddlespeech.server.utils.audio_handler import TTSWsHandler
from paddlespeech.server.utils.util import compute_delay

if __name__ == "__main__":
    # Command-line entry point for the streaming TTS websocket client.

    def _str2bool(value):
        """Parse a command-line boolean.

        ``type=bool`` would turn every non-empty string (including "False")
        into True, so the accepted spellings are matched explicitly.
        """
        if isinstance(value, bool):
            return value
        lowered = value.strip().lower()
        if lowered in ("true", "t", "yes", "y", "1"):
            return True
        if lowered in ("false", "f", "no", "n", "0", ""):
            return False
        raise argparse.ArgumentTypeError(
            "expected a boolean value, got {!r}".format(value))

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--text",
        type=str,
        help="A sentence to be synthesized",
        default="您好，欢迎使用语音合成服务。")
    parser.add_argument(
        "--server", type=str, help="server ip", default="127.0.0.1")
    parser.add_argument("--port", type=int, help="server port", default=8092)
    parser.add_argument(
        "--output", type=str, help="save audio path", default=None)
    parser.add_argument(
        "--play",
        type=_str2bool,
        help="whether to play audio",
        default=False)
    args = parser.parse_args()

    print("tts websocket client start")
    handler = TTSWsHandler(args.server, args.port, args.play)
    # TTSWsHandler.run() is declared as run(text, spk_id=0, output=None):
    # the output path must be passed by keyword, otherwise it lands in the
    # spk_id slot and the audio is never saved.
    first_response, final_response, duration, save_audio_success, receive_time_list, chunk_duration_list = asyncio.run(
        handler.run(args.text, output=args.output))
    delay_time_list = compute_delay(receive_time_list, chunk_duration_list)

    print(f"sentence: {args.text}")
    print(f"duration: {duration} s")
    print(f"first response: {first_response} s")
    print(f"final response: {final_response} s")
    # The real-time factor is undefined when no audio was received.
    if duration:
        print(f"RTF: {final_response/duration}")
    if args.output is not None:
        if save_audio_success:
            print(f"Audio successfully saved in {args.output}")
        else:
            print("Audio save failed.")

    if delay_time_list != []:
        print(
            f"Delay situation: total number of packages: {len(receive_time_list)}, the number of delayed packets: {len(delay_time_list)}, minimum delay time: {min(delay_time_list)} s, maximum delay time: {max(delay_time_list)} s, average delay time: {sum(delay_time_list)/len(delay_time_list)} s, delay rate:{len(delay_time_list)/len(receive_time_list)}"
        )
    else:
        print("The sentence has no delay in streaming synthesis.")


================================================
FILE: paddlespeech/server/util.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import hashlib
import inspect
import json
import os
import tarfile
import threading
import time
import uuid
import zipfile
from typing import Any
from typing import Dict

import paddle
import requests
import yaml
from paddle.framework import load

from .entry import client_commands
from .entry import server_commands
from paddlespeech.audio.backends import soundfile_load
from paddlespeech.cli import download
try:
    from .. import __version__
except ImportError:
    __version__ = "0.0.0"  # for develop branch

# Overwrite the retry constant exposed by the requests.adapters module.
# NOTE(review): confirm that this assignment actually changes the retry
# behaviour of requests issued later in this module - TODO verify against the
# installed requests version.
requests.adapters.DEFAULT_RETRIES = 3

# Names exported by ``from paddlespeech.server.util import *``.
__all__ = [
    'cli_server_register',
    'get_server_command',
    'cli_client_register',
    'get_client_command',
    'download_and_decompress',
    'load_state_dict_from_url',
    'stats_wrapper',
]


def cli_server_register(name: str, description: str='') -> Any:
    """Build a decorator that records ``command`` in ``server_commands``.

    Args:
        name (str): dot-separated path; each component is used as a key to
            descend one level into ``server_commands``.
        description (str, optional): stored under ``'_description'`` at the
            final level when non-empty.

    Returns:
        A decorator that stores its argument under ``'_entry'`` and returns
        the argument unchanged.
    """

    def _register(command):
        node = server_commands
        for key in name.split('.'):
            node = node[key]

        node['_entry'] = command
        if description:
            node['_description'] = description
        return command

    return _register


def get_server_command(name: str) -> Any:
    """Return the ``'_entry'`` value stored in ``server_commands`` under the
    dot-separated path ``name``."""
    node = server_commands
    for key in name.split('.'):
        node = node[key]
    return node['_entry']


def cli_client_register(name: str, description: str='') -> Any:
    """Build a decorator that records ``command`` in ``client_commands``.

    Args:
        name (str): dot-separated path; each component is used as a key to
            descend one level into ``client_commands``.
        description (str, optional): stored under ``'_description'`` at the
            final level when non-empty.

    Returns:
        A decorator that stores its argument under ``'_entry'`` and returns
        the argument unchanged.
    """

    def _register(command):
        node = client_commands
        for key in name.split('.'):
            node = node[key]

        node['_entry'] = command
        if description:
            node['_description'] = description
        return command

    return _register


def get_client_command(name: str) -> Any:
    """Return the ``'_entry'`` value stored in ``client_commands`` under the
    dot-separated path ``name``."""
    node = client_commands
    for key in name.split('.'):
        node = node[key]
    return node['_entry']


def _get_uncompress_path(filepath: os.PathLike) -> os.PathLike:
    file_dir = os.path.dirname(filepath)
    is_zip_file = False
    if tarfile.is_tarfile(filepath):
        files = tarfile.open(filepath, "r:*")
        file_list = files.getnames()
    elif zipfile.is_zipfile(filepath):
        files = zipfile.ZipFile(filepath, 'r')
        file_list = files.namelist()
        is_zip_file = True
    else:
        return file_dir

    if download._is_a_single_file(file_list):
        rootpath = file_list[0]
        uncompressed_path = os.path.join(file_dir, rootpath)
    elif download._is_a_single_dir(file_list):
        if is_zip_file:
            rootpath = os.path.splitext(file_list[0])[0].split(os.sep)[0]
        else:
            rootpath = os.path.splitext(file_list[0])[0].split(os.sep)[-1]
        uncompressed_path = os.path.join(file_dir, rootpath)
    else:
        rootpath = os.path.splitext(filepath)[0].split(os.sep)[-1]
        uncompressed_path = os.path.join(file_dir, rootpath)

    files.close()
    return uncompressed_path


def download_and_decompress(archive: Dict[str, str], path: str) -> os.PathLike:
    """
    Download an archive and decompress it into ``path``.

    ``archive`` must provide the keys ``'url'`` and ``'md5'``. When a file
    named after the url already exists in ``path`` and passes the md5 check,
    it is only decompressed (if its target directory is missing); otherwise a
    download statistic is sent and the archive is fetched from the url.
    """
    if not os.path.isdir(path):
        os.makedirs(path)

    assert 'url' in archive and 'md5' in archive, \
        'Dictionary keys of "url" and "md5" are required in the archive, but got: {}'.format(list(archive.keys()))

    local_file = os.path.join(path, os.path.basename(archive['url']))
    already_downloaded = os.path.isfile(local_file) and download._md5check(
        local_file, archive['md5'])

    if not already_downloaded:
        stats_extra = {
            'download_url': archive['url'],
            'paddle_version': paddle.__version__
        }
        StatsWorker(
            task='download', version=__version__,
            extra_info=stats_extra).start()
        return download.get_path_from_url(archive['url'], path,
                                          archive['md5'])

    target_dir = _get_uncompress_path(local_file)
    if not os.path.isdir(target_dir):
        download._decompress(local_file)
    return target_dir


def load_state_dict_from_url(url: str, path: str, md5: str=None) -> os.PathLike:
    """
    Fetch ``url`` into ``path`` (created when missing) and return whatever
    ``paddle.framework.load`` produces for the file named after the url.
    """
    directory_exists = os.path.isdir(path)
    if not directory_exists:
        os.makedirs(path)

    download.get_path_from_url(url, path, md5)

    local_file = os.path.join(path, os.path.basename(url))
    return load(local_file)


def _get_user_home():
    return os.path.expanduser('~')


def _get_paddlespcceh_home():
    if 'PPSPEECH_HOME' in os.environ:
        home_path = os.environ['PPSPEECH_HOME']
        if os.path.exists(home_path):
            if os.path.isdir(home_path):
                return home_path
            else:
                raise RuntimeError(
                    'The environment variable PPSPEECH_HOME {} is not a directory.'.
                    format(home_path))
        else:
            return home_path
    return os.path.join(_get_user_home(), '.paddlespeech')


def _get_sub_home(directory):
    """Return ``<paddlespeech home>/<directory>``, creating it when absent."""
    sub_home = os.path.join(_get_paddlespcceh_home(), directory)
    if os.path.exists(sub_home):
        return sub_home
    os.makedirs(sub_home)
    return sub_home


# Root directory for PaddleSpeech data: $PPSPEECH_HOME when that environment
# variable is set, otherwise ~/.paddlespeech.
PPSPEECH_HOME = _get_paddlespcceh_home()
# Sub-directories of the root. _get_sub_home() creates them when they do not
# exist, so importing this module may create directories on disk.
MODEL_HOME = _get_sub_home('models')
CONF_HOME = _get_sub_home('conf')


def _md5(text: str):
    '''Calculate the md5 value of the input text.'''
    md5code = hashlib.md5(text.encode())
    return md5code.hexdigest()


class ConfigCache:
    """Small persistent key/value store kept in ``CONF_HOME/cache.yaml``.

    On construction a fresh ``cache_info`` identifier is generated; if the
    cache file already exists its contents override the generated values,
    otherwise the generated values are written to disk.
    """

    def __init__(self):
        self._data = {}
        self._initialize()
        self.file = os.path.join(CONF_HOME, 'cache.yaml')
        if not os.path.exists(self.file):
            self.flush()
            return

        with open(self.file, 'r') as file:
            try:
                # NOTE(review): yaml.load with FullLoader is applied to a file
                # under the user's home; consider yaml.safe_load since only
                # plain mappings are ever written by flush().
                cfg = yaml.load(file, Loader=yaml.FullLoader)
                self._data.update(cfg)
            except Exception:
                # Unreadable/invalid cache (bad YAML, empty file, non-mapping
                # content): rewrite it from the in-memory defaults. Exception
                # rather than BaseException so that KeyboardInterrupt and
                # SystemExit are not swallowed here.
                self.flush()

    @property
    def cache_info(self):
        """The ``cache_info`` identifier currently held in memory."""
        return self._data['cache_info']

    def _initialize(self):
        # Set default configuration values: md5 of the last 12 characters of
        # a uuid1 string, a dash, and the current Unix time in seconds.
        cache_info = _md5(str(uuid.uuid1())[-12:]) + "-" + str(int(time.time()))
        self._data['cache_info'] = cache_info

    def flush(self):
        '''Flush the current configuration into the configuration file.'''
        with open(self.file, 'w') as file:
            # Round-trip through JSON so only plain JSON-compatible values
            # reach yaml.dump.
            cfg = json.loads(json.dumps(self._data))
            yaml.dump(cfg, file)


# Endpoint that StatsWorker.run() sends its GET request to.
stats_api = "http://paddlepaddle.org.cn/paddlehub/stat"
# Identifier loaded from (or freshly written to) CONF_HOME/cache.yaml when this
# module is imported; StatsWorker attaches it to every request it sends.
cache_info = ConfigCache().cache_info


class StatsWorker(threading.Thread):
    """Background thread that reports one usage statistic to ``stats_api``.

    Args:
        task (str): task name sent as the ``task`` query parameter.
        model: optional model name; sent as ``model`` only when truthy.
        version: version string sent as ``version``.
        extra_info (dict, optional): extra key/values, JSON-encoded into the
            ``extra`` query parameter together with ``cache_info``.
    """

    def __init__(self,
                 task="asr",
                 model=None,
                 version=__version__,
                 extra_info=None):
        threading.Thread.__init__(self)
        self._task = task
        self._model = model
        self._version = version
        # Keep a private copy: run() adds 'cache_info' to this dict, which
        # must neither leak into a shared default argument nor modify the
        # dictionary owned by the caller.
        self._extra_info = dict(extra_info) if extra_info else {}

    def run(self):
        """Send the statistic; network failures are deliberately ignored."""
        params = {
            'task': self._task,
            'version': self._version,
            'from': 'ppspeech'
        }
        if self._model:
            params['model'] = self._model

        self._extra_info.update({
            'cache_info': cache_info,
        })
        params.update({"extra": json.dumps(self._extra_info)})

        try:
            requests.get(stats_api, params)
        except Exception:
            # Best effort only: statistics must never break the caller.
            pass

        return


def _note_one_stat(cls_name, params=None):
    """Start a StatsWorker describing one executor call.

    Args:
        cls_name (str): executor class name; ``'Executor'`` is stripped and
            the rest lower-cased to obtain the task name.
        params (dict, optional): call parameters of the executor. Only the
            tasks ``asr``, ``st``, ``tts``, ``cls`` and ``text`` are
            reported; any other task returns without doing anything.
    """
    if params is None:
        params = {}

    task = cls_name.replace('Executor', '').lower()  # XXExecutor
    extra_info = {
        'paddle_version': paddle.__version__,
    }

    model = params.get('model')

    # -1 marks an unknown input sample rate: either no audio file was given
    # or it could not be read. Initialising it here avoids an unbound local
    # in the asr/st/cls branches when 'audio_file' is absent.
    sr = -1
    if 'audio_file' in params:
        try:
            _, sr = soundfile_load(params['audio_file'])
        except Exception:
            sr = -1

    if task == 'asr':
        extra_info.update({
            'lang': params['lang'],
            'inp_sr': sr,
            'model_sr': params['sample_rate'],
        })
    elif task == 'st':
        extra_info.update({
            'lang':
            params['src_lang'] + '-' + params['tgt_lang'],
            'inp_sr':
            sr,
            'model_sr':
            params['sample_rate'],
        })
    elif task == 'tts':
        # For tts the acoustic model name is reported as the model.
        model = params['am']
        extra_info.update({
            'lang': params['lang'],
            'vocoder': params['voc'],
        })
    elif task == 'cls':
        extra_info.update({
            'inp_sr': sr,
        })
    elif task == 'text':
        extra_info.update({
            'sub_task': params['task'],
            'lang': params['lang'],
        })
    else:
        return

    StatsWorker(
        task=task,
        model=model,
        version=__version__,
        extra_info=extra_info, ).start()


def _parse_args(func, *args, **kwargs):
    # FullArgSpec(args, varargs, varkw, defaults, kwonlyargs, kwonlydefaults, annotations)
    argspec = inspect.getfullargspec(func)

    keys = argspec[0]
    if keys[0] == 'self':  # Remove self pointer.
        keys = keys[1:]

    default_values = argspec[3]
    values = [None] * (len(keys) - len(default_values))
    values.extend(list(default_values))
    params = dict(zip(keys, values))

    for idx, v in enumerate(args):
        params[keys[idx]] = v
    for k, v in kwargs.items():
        params[k] = v

    return params


def stats_wrapper(executor_func):
    """Decorate an executor method so each call is reported as a statistic.

    The report is best effort: any exception raised while collecting or
    sending it is ignored and the wrapped method is always invoked with the
    original arguments.
    """

    def _warpper(self, *args, **kwargs):
        try:
            executor_name = type(self).__name__
            call_params = _parse_args(executor_func, *args, **kwargs)
            _note_one_stat(executor_name, call_params)
        except Exception:
            pass
        return executor_func(self, *args, **kwargs)

    return _warpper


================================================
FILE: paddlespeech/server/utils/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/utils/audio_handler.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import base64
import json
import logging
import threading
import time

import numpy as np
import requests
import soundfile
import websockets

from paddlespeech.cli.log import logger
from paddlespeech.server.utils.audio_process import save_audio
from paddlespeech.server.utils.util import wav2base64


class TextHttpHandler:
    def __init__(self, server_ip="127.0.0.1", port=8090):
        """Client for the text (punctuation) HTTP service.

        Args:
            server_ip (str, optional): the text server ip. Defaults to "127.0.0.1".
            port (int, optional): the text server port. Defaults to 8090.

        When either value is None no endpoint is configured and ``run``
        returns its input unchanged.
        """
        super().__init__()
        self.server_ip = server_ip
        self.port = port
        self.url = None
        if server_ip is not None and port is not None:
            self.url = 'http://' + self.server_ip + ":" + str(
                self.port) + '/paddlespeech/text'
        logger.info(f"endpoint: {self.url}")

    def run(self, text):
        """Ask the text server to restore punctuation for ``text``.

        Args:
            text (str): the text to be processed

        Returns:
            str: the punctuated text, or ``text`` itself when no server is
            configured or the request fails.
        """
        no_server = self.server_ip is None or self.port is None
        if no_server:
            return text

        payload = {
            "text": text,
        }
        try:
            reply = requests.post(url=self.url, data=json.dumps(payload))
            return reply.json()["result"]["punc_text"]
        except Exception as e:
            logger.error(f"Call punctuation {self.url} occurs error")
            logger.error(e)
            return text


class ASRWsAudioHandler:
    def __init__(self,
                 url=None,
                 port=None,
                 endpoint="/paddlespeech/asr/streaming",
                 punc_server_ip=None,
                 punc_server_port=None):
        """PaddleSpeech online ASR server client audio handler.
           The online ASR server uses the websocket protocol.
        Args:
            url (str, optional): the server ip. Defaults to None.
            port (int, optional): the server port. Defaults to None.
            endpoint(str, optional): websocket path appended to the address
                (kept configurable to be compatible with the python server
                and the c++ server).
            punc_server_ip(str, optional): the punctuation server ip. Defaults to None.
            punc_server_port(int, optional): the punctuation port. Defaults to None

        ``self.url`` is None when url, port or endpoint is None; ``run`` then
        returns an empty string without connecting.
        """
        self.url = url
        self.port = port
        if url is None or port is None or endpoint is None:
            self.url = None
        else:
            self.url = "ws://" + self.url + ":" + str(self.port) + endpoint
        # A TextHttpHandler is always created; with a None ip/port its run()
        # simply returns the text unchanged.
        self.punc_server = TextHttpHandler(punc_server_ip, punc_server_port)
        logger.info(f"endpoint: {self.url}")

    def read_wave(self, wavfile_path: str):
        """Read an audio file and yield it in fixed-size chunks.

        Args:
            wavfile_path (str): the audio wavfile; its sample rate must be
                16000 (enforced with an assert).

        Yields:
            numpy.array: int16 chunks of 85 ms each; the last chunk is
            zero-padded to the full chunk size.
        """
        samples, sample_rate = soundfile.read(wavfile_path, dtype='int16')
        x_len = len(samples)
        assert sample_rate == 16000

        chunk_size = int(85 * sample_rate / 1000)  # 85ms, sample_rate = 16kHz

        # Number of zero samples needed to reach a multiple of chunk_size.
        if x_len % chunk_size != 0:
            padding_len_x = chunk_size - x_len % chunk_size
        else:
            padding_len_x = 0

        # NOTE(review): the padding is 1-D, so the concatenation presumably
        # expects single-channel audio - confirm against callers.
        padding = np.zeros((padding_len_x), dtype=samples.dtype)
        padded_x = np.concatenate([samples, padding], axis=0)

        assert (x_len + padding_len_x) % chunk_size == 0
        num_chunk = (x_len + padding_len_x) / chunk_size
        num_chunk = int(num_chunk)
        for i in range(0, num_chunk):
            start = i * chunk_size
            end = start + chunk_size
            x_chunk = padded_x[start:end]
            yield x_chunk

    async def run(self, wavfile_path: str):
        """Stream an audio file to the online server and return the result.

        Args:
            wavfile_path (str): audio path

        Returns:
            The final server message decoded with ``json.loads``, with its
            "result" field passed through the punctuation handler; an empty
            string when no server url is configured.
        """
        logging.debug("send a message to the server")

        if self.url is None:
            logger.error("No asr server, please input valid ip and port")
            return ""

        # 1. send websocket handshake protocol
        start_time = time.time()
        async with websockets.connect(self.url) as ws:
            # 2. server has already received handshake protocol
            # client start to send the command
            audio_info = json.dumps(
                {
                    "name": "test.wav",
                    "signal": "start",
                    "nbest": 1
                },
                sort_keys=True,
                indent=4,
                separators=(',', ': '))
            await ws.send(audio_info)
            msg = await ws.recv()
            logger.info("client receive msg={}".format(msg))

            # 3. send chunk audio data to engine; one reply is awaited per
            # chunk and decoded from JSON
            for chunk_data in self.read_wave(wavfile_path):
                await ws.send(chunk_data.tobytes())
                msg = await ws.recv()
                msg = json.loads(msg)
                logger.info("client receive msg={}".format(msg))
            # Restore punctuation on the reply to the last chunk.
            # NOTE(review): if read_wave yields no chunk, msg is still the
            # undecoded reply to the start command here - confirm that empty
            # audio cannot reach this point.
            if self.punc_server and len(msg['result']) > 0:
                msg["result"] = self.punc_server.run(msg["result"])
                logger.info("client punctuation restored msg={}".format(msg))
            # 4. we must send finished signal to the server
            audio_info = json.dumps(
                {
                    "name": "test.wav",
                    "signal": "end",
                    "nbest": 1
                },
                sort_keys=True,
                indent=4,
                separators=(',', ': '))
            await ws.send(audio_info)
            msg = await ws.recv()

            # 5. decode the final reply from JSON
            msg = json.loads(msg)

            if self.punc_server:
                msg["result"] = self.punc_server.run(msg["result"])

            # 6. log the final result and compute the statistics
            # (RTF = elapsed wall-clock time / audio duration)
            elapsed_time = time.time() - start_time
            audio_info = soundfile.info(wavfile_path)
            logger.info("client final receive msg={}".format(msg))
            logger.info(
                f"audio duration: {audio_info.duration}, elapsed time: {elapsed_time}, RTF={elapsed_time/audio_info.duration}"
            )

            result = msg

            return result


class ASRHttpHandler:
    def __init__(self, server_ip=None, port=None, endpoint="/paddlespeech/asr"):
        """The ASR client http request

        Args:
            server_ip (str, optional): the http asr server ip. Defaults to None.
            port (int, optional): the http asr server port. Defaults to None.
            endpoint (str, optional): path appended to the server address.
                Defaults to "/paddlespeech/asr".

        When server_ip or port is None no url is configured and ``run``
        returns an empty string.
        """
        super().__init__()
        self.server_ip = server_ip
        self.port = port
        if server_ip is None or port is None:
            self.url = None
        else:
            self.url = 'http://' + self.server_ip + ":" + str(
                self.port) + endpoint
        logger.info(f"endpoint: {self.url}")

    def run(self, input, audio_format, sample_rate, lang):
        """Call the http asr to process the audio

        Args:
            input (str): the audio file path
            audio_format (str): the audio format
            sample_rate (str): the audio sample rate
            lang (str): the audio language type

        Returns:
            The JSON-decoded server response, or an empty string when no
            server url is configured.
        """
        if self.url is None:
            # This handler talks to the ASR server, not the punctuation
            # server; the message matches the one used by ASRWsAudioHandler.
            logger.error("No asr server, please input valid ip and port")
            return ""

        audio = wav2base64(input)
        data = {
            "audio": audio,
            "audio_format": audio_format,
            "sample_rate": sample_rate,
            "lang": lang,
        }

        res = requests.post(url=self.url, data=json.dumps(data))

        return res.json()


class TTSWsHandler:
    def __init__(self, server="127.0.0.1", port=8092, play: bool=False):
        """PaddleSpeech Online TTS Server Client  audio handler
           Online tts server use the websocket protocol
        Args:
            server (str, optional): the server ip. Defaults to "127.0.0.1".
            port (int, optional): the server port. Defaults to 8092.
            play (bool, optional): whether to play audio. Defaults False

        The constructor performs an HTTP GET to the server to read the model
        sample rate, and opens a pyaudio output stream when ``play`` is True.
        """
        self.server = server
        self.port = port
        self.url = "ws://" + self.server + ":" + str(
            self.port) + "/paddlespeech/tts/streaming"
        self.play = play

        # get model sample rate
        self.url_get_sr = "http://" + str(self.server) + ":" + str(
            self.port) + "/paddlespeech/tts/streaming/samplerate"
        self.sample_rate = requests.get(self.url_get_sr).json()["sample_rate"]

        if self.play:
            import pyaudio
            self.buffer = b''
            self.p = pyaudio.PyAudio()
            self.stream = self.p.open(
                format=self.p.get_format_from_width(2),
                channels=1,
                rate=self.sample_rate,
                output=True)
            self.mutex = threading.Lock()
            # True until the playback thread has been started by run().
            self.start_play = True
            self.t = threading.Thread(target=self.play_audio)
            # Number of empty-buffer polls tolerated before playback stops.
            self.max_fail = 50
        logger.info(f"endpoint: {self.url}")

    def play_audio(self):
        """Playback loop: write buffered audio to the output stream.

        Each time the buffer is found empty the loop sleeps 50 ms and
        decrements ``max_fail``; it exits once ``max_fail`` drops below zero.
        """
        while True:
            if not self.buffer:
                self.max_fail -= 1
                time.sleep(0.05)
                if self.max_fail < 0:
                    break
            self.mutex.acquire()
            self.stream.write(self.buffer)
            self.buffer = b''
            self.mutex.release()

    async def run(self, text: str, spk_id=0, output: str=None):
        """Send a text to online server

        Args:
            text (str): sentence to be synthesized
            spk_id (int, optional): speaker id. Defaults to 0.
            output (str, optional): client save audio path. Defaults to None.

        Returns:
            tuple: (first_response, final_response, duration,
            save_audio_success, receive_time_list, chunk_duration_list).
            Times are in seconds measured from sending the synthesis request;
            ``duration`` is computed from the received bytes assuming 2 bytes
            per sample. When the server reports an error (status -1) or an
            unknown status, the values describe what was received so far and
            ``save_audio_success`` is False.
        """
        all_bytes = b''
        receive_time_list = []
        chunk_duration_list = []

        # 1. Send websocket handshake request
        async with websockets.connect(self.url) as ws:
            # 2. Server has already received handshake response, send start request
            start_request = json.dumps({"task": "tts", "signal": "start"})
            await ws.send(start_request)
            msg = await ws.recv()
            logger.info(f"client receive msg={msg}")
            msg = json.loads(msg)
            session = msg["session"]

            # 3. send speech synthesis request
            params = {
                "text": text,
                "spk_id": spk_id,
            }

            request = json.dumps(params)
            st = time.time()
            await ws.send(request)
            logging.debug("send a message to the server")

            # 4. Process the received response
            message = await ws.recv()
            first_response = time.time() - st
            message = json.loads(message)
            status = message["status"]

            # status == 1: a packet carrying audio; keep receiving.
            while status == 1:
                receive_time_list.append(time.time())
                audio = base64.b64decode(message["audio"])  # bytes
                chunk_duration_list.append(len(audio) / 2.0 / self.sample_rate)
                all_bytes += audio
                if self.play:
                    self.mutex.acquire()
                    self.buffer += audio
                    self.mutex.release()
                    if self.start_play:
                        self.t.start()
                        self.start_play = False

                message = await ws.recv()
                message = json.loads(message)
                status = message["status"]

            # The statistics are computed for every way of leaving the loop,
            # so the return statement never sees unbound names.
            final_response = time.time() - st
            duration = len(all_bytes) / 2.0 / self.sample_rate
            save_audio_success = False

            if status == 2:
                # Last packet returned normally, no audio information.
                if output is not None:
                    save_audio_success = save_audio(all_bytes, output,
                                                    self.sample_rate)
            elif status != -1:
                # Unknown status: report it once and stop instead of looping
                # forever on the same message.
                logger.error("infer error, return status is invalid.")

            # send end request (normal end, server error and invalid status)
            end_request = json.dumps({
                "task": "tts",
                "signal": "end",
                "session": session
            })
            await ws.send(end_request)

            if self.play:
                # The playback thread only exists as a running thread once
                # the first audio packet arrived; joining a thread that was
                # never started raises RuntimeError.
                if not self.start_play:
                    self.t.join()
                self.stream.stop_stream()
                self.stream.close()
                self.p.terminate()

        return first_response, final_response, duration, save_audio_success, receive_time_list, chunk_duration_list


class TTSHttpHandler:
    def __init__(self, server="127.0.0.1", port=8092, play: bool=False):
        """PaddleSpeech Online TTS Server Client audio handler
           This handler talks to the streaming tts server over HTTP.

        The constructor builds the streaming endpoint url and immediately
        queries the server for the model sample rate. When ``play`` is set it
        also opens a PyAudio output stream and prepares (but does not start)
        the playback thread.

        Args:
            server (str, optional): the server ip. Defaults to "127.0.0.1".
            port (int, optional): the server port. Defaults to 8092.
            play (bool, optional): whether to play audio. Defaults False
        """
        self.server = server
        self.port = port
        self.url = "http://" + str(self.server) + ":" + str(
            self.port) + "/paddlespeech/tts/streaming"
        self.play = play

        # get model sample rate
        self.url_get_sr = "http://" + str(self.server) + ":" + str(
            self.port) + "/paddlespeech/tts/streaming/samplerate"
        self.sample_rate = requests.get(self.url_get_sr).json()["sample_rate"]

        if self.play:
            import pyaudio
            self.buffer = b''
            self.p = pyaudio.PyAudio()
            # True until the playback thread has been started by `run`
            self.start_play = True
            # number of empty polls `play_audio` tolerates before it stops
            self.max_fail = 50

            self.stream = self.p.open(
                format=self.p.get_format_from_width(2),
                channels=1,
                rate=self.sample_rate,
                output=True)
            self.mutex = threading.Lock()
            self.t = threading.Thread(target=self.play_audio)

        logger.info(f"endpoint: {self.url}")

    def play_audio(self):
        """Playback thread body: move bytes from ``self.buffer`` to the stream.

        Every time the buffer is found empty the thread sleeps 50 ms and
        decrements ``self.max_fail``; once that counter drops below zero the
        loop ends.
        """
        while True:
            if not self.buffer:
                self.max_fail -= 1
                time.sleep(0.05)
                if self.max_fail < 0:
                    break
            # Detach the pending bytes while holding the lock, but write them
            # after releasing it: the write blocks for the playback time and
            # must not stall the thread that is receiving new chunks.
            with self.mutex:
                pending, self.buffer = self.buffer, b''
            if pending:
                self.stream.write(pending)

    def run(self, text: str, spk_id=0, output: str=None):
        """Send a text to tts online server

        Note that with ``play=True`` the output stream is closed at the end
        of this call and the playback thread is started at most once, so such
        a handler serves a single ``run``.

        Args:
            text (str): sentence to be synthesized.
            spk_id (int, optional): speaker id. Defaults to 0.
            output (str, optional): client save audio path. Defaults to None.

        Returns:
            tuple: ``(first_response, final_response, duration,
            save_audio_success, receive_time_list, chunk_duration_list)``.
            ``first_response`` / ``final_response`` are seconds elapsed since
            the request was sent; ``first_response`` is None when the server
            streamed no chunk at all. ``duration`` and the chunk durations are
            computed as ``len(bytes) / 2.0 / sample_rate``.
        """

        # 1. Create request
        params = {
            "text": text,
            "spk_id": spk_id,
        }

        all_bytes = b''
        # stays None if the response carries no chunk
        first_response = None
        receive_time_list = []
        chunk_duration_list = []

        # 2. Send request
        st = time.time()
        html = requests.post(self.url, json.dumps(params), stream=True)

        # 3. Process the received response
        try:
            for chunk in html.iter_content(chunk_size=None):
                receive_time_list.append(time.time())
                audio = base64.b64decode(chunk)  # bytes
                if first_response is None:
                    first_response = time.time() - st

                if self.play:
                    with self.mutex:
                        self.buffer += audio
                    if self.start_play:
                        self.t.start()
                        self.start_play = False
                all_bytes += audio
                chunk_duration_list.append(
                    len(audio) / 2.0 / self.sample_rate)

            final_response = time.time() - st
        finally:
            # when stream=True the connection must be released explicitly,
            # also when decoding a chunk fails
            html.close()

        duration = len(all_bytes) / 2.0 / self.sample_rate

        if output is not None:
            save_audio_success = save_audio(all_bytes, output, self.sample_rate)
        else:
            save_audio_success = False

        if self.play:
            # The thread only exists as a running thread once the first chunk
            # arrived; joining a never-started thread raises RuntimeError.
            if not self.start_play:
                self.t.join()
            self.stream.stop_stream()
            self.stream.close()
            self.p.terminate()

        return first_response, final_response, duration, save_audio_success, receive_time_list, chunk_duration_list


class VectorHttpHandler:
    def __init__(self, server_ip=None, port=None):
        """HTTP client for the vector endpoint (`/paddlespeech/vector`)

        Args:
            server_ip (str, optional): the http vector server ip. Defaults to None.
            port (int, optional): the http vector server port. Defaults to None.

        If either argument is None no endpoint url is built and `run`
        returns early.
        """
        super().__init__()
        self.server_ip, self.port = server_ip, port
        endpoint_known = not (server_ip is None or port is None)
        if endpoint_known:
            self.url = "http://" + self.server_ip + ":" + str(
                self.port) + "/paddlespeech/vector"
        else:
            self.url = None
        logger.info(f"endpoint: {self.url}")

    def run(self, input, audio_format, sample_rate, task="spk"):
        """Post one audio file to the vector endpoint

        Args:
            input (str): the audio file path
            audio_format (str): the audio format
            sample_rate (str): the audio sample rate
            task (str, optional): value of the request's "task" field. Defaults to "spk".

        Returns:
            the object returned by `requests.post`, or "" when no endpoint
            url is configured
        """
        if self.url is None:
            logger.error("No vector server, please input valid ip and port")
            return ""

        # the file content travels base64-encoded inside a JSON body
        encoded_audio = wav2base64(input)
        payload = json.dumps({
            "audio": encoded_audio,
            "task": task,
            "audio_format": audio_format,
            "sample_rate": sample_rate,
        })
        return requests.post(url=self.url, data=payload)


class VectorScoreHttpHandler:
    def __init__(self, server_ip=None, port=None):
        """HTTP client for the vector score endpoint (`/paddlespeech/vector/score`)

        Args:
            server_ip (str, optional): the http vector server ip. Defaults to None.
            port (int, optional): the http vector server port. Defaults to None.

        If either argument is None no endpoint url is built and `run`
        returns early.
        """
        super().__init__()
        self.server_ip, self.port = server_ip, port
        endpoint_known = not (server_ip is None or port is None)
        if endpoint_known:
            self.url = "http://" + self.server_ip + ":" + str(
                self.port) + "/paddlespeech/vector/score"
        else:
            self.url = None
        logger.info(f"endpoint: {self.url}")

    def run(self, enroll_audio, test_audio, audio_format, sample_rate):
        """Post an enroll/test audio pair to the score endpoint

        Args:
            enroll_audio (str): path of the enrollment audio file
            test_audio (str): path of the test audio file
            audio_format (str): the audio format
            sample_rate (str): the audio sample rate

        Returns:
            the object returned by `requests.post`, or "" when no endpoint
            url is configured
        """
        if self.url is None:
            logger.error("No vector server, please input valid ip and port")
            return ""

        # both files travel base64-encoded inside one JSON body
        enroll_b64 = wav2base64(enroll_audio)
        test_b64 = wav2base64(test_audio)
        payload = json.dumps({
            "enroll_audio": enroll_b64,
            "test_audio": test_b64,
            "task": "score",
            "audio_format": audio_format,
            "sample_rate": sample_rate,
        })
        return requests.post(url=self.url, data=payload)


================================================
FILE: paddlespeech/server/utils/audio_process.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import wave

import numpy as np

from paddlespeech.cli.log import logger


def wav2pcm(wavfile, pcmfile, data_type=np.int16):
    """ Save the wav file as a pcm file

    The sample bytes are taken from the RIFF ``data`` chunk, so headers that
    are not exactly 44 bytes long (extra chunks before ``data``) and metadata
    chunks stored after the samples do not end up in the pcm output. Input
    that does not start with a RIFF/WAVE signature is handled as before: the
    first 44 bytes are skipped and the rest is treated as samples.

    Args:
        wavfile (str): wav file path
        pcmfile (str): pcm file save path
        data_type (type, optional): pcm sample type. Defaults to np.int16.
    """
    import struct  # only needed here, for the 8-byte chunk headers

    with open(wavfile, "rb") as f:
        signature = f.read(12)
        if signature[:4] == b"RIFF" and signature[8:12] == b"WAVE":
            payload = b""
            while True:
                chunk_header = f.read(8)
                if len(chunk_header) < 8:
                    break  # ran out of chunks without finding `data`
                chunk_id, chunk_size = struct.unpack("<4sI", chunk_header)
                if chunk_id == b"data":
                    # a zero size is what writers leave behind when they
                    # never patched the header; take everything that follows
                    payload = f.read(chunk_size) if chunk_size else f.read()
                    break
                # chunks are word aligned: an odd size is followed by a pad byte
                f.seek(chunk_size + (chunk_size & 1), 1)
        else:
            f.seek(44)
            payload = f.read()

    # keep whole samples only, like np.fromfile does
    itemsize = np.dtype(data_type).itemsize
    usable = len(payload) - len(payload) % itemsize
    if usable:
        data = np.frombuffer(payload[:usable], dtype=data_type)
    else:
        data = np.empty(0, dtype=data_type)
    data.tofile(pcmfile)


def pcm2wav(pcm_file, wav_file, channels=1, bits=16, sample_rate=16000):
    """Save the pcm file as a wav file

    Both files are handled through context managers so they are closed even
    when reading or writing fails.

    Args:
        pcm_file (str): pcm file path
        wav_file (str): wav file save path
        channels (int, optional): audio channel. Defaults to 1.
        bits (int, optional): Bit depth. Defaults to 16.
        sample_rate (int, optional): sample rate. Defaults to 16000.
    """
    with open(pcm_file, 'rb') as pcmf:
        pcmdata = pcmf.read()

    # Only reported, not enforced: the sample width below is `bits // 8`.
    if bits % 8 != 0:
        logger.error("bits % 8 must == 0. now bits:" + str(bits))

    with wave.open(wav_file, 'wb') as wavfile:
        wavfile.setnchannels(channels)
        wavfile.setsampwidth(bits // 8)
        wavfile.setframerate(sample_rate)
        wavfile.writeframes(pcmdata)


def change_speed(sample_raw, speed_rate, sample_rate):
    """Change the audio speed with the sox `tempo` effect.
    A rate of exactly 1.0 returns `sample_raw` itself; for any other
    positive rate a new float32 array is built and returned.
    :param sample_raw: samples handed to sox as `input_array`
    :param speed_rate: Rate of speed change:
                       speed_rate > 1.0, speed up the audio;
                       speed_rate = 1.0, unchanged;
                       speed_rate < 1.0, slow down the audio;
                       speed_rate <= 0.0, not allowed, raise ValueError.
    :type speed_rate: float
    :param sample_rate: sample rate handed to sox as `sample_rate_in`
    :raises ValueError: If speed_rate <= 0.0.
    :raises RuntimeError: If soxbindings is missing and cannot be installed.
    """
    if speed_rate == 1.0:
        return sample_raw
    if speed_rate <= 0:
        raise ValueError("speed_rate should be greater than zero.")

    # soxbindings is optional: try the import first and fall back to
    # installing `sox` and `soxbindings` on the fly.
    try:
        import soxbindings as sox
    except ImportError:
        try:
            from paddlespeech.s2t.utils import dynamic_pip_install
            for package in ("sox", "soxbindings"):
                dynamic_pip_install.install(package)
            import soxbindings as sox
        except Exception:
            raise RuntimeError("Can not install soxbindings on your system.")

    transformer = sox.Transformer()
    transformer.set_globals(multithread=False)
    transformer.tempo(speed_rate)
    stretched = transformer.build_array(
        input_array=sample_raw, sample_rate_in=sample_rate)
    # drop the trailing axis and hand back an independent float32 copy
    flattened = stretched.squeeze(-1)
    sample_speed = flattened.astype(np.float32).copy()

    return sample_speed


def float2pcm(sig, dtype='int16'):
    """Scale a floating point signal to an integer PCM type.

    The signal is multiplied by ``2**(bits - 1)`` of the target type,
    shifted for unsigned targets, clipped to the target range and cast.

    Args:
        sig (array): Input array, must have floating point type.
        dtype (str, optional): Desired (integer) data type. Defaults to 'int16'.

    Returns:
        numpy.ndarray: Integer data, scaled and clipped to the range of the given
        dtype.

    Raises:
        TypeError: if `sig` is not floating point or `dtype` is not an integer type.
    """
    samples = np.asarray(sig)
    if samples.dtype.kind != 'f':
        raise TypeError("'sig' must be a float array")

    target = np.dtype(dtype)
    if target.kind not in 'iu':
        raise TypeError("'dtype' must be an integer type")

    limits = np.iinfo(target)
    half_range = 2**(limits.bits - 1)
    # zero for signed targets, half_range for unsigned ones
    shift = limits.min + half_range
    scaled = samples * half_range + shift
    return scaled.clip(limits.min, limits.max).astype(target)


def pcm2float(data):
    """Convert int16 pcm samples to float32 by dividing by 2**15.

    Args:
        data (numpy.array): samples; only arrays of dtype numpy.int16 are converted
    Returns:
        numpy.array: numpy.float32 samples for int16 input, otherwise `data`
        itself, untouched
    """
    if data.dtype != np.int16:
        return data

    full_scale = 2**(np.iinfo(np.int16).bits - 1)
    return data.astype("float32") / full_scale


def save_audio(bytes_data, audio_path, sample_rate: int=24000) -> bool:
    """save byte to audio file.

    A path ending in "pcm" receives the bytes unchanged. A path ending in
    "wav" is written as a 1-channel, 16-bit wav file. The wav file is
    written directly, without the former "./tmp.pcm" scratch file in the
    working directory, so concurrent calls cannot overwrite each other's data
    and nothing is left behind when writing fails.

    Args:
        bytes_data (bytes): audio samples, bytes format
        audio_path (str): save audio path
        sample_rate (int, optional): audio sample rate. Defaults to 24000.

    Returns:
        bool: Whether the audio was saved successfully
    """

    if audio_path.endswith("pcm"):
        with open(audio_path, "wb") as f:
            f.write(bytes_data)
    elif audio_path.endswith("wav"):
        with wave.open(audio_path, 'wb') as wav_out:
            wav_out.setnchannels(1)
            wav_out.setsampwidth(2)  # 16 bits per sample
            wav_out.setframerate(sample_rate)
            wav_out.writeframes(bytes_data)
    else:
        logger.error("Only supports saved audio format is pcm or wav")
        return False

    return True


================================================
FILE: paddlespeech/server/utils/buffer.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


class Frame(object):
    """Plain container for one piece of audio: its bytes, timestamp and duration."""

    def __init__(self, bytes, timestamp, duration):
        # `bytes` shadows the builtin on purpose: it is the caller-facing name
        self.bytes, self.timestamp, self.duration = bytes, timestamp, duration


class ChunkBuffer(object):
    def __init__(self,
                 window_n=7,
                 shift_n=4,
                 window_ms=20,
                 shift_ms=10,
                 sample_rate=16000,
                 sample_width=2):
        """Buffer that cuts a byte stream of audio into overlapping windows

        Args:
            window_n (int, optional): decode window frame length. Defaults to 7 frame.
            shift_n (int, optional): decode shift frame length. Defaults to 4 frame.
            window_ms (int, optional): frame length, ms. Defaults to 20 ms.
            shift_ms (int, optional): shift length, ms. Defaults to 10 ms.
            sample_rate (int, optional): audio sample rate. Defaults to 16000.
            sample_width (int, optional): sample point bytes. Defaults to 2 bytes.
        """
        self.window_n, self.shift_n = window_n, shift_n
        self.window_ms, self.shift_ms = window_ms, shift_ms
        self.sample_rate = sample_rate
        # bytes per sample point: int16 = 2; float32 = 4
        self.sample_width = sample_width

        # one decode window spans (window_n - 1) shifts plus one full frame
        window_span_ms = (self.window_n - 1) * self.shift_ms + self.window_ms
        self.window_sec = float(window_span_ms) / 1000.0
        self.shift_sec = float(self.shift_n * self.shift_ms / 1000.0)

        # the same two lengths expressed in bytes
        self.window_bytes = int(self.window_sec * self.sample_rate *
                                self.sample_width)
        self.shift_bytes = int(self.shift_sec * self.sample_rate *
                               self.sample_width)

        # bytes that did not fill a whole window yet
        self.remained_audio = b''
        # abs timestamp from `start` or latest `reset`
        self.timestamp = 0.0

    def reset(self):
        """
            Forget the buffered bytes and restart the timestamp at zero.
        """
        self.timestamp = 0.0
        self.remained_audio = b''

    def frame_generator(self, audio):
        """Yield `Frame` objects of `window_bytes` bytes from PCM audio data.

        The new bytes are appended to what earlier calls left over. Windows
        start `shift_bytes` apart; each yielded Frame carries the running
        timestamp and `window_sec` as its duration. Bytes from the last
        window start onwards are stored for the next call once the generator
        has been exhausted.
        """
        pending = self.remained_audio + audio
        self.remained_audio = b''

        total = len(pending)
        cursor = 0
        while True:
            stop = cursor + self.window_bytes
            if stop > total:
                break
            yield Frame(pending[cursor:stop], self.timestamp, self.window_sec)
            self.timestamp += self.shift_sec
            cursor += self.shift_bytes

        self.remained_audio += pending[cursor:]


================================================
FILE: paddlespeech/server/utils/config.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import yaml
from yacs.config import CfgNode


def get_config(config_file: str):
    """Load a yaml file and wrap its content in a CfgNode

    Args:
        config_file (str): path of the yaml configuration file

    Returns:
        CfgNode: the parsed configuration
    """
    with open(config_file, 'rt') as stream:
        parsed = yaml.safe_load(stream)

    return CfgNode(parsed)


================================================
FILE: paddlespeech/server/utils/errors.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
from enum import IntEnum

from fastapi import Response


class ErrorCode(IntEnum):
    """Integer status codes of the server; each member is a key of `ErrorMsg`."""

    SERVER_OK = 200  # success.

    # request-side problems
    SERVER_PARAM_ERR = 400  # Input parameters are not valid.
    SERVER_TASK_NOT_EXIST = 404  # Task is not exist.

    # server-side problems
    SERVER_INTERNAL_ERR = 500  # Internal error.
    SERVER_NETWORK_ERR = 502  # Network exception.
    SERVER_UNKOWN_ERR = 509  # Unknown error occurred.


# Default human-readable description for every ErrorCode member; looked up
# with `.get()` when a caller supplies no message of its own.
ErrorMsg = {
    ErrorCode.SERVER_OK: "success.",
    ErrorCode.SERVER_PARAM_ERR: "Input parameters are not valid.",
    ErrorCode.SERVER_TASK_NOT_EXIST: "Task is not exist.",
    ErrorCode.SERVER_INTERNAL_ERR: "Internal error.",
    ErrorCode.SERVER_NETWORK_ERR: "Network exception.",
    ErrorCode.SERVER_UNKOWN_ERR: "Unknown error occurred."
}


def failed_response(code, msg=""):
    """Build the JSON response returned when an interface call fails

    Args:
        code (int): error code number
        msg (str, optional): Interface call failure information. When empty,
            the text registered for `code` in `ErrorMsg` is used, falling
            back to "Unknown error occurred.". Defaults to "".

    Returns:
        Response (json): failure json information.
    """
    description = msg if msg else ErrorMsg.get(code, "Unknown error occurred.")

    payload = {
        "success": False,
        "code": int(code),
        "message": {
            "description": description
        },
    }
    body = json.dumps(payload)

    return Response(content=body, media_type="application/json")


================================================
FILE: paddlespeech/server/utils/exception.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import traceback

from paddlespeech.server.utils.errors import ErrorMsg


class ServerBaseException(Exception):
    """Base class of the server exceptions.

    Stores the error code and its message on the instance; a missing message
    is looked up in `ErrorMsg` (empty string when the code is unknown).
    Creating an instance also prints the current traceback.
    """

    def __init__(self, error_code, msg=None):
        resolved = msg or ErrorMsg.get(error_code, "")
        super().__init__(error_code, resolved)
        self.error_code = error_code
        self.msg = resolved
        traceback.print_exc()


================================================
FILE: paddlespeech/server/utils/onnx_infer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from typing import Optional

import onnxruntime as ort

from paddlespeech.cli.log import logger


def get_sess(model_path: Optional[os.PathLike]=None, sess_conf: dict=None):
    """Create an onnxruntime inference session

    Args:
        model_path (Optional[os.PathLike], optional): model handed to
            `ort.InferenceSession`. Defaults to None.
        sess_conf (dict, optional): session options. Keys read here:
            "graph_optimization_level" (0 disables graph optimization),
            "device" ("gpu:<id>" or "gpu" selects CUDA, anything else CPU),
            "use_trt", "cpu_threads" / "intra_op_num_threads" and
            "inter_op_num_threads". Defaults to None, which is treated as an
            empty configuration.

    Returns:
        the created `ort.InferenceSession`
    """
    # The default is None; without this every `.get` below would fail.
    sess_conf = sess_conf if sess_conf is not None else {}

    logger.debug(f"ort sessconf: {sess_conf}")
    sess_options = ort.SessionOptions()
    sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    if sess_conf.get('graph_optimization_level', 99) == 0:
        sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_DISABLE_ALL
    sess_options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL

    # "gpu:0"; a null/missing device means cpu
    device = sess_conf.get("device") or ""
    providers = ['CPUExecutionProvider']
    if "gpu" in device:
        # a bare "gpu" (no ":<id>" suffix) selects card 0
        parts = device.split(":")
        device_id = int(parts[1]) if len(parts) > 1 and parts[1] else 0
        providers = [('CUDAExecutionProvider', {'device_id': device_id})]

        # fastspeech2/mb_melgan can't use trt now!
        if sess_conf.get("use_trt", 0):
            providers = ['TensorrtExecutionProvider']
    logger.debug(f"ort providers: {providers}")

    # "cpu_threads" takes precedence over "intra_op_num_threads"
    if 'cpu_threads' in sess_conf:
        sess_options.intra_op_num_threads = sess_conf.get("cpu_threads", 0)
    else:
        sess_options.intra_op_num_threads = sess_conf.get(
            "intra_op_num_threads", 0)

    sess_options.inter_op_num_threads = sess_conf.get("inter_op_num_threads", 0)

    sess = ort.InferenceSession(
        model_path, providers=providers, sess_options=sess_options)
    return sess


================================================
FILE: paddlespeech/server/utils/paddle_predictor.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from typing import List
from typing import Optional

import paddle
from paddle.inference import Config
from paddle.inference import create_predictor


def init_predictor(model_dir: Optional[os.PathLike]=None,
                   model_file: Optional[os.PathLike]=None,
                   params_file: Optional[os.PathLike]=None,
                   predictor_conf: dict=None):
    """Create predictor with Paddle inference

    Args:
        model_dir (Optional[os.PathLike], optional): The path of the static model saved in the model layer. Defaults to None.
        model_file (Optional[os.PathLike], optional): *.pdmodel file path. Defaults to None.
        params_file (Optional[os.PathLike], optional): *.pdiparams file path.. Defaults to None.
        predictor_conf (dict, optional): The configuration parameters of predictor.
            Keys read here: "device", "switch_ir_optim", "glog_info" and
            "summary"; a missing key counts as a false value. Defaults to
            None, which is treated as an empty configuration.

    Returns:
        predictor (PaddleInferPredictor): created predictor

    Raises:
        AssertionError: if `model_dir` is given but is not a directory, or if
            it is not given and `model_file` / `params_file` are not files.
    """
    predictor_conf = predictor_conf if predictor_conf is not None else {}

    if model_dir is not None:
        assert os.path.isdir(model_dir), 'Please check model dir.'
        # use the argument itself; there is no `args` object in this module
        config = Config(model_dir)
    else:
        assert os.path.isfile(model_file) and os.path.isfile(
            params_file), 'Please check model and parameter files.'
        config = Config(model_file, params_file)

    # set device: the configured one, else whatever paddle currently reports
    device = predictor_conf.get("device") or paddle.get_device()
    if "gpu" in device:
        gpu_id = device.split(":")[-1]
        config.enable_use_gpu(1000, int(gpu_id))

    # IR optim
    if predictor_conf.get("switch_ir_optim"):
        config.switch_ir_optim()

    # glog: silenced unless explicitly requested
    if not predictor_conf.get("glog_info"):
        config.disable_glog_info()

    # config summary
    if predictor_conf.get("summary"):
        print(config.summary())

    # memory optim
    config.enable_memory_optim()

    predictor = create_predictor(config)
    return predictor


def run_model(predictor, input: List) -> List:
    """Feed the inputs to the predictor, run it and collect every output

    Args:
        predictor: paddle inference predictor
        input (list): The input of predictor, in the order of
            `predictor.get_input_names()`

    Returns:
        list: one entry per output name, in the order of
        `predictor.get_output_names()`
    """
    # bind the i-th input to the i-th input name
    for position, name in enumerate(predictor.get_input_names()):
        predictor.get_input_handle(name).copy_from_cpu(input[position])

    # do the inference
    predictor.run()

    # copy each output tensor back
    return [
        predictor.get_output_handle(name).copy_to_cpu()
        for name in predictor.get_output_names()
    ]


================================================
FILE: paddlespeech/server/utils/util.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import base64
import math

from paddlespeech.cli.log import logger


def wav2base64(wav_file: str):
    """
    Read a file in binary mode and return its content as a base64 string.
    """
    with open(wav_file, 'rb') as f:
        raw = f.read()
    return base64.b64encode(raw).decode('utf-8')


def base64towav(base64_string: str):
    """Placeholder: not implemented, does nothing and returns None."""
    return None


def self_check():
    """Resource self check; currently performs no check and always reports True.
    """
    healthy = True
    return healthy


def denorm(data, mean, std):
    """Undo a mean/std normalization: returns `data * std + mean`
    (needed by the stream am model).
    """
    scaled = data * std
    return scaled + mean


def get_chunks(data, block_size, pad_size, step):
    """Divide data into multiple chunks

    The chunked axis is axis 1 for step "am" and axis 0 for step "voc".
    Every chunk covers one block of `block_size` positions, extended by up
    to `pad_size` positions on each side and clipped to the data range.

    Args:
        data (tensor): data
        block_size (int): positions per chunk before padding; -1 returns the
            data as a single chunk
        pad_size (int): extra positions taken on both sides of each block
        step (str): set "am" or "voc", generate chunk for step am or vocoder(voc)

    Returns:
        list: chunks list

    Raises:
        ValueError: if `step` is neither "am" nor "voc" (and chunking was
            requested, i.e. `block_size` is not -1).
    """

    if block_size == -1:
        return [data]

    if step == "am":
        data_len = data.shape[1]
    elif step == "voc":
        data_len = data.shape[0]
    else:
        # Logging alone left `data_len` undefined and crashed a line later
        # with an unrelated UnboundLocalError; fail with a clear error.
        logger.error("Please set correct type to get chunks, am or voc")
        raise ValueError(
            f"unsupported step {step!r}, expected \"am\" or \"voc\"")

    chunks = []
    n = math.ceil(data_len / block_size)
    for i in range(n):
        start = max(0, i * block_size - pad_size)
        end = min((i + 1) * block_size + pad_size, data_len)
        if step == "am":
            chunks.append(data[:, start:end, :])
        else:
            chunks.append(data[start:end, :])
    return chunks


def compute_delay(receive_time_list, chunk_duration_list):
    """compute delay

    Simulates gap-free playback: `play_time` is the moment the audio received
    so far would run out. A packet arriving after that moment stalls playback
    by the difference, which is recorded as one delay.

        Args:
            receive_time_list (list): Time to receive each packet
            chunk_duration_list (list): The audio duration corresponding to each packet
        Returns:
            [list]: Delay time list (only the positive delays; empty when no
            packet was received)
        """
    assert (len(receive_time_list) == len(chunk_duration_list))
    delay_time_list = []
    # nothing received, nothing to replay
    if not receive_time_list:
        return delay_time_list

    play_time = receive_time_list[0] + chunk_duration_list[0]
    for i in range(1, len(receive_time_list)):
        receive_time = receive_time_list[i]
        delay_time = receive_time - play_time
        # the packet arrived late: playback stalls for `delay_time`
        if delay_time > 0:
            play_time = play_time + delay_time + chunk_duration_list[i]
            delay_time_list.append(delay_time)
        # the packet arrived in time: playback simply continues
        else:
            play_time = play_time + chunk_duration_list[i]

    return delay_time_list


def count_engine(logfile: str="./nohup.out"):
    """For inference on the statistical engine side

    Scans the server log for the lines containing "- first response time:",
    "- final response time:" and "- The durations of audio is:", takes the
    second to last space-separated token of each as a float, and prints the
    averages, the extremes and the RTF (total final response time divided by
    total audio duration).

    Args:
        logfile (str, optional): server log. Defaults to "./nohup.out".
    """
    first_response_list = []
    final_response_list = []
    duration_list = []

    with open(logfile, "r") as f:
        for line in f:
            if "- first response time:" in line:
                first_response = float(line.split(" ")[-2])
                first_response_list.append(first_response)
            elif "- final response time:" in line:
                final_response = float(line.split(" ")[-2])
                final_response_list.append(final_response)
            elif "- The durations of audio is:" in line:
                duration = float(line.split(" ")[-2])
                duration_list.append(duration)

    assert (len(first_response_list) == len(final_response_list) and
            len(final_response_list) == len(duration_list))

    # All three lists have the same length here; without any sample the
    # averages below would divide by zero.
    if not duration_list:
        logger.error(f"no engine statistics found in {logfile}")
        return

    avg_first_response = sum(first_response_list) / len(first_response_list)
    avg_final_response = sum(final_response_list) / len(final_response_list)
    avg_duration = sum(duration_list) / len(duration_list)
    RTF = sum(final_response_list) / sum(duration_list)

    print(
        "************************* engine result ***************************************"
    )
    print(
        f"test num: {len(duration_list)}, avg first response: {avg_first_response} s, avg final response: {avg_final_response} s, avg duration: {avg_duration}, RTF: {RTF}"
    )
    print(
        f"min duration: {min(duration_list)} s, max duration: {max(duration_list)} s"
    )
    print(
        f"max first response: {max(first_response_list)} s, min first response: {min(first_response_list)} s"
    )
    print(
        f"max final response: {max(final_response_list)} s, min final response: {min(final_response_list)} s"
    )


================================================
FILE: paddlespeech/server/utils/vad.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections

import webrtcvad


class VADAudio():
    """Streaming voice-activity segmenter built around ``webrtcvad.Vad``.

    Audio is queued with :meth:`add_audio`, cut into fixed-size frames by
    :meth:`frame_generator` and grouped into utterances by
    :meth:`vad_collector`.
    """

    def __init__(self,
                 aggressiveness=2,
                 rate=16000,
                 frame_duration_ms=20,
                 sample_width=2,
                 padding_ms=200,
                 padding_ratio=0.9):
        """Initializes VAD with given aggressiveness and sets up internal queues.

        Args:
            aggressiveness: value handed to ``webrtcvad.Vad``.
            rate: sample rate, forwarded to ``is_speech`` for every frame.
            frame_duration_ms: duration of one analysed frame.
            sample_width: multiplier used when sizing a frame
                (presumably bytes per sample -- confirm against callers).
            padding_ms: span covered by the sliding decision window; the
                window holds ``padding_ms // frame_duration_ms`` frames.
            padding_ratio: fraction of the window that must be voiced
                (resp. unvoiced) before the state flips.
        """
        self.vad = webrtcvad.Vad(aggressiveness)
        self.rate = rate
        self.sample_width = sample_width
        self.frame_duration_ms = frame_duration_ms
        self._frame_length = int(rate * (frame_duration_ms / 1000.0) *
                                 self.sample_width)
        self._buffer_queue = collections.deque()
        self.ring_buffer = collections.deque(maxlen=padding_ms //
                                             frame_duration_ms)
        self._ratio = padding_ratio
        self.triggered = False

    def add_audio(self, audio):
        """Adds new audio to internal queue, one element at a time."""
        self._buffer_queue.extend(audio)

    def frame_generator(self):
        """Generator that yields audio frames of frame_duration_ms.

        Every yielded frame is a ``bytes`` object holding exactly
        ``_frame_length`` queued elements; a trailing partial frame stays in
        the queue until more audio is added.
        """
        # A non-positive frame length could never drain the queue and would
        # spin forever yielding empty frames.
        if self._frame_length <= 0:
            return
        # `>=` (not `>`): a queue holding exactly one frame must yield it
        # instead of waiting for one more element to arrive.
        while len(self._buffer_queue) >= self._frame_length:
            yield bytes(self._buffer_queue.popleft()
                        for _ in range(self._frame_length))

    def vad_collector(self):
        """Generator that yields series of consecutive audio frames comprising each utterance, separated by yielding a single None.
            Determines voice activity by ratio of frames in padding_ms. Uses a buffer to include padding_ms prior to being triggered.
            Example: (frame, ..., frame, None, frame, ..., frame, None, ...)
                      |---utterance---|        |---utterance---|
        """
        for frame in self.frame_generator():
            is_speech = self.vad.is_speech(frame, self.rate)
            threshold = self._ratio * self.ring_buffer.maxlen
            if not self.triggered:
                # Idle: keep a rolling window and wait until enough of it is
                # voiced, then flush the window as the utterance's lead-in.
                self.ring_buffer.append((frame, is_speech))
                num_voiced = sum(1 for _, speech in self.ring_buffer
                                 if speech)
                if num_voiced > threshold:
                    self.triggered = True
                    for buffered_frame, _ in self.ring_buffer:
                        yield buffered_frame
                    self.ring_buffer.clear()
            else:
                # Inside an utterance: pass frames through and close the
                # utterance once enough of the window is unvoiced.
                yield frame
                self.ring_buffer.append((frame, is_speech))
                num_unvoiced = sum(1 for _, speech in self.ring_buffer
                                   if not speech)
                if num_unvoiced > threshold:
                    self.triggered = False
                    yield None
                    self.ring_buffer.clear()


================================================
FILE: paddlespeech/server/ws/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/server/ws/api.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List

from fastapi import APIRouter

from paddlespeech.server.ws.asr_api import router as asr_router
from paddlespeech.server.ws.tts_api import router as tts_router

# Module-level router that `setup_router` fills with the selected
# service routers and hands back to the caller.
_router = APIRouter()


def setup_router(api_list: List):
    """Register the websocket routers of the requested services.

    Args:
        api_list (List): service names to enable. 'asr' and 'tts' are
            recognised; any other entry is silently ignored.
    Returns:
        APIRouter: the module-level router, with one sub-router included
            per recognised entry of ``api_list``.
    """
    # Checked in this order for every requested name.
    known_services = (('asr', asr_router), ('tts', tts_router))

    for api_name in api_list:
        for service_name, service_router in known_services:
            if api_name == service_name:
                _router.include_router(service_router)
                break

    return _router


================================================
FILE: paddlespeech/server/ws/asr_api.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json

from fastapi import APIRouter
from fastapi import WebSocket
from fastapi import WebSocketDisconnect
from starlette.websockets import WebSocketState as WebSocketState

from paddlespeech.cli.log import logger
from paddlespeech.server.engine.engine_pool import get_engine_pool
# Router on which the streaming ASR websocket endpoint below is registered.
router = APIRouter()


@router.websocket('/paddlespeech/asr/streaming')
async def websocket_endpoint(websocket: WebSocket):
    """PaddleSpeech Online ASR Server api

    The client drives the session with JSON text frames that carry a
    ``signal`` field (``start`` / ``end``) and sends the pcm audio as binary
    frames in between. Partial results are returned after every binary
    frame; a ``finished`` response ends the loop.

    Args:
        websocket (WebSocket): the websocket instance
    """

    #1. wait for the websocket protocol header; the connection is only
    #   established once it has been accepted
    await websocket.accept()

    #2. fetch the online asr engine instance
    engine_pool = get_engine_pool()
    asr_model = engine_pool['asr']

    #3. every connection gets its own handler, created only when the client
    #   sends the start signal
    connection_handler = None

    try:
        #4. process the audio package by package according to the protocol
        #   until the client sends the end signal or an endpoint is detected
        while True:
            # careful here, changed the source code from starlette.websockets
            # 4.1 wait for the next client message
            assert websocket.application_state == WebSocketState.CONNECTED
            message = await websocket.receive()
            websocket._raise_on_disconnect(message)

            #4.2 text for the action command and bytes for pcm data
            if "text" in message:
                # parse the command; malformed JSON is answered like any
                # other invalid command instead of killing the connection
                try:
                    message = json.loads(message["text"])
                except (TypeError, ValueError):
                    message = None

                if not isinstance(message, dict) or 'signal' not in message:
                    resp = {"status": "ok", "message": "no valid json data"}
                    await websocket.send_json(resp)
                    # nothing more to do with this message; without this
                    # the lookup of message['signal'] below raised KeyError
                    continue

                # start command: create the connection handler
                # end command: flush the remaining audio, return the final
                #              result and leave the loop
                signal = message['signal']
                if signal == 'start':
                    resp = {"status": "ok", "signal": "server_ready"}
                    # create the instance that processes the audio
                    connection_handler = asr_model.new_handler()
                    await websocket.send_json(resp)
                elif signal == 'end':
                    if connection_handler is None:
                        # 'end' without a preceding 'start': there is no
                        # handler to finalise, so report it and stop
                        resp = {
                            "status": "failed",
                            "message": "no start signal received"
                        }
                        await websocket.send_json(resp)
                        break

                    # finalise decoding and release the handler state
                    connection_handler.decode(is_finished=True)
                    connection_handler.rescoring()
                    asr_results = connection_handler.get_result()
                    word_time_stamp = connection_handler.get_word_time_stamp()
                    connection_handler.reset()

                    resp = {
                        "status": "ok",
                        "signal": "finished",
                        'result': asr_results,
                        'times': word_time_stamp
                    }
                    await websocket.send_json(resp)
                    break
                else:
                    resp = {"status": "ok", "message": "no valid json data"}
                    await websocket.send_json(resp)

            elif "bytes" in message:
                if connection_handler is None:
                    # audio before the 'start' signal cannot be decoded
                    resp = {
                        "status": "failed",
                        "message": "no start signal received"
                    }
                    await websocket.send_json(resp)
                    continue

                # bytes for the pcm data
                message = message["bytes"]

                # extract features from the new audio and decode this package
                connection_handler.extract_feat(message)
                connection_handler.decode(is_finished=False)

                if connection_handler.endpoint_state:
                    logger.info("endpoint: detected and rescoring.")
                    connection_handler.rescoring()
                    word_time_stamp = connection_handler.get_word_time_stamp()

                asr_results = connection_handler.get_result()

                if connection_handler.endpoint_state:
                    if connection_handler.continuous_decoding:
                        logger.info("endpoint: continue decoding")
                        connection_handler.reset_continuous_decoding()
                    else:
                        logger.info("endpoint: exit decoding")
                        # ending by endpoint
                        resp = {
                            "status": "ok",
                            "signal": "finished",
                            'result': asr_results,
                            'times': word_time_stamp
                        }
                        await websocket.send_json(resp)
                        break

                # return the current partial result; a connection may
                # produce many of these
                resp = {'result': asr_results}
                await websocket.send_json(resp)

    except WebSocketDisconnect as e:
        logger.error(e)


================================================
FILE: paddlespeech/server/ws/tts_api.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import uuid

from fastapi import APIRouter
from fastapi import WebSocket
from starlette.websockets import WebSocketState as WebSocketState

from paddlespeech.cli.log import logger
from paddlespeech.server.engine.engine_pool import get_engine_pool

# Router on which the streaming TTS websocket endpoint and the sample-rate
# query endpoint below are registered.
router = APIRouter()


@router.websocket('/paddlespeech/tts/streaming')
async def websocket_endpoint(websocket: WebSocket):
    """PaddleSpeech Online TTS Server api

    The client sends JSON text frames: ``{"signal": "start"}`` opens a
    session, ``{"text": ..., "spk_id": ...}`` requests a synthesis whose
    audio is streamed back chunk by chunk, and ``{"signal": "end"}`` closes
    the session.

    Args:
        websocket (WebSocket): the websocket instance
    """

    #1. wait for the websocket protocol header; the connection is only
    #   established once it has been accepted
    await websocket.accept()

    #2. fetch the online tts engine instance
    engine_pool = get_engine_pool()
    tts_engine = engine_pool['tts']

    connection_handler = None
    # Defined up front so an 'end' signal that arrives before 'start' can
    # still be answered instead of failing on an unbound name.
    session = None

    if tts_engine.engine_type == "online":
        from paddlespeech.server.engine.tts.online.python.tts_engine import PaddleTTSConnectionHandler
    elif tts_engine.engine_type == "online-onnx":
        from paddlespeech.server.engine.tts.online.onnx.tts_engine import PaddleTTSConnectionHandler
    else:
        logger.error("Online tts engine only support online or online-onnx.")
        # This used to call sys.exit(-1), but `sys` is not imported in this
        # module, so it raised NameError. Close this connection with the
        # "internal error" close code and leave the handler instead.
        await websocket.close(code=1011)
        return

    try:
        while True:
            # careful here, changed the source code from starlette.websockets
            assert websocket.application_state == WebSocketState.CONNECTED
            message = await websocket.receive()
            websocket._raise_on_disconnect(message)
            message = json.loads(message["text"])

            if 'signal' in message:
                # start request
                if message['signal'] == 'start':
                    session = uuid.uuid1().hex
                    resp = {
                        "status": 0,
                        "signal": "server ready",
                        "session": session
                    }

                    connection_handler = PaddleTTSConnectionHandler(tts_engine)
                    await websocket.send_json(resp)

                # end request
                elif message['signal'] == 'end':
                    connection_handler = None
                    resp = {
                        "status": 0,
                        "signal": "connection will be closed",
                        "session": session
                    }
                    await websocket.send_json(resp)
                    break
                else:
                    resp = {"status": 0, "signal": "no valid json data"}
                    await websocket.send_json(resp)

            # speech synthesis request
            elif 'text' in message:
                if connection_handler is None:
                    # No 'start' signal yet, so there is nothing to run the
                    # synthesis on; report a failure and keep listening.
                    logger.error(
                        "Synthesis request received before the start signal.")
                    resp = {"status": -1, "audio": ''}
                    await websocket.send_json(resp)
                    continue

                text = message["text"]
                spk_id = message["spk_id"]

                # run
                wav_generator = connection_handler.run(
                    sentence=text, spk_id=spk_id)

                try:
                    # status 1: one chunk of synthesized audio
                    for tts_results in wav_generator:
                        resp = {"status": 1, "audio": tts_results}
                        await websocket.send_json(resp)
                except Exception as e:
                    # status -1: synthesis failed; log the cause instead of
                    # dropping it silently
                    logger.error(e)
                    resp = {"status": -1, "audio": ''}
                    await websocket.send_json(resp)
                else:
                    # status 2: the stream is exhausted
                    resp = {"status": 2, "audio": ''}
                    await websocket.send_json(resp)
                    logger.info("Complete the synthesis of the audio streams")

            else:
                logger.error(
                    "Invalid request, please check if the request is correct.")

    except Exception as e:
        logger.error(e)


@router.get("/paddlespeech/tts/streaming/samplerate")
def get_samplerate():
    """Return the sample rate reported by the TTS engine.

    Returns:
        ``{"sample_rate": <tts_engine.sample_rate>}`` on success, otherwise
        whatever ``failed_response`` builds for the error that occurred.
    """
    # The error handling below needs these names, but the module never
    # imported them, so any failure turned into a NameError.
    # NOTE(review): the project module paths are assumed to match the ones
    # used by the RESTful TTS API -- confirm before merging.
    import traceback

    from paddlespeech.server.utils.errors import ErrorCode
    from paddlespeech.server.utils.errors import failed_response
    from paddlespeech.server.utils.exception import ServerBaseException

    try:
        engine_pool = get_engine_pool()
        tts_engine = engine_pool['tts']
        logger.info("Get tts engine successfully.")
        sample_rate = tts_engine.sample_rate

        response = {"sample_rate": sample_rate}

    except ServerBaseException as e:
        response = failed_response(e.error_code, e.msg)
    except BaseException:
        response = failed_response(ErrorCode.SERVER_UNKOWN_ERR)
        traceback.print_exc()

    return response


================================================
FILE: paddlespeech/t2s/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging

from . import datasets
from . import exps
from . import frontend
from . import models
from . import modules
from . import training
from . import utils


================================================
FILE: paddlespeech/t2s/assets/__init__.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/assets/csmsc_test.txt
================================================
009901 昨日，这名伤者与医生全部被警方依法刑事拘留。
009902 钱伟长想到上海来办学校是经过深思熟虑的。
009903 她见我一进门就骂，吃饭时也骂，骂得我抬不起头。
009904 李述德在离开之前，只说了一句柱驼杀父亲了。
009905 这种车票和保险单捆绑出售属于重复性购买。
009906 戴佩妮的男友西米露接唱情歌，让她非常开心。
009907 观大势，谋大局，出大策始终是该院的办院方针。
009908 他们骑着摩托回家，正好为农忙时的父母帮忙。
009909 但是因为还没到退休年龄，只能掰着指头捱日子。
009910 这几天雨水不断，人们恨不得待在家里不出门。
009911 没想到徐赟，张海翔两人就此玩起了人间蒸发。
009912 藤村此番发言可能是为了凸显野田的领导能力。
009913 程长庚，生在清王朝嘉庆年间，安徽的潜山小县。
009914 南海海域综合补给基地码头项目正在论证中。
009915 也就是说今晚成都市民极有可能再次看到飘雪。
009916 随着天气转热，各地的游泳场所开始人头攒动。
009917 更让徐先生纳闷的是，房客的手机也打不通了。
009918 遇到颠簸时，应听从乘务员的安全指令，回座位坐好。
009919 他在后面呆惯了，怕自己一插身后的人会不满，不敢排进去。
009920 傍晚七个小人回来了，白雪公主说，你们就是我命中的七个小矮人吧。
009921 他本想说，教育局管这个，他们是一路的，这样一管岂不是妓女起嫖客？
009922 一种表示商品所有权的财物证券，也称商品证券，如提货单，交货单。
009923 会有很丰富的东西留下来，说都说不完。
009924 这句话像从天而降，吓得四周一片寂静。
009925 记者所在的是受害人家属所在的右区。
009926 不管哈大爷去哪，它都一步不离地跟着。
009927 大家抬头望去，一只老鼠正趴在吊顶上。
009928 我决定过年就辞职，接手我爸的废品站！
009929 最终，中国男子乒乓球队获得此奖项。
009930 防汛抗旱两手抓，抗旱相对抓的不够。
009931 图们江下游地区开发开放的进展如何？
009932 这要求中国必须有一个坚强的政党领导。
009933 再说，关于利益上的事俺俩都不好开口。
009934 明代瓦剌，鞑靼入侵明境也是通过此地。
009935 咪咪舔着孩子，把它身上的毛舔干净。
009936 是否这次的国标修订被大企业绑架了？
009937 判决后，姚某妻子胡某不服，提起上诉。
009938 由此可以看出邯钢的经济效益来自何处。
009939 琳达说，是瑜伽改变了她和马儿的生活。
009940 楼下的保安告诉记者，这里不租也不卖。
009941 习近平说，中斯两国人民传统友谊深厚。
009942 传闻越来越多，后来连老汉儿自己都怕了。
009943 我怒吼一声冲上去，举起砖头砸了过去。
009944 我现在还不会，这就回去问问发明我的人。
009945 显然，洛阳性奴案不具备上述两个前提。
009946 另外，杰克逊有文唇线，眼线，眉毛的动作。
009947 昨晚，华西都市报记者电话采访了尹琪。
009948 涅拉季科未透露这些航空公司的名称。
009949 从运行轨迹上来说，它也不可能是星星。
009950 目前看，如果继续加息也存在两难问题。
009951 曾宝仪在节目录制现场大爆观众糗事。
009952 但任凭周某怎么叫，男子仍酣睡不醒。
009953 老大爷说，小子，你挡我财路了，知道不？
009954 没料到，闯下大头佛的阿伟还不知悔改。
009955 卡扎菲部落式统治已遭遇部落内讧。
009956 这个孩子的生命一半来源于另一位女士捐赠的冷冻卵子。
009957 出现这种泥鳅内阁的局面既是野田有意为之，也实属无奈。
009958 济青高速济南，华山，章丘，邹平，周村，淄博，临淄站。
009959 赵凌飞的话，反映了沈阳赛区所有奥运志愿者的共同心声。
009960 因为，我们所发出的力量必会因难度加大而减弱。
009961 发生事故的楼梯拐角处仍可看到血迹。
009962 想过进公安，可能身高不够，老汉儿也不让我进去。
009963 路上关卡很多，为了方便撤离，只好轻装前进。
009964 原来比尔盖茨就是美国微软公司联合创始人呀。
009965 之后他们一家三口将与双方父母往峇里岛旅游。
009966 谢谢总理，也感谢广大网友的参与，我们明年再见。
009967 事实上是，从来没有一个欺善怕恶的人能作出过稍大一点的成就。
009968 我会打开邮件，你可以从那里继续。
009969 美方对近期东海局势表示关切。
009970 据悉，奥巴马一家人对这座冬季白宫极为满意。
009971 打扫完你会很有成就感的，试一试，你就信了。
009972 诺曼站在滑板车上，各就各位，准备出发啦！
009973 塔河的寒夜，气温降到了零下三十多摄氏度。
009974 其间，连破六点六，六点五，六点四，六点三五等多个重要关口。
009975 算命其实只是人们的一种自我安慰和自我暗示而已，我们还是要相信科学才好。
009976 这一切都令人欢欣鼓舞，阿讷西没理由不坚持到最后。
009977 直至公元前一万一千年，它又再次出现。
009978 尽量少玩电脑，少看电视，少打游戏。
009979 从五到七，前后也就是六个月的时间。
009980 一进咖啡店，他就遇见一张熟悉的脸。
009981 好在众弟兄看到了把她追了回来。
009982 有一个人说，哥们儿我们跑过它才能活。
009983 捅了她以后，模糊记得她没咋动了。
009984 从小到大，葛启义没有收到过压岁钱。
009985 舞台下的你会对舞台上的你说什么？
009986 但考生普遍认为，试题的怪多过难。
009987 我希望每个人都能够尊重我们的隐私。
009988 漫天的红霞使劲给两人增添气氛。
009989 晚上加完班开车回家，太累了，迷迷糊糊开着车，走一半的时候，铛一声！
009990 该车将三人撞倒后，在大雾中逃窜。
009991 这人一哆嗦，方向盘也把不稳了，差点撞上了高速边道护栏。
009992 那女孩儿委屈的说，我一回头见你已经进去了我不敢进去啊！
009993 小明摇摇头说，不是，我只是美女看多了，想换个口味而已。
009994 接下来，红娘要求记者交费，记者表示不知表姐身份证号码。
009995 李东蓊表示，自己当时在法庭上发表了一次独特的公诉意见。
009996 另一男子扑了上来，手里拿着明晃晃的长刀，向他胸口直刺。
009997 今天，快递员拿着一个快递在办公室喊，秦王是哪个，有他快递？
009998 这场抗议活动究竟是如何发展演变的，又究竟是谁伤害了谁？
009999 因华国锋肖鸡，墓地设计根据其属相设计。
010000 在狱中，张明宝悔恨交加，写了一份忏悔书。


================================================
FILE: paddlespeech/t2s/assets/sentences.txt
================================================
001 凯莫瑞安联合体的经济崩溃，迫在眉睫。
002 对于所有想要离开那片废土，去寻找更美好生活的人来说。
003 克哈，是你们所有人安全的港湾。
004 为了保护尤摩扬人民不受异虫的残害，我所做的，比他们自己的领导委员会都多。
005 无论他们如何诽谤我，我将继续为所有泰伦人的最大利益，而努力奋斗。
006 身为你们的元首，我带领泰伦人实现了人类统治领地和经济的扩张。
007 我们将继续成长，用行动回击那些只会说风凉话，不愿意和我们相向而行的害群之马。
008 帝国武装力量，无数的优秀儿女，正时刻守卫着我们的家园大门，但是他们孤木难支。
009 凡是今天应征入伍者，所获的所有刑罚罪责，减半。
010 激进分子和异见者希望你们一听见枪声，就背弃多年的和平与繁荣。
011 他们没有勇气和能力，带领人类穿越一个充满危险的星系。
012 法治是我们的命脉，然而它却受到前所未有的挑战。
013 我将恢复我们帝国的荣光，绝不会向任何外星势力低头。
014 我已经驯服了异虫，荡平了星灵。如今它们的创造者，想要夺走我们拥有的一切。
015 永远记住，谁才是最能保护你们的人。
016 不要听信别人的谗言，我不是什么克隆人。

================================================
FILE: paddlespeech/t2s/assets/sentences_canton.txt
================================================
001 白云山爬过一次嘅，好远啊，爬上去都成两个钟
002 睇书咯，番屋企，而家好多人好少睇书噶喎
003 因为如果唔考试嘅话，工资好低噶
004 冇固定噶，你中意休边日就边日噶
005 即系太迟嘅话咧，落班太迟嘅话就喺出边食啲咯
006 是非有公理，慎言莫冒犯别人
007 遇上冷风雨，休太认真
008 痴线蜘蛛条蜘蛛丝痴住枝树枝
009 一蚊一斤鸡，一蚊一斤龟，究竟係鸡贵定係龟贵
010 错就要认，打要企定
011 宜家唔系事必要你讲，但系你所讲嘅说话将会成为呈堂证供
012 人生有几多个十年，不如活得痛快
013 嘢可以乱食，话唔可以乱讲
014 你唔好噉心急入市先喇，淡淡定，有钱剩，睇定啲先再决定喇
015 仔，你唔好喺度搞搞震，冇帮衬喇
016 米话我地人穷就要任人踩，滴水都会成流水浸街
017 佢晨早啪奶茶，同场追加奶绿，又狂怼西米露，喫啫啫猪脚煲
018 喂！三点几嚟，饮茶先啦，做咁多都冇用嘅，老细唔锡你嘅嚟
019 嗱嗱声即刻走去搵嘢做，人必须知道自己嘅用途
020 人人都揸住枝苏格兰场非工业用国际线路自动溶雪16哇佬风油軚垂直升降镭射彩色洗衣干衣气垫毛笔一枝
021 各个国家有各个国家嘅国歌

================================================
FILE: paddlespeech/t2s/assets/sentences_en.txt
================================================
001 Life was like a box of chocolates, you never know what you're gonna get.
002 With great power there must come great responsibility.
003 To be or not to be, that’s a question.
004 A man can be destroyed but not defeated
005 Do not, for one repulse, give up the purpose that you resolved to effort.
006 Death is just a part of life, something we're all destined to do.
007 I think it's hard winning a war with words. 
008 Don’t argue with the people of strong determination, because they may change the fact!
009 Love you three thousand times.

================================================
FILE: paddlespeech/t2s/assets/sentences_mix.txt
================================================
001 你好，欢迎使用 Paddle Speech 中英文混合 T T S 功能，开始你的合成之旅吧!
002 我们的声学模型使用了 Fast Speech Two, 声码器使用了 Parallel Wave GAN and Hifi GAN.
003 Paddle N L P 发布 ERNIE Tiny 全系列中文预训练小模型，快速提升预训练模型部署效率，通用信息抽取技术 U I E Tiny 系列模型全新升级，支持速度更快效果更好的 U I E 小模型。
004 Paddle Speech 发布 P P A S R 流式语音识别系统、P P T T S 流式语音合成系统、P P V P R 全链路声纹识别系统。
005 Paddle Bo Bo: 使用 Paddle Speech 的语音合成模块生成虚拟人的声音。
006 热烈欢迎您在 Discussions 中提交问题，并在 Issues 中指出发现的 bug。此外，我们非常希望您参与到 Paddle Speech 的开发中！
007 我喜欢 eat apple, 你喜欢 drink milk。
008 我们要去云南 team building, 非常非常 happy.
009 AI for Science 平台。

================================================
FILE: paddlespeech/t2s/assets/sentences_sing.txt
================================================
{"utt_id": "2093003457", "input_type": "word", "text": "小酒窝长睫毛AP是你最美的记号", "notes": "C#4/Db4 | F#4/Gb4 | G#4/Ab4 | A#4/Bb4 F#4/Gb4 | F#4/Gb4 C#4/Db4 | C#4/Db4 | rest | C#4/Db4 | A#4/Bb4 | G#4/Ab4 | A#4/Bb4 | G#4/Ab4 | F4 | C#4/Db4", "note_durs": "0.407140 | 0.376190 | 0.242180 | 0.509550 0.183420 | 0.315400 0.235020 | 0.361660 | 0.223070 | 0.377270 | 0.340550 | 0.299620 | 0.344510 | 0.283770 | 0.323390 | 0.360340"}
{"utt_id": "2093003458", "input_type": "phoneme", "phones": "w o m ei t ian sh ui ui b u u zh ao AP x iang n ian n i d e w ei x iao iao AP" , "notes": "C#4/Db4 C#4/Db4 D#4/Eb4 D#4/Eb4 F4 F4 F#4/Gb4 F#4/Gb4 D#4/Eb4 D#4/Eb4 D#4/Eb4 A#3/Bb3 A#3/Bb3 A#3/Bb3 rest F#4/Gb4 F#4/Gb4 F4 F4 F#4/Gb4 F#4/Gb4 F4 F4 G#4/Ab4 G#4/Ab4 D#4/Eb4 D#4/Eb4 C#4/Db4 rest", "note_durs": "0.221750 0.221750 0.414460 0.414460 0.223160 0.223160 0.430900 0.430900 0.335990 0.269270 0.269270 0.289060 0.522690 0.522690 0.355060 0.397130 0.397130 0.247690 0.247690 0.406720 0.406720 0.246830 0.246830 0.307540 0.307540 0.429910 0.429910 0.519130 0.342300", "is_slurs": "0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0"}

================================================
FILE: paddlespeech/t2s/assets/sentences_ssml.txt
================================================
0001 考古人员<speak>西<say-as pinyin='zang4'>藏</say-as>布达拉宫里发现一个被隐<say-as pinyin="cang2">藏</say-as>的装有宝<say-as pinyin="zang4">藏</say-as></speak>箱子。
0002 <speak>有人询问中国银<say-as pinyin='hang2'>行</say-as>北京分<say-as pinyin='hang2 hang2'>行行</say-as>长是否叫任我<say-as pinyin='xing2'>行</say-as></speak>。
0003 <speak>市委书记亲自<say-as pinyin='shuai4'>率</say-as>领审计员对这家公司进行财务审计，发现企业的利润<say-as pinyin='lv4'>率</say-as>数据虚假</speak>。
0004 <speak>学生们对代<say-as pinyin='shu4'>数</say-as>理解不深刻，特别是小<say-as pinyin='shu4'>数</say-as>点，在<say-as pinyin='shu3 shu4'>数数</say-as>时容易弄错</speak>。
0005 <speak>赵<say-as pinyin='chang2'>长</say-as>军从小学习武术，擅<say-as pinyin='chang2'>长</say-as>散打，<say-as pinyin='zhang3'>长</say-as>大后参军，担任连<say-as pinyin='zhang3'>长</say-as></speak>。
0006 <speak>我说她<say-as pinyin='zhang3'>涨</say-as>了工资，她就<say-as pinyin='zhang4'>涨</say-as>红着脸，摇头否认</speak>。
0007 <speak>请把这封信交<say-as pinyin='gei3'>给</say-as>团长，告诉他，前线的供<say-as pinyin='ji3'>给</say-as>一定要有保障</speak>。
0008 <speak>矿下的<say-as pinyin='hang4'>巷</say-as>道，与北京四合院的小<say-as pinyin='xiang4'>巷</say-as>有点相似</speak>。
0009 <speak>他常叹自己命<say-as pinyin='bo2'>薄</say-as>,几亩<say-as pinyin='bao2'>薄</say-as>田，种点<say-as pinyin='bo4'>薄</say-as>荷</speak>。
0010 <speak>小明对天相很有研究，在<say-as pinyin='su4'>宿</say-as>舍说了一<say-as pinyin='xiu3'>宿</say-as>有关星<say-as pinyin='xiu4'>宿</say-as>的常识</speak>。

================================================
FILE: paddlespeech/t2s/audio/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .audio import AudioProcessor
from .codec import *
from .spec_normalizer import LogMagnitude
from .spec_normalizer import NormalizerBase


================================================
FILE: paddlespeech/t2s/audio/audio.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import librosa
import numpy as np
import soundfile as sf

# Names exported by `from <this module> import *`.
__all__ = ["AudioProcessor"]


class AudioProcessor(object):
    """Wav I/O, STFT and mel-spectrogram helpers sharing one configuration.

    All transforms are thin wrappers around ``librosa`` / ``soundfile`` that
    reuse the parameters given at construction time.
    """

    def __init__(self,
                 sample_rate: int,
                 n_fft: int,
                 win_length: int,
                 hop_length: int,
                 n_mels: int=80,
                 fmin: int=0,
                 fmax: int=None,
                 window="hann",
                 center=True,
                 pad_mode="reflect",
                 normalize=True):
        """Store the configuration and precompute the mel filter bank.

        Args:
            sample_rate: rate used when loading and writing wav files and
                when building the mel filter bank.
            n_fft: FFT size for the STFT and the mel filter bank.
            win_length: window length forwarded to the (i)STFT.
            hop_length: hop length forwarded to the (i)STFT.
            n_mels: number of mel bands.
            fmin: lowest frequency of the mel filter bank.
            fmax: highest frequency of the mel filter bank; ``None`` is
                passed through to ``librosa.filters.mel`` unchanged.
            window: window specification forwarded to the (i)STFT.
            center: ``center`` flag forwarded to the (i)STFT.
            pad_mode: padding mode forwarded to the STFT.
            normalize: when true, ``read_wav`` peak-normalizes the signal.
        """
        # read & write
        self.sample_rate = sample_rate
        self.normalize = normalize

        # stft
        self.n_fft = n_fft
        self.win_length = win_length
        self.hop_length = hop_length
        self.window = window
        self.center = center
        self.pad_mode = pad_mode

        # mel
        self.n_mels = n_mels
        self.fmin = fmin
        self.fmax = fmax

        self.mel_filter = self._create_mel_filter()
        # pseudo-inverse of the filter bank, kept alongside it
        self.inv_mel_filter = np.linalg.pinv(self.mel_filter)

    def _create_mel_filter(self):
        """Build the mel filter bank from the stored configuration."""
        mel_filter = librosa.filters.mel(
            sr=self.sample_rate,
            n_fft=self.n_fft,
            n_mels=self.n_mels,
            fmin=self.fmin,
            fmax=self.fmax)
        return mel_filter

    @staticmethod
    def _peak_normalize(wav, peak_target=0.999):
        """Scale ``wav`` so that its largest absolute sample is ``peak_target``.

        Empty and all-zero signals are returned unchanged: they have no peak
        to scale by, and dividing by it would raise or fill the array with
        NaN.
        """
        if np.size(wav) == 0:
            return wav
        peak = np.max(np.abs(wav))
        if peak == 0:
            return wav
        return wav / peak * peak_target

    def read_wav(self, filename):
        """Load ``filename`` at ``self.sample_rate`` and optionally normalize it.

        Returns:
            The samples returned by ``librosa.load``, peak-normalized to
            0.999 when ``self.normalize`` is true.
        """
        # resampling may occur
        wav, _ = librosa.load(filename, sr=self.sample_rate)

        # normalize the volume
        if self.normalize:
            wav = self._peak_normalize(wav)
        return wav

    def write_wav(self, path, wav):
        """Write ``wav`` to ``path`` using ``self.sample_rate``."""
        sf.write(path, wav, samplerate=self.sample_rate)

    def stft(self, wav):
        """Return the STFT of ``wav`` computed with the stored parameters."""
        D = librosa.core.stft(
            wav,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            win_length=self.win_length,
            window=self.window,
            center=self.center,
            pad_mode=self.pad_mode)
        return D

    def istft(self, D):
        """Invert the STFT matrix ``D`` back to a waveform."""
        wav = librosa.core.istft(
            D,
            hop_length=self.hop_length,
            win_length=self.win_length,
            window=self.window,
            center=self.center)
        return wav

    def spectrogram(self, wav):
        """Return the magnitude of the STFT of ``wav``."""
        D = self.stft(wav)
        return np.abs(D)

    def mel_spectrogram(self, wav):
        """Return the magnitude spectrogram of ``wav`` projected onto the mel filter bank."""
        S = self.spectrogram(wav)
        mel = np.dot(self.mel_filter, S)
        return mel


================================================
FILE: paddlespeech/t2s/audio/codec.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math

import numpy as np
import paddle


def label_2_float(x, bits):
    """Map integer labels onto floats.

    x: [0: 2**bit-1], return: [-1, 1]
    """
    top_label = 2**bits - 1.
    return 2 * x / top_label - 1.


def float_2_label(x, bits):
    """Map floats onto the label range (values are not rounded).

    x: [-1, 1], return: [0, 2**bits-1]
    """
    assert abs(x).max() <= 1.0
    top_label = 2**bits - 1
    scaled = (x + 1.) * top_label / 2
    return scaled.clip(0, top_label)


def encode_mu_law(x, mu):
    """Mu-law compress ``x`` and quantize it to labels.

    x: [-1, 1], mu: 2**bits, return: [0, 2**bits-1]
    see https://en.wikipedia.org/wiki/%CE%9C-law_algorithm
    be careful the input `mu` here, which is +1 than that of the link above
    """
    top_label = mu - 1
    numerator = np.sign(x) * np.log(1 + top_label * np.abs(x))
    compressed = numerator / np.log(1 + top_label)
    # shift [-1, 1] to [0, top_label] and round to the nearest label
    return np.floor((compressed + 1) / 2 * top_label + 0.5)


# from_labels = True:
# y: [0: 2**bit-1], mu: 2**bits, return: [-1,1]
# from_labels = False:
# y: [-1, 1], return: [-1, 1]
def decode_mu_law(y, mu, from_labels=True):
    """Expand mu-law compressed values back to the linear domain.

    Args:
        y: value(s) accepted by ``paddle.sign`` / ``paddle.abs``. When
            ``from_labels`` is true they are first rescaled with
            ``label_2_float`` using ``log2(mu)`` bits.
        mu: number of quantization levels. The companding constant actually
            used is ``mu - 1``.
        from_labels (bool): whether ``y`` holds labels rather than values
            already in [-1, 1].

    Returns:
        ``sign(y) / (mu - 1) * (mu ** |y| - 1)`` evaluated with paddle ops.
    """
    # TODO: get rid of log2 - makes no sense
    signal = label_2_float(y, math.log2(mu)) if from_labels else y
    levels = mu - 1
    return paddle.sign(signal) / levels * ((1 + levels)**paddle.abs(signal) - 1)


================================================
FILE: paddlespeech/t2s/audio/spec_normalizer.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module contains normalizers for spectrogram magnitude.
Normalizers are invertible transformations. They can be used to process the
magnitude of a spectrogram before training, and to recover the magnitude from
a generated spectrogram so that it can be used with vocoders like Griffin-Lim.

The base class describes the interface. `transform` performs the
transformation and `inverse` performs the inverse transformation.

check issues:
https://github.com/mozilla/TTS/issues/377
"""
import numpy as np

__all__ = ["NormalizerBase", "LogMagnitude", "UnitMagnitude"]


class NormalizerBase:
    """Interface for spectrogram-magnitude normalizers.

    Both methods only raise ``NotImplementedError`` here; subclasses are
    expected to override them.
    """

    def transform(self, spec):
        """Normalize ``spec``. Must be overridden."""
        raise NotImplementedError("transform must be implemented")

    def inverse(self, normalized):
        """Undo ``transform`` on ``normalized``. Must be overridden."""
        raise NotImplementedError("inverse must be implemented")


class LogMagnitude(NormalizerBase):
    """
    Natural-log normalizer with a lower floor on the input.

    The original note describes it as a simple normalizer used in Waveglow,
    Waveflow, tacotron2...
    """

    def __init__(self, min=1e-5):
        # Smallest magnitude passed to the logarithm.
        self.min = min

    def transform(self, x):
        """Return ``log(max(x, self.min))`` element-wise."""
        floored = np.maximum(x, self.min)
        return np.log(floored)

    def inverse(self, x):
        """Return ``exp(x)``; values floored by ``transform`` are not restored."""
        return np.exp(x)


class UnitMagnitude(NormalizerBase):
    # dbscale and (0, 1) normalization
    """
    Normalizer that converts magnitudes to a decibel scale and maps the
    result into the range [0, 1].
    """

    def __init__(self, min=1e-5):
        # Smallest magnitude passed to the logarithm.
        self.min = min

    def transform(self, x):
        """Map magnitudes to [0, 1].

        Computes ``20 * log10(max(min, x)) - 20``, shifts it by +100,
        divides by 100 and clips the result to [0, 1].
        """
        floored = np.maximum(self.min, x)
        decibels = 20 * np.log10(floored) - 20
        return np.clip((decibels + 100) / 100, 0, 1)

    def inverse(self, x):
        """Map normalized values (clipped to [0, 1]) back to magnitudes.

        Evaluates ``10 ** ((d + 20) / 20)`` through ``exp`` and ``log(10)``,
        where ``d = clip(x, 0, 1) * 100 - 100``.
        """
        decibels = np.clip(x, 0, 1) * 100 - 100
        return np.exp((decibels + 20) / 20 * np.log(10))


================================================
FILE: paddlespeech/t2s/datasets/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .ljspeech import *


================================================
FILE: paddlespeech/t2s/datasets/am_batch_fn.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle

from paddlespeech.t2s.datasets.batch import batch_sequences
from paddlespeech.t2s.modules.nets_utils import get_seg_pos
from paddlespeech.t2s.modules.nets_utils import make_non_pad_mask
from paddlespeech.t2s.modules.nets_utils import phones_masking
from paddlespeech.t2s.modules.nets_utils import phones_text_masking


# A separate builder is needed because extra parameters have to be passed to the collate function
def build_erniesat_collate_fn(mlm_prob: float=0.8,
                              mean_phn_span: int=8,
                              seg_emb: bool=False,
                              text_masking: bool=False):
    """Create an ``ErnieSATCollateFn`` configured with the given options.

    Args:
        mlm_prob (float): passed to ``ErnieSATCollateFn`` as ``mlm_prob``.
        mean_phn_span (int): passed on as ``mean_phn_span``.
        seg_emb (bool): passed on as ``seg_emb``.
        text_masking (bool): passed on as ``text_masking``.

    Returns:
        ErnieSATCollateFn: callable taking the list of examples.
    """
    options = dict(
        mlm_prob=mlm_prob,
        mean_phn_span=mean_phn_span,
        seg_emb=seg_emb,
        text_masking=text_masking)
    return ErnieSATCollateFn(**options)


class ErnieSATCollateFn:
    """Callable that forwards examples to ``erniesat_batch_fn`` with stored options."""

    def __init__(self,
                 mlm_prob: float=0.8,
                 mean_phn_span: int=8,
                 seg_emb: bool=False,
                 text_masking: bool=False):
        # Options are kept verbatim and replayed on every call.
        self.mlm_prob, self.mean_phn_span = mlm_prob, mean_phn_span
        self.seg_emb, self.text_masking = seg_emb, text_masking

    def __call__(self, exmaples):
        """Collate ``exmaples`` with ``erniesat_batch_fn`` and the stored options."""
        stored = {
            "mlm_prob": self.mlm_prob,
            "mean_phn_span": self.mean_phn_span,
            "seg_emb": self.seg_emb,
            "text_masking": self.text_masking,
        }
        return erniesat_batch_fn(exmaples, **stored)


def erniesat_batch_fn(examples,
                      mlm_prob: float=0.8,
                      mean_phn_span: int=8,
                      seg_emb: bool=False,
                      text_masking: bool=False):
    """Collate ERNIE-SAT examples and generate mask / segment positions.

    Args:
        examples: sequence of dict-like items with the keys "text",
            "text_lengths", "speech", "speech_lengths", "align_start" and
            "align_end". "span_bdy" is optional and is only looked up on the
            first item.
        mlm_prob (float): forwarded to the masking helper.
        mean_phn_span (int): forwarded to the masking helper.
        seg_emb (bool): forwarded to ``get_seg_pos``.
        text_masking (bool): when true, ``phones_text_masking`` produces both
            the speech and the text masked positions; otherwise
            ``phones_masking`` produces the speech positions and the text
            positions are all zeros.

    Returns:
        dict: keys "text", "speech", "masked_pos", "speech_mask",
        "text_mask", "speech_seg_pos", "text_seg_pos" and "text_masked_pos".
    """

    def collect(key, dtype):
        # One numpy array per example for the requested field.
        return [np.array(sample[key], dtype=dtype) for sample in examples]

    # fields = ["text", "text_lengths", "speech", "speech_lengths", "align_start", "align_end"]
    text_arrays = collect("text", np.int64)
    speech_arrays = collect("speech", np.float32)
    text_len_arrays = collect("text_lengths", np.int64)
    speech_len_arrays = collect("speech_lengths", np.int64)
    start_arrays = collect("align_start", np.int64)
    end_arrays = collect("align_end", np.int64)
    # Number of alignment entries of every example.
    align_counts = [
        np.array(len(sample["align_start"]), dtype=np.int64)
        for sample in examples
    ]

    # add padding
    text_arrays = batch_sequences(text_arrays)
    speech_arrays = batch_sequences(speech_arrays)
    # The alignments are batched but not converted to paddle.Tensor here.
    align_start = batch_sequences(start_arrays)
    align_end = batch_sequences(end_arrays)

    # convert each batch to paddle.Tensor
    text = paddle.to_tensor(text_arrays)
    speech = paddle.to_tensor(speech_arrays)
    text_lengths = paddle.to_tensor(text_len_arrays)
    speech_lengths = paddle.to_tensor(speech_len_arrays)
    align_start_lengths = paddle.to_tensor(align_counts)

    text_mask = make_non_pad_mask(
        text_lengths, text, length_dim=1).unsqueeze(-2)
    speech_mask = make_non_pad_mask(
        speech_lengths, speech[:, :, 0], length_dim=1).unsqueeze(-2)

    # span_bdy stays None for training; inference examples carry it.
    span_bdy = None
    if 'span_bdy' in examples[0].keys():
        span_bdy = paddle.to_tensor(collect("span_bdy", np.int64))

    # Keyword arguments shared by both masking helpers.
    masking_kwargs = dict(
        xs_pad=speech,
        src_mask=speech_mask,
        align_start=align_start,
        align_end=align_end,
        align_start_lens=align_start_lengths,
        mlm_prob=mlm_prob,
        mean_phn_span=mean_phn_span,
        span_bdy=span_bdy)

    if not text_masking:
        # Training on pure Chinese or pure English data: A3T does not mask
        # phonemes, only speech. The main difference between A3T and
        # ERNIE-SAT lies in how masking is done.
        masked_pos = phones_masking(**masking_kwargs)
        text_masked_pos = paddle.zeros(paddle.shape(text))
    else:
        # Dual masking: for mixed Chinese/English data speech and text are
        # masked together; ERNIE-SAT masks both for the cross-lingual case.
        masked_pos, text_masked_pos = phones_text_masking(
            text_pad=text, text_mask=text_mask, **masking_kwargs)

    speech_seg_pos, text_seg_pos = get_seg_pos(
        speech_pad=speech,
        text_pad=text,
        align_start=align_start,
        align_end=align_end,
        align_start_lens=align_start_lengths,
        seg_emb=seg_emb)

    return {
        "text": text,
        "speech": speech,
        # need to generate
        "masked_pos": masked_pos,
        "speech_mask": speech_mask,
        "text_mask": text_mask,
        "speech_seg_pos": speech_seg_pos,
        "text_seg_pos": text_seg_pos,
        "text_masked_pos": text_masked_pos
    }


def tacotron2_single_spk_batch_fn(examples):
    """Collate Tacotron2 single-speaker examples into a dict of tensors.

    Args:
        examples: sequence of dict-like items with the keys "text",
            "text_lengths", "speech" and "speech_lengths".

    Returns:
        dict: "text" and "speech" are batched with ``batch_sequences`` and
        converted with ``paddle.to_tensor``; "text_lengths" and
        "speech_lengths" are converted directly.
    """

    def collect(key, dtype):
        # One numpy array per example for the requested field.
        return [np.array(sample[key], dtype=dtype) for sample in examples]

    token_ids = collect("text", np.int64)
    frames = collect("speech", np.float32)
    token_counts = collect("text_lengths", np.int64)
    frame_counts = collect("speech_lengths", np.int64)

    token_ids = batch_sequences(token_ids)
    frames = batch_sequences(frames)

    # convert each batch to paddle.Tensor
    token_ids, frames, token_counts, frame_counts = (
        paddle.to_tensor(values)
        for values in (token_ids, frames, token_counts, frame_counts))

    return dict(
        text=token_ids,
        text_lengths=token_counts,
        speech=frames,
        speech_lengths=frame_counts)


def tacotron2_multi_spk_batch_fn(examples):
    """Collate Tacotron2 multi-speaker examples.

    The fields "text", "text_lengths", "speech" and "speech_lengths" are
    collated exactly as in ``tacotron2_single_spk_batch_fn``. Speaker
    information is then added depending on the keys of the first example.

    Returns:
        dict: the single-speaker batch plus "spk_emb" (batched float32
        embeddings) when present, otherwise "spk_id" (int64) when present.
    """
    batch = tacotron2_single_spk_batch_fn(examples)
    first = examples[0]
    # spk_emb has a higher priority than spk_id
    if "spk_emb" in first:
        embeddings = [
            np.array(sample["spk_emb"], dtype=np.float32)
            for sample in examples
        ]
        batch["spk_emb"] = paddle.to_tensor(batch_sequences(embeddings))
    elif "spk_id" in first:
        speaker_ids = [
            np.array(sample["spk_id"], dtype=np.int64) for sample in examples
        ]
        batch["spk_id"] = paddle.to_tensor(speaker_ids)
    return batch


def speedyspeech_single_spk_batch_fn(examples):
    """Collate SpeedySpeech single-speaker examples into a dict of tensors.

    Args:
        examples: sequence of dict-like items with the keys "phones",
            "tones", "num_phones", "num_frames", "feats" and "durations".

    Returns:
        dict: one ``paddle.Tensor`` per key; "phones", "tones", "feats" and
        "durations" go through ``batch_sequences`` first.
    """
    # (key, dtype, goes through batch_sequences) in the order fields are read
    layout = (
        ("phones", np.int64, True),
        ("tones", np.int64, True),
        ("feats", np.float32, True),
        ("durations", np.int64, True),
        ("num_phones", np.int64, False),
        ("num_frames", np.int64, False), )

    columns = {
        key: [np.array(sample[key], dtype=dtype) for sample in examples]
        for key, dtype, _ in layout
    }
    for key, _, needs_batching in layout:
        if needs_batching:
            columns[key] = batch_sequences(columns[key])

    # convert each batch to paddle.Tensor
    tensors = {key: paddle.to_tensor(columns[key]) for key, _, _ in layout}

    output_order = ("phones", "tones", "num_phones", "num_frames", "feats",
                    "durations")
    return {key: tensors[key] for key in output_order}


def speedyspeech_multi_spk_batch_fn(examples):
    """Collate SpeedySpeech multi-speaker examples.

    The fields "phones", "tones", "num_phones", "num_frames", "feats" and
    "durations" are collated exactly as in
    ``speedyspeech_single_spk_batch_fn``. "spk_id" is added as an int64
    tensor when the first example has that key.
    """
    batch = speedyspeech_single_spk_batch_fn(examples)
    if "spk_id" in examples[0]:
        speaker_ids = [
            np.array(sample["spk_id"], dtype=np.int64) for sample in examples
        ]
        batch["spk_id"] = paddle.to_tensor(speaker_ids)
    return batch


def fastspeech2_single_spk_batch_fn(examples):
    """Collate FastSpeech2 single-speaker examples into a dict of tensors.

    Args:
        examples: sequence of dict-like items with the keys "text",
            "text_lengths", "speech", "speech_lengths", "durations", "pitch"
            and "energy".

    Returns:
        dict: one ``paddle.Tensor`` per key; every field except the two
        length fields goes through ``batch_sequences`` first.
    """
    # dtype of every field, listed in the order the fields are read
    dtypes = {
        "text": np.int64,
        "speech": np.float32,
        "pitch": np.float32,
        "energy": np.float32,
        "durations": np.int64,
        "text_lengths": np.int64,
        "speech_lengths": np.int64,
    }
    data = {
        key: [np.array(sample[key], dtype=dtype) for sample in examples]
        for key, dtype in dtypes.items()
    }

    sequence_keys = ("text", "pitch", "speech", "durations", "energy")
    for key in sequence_keys:
        data[key] = batch_sequences(data[key])

    # convert each batch to paddle.Tensor
    for key in sequence_keys + ("text_lengths", "speech_lengths"):
        data[key] = paddle.to_tensor(data[key])

    output_order = ("text", "text_lengths", "durations", "speech",
                    "speech_lengths", "pitch", "energy")
    return {key: data[key] for key in output_order}


def fastspeech2_multi_spk_batch_fn(examples):
    """Collate FastSpeech2 multi-speaker examples.

    The fields "text", "text_lengths", "speech", "speech_lengths",
    "durations", "pitch" and "energy" are collated exactly as in
    ``fastspeech2_single_spk_batch_fn``. Speaker information is then added
    depending on the keys of the first example.

    Returns:
        dict: the single-speaker batch plus "spk_emb" (batched float32
        embeddings) when present, otherwise "spk_id" (int64) when present.
    """
    batch = fastspeech2_single_spk_batch_fn(examples)
    first = examples[0]
    # spk_emb has a higher priority than spk_id
    if "spk_emb" in first:
        embeddings = [
            np.array(sample["spk_emb"], dtype=np.float32)
            for sample in examples
        ]
        batch["spk_emb"] = paddle.to_tensor(batch_sequences(embeddings))
    elif "spk_id" in first:
        speaker_ids = [
            np.array(sample["spk_id"], dtype=np.int64) for sample in examples
        ]
        batch["spk_id"] = paddle.to_tensor(speaker_ids)
    return batch


def diffsinger_single_spk_batch_fn(examples):
    """Collate DiffSinger single-speaker examples into a dict of tensors.

    Args:
        examples: sequence of dict-like items with the keys "text", "note",
            "note_dur", "is_slur", "text_lengths", "speech",
            "speech_lengths", "durations", "pitch" and "energy".

    Returns:
        dict: one ``paddle.Tensor`` per key; every field except the two
        length fields goes through ``batch_sequences`` first.
    """
    # dtype of every field, listed in the order the fields are read
    dtypes = {
        "text": np.int64,
        "note": np.int64,
        "note_dur": np.float32,
        "is_slur": np.int64,
        "speech": np.float32,
        "pitch": np.float32,
        "energy": np.float32,
        "durations": np.int64,
        "text_lengths": np.int64,
        "speech_lengths": np.int64,
    }
    data = {
        key: [np.array(sample[key], dtype=dtype) for sample in examples]
        for key, dtype in dtypes.items()
    }

    sequence_keys = ("text", "note", "note_dur", "is_slur", "pitch", "speech",
                     "durations", "energy")
    for key in sequence_keys:
        data[key] = batch_sequences(data[key])

    # convert each batch to paddle.Tensor
    for key in sequence_keys + ("text_lengths", "speech_lengths"):
        data[key] = paddle.to_tensor(data[key])

    output_order = ("text", "note", "note_dur", "is_slur", "text_lengths",
                    "durations", "speech", "speech_lengths", "pitch", "energy")
    return {key: data[key] for key in output_order}


def diffsinger_multi_spk_batch_fn(examples):
    """Collate DiffSinger multi-speaker examples.

    The fields "text", "note", "note_dur", "is_slur", "text_lengths",
    "speech", "speech_lengths", "durations", "pitch" and "energy" are
    collated exactly as in ``diffsinger_single_spk_batch_fn``. Speaker
    information is then added depending on the keys of the first example.

    Returns:
        dict: the single-speaker batch plus "spk_emb" (batched float32
        embeddings) when present, otherwise "spk_id" (int64) when present.
    """
    batch = diffsinger_single_spk_batch_fn(examples)
    first = examples[0]
    # spk_emb has a higher priority than spk_id
    if "spk_emb" in first:
        embeddings = [
            np.array(sample["spk_emb"], dtype=np.float32)
            for sample in examples
        ]
        batch["spk_emb"] = paddle.to_tensor(batch_sequences(embeddings))
    elif "spk_id" in first:
        speaker_ids = [
            np.array(sample["spk_id"], dtype=np.int64) for sample in examples
        ]
        batch["spk_id"] = paddle.to_tensor(speaker_ids)
    return batch


def transformer_single_spk_batch_fn(examples):
    """Collate Transformer-TTS single-speaker examples into a dict of tensors.

    Args:
        examples: sequence of dict-like items with the keys "text",
            "text_lengths", "speech" and "speech_lengths".

    Returns:
        dict: "text" and "speech" are batched with ``batch_sequences`` and
        converted with ``paddle.to_tensor``; the two length fields are
        converted directly.
    """
    field_dtypes = (("text", np.int64), ("speech", np.float32),
                    ("text_lengths", np.int64), ("speech_lengths", np.int64))
    text, speech, text_lengths, speech_lengths = (
        [np.array(sample[key], dtype=dtype) for sample in examples]
        for key, dtype in field_dtypes)

    text = batch_sequences(text)
    speech = batch_sequences(speech)

    # convert each batch to paddle.Tensor
    text, speech, text_lengths, speech_lengths = map(
        paddle.to_tensor, (text, speech, text_lengths, speech_lengths))

    return {
        "text": text,
        "text_lengths": text_lengths,
        "speech": speech,
        "speech_lengths": speech_lengths,
    }


def vits_single_spk_batch_fn(examples):
    """Collate VITS single-speaker examples.

    Args:
        examples: sequence of dict-like items with the keys "text",
            "text_lengths", "feats", "feats_lengths" and "wave". The
            waveform is read from "wave" and returned under "speech".

    Returns:
        Dict[str, Any]:
            - text (Tensor): Text index tensor (B, T_text).
            - text_lengths (Tensor): Text length tensor (B,).
            - feats (Tensor): Feature tensor (B, T_feats, aux_channels).
            - feats_lengths (Tensor): Feature length tensor (B,).
            - speech: Speech waveform batch (B, T_wav) as returned by
              ``batch_sequences``; unlike the other entries it is not passed
              through ``paddle.to_tensor`` here.

    """
    # (example key, dtype) in the order the fields are read
    sources = (
        ("text", np.int64),
        ("feats", np.float32),
        ("wave", np.float32),
        ("text_lengths", np.int64),
        ("feats_lengths", np.int64), )
    data = {
        key: [np.array(sample[key], dtype=dtype) for sample in examples]
        for key, dtype in sources
    }

    for key in ("text", "feats", "wave"):
        data[key] = batch_sequences(data[key])

    # convert each batch to paddle.Tensor (the waveform is left as is)
    for key in ("text", "feats", "text_lengths", "feats_lengths"):
        data[key] = paddle.to_tensor(data[key])

    return {
        "text": data["text"],
        "text_lengths": data["text_lengths"],
        "feats": data["feats"],
        "feats_lengths": data["feats_lengths"],
        "speech": data["wave"]
    }


def vits_multi_spk_batch_fn(examples):
    """Collate VITS multi-speaker examples.

    The fields "text", "text_lengths", "feats", "feats_lengths" and the
    waveform (read from "wave", returned as "speech") are collated exactly
    as in ``vits_single_spk_batch_fn``. Speaker information is then added
    depending on the keys of the first example.

    Returns:
        Dict[str, Any]:
            - text (Tensor): Text index tensor (B, T_text).
            - text_lengths (Tensor): Text length tensor (B,).
            - feats (Tensor): Feature tensor (B, T_feats, aux_channels).
            - feats_lengths (Tensor): Feature length tensor (B,).
            - speech: Speech waveform batch (B, T_wav); not converted with
              ``paddle.to_tensor`` here.
            - spk_id (Optional[Tensor]): Speaker index tensor (B,) or (B, 1).
            - spk_emb (Optional[Tensor]): Speaker embedding tensor (B, spk_embed_dim).
    """
    batch = vits_single_spk_batch_fn(examples)
    first = examples[0]
    # spk_emb has a higher priority than spk_id
    if "spk_emb" in first:
        embeddings = [
            np.array(sample["spk_emb"], dtype=np.float32)
            for sample in examples
        ]
        batch["spk_emb"] = paddle.to_tensor(batch_sequences(embeddings))
    elif "spk_id" in first:
        speaker_ids = [
            np.array(sample["spk_id"], dtype=np.int64) for sample in examples
        ]
        batch["spk_id"] = paddle.to_tensor(speaker_ids)
    return batch


def jets_single_spk_batch_fn(examples):
    """Collate JETS single-speaker examples.

    Args:
        examples: sequence of dict-like items with the keys "text",
            "text_lengths", "feats", "feats_lengths", "durations", "pitch",
            "energy" and "wave". The waveform is read from "wave" and
            returned under "speech".

    Returns:
        Dict[str, Any]:
            - text (Tensor): Text index tensor (B, T_text).
            - text_lengths (Tensor): Text length tensor (B,).
            - feats (Tensor): Feature tensor (B, T_feats, aux_channels).
            - feats_lengths (Tensor): Feature length tensor (B,).
            - durations (Tensor): Feature tensor (B, T_text,).
            - durations_lengths (Tensor): Durations length tensor (B,);
              this is the same tensor object as ``text_lengths``.
            - pitch (Tensor): Feature tensor (B, pitch_length,).
            - energy (Tensor): Feature tensor (B, energy_length,).
            - speech: Speech waveform batch (B, T_wav) as returned by
              ``batch_sequences``; not passed through ``paddle.to_tensor``.

    """
    # (example key, dtype) in the order the fields are read
    sources = (
        ("text", np.int64),
        ("feats", np.float32),
        ("durations", np.int64),
        ("pitch", np.float32),
        ("energy", np.float32),
        ("wave", np.float32),
        ("text_lengths", np.int64),
        ("feats_lengths", np.int64), )
    data = {
        key: [np.array(sample[key], dtype=dtype) for sample in examples]
        for key, dtype in sources
    }

    for key in ("text", "feats", "durations", "pitch", "energy", "wave"):
        data[key] = batch_sequences(data[key])

    # convert each batch to paddle.Tensor (the waveform is left as is)
    for key in ("text", "feats", "durations", "pitch", "energy",
                "text_lengths", "feats_lengths"):
        data[key] = paddle.to_tensor(data[key])

    return {
        "text": data["text"],
        "text_lengths": data["text_lengths"],
        "feats": data["feats"],
        "feats_lengths": data["feats_lengths"],
        "durations": data["durations"],
        # durations are reported with the text lengths
        "durations_lengths": data["text_lengths"],
        "pitch": data["pitch"],
        "energy": data["energy"],
        "speech": data["wave"],
    }


def jets_multi_spk_batch_fn(examples):
    """Collate JETS multi-speaker examples.

    All non-speaker fields are collated exactly as in
    ``jets_single_spk_batch_fn``. Speaker information is then added
    depending on the keys of the first example.

    Returns:
        Dict[str, Any]:
            - text (Tensor): Text index tensor (B, T_text).
            - text_lengths (Tensor): Text length tensor (B,).
            - feats (Tensor): Feature tensor (B, T_feats, aux_channels).
            - feats_lengths (Tensor): Feature length tensor (B,).
            - durations (Tensor): Feature tensor (B, T_text,).
            - durations_lengths (Tensor): Durations length tensor (B,);
              the same tensor object as ``text_lengths``.
            - pitch (Tensor): Feature tensor (B, pitch_length,).
            - energy (Tensor): Feature tensor (B, energy_length,).
            - speech: Speech waveform batch (B, T_wav); not converted with
              ``paddle.to_tensor`` here.
            - spk_id (Optional[Tensor]): Speaker index tensor (B,) or (B, 1).
            - spk_emb (Optional[Tensor]): Speaker embedding tensor (B, spk_embed_dim).
    """
    batch = jets_single_spk_batch_fn(examples)
    first = examples[0]
    # spk_emb has a higher priority than spk_id
    if "spk_emb" in first:
        embeddings = [
            np.array(sample["spk_emb"], dtype=np.float32)
            for sample in examples
        ]
        batch["spk_emb"] = paddle.to_tensor(batch_sequences(embeddings))
    elif "spk_id" in first:
        speaker_ids = [
            np.array(sample["spk_id"], dtype=np.int64) for sample in examples
        ]
        batch["spk_id"] = paddle.to_tensor(speaker_ids)
    return batch


# A separate builder is needed because extra parameters have to be passed to the collate function
def build_starganv2_vc_collate_fn(latent_dim: int=16, max_mel_length: int=192):

    return StarGANv2VCCollateFn(
        latent_dim=latent_dim, max_mel_length=max_mel_length)


class StarGANv2VCCollateFn:
    """Collate functor that assembles StarGANv2-VC training batches.

    Each example is expected to be a dict with the keys ``label``,
    ``ref_label``, ``mel``, ``ref_mel`` and ``ref_mel_2``. Mels longer than
    ``max_mel_length`` frames are randomly cropped before padding.

    Args:
        latent_dim (int, optional): size of the random latent codes sampled per example. Defaults to 16.
        max_mel_length (int, optional): maximum number of frames kept per mel. Defaults to 192.
    """

    def __init__(self, latent_dim: int=16, max_mel_length: int=192):
        self.latent_dim = latent_dim
        self.max_mel_length = max_mel_length

    def random_clip(self, mel: np.array):
        """Randomly crop `mel` along axis 0 to at most `max_mel_length` frames.

        Args:
            mel (np.ndarray): rank-2 array indexed as [T, n_mels]
                (the original comment gives [T, 80]).

        Returns:
            np.ndarray: `mel` itself when it is short enough, otherwise a
            window of exactly `max_mel_length` consecutive frames.
        """
        mel_length = mel.shape[0]
        if mel_length > self.max_mel_length:
            # `high` is exclusive in np.random.randint, so add 1 to make the
            # last valid start (mel_length - max_mel_length) reachable.
            random_start = np.random.randint(
                0, mel_length - self.max_mel_length + 1)

            mel = mel[random_start:random_start + self.max_mel_length, :]
        return mel

    # NOTE: the parameter keeps its historical spelling for backward compatibility.
    def __call__(self, exmaples):
        """Collate a list of examples; see `starganv2_vc_batch_fn`."""
        return self.starganv2_vc_batch_fn(exmaples)

    def starganv2_vc_batch_fn(self, examples):
        """Build a batch dict of paddle tensors from a list of examples.

        Args:
            examples (List[Dict]): examples with the keys ``label``,
                ``ref_label``, ``mel``, ``ref_mel`` and ``ref_mel_2``.

        Returns:
            Dict[str, paddle.Tensor]: keys ``x_real``, ``y_org``, ``x_ref``,
            ``x_ref2``, ``y_trg``, ``z_trg`` and ``z_trg2``.
        """
        batch_size = len(examples)

        label = [np.array(item["label"], dtype=np.int64) for item in examples]
        ref_label = [
            np.array(item["ref_label"], dtype=np.int64) for item in examples
        ]

        # the mels have to be cropped before they are padded into a batch
        mel = [self.random_clip(item["mel"]) for item in examples]
        ref_mel = [self.random_clip(item["ref_mel"]) for item in examples]
        ref_mel_2 = [self.random_clip(item["ref_mel_2"]) for item in examples]
        mel = batch_sequences(mel)
        ref_mel = batch_sequences(ref_mel)
        ref_mel_2 = batch_sequences(ref_mel_2)

        # convert each batch to paddle.Tensor
        # (B,)
        label = paddle.to_tensor(label)
        ref_label = paddle.to_tensor(ref_label)
        # [B, T, n_mels] -> [B, 1, n_mels, T]
        mel = paddle.to_tensor(mel).transpose([0, 2, 1]).unsqueeze(1)
        ref_mel = paddle.to_tensor(ref_mel).transpose([0, 2, 1]).unsqueeze(1)
        ref_mel_2 = paddle.to_tensor(ref_mel_2).transpose(
            [0, 2, 1]).unsqueeze(1)

        # two independent standard-normal latent codes per example
        z_trg = paddle.randn([batch_size, self.latent_dim])
        z_trg2 = paddle.randn([batch_size, self.latent_dim])

        batch = {
            "x_real": mel,
            "y_org": label,
            "x_ref": ref_mel,
            "x_ref2": ref_mel_2,
            "y_trg": ref_label,
            "z_trg": z_trg,
            "z_trg2": z_trg2
        }

        return batch


# for PaddleSlim
def fastspeech2_single_spk_batch_fn_static(examples):
    """Collate examples for a static single-speaker FastSpeech2 model.

    Args:
        examples (List[Dict]): examples, each holding a ``text`` id sequence.

    Returns:
        Dict[str, np.ndarray]: ``{"text": ids}`` where ``ids`` is the int64
        text of the first example only.
    """
    stacked_text = np.array(
        [np.array(example["text"], dtype=np.int64) for example in examples])
    # inference does not need the batch axis, so keep only the first entry
    return {"text": stacked_text[0]}


def fastspeech2_multi_spk_batch_fn_static(examples):
    """Collate examples for a static multi-speaker FastSpeech2 model.

    Only the first example is kept and no batch axis is added.

    Args:
        examples (List[Dict]): examples holding ``text`` and optionally
            ``spk_id`` and/or ``spk_emb``.

    Returns:
        Dict[str, np.ndarray]: ``text`` (int64) plus ``spk_id`` (int64) and
        ``spk_emb`` (float32) when those keys are present in the first example.
    """
    text = [np.array(item["text"], dtype=np.int64) for item in examples]
    text = np.array(text)
    # do not need batch axis in infer
    text = text[0]
    batch = {
        "text": text,
    }
    if "spk_id" in examples[0]:
        spk_id = [np.array(item["spk_id"], dtype=np.int64) for item in examples]
        spk_id = np.array(spk_id)
        spk_id = spk_id[0]
        batch["spk_id"] = spk_id
    if "spk_emb" in examples[0]:
        spk_emb = [
            np.array(item["spk_emb"], dtype=np.float32) for item in examples
        ]
        spk_emb = np.array(spk_emb)
        # take the first embedding; it must not depend on spk_id being present
        spk_emb = spk_emb[0]
        batch["spk_emb"] = spk_emb
    return batch


def speedyspeech_single_spk_batch_fn_static(examples):
    """Collate examples for a static single-speaker SpeedySpeech model.

    Args:
        examples (List[Dict]): examples holding ``phones`` and ``tones``.

    Returns:
        Dict[str, np.ndarray]: int64 ``phones`` and ``tones`` of the first
        example only, without a batch axis.
    """
    phone_arrays = [
        np.array(example["phones"], dtype=np.int64) for example in examples
    ]
    tone_arrays = [
        np.array(example["tones"], dtype=np.int64) for example in examples
    ]
    stacked_phones = np.array(phone_arrays)
    stacked_tones = np.array(tone_arrays)
    return {
        "phones": stacked_phones[0],
        "tones": stacked_tones[0],
    }


def speedyspeech_multi_spk_batch_fn_static(examples):
    """Collate examples for a static multi-speaker SpeedySpeech model.

    Args:
        examples (List[Dict]): examples holding ``phones``, ``tones`` and
            optionally ``spk_id``.

    Returns:
        Dict[str, np.ndarray]: int64 ``phones`` and ``tones`` of the first
        example, plus its ``spk_id`` when the first example has that key.
    """
    phone_arrays = [
        np.array(example["phones"], dtype=np.int64) for example in examples
    ]
    tone_arrays = [
        np.array(example["tones"], dtype=np.int64) for example in examples
    ]
    stacked_phones = np.array(phone_arrays)
    stacked_tones = np.array(tone_arrays)
    batch = {
        "phones": stacked_phones[0],
        "tones": stacked_tones[0],
    }
    has_speaker_id = "spk_id" in examples[0]
    if has_speaker_id:
        speaker_ids = np.array([
            np.array(example["spk_id"], dtype=np.int64) for example in examples
        ])
        batch["spk_id"] = speaker_ids[0]
    return batch


================================================
FILE: paddlespeech/t2s/datasets/batch.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Utility functions to create batch for arrays which satisfy some conditions.
Batch functions for text sequences, audio and spectrograms are provided.
"""
import numpy as np

__all__ = [
    "batch_text_id",
    "batch_wav",
    "batch_spec",
    "TextIDBatcher",
    "WavBatcher",
    "SpecBatcher",
]


class TextIDBatcher(object):
    """Callable that stores padding options and delegates to `batch_text_id`."""

    def __init__(self, pad_id=0, dtype=np.int64):
        # remembered and forwarded on every call
        self.dtype = dtype
        self.pad_id = pad_id

    def __call__(self, minibatch):
        """Batch `minibatch` with the stored `pad_id` and `dtype`."""
        return batch_text_id(minibatch, dtype=self.dtype, pad_id=self.pad_id)


def batch_text_id(minibatch, pad_id=0, dtype=np.int64):
    """Right-pad text id sequences to the longest one and stack them.

    Args:
        minibatch (List[np.ndarray]): list of rank-1 arrays of text ids, shape(T,). Only the first example's rank is checked.
        pad_id (int, optional): the id used for padding. Defaults to 0.
        dtype (np.dtype, optional): the data dtype of the padded batch. Defaults to np.int64.

    Returns:
        Tuple[np.ndarray, np.ndarray]: the padded batch of shape(B, T) with
        dtype `dtype`, and the original lengths of shape(B,) as np.int64.
    """
    first = minibatch[0]
    assert len(first.shape) == 1, "text example is an 1D tensor"
    lengths = [item.shape[0] for item in minibatch]
    longest = np.max(lengths)

    padded = [
        np.pad(
            item, [(0, longest - item.shape[0])],
            mode='constant',
            constant_values=pad_id) for item in minibatch
    ]

    return np.array(padded, dtype=dtype), np.array(lengths, dtype=np.int64)


class WavBatcher(object):
    """Callable that stores padding options and delegates to `batch_wav`."""

    def __init__(self, pad_value=0., dtype=np.float32):
        # remembered and forwarded on every call
        self.dtype = dtype
        self.pad_value = pad_value

    def __call__(self, minibatch):
        """Batch `minibatch` with the stored `pad_value` and `dtype`."""
        return batch_wav(minibatch, dtype=self.dtype, pad_value=self.pad_value)


def batch_wav(minibatch, pad_value=0., dtype=np.float32):
    """Right-pad mono audio arrays to the longest one and stack them.

    Args:
        minibatch (List[np.ndarray]): list of rank-1 float arrays(mono-channel audio, shape(T,)). Only the first example's rank is checked.
        pad_value (float, optional): the pad value. Defaults to 0..
        dtype (np.dtype, optional): the data type of the padded batch. Defaults to np.float32.

    Returns:
        Tuple[np.ndarray, np.ndarray]: the padded batch of shape(B, T) with
        dtype `dtype`, and the original lengths of shape(B,) as np.int64.
    """
    first = minibatch[0]
    assert len(first.shape) == 1, "we only handles mono-channel wav"

    # the number of samples is read from the last axis
    lengths = [wav.shape[-1] for wav in minibatch]
    longest = np.max(lengths)

    padded = [
        np.pad(
            wav, [(0, longest - wav.shape[-1])],
            mode='constant',
            constant_values=pad_value) for wav in minibatch
    ]
    return np.array(padded, dtype=dtype), np.array(lengths, dtype=np.int64)


class SpecBatcher(object):
    """Callable that stores padding options and delegates to `batch_spec`."""

    def __init__(self, pad_value=0., time_major=False, dtype=np.float32):
        # remembered and forwarded on every call
        self.time_major = time_major
        self.pad_value = pad_value
        self.dtype = dtype

    def __call__(self, minibatch):
        """Batch `minibatch` with the stored padding options."""
        options = dict(
            pad_value=self.pad_value,
            time_major=self.time_major,
            dtype=self.dtype)
        return batch_spec(minibatch, **options)


def batch_spec(minibatch, pad_value=0., time_major=False, dtype=np.float32):
    """Pad spectrograms along the time axis to the longest one and stack them.

    Args:
        minibatch (List[np.ndarray]): list of rank-2 arrays, shape(F, T) when `time_major` is False or shape(T, F) when it is True. Only the first example's rank is checked.
        pad_value (float, optional): the pad value. Defaults to 0..
        time_major (bool, optional): whether time is the first axis of each example. Defaults to False.
        dtype (np.dtype, optional): data type of the padded batch. Defaults to np.float32.

    Returns:
        Tuple[np.ndarray, np.ndarray]: the padded batch of shape(B, F, T) or
        (B, T, F) with dtype `dtype`, and the original time lengths of
        shape(B,) as np.int64.
    """
    first = minibatch[0]
    assert len(
        first.shape) == 2, "we only handles mono channel spectrogram"

    # time is axis 0 for (T, F) examples and the last axis for (F, T) examples
    time_idx = 0 if time_major else -1
    lengths = [spec.shape[time_idx] for spec in minibatch]
    longest = np.max(lengths)

    padded = []
    for spec in minibatch:
        time_pad = (0, longest - spec.shape[time_idx])
        widths = [time_pad, (0, 0)] if time_major else [(0, 0), time_pad]
        padded.append(
            np.pad(
                spec, widths, mode='constant', constant_values=pad_value))
    return np.array(padded, dtype=dtype), np.array(lengths, dtype=np.int64)


def batch_sequences(sequences, axis=0, pad_value=0):
    """Pad arrays along one axis to a common length and stack them.

    Args:
        sequences (List[np.ndarray]): arrays to batch; rank and dtype are taken from the first one.
        axis (int, optional): the axis to pad, negative values count from the end. Defaults to 0.
        pad_value (optional): the pad value, cast to the dtype of the first array. Defaults to 0.

    Returns:
        np.ndarray: the padded arrays stacked along a new leading axis.
    """
    first = sequences[0]
    rank = first.ndim
    if axis < 0:
        axis += rank
    fill = first.dtype.type(pad_value)
    lengths = [item.shape[axis] for item in sequences]
    longest = np.max(lengths)

    # no padding on the axes before and after the padded one
    leading = [(0, 0)] * axis
    trailing = [(0, 0)] * (rank - axis - 1)
    padded = [
        np.pad(
            item,
            leading + [(0, longest - length)] + trailing,
            mode='constant',
            constant_values=fill) for item, length in zip(sequences, lengths)
    ]
    return np.stack(padded)


================================================
FILE: paddlespeech/t2s/datasets/data_table.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random
from multiprocessing import Manager
from typing import Any
from typing import Callable
from typing import Dict
from typing import List

import numpy as np
from paddle.io import Dataset


class DataTable(Dataset):
    """Dataset to load and convert data for general purpose.
    Args:
        data (List[Dict[str, Any]]): Metadata, a list of meta datum, each of which is composed of  several fields
        fields (List[str], optional): Fields to use, if not specified, all the fields in the data are used, by default None
        converters (Dict[str, Callable], optional): Converters used to process each field, by default None
        use_cache (bool, optional): Whether to use cache, by default False

    Raises:
        ValueError:
            If there is some field that does not exist in data. 
        ValueError:
            If there is some field in converters that does not exist in fields.
    """

    def __init__(self,
                 data: List[Dict[str, Any]],
                 fields: List[str]=None,
                 converters: Dict[str, Callable]=None,
                 use_cache: bool=False):
        # metadata
        self.data = data
        assert len(data) > 0, "This dataset has no examples"

        # peek at the first example to get the existing fields.
        # Materialize the keys as a list: a dict view cannot be pickled and
        # would silently follow later mutations of the first example.
        first_example = self.data[0]
        fields_in_data = list(first_example.keys())

        # check all the requested fields exist
        if fields is None:
            self.fields = fields_in_data
        else:
            for field in fields:
                if field not in fields_in_data:
                    raise ValueError(
                        f"The requested field ({field}) is not found "
                        f"in the data. Fields in the data are {fields_in_data}")
            self.fields = fields

        # check converters: every converter must target a selected field
        if converters is None:
            self.converters = {}
        else:
            for field in converters.keys():
                if field not in self.fields:
                    raise ValueError(
                        f"The converter has a non existing field ({field})")
            self.converters = converters

        self.use_cache = use_cache
        if use_cache:
            self._initialize_cache()

    def _initialize_cache(self):
        """Create a Manager-backed list with one empty slot per example."""
        self.manager = Manager()
        self.caches = self.manager.list()
        self.caches += [None for _ in range(len(self))]

    def _get_metadata(self, idx: int) -> Dict[str, Any]:
        """Return a meta-datum given an index."""
        return self.data[idx]

    def _convert(self, meta_datum: Dict[str, Any]) -> Dict[str, Any]:
        """Convert a meta datum to an example by applying the corresponding 
        converters to each fields requested.

        Args:
            meta_datum (Dict[str, Any]): Meta datum

        Returns:
            Dict[str, Any]: Converted example
        """
        example = {}
        for field in self.fields:
            converter = self.converters.get(field, None)
            meta_datum_field = meta_datum[field]
            # fields without a converter are passed through unchanged
            if converter is not None:
                converted_field = converter(meta_datum_field)
            else:
                converted_field = meta_datum_field
            example[field] = converted_field
        return example

    def __getitem__(self, idx: int) -> Dict[str, Any]:
        """Get an example given an index.
        Args:
            idx (int): Index of the example to get

        Returns:
            Dict[str, Any]: A converted example
        """
        if self.use_cache and self.caches[idx] is not None:
            return self.caches[idx]

        meta_datum = self._get_metadata(idx)
        example = self._convert(meta_datum)

        if self.use_cache:
            self.caches[idx] = example

        return example

    def __len__(self) -> int:
        """Returns the size of the dataset.

        Returns:
            int: The length of the dataset
        """
        return len(self.data)


class StarGANv2VCDataTable(DataTable):
    """DataTable that pairs every utterance with two random reference utterances.

    Each meta datum is expected to look like
    ``{'utt_id': 'p225_111', 'spk_id': '1', 'speech': 'path of *.npy'}``.

    Args:
        data (List[Dict[str, Any]]): Metadata, one dict per utterance.
    """

    def __init__(self, data: List[Dict[str, Any]]):
        super().__init__(data)
        # group the utterances by speaker in a single pass over the data
        data_list_per_class = {}
        for item in data:
            data_list_per_class.setdefault(item['spk_id'], []).append(item)
        self.data_list_per_class = data_list_per_class

    def __getitem__(self, idx: int) -> Dict[str, Any]:
        """Get an example given an index.
        Args:
            idx (int): Index of the example to get

        Returns:
            Dict[str, Any]: A dict with the keys ``utt_id``, ``mel``,
            ``label``, ``ref_mel``, ``ref_mel_2`` and ``ref_label``.
        """
        if self.use_cache and self.caches[idx] is not None:
            return self.caches[idx]

        data = self._get_metadata(idx)

        # Cropping of the mels is done in the batch function, not here.
        # The first reference is drawn from the whole dataset; the second one
        # is drawn from the utterances of the first reference's speaker.
        ref_data = random.choice(self.data)
        ref_label = ref_data['spk_id']
        ref_data_2 = random.choice(self.data_list_per_class[ref_label])
        # mel_tensor, label, ref_mel_tensor, ref2_mel_tensor, ref_label
        new_example = {
            'utt_id': data['utt_id'],
            'mel': np.load(data['speech']),
            'label': int(data['spk_id']),
            'ref_mel': np.load(ref_data['speech']),
            'ref_mel_2': np.load(ref_data_2['speech']),
            'ref_label': int(ref_label)
        }

        if self.use_cache:
            self.caches[idx] = new_example

        return new_example


================================================
FILE: paddlespeech/t2s/datasets/dataset.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import six
from paddle.io import Dataset

__all__ = [
    "split",
    "TransformDataset",
    "CacheDataset",
    "TupleDataset",
    "DictDataset",
    "SliceDataset",
    "SubsetDataset",
    "FilterDataset",
    "ChainDataset",
]


def split(dataset, first_size):
    """Split a dataset into its first `first_size` examples and the rest.

    Args:
        dataset (Dataset): the dataset to split.
        first_size (int): the number of examples in the first part.

    Returns:
        Tuple[SliceDataset, SliceDataset]: the two consecutive slices.
    """
    head = SliceDataset(dataset, 0, first_size)
    tail = SliceDataset(dataset, first_size, len(dataset))
    return head, tail


class TransformDataset(Dataset):
    def __init__(self, dataset, transform):
        """A view of another dataset with a transform applied on access.

        Args:
            dataset (Dataset): the base dataset.
            transform (callable): called with an example of the base dataset, its return value becomes the new example.
        """
        self._transform = transform
        self._dataset = dataset

    def __len__(self):
        # same size as the base dataset
        size = len(self._dataset)
        return size

    def __getitem__(self, i):
        # the transform runs on every access; nothing is cached
        return self._transform(self._dataset[i])


class CacheDataset(Dataset):
    def __init__(self, dataset):
        """A dataset that memoizes examples of the base dataset on first access.

        Args:
            dataset (Dataset): the base dataset to cache.
        """
        self._cache = dict()
        self._dataset = dataset

    def __len__(self):
        size = len(self._dataset)
        return size

    def __getitem__(self, i):
        cache = self._cache
        if i in cache:
            return cache[i]
        # first access: fetch from the base dataset and remember the result
        example = self._dataset[i]
        cache[i] = example
        return example


class TupleDataset(Dataset):
    def __init__(self, *datasets):
        """A compound dataset made from several datasets of the same length. An example of the `TupleDataset` is a tuple of examples from the constituent datasets.

        Args:
            datasets: tuple[Dataset], the constituent datasets.

        Raises:
            ValueError: If no dataset is given or the lengths differ.
        """
        if not datasets:
            raise ValueError("no datasets are given")
        length = len(datasets[0])
        for i, dataset in enumerate(datasets):
            if len(dataset) != length:
                # the space after the period keeps the two sentences apart
                raise ValueError("all the datasets should have the same length. "
                                 "dataset {} has a different length".format(i))
        self._datasets = datasets
        self._length = length

    def __getitem__(self, index):
        # structure of arrays: one entry per constituent dataset
        batches = [dataset[index] for dataset in self._datasets]
        if isinstance(index, slice):
            length = len(batches[0])
            # array of structures: regroup into one tuple per example
            return [
                tuple([batch[i] for batch in batches])
                for i in six.moves.range(length)
            ]
        else:
            return tuple(batches)

    def __len__(self):
        return self._length


class DictDataset(Dataset):
    def __init__(self, **datasets):
        """
        A compound dataset made from several datasets of the same length. An 
        example of the `DictDataset` is a dict of examples from the constituent 
        datasets.

        WARNING: paddle does not have a good support for DictDataset, because
        every batch yield from a DataLoader is a list, but it cannot be a dict.
        So you have to provide a collate function because you cannot use the
        default one.

        Args:
            datasets: Dict[Dataset], the constituent datasets.

        Raises:
            ValueError: If no dataset is given or the lengths differ.
        """
        if not datasets:
            raise ValueError("no datasets are given")
        length = None
        for key, dataset in six.iteritems(datasets):
            if length is None:
                # the first dataset defines the reference length
                length = len(dataset)
            elif len(dataset) != length:
                # the space after the period keeps the two sentences apart
                raise ValueError(
                    "all the datasets should have the same length. "
                    "dataset {} has a different length".format(key))
        self._datasets = datasets
        self._length = length

    def __getitem__(self, index):
        batches = {
            key: dataset[index]
            for key, dataset in six.iteritems(self._datasets)
        }
        if isinstance(index, slice):
            # regroup the per-key slices into one dict per example
            length = len(six.next(six.itervalues(batches)))
            return [{key: batch[i]
                     for key, batch in six.iteritems(batches)}
                    for i in six.moves.range(length)]
        else:
            return batches

    def __len__(self):
        return self._length


class SliceDataset(Dataset):
    def __init__(self, dataset, start, finish, order=None):
        """A Dataset which is a slice of the base dataset.

        Args:
            dataset (Dataset): the base dataset.
            start (int): the start of the slice.
            finish (int): the end of the slice, not inclusive.
            order (List[int], optional): the order, it is a permutation of the valid example ids of the base dataset. If `order` is provided, the slice is taken in `order`. Defaults to None.

        Raises:
            ValueError: If the slice leaves the dataset, if `start` is greater
                than `finish`, or if `order` has a different length than the dataset.
        """
        if start < 0 or finish > len(dataset):
            raise ValueError("subset overruns the dataset.")
        # a reversed range would give a negative size, which makes len() fail
        # and every index raise IndexError, so reject it up front
        if start > finish:
            raise ValueError(
                "start ({}) should not be greater than finish ({}).".format(
                    start, finish))
        self._dataset = dataset
        self._start = start
        self._finish = finish
        self._size = finish - start

        if order is not None and len(order) != len(dataset):
            raise ValueError(
                "order should have the same length as the dataset, "
                "len(order) = {} which does not equal len(dataset) = {}".
                format(len(order), len(dataset)))
        self._order = order

    def __len__(self):
        return self._size

    def __getitem__(self, i):
        if i >= 0:
            if i >= self._size:
                raise IndexError('dataset index out of range')
            index = self._start + i
        else:
            # negative indices count back from the end of the slice
            if i < -self._size:
                raise IndexError('dataset index out of range')
            index = self._finish + i

        # map the position through the permutation when one was given
        if self._order is not None:
            index = self._order[index]
        return self._dataset[index]


class SubsetDataset(Dataset):
    def __init__(self, dataset, indices):
        """A Dataset exposing only selected examples of the base dataset.

        Args:
            dataset (Dataset): the base dataset.
            indices (Sequence[int]): the indices of the examples to pick; it must support `len` and indexing.

        Raises:
            ValueError: If there are more indices than examples in the base dataset.
        """
        self._dataset = dataset
        too_many = len(indices) > len(dataset)
        if too_many:
            raise ValueError("subset's size larger that dataset's size!")
        self._size = len(indices)
        self._indices = indices

    def __len__(self):
        return self._size

    def __getitem__(self, i):
        # translate the subset position into a base dataset index
        return self._dataset[self._indices[i]]


class FilterDataset(Dataset):
    def __init__(self, dataset, filter_fn):
        """A dataset keeping only the examples accepted by a predicate.

        The predicate is evaluated on every example of the base dataset at
        construction time.

        Args:
            dataset (Dataset): the base dataset.
            filter_fn (callable): called with an example of the base dataset; the example is kept when the result is truthy.
        """
        self._dataset = dataset
        kept = []
        for position in range(len(dataset)):
            if filter_fn(dataset[position]):
                kept.append(position)
        self._indices = kept
        self._size = len(kept)

    def __len__(self):
        return self._size

    def __getitem__(self, i):
        # translate the filtered position into a base dataset index
        return self._dataset[self._indices[i]]


class ChainDataset(Dataset):
    def __init__(self, *datasets):
        """Several datasets presented back to back as a single dataset.

        Args:
            datasets (Iterable[Dataset]): datasets to concat.
        """
        self._datasets = datasets

    def __len__(self):
        total = 0
        for part in self._datasets:
            total += len(part)
        return total

    def __getitem__(self, i):
        if i < 0:
            raise IndexError("ChainDataset doesnot support negative indexing.")

        # walk the parts, shifting the index past every part it does not fall in
        offset = i
        for part in self._datasets:
            part_size = len(part)
            if offset < part_size:
                return part[offset]
            offset -= part_size

        raise IndexError("dataset index out of range")


================================================
FILE: paddlespeech/t2s/datasets/get_feats.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
from typing import List
from typing import Optional
from typing import Union

import librosa
import numpy as np
import pyworld
from scipy.interpolate import interp1d
from typing_extensions import Literal


class LogMelFBank():
    """Log mel filter bank feature extractor built on librosa.

    Args:
        sr (int, optional): sample rate passed to the mel filter. Defaults to 24000.
        n_fft (int, optional): FFT size. Defaults to 2048.
        hop_length (int, optional): STFT hop length. Defaults to 300.
        win_length (int, optional): STFT window length. Defaults to None.
        window (str, optional): STFT window name. Defaults to "hann".
        n_mels (int, optional): number of mel bands. Defaults to 80.
        fmin (int, optional): lowest mel frequency, 0 when None. Defaults to 80.
        fmax (int, optional): highest mel frequency, sr / 2 when None. Defaults to 7600.
        norm (optional): `norm` argument of `librosa.filters.mel`. Defaults to "slaney".
        htk (bool, optional): `htk` argument of `librosa.filters.mel`. Defaults to False.
        power (float, optional): exponent applied to the magnitude spectrogram. Defaults to 1.0.
    """

    def __init__(self,
                 sr: int=24000,
                 n_fft: int=2048,
                 hop_length: int=300,
                 win_length: int=None,
                 window: str="hann",
                 n_mels: int=80,
                 fmin: int=80,
                 fmax: int=7600,
                 norm: Optional[Union[Literal["slaney"], float]]="slaney",
                 htk: bool=False,
                 power: float=1.0):
        self.sr = sr
        # stft
        self.n_fft = n_fft
        self.win_length = win_length
        self.hop_length = hop_length
        self.window = window
        self.center = True
        self.pad_mode = "reflect"
        self.norm = norm
        self.htk = htk

        # mel
        self.n_mels = n_mels
        self.fmin = 0 if fmin is None else fmin
        self.fmax = sr / 2 if fmax is None else fmax
        self.power = power

        # the filter only depends on the settings above, so build it once
        self.mel_filter = self._create_mel_filter()

    def _create_mel_filter(self):
        """Build the mel filter matrix from the stored settings."""
        mel_filter = librosa.filters.mel(
            sr=self.sr,
            n_fft=self.n_fft,
            n_mels=self.n_mels,
            fmin=self.fmin,
            fmax=self.fmax,
            norm=self.norm,
            htk=self.htk)
        return mel_filter

    def _stft(self, wav: np.ndarray):
        """Return the complex STFT of `wav` computed by librosa."""
        D = librosa.core.stft(
            wav,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            win_length=self.win_length,
            window=self.window,
            center=self.center,
            pad_mode=self.pad_mode)
        return D

    def _spectrogram(self, wav: np.ndarray):
        """Return the STFT magnitude raised to `self.power`."""
        D = self._stft(wav)
        return np.abs(D)**self.power

    def _mel_spectrogram(self, wav: np.ndarray):
        """Project the spectrogram onto the mel filter bank."""
        S = self._spectrogram(wav)
        mel = np.dot(self.mel_filter, S)
        return mel

    # We use different definition for log-spec between TTS and ASR
    #   TTS: log_10(abs(stft))
    #   ASR: log_e(power(stft))

    def get_log_mel_fbank(self, wav, base='10'):
        """Compute the log mel filter bank features of a waveform.

        Args:
            wav (np.ndarray): the input waveform.
            base (str, optional): logarithm base, '10' or 'e'. Defaults to '10'.

        Returns:
            np.ndarray: log mel features, shape (num_frames, n_mels).

        Raises:
            ValueError: If `base` is neither '10' nor 'e'.
        """
        # fail loudly instead of returning un-logged, un-transposed features
        if base not in ('10', 'e'):
            raise ValueError(
                f"base should be '10' or 'e', but got {base!r}")
        mel = self._mel_spectrogram(wav)
        # floor the values so that the logarithm stays finite
        mel = np.clip(mel, a_min=1e-10, a_max=float("inf"))
        if base == '10':
            mel = np.log10(mel.T)
        else:
            mel = np.log(mel.T)
        # (num_frames, n_mels)
        return mel


class Pitch():
    """F0 (pitch) extractor built on pyworld (dio + stonemask).

    Args:
        sr (int, optional): sample rate of the input audio. Defaults to 24000.
        hop_length (int, optional): hop length in samples, used to derive the frame period. Defaults to 300.
        f0min (int, optional): passed to pyworld as `f0_floor`. Defaults to 80.
        f0max (int, optional): passed to pyworld as `f0_ceil`. Defaults to 7600.
    """

    def __init__(self,
                 sr: int=24000,
                 hop_length: int=300,
                 f0min: int=80,
                 f0max: int=7600):

        self.sr = sr
        self.hop_length = hop_length
        self.f0min = f0min
        self.f0max = f0max

    def _convert_to_continuous_f0(self, f0: np.ndarray) -> np.ndarray:
        """Fill the zero-valued frames of `f0` by linear interpolation.

        `f0` is modified in place while the leading and trailing zeros are
        replaced by the first and last non-zero values.

        Args:
            f0 (np.ndarray): rank-1 f0 sequence, zeros mark frames to fill.

        Returns:
            np.ndarray: the continuous f0, or `f0` unchanged when all frames are zero.
        """
        if (f0 == 0).all():
            print("All frames seems to be unvoiced, this utt will be removed.")
            return f0
        # padding start and end of f0 sequence
        start_f0 = f0[f0 != 0][0]
        end_f0 = f0[f0 != 0][-1]
        start_idx = np.where(f0 == start_f0)[0][0]
        end_idx = np.where(f0 == end_f0)[0][-1]
        f0[:start_idx] = start_f0
        f0[end_idx:] = end_f0

        # get non-zero frame index
        nonzero_idxs = np.where(f0 != 0)[0]

        # perform linear interpolation
        interp_fn = interp1d(nonzero_idxs, f0[nonzero_idxs])
        f0 = interp_fn(np.arange(0, f0.shape[0]))

        return f0

    def _calculate_f0(self,
                      input: np.ndarray,
                      use_continuous_f0: bool=True,
                      use_log_f0: bool=True) -> np.ndarray:
        """Estimate the frame-level f0 of a waveform.

        Args:
            input (np.ndarray): the input waveform.
            use_continuous_f0 (bool, optional): whether to interpolate zero-valued frames. Defaults to True.
            use_log_f0 (bool, optional): whether to take the natural log of the non-zero values. Defaults to True.

        Returns:
            np.ndarray: rank-1 f0 sequence.
        """
        # np.float_ was removed in NumPy 2.0; np.float64 is the same type
        input = input.astype(np.float64)
        # frame period in milliseconds
        frame_period = 1000 * self.hop_length / self.sr
        f0, timeaxis = pyworld.dio(
            input,
            fs=self.sr,
            f0_floor=self.f0min,
            f0_ceil=self.f0max,
            frame_period=frame_period)
        f0 = pyworld.stonemask(input, f0, timeaxis, self.sr)
        if use_continuous_f0:
            f0 = self._convert_to_continuous_f0(f0)
        if use_log_f0:
            # zeros are left untouched, the log is only taken where f0 != 0
            nonzero_idxs = np.where(f0 != 0)[0]
            f0[nonzero_idxs] = np.log(f0[nonzero_idxs])
        return f0.reshape(-1)

    def _average_by_duration(self, input: np.ndarray,
                             d: np.ndarray) -> np.ndarray:
        """Average `input` over consecutive segments whose lengths are given by `d`.

        Args:
            input (np.ndarray): frame-level values.
            d (np.ndarray): integer number of frames of every segment.

        Returns:
            np.ndarray: one average per segment, shape (T, 1) for rank-1 input; empty segments yield 0.
        """
        # segment boundaries: [0, d0, d0 + d1, ...]
        d_cumsum = np.pad(d.cumsum(0), (1, 0), 'constant')
        arr_list = []
        for start, end in zip(d_cumsum[:-1], d_cumsum[1:]):
            arr = input[start:end]
            # NOTE(review): zero-valued frames are included in the mean;
            # confirm this is intended.
            avg_arr = np.mean(arr, axis=0) if len(arr) != 0 else np.array(0)
            arr_list.append(avg_arr)
        # shape (T,1)
        arr_list = np.expand_dims(np.array(arr_list), 0).T

        return arr_list

    def get_pitch(self,
                  wav: np.ndarray,
                  use_continuous_f0: bool=True,
                  use_log_f0: bool=True,
                  use_token_averaged_f0: bool=True,
                  duration: np.ndarray=None):
        """Extract the pitch of a waveform.

        Args:
            wav (np.ndarray): the input waveform.
            use_continuous_f0 (bool, optional): see `_calculate_f0`. Defaults to True.
            use_log_f0 (bool, optional): see `_calculate_f0`. Defaults to True.
            use_token_averaged_f0 (bool, optional): whether to average f0 per segment of `duration`. Defaults to True.
            duration (np.ndarray, optional): frames per segment; averaging is skipped when None. Defaults to None.

        Returns:
            np.ndarray: a column of per-segment averages when averaging is applied, otherwise a column of frame-level f0.
        """
        f0 = self._calculate_f0(wav, use_continuous_f0, use_log_f0)
        if use_token_averaged_f0 and duration is not None:
            f0 = self._average_by_duration(f0, duration)
        else:
            f0 = np.expand_dims(np.array(f0), 0).T
        return f0


class Energy():
    """Energy extractor built on the STFT of a waveform.

    Args:
        n_fft (int): FFT size passed to ``librosa.core.stft``.
        hop_length (int): hop length passed to ``librosa.core.stft``.
        win_length (int, optional): window length passed to ``librosa.core.stft``.
        window (str): window name passed to ``librosa.core.stft``.
        center (bool): ``center`` flag passed to ``librosa.core.stft``.
        pad_mode (str): ``pad_mode`` passed to ``librosa.core.stft``.
    """

    def __init__(self,
                 n_fft: int=2048,
                 hop_length: int=300,
                 win_length: int=None,
                 window: str="hann",
                 center: bool=True,
                 pad_mode: str="reflect"):
        # the STFT configuration is stored verbatim and only read by _stft
        self.n_fft, self.hop_length, self.win_length = n_fft, hop_length, win_length
        self.window, self.center, self.pad_mode = window, center, pad_mode

    def _stft(self, wav: np.ndarray):
        """Return ``librosa.core.stft(wav)`` computed with the stored settings."""
        stft_options = dict(
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            win_length=self.win_length,
            window=self.window,
            center=self.center,
            pad_mode=self.pad_mode)
        return librosa.core.stft(wav, **stft_options)

    def _calculate_energy(self, input: np.ndarray):
        """Square root of the summed STFT power, reduced over axis 0.

        The summed power is floored at 1e-10 before the square root is taken.
        """
        spectrum = self._stft(input.astype(np.float32))
        summed_power = np.sum(np.abs(spectrum)**2, axis=0)
        floored = np.clip(summed_power, a_min=1.0e-10, a_max=float('inf'))
        return np.sqrt(floored)

    def _average_by_duration(self, input: np.ndarray,
                             d: np.ndarray) -> np.ndarray:
        """Average ``input`` over consecutive segments whose lengths are ``d``.

        Returns one mean per segment (0 for an empty segment); for a 1-D
        ``input`` the result has shape (len(d), 1).
        """
        # boundaries: [0, d0, d0 + d1, ...]
        bounds = np.pad(d.cumsum(0), (1, 0), 'constant')

        def _segment_mean(lo, hi):
            chunk = input[lo:hi]
            if len(chunk) != 0:
                return np.mean(chunk, axis=0)
            return np.array(0)

        means = [_segment_mean(lo, hi) for lo, hi in zip(bounds[:-1], bounds[1:])]
        # shape (T,1)
        return np.expand_dims(np.array(means), 0).T

    def get_energy(self,
                   wav: np.ndarray,
                   use_token_averaged_energy: bool=True,
                   duration: np.ndarray=None):
        """Compute the energy feature of ``wav``.

        The energy is averaged per segment when ``use_token_averaged_energy``
        is true and ``duration`` is given; otherwise it is returned per frame
        with one extra axis added and the result transposed.
        """
        frame_energy = self._calculate_energy(wav)
        if not (use_token_averaged_energy and duration is not None):
            return np.expand_dims(np.array(frame_energy), 0).T
        return self._average_by_duration(frame_energy, duration)


class LinearSpectrogram():
    """Linear-scale magnitude spectrogram extractor.

    Args:
        n_fft (int): FFT size passed to ``librosa.core.stft``.
        win_length (int, optional): window length passed to ``librosa.core.stft``.
        hop_length (int): hop length passed to ``librosa.core.stft``.
        window (str): window name passed to ``librosa.core.stft``.
        center (bool): ``center`` flag passed to ``librosa.core.stft``.
        pad_mode (str): ``pad_mode`` passed to ``librosa.core.stft``.
            Defaults to "reflect", the value that used to be hard-coded.
    """

    def __init__(
            self,
            n_fft: int=1024,
            win_length: int=None,
            hop_length: int=256,
            window: str="hann",
            center: bool=True,
            pad_mode: str="reflect", ):
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.win_length = win_length
        self.window = window
        self.center = center
        self.pad_mode = pad_mode

    def _stft(self, wav: np.ndarray):
        """Return ``librosa.core.stft(wav)`` computed with the stored settings."""
        D = librosa.core.stft(
            wav,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            win_length=self.win_length,
            window=self.window,
            center=self.center,
            pad_mode=self.pad_mode)
        return D

    def _spectrogram(self, wav: np.ndarray):
        """Return the element-wise magnitude of the STFT of ``wav``."""
        D = self._stft(wav)
        return np.abs(D)

    def get_linear_spectrogram(self, wav: np.ndarray):
        """Return the magnitude spectrogram of ``wav``, floored at 1e-10 and transposed.

        The result is the transpose of what ``librosa.core.stft`` returns
        (presumably giving frames on axis 0 -- confirm against librosa).
        """
        linear_spectrogram = self._spectrogram(wav)
        # floor the magnitudes so that no entry is exactly zero
        linear_spectrogram = np.clip(
            linear_spectrogram, a_min=1e-10, a_max=float("inf"))
        return linear_spectrogram.T


================================================
FILE: paddlespeech/t2s/datasets/ljspeech.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pathlib import Path

from paddle.io import Dataset

__all__ = ["LJSpeechMetaData"]


class LJSpeechMetaData(Dataset):
    """Metadata records read from ``<root>/metadata.csv``.

    Each metadata line must consist of exactly three ``|``-separated fields.
    The first is the file stem of a wav under ``<root>/wavs``, the second is
    ignored and the third is kept as the normalized text.

    Each record is ``[wav_path (str), normalized_text (str), "ljspeech"]``.

    Args:
        root (str or Path): dataset root directory; ``~`` is expanded.
    """

    def __init__(self, root):
        self.root = Path(root).expanduser()
        metadata_file = self.root / "metadata.csv"
        audio_dir = self.root / "wavs"
        speaker = "ljspeech"

        entries = []
        with open(str(metadata_file), 'rt', encoding='utf-8') as reader:
            for row in reader:
                # the middle field is not used
                stem, _, text = row.strip().split("|")
                wav_path = str(audio_dir / (stem + ".wav"))
                entries.append([wav_path, text, speaker])
        self.records = entries

    def __getitem__(self, i):
        """Return the i-th record."""
        return self.records[i]

    def __len__(self):
        """Return the number of records."""
        return len(self.records)


================================================
FILE: paddlespeech/t2s/datasets/preprocess_utils.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
from typing import List

import librosa
import numpy as np


# each line: utt_id|speaker|phn dur phn dur ...
def get_phn_dur(file_name):
    '''
    read MFA duration.txt
    Args:
        file_name (str or Path): path of gen_duration_from_textgrid.py's result
    Returns: 
        Dict: sentence: {utt_id: ([phone (str)], [duration (int)], speaker (str))}
        set: all speaker names found in the file
    '''
    sentence = {}
    speaker_set = set()
    # the context manager closes the file even when a malformed line raises
    with open(file_name, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # tolerate blank lines (e.g. a trailing newline at end of file)
                continue
            line_list = line.split('|')
            utt = line_list[0]
            speaker = line_list[1]
            # the last field alternates phone and duration tokens
            p_d = line_list[-1]
            speaker_set.add(speaker)
            phn_dur = p_d.split()
            phn = phn_dur[::2]
            dur = phn_dur[1::2]
            assert len(phn) == len(dur)
            sentence[utt] = (phn, [int(i) for i in dur], speaker)
    return sentence, speaker_set


def note2midi(notes: List[str]) -> List[int]:
    """Convert note strings to note ids, for example: ["C1"] -> [24]

    'rest' maps to 0. For any other note only the part before the first "/"
    is passed to ``librosa.note_to_midi``.

    Args:
        notes (List[str]): the list of note string

    Returns:
        List[int]: the list of note id
    """
    return [
        0 if note == 'rest' else librosa.note_to_midi(note.split("/")[0])
        for note in notes
    ]


def time2frame(
        times: List[float],
        sample_rate: int=24000,
        n_shift: int=128, ) -> List[int]:
    """Turn per-phoneme durations in seconds into per-phoneme frame counts.

    The durations are accumulated into end times, the end times are mapped to
    frame positions with ``librosa.time_to_frames`` and the frame counts are
    the differences between consecutive positions (the first is measured from
    frame 0).

    Args:
        times (List[float]): phoneme duration of time(s)
        sample_rate (int, optional): sample rate. Defaults to 24000.
        n_shift (int, optional): frame shift. Defaults to 128.

    Returns:
        List[int]: phoneme duration of frame (the array produced by ``np.diff``)
    """
    # running sum -> end time (s) of every phoneme
    end_times = np.cumsum(np.asarray(times, dtype=np.float64))
    end_frames = librosa.time_to_frames(
        end_times, sr=sample_rate, hop_length=n_shift)
    return np.diff(end_frames, prepend=0)


def get_sentences_svs(
        file_name,
        dataset: str='opencpop',
        sample_rate: int=24000,
        n_shift: int=128, ):
    '''
    read label file
    Each non-empty line is split on '|' into:
    utt | text | phones | notes | note durations | phone durations (s) | is_slur
    Args:
        file_name (str or Path): path of the label file
        dataset (str): dataset name, only 'opencpop' is handled
        sample_rate (int): sample rate used to convert phone durations to frames
        n_shift (int): frame shift used to convert phone durations to frames
    Returns: 
        Dict: {utt: ([phone (str)], [phone duration in frames (int)], [note id (int)], [note duration (float)], [is slur (int)], text (str), speaker name (str))}
        set: speaker names (empty when the dataset is not handled)
    '''
    sentence = {}
    speaker_set = set()
    # the context manager closes the file even when parsing or the assert raises
    # NOTE(review): the platform default encoding is used; the text field may be
    # non-ASCII -- consider an explicit encoding once the file format is confirmed
    with open(file_name, 'r') as f:
        if dataset == 'opencpop':
            speaker_set.add("opencpop")
            for line in f:
                line = line.strip()
                if not line:
                    # tolerate blank lines (e.g. a trailing newline at end of file)
                    continue
                line_list = line.split('|')
                utt = line_list[0]
                text = line_list[1]
                ph = line_list[2].split()
                midi = note2midi(line_list[3].split())
                midi_dur = line_list[4].split()
                ph_dur = time2frame(
                    [float(t) for t in line_list[5].split()],
                    sample_rate=sample_rate,
                    n_shift=n_shift)
                is_slur = line_list[6].split()
                assert len(ph) == len(midi) == len(midi_dur) == len(is_slur)
                sentence[utt] = (ph, [int(i) for i in ph_dur],
                                 [int(i) for i in midi],
                                 [float(i) for i in midi_dur],
                                 [int(i) for i in is_slur], text, "opencpop")
        else:
            print("dataset should in {opencpop} now!")

    return sentence, speaker_set


def merge_silence(sentence):
    '''
    Merge silence tokens of every utterance in place.

    A 'sil' that directly follows a 'sil' or 'sp' is folded into the previous
    entry (durations are summed and the entry is labelled 'sil'). Afterwards
    every remaining 'sp' whose duration is 14 or more is relabelled 'spl'.
    Each value of ``sentence`` is replaced by the list [phones, durations, speaker].

    Args:
        sentence (Dict): {utt: ([phone], [duration (int)], speaker)}
    '''
    for utt, entry in sentence.items():
        phones, durations, speaker = entry
        merged_phn, merged_dur = [], []

        # pass 1: fold 'sil' into a preceding 'sil' / 'sp'
        for idx, phone in enumerate(phones):
            follows_pause = idx > 0 and phones[idx - 1] in {"sil", "sp"}
            if follows_pause and phone == 'sil':
                merged_dur[-1] += durations[idx]
                merged_phn[-1] = 'sil'
                continue
            merged_phn.append(phone)
            merged_dur.append(durations[idx])

        # pass 2: split 'sp' into short ('sp') and long ('spl') pauses
        merged_phn = [
            ('sp' if length < 14 else 'spl') if phone == "sp" else phone
            for phone, length in zip(merged_phn, merged_dur)
        ]

        assert len(merged_phn) == len(merged_dur)
        sentence[utt] = [merged_phn, merged_dur, speaker]


def get_input_token(sentence, output_path, dataset="baker"):
    '''
    get phone set from training data and save it
    The written vocabulary is: "<pad>", "<unk>", the sorted phones, the
    punctuation tokens selected by ``dataset`` and finally "<eos>"; one
    "token id" pair per line.
    Args:
        sentence (Dict): sentence: {'utt': ([char], [int])}
        output_path (str or path):path to save phone_id_map
        dataset (str): "baker" / "aishell3" append full-width punctuation,
            "opencpop" appends none, anything else appends ASCII punctuation
    '''
    phn_token = sorted({phn for utt in sentence for phn in sentence[utt][0]})
    phn_token = ["<pad>", "<unk>"] + phn_token
    if dataset in {"baker", "aishell3"}:
        phn_token += ["，", "。", "？", "！"]
    # svs dataset
    elif dataset in {"opencpop"}:
        pass
    else:
        phn_token += [",", ".", "?", "!"]
    phn_token += ["<eos>"]

    # The vocabulary can contain non-ASCII tokens (the full-width punctuation
    # above), so write UTF-8 explicitly instead of the platform default.
    with open(output_path, 'w', encoding='utf-8') as f:
        for i, phn in enumerate(phn_token):
            f.write(phn + ' ' + str(i) + '\n')


def get_phones_tones(sentence,
                     phones_output_path,
                     tones_output_path,
                     dataset="baker"):
    '''
    get phone set and tone set from training data and save it
    A label that ends in one of the digits 0-5 is split into a phone (the part
    before the digit) and a tone (the digit); any other label is kept whole as
    a phone and contributes tone '0'.
    Args:
        sentence (Dict): sentence: {'utt': ([char], [int])}
        phones_output_path (str or path): path to save phone_id_map
        tones_output_path (str or path): path to save tone_id_map
        dataset (str): "baker" / "aishell3" append full-width punctuation to
            the phone set, anything else appends ASCII punctuation
    '''
    phn_token = set()
    tone_token = set()
    for utt in sentence:
        for label in sentence[utt][0]:
            # split tone from finals
            match = re.match(r'^(\w+)([012345])$', label)
            if match:
                phn_token.add(match.group(1))
                tone_token.add(match.group(2))
            else:
                phn_token.add(label)
                tone_token.add('0')
    phn_token = ["<pad>", "<unk>"] + sorted(phn_token)
    tone_token = sorted(tone_token)
    if dataset in {"baker", "aishell3"}:
        phn_token += ["，", "。", "？", "！"]
    else:
        phn_token += [",", ".", "?", "!"]
    phn_token += ["<eos>"]

    # The phone set can contain non-ASCII tokens (the full-width punctuation
    # above), so both maps are written as UTF-8 instead of the platform default.
    with open(phones_output_path, 'w', encoding='utf-8') as f:
        for i, phn in enumerate(phn_token):
            f.write(phn + ' ' + str(i) + '\n')
    with open(tones_output_path, 'w', encoding='utf-8') as f:
        for i, tone in enumerate(tone_token):
            f.write(tone + ' ' + str(i) + '\n')


def get_spk_id_map(speaker_set, output_path):
    '''
    save the speaker-to-id map
    Speakers are sorted and numbered from 0; one "speaker id" pair is written per line.
    Args:
        speaker_set (Iterable[str]): speaker names
        output_path (str or path): path to save speaker_id_map
    '''
    ordered = sorted(speaker_set)
    with open(output_path, 'w') as writer:
        writer.writelines(
            name + ' ' + str(idx) + '\n' for idx, name in enumerate(ordered))


def compare_duration_and_mel_length(sentences, utt, mel):
    '''
    Reconcile the summed durations of an utterance with its number of frames.

    The difference (frames - summed durations) is added to the last duration
    when that keeps it positive, otherwise to the first duration when that
    keeps it positive; if neither works the utterance is removed from
    ``sentences``. Nothing happens when ``utt`` is not in ``sentences``.

    Args:
        sentences (Dict): sentences[utt] = [phones_list ,durations_list]
        utt (str): utt_id
        mel (np.ndarry): features (num_frames, n_mels)
    '''
    if utt not in sentences:
        return

    durations = sentences[utt][1]
    len_diff = mel.shape[0] - sum(durations)
    if len_diff == 0:
        return

    # a positive difference always goes to the last duration; a negative one
    # only if the last duration stays positive afterwards
    if len_diff > 0 or durations[-1] + len_diff > 0:
        durations[-1] += len_diff
    elif durations[0] + len_diff > 0:
        durations[0] += len_diff
    else:
        print("the len_diff is unable to correct:", len_diff)
        sentences.pop(utt)


================================================
FILE: paddlespeech/t2s/datasets/sampler.py
================================================
import math

import numpy as np
from paddle.io import BatchSampler


class ErnieSATSampler(BatchSampler):
    """Batch sampler that restricts data loading to a subset of the dataset.

    Each process can pass its own ErnieSATSampler instance to a DataLoader and
    load a subset of the original dataset that is exclusive to it.

    Indices are grouped into batches in dataset order. When :attr:`shuffle` is
    True only the ORDER OF THE BATCHES is shuffled; the indices inside each
    batch stay in dataset order.

    .. note::
        Dataset is assumed to be of constant size.

    Args:
        dataset(paddle.io.Dataset): this could be a `paddle.io.Dataset` implement
                     or other python object which implemented
                     `__len__` for BatchSampler to get sample
                     number of data source.
        batch_size(int): number of sample indices in a mini-batch.
        num_replicas(int, optional): process number in distributed training.
            If :attr:`num_replicas` is None, :attr:`num_replicas` will be
            retrieved from :code:`paddle.distributed.ParallelEnv`.
            Default None.
        rank(int, optional): the rank of the current process among :attr:`num_replicas`
            processes. If :attr:`rank` is None, :attr:`rank` is retrieved from
            :code:`paddle.distributed.ParallelEnv`. Default None.
        shuffle(bool): whether to shuffle the order of the batches before
            yielding them. Default False.
        drop_last(bool): whether to drop the last incomplete batch when the
            number of local samples is not divisible by the batch size.
            Default False
    Examples:
        .. code-block:: python
            import numpy as np
            from paddle.io import Dataset
            from paddlespeech.t2s.datasets.sampler import ErnieSATSampler
            # init with dataset
            class RandomDataset(Dataset):
                def __init__(self, num_samples):
                    self.num_samples = num_samples

                def __getitem__(self, idx):
                    image = np.random.random([784]).astype('float32')
                    label = np.random.randint(0, 9, (1, )).astype('int64')
                    return image, label

                def __len__(self):
                    return self.num_samples

            dataset = RandomDataset(100)
            sampler = ErnieSATSampler(dataset, batch_size=64)
            for data in sampler:
                # do something
                break
    """

    def __init__(self,
                 dataset,
                 batch_size,
                 num_replicas=None,
                 rank=None,
                 shuffle=False,
                 drop_last=False):
        self.dataset = dataset

        assert isinstance(batch_size, int) and batch_size > 0, \
                "batch_size should be a positive integer"
        self.batch_size = batch_size
        assert isinstance(shuffle, bool), \
                "shuffle should be a boolean value"
        self.shuffle = shuffle
        assert isinstance(drop_last, bool), \
                "drop_last should be a boolean number"

        # imported here rather than at module level
        from paddle.distributed import ParallelEnv

        # number of processes: explicit argument wins, otherwise ask ParallelEnv
        if num_replicas is not None:
            assert isinstance(num_replicas, int) and num_replicas > 0, \
                    "num_replicas should be a positive integer"
            self.nranks = num_replicas
        else:
            self.nranks = ParallelEnv().nranks

        # rank of this process: explicit argument wins, otherwise ask ParallelEnv
        if rank is not None:
            assert isinstance(rank, int) and rank >= 0, \
                    "rank should be a non-negative integer"
            self.local_rank = rank
        else:
            self.local_rank = ParallelEnv().local_rank

        self.drop_last = drop_last
        # used as the shuffle seed, see __iter__ / set_epoch
        self.epoch = 0
        # samples handled by each rank (rounded up) and the padded global size
        self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.nranks))
        self.total_size = self.num_samples * self.nranks

    def __iter__(self):
        """Yield the lists of sample indices (batches) assigned to this rank."""
        num_samples = len(self.dataset)
        indices = np.arange(num_samples).tolist()
        # pad with indices from the start so the total is divisible by nranks
        indices += indices[:(self.total_size - len(indices))]
        assert len(indices) == self.total_size

        # subsample
        def _get_indices_by_batch_size(indices):
            # Within every full round of (batch_size * nranks) indices this rank
            # takes its own contiguous chunk of batch_size indices; the leftover
            # tail (last_batch_size indices) is split evenly between the ranks.
            subsampled_indices = []
            last_batch_size = self.total_size % (self.batch_size * self.nranks)
            assert last_batch_size % self.nranks == 0
            last_local_batch_size = last_batch_size // self.nranks

            for i in range(self.local_rank * self.batch_size,
                           len(indices) - last_batch_size,
                           self.batch_size * self.nranks):
                subsampled_indices.extend(indices[i:i + self.batch_size])

            indices = indices[len(indices) - last_batch_size:]
            subsampled_indices.extend(
                indices[self.local_rank * last_local_batch_size:(
                    self.local_rank + 1) * last_local_batch_size])
            return subsampled_indices

        if self.nranks > 1:
            indices = _get_indices_by_batch_size(indices)

        assert len(indices) == self.num_samples
        _sample_iter = iter(indices)

        # group the local indices into consecutive batches
        batch_indices_list = []
        batch_indices = []
        for idx in _sample_iter:
            batch_indices.append(idx)
            if len(batch_indices) == self.batch_size:
                batch_indices_list.append(batch_indices)
                batch_indices = []
        # keep the trailing partial batch unless drop_last is set
        if not self.drop_last and len(batch_indices) > 0:
            batch_indices_list.append(batch_indices)

        if self.shuffle:
            # shuffle the batch order (not the samples inside a batch), seeded
            # by the current epoch; the epoch is then advanced so the next
            # iteration uses a different order
            np.random.RandomState(self.epoch).shuffle(batch_indices_list)
            self.epoch += 1

        for batch_indices in batch_indices_list:
            yield batch_indices

    def __len__(self):
        """Number of batches per rank: rounded up unless drop_last, else rounded down."""
        num_samples = self.num_samples
        num_samples += int(not self.drop_last) * (self.batch_size - 1)
        return num_samples // self.batch_size

    def set_epoch(self, epoch):
        """
        Sets the epoch number. When :attr:`shuffle=True`, this number is used
        as the seed of the random batch ordering. Without calling this method
        the epoch starts at 0 and is incremented after every shuffled
        iteration, so each iteration gets a different ordering. If the same
        number is set before each epoch, this sampler will yield the same
        ordering in all epochs.
        Arguments:
            epoch (int): Epoch number.
        Examples:
            .. code-block:: python

                import numpy as np

                from paddle.io import Dataset
                from paddlespeech.t2s.datasets.sampler import ErnieSATSampler

                # init with dataset
                class RandomDataset(Dataset):
                    def __init__(self, num_samples):
                        self.num_samples = num_samples

                    def __getitem__(self, idx):
                        image = np.random.random([784]).astype('float32')
                        label = np.random.randint(0, 9, (1, )).astype('int64')
                        return image, label

                    def __len__(self):
                        return self.num_samples

                dataset = RandomDataset(100)
                sampler = ErnieSATSampler(dataset, batch_size=64)

                for epoch in range(10):
                    sampler.set_epoch(epoch)
        """
        self.epoch = epoch


================================================
FILE: paddlespeech/t2s/datasets/vocoder_batch_fn.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle

from paddlespeech.t2s.audio.codec import encode_mu_law
from paddlespeech.t2s.audio.codec import float_2_label
from paddlespeech.t2s.audio.codec import label_2_float


class Clip(object):
    """Collate functor for training vocoders.

    For every utterance that is long enough, a random window of features and
    the matching waveform samples are cut out and stacked into a batch.
    """

    def __init__(
            self,
            batch_max_steps=20480,
            hop_size=256,
            aux_context_window=0, ):
        """Initialize customized collater for DataLoader.
        Args:

            batch_max_steps (int): The maximum length of input signal in batch;
                rounded down to a multiple of hop_size.
            hop_size (int): Hop size of auxiliary features.
            aux_context_window (int): Context window size for auxiliary feature conv.

        """
        # round down to a whole number of hops (no-op when already a multiple)
        batch_max_steps -= batch_max_steps % hop_size
        assert batch_max_steps % hop_size == 0

        self.hop_size = hop_size
        self.aux_context_window = aux_context_window
        self.batch_max_steps = batch_max_steps
        self.batch_max_frames = batch_max_steps // hop_size

        # set useful values in random cutting
        self.start_offset = aux_context_window
        self.end_offset = -(self.batch_max_frames + aux_context_window)
        self.mel_threshold = self.batch_max_frames + 2 * aux_context_window

    def __call__(self, batch):
        """Convert into batch tensors.

        Args:
            batch (list): list of dicts with keys 'wave' and 'feats'. Audio shape (T, ), features shape(T', C).

        Returns:
            Tensor:
                Target signal batch (B, 1, T).
            Tensor:
                Auxiliary feature batch (B, C, T'), where
                T = (T' - 2 * aux_context_window) * hop_size.
        """
        # keep only utterances with more feature frames than mel_threshold
        usable = [
            self._adjust_length(sample['wave'], sample['feats'])
            for sample in batch
            if sample['feats'].shape[0] > self.mel_threshold
        ]

        # one random start frame per kept utterance, drawn in batch order
        first_frames = np.array([
            np.random.randint(self.start_offset,
                              feats.shape[0] + self.end_offset)
            for _, feats in usable
        ])
        wave_begin = first_frames * self.hop_size
        feat_begin = first_frames - self.aux_context_window
        # feature window = batch_max_frames plus context on both sides
        feat_span = self.batch_max_frames + 2 * self.aux_context_window

        wave_clips = np.stack([
            wave[begin:begin + self.batch_max_steps]
            for (wave, _), begin in zip(usable, wave_begin)
        ])
        feat_clips = np.stack([
            feats[begin:begin + feat_span]
            for (_, feats), begin in zip(usable, feat_begin)
        ])

        # convert each batch to tensor, assume that each item in batch has the same length
        y_batch = paddle.to_tensor(wave_clips, dtype=paddle.float32)
        y_batch = y_batch.unsqueeze(1)  # (B, 1, T)
        c_batch = paddle.to_tensor(feat_clips, dtype=paddle.float32)
        c_batch = c_batch.transpose([0, 2, 1])  # (B, C, T')

        return y_batch, c_batch

    def _adjust_length(self, x, c):
        """Adjust the audio length to match the feature length.

        The waveform is edge-padded or truncated to exactly
        ``c.shape[0] * hop_size`` samples; the features are returned unchanged.

        Note:
            Basically we assume that the length of x and c are adjusted
            through preprocessing stage, but if we use other library processed
            features, this process will be needed.

        """
        expected_len = c.shape[0] * self.hop_size
        if len(x) < expected_len:
            x = np.pad(x, (0, expected_len - len(x)), mode="edge")
        elif len(x) > expected_len:
            x = x[:expected_len]
        # check the length is valid
        assert len(
            x
        ) == expected_len, f"wave length: ({len(x)}), mel length: ({c.shape[0]})"

        return x, c


class WaveRNNClip(Clip):
    """Collate functor for training WaveRNN.

    Args:
        mode (str): 'RAW' or 'MOL'; selects how the waveform is quantized.
        batch_max_steps (int): number of waveform samples per training clip.
        hop_size (int): Hop size of auxiliary features.
        aux_context_window (int): number of context frames on each side of a clip.
        bits (int): quantization bits; forced to 16 when mode is 'MOL'.
        mu_law (bool): in 'RAW' mode, use mu-law encoding instead of linear labels.

    Note:
        ``Clip.__init__`` is not invoked; the attributes this class reads are
        set directly here (batch_max_steps is therefore not rounded to a
        multiple of hop_size).
    """

    def __init__(self,
                 mode: str='RAW',
                 batch_max_steps: int=4500,
                 hop_size: int=300,
                 aux_context_window: int=2,
                 bits: int=9,
                 mu_law: bool=True):
        self.mode = mode
        # mel frames per clip, context frames included
        self.mel_win = batch_max_steps // hop_size + 2 * aux_context_window
        self.batch_max_steps = batch_max_steps
        self.hop_size = hop_size
        self.aux_context_window = aux_context_window
        self.mu_law = mu_law
        self.batch_max_frames = batch_max_steps // hop_size
        self.mel_threshold = self.batch_max_frames + 2 * aux_context_window
        if self.mode == 'MOL':
            self.bits = 16
        else:
            self.bits = bits

    def to_quant(self, wav):
        """Quantize a float waveform to int64 labels according to ``self.mode``.

        Raises:
            ValueError: if ``self.mode`` is neither 'RAW' nor 'MOL'.
        """
        if self.mode == 'RAW':
            if self.mu_law:
                quant = encode_mu_law(wav, mu=2**self.bits)
            else:
                quant = float_2_label(wav, bits=self.bits)
        elif self.mode == 'MOL':
            quant = float_2_label(wav, bits=16)
        else:
            # previously fell through and failed on an unbound local variable
            raise ValueError(
                f"unsupported mode: {self.mode!r}, expected 'RAW' or 'MOL'")
        quant = quant.astype(np.int64)
        return quant

    def __call__(self, batch):
        """Convert into batch tensors.
        Args:
            batch (list): list of dicts with keys 'wave' and 'feats'. Audio shape (T, ), features shape(T', C).

        Returns:
            Tensor: Input signal batch (B, T).
            Tensor: Target signal batch (B, T).
            Tensor: Auxiliary feature batch (B, C, T'), 
                where T = (T' - 2 * aux_context_window) * hop_size.

        """
        # voc_pad = 2  this will pad the input so that the resnet can 'see' wider than input length
        # max_offsets = n_frames - 2 - (mel_win + 2 * hp.voc_pad) = n_frames - 15

        # check length
        # An utterance is only usable when its max offset (computed below) is at
        # least 1, because np.random.randint(0, high) raises for high <= 0.
        # mel_threshold alone does not guarantee that, so the stricter bound is
        # applied as well and too-short utterances are skipped.
        min_frames = self.mel_win + 2 * self.aux_context_window + 2
        frame_threshold = max(self.mel_threshold, min_frames)
        batch = [
            self._adjust_length(b['wave'], b['feats']) for b in batch
            if b['feats'].shape[0] > frame_threshold
        ]
        wav, mel = [b[0] for b in batch], [b[1] for b in batch]
        # mel needs to be transposed here: (T', C) -> (C, T')
        mel = [x.T for x in mel]
        max_offsets = [
            x.shape[-1] - 2 - (self.mel_win + 2 * self.aux_context_window)
            for x in mel
        ]
        # the slice point of mel selecting randomly 
        mel_offsets = [np.random.randint(0, offset) for offset in max_offsets]
        # the slice point of wav selecting randomly, which is behind 2(=pad) frames 
        sig_offsets = [(offset + self.aux_context_window) * self.hop_size
                       for offset in mel_offsets]
        # mels.shape[1] = voc_seq_len // hop_length + 2 * voc_pad
        mels = [
            x[:, mel_offsets[i]:mel_offsets[i] + self.mel_win]
            for i, x in enumerate(mel)
        ]
        # label.shape[1] = voc_seq_len + 1
        wav = [self.to_quant(x) for x in wav]

        labels = [
            x[sig_offsets[i]:sig_offsets[i] + self.batch_max_steps + 1]
            for i, x in enumerate(wav)
        ]

        mels = np.stack(mels).astype(np.float32)
        labels = np.stack(labels).astype(np.int64)

        mels = paddle.to_tensor(mels)
        labels = paddle.to_tensor(labels, dtype='int64')
        # x is input, y is label (x shifted by one sample)
        x = labels[:, :self.batch_max_steps]
        y = labels[:, 1:]
        # Quantization summary (kept from the original notes):
        #   mode = RAW:
        #       mu_law = True:  bits = 9 -> 0, 1, 2, ..., 509, 510, 511  int
        #       mu_law = False: bits = 9 -> [0, 511]  float
        #   mode = MOL:
        #       bits = 16 -> [0, 65536]  float
        # the input x is always converted back to float
        x = label_2_float(paddle.cast(x, dtype='float32'), self.bits)
        # the target y is converted to float only in MOL mode; otherwise it
        # stays an int64 label
        if self.mode == 'MOL':
            y = label_2_float(paddle.cast(y, dtype='float32'), self.bits)

        return x, y, mels


# for paddleslim


class Clip_static(Clip):
    """Collate functor producing a single feature clip (used for paddleslim).
    """

    def __call__(self, batch):
        """Cut a random feature window per utterance and return the first one.

        Args:
            batch (list): list of dicts with keys 'wave' and 'feats'. Audio shape (T, ), features shape(T', C).

        Returns: 
            Dict[str, np.array]:
                {"logmel": clip} where clip is the window of the first kept
                utterance. It keeps the (T', C) axis order (it is not
                transposed to (C, T')) and has no batch axis.
        """
        # keep only utterances with more feature frames than mel_threshold;
        # _adjust_length is still run for its length check
        kept_feats = [
            self._adjust_length(item['wave'], item['feats'])[1]
            for item in batch
            if item['feats'].shape[0] > self.mel_threshold
        ]

        # one random start frame per kept utterance, drawn in batch order
        first_frames = np.array([
            np.random.randint(self.start_offset,
                              feats.shape[0] + self.end_offset)
            for feats in kept_feats
        ])
        window_lo = first_frames - self.aux_context_window
        window_hi = first_frames + self.batch_max_frames + self.aux_context_window

        stacked = np.stack([
            feats[lo:hi]
            for feats, lo, hi in zip(kept_feats, window_lo, window_hi)
        ])
        # do not need batch axis in infer
        return {"logmel": stacked[0]}


================================================
FILE: paddlespeech/t2s/exps/PTQ_dynamic.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse

import paddle
from paddleslim.quant import quant_post_dynamic


def parse_args():
    """Parse the command-line options of the dynamic post-training quantization script.

    Unknown options are ignored (``parse_known_args`` is used).

    Returns:
        argparse.Namespace: with attributes ``model_name``, ``inference_dir``
            and ``weight_bits``.
    """
    # acoustic models and vocoders accepted by --model_name
    supported_models = [
        'speedyspeech_csmsc',
        'fastspeech2_csmsc',
        'fastspeech2_aishell3',
        'fastspeech2_ljspeech',
        'fastspeech2_vctk',
        'tacotron2_csmsc',
        'fastspeech2_mix',
        'pwgan_csmsc',
        'pwgan_aishell3',
        'pwgan_ljspeech',
        'pwgan_vctk',
        'mb_melgan_csmsc',
        'hifigan_csmsc',
        'hifigan_aishell3',
        'hifigan_ljspeech',
        'hifigan_vctk',
        'wavernn_csmsc',
    ]

    parser = argparse.ArgumentParser(
        description="Paddle Slim Dynamic with acoustic model & vocoder.")
    parser.add_argument(
        '--model_name',
        type=str,
        default='fastspeech2_csmsc',
        choices=supported_models,
        help='Choose model type of tts task.')
    parser.add_argument(
        "--inference_dir", type=str, help="dir to save inference models")
    parser.add_argument(
        "--weight_bits",
        type=int,
        default=8,
        choices=[8, 16],
        help="The bits for the quantized weight, and it should be 8 or 16. Default is 8.",
    )

    known_args, _unknown = parser.parse_known_args()
    return known_args


# Entry point: dynamic post-training quantization of an exported model.
def main():
    """Quantize the weights of ``<model_name>.pdmodel`` / ``.pdiparams``.

    The quantized files are written next to the originals in
    ``--inference_dir`` as ``<model_name>_<weight_bits>bits.pdmodel`` and
    ``<model_name>_<weight_bits>bits.pdiparams``.
    """
    args = parse_args()
    # Static-graph mode is switched on before calling into PaddleSlim.
    paddle.enable_static()

    source_stem = args.model_name
    quantized_stem = args.model_name + "_" + str(args.weight_bits) + "bits"
    quant_post_dynamic(
        model_dir=args.inference_dir,
        save_model_dir=args.inference_dir,
        model_filename=source_stem + ".pdmodel",
        params_filename=source_stem + ".pdiparams",
        save_model_filename=quantized_stem + ".pdmodel",
        save_params_filename=quantized_stem + ".pdiparams",
        weight_bits=args.weight_bits, )


if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/PTQ_static.py
================================================
import argparse
import random

import jsonlines
import numpy as np
import paddle
from paddleslim.quant import quant_post_static

from paddlespeech.t2s.exps.syn_utils import get_dev_dataloader
from paddlespeech.t2s.utils import str2bool


def parse_args():
    """Parse the command line of the static post-training quantization script.

    Returns:
        argparse.Namespace: calibration settings (``batch_size``,
        ``batch_num``, ``algo``, ``round_type``, ``hist_percent``, ...),
        model location (``inference_dir``, ``model_name``), data files
        (``dev_metadata``, ``phones_dict``, ``speaker_dict``) and
        ``quantizable_op_type``, a list of op-type name strings.
    """
    parser = argparse.ArgumentParser(
        description="Paddle Slim Static with acoustic model & vocoder.")

    parser.add_argument(
        "--batch_size", type=int, default=1, help="Minibatch size.")
    parser.add_argument("--batch_num", type=int, default=1, help="Batch number")
    parser.add_argument(
        "--ngpu", type=int, default=1, help="if ngpu=0, use cpu.")
    # model_path save_path
    parser.add_argument(
        "--inference_dir", type=str, help="dir to save inference models")
    parser.add_argument(
        '--model_name',
        type=str,
        default='fastspeech2_csmsc',
        choices=[
            'speedyspeech_csmsc',
            'fastspeech2_csmsc',
            'fastspeech2_aishell3',
            'fastspeech2_ljspeech',
            'fastspeech2_vctk',
            'fastspeech2_mix',
            'pwgan_csmsc',
            'pwgan_aishell3',
            'pwgan_ljspeech',
            'pwgan_vctk',
            'mb_melgan_csmsc',
            'hifigan_csmsc',
            'hifigan_aishell3',
            'hifigan_ljspeech',
            'hifigan_vctk',
            'pwgan_opencpop',
            'hifigan_opencpop',
        ],
        help='Choose model type of tts task.')

    parser.add_argument(
        "--algo", type=str, default='avg', help="calibration algorithm.")
    parser.add_argument(
        "--round_type",
        type=str,
        default='round',
        help="The method of converting the quantized weights.")
    parser.add_argument(
        "--hist_percent",
        type=float,
        default=0.9999,
        help="The percentile of algo:hist.")
    parser.add_argument(
        "--is_full_quantize",
        type=str2bool,
        default=False,
        help="Whether is full quantization or not.")
    parser.add_argument(
        "--bias_correction",
        type=str2bool,
        default=False,
        help="Whether to use bias correction.")
    parser.add_argument(
        "--ce_test", type=str2bool, default=False, help="Whether to CE test.")
    parser.add_argument(
        "--onnx_format",
        type=str2bool,
        default=False,
        help="Whether to export the quantized model with format of ONNX.")
    parser.add_argument(
        "--phones-dict", type=str, default=None, help="phone vocabulary file.")
    parser.add_argument(
        "--speaker-dict",
        type=str,
        default=None,
        help="speaker id map file for multiple speaker model.")
    parser.add_argument("--dev-metadata", type=str, help="dev data.")
    # `type` converts ONE command-line token; with nargs='+' argparse already
    # collects the tokens into a list. `type=list` would split every token
    # into single characters, so each op type is parsed as a plain string.
    parser.add_argument(
        "--quantizable_op_type",
        type=str,
        nargs='+',
        default=[
            "conv2d_transpose", "conv2d", "depthwise_conv2d", "mul", "matmul",
            "matmul_v2"
        ],
        help="The list of op types that will be quantized.")

    args = parser.parse_args()
    return args


def quantize(args):
    """Run PaddleSlim static post-training quantization as described by ``args``.

    The calibration data comes from the jsonlines file ``args.dev_metadata``;
    the quantized model is written to ``<inference_dir>/<model_name>_quant``
    using the same file names as the source model.

    Args:
        args: namespace produced by ``parse_args``.
    """
    # CE test mode: fix every random seed and keep the sample order stable.
    fixed_order = args.ce_test
    if fixed_order:
        # set seed
        seed = 111
        np.random.seed(seed)
        paddle.seed(seed)
        random.seed(seed)
    shuffle = False if fixed_order else True

    if args.ngpu > 0:
        place = paddle.CUDAPlace(0)
    else:
        place = paddle.CPUPlace()

    with jsonlines.open(args.dev_metadata, 'r') as reader:
        dev_metadata = list(reader)

    dataloader = get_dev_dataloader(
        dev_metadata=dev_metadata,
        am=args.model_name,
        batch_size=args.batch_size,
        speaker_dict=args.speaker_dict,
        shuffle=shuffle)

    exe = paddle.static.Executor(place)
    exe.run()

    print("onnx_format:", args.onnx_format)

    # Source and quantized model share file names; only the directory differs.
    model_file = args.model_name + ".pdmodel"
    params_file = args.model_name + ".pdiparams"
    quantized_dir = args.inference_dir + "/" + args.model_name + "_quant"

    quant_post_static(
        executor=exe,
        model_dir=args.inference_dir,
        quantize_model_path=quantized_dir,
        data_loader=dataloader,
        model_filename=model_file,
        params_filename=params_file,
        save_model_filename=model_file,
        save_params_filename=params_file,
        batch_size=args.batch_size,
        algo=args.algo,
        round_type=args.round_type,
        hist_percent=args.hist_percent,
        is_full_quantize=args.is_full_quantize,
        bias_correction=args.bias_correction,
        onnx_format=args.onnx_format,
        quantizable_op_type=args.quantizable_op_type)


def main():
    """Parse the arguments, normalize the op-type list and run quantization."""
    args = parse_args()
    # Collapse every entry into one plain string. Joining is a no-op for an
    # entry that already is a string and merges a sequence of characters.
    args.quantizable_op_type = [
        ''.join(entry) for entry in args.quantizable_op_type
    ]
    paddle.enable_static()
    quantize(args)


if __name__ == '__main__':
    main()


================================================
FILE: paddlespeech/t2s/exps/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/exps/diffsinger/__init__.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/exps/diffsinger/gen_gta_mel.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# generate mels using durations.txt
# for mb melgan finetune
import argparse
import os
from pathlib import Path

import numpy as np
import paddle
import yaml
from tqdm import tqdm
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.preprocess_utils import get_sentences_svs
from paddlespeech.t2s.models.diffsinger import DiffSinger
from paddlespeech.t2s.models.diffsinger import DiffSingerInference
from paddlespeech.t2s.modules.normalizer import ZScore
from paddlespeech.t2s.utils import str2bool


def evaluate(args, diffsinger_config):
    """Run a trained DiffSinger model on every utterance and dump mels per split.

    The model is rebuilt from ``diffsinger_config`` and the checkpoint given by
    ``args.diffsinger_checkpoint``, wrapped in ``DiffSingerInference`` together
    with a ``ZScore`` normalizer, and called once per utterance returned by
    ``get_sentences_svs``. Each result is saved to
    ``<output_dir>/{train,dev,test}/raw/<utt_id>_feats.npy``.

    Args:
        args: parsed command-line namespace. Reads ``rootdir``, ``dataset``,
            ``phones_dict``, ``speaker_dict``, ``diffsinger_stretch``,
            ``diffsinger_checkpoint``, ``diffsinger_stat``, ``dur_file`` and
            ``output_dir``.
        diffsinger_config: config node. Reads ``n_mels``, ``fs``, ``n_shift``
            and the ``"model"`` section.

    Raises:
        ValueError: if ``args.dataset`` is not ``"opencpop"``.
    """
    rootdir = Path(args.rootdir).expanduser()
    assert rootdir.is_dir()

    # construct dataset for evaluation
    with open(args.phones_dict, "r") as f:
        phn_id = [line.strip().split() for line in f.readlines()]
    vocab_size = len(phn_id)
    print("vocab_size:", vocab_size)

    phone_dict = {}
    for phn, id in phn_id:
        phone_dict[phn] = int(id)

    if args.speaker_dict:
        with open(args.speaker_dict, 'rt') as f:
            spk_id_list = [line.strip().split() for line in f.readlines()]
            spk_num = len(spk_id_list)
    else:
        spk_num = None

    # The stretch file stores [min, max]; np.load opens the file by itself, so
    # it is loaded exactly once and no extra file handle is needed.
    stretch = np.load(args.diffsinger_stretch)
    spec_min = paddle.to_tensor(stretch[0])
    spec_max = paddle.to_tensor(stretch[1])
    print("min and max spec done!")

    odim = diffsinger_config.n_mels
    diffsinger_config["model"]["fastspeech2_params"]["spk_num"] = spk_num
    model = DiffSinger(
        spec_min=spec_min,
        spec_max=spec_max,
        idim=vocab_size,
        odim=odim,
        **diffsinger_config["model"], )

    model.set_state_dict(paddle.load(args.diffsinger_checkpoint)["main_params"])
    model.eval()

    stat = np.load(args.diffsinger_stat)
    mu, std = stat
    mu = paddle.to_tensor(mu)
    std = paddle.to_tensor(std)
    diffsinger_normalizer = ZScore(mu, std)

    diffsinger_inference = DiffSingerInference(diffsinger_normalizer, model)
    diffsinger_inference.eval()

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    sentences, speaker_set = get_sentences_svs(
        args.dur_file,
        dataset=args.dataset,
        sample_rate=diffsinger_config.fs,
        n_shift=diffsinger_config.n_shift, )

    if args.dataset != "opencpop":
        # Without the split lists below nothing can be dumped, so stop with a
        # clear error instead of failing later on an undefined name.
        raise ValueError("dataset should in {opencpop} now!")

    wavdir = rootdir / "wavs"
    # split data into 3 sections; sets give O(1) membership tests in the loop
    train_wav_files = set()
    with open(rootdir / "train.txt", "r") as f_train:
        for line in f_train:
            utt = line.split("|")[0]
            train_wav_files.add(os.path.basename(str(wavdir / (utt + ".wav"))))

    # The first `num_dev` lines of test.txt form the dev split, the rest test.
    dev_wav_files = set()
    test_wav_files = set()
    num_dev = 106
    with open(rootdir / "test.txt", "r") as f_test:
        for count, line in enumerate(f_test, start=1):
            utt = line.split("|")[0]
            wav_name = os.path.basename(str(wavdir / (utt + ".wav")))
            if count > num_dev:
                test_wav_files.add(wav_name)
            else:
                dev_wav_files.add(wav_name)

    for utt_id in tqdm(sentences):
        phones = sentences[utt_id][0]
        durations = sentences[utt_id][1]
        note = sentences[utt_id][2]
        note_dur = sentences[utt_id][3]
        is_slur = sentences[utt_id][4]
        speaker = sentences[utt_id][-1]

        phone_ids = [phone_dict[phn] for phn in phones]
        phone_ids = paddle.to_tensor(np.array(phone_ids))

        # NOTE(review): speaker_id and durations are prepared here but are not
        # passed to the inference call below - confirm whether that is intended.
        if args.speaker_dict:
            speaker_id = int(
                [item[1] for item in spk_id_list if speaker == item[0]][0])
            speaker_id = paddle.to_tensor(speaker_id)
        else:
            speaker_id = None

        durations = paddle.to_tensor(np.array(durations))
        note = paddle.to_tensor(np.array(note))
        note_dur = paddle.to_tensor(np.array(note_dur))
        is_slur = paddle.to_tensor(np.array(is_slur))
        # The generated mel and the real one may differ by 1 or 2 frames,
        # but batch_fn will fix that.
        # split data into 3 sections

        wav_path = utt_id + ".wav"

        if wav_path in train_wav_files:
            sub_output_dir = output_dir / ("train/raw")
        elif wav_path in dev_wav_files:
            sub_output_dir = output_dir / ("dev/raw")
        elif wav_path in test_wav_files:
            sub_output_dir = output_dir / ("test/raw")
        else:
            # Not part of any split: skip it rather than writing the mel into
            # the directory chosen for the previous utterance.
            print(f"skip {utt_id}: not listed in train.txt or test.txt")
            continue

        sub_output_dir.mkdir(parents=True, exist_ok=True)

        with paddle.no_grad():
            mel = diffsinger_inference(
                text=phone_ids,
                note=note,
                note_dur=note_dur,
                is_slur=is_slur,
                get_mel_fs2=False)
        np.save(sub_output_dir / (utt_id + "_feats.npy"), mel)


def main():
    """Command-line entry point: parse options, pick the device, run ``evaluate``."""
    cli = argparse.ArgumentParser(description="Generate mel with diffsinger.")

    # dataset location
    cli.add_argument(
        "--dataset",
        type=str,
        default="opencpop",
        help="name of dataset, should in {opencpop} now")
    cli.add_argument(
        "--rootdir", type=str, default=None, help="directory to dataset.")

    # DiffSinger model files
    cli.add_argument(
        "--diffsinger-config", type=str, help="diffsinger config file.")
    cli.add_argument(
        "--diffsinger-checkpoint",
        type=str,
        help="diffsinger checkpoint to load.")
    cli.add_argument(
        "--diffsinger-stat",
        type=str,
        help="mean and standard deviation used to normalize spectrogram when training diffsinger."
    )
    cli.add_argument(
        "--diffsinger-stretch",
        type=str,
        help="min and max mel used to stretch before training diffusion.")

    # vocabularies
    cli.add_argument(
        "--phones-dict",
        type=str,
        default="phone_id_map.txt",
        help="phone vocabulary file.")
    cli.add_argument(
        "--speaker-dict", type=str, default=None, help="speaker id map file.")

    # inputs / outputs / device
    cli.add_argument(
        "--dur-file", type=str, default=None, help="path to durations.txt.")
    cli.add_argument("--output-dir", type=str, help="output dir.")
    cli.add_argument(
        "--ngpu", type=int, default=1, help="if ngpu == 0, use cpu.")

    args = cli.parse_args()

    # A negative value only prints a message; the device is left untouched.
    if args.ngpu > 0:
        paddle.set_device("gpu")
    elif args.ngpu == 0:
        paddle.set_device("cpu")
    else:
        print("ngpu should >= 0 !")

    with open(args.diffsinger_config) as config_file:
        diffsinger_config = CfgNode(yaml.safe_load(config_file))

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(diffsinger_config)

    evaluate(args, diffsinger_config)


if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/diffsinger/get_minmax.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging

import jsonlines
import numpy as np
from tqdm import tqdm

from paddlespeech.t2s.datasets.data_table import DataTable


def get_minmax(spec, min_spec, max_spec):
    """Fold one spectrogram into the running per-column minimum and maximum.

    Args:
        spec: 2-D array indexed as ``[frame, column]`` (``[T, 80]`` in this
            script).
        min_spec: running minimum per column; updated in place.
        max_spec: running maximum per column; updated in place.

    Returns:
        tuple: the same ``min_spec`` and ``max_spec`` objects after the update.
    """
    num_columns = spec.shape[1]
    for col in range(num_columns):
        column = spec[:, col]
        lowest = np.min(column)
        highest = np.max(column)
        min_spec[col] = min(lowest, min_spec[col])
        max_spec[col] = max(highest, max_spec[col])

    return min_spec, max_spec


def main():
    """Scan the mel features listed in a metadata file and save a [min, max] table.

    The saved array stacks two rows of ``n_mel`` values: row 0 is the minimum
    (forced to -6.0, see below) and row 1 is the per-column maximum observed
    over the dataset.
    """
    arg_parser = argparse.ArgumentParser(
        description="Normalize dumped raw features (See detail in parallel_wavegan/bin/normalize.py)."
    )
    arg_parser.add_argument(
        "--metadata",
        type=str,
        required=True,
        help="directory including feature files to be normalized. "
        "you need to specify either *-scp or rootdir.")
    arg_parser.add_argument(
        "--speech-stretchs",
        type=str,
        required=True,
        help="min max spec file. only computer on train data")
    opts = arg_parser.parse_args()

    # get dataset; the "speech" field of each record is loaded with np.load
    with jsonlines.open(opts.metadata, 'r') as reader:
        records = list(reader)
    dataset = DataTable(records, converters={"speech": np.load})
    logging.info(f"The number of files = {len(dataset)}.")

    n_mel = 80
    # Start from sentinels that any real value is expected to beat.
    min_spec = np.full(n_mel, 100.0, dtype=np.float32)
    max_spec = np.full(n_mel, -100.0, dtype=np.float32)

    for sample in tqdm(dataset):
        min_spec, max_spec = get_minmax(sample['speech'], min_spec, max_spec)

    # Using min_spec=-6.0 training effect is better so far, so the measured
    # minimum is replaced by that constant before saving.
    min_spec = np.full(n_mel, -6.0, dtype=np.float32)
    stretch_table = np.stack([min_spec, max_spec], axis=0)
    np.save(
        str(opts.speech_stretchs),
        stretch_table.astype(np.float32),
        allow_pickle=False)


if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/diffsinger/normalize.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Normalize feature files and dump them."""
import argparse
import logging
from operator import itemgetter
from pathlib import Path

import jsonlines
import numpy as np
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

from paddlespeech.t2s.datasets.data_table import DataTable
from paddlespeech.t2s.utils import str2bool


def _restore_scaler(stats_path, norm_feats):
    """Build a ``StandardScaler`` from a statistics file holding [mean, scale].

    When ``norm_feats`` is false the scaler becomes an identity transform
    (zero mean, unit scale) with the same shapes as the stored statistics.
    """
    stats = np.load(stats_path)
    scaler = StandardScaler()
    if norm_feats:
        scaler.mean_ = stats[0]
        scaler.scale_ = stats[1]
    else:
        scaler.mean_ = np.zeros(stats[0].shape, dtype="float32")
        scaler.scale_ = np.ones(stats[1].shape, dtype="float32")
    scaler.n_features_in_ = scaler.mean_.shape[0]
    return scaler


def main():
    """Normalize dumped speech/pitch/energy features and write a new metadata file.

    For every record of ``--metadata`` the three features are transformed with
    scalers restored from the statistics files, saved below ``--dumpdir`` in
    ``data_speech``, ``data_pitch`` and ``data_energy``, and described by one
    line of ``<dumpdir>/metadata.jsonl`` (sorted by ``utt_id``). Phones and
    speakers are replaced by their integer ids from the two dictionary files.
    """
    parser = argparse.ArgumentParser(
        description="Normalize dumped raw features (See detail in parallel_wavegan/bin/normalize.py)."
    )
    parser.add_argument(
        "--metadata",
        type=str,
        required=True,
        help="directory including feature files to be normalized. "
        "you need to specify either *-scp or rootdir.")

    parser.add_argument(
        "--dumpdir",
        type=str,
        required=True,
        help="directory to dump normalized feature files.")
    parser.add_argument(
        "--speech-stats",
        type=str,
        required=True,
        help="speech statistics file.")
    parser.add_argument(
        "--pitch-stats", type=str, required=True, help="pitch statistics file.")
    parser.add_argument(
        "--energy-stats",
        type=str,
        required=True,
        help="energy statistics file.")
    parser.add_argument(
        "--phones-dict", type=str, default=None, help="phone vocabulary file.")
    parser.add_argument(
        "--speaker-dict", type=str, default=None, help="speaker id map file.")
    parser.add_argument(
        "--norm-feats",
        type=str2bool,
        default=False,
        help="whether to norm features")

    args = parser.parse_args()

    # Both dictionaries are opened unconditionally below, so report a missing
    # one as a usage error instead of failing later inside open(None).
    if args.phones_dict is None or args.speaker_dict is None:
        parser.error("--phones-dict and --speaker-dict are required.")

    # use absolute path
    dumpdir = Path(args.dumpdir).expanduser().resolve()
    dumpdir.mkdir(parents=True, exist_ok=True)

    # get dataset
    with jsonlines.open(args.metadata, 'r') as reader:
        metadata = list(reader)
    dataset = DataTable(
        metadata,
        converters={
            "speech": np.load,
            "pitch": np.load,
            "energy": np.load,
        })
    logging.info(f"The number of files = {len(dataset)}.")

    # restore scaler (each statistics file is read once)
    speech_scaler = _restore_scaler(args.speech_stats, args.norm_feats)
    pitch_scaler = _restore_scaler(args.pitch_stats, args.norm_feats)
    energy_scaler = _restore_scaler(args.energy_stats, args.norm_feats)

    vocab_phones = {}
    with open(args.phones_dict, 'rt') as f:
        phn_id = [line.strip().split() for line in f.readlines()]
    for phn, id in phn_id:
        vocab_phones[phn] = int(id)

    vocab_speaker = {}
    with open(args.speaker_dict, 'rt') as f:
        spk_id_pairs = [line.strip().split() for line in f.readlines()]
    for spk, id in spk_id_pairs:
        vocab_speaker[spk] = int(id)

    # Output directories do not depend on the utterance: create them once.
    speech_dir = dumpdir / "data_speech"
    pitch_dir = dumpdir / "data_pitch"
    energy_dir = dumpdir / "data_energy"
    for feature_dir in (speech_dir, pitch_dir, energy_dir):
        feature_dir.mkdir(parents=True, exist_ok=True)

    # process each file
    output_metadata = []

    for item in tqdm(dataset):
        utt_id = item['utt_id']

        # normalize
        speech = speech_scaler.transform(item['speech'])
        speech_path = speech_dir / f"{utt_id}_speech.npy"
        np.save(speech_path, speech.astype(np.float32), allow_pickle=False)

        pitch = pitch_scaler.transform(item['pitch'])
        pitch_path = pitch_dir / f"{utt_id}_pitch.npy"
        np.save(pitch_path, pitch.astype(np.float32), allow_pickle=False)

        energy = energy_scaler.transform(item['energy'])
        energy_path = energy_dir / f"{utt_id}_energy.npy"
        np.save(energy_path, energy.astype(np.float32), allow_pickle=False)

        phone_ids = [vocab_phones[p] for p in item['phones']]
        spk_id = vocab_speaker[item["speaker"]]
        record = {
            "utt_id": item['utt_id'],
            "spk_id": spk_id,
            "text": phone_ids,
            "text_lengths": item['text_lengths'],
            "speech_lengths": item['speech_lengths'],
            "durations": item['durations'],
            "speech": str(speech_path),
            "pitch": str(pitch_path),
            "energy": str(energy_path),
            "note": item['note'],
            "note_dur": item['note_dur'],
            "is_slur": item['is_slur'],
        }
        # add spk_emb for voice cloning
        if "spk_emb" in item:
            record["spk_emb"] = str(item["spk_emb"])

        output_metadata.append(record)
    output_metadata.sort(key=itemgetter('utt_id'))
    # Write next to the features: `dumpdir` is the expanded, resolved path,
    # whereas the raw `args.dumpdir` may still contain "~".
    output_metadata_path = dumpdir / "metadata.jsonl"
    with jsonlines.open(output_metadata_path, 'w') as writer:
        for item in output_metadata:
            writer.write(item)
    logging.info(f"metadata dumped into {output_metadata_path}")


if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/diffsinger/preprocess.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
from concurrent.futures import ThreadPoolExecutor
from operator import itemgetter
from pathlib import Path
from typing import Any
from typing import Dict
from typing import List

import jsonlines
import librosa
import numpy as np
import tqdm
import yaml
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.get_feats import Energy
from paddlespeech.t2s.datasets.get_feats import LogMelFBank
from paddlespeech.t2s.datasets.get_feats import Pitch
from paddlespeech.t2s.datasets.preprocess_utils import compare_duration_and_mel_length
from paddlespeech.t2s.datasets.preprocess_utils import get_input_token
from paddlespeech.t2s.datasets.preprocess_utils import get_sentences_svs
from paddlespeech.t2s.datasets.preprocess_utils import get_spk_id_map
from paddlespeech.t2s.utils import str2bool

# NOTE(review): presumably the Mandarin pinyin initials (syllable onsets);
# not referenced by process_sentence / process_sentences - confirm usage.
ALL_INITIALS = [
    'zh', 'ch', 'sh', 'b', 'p', 'm', 'f', 'd', 't', 'n', 'l', 'g', 'k', 'h',
    'j', 'q', 'x', 'r', 'z', 'c', 's', 'y', 'w'
]
# NOTE(review): presumably the Mandarin pinyin finals (syllable rimes), with
# 'v' standing in for "ü" - confirm against the phone set of the dataset.
ALL_FINALS = [
    'a', 'ai', 'an', 'ang', 'ao', 'e', 'ei', 'en', 'eng', 'er', 'i', 'ia',
    'ian', 'iang', 'iao', 'ie', 'in', 'ing', 'iong', 'iu', 'ng', 'o', 'ong',
    'ou', 'u', 'ua', 'uai', 'uan', 'uang', 'ui', 'un', 'uo', 'v', 'van', 've',
    'vn'
]


def process_sentence(
        config: Dict[str, Any],
        fp: Path,
        sentences: Dict,
        output_dir: Path,
        mel_extractor=None,
        pitch_extractor=None,
        energy_extractor=None,
        cut_sil: bool=True,
        spk_emb_dir: Path=None, ):
    """Extract and dump mel, pitch and energy features for one audio file.

    Args:
        config: configuration object; ``config.fs`` is the sample rate the
            audio is loaded at.
        fp: path of the audio file; its stem is the utterance id.
        sentences: mapping from utterance id to its label entry. Indices 0-4
            are read as phones, durations, note, note_dur and is_slur, and the
            last element as the speaker. The mapping may be modified by
            ``compare_duration_and_mel_length``.
        output_dir: root under which ``data_speech``, ``data_pitch`` and
            ``data_energy`` are created.
        mel_extractor: object providing ``get_log_mel_fbank(wav)``.
        pitch_extractor: object providing ``get_pitch(wav)``.
        energy_extractor: object providing ``get_energy(wav)``.
        cut_sil: accepted for interface compatibility; not used here.
        spk_emb_dir: optional directory with ``<speaker>/<utt_id>.npy``
            speaker embeddings.

    Returns:
        dict describing the utterance and the dumped files, or ``None`` when
        the utterance is unknown, the audio is not one-dimensional, the
        utterance was dropped while aligning durations, or the speaker has an
        embedding directory that lacks this utterance.
    """
    utt_id = fp.stem
    if utt_id not in sentences:
        return None

    # reading, resampling may occur
    wav, _ = librosa.load(str(fp), sr=config.fs)
    if len(wav.shape) != 1:
        return None
    # Scale down only when the signal exceeds the [-1, 1] range.
    peak = np.abs(wav).max()
    if peak > 1.0:
        wav = wav / peak
    assert len(wav.shape) == 1, f"{utt_id} is not a mono-channel audio."
    assert np.abs(wav).max(
    ) <= 1.0, f"{utt_id} is seems to be different that 16 bit PCM."

    # These fields are read before the duration fix-up below.
    entry = sentences[utt_id]
    note = entry[2]
    note_dur = entry[3]
    is_slur = entry[4]
    speaker = entry[-1]

    def _dump(dir_name, suffix, array):
        # Save `array` as <output_dir>/<dir_name>/<utt_id><suffix>.
        target_dir = output_dir / dir_name
        target_dir.mkdir(parents=True, exist_ok=True)
        target_path = target_dir / (utt_id + suffix)
        np.save(target_path, array)
        return target_path

    # extract mel feats
    logmel = mel_extractor.get_log_mel_fbank(wav)
    # change duration according to mel_length
    compare_duration_and_mel_length(sentences, utt_id, logmel)
    # utt_id may be popped in compare_duration_and_mel_length
    if utt_id not in sentences:
        return None
    phones = sentences[utt_id][0]
    durations = sentences[utt_id][1]
    num_frames = logmel.shape[0]

    assert sum(
        durations
    ) == num_frames, "the sum of durations doesn't equal to the num of mel frames. "
    speech_path = _dump("data_speech", "_speech.npy", logmel)

    # extract pitch and energy; both must have one value per mel frame
    pitch = pitch_extractor.get_pitch(wav)
    assert pitch.shape[0] == num_frames
    pitch_path = _dump("data_pitch", "_pitch.npy", pitch)

    energy = energy_extractor.get_energy(wav)
    assert energy.shape[0] == num_frames
    energy_path = _dump("data_energy", "_energy.npy", energy)

    record = {
        "utt_id": utt_id,
        "phones": phones,
        "text_lengths": len(phones),
        "speech_lengths": num_frames,
        "durations": durations,
        "speech": str(speech_path),
        "pitch": str(pitch_path),
        "energy": str(energy_path),
        "speaker": speaker,
        "note": note,
        "note_dur": note_dur,
        "is_slur": is_slur,
    }
    if spk_emb_dir and speaker in os.listdir(spk_emb_dir):
        embed_path = spk_emb_dir / speaker / (utt_id + ".npy")
        if not embed_path.is_file():
            return None
        record["spk_emb"] = str(embed_path)
    return record


def process_sentences(
        config,
        fps: List[Path],
        sentences: Dict,
        output_dir: Path,
        mel_extractor=None,
        pitch_extractor=None,
        energy_extractor=None,
        nprocs: int=1,
        cut_sil: bool=True,
        spk_emb_dir: Path=None,
        write_metadata_method: str='w', ):
    """Run ``process_sentence`` on every file and write ``metadata.jsonl``.

    Args:
        config: configuration forwarded to ``process_sentence``.
        fps: audio files to process.
        sentences: label entries keyed by utterance id.
        output_dir: directory receiving the features and ``metadata.jsonl``.
        mel_extractor, pitch_extractor, energy_extractor: feature extractors
            forwarded to ``process_sentence``.
        nprocs: ``1`` processes the files sequentially; any other value is
            used as the worker count of a thread pool.
        cut_sil: forwarded to ``process_sentence``.
        spk_emb_dir: forwarded to ``process_sentence``.
        write_metadata_method: mode passed to ``jsonlines.open`` for the
            metadata file.
    """
    results = []
    if nprocs == 1:
        for audio_path in tqdm.tqdm(fps, total=len(fps)):
            record = process_sentence(
                config=config,
                fp=audio_path,
                sentences=sentences,
                output_dir=output_dir,
                mel_extractor=mel_extractor,
                pitch_extractor=pitch_extractor,
                energy_extractor=energy_extractor,
                cut_sil=cut_sil,
                spk_emb_dir=spk_emb_dir, )
            if record:
                results.append(record)
    else:
        with ThreadPoolExecutor(nprocs) as pool:
            with tqdm.tqdm(total=len(fps)) as progress:
                pending = []
                for audio_path in fps:
                    job = pool.submit(
                        process_sentence,
                        config,
                        audio_path,
                        sentences,
                        output_dir,
                        mel_extractor,
                        pitch_extractor,
                        energy_extractor,
                        cut_sil,
                        spk_emb_dir, )
                    # Advance the progress bar whenever a job completes.
                    job.add_done_callback(lambda p: progress.update())
                    pending.append(job)

                # Collect in submission order; empty results are dropped.
                finished = (job.result() for job in pending)
                results = [record for record in finished if record]

    results.sort(key=itemgetter("utt_id"))
    metadata_path = output_dir / "metadata.jsonl"
    with jsonlines.open(metadata_path, write_metadata_method) as writer:
        for record in results:
            writer.write(record)
    print("Done")


def main():
    """Command-line entry point: extract raw features for an SVS dataset.

    Parses the arguments, loads the YAML config, builds the phone/speaker id
    maps in `--dumpdir`, splits the opencpop file lists into train/dev/test
    and runs `process_sentences` on every non-empty split, writing to
    `<dumpdir>/{train,dev,test}/raw`.

    Exits with a usage error when a mandatory option is missing or when the
    dataset is not supported.
    """
    # parse config and args
    parser = argparse.ArgumentParser(
        description="Preprocess audio and then extract features.")

    parser.add_argument(
        "--dataset",
        default="opencpop",
        type=str,
        help="name of dataset, should in {opencpop} now")

    parser.add_argument(
        "--rootdir", default=None, type=str, help="directory to dataset.")

    parser.add_argument(
        "--dumpdir",
        type=str,
        required=True,
        help="directory to dump feature files.")

    parser.add_argument(
        "--label-file", default=None, type=str, help="path to label file.")

    parser.add_argument("--config", type=str, help="diffsinger config file.")

    parser.add_argument(
        "--num-cpu", type=int, default=1, help="number of process.")

    parser.add_argument(
        "--cut-sil",
        type=str2bool,
        default=True,
        help="whether cut sil in the edge of audio")

    parser.add_argument(
        "--spk_emb_dir",
        default=None,
        type=str,
        help="directory to speaker embedding files.")

    parser.add_argument(
        "--write_metadata_method",
        default="w",
        type=str,
        choices=["w", "a"],
        help="How the metadata.jsonl file is written.")
    args = parser.parse_args()

    # These options default to None but the script cannot run without them;
    # report a usage error instead of failing later with a TypeError from
    # Path(None) / open(None).
    missing = [
        flag
        for flag, value in (("--rootdir", args.rootdir),
                            ("--label-file", args.label_file),
                            ("--config", args.config)) if value is None
    ]
    if missing:
        parser.error("the following arguments are required: " +
                     ", ".join(missing))

    rootdir = Path(args.rootdir).expanduser()
    dumpdir = Path(args.dumpdir).expanduser()
    # use absolute path
    dumpdir = dumpdir.resolve()
    dumpdir.mkdir(parents=True, exist_ok=True)
    label_file = Path(args.label_file).expanduser()

    if args.spk_emb_dir:
        spk_emb_dir = Path(args.spk_emb_dir).expanduser().resolve()
    else:
        spk_emb_dir = None

    assert rootdir.is_dir()
    assert label_file.is_file()

    with open(args.config, 'rt') as f:
        config = CfgNode(yaml.safe_load(f))

    sentences, speaker_set = get_sentences_svs(
        label_file,
        dataset=args.dataset,
        sample_rate=config.fs,
        n_shift=config.n_shift, )

    phone_id_map_path = dumpdir / "phone_id_map.txt"
    speaker_id_map_path = dumpdir / "speaker_id_map.txt"
    get_input_token(sentences, phone_id_map_path, args.dataset)
    get_spk_id_map(speaker_set, speaker_id_map_path)

    if args.dataset == "opencpop":
        wavdir = rootdir / "wavs"
        # split data into 3 sections
        # Each line is "<utt_id>|..."; only the utterance id is used here.
        train_file = rootdir / "train.txt"
        train_wav_files = []
        with open(train_file, "r", encoding="utf-8") as f_train:
            for line in f_train:
                utt = line.split("|")[0]
                train_wav_files.append(wavdir / (utt + ".wav"))

        # The first `num_dev` lines of test.txt form the dev set, the
        # remaining lines form the test set.
        test_file = rootdir / "test.txt"
        dev_wav_files = []
        test_wav_files = []
        num_dev = 106
        with open(test_file, "r", encoding="utf-8") as f_test:
            for count, line in enumerate(f_test, start=1):
                utt = line.split("|")[0]
                wav_path = wavdir / (utt + ".wav")
                if count > num_dev:
                    test_wav_files.append(wav_path)
                else:
                    dev_wav_files.append(wav_path)

    else:
        # Previously this only printed the message and later crashed with a
        # NameError because no file lists were defined.
        parser.error("dataset should in {opencpop} now!")

    train_dump_dir = dumpdir / "train" / "raw"
    train_dump_dir.mkdir(parents=True, exist_ok=True)
    dev_dump_dir = dumpdir / "dev" / "raw"
    dev_dump_dir.mkdir(parents=True, exist_ok=True)
    test_dump_dir = dumpdir / "test" / "raw"
    test_dump_dir.mkdir(parents=True, exist_ok=True)

    # Extractor
    mel_extractor = LogMelFBank(
        sr=config.fs,
        n_fft=config.n_fft,
        hop_length=config.n_shift,
        win_length=config.win_length,
        window=config.window,
        n_mels=config.n_mels,
        fmin=config.fmin,
        fmax=config.fmax)
    pitch_extractor = Pitch(
        sr=config.fs,
        hop_length=config.n_shift,
        f0min=config.f0min,
        f0max=config.f0max)
    energy_extractor = Energy(
        n_fft=config.n_fft,
        hop_length=config.n_shift,
        win_length=config.win_length,
        window=config.window)

    # process for the 3 sections (train, dev, test -- in that order);
    # empty splits are skipped
    splits = (
        (train_wav_files, train_dump_dir),
        (dev_wav_files, dev_dump_dir),
        (test_wav_files, test_dump_dir), )
    for wav_files, dump_dir in splits:
        if wav_files:
            process_sentences(
                config=config,
                fps=wav_files,
                sentences=sentences,
                output_dir=dump_dir,
                mel_extractor=mel_extractor,
                pitch_extractor=pitch_extractor,
                energy_extractor=energy_extractor,
                nprocs=args.num_cpu,
                cut_sil=args.cut_sil,
                spk_emb_dir=spk_emb_dir,
                write_metadata_method=args.write_metadata_method)


# Run the preprocessing entry point only when executed as a script.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/diffsinger/train.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
import os
import shutil
from pathlib import Path

import jsonlines
import numpy as np
import paddle
import yaml
from paddle import DataParallel
from paddle import distributed as dist
from paddle import nn
from paddle.io import DataLoader
from paddle.io import DistributedBatchSampler
from paddle.optimizer import AdamW
from paddle.optimizer.lr import StepDecay
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.am_batch_fn import diffsinger_multi_spk_batch_fn
from paddlespeech.t2s.datasets.am_batch_fn import diffsinger_single_spk_batch_fn
from paddlespeech.t2s.datasets.data_table import DataTable
from paddlespeech.t2s.models.diffsinger import DiffSinger
from paddlespeech.t2s.models.diffsinger import DiffSingerEvaluator
from paddlespeech.t2s.models.diffsinger import DiffSingerUpdater
from paddlespeech.t2s.models.diffsinger import DiffusionLoss
from paddlespeech.t2s.models.diffsinger.fastspeech2midi import FastSpeech2MIDILoss
from paddlespeech.t2s.training.extensions.snapshot import Snapshot
from paddlespeech.t2s.training.extensions.visualizer import VisualDL
from paddlespeech.t2s.training.optimizer import build_optimizers
from paddlespeech.t2s.training.seeding import seed_everything
from paddlespeech.t2s.training.trainer import Trainer


def train_sp(args, config):
    """Train DiffSinger in the current process (one process per device).

    Args:
        args: parsed command-line namespace; reads `ngpu`, `speaker_dict`,
            `train_metadata`, `dev_metadata`, `phones_dict`,
            `speech_stretchs`, `output_dir` and `config`.
        config: experiment configuration (a `CfgNode`) providing the seed,
            batch size, model/optimizer parameters and trainer intervals.
    """
    # decides device type and whether to run in parallel
    # setup running environment correctly
    if (not paddle.is_compiled_with_cuda()) or args.ngpu == 0:
        paddle.set_device("cpu")
    else:
        paddle.set_device("gpu")
    world_size = paddle.distributed.get_world_size()
    if world_size > 1:
        paddle.distributed.init_parallel_env()

    # set the random seed, it is a must for multiprocess training
    seed_everything(config.seed)

    print(
        f"rank: {dist.get_rank()}, pid: {os.getpid()}, parent_pid: {os.getppid()}",
    )
    fields = [
        "text", "text_lengths", "speech", "speech_lengths", "durations",
        "pitch", "energy", "note", "note_dur", "is_slur"
    ]
    # these fields are stored as file paths in the metadata and loaded lazily
    converters = {"speech": np.load, "pitch": np.load, "energy": np.load}
    spk_num = None
    if args.speaker_dict is not None:
        print("multiple speaker diffsinger!")
        collate_fn = diffsinger_multi_spk_batch_fn
        with open(args.speaker_dict, 'rt') as f:
            spk_id = [line.strip().split() for line in f.readlines()]
        # one line per speaker
        spk_num = len(spk_id)
        fields += ["spk_id"]
    else:
        collate_fn = diffsinger_single_spk_batch_fn
        print("single speaker diffsinger!")

    print("spk_num:", spk_num)

    # dataloader has been too verbose
    logging.getLogger("DataLoader").disabled = True

    # construct dataset for training and validation
    with jsonlines.open(args.train_metadata, 'r') as reader:
        train_metadata = list(reader)
    train_dataset = DataTable(
        data=train_metadata,
        fields=fields,
        converters=converters, )
    with jsonlines.open(args.dev_metadata, 'r') as reader:
        dev_metadata = list(reader)
    dev_dataset = DataTable(
        data=dev_metadata,
        fields=fields,
        converters=converters, )

    # collate function and dataloader
    train_sampler = DistributedBatchSampler(
        train_dataset,
        batch_size=config.batch_size,
        shuffle=True,
        drop_last=True)

    print("samplers done!")

    train_dataloader = DataLoader(
        train_dataset,
        batch_sampler=train_sampler,
        collate_fn=collate_fn,
        num_workers=config.num_workers)

    dev_dataloader = DataLoader(
        dev_dataset,
        shuffle=False,
        drop_last=False,
        batch_size=config.batch_size,
        collate_fn=collate_fn,
        num_workers=config.num_workers)
    print("dataloaders done!")

    with open(args.phones_dict, "r") as f:
        phn_id = [line.strip().split() for line in f.readlines()]
    # one line per phone
    vocab_size = len(phn_id)
    print("vocab_size:", vocab_size)

    # The stats file is a binary numpy array whose row 0 is the minimum and
    # row 1 the maximum of the mel spectrum. Load it once with np.load; the
    # file must not be opened in text mode.
    spec_stretchs = np.load(args.speech_stretchs)
    spec_min = paddle.to_tensor(spec_stretchs[0])
    spec_max = paddle.to_tensor(spec_stretchs[1])
    print("min and max spec done!")

    odim = config.n_mels
    config["model"]["fastspeech2_params"]["spk_num"] = spk_num
    model = DiffSinger(
        spec_min=spec_min,
        spec_max=spec_max,
        idim=vocab_size,
        odim=odim,
        **config["model"], )
    model_fs2 = model.fs2
    model_ds = model.diffusion
    if world_size > 1:
        # DataParallel wraps the model; the sub-modules live under `_layers`
        model = DataParallel(model)
        model_fs2 = model._layers.fs2
        model_ds = model._layers.diffusion
    print("models done!")

    criterion_fs2 = FastSpeech2MIDILoss(**config["fs2_updater"])
    criterion_ds = DiffusionLoss(**config["ds_updater"])
    print("criterions done!")

    # separate optimizers for the FastSpeech2 part and the diffusion part
    optimizer_fs2 = build_optimizers(model_fs2, **config["fs2_optimizer"])
    lr_schedule_ds = StepDecay(**config["ds_scheduler_params"])
    gradient_clip_ds = nn.ClipGradByGlobalNorm(config["ds_grad_norm"])
    optimizer_ds = AdamW(
        learning_rate=lr_schedule_ds,
        grad_clip=gradient_clip_ds,
        parameters=model_ds.parameters(),
        **config["ds_optimizer_params"])
    print("optimizer done!")

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    if dist.get_rank() == 0:
        # basename works for any path separator the platform accepts
        config_name = os.path.basename(args.config)
        # copy conf to output_dir
        shutil.copyfile(args.config, output_dir / config_name)

    updater = DiffSingerUpdater(
        model=model,
        optimizers={
            "fs2": optimizer_fs2,
            "ds": optimizer_ds,
        },
        criterions={
            "fs2": criterion_fs2,
            "ds": criterion_ds,
        },
        dataloader=train_dataloader,
        ds_train_start_steps=config.ds_train_start_steps,
        output_dir=output_dir,
        only_train_diffusion=config["only_train_diffusion"])

    evaluator = DiffSingerEvaluator(
        model=model,
        criterions={
            "fs2": criterion_fs2,
            "ds": criterion_ds,
        },
        dataloader=dev_dataloader,
        output_dir=output_dir, )

    trainer = Trainer(
        updater,
        stop_trigger=(config.train_max_steps, "iteration"),
        out=output_dir, )

    # evaluation and VisualDL logging run on rank 0 only; snapshots are
    # registered on every rank
    if dist.get_rank() == 0:
        trainer.extend(
            evaluator, trigger=(config.eval_interval_steps, 'iteration'))
        trainer.extend(VisualDL(output_dir), trigger=(1, 'iteration'))
    trainer.extend(
        Snapshot(max_size=config.num_snapshots),
        trigger=(config.save_interval_steps, 'iteration'))

    print("Trainer Done!")
    trainer.run()


def main():
    """Parse the command line, load the YAML config and launch training.

    With `--ngpu` greater than 1, `train_sp` is spawned once per device via
    `dist.spawn`; otherwise it is called directly in this process.
    """
    # parse args and config and redirect to train_sp
    arg_parser = argparse.ArgumentParser(
        description="Train a DiffSinger model.")
    arg_parser.add_argument(
        "--config", type=str, help="diffsinger config file.")
    arg_parser.add_argument(
        "--train-metadata", type=str, help="training data.")
    arg_parser.add_argument("--dev-metadata", type=str, help="dev data.")
    arg_parser.add_argument("--output-dir", type=str, help="output dir.")
    arg_parser.add_argument(
        "--ngpu", type=int, default=1, help="if ngpu=0, use cpu.")
    arg_parser.add_argument(
        "--phones-dict", type=str, default=None, help="phone vocabulary file.")
    arg_parser.add_argument(
        "--speaker-dict",
        type=str,
        default=None,
        help="speaker id map file for multiple speaker model.")
    arg_parser.add_argument(
        "--speech-stretchs",
        type=str,
        help="The min and max values of the mel spectrum.")

    args = arg_parser.parse_args()

    with open(args.config) as cfg_file:
        raw_config = yaml.safe_load(cfg_file)
    config = CfgNode(raw_config)

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(config)
    print(
        f"master see the word size: {dist.get_world_size()}, from pid: {os.getpid()}"
    )

    # dispatch: single process unless more than one GPU was requested
    if args.ngpu <= 1:
        train_sp(args, config)
    else:
        dist.spawn(train_sp, (args, config), nprocs=args.ngpu)


# Start training only when this file is executed as a script.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/dygraph_to_static.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse

import yaml
from yacs.config import CfgNode

from paddlespeech.t2s.exps.syn_utils import am_to_static
from paddlespeech.t2s.exps.syn_utils import get_am_inference
from paddlespeech.t2s.exps.syn_utils import get_voc_inference
from paddlespeech.t2s.exps.syn_utils import voc_to_static


def am_dygraph_to_static(args):
    """Load the dygraph acoustic model named by `args` and export it.

    Reads `am_config`, `am`, `am_ckpt`, `am_stat`, `phones_dict`,
    `tones_dict`, `speaker_dict` and `inference_dir` from `args`.
    """
    with open(args.am_config) as cfg_file:
        am_config = CfgNode(yaml.safe_load(cfg_file))

    dygraph_am = get_am_inference(
        am=args.am,
        am_config=am_config,
        am_ckpt=args.am_ckpt,
        am_stat=args.am_stat,
        phones_dict=args.phones_dict,
        tones_dict=args.tones_dict,
        speaker_dict=args.speaker_dict)
    print("acoustic model done!")

    # dygraph to static; the converted model is not needed afterwards
    am_to_static(
        am_inference=dygraph_am,
        am=args.am,
        inference_dir=args.inference_dir,
        speaker_dict=args.speaker_dict)
    print("finish to convert dygraph acoustic model to static!")


def voc_dygraph_to_static(args):
    """Load the dygraph vocoder named by `args` and export it.

    Reads `voc_config`, `voc`, `voc_ckpt`, `voc_stat` and `inference_dir`
    from `args`.
    """
    with open(args.voc_config) as cfg_file:
        voc_config = CfgNode(yaml.safe_load(cfg_file))

    dygraph_voc = get_voc_inference(
        voc=args.voc,
        voc_config=voc_config,
        voc_ckpt=args.voc_ckpt,
        voc_stat=args.voc_stat)
    print("voc done!")

    # dygraph to static; the converted model is not needed afterwards
    voc_to_static(
        voc_inference=dygraph_voc,
        voc=args.voc,
        inference_dir=args.inference_dir)
    print("finish to convert dygraph vocoder model to static!")


def parse_args():
    """Define the command-line options of the converter and parse them.

    Returns:
        argparse.Namespace: parsed options. `type` is mandatory ("am" or
        "voc"); `am` and `voc` default to 'fastspeech2_csmsc' and
        'pwgan_csmsc'; every other option defaults to None.
    """
    am_choices = [
        'speedyspeech_csmsc',
        'speedyspeech_aishell3',
        'fastspeech2_csmsc',
        'fastspeech2_ljspeech',
        'fastspeech2_aishell3',
        'fastspeech2_vctk',
        'tacotron2_csmsc',
        'tacotron2_ljspeech',
        'fastspeech2_mix',
        'fastspeech2_canton',
        'fastspeech2_male-zh',
        'fastspeech2_male-en',
        'fastspeech2_male-mix',
    ]
    voc_choices = [
        'pwgan_csmsc',
        'pwgan_ljspeech',
        'pwgan_aishell3',
        'pwgan_vctk',
        'mb_melgan_csmsc',
        'style_melgan_csmsc',
        'hifigan_csmsc',
        'hifigan_ljspeech',
        'hifigan_aishell3',
        'hifigan_vctk',
        'wavernn_csmsc',
        'pwgan_male',
        'hifigan_male',
        'pwgan_opencpop',
        'hifigan_opencpop',
    ]

    cli = argparse.ArgumentParser(
        description="Synthesize with acoustic model & vocoder")

    def add_str_option(flag, help_text):
        # optional string option that defaults to None
        cli.add_argument(flag, type=str, default=None, help=help_text)

    cli.add_argument(
        '--type',
        type=str,
        required=True,
        choices=["am", "voc"],
        help='Choose the model type of dynamic to static, am or voc')

    # acoustic model
    cli.add_argument(
        '--am',
        type=str,
        default='fastspeech2_csmsc',
        choices=am_choices,
        help='Choose acoustic model type of tts task.')
    add_str_option('--am_config', 'Config of acoustic model.')
    add_str_option('--am_ckpt', 'Checkpoint file of acoustic model.')
    add_str_option(
        "--am_stat",
        "mean and standard deviation used to normalize spectrogram when training acoustic model."
    )
    add_str_option("--phones_dict", "phone vocabulary file.")
    add_str_option("--tones_dict", "tone vocabulary file.")
    add_str_option("--speaker_dict", "speaker id map file.")

    # vocoder
    cli.add_argument(
        '--voc',
        type=str,
        default='pwgan_csmsc',
        choices=voc_choices,
        help='Choose vocoder type of tts task.')
    add_str_option('--voc_config', 'Config of voc.')
    add_str_option('--voc_ckpt', 'Checkpoint file of voc.')
    add_str_option(
        "--voc_stat",
        "mean and standard deviation used to normalize spectrogram when training voc."
    )

    # other
    add_str_option("--inference_dir", "dir to save inference models")

    return cli.parse_args()


def main():
    """Parse the command line and convert the requested model type."""
    args = parse_args()
    model_type = args.type

    if model_type == "am":
        am_dygraph_to_static(args)
        return
    if model_type == "voc":
        voc_dygraph_to_static(args)
        return
    # fallback for any other value of `type`
    print("type should be in ['am', 'voc'] !")


# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/ernie_sat/__init__.py
================================================


================================================
FILE: paddlespeech/t2s/exps/ernie_sat/align.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import shutil
from pathlib import Path

import librosa
import numpy as np
import pypinyin
from praatio import textgrid

from paddlespeech.t2s.exps.ernie_sat.utils import get_dict
from paddlespeech.t2s.exps.ernie_sat.utils import get_tmp_name

# All paths below are relative, so they are resolved against the current
# working directory of the process.
# Pronunciation dictionaries (English / Chinese): loaded with get_dict() and
# passed on the `mfa_align` command line.
DICT_EN = 'tools/aligner/cmudict-0.7b'
DICT_ZH = 'tools/aligner/simple.lexicon'
# Model archives (English / Chinese) passed on the `mfa_align` command line.
MODEL_DIR_EN = 'tools/aligner/vctk_model.zip'
MODEL_DIR_ZH = 'tools/aligner/aishell3_model.zip'
# Directory prepended to PATH at import time -- presumably so that the
# `mfa_align` executable started by alignment() can be found there.
MFA_PATH = 'tools/montreal-forced-aligner/bin'
os.environ['PATH'] = MFA_PATH + '/:' + os.environ['PATH']


def _get_max_idx(dic):
    return sorted([int(key.split('_')[0]) for key in dic.keys()])[-1]


def _readtg(tg_path: str, lang: str='en', fs: int=24000, n_shift: int=300):
    alignment = textgrid.openTextgrid(tg_path, includeEmptyIntervals=True)
    phones = []
    ends = []
    words = []

    for interval in alignment.getTier('words').entries:
        word = interval.label
        if word:
            words.append(word)
    for interval in alignment.getTier('phones').entries:
        phone = interval.label
        phones.append(phone)
        ends.append(interval.end)
    frame_pos = librosa.time_to_frames(ends, sr=fs, hop_length=n_shift)
    durations = np.diff(frame_pos, prepend=0)
    assert len(durations) == len(phones)
    # merge '' and sp in the end
    if phones[-1] == '' and len(phones) > 1 and phones[-2] == 'sp':
        phones = phones[:-1]
        durations[-2] += durations[-1]
        durations = durations[:-1]

    # replace '' and 'sil' with 'sp'
    phones = ['sp' if (phn == '' or phn == 'sil') else phn for phn in phones]

    if lang == 'en':
        DICT = DICT_EN

    elif lang == 'zh':
        DICT = DICT_ZH

    word2phns_dict = get_dict(DICT)

    phn2word_dict = []
    for word in words:
        if lang == 'en':
            word = word.upper()
        phn2word_dict.append([word2phns_dict[word].split(), word])

    non_sp_idx = 0
    word_idx = 0
    i = 0
    word2phns = {}
    while i < len(phones):
        phn = phones[i]
        if phn == 'sp':
            word2phns[str(word_idx) + '_sp'] = ['sp']
            i += 1
        else:
            phns, word = phn2word_dict[non_sp_idx]
            word2phns[str(word_idx) + '_' + word] = phns
            non_sp_idx += 1
            i += len(phns)
        word_idx += 1
    sum_phn = sum(len(word2phns[k]) for k in word2phns)
    assert sum_phn == len(phones)

    results = ''
    for (p, d) in zip(phones, durations):
        results += p + ' ' + str(d) + ' '
    return results.strip(), word2phns


def alignment(wav_path: str,
              text: str,
              fs: int=24000,
              lang='en',
              n_shift: int=300):
    """Force-align `text` to the audio in `wav_path` with `mfa_align`.

    The wav file and a transcript are copied into a temporary corpus
    directory under './tmp_dir/', `mfa_align` is run on it and the resulting
    TextGrid is parsed with `_readtg`.

    Args:
        wav_path (str): path of the audio file.
        text (str): transcript of the audio.
        fs (int): sample rate used to convert times to frames.
        lang (str): 'en' or 'zh'; selects dictionary and model.
        n_shift (int): hop length used to convert times to frames.

    Returns:
        Tuple[List[str], List[int], Dict]: phones, their durations in frames,
        and the word -> phones mapping produced by `_readtg`.

    Raises:
        ValueError: if `lang` is neither 'en' nor 'zh'.
    """
    # Validate before touching the file system: an unsupported language used
    # to print a message and then fail with a NameError.
    if lang == 'en':
        DICT = DICT_EN
        MODEL_DIR = MODEL_DIR_EN
    elif lang == 'zh':
        DICT = DICT_ZH
        MODEL_DIR = MODEL_DIR_ZH
    else:
        raise ValueError(
            f"please input right lang!! got {lang!r}, expected 'en' or 'zh'")

    wav_name = os.path.basename(wav_path)
    utt = wav_name.split('.')[0]
    # prepare data for MFA
    tmp_name = get_tmp_name(text=text)
    tmpbase = './tmp_dir/' + tmp_name
    tmpbase = Path(tmpbase)
    tmpbase.mkdir(parents=True, exist_ok=True)
    print("tmp_name in alignment:", tmp_name)

    shutil.copyfile(wav_path, tmpbase / wav_name)
    txt_name = utt + '.txt'
    txt_path = tmpbase / txt_name
    with open(txt_path, 'w') as wf:
        wf.write(text + '\n')

    # MFA: the corpus directory is also used as the output directory
    CMD = 'mfa_align' + ' ' + str(
        tmpbase) + ' ' + DICT + ' ' + MODEL_DIR + ' ' + str(tmpbase)
    os.system(CMD)
    tg_path = str(tmpbase) + '/' + tmp_name + '/' + utt + '.TextGrid'
    # Forward fs / n_shift so the durations use the caller's frame settings
    # instead of always falling back to _readtg's defaults.
    phn_dur, word2phns = _readtg(tg_path, lang=lang, fs=fs, n_shift=n_shift)
    # phn_dur is "phone dur phone dur ..."; split it back into two lists
    phn_dur = phn_dur.split()
    phns = phn_dur[::2]
    durs = [int(d) for d in phn_dur[1::2]]
    assert len(phns) == len(durs)
    return phns, durs, word2phns


def words2phns(text: str, lang='en'):
    '''Convert text to phones by dictionary lookup.

    Punctuation is replaced by spaces, one trailing '-' and one leading "'"
    are stripped from every token, and tokens that end up empty are skipped.
    English words are upper-cased before the lookup. Words missing from the
    dictionary contribute the single phone 'spn'.

    Args:
        text (str):
            input text.
            eg: for that reason cover is impossible to be given.
        lang (str):
            'en' or 'zh'
    Returns:
        List[str]: phones of input text.
            eg:
            ['F', 'AO1', 'R', 'DH', 'AE1', 'T', 'R', 'IY1', 'Z', 'AH0', 'N', 'K', 'AH1', 'V', 'ER0',
            'IH1', 'Z', 'IH2', 'M', 'P', 'AA1', 'S', 'AH0', 'B', 'AH0', 'L', 'T', 'UW1', 'B', 'IY1',
            'G', 'IH1', 'V', 'AH0', 'N']

        Dict[str, List[str]]: key - idx_word
                              value - phones
            eg:
            {'0_FOR': ['F', 'AO1', 'R'], '1_THAT': ['DH', 'AE1', 'T'],
            '2_REASON': ['R', 'IY1', 'Z', 'AH0', 'N'],'3_COVER': ['K', 'AH1', 'V', 'ER0'], '4_IS': ['IH1', 'Z'],
            '5_IMPOSSIBLE': ['IH2', 'M', 'P', 'AA1', 'S', 'AH0', 'B', 'AH0', 'L'],
            '6_TO': ['T', 'UW1'], '7_BE': ['B', 'IY1'], '8_GIVEN': ['G', 'IH1', 'V', 'AH0', 'N']}
    Raises:
        ValueError: if `lang` is neither 'en' nor 'zh'.
    '''
    # Validate first: an unsupported language used to print a message and
    # then fail with a NameError on the unbound dictionary path.
    if lang == 'en':
        dictfile = DICT_EN
    elif lang == 'zh':
        dictfile = DICT_ZH
    else:
        raise ValueError(
            f"please input right lang!! got {lang!r}, expected 'en' or 'zh'")

    text = text.strip()
    for pun in [
            ',', '.', ':', ';', '!', '?', '"', '(', ')', '--', '---', u'，',
            u'。', u'：', u'；', u'！', u'？', u'（', u'）'
    ]:
        text = text.replace(pun, ' ')

    words = []
    for wrd in text.split():
        if wrd.endswith('-'):
            wrd = wrd[:-1]
        # startswith() is safe on the empty string, so a lone '-' token (e.g.
        # from " - " or what is left of "---") no longer raises IndexError.
        if wrd.startswith("'"):
            wrd = wrd[1:]
        if wrd:
            words.append(wrd)

    word2phns_dict = get_dict(dictfile)
    phns = []
    wrd2phns = {}
    for index, wrd in enumerate(words):
        if lang == 'en':
            wrd = wrd.upper()
        key = str(index) + '_' + wrd
        if wrd not in word2phns_dict:
            # NOTE(review): out-of-vocabulary entries are stored as the bare
            # string 'spn', not as a list like the other values; kept as is
            # because callers may rely on it -- confirm before changing.
            wrd2phns[key] = 'spn'
            phns.append('spn')
        else:
            word_phns = word2phns_dict[wrd].split()
            wrd2phns[key] = word_phns
            phns.extend(word_phns)
    return phns, wrd2phns


def get_phns_spans(wav_path: str,
                   old_str: str='',
                   new_str: str='',
                   source_lang: str='en',
                   target_lang: str='en',
                   fs: int=24000,
                   n_shift: int=300):
    """Compute phone sequences and edit spans for turning old_str into new_str.

    The audio is force-aligned against `old_str`; `new_str` is phonemized by
    dictionary lookup. Matching words are then consumed from the left and
    (for edits) from the right to locate the phone range that differs.

    Args:
        wav_path (str): audio file whose transcript is `old_str`.
        old_str (str): original text.
        new_str (str): target text. If it starts with `old_str` the call is
            treated as an append, otherwise as an edit.
        source_lang (str): language of `old_str`.
        target_lang (str): language of the appended part; a different
            language than `source_lang` is only accepted for appends.
        fs (int): sample rate forwarded to `alignment`.
        n_shift (int): hop length forwarded to `alignment`.

    Returns:
        dict with keys:
            'mfa_start' / 'mfa_end': cumulative start / end frame of every
                aligned phone of the old audio;
            'old_phns': aligned phones of the old audio;
            'new_phns': phones of the new text, rebuilt as left + mid + right;
            'span_to_repl': [start, end] phone indices in `old_phns`;
            'span_to_add': [start, end] phone indices in `new_phns`.
    """
    # append mode: the new text merely extends the old text
    is_append = (old_str == new_str[:len(old_str)])
    # placeholders; all three are overwritten below
    old_phns, mfa_start, mfa_end = [], [], []
    # source
    lang = source_lang
    phn, dur, w2p = alignment(
        wav_path=wav_path, text=old_str, lang=lang, fs=fs, n_shift=n_shift)

    # cumulative durations with a leading 0: element i is the start frame of
    # phone i and element i + 1 its end frame
    new_d_cumsum = np.pad(np.array(dur).cumsum(0), (1, 0), 'constant').tolist()
    mfa_start = new_d_cumsum[:-1]
    mfa_end = new_d_cumsum[1:]
    old_phns = phn

    # target
    if is_append and (source_lang != target_lang):
        cross_lingual_clone = True
    else:
        cross_lingual_clone = False

    if cross_lingual_clone:
        # phonemize the original prefix and the appended suffix separately,
        # each with its own language
        str_origin = new_str[:len(old_str)]
        str_append = new_str[len(old_str):]

        if target_lang == 'zh':
            phns_origin, origin_w2p = words2phns(str_origin, lang='en')
            phns_append, append_w2p_tmp = words2phns(str_append, lang='zh')
        elif target_lang == 'en':
            # original sentence
            phns_origin, origin_w2p = words2phns(str_origin, lang='zh')
            # cloned (appended) sentence
            phns_append, append_w2p_tmp = words2phns(str_append, lang='en')
        else:
            assert target_lang == 'zh' or target_lang == 'en', \
                'cloning is not support for this language, please check it.'

        new_phns = phns_origin + phns_append

        # shift the word indices of the appended part so that they continue
        # after the words of the original part
        append_w2p = {}
        length = len(origin_w2p)
        for key, value in append_w2p_tmp.items():
            idx, wrd = key.split('_')
            append_w2p[str(int(idx) + length) + '_' + wrd] = value
        new_w2p = origin_w2p.copy()
        new_w2p.update(append_w2p)

    else:
        if source_lang == target_lang:
            new_phns, new_w2p = words2phns(new_str, lang=source_lang)
        else:
            assert source_lang == target_lang, \
                'source language is not same with target language...'

    # default spans cover the whole old / new phone sequences
    span_to_repl = [0, len(old_phns) - 1]
    span_to_add = [0, len(new_phns) - 1]
    left_idx = 0
    new_phns_left = []
    sp_count = 0
    # find the left different index
    # The word2phns coming from the alignment may contain 'sp' entries that
    # the direct words2phns() result does not have (?), so 'sp' entries are
    # counted and subtracted from the word index before the lookup.
    for key in w2p.keys():
        idx, wrd = key.split('_')
        if wrd == 'sp':
            sp_count += 1
            new_phns_left.append('sp')
        else:
            idx = str(int(idx) - sp_count)
            if idx + '_' + wrd in new_w2p:
                # index into the phone sequence of new_str
                left_idx += len(new_w2p[idx + '_' + wrd])
                # phones taken from the old (aligned) sequence
                new_phns_left.extend(w2p[key])
            else:
                span_to_repl[0] = len(new_phns_left)
                span_to_add[0] = len(new_phns_left)
                break

    # reverse w2p and new_w2p
    right_idx = len(new_phns)
    new_phns_right = []
    sp_count = 0
    w2p_max_idx = _get_max_idx(w2p)
    new_w2p_max_idx = _get_max_idx(new_w2p)
    new_phns_mid = []
    if is_append:
        # everything after the common left part is new; nothing is kept on
        # the right
        new_phns_right = []
        new_phns_mid = new_phns[left_idx:]
        span_to_repl[0] = len(new_phns_left)
        span_to_add[0] = len(new_phns_left)
        span_to_add[1] = len(new_phns_left) + len(new_phns_mid)
        span_to_repl[1] = len(old_phns) - len(new_phns_right)
    # speech edit
    else:
        # walk the old words from the end, matching them against the new
        # words aligned at the right edge
        for key in list(w2p.keys())[::-1]:
            idx, wrd = key.split('_')
            if wrd == 'sp':
                sp_count += 1
                new_phns_right = ['sp'] + new_phns_right
            else:
                idx = str(new_w2p_max_idx - (w2p_max_idx - int(idx) - sp_count))
                if idx + '_' + wrd in new_w2p:
                    right_idx -= len(new_w2p[idx + '_' + wrd])
                    new_phns_right = w2p[key] + new_phns_right
                else:
                    span_to_repl[1] = len(old_phns) - len(new_phns_right)
                    new_phns_mid = new_phns[left_idx:right_idx]
                    span_to_add[1] = len(new_phns_left) + len(new_phns_mid)
                    # nothing to insert: widen both spans by one phone on
                    # each side, clamped to the sequence bounds
                    if len(new_phns_mid) == 0:
                        span_to_add[1] = min(span_to_add[1] + 1, len(new_phns))
                        span_to_add[0] = max(0, span_to_add[0] - 1)
                        span_to_repl[0] = max(0, span_to_repl[0] - 1)
                        span_to_repl[1] = min(span_to_repl[1] + 1,
                                              len(old_phns))
                    break
    new_phns = new_phns_left + new_phns_mid + new_phns_right
    '''
    For that reason cover should not be given.
    For that reason cover is impossible to be given.
    span_to_repl: [17, 23] "should not"
    span_to_add: [17, 30]  "is impossible to"
    '''
    outs = {}
    outs['mfa_start'] = mfa_start
    outs['mfa_end'] = mfa_end
    outs['old_phns'] = old_phns
    outs['new_phns'] = new_phns
    outs['span_to_repl'] = span_to_repl
    outs['span_to_add'] = span_to_add

    return outs


# Manual smoke test: aligns two sample recordings, phonemizes their texts and
# prints the edit spans for an English example. Requires the sample wav files
# under source/ and a working `mfa_align` installation.
if __name__ == '__main__':
    text = "For that reason cover should not be given."
    phn, dur, word2phns = alignment("source/p243_313.wav", text, lang='en')
    print(phn, dur)
    print(word2phns)
    print("---------------------------------")
    # The pinyin sequence could also be obtained with our Chinese frontend.
    text_zh = "卡尔普陪外孙玩滑梯。"
    text_zh = pypinyin.lazy_pinyin(
        text_zh,
        neutral_tone_with_five=True,
        style=pypinyin.Style.TONE3,
        tone_sandhi=True)
    # alignment() / words2phns() expect space-separated tokens
    text_zh = " ".join(text_zh)
    phn, dur, word2phns = alignment("source/000001.wav", text_zh, lang='zh')
    print(phn, dur)
    print(word2phns)
    print("---------------------------------")
    phns, wrd2phns = words2phns(text, lang='en')
    print("phns:", phns)
    print("wrd2phns:", wrd2phns)
    print("---------------------------------")

    phns, wrd2phns = words2phns(text_zh, lang='zh')
    print("phns:", phns)
    print("wrd2phns:", wrd2phns)
    print("---------------------------------")

    # speech-edit example: "should not" is replaced by "is impossible to"
    outs = get_phns_spans(
        wav_path="source/p243_313.wav",
        old_str="For that reason cover should not be given.",
        new_str="for that reason cover is impossible to be given.")

    mfa_start = outs["mfa_start"]
    mfa_end = outs["mfa_end"]
    old_phns = outs["old_phns"]
    new_phns = outs["new_phns"]
    span_to_repl = outs["span_to_repl"]
    span_to_add = outs["span_to_add"]
    print("mfa_start:", mfa_start)
    print("mfa_end:", mfa_end)
    print("old_phns:", old_phns)
    print("new_phns:", new_phns)
    print("span_to_repl:", span_to_repl)
    print("span_to_add:", span_to_add)
    print("---------------------------------")


================================================
FILE: paddlespeech/t2s/exps/ernie_sat/normalize.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Normalize feature files and dump them."""
import argparse
import logging
from operator import itemgetter
from pathlib import Path

import jsonlines
import numpy as np
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

from paddlespeech.t2s.datasets.data_table import DataTable


def _read_id_map(path):
    """Read a whitespace separated ``<symbol> <id>`` file into a dict.

    Blank lines are skipped so a trailing newline does not break parsing.
    """
    id_map = {}
    with open(path, 'rt') as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue
            symbol, index = fields
            id_map[symbol] = int(index)
    return id_map


def main():
    """Normalize dumped speech features and write the resulting metadata.

    Reads the jsonlines file given by ``--metadata``, standardizes every
    ``speech`` array with the mean / scale stored in ``--speech-stats``,
    saves the result to ``<dumpdir>/data_speech/<utt_id>_speech.npy`` and
    writes ``<dumpdir>/metadata.jsonl`` sorted by ``speech_lengths``.
    Phones and speakers are converted to integer ids with the vocabularies
    given by ``--phones-dict`` and ``--speaker-dict``.
    """
    parser = argparse.ArgumentParser(
        description="Normalize dumped raw features (See detail in parallel_wavegan/bin/normalize.py)."
    )
    parser.add_argument(
        "--metadata",
        type=str,
        required=True,
        help="directory including feature files to be normalized. "
        "you need to specify either *-scp or rootdir.")

    parser.add_argument(
        "--dumpdir",
        type=str,
        required=True,
        help="directory to dump normalized feature files.")
    parser.add_argument(
        "--speech-stats",
        type=str,
        required=True,
        help="speech statistics file.")
    parser.add_argument(
        "--phones-dict", type=str, default=None, help="phone vocabulary file.")
    parser.add_argument(
        "--speaker-dict", type=str, default=None, help="speaker id map file.")

    args = parser.parse_args()

    # Both vocabularies are read unconditionally below, so report a missing
    # option as a CLI error instead of a TypeError raised by open(None).
    if args.phones_dict is None or args.speaker_dict is None:
        parser.error("--phones-dict and --speaker-dict are required.")

    dumpdir = Path(args.dumpdir).expanduser()
    # use absolute path
    dumpdir = dumpdir.resolve()
    dumpdir.mkdir(parents=True, exist_ok=True)

    # get dataset
    with jsonlines.open(args.metadata, 'r') as reader:
        metadata = list(reader)
    dataset = DataTable(
        metadata, converters={
            "speech": np.load,
        })
    logging.info(f"The number of files = {len(dataset)}.")

    # restore scaler (row 0 of the stats file is the mean, row 1 the scale);
    # the file is loaded once instead of once per attribute
    speech_stats = np.load(args.speech_stats)
    speech_scaler = StandardScaler()
    speech_scaler.mean_ = speech_stats[0]
    speech_scaler.scale_ = speech_stats[1]
    speech_scaler.n_features_in_ = speech_scaler.mean_.shape[0]

    vocab_phones = _read_id_map(args.phones_dict)
    vocab_speaker = _read_id_map(args.speaker_dict)

    # the output directory does not depend on the item, create it once
    speech_dir = dumpdir / "data_speech"
    speech_dir.mkdir(parents=True, exist_ok=True)

    # process each file
    output_metadata = []

    for item in tqdm(dataset):
        utt_id = item['utt_id']
        speech = item['speech']

        # normalize
        speech = speech_scaler.transform(speech)
        speech_path = speech_dir / f"{utt_id}_speech.npy"
        np.save(speech_path, speech.astype(np.float32), allow_pickle=False)

        phone_ids = [vocab_phones[p] for p in item['phones']]
        spk_id = vocab_speaker[item["speaker"]]
        record = {
            "utt_id": item['utt_id'],
            "spk_id": spk_id,
            "text": phone_ids,
            "text_lengths": item['text_lengths'],
            "speech_lengths": item['speech_lengths'],
            "durations": item['durations'],
            "speech": str(speech_path),
            "align_start": item['align_start'],
            "align_end": item['align_end'],
        }
        # add spk_emb for voice cloning
        if "spk_emb" in item:
            record["spk_emb"] = str(item["spk_emb"])

        output_metadata.append(record)
    output_metadata.sort(key=itemgetter('speech_lengths'))
    # Use the expanded / resolved dumpdir so the metadata file lands next to
    # the dumped features even when --dumpdir contains "~" or is relative.
    output_metadata_path = dumpdir / "metadata.jsonl"
    with jsonlines.open(output_metadata_path, 'w') as writer:
        for item in output_metadata:
            writer.write(item)
    logging.info(f"metadata dumped into {output_metadata_path}")


# Script entry point: run the normalization CLI only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/ernie_sat/preprocess.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
from concurrent.futures import ThreadPoolExecutor
from operator import itemgetter
from pathlib import Path
from typing import Any
from typing import Dict
from typing import List

import jsonlines
import librosa
import numpy as np
import tqdm
import yaml
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.get_feats import LogMelFBank
from paddlespeech.t2s.datasets.preprocess_utils import compare_duration_and_mel_length
from paddlespeech.t2s.datasets.preprocess_utils import get_input_token
from paddlespeech.t2s.datasets.preprocess_utils import get_phn_dur
from paddlespeech.t2s.datasets.preprocess_utils import get_spk_id_map
from paddlespeech.t2s.datasets.preprocess_utils import merge_silence
from paddlespeech.t2s.utils import str2bool


def process_sentence(config: Dict[str, Any],
                     fp: Path,
                     sentences: Dict,
                     output_dir: Path,
                     mel_extractor=None,
                     cut_sil: bool=True,
                     spk_emb_dir: Path=None):
    """Extract and dump the log-mel feature of a single utterance.

    Args:
        config: configuration object; ``config.fs`` and ``config.n_shift``
            are read.
        fp: audio file path. Its stem (without a trailing ``_mic2``) is the
            utterance id.
        sentences: mapping ``utt_id -> [phones, durations, speaker]``. The
            entry is updated in place when silence is cut, and may be
            changed or removed by ``compare_duration_and_mel_length``.
        output_dir: the feature is saved to
            ``output_dir / "data_speech" / "<utt_id>_speech.npy"``.
        mel_extractor: object providing ``get_log_mel_fbank(wav)``.
        cut_sil: drop a leading / trailing ``sil`` phone together with the
            corresponding audio.
        spk_emb_dir: optional directory laid out as
            ``<speaker>/<utt_id>.npy``.

    Returns:
        The metadata record (dict) of the utterance, or ``None`` when the
        utterance is unknown, not mono, dropped by
        ``compare_duration_and_mel_length``, or its speaker has an embedding
        directory that lacks the file for this utterance.
    """
    utt_id = fp.stem
    # for vctk
    if utt_id.endswith("_mic2"):
        utt_id = utt_id[:-5]
    if utt_id not in sentences:
        return None

    # reading, resampling may occur
    wav, _ = librosa.load(str(fp), sr=config.fs)
    # only mono audio is supported
    if len(wav.shape) != 1:
        return None
    max_value = np.abs(wav).max()
    if max_value > 1.0:
        wav = wav / max_value
    assert np.abs(wav).max(
    ) <= 1.0, f"{utt_id} is seems to be different that 16 bit PCM."
    phones = sentences[utt_id][0]
    durations = sentences[utt_id][1]
    speaker = sentences[utt_id][2]

    if cut_sil:
        d_cumsum = np.pad(np.array(durations).cumsum(0), (1, 0), 'constant')
        # little imprecise than use *.TextGrid directly
        times = librosa.frames_to_time(
            d_cumsum, sr=config.fs, hop_length=config.n_shift)
        # Work in samples throughout. The previous code used a frame count as
        # the default end "time"; converted as seconds that index normally
        # exceeded the waveform length, so the slice kept everything up to
        # the end of the audio. That effective behaviour is made explicit.
        start = 0
        end = len(wav)
        if phones[0] == "sil" and len(durations) > 1:
            start = int(librosa.time_to_samples(times[1], sr=config.fs))
            durations = durations[1:]
            phones = phones[1:]
        if phones[-1] == 'sil' and len(durations) > 1:
            end = int(librosa.time_to_samples(times[-2], sr=config.fs))
            durations = durations[:-1]
            phones = phones[:-1]
        sentences[utt_id][0] = phones
        sentences[utt_id][1] = durations
        wav = wav[start:end]

    # extract mel feats
    logmel = mel_extractor.get_log_mel_fbank(wav)
    # change duration according to mel_length
    compare_duration_and_mel_length(sentences, utt_id, logmel)
    # utt_id may be popped in compare_duration_and_mel_length
    if utt_id not in sentences:
        return None
    phones = sentences[utt_id][0]
    durations = sentences[utt_id][1]
    num_frames = logmel.shape[0]
    assert sum(durations) == num_frames

    # per-phone [start, end) frame boundaries
    new_d_cumsum = np.pad(np.array(durations).cumsum(0), (1, 0), 'constant')
    align_start = new_d_cumsum[:-1]
    align_end = new_d_cumsum[1:]
    assert len(align_start) == len(align_end) == len(durations)

    # Resolve the speaker embedding before writing anything, so an utterance
    # that is discarded leaves no orphan feature file behind. A direct path
    # check replaces listing the whole embedding directory per utterance.
    embed_path = None
    if spk_emb_dir and (spk_emb_dir / speaker).exists():
        embed_path = spk_emb_dir / speaker / (utt_id + ".npy")
        if not embed_path.is_file():
            return None

    mel_dir = output_dir / "data_speech"
    mel_dir.mkdir(parents=True, exist_ok=True)
    mel_path = mel_dir / (utt_id + "_speech.npy")
    np.save(mel_path, logmel)
    # align_start_lengths == text_lengths
    record = {
        "utt_id": utt_id,
        "phones": phones,
        "text_lengths": len(phones),
        "speech_lengths": num_frames,
        "durations": durations,
        "speech": str(mel_path),
        "speaker": speaker,
        "align_start": align_start.tolist(),
        "align_end": align_end.tolist(),
    }
    if embed_path is not None:
        record["spk_emb"] = str(embed_path)
    return record


def process_sentences(config,
                      fps: List[Path],
                      sentences: Dict,
                      output_dir: Path,
                      mel_extractor=None,
                      nprocs: int=1,
                      cut_sil: bool=True,
                      spk_emb_dir: Path=None):
    """Run ``process_sentence`` over ``fps`` and append the records to metadata.

    With ``nprocs == 1`` the files are handled one after another; otherwise
    they are submitted to a thread pool of ``nprocs`` workers. Falsy results
    are dropped, the remaining records are sorted by ``speech_lengths`` and
    appended to ``output_dir / "metadata.jsonl"``.
    """
    if nprocs == 1:
        records = []
        for wav_fp in tqdm.tqdm(fps, total=len(fps)):
            rec = process_sentence(
                config=config,
                fp=wav_fp,
                sentences=sentences,
                output_dir=output_dir,
                mel_extractor=mel_extractor,
                cut_sil=cut_sil,
                spk_emb_dir=spk_emb_dir)
            if rec:
                records.append(rec)
    else:
        with ThreadPoolExecutor(nprocs) as executor:
            with tqdm.tqdm(total=len(fps)) as bar:
                jobs = []
                for wav_fp in fps:
                    job = executor.submit(process_sentence, config, wav_fp,
                                          sentences, output_dir,
                                          mel_extractor, cut_sil, spk_emb_dir)
                    # advance the progress bar whenever a job finishes
                    job.add_done_callback(lambda _finished: bar.update())
                    jobs.append(job)

                # collect in submission order, skipping discarded utterances
                records = [
                    rec for rec in (job.result() for job in jobs) if rec
                ]

    records.sort(key=itemgetter("speech_lengths"))
    # opened with 'a' (not 'w') so records are written from the end of file
    with jsonlines.open(output_dir / "metadata.jsonl", 'a') as writer:
        for rec in records:
            writer.write(rec)
    print("Done")


def _split_by_count(wav_files, num_train, num_dev):
    """Split an ordered file list into leading train, next dev and remaining test files."""
    return (wav_files[:num_train], wav_files[num_train:num_train + num_dev],
            wav_files[num_train + num_dev:])


def _split_by_speaker(wav_dir, pattern, sub_num_dev=5):
    """Split per speaker: the last ``sub_num_dev`` files go to test, the
    ``sub_num_dev`` before them to dev; speakers with at most 100 files are
    used for training only."""
    train_files, dev_files, test_files = [], [], []
    for speaker in os.listdir(wav_dir):
        wav_files = sorted((wav_dir / speaker).rglob(pattern))
        if len(wav_files) > 100:
            train_files += wav_files[:-sub_num_dev * 2]
            dev_files += wav_files[-sub_num_dev * 2:-sub_num_dev]
            test_files += wav_files[-sub_num_dev:]
        else:
            train_files += wav_files
    return train_files, dev_files, test_files


def main():
    """Preprocess a dataset: split it and dump log-mel features per split.

    Writes ``phone_id_map.txt`` and ``speaker_id_map.txt`` into ``--dumpdir``
    and, for each of the train / dev / test splits, the features and a
    ``metadata.jsonl`` under ``<dumpdir>/<split>/raw``.
    """
    # parse config and args
    parser = argparse.ArgumentParser(
        description="Preprocess audio and then extract features.")

    parser.add_argument(
        "--dataset",
        default="baker",
        type=str,
        help="name of dataset, should in {baker, aishell3, ljspeech, vctk} now")

    parser.add_argument(
        "--rootdir", default=None, type=str, help="directory to dataset.")

    parser.add_argument(
        "--dumpdir",
        type=str,
        required=True,
        help="directory to dump feature files.")
    parser.add_argument(
        "--dur-file", default=None, type=str, help="path to durations.txt.")

    parser.add_argument("--config", type=str, help="fastspeech2 config file.")

    parser.add_argument(
        "--num-cpu", type=int, default=1, help="number of process.")

    parser.add_argument(
        "--cut-sil",
        type=str2bool,
        default=True,
        help="whether cut sil in the edge of audio")

    parser.add_argument(
        "--spk_emb_dir",
        default=None,
        type=str,
        help="directory to speaker embedding files.")
    args = parser.parse_args()

    # These options default to None but are used unconditionally below;
    # report them as CLI errors instead of a TypeError from Path(None).
    missing = [
        flag
        for flag, value in (("--rootdir", args.rootdir),
                            ("--dur-file", args.dur_file),
                            ("--config", args.config)) if value is None
    ]
    if missing:
        parser.error("missing required option(s): " + ", ".join(missing))

    rootdir = Path(args.rootdir).expanduser()
    dumpdir = Path(args.dumpdir).expanduser()
    # use absolute path
    dumpdir = dumpdir.resolve()
    dumpdir.mkdir(parents=True, exist_ok=True)
    dur_file = Path(args.dur_file).expanduser()

    if args.spk_emb_dir:
        spk_emb_dir = Path(args.spk_emb_dir).expanduser().resolve()
    else:
        spk_emb_dir = None

    # explicit checks (not `assert`) so they still run under `python -O`
    if not rootdir.is_dir():
        parser.error(f"--rootdir is not a directory: {rootdir}")
    if not dur_file.is_file():
        parser.error(f"--dur-file is not a file: {dur_file}")

    with open(args.config, 'rt') as f:
        config = CfgNode(yaml.safe_load(f))

    sentences, speaker_set = get_phn_dur(dur_file)

    merge_silence(sentences)
    phone_id_map_path = dumpdir / "phone_id_map.txt"
    speaker_id_map_path = dumpdir / "speaker_id_map.txt"
    get_input_token(sentences, phone_id_map_path, args.dataset)
    get_spk_id_map(speaker_set, speaker_id_map_path)

    # split data into 3 sections
    if args.dataset == "baker":
        wav_files = sorted(list((rootdir / "Wave").rglob("*.wav")))
        train_wav_files, dev_wav_files, test_wav_files = _split_by_count(
            wav_files, num_train=9800, num_dev=100)
    elif args.dataset == "aishell3":
        train_wav_files, dev_wav_files, test_wav_files = _split_by_speaker(
            rootdir / "train" / "wav", "*.wav")
    elif args.dataset == "ljspeech":
        wav_files = sorted(list((rootdir / "wavs").rglob("*.wav")))
        train_wav_files, dev_wav_files, test_wav_files = _split_by_count(
            wav_files, num_train=12900, num_dev=100)
    elif args.dataset == "vctk":
        train_wav_files, dev_wav_files, test_wav_files = _split_by_speaker(
            rootdir / "wav48_silence_trimmed", "*_mic2.flac")
    else:
        # Previously this branch only printed the message and execution went
        # on to fail with a NameError on the undefined file lists.
        parser.error("dataset should in {baker, aishell3, ljspeech, vctk} now!")

    splits = (("train", train_wav_files), ("dev", dev_wav_files),
              ("test", test_wav_files))

    # create every output directory up front, even for empty splits
    dump_dirs = {}
    for split_name, _ in splits:
        split_dir = dumpdir / split_name / "raw"
        split_dir.mkdir(parents=True, exist_ok=True)
        dump_dirs[split_name] = split_dir

    # Extractor
    mel_extractor = LogMelFBank(
        sr=config.fs,
        n_fft=config.n_fft,
        hop_length=config.n_shift,
        win_length=config.win_length,
        window=config.window,
        n_mels=config.n_mels,
        fmin=config.fmin,
        fmax=config.fmax)

    # process for the 3 sections
    for split_name, split_files in splits:
        if split_files:
            process_sentences(
                config=config,
                fps=split_files,
                sentences=sentences,
                output_dir=dump_dirs[split_name],
                mel_extractor=mel_extractor,
                nprocs=args.num_cpu,
                cut_sil=args.cut_sil,
                spk_emb_dir=spk_emb_dir)


# Script entry point: run the preprocessing CLI only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/ernie_sat/synthesize.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
from pathlib import Path

import jsonlines
import numpy as np
import paddle
import soundfile as sf
import yaml
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.am_batch_fn import build_erniesat_collate_fn
from paddlespeech.t2s.exps.syn_utils import denorm
from paddlespeech.t2s.exps.syn_utils import get_am_inference
from paddlespeech.t2s.exps.syn_utils import get_test_dataset
from paddlespeech.t2s.exps.syn_utils import get_voc_inference


def evaluate(args):
    """Synthesize every utterance listed in ``args.test_metadata``.

    For each datum the middle third of its speech frames is passed as the
    span boundary to the ERNIE-SAT model; the returned mel segments are
    vocoded, concatenated and written to ``<output_dir>/<utt_id>.wav``. The
    denormalized reference mel is vocoded too and written to
    ``<output_dir>/<utt_id>_raw.wav``.
    """
    # dataloader has been too verbose
    logging.getLogger("DataLoader").disabled = True

    # construct dataset for evaluation
    with jsonlines.open(args.test_metadata, 'r') as reader:
        test_metadata = list(reader)

    # Init body.
    with open(args.erniesat_config) as cfg_file:
        am_cfg = CfgNode(yaml.safe_load(cfg_file))
    with open(args.voc_config) as cfg_file:
        voc_cfg = CfgNode(yaml.safe_load(cfg_file))

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(am_cfg)
    print(voc_cfg)

    # ernie sat model
    erniesat_inference = get_am_inference(
        am='erniesat_dataset',
        am_config=am_cfg,
        am_ckpt=args.erniesat_ckpt,
        am_stat=args.erniesat_stat,
        phones_dict=args.phones_dict)

    test_dataset = get_test_dataset(
        test_metadata=test_metadata, am='erniesat_dataset')

    # vocoder
    voc_inference = get_voc_inference(
        voc=args.voc,
        voc_config=voc_cfg,
        voc_ckpt=args.voc_ckpt,
        voc_stat=args.voc_stat)

    out_root = Path(args.output_dir)
    out_root.mkdir(parents=True, exist_ok=True)
    collate_fn = build_erniesat_collate_fn(
        mlm_prob=am_cfg.mlm_prob,
        mean_phn_span=am_cfg.mean_phn_span,
        seg_emb=am_cfg.model['enc_input_layer'] == 'sega_mlm',
        text_masking=False)

    # also vocode the (denormalized) reference mel for comparison
    gen_raw = True
    erniesat_mu, erniesat_std = np.load(args.erniesat_stat)

    for datum in test_dataset:
        utt_id = datum["utt_id"]
        n_frames = datum["speech_lengths"]

        # mask the middle 1/3 speech
        span_bdy = [n_frames // 3, 2 * n_frames // 3]
        datum.update({"span_bdy": span_bdy})

        # collate a batch holding just this utterance
        batch = collate_fn([datum])
        with paddle.no_grad():
            out_mels = erniesat_inference(
                speech=batch["speech"],
                text=batch["text"],
                masked_pos=batch["masked_pos"],
                speech_mask=batch["speech_mask"],
                text_mask=batch["text_mask"],
                speech_seg_pos=batch["speech_seg_pos"],
                text_seg_pos=batch["text_seg_pos"],
                span_bdy=span_bdy)

            # vocoder: one waveform piece per mel segment, then join them
            wav_parts = [voc_inference(mel) for mel in out_mels]
            wav = paddle.concat(wav_parts).numpy()
            if gen_raw:
                ref_mel = denorm(datum['speech'], erniesat_mu, erniesat_std)
                ref_mel = paddle.to_tensor(ref_mel)
                wav_raw = voc_inference(ref_mel).numpy()

        sf.write(
            str(out_root / (utt_id + ".wav")), wav, samplerate=am_cfg.fs)
        if gen_raw:
            sf.write(
                str(out_root / (utt_id + "_raw" + ".wav")),
                wav_raw,
                samplerate=am_cfg.fs)

        print(f"{utt_id} done!")


def parse_args():
    """Build the command line parser of this script and parse ``sys.argv``.

    Returns:
        argparse.Namespace: the parsed options.
    """
    # parse args and config
    parser = argparse.ArgumentParser(
        description="Synthesize with acoustic model & vocoder")

    # keyword arguments shared by every optional string option
    str_opt = {"type": str, "default": None}

    option_table = [
        # ernie sat
        ('--erniesat_config', dict(
            str_opt, help='Config of acoustic model.')),
        ('--erniesat_ckpt', dict(
            str_opt, help='Checkpoint file of acoustic model.')),
        ("--erniesat_stat", dict(
            str_opt,
            help="mean and standard deviation used to normalize spectrogram when training acoustic model."
        )),
        ("--phones_dict", dict(
            str_opt, help="phone vocabulary file.")),
        # vocoder
        # NOTE(review): the default 'pwgan_csmsc' is not one of `choices`;
        # argparse does not check defaults against choices, so it is only
        # reachable by omitting --voc. Confirm which value is intended.
        ('--voc', dict(
            type=str,
            default='pwgan_csmsc',
            choices=[
                'pwgan_aishell3',
                'pwgan_vctk',
                'hifigan_aishell3',
                'hifigan_vctk',
            ],
            help='Choose vocoder type of tts task.')),
        ('--voc_config', dict(
            str_opt, help='Config of voc.')),
        ('--voc_ckpt', dict(
            str_opt, help='Checkpoint file of voc.')),
        ("--voc_stat", dict(
            str_opt,
            help="mean and standard deviation used to normalize spectrogram when training voc."
        )),
        # other
        ("--ngpu", dict(
            type=int, default=1, help="if ngpu == 0, use cpu.")),
        ("--test_metadata", dict(
            type=str, help="test metadata.")),
        ("--output_dir", dict(
            type=str, help="output dir.")),
    ]
    # register the options in table order so the help output is unchanged
    for flag, options in option_table:
        parser.add_argument(flag, **options)

    return parser.parse_args()


def main():
    """Select the paddle device from ``--ngpu`` and run the evaluation."""
    cli_args = parse_args()
    gpu_count = cli_args.ngpu

    if gpu_count < 0:
        # only a message is printed; evaluation still runs afterwards
        print("ngpu should >= 0 !")
    else:
        paddle.set_device("gpu" if gpu_count > 0 else "cpu")

    evaluate(cli_args)


# Script entry point: run the synthesis CLI only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/ernie_sat/synthesize_e2e.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
from pathlib import Path
from typing import List

import librosa
import numpy as np
import paddle
import pypinyin
import soundfile as sf
import yaml
from pypinyin_dict.phrase_pinyin_data import large_pinyin
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.am_batch_fn import build_erniesat_collate_fn
from paddlespeech.t2s.datasets.get_feats import LogMelFBank
from paddlespeech.t2s.exps.ernie_sat.align import get_phns_spans
from paddlespeech.t2s.exps.ernie_sat.utils import eval_durs
from paddlespeech.t2s.exps.ernie_sat.utils import get_dur_adj_factor
from paddlespeech.t2s.exps.ernie_sat.utils import get_span_bdy
from paddlespeech.t2s.exps.ernie_sat.utils import get_tmp_name
from paddlespeech.t2s.exps.syn_utils import get_am_inference
from paddlespeech.t2s.exps.syn_utils import get_voc_inference
from paddlespeech.t2s.exps.syn_utils import norm
from paddlespeech.t2s.utils import str2bool
# Executed once at import time. Presumably registers the large phrase-pinyin
# table from pypinyin_dict with pypinyin — TODO confirm against its docs.
large_pinyin.load()


def _p2id(phonemes: List[str]) -> np.ndarray:
    """Convert phoneme symbols to an int64 array of vocabulary ids.

    Symbols missing from the module-level ``vocab_phones`` mapping are looked
    up as "sp" instead.
    """
    ids = []
    for symbol in phonemes:
        # replace unk phone with sp
        key = symbol if symbol in vocab_phones else "sp"
        ids.append(vocab_phones[key])
    return np.array(ids, np.int64)


def prep_feats_with_dur(wav_path: str,
                        old_str: str='',
                        new_str: str='',
                        source_lang: str='en',
                        target_lang: str='en',
                        duration_adjust: bool=True,
                        fs: int=24000,
                        n_shift: int=300):
    '''Build the waveform and alignment needed to edit ``old_str`` into ``new_str``.

    The phones of both texts and the spans that differ come from
    ``get_phns_spans``; phone durations come from ``eval_durs``. The audio of
    the span to be replaced is swapped for zeros whose length follows the
    (optionally rescaled) predicted durations of the new phones.

    Args:
        wav_path: path of the original audio.
        old_str: text of the original audio.
        new_str: text after editing.
        source_lang: language of ``old_str``.
        target_lang: language of ``new_str``; must be 'en' or 'zh'.
        duration_adjust: scale predicted durations by the factor returned by
            ``get_dur_adj_factor`` (times 1.25) instead of using them as is.
        fs: sample rate used to load the audio.
        n_shift: hop size in samples; alignment values are multiplied by it
            to obtain sample indices.

    Returns:
        dict with the keys
            'new_wav' (np.ndarray): original wav with the edited part
                replaced by zeros,
            'new_phns': phones of the new text,
            'new_mfa_start' / 'new_mfa_end': alignment of the new wav,
            'old_span_bdy': masked mel boundary of the original wav,
            'new_span_bdy': masked mel boundary of the new wav.

    Raises:
        AssertionError: if ``target_lang`` is neither 'en' nor 'zh'.
    '''
    wav_org, _ = librosa.load(wav_path, sr=fs)
    phns_spans_outs = get_phns_spans(
        wav_path=wav_path,
        old_str=old_str,
        new_str=new_str,
        source_lang=source_lang,
        target_lang=target_lang,
        fs=fs,
        n_shift=n_shift)

    mfa_start = phns_spans_outs['mfa_start']
    mfa_end = phns_spans_outs['mfa_end']
    old_phns = phns_spans_outs['old_phns']
    new_phns = phns_spans_outs['new_phns']
    span_to_repl = phns_spans_outs['span_to_repl']
    span_to_add = phns_spans_outs['span_to_add']

    # Raised explicitly (same exception type as before) rather than with
    # `assert`: under `python -O` the assert disappeared and the duration
    # variables were left unbound.
    if target_lang not in {'en', 'zh'}:
        raise AssertionError(
            "calculate duration_predict is not support for this language...")

    # Chinese phones are not necessarily all in the fastspeech2 dictionary;
    # they are replaced with sp (presumably inside eval_durs - not visible
    # here).
    old_durs = eval_durs(old_phns, target_lang=source_lang)

    orig_old_durs = [e - s for e, s in zip(mfa_end, mfa_start)]

    if duration_adjust:
        d_factor = get_dur_adj_factor(
            orig_dur=orig_old_durs, pred_dur=old_durs, phns=old_phns)
        d_factor = d_factor * 1.25
    else:
        d_factor = 1

    new_durs = eval_durs(new_phns, target_lang=target_lang)

    # durations must be integers
    new_durs_adjusted = [int(np.ceil(d_factor * i)) for i in new_durs]
    added_durs = new_durs_adjusted[span_to_add[0]:span_to_add[1]]

    new_span_dur_sum = sum(added_durs)
    old_span_dur_sum = sum(orig_old_durs[span_to_repl[0]:span_to_repl[1]])
    dur_offset = new_span_dur_sum - old_span_dur_sum
    # copies, so the alignment returned by get_phns_spans is left untouched
    new_mfa_start = list(mfa_start[:span_to_repl[0]])
    new_mfa_end = list(mfa_end[:span_to_repl[0]])

    # lay the new phones out back to back after the untouched prefix
    for dur in added_durs:
        cursor = new_mfa_end[-1] if new_mfa_end else 0
        new_mfa_start.append(cursor)
        new_mfa_end.append(cursor + dur)

    # the untouched suffix is shifted by the change in span length
    new_mfa_start += [i + dur_offset for i in mfa_start[span_to_repl[1]:]]
    new_mfa_end += [i + dur_offset for i in mfa_end[span_to_repl[1]:]]

    # 3. get new wav
    # append after the original sentence
    if span_to_repl[0] >= len(mfa_start):
        wav_left_idx = len(wav_org)
        wav_right_idx = wav_left_idx
    # replace in the middle of the original sentence
    else:
        wav_left_idx = int(np.floor(mfa_start[span_to_repl[0]] * n_shift))
        # NOTE(review): if span_to_repl[1] == 0 this indexes mfa_end[-1];
        # confirm get_phns_spans never returns such a span.
        wav_right_idx = int(np.ceil(mfa_end[span_to_repl[1] - 1] * n_shift))
    blank_wav = np.zeros(
        (int(np.ceil(new_span_dur_sum * n_shift)), ), dtype=wav_org.dtype)
    # original audio with the part to be edited replaced by blank audio; the
    # length of the blank audio is decided by the fs2 duration_predictor
    new_wav = np.concatenate(
        [wav_org[:wav_left_idx], blank_wav, wav_org[wav_right_idx:]])

    # 4. get old and new mel span to be mask
    old_span_bdy = get_span_bdy(
        mfa_start=mfa_start, mfa_end=mfa_end, span_to_repl=span_to_repl)

    new_span_bdy = get_span_bdy(
        mfa_start=new_mfa_start, mfa_end=new_mfa_end, span_to_repl=span_to_add)

    # old_span_bdy, new_span_bdy are frame-level ranges
    outs = {}
    outs['new_wav'] = new_wav
    outs['new_phns'] = new_phns
    outs['new_mfa_start'] = new_mfa_start
    outs['new_mfa_end'] = new_mfa_end
    outs['old_span_bdy'] = old_span_bdy
    outs['new_span_bdy'] = new_span_bdy
    return outs


def prep_feats(wav_path: str,
               old_str: str='',
               new_str: str='',
               source_lang: str='en',
               target_lang: str='en',
               duration_adjust: bool=True,
               fs: int=24000,
               n_shift: int=300):
    """Turn an edit request into a collated single-item ERNIE-SAT batch.

    Relies on the module-level ``mel_extractor``, ``erniesat_stat`` and
    ``collate_fn``. The normalized mel is saved to
    ``./tmp_dir/<tmp_name>/mel.npy`` and read back for the datum.

    Returns:
        dict with the keys 'batch', 'old_span_bdy' and 'new_span_bdy'.
    """
    dur_outs = prep_feats_with_dur(
        wav_path=wav_path,
        old_str=old_str,
        new_str=new_str,
        source_lang=source_lang,
        target_lang=target_lang,
        duration_adjust=duration_adjust,
        fs=fs,
        n_shift=n_shift)

    # utterance id: file name up to its first dot
    utt_id = os.path.basename(wav_path).split('.')[0]

    edited_wav = dur_outs['new_wav']
    new_phones = dur_outs['new_phns']
    align_start = dur_outs['new_mfa_start']
    align_end = dur_outs['new_mfa_end']
    old_span_bdy = dur_outs['old_span_bdy']
    new_span_bdy = dur_outs['new_span_bdy']
    span_bdy = np.array(new_span_bdy)

    # log-mel of the edited wav, normalized with the acoustic model stats
    raw_mel = mel_extractor.get_log_mel_fbank(edited_wav)
    erniesat_mean, erniesat_std = np.load(erniesat_stat)
    normed_mel = norm(raw_mel, erniesat_mean, erniesat_std)

    # scratch directory named after the original text
    tmp_dir = Path('./tmp_dir/' + get_tmp_name(text=old_str))
    tmp_dir.mkdir(parents=True, exist_ok=True)
    mel_path = tmp_dir / 'mel.npy'
    np.save(mel_path, normed_mel)

    durations = [end - start for end, start in zip(align_end, align_start)]
    phone_ids = _p2id(new_phones)

    datum = {
        "utt_id": utt_id,
        "spk_id": 0,
        "text": phone_ids,
        "text_lengths": len(phone_ids),
        "speech_lengths": len(normed_mel),
        "durations": durations,
        "speech": np.load(mel_path),
        "align_start": align_start,
        "align_end": align_end,
        "span_bdy": span_bdy
    }

    return {
        'batch': collate_fn([datum]),
        'old_span_bdy': old_span_bdy,
        'new_span_bdy': new_span_bdy,
    }


def get_mlm_output(wav_path: str,
                   old_str: str='',
                   new_str: str='',
                   source_lang: str='en',
                   target_lang: str='en',
                   duration_adjust: bool=True,
                   fs: int=24000,
                   n_shift: int=300):
    """Run the ERNIE-SAT model on the batch prepared for an edit request.

    Uses the module-level ``erniesat_inference``.

    Returns:
        dict with the keys 'wav_org' (original audio loaded at ``fs``),
        'output_feat' (model outputs concatenated along axis 0),
        'old_span_bdy' and 'new_span_bdy'.
    """
    feats = prep_feats(
        wav_path=wav_path,
        old_str=old_str,
        new_str=new_str,
        source_lang=source_lang,
        target_lang=target_lang,
        duration_adjust=duration_adjust,
        fs=fs,
        n_shift=n_shift)

    batch = feats['batch']
    new_span_bdy = feats['new_span_bdy']
    old_span_bdy = feats['old_span_bdy']

    # forward every batch field under its own keyword
    batch_fields = ('speech', 'text', 'masked_pos', 'speech_mask',
                    'text_mask', 'speech_seg_pos', 'text_seg_pos')
    model_inputs = {field: batch[field] for field in batch_fields}
    out_mels = erniesat_inference(span_bdy=new_span_bdy, **model_inputs)

    # join the output segments
    output_feat = paddle.concat(x=out_mels, axis=0)
    wav_org, _ = librosa.load(wav_path, sr=fs)

    return {
        'wav_org': wav_org,
        'output_feat': output_feat,
        'old_span_bdy': old_span_bdy,
        'new_span_bdy': new_span_bdy,
    }


def get_wav(wav_path: str,
            source_lang: str='en',
            target_lang: str='en',
            old_str: str='',
            new_str: str='',
            duration_adjust: bool=True,
            fs: int=24000,
            n_shift: int=300,
            task_name: str='synthesize'):
    """Vocode the regenerated span and, for the edit task, splice it back in.

    The frames ``new_span_bdy[0]:new_span_bdy[1]`` of the model output are
    passed to the module-level ``voc_inference`` callable. When ``task_name``
    is ``'edit'`` the result replaces the samples
    ``n_shift * old_span_bdy[0] : n_shift * old_span_bdy[1]`` of the original
    waveform; for any other ``task_name`` the vocoded span alone is returned.

    Returns:
        dict with keys ``"origin"`` (the original waveform) and ``"output"``.
    """
    mlm_out = get_mlm_output(
        wav_path=wav_path,
        old_str=old_str,
        new_str=new_str,
        source_lang=source_lang,
        target_lang=target_lang,
        duration_adjust=duration_adjust,
        fs=fs,
        n_shift=n_shift)

    wav_org = mlm_out['wav_org']
    old_span_bdy = mlm_out['old_span_bdy']
    new_span_bdy = mlm_out['new_span_bdy']

    # only the frames inside the new span are vocoded
    span_feat = mlm_out['output_feat'][new_span_bdy[0]:new_span_bdy[1]]
    with paddle.no_grad():
        span_wav = voc_inference(span_feat)
    span_wav = np.squeeze(span_wav)

    # frame indices -> sample indices
    sample_bdy = [n_shift * frame for frame in old_span_bdy]
    if task_name != 'edit':
        result_wav = span_wav
    else:
        head = wav_org[:sample_bdy[0]]
        tail = wav_org[sample_bdy[1]:]
        result_wav = np.concatenate([head, span_wav, tail])

    return {"origin": wav_org, "output": result_wav}


def parse_args():
    """Build the command line parser for this script and parse ``sys.argv``.

    Returns:
        argparse.Namespace holding the ERNIE-SAT model options, the vocoder
        options, the device option and the task options defined below.
    """
    parser = argparse.ArgumentParser(
        description="Synthesize with acoustic model & vocoder")
    add = parser.add_argument

    # ---- ERNIE-SAT acoustic model ----
    add('--erniesat_config',
        type=str,
        default=None,
        help='Config of acoustic model.')
    add('--erniesat_ckpt',
        type=str,
        default=None,
        help='Checkpoint file of acoustic model.')
    add("--erniesat_stat",
        type=str,
        default=None,
        help="mean and standard deviation used to normalize spectrogram when training acoustic model."
        )
    add("--phones_dict", type=str, default=None, help="phone vocabulary file.")

    # ---- vocoder ----
    # NOTE(review): the default 'pwgan_csmsc' is not one of the listed
    # choices; argparse does not validate defaults against `choices`, so it
    # is passed through unchanged. Confirm whether this is intended.
    vocoder_choices = [
        'pwgan_aishell3',
        'pwgan_vctk',
        'hifigan_aishell3',
        'hifigan_vctk',
    ]
    add('--voc',
        type=str,
        default='pwgan_csmsc',
        choices=vocoder_choices,
        help='Choose vocoder type of tts task.')
    add('--voc_config', type=str, default=None, help='Config of voc.')
    add('--voc_ckpt', type=str, default=None, help='Checkpoint file of voc.')
    add("--voc_stat",
        type=str,
        default=None,
        help="mean and standard deviation used to normalize spectrogram when training voc."
        )

    # ---- device ----
    add("--ngpu", type=int, default=1, help="if ngpu == 0, use cpu.")

    # ---- ERNIE-SAT task ----
    add("--task_name",
        type=str,
        choices=['edit', 'synthesize'],
        help="task name.")
    add("--wav_path", type=str, help="path of old wav")
    add("--old_str", type=str, help="old string")
    add("--new_str", type=str, help="new string")
    add("--source_lang", type=str, default="en", help="source language")
    add("--target_lang", type=str, default="en", help="target language")
    add("--duration_adjust",
        type=str2bool,
        default=True,
        help="whether to adjust duration.")
    add("--output_name", type=str, default="output.wav")

    return parser.parse_args()


if __name__ == '__main__':
    # Script entry point. The names bound below (collate_fn,
    # erniesat_inference, voc_inference, ...) are module-level globals that
    # the functions defined above read at call time, so they must all be
    # assigned before get_wav() is invoked.
    args = parse_args()

    if args.ngpu < 0:
        print("ngpu should >= 0 !")
    else:
        paddle.set_device("cpu" if args.ngpu == 0 else "gpu")

    with open(args.erniesat_config) as f:
        erniesat_config = CfgNode(yaml.safe_load(f))
    old_str, new_str = args.old_str, args.new_str

    # convert Chinese characters to space-separated pinyin
    pinyin_opts = dict(
        neutral_tone_with_five=True,
        style=pypinyin.Style.TONE3,
        tone_sandhi=True)
    if args.source_lang == 'zh':
        old_str = ' '.join(pypinyin.lazy_pinyin(old_str, **pinyin_opts))
    if args.target_lang == 'zh':
        new_str = ' '.join(pypinyin.lazy_pinyin(new_str, **pinyin_opts))

    # 'edit' keeps new_str as given; every other task name (including a
    # missing one) prepends the old text to the new text.
    if args.task_name != 'edit':
        new_str = old_str + ' ' + new_str

    # Extractor
    mel_extractor = LogMelFBank(
        sr=erniesat_config.fs,
        n_fft=erniesat_config.n_fft,
        hop_length=erniesat_config.n_shift,
        win_length=erniesat_config.win_length,
        window=erniesat_config.window,
        n_mels=erniesat_config.n_mels,
        fmin=erniesat_config.fmin,
        fmax=erniesat_config.fmax)

    collate_fn = build_erniesat_collate_fn(
        mlm_prob=erniesat_config.mlm_prob,
        mean_phn_span=erniesat_config.mean_phn_span,
        seg_emb=erniesat_config.model['enc_input_layer'] == 'sega_mlm',
        text_masking=False)

    # phone -> integer id, read from the two-column vocabulary file
    with open(args.phones_dict, 'rt', encoding='utf-8') as f:
        phn_id = [line.strip().split() for line in f.readlines()]
    vocab_phones = {phone: int(index) for phone, index in phn_id}

    # ernie sat model
    erniesat_inference = get_am_inference(
        am='erniesat_dataset',
        am_config=erniesat_config,
        am_ckpt=args.erniesat_ckpt,
        am_stat=args.erniesat_stat,
        phones_dict=args.phones_dict)

    # vocoder
    with open(args.voc_config) as f:
        voc_config = CfgNode(yaml.safe_load(f))
    voc_inference = get_voc_inference(
        voc=args.voc,
        voc_config=voc_config,
        voc_ckpt=args.voc_ckpt,
        voc_stat=args.voc_stat)

    erniesat_stat = args.erniesat_stat

    wav_dict = get_wav(
        wav_path=args.wav_path,
        source_lang=args.source_lang,
        target_lang=args.target_lang,
        old_str=old_str,
        new_str=new_str,
        duration_adjust=args.duration_adjust,
        fs=erniesat_config.fs,
        n_shift=erniesat_config.n_shift,
        task_name=args.task_name)

    sf.write(
        args.output_name, wav_dict['output'], samplerate=erniesat_config.fs)
    print(
        f"\033[1;32;m Generated audio saved into {args.output_name} ! \033[0m")


================================================
FILE: paddlespeech/t2s/exps/ernie_sat/train.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
import os
import shutil
from pathlib import Path

import jsonlines
import numpy as np
import paddle
import yaml
from paddle import DataParallel
from paddle import distributed as dist
from paddle import nn
from paddle.io import DataLoader
from paddle.optimizer import Adam
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.am_batch_fn import build_erniesat_collate_fn
from paddlespeech.t2s.datasets.data_table import DataTable
from paddlespeech.t2s.datasets.sampler import ErnieSATSampler
from paddlespeech.t2s.models.ernie_sat import ErnieSAT
from paddlespeech.t2s.models.ernie_sat import ErnieSATEvaluator
from paddlespeech.t2s.models.ernie_sat import ErnieSATUpdater
from paddlespeech.t2s.training.extensions.snapshot import Snapshot
from paddlespeech.t2s.training.extensions.visualizer import VisualDL
from paddlespeech.t2s.training.seeding import seed_everything
from paddlespeech.t2s.training.trainer import Trainer


def train_sp(args, config):
    """Train an ErnieSAT model in the current process.

    Called directly for CPU / single-card runs and once per spawned process
    for multi-card runs.

    Args:
        args: parsed command line namespace. Reads ``ngpu``,
            ``train_metadata``, ``dev_metadata``, ``phones_dict``,
            ``config`` and ``output_dir``.
        config: configuration node loaded from the YAML file. Reads ``seed``,
            ``mlm_prob``, ``mean_phn_span``, ``model``, ``batch_size``,
            ``num_workers``, ``n_mels``, ``scheduler_params``, ``grad_clip``,
            ``max_epoch`` and ``num_snapshots``.
    """
    # decides device type and whether to run in parallel
    # setup running environment correctly
    if (not paddle.is_compiled_with_cuda()) or args.ngpu == 0:
        paddle.set_device("cpu")
    else:
        paddle.set_device("gpu")
    world_size = paddle.distributed.get_world_size()
    if world_size > 1:
        paddle.distributed.init_parallel_env()

    # set the random seed, it is a must for multiprocess training
    seed_everything(config.seed)

    print(
        f"rank: {dist.get_rank()}, pid: {os.getpid()}, parent_pid: {os.getppid()}",
    )
    fields = [
        "text", "text_lengths", "speech", "speech_lengths", "align_start",
        "align_end"
    ]
    # "speech" entries in the metadata are paths to .npy files
    converters = {"speech": np.load}
    # dataloader has been too verbose
    logging.getLogger("DataLoader").disabled = True

    # construct dataset for training and validation
    with jsonlines.open(args.train_metadata, 'r') as reader:
        train_metadata = list(reader)
    train_dataset = DataTable(
        data=train_metadata,
        fields=fields,
        converters=converters, )
    with jsonlines.open(args.dev_metadata, 'r') as reader:
        dev_metadata = list(reader)
    dev_dataset = DataTable(
        data=dev_metadata,
        fields=fields,
        converters=converters, )

    # collate function and dataloader
    collate_fn = build_erniesat_collate_fn(
        mlm_prob=config.mlm_prob,
        mean_phn_span=config.mean_phn_span,
        seg_emb=config.model['enc_input_layer'] == 'sega_mlm',
        text_masking=config["model"]["text_masking"])

    train_sampler = ErnieSATSampler(
        train_dataset,
        batch_size=config.batch_size,
        shuffle=True,
        drop_last=True)

    print("samplers done!")

    train_dataloader = DataLoader(
        train_dataset,
        batch_sampler=train_sampler,
        collate_fn=collate_fn,
        num_workers=config.num_workers)

    dev_dataloader = DataLoader(
        dev_dataset,
        shuffle=False,
        drop_last=False,
        batch_size=config.batch_size,
        collate_fn=collate_fn,
        num_workers=config.num_workers)
    print("dataloaders done!")

    # the vocabulary size is the number of lines in the phone id map
    with open(args.phones_dict, 'rt', encoding='utf-8') as f:
        phn_id = [line.strip().split() for line in f.readlines()]
    vocab_size = len(phn_id)
    print("vocab_size:", vocab_size)

    odim = config.n_mels
    model = ErnieSAT(idim=vocab_size, odim=odim, **config["model"])

    if world_size > 1:
        model = DataParallel(model)
    print("model done!")

    scheduler = paddle.optimizer.lr.NoamDecay(
        d_model=config["scheduler_params"]["d_model"],
        warmup_steps=config["scheduler_params"]["warmup_steps"])
    grad_clip = nn.ClipGradByGlobalNorm(config["grad_clip"])
    optimizer = Adam(
        learning_rate=scheduler,
        grad_clip=grad_clip,
        parameters=model.parameters())

    print("optimizer done!")

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    if dist.get_rank() == 0:
        # Use pathlib to get the file name so that paths written with the
        # platform separator (e.g. backslashes on Windows) are handled too;
        # splitting on "/" would keep the whole path in that case.
        config_name = Path(args.config).name
        # copy conf to output_dir
        shutil.copyfile(args.config, output_dir / config_name)

    updater = ErnieSATUpdater(
        model=model,
        optimizer=optimizer,
        scheduler=scheduler,
        dataloader=train_dataloader,
        text_masking=config["model"]["text_masking"],
        odim=odim,
        vocab_size=vocab_size,
        output_dir=output_dir)

    trainer = Trainer(updater, (config.max_epoch, 'epoch'), output_dir)

    evaluator = ErnieSATEvaluator(
        model=model,
        dataloader=dev_dataloader,
        text_masking=config["model"]["text_masking"],
        odim=odim,
        vocab_size=vocab_size,
        output_dir=output_dir, )

    # evaluation and visualization only run on rank 0; snapshots are
    # registered on every rank
    if dist.get_rank() == 0:
        trainer.extend(evaluator, trigger=(1, "epoch"))
        trainer.extend(VisualDL(output_dir), trigger=(1, "iteration"))
    trainer.extend(
        Snapshot(max_size=config.num_snapshots), trigger=(1, 'epoch'))
    trainer.run()


def main():
    """Parse the command line, load the YAML config and start training.

    With ``--ngpu`` greater than 1, ``train_sp`` is spawned in ``ngpu``
    processes; otherwise it runs in the current process.
    """
    parser = argparse.ArgumentParser(description="Train an ErnieSAT model.")
    add = parser.add_argument
    add("--config", type=str, help="ErnieSAT config file.")
    add("--train-metadata", type=str, help="training data.")
    add("--dev-metadata", type=str, help="dev data.")
    add("--output-dir", type=str, help="output dir.")
    add("--ngpu", type=int, default=1, help="if ngpu=0, use cpu.")
    add("--phones-dict", type=str, default=None, help="phone vocabulary file.")
    args = parser.parse_args()

    with open(args.config) as config_file:
        config = CfgNode(yaml.safe_load(config_file))

    # echo the effective settings
    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(config)
    print(
        f"master see the word size: {dist.get_world_size()}, from pid: {os.getpid()}"
    )

    # dispatch
    if args.ngpu <= 1:
        train_sp(args, config)
    else:
        dist.spawn(train_sp, (args, config), nprocs=args.ngpu)


# Run the training entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/ernie_sat/utils.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import hashlib
import os
from pathlib import Path
from typing import Dict
from typing import List
from typing import Union

import numpy as np
import paddle
import yaml
from yacs.config import CfgNode

from paddlespeech.t2s.exps.syn_utils import get_am_inference
from paddlespeech.t2s.exps.syn_utils import get_voc_inference


def _get_user():
    return os.path.expanduser('~').split('/')[-1]


def str2md5(string):
    """Return the hexadecimal MD5 digest of ``string`` encoded as UTF-8."""
    digest = hashlib.md5()
    digest.update(string.encode('utf8'))
    return digest.hexdigest()


def get_tmp_name(text: str):
    """Build a name of the form ``<user>_<pid>_<md5 of text>``."""
    return f"{_get_user()}_{os.getpid()}_{str2md5(text)}"


def get_dict(dictfile: str):
    """Load a pronunciation dictionary into a ``word -> phones`` mapping.

    Each non-empty line is split on whitespace: the first field is the word
    and the remaining fields are its phones, which are joined with single
    spaces. When a word occurs more than once, the first entry wins.
    Blank (whitespace-only) lines are ignored.

    Args:
        dictfile: path of the dictionary file.

    Returns:
        dict mapping each word to its space-joined phone string.
    """
    word2phns_dict = {}
    with open(dictfile, 'r') as fid:
        for line in fid:
            fields = line.split()
            if not fields:
                # blank line: nothing to index (previously an IndexError)
                continue
            word, phn_lst = fields[0], fields[1:]
            # keep only the first pronunciation seen for a word
            if word not in word2phns_dict:
                word2phns_dict[word] = ' '.join(phn_lst)
    return word2phns_dict


# Get the range of mel frames that has to be masked.
def get_span_bdy(mfa_start: List[float],
                 mfa_end: List[float],
                 span_to_repl: List[List[int]]):
    """Return ``[start, end]`` of the region covered by ``span_to_repl``.

    ``span_to_repl[0]`` and ``span_to_repl[1]`` are used as indices into
    ``mfa_start`` / ``mfa_end``. When the first index lies at or beyond the
    end of ``mfa_start``, an empty span located at ``mfa_end[-1]`` is
    returned.

    NOTE(review): the ``List[List[int]]`` annotation does not match this
    usage, which treats ``span_to_repl`` as a flat pair of indices; confirm
    against the callers.
    """
    first_idx = span_to_repl[0]
    if first_idx < len(mfa_start):
        last_idx = span_to_repl[1] - 1
        return [mfa_start[first_idx], mfa_end[last_idx]]
    # span begins past the last aligned unit: empty span at the very end
    tail = mfa_end[-1]
    return [tail, tail]


# The durations obtained from MFA and those produced by the FastSpeech2
# duration predictor may differ; this computes a scaling ratio used to
# rescale between predicted and real values.
def get_dur_adj_factor(orig_dur: List[int],
                       pred_dur: List[int],
                       phns: List[str]):
    """Return the trimmed mean of ``orig / pred`` over the given phones.

    Entries whose predicted duration is 0 or whose phone is ``'sp'`` are
    left out. The remaining ratios are sorted and, when at least five of
    them exist, the two smallest and two largest are discarded before
    averaging. With fewer than five ratios the factor is ``1``.
    """
    trim = 2
    ratios = np.array([
        real / predicted
        for real, predicted, phone in zip(orig_dur, pred_dur, phns)
        if predicted != 0 and phone != 'sp'
    ])
    ratios.sort()
    if len(ratios) < 5:
        return 1
    return np.average(ratios[trim:-trim])


def read_2col_text(path: Union[Path, str]) -> Dict[str, str]:
    """Parse a two-column text file into a dictionary.

    The first whitespace-delimited token of a line is the key and the rest
    of the line (trailing whitespace removed) is the value; a line holding
    only a key maps to the empty string.

    Examples:
        wav.scp:
            key1 /some/path/a.wav
            key2 /some/path/b.wav

        >>> read_2col_text('wav.scp')
        {'key1': '/some/path/a.wav', 'key2': '/some/path/b.wav'}

    Raises:
        RuntimeError: if a key appears on more than one line.
    """
    table = {}
    with Path(path).open("r", encoding="utf-8") as f:
        linenum = 0
        for line in f:
            linenum += 1
            fields = line.rstrip().split(maxsplit=1)
            if len(fields) == 1:
                # key without a value
                fields.append("")
            k, v = fields
            if k in table:
                raise RuntimeError(f"{k} is duplicated ({path}:{linenum})")
            table[k] = v
    return table


def load_num_sequence_text(path: Union[Path, str], loader_type: str="csv_int"
                           ) -> Dict[str, List[Union[float, int]]]:
    """Read a text file whose lines are ``<key> <sequence of numbers>``.

    The file is first parsed with ``read_2col_text``; the value part of each
    line is then split on the delimiter implied by ``loader_type`` and every
    item is converted to the matching numeric type.

    Args:
        path: file to read.
        loader_type: one of ``"text_int"`` / ``"text_float"`` (items
            separated by a single space) or ``"csv_int"`` / ``"csv_float"``
            (items separated by commas).

    Returns:
        dict mapping each key to a list of numbers.

    Raises:
        ValueError: if ``loader_type`` is not supported, or if an item
            cannot be converted to the requested numeric type.

    Examples:
        text:
            key1 1,2,3
            key2 34,5,6

        >>> d = load_num_sequence_text('text')
        >>> d["key1"]
        [1, 2, 3]
    """
    loader_specs = {
        "text_int": (" ", int),
        "text_float": (" ", float),
        "csv_int": (",", int),
        "csv_float": (",", float),
    }
    if loader_type not in loader_specs:
        raise ValueError(f"Not supported loader_type={loader_type}")
    delimiter, dtype = loader_specs[loader_type]

    # path looks like:
    #   utta 1,0
    #   uttb 3,4,5
    # -> return {'utta': [1, 0],
    #            'uttb': [3, 4, 5]}
    d = read_2col_text(path)
    # Using for-loop instead of dict-comprehension for debuggability
    retval = {}
    for k, v in d.items():
        try:
            retval[k] = [dtype(i) for i in v.split(delimiter)]
        except (TypeError, ValueError):
            # int()/float() report malformed text with ValueError, so both
            # exception types are reported before re-raising
            print(f'Error happened with path="{path}", id="{k}", value="{v}"')
            raise
    return retval


def is_chinese(ch):
    """Return True when ``ch`` compares within ``U+4E00`` .. ``U+9FFF``."""
    return u'\u4e00' <= ch <= u'\u9fff'


def get_voc_out(mel, args=None):
    """Convert ``mel`` to a waveform with the vocoder described by ``args``.

    Args:
        mel: input passed unchanged to the vocoder inference callable.
        args: object exposing ``voc``, ``voc_config`` (path of a YAML file),
            ``voc_ckpt`` and ``voc_stat``. When omitted, a module-level
            ``parse_args`` callable is used to obtain it. This module does
            not define or import one, so callers should normally pass
            ``args`` explicitly.

    Returns:
        The vocoder output with singleton dimensions removed by
        ``np.squeeze``.

    Raises:
        NameError: if ``args`` is omitted and no ``parse_args`` is available
            in this module's globals.
    """
    if args is None:
        arg_parser = globals().get("parse_args")
        if arg_parser is None:
            raise NameError(
                "name 'parse_args' is not defined; pass `args` explicitly "
                "to get_voc_out")
        args = arg_parser()

    # vocoder
    with open(args.voc_config) as f:
        voc_config = CfgNode(yaml.safe_load(f))
    voc_inference = get_voc_inference(
        voc=args.voc,
        voc_config=voc_config,
        voc_ckpt=args.voc_ckpt,
        voc_stat=args.voc_stat)

    with paddle.no_grad():
        wav = voc_inference(mel)
    return np.squeeze(wav)


def eval_durs(phns, target_lang: str='zh', fs: int=24000, n_shift: int=300):
    """Predict per-phone durations with a pretrained FastSpeech2 model.

    The model files are read from fixed paths under ``download/`` and are
    selected by ``target_lang``. Phones missing from the model's vocabulary
    are replaced by ``"sp"`` before being mapped to ids.

    Args:
        phns: sequence of phone strings.
        target_lang: ``'en'`` or ``'zh'``.
        fs: not used by this function; kept for interface compatibility.
        n_shift: not used by this function; kept for interface compatibility.

    Returns:
        list: the second item returned by the acoustic model's ``inference``
        call, converted with ``tolist()``.

    Raises:
        ValueError: if ``target_lang`` is neither ``'en'`` nor ``'zh'``.
    """
    if target_lang == 'en':
        am = "fastspeech2_ljspeech"
        am_config = "download/fastspeech2_nosil_ljspeech_ckpt_0.5/default.yaml"
        am_ckpt = "download/fastspeech2_nosil_ljspeech_ckpt_0.5/snapshot_iter_100000.pdz"
        am_stat = "download/fastspeech2_nosil_ljspeech_ckpt_0.5/speech_stats.npy"
        phones_dict = "download/fastspeech2_nosil_ljspeech_ckpt_0.5/phone_id_map.txt"

    elif target_lang == 'zh':
        am = "fastspeech2_csmsc"
        am_config = "download/fastspeech2_conformer_baker_ckpt_0.5/conformer.yaml"
        am_ckpt = "download/fastspeech2_conformer_baker_ckpt_0.5/snapshot_iter_76000.pdz"
        am_stat = "download/fastspeech2_conformer_baker_ckpt_0.5/speech_stats.npy"
        phones_dict = "download/fastspeech2_conformer_baker_ckpt_0.5/phone_id_map.txt"

    else:
        # previously this fell through and failed later with an
        # UnboundLocalError on `am_config`
        raise ValueError(
            f"Unsupported target_lang={target_lang!r}, expected 'en' or 'zh'.")

    # Init body.
    with open(am_config) as f:
        am_config = CfgNode(yaml.safe_load(f))

    # only the raw acoustic model is needed here, not the inference wrapper
    _, am = get_am_inference(
        am=am,
        am_config=am_config,
        am_ckpt=am_ckpt,
        am_stat=am_stat,
        phones_dict=phones_dict,
        return_am=True)

    vocab_phones = {}
    with open(phones_dict, "r") as f:
        phn_id = [line.strip().split() for line in f.readlines()]
    for tone, id in phn_id:
        vocab_phones[tone] = int(id)
    # out-of-vocabulary phones fall back to "sp"
    phonemes = [phn if phn in vocab_phones else "sp" for phn in phns]

    phone_ids = [vocab_phones[item] for item in phonemes]
    phone_ids = paddle.to_tensor(np.array(phone_ids, np.int64))
    _, d_outs, _, _ = am.inference(phone_ids)
    d_outs = d_outs.tolist()
    return d_outs


================================================
FILE: paddlespeech/t2s/exps/fastspeech2/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/exps/fastspeech2/gen_gta_mel.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# generate mels using durations.txt
# for mb melgan finetune
import argparse
import os
from pathlib import Path

import numpy as np
import paddle
import yaml
from tqdm import tqdm
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.preprocess_utils import get_phn_dur
from paddlespeech.t2s.datasets.preprocess_utils import merge_silence
from paddlespeech.t2s.models.fastspeech2 import FastSpeech2
from paddlespeech.t2s.models.fastspeech2 import StyleFastSpeech2Inference
from paddlespeech.t2s.modules.normalizer import ZScore
from paddlespeech.t2s.utils import str2bool


def evaluate(args, fastspeech2_config):
    """Generate mels with FastSpeech2 using the durations from ``args.dur_file``.

    For every utterance in the duration file, the phone ids and durations are
    fed to the model and the resulting mel is saved as
    ``<output_dir>/<split>/raw/<utt_id>_feats.npy``, where ``<split>`` is
    ``train``, ``dev`` or ``test`` according to the dataset-specific split of
    the wav files found under ``args.rootdir``. Utterances whose wav file is
    not part of any split are skipped.

    Args:
        args: parsed command line namespace. Reads ``rootdir``,
            ``phones_dict``, ``speaker_dict``, ``fastspeech2_checkpoint``,
            ``fastspeech2_stat``, ``output_dir``, ``dur_file``, ``dataset``
            and ``cut_sil``.
        fastspeech2_config: configuration node of the FastSpeech2 model.
            Reads ``n_mels`` and ``model``.

    Raises:
        ValueError: if ``args.dataset`` is neither ``"baker"`` nor
            ``"aishell3"``.
    """
    rootdir = Path(args.rootdir).expanduser()
    assert rootdir.is_dir()

    # construct dataset for evaluation
    with open(args.phones_dict, "r") as f:
        phn_id = [line.strip().split() for line in f.readlines()]
    vocab_size = len(phn_id)
    print("vocab_size:", vocab_size)

    phone_dict = {}
    for phn, id in phn_id:
        phone_dict[phn] = int(id)

    if args.speaker_dict:
        with open(args.speaker_dict, 'rt') as f:
            spk_id_list = [line.strip().split() for line in f.readlines()]
            spk_num = len(spk_id_list)
    else:
        spk_num = None

    odim = fastspeech2_config.n_mels
    model = FastSpeech2(
        idim=vocab_size,
        odim=odim,
        **fastspeech2_config["model"],
        spk_num=spk_num)

    model.set_state_dict(
        paddle.load(args.fastspeech2_checkpoint)["main_params"])
    model.eval()

    stat = np.load(args.fastspeech2_stat)
    mu, std = stat
    mu = paddle.to_tensor(mu)
    std = paddle.to_tensor(std)
    fastspeech2_normalizer = ZScore(mu, std)

    fastspeech2_inference = StyleFastSpeech2Inference(fastspeech2_normalizer,
                                                      model)
    fastspeech2_inference.eval()

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    sentences, speaker_set = get_phn_dur(args.dur_file)
    merge_silence(sentences)

    if args.dataset == "baker":
        wav_files = sorted(list((rootdir / "Wave").rglob("*.wav")))
        # split data into 3 sections
        num_train = 9800
        num_dev = 100
        train_wav_files = wav_files[:num_train]
        dev_wav_files = wav_files[num_train:num_train + num_dev]
        test_wav_files = wav_files[num_train + num_dev:]
    elif args.dataset == "aishell3":
        sub_num_dev = 5
        wav_dir = rootdir / "train" / "wav"
        train_wav_files = []
        dev_wav_files = []
        test_wav_files = []
        for speaker in os.listdir(wav_dir):
            wav_files = sorted(list((wav_dir / speaker).rglob("*.wav")))
            # speakers with few utterances contribute to train only
            if len(wav_files) > 100:
                train_wav_files += wav_files[:-sub_num_dev * 2]
                dev_wav_files += wav_files[-sub_num_dev * 2:-sub_num_dev]
                test_wav_files += wav_files[-sub_num_dev:]
            else:
                train_wav_files += wav_files
    else:
        # previously an unsupported name surfaced as a NameError further down
        raise ValueError(
            f"Unsupported dataset: {args.dataset!r}, expected 'baker' or 'aishell3'."
        )

    # Sets of file names: each utterance below is looked up in them, so a
    # constant-time membership test avoids a list scan per utterance.
    train_wav_names = {
        os.path.basename(str(str_path)) for str_path in train_wav_files
    }
    dev_wav_names = {
        os.path.basename(str(str_path)) for str_path in dev_wav_files
    }
    test_wav_names = {
        os.path.basename(str(str_path)) for str_path in test_wav_files
    }

    num_skipped = 0
    for utt_id in tqdm(sentences):
        # decide the target split first
        wav_path = utt_id + ".wav"
        if wav_path in train_wav_names:
            sub_output_dir = output_dir / ("train/raw")
        elif wav_path in dev_wav_names:
            sub_output_dir = output_dir / ("dev/raw")
        elif wav_path in test_wav_names:
            sub_output_dir = output_dir / ("test/raw")
        else:
            # The wav of this utterance is in none of the splits. Skip it
            # instead of writing into whichever directory the previous
            # utterance happened to use.
            num_skipped += 1
            continue

        phones = sentences[utt_id][0]
        durations = sentences[utt_id][1]
        speaker = sentences[utt_id][2]
        # trim the leading and trailing sil
        if args.cut_sil:
            if phones[0] == "sil" and len(durations) > 1:
                durations = durations[1:]
                phones = phones[1:]
            if phones[-1] == 'sil' and len(durations) > 1:
                durations = durations[:-1]
                phones = phones[:-1]

        phone_ids = [phone_dict[phn] for phn in phones]
        phone_ids = paddle.to_tensor(np.array(phone_ids))

        if args.speaker_dict:
            # first entry of the speaker map whose name matches
            speaker_id = int(
                [item[1] for item in spk_id_list if speaker == item[0]][0])
            speaker_id = paddle.to_tensor(speaker_id)
        else:
            speaker_id = None

        durations = paddle.to_tensor(np.array(durations))
        # generated and real mels may differ by 1 or 2 frames, but batch_fn
        # will fix that

        sub_output_dir.mkdir(parents=True, exist_ok=True)

        with paddle.no_grad():
            mel = fastspeech2_inference(
                phone_ids, durations=durations, spk_id=speaker_id)
        np.save(sub_output_dir / (utt_id + "_feats.npy"), mel)

    if num_skipped:
        print(
            f"skipped {num_skipped} utterances that belong to no train/dev/test split"
        )


def main():
    """Parse the command line, load the FastSpeech2 config and run ``evaluate``."""
    # parse args and config and redirect to evaluate
    parser = argparse.ArgumentParser(
        description="Synthesize with fastspeech2 & parallel wavegan.")
    # the help text lists the datasets that `evaluate` actually handles
    parser.add_argument(
        "--dataset",
        default="baker",
        type=str,
        help="name of dataset, should in {baker, aishell3} now")
    parser.add_argument(
        "--rootdir", default=None, type=str, help="directory to dataset.")
    parser.add_argument(
        "--fastspeech2-config", type=str, help="fastspeech2 config file.")
    parser.add_argument(
        "--fastspeech2-checkpoint",
        type=str,
        help="fastspeech2 checkpoint to load.")
    parser.add_argument(
        "--fastspeech2-stat",
        type=str,
        help="mean and standard deviation used to normalize spectrogram when training fastspeech2."
    )

    parser.add_argument(
        "--phones-dict",
        type=str,
        default="phone_id_map.txt",
        help="phone vocabulary file.")

    parser.add_argument(
        "--speaker-dict", type=str, default=None, help="speaker id map file.")

    parser.add_argument(
        "--dur-file", default=None, type=str, help="path to durations.txt.")
    parser.add_argument("--output-dir", type=str, help="output dir.")
    parser.add_argument(
        "--ngpu", type=int, default=1, help="if ngpu == 0, use cpu.")

    parser.add_argument(
        "--cut-sil",
        type=str2bool,
        default=True,
        help="whether cut sil in the edge of audio")

    args = parser.parse_args()

    # a negative value only prints a message; no device is selected then
    if args.ngpu == 0:
        paddle.set_device("cpu")
    elif args.ngpu > 0:
        paddle.set_device("gpu")
    else:
        print("ngpu should >= 0 !")

    with open(args.fastspeech2_config) as f:
        fastspeech2_config = CfgNode(yaml.safe_load(f))

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(fastspeech2_config)

    evaluate(args, fastspeech2_config)


# Run the mel generation entry point only when executed as a script.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/fastspeech2/normalize.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Normalize feature files and dump them."""
import argparse
import logging
from operator import itemgetter
from pathlib import Path

import jsonlines
import numpy as np
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

from paddlespeech.t2s.datasets.data_table import DataTable


def _restore_scaler(stats_path):
    """Rebuild a ``StandardScaler`` from a dumped statistics file.

    Args:
        stats_path: Path of a ``.npy`` file. Row 0 is used as the mean and
            row 1 as the scale.

    Returns:
        A ``StandardScaler`` whose ``mean_``, ``scale_`` and
        ``n_features_in_`` attributes are filled in from the file.
    """
    # Load once; the original code read every statistics file twice.
    stats = np.load(stats_path)
    scaler = StandardScaler()
    scaler.mean_ = stats[0]
    scaler.scale_ = stats[1]
    scaler.n_features_in_ = scaler.mean_.shape[0]
    return scaler


def _load_id_map(map_path):
    """Read a whitespace separated ``<token> <id>`` file into a dict.

    Args:
        map_path: Path of the text file, one ``token id`` pair per line.

    Returns:
        Dict mapping each token (str) to its integer id.

    Raises:
        ValueError: If a non-empty line does not contain exactly two fields
            or the id is not an integer.
    """
    vocab = {}
    # Read as UTF-8 explicitly so the result does not depend on the locale
    # (the training script reads the same files with encoding='utf-8').
    with open(map_path, 'rt', encoding='utf-8') as f:
        for line in f:
            fields = line.strip().split()
            if not fields:
                # tolerate blank lines (e.g. a trailing newline at EOF)
                continue
            token, idx = fields
            vocab[token] = int(idx)
    return vocab


def main():
    """Normalize dumped raw features and write them with a new metadata file.

    Reads the jsonlines file given by ``--metadata``, standardizes the
    ``speech``, ``pitch`` and ``energy`` arrays of every item with the
    statistics files given on the command line, saves the results as float32
    ``.npy`` files below ``--dumpdir`` and writes ``metadata.jsonl`` (sorted
    by ``utt_id``) into the same directory. Phones and speakers are converted
    to integer ids with ``--phones-dict`` and ``--speaker-dict``.
    """
    parser = argparse.ArgumentParser(
        description="Normalize dumped raw features (See detail in parallel_wavegan/bin/normalize.py)."
    )
    parser.add_argument(
        "--metadata",
        type=str,
        required=True,
        help="jsonlines metadata file listing the raw feature files to be normalized."
    )

    parser.add_argument(
        "--dumpdir",
        type=str,
        required=True,
        help="directory to dump normalized feature files.")
    parser.add_argument(
        "--speech-stats",
        type=str,
        required=True,
        help="speech statistics file.")
    parser.add_argument(
        "--pitch-stats", type=str, required=True, help="pitch statistics file.")
    parser.add_argument(
        "--energy-stats",
        type=str,
        required=True,
        help="energy statistics file.")
    parser.add_argument(
        "--phones-dict", type=str, default=None, help="phone vocabulary file.")
    parser.add_argument(
        "--speaker-dict", type=str, default=None, help="speaker id map file.")

    args = parser.parse_args()

    # Both maps are used unconditionally below, so fail with a clear message
    # instead of the opaque TypeError raised by open(None).
    if args.phones_dict is None:
        parser.error("--phones-dict is required.")
    if args.speaker_dict is None:
        parser.error("--speaker-dict is required.")

    dumpdir = Path(args.dumpdir).expanduser()
    # use absolute path
    dumpdir = dumpdir.resolve()
    dumpdir.mkdir(parents=True, exist_ok=True)

    # get dataset
    with jsonlines.open(args.metadata, 'r') as reader:
        metadata = list(reader)
    dataset = DataTable(
        metadata,
        converters={
            "speech": np.load,
            "pitch": np.load,
            "energy": np.load,
        })
    logging.info(f"The number of files = {len(dataset)}.")

    # restore scaler
    speech_scaler = _restore_scaler(args.speech_stats)
    pitch_scaler = _restore_scaler(args.pitch_stats)
    energy_scaler = _restore_scaler(args.energy_stats)

    vocab_phones = _load_id_map(args.phones_dict)
    vocab_speaker = _load_id_map(args.speaker_dict)

    # The output directories do not depend on the item, so create them once
    # instead of once per utterance.
    speech_dir = dumpdir / "data_speech"
    pitch_dir = dumpdir / "data_pitch"
    energy_dir = dumpdir / "data_energy"
    for feature_dir in (speech_dir, pitch_dir, energy_dir):
        feature_dir.mkdir(parents=True, exist_ok=True)

    # process each file
    output_metadata = []

    for item in tqdm(dataset):
        utt_id = item['utt_id']

        # normalize
        speech = speech_scaler.transform(item['speech'])
        speech_path = speech_dir / f"{utt_id}_speech.npy"
        np.save(speech_path, speech.astype(np.float32), allow_pickle=False)

        pitch = pitch_scaler.transform(item['pitch'])
        pitch_path = pitch_dir / f"{utt_id}_pitch.npy"
        np.save(pitch_path, pitch.astype(np.float32), allow_pickle=False)

        energy = energy_scaler.transform(item['energy'])
        energy_path = energy_dir / f"{utt_id}_energy.npy"
        np.save(energy_path, energy.astype(np.float32), allow_pickle=False)

        phone_ids = [vocab_phones[p] for p in item['phones']]
        spk_id = vocab_speaker[item["speaker"]]
        record = {
            "utt_id": utt_id,
            "spk_id": spk_id,
            "text": phone_ids,
            "text_lengths": item['text_lengths'],
            "speech_lengths": item['speech_lengths'],
            "durations": item['durations'],
            "speech": str(speech_path),
            "pitch": str(pitch_path),
            "energy": str(energy_path)
        }
        # add spk_emb for voice cloning
        if "spk_emb" in item:
            record["spk_emb"] = str(item["spk_emb"])

        output_metadata.append(record)

    output_metadata.sort(key=itemgetter('utt_id'))
    # Use the expanded/resolved dumpdir: building the path from the raw
    # argument would write to a different location than the features when
    # the argument contains "~".
    output_metadata_path = dumpdir / "metadata.jsonl"
    with jsonlines.open(output_metadata_path, 'w') as writer:
        for item in output_metadata:
            writer.write(item)
    logging.info(f"metadata dumped into {output_metadata_path}")


# Script entry point: run the normalization CLI only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/fastspeech2/preprocess.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
from concurrent.futures import ThreadPoolExecutor
from operator import itemgetter
from pathlib import Path
from typing import Any
from typing import Dict
from typing import List

import jsonlines
import librosa
import numpy as np
import tqdm
import yaml
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.get_feats import Energy
from paddlespeech.t2s.datasets.get_feats import LogMelFBank
from paddlespeech.t2s.datasets.get_feats import Pitch
from paddlespeech.t2s.datasets.preprocess_utils import compare_duration_and_mel_length
from paddlespeech.t2s.datasets.preprocess_utils import get_input_token
from paddlespeech.t2s.datasets.preprocess_utils import get_phn_dur
from paddlespeech.t2s.datasets.preprocess_utils import get_spk_id_map
from paddlespeech.t2s.datasets.preprocess_utils import merge_silence
from paddlespeech.t2s.utils import str2bool


def process_sentence(config: Dict[str, Any],
                     fp: Path,
                     sentences: Dict,
                     output_dir: Path,
                     mel_extractor=None,
                     pitch_extractor=None,
                     energy_extractor=None,
                     cut_sil: bool=True,
                     spk_emb_dir: Path=None):
    """Extract and dump mel, pitch and energy features for one audio file.

    Args:
        config: Configuration object; ``config.fs`` and ``config.n_shift``
            are read here.
        fp: Path of the audio file. Its stem (minus a trailing ``_mic2``)
            is the utterance id.
        sentences: Mapping ``utt_id -> [phones, durations, speaker, ...]``.
            The phones/durations of the utterance are updated in place when
            ``cut_sil`` is set, and the entry may be removed by
            ``compare_duration_and_mel_length``.
        output_dir: Directory below which ``data_speech``, ``data_pitch``
            and ``data_energy`` are created.
        mel_extractor: Object providing ``get_log_mel_fbank(wav)``.
        pitch_extractor: Object providing ``get_pitch(wav, duration=...)``.
        energy_extractor: Object providing ``get_energy(wav, duration=...)``.
        cut_sil: Whether to drop a leading/trailing ``sil`` phone and the
            corresponding audio.
        spk_emb_dir: Optional directory with ``<speaker>/<utt_id>.npy``
            speaker embeddings.

    Returns:
        A metadata dict for the utterance, or ``None`` when the utterance is
        skipped (unknown id, unsupported channel layout, removed by the
        duration check, all-zero pitch, or a missing embedding file).
    """
    utt_id = fp.stem
    # for vctk
    if utt_id.endswith("_mic2"):
        utt_id = utt_id[:-5]
    if utt_id not in sentences:
        return None

    wav_path = str(fp)
    is_canton = "canton" in wav_path
    # reading, resampling may occur
    if is_canton:
        wav, _ = librosa.load(wav_path, sr=config.fs, mono=False)
    else:
        wav, _ = librosa.load(wav_path, sr=config.fs)

    n_dims = len(wav.shape)
    if n_dims == 2 and is_canton:
        # Remind that Cantonese datasets should be placed in ~/datasets/canton_all. Otherwise, it may cause problem.
        wav = np.ascontiguousarray(wav[0])
    elif n_dims != 1:
        return None

    peak = np.abs(wav).max()
    if peak > 1.0:
        wav = wav / peak
    assert len(wav.shape) == 1, f"{utt_id} is not a mono-channel audio."
    assert np.abs(wav).max(
    ) <= 1.0, f"{utt_id} is seems to be different that 16 bit PCM."

    entry = sentences[utt_id]
    phones, durations, speaker = entry[0], entry[1], entry[2]
    frame_bounds = np.pad(np.array(durations).cumsum(0), (1, 0), 'constant')
    # little imprecise than use *.TextGrid directly
    times = librosa.frames_to_time(
        frame_bounds, sr=config.fs, hop_length=config.n_shift)

    if cut_sil:
        start, end = 0, frame_bounds[-1]
        if phones[0] == "sil" and len(durations) > 1:
            start = times[1]
            phones, durations = phones[1:], durations[1:]
        if phones[-1] == 'sil' and len(durations) > 1:
            end = times[-2]
            phones, durations = phones[:-1], durations[:-1]
        sentences[utt_id][0] = phones
        sentences[utt_id][1] = durations
        start, end = librosa.time_to_samples([start, end], sr=config.fs)
        wav = wav[start:end]

    def _dump(subdir, suffix, array):
        # Save one feature array below output_dir/subdir and return its path.
        target_dir = output_dir / subdir
        target_dir.mkdir(parents=True, exist_ok=True)
        target = target_dir / (utt_id + suffix)
        np.save(target, array)
        return target

    # extract mel feats
    logmel = mel_extractor.get_log_mel_fbank(wav)
    # change duration according to mel_length
    compare_duration_and_mel_length(sentences, utt_id, logmel)
    # utt_id may be popped in compare_duration_and_mel_length
    if utt_id not in sentences:
        return None
    phones = sentences[utt_id][0]
    durations = sentences[utt_id][1]
    num_frames = logmel.shape[0]
    assert sum(durations) == num_frames
    mel_path = _dump("data_speech", "_speech.npy", logmel)

    # extract pitch and energy
    f0 = pitch_extractor.get_pitch(wav, duration=np.array(durations))
    if (f0 == 0).all():
        return None
    assert f0.shape[0] == len(durations)
    f0_path = _dump("data_pitch", "_pitch.npy", f0)

    energy = energy_extractor.get_energy(wav, duration=np.array(durations))
    assert energy.shape[0] == len(durations)
    energy_path = _dump("data_energy", "_energy.npy", energy)

    record = {
        "utt_id": utt_id,
        "phones": phones,
        "text_lengths": len(phones),
        "speech_lengths": num_frames,
        "durations": durations,
        "speech": str(mel_path),
        "pitch": str(f0_path),
        "energy": str(energy_path),
        "speaker": speaker
    }
    if spk_emb_dir and speaker in os.listdir(spk_emb_dir):
        embed_path = spk_emb_dir / speaker / (utt_id + ".npy")
        if not embed_path.is_file():
            return None
        record["spk_emb"] = str(embed_path)
    return record


def process_sentences(config,
                      fps: List[Path],
                      sentences: Dict,
                      output_dir: Path,
                      mel_extractor=None,
                      pitch_extractor=None,
                      energy_extractor=None,
                      nprocs: int=1,
                      cut_sil: bool=True,
                      spk_emb_dir: Path=None,
                      write_metadata_method: str='w'):
    """Run ``process_sentence`` over many files and write ``metadata.jsonl``.

    Args:
        config: Configuration object forwarded to ``process_sentence``.
        fps: Audio files to process.
        sentences: Alignment information forwarded to ``process_sentence``.
        output_dir: Directory receiving the features and ``metadata.jsonl``.
        mel_extractor: Forwarded to ``process_sentence``.
        pitch_extractor: Forwarded to ``process_sentence``.
        energy_extractor: Forwarded to ``process_sentence``.
        nprocs: ``1`` processes the files sequentially; any other value uses
            a thread pool with that many workers.
        cut_sil: Forwarded to ``process_sentence``.
        spk_emb_dir: Forwarded to ``process_sentence``.
        write_metadata_method: Mode passed to ``jsonlines.open`` for the
            metadata file.
    """
    results = []
    if nprocs == 1:
        for wav_file in tqdm.tqdm(fps, total=len(fps)):
            record = process_sentence(
                config=config,
                fp=wav_file,
                sentences=sentences,
                output_dir=output_dir,
                mel_extractor=mel_extractor,
                pitch_extractor=pitch_extractor,
                energy_extractor=energy_extractor,
                cut_sil=cut_sil,
                spk_emb_dir=spk_emb_dir)
            if record:
                results.append(record)
    else:
        with ThreadPoolExecutor(nprocs) as pool:
            pending = []
            with tqdm.tqdm(total=len(fps)) as progress:

                def _advance(_finished):
                    # one finished job -> one tick of the progress bar
                    progress.update()

                for wav_file in fps:
                    job = pool.submit(process_sentence, config, wav_file,
                                      sentences, output_dir, mel_extractor,
                                      pitch_extractor, energy_extractor,
                                      cut_sil, spk_emb_dir)
                    job.add_done_callback(_advance)
                    pending.append(job)

                # Collect in submission order; skipped utterances yield a
                # falsy record and are left out.
                for job in pending:
                    outcome = job.result()
                    if outcome:
                        results.append(outcome)

    results.sort(key=itemgetter("utt_id"))
    metadata_file = output_dir / "metadata.jsonl"
    with jsonlines.open(metadata_file, write_metadata_method) as writer:
        for entry in results:
            writer.write(entry)
    print("Done")


def _split_by_count(wav_files, num_train, num_dev):
    """Split an ordered file list into train/dev/test parts by fixed counts.

    The first ``num_train`` files go to train, the next ``num_dev`` to dev
    and the remainder to test.
    """
    train = wav_files[:num_train]
    dev = wav_files[num_train:num_train + num_dev]
    test = wav_files[num_train + num_dev:]
    return train, dev, test


def _split_per_speaker(wav_dir, pattern, sub_num_dev=5):
    """Split the files below ``wav_dir/<speaker>`` into train/dev/test lists.

    For every speaker with more than 100 files, the last ``sub_num_dev``
    files go to test, the ``sub_num_dev`` files before them go to dev and the
    rest to train. Speakers with 100 files or fewer contribute to train only.
    """
    train, dev, test = [], [], []
    for speaker in os.listdir(wav_dir):
        wav_files = sorted(list((wav_dir / speaker).rglob(pattern)))
        if len(wav_files) > 100:
            train += wav_files[:-sub_num_dev * 2]
            dev += wav_files[-sub_num_dev * 2:-sub_num_dev]
            test += wav_files[-sub_num_dev:]
        else:
            train += wav_files
    return train, dev, test


def main():
    """Command-line entry point: preprocess a dataset for FastSpeech2.

    Parses the arguments, loads the config and the duration file, writes the
    phone and speaker id maps into ``--dumpdir``, splits the audio files of
    the selected dataset into train/dev/test and calls ``process_sentences``
    for every non-empty split, dumping into ``<dumpdir>/<split>/raw``.

    Invalid or missing arguments are reported through ``parser.error``.
    """
    supported_datasets = ("baker", "aishell3", "canton", "ljspeech", "vctk")

    # parse config and args
    parser = argparse.ArgumentParser(
        description="Preprocess audio and then extract features.")

    parser.add_argument(
        "--dataset",
        default="baker",
        type=str,
        help="name of dataset, should in {baker, aishell3, canton, ljspeech, vctk} now"
    )

    parser.add_argument(
        "--rootdir", default=None, type=str, help="directory to dataset.")

    parser.add_argument(
        "--dumpdir",
        type=str,
        required=True,
        help="directory to dump feature files.")
    parser.add_argument(
        "--dur-file", default=None, type=str, help="path to durations.txt.")

    parser.add_argument("--config", type=str, help="fastspeech2 config file.")

    parser.add_argument(
        "--num-cpu", type=int, default=1, help="number of process.")

    parser.add_argument(
        "--cut-sil",
        type=str2bool,
        default=True,
        help="whether cut sil in the edge of audio")

    parser.add_argument(
        "--spk_emb_dir",
        default=None,
        type=str,
        help="directory to speaker embedding files.")

    parser.add_argument(
        "--write_metadata_method",
        default="w",
        type=str,
        choices=["w", "a"],
        help="How the metadata.jsonl file is written.")
    args = parser.parse_args()

    # Fail early with a readable message. Previously an unknown dataset only
    # printed a hint and then crashed on the undefined file lists, and a
    # missing --rootdir/--dur-file/--config raised a TypeError.
    if args.dataset not in supported_datasets:
        parser.error(
            "dataset should in {baker, aishell3, canton, ljspeech, vctk} now!")
    if args.rootdir is None:
        parser.error("--rootdir is required.")
    if args.dur_file is None:
        parser.error("--dur-file is required.")
    if args.config is None:
        parser.error("--config is required.")

    rootdir = Path(args.rootdir).expanduser()
    dumpdir = Path(args.dumpdir).expanduser()
    # use absolute path
    dumpdir = dumpdir.resolve()
    dumpdir.mkdir(parents=True, exist_ok=True)
    dur_file = Path(args.dur_file).expanduser()

    if args.spk_emb_dir:
        spk_emb_dir = Path(args.spk_emb_dir).expanduser().resolve()
    else:
        spk_emb_dir = None

    # Explicit checks instead of assert, which is stripped under "python -O".
    if not rootdir.is_dir():
        parser.error(f"--rootdir is not a directory: {rootdir}")
    if not dur_file.is_file():
        parser.error(f"--dur-file is not a file: {dur_file}")

    with open(args.config, 'rt') as f:
        config = CfgNode(yaml.safe_load(f))

    sentences, speaker_set = get_phn_dur(dur_file)

    merge_silence(sentences)
    phone_id_map_path = dumpdir / "phone_id_map.txt"
    speaker_id_map_path = dumpdir / "speaker_id_map.txt"
    get_input_token(sentences, phone_id_map_path, args.dataset)
    get_spk_id_map(speaker_set, speaker_id_map_path)

    # split data into 3 sections
    if args.dataset == "baker":
        wav_files = sorted(list((rootdir / "Wave").rglob("*.wav")))
        splits = _split_by_count(wav_files, num_train=9800, num_dev=100)
    elif args.dataset == "ljspeech":
        wav_files = sorted(list((rootdir / "wavs").rglob("*.wav")))
        splits = _split_by_count(wav_files, num_train=12900, num_dev=100)
    elif args.dataset == "aishell3":
        splits = _split_per_speaker(rootdir / "train" / "wav", "*.wav")
    elif args.dataset == "canton":
        splits = _split_per_speaker(rootdir / "WAV", "*.wav")
    else:
        # "vctk": the only remaining supported dataset after validation
        splits = _split_per_speaker(rootdir / "wav48_silence_trimmed",
                                    "*_mic2.flac")
    train_wav_files, dev_wav_files, test_wav_files = splits

    train_dump_dir = dumpdir / "train" / "raw"
    train_dump_dir.mkdir(parents=True, exist_ok=True)
    dev_dump_dir = dumpdir / "dev" / "raw"
    dev_dump_dir.mkdir(parents=True, exist_ok=True)
    test_dump_dir = dumpdir / "test" / "raw"
    test_dump_dir.mkdir(parents=True, exist_ok=True)

    # Extractor
    mel_extractor = LogMelFBank(
        sr=config.fs,
        n_fft=config.n_fft,
        hop_length=config.n_shift,
        win_length=config.win_length,
        window=config.window,
        n_mels=config.n_mels,
        fmin=config.fmin,
        fmax=config.fmax)
    pitch_extractor = Pitch(
        sr=config.fs,
        hop_length=config.n_shift,
        f0min=config.f0min,
        f0max=config.f0max)
    energy_extractor = Energy(
        n_fft=config.n_fft,
        hop_length=config.n_shift,
        win_length=config.win_length,
        window=config.window)

    # process for the 3 sections; empty sections are skipped
    sections = (
        (train_wav_files, train_dump_dir),
        (dev_wav_files, dev_dump_dir),
        (test_wav_files, test_dump_dir), )
    for section_files, section_dir in sections:
        if section_files:
            process_sentences(
                config=config,
                fps=section_files,
                sentences=sentences,
                output_dir=section_dir,
                mel_extractor=mel_extractor,
                pitch_extractor=pitch_extractor,
                energy_extractor=energy_extractor,
                nprocs=args.num_cpu,
                cut_sil=args.cut_sil,
                spk_emb_dir=spk_emb_dir,
                write_metadata_method=args.write_metadata_method)


# Script entry point: run the preprocessing CLI only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/fastspeech2/train.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
import os
import shutil
from pathlib import Path

import jsonlines
import numpy as np
import paddle
import yaml
from paddle import DataParallel
from paddle import distributed as dist
from paddle.io import DataLoader
from paddle.io import DistributedBatchSampler
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.am_batch_fn import fastspeech2_multi_spk_batch_fn
from paddlespeech.t2s.datasets.am_batch_fn import fastspeech2_single_spk_batch_fn
from paddlespeech.t2s.datasets.data_table import DataTable
from paddlespeech.t2s.models.fastspeech2 import FastSpeech2
from paddlespeech.t2s.models.fastspeech2 import FastSpeech2Evaluator
from paddlespeech.t2s.models.fastspeech2 import FastSpeech2Updater
from paddlespeech.t2s.training.extensions.snapshot import Snapshot
from paddlespeech.t2s.training.extensions.visualizer import VisualDL
from paddlespeech.t2s.training.optimizer import build_optimizers
from paddlespeech.t2s.training.seeding import seed_everything
from paddlespeech.t2s.training.trainer import Trainer
from paddlespeech.t2s.utils import str2bool


def train_sp(args, config):
    """Train a FastSpeech2 model in the current process.

    Called directly by ``main`` for single-device runs and through
    ``dist.spawn`` for multi-GPU runs.

    Args:
        args: Parsed command-line namespace. This function reads ``ngpu``,
            ``nxpu``, ``speaker_dict``, ``voice_cloning``, ``train_metadata``,
            ``dev_metadata``, ``phones_dict``, ``output_dir`` and ``config``.
        config: Configuration node. This function reads ``seed``,
            ``batch_size``, ``num_workers``, ``n_mels``, ``max_epoch``,
            ``num_snapshots`` and the ``model``, ``optimizer`` and
            ``updater`` sections.

    Raises:
        ValueError: If the requested device does not match the installed
            paddle build (none of the gpu/xpu/cpu branches applies).
    """
    # decides device type and whether to run in parallel
    # setup running environment correctly
    if args.ngpu > 0 and paddle.is_compiled_with_cuda():
        paddle.set_device("gpu")
    elif args.nxpu > 0 and paddle.is_compiled_with_xpu():
        paddle.set_device("xpu")
    elif args.ngpu == 0 and args.nxpu == 0:
        paddle.set_device("cpu")
    else:
        raise ValueError(
            "Please make sure that the paddle you installed matches the device type you set, "
            "and that ngpu and nxpu cannot be negative at the same time.")

    # The parallel environment is only initialized when more than one
    # process takes part in training.
    world_size = paddle.distributed.get_world_size()
    if world_size > 1:
        paddle.distributed.init_parallel_env()

    # set the random seed, it is a must for multiprocess training
    seed_everything(config.seed)

    print(
        f"rank: {dist.get_rank()}, pid: {os.getpid()}, parent_pid: {os.getppid()}",
    )
    # Metadata fields loaded for every sample; the array-valued ones are
    # stored as file paths and read with np.load.
    fields = [
        "text", "text_lengths", "speech", "speech_lengths", "durations",
        "pitch", "energy"
    ]
    converters = {"speech": np.load, "pitch": np.load, "energy": np.load}
    spk_num = None
    # Three mutually exclusive modes: multi-speaker (speaker ids),
    # voice cloning (speaker embeddings) or single speaker.
    # A given speaker dict takes precedence over --voice-cloning.
    if args.speaker_dict is not None:
        print("multiple speaker fastspeech2!")
        collate_fn = fastspeech2_multi_spk_batch_fn
        with open(args.speaker_dict, 'rt', encoding='utf-8') as f:
            spk_id = [line.strip().split() for line in f.readlines()]
        # the number of speakers is the number of lines in the map file
        spk_num = len(spk_id)
        fields += ["spk_id"]
    elif args.voice_cloning:
        print("Training voice cloning!")
        collate_fn = fastspeech2_multi_spk_batch_fn
        fields += ["spk_emb"]
        converters["spk_emb"] = np.load
    else:
        print("single speaker fastspeech2!")
        collate_fn = fastspeech2_single_spk_batch_fn
    print("spk_num:", spk_num)

    # dataloader has been too verbose
    logging.getLogger("DataLoader").disabled = True

    # construct dataset for training and validation
    with jsonlines.open(args.train_metadata, 'r') as reader:
        train_metadata = list(reader)
    train_dataset = DataTable(
        data=train_metadata,
        fields=fields,
        converters=converters, )
    with jsonlines.open(args.dev_metadata, 'r') as reader:
        dev_metadata = list(reader)
    dev_dataset = DataTable(
        data=dev_metadata,
        fields=fields,
        converters=converters, )

    # collate function and dataloader

    # Training batches are shuffled and the last incomplete batch is dropped.
    train_sampler = DistributedBatchSampler(
        train_dataset,
        batch_size=config.batch_size,
        shuffle=True,
        drop_last=True)

    print("samplers done!")

    train_dataloader = DataLoader(
        train_dataset,
        batch_sampler=train_sampler,
        collate_fn=collate_fn,
        num_workers=config.num_workers)

    # The dev loader keeps the original order and every sample.
    dev_dataloader = DataLoader(
        dev_dataset,
        shuffle=False,
        drop_last=False,
        batch_size=config.batch_size,
        collate_fn=collate_fn,
        num_workers=config.num_workers)
    print("dataloaders done!")

    # The vocabulary size is the number of lines in the phone map file.
    with open(args.phones_dict, 'rt', encoding='utf-8') as f:
        phn_id = [line.strip().split() for line in f.readlines()]
    vocab_size = len(phn_id)
    print("vocab_size:", vocab_size)

    odim = config.n_mels
    model = FastSpeech2(
        idim=vocab_size, odim=odim, spk_num=spk_num, **config["model"])
    if world_size > 1:
        model = DataParallel(model)
    print("model done!")

    optimizer = build_optimizers(model, **config["optimizer"])
    print("optimizer done!")

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    # Only rank 0 copies the config file, so parallel processes do not write
    # the same file.
    if dist.get_rank() == 0:
        config_name = args.config.split("/")[-1]
        # copy conf to output_dir
        shutil.copyfile(args.config, output_dir / config_name)

    # The speaker classifier flag is optional in the model config and
    # defaults to False when absent.
    if "enable_speaker_classifier" in config.model:
        enable_spk_cls = config.model.enable_speaker_classifier
    else:
        enable_spk_cls = False

    updater = FastSpeech2Updater(
        model=model,
        optimizer=optimizer,
        dataloader=train_dataloader,
        output_dir=output_dir,
        enable_spk_cls=enable_spk_cls,
        **config["updater"], )

    trainer = Trainer(updater, (config.max_epoch, 'epoch'), output_dir)

    # The evaluator receives the same "updater" config section as the updater.
    evaluator = FastSpeech2Evaluator(
        model,
        dev_dataloader,
        output_dir=output_dir,
        enable_spk_cls=enable_spk_cls,
        **config["updater"], )

    # Evaluation and VisualDL logging are registered on rank 0 only; the
    # snapshot extension is registered on every rank.
    if dist.get_rank() == 0:
        trainer.extend(evaluator, trigger=(1, "epoch"))
        trainer.extend(VisualDL(output_dir), trigger=(1, "iteration"))
    trainer.extend(
        Snapshot(max_size=config.num_snapshots), trigger=(1, 'epoch'))
    trainer.run()


def main():
    """Parse the command line, load the YAML config and start training.

    With ``--ngpu`` greater than 1 one ``train_sp`` process per GPU is
    spawned; otherwise ``train_sp`` runs in the current process.
    """
    # parse args and config and redirect to train_sp
    option_table = (
        ("--config", {
            "type": str,
            "help": "fastspeech2 config file."
        }),
        ("--train-metadata", {
            "type": str,
            "help": "training data."
        }),
        ("--dev-metadata", {
            "type": str,
            "help": "dev data."
        }),
        ("--output-dir", {
            "type": str,
            "help": "output dir."
        }),
        ("--ngpu", {
            "type": int,
            "default": 1,
            "help": "if ngpu=0, use cpu or xpu."
        }),
        ("--nxpu", {
            "type": int,
            "default": 0,
            "help":
            "if ngpu=0 and nxpu > 0, use xpu. if ngpu=0 and nxpu=0, use cpu."
        }),
        ("--phones-dict", {
            "type": str,
            "default": None,
            "help": "phone vocabulary file."
        }),
        ("--speaker-dict", {
            "type": str,
            "default": None,
            "help": "speaker id map file for multiple speaker model."
        }),
        ("--voice-cloning", {
            "type": str2bool,
            "default": False,
            "help": "whether training voice cloning model."
        }), )

    arg_parser = argparse.ArgumentParser(
        description="Train a FastSpeech2 model.")
    for flag, options in option_table:
        arg_parser.add_argument(flag, **options)
    args = arg_parser.parse_args()

    with open(args.config) as config_file:
        raw_config = yaml.safe_load(config_file)
    config = CfgNode(raw_config)

    print("========Args========")
    args_dump = yaml.safe_dump(vars(args))
    print(args_dump)
    print("========Config========")
    print(config)
    world_size = dist.get_world_size()
    pid = os.getpid()
    print(f"master see the word size: {world_size}, from pid: {pid}")

    # dispatch
    if args.ngpu <= 1:
        train_sp(args, config)
        return
    dist.spawn(train_sp, (args, config), nprocs=args.ngpu)


# Script entry point: start training only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/fastspeech2/vc2_infer.py
================================================
import argparse
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path

import numpy as np
import tqdm

from paddlespeech.cli.vector import VectorExecutor


def _process_utterance(ifpath: Path,
                       input_dir: Path,
                       output_dir: Path,
                       vec_executor):
    rel_path = ifpath.relative_to(input_dir)
    ofpath = (output_dir / rel_path).with_suffix(".npy")
    ofpath.parent.mkdir(parents=True, exist_ok=True)
    embed = vec_executor(audio_file=ifpath, force_yes=True)
    np.save(ofpath, embed)
    return ofpath


def main(args):
    """Compute and save an embedding for every matching audio file.

    Args:
        args: Namespace with the attributes ``input`` (root directory of the
            audio files), ``pattern`` (glob pattern searched recursively
            below ``input``), ``output`` (directory for the ``.npy`` files)
            and ``num_cpu`` (``1`` runs sequentially, any other value uses a
            thread pool with that many workers).

    Raises:
        Exception: Any error raised while processing an utterance is
            propagated, in the sequential as well as in the threaded mode.
    """
    # input output preparation
    input_dir = Path(args.input).expanduser()
    ifpaths = list(input_dir.rglob(args.pattern))
    print(f"{len(ifpaths)} utterances in total")
    output_dir = Path(args.output).expanduser()
    output_dir.mkdir(parents=True, exist_ok=True)

    # Nothing to do; this also protects the warm-up below from an IndexError
    # and avoids constructing the executor for an empty input.
    if not ifpaths:
        return

    vec_executor = VectorExecutor()
    nprocs = args.num_cpu

    # warm up
    vec_executor(audio_file=ifpaths[0], force_yes=True)

    if nprocs == 1:
        for ifpath in tqdm.tqdm(ifpaths, total=len(ifpaths)):
            _process_utterance(
                ifpath=ifpath,
                input_dir=input_dir,
                output_dir=output_dir,
                vec_executor=vec_executor)
    else:
        with ThreadPoolExecutor(nprocs) as pool:
            with tqdm.tqdm(total=len(ifpaths)) as progress:
                futures = []
                for ifpath in ifpaths:
                    future = pool.submit(_process_utterance, ifpath, input_dir,
                                         output_dir, vec_executor)
                    future.add_done_callback(lambda p: progress.update())
                    futures.append(future)

                # Wait inside the progress context so the bar stays open
                # until all jobs are done, and call result() so that an
                # exception raised in a worker is not silently discarded.
                for future in futures:
                    future.result()


# Script entry point: build the command-line interface and run main().
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="compute utterance embed.")
    # --input and --output are passed to Path() in main(), so they must be
    # given; mark them as required to get a usage error instead of a
    # TypeError.
    parser.add_argument(
        "--input",
        type=str,
        required=True,
        help="path of the audio_file folder.")
    parser.add_argument(
        "--pattern",
        type=str,
        default="*.wav",
        help="pattern to filter audio files.")
    parser.add_argument(
        "--output",
        metavar="OUTPUT_DIR",
        type=str,
        required=True,
        help="path to save spk embedding results.")
    parser.add_argument(
        "--num-cpu", type=int, default=1, help="number of process.")
    args = parser.parse_args()

    main(args)


================================================
FILE: paddlespeech/t2s/exps/gan_vocoder/README.md
================================================
Different GAN vocoders share the same `preprocess.py` and `normalize.py`.


================================================
FILE: paddlespeech/t2s/exps/gan_vocoder/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/exps/gan_vocoder/hifigan/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/exps/gan_vocoder/hifigan/train.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
import os
import shutil
from pathlib import Path

import jsonlines
import numpy as np
import paddle
import yaml
from paddle import DataParallel
from paddle import distributed as dist
from paddle import nn
from paddle.io import DataLoader
from paddle.io import DistributedBatchSampler
from paddle.optimizer import Adam
from paddle.optimizer.lr import MultiStepDecay
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.data_table import DataTable
from paddlespeech.t2s.datasets.vocoder_batch_fn import Clip
from paddlespeech.t2s.models.hifigan import HiFiGANEvaluator
from paddlespeech.t2s.models.hifigan import HiFiGANGenerator
from paddlespeech.t2s.models.hifigan import HiFiGANMultiScaleMultiPeriodDiscriminator
from paddlespeech.t2s.models.hifigan import HiFiGANUpdater
from paddlespeech.t2s.modules.losses import DiscriminatorAdversarialLoss
from paddlespeech.t2s.modules.losses import FeatureMatchLoss
from paddlespeech.t2s.modules.losses import GeneratorAdversarialLoss
from paddlespeech.t2s.modules.losses import MelSpectrogramLoss
from paddlespeech.t2s.training.extensions.snapshot import Snapshot
from paddlespeech.t2s.training.extensions.visualizer import VisualDL
from paddlespeech.t2s.training.seeding import seed_everything
from paddlespeech.t2s.training.trainer import Trainer


def train_sp(args, config):
    """Run HiFiGAN training in the current process.

    The function selects the device, seeds the random generators, builds the
    train/dev datasets and dataloaders, the generator and discriminator, the
    losses, the optimizers with their learning-rate schedulers, and finally
    wires everything into an updater, an evaluator and a ``Trainer`` that is
    run until ``config.train_max_steps`` iterations.

    Args:
        args: Parsed command line arguments. The attributes ``ngpu``,
            ``train_metadata``, ``dev_metadata``, ``config`` and
            ``output_dir`` are read here.
        config: Training configuration supporting both attribute access
            (e.g. ``config.batch_size``) and item access
            (e.g. ``config["generator_params"]``).
    """
    # decides device type and whether to run in parallel
    # setup running environment correctly
    world_size = paddle.distributed.get_world_size()
    if (not paddle.is_compiled_with_cuda()) or args.ngpu == 0:
        paddle.set_device("cpu")
    else:
        paddle.set_device("gpu")
        if world_size > 1:
            paddle.distributed.init_parallel_env()

    # set the random seed, it is a must for multiprocess training
    seed_everything(config.seed)

    print(
        f"rank: {dist.get_rank()}, pid: {os.getpid()}, parent_pid: {os.getppid()}",
    )

    # dataloader has been too verbose
    logging.getLogger("DataLoader").disabled = True

    # construct dataset for training and validation
    # each metadata record provides paths for "wave" and "feats", both of
    # which are loaded with np.load
    with jsonlines.open(args.train_metadata, 'r') as reader:
        train_metadata = list(reader)
    train_dataset = DataTable(
        data=train_metadata,
        fields=["wave", "feats"],
        converters={
            "wave": np.load,
            "feats": np.load,
        }, )
    with jsonlines.open(args.dev_metadata, 'r') as reader:
        dev_metadata = list(reader)
    dev_dataset = DataTable(
        data=dev_metadata,
        fields=["wave", "feats"],
        converters={
            "wave": np.load,
            "feats": np.load,
        }, )

    # collate function and dataloader
    train_sampler = DistributedBatchSampler(
        train_dataset,
        batch_size=config.batch_size,
        shuffle=True,
        drop_last=True)
    dev_sampler = DistributedBatchSampler(
        dev_dataset,
        batch_size=config.batch_size,
        shuffle=False,
        drop_last=False)
    print("samplers done!")

    # the generator config may not define a context window; fall back to 0
    if "aux_context_window" in config.generator_params:
        aux_context_window = config.generator_params.aux_context_window
    else:
        aux_context_window = 0
    train_batch_fn = Clip(
        batch_max_steps=config.batch_max_steps,
        hop_size=config.n_shift,
        aux_context_window=aux_context_window)

    train_dataloader = DataLoader(
        train_dataset,
        batch_sampler=train_sampler,
        collate_fn=train_batch_fn,
        num_workers=config.num_workers)

    # the dev loader reuses the training collate function
    dev_dataloader = DataLoader(
        dev_dataset,
        batch_sampler=dev_sampler,
        collate_fn=train_batch_fn,
        num_workers=config.num_workers)
    print("dataloaders done!")

    generator = HiFiGANGenerator(**config["generator_params"])
    discriminator = HiFiGANMultiScaleMultiPeriodDiscriminator(
        **config["discriminator_params"])
    if world_size > 1:
        generator = DataParallel(generator)
        discriminator = DataParallel(discriminator)
    print("models done!")

    criterion_feat_match = FeatureMatchLoss(**config["feat_match_loss_params"])
    criterion_mel = MelSpectrogramLoss(
        fs=config.fs,
        fft_size=config.n_fft,
        hop_size=config.n_shift,
        win_length=config.win_length,
        window=config.window,
        num_mels=config.n_mels,
        fmin=config.fmin,
        fmax=config.fmax, )
    criterion_gen_adv = GeneratorAdversarialLoss(
        **config["generator_adv_loss_params"])
    criterion_dis_adv = DiscriminatorAdversarialLoss(
        **config["discriminator_adv_loss_params"])
    print("criterions done!")

    lr_schedule_g = MultiStepDecay(**config["generator_scheduler_params"])
    # gradient clipping is only enabled when the configured norm is positive
    generator_grad_norm = config["generator_grad_norm"]
    gradient_clip_g = nn.ClipGradByGlobalNorm(
        generator_grad_norm) if generator_grad_norm > 0 else None
    print("gradient_clip_g:", gradient_clip_g)

    optimizer_g = Adam(
        learning_rate=lr_schedule_g,
        grad_clip=gradient_clip_g,
        parameters=generator.parameters(),
        **config["generator_optimizer_params"])
    lr_schedule_d = MultiStepDecay(**config["discriminator_scheduler_params"])
    discriminator_grad_norm = config["discriminator_grad_norm"]
    gradient_clip_d = nn.ClipGradByGlobalNorm(
        discriminator_grad_norm) if discriminator_grad_norm > 0 else None
    print("gradient_clip_d:", gradient_clip_d)
    optimizer_d = Adam(
        learning_rate=lr_schedule_d,
        grad_clip=gradient_clip_d,
        parameters=discriminator.parameters(),
        **config["discriminator_optimizer_params"])
    print("optimizers done!")

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    if dist.get_rank() == 0:
        # use pathlib to extract the file name so that the result does not
        # depend on "/" being the path separator
        config_name = Path(args.config).name
        # copy conf to output_dir
        shutil.copyfile(args.config, output_dir / config_name)

    updater = HiFiGANUpdater(
        models={
            "generator": generator,
            "discriminator": discriminator,
        },
        optimizers={
            "generator": optimizer_g,
            "discriminator": optimizer_d,
        },
        criterions={
            "mel": criterion_mel,
            "feat_match": criterion_feat_match,
            "gen_adv": criterion_gen_adv,
            "dis_adv": criterion_dis_adv,
        },
        schedulers={
            "generator": lr_schedule_g,
            "discriminator": lr_schedule_d,
        },
        dataloader=train_dataloader,
        discriminator_train_start_steps=config.discriminator_train_start_steps,
        # only hifigan have generator_train_start_steps
        generator_train_start_steps=config.generator_train_start_steps,
        lambda_adv=config.lambda_adv,
        lambda_aux=config.lambda_aux,
        lambda_feat_match=config.lambda_feat_match,
        output_dir=output_dir)

    evaluator = HiFiGANEvaluator(
        models={
            "generator": generator,
            "discriminator": discriminator,
        },
        criterions={
            "mel": criterion_mel,
            "feat_match": criterion_feat_match,
            "gen_adv": criterion_gen_adv,
            "dis_adv": criterion_dis_adv,
        },
        dataloader=dev_dataloader,
        lambda_adv=config.lambda_adv,
        lambda_aux=config.lambda_aux,
        lambda_feat_match=config.lambda_feat_match,
        output_dir=output_dir)

    trainer = Trainer(
        updater,
        stop_trigger=(config.train_max_steps, "iteration"),
        out=output_dir)

    # evaluation and VisualDL logging are registered on rank 0 only, while
    # the snapshot extension is registered on every rank
    if dist.get_rank() == 0:
        trainer.extend(
            evaluator, trigger=(config.eval_interval_steps, 'iteration'))
        trainer.extend(VisualDL(output_dir), trigger=(1, 'iteration'))
    trainer.extend(
        Snapshot(max_size=config.num_snapshots),
        trigger=(config.save_interval_steps, 'iteration'))

    print("Trainer Done!")
    trainer.run()


def main():
    """Parse the command line and the config file, then start training.

    The YAML file given by ``--config`` is loaded into a ``CfgNode``. When
    ``--ngpu`` is greater than one, ``train_sp`` is spawned in ``ngpu``
    processes; otherwise it is called directly in this process.

    ``--config``, ``--train-metadata``, ``--dev-metadata`` and
    ``--output-dir`` are mandatory: each of them is used unconditionally, so
    a missing value is reported by argparse instead of failing later on a
    ``None`` path.
    """
    parser = argparse.ArgumentParser(description="Train a HiFiGAN model.")
    parser.add_argument(
        "--config", type=str, required=True, help="HiFiGAN config file.")
    parser.add_argument(
        "--train-metadata", type=str, required=True, help="training data.")
    parser.add_argument(
        "--dev-metadata", type=str, required=True, help="dev data.")
    parser.add_argument(
        "--output-dir", type=str, required=True, help="output dir.")
    parser.add_argument(
        "--ngpu", type=int, default=1, help="if ngpu == 0, use cpu.")

    args = parser.parse_args()

    with open(args.config, 'rt') as f:
        config = CfgNode(yaml.safe_load(f))

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(config)
    print(
        f"master see the word size: {dist.get_world_size()}, from pid: {os.getpid()}"
    )

    # dispatch
    if args.ngpu > 1:
        dist.spawn(train_sp, (args, config), nprocs=args.ngpu)
    else:
        train_sp(args, config)


# run the training entry point only when this file is executed as a script
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/gan_vocoder/multi_band_melgan/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/exps/gan_vocoder/multi_band_melgan/train.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
import os
import shutil
from pathlib import Path

import jsonlines
import numpy as np
import paddle
import yaml
from paddle import DataParallel
from paddle import distributed as dist
from paddle import nn
from paddle.io import DataLoader
from paddle.io import DistributedBatchSampler
from paddle.optimizer import Adam
from paddle.optimizer.lr import MultiStepDecay
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.data_table import DataTable
from paddlespeech.t2s.datasets.vocoder_batch_fn import Clip
from paddlespeech.t2s.models.melgan import MBMelGANEvaluator
from paddlespeech.t2s.models.melgan import MBMelGANUpdater
from paddlespeech.t2s.models.melgan import MelGANGenerator
from paddlespeech.t2s.models.melgan import MelGANMultiScaleDiscriminator
from paddlespeech.t2s.modules.losses import DiscriminatorAdversarialLoss
from paddlespeech.t2s.modules.losses import GeneratorAdversarialLoss
from paddlespeech.t2s.modules.losses import MultiResolutionSTFTLoss
from paddlespeech.t2s.modules.pqmf import PQMF
from paddlespeech.t2s.training.extensions.snapshot import Snapshot
from paddlespeech.t2s.training.extensions.visualizer import VisualDL
from paddlespeech.t2s.training.seeding import seed_everything
from paddlespeech.t2s.training.trainer import Trainer


def train_sp(args, config):
    """Run Multi-Band MelGAN training in the current process.

    The function selects the device, seeds the random generators, builds the
    train/dev datasets and dataloaders, the generator and discriminator, the
    losses (including the PQMF module used for subband processing), the
    optimizers with their learning-rate schedulers, and finally wires
    everything into an updater, an evaluator and a ``Trainer`` that is run
    until ``config.train_max_steps`` iterations.

    Args:
        args: Parsed command line arguments. The attributes ``ngpu``,
            ``train_metadata``, ``dev_metadata``, ``config`` and
            ``output_dir`` are read here.
        config: Training configuration supporting both attribute access
            (e.g. ``config.batch_size``) and item access
            (e.g. ``config["generator_params"]``).
    """
    # decides device type and whether to run in parallel
    # setup running environment correctly
    world_size = paddle.distributed.get_world_size()
    if (not paddle.is_compiled_with_cuda()) or args.ngpu == 0:
        paddle.set_device("cpu")
    else:
        paddle.set_device("gpu")
        if world_size > 1:
            paddle.distributed.init_parallel_env()

    # set the random seed, it is a must for multiprocess training
    seed_everything(config.seed)

    print(
        f"rank: {dist.get_rank()}, pid: {os.getpid()}, parent_pid: {os.getppid()}",
    )

    # dataloader has been too verbose
    logging.getLogger("DataLoader").disabled = True

    # construct dataset for training and validation
    # each metadata record provides paths for "wave" and "feats", both of
    # which are loaded with np.load
    with jsonlines.open(args.train_metadata, 'r') as reader:
        train_metadata = list(reader)
    train_dataset = DataTable(
        data=train_metadata,
        fields=["wave", "feats"],
        converters={
            "wave": np.load,
            "feats": np.load,
        }, )
    with jsonlines.open(args.dev_metadata, 'r') as reader:
        dev_metadata = list(reader)
    dev_dataset = DataTable(
        data=dev_metadata,
        fields=["wave", "feats"],
        converters={
            "wave": np.load,
            "feats": np.load,
        }, )

    # collate function and dataloader
    train_sampler = DistributedBatchSampler(
        train_dataset,
        batch_size=config.batch_size,
        shuffle=True,
        drop_last=True)
    dev_sampler = DistributedBatchSampler(
        dev_dataset,
        batch_size=config.batch_size,
        shuffle=False,
        drop_last=False)
    print("samplers done!")

    # the generator config may not define a context window; fall back to 0
    if "aux_context_window" in config.generator_params:
        aux_context_window = config.generator_params.aux_context_window
    else:
        aux_context_window = 0
    train_batch_fn = Clip(
        batch_max_steps=config.batch_max_steps,
        hop_size=config.n_shift,
        aux_context_window=aux_context_window)

    train_dataloader = DataLoader(
        train_dataset,
        batch_sampler=train_sampler,
        collate_fn=train_batch_fn,
        num_workers=config.num_workers)

    # the dev loader reuses the training collate function
    dev_dataloader = DataLoader(
        dev_dataset,
        batch_sampler=dev_sampler,
        collate_fn=train_batch_fn,
        num_workers=config.num_workers)
    print("dataloaders done!")

    generator = MelGANGenerator(**config["generator_params"])
    discriminator = MelGANMultiScaleDiscriminator(
        **config["discriminator_params"])
    if world_size > 1:
        generator = DataParallel(generator)
        discriminator = DataParallel(discriminator)
    print("models done!")
    criterion_stft = MultiResolutionSTFTLoss(**config["stft_loss_params"])
    criterion_sub_stft = MultiResolutionSTFTLoss(
        **config["subband_stft_loss_params"])
    criterion_gen_adv = GeneratorAdversarialLoss()
    criterion_dis_adv = DiscriminatorAdversarialLoss()
    # define special module for subband processing
    criterion_pqmf = PQMF(subbands=config["generator_params"]["out_channels"])
    print("criterions done!")

    lr_schedule_g = MultiStepDecay(**config["generator_scheduler_params"])
    # gradient clipping is only enabled when the configured norm is positive
    generator_grad_norm = config["generator_grad_norm"]
    gradient_clip_g = nn.ClipGradByGlobalNorm(
        generator_grad_norm) if generator_grad_norm > 0 else None
    print("gradient_clip_g:", gradient_clip_g)

    optimizer_g = Adam(
        learning_rate=lr_schedule_g,
        grad_clip=gradient_clip_g,
        parameters=generator.parameters(),
        **config["generator_optimizer_params"])
    lr_schedule_d = MultiStepDecay(**config["discriminator_scheduler_params"])
    discriminator_grad_norm = config["discriminator_grad_norm"]
    gradient_clip_d = nn.ClipGradByGlobalNorm(
        discriminator_grad_norm) if discriminator_grad_norm > 0 else None
    print("gradient_clip_d:", gradient_clip_d)
    optimizer_d = Adam(
        learning_rate=lr_schedule_d,
        grad_clip=gradient_clip_d,
        parameters=discriminator.parameters(),
        **config["discriminator_optimizer_params"])
    print("optimizers done!")

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    if dist.get_rank() == 0:
        # use pathlib to extract the file name so that the result does not
        # depend on "/" being the path separator
        config_name = Path(args.config).name
        # copy conf to output_dir
        shutil.copyfile(args.config, output_dir / config_name)

    updater = MBMelGANUpdater(
        models={
            "generator": generator,
            "discriminator": discriminator,
        },
        optimizers={
            "generator": optimizer_g,
            "discriminator": optimizer_d,
        },
        criterions={
            "stft": criterion_stft,
            "sub_stft": criterion_sub_stft,
            "gen_adv": criterion_gen_adv,
            "dis_adv": criterion_dis_adv,
            "pqmf": criterion_pqmf
        },
        schedulers={
            "generator": lr_schedule_g,
            "discriminator": lr_schedule_d,
        },
        dataloader=train_dataloader,
        discriminator_train_start_steps=config.discriminator_train_start_steps,
        lambda_adv=config.lambda_adv,
        output_dir=output_dir)

    evaluator = MBMelGANEvaluator(
        models={
            "generator": generator,
            "discriminator": discriminator,
        },
        criterions={
            "stft": criterion_stft,
            "sub_stft": criterion_sub_stft,
            "gen_adv": criterion_gen_adv,
            "dis_adv": criterion_dis_adv,
            "pqmf": criterion_pqmf
        },
        dataloader=dev_dataloader,
        lambda_adv=config.lambda_adv,
        output_dir=output_dir)

    trainer = Trainer(
        updater,
        stop_trigger=(config.train_max_steps, "iteration"),
        out=output_dir)

    # evaluation and VisualDL logging are registered on rank 0 only, while
    # the snapshot extension is registered on every rank
    if dist.get_rank() == 0:
        trainer.extend(
            evaluator, trigger=(config.eval_interval_steps, 'iteration'))
        trainer.extend(VisualDL(output_dir), trigger=(1, 'iteration'))
    trainer.extend(
        Snapshot(max_size=config.num_snapshots),
        trigger=(config.save_interval_steps, 'iteration'))

    print("Trainer Done!")
    trainer.run()


def main():
    """Parse the command line and the config file, then start training.

    The YAML file given by ``--config`` is loaded into a ``CfgNode``. When
    ``--ngpu`` is greater than one, ``train_sp`` is spawned in ``ngpu``
    processes; otherwise it is called directly in this process.

    ``--config``, ``--train-metadata``, ``--dev-metadata`` and
    ``--output-dir`` are mandatory: each of them is used unconditionally, so
    a missing value is reported by argparse instead of failing later on a
    ``None`` path.
    """
    parser = argparse.ArgumentParser(
        description="Train a Multi-Band MelGAN model.")
    parser.add_argument(
        "--config",
        type=str,
        required=True,
        help="Multi-Band MelGAN config file.")
    parser.add_argument(
        "--train-metadata", type=str, required=True, help="training data.")
    parser.add_argument(
        "--dev-metadata", type=str, required=True, help="dev data.")
    parser.add_argument(
        "--output-dir", type=str, required=True, help="output dir.")
    parser.add_argument(
        "--ngpu", type=int, default=1, help="if ngpu == 0, use cpu.")

    args = parser.parse_args()

    with open(args.config, 'rt') as f:
        config = CfgNode(yaml.safe_load(f))

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(config)
    print(
        f"master see the word size: {dist.get_world_size()}, from pid: {os.getpid()}"
    )

    # dispatch
    if args.ngpu > 1:
        dist.spawn(train_sp, (args, config), nprocs=args.ngpu)
    else:
        train_sp(args, config)


# run the training entry point only when this file is executed as a script
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/gan_vocoder/normalize.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Normalize feature files and dump them."""
import argparse
import logging
from operator import itemgetter
from pathlib import Path

import jsonlines
import numpy as np
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

from paddlespeech.t2s.datasets.data_table import DataTable


def main():
    """Normalize dumped features with precomputed statistics and dump them.

    Reads the jsonlines file given by ``--metadata``, standardizes the
    ``feats`` array of every record with the mean/scale stored in the
    ``--stats`` file, and saves the result as ``<utt_id>_feats.npy`` in
    ``--dumpdir``. Unless ``--skip-wav-copy`` is given, the ``wave`` array is
    saved next to it as ``<utt_id>_wave.npy``; otherwise the original
    ``wave`` entry of the record is kept. A new ``metadata.jsonl`` sorted by
    ``utt_id`` and pointing at the written files is stored in the same
    directory.
    """
    parser = argparse.ArgumentParser(
        description="Normalize dumped raw features.")
    # NOTE: despite the help text, this argument is opened below with
    # jsonlines, i.e. it is a metadata file rather than a directory.
    parser.add_argument(
        "--metadata",
        type=str,
        required=True,
        help="directory including feature files to be normalized. "
        "you need to specify either *-scp or rootdir.")
    parser.add_argument(
        "--dumpdir",
        type=str,
        required=True,
        help="directory to dump normalized feature files.")
    parser.add_argument(
        "--stats", type=str, required=True, help="statistics file.")
    parser.add_argument(
        "--skip-wav-copy",
        default=False,
        action="store_true",
        help="whether to skip the copy of wav files.")

    args = parser.parse_args()

    dumpdir = Path(args.dumpdir).expanduser()
    # use absolute path
    dumpdir = dumpdir.resolve()
    dumpdir.mkdir(parents=True, exist_ok=True)

    # get dataset
    with jsonlines.open(args.metadata, 'r') as reader:
        metadata = list(reader)
    dataset = DataTable(
        metadata,
        fields=["utt_id", "wave", "feats"],
        converters={
            'utt_id': None,
            # when the copy is skipped the raw metadata value is passed through
            'wave': None if args.skip_wav_copy else np.load,
            'feats': np.load,
        })
    logging.info(f"The number of files = {len(dataset)}.")

    # restore scaler; the stats file is read once, index 0 holds the mean and
    # index 1 the scale
    stats = np.load(args.stats)
    scaler = StandardScaler()
    scaler.mean_ = stats[0]
    scaler.scale_ = stats[1]

    # from version 0.23.0, this information is needed
    scaler.n_features_in_ = scaler.mean_.shape[0]

    # process each file
    output_metadata = []

    for item in tqdm(dataset):
        utt_id = item['utt_id']
        wave = item['wave']
        mel = item['feats']
        # normalize
        mel = scaler.transform(mel)

        # save
        mel_path = dumpdir / f"{utt_id}_feats.npy"
        np.save(mel_path, mel.astype(np.float32), allow_pickle=False)
        if not args.skip_wav_copy:
            wav_path = dumpdir / f"{utt_id}_wave.npy"
            np.save(wav_path, wave.astype(np.float32), allow_pickle=False)
        else:
            wav_path = wave
        output_metadata.append({
            'utt_id': utt_id,
            'wave': str(wav_path),
            'feats': str(mel_path),
        })
    output_metadata.sort(key=itemgetter('utt_id'))
    # write the metadata into the same expanded/resolved directory as the
    # feature files; the raw ``args.dumpdir`` may contain "~" and would then
    # point to a different (possibly non-existent) location
    output_metadata_path = dumpdir / "metadata.jsonl"
    with jsonlines.open(output_metadata_path, 'w') as writer:
        for item in output_metadata:
            writer.write(item)
    logging.info(f"metadata dumped into {output_metadata_path}")


# run the normalization entry point only when this file is executed as a script
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/gan_vocoder/parallelwave_gan/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/exps/gan_vocoder/parallelwave_gan/synthesize_from_wav.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
import os
from pathlib import Path

import librosa
import numpy as np
import paddle
import soundfile as sf
import yaml
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.get_feats import LogMelFBank
from paddlespeech.t2s.models.parallel_wavegan import PWGGenerator
from paddlespeech.t2s.models.parallel_wavegan import PWGInference
from paddlespeech.t2s.modules.normalizer import ZScore


def evaluate(args, config):
    """Re-synthesize every file of ``args.input_dir`` with a PWG vocoder.

    The generator weights are restored from ``args.checkpoint``, the
    normalization statistics from ``args.stat``. For each entry of the input
    directory the audio is loaded at ``config.fs``, converted to a log-mel
    spectrogram and passed through the inference wrapper; the result is
    written to ``args.output_dir`` with a ``gen_`` prefix.

    Args:
        args: Parsed command line arguments; ``checkpoint``, ``stat``,
            ``input_dir`` and ``output_dir`` are read.
        config: Configuration providing ``generator_params`` and the
            feature-extraction settings (``fs``, ``n_fft``, ``n_shift``,
            ``win_length``, ``window``, ``n_mels``, ``fmin``, ``fmax``).
    """
    # silence the DataLoader logger, it is too verbose
    logging.getLogger("DataLoader").disabled = True

    # restore the generator and switch it to inference mode
    generator = PWGGenerator(**config["generator_params"])
    checkpoint = paddle.load(args.checkpoint)
    generator.set_state_dict(checkpoint["generator_params"])
    generator.remove_weight_norm()
    generator.eval()
    print("model done!")

    # the stat file unpacks into exactly two entries: mean and std
    mean, scale = np.load(args.stat)
    normalizer = ZScore(paddle.to_tensor(mean), paddle.to_tensor(scale))

    synthesizer = PWGInference(normalizer, generator)

    wav_dir = Path(args.input_dir)
    out_dir = Path(args.output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    feature_kwargs = dict(
        sr=config.fs,
        n_fft=config.n_fft,
        hop_length=config.n_shift,
        win_length=config.win_length,
        window=config.window,
        n_mels=config.n_mels,
        fmin=config.fmin,
        fmax=config.fmax)
    mel_extractor = LogMelFBank(**feature_kwargs)

    for filename in os.listdir(wav_dir):
        source_path = wav_dir / filename
        target_path = out_dir / ("gen_" + filename)

        waveform, _ = librosa.load(str(source_path), sr=config.fs)
        # extract mel feats and hand them to paddle
        log_mel = paddle.to_tensor(mel_extractor.get_log_mel_fbank(waveform))
        with paddle.no_grad():
            generated = synthesizer(log_mel)
        sf.write(str(target_path), generated.numpy(), samplerate=config.fs)
        print(f"{filename} done!")


def main():
    """Parse the command line and the config file, then run ``evaluate``.

    ``--config``, ``--checkpoint``, ``--stat``, ``--input-dir`` and
    ``--output-dir`` are mandatory: each of them is used unconditionally, so
    a missing value is reported by argparse instead of failing later on a
    ``None`` path.

    The device is set to CPU when ``--ngpu`` is 0 and to GPU when it is
    positive. A negative value only prints a message; no device is selected
    explicitly in that case and execution continues.
    """
    parser = argparse.ArgumentParser(
        description="Synthesize with parallel wavegan.")

    parser.add_argument(
        "--config",
        type=str,
        required=True,
        help="parallel wavegan config file.")
    parser.add_argument(
        "--checkpoint", type=str, required=True, help="snapshot to load.")
    parser.add_argument(
        "--stat",
        type=str,
        required=True,
        help="mean and standard deviation used to normalize spectrogram when training parallel wavegan."
    )
    parser.add_argument(
        "--input-dir", type=str, required=True, help="input dir of wavs.")
    parser.add_argument(
        "--output-dir", type=str, required=True, help="output dir.")
    parser.add_argument(
        "--ngpu", type=int, default=1, help="if ngpu == 0, use cpu.")

    args = parser.parse_args()

    if args.ngpu == 0:
        paddle.set_device("cpu")
    elif args.ngpu > 0:
        paddle.set_device("gpu")
    else:
        print("ngpu should >= 0 !")

    with open(args.config) as f:
        config = CfgNode(yaml.safe_load(f))

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(config)

    evaluate(args, config)


# run the synthesis entry point only when this file is executed as a script
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/gan_vocoder/parallelwave_gan/train.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
import os
import shutil
from pathlib import Path

import jsonlines
import numpy as np
import paddle
import yaml
from paddle import DataParallel
from paddle import distributed as dist
from paddle import nn
from paddle.io import DataLoader
from paddle.io import DistributedBatchSampler
from paddle.optimizer import Adam  # No RAdam
from paddle.optimizer.lr import StepDecay
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.data_table import DataTable
from paddlespeech.t2s.datasets.vocoder_batch_fn import Clip
from paddlespeech.t2s.models.parallel_wavegan import PWGDiscriminator
from paddlespeech.t2s.models.parallel_wavegan import PWGEvaluator
from paddlespeech.t2s.models.parallel_wavegan import PWGGenerator
from paddlespeech.t2s.models.parallel_wavegan import PWGUpdater
from paddlespeech.t2s.modules.losses import MultiResolutionSTFTLoss
from paddlespeech.t2s.training.extensions.snapshot import Snapshot
from paddlespeech.t2s.training.extensions.visualizer import VisualDL
from paddlespeech.t2s.training.seeding import seed_everything
from paddlespeech.t2s.training.trainer import Trainer
from paddlespeech.t2s.utils import str2bool


def train_sp(args, config):
    """Set up and run ParallelWaveGAN training in the current process.

    The datasets, dataloaders, generator / discriminator, losses,
    optimizers, updater, evaluator and trainer are built in that order and
    ``trainer.run()`` is called at the end.

    Args:
        args: Parsed command line arguments. The fields read here are
            ``ngpu``, ``train_metadata``, ``dev_metadata``, ``output_dir``,
            ``config`` and ``profiler_options``.
        config: Experiment configuration loaded from the YAML file passed
            with ``--config``.
    """
    # decides device type and whether to run in parallel
    # setup running environment correctly
    world_size = paddle.distributed.get_world_size()
    if (not paddle.is_compiled_with_cuda()) or args.ngpu == 0:
        paddle.set_device("cpu")
    else:
        paddle.set_device("gpu")
        if world_size > 1:
            paddle.distributed.init_parallel_env()

    # set the random seed, it is a must for multiprocess training
    seed_everything(config.seed)

    print(
        f"rank: {dist.get_rank()}, pid: {os.getpid()}, parent_pid: {os.getppid()}",
    )

    # dataloader has been too verbose
    logging.getLogger("DataLoader").disabled = True

    # construct dataset for training and validation
    # each metadata line is one record; "wave" and "feats" hold paths that
    # are loaded with np.load by the DataTable converters
    with jsonlines.open(args.train_metadata, 'r') as reader:
        train_metadata = list(reader)
    train_dataset = DataTable(
        data=train_metadata,
        fields=["wave", "feats"],
        converters={
            "wave": np.load,
            "feats": np.load,
        }, )
    with jsonlines.open(args.dev_metadata, 'r') as reader:
        dev_metadata = list(reader)
    dev_dataset = DataTable(
        data=dev_metadata,
        fields=["wave", "feats"],
        converters={
            "wave": np.load,
            "feats": np.load,
        }, )

    # collate function and dataloader
    train_sampler = DistributedBatchSampler(
        train_dataset,
        batch_size=config.batch_size,
        shuffle=True,
        drop_last=True)
    dev_sampler = DistributedBatchSampler(
        dev_dataset,
        batch_size=config.batch_size,
        shuffle=False,
        drop_last=False)
    print("samplers done!")

    train_batch_fn = Clip(
        batch_max_steps=config.batch_max_steps,
        hop_size=config.n_shift,
        aux_context_window=config.generator_params.aux_context_window)

    train_dataloader = DataLoader(
        train_dataset,
        batch_sampler=train_sampler,
        collate_fn=train_batch_fn,
        num_workers=config.num_workers)

    # the dev loader reuses the training collate function
    dev_dataloader = DataLoader(
        dev_dataset,
        batch_sampler=dev_sampler,
        collate_fn=train_batch_fn,
        num_workers=config.num_workers)
    print("dataloaders done!")

    generator = PWGGenerator(**config["generator_params"])
    discriminator = PWGDiscriminator(**config["discriminator_params"])
    if world_size > 1:
        generator = DataParallel(generator)
        discriminator = DataParallel(discriminator)
    print("models done!")

    criterion_stft = MultiResolutionSTFTLoss(**config["stft_loss_params"])
    criterion_mse = nn.MSELoss()
    print("criterions done!")

    lr_schedule_g = StepDecay(**config["generator_scheduler_params"])
    gradient_clip_g = nn.ClipGradByGlobalNorm(config["generator_grad_norm"])
    optimizer_g = Adam(
        learning_rate=lr_schedule_g,
        grad_clip=gradient_clip_g,
        parameters=generator.parameters(),
        **config["generator_optimizer_params"])
    lr_schedule_d = StepDecay(**config["discriminator_scheduler_params"])
    gradient_clip_d = nn.ClipGradByGlobalNorm(config["discriminator_grad_norm"])
    optimizer_d = Adam(
        learning_rate=lr_schedule_d,
        grad_clip=gradient_clip_d,
        parameters=discriminator.parameters(),
        **config["discriminator_optimizer_params"])
    print("optimizers done!")

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    if dist.get_rank() == 0:
        # copy conf to output_dir
        # Path(...).name works with any path separator, unlike split("/")
        config_path = Path(args.config)
        try:
            shutil.copyfile(config_path, output_dir / config_path.name)
        except shutil.SameFileError:
            # the config already lives in output_dir (e.g. when a run is
            # restarted with the copied config), so there is nothing to copy
            pass

    updater = PWGUpdater(
        models={
            "generator": generator,
            "discriminator": discriminator,
        },
        optimizers={
            "generator": optimizer_g,
            "discriminator": optimizer_d,
        },
        criterions={
            "stft": criterion_stft,
            "mse": criterion_mse,
        },
        schedulers={
            "generator": lr_schedule_g,
            "discriminator": lr_schedule_d,
        },
        dataloader=train_dataloader,
        discriminator_train_start_steps=config.discriminator_train_start_steps,
        lambda_adv=config.lambda_adv,
        output_dir=output_dir)

    evaluator = PWGEvaluator(
        models={
            "generator": generator,
            "discriminator": discriminator,
        },
        criterions={
            "stft": criterion_stft,
            "mse": criterion_mse,
        },
        dataloader=dev_dataloader,
        lambda_adv=config.lambda_adv,
        output_dir=output_dir)
    trainer = Trainer(
        updater,
        stop_trigger=(config.train_max_steps, "iteration"),
        out=output_dir,
        profiler_options=args.profiler_options)

    # evaluation and VisualDL logging are attached on rank 0 only, while the
    # Snapshot extension is registered on every rank
    if dist.get_rank() == 0:
        trainer.extend(
            evaluator, trigger=(config.eval_interval_steps, 'iteration'))
        trainer.extend(VisualDL(output_dir), trigger=(1, 'iteration'))
    trainer.extend(
        Snapshot(max_size=config.num_snapshots),
        trigger=(config.save_interval_steps, 'iteration'))

    print("Trainer Done!")
    trainer.run()


def main():
    """Command line entry point for ParallelWaveGAN training.

    Parses the arguments, loads the YAML config into a ``CfgNode``,
    optionally overrides the batch size and step count for benchmark runs,
    prints both the arguments and the config, and finally runs ``train_sp``
    either directly or through ``dist.spawn`` when more than one GPU is
    requested.
    """
    cli = argparse.ArgumentParser(
        description="Train a ParallelWaveGAN model.")

    # plain string options, registered in the order they show up in --help
    string_options = (
        ("--config", "ParallelWaveGAN config file."),
        ("--train-metadata", "training data."),
        ("--dev-metadata", "dev data."),
        ("--output-dir", "output dir."), )
    for flag, description in string_options:
        cli.add_argument(flag, type=str, help=description)
    cli.add_argument(
        "--ngpu", type=int, default=1, help="if ngpu == 0, use cpu.")

    bench = cli.add_argument_group('benchmark',
                                   'arguments related to benchmark.')
    bench.add_argument("--batch-size", type=int, default=8, help="batch size.")
    bench.add_argument(
        "--max-iter", type=int, default=400000, help="train max steps.")
    bench.add_argument(
        "--run-benchmark",
        type=str2bool,
        default=False,
        help="runing benchmark or not, if True, use the --batch-size and --max-iter."
    )
    bench.add_argument(
        "--profiler_options",
        type=str,
        default=None,
        help="The option of profiler, which should be in format \"key1=value1;key2=value2;key3=value3\"."
    )

    args = cli.parse_args()

    with open(args.config, 'rt') as config_file:
        config = CfgNode(yaml.safe_load(config_file))

    # --batch-size and --max-iter exist for benchmark runs: they replace the
    # values from the config file only when --run-benchmark is set
    if args.run_benchmark:
        config.batch_size = args.batch_size
        config.train_max_steps = args.max_iter

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(config)
    seen_world_size = dist.get_world_size()
    current_pid = os.getpid()
    print(
        f"master see the word size: {seen_world_size}, from pid: {current_pid}")

    # dispatch: a single process trains in place, several GPUs are spawned
    if args.ngpu <= 1:
        train_sp(args, config)
    else:
        dist.spawn(train_sp, (args, config), nprocs=args.ngpu)


# Script entry point: start ParallelWaveGAN training only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/gan_vocoder/preprocess.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
from concurrent.futures import ThreadPoolExecutor
from operator import itemgetter
from pathlib import Path
from typing import Any
from typing import Dict
from typing import List

import jsonlines
import librosa
import numpy as np
import tqdm
import yaml
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.get_feats import LogMelFBank
from paddlespeech.t2s.datasets.preprocess_utils import get_phn_dur
from paddlespeech.t2s.datasets.preprocess_utils import get_sentences_svs
from paddlespeech.t2s.datasets.preprocess_utils import merge_silence
from paddlespeech.t2s.utils import str2bool


def process_sentence(config: Dict[str, Any],
                     fp: Path,
                     sentences: Dict,
                     output_dir: Path,
                     mel_extractor=None,
                     cut_sil: bool=True):
    """Dump the waveform and the log-mel features of a single utterance.

    Args:
        config: Configuration object; ``config.fs`` and ``config.n_shift``
            are read through attribute access.
        fp: Path of the audio file. Its stem, with a trailing ``_mic2``
            removed, is used as the utterance id.
        sentences: Maps an utterance id to an indexable entry whose first
            two items are the phones and their durations (the durations are
            converted with ``librosa.frames_to_time``). When ``cut_sil`` is
            set, these two items are overwritten in place with the phones /
            durations left after the edge ``sil`` segments are removed.
        output_dir: Directory receiving ``<utt_id>_wave.npy`` and
            ``<utt_id>_feats.npy``.
        mel_extractor: Object providing ``get_log_mel_fbank(wav)``. It is
            required as soon as the utterance is found in ``sentences``.
        cut_sil: Whether to cut a leading / trailing ``sil`` segment.

    Returns:
        A dict with the keys ``utt_id``, ``num_samples``, ``num_frames``,
        ``feats`` and ``wave``, or ``None`` when the utterance has no entry
        in ``sentences`` or the loaded audio is not one-dimensional.

    Raises:
        ValueError: If features have to be extracted but ``mel_extractor``
            is ``None``.
    """
    utt_id = fp.stem
    # for vctk
    if utt_id.endswith("_mic2"):
        utt_id = utt_id[:-5]
    if utt_id not in sentences:
        # no durations for this file: nothing to do
        return None
    if mel_extractor is None:
        raise ValueError("mel_extractor must be provided to extract features.")

    # reading, resampling may occur
    y, _ = librosa.load(str(fp), sr=config.fs)
    if len(y.shape) != 1:
        # only mono-channel audio is processed
        return None
    max_value = np.abs(y).max()
    if max_value > 1.0:
        y = y / max_value
    assert np.abs(y).max(
    ) <= 1.0, f"{utt_id} is seems to be different that 16 bit PCM."
    phones = sentences[utt_id][0]
    durations = sentences[utt_id][1]
    d_cumsum = np.pad(np.array(durations).cumsum(0), (1, 0), 'constant')
    # little imprecise than use *.TextGrid directly
    times = librosa.frames_to_time(
        d_cumsum, sr=config.fs, hop_length=config.n_shift)
    if cut_sil:
        start = 0
        # NOTE(review): d_cumsum is counted in frames, yet `end` is passed to
        # time_to_samples below, which takes times. Without a trailing "sil"
        # the resulting end index presumably lies past the clip, so the slice
        # keeps the audio up to its end - confirm this is intended.
        end = d_cumsum[-1]
        if phones[0] == "sil" and len(durations) > 1:
            start = times[1]
            durations = durations[1:]
            phones = phones[1:]
        if phones[-1] == 'sil' and len(durations) > 1:
            end = times[-2]
            durations = durations[:-1]
            phones = phones[:-1]
        sentences[utt_id][0] = phones
        sentences[utt_id][1] = durations
        start, end = librosa.time_to_samples([start, end], sr=config.fs)
        y = y[start:end]

    # extract mel feats
    logmel = mel_extractor.get_log_mel_fbank(y)

    # adjust time to make num_samples == num_frames * hop_length
    num_frames = logmel.shape[0]
    if y.size < num_frames * config.n_shift:
        y = np.pad(
            y, (0, num_frames * config.n_shift - y.size), mode="reflect")
    else:
        y = y[:num_frames * config.n_shift]
    num_samples = y.shape[0]

    mel_path = output_dir / (utt_id + "_feats.npy")
    wav_path = output_dir / (utt_id + "_wave.npy")
    # (num_samples, )
    np.save(wav_path, y)
    # (num_frames, n_mels)
    np.save(mel_path, logmel)
    return {
        "utt_id": utt_id,
        "num_samples": num_samples,
        "num_frames": num_frames,
        "feats": str(mel_path),
        "wave": str(wav_path),
    }


def process_sentences(config,
                      fps: List[Path],
                      sentences: Dict,
                      output_dir: Path,
                      mel_extractor=None,
                      nprocs: int=1,
                      cut_sil: bool=True):
    """Run ``process_sentence`` over a list of audio files and write the metadata.

    Args:
        config: Configuration forwarded to ``process_sentence``.
        fps: Audio files to process.
        sentences: Duration information forwarded to ``process_sentence``.
        output_dir: Directory for the dumped arrays and ``metadata.jsonl``.
        mel_extractor: Feature extractor forwarded to ``process_sentence``.
        nprocs: ``1`` processes the files one after another; any other value
            is used as the size of a thread pool.
        cut_sil: Forwarded to ``process_sentence``.

    The records that are not empty are sorted by ``utt_id`` and written, one
    per line, to ``output_dir / "metadata.jsonl"``.
    """
    records = []
    if nprocs == 1:
        # sequential path, tqdm wraps the iteration itself
        for wav_path in tqdm.tqdm(fps, total=len(fps)):
            item = process_sentence(
                config=config,
                fp=wav_path,
                sentences=sentences,
                output_dir=output_dir,
                mel_extractor=mel_extractor,
                cut_sil=cut_sil)
            if item:
                records.append(item)
    else:
        with ThreadPoolExecutor(nprocs) as executor, tqdm.tqdm(
                total=len(fps)) as bar:
            pending = []
            for wav_path in fps:
                job = executor.submit(process_sentence, config, wav_path,
                                      sentences, output_dir, mel_extractor,
                                      cut_sil)
                # advance the progress bar whenever a job completes
                job.add_done_callback(lambda _finished: bar.update())
                pending.append(job)

            # gather in submission order, dropping the skipped utterances
            for job in pending:
                item = job.result()
                if item:
                    records.append(item)

    records.sort(key=itemgetter("utt_id"))
    with jsonlines.open(output_dir / "metadata.jsonl", 'w') as writer:
        for entry in records:
            writer.write(entry)
    print("Done")


def main():
    """Command line entry point of the vocoder preprocessing script.

    The script parses the arguments, loads the durations of the utterances,
    splits the audio files of the dataset into train / dev / test and dumps
    the waveform and log-mel features of each split under
    ``<dumpdir>/{train,dev,test}/raw``.

    Splits per dataset:
        * baker: first 9800 sorted wav files for train, next 100 for dev,
          the rest for test.
        * ljspeech: first 12900 sorted wav files for train, next 100 for
          dev, the rest for test.
        * vctk / aishell3: for every speaker with more than 100 files the
          last 5 go to test, the 5 before them to dev and the rest to train;
          other speakers go entirely to train.
        * opencpop: ``train.txt`` lists the train files; the first 106 lines
          of ``test.txt`` form the dev set and the remaining lines the test
          set.

    An unsupported ``--dataset`` or a missing ``--rootdir`` / ``--config`` /
    ``--dur-file`` ends the program with an argparse usage error.
    """
    # parse config and args
    parser = argparse.ArgumentParser(
        description="Preprocess audio and then extract features .")
    parser.add_argument(
        "--dataset",
        default="baker",
        type=str,
        help="name of dataset, should in {baker, aishell3, ljspeech, vctk, opencpop} now"
    )
    parser.add_argument(
        "--rootdir", default=None, type=str, help="directory to dataset.")
    parser.add_argument(
        "--dumpdir",
        type=str,
        required=True,
        help="directory to dump feature files.")
    parser.add_argument("--config", type=str, help="vocoder config file.")
    parser.add_argument(
        "--num-cpu", type=int, default=1, help="number of process.")
    parser.add_argument(
        "--dur-file", default=None, type=str, help="path to durations.txt.")

    parser.add_argument(
        "--cut-sil",
        type=str2bool,
        default=True,
        help="whether cut sil in the edge of audio")
    args = parser.parse_args()

    # Reject bad invocations before touching the file system. Previously an
    # unknown dataset only printed a message and crashed later with a
    # NameError, and a missing path argument surfaced as a TypeError.
    supported_datasets = ("baker", "ljspeech", "vctk", "aishell3", "opencpop")
    if args.dataset not in supported_datasets:
        parser.error(
            "dataset should in {baker, ljspeech, vctk, aishell3, opencpop} now!")
    missing = [
        flag
        for flag, value in (("--rootdir", args.rootdir),
                            ("--config", args.config),
                            ("--dur-file", args.dur_file)) if value is None
    ]
    if missing:
        parser.error("the following arguments are required: " +
                     ", ".join(missing))

    rootdir = Path(args.rootdir).expanduser()
    dumpdir = Path(args.dumpdir).expanduser()
    # use absolute path
    dumpdir = dumpdir.resolve()
    dumpdir.mkdir(parents=True, exist_ok=True)
    dur_file = Path(args.dur_file).expanduser()

    assert rootdir.is_dir()
    assert dur_file.is_file()

    with open(args.config, 'rt') as f:
        config = CfgNode(yaml.safe_load(f))

    if args.dataset == "opencpop":
        sentences, speaker_set = get_sentences_svs(
            dur_file,
            dataset=args.dataset,
            sample_rate=config.fs,
            n_shift=config.n_shift, )
    else:
        sentences, speaker_set = get_phn_dur(dur_file)
        merge_silence(sentences)

    if args.dataset == "baker":
        wav_files = sorted(list((rootdir / "Wave").rglob("*.wav")))
        # split data into 3 sections
        num_train = 9800
        num_dev = 100
        train_wav_files = wav_files[:num_train]
        dev_wav_files = wav_files[num_train:num_train + num_dev]
        test_wav_files = wav_files[num_train + num_dev:]

    elif args.dataset == "ljspeech":
        wav_files = sorted(list((rootdir / "wavs").rglob("*.wav")))
        # split data into 3 sections
        num_train = 12900
        num_dev = 100
        train_wav_files = wav_files[:num_train]
        dev_wav_files = wav_files[num_train:num_train + num_dev]
        test_wav_files = wav_files[num_train + num_dev:]
    elif args.dataset == "vctk":
        sub_num_dev = 5
        wav_dir = rootdir / "wav48_silence_trimmed"
        train_wav_files = []
        dev_wav_files = []
        test_wav_files = []
        for speaker in os.listdir(wav_dir):
            wav_files = sorted(list((wav_dir / speaker).rglob("*_mic2.flac")))
            # only speakers with enough data contribute to dev / test
            if len(wav_files) > 100:
                train_wav_files += wav_files[:-sub_num_dev * 2]
                dev_wav_files += wav_files[-sub_num_dev * 2:-sub_num_dev]
                test_wav_files += wav_files[-sub_num_dev:]
            else:
                train_wav_files += wav_files
    elif args.dataset == "aishell3":
        sub_num_dev = 5
        wav_dir = rootdir / "train" / "wav"
        train_wav_files = []
        dev_wav_files = []
        test_wav_files = []
        for speaker in os.listdir(wav_dir):
            wav_files = sorted(list((wav_dir / speaker).rglob("*.wav")))
            # only speakers with enough data contribute to dev / test
            if len(wav_files) > 100:
                train_wav_files += wav_files[:-sub_num_dev * 2]
                dev_wav_files += wav_files[-sub_num_dev * 2:-sub_num_dev]
                test_wav_files += wav_files[-sub_num_dev:]
            else:
                train_wav_files += wav_files
    elif args.dataset == "opencpop":
        wavdir = rootdir / "wavs"
        # split data into 3 sections
        train_file = rootdir / "train.txt"
        train_wav_files = []
        with open(train_file, "r") as f_train:
            for line in f_train.readlines():
                # the utterance id is the first "|"-separated field
                utt = line.split("|")[0]
                wav_name = utt + ".wav"
                wav_path = wavdir / wav_name
                train_wav_files.append(wav_path)

        test_file = rootdir / "test.txt"
        dev_wav_files = []
        test_wav_files = []
        num_dev = 106
        count = 0
        with open(test_file, "r") as f_test:
            for line in f_test.readlines():
                count += 1
                utt = line.split("|")[0]
                wav_name = utt + ".wav"
                wav_path = wavdir / wav_name
                # the first num_dev lines are used as dev set
                if count > num_dev:
                    test_wav_files.append(wav_path)
                else:
                    dev_wav_files.append(wav_path)

    train_dump_dir = dumpdir / "train" / "raw"
    train_dump_dir.mkdir(parents=True, exist_ok=True)
    dev_dump_dir = dumpdir / "dev" / "raw"
    dev_dump_dir.mkdir(parents=True, exist_ok=True)
    test_dump_dir = dumpdir / "test" / "raw"
    test_dump_dir.mkdir(parents=True, exist_ok=True)

    mel_extractor = LogMelFBank(
        sr=config.fs,
        n_fft=config.n_fft,
        hop_length=config.n_shift,
        win_length=config.win_length,
        window=config.window,
        n_mels=config.n_mels,
        fmin=config.fmin,
        fmax=config.fmax)

    # process for the 3 sections
    # empty splits are skipped, so no metadata.jsonl is written for them
    if train_wav_files:
        process_sentences(
            config=config,
            fps=train_wav_files,
            sentences=sentences,
            output_dir=train_dump_dir,
            mel_extractor=mel_extractor,
            nprocs=args.num_cpu,
            cut_sil=args.cut_sil)
    if dev_wav_files:
        process_sentences(
            config=config,
            fps=dev_wav_files,
            sentences=sentences,
            output_dir=dev_dump_dir,
            mel_extractor=mel_extractor,
            nprocs=args.num_cpu,
            cut_sil=args.cut_sil)
    if test_wav_files:
        process_sentences(
            config=config,
            fps=test_wav_files,
            sentences=sentences,
            output_dir=test_dump_dir,
            mel_extractor=mel_extractor,
            nprocs=args.num_cpu,
            cut_sil=args.cut_sil)


# Script entry point: run the preprocessing only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/gan_vocoder/style_melgan/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/exps/gan_vocoder/style_melgan/train.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
import os
import shutil
from pathlib import Path

import jsonlines
import numpy as np
import paddle
import yaml
from paddle import DataParallel
from paddle import distributed as dist
from paddle import nn
from paddle.io import DataLoader
from paddle.io import DistributedBatchSampler
from paddle.optimizer import Adam
from paddle.optimizer.lr import MultiStepDecay
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.data_table import DataTable
from paddlespeech.t2s.datasets.vocoder_batch_fn import Clip
from paddlespeech.t2s.models.melgan import StyleMelGANDiscriminator
from paddlespeech.t2s.models.melgan import StyleMelGANEvaluator
from paddlespeech.t2s.models.melgan import StyleMelGANGenerator
from paddlespeech.t2s.models.melgan import StyleMelGANUpdater
from paddlespeech.t2s.modules.losses import DiscriminatorAdversarialLoss
from paddlespeech.t2s.modules.losses import GeneratorAdversarialLoss
from paddlespeech.t2s.modules.losses import MultiResolutionSTFTLoss
from paddlespeech.t2s.training.extensions.snapshot import Snapshot
from paddlespeech.t2s.training.extensions.visualizer import VisualDL
from paddlespeech.t2s.training.seeding import seed_everything
from paddlespeech.t2s.training.trainer import Trainer


def train_sp(args, config):
    """Set up and run Style MelGAN training in the current process.

    The datasets, dataloaders, generator / discriminator, losses,
    optimizers, updater, evaluator and trainer are built in that order and
    ``trainer.run()`` is called at the end.

    Args:
        args: Parsed command line arguments. The fields read here are
            ``ngpu``, ``train_metadata``, ``dev_metadata``, ``output_dir``
            and ``config``.
        config: Experiment configuration loaded from the YAML file passed
            with ``--config``.
    """
    # decides device type and whether to run in parallel
    # setup running environment correctly
    world_size = paddle.distributed.get_world_size()
    if (not paddle.is_compiled_with_cuda()) or args.ngpu == 0:
        paddle.set_device("cpu")
    else:
        paddle.set_device("gpu")
        if world_size > 1:
            paddle.distributed.init_parallel_env()

    # set the random seed, it is a must for multiprocess training
    seed_everything(config.seed)

    print(
        f"rank: {dist.get_rank()}, pid: {os.getpid()}, parent_pid: {os.getppid()}",
    )

    # dataloader has been too verbose
    logging.getLogger("DataLoader").disabled = True

    # construct dataset for training and validation
    # each metadata line is one record; "wave" and "feats" hold paths that
    # are loaded with np.load by the DataTable converters
    with jsonlines.open(args.train_metadata, 'r') as reader:
        train_metadata = list(reader)
    train_dataset = DataTable(
        data=train_metadata,
        fields=["wave", "feats"],
        converters={
            "wave": np.load,
            "feats": np.load,
        }, )
    with jsonlines.open(args.dev_metadata, 'r') as reader:
        dev_metadata = list(reader)
    dev_dataset = DataTable(
        data=dev_metadata,
        fields=["wave", "feats"],
        converters={
            "wave": np.load,
            "feats": np.load,
        }, )

    # collate function and dataloader
    train_sampler = DistributedBatchSampler(
        train_dataset,
        batch_size=config.batch_size,
        shuffle=True,
        drop_last=True)
    dev_sampler = DistributedBatchSampler(
        dev_dataset,
        batch_size=config.batch_size,
        shuffle=False,
        drop_last=False)
    print("samplers done!")

    # aux_context_window is optional in the generator params, 0 is used
    # when it is absent
    if "aux_context_window" in config.generator_params:
        aux_context_window = config.generator_params.aux_context_window
    else:
        aux_context_window = 0
    train_batch_fn = Clip(
        batch_max_steps=config.batch_max_steps,
        hop_size=config.n_shift,
        aux_context_window=aux_context_window)

    train_dataloader = DataLoader(
        train_dataset,
        batch_sampler=train_sampler,
        collate_fn=train_batch_fn,
        num_workers=config.num_workers)

    # the dev loader reuses the training collate function
    dev_dataloader = DataLoader(
        dev_dataset,
        batch_sampler=dev_sampler,
        collate_fn=train_batch_fn,
        num_workers=config.num_workers)
    print("dataloaders done!")

    generator = StyleMelGANGenerator(**config["generator_params"])
    discriminator = StyleMelGANDiscriminator(**config["discriminator_params"])
    if world_size > 1:
        generator = DataParallel(generator)
        discriminator = DataParallel(discriminator)
    print("models done!")
    criterion_stft = MultiResolutionSTFTLoss(**config["stft_loss_params"])

    criterion_gen_adv = GeneratorAdversarialLoss(
        **config["generator_adv_loss_params"])
    criterion_dis_adv = DiscriminatorAdversarialLoss(
        **config["discriminator_adv_loss_params"])
    print("criterions done!")

    lr_schedule_g = MultiStepDecay(**config["generator_scheduler_params"])
    # Compared to multi_band_melgan.v1 config, Adam optimizer without gradient norm is used
    # a grad norm that is not positive turns gradient clipping off
    generator_grad_norm = config["generator_grad_norm"]
    gradient_clip_g = nn.ClipGradByGlobalNorm(
        generator_grad_norm) if generator_grad_norm > 0 else None
    print("gradient_clip_g:", gradient_clip_g)

    optimizer_g = Adam(
        learning_rate=lr_schedule_g,
        grad_clip=gradient_clip_g,
        parameters=generator.parameters(),
        **config["generator_optimizer_params"])
    lr_schedule_d = MultiStepDecay(**config["discriminator_scheduler_params"])
    discriminator_grad_norm = config["discriminator_grad_norm"]
    gradient_clip_d = nn.ClipGradByGlobalNorm(
        discriminator_grad_norm) if discriminator_grad_norm > 0 else None
    print("gradient_clip_d:", gradient_clip_d)
    optimizer_d = Adam(
        learning_rate=lr_schedule_d,
        grad_clip=gradient_clip_d,
        parameters=discriminator.parameters(),
        **config["discriminator_optimizer_params"])
    print("optimizers done!")

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    if dist.get_rank() == 0:
        # copy conf to output_dir
        # Path(...).name works with any path separator, unlike split("/")
        config_path = Path(args.config)
        try:
            shutil.copyfile(config_path, output_dir / config_path.name)
        except shutil.SameFileError:
            # the config already lives in output_dir (e.g. when a run is
            # restarted with the copied config), so there is nothing to copy
            pass

    updater = StyleMelGANUpdater(
        models={
            "generator": generator,
            "discriminator": discriminator,
        },
        optimizers={
            "generator": optimizer_g,
            "discriminator": optimizer_d,
        },
        criterions={
            "stft": criterion_stft,
            "gen_adv": criterion_gen_adv,
            "dis_adv": criterion_dis_adv,
        },
        schedulers={
            "generator": lr_schedule_g,
            "discriminator": lr_schedule_d,
        },
        dataloader=train_dataloader,
        discriminator_train_start_steps=config.discriminator_train_start_steps,
        lambda_adv=config.lambda_adv,
        output_dir=output_dir)

    evaluator = StyleMelGANEvaluator(
        models={
            "generator": generator,
            "discriminator": discriminator,
        },
        criterions={
            "stft": criterion_stft,
            "gen_adv": criterion_gen_adv,
            "dis_adv": criterion_dis_adv,
        },
        dataloader=dev_dataloader,
        lambda_adv=config.lambda_adv,
        output_dir=output_dir)

    trainer = Trainer(
        updater,
        stop_trigger=(config.train_max_steps, "iteration"),
        out=output_dir)

    # evaluation and VisualDL logging are attached on rank 0 only, while the
    # Snapshot extension is registered on every rank
    if dist.get_rank() == 0:
        trainer.extend(
            evaluator, trigger=(config.eval_interval_steps, 'iteration'))
        trainer.extend(VisualDL(output_dir), trigger=(1, 'iteration'))
    trainer.extend(
        Snapshot(max_size=config.num_snapshots),
        trigger=(config.save_interval_steps, 'iteration'))

    print("Trainer Done!")
    trainer.run()


def main():
    """Command line entry point for Style MelGAN training.

    Parses the arguments, loads the YAML config into a ``CfgNode``, prints
    both, and runs ``train_sp`` either directly or through ``dist.spawn``
    when more than one GPU is requested.
    """
    cli = argparse.ArgumentParser(description="Train a Style MelGAN model.")

    # plain string options, registered in the order they show up in --help
    string_options = (
        ("--config", "Style MelGAN config file."),
        ("--train-metadata", "training data."),
        ("--dev-metadata", "dev data."),
        ("--output-dir", "output dir."), )
    for flag, description in string_options:
        cli.add_argument(flag, type=str, help=description)
    cli.add_argument(
        "--ngpu", type=int, default=1, help="if ngpu == 0, use cpu.")

    args = cli.parse_args()

    with open(args.config, 'rt') as config_file:
        config = CfgNode(yaml.safe_load(config_file))

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(config)
    seen_world_size = dist.get_world_size()
    current_pid = os.getpid()
    print(
        f"master see the word size: {seen_world_size}, from pid: {current_pid}")

    # dispatch: a single process trains in place, several GPUs are spawned
    if args.ngpu <= 1:
        train_sp(args, config)
    else:
        dist.spawn(train_sp, (args, config), nprocs=args.ngpu)


# Script entry point: start Style MelGAN training only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/gan_vocoder/synthesize.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
from pathlib import Path

import jsonlines
import numpy as np
import paddle
import soundfile as sf
import yaml
from paddle import distributed as dist
from timer import timer
from yacs.config import CfgNode

import paddlespeech
from paddlespeech.t2s.datasets.data_table import DataTable


def main():
    """Synthesize waveforms from dumped mel features with a GAN vocoder.

    The generator class is selected with ``--generator-type``, built from
    ``config["generator_params"]`` and restored from the
    ``"generator_params"`` entry of the checkpoint. Every utterance listed
    in ``--test-metadata`` is converted to ``<output-dir>/<utt_id>.wav`` and
    its generation speed is printed; an overall speed report is printed at
    the end when at least one sample was generated.
    """
    parser = argparse.ArgumentParser(description="Synthesize with GANVocoder.")
    parser.add_argument(
        "--generator-type",
        type=str,
        default="pwgan",
        help="type of GANVocoder, should in {pwgan, mb_melgan, style_melgan, hifigan, } now"
    )
    parser.add_argument("--config", type=str, help="GANVocoder config file.")
    parser.add_argument("--checkpoint", type=str, help="snapshot to load.")
    parser.add_argument("--test-metadata", type=str, help="dev data.")
    parser.add_argument("--output-dir", type=str, help="output dir.")
    parser.add_argument(
        "--ngpu", type=int, default=1, help="if ngpu == 0, use cpu.")

    args = parser.parse_args()

    with open(args.config) as f:
        config = CfgNode(yaml.safe_load(f))

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(config)
    print(
        f"master see the word size: {dist.get_world_size()}, from pid: {os.getpid()}"
    )

    if args.ngpu == 0:
        paddle.set_device("cpu")
    elif args.ngpu > 0:
        paddle.set_device("gpu")
    else:
        # a negative value is only reported, the device is left untouched
        print("ngpu should >= 0 !")

    # maps the --generator-type value to the class name looked up in
    # paddlespeech.t2s.models
    class_map = {
        "hifigan": "HiFiGANGenerator",
        "mb_melgan": "MelGANGenerator",
        "pwgan": "PWGGenerator",
        "style_melgan": "StyleMelGANGenerator",
    }

    generator_type = args.generator_type

    assert generator_type in class_map

    print("generator_type:", generator_type)

    generator_class = getattr(paddlespeech.t2s.models,
                              class_map[generator_type])
    generator = generator_class(**config["generator_params"])
    state_dict = paddle.load(args.checkpoint)
    generator.set_state_dict(state_dict["generator_params"])
    generator.remove_weight_norm()
    generator.eval()

    with jsonlines.open(args.test_metadata, 'r') as reader:
        metadata = list(reader)
    test_dataset = DataTable(
        metadata,
        fields=['utt_id', 'feats'],
        converters={
            'utt_id': None,
            'feats': np.load,
        })
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # N accumulates the generated samples, T the elapsed time
    N = 0
    T = 0
    for example in test_dataset:
        utt_id = example['utt_id']
        mel = example['feats']
        mel = paddle.to_tensor(mel)  # (T, C)
        with timer() as t:
            with paddle.no_grad():
                wav = generator.inference(c=mel)
            wav = wav.numpy()
            N += wav.size
            T += t.elapse
            speed = wav.size / t.elapse
            rtf = config.fs / speed
        print(
            f"{utt_id}, mel: {mel.shape}, wave: {wav.shape}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}."
        )
        sf.write(str(output_dir / (utt_id + ".wav")), wav, samplerate=config.fs)

    # with empty test metadata N and T stay 0, and the speed report would
    # divide by zero
    if N > 0 and T > 0:
        print(f"generation speed: {N / T}Hz, RTF: {config.fs / (N / T) }")
    else:
        print("no audio was generated, skip the generation speed report.")


# Script entry point: run the synthesis only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/inference.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from pathlib import Path

import paddle
import soundfile as sf
from timer import timer

from paddlespeech.t2s.exps.syn_utils import get_am_output
from paddlespeech.t2s.exps.syn_utils import get_frontend
from paddlespeech.t2s.exps.syn_utils import get_predictor
from paddlespeech.t2s.exps.syn_utils import get_sentences
from paddlespeech.t2s.exps.syn_utils import get_voc_output
from paddlespeech.t2s.utils import str2bool


def parse_args():
    """Parse the command-line options of the AM + vocoder inference script.

    Options that the parser does not know are silently ignored, because the
    arguments are read with ``parse_known_args``.

    Returns:
        argparse.Namespace: the parsed options (acoustic model, vocoder,
        dictionaries, input/output locations and predictor settings).
    """
    parser = argparse.ArgumentParser(
        description="Paddle Inference with acoustic model & vocoder.")
    # acoustic model
    parser.add_argument(
        '--am',
        type=str,
        default='fastspeech2_csmsc',
        choices=[
            'speedyspeech_csmsc',
            'fastspeech2_csmsc',
            'fastspeech2_aishell3',
            'fastspeech2_ljspeech',
            'fastspeech2_vctk',
            'tacotron2_csmsc',
            'fastspeech2_mix',
            'fastspeech2_male-zh',
            'fastspeech2_male-en',
            'fastspeech2_male-mix',
            'fastspeech2_canton',
        ],
        help='Choose acoustic model type of tts task.')
    parser.add_argument(
        "--phones_dict", type=str, default=None, help="phone vocabulary file.")
    parser.add_argument(
        "--tones_dict", type=str, default=None, help="tone vocabulary file.")
    parser.add_argument(
        "--speaker_dict", type=str, default=None, help="speaker id map file.")
    parser.add_argument(
        '--spk_id',
        type=int,
        default=0,
        help='spk id for multi speaker acoustic model')
    # voc
    parser.add_argument(
        '--voc',
        type=str,
        default='pwgan_csmsc',
        choices=[
            'pwgan_csmsc',
            'pwgan_aishell3',
            'pwgan_ljspeech',
            'pwgan_vctk',
            'mb_melgan_csmsc',
            'hifigan_csmsc',
            'hifigan_aishell3',
            'hifigan_ljspeech',
            'hifigan_vctk',
            'wavernn_csmsc',
            'pwgan_male',
            'hifigan_male',
        ],
        help='Choose vocoder type of tts task.')
    # other
    parser.add_argument(
        '--lang',
        type=str,
        default='zh',
        help='Choose model language. zh or en or mix')
    parser.add_argument(
        "--text",
        type=str,
        help="text to synthesize, a 'utt_id sentence' pair per line")
    parser.add_argument(
        "--inference_dir", type=str, help="dir to save inference models")
    parser.add_argument("--output_dir", type=str, help="output dir")
    # inference
    parser.add_argument(
        "--use_trt",
        type=str2bool,
        default=False,
        help="whether to use TensorRT or not in GPU", )
    parser.add_argument(
        "--use_mkldnn",
        type=str2bool,
        default=False,
        help="whether to use MKLDNN or not in CPU.", )
    parser.add_argument(
        "--precision",
        type=str,
        default='fp32',
        choices=['fp32', 'fp16', 'bf16', 'int8'],
        help="mode of running")
    parser.add_argument(
        "--device",
        default="gpu",
        choices=["gpu", "cpu", "xpu", "npu", "mlu", "gcu"],
        help="Device selected for inference.", )
    parser.add_argument('--cpu_threads', type=int, default=1)

    # Unknown options are tolerated on purpose: only the known ones are kept.
    args, _ = parser.parse_known_args()
    return args


# only inference for models trained with csmsc now
def main():
    """Synthesize every sentence of ``--text`` with exported predictors.

    The acoustic-model and vocoder predictors are loaded from
    ``--inference_dir``. The first three sentences are synthesized once as a
    warm-up (nothing is written), then every sentence is synthesized, timed
    and written to ``<output_dir>/<utt_id>.wav``. Per-utterance and overall
    speed / real-time factor are printed.
    """
    args = parse_args()

    paddle.set_device(args.device)

    # frontend
    frontend = get_frontend(
        lang=args.lang,
        phones_dict=args.phones_dict,
        tones_dict=args.tones_dict)

    # am_predictor
    am_predictor = get_predictor(
        model_dir=args.inference_dir,
        model_file=args.am + ".pdmodel",
        params_file=args.am + ".pdiparams",
        device=args.device,
        use_trt=args.use_trt,
        use_mkldnn=args.use_mkldnn,
        cpu_threads=args.cpu_threads,
        precision=args.precision)
    # model: {model_name}_{dataset}
    am_dataset = args.am[args.am.rindex('_') + 1:]

    # voc_predictor
    voc_predictor = get_predictor(
        model_dir=args.inference_dir,
        model_file=args.voc + ".pdmodel",
        params_file=args.voc + ".pdiparams",
        device=args.device,
        use_trt=args.use_trt,
        use_mkldnn=args.use_mkldnn,
        cpu_threads=args.cpu_threads,
        precision=args.precision)

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    sentences = get_sentences(text_file=args.text, lang=args.lang)

    merge_sentences = True
    # Output sample rate: 22050 Hz for the ljspeech dataset suffix, 24000 Hz otherwise.
    fs = 24000 if am_dataset != 'ljspeech' else 22050

    def synthesize(sentence):
        """Run the acoustic model and the vocoder; return ``(mel, wav)``."""
        mel = get_am_output(
            input=sentence,
            am_predictor=am_predictor,
            am=args.am,
            frontend=frontend,
            lang=args.lang,
            merge_sentences=merge_sentences,
            speaker_dict=args.speaker_dict,
            spk_id=args.spk_id, )
        wav = get_voc_output(voc_predictor=voc_predictor, input=mel)
        return mel, wav

    # warmup: results are only reported, never saved
    for utt_id, sentence in sentences[:3]:
        with timer() as t:
            mel, wav = synthesize(sentence)
        speed = wav.size / t.elapse
        rtf = fs / speed
        print(
            f"{utt_id}, mel: {mel.shape}, wave: {wav.shape}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}."
        )

    print("warm up done!")

    N = 0  # total number of generated samples
    T = 0  # total synthesis time
    for utt_id, sentence in sentences:
        with timer() as t:
            mel, wav = synthesize(sentence)

        N += wav.size
        T += t.elapse
        speed = wav.size / t.elapse
        rtf = fs / speed

        # Pass a plain string path, as the sibling synthesis script does.
        sf.write(str(output_dir / (utt_id + ".wav")), wav, samplerate=fs)
        print(
            f"{utt_id}, mel: {mel.shape}, wave: {wav.shape}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}."
        )

        print(f"{utt_id} done!")

    # Without any generated audio the summary below would divide by zero.
    if N == 0 or T == 0:
        print("no audio was generated, skip the speed summary.")
        return
    print(f"generation speed: {N / T}Hz, RTF: {fs / (N / T) }")

# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/inference_streaming.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from pathlib import Path

import numpy as np
import paddle
import soundfile as sf
from timer import timer

from paddlespeech.t2s.exps.syn_utils import denorm
from paddlespeech.t2s.exps.syn_utils import get_am_sublayer_output
from paddlespeech.t2s.exps.syn_utils import get_chunks
from paddlespeech.t2s.exps.syn_utils import get_frontend
from paddlespeech.t2s.exps.syn_utils import get_predictor
from paddlespeech.t2s.exps.syn_utils import get_sentences
from paddlespeech.t2s.exps.syn_utils import get_streaming_am_output
from paddlespeech.t2s.exps.syn_utils import get_voc_output
from paddlespeech.t2s.exps.syn_utils import run_frontend
from paddlespeech.t2s.utils import str2bool


def parse_args():
    """Parse the command-line options of the streaming inference script.

    Options that the parser does not know are silently ignored, because the
    arguments are read with ``parse_known_args``.

    Returns:
        argparse.Namespace: the parsed options (acoustic model and its
        statistics file, vocoder, dictionaries, input/output locations,
        device and the streaming block / pad sizes).
    """
    parser = argparse.ArgumentParser(
        description="Paddle Inference with acoustic model & vocoder.")
    # acoustic model
    parser.add_argument(
        '--am',
        type=str,
        default='fastspeech2_csmsc',
        choices=['fastspeech2_csmsc'],
        help='Choose acoustic model type of tts task.')
    parser.add_argument(
        "--am_stat",
        type=str,
        default=None,
        help="mean and standard deviation used to normalize spectrogram when training acoustic model."
    )
    parser.add_argument(
        "--phones_dict", type=str, default=None, help="phone vocabulary file.")
    parser.add_argument(
        "--tones_dict", type=str, default=None, help="tone vocabulary file.")
    parser.add_argument(
        "--speaker_dict", type=str, default=None, help="speaker id map file.")
    parser.add_argument(
        '--spk_id',
        type=int,
        default=0,
        help='spk id for multi speaker acoustic model')
    # voc
    parser.add_argument(
        '--voc',
        type=str,
        default='pwgan_csmsc',
        choices=['pwgan_csmsc', 'mb_melgan_csmsc', 'hifigan_csmsc'],
        help='Choose vocoder type of tts task.')
    # other
    parser.add_argument(
        '--lang',
        type=str,
        default='zh',
        help='Choose model language. zh or en')
    parser.add_argument(
        "--text",
        type=str,
        help="text to synthesize, a 'utt_id sentence' pair per line")
    parser.add_argument(
        "--inference_dir", type=str, help="dir to save inference models")
    parser.add_argument("--output_dir", type=str, help="output dir")
    # inference
    parser.add_argument(
        "--device",
        default="gpu",
        choices=["gpu", "cpu"],
        help="Device selected for inference.", )
    # streaming related
    parser.add_argument(
        "--am_streaming",
        type=str2bool,
        default=False,
        help="whether use streaming acoustic model")
    parser.add_argument(
        "--block_size", type=int, default=42, help="block size of am streaming")
    parser.add_argument(
        "--pad_size", type=int, default=12, help="pad size of am streaming")

    # Unknown options are tolerated on purpose: only the known ones are kept.
    args, _ = parser.parse_known_args()
    return args


# Only models trained on the CSMSC dataset are supported for now.
def _clip_chunk_context(sub_mel, chunk_idx, block_size, pad_size, total_len):
    """Keep only the frames of one streamed chunk that belong to its own block.

    Assumes chunk ``chunk_idx`` was cut from a ``total_len``-frame sequence as
    ``[max(0, chunk_idx * block_size - pad_size),
    min((chunk_idx + 1) * block_size + pad_size, total_len))`` and that
    ``sub_mel`` has one row per frame of that chunk (TODO confirm against
    ``get_chunks``).

    Args:
        sub_mel: frames produced for the chunk, indexable along axis 0.
        chunk_idx (int): zero-based index of the chunk.
        block_size (int): number of frames each chunk is responsible for.
        pad_size (int): requested context on each side of a block.
        total_len (int): number of frames of the un-chunked sequence.

    Returns:
        The slice of ``sub_mel`` without left/right context frames.
    """
    block_start = chunk_idx * block_size
    # Near the beginning the left context is shorter than pad_size.
    left_context = min(block_start, pad_size)
    # The last block may hold fewer than block_size frames.
    n_valid = min(block_size, total_len - block_start)
    return sub_mel[left_context:left_context + n_valid]


def main():
    """Synthesize every sentence of ``--text`` with the split acoustic model.

    The acoustic model is run as three predictors (encoder-infer, decoder,
    postnet) followed by a vocoder predictor, all loaded from
    ``--inference_dir``. The first three sentences are synthesized once as a
    warm-up. Every sentence is then synthesized, either in one pass or, with
    ``--am_streaming``, chunk by chunk, and written to
    ``<output_dir>/<utt_id>.wav``.
    """
    args = parse_args()

    paddle.set_device(args.device)

    # frontend
    frontend = get_frontend(
        lang=args.lang,
        phones_dict=args.phones_dict,
        tones_dict=args.tones_dict)

    # am_predictor

    am_encoder_infer_predictor = get_predictor(
        model_dir=args.inference_dir,
        model_file=args.am + "_am_encoder_infer" + ".pdmodel",
        params_file=args.am + "_am_encoder_infer" + ".pdiparams",
        device=args.device)
    am_decoder_predictor = get_predictor(
        model_dir=args.inference_dir,
        model_file=args.am + "_am_decoder" + ".pdmodel",
        params_file=args.am + "_am_decoder" + ".pdiparams",
        device=args.device)
    am_postnet_predictor = get_predictor(
        model_dir=args.inference_dir,
        model_file=args.am + "_am_postnet" + ".pdmodel",
        params_file=args.am + "_am_postnet" + ".pdiparams",
        device=args.device)
    am_mu, am_std = np.load(args.am_stat)
    # model: {model_name}_{dataset}
    am_dataset = args.am[args.am.rindex('_') + 1:]

    # voc_predictor
    voc_predictor = get_predictor(
        model_dir=args.inference_dir,
        model_file=args.voc + ".pdmodel",
        params_file=args.voc + ".pdiparams",
        device=args.device)

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    sentences = get_sentences(text_file=args.text, lang=args.lang)

    merge_sentences = True

    # Output sample rate: 22050 Hz for the ljspeech dataset suffix, 24000 Hz otherwise.
    fs = 24000 if am_dataset != 'ljspeech' else 22050

    def decode(hs):
        """Run decoder + postnet on hidden states; return the de-normalized mel."""
        am_decoder_output = get_am_sublayer_output(
            am_decoder_predictor, input=hs)
        am_postnet_output = get_am_sublayer_output(
            am_postnet_predictor,
            input=np.transpose(am_decoder_output, (0, 2, 1)))
        # The postnet output is added back to the decoder output as a residual.
        am_output_data = am_decoder_output + np.transpose(
            am_postnet_output, (0, 2, 1))
        normalized_mel = am_output_data[0]
        return denorm(normalized_mel, am_mu, am_std)

    # warmup: results are only reported, never saved
    for utt_id, sentence in sentences[:3]:
        with timer() as t:
            normalized_mel = get_streaming_am_output(
                input=sentence,
                am_encoder_infer_predictor=am_encoder_infer_predictor,
                am_decoder_predictor=am_decoder_predictor,
                am_postnet_predictor=am_postnet_predictor,
                frontend=frontend,
                lang=args.lang,
                merge_sentences=merge_sentences, )
            mel = denorm(normalized_mel, am_mu, am_std)
            wav = get_voc_output(voc_predictor=voc_predictor, input=mel)
        speed = wav.size / t.elapse
        rtf = fs / speed
        print(
            f"{utt_id}, mel: {mel.shape}, wave: {wav.shape}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}."
        )

    print("warm up done!")

    N = 0  # total number of generated samples
    T = 0  # total synthesis time
    block_size = args.block_size
    pad_size = args.pad_size
    get_tone_ids = False
    for utt_id, sentence in sentences:
        with timer() as t:
            # frontend
            frontend_dict = run_frontend(
                frontend=frontend,
                text=sentence,
                merge_sentences=merge_sentences,
                get_tone_ids=get_tone_ids,
                lang=args.lang)
            phone_ids = frontend_dict['phone_ids']
            phones = phone_ids[0].numpy()
            # acoustic model
            orig_hs = get_am_sublayer_output(
                am_encoder_infer_predictor, input=phones)

            if args.am_streaming:
                hss = get_chunks(orig_hs, block_size, pad_size)
                # assumes orig_hs is (batch, frames, dim) - TODO confirm
                total_frames = orig_hs.shape[1]
                mel_list = []
                for i, hs in enumerate(hss):
                    sub_mel = decode(hs)
                    # Clip the context part of the output. Chunks near either
                    # end of the utterance carry less than pad_size context,
                    # so the valid span is computed from indices.
                    sub_mel = _clip_chunk_context(sub_mel, i, block_size,
                                                  pad_size, total_frames)
                    mel_list.append(sub_mel)
                mel = np.concatenate(mel_list, axis=0)

            else:
                mel = decode(orig_hs)
            # vocoder
            wav = get_voc_output(voc_predictor=voc_predictor, input=mel)

        N += wav.size
        T += t.elapse
        speed = wav.size / t.elapse
        rtf = fs / speed

        # Write with the same sample rate that the RTF figures are based on.
        sf.write(str(output_dir / (utt_id + ".wav")), wav, samplerate=fs)
        print(
            f"{utt_id}, mel: {mel.shape}, wave: {wav.shape}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}."
        )

        print(f"{utt_id} done!")

    # Without any generated audio the summary below would divide by zero.
    if N == 0 or T == 0:
        print("no audio was generated, skip the speed summary.")
        return
    print(f"generation speed: {N / T}Hz, RTF: {fs / (N / T) }")


# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/jets/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/exps/jets/inference.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from pathlib import Path

import paddle
import soundfile as sf
from timer import timer

from paddlespeech.t2s.exps.syn_utils import get_am_output
from paddlespeech.t2s.exps.syn_utils import get_frontend
from paddlespeech.t2s.exps.syn_utils import get_predictor
from paddlespeech.t2s.exps.syn_utils import get_sentences
from paddlespeech.t2s.utils import str2bool


def parse_args():
    """Parse the command-line options of the JETS inference script.

    Options that the parser does not know are silently ignored, because the
    arguments are read with ``parse_known_args``.

    Returns:
        argparse.Namespace: the parsed options (model name, dictionaries,
        input/output locations and predictor settings).
    """
    parser = argparse.ArgumentParser(
        description="Paddle Inference with acoustic model & vocoder.")
    # acoustic model
    parser.add_argument(
        '--am',
        type=str,
        default='jets_csmsc',
        choices=['jets_csmsc', 'jets_aishell3'],
        help='Choose acoustic model type of tts task.')
    parser.add_argument(
        "--phones_dict", type=str, default=None, help="phone vocabulary file.")
    parser.add_argument(
        "--speaker_dict", type=str, default=None, help="speaker id map file.")
    parser.add_argument(
        '--spk_id',
        type=int,
        default=0,
        help='spk id for multi speaker acoustic model')
    # other
    parser.add_argument(
        '--lang',
        type=str,
        default='zh',
        help='Choose model language. zh or en or mix')
    parser.add_argument(
        "--text",
        type=str,
        help="text to synthesize, a 'utt_id sentence' pair per line")
    parser.add_argument(
        "--add-blank",
        type=str2bool,
        default=True,
        help="whether to add blank between phones")
    parser.add_argument(
        "--inference_dir", type=str, help="dir to save inference models")
    parser.add_argument("--output_dir", type=str, help="output dir")
    # inference
    parser.add_argument(
        "--use_trt",
        type=str2bool,
        default=False,
        help="whether to use TensorRT or not in GPU", )
    parser.add_argument(
        "--use_mkldnn",
        type=str2bool,
        default=False,
        help="whether to use MKLDNN or not in CPU.", )
    parser.add_argument(
        "--precision",
        type=str,
        default='fp32',
        choices=['fp32', 'fp16', 'bf16', 'int8'],
        help="mode of running")
    parser.add_argument(
        "--device",
        default="gpu",
        choices=["gpu", "cpu"],
        help="Device selected for inference.", )
    parser.add_argument('--cpu_threads', type=int, default=1)

    # Unknown options are tolerated on purpose: only the known ones are kept.
    args, _ = parser.parse_known_args()
    return args


# only inference for models trained with csmsc now
def main():
    """Synthesize every sentence of ``--text`` with an exported JETS predictor.

    The predictor loaded from ``--inference_dir`` is called once per sentence
    and its output is treated as the waveform. The first three sentences are
    synthesized once as a warm-up (nothing is written), then every sentence is
    synthesized, timed and written to ``<output_dir>/<utt_id>.wav`` at
    22050 Hz.
    """
    args = parse_args()

    paddle.set_device(args.device)

    # frontend
    frontend = get_frontend(lang=args.lang, phones_dict=args.phones_dict)

    # am_predictor
    am_predictor = get_predictor(
        model_dir=args.inference_dir,
        model_file=args.am + ".pdmodel",
        params_file=args.am + ".pdiparams",
        device=args.device,
        use_trt=args.use_trt,
        use_mkldnn=args.use_mkldnn,
        cpu_threads=args.cpu_threads,
        precision=args.precision)

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    sentences = get_sentences(text_file=args.text, lang=args.lang)

    merge_sentences = True
    # NOTE(review): --add-blank is parsed but was never forwarded to
    # get_am_output here; confirm whether the frontend should receive it.
    # jets's fs is 22050
    fs = 22050

    def synthesize(sentence):
        """Run the predictor on one sentence and return its output."""
        return get_am_output(
            input=sentence,
            am_predictor=am_predictor,
            am=args.am,
            frontend=frontend,
            lang=args.lang,
            merge_sentences=merge_sentences,
            speaker_dict=args.speaker_dict,
            spk_id=args.spk_id, )

    # warmup: results are only reported, never saved
    for utt_id, sentence in sentences[:3]:
        with timer() as t:
            wav = synthesize(sentence)
        speed = wav.size / t.elapse
        rtf = fs / speed
        print(
            f"{utt_id}, wave: {wav.shape}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}."
        )

    print("warm up done!")

    N = 0  # total number of generated samples
    T = 0  # total synthesis time
    for utt_id, sentence in sentences:
        with timer() as t:
            wav = synthesize(sentence)

        N += wav.size
        T += t.elapse
        speed = wav.size / t.elapse
        rtf = fs / speed
        sf.write(str(output_dir / (utt_id + ".wav")), wav, samplerate=fs)
        print(
            f"{utt_id}, wave: {wav.shape}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}."
        )

        print(f"{utt_id} done!")

    # Without any generated audio the summary below would divide by zero.
    if N == 0 or T == 0:
        print("no audio was generated, skip the speed summary.")
        return
    print(f"generation speed: {N / T}Hz, RTF: {fs / (N / T) }")


# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/jets/normalize.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Normalize feature files and dump them."""
import argparse
import logging
from operator import itemgetter
from pathlib import Path

import jsonlines
import numpy as np
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

from paddlespeech.t2s.datasets.data_table import DataTable


def main():
    """Normalize dumped features and write them with a new metadata file.

    Reads the records listed in ``--metadata``, standardizes the ``feats``,
    ``pitch`` and ``energy`` arrays with the mean / scale stored in the
    corresponding statistics files, saves them as float32 ``.npy`` files under
    ``<dumpdir>/data_feats``, ``data_pitch`` and ``data_energy``, maps phones
    and speaker names to integer ids, and writes the resulting records,
    sorted by ``utt_id``, to ``<dumpdir>/metadata.jsonl``.
    """
    parser = argparse.ArgumentParser(
        description="Normalize dumped raw features (See detail in parallel_wavegan/bin/normalize.py)."
    )
    parser.add_argument(
        "--metadata",
        type=str,
        required=True,
        help="directory including feature files to be normalized. "
        "you need to specify either *-scp or rootdir.")

    parser.add_argument(
        "--dumpdir",
        type=str,
        required=True,
        help="directory to dump normalized feature files.")
    parser.add_argument(
        "--feats-stats", type=str, required=True, help="feats statistics file.")
    parser.add_argument(
        "--pitch-stats", type=str, required=True, help="pitch statistics file.")
    parser.add_argument(
        "--energy-stats",
        type=str,
        required=True,
        help="energy statistics file.")
    parser.add_argument(
        "--phones-dict", type=str, default=None, help="phone vocabulary file.")
    parser.add_argument(
        "--speaker-dict", type=str, default=None, help="speaker id map file.")

    args = parser.parse_args()

    # Both dictionaries default to None but are read unconditionally below,
    # so fail early with a usage error instead of a TypeError from open().
    if args.phones_dict is None or args.speaker_dict is None:
        parser.error("--phones-dict and --speaker-dict are both required.")

    # use absolute path
    dumpdir = Path(args.dumpdir).expanduser().resolve()
    dumpdir.mkdir(parents=True, exist_ok=True)

    # get dataset
    with jsonlines.open(args.metadata, 'r') as reader:
        metadata = list(reader)
    dataset = DataTable(
        metadata,
        converters={
            "feats": np.load,
            "pitch": np.load,
            "energy": np.load,
            "wave": str,
        })
    logging.info(f"The number of files = {len(dataset)}.")

    def restore_scaler(stats_path):
        """Build a StandardScaler from a stats file (row 0: mean, row 1: scale)."""
        stats = np.load(stats_path)
        scaler = StandardScaler()
        scaler.mean_ = stats[0]
        scaler.scale_ = stats[1]
        scaler.n_features_in_ = scaler.mean_.shape[0]
        return scaler

    def load_vocab(dict_path):
        """Read a whitespace-separated ``<symbol> <id>`` file into a dict."""
        vocab = {}
        with open(dict_path, 'rt') as f:
            for line in f:
                fields = line.strip().split()
                # tolerate blank lines, e.g. a trailing newline at EOF
                if not fields:
                    continue
                symbol, index = fields
                vocab[symbol] = int(index)
        return vocab

    # restore scaler
    feats_scaler = restore_scaler(args.feats_stats)
    pitch_scaler = restore_scaler(args.pitch_stats)
    energy_scaler = restore_scaler(args.energy_stats)

    vocab_phones = load_vocab(args.phones_dict)
    vocab_speaker = load_vocab(args.speaker_dict)

    # The output directories do not depend on the item: create them once.
    feats_dir = dumpdir / "data_feats"
    pitch_dir = dumpdir / "data_pitch"
    energy_dir = dumpdir / "data_energy"
    for out_dir in (feats_dir, pitch_dir, energy_dir):
        out_dir.mkdir(parents=True, exist_ok=True)

    # process each file
    output_metadata = []

    for item in tqdm(dataset):
        utt_id = item['utt_id']
        feats = item['feats']
        pitch = item['pitch']
        energy = item['energy']
        wave_path = item['wave']
        # normalize
        feats = feats_scaler.transform(feats)
        feats_path = feats_dir / f"{utt_id}_feats.npy"
        np.save(feats_path, feats.astype(np.float32), allow_pickle=False)

        pitch = pitch_scaler.transform(pitch)
        pitch_path = pitch_dir / f"{utt_id}_pitch.npy"
        np.save(pitch_path, pitch.astype(np.float32), allow_pickle=False)

        energy = energy_scaler.transform(energy)
        energy_path = energy_dir / f"{utt_id}_energy.npy"
        np.save(energy_path, energy.astype(np.float32), allow_pickle=False)

        phone_ids = [vocab_phones[p] for p in item['phones']]
        spk_id = vocab_speaker[item["speaker"]]
        record = {
            "utt_id": item['utt_id'],
            "spk_id": spk_id,
            "text": phone_ids,
            "text_lengths": item['text_lengths'],
            "feats_lengths": item['feats_lengths'],
            "durations": item['durations'],
            "feats": str(feats_path),
            "pitch": str(pitch_path),
            "energy": str(energy_path),
            "wave": str(wave_path),
        }
        # add spk_emb for voice cloning
        if "spk_emb" in item:
            record["spk_emb"] = str(item["spk_emb"])

        output_metadata.append(record)
    output_metadata.sort(key=itemgetter('utt_id'))
    # Use the expanded/resolved directory so that the metadata lands next to
    # the feature files even when --dumpdir starts with "~".
    output_metadata_path = dumpdir / "metadata.jsonl"
    with jsonlines.open(output_metadata_path, 'w') as writer:
        for item in output_metadata:
            writer.write(item)
    logging.info(f"metadata dumped into {output_metadata_path}")


# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/jets/preprocess.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
from concurrent.futures import ThreadPoolExecutor
from operator import itemgetter
from pathlib import Path
from typing import Any
from typing import Dict
from typing import List

import jsonlines
import librosa
import numpy as np
import tqdm
import yaml
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.get_feats import Energy
from paddlespeech.t2s.datasets.get_feats import LogMelFBank
from paddlespeech.t2s.datasets.get_feats import Pitch
from paddlespeech.t2s.datasets.preprocess_utils import compare_duration_and_mel_length
from paddlespeech.t2s.datasets.preprocess_utils import get_input_token
from paddlespeech.t2s.datasets.preprocess_utils import get_phn_dur
from paddlespeech.t2s.datasets.preprocess_utils import get_spk_id_map
from paddlespeech.t2s.datasets.preprocess_utils import merge_silence
from paddlespeech.t2s.utils import str2bool


def process_sentence(config: Dict[str, Any],
                     fp: Path,
                     sentences: Dict,
                     output_dir: Path,
                     mel_extractor=None,
                     pitch_extractor=None,
                     energy_extractor=None,
                     cut_sil: bool=True,
                     spk_emb_dir: Path=None,
                     token_average: bool=True):
    """Extract and dump mel, waveform, pitch and energy features of one utterance.

    Args:
        config: Configuration object. It is annotated as a dict but is read
            through attributes here (``config.fs``, ``config.n_shift``).
        fp: Path of the audio file. Its stem, with a trailing ``_mic2``
            removed, is used as the utterance id.
        sentences: Mapping from utterance id to an indexable whose items 0, 1
            and 2 are read as phones, durations and speaker. When ``cut_sil``
            is true, items 0 and 1 are overwritten in place; the entry is also
            handed to ``compare_duration_and_mel_length``, after which it may
            no longer be present.
        output_dir: Directory under which ``data_feats``, ``data_wave``,
            ``data_pitch`` and ``data_energy`` are created and filled.
        mel_extractor: Object providing ``get_log_mel_fbank(wav)``.
        pitch_extractor: Object providing ``get_pitch(...)``.
        energy_extractor: Object providing ``get_energy(...)``.
        cut_sil: Whether to drop a leading / trailing ``sil`` phone and trim
            the waveform accordingly.
        spk_emb_dir: Optional directory searched for
            ``<speaker>/<utt_id>.npy``.
        token_average: When true, pitch and energy are extracted with the
            durations and are expected to have one value per phone; otherwise
            they are truncated to the number of mel frames.

    Returns:
        A metadata dict holding the utterance id, phones, lengths, durations,
        speaker and the paths of the dumped ``.npy`` files (plus ``spk_emb``
        when an embedding file is found), or ``None`` when the utterance is
        skipped: unknown utterance id, audio that is not 1-D, entry removed
        from ``sentences``, all-zero f0, or a speaker directory that exists
        without the utterance's embedding file.
    """
    utt_id = fp.stem
    # for vctk
    if utt_id.endswith("_mic2"):
        utt_id = utt_id[:-5]
    record = None
    if utt_id in sentences:
        # reading, resampling may occur
        # files whose path contains "canton" are loaded with mono=False so the
        # channels are kept; everything else uses librosa's default loading
        wav, _ = librosa.load(
            str(fp), sr=config.fs,
            mono=False) if "canton" in str(fp) else librosa.load(
                str(fp), sr=config.fs)
        if len(wav.shape) == 2 and "canton" in str(fp):
            # Remind that Cantonese datasets should be placed in ~/datasets/canton_all. Otherwise, it may cause problem.
            # keep only the first channel of a 2-D "canton" recording
            wav = wav[0]
            wav = np.ascontiguousarray(wav)
        elif len(wav.shape) != 1:
            # any other non 1-D audio is skipped (record is still None here)
            return record
        # rescale only when the peak amplitude exceeds 1.0
        max_value = np.abs(wav).max()
        if max_value > 1.0:
            wav = wav / max_value
        assert len(wav.shape) == 1, f"{utt_id} is not a mono-channel audio."
        assert np.abs(wav).max(
        ) <= 1.0, f"{utt_id} is seems to be different that 16 bit PCM."
        phones = sentences[utt_id][0]
        durations = sentences[utt_id][1]
        speaker = sentences[utt_id][2]
        # cumulative durations with a leading 0, i.e. the phone boundaries
        d_cumsum = np.pad(np.array(durations).cumsum(0), (1, 0), 'constant')
        # little imprecise than use *.TextGrid directly
        times = librosa.frames_to_time(
            d_cumsum, sr=config.fs, hop_length=config.n_shift)
        if cut_sil:
            start = 0
            # NOTE(review): `end` starts as a value from d_cumsum (the input of
            # frames_to_time) but is passed to time_to_samples below like
            # `times` entries are. When no trailing "sil" is cut this
            # presumably gives an index beyond the waveform, so the slice keeps
            # the whole tail -- confirm this is intended.
            end = d_cumsum[-1]
            if phones[0] == "sil" and len(durations) > 1:
                start = times[1]
                durations = durations[1:]
                phones = phones[1:]
            if phones[-1] == 'sil' and len(durations) > 1:
                end = times[-2]
                durations = durations[:-1]
                phones = phones[:-1]
            # write the trimmed phones / durations back into the shared mapping
            sentences[utt_id][0] = phones
            sentences[utt_id][1] = durations
            start, end = librosa.time_to_samples([start, end], sr=config.fs)
            wav = wav[start:end]
        # extract mel feats
        logmel = mel_extractor.get_log_mel_fbank(wav)
        # change duration according to mel_length
        compare_duration_and_mel_length(sentences, utt_id, logmel)
        # utt_id may be popped in compare_duration_and_mel_length
        if utt_id not in sentences:
            return None
        # re-read: the helper above may have adjusted the stored values
        phones = sentences[utt_id][0]
        durations = sentences[utt_id][1]
        num_frames = logmel.shape[0]
        assert sum(durations) == num_frames
        mel_dir = output_dir / "data_feats"
        mel_dir.mkdir(parents=True, exist_ok=True)
        mel_path = mel_dir / (utt_id + "_feats.npy")
        np.save(mel_path, logmel)

        # make the waveform exactly num_frames * n_shift samples long:
        # reflect-pad when it is shorter, truncate otherwise
        if wav.size < num_frames * config.n_shift:
            wav = np.pad(
                wav, (0, num_frames * config.n_shift - wav.size),
                mode="reflect")
        else:
            wav = wav[:num_frames * config.n_shift]
        wave_dir = output_dir / "data_wave"
        wave_dir.mkdir(parents=True, exist_ok=True)
        wav_path = wave_dir / (utt_id + "_wave.npy")
        # (num_samples, )
        np.save(wav_path, wav)
        # extract pitch and energy
        if token_average == True:
            # one f0 value per phone is expected
            f0 = pitch_extractor.get_pitch(
                wav,
                duration=np.array(durations),
                use_token_averaged_f0=token_average)
            # utterances whose f0 is zero everywhere are skipped
            if (f0 == 0).all():
                return None
            assert f0.shape[0] == len(durations)
        else:
            # frame-level f0, truncated to the number of mel frames
            f0 = pitch_extractor.get_pitch(
                wav, use_token_averaged_f0=token_average)
            if (f0 == 0).all():
                return None
            f0 = f0[:num_frames]
            assert f0.shape[0] == num_frames
        f0_dir = output_dir / "data_pitch"
        f0_dir.mkdir(parents=True, exist_ok=True)
        f0_path = f0_dir / (utt_id + "_pitch.npy")
        np.save(f0_path, f0)
        if token_average == True:
            # one energy value per phone is expected
            energy = energy_extractor.get_energy(
                wav,
                duration=np.array(durations),
                use_token_averaged_energy=token_average)
            assert energy.shape[0] == len(durations)
        else:
            # frame-level energy, truncated to the number of mel frames
            energy = energy_extractor.get_energy(
                wav, use_token_averaged_energy=token_average)
            energy = energy[:num_frames]
            assert energy.shape[0] == num_frames

        energy_dir = output_dir / "data_energy"
        energy_dir.mkdir(parents=True, exist_ok=True)
        energy_path = energy_dir / (utt_id + "_energy.npy")
        np.save(energy_path, energy)
        record = {
            "utt_id": utt_id,
            "phones": phones,
            "text_lengths": len(phones),
            "feats_lengths": num_frames,
            "durations": durations,
            "feats": str(mel_path),
            "pitch": str(f0_path),
            "energy": str(energy_path),
            "wave": str(wav_path),
            "speaker": speaker
        }
        if spk_emb_dir:
            # if the speaker has no entry in spk_emb_dir the record is returned
            # without "spk_emb"; if the entry exists but the utterance's
            # embedding file does not, the utterance is dropped
            if speaker in os.listdir(spk_emb_dir):
                embed_name = utt_id + ".npy"
                embed_path = spk_emb_dir / speaker / embed_name
                if embed_path.is_file():
                    record["spk_emb"] = str(embed_path)
                else:
                    return None
    return record


def process_sentences(config,
                      fps: List[Path],
                      sentences: Dict,
                      output_dir: Path,
                      mel_extractor=None,
                      pitch_extractor=None,
                      energy_extractor=None,
                      nprocs: int=1,
                      cut_sil: bool=True,
                      spk_emb_dir: Path=None,
                      write_metadata_method: str='w',
                      token_average: bool=True):
    """Run ``process_sentence`` on every file and write ``metadata.jsonl``.

    Args:
        config: Configuration object forwarded to ``process_sentence``.
        fps: Audio files to process.
        sentences: Utterance mapping forwarded to ``process_sentence``.
        output_dir: Directory receiving the features and ``metadata.jsonl``.
        mel_extractor: Forwarded to ``process_sentence``.
        pitch_extractor: Forwarded to ``process_sentence``.
        energy_extractor: Forwarded to ``process_sentence``.
        nprocs: ``1`` processes the files sequentially; any other value uses a
            thread pool with that many workers.
        cut_sil: Forwarded to ``process_sentence``.
        spk_emb_dir: Forwarded to ``process_sentence``.
        write_metadata_method: Mode used to open ``metadata.jsonl``.
        token_average: Forwarded to ``process_sentence``.
    """
    # One set of keyword arguments for both execution paths, so the sequential
    # and the threaded branch cannot drift apart. The threaded branch used to
    # drop `token_average`, silently falling back to the worker's default.
    common_kwargs = dict(
        config=config,
        sentences=sentences,
        output_dir=output_dir,
        mel_extractor=mel_extractor,
        pitch_extractor=pitch_extractor,
        energy_extractor=energy_extractor,
        cut_sil=cut_sil,
        spk_emb_dir=spk_emb_dir,
        token_average=token_average)

    results = []
    if nprocs == 1:
        for fp in tqdm.tqdm(fps, total=len(fps)):
            record = process_sentence(fp=fp, **common_kwargs)
            if record:
                results.append(record)
    else:
        with ThreadPoolExecutor(nprocs) as pool:
            futures = []
            with tqdm.tqdm(total=len(fps)) as progress:
                for fp in fps:
                    future = pool.submit(
                        process_sentence, fp=fp, **common_kwargs)
                    # advance the progress bar as each task finishes
                    future.add_done_callback(lambda p: progress.update())
                    futures.append(future)

                # collect in submission order; skipped utterances yield None
                for ft in futures:
                    record = ft.result()
                    if record:
                        results.append(record)

    results.sort(key=itemgetter("utt_id"))
    with jsonlines.open(output_dir / "metadata.jsonl",
                        write_metadata_method) as writer:
        for item in results:
            writer.write(item)
    print("Done")


def _split_sequential(wav_files: List[Path], num_train: int, num_dev: int):
    """Split a file list by position into (train, dev, test)."""
    train_files = wav_files[:num_train]
    dev_files = wav_files[num_train:num_train + num_dev]
    test_files = wav_files[num_train + num_dev:]
    return train_files, dev_files, test_files


def _split_per_speaker(wav_dir: Path,
                       pattern: str,
                       sub_num_dev: int=5,
                       min_files: int=100):
    """Split the files of every speaker sub-directory into (train, dev, test).

    A speaker with more than ``min_files`` files gives its last
    ``sub_num_dev`` files to test and the ``sub_num_dev`` before them to dev;
    any other speaker contributes all of its files to train.
    """
    train_files, dev_files, test_files = [], [], []
    for speaker in os.listdir(wav_dir):
        wav_files = sorted(list((wav_dir / speaker).rglob(pattern)))
        if len(wav_files) > min_files:
            train_files += wav_files[:-sub_num_dev * 2]
            dev_files += wav_files[-sub_num_dev * 2:-sub_num_dev]
            test_files += wav_files[-sub_num_dev:]
        else:
            train_files += wav_files
    return train_files, dev_files, test_files


def main():
    """Command line entry point: split a dataset and dump JETS features.

    Parses the command line, loads the config and the duration file, writes
    the phone / speaker id maps into the dump directory, splits the audio
    files into train / dev / test and runs ``process_sentences`` on every
    non-empty split.

    Raises:
        ValueError: If ``--dataset`` names an unsupported dataset.
    """
    # parse config and args
    parser = argparse.ArgumentParser(
        description="Preprocess audio and then extract features.")

    parser.add_argument(
        "--dataset",
        default="baker",
        type=str,
        help="name of dataset, should in {baker, aishell3, canton, ljspeech, vctk} now"
    )

    parser.add_argument(
        "--rootdir", default=None, type=str, help="directory to dataset.")

    parser.add_argument(
        "--dumpdir",
        type=str,
        required=True,
        help="directory to dump feature files.")
    parser.add_argument(
        "--dur-file", default=None, type=str, help="path to durations.txt.")

    parser.add_argument("--config", type=str, help="jets config file.")

    parser.add_argument(
        "--num-cpu", type=int, default=1, help="number of process.")

    parser.add_argument(
        "--cut-sil",
        type=str2bool,
        default=True,
        help="whether cut sil in the edge of audio")

    parser.add_argument(
        "--spk_emb_dir",
        default=None,
        type=str,
        help="directory to speaker embedding files.")

    parser.add_argument(
        "--write_metadata_method",
        default="w",
        type=str,
        choices=["w", "a"],
        help="How the metadata.jsonl file is written.")

    parser.add_argument(
        "--token_average",
        type=str2bool,
        default=False,
        help="Average the energy and pitch accroding to durations")
    args = parser.parse_args()

    # These options default to None but are needed below; report them through
    # argparse instead of failing inside Path(None) / open(None).
    for flag, value in (("--rootdir", args.rootdir),
                        ("--dur-file", args.dur_file),
                        ("--config", args.config)):
        if value is None:
            parser.error(f"{flag} is required")

    rootdir = Path(args.rootdir).expanduser()
    dumpdir = Path(args.dumpdir).expanduser()
    # use absolute path
    dumpdir = dumpdir.resolve()
    dumpdir.mkdir(parents=True, exist_ok=True)
    dur_file = Path(args.dur_file).expanduser()

    if args.spk_emb_dir:
        spk_emb_dir = Path(args.spk_emb_dir).expanduser().resolve()
    else:
        spk_emb_dir = None

    assert rootdir.is_dir()
    assert dur_file.is_file()

    with open(args.config, 'rt') as f:
        config = CfgNode(yaml.safe_load(f))

    sentences, speaker_set = get_phn_dur(dur_file)

    merge_silence(sentences)
    phone_id_map_path = dumpdir / "phone_id_map.txt"
    speaker_id_map_path = dumpdir / "speaker_id_map.txt"
    get_input_token(sentences, phone_id_map_path, args.dataset)
    get_spk_id_map(speaker_set, speaker_id_map_path)

    # split data into 3 sections
    if args.dataset == "baker":
        wav_files = sorted(list((rootdir / "Wave").rglob("*.wav")))
        train_wav_files, dev_wav_files, test_wav_files = _split_sequential(
            wav_files, num_train=9800, num_dev=100)
    elif args.dataset == "aishell3":
        train_wav_files, dev_wav_files, test_wav_files = _split_per_speaker(
            rootdir / "train" / "wav", "*.wav")
    elif args.dataset == "canton":
        train_wav_files, dev_wav_files, test_wav_files = _split_per_speaker(
            rootdir / "WAV", "*.wav")
    elif args.dataset == "ljspeech":
        wav_files = sorted(list((rootdir / "wavs").rglob("*.wav")))
        train_wav_files, dev_wav_files, test_wav_files = _split_sequential(
            wav_files, num_train=12900, num_dev=100)
    elif args.dataset == "vctk":
        train_wav_files, dev_wav_files, test_wav_files = _split_per_speaker(
            rootdir / "wav48_silence_trimmed", "*_mic2.flac")
    else:
        # Previously this only printed a message and then crashed further
        # down on the unbound file lists; stop here with a clear error.
        raise ValueError(
            "dataset should in {baker, aishell3, canton, ljspeech, vctk} now!")

    train_dump_dir = dumpdir / "train" / "raw"
    train_dump_dir.mkdir(parents=True, exist_ok=True)
    dev_dump_dir = dumpdir / "dev" / "raw"
    dev_dump_dir.mkdir(parents=True, exist_ok=True)
    test_dump_dir = dumpdir / "test" / "raw"
    test_dump_dir.mkdir(parents=True, exist_ok=True)

    # Extractor
    mel_extractor = LogMelFBank(
        sr=config.fs,
        n_fft=config.n_fft,
        hop_length=config.n_shift,
        win_length=config.win_length,
        window=config.window,
        n_mels=config.n_mels,
        fmin=config.fmin,
        fmax=config.fmax)
    pitch_extractor = Pitch(
        sr=config.fs,
        hop_length=config.n_shift,
        f0min=config.f0min,
        f0max=config.f0max)
    energy_extractor = Energy(
        n_fft=config.n_fft,
        hop_length=config.n_shift,
        win_length=config.win_length,
        window=config.window)

    # process for the 3 sections, skipping empty ones
    splits = (
        (train_wav_files, train_dump_dir),
        (dev_wav_files, dev_dump_dir),
        (test_wav_files, test_dump_dir), )
    for split_files, split_dump_dir in splits:
        if not split_files:
            continue
        process_sentences(
            config=config,
            fps=split_files,
            sentences=sentences,
            output_dir=split_dump_dir,
            mel_extractor=mel_extractor,
            pitch_extractor=pitch_extractor,
            energy_extractor=energy_extractor,
            nprocs=args.num_cpu,
            cut_sil=args.cut_sil,
            spk_emb_dir=spk_emb_dir,
            write_metadata_method=args.write_metadata_method,
            token_average=args.token_average)


# Run the preprocessing only when this file is executed as a script.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/jets/synthesize.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from pathlib import Path

import jsonlines
import numpy as np
import paddle
import soundfile as sf
import yaml
from timer import timer
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.data_table import DataTable
from paddlespeech.t2s.models.jets import JETS
from paddlespeech.t2s.utils import str2bool


def evaluate(args):
    """Synthesize every utterance of the test metadata with a JETS checkpoint.

    Reads ``args.test_metadata``, ``args.config``, ``args.phones_dict`` and,
    when given, ``args.speaker_dict``; restores the ``"main_params"`` entry of
    ``args.ckpt`` into a ``JETS`` model and writes one ``<utt_id>.wav`` per
    utterance into ``args.output_dir``. Per-utterance and overall generation
    speed / RTF are printed.
    """
    # evaluation metadata, one record per utterance
    with jsonlines.open(args.test_metadata, 'r') as reader:
        test_metadata = list(reader)
    # model / feature configuration
    with open(args.config) as f:
        config = CfgNode(yaml.safe_load(f))

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(config)

    fields = ["utt_id", "text"]
    converters = {}
    spk_num = None

    if args.speaker_dict is not None:
        print("multiple speaker jets!")
        # the number of speakers is the number of lines of the id map
        with open(args.speaker_dict, 'rt') as f:
            spk_num = len(f.readlines())
        fields.append("spk_id")
    elif args.voice_cloning:
        print("Evaluating voice cloning!")
        fields.append("spk_emb")
    else:
        print("single speaker jets!")
    print("spk_num:", spk_num)

    test_dataset = DataTable(
        data=test_metadata, fields=fields, converters=converters)

    # the vocabulary size is the number of lines of the phone id map
    with open(args.phones_dict, "r") as f:
        vocab_size = len(f.readlines())
    print("vocab_size:", vocab_size)

    odim = config.n_fft // 2 + 1
    model_conf = config["model"]
    model_conf["generator_params"]["spks"] = spk_num

    jets = JETS(idim=vocab_size, odim=odim, **model_conf)
    checkpoint = paddle.load(args.ckpt)
    jets.set_state_dict(checkpoint["main_params"])
    jets.eval()

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    total_samples = 0
    total_seconds = 0

    for datum in test_dataset:
        utt_id = datum["utt_id"]
        phone_ids = paddle.to_tensor(datum["text"])
        with timer() as t:
            with paddle.no_grad():
                spk_emb, spk_id = None, None
                # multi speaker
                if args.voice_cloning and "spk_emb" in datum:
                    spk_emb = paddle.to_tensor(np.load(datum["spk_emb"]))
                elif "spk_id" in datum:
                    spk_id = paddle.to_tensor(datum["spk_id"])
                out = jets.inference(
                    text=phone_ids, sids=spk_id, spembs=spk_emb)
            wav = out["wav"].numpy()
            total_samples += wav.size
            total_seconds += t.elapse
            speed = wav.size / t.elapse
            rtf = config.fs / speed
        print(
            f"{utt_id}, wave: {wav.size}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}."
        )
        wav_path = output_dir / (utt_id + ".wav")
        sf.write(str(wav_path), wav, samplerate=config.fs)
        print(f"{utt_id} done!")

    overall_speed = total_samples / total_seconds
    print(
        f"generation speed: {overall_speed}Hz, RTF: {config.fs / overall_speed}"
    )


def parse_args():
    """Build the command line parser of the JETS synthesis script and parse it.

    Returns:
        The ``argparse.Namespace`` produced from the process arguments.
    """
    parser = argparse.ArgumentParser(description="Synthesize with JETS")

    # (flags, keyword arguments), registered in exactly this order
    option_table = [
        # model
        (('--config', ), dict(type=str, default=None, help='Config of JETS.')),
        (('--ckpt', ), dict(
            type=str, default=None, help='Checkpoint file of JETS.')),
        (("--phones_dict", ), dict(
            type=str, default=None, help="phone vocabulary file.")),
        (("--speaker_dict", ), dict(
            type=str, default=None, help="speaker id map file.")),
        (("--voice-cloning", ), dict(
            type=str2bool,
            default=False,
            help="whether training voice cloning model.")),
        # other
        (("--ngpu", ), dict(
            type=int, default=1, help="if ngpu == 0, use cpu.")),
        (("--test_metadata", ), dict(type=str, help="test metadata.")),
        (("--output_dir", ), dict(type=str, help="output dir.")),
    ]
    for flags, options in option_table:
        parser.add_argument(*flags, **options)

    return parser.parse_args()


def main():
    """Parse the arguments, select the paddle device and run the evaluation."""
    args = parse_args()

    ngpu = args.ngpu
    if ngpu < 0:
        # only reported; the evaluation still runs afterwards
        print("ngpu should >= 0 !")
    elif ngpu == 0:
        paddle.set_device("cpu")
    else:
        paddle.set_device("gpu")

    evaluate(args)


# Run the synthesis only when this file is executed as a script.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/jets/synthesize_e2e.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from pathlib import Path

import paddle
import soundfile as sf
import yaml
from timer import timer
from yacs.config import CfgNode

from paddlespeech.t2s.exps.syn_utils import am_to_static
from paddlespeech.t2s.exps.syn_utils import get_frontend
from paddlespeech.t2s.exps.syn_utils import get_sentences
from paddlespeech.t2s.models.jets import JETS
from paddlespeech.t2s.models.jets import JETSInference
from paddlespeech.t2s.utils import str2bool


def evaluate(args):
    """Synthesize the sentences of ``args.text`` end to end with JETS.

    Reads ``args.config``, ``args.phones_dict`` and, when given,
    ``args.speaker_dict``; restores the ``"main_params"`` entry of
    ``args.ckpt`` into a ``JETS`` model wrapped in ``JETSInference``
    (converted through ``am_to_static`` when ``args.inference_dir`` is set)
    and writes one ``<utt_id>.wav`` per sentence into ``args.output_dir``.

    Raises:
        ValueError: If ``args.lang`` is neither ``'zh'`` nor ``'en'``.
    """
    # Init body.
    with open(args.config) as f:
        config = CfgNode(yaml.safe_load(f))

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(config)

    # Fail early: the synthesis loop only supports these languages. This used
    # to be a print inside the loop followed by a crash on unbound phone_ids.
    if args.lang not in {'zh', 'en'}:
        raise ValueError("lang should in {'zh', 'en'}!")

    sentences = get_sentences(text_file=args.text, lang=args.lang)

    # frontend
    frontend = get_frontend(lang=args.lang, phones_dict=args.phones_dict)
    # acoustic model: the dataset name is what follows the last '_' of --am
    am_dataset = args.am[args.am.rindex('_') + 1:]

    spk_num = None
    if args.speaker_dict is not None:
        print("multiple speaker jets!")
        with open(args.speaker_dict, 'rt') as f:
            spk_id = [line.strip().split() for line in f.readlines()]
        spk_num = len(spk_id)
    else:
        print("single speaker jets!")
    print("spk_num:", spk_num)

    with open(args.phones_dict, "r") as f:
        phn_id = [line.strip().split() for line in f.readlines()]
    vocab_size = len(phn_id)
    print("vocab_size:", vocab_size)

    odim = config.n_fft // 2 + 1
    config["model"]["generator_params"]["spks"] = spk_num

    jets = JETS(idim=vocab_size, odim=odim, **config["model"])
    jets.set_state_dict(paddle.load(args.ckpt)["main_params"])
    jets.eval()

    jets_inference = JETSInference(jets)
    # whether dygraph to static
    if args.inference_dir:
        jets_inference = am_to_static(
            am_inference=jets_inference,
            am=args.am,
            inference_dir=args.inference_dir,
            speaker_dict=args.speaker_dict)

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    merge_sentences = False

    # a speaker id is fed only to multi-speaker models of these datasets
    use_spk_id = am_dataset in {"aishell3", "vctk"} and spk_num is not None

    N = 0
    T = 0
    for utt_id, sentence in sentences:
        with timer() as t:
            input_ids = frontend.get_input_ids(
                sentence, merge_sentences=merge_sentences)
            phone_ids = input_ids["phone_ids"]
            # Audio chunks of THIS sentence only, so nothing can leak in from
            # a previous sentence when the frontend returns no chunk.
            wav_parts = []
            wav_all = None
            with paddle.no_grad():
                for part_phone_ids in phone_ids:
                    if use_spk_id:
                        spk_id = paddle.to_tensor(args.spk_id)
                        wav = jets_inference(part_phone_ids, spk_id)
                    else:
                        wav = jets_inference(part_phone_ids)
                    wav_parts.append(wav)
                if len(wav_parts) == 1:
                    wav_all = wav_parts[0]
                elif wav_parts:
                    wav_all = paddle.concat(wav_parts)
        if wav_all is None:
            # nothing to write for a sentence without any phone chunk
            print(f"{utt_id} has no phones to synthesize, skipped!")
            continue
        wav = wav_all.numpy()
        N += wav.size
        T += t.elapse
        speed = wav.size / t.elapse
        rtf = config.fs / speed
        print(
            f"{utt_id}, wave: {wav.shape}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}."
        )
        sf.write(str(output_dir / (utt_id + ".wav")), wav, samplerate=config.fs)
        print(f"{utt_id} done!")
    if T > 0 and N > 0:
        print(f"generation speed: {N / T}Hz, RTF: {config.fs / (N / T) }")
    else:
        # avoid a ZeroDivisionError when nothing was synthesized
        print("no audio was generated!")


def parse_args():
    """Build the command line parser of the JETS e2e synthesis script and parse it.

    Returns:
        The ``argparse.Namespace`` produced from the process arguments.
    """
    parser = argparse.ArgumentParser(description="Synthesize with JETS")

    # (flag, keyword arguments), registered in exactly this order
    option_table = [
        # model
        ('--config', dict(type=str, default=None, help='Config of JETS.')),
        ('--ckpt', dict(
            type=str, default=None, help='Checkpoint file of JETS.')),
        ("--phones_dict", dict(
            type=str, default=None, help="phone vocabulary file.")),
        ("--speaker_dict", dict(
            type=str, default=None, help="speaker id map file.")),
        ('--spk_id', dict(
            type=int,
            default=0,
            help='spk id for multi speaker acoustic model')),
        # other
        ('--lang', dict(
            type=str, default='zh', help='Choose model language. zh or en')),
        ("--inference_dir", dict(
            type=str, default=None, help="dir to save inference models")),
        ("--ngpu", dict(type=int, default=1, help="if ngpu == 0, use cpu.")),
        ("--text", dict(
            type=str,
            help="text to synthesize, a 'utt_id sentence' pair per line.")),
        ("--output_dir", dict(type=str, help="output dir.")),
        ('--am', dict(
            type=str,
            default='jets_csmsc',
            help='Choose acoustic model type of tts task.')),
    ]
    for flag, options in option_table:
        parser.add_argument(flag, **options)

    return parser.parse_args()


def main():
    """Parse the arguments, select the paddle device and run the synthesis."""
    args = parse_args()

    ngpu = args.ngpu
    if ngpu < 0:
        # only reported; the synthesis still runs afterwards
        print("ngpu should >= 0 !")
    elif ngpu == 0:
        paddle.set_device("cpu")
    else:
        paddle.set_device("gpu")

    evaluate(args)


# Run the end-to-end synthesis only when this file is executed as a script.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/jets/train.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
import os
import shutil
from pathlib import Path

import jsonlines
import numpy as np
import paddle
import yaml
from paddle import DataParallel
from paddle import distributed as dist
from paddle.io import DataLoader
from paddle.optimizer import AdamW
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.am_batch_fn import jets_multi_spk_batch_fn
from paddlespeech.t2s.datasets.am_batch_fn import jets_single_spk_batch_fn
from paddlespeech.t2s.datasets.data_table import DataTable
from paddlespeech.t2s.datasets.sampler import ErnieSATSampler
from paddlespeech.t2s.models.jets import JETS
from paddlespeech.t2s.models.jets import JETSEvaluator
from paddlespeech.t2s.models.jets import JETSUpdater
from paddlespeech.t2s.modules.losses import DiscriminatorAdversarialLoss
from paddlespeech.t2s.modules.losses import FeatureMatchLoss
from paddlespeech.t2s.modules.losses import ForwardSumLoss
from paddlespeech.t2s.modules.losses import GeneratorAdversarialLoss
from paddlespeech.t2s.modules.losses import MelSpectrogramLoss
from paddlespeech.t2s.modules.losses import VarianceLoss
from paddlespeech.t2s.training.extensions.snapshot import Snapshot
from paddlespeech.t2s.training.extensions.visualizer import VisualDL
from paddlespeech.t2s.training.optimizer import scheduler_classes
from paddlespeech.t2s.training.seeding import seed_everything
from paddlespeech.t2s.training.trainer import Trainer
from paddlespeech.t2s.utils import str2bool


def train_sp(args, config):
    """Set up and run JETS training in the current process.

    The function selects the device, seeds the random generators, builds the
    train / dev datasets and dataloaders, the JETS model, its losses,
    optimizers and learning-rate schedulers, wires them into an updater, an
    evaluator and a ``Trainer`` and finally calls ``trainer.run()``.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``ngpu``, ``speaker_dict``, ``voice_cloning``, ``train_metadata``,
            ``dev_metadata``, ``phones_dict``, ``output_dir`` and ``config``.
        config: Training configuration loaded from the YAML file. It is
            accessed both by attribute and by key, and
            ``config["model"]["generator_params"]["spks"]`` is overwritten
            with the number of speakers found in ``args.speaker_dict``
            (``None`` when no speaker dict is given).

    Side effects:
        Creates ``args.output_dir`` and, on rank 0, copies the config file
        into it.
    """
    # decides device type and whether to run in parallel
    # setup running environment correctly
    world_size = paddle.distributed.get_world_size()
    if (not paddle.is_compiled_with_cuda()) or args.ngpu == 0:
        paddle.set_device("cpu")
    else:
        paddle.set_device("gpu")
        if world_size > 1:
            paddle.distributed.init_parallel_env()

    # set the random seed, it is a must for multiprocess training
    seed_everything(config.seed)

    print(
        f"rank: {dist.get_rank()}, pid: {os.getpid()}, parent_pid: {os.getppid()}",
    )

    # dataloader has been too verbose
    logging.getLogger("DataLoader").disabled = True

    # metadata fields every example must provide
    fields = [
        "text", "text_lengths", "feats", "feats_lengths", "wave", "durations",
        "pitch", "energy"
    ]

    # these fields are stored as paths in the metadata and loaded with np.load
    converters = {
        "wave": np.load,
        "feats": np.load,
        "pitch": np.load,
        "energy": np.load,
    }
    spk_num = None
    if args.speaker_dict is not None:
        print("multiple speaker jets!")
        collate_fn = jets_multi_spk_batch_fn
        with open(args.speaker_dict, 'rt', encoding='utf-8') as f:
            spk_id = [line.strip().split() for line in f.readlines()]
        # one line of the speaker dict per speaker
        spk_num = len(spk_id)
        fields += ["spk_id"]
    elif args.voice_cloning:
        print("Training voice cloning!")
        collate_fn = jets_multi_spk_batch_fn
        fields += ["spk_emb"]
        converters["spk_emb"] = np.load
    else:
        print("single speaker jets!")
        collate_fn = jets_single_spk_batch_fn
    print("spk_num:", spk_num)

    # construct dataset for training and validation
    with jsonlines.open(args.train_metadata, 'r') as reader:
        train_metadata = list(reader)
    train_dataset = DataTable(
        data=train_metadata,
        fields=fields,
        converters=converters, )
    with jsonlines.open(args.dev_metadata, 'r') as reader:
        dev_metadata = list(reader)
    dev_dataset = DataTable(
        data=dev_metadata,
        fields=fields,
        converters=converters, )

    # collate function and dataloader
    # NOTE(review): the training sampler is created with shuffle=False;
    # confirm that this is intended for ErnieSATSampler.
    train_sampler = ErnieSATSampler(
        train_dataset,
        batch_size=config.batch_size,
        shuffle=False,
        drop_last=True)
    dev_sampler = ErnieSATSampler(
        dev_dataset,
        batch_size=config.batch_size,
        shuffle=False,
        drop_last=False)
    print("samplers done!")

    train_dataloader = DataLoader(
        train_dataset,
        batch_sampler=train_sampler,
        collate_fn=collate_fn,
        num_workers=config.num_workers)

    dev_dataloader = DataLoader(
        dev_dataset,
        batch_sampler=dev_sampler,
        collate_fn=collate_fn,
        num_workers=config.num_workers)
    print("dataloaders done!")

    # the vocabulary size is the number of lines of the phone dict
    with open(args.phones_dict, 'rt', encoding='utf-8') as f:
        phn_id = [line.strip().split() for line in f.readlines()]
    vocab_size = len(phn_id)
    print("vocab_size:", vocab_size)

    odim = config.n_mels
    config["model"]["generator_params"]["spks"] = spk_num
    model = JETS(idim=vocab_size, odim=odim, **config["model"])
    gen_parameters = model.generator.parameters()
    dis_parameters = model.discriminator.parameters()
    if world_size > 1:
        # DataParallel wraps the model, the sub-networks live under _layers
        model = DataParallel(model)
        gen_parameters = model._layers.generator.parameters()
        dis_parameters = model._layers.discriminator.parameters()

    print("model done!")

    # loss
    criterion_mel = MelSpectrogramLoss(
        **config["mel_loss_params"], )
    criterion_feat_match = FeatureMatchLoss(
        **config["feat_match_loss_params"], )
    criterion_gen_adv = GeneratorAdversarialLoss(
        **config["generator_adv_loss_params"], )
    criterion_dis_adv = DiscriminatorAdversarialLoss(
        **config["discriminator_adv_loss_params"], )
    criterion_var = VarianceLoss()
    criterion_forwardsum = ForwardSumLoss()

    print("criterions done!")

    # generator and discriminator each get their own scheduler + AdamW
    lr_schedule_g = scheduler_classes[config["generator_scheduler"]](
        **config["generator_scheduler_params"])
    optimizer_g = AdamW(
        learning_rate=lr_schedule_g,
        parameters=gen_parameters,
        **config["generator_optimizer_params"])

    lr_schedule_d = scheduler_classes[config["discriminator_scheduler"]](
        **config["discriminator_scheduler_params"])
    optimizer_d = AdamW(
        learning_rate=lr_schedule_d,
        parameters=dis_parameters,
        **config["discriminator_optimizer_params"])

    print("optimizers done!")

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    if dist.get_rank() == 0:
        # Path.name is separator-agnostic; splitting on "/" would keep the
        # whole path on platforms that use "\\" and make the copy target
        # collapse onto the source file.
        config_name = Path(args.config).name
        # copy conf to output_dir
        shutil.copyfile(args.config, output_dir / config_name)

    updater = JETSUpdater(
        model=model,
        optimizers={
            "generator": optimizer_g,
            "discriminator": optimizer_d,
        },
        criterions={
            "mel": criterion_mel,
            "feat_match": criterion_feat_match,
            "gen_adv": criterion_gen_adv,
            "dis_adv": criterion_dis_adv,
            "var": criterion_var,
            "forwardsum": criterion_forwardsum,
        },
        schedulers={
            "generator": lr_schedule_g,
            "discriminator": lr_schedule_d,
        },
        dataloader=train_dataloader,
        lambda_adv=config.lambda_adv,
        lambda_mel=config.lambda_mel,
        lambda_feat_match=config.lambda_feat_match,
        lambda_var=config.lambda_var,
        lambda_align=config.lambda_align,
        generator_first=config.generator_first,
        use_alignment_module=config.use_alignment_module,
        output_dir=output_dir)

    evaluator = JETSEvaluator(
        model=model,
        criterions={
            "mel": criterion_mel,
            "feat_match": criterion_feat_match,
            "gen_adv": criterion_gen_adv,
            "dis_adv": criterion_dis_adv,
            "var": criterion_var,
            "forwardsum": criterion_forwardsum,
        },
        dataloader=dev_dataloader,
        lambda_adv=config.lambda_adv,
        lambda_mel=config.lambda_mel,
        lambda_feat_match=config.lambda_feat_match,
        lambda_var=config.lambda_var,
        lambda_align=config.lambda_align,
        generator_first=config.generator_first,
        use_alignment_module=config.use_alignment_module,
        output_dir=output_dir)

    trainer = Trainer(
        updater,
        stop_trigger=(config.train_max_steps, "iteration"),
        out=output_dir)

    # evaluation and VisualDL logging are attached on rank 0 only, the
    # snapshot extension is attached on every rank
    if dist.get_rank() == 0:
        trainer.extend(
            evaluator, trigger=(config.eval_interval_steps, 'iteration'))
        trainer.extend(VisualDL(output_dir), trigger=(1, 'iteration'))
    trainer.extend(
        Snapshot(max_size=config.num_snapshots),
        trigger=(config.save_interval_steps, 'iteration'))

    print("Trainer Done!")
    trainer.run()


def main():
    """Command-line entry point for JETS training.

    Parses the command-line options, loads the YAML config into a
    ``CfgNode``, prints both, and then either spawns ``args.ngpu`` worker
    processes running ``train_sp`` (when ``--ngpu`` > 1) or calls
    ``train_sp`` directly in this process.
    """
    # parse args and config and redirect to train_sp

    parser = argparse.ArgumentParser(description="Train a JETS model.")
    parser.add_argument("--config", type=str, help="JETS config file")
    parser.add_argument("--train-metadata", type=str, help="training data.")
    parser.add_argument("--dev-metadata", type=str, help="dev data.")
    parser.add_argument("--output-dir", type=str, help="output dir.")
    parser.add_argument(
        "--ngpu", type=int, default=1, help="if ngpu == 0, use cpu.")
    parser.add_argument(
        "--phones-dict", type=str, default=None, help="phone vocabulary file.")
    parser.add_argument(
        "--speaker-dict",
        type=str,
        default=None,
        help="speaker id map file for multiple speaker model.")

    parser.add_argument(
        "--voice-cloning",
        type=str2bool,
        default=False,
        help="whether training voice cloning model.")

    args = parser.parse_args()

    # Read the config as UTF-8 explicitly, like the other text files of this
    # script; the platform default encoding is locale dependent and can fail
    # on configs that contain non-ASCII characters.
    with open(args.config, 'rt', encoding='utf-8') as f:
        config = CfgNode(yaml.safe_load(f))

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(config)
    print(
        f"master see the word size: {dist.get_world_size()}, from pid: {os.getpid()}"
    )

    # dispatch
    if args.ngpu > 1:
        dist.spawn(train_sp, (args, config), nprocs=args.ngpu)
    else:
        train_sp(args, config)


if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/lite_predict.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from pathlib import Path

import soundfile as sf
from timer import timer

from paddlespeech.t2s.exps.lite_syn_utils import get_lite_am_output
from paddlespeech.t2s.exps.lite_syn_utils import get_lite_predictor
from paddlespeech.t2s.exps.lite_syn_utils import get_lite_voc_output
from paddlespeech.t2s.exps.syn_utils import get_frontend
from paddlespeech.t2s.exps.syn_utils import get_sentences


def parse_args():
    """Parse the command-line options of the Paddle-Lite inference script.

    Options are read from ``sys.argv``; options the parser does not know are
    silently ignored (``parse_known_args``).

    Returns:
        argparse.Namespace: namespace with ``am``, ``phones_dict``,
        ``tones_dict``, ``speaker_dict``, ``spk_id``, ``voc``, ``lang``,
        ``text``, ``inference_dir`` and ``output_dir``.
    """
    am_choices = [
        'speedyspeech_csmsc',
        'fastspeech2_csmsc',
        'fastspeech2_aishell3',
        'fastspeech2_ljspeech',
        'fastspeech2_vctk',
        'fastspeech2_mix',
    ]
    voc_choices = [
        'pwgan_csmsc',
        'pwgan_aishell3',
        'pwgan_ljspeech',
        'pwgan_vctk',
        'mb_melgan_csmsc',
        'hifigan_csmsc',
        'hifigan_aishell3',
        'hifigan_ljspeech',
        'hifigan_vctk',
    ]

    # (flag, keyword arguments) pairs, registered in this order
    option_table = [
        # acoustic model
        ('--am', dict(
            type=str,
            default='fastspeech2_csmsc',
            choices=am_choices,
            help='Choose acoustic model type of tts task.')),
        ("--phones_dict", dict(
            type=str, default=None, help="phone vocabulary file.")),
        ("--tones_dict", dict(
            type=str, default=None, help="tone vocabulary file.")),
        ("--speaker_dict", dict(
            type=str, default=None, help="speaker id map file.")),
        ('--spk_id', dict(
            type=int,
            default=0,
            help='spk id for multi speaker acoustic model')),
        # voc
        ('--voc', dict(
            type=str,
            default='pwgan_csmsc',
            choices=voc_choices,
            help='Choose vocoder type of tts task.')),
        # other
        ('--lang', dict(
            type=str,
            default='zh',
            help='Choose model language. zh or en or mix')),
        ("--text", dict(
            type=str,
            help="text to synthesize, a 'utt_id sentence' pair per line")),
        ("--inference_dir", dict(
            type=str, help="dir to save inference models")),
        ("--output_dir", dict(type=str, help="output dir")),
    ]

    cli = argparse.ArgumentParser(
        description="Paddle Infernce with acoustic model & vocoder.")
    for flag, options in option_table:
        cli.add_argument(flag, **options)

    known_args, _unknown = cli.parse_known_args()
    return known_args


# NOTE: originally written for models trained with csmsc; the --am / --voc
# choices in parse_args also list models for other datasets.
def main():
    """Synthesize every sentence of ``--text`` with Paddle-Lite models.

    Loads ``<am>_x86.nb`` and ``<voc>_x86.nb`` from ``--inference_dir``,
    runs up to three warm-up sentences, then synthesizes all sentences,
    writes ``<utt_id>.wav`` into ``--output_dir`` and prints per-utterance
    and overall speed / RTF figures.
    """
    args = parse_args()

    # frontend
    frontend = get_frontend(
        lang=args.lang,
        phones_dict=args.phones_dict,
        tones_dict=args.tones_dict)

    # am_predictor
    am_predictor = get_lite_predictor(
        model_dir=args.inference_dir, model_file=args.am + "_x86.nb")
    # model: {model_name}_{dataset}
    am_dataset = args.am[args.am.rindex('_') + 1:]

    # voc_predictor
    voc_predictor = get_lite_predictor(
        model_dir=args.inference_dir, model_file=args.voc + "_x86.nb")

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    sentences = get_sentences(text_file=args.text, lang=args.lang)

    merge_sentences = True
    # sample rate used for the RTF figures and the written wav files
    fs = 24000 if am_dataset != 'ljspeech' else 22050
    # warmup
    for utt_id, sentence in sentences[:3]:
        with timer() as t:
            mel = get_lite_am_output(
                input=sentence,
                am_predictor=am_predictor,
                am=args.am,
                frontend=frontend,
                lang=args.lang,
                merge_sentences=merge_sentences,
                speaker_dict=args.speaker_dict,
                spk_id=args.spk_id, )
            wav = get_lite_voc_output(voc_predictor=voc_predictor, input=mel)
        speed = wav.size / t.elapse
        rtf = fs / speed
        print(
            f"{utt_id}, mel: {mel.shape}, wave: {wav.shape}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}."
        )

    print("warm up done!")

    # N: total number of generated samples, T: total synthesis time
    N = 0
    T = 0
    for utt_id, sentence in sentences:
        with timer() as t:
            mel = get_lite_am_output(
                input=sentence,
                am_predictor=am_predictor,
                am=args.am,
                frontend=frontend,
                lang=args.lang,
                merge_sentences=merge_sentences,
                speaker_dict=args.speaker_dict,
                spk_id=args.spk_id, )
            wav = get_lite_voc_output(voc_predictor=voc_predictor, input=mel)

        N += wav.size
        T += t.elapse
        speed = wav.size / t.elapse
        rtf = fs / speed

        sf.write(output_dir / (utt_id + ".wav"), wav, samplerate=fs)
        print(
            f"{utt_id}, mel: {mel.shape}, wave: {wav.shape}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}."
        )

        print(f"{utt_id} done!")
    # With no sentences (or nothing generated) N / T or fs / (N / T) would
    # divide by zero; report that instead of crashing at the very end.
    if N > 0 and T > 0:
        print(f"generation speed: {N / T}Hz, RTF: {fs / (N / T) }")
    else:
        print("nothing was synthesized, skip the speed / RTF summary.")


if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/lite_predict_streaming.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from pathlib import Path

import numpy as np
import soundfile as sf
from timer import timer

from paddlespeech.t2s.exps.lite_syn_utils import get_lite_am_sublayer_output
from paddlespeech.t2s.exps.lite_syn_utils import get_lite_predictor
from paddlespeech.t2s.exps.lite_syn_utils import get_lite_streaming_am_output
from paddlespeech.t2s.exps.lite_syn_utils import get_lite_voc_output
from paddlespeech.t2s.exps.syn_utils import denorm
from paddlespeech.t2s.exps.syn_utils import get_chunks
from paddlespeech.t2s.exps.syn_utils import get_frontend
from paddlespeech.t2s.exps.syn_utils import get_sentences
from paddlespeech.t2s.exps.syn_utils import run_frontend
from paddlespeech.t2s.utils import str2bool


def parse_args():
    """Parse the command-line options of the streaming Paddle-Lite script.

    Options are read from ``sys.argv``; options the parser does not know are
    silently ignored (``parse_known_args``).

    Returns:
        argparse.Namespace: namespace with the acoustic-model, vocoder,
        input / output and streaming (``am_streaming``, ``block_size``,
        ``pad_size``) settings.
    """
    cli = argparse.ArgumentParser(
        description="Paddle Infernce with acoustic model & vocoder.")
    add = cli.add_argument

    # ---- acoustic model ----
    add('--am',
        type=str,
        default='fastspeech2_csmsc',
        choices=['fastspeech2_csmsc'],
        help='Choose acoustic model type of tts task.')
    add("--am_stat",
        type=str,
        default=None,
        help="mean and standard deviation used to normalize spectrogram "
        "when training acoustic model.")
    add("--phones_dict", type=str, default=None, help="phone vocabulary file.")
    add("--tones_dict", type=str, default=None, help="tone vocabulary file.")
    add("--speaker_dict", type=str, default=None, help="speaker id map file.")
    add('--spk_id',
        type=int,
        default=0,
        help='spk id for multi speaker acoustic model')

    # ---- vocoder ----
    add('--voc',
        type=str,
        default='pwgan_csmsc',
        choices=['pwgan_csmsc', 'mb_melgan_csmsc', 'hifigan_csmsc'],
        help='Choose vocoder type of tts task.')

    # ---- input / output ----
    add('--lang',
        type=str,
        default='zh',
        help='Choose model language. zh or en')
    add("--text",
        type=str,
        help="text to synthesize, a 'utt_id sentence' pair per line")
    add("--inference_dir", type=str, help="dir to save inference models")
    add("--output_dir", type=str, help="output dir")

    # ---- streaming ----
    add("--am_streaming",
        type=str2bool,
        default=False,
        help="whether use streaming acoustic model")
    add("--block_size",
        type=int,
        default=42,
        help="block size of am streaming")
    add("--pad_size", type=int, default=12, help="pad size of am streaming")

    known_args, _unknown = cli.parse_known_args()
    return known_args


# only inference for models trained with csmsc now
def main():
    """Synthesize every sentence of ``--text`` with the split Paddle-Lite AM.

    The acoustic model is loaded as three Paddle-Lite predictors
    (``_am_encoder_infer``, ``_am_decoder``, ``_am_postnet``) plus a vocoder
    predictor. After up to three warm-up sentences each sentence is run
    through the frontend and the encoder; the decoder / postnet then run
    either on the whole encoder output or, with ``--am_streaming``, chunk by
    chunk. The de-normalized mel is passed to the vocoder and the result is
    written to ``<output_dir>/<utt_id>.wav``.
    """
    args = parse_args()

    # frontend
    frontend = get_frontend(
        lang=args.lang,
        phones_dict=args.phones_dict,
        tones_dict=args.tones_dict)

    # am_predictor
    am_encoder_infer_predictor = get_lite_predictor(
        model_dir=args.inference_dir,
        model_file=args.am + "_am_encoder_infer" + "_x86.nb")
    am_decoder_predictor = get_lite_predictor(
        model_dir=args.inference_dir,
        model_file=args.am + "_am_decoder" + "_x86.nb")
    am_postnet_predictor = get_lite_predictor(
        model_dir=args.inference_dir,
        model_file=args.am + "_am_postnet" + "_x86.nb")
    # statistics used to undo the normalization of the predicted mel
    am_mu, am_std = np.load(args.am_stat)
    # model: {model_name}_{dataset}
    am_dataset = args.am[args.am.rindex('_') + 1:]

    # voc_predictor
    voc_predictor = get_lite_predictor(
        model_dir=args.inference_dir, model_file=args.voc + "_x86.nb")

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    sentences = get_sentences(text_file=args.text, lang=args.lang)

    merge_sentences = True

    # sample rate used for the RTF figures and the written wav files
    fs = 24000 if am_dataset != 'ljspeech' else 22050
    # warmup
    for utt_id, sentence in sentences[:3]:
        with timer() as t:
            normalized_mel = get_lite_streaming_am_output(
                input=sentence,
                am_encoder_infer_predictor=am_encoder_infer_predictor,
                am_decoder_predictor=am_decoder_predictor,
                am_postnet_predictor=am_postnet_predictor,
                frontend=frontend,
                lang=args.lang,
                merge_sentences=merge_sentences, )
            mel = denorm(normalized_mel, am_mu, am_std)
            wav = get_lite_voc_output(voc_predictor=voc_predictor, input=mel)
        speed = wav.size / t.elapse
        rtf = fs / speed
        print(
            f"{utt_id}, mel: {mel.shape}, wave: {wav.shape}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}."
        )

    print("warm up done!")

    # N: total number of generated samples, T: total synthesis time
    N = 0
    T = 0
    block_size = args.block_size
    pad_size = args.pad_size
    get_tone_ids = False
    for utt_id, sentence in sentences:
        with timer() as t:
            # frontend
            frontend_dict = run_frontend(
                frontend=frontend,
                text=sentence,
                merge_sentences=merge_sentences,
                get_tone_ids=get_tone_ids,
                lang=args.lang)
            phone_ids = frontend_dict['phone_ids']
            phones = phone_ids[0].numpy()
            # acoustic model
            orig_hs = get_lite_am_sublayer_output(
                am_encoder_infer_predictor, input=phones)

            if args.am_streaming:
                hss = get_chunks(orig_hs, block_size, pad_size)
                mel_list = []
                for i, hs in enumerate(hss):
                    am_decoder_output = get_lite_am_sublayer_output(
                        am_decoder_predictor, input=hs)
                    am_postnet_output = get_lite_am_sublayer_output(
                        am_postnet_predictor,
                        input=np.transpose(am_decoder_output, (0, 2, 1)))
                    am_output_data = am_decoder_output + np.transpose(
                        am_postnet_output, (0, 2, 1))
                    normalized_mel = am_output_data[0]

                    sub_mel = denorm(normalized_mel, am_mu, am_std)
                    # Clip the output that belongs to the context padding
                    # and keep only this block's own frames.
                    # Every chunk but the first starts with `pad_size`
                    # frames of left context. The right context of the last
                    # chunk(s) may be shorter than `pad_size` or missing, so
                    # cut by block length instead of assuming a full right
                    # pad: `[:-pad_size]` on the first chunk dropped real
                    # frames of short utterances and returned nothing for
                    # pad_size == 0.
                    # NOTE(review): assumes get_chunks gives every chunk
                    # after the first exactly `pad_size` frames of left
                    # context, as the previous slicing did.
                    start = 0 if i == 0 else pad_size
                    sub_mel = sub_mel[start:start + block_size]
                    mel_list.append(sub_mel)
                mel = np.concatenate(mel_list, axis=0)

            else:
                am_decoder_output = get_lite_am_sublayer_output(
                    am_decoder_predictor, input=orig_hs)
                am_postnet_output = get_lite_am_sublayer_output(
                    am_postnet_predictor,
                    input=np.transpose(am_decoder_output, (0, 2, 1)))
                am_output_data = am_decoder_output + np.transpose(
                    am_postnet_output, (0, 2, 1))
                normalized_mel = am_output_data[0]
                mel = denorm(normalized_mel, am_mu, am_std)
            # vocoder
            wav = get_lite_voc_output(voc_predictor=voc_predictor, input=mel)

        N += wav.size
        T += t.elapse
        speed = wav.size / t.elapse
        rtf = fs / speed

        # write with the same sample rate that the RTF figures are based on
        sf.write(output_dir / (utt_id + ".wav"), wav, samplerate=fs)
        print(
            f"{utt_id}, mel: {mel.shape}, wave: {wav.shape}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}."
        )

        print(f"{utt_id} done!")
    # With no sentences (or nothing generated) N / T or fs / (N / T) would
    # divide by zero; report that instead of crashing at the very end.
    if N > 0 and T > 0:
        print(f"generation speed: {N / T}Hz, RTF: {fs / (N / T) }")
    else:
        print("nothing was synthesized, skip the speed / RTF summary.")


if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/lite_syn_utils.py
================================================
import os
from pathlib import Path
from typing import Optional

import numpy as np
from paddlelite.lite import create_paddle_predictor
from paddlelite.lite import MobileConfig

from .syn_utils import run_frontend


# Paddle-Lite
def get_lite_predictor(model_dir: Optional[os.PathLike]=None,
                       model_file: Optional[os.PathLike]=None,
                       cpu_threads: int=1):
    """Create a Paddle-Lite predictor for ``model_dir / model_file``.

    Args:
        model_dir: Directory that contains the model file.
        model_file: File name of the model inside ``model_dir``.
        cpu_threads: Accepted for interface compatibility; it is not used
            by this function.

    Returns:
        The predictor returned by ``create_paddle_predictor``.
    """
    model_path = Path(model_dir) / model_file
    lite_config = MobileConfig()
    lite_config.set_model_from_file(str(model_path))
    return create_paddle_predictor(lite_config)


def get_lite_am_output(input: str,
                       am_predictor,
                       am: str,
                       frontend: object,
                       lang: str='zh',
                       merge_sentences: bool=True,
                       speaker_dict: Optional[os.PathLike]=None,
                       spk_id: int=0,
                       add_blank: bool=False):
    """Run the text frontend and a Paddle-Lite acoustic model on ``input``.

    Args:
        input: Text to synthesize.
        am_predictor: Paddle-Lite predictor of the acoustic model.
        am: Model tag of the form ``{model_name}_{dataset}``.
        frontend: Frontend object handed to ``run_frontend``.
        lang: Language handed to ``run_frontend``.
        merge_sentences: Handed to ``run_frontend``.
        speaker_dict: When set and the dataset is ``aishell3``, ``vctk`` or
            ``mix``, the speaker id is fed to the model as input 1.
        spk_id: Speaker id to feed in that case.
        add_blank: Handed to ``run_frontend``.

    Returns:
        Output 0 of the predictor, as returned by its ``numpy()``.
    """
    # am is "{model_name}_{dataset}"; split at the last underscore
    sep_pos = am.rindex('_')
    model_name = am[:sep_pos]
    dataset = am[sep_pos + 1:]

    # only speedyspeech takes tone ids
    need_tones = model_name == 'speedyspeech'
    need_spk = bool(dataset in {"aishell3", "vctk", "mix"} and speaker_dict)

    fe_out = run_frontend(
        frontend=frontend,
        text=input,
        merge_sentences=merge_sentences,
        get_tone_ids=need_tones,
        lang=lang,
        add_blank=add_blank, )

    if need_tones:
        tone_array = fe_out['tone_ids'][0].numpy()
        am_predictor.get_input(1).from_numpy(tone_array)

    if need_spk:
        # the speaker id is fed as a one-element array
        am_predictor.get_input(1).from_numpy(np.array([spk_id]))

    phone_array = fe_out['phone_ids'][0].numpy()
    am_predictor.get_input(0).from_numpy(phone_array)

    am_predictor.run()
    return am_predictor.get_output(0).numpy()


def get_lite_voc_output(voc_predictor, input):
    """Run a Paddle-Lite vocoder predictor on ``input``.

    Args:
        voc_predictor: Predictor exposing ``get_input``, ``run`` and
            ``get_output``.
        input: Data fed to input 0 of the predictor via ``from_numpy``.

    Returns:
        Output 0 of the predictor, as returned by its ``numpy()``.
    """
    voc_predictor.get_input(0).from_numpy(input)
    voc_predictor.run()
    return voc_predictor.get_output(0).numpy()


def get_lite_am_sublayer_output(am_sublayer_predictor, input):
    """Run one Paddle-Lite acoustic-model sub-network on ``input``.

    Args:
        am_sublayer_predictor: Predictor exposing ``get_input``, ``run`` and
            ``get_output``.
        input: Data fed to input 0 of the predictor via ``from_numpy``.

    Returns:
        Output 0 of the predictor, as returned by its ``numpy()``.
    """
    am_sublayer_predictor.get_input(0).from_numpy(input)
    am_sublayer_predictor.run()
    return am_sublayer_predictor.get_output(0).numpy()


def get_lite_streaming_am_output(input: str,
                                 am_encoder_infer_predictor,
                                 am_decoder_predictor,
                                 am_postnet_predictor,
                                 frontend,
                                 lang: str='zh',
                                 merge_sentences: bool=True):
    """Run the split acoustic model (encoder, decoder, postnet) on ``input``.

    The text is converted to phone ids by ``run_frontend`` (tone ids are not
    requested), pushed through the encoder and the decoder predictors, and
    the postnet output is added to the decoder output.

    Args:
        input: Text to synthesize.
        am_encoder_infer_predictor: Paddle-Lite predictor of the encoder.
        am_decoder_predictor: Paddle-Lite predictor of the decoder.
        am_postnet_predictor: Paddle-Lite predictor of the postnet.
        frontend: Frontend object handed to ``run_frontend``.
        lang: Language handed to ``run_frontend``.
        merge_sentences: Handed to ``run_frontend``.

    Returns:
        The first item (index 0) of the summed decoder + postnet output.
    """
    fe_out = run_frontend(
        frontend=frontend,
        text=input,
        merge_sentences=merge_sentences,
        get_tone_ids=False,
        lang=lang)
    phone_array = fe_out['phone_ids'][0].numpy()

    encoder_out = get_lite_am_sublayer_output(
        am_encoder_infer_predictor, input=phone_array)
    decoder_out = get_lite_am_sublayer_output(
        am_decoder_predictor, input=encoder_out)

    # the postnet consumes and produces data with the last two axes swapped
    swap_last_two = (0, 2, 1)
    postnet_out = get_lite_am_sublayer_output(
        am_postnet_predictor, input=np.transpose(decoder_out, swap_last_two))
    refined = decoder_out + np.transpose(postnet_out, swap_last_two)
    return refined[0]


================================================
FILE: paddlespeech/t2s/exps/ort_predict.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from pathlib import Path

import jsonlines
import numpy as np
import paddle
import soundfile as sf
from timer import timer

from paddlespeech.t2s.exps.syn_utils import get_sess
from paddlespeech.t2s.exps.syn_utils import get_test_dataset
from paddlespeech.t2s.utils import str2bool


def ort_predict(args):
    """Synthesize the test set with ONNX Runtime acoustic model and vocoder.

    Loads ``<am>.onnx`` and ``<voc>.onnx`` from ``args.inference_dir``, warms
    both sessions up with random inputs, then runs every example of the test
    metadata through both models, writes ``<utt_id>.wav`` into
    ``args.output_dir`` and prints per-utterance and overall speed / RTF.

    Args:
        args: Parsed command-line namespace. Reads ``test_metadata``, ``am``,
            ``voc``, ``inference_dir``, ``output_dir``, ``device``,
            ``cpu_threads`` and ``use_trt``.
    """

    # construct dataset for evaluation
    with jsonlines.open(args.test_metadata, 'r') as reader:
        test_metadata = list(reader)
    # args.am is "{model_name}_{dataset}"
    am_name = args.am[:args.am.rindex('_')]
    am_dataset = args.am[args.am.rindex('_') + 1:]
    test_dataset = get_test_dataset(test_metadata=test_metadata, am=args.am)

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # sample rate used for the RTF figures and the written wav files
    fs = 24000 if am_dataset != 'ljspeech' else 22050

    # am
    am_sess = get_sess(
        model_path=str(Path(args.inference_dir) / (args.am + '.onnx')),
        device=args.device,
        cpu_threads=args.cpu_threads,
        use_trt=args.use_trt)

    # vocoder
    voc_sess = get_sess(
        model_path=str(Path(args.inference_dir) / (args.voc + '.onnx')),
        device=args.device,
        cpu_threads=args.cpu_threads,
        use_trt=args.use_trt)

    # am warmup
    # random ids of a few different lengths
    for T in [27, 38, 54]:
        am_input_feed = {}
        if am_name == 'fastspeech2':
            phone_ids = np.random.randint(1, 266, size=(T, ))
            am_input_feed.update({'text': phone_ids})
        elif am_name == 'speedyspeech':
            phone_ids = np.random.randint(1, 92, size=(T, ))
            tone_ids = np.random.randint(1, 5, size=(T, ))
            am_input_feed.update({'phones': phone_ids, 'tones': tone_ids})
        am_sess.run(None, input_feed=am_input_feed)

    # voc warmup
    for T in [227, 308, 544]:
        data = np.random.rand(T, 80).astype("float32")
        voc_sess.run(None, {"logmel": data})
    print("warm up done!")

    # N: total number of generated samples, T: total synthesis time
    N = 0
    T = 0
    am_input_feed = {}
    for example in test_dataset:
        utt_id = example['utt_id']
        if am_name == 'fastspeech2':
            phone_ids = example["text"]
            am_input_feed.update({'text': phone_ids})
        elif am_name == 'speedyspeech':
            phone_ids = example["phones"]
            tone_ids = example["tones"]
            am_input_feed.update({'phones': phone_ids, 'tones': tone_ids})
        with timer() as t:
            mel = am_sess.run(output_names=None, input_feed=am_input_feed)
            mel = mel[0]
            wav = voc_sess.run(output_names=None, input_feed={'logmel': mel})
            N += len(wav[0])
            T += t.elapse
            speed = len(wav[0]) / t.elapse
            rtf = fs / speed
        sf.write(
            str(output_dir / (utt_id + ".wav")),
            np.array(wav)[0],
            samplerate=fs)
        print(
            f"{utt_id}, mel: {mel.shape}, wave: {len(wav[0])}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}."
        )
    # With an empty test set (or nothing generated) N / T or fs / (N / T)
    # would divide by zero; report that instead of crashing at the very end.
    if N > 0 and T > 0:
        print(f"generation speed: {N / T}Hz, RTF: {fs / (N / T) }")
    else:
        print("nothing was synthesized, skip the speed / RTF summary.")


def parse_args():
    """Parse the command-line options of the ONNX Runtime inference script.

    Options are read from ``sys.argv``; options the parser does not know are
    silently ignored (``parse_known_args``).

    Returns:
        argparse.Namespace: namespace with ``am``, ``voc``,
        ``inference_dir``, ``test_metadata``, ``output_dir``, ``use_trt``,
        ``device`` and ``cpu_threads``.
    """
    # (flag, keyword arguments) pairs, registered in this order
    option_table = [
        # acoustic model
        ('--am', {
            'type': str,
            'default': 'fastspeech2_csmsc',
            'choices': ['fastspeech2_csmsc', 'speedyspeech_csmsc'],
            'help': 'Choose acoustic model type of tts task.',
        }),
        # voc
        ('--voc', {
            'type': str,
            'default': 'hifigan_csmsc',
            'choices': ['hifigan_csmsc', 'mb_melgan_csmsc', 'pwgan_csmsc'],
            'help': 'Choose vocoder type of tts task.',
        }),
        # other
        ("--inference_dir", {
            'type': str,
            'help': "dir to save inference models",
        }),
        ("--test_metadata", {
            'type': str,
            'help': "test metadata.",
        }),
        ("--output_dir", {
            'type': str,
            'help': "output dir",
        }),
        # inference
        ("--use_trt", {
            'type': str2bool,
            'default': False,
            'help': "Whether to use inference engin TensorRT.",
        }),
        ("--device", {
            'default': "gpu",
            'choices': ["gpu", "cpu"],
            'help': "Device selected for inference.",
        }),
        ('--cpu_threads', {
            'type': int,
            'default': 1,
        }),
    ]

    cli = argparse.ArgumentParser(description="Infernce with onnxruntime.")
    for flag, options in option_table:
        cli.add_argument(flag, **options)

    known_args, _unknown = cli.parse_known_args()
    return known_args


def main():
    """Command-line entry point: parse the options and run ``ort_predict``."""
    cli_args = parse_args()
    # select the paddle device from the same --device option that
    # ort_predict passes on when it creates the ONNX Runtime sessions
    paddle.set_device(cli_args.device)
    ort_predict(cli_args)


if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/ort_predict_e2e.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from pathlib import Path

import numpy as np
import paddle
import soundfile as sf
from timer import timer

from paddlespeech.t2s.exps.syn_utils import get_frontend
from paddlespeech.t2s.exps.syn_utils import get_sentences
from paddlespeech.t2s.exps.syn_utils import get_sess
from paddlespeech.t2s.exps.syn_utils import run_frontend
from paddlespeech.t2s.utils import str2bool


def ort_predict(args):
    """Run end-to-end text-to-speech with ONNX Runtime sessions.

    The acoustic model and the vocoder are loaded from
    ``<args.inference_dir>/<args.am>.onnx`` and
    ``<args.inference_dir>/<args.voc>.onnx``. After a warm-up with random
    inputs, every ``(utt_id, sentence)`` pair returned by ``get_sentences`` is
    synthesized sub-sentence by sub-sentence, the partial waveforms are joined
    and the result is written to ``<args.output_dir>/<utt_id>.wav``.
    Per-utterance and overall speed / RTF figures are printed.

    Utterances for which the frontend returns no phone ids are reported and
    skipped instead of re-using the audio of the previous utterance.

    Args:
        args: parsed command-line namespace. The attributes read here are
            ``lang``, ``phones_dict``, ``tones_dict``, ``output_dir``,
            ``text``, ``am``, ``voc``, ``inference_dir``, ``device``,
            ``cpu_threads``, ``use_trt`` and ``spk_id``.
    """

    # frontend
    frontend = get_frontend(
        lang=args.lang,
        phones_dict=args.phones_dict,
        tones_dict=args.tones_dict)

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    sentences = get_sentences(text_file=args.text, lang=args.lang)

    # args.am looks like "<model>_<dataset>"; split on the last underscore
    am_name = args.am[:args.am.rindex('_')]
    am_dataset = args.am[args.am.rindex('_') + 1:]
    # only the ljspeech models use 22.05 kHz, everything else is 24 kHz
    fs = 24000 if am_dataset != 'ljspeech' else 22050

    am_sess = get_sess(
        model_path=str(Path(args.inference_dir) / (args.am + '.onnx')),
        device=args.device,
        cpu_threads=args.cpu_threads,
        use_trt=args.use_trt)

    # vocoder
    voc_sess = get_sess(
        model_path=str(Path(args.inference_dir) / (args.voc + '.onnx')),
        device=args.device,
        cpu_threads=args.cpu_threads,
        use_trt=args.use_trt)

    merge_sentences = True

    # frontend warmup
    # Loading model cost 0.5+ seconds
    if args.lang == 'zh':
        frontend.get_input_ids(
            "你好，欢迎使用飞桨框架进行深度学习研究！", merge_sentences=merge_sentences)
    else:
        frontend.get_input_ids(
            "hello, thank you, thank you very much",
            merge_sentences=merge_sentences)

    # am warmup
    spk_id = [args.spk_id]
    for T in [27, 38, 54]:
        am_input_feed = {}
        if am_name == 'fastspeech2':
            if args.lang == 'en':
                phone_ids = np.random.randint(1, 78, size=(T, ))
            else:
                phone_ids = np.random.randint(1, 266, size=(T, ))
            am_input_feed.update({'text': phone_ids})
            # multi-speaker models additionally need a speaker id
            if am_dataset in {"aishell3", "vctk", "mix", "canton"}:
                am_input_feed.update({'spk_id': spk_id})
        elif am_name == 'speedyspeech':
            phone_ids = np.random.randint(1, 92, size=(T, ))
            tone_ids = np.random.randint(1, 5, size=(T, ))
            am_input_feed.update({'phones': phone_ids, 'tones': tone_ids})
        am_sess.run(None, input_feed=am_input_feed)

    # voc warmup
    for T in [227, 308, 544]:
        data = np.random.rand(T, 80).astype("float32")
        voc_sess.run(None, input_feed={"logmel": data})
    print("warm up done!")

    # N: total number of generated samples, T: total synthesis time
    N = 0
    T = 0
    merge_sentences = False
    # only speedyspeech consumes tone ids
    get_tone_ids = am_name == 'speedyspeech'
    am_input_feed = {}
    for utt_id, sentence in sentences:
        wav = None
        mel = None
        with timer() as t:
            frontend_dict = run_frontend(
                frontend=frontend,
                text=sentence,
                merge_sentences=merge_sentences,
                get_tone_ids=get_tone_ids,
                lang=args.lang)
            phone_ids = frontend_dict['phone_ids']
            # one waveform per sub-sentence, joined after the loop
            wav_parts = []
            for i in range(len(phone_ids)):
                part_phone_ids = phone_ids[i].numpy()
                if am_name == 'fastspeech2':
                    am_input_feed.update({'text': part_phone_ids})
                    if am_dataset in {"aishell3", "vctk", "mix", "canton"}:
                        am_input_feed.update({'spk_id': spk_id})
                elif am_name == 'speedyspeech':
                    part_tone_ids = frontend_dict['tone_ids'][i].numpy()
                    am_input_feed.update({
                        'phones': part_phone_ids,
                        'tones': part_tone_ids
                    })
                mel = am_sess.run(output_names=None, input_feed=am_input_feed)
                mel = mel[0]
                part_wav = voc_sess.run(
                    output_names=None, input_feed={'logmel': mel})
                wav_parts.append(part_wav[0])
            if len(wav_parts) == 1:
                wav = wav_parts[0]
            elif wav_parts:
                wav = np.concatenate(wav_parts)
        if wav is None:
            # nothing was produced for this utterance; do not fall back to
            # the waveform of a previous one
            print(f"{utt_id}: frontend returned no phone ids, skipped.")
            continue
        N += len(wav)
        T += t.elapse
        speed = len(wav) / t.elapse
        rtf = fs / speed
        sf.write(str(output_dir / (utt_id + ".wav")), wav, samplerate=fs)
        print(
            f"{utt_id}, mel: {mel.shape}, wave: {len(wav)}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}."
        )
    if N > 0 and T > 0:
        print(f"generation speed: {N / T}Hz, RTF: {fs / (N / T) }")
    else:
        # avoid ZeroDivisionError when no audio was generated at all
        print("no audio was generated, generation speed is unavailable.")


def parse_args():
    """Declare the options of the end-to-end ONNX Runtime synthesis script
    and parse the command line.

    ``parse_known_args`` is used, so options that are not declared here are
    dropped instead of causing an error.

    Returns:
        argparse.Namespace: the parsed options.
    """
    am_choices = [
        'fastspeech2_csmsc',
        'fastspeech2_aishell3',
        'fastspeech2_ljspeech',
        'fastspeech2_vctk',
        'speedyspeech_csmsc',
        'fastspeech2_mix',
        'fastspeech2_male-zh',
        'fastspeech2_male-en',
        'fastspeech2_male-mix',
        'fastspeech2_canton',
    ]
    voc_choices = [
        'pwgan_csmsc',
        'pwgan_aishell3',
        'pwgan_ljspeech',
        'pwgan_vctk',
        'hifigan_csmsc',
        'hifigan_aishell3',
        'hifigan_ljspeech',
        'hifigan_vctk',
        'mb_melgan_csmsc',
        'pwgan_male',
        'hifigan_male',
    ]

    parser = argparse.ArgumentParser(description="Infernce with onnxruntime.")
    add_arg = parser.add_argument

    # --- acoustic model ---
    add_arg(
        '--am',
        type=str,
        default='fastspeech2_csmsc',
        choices=am_choices,
        help='Choose acoustic model type of tts task.')
    add_arg(
        "--phones_dict", type=str, default=None, help="phone vocabulary file.")
    add_arg(
        "--tones_dict", type=str, default=None, help="tone vocabulary file.")
    add_arg(
        '--spk_id',
        type=int,
        default=0,
        help='spk id for multi speaker acoustic model')

    # --- vocoder ---
    add_arg(
        '--voc',
        type=str,
        default='hifigan_csmsc',
        choices=voc_choices,
        help='Choose vocoder type of tts task.')

    # --- input / output locations and language ---
    add_arg("--inference_dir", type=str, help="dir to save inference models")
    add_arg(
        "--text",
        type=str,
        help="text to synthesize, a 'utt_id sentence' pair per line")
    add_arg("--output_dir", type=str, help="output dir")
    add_arg(
        '--lang',
        type=str,
        default='zh',
        help='Choose model language. zh or en')

    # --- inference runtime ---
    add_arg(
        "--use_trt",
        type=str2bool,
        default=False,
        help="Whether to use inference engin TensorRT.", )
    add_arg(
        "--device",
        default="gpu",
        choices=["gpu", "cpu"],
        help="Device selected for inference.", )
    add_arg('--cpu_threads', type=int, default=1)

    known_args, _unknown = parser.parse_known_args()
    return known_args


def main():
    """Entry point of the end-to-end ONNX Runtime synthesis script: parse the
    options, set the paddle device and call ``ort_predict``."""
    options = parse_args()
    # select the paddle device requested on the command line
    paddle.set_device(options.device)
    ort_predict(options)


# executed only when the file is run directly
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/ort_predict_streaming.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from pathlib import Path

import numpy as np
import paddle
import soundfile as sf
from timer import timer

from paddlespeech.t2s.exps.syn_utils import denorm
from paddlespeech.t2s.exps.syn_utils import get_chunks
from paddlespeech.t2s.exps.syn_utils import get_frontend
from paddlespeech.t2s.exps.syn_utils import get_sentences
from paddlespeech.t2s.exps.syn_utils import get_sess
from paddlespeech.t2s.exps.syn_utils import run_frontend
from paddlespeech.t2s.utils import str2bool


def ort_predict(args):
    """Synthesize speech with ONNX Runtime using a split acoustic model.

    The acoustic model is run as three separate sessions (encoder-infer,
    decoder and postnet) followed by a vocoder session. When
    ``args.am_streaming`` is true, the encoder output is cut into chunks by
    ``get_chunks`` and decoder / postnet run chunk by chunk, the frames that
    belong to the chunk padding being removed before the chunks are joined;
    otherwise decoder / postnet run once on the whole encoder output. The
    mel is de-normalized with the statistics in ``args.am_stat``, passed to
    the vocoder and the audio is written to
    ``<args.output_dir>/<utt_id>.wav``. Speed / RTF figures are printed.

    Args:
        args: parsed command-line namespace. The attributes read here are
            ``lang``, ``phones_dict``, ``tones_dict``, ``output_dir``,
            ``text``, ``am``, ``am_stat``, ``voc``, ``inference_dir``,
            ``device``, ``cpu_threads``, ``use_trt``, ``am_streaming``,
            ``block_size`` and ``pad_size``.
    """

    # frontend
    frontend = get_frontend(
        lang=args.lang,
        phones_dict=args.phones_dict,
        tones_dict=args.tones_dict)

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    sentences = get_sentences(text_file=args.text, lang=args.lang)

    # args.am looks like "<model>_<dataset>"; split on the last underscore
    am_name = args.am[:args.am.rindex('_')]
    am_dataset = args.am[args.am.rindex('_') + 1:]
    fs = 24000 if am_dataset != 'ljspeech' else 22050

    # streaming acoustic model
    am_encoder_infer_sess = get_sess(
        model_path=str(
            Path(args.inference_dir) /
            (args.am + '_am_encoder_infer' + '.onnx')),
        device=args.device,
        cpu_threads=args.cpu_threads,
        use_trt=args.use_trt)
    am_decoder_sess = get_sess(
        model_path=str(
            Path(args.inference_dir) / (args.am + '_am_decoder' + '.onnx')),
        device=args.device,
        cpu_threads=args.cpu_threads,
        use_trt=args.use_trt)

    am_postnet_sess = get_sess(
        model_path=str(
            Path(args.inference_dir) / (args.am + '_am_postnet' + '.onnx')),
        device=args.device,
        cpu_threads=args.cpu_threads,
        use_trt=args.use_trt)
    # mean / std used to de-normalize the predicted mel
    am_mu, am_std = np.load(args.am_stat)

    # vocoder
    voc_sess = get_sess(
        model_path=str(Path(args.inference_dir) / (args.voc + '.onnx')),
        device=args.device,
        cpu_threads=args.cpu_threads,
        use_trt=args.use_trt)

    # frontend warmup
    # Loading model cost 0.5+ seconds
    if args.lang == 'zh':
        frontend.get_input_ids("你好，欢迎使用飞桨框架进行深度学习研究！", merge_sentences=True)
    else:
        print("lang should in be 'zh' here!")

    # am warmup
    for T in [27, 38, 54]:
        phone_ids = np.random.randint(1, 266, size=(T, ))
        am_encoder_infer_sess.run(None, input_feed={'text': phone_ids})

        am_decoder_input = np.random.rand(1, T * 15, 384).astype('float32')
        am_decoder_sess.run(None, input_feed={'xs': am_decoder_input})

        am_postnet_input = np.random.rand(1, 80, T * 15).astype('float32')
        am_postnet_sess.run(None, input_feed={'xs': am_postnet_input})

    # voc warmup
    for T in [227, 308, 544]:
        data = np.random.rand(T, 80).astype("float32")
        voc_sess.run(None, input_feed={"logmel": data})
    print("warm up done!")

    # N: total number of generated samples, T: accumulated synthesis time
    N = 0
    T = 0
    merge_sentences = True
    get_tone_ids = False
    block_size = args.block_size
    pad_size = args.pad_size

    for utt_id, sentence in sentences:
        with timer() as t:
            frontend_dict = run_frontend(
                frontend=frontend,
                text=sentence,
                merge_sentences=merge_sentences,
                get_tone_ids=get_tone_ids,
                lang=args.lang)
            phone_ids = frontend_dict['phone_ids']
            # merge_sentences=True here, so we only use the first item of phone_ids
            phone_ids = phone_ids[0].numpy()
            orig_hs = am_encoder_infer_sess.run(
                None, input_feed={'text': phone_ids})
            if args.am_streaming:
                hss = get_chunks(orig_hs[0], block_size, pad_size)
                chunk_num = len(hss)
                mel_list = []
                for i, hs in enumerate(hss):
                    am_decoder_output = am_decoder_sess.run(
                        None, input_feed={'xs': hs})
                    am_postnet_output = am_postnet_sess.run(
                        None,
                        input_feed={
                            'xs': np.transpose(am_decoder_output[0], (0, 2, 1))
                        })
                    # NOTE(review): am_decoder_output is the value returned by
                    # run() (presumably a list holding one array), so this
                    # sum relies on numpy broadcasting and yields one extra
                    # leading axis, removed by the double [0] below.
                    am_output_data = am_decoder_output + np.transpose(
                        am_postnet_output[0], (0, 2, 1))
                    normalized_mel = am_output_data[0][0]

                    sub_mel = denorm(normalized_mel, am_mu, am_std)
                    # Clip the frames decoded from the chunk padding.
                    # Assumes get_chunks cuts chunk i as
                    # [i * block_size - pad_size, (i + 1) * block_size + pad_size)
                    # clamped to the sequence (no zero padding) - TODO confirm.
                    # Positive stop indices are used on purpose: a stop of
                    # "-pad_size" would drop real frames when the right
                    # padding is shorter than pad_size (e.g. a single short
                    # chunk) and would give an empty slice for pad_size == 0.
                    if i == 0:
                        # first chunk: no left padding, keep the block itself
                        sub_mel = sub_mel[:block_size]
                    elif i == chunk_num - 1:
                        # the right side of the last chunk never has a full
                        # padding, so only the left padding is removed
                        sub_mel = sub_mel[pad_size:]
                    else:
                        # the right side of the chunks close to the end may
                        # also lack a full padding
                        sub_mel = sub_mel[pad_size:pad_size + block_size]
                    mel_list.append(sub_mel)
                mel = np.concatenate(mel_list, axis=0)
            else:
                am_decoder_output = am_decoder_sess.run(
                    None, input_feed={'xs': orig_hs[0]})
                am_postnet_output = am_postnet_sess.run(
                    None,
                    input_feed={
                        'xs': np.transpose(am_decoder_output[0], (0, 2, 1))
                    })
                am_output_data = am_decoder_output + np.transpose(
                    am_postnet_output[0], (0, 2, 1))
                normalized_mel = am_output_data[0]
                mel = denorm(normalized_mel, am_mu, am_std)
                mel = mel[0]
            # vocoder

            wav = voc_sess.run(output_names=None, input_feed={'logmel': mel})

            N += len(wav[0])
            T += t.elapse
            speed = len(wav[0]) / t.elapse
            rtf = fs / speed
        sf.write(
            str(output_dir / (utt_id + ".wav")),
            np.array(wav)[0],
            samplerate=fs)
        print(
            f"{utt_id}, mel: {mel.shape}, wave: {len(wav[0])}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}."
        )
    if N > 0 and T > 0:
        print(f"generation speed: {N / T}Hz, RTF: {fs / (N / T) }")
    else:
        # avoid ZeroDivisionError when no audio was generated at all
        print("no audio was generated, generation speed is unavailable.")


def parse_args():
    """Declare the options of the streaming ONNX Runtime synthesis script and
    parse the command line.

    ``parse_known_args`` is used, so options that are not declared here are
    dropped instead of causing an error.

    Returns:
        argparse.Namespace: the parsed options.
    """
    am_choices = ['fastspeech2_csmsc']
    voc_choices = ['hifigan_csmsc', 'mb_melgan_csmsc', 'pwgan_csmsc']
    device_choices = ["gpu", "cpu"]

    parser = argparse.ArgumentParser(description="Infernce with onnxruntime.")
    add_arg = parser.add_argument

    # --- acoustic model ---
    add_arg(
        '--am',
        type=str,
        default='fastspeech2_csmsc',
        choices=am_choices,
        help='Choose acoustic model type of tts task.')
    add_arg(
        "--am_stat",
        type=str,
        default=None,
        help="mean and standard deviation used to normalize spectrogram when training acoustic model."
    )
    add_arg(
        "--phones_dict", type=str, default=None, help="phone vocabulary file.")
    add_arg(
        "--tones_dict", type=str, default=None, help="tone vocabulary file.")

    # --- vocoder ---
    add_arg(
        '--voc',
        type=str,
        default='hifigan_csmsc',
        choices=voc_choices,
        help='Choose vocoder type of tts task.')

    # --- input / output locations and language ---
    add_arg("--inference_dir", type=str, help="dir to save inference models")
    add_arg(
        "--text",
        type=str,
        help="text to synthesize, a 'utt_id sentence' pair per line")
    add_arg("--output_dir", type=str, help="output dir")
    add_arg(
        '--lang',
        type=str,
        default='zh',
        help='Choose model language. zh or en')

    # --- inference runtime ---
    add_arg(
        "--use_trt",
        type=str2bool,
        default=False,
        help="Whether to use inference engin TensorRT.", )
    add_arg(
        "--device",
        default="gpu",
        choices=device_choices,
        help="Device selected for inference.", )
    add_arg('--cpu_threads', type=int, default=1)

    # --- streaming related ---
    add_arg(
        "--am_streaming",
        type=str2bool,
        default=False,
        help="whether use streaming acoustic model")
    add_arg(
        "--block_size", type=int, default=42, help="block size of am streaming")
    add_arg(
        "--pad_size", type=int, default=12, help="pad size of am streaming")

    known_args, _unknown = parser.parse_known_args()
    return known_args


def main():
    """Entry point of the streaming ONNX Runtime synthesis script.

    Reads the command-line options, sets the paddle device given by
    ``--device`` and runs ``ort_predict`` with those options.
    """
    parsed = parse_args()
    paddle.set_device(parsed.device)  # must happen before inference starts
    ort_predict(parsed)


# do nothing on import; synthesize only when run as a script
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/speedyspeech/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/exps/speedyspeech/gen_gta_mel.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# generate mels using durations.txt
# for mb melgan finetune
# open question: what to do when the length differs from the original mel?
import argparse
import os
from pathlib import Path

import numpy as np
import paddle
import yaml
from tqdm import tqdm
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.preprocess_utils import get_phn_dur
from paddlespeech.t2s.datasets.preprocess_utils import merge_silence
from paddlespeech.t2s.frontend.zh_frontend import Frontend
from paddlespeech.t2s.models.speedyspeech import SpeedySpeech
from paddlespeech.t2s.models.speedyspeech import SpeedySpeechInference
from paddlespeech.t2s.modules.normalizer import ZScore
from paddlespeech.t2s.utils import str2bool


def evaluate(args, speedyspeech_config):
    """Generate mels with a trained SpeedySpeech model from given durations.

    The model is built from ``speedyspeech_config["model"]`` and the
    vocabulary sizes read from the phone / tone dictionaries, restored from
    the ``"main_params"`` entry of ``args.speedyspeech_checkpoint`` and
    wrapped with a ``ZScore`` normalizer built from ``args.speedyspeech_stat``.
    For every utterance in ``args.dur_file`` the phones, tones and durations
    are fed to the model and the resulting mel is saved as
    ``<utt_id>_feats.npy`` under ``train/raw``, ``dev/raw`` or ``test/raw``
    inside ``args.output_dir``, depending on which split the utterance's wav
    file belongs to.

    Utterances whose wav file is in none of the splits, or for which the
    frontend returns no phones / tones, are reported and skipped.

    Args:
        args: parsed command-line namespace. The attributes read here are
            ``rootdir``, ``phones_dict``, ``tones_dict``, ``speaker_dict``,
            ``speedyspeech_checkpoint``, ``speedyspeech_stat``,
            ``output_dir``, ``dur_file``, ``dataset`` and ``cut_sil``.
        speedyspeech_config: configuration object; its ``"model"`` entry is
            expanded into keyword arguments of ``SpeedySpeech``.

    Raises:
        ValueError: if ``args.dataset`` is neither ``"baker"`` nor
            ``"aishell3"``.
    """
    rootdir = Path(args.rootdir).expanduser()
    assert rootdir.is_dir()

    # construct dataset for evaluation
    with open(args.phones_dict, "r") as f:
        phn_id = [line.strip().split() for line in f.readlines()]
    vocab_size = len(phn_id)
    print("vocab_size:", vocab_size)

    # The mapping itself is not used below; building it is kept so that a
    # malformed line in the phone dictionary still fails early.
    phone_dict = {}
    for phn, phn_index in phn_id:
        phone_dict[phn] = int(phn_index)

    with open(args.tones_dict, "r") as f:
        tone_id = [line.strip().split() for line in f.readlines()]
    tone_size = len(tone_id)
    print("tone_size:", tone_size)

    frontend = Frontend(
        phone_vocab_path=args.phones_dict, tone_vocab_path=args.tones_dict)

    if args.speaker_dict:
        with open(args.speaker_dict, 'rt') as f:
            spk_id_list = [line.strip().split() for line in f.readlines()]
            spk_num = len(spk_id_list)
    else:
        spk_num = None

    model = SpeedySpeech(
        vocab_size=vocab_size,
        tone_size=tone_size,
        **speedyspeech_config["model"],
        spk_num=spk_num)

    model.set_state_dict(
        paddle.load(args.speedyspeech_checkpoint)["main_params"])
    model.eval()

    stat = np.load(args.speedyspeech_stat)
    mu, std = stat
    mu = paddle.to_tensor(mu)
    std = paddle.to_tensor(std)
    speedyspeech_normalizer = ZScore(mu, std)

    speedyspeech_inference = SpeedySpeechInference(speedyspeech_normalizer,
                                                   model)
    speedyspeech_inference.eval()

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    sentences, _speaker_set = get_phn_dur(args.dur_file)
    merge_silence(sentences)

    if args.dataset == "baker":
        wav_files = sorted(list((rootdir / "Wave").rglob("*.wav")))
        # split data into 3 sections
        num_train = 9800
        num_dev = 100
        train_wav_files = wav_files[:num_train]
        dev_wav_files = wav_files[num_train:num_train + num_dev]
        test_wav_files = wav_files[num_train + num_dev:]
    elif args.dataset == "aishell3":
        sub_num_dev = 5
        wav_dir = rootdir / "train" / "wav"
        train_wav_files = []
        dev_wav_files = []
        test_wav_files = []
        for speaker in os.listdir(wav_dir):
            wav_files = sorted(list((wav_dir / speaker).rglob("*.wav")))
            # speakers with few recordings contribute to train only
            if len(wav_files) > 100:
                train_wav_files += wav_files[:-sub_num_dev * 2]
                dev_wav_files += wav_files[-sub_num_dev * 2:-sub_num_dev]
                test_wav_files += wav_files[-sub_num_dev:]
            else:
                train_wav_files += wav_files
    else:
        # without this the split lists below would be undefined
        raise ValueError(
            f"unsupported dataset '{args.dataset}', expected 'baker' or 'aishell3'"
        )

    # sets of base names: membership is tested once per utterance below
    train_wav_files = {
        os.path.basename(str(str_path)) for str_path in train_wav_files
    }
    dev_wav_files = {
        os.path.basename(str(str_path)) for str_path in dev_wav_files
    }
    test_wav_files = {
        os.path.basename(str(str_path)) for str_path in test_wav_files
    }

    for utt_id in tqdm(sentences):
        phones = sentences[utt_id][0]
        durations = sentences[utt_id][1]
        speaker = sentences[utt_id][2]
        # trim the sil at the beginning and at the end
        if args.cut_sil:
            if phones[0] == "sil" and len(durations) > 1:
                durations = durations[1:]
                phones = phones[1:]
            if phones[-1] == 'sil' and len(durations) > 1:
                durations = durations[:-1]
                phones = phones[:-1]

        phones, tones = frontend._get_phone_tone(phones, get_tone_ids=True)
        if not phones or not tones:
            # never fall back to the ids of a previous utterance
            print(f"{utt_id}: no phones / tones from the frontend, skipped.")
            continue
        tone_ids = frontend._t2id(tones)
        tone_ids = paddle.to_tensor(tone_ids)
        phone_ids = frontend._p2id(phones)
        phone_ids = paddle.to_tensor(phone_ids)

        if args.speaker_dict:
            speaker_id = int(
                [item[1] for item in spk_id_list if speaker == item[0]][0])
            speaker_id = paddle.to_tensor(speaker_id)
        else:
            speaker_id = None

        durations = paddle.to_tensor(np.array(durations))
        durations = paddle.unsqueeze(durations, axis=0)

        # The generated mel may differ from the real one by 1 or 2 frames,
        # but batch_fn will fix that.
        # split data into 3 sections

        wav_path = utt_id + ".wav"

        if wav_path in train_wav_files:
            sub_output_dir = output_dir / ("train/raw")
        elif wav_path in dev_wav_files:
            sub_output_dir = output_dir / ("dev/raw")
        elif wav_path in test_wav_files:
            sub_output_dir = output_dir / ("test/raw")
        else:
            # do not write into the directory of a previous utterance
            print(f"{utt_id}: wav file is in no train/dev/test split, skipped.")
            continue

        sub_output_dir.mkdir(parents=True, exist_ok=True)

        with paddle.no_grad():
            mel = speedyspeech_inference(
                phone_ids, tone_ids, durations=durations, spk_id=speaker_id)
        np.save(sub_output_dir / (utt_id + "_feats.npy"), mel)


def main():
    """Command-line entry point of the SpeedySpeech mel generation script.

    Parses the options, selects the paddle device from ``--ngpu``, loads the
    SpeedySpeech YAML config, prints options and config, and calls
    ``evaluate``.
    """
    # parse args and config, then run evaluate
    parser = argparse.ArgumentParser(
        description="Synthesize with speedyspeech & parallel wavegan.")
    parser.add_argument(
        "--dataset",
        default="baker",
        type=str,
        # evaluate() only knows how to split these two datasets
        help="name of dataset, should in {baker, aishell3} now")
    parser.add_argument(
        "--rootdir", default=None, type=str, help="directory to dataset.")
    parser.add_argument(
        "--speedyspeech-config", type=str, help="speedyspeech config file.")
    parser.add_argument(
        "--speedyspeech-checkpoint",
        type=str,
        help="speedyspeech checkpoint to load.")
    parser.add_argument(
        "--speedyspeech-stat",
        type=str,
        help="mean and standard deviation used to normalize spectrogram when training speedyspeech."
    )

    parser.add_argument(
        "--phones-dict",
        type=str,
        default="phone_id_map.txt",
        help="phone vocabulary file.")
    parser.add_argument(
        "--tones-dict",
        type=str,
        default="tone_id_map.txt",
        help="tone vocabulary file.")
    parser.add_argument(
        "--speaker-dict", type=str, default=None, help="speaker id map file.")

    parser.add_argument(
        "--dur-file", default=None, type=str, help="path to durations.txt.")
    parser.add_argument("--output-dir", type=str, help="output dir.")
    parser.add_argument(
        "--ngpu", type=int, default=1, help="if ngpu == 0, use cpu.")

    parser.add_argument(
        "--cut-sil",
        type=str2bool,
        default=True,
        help="whether cut sil in the edge of audio")

    args = parser.parse_args()

    if args.ngpu == 0:
        paddle.set_device("cpu")
    elif args.ngpu > 0:
        paddle.set_device("gpu")
    else:
        # a negative value is only reported; the device is left untouched
        print("ngpu should >= 0 !")

    with open(args.speedyspeech_config) as f:
        speedyspeech_config = CfgNode(yaml.safe_load(f))

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(speedyspeech_config)

    evaluate(args, speedyspeech_config)


if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/speedyspeech/inference.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# remain for chains
import argparse
from pathlib import Path

import soundfile as sf
from paddle import inference

import paddlespeech.utils
from paddlespeech.t2s.frontend.zh_frontend import Frontend


def main():
    """Synthesize speech with exported SpeedySpeech and PWG inference models.

    Reads ``utt_id sentence`` lines from ``--text``, converts each sentence
    to phone and tone ids with the Chinese frontend, runs the
    ``speedyspeech`` predictor followed by the ``pwg`` predictor (both
    configured for GPU 0) and writes ``<output-dir>/<utt_id>.wav`` at
    24000 Hz. Blank lines in the text file are ignored.
    """
    parser = argparse.ArgumentParser(
        description="Paddle Infernce with speedyspeech & parallel wavegan.")
    parser.add_argument(
        "--inference-dir", type=str, help="dir to save inference models")
    parser.add_argument(
        "--text",
        type=str,
        help="text to synthesize, a 'utt_id sentence' pair per line")
    parser.add_argument("--output-dir", type=str, help="output dir")
    parser.add_argument(
        "--phones-dict",
        type=str,
        default="phones.txt",
        help="phone vocabulary file.")
    parser.add_argument(
        "--tones-dict",
        type=str,
        default="tones.txt",
        help="tone vocabulary file.")

    args, _ = parser.parse_known_args()

    frontend = Frontend(
        phone_vocab_path=args.phones_dict, tone_vocab_path=args.tones_dict)
    print("frontend done!")

    # after paddle 3.0, support new inference interface
    if paddlespeech.utils.satisfy_paddle_version('3.0.0-beta'):
        speedyspeech_config = inference.Config(
            str(Path(args.inference_dir)), "speedyspeech")
    else:
        speedyspeech_config = inference.Config(
            str(Path(args.inference_dir) / "speedyspeech.pdmodel"),
            str(Path(args.inference_dir) / "speedyspeech.pdiparams"))

    speedyspeech_config.enable_use_gpu(100, 0)
    speedyspeech_config.enable_memory_optim()
    speedyspeech_predictor = inference.create_predictor(speedyspeech_config)

    # after paddle 3.0, support new inference interface
    if paddlespeech.utils.satisfy_paddle_version('3.0.0-beta'):
        pwg_config = inference.Config(str(Path(args.inference_dir)), "pwg")
    else:
        pwg_config = inference.Config(
            str(Path(args.inference_dir) / "pwg.pdmodel"),
            str(Path(args.inference_dir) / "pwg.pdiparams"))

    pwg_config.enable_use_gpu(100, 0)
    pwg_config.enable_memory_optim()
    pwg_predictor = inference.create_predictor(pwg_config)

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    sentences = []

    with open(args.text, 'rt') as f:
        for line in f:
            items = line.strip().split()
            if not items:
                # blank line (e.g. at the end of the file): nothing to do
                continue
            utt_id = items[0]
            # the remaining fields are joined without separators
            sentence = "".join(items[1:])
            sentences.append((utt_id, sentence))

    for utt_id, sentence in sentences:
        input_ids = frontend.get_input_ids(
            sentence, merge_sentences=True, get_tone_ids=True)
        phone_ids = input_ids["phone_ids"]
        tone_ids = input_ids["tone_ids"]
        # merge_sentences=True, so only the first item is used
        phones = phone_ids[0].numpy()
        tones = tone_ids[0].numpy()

        # acoustic model: first input is phones, second input is tones
        input_names = speedyspeech_predictor.get_input_names()
        phones_handle = speedyspeech_predictor.get_input_handle(input_names[0])
        tones_handle = speedyspeech_predictor.get_input_handle(input_names[1])

        phones_handle.reshape(phones.shape)
        phones_handle.copy_from_cpu(phones)
        tones_handle.reshape(tones.shape)
        tones_handle.copy_from_cpu(tones)

        speedyspeech_predictor.run()
        output_names = speedyspeech_predictor.get_output_names()
        output_handle = speedyspeech_predictor.get_output_handle(
            output_names[0])
        output_data = output_handle.copy_to_cpu()

        # vocoder: feed the acoustic model output as its only input
        input_names = pwg_predictor.get_input_names()
        mel_handle = pwg_predictor.get_input_handle(input_names[0])
        mel_handle.reshape(output_data.shape)
        mel_handle.copy_from_cpu(output_data)

        pwg_predictor.run()
        output_names = pwg_predictor.get_output_names()
        output_handle = pwg_predictor.get_output_handle(output_names[0])
        wav = output_handle.copy_to_cpu()

        sf.write(output_dir / (utt_id + ".wav"), wav, samplerate=24000)

        print(f"{utt_id} done!")


if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/speedyspeech/normalize.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Normalize feature files and dump them."""
import argparse
import logging
from operator import itemgetter
from pathlib import Path

import jsonlines
import numpy as np
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

from paddlespeech.t2s.datasets.data_table import DataTable
from paddlespeech.t2s.utils import str2bool


def _load_id_map(path):
    """Read a vocabulary file whose lines look like ``<symbol> <id>``.

    Args:
        path: Path of the text file to read.

    Returns:
        dict mapping each symbol (str) to its integer id.

    Raises:
        ValueError: If a non-blank line does not consist of exactly two
            whitespace-separated fields, or the id is not an integer.
    """
    id_map = {}
    with open(path, 'rt', encoding='utf-8') as f:
        for line in f:
            fields = line.strip().split()
            if not fields:
                # tolerate blank (e.g. trailing) lines
                continue
            symbol, index = fields
            id_map[symbol] = int(index)
    return id_map


def main():
    """Normalize the dumped mel features and write a new metadata file.

    Command-line driven. For every record of ``--metadata`` the ``feats``
    array is loaded, standardized with the mean/scale stored in ``--stats``
    and saved as ``<utt_id>_feats.npy`` inside ``--dumpdir``. Phones, tones
    and the speaker name are converted to integer ids with the three
    vocabulary files. The resulting records, sorted by ``utt_id``, are
    written to ``metadata.jsonl`` in the same (resolved) ``--dumpdir``.
    """
    parser = argparse.ArgumentParser(
        description="Normalize dumped raw features (See detail in parallel_wavegan/bin/normalize.py)."
    )
    parser.add_argument(
        "--metadata",
        type=str,
        required=True,
        help="directory including feature files to be normalized. "
        "you need to specify either *-scp or rootdir.")
    parser.add_argument(
        "--dumpdir",
        type=str,
        required=True,
        help="directory to dump normalized feature files.")
    parser.add_argument(
        "--stats", type=str, required=True, help="statistics file.")
    parser.add_argument(
        "--phones-dict", type=str, default=None, help="phone vocabulary file.")
    parser.add_argument(
        "--tones-dict", type=str, default=None, help="tone vocabulary file.")
    parser.add_argument(
        "--speaker-dict", type=str, default=None, help="speaker id map file.")

    parser.add_argument(
        "--use-relative-path",
        type=str2bool,
        default=False,
        help="whether use relative path in metadata")
    args = parser.parse_args()

    # The three vocabulary files are read unconditionally below, so report a
    # missing one as a usage error instead of failing inside open(None).
    missing = [
        flag
        for flag, value in (("--phones-dict", args.phones_dict),
                            ("--tones-dict", args.tones_dict),
                            ("--speaker-dict", args.speaker_dict))
        if value is None
    ]
    if missing:
        parser.error("the following arguments are required: " +
                     ", ".join(missing))

    dumpdir = Path(args.dumpdir).expanduser()
    # use absolute path
    dumpdir = dumpdir.resolve()
    dumpdir.mkdir(parents=True, exist_ok=True)

    # get dataset
    with jsonlines.open(args.metadata, 'r') as reader:
        metadata = list(reader)
    if args.use_relative_path:
        # if use_relative_path in preprocess, convert it to absolute path here
        metadata_dir = Path(args.metadata).parent
        for item in metadata:
            item["feats"] = str(metadata_dir / item["feats"])

    dataset = DataTable(
        metadata, converters={
            'feats': np.load,
        })
    logging.info(f"The number of files = {len(dataset)}.")

    # restore scaler; row 0 of the stats file is the mean, row 1 the scale
    stats = np.load(args.stats)
    scaler = StandardScaler()
    scaler.mean_ = stats[0]
    scaler.scale_ = stats[1]
    # from version 0.23.0, this information is needed
    scaler.n_features_in_ = scaler.mean_.shape[0]

    vocab_phones = _load_id_map(args.phones_dict)
    vocab_tones = _load_id_map(args.tones_dict)
    vocab_speaker = _load_id_map(args.speaker_dict)

    # process each file
    output_metadata = []

    for item in tqdm(dataset):
        utt_id = item['utt_id']
        mel = item['feats']
        # normalize
        mel = scaler.transform(mel)

        # save
        mel_path = dumpdir / f"{utt_id}_feats.npy"
        np.save(mel_path, mel.astype(np.float32), allow_pickle=False)
        phone_ids = [vocab_phones[p] for p in item['phones']]
        tone_ids = [vocab_tones[p] for p in item['tones']]
        spk_id = vocab_speaker[item["speaker"]]
        if args.use_relative_path:
            # convert absolute path to relative path:
            mel_path = mel_path.relative_to(dumpdir)
        output_metadata.append({
            'utt_id': utt_id,
            "spk_id": spk_id,
            'phones': phone_ids,
            'tones': tone_ids,
            'num_phones': item['num_phones'],
            'num_frames': item['num_frames'],
            'durations': item['durations'],
            'feats': str(mel_path),
        })
    output_metadata.sort(key=itemgetter('utt_id'))
    # Write next to the feature files: use the expanded/resolved dumpdir, not
    # the raw argument, so "~" paths and relative "feats" entries stay valid.
    output_metadata_path = dumpdir / "metadata.jsonl"
    with jsonlines.open(output_metadata_path, 'w') as writer:
        for item in output_metadata:
            writer.write(item)
    logging.info(f"metadata dumped into {output_metadata_path}")


# Run the command-line entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/speedyspeech/preprocess.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import re
from concurrent.futures import ThreadPoolExecutor
from operator import itemgetter
from pathlib import Path
from typing import Any
from typing import Dict
from typing import List

import jsonlines
import librosa
import numpy as np
import tqdm
import yaml
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.get_feats import LogMelFBank
from paddlespeech.t2s.datasets.preprocess_utils import compare_duration_and_mel_length
from paddlespeech.t2s.datasets.preprocess_utils import get_phn_dur
from paddlespeech.t2s.datasets.preprocess_utils import get_phones_tones
from paddlespeech.t2s.datasets.preprocess_utils import get_spk_id_map
from paddlespeech.t2s.datasets.preprocess_utils import merge_silence
from paddlespeech.t2s.utils import str2bool


def process_sentence(config: Dict[str, Any],
                     fp: Path,
                     sentences: Dict,
                     output_dir: Path,
                     mel_extractor=None,
                     cut_sil: bool=True):
    """Extract the log-mel feature and labels of one utterance.

    Args:
        config: Configuration object; ``config.fs`` and ``config.n_shift``
            are read as sample rate and hop length.
        fp: Path of the audio file; its stem is used as the utterance id.
        sentences: Mapping ``utt_id -> [phones, durations, speaker]``. When
            ``cut_sil`` is true, the trimmed phones/durations are written
            back into this mapping.
        output_dir: Directory receiving ``<utt_id>_feats.npy``.
        mel_extractor: Object providing ``get_log_mel_fbank(wav)``.
        cut_sil: Whether to drop a leading/trailing ``sil`` phone together
            with the corresponding audio.

    Returns:
        A metadata dict (``utt_id``, ``phones``, ``tones``, ``speaker``,
        ``num_phones``, ``num_frames``, ``durations``, ``feats``), or None
        when the utterance has no entry in ``sentences``, the audio is not
        one-dimensional, or ``compare_duration_and_mel_length`` removed the
        utterance from ``sentences``.
    """
    utt_id = fp.stem
    if utt_id not in sentences:
        return None

    # reading, resampling may occur
    wav, _ = librosa.load(str(fp), sr=config.fs)
    if len(wav.shape) != 1:
        return None
    max_value = np.abs(wav).max()
    if max_value > 1.0:
        wav = wav / max_value
    assert len(wav.shape) == 1, f"{utt_id} is not a mono-channel audio."
    assert np.abs(wav).max(
    ) <= 1.0, f"{utt_id} is seems to be different that 16 bit PCM."
    phones = sentences[utt_id][0]
    durations = sentences[utt_id][1]
    speaker = sentences[utt_id][2]
    # cumulative frame index of every phone boundary, starting at 0
    d_cumsum = np.pad(np.array(durations).cumsum(0), (1, 0), 'constant')
    # little imprecise than use *.TextGrid directly
    times = librosa.frames_to_time(
        d_cumsum, sr=config.fs, hop_length=config.n_shift)
    if cut_sil:
        # start/end are in seconds because they go through time_to_samples
        # below; the default end therefore has to be the last boundary
        # *time*, not the frame count d_cumsum[-1].
        start = 0
        end = times[-1]
        if phones[0] == "sil" and len(durations) > 1:
            start = times[1]
            durations = durations[1:]
            phones = phones[1:]
        if phones[-1] == 'sil' and len(durations) > 1:
            end = times[-2]
            durations = durations[:-1]
            phones = phones[:-1]
        sentences[utt_id][0] = phones
        sentences[utt_id][1] = durations
        start, end = librosa.time_to_samples([start, end], sr=config.fs)
        wav = wav[start:end]

    # extract mel feats
    logmel = mel_extractor.get_log_mel_fbank(wav)
    # change duration according to mel_length
    compare_duration_and_mel_length(sentences, utt_id, logmel)
    # utt_id may be popped in compare_duration_and_mel_length
    if utt_id not in sentences:
        return None
    labels = sentences[utt_id][0]
    # extract phone and duration
    phones = []
    tones = []
    for label in labels:
        # split tone from finals; labels without a trailing digit get tone '0'
        match = re.match(r'^(\w+)([012345])$', label)
        if match:
            phones.append(match.group(1))
            tones.append(match.group(2))
        else:
            phones.append(label)
            tones.append('0')
    durations = sentences[utt_id][1]
    num_frames = logmel.shape[0]
    assert sum(durations) == num_frames
    assert len(phones) == len(tones) == len(durations)

    mel_path = output_dir / (utt_id + "_feats.npy")
    np.save(mel_path, logmel)  # (num_frames, n_mels)
    record = {
        "utt_id": utt_id,
        "phones": phones,
        "tones": tones,
        "speaker": speaker,
        "num_phones": len(phones),
        "num_frames": num_frames,
        "durations": durations,
        "feats": str(mel_path),  # stored as str, not Path
    }
    return record


def process_sentences(config,
                      fps: List[Path],
                      sentences: Dict,
                      output_dir: Path,
                      mel_extractor=None,
                      nprocs: int=1,
                      cut_sil: bool=True,
                      use_relative_path: bool=False):
    """Run ``process_sentence`` over many files and dump ``metadata.jsonl``.

    With ``nprocs == 1`` the files are handled sequentially, otherwise they
    are submitted to a thread pool of ``nprocs`` workers. Falsy records are
    dropped, the rest is sorted by ``utt_id`` and written line by line to
    ``<output_dir>/metadata.jsonl``. When ``use_relative_path`` is set, each
    ``feats`` entry is rewritten relative to ``output_dir`` before writing.
    """
    records = []
    if nprocs == 1:
        for wav_path in tqdm.tqdm(fps, total=len(fps)):
            rec = process_sentence(
                config=config,
                fp=wav_path,
                sentences=sentences,
                output_dir=output_dir,
                mel_extractor=mel_extractor,
                cut_sil=cut_sil)
            if rec:
                records.append(rec)
    else:
        with ThreadPoolExecutor(nprocs) as executor:
            with tqdm.tqdm(total=len(fps)) as bar:
                pending = []
                for wav_path in fps:
                    job = executor.submit(process_sentence, config, wav_path,
                                          sentences, output_dir,
                                          mel_extractor, cut_sil)
                    # advance the progress bar whenever a job finishes
                    job.add_done_callback(lambda _done: bar.update())
                    pending.append(job)

                # collect in submission order, skipping empty results
                for job in pending:
                    rec = job.result()
                    if rec:
                        records.append(rec)

    records.sort(key=itemgetter("utt_id"))
    out_root = Path(output_dir)
    # NOTE: use relative path to the meta jsonlines file for Full Chain Project
    with jsonlines.open(out_root / "metadata.jsonl", 'w') as writer:
        for rec in records:
            if use_relative_path:
                rel_feats = Path(rec["feats"]).relative_to(out_root)
                rec["feats"] = str(rel_feats)
            writer.write(rec)
    print("Done")


def main():
    """Command-line entry point: preprocess a dataset for SpeedySpeech.

    Reads the duration file, writes the phone/tone/speaker id maps into
    ``--dumpdir``, splits the sorted wav list into train/dev/test and calls
    ``process_sentences`` for every non-empty split, dumping into
    ``<dumpdir>/<split>/raw``.
    """
    # parse config and args
    parser = argparse.ArgumentParser(
        description="Preprocess audio and then extract features.")

    parser.add_argument(
        "--dataset",
        default="baker",
        type=str,
        help="name of dataset, should in {baker} now")

    parser.add_argument(
        "--rootdir", default=None, type=str, help="directory to dataset.")
    parser.add_argument(
        "--dumpdir",
        type=str,
        required=True,
        help="directory to dump feature files.")

    parser.add_argument(
        "--dur-file",
        default=None,
        type=str,
        help="path to baker durations.txt.")

    parser.add_argument("--config", type=str, help="speedyspeech config file.")

    parser.add_argument(
        "--num-cpu", type=int, default=1, help="number of process.")

    parser.add_argument(
        "--cut-sil",
        type=str2bool,
        default=True,
        help="whether cut sil in the edge of audio")

    parser.add_argument(
        "--use-relative-path",
        type=str2bool,
        default=False,
        help="whether use relative path in metadata")

    args = parser.parse_args()

    # These options default to None but are dereferenced unconditionally
    # below; report them as usage errors instead of a TypeError.
    missing = [
        flag
        for flag, value in (("--rootdir", args.rootdir),
                            ("--dur-file", args.dur_file),
                            ("--config", args.config)) if value is None
    ]
    if missing:
        parser.error("the following arguments are required: " +
                     ", ".join(missing))
    # Only the baker split is implemented; any other name used to end in a
    # NameError on the undefined wav file lists.
    if args.dataset != "baker":
        parser.error(
            f"unsupported dataset '{args.dataset}', should in {{baker}} now")

    rootdir = Path(args.rootdir).expanduser()
    dumpdir = Path(args.dumpdir).expanduser()
    # use absolute path
    dumpdir = dumpdir.resolve()
    dur_file = Path(args.dur_file).expanduser()

    # explicit checks instead of assert, which is stripped under `python -O`
    if not rootdir.is_dir():
        parser.error(f"--rootdir is not a directory: {rootdir}")
    if not dur_file.is_file():
        parser.error(f"--dur-file is not a file: {dur_file}")

    dumpdir.mkdir(parents=True, exist_ok=True)

    with open(args.config, 'rt', encoding='utf-8') as f:
        config = CfgNode(yaml.safe_load(f))

    sentences, speaker_set = get_phn_dur(dur_file)

    merge_silence(sentences)
    phone_id_map_path = dumpdir / "phone_id_map.txt"
    tone_id_map_path = dumpdir / "tone_id_map.txt"
    get_phones_tones(sentences, phone_id_map_path, tone_id_map_path,
                     args.dataset)
    speaker_id_map_path = dumpdir / "speaker_id_map.txt"
    get_spk_id_map(speaker_set, speaker_id_map_path)

    wav_files = sorted(list((rootdir / "Wave").rglob("*.wav")))
    # split data into 3 sections
    num_train = 9800
    num_dev = 100
    splits = (
        ("train", wav_files[:num_train]),
        ("dev", wav_files[num_train:num_train + num_dev]),
        ("test", wav_files[num_train + num_dev:]), )

    # every split directory is created, even if the split ends up empty
    dump_dirs = {}
    for split_name, _ in splits:
        split_dir = dumpdir / split_name / "raw"
        split_dir.mkdir(parents=True, exist_ok=True)
        dump_dirs[split_name] = split_dir

    # Extractor
    mel_extractor = LogMelFBank(
        sr=config.fs,
        n_fft=config.n_fft,
        hop_length=config.n_shift,
        win_length=config.win_length,
        window=config.window,
        n_mels=config.n_mels,
        fmin=config.fmin,
        fmax=config.fmax)

    # process for the 3 sections
    for split_name, split_files in splits:
        if not split_files:
            continue
        process_sentences(
            config=config,
            fps=split_files,
            sentences=sentences,
            output_dir=dump_dirs[split_name],
            mel_extractor=mel_extractor,
            nprocs=args.num_cpu,
            cut_sil=args.cut_sil,
            use_relative_path=args.use_relative_path)


# Run the command-line entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/speedyspeech/synthesize_e2e.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# remain for chains
import argparse
import logging
import os
from pathlib import Path

import numpy as np
import paddle
import soundfile as sf
import yaml
from paddle import jit
from paddle.static import InputSpec
from yacs.config import CfgNode

from paddlespeech.t2s.frontend.zh_frontend import Frontend
from paddlespeech.t2s.models.parallel_wavegan import PWGGenerator
from paddlespeech.t2s.models.parallel_wavegan import PWGInference
from paddlespeech.t2s.models.speedyspeech import SpeedySpeech
from paddlespeech.t2s.models.speedyspeech import SpeedySpeechInference
from paddlespeech.t2s.modules.normalizer import ZScore


def evaluate(args, speedyspeech_config, pwg_config):
    """Synthesize every sentence of ``args.text`` and export static models.

    Builds the SpeedySpeech acoustic model and the Parallel WaveGAN
    generator from the given checkpoints, converts both inference wrappers
    with ``jit.to_static``, saves them under ``args.inference_dir``, reloads
    them and uses the reloaded models to write ``<utt_id>.wav`` files into
    ``args.output_dir``.

    Args:
        args: Parsed command-line namespace (``text``, ``phones_dict``,
            ``tones_dict``, ``speedyspeech_checkpoint``, ``pwg_checkpoint``,
            ``speedyspeech_stat``, ``pwg_stat``, ``inference_dir``,
            ``output_dir`` are read).
        speedyspeech_config: Config of the acoustic model; ``["model"]`` and
            ``.fs`` are read.
        pwg_config: Config of the vocoder; ``["generator_params"]`` is read.
    """
    # dataloader has been too verbose
    logging.getLogger("DataLoader").disabled = True

    # construct dataset for evaluation
    sentences = []
    with open(args.text, 'rt', encoding='utf-8') as f:
        for line in f:
            items = line.strip().split()
            if not items:
                # blank line: nothing to synthesize (items[0] would fail)
                continue
            utt_id = items[0]
            sentence = "".join(items[1:])
            sentences.append((utt_id, sentence))

    with open(args.phones_dict, 'rt', encoding='utf-8') as f:
        phn_id = [line.strip().split() for line in f.readlines()]
    vocab_size = len(phn_id)
    print("vocab_size:", vocab_size)
    with open(args.tones_dict, 'rt', encoding='utf-8') as f:
        tone_id = [line.strip().split() for line in f.readlines()]
    tone_size = len(tone_id)
    print("tone_size:", tone_size)

    model = SpeedySpeech(
        vocab_size=vocab_size,
        tone_size=tone_size,
        **speedyspeech_config["model"])
    model.set_state_dict(
        paddle.load(args.speedyspeech_checkpoint)["main_params"])
    model.eval()

    vocoder = PWGGenerator(**pwg_config["generator_params"])
    vocoder.set_state_dict(paddle.load(args.pwg_checkpoint)["generator_params"])
    vocoder.remove_weight_norm()
    vocoder.eval()
    print("model done!")

    # each stat file unpacks into (mean, std) for the ZScore normalizer
    stat = np.load(args.speedyspeech_stat)
    mu, std = stat
    mu = paddle.to_tensor(mu)
    std = paddle.to_tensor(std)
    speedyspeech_normalizer = ZScore(mu, std)

    stat = np.load(args.pwg_stat)
    mu, std = stat
    mu = paddle.to_tensor(mu)
    std = paddle.to_tensor(std)
    pwg_normalizer = ZScore(mu, std)

    speedyspeech_inference = SpeedySpeechInference(speedyspeech_normalizer,
                                                   model)
    speedyspeech_inference.eval()
    speedyspeech_inference = jit.to_static(
        speedyspeech_inference,
        input_spec=[
            InputSpec([-1], dtype=paddle.int64), InputSpec(
                [-1], dtype=paddle.int64)
        ])
    paddle.jit.save(speedyspeech_inference,
                    os.path.join(args.inference_dir, "speedyspeech"))
    # reload so that synthesis below runs the exported static model
    speedyspeech_inference = paddle.jit.load(
        os.path.join(args.inference_dir, "speedyspeech"))

    pwg_inference = PWGInference(pwg_normalizer, vocoder)
    pwg_inference.eval()
    pwg_inference = jit.to_static(
        pwg_inference, input_spec=[
            InputSpec([-1, 80], dtype=paddle.float32),
        ])
    paddle.jit.save(pwg_inference, os.path.join(args.inference_dir, "pwg"))
    pwg_inference = paddle.jit.load(os.path.join(args.inference_dir, "pwg"))

    frontend = Frontend(
        phone_vocab_path=args.phones_dict, tone_vocab_path=args.tones_dict)
    print("frontend done!")

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    for utt_id, sentence in sentences:
        input_ids = frontend.get_input_ids(
            sentence, merge_sentences=True, get_tone_ids=True)
        phone_ids = input_ids["phone_ids"]
        tone_ids = input_ids["tone_ids"]

        # Collect the waveform of every part in a per-utterance list so that
        # nothing can leak over from the previous utterance.
        wav_parts = []
        for i in range(len(phone_ids)):
            part_phone_ids = phone_ids[i]
            part_tone_ids = tone_ids[i]
            with paddle.no_grad():
                mel = speedyspeech_inference(part_phone_ids, part_tone_ids)
                wav_parts.append(pwg_inference(mel))

        if not wav_parts:
            # The frontend produced nothing for this line; previously this
            # wrote the preceding utterance's audio (or raised NameError).
            print(f"{utt_id} skipped: no phones produced by the frontend.")
            continue

        if len(wav_parts) == 1:
            wav = wav_parts[0]
        else:
            wav = paddle.concat(wav_parts)
        sf.write(
            output_dir / (utt_id + ".wav"),
            wav.numpy(),
            samplerate=speedyspeech_config.fs)
        print(f"{utt_id} done!")


def main():
    """Command-line entry point for SpeedySpeech + Parallel WaveGAN synthesis.

    Parses the arguments (unknown ones are ignored), selects the paddle
    device from ``--ngpu`` / ``--nxpu``, loads both YAML configs, prints the
    arguments and configs, and calls ``evaluate``.
    """
    # parse args and config, then run evaluate
    parser = argparse.ArgumentParser(
        description="Synthesize with speedyspeech & parallel wavegan.")
    parser.add_argument(
        "--speedyspeech-config", type=str, help="config file for speedyspeech.")
    parser.add_argument(
        "--speedyspeech-checkpoint",
        type=str,
        help="speedyspeech checkpoint to load.")
    parser.add_argument(
        "--speedyspeech-stat",
        type=str,
        help="mean and standard deviation used to normalize spectrogram when training speedyspeech."
    )
    parser.add_argument(
        "--pwg-config", type=str, help="config file for parallelwavegan.")
    parser.add_argument(
        "--pwg-checkpoint",
        type=str,
        help="parallel wavegan checkpoint to load.")
    parser.add_argument(
        "--pwg-stat",
        type=str,
        help="mean and standard deviation used to normalize spectrogram when training parallel wavegan."
    )
    parser.add_argument(
        "--text",
        type=str,
        help="text to synthesize, a 'utt_id sentence' pair per line")
    parser.add_argument(
        "--phones-dict", type=str, default=None, help="phone vocabulary file.")
    parser.add_argument(
        "--tones-dict", type=str, default=None, help="tone vocabulary file.")
    parser.add_argument("--output-dir", type=str, help="output dir")
    parser.add_argument(
        "--inference-dir", type=str, help="dir to save inference models")
    parser.add_argument(
        "--ngpu", type=int, default=1, help="if ngpu == 0, use cpu or xpu.")
    parser.add_argument(
        "--nxpu",
        type=int,
        default=0,
        help="if nxpu == 0 and ngpu == 0, use cpu.")

    # unknown arguments are tolerated and dropped
    args, _ = parser.parse_known_args()

    if args.ngpu == 0:
        if args.nxpu == 0:
            paddle.set_device("cpu")
        else:
            paddle.set_device("xpu")
    elif args.ngpu > 0:
        paddle.set_device("gpu")
    else:
        # negative value: only warn, paddle's current device is left as is
        print("ngpu should >= 0 !")

    # read the YAML configs as UTF-8 regardless of the platform locale
    with open(args.speedyspeech_config, encoding='utf-8') as f:
        speedyspeech_config = CfgNode(yaml.safe_load(f))
    with open(args.pwg_config, encoding='utf-8') as f:
        pwg_config = CfgNode(yaml.safe_load(f))

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(speedyspeech_config)
    print(pwg_config)

    evaluate(args, speedyspeech_config, pwg_config)


# Run the command-line entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/speedyspeech/train.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
import os
import shutil
from pathlib import Path

import jsonlines
import numpy as np
import paddle
import yaml
from paddle import DataParallel
from paddle import distributed as dist
from paddle.io import DataLoader
from paddle.io import DistributedBatchSampler
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.am_batch_fn import speedyspeech_multi_spk_batch_fn
from paddlespeech.t2s.datasets.am_batch_fn import speedyspeech_single_spk_batch_fn
from paddlespeech.t2s.datasets.data_table import DataTable
from paddlespeech.t2s.models.speedyspeech import SpeedySpeech
from paddlespeech.t2s.models.speedyspeech import SpeedySpeechEvaluator
from paddlespeech.t2s.models.speedyspeech import SpeedySpeechUpdater
from paddlespeech.t2s.training.extensions.snapshot import Snapshot
from paddlespeech.t2s.training.extensions.visualizer import VisualDL
from paddlespeech.t2s.training.optimizer import build_optimizers
from paddlespeech.t2s.training.seeding import seed_everything
from paddlespeech.t2s.training.trainer import Trainer
from paddlespeech.t2s.utils import str2bool


def train_sp(args, config):
    """Train a SpeedySpeech model in the current process.

    Selects the device, seeds the random generators, builds the train/dev
    datasets and dataloaders from the jsonlines metadata, constructs model,
    optimizer, updater, evaluator and trainer, and runs the training loop.

    Args:
        args: Parsed command-line namespace. Fields read here: ``ngpu``,
            ``nxpu``, ``nnpu``, ``nmlu``, ``speaker_dict``,
            ``train_metadata``, ``dev_metadata``, ``use_relative_path``,
            ``phones_dict``, ``tones_dict``, ``output_dir`` and ``config``.
        config: Training configuration. Fields read here: ``seed``,
            ``batch_size``, ``num_workers``, ``max_epoch``,
            ``num_snapshots`` plus the ``"model"`` and ``"optimizer"``
            sections.
    """
    # decides device type and whether to run in parallel
    # setup running environment correctly
    world_size = paddle.distributed.get_world_size()
    if paddle.is_compiled_with_cuda() and args.ngpu > 0:
        paddle.set_device("gpu")
        if world_size > 1:
            paddle.distributed.init_parallel_env()
    elif paddle.is_compiled_with_xpu() and args.nxpu > 0:
        paddle.device.set_device("xpu")
    elif args.nnpu > 0:
        paddle.device.set_device("npu")
        if world_size > 1:
            paddle.distributed.init_parallel_env()
    elif args.nmlu > 0:
        paddle.device.set_device("mlu")
    else:
        paddle.set_device("cpu")

    # set the random seed, it is a must for multiprocess training
    seed_everything(config.seed)

    print(
        f"rank: {dist.get_rank()}, pid: {os.getpid()}, parent_pid: {os.getppid()}",
    )

    fields = [
        "phones", "tones", "num_phones", "num_frames", "feats", "durations"
    ]

    # a speaker dict switches to the multi-speaker collate function and adds
    # the "spk_id" field to every sample
    spk_num = None
    if args.speaker_dict is not None:
        print("multiple speaker speedyspeech!")
        collate_fn = speedyspeech_multi_spk_batch_fn
        with open(args.speaker_dict, 'rt', encoding='utf-8') as f:
            spk_id = [line.strip().split() for line in f.readlines()]
        spk_num = len(spk_id)
        fields += ["spk_id"]
    else:
        print("single speaker speedyspeech!")
        collate_fn = speedyspeech_single_spk_batch_fn
    print("spk_num:", spk_num)

    # dataloader has been too verbose
    logging.getLogger("DataLoader").disabled = True

    # construct dataset for training and validation
    with jsonlines.open(args.train_metadata, 'r') as reader:
        train_metadata = list(reader)
    if args.use_relative_path:
        # if use_relative_path in preprocess, convert it to absolute path here
        metadata_dir = Path(args.train_metadata).parent
        for item in train_metadata:
            item["feats"] = str(metadata_dir / item["feats"])

    train_dataset = DataTable(
        data=train_metadata,
        fields=fields,
        converters={
            "feats": np.load,
        }, )
    with jsonlines.open(args.dev_metadata, 'r') as reader:
        dev_metadata = list(reader)
    if args.use_relative_path:
        # if use_relative_path in preprocess, convert it to absolute path here
        metadata_dir = Path(args.dev_metadata).parent
        for item in dev_metadata:
            item["feats"] = str(metadata_dir / item["feats"])

    dev_dataset = DataTable(
        data=dev_metadata,
        fields=fields,
        converters={
            "feats": np.load,
        }, )

    # collate function and dataloader
    train_sampler = DistributedBatchSampler(
        train_dataset,
        batch_size=config.batch_size,
        shuffle=True,
        drop_last=True)
    print("samplers done!")

    train_dataloader = DataLoader(
        train_dataset,
        batch_sampler=train_sampler,
        collate_fn=collate_fn,
        num_workers=config.num_workers)
    dev_dataloader = DataLoader(
        dev_dataset,
        shuffle=False,
        drop_last=False,
        batch_size=config.batch_size,
        collate_fn=collate_fn,
        num_workers=config.num_workers)
    print("dataloaders done!")
    # the vocabulary sizes are the line counts of the dict files
    with open(args.phones_dict, 'rt', encoding='utf-8') as f:
        phn_id = [line.strip().split() for line in f.readlines()]
    vocab_size = len(phn_id)
    print("vocab_size:", vocab_size)
    with open(args.tones_dict, 'rt', encoding='utf-8') as f:
        tone_id = [line.strip().split() for line in f.readlines()]
    tone_size = len(tone_id)
    print("tone_size:", tone_size)

    model = SpeedySpeech(
        vocab_size=vocab_size,
        tone_size=tone_size,
        spk_num=spk_num,
        **config["model"])
    if world_size > 1:
        model = DataParallel(model)
    print("model done!")
    optimizer = build_optimizers(model, **config["optimizer"])
    print("optimizer done!")

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    if dist.get_rank() == 0:
        # Path.name handles every platform's separator, unlike split("/")
        config_name = Path(args.config).name
        # copy conf to output_dir
        shutil.copyfile(args.config, output_dir / config_name)

    updater = SpeedySpeechUpdater(
        model=model,
        optimizer=optimizer,
        dataloader=train_dataloader,
        output_dir=output_dir)

    trainer = Trainer(updater, (config.max_epoch, 'epoch'), output_dir)

    evaluator = SpeedySpeechEvaluator(
        model, dev_dataloader, output_dir=output_dir)

    # evaluation and visualization only run on rank 0; snapshots on all ranks
    if dist.get_rank() == 0:
        trainer.extend(evaluator, trigger=(1, "epoch"))
        trainer.extend(VisualDL(output_dir), trigger=(1, "iteration"))
    trainer.extend(
        Snapshot(max_size=config.num_snapshots), trigger=(1, 'epoch'))
    trainer.run()


def main():
    """Command-line entry point for SpeedySpeech training.

    Parses the known arguments, loads the YAML config, merges any leftover
    ``--key=value`` / ``--key value`` arguments into the config, prints the
    effective settings and dispatches to ``train_sp`` (spawning one process
    per GPU when ``--ngpu`` is greater than 1).
    """
    # parse args and config and redirect to train_sp
    parser = argparse.ArgumentParser(
        description="Train a Speedyspeech model with a single speaker dataset.")
    parser.add_argument("--config", type=str, help="config file.")
    parser.add_argument("--train-metadata", type=str, help="training data.")
    parser.add_argument("--dev-metadata", type=str, help="dev data.")
    parser.add_argument("--output-dir", type=str, help="output dir.")
    parser.add_argument(
        "--nxpu",
        type=int,
        default=0,
        help="if wish to use xpu, set ngpu == 0 and nxpu > 0, otherwise use gpu, npu, mlu or cpu."
    )
    parser.add_argument(
        "--nnpu",
        type=int,
        default=0,
        help="if wish to use npu, set ngpu == 0 and nnpu > 0, otherwise use gpu, xpu, mlu or cpu."
    )
    # Default is 0 like --nxpu/--nnpu: with a default of 1 the "mlu" branch of
    # train_sp was taken for a plain `--ngpu 0`, making CPU unreachable.
    parser.add_argument(
        "--nmlu",
        type=int,
        default=0,
        help="if wish to use mlu, set ngpu == 0 and nmlu > 0, otherwise use gpu, xpu, npu or cpu."
    )
    parser.add_argument(
        "--ngpu",
        type=int,
        default=1,
        help="if wish to use gpu, set ngpu > 0, otherwise use xpu, npu or cpu.")

    parser.add_argument(
        "--use-relative-path",
        type=str2bool,
        default=False,
        help="whether use relative path in metadata")

    parser.add_argument(
        "--phones-dict", type=str, default=None, help="phone vocabulary file.")

    parser.add_argument(
        "--tones-dict", type=str, default=None, help="tone vocabulary file.")

    parser.add_argument(
        "--speaker-dict",
        type=str,
        default=None,
        help="speaker id map file for multiple speaker model.")

    # extra config overrides such as max_epoch can be passed here
    args, rest = parser.parse_known_args()

    # read the YAML config as UTF-8 regardless of the platform locale
    with open(args.config, encoding='utf-8') as f:
        config = CfgNode(yaml.safe_load(f))

    if rest:
        extra = []
        # to support both "--key=value" and "--key value" formats
        for item in rest:
            if item.startswith("--"):
                # option token: drop the "--" and split an inline value
                extra.extend(item[2:].split("=", maxsplit=1))
            else:
                # bare value token: keep verbatim (slicing it would eat the
                # first two characters of the value)
                extra.append(item)
        config.merge_from_list(extra)

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(config)
    print(
        f"master see the word size: {dist.get_world_size()}, from pid: {os.getpid()}"
    )

    # dispatch
    if args.ngpu > 1:
        dist.spawn(train_sp, (args, config), nprocs=args.ngpu)
    else:
        train_sp(args, config)


# Run the command-line entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/starganv2_vc/__init__.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/exps/starganv2_vc/normalize.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Normalize feature files and dump them."""
import argparse
import logging
from operator import itemgetter
from pathlib import Path

import jsonlines
import numpy as np
import tqdm

from paddlespeech.t2s.datasets.data_table import DataTable


def main():
    """Normalize dumped speech features and write them, with metadata, to ``--dumpdir``.

    Command-line arguments:
        --metadata: jsonlines file describing the raw features; each record
            is expected to provide ``utt_id``, ``speech`` (a path handed to
            ``np.load``) and ``speaker``.
        --dumpdir: output directory for the normalized ``*_speech.npy`` files
            and the resulting ``metadata.jsonl``.
        --speaker-dict: text file with one ``<speaker> <id>`` pair per line.

    Raises:
        SystemExit: if a required command-line argument is missing.
        ValueError: if a non-blank line of the speaker map does not contain
            exactly two fields, or its id is not an integer.
        KeyError: if an utterance's speaker is absent from the speaker map.
    """
    parser = argparse.ArgumentParser(
        description="Normalize dumped raw features (See detail in parallel_wavegan/bin/normalize.py)."
    )
    parser.add_argument(
        "--metadata",
        type=str,
        required=True,
        help="directory including feature files to be normalized. "
        "you need to specify either *-scp or rootdir.")

    parser.add_argument(
        "--dumpdir",
        type=str,
        required=True,
        help="directory to dump normalized feature files.")

    # The speaker map is opened unconditionally below, so it is mandatory;
    # declaring that here gives a usage error instead of a TypeError on None.
    parser.add_argument(
        "--speaker-dict", type=str, required=True, help="speaker id map file.")

    args = parser.parse_args()

    dumpdir = Path(args.dumpdir).expanduser()
    # use absolute path
    dumpdir = dumpdir.resolve()
    dumpdir.mkdir(parents=True, exist_ok=True)

    # get dataset
    with jsonlines.open(args.metadata, 'r') as reader:
        metadata = list(reader)
    dataset = DataTable(
        metadata, converters={
            "speech": np.load,
        })
    logging.info(f"The number of files = {len(dataset)}.")

    # speaker name -> integer id
    vocab_speaker = {}
    with open(args.speaker_dict, 'rt') as f:
        for line in f:
            fields = line.strip().split()
            if not fields:
                # tolerate blank lines (e.g. a trailing newline)
                continue
            spk, idx = fields
            vocab_speaker[spk] = int(idx)

    # normalization statistics are hard-coded for now
    mean, std = -4, 4

    # process each file
    output_metadata = []

    for item in tqdm.tqdm(dataset):
        utt_id = item['utt_id']
        speech = item['speech']

        # normalize
        speech = (speech - mean) / std
        speech_path = dumpdir / f"{utt_id}_speech.npy"
        np.save(speech_path, speech.astype(np.float32), allow_pickle=False)

        record = {
            "utt_id": utt_id,
            "spk_id": vocab_speaker[item["speaker"]],
            "speech": str(speech_path),
        }

        output_metadata.append(record)
    output_metadata.sort(key=itemgetter('utt_id'))
    # Write the metadata next to the features, i.e. into the expanded and
    # resolved dump directory rather than the raw command-line string.
    output_metadata_path = dumpdir / "metadata.jsonl"
    with jsonlines.open(output_metadata_path, 'w') as writer:
        for item in output_metadata:
            writer.write(item)
    logging.info(f"metadata dumped into {output_metadata_path}")


# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/starganv2_vc/preprocess.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
from concurrent.futures import ThreadPoolExecutor
from operator import itemgetter
from pathlib import Path
from typing import Any
from typing import Dict
from typing import List

import jsonlines
import librosa
import numpy as np
import tqdm
import yaml
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.get_feats import LogMelFBank
from paddlespeech.t2s.datasets.preprocess_utils import get_spk_id_map

# Module-level accumulator of speaker names: process_sentence() adds to it
# and main() passes it to get_spk_id_map() once all subsets are processed.
speaker_set = set()


def process_sentence(config: Dict[str, Any],
                     fp: Path,
                     output_dir: Path,
                     mel_extractor=None):
    """Extract and save the log-mel features of a single utterance.

    Args:
        config: Preprocessing configuration (not read by this function).
        fp: Path of the audio file; its stem is used as the utterance id.
        output_dir: Directory that receives ``<utt_id>_speech.npy``.
        mel_extractor: Object providing ``get_log_mel_fbank(wav, base=...)``.

    Returns:
        A dict with the keys ``utt_id``, ``speech`` (path of the saved
        feature file as a string) and ``speaker``.

    Side effects:
        Adds the speaker name to the module-level ``speaker_set``.
    """
    utt_id = fp.stem
    # for vctk: drop the microphone suffix from the utterance id
    if utt_id.endswith("_mic2"):
        utt_id = utt_id[:-5]
    # The speaker is the part of the utterance id before the first '_'.
    # It is derived for every file, so the record below can always be built
    # (previously it was only bound for "*_mic2" files).
    speaker = utt_id.split('_')[0]
    speaker_set.add(speaker)
    # reading, resampling may occur
    # Known issue inherited from the original source: audio is read at
    # 24000 Hz while the mel features are extracted as if it were 16000 Hz.
    # See https://github.com/PaddlePaddle/PaddleSpeech/blob/c7d24ba42c377fe4c0765c6b1faa202a9aeb136f/paddlespeech/t2s/exps/starganv2_vc/vc.py#L165
    # TODO: read at 24000 Hz and extract mel at 24000 Hz.
    wav, _ = librosa.load(str(fp), sr=24000)
    max_value = np.abs(wav).max()
    if max_value > 1.0:
        wav = wav / max_value
    assert len(wav.shape) == 1, f"{utt_id} is not a mono-channel audio."
    assert np.abs(
        wav).max() <= 1.0, f"{utt_id} is seems to be different that 16 bit PCM."
    # extract mel feats
    # NOTE: base='e' here; it should later become base='10' like the other
    # TTS models.
    logmel = mel_extractor.get_log_mel_fbank(wav, base='e')
    mel_path = output_dir / (utt_id + "_speech.npy")
    np.save(mel_path, logmel)
    return {"utt_id": utt_id, "speech": str(mel_path), "speaker": speaker}


def process_sentences(
        config,
        fps: List[Path],
        output_dir: Path,
        mel_extractor=None,
        nprocs: int=1, ):
    """Run ``process_sentence`` over many files and dump their metadata.

    Args:
        config: Forwarded unchanged to ``process_sentence``.
        fps: Audio files to process.
        output_dir: Directory for the feature files and ``metadata.jsonl``.
        mel_extractor: Forwarded unchanged to ``process_sentence``.
        nprocs: ``1`` processes the files sequentially; any other value is
            used as the worker count of a thread pool.

    The collected records are sorted by ``utt_id`` before being written to
    ``output_dir / "metadata.jsonl"``.
    """
    if nprocs == 1:
        # sequential path, progress bar driven by the iteration itself
        produced = (process_sentence(
            config=config,
            fp=wav_path,
            output_dir=output_dir,
            mel_extractor=mel_extractor)
                    for wav_path in tqdm.tqdm(fps, total=len(fps)))
        records = [rec for rec in produced if rec]
    else:
        with ThreadPoolExecutor(nprocs) as executor:
            with tqdm.tqdm(total=len(fps)) as bar:
                pending = []
                for wav_path in fps:
                    job = executor.submit(process_sentence, config, wav_path,
                                          output_dir, mel_extractor)
                    # advance the bar whenever a job completes
                    job.add_done_callback(lambda _finished: bar.update())
                    pending.append(job)

                # gather in submission order; result() re-raises worker errors
                records = [
                    rec for rec in (job.result() for job in pending) if rec
                ]

    records = sorted(records, key=itemgetter("utt_id"))
    with jsonlines.open(output_dir / "metadata.jsonl", 'w') as writer:
        for rec in records:
            writer.write(rec)
    print("Done")


def main():
    """Split a dataset into train/dev/test and dump log-mel features for each.

    Command-line arguments:
        --dataset: dataset name; only ``vctk`` is supported.
        --rootdir: dataset root; ``wav48_silence_trimmed`` is read below it.
        --dumpdir: output root; features go to ``<dumpdir>/{train,dev,test}/raw``
            and the speaker map to ``<dumpdir>/speaker_id_map.txt``.
        --config: YAML file providing the feature-extraction settings.
        --num-cpu: worker count forwarded to ``process_sentences``.

    Raises:
        SystemExit: on a missing/invalid argument, an unsupported dataset or
            a ``--rootdir`` that is not a directory.
    """
    # parse config and args
    parser = argparse.ArgumentParser(
        description="Preprocess audio and then extract features.")

    parser.add_argument(
        "--dataset",
        default="vctk",
        type=str,
        help="name of dataset, should in {vctk} now")

    # rootdir and config are used unconditionally below, so they are declared
    # required to get a usage error instead of a TypeError on None.
    parser.add_argument(
        "--rootdir", required=True, type=str, help="directory to dataset.")

    parser.add_argument(
        "--dumpdir",
        type=str,
        required=True,
        help="directory to dump feature files.")

    parser.add_argument(
        "--config", type=str, required=True, help="StarGANv2VC config file.")

    parser.add_argument(
        "--num-cpu", type=int, default=1, help="number of process.")

    args = parser.parse_args()

    # Validate before touching the file system: an unsupported dataset would
    # otherwise leave the file lists undefined and fail later with NameError.
    if args.dataset != "vctk":
        parser.error("dataset should in {vctk} now!")

    rootdir = Path(args.rootdir).expanduser()
    if not rootdir.is_dir():
        parser.error(f"--rootdir is not a directory: {rootdir}")

    dumpdir = Path(args.dumpdir).expanduser()
    # use absolute path
    dumpdir = dumpdir.resolve()
    dumpdir.mkdir(parents=True, exist_ok=True)

    with open(args.config, 'rt') as f:
        config = CfgNode(yaml.safe_load(f))

    # vctk: for speakers with more than 100 utterances, the last
    # 2 * sub_num_dev files are held out (sub_num_dev for dev, sub_num_dev
    # for test); all files of the remaining speakers go to train.
    sub_num_dev = 5
    wav_dir = rootdir / "wav48_silence_trimmed"
    train_wav_files = []
    dev_wav_files = []
    test_wav_files = []
    for speaker in os.listdir(wav_dir):
        wav_files = sorted((wav_dir / speaker).rglob("*_mic2.flac"))
        if len(wav_files) > 100:
            train_wav_files += wav_files[:-sub_num_dev * 2]
            dev_wav_files += wav_files[-sub_num_dev * 2:-sub_num_dev]
            test_wav_files += wav_files[-sub_num_dev:]
        else:
            train_wav_files += wav_files

    train_dump_dir = dumpdir / "train" / "raw"
    train_dump_dir.mkdir(parents=True, exist_ok=True)
    dev_dump_dir = dumpdir / "dev" / "raw"
    dev_dump_dir.mkdir(parents=True, exist_ok=True)
    test_dump_dir = dumpdir / "test" / "raw"
    test_dump_dir.mkdir(parents=True, exist_ok=True)

    # Extractor
    mel_extractor = LogMelFBank(
        sr=config.fs,
        n_fft=config.n_fft,
        hop_length=config.n_shift,
        win_length=config.win_length,
        window=config.window,
        n_mels=config.n_mels,
        fmin=config.fmin,
        fmax=config.fmax,
        # None here
        norm=config.norm,
        htk=config.htk,
        power=config.power)

    # process for the 3 sections; empty subsets are skipped
    subsets = (
        (train_wav_files, train_dump_dir),
        (dev_wav_files, dev_dump_dir),
        (test_wav_files, test_dump_dir), )
    for wav_files, dump_dir in subsets:
        if wav_files:
            process_sentences(
                config=config,
                fps=wav_files,
                output_dir=dump_dir,
                mel_extractor=mel_extractor,
                nprocs=args.num_cpu)

    # speaker_set has been filled by process_sentence() during the loop above
    speaker_id_map_path = dumpdir / "speaker_id_map.txt"
    get_spk_id_map(speaker_set, speaker_id_map_path)


# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/starganv2_vc/train.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
import os
import shutil
from pathlib import Path

import jsonlines
import numpy as np
import paddle
import yaml
from paddle import DataParallel
from paddle import distributed as dist
from paddle.io import DataLoader
from paddle.io import DistributedBatchSampler
from paddle.optimizer import AdamW
from paddle.optimizer.lr import OneCycleLR
from yacs.config import CfgNode

from paddlespeech.cli.utils import download_and_decompress
from paddlespeech.resource.pretrained_models import StarGANv2VC_source
from paddlespeech.t2s.datasets.am_batch_fn import build_starganv2_vc_collate_fn
from paddlespeech.t2s.datasets.data_table import StarGANv2VCDataTable
from paddlespeech.t2s.models.starganv2_vc import ASRCNN
from paddlespeech.t2s.models.starganv2_vc import Discriminator
from paddlespeech.t2s.models.starganv2_vc import Generator
from paddlespeech.t2s.models.starganv2_vc import JDCNet
from paddlespeech.t2s.models.starganv2_vc import MappingNetwork
from paddlespeech.t2s.models.starganv2_vc import StarGANv2VCEvaluator
from paddlespeech.t2s.models.starganv2_vc import StarGANv2VCUpdater
from paddlespeech.t2s.models.starganv2_vc import StyleEncoder
from paddlespeech.t2s.training.extensions.snapshot import Snapshot
from paddlespeech.t2s.training.extensions.visualizer import VisualDL
from paddlespeech.t2s.training.seeding import seed_everything
from paddlespeech.t2s.training.trainer import Trainer
from paddlespeech.utils.env import MODEL_HOME


def train_sp(args, config):
    """Build the StarGANv2-VC training pipeline and run it in this process.

    Args:
        args: Parsed command-line namespace; this function reads ``ngpu``,
            ``train_metadata``, ``dev_metadata``, ``speaker_dict``,
            ``output_dir`` and ``config``.
        config: Experiment configuration (``CfgNode``). When a speaker map is
            given, the ``num_domains`` entries of the mapping network, style
            encoder and discriminator parameters are overwritten in place.

    Side effects:
        Selects the paddle device, downloads/decompresses the pretrained
        StarGANv2-VC resources into ``MODEL_HOME``, creates ``output_dir`` and
        (on rank 0) copies the config file into it, then runs the trainer.
    """
    # decides device type and whether to run in parallel
    # setup running environment correctly
    world_size = paddle.distributed.get_world_size()
    if (not paddle.is_compiled_with_cuda()) or args.ngpu == 0:
        paddle.set_device("cpu")
    else:
        paddle.set_device("gpu")
        if world_size > 1:
            paddle.distributed.init_parallel_env()

    # set the random seed, it is a must for multiprocess training
    seed_everything(config.seed)

    print(
        f"rank: {dist.get_rank()}, pid: {os.getpid()}, parent_pid: {os.getppid()}",
    )

    collate_fn = build_starganv2_vc_collate_fn(
        latent_dim=config['mapping_network_params']['latent_dim'],
        max_mel_length=config['max_mel_length'])

    # dataloader has been too verbose
    logging.getLogger("DataLoader").disabled = True

    # construct dataset for training and validation
    with jsonlines.open(args.train_metadata, 'r') as reader:
        train_metadata = list(reader)
    train_dataset = StarGANv2VCDataTable(data=train_metadata)
    with jsonlines.open(args.dev_metadata, 'r') as reader:
        dev_metadata = list(reader)
    dev_dataset = StarGANv2VCDataTable(data=dev_metadata)

    # collate function and dataloader
    train_sampler = DistributedBatchSampler(
        train_dataset,
        batch_size=config.batch_size,
        shuffle=True,
        drop_last=True)

    print("samplers done!")

    train_dataloader = DataLoader(
        train_dataset,
        batch_sampler=train_sampler,
        collate_fn=collate_fn,
        num_workers=config.num_workers)

    dev_dataloader = DataLoader(
        dev_dataset,
        shuffle=False,
        drop_last=False,
        batch_size=config.batch_size,
        collate_fn=collate_fn,
        num_workers=config.num_workers)

    print("dataloaders done!")

    # load model
    model_version = '1.0'
    uncompress_path = download_and_decompress(StarGANv2VC_source[model_version],
                                              MODEL_HOME)
    # Set num_domains from the number of speakers.
    # The pretrained model of the original source and default.yaml use 20.
    if args.speaker_dict is not None:
        with open(args.speaker_dict, 'rt', encoding='utf-8') as f:
            # skip blank lines so they are not counted as speakers
            spk_id = [line.strip().split() for line in f if line.strip()]
        spk_num = len(spk_id)
        print("spk_num:", spk_num)
        config['mapping_network_params']['num_domains'] = spk_num
        config['style_encoder_params']['num_domains'] = spk_num
        config['discriminator_params']['num_domains'] = spk_num

    generator = Generator(**config['generator_params'])
    mapping_network = MappingNetwork(**config['mapping_network_params'])
    style_encoder = StyleEncoder(**config['style_encoder_params'])
    discriminator = Discriminator(**config['discriminator_params'])

    # load pretrained model
    jdc_model_dir = os.path.join(uncompress_path, 'jdcnet.pdz')
    asr_model_dir = os.path.join(uncompress_path, 'asr.pdz')

    # The F0 and ASR models are loaded from the pretrained bundle and put in
    # eval mode; no optimizer is created for them below.
    F0_model = JDCNet(num_class=1, seq_len=config['max_mel_length'])
    F0_model.set_state_dict(paddle.load(jdc_model_dir)['main_params'])
    F0_model.eval()

    asr_model = ASRCNN(**config['asr_params'])
    asr_model.set_state_dict(paddle.load(asr_model_dir)['main_params'])
    asr_model.eval()

    # NOTE(review): only the generator and discriminator are wrapped in
    # DataParallel; style_encoder and mapping_network are not -- confirm
    # this is intended for multi-GPU training.
    if world_size > 1:
        generator = DataParallel(generator)
        discriminator = DataParallel(discriminator)
    print("models done!")

    lr_schedule_g = OneCycleLR(**config["generator_scheduler_params"])
    optimizer_g = AdamW(
        learning_rate=lr_schedule_g,
        parameters=generator.parameters(),
        **config["generator_optimizer_params"])

    lr_schedule_s = OneCycleLR(**config["style_encoder_scheduler_params"])
    optimizer_s = AdamW(
        learning_rate=lr_schedule_s,
        parameters=style_encoder.parameters(),
        **config["style_encoder_optimizer_params"])

    lr_schedule_m = OneCycleLR(**config["mapping_network_scheduler_params"])
    optimizer_m = AdamW(
        learning_rate=lr_schedule_m,
        parameters=mapping_network.parameters(),
        **config["mapping_network_optimizer_params"])

    lr_schedule_d = OneCycleLR(**config["discriminator_scheduler_params"])
    optimizer_d = AdamW(
        learning_rate=lr_schedule_d,
        parameters=discriminator.parameters(),
        **config["discriminator_optimizer_params"])
    print("optimizers done!")

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    if dist.get_rank() == 0:
        # Path.name handles the platform's path separators, unlike
        # splitting on "/".
        config_name = Path(args.config).name
        # copy conf to output_dir
        shutil.copyfile(args.config, output_dir / config_name)

    updater = StarGANv2VCUpdater(
        models={
            "generator": generator,
            "style_encoder": style_encoder,
            "mapping_network": mapping_network,
            "discriminator": discriminator,
            "F0_model": F0_model,
            "asr_model": asr_model,
        },
        optimizers={
            "generator": optimizer_g,
            "style_encoder": optimizer_s,
            "mapping_network": optimizer_m,
            "discriminator": optimizer_d,
        },
        schedulers={
            "generator": lr_schedule_g,
            "style_encoder": lr_schedule_s,
            "mapping_network": lr_schedule_m,
            "discriminator": lr_schedule_d,
        },
        dataloader=train_dataloader,
        g_loss_params=config.loss_params.g_loss,
        d_loss_params=config.loss_params.d_loss,
        adv_cls_epoch=config.loss_params.adv_cls_epoch,
        con_reg_epoch=config.loss_params.con_reg_epoch,
        output_dir=output_dir)

    evaluator = StarGANv2VCEvaluator(
        models={
            "generator": generator,
            "style_encoder": style_encoder,
            "mapping_network": mapping_network,
            "discriminator": discriminator,
            "F0_model": F0_model,
            "asr_model": asr_model,
        },
        dataloader=dev_dataloader,
        g_loss_params=config.loss_params.g_loss,
        d_loss_params=config.loss_params.d_loss,
        adv_cls_epoch=config.loss_params.adv_cls_epoch,
        con_reg_epoch=config.loss_params.con_reg_epoch,
        output_dir=output_dir)

    trainer = Trainer(updater, (config.max_epoch, 'epoch'), output_dir)

    # evaluation and VisualDL logging are registered on rank 0 only;
    # snapshots are registered on every rank
    if dist.get_rank() == 0:
        trainer.extend(evaluator, trigger=(1, "epoch"))
        trainer.extend(VisualDL(output_dir), trigger=(1, "iteration"))
    trainer.extend(
        Snapshot(max_size=config.num_snapshots), trigger=(1, 'epoch'))
    print("Trainer Done!")

    trainer.run()


def main():
    """Parse the command line, load the YAML config and dispatch training.

    With ``--ngpu`` greater than 1 the training function is spawned once per
    GPU via ``dist.spawn``; otherwise it runs in the current process.

    Raises:
        SystemExit: if a required command-line argument is missing.
    """
    # parse args and config and redirect to train_sp

    parser = argparse.ArgumentParser(
        description="Train a StarGANv2-VC model.")
    # config, metadata and output dir are all used unconditionally by
    # train_sp, so they are declared required to fail early and clearly.
    parser.add_argument(
        "--config",
        type=str,
        required=True,
        help="StarGANv2-VC config file.")
    parser.add_argument(
        "--train-metadata", type=str, required=True, help="training data.")
    parser.add_argument(
        "--dev-metadata", type=str, required=True, help="dev data.")
    parser.add_argument(
        "--output-dir", type=str, required=True, help="output dir.")
    parser.add_argument(
        "--ngpu", type=int, default=1, help="if ngpu == 0, use cpu.")
    parser.add_argument(
        "--speaker-dict",
        type=str,
        default=None,
        help="speaker id map file for multiple speaker model.")

    args = parser.parse_args()

    with open(args.config, 'rt') as f:
        config = CfgNode(yaml.safe_load(f))

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(config)
    print(
        f"master see the word size: {dist.get_world_size()}, from pid: {os.getpid()}"
    )

    # dispatch
    if args.ngpu > 1:
        dist.spawn(train_sp, (args, config), nprocs=args.ngpu)
    else:
        train_sp(args, config)


# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/starganv2_vc/vc.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import time
from pathlib import Path

import librosa
import paddle
import soundfile as sf
import yaml
from yacs.config import CfgNode

from paddlespeech.cli.utils import download_and_decompress
from paddlespeech.resource.pretrained_models import StarGANv2VC_source
from paddlespeech.t2s.datasets.get_feats import LogMelFBank
from paddlespeech.t2s.models.parallel_wavegan import PWGGenerator
from paddlespeech.t2s.models.starganv2_vc import Generator
from paddlespeech.t2s.models.starganv2_vc import JDCNet
from paddlespeech.t2s.models.starganv2_vc import MappingNetwork
from paddlespeech.t2s.models.starganv2_vc import StyleEncoder
from paddlespeech.utils.env import MODEL_HOME


def get_mel_extractor():
    """Create the ``LogMelFBank`` extractor with the demo's fixed settings.

    Returns:
        A ``LogMelFBank`` configured for a 16000 Hz sample rate, 80 mel bins
        and an upper frequency bound of half the sample rate.
    """
    sample_rate = 16000
    settings = dict(
        sr=sample_rate,
        n_fft=2048,
        hop_length=300,
        win_length=1200,
        n_mels=80,
        fmin=0,
        fmax=sample_rate // 2,
        norm=None,
        htk=True,
        power=2.0, )
    return LogMelFBank(**settings)


def preprocess(wave, mel_extractor):
    """Turn a waveform into a normalized, batched log-mel tensor.

    Args:
        wave: Waveform passed to ``mel_extractor.get_log_mel_fbank``.
        mel_extractor: Object providing ``get_log_mel_fbank(wave, base=...)``.

    Returns:
        A paddle tensor: the transposed natural-log mel features with a
        leading batch axis, shifted by -4 and scaled by 4.
    """
    shift, scale = -4, 4
    # per the original author's notes the extractor output is (T, 80)
    features = mel_extractor.get_log_mel_fbank(wave, base='e')
    # transpose and add a batch axis -> [1, 80, T] per the original notes
    batched = paddle.to_tensor(features.T).unsqueeze(0)
    return (batched - shift) / scale


def compute_style(speaker_dicts, mel_extractor, style_encoder, mapping_network):
    """Compute one style embedding per target speaker.

    Args:
        speaker_dicts: Mapping ``key -> (path, speaker)`` where ``path`` is a
            reference audio file (or ``''`` for none) and ``speaker`` is the
            integer speaker label.
        mel_extractor: Extractor handed to ``preprocess``.
        style_encoder: Network that derives the style from a reference mel.
        mapping_network: Network that derives the style from random noise;
            used when no reference audio is given.

    Returns:
        Dict ``key -> (ref, label)`` with the style embedding and the label
        tensor of each speaker.
    """
    reference_embeddings = {}
    for key, (path, speaker) in speaker_dicts.items():
        label = paddle.to_tensor([speaker], dtype=paddle.int64)
        # Inference only: both branches run without recording gradients
        # (previously only the style-encoder branch did).
        with paddle.no_grad():
            if path == '':
                # no reference audio: sample a style from the mapping network
                latent_dim = mapping_network.shared[0].weight.shape[0]
                ref = mapping_network(paddle.randn([1, latent_dim]), label)
            else:
                # librosa.load(sr=24000) already returns audio at 24000 Hz,
                # so no further resampling is needed.
                # NOTE: the untrimmed waveform is encoded, as before -- a
                # silence trim used to be computed here but was never used.
                wave, _ = librosa.load(path, sr=24000)
                mel_tensor = preprocess(wave=wave, mel_extractor=mel_extractor)
                ref = style_encoder(mel_tensor.unsqueeze(1), label)
        reference_embeddings[key] = (ref, label)

    return reference_embeddings


def get_models(args, uncompress_path):
    """Instantiate every network needed for voice conversion and load weights.

    Args:
        args: Parsed arguments; ``args.config_path`` names the StarGANv2-VC
            YAML config.
        uncompress_path: Directory of the decompressed pretrained bundle
            (``jdcnet.pdz``, ``Vocoder/`` and ``starganv2vc.pdz`` are read
            from it).

    Returns:
        Dict with the keys ``F0_model``, ``vocoder``, ``generator``,
        ``mapping_network`` and ``style_encoder``; all models are in eval mode.
    """
    f0_ckpt = os.path.join(uncompress_path, 'jdcnet.pdz')
    vocoder_dir = os.path.join(uncompress_path, 'Vocoder/')
    vc_ckpt = os.path.join(uncompress_path, 'starganv2vc.pdz')

    # pitch (F0) extractor
    pitch_net = JDCNet(num_class=1, seq_len=192)
    pitch_net.set_state_dict(paddle.load(f0_ckpt)['main_params'])
    pitch_net.eval()

    # vocoder: adapt the stored generator params to PWGGenerator's signature
    with open(os.path.join(vocoder_dir, 'config.yml')) as cfg_file:
        voc_cfg = CfgNode(yaml.safe_load(cfg_file))
    gen_kwargs = voc_cfg["generator_params"]
    gen_kwargs.pop("upsample_net")
    gen_kwargs["upsample_scales"] = gen_kwargs.pop("upsample_params")[
        "upsample_scales"]
    pwg = PWGGenerator(**gen_kwargs)
    # weight norm is removed before the checkpoint is loaded (order kept)
    pwg.remove_weight_norm()
    pwg.eval()
    pwg.set_state_dict(
        paddle.load(os.path.join(vocoder_dir, 'checkpoint-400000steps.pd')))

    # StarGANv2-VC networks
    with open(args.config_path) as cfg_file:
        vc_cfg = CfgNode(yaml.safe_load(cfg_file))

    generator = Generator(**vc_cfg['generator_params'])
    mapping_network = MappingNetwork(**vc_cfg['mapping_network_params'])
    style_encoder = StyleEncoder(**vc_cfg['style_encoder_params'])

    weights = paddle.load(vc_ckpt)
    for module, weight_key in (
        (generator, 'generator_params'),
        (mapping_network, 'mapping_network_params'),
        (style_encoder, 'style_encoder_params'), ):
        module.set_state_dict(weights[weight_key])
        module.eval()

    return {
        'F0_model': pitch_net,
        'vocoder': pwg,
        'generator': generator,
        'mapping_network': mapping_network,
        'style_encoder': style_encoder,
    }


def voice_conversion(args, uncompress_path):
    """Convert the source utterance to each demo speaker and write wav files.

    Args:
        args: Parsed arguments; ``source_path`` and ``output_dir`` are read
            here, and ``args`` is forwarded to ``get_models``.
        uncompress_path: Directory of the decompressed pretrained bundle; the
            reference recordings are read from ``Demo/VCTK-corpus/`` below it.

    Files written to ``output_dir`` (all at 24000 Hz):
        ``orig_voc.wav``: the source mel decoded by the vocoder.
        ``vc_result_<key>.wav``: the conversion result per target speaker.
        ``ref_voc_<key>.wav``: the reference recording decoded by the vocoder,
            when a reference recording exists for that speaker.
    """
    # The position of a speaker number in this list is its integer label.
    speakers = [
        225, 228, 229, 230, 231, 233, 236, 239, 240, 244, 226, 227, 232, 243,
        254, 256, 258, 259, 270, 273
    ]
    demo_dir = os.path.join(uncompress_path, 'Demo/VCTK-corpus/')
    model_dict = get_models(args, uncompress_path=uncompress_path)
    style_encoder = model_dict['style_encoder']
    mapping_network = model_dict['mapping_network']
    generator = model_dict['generator']
    vocoder = model_dict['vocoder']
    F0_model = model_dict['F0_model']

    # compute the style of the speakers found in the Demo folder
    speaker_dicts = {}
    selected_speakers = [273, 259, 258, 243, 254, 244, 236, 233, 230, 228]
    for s in selected_speakers:
        speaker_dicts['p' + str(s)] = (
            demo_dir + 'p' + str(s) + '/p' + str(s) + '_023.wav',
            speakers.index(s))
    mel_extractor = get_mel_extractor()
    reference_embeddings = compute_style(
        speaker_dicts=speaker_dicts,
        mel_extractor=mel_extractor,
        style_encoder=style_encoder,
        mapping_network=mapping_network)

    wave, _ = librosa.load(args.source_path, sr=24000)
    source = preprocess(wave=wave, mel_extractor=mel_extractor)
    # # Debugging aid: check whether the output of preprocess.py is usable.
    # # Both "raw then normalized here" and "already normalized" features
    # # were found to work at this point.
    # import numpy as np
    # source = np.load("~/PaddleSpeech_stargan_preprocess/PaddleSpeech/examples/vctk/vc3/dump/train/norm/p329_414_speech.npy")
    # # !!! applied to the mel_extractor output after normalization
    # # [1, 80, T]
    # source = paddle.to_tensor(source.T).unsqueeze(0)

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    orig_wav_name = str(output_dir / 'orig_voc.wav')
    print('原始语音 (使用声码器解码): %s' % orig_wav_name)
    c = source.transpose([0, 2, 1]).squeeze()
    with paddle.no_grad():
        recon = vocoder.inference(c)
        recon = recon.reshape([-1]).numpy()
    sf.write(orig_wav_name, recon, samplerate=24000)

    converted_samples = {}
    reconstructed_samples = {}
    start = time.time()

    for key, (ref, _) in reference_embeddings.items():
        with paddle.no_grad():
            # Q: may the F0_model input be un-normalized, or must the
            #    normalization match the original StarGAN work?
            # A: it must match -- ASR and F0_model share the same data
            #    preprocessing. Unless both are retrained, our preprocessing
            #    has to stay consistent with the original, which in turn
            #    means the vocoder cannot be reused.
            # Open question: are the torchaudio parameters 16k because the
            # ASR input is 16k?
            f0_feat = F0_model.get_feature_GAN(source.unsqueeze(1))
            # the output is a normalized mel, so it can be fed directly to
            # vocoder.inference
            out = generator(source.unsqueeze(1), ref, F0=f0_feat)
            c = out.transpose([0, 1, 3, 2]).squeeze()
            y_out = vocoder.inference(c)
            y_out = y_out.reshape([-1])
            if key not in speaker_dicts or speaker_dicts[key][0] == "":
                recon = None
            else:
                # decode the reference recording itself for comparison
                wave, _ = librosa.load(speaker_dicts[key][0], sr=24000)
                mel = preprocess(wave=wave, mel_extractor=mel_extractor)
                c = mel.transpose([0, 2, 1]).squeeze()
                recon = vocoder.inference(c)
                recon = recon.reshape([-1]).numpy()

        # only the waveforms are kept; the generator outputs are not retained
        converted_samples[key] = y_out.numpy()
        reconstructed_samples[key] = recon
    end = time.time()
    print('总共花费时间: %.3f sec' % (end - start))
    for key, wave in converted_samples.items():
        wav_name = str(output_dir / ('vc_result_' + key + '.wav'))
        print('语音转换结果: %s' % wav_name)
        sf.write(wav_name, wave, samplerate=24000)
        # announce the reference file only when it is actually written
        if reconstructed_samples[key] is not None:
            ref_wav_name = str(output_dir / ('ref_voc_' + key + '.wav'))
            print('参考的说话人 (使用声码器解码): %s' % ref_wav_name)
            sf.write(ref_wav_name, reconstructed_samples[key], samplerate=24000)


def parse_args():
    """Parse the command-line arguments of the voice-conversion script.

    Returns:
        ``argparse.Namespace`` with ``source_path``, ``output_dir``,
        ``config_path`` and ``ngpu``.

    Raises:
        SystemExit: if ``--source_path``, ``--output_dir`` or
            ``--config_path`` is missing. All three are consumed
            unconditionally by the rest of the script, so they are declared
            required here rather than failing later on a ``None`` value.
    """
    # parse args and config
    parser = argparse.ArgumentParser(
        description="StarGANv2-VC Voice Conversion.")
    parser.add_argument(
        "--source_path",
        type=str,
        required=True,
        help="source audio's path.")
    parser.add_argument(
        "--output_dir", type=str, required=True, help="output dir.")
    parser.add_argument(
        '--config_path',
        type=str,
        required=True,
        help='Config of StarGANv2-VC model.')
    parser.add_argument(
        "--ngpu", type=int, default=1, help="if ngpu == 0, use cpu.")
    args = parser.parse_args()
    return args


def main():
    """Pick the device, fetch the pretrained bundle and run voice conversion.

    ``--ngpu 0`` selects the CPU and a positive value selects the GPU; a
    negative value only prints a message and leaves the device untouched.
    """
    args = parse_args()
    if args.ngpu < 0:
        print("ngpu should >= 0 !")
    else:
        paddle.set_device("gpu" if args.ngpu > 0 else "cpu")
    model_version = '1.0'
    bundle_dir = download_and_decompress(StarGANv2VC_source[model_version],
                                         MODEL_HOME)
    voice_conversion(args, uncompress_path=bundle_dir)


# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/stream_play_tts.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# stream play TTS
# Before first execution, download and decompress the models in the execution directory
# wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0.zip
# wget https://paddlespeech.cdn.bcebos.com/Parakeet/released_models/mb_melgan/mb_melgan_csmsc_onnx_0.2.0.zip
# unzip fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0.zip
# unzip mb_melgan_csmsc_onnx_0.2.0.zip
import math
import time

import numpy as np
import onnxruntime as ort
import pyaudio
import soundfile as sf

from paddlespeech.server.utils.audio_process import float2pcm
from paddlespeech.server.utils.util import denorm
from paddlespeech.server.utils.util import get_chunks
from paddlespeech.t2s.frontend.zh_frontend import Frontend

# Streaming chunk settings: block sizes and paddings for the vocoder ("voc")
# and the acoustic model ("am"), plus the vocoder upsampling factor.
# NOTE(review): presumably block/pad are counted in mel frames and
# voc_upsample in audio samples per frame -- confirm against the models.
voc_block = 36
voc_pad = 14
am_block = 72
am_pad = 12
voc_upsample = 300

# Text frontend built from the phone vocabulary shipped with the model.
phones_dict = "fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0/phone_id_map.txt"
frontend = Frontend(phone_vocab_path=phones_dict, tone_vocab_path=None)

# Statistics file that unpacks into the mean and std of the acoustic model's
# mel features.
am_stat_path = "fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0/speech_stats.npy"
am_mu, am_std = np.load(am_stat_path)

# Model paths (relative to the execution directory, see the download
# instructions at the top of this file)
onnx_am_encoder = "fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0/fastspeech2_csmsc_am_encoder_infer.onnx"
onnx_am_decoder = "fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0/fastspeech2_csmsc_am_decoder.onnx"
onnx_am_postnet = "fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0/fastspeech2_csmsc_am_postnet.onnx"
onnx_voc_melgan = "mb_melgan_csmsc_onnx_0.2.0/mb_melgan_csmsc.onnx"

# Run inference on the CPU
providers = ['CPUExecutionProvider']

# Configure the ONNX Runtime session (default options)
sess_options = ort.SessionOptions()

# Create one session per model; this loads the models at import time
am_encoder_infer_sess = ort.InferenceSession(
    onnx_am_encoder, providers=providers, sess_options=sess_options)
am_decoder_sess = ort.InferenceSession(
    onnx_am_decoder, providers=providers, sess_options=sess_options)
am_postnet_sess = ort.InferenceSession(
    onnx_am_postnet, providers=providers, sess_options=sess_options)
voc_melgan_sess = ort.InferenceSession(
    onnx_voc_melgan, providers=providers, sess_options=sess_options)


def depadding(data, chunk_num, chunk_id, block, pad, upsample):
    """
    Drop the context padding from the output of one streaming chunk.

    Args:
        data: sliceable output of the chunk (first axis is trimmed).
        chunk_num: total number of chunks.
        chunk_id: zero-based index of this chunk.
        block: chunk size without padding.
        pad: size of the context padding on each side.
        upsample: factor converting ``block``/``pad`` units to ``data`` units.

    Returns:
        The slice of ``data`` that belongs to the un-padded chunk.
    """
    block_len = block * upsample

    # First chunk: there is no leading padding, only keep the block itself.
    if chunk_id == 0:
        return data[:block_len]

    # Leading padding can never exceed what precedes this chunk.
    lead = min(chunk_id * block, pad) * upsample

    # Last chunk: everything after the leading padding is real output.
    if chunk_id == chunk_num - 1:
        return data[lead:]

    # Middle chunk: skip the leading padding and keep exactly one block.
    return data[lead:lead + block_len]


def inference_stream(text):
    """
    Synthesize ``text`` chunk by chunk and yield the audio as it is produced.

    For each sentence returned by the frontend the encoder session runs once,
    the decoder and postnet sessions run on padded chunks of the encoder
    output, and every time enough de-normalized mel frames are available the
    next vocoder chunk is run, de-padded and yielded.

    Args:
        text: text passed to ``frontend.get_input_ids``.

    Yields:
        The de-padded vocoder output of one vocoder chunk.
    """
    frontend_out = frontend.get_input_ids(
        text, merge_sentences=False, get_tone_ids=False)
    for sentence_ids in frontend_out["phone_ids"]:
        encoder_out = am_encoder_infer_sess.run(
            None, input_feed={'text': sentence_ids.numpy()})[0]

        # Vocoder chunk bookkeeping for this sentence.
        mel_len = encoder_out.shape[1]
        voc_chunk_num = math.ceil(mel_len / voc_block)
        voc_chunk_id = 0
        start = 0
        end = min(voc_block + voc_pad, mel_len)

        # Streaming acoustic model: decode the encoder output chunk by chunk.
        am_chunks = get_chunks(encoder_out, am_block, am_pad, "am")
        am_chunk_num = len(am_chunks)
        mel_streaming = None
        for am_chunk_id, hs in enumerate(am_chunks):
            decoder_out = am_decoder_sess.run(None, input_feed={'xs': hs})
            postnet_in = np.transpose(decoder_out[0], (0, 2, 1))
            postnet_out = am_postnet_sess.run(
                None, input_feed={'xs': postnet_in})
            # decoder_out is the raw run() result (not decoder_out[0]); the
            # double [0][0] below undoes the extra nesting this produces.
            summed = decoder_out + np.transpose(postnet_out[0], (0, 2, 1))
            normalized_mel = summed[0][0]

            sub_mel = depadding(
                denorm(normalized_mel, am_mu, am_std), am_chunk_num,
                am_chunk_id, am_block, am_pad, 1)

            if mel_streaming is None:
                mel_streaming = sub_mel
            else:
                mel_streaming = np.concatenate((mel_streaming, sub_mel), axis=0)

            # Streaming vocoder: run as soon as the accumulated mel frames
            # cover the next (padded) vocoder chunk.
            while (voc_chunk_id < voc_chunk_num and
                   mel_streaming.shape[0] >= end):
                voc_out = voc_melgan_sess.run(
                    output_names=None,
                    input_feed={'logmel': mel_streaming[start:end, :]})

                yield depadding(voc_out[0], voc_chunk_num, voc_chunk_id,
                                voc_block, voc_pad, voc_upsample)

                voc_chunk_id += 1
                start = max(0, voc_chunk_id * voc_block - voc_pad)
                end = min((voc_chunk_id + 1) * voc_block + voc_pad, mel_len)


# Demo entry point: warm up the sessions, stream the synthesized audio to the
# sound card while timing each chunk, then save the whole utterance to disk.
if __name__ == '__main__':

    text = "欢迎使用飞桨语音合成系统，测试一下合成效果。"
    # warm up
    # The first onnxruntime call takes noticeably longer, so run a short
    # warm-up sentence first and discard its output.
    for sub_wav in inference_stream(text="哈哈哈哈"):
        continue

    # Open a pyaudio output stream for playback
    p = pyaudio.PyAudio()
    stream = p.open(
        format=p.get_format_from_width(2),  # int16
        channels=1,
        rate=24000,
        output=True)

    # Time each chunk: t1 is reset after every chunk, so the printed value is
    # the latency of that chunk (including playback of the previous one).
    wavs = []
    t1 = time.time()
    for sub_wav in inference_stream(text):
        print("响应时间：", time.time() - t1)
        t1 = time.time()
        wavs.append(sub_wav.flatten())
        # float32 to int16
        wav = float2pcm(sub_wav)
        # to bytes
        wav_bytes = wav.tobytes()
        stream.write(wav_bytes)

    # Shut down the pyaudio player
    stream.stop_stream()
    stream.close()
    p.terminate()

    # Export the result of the streaming synthesis
    wav = np.concatenate(wavs)
    print(wav.shape)
    sf.write("demo_stream.wav", data=wav, samplerate=24000)


================================================
FILE: paddlespeech/t2s/exps/syn_utils.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import os
import re
from pathlib import Path
from typing import Any
from typing import Dict
from typing import List
from typing import Optional

import jsonlines
import numpy as np
import onnxruntime as ort
import paddle
from paddle import inference
from paddle import jit
from paddle.io import DataLoader
from paddle.static import InputSpec
from yacs.config import CfgNode

import paddlespeech.utils
from paddlespeech.t2s.datasets.am_batch_fn import *
from paddlespeech.t2s.datasets.data_table import DataTable
from paddlespeech.t2s.datasets.vocoder_batch_fn import Clip_static
from paddlespeech.t2s.frontend.canton_frontend import CantonFrontend
from paddlespeech.t2s.frontend.en_frontend import English
from paddlespeech.t2s.frontend.mix_frontend import MixFrontend
from paddlespeech.t2s.frontend.sing_frontend import SingFrontend
from paddlespeech.t2s.frontend.zh_frontend import Frontend
from paddlespeech.t2s.modules.normalizer import ZScore
from paddlespeech.utils.dynamic_import import dynamic_import

# remove [W:onnxruntime: xxx] from ort
# (raise the default onnxruntime logger severity so warnings are not printed)
ort.set_default_logger_severity(3)

# Maps a short model name to its "module.path:ClassName" import string.
# Entries come in pairs: "<name>" is the model class and "<name>_inference"
# the matching inference wrapper; both are resolved through dynamic_import()
# by the get_*_inference helpers in this file.
model_alias = {
    # acoustic model
    "speedyspeech":
    "paddlespeech.t2s.models.speedyspeech:SpeedySpeech",
    "speedyspeech_inference":
    "paddlespeech.t2s.models.speedyspeech:SpeedySpeechInference",
    "fastspeech2":
    "paddlespeech.t2s.models.fastspeech2:FastSpeech2",
    "fastspeech2_inference":
    "paddlespeech.t2s.models.fastspeech2:FastSpeech2Inference",
    "tacotron2":
    "paddlespeech.t2s.models.tacotron2:Tacotron2",
    "tacotron2_inference":
    "paddlespeech.t2s.models.tacotron2:Tacotron2Inference",
    "diffsinger":
    "paddlespeech.t2s.models.diffsinger:DiffSinger",
    "diffsinger_inference":
    "paddlespeech.t2s.models.diffsinger:DiffSingerInference",

    # voc
    "pwgan":
    "paddlespeech.t2s.models.parallel_wavegan:PWGGenerator",
    "pwgan_inference":
    "paddlespeech.t2s.models.parallel_wavegan:PWGInference",
    "mb_melgan":
    "paddlespeech.t2s.models.melgan:MelGANGenerator",
    "mb_melgan_inference":
    "paddlespeech.t2s.models.melgan:MelGANInference",
    "style_melgan":
    "paddlespeech.t2s.models.melgan:StyleMelGANGenerator",
    "style_melgan_inference":
    "paddlespeech.t2s.models.melgan:StyleMelGANInference",
    "hifigan":
    "paddlespeech.t2s.models.hifigan:HiFiGANGenerator",
    "hifigan_inference":
    "paddlespeech.t2s.models.hifigan:HiFiGANInference",
    "wavernn":
    "paddlespeech.t2s.models.wavernn:WaveRNN",
    "wavernn_inference":
    "paddlespeech.t2s.models.wavernn:WaveRNNInference",
    "erniesat":
    "paddlespeech.t2s.models.ernie_sat:ErnieSAT",
    "erniesat_inference":
    "paddlespeech.t2s.models.ernie_sat:ErnieSATInference",
}


def denorm(data, mean, std):
    """Undo z-score normalization: scale ``data`` by ``std`` and shift by ``mean``."""
    rescaled = data * std
    return rescaled + mean


def norm(data, mean, std):
    """Apply z-score normalization: subtract ``mean`` and divide by ``std``."""
    centered = data - mean
    return centered / std


def get_chunks(mel, chunk_size: int, pad_size: int):
    """
    Cut ``mel`` along its second axis into overlapping chunks.

    Every chunk covers ``chunk_size`` frames plus up to ``pad_size`` frames of
    context on each side; the context is clipped at the sequence boundaries.

    Args:
        mel (paddle.Tensor): mel spectrogram, shape (B, T, D)
        chunk_size (int): number of frames per chunk, excluding context.
        pad_size (int): number of context frames on the left and on the right.

    Returns:
        List of slices ``mel[:, start:end, :]``, one per chunk, in order.
    """
    total_frames = mel.shape[1]
    num_chunks = math.ceil(total_frames / chunk_size)

    def _bounds(index):
        # Padded [left, right) frame range of chunk ``index``, clipped to the sequence.
        left = max(0, index * chunk_size - pad_size)
        right = min((index + 1) * chunk_size + pad_size, total_frames)
        return left, right

    pieces = []
    for index in range(num_chunks):
        left, right = _bounds(index)
        pieces.append(mel[:, left:right, :])
    return pieces


# input
def get_sentences(text_file: Optional[os.PathLike], lang: str='zh'):
    """
    Read ``(utt_id, sentence)`` pairs from a text file.

    Every non-blank line must hold an utterance id followed by whitespace and
    the sentence; only the first run of whitespace is used as the separator.
    Blank lines are skipped.

    Args:
        text_file (os.PathLike): path of a UTF-8 text file.
        lang (str): accepted for interface compatibility; not used here.

    Returns:
        List[Tuple[str, str]]: the parsed pairs in file order.

    Raises:
        AssertionError: if a non-blank line has no sentence after the id.
    """
    # construct dataset for evaluation
    sentences = []
    with open(text_file, 'rt', encoding='utf-8') as f:
        for line in f:
            stripped = line.strip()
            if stripped == "":
                # A blank line carries no utterance. Appending here (as the
                # code used to) duplicated the previous pair, or failed on an
                # unbound name when the file started with a blank line.
                continue
            items = re.split(r"\s+", stripped, maxsplit=1)
            assert len(items) == 2
            utt_id, sentence = items
            sentences.append((utt_id, sentence))
    return sentences


# input for svs
def get_sentences_svs(text_file: Optional[os.PathLike]):
    """
    Read singing-voice-synthesis inputs from a jsonlines file.

    Args:
        text_file (os.PathLike): path of the jsonlines file; every record must
            contain an ``utt_id`` key.

    Returns:
        List of ``(utt_id, record)`` tuples, where ``record`` is the complete
        object read from the file.
    """
    # construct dataset for evaluation
    with jsonlines.open(text_file, 'r') as reader:
        records = list(reader)
    return [(record['utt_id'], record) for record in records]


# am only
def get_test_dataset(test_metadata: List[Dict[str, Any]],
                     am: str,
                     speaker_dict: Optional[os.PathLike]=None,
                     voice_cloning: bool=False):
    """
    Build the evaluation ``DataTable`` for an acoustic model.

    Args:
        test_metadata (List[Dict[str, Any]]): metadata records of the test set.
        am (str): acoustic model tag of the form ``{model_name}_{dataset}``.
        speaker_dict (os.PathLike): speaker id map; when given together with a
            multi-speaker fastspeech2 dataset the ``spk_id`` field is loaded.
        voice_cloning (bool): load the ``spk_emb`` field where supported.

    Returns:
        DataTable restricted to the fields the selected model needs.

    Raises:
        ValueError: if the model name in ``am`` is not supported. Previously
            this case only printed a message and then failed on an unbound
            local variable.
    """
    # model: {model_name}_{dataset}
    am_name = am[:am.rindex('_')]
    am_dataset = am[am.rindex('_') + 1:]
    converters = {}
    if am_name == 'fastspeech2':
        fields = ["utt_id", "text"]
        if am_dataset in {"aishell3", "vctk", "mix",
                          "canton"} and speaker_dict is not None:
            print("multiple speaker fastspeech2!")
            fields += ["spk_id"]
        elif voice_cloning:
            print("voice cloning!")
            fields += ["spk_emb"]
        else:
            print("single speaker fastspeech2!")
    elif am_name == 'diffsinger':
        fields = ["utt_id", "text", "note", "note_dur", "is_slur"]
    elif am_name == 'speedyspeech':
        fields = ["utt_id", "phones", "tones"]
    elif am_name == 'tacotron2':
        fields = ["utt_id", "text"]
        if voice_cloning:
            print("voice cloning!")
            fields += ["spk_emb"]
    elif am_name == 'erniesat':
        fields = [
            "utt_id", "text", "text_lengths", "speech", "speech_lengths",
            "align_start", "align_end"
        ]
        # "speech" is stored as a path and loaded lazily with numpy.
        converters = {"speech": np.load}
    else:
        raise ValueError(
            "wrong am, please input right am!!! (got {!r})".format(am))

    test_dataset = DataTable(
        data=test_metadata, fields=fields, converters=converters)
    return test_dataset


# am and voc, for PTQ_static
def get_dev_dataloader(dev_metadata: List[Dict[str, Any]],
                       am: str,
                       batch_size: int=1,
                       speaker_dict: Optional[os.PathLike]=None,
                       voice_cloning: bool=False,
                       n_shift: int=300,
                       batch_max_steps: int=16200,
                       shuffle: bool=True):
    """
    Build the dev-set ``DataLoader`` for an acoustic model or a vocoder.

    Args:
        dev_metadata (List[Dict[str, Any]]): metadata records of the dev set.
        am (str): model tag ``{model_name}_{dataset}``; the model name may be
            an acoustic model (fastspeech2 / speedyspeech / tacotron2) or a
            vocoder (pwgan / mb_melgan / hifigan).
        batch_size (int): batch size of the loader.
        speaker_dict (os.PathLike): speaker id map; enables the multi-speaker
            collate function on multi-speaker datasets.
        voice_cloning (bool): use speaker embeddings where supported.
        n_shift (int): hop size handed to ``Clip_static`` (vocoder only).
        batch_max_steps (int): clip length handed to ``Clip_static`` (vocoder only).
        shuffle (bool): whether the loader shuffles the data.

    Returns:
        DataLoader over the dev set.

    Raises:
        ValueError: if the model name is neither a supported acoustic model
            nor a supported vocoder. Previously this case failed later on an
            unbound local variable.
    """
    vocoder_names = {'pwgan', 'mb_melgan', 'hifigan'}
    # model: {model_name}_{dataset}
    am_name = am[:am.rindex('_')]
    am_dataset = am[am.rindex('_') + 1:]
    converters = {}
    if am_name == 'fastspeech2':
        fields = ["utt_id", "text"]
        if am_dataset in {"aishell3", "vctk", "mix",
                          "canton"} and speaker_dict is not None:
            print("multiple speaker fastspeech2!")
            collate_fn = fastspeech2_multi_spk_batch_fn_static
            fields += ["spk_id"]
        elif voice_cloning:
            print("voice cloning!")
            collate_fn = fastspeech2_multi_spk_batch_fn_static
            fields += ["spk_emb"]
        else:
            print("single speaker fastspeech2!")
            collate_fn = fastspeech2_single_spk_batch_fn_static
    elif am_name == 'speedyspeech':
        fields = ["utt_id", "phones", "tones"]
        if am_dataset in {"aishell3", "vctk",
                          "mix"} and speaker_dict is not None:
            print("multiple speaker speedyspeech!")
            collate_fn = speedyspeech_multi_spk_batch_fn_static
            # Keep "spk_id": the field list used to be reset after this
            # branch, so the multi-speaker collate function never saw it.
            fields += ["spk_id"]
        else:
            print("single speaker speedyspeech!")
            collate_fn = speedyspeech_single_spk_batch_fn_static
    elif am_name == 'tacotron2':
        fields = ["utt_id", "text"]
        if voice_cloning:
            print("voice cloning!")
            collate_fn = tacotron2_multi_spk_batch_fn_static
            fields += ["spk_emb"]
        else:
            print("single speaker tacotron2!")
            collate_fn = tacotron2_single_spk_batch_fn_static
    elif am_name in vocoder_names:
        print("voc dataloader")
    else:
        raise ValueError(
            "unsupported model {!r}: expected fastspeech2, speedyspeech, "
            "tacotron2, pwgan, mb_melgan or hifigan".format(am))

    # am
    if am_name not in vocoder_names:
        dev_dataset = DataTable(
            data=dev_metadata,
            fields=fields,
            converters=converters, )

        dev_dataloader = DataLoader(
            dev_dataset,
            shuffle=shuffle,
            drop_last=False,
            batch_size=batch_size,
            collate_fn=collate_fn)
    # vocoder
    else:
        # pwgan: batch_max_steps: 25500 aux_context_window: 2
        # mb_melgan: batch_max_steps: 16200 aux_context_window 0
        # hifigan: batch_max_steps: 8400 aux_context_window 0
        aux_context_window = 0
        if am_name == 'pwgan':
            aux_context_window = 2

        train_batch_fn = Clip_static(
            batch_max_steps=batch_max_steps,
            hop_size=n_shift,
            aux_context_window=aux_context_window)
        dev_dataset = DataTable(
            data=dev_metadata,
            fields=["wave", "feats"],
            converters={
                "wave": np.load,
                "feats": np.load,
            }, )

        dev_dataloader = DataLoader(
            dev_dataset,
            shuffle=shuffle,
            drop_last=False,
            batch_size=batch_size,
            collate_fn=train_batch_fn)

    return dev_dataloader


# frontend
def get_frontend(lang: str='zh',
                 phones_dict: Optional[os.PathLike]=None,
                 tones_dict: Optional[os.PathLike]=None,
                 pinyin_phone: Optional[os.PathLike]=None,
                 use_rhy=False):
    """
    Create the text frontend for a language.

    Args:
        lang (str): one of 'zh', 'canton', 'en', 'mix', 'sing'.
        phones_dict (os.PathLike): phone vocabulary path.
        tones_dict (os.PathLike): tone vocabulary path (used by 'zh' and 'mix').
        pinyin_phone (os.PathLike): pinyin-to-phone map path (used by 'sing').
        use_rhy: forwarded to the 'zh' frontend as ``use_rhy``.

    Returns:
        The frontend instance for ``lang``.

    Raises:
        ValueError: if ``lang`` is not supported. Previously this case printed
            a message and then failed on an unbound local variable.
    """
    if lang == 'zh':
        frontend = Frontend(
            phone_vocab_path=phones_dict,
            tone_vocab_path=tones_dict,
            use_rhy=use_rhy)
    elif lang == 'canton':
        frontend = CantonFrontend(phone_vocab_path=phones_dict)
    elif lang == 'en':
        frontend = English(phone_vocab_path=phones_dict)
    elif lang == 'mix':
        frontend = MixFrontend(
            phone_vocab_path=phones_dict, tone_vocab_path=tones_dict)
    elif lang == 'sing':
        frontend = SingFrontend(
            pinyin_phone_path=pinyin_phone, phone_vocab_path=phones_dict)
    else:
        raise ValueError(
            "wrong lang! lang should in {'zh', 'en', 'mix', 'canton', 'sing'}, "
            "got " + repr(lang))
    return frontend


def run_frontend(
        frontend: object,
        text: str,
        merge_sentences: bool=False,
        get_tone_ids: bool=False,
        lang: str='zh',
        to_tensor: bool=True,
        add_blank: bool=False,
        svs_input: Dict[str, str]=None, ):
    """
    Run a text frontend and collect the ids the acoustic model needs.

    Args:
        frontend (object): frontend instance matching ``lang``.
        text (str): input text; for 'zh', text containing a
            ``<speak>...</speak>`` element is routed to the SSML entry point.
        merge_sentences (bool): forwarded to the frontend.
        get_tone_ids (bool): also return ``tone_ids`` ('zh' only).
        lang (str): one of 'zh', 'canton', 'en', 'mix', 'sing'.
        to_tensor (bool): forwarded to the frontend.
        add_blank (bool): forwarded to the non-SSML 'zh' frontend call.
        svs_input (Dict[str, str]): singing input, used when ``lang == 'sing'``.

    Returns:
        dict with ``phone_ids`` and, depending on ``lang`` and
        ``get_tone_ids``, ``tone_ids`` or ``note_ids`` / ``note_durs`` /
        ``is_slurs``.

    Raises:
        ValueError: if ``lang`` is not supported. Previously this case printed
            a message and then failed on an unbound local variable.
    """
    outs = dict()
    if lang == 'zh':
        input_ids = {}
        if text.strip() != "" and re.match(r".*?<speak>.*?</speak>.*", text,
                                           re.DOTALL):
            # using ssml
            input_ids = frontend.get_input_ids_ssml(
                text,
                merge_sentences=merge_sentences,
                get_tone_ids=get_tone_ids,
                to_tensor=to_tensor)
        else:
            input_ids = frontend.get_input_ids(
                text,
                merge_sentences=merge_sentences,
                get_tone_ids=get_tone_ids,
                to_tensor=to_tensor,
                add_blank=add_blank)
        phone_ids = input_ids["phone_ids"]
        if get_tone_ids:
            tone_ids = input_ids["tone_ids"]
            outs.update({'tone_ids': tone_ids})
    elif lang in {'canton', 'en', 'mix'}:
        # These frontends share the same call signature and only yield phones.
        input_ids = frontend.get_input_ids(
            text, merge_sentences=merge_sentences, to_tensor=to_tensor)
        phone_ids = input_ids["phone_ids"]
    elif lang == 'sing':
        input_ids = frontend.get_input_ids(
            svs_input=svs_input, to_tensor=to_tensor)
        phone_ids = input_ids["phone_ids"]
        outs.update({'note_ids': input_ids["note_ids"]})
        outs.update({'note_durs': input_ids["note_durs"]})
        outs.update({'is_slurs': input_ids["is_slurs"]})
    else:
        raise ValueError(
            "lang should in {'zh', 'en', 'mix', 'canton', 'sing'}! got " +
            repr(lang))

    outs.update({'phone_ids': phone_ids})
    return outs


# dygraph
def get_am_inference(
        am: str='fastspeech2_csmsc',
        am_config: CfgNode=None,
        am_ckpt: Optional[os.PathLike]=None,
        am_stat: Optional[os.PathLike]=None,
        phones_dict: Optional[os.PathLike]=None,
        tones_dict: Optional[os.PathLike]=None,
        speaker_dict: Optional[os.PathLike]=None,
        return_am: bool=False,
        speech_stretchs: Optional[os.PathLike]=None, ):
    """
    Build a dygraph acoustic model and wrap it for inference.

    Args:
        am (str): acoustic model tag ``{model_name}_{dataset}``.
        am_config (CfgNode): model config; ``n_mels`` and ``model`` are read.
        am_ckpt (os.PathLike): checkpoint; its ``main_params`` entry is loaded.
        am_stat (os.PathLike): ``.npy`` file unpacked as ``(mean, std)`` for
            the output normalizer.
        phones_dict (os.PathLike): phone vocabulary, one entry per line.
        tones_dict (os.PathLike): optional tone vocabulary, one entry per line.
        speaker_dict (os.PathLike): optional speaker map, one entry per line.
        return_am (bool): also return the bare acoustic model.
        speech_stretchs (os.PathLike): ``.npy`` file whose rows 0 and 1 are
            used as ``spec_min`` / ``spec_max`` (diffsinger only).

    Returns:
        The inference wrapper, or ``(inference wrapper, model)`` when
        ``return_am`` is true.
    """
    with open(phones_dict, 'rt', encoding='utf-8') as f:
        phn_id = [line.strip().split() for line in f.readlines()]
    vocab_size = len(phn_id)
    tone_size = None
    if tones_dict is not None:
        with open(tones_dict, 'rt', encoding='utf-8') as f:
            tone_id = [line.strip().split() for line in f.readlines()]
        tone_size = len(tone_id)
    spk_num = None
    if speaker_dict is not None:
        with open(speaker_dict, 'rt', encoding='utf-8') as f:
            spk_id = [line.strip().split() for line in f.readlines()]
        spk_num = len(spk_id)
    odim = am_config.n_mels
    # model: {model_name}_{dataset}
    am_name = am[:am.rindex('_')]
    am_dataset = am[am.rindex('_') + 1:]
    am_class = dynamic_import(am_name, model_alias)
    am_inference_class = dynamic_import(am_name + '_inference', model_alias)
    if am_name == 'fastspeech2':
        am = am_class(
            idim=vocab_size, odim=odim, spk_num=spk_num, **am_config["model"])
    elif am_name == 'diffsinger':
        # Load the binary .npy once; np.load opens the path itself, so no
        # separate (text-mode) file handle is needed.
        speech_stretch = np.load(speech_stretchs)
        spec_min = paddle.to_tensor(speech_stretch[0])
        spec_max = paddle.to_tensor(speech_stretch[1])
        am_config["model"]["fastspeech2_params"]["spk_num"] = spk_num
        am = am_class(
            spec_min=spec_min,
            spec_max=spec_max,
            idim=vocab_size,
            odim=odim,
            **am_config["model"], )
    elif am_name == 'speedyspeech':
        am = am_class(
            vocab_size=vocab_size,
            tone_size=tone_size,
            spk_num=spk_num,
            **am_config["model"])
    elif am_name == 'tacotron2':
        am = am_class(idim=vocab_size, odim=odim, **am_config["model"])
    elif am_name == 'erniesat':
        am = am_class(idim=vocab_size, odim=odim, **am_config["model"])

    am.set_state_dict(paddle.load(am_ckpt)["main_params"])
    am.eval()
    am_mu, am_std = np.load(am_stat)
    am_mu = paddle.to_tensor(am_mu)
    am_std = paddle.to_tensor(am_std)
    am_normalizer = ZScore(am_mu, am_std)
    am_inference = am_inference_class(am_normalizer, am)
    am_inference.eval()
    if return_am:
        return am_inference, am
    else:
        return am_inference


def get_voc_inference(
        voc: str='pwgan_csmsc',
        voc_config: Optional[os.PathLike]=None,
        voc_ckpt: Optional[os.PathLike]=None,
        voc_stat: Optional[os.PathLike]=None, ):
    """
    Build a dygraph vocoder and wrap it for inference.

    Args:
        voc (str): vocoder tag ``{model_name}_{dataset}``.
        voc_config: vocoder config, indexed with ``"model"`` for wavernn and
            with ``"generator_params"`` for every other vocoder.
        voc_ckpt (os.PathLike): checkpoint; ``main_params`` (wavernn) or
            ``generator_params`` (others) is loaded from it.
        voc_stat (os.PathLike): ``.npy`` file unpacked as ``(mean, std)`` for
            the input normalizer.

    Returns:
        The vocoder inference wrapper, in eval mode.
    """
    # model: {model_name}_{dataset}
    voc_name = voc[:voc.rindex('_')]
    generator_cls = dynamic_import(voc_name, model_alias)
    inference_cls = dynamic_import(voc_name + '_inference', model_alias)

    # npu only support mode=constant right now
    # this code has been adapted to support 'paddlespeech.t2s.models.melgan.melgan.MelGANGenerator'
    if paddle.get_device().startswith('npu'):
        npu_pad_mode = {"mode": "constant"}
    else:
        npu_pad_mode = {}

    # wavernn keeps its hyper-parameters and weights under different keys
    # than the GAN vocoders and has no weight norm to strip.
    is_wavernn = voc_name == 'wavernn'
    config_key = "model" if is_wavernn else "generator_params"
    state_key = "main_params" if is_wavernn else "generator_params"

    if npu_pad_mode:
        voc_config[config_key].setdefault("pad_params", {})
        voc_config[config_key]["pad_params"].update(npu_pad_mode)

    voc = generator_cls(**voc_config[config_key])
    voc.set_state_dict(paddle.load(voc_ckpt)[state_key])
    if not is_wavernn:
        voc.remove_weight_norm()
    voc.eval()

    mean, std = np.load(voc_stat)
    normalizer = ZScore(paddle.to_tensor(mean), paddle.to_tensor(std))
    voc_inference = inference_cls(normalizer, voc)
    voc_inference.eval()
    return voc_inference


# dygraph to static graph
def am_to_static(am_inference,
                 am: str='fastspeech2_csmsc',
                 inference_dir: Optional[os.PathLike]=None,
                 speaker_dict: Optional[os.PathLike]=None):
    """
    Convert an acoustic-model inference wrapper to a static graph and save it.

    The model is traced with an input spec chosen from the model name and the
    dataset, saved under ``inference_dir`` with ``am`` as file prefix, and
    loaded back.

    Args:
        am_inference: dygraph inference wrapper to convert.
        am (str): acoustic model tag ``{model_name}_{dataset}``.
        inference_dir (os.PathLike): output directory. Must be provided; the
            default used to be the ``Optional[os.PathLike]`` typing object
            because ``=`` was written instead of ``:``.
        speaker_dict (os.PathLike): speaker map; when given with a
            multi-speaker dataset a speaker-id input is added to the spec.

    Returns:
        The static model loaded back with ``jit.load``.
    """
    # model: {model_name}_{dataset}
    am_name = am[:am.rindex('_')]
    am_dataset = am[am.rindex('_') + 1:]
    if am_name == 'fastspeech2':
        if am_dataset in {"aishell3", "vctk", "mix",
                          "canton"} and speaker_dict is not None:
            am_inference = jit.to_static(
                am_inference,
                input_spec=[
                    InputSpec([-1], dtype=paddle.int64),
                    InputSpec([1], dtype=paddle.int64),
                ], )
        else:
            am_inference = jit.to_static(
                am_inference, input_spec=[InputSpec([-1], dtype=paddle.int64)])

    elif am_name == 'speedyspeech':
        if am_dataset in {"aishell3", "vctk", "mix",
                          "canton"} and speaker_dict is not None:
            am_inference = jit.to_static(
                am_inference,
                input_spec=[
                    InputSpec([-1], dtype=paddle.int64),  # text
                    InputSpec([-1], dtype=paddle.int64),  # tone
                    InputSpec([1], dtype=paddle.int64),  # spk_id
                    None  # duration
                ])
        else:
            am_inference = jit.to_static(
                am_inference,
                input_spec=[
                    InputSpec([-1], dtype=paddle.int64),
                    InputSpec([-1], dtype=paddle.int64)
                ])

    elif am_name == 'tacotron2':
        am_inference = jit.to_static(
            am_inference, input_spec=[InputSpec([-1], dtype=paddle.int64)])

    elif am_name == 'vits' or am_name == 'jets':
        if am_dataset in {"aishell3", "vctk"} and speaker_dict is not None:
            am_inference = jit.to_static(
                am_inference,
                input_spec=[
                    InputSpec([-1], dtype=paddle.int64),
                    InputSpec([1], dtype=paddle.int64),
                ])
        else:
            am_inference = jit.to_static(
                am_inference, input_spec=[InputSpec([-1], dtype=paddle.int64)])

    elif am_name == 'diffsinger':
        am_inference = jit.to_static(
            am_inference,
            input_spec=[
                InputSpec([-1], dtype=paddle.int64),  # phone
                InputSpec([-1], dtype=paddle.int64),  # note
                InputSpec([-1], dtype=paddle.float32),  # note_dur
                InputSpec([-1], dtype=paddle.int64),  # is_slur
            ])

    # Round-trip through disk so the caller gets the exported static model.
    jit.save(am_inference, os.path.join(inference_dir, am))
    am_inference = jit.load(os.path.join(inference_dir, am))

    return am_inference


def voc_to_static(voc_inference,
                  voc: str='pwgan_csmsc',
                  inference_dir: Optional[os.PathLike]=None):
    """
    Convert a vocoder inference wrapper to a static graph and save it.

    Args:
        voc_inference: dygraph vocoder inference wrapper to convert.
        voc (str): vocoder tag, used as the saved file prefix.
        inference_dir (os.PathLike): output directory. Must be provided; the
            default used to be the ``Optional[os.PathLike]`` typing object
            because ``=`` was written instead of ``:``.

    Returns:
        The static model loaded back with ``jit.load``.
    """
    voc_inference = jit.to_static(
        voc_inference, input_spec=[
            InputSpec([-1, 80], dtype=paddle.float32),
        ])
    # Round-trip through disk so the caller gets the exported static model.
    jit.save(voc_inference, os.path.join(inference_dir, voc))
    voc_inference = jit.load(os.path.join(inference_dir, voc))
    return voc_inference


# inference
def get_predictor(
        model_dir: Optional[os.PathLike]=None,
        model_file: Optional[os.PathLike]=None,
        params_file: Optional[os.PathLike]=None,
        device: str='cpu',
        # for gpu
        use_trt: bool=False,
        device_id: int=0,
        # for trt
        use_dynamic_shape: bool=True,
        min_subgraph_size: int=5,
        # for cpu
        cpu_threads: int=1,
        use_mkldnn: bool=False,
        # for trt or mkldnn
        precision: str="fp32"):
    """
    Create a Paddle Inference predictor.

    Args:
        model_dir (os.PathLike): root path of model.pdmodel and model.pdiparams.
        model_file (os.PathLike): name of model_file.
        params_file (os.PathLike): name of params_file.
        device (str): Choose the device you want to run, it can be: cpu/gpu, default is cpu.
        use_trt (bool): whether to use TensorRT or not in GPU.
        device_id (int): Choose your device id, only valid when the device is gpu, default 0.
        use_dynamic_shape (bool): use dynamic shape or not in TensorRT.
        min_subgraph_size (int): min_subgraph_size passed to the TensorRT engine.
        use_mkldnn (bool): whether to use MKLDNN or not in CPU.
        cpu_threads (int): num of thread when use CPU.
        precision (str): mode of running (fp32/fp16/bf16/int8).

    Returns:
        The predictor created from the assembled config.

    Raises:
        ValueError: if ``use_trt`` is set while ``device`` is not 'gpu'.
        AssertionError: if, with the new inference interface, model_file and
            params_file do not share the same prefix, or if ``precision`` is
            not supported by TensorRT.
    """
    rerun_flag = False
    if device != "gpu" and use_trt:
        raise ValueError(
            "Predict by TensorRT mode: {}, expect device=='gpu', but device == {}".
            format(precision, device))

    # after paddle 3.0, support new inference interface
    if paddlespeech.utils.satisfy_paddle_version('3.0.0-beta'):
        model_name = str(model_file).rsplit('.', 1)[0]
        # Remove the suffix explicitly: str.rstrip('.pdiparams') strips any
        # trailing characters from that set and mangles prefixes such as
        # "model_params".
        params_suffix = '.pdiparams'
        params_name = str(params_file)
        if params_name.endswith(params_suffix):
            params_name = params_name[:-len(params_suffix)]
        assert model_name == params_name, "The prefix of model_file and params_file should be same."
        config = inference.Config(model_dir, model_name)
    else:
        config = inference.Config(
            str(Path(model_dir) / model_file),
            str(Path(model_dir) / params_file))
    # NOTE(review): this is a lexicographic string comparison of versions;
    # "0.0.0" is excluded explicitly.
    if paddle.__version__ <= "2.5.2" and paddle.__version__ != "0.0.0":
        config.enable_memory_optim()
    config.switch_ir_optim(True)
    if device == "gpu":
        config.enable_use_gpu(100, device_id)
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(cpu_threads)
        if use_mkldnn:
            # fp32
            config.enable_mkldnn()
            if precision == "int8":
                config.enable_mkldnn_int8({
                    "conv2d_transpose", "conv2d", "depthwise_conv2d", "pool2d",
                    "transpose2", "elementwise_mul"
                })
                # config.enable_mkldnn_int8()
            elif precision in {"fp16", "bf16"}:
                config.enable_mkldnn_bfloat16()
            print("MKLDNN with {}".format(precision))
    if use_trt:
        if precision == "bf16":
            print("paddle trt does not support bf16, switching to fp16.")
            precision = "fp16"
        precision_map = {
            "int8": inference.Config.Precision.Int8,
            "fp32": inference.Config.Precision.Float32,
            "fp16": inference.Config.Precision.Half,
        }
        assert precision in precision_map.keys()
        # str() so that a pathlib.Path model_file works as well.
        pdtxt_name = str(model_file).split(".")[0] + "_" + precision + ".txt"
        if use_dynamic_shape:
            dynamic_shape_file = os.path.join(model_dir, pdtxt_name)
            if os.path.exists(dynamic_shape_file):
                config.enable_tuned_tensorrt_dynamic_shape(dynamic_shape_file,
                                                           True)
                # for fastspeech2
                config.exp_disable_tensorrt_ops(["reshape2"])
                print("trt set dynamic shape done!")
            else:
                # In order to avoid memory overflow when collecting dynamic shapes, it is changed to use CPU.
                config.disable_gpu()
                config.set_cpu_math_library_num_threads(10)
                config.collect_shape_range_info(dynamic_shape_file)
                print("Start collect dynamic shape...")
                # Shapes are only being collected in this run; the TensorRT
                # engine is enabled on a later run once the file exists.
                rerun_flag = True

        if not rerun_flag:
            print("Tensor RT with {}".format(precision))
            config.enable_tensorrt_engine(
                workspace_size=1 << 30,
                max_batch_size=1,
                min_subgraph_size=min_subgraph_size,
                precision_mode=precision_map[precision],
                use_static=True,
                use_calib_mode=False, )

    predictor = inference.create_predictor(config)
    return predictor


def get_am_output(input: str,
                  am_predictor: paddle.nn.Layer,
                  am: str,
                  frontend: object,
                  lang: str='zh',
                  merge_sentences: bool=True,
                  speaker_dict: Optional[os.PathLike]=None,
                  spk_id: int=0,
                  add_blank: bool=False):
    """
    Run the text frontend and a static acoustic-model predictor on ``input``.

    Args:
        input (str): text to synthesize.
        am_predictor: predictor exposing ``get_input_names`` /
            ``get_input_handle`` / ``run`` / ``get_output_names`` /
            ``get_output_handle``.
        am (str): acoustic model tag ``{model_name}_{dataset}``.
        frontend (object): frontend instance matching ``lang``.
        lang (str): language passed to ``run_frontend``.
        merge_sentences (bool): passed to ``run_frontend``.
        speaker_dict (os.PathLike): when truthy on a multi-speaker dataset,
            ``spk_id`` is fed to the model.
        spk_id (int): speaker id to synthesize with.
        add_blank (bool): passed to ``run_frontend``.

    Returns:
        The first output of the predictor, copied to CPU.
    """
    am_name = am[:am.rindex('_')]
    am_dataset = am[am.rindex('_') + 1:]
    am_input_names = am_predictor.get_input_names()
    get_spk_id = False
    get_tone_ids = False
    if am_name == 'speedyspeech':
        get_tone_ids = True
    if am_dataset in {"aishell3", "vctk", "mix", "canton"} and speaker_dict:
        get_spk_id = True
        spk_id = np.array([spk_id])

    frontend_dict = run_frontend(
        frontend=frontend,
        text=input,
        merge_sentences=merge_sentences,
        get_tone_ids=get_tone_ids,
        lang=lang,
        add_blank=add_blank, )

    if get_tone_ids:
        tone_ids = frontend_dict['tone_ids']
        tones = tone_ids[0].numpy()
        tones_handle = am_predictor.get_input_handle(am_input_names[1])
        tones_handle.reshape(tones.shape)
        tones_handle.copy_from_cpu(tones)
    if get_spk_id:
        # When tones occupy input 1 the speaker id is the next input, which
        # matches the (text, tone, spk_id) order used when the multi-speaker
        # speedyspeech model is exported. Writing it to input 1 as before
        # overwrote the tones.
        spk_input_index = 2 if get_tone_ids else 1
        spk_id_handle = am_predictor.get_input_handle(
            am_input_names[spk_input_index])
        spk_id_handle.reshape(spk_id.shape)
        spk_id_handle.copy_from_cpu(spk_id)
    # Only the first sentence returned by the frontend is fed to the model.
    phone_ids = frontend_dict['phone_ids']
    phones = phone_ids[0].numpy()
    phones_handle = am_predictor.get_input_handle(am_input_names[0])
    phones_handle.reshape(phones.shape)
    phones_handle.copy_from_cpu(phones)

    am_predictor.run()
    am_output_names = am_predictor.get_output_names()
    am_output_handle = am_predictor.get_output_handle(am_output_names[0])
    am_output_data = am_output_handle.copy_to_cpu()
    return am_output_data


def get_voc_output(voc_predictor, input):
    """
    Run a vocoder predictor on ``input`` and return its first output.

    Args:
        voc_predictor: predictor exposing ``get_input_names`` /
            ``get_input_handle`` / ``run`` / ``get_output_names`` /
            ``get_output_handle``.
        input: array fed to the first model input; its ``shape`` is used to
            resize the input handle.

    Returns:
        The first output of the predictor, copied to CPU.
    """
    first_input = voc_predictor.get_input_names()[0]
    feed = voc_predictor.get_input_handle(first_input)
    feed.reshape(input.shape)
    feed.copy_from_cpu(input)

    voc_predictor.run()

    first_output = voc_predictor.get_output_names()[0]
    return voc_predictor.get_output_handle(first_output).copy_to_cpu()


def get_am_sublayer_output(am_sublayer_predictor, input):
    """
    Run one acoustic-model sub-layer predictor on ``input``.

    Args:
        am_sublayer_predictor: predictor exposing ``get_input_names`` /
            ``get_input_handle`` / ``run`` / ``get_output_names`` /
            ``get_output_handle``.
        input: array fed to the first model input; its ``shape`` is used to
            resize the input handle.

    Returns:
        The first output of the predictor, copied to CPU.
    """
    predictor = am_sublayer_predictor

    feed = predictor.get_input_handle(predictor.get_input_names()[0])
    feed.reshape(input.shape)
    feed.copy_from_cpu(input)

    predictor.run()

    fetch = predictor.get_output_handle(predictor.get_output_names()[0])
    return fetch.copy_to_cpu()


def get_streaming_am_output(input: str,
                            am_encoder_infer_predictor,
                            am_decoder_predictor,
                            am_postnet_predictor,
                            frontend,
                            lang: str='zh',
                            merge_sentences: bool=True):
    """Chain the encoder, decoder and postnet predictors for one text input.

    The text goes through ``run_frontend`` (tone ids are not requested), the
    first entry of ``phone_ids`` is fed to the encoder predictor, the encoder
    output to the decoder predictor, and the decoder output (axes 1 and 2
    swapped) to the postnet predictor. The postnet output is swapped back and
    added to the decoder output.

    Args:
        input: text handed to the frontend.
        am_encoder_infer_predictor: predictor for the encoder sub-layer.
        am_decoder_predictor: predictor for the decoder sub-layer.
        am_postnet_predictor: predictor for the postnet sub-layer.
        frontend: frontend object forwarded to ``run_frontend``.
        lang: language forwarded to ``run_frontend``.
        merge_sentences: forwarded to ``run_frontend``.

    Returns:
        The first item (index 0 on the leading axis) of the summed
        decoder + postnet output; still normalized, no denormalization here.
    """
    swap_last_two = (0, 2, 1)

    frontend_result = run_frontend(
        frontend=frontend,
        text=input,
        merge_sentences=merge_sentences,
        get_tone_ids=False,
        lang=lang)
    # Only the first phone-id sequence returned by the frontend is used.
    first_phones = frontend_result['phone_ids'][0].numpy()

    encoder_out = get_am_sublayer_output(
        am_encoder_infer_predictor, input=first_phones)
    decoder_out = get_am_sublayer_output(
        am_decoder_predictor, input=encoder_out)
    postnet_out = get_am_sublayer_output(
        am_postnet_predictor, input=np.transpose(decoder_out, swap_last_two))

    # Residual connection: postnet output is a correction on the decoder mel.
    refined = decoder_out + np.transpose(postnet_out, swap_last_two)
    return refined[0]


# onnx
def get_sess(model_path: Optional[os.PathLike],
             device: str='cpu',
             cpu_threads: int=1,
             use_trt: bool=False):
    """Create an onnxruntime ``InferenceSession`` for ``model_path``.

    Args:
        model_path: path of the ONNX model, passed to ``ort.InferenceSession``.
        device: ``'cpu'`` or any string containing ``'gpu'`` (matched
            case-insensitively). A ``'gpu:<id>'`` form selects the device id;
            otherwise device 0 is used.
        cpu_threads: value assigned to ``intra_op_num_threads``.
        use_trt: on gpu, request ``TensorrtExecutionProvider`` instead of
            ``CUDAExecutionProvider``.

    Returns:
        The created ``ort.InferenceSession``.

    Raises:
        ValueError: if ``device`` is neither cpu nor gpu. Previously this
            case failed later with an ``UnboundLocalError`` on ``providers``.
    """
    device_key = device.lower()
    if 'gpu' in device_key:
        parts = device.split(':')
        device_id = int(parts[1]) if len(parts) == 2 else 0
        # fastspeech2/mb_melgan can't use trt now!
        if use_trt:
            provider_name = 'TensorrtExecutionProvider'
        else:
            provider_name = 'CUDAExecutionProvider'
        providers = [(provider_name, {'device_id': device_id})]
    elif device_key == 'cpu':
        providers = ['CPUExecutionProvider']
    else:
        raise ValueError(
            f"unsupported device: {device!r}, expected 'cpu' or 'gpu[:id]'")

    sess_options = ort.SessionOptions()
    sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    sess_options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
    sess_options.intra_op_num_threads = cpu_threads
    sess = ort.InferenceSession(
        model_path, providers=providers, sess_options=sess_options)
    return sess


================================================
FILE: paddlespeech/t2s/exps/synthesize.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
from pathlib import Path

import jsonlines
import numpy as np
import paddle
import soundfile as sf
import yaml
from timer import timer
from yacs.config import CfgNode

from paddlespeech.t2s.exps.syn_utils import get_am_inference
from paddlespeech.t2s.exps.syn_utils import get_test_dataset
from paddlespeech.t2s.exps.syn_utils import get_voc_inference
from paddlespeech.t2s.utils import str2bool


def evaluate(args):
    """Synthesize one wav per test-metadata item and report generation speed.

    Loads the acoustic-model and vocoder configs, builds both inference
    objects and the test dataset, then for every datum runs the acoustic
    model followed by the vocoder and writes ``<utt_id>.wav`` into
    ``args.output_dir``.

    Args:
        args: parsed command-line namespace. The fields read here are
            ``test_metadata``, ``am``, ``am_config``, ``am_ckpt``, ``am_stat``,
            ``phones_dict``, ``tones_dict``, ``speaker_dict``,
            ``speech_stretchs``, ``voice_cloning``, ``voc``, ``voc_config``,
            ``voc_ckpt``, ``voc_stat`` and ``output_dir``.

    Raises:
        ValueError: if the model name in ``args.am`` (the text before the last
            underscore) is not handled by this script.
    """
    # dataloader has been too verbose
    logging.getLogger("DataLoader").disabled = True

    # construct dataset for evaluation
    with jsonlines.open(args.test_metadata, 'r') as reader:
        test_metadata = list(reader)

    # Init body.
    with open(args.am_config) as f:
        am_config = CfgNode(yaml.safe_load(f))
    with open(args.voc_config) as f:
        voc_config = CfgNode(yaml.safe_load(f))

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(am_config)
    print(voc_config)

    # acoustic model: the model name is everything before the last underscore
    am_name = args.am[:args.am.rindex('_')]

    am_inference = get_am_inference(
        am=args.am,
        am_config=am_config,
        am_ckpt=args.am_ckpt,
        am_stat=args.am_stat,
        phones_dict=args.phones_dict,
        tones_dict=args.tones_dict,
        speaker_dict=args.speaker_dict,
        speech_stretchs=args.speech_stretchs, )
    test_dataset = get_test_dataset(
        test_metadata=test_metadata,
        am=args.am,
        speaker_dict=args.speaker_dict,
        voice_cloning=args.voice_cloning)

    # vocoder
    voc_inference = get_voc_inference(
        voc=args.voc,
        voc_config=voc_config,
        voc_ckpt=args.voc_ckpt,
        voc_stat=args.voc_stat)

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # N: total number of generated samples, T: total elapsed time
    N = 0
    T = 0

    for datum in test_dataset:
        utt_id = datum["utt_id"]
        with timer() as t:
            with paddle.no_grad():
                # acoustic model
                if am_name == 'fastspeech2':
                    phone_ids = paddle.to_tensor(datum["text"])
                    spk_emb = None
                    spk_id = None
                    # multi speaker
                    if args.voice_cloning and "spk_emb" in datum:
                        spk_emb = paddle.to_tensor(np.load(datum["spk_emb"]))
                    elif "spk_id" in datum:
                        spk_id = paddle.to_tensor(datum["spk_id"])
                    mel = am_inference(
                        phone_ids, spk_id=spk_id, spk_emb=spk_emb)
                elif am_name == 'speedyspeech':
                    phone_ids = paddle.to_tensor(datum["phones"])
                    tone_ids = paddle.to_tensor(datum["tones"])
                    mel = am_inference(phone_ids, tone_ids)
                elif am_name == 'tacotron2':
                    phone_ids = paddle.to_tensor(datum["text"])
                    spk_emb = None
                    # multi speaker
                    if args.voice_cloning and "spk_emb" in datum:
                        spk_emb = paddle.to_tensor(np.load(datum["spk_emb"]))
                    mel = am_inference(phone_ids, spk_emb=spk_emb)
                elif am_name == 'diffsinger':
                    phone_ids = paddle.to_tensor(datum["text"])
                    note = paddle.to_tensor(datum["note"])
                    note_dur = paddle.to_tensor(datum["note_dur"])
                    is_slur = paddle.to_tensor(datum["is_slur"])
                    # get_mel_fs2 = False, means mel from diffusion, get_mel_fs2 = True, means mel from fastspeech2.
                    get_mel_fs2 = False
                    # mel: [T, mel_bin]
                    mel = am_inference(
                        phone_ids,
                        note=note,
                        note_dur=note_dur,
                        is_slur=is_slur,
                        get_mel_fs2=get_mel_fs2)
                else:
                    # Without this branch `mel` would be unbound below and the
                    # failure would show up as an unrelated NameError.
                    raise ValueError(
                        f"unsupported acoustic model: {am_name!r}")
                # vocoder
                wav = voc_inference(mel)

            # Statistics are read while the timer context is still open,
            # exactly as before.
            wav = wav.numpy()
            N += wav.size
            T += t.elapse
            speed = wav.size / t.elapse
            rtf = am_config.fs / speed
        print(
            f"{utt_id}, mel: {mel.shape}, wave: {wav.size}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}."
        )
        sf.write(
            str(output_dir / (utt_id + ".wav")), wav, samplerate=am_config.fs)
        print(f"{utt_id} done!")

    # An empty dataset leaves N == T == 0; skip the summary instead of
    # dividing by zero.
    if N > 0 and T > 0:
        print(f"generation speed: {N / T}Hz, RTF: {am_config.fs / (N / T) }")
    else:
        print("no audio was generated, skip the generation speed summary.")


def parse_args():
    """Build the command-line parser for synthesis and parse the arguments.

    Returns:
        argparse.Namespace: the parsed options (acoustic model, vocoder,
        device counts, test metadata path, output directory, ...).
    """
    # parse args and config
    parser = argparse.ArgumentParser(
        description="Synthesize with acoustic model & vocoder")
    # acoustic model
    parser.add_argument(
        '--am',
        type=str,
        default='fastspeech2_csmsc',
        choices=[
            'speedyspeech_csmsc',
            'fastspeech2_csmsc',
            'fastspeech2_ljspeech',
            'fastspeech2_aishell3',
            'fastspeech2_vctk',
            'tacotron2_csmsc',
            'tacotron2_ljspeech',
            'tacotron2_aishell3',
            'fastspeech2_mix',
            'fastspeech2_canton',
            'diffsinger_opencpop',
        ],
        help='Choose acoustic model type of tts task.')
    parser.add_argument(
        '--am_config', type=str, default=None, help='Config of acoustic model.')
    parser.add_argument(
        '--am_ckpt',
        type=str,
        default=None,
        help='Checkpoint file of acoustic model.')
    parser.add_argument(
        "--am_stat",
        type=str,
        default=None,
        help="mean and standard deviation used to normalize spectrogram when training acoustic model."
    )
    parser.add_argument(
        "--phones_dict", type=str, default=None, help="phone vocabulary file.")
    parser.add_argument(
        "--tones_dict", type=str, default=None, help="tone vocabulary file.")
    parser.add_argument(
        "--speaker_dict", type=str, default=None, help="speaker id map file.")
    parser.add_argument(
        "--voice-cloning",
        type=str2bool,
        default=False,
        help="whether training voice cloning model.")
    # vocoder
    parser.add_argument(
        '--voc',
        type=str,
        default='pwgan_csmsc',
        choices=[
            'pwgan_csmsc',
            'pwgan_ljspeech',
            'pwgan_aishell3',
            'pwgan_vctk',
            'mb_melgan_csmsc',
            'wavernn_csmsc',
            'hifigan_csmsc',
            'hifigan_ljspeech',
            'hifigan_aishell3',
            'hifigan_vctk',
            'style_melgan_csmsc',
            "pwgan_opencpop",
            "hifigan_opencpop",
        ],
        help='Choose vocoder type of tts task.')
    parser.add_argument(
        '--voc_config', type=str, default=None, help='Config of voc.')
    parser.add_argument(
        '--voc_ckpt', type=str, default=None, help='Checkpoint file of voc.')
    parser.add_argument(
        "--voc_stat",
        type=str,
        default=None,
        help="mean and standard deviation used to normalize spectrogram when training voc."
    )
    # other: device selection, checked in the order gpu, xpu, npu, mlu
    parser.add_argument(
        "--ngpu",
        type=int,
        default=1,
        help="if wish to use gpu, set ngpu > 0, otherwise use xpu, npu, mlu or cpu."
    )
    parser.add_argument(
        "--nxpu",
        type=int,
        default=0,
        help="if wish to use xpu, set ngpu == 0 and nxpu > 0, otherwise use gpu, npu, mlu or cpu."
    )
    parser.add_argument(
        "--nnpu",
        type=int,
        default=0,
        help="if wish to use npu, set ngpu == 0 and nnpu > 0, otherwise use gpu, xpu, mlu or cpu."
    )
    # The help text used to say "xpu" here, which described the wrong device.
    parser.add_argument(
        "--nmlu",
        type=int,
        default=0,
        help="if wish to use mlu, set ngpu == 0 and nmlu > 0, otherwise use gpu, xpu, npu or cpu."
    )
    parser.add_argument("--test_metadata", type=str, help="test metadata.")
    parser.add_argument("--output_dir", type=str, help="output dir.")
    parser.add_argument(
        "--speech_stretchs",
        type=str,
        default=None,
        help="The min and max values of the mel spectrum.")

    args = parser.parse_args()
    return args


def main():
    """Parse the command line, select the paddle device, then run ``evaluate``.

    The first positive count among ``ngpu``, ``nxpu``, ``nnpu`` and ``nmlu``
    (in that order) picks the device. When all four are zero the cpu is used.
    Any other combination only prints a hint; ``evaluate`` still runs.
    """
    args = parse_args()

    # (requested count, paddle device name), in priority order.
    accelerators = (
        (args.ngpu, "gpu"),
        (args.nxpu, "xpu"),
        (args.nnpu, "npu"),
        (args.nmlu, "mlu"), )
    chosen = next((name for count, name in accelerators if count > 0), None)

    if chosen is not None:
        paddle.set_device(chosen)
    elif all(count == 0 for count, _ in accelerators):
        paddle.set_device("cpu")
    else:
        print(
            "one of ngpu, nxpu, nnpu or nmlu should be greater than 0 or all of them equal to 0"
        )

    evaluate(args)


# Run the synthesis pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/synthesize_e2e.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from pathlib import Path
from pprint import pprint

import paddle
import soundfile as sf
import yaml
from timer import timer
from yacs.config import CfgNode

from paddlespeech.t2s.exps.syn_utils import am_to_static
from paddlespeech.t2s.exps.syn_utils import get_am_inference
from paddlespeech.t2s.exps.syn_utils import get_frontend
from paddlespeech.t2s.exps.syn_utils import get_sentences
from paddlespeech.t2s.exps.syn_utils import get_sentences_svs
from paddlespeech.t2s.exps.syn_utils import get_voc_inference
from paddlespeech.t2s.exps.syn_utils import run_frontend
from paddlespeech.t2s.exps.syn_utils import voc_to_static
from paddlespeech.t2s.utils import str2bool


def evaluate(args):
    """Synthesize one wav per input sentence (text to waveform) and report speed.

    Builds the frontend, the acoustic model and the vocoder, optionally
    converts both models to static graphs (when ``args.inference_dir`` is set),
    then for every ``(utt_id, sentence)`` pair runs frontend -> acoustic model
    -> vocoder segment by segment, concatenates the segment waveforms and
    writes ``<utt_id>.wav`` into ``args.output_dir``.

    Args:
        args: parsed command-line namespace. The fields read here are ``am``,
            ``am_config``, ``am_ckpt``, ``am_stat``, ``phones_dict``,
            ``tones_dict``, ``speaker_dict``, ``speech_stretchs``, ``spk_id``,
            ``voc``, ``voc_config``, ``voc_ckpt``, ``voc_stat``, ``lang``,
            ``pinyin_phone``, ``use_rhy``, ``inference_dir``, ``text`` and
            ``output_dir``.

    Raises:
        ValueError: if the model name in ``args.am`` (the text before the last
            underscore) is not handled by this script.
    """

    # Init body.
    with open(args.am_config) as f:
        am_config = CfgNode(yaml.safe_load(f))
    with open(args.voc_config) as f:
        voc_config = CfgNode(yaml.safe_load(f))

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(am_config)
    print(voc_config)

    # frontend
    frontend = get_frontend(
        lang=args.lang,
        phones_dict=args.phones_dict,
        tones_dict=args.tones_dict,
        pinyin_phone=args.pinyin_phone,
        use_rhy=args.use_rhy)
    print("frontend done!")

    # acoustic model: split args.am at its last underscore
    am_name = args.am[:args.am.rindex('_')]
    am_dataset = args.am[args.am.rindex('_') + 1:]
    am_inference = get_am_inference(
        am=args.am,
        am_config=am_config,
        am_ckpt=args.am_ckpt,
        am_stat=args.am_stat,
        phones_dict=args.phones_dict,
        tones_dict=args.tones_dict,
        speaker_dict=args.speaker_dict,
        speech_stretchs=args.speech_stretchs, )
    print("acoustic model done!")

    # vocoder
    voc_inference = get_voc_inference(
        voc=args.voc,
        voc_config=voc_config,
        voc_ckpt=args.voc_ckpt,
        voc_stat=args.voc_stat)
    print("voc done!")

    # whether dygraph to static
    if args.inference_dir:
        print("convert am and voc to static model.")
        # acoustic model
        am_inference = am_to_static(
            am_inference=am_inference,
            am=args.am,
            inference_dir=args.inference_dir,
            speaker_dict=args.speaker_dict)
        # vocoder
        voc_inference = voc_to_static(
            voc_inference=voc_inference,
            voc=args.voc,
            inference_dir=args.inference_dir)

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    merge_sentences = False
    # Avoid not stopping at the end of a sub sentence when tacotron2_ljspeech dygraph to static graph
    # but still not stopping in the end (NOTE by yuantian01 Feb 9 2022)
    if am_name == 'tacotron2':
        merge_sentences = True

    get_tone_ids = False
    if am_name == 'speedyspeech':
        get_tone_ids = True

    # wav samples
    N = 0
    # inference time cost
    T = 0

    # [(uid, text), ]
    if am_name == 'diffsinger':
        sentences = get_sentences_svs(text_file=args.text)
    else:
        sentences = get_sentences(text_file=args.text, lang=args.lang)

    for utt_id, sentence in sentences:
        print(f"{utt_id} {sentence}")
        with timer() as t:
            if am_name == "diffsinger":
                text = ""
                svs_input = sentence
            else:
                text = sentence
                svs_input = None

            # frontend
            frontend_dict = run_frontend(
                frontend=frontend,
                text=text,
                merge_sentences=merge_sentences,
                get_tone_ids=get_tone_ids,
                lang=args.lang,
                svs_input=svs_input)
            phone_ids = frontend_dict['phone_ids']
            # pprint(f"{utt_id} {phone_ids}")

            with paddle.no_grad():
                # Waveforms of this utterance only. Collecting them in a fresh
                # list per utterance keeps audio of a previous utterance from
                # leaking in when the frontend returns no segment.
                wavs = []
                for i in range(len(phone_ids)):
                    # sub phone, split by `sp` or punctuation.
                    part_phone_ids = phone_ids[i]

                    # acoustic model
                    if am_name == 'fastspeech2':
                        # multi speaker
                        if am_dataset in {"aishell3", "vctk", "mix", "canton"}:
                            # multi-speaker
                            spk_id = paddle.to_tensor([args.spk_id])
                            mel = am_inference(part_phone_ids, spk_id)
                        else:
                            # single-speaker
                            mel = am_inference(part_phone_ids)
                    elif am_name == 'speedyspeech':
                        part_tone_ids = frontend_dict['tone_ids'][i]
                        if am_dataset in {"aishell3", "vctk", "mix"}:
                            # multi-speaker
                            spk_id = paddle.to_tensor([args.spk_id])
                            mel = am_inference(part_phone_ids, part_tone_ids,
                                               spk_id)
                        else:
                            # single-speaker
                            mel = am_inference(part_phone_ids, part_tone_ids)
                    elif am_name == 'tacotron2':
                        mel = am_inference(part_phone_ids)
                    elif am_name == 'diffsinger':
                        part_note_ids = frontend_dict['note_ids'][i]
                        part_note_durs = frontend_dict['note_durs'][i]
                        part_is_slurs = frontend_dict['is_slurs'][i]
                        mel = am_inference(
                            text=part_phone_ids,
                            note=part_note_ids,
                            note_dur=part_note_durs,
                            is_slur=part_is_slurs, )
                    else:
                        # Without this branch `mel` would be unbound (or stale)
                        # when the vocoder is called below.
                        raise ValueError(
                            f"unsupported acoustic model: {am_name!r}")

                    # vocoder
                    wavs.append(voc_inference(mel))

                # One concat over all segments instead of re-concatenating the
                # growing waveform after every segment.
                wav_all = paddle.concat(wavs) if wavs else None

        if wav_all is None:
            # Nothing was synthesized for this utterance; do not write a file.
            print(f"{utt_id} has no phones to synthesize, skipped!")
            continue

        wav = wav_all.numpy()
        N += wav.size
        T += t.elapse

        # samples per second
        speed = wav.size / t.elapse
        # generate one second wav need `RTF` seconds
        rtf = am_config.fs / speed
        print(
            f"{utt_id}, mel: {mel.shape}, wave: {wav.shape}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}."
        )

        sf.write(
            str(output_dir / (utt_id + ".wav")), wav, samplerate=am_config.fs)
        print(f"{utt_id} done!")

    # No sentence (or only skipped ones) leaves N == T == 0; skip the summary
    # instead of dividing by zero.
    if N > 0 and T > 0:
        print(f"generation speed: {N / T}Hz, RTF: {am_config.fs / (N / T) }")
    else:
        print("no audio was generated, skip the generation speed summary.")


def parse_args():
    """Build the command-line parser for text-to-wav synthesis and parse the arguments.

    Returns:
        argparse.Namespace: the parsed options (acoustic model, vocoder,
        language, device counts, input text file, output directory, ...).
    """
    # parse args and config
    parser = argparse.ArgumentParser(
        description="Synthesize with acoustic model & vocoder")
    # acoustic model
    parser.add_argument(
        '--am',
        type=str,
        default='fastspeech2_csmsc',
        choices=[
            'speedyspeech_csmsc',
            'speedyspeech_aishell3',
            'fastspeech2_csmsc',
            'fastspeech2_ljspeech',
            'fastspeech2_aishell3',
            'fastspeech2_vctk',
            'tacotron2_csmsc',
            'tacotron2_ljspeech',
            'fastspeech2_mix',
            'fastspeech2_canton',
            'fastspeech2_male-zh',
            'fastspeech2_male-en',
            'fastspeech2_male-mix',
            'diffsinger_opencpop',
        ],
        help='Choose acoustic model type of tts task.')
    parser.add_argument(
        '--am_config', type=str, default=None, help='Config of acoustic model.')
    parser.add_argument(
        '--am_ckpt',
        type=str,
        default=None,
        help='Checkpoint file of acoustic model.')
    parser.add_argument(
        "--am_stat",
        type=str,
        default=None,
        help="mean and standard deviation used to normalize spectrogram when training acoustic model."
    )
    parser.add_argument(
        "--phones_dict", type=str, default=None, help="phone vocabulary file.")
    parser.add_argument(
        "--tones_dict", type=str, default=None, help="tone vocabulary file.")
    parser.add_argument(
        "--speaker_dict", type=str, default=None, help="speaker id map file.")
    parser.add_argument(
        '--spk_id',
        type=int,
        default=0,
        help='spk id for multi speaker acoustic model')
    # vocoder
    parser.add_argument(
        '--voc',
        type=str,
        default='pwgan_csmsc',
        choices=[
            'pwgan_csmsc',
            'pwgan_ljspeech',
            'pwgan_aishell3',
            'pwgan_vctk',
            'mb_melgan_csmsc',
            'style_melgan_csmsc',
            'hifigan_csmsc',
            'hifigan_ljspeech',
            'hifigan_aishell3',
            'hifigan_vctk',
            'wavernn_csmsc',
            'pwgan_male',
            'hifigan_male',
            'pwgan_opencpop',
            'hifigan_opencpop',
        ],
        help='Choose vocoder type of tts task.')
    parser.add_argument(
        '--voc_config', type=str, default=None, help='Config of voc.')
    parser.add_argument(
        '--voc_ckpt', type=str, default=None, help='Checkpoint file of voc.')
    parser.add_argument(
        "--voc_stat",
        type=str,
        default=None,
        help="mean and standard deviation used to normalize spectrogram when training voc."
    )
    # other
    parser.add_argument(
        '--lang',
        type=str,
        default='zh',
        choices=['zh', 'en', 'mix', 'canton', 'sing'],
        help='Choose model language. zh or en or mix')

    parser.add_argument(
        "--inference_dir",
        type=str,
        default=None,
        help="dir to save inference models")
    # device selection, checked in the order gpu, xpu, npu, mlu
    parser.add_argument(
        "--ngpu",
        type=int,
        default=1,
        help="if wish to use gpu, set ngpu > 0, otherwise use xpu, npu, mlu or cpu."
    )
    parser.add_argument(
        "--nxpu",
        type=int,
        default=0,
        help="if wish to use xpu, set ngpu == 0 and nxpu > 0, otherwise use gpu, npu, mlu or cpu."
    )
    parser.add_argument(
        "--nnpu",
        type=int,
        default=0,
        help="if wish to use npu, set ngpu == 0 and nnpu > 0, otherwise use gpu, xpu, mlu or cpu."
    )
    # The help text used to say "xpu" here, which described the wrong device.
    parser.add_argument(
        "--nmlu",
        type=int,
        default=0,
        help="if wish to use mlu, set ngpu == 0 and nmlu > 0, otherwise use gpu, xpu, npu or cpu."
    )
    parser.add_argument(
        "--text",
        type=str,
        help="text to synthesize, a 'utt_id sentence' pair per line.")
    parser.add_argument("--output_dir", type=str, help="output dir.")
    parser.add_argument(
        "--use_rhy",
        type=str2bool,
        default=False,
        help="run rhythm frontend or not")
    parser.add_argument(
        "--pinyin_phone",
        type=str,
        default=None,
        help="pinyin to phone map file, using on sing_frontend.")
    parser.add_argument(
        "--speech_stretchs",
        type=str,
        default=None,
        help="The min and max values of the mel spectrum, using on diffusion of diffsinger."
    )

    args = parser.parse_args()
    return args


def main():
    """Parse the command line, select the paddle device, then run ``evaluate``.

    The first positive count among ``ngpu``, ``nxpu``, ``nnpu`` and ``nmlu``
    (in that order) picks the device. When all four are zero the cpu is used.
    Any other combination only prints a hint; ``evaluate`` still runs.
    """
    args = parse_args()

    # (requested count, paddle device name), in priority order.
    accelerators = (
        (args.ngpu, "gpu"),
        (args.nxpu, "xpu"),
        (args.nnpu, "npu"),
        (args.nmlu, "mlu"), )
    chosen = next((name for count, name in accelerators if count > 0), None)

    if chosen is not None:
        paddle.set_device(chosen)
    elif all(count == 0 for count, _ in accelerators):
        paddle.set_device("cpu")
    else:
        print(
            "one of ngpu, nxpu, nnpu or nmlu should be greater than 0 or all of them equal to 0"
        )

    evaluate(args)


# Run the text-to-wav synthesis only when this file is executed as a script.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/synthesize_streaming.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
from pathlib import Path

import numpy as np
import paddle
import soundfile as sf
import yaml
from paddle import jit
from paddle.static import InputSpec
from timer import timer
from yacs.config import CfgNode

from paddlespeech.t2s.exps.syn_utils import denorm
from paddlespeech.t2s.exps.syn_utils import get_chunks
from paddlespeech.t2s.exps.syn_utils import get_frontend
from paddlespeech.t2s.exps.syn_utils import get_sentences
from paddlespeech.t2s.exps.syn_utils import get_voc_inference
from paddlespeech.t2s.exps.syn_utils import model_alias
from paddlespeech.t2s.exps.syn_utils import run_frontend
from paddlespeech.t2s.exps.syn_utils import voc_to_static
from paddlespeech.t2s.utils import str2bool
from paddlespeech.utils.dynamic_import import dynamic_import


def evaluate(args):
    """Synthesize one wav per sentence, optionally decoding the mel chunk by chunk.

    The acoustic model is split into its ``encoder_infer``, ``decoder`` and
    ``postnet`` sub-layers. With ``args.am_streaming`` the encoder output is
    cut into padded chunks by ``get_chunks``; each chunk is decoded on its own
    and the frames that belong to the padding are dropped before the chunk
    mels are concatenated. Otherwise the whole encoder output is decoded at
    once. The mel is then passed to the vocoder and ``<utt_id>.wav`` is
    written into ``args.output_dir``.

    Args:
        args: parsed command-line namespace. The fields read here are ``am``,
            ``am_config``, ``am_ckpt``, ``am_stat``, ``phones_dict``,
            ``tones_dict``, ``voc``, ``voc_config``, ``voc_ckpt``,
            ``voc_stat``, ``lang``, ``inference_dir``, ``text``,
            ``output_dir``, ``am_streaming``, ``block_size`` and ``pad_size``.
    """

    # Init body.
    with open(args.am_config) as f:
        am_config = CfgNode(yaml.safe_load(f))
    with open(args.voc_config) as f:
        voc_config = CfgNode(yaml.safe_load(f))

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(am_config)
    print(voc_config)

    sentences = get_sentences(text_file=args.text, lang=args.lang)

    # frontend
    frontend = get_frontend(
        lang=args.lang,
        phones_dict=args.phones_dict,
        tones_dict=args.tones_dict)

    # The vocabulary size is the number of lines in the phone dict.
    with open(args.phones_dict, "r") as f:
        phn_id = [line.strip().split() for line in f.readlines()]
    vocab_size = len(phn_id)
    print("vocab_size:", vocab_size)

    # acoustic model, only support fastspeech2 here now!
    # model: {model_name}_{dataset}
    am_name = args.am[:args.am.rindex('_')]
    odim = am_config.n_mels

    am_class = dynamic_import(am_name, model_alias)
    am = am_class(idim=vocab_size, odim=odim, **am_config["model"])
    am.set_state_dict(paddle.load(args.am_ckpt)["main_params"])
    am.eval()
    am_mu, am_std = np.load(args.am_stat)
    am_mu = paddle.to_tensor(am_mu)
    am_std = paddle.to_tensor(am_std)

    # am sub layers
    am_encoder_infer = am.encoder_infer
    am_decoder = am.decoder
    am_postnet = am.postnet

    # vocoder
    voc_inference = get_voc_inference(
        voc=args.voc,
        voc_config=voc_config,
        voc_ckpt=args.voc_ckpt,
        voc_stat=args.voc_stat)

    # whether dygraph to static
    if args.inference_dir:
        # fastspeech2 cnndecoder to static
        # am.encoder_infer
        am_encoder_infer = jit.to_static(
            am_encoder_infer, input_spec=[InputSpec([-1], dtype=paddle.int64)])
        paddle.jit.save(am_encoder_infer,
                        os.path.join(args.inference_dir,
                                     args.am + "_am_encoder_infer"))
        am_encoder_infer = paddle.jit.load(
            os.path.join(args.inference_dir, args.am + "_am_encoder_infer"))

        # am.decoder
        am_decoder = jit.to_static(
            am_decoder,
            input_spec=[InputSpec([1, -1, 384], dtype=paddle.float32)])
        paddle.jit.save(am_decoder,
                        os.path.join(args.inference_dir,
                                     args.am + "_am_decoder"))
        am_decoder = paddle.jit.load(
            os.path.join(args.inference_dir, args.am + "_am_decoder"))

        # am.postnet
        am_postnet = jit.to_static(
            am_postnet,
            input_spec=[InputSpec([1, 80, -1], dtype=paddle.float32)])
        paddle.jit.save(am_postnet,
                        os.path.join(args.inference_dir,
                                     args.am + "_am_postnet"))
        am_postnet = paddle.jit.load(
            os.path.join(args.inference_dir, args.am + "_am_postnet"))

        # vocoder
        voc_inference = voc_to_static(
            voc_inference=voc_inference,
            voc=args.voc,
            inference_dir=args.inference_dir)

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    merge_sentences = True
    get_tone_ids = False

    # N: total number of generated samples, T: total elapsed time
    N = 0
    T = 0
    block_size = args.block_size
    pad_size = args.pad_size

    for utt_id, sentence in sentences:
        with timer() as t:
            frontend_dict = run_frontend(
                frontend=frontend,
                text=sentence,
                merge_sentences=merge_sentences,
                get_tone_ids=get_tone_ids,
                lang=args.lang)
            phone_ids = frontend_dict['phone_ids']
            # merge_sentences=True here, so we only use the first item of phone_ids
            phone_ids = phone_ids[0]
            with paddle.no_grad():
                # acoustic model
                orig_hs = am_encoder_infer(phone_ids)
                if args.am_streaming:
                    hss = get_chunks(orig_hs, block_size, pad_size)
                    mel_list = []
                    for i, hs in enumerate(hss):
                        before_outs = am_decoder(hs)
                        after_outs = before_outs + am_postnet(
                            before_outs.transpose((0, 2, 1))).transpose(
                                (0, 2, 1))
                        normalized_mel = after_outs[0]
                        sub_mel = denorm(normalized_mel, am_mu, am_std)
                        # Clip the frames that come from the padding: skip the
                        # left pad (the first chunk has none) and keep at most
                        # block_size frames. For the middle and last chunks
                        # this selects the same frames as before. For the
                        # first chunk it no longer cuts real frames when the
                        # right pad is incomplete (short utterance) and no
                        # longer yields an empty slice when pad_size is 0.
                        # NOTE(review): assumes every chunk after the first
                        # carries exactly pad_size frames of left context, as
                        # the previous slicing did; confirm against get_chunks.
                        left = 0 if i == 0 else pad_size
                        sub_mel = sub_mel[left:left + block_size]
                        mel_list.append(sub_mel)
                    mel = paddle.concat(mel_list, axis=0)

                else:
                    before_outs = am_decoder(orig_hs)
                    after_outs = before_outs + am_postnet(
                        before_outs.transpose((0, 2, 1))).transpose((0, 2, 1))
                    normalized_mel = after_outs[0]
                    mel = denorm(normalized_mel, am_mu, am_std)

                # vocoder
                wav = voc_inference(mel)

        wav = wav.numpy()
        N += wav.size
        T += t.elapse
        speed = wav.size / t.elapse
        rtf = am_config.fs / speed
        print(
            f"{utt_id}, mel: {mel.shape}, wave: {wav.shape}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}."
        )
        sf.write(
            str(output_dir / (utt_id + ".wav")), wav, samplerate=am_config.fs)
        print(f"{utt_id} done!")

    # An empty sentence list leaves N == T == 0; skip the summary instead of
    # dividing by zero.
    if N > 0 and T > 0:
        print(f"generation speed: {N / T}Hz, RTF: {am_config.fs / (N / T) }")
    else:
        print("no audio was generated, skip the generation speed summary.")


def parse_args():
    """Declare and parse the command-line options of this synthesis script.

    The options are grouped as: acoustic model, vocoder, general settings
    (language, device, input text, inference/output directories) and the
    streaming-specific settings of the acoustic model.

    Returns:
        argparse.Namespace: the parsed command-line arguments.
    """
    parser = argparse.ArgumentParser(
        description="Synthesize with acoustic model & vocoder")
    # short alias; options are registered in the same order as before
    add = parser.add_argument

    # ---- acoustic model ----
    add('--am',
        type=str,
        default='fastspeech2_csmsc',
        choices=['fastspeech2_csmsc'],
        help='Choose acoustic model type of tts task.')
    add('--am_config',
        type=str,
        default=None,
        help='Config of acoustic model.')
    add('--am_ckpt',
        type=str,
        default=None,
        help='Checkpoint file of acoustic model.')
    add("--am_stat",
        type=str,
        default=None,
        help="mean and standard deviation used to normalize spectrogram when training acoustic model."
        )
    add("--phones_dict", type=str, default=None, help="phone vocabulary file.")
    add("--tones_dict", type=str, default=None, help="tone vocabulary file.")

    # ---- vocoder ----
    add('--voc',
        type=str,
        default='pwgan_csmsc',
        choices=[
            'pwgan_csmsc',
            'mb_melgan_csmsc',
            'style_melgan_csmsc',
            'hifigan_csmsc',
        ],
        help='Choose vocoder type of tts task.')
    add('--voc_config', type=str, default=None, help='Config of voc.')
    add('--voc_ckpt', type=str, default=None, help='Checkpoint file of voc.')
    add("--voc_stat",
        type=str,
        default=None,
        help="mean and standard deviation used to normalize spectrogram when training voc."
        )

    # ---- general ----
    add('--lang',
        type=str,
        default='zh',
        help='Choose model language. zh or en')
    add("--inference_dir",
        type=str,
        default=None,
        help="dir to save inference models")
    add("--ngpu", type=int, default=1, help="if ngpu == 0, use cpu.")
    add("--text",
        type=str,
        help="text to synthesize, a 'utt_id sentence' pair per line.")

    # ---- streaming related ----
    add("--am_streaming",
        type=str2bool,
        default=False,
        help="whether use streaming acoustic model")
    add("--block_size",
        type=int,
        default=42,
        help="block size of am streaming")
    add("--pad_size", type=int, default=12, help="pad size of am streaming")

    add("--output_dir", type=str, help="output dir.")

    return parser.parse_args()


def main():
    """Script entry point: parse options, pick the device, run `evaluate`.

    `--ngpu 0` selects the CPU and any positive value selects the GPU.
    A negative value only prints a warning; no device is set explicitly and
    evaluation still runs.
    """
    args = parse_args()

    if args.ngpu < 0:
        print("ngpu should >= 0 !")
    else:
        device = "cpu" if args.ngpu == 0 else "gpu"
        paddle.set_device(device)

    evaluate(args)


if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/tacotron2/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/exps/tacotron2/preprocess.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
from concurrent.futures import ThreadPoolExecutor
from operator import itemgetter
from pathlib import Path
from typing import Any
from typing import Dict
from typing import List

import jsonlines
import librosa
import numpy as np
import tqdm
import yaml
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.get_feats import LogMelFBank
from paddlespeech.t2s.datasets.preprocess_utils import compare_duration_and_mel_length
from paddlespeech.t2s.datasets.preprocess_utils import get_input_token
from paddlespeech.t2s.datasets.preprocess_utils import get_phn_dur
from paddlespeech.t2s.datasets.preprocess_utils import get_spk_id_map
from paddlespeech.t2s.datasets.preprocess_utils import merge_silence
from paddlespeech.t2s.utils import str2bool


def process_sentence(config: Dict[str, Any],
                     fp: Path,
                     sentences: Dict,
                     output_dir: Path,
                     mel_extractor=None,
                     cut_sil: bool=True,
                     spk_emb_dir: Path=None):
    """Extract and dump the log-mel feature of one utterance.

    Args:
        config: configuration object; ``fs`` and ``n_shift`` are read from it.
        fp (Path): audio file; its stem (minus a trailing ``_mic2``) is the
            utterance id.
        sentences (Dict): ``utt_id -> [phones, durations, speaker]``. The
            entry is updated in place when edge silence is cut, and is also
            handed to ``compare_duration_and_mel_length``.
        output_dir (Path): features are saved under ``output_dir/data_speech``.
        mel_extractor: object providing ``get_log_mel_fbank(wav)``.
        cut_sil (bool): trim a leading / trailing ``sil`` phone and the
            matching audio.
        spk_emb_dir (Path): optional root of per-speaker embedding folders.

    Returns:
        dict or None: the metadata record, or ``None`` when the utterance is
        unknown, the audio is not 1-D, the utterance is dropped from
        ``sentences`` during duration matching, or the speaker has an
        embedding folder but no embedding file for this utterance.
    """
    utt_id = fp.stem
    # for vctk
    if utt_id.endswith("_mic2"):
        utt_id = utt_id[:-5]

    if utt_id not in sentences:
        return None

    # reading, resampling may occur
    wav, _ = librosa.load(str(fp), sr=config.fs)
    if len(wav.shape) != 1:
        return None

    # rescale only when the peak exceeds full scale
    peak = np.abs(wav).max()
    if peak > 1.0:
        wav = wav / peak
    assert len(wav.shape) == 1, f"{utt_id} is not a mono-channel audio."
    assert np.abs(wav).max(
    ) <= 1.0, f"{utt_id} is seems to be different that 16 bit PCM."

    entry = sentences[utt_id]
    phones = entry[0]
    durations = entry[1]
    speaker = entry[2]

    # cumulative frame boundaries, with a leading 0
    d_cumsum = np.pad(np.array(durations).cumsum(0), (1, 0), 'constant')
    # little imprecise than use *.TextGrid directly
    times = librosa.frames_to_time(
        d_cumsum, sr=config.fs, hop_length=config.n_shift)

    if cut_sil:
        start, end = 0, d_cumsum[-1]
        if phones[0] == "sil" and len(durations) > 1:
            start = times[1]
            phones, durations = phones[1:], durations[1:]
        if phones[-1] == 'sil' and len(durations) > 1:
            end = times[-2]
            phones, durations = phones[:-1], durations[:-1]
        sentences[utt_id][0] = phones
        sentences[utt_id][1] = durations
        start, end = librosa.time_to_samples([start, end], sr=config.fs)
        wav = wav[start:end]

    # extract mel feats
    logmel = mel_extractor.get_log_mel_fbank(wav)
    # change duration according to mel_length
    compare_duration_and_mel_length(sentences, utt_id, logmel)
    # utt_id may be popped in compare_duration_and_mel_length
    if utt_id not in sentences:
        return None

    phones = sentences[utt_id][0]
    durations = sentences[utt_id][1]
    num_frames = logmel.shape[0]
    assert sum(durations) == num_frames

    mel_dir = output_dir / "data_speech"
    mel_dir.mkdir(parents=True, exist_ok=True)
    mel_path = mel_dir / (utt_id + "_speech.npy")
    np.save(mel_path, logmel)

    record = {
        "utt_id": utt_id,
        "phones": phones,
        "text_lengths": len(phones),
        "speech_lengths": num_frames,
        "speech": str(mel_path),
        "speaker": speaker
    }

    # attach the speaker embedding only for speakers that have a folder
    if spk_emb_dir and speaker in os.listdir(spk_emb_dir):
        embed_path = spk_emb_dir / speaker / (utt_id + ".npy")
        if not embed_path.is_file():
            return None
        record["spk_emb"] = str(embed_path)
    return record


def process_sentences(config,
                      fps: List[Path],
                      sentences: Dict,
                      output_dir: Path,
                      mel_extractor=None,
                      nprocs: int=1,
                      cut_sil: bool=True,
                      spk_emb_dir: Path=None):
    """Run `process_sentence` over many files and write ``metadata.jsonl``.

    With ``nprocs == 1`` the files are handled one after another; otherwise
    they are submitted to a thread pool with ``nprocs`` workers. Falsy
    results are dropped, the remaining records are sorted by ``utt_id`` and
    written to ``output_dir / "metadata.jsonl"``.
    """
    if nprocs == 1:
        outcomes = (process_sentence(
            config=config,
            fp=wav_path,
            sentences=sentences,
            output_dir=output_dir,
            mel_extractor=mel_extractor,
            cut_sil=cut_sil,
            spk_emb_dir=spk_emb_dir)
                    for wav_path in tqdm.tqdm(fps, total=len(fps)))
        results = [rec for rec in outcomes if rec]
    else:
        with ThreadPoolExecutor(nprocs) as pool:
            with tqdm.tqdm(total=len(fps)) as progress:
                jobs = []
                for wav_path in fps:
                    job = pool.submit(process_sentence, config, wav_path,
                                      sentences, output_dir, mel_extractor,
                                      cut_sil, spk_emb_dir)
                    # advance the bar as soon as a job finishes
                    job.add_done_callback(lambda _: progress.update())
                    jobs.append(job)

                # collect in submission order; result() blocks until done
                results = [
                    rec for rec in (job.result() for job in jobs) if rec
                ]

    ordered = sorted(results, key=itemgetter("utt_id"))
    with jsonlines.open(output_dir / "metadata.jsonl", 'w') as writer:
        for rec in ordered:
            writer.write(rec)
    print("Done")


def _split_by_count(wav_files, num_train, num_dev):
    """Cut an ordered file list into consecutive train / dev / test slices."""
    train = wav_files[:num_train]
    dev = wav_files[num_train:num_train + num_dev]
    test = wav_files[num_train + num_dev:]
    return train, dev, test


def _split_by_speaker(wav_dir, pattern, sub_num_dev=5, min_utts=100):
    """Split per speaker folder, holding out the last files for dev and test.

    For every sub-directory of ``wav_dir`` with more than ``min_utts`` files
    matching ``pattern``, the last ``sub_num_dev`` files go to test, the
    ``sub_num_dev`` before them go to dev and the rest to train. Speakers
    with fewer files contribute to train only.
    """
    train, dev, test = [], [], []
    for speaker in os.listdir(wav_dir):
        wav_files = sorted(list((wav_dir / speaker).rglob(pattern)))
        if len(wav_files) > min_utts:
            train += wav_files[:-sub_num_dev * 2]
            dev += wav_files[-sub_num_dev * 2:-sub_num_dev]
            test += wav_files[-sub_num_dev:]
        else:
            train += wav_files
    return train, dev, test


def main():
    """Preprocess a dataset: split it and dump raw log-mel features.

    Steps: parse the command line, load the config and the duration file,
    write ``phone_id_map.txt`` / ``speaker_id_map.txt`` into ``--dumpdir``,
    split the audio files into train / dev / test according to
    ``--dataset``, then run `process_sentences` on every non-empty split,
    writing into ``<dumpdir>/{train,dev,test}/raw``.

    Raises:
        ValueError: if ``--dataset`` is not one of the supported names. The
            check runs before anything is written to disk.
    """
    # parse config and args
    parser = argparse.ArgumentParser(
        description="Preprocess audio and then extract features.")

    parser.add_argument(
        "--dataset",
        default="baker",
        type=str,
        help="name of dataset, should in {baker, aishell3, ljspeech, vctk} now")

    # rootdir / dur-file / config are used unconditionally below, so a
    # missing value is reported by argparse instead of a TypeError later on.
    parser.add_argument(
        "--rootdir",
        default=None,
        type=str,
        required=True,
        help="directory to dataset.")

    parser.add_argument(
        "--dumpdir",
        type=str,
        required=True,
        help="directory to dump feature files.")
    parser.add_argument(
        "--dur-file",
        default=None,
        type=str,
        required=True,
        help="path to durations.txt.")

    parser.add_argument(
        "--config", type=str, required=True, help="tacotron2 config file.")

    parser.add_argument(
        "--num-cpu", type=int, default=1, help="number of process.")

    parser.add_argument(
        "--cut-sil",
        type=str2bool,
        default=True,
        help="whether cut sil in the edge of audio")

    parser.add_argument(
        "--spk_emb_dir",
        default=None,
        type=str,
        help="directory to speaker embedding files.")
    args = parser.parse_args()

    # Fail fast: an unknown dataset used to print a message and then crash
    # with NameError after the id maps had already been written.
    if args.dataset not in ("baker", "aishell3", "ljspeech", "vctk"):
        raise ValueError(
            "dataset should in {baker, aishell3, ljspeech, vctk} now!")

    rootdir = Path(args.rootdir).expanduser()
    dumpdir = Path(args.dumpdir).expanduser()
    # use absolute path
    dumpdir = dumpdir.resolve()
    dumpdir.mkdir(parents=True, exist_ok=True)
    dur_file = Path(args.dur_file).expanduser()

    if args.spk_emb_dir:
        spk_emb_dir = Path(args.spk_emb_dir).expanduser().resolve()
    else:
        spk_emb_dir = None

    assert rootdir.is_dir()
    assert dur_file.is_file()

    with open(args.config, 'rt') as f:
        config = CfgNode(yaml.safe_load(f))

    sentences, speaker_set = get_phn_dur(dur_file)

    merge_silence(sentences)
    phone_id_map_path = dumpdir / "phone_id_map.txt"
    speaker_id_map_path = dumpdir / "speaker_id_map.txt"
    get_input_token(sentences, phone_id_map_path, args.dataset)
    get_spk_id_map(speaker_set, speaker_id_map_path)

    # split data into 3 sections
    if args.dataset == "baker":
        wav_files = sorted(list((rootdir / "Wave").rglob("*.wav")))
        train_wav_files, dev_wav_files, test_wav_files = _split_by_count(
            wav_files, num_train=9800, num_dev=100)
    elif args.dataset == "aishell3":
        train_wav_files, dev_wav_files, test_wav_files = _split_by_speaker(
            rootdir / "train" / "wav", "*.wav")
    elif args.dataset == "ljspeech":
        wav_files = sorted(list((rootdir / "wavs").rglob("*.wav")))
        train_wav_files, dev_wav_files, test_wav_files = _split_by_count(
            wav_files, num_train=12900, num_dev=100)
    else:
        # "vctk" (the dataset name was validated above)
        train_wav_files, dev_wav_files, test_wav_files = _split_by_speaker(
            rootdir / "wav48_silence_trimmed", "*_mic2.flac")

    train_dump_dir = dumpdir / "train" / "raw"
    train_dump_dir.mkdir(parents=True, exist_ok=True)
    dev_dump_dir = dumpdir / "dev" / "raw"
    dev_dump_dir.mkdir(parents=True, exist_ok=True)
    test_dump_dir = dumpdir / "test" / "raw"
    test_dump_dir.mkdir(parents=True, exist_ok=True)

    # Extractor
    mel_extractor = LogMelFBank(
        sr=config.fs,
        n_fft=config.n_fft,
        hop_length=config.n_shift,
        win_length=config.win_length,
        window=config.window,
        n_mels=config.n_mels,
        fmin=config.fmin,
        fmax=config.fmax)

    # process for the 3 sections; empty sections are skipped
    sections = (
        (train_wav_files, train_dump_dir),
        (dev_wav_files, dev_dump_dir),
        (test_wav_files, test_dump_dir), )
    for section_files, section_dir in sections:
        if section_files:
            process_sentences(
                config=config,
                fps=section_files,
                sentences=sentences,
                output_dir=section_dir,
                mel_extractor=mel_extractor,
                nprocs=args.num_cpu,
                cut_sil=args.cut_sil,
                spk_emb_dir=spk_emb_dir)


if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/tacotron2/train.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
import os
import shutil
from pathlib import Path

import jsonlines
import numpy as np
import paddle
import yaml
from paddle import DataParallel
from paddle import distributed as dist
from paddle.io import DataLoader
from paddle.io import DistributedBatchSampler
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.am_batch_fn import tacotron2_multi_spk_batch_fn
from paddlespeech.t2s.datasets.am_batch_fn import tacotron2_single_spk_batch_fn
from paddlespeech.t2s.datasets.data_table import DataTable
from paddlespeech.t2s.models.tacotron2 import Tacotron2
from paddlespeech.t2s.models.tacotron2 import Tacotron2Evaluator
from paddlespeech.t2s.models.tacotron2 import Tacotron2Updater
from paddlespeech.t2s.training.extensions.snapshot import Snapshot
from paddlespeech.t2s.training.extensions.visualizer import VisualDL
from paddlespeech.t2s.training.optimizer import build_optimizers
from paddlespeech.t2s.training.seeding import seed_everything
from paddlespeech.t2s.training.trainer import Trainer
from paddlespeech.t2s.utils import str2bool


def train_sp(args, config):
    """Train Tacotron2 in the current process (one process per device).

    Args:
        args: parsed command-line arguments. Reads ``ngpu``,
            ``voice_cloning``, ``train_metadata``, ``dev_metadata``,
            ``phones_dict``, ``output_dir`` and ``config``.
        config: training configuration. Reads ``seed``, ``batch_size``,
            ``num_workers``, ``n_mels``, ``max_epoch``, ``num_snapshots`` and
            the ``model`` / ``optimizer`` / ``updater`` sections.
    """
    # decides device type and whether to run in parallel
    # setup running environment correctly
    if (not paddle.is_compiled_with_cuda()) or args.ngpu == 0:
        paddle.set_device("cpu")
    else:
        paddle.set_device("gpu")
    world_size = paddle.distributed.get_world_size()
    if world_size > 1:
        paddle.distributed.init_parallel_env()

    # set the random seed, it is a must for multiprocess training
    seed_everything(config.seed)

    print(
        f"rank: {dist.get_rank()}, pid: {os.getpid()}, parent_pid: {os.getppid()}",
    )

    # dataloader has been too verbose
    logging.getLogger("DataLoader").disabled = True

    fields = [
        "text",
        "text_lengths",
        "speech",
        "speech_lengths",
    ]

    # "speech" is stored as a path in the metadata and loaded on access
    converters = {
        "speech": np.load,
    }
    if args.voice_cloning:
        print("Training voice cloning!")
        collate_fn = tacotron2_multi_spk_batch_fn
        fields += ["spk_emb"]
        converters["spk_emb"] = np.load
    else:
        print("single speaker tacotron2!")
        collate_fn = tacotron2_single_spk_batch_fn

    # construct dataset for training and validation
    with jsonlines.open(args.train_metadata, 'r') as reader:
        train_metadata = list(reader)
    train_dataset = DataTable(
        data=train_metadata,
        fields=fields,
        converters=converters, )
    with jsonlines.open(args.dev_metadata, 'r') as reader:
        dev_metadata = list(reader)
    dev_dataset = DataTable(
        data=dev_metadata,
        fields=fields,
        converters=converters, )

    # collate function and dataloader
    train_sampler = DistributedBatchSampler(
        train_dataset,
        batch_size=config.batch_size,
        shuffle=True,
        drop_last=True)

    print("samplers done!")

    train_dataloader = DataLoader(
        train_dataset,
        batch_sampler=train_sampler,
        collate_fn=collate_fn,
        num_workers=config.num_workers)

    dev_dataloader = DataLoader(
        dev_dataset,
        shuffle=False,
        drop_last=False,
        batch_size=config.batch_size,
        collate_fn=collate_fn,
        num_workers=config.num_workers)
    print("dataloaders done!")

    # the vocabulary size is the number of lines in the phone dict file
    with open(args.phones_dict, 'rt', encoding='utf-8') as f:
        phn_id = [line.strip().split() for line in f.readlines()]
    vocab_size = len(phn_id)
    print("vocab_size:", vocab_size)

    odim = config.n_mels
    model = Tacotron2(idim=vocab_size, odim=odim, **config["model"])
    if world_size > 1:
        model = DataParallel(model)
    print("model done!")

    optimizer = build_optimizers(model, **config["optimizer"])
    print("optimizer done!")

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    if dist.get_rank() == 0:
        # Path.name honours the platform's path separator, whereas
        # split("/") keeps the whole path when backslashes are used.
        config_name = Path(args.config).name
        # copy conf to output_dir
        shutil.copyfile(args.config, output_dir / config_name)

    updater = Tacotron2Updater(
        model=model,
        optimizer=optimizer,
        dataloader=train_dataloader,
        output_dir=output_dir,
        **config["updater"])

    trainer = Trainer(updater, (config.max_epoch, 'epoch'), output_dir)

    evaluator = Tacotron2Evaluator(
        model, dev_dataloader, output_dir=output_dir, **config["updater"])

    # evaluation and visual logging are attached on rank 0 only;
    # the snapshot extension is attached on every rank
    if dist.get_rank() == 0:
        trainer.extend(evaluator, trigger=(1, "epoch"))
        trainer.extend(VisualDL(output_dir), trigger=(1, "iteration"))
    trainer.extend(
        Snapshot(max_size=config.num_snapshots), trigger=(1, 'epoch'))
    trainer.run()


def main():
    """Parse the command line, load the YAML config and start training.

    Training runs through `train_sp`: directly in this process when
    ``--ngpu`` is at most 1, otherwise spawned on ``--ngpu`` processes.
    """
    # parse args and config and redirect to train_sp
    parser = argparse.ArgumentParser(description="Train a Tacotron2 model.")
    add = parser.add_argument
    add("--config", type=str, help="tacotron2 config file.")
    add("--train-metadata", type=str, help="training data.")
    add("--dev-metadata", type=str, help="dev data.")
    add("--output-dir", type=str, help="output dir.")
    add("--ngpu", type=int, default=1, help="if ngpu == 0, use cpu.")
    add("--phones-dict", type=str, default=None, help="phone vocabulary file.")
    add("--voice-cloning",
        type=str2bool,
        default=False,
        help="whether training voice cloning model.")

    args = parser.parse_args()

    with open(args.config) as config_file:
        config = CfgNode(yaml.safe_load(config_file))

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(config)
    world_size = dist.get_world_size()
    pid = os.getpid()
    print(f"master see the word size: {world_size}, from pid: {pid}")

    # dispatch
    if args.ngpu <= 1:
        train_sp(args, config)
    else:
        dist.spawn(train_sp, (args, config), nprocs=args.ngpu)


if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/transformer_tts/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/exps/transformer_tts/normalize.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Normalize feature files and dump them."""
import argparse
import logging
from operator import itemgetter
from pathlib import Path

import jsonlines
import numpy as np
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

from paddlespeech.t2s.datasets.data_table import DataTable


def main():
    """Normalize dumped raw speech features and write new metadata.

    Reads the records listed in ``--metadata``, standardizes every
    ``speech`` array with the mean / scale stored in ``--speech-stats``,
    saves it as float32 under ``<dumpdir>/data_speech`` and maps phones and
    speaker to integer ids using ``--phones-dict`` / ``--speaker-dict``.
    The resulting records, sorted by ``utt_id``, are written to
    ``<dumpdir>/metadata.jsonl``.
    """
    parser = argparse.ArgumentParser(
        description="Normalize dumped raw features (See detail in parallel_wavegan/bin/normalize.py)."
    )
    parser.add_argument(
        "--metadata",
        type=str,
        required=True,
        help="directory including feature files to be normalized. "
        "you need to specify either *-scp or rootdir.")

    parser.add_argument(
        "--dumpdir",
        type=str,
        required=True,
        help="directory to dump normalized feature files.")
    parser.add_argument(
        "--speech-stats",
        type=str,
        required=True,
        help="speech statistics file.")
    # Both vocabularies are opened unconditionally below, so a missing value
    # is reported by argparse instead of failing inside open(None).
    parser.add_argument(
        "--phones-dict",
        type=str,
        default=None,
        required=True,
        help="phone vocabulary file.")
    parser.add_argument(
        "--speaker-dict",
        type=str,
        default=None,
        required=True,
        help="speaker id map file.")

    args = parser.parse_args()

    # check directory existence
    dumpdir = Path(args.dumpdir).resolve()
    dumpdir.mkdir(parents=True, exist_ok=True)

    # get dataset
    with jsonlines.open(args.metadata, 'r') as reader:
        metadata = list(reader)
    dataset = DataTable(
        metadata, converters={
            "speech": np.load,
        })
    logging.info(f"The number of files = {len(dataset)}.")

    # restore scaler; the stats file is read once (row 0: mean, row 1: scale)
    speech_stats = np.load(args.speech_stats)
    speech_scaler = StandardScaler()
    speech_scaler.mean_ = speech_stats[0]
    speech_scaler.scale_ = speech_stats[1]
    speech_scaler.n_features_in_ = speech_scaler.mean_.shape[0]

    # "<token> <id>" per line; blank lines are ignored
    vocab_phones = {}
    with open(args.phones_dict, 'rt') as f:
        for line in f:
            if not line.strip():
                continue
            phn, phn_idx = line.strip().split()
            vocab_phones[phn] = int(phn_idx)

    vocab_speaker = {}
    with open(args.speaker_dict, 'rt') as f:
        for line in f:
            if not line.strip():
                continue
            spk, spk_idx = line.strip().split()
            vocab_speaker[spk] = int(spk_idx)

    # the output folder is the same for every item, so create it once
    speech_dir = dumpdir / "data_speech"
    speech_dir.mkdir(parents=True, exist_ok=True)

    # process each file
    output_metadata = []

    for item in tqdm(dataset):
        utt_id = item['utt_id']
        # normalize
        speech = speech_scaler.transform(item['speech'])
        speech_path = speech_dir / f"{utt_id}_speech.npy"
        np.save(speech_path, speech.astype(np.float32), allow_pickle=False)

        phone_ids = [vocab_phones[p] for p in item['phones']]
        spk_id = vocab_speaker[item["speaker"]]
        record = {
            "utt_id": item['utt_id'],
            "spk_id": spk_id,
            "text": phone_ids,
            "text_lengths": item['text_lengths'],
            "speech_lengths": item['speech_lengths'],
            "speech": str(speech_path),
        }
        # add spk_emb for voice cloning
        if "spk_emb" in item:
            record["spk_emb"] = str(item["spk_emb"])
        output_metadata.append(record)
    output_metadata.sort(key=itemgetter('utt_id'))
    # write next to the features, through the already-resolved dump dir
    output_metadata_path = dumpdir / "metadata.jsonl"
    with jsonlines.open(output_metadata_path, 'w') as writer:
        for item in output_metadata:
            writer.write(item)
    logging.info(f"metadata dumped into {output_metadata_path}")


if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/transformer_tts/preprocess.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from concurrent.futures import ThreadPoolExecutor
from operator import itemgetter
from pathlib import Path
from typing import Any
from typing import Dict
from typing import List

import jsonlines
import librosa
import numpy as np
import tqdm
import yaml
from yacs.config import CfgNode as Configuration

from paddlespeech.t2s.datasets.get_feats import LogMelFBank
from paddlespeech.t2s.frontend.en_frontend import English


def get_lj_sentences(file_name, frontend):
    '''Read a ``|``-separated transcript file and phoneticize each sentence.

    Every non-blank line is split on ``|``: the first field is the utterance
    id and the last field is the text passed to ``frontend.phoneticize``.
    The first and last returned symbols are dropped, and whitespace-only
    symbols are removed. The speaker name is the first two characters of the
    part of the utterance id before the first ``-``.

    Args:
        file_name (str or Path): path of the transcript file.
        frontend: object providing ``phoneticize(text) -> list of str``.
    Returns:
        Tuple[Dict, set]: ``({utt_id: ([phoneme], speaker)}, {speaker})``
    '''
    sentence = {}
    speaker_set = set()
    # `with` closes the file even if the frontend raises; the encoding is
    # explicit so the result does not depend on the platform default.
    with open(file_name, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # a blank line would otherwise register an empty utterance
                # id and an empty speaker name
                continue
            line_list = line.split('|')
            utt = line_list[0]
            speaker = utt.split("-")[0][:2]
            speaker_set.add(speaker)
            raw_text = line_list[-1]
            phonemes = frontend.phoneticize(raw_text)
            phonemes = phonemes[1:-1]
            phonemes = [phn for phn in phonemes if not phn.isspace()]
            sentence[utt] = (phonemes, speaker)
    return sentence, speaker_set


def get_input_token(sentence, output_path):
    '''Collect the phone inventory of the data and save it as an id map.

    The file lists ``<pad>`` and ``<unk>`` first, then every distinct phone
    in sorted order, then ``<eos>``; each line is ``"<phone> <index>"``.

    Args:
        sentence (Dict): sentence: {'utt': ([char], str)}
        output_path (str or path): path to save phone_id_map
    '''
    distinct_phones = {
        phn
        for entry in sentence.values() for phn in entry[0] if phn != "<eos>"
    }
    vocab = ["<pad>", "<unk>"] + sorted(distinct_phones) + ["<eos>"]

    with open(output_path, 'w') as f:
        for index, phn in enumerate(vocab):
            f.write(' '.join((phn, str(index))) + '\n')


def get_spk_id_map(speaker_set, output_path):
    '''Write the sorted speaker names, one ``"<speaker> <index>"`` per line.

    Args:
        speaker_set (Iterable[str]): speaker names.
        output_path (str or path): path to save the speaker id map
    '''
    ordered = sorted(speaker_set)
    with open(output_path, 'w') as f:
        for index in range(len(ordered)):
            f.write(ordered[index] + ' ' + str(index) + '\n')


def process_sentence(config: Dict[str, Any],
                     fp: Path,
                     sentences: Dict,
                     output_dir: Path,
                     mel_extractor=None):
    """Extract and dump the log-mel feature of one utterance.

    Args:
        config: configuration object; ``fs`` is read from it.
        fp (Path): audio file; its stem is the utterance id.
        sentences (Dict): ``utt_id -> (phones, speaker)``.
        output_dir (Path): features are saved under ``output_dir/data_speech``.
        mel_extractor: object providing ``get_log_mel_fbank(wav, base=...)``.

    Returns:
        dict or None: the metadata record, or ``None`` when the utterance is
        unknown, the audio is not 1-D, or its peak amplitude exceeds 1.0.
    """
    utt_id = fp.stem
    if utt_id not in sentences:
        return None

    # reading, resampling may occur
    wav, _ = librosa.load(str(fp), sr=config.fs)
    is_one_dim = len(wav.shape) == 1
    if not is_one_dim or np.abs(wav).max() > 1.0:
        return None
    assert len(wav.shape) == 1, f"{utt_id} is not a mono-channel audio."
    assert np.abs(wav).max(
    ) <= 1.0, f"{utt_id} is seems to be different that 16 bit PCM."

    entry = sentences[utt_id]
    phones = entry[0]
    speaker = entry[1]

    logmel = mel_extractor.get_log_mel_fbank(wav, base='e')

    mel_dir = output_dir / "data_speech"
    mel_dir.mkdir(parents=True, exist_ok=True)
    mel_path = mel_dir / (utt_id + "_speech.npy")
    np.save(mel_path, logmel)

    return {
        "utt_id": utt_id,
        "phones": phones,
        "text_lengths": len(phones),
        # number of frames of the extracted feature
        "speech_lengths": logmel.shape[0],
        "speech": str(mel_path),
        "speaker": speaker
    }


def process_sentences(config,
                      fps: List[Path],
                      sentences: Dict,
                      output_dir: Path,
                      mel_extractor=None,
                      nprocs: int=1):
    """Run ``process_sentence`` over many wav files and write ``metadata.jsonl``.

    Args:
        config: configuration object forwarded to ``process_sentence``.
        fps (List[Path]): wav files to process.
        sentences (Dict): transcriptions forwarded to ``process_sentence``.
        output_dir (Path): directory receiving the features and
            ``metadata.jsonl``.
        mel_extractor: feature extractor forwarded to ``process_sentence``.
        nprocs (int): 1 processes the files sequentially; any other value is
            used as the worker count of a thread pool.
    """
    records = []
    if nprocs == 1:
        for wav_path in tqdm.tqdm(fps, total=len(fps)):
            item = process_sentence(
                config=config,
                fp=wav_path,
                sentences=sentences,
                output_dir=output_dir,
                mel_extractor=mel_extractor)
            if item:
                records.append(item)
    else:
        with ThreadPoolExecutor(nprocs) as executor:
            with tqdm.tqdm(total=len(fps)) as bar:
                pending = []
                for wav_path in fps:
                    job = executor.submit(process_sentence, config, wav_path,
                                          sentences, output_dir, mel_extractor)
                    # advance the progress bar whenever a job finishes
                    job.add_done_callback(lambda _done: bar.update())
                    pending.append(job)

                # gather in submission order, dropping skipped utterances
                records = [
                    rec for rec in (job.result() for job in pending) if rec
                ]

    # records are written sorted by utterance id
    records.sort(key=itemgetter("utt_id"))
    with jsonlines.open(output_dir / "metadata.jsonl", 'w') as writer:
        for rec in records:
            writer.write(rec)
    print("Done")


def main():
    """Command-line entry point of the preprocessing script.

    Parses the arguments, loads the yaml configuration, writes the phone and
    speaker id maps into the dump directory, splits the wav files into
    train/dev/test and extracts the features of every split into
    ``<dumpdir>/<split>/raw``.

    Unsupported ``--dataset`` values and a missing ``--rootdir`` are rejected
    with a usage error before anything is written.
    """
    # parse config and args
    parser = argparse.ArgumentParser(
        description="Preprocess audio and then extract features.")

    parser.add_argument(
        "--dataset",
        default="ljspeech",
        type=str,
        help="name of dataset, should in {ljspeech} now")

    parser.add_argument(
        "--rootdir", default=None, type=str, help="directory to dataset.")

    parser.add_argument(
        "--dumpdir",
        type=str,
        required=True,
        help="directory to dump feature files.")

    parser.add_argument(
        "--config-path",
        default="conf/default.yaml",
        type=str,
        help="yaml format configuration file.")

    parser.add_argument(
        "--num-cpu", type=int, default=1, help="number of process.")

    args = parser.parse_args()

    # Validate up front: an unknown dataset used to surface much later as a
    # NameError on the split lists, and a missing --rootdir as a TypeError
    # from Path(None).
    if args.dataset != "ljspeech":
        parser.error(
            f"unsupported dataset '{args.dataset}', only 'ljspeech' is supported now"
        )
    if args.rootdir is None:
        parser.error("--rootdir is required")

    config_path = Path(args.config_path).resolve()
    root_dir = Path(args.rootdir).expanduser()
    dumpdir = Path(args.dumpdir).expanduser()
    # use absolute path
    dumpdir = dumpdir.resolve()
    dumpdir.mkdir(parents=True, exist_ok=True)

    assert root_dir.is_dir(), f"{root_dir} is not a directory."

    with open(config_path, 'rt') as f:
        _C = yaml.safe_load(f)
        _C = Configuration(_C)
        config = _C.clone()

    phone_id_map_path = dumpdir / "phone_id_map.txt"
    speaker_id_map_path = dumpdir / "speaker_id_map.txt"

    # ljspeech is the only dataset accepted above
    wav_files = sorted(list((root_dir / "wavs").rglob("*.wav")))
    frontend = English()
    sentences, speaker_set = get_lj_sentences(root_dir / "metadata.csv",
                                              frontend)
    get_input_token(sentences, phone_id_map_path)
    get_spk_id_map(speaker_set, speaker_id_map_path)
    # split data into 3 sections
    num_train = 12900
    num_dev = 100
    train_wav_files = wav_files[:num_train]
    dev_wav_files = wav_files[num_train:num_train + num_dev]
    test_wav_files = wav_files[num_train + num_dev:]

    train_dump_dir = dumpdir / "train" / "raw"
    train_dump_dir.mkdir(parents=True, exist_ok=True)
    dev_dump_dir = dumpdir / "dev" / "raw"
    dev_dump_dir.mkdir(parents=True, exist_ok=True)
    test_dump_dir = dumpdir / "test" / "raw"
    test_dump_dir.mkdir(parents=True, exist_ok=True)

    # Extractor
    mel_extractor = LogMelFBank(
        sr=config.fs,
        n_fft=config.n_fft,
        hop_length=config.n_shift,
        win_length=config.win_length,
        window=config.window,
        n_mels=config.n_mels,
        fmin=config.fmin,
        fmax=config.fmax)

    # process for the 3 sections; an empty split is skipped
    for split_wav_files, split_dump_dir in (
        (train_wav_files, train_dump_dir),
        (dev_wav_files, dev_dump_dir),
        (test_wav_files, test_dump_dir), ):
        if split_wav_files:
            process_sentences(
                config=config,
                fps=split_wav_files,
                sentences=sentences,
                output_dir=split_dump_dir,
                mel_extractor=mel_extractor,
                nprocs=args.num_cpu)


# Run the preprocessing pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/transformer_tts/synthesize.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
from pathlib import Path

import jsonlines
import numpy as np
import paddle
import soundfile as sf
import yaml
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.data_table import DataTable
from paddlespeech.t2s.models.transformer_tts import TransformerTTS
from paddlespeech.t2s.models.transformer_tts import TransformerTTSInference
from paddlespeech.t2s.models.waveflow import ConditionalWaveFlow
from paddlespeech.t2s.modules.normalizer import ZScore
from paddlespeech.t2s.utils import layer_tools


def evaluate(args, acoustic_model_config, vocoder_config):
    """Synthesize a wav file for every entry of the test metadata.

    Builds TransformerTTS and the WaveFlow vocoder from their checkpoints,
    wraps the acoustic model with the z-score normalizer loaded from
    ``args.transformer_tts_stat`` and writes ``<utt_id>.wav`` into
    ``args.output_dir`` for each datum.

    Args:
        args: parsed command-line arguments (paths of the metadata, phone
            dictionary, checkpoints, statistics and output directory).
        acoustic_model_config: TransformerTTS configuration; ``n_mels``,
            ``fs`` and the ``"model"`` section are read.
        vocoder_config: WaveFlow configuration passed to ``from_pretrained``.
    """
    # dataloader has been too verbose
    logging.getLogger("DataLoader").disabled = True

    # construct dataset for evaluation
    with jsonlines.open(args.test_metadata, 'r') as metadata_reader:
        records = list(metadata_reader)
    test_dataset = DataTable(data=records, fields=["utt_id", "text"])

    # the vocabulary size is the number of lines of the phone dictionary
    with open(args.phones_dict, "r") as vocab_file:
        vocab_size = sum(1 for _ in vocab_file)
    print("vocab_size:", vocab_size)

    acoustic_model = TransformerTTS(
        idim=vocab_size,
        odim=acoustic_model_config.n_mels,
        **acoustic_model_config["model"])
    checkpoint = paddle.load(args.transformer_tts_checkpoint)
    acoustic_model.set_state_dict(checkpoint["main_params"])
    acoustic_model.eval()

    # remove ".pdparams" in waveflow_checkpoint
    vocoder_checkpoint_path = args.waveflow_checkpoint
    if vocoder_checkpoint_path.endswith(".pdparams"):
        vocoder_checkpoint_path = vocoder_checkpoint_path[:-9]
    vocoder = ConditionalWaveFlow.from_pretrained(vocoder_config,
                                                  vocoder_checkpoint_path)
    layer_tools.recursively_remove_weight_norm(vocoder)
    vocoder.eval()
    print("model done!")

    # the statistics file holds the mean followed by the standard deviation
    mean, scale = np.load(args.transformer_tts_stat)
    normalizer = ZScore(paddle.to_tensor(mean), paddle.to_tensor(scale))
    synthesizer = TransformerTTSInference(normalizer, acoustic_model)

    wav_dir = Path(args.output_dir)
    wav_dir.mkdir(parents=True, exist_ok=True)

    for sample in test_dataset:
        name = sample["utt_id"]
        token_ids = paddle.to_tensor(sample["text"])

        with paddle.no_grad():
            spec = synthesizer(token_ids)
            # mel shape is (T, feats) and waveflow's input shape is (batch, feats, T)
            spec = spec.unsqueeze(0).transpose([0, 2, 1])
            # wavflow's output shape is (B, T)
            audio = vocoder.infer(spec)[0]

        sf.write(
            str(wav_dir / (name + ".wav")),
            audio.numpy(),
            samplerate=acoustic_model_config.fs)
        print(f"{name} done!")


def main():
    """Parse the command line, load both configs and run ``evaluate``."""
    # parse args and config and redirect to train_sp
    parser = argparse.ArgumentParser(
        description="Synthesize with transformer tts & waveflow.")

    # every option below is a plain string without default value,
    # registered in the order it appears in the help output
    string_options = (
        ("--transformer-tts-config", "transformer tts config file."),
        ("--transformer-tts-checkpoint", "transformer tts checkpoint to load."),
        ("--transformer-tts-stat",
         "mean and standard deviation used to normalize spectrogram when training transformer tts."
         ),
        ("--waveflow-config", "waveflow config file."),
        # not normalize when training waveflow
        ("--waveflow-checkpoint", "waveflow checkpoint to load."),
        ("--phones-dict", "phone vocabulary file."),
        ("--test-metadata", "test metadata."),
        ("--output-dir", "output dir."), )
    for flag, description in string_options:
        parser.add_argument(flag, type=str, default=None, help=description)
    parser.add_argument(
        "--ngpu", type=int, default=1, help="if ngpu == 0, use cpu.")

    args = parser.parse_args()

    # a negative value is only reported, the device is left untouched
    if args.ngpu < 0:
        print("ngpu should >= 0 !")
    else:
        paddle.set_device("cpu" if args.ngpu == 0 else "gpu")

    loaded_configs = []
    for config_file in (args.transformer_tts_config, args.waveflow_config):
        with open(config_file) as stream:
            loaded_configs.append(CfgNode(yaml.safe_load(stream)))
    transformer_tts_config, waveflow_config = loaded_configs

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(transformer_tts_config)
    print(waveflow_config)

    evaluate(args, transformer_tts_config, waveflow_config)


# Run the synthesis only when this file is executed as a script.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/transformer_tts/synthesize_e2e.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
from pathlib import Path

import numpy as np
import paddle
import soundfile as sf
import yaml
from yacs.config import CfgNode

from paddlespeech.t2s.frontend.en_frontend import English
from paddlespeech.t2s.models.transformer_tts import TransformerTTS
from paddlespeech.t2s.models.transformer_tts import TransformerTTSInference
from paddlespeech.t2s.models.waveflow import ConditionalWaveFlow
from paddlespeech.t2s.modules.normalizer import ZScore
from paddlespeech.t2s.utils import layer_tools


def evaluate(args, acoustic_model_config, vocoder_config):
    """Synthesize a wav file for every sentence of a text file.

    Each non-blank line of ``args.text`` is ``<utt_id> <sentence>``. The
    sentence is converted to phones by the English frontend, mapped to ids
    with the phone dictionary, passed through TransformerTTS and the WaveFlow
    vocoder, and written to ``<output_dir>/<utt_id>.wav``.

    Args:
        args: parsed command-line arguments (paths of the text file, phone
            dictionary, checkpoints, statistics and output directory).
        acoustic_model_config: TransformerTTS configuration; ``n_mels``,
            ``fs`` and the ``"model"`` section are read.
        vocoder_config: WaveFlow configuration passed to ``from_pretrained``.
    """
    # dataloader has been too verbose
    logging.getLogger("DataLoader").disabled = True

    # construct dataset for evaluation
    sentences = []
    with open(args.text, 'rt') as f:
        for line in f:
            line_list = line.strip().split()
            # Blank lines (e.g. a trailing empty line) carry no utterance;
            # indexing line_list[0] on them would raise IndexError.
            if not line_list:
                continue
            utt_id = line_list[0]
            sentence = " ".join(line_list[1:])
            sentences.append((utt_id, sentence))

    with open(args.phones_dict, "r") as f:
        phn_id = [line.strip().split() for line in f.readlines()]

    vocab_size = len(phn_id)
    phone_id_map = {}
    # "phone_index" rather than "id" so the builtin is not shadowed
    for phn, phone_index in phn_id:
        phone_id_map[phn] = int(phone_index)
    print("vocab_size:", vocab_size)
    odim = acoustic_model_config.n_mels
    model = TransformerTTS(
        idim=vocab_size, odim=odim, **acoustic_model_config["model"])

    model.set_state_dict(
        paddle.load(args.transformer_tts_checkpoint)["main_params"])
    model.eval()

    # remove ".pdparams" in waveflow_checkpoint
    vocoder_checkpoint_path = args.waveflow_checkpoint[:-9] if args.waveflow_checkpoint.endswith(
        ".pdparams") else args.waveflow_checkpoint
    vocoder = ConditionalWaveFlow.from_pretrained(vocoder_config,
                                                  vocoder_checkpoint_path)
    layer_tools.recursively_remove_weight_norm(vocoder)
    vocoder.eval()
    print("model done!")

    frontend = English()
    print("frontend done!")

    # the statistics file holds the mean followed by the standard deviation
    stat = np.load(args.transformer_tts_stat)
    mu, std = stat
    mu = paddle.to_tensor(mu)
    std = paddle.to_tensor(std)
    transformer_tts_normalizer = ZScore(mu, std)

    transformer_tts_inference = TransformerTTSInference(
        transformer_tts_normalizer, model)

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    for utt_id, sentence in sentences:
        phones = frontend.phoneticize(sentence)
        # remove start_symbol and end_symbol
        phones = phones[1:-1]
        phones = [phn for phn in phones if not phn.isspace()]
        # phones missing from the dictionary are replaced by ","
        phones = [phn if phn in phone_id_map else "," for phn in phones]
        phone_ids = [phone_id_map[phn] for phn in phones]
        with paddle.no_grad():
            mel = transformer_tts_inference(paddle.to_tensor(phone_ids))
            # mel shape is (T, feats) and waveflow's input shape is (batch, feats, T)
            mel = mel.unsqueeze(0).transpose([0, 2, 1])
            # wavflow's output shape is (B, T)
            wav = vocoder.infer(mel)[0]

        sf.write(
            str(output_dir / (utt_id + ".wav")),
            wav.numpy(),
            samplerate=acoustic_model_config.fs)
        print(f"{utt_id} done!")


def main():
    """Parse the command line, load both configs and run ``evaluate``."""
    # parse args and config and redirect to train_sp
    parser = argparse.ArgumentParser(
        description="Synthesize with transformer tts & waveflow.")

    # (flag, default, help) of the string options, in help-output order
    string_options = (
        ("--transformer-tts-config", None, "transformer tts config file."),
        ("--transformer-tts-checkpoint", None,
         "transformer tts checkpoint to load."),
        ("--transformer-tts-stat", None,
         "mean and standard deviation used to normalize spectrogram when training transformer tts."
         ),
        ("--waveflow-config", None, "waveflow config file."),
        # not normalize when training waveflow
        ("--waveflow-checkpoint", None, "waveflow checkpoint to load."),
        ("--phones-dict", "phone_id_map.txt", "phone vocabulary file."),
        ("--text", None,
         "text to synthesize, a 'utt_id sentence' pair per line."),
        ("--output-dir", None, "output dir."), )
    for flag, fallback, description in string_options:
        parser.add_argument(flag, type=str, default=fallback, help=description)
    parser.add_argument(
        "--ngpu", type=int, default=1, help="if ngpu == 0, use cpu.")

    args = parser.parse_args()

    # a negative value is only reported, the device is left untouched
    if args.ngpu < 0:
        print("ngpu should >= 0 !")
    else:
        paddle.set_device("cpu" if args.ngpu == 0 else "gpu")

    loaded_configs = []
    for config_file in (args.transformer_tts_config, args.waveflow_config):
        with open(config_file) as stream:
            loaded_configs.append(CfgNode(yaml.safe_load(stream)))
    transformer_tts_config, waveflow_config = loaded_configs

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(transformer_tts_config)
    print(waveflow_config)

    evaluate(args, transformer_tts_config, waveflow_config)


# Run the end-to-end synthesis only when this file is executed as a script.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/transformer_tts/train.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
import os
import shutil
from pathlib import Path

import jsonlines
import numpy as np
import paddle
import yaml
from paddle import DataParallel
from paddle import distributed as dist
from paddle.io import DataLoader
from paddle.io import DistributedBatchSampler
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.am_batch_fn import transformer_single_spk_batch_fn
from paddlespeech.t2s.datasets.data_table import DataTable
from paddlespeech.t2s.models.transformer_tts import TransformerTTS
from paddlespeech.t2s.models.transformer_tts import TransformerTTSEvaluator
from paddlespeech.t2s.models.transformer_tts import TransformerTTSUpdater
from paddlespeech.t2s.training.extensions.snapshot import Snapshot
from paddlespeech.t2s.training.extensions.visualizer import VisualDL
from paddlespeech.t2s.training.optimizer import build_optimizers
from paddlespeech.t2s.training.seeding import seed_everything
from paddlespeech.t2s.training.trainer import Trainer


def train_sp(args, config):
    """Train TransformerTTS in the current process.

    Selects the device, initialises the parallel environment when the world
    size is larger than 1, builds the datasets, dataloaders, model, optimizer,
    updater and trainer, then runs the training loop.

    Args:
        args: parsed command-line arguments (``ngpu``, ``train_metadata``,
            ``dev_metadata``, ``phones_dict``, ``output_dir``, ``config``).
        config: training configuration; ``seed``, ``batch_size``,
            ``num_workers``, ``n_mels``, ``max_epoch``, ``num_snapshots`` and
            the ``"model"``, ``"optimizer"`` and ``"updater"`` sections are
            read.
    """
    # decides device type and whether to run in parallel
    # setup running environment correctly
    if paddle.is_compiled_with_cuda() and args.ngpu > 0:
        device = "gpu"
    elif paddle.is_compiled_with_npu() and args.ngpu > 0:
        device = "npu"
    else:
        device = "cpu"
    paddle.set_device(device)
    world_size = dist.get_world_size()
    if world_size > 1:
        dist.init_parallel_env()

    # set the random seed, it is a must for multiprocess training
    seed_everything(config.seed)

    rank = dist.get_rank()
    print(f"rank: {rank}, pid: {os.getpid()}, parent_pid: {os.getppid()}", )

    # dataloader has been too verbose
    logging.getLogger("DataLoader").disabled = True

    def _load_table(metadata_path):
        # Read a jsonl metadata file into a DataTable; the "speech" field
        # is converted with np.load.
        with jsonlines.open(metadata_path, 'r') as reader:
            rows = list(reader)
        return DataTable(
            data=rows,
            fields=["text", "text_lengths", "speech", "speech_lengths"],
            converters={"speech": np.load})

    # construct dataset for training and validation
    train_dataset = _load_table(args.train_metadata)
    dev_dataset = _load_table(args.dev_metadata)

    # collate function and dataloader
    train_sampler = DistributedBatchSampler(
        train_dataset,
        batch_size=config.batch_size,
        shuffle=True,
        drop_last=True)

    print("samplers done!")

    shared_loader_options = dict(
        collate_fn=transformer_single_spk_batch_fn,
        num_workers=config.num_workers)
    train_dataloader = DataLoader(
        train_dataset, batch_sampler=train_sampler, **shared_loader_options)
    dev_dataloader = DataLoader(
        dev_dataset,
        shuffle=False,
        drop_last=False,
        batch_size=config.batch_size,
        **shared_loader_options)
    print("dataloaders done!")

    # the vocabulary size is the number of lines of the phone dictionary
    with open(args.phones_dict, 'rt', encoding='utf-8') as vocab_file:
        vocab_size = sum(1 for _ in vocab_file)
    print("vocab_size:", vocab_size)

    model = TransformerTTS(
        idim=vocab_size, odim=config.n_mels, **config["model"])
    if world_size > 1:
        model = DataParallel(model)
    print("model done!")

    optimizer = build_optimizers(model, **config["optimizer"])
    print("optimizer done!")

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    is_master = rank == 0
    if is_master:
        # copy conf to output_dir
        config_name = args.config.rsplit("/", 1)[-1]
        shutil.copyfile(args.config, output_dir / config_name)

    updater = TransformerTTSUpdater(
        model=model,
        optimizer=optimizer,
        dataloader=train_dataloader,
        output_dir=output_dir,
        **config["updater"])

    trainer = Trainer(updater, (config.max_epoch, 'epoch'), output_dir)

    evaluator = TransformerTTSEvaluator(
        model, dev_dataloader, output_dir=output_dir, **config["updater"])

    # evaluation and visualisation are attached on rank 0 only,
    # the snapshot extension on every rank
    if is_master:
        trainer.extend(evaluator, trigger=(1, "epoch"))
        trainer.extend(VisualDL(output_dir), trigger=(1, "iteration"))
    trainer.extend(
        Snapshot(max_size=config.num_snapshots), trigger=(1, 'epoch'))
    trainer.run()


def main():
    """Parse the command line, load the config and start training.

    With ``--ngpu`` larger than 1 one ``train_sp`` process per device is
    spawned, otherwise ``train_sp`` runs in the current process.
    """
    # parse args and config and redirect to train_sp
    parser = argparse.ArgumentParser(
        description="Train a TransformerTTS model with LJSpeech TTS dataset.")
    for flag, description in (
        ("--config", "TransformerTTS config file."),
        ("--train-metadata", "training data."),
        ("--dev-metadata", "dev data."),
        ("--output-dir", "output dir."), ):
        parser.add_argument(flag, type=str, help=description)
    parser.add_argument(
        "--ngpu", type=int, default=1, help="if ngpu == 0, use cpu.")
    parser.add_argument(
        "--phones-dict", type=str, default=None, help="phone vocabulary file.")

    args = parser.parse_args()

    with open(args.config) as config_file:
        config = CfgNode(yaml.safe_load(config_file))

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(config)
    print(
        f"master see the word size: {dist.get_world_size()}, from pid: {os.getpid()}"
    )

    # dispatch
    if args.ngpu <= 1:
        train_sp(args, config)
    else:
        dist.spawn(train_sp, (args, config), nprocs=args.ngpu)


# Start the training only when this file is executed as a script.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/vits/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/exps/vits/inference.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from pathlib import Path

import paddle
import soundfile as sf
from timer import timer

from paddlespeech.t2s.exps.syn_utils import get_am_output
from paddlespeech.t2s.exps.syn_utils import get_frontend
from paddlespeech.t2s.exps.syn_utils import get_predictor
from paddlespeech.t2s.exps.syn_utils import get_sentences
from paddlespeech.t2s.utils import str2bool


def parse_args():
    """Build the command-line parser and return the recognised arguments.

    Unknown arguments are tolerated: ``parse_known_args`` is used and the
    unrecognised remainder is discarded.
    """
    parser = argparse.ArgumentParser(
        description="Paddle Infernce with acoustic model & vocoder.")

    # (flag, keyword arguments) in the order shown by the help output
    option_table = [
        # acoustic model
        ('--am', dict(
            type=str,
            default='vits_csmsc',
            choices=['vits_csmsc', 'vits_aishell3'],
            help='Choose acoustic model type of tts task.')),
        ("--phones_dict", dict(
            type=str, default=None, help="phone vocabulary file.")),
        ("--speaker_dict", dict(
            type=str, default=None, help="speaker id map file.")),
        ('--spk_id', dict(
            type=int,
            default=0,
            help='spk id for multi speaker acoustic model')),
        # other
        ('--lang', dict(
            type=str,
            default='zh',
            help='Choose model language. zh or en or mix')),
        ("--text", dict(
            type=str,
            help="text to synthesize, a 'utt_id sentence' pair per line")),
        ("--add-blank", dict(
            type=str2bool,
            default=True,
            help="whether to add blank between phones")),
        ("--inference_dir", dict(
            type=str, help="dir to save inference models")),
        ("--output_dir", dict(type=str, help="output dir")),
        # inference
        ("--use_trt", dict(
            type=str2bool,
            default=False,
            help="whether to use TensorRT or not in GPU")),
        ("--use_mkldnn", dict(
            type=str2bool,
            default=False,
            help="whether to use MKLDNN or not in CPU.")),
        ("--precision", dict(
            type=str,
            default='fp32',
            choices=['fp32', 'fp16', 'bf16', 'int8'],
            help="mode of running")),
        ("--device", dict(
            default="gpu",
            choices=["gpu", "cpu"],
            help="Device selected for inference.")),
        ('--cpu_threads', dict(type=int, default=1)),
    ]
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)

    recognised, _ignored = parser.parse_known_args()
    return recognised


# only inference for models trained with csmsc now
def main():
    """Synthesize every sentence of ``--text`` with an exported VITS model.

    Builds the frontend and the predictor, runs up to three warm-up
    sentences, then synthesizes all sentences, writes ``<utt_id>.wav`` into
    the output directory and prints per-utterance and overall speed / RTF.
    The overall summary is skipped when nothing was generated.
    """
    args = parse_args()

    paddle.set_device(args.device)

    # frontend
    frontend = get_frontend(lang=args.lang, phones_dict=args.phones_dict)

    # am_predictor
    am_predictor = get_predictor(
        model_dir=args.inference_dir,
        model_file=args.am + ".pdmodel",
        params_file=args.am + ".pdiparams",
        device=args.device,
        use_trt=args.use_trt,
        use_mkldnn=args.use_mkldnn,
        cpu_threads=args.cpu_threads,
        precision=args.precision)

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    sentences = get_sentences(text_file=args.text, lang=args.lang)

    merge_sentences = True
    add_blank = args.add_blank
    # vits's fs is 22050
    fs = 22050
    # warmup
    for utt_id, sentence in sentences[:3]:
        with timer() as t:
            wav = get_am_output(
                input=sentence,
                am_predictor=am_predictor,
                am=args.am,
                frontend=frontend,
                lang=args.lang,
                merge_sentences=merge_sentences,
                speaker_dict=args.speaker_dict,
                spk_id=args.spk_id,
                add_blank=add_blank)
        speed = wav.size / t.elapse
        rtf = fs / speed
        print(
            f"{utt_id}, wave: {wav.shape}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}."
        )

    print("warm up done!")

    # N accumulates generated samples, T the elapsed synthesis time
    N = 0
    T = 0
    for utt_id, sentence in sentences:
        with timer() as t:
            wav = get_am_output(
                input=sentence,
                am_predictor=am_predictor,
                am=args.am,
                frontend=frontend,
                lang=args.lang,
                merge_sentences=merge_sentences,
                speaker_dict=args.speaker_dict,
                spk_id=args.spk_id,
                add_blank=add_blank)

        N += wav.size
        T += t.elapse
        speed = wav.size / t.elapse
        rtf = fs / speed
        sf.write(output_dir / (utt_id + ".wav"), wav, samplerate=fs)
        print(
            f"{utt_id}, wave: {wav.shape}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}."
        )

        print(f"{utt_id} done!")
    # With no input sentence N and T stay 0 and the summary below would
    # raise ZeroDivisionError.
    if N > 0 and T > 0:
        print(f"generation speed: {N / T}Hz, RTF: {fs / (N / T) }")
    else:
        print("no audio was generated, speed and RTF are not available.")


# Run the inference only when this file is executed as a script.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/vits/lite_predict.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from pathlib import Path

import soundfile as sf
from timer import timer

from paddlespeech.t2s.exps.lite_syn_utils import get_lite_am_output
from paddlespeech.t2s.exps.lite_syn_utils import get_lite_predictor
from paddlespeech.t2s.exps.syn_utils import get_frontend
from paddlespeech.t2s.exps.syn_utils import get_sentences
from paddlespeech.t2s.utils import str2bool


def parse_args():
    """Build the command-line parser and return the recognised arguments.

    Unknown arguments are tolerated: ``parse_known_args`` is used and the
    unrecognised remainder is discarded.
    """
    parser = argparse.ArgumentParser(
        description="Paddle Infernce with acoustic model & vocoder.")

    # (flag, keyword arguments) in the order shown by the help output
    option_table = [
        # acoustic model
        ('--am', dict(
            type=str,
            default='vits_csmsc',
            choices=['vits_csmsc', 'vits_aishell3'],
            help='Choose acoustic model type of tts task.')),
        ("--phones_dict", dict(
            type=str, default=None, help="phone vocabulary file.")),
        ("--speaker_dict", dict(
            type=str, default=None, help="speaker id map file.")),
        ('--spk_id', dict(
            type=int,
            default=0,
            help='spk id for multi speaker acoustic model')),
        # other
        ('--lang', dict(
            type=str,
            default='zh',
            help='Choose model language. zh or en or mix')),
        ("--text", dict(
            type=str,
            help="text to synthesize, a 'utt_id sentence' pair per line")),
        ("--add-blank", dict(
            type=str2bool,
            default=True,
            help="whether to add blank between phones")),
        ("--inference_dir", dict(
            type=str, help="dir to save inference models")),
        ("--output_dir", dict(type=str, help="output dir")),
    ]
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)

    recognised, _ignored = parser.parse_known_args()
    return recognised


# only inference for models trained with csmsc now
def main():
    """Synthesize every sentence of ``--text`` with a Paddle-Lite VITS model.

    The first (up to) three sentences are synthesized once as a warm-up
    whose audio is discarded; then every sentence is synthesized, written
    to ``<output_dir>/<utt_id>.wav`` and its speed / real-time factor is
    printed, followed by an overall summary.
    """
    args = parse_args()

    # frontend
    frontend = get_frontend(lang=args.lang, phones_dict=args.phones_dict)

    # am_predictor
    # vits can only run in arm
    am_predictor = get_lite_predictor(
        model_dir=args.inference_dir, model_file=args.am + "_arm.nb")

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    sentences = get_sentences(text_file=args.text, lang=args.lang)

    merge_sentences = True
    add_blank = args.add_blank
    fs = 22050

    def _synthesize(sentence):
        # Shared by the warm-up and the real pass so both time the same call.
        with timer() as t:
            wav = get_lite_am_output(
                input=sentence,
                am_predictor=am_predictor,
                am=args.am,
                frontend=frontend,
                lang=args.lang,
                merge_sentences=merge_sentences,
                speaker_dict=args.speaker_dict,
                spk_id=args.spk_id,
                add_blank=add_blank)
        return wav, t.elapse

    # warmup
    for utt_id, sentence in sentences[:3]:
        wav, elapsed = _synthesize(sentence)
        speed = wav.size / elapsed
        rtf = fs / speed
        print(
            f"{utt_id}, wave: {wav.shape}, time: {elapsed}s, Hz: {speed}, RTF: {rtf}."
        )

    print("warm up done!")

    N = 0
    T = 0
    for utt_id, sentence in sentences:
        wav, elapsed = _synthesize(sentence)

        N += wav.size
        T += elapsed
        speed = wav.size / elapsed
        rtf = fs / speed

        # str() keeps this call consistent with the other synthesis scripts.
        sf.write(str(output_dir / (utt_id + ".wav")), wav, samplerate=fs)
        print(
            f"{utt_id}, wave: {wav.shape}, time: {elapsed}s, Hz: {speed}, RTF: {rtf}."
        )

        print(f"{utt_id} done!")

    # Without this guard an empty text file ends in ZeroDivisionError.
    if N > 0 and T > 0:
        print(f"generation speed: {N / T}Hz, RTF: {fs / (N / T) }")
    else:
        print("no audio was generated, generation speed is not available.")


if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/vits/normalize.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Normalize feature files and dump them."""
import argparse
import logging
from operator import itemgetter
from pathlib import Path
from typing import List

import jsonlines
import numpy as np
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

from paddlespeech.t2s.datasets.data_table import DataTable
from paddlespeech.t2s.utils import str2bool

# Phones after which insert_after_character() does NOT append a blank token;
# every other phone is asserted to end with a tone digit.
INITIALS = [
    'b', 'p', 'm', 'f', 'd', 't', 'n', 'l', 'g', 'k', 'h',
    'zh', 'ch', 'sh', 'r', 'z', 'c', 's', 'j', 'q', 'x',
    'y', 'w', 'sp', 'spl', 'spn', 'sil',
]


def intersperse(lst, item):
    """Return a new list with ``item`` before, between and after the elements of ``lst``.

    The result always has ``2 * len(lst) + 1`` entries; an empty ``lst``
    yields ``[item]``.
    """
    woven = [item]
    for element in lst:
        woven.append(element)
        woven.append(item)
    return woven


def insert_after_character(lst, item):
    """Return ``lst`` with ``item`` prepended and appended after each non-initial phone.

    A phone listed in ``INITIALS`` is copied as is.  Any other phone must end
    with a tone digit (``1``-``5``), which is asserted, and is followed by
    ``item``.
    """
    output = [item]
    for phone in lst:
        output.append(phone)
        if phone in INITIALS:
            continue
        # finals has tones
        assert phone[-1] in "12345"
        output.append(item)
    return output


def add_blank(phones: List[str],
              filed: str="character",
              blank_token: str="<pad>"):
    """Insert ``blank_token`` into a phone sequence.

    Args:
        phones: the phone sequence.
        filed: where blanks go (the parameter name is kept as is because
            callers pass it by keyword).
            ``"phone"``: before, between and after every phone, e.g.
            ``["n", "i3"]`` -> ``["<pad>", "n", "<pad>", "i3", "<pad>"]``.
            ``"character"``: one leading blank, then one after every phone
            that is not in ``INITIALS``, e.g.
            ``["n", "i3", "h", "ao3"]`` ->
            ``["<pad>", "n", "i3", "<pad>", "h", "ao3", "<pad>"]``.
            Any other value leaves ``phones`` untouched.
        blank_token: the token to insert.

    Returns:
        A new list for the two modes above, otherwise ``phones`` itself.
    """
    if filed == "phone":
        return intersperse(phones, blank_token)
    if filed == "character":
        return insert_after_character(phones, blank_token)
    return phones


def _read_id_map(path):
    """Read a whitespace-separated ``token id`` file into a ``{token: int(id)}`` dict."""
    id_map = {}
    with open(path, 'rt') as f:
        for line in f:
            token, index = line.strip().split()
            id_map[token] = int(index)
    return id_map


def main():
    """Normalize dumped features and write them with a new ``metadata.jsonl``.

    For every record of ``--metadata`` the features are standardized with
    the mean / scale stored in ``--feats-stats`` and saved under
    ``--dumpdir``; the wave is copied as float32 unless ``--skip-wav-copy``
    is set; phones are optionally interleaved with blanks and mapped to ids;
    the speaker is mapped to its id.  The new metadata is sorted by
    ``feats_lengths`` in descending order.
    """
    parser = argparse.ArgumentParser(
        description="Normalize dumped raw features (See detail in parallel_wavegan/bin/normalize.py)."
    )
    parser.add_argument(
        "--metadata",
        type=str,
        required=True,
        help="directory including feature files to be normalized. "
        "you need to specify either *-scp or rootdir.")

    parser.add_argument(
        "--dumpdir",
        type=str,
        required=True,
        help="directory to dump normalized feature files.")
    parser.add_argument(
        "--feats-stats",
        type=str,
        required=True,
        help="speech statistics file.")
    parser.add_argument(
        "--skip-wav-copy",
        default=False,
        action="store_true",
        help="whether to skip the copy of wav files.")

    parser.add_argument(
        "--phones-dict", type=str, default=None, help="phone vocabulary file.")
    parser.add_argument(
        "--speaker-dict", type=str, default=None, help="speaker id map file.")
    parser.add_argument(
        "--add-blank",
        type=str2bool,
        default=True,
        help="whether to add blank between phones")

    args = parser.parse_args()

    # Both maps are read unconditionally below, so fail early with a clear
    # message instead of a TypeError from open(None).
    if args.phones_dict is None or args.speaker_dict is None:
        parser.error("--phones-dict and --speaker-dict are required.")

    dumpdir = Path(args.dumpdir).expanduser()
    # use absolute path
    dumpdir = dumpdir.resolve()
    dumpdir.mkdir(parents=True, exist_ok=True)

    # get dataset
    with jsonlines.open(args.metadata, 'r') as reader:
        metadata = list(reader)
    dataset = DataTable(
        metadata,
        converters={
            "feats": np.load,
            "wave": None if args.skip_wav_copy else np.load,
        })
    logging.info(f"The number of files = {len(dataset)}.")

    # restore scaler (row 0 of the stats file is the mean, row 1 the scale)
    stats = np.load(args.feats_stats)
    feats_scaler = StandardScaler()
    feats_scaler.mean_ = stats[0]
    feats_scaler.scale_ = stats[1]
    feats_scaler.n_features_in_ = feats_scaler.mean_.shape[0]

    vocab_phones = _read_id_map(args.phones_dict)
    vocab_speaker = _read_id_map(args.speaker_dict)

    # process each file
    output_metadata = []

    for item in tqdm(dataset):
        utt_id = item['utt_id']
        feats = item['feats']
        wave = item['wave']

        # normalize
        feats = feats_scaler.transform(feats)
        feats_path = dumpdir / f"{utt_id}_feats.npy"
        np.save(feats_path, feats.astype(np.float32), allow_pickle=False)

        if not args.skip_wav_copy:
            wav_path = dumpdir / f"{utt_id}_wave.npy"
            np.save(wav_path, wave.astype(np.float32), allow_pickle=False)
        else:
            # keep whatever the input metadata stored for the wave
            wav_path = wave

        phones = item['phones']
        text_lengths = item['text_lengths']
        if args.add_blank:
            phones = add_blank(phones, filed="character")
            text_lengths = len(phones)

        phone_ids = [vocab_phones[p] for p in phones]
        spk_id = vocab_speaker[item["speaker"]]

        record = {
            "utt_id": utt_id,
            "text": phone_ids,
            "text_lengths": text_lengths,
            'feats': str(feats_path),
            "feats_lengths": item['feats_lengths'],
            "wave": str(wav_path),
            "spk_id": spk_id,
        }

        # add spk_emb for voice cloning
        if "spk_emb" in item:
            record["spk_emb"] = str(item["spk_emb"])

        output_metadata.append(record)
    output_metadata.sort(key=itemgetter('feats_lengths'), reverse=True)
    # Use the expanded / resolved directory so the metadata lands next to
    # the feature files even when --dumpdir contains "~".
    output_metadata_path = dumpdir / "metadata.jsonl"
    with jsonlines.open(output_metadata_path, 'w') as writer:
        for item in output_metadata:
            writer.write(item)
    logging.info(f"metadata dumped into {output_metadata_path}")


if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/vits/preprocess.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
from concurrent.futures import ThreadPoolExecutor
from operator import itemgetter
from pathlib import Path
from typing import Any
from typing import Dict
from typing import List

import jsonlines
import librosa
import numpy as np
import tqdm
import yaml
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.get_feats import LinearSpectrogram
from paddlespeech.t2s.datasets.preprocess_utils import compare_duration_and_mel_length
from paddlespeech.t2s.datasets.preprocess_utils import get_input_token
from paddlespeech.t2s.datasets.preprocess_utils import get_phn_dur
from paddlespeech.t2s.datasets.preprocess_utils import get_spk_id_map
from paddlespeech.t2s.datasets.preprocess_utils import merge_silence
from paddlespeech.t2s.utils import str2bool


def process_sentence(config: Dict[str, Any],
                     fp: Path,
                     sentences: Dict,
                     output_dir: Path,
                     spec_extractor=None,
                     cut_sil: bool=True,
                     spk_emb_dir: Path=None):
    """Dump the waveform and linear spectrogram of one utterance.

    Args:
        config: configuration object; ``fs`` and ``n_shift`` are read from
            it by attribute access.
        fp: path of the audio file.  Its stem (with a trailing ``_mic2``
            removed) is the utterance id.
        sentences: maps an utterance id to an indexable entry whose items
            0, 1 and 2 are the phones, the durations and the speaker.
            The entry is updated in place when ``cut_sil`` is true, and it
            is passed to ``compare_duration_and_mel_length``, which may
            remove it.
        output_dir: directory receiving ``<utt_id>_feats.npy`` and
            ``<utt_id>_wave.npy``.
        spec_extractor: object providing ``get_linear_spectrogram(wav)``.
        cut_sil: whether a leading / trailing ``sil`` phone is cut off.
        spk_emb_dir: optional directory of per-speaker embedding folders.

    Returns:
        The metadata record (dict) of the utterance, or ``None`` when the
        utterance is not in ``sentences``, the audio is not one-dimensional,
        the utterance was dropped by ``compare_duration_and_mel_length``, or
        the speaker has an embedding folder but this utterance has no
        embedding file in it.
    """
    utt_id = fp.stem
    # for vctk
    if utt_id.endswith("_mic2"):
        utt_id = utt_id[:-5]
    record = None
    if utt_id in sentences:
        # reading, resampling may occur
        wav, _ = librosa.load(str(fp), sr=config.fs)
        # skip anything that is not a 1-D signal (record is still None here)
        if len(wav.shape) != 1:
            return record
        # rescale only when the peak exceeds 1.0
        max_value = np.abs(wav).max()
        if max_value > 1.0:
            wav = wav / max_value
        assert len(wav.shape) == 1, f"{utt_id} is not a mono-channel audio."
        assert np.abs(wav).max(
        ) <= 1.0, f"{utt_id} is seems to be different that 16 bit PCM."
        phones = sentences[utt_id][0]
        durations = sentences[utt_id][1]
        speaker = sentences[utt_id][2]
        # cumulative durations with a leading 0, i.e. the phone boundaries
        d_cumsum = np.pad(np.array(durations).cumsum(0), (1, 0), 'constant')
        # little imprecise than use *.TextGrid directly
        times = librosa.frames_to_time(
            d_cumsum, sr=config.fs, hop_length=config.n_shift)
        if cut_sil:
            start = 0
            end = d_cumsum[-1]
            # drop a leading "sil" unless it is the only phone
            if phones[0] == "sil" and len(durations) > 1:
                start = times[1]
                durations = durations[1:]
                phones = phones[1:]
            # drop a trailing "sil" unless it is the only phone left
            if phones[-1] == 'sil' and len(durations) > 1:
                end = times[-2]
                durations = durations[:-1]
                phones = phones[:-1]
            # write the trimmed phones / durations back into the shared dict
            sentences[utt_id][0] = phones
            sentences[utt_id][1] = durations
            # NOTE(review): when no trailing "sil" is cut, `end` is still the
            # frame count d_cumsum[-1] but is passed to time_to_samples as
            # seconds; the slice below then just runs to the end of `wav` --
            # confirm this is intended.
            start, end = librosa.time_to_samples([start, end], sr=config.fs)
            wav = wav[start:end]
        # extract mel feats
        spec = spec_extractor.get_linear_spectrogram(wav)
        # change duration according to mel_length
        compare_duration_and_mel_length(sentences, utt_id, spec)
        # utt_id may be popped in compare_duration_and_mel_length
        if utt_id not in sentences:
            return None
        # re-read: the entry may have been adjusted by the call above
        phones = sentences[utt_id][0]
        durations = sentences[utt_id][1]
        num_frames = spec.shape[0]
        assert sum(durations) == num_frames

        # make the waveform exactly num_frames * n_shift samples long
        if wav.size < num_frames * config.n_shift:
            wav = np.pad(
                wav, (0, num_frames * config.n_shift - wav.size),
                mode="reflect")
        else:
            wav = wav[:num_frames * config.n_shift]
        # NOTE(review): num_samples is not used below.
        num_samples = wav.shape[0]

        spec_path = output_dir / (utt_id + "_feats.npy")
        wav_path = output_dir / (utt_id + "_wave.npy")
        # (num_samples, )
        np.save(wav_path, wav)
        # (num_frames, aux_channels)
        np.save(spec_path, spec)

        record = {
            "utt_id": utt_id,
            "phones": phones,
            "text_lengths": len(phones),
            "feats": str(spec_path),
            "feats_lengths": num_frames,
            "wave": str(wav_path),
            "speaker": speaker
        }
        if spk_emb_dir:
            # Only speakers that have a folder under spk_emb_dir need an
            # embedding; for them a missing file drops the utterance.
            if speaker in os.listdir(spk_emb_dir):
                embed_name = utt_id + ".npy"
                embed_path = spk_emb_dir / speaker / embed_name
                if embed_path.is_file():
                    record["spk_emb"] = str(embed_path)
                else:
                    return None
    return record


def process_sentences(config,
                      fps: List[Path],
                      sentences: Dict,
                      output_dir: Path,
                      spec_extractor=None,
                      nprocs: int=1,
                      cut_sil: bool=True,
                      spk_emb_dir: Path=None):
    """Run ``process_sentence`` on every file and dump ``metadata.jsonl``.

    With ``nprocs == 1`` the files are handled one after another; otherwise
    they are submitted to a thread pool of ``nprocs`` workers.  Falsy
    results are dropped, the remaining records are sorted by
    ``feats_lengths`` (longest first) and written to
    ``output_dir / "metadata.jsonl"``.
    """
    if nprocs == 1:
        outcomes = [
            process_sentence(
                config=config,
                fp=fp,
                sentences=sentences,
                output_dir=output_dir,
                spec_extractor=spec_extractor,
                cut_sil=cut_sil,
                spk_emb_dir=spk_emb_dir)
            for fp in tqdm.tqdm(fps, total=len(fps))
        ]
    else:
        with ThreadPoolExecutor(nprocs) as pool, \
                tqdm.tqdm(total=len(fps)) as progress:
            pending = []
            for fp in fps:
                job = pool.submit(process_sentence, config, fp, sentences,
                                  output_dir, spec_extractor, cut_sil,
                                  spk_emb_dir)
                # advance the bar whenever a job finishes
                job.add_done_callback(lambda p: progress.update())
                pending.append(job)

            # collect in submission order
            outcomes = [job.result() for job in pending]

    results = [record for record in outcomes if record]
    results.sort(key=itemgetter("feats_lengths"), reverse=True)
    with jsonlines.open(output_dir / "metadata.jsonl", 'w') as writer:
        for record in results:
            writer.write(record)
    print("Done")


def _split_by_count(wav_files, num_train, num_dev):
    """Cut a sorted file list into (train, dev, test) by fixed counts."""
    return (wav_files[:num_train], wav_files[num_train:num_train + num_dev],
            wav_files[num_train + num_dev:])


def _split_by_speaker(wav_dir, pattern, sub_num_dev=5):
    """Split the files of every speaker folder into (train, dev, test).

    A speaker with more than 100 files gives its last ``sub_num_dev`` files
    to test and the ``sub_num_dev`` before those to dev; all files of a
    smaller speaker go to train.
    """
    train_wav_files = []
    dev_wav_files = []
    test_wav_files = []
    for speaker in os.listdir(wav_dir):
        wav_files = sorted((wav_dir / speaker).rglob(pattern))
        if len(wav_files) > 100:
            train_wav_files += wav_files[:-sub_num_dev * 2]
            dev_wav_files += wav_files[-sub_num_dev * 2:-sub_num_dev]
            test_wav_files += wav_files[-sub_num_dev:]
        else:
            train_wav_files += wav_files
    return train_wav_files, dev_wav_files, test_wav_files


def main():
    """Preprocess a dataset for VITS: split it and dump raw features.

    Reads the phone durations, writes the phone / speaker id maps into
    ``--dumpdir``, splits the audio files into train / dev / test and calls
    ``process_sentences`` for every non-empty split, writing to
    ``<dumpdir>/<split>/raw``.
    """
    # parse config and args
    parser = argparse.ArgumentParser(
        description="Preprocess audio and then extract features.")

    parser.add_argument(
        "--dataset",
        default="baker",
        type=str,
        help="name of dataset, should in {baker, aishell3, ljspeech, vctk} now")

    parser.add_argument(
        "--rootdir", default=None, type=str, help="directory to dataset.")

    parser.add_argument(
        "--dumpdir",
        type=str,
        required=True,
        help="directory to dump feature files.")
    parser.add_argument(
        "--dur-file", default=None, type=str, help="path to durations.txt.")

    parser.add_argument("--config", type=str, help="vits config file.")

    parser.add_argument(
        "--num-cpu", type=int, default=1, help="number of process.")

    parser.add_argument(
        "--cut-sil",
        type=str2bool,
        default=True,
        help="whether cut sil in the edge of audio")

    parser.add_argument(
        "--spk_emb_dir",
        default=None,
        type=str,
        help="directory to speaker embedding files.")
    args = parser.parse_args()

    # Fail before touching the disk: an unknown dataset used to print a
    # message and then crash later with a NameError.
    if args.dataset not in ("baker", "aishell3", "ljspeech", "vctk"):
        parser.error("dataset should in {baker, aishell3, ljspeech, vctk} now!")
    # These are used unconditionally below; None would raise a TypeError.
    if args.rootdir is None or args.dur_file is None or args.config is None:
        parser.error("--rootdir, --dur-file and --config are required.")

    rootdir = Path(args.rootdir).expanduser()
    dumpdir = Path(args.dumpdir).expanduser()
    # use absolute path
    dumpdir = dumpdir.resolve()
    dumpdir.mkdir(parents=True, exist_ok=True)
    dur_file = Path(args.dur_file).expanduser()

    if args.spk_emb_dir:
        spk_emb_dir = Path(args.spk_emb_dir).expanduser().resolve()
    else:
        spk_emb_dir = None

    assert rootdir.is_dir()
    assert dur_file.is_file()

    with open(args.config, 'rt') as f:
        config = CfgNode(yaml.safe_load(f))

    sentences, speaker_set = get_phn_dur(dur_file)

    merge_silence(sentences)
    phone_id_map_path = dumpdir / "phone_id_map.txt"
    speaker_id_map_path = dumpdir / "speaker_id_map.txt"
    get_input_token(sentences, phone_id_map_path, args.dataset)
    get_spk_id_map(speaker_set, speaker_id_map_path)

    # split data into 3 sections
    if args.dataset == "baker":
        wav_files = sorted((rootdir / "Wave").rglob("*.wav"))
        train_wav_files, dev_wav_files, test_wav_files = _split_by_count(
            wav_files, num_train=9800, num_dev=100)
    elif args.dataset == "aishell3":
        train_wav_files, dev_wav_files, test_wav_files = _split_by_speaker(
            rootdir / "train" / "wav", "*.wav")
    elif args.dataset == "ljspeech":
        wav_files = sorted((rootdir / "wavs").rglob("*.wav"))
        train_wav_files, dev_wav_files, test_wav_files = _split_by_count(
            wav_files, num_train=12900, num_dev=100)
    else:
        # "vctk" (the dataset name was validated right after parsing)
        train_wav_files, dev_wav_files, test_wav_files = _split_by_speaker(
            rootdir / "wav48_silence_trimmed", "*_mic2.flac")

    train_dump_dir = dumpdir / "train" / "raw"
    train_dump_dir.mkdir(parents=True, exist_ok=True)
    dev_dump_dir = dumpdir / "dev" / "raw"
    dev_dump_dir.mkdir(parents=True, exist_ok=True)
    test_dump_dir = dumpdir / "test" / "raw"
    test_dump_dir.mkdir(parents=True, exist_ok=True)

    # Extractor

    spec_extractor = LinearSpectrogram(
        n_fft=config.n_fft,
        hop_length=config.n_shift,
        win_length=config.win_length,
        window=config.window)

    # process for the 3 sections
    sections = (
        (train_wav_files, train_dump_dir),
        (dev_wav_files, dev_dump_dir),
        (test_wav_files, test_dump_dir), )
    for section_files, section_dir in sections:
        if section_files:
            process_sentences(
                config=config,
                fps=section_files,
                sentences=sentences,
                output_dir=section_dir,
                spec_extractor=spec_extractor,
                nprocs=args.num_cpu,
                cut_sil=args.cut_sil,
                spk_emb_dir=spk_emb_dir)


if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/vits/synthesize.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from pathlib import Path

import jsonlines
import numpy as np
import paddle
import soundfile as sf
import yaml
from timer import timer
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.data_table import DataTable
from paddlespeech.t2s.models.vits import VITS
from paddlespeech.t2s.utils import str2bool


def evaluate(args):
    """Synthesize every utterance of the test metadata with a VITS checkpoint.

    Args:
        args: parsed command-line options; ``test_metadata``, ``config``,
            ``speaker_dict``, ``voice_cloning``, ``phones_dict``, ``ckpt``
            and ``output_dir`` are read.

    Each waveform is written to ``<output_dir>/<utt_id>.wav`` and its
    speed / real-time factor is printed, followed by an overall summary.
    """

    # construct dataset for evaluation
    with jsonlines.open(args.test_metadata, 'r') as reader:
        test_metadata = list(reader)
    # Init body.
    with open(args.config) as f:
        config = CfgNode(yaml.safe_load(f))

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(config)

    fields = ["utt_id", "text"]
    converters = {}

    spk_num = None
    if args.speaker_dict is not None:
        print("multiple speaker vits!")
        with open(args.speaker_dict, 'rt') as f:
            speaker_entries = [line.strip().split() for line in f.readlines()]
        spk_num = len(speaker_entries)
        fields += ["spk_id"]
    elif args.voice_cloning:
        print("Evaluating voice cloning!")
        fields += ["spk_emb"]
    else:
        print("single speaker vits!")
    print("spk_num:", spk_num)

    test_dataset = DataTable(
        data=test_metadata,
        fields=fields,
        converters=converters, )

    with open(args.phones_dict, "r") as f:
        phn_id = [line.strip().split() for line in f.readlines()]
    vocab_size = len(phn_id)
    print("vocab_size:", vocab_size)

    odim = config.n_fft // 2 + 1
    config["model"]["generator_params"]["spks"] = spk_num

    vits = VITS(idim=vocab_size, odim=odim, **config["model"])
    vits.set_state_dict(paddle.load(args.ckpt)["main_params"])
    vits.eval()

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    N = 0
    T = 0

    for datum in test_dataset:
        utt_id = datum["utt_id"]
        phone_ids = paddle.to_tensor(datum["text"])
        with timer() as t:
            with paddle.no_grad():
                spk_emb = None
                spk_id = None
                # multi speaker
                if args.voice_cloning and "spk_emb" in datum:
                    spk_emb = paddle.to_tensor(np.load(datum["spk_emb"]))
                elif "spk_id" in datum:
                    spk_id = paddle.to_tensor(datum["spk_id"])
                out = vits.inference(
                    text=phone_ids, sids=spk_id, spembs=spk_emb)
            wav = out["wav"]
            wav = wav.numpy()
        # Read the elapsed time once, after the timer context has closed
        # (as synthesize_e2e.py does), so the statistics and the log line
        # all use the same value.
        elapsed = t.elapse
        N += wav.size
        T += elapsed
        speed = wav.size / elapsed
        rtf = config.fs / speed
        print(
            f"{utt_id}, wave: {wav.size}, time: {elapsed}s, Hz: {speed}, RTF: {rtf}."
        )
        sf.write(str(output_dir / (utt_id + ".wav")), wav, samplerate=config.fs)
        print(f"{utt_id} done!")
    # Without this guard an empty test set ends in ZeroDivisionError.
    if N > 0 and T > 0:
        print(f"generation speed: {N / T}Hz, RTF: {config.fs / (N / T) }")
    else:
        print("no audio was generated, generation speed is not available.")


def parse_args():
    """Build the command line of the VITS synthesis script and parse it.

    Returns:
        argparse.Namespace: the parsed options (``config``, ``ckpt``,
        ``phones_dict``, ``speaker_dict``, ``voice_cloning``, ``ngpu``,
        ``test_metadata`` and ``output_dir``).
    """
    cli = argparse.ArgumentParser(description="Synthesize with VITS")

    # (flag, add_argument keyword arguments), registered in this order
    option_table = [
        # model
        ('--config', dict(type=str, default=None, help='Config of VITS.')),
        ('--ckpt',
         dict(type=str, default=None, help='Checkpoint file of VITS.')),
        ("--phones_dict",
         dict(type=str, default=None, help="phone vocabulary file.")),
        ("--speaker_dict",
         dict(type=str, default=None, help="speaker id map file.")),
        ("--voice-cloning",
         dict(
             type=str2bool,
             default=False,
             help="whether training voice cloning model.")),
        # other
        ("--ngpu", dict(type=int, default=1, help="if ngpu == 0, use cpu.")),
        ("--test_metadata", dict(type=str, help="test metadata.")),
        ("--output_dir", dict(type=str, help="output dir.")),
    ]
    for flag, spec in option_table:
        cli.add_argument(flag, **spec)

    return cli.parse_args()


def main():
    """Select the paddle device from ``--ngpu`` and run ``evaluate``.

    ``0`` selects the cpu and a positive value the gpu.  A negative value
    only prints a message; evaluation still runs.
    """
    args = parse_args()

    ngpu = args.ngpu
    if ngpu < 0:
        print("ngpu should >= 0 !")
    else:
        paddle.set_device("cpu" if ngpu == 0 else "gpu")

    evaluate(args)


if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/vits/synthesize_e2e.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from pathlib import Path

import paddle
import soundfile as sf
import yaml
from timer import timer
from yacs.config import CfgNode

from paddlespeech.t2s.exps.syn_utils import am_to_static
from paddlespeech.t2s.exps.syn_utils import get_frontend
from paddlespeech.t2s.exps.syn_utils import get_sentences
from paddlespeech.t2s.models.vits import VITS
from paddlespeech.t2s.models.vits import VITSInference
from paddlespeech.t2s.utils import str2bool


def evaluate(args):
    """Synthesize every sentence of ``--text`` end-to-end with VITS.

    Args:
        args: parsed command-line options; ``config``, ``text``, ``lang``,
            ``phones_dict``, ``am``, ``speaker_dict``, ``spk_id``, ``ckpt``,
            ``inference_dir``, ``output_dir`` and ``add_blank`` are read.

    Raises:
        ValueError: if ``args.lang`` is neither ``'zh'`` nor ``'en'``.

    The text of an utterance is converted to one or more phone-id chunks;
    the chunks are synthesized one by one and their waveforms are
    concatenated, written to ``<output_dir>/<utt_id>.wav`` and reported.
    """
    # Reject an unsupported language up front.  It used to be reported with
    # a print only, after which `phone_ids` was undefined (or left over
    # from the previous sentence).
    if args.lang not in {'zh', 'en'}:
        raise ValueError("lang should in {'zh', 'en'}!")

    # Init body.
    with open(args.config) as f:
        config = CfgNode(yaml.safe_load(f))

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(config)

    sentences = get_sentences(text_file=args.text, lang=args.lang)

    # frontend
    frontend = get_frontend(lang=args.lang, phones_dict=args.phones_dict)
    # acoustic model: {model_name}_{dataset}
    am_dataset = args.am[args.am.rindex('_') + 1:]

    spk_num = None
    if args.speaker_dict is not None:
        print("multiple speaker vits!")
        with open(args.speaker_dict, 'rt') as f:
            speaker_entries = [line.strip().split() for line in f.readlines()]
        spk_num = len(speaker_entries)
    else:
        print("single speaker vits!")
    print("spk_num:", spk_num)

    with open(args.phones_dict, "r") as f:
        phn_id = [line.strip().split() for line in f.readlines()]
    vocab_size = len(phn_id)
    print("vocab_size:", vocab_size)

    odim = config.n_fft // 2 + 1
    config["model"]["generator_params"]["spks"] = spk_num

    vits = VITS(idim=vocab_size, odim=odim, **config["model"])
    vits.set_state_dict(paddle.load(args.ckpt)["main_params"])
    vits.eval()

    vits_inference = VITSInference(vits)
    # whether dygraph to static
    if args.inference_dir:
        vits_inference = am_to_static(
            am_inference=vits_inference,
            am=args.am,
            inference_dir=args.inference_dir,
            speaker_dict=args.speaker_dict)

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    merge_sentences = False
    add_blank = args.add_blank
    # loop invariant: whether a speaker id is passed to the model
    multi_speaker = am_dataset in {"aishell3", "vctk"} and spk_num is not None

    N = 0
    T = 0
    for utt_id, sentence in sentences:
        with timer() as t:
            if args.lang == 'zh':
                input_ids = frontend.get_input_ids(
                    sentence,
                    merge_sentences=merge_sentences,
                    add_blank=add_blank)
            else:
                # 'en' (validated at the top of the function)
                input_ids = frontend.get_input_ids(
                    sentence, merge_sentences=merge_sentences)
            phone_ids = input_ids["phone_ids"]
            wavs = []
            with paddle.no_grad():
                for i in range(len(phone_ids)):
                    part_phone_ids = phone_ids[i]
                    if multi_speaker:
                        spk_id = paddle.to_tensor(args.spk_id)
                        wavs.append(vits_inference(part_phone_ids, spk_id))
                    else:
                        wavs.append(vits_inference(part_phone_ids))
        # Nothing to write.  Previously the waveform of the preceding
        # utterance would have been reused (or a NameError raised).
        if not wavs:
            print(f"{utt_id}: no phone ids were produced, skipped.")
            continue
        wav_all = wavs[0] if len(wavs) == 1 else paddle.concat(wavs)
        wav = wav_all.numpy()
        N += wav.size
        T += t.elapse
        speed = wav.size / t.elapse
        rtf = config.fs / speed
        print(
            f"{utt_id}, wave: {wav.shape}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}."
        )
        sf.write(str(output_dir / (utt_id + ".wav")), wav, samplerate=config.fs)
        print(f"{utt_id} done!")
    # Without this guard an empty text file ends in ZeroDivisionError.
    if N > 0 and T > 0:
        print(f"generation speed: {N / T}Hz, RTF: {config.fs / (N / T) }")
    else:
        print("no audio was generated, generation speed is not available.")


def parse_args():
    """Build the command line of the end-to-end VITS synthesis script and parse it.

    Returns:
        argparse.Namespace: the parsed options (``config``, ``ckpt``,
        ``phones_dict``, ``speaker_dict``, ``spk_id``, ``lang``,
        ``inference_dir``, ``ngpu``, ``text``, ``output_dir``,
        ``add_blank`` and ``am``).
    """
    cli = argparse.ArgumentParser(description="Synthesize with VITS")

    # (flag, add_argument keyword arguments), registered in this order
    option_table = [
        # model
        ('--config', dict(type=str, default=None, help='Config of VITS.')),
        ('--ckpt',
         dict(type=str, default=None, help='Checkpoint file of VITS.')),
        ("--phones_dict",
         dict(type=str, default=None, help="phone vocabulary file.")),
        ("--speaker_dict",
         dict(type=str, default=None, help="speaker id map file.")),
        ('--spk_id',
         dict(
             type=int,
             default=0,
             help='spk id for multi speaker acoustic model')),
        # other
        ('--lang',
         dict(type=str, default='zh', help='Choose model language. zh or en')),
        ("--inference_dir",
         dict(type=str, default=None, help="dir to save inference models")),
        ("--ngpu", dict(type=int, default=1, help="if ngpu == 0, use cpu.")),
        ("--text",
         dict(
             type=str,
             help="text to synthesize, a 'utt_id sentence' pair per line.")),
        ("--output_dir", dict(type=str, help="output dir.")),
        ("--add-blank",
         dict(
             type=str2bool,
             default=True,
             help="whether to add blank between phones")),
        ('--am',
         dict(
             type=str,
             default='vits_csmsc',
             help='Choose acoustic model type of tts task.')),
    ]
    for flag, spec in option_table:
        cli.add_argument(flag, **spec)

    return cli.parse_args()


def main():
    """Script entry point: pick the paddle device from --ngpu, then synthesize.

    A negative --ngpu only prints a notice; synthesis is still attempted on
    whatever device paddle currently has selected.
    """
    cli_args = parse_args()
    gpu_count = cli_args.ngpu

    if gpu_count < 0:
        print("ngpu should >= 0 !")
    elif gpu_count == 0:
        paddle.set_device("cpu")
    else:
        paddle.set_device("gpu")

    evaluate(cli_args)


if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/vits/train.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
import os
import shutil
from pathlib import Path

import jsonlines
import numpy as np
import paddle
import yaml
from paddle import DataParallel
from paddle import distributed as dist
from paddle.io import DataLoader
from paddle.optimizer import AdamW
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.am_batch_fn import vits_multi_spk_batch_fn
from paddlespeech.t2s.datasets.am_batch_fn import vits_single_spk_batch_fn
from paddlespeech.t2s.datasets.data_table import DataTable
from paddlespeech.t2s.datasets.sampler import ErnieSATSampler
from paddlespeech.t2s.models.vits import VITS
from paddlespeech.t2s.models.vits import VITSEvaluator
from paddlespeech.t2s.models.vits import VITSUpdater
from paddlespeech.t2s.modules.losses import DiscriminatorAdversarialLoss
from paddlespeech.t2s.modules.losses import FeatureMatchLoss
from paddlespeech.t2s.modules.losses import GeneratorAdversarialLoss
from paddlespeech.t2s.modules.losses import KLDivergenceLoss
from paddlespeech.t2s.modules.losses import MelSpectrogramLoss
from paddlespeech.t2s.training.extensions.snapshot import Snapshot
from paddlespeech.t2s.training.extensions.visualizer import VisualDL
from paddlespeech.t2s.training.optimizer import scheduler_classes
from paddlespeech.t2s.training.seeding import seed_everything
from paddlespeech.t2s.training.trainer import Trainer
from paddlespeech.t2s.utils import str2bool


def train_sp(args, config):
    """Build the whole VITS training pipeline and run it in this process.

    The function selects the device, seeds the RNGs, builds the train/dev
    datasets and dataloaders, the VITS model, the losses, the optimizers with
    their LR schedulers, the updater/evaluator pair and finally a ``Trainer``
    that is run until ``config.train_max_steps`` iterations.

    Args:
        args: parsed command line namespace. The fields read here are
            ``ngpu``, ``speaker_dict``, ``voice_cloning``, ``train_metadata``,
            ``dev_metadata``, ``phones_dict``, ``output_dir`` and ``config``.
        config: the experiment configuration (a yacs ``CfgNode`` when called
            from ``main``), accessed both by attribute and by key.
    """
    # decides device type and whether to run in parallel
    # setup running environment correctly
    world_size = paddle.distributed.get_world_size()
    if (not paddle.is_compiled_with_cuda()) or args.ngpu == 0:
        paddle.set_device("cpu")
    else:
        paddle.set_device("gpu")
        if world_size > 1:
            paddle.distributed.init_parallel_env()

    # set the random seed, it is a must for multiprocess training
    seed_everything(config.seed)

    print(
        f"rank: {dist.get_rank()}, pid: {os.getpid()}, parent_pid: {os.getppid()}",
    )

    # dataloader has been too verbose
    logging.getLogger("DataLoader").disabled = True

    # columns read from every metadata record
    fields = ["text", "text_lengths", "feats", "feats_lengths", "wave"]

    # these columns are loaded with np.load instead of being used as-is
    converters = {
        "wave": np.load,
        "feats": np.load,
    }
    # Three mutually exclusive setups; a speaker dict takes precedence over
    # --voice-cloning. spk_num stays None unless a speaker dict is given.
    spk_num = None
    if args.speaker_dict is not None:
        print("multiple speaker vits!")
        collate_fn = vits_multi_spk_batch_fn
        with open(args.speaker_dict, 'rt', encoding='utf-8') as f:
            spk_id = [line.strip().split() for line in f.readlines()]
        # one line of the speaker dict per speaker
        spk_num = len(spk_id)
        fields += ["spk_id"]
    elif args.voice_cloning:
        print("Training voice cloning!")
        collate_fn = vits_multi_spk_batch_fn
        fields += ["spk_emb"]
        converters["spk_emb"] = np.load
    else:
        print("single speaker vits!")
        collate_fn = vits_single_spk_batch_fn
    print("spk_num:", spk_num)

    # construct dataset for training and validation
    with jsonlines.open(args.train_metadata, 'r') as reader:
        train_metadata = list(reader)
    train_dataset = DataTable(
        data=train_metadata,
        fields=fields,
        converters=converters, )
    with jsonlines.open(args.dev_metadata, 'r') as reader:
        dev_metadata = list(reader)
    dev_dataset = DataTable(
        data=dev_metadata,
        fields=fields,
        converters=converters, )

    # batch samplers: incomplete batches are dropped for training only
    train_sampler = ErnieSATSampler(
        train_dataset,
        batch_size=config.batch_size,
        shuffle=False,
        drop_last=True)
    dev_sampler = ErnieSATSampler(
        dev_dataset,
        batch_size=config.batch_size,
        shuffle=False,
        drop_last=False)
    print("samplers done!")

    # dataloaders share the collate function chosen above
    train_dataloader = DataLoader(
        train_dataset,
        batch_sampler=train_sampler,
        collate_fn=collate_fn,
        num_workers=config.num_workers)

    dev_dataloader = DataLoader(
        dev_dataset,
        batch_sampler=dev_sampler,
        collate_fn=collate_fn,
        num_workers=config.num_workers)
    print("dataloaders done!")

    # the vocabulary size is the number of lines in the phone dictionary
    with open(args.phones_dict, 'rt', encoding='utf-8') as f:
        phn_id = [line.strip().split() for line in f.readlines()]
    vocab_size = len(phn_id)
    print("vocab_size:", vocab_size)

    # number of bins of a one-sided spectrum computed with n_fft points
    odim = config.n_fft // 2 + 1
    # inject the speaker count (None unless a speaker dict was given)
    config["model"]["generator_params"]["spks"] = spk_num
    model = VITS(idim=vocab_size, odim=odim, **config["model"])
    gen_parameters = model.generator.parameters()
    dis_parameters = model.discriminator.parameters()
    if world_size > 1:
        model = DataParallel(model)
        # DataParallel wraps the network, the sub-modules live in _layers
        gen_parameters = model._layers.generator.parameters()
        dis_parameters = model._layers.discriminator.parameters()

    print("model done!")

    # loss
    criterion_mel = MelSpectrogramLoss(
        **config["mel_loss_params"], )
    criterion_feat_match = FeatureMatchLoss(
        **config["feat_match_loss_params"], )
    criterion_gen_adv = GeneratorAdversarialLoss(
        **config["generator_adv_loss_params"], )
    criterion_dis_adv = DiscriminatorAdversarialLoss(
        **config["discriminator_adv_loss_params"], )
    criterion_kl = KLDivergenceLoss()

    print("criterions done!")

    # generator and discriminator each get their own scheduler and AdamW
    lr_schedule_g = scheduler_classes[config["generator_scheduler"]](
        **config["generator_scheduler_params"])
    optimizer_g = AdamW(
        learning_rate=lr_schedule_g,
        parameters=gen_parameters,
        **config["generator_optimizer_params"])

    lr_schedule_d = scheduler_classes[config["discriminator_scheduler"]](
        **config["discriminator_scheduler_params"])
    optimizer_d = AdamW(
        learning_rate=lr_schedule_d,
        parameters=dis_parameters,
        **config["discriminator_optimizer_params"])

    print("optimizers done!")

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    if dist.get_rank() == 0:
        # NOTE(review): splitting on "/" assumes a POSIX-style config path
        config_name = args.config.split("/")[-1]
        # copy conf to output_dir
        shutil.copyfile(args.config, output_dir / config_name)

    updater = VITSUpdater(
        model=model,
        optimizers={
            "generator": optimizer_g,
            "discriminator": optimizer_d,
        },
        criterions={
            "mel": criterion_mel,
            "feat_match": criterion_feat_match,
            "gen_adv": criterion_gen_adv,
            "dis_adv": criterion_dis_adv,
            "kl": criterion_kl,
        },
        schedulers={
            "generator": lr_schedule_g,
            "discriminator": lr_schedule_d,
        },
        dataloader=train_dataloader,
        lambda_adv=config.lambda_adv,
        lambda_mel=config.lambda_mel,
        lambda_kl=config.lambda_kl,
        lambda_feat_match=config.lambda_feat_match,
        lambda_dur=config.lambda_dur,
        generator_first=config.generator_first,
        output_dir=output_dir)

    # the evaluator reuses the model, criterions and loss weights of the
    # updater but reads from the dev dataloader
    evaluator = VITSEvaluator(
        model=model,
        criterions={
            "mel": criterion_mel,
            "feat_match": criterion_feat_match,
            "gen_adv": criterion_gen_adv,
            "dis_adv": criterion_dis_adv,
            "kl": criterion_kl,
        },
        dataloader=dev_dataloader,
        lambda_adv=config.lambda_adv,
        lambda_mel=config.lambda_mel,
        lambda_kl=config.lambda_kl,
        lambda_feat_match=config.lambda_feat_match,
        lambda_dur=config.lambda_dur,
        generator_first=config.generator_first,
        output_dir=output_dir)

    trainer = Trainer(
        updater,
        stop_trigger=(config.train_max_steps, "iteration"),
        out=output_dir)

    # evaluation and VisualDL logging are registered on rank 0 only; the
    # snapshot extension is registered on every rank
    if dist.get_rank() == 0:
        trainer.extend(
            evaluator, trigger=(config.eval_interval_steps, 'iteration'))
        trainer.extend(VisualDL(output_dir), trigger=(1, 'iteration'))
    trainer.extend(
        Snapshot(max_size=config.num_snapshots),
        trigger=(config.save_interval_steps, 'iteration'))

    print("Trainer Done!")
    trainer.run()


def main():
    """Parse the command line, load the YAML config and launch VITS training.

    With ``--ngpu`` greater than 1 one ``train_sp`` process per GPU is spawned
    through ``paddle.distributed.spawn``; otherwise ``train_sp`` runs in the
    current process.
    """
    # parse args and config and redirect to train_sp

    parser = argparse.ArgumentParser(description="Train a VITS model.")
    parser.add_argument("--config", type=str, help="VITS config file")
    parser.add_argument("--train-metadata", type=str, help="training data.")
    parser.add_argument("--dev-metadata", type=str, help="dev data.")
    parser.add_argument("--output-dir", type=str, help="output dir.")
    parser.add_argument(
        "--ngpu", type=int, default=1, help="if ngpu == 0, use cpu.")
    parser.add_argument(
        "--phones-dict", type=str, default=None, help="phone vocabulary file.")
    parser.add_argument(
        "--speaker-dict",
        type=str,
        default=None,
        help="speaker id map file for multiple speaker model.")

    parser.add_argument(
        "--voice-cloning",
        type=str2bool,
        default=False,
        help="whether training voice cloning model.")

    args = parser.parse_args()

    # Decode the config as UTF-8 explicitly (as train_sp does for the phone
    # and speaker dictionaries) instead of relying on the locale's preferred
    # encoding, which breaks on non-ASCII content under e.g. a GBK or C locale.
    with open(args.config, 'rt', encoding='utf-8') as f:
        config = CfgNode(yaml.safe_load(f))

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(config)
    print(
        f"master see the world size: {dist.get_world_size()}, from pid: {os.getpid()}"
    )

    # dispatch
    if args.ngpu > 1:
        dist.spawn(train_sp, (args, config), nprocs=args.ngpu)
    else:
        train_sp(args, config)


if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/vits/voice_cloning.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
from pathlib import Path

import librosa
import numpy as np
import paddle
import soundfile as sf
import yaml
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.get_feats import LinearSpectrogram
from paddlespeech.t2s.exps.syn_utils import get_frontend
from paddlespeech.t2s.models.vits import VITS
from paddlespeech.t2s.utils import str2bool
from paddlespeech.vector.exps.ge2e.audio_processor import SpeakerVerificationPreprocessor
from paddlespeech.vector.models.lstm_speaker_encoder import LSTMSpeakerEncoder


def voice_cloning(args):
    """Synthesize one utterance per reference speaker with a VITS model.

    For every file in ``args.input_dir`` a GE2E speaker embedding is computed
    and used as the target voice. The content is either ``args.text``
    (text-to-speech, when ``args.audio_path`` is empty/None) or the audio at
    ``args.audio_path`` (voice conversion). One extra utterance is generated
    with a random speaker embedding. Results are written as
    ``<output_dir>/<utt_id>.wav`` at ``config.fs``.

    Args:
        args: parsed command line namespace (see ``parse_args``).

    Raises:
        ValueError: in text mode, when ``args.lang`` is neither 'zh' nor 'en'.
    """

    # Init body.
    # Decode text inputs as UTF-8 explicitly instead of depending on the
    # locale's preferred encoding.
    with open(args.config, encoding='utf-8') as f:
        config = CfgNode(yaml.safe_load(f))

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(config)

    # linear spectrogram extractor, only used on the content audio in
    # voice-conversion mode
    spec_extractor = LinearSpectrogram(
        n_fft=config.n_fft,
        hop_length=config.n_shift,
        win_length=config.win_length,
        window=config.window)
    # speaker encoder
    p = SpeakerVerificationPreprocessor(
        sampling_rate=16000,
        audio_norm_target_dBFS=-30,
        vad_window_length=30,
        vad_moving_average_width=8,
        vad_max_silence_length=6,
        mel_window_length=25,
        mel_window_step=10,
        n_mels=40,
        partial_n_frames=160,
        min_pad_coverage=0.75,
        partial_overlap_ratio=0.5)
    print("Audio Processor Done!")

    speaker_encoder = LSTMSpeakerEncoder(
        n_mels=40, num_layers=3, hidden_size=256, output_size=256)
    speaker_encoder.set_state_dict(paddle.load(args.ge2e_params_path))
    speaker_encoder.eval()
    print("GE2E Done!")

    frontend = get_frontend(lang=args.lang, phones_dict=args.phones_dict)
    print("frontend done!")

    # the vocabulary size is the number of lines in the phone dictionary
    with open(args.phones_dict, "r", encoding='utf-8') as f:
        phn_id = [line.strip().split() for line in f.readlines()]
    vocab_size = len(phn_id)
    print("vocab_size:", vocab_size)

    odim = config.n_fft // 2 + 1

    vits = VITS(idim=vocab_size, odim=odim, **config["model"])
    vits.set_state_dict(paddle.load(args.ckpt)["main_params"])
    vits.eval()

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    input_dir = Path(args.input_dir)

    # an empty --audio-path means "no content audio", i.e. text mode
    if args.audio_path == "":
        args.audio_path = None
    if args.audio_path is None:
        sentence = args.text
        merge_sentences = True
        add_blank = args.add_blank

        if args.lang == 'zh':
            input_ids = frontend.get_input_ids(
                sentence, merge_sentences=merge_sentences, add_blank=add_blank)
        elif args.lang == 'en':
            input_ids = frontend.get_input_ids(
                sentence, merge_sentences=merge_sentences)
        else:
            # previously this fell through to a NameError on `input_ids`
            raise ValueError(
                f"unsupported lang for voice cloning: {args.lang!r}, "
                "expected 'zh' or 'en'")
        phone_ids = input_ids["phone_ids"][0]
    else:
        wav, _ = librosa.load(str(args.audio_path), sr=config.fs)
        feats = paddle.to_tensor(spec_extractor.get_linear_spectrogram(wav))

        mel_sequences = p.extract_mel_partials(
            p.preprocess_wav(args.audio_path))
        with paddle.no_grad():
            spk_emb_src = speaker_encoder.embed_utterance(
                paddle.to_tensor(mel_sequences))

    def _generate(target_spk_emb):
        # Produce the waveform tensor for one target speaker embedding, in
        # text mode or voice-conversion mode depending on args.audio_path.
        with paddle.no_grad():
            if args.audio_path is None:
                out = vits.inference(text=phone_ids, spembs=target_spk_emb)
            else:
                out = vits.voice_conversion(
                    feats=feats,
                    spembs_src=spk_emb_src,
                    spembs_tgt=target_spk_emb)
        return out["wav"]

    def _save(utt_id, wav):
        # Write one generated waveform to <output_dir>/<utt_id>.wav.
        sf.write(
            str(output_dir / (utt_id + ".wav")),
            wav.numpy(),
            samplerate=config.fs)
        print(f"{utt_id} done!")

    for name in os.listdir(input_dir):
        # the utterance id is the file name up to its first dot
        utt_id = name.split(".")[0]
        ref_audio_path = input_dir / name
        mel_sequences = p.extract_mel_partials(p.preprocess_wav(ref_audio_path))
        with paddle.no_grad():
            spk_emb = speaker_encoder.embed_utterance(
                paddle.to_tensor(mel_sequences))
        _save(utt_id, _generate(spk_emb))

    # Randomly generate numbers of 0 ~ 0.2, 256 is the dim of spk_emb
    random_spk_emb = np.random.rand(256) * 0.2
    random_spk_emb = paddle.to_tensor(random_spk_emb, dtype='float32')
    _save("random_spk_emb", _generate(random_spk_emb))


def parse_args():
    """Parse the command line of the VITS voice cloning script.

    Returns:
        argparse.Namespace: parsed options (model files, content text or
        audio, GE2E parameters, device count, input/output directories).
    """
    cli = argparse.ArgumentParser(description="")
    add = cli.add_argument

    # model files
    add('--config', type=str, default=None, help='Config of VITS.')
    add('--ckpt', type=str, default=None, help='Checkpoint file of VITS.')
    add("--phones_dict", type=str, default=None, help="phone vocabulary file.")

    # content to synthesize: a sentence, or an audio file to convert
    add("--text",
        type=str,
        default="每当你觉得，想要批评什么人的时候，你切要记着，这个世界上的人，并非都具备你禀有的条件。",
        help="text to synthesize, a line")
    add('--lang',
        type=str,
        default='zh',
        help='Choose model language. zh or en')
    add("--audio-path",
        type=str,
        default=None,
        help="audio as content to synthesize")

    add("--ge2e_params_path", type=str, help="ge2e params path.")

    add("--ngpu", type=int, default=1, help="if ngpu=0, use cpu.")

    # reference audios in, generated audios out
    add("--input-dir",
        type=str,
        help="input dir of *.wav, the sample rate will be resample to 16k.")
    add("--output-dir", type=str, help="output dir.")

    add("--add-blank",
        type=str2bool,
        default=True,
        help="whether to add blank between phones")

    return cli.parse_args()


def main():
    """Script entry point: pick the paddle device from --ngpu, then clone.

    A negative --ngpu only prints a notice; voice cloning is still attempted
    on whatever device paddle currently has selected.
    """
    cli_args = parse_args()
    gpu_count = cli_args.ngpu

    if gpu_count < 0:
        print("ngpu should >= 0 !")
    elif gpu_count == 0:
        paddle.set_device("cpu")
    else:
        paddle.set_device("gpu")

    voice_cloning(cli_args)


if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/voice_cloning.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
from pathlib import Path

import numpy as np
import paddle
import soundfile as sf
import yaml
from yacs.config import CfgNode

from paddlespeech.cli.vector import VectorExecutor
from paddlespeech.t2s.exps.syn_utils import get_am_inference
from paddlespeech.t2s.exps.syn_utils import get_voc_inference
from paddlespeech.t2s.frontend.zh_frontend import Frontend
from paddlespeech.t2s.utils import str2bool
from paddlespeech.vector.exps.ge2e.audio_processor import SpeakerVerificationPreprocessor
from paddlespeech.vector.models.lstm_speaker_encoder import LSTMSpeakerEncoder


def gen_random_embed(use_ecapa: bool=False):
    """Draw a random speaker embedding as a float32 paddle tensor.

    Args:
        use_ecapa: when true, return 192 values drawn uniformly from
            -25 ~ 25 (ECAPA-TDNN sized); otherwise return 256 values drawn
            uniformly from 0 ~ 0.2 (GE2E sized).

    Returns:
        The result of ``paddle.to_tensor`` on the drawn vector.
    """
    if not use_ecapa:
        # GE2E
        values = np.random.rand(256) * 0.2
    else:
        values = (-1 + 2 * np.random.rand(192)) * 25
    return paddle.to_tensor(values, dtype='float32')


def voice_cloning(args):
    """Synthesize ``args.text`` once per reference audio in ``args.input_dir``.

    A speaker embedding is extracted from every reference file, either with
    the ECAPA-TDNN ``VectorExecutor`` (``args.use_ecapa``) or with the GE2E
    LSTM speaker encoder, and passed as ``spk_emb`` to the acoustic model;
    the vocoder output is written to ``<output_dir>/<utt_id>.wav`` at
    ``am_config.fs``.

    Args:
        args: parsed command line namespace (see ``parse_args``).
    """
    # Init body.
    # Decode the configs as UTF-8 explicitly instead of depending on the
    # locale's preferred encoding.
    with open(args.am_config, encoding='utf-8') as f:
        am_config = CfgNode(yaml.safe_load(f))
    with open(args.voc_config, encoding='utf-8') as f:
        voc_config = CfgNode(yaml.safe_load(f))

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(am_config)
    print(voc_config)

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    input_dir = Path(args.input_dir)
    # List the reference audios once: the warm-up and the synthesis loop see
    # the same files, and an empty directory no longer crashes the warm-up.
    ref_names = os.listdir(input_dir)

    # speaker encoder
    if args.use_ecapa:
        vec_executor = VectorExecutor()
        # warm up (skipped when there is no reference audio at all; the
        # original indexed [0] unconditionally and raised IndexError)
        if ref_names:
            vec_executor(audio_file=input_dir / ref_names[0], force_yes=True)
        print("ECAPA-TDNN Done!")
    # use GE2E
    else:
        p = SpeakerVerificationPreprocessor(
            sampling_rate=16000,
            audio_norm_target_dBFS=-30,
            vad_window_length=30,
            vad_moving_average_width=8,
            vad_max_silence_length=6,
            mel_window_length=25,
            mel_window_step=10,
            n_mels=40,
            partial_n_frames=160,
            min_pad_coverage=0.75,
            partial_overlap_ratio=0.5)
        print("Audio Processor Done!")

        speaker_encoder = LSTMSpeakerEncoder(
            n_mels=40, num_layers=3, hidden_size=256, output_size=256)
        speaker_encoder.set_state_dict(paddle.load(args.ge2e_params_path))
        speaker_encoder.eval()
        print("GE2E Done!")

    frontend = Frontend(phone_vocab_path=args.phones_dict)
    print("frontend done!")

    # the sentence is converted to phone ids once and reused for all speakers
    sentence = args.text
    input_ids = frontend.get_input_ids(sentence, merge_sentences=True)
    phone_ids = input_ids["phone_ids"][0]

    # acoustic model
    am_inference = get_am_inference(
        am=args.am,
        am_config=am_config,
        am_ckpt=args.am_ckpt,
        am_stat=args.am_stat,
        phones_dict=args.phones_dict)

    # vocoder
    voc_inference = get_voc_inference(
        voc=args.voc,
        voc_config=voc_config,
        voc_ckpt=args.voc_ckpt,
        voc_stat=args.voc_stat)

    for name in ref_names:
        # the utterance id is the file name up to its first dot
        utt_id = name.split(".")[0]
        ref_audio_path = input_dir / name
        if args.use_ecapa:
            spk_emb = vec_executor(audio_file=ref_audio_path, force_yes=True)
            spk_emb = paddle.to_tensor(spk_emb)
        # GE2E
        else:
            mel_sequences = p.extract_mel_partials(
                p.preprocess_wav(ref_audio_path))
            with paddle.no_grad():
                spk_emb = speaker_encoder.embed_utterance(
                    paddle.to_tensor(mel_sequences))
        with paddle.no_grad():
            wav = voc_inference(am_inference(phone_ids, spk_emb=spk_emb))

        sf.write(
            str(output_dir / (utt_id + ".wav")),
            wav.numpy(),
            samplerate=am_config.fs)
        print(f"{utt_id} done!")

    # generate 5 random_spk_emb
    # for i in range(5):
    #     random_spk_emb = gen_random_embed(args.use_ecapa)
    #     utt_id = "random_spk_emb"
    #     with paddle.no_grad():
    #         wav = voc_inference(am_inference(phone_ids, spk_emb=random_spk_emb))
    #     sf.write(
    #         str(output_dir / (utt_id + "_" + str(i) + ".wav")),
    #         wav.numpy(),
    #         samplerate=am_config.fs)
    # print(f"{utt_id} done!")


def parse_args():
    """Parse the command line of the acoustic-model + vocoder cloning script.

    The defaults of ``--am`` and ``--voc`` are members of their ``choices``
    lists. argparse does not validate defaults against ``choices``, so the
    previous defaults ('fastspeech2_csmsc' / 'pwgan_csmsc') silently bypassed
    the restriction shown in ``--help``.

    Returns:
        argparse.Namespace: the parsed options.
    """
    # parse args and config
    parser = argparse.ArgumentParser(description="")
    # acoustic model
    parser.add_argument(
        '--am',
        type=str,
        default='fastspeech2_aishell3',
        choices=['fastspeech2_aishell3', 'tacotron2_aishell3'],
        help='Choose acoustic model type of tts task.')
    parser.add_argument(
        '--am_config', type=str, default=None, help='Config of acoustic model.')
    parser.add_argument(
        '--am_ckpt',
        type=str,
        default=None,
        help='Checkpoint file of acoustic model.')
    parser.add_argument(
        "--am_stat",
        type=str,
        default=None,
        help="mean and standard deviation used to normalize spectrogram when training acoustic model."
    )
    parser.add_argument(
        "--phones-dict",
        type=str,
        default="phone_id_map.txt",
        help="phone vocabulary file.")
    # vocoder
    parser.add_argument(
        '--voc',
        type=str,
        default='pwgan_aishell3',
        choices=['pwgan_aishell3'],
        help='Choose vocoder type of tts task.')

    parser.add_argument(
        '--voc_config', type=str, default=None, help='Config of voc.')
    parser.add_argument(
        '--voc_ckpt', type=str, default=None, help='Checkpoint file of voc.')
    parser.add_argument(
        "--voc_stat",
        type=str,
        default=None,
        help="mean and standard deviation used to normalize spectrogram when training voc."
    )
    # other
    parser.add_argument(
        "--text",
        type=str,
        default="每当你觉得，想要批评什么人的时候，你切要记着，这个世界上的人，并非都具备你禀有的条件。",
        help="text to synthesize, a line")
    parser.add_argument(
        "--ge2e_params_path", type=str, help="ge2e params path.")
    parser.add_argument(
        "--use_ecapa",
        type=str2bool,
        default=False,
        help="whether to use ECAPA-TDNN as speaker encoder.")
    parser.add_argument(
        "--ngpu", type=int, default=1, help="if ngpu=0, use cpu.")
    parser.add_argument(
        "--input-dir",
        type=str,
        help="input dir of *.wav, the sample rate will be resample to 16k.")
    parser.add_argument("--output-dir", type=str, help="output dir.")

    args = parser.parse_args()
    return args


def main():
    """Script entry point: pick the paddle device from --ngpu, then clone.

    A negative --ngpu only prints a notice; voice cloning is still attempted
    on whatever device paddle currently has selected.
    """
    cli_args = parse_args()
    gpu_count = cli_args.ngpu

    if gpu_count < 0:
        print("ngpu should >= 0 !")
    elif gpu_count == 0:
        paddle.set_device("cpu")
    else:
        paddle.set_device("gpu")

    voice_cloning(cli_args)


if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/waveflow/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/exps/waveflow/config.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from yacs.config import CfgNode as CN

# Default WaveFlow experiment configuration, grouped into three sections:
# data (audio / feature extraction), model and training.
_C = CN()
# data loading and feature extraction settings
_C.data = CN(
    dict(
        batch_size=8,  # batch size
        valid_size=16,  # the first N examples are reserved for validation
        sample_rate=22050,  # Hz, sample rate
        n_fft=1024,  # fft frame size
        win_length=1024,  # window size
        hop_length=256,  # hop size between adjacent frames
        fmin=0,
        fmax=8000,  # Hz, max frequency when converting to mel
        n_mels=80,  # mel bands
        clip_frames=65,  # mel clip frames
    ))

# network hyper-parameters
_C.model = CN(
    dict(
        upsample_factors=[16, 16],
        n_flows=8,  # number of flows in WaveFlow
        n_layers=8,  # number of conv block in each flow
        n_group=16,  # folding factor of audio and spectrogram
        channels=128,  # residual channel in each flow
        kernel_size=[3, 3],  # kernel size in each conv block
        sigma=1.0,  # stddev of the random noise
    ))

# optimization schedule; the intervals are presumably counted in
# iterations like max_iteration (TODO confirm against the training script)
_C.training = CN(
    dict(
        lr=2e-4,  # learning rates
        valid_interval=1000,  # validation
        save_interval=10000,  # checkpoint
        max_iteration=3000000,  # max iteration to train
    ))


def get_cfg_defaults():
    """Return a fresh copy of the default WaveFlow configuration.

    A clone of the module-level defaults is handed out so callers can modify
    their copy freely without altering the shared defaults.
    """
    defaults_copy = _C.clone()
    return defaults_copy


================================================
FILE: paddlespeech/t2s/exps/waveflow/ljspeech.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pathlib import Path

import numpy as np
import pandas
from paddle.io import Dataset

from paddlespeech.t2s.datasets.batch import batch_spec
from paddlespeech.t2s.datasets.batch import batch_wav


class LJSpeech(Dataset):
    """Dataset over a preprocessed LJSpeech directory.

    ``root`` must contain a tab-separated, header-less ``metadata.csv`` whose
    columns are read as (fname, frames, samples); each example is the pair of
    arrays stored at ``mel/<fname>.npy`` and ``wav/<fname>.npy``.
    """

    def __init__(self, root):
        self.root = Path(root).expanduser()
        index = pandas.read_csv(
            str(self.root / "metadata.csv"),
            sep="\t",
            header=None,
            names=["fname", "frames", "samples"])

        mel_dir = self.root / "mel"
        wav_dir = self.root / "wav"
        # only the paths are kept here; arrays are loaded lazily on access
        self.records = [(str(mel_dir / (entry.fname + ".npy")),
                         str(wav_dir / (entry.fname + ".npy")))
                        for entry in index.itertuples()]

    def __getitem__(self, i):
        mel_path, wav_path = self.records[i]
        return np.load(mel_path), np.load(wav_path)

    def __len__(self):
        return len(self.records)


class LJSpeechCollector(object):
    """Callable that turns a list of (mel, wav) examples into one batch.

    Both sequences are padded with ``padding_value`` by ``batch_spec`` and
    ``batch_wav``; only the batched arrays (first return value of each
    helper) are kept.
    """

    def __init__(self, padding_value=0.):
        self.padding_value = padding_value

    def __call__(self, examples):
        fill = self.padding_value
        spectrograms = [item[0] for item in examples]
        waveforms = [item[1] for item in examples]
        padded_mels, _unused = batch_spec(spectrograms, pad_value=fill)
        padded_wavs, _unused = batch_wav(waveforms, pad_value=fill)
        return padded_mels, padded_wavs


class LJSpeechClipCollector(object):
    """Callable that batches random fixed-length clips of (mel, wav) examples.

    Every example is cut to ``clip_frames`` frames along the last axis of the
    mel array and to the matching ``clip_frames * hop_length`` samples of the
    waveform, then the clips are stacked into two arrays.

    Args:
        clip_frames: number of mel frames per clip.
        hop_length: number of waveform samples per mel frame.
    """

    def __init__(self, clip_frames=65, hop_length=256):
        self.clip_frames = clip_frames
        self.hop_length = hop_length

    def __call__(self, examples):
        """Clip every example and stack the results.

        Returns:
            (mels, wavs): the stacked mel clips and the stacked wav clips.
        """
        mels = []
        wavs = []
        for example in examples:
            mel_clip, wav_clip = self.clip(example)
            mels.append(mel_clip)
            wavs.append(wav_clip)
        mels = np.stack(mels)
        wavs = np.stack(wavs)
        return mels, wavs

    def clip(self, example):
        """Cut one aligned random window out of a (mel, wav) example.

        Raises:
            ValueError: if the example has fewer than ``clip_frames`` frames.
        """
        mel, wav = example
        frames = mel.shape[-1]
        if frames < self.clip_frames:
            raise ValueError(
                f"example has {frames} frames, fewer than clip_frames="
                f"{self.clip_frames}")
        # The upper bound of randint is exclusive. `frames - clip_frames` is
        # itself a valid start (the window ending on the last frame), so add
        # 1; without it the final window was never sampled and an example
        # with exactly clip_frames frames raised ValueError.
        start = np.random.randint(0, frames - self.clip_frames + 1)
        mel_clip = mel[:, start:start + self.clip_frames]
        wav_clip = wav[start * self.hop_length:(start + self.clip_frames) *
                       self.hop_length]
        return mel_clip, wav_clip


================================================
FILE: paddlespeech/t2s/exps/waveflow/preprocess.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
from pathlib import Path

import librosa
import numpy as np
import pandas as pd
import tqdm

from paddlespeech.t2s.audio import LogMagnitude
from paddlespeech.t2s.datasets import LJSpeechMetaData
from paddlespeech.t2s.exps.waveflow.config import get_cfg_defaults


class Transform(object):
    """Turn one LJSpeech metadata record into ``(audio, mel_spectrogram)``.

    The waveform is reflect-padded so that an STFT computed with
    ``center=False`` is intended to yield one frame per ``hop_length``
    samples; the final assertion in ``__call__`` checks this. The magnitude
    spectrogram is projected onto a mel filter bank and passed through
    ``LogMagnitude(min=1e-5)``.

    Args:
        sample_rate: Sample rate every input file is required to have.
        n_fft: FFT size.
        win_length: Analysis window length.
        hop_length: Hop size between frames.
        n_mels: Number of mel bands.
        fmin: Lowest frequency of the mel filter bank.
        fmax: Highest frequency of the mel filter bank.
    """

    def __init__(self, sample_rate, n_fft, win_length, hop_length, n_mels, fmin,
                 fmax):
        self.sample_rate = sample_rate
        self.n_fft = n_fft
        self.win_length = win_length
        self.hop_length = hop_length
        self.n_mels = n_mels
        self.fmin = fmin
        self.fmax = fmax

        self.spec_normalizer = LogMagnitude(min=1e-5)

    def __call__(self, example):
        """Load the audio of ``example`` and compute its log-mel spectrogram.

        Args:
            example: 3-tuple whose first item is the path of the wav file;
                the remaining two items are ignored.

        Returns:
            tuple: ``(audio, mel_spectrogram)`` where ``audio`` is the
            normalized waveform with the FFT padding removed and
            ``mel_spectrogram.shape[1] * hop_length == audio.size``.

        Raises:
            AssertionError: If the file's sample rate differs from
                ``sample_rate`` or the frame/sample alignment does not hold.
        """
        wav_path, _, _ = example

        sr = self.sample_rate
        n_fft = self.n_fft
        win_length = self.win_length
        hop_length = self.hop_length
        n_mels = self.n_mels
        fmin = self.fmin
        fmax = self.fmax

        # sr=None keeps the file's native rate; no resampling is performed,
        # so a mismatch is reported instead of being silently converted.
        wav, loaded_sr = librosa.load(wav_path, sr=None)
        assert loaded_sr == sr, (
            "sample rate does not match: expected {}, got {} for {}".format(
                sr, loaded_sr, wav_path))

        # Pad audio to the right size.
        frames = int(np.ceil(float(wav.size) / hop_length))
        fft_padding = (n_fft - hop_length) // 2  # sound
        desired_length = frames * hop_length + fft_padding * 2
        # Split the total padding so both sides always add up to
        # desired_length, independent of the parity of hop_length and of the
        # waveform length (any odd remainder goes to the right side).
        total_padding = desired_length - wav.size
        pad_left = total_padding // 2
        pad_right = total_padding - pad_left
        wav = np.pad(wav, (pad_left, pad_right), mode='reflect')

        # Normalize audio. A silent file has a zero peak; leave it untouched
        # rather than dividing by zero and filling the output with NaN.
        peak = np.abs(wav).max()
        if peak > 0:
            wav = wav / peak * 0.999

        # Compute mel-spectrogram.
        # Turn center to False to prevent internal padding.
        spectrogram = librosa.core.stft(
            wav,
            hop_length=hop_length,
            win_length=win_length,
            n_fft=n_fft,
            center=False)
        spectrogram_magnitude = np.abs(spectrogram)

        # Compute mel-spectrograms.
        mel_filter_bank = librosa.filters.mel(
            sr=sr, n_fft=n_fft, n_mels=n_mels, fmin=fmin, fmax=fmax)
        mel_spectrogram = np.dot(mel_filter_bank, spectrogram_magnitude)

        # log scale mel_spectrogram.
        mel_spectrogram = self.spec_normalizer.transform(mel_spectrogram)

        # Extract the center of audio that corresponds to mel spectrograms.
        # An explicit end index is used because ``wav[p:-p]`` is empty when
        # p == 0 (i.e. n_fft == hop_length).
        audio = wav[fft_padding:wav.size - fft_padding]
        assert mel_spectrogram.shape[1] * hop_length == audio.size

        # there is no clipping here
        return audio, mel_spectrogram


def create_dataset(config, input_dir, output_dir):
    """Preprocess an LJSpeech directory into wav/mel ``.npy`` files.

    For every record of ``LJSpeechMetaData(input_dir)`` the audio and its mel
    spectrogram are computed by ``Transform`` and saved as
    ``<output_dir>/wav/<name>.npy`` and ``<output_dir>/mel/<name>.npy``. A
    tab-separated, header-less ``metadata.csv`` listing
    ``(name, mel frames, audio samples)`` is written to ``output_dir``.

    Args:
        config: Data config providing ``sample_rate``, ``n_fft``,
            ``win_length``, ``hop_length``, ``n_mels``, ``fmin`` and ``fmax``.
        input_dir: Path of the LJSpeech dataset (``~`` is expanded).
        output_dir: Directory to write the results to (``~`` is expanded);
            created together with missing parents.
    """
    input_dir = Path(input_dir).expanduser()
    dataset = LJSpeechMetaData(input_dir)

    output_dir = Path(output_dir).expanduser()
    wav_dir = output_dir / "wav"
    mel_dir = output_dir / "mel"
    # Create the directory tree once, up front; parents=True lets the output
    # location be a nested path that does not exist yet.
    for directory in (output_dir, wav_dir, mel_dir):
        directory.mkdir(parents=True, exist_ok=True)

    transform = Transform(config.sample_rate, config.n_fft, config.win_length,
                          config.hop_length, config.n_mels, config.fmin,
                          config.fmax)
    file_names = []

    for example in tqdm.tqdm(dataset):
        fname, _, _ = example
        base_name = os.path.splitext(os.path.basename(fname))[0]

        audio, mel = transform(example)
        # np.save appends the ".npy" suffix itself.
        np.save(str(wav_dir / base_name), audio)
        np.save(str(mel_dir / base_name), mel)

        file_names.append((base_name, mel.shape[-1], audio.shape[-1]))

    meta_data = pd.DataFrame.from_records(file_names)
    meta_data.to_csv(
        str(output_dir / "metadata.csv"), sep="\t", index=None, header=None)
    print("saved meta data in to {}".format(
        os.path.join(output_dir, "metadata.csv")))

    print("Done!")


if __name__ == "__main__":
    # Command-line entry point: merge the default config with an optional
    # config file and KEY VALUE overrides, then preprocess the dataset.
    parser = argparse.ArgumentParser(description="create dataset")
    option_table = [
        ("--config", dict(
            type=str,
            metavar="FILE",
            help="extra config to overwrite the default config")),
        ("--input", dict(type=str, help="path of the ljspeech dataset")),
        ("--output", dict(type=str, help="path to save output dataset")),
        ("--opts", dict(
            nargs=argparse.REMAINDER,
            help="options to overwrite --config file and the default config, passing in KEY VALUE pairs"
        )),
    ]
    for flag, options in option_table:
        parser.add_argument(flag, **options)

    config = get_cfg_defaults()
    args = parser.parse_args()
    # The config file is applied first so that --opts can override it.
    if args.config:
        config.merge_from_file(args.config)
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()

    create_dataset(config.data, args.input, args.output)


================================================
FILE: paddlespeech/t2s/exps/waveflow/synthesize.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
from pathlib import Path

import numpy as np
import paddle
import soundfile as sf

from paddlespeech.t2s.exps.waveflow.config import get_cfg_defaults
from paddlespeech.t2s.models.waveflow import ConditionalWaveFlow
from paddlespeech.t2s.utils import layer_tools


def main(config, args):
    """Synthesize a wav file for every ``*.npy`` mel spectrogram in a folder.

    Args:
        config: Experiment config; passed to
            ``ConditionalWaveFlow.from_pretrained`` and read for
            ``config.data.sample_rate``.
        args: Parsed command-line arguments providing ``ngpu``,
            ``checkpoint_path``, ``input`` and ``output``.
    """
    ngpu = args.ngpu
    if ngpu < 0:
        # Only a message is printed; the device is left unchanged.
        print("ngpu should >= 0 !")
    else:
        paddle.set_device("cpu" if ngpu == 0 else "gpu")

    vocoder = ConditionalWaveFlow.from_pretrained(config, args.checkpoint_path)
    layer_tools.recursively_remove_weight_norm(vocoder)
    vocoder.eval()

    source_dir = Path(args.input).expanduser()
    target_dir = Path(args.output).expanduser()
    target_dir.mkdir(parents=True, exist_ok=True)
    for mel_path in source_dir.glob("*.npy"):
        mel = np.load(str(mel_path))
        with paddle.amp.auto_cast():
            audio = vocoder.predict(mel)
        # Same base name as the input, with the extension swapped to .wav.
        wav_name = os.path.splitext(mel_path.name)[0] + ".wav"
        audio_path = target_dir / wav_name
        sf.write(audio_path, audio, config.data.sample_rate)
        print("[synthesize] {} -> {}".format(mel_path, audio_path))


if __name__ == "__main__":
    # Command-line entry point: build the config from defaults, an optional
    # config file and KEY VALUE overrides, then run WaveFlow synthesis.
    config = get_cfg_defaults()

    # The description used to be copied from another script ("generate mel
    # spectrogram with TransformerTTS."); this script turns mel spectrograms
    # into waveforms with WaveFlow.
    parser = argparse.ArgumentParser(
        description="synthesize waveform from mel spectrogram with WaveFlow.")
    parser.add_argument(
        "--config",
        type=str,
        metavar="FILE",
        help="extra config to overwrite the default config")
    parser.add_argument(
        "--checkpoint_path", type=str, help="path of the checkpoint to load.")
    parser.add_argument(
        "--input",
        type=str,
        help="path of directory containing mel spectrogram (in .npy format)")
    parser.add_argument("--output", type=str, help="path to save outputs")
    parser.add_argument(
        "--ngpu", type=int, default=1, help="if ngpu=0, use cpu.")
    parser.add_argument(
        "--opts",
        nargs=argparse.REMAINDER,
        help="options to overwrite --config file and the default config, passing in KEY VALUE pairs"
    )

    args = parser.parse_args()
    # The config file is applied first so that --opts can override it.
    if args.config:
        config.merge_from_file(args.config)
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()
    print(config)
    print(args)

    main(config, args)


================================================
FILE: paddlespeech/t2s/exps/waveflow/train.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time

import numpy as np
import paddle
from paddle import distributed as dist
from paddle.io import DataLoader
from paddle.io import DistributedBatchSampler

from paddlespeech.t2s.datasets import dataset
from paddlespeech.t2s.exps.waveflow.config import get_cfg_defaults
from paddlespeech.t2s.exps.waveflow.ljspeech import LJSpeech
from paddlespeech.t2s.exps.waveflow.ljspeech import LJSpeechClipCollector
from paddlespeech.t2s.exps.waveflow.ljspeech import LJSpeechCollector
from paddlespeech.t2s.models.waveflow import ConditionalWaveFlow
from paddlespeech.t2s.models.waveflow import WaveFlowLoss
from paddlespeech.t2s.training.cli import default_argument_parser
from paddlespeech.t2s.training.experiment import ExperimentBase
from paddlespeech.t2s.utils import mp_tools


class Experiment(ExperimentBase):
    """WaveFlow training experiment on LJSpeech.

    Supplies the model/optimizer/criterion, the data loaders, one training
    step and one validation step on top of ``ExperimentBase``.
    """

    def setup_model(self):
        """Create the WaveFlow model, its Adam optimizer and the loss."""
        cfg = self.config
        model_cfg = cfg.model
        network = ConditionalWaveFlow(
            upsample_factors=model_cfg.upsample_factors,
            n_flows=model_cfg.n_flows,
            n_layers=model_cfg.n_layers,
            n_group=model_cfg.n_group,
            channels=model_cfg.channels,
            n_mels=cfg.data.n_mels,
            kernel_size=model_cfg.kernel_size)

        # Wrap the model before building the optimizer so the optimizer is
        # given the parameters of the (possibly) wrapped model.
        if self.parallel:
            network = paddle.DataParallel(network)
        adam = paddle.optimizer.Adam(
            cfg.training.lr, parameters=network.parameters())
        loss_fn = WaveFlowLoss(sigma=cfg.model.sigma)

        self.model = network
        self.optimizer = adam
        self.criterion = loss_fn

    def setup_dataloader(self):
        """Split the dataset and build the train and validation loaders.

        Training batches are random fixed-length clips; validation uses
        padded batches of size 1.
        """
        data_cfg = self.config.data

        full_set = LJSpeech(self.args.data)
        valid_set, train_set = dataset.split(full_set, data_cfg.valid_size)

        clip_collate = LJSpeechClipCollector(data_cfg.clip_frames,
                                             data_cfg.hop_length)

        if self.parallel:
            batch_sampler = DistributedBatchSampler(
                train_set,
                batch_size=data_cfg.batch_size,
                num_replicas=dist.get_world_size(),
                rank=dist.get_rank(),
                shuffle=True,
                drop_last=True)
            train_loader = DataLoader(
                train_set, batch_sampler=batch_sampler, collate_fn=clip_collate)
        else:
            train_loader = DataLoader(
                train_set,
                batch_size=data_cfg.batch_size,
                shuffle=True,
                drop_last=True,
                collate_fn=clip_collate)

        valid_loader = DataLoader(
            valid_set, batch_size=1, collate_fn=LJSpeechCollector())

        self.train_loader = train_loader
        self.valid_loader = valid_loader

    def compute_outputs(self, mel, wav):
        """Run the model on ``(wav, mel)`` and return ``(z, log_det_jacobian)``."""
        latent, log_det = self.model(wav, mel)
        return latent, log_det

    def train_batch(self):
        """Run one optimization step and log timing and loss."""
        tic = time.time()
        batch = self.read_batch()
        # Time spent waiting for the batch only.
        load_seconds = time.time() - tic

        self.model.train()
        self.optimizer.clear_grad()
        mel, wav = batch
        latent, log_det = self.compute_outputs(mel, wav)
        loss = self.criterion(latent, log_det)
        loss.backward()
        self.optimizer.step()
        # Measured from the same start, so it includes the loading time.
        step_seconds = time.time() - tic

        loss_value = float(loss)
        report = "".join([
            "Rank: {}, ".format(dist.get_rank()),
            "step: {}, ".format(self.iteration),
            "time: {:>.3f}s/{:>.3f}s, ".format(load_seconds, step_seconds),
            "loss: {:>.6f}".format(loss_value),
        ])
        self.logger.info(report)
        if dist.get_rank() == 0:
            self.visualizer.add_scalar("train/loss", loss_value, self.iteration)

    @mp_tools.rank_zero_only
    @paddle.no_grad()
    def valid(self):
        """Compute the loss on the first validation batch and log it."""
        first_batch = next(iter(self.valid_loader))
        mel, wav = first_batch
        latent, log_det = self.compute_outputs(mel, wav)
        loss = self.criterion(latent, log_det)
        # Only one batch is evaluated, so the mean is over a single value.
        losses = [float(loss)]
        self.visualizer.add_scalar("valid/loss",
                                   np.mean(losses), self.iteration)


def main_sp(config, args):
    """Run the experiment in the current process.

    Builds the ``Experiment``, sets it up, resumes or loads a checkpoint and
    then starts the run.
    """
    experiment = Experiment(config, args)
    for stage in (experiment.setup, experiment.resume_or_load, experiment.run):
        stage()


def main(config, args):
    """Start training, spawning one process per GPU when ``ngpu > 1``."""
    if args.ngpu <= 1:
        # Zero or one device: train in the current process.
        main_sp(config, args)
        return
    dist.spawn(main_sp, args=(config, args), nprocs=args.ngpu)


if __name__ == "__main__":
    # Command-line entry point: build the config from defaults, an optional
    # config file and KEY VALUE overrides, then start training.
    config = get_cfg_defaults()
    args = default_argument_parser().parse_args()
    # The config file is applied first so that --opts can override it.
    if args.config:
        config.merge_from_file(args.config)
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()
    for shown in (config, args):
        print(shown)

    main(config, args)


================================================
FILE: paddlespeech/t2s/exps/wavernn/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/exps/wavernn/synthesize.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
from pathlib import Path

import jsonlines
import numpy as np
import paddle
import soundfile as sf
import yaml
from paddle import distributed as dist
from timer import timer
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.data_table import DataTable
from paddlespeech.t2s.models.wavernn import WaveRNN


def main():
    """Synthesize waveforms with a trained WaveRNN for a metadata file.

    Reads the YAML config and the checkpoint, generates one wav per entry of
    ``--test-metadata`` into ``--output-dir`` and prints per-utterance and
    overall generation speed and real-time factor (RTF).
    """
    parser = argparse.ArgumentParser(description="Synthesize with WaveRNN.")

    parser.add_argument("--config", type=str, help="Vocoder config file.")
    parser.add_argument("--checkpoint", type=str, help="snapshot to load.")
    parser.add_argument("--test-metadata", type=str, help="dev data.")
    parser.add_argument("--output-dir", type=str, help="output dir.")
    parser.add_argument(
        "--ngpu", type=int, default=1, help="if ngpu == 0, use cpu.")

    args = parser.parse_args()

    with open(args.config) as f:
        config = CfgNode(yaml.safe_load(f))

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(config)
    print(
        f"master see the word size: {dist.get_world_size()}, from pid: {os.getpid()}"
    )

    if args.ngpu == 0:
        paddle.set_device("cpu")
    elif args.ngpu > 0:
        paddle.set_device("gpu")
    else:
        print("ngpu should >= 0 !")

    # Build the model exactly like the WaveRNN training script does
    # (hop_length and sample_rate taken from the config), so that synthesis
    # does not fall back to the constructor defaults when the config uses
    # other values.
    model = WaveRNN(
        hop_length=config.n_shift, sample_rate=config.fs, **config["model"])
    state_dict = paddle.load(args.checkpoint)
    model.set_state_dict(state_dict["main_params"])

    model.eval()

    with jsonlines.open(args.test_metadata, 'r') as reader:
        metadata = list(reader)
    test_dataset = DataTable(
        metadata,
        fields=['utt_id', 'feats'],
        converters={
            'utt_id': None,
            'feats': np.load,
        })
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # N: total number of generated samples, T: total elapsed time.
    N = 0
    T = 0
    for example in test_dataset:
        utt_id = example['utt_id']
        mel = example['feats']
        mel = paddle.to_tensor(mel)  # (T, C)
        with timer() as t:
            with paddle.no_grad():
                wav = model.generate(
                    c=mel,
                    batched=config.inference.gen_batched,
                    target=config.inference.target,
                    overlap=config.inference.overlap,
                    mu_law=config.mu_law,
                    gen_display=False)
            wav = wav.numpy()
            N += wav.size
            T += t.elapse
            # speed is in samples per second; RTF is the sample rate divided
            # by that speed.
            speed = wav.size / t.elapse
            rtf = config.fs / speed
        print(
            f"{utt_id}, mel: {mel.shape}, wave: {wav.shape}, time: {t.elapse}s, Hz: {speed}, RTF: {rtf}."
        )
        sf.write(str(output_dir / (utt_id + ".wav")), wav, samplerate=config.fs)
    # With an empty metadata file nothing was generated; skip the summary
    # instead of dividing by zero.
    if N > 0 and T > 0:
        print(f"generation speed: {N / T}Hz, RTF: {config.fs / (N / T) }")


# Run synthesis only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/exps/wavernn/train.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import shutil
from pathlib import Path

import jsonlines
import numpy as np
import paddle
import yaml
from paddle import DataParallel
from paddle import distributed as dist
from paddle.io import DataLoader
from paddle.io import DistributedBatchSampler
from paddle.optimizer import Adam
from yacs.config import CfgNode

from paddlespeech.t2s.datasets.data_table import DataTable
from paddlespeech.t2s.datasets.vocoder_batch_fn import WaveRNNClip
from paddlespeech.t2s.models.wavernn import WaveRNN
from paddlespeech.t2s.models.wavernn import WaveRNNEvaluator
from paddlespeech.t2s.models.wavernn import WaveRNNUpdater
from paddlespeech.t2s.modules.losses import discretized_mix_logistic_loss
from paddlespeech.t2s.training.extensions.snapshot import Snapshot
from paddlespeech.t2s.training.extensions.visualizer import VisualDL
from paddlespeech.t2s.training.seeding import seed_everything
from paddlespeech.t2s.training.trainer import Trainer


def train_sp(args, config):
    """Train WaveRNN in the current process.

    Sets the device, builds datasets, data loaders, model, loss and
    optimizer, then runs a ``Trainer`` with evaluation, VisualDL logging and
    snapshot extensions.

    Args:
        args: Parsed command-line arguments providing ``ngpu``, ``config``,
            ``train_metadata``, ``dev_metadata`` and ``output_dir``.
        config: Training configuration loaded from the YAML config file.

    Raises:
        RuntimeError: If ``config.model.mode`` is neither ``'RAW'`` nor
            ``'MOL'``.
    """
    # decides device type and whether to run in parallel
    # setup running environment correctly
    world_size = paddle.distributed.get_world_size()
    if (not paddle.is_compiled_with_cuda()) or args.ngpu == 0:
        paddle.set_device("cpu")
    else:
        paddle.set_device("gpu")
        if world_size > 1:
            paddle.distributed.init_parallel_env()

    # set the random seed, it is a must for multiprocess training
    seed_everything(config.seed)

    print(
        f"rank: {dist.get_rank()}, pid: {os.getpid()}, parent_pid: {os.getppid()}",
    )

    # construct dataset for training and validation
    with jsonlines.open(args.train_metadata, 'r') as reader:
        train_metadata = list(reader)
    train_dataset = DataTable(
        data=train_metadata,
        fields=["wave", "feats"],
        converters={
            "wave": np.load,
            "feats": np.load,
        }, )

    with jsonlines.open(args.dev_metadata, 'r') as reader:
        dev_metadata = list(reader)
    dev_dataset = DataTable(
        data=dev_metadata,
        fields=["wave", "feats"],
        converters={
            "wave": np.load,
            "feats": np.load,
        }, )

    batch_fn = WaveRNNClip(
        mode=config.model.mode,
        aux_context_window=config.model.aux_context_window,
        hop_size=config.n_shift,
        batch_max_steps=config.batch_max_steps,
        bits=config.model.bits)

    # collate function and dataloader
    train_sampler = DistributedBatchSampler(
        train_dataset,
        batch_size=config.batch_size,
        shuffle=True,
        drop_last=True)
    dev_sampler = DistributedBatchSampler(
        dev_dataset,
        batch_size=config.batch_size,
        shuffle=False,
        drop_last=False)
    print("samplers done!")

    train_dataloader = DataLoader(
        train_dataset,
        batch_sampler=train_sampler,
        collate_fn=batch_fn,
        num_workers=config.num_workers)

    dev_dataloader = DataLoader(
        dev_dataset,
        collate_fn=batch_fn,
        batch_sampler=dev_sampler,
        num_workers=config.num_workers)

    # Un-collated loader (batch size 1) handed to the evaluator.
    valid_generate_loader = DataLoader(dev_dataset, batch_size=1)

    print("dataloaders done!")

    model = WaveRNN(
        hop_length=config.n_shift, sample_rate=config.fs, **config["model"])
    if world_size > 1:
        model = DataParallel(model)
    print("model done!")

    if config.model.mode == 'RAW':
        criterion = paddle.nn.CrossEntropyLoss(axis=1)
    elif config.model.mode == 'MOL':
        criterion = discretized_mix_logistic_loss
    else:
        # The exception used to be constructed but never raised, so training
        # continued with ``criterion = None``. Fail fast instead.
        raise RuntimeError(
            f"Unknown model mode value - {config.model.mode}")
    print("criterions done!")
    clip = paddle.nn.ClipGradByGlobalNorm(config.grad_clip)
    optimizer = Adam(
        parameters=model.parameters(),
        learning_rate=config.learning_rate,
        grad_clip=clip)

    print("optimizer done!")

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    if dist.get_rank() == 0:
        config_name = Path(args.config).name
        # copy conf to output_dir
        shutil.copyfile(args.config, output_dir / config_name)

    updater = WaveRNNUpdater(
        model=model,
        optimizer=optimizer,
        criterion=criterion,
        dataloader=train_dataloader,
        output_dir=output_dir,
        mode=config.model.mode)

    evaluator = WaveRNNEvaluator(
        model=model,
        dataloader=dev_dataloader,
        criterion=criterion,
        output_dir=output_dir,
        valid_generate_loader=valid_generate_loader,
        config=config)

    trainer = Trainer(
        updater,
        stop_trigger=(config.train_max_steps, "iteration"),
        out=output_dir)

    # Evaluation and VisualDL logging run on rank 0 only; the snapshot
    # extension is registered on every rank.
    if dist.get_rank() == 0:
        trainer.extend(
            evaluator, trigger=(config.eval_interval_steps, 'iteration'))
        trainer.extend(VisualDL(output_dir), trigger=(1, 'iteration'))
    trainer.extend(
        Snapshot(max_size=config.num_snapshots),
        trigger=(config.save_interval_steps, 'iteration'))

    print("Trainer Done!")
    trainer.run()


def main():
    """Parse the command line, load the YAML config and launch training.

    With ``--ngpu`` greater than 1 one process per GPU is spawned; otherwise
    training runs in the current process.
    """
    parser = argparse.ArgumentParser(description="Train a WaveRNN model.")
    string_options = (
        ("--config", "WaveRNN config file."),
        ("--train-metadata", "training data."),
        ("--dev-metadata", "dev data."),
        ("--output-dir", "output dir."),
    )
    for flag, description in string_options:
        parser.add_argument(flag, type=str, help=description)
    parser.add_argument(
        "--ngpu", type=int, default=1, help="if ngpu == 0, use cpu.")

    args = parser.parse_args()

    with open(args.config, 'rt') as config_file:
        config = CfgNode(yaml.safe_load(config_file))

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(config)
    print(
        f"master see the word size: {dist.get_world_size()}, from pid: {os.getpid()}"
    )

    # Dispatch: a single process unless more than one GPU was requested.
    if args.ngpu <= 1:
        train_sp(args, config)
    else:
        dist.spawn(train_sp, (args, config), nprocs=args.ngpu)


# Start training only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/t2s/frontend/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .generate_lexicon import *
from .normalizer import *
from .punctuation import *
from .ssml import *
from .tone_sandhi import *
from .vocab import *
from .zh_normalization import *


================================================
FILE: paddlespeech/t2s/frontend/arpabet.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
A phonology system with ARPABET symbols and limited punctuations. The G2P 
conversion is done by g2p_en.

Note that g2p_en does not handle hyphenated words well, so make sure the input
sentence is normalized first.
"""
from g2p_en import G2p

from paddlespeech.t2s.frontend.phonectic import Phonetics
from paddlespeech.t2s.frontend.vocab import Vocab


class ARPABET(Phonetics):
    """A phonology for English that uses ARPABET without stress as the phoneme vocabulary.

    47 symbols = 39 phones + 4 punctuations + 4 special tokens(<pad> <unk> <s> </s>)

    The G2P backend emits vowels with a lexical stress marker:
        0    — No stress
        1    — Primary stress
        2    — Secondary stress
    This class strips the marker (see ``_stress_to_no_stress_``), so the
    vocabulary holds the 39 stress-free phonemes below.

    Phoneme Set:
        Phoneme Example Translation
        ------- ------- -----------
        AA      odd     AA D
        AE      at      AE T
        AH      hut     HH AH T
        AO      ought   AO T
        AW      cow     K AW
        AY      hide    HH AY D
        B       be      B IY
        CH      cheese  CH IY Z
        D       dee     D IY
        DH      thee    DH IY
        EH      Ed      EH D
        ER      hurt    HH ER T
        EY      ate     EY T
        F       fee     F IY
        G       green   G R IY N
        HH      he      HH IY
        IH      it      IH T
        IY      eat     IY T
        JH      gee     JH IY
        K       key     K IY
        L       lee     L IY
        M       me      M IY
        N       knee    N IY
        NG      ping    P IH NG
        OW      oat     OW T
        OY      toy     T OY
        P       pee     P IY
        R       read    R IY D
        S       sea     S IY
        SH      she     SH IY
        T       tea     T IY
        TH      theta   TH EY T AH
        UH      hood    HH UH D
        UW      two     T UW
        V       vee     V IY
        W       we      W IY
        Y       yield   Y IY L D
        Z       zee     Z IY
        ZH      seizure S IY ZH ER

    See http://www.speech.cs.cmu.edu/cgi-bin/cmudict for more details.
    """
    # 39 phonemes. The list order is passed to Vocab as-is, so it must not be
    # reordered (note that 'UW' precedes 'UH').
    phonemes = [
        'AA', 'AE', 'AH', 'AO', 'AW', 'AY', 'B', 'CH', 'D', 'DH', 'EH', 'ER',
        'EY', 'F', 'G', 'HH', 'IH', 'IY', 'JH', 'K', 'L', 'M', 'N', 'NG', 'OW',
        'OY', 'P', 'R', 'S', 'SH', 'T', 'TH', 'UW', 'UH', 'V', 'W', 'Y', 'Z',
        'ZH'
    ]
    punctuations = [',', '.', '?', '!']
    symbols = phonemes + punctuations
    # Vowels carry a lexical stress marker:
    # 0 unstressed, 1 primary stress and 2 secondary stress.
    # Maps every stressed vowel to its stress-free form.
    _stress_to_no_stress_ = {
        'AA0': 'AA', 'AA1': 'AA', 'AA2': 'AA',
        'AE0': 'AE', 'AE1': 'AE', 'AE2': 'AE',
        'AH0': 'AH', 'AH1': 'AH', 'AH2': 'AH',
        'AO0': 'AO', 'AO1': 'AO', 'AO2': 'AO',
        'AW0': 'AW', 'AW1': 'AW', 'AW2': 'AW',
        'AY0': 'AY', 'AY1': 'AY', 'AY2': 'AY',
        'EH0': 'EH', 'EH1': 'EH', 'EH2': 'EH',
        'ER0': 'ER', 'ER1': 'ER', 'ER2': 'ER',
        'EY0': 'EY', 'EY1': 'EY', 'EY2': 'EY',
        'IH0': 'IH', 'IH1': 'IH', 'IH2': 'IH',
        'IY0': 'IY', 'IY1': 'IY', 'IY2': 'IY',
        'OW0': 'OW', 'OW1': 'OW', 'OW2': 'OW',
        'OY0': 'OY', 'OY1': 'OY', 'OY2': 'OY',
        'UH0': 'UH', 'UH1': 'UH', 'UH2': 'UH',
        'UW0': 'UW', 'UW1': 'UW', 'UW2': 'UW'
    }

    def __repr__(self):
        # Class attributes are not visible as bare names inside a method;
        # they have to be reached through ``self`` (the bare names raised
        # NameError).
        fmt = "ARPABETWithoutStress(phonemes: {}, punctuations: {})"
        return fmt.format(len(self.phonemes), self.punctuations)

    def __init__(self):
        # https://github.com/Kyubyong/g2p/blob/master/g2p_en/g2p.py
        self.backend = G2p()
        self.vocab = Vocab(self.phonemes + self.punctuations)

    def _remove_vowels(self, phone):
        """Strip the stress marker from a vowel; other symbols pass through.

        Despite its name, this does not remove vowels.
        """
        return self._stress_to_no_stress_.get(phone, phone)

    def phoneticize(self, sentence, add_start_end=False):
        """ Normalize the input text sequence and convert it into pronunciation sequence.
        Args:
            sentence (str): The input text sequence.
            add_start_end (bool): Whether to wrap the sequence in the
                vocabulary's start and end symbols. Defaults to False.

        Returns:
            List[str]: The list of pronunciation sequence. Symbols that are
                not in the vocabulary are dropped.
        """
        # g2p and remove vowel stress
        phonemes = [
            self._remove_vowels(item) for item in self.backend(sentence)
        ]
        if add_start_end:
            start = self.vocab.start_symbol
            end = self.vocab.end_symbol
            phonemes = [start] + phonemes + [end]
        phonemes = [item for item in phonemes if item in self.vocab.stoi]
        return phonemes

    def numericalize(self, phonemes):
        """ Convert pronunciation sequence into pronunciation id sequence.

        Args:
            phonemes (List[str]): The list of pronunciation sequence.

        Returns:
            List[int]: The list of pronunciation id sequence.
        """
        # phonemes to ids
        ids = [self.vocab.lookup(item) for item in phonemes]
        return ids

    def reverse(self, ids):
        """ Reverse the list of pronunciation id sequence to a list of pronunciation sequence.

        Args:
            ids( List[int]): The list of pronunciation id sequence.

        Returns:
            List[str]:
                The list of pronunciation sequence.
        """
        return [self.vocab.reverse(i) for i in ids]

    def __call__(self, sentence, add_start_end=False):
        """ Convert the input text sequence into pronunciation id sequence.

        Args:
            sentence (str): The input text sequence.
            add_start_end (bool): Forwarded to ``phoneticize``.

        Returns:
            List[int]: The list of pronunciation id sequence.
        """
        return self.numericalize(
            self.phoneticize(sentence, add_start_end=add_start_end))

    @property
    def vocab_size(self):
        """ Vocab size.
        """
        # 47 = 39 phones + 4 punctuations + 4 special tokens(<pad> <unk> <s> </s>)
        return len(self.vocab)


class ARPABETWithStress(Phonetics):
    """
    A phonology for English that uses ARPABET with stress as the phoneme vocabulary.

    77 symbols = 69 phones + 4 punctuations + 4 special tokens
    """
    phonemes = [
        'AA0', 'AA1', 'AA2', 'AE0', 'AE1', 'AE2', 'AH0', 'AH1', 'AH2', 'AO0',
        'AO1', 'AO2', 'AW0', 'AW1', 'AW2', 'AY0', 'AY1', 'AY2', 'B', 'CH', 'D',
        'DH', 'EH0', 'EH1', 'EH2', 'ER0', 'ER1', 'ER2', 'EY0', 'EY1', 'EY2',
        'F', 'G', 'HH', 'IH0', 'IH1', 'IH2', 'IY0', 'IY1', 'IY2', 'JH', 'K',
        'L', 'M', 'N', 'NG', 'OW0', 'OW1', 'OW2', 'OY0', 'OY1', 'OY2', 'P', 'R',
        'S', 'SH', 'T', 'TH', 'UH0', 'UH1', 'UH2', 'UW0', 'UW1', 'UW2', 'V',
        'W', 'Y', 'Z', 'ZH'
    ]
    punctuations = [',', '.', '?', '!']
    symbols = phonemes + punctuations

    def __repr__(self):
        # Class-level names are not visible as bare names inside a method
        # body, so the tables have to be reached through ``self``.
        fmt = "ARPABETWithStress(phonemes: {}, punctuations: {})"
        return fmt.format(len(self.phonemes), self.punctuations)

    def __init__(self):
        self.backend = G2p()
        self.vocab = Vocab(self.phonemes + self.punctuations)

    def phoneticize(self, sentence, add_start_end=False):
        """ Normalize the input text sequence and convert it into pronunciation sequence.

        Stress markers produced by the backend are kept as they are.

        Args:
            sentence (str): The input text sequence.
            add_start_end (bool): If True, the vocabulary's start and end
                symbols are put around the sequence.

        Returns:
            List[str]: The list of pronunciation sequence. Items that are
                not in ``self.vocab.stoi`` are dropped.
        """
        phonemes = self.backend(sentence)
        if add_start_end:
            start = self.vocab.start_symbol
            end = self.vocab.end_symbol
            phonemes = [start] + phonemes + [end]
        # discard anything the vocabulary does not know
        phonemes = [item for item in phonemes if item in self.vocab.stoi]
        return phonemes

    def numericalize(self, phonemes):
        """ Convert pronunciation sequence into pronunciation id sequence.

        Args:
            phonemes (List[str]): The list of pronunciation sequence.

        Returns:
            List[int]: The list of pronunciation id sequence.
        """
        ids = [self.vocab.lookup(item) for item in phonemes]
        return ids

    def reverse(self, ids):
        """ Reverse the list of pronunciation id sequence to a list of pronunciation sequence.

        Args:
            ids (List[int]): The list of pronunciation id sequence.

        Returns:
            List[str]: The list of pronunciation sequence.
        """
        return [self.vocab.reverse(i) for i in ids]

    def __call__(self, sentence, add_start_end=False):
        """ Convert the input text sequence into pronunciation id sequence.

        Args:
            sentence (str): The input text sequence.
            add_start_end (bool): Forwarded to ``phoneticize``.

        Returns:
            List[int]: The list of pronunciation id sequence.
        """
        return self.numericalize(
            self.phoneticize(sentence, add_start_end=add_start_end))

    @property
    def vocab_size(self):
        """ Vocab size.
        """
        # 77 = 69 phones + 4 punctuations + 4 special tokens
        return len(self.vocab)


================================================
FILE: paddlespeech/t2s/frontend/canton_frontend.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Dict
from typing import List

import numpy as np
import paddle
import ToJyutping

from paddlespeech.t2s.frontend.zh_normalization.text_normlization import TextNormalizer

INITIALS = [
    'aa', 'aai', 'aak', 'aap', 'aat', 'aau', 'ai', 'au', 'ap', 'at', 'ak', 'a',
    'p', 'b', 'e', 'ts', 't', 'dz', 'd', 'kw', 'k', 'gw', 'g', 'f', 'h', 'l',
    'm', 'ng', 'n', 's', 'y', 'w', 'c', 'z', 'j', 'ong', 'on', 'ou', 'oi', 'ok',
    'o', 'uk', 'ung'
]
INITIALS += ['sp', 'spl', 'spn', 'sil']


def jyuping_to_phonemes(cantons: List[str]):
    """Split Jyutping syllables into initial and final phoneme symbols.

    Each syllable is compared against ``INITIALS`` in list order and the
    first entry that is a prefix of the syllable wins. The syllable then
    contributes two items, the matched prefix and the remainder. Syllables
    starting with "nga" are the exception: they contribute a single item,
    the syllable without its first two characters.

    Args:
        cantons (List[str]): Jyutping syllables, e.g. ``['gam3', 'ngaam1']``.

    Returns:
        List[str]: The flat phoneme sequence. Syllables that start with no
            entry of ``INITIALS`` (punctuation, for instance) add nothing.
    """
    phones = []
    for canton in cantons:
        for consonant in INITIALS:
            if not canton.startswith(consonant):
                continue
            if canton.startswith("nga"):
                phones.append(canton[2:])
            else:
                split_at = len(consonant)
                phones.append(canton[:split_at])
                phones.append(canton[split_at:])
            # only the first matching prefix is used
            break
    return phones


class CantonFrontend():
    """Cantonese text frontend: text normalization, Jyutping G2P and phone-id lookup."""

    def __init__(self, phone_vocab_path: str):
        """Create the normalizer and, when a path is given, load the phone vocabulary.

        Args:
            phone_vocab_path (str): Text file with one whitespace-separated
                ``phone id`` pair per line. A falsy value leaves the
                vocabulary empty.
        """
        self.text_normalizer = TextNormalizer()
        self.punc = "、：，；。？！“”‘’':,;.?!"

        # phone symbol -> integer id
        self.vocab_phones = {}
        if phone_vocab_path:
            with open(phone_vocab_path, 'rt', encoding='utf-8') as vocab_file:
                entries = [
                    row.strip().split() for row in vocab_file.readlines()
                ]
            for phone, phone_id in entries:
                self.vocab_phones[phone] = int(phone_id)

    def _g2p(self, sentences: List[str],
             merge_sentences: bool=True) -> List[List[str]]:
        """Convert each sentence into its own phoneme list.

        ``merge_sentences`` is accepted but not used here: the result always
        holds one list per input sentence.
        """
        # ToJyutping yields a space-separated Jyutping string such as
        # 'gam3 ngaam1 lou5 sai3 jiu1 kau4 keoi5 dang2 zan6 jiu3 hoi1 wui2, ...'
        # which is then broken down into phonemes syllable by syllable.
        return [
            jyuping_to_phonemes(
                ToJyutping.get_jyutping_text(sentence).split(' '))
            for sentence in sentences
        ]

    def _p2id(self, phonemes: List[str]) -> np.ndarray:
        """Look up phone ids, substituting "sp" for phones missing from the vocabulary."""
        phone_ids = []
        for phn in phonemes:
            known = phn if phn in self.vocab_phones else "sp"
            phone_ids.append(self.vocab_phones[known])
        return np.array(phone_ids, np.int64)

    def get_phonemes(self,
                     sentence: str,
                     merge_sentences: bool=True,
                     print_info: bool=False) -> List[List[str]]:
        """Normalize ``sentence`` and return the phonemes of each resulting sentence.

        Args:
            sentence (str): Raw input text.
            merge_sentences (bool): Forwarded to ``_g2p``.
            print_info (bool): When True, print the normalized sentences and
                the G2P result.

        Returns:
            List[List[str]]: One phoneme list per normalized sentence.
        """
        # text normalization, which also segments the text into sentences
        sentences = self.text_normalizer.normalize(sentence)
        # grapheme to phoneme
        phonemes = self._g2p(sentences, merge_sentences=merge_sentences)

        if print_info:
            divider = "----------------------------"
            print(divider)
            print("text norm results:")
            print(sentences)
            print(divider)
            print("g2p results:")
            print(phonemes)
            print(divider)

        return phonemes

    def get_input_ids(self,
                      sentence: str,
                      merge_sentences: bool=True,
                      print_info: bool=False,
                      to_tensor: bool=True) -> Dict[str, List[paddle.Tensor]]:
        """Convert raw text into phone-id sequences.

        Args:
            sentence (str): Raw input text.
            merge_sentences (bool): Forwarded to ``get_phonemes``.
            print_info (bool): Forwarded to ``get_phonemes``.
            to_tensor (bool): When True each id array is wrapped with
                ``paddle.to_tensor``; otherwise the numpy arrays are kept.

        Returns:
            Dict: ``{"phone_ids": [...]}`` with one entry per non-empty
                phoneme list, or an empty dict when there is none.
        """
        phonemes = self.get_phonemes(
            sentence, merge_sentences=merge_sentences, print_info=print_info)

        collected = []
        for phones in phonemes:
            if not phones:
                continue
            phone_ids = self._p2id(phones)
            # with onnxruntime, the first paddle.to_tensor() call is too slow,
            # so the conversion can be switched off
            if to_tensor:
                phone_ids = paddle.to_tensor(phone_ids)
            collected.append(phone_ids)

        result = {}
        if collected:
            result["phone_ids"] = collected
        return result


================================================
FILE: paddlespeech/t2s/frontend/en_frontend.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .phonectic import English


================================================
FILE: paddlespeech/t2s/frontend/g2pw/__init__.py
================================================
from .onnx_api import G2PWOnnxConverter


================================================
FILE: paddlespeech/t2s/frontend/g2pw/dataset.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Credits
    This code is modified from https://github.com/GitYCC/g2pW
"""
from typing import Dict
from typing import List
from typing import Tuple

import numpy as np

from paddlespeech.t2s.frontend.g2pw.utils import tokenize_and_map

ANCHOR_CHAR = '▁'


def prepare_onnx_input(tokenizer,
                       labels: List[str],
                       char2phonemes: Dict[str, List[int]],
                       chars: List[str],
                       texts: List[str],
                       query_ids: List[int],
                       use_mask: bool=False,
                       window_size: int=None,
                       max_len: int=512) -> Dict[str, np.array]:
    """Turn (text, query position) pairs into the numpy arrays fed to the model.

    Args:
        tokenizer: Object providing ``tokenize`` (used by
            ``tokenize_and_map``) and ``convert_tokens_to_ids``.
        labels: All phoneme labels; a phoneme mask has one slot per label.
        char2phonemes: For each character, the label indices it may take.
        chars: Known query characters; ``char_ids`` index into this list.
        texts: One text per query.
        query_ids: Position of the queried character in the matching text.
        use_mask: If True only the labels listed in ``char2phonemes`` for
            the query character are enabled; otherwise every label is.
        window_size: If truthy, texts are first cut to a window around the
            query position.
        max_len: Upper bound on the token count, including the ``[CLS]``
            and ``[SEP]`` tokens.

    Returns:
        Dict[str, np.array]: Arrays under the keys ``input_ids``,
            ``token_type_ids``, ``attention_masks``, ``phoneme_masks``,
            ``char_ids`` and ``position_ids``. An empty dict is returned as
            soon as one text cannot be tokenized.
    """
    if window_size is not None:
        truncated_texts, truncated_query_ids = _truncate_texts(
            window_size=window_size, texts=texts, query_ids=query_ids)

    # Only a truthy window size switches over to the windowed inputs.
    if window_size:
        source_texts, source_query_ids = truncated_texts, truncated_query_ids
    else:
        source_texts, source_query_ids = texts, query_ids

    features = {
        'input_ids': [],
        'token_type_ids': [],
        'attention_masks': [],
        'phoneme_masks': [],
        'char_ids': [],
        'position_ids': [],
    }

    for idx in range(len(texts)):
        text = source_texts[idx].lower()
        query_id = source_query_ids[idx]

        try:
            tokens, text2token, token2text = tokenize_and_map(
                tokenizer=tokenizer, text=text)
        except Exception:
            print(f'warning: text "{text}" is invalid')
            return {}

        text, query_id, tokens, text2token, token2text = _truncate(
            max_len=max_len,
            text=text,
            query_id=query_id,
            tokens=tokens,
            text2token=text2token,
            token2text=token2text)

        wrapped_tokens = ['[CLS]'] + tokens + ['[SEP]']
        sequence_length = len(wrapped_tokens)

        input_id = list(
            np.array(tokenizer.convert_tokens_to_ids(wrapped_tokens)))
        token_type_id = list(np.zeros((sequence_length, ), dtype=int))
        attention_mask = list(np.ones((sequence_length, ), dtype=int))

        query_char = text[query_id]
        if use_mask:
            phoneme_mask = [
                int(i in char2phonemes[query_char])
                for i in range(len(labels))
            ]
        else:
            phoneme_mask = [1] * len(labels)
        char_id = chars.index(query_char)
        # shifted by one because [CLS] occupies the first position
        position_id = text2token[query_id] + 1

        features['input_ids'].append(input_id)
        features['token_type_ids'].append(token_type_id)
        features['attention_masks'].append(attention_mask)
        features['phoneme_masks'].append(phoneme_mask)
        features['char_ids'].append(char_id)
        features['position_ids'].append(position_id)

    # every field is int64 except the phoneme mask, which is float32
    return {
        name: np.array(rows).astype(np.float32
                                    if name == 'phoneme_masks' else np.int64)
        for name, rows in features.items()
    }


def _truncate_texts(window_size: int, texts: List[str],
                    query_ids: List[int]) -> Tuple[List[str], List[int]]:
    truncated_texts = []
    truncated_query_ids = []
    for text, query_id in zip(texts, query_ids):
        start = max(0, query_id - window_size // 2)
        end = min(len(text), query_id + window_size // 2)
        truncated_text = text[start:end]
        truncated_texts.append(truncated_text)

        truncated_query_id = query_id - start
        truncated_query_ids.append(truncated_query_id)
    return truncated_texts, truncated_query_ids


def _truncate(max_len: int,
              text: str,
              query_id: int,
              tokens: List[str],
              text2token: List[int],
              token2text: List[Tuple[int]]):
    truncate_len = max_len - 2
    if len(tokens) <= truncate_len:
        return (text, query_id, tokens, text2token, token2text)

    token_position = text2token[query_id]

    token_start = token_position - truncate_len // 2
    token_end = token_start + truncate_len
    font_exceed_dist = -token_start
    back_exceed_dist = token_end - len(tokens)
    if font_exceed_dist > 0:
        token_start += font_exceed_dist
        token_end += font_exceed_dist
    elif back_exceed_dist > 0:
        token_start -= back_exceed_dist
        token_end -= back_exceed_dist

    start = token2text[token_start][0]
    end = token2text[token_end - 1][1]

    return (text[start:end], query_id - start, tokens[token_start:token_end], [
        i - token_start if i is not None else None
        for i in text2token[start:end]
    ], [(s - start, e - start) for s, e in token2text[token_start:token_end]])


def get_phoneme_labels(polyphonic_chars: List[List[str]]
                       ) -> Tuple[List[str], Dict[str, List[int]]]:
    """Build the phoneme label list and the per-character label indices.

    Args:
        polyphonic_chars: ``[char, phoneme]`` pairs.

    Returns:
        ``(labels, char2phonemes)``: ``labels`` is the sorted list of
        distinct phonemes. ``char2phonemes`` maps each character to the
        label indices of its phonemes, in the order the pairs appear.
    """
    labels = sorted({phoneme for _, phoneme in polyphonic_chars})
    # labels are unique, so a dict gives the same index as labels.index()
    # without a linear scan for every pair
    label_to_index = {label: index for index, label in enumerate(labels)}
    char2phonemes = {}
    for char, phoneme in polyphonic_chars:
        char2phonemes.setdefault(char, []).append(label_to_index[phoneme])
    return labels, char2phonemes


def get_char_phoneme_labels(polyphonic_chars: List[List[str]]
                            ) -> Tuple[List[str], Dict[str, List[int]]]:
    """Build ``"char phoneme"`` labels and the per-character label indices.

    Args:
        polyphonic_chars: ``[char, phoneme]`` pairs.

    Returns:
        ``(labels, char2phonemes)``: ``labels`` is the sorted list of
        distinct ``f'{char} {phoneme}'`` strings. ``char2phonemes`` maps each
        character to the label indices of its pairs, in the order the pairs
        appear.
    """
    labels = sorted(
        {f'{char} {phoneme}'
         for char, phoneme in polyphonic_chars})
    # labels are unique, so a dict gives the same index as labels.index()
    # without a linear scan for every pair
    label_to_index = {label: index for index, label in enumerate(labels)}
    char2phonemes = {}
    for char, phoneme in polyphonic_chars:
        char2phonemes.setdefault(
            char, []).append(label_to_index[f'{char} {phoneme}'])
    return labels, char2phonemes


================================================
FILE: paddlespeech/t2s/frontend/g2pw/onnx_api.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Credits
    This code is modified from https://github.com/GitYCC/g2pW
"""
import json
import os
from typing import Any
from typing import Dict
from typing import List
from typing import Tuple

import numpy as np
import onnxruntime
from opencc import OpenCC
from paddlenlp.transformers import BertTokenizer
from pypinyin import pinyin
from pypinyin import Style

from paddlespeech.cli.utils import download_and_decompress
from paddlespeech.resource.pretrained_models import g2pw_onnx_models
from paddlespeech.t2s.frontend.g2pw.dataset import get_char_phoneme_labels
from paddlespeech.t2s.frontend.g2pw.dataset import get_phoneme_labels
from paddlespeech.t2s.frontend.g2pw.dataset import prepare_onnx_input
from paddlespeech.t2s.frontend.g2pw.utils import load_config
from paddlespeech.t2s.frontend.zh_normalization.char_convert import tranditional_to_simplified
from paddlespeech.utils.env import MODEL_HOME

model_version = '1.1'


def get_g2pw_model_path(model_dir: os.PathLike, model_version: str) -> str:
    """Return the directory that holds the G2PW ONNX model files.

    The directory name is derived from the archive file name in the model's
    URL. If ``g2pW.onnx`` is already present there, that directory is
    returned. Otherwise the archive is fetched with
    ``download_and_decompress`` and its return value is passed on.

    Args:
        model_dir (os.PathLike): Base directory to store models (e.g., ~/.paddlespeech).
        model_version (str): Model version string (e.g., '1.1').

    Returns:
        str: Path to the model directory containing 'g2pW.onnx'.
    """
    archive_info = g2pw_onnx_models['G2PWModel'][model_version]
    # e.g. "G2PWModel_1.1.zip" -> "G2PWModel_1.1"
    extract_name, _ = os.path.splitext(os.path.basename(archive_info['url']))
    model_path = os.path.join(model_dir, extract_name)
    if os.path.isfile(os.path.join(model_path, 'g2pW.onnx')):
        return model_path
    return download_and_decompress(archive_info, model_dir)


def predict(session, onnx_input: Dict[str, Any],
            labels: List[str]) -> Tuple[List[str], List[float]]:
    """Run the session once and pick the best label for every row.

    Args:
        session: Object whose ``run(output_names, feed)`` returns a sequence
            with the score matrix as its first item.
        onnx_input: Arrays as produced by ``prepare_onnx_input``.
        labels: Label for each column of the score matrix.

    Returns:
        The predicted label of every row and the score of that label.
    """
    # the feed names differ slightly from the keys of ``onnx_input``
    feed = {
        "input_ids": onnx_input['input_ids'],
        "token_type_ids": onnx_input['token_type_ids'],
        "attention_mask": onnx_input['attention_masks'],
        "phoneme_mask": onnx_input['phoneme_masks'],
        "char_ids": onnx_input['char_ids'],
        "position_ids": onnx_input['position_ids']
    }
    probs = session.run([], feed)[0]

    best_columns = np.argmax(probs, axis=1).tolist()
    confidences = [
        row[column] for column, row in zip(best_columns, probs.tolist())
    ]
    predictions = [labels[column] for column in best_columns]
    return predictions, confidences


class G2PWOnnxConverter:
    """Per-character pronunciation converter backed by the g2pW ONNX model.

    Characters in the polyphonic table are resolved by the ONNX model.
    Characters in the monophonic table are read from that table, and all
    remaining characters take the pypinyin result.
    """

    def __init__(self,
                 model_dir: os.PathLike=MODEL_HOME,
                 style: str='bopomofo',
                 model_source: str=None,
                 enable_non_tradional_chinese: bool=False):
        """Load the ONNX session, the tokenizer and the character tables.

        Args:
            model_dir: Base directory that holds (or receives) the model.
            style: ``'bopomofo'`` or ``'pinyin'``; any other value raises
                ``KeyError``.
            model_source: Name or path given to
                ``BertTokenizer.from_pretrained``. Falls back to
                ``config.model_source`` when empty.
            enable_non_tradional_chinese: If True, sentences are converted
                with ``OpenCC('s2tw')`` before prediction.
        """
        uncompress_path = get_g2pw_model_path(model_dir, model_version)

        sess_options = onnxruntime.SessionOptions()
        sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
        sess_options.execution_mode = onnxruntime.ExecutionMode.ORT_SEQUENTIAL
        sess_options.intra_op_num_threads = 2
        self.session_g2pW = onnxruntime.InferenceSession(
            os.path.join(uncompress_path, 'g2pW.onnx'),
            sess_options=sess_options)
        self.config = load_config(
            config_path=os.path.join(uncompress_path, 'config.py'),
            use_default=True)

        self.model_source = model_source if model_source else self.config.model_source
        self.enable_opencc = enable_non_tradional_chinese

        # use the resolved source so an explicit ``model_source`` is honoured
        self.tokenizer = BertTokenizer.from_pretrained(self.model_source)

        polyphonic_chars_path = os.path.join(uncompress_path,
                                             'POLYPHONIC_CHARS.txt')
        monophonic_chars_path = os.path.join(uncompress_path,
                                             'MONOPHONIC_CHARS.txt')
        # both tables hold one tab-separated "char<TAB>phoneme" entry per line
        with open(polyphonic_chars_path, encoding='utf-8') as fr:
            self.polyphonic_chars = [
                line.split('\t') for line in fr.read().strip().split('\n')
            ]
        # characters taken out of the polyphonic set further down
        self.non_polyphonic = {
            '一', '不', '和', '咋', '嗲', '剖', '差', '攢', '倒', '難', '奔', '勁', '拗',
            '肖', '瘙', '誒', '泊', '听', '噢'
        }
        # characters taken out of the monophonic table further down
        self.non_monophonic = {'似', '攢'}
        with open(monophonic_chars_path, encoding='utf-8') as fr:
            self.monophonic_chars = [
                line.split('\t') for line in fr.read().strip().split('\n')
            ]
        self.labels, self.char2phonemes = get_char_phoneme_labels(
            polyphonic_chars=self.polyphonic_chars
        ) if self.config.use_char_phoneme else get_phoneme_labels(
            polyphonic_chars=self.polyphonic_chars)

        self.chars = sorted(list(self.char2phonemes.keys()))

        self.polyphonic_chars_new = set(self.chars)
        for char in self.non_polyphonic:
            if char in self.polyphonic_chars_new:
                self.polyphonic_chars_new.remove(char)

        self.monophonic_chars_dict = {
            char: phoneme
            for char, phoneme in self.monophonic_chars
        }
        for char in self.non_monophonic:
            if char in self.monophonic_chars_dict:
                self.monophonic_chars_dict.pop(char)

        self.pos_tags = [
            'UNK', 'A', 'C', 'D', 'I', 'N', 'P', 'T', 'V', 'DE', 'SHI'
        ]

        with open(
                os.path.join(uncompress_path,
                             'bopomofo_to_pinyin_wo_tune_dict.json'),
                'r',
                encoding='utf-8') as fr:
            self.bopomofo_convert_dict = json.load(fr)
        self.style_convert_func = {
            'bopomofo': lambda x: x,
            'pinyin': self._convert_bopomofo_to_pinyin,
        }[style]

        with open(
                os.path.join(uncompress_path, 'char_bopomofo_dict.json'),
                'r',
                encoding='utf-8') as fr:
            self.char_bopomofo_dict = json.load(fr)

        if self.enable_opencc:
            self.cc = OpenCC('s2tw')

    def _convert_bopomofo_to_pinyin(self, bopomofo: str) -> str:
        """Convert one bopomofo syllable with a trailing tone digit to pinyin.

        The last character is the tone and must be one of ``12345``. The
        rest is looked up in ``bopomofo_convert_dict``. Prints a warning and
        returns None when the lookup finds nothing.
        """
        tone = bopomofo[-1]
        assert tone in '12345'
        component = self.bopomofo_convert_dict.get(bopomofo[:-1])
        if component:
            return component + tone
        else:
            print(f'Warning: "{bopomofo}" cannot convert to pinyin')
            return None

    def __call__(self, sentences: List[str]) -> List[List[str]]:
        """Return one pronunciation per character for every sentence.

        Args:
            sentences: A list of sentences; a single string is treated as a
                one-element list.

        Returns:
            List[List[str]]: For each sentence, a list with one entry per
                character.
        """
        if isinstance(sentences, str):
            sentences = [sentences]

        if self.enable_opencc:
            translated_sentences = []
            for sent in sentences:
                translated_sent = self.cc.convert(sent)
                # positions are used as indices later, so the conversion
                # must not change the sentence length
                assert len(translated_sent) == len(sent)
                translated_sentences.append(translated_sent)
            sentences = translated_sentences

        texts, query_ids, sent_ids, partial_results = self._prepare_data(
            sentences=sentences)
        if len(texts) == 0:
            # sentences no polyphonic words
            return partial_results

        onnx_input = prepare_onnx_input(
            tokenizer=self.tokenizer,
            labels=self.labels,
            char2phonemes=self.char2phonemes,
            chars=self.chars,
            texts=texts,
            query_ids=query_ids,
            use_mask=self.config.use_mask,
            window_size=None)

        preds, confidences = predict(
            session=self.session_g2pW,
            onnx_input=onnx_input,
            labels=self.labels)
        if self.config.use_char_phoneme:
            # labels look like "char phoneme"; keep only the phoneme part
            preds = [pred.split(' ')[1] for pred in preds]

        # fill the slots that _prepare_data left open for the model
        results = partial_results
        for sent_id, query_id, pred in zip(sent_ids, query_ids, preds):
            results[sent_id][query_id] = self.style_convert_func(pred)

        return results

    def _prepare_data(
            self, sentences: List[str]
    ) -> Tuple[List[str], List[int], List[int], List[List[str]]]:
        """Collect the model queries and pre-fill everything else.

        Returns:
            ``(texts, query_ids, sent_ids, partial_results)``. The first
            three lists are parallel and hold one entry per polyphonic
            character: its sentence, its position and the sentence index.
            ``partial_results`` has one list per sentence with the
            polyphonic positions left as None.
        """
        texts, query_ids, sent_ids, partial_results = [], [], [], []
        for sent_id, sent in enumerate(sentences):
            # pypinyin works well for Simplified Chinese than Traditional Chinese
            sent_s = tranditional_to_simplified(sent)
            pypinyin_result = pinyin(
                sent_s, neutral_tone_with_five=True, style=Style.TONE3)
            # NOTE(review): pypinyin_result is indexed by character position
            # below, which assumes one entry per character of `sent` -
            # confirm for text that is not purely Chinese.
            partial_result = [None] * len(sent)
            for i, char in enumerate(sent):
                if char in self.polyphonic_chars_new:
                    texts.append(sent)
                    query_ids.append(i)
                    sent_ids.append(sent_id)
                elif char in self.monophonic_chars_dict:
                    partial_result[i] = self.style_convert_func(
                        self.monophonic_chars_dict[char])
                elif char in self.char_bopomofo_dict:
                    partial_result[i] = pypinyin_result[i][0]
                    # partial_result[i] =  self.style_convert_func(self.char_bopomofo_dict[char][0])
                else:
                    partial_result[i] = pypinyin_result[i][0]

            partial_results.append(partial_result)
        return texts, query_ids, sent_ids, partial_results


================================================
FILE: paddlespeech/t2s/frontend/g2pw/utils.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Credits
    This code is modified from https://github.com/GitYCC/g2pW
"""
import os
import re


def wordize_and_map(text: str):
    """Split ``text`` into words and build index maps in both directions.

    A run of ASCII letters and digits forms one word, a run of spaces forms
    no word, and every other character is a word of its own.

    Returns:
        Tuple ``(words, text2word, word2text)``. ``text2word`` has one entry
        per character, the index of its word or None for a space.
        ``word2text`` has one ``(start, end)`` character span per word.
    """
    words = []
    text2word = []
    word2text = []
    remaining = text
    while remaining:
        blanks = re.match(r'^ +', remaining)
        if blanks is not None:
            width = len(blanks.group(0))
            text2word.extend([None] * width)
            remaining = remaining[width:]
            continue

        alnum = re.match(r'^[a-zA-Z0-9]+', remaining)
        # an alphanumeric run is kept whole, anything else is one character
        piece = alnum.group(0) if alnum else remaining[0]

        # text2word has one entry per consumed character, so its length is
        # the position of the current word in the original text
        begin = len(text2word)
        word2text.append((begin, begin + len(piece)))
        text2word.extend([len(words)] * len(piece))
        words.append(piece)
        remaining = remaining[len(piece):]
    return words, text2word, word2text


def tokenize_and_map(tokenizer, text: str):
    """Tokenize ``text`` word by word and map characters to tokens and back.

    Args:
        tokenizer: Object whose ``tokenize(word)`` returns a list of tokens.
        text: The text to tokenize.

    Returns:
        Tuple ``(tokens, text2token, token2text)``. ``text2token`` has one
        entry per character, a token index or None for a space.
        ``token2text`` has one ``(start, end)`` character span per token.
    """
    words, text2word, word2text = wordize_and_map(text=text)

    tokens = []
    token2text = []
    for word, (word_start, word_end) in zip(words, word2text):
        pieces = tokenizer.tokenize(word)

        if len(pieces) == 0 or pieces == ['[UNK]']:
            # an unknown word becomes one [UNK] token spanning the whole word
            token2text.append((word_start, word_end))
            tokens.append('[UNK]')
            continue

        cursor = word_start
        for piece in pieces:
            # a leading "##" is not counted as text
            piece_len = len(re.sub(r'^##', '', piece))
            token2text.append((cursor, cursor + piece_len))
            cursor += piece_len
            tokens.append(piece)

    # the word-level map is reused and overwritten in place with token indices
    text2token = text2word
    for token_idx, (token_start, token_end) in enumerate(token2text):
        for char_pos in range(token_start, token_end):
            text2token[char_pos] = token_idx

    return tokens, text2token, token2text


def _load_config(config_path: os.PathLike):
    import importlib.util
    spec = importlib.util.spec_from_file_location('__init__', config_path)
    config = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(config)
    return config


# Fallback values that load_config() installs for every setting a config
# file leaves out. Top-level keys become attribute names of the loaded
# config, so they must be exact identifiers with no stray whitespace.
default_config_dict = {
    'manual_seed': 1313,
    'model_source': 'bert-base-chinese',
    'window_size': 32,
    'num_workers': 2,
    'use_mask': True,
    'use_char_phoneme': False,
    'use_conditional': True,
    'param_conditional': {
        'affect_location': 'softmax',
        'bias': True,
        'char-linear': True,
        'pos-linear': False,
        'char+pos-second': True,
        'char+pos-second_lowrank': False,
        'lowrank_size': 0,
        'char+pos-second_fm': False,
        'fm_size': 0,
        'fix_mode': None,
        'count_json': 'train.count.json'
    },
    'lr': 5e-5,
    'val_interval': 200,
    'num_iter': 10000,
    'use_focal': False,
    'param_focal': {
        'alpha': 0.0,
        'gamma': 0.7
    },
    'use_pos': True,
    'param_pos': {
        'weight': 0.1,
        'pos_joint_training': True,
        'train_pos_path': 'train.pos',
        'valid_pos_path': 'dev.pos',
        'test_pos_path': 'test.pos'
    }
}


def load_config(config_path: os.PathLike, use_default: bool=False):
    """Load a Python config file, optionally completing it with defaults.

    Args:
        config_path: Path of the Python file to execute as the config.
        use_default: If True, every setting of ``default_config_dict`` that
            the config lacks is added. For dict-valued settings that the
            config does define, only the missing keys are added.

    Returns:
        The loaded config module object.
    """
    import copy

    config = _load_config(config_path)
    if not use_default:
        return config

    for attr, val in default_config_dict.items():
        if not hasattr(config, attr):
            # hand out a copy so that changing one config can never alter
            # the shared defaults (or another config loaded later)
            setattr(config, attr, copy.deepcopy(val))
        elif isinstance(val, dict):
            d = getattr(config, attr)
            for dict_k, dict_v in val.items():
                if dict_k not in d:
                    d[dict_k] = copy.deepcopy(dict_v)
    return config


================================================
FILE: paddlespeech/t2s/frontend/generate_lexicon.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Design principles: https://zhuanlan.zhihu.com/p/349600439
"""Generate lexicon and symbols for Mandarin Chinese phonology.
The lexicon is used for Montreal Forced Aligner.
Note that syllables are used as words in this lexicon, since syllables rather
than words are used in the transcriptions produced by `reorganize_baker.py`.
We make this choice to better leverage other Chinese text-to-pinyin tools
such as pypinyin. This is the convention for G2P in Chinese.
"""
import re
from collections import OrderedDict

# Initials of the Mandarin phoneme system used by this module.
INITIALS = [
    'b', 'p', 'm', 'f', 'd', 't', 'n', 'l', 'g', 'k', 'h', 'zh', 'ch', 'sh',
    'r', 'z', 'c', 's', 'j', 'q', 'x'
]

# Finals of the same system. 'v' is how 'ü' is written in phonemes, and 'i'
# is split into the three categories 'i', 'ii' and 'iii'.
FINALS = [
    'a', 'ai', 'ao', 'an', 'ang', 'e', 'er', 'ei', 'en', 'eng', 'o', 'ou',
    'ong', 'ii', 'iii', 'i', 'ia', 'iao', 'ian', 'iang', 'ie', 'io', 'iou',
    'iong', 'in', 'ing', 'u', 'ua', 'uai', 'uan', 'uang', 'uei', 'uo', 'uen',
    'ueng', 'v', 've', 'van', 'vn'
]

# Extra symbols that are neither initials nor finals.
# NOTE(review): presumably silence ('sil') and short pause ('sp') - confirm.
SPECIALS = ['sil', 'sp']


def rule(C, V, R, T):
    """Build the pinyin spelling of a syllable from its phonemic parts.

    Pinyin orthography is applied while composing the syllable (the special
    spellings involving y, w, ui, un and iu).

    In the phoneme notation 'ü' is always written 'v'; it is turned into 'u'
    in the spelled syllable after j, q, x. The phonemes 'ii' and 'iii' are both
    spelled 'i' in the result.

    Args:
        C (str): The initial, or '' for a syllable without an initial.
        V (str): The final, in phoneme notation.
        R (str): Erhua indicator, 'r' or ''.
        T (str): Tone suffix (e.g. '1'..'5'), or ''.

    Returns:
        str or None: The spelled syllable followed by ``R`` and ``T``, or None
        when the combination is filtered out as an impossible syllable.
    """
    # 'ii' only combines with z, c, s; 'iii' only with zh, ch, sh, r.
    if V == 'ii' and C not in ('z', 'c', 's'):
        return None
    if V == 'iii' and C not in ('zh', 'ch', 'sh', 'r'):
        return None

    # Finals that begin with i or v (excluding the apical 'ii' / 'iii').
    front_final = V not in ('ii', 'iii') and V[0] in ('i', 'v')

    # Such finals never follow f, g, k, h, zh, ch, sh, r, z, c, s.
    if front_final and C in ('f', 'g', 'k', 'h', 'zh', 'ch', 'sh', 'r', 'z',
                             'c', 's'):
        return None

    # v-finals: 'v' and 've' combine with j, q, x, n, l (or no initial);
    # the remaining ones only with j, q, x (or no initial).
    if V.startswith('v'):
        if V in ('v', 've'):
            allowed_initials = ('j', 'q', 'x', 'n', 'l', '')
        else:
            allowed_initials = ('j', 'q', 'x', '')
        if C not in allowed_initials:
            return None

    # j, q, x only combine with finals that begin with i or v.
    if C in ('j', 'q', 'x') and not front_final:
        return None

    # b, p, m, f reject 'ong' and every u-/v-initial final except plain 'u'.
    if C in ('b', 'p', 'm', 'f'):
        if V == 'ong' or (V[0] in ('u', 'v') and V != 'u'):
            return None

    # ua, uai, uang do not follow d, t, n, l, r, z, c, s.
    if V in ('ua', 'uai', 'uang') and C in ('d', 't', 'n', 'l', 'r', 'z', 'c',
                                            's'):
        return None

    # sh + ong is not a syllable.
    if V == 'ong' and C == 'sh':
        return None

    # 'o' does not follow d, t, n, g, k, h, zh, ch, sh, r, z, c, s.
    if V == 'o' and C in ('d', 't', 'n', 'g', 'k', 'h', 'zh', 'ch', 'sh', 'r',
                          'z', 'c', 's'):
        return None

    # 'ueng' exists only as the standalone syllable "weng" (otherwise 'ong'
    # is used), and non-erhua 'er' only exists without an initial.
    if C != '' and V in ('ueng', 'er'):
        return None

    if C == '':
        # Zero-initial syllables are spelled with y / w / yu.
        if V in ('i', 'in', 'ing'):
            C = 'y'
        elif V == 'u':
            C = 'w'
        elif V.startswith('i') and V not in ('ii', 'iii'):
            C, V = 'y', V[1:]
        elif V.startswith('u'):
            C, V = 'w', V[1:]
        elif V.startswith('v'):
            C, V = 'yu', V[1:]
    else:
        # After j, q, x the phoneme 'v' is written 'u'.
        if C in ('j', 'q', 'x') and V.startswith('v'):
            V = V.replace('v', 'u')
        # Contracted spellings used after an initial.
        V = {'iou': 'iu', 'uei': 'ui', 'uen': 'un'}.get(V, V)

    syllable = C + V

    # A syllable that already ends in 'r' cannot take erhua again.
    if R == 'r' and syllable.endswith('r'):
        return None

    # Spell 'ii' and 'iii' back as a single 'i'.
    syllable = re.sub(r'i+', 'i', syllable)

    return syllable + R + T


def generate_lexicon(with_tone=False, with_erhua=False):
    """Enumerate the Mandarin syllable lexicon.

    Every combination of initial (including the empty initial), final, erhua
    marker and tone is passed through `rule`; combinations for which `rule`
    returns a falsy value are dropped.

    Args:
        with_tone (bool): If True, enumerate tones '1'..'5'; otherwise
            syllables carry no tone suffix.
        with_erhua (bool): If True, also enumerate the erhua ('r') variants.

    Returns:
        OrderedDict: Maps the spelled syllable to its pronunciation string
        ``'<initial> <final><erhua><tone>'``, in enumeration order. When two
        combinations spell the same syllable, the later one overwrites the value.
    """
    erhua_marks = ['', 'r'] if with_erhua else ['']
    tone_marks = ['1', '2', '3', '4', '5'] if with_tone else ['']

    lexicon = OrderedDict()
    for initial in [''] + INITIALS:
        for final in FINALS:
            for erhua in erhua_marks:
                for tone in tone_marks:
                    spelled = rule(initial, final, erhua, tone)
                    if not spelled:
                        continue
                    lexicon[spelled] = f'{initial} {final}{erhua}{tone}'
    return lexicon


================================================
FILE: paddlespeech/t2s/frontend/mix_frontend.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
from typing import Dict
from typing import List

import numpy as np
import paddle

from paddlespeech.t2s.frontend.en_frontend import English as EnFrontend
from paddlespeech.t2s.frontend.ssml.xml_processor import MixTextProcessor
from paddlespeech.t2s.frontend.zh_frontend import Frontend as ZhFrontend


class MixFrontend():
    """Text frontend for sentences that mix Chinese and English.

    The input is split into language segments; each segment is converted to
    phone ids by the Chinese or the English frontend, and the per-segment
    results are concatenated into one list.
    """

    def __init__(self,
                 g2p_model="pypinyin",
                 phone_vocab_path=None,
                 tone_vocab_path=None):
        """Create the Chinese and English sub-frontends.

        Args:
            g2p_model: Accepted for interface compatibility.
                NOTE(review): it is not forwarded to either sub-frontend here.
            phone_vocab_path: Phone vocabulary file passed to both frontends.
            tone_vocab_path: Tone vocabulary file passed to the Chinese frontend.
        """
        self.zh_frontend = ZhFrontend(
            phone_vocab_path=phone_vocab_path, tone_vocab_path=tone_vocab_path)
        self.en_frontend = EnFrontend(phone_vocab_path=phone_vocab_path)
        # id of the 'sp' phone, kept in the three forms used below
        self.sp_id = self.zh_frontend.vocab_phones["sp"]
        self.sp_id_numpy = np.array([self.sp_id])
        self.sp_id_tensor = paddle.to_tensor([self.sp_id])

    def is_chinese(self, char):
        """Return True if `char` lies in the range U+4E00..U+9FA5."""
        return '\u4e00' <= char <= '\u9fa5'

    def is_alphabet(self, char):
        """Return True if `char` is an ASCII letter (A-Z or a-z)."""
        return ('\u0041' <= char <= '\u005a') or ('\u0061' <= char <= '\u007a')

    def is_other(self, char):
        """Return True if `char` is neither Chinese nor an ASCII letter."""
        return not (self.is_chinese(char) or self.is_alphabet(char))

    def _char_lang(self, char):
        """Classify one character as "zh", "en" or "other"."""
        if self.is_chinese(char):
            return "zh"
        if self.is_alphabet(char):
            return "en"
        return "other"

    def split_by_lang(self, text: str) -> List[tuple]:
        """Split `text` into consecutive single-language segments.

        "other" characters (digits, punctuation, spaces, ...) are attached to
        the segment they appear in; leading "other" characters are attached to
        the first zh/en segment that follows them.

        Args:
            text (str): The text to split.

        Returns:
            List[tuple]: ``(segment_text, lang)`` pairs, where lang is "zh",
            "en" or "other" ("other" only if the segment has no zh/en
            character). An empty `text` yields ``[("", "")]``.
        """
        # sentence --> [ch_part, en_part, ch_part, ...]
        segments = []
        temp_seg = ""
        temp_lang = ""

        for i, ch in enumerate(text):
            lang = self._char_lang(ch)
            if i == 0:
                # first char of the text opens the first segment
                temp_seg, temp_lang = ch, lang
            elif temp_lang == "other":
                # the segment has no language yet: absorb the char and adopt
                # its type as the segment language
                temp_seg += ch
                temp_lang = lang
            elif lang == temp_lang or lang == "other":
                # merge same lang or other
                temp_seg += ch
            else:
                # language changed: close the segment and open a new one
                segments.append((temp_seg, temp_lang))
                temp_seg, temp_lang = ch, lang

        segments.append((temp_seg, temp_lang))

        return segments

    def get_input_ids(self,
                      sentence: str,
                      merge_sentences: bool=False,
                      get_tone_ids: bool=False,
                      add_sp: bool=True,
                      to_tensor: bool=True) -> Dict[str, list]:
        """Convert a mixed Chinese/English sentence into phone ids.

        Args:
            sentence (str): Input text, optionally containing SSML markup.
            merge_sentences (bool): If True, concatenate all phone id
                sequences into a single one and drop a trailing 'sp'.
            get_tone_ids (bool): Forwarded to the Chinese frontend.
            add_sp (bool): If True, append the 'sp' id to the last sequence
                of every segment.
            to_tensor (bool): If True the sequences are paddle tensors,
                otherwise numpy arrays.

        Returns:
            Dict[str, list]: ``{"phone_ids": [...]}`` holding paddle tensors
            or numpy arrays, depending on `to_tensor`. The list is empty when
            no segment produced any phones.
        """
        # XML Document Object Model (DOM)
        doms = MixTextProcessor.get_dom_split(sentence)

        lang_splits = []
        for dom in doms:
            if dom.lower().startswith("<say-as pinyin="):
                # `<say-as pinyin=` for zh lang
                lang_splits.append((dom, "zh"))
            else:
                # process zh, en and zh/en
                lang_splits.extend(self.split_by_lang(dom))

        # merge adjacent zh segment
        segments = []
        currentSeg = ["", ""]
        for seg in lang_splits:
            if seg[1] == "en" or seg[1] == "other":
                if currentSeg[0] == '':
                    # no pending zh text
                    segments.append(seg)
                else:
                    # flush the pending zh text, wrapped for the SSML parser
                    currentSeg[0] = "<speak>" + currentSeg[0] + "</speak>"
                    segments.append(tuple(currentSeg))
                    # en
                    segments.append(seg)
                    # reset
                    currentSeg = ["", ""]
            else:
                # zh
                if currentSeg[0] == '':
                    # start a new pending zh segment
                    currentSeg[0] = seg[0]
                    currentSeg[1] = seg[1]
                else:
                    # merge zh
                    currentSeg[0] = currentSeg[0] + seg[0]

        if currentSeg[0] != '':
            # last zh
            currentSeg[0] = "<speak>" + currentSeg[0] + "</speak>"
            segments.append(tuple(currentSeg))

        phones_list = []
        result = {}

        # 008 我们要去云南 team building, 非常非常 happy.
        # seg ('我们要去云南 ', 'zh')
        # seg ('team building, ', 'en')
        # seg ('非常非常 ', 'zh')
        # seg ('happy.', 'en')
        # [('<speak>我们要去云南 </speak>', 'zh'), ('team building, ', 'en'), ('<speak>非常非常 </speak>', 'zh'), ('happy.', 'en')]
        for seg in segments:
            content = seg[0]
            lang = seg[1]

            if not content:
                continue

            if lang == "en":
                input_ids = self.en_frontend.get_input_ids(
                    content, merge_sentences=False, to_tensor=to_tensor)
            else:
                if content.strip() != "" and \
                    re.match(r".*?<speak>.*?</speak>.*", content, re.DOTALL):
                    # process ssml
                    input_ids = self.zh_frontend.get_input_ids_ssml(
                        content,
                        merge_sentences=False,
                        get_tone_ids=get_tone_ids,
                        to_tensor=to_tensor)
                else:
                    # process plain text
                    input_ids = self.zh_frontend.get_input_ids(
                        content,
                        merge_sentences=False,
                        get_tone_ids=get_tone_ids,
                        to_tensor=to_tensor)

            # A segment may yield no phones at all (e.g. nothing but
            # punctuation); there is then no sequence to append 'sp' to.
            if add_sp and input_ids["phone_ids"]:
                # add sp between zh and en
                if to_tensor:
                    input_ids["phone_ids"][-1] = paddle.concat(
                        [input_ids["phone_ids"][-1], self.sp_id_tensor])
                else:
                    input_ids["phone_ids"][-1] = np.concatenate(
                        (input_ids["phone_ids"][-1], self.sp_id_numpy))

            phones_list.extend(input_ids["phone_ids"])

        if merge_sentences and phones_list:
            # rm the last 'sp' to avoid the noise at the end
            # cause in the training data, no 'sp' in the end
            if to_tensor:
                merge_list = paddle.concat(phones_list)
                if merge_list.shape[0] > 0 and \
                        merge_list[-1] == self.sp_id_tensor:
                    merge_list = merge_list[:-1]
            else:
                # numpy arrays must be joined with numpy, not paddle.concat
                merge_list = np.concatenate(phones_list)
                if merge_list.shape[0] > 0 and merge_list[-1] == self.sp_id:
                    merge_list = merge_list[:-1]
            phones_list = [merge_list]

        result["phone_ids"] = phones_list

        return result


================================================
FILE: paddlespeech/t2s/frontend/normalizer/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddlespeech.t2s.frontend.normalizer.normalizer import *
from paddlespeech.t2s.frontend.normalizer.numbers import *


================================================
FILE: paddlespeech/t2s/frontend/normalizer/abbrrviation.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/frontend/normalizer/acronyms.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/frontend/normalizer/normalizer.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
import unicodedata
from builtins import str as unicode

from paddlespeech.t2s.frontend.normalizer.numbers import normalize_numbers


def normalize(sentence):
    """Normalize English text.

    Numbers are expanded with `normalize_numbers`, accents are stripped, the
    text is lower-cased, every character other than a space, a-z and
    ``' . , ? ! -`` is removed, and the abbreviations "i.e." / "e.g." are
    spelled out.

    Args:
        sentence: The input text; it is converted with ``unicode`` (``str``).

    Returns:
        str: The normalized text.
    """
    text = normalize_numbers(unicode(sentence))

    # Strip accents: decompose, then drop the combining marks (category 'Mn').
    decomposed = unicodedata.normalize('NFD', text)
    base_chars = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    text = ''.join(base_chars).lower()

    text = re.sub(r"[^ a-z'.,?!\-]", "", text)

    for abbreviation, spoken in (("i.e.", "that is"),
                                 ("e.g.", "for example")):
        text = text.replace(abbreviation, spoken)
    return text


================================================
FILE: paddlespeech/t2s/frontend/normalizer/numbers.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# number expansion is not that easy
import re

import inflect

# Shared inflect engine used to spell numbers out as English words.
_inflect = inflect.engine()
# A digit, then one or more digits/commas, then a digit (e.g. "1,234").
_comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])')
# Digits, a dot, digits (e.g. "3.14").
_decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)')
# A pound sign followed by digits, optionally with commas; group 1 is the amount.
_pounds_re = re.compile(r'£([0-9\,]*[0-9]+)')
# A dollar sign followed by digits, optionally with dots/commas; group 1 is the amount.
_dollars_re = re.compile(r'\$([0-9\.\,]*[0-9]+)')
# Digits followed by an ordinal suffix (e.g. "1st", "22nd").
_ordinal_re = re.compile(r'[0-9]+(st|nd|rd|th)')
# Any run of digits.
_number_re = re.compile(r'[0-9]+')


def _remove_commas(m):
    return m.group(1).replace(',', '')


def _expand_decimal_point(m):
    return m.group(1).replace('.', ' point ')


def _expand_dollars(m):
    match = m.group(1)
    parts = match.split('.')
    if len(parts) > 2:
        return match + ' dollars'  # Unexpected format
    dollars = int(parts[0]) if parts[0] else 0
    cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0
    if dollars and cents:
        dollar_unit = 'dollar' if dollars == 1 else 'dollars'
        cent_unit = 'cent' if cents == 1 else 'cents'
        return '%s %s, %s %s' % (dollars, dollar_unit, cents, cent_unit)
    elif dollars:
        dollar_unit = 'dollar' if dollars == 1 else 'dollars'
        return '%s %s' % (dollars, dollar_unit)
    elif cents:
        cent_unit = 'cent' if cents == 1 else 'cents'
        return '%s %s' % (cents, cent_unit)
    else:
        return 'zero dollars'


def _expand_ordinal(m):
    return _inflect.number_to_words(m.group(0))


def _expand_number(m):
    num = int(m.group(0))
    if num > 1000 and num < 3000:
        if num == 2000:
            return 'two thousand'
        elif num > 2000 and num < 2010:
            return 'two thousand ' + _inflect.number_to_words(num % 100)
        elif num % 100 == 0:
            return _inflect.number_to_words(num // 100) + ' hundred'
        else:
            return _inflect.number_to_words(
                num, andword='', zero='oh', group=2).replace(', ', ' ')
    else:
        return _inflect.number_to_words(num, andword='')


def normalize_numbers(text):
    """Normalize numbers in English text.

    The substitution passes run in a fixed order: remove thousands commas,
    expand pounds, expand dollars, expand decimal points, expand ordinals and
    finally expand the remaining plain numbers.

    Args:
        text (str): The input text.

    Returns:
        str: The text with the numeric expressions rewritten.
    """
    passes = (
        (_comma_number_re, _remove_commas),
        (_pounds_re, r'\1 pounds'),
        (_dollars_re, _expand_dollars),
        (_decimal_number_re, _expand_decimal_point),
        (_ordinal_re, _expand_ordinal),
        (_number_re, _expand_number),
    )
    for pattern, replacement in passes:
        text = pattern.sub(replacement, text)
    return text


================================================
FILE: paddlespeech/t2s/frontend/normalizer/width.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def full2half_width(ustr):
    """Convert full-width characters in `ustr` to their half-width forms.

    The ideographic space (U+3000) becomes an ASCII space and the characters
    U+FF01..U+FF5E are shifted down by 0xFEE0; everything else is kept.

    Args:
        ustr (str): The input text.

    Returns:
        str: The converted text.
    """

    def _narrow(ch):
        code = ord(ch)
        if code == 0x3000:  # full-width space -> half-width space
            return chr(32)
        if 0xFF01 <= code <= 0xFF5E:
            return chr(code - 0xfee0)
        return ch

    return ''.join(_narrow(ch) for ch in ustr)


def half2full_width(ustr):
    """Convert half-width characters in `ustr` to their full-width forms.

    The ASCII space becomes the ideographic space (U+3000) and the characters
    0x21..0x7E are shifted up by 0xFEE0; everything else is kept.

    Args:
        ustr (str): The input text.

    Returns:
        str: The converted text.
    """

    def _widen(ch):
        code = ord(ch)
        if code == 32:  # half-width space -> full-width space
            return chr(0x3000)
        if 0x21 <= code <= 0x7E:
            return chr(code + 0xfee0)
        return ch

    return ''.join(_widen(ch) for ch in ustr)


================================================
FILE: paddlespeech/t2s/frontend/phonectic.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from abc import ABC
from abc import abstractmethod
from typing import List

import numpy as np
import paddle
from g2p_en import G2p
from g2pM import G2pM

from paddlespeech.t2s.frontend.normalizer.normalizer import normalize
from paddlespeech.t2s.frontend.punctuation import get_punctuations
from paddlespeech.t2s.frontend.vocab import Vocab
from paddlespeech.t2s.frontend.zh_normalization.text_normlization import TextNormalizer

# discard opencc until we find an easy way to install it on Windows
# from opencc import OpenCC

__all__ = ["Phonetics", "English", "EnglishCharacter", "Chinese"]


class Phonetics(ABC):
    """Abstract interface of a text frontend.

    Concrete subclasses must implement `__call__`, `phoneticize` and
    `numericalize`.
    """

    @abstractmethod
    def __call__(self, sentence):
        """Process `sentence`; must be implemented by subclasses."""

    @abstractmethod
    def phoneticize(self, sentence):
        """Convert `sentence` into symbols; must be implemented by subclasses."""

    @abstractmethod
    def numericalize(self, phonemes):
        """Convert `phonemes` into ids; must be implemented by subclasses."""


class English(Phonetics):
    """ Normalize the input text sequence and convert into pronunciation id sequence.

    https://github.com/Kyubyong/g2p/blob/master/g2p_en/g2p.py

    phonemes = ["<pad>", "<unk>", "<s>", "</s>"] + [
        'AA0', 'AA1', 'AA2', 'AE0', 'AE1', 'AE2', 'AH0', 'AH1', 'AH2', 'AO0',
        'AO1', 'AO2', 'AW0', 'AW1', 'AW2', 'AY0', 'AY1', 'AY2', 'B', 'CH', 'D', 'DH',
        'EH0', 'EH1', 'EH2', 'ER0', 'ER1', 'ER2', 'EY0', 'EY1',
        'EY2', 'F', 'G', 'HH',
        'IH0', 'IH1', 'IH2', 'IY0', 'IY1', 'IY2', 'JH', 'K', 'L',
        'M', 'N', 'NG', 'OW0', 'OW1',
        'OW2', 'OY0', 'OY1', 'OY2', 'P', 'R', 'S', 'SH', 'T', 'TH',
        'UH0', 'UH1', 'UH2', 'UW',
        'UW0', 'UW1', 'UW2', 'V', 'W', 'Y', 'Z', 'ZH']
    """

    # Extra pronunciations merged into the g2p backend's dictionary.
    LEXICON = {
        # key using lowercase
        "AI".lower(): [["EY0", "AY1"]],
    }

    def __init__(self, phone_vocab_path=None):
        """Set up the g2p backend and, optionally, the phone-to-id table.

        Args:
            phone_vocab_path (str, optional): Text file with one
                ``<phone> <id>`` pair per line. Blank lines are ignored.
        """
        self.backend = G2p()
        self.backend.cmu.update(English.LEXICON)
        self.phonemes = list(self.backend.phonemes)
        self.punctuations = get_punctuations("en")
        self.vocab = Vocab(self.phonemes + self.punctuations)
        self.vocab_phones = {}
        self.punc = "、：，；。？！“”‘’':,;.?!"
        self.text_normalizer = TextNormalizer()
        if phone_vocab_path:
            with open(phone_vocab_path, 'rt', encoding='utf-8') as f:
                for line in f:
                    fields = line.strip().split()
                    if not fields:
                        # tolerate blank lines (e.g. a trailing newline)
                        continue
                    phn, phn_id = fields
                    self.vocab_phones[phn] = int(phn_id)

    def phoneticize(self, sentence):
        """ Normalize the input text sequence and convert it into pronunciation sequence.
        Args:
            sentence (str): The input text sequence.
        Returns:
            List[str]: The list of pronunciation sequence.
        """
        start = self.vocab.start_symbol
        end = self.vocab.end_symbol
        phonemes = ([] if start is None else [start]) \
                   + self.backend(sentence) \
                   + ([] if end is None else [end])
        # keep only the symbols known to the vocabulary
        phonemes = [item for item in phonemes if item in self.vocab.stoi]
        return phonemes

    def _p2id(self, phonemes: List[str]) -> np.ndarray:
        """Map phones to ids using the table loaded from `phone_vocab_path`."""
        phone_ids = [self.vocab_phones[item] for item in phonemes]
        return np.array(phone_ids, np.int64)

    def get_input_ids(self,
                      sentence: str,
                      merge_sentences: bool=False,
                      to_tensor: bool=True) -> dict:
        """Convert text into phone id sequences, one per split sentence.

        Args:
            sentence (str): The input text.
            merge_sentences (bool): If True, join all sentences into a single
                sequence and drop a trailing 'sp'.
            to_tensor (bool): If True return paddle tensors, otherwise numpy
                arrays.

        Returns:
            dict: ``{"phone_ids": [...]}``. The list is empty when the text
            yields no phones.
        """
        sentences = self.text_normalizer._split(sentence, lang="en")

        phones_list = []
        temp_phone_ids = []
        for sentence in sentences:
            phones = self.phoneticize(sentence)
            # remove start_symbol and end_symbol
            # NOTE(review): assumes the vocab defines both symbols — confirm
            phones = phones[1:-1]
            phones = [phn for phn in phones if not phn.isspace()]
            # replace unk phone with sp
            phones = [
                phn
                if (phn in self.vocab_phones and phn not in self.punc) else "sp"
                for phn in phones
            ]
            if len(phones) != 0:
                phones_list.append(phones)

        # Nothing to merge when no sentence produced phones; indexing the
        # empty merged list would raise IndexError.
        if merge_sentences and phones_list:
            merge_list = sum(phones_list, [])
            # rm the last 'sp' to avoid the noise at the end
            # cause in the training data, no 'sp' in the end
            if merge_list[-1] == 'sp':
                merge_list = merge_list[:-1]
            phones_list = [merge_list]

        for part_phones_list in phones_list:
            phone_ids = self._p2id(part_phones_list)
            if to_tensor:
                phone_ids = paddle.to_tensor(phone_ids)
            temp_phone_ids.append(phone_ids)

        result = {}
        result["phone_ids"] = temp_phone_ids

        return result

    def numericalize(self, phonemes):
        """ Convert pronunciation sequence into pronunciation id sequence.
        Args:
            phonemes (List[str]): The list of pronunciation sequence.
        Returns:
            List[int]: The list of pronunciation id sequence.
        """
        ids = [
            self.vocab.lookup(item) for item in phonemes
            if item in self.vocab.stoi
        ]
        return ids

    def reverse(self, ids):
        """ Reverse the list of pronunciation id sequence to a list of pronunciation sequence.
        Args:
            ids (List[int]): The list of pronunciation id sequence.
        Returns:
            List[str]: The list of pronunciation sequence.
        """
        return [self.vocab.reverse(i) for i in ids]

    def __call__(self, sentence):
        """ Convert the input text sequence into pronunciation id sequence.
        Args:
            sentence(str): The input text sequence.
        Returns:
            List[int]: The list of pronunciation id sequence.
        """
        return self.numericalize(self.phoneticize(sentence))

    @property
    def vocab_size(self):
        """ Vocab size.
        """
        return len(self.vocab)


class EnglishCharacter(Phonetics):
    """Character-level English frontend: normalizes text and maps each
    character to an id.
    """

    def __init__(self):
        self.backend = G2p()
        self.graphemes = list(self.backend.graphemes)
        self.punctuations = get_punctuations("en")
        # the vocabulary is the backend's graphemes plus English punctuation
        self.vocab = Vocab(self.graphemes + self.punctuations)

    def phoneticize(self, sentence):
        """Normalize the input text.

        Args:
            sentence (str): The input text sequence.

        Returns:
            str: The text after normalization.
        """
        return normalize(sentence)

    def numericalize(self, sentence):
        """Convert a text sequence into character ids.

        Characters that are not in the vocabulary are skipped.

        Args:
            sentence (str): The input text sequence.

        Returns:
            List[int]: The character id sequence.
        """
        known = self.vocab.stoi
        ids = []
        for char in sentence:
            if char in known:
                ids.append(self.vocab.lookup(char))
        return ids

    def reverse(self, ids):
        """Convert a character id sequence back into characters.

        Args:
            ids (List[int]): The character id sequence.

        Returns:
            list: One vocabulary entry per id, in order.
        """
        return [self.vocab.reverse(idx) for idx in ids]

    def __call__(self, sentence):
        """Normalize the input text and convert it into character ids.

        Args:
            sentence (str): The input text sequence.

        Returns:
            List[int]: The character id sequence.
        """
        normalized = self.phoneticize(sentence)
        return self.numericalize(normalized)

    @property
    def vocab_size(self):
        """Number of entries in the vocabulary."""
        return len(self.vocab)


class Chinese(Phonetics):
    """Normalize Chinese text sequence and convert it into ids.
    """

    def __init__(self):
        # self.opencc_backend = OpenCC('t2s.json')
        self.backend = G2pM()
        self.phonemes = self._get_all_syllables()
        self.punctuations = get_punctuations("cn")
        self.vocab = Vocab(self.phonemes + self.punctuations)

    def _get_all_syllables(self):
        """Collect every syllable listed in the backend's dictionary.

        Returns:
            List[str]: The distinct syllables, sorted. Sorting makes the list
            identical across processes; iterating a bare set of strings
            follows the per-process string hash, so the symbol order handed
            to `Vocab` would otherwise not be reproducible.
            NOTE(review): assumes the dictionary values are lists of str.
        """
        all_syllables = {
            syllable
            for syllables in self.backend.cedict.values()
            for syllable in syllables
        }
        return sorted(all_syllables)

    def phoneticize(self, sentence):
        """ Normalize the input text sequence and convert it into pronunciation sequence.
        Args:
            sentence(str): The input text sequence.
        Returns:
            List[str]: The list of pronunciation sequence.
        """
        # simplified = self.opencc_backend.convert(sentence)
        simplified = sentence
        phonemes = self.backend(simplified)
        start = self.vocab.start_symbol
        end = self.vocab.end_symbol
        phonemes = ([] if start is None else [start]) \
                   + phonemes \
                   + ([] if end is None else [end])
        return self._filter_symbols(phonemes)

    def _filter_symbols(self, phonemes):
        """Keep the items known to the vocabulary.

        An item that is not in the vocabulary is broken into characters, and
        those characters that are in the vocabulary are kept.
        """
        cleaned_phonemes = []
        for item in phonemes:
            if item in self.vocab.stoi:
                cleaned_phonemes.append(item)
            else:
                for char in item:
                    if char in self.vocab.stoi:
                        cleaned_phonemes.append(char)
        return cleaned_phonemes

    def numericalize(self, phonemes):
        """ Convert pronunciation sequence into pronunciation id sequence.
        Args:
            phonemes(List[str]): The list of pronunciation sequence.
        Returns:
            List[int]: The list of pronunciation id sequence.
        """
        ids = [self.vocab.lookup(item) for item in phonemes]
        return ids

    def __call__(self, sentence):
        """ Convert the input text sequence into pronunciation id sequence.
        Args:
            sentence (str): The input text sequence.
        Returns:
            List[int]: The list of pronunciation id sequence.
        """
        return self.numericalize(self.phoneticize(sentence))

    @property
    def vocab_size(self):
        """ Vocab size.
        """
        return len(self.vocab)

    def reverse(self, ids):
        """ Reverse the list of pronunciation id sequence to a list of pronunciation sequence.
        Args:
            ids (List[int]): The list of pronunciation id sequence.
        Returns:
            List[str]: The list of pronunciation sequence.
        """
        return [self.vocab.reverse(i) for i in ids]


================================================
FILE: paddlespeech/t2s/frontend/polyphonic.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import yaml


class Polyphonic():
    """Dictionary-based pronunciation override for known polyphonic words.

    The word -> pinyin table is read from ``polyphonic.yaml``, located in the
    same directory as this module, under its top-level ``polyphonic`` key.
    """

    def __init__(self):
        yaml_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'polyphonic.yaml')
        with open(yaml_path, 'r', encoding='utf-8') as polyphonic_file:
            # The table only holds plain strings and lists of strings, so the
            # restricted safe loader is sufficient to parse it.
            polyphonic_dict = yaml.safe_load(polyphonic_file)
        self.polyphonic_words = polyphonic_dict["polyphonic"]

    def correct_pronunciation(self, word, pinyin):
        """Return the dictionary reading of ``word`` if it has one.

        Args:
            word: The word to look up in the override table.
            pinyin: The reading predicted elsewhere for ``word``.

        Returns:
            The table entry for ``word`` when the word is listed, otherwise
            ``pinyin`` unchanged.
        """
        return self.polyphonic_words.get(word, pinyin)


================================================
FILE: paddlespeech/t2s/frontend/polyphonic.yaml
================================================
# Pronunciation overrides consumed by polyphonic.py.
# Each key under `polyphonic` is a word; its value is the list of pinyin
# syllables (one per character, each ending in a tone digit) that replaces the
# predicted reading of that word.
polyphonic:
    湖泊: ['hu2','po1']
    地壳: ['di4','qiao4']
    柏树: ['bai3','shu4']
    曝光: ['bao4','guang1']
    弹力: ['tan2','li4']
    字帖: ['zi4','tie4']
    口吃: ['kou3','chi1']
    包扎: ['bao1','za1']
    哪吒: ['ne2','zha1']
    说服: ['shuo1','fu2']
    识字: ['shi2','zi4']
    骨头: ['gu3','tou5']
    对称: ['dui4','chen4']
    口供: ['kou3','gong4']
    抹布: ['ma1','bu4']
    露背: ['lu4','bei4']
    圈养: ['juan4', 'yang3']
    眼眶: ['yan3', 'kuang4']
    品行: ['pin3','xing2']
    颤抖: ['chan4','dou3']
    差不多: ['cha4','bu5','duo1']
    鸭绿江: ['ya1','lu4','jiang1']
    撒切尔: ['sa4','qie4','er3']
    比比皆是: ['bi3','bi3','jie1','shi4']
    身无长物: ['shen1','wu2','chang2','wu4']
    手里: ['shou2','li3']
    关卡: ['guan1','qia3']
    怀揣: ['huai2','chuai1']
    挑剔: ['tiao1','ti4']
    供称: ['gong4','cheng1']
    作坊: ['zuo1', 'fang5']
    中医: ['zhong1','yi1']
    嚷嚷: ['rang1','rang5']
    商厦: ['shang1','sha4']
    大厦: ['da4','sha4']
    刹车: ['sha1','che1']
    嘚瑟: ['de4','se5']
    朝鲜: ['chao2','xian3']
    阿房宫: ['e1','pang2','gong1']
    阿胶: ['e1','jiao1']
    咖喱: ['ga1','li5']
    时分: ['shi2','fen1']
    蚌埠: ['beng4','bu4']
    驯服: ['xun4','fu2']
    幸免于难: ['xing4','mian3','yu2','nan4']
    恶行: ['e4','xing2']
    唉: ['ai4']
    扎实: ['zha1','shi2']
    干将: ['gan4','jiang4']
    陈威行: ['chen2', 'wei1', 'hang2']
    郭晟: ['guo1', 'sheng4']
    中标: ['zhong4', 'biao1']
    抗住: ['kang2', 'zhu4']

================================================
FILE: paddlespeech/t2s/frontend/punctuation.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__all__ = ["get_punctuations"]

# Punctuation marks returned for English ("en").
EN_PUNCT = [" ", "-", "...", ",", ".", "?", "!"]

# Punctuation marks returned for Chinese ("cn").
CN_PUNCT = [
    "、",
    "，",
    "；",
    "：",
    "。",
    "？",
    "！",
]


def get_punctuations(lang):
    """Return the punctuation list registered for a language code.

    Args:
        lang (str): Either "en" or "cn".

    Returns:
        List[str]: The module-level list for that language (not a copy).

    Raises:
        ValueError: If ``lang`` is neither "en" nor "cn".
    """
    if lang == "en":
        return EN_PUNCT
    if lang == "cn":
        return CN_PUNCT
    raise ValueError(f"language {lang} Not supported")


================================================
FILE: paddlespeech/t2s/frontend/rhy_prediction/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .rhy_predictor import *


================================================
FILE: paddlespeech/t2s/frontend/rhy_prediction/rhy_predictor.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import re

import paddle
import yaml
from paddlenlp.transformers import ErnieTokenizer
from yacs.config import CfgNode

from paddlespeech.cli.utils import download_and_decompress
from paddlespeech.resource.pretrained_models import rhy_frontend_models
from paddlespeech.text.models.ernie_linear import ErnieLinear
from paddlespeech.utils.env import MODEL_HOME

# Registry mapping a classifier name to the model class that implements it.
DefinedClassifier = {
    'ErnieLinear': ErnieLinear,
}

# Key used to pick the resource entry under rhy_frontend_models['rhy_e2e'].
model_version = '1.0'


class RhyPredictor():
    """Insert rhythm marks into text using a pretrained ``ErnieLinear`` model.

    On construction the model resources (config, mark list and weights) are
    obtained through ``download_and_decompress`` and the classifier is put in
    evaluation mode.
    """

    def __init__(
            self,
            model_dir: os.PathLike=MODEL_HOME, ):
        """Fetch the model resources and build the classifier.

        Args:
            model_dir (os.PathLike): Directory handed to
                ``download_and_decompress`` as the download target.
        """
        uncompress_path = download_and_decompress(
            rhy_frontend_models['rhy_e2e'][model_version], model_dir)
        # Read the text resources as UTF-8 explicitly so that loading does not
        # depend on the platform's default encoding.
        with open(
                os.path.join(uncompress_path, 'rhy_default.yaml'),
                'r',
                encoding='utf-8') as f:
            config = CfgNode(yaml.safe_load(f))
        with open(
                os.path.join(uncompress_path, 'rhy_token'), 'r',
                encoding='utf-8') as f:
            marks = [line.strip() for line in f]
        # Index 0 is a placeholder for the "no mark" label, so label ``k``
        # predicted by the model maps to ``punc_list[k]``.
        self.punc_list = [0] + marks
        self.make_rhy_dict()
        self.model = DefinedClassifier["ErnieLinear"](**config["model"])
        pretrained_token = config['data_params']['pretrained_token']
        self.tokenizer = ErnieTokenizer.from_pretrained(pretrained_token)
        state_dict = paddle.load(
            os.path.join(uncompress_path, 'snapshot_iter_2600_main_params.pdz'))
        self.model.set_state_dict(state_dict)
        self.model.eval()

    def _clean_text(self, text):
        """Lower-case ``text`` and drop every character that is not kept.

        Only ASCII letters, digits and characters in ``\\u4e00-\\u9fa5`` survive
        the first pass; the second pass removes any character that belongs to
        one of the rhythm marks.
        """
        text = text.lower()
        text = re.sub('[^A-Za-z0-9\u4e00-\u9fa5]', '', text)
        mark_chars = "".join(self.punc_list[1:])
        # An empty character class is a regex error, and unescaped marks such
        # as '\\', '-' or '^' would change the meaning of the class.
        if mark_chars:
            text = re.sub(f'[{re.escape(mark_chars)}]', '', text)
        return text

    def preprocess(self, text, tokenizer):
        """Clean ``text`` and tokenize it character by character.

        Args:
            text (str): Raw input text.
            tokenizer: Tokenizer called with the list of cleaned characters.

        Returns:
            dict: ``input_ids``, ``seg_ids`` and ``seq_len`` taken from the
                tokenizer output.

        Raises:
            AssertionError: If nothing is left of ``text`` after cleaning.
        """
        clean_text = self._clean_text(text)
        assert len(clean_text) > 0, f'Invalid input string: {text}'
        tokenized_input = tokenizer(
            list(clean_text), return_length=True, is_split_into_words=True)
        _inputs = dict()
        _inputs['input_ids'] = tokenized_input['input_ids']
        _inputs['seg_ids'] = tokenized_input['token_type_ids']
        _inputs['seq_len'] = tokenized_input['seq_len']
        return _inputs

    def get_prediction(self, raw_text):
        """Return the cleaned text with predicted rhythm marks inserted.

        Args:
            raw_text (str): Raw input text.

        Returns:
            str: The tokens of the cleaned text, each followed by its mark
                when the predicted label is non-zero.
        """
        _inputs = self.preprocess(raw_text, self.tokenizer)
        seq_len = _inputs['seq_len']
        input_ids = paddle.to_tensor(_inputs['input_ids']).unsqueeze(0)
        seg_ids = paddle.to_tensor(_inputs['seg_ids']).unsqueeze(0)
        logits, _ = self.model(input_ids, seg_ids)
        preds = paddle.argmax(logits, axis=-1).squeeze(0)
        # The first and last positions are skipped; presumably these are the
        # tokenizer's special start/end tokens -- TODO confirm.
        tokens = self.tokenizer.convert_ids_to_tokens(
            _inputs['input_ids'][1:seq_len - 1])
        labels = preds[1:seq_len - 1].tolist()
        assert len(tokens) == len(labels)
        pieces = []
        for token, label in zip(tokens, labels):
            pieces.append(token)
            if label != 0:  # label 0 means "no mark"
                pieces.append(self.punc_list[label])
        return ''.join(pieces)

    def make_rhy_dict(self):
        """Build ``self.rhy_dict`` mapping the k-th mark to ``'sp<k>'`` (k from 1)."""
        self.rhy_dict = {}
        for index, mark in enumerate(self.punc_list[1:], start=1):
            self.rhy_dict[mark] = 'sp' + str(index)

    def pinyin_align(self, pinyins, rhy_pre):
        """Merge pinyins with the rhythm marks found in ``rhy_pre``.

        Args:
            pinyins: Sequence of pinyins, consumed in order for every item of
                ``rhy_pre`` that is not a rhythm mark.
            rhy_pre: Sequence (e.g. the string from ``get_prediction``) whose
                items are either rhythm marks or ordinary characters.

        Returns:
            list: One entry per item of ``rhy_pre``: the ``'sp<k>'`` symbol for
                a mark, otherwise the next pinyin.
        """
        final_py = []
        next_pinyin = 0
        for item in rhy_pre:
            if item in self.rhy_dict:
                final_py.append(self.rhy_dict[item])
            else:
                final_py.append(pinyins[next_pinyin])
                next_pinyin += 1
        return final_py


================================================
FILE: paddlespeech/t2s/frontend/sing_frontend.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
from typing import Dict
from typing import List

import librosa
import numpy as np
import paddle
from pypinyin import lazy_pinyin


class SingFrontend():
    def __init__(self, pinyin_phone_path: str, phone_vocab_path: str):
        """SVS Frontend

        Args:
            pinyin_phone_path (str): pinyin to phone file path, a 'pinyin|phones' (like: ba|b a ) pair per line.
            phone_vocab_path (str): phone to phone id file path, a 'phone phone id' (like: a 4 ) pair per line.
        """
        # Character class of the punctuation removed from lyrics.
        self.punc = '[、：，；。？！“”‘’\':,;.?!]'

        self.pinyin_phones = {'AP': 'AP', 'SP': 'SP'}
        if pinyin_phone_path:
            with open(pinyin_phone_path, 'rt', encoding='utf-8') as f:
                for line in f:
                    pinyin_phn = [
                        x.strip() for x in line.split('|') if x.strip() != ''
                    ]
                    if not pinyin_phn:
                        # Blank line (e.g. a trailing newline): nothing to add.
                        continue
                    self.pinyin_phones[pinyin_phn[0]] = pinyin_phn[1]

        self.vocab_phones = {}
        if phone_vocab_path:
            with open(phone_vocab_path, 'rt', encoding='utf-8') as f:
                for line in f:
                    fields = line.split()
                    if not fields:
                        # Blank line: nothing to add.
                        continue
                    phn, phn_id = fields
                    self.vocab_phones[phn] = int(phn_id)

    def get_phones(self, sentence: str) -> List[str]:
        """get phone list

        Args:
            sentence (str): sentence

        Returns:
            List[str]: one space-separated phone string per pinyin; a pinyin
                missing from the pinyin-to-phone table is replaced with "SP".

        Example:
            sentence = "你好"
            phones = ['n i', 'h ao']
        """
        # remove all punc
        sentence = re.sub(self.punc, "", sentence)

        # Pypinyin can't solve polyphonic words
        sentence = sentence.replace('最长', '最常').replace('长睫毛', '常睫毛') \
            .replace('那么长', '那么常').replace('多长', '多常') \
            .replace('很长', '很常')

        # lyric
        pinyins = lazy_pinyin(sentence, strict=False)
        # replace unk word with SP
        pinyins = [
            pinyin if pinyin in self.pinyin_phones else "SP"
            for pinyin in pinyins
        ]
        # Every entry is now a key of the table, so a direct lookup is enough.
        phones = [self.pinyin_phones[pinyin] for pinyin in pinyins]

        return phones

    def get_note_info(self, note_info: str) -> List[str]:
        """Split a '|'-separated string into its stripped, non-empty fields."""
        note_info = [x.strip() for x in note_info.split('|') if x.strip() != '']
        return note_info

    def process(
            self,
            phones: List[str],
            notes: List[str],
            note_durs: List[str], ) -> tuple:
        """Expand per-word inputs into per-phone sequences.

        Each of the three inputs holds one space-separated string per word.
        Every phone of a word gets the word's first note and first duration
        with slur flag 0; each additional note of the word repeats the word's
        last phone with that note, its duration, and slur flag 1.

        Args:
            phones (List[str]): phones of each word, e.g. 'n i'.
            notes (List[str]): notes of each word.
            note_durs (List[str]): note durations of each word.

        Returns:
            tuple: (new_phones, new_notes, new_note_durs, is_slurs), four lists
                of equal length.
        """
        new_phones = []
        new_notes = []
        new_note_durs = []
        is_slurs = []
        assert len(phones) == len(notes) == len(
            note_durs
        ), "Please check the input, text, notes, note_durs should be the same length."
        for word_phones, word_notes, word_durs in zip(phones, notes,
                                                      note_durs):
            phone = word_phones.split()
            note = word_notes.split()
            note_dur = word_durs.split()

            for phn in phone:
                new_phones.append(phn)
                new_notes.append(note[0])
                new_note_durs.append(note_dur[0])
                is_slurs.append(0)

            # Extra notes on the same word are slurs carried by its last phone.
            for j in range(1, len(note)):
                new_phones.append(phone[-1])
                new_notes.append(note[j])
                new_note_durs.append(note_dur[j])
                is_slurs.append(1)

        return new_phones, new_notes, new_note_durs, is_slurs

    def get_input_ids(self, svs_input: Dict[str, str],
                      to_tensor: bool=True) -> Dict[str, List[paddle.Tensor]]:
        """convert input to int/float.

        Args:
            svs_input (Dict[str, str]): include keys: if input_type is phoneme, phones, notes, note_durs and is_slurs are needed.
            if input_type is word, text, notes, and note_durs are needed.
            to_tensor (bool, optional): whether to convert to Tensor. Defaults to True.

        Returns:
            Dict[str, List[paddle.Tensor]]: result include phone_ids, note_ids, note_durs, is_slurs.

        Raises:
            ValueError: if input_type is neither 'phoneme' nor 'word'.
        """
        result = {}
        input_type = svs_input['input_type']
        if input_type == 'phoneme':
            assert "phones" in svs_input.keys() and "notes" in svs_input.keys() and "note_durs" in svs_input.keys() and "is_slurs" in svs_input.keys(), \
                "When input_type is phoneme, phones, notes, note_durs, is_slurs should be in the svs_input."
            phones = svs_input["phones"].split()
            notes = svs_input["notes"].split()
            note_durs = svs_input["note_durs"].split()
            is_slurs = svs_input["is_slurs"].split()
            assert len(phones) == len(notes) == len(note_durs) == len(
                is_slurs
            ), "Please check the input, phones, notes, note_durs is_slurs should be the same length."
        elif input_type == "word":
            assert "text" in svs_input.keys() and "notes" in svs_input.keys() and "note_durs" in svs_input.keys(), \
                "When input_type is word, text, notes, note_durs, should be in the svs_input."
            phones = self.get_phones(svs_input['text'])
            notes = self.get_note_info(svs_input['notes'])
            note_durs = self.get_note_info(svs_input['note_durs'])
            phones, notes, note_durs, is_slurs = self.process(
                phones=phones, notes=notes, note_durs=note_durs)
        else:
            # Fail with a clear message instead of an UnboundLocalError below.
            raise ValueError(
                f"Unsupported input_type {input_type!r}, expected 'phoneme' or 'word'."
            )

        phone_ids = [self.vocab_phones[phn] for phn in phones]
        phone_ids = np.array(phone_ids, np.int64)
        # 'rest' maps to note id 0; otherwise only the part before '/' is
        # converted to a MIDI number.
        note_ids = [
            librosa.note_to_midi(note.split("/")[0]) if note != 'rest' else 0
            for note in notes
        ]
        note_ids = np.array(note_ids, np.int64)
        note_durs = np.array(note_durs, np.float32)
        is_slurs = np.array(is_slurs, np.int64)

        if to_tensor:
            phone_ids = paddle.to_tensor(phone_ids)
            note_ids = paddle.to_tensor(note_ids)
            note_durs = paddle.to_tensor(note_durs)
            is_slurs = paddle.to_tensor(is_slurs)

        result['phone_ids'] = [phone_ids]
        result['note_ids'] = [note_ids]
        result['note_durs'] = [note_durs]
        result['is_slurs'] = [is_slurs]

        return result


================================================
FILE: paddlespeech/t2s/frontend/ssml/__init__.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .xml_processor import *


================================================
FILE: paddlespeech/t2s/frontend/ssml/xml_processor.py
================================================
# -*- coding: utf-8 -*-
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
import xml.dom.minidom
import xml.parsers.expat
from xml.dom.minidom import Node
from xml.dom.minidom import parseString
'''
Note:  xml 有5种特殊字符， &<>"'
其一，采用<![CDATA[ ]]>特殊标签，将包含特殊字符的字符串封装起来。
例如：
<TitleName><![CDATA["姓名"]]></TitleName>
其二，使用XML转义序列表示这些特殊的字符，这5个特殊字符所对应XML转义序列为：
&  &amp;
<  &lt;
>  &gt;
"  &quot;
'  &apos;
例如：
<TitleName>&quot;姓名&quot;</TitleName>
'''


class MixTextProcessor():
    """Split text that mixes plain content with one ``<speak>...</speak>`` block."""

    # Matches a ``<speak>...</speak>`` span anywhere in the string.
    _XML_PATTERN = re.compile(r"<speak>.*?</speak>", re.M | re.S)
    # Captures (text before, ``<speak>`` block, text after).
    _SPLIT_PATTERN = re.compile(r'(.*\s*?)(<speak>.*?</speak>)(.*\s*)$',
                                re.M | re.S)

    def __repr__(self):
        # __repr__ must return a string; printing and returning None makes
        # repr() raise TypeError.
        return "@an MixTextProcessor class"

    def get_xml_content(self, mixstr):
        '''Return the ``<speak>...</speak>`` part of ``mixstr``, or None if absent.'''
        found = self._XML_PATTERN.search(mixstr)
        if found:
            return found.group(0)
        return None

    def get_content_split(self, mixstr):
        ''' Split the text, in order, into non-xml and xml parts.

        Punctuation is kept, and whitespace is not stripped because xml tag
        attributes contain spaces.

        Returns:
            list: ``[before, xml, after]`` when a ``<speak>`` block is found
                (``before``/``after`` may be empty strings), otherwise
                ``[mixstr]``.
        '''
        mat = self._SPLIT_PATTERN.match(mixstr)
        if mat:
            return [mat.group(1), mat.group(2), mat.group(3)]
        return [mixstr]

    @classmethod
    def get_pinyin_split(cls, mixstr):
        ''' Split the text into ``[sub sentence, [syllables]]`` pairs.

        Text outside the ``<speak>`` block gets an empty syllable list; the
        pairs for the block itself come from ``DomXml.get_pinyins_for_xml``.
        '''
        ctlist = []
        mat = cls._SPLIT_PATTERN.match(mixstr)
        if mat:
            # pre <speak>
            pre_xml = mat.group(1)
            # between <speak> ... </speak>
            in_xml = mat.group(2)
            # post </speak>
            after_xml = mat.group(3)

            # pre with none syllable
            if pre_xml:
                ctlist.append([pre_xml, []])

            # between with syllable
            # [(sub sentence, [syllables]), ...]
            dom = DomXml(in_xml)
            pinyinlist = dom.get_pinyins_for_xml()
            ctlist = ctlist + pinyinlist

            # post with none syllable
            if after_xml:
                ctlist.append([after_xml, []])
        else:
            ctlist.append([mixstr, []])

        return ctlist

    @classmethod
    def get_dom_split(cls, mixstr):
        ''' Split the text, in order, into plain text and say-as tags.

        Text outside the ``<speak>`` block is kept as is (when non-empty); the
        items for the block itself come from
        ``DomXml.get_text_and_sayas_tags``.
        '''
        ctlist = []
        mat = cls._SPLIT_PATTERN.match(mixstr)
        if mat:
            pre_xml = mat.group(1)
            in_xml = mat.group(2)
            after_xml = mat.group(3)

            if pre_xml:
                ctlist.append(pre_xml)

            dom = DomXml(in_xml)
            tags = dom.get_text_and_sayas_tags()
            ctlist.extend(tags)

            if after_xml:
                ctlist.append(after_xml)
        else:
            ctlist.append(mixstr)

        return ctlist


class DomXml():
    """Helpers for reading text and ``pinyin`` attributes out of an xml string."""

    def __init__(self, xmlstr):
        self.tdom = parseString(xmlstr)  #Document
        self.root = self.tdom.documentElement  #Element
        self.rnode = self.tdom.childNodes  #NodeList

    def _collect_text(self):
        '''Gather text found up to two element levels below the document's children.'''
        res = []

        for x1 in self.rnode:
            if x1.nodeType == Node.TEXT_NODE:
                # minidom Text nodes expose their content as ``data``.
                res.append(x1.data)
            else:
                for x2 in x1.childNodes:
                    if isinstance(x2, xml.dom.minidom.Text):
                        res.append(x2.data)
                    else:
                        for x3 in x2.childNodes:
                            if isinstance(x3, xml.dom.minidom.Text):
                                res.append(x3.data)
                            else:
                                print("len(nodes of x3):", len(x3.childNodes))

        return res

    def get_text(self):
        '''Return the list of all text contents of the xml.'''
        return self._collect_text()

    def get_xmlchild_list(self):
        '''Return the list of all text contents of the xml (without tags), and print it.'''
        res = self._collect_text()
        print(res)
        return res

    def get_pinyins_for_xml(self):
        '''Return the xml content as a list of ``[text, pinyins]`` pairs.

        Whitespace is removed from every text. Text directly under the root
        element gets an empty pinyin list; text inside a child element gets
        that element's ``pinyin`` attribute split on single spaces, or an empty
        list when the element has no such attribute.
        '''
        res = []

        for x1 in self.rnode:
            if x1.nodeType == Node.TEXT_NODE:
                t = re.sub(r"\s+", "", x1.data)
                res.append([t, []])
            else:
                for x2 in x1.childNodes:
                    if isinstance(x2, xml.dom.minidom.Text):
                        t = re.sub(r"\s+", "", x2.data)
                        res.append([t, []])
                    else:
                        # Start from an empty list for every node so that a
                        # tag without ``pinyin`` neither reuses the previous
                        # tag's syllables nor hits an unbound variable. Only
                        # elements carry attributes (comments do not).
                        pinyins = []
                        if x2.nodeType == Node.ELEMENT_NODE and x2.hasAttribute(
                                'pinyin'):
                            pinyin_value = x2.getAttribute("pinyin")
                            pinyins = pinyin_value.split(" ")
                        for x3 in x2.childNodes:
                            if isinstance(x3, xml.dom.minidom.Text):
                                t = re.sub(r"\s+", "", x3.data)
                                res.append([t, pinyins])
                            else:
                                print("len(nodes of x3):", len(x3.childNodes))

        return res

    def get_all_tags(self, tag_name):
        '''Print every ``tag_name`` element that has a ``pinyin`` attribute, with its value and first child's data.'''
        alltags = self.root.getElementsByTagName(tag_name)
        for x in alltags:
            if x.hasAttribute('pinyin'):  # pinyin
                print(x.tagName, 'pinyin',
                      x.getAttribute('pinyin'), x.firstChild.data)

    def get_text_and_sayas_tags(self):
        '''Return the xml content as a list holding all texts and <say-as> tags.

        Every child of the root element is serialized with ``toxml()``.
        '''
        res = []

        for x1 in self.rnode:
            if x1.nodeType == Node.TEXT_NODE:
                res.append(x1.data)
            else:
                for x2 in x1.childNodes:
                    res.append(x2.toxml())
        return res


================================================
FILE: paddlespeech/t2s/frontend/tone_sandhi.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List
from typing import Tuple

import jieba
from pypinyin import lazy_pinyin
from pypinyin import Style


class ToneSandhi():
    """Rule-based tone sandhi for Mandarin words.

    The class works on "finals": strings whose last character is the tone
    digit (the rules below rewrite that digit, using "5" for the neutral
    tone).  Two public entry points are offered:

    * ``pre_merge_for_modify`` regroups a ``[(word, pos), ...]`` segmentation
      so that the sandhi rules see the units they need (e.g. "不" + word).
    * ``modified_tone`` applies the "不", "一", neutral-tone and third-tone
      rules to the finals of a single word.

    NOTE: the ``*_sandhi`` helpers may modify the ``finals`` list they receive
    in place; callers should always use the returned list.
    """

    def __repr__(self):
        return "MandarinToneSandhi"

    def __init__(self):
        # words whose last syllable is read with the neutral tone
        self.must_neural_tone_words = {
            '麻烦', '麻利', '鸳鸯', '高粱', '骨头', '骆驼', '马虎', '首饰', '馒头', '馄饨', '风筝',
            '难为', '队伍', '阔气', '闺女', '门道', '锄头', '铺盖', '铃铛', '铁匠', '钥匙', '里脊',
            '里头', '部分', '那么', '道士', '造化', '迷糊', '连累', '这么', '这个', '运气', '过去',
            '软和', '转悠', '踏实', '跳蚤', '跟头', '趔趄', '财主', '豆腐', '讲究', '记性', '记号',
            '认识', '规矩', '见识', '裁缝', '补丁', '衣裳', '衣服', '衙门', '街坊', '行李', '行当',
            '蛤蟆', '蘑菇', '薄荷', '葫芦', '葡萄', '萝卜', '荸荠', '苗条', '苗头', '苍蝇', '芝麻',
            '舒服', '舒坦', '舌头', '自在', '膏药', '脾气', '脑袋', '脊梁', '能耐', '胳膊', '胭脂',
            '胡萝', '胡琴', '胡同', '聪明', '耽误', '耽搁', '耷拉', '耳朵', '老爷', '老实', '老婆',
            '戏弄', '将军', '翻腾', '罗嗦', '罐头', '编辑', '结实', '红火', '累赘', '糨糊', '糊涂',
            '精神', '粮食', '簸箕', '篱笆', '算计', '算盘', '答应', '笤帚', '笑语', '笑话', '窟窿',
            '窝囊', '窗户', '稳当', '稀罕', '称呼', '秧歌', '秀气', '秀才', '福气', '祖宗', '砚台',
            '码头', '石榴', '石头', '石匠', '知识', '眼睛', '眯缝', '眨巴', '眉毛', '相声', '盘算',
            '白净', '痢疾', '痛快', '疟疾', '疙瘩', '疏忽', '畜生', '生意', '甘蔗', '琵琶', '琢磨',
            '琉璃', '玻璃', '玫瑰', '玄乎', '狐狸', '状元', '特务', '牲口', '牙碜', '牌楼', '爽快',
            '爱人', '热闹', '烧饼', '烟筒', '烂糊', '点心', '炊帚', '灯笼', '火候', '漂亮', '滑溜',
            '溜达', '温和', '清楚', '消息', '浪头', '活泼', '比方', '正经', '欺负', '模糊', '槟榔',
            '棺材', '棒槌', '棉花', '核桃', '栅栏', '柴火', '架势', '枕头', '枇杷', '机灵', '本事',
            '木头', '木匠', '朋友', '月饼', '月亮', '暖和', '明白', '时候', '新鲜', '故事', '收拾',
            '收成', '提防', '挖苦', '挑剔', '指甲', '指头', '拾掇', '拳头', '拨弄', '招牌', '招呼',
            '抬举', '护士', '折腾', '扫帚', '打量', '打算', '打扮', '打听', '打发', '扎实', '扁担',
            '戒指', '懒得', '意识', '意思', '悟性', '怪物', '思量', '怎么', '念头', '念叨', '别人',
            '快活', '忙活', '志气', '心思', '得罪', '张罗', '弟兄', '开通', '应酬', '庄稼', '干事',
            '帮手', '帐篷', '希罕', '师父', '师傅', '巴结', '巴掌', '差事', '工夫', '岁数', '屁股',
            '尾巴', '少爷', '小气', '小伙', '将就', '对头', '对付', '寡妇', '家伙', '客气', '实在',
            '官司', '学问', '字号', '嫁妆', '媳妇', '媒人', '婆家', '娘家', '委屈', '姑娘', '姐夫',
            '妯娌', '妥当', '妖精', '奴才', '女婿', '头发', '太阳', '大爷', '大方', '大意', '大夫',
            '多少', '多么', '外甥', '壮实', '地道', '地方', '在乎', '困难', '嘴巴', '嘱咐', '嘟囔',
            '嘀咕', '喜欢', '喇嘛', '喇叭', '商量', '唾沫', '哑巴', '哈欠', '哆嗦', '咳嗽', '和尚',
            '告诉', '告示', '含糊', '吓唬', '后头', '名字', '名堂', '合同', '吆喝', '叫唤', '口袋',
            '厚道', '厉害', '千斤', '包袱', '包涵', '匀称', '勤快', '动静', '动弹', '功夫', '力气',
            '前头', '刺猬', '刺激', '别扭', '利落', '利索', '利害', '分析', '出息', '凑合', '凉快',
            '冷战', '冤枉', '冒失', '养活', '关系', '先生', '兄弟', '便宜', '使唤', '佩服', '作坊',
            '体面', '位置', '似的', '伙计', '休息', '什么', '人家', '亲戚', '亲家', '交情', '云彩',
            '事情', '买卖', '主意', '丫头', '丧气', '两口', '东西', '东家', '世故', '不由', '下水',
            '下巴', '上头', '上司', '丈夫', '丈人', '一辈', '那个', '菩萨', '父亲', '母亲', '咕噜',
            '邋遢', '费用', '冤家', '甜头', '介绍', '荒唐', '大人', '泥鳅', '幸福', '熟悉', '计划',
            '扑腾', '蜡烛', '姥爷', '照顾', '喉咙', '吉他', '弄堂', '蚂蚱', '凤凰', '拖沓', '寒碜',
            '糟蹋', '倒腾', '报复', '逻辑', '盘缠', '喽啰', '牢骚', '咖喱', '扫把', '惦记'
        }
        # words that must keep their original tones (checked before any
        # neutral-tone rule is applied)
        self.must_not_neural_tone_words = {
            '男子', '女子', '分子', '原子', '量子', '莲子', '石子', '瓜子', '电子', '人人', '虎虎',
            '幺幺', '干嘛', '学子', '哈哈', '数数', '袅袅', '局地', '以下', '娃哈哈', '花花草草', '留得',
            '耕地', '想想', '熙熙', '攘攘', '卵子', '死死', '冉冉', '恳恳', '佼佼', '吵吵', '打打',
            '考考', '整整', '莘莘', '落地', '算子', '家家户户', '青青'
        }
        self.punc = "、：，；。？！“”‘’':,;.?!"

    @staticmethod
    def _with_tone(final: str, tone: str) -> str:
        """Return ``final`` with its last character replaced by ``tone``."""
        return final[:-1] + tone

    def _split_word(self, word: str) -> List[str]:
        """Split ``word`` into two parts around its shortest jieba sub-word.

        The shortest item produced by ``jieba.cut_for_search`` is located in
        ``word``.  If it is a prefix, the result is ``[sub-word, rest]``;
        otherwise the result is ``[word minus len(sub-word) trailing chars,
        sub-word]``.

        NOTE(review): the second form assumes the sub-word is a suffix of
        ``word``; when it sits in the middle of a longer word the two parts do
        not concatenate back to ``word``.  Left unchanged on purpose because
        altering the split would change the sandhi output.
        """
        # sorted() is stable, so among equally short candidates the first one
        # yielded by jieba wins
        candidates = sorted(jieba.cut_for_search(word), key=len)
        first_subword = candidates[0]
        if word.find(first_subword) == 0:
            return [first_subword, word[len(first_subword):]]
        return [word[:-len(first_subword)], first_subword]

    # the meaning of jieba pos tag: https://blog.csdn.net/weixin_44174352/article/details/113731041
    # e.g.
    # word: "家里"
    # pos: "s"
    # finals: ['ia1', 'i3']
    def _neural_sandhi(self, word: str, pos: str,
                       finals: List[str]) -> List[str]:
        """Apply the neutral-tone rules to the finals of ``word``.

        Args:
            word: the segmented word.
            pos: jieba part-of-speech tag of ``word``.
            finals: one tone-carrying final per character of ``word``.

        Returns:
            The finals with neutral tones ("5") applied.  The input list may be
            modified in place.
        """
        if word in self.must_not_neural_tone_words:
            return finals
        # reduplication words for n. v. and a., e.g. 奶奶, 试试, 旺旺
        # pos[:1] instead of pos[0]: an empty tag must not raise IndexError
        if pos[:1] in {"n", "v", "a"}:
            for j in range(1, len(word)):
                if word[j] == word[j - 1]:
                    finals[j] = self._with_tone(finals[j], "5")

        ge_idx = word.find("个")
        has_last = len(word) >= 1
        is_multi = len(word) > 1
        neutral_last = (
            # sentence-final particles
            (has_last and word[-1] in "吧呢啊呐噻嘛吖嗨呐哦哒滴哩哟喽啰耶喔诶") or
            (has_last and word[-1] in "的地得") or
            # e.g. 走了, 看着, 去过
            (len(word) == 1 and word in "了着过" and
             pos in {"ul", "uz", "ug"}) or
            (is_multi and word[-1] in "们子" and pos in {"r", "n"}) or
            # e.g. 桌上, 地下
            (is_multi and word[-1] in "上下" and pos in {"s", "l", "f"}) or
            # e.g. 上来, 下去
            (is_multi and word[-1] in "来去" and word[-2] in "上下进出回过起开"))
        if neutral_last:
            finals[-1] = self._with_tone(finals[-1], "5")
        # "个" used as a measure word
        elif (ge_idx >= 1 and
              (word[ge_idx - 1].isnumeric() or
               word[ge_idx - 1] in "几有两半多各整每做是")) or word == '个':
            finals[ge_idx] = self._with_tone(finals[ge_idx], "5")
        elif word in self.must_neural_tone_words or word[
                -2:] in self.must_neural_tone_words:
            finals[-1] = self._with_tone(finals[-1], "5")

        # second pass: look for conventional neutral-tone words inside the two
        # halves of the word
        word_list = self._split_word(word)
        head_len = len(word_list[0])
        finals_list = [finals[:head_len], finals[head_len:]]
        for i, sub_word in enumerate(word_list):
            if sub_word in self.must_neural_tone_words or sub_word[
                    -2:] in self.must_neural_tone_words:
                finals_list[i][-1] = self._with_tone(finals_list[i][-1], "5")
        return sum(finals_list, [])

    def _bu_sandhi(self, word: str, finals: List[str]) -> List[str]:
        """Apply the tone rules of "不" and return ``finals``."""
        # e.g. 看不懂
        if len(word) == 3 and word[1] == "不":
            finals[1] = self._with_tone(finals[1], "5")
            return finals
        for i, char in enumerate(word):
            # "不" before tone4 should be bu2, e.g. 不怕
            if char == "不" and i + 1 < len(word) and finals[i + 1][-1] == "4":
                finals[i] = self._with_tone(finals[i], "2")
        return finals

    def _yi_sandhi(self, word: str, finals: List[str]) -> List[str]:
        """Apply the tone rules of "一" and return ``finals``."""
        # "一" in number sequences, e.g. 一零零, 二一零
        if word.find("一") != -1 and all(
                item.isnumeric() for item in word if item != "一"):
            return finals
        # "一" between reduplication words should be yi5, e.g. 看一看
        if len(word) == 3 and word[1] == "一" and word[0] == word[-1]:
            finals[1] = self._with_tone(finals[1], "5")
        # when "一" is ordinal word, it should be yi1
        elif word.startswith("第一"):
            finals[1] = self._with_tone(finals[1], "1")
        else:
            for i, char in enumerate(word):
                if char != "一" or i + 1 >= len(word):
                    continue
                # "一" before tone4 (or a neutral tone) should be yi2, e.g. 一段
                if finals[i + 1][-1] in {'4', '5'}:
                    finals[i] = self._with_tone(finals[i], "2")
                # "一" before the other tones should be yi4, e.g. 一天;
                # when punctuation follows, "一" keeps its first tone
                elif word[i + 1] not in self.punc:
                    finals[i] = self._with_tone(finals[i], "4")
        return finals

    def _all_tone_three(self, finals: List[str]) -> bool:
        """Return True when every final ends with tone "3" (True if empty)."""
        return all(x[-1] == "3" for x in finals)

    def _three_sandhi(self, word: str, finals: List[str]) -> List[str]:
        """Apply third-tone sandhi to words of two, three or four characters."""
        if len(word) == 2 and self._all_tone_three(finals):
            finals[0] = self._with_tone(finals[0], "2")
        elif len(word) == 3:
            word_list = self._split_word(word)
            head_len = len(word_list[0])
            if self._all_tone_three(finals):
                #  disyllabic + monosyllabic, e.g. 蒙古/包
                if head_len == 2:
                    finals[0] = self._with_tone(finals[0], "2")
                    finals[1] = self._with_tone(finals[1], "2")
                #  monosyllabic + disyllabic, e.g. 纸/老虎
                elif head_len == 1:
                    finals[1] = self._with_tone(finals[1], "2")
            else:
                finals_list = [finals[:head_len], finals[head_len:]]
                for i, sub in enumerate(finals_list):
                    # e.g. 所有/人
                    if self._all_tone_three(sub) and len(sub) == 2:
                        sub[0] = self._with_tone(sub[0], "2")
                    # e.g. 好/喜欢
                    elif (i == 1 and not self._all_tone_three(sub) and
                          sub[0][-1] == "3" and
                          finals_list[0][-1][-1] == "3"):
                        finals_list[0][-1] = self._with_tone(
                            finals_list[0][-1], "2")
                finals = sum(finals_list, [])
        # split idiom into two words whose length is 2
        elif len(word) == 4:
            merged = []
            for sub in (finals[:2], finals[2:]):
                if self._all_tone_three(sub):
                    sub[0] = self._with_tone(sub[0], "2")
                merged += sub
            finals = merged

        return finals

    # merge "不" and the word behind it
    # if don't merge, "不" sometimes appears alone according to jieba, which may occur sandhi error
    def _merge_bu(self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
        """Glue a standalone "不" onto the following word.

        A trailing "不" with nothing after it is kept as ``("不", 'd')``.
        """
        new_seg = []
        last_word = ""
        for word, pos in seg:
            if last_word == "不":
                word = last_word + word
            # a bare "不" is held back until the next word is seen
            if word != "不":
                new_seg.append((word, pos))
            last_word = word
        if last_word == "不":
            new_seg.append((last_word, 'd'))
        return new_seg

    # function 1: merge "一" and reduplication words in it's left and right, e.g. "听","一","听" ->"听一听"
    # function 2: merge single  "一" and the word behind it
    # if don't merge, "一" sometimes appears alone according to jieba, which may occur sandhi error
    # e.g.
    # input seg: [('听', 'v'), ('一', 'm'), ('听', 'v')]
    # output seg: [('听一听', 'v')]
    def _merge_yi(self, seg: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
        """Merge "一" with its neighbours (see the two functions above)."""
        # function 1
        new_seg = []
        skip_next = False
        for i, (word, pos) in enumerate(seg):
            if skip_next:
                # this word was already consumed by the previous merge
                skip_next = False
                continue
            if i - 1 >= 0 and word == "一" and i + 1 < len(seg) and seg[i - 1][
                    0] == seg[i + 1][0] and seg[i - 1][1] == "v":
                new_seg[-1] = (new_seg[-1][0] + "一" + seg[i + 1][0],
                               new_seg[-1][1])
                skip_next = True
            else:
                new_seg.append((word, pos))

        # function 2
        merged = []
        for word, pos in new_seg:
            if merged and merged[-1][0] == "一":
                merged[-1] = (merged[-1][0] + word, merged[-1][1])
            else:
                merged.append((word, pos))
        return merged

    def _word_finals(self, word: str) -> List[str]:
        """Return the tone-carrying finals of ``word`` computed by pypinyin."""
        finals = lazy_pinyin(
            word, neutral_tone_with_five=True, style=Style.FINALS_TONE3)
        # after pypinyin==0.44.0, '嗯' need to be n2, cause the initial and final consonants cannot be empty at the same time
        for i, char in enumerate(word):
            if char == "嗯":
                finals[i] = "n2"
        return finals

    def _merge_tone_three_neighbors(self, seg, is_tone_three_pair) -> List[List[str]]:
        """Shared loop of the two ``_merge_continuous_three_tones*`` helpers.

        A word is appended to its predecessor when
        ``is_tone_three_pair(prev_finals, cur_finals)`` holds, the predecessor
        was not itself merged, the predecessor is not a reduplication and the
        combined length is at most 3 characters.  The pos tag of the first
        word is kept.
        """
        sub_finals_list = [self._word_finals(word) for word, _ in seg]
        merge_last = [False] * len(seg)
        new_seg = []
        for i, (word, pos) in enumerate(seg):
            if (i - 1 >= 0 and
                    is_tone_three_pair(sub_finals_list[i - 1],
                                       sub_finals_list[i]) and
                    not merge_last[i - 1] and
                    # if the last word is reduplication, not merge, because reduplication need to be _neural_sandhi
                    not self._is_reduplication(seg[i - 1][0]) and
                    len(seg[i - 1][0]) + len(word) <= 3):
                new_seg[-1][0] = new_seg[-1][0] + word
                merge_last[i] = True
            else:
                new_seg.append([word, pos])
        return new_seg

    # the first and the second words are all_tone_three
    def _merge_continuous_three_tones(
            self, seg: List[Tuple[str, str]]) -> List[List[str]]:
        """Merge neighbouring words that consist only of third tones."""
        return self._merge_tone_three_neighbors(
            seg, lambda prev, cur: self._all_tone_three(prev) and
            self._all_tone_three(cur))

    def _is_reduplication(self, word: str) -> bool:
        """Return True for a two-character word made of one repeated character."""
        return len(word) == 2 and word[0] == word[1]

    # the last char of first word and the first char of second word is tone_three
    def _merge_continuous_three_tones_2(
            self, seg: List[Tuple[str, str]]) -> List[List[str]]:
        """Merge neighbouring words whose touching syllables are third tones."""
        return self._merge_tone_three_neighbors(
            seg, lambda prev, cur: prev[-1][-1] == "3" and cur[0][-1] == "3")

    def _merge_er(self, seg: List[Tuple[str, str]]) -> List[List[str]]:
        """Append a standalone "儿" to the word before it."""
        new_seg = []
        for i, (word, pos) in enumerate(seg):
            if i - 1 >= 0 and word == "儿":
                new_seg[-1][0] = new_seg[-1][0] + word
            else:
                new_seg.append([word, pos])
        return new_seg

    def _merge_reduplication(
            self, seg: List[Tuple[str, str]]) -> List[List[str]]:
        """Merge a word into the previous entry when both are identical."""
        new_seg = []
        for word, pos in seg:
            if new_seg and word == new_seg[-1][0]:
                new_seg[-1][0] = new_seg[-1][0] + word
            else:
                new_seg.append([word, pos])
        return new_seg

    def pre_merge_for_modify(
            self, seg: List[Tuple[str, str]]) -> List[List[str]]:
        """Regroup a word segmentation before tone sandhi is applied.

        Args:
            seg: [(word, pos), ...]

        Returns:
            The regrouped segmentation as a list of ``[word, pos]`` pairs.
        """
        seg = self._merge_bu(seg)
        seg = self._merge_yi(seg)
        seg = self._merge_reduplication(seg)
        seg = self._merge_continuous_three_tones(seg)
        seg = self._merge_continuous_three_tones_2(seg)
        seg = self._merge_er(seg)
        return seg

    def modified_tone(self, word: str, pos: str,
                      finals: List[str]) -> List[str]:
        """Apply all sandhi rules to one word.

        Args:
            word: the segmented word.
            pos: part-of-speech tag of the word.
            finals: finals with tone, [final1, ..., finaln]

        Returns:
            The finals after the "不", "一", neutral-tone and third-tone rules
            have been applied, in that order.
        """
        finals = self._bu_sandhi(word, finals)
        finals = self._yi_sandhi(word, finals)
        finals = self._neural_sandhi(word, pos, finals)
        finals = self._three_sandhi(word, finals)
        return finals


================================================
FILE: paddlespeech/t2s/frontend/vocab.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import OrderedDict
from typing import Iterable

__all__ = ["Vocab"]


class Vocab(object):
    """Vocabulary: a two-way mapping between symbols and integer indices.

    Special symbols come first (in the order pad, unk, start, end), followed by
    the common symbols in their order of first appearance.

    Args:
        symbols (Iterable[str]): Common symbols.
        padding_symbol (str, optional): Symbol for pad. Defaults to "<pad>".
        unk_symbol (str, optional): Symbol for unknown. Defaults to "<unk>"
        start_symbol (str, optional): Symbol for start. Defaults to "<s>"
        end_symbol (str, optional): Symbol for end. Defaults to "</s>"

    A special symbol that is falsy (``None`` or ``""``) is left out of the
    vocabulary; its ``*_index`` property then returns -1.
    """

    def __init__(self,
                 symbols: Iterable[str],
                 padding_symbol="<pad>",
                 unk_symbol="<unk>",
                 start_symbol="<s>",
                 end_symbol="</s>"):
        self.special_symbols = OrderedDict()
        for item in (padding_symbol, unk_symbol, start_symbol, end_symbol):
            # Skip disabled symbols.  Also skip a symbol that was already
            # registered: re-assigning it would move it to the index that the
            # next special symbol is about to receive (two symbols sharing one
            # index) and leave its old index unused.
            if item and item not in self.special_symbols:
                self.special_symbols[item] = len(self.special_symbols)

        self.padding_symbol = padding_symbol
        self.unk_symbol = unk_symbol
        self.start_symbol = start_symbol
        self.end_symbol = end_symbol

        # symbol -> index
        self.stoi = OrderedDict()
        self.stoi.update(self.special_symbols)

        for s in symbols:
            if s not in self.stoi:
                self.stoi[s] = len(self.stoi)
        # index -> symbol
        self.itos = {v: k for k, v in self.stoi.items()}

    def __len__(self):
        return len(self.stoi)

    @property
    def num_specials(self):
        """ The number of special symbols.
        """
        return len(self.special_symbols)

    # special tokens
    @property
    def padding_index(self):
        """ The index of padding symbol, or -1 if it is not in the vocab.
        """
        return self.stoi.get(self.padding_symbol, -1)

    @property
    def unk_index(self):
        """ The index of unknown symbol, or -1 if it is not in the vocab.
        """
        return self.stoi.get(self.unk_symbol, -1)

    @property
    def start_index(self):
        """ The index of start symbol, or -1 if it is not in the vocab.
        """
        return self.stoi.get(self.start_symbol, -1)

    @property
    def end_index(self):
        """ The index of end symbol, or -1 if it is not in the vocab.
        """
        return self.stoi.get(self.end_symbol, -1)

    def __repr__(self):
        fmt = "Vocab(size: {},\nstoi:\n{})"
        return fmt.format(len(self), self.stoi)

    def __str__(self):
        return self.__repr__()

    def lookup(self, symbol):
        """ The index that the symbol corresponds to.

        Raises:
            KeyError: if the symbol is not in the vocab.
        """
        return self.stoi[symbol]

    def reverse(self, index):
        """ The symbol that the index corresponds to.

        Raises:
            KeyError: if the index is not in the vocab.
        """
        return self.itos[index]

    def add_symbol(self, symbol):
        """ Add a new symbol in vocab; a symbol already present is ignored.
        """
        if symbol in self.stoi:
            return
        N = len(self.stoi)
        self.stoi[symbol] = N
        self.itos[N] = symbol

    def add_symbols(self, symbols):
        """ Add multiple symbols in vocab.
        """
        for symbol in symbols:
            self.add_symbol(symbol)


================================================
FILE: paddlespeech/t2s/frontend/zh_frontend.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import re
from operator import itemgetter
from pprint import pprint
from typing import Dict
from typing import List

import jieba.posseg as psg
import numpy as np
import paddle
import yaml
from g2pM import G2pM
from pypinyin import lazy_pinyin
from pypinyin import load_phrases_dict
from pypinyin import load_single_dict
from pypinyin import Style
from pypinyin_dict.phrase_pinyin_data import large_pinyin

from paddlespeech.t2s.frontend.g2pw import G2PWOnnxConverter
from paddlespeech.t2s.frontend.generate_lexicon import generate_lexicon
from paddlespeech.t2s.frontend.polyphonic import Polyphonic
from paddlespeech.t2s.frontend.rhy_prediction.rhy_predictor import RhyPredictor
from paddlespeech.t2s.frontend.ssml.xml_processor import MixTextProcessor
from paddlespeech.t2s.frontend.tone_sandhi import ToneSandhi
from paddlespeech.t2s.frontend.zh_normalization.text_normlization import TextNormalizer

# Phones that `insert_after_character` treats as initials: no extra item is
# inserted after a phone found in this list.
INITIALS = [
    'b', 'p', 'm', 'f', 'd', 't', 'n', 'l', 'g', 'k', 'h', 'zh', 'ch', 'sh',
    'r', 'z', 'c', 's', 'j', 'q', 'x'
]
# 'y' / 'w' and the sp* / sil symbols are handled like initials as well
# (presumably sp* / sil are pause and silence marks - TODO confirm).
INITIALS += ['y', 'w', 'sp', 'spl', 'spn', 'sil']

# Tone labels as strings: 0 for None, 5 for neutral
TONES = ["0", "1", "2", "3", "4", "5"]


def intersperse(lst, item):
    """
    Return a list with `item` placed before, between and after the elements of `lst`.
    e.g. intersperse([1, 2], 0) -> [0, 1, 0, 2, 0]
    """
    # pre-fill every slot with `item`, then drop the elements into the odd slots
    padded = [item] * (2 * len(lst) + 1)
    for position, element in enumerate(lst):
        padded[2 * position + 1] = element
    return padded


def insert_after_character(lst, item):
    """
    Insert `item` at the start and after every phone that is not in INITIALS
    (i.e. after finals, which close a character).
    """
    output = [item]
    for phn in lst:
        # finals carry tones and are not listed in INITIALS
        output.extend([phn] if phn in INITIALS else [phn, item])
    return output


class Frontend():
    def __init__(self,
                 g2p_model="g2pW",
                 phone_vocab_path=None,
                 tone_vocab_path=None,
                 use_rhy=False):
        """
        Build the Chinese frontend.

        Args:
            g2p_model: one of 'pypinyin', 'g2pM' or 'g2pW'.
            phone_vocab_path: optional text file with one "<phone> <id>" pair
                per line, loaded into `self.vocab_phones`.
            tone_vocab_path: optional text file with one "<tone> <id>" pair
                per line, loaded into `self.vocab_tones`.
            use_rhy: when True a `RhyPredictor` is created as
                `self.rhy_predictor`.
        """
        self.punc = "、：，；。？！“”‘’':,;.?!"
        self.rhy_phns = ['sp1', 'sp2', 'sp3', 'sp4']
        # custom phrase pronunciations registered in pypinyin by _init_pypinyin
        self.phrases_dict = {
            '开户行': [['ka1i'], ['hu4'], ['hang2']],
            '发卡行': [['fa4'], ['ka3'], ['hang2']],
            '放款行': [['fa4ng'], ['kua3n'], ['hang2']],
            '茧行': [['jia3n'], ['hang2']],
            '行号': [['hang2'], ['ha4o']],
            '各地': [['ge4'], ['di4']],
            '借还款': [['jie4'], ['hua2n'], ['kua3n']],
            '时间为': [['shi2'], ['jia1n'], ['we2i']],
            '为准': [['we2i'], ['zhu3n']],
            '色差': [['se4'], ['cha1']],
            '嗲': [['dia3']],
            '呗': [['bei5']],
            '不': [['bu4']],
            '咗': [['zuo5']],
            '嘞': [['lei5']],
            '掺和': [['chan1'], ['huo5']]
        }

        # words consulted by _merge_erhua: always merge "儿" / never merge "儿"
        self.must_erhua = {
            "小院儿", "胡同儿", "范儿", "老汉儿", "撒欢儿", "寻老礼儿", "妥妥儿", "媳妇儿"
        }
        self.not_erhua = {
            "虐儿", "为儿", "护儿", "瞒儿", "救儿", "替儿", "有儿", "一儿", "我儿", "俺儿", "妻儿",
            "拐儿", "聋儿", "乞儿", "患儿", "幼儿", "孤儿", "婴儿", "婴幼儿", "连体儿", "脑瘫儿",
            "流浪儿", "体弱儿", "混血儿", "蜜雪儿", "舫儿", "祖儿", "美儿", "应采儿", "可儿", "侄儿",
            "孙儿", "侄孙儿", "女儿", "男儿", "红孩儿", "花儿", "虫儿", "马儿", "鸟儿", "猪儿", "猫儿",
            "狗儿", "少儿"
        }

        # symbol -> id tables, filled only when a vocab file is given
        self.vocab_phones = {}
        self.vocab_tones = {}
        if phone_vocab_path:
            with open(phone_vocab_path, 'rt', encoding='utf-8') as vocab_file:
                phone_rows = [row.strip().split() for row in vocab_file]
            for phone, index in phone_rows:
                self.vocab_phones[phone] = int(index)
        if tone_vocab_path:
            with open(tone_vocab_path, 'rt', encoding='utf-8') as vocab_file:
                tone_rows = [row.strip().split() for row in vocab_file]
            for tone, index in tone_rows:
                self.vocab_tones[tone] = int(index)

        # SSML
        self.mix_ssml_processor = MixTextProcessor()
        # tone sandhi
        self.tone_modifier = ToneSandhi()
        # TN
        self.text_normalizer = TextNormalizer()

        # prosody
        self.use_rhy = use_rhy
        if use_rhy:
            self.rhy_predictor = RhyPredictor()
            print("Rhythm predictor loaded.")

        # g2p
        assert g2p_model in ('pypinyin', 'g2pM', 'g2pW')
        self.g2p_model = g2p_model
        if g2p_model == "g2pM":
            self.g2pM_model = G2pM()
            self.pinyin2phone = generate_lexicon(
                with_tone=True, with_erhua=False)
        elif g2p_model == "g2pW":
            # use pypinyin as backup for non polyphonic characters in g2pW
            self._init_pypinyin()
            self.corrector = Polyphonic()
            self.g2pM_model = G2pM()
            self.g2pW_model = G2PWOnnxConverter(
                style='pinyin', enable_non_tradional_chinese=True)
            self.pinyin2phone = generate_lexicon(
                with_tone=True, with_erhua=False)
        else:
            self._init_pypinyin()

    def _init_pypinyin(self):
        """
        Prepare pypinyin for G2P: load the large pinyin data, register
        `self.phrases_dict` and override the readings of a single character.
        """
        large_pinyin.load()
        load_phrases_dict(self.phrases_dict)
        # reorder the pinyin candidates of the character '地'
        di_readings = {ord('地'): 'de,di4'}
        load_single_dict(di_readings)

    def _get_initials_finals(self, word: str) -> List[List[str]]:
        """
        Get word initial and final by pypinyin or g2pM
        """
        initials = []
        finals = []
        if self.g2p_model == "pypinyin":
            orig_initials = lazy_pinyin(
                word, neutral_tone_with_five=True, style=Style.INITIALS)
            orig_finals = lazy_pinyin(
                word, neutral_tone_with_five=True, style=Style.FINALS_TONE3)
            # after pypinyin==0.44.0, '嗯' need to be n2, cause the initial and final consonants cannot be empty at the same time
            en_index = [index for index, c in enumerate(word) if c == "嗯"]
            for i in en_index:
                orig_finals[i] = "n2"

            for c, v in zip(orig_initials, orig_finals):
                if re.match(r'i\d', v):
                    if c in ['z', 'c', 's']:
                        # zi, ci, si
                        v = re.sub('i', 'ii', v)
                    elif c in ['zh', 'ch', 'sh', 'r']:
                        # zhi, chi, shi
                        v = re.sub('i', 'iii', v)
                initials.append(c)
                finals.append(v)

        elif self.g2p_model == "g2pM":
            pinyins = self.g2pM_model(word, tone=True, char_split=False)
            for pinyin in pinyins:
                pinyin = pinyin.replace("u:", "v")
                if pinyin in self.pinyin2phone:
                    initial_final_list = self.pinyin2phone[pinyin].split(" ")
                    if len(initial_final_list) == 2:
                        initials.append(initial_final_list[0])
                        finals.append(initial_final_list[1])
                    elif len(initial_final_list) == 1:
                        initials.append('')
                        finals.append(initial_final_list[1])
                else:
                    # If it's not pinyin (possibly punctuation) or no conversion is required
                    initials.append(pinyin)
                    finals.append(pinyin)

        return initials, finals

    def _merge_erhua(self,
                     initials: List[str],
                     finals: List[str],
                     word: str,
                     pos: str) -> List[List[str]]:
        """
        Do erhub.
        """
        # fix er1
        for i, phn in enumerate(finals):
            if i == len(finals) - 1 and word[i] == "儿" and phn == 'er1':
                finals[i] = 'er2'

        # 发音
        if word not in self.must_erhua and (word in self.not_erhua or
                                            pos in {"a", "j", "nr"}):
            return initials, finals

        # "……" 等情况直接返回
        if len(finals) != len(word):
            return initials, finals

        assert len(finals) == len(word)

        # 不发音
        new_initials = []
        new_finals = []
        for i, phn in enumerate(finals):
            if i == len(finals) - 1 and word[i] == "儿" and phn in {
                    "er2", "er5"
            } and word[-2:] not in self.not_erhua and new_finals:
                new_finals[-1] = new_finals[-1][:-1] + "r" + new_finals[-1][-1]
            else:
                new_initials.append(initials[i])
                new_finals.append(phn)

        return new_initials, new_finals

    # if merge_sentences, merge all sentences into one phone sequence
    def _g2p(self,
             sentences: List[str],
             merge_sentences: bool=True,
             with_erhua: bool=True) -> List[List[str]]:
        """
        Return: list of list phonemes.
            [['w', 'o3', 'm', 'en2', 'sp'], ...]
        """
        segments = sentences
        phones_list = []

        # split by punctuation
        for seg in segments:
            if self.use_rhy:
                seg = self.rhy_predictor._clean_text(seg)

            # remove all English words in the sentence
            seg = re.sub('[a-zA-Z]+', '', seg)

            # add prosody mark
            if self.use_rhy:
                seg = self.rhy_predictor.get_prediction(seg)

            # [(word, pos), ...]
            seg_cut = psg.lcut(seg)
            # fix wordseg bad case for sandhi
            seg_cut = self.tone_modifier.pre_merge_for_modify(seg_cut)

            # 为了多音词获得更好的效果，这里采用整句预测
            phones = []
            initials = []
            finals = []
            if self.g2p_model == "g2pW":
                try:
                    # undo prosody 
                    if self.use_rhy:
                        seg = self.rhy_predictor._clean_text(seg)

                    # g2p
                    pinyins = self.g2pW_model(seg)[0]
                except Exception:
                    # g2pW 模型采用繁体输入，如果有cover不了的简体词，采用g2pM预测
                    print("[%s] not in g2pW dict,use g2pM" % seg)
                    pinyins = self.g2pM_model(seg, tone=True, char_split=False)

                # do prosody
                if self.use_rhy:
                    rhy_text = self.rhy_predictor.get_prediction(seg)
                    final_py = self.rhy_predictor.pinyin_align(pinyins,
                                                               rhy_text)
                    pinyins = final_py

                pre_word_length = 0
                for word, pos in seg_cut:
                    sub_initials = []
                    sub_finals = []
                    now_word_length = pre_word_length + len(word)

                    # skip english word
                    if pos == 'eng':
                        pre_word_length = now_word_length
                        continue

                    word_pinyins = pinyins[pre_word_length:now_word_length]

                    # 多音字消歧
                    word_pinyins = self.corrector.correct_pronunciation(
                        word, word_pinyins)

                    for pinyin, char in zip(word_pinyins, word):
                        if pinyin is None:
                            pinyin = char

                        pinyin = pinyin.replace("u:", "v")

                        if pinyin in self.pinyin2phone:
                            initial_final_list = self.pinyin2phone[
                                pinyin].split(" ")
                            if len(initial_final_list) == 2:
                                sub_initials.append(initial_final_list[0])
                                sub_finals.append(initial_final_list[1])
                            elif len(initial_final_list) == 1:
                                sub_initials.append('')
                                sub_finals.append(initial_final_list[1])
                        else:
                            # If it's not pinyin (possibly punctuation) or no conversion is required
                            sub_initials.append(pinyin)
                            sub_finals.append(pinyin)

                    pre_word_length = now_word_length
                    # tone sandhi
                    sub_finals = self.tone_modifier.modified_tone(word, pos,
                                                                  sub_finals)
                    # er hua                                
                    if with_erhua:
                        sub_initials, sub_finals = self._merge_erhua(
                            sub_initials, sub_finals, word, pos)

                    initials.append(sub_initials)
                    finals.append(sub_finals)
                    # assert len(sub_initials) == len(sub_finals) == len(word)
            else:
                # pypinyin, g2pM
                for word, pos in seg_cut:
                    if pos == 'eng':
                        # skip english word
                        continue

                    # g2p
                    sub_initials, sub_finals = self._get_initials_finals(word)
                    # tone sandhi
                    sub_finals = self.tone_modifier.modified_tone(word, pos,
                                                                  sub_finals)
                    # er hua
                    if with_erhua:
                        sub_initials, sub_finals = self._merge_erhua(
                            sub_initials, sub_finals, word, pos)

                    initials.append(sub_initials)
                    finals.append(sub_finals)
                    # assert len(sub_initials) == len(sub_finals) == len(word)

                # sum(iterable[, start])
            initials = sum(initials, [])
            finals = sum(finals, [])

            for c, v in zip(initials, finals):
                # NOTE: post process for pypinyin outputs
                # we discriminate i, ii and iii
                if c and c not in self.punc:
                    phones.append(c)
                # replace punctuation by `sp`
                if c and c in self.punc:
                    phones.append('sp')

                if v and v not in self.punc and v not in self.rhy_phns:
                    phones.append(v)

            phones_list.append(phones)

        # merge split sub sentence into one sentence.
        if merge_sentences:
            # sub sentence phonemes
            merge_list = sum(phones_list, [])
            # rm the last 'sp' to avoid the noise at the end
            # cause in the training data, no 'sp' in the end
            if merge_list[-1] == 'sp':
                merge_list = merge_list[:-1]

            # sentence phonemes
            phones_list = []
            phones_list.append(merge_list)

        return phones_list

    def _p2id(self, phonemes: List[str]) -> np.ndarray:
        """
        Phoneme to Index
        """
        # replace unk phone with sp
        phonemes = [
            phn if phn in self.vocab_phones else "sp" for phn in phonemes
        ]
        phone_ids = [self.vocab_phones[item] for item in phonemes]
        return np.array(phone_ids, np.int64)

    def _t2id(self, tones: List[str]) -> np.ndarray:
        """
        Tone to Index.
        """
        # replace unk phone with sp
        tones = [tone if tone in self.vocab_tones else "0" for tone in tones]
        tone_ids = [self.vocab_tones[item] for item in tones]
        return np.array(tone_ids, np.int64)

    def _get_phone_tone(self, phonemes: List[str],
                        get_tone_ids: bool=False) -> List[List[str]]:
        """
        Get tone from phonemes.
        """
        phones = []
        tones = []
        if get_tone_ids and self.vocab_tones:
            for full_phone in phonemes:
                # split tone from finals
                match = re.match(r'^(\w+)([012345])$', full_phone)
                if match:
                    phone = match.group(1)
                    tone = match.group(2)
                    # if the merged erhua not in the vocab
                    # assume that the input is ['iaor3'] and 'iaor' not in self.vocab_phones, we split 'iaor' into ['iao','er']
                    # and the tones accordingly change from ['3'] to ['3','2'], while '2' is the tone of 'er2'
                    if len(phone) >= 2 and phone != "er" and phone[
                            -1] == 'r' and phone not in self.vocab_phones and phone[:
                                                                                    -1] in self.vocab_phones:
                        phones.append(phone[:-1])
                        tones.append(tone)
                        phones.append("er")
                        tones.append("2")
                    else:
                        phones.append(phone)
                        tones.append(tone)
                else:
                    # initals with 0 tone.
                    phones.append(full_phone)
                    tones.append('0')
        else:
            for phone in phonemes:
                # if the merged erhua not in the vocab
                # assume that the input is ['iaor3'] and 'iaor' not in self.vocab_phones, change ['iaor3'] to ['iao3','er2']
                if len(phone) >= 3 and phone[:-1] != "er" and phone[
                        -2] == 'r' and phone not in self.vocab_phones and (
                            phone[:-2] + phone[-1]) in self.vocab_phones:
                    phones.append((phone[:-2] + phone[-1]))
                    phones.append("er2")
                else:
                    phones.append(phone)

        return phones, tones

    def get_phonemes(self,
                     sentence: str,
                     merge_sentences: bool=True,
                     with_erhua: bool=True,
                     robot: bool=False,
                     print_info: bool=False) -> List[List[str]]:
        """
        Run the full G2P pipeline on raw text.

        Args:
            sentence (str): raw input text.
            merge_sentences (bool): forwarded to `self._g2p`.
            with_erhua (bool): forwarded to `self._g2p`.
            robot (bool): when True, every tone digit 1-5 is replaced by `1`
                (except for `er2`) to imitate a monotone robot voice.
            print_info (bool): when True, print the normalized text and the
                resulting phonemes.

        Returns:
            List[List[str]]: one phoneme list per output sentence.
        """
        # text normalization & sentence segmentation
        sentences = self.text_normalizer.normalize(sentence)
        # prosody, word segmentation, g2p and tone sandhi
        phonemes = self._g2p(
            sentences, merge_sentences=merge_sentences, with_erhua=with_erhua)

        if robot:
            # flatten every tone to `1`; `er` only has tone `2`, so keep it
            phonemes = [[
                phn[:-1] + "1"
                if phn[-1] in "12345" and phn != "er2" else phn
                for phn in part
            ] for part in phonemes]

        if print_info:
            for line in ("----------------------------", "text norm results:",
                         sentences, "----------------------------",
                         "g2p results:", phonemes,
                         "----------------------------"):
                print(line)
        return phonemes

    def _split_word_to_char(self, words):
        res = []
        for x in words:
            res.append(x)
        return res

    # When SSML specifies the pinyin, assign that pinyin to the words directly.
    def _g2p_assign(self,
                    words: List[str],
                    pinyin_spec: List[str],
                    merge_sentences: bool=True) -> List[List[str]]:
        """
        Replace phoneme by SSML
        """
        phones_list = []
        initials = []
        finals = []

        # to character list
        words = self._split_word_to_char(words[0])

        for pinyin, char in zip(pinyin_spec, words):
            sub_initials = []
            sub_finals = []
            pinyin = pinyin.replace("u:", "v")

            #self.pinyin2phone: is a dict with all pinyin mapped with sheng_mu yun_mu
            if pinyin in self.pinyin2phone:
                initial_final_list = self.pinyin2phone[pinyin].split(" ")
                if len(initial_final_list) == 2:
                    sub_initials.append(initial_final_list[0])
                    sub_finals.append(initial_final_list[1])
                elif len(initial_final_list) == 1:
                    sub_initials.append('')
                    sub_finals.append(initial_final_list[1])
            else:
                # If it's not pinyin (possibly punctuation) or no conversion is required
                sub_initials.append(pinyin)
                sub_finals.append(pinyin)

            initials.append(sub_initials)
            finals.append(sub_finals)

        initials = sum(initials, [])
        finals = sum(finals, [])

        phones = []
        for c, v in zip(initials, finals):
            # c for consonant, v for vowel
            # NOTE: post process for pypinyin outputs
            # we discriminate i, ii and iii
            if c and c not in self.punc:
                phones.append(c)
            # replace punc to `sp`
            if c and c in self.punc:
                phones.append('sp')
            if v and v not in self.punc and v not in self.rhy_phns:
                phones.append(v)
        phones_list.append(phones)

        if merge_sentences:
            merge_list = sum(phones_list, [])
            # rm the last 'sp' to avoid the noise at the end
            # cause in the training data, no 'sp' in the end
            if merge_list[-1] == 'sp':
                merge_list = merge_list[:-1]
            phones_list = []
            phones_list.append(merge_list)

        return phones_list

    def get_phonemes_ssml(self,
                          ssml_inputs: list,
                          merge_sentences: bool=True,
                          with_erhua: bool=True,
                          robot: bool=False,
                          print_info: bool=False) -> List[List[str]]:
        """
        Main function to do G2P with SSML support.

        Args:
            ssml_inputs (list): segments of the form `[text, pinyin_spec]`,
                e.g. `['你喜欢', []]`. An empty `pinyin_spec` means the text
                goes through the regular G2P (`self._g2p`); otherwise the
                given pinyin is assigned directly (`self._g2p_assign`).
            merge_sentences (bool): forwarded to the G2P calls; when True the
                phonemes of all segments are also concatenated into one list.
            with_erhua (bool): forwarded to `self._g2p`.
            robot (bool): when True, every tone digit 1-5 is replaced by `1`
                (except for `er2`).
            print_info (bool): when True, print the normalized text of all
                segments and the resulting phonemes.

        Returns:
            List[List[str]]: phoneme lists; a single list when
                `merge_sentences` is True.
        """
        all_phonemes = []
        # normalized text of every segment, kept for the debug print so it
        # also works for empty input and shows all segments, not just the last
        all_sentences = []

        for word_pinyin_item in ssml_inputs:
            # ['你喜欢', []] -> 你喜欢 []
            sentence, pinyin_spec = word_pinyin_item[0], word_pinyin_item[1]

            # TN & Text Segmentation
            sentences = self.text_normalizer.normalize(sentence)
            all_sentences.extend(sentences)

            if len(pinyin_spec) == 0:
                # g2p word w/o specified <say-as>
                phonemes = self._g2p(
                    sentences,
                    merge_sentences=merge_sentences,
                    with_erhua=with_erhua)
            else:
                # word phonemes specified by <say-as>
                phonemes = self._g2p_assign(
                    sentences, pinyin_spec, merge_sentences=merge_sentences)

            all_phonemes = all_phonemes + phonemes

        if robot:
            # flatten every tone to `1`; `er` only has tone `2`, so keep it.
            # Empty items are passed through instead of raising on item[-1].
            all_phonemes = [[
                item[:-1] + "1"
                if item and item[-1] in "12345" and item != "er2" else item
                for item in part
            ] for part in all_phonemes]

        if merge_sentences:
            all_phonemes = [sum(all_phonemes, [])]

        if print_info:
            print("----------------------------")
            print("text norm results:")
            print(all_sentences)
            print("----------------------------")
            print("g2p results:")
            print(all_phonemes)
            print("----------------------------")

        return all_phonemes

    def add_sp_if_no(self, phonemes):
        """
        Prosody mark #4 added at sentence end.

        Appends 'sp4' to the last sentence unless its final phoneme already
        starts with 'sp'. The list is modified in place and returned.

        Args:
            phonemes: list of phoneme lists, one per sentence.

        Returns:
            The same `phonemes` object. It is returned unchanged when it is
            empty or when its last sentence has no phonemes, since there is
            no sentence end to mark in that case.
        """
        # guard both levels so empty input does not raise IndexError
        if phonemes and phonemes[-1] and not phonemes[-1][-1].startswith('sp'):
            phonemes[-1].append('sp4')
        return phonemes

    def get_input_ids(self,
                      sentence: str,
                      merge_sentences: bool=True,
                      get_tone_ids: bool=False,
                      robot: bool=False,
                      print_info: bool=False,
                      add_blank: bool=False,
                      blank_token: str="<pad>",
                      to_tensor: bool=True) -> Dict[str, List[paddle.Tensor]]:
        """
        Convert raw text into phone (and optionally tone) id sequences.

        Args:
            sentence (str): raw input text.
            merge_sentences (bool): forwarded to `get_phonemes`.
            get_tone_ids (bool): forwarded to `_get_phone_tone`; tone ids are
                only produced when that call returns tones.
            robot (bool): forwarded to `get_phonemes`.
            print_info (bool): forwarded to `get_phonemes`.
            add_blank (bool): when True, the phones are passed through
                `insert_after_character` together with `blank_token`.
            blank_token (str): token handed to `insert_after_character`.
            to_tensor (bool): when True, each id array is wrapped with
                `paddle.to_tensor`; otherwise the numpy arrays are kept.

        Returns:
            Dict with the optional keys "tone_ids" and "phone_ids", each a
            list with one id sequence per sentence. A key is omitted when no
            sequence was produced for it.
        """
        phonemes = self.get_phonemes(
            sentence,
            merge_sentences=merge_sentences,
            print_info=print_info,
            robot=robot)

        # add #4 for sentence end.
        if self.use_rhy:
            phonemes = self.add_sp_if_no(phonemes)

        phone_id_seqs = []
        tone_id_seqs = []

        for part_phonemes in phonemes:
            phones, tones = self._get_phone_tone(
                part_phonemes, get_tone_ids=get_tone_ids)

            if add_blank:
                phones = insert_after_character(phones, blank_token)

            if tones:
                tone_ids = self._t2id(tones)
                tone_id_seqs.append(
                    paddle.to_tensor(tone_ids) if to_tensor else tone_ids)

            if phones:
                phone_ids = self._p2id(phones)
                # to_tensor=False lets callers skip paddle.to_tensor, e.g.
                # under onnxruntime where its first call is reportedly slow
                phone_id_seqs.append(
                    paddle.to_tensor(phone_ids) if to_tensor else phone_ids)

        result = {}
        if tone_id_seqs:
            result["tone_ids"] = tone_id_seqs
        if phone_id_seqs:
            result["phone_ids"] = phone_id_seqs
        return result

    def get_input_ids_ssml(
            self,
            sentence: str,
            merge_sentences: bool=True,
            get_tone_ids: bool=False,
            robot: bool=False,
            print_info: bool=False,
            add_blank: bool=False,
            blank_token: str="<pad>",
            to_tensor: bool=True) -> Dict[str, List[paddle.Tensor]]:
        """
        Convert SSML-annotated text into phone (and optionally tone) id
        sequences.

        Args:
            sentence (str): input text, possibly containing SSML tags; it is
                split with `MixTextProcessor.get_pinyin_split`.
            merge_sentences (bool): forwarded to `get_phonemes_ssml`.
            get_tone_ids (bool): forwarded to `_get_phone_tone`; tone ids are
                only produced when that call returns tones.
            robot (bool): forwarded to `get_phonemes_ssml`.
            print_info (bool): forwarded to `get_phonemes_ssml`.
            add_blank (bool): when True, the phones are passed through
                `insert_after_character` together with `blank_token`.
            blank_token (str): token handed to `insert_after_character`.
            to_tensor (bool): when True, each id array is wrapped with
                `paddle.to_tensor`; otherwise the numpy arrays are kept.

        Returns:
            Dict with the optional keys "tone_ids" and "phone_ids", each a
            list with one id sequence per sentence. A key is omitted when no
            sequence was produced for it.
        """
        # split sentence by SSML tag.
        texts = MixTextProcessor.get_pinyin_split(sentence)

        phonemes = self.get_phonemes_ssml(
            texts,
            merge_sentences=merge_sentences,
            print_info=print_info,
            robot=robot)

        phone_id_seqs = []
        tone_id_seqs = []

        for part_phonemes in phonemes:
            phones, tones = self._get_phone_tone(
                part_phonemes, get_tone_ids=get_tone_ids)

            if add_blank:
                phones = insert_after_character(phones, blank_token)

            if tones:
                tone_ids = self._t2id(tones)
                tone_id_seqs.append(
                    paddle.to_tensor(tone_ids) if to_tensor else tone_ids)

            if phones:
                phone_ids = self._p2id(phones)
                # to_tensor=False lets callers skip paddle.to_tensor, e.g.
                # under onnxruntime where its first call is reportedly slow
                phone_id_seqs.append(
                    paddle.to_tensor(phone_ids) if to_tensor else phone_ids)

        result = {}
        if tone_id_seqs:
            result["tone_ids"] = tone_id_seqs
        if phone_id_seqs:
            result["phone_ids"] = phone_id_seqs
        return result


================================================
FILE: paddlespeech/t2s/frontend/zh_normalization/README.md
================================================
## Supported NSW (Non-Standard-Word) Normalization

|NSW type|raw|normalized|
|:--|:-|:-|
|serial number|电影中梁朝伟扮演的陈永仁的编号27149|电影中梁朝伟扮演的陈永仁的编号二七一四九|
|cardinal|这块黄金重达324.75克<br>我们班的最高总分为583分|这块黄金重达三百二十四点七五克<br>我们班的最高总分为五百八十三分|
|numeric range |12\~23<br>-1.5\~2|十二到二十三<br>负一点五到二|
|date|她出生于86年8月18日，她弟弟出生于1995年3月1日|她出生于八六年八月十八日， 她弟弟出生于一九九五年三月一日|
|time|等会请在12:05请通知我|等会请在十二点零五分请通知我|
|temperature|今天的最低气温达到-10°C|今天的最低气温达到零下十度|
|fraction|现场有7/12的观众投出了赞成票|现场有十二分之七的观众投出了赞成票|
|percentage|明天有62％的概率降雨|明天有百分之六十二的概率降雨|
|money|随便来几个价格12块5，34.5元，20.1万|随便来几个价格十二块五，三十四点五元，二十点一万|
|telephone|这是固话0421-33441122<br>这是手机+86 18544139121|这是固话零四二一三三四四一一二二<br>这是手机八六一八五四四一三九一二一|
## References
[Pull requests #658 of DeepSpeech](https://github.com/PaddlePaddle/DeepSpeech/pull/658/files)


================================================
FILE: paddlespeech/t2s/frontend/zh_normalization/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddlespeech.t2s.frontend.zh_normalization.text_normlization import *


================================================
FILE: paddlespeech/t2s/frontend/zh_normalization/char_convert.py
================================================
# coding=utf-8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Traditional and simplified Chinese conversion, a simplified character may correspond to multiple traditional characters.
"""
simplified_charcters = '制咖片型超声盘鉴定仔点他命书歌粉巾字帐恤手指记忆棒形转弯沟光○〇㐄㐅㐆㐌㐖毒㐜㐡㐤㐰㐺㑇㑳㒳㒸㔾㗂㗎㝵㞎㞙㞞以㢲㢴㤅㥁㥯㨗㫺㬎㮎㮚㮸㲋㲱㲾㳮涧㵪㶸㷖㷭㹢㹴犬㺢狓㺵碗㽮㿝䍃䔢䖟䖸䗈䗥䗪䝓射䥯䦉䯝鲃鱼䲔䳗鹅䵹鼄䶑一对应映射丁不识下儿子做二休世丘之貉并中台原则串为甚谓干净了百事无成八变五十些人得道鸡升天代如并来去个国政策劲幽灵在欧洲游荡接样萝卜坑侧化传价元论醇共再准刀两断切分耕耘收获钱货物向看旧就绪险刻千金动劳永逸匙零夜半卡通回复返影踪反常态口咬气句话同吐快吹周味呼诺呜品红锅哄而散起唱和问三知生熟团漆黑火糟堆场空块面塌糊涂尘染壁厢夔已足多情露水大早到晚夫妻当关万莫开失古恨套所料既往孔见提师要家主审寸阴难买斗牛小撮部阵局展身层巴掌帆风顺席地带过年计于春头载四季期被蛇怕井绳度愿式份弹顷深前律径心意念差愁孤行俱全房厅交遮打技长把抓死拿眼泪鼻涕钥锁折段抿拍即合扫排掬挥拨拥上入击洞掷揽改故辙败文值名斑方面旁族日秋餐隔雅里终父旦时晌会霎间晃暴寒曝更月望垠际朝夕本正经利杯羹东西板枝独秀根筋杆进条龙服务概模次函数又性程总付步脚印趋登毛拔呵氧氮碳决雌雄波未平派谎言流清楚白准溜烟潭有获闻是处降琴鹤甲病发可拾沙目然了直以相眨穿睹瞥瞬矢的解石鸟神教秉虔诚秘种窝蜂穷窍笑置笔苟勾销抹杀煞等奖箍节吃箭仇双雕诗筹箩筐系列纸级士官统丝毫挂维网尽线微吭响股脑胎脉承腔臂力致效资源址器举功投般说讲规贸易叶障着慎满皆输号木电池衣倾钟高低视仁觉醒览遗角银币触溃九鼎蔽抄出驷马追重语破贫洗贯走路安蹴至几蹶振跃役胆汗较辈轮辞赞退六连遍递边针血锤音错门思闪真倒项栽雾类保护川先惊乍体哄鳞爪鸣滴泡邻域党专鼓作齐炒丑烯亥克内酯冬加奴卯肝炎基尺梁街裤镐客宠庭巳汝昌烷玲磊糖肇酉醛啷青县韪良香骨鲷丂七集河市弦喜嘴张舌堵区工业姊妹星架构巧彩扭歪拼凑余热曜武州爷浮屠美乡老阶树荤素碎落能魄鳃鳗珠丄丅丆万俟丈尚摸母娘量管群亚虎必我堂令申件装伏位博侠义界表女墟台戏臭皮匠胜诸葛亮赛顶倍催请运算包立叉戟离疫苗土史志演围揭瓦晒夷姑婆帝村宝烂尖杉碱屉桌山岔岛由纪峡坝库镇废从德后拗汤治旬食明昧曹朋友框栏极权幂曲归依猫民氟硼氯磷铁江侗自旅法司洋浦梅园温暖湾焦班幸用田略番叠皇炮捶硝苯酸腺苷棱草镜穗跳远索锦纲聚氰胺联店胚膲爱色堇紫罗兰芝茶饭菱云虫藏藩乱叛苏亲债凳学座恐恋柱测肌腹衩锥系貂企乌跪叩军车农题迭都甘油屯奏键短阿姨陪姐只顾茅庐槽驾魂鲜鹿页其菜单乘任供势午齿汉组织吊调泻唇坡城报坟外夸将尉建筑岸岗公床扬新剑升杭林栗校楼标款汽社浣海商馆剧院钢华港机械广媒环球融第医科证券综财乐育游涨犹岭疏瘾睑确兵领导缴肢膛船艾瑟尔苍蔡虞效衫覆访诉课谕议轨述野钩限敌鞋颌颔颚饶首龈站例修凡划垂届属崽颏厨拜挫摆放旋削棋榻槛礼沉注滑营狱画确仪聘花葬诏员跌辖周达酒锚闸陷陆雨雪飞威丌于丹久乏予理评产亢卑亦乎舞己悲矩圆词害志但住佞佳便俗信票案幅翁倦伦假偏倚斜亏鬼敲停备伤脾胃仅此像俭匮免宜穴焉戴兼容许冻伯仲负彼昼皂轩轾实刊划颠卫战哥比省非好黄饰别拘束掩奶睬选择摇扰烦苦枚写协厌及格受欢迎约只估侵犯割状告或缺抗拒挽撤救药喻磨灭端倪少逆逾越避靠适吉誉吝玉含延咎歹听啻渊善谋均匀堪忍够太惹妙妥妨孕症孝术室完纳推冠积宣疑辩栗碴称屈挠屑干涉衡待很忙恶忿怎么怠急耻恭息悦惑惜惟想愉愧怍慌愤启懂懈怀材才紧招认扣抵拉舍也罢插揣冒搭撞南墙扩核支攻敢雷攀敬里吗需景智暇曾罪遇朽枉止况竞争辱求愈渝溶济左右袒困补爽特寂寞示弱找谢畏强疾徐痛痒冤符眠睦瞅董何厚云措活疲羞者轻玻璃祥兆禁移稂莠稳佛换答简结果盟绝缕途给谈否羁翼耐肖胫毋宁兴舒若菲莱痕迹窠臼虚衰脸兔撒鹰棺范该详讳抬泰让须眉象众赀账费灰赖奇虑训辍辨菽麦辛近送透逞徒速续逮捕遂遑违逊斧钺艰醉锈随观弃显饱脂肪使丏丐帮丒且慢末丕替桃宗王尊凉爵各图屋脊粮署录坛吾禄职胄袭君厦丗北壑桐疹损逢陵鹬丙寅戌氨腈唑纶辰酮脱氢酶醚丞丢现掉纱帽弄扯炮碗丠両丣坐存激肩臻蒂莲悖序驱丨丩丫挺杈髻鬟细介俄伊犁京尼布订普渡央委监察检查剂圈设警队斯督剩震境航舶革防托播促质版蝾螈锋研艺历残消频谱精密制造陲邮候埔坚压坜凹汇执府究邦俘摄寮彬狼岳肺肿庸英讯诊埋粒胞括控码韩暑枪枢砥澳哇牟寿甸钻探篇签缀缝继耳肯照妇埃悬璧轴柜台辣搁浅邪跑纤阮阳私囊魔丮丰姿采丱烧丳丵丶丷丸参寨朗桂瑞砂衷霞貌凤仆舰因嫌宰峰干络牌持旨祭祷簿编罚宾办丼丿乀乂乃乄仰慕盛旷留考验阔乆乇么丑麽乊湖燃乑乒乓乕乖僻忤戾离谬迕乗危肥劫除隙浪婿乙炔肠酰吡咯盐乚乛乜嘢卿玄宫尾狐龟塔嶷兄弟泉章霄钉耙乞扎哀怜恕讨乢乣乤乥乧乨乩童乪乫乭乳晕汁液瑶浆牙癌突窦罩腐胶猪酪蛋糕菌瘤乴乵乶乷乸乹乺乼乾俸冰嘉哕嚎坤妈尸垒旱枯涸俐渴潮涩煸豆燥爹瘦瘪癣瞪袋脆姜贝隆馏乿亀亁叫咕攘扔搞男砸窜蓬麻亃亄亅却亇迟典今临繁累卵奉婚聪躬巨与迁添裂副宿岁怪恶尕仑愣杆硅硫钛铀锰芑杂异钠砷胂磺琥珀舱棍簧胡茬盗浩盆贩郎腿
亍洪亐互欠助勉惠操斥诿系户译亓墓碑刑铃卅渠缤纷斗米旗宪钒灯徽瘟祖拳福谷丰脏腑绑肉腌苓蕴桥铺霸颜闹判喷冈底蛙陉矿亖亘亜罕们娜桑那努哈喀弗烈曼松森杜氏杯奥琛敦戊穆圣裔汇薛孙亟亡佚虏羊牢奋释卷卸契媾感额睫缠谊趾塞挤纽阻还配驰庄亨洛祚亪享津沪畿郊慈菴枇杷膏亭阁锃丽亳亶亹诛初责翻疯偶杰丛稠妖拖寰居吸授慧蜗吞壮魅狗矛盾益渣患忧稀描猿梦暂涯畜祸缘沸搜引擎臣横纭谁混援蒸兽狮税剖亻亼亽亡什献刹邡么仂仃仄仆富怨仈仉毕昔晨壳绍仍仏仒仕宦仗欺恃腰叹叹炬梓讫施仙后琼逝仚仝仞仟悔仡佬偿填泊拓扑簇羔购顿钦佩发棻阃驭养亿儆尤借帧赈凌叙帖李柔刚沃眦睚戒讹取飨读仨仫仮著泳卧躺韶夏裁仳仵唯贤凭钓诞仿似宋佛讽伀硕盼鹅伄儅伈伉俪柯始娃迈戈坦堡帕茨萨庙玛莉莎藤霍姆伋伍奢胥廷芳豪伎俩侍汛勒希羲雏伐憩整谟闲闲伕伙伴颐伜伝伢叔恒兹恩翰伱伲侣伶俜悧鼬伸懒缩喇叭伹伺伻伽倻辐伾似佃伫布乔妮墨佉卢佌贷劣廉昂档浓矮伞洼缓耗胸谷迷挡率龋宅沫舍疗佐贰佑占优据铧尝呢须鲁晓佗佘余坪寺瓜铳僧蒙芒陀龛哼呕坊奸孽弊揖祟茧缚誓贼佝偻瞀佟你夺赶佡佢佣佤佧贾佪佫佯佰佱洁绩酿肴佴卷佶佷佸佹佺佻佼佽佾具唤窘坏娱怒慨硬习惯聋膨胀蔓骇贵痹侀侁侂侃侄侅鸿燕侇侈糜靡侉侌妾侏儒仓鼠侐侑侔仑侘侚链侜偎傍钴循柳葫芦附価侮骂蔑侯岩截蚀局贴壶嬛宴捷携桶笺酌俣狭膝狄俅俉俊俏俎俑俓俔谚俚俛黎健呈固墒增守康箱湿祐镖镳杠盒靖膜龄俞豹猎噪孚封札筒托衍鸽剪撰稿炼厂禊练缮葺俯瞰撑冲效俳俴俵俶俷俺备俾伥倂倅储卒惶敷猝逃颉蓄崇隐倌倏忽刺蜡烛噍嚼坍扁抽毙葱楣灌灶粪背薮卖赔闭霉腾倓倔幸倘倜傥倝借箸挹浇阅倡狂倢倣値倥偬倨傲倩匡嗣冲柝珍倬倭寇猩倮倶倷倹勤赞偁偃充伪吏嗓寐惺扮拱芫茜藉虢钞偈伟晶偌宕距析滤殿疼瘫注颇偓偕鸭歇滞偝偟偢忘怡旺偨偩逼偫偭偯偰偱偲侦缉蹄偷减惰漏窥窃偸偺迹傀儡傅傈僳骂篱傎奎琳迪叟芭傒傔傕伧悉荒傜傞傢傣芽逼佣婢傮睨寄檄诵谣颂伛担辜弓惨蒿悼疤傺傻屄臆巢泄箧羡盖轧颓傿㑩僄僇佥僊働僎侨僔僖僚僝伪僣僤侥僦猴偾僩僬僭僮僯僰雇僵殖签静僾僿征陇儁侬儃儇侩朴薄儊儋儌儍傧儓俦侪拟尽儜儞儤儦儩汰哉寡渥裕酷儭儱罐儳儵儹傩俨儽兀臬臲鹫允勋勋宙宵帅憝彝谐嫂阋畅沛溢盈饥赫凶悍狠猛顽愚妣斩秦遣鞭耀敏荣槃泽爆碟磁秃缆辉霁卤朵娄孜烽酱勃汀箕裘钳耶蒙蕾彻兑软遭黜兎児韵媳爸兕觥兖兙兛兜售鍪肚兝兞兟兡兢兣樽殓涅睡禀籍赘泌啡肽奸幕涵涝熵疚眷稃衬讧赴焕椒歼植跏没试误猜栖窗肋袖颊兪卦撇胡岐廓轿疸枫茴珑厕秩募勺吨寓斤历亩迫筷厘最淫螺韬兮宽匪筛襄赢轭复兲诈刃堰戎痞蚁饷它冀铸冂冃円冇冉册嫁厉砺竭醮冏牧冑冓冔冕冖冗冘冞冢窄抑诬冥冫烘菇蛰冷凝坨橇淇淋炭饼砖碛窖醋雕雹霜冱冶炉艳嘲峻滩淡漠煖飕饮冼冽凃凄怆梗凅凇净凊凋敝蒙凔凛遵汞脢凞几凢処凰凯凵凶焰凸折刷纹预丧喽奔巡榜殡芙蓉租笼辑鞘萃凼锯镬刁蛮刂娩崩批拆摊掰蘖骤歧颗秒袂赃勿嘱忌磋琢肤刈羽刎讼戮舂桨艇刓刖霹雳刜创犊刡恙墅帜筵致劫劫刨昏默攸尿欲熏润薰圭删刮痧铲刱刲刳刴刵踏磅戳柏槐绣芹苋猬舟铭鹄鹜劫剁剃辫刭锉履铅克剌姻咽哨廊掠桅沿召瞻翅赵卜渺茫郭剒剔剕沥剚愎毅讷才剜剥啄采剞剟剡剣剤䌽剐肾驶黏剰袍剀紊铲剸剺剽剿劁劂札劈啪柴扳啦刘奭姥夼昫涓熙禅禹锡翔雁鹗刽刿弩柄蜻蛉劒劓劖劘劙澜篑赏矶釜晋甜薪逐劦熔纣虐赤囚劬劭労劵效劻劼劾峭艮勅勇励勍勐腊脖庞漫饲荡粥辄勖勗勘骄馁碌泮雇捐竹骑殊阱绩朴恳谨剿勧勩勯勰劢勋勷劝惩慰诫谏勹芡践阑匁庇拯粟扎袱裹饺匆遽匈匉匊匋匍匐茎匏匕妆痰脓蛹斋苑烤蹈塘羌熊阀螳螂疆碚竿纬荷茵邙魏匚匜匝匟扶稷匣匦拢匸匹耦匽匾匿卂叮疮禧轸堤棚迢钧炼卄卆遐卉瓷盲瓶当胱腱裸卋卌卍卐怯污贱鄙龌龊陋卓溪唐梯渔陈枣泥漳浔涧梨芬谯赡辕迦郑単驴弈洽鳌卛占筮卝卞卟吩啉屎翠厄卣卨卪卬卮榫袄玺绶钮蚤惧殆笃耸卲帘帙绕恤卼卽厂厎厓厔厖厗奚厘厍厜厝谅厕厤厥厪腻孢厮厰厳厣厹厺粕垢芜菁厼厾叁悟茸薯叄吵笄悌哺讥坫垄弧芯杠潜婴刍袁诘贪谍煽馈驳収岳缔灾贿骗叚叡吻拦蘑蜜诀燧玩砚筝椎蔺铜逗骊另觅叨唠谒杵姓喊嚷嚣咚咛塑寻恼憎擦只泣渗蝠叱吒咄咤喝籀黛舵舷叵叶铎懿昭穰苴辽叻叼吁堑嫖赌瞧爬众抒吅吆夥卺橡涤抱纵摩郡唁坠扇篮膀袜颈吋忾谘酬哭妓媛暗表缰迩妃羿絮蕃浑拐葵暮隅吔吖啶嗪戚吜啬噬咽吟哦咏吠吧唧嗒咐吪隽咀征燐苞茹钙哧吮吰吱嘎吲哚吴栋娇窟孟箫忠晗淞阖闾趼宇呐睛嘘拂捧疵熄竽笛糠吼吽呀吕韦蒙呃呆笨呇贡呉罄呋喃呎呏呔呠呡痴呣呤呦呧瑛眩扒晬淑姬瑜璇鹃呪呫哔嚅嗫呬呯呰呱呲咧噌钝呴呶呷呸呺呻哱咻啸噜吁坎坷逻呿咁咂咆哮咇咈咋蟹煦珅蔼咍咑咒诅咔哒嚓咾哝哩喱咗咠咡咢咣咥咦咨嗟询咩咪咫啮啮咭咮咱咲咳呛嗽咴啕咸咹咺呙喉咿婉恸悯赋矜绿茗蓝哂抢瞒哆嗦啰噻啾滨彗哋哌哎唷哟哏哐哞哢哤哪里哫啼喘哰哲萎蚌哳咩哽哿呗唅唆唈唉唎唏哗尧棣殇璜睿肃唔睇唕吣唞唣喳唪唬唰喏唲唳唵嘛唶唸唹唻唼唾唿啁啃鹦鹉啅埠栈榷祺铺鞅飙啊啍啎啐啓啕啖啗啜哑祈啢衔啤啥啫啱啲啵啺饥啽噶昆沁喁喂喆裙喈咙喋喌喎喑喒喓喔粗喙幛庆滋鹊喟喣喤喥喦喧骚喨喩梆吃葡萄喭驼挑吓碰枞瓣纯疱藻趟
铬喵営喹喺喼喿嗀嗃嗄嗅嗈嗉嗊嗍嗐嗑嗔诟嗕嗖嗙嗛嗜痂癖嗝嗡嗤嗥嗨唢嗬嗯嗰嗲嗵叽嗷嗹嗾嗿嘀嘁嘂嘅惋嘈峪禾荫啀嘌嘏嘐嘒啯啧嘚唛嘞嘟囔嘣嘥嘦嘧嘬嘭这谑严敞馋松哓嘶嗥呒虾嘹嘻啴嘿噀噂噅噇噉噎噏噔噗噘噙噚咝噞噢噤蝉皿噩噫噭嗳噱哙噳嚏涌洒欲巫霏噷噼嚃嚄嚆抖哜尝嚔苏嚚嚜嚞嚟呖嚬嚭嚮嚯亸喾饬按竣苛嚵嘤啭冁呓膪谦囍囒囓囗囘萧酚飘溅谛囝溯眸纥銮鹘囟殉囡団囤囥囧囨囱囫囵囬囮囯囲図囶囷囸囹圄圉拟囻囿圀圂圃圊粹蠹赦圌垦圏滚鲱凿枘圕圛圜圞坯埂壤骸炕祠窑豚绅魠鲮鳖圧握圩圪垯圬圮圯炸岬幔毯祇窨菩溉圳圴圻圾坂坆沾坋坌舛壈昆垫墩椅坒坓坩埚坭坰坱坳坴坵坻坼杨挣涎帘垃垈垌垍垓垔垕垗垚垛垝垣垞垟垤垧垮垵垺垾垿埀畔埄埆埇埈埌殃隍埏埒埕埗埜垭埤埦埧埭埯埰埲埳埴埵埶绋埸培怖桩础辅埼埽堀诃侄庑堃堄摧磐贞韧砌堈堉垩堋堌堍堎垴堙堞堠礁堧堨舆堭堮蜓摘堲堳堽堿塁塄塈煤茔棵塍垲埘塓绸塕鸦沽虱塙冢塝缪塡坞埙塥塩塬塱场螨塼塽塾塿墀墁墈墉墐夯増毁墝墠墦渍钵墫墬堕墰墺墙橱壅壆壊壌壎壒榨蒜壔壕壖圹垆壜壝垅壡壬壭壱売壴壹壻壸寝壿夂夅夆変夊夌漱邑夓腕泄甥御骼夗夘夙衮瑙妊娠醣枭珊莺鹭戗幻魇夤蹀秘擂鸫姚宛闺屿庾挞拇賛蛤裨菠氅漓捞湄蚊霆鲨箐篆篷荆肆舅荔鲆巷惭骰辟邱镕镰阪漂烩鲵鲽鳄鸨胪鹏妒峨谭枰晏玑癸祝秤竺牡籁恢罡蝼蝎赐绒御梭夬夭砣榆怙枕夶夹馅奄崛葩谲奈贺祀赠奌奂奓奕䜣詝奘奜奠奡奣陶奨奁魁奫奬奰娲孩贬隶酥宄狡猾她姹嫣妁毡荼皋膻蝇嫔妄妍嫉媚娆妗趣妚妞妤碍妬娅妯娌妲妳妵妺姁姅姉姗姒姘姙姜姝姞姣姤姧姫姮娥姱姸姺姽婀娀诱慑胁娉婷娑娓娟娣娭娯娵娶娸娼婊婐婕婞婤婥溪孺婧婪婬婹婺婼婽媁媄媊媕媞媟媠媢媬媮妫媲媵媸媺媻媪眯媿嫄嫈袅嫏嫕妪嫘嫚嫜嫠嫡嫦嫩嫪毐嫫嫬嫰妩嫺娴嫽嫿妫嬃嬅嬉耍婵痴艳嬔嬖嬗嫱袅嫒嬢嬷嬦嬬嬭幼嬲嬴婶嬹嬾嬿孀娘孅娈孏曰癫屏孑孓雀孖斟篓谜摺孛矻鸠崮轲祜鸾孥邈毓棠膑孬孭孰孱孳孵泛罔衔孻孪宀宁冗拙株薇掣抚琪瓿榴谧弥宊濂祁瑕宍宏碁宓邸谳実潢町宥宧宨宬徵崎骏掖阙臊煮禽蚕宸豫寀寁寥寃檐庶寎暄碜寔寖寘寙寛寠苫寤肘洱滥蒗陕核寪弘绰螽宝擅疙瘩晷対檐専尃尅赎绌缭畴衅尌峙醌襟痲碧屁昊槌淘恵瀑牝畑莓缸羚觑蔻脏躁尔尓锐尗尙尜尟尢尥尨尪尬尭尰擒尲尶尴尸尹潽蠖蛾尻扣梢蚴鳍脬蹲屇屌蚵屐屃挪屖屘屙屛屝屡屣峦嶂岩舄屧屦屩屪屃屮戍驻钾崖嵛巅旮旯楂榄榉芋茱萸靛麓屴屹屺屼岀岊岌岍阜岑彭巩岒岝岢岚岣岧岨岫岱岵岷峁峇峋峒峓峞峠嵋峨峰峱岘峹峿崀崁崆祯崋崌崃岖昆崒崔嵬巍萤颢崚崞崟崠峥巆崤崦崧殂岽崱崳崴崶崿嵂嵇嵊泗嵌嵎嵒嵓岁嵙嵞嵡嵩嵫嵯嵴嵼嵾嵝崭崭晴嶋嶌嶒嶓嵚崂嶙嶝嶞峤嶡嶢峄嶨嶭嶮嶰嶲岙嵘巂巃巇巉岿巌巓巘巛滇芎巟巠弋回巣巤炊擘蜥蟒蛊觋巰蜀彦淖杏茂甫楞巻巽帼巿帛斐鲫蕊帑帔帗帚琉汶帟帡帣帨裙帯帰帷帹暆帏幄帮幋幌幏帻幙帮幞幠幡幢幦幨幩幪帱幭幯幰遥蹉跎馀庚鉴幵幷稚邃庀庁広庄庈庉笠庋跋庖牺庠庤庥鲸庬庱庳庴庵馨衢庹庿廃厩廆廋廌廎廏廐廑廒荫廖廛厮搏锣廞弛袤廥廧廨廪廱绵踵髓廸迫瓯邺廻廼廾廿躔弁皱弇弌弍弎弐弑吊诡憾荐弝弢弣弤弨弭弮弰弪霖繇焘斌旭溥骞弶弸弼弾彀彄别累纠强彔彖彘彟彟陌彤贻彧绘虹彪炳雕蔚鸥彰瘅彲彳彴仿彷徉徨彸彽踩敛旆徂徇徊渭畲铉裼従筌徘徙徜徕膳苏萌渐徬徭醺徯徳徴潘徻徼忀瘁胖燎怦悸颤扉犀澎湃砰恍惚绞隘忉惮挨饿忐忑忒忖応忝忞耿忡忪忭忮忱忸怩忻悠懑怏遏怔怗怚怛怞怼黍讶怫怭懦怱怲恍怵惕怸怹恁恂恇恉恌恏恒恓恔恘恚恛恝恞恟恠恣恧眄恪恫恬澹恰恿悀悁悃悄悆悊悐悒晦悚悛悜悝悤您悩悪悮悰悱凄恻德悴怅惘闷悻悾惄愫钟蒐惆惇惌惎惏惓惔惙惛耄惝疟浊恿惦德恽惴蠢惸拈愀愃愆愈愊愍愐愑愒愓愔愕恪氓蠢騃昵惬赧悫愬愮愯恺愼慁恿慅慆慇霭慉慊愠慝慥怄怂慬慱悭慴慵慷戚焚憀灼郁憃惫憋憍眺捏轼愦憔憖憙憧憬憨憪憭怃憯憷憸憹憺懃懅懆邀懊懋怿懔懐懞懠懤懥恹懫懮懰懱毖懵遁梁雍忏懽戁戄戆戉戋戕戛戝戛戠戡戢戣戤戥戦戬戭戯轰戱披菊牖戸戹戺戻卯戽锹扂楔扃扆扈扊杖牵绢铐镯赉扐搂搅烊盹瞌跟趸镲靶鼾払扗玫腮扛扞扠扡扢盔押扤扦扱罾揄绥鞍郤窾扻扼扽抃抆抈抉抌抏瞎抔缳缢擞抜拗択抨摔歉蹿牾抶抻搐泵菸拃拄拊髀抛拌脯拎拏拑擢秧沓曳挛迂拚拝拠拡拫拭拮踢拴拶拷攒拽掇芥橐簪摹疔挈瓢骥捺蹻挌挍挎挐拣挓挖掘浚挙揍聩挲挶挟挿捂捃捄捅捆捉捋胳膊揎捌捍捎躯蛛捗捘捙捜捥捩扪捭据捱捻捼捽掀掂抡臀膘掊掎掏掐笙掔掗掞棉芍掤搪阐掫掮掯揉掱掲掽掾揃揅揆搓揌诨揕揗揘揜揝揞揠揥揩揪揫橥遒麈揰揲揵揶揸背揺搆搉搊搋搌搎搔搕撼橹捣搘搠搡搢搣搤搥搦搧搨搬楦裢讪赸掏搰搲搳搴揾搷搽搾搿摀摁摂摃摎掴摒摓跤摙摛掼摞摠摦喉羯摭摮挚摰摲抠摴抟摷掺摽撂撃撅稻撊撋挦锏泼撕撙撚㧑挢撢掸撦撅撩撬撱朔揿蚍蜉挝捡擀掳闯擉缶觚擐擕擖擗擡擣擤澡腚擧擨擩擫擭摈拧撷擸撸擽擿攃摅撵攉攥攐攓撄搀撺每攩攫辔澄攮攰攲攴轶攷砭讦攽碘敁敃敇敉叙敎筏敔敕敖闰诲敜煌敧敪敳敹敺敻敿斁衽斄牒绉诌斉斎斓鹑谰驳鳢斒筲斛斝斞斠斡斢斨斫斮晾沂潟颖绛邵斲斸釳於琅斾斿旀旗旃旄涡旌旎旐旒旓旖旛旝旟旡旣浴旰獭魃旴时旻旼旽昀昃昄昇昉晰躲澈熹皎皓矾昑昕昜昝昞昡昤
晖笋昦昨是昱昳昴昶昺昻晁蹇隧蔬髦晄晅晒晛晜晞晟晡晢晤晥曦晩萘莹顗晿暁暋暌暍暐暔暕煅旸暝暠暡曚暦暨暪朦胧昵暲殄冯暵暸暹暻暾曀晔昙曈曌曏曐暧曘曙曛叠昽曩骆曱甴肱曷牍禺锟曽沧耽朁朅朆杪栓夸竟粘绦朊膺朏朐朓朕朘朙瞄觐溘饔飧朠朢朣栅椆淀虱朩朮朰朱炆璋钰炽鹮朳槿朵朾朿杅杇杌陧欣钊湛漼楷瀍煜玟缨翱肇舜贽适逵杓杕杗杙荀蘅杝杞脩珓筊杰榔狍閦颦缅莞杲杳眇杴杶杸杻杼枋枌枒枓衾葄翘纾逋枙狸桠枟槁枲枳枴枵枷枸橼枹枻柁柂柃柅柈柊柎某柑橘柒柘柙柚柜柞栎柟柢柣柤柩柬柮柰柲橙柶柷柸柺査柿栃栄栒栔栘栝栟柏栩栫栭栱栲栳栴檀栵栻桀骜桁镁桄桉桋桎梏椹葚桓桔桕桜桟桫椤桭杯桯桲桴桷桹湘溟梃梊梍梐潼栀枧梜梠梡梣梧梩梱梲梳梴梵梹棁棃樱棐棑棕榈簑绷蓑枨棘棜棨棩棪棫棬棯棰棱棳棸棹椁棼碗椄苕椈椊椋椌椐椑椓椗検椤椪椰椳椴椵椷椸椽椿楀匾楅篪楋楍楎楗楘楙楛楝楟楠楢楥桢楩楪楫楬楮楯楰梅楸楹楻楽榀榃榊榎槺榕榖榘榛狉莽搒笞榠榡榤榥榦榧杩榭榰榱梿霰榼榾桤槊闩槎槑槔槖様槜槢槥椠槪槭椮槱槲槻槼槾樆樊樏樑樕樗樘樛樟樠樧樨権樲樴樵猢狲桦樻罍樾樿橁橄橆桡笥龠橕橚橛辆椭橤橧竖膈跨橾橿檩檃檇柽檍檎檑檖檗桧槚檠樯檨檫檬梼槟檴檵柠棹櫆櫌栉櫜椟櫡槠栌枥榇栊櫹棂茄櫽欀欂欃欐欑栾欙棂溴欨欬欱欵欶欷歔欸欹欻欼欿歁歃歆艎歈歊莳蝶歓歕歘歙歛歜欤歠蹦诠镶蹒跚升陟歩歮歯歰歳歴璞歺瞑歾殁夭殈殍殑殗殜殙殛殒殢殣殥殪殚僵殰殳荃殷殸殹蛟殻肴谤殴毈毉喂毎毑蕈毗毘毚茛邓毧毬毳毷毹毽毾毵牦氄氆靴氉氊氇氍氐聊氕氖気氘氙氚氛氜氝氡汹焊痉氤氲氥氦铝锌氪烃氩铵痤汪浒漉痘盂碾菖蒲蕹蛭螅氵冰氹氺氽烫氾氿渚汆汊汋汍汎汏汐汔汕褟汙汚汜蓠沼秽蔑汧汨汩汭汲汳汴堤汾沄沅沆瀣沇沈葆浸沦湎溺痼疴沌沍沏沐沔沕沘浜畹砾沚沢沬沭沮沰沱灢沴沷籽沺烹濡洄泂肛泅泆涌肓泐泑泒泓泔泖泙泚泜泝泠漩馍涛粼泞藓鳅泩泫泭泯铢泱泲洇洊泾琵琶荽蓟箔洌洎洏洑潄濯洙洚洟洢洣洧洨洩痢滔洫洮洳洴洵洸洹洺洼洿淌蜚浄浉浙赣渫浠浡浤浥淼瀚浬浭翩萍浯浰蜃淀苔蛞蝓蜇螵蛸煲鲤浃浼浽溦涂涊涐涑涒涔滂莅涘涙涪涫涬涮涴涶涷涿淄淅淆淊凄黯淓淙涟淜淝淟淠淢淤渌淦淩猥藿亵淬淮淯淰淳诣涞纺淸淹炖癯绮渇済渉渋渓渕涣渟渢滓渤澥渧渨渮渰渲渶渼湅湉湋湍湑湓湔黔湜湝浈湟湢湣湩湫湮麟湱湲湴涅満沩溍溎溏溛舐漭溠溤溧驯溮溱溲溳溵溷溻溼溽溾滁滃滉滊荥滏稽滕滘汇滝滫滮羼耷卤滹浐煎漈漊漎绎漕漖漘漙沤漜漪漾漥漦漯漰溆漶漷濞潀颍潎潏潕潗潚潝潞潠潦祉疡潲潵滗潸潺潾涠澁澂澃澉澌澍澐澒澔澙渑澣澦澧澨澫澬浍澰澴澶澼熏郁濆濇濈濉濊貊濔疣濜濠濩觞浚濮盥潍濲泺瀁滢渎渖瀌浏瀒瀔濒泸瀛潇潆瀡潴泷濑瀬弥潋瀳瀵瀹瀺瀼沣滠灉灋灒漓灖灏灞灠滦灥灨滟灪蜴灮烬獴灴灸灺炁炅鱿炗炘炙炤炫疽烙钎炯炰炱炲炴炷毁炻烀烋瘴鲳烓烔焙烜烝烳饪烺焃焄耆焌焐焓焗焜焞焠焢焮焯焱焼煁煃煆煇煊熠煍熬煐炜煕暖熏硷霾煚煝煟煠茕矸煨琐炀萁煳煺煻熀熅熇熉罴荧穹炝熘熛熜稔谙烁熤熨熯熰眶蚂颎熳熸熿燀烨燂燄盏燊燋燏燔隼燖焖燠燡灿燨燮燹燻燽燿爇爊爓爚爝爟爨蟾爯爰为爻丬爿牀牁牂牄牋窗牏牓窗釉牚腩蒡虻牠虽蛎牣牤牮牯牲牳牴牷牸牼绊牿靬犂犄犆犇犉犍犎犒荦犗犛犟犠犨犩犪犮犰狳犴犵犺狁甩狃狆狎狒獾狘狙黠狨狩狫狴狷狺狻豕狈蜘猁猇猈猊猋猓猖獗猗猘狰狞犸猞猟獕猭猱猲猳猷猸猹猺玃獀獃獉獍獏獐獒毙獙獚獜獝獞獠獢獣獧鼇蹊狯猃獬豸狝獯鬻獳犷猕猡玁菟玅玆玈珉糁禛郅玍玎玓瓅玔玕玖玗玘玞玠玡玢玤玥玦珏瑰玭玳瑁玶玷玹玼珂珇珈瑚珌馐馔珔珖珙珛珞珡珣珥珧珩珪佩珶珷珺珽琀琁陨玡琇琖琚琠琤琦琨琫琬琭琮琯琰琱琲琅琴珐珲瑀瑂瑄瑉玮瑑瑔瑗瑢瑭瑱瑲瑳瑽瑾瑿璀璨璁璅璆璈琏璊璐璘璚璝璟璠璡璥瑷璩璪璫璯璲玙璸璺璿瓀璎瓖瓘瓒瓛脐瓞瓠瓤瓧瓩瓮瓰瓱瓴瓸瓻瓼甀甁甃甄甇甋甍甎甏甑甒甓甔瓮甖甗饴蔗甙诧钜粱盎锈团甡褥産甪甬甭甮宁铠甹甽甾甿畀畁畇畈畊畋畎畓畚畛畟鄂畤畦畧荻畯畳畵畷畸畽畾疃叠疋疍疎箪疐疒疕疘疝疢疥疧疳疶疿痁痄痊痌痍痏痐痒痔痗瘢痚痠痡痣痦痩痭痯痱痳痵痻痿瘀痖瘃瘈瘉瘊瘌瘏瘐痪瘕瘖瘙瘚瘛疭瘜瘝瘗瘠瘥瘨瘭瘆瘯瘰疬瘳疠瘵瘸瘺瘘瘼癃痨痫癈癎癐癔癙癜癠疖症癞蟆癪瘿痈発踔绀蔫酵皙砬砒翎翳蔹钨镴皑鹎驹暨粤褶皀皁荚皃镈皈皌皋皒朱皕皖皘皜皝皞皤皦皨皪皫皭糙绽皴皲皻皽盅盋碗盍盚盝踞盦盩秋千盬盭眦睁瞤盯盱眙裰盵盻睐眂眅眈眊県眑眕眚眛眞眢眣眭眳眴眵眹瞓眽郛睃睅睆睊睍睎困睒睖睙睟睠睢睥睪睾睯睽睾眯瞈瞋瞍逛瞏瞕瞖眍䁖瞟瞠瞢瞫瞭瞳瞵瞷瞹瞽阇瞿眬矉矍铄矔矗矙瞩矞矟矠矣矧矬矫矰矱硪碇磙罅舫阡、矼矽礓砃砅砆砉砍砑砕砝砟砠砢砦砧砩砫砮砳艏砵砹砼硇硌硍硎硏硐硒硜硖砗磲茚钡硭硻硾碃碉碏碣碓碔碞碡碪碫碬砀碯碲砜碻礴磈磉磎硙磔磕磖磛磟磠磡磤磥蹭磪磬磴磵磹磻硗礀硚礅礌礐礚礜礞礤礧礮砻礲礵礽礿祂祄祅祆禳祊祍祏祓祔祕祗祘祛祧祫祲祻祼饵脔锢禂禇禋祦禔祎隋禖禘禚禜禝禠祃禢禤禥禨禫祢禴禸秆秈秊闱飒秋秏秕笈蘵赁秠秣秪秫秬秭秷秸稊稌稍稑稗稙稛稞稬秸稲稹稼颡稿穂穄穇穈穉穋稣贮穏穜穟秾穑穣穤穧穨穭穮穵穸窿阒窀窂窅窆窈窕窊窋窌窒窗窔窞窣窬黩蹙窑窳窴窵窭
窸窗竁竃竈竑竜并竦竖篦篾笆鲛竾笉笊笎笏笐靥笓笤箓笪笫笭笮笰笱笲笳笵笸笻筀筅筇筈筎筑筘筠筤筥筦笕筒筭箸筰筱筳筴宴筸箂个箊箎箑箒箘箙箛箜篌箝箠箬镞箯箴箾篁筼筜篘篙篚篛篜篝篟篠篡篢篥篧篨篭篰篲筚篴篶篹篼箦簁簃簆簉簋簌簏簜簟簠簥簦簨簬簰簸簻籊藤籒籓籔签籚篯箨籣籥籧笾簖籫籯芾麴籵籸籹籼粁秕粋粑粔粝粛粞粢粧粨粲粳稗粻粽辟粿糅糆糈糌糍糒糔萼糗蛆蹋糢糨糬粽糯糱籴粜糸糺紃蹼鲣霉纡纨绔纫闽襻紑纰纮锭鸢鹞纴紞紟扎紩紬绂绁纻紽紾绐絁絃絅経絍绗絏缡褵絓絖絘絜绚絣螯絪絫聒絰絵绝絺絻絿綀绡綅绠绨绣綌綍綎捆綖綘継続缎绻綦綪线綮綯绾罟蝽綷縩绺绫緁绲緅緆缁绯緌緎総緑绱緖缃缄缂绵缗緤褓缌纂緪緰缑缈缏缇縁縃縄萦缙缒縏缣縕缞縚缜缟缛縠縡縢縦绦縯縰骋缧縳纤缦絷缥縻衙縿繄缫繈繊繋繐缯繖繘繙繠缋繣繨缰缲繸繻缱纁纆纇缬缵纩纑纕缵纙纚纛缾罃罆坛罋罂罎罏罖罘罛罝罠罣罥罦罨罫罭锾罳罶罹罻罽罿羂羃羇芈蕉５１鸵羑羖羌羜羝羢羣羟羧羭羮羰羱羵羶羸藜鲐翀翃翅翊翌翏翕翛翟翡翣翥翦跹翪翫翚翮翯翱翽翾翿板饕鸹锨耋耇耎耏专耒耜耔耞耡耤耨耩耪耧耰鬓耵聍聃聆聎聝聡聦聱聴聂聼阈聿肄肏肐肕腋肙肜肟肧胛肫肬肭肰肴肵肸肼胊胍胏胑胔胗胙胝胠铨胤胦胩胬胭胯胰胲胴胹胻胼胾脇脘脝脞脡脣脤脥脧脰脲脳腆腊腌臜腍腒腓胨腜腠脶腥腧腬腯踝蹬镣腴腶蠕诽膂腽嗉膇膋膔腘膗膙膟黐膣膦膫膰膴膵膷脍臃臄臇臈臌臐臑臓膘臖臙臛臝臞臧蓐诩臽臾臿舀舁鳑鲏舋舎舔舗馆舝舠舡舢舨舭舲舳舴舸舺艁艄艅艉艋艑艕艖艗艘艚艜艟艣舣艨艩舻艬艭荏艴艳艸艹艻艿芃芄芊萰陂藭芏芔芘芚蕙芟芣芤茉芧芨芩芪芮芰鲢芴芷芸荛豢芼芿苄苒苘苙苜蓿苠苡苣荬苤苎苪镑苶苹苺苻苾茀茁范蠡萣茆茇茈茌茍茖茞茠茢茥茦菰茭茯茳藨茷藘茼荁荄荅荇荈菅蜢鸮荍荑荘豆荵荸荠莆莒莔莕莘莙莚莛莜莝莦莨菪莩莪莭莰莿菀菆菉菎菏菐菑菓菔芲菘菝菡菢菣菥蓂菧菫毂蓥菶菷菹醢菺菻菼菾萅萆苌萋萏萐萑萜萩萱萴莴扁萻葇葍葎葑荭葖葙葠葥苇葧葭药葳葴葶葸葹葽蒄蒎莼茏薹莅蒟蒻蒢蒦蒨蒭藁蒯蒱鉾蒴蒹蒺蒽荪蓁蓆蓇蓊蓌蓍蓏蓓蓖蓧蓪蓫荜跣藕苁蓰蓱莼蓷蓺蓼蔀蔂蔃蔆蔇蔉蔊蔋蔌蔎蔕蔘蔙蒌蔟锷蒋雯茑蔯蔳麻蔵蔸蔾荨蒇蕋蕍荞蕐蕑芸莸蕖蕗蕝蕞蕠蕡蒉蕣蕤蕨蕳蓣蕸蕺蕻薀薁薃薅薆荟薉芗薏薐蔷薖薘剃谔钗薜薠薢薤薧薨薫薬薳薶薷薸薽薾薿藄藇藋荩藐藙藚藟藦藳藴苈藷藾蘀蘁蕲苹蘗蘘蘝蘤蘧蘩蘸蘼虀虆虍蟠虒虓虖虡虣虥虩虬虰蛵蛇虷鳟虺虼蚆蚈蚋蚓蚔蚖蚘蚜蚡蚣蚧蚨蚩蚪蚯蚰蜒蚱蚳蚶蚹蚺蚻蚿蛀蛁蛄蛅蝮蛌蛍蛐蟮蛑蛓蛔蛘蛚蛜蛡蛣蜊蛩蛱蜕螫蜅蚬蜈蝣蜋蜍蜎蜑蠊蜛饯蜞蜣蜨蜩蜮蜱蜷蜺蜾蜿蝀蝃蝋蝌蝍蝎蝏蝗蝘蝙蝝鲼蝡蝤蝥猿蝰虻蝲蝴蝻螃蠏蛳螉螋螒螓螗螘螙螚蟥螟螣螥螬螭䗖螾螀蟀蟅蝈蟊蟋蟑蟓蟛蟜蟟蟢虮蟨蟪蟭蛲蟳蛏蟷蟺蟿蠁蠂蠃虿蠋蛴蠓蚝蠗蠙蠚蠛蠜蠧蟏蠩蜂蠮蠰蠲蠵蠸蠼蠽衁衄衄衇衈衉衋衎衒同衖胡衞裳钩衭衲衵衹衺衿袈裟袗袚袟袢袪袮袲袴袷袺袼褙袽裀裉袅裋夹裍裎裒裛裯裱裲裴裾褀褂褉褊裈褎褐褒褓褔褕袆褚褡褢褦褧褪褫袅褯褰褱裆褛褽褾襁褒襆裥襉襋襌襏襚襛襜裣襞襡襢褴襦襫襬襭襮襕襶襼襽襾覂覃覅霸覉覊覌覗觇覚覜觍觎覧覩觊觏覰観觌觔觕觖觜觽觝觡酲觩觫觭觱觳觯觷觼觾觿言赅讣訇訏訑訒诂讬訧訬訳訹证訾詀詅诋毁詈詊讵詑诒诐詗诎察詨诜詶詸詹詻诙诖誂誃诔锄诓誋诳诶悖誙诮诰誧説読誯谇訚谄谆諆諌诤诹诼諕谂谀諝谝諟喧谥諴諵谌谖誊謆謇歌謍謏謑谡谥謡謦謪谪讴謷謼谩哗譅譆譈譊讹譒撰谮鑫譞噪譩谵譬譱譲谴譸譹谫讅讆詟䜩雠讐谗谶讙谠讟谽豁豉豇岂豊豋豌豏豔豞豖豗豜豝豣豦豨豭豱豳豵豶豷豺豻貅貆狸猊貔貘䝙貜貤餍贳餸贶贲赂賏赊赇赒賝赓赕賨赍斗賮賵賸赚赙赜赟贉赆赑贕赝赬赭赱赳迄趁趂趄趐趑趒趔趡趦趫趮趯趱趴趵趷趹趺趿跁跂跅跆踬跄跐跕跖跗跙跛跦跧跩跫跬跮跱跲跴跺跼跽踅踆踈踉踊踒踖踘踜踟躇蹰踠踡踣踤踥踦踧跷踫踮逾踱踊踶踹踺踼踽躞蹁蹂躏蹎蹐蹓蹔跸蹚蹜蹝迹蹠蹡蹢跶蹧蹩蹪蹯鞠蹽躃躄躅踌跻躐踯跞躘躙躗躝躠蹑躜躧躩躭躰躬躶軃軆辊軏轫軘軜軝腭転軥軨軭軱轱辘軷轵轺軽軿輀輂辇辂辁輈挽輗辄辎辋輠輤輬輭輮辏輴輵輶輹輼辗辒轇轏轑轒辚轕轖轗轘轙轝轞轹轳罪辣辞辵辶辺込辿迅迋迍麿迓迣迤逦迥迨迮迸迺迻迿逄逅逌逍逑逓迳逖逡逭逯逴逶逹遄遅侦遘遛遝遢遨遫遯遰遴绕遹遻邂邅邉邋邎邕邗邘邛邠邢邧邨邯郸邰邲邳邴邶邷邽邾邿郃郄郇郈郔郕郗郙郚郜郝郞郏郠郢郪郫郯郰郲郳郴郷郹郾郿鄀鄄郓鄇鄈鄋鄍鄎鄏鄐鄑邹邬鄕郧鄗鄘鄚鄜鄞鄠鄢鄣鄤鄦鄩鄫鄬鄮鄯鄱郐鄷鄹邝鄻鄾鄿酃酅酆酇郦酊酋酎酏酐酣酔酕醄酖酗酞酡酢酤酩酴酹酺醁醅醆醊醍醐醑醓醖醝酝醡醤醨醪醭醯醰酦醲醴醵醸醹醼醽醾釂酾酽釆釈鲈镏阊钆钇钌钯钋鼢鼹钐钏釪釬釭釱钍釸钕钫鈃钭鈆鈇钚鈊鈌钤钣鈒鈤钬钪鈬铌铈钶铛钹铍钸钿鉄鉆铊铇鉌铋鉏铂钷铆钵鉥钲鉨钼钽鉱鉲鉶铰铒鉼铪銍銎铣銕镂铫铦铑铷銤铱铟銧铥铕铯銭銰焊銶锑锉汞鋂锒鋆鋈鋊铤鋍铗鋐鋑鋕鋘鋙锊锓锔锇铓鋭铖锆锂铽鋳鋹鋺鉴镚钎錀锞锖锫锩錍铔锕錔锱铮锛錞锬锜錤錩錬録铼錼锝钔锴鍉镀鍏鍐铡鍚锻锽锸锲锘鍫鍭鍱鍴锶鍹锗针锺锿镅鎉鎋鎌鎍鎏鎒鎓鎗镉鎚鎞镃鎤铩锼鎭鎯镒镍鎴镓鎸鎹镎镟鏊镆镠
镝鏖铿锵鏚镗镘镛鏠鏦錾镤鏸镪鏻鏽鏾铙鐄鐇鐏铹镦镡鐗馗镫镢镨鐡锎镄鐩镌鐬鐱镭鐶鐻鐽镱鑀鑅镔鑐鑕鑚鑛鑢鑤镥鑪镧鑯鑱鑴鑵镊镢钃镻闫闬闶闳閒闵閗閟阂関合閤哄阆閲阉閺阎阏阍阌暗闉阕阗闑闒闿闘闚阚闟闠闤闼阞阢阤阨阬阯阹阼阽陁陑陔陛陜陡陥陬骘陴険陼陾阴隃隈隒隗隞隠隣隤隩隮隰颧隳隷隹雂雈雉雊雎雑雒雗雘雚雝雟雩雰雱驿霂霅霈霊沾霒霓霙霝霢霣霤霨霩霪霫霮靁叇叆靑靓靣腼靪靮靰靳靷靸靺靼靿鞀鞃鞄鞍鞗鞙鞚鞝鞞鞡鞣鞨鞫鞬鞮鞶鞹鞾鞑韅鞯驮韍韎韔韖韘韝韫韡韣韭韭韱韹韺頀刮頄顸顼頍颀颃颁頖頞頠頫頬颅頯頲颕頼悴顋顑颙颛颜顕顚顜颟顣颥颞飐飑台飓颸飏飖颽颾颿飀飂飚飌翻飡飣饲飥饨饫飮飧飶餀餂饸饹餇餈饽哺馂餖餗餚馄馃餟餠餤餧餩餪餫糊餮糇餲饧馎糕饩馈馊馌馒饇馑馓膳饎饐饘饟馕馘馥馝馡馣骝骡馵馹駃駄駅駆駉駋驽駓驵駗骀驸駜骂骈駪駬骃駴骎駹駽駾騂騄骓騆騉騋骒骐麟騑騒験騕骛騠騢騣騤騧骧騵驺骟騺蓦骖骠骢驆驈骅驌骁驎骣驒驔驖驙驦驩驫骺鲠骫骭肮骱骴骶骷髅骾髁髂髄髆膀髇髑髌髋髙髝髞髟髡髣髧髪髫髭髯髲髳髹髺髽髾鬁鬃鬅鬈鬋鬎鬏鬐鬑鬒鬖鬗鬘鬙鬠鬣斗鬫鬬阄鬯鬰鬲鬵鬷魆魈魊魋魍魉魑魖鳔魛魟魣魦魨魬鲂魵魸鮀鲅鮆鲧鲇鲍鲋鮓鲒鲕鮟鱇鮠鮦鮨鲔鲑鮶鮸鮿鲧鯄鯆鲩鯈鲻鯕鲭鲞鯙鯠鲲鯥鲰鲶鳀鯸鳊鲗䲠鹣鳇鰋鳄鳆鰕鰛鰜鲥鰤鳏鰦鳎鳐鳁鳓鰶鲦鲡鰼鰽鱀鱄鳙鱆鳕鱎鱐鳝鳝鳜鲟鲎鱠鳣鱨鲚鱮鱲鱵鱻鲅鳦凫鳯鳲鳷鳻鴂鴃鴄鸩鴈鴎鸰鴔鴗鸳鸯鸲鹆鸱鴠鴢鸪鴥鸸鹋鴳鸻鴷鴽鵀鵁鸺鹁鵖鵙鹈鹕鹅鵟鵩鹌鵫鵵鵷鵻鹍鶂鶊鶏鶒鹙鶗鶡鶤鶦鶬鶱鹟鶵鶸鶹鹡鶿鹚鷁鷃鷄鷇䴘䴘鷊鷏鹧鷕鹥鸷鷞鷟鸶鹪鹩鷩鷫鷭鹇鹇鸴鷾䴙鸂鸇䴙鸏鸑鸒鸓鸬鹳鸜鹂鹸咸鹾麀麂麃麄麇麋麌麐麑麒麚麛麝麤麸面麫麮麯麰麺麾黁黈黉黢黒黓黕黙黝黟黥黦黧黮黰黱黪黶黹黻黼黾鼋鼂鼃鼅鼈鼍鼏鼐鼒冬鼖鼙鼚鼛鼡鼩鼱鼪鼫鼯鼷鼽齁齆齇齈齉齌赍齑龀齕齗龅齚龇齞龃龉龆齢出齧齩齮齯齰齱齵齾厐龑龒龚龖龘龝龡龢龤'

traditional_characters = '制咖片型超聲盤鑒定仔點他命書歌粉巾字帳恤手指記憶棒形轉彎溝光○〇㐄㐅㐆㐌㐖毒㐜㐡㐤㐰㐺㑇㑳㒳㒸㔾㗂㗎㝵㞎㞙㞞㠯㢲㢴㤅㥁㥯㨗㫺㬎㮎㮚㮸㲋㲱㲾㳮㵎㵪㶸㷖㷭㹢㹴犬㺢狓㺵㼝㽮㿝䍃䔢䖟䖸䗈䗥䗪䝓䠶䥯䦉䯝䰾魚䲔䳗䳘䵹鼄䶑一對應映射丁不識下兒子做二休世丘之貉並中台原則串為甚謂乾淨了百事無成八變五十些人得道雞升天代如併來去個國政策勁幽靈在歐洲遊蕩接樣蘿蔔坑側化傳價元論醇共再准刀兩斷切分耕耘收穫錢貨物向看舊就緒險刻千金動勞永逸匙零夜半卡通回復返影蹤反常態口咬氣句話同吐快吹周味呼諾嗚品紅鍋哄而散起唱和問三知生熟團漆黑火糟堆場空塊麵塌糊塗塵染壁廂夔已足多情露水大早到晚夫妻當關萬莫開失古恨套所料既往孔見提師要家主審寸陰難買鬥牛小撮部陣局展身層巴掌帆風順席地帶過年計於春頭載四季期被蛇怕井繩度願式份彈頃深前律徑心意念差愁孤行俱全房廳交遮打技長把抓死拿眼淚鼻涕鑰鎖折段抿拍即合掃排掬揮撥擁上入擊洞擲攬改故轍敗文值名斑方面旁族日秋餐隔雅里終父旦時晌會霎間晃暴寒曝更月望垠際朝夕本正經利杯羹東西板枝獨秀根筋桿進條龍服務概模次函數又性程總付步腳印趨登毛拔呵氧氮碳決雌雄波未平派謊言流清楚白準溜煙潭有獲聞是處降琴鶴甲病發可拾沙目然瞭直以相眨穿睹瞥瞬矢的解石鳥神教秉虔誠秘種窩蜂窮竅笑置筆苟勾銷抹殺煞等獎箍節吃箭仇雙鵰詩籌籮筐系列紙級士官統絲毫掛維網盡線微吭響股腦胎脈承腔臂力致效資源址器舉功投般說講規貿易葉障著慎滿皆輸號木電池衣傾鐘高低視仁覺醒覽遺角銀幣觸潰九鼎蔽抄出駟馬追重語破貧洗貫走路安蹴至幾蹶振躍役膽汗較輩輪辭贊退六連遍遞邊針血錘音錯門思閃真倒項栽霧類保護川先驚乍體鬨鱗爪鳴滴泡鄰域黨專鼓作齊炒丑烯亥克內酯冬加奴卯肝炎基尺梁街褲鎬客寵庭巳汝昌烷玲磊糖肇酉醛啷青縣韙良香骨鯛丂七集河市弦喜嘴張舌堵區工業姊妹星架構巧彩扭歪拼湊餘熱曜武州爺浮屠美鄉老階樹葷素碎落能魄鰓鰻珠丄丅丆万俟丈尚摸母娘量管群亞虎必我堂令申件裝伏位博俠義界表女墟臺戲臭皮匠勝諸葛亮賽頂倍催請運算包立叉戟離疫苗土史志演圍揭瓦曬夷姑婆帝村寶爛尖杉鹼屜桌山岔島由紀峽壩庫鎮廢從德後拗湯治旬食明昧曹朋友框欄極權冪曲歸依貓民氟硼氯磷鐵江侗自旅法司洋浦梅園溫暖灣焦班幸用田略番疊皇炮捶硝苯酸腺苷稜草鏡穗跳遠索錦綱聚氰胺聯店胚膲愛色堇紫羅蘭芝茶飯菱雲蟲藏藩亂叛蘇親債凳學座恐戀柱測肌腹衩錐係貂企烏跪叩軍車農題迭都甘油屯奏鍵短阿姨陪姐隻顧茅廬槽駕魂鮮鹿頁其菜單乘任供勢午齒漢組織吊調瀉唇坡城報墳外夸將尉建築岸崗公床揚新劍昇杭林栗校樓標款汽社浣海商館劇院鋼華港機械廣媒環球融第醫科證券綜財樂育游漲猶嶺疏癮瞼確兵領導繳肢膛船艾瑟爾蒼蔡虞傚衫覆訪訴課諭議軌述野鉤限敵鞋頜頷顎饒首齦站例修凡劃垂屆屬崽頦廚拜挫擺放旋削棋榻檻禮沉注滑營獄畫确儀聘花葬詔員跌轄週達酒錨閘陷陸雨雪飛威丌于丹久乏予理評產亢卑亦乎舞己悲矩圓詞害誌但住佞佳便俗信票案幅翁倦倫假偏倚斜虧鬼敲停備傷脾胃僅此像儉匱免宜穴焉戴兼容許凍伯仲負彼晝皂軒輊實刊划顛衛戰哥比省非好黃飾別拘束掩奶睬選擇搖擾煩苦枚寫協厭及格受歡迎約只估侵犯割狀告或缺抗拒挽撤救藥喻磨滅端倪少逆逾越避靠適吉譽吝玉含延咎歹聽啻淵善謀均勻堪忍夠太惹妙妥妨孕症孝術室完納推冠積宣疑辯慄碴稱屈撓屑干涉衡待很忙惡忿怎麼怠急恥恭息悅惑惜惟想愉愧怍慌憤啟懂懈懷材才緊招認扣抵拉捨也罷插揣冒搭撞南牆擴核支攻敢雷攀敬裡嗎需景智暇曾罪遇朽枉止況競爭辱求癒渝溶濟左右袒困補爽特寂寞示弱找謝畏強疾徐痛癢冤符眠睦瞅董何厚云措活疲羞者輕玻璃祥兆禁移稂莠穩佛換答簡結果盟絕縷途給談否羈翼耐肖脛毋寧興舒若菲萊痕跡窠臼虛衰臉兔撒鷹棺範該詳諱抬泰讓鬚眉象眾貲賬費灰賴奇慮訓輟辨菽麥辛近送透逞徒速續逮捕遂遑違遜斧鉞艱醉鏽隨觀棄顯飽脂肪使丏丐幫丒且慢末丕替桃宗王尊涼爵各圖屋脊糧署錄壇吾祿職胄襲君廈丗北壑桐疹損逢陵鷸丙寅戌氨腈唑綸辰酮脫氫酶醚丞丟現掉紗帽弄扯砲碗丠両丣坐存激肩臻蒂蓮悖序驅丨丩丫挺杈髻鬟細介俄伊犁京尼布訂普渡央委監察檢查劑圈設警隊斯督剩震境航舶革防托播促質版蠑螈鋒研藝歷殘消頻譜精密製造陲郵候埔堅壓壢凹匯執府究邦俘攝寮彬狼嶽肺腫庸英訊診埋粒胞括控碼韓暑槍樞砥澳哇牟壽甸鑽探篇簽綴縫繼耳肯照婦埃懸璧軸櫃檯辣擱淺邪跑纖阮陽私囊魔丮丰姿采丱燒丳丵丶丷丸參寨朗桂瑞砂衷霞貌鳳僕艦因嫌宰峰幹絡牌持旨祭禱簿編罰賓辦丼丿乀乂乃乄仰慕盛曠留考驗闊乆乇么醜麼乊湖燃乑乒乓乕乖僻忤戾离謬迕乗危肥劫除隙浪婿乙炔腸酰吡咯鹽乚乛乜嘢卿玄宮尾狐龜塔嶷兄弟泉章霄釘耙乞扎哀憐恕討乢乣乤乥乧乨乩童乪乫乭乳暈汁液瑤漿牙癌突竇罩腐膠豬酪蛋糕菌瘤乴乵乶乷乸乹乺乼乾俸冰嘉噦嚎坤媽屍壘旱枯涸俐渴潮澀煸豆燥爹瘦癟癬瞪袋脆薑貝隆餾乿亀亁叫咕攘扔搞男砸竄蓬麻亃亄亅卻亇遲典今臨繁累卵奉婚聰躬巨與遷添裂副宿歲怪噁尕崙愣杆硅硫鈦鈾錳芑雜異鈉砷胂磺琥珀艙棍簧胡茬盜浩盆販
郎腿亍洪亐互欠助勉惠操斥諉繫戶譯亓墓碑刑鈴卅渠繽紛斗米旗憲釩燈徽瘟祖拳福穀豐臟腑綁肉醃苓蘊橋鋪霸顏鬧判噴岡底蛙陘礦亖亙亜罕們娜桑那努哈喀弗烈曼松森杜氏盃奧琛敦戊穆聖裔彙薛孫亟亡佚虜羊牢奮釋卷卸契媾感額睫纏誼趾塞擠紐阻還配馳莊亨洛祚亪享津滬畿郊慈菴枇杷膏亭閣鋥麗亳亶亹誅初責翻瘋偶傑叢稠妖拖寰居吸授慧蝸吞壯魅狗矛盾益渣患憂稀描猿夢暫涯畜禍緣沸搜引擎臣橫紜誰混援蒸獸獅稅剖亻亼亽亾什獻剎邡麽仂仃仄仆富怨仈仉畢昔晨殼紹仍仏仒仕宦仗欺恃腰嘆歎炬梓訖施仙后瓊逝仚仝仞仟悔仡佬償填泊拓撲簇羔購頓欽佩髮棻閫馭養億儆尤藉幀賑凌敘帖李柔剛沃眥睚戒訛取饗讀仨仫仮著泳臥躺韶夏裁仳仵唯賢憑釣誕仿似宋彿諷伀碩盼鵝伄儅伈伉儷柯始娃邁戈坦堡帕茨薩廟瑪莉莎藤霍姆伋伍奢胥廷芳豪伎倆侍汛勒希羲雛伐憩整謨閑閒伕伙伴頤伜伝伢叔恆茲恩翰伱伲侶伶俜悧鼬伸懶縮喇叭伹伺伻伽倻輻伾佀佃佇佈喬妮墨佉盧佌貸劣廉昂檔濃矮傘窪緩耗胸谷迷擋率齲宅沫舍療佐貳佑佔優據鏵嘗呢須魯曉佗佘余坪寺瓜銃僧蒙芒陀龕哼嘔坊姦孽弊揖祟繭縛誓賊佝僂瞀佟你奪趕佡佢佣佤佧賈佪佫佯佰佱潔績釀餚佴捲佶佷佸佹佺佻佼佽佾具喚窘壞娛怒慨硬習慣聾膨脹蔓駭貴痺侀侁侂侃侄侅鴻燕侇侈糜靡侉侌妾侏儒倉鼠侐侑侔侖侘侚鏈侜偎傍鈷循柳葫蘆附価侮罵蔑侯岩截蝕侷貼壺嬛宴捷攜桶箋酌俁狹膝狄俅俉俊俏俎俑俓俔諺俚俛黎健呈固墒增守康箱濕祐鏢鑣槓盒靖膜齡俞豹獵噪孚封札筒託衍鴿剪撰稿煉廠禊練繕葺俯瞰撐衝俲俳俴俵俶俷俺俻俾倀倂倅儲卒惶敷猝逃頡蓄崇隱倌倏忽刺蠟燭噍嚼坍扁抽斃蔥楣灌灶糞背藪賣賠閉霉騰倓倔倖倘倜儻倝借箸挹澆閱倡狂倢倣値倥傯倨傲倩匡嗣沖柝珍倬倭寇猩倮倶倷倹勤讚偁偃充偽吏嗓寐惺扮拱芫茜藉虢鈔偈偉晶偌宕距析濾殿疼癱註頗偓偕鴨歇滯偝偟偢忘怡旺偨偩偪偫偭偯偰偱偲偵緝蹄偷減惰漏窺竊偸偺迹傀儡傅傈僳傌籬傎奎琳迪叟芭傒傔傕傖悉荒傜傞傢傣芽逼傭婢傮睨寄檄誦謠頌傴擔辜弓慘蒿悼疤傺傻屄臆巢洩篋羨蓋軋頹傿儸僄僇僉僊働僎僑僔僖僚僝僞僣僤僥僦猴僨僩僬僭僮僯僰僱僵殖籤靜僾僿征隴儁儂儃儇儈朴薄儊儋儌儍儐儓儔儕儗儘儜儞儤儦儩汰哉寡渥裕酷儭儱罐儳儵儹儺儼儽兀臬臲鷲允勛勳宙宵帥憝彞諧嫂鬩暢沛溢盈飢赫兇悍狠猛頑愚妣斬秦遣鞭耀敏榮槃澤爆碟磁禿纜輝霽鹵朵婁孜烽醬勃汀箕裘鉗耶懞蕾徹兌軟遭黜兎児韻媳爸兕觥兗兙兛兜售鍪肚兝兞兟兡兢兣樽殮涅睡稟籍贅泌啡肽奸幕涵澇熵疚眷稃襯訌赴煥椒殲植跏沒試誤猜棲窗肋袖頰兪卦撇鬍岐廓轎疸楓茴瓏廁秩募勺噸寓斤曆畝迫筷釐最淫螺韜兮寬匪篩襄贏軛複兲詐刃堰戎痞蟻餉它冀鑄冂冃円冇冉冊嫁厲礪竭醮冏牧冑冓冔冕冖冗冘冞冢窄抑誣冥冫烘菇蟄冷凝坨橇淇淋炭餅磚磧窖醋雕雹霜冱冶爐艷嘲峻灘淡漠煖颼飲冼冽凃凄愴梗凅凇凈凊凋敝濛凔凜遵汞脢凞几凢処凰凱凵凶焰凸摺刷紋預喪嘍奔巡榜殯芙蓉租籠輯鞘萃凼鋸鑊刁蠻刂娩崩批拆攤掰櫱驟歧顆秒袂贓勿囑忌磋琢膚刈羽刎訟戮舂槳艇刓刖霹靂刜創犢刡恙墅幟筵緻刦刧刨昏默攸尿慾薰潤薰圭刪刮痧鏟刱刲刳刴刵踏磅戳柏槐繡芹莧蝟舟銘鵠鶩刼剁剃辮剄剉履鉛剋剌姻咽哨廊掠桅沿召瞻翅趙卜渺茫郭剒剔剕瀝剚愎毅訥纔剜剝啄採剞剟剡剣剤綵剮腎駛黏剰袍剴紊剷剸剺剽剿劁劂劄劈啪柴扳啦劉奭姥夼昫涓熙禪禹錫翔雁鶚劊劌弩柄蜻蛉劒劓劖劘劙瀾簣賞磯釜晉甜薪逐劦熔紂虐赤囚劬劭労劵効劻劼劾峭艮勅勇勵勍勐臘脖龐漫飼盪粥輒勖勗勘驕餒碌泮雇捐竹騎殊阱勣樸懇謹勦勧勩勯勰勱勲勷勸懲慰誡諫勹芡踐闌匁庇拯粟紮袱裹餃匆遽匈匉匊匋匍匐莖匏匕妝痰膿蛹齋苑烤蹈塘羌熊閥螳螂疆碚竿緯荷茵邙魏匚匜匝匟扶稷匣匭攏匸匹耦匽匾匿卂叮瘡禧軫堤棚迢鈞鍊卄卆遐卉瓷盲瓶噹胱腱裸卋卌卍卐怯污賤鄙齷齪陋卓溪唐梯漁陳棗泥漳潯澗梨芬譙贍轅迦鄭単驢弈洽鰲卛占筮卝卞卟吩啉屎翠厄卣卨卪卬卮榫襖璽綬鈕蚤懼殆篤聳卲帘帙繞卹卼卽厂厎厓厔厖厗奚厘厙厜厝諒厠厤厥厪膩孢厮厰厳厴厹厺粕垢蕪菁厼厾叁悟茸薯叄吵笄悌哺譏坫壟弧芯杠潛嬰芻袁詰貪諜煽饋駁収岳締災賄騙叚叡吻攔蘑蜜訣燧玩硯箏椎藺銅逗驪另覓叨嘮謁杵姓喊嚷囂咚嚀塑尋惱憎擦祇泣滲蝠叱吒咄咤喝籀黛舵舷叵叶鐸懿昭穰苴遼叻叼吁塹嫖賭瞧爬衆抒吅吆夥巹橡滌抱縱摩郡唁墜扇籃膀襪頸吋愾諮酬哭妓媛暗錶韁邇妃羿絮蕃渾拐葵暮隅吔吖啶嗪戚吜嗇噬嚥吟哦詠吠吧唧嗒咐吪雋咀徵燐苞茹鈣哧吮吰吱嘎吲哚吳棟嬌窟孟簫忠晗淞闔閭趼宇吶睛噓拂捧疵熄竽笛糠吼吽呀呂韋矇呃呆笨呇貢呉罄呋喃呎呏呔呠呡癡呣呤呦呧瑛眩扒晬淑姬瑜璇鵑呪呫嗶嚅囁呬呯呰呱呲咧噌鈍呴呶呷呸呺呻哱咻嘯嚕籲坎坷邏呿咁咂咆哮咇咈咋蟹煦珅藹咍咑咒詛咔噠嚓咾噥哩喱咗咠咡咢咣咥咦咨嗟詢咩咪咫嚙齧咭咮咱咲咳嗆嗽咴咷咸咹咺咼喉咿婉慟憫賦矜綠茗藍哂搶瞞哆嗦囉噻啾濱彗哋哌哎唷喲哏哐哞哢哤哪裏哫啼喘哰哲萎蚌哳哶哽哿唄唅唆唈唉唎唏嘩堯棣殤璜睿肅唔睇唕唚唞唣喳唪唬唰喏唲唳唵嘛唶唸唹唻唼唾唿啁啃鸚鵡啅埠棧榷祺舖鞅飆啊啍啎啐啓啕啖啗啜啞祈啢啣啤啥啫啱啲啵啺饑啽噶崑沁喁喂喆裙喈嚨喋喌喎喑喒喓喔粗喙幛慶滋鵲喟喣喤喥喦喧騷喨喩梆喫葡萄喭駝挑嚇碰樅瓣純皰
藻趟鉻喵営喹喺喼喿嗀嗃嗄嗅嗈嗉嗊嗍嗐嗑嗔詬嗕嗖嗙嗛嗜痂癖嗝嗡嗤嗥嗨嗩嗬嗯嗰嗲嗵嘰嗷嗹嗾嗿嘀嘁嘂嘅惋嘈峪禾蔭嘊嘌嘏嘐嘒嘓嘖嘚嘜嘞嘟囔嘣嘥嘦嘧嘬嘭這謔嚴敞饞鬆嘵嘶嘷嘸蝦嘹嘻嘽嘿噀噂噅噇噉噎噏噔噗噘噙噚噝噞噢噤蟬皿噩噫噭噯噱噲噳嚏涌灑欲巫霏噷噼嚃嚄嚆抖嚌嚐嚔囌嚚嚜嚞嚟嚦嚬嚭嚮嚯嚲嚳飭按竣苛嚵嚶囀囅囈膪謙囍囒囓囗囘蕭酚飄濺諦囝溯眸紇鑾鶻囟殉囡団囤囥囧囨囪囫圇囬囮囯囲図囶囷囸囹圄圉擬囻囿圀圂圃圊粹蠹赦圌墾圏滾鯡鑿枘圕圛圜圞坯埂壤骸炕祠窯豚紳魠鯪鱉圧握圩圪垯圬圮圯炸岬幔毯祇窨菩溉圳圴圻圾坂坆沾坋坌舛壈昆墊墩椅坒坓坩堝坭坰坱坳坴坵坻坼楊掙涎簾垃垈垌垍垓垔垕垗垚垛垝垣垞垟垤垧垮垵垺垾垿埀畔埄埆埇埈埌殃隍埏埒埕埗埜埡埤埦埧埭埯埰埲埳埴埵埶紼埸培怖樁礎輔埼埽堀訶姪廡堃堄摧磐貞韌砌堈堉堊堋堌堍堎堖堙堞堠礁堧堨輿堭堮蜓摘堲堳堽堿塁塄塈煤塋棵塍塏塒塓綢塕鴉沽虱塙塚塝繆塡塢塤塥塩塬塱塲蟎塼塽塾塿墀墁墈墉墐夯増毀墝墠墦漬缽墫墬墮墰墺墻櫥壅壆壊壌壎壒榨蒜壔壕壖壙壚壜壝壠壡壬壭壱売壴壹壻壼寢壿夂夅夆変夊夌漱邑夓腕泄甥禦骼夗夘夙袞瑙妊娠醣梟珊鶯鷺戧幻魘夤蹀祕擂鶇姚宛閨嶼庾撻拇賛蛤裨菠氅漓撈湄蚊霆鯊箐篆篷荊肆舅荔鮃巷慚骰辟邱鎔鐮阪漂燴鯢鰈鱷鴇臚鵬妒峨譚枰晏璣癸祝秤竺牡籟恢罡螻蠍賜絨御梭夬夭砣榆怙枕夶夾餡奄崛葩譎奈賀祀贈奌奐奓奕訢詝奘奜奠奡奣陶奨奩魁奫奬奰媧孩貶隸酥宄狡猾她奼嫣妁氈荼皋膻蠅嬪妄妍嫉媚嬈妗趣妚妞妤礙妬婭妯娌妲妳妵妺姁姅姉姍姒姘姙姜姝姞姣姤姧姫姮娥姱姸姺姽婀娀誘懾脅娉婷娑娓娟娣娭娯娵娶娸娼婊婐婕婞婤婥谿孺婧婪婬婹婺婼婽媁媄媊媕媞媟媠媢媬媮媯媲媵媸媺媻媼眯媿嫄嫈嫋嫏嫕嫗嫘嫚嫜嫠嫡嫦嫩嫪毐嫫嫬嫰嫵嫺嫻嫽嫿嬀嬃嬅嬉耍嬋痴豔嬔嬖嬗嬙嬝嬡嬢嬤嬦嬬嬭幼嬲嬴嬸嬹嬾嬿孀孃孅孌孏曰癲屏孑孓雀孖斟簍謎摺孛矻鳩崮軻祜鸞孥邈毓棠臏孬孭孰孱孳孵泛罔銜孻孿宀宁宂拙株薇掣撫琪瓿榴謐彌宊濂祁瑕宍宏碁宓邸讞実潢町宥宧宨宬徵崎駿掖闕臊煮禽蠶宸豫寀寁寥寃簷庶寎暄磣寔寖寘寙寛寠苫寤肘洱濫蒗陝覈寪弘綽螽寳擅疙瘩晷対檐専尃尅贖絀繚疇釁尌峙醌襟痲碧屁昊槌淘恵瀑牝畑莓缸羚覷蔻髒躁尒尓銳尗尙尜尟尢尥尨尪尬尭尰擒尲尶尷尸尹潽蠖蛾尻釦梢蚴鰭脬蹲屇屌蚵屐屓挪屖屘屙屛屝屢屣巒嶂巖舄屧屨屩屪屭屮戍駐鉀崖嵛巔旮旯楂欖櫸芋茱萸靛麓屴屹屺屼岀岊岌岍阜岑彭鞏岒岝岢嵐岣岧岨岫岱岵岷峁峇峋峒峓峞峠嵋峩峯峱峴峹峿崀崁崆禎崋崌崍嶇崐崒崔嵬巍螢顥崚崞崟崠崢巆崤崦崧殂崬崱崳崴崶崿嵂嵇嵊泗嵌嵎嵒嵓嵗嵙嵞嵡嵩嵫嵯嵴嵼嵾嶁嶃嶄晴嶋嶌嶒嶓嶔嶗嶙嶝嶞嶠嶡嶢嶧嶨嶭嶮嶰嶲嶴嶸巂巃巇巉巋巌巓巘巛滇芎巟巠弋迴巣巤炊擘蜥蟒蠱覡巰蜀彥淖杏茂甫楞巻巽幗巿帛斐鯽蕊帑帔帗帚琉汶帟帡帣帨帬帯帰帷帹暆幃幄幇幋幌幏幘幙幚幞幠幡幢幦幨幩幪幬幭幯幰遙蹉跎餘庚鑑幵幷稚邃庀庁広庄庈庉笠庋跋庖犧庠庤庥鯨庬庱庳庴庵馨衢庹庿廃廄廆廋廌廎廏廐廑廒廕廖廛廝搏鑼廞弛袤廥廧廨廩廱綿踵髓廸廹甌鄴廻廼廾廿躔弁皺弇弌弍弎弐弒弔詭憾薦弝弢弣弤弨弭弮弰弳霖繇燾斌旭溥騫弶弸弼弾彀彄彆纍糾彊彔彖彘彟彠陌彤貽彧繪虹彪炳彫蔚鷗彰癉彲彳彴彷彷徉徨彸彽踩斂旆徂徇徊渭畬鉉裼従筌徘徙徜徠膳甦萌漸徬徭醺徯徳徴潘徻徼忀瘁胖燎怦悸顫扉犀澎湃砰恍惚絞隘忉憚挨餓忐忑忒忖応忝忞耿忡忪忭忮忱忸怩忻悠懣怏遏怔怗怚怛怞懟黍訝怫怭懦怱怲怳怵惕怸怹恁恂恇恉恌恏恒恓恔恘恚恛恝恞恟恠恣恧眄恪恫恬澹恰恿悀悁悃悄悆悊悐悒晦悚悛悜悝悤您悩悪悮悰悱悽惻悳悴悵惘悶悻悾惄愫鍾蒐惆惇惌惎惏惓惔惙惛耄惝瘧濁惥惦惪惲惴惷惸拈愀愃愆愈愊愍愐愑愒愓愔愕愙氓蠢騃昵愜赧愨愬愮愯愷愼慁慂慅慆慇靄慉慊慍慝慥慪慫慬慱慳慴慵慷慼焚憀灼鬱憃憊憋憍眺捏軾憒憔憖憙憧憬憨憪憭憮憯憷憸憹憺懃懅懆邀懊懋懌懍懐懞懠懤懥懨懫懮懰懱毖懵遁樑雍懺懽戁戄戇戉戔戕戛戝戞戠戡戢戣戤戥戦戩戭戯轟戱披菊牖戸戹戺戻戼戽鍬扂楔扃扆扈扊杖牽絹銬鐲賚扐摟攪烊盹瞌跟躉鑔靶鼾払扗玫腮扛扞扠扡扢盔押扤扦扱罾揄綏鞍郤窾扻扼扽抃抆抈抉抌抏瞎抔繯縊擻抜抝択抨摔歉躥牾抶抻搐泵菸拃拄拊髀拋拌脯拎拏拑擢秧沓曳攣迂拚拝拠拡拫拭拮踢拴拶拷攢拽掇芥橐簪摹疔挈瓢驥捺蹻挌挍挎挐揀挓挖掘浚挙揍聵挲挶挾挿捂捃捄捅捆捉捋胳膊揎捌捍捎軀蛛捗捘捙捜捥捩捫捭据捱捻捼捽掀掂掄臀膘掊掎掏掐笙掔掗掞棉芍掤搪闡掫掮掯揉掱掲掽掾揃揅揆搓揌諢揕揗揘揜揝揞揠揥揩揪揫櫫遒麈揰揲揵揶揸揹揺搆搉搊搋搌搎搔搕撼櫓搗搘搠搡搢搣搤搥搦搧搨搬楦褳訕赸搯搰搲搳搴搵搷搽搾搿摀摁摂摃摎摑摒摓跤摙摛摜摞摠摦睺羯摭摮摯摰摲摳摴摶摷摻摽撂撃撅稻撊撋撏鐧潑撕撙撚撝撟撢撣撦撧撩撬撱朔撳蚍蜉撾撿擀擄闖擉缶觚擐擕擖擗擡擣擤澡腚擧擨擩擫擭擯擰擷擸擼擽擿攃攄攆攉攥攐攓攖攙攛每攩攫轡澄攮攰攲攴軼攷砭訐攽碘敁敃敇敉敍敎筏敔敕敖閏誨敜煌敧敪敱敹敺敻敿斁衽斄牒縐謅斉斎斕鶉讕駮鱧斒筲斛斝斞斠斡斢斨斫斮晾沂潟穎絳邵斲斸釳於琅斾斿旀旂旃旄渦旌旎旐旒旓旖旛旝旟旡旣浴旰獺魃旴旹旻旼旽昀昃昄昇昉晰躲澈熹皎皓礬昑昕昜昝昞
昡昤暉筍昦昨昰昱昳昴昶昺昻晁蹇隧蔬髦晄晅晒晛晜晞晟晡晢晤晥曦晩萘瑩顗晿暁暋暌暍暐暔暕煅暘暝暠暡曚暦暨暪朦朧暱暲殄馮暵暸暹暻暾曀曄曇曈曌曏曐曖曘曙曛曡曨曩駱曱甴肱曷牘禺錕曽滄耽朁朅朆杪栓誇竟粘絛朊膺朏朐朓朕朘朙瞄覲溘饔飧朠朢朣柵椆澱蝨朩朮朰朱炆璋鈺熾鹮朳槿朶朾朿杅杇杌隉欣釗湛漼楷瀍煜玟纓翱肈舜贄适逵杓杕杗杙荀蘅杝杞脩珓筊杰榔狍閦顰緬莞杲杳眇杴杶杸杻杼枋枌枒枓衾葄翹紓逋枙狸椏枟槁枲枳枴枵枷枸櫞枹枻柁柂柃柅柈柊柎某柑橘柒柘柙柚柜柞櫟柟柢柣柤柩柬柮柰柲橙柶柷柸柺査柿栃栄栒栔栘栝栟栢栩栫栭栱栲栳栴檀栵栻桀驁桁鎂桄桉桋桎梏椹葚桓桔桕桜桟桫欏桭桮桯桲桴桷桹湘溟梃梊梍梐潼梔梘梜梠梡梣梧梩梱梲梳梴梵梹棁棃櫻棐棑棕櫚簑繃蓑棖棘棜棨棩棪棫棬棯棰棱棳棸棹槨棼椀椄苕椈椊椋椌椐椑椓椗検椤椪椰椳椴椵椷椸椽椿楀楄楅篪楋楍楎楗楘楙楛楝楟楠楢楥楨楩楪楫楬楮楯楰楳楸楹楻楽榀榃榊榎槺榕榖榘榛狉莽榜笞榠榡榤榥榦榧榪榭榰榱槤霰榼榾榿槊閂槎槑槔槖様槜槢槥槧槪槭槮槱槲槻槼槾樆樊樏樑樕樗樘樛樟樠樧樨権樲樴樵猢猻樺樻罍樾樿橁橄橆橈笥龠橕橚橛輛橢橤橧豎膈跨橾橿檁檃檇檉檍檎檑檖檗檜檟檠檣檨檫檬檮檳檴檵檸櫂櫆櫌櫛櫜櫝櫡櫧櫨櫪櫬櫳櫹櫺茄櫽欀欂欃欐欑欒欙欞溴欨欬欱欵欶欷歔欸欹欻欼欿歁歃歆艎歈歊蒔蝶歓歕歘歙歛歜歟歠蹦詮鑲蹣跚陞陟歩歮歯歰歳歴璞歺瞑歾歿殀殈殍殑殗殜殙殛殞殢殣殥殪殫殭殰殳荃殷殸殹蛟殻殽謗毆毈毉餵毎毑蕈毗毘毚茛鄧毧毬毳毷毹毽毾毿氂氄氆靴氉氊氌氍氐聊氕氖気氘氙氚氛氜氝氡洶焊痙氤氳氥氦鋁鋅氪烴氬銨痤汪滸漉痘盂碾菖蒲蕹蛭螅氵氷氹氺氽燙氾氿渚汆汊汋汍汎汏汐汔汕褟汙汚汜蘺沼穢衊汧汨汩汭汲汳汴隄汾沄沅沆瀣沇沈葆浸淪湎溺痼痾沌沍沏沐沔沕沘浜畹礫沚沢沬沭沮沰沱灢沴沷籽沺烹濡洄泂肛泅泆湧肓泐泑泒泓泔泖泙泚泜泝泠漩饃濤粼濘蘚鰍泩泫泭泯銖泱泲洇洊涇琵琶荽薊箔洌洎洏洑潄濯洙洚洟洢洣洧洨洩痢滔洫洮洳洴洵洸洹洺洼洿淌蜚浄浉浙贛渫浠浡浤浥淼瀚浬浭翩萍浯浰蜃淀苔蛞蝓蜇螵蛸煲鯉浹浼浽溦涂涊涐涑涒涔滂涖涘涙涪涫涬涮涴涶涷涿淄淅淆淊淒黯淓淙漣淜淝淟淠淢淤淥淦淩猥藿褻淬淮淯淰淳詣淶紡淸淹燉癯綺渇済渉渋渓渕渙渟渢滓渤澥渧渨渮渰渲渶渼湅湉湋湍湑湓湔黔湜湝湞湟湢湣湩湫湮麟湱湲湴湼満溈溍溎溏溛舐漭溠溤溧馴溮溱溲溳溵溷溻溼溽溾滁滃滉滊滎滏稽滕滘滙滝滫滮羼耷滷滹滻煎漈漊漎繹漕漖漘漙漚漜漪漾漥漦漯漰漵漶漷濞潀潁潎潏潕潗潚潝潞潠潦祉瘍潲潵潷潸潺潾潿澁澂澃澉澌澍澐澒澔澙澠澣澦澧澨澫澬澮澰澴澶澼熏郁濆濇濈濉濊貊濔疣濜濠濩觴濬濮盥濰濲濼瀁瀅瀆瀋瀌瀏瀒瀔瀕瀘瀛瀟瀠瀡瀦瀧瀨瀬瀰瀲瀳瀵瀹瀺瀼灃灄灉灋灒灕灖灝灞灠灤灥灨灩灪蜴灮燼獴灴灸灺炁炅魷炗炘炙炤炫疽烙釺炯炰炱炲炴炷燬炻烀烋瘴鯧烓烔焙烜烝烳飪烺焃焄耆焌焐焓焗焜焞焠焢焮焯焱焼煁煃煆煇煊熠煍熬煐煒煕煗燻礆霾煚煝煟煠煢矸煨瑣煬萁煳煺煻熀熅熇熉羆熒穹熗熘熛熜稔諳爍熤熨熯熰眶螞熲熳熸熿燀燁燂燄盞燊燋燏燔隼燖燜燠燡燦燨燮燹燻燽燿爇爊爓爚爝爟爨蟾爯爰爲爻爿爿牀牁牂牄牋牎牏牓牕釉牚腩蒡虻牠雖蠣牣牤牮牯牲牳牴牷牸牼絆牿靬犂犄犆犇犉犍犎犒犖犗犛犟犠犨犩犪犮犰狳犴犵犺狁甩狃狆狎狒獾狘狙黠狨狩狫狴狷狺狻豕狽蜘猁猇猈猊猋猓猖獗猗猘猙獰獁猞猟獕猭猱猲猳猷猸猹猺玃獀獃獉獍獏獐獒獘獙獚獜獝獞獠獢獣獧鼇蹊獪獫獬豸獮獯鬻獳獷獼玀玁菟玅玆玈珉糝禛郅玍玎玓瓅玔玕玖玗玘玞玠玡玢玤玥玦玨瑰玭玳瑁玶玷玹玼珂珇珈瑚珌饈饌珔珖珙珛珞珡珣珥珧珩珪珮珶珷珺珽琀琁隕琊琇琖琚琠琤琦琨琫琬琭琮琯琰琱琲瑯琹琺琿瑀瑂瑄瑉瑋瑑瑔瑗瑢瑭瑱瑲瑳瑽瑾瑿璀璨璁璅璆璈璉璊璐璘璚璝璟璠璡璥璦璩璪璫璯璲璵璸璺璿瓀瓔瓖瓘瓚瓛臍瓞瓠瓤瓧瓩瓮瓰瓱瓴瓸瓻瓼甀甁甃甄甇甋甍甎甏甑甒甓甔甕甖甗飴蔗甙詫鉅粱盎銹糰甡褥産甪甬甭甮甯鎧甹甽甾甿畀畁畇畈畊畋畎畓畚畛畟鄂畤畦畧荻畯畳畵畷畸畽畾疃疉疋疍疎簞疐疒疕疘疝疢疥疧疳疶疿痁痄痊痌痍痏痐痒痔痗瘢痚痠痡痣痦痩痭痯痱痳痵痻痿瘀瘂瘃瘈瘉瘊瘌瘏瘐瘓瘕瘖瘙瘚瘛瘲瘜瘝瘞瘠瘥瘨瘭瘮瘯瘰癧瘳癘瘵瘸瘺瘻瘼癃癆癇癈癎癐癔癙癜癠癤癥癩蟆癪癭癰発踔紺蔫酵皙砬砒翎翳蘞鎢鑞皚鵯駒鱀粵褶皀皁莢皃鎛皈皌皐皒硃皕皖皘皜皝皞皤皦皨皪皫皭糙綻皴皸皻皽盅盋盌盍盚盝踞盦盩鞦韆盬盭眦睜瞤盯盱眙裰盵盻睞眂眅眈眊県眑眕眚眛眞眢眣眭眳眴眵眹瞓眽郛睃睅睆睊睍睎睏睒睖睙睟睠睢睥睪睪睯睽睾瞇瞈瞋瞍逛瞏瞕瞖瞘瞜瞟瞠瞢瞫瞭瞳瞵瞷瞹瞽闍瞿矓矉矍鑠矔矗矙矚矞矟矠矣矧矬矯矰矱硪碇磙罅舫阡、矼矽礓砃砅砆砉砍砑砕砝砟砠砢砦砧砩砫砮砳艏砵砹砼硇硌硍硎硏硐硒硜硤硨磲茚鋇硭硻硾碃碉碏碣碓碔碞碡碪碫碬碭碯碲碸碻礡磈磉磎磑磔磕磖磛磟磠磡磤磥蹭磪磬磴磵磹磻磽礀礄礅礌礐礚礜礞礤礧礮礱礲礵礽礿祂祄祅祆禳祊祍祏祓祔祕祗祘祛祧祫祲祻祼餌臠錮禂禇禋禑禔禕隋禖禘禚禜禝禠禡禢禤禥禨禫禰禴禸稈秈秊闈颯秌秏秕笈蘵賃秠秣秪秫秬秭秷秸稊稌稍稑稗稙稛稞稬稭稲稹稼顙稾穂穄穇穈穉穋穌貯穏穜穟穠穡穣穤穧穨穭穮穵穸窿闃窀窂窅窆窈窕窊窋窌窒窓窔窞窣窬黷蹙窰窳窴
窵窶窸窻竁竃竈竑竜竝竦竪篦篾笆鮫竾笉笊笎笏笐靨笓笤籙笪笫笭笮笰笱笲笳笵笸笻筀筅筇筈筎筑筘筠筤筥筦筧筩筭筯筰筱筳筴讌筸箂箇箊箎箑箒箘箙箛箜篌箝箠箬鏃箯箴箾篁篔簹篘篙篚篛篜篝篟篠篡篢篥篧篨篭篰篲篳篴篶篹篼簀簁簃簆簉簋簌簏簜簟簠簥簦簨簬簰簸簻籊籐籒籓籔籖籚籛籜籣籥籧籩籪籫籯芾麴籵籸籹籼粁粃粋粑粔糲粛粞粢粧粨粲粳粺粻粽闢粿糅糆糈糌糍糒糔萼糗蛆蹋糢糨糬糭糯糱糴糶糸糺紃蹼鰹黴紆紈絝紉閩襻紑紕紘錠鳶鷂紝紞紟紥紩紬紱紲紵紽紾紿絁絃絅経絍絎絏縭褵絓絖絘絜絢絣螯絪絫聒絰絵絶絺絻絿綀綃綅綆綈綉綌綍綎綑綖綘継続緞綣綦綪綫綮綯綰罟蝽綷縩綹綾緁緄緅緆緇緋緌緎総緑緔緖緗緘緙緜緡緤緥緦纂緪緰緱緲緶緹縁縃縄縈縉縋縏縑縕縗縚縝縞縟縠縡縢縦縧縯縰騁縲縳縴縵縶縹縻衙縿繄繅繈繊繋繐繒繖繘繙繠繢繣繨繮繰繸繻繾纁纆纇纈纉纊纑纕纘纙纚纛缾罃罆罈罋罌罎罏罖罘罛罝罠罣罥罦罨罫罭鍰罳罶罹罻罽罿羂羃羇羋蕉５１鴕羑羖羗羜羝羢羣羥羧羭羮羰羱羵羶羸藜鮐翀翃翄翊翌翏翕翛翟翡翣翥翦躚翪翫翬翮翯翺翽翾翿闆饕鴰鍁耋耇耎耏耑耒耜耔耞耡耤耨耩耪耬耰鬢耵聹聃聆聎聝聡聦聱聴聶聼閾聿肄肏肐肕腋肙肜肟肧胛肫肬肭肰肴肵肸肼胊胍胏胑胔胗胙胝胠銓胤胦胩胬胭胯胰胲胴胹胻胼胾脇脘脝脞脡脣脤脥脧脰脲脳腆腊腌臢腍腒腓腖腜腠腡腥腧腬腯踝蹬鐐腴腶蠕誹膂膃膆膇膋膔膕膗膙膟黐膣膦膫膰膴膵膷膾臃臄臇臈臌臐臑臓臕臖臙臛臝臞臧蓐詡臽臾臿舀舁鰟鮍舋舎舔舗舘舝舠舡舢舨舭舲舳舴舸舺艁艄艅艉艋艑艕艖艗艘艚艜艟艣艤艨艩艫艬艭荏艴艶艸艹艻艿芃芄芊萰陂藭芏芔芘芚蕙芟芣芤茉芧芨芩芪芮芰鰱芴芷芸蕘豢芼芿苄苒苘苙苜蓿苠苡苣蕒苤苧苪鎊苶苹苺苻苾茀茁范蠡萣茆茇茈茌茍茖茞茠茢茥茦菰茭茯茳藨茷藘茼荁荄荅荇荈菅蜢鴞荍荑荘荳荵荸薺莆莒莔莕莘莙莚莛莜莝莦莨菪莩莪莭莰莿菀菆菉菎菏菐菑菓菔菕菘菝菡菢菣菥蓂菧菫轂鎣菶菷菹醢菺菻菼菾萅萆萇萋萏萐萑萜萩萱萴萵萹萻葇葍葎葑葒葖葙葠葥葦葧葭葯葳葴葶葸葹葽蒄蒎蒓蘢薹蒞蒟蒻蒢蒦蒨蒭藁蒯蒱鉾蒴蒹蒺蒽蓀蓁蓆蓇蓊蓌蓍蓏蓓蓖蓧蓪蓫蓽跣藕蓯蓰蓱蓴蓷蓺蓼蔀蔂蔃蔆蔇蔉蔊蔋蔌蔎蔕蔘蔙蔞蔟鍔蔣雯蔦蔯蔳蔴蔵蔸蔾蕁蕆蕋蕍蕎蕐蕑蕓蕕蕖蕗蕝蕞蕠蕡蕢蕣蕤蕨蕳蕷蕸蕺蕻薀薁薃薅薆薈薉薌薏薐薔薖薘薙諤釵薜薠薢薤薧薨薫薬薳薶薷薸薽薾薿藄藇藋藎藐藙藚藟藦藳藴藶藷藾蘀蘁蘄蘋蘗蘘蘝蘤蘧蘩蘸蘼虀虆虍蟠虒虓虖虡虣虥虩虯虰蛵虵虷鱒虺虼蚆蚈蚋蚓蚔蚖蚘蚜蚡蚣蚧蚨蚩蚪蚯蚰蜒蚱蚳蚶蚹蚺蚻蚿蛀蛁蛄蛅蝮蛌蛍蛐蟮蛑蛓蛔蛘蛚蛜蛡蛣蜊蛩蛺蛻螫蜅蜆蜈蝣蜋蜍蜎蜑蠊蜛餞蜞蜣蜨蜩蜮蜱蜷蜺蜾蜿蝀蝃蝋蝌蝍蝎蝏蝗蝘蝙蝝鱝蝡蝤蝥蝯蝰蝱蝲蝴蝻螃蠏螄螉螋螒螓螗螘螙螚蟥螟螣螥螬螭螮螾螿蟀蟅蟈蟊蟋蟑蟓蟛蟜蟟蟢蟣蟨蟪蟭蟯蟳蟶蟷蟺蟿蠁蠂蠃蠆蠋蠐蠓蠔蠗蠙蠚蠛蠜蠧蠨蠩蠭蠮蠰蠲蠵蠸蠼蠽衁衂衄衇衈衉衋衎衒衕衖衚衞裳鈎衭衲衵衹衺衿袈裟袗袚袟袢袪袮袲袴袷袺袼褙袽裀裉裊裋裌裍裎裒裛裯裱裲裴裾褀褂褉褊褌褎褐褒褓褔褕褘褚褡褢褦褧褪褫褭褯褰褱襠褸褽褾襁襃襆襇襉襋襌襏襚襛襜襝襞襡襢襤襦襫襬襭襮襴襶襼襽襾覂覃覅覇覉覊覌覗覘覚覜覥覦覧覩覬覯覰観覿觔觕觖觜觽觝觡酲觩觫觭觱觳觶觷觼觾觿言賅訃訇訏訑訒詁託訧訬訳訹証訾詀詅詆譭詈詊詎詑詒詖詗詘詧詨詵詶詸詹詻詼詿誂誃誄鋤誆誋誑誒誖誙誚誥誧説読誯誶誾諂諄諆諌諍諏諑諕諗諛諝諞諟諠諡諴諵諶諼謄謆謇謌謍謏謑謖謚謡謦謪謫謳謷謼謾譁譅譆譈譊譌譒譔譖鑫譞譟譩譫譬譱譲譴譸譹譾讅讆讋讌讎讐讒讖讙讜讟谽豁豉豇豈豊豋豌豏豔豞豖豗豜豝豣豦豨豭豱豳豵豶豷豺豻貅貆貍貎貔貘貙貜貤饜貰餸貺賁賂賏賒賕賙賝賡賧賨賫鬭賮賵賸賺賻賾贇贉贐贔贕贗赬赭赱赳迄趁趂趄趐趑趒趔趡趦趫趮趯趲趴趵趷趹趺趿跁跂跅跆躓蹌跐跕跖跗跙跛跦跧跩跫跬跮跱跲跴跺跼跽踅踆踈踉踊踒踖踘踜踟躇躕踠踡踣踤踥踦踧蹺踫踮踰踱踴踶踹踺踼踽躞蹁蹂躪蹎蹐蹓蹔蹕蹚蹜蹝蹟蹠蹡蹢躂蹧蹩蹪蹯鞠蹽躃躄躅躊躋躐躑躒躘躙躛躝躠躡躦躧躩躭躰躳躶軃軆輥軏軔軘軜軝齶転軥軨軭軱軲轆軷軹軺軽軿輀輂輦輅輇輈輓輗輙輜輞輠輤輬輭輮輳輴輵輶輹輼輾轀轇轏轑轒轔轕轖轗轘轙轝轞轢轤辠辢辤辵辶辺込辿迅迋迍麿迓迣迤邐迥迨迮迸迺迻迿逄逅逌逍逑逓逕逖逡逭逯逴逶逹遄遅遉遘遛遝遢遨遫遯遰遴遶遹遻邂邅邉邋邎邕邗邘邛邠邢邧邨邯鄲邰邲邳邴邶邷邽邾邿郃郄郇郈郔郕郗郙郚郜郝郞郟郠郢郪郫郯郰郲郳郴郷郹郾郿鄀鄄鄆鄇鄈鄋鄍鄎鄏鄐鄑鄒鄔鄕鄖鄗鄘鄚鄜鄞鄠鄢鄣鄤鄦鄩鄫鄬鄮鄯鄱鄶鄷鄹鄺鄻鄾鄿酃酅酆酇酈酊酋酎酏酐酣酔酕醄酖酗酞酡酢酤酩酴酹酺醁醅醆醊醍醐醑醓醖醝醞醡醤醨醪醭醯醰醱醲醴醵醸醹醼醽醾釂釃釅釆釈鱸鎦閶釓釔釕鈀釙鼢鼴釤釧釪釬釭釱釷釸釹鈁鈃鈄鈆鈇鈈鈊鈌鈐鈑鈒鈤鈥鈧鈬鈮鈰鈳鐺鈸鈹鈽鈿鉄鉆鉈鉋鉌鉍鉏鉑鉕鉚鉢鉥鉦鉨鉬鉭鉱鉲鉶鉸鉺鉼鉿銍銎銑銕鏤銚銛銠銣銤銥銦銧銩銪銫銭銰銲銶銻銼銾鋂鋃鋆鋈鋊鋌鋍鋏鋐鋑鋕鋘鋙鋝鋟鋦鋨鋩鋭鋮鋯鋰鋱鋳鋹鋺鋻鏰鐱錀錁錆錇錈錍錏錒錔錙錚錛錞錟錡錤錩錬録錸錼鍀鍆鍇鍉鍍鍏鍐鍘鍚鍛鍠鍤鍥鍩鍫鍭鍱鍴鍶鍹鍺鍼鍾鎄鎇鎉鎋鎌鎍鎏鎒鎓鎗鎘鎚鎞鎡鎤鎩鎪鎭鎯鎰鎳鎴鎵鎸鎹鎿鏇鏊
鏌鏐鏑鏖鏗鏘鏚鏜鏝鏞鏠鏦鏨鏷鏸鏹鏻鏽鏾鐃鐄鐇鐏鐒鐓鐔鐗馗鐙鐝鐠鐡鐦鐨鐩鐫鐬鐱鐳鐶鐻鐽鐿鑀鑅鑌鑐鑕鑚鑛鑢鑤鑥鑪鑭鑯鑱鑴鑵鑷钁钃镻閆閈閌閎閒閔閗閟閡関閤閤閧閬閲閹閺閻閼閽閿闇闉闋闐闑闒闓闘闚闞闟闠闤闥阞阢阤阨阬阯阹阼阽陁陑陔陛陜陡陥陬騭陴険陼陾隂隃隈隒隗隞隠隣隤隩隮隰顴隳隷隹雂雈雉雊雎雑雒雗雘雚雝雟雩雰雱驛霂霅霈霊霑霒霓霙霝霢霣霤霨霩霪霫霮靁靆靉靑靚靣靦靪靮靰靳靷靸靺靼靿鞀鞃鞄鞌鞗鞙鞚鞝鞞鞡鞣鞨鞫鞬鞮鞶鞹鞾韃韅韉馱韍韎韔韖韘韝韞韡韣韭韮韱韹韺頀颳頄頇頊頍頎頏頒頖頞頠頫頬顱頯頲頴頼顇顋顑顒顓顔顕顚顜顢顣顬顳颭颮颱颶颸颺颻颽颾颿飀飂飈飌飜飡飣飤飥飩飫飮飱飶餀餂餄餎餇餈餑餔餕餖餗餚餛餜餟餠餤餧餩餪餫餬餮餱餲餳餺餻餼餽餿饁饅饇饉饊饍饎饐饘饟饢馘馥馝馡馣騮騾馵馹駃駄駅駆駉駋駑駓駔駗駘駙駜駡駢駪駬駰駴駸駹駽駾騂騄騅騆騉騋騍騏驎騑騒験騕騖騠騢騣騤騧驤騵騶騸騺驀驂驃驄驆驈驊驌驍驎驏驒驔驖驙驦驩驫骺鯁骫骭骯骱骴骶骷髏骾髁髂髄髆髈髐髑髕髖髙髝髞髟髡髣髧髪髫髭髯髲髳髹髺髽髾鬁鬃鬅鬈鬋鬎鬏鬐鬑鬒鬖鬗鬘鬙鬠鬣鬪鬫鬬鬮鬯鬰鬲鬵鬷魆魈魊魋魍魎魑魖鰾魛魟魣魦魨魬魴魵魸鮀鮁鮆鮌鮎鮑鮒鮓鮚鮞鮟鱇鮠鮦鮨鮪鮭鮶鮸鮿鯀鯄鯆鯇鯈鯔鯕鯖鯗鯙鯠鯤鯥鯫鯰鯷鯸鯿鰂鰆鶼鰉鰋鰐鰒鰕鰛鰜鰣鰤鰥鰦鰨鰩鰮鰳鰶鰷鱺鰼鰽鱀鱄鱅鱆鱈鱎鱐鱓鱔鱖鱘鱟鱠鱣鱨鱭鱮鱲鱵鱻鲅鳦鳧鳯鳲鳷鳻鴂鴃鴄鴆鴈鴎鴒鴔鴗鴛鴦鴝鵒鴟鴠鴢鴣鴥鴯鶓鴳鴴鴷鴽鵀鵁鵂鵓鵖鵙鵜鶘鵞鵟鵩鵪鵫鵵鵷鵻鵾鶂鶊鶏鶒鶖鶗鶡鶤鶦鶬鶱鶲鶵鶸鶹鶺鶿鷀鷁鷃鷄鷇鷈鷉鷊鷏鷓鷕鷖鷙鷞鷟鷥鷦鷯鷩鷫鷭鷳鷴鷽鷾鷿鸂鸇鸊鸏鸑鸒鸓鸕鸛鸜鸝鹸鹹鹺麀麂麃麄麇麋麌麐麑麒麚麛麝麤麩麪麫麮麯麰麺麾黁黈黌黢黒黓黕黙黝黟黥黦黧黮黰黱黲黶黹黻黼黽黿鼂鼃鼅鼈鼉鼏鼐鼒鼕鼖鼙鼚鼛鼡鼩鼱鼪鼫鼯鼷鼽齁齆齇齈齉齌齎齏齔齕齗齙齚齜齞齟齬齠齢齣齧齩齮齯齰齱齵齾龎龑龒龔龖龘龝龡龢龤'

# The two tables are positional: character i of one string is the counterpart
# of character i of the other, so a length mismatch means the mapping is broken.
assert len(simplified_charcters) == len(traditional_characters), (
    "simplified and traditional character tables differ in length")

# Single-character lookup tables in both directions.
s2t_dict = {}
t2s_dict = {}
for simplified_char, traditional_char in zip(simplified_charcters,
                                             traditional_characters):
    s2t_dict[simplified_char] = traditional_char
    t2s_dict[traditional_char] = simplified_char


def tranditional_to_simplified(text: str) -> str:
    """Convert the traditional Chinese characters of ``text`` to simplified ones.

    Args:
        text (str): Input text.
    Returns:
        str: The text with every character found in ``t2s_dict`` replaced by
            its mapped value; all other characters are kept as they are.
    """
    return "".join(t2s_dict.get(char, char) for char in text)


def simplified_to_traditional(text: str) -> str:
    """Convert the simplified Chinese characters of ``text`` to traditional ones.

    Args:
        text (str): Input text.
    Returns:
        str: The text with every character found in ``s2t_dict`` replaced by
            its mapped value; all other characters are kept as they are.
    """
    return "".join(s2t_dict.get(char, char) for char in text)


if __name__ == "__main__":
    # Round-trip demo: print the traditional sample, its simplified form,
    # and the result of converting that back to traditional.
    sample = "一般是指存取一個應用程式啟動時始終顯示在網站或網頁瀏覽器中的一個或多個初始網頁等畫面存在的站點"
    print(sample)
    simplified_sample = tranditional_to_simplified(sample)
    print(simplified_sample)
    round_trip = simplified_to_traditional(simplified_sample)
    print(round_trip)


================================================
FILE: paddlespeech/t2s/frontend/zh_normalization/chronology.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re

from .num import DIGITS
from .num import num2str
from .num import verbalize_cardinal
from .num import verbalize_digit


def _time_num2str(num_string: str) -> str:
    """Verbalize a minute/second field of a clock time.

    The field is verbalized without its leading zeros; if it started with
    ``'0'`` the word for zero is placed in front of the result.
    """
    spoken = num2str(num_string.lstrip('0'))
    return DIGITS['0'] + spoken if num_string.startswith('0') else spoken


# Clock-time expression: H:MM or H:MM:SS, e.g. 8:30 or 21:05:09
RE_TIME = re.compile(r'([0-1]?[0-9]|2[0-3])'
                     r':([0-5][0-9])'
                     r'(:([0-5][0-9]))?')

# Time range: two clock times joined by "~" or "-", e.g. 8:30-12:30
RE_TIME_RANGE = re.compile(r'([0-1]?[0-9]|2[0-3])'
                           r':([0-5][0-9])'
                           r'(:([0-5][0-9]))?'
                           r'(~|-)'
                           r'([0-1]?[0-9]|2[0-3])'
                           r':([0-5][0-9])'
                           r'(:([0-5][0-9]))?')


def _verbalize_clock(hour: str, minute: str, second) -> str:
    """Spell out one clock time from its hour, minute and optional second fields."""
    result = f"{num2str(hour)}点"
    # A minute field of "00" is not spoken at all; ":30" is read as "half".
    if minute.lstrip('0'):
        if int(minute) == 30:
            result += "半"
        else:
            result += f"{_time_num2str(minute)}分"
    # `second` is None when the optional ":SS" part did not take part in the match.
    if second and second.lstrip('0'):
        result += f"{_time_num2str(second)}秒"
    return result


def replace_time(match) -> str:
    """Verbalize a clock time or a clock-time range.

    Works with matches of both RE_TIME (4 groups) and RE_TIME_RANGE
    (9 groups); the number of groups tells the two apart.

    Args:
        match (re.Match)
    Returns:
        str
    """
    is_range = len(match.groups()) > 5

    result = _verbalize_clock(
        match.group(1), match.group(2), match.group(4))

    if is_range:
        # Groups 6, 7 and 9 hold the end of the range. Each end is verbalized
        # from its own fields, so "半" is chosen from the end time's minutes.
        result += "至"
        result += _verbalize_clock(
            match.group(6), match.group(7), match.group(9))

    return result


# Date written with Chinese markers: a 4- or 2-digit year followed by "年",
# then an optional month ("月") and an optional day ("日" or "号").
# Group 1 is the year, group 3 the month, group 5 the day, group 9 the day marker.
RE_DATE = re.compile(r'(\d{4}|\d{2})年'
                     r'((0?[1-9]|1[0-2])月)?'
                     r'(((0?[1-9])|((1|2)[0-9])|30|31)([日号]))?')


def replace_date(match) -> str:
    """Verbalize a date matched by RE_DATE.

    The year is read digit by digit, month and day as cardinal numbers; the
    day keeps the marker character captured in group 9.

    Args:
        match (re.Match)
    Returns:
        str
    """
    year, month, day = match.group(1), match.group(3), match.group(5)
    pieces = []
    if year:
        pieces.append(f"{verbalize_digit(year)}年")
    if month:
        pieces.append(f"{verbalize_cardinal(month)}月")
    if day:
        pieces.append(f"{verbalize_cardinal(day)}{match.group(9)}")
    return "".join(pieces)


# Numeric date with a 4-digit year, 2-digit month and 2-digit day, separated by
# "-", "/", "." or a space (e.g. 2021/03/05 or 2021-03-05). The backreference
# \2 requires both separators to be the same character.
RE_DATE2 = re.compile(
    r'(\d{4})([- /.])(0[1-9]|1[012])\2(0[1-9]|[12][0-9]|3[01])')


def replace_date2(match) -> str:
    """Verbalize a numeric date matched by RE_DATE2.

    The year is read digit by digit, month and day as cardinal numbers, and
    the separators are replaced by the markers "年", "月" and "日".

    Args:
        match (re.Match)
    Returns:
        str
    """
    year, month, day = match.group(1), match.group(3), match.group(4)
    pieces = []
    if year:
        pieces.append(f"{verbalize_digit(year)}年")
    if month:
        pieces.append(f"{verbalize_cardinal(month)}月")
    if day:
        pieces.append(f"{verbalize_cardinal(day)}日")
    return "".join(pieces)


================================================
FILE: paddlespeech/t2s/frontend/zh_normalization/constants.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
import string

from pypinyin.constants import SUPPORT_UCS4

# Full-width <-> half-width conversion tables
# ASCII letters, full-width -> half-width (num: 52); keys and values are code points
F2H_ASCII_LETTERS = {
    ord(char) + 65248: ord(char)
    for char in string.ascii_letters
}

# ASCII letters, half-width -> full-width
H2F_ASCII_LETTERS = {value: key for key, value in F2H_ASCII_LETTERS.items()}

# Digits, full-width -> half-width (num: 10)
F2H_DIGITS = {ord(char) + 65248: ord(char) for char in string.digits}
# Digits, half-width -> full-width
H2F_DIGITS = {value: key for key, value in F2H_DIGITS.items()}

# Punctuation, full-width -> half-width (num: 32)
F2H_PUNCTUATIONS = {ord(char) + 65248: ord(char) for char in string.punctuation}
# Punctuation, half-width -> full-width
H2F_PUNCTUATIONS = {value: key for key, value in F2H_PUNCTUATIONS.items()}

# Space (num: 1)
# NOTE(review): unlike the tables above, these two are keyed by characters
# rather than code points, so passing them directly to str.translate() leaves
# the text unchanged; wrap them with str.maketrans() when used that way.
F2H_SPACE = {'\u3000': ' '}
H2F_SPACE = {' ': '\u3000'}

# Matches runs of characters that are NOT "Chinese characters with pinyin";
# can be used to extract non-standard words (NSW)
if SUPPORT_UCS4:
    RE_NSW = re.compile(r'(?:[^'
                        r'\u3007'  # 〇
                        r'\u3400-\u4dbf'  # CJK Extension A: [3400-4DBF]
                        r'\u4e00-\u9fff'  # CJK basic block: [4E00-9FFF]
                        r'\uf900-\ufaff'  # CJK Compatibility: [F900-FAFF]
                        r'\U00020000-\U0002A6DF'  # CJK Extension B: [20000-2A6DF]
                        r'\U0002A703-\U0002B73F'  # CJK Extension C: [2A700-2B73F]
                        r'\U0002B740-\U0002B81D'  # CJK Extension D: [2B740-2B81D]
                        r'\U0002F80A-\U0002FA1F'  # CJK Compatibility Supplement: [2F800-2FA1F]
                        r'])+')
else:
    RE_NSW = re.compile(  # pragma: no cover
        r'(?:[^'
        r'\u3007'  # 〇
        r'\u3400-\u4dbf'  # CJK Extension A: [3400-4DBF]
        r'\u4e00-\u9fff'  # CJK basic block: [4E00-9FFF]
        r'\uf900-\ufaff'  # CJK Compatibility: [F900-FAFF]
        r'])+')


================================================
FILE: paddlespeech/t2s/frontend/zh_normalization/num.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Rules to verbalize numbers into Chinese characters.
https://zh.wikipedia.org/wiki/中文数字#現代中文
"""
import re
from collections import OrderedDict
from typing import List

# Arabic digit character -> Chinese digit character.
DIGITS = {str(i): tran for i, tran in enumerate('零一二三四五六七八九')}
# Power of ten -> Chinese unit character; _get_value scans these keys from the
# largest to the smallest to pick the unit for a digit string.
UNITS = OrderedDict({
    1: '十',
    2: '百',
    3: '千',
    4: '万',
    8: '亿',
})

# Regex alternation (one outer capturing group) of measure words that may
# follow a number; used to build RE_POSITIVE_QUANTIFIERS.
COM_QUANTIFIERS = '(封|艘|把|目|套|段|人|所|朵|匹|张|座|回|场|尾|条|个|首|阙|阵|网|炮|顶|丘|棵|只|支|袭|辆|挑|担|颗|壳|窠|曲|墙|群|腔|砣|座|客|贯|扎|捆|刀|令|打|手|罗|坡|山|岭|江|溪|钟|队|单|双|对|出|口|头|脚|板|跳|枝|件|贴|针|线|管|名|位|身|堂|课|本|页|家|户|层|丝|毫|厘|分|钱|两|斤|担|铢|石|钧|锱|忽|(千|毫|微)克|毫|厘|(公)分|分|寸|尺|丈|里|寻|常|铺|程|(千|分|厘|毫|微)米|米|撮|勺|合|升|斗|石|盘|碗|碟|叠|桶|笼|盆|盒|杯|钟|斛|锅|簋|篮|盘|桶|罐|瓶|壶|卮|盏|箩|箱|煲|啖|袋|钵|年|月|日|季|刻|时|周|天|秒|分|小时|旬|纪|岁|世|更|夜|春|夏|秋|冬|代|伏|辈|丸|泡|粒|颗|幢|堆|条|根|支|道|面|片|张|颗|块|元|(亿|千万|百万|万|千|百)|(亿|千万|百万|万|千|百|美|)元|(亿|千万|百万|万|千|百|十|)吨|(亿|千万|百万|万|千|百|)块|角|毛|分|(公(里|引|丈|尺|寸|分|釐)))'

# Fraction expression: optional minus sign, numerator, "/", denominator
RE_FRAC = re.compile(r'(-?)(\d+)/(\d+)')


def replace_frac(match) -> str:
    """Verbalize a fraction matched by RE_FRAC.

    The denominator is spoken first, then "分之", then the numerator; a
    leading minus sign is spoken as "负".

    Args:
        match (re.Match)
    Returns:
        str
    """
    prefix = "负" if match.group(1) else ""
    spoken_numerator = num2str(match.group(2))
    spoken_denominator = num2str(match.group(3))
    return prefix + spoken_denominator + "分之" + spoken_numerator


# Percentage expression: optional minus sign, integer or decimal number, "%"
RE_PERCENTAGE = re.compile(r'(-?)(\d+(\.\d+)?)%')


def replace_percentage(match) -> str:
    """Verbalize a percentage matched by RE_PERCENTAGE.

    The result is "百分之" followed by the spoken number, with "负" in front
    when a minus sign was matched.

    Args:
        match (re.Match)
    Returns:
        str
    """
    prefix = "负" if match.group(1) else ""
    spoken_value = num2str(match.group(2))
    return prefix + "百分之" + spoken_value


# Integer expression
# Integer with a minus sign, e.g. -10
RE_INTEGER = re.compile(r'(-)' r'(\d+)')


def replace_negative_num(match) -> str:
    """Verbalize a signed integer matched by RE_INTEGER.

    Group 1 is the sign (spoken as "负" when present) and group 2 the digits.

    Args:
        match (re.Match)
    Returns:
        str
    """
    prefix = "负" if match.group(1) else ""
    spoken_value = num2str(match.group(2))
    return prefix + spoken_value


# Serial numbers / identifiers: an unsigned run of three or more digits,
# e.g. 00078
RE_DEFAULT_NUM = re.compile(r'\d{3}\d*')


def replace_default_num(match):
    """Read a matched digit string digit by digit.

    The whole match is passed to ``verbalize_digit`` with ``alt_one=True``.

    Args:
        match (re.Match)
    Returns:
        str
    """
    digits = match.group(0)
    spoken = verbalize_digit(digits, alt_one=True)
    return spoken


# Number expressions
# Decimal number: optionally signed number with a fractional part, or a bare
# fractional part such as ".5"
RE_DECIMAL_NUM = re.compile(r'(-?)((\d+)(\.\d+))' r'|(\.(\d+))')
# Positive integer + optional approximation marker (多/余/几/+) + quantifier
RE_POSITIVE_QUANTIFIERS = re.compile(r"(\d+)([多余几\+])?" + COM_QUANTIFIERS)
# General number: same as RE_DECIMAL_NUM but the fractional part is optional
RE_NUMBER = re.compile(r'(-?)((\d+)(\.\d+)?)' r'|(\.(\d+))')


def replace_positive_quantifier(match) -> str:
    """Verbalize "<integer>[approximation marker]<quantifier>".

    Group 1 is the number, group 2 the optional marker (a "+" is spoken as
    "多") and group 3 the quantifier, which is kept unchanged.

    Args:
        match (re.Match)
    Returns:
        str
    """
    modifier = match.group(2)
    if modifier == "+":
        modifier = "多"
    modifier = modifier or ""
    quantifier = match.group(3)
    spoken_number = num2str(match.group(1))
    return spoken_number + modifier + quantifier


def replace_number(match) -> str:
    """Verbalize a number matched by RE_NUMBER or RE_DECIMAL_NUM.

    When the bare-fraction alternative matched (group 5, e.g. ".5") that text
    is verbalized directly; otherwise the number in group 2 is verbalized and
    prefixed with "负" if group 1 captured a minus sign.

    Args:
        match (re.Match)
    Returns:
        str
    """
    minus = match.group(1)
    value = match.group(2)
    bare_fraction = match.group(5)
    if bare_fraction:
        return num2str(bare_fraction)
    prefix = "负" if minus else ""
    return prefix + num2str(value)


# Range expression: two numbers joined by "-" or "~"
# match.group(1) and match.group(8) are the two numbers; each of them uses the
# same pattern as RE_NUMBER

RE_RANGE = re.compile(
    r'((-?)((\d+)(\.\d+)?)|(\.(\d+)))[-~]((-?)((\d+)(\.\d+)?)|(\.(\d+)))')


def replace_range(match) -> str:
    """Verbalize a numeric range matched by RE_RANGE.

    Both endpoints (groups 1 and 8) are verbalized with RE_NUMBER and
    ``replace_number`` and then joined by "到".

    Args:
        match (re.Match)
    Returns:
        str
    """
    endpoints = (match.group(1), match.group(8))
    spoken = [RE_NUMBER.sub(replace_number, endpoint) for endpoint in endpoints]
    return "到".join(spoken)


def _get_value(value_string: str, use_zero: bool=True) -> List[str]:
    """Turn a digit string into the list of symbols of its Chinese reading.

    Args:
        value_string (str): Digits, possibly with leading zeros.
        use_zero (bool): Whether a single significant digit that was preceded
            by zeros is returned with a leading zero symbol.
    Returns:
        List[str]: Digit and unit symbols in reading order; empty when the
            string contains no non-zero leading content.
    """
    significant = value_string.lstrip('0')
    significant_len = len(significant)

    if significant_len == 0:
        return []

    if significant_len == 1:
        had_leading_zero = significant_len < len(value_string)
        if use_zero and had_leading_zero:
            return [DIGITS['0'], DIGITS[significant]]
        return [DIGITS[significant]]

    # Largest unit whose power is smaller than the number of significant digits.
    largest_unit = next(
        power for power in reversed(UNITS.keys()) if power < significant_len)
    # The split is done on the unstripped string, so zeros in front of the
    # significant digits are passed on to the recursive call for the head.
    head = value_string[:-largest_unit]
    tail = value_string[-largest_unit:]
    symbols = _get_value(head)
    symbols = symbols + [UNITS[largest_unit]]
    return symbols + _get_value(tail)


def verbalize_cardinal(value_string: str) -> str:
    """Read a digit string as a cardinal number.

    An empty string gives an empty result, a string of only zeros gives the
    zero symbol, and a reading that would start with "一十" is shortened so
    that it starts with "十".
    """
    if not value_string:
        return ''

    # 000 -> '零' , 0 -> '零'
    digits = value_string.lstrip('0')
    if not digits:
        return DIGITS['0']

    symbols = _get_value(digits)
    starts_with_one_ten = (len(symbols) >= 2 and
                           symbols[0] == DIGITS['1'] and
                           symbols[1] == UNITS[1])
    if starts_with_one_ten:
        symbols = symbols[1:]
    return ''.join(symbols)


def verbalize_digit(value_string: str, alt_one=False) -> str:
    """Read a digit string one digit at a time.

    Args:
        value_string (str): String of digit characters.
        alt_one (bool): When true, every "一" in the result is replaced by "幺".
    Returns:
        str
    """
    spoken = ''.join([DIGITS[char] for char in value_string])
    return spoken.replace("一", "幺") if alt_one else spoken


def num2str(value_string: str) -> str:
    """Verbalize an unsigned integer or decimal number given as a string.

    Args:
        value_string (str): Digits with at most one decimal point.
    Returns:
        str: The integer part read as a cardinal number, followed by "点" and
            the digit-by-digit fractional part when that part is not all zeros.
    Raises:
        ValueError: If ``value_string`` contains more than one decimal point.
    """
    integer_decimal = value_string.split('.')
    if len(integer_decimal) > 2:
        raise ValueError(
            f"The value string: '{value_string}' has more than one point in it."
        )
    integer = integer_decimal[0]
    decimal = integer_decimal[1] if len(integer_decimal) == 2 else ''

    result = verbalize_cardinal(integer)

    # Trailing zeros of the fractional part are not spoken.
    decimal = decimal.rstrip('0')
    if decimal:
        # '.22' is verbalized as '零点二二'
        # '3.20' is verbalized as '三点二'
        result = result if result else "零"
        result += '点' + verbalize_digit(decimal)
    return result


================================================
FILE: paddlespeech/t2s/frontend/zh_normalization/phonecode.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re

from .num import verbalize_digit

# Normalization of landline / mobile phone numbers
# Mobile phones
# http://www.jihaoba.com/news/show/13680
# China Mobile: 139, 138, 137, 136, 135, 134, 159, 158, 157, 150, 151, 152, 188, 187, 182, 183, 184, 178, 198
# China Unicom: 130, 131, 132, 156, 155, 186, 185, 176
# China Telecom: 133, 153, 189, 180, 181, 177
RE_MOBILE_PHONE = re.compile(
    r"(?<!\d)((\+?86 ?)?1([38]\d|5[0-35-9]|7[678]|9[89])\d{8})(?!\d)")
RE_TELEPHONE = re.compile(
    r"(?<!\d)((0(10|2[1-3]|[3-9]\d{2})-?)?[1-9]\d{6,7})(?!\d)")

# Nationwide unified numbers, which start with 400
RE_NATIONAL_UNIFORM_NUMBER = re.compile(r"(400)(-)?\d{3}(-)?\d{4}")


def phone2str(phone_string: str, mobile=True) -> str:
    """Read a phone number digit by digit.

    Args:
        phone_string (str): The phone number text.
        mobile (bool): When true, "+" characters at the ends are stripped and
            the number is split on whitespace; otherwise it is split on "-".
    Returns:
        str: The spoken parts joined by a full-width comma.
    """
    if mobile:
        parts = phone_string.strip('+').split()
    else:
        parts = phone_string.split('-')
    spoken_parts = [verbalize_digit(part, alt_one=True) for part in parts]
    return '，'.join(spoken_parts)


def replace_phone(match) -> str:
    """Verbalize a matched phone number with ``phone2str(..., mobile=False)``.

    Args:
        match (re.Match)
    Returns:
        str
    """
    matched_text = match.group(0)
    return phone2str(matched_text, mobile=False)


def replace_mobile(match) -> str:
    """Verbalize a matched phone number with ``phone2str`` in its default mode.

    Args:
        match (re.Match)
    Returns:
        str
    """
    matched_text = match.group(0)
    return phone2str(matched_text)


================================================
FILE: paddlespeech/t2s/frontend/zh_normalization/quantifier.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re

from .num import num2str

# Temperature expression; in a temperature the minus sign is read differently
# e.g. "-3°C" is read as "零下三度"
# Group 1 is the sign, group 2 the number, group 3 the optional fractional
# part and group 4 the unit.
RE_TEMPERATURE = re.compile(r'(-?)(\d+(\.\d+)?)(°C|℃|度|摄氏度)')
# Unit abbreviation -> spoken Chinese unit, applied by replace_measure as plain
# substring replacement. A key that is a prefix of another key (e.g. "m" vs
# "mm"/"mg") only works if the longer key is replaced first.
# NOTE(review): "ds" is mapped to "毫秒" (millisecond); possibly "ms" was
# intended - confirm.
measure_dict = {
    "cm2": "平方厘米",
    "cm²": "平方厘米",
    "cm3": "立方厘米",
    "cm³": "立方厘米",
    "cm": "厘米",
    "db": "分贝",
    "ds": "毫秒",
    "kg": "千克",
    "km": "千米",
    "m2": "平方米",
    "m²": "平方米",
    "m³": "立方米",
    "m3": "立方米",
    "ml": "毫升",
    "m": "米",
    "mm": "毫米",
    "s": "秒",
    "h": "小时",
    "mg": "毫克"
}


def replace_temperature(match) -> str:
    """Verbalize a temperature matched by RE_TEMPERATURE.

    A minus sign is spoken as "零下". The unit "摄氏度" is kept as written;
    every other unit is spoken as "度".

    Args:
        match (re.Match)
    Returns:
        str
    """
    sign = match.group(1)
    temperature = match.group(2)
    # Group 3 is the optional fractional part nested inside the number;
    # the unit is captured by group 4.
    unit = match.group(4)
    sign: str = "零下" if sign else ""
    temperature: str = num2str(temperature)
    unit: str = "摄氏度" if unit == "摄氏度" else "度"
    result = f"{sign}{temperature}{unit}"
    return result


def replace_measure(sentence) -> str:
    """Replace unit abbreviations in ``sentence`` by their spoken form.

    Every key of ``measure_dict`` that occurs in the sentence is replaced by
    its value using plain substring replacement.

    Args:
        sentence (str): Input sentence.
    Returns:
        str: The sentence with the abbreviations replaced.
    """
    # Longest keys first, so that a short key such as "m" cannot consume part
    # of a longer one such as "mm" or "mg". sorted() is stable, so keys of the
    # same length keep their order from the dict.
    for q_notation in sorted(measure_dict, key=len, reverse=True):
        if q_notation in sentence:
            sentence = sentence.replace(q_notation, measure_dict[q_notation])
    return sentence


================================================
FILE: paddlespeech/t2s/frontend/zh_normalization/text_normlization.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
from typing import List

from .char_convert import tranditional_to_simplified
from .chronology import RE_DATE
from .chronology import RE_DATE2
from .chronology import RE_TIME
from .chronology import RE_TIME_RANGE
from .chronology import replace_date
from .chronology import replace_date2
from .chronology import replace_time
from .constants import F2H_ASCII_LETTERS
from .constants import F2H_DIGITS
from .constants import F2H_SPACE
from .num import RE_DECIMAL_NUM
from .num import RE_DEFAULT_NUM
from .num import RE_FRAC
from .num import RE_INTEGER
from .num import RE_NUMBER
from .num import RE_PERCENTAGE
from .num import RE_POSITIVE_QUANTIFIERS
from .num import RE_RANGE
from .num import replace_default_num
from .num import replace_frac
from .num import replace_negative_num
from .num import replace_number
from .num import replace_percentage
from .num import replace_positive_quantifier
from .num import replace_range
from .phonecode import RE_MOBILE_PHONE
from .phonecode import RE_NATIONAL_UNIFORM_NUMBER
from .phonecode import RE_TELEPHONE
from .phonecode import replace_mobile
from .phonecode import replace_phone
from .quantifier import RE_TEMPERATURE
from .quantifier import replace_measure
from .quantifier import replace_temperature


class TextNormalizer():
    """Rule-based text normalizer for Chinese TTS input.

    Text is split into sentences at punctuation, and in each sentence dates,
    times, numbers, phone numbers, units and a set of special symbols are
    rewritten into their spoken form.
    """

    # Single symbols and the text they are replaced with. No replacement text
    # contains one of the keys, so one translate() pass gives the same result
    # as replacing the symbols one after another.
    _SYMBOL_READINGS = {
        '/': '每',
        '~': '至',
        '～': '至',
        '①': '一',
        '②': '二',
        '③': '三',
        '④': '四',
        '⑤': '五',
        '⑥': '六',
        '⑦': '七',
        '⑧': '八',
        '⑨': '九',
        '⑩': '十',
        'α': '阿尔法',
        'β': '贝塔',
        'γ': '伽玛',
        'Γ': '伽玛',
        'δ': '德尔塔',
        'Δ': '德尔塔',
        'ε': '艾普西龙',
        'ζ': '捷塔',
        'η': '依塔',
        'θ': '西塔',
        'Θ': '西塔',
        'ι': '艾欧塔',
        'κ': '喀帕',
        'λ': '拉姆达',
        'Λ': '拉姆达',
        'μ': '缪',
        'ν': '拗',
        'ξ': '克西',
        'Ξ': '克西',
        'ο': '欧米克伦',
        'π': '派',
        'Π': '派',
        'ρ': '肉',
        'ς': '西格玛',
        'Σ': '西格玛',
        'σ': '西格玛',
        'τ': '套',
        'υ': '宇普西龙',
        'φ': '服艾',
        'Φ': '服艾',
        'χ': '器',
        'ψ': '普赛',
        'Ψ': '普赛',
        'ω': '欧米伽',
        'Ω': '欧米伽',
    }
    _SYMBOL_TABLE = str.maketrans(_SYMBOL_READINGS)

    def __init__(self):
        # Sentence-ending punctuation, optionally followed by a closing quote.
        self.SENTENCE_SPLITOR = re.compile(r'([：、，；。？！,;?!][”’]?)')

    def _split(self, text: str, lang="zh") -> List[str]:
        """Split long text into sentences with sentence-splitting punctuations.
        Args:
            text (str): The input text.
            lang (str): Language of the text; for "zh" the spaces and a set of
                special characters are removed before splitting.
        Returns:
            List[str]: Sentences.
        """
        # Only for pure Chinese here
        if lang == "zh":
            text = text.replace(" ", "")
            # filter out special characters
            text = re.sub(r'[——《》【】<=>{}()（）#&@“”^_|…\\]', '', text)
        # Put a line break after every splitting punctuation, then cut there.
        text = self.SENTENCE_SPLITOR.sub(r'\1\n', text)
        text = text.strip()
        sentences = [sentence.strip() for sentence in re.split(r'\n+', text)]
        return sentences

    def _post_replace(self, sentence: str) -> str:
        """Replace the remaining special symbols and drop filtered characters.
        Args:
            sentence (str): Sentence after number verbalization.
        Returns:
            str: Sentence with symbols spoken out and special characters removed.
        """
        sentence = sentence.translate(self._SYMBOL_TABLE)
        # Same special-character filter as in _split, plus the "-" character.
        sentence = re.sub(r'[-——《》【】<=>{}()（）#&@“”^_|…\\]', '', sentence)
        return sentence

    def normalize_sentence(self, sentence: str) -> str:
        """Normalize a single sentence.
        Args:
            sentence (str): The input sentence.
        Returns:
            str: The normalized sentence.
        """
        # basic character conversions
        sentence = tranditional_to_simplified(sentence)
        sentence = sentence.translate(F2H_ASCII_LETTERS).translate(F2H_DIGITS)
        # str.translate() looks characters up by code point. str.maketrans()
        # converts single-character keys to code points, so the full-width
        # space table is actually applied.
        sentence = sentence.translate(str.maketrans(F2H_SPACE))

        # number related NSW verbalization
        sentence = RE_DATE.sub(replace_date, sentence)
        sentence = RE_DATE2.sub(replace_date2, sentence)

        # range first
        sentence = RE_TIME_RANGE.sub(replace_time, sentence)
        sentence = RE_TIME.sub(replace_time, sentence)

        sentence = RE_TEMPERATURE.sub(replace_temperature, sentence)
        sentence = replace_measure(sentence)
        sentence = RE_FRAC.sub(replace_frac, sentence)
        sentence = RE_PERCENTAGE.sub(replace_percentage, sentence)
        sentence = RE_MOBILE_PHONE.sub(replace_mobile, sentence)

        sentence = RE_TELEPHONE.sub(replace_phone, sentence)
        sentence = RE_NATIONAL_UNIFORM_NUMBER.sub(replace_phone, sentence)

        sentence = RE_RANGE.sub(replace_range, sentence)
        sentence = RE_INTEGER.sub(replace_negative_num, sentence)
        sentence = RE_DECIMAL_NUM.sub(replace_number, sentence)
        sentence = RE_POSITIVE_QUANTIFIERS.sub(replace_positive_quantifier,
                                               sentence)
        sentence = RE_DEFAULT_NUM.sub(replace_default_num, sentence)
        sentence = RE_NUMBER.sub(replace_number, sentence)
        sentence = self._post_replace(sentence)

        return sentence

    def normalize(self, text: str) -> List[str]:
        """Split ``text`` into sentences and normalize each of them.
        Args:
            text (str): The input text.
        Returns:
            List[str]: Normalized sentences.
        """
        sentences = self._split(text)
        sentences = [self.normalize_sentence(sent) for sent in sentences]
        return sentences


================================================
FILE: paddlespeech/t2s/models/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .ernie_sat import *
from .fastspeech2 import *
from .hifigan import *
from .melgan import *
from .parallel_wavegan import *
from .speedyspeech import *
from .tacotron2 import *
from .transformer_tts import *
from .vits import *
from .waveflow import *
from .wavernn import *


================================================
FILE: paddlespeech/t2s/models/diffsinger/__init__.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .diffsinger import *
from .diffsinger_updater import *


================================================
FILE: paddlespeech/t2s/models/diffsinger/diffsinger.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""DiffSinger related modules for paddle"""
from typing import Any
from typing import Dict
from typing import Tuple

import numpy as np
import paddle
from paddle import nn
from typeguard import typechecked

from paddlespeech.t2s.models.diffsinger.fastspeech2midi import FastSpeech2MIDI
from paddlespeech.t2s.modules.diffnet import DiffNet
from paddlespeech.t2s.modules.diffusion import GaussianDiffusion


class DiffSinger(nn.Layer):
    """DiffSinger module.

    This is a module of DiffSinger described in `DiffSinger: Singing Voice Synthesis via Shallow Diffusion Mechanism`._
    .. _`DiffSinger: Singing Voice Synthesis via Shallow Diffusion Mechanism`:
        https://arxiv.org/pdf/2105.02446.pdf

    The layer owns two sub-modules:

    * ``self.fs2``: a ``FastSpeech2MIDI`` acoustic model that consumes the
      music score (phones, notes, note durations, slur flags).
    * ``self.diffusion``: a ``GaussianDiffusion`` wrapping a ``DiffNet``
      denoiser, conditioned on the encoder output of ``self.fs2``.
    """

    @typechecked
    def __init__(
            self,
            # min and max spec for stretching before diffusion
            spec_min: paddle.Tensor,
            spec_max: paddle.Tensor,
            # fastspeech2midi config
            idim: int,
            odim: int,
            use_energy_pred: bool=False,
            use_postnet: bool=False,
            # music score related
            note_num: int=300,
            is_slur_num: int=2,
            fastspeech2_params: Dict[str, Any]={
                "adim": 256,
                "aheads": 2,
                "elayers": 4,
                "eunits": 1024,
                "dlayers": 4,
                "dunits": 1024,
                "positionwise_layer_type": "conv1d",
                "positionwise_conv_kernel_size": 1,
                "use_scaled_pos_enc": True,
                "use_batch_norm": True,
                "encoder_normalize_before": True,
                "decoder_normalize_before": True,
                "encoder_concat_after": False,
                "decoder_concat_after": False,
                "reduction_factor": 1,
                # for transformer
                "transformer_enc_dropout_rate": 0.1,
                "transformer_enc_positional_dropout_rate": 0.1,
                "transformer_enc_attn_dropout_rate": 0.1,
                "transformer_dec_dropout_rate": 0.1,
                "transformer_dec_positional_dropout_rate": 0.1,
                "transformer_dec_attn_dropout_rate": 0.1,
                "transformer_activation_type": "gelu",
                # duration predictor
                "duration_predictor_layers": 2,
                "duration_predictor_chans": 384,
                "duration_predictor_kernel_size": 3,
                "duration_predictor_dropout_rate": 0.1,
                # pitch predictor
                "use_pitch_embed": True,
                "pitch_predictor_layers": 2,
                "pitch_predictor_chans": 384,
                "pitch_predictor_kernel_size": 3,
                "pitch_predictor_dropout": 0.5,
                "pitch_embed_kernel_size": 9,
                "pitch_embed_dropout": 0.5,
                "stop_gradient_from_pitch_predictor": False,
                # energy predictor
                "use_energy_embed": False,
                "energy_predictor_layers": 2,
                "energy_predictor_chans": 384,
                "energy_predictor_kernel_size": 3,
                "energy_predictor_dropout": 0.5,
                "energy_embed_kernel_size": 9,
                "energy_embed_dropout": 0.5,
                "stop_gradient_from_energy_predictor": False,
                # postnet
                "postnet_layers": 5,
                "postnet_chans": 512,
                "postnet_filts": 5,
                "postnet_dropout_rate": 0.5,
                # spk emb
                "spk_num": None,
                "spk_embed_dim": None,
                "spk_embed_integration_type": "add",
                # training related
                "init_type": "xavier_uniform",
                "init_enc_alpha": 1.0,
                "init_dec_alpha": 1.0,
                # speaker classifier
                "enable_speaker_classifier": False,
                "hidden_sc_dim": 256,
            },
            # denoiser config
            denoiser_params: Dict[str, Any]={
                "in_channels": 80,
                "out_channels": 80,
                "kernel_size": 3,
                "layers": 20,
                "stacks": 5,
                "residual_channels": 256,
                "gate_channels": 512,
                "skip_channels": 256,
                "aux_channels": 256,
                "dropout": 0.,
                "bias": True,
                "use_weight_norm": False,
                "init_type": "kaiming_normal",
            },
            # diffusion config
            diffusion_params: Dict[str, Any]={
                "num_train_timesteps": 100,
                "beta_start": 0.0001,
                "beta_end": 0.06,
                "beta_schedule": "squaredcos_cap_v2",
                "num_max_timesteps": 60,
                "stretch": True,
            }, ):
        """Initialize DiffSinger module.

        Args:
            spec_min (paddle.Tensor): The minimum value of the feature(mel) to stretch before diffusion.
            spec_max (paddle.Tensor): The maximum value of the feature(mel) to stretch before diffusion.
            idim (int): Dimension of the inputs (Input vocabulary size.).
            odim (int): Dimension of the outputs (Acoustic feature dimension.).
            use_energy_pred (bool, optional): whether use energy predictor. Defaults False.
            use_postnet (bool, optional): whether use postnet. Defaults False.
            note_num (int, optional): The number of note. Defaults to 300.
            is_slur_num (int, optional): The number of slur. Defaults to 2.
            fastspeech2_params (Dict[str, Any]): Parameter dict for fastspeech2 module.
            denoiser_params (Dict[str, Any]): Parameter dict for denoiser module.
            diffusion_params (Dict[str, Any]): Parameter dict for diffusion module.

        Note:
            The dict defaults are shared objects; this constructor only reads
            them (they are forwarded to the sub-modules) and never mutates them.
        """
        super().__init__()
        self.fs2 = FastSpeech2MIDI(
            idim=idim,
            odim=odim,
            fastspeech2_params=fastspeech2_params,
            note_num=note_num,
            is_slur_num=is_slur_num,
            use_energy_pred=use_energy_pred,
            use_postnet=use_postnet, )
        denoiser = DiffNet(**denoiser_params)
        self.diffusion = GaussianDiffusion(
            denoiser,
            **diffusion_params,
            min_values=spec_min,
            max_values=spec_max, )

    def forward(
            self,
            text: paddle.Tensor,
            note: paddle.Tensor,
            note_dur: paddle.Tensor,
            is_slur: paddle.Tensor,
            text_lengths: paddle.Tensor,
            speech: paddle.Tensor,
            speech_lengths: paddle.Tensor,
            durations: paddle.Tensor,
            pitch: paddle.Tensor,
            energy: paddle.Tensor,
            spk_emb: paddle.Tensor=None,
            spk_id: paddle.Tensor=None,
            only_train_fs2: bool=True, ) -> Tuple[Any, ...]:
        """Calculate forward propagation.

        Args:
            text(Tensor(int64)):
                Batch of padded token (phone) ids (B, Tmax).
            note(Tensor(int64)):
                Batch of padded note (element in music score) ids (B, Tmax).
            note_dur(Tensor(float32)):
                Batch of padded note durations in seconds (element in music score) (B, Tmax).
            is_slur(Tensor(int64)):
                Batch of padded slur (element in music score) ids (B, Tmax).
            text_lengths(Tensor(int64)):
                Batch of phone lengths of each input (B,).
            speech(Tensor[float32]):
                Batch of padded target features (e.g. mel) (B, Lmax, odim).
            speech_lengths(Tensor(int64)):
                Batch of the lengths of each target features (B,).
            durations(Tensor(int64)):
                Batch of padded token durations in frame (B, Tmax).
            pitch(Tensor[float32]):
                Batch of padded frame-averaged pitch (B, Lmax, 1).
            energy(Tensor[float32]):
                Batch of padded frame-averaged energy (B, Lmax, 1).
            spk_emb(Tensor[float32], optional):
                Batch of speaker embeddings (B, spk_embed_dim).
            spk_id(Tensor[int64], optional):
                Batch of speaker ids (B,)
            only_train_fs2(bool):
                Whether to train only the fastspeech2 module

        Returns:
            tuple:
                * If ``only_train_fs2`` is True, the 8 values produced by
                  ``self.fs2``: ``(before_outs, after_outs, d_outs, p_outs,
                  e_outs, ys, olens, spk_logits)``.
                * Otherwise ``(noise_pred, noise_target, mel_masks)``, where
                  the first two come from ``self.diffusion`` and
                  ``mel_masks`` from ``self.fs2.encoder_infer_batch``.
        """
        # FastSpeech2-only stage: run the full acoustic model and return its
        # outputs. This pass is NOT executed for the diffusion stage, because
        # none of its results are used there.
        if only_train_fs2:
            before_outs, after_outs, d_outs, p_outs, e_outs, ys, olens, spk_logits = self.fs2(
                text=text,
                note=note,
                note_dur=note_dur,
                is_slur=is_slur,
                text_lengths=text_lengths,
                speech=speech,
                speech_lengths=speech_lengths,
                durations=durations,
                pitch=pitch,
                energy=energy,
                spk_id=spk_id,
                spk_emb=spk_emb)
            return before_outs, after_outs, d_outs, p_outs, e_outs, ys, olens, spk_logits

        # get the encoder output from fastspeech2 as the condition of denoiser module
        cond_fs2, mel_masks = self.fs2.encoder_infer_batch(
            text=text,
            note=note,
            note_dur=note_dur,
            is_slur=is_slur,
            text_lengths=text_lengths,
            speech_lengths=speech_lengths,
            ds=durations,
            ps=pitch,
            es=energy)
        # swap the last two axes; the diffusion module is fed with the
        # transposed layout (presumably channels-first -- confirm in GaussianDiffusion)
        cond_fs2 = cond_fs2.transpose((0, 2, 1))

        # predicted vs. target noise from the diffusion module
        noise_pred, noise_target = self.diffusion(
            speech.transpose((0, 2, 1)), cond_fs2)
        return noise_pred, noise_target, mel_masks

    def inference(
            self,
            text: paddle.Tensor,
            note: paddle.Tensor,
            note_dur: paddle.Tensor,
            is_slur: paddle.Tensor,
            get_mel_fs2: bool=False,
            scheduler_type: str="ddpm",
            num_inference_steps: int=60, ):
        """Run inference

        Args:
            text(Tensor(int64)):
                Batch of padded token (phone) ids (B, Tmax).
            note(Tensor(int64)):
                Batch of padded note (element in music score) ids (B, Tmax).
            note_dur(Tensor(float32)):
                Batch of padded note durations in seconds (element in music score) (B, Tmax).
            is_slur(Tensor(int64)):
                Batch of padded slur (element in music score) ids (B, Tmax).
            get_mel_fs2 (bool, optional):
                Whether to get mel from fastspeech2 module. Defaults to False.
            scheduler_type (str, optional):
                Scheduler name forwarded to ``self.diffusion.inference``.
                Defaults to "ddpm" (the previously hard-coded value).
            num_inference_steps (int, optional):
                Number of denoising steps forwarded to
                ``self.diffusion.inference``. Defaults to 60 (the previously
                hard-coded value).

        Returns:
            Tensor: the mel predicted by ``self.fs2.inference`` when
                ``get_mel_fs2`` is True; otherwise the first (only) item of
                the batch produced by the diffusion module, after swapping
                its last two axes back.
        """
        mel_fs2, _, _, _ = self.fs2.inference(text, note, note_dur, is_slur)
        if get_mel_fs2:
            return mel_fs2
        # add a leading batch axis and swap the last two axes for the diffusion module
        mel_fs2 = mel_fs2.unsqueeze(0).transpose((0, 2, 1))
        cond_fs2 = self.fs2.encoder_infer(text, note, note_dur, is_slur)
        cond_fs2 = cond_fs2.transpose((0, 2, 1))
        # start from Gaussian noise with the same shape as the fastspeech2 mel
        noise = paddle.randn(mel_fs2.shape)
        mel = self.diffusion.inference(
            noise=noise,
            cond=cond_fs2,
            ref_x=mel_fs2,
            scheduler_type=scheduler_type,
            num_inference_steps=num_inference_steps)
        mel = mel.transpose((0, 2, 1))
        return mel[0]


class DiffSingerInference(nn.Layer):
    """Inference wrapper that delegates to an acoustic model's ``inference``.

    The ``normalizer`` is stored on the layer but is not applied inside
    ``forward``; the acoustic model output is returned unchanged.
    """

    def __init__(self, normalizer, model):
        super().__init__()
        # keep the assignment order: normalizer first, then the acoustic model
        self.normalizer = normalizer
        self.acoustic_model = model

    def forward(self, text, note, note_dur, is_slur, get_mel_fs2: bool=False):
        """Calculate forward propagation.

        Args:
            text(Tensor(int64)):
                Batch of padded token (phone) ids (B, Tmax).
            note(Tensor(int64)):
                Batch of padded note (element in music score) ids (B, Tmax).
            note_dur(Tensor(float32)):
                Batch of padded note durations in seconds (element in music score) (B, Tmax).
            is_slur(Tensor(int64)):
                Batch of padded slur (element in music score) ids (B, Tmax).
            get_mel_fs2 (bool, optional):
                Whether to get mel from fastspeech2 module. Defaults to False.

        Returns:
            logmel(Tensor(float32)): the value returned by
                ``self.acoustic_model.inference``, passed through as-is
                (documented upstream as logmel, [T, mel_bin]).
        """
        inference_kwargs = {
            "text": text,
            "note": note,
            "note_dur": note_dur,
            "is_slur": is_slur,
            "get_mel_fs2": get_mel_fs2,
        }
        return self.acoustic_model.inference(**inference_kwargs)


class DiffusionLoss(nn.Layer):
    """Loss function module for Diffusion module on DiffSinger."""

    @typechecked
    def __init__(self, use_masking: bool=True,
                 use_weighted_masking: bool=False):
        """Initialize the diffusion (noise prediction) L1 loss module.

        Args:
            use_masking (bool):
                Whether to apply masking for padded part in loss calculation.
            use_weighted_masking (bool):
                Whether to weighted masking in loss calculation.

        Raises:
            AssertionError: If both ``use_masking`` and
                ``use_weighted_masking`` are True.
        """
        super().__init__()

        # the two masking strategies are mutually exclusive (both may be off)
        assert (use_masking != use_weighted_masking) or not use_masking
        self.use_masking = use_masking
        self.use_weighted_masking = use_weighted_masking

        # define criterions
        # weighted masking needs the element-wise loss to re-weight it later
        reduction = "none" if self.use_weighted_masking else "mean"
        self.l1_criterion = nn.L1Loss(reduction=reduction)

    def forward(
            self,
            noise_pred: paddle.Tensor,
            noise_target: paddle.Tensor,
            mel_masks: paddle.Tensor, ) -> paddle.Tensor:
        """Calculate forward propagation.

        Args:
            noise_pred(Tensor):
                Batch of outputs predict noise (B, Lmax, odim).
            noise_target(Tensor):
                Batch of target noise (B, Lmax, odim).
            mel_masks(Tensor):
                Batch of mask of real mel (B, Lmax, 1). A mask without the
                trailing axis, (B, Lmax), is also accepted for weighted
                masking.

        Returns:
            Tensor: The L1 loss between ``noise_pred`` and ``noise_target``
                (mean over the selected elements, or the weighted sum when
                ``use_weighted_masking`` is enabled).
        """
        # apply mask to remove padded part
        if self.use_masking:
            noise_pred = noise_pred.masked_select(
                mel_masks.broadcast_to(noise_pred.shape))
            noise_target = noise_target.masked_select(
                mel_masks.broadcast_to(noise_target.shape))

        # calculate loss
        l1_loss = self.l1_criterion(noise_pred, noise_target)

        # make weighted mask and apply it
        if self.use_weighted_masking:
            # Only add the trailing axis when the mask lacks it. The mask is
            # documented as (B, Lmax, 1); unconditionally unsqueezing it
            # produced a 4-D mask that no longer lines up with the 3-D loss.
            if len(mel_masks.shape) < len(noise_target.shape):
                mel_masks = mel_masks.unsqueeze(-1)
            float_masks = mel_masks.cast(dtype=paddle.float32)
            # per-utterance normalisation over the time axis
            out_weights = float_masks / float_masks.sum(axis=1, keepdim=True)
            # then average over batch size and feature dimension
            out_weights /= noise_target.shape[0] * noise_target.shape[2]

            # apply weight
            l1_loss = l1_loss.multiply(out_weights)
            l1_loss = l1_loss.masked_select(
                mel_masks.broadcast_to(l1_loss.shape)).sum()

        return l1_loss


================================================
FILE: paddlespeech/t2s/models/diffsinger/diffsinger_updater.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from pathlib import Path
from typing import Dict

import paddle
from paddle import distributed as dist
from paddle.io import DataLoader
from paddle.nn import Layer
from paddle.optimizer import Optimizer

from paddlespeech.t2s.training.extensions.evaluator import StandardEvaluator
from paddlespeech.t2s.training.reporter import report
from paddlespeech.t2s.training.updaters.standard_updater import StandardUpdater
from paddlespeech.t2s.training.updaters.standard_updater import UpdaterState

# Configure the root logger's record and timestamp format. Note that
# logging.basicConfig is a no-op if the root logger already has handlers.
logging.basicConfig(
    format='%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(message)s',
    datefmt='[%Y-%m-%d %H:%M:%S]')
# Module-level logger shared by the updater and evaluator defined in this file;
# each of them attaches its own per-rank FileHandler to it.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class DiffSingerUpdater(StandardUpdater):
    """Two-stage training updater for DiffSinger.

    While ``state.iteration < ds_train_start_steps`` only the FastSpeech2
    part is optimized (``optimizers['fs2']`` / ``criterions['fs2']``); from
    ``ds_train_start_steps`` on, the diffusion part is optimized
    (``optimizers['ds']`` / ``criterions['ds']``).
    """

    def __init__(self,
                 model: Layer,
                 optimizers: Dict[str, Optimizer],
                 criterions: Dict[str, Layer],
                 dataloader: DataLoader,
                 ds_train_start_steps: int=160000,
                 output_dir: Path=None,
                 only_train_diffusion: bool=True):
        """Initialize the updater.

        Args:
            model (Layer): DiffSinger model, optionally wrapped in
                ``paddle.DataParallel`` (it is unwrapped here).
            optimizers (Dict[str, Optimizer]): Must contain the keys
                ``'fs2'`` and ``'ds'``.
            criterions (Dict[str, Layer]): Must contain the keys ``'fs2'``
                and ``'ds'``.
            dataloader (DataLoader): Training data loader.
            ds_train_start_steps (int): Iteration from which the diffusion
                module is trained instead of FastSpeech2.
            output_dir (Path, optional): Directory for the per-rank log
                file ``worker_<rank>.log``. If None, no file handler is
                attached.
            only_train_diffusion (bool): If True, FastSpeech2 parameters are
                frozen during the diffusion stage.
        """
        super().__init__(model, optimizers, dataloader, init_state=None)
        self.model = model._layers if isinstance(model,
                                                 paddle.DataParallel) else model
        self.only_train_diffusion = only_train_diffusion

        self.optimizers = optimizers
        self.optimizer_fs2: Optimizer = optimizers['fs2']
        self.optimizer_ds: Optimizer = optimizers['ds']

        self.criterions = criterions
        self.criterion_fs2 = criterions['fs2']
        self.criterion_ds = criterions['ds']

        self.dataloader = dataloader

        self.ds_train_start_steps = ds_train_start_steps

        self.state = UpdaterState(iteration=0, epoch=0)
        self.train_iterator = iter(self.dataloader)

        # `output_dir` defaults to None; only attach a file handler when a
        # directory was actually given instead of failing on `None / str`.
        self.filehandler = None
        if output_dir is not None:
            log_file = Path(output_dir) / 'worker_{}.log'.format(
                dist.get_rank())
            self.filehandler = logging.FileHandler(str(log_file))
            logger.addHandler(self.filehandler)
        self.logger = logger
        self.msg = ""

    def update_core(self, batch):
        """Run one optimization step on ``batch`` and log the losses.

        Args:
            batch (dict): Must provide ``text``, ``note``, ``note_dur``,
                ``is_slur``, ``text_lengths``, ``speech``,
                ``speech_lengths``, ``durations``, ``pitch`` and ``energy``;
                ``spk_id`` / ``spk_emb`` are optional.
        """
        self.msg = "Rank: {}, ".format(dist.get_rank())
        losses_dict = {}
        # spk_id!=None in multiple spk diffsinger
        spk_id = batch["spk_id"] if "spk_id" in batch else None
        spk_emb = batch["spk_emb"] if "spk_emb" in batch else None
        # No explicit speaker identifier labels are used during voice cloning training.
        if spk_emb is not None:
            spk_id = None

        # only train fastspeech2 module firstly
        if self.state.iteration < self.ds_train_start_steps:
            before_outs, after_outs, d_outs, p_outs, e_outs, ys, olens, spk_logits = self.model(
                text=batch["text"],
                note=batch["note"],
                note_dur=batch["note_dur"],
                is_slur=batch["is_slur"],
                text_lengths=batch["text_lengths"],
                speech=batch["speech"],
                speech_lengths=batch["speech_lengths"],
                durations=batch["durations"],
                pitch=batch["pitch"],
                energy=batch["energy"],
                spk_id=spk_id,
                spk_emb=spk_emb,
                only_train_fs2=True, )

            l1_loss_fs2, ssim_loss_fs2, duration_loss, pitch_loss, energy_loss, speaker_loss = self.criterion_fs2(
                after_outs=after_outs,
                before_outs=before_outs,
                d_outs=d_outs,
                p_outs=p_outs,
                e_outs=e_outs,
                ys=ys,
                ds=batch["durations"],
                ps=batch["pitch"],
                es=batch["energy"],
                ilens=batch["text_lengths"],
                olens=olens,
                spk_logits=spk_logits,
                spk_ids=spk_id, )

            loss_fs2 = l1_loss_fs2 + ssim_loss_fs2 + duration_loss + pitch_loss + energy_loss + speaker_loss

            self.optimizer_fs2.clear_grad()
            loss_fs2.backward()
            self.optimizer_fs2.step()

            report("train/loss_fs2", float(loss_fs2))
            report("train/l1_loss_fs2", float(l1_loss_fs2))
            report("train/ssim_loss_fs2", float(ssim_loss_fs2))
            report("train/duration_loss", float(duration_loss))
            report("train/pitch_loss", float(pitch_loss))

            losses_dict["l1_loss_fs2"] = float(l1_loss_fs2)
            losses_dict["ssim_loss_fs2"] = float(ssim_loss_fs2)
            losses_dict["duration_loss"] = float(duration_loss)
            losses_dict["pitch_loss"] = float(pitch_loss)

            # optional losses are only reported when they are non-zero
            if speaker_loss != 0.:
                report("train/speaker_loss", float(speaker_loss))
                losses_dict["speaker_loss"] = float(speaker_loss)
            if energy_loss != 0.:
                report("train/energy_loss", float(energy_loss))
                losses_dict["energy_loss"] = float(energy_loss)

            losses_dict["loss_fs2"] = float(loss_fs2)
            self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                                  for k, v in losses_dict.items())

        # Then only train diffusion module, freeze fastspeech2 parameters.
        # `else` (i.e. iteration >= ds_train_start_steps) instead of a strict
        # `>` so that the step where iteration == ds_train_start_steps is not
        # silently skipped with its batch discarded.
        else:
            for param in self.model.fs2.parameters():
                param.trainable = not self.only_train_diffusion

            noise_pred, noise_target, mel_masks = self.model(
                text=batch["text"],
                note=batch["note"],
                note_dur=batch["note_dur"],
                is_slur=batch["is_slur"],
                text_lengths=batch["text_lengths"],
                speech=batch["speech"],
                speech_lengths=batch["speech_lengths"],
                durations=batch["durations"],
                pitch=batch["pitch"],
                energy=batch["energy"],
                spk_id=spk_id,
                spk_emb=spk_emb,
                only_train_fs2=False, )

            # swap the last two axes back before computing the loss
            noise_pred = noise_pred.transpose((0, 2, 1))
            noise_target = noise_target.transpose((0, 2, 1))
            mel_masks = mel_masks.transpose((0, 2, 1))
            l1_loss_ds = self.criterion_ds(
                noise_pred=noise_pred,
                noise_target=noise_target,
                mel_masks=mel_masks, )

            loss_ds = l1_loss_ds

            self.optimizer_ds.clear_grad()
            loss_ds.backward()
            self.optimizer_ds.step()

            report("train/loss_ds", float(loss_ds))
            report("train/l1_loss_ds", float(l1_loss_ds))
            losses_dict["l1_loss_ds"] = float(l1_loss_ds)
            losses_dict["loss_ds"] = float(loss_ds)
            self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                                  for k, v in losses_dict.items())

        self.logger.info(self.msg)


class DiffSingerEvaluator(StandardEvaluator):
    """Evaluator reporting both FastSpeech2 and diffusion losses of DiffSinger."""

    def __init__(
            self,
            model: Layer,
            criterions: Dict[str, Layer],
            dataloader: DataLoader,
            output_dir: Path=None, ):
        """Initialize the evaluator.

        Args:
            model (Layer): DiffSinger model, optionally wrapped in
                ``paddle.DataParallel`` (it is unwrapped here).
            criterions (Dict[str, Layer]): Must contain the keys ``'fs2'``
                and ``'ds'``.
            dataloader (DataLoader): Evaluation data loader.
            output_dir (Path, optional): Directory for the per-rank log
                file ``worker_<rank>.log``. If None, no file handler is
                attached.
        """
        super().__init__(model, dataloader)
        self.model = model._layers if isinstance(model,
                                                 paddle.DataParallel) else model

        self.criterions = criterions
        self.criterion_fs2 = criterions['fs2']
        self.criterion_ds = criterions['ds']
        self.dataloader = dataloader

        # `output_dir` defaults to None; only attach a file handler when a
        # directory was actually given instead of failing on `None / str`.
        self.filehandler = None
        if output_dir is not None:
            log_file = Path(output_dir) / 'worker_{}.log'.format(
                dist.get_rank())
            self.filehandler = logging.FileHandler(str(log_file))
            logger.addHandler(self.filehandler)
        self.logger = logger
        self.msg = ""

    def evaluate_core(self, batch):
        """Compute and report FastSpeech2 and diffusion losses for ``batch``.

        Args:
            batch (dict): Must provide ``text``, ``note``, ``note_dur``,
                ``is_slur``, ``text_lengths``, ``speech``,
                ``speech_lengths``, ``durations``, ``pitch`` and ``energy``;
                ``spk_id`` / ``spk_emb`` are optional.
        """
        self.msg = "Evaluate: "
        losses_dict = {}
        # spk_id!=None in multiple spk diffsinger
        spk_id = batch["spk_id"] if "spk_id" in batch else None
        spk_emb = batch["spk_emb"] if "spk_emb" in batch else None
        if spk_emb is not None:
            spk_id = None

        # keyword arguments shared by both model calls below
        model_inputs = dict(
            text=batch["text"],
            note=batch["note"],
            note_dur=batch["note_dur"],
            is_slur=batch["is_slur"],
            text_lengths=batch["text_lengths"],
            speech=batch["speech"],
            speech_lengths=batch["speech_lengths"],
            durations=batch["durations"],
            pitch=batch["pitch"],
            energy=batch["energy"],
            spk_id=spk_id,
            spk_emb=spk_emb, )

        # Here show fastspeech2 eval
        before_outs, after_outs, d_outs, p_outs, e_outs, ys, olens, spk_logits = self.model(
            **model_inputs, only_train_fs2=True)

        l1_loss_fs2, ssim_loss_fs2, duration_loss, pitch_loss, energy_loss, speaker_loss = self.criterion_fs2(
            after_outs=after_outs,
            before_outs=before_outs,
            d_outs=d_outs,
            p_outs=p_outs,
            e_outs=e_outs,
            ys=ys,
            ds=batch["durations"],
            ps=batch["pitch"],
            es=batch["energy"],
            ilens=batch["text_lengths"],
            olens=olens,
            spk_logits=spk_logits,
            spk_ids=spk_id, )

        loss_fs2 = l1_loss_fs2 + ssim_loss_fs2 + duration_loss + pitch_loss + energy_loss + speaker_loss

        report("eval/loss_fs2", float(loss_fs2))
        report("eval/l1_loss_fs2", float(l1_loss_fs2))
        report("eval/ssim_loss_fs2", float(ssim_loss_fs2))
        report("eval/duration_loss", float(duration_loss))
        report("eval/pitch_loss", float(pitch_loss))

        losses_dict["l1_loss_fs2"] = float(l1_loss_fs2)
        losses_dict["ssim_loss_fs2"] = float(ssim_loss_fs2)
        losses_dict["duration_loss"] = float(duration_loss)
        losses_dict["pitch_loss"] = float(pitch_loss)

        # optional losses are only reported when they are non-zero
        if speaker_loss != 0.:
            report("eval/speaker_loss", float(speaker_loss))
            losses_dict["speaker_loss"] = float(speaker_loss)
        if energy_loss != 0.:
            report("eval/energy_loss", float(energy_loss))
            losses_dict["energy_loss"] = float(energy_loss)

        losses_dict["loss_fs2"] = float(loss_fs2)

        # Here show diffusion eval
        noise_pred, noise_target, mel_masks = self.model(
            **model_inputs, only_train_fs2=False)

        # swap the last two axes back before computing the loss
        noise_pred = noise_pred.transpose((0, 2, 1))
        noise_target = noise_target.transpose((0, 2, 1))
        mel_masks = mel_masks.transpose((0, 2, 1))
        l1_loss_ds = self.criterion_ds(
            noise_pred=noise_pred,
            noise_target=noise_target,
            mel_masks=mel_masks, )

        loss_ds = l1_loss_ds

        report("eval/loss_ds", float(loss_ds))
        report("eval/l1_loss_ds", float(l1_loss_ds))
        losses_dict["l1_loss_ds"] = float(l1_loss_ds)
        losses_dict["loss_ds"] = float(loss_ds)
        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())

        self.logger.info(self.msg)


================================================
FILE: paddlespeech/t2s/models/diffsinger/fastspeech2midi.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
from typing import Any
from typing import Dict
from typing import Sequence
from typing import Tuple

import paddle
from paddle import nn
from typeguard import typechecked

from paddlespeech.t2s.models.fastspeech2 import FastSpeech2
from paddlespeech.t2s.models.fastspeech2 import FastSpeech2Loss
from paddlespeech.t2s.modules.losses import ssim
from paddlespeech.t2s.modules.masked_fill import masked_fill
from paddlespeech.t2s.modules.nets_utils import make_non_pad_mask
from paddlespeech.t2s.modules.nets_utils import make_pad_mask


class FastSpeech2MIDI(FastSpeech2):
    """The FastSpeech2 module of DiffSinger.

    Extends the parent ``FastSpeech2`` with music-score inputs: note ids,
    note durations and slur flags are embedded here and handed to the encoder
    together with the phone ids.
    """

    @typechecked
    def __init__(
            self,
            # fastspeech2 network structure related
            idim: int,
            odim: int,
            fastspeech2_params: Dict[str, Any],
            # note emb
            note_num: int=300,
            # is_slur emb
            is_slur_num: int=2,
            use_energy_pred: bool=False,
            use_postnet: bool=False, ):
        """Initialize FastSpeech2 module for svs.
        Args:
            idim (int):
                Input dimension, forwarded to ``FastSpeech2.__init__``.
            odim (int):
                Output dimension, forwarded to ``FastSpeech2.__init__``.
            fastspeech2_params (Dict):
                The config of FastSpeech2 module on DiffSinger model.
                It is unpacked into ``FastSpeech2.__init__`` and must contain
                the key ``"adim"``, which is reused as the embedding size of
                the note / slur embeddings.
            note_num (int):
                Number of entries of the note embedding table.
            is_slur_num (int):
                Number of entries of the is_slur embedding table.
            use_energy_pred (bool):
                Whether the energy predictor / energy embedding is used in
                ``_forward``.
            use_postnet (bool):
                Whether to keep the postnet built by the parent class.
                If False, ``self.postnet`` is set to None.

        """
        super().__init__(idim=idim, odim=odim, **fastspeech2_params)
        self.use_energy_pred = use_energy_pred
        self.use_postnet = use_postnet
        if not self.use_postnet:
            # drop the postnet created by the parent constructor
            self.postnet = None

        self.note_embed_dim = self.is_slur_embed_dim = fastspeech2_params[
            "adim"]

        # note_ embed
        self.note_embedding_table = nn.Embedding(
            num_embeddings=note_num,
            embedding_dim=self.note_embed_dim,
            padding_idx=self.padding_idx)
        # projects the scalar note duration to the embedding size
        self.note_dur_layer = nn.Linear(1, self.note_embed_dim)

        # slur embed
        self.is_slur_embedding_table = nn.Embedding(
            num_embeddings=is_slur_num,
            embedding_dim=self.is_slur_embed_dim,
            padding_idx=self.padding_idx)

    def forward(
            self,
            text: paddle.Tensor,
            note: paddle.Tensor,
            note_dur: paddle.Tensor,
            is_slur: paddle.Tensor,
            text_lengths: paddle.Tensor,
            speech: paddle.Tensor,
            speech_lengths: paddle.Tensor,
            durations: paddle.Tensor,
            pitch: paddle.Tensor,
            energy: paddle.Tensor,
            spk_emb: paddle.Tensor=None,
            spk_id: paddle.Tensor=None, ) -> Tuple[paddle.Tensor, ...]:
        """Calculate forward propagation.

        Args:
            text(Tensor(int64)): 
                Batch of padded token (phone) ids (B, Tmax).
            note(Tensor(int64)): 
                Batch of padded note (element in music score) ids (B, Tmax).
            note_dur(Tensor(float32)): 
                Batch of padded note durations in seconds (element in music score) (B, Tmax).
            is_slur(Tensor(int64)): 
                Batch of padded slur (element in music score) ids (B, Tmax).
            text_lengths(Tensor(int64)): 
                Batch of phone lengths of each input (B,).
            speech(Tensor[float32]): 
                Batch of padded target features (e.g. mel) (B, Lmax, odim).
            speech_lengths(Tensor(int64)): 
                Batch of the lengths of each target features (B,).
            durations(Tensor(int64)): 
                Batch of padded token durations in frame (B, Tmax).
            pitch(Tensor[float32]): 
                Batch of padded frame-averaged pitch (B, Lmax, 1).
            energy(Tensor[float32]): 
                Batch of padded frame-averaged energy (B, Lmax, 1).
            spk_emb(Tensor[float32], optional): 
                Batch of speaker embeddings (B, spk_embed_dim).
            spk_id(Tensor[int64], optional): 
                Batch of speaker ids (B,)

        Returns:
            Tuple of 8 items:
            ``(before_outs, after_outs, d_outs, p_outs, e_outs, ys, olens,
            spk_logits)``. The first five and ``spk_logits`` come from
            ``_forward`` (``e_outs`` / ``spk_logits`` may be None); ``ys`` and
            ``olens`` are the targets and their lengths, trimmed to a multiple
            of ``reduction_factor`` when it is larger than 1.

        """
        xs = paddle.cast(text, 'int64')
        note = paddle.cast(note, 'int64')
        note_dur = paddle.cast(note_dur, 'float32')
        is_slur = paddle.cast(is_slur, 'int64')
        ilens = paddle.cast(text_lengths, 'int64')
        # NOTE: the int64 cast is kept on purpose; previously `olens` was
        # re-assigned to the raw `speech_lengths` which discarded the cast.
        olens = paddle.cast(speech_lengths, 'int64')
        ds = paddle.cast(durations, 'int64')
        ps = pitch
        es = energy
        ys = speech
        if spk_id is not None:
            spk_id = paddle.cast(spk_id, 'int64')
        # forward propagation
        before_outs, after_outs, d_outs, p_outs, e_outs, spk_logits = self._forward(
            xs=xs,
            note=note,
            note_dur=note_dur,
            is_slur=is_slur,
            ilens=ilens,
            olens=olens,
            ds=ds,
            ps=ps,
            es=es,
            is_inference=False,
            spk_emb=spk_emb,
            spk_id=spk_id, )
        # modify mod part of groundtruth
        if self.reduction_factor > 1:
            olens = olens - olens % self.reduction_factor
            max_olen = max(olens)
            ys = ys[:, :max_olen]

        return before_outs, after_outs, d_outs, p_outs, e_outs, ys, olens, spk_logits

    def _forward(
            self,
            xs: paddle.Tensor,
            note: paddle.Tensor,
            note_dur: paddle.Tensor,
            is_slur: paddle.Tensor,
            ilens: paddle.Tensor,
            olens: paddle.Tensor=None,
            ds: paddle.Tensor=None,
            ps: paddle.Tensor=None,
            es: paddle.Tensor=None,
            is_inference: bool=False,
            is_train_diffusion: bool=False,
            return_after_enc=False,
            alpha: float=1.0,
            spk_emb=None,
            spk_id=None, ) -> Sequence[paddle.Tensor]:
        """Shared implementation behind training, inference and the
        encoder-only entry points.

        Args:
            xs: Batch of phone ids.
            note: Batch of note ids, embedded with ``note_embedding_table``.
            note_dur: Batch of note durations; a trailing axis is added
                before it goes through ``note_dur_layer``.
            is_slur: Batch of slur ids, embedded with
                ``is_slur_embedding_table``.
            ilens: Input lengths, used for the source mask and the
                duration mask.
            olens: Output lengths. When None, no pitch mask and no decoder
                mask are built.
            ds: Groundtruth durations. Required by the training and the
                ``is_train_diffusion`` branches; optional for inference.
            ps: Groundtruth pitch. Required by the training branch;
                optional for inference.
            es: Groundtruth energy. Only read when ``use_energy_pred``.
            is_inference: Select the inference branch (ignored when
                ``is_train_diffusion`` is True, which is checked first).
            is_train_diffusion: Select the branch that regulates with ``ds``
                and embeds the *predicted* pitch / energy.
            return_after_enc: If True, return ``(hs, h_masks)`` right before
                the decoder instead of the full output tuple.
            alpha: Passed to the length regulator in the inference branch.
            spk_emb: Speaker embedding; has priority over ``spk_id``.
            spk_id: Speaker ids, looked up in ``spk_embedding_table``.

        Returns:
            ``(hs, h_masks)`` if ``return_after_enc`` else
            ``(before_outs, after_outs, d_outs, p_outs, e_outs, spk_logits)``;
            entries that were not computed in the selected branch are None.
        """

        before_outs = after_outs = d_outs = p_outs = e_outs = spk_logits = None
        # forward encoder
        masks = self._source_mask(ilens)
        note_emb = self.note_embedding_table(note)
        note_dur_emb = self.note_dur_layer(paddle.unsqueeze(note_dur, axis=-1))
        is_slur_emb = self.is_slur_embedding_table(is_slur)

        # (B, Tmax, adim)
        hs, _ = self.encoder(
            xs=xs,
            masks=masks,
            note_emb=note_emb,
            note_dur_emb=note_dur_emb,
            is_slur_emb=is_slur_emb, )

        # speaker classifier on gradient-reversed encoder output (training only)
        if self.spk_num and self.enable_speaker_classifier and not is_inference:
            hs_for_spk_cls = self.grad_reverse(hs)
            spk_logits = self.speaker_classifier(hs_for_spk_cls, ilens)
        else:
            spk_logits = None

        # integrate speaker embedding
        if self.spk_embed_dim is not None:
            # spk_emb has a higher priority than spk_id
            if spk_emb is not None:
                hs = self._integrate_with_spk_embed(hs, spk_emb)
            elif spk_id is not None:
                spk_emb = self.spk_embedding_table(spk_id)
                hs = self._integrate_with_spk_embed(hs, spk_emb)

        # forward duration predictor (phone-level) and variance predictors (frame-level)
        d_masks = make_pad_mask(ilens)
        if olens is not None:
            pitch_masks = make_pad_mask(olens).unsqueeze(-1)
        else:
            pitch_masks = None

        # inference for decoder input for diffusion
        if is_train_diffusion:
            # groundtruth durations, but predicted pitch / energy are embedded
            hs = self.length_regulator(hs, ds, is_inference=False)
            p_outs = self.pitch_predictor(hs.detach(), pitch_masks)
            p_embs = self.pitch_embed(p_outs.transpose((0, 2, 1))).transpose(
                (0, 2, 1))
            hs += p_embs
            if self.use_energy_pred:
                e_outs = self.energy_predictor(hs.detach(), pitch_masks)
                e_embs = self.energy_embed(
                    e_outs.transpose((0, 2, 1))).transpose((0, 2, 1))
                hs += e_embs

        elif is_inference:
            # (B, Tmax)
            if ds is not None:
                d_outs = ds
            else:
                d_outs = self.duration_predictor.inference(hs, d_masks)

            # (B, Lmax, adim)
            hs = self.length_regulator(hs, d_outs, alpha, is_inference=True)

            if ps is not None:
                p_outs = ps
            else:
                if self.stop_gradient_from_pitch_predictor:
                    p_outs = self.pitch_predictor(hs.detach(), pitch_masks)
                else:
                    p_outs = self.pitch_predictor(hs, pitch_masks)
            p_embs = self.pitch_embed(p_outs.transpose((0, 2, 1))).transpose(
                (0, 2, 1))
            hs += p_embs

            if self.use_energy_pred:
                if es is not None:
                    e_outs = es
                else:
                    if self.stop_gradient_from_energy_predictor:
                        e_outs = self.energy_predictor(hs.detach(), pitch_masks)
                    else:
                        e_outs = self.energy_predictor(hs, pitch_masks)
                e_embs = self.energy_embed(
                    e_outs.transpose((0, 2, 1))).transpose((0, 2, 1))
                hs += e_embs

        # training
        else:
            d_outs = self.duration_predictor(hs, d_masks)
            # (B, Lmax, adim)
            hs = self.length_regulator(hs, ds, is_inference=False)
            if self.stop_gradient_from_pitch_predictor:
                p_outs = self.pitch_predictor(hs.detach(), pitch_masks)
            else:
                p_outs = self.pitch_predictor(hs, pitch_masks)
            # the groundtruth pitch (not the prediction) is embedded here
            p_embs = self.pitch_embed(ps.transpose((0, 2, 1))).transpose(
                (0, 2, 1))
            hs += p_embs

            if self.use_energy_pred:
                if self.stop_gradient_from_energy_predictor:
                    e_outs = self.energy_predictor(hs.detach(), pitch_masks)
                else:
                    e_outs = self.energy_predictor(hs, pitch_masks)
                # the groundtruth energy (not the prediction) is embedded here
                e_embs = self.energy_embed(es.transpose((0, 2, 1))).transpose(
                    (0, 2, 1))
                hs += e_embs

        # forward decoder
        if olens is not None and not is_inference:
            if self.reduction_factor > 1:
                olens_in = paddle.to_tensor(
                    [olen // self.reduction_factor for olen in olens.numpy()])
            else:
                olens_in = olens
            # (B, 1, T)
            h_masks = self._source_mask(olens_in)
        else:
            h_masks = None

        if return_after_enc:
            return hs, h_masks

        if self.decoder_type == 'cnndecoder':
            # remove output masks for dygraph to static graph
            zs = self.decoder(hs, h_masks)
            before_outs = zs
        else:
            # (B, Lmax, adim)
            zs, _ = self.decoder(hs, h_masks)
            # (B, Lmax, odim)
            before_outs = self.feat_out(zs).reshape(
                (paddle.shape(zs)[0], -1, self.odim))

        # postnet -> (B, Lmax//r * r, odim)
        if self.postnet is None:
            after_outs = before_outs
        else:
            after_outs = before_outs + self.postnet(
                before_outs.transpose((0, 2, 1))).transpose((0, 2, 1))

        return before_outs, after_outs, d_outs, p_outs, e_outs, spk_logits

    def encoder_infer(
            self,
            text: paddle.Tensor,
            note: paddle.Tensor,
            note_dur: paddle.Tensor,
            is_slur: paddle.Tensor,
            alpha: float=1.0,
            spk_emb=None,
            spk_id=None, ) -> paddle.Tensor:
        """Run the model up to (not including) the decoder for one utterance.

        Args:
            text: Phone ids without batch axis.
            note: Note ids without batch axis.
            note_dur: Note durations without batch axis.
            is_slur: Slur ids without batch axis.
            alpha: Passed to the length regulator.
            spk_emb: Optional speaker embedding without batch axis.
            spk_id: Optional speaker id.

        Returns:
            The hidden sequence ``hs`` returned by ``_forward`` with
            ``return_after_enc=True`` (batch axis of size 1 is kept).
        """
        # setup batch axis
        xs = paddle.cast(text, 'int64').unsqueeze(0)
        note = paddle.cast(note, 'int64').unsqueeze(0)
        note_dur = paddle.cast(note_dur, 'float32').unsqueeze(0)
        is_slur = paddle.cast(is_slur, 'int64').unsqueeze(0)
        ilens = paddle.shape(xs)[1]

        if spk_emb is not None:
            spk_emb = spk_emb.unsqueeze(0)

        # use *_ to avoid bug in dygraph to static graph    
        hs, _ = self._forward(
            xs=xs,
            note=note,
            note_dur=note_dur,
            is_slur=is_slur,
            ilens=ilens,
            is_inference=True,
            return_after_enc=True,
            alpha=alpha,
            spk_emb=spk_emb,
            spk_id=spk_id, )
        return hs

    # get encoder output for diffusion training
    def encoder_infer_batch(
            self,
            text: paddle.Tensor,
            note: paddle.Tensor,
            note_dur: paddle.Tensor,
            is_slur: paddle.Tensor,
            text_lengths: paddle.Tensor,
            speech_lengths: paddle.Tensor,
            ds: paddle.Tensor=None,
            ps: paddle.Tensor=None,
            es: paddle.Tensor=None,
            alpha: float=1.0,
            spk_emb=None,
            spk_id=None, ) -> Tuple[paddle.Tensor, paddle.Tensor]:
        """Batched variant of ``encoder_infer`` used for diffusion training.

        Args:
            text: Batch of padded phone ids.
            note: Batch of padded note ids.
            note_dur: Batch of padded note durations.
            is_slur: Batch of padded slur ids.
            text_lengths: Lengths of each input.
            speech_lengths: Lengths of each target.
            ds: Groundtruth durations used by the length regulator.
            ps: Groundtruth pitch (forwarded to ``_forward``).
            es: Groundtruth energy (forwarded to ``_forward``).
            alpha: Forwarded to ``_forward``.
            spk_emb: Optional speaker embedding. A batched embedding is used
                as is; a single 1-D vector gets a leading batch axis.
            spk_id: Optional speaker ids.

        Returns:
            ``(hs, h_masks)``: the hidden sequence before the decoder and the
            decoder mask built from ``speech_lengths``.
        """

        xs = paddle.cast(text, 'int64')
        note = paddle.cast(note, 'int64')
        note_dur = paddle.cast(note_dur, 'float32')
        is_slur = paddle.cast(is_slur, 'int64')
        ilens = paddle.cast(text_lengths, 'int64')
        olens = paddle.cast(speech_lengths, 'int64')

        # Inputs here are already batched, so (unlike `encoder_infer`) a batch
        # axis must only be added when a single un-batched vector is given.
        # `forward` passes the batched embedding through unchanged as well.
        if spk_emb is not None and len(spk_emb.shape) == 1:
            spk_emb = spk_emb.unsqueeze(0)

        # use *_ to avoid bug in dygraph to static graph    
        hs, h_masks = self._forward(
            xs=xs,
            note=note,
            note_dur=note_dur,
            is_slur=is_slur,
            ilens=ilens,
            olens=olens,
            ds=ds,
            ps=ps,
            es=es,
            return_after_enc=True,
            is_train_diffusion=True,
            alpha=alpha,
            spk_emb=spk_emb,
            spk_id=spk_id, )
        return hs, h_masks

    def inference(
            self,
            text: paddle.Tensor,
            note: paddle.Tensor,
            note_dur: paddle.Tensor,
            is_slur: paddle.Tensor,
            durations: paddle.Tensor=None,
            pitch: paddle.Tensor=None,
            energy: paddle.Tensor=None,
            alpha: float=1.0,
            use_teacher_forcing: bool=False,
            spk_emb=None,
            spk_id=None,
    ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor, paddle.Tensor]:
        """Generate the sequence of features given the sequences of characters.

        Args:
            text(Tensor(int64)): 
                Input sequence of characters (T,).
            note(Tensor(int64)): 
                Input note (element in music score) ids (T,).
            note_dur(Tensor(float32)): 
                Input note durations in seconds (element in music score) (T,).
            is_slur(Tensor(int64)): 
                Input slur (element in music score) ids (T,).
            durations(Tensor, optional (int64)): 
                Groundtruth of duration (T,).
            pitch(Tensor, optional): 
                Groundtruth of pitch. Only used with teacher forcing.
            energy(Tensor, optional): 
                Groundtruth of energy. Only used with teacher forcing.
            alpha(float, optional): 
                Alpha to control the speed.
                NOTE(review): not forwarded when use_teacher_forcing is True.
            use_teacher_forcing(bool, optional): 
                Whether to use teacher forcing.
                If true, groundtruth of duration, pitch and energy will be used.
            spk_emb(Tensor, optional): 
                Speaker embedding vector (spk_embed_dim,). (Default value = None)
            spk_id(Tensor, optional(int64)): 
                spk ids (1,). (Default value = None)

        Returns:
            Tuple ``(outs, d_outs, p_outs, e_outs)`` with the batch axis
            removed: output features after the postnet (or the decoder output
            when there is no postnet), durations, pitch and energy.
            ``e_outs`` is None when the energy predictor is not used.

        """
        # setup batch axis
        xs = paddle.cast(text, 'int64').unsqueeze(0)
        note = paddle.cast(note, 'int64').unsqueeze(0)
        note_dur = paddle.cast(note_dur, 'float32').unsqueeze(0)
        is_slur = paddle.cast(is_slur, 'int64').unsqueeze(0)
        d, p, e = durations, pitch, energy
        ilens = paddle.shape(xs)[1]

        if spk_emb is not None:
            spk_emb = spk_emb.unsqueeze(0)

        if use_teacher_forcing:
            # use groundtruth of duration, pitch, and energy
            ds = d.unsqueeze(0) if d is not None else None
            ps = p.unsqueeze(0) if p is not None else None
            es = e.unsqueeze(0) if e is not None else None

            # (1, L, odim)
            _, outs, d_outs, p_outs, e_outs, _ = self._forward(
                xs=xs,
                note=note,
                note_dur=note_dur,
                is_slur=is_slur,
                ilens=ilens,
                ds=ds,
                ps=ps,
                es=es,
                spk_emb=spk_emb,
                spk_id=spk_id,
                is_inference=True)
        else:
            # (1, L, odim)
            _, outs, d_outs, p_outs, e_outs, _ = self._forward(
                xs=xs,
                note=note,
                note_dur=note_dur,
                is_slur=is_slur,
                ilens=ilens,
                is_inference=True,
                alpha=alpha,
                spk_emb=spk_emb,
                spk_id=spk_id, )

        # keep the `[0]` indexing below valid when no energy was produced
        if e_outs is None:
            e_outs = [None]

        return outs[0], d_outs[0], p_outs[0], e_outs[0]


class FastSpeech2MIDILoss(FastSpeech2Loss):
    """Loss function module for DiffSinger."""

    @typechecked
    def __init__(self, use_masking: bool=True,
                 use_weighted_masking: bool=False):
        """Initialize feed-forward Transformer loss module.
        Args:
            use_masking (bool): 
                Whether to apply masking for padded part in loss calculation.
            use_weighted_masking (bool): 
                Whether to weighted masking in loss calculation.
        """
        super().__init__(use_masking, use_weighted_masking)

    def forward(
            self,
            after_outs: paddle.Tensor,
            before_outs: paddle.Tensor,
            d_outs: paddle.Tensor,
            p_outs: paddle.Tensor,
            e_outs: paddle.Tensor,
            ys: paddle.Tensor,
            ds: paddle.Tensor,
            ps: paddle.Tensor,
            es: paddle.Tensor,
            ilens: paddle.Tensor,
            olens: paddle.Tensor,
            spk_logits: paddle.Tensor=None,
            spk_ids: paddle.Tensor=None,
    ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor, paddle.Tensor,
               paddle.Tensor, paddle.Tensor, ]:
        """Calculate forward propagation.

        Args:
            after_outs(Tensor):  
                Batch of outputs after postnets (B, Lmax, odim).
            before_outs(Tensor): 
                Batch of outputs before postnets (B, Lmax, odim).
            d_outs(Tensor): 
                Batch of outputs of duration predictor (B, Tmax).
            p_outs(Tensor): 
                Batch of outputs of pitch predictor (B, Lmax, 1).
            e_outs(Tensor): 
                Batch of outputs of energy predictor (B, Lmax, 1).
                May be None, in which case the energy loss stays 0.0.
            ys(Tensor): 
                Batch of target features (B, Lmax, odim).
            ds(Tensor): 
                Batch of durations (B, Tmax).
            ps(Tensor): 
                Batch of target frame-averaged pitch (B, Lmax, 1).
            es(Tensor): 
                Batch of target frame-averaged energy (B, Lmax, 1).
            ilens(Tensor): 
                Batch of the lengths of each input (B,).
            olens(Tensor): 
                Batch of the lengths of each target (B,).
            spk_logits(Option[Tensor]):
                Batch of outputs after speaker classifier (B, Lmax, num_spk)
            spk_ids(Option[Tensor]):
                Batch of target spk_id (B,)

        Returns:
            Tuple ``(l1_loss, ssim_loss, duration_loss, pitch_loss,
            energy_loss, speaker_loss)``. ``energy_loss`` / ``speaker_loss``
            are the float 0.0 when their inputs are not provided.

        """
        l1_loss = duration_loss = pitch_loss = energy_loss = speaker_loss = ssim_loss = 0.0

        # Features used by the SSIM term keep their (B, Lmax, odim) layout.
        # Default to the raw tensors so that the loss also works when
        # `use_masking` is False (these names used to be defined only inside
        # the masking branch, which raised NameError otherwise).
        before_outs_ssim = before_outs
        after_outs_ssim = after_outs
        ys_ssim = ys

        # apply mask to remove padded part
        if self.use_masking:
            # make feature for ssim loss
            out_pad_masks = make_pad_mask(olens).unsqueeze(-1)
            before_outs_ssim = masked_fill(before_outs, out_pad_masks, 0.0)
            if not paddle.equal_all(after_outs, before_outs):
                after_outs_ssim = masked_fill(after_outs, out_pad_masks, 0.0)
            ys_ssim = masked_fill(ys, out_pad_masks, 0.0)

            out_masks = make_non_pad_mask(olens).unsqueeze(-1)
            before_outs = before_outs.masked_select(
                out_masks.broadcast_to(before_outs.shape))
            if not paddle.equal_all(after_outs, before_outs):
                after_outs = after_outs.masked_select(
                    out_masks.broadcast_to(after_outs.shape))
            ys = ys.masked_select(out_masks.broadcast_to(ys.shape))
            duration_masks = make_non_pad_mask(ilens)
            d_outs = d_outs.masked_select(
                duration_masks.broadcast_to(d_outs.shape))
            ds = ds.masked_select(duration_masks.broadcast_to(ds.shape))
            pitch_masks = out_masks
            p_outs = p_outs.masked_select(
                pitch_masks.broadcast_to(p_outs.shape))
            ps = ps.masked_select(pitch_masks.broadcast_to(ps.shape))
            if e_outs is not None:
                e_outs = e_outs.masked_select(
                    pitch_masks.broadcast_to(e_outs.shape))
                es = es.masked_select(pitch_masks.broadcast_to(es.shape))

        # Flatten speaker logits / ids to one row per position. This is done
        # regardless of `use_masking` so that `batch_size` and the flattened
        # tensors exist whenever the speaker loss is computed below.
        if spk_logits is not None and spk_ids is not None:
            batch_size = spk_ids.shape[0]
            spk_ids = paddle.repeat_interleave(spk_ids, spk_logits.shape[1],
                                               None)
            spk_logits = paddle.reshape(spk_logits,
                                        [-1, spk_logits.shape[-1]])
            if self.use_masking:
                # drop rows whose logits are all zero
                # (presumably the padded positions — TODO confirm)
                mask_index = spk_logits.abs().sum(axis=1) != 0
                spk_ids = spk_ids[mask_index]
                spk_logits = spk_logits[mask_index]

        # calculate loss
        l1_loss = self.l1_criterion(before_outs, ys)
        ssim_loss = 1.0 - ssim(
            before_outs_ssim.unsqueeze(1), ys_ssim.unsqueeze(1))
        # the postnet terms are only added when the postnet changed the output
        if not paddle.equal_all(after_outs, before_outs):
            l1_loss += self.l1_criterion(after_outs, ys)
            ssim_loss += (
                1.0 - ssim(after_outs_ssim.unsqueeze(1), ys_ssim.unsqueeze(1)))
        l1_loss = l1_loss * 0.5
        ssim_loss = ssim_loss * 0.5

        duration_loss = self.duration_criterion(d_outs, ds)
        pitch_loss = self.l1_criterion(p_outs, ps)
        if e_outs is not None:
            energy_loss = self.l1_criterion(e_outs, es)

        if spk_logits is not None and spk_ids is not None:
            speaker_loss = self.ce_criterion(spk_logits, spk_ids) / batch_size

        # make weighted mask and apply it
        if self.use_weighted_masking:
            out_masks = make_non_pad_mask(olens).unsqueeze(-1)
            out_weights = out_masks.cast(dtype=paddle.float32) / out_masks.cast(
                dtype=paddle.float32).sum(
                    axis=1, keepdim=True)
            out_weights /= ys.shape[0] * ys.shape[2]
            duration_masks = make_non_pad_mask(ilens)
            duration_weights = (duration_masks.cast(dtype=paddle.float32) /
                                duration_masks.cast(dtype=paddle.float32).sum(
                                    axis=1, keepdim=True))
            duration_weights /= ds.shape[0]

            # apply weight
            l1_loss = l1_loss.multiply(out_weights)
            l1_loss = l1_loss.masked_select(
                out_masks.broadcast_to(l1_loss.shape)).sum()
            ssim_loss = ssim_loss.multiply(out_weights)
            ssim_loss = ssim_loss.masked_select(
                out_masks.broadcast_to(ssim_loss.shape)).sum()
            duration_loss = (duration_loss.multiply(duration_weights)
                             .masked_select(duration_masks).sum())
            pitch_masks = out_masks
            pitch_weights = out_weights
            pitch_loss = pitch_loss.multiply(pitch_weights)
            pitch_loss = pitch_loss.masked_select(
                pitch_masks.broadcast_to(pitch_loss.shape)).sum()
            if e_outs is not None:
                energy_loss = energy_loss.multiply(pitch_weights)
                energy_loss = energy_loss.masked_select(
                    pitch_masks.broadcast_to(energy_loss.shape)).sum()

        return l1_loss, ssim_loss, duration_loss, pitch_loss, energy_loss, speaker_loss


================================================
FILE: paddlespeech/t2s/models/ernie_sat/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .ernie_sat import *
from .ernie_sat_updater import *


================================================
FILE: paddlespeech/t2s/models/ernie_sat/ernie_sat.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Dict
from typing import List
from typing import Optional

import paddle
from paddle import nn

from paddlespeech.t2s.modules.activation import get_activation
from paddlespeech.t2s.modules.conformer.convolution import ConvolutionModule
from paddlespeech.t2s.modules.conformer.encoder_layer import EncoderLayer
from paddlespeech.t2s.modules.layer_norm import LayerNorm
from paddlespeech.t2s.modules.masked_fill import masked_fill
from paddlespeech.t2s.modules.nets_utils import initialize
from paddlespeech.t2s.modules.tacotron2.decoder import Postnet
from paddlespeech.t2s.modules.transformer.attention import LegacyRelPositionMultiHeadedAttention
from paddlespeech.t2s.modules.transformer.attention import MultiHeadedAttention
from paddlespeech.t2s.modules.transformer.attention import RelPositionMultiHeadedAttention
from paddlespeech.t2s.modules.transformer.embedding import LegacyRelPositionalEncoding
from paddlespeech.t2s.modules.transformer.embedding import PositionalEncoding
from paddlespeech.t2s.modules.transformer.embedding import RelPositionalEncoding
from paddlespeech.t2s.modules.transformer.embedding import ScaledPositionalEncoding
from paddlespeech.t2s.modules.transformer.multi_layer_conv import Conv1dLinear
from paddlespeech.t2s.modules.transformer.multi_layer_conv import MultiLayeredConv1d
from paddlespeech.t2s.modules.transformer.positionwise_feed_forward import PositionwiseFeedForward
from paddlespeech.t2s.modules.transformer.repeat import repeat
from paddlespeech.t2s.modules.transformer.subsampling import Conv2dSubsampling


# MLM -> Masked Language Model
class mySequential(nn.Sequential):
    """Sequential container whose layers may take and return several values.

    A layer's tuple output is unpacked into positional arguments of the next
    layer; any other output is passed on as a single argument.
    """

    def forward(self, *inputs):
        """Feed ``inputs`` through every sub-layer in order.

        Returns:
            Whatever the last sub-layer returns (or the ``inputs`` tuple
            itself when the container has no sub-layers).
        """
        carried = inputs
        for layer in self._sub_layers.values():
            # exact type check on purpose: only plain tuples are unpacked
            carried = layer(*carried) if type(carried) is tuple else layer(
                carried)
        return carried


class MaskInputLayer(nn.Layer):
    """Combine the input with a learnable ``mask_feature`` vector according
    to a position mask.

    Args:
        out_features (int):
            Size of the last axis of the learnable mask feature, whose shape
            is (1, 1, out_features).
    """

    def __init__(self, out_features: int) -> None:
        super().__init__()
        feature_shape = (1, 1, out_features)
        # the parameter starts from values drawn with paddle.normal
        initial_values = paddle.normal(shape=feature_shape)
        self.mask_feature = paddle.create_parameter(
            shape=feature_shape,
            dtype=paddle.float32,
            default_initializer=paddle.nn.initializer.Assign(initial_values))

    def forward(self, input: paddle.Tensor,
                masked_pos: paddle.Tensor=None) -> paddle.Tensor:
        """Apply the mask feature to ``input``.

        Args:
            input (Tensor):
                Input features.
            masked_pos (Tensor):
                Position mask with one axis less than ``input``; a trailing
                axis is added and it is expanded to the shape of ``input``.
                NOTE(review): the default None is not handled and would fail
                in ``paddle.unsqueeze``; it must support ``~`` (presumably a
                bool tensor — TODO confirm).

        Returns:
            Tensor: ``masked_fill(input, mask, 0)`` plus
            ``masked_fill(mask_feature, ~mask, 0)``, same shape as ``input``.
        """
        position_mask = paddle.expand_as(
            paddle.unsqueeze(masked_pos, -1), input)
        # input with value 0 written through `position_mask`
        input_part = masked_fill(input, position_mask, 0)
        # mask feature with value 0 written through the inverted mask
        expanded_feature = paddle.expand_as(self.mask_feature, input)
        feature_part = masked_fill(expanded_feature, ~position_mask, 0)
        return input_part + feature_part


class MLMEncoder(nn.Layer):
    """Conformer-style encoder for masked language modelling (MLM).

    With ``input_layer`` set to ``"mlm"`` or ``"sega_mlm"`` the module builds
    separate front-ends for speech (``speech_embed``) and text
    (``text_embed``). ``forward`` embeds both, concatenates them along axis 1
    and runs the stacked encoder layers over the joint sequence. All other
    ``input_layer`` choices build a single ``embed`` front-end instead, which
    is what the ``MLMDecoder`` subclass consumes.

    Args:
        idim (int):
            Input dimension.
        vocab_size (int):
            Number of entries of the text embedding table
            (only used for ``input_layer`` "mlm" / "sega_mlm").
        pre_speech_layer (int):
            Number of encoder layers applied to the speech stream alone
            before it is concatenated with the text stream.
        attention_dim (int):
            Dimension of attention.
        attention_heads (int):
            The number of heads of multi head attention.
        linear_units (int):
            The number of units of position-wise feed forward.
        num_blocks (int):
            The number of encoder blocks.
        dropout_rate (float):
            Dropout rate.
        positional_dropout_rate (float):
            Dropout rate after adding positional encoding.
        attention_dropout_rate (float):
            Dropout rate in attention.
        input_layer (Union[str, paddle.nn.Layer, None]):
            Input layer type: "linear", "conv2d", "embed", "mlm", "sega_mlm",
            a ready-made layer, or None (positional encoding only).
        normalize_before (bool):
            Whether to use layer_norm before the first block.
        concat_after (bool):
            Whether to concat attention layer's input and output.
            if True, additional linear will be applied.
            i.e. x -> x + linear(concat(x, att(x)))
            if False, no additional linear will be applied. i.e. x -> x + att(x)
        positionwise_layer_type (str):
            "linear", "conv1d", or "conv1d-linear".
        positionwise_conv_kernel_size (int):
            Kernel size of positionwise conv1d layer.
        macaron_style (bool):
            Whether to use macaron style for positionwise layer.
        pos_enc_layer_type (str):
            Encoder positional encoding layer type: "abs_pos",
            "scaled_abs_pos", "rel_pos" or "legacy_rel_pos".
        pos_enc_class:
            Ignored; the class is always chosen from ``pos_enc_layer_type``.
        selfattention_layer_type (str):
            Encoder attention layer type: "selfattn", "rel_selfattn" or
            "legacy_rel_selfattn".
        activation_type (str):
            Encoder activation function type.
        use_cnn_module (bool):
            Whether to use convolution module.
        zero_triu (bool):
            Whether to zero the upper triangular part of attention matrix.
        cnn_module_kernel (int):
            Kernel size of convolution module.
        padding_idx (int):
            Padding idx for the embedding tables.
        stochastic_depth_rate (float):
            Maximum probability to skip the encoder layer.
        text_masking (bool):
            If True an extra ``MaskInputLayer`` is created as
            ``text_masking_layer`` (it is not used by ``forward`` here).

    Raises:
        ValueError: for an unknown ``pos_enc_layer_type``, ``input_layer`` or
            ``selfattention_layer_type``.
        NotImplementedError: for an unknown ``positionwise_layer_type``.

    """

    def __init__(self,
                 idim: int,
                 vocab_size: int=0,
                 pre_speech_layer: int=0,
                 attention_dim: int=256,
                 attention_heads: int=4,
                 linear_units: int=2048,
                 num_blocks: int=6,
                 dropout_rate: float=0.1,
                 positional_dropout_rate: float=0.1,
                 attention_dropout_rate: float=0.0,
                 input_layer: str="conv2d",
                 normalize_before: bool=True,
                 concat_after: bool=False,
                 positionwise_layer_type: str="linear",
                 positionwise_conv_kernel_size: int=1,
                 macaron_style: bool=False,
                 pos_enc_layer_type: str="abs_pos",
                 pos_enc_class=None,
                 selfattention_layer_type: str="selfattn",
                 activation_type: str="swish",
                 use_cnn_module: bool=False,
                 zero_triu: bool=False,
                 cnn_module_kernel: int=31,
                 padding_idx: int=-1,
                 stochastic_depth_rate: float=0.0,
                 text_masking: bool=False):
        """Construct an Encoder object."""
        super().__init__()
        self._output_size = attention_dim
        self.text_masking = text_masking
        if self.text_masking:
            self.text_masking_layer = MaskInputLayer(attention_dim)
        activation = get_activation(activation_type)
        # Pick the positional encoding class; relative encodings must be
        # paired with the matching relative attention implementation.
        if pos_enc_layer_type == "abs_pos":
            pos_enc_class = PositionalEncoding
        elif pos_enc_layer_type == "scaled_abs_pos":
            pos_enc_class = ScaledPositionalEncoding
        elif pos_enc_layer_type == "rel_pos":
            assert selfattention_layer_type == "rel_selfattn"
            pos_enc_class = RelPositionalEncoding
        elif pos_enc_layer_type == "legacy_rel_pos":
            pos_enc_class = LegacyRelPositionalEncoding
            assert selfattention_layer_type == "legacy_rel_selfattn"
        else:
            # f-string (not "+") so a non-str argument still yields ValueError
            raise ValueError(f"unknown pos_enc_layer: {pos_enc_layer_type}")

        self.conv_subsampling_factor = 1
        if input_layer == "linear":
            self.embed = nn.Sequential(
                nn.Linear(idim, attention_dim),
                nn.LayerNorm(attention_dim),
                nn.Dropout(dropout_rate),
                nn.ReLU(),
                pos_enc_class(attention_dim, positional_dropout_rate), )
        elif input_layer == "conv2d":
            self.embed = Conv2dSubsampling(
                idim,
                attention_dim,
                dropout_rate,
                pos_enc_class(attention_dim, positional_dropout_rate), )
            self.conv_subsampling_factor = 4
        elif input_layer == "embed":
            self.embed = nn.Sequential(
                nn.Embedding(idim, attention_dim, padding_idx=padding_idx),
                pos_enc_class(attention_dim, positional_dropout_rate), )
        elif input_layer == "mlm":
            # Separate speech / text front-ends, no segment embedding.
            self.segment_emb = None
            self.speech_embed = mySequential(
                MaskInputLayer(idim),
                nn.Linear(idim, attention_dim),
                nn.LayerNorm(attention_dim),
                nn.ReLU(),
                pos_enc_class(attention_dim, positional_dropout_rate))
            self.text_embed = nn.Sequential(
                nn.Embedding(
                    vocab_size, attention_dim, padding_idx=padding_idx),
                pos_enc_class(attention_dim, positional_dropout_rate), )
        elif input_layer == "sega_mlm":
            # Same as "mlm" plus a segment embedding table with 500 entries
            # that is shared by the speech and text streams.
            self.segment_emb = nn.Embedding(
                500, attention_dim, padding_idx=padding_idx)
            self.speech_embed = mySequential(
                MaskInputLayer(idim),
                nn.Linear(idim, attention_dim),
                nn.LayerNorm(attention_dim),
                nn.ReLU(),
                pos_enc_class(attention_dim, positional_dropout_rate))
            self.text_embed = nn.Sequential(
                nn.Embedding(
                    vocab_size, attention_dim, padding_idx=padding_idx),
                pos_enc_class(attention_dim, positional_dropout_rate), )
        elif isinstance(input_layer, nn.Layer):
            self.embed = nn.Sequential(
                input_layer,
                pos_enc_class(attention_dim, positional_dropout_rate), )
        elif input_layer is None:
            self.embed = nn.Sequential(
                pos_enc_class(attention_dim, positional_dropout_rate))
        else:
            # ``input_layer`` may be any object here; "+" would raise
            # TypeError instead of the intended ValueError.
            raise ValueError(f"unknown input_layer: {input_layer}")
        self.normalize_before = normalize_before

        # self-attention module definition
        if selfattention_layer_type == "selfattn":
            encoder_selfattn_layer = MultiHeadedAttention
            encoder_selfattn_layer_args = (attention_heads, attention_dim,
                                           attention_dropout_rate, )
        elif selfattention_layer_type == "legacy_rel_selfattn":
            assert pos_enc_layer_type == "legacy_rel_pos"
            encoder_selfattn_layer = LegacyRelPositionMultiHeadedAttention
            encoder_selfattn_layer_args = (attention_heads, attention_dim,
                                           attention_dropout_rate, )
        elif selfattention_layer_type == "rel_selfattn":
            assert pos_enc_layer_type == "rel_pos"
            encoder_selfattn_layer = RelPositionMultiHeadedAttention
            encoder_selfattn_layer_args = (attention_heads, attention_dim,
                                           attention_dropout_rate, zero_triu, )
        else:
            raise ValueError(
                f"unknown encoder_attn_layer: {selfattention_layer_type}")

        # feed-forward module definition
        if positionwise_layer_type == "linear":
            positionwise_layer = PositionwiseFeedForward
            positionwise_layer_args = (attention_dim, linear_units,
                                       dropout_rate, activation, )
        elif positionwise_layer_type == "conv1d":
            positionwise_layer = MultiLayeredConv1d
            positionwise_layer_args = (attention_dim, linear_units,
                                       positionwise_conv_kernel_size,
                                       dropout_rate, )
        elif positionwise_layer_type == "conv1d-linear":
            positionwise_layer = Conv1dLinear
            positionwise_layer_args = (attention_dim, linear_units,
                                       positionwise_conv_kernel_size,
                                       dropout_rate, )
        else:
            raise NotImplementedError("Support only linear or conv1d.")

        # convolution module definition
        convolution_layer = ConvolutionModule
        convolution_layer_args = (attention_dim, cnn_module_kernel, activation)

        # Main encoder stack; the skip probability of layer ``lnum`` grows
        # linearly up to ``stochastic_depth_rate`` for the last layer.
        self.encoders = repeat(
            num_blocks,
            lambda lnum: EncoderLayer(
                attention_dim,
                encoder_selfattn_layer(*encoder_selfattn_layer_args),
                positionwise_layer(*positionwise_layer_args),
                positionwise_layer(*positionwise_layer_args) if macaron_style else None,
                convolution_layer(*convolution_layer_args) if use_cnn_module else None,
                dropout_rate,
                normalize_before,
                concat_after,
                stochastic_depth_rate * float(1 + lnum) / num_blocks, ), )
        # Optional stack that sees the speech stream only.
        self.pre_speech_layer = pre_speech_layer
        self.pre_speech_encoders = repeat(
            self.pre_speech_layer,
            lambda lnum: EncoderLayer(
                attention_dim,
                encoder_selfattn_layer(*encoder_selfattn_layer_args),
                positionwise_layer(*positionwise_layer_args),
                positionwise_layer(*positionwise_layer_args) if macaron_style else None,
                convolution_layer(*convolution_layer_args) if use_cnn_module else None,
                dropout_rate,
                normalize_before,
                concat_after,
                stochastic_depth_rate * float(1 + lnum) / self.pre_speech_layer, ),
        )
        if self.normalize_before:
            self.after_norm = LayerNorm(attention_dim)

    def forward(self,
                speech: paddle.Tensor,
                text: paddle.Tensor,
                masked_pos: paddle.Tensor,
                speech_mask: paddle.Tensor=None,
                text_mask: paddle.Tensor=None,
                speech_seg_pos: paddle.Tensor=None,
                text_seg_pos: paddle.Tensor=None):
        """Encode a speech sequence together with an optional text sequence.

        Requires the "mlm" / "sega_mlm" front-ends (``speech_embed``,
        ``text_embed``, ``segment_emb``).

        Args:
            speech (paddle.Tensor):
                Speech features fed to ``speech_embed``.
            text (paddle.Tensor):
                Text token ids fed to ``text_embed``; may be None, in which
                case only the speech stream is encoded.
            masked_pos (paddle.Tensor):
                Masked speech positions passed to ``speech_embed``; may be
                None.
            speech_mask (paddle.Tensor):
                Attention mask of the speech stream.
            text_mask (paddle.Tensor):
                Attention mask of the text stream; concatenated to
                ``speech_mask`` on the last axis.
            speech_seg_pos (paddle.Tensor):
                Segment indices of the speech frames (optional).
            text_seg_pos (paddle.Tensor):
                Segment indices of the text tokens (optional).

        Returns:
            paddle.Tensor:
                Encoded joint sequence (speech positions first, then text).
            paddle.Tensor:
                Mask returned by the encoder stack.

        """
        if masked_pos is not None:
            speech = self.speech_embed(speech, masked_pos)
        else:
            speech = self.speech_embed(speech)
        if text is not None:
            text = self.text_embed(text)
        # NOTE: from here on ``speech`` / ``text`` are indexed as
        # (features, positional embedding) pairs, i.e. the front-ends are
        # expected to end in a positional encoding that returns a tuple.
        if speech_seg_pos is not None and text_seg_pos is not None and self.segment_emb:
            speech_seg_emb = self.segment_emb(speech_seg_pos)
            text_seg_emb = self.segment_emb(text_seg_pos)
            text = (text[0] + text_seg_emb, text[1])
            speech = (speech[0] + speech_seg_emb, speech[1])
        if self.pre_speech_encoders:
            speech, _ = self.pre_speech_encoders(speech, speech_mask)

        if text is not None:
            # Joint sequence: speech positions followed by text positions.
            xs = paddle.concat([speech[0], text[0]], axis=1)
            xs_pos_emb = paddle.concat([speech[1], text[1]], axis=1)
            masks = paddle.concat([speech_mask, text_mask], axis=-1)
        else:
            xs = speech[0]
            xs_pos_emb = speech[1]
            masks = speech_mask

        xs, masks = self.encoders((xs, xs_pos_emb), masks)

        # Drop the positional embedding if the stack hands it back.
        if isinstance(xs, tuple):
            xs = xs[0]
        if self.normalize_before:
            xs = self.after_norm(xs)

        return xs, masks


class MLMDecoder(MLMEncoder):
    """Decoder that reuses the ``MLMEncoder`` layer stack on one input stream."""

    def forward(self, xs: paddle.Tensor, masks: paddle.Tensor):
        """Run the ``embed`` front-end and the encoder stack over ``xs``.

        Args:
            xs (paddle.Tensor):
                Input tensor (#batch, time, idim).
            masks (paddle.Tensor):
                Mask tensor (#batch, time).

        Returns:
            paddle.Tensor:
                Output tensor (#batch, time, attention_dim).
            paddle.Tensor:
                Mask tensor (#batch, time).

        """
        embedded = self.embed(xs)
        hidden, masks = self.encoders(embedded, masks)

        # The stack may hand back (features, positional embedding).
        out = hidden[0] if isinstance(hidden, tuple) else hidden
        if self.normalize_before:
            out = self.after_norm(out)

        return out, masks


# encoder and decoder are nn.Layer instances, not strings
class MLM(nn.Layer):
    """Base class of the masked-language-model acoustic models.

    Holds the encoder / decoder pair and the output projections; subclasses
    provide ``forward``, which ``inference`` relies on.

    Args:
        odim (int):
            Dimension of the predicted speech features.
        encoder (nn.Layer):
            Encoder exposing ``text_embed`` (whose first sub-layer is an
            embedding table) and ``_output_size``.
        decoder (Optional[nn.Layer]):
            Decoder applied to the encoder output, or None.
        postnet_layers (int):
            Number of postnet layers; 0 disables the postnet.
        postnet_chans (int):
            Number of postnet channels.
        postnet_filts (int):
            Postnet filter size.
        text_masking (bool):
            Whether to create the text projection ``text_sfc``.
    """

    def __init__(self,
                 odim: int,
                 encoder: nn.Layer,
                 decoder: Optional[nn.Layer],
                 postnet_layers: int=0,
                 postnet_chans: int=0,
                 postnet_filts: int=0,
                 text_masking: bool=False):

        super().__init__()
        self.odim = odim
        self.encoder = encoder
        self.decoder = decoder
        self.vocab_size = encoder.text_embed[0]._num_embeddings

        # A speech projection is needed unless the decoder brings its own
        # (non-None) ``output_layer``; a missing decoder has none either.
        if getattr(self.decoder, 'output_layer', None) is None:
            self.sfc = nn.Linear(self.encoder._output_size, odim)
        else:
            self.sfc = None
        if text_masking:
            # Projects hidden states to vocabulary logits; built with the
            # weight attribute of the text embedding table.
            self.text_sfc = nn.Linear(
                self.encoder.text_embed[0]._embedding_dim,
                self.vocab_size,
                weight_attr=self.encoder.text_embed[0]._weight_attr)
        else:
            self.text_sfc = None

        self.postnet = (None if postnet_layers == 0 else Postnet(
            idim=self.encoder._output_size,
            odim=odim,
            n_layers=postnet_layers,
            n_chans=postnet_chans,
            n_filts=postnet_filts,
            use_batch_norm=True,
            dropout_rate=0.5, ))

    def inference(
            self,
            speech: paddle.Tensor,
            text: paddle.Tensor,
            masked_pos: paddle.Tensor,
            speech_mask: paddle.Tensor,
            text_mask: paddle.Tensor,
            speech_seg_pos: paddle.Tensor,
            text_seg_pos: paddle.Tensor,
            span_bdy: List[int],
            use_teacher_forcing: bool=True, ) -> Optional[List[paddle.Tensor]]:
        '''Predict the masked span and splice it between the untouched frames.

        Args:
            speech (paddle.Tensor):
                input speech (1, Tmax, D).
            text (paddle.Tensor):
                input text (1, Tmax2).
            masked_pos (paddle.Tensor):
                masked position of input speech (1, Tmax)
            speech_mask (paddle.Tensor):
                mask of speech (1, 1, Tmax).
            text_mask (paddle.Tensor):
                mask of text (1, 1, Tmax2).
            speech_seg_pos (paddle.Tensor):
                n-th phone of each mel, 0<=n<=Tmax2 (1, Tmax).
            text_seg_pos (paddle.Tensor):
                n-th phone of each phone, 0<=n<=Tmax2 (1, Tmax2).
            span_bdy (List[int]):
                masked mel boundary of input speech (2,)
            use_teacher_forcing (bool):
                whether to use teacher forcing
        Returns:
            Optional[List[Tensor]]:
                ``[speech before the span, predicted span, speech after the
                span]``, each sliced along the time axis after the batch axis
                has been dropped,
                eg: [Tensor(shape=[181, 80]), Tensor(shape=[80, 80]), Tensor(shape=[67, 80])].
                ``None`` when ``use_teacher_forcing`` is False (only the
                teacher-forcing path is implemented).
        '''
        if not use_teacher_forcing:
            return None

        before_outs, zs, *_ = self.forward(
            speech=speech,
            text=text,
            masked_pos=masked_pos,
            speech_mask=speech_mask,
            text_mask=text_mask,
            speech_seg_pos=speech_seg_pos,
            text_seg_pos=text_seg_pos)
        # Without a postnet the refined output is None: use the raw one.
        if zs is None:
            zs = before_outs

        speech = speech.squeeze(0)
        outs = [speech[:span_bdy[0]]]
        outs += [zs[0][span_bdy[0]:span_bdy[1]]]
        outs += [speech[span_bdy[1]:]]
        return outs


class MLMEncAsDecoder(MLM):
    """MLM variant that predicts speech features only."""

    def forward(self,
                speech: paddle.Tensor,
                text: paddle.Tensor,
                masked_pos: paddle.Tensor,
                speech_mask: paddle.Tensor,
                text_mask: paddle.Tensor,
                speech_seg_pos: paddle.Tensor,
                text_seg_pos: paddle.Tensor):
        """Encode speech + text and project the speech positions to ``odim``.

        Returns:
            Tuple: ``(before_outs, after_outs, None)`` where ``after_outs`` is
            the postnet-refined output, or None when there is no postnet. The
            third slot (text predictions) is always None for this variant.
        """
        # feats: (Batch, Length, Dim)
        # -> hidden: (Batch, Length2, Dim2)
        hidden, h_masks = self.encoder(
            speech=speech,
            text=text,
            masked_pos=masked_pos,
            speech_mask=speech_mask,
            text_mask=text_mask,
            speech_seg_pos=speech_seg_pos,
            text_seg_pos=text_seg_pos)
        if self.decoder is not None:
            hidden, _ = self.decoder(hidden, h_masks)

        # The joint sequence starts with the speech positions: keep those.
        speech_hidden_states = hidden[:, :paddle.shape(speech)[1], :]

        before_outs = speech_hidden_states
        if self.sfc is not None:
            batch_size = paddle.shape(speech_hidden_states)[0]
            before_outs = paddle.reshape(
                self.sfc(speech_hidden_states), (batch_size, -1, self.odim))

        after_outs = None
        if self.postnet is not None:
            # The postnet runs on the [0, 2, 1]-transposed tensor; its output
            # is transposed back and added as a residual.
            residual = self.postnet(paddle.transpose(before_outs, [0, 2, 1]))
            after_outs = before_outs + paddle.transpose(residual, [0, 2, 1])
        return before_outs, after_outs, None


class MLMDualMaksing(MLM):
    """MLM variant that predicts masked speech frames and masked text tokens."""

    def forward(self,
                speech: paddle.Tensor,
                text: paddle.Tensor,
                masked_pos: paddle.Tensor,
                speech_mask: paddle.Tensor,
                text_mask: paddle.Tensor,
                speech_seg_pos: paddle.Tensor,
                text_seg_pos: paddle.Tensor):
        """Encode speech + text and project both streams.

        Returns:
            Tuple: ``(before_outs, after_outs, text_outs)``. ``after_outs`` is
            None when there is no postnet; ``text_outs`` holds the vocabulary
            logits of the text positions, or None when the model was built
            without ``text_masking`` (no ``text_sfc``).
        """
        # feats: (Batch, Length, Dim)
        # -> encoder_out: (Batch, Length2, Dim2)
        encoder_out, h_masks = self.encoder(
            speech=speech,
            text=text,
            masked_pos=masked_pos,
            speech_mask=speech_mask,
            text_mask=text_mask,
            speech_seg_pos=speech_seg_pos,
            text_seg_pos=text_seg_pos)
        if self.decoder is not None:
            zs, _ = self.decoder(encoder_out, h_masks)
        else:
            zs = encoder_out
        # The joint sequence is [speech positions, text positions].
        speech_hidden_states = zs[:, :paddle.shape(speech)[1], :]
        # Must be defined even without a text head: it is always returned.
        text_outs = None
        if self.text_sfc is not None:
            text_hidden_states = zs[:, paddle.shape(speech)[1]:, :]
            text_outs = paddle.reshape(
                self.text_sfc(text_hidden_states),
                (paddle.shape(text_hidden_states)[0], -1, self.vocab_size))
        if self.sfc is not None:
            before_outs = paddle.reshape(
                self.sfc(speech_hidden_states),
                (paddle.shape(speech_hidden_states)[0], -1, self.odim))
        else:
            before_outs = speech_hidden_states
        if self.postnet is not None:
            # The postnet runs on the [0, 2, 1]-transposed tensor; its output
            # is transposed back and added as a residual.
            after_outs = before_outs + paddle.transpose(
                self.postnet(paddle.transpose(before_outs, [0, 2, 1])),
                [0, 2, 1])
        else:
            after_outs = None
        return before_outs, after_outs, text_outs


class ErnieSAT(nn.Layer):
    """ERNIE-SAT acoustic model: an MLM encoder/decoder pair plus postnet.

    Builds an ``MLMEncoder``, an optional ``MLMDecoder`` and wraps them in
    ``MLMDualMaksing`` (when ``text_masking`` is True) or ``MLMEncAsDecoder``.

    Args:
        idim (int):
            Text vocabulary size (passed to the encoder as ``vocab_size``).
        odim (int):
            Speech feature dimension (passed to the encoder as ``idim`` and
            used as the model's output dimension).
        postnet_layers (int): Number of postnet layers.
        postnet_filts (int): Postnet filter size.
        postnet_chans (int): Number of postnet channels.
        use_scaled_pos_enc (bool): Stored on the instance only.
        encoder_type (str): Only "conformer" is supported.
        decoder_type (str): "no_decoder" disables the decoder; any other
            value builds an ``MLMDecoder``.
        enc_* / dec_*: Hyper-parameters forwarded to the encoder / decoder
            (see ``MLMEncoder``).
        text_masking (bool): Whether text tokens are masked and predicted too.
        init_type (str): Initialisation scheme handed to ``initialize``.

    Raises:
        ValueError: if ``encoder_type`` is not "conformer".
    """

    def __init__(
            self,
            # network structure related
            idim: int,
            odim: int,
            postnet_layers: int=5,
            postnet_filts: int=5,
            postnet_chans: int=256,
            use_scaled_pos_enc: bool=False,
            encoder_type: str='conformer',
            decoder_type: str='conformer',
            enc_input_layer: str='sega_mlm',
            enc_pre_speech_layer: int=0,
            enc_cnn_module_kernel: int=7,
            enc_attention_dim: int=384,
            enc_attention_heads: int=2,
            enc_linear_units: int=1536,
            enc_num_blocks: int=4,
            enc_dropout_rate: float=0.2,
            enc_positional_dropout_rate: float=0.2,
            enc_attention_dropout_rate: float=0.2,
            enc_normalize_before: bool=True,
            enc_macaron_style: bool=True,
            enc_use_cnn_module: bool=True,
            enc_selfattention_layer_type: str='legacy_rel_selfattn',
            enc_activation_type: str='swish',
            enc_pos_enc_layer_type: str='legacy_rel_pos',
            enc_positionwise_layer_type: str='conv1d',
            enc_positionwise_conv_kernel_size: int=3,
            text_masking: bool=False,
            dec_cnn_module_kernel: int=31,
            dec_attention_dim: int=384,
            dec_attention_heads: int=2,
            dec_linear_units: int=1536,
            dec_num_blocks: int=4,
            dec_dropout_rate: float=0.2,
            dec_positional_dropout_rate: float=0.2,
            dec_attention_dropout_rate: float=0.2,
            dec_macaron_style: bool=True,
            dec_use_cnn_module: bool=True,
            dec_selfattention_layer_type: str='legacy_rel_selfattn',
            dec_activation_type: str='swish',
            dec_pos_enc_layer_type: str='legacy_rel_pos',
            dec_positionwise_layer_type: str='conv1d',
            dec_positionwise_conv_kernel_size: int=3,
            init_type: str="xavier_uniform", ):
        super().__init__()
        # store hyperparameters
        self.odim = odim

        self.use_scaled_pos_enc = use_scaled_pos_enc

        # initialize parameters
        # NOTE: this runs before any sub-layer exists and is paired with the
        # ``set_global_initializer(None)`` reset at the end of ``__init__``,
        # so it must stay ahead of the encoder / decoder construction.
        initialize(self, init_type)

        # Encoder
        if encoder_type == "conformer":
            encoder = MLMEncoder(
                idim=odim,
                vocab_size=idim,
                pre_speech_layer=enc_pre_speech_layer,
                attention_dim=enc_attention_dim,
                attention_heads=enc_attention_heads,
                linear_units=enc_linear_units,
                num_blocks=enc_num_blocks,
                dropout_rate=enc_dropout_rate,
                positional_dropout_rate=enc_positional_dropout_rate,
                attention_dropout_rate=enc_attention_dropout_rate,
                input_layer=enc_input_layer,
                normalize_before=enc_normalize_before,
                positionwise_layer_type=enc_positionwise_layer_type,
                positionwise_conv_kernel_size=enc_positionwise_conv_kernel_size,
                macaron_style=enc_macaron_style,
                pos_enc_layer_type=enc_pos_enc_layer_type,
                selfattention_layer_type=enc_selfattention_layer_type,
                activation_type=enc_activation_type,
                use_cnn_module=enc_use_cnn_module,
                cnn_module_kernel=enc_cnn_module_kernel,
                text_masking=text_masking)
        else:
            raise ValueError(f"{encoder_type} is not supported.")

        # Decoder
        if decoder_type != 'no_decoder':
            decoder = MLMDecoder(
                idim=0,
                input_layer=None,
                cnn_module_kernel=dec_cnn_module_kernel,
                attention_dim=dec_attention_dim,
                attention_heads=dec_attention_heads,
                linear_units=dec_linear_units,
                num_blocks=dec_num_blocks,
                dropout_rate=dec_dropout_rate,
                positional_dropout_rate=dec_positional_dropout_rate,
                # Previously accepted but never forwarded, so the decoder
                # always ran with the default attention dropout of 0.0.
                attention_dropout_rate=dec_attention_dropout_rate,
                macaron_style=dec_macaron_style,
                use_cnn_module=dec_use_cnn_module,
                selfattention_layer_type=dec_selfattention_layer_type,
                activation_type=dec_activation_type,
                pos_enc_layer_type=dec_pos_enc_layer_type,
                positionwise_layer_type=dec_positionwise_layer_type,
                positionwise_conv_kernel_size=dec_positionwise_conv_kernel_size)

        else:
            decoder = None

        model_class = MLMDualMaksing if text_masking else MLMEncAsDecoder

        self.model = model_class(
            odim=odim,
            encoder=encoder,
            decoder=decoder,
            postnet_layers=postnet_layers,
            postnet_filts=postnet_filts,
            postnet_chans=postnet_chans,
            text_masking=text_masking)

        # Reset the global initializer once all sub-layers are built.
        nn.initializer.set_global_initializer(None)

    def forward(self,
                speech: paddle.Tensor,
                text: paddle.Tensor,
                masked_pos: paddle.Tensor,
                speech_mask: paddle.Tensor,
                text_mask: paddle.Tensor,
                speech_seg_pos: paddle.Tensor,
                text_seg_pos: paddle.Tensor):
        """Delegate to the wrapped MLM model and return its output unchanged."""
        return self.model(
            speech=speech,
            text=text,
            masked_pos=masked_pos,
            speech_mask=speech_mask,
            text_mask=text_mask,
            speech_seg_pos=speech_seg_pos,
            text_seg_pos=text_seg_pos)

    def inference(
            self,
            speech: paddle.Tensor,
            text: paddle.Tensor,
            masked_pos: paddle.Tensor,
            speech_mask: paddle.Tensor,
            text_mask: paddle.Tensor,
            speech_seg_pos: paddle.Tensor,
            text_seg_pos: paddle.Tensor,
            span_bdy: List[int],
            use_teacher_forcing: bool=True, ) -> Optional[List[paddle.Tensor]]:
        """Delegate to the wrapped model's ``inference``.

        Returns:
            Optional[List[paddle.Tensor]]:
                The three-element list built by ``MLM.inference`` (speech
                before the span, predicted span, speech after the span), or
                None when ``use_teacher_forcing`` is False.
        """
        return self.model.inference(
            speech=speech,
            text=text,
            masked_pos=masked_pos,
            speech_mask=speech_mask,
            text_mask=text_mask,
            speech_seg_pos=speech_seg_pos,
            text_seg_pos=text_seg_pos,
            span_bdy=span_bdy,
            use_teacher_forcing=use_teacher_forcing)


class ErnieSATInference(nn.Layer):
    """Inference wrapper: runs the acoustic model, then undoes normalization.

    Args:
        normalizer: Object whose ``inverse`` method maps normalized mel
            features back to the log-mel domain.
        model: Acoustic model exposing ``inference``.
    """

    def __init__(self, normalizer, model):
        super().__init__()
        self.normalizer = normalizer
        self.acoustic_model = model

    def forward(
            self,
            speech: paddle.Tensor,
            text: paddle.Tensor,
            masked_pos: paddle.Tensor,
            speech_mask: paddle.Tensor,
            text_mask: paddle.Tensor,
            speech_seg_pos: paddle.Tensor,
            text_seg_pos: paddle.Tensor,
            span_bdy: List[int],
            use_teacher_forcing: bool=True, ):
        """Return the de-normalized (pre, masked, post) log-mel segments."""
        # The acoustic model yields exactly three segments: the frames before
        # the span, the predicted span, and the frames after it.
        normed_mel_pre, normed_mel_masked, normed_mel_post = (
            self.acoustic_model.inference(
                speech=speech,
                text=text,
                masked_pos=masked_pos,
                speech_mask=speech_mask,
                text_mask=text_mask,
                speech_seg_pos=speech_seg_pos,
                text_seg_pos=text_seg_pos,
                span_bdy=span_bdy,
                use_teacher_forcing=use_teacher_forcing))

        return (self.normalizer.inverse(normed_mel_pre),
                self.normalizer.inverse(normed_mel_masked),
                self.normalizer.inverse(normed_mel_post))


================================================
FILE: paddlespeech/t2s/models/ernie_sat/ernie_sat_updater.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from pathlib import Path

from paddle import distributed as dist
from paddle.io import DataLoader
from paddle.nn import Layer
from paddle.optimizer import Optimizer
from paddle.optimizer.lr import LRScheduler

from paddlespeech.t2s.modules.losses import MLMLoss
from paddlespeech.t2s.training.extensions.evaluator import StandardEvaluator
from paddlespeech.t2s.training.reporter import report
from paddlespeech.t2s.training.updaters.standard_updater import StandardUpdater
# Configure the root logger's record and date format at import time.
# (``basicConfig`` does nothing if the root logger already has handlers.)
logging.basicConfig(
    format='%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(message)s',
    datefmt='[%Y-%m-%d %H:%M:%S]')
# Module-level logger; the updater and evaluator in this file attach their
# per-worker file handlers to it.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class ErnieSATUpdater(StandardUpdater):
    """Training updater for ERNIE-SAT: one optimisation step per batch.

    Args:
        model (Layer): Model to train.
        optimizer (Optimizer): Optimizer stepping the model parameters.
        scheduler (LRScheduler): Learning-rate scheduler, stepped after every
            optimizer step.
        dataloader (DataLoader): Training data loader.
        init_state: Optional initial updater state, forwarded to
            ``StandardUpdater``.
        text_masking (bool): Forwarded to ``MLMLoss``.
        odim (int): Forwarded to ``MLMLoss``.
        vocab_size (int): Forwarded to ``MLMLoss``.
        output_dir (Path): Directory receiving ``worker_<rank>.log``; when
            None no log file is attached.
    """

    def __init__(self,
                 model: Layer,
                 optimizer: Optimizer,
                 scheduler: LRScheduler,
                 dataloader: DataLoader,
                 init_state=None,
                 text_masking: bool=False,
                 odim: int=80,
                 vocab_size: int=100,
                 output_dir: Path=None):
        # Forward the caller's state; it used to be dropped (hard-coded None).
        super().__init__(model, optimizer, dataloader, init_state=init_state)
        self.scheduler = scheduler

        self.criterion = MLMLoss(
            text_masking=text_masking, odim=odim, vocab_size=vocab_size)

        if output_dir is not None:
            log_file = Path(output_dir) / 'worker_{}.log'.format(
                dist.get_rank())
            self.filehandler = logging.FileHandler(str(log_file))
            logger.addHandler(self.filehandler)
        else:
            # The default ``output_dir=None`` used to fail on ``None / str``.
            self.filehandler = None
        self.logger = logger
        self.msg = ""

    def update_core(self, batch):
        """Run forward, loss, backward and optimizer/scheduler steps.

        Args:
            batch: Mapping with the keys "speech", "text", "masked_pos",
                "speech_mask", "text_mask", "speech_seg_pos", "text_seg_pos"
                and "text_masked_pos".

        Reports the losses under ``train/...`` and stores a summary line in
        ``self.msg``.
        """
        self.msg = "Rank: {}, ".format(dist.get_rank())
        losses_dict = {}

        before_outs, after_outs, text_outs = self.model(
            speech=batch["speech"],
            text=batch["text"],
            masked_pos=batch["masked_pos"],
            speech_mask=batch["speech_mask"],
            text_mask=batch["text_mask"],
            speech_seg_pos=batch["speech_seg_pos"],
            text_seg_pos=batch["text_seg_pos"])

        mlm_loss, text_mlm_loss = self.criterion(
            speech=batch["speech"],
            before_outs=before_outs,
            after_outs=after_outs,
            masked_pos=batch["masked_pos"],
            text=batch["text"],
            # maybe None
            text_outs=text_outs,
            # maybe None
            text_masked_pos=batch["text_masked_pos"])

        # The text loss is optional; add it only when the criterion gave one.
        loss = mlm_loss + text_mlm_loss if text_mlm_loss is not None else mlm_loss

        self.optimizer.clear_grad()

        loss.backward()
        self.optimizer.step()
        self.scheduler.step()
        scheduler_msg = 'lr: {}'.format(self.scheduler.last_lr)

        report("train/loss", float(loss))
        report("train/mlm_loss", float(mlm_loss))
        if text_mlm_loss is not None:
            report("train/text_mlm_loss", float(text_mlm_loss))
            losses_dict["text_mlm_loss"] = float(text_mlm_loss)

        losses_dict["mlm_loss"] = float(mlm_loss)
        losses_dict["loss"] = float(loss)
        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())
        self.msg += ', ' + scheduler_msg


class ErnieSATEvaluator(StandardEvaluator):
    """Evaluator that computes and logs the ERNIE-SAT MLM losses per batch."""

    def __init__(self,
                 model: Layer,
                 dataloader: DataLoader,
                 text_masking: bool=False,
                 odim: int=80,
                 vocab_size: int=100,
                 output_dir: Path=None):
        """Set up per-rank file logging and the MLM criterion.

        Args:
            model (Layer): Model to evaluate.
            dataloader (DataLoader): Source of evaluation batches.
            text_masking (bool): Forwarded to ``MLMLoss``.
            odim (int): Forwarded to ``MLMLoss``.
            vocab_size (int): Forwarded to ``MLMLoss``.
            output_dir (Path): Directory in which ``worker_<rank>.log`` is
                created.
        """
        super().__init__(model, dataloader)

        # One log file per distributed rank.
        rank = dist.get_rank()
        log_path = output_dir / 'worker_{}.log'.format(rank)
        handler = logging.FileHandler(str(log_path))
        self.filehandler = handler
        logger.addHandler(handler)
        self.logger = logger
        self.msg = ""

        self.criterion = MLMLoss(
            text_masking=text_masking, odim=odim, vocab_size=vocab_size)

    def evaluate_core(self, batch):
        """Evaluate one batch: forward, compute losses, report and log them.

        Args:
            batch: Mapping that provides the keys ``speech``, ``text``,
                ``masked_pos``, ``speech_mask``, ``text_mask``,
                ``speech_seg_pos``, ``text_seg_pos`` and ``text_masked_pos``.
        """
        self.msg = "Evaluate: "

        # Forward pass: the model consumes a fixed subset of the batch keys.
        model_keys = ("speech", "text", "masked_pos", "speech_mask",
                      "text_mask", "speech_seg_pos", "text_seg_pos")
        model_inputs = {key: batch[key] for key in model_keys}
        before_outs, after_outs, text_outs = self.model(**model_inputs)

        # text_outs and batch["text_masked_pos"] may be None.
        criterion_inputs = dict(
            speech=batch["speech"],
            before_outs=before_outs,
            after_outs=after_outs,
            masked_pos=batch["masked_pos"],
            text=batch["text"],
            text_outs=text_outs,
            text_masked_pos=batch["text_masked_pos"])
        mlm_loss, text_mlm_loss = self.criterion(**criterion_inputs)

        # The text loss only contributes when the criterion produced one.
        has_text_loss = text_mlm_loss is not None
        if has_text_loss:
            loss = mlm_loss + text_mlm_loss
        else:
            loss = mlm_loss

        # Report scalars and collect them in the order they are logged.
        total_value = float(loss)
        report("eval/loss", total_value)
        mlm_value = float(mlm_loss)
        report("eval/mlm_loss", mlm_value)

        logged = []
        if has_text_loss:
            text_value = float(text_mlm_loss)
            report("eval/text_mlm_loss", text_value)
            logged.append(("text_mlm_loss", text_value))
        logged.append(("mlm_loss", mlm_value))
        logged.append(("loss", total_value))

        summary = ', '.join('{}: {:>.6f}'.format(name, value)
                            for name, value in logged)
        self.msg += summary
        self.logger.info(self.msg)


================================================
FILE: paddlespeech/t2s/models/fastspeech2/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .fastspeech2 import *
from .fastspeech2_updater import *


================================================
FILE: paddlespeech/t2s/models/fastspeech2/fastspeech2.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""Fastspeech2 related modules for paddle"""
from typing import Dict
from typing import List
from typing import Optional
from typing import Sequence
from typing import Tuple
from typing import Union

import numpy as np
import paddle
import paddle.nn.functional as F
from paddle import nn
from typeguard import typechecked

from paddlespeech.t2s.modules.adversarial_loss.gradient_reversal import GradientReversalLayer
from paddlespeech.t2s.modules.adversarial_loss.speaker_classifier import SpeakerClassifier
from paddlespeech.t2s.modules.nets_utils import initialize
from paddlespeech.t2s.modules.nets_utils import make_non_pad_mask
from paddlespeech.t2s.modules.nets_utils import make_pad_mask
from paddlespeech.t2s.modules.predictor.duration_predictor import DurationPredictor
from paddlespeech.t2s.modules.predictor.duration_predictor import DurationPredictorLoss
from paddlespeech.t2s.modules.predictor.length_regulator import LengthRegulator
from paddlespeech.t2s.modules.predictor.variance_predictor import VariancePredictor
from paddlespeech.t2s.modules.tacotron2.decoder import Postnet
from paddlespeech.t2s.modules.transformer.encoder import CNNDecoder
from paddlespeech.t2s.modules.transformer.encoder import CNNPostnet
from paddlespeech.t2s.modules.transformer.encoder import ConformerEncoder
from paddlespeech.t2s.modules.transformer.encoder import TransformerEncoder


class FastSpeech2(nn.Layer):
    """FastSpeech2 module.
    
    This is a module of FastSpeech2 described in `FastSpeech 2: Fast and
    High-Quality End-to-End Text to Speech`_. Instead of quantized pitch and
    energy, we use token-averaged value introduced in `FastPitch: Parallel
    Text-to-speech with Pitch Prediction`_.
    
    .. _`FastSpeech 2: Fast and High-Quality End-to-End Text to Speech`:
        https://arxiv.org/abs/2006.04558
    .. _`FastPitch: Parallel Text-to-speech with Pitch Prediction`:
        https://arxiv.org/abs/2006.06873

    Args:
    
    Returns:

    """

    @typechecked
    def __init__(
            self,
            # network structure related
            idim: int,
            odim: int,
            adim: int=384,
            aheads: int=4,
            elayers: int=6,
            eunits: int=1536,
            dlayers: int=6,
            dunits: int=1536,
            postnet_layers: int=5,
            postnet_chans: int=512,
            postnet_filts: int=5,
            postnet_dropout_rate: float=0.5,
            positionwise_layer_type: str="conv1d",
            positionwise_conv_kernel_size: int=1,
            use_scaled_pos_enc: bool=True,
            use_batch_norm: bool=True,
            encoder_normalize_before: bool=True,
            decoder_normalize_before: bool=True,
            encoder_concat_after: bool=False,
            decoder_concat_after: bool=False,
            reduction_factor: int=1,
            encoder_type: str="transformer",
            decoder_type: str="transformer",
            # for transformer
            transformer_enc_dropout_rate: float=0.1,
            transformer_enc_positional_dropout_rate: float=0.1,
            transformer_enc_attn_dropout_rate: float=0.1,
            transformer_dec_dropout_rate: float=0.1,
            transformer_dec_positional_dropout_rate: float=0.1,
            transformer_dec_attn_dropout_rate: float=0.1,
            transformer_activation_type: str="relu",
            # for conformer
            conformer_pos_enc_layer_type: str="rel_pos",
            conformer_self_attn_layer_type: str="rel_selfattn",
            conformer_activation_type: str="swish",
            use_macaron_style_in_conformer: bool=True,
            use_cnn_in_conformer: bool=True,
            zero_triu: bool=False,
            conformer_enc_kernel_size: int=7,
            conformer_dec_kernel_size: int=31,
            # for CNN Decoder
            cnn_dec_dropout_rate: float=0.2,
            cnn_postnet_dropout_rate: float=0.2,
            cnn_postnet_resblock_kernel_sizes: List[int]=[256, 256],
            cnn_postnet_kernel_size: int=5,
            cnn_decoder_embedding_dim: int=256,
            # duration predictor
            duration_predictor_layers: int=2,
            duration_predictor_chans: int=384,
            duration_predictor_kernel_size: int=3,
            duration_predictor_dropout_rate: float=0.1,
            # energy predictor
            energy_predictor_layers: int=2,
            energy_predictor_chans: int=384,
            energy_predictor_kernel_size: int=3,
            energy_predictor_dropout: float=0.5,
            energy_embed_kernel_size: int=9,
            energy_embed_dropout: float=0.5,
            stop_gradient_from_energy_predictor: bool=False,
            # pitch predictor
            pitch_predictor_layers: int=2,
            pitch_predictor_chans: int=384,
            pitch_predictor_kernel_size: int=3,
            pitch_predictor_dropout: float=0.5,
            pitch_embed_kernel_size: int=9,
            pitch_embed_dropout: float=0.5,
            stop_gradient_from_pitch_predictor: bool=False,
            # spk emb
            spk_num: Optional[int]=None,
            spk_embed_dim: Optional[int]=None,
            spk_embed_integration_type: str="add",
            # tone emb
            tone_num: Optional[int]=None,
            tone_embed_dim: Optional[int]=None,
            tone_embed_integration_type: str="add",
            # training related
            init_type: str="xavier_uniform",
            init_enc_alpha: float=1.0,
            init_dec_alpha: float=1.0,
            # speaker classifier
            enable_speaker_classifier: bool=False,
            hidden_sc_dim: int=256, ):
        """Initialize FastSpeech2 module.

        Builds, in this order: the optional speaker/tone embedding tables, the
        encoder, the optional speaker/tone projections, the optional speaker
        classifier, the duration/pitch/energy predictors with their embedding
        convolutions, the length regulator, the decoder, the output projection
        and the postnet.

        Args:
            idim (int):
                Dimension of the inputs.
            odim (int):
                Dimension of the outputs.
            adim (int):
                Attention dimension.
            aheads (int):
                Number of attention heads.
            elayers (int):
                Number of encoder layers.
            eunits (int):
                Number of encoder hidden units.
            dlayers (int):
                Number of decoder layers.
            dunits (int):
                Number of decoder hidden units.
            postnet_layers (int):
                Number of postnet layers. With 0, no Postnet is built (only
                relevant when decoder_type is not "cnndecoder").
            postnet_chans (int):
                Number of postnet channels.
            postnet_filts (int):
                Kernel size of postnet.
            postnet_dropout_rate (float):
                Dropout rate in postnet.
            positionwise_layer_type (str):
                Position-wise layer type, forwarded to the transformer /
                conformer encoder and decoder.
            positionwise_conv_kernel_size (int):
                Position-wise conv kernel size, forwarded to the transformer /
                conformer encoder and decoder.
            use_scaled_pos_enc (bool):
                Whether to use trainable scaled pos encoding.
            use_batch_norm (bool):
                Whether to use batch normalization in encoder prenet.
            encoder_normalize_before (bool):
                Whether to apply layernorm layer before encoder block.
            decoder_normalize_before (bool):
                Whether to apply layernorm layer before decoder block.
            encoder_concat_after (bool):
                Whether to concatenate attention layer's input and output in encoder.
            decoder_concat_after (bool):
                Whether to concatenate attention layer's input and output in decoder.
            reduction_factor (int):
                Reduction factor.
            encoder_type (str):
                Encoder type ("transformer" or "conformer").
            decoder_type (str):
                Decoder type ("transformer", "conformer" or "cnndecoder").
            transformer_enc_dropout_rate (float):
                Dropout rate in encoder except attention and positional encoding.
            transformer_enc_positional_dropout_rate (float):
                Dropout rate after encoder positional encoding.
            transformer_enc_attn_dropout_rate (float):
                Dropout rate in encoder self-attention module.
            transformer_dec_dropout_rate (float):
                Dropout rate in decoder except attention & positional encoding.
            transformer_dec_positional_dropout_rate (float):
                Dropout rate after decoder positional encoding.
            transformer_dec_attn_dropout_rate (float):
                Dropout rate in decoder self-attention module.
            transformer_activation_type (str):
                Activation function type in transformer. In this constructor it
                is only passed to the transformer encoder.
            conformer_pos_enc_layer_type (str):
                Pos encoding layer type in conformer.
            conformer_self_attn_layer_type (str):
                Self-attention layer type in conformer.
            conformer_activation_type (str):
                Activation function type in conformer. It is also the value
                passed to the transformer decoder (see the note in the body).
            use_macaron_style_in_conformer (bool):
                Whether to use macaron style FFN.
            use_cnn_in_conformer (bool):
                Whether to use CNN in conformer.
            zero_triu (bool):
                Whether to use zero triu in relative self-attention module.
                Only forwarded to the conformer encoder.
            conformer_enc_kernel_size (int):
                Kernel size of encoder conformer.
            conformer_dec_kernel_size (int):
                Kernel size of decoder conformer.
            cnn_dec_dropout_rate (float):
                Dropout rate passed to CNNDecoder.
            cnn_postnet_dropout_rate (float):
                Dropout rate passed to CNNPostnet.
            cnn_postnet_resblock_kernel_sizes (List[int]):
                Passed as resblock_kernel_sizes to both CNNDecoder and CNNPostnet.
            cnn_postnet_kernel_size (int):
                Passed as kernel_size to both CNNDecoder and CNNPostnet.
            cnn_decoder_embedding_dim (int):
                Accepted but not referenced anywhere in this constructor.
            duration_predictor_layers (int):
                Number of duration predictor layers.
            duration_predictor_chans (int):
                Number of duration predictor channels.
            duration_predictor_kernel_size (int):
                Kernel size of duration predictor.
            duration_predictor_dropout_rate (float):
                Dropout rate in duration predictor.
            energy_predictor_layers (int):
                Number of energy predictor layers.
            energy_predictor_chans (int):
                Number of energy predictor channels.
            energy_predictor_kernel_size (int):
                Kernel size of energy predictor.
            energy_predictor_dropout (float):
                Dropout rate in energy predictor.
            energy_embed_kernel_size (int):
                Kernel size of energy embedding.
            energy_embed_dropout (float):
                Dropout rate for energy embedding.
            stop_gradient_from_energy_predictor (bool):
                Whether to stop gradient from energy predictor to encoder.
            pitch_predictor_layers (int):
                Number of pitch predictor layers.
            pitch_predictor_chans (int):
                Number of pitch predictor channels.
            pitch_predictor_kernel_size (int):
                Kernel size of pitch predictor.
            pitch_predictor_dropout (float):
                Dropout rate in pitch predictor.
            pitch_embed_kernel_size (int):
                Kernel size of pitch embedding.
            pitch_embed_dropout (float):
                Dropout rate for pitch embedding.
            stop_gradient_from_pitch_predictor (bool):
                Whether to stop gradient from pitch predictor to encoder.
            spk_num (Optional[int]):
                Number of speakers. If not None, assume that the spk_embed_dim is not None,
                spk_ids will be provided as the input and use spk_embedding_table.
            spk_embed_dim (Optional[int]):
                Speaker embedding dimension. If not None,
                assume that spk_emb will be provided as the input or spk_num is not None.
            spk_embed_integration_type (str):
                How to integrate speaker embedding. "add" builds a projection
                from spk_embed_dim to adim; any other value builds one from
                adim + spk_embed_dim to adim.
            tone_num (Optional[int]):
                Number of tones. If not None, assume that the
                tone_ids will be provided as the input and use tone_embedding_table.
            tone_embed_dim (Optional[int]):
                Tone embedding dimension. If not None, assume that tone_num is not None.
            tone_embed_integration_type (str):
                How to integrate tone embedding. "add" builds a projection
                from tone_embed_dim to adim; any other value builds one from
                adim + tone_embed_dim to adim.
            init_type (str):
                How to initialize transformer parameters.
            init_enc_alpha (float):
                Initial value of alpha in scaled pos encoding of the encoder.
            init_dec_alpha (float):
                Initial value of alpha in scaled pos encoding of the decoder.
            enable_speaker_classifier (bool):
                Whether to use speaker classifier module. It is only built
                when spk_num is also truthy.
            hidden_sc_dim (int):
                The hidden layer dim of speaker classifier.

        Raises:
            ValueError: If encoder_type or decoder_type is not one of the
                supported values listed above.
        """
        super().__init__()

        # store hyperparameters
        self.odim = odim
        self.reduction_factor = reduction_factor
        self.encoder_type = encoder_type
        self.decoder_type = decoder_type
        self.stop_gradient_from_pitch_predictor = stop_gradient_from_pitch_predictor
        self.stop_gradient_from_energy_predictor = stop_gradient_from_energy_predictor
        self.use_scaled_pos_enc = use_scaled_pos_enc
        self.hidden_sc_dim = hidden_sc_dim
        self.spk_num = spk_num
        self.enable_speaker_classifier = enable_speaker_classifier

        # The integration-type attributes only exist when the matching
        # embedding dimension was given.
        self.spk_embed_dim = spk_embed_dim
        if self.spk_embed_dim is not None:
            self.spk_embed_integration_type = spk_embed_integration_type

        self.tone_embed_dim = tone_embed_dim
        if self.tone_embed_dim is not None:
            self.tone_embed_integration_type = tone_embed_integration_type

        # use idx 0 as padding idx
        self.padding_idx = 0

        # initialize parameters
        # NOTE(review): presumably installs a global initializer that applies
        # to the layers created below; it is reset at the end of this method
        # via set_global_initializer(None) - confirm against `initialize`.
        initialize(self, init_type)

        # The speaker lookup table needs both the speaker count and the
        # embedding size.
        if spk_num and self.spk_embed_dim:
            self.spk_embedding_table = nn.Embedding(
                num_embeddings=spk_num,
                embedding_dim=self.spk_embed_dim,
                padding_idx=self.padding_idx)

        # Only tone_embed_dim is checked here; tone_num is used as-is for
        # num_embeddings, so it must be set whenever tone_embed_dim is.
        if self.tone_embed_dim is not None:
            self.tone_embedding_table = nn.Embedding(
                num_embeddings=tone_num,
                embedding_dim=self.tone_embed_dim,
                padding_idx=self.padding_idx)

        # get positional encoding layer type
        transformer_pos_enc_layer_type = "scaled_abs_pos" if self.use_scaled_pos_enc else "abs_pos"

        # define encoder
        # The token embedding is handed to the encoder as its input layer.
        encoder_input_layer = nn.Embedding(
            num_embeddings=idim,
            embedding_dim=adim,
            padding_idx=self.padding_idx)

        if encoder_type == "transformer":
            self.encoder = TransformerEncoder(
                idim=idim,
                attention_dim=adim,
                attention_heads=aheads,
                linear_units=eunits,
                num_blocks=elayers,
                input_layer=encoder_input_layer,
                dropout_rate=transformer_enc_dropout_rate,
                positional_dropout_rate=transformer_enc_positional_dropout_rate,
                attention_dropout_rate=transformer_enc_attn_dropout_rate,
                pos_enc_layer_type=transformer_pos_enc_layer_type,
                normalize_before=encoder_normalize_before,
                concat_after=encoder_concat_after,
                positionwise_layer_type=positionwise_layer_type,
                positionwise_conv_kernel_size=positionwise_conv_kernel_size,
                activation_type=transformer_activation_type)
        elif encoder_type == "conformer":
            # The conformer encoder reuses the transformer_enc_* dropout rates.
            self.encoder = ConformerEncoder(
                idim=idim,
                attention_dim=adim,
                attention_heads=aheads,
                linear_units=eunits,
                num_blocks=elayers,
                input_layer=encoder_input_layer,
                dropout_rate=transformer_enc_dropout_rate,
                positional_dropout_rate=transformer_enc_positional_dropout_rate,
                attention_dropout_rate=transformer_enc_attn_dropout_rate,
                normalize_before=encoder_normalize_before,
                concat_after=encoder_concat_after,
                positionwise_layer_type=positionwise_layer_type,
                positionwise_conv_kernel_size=positionwise_conv_kernel_size,
                macaron_style=use_macaron_style_in_conformer,
                pos_enc_layer_type=conformer_pos_enc_layer_type,
                selfattention_layer_type=conformer_self_attn_layer_type,
                activation_type=conformer_activation_type,
                use_cnn_module=use_cnn_in_conformer,
                cnn_module_kernel=conformer_enc_kernel_size,
                zero_triu=zero_triu, )
        else:
            raise ValueError(f"{encoder_type} is not supported.")

        # define additional projection for speaker embedding
        if self.spk_embed_dim is not None:
            if self.spk_embed_integration_type == "add":
                self.spk_projection = nn.Linear(self.spk_embed_dim, adim)
            else:
                self.spk_projection = nn.Linear(adim + self.spk_embed_dim, adim)

        # define additional projection for tone embedding
        if self.tone_embed_dim is not None:
            if self.tone_embed_integration_type == "add":
                self.tone_projection = nn.Linear(self.tone_embed_dim, adim)
            else:
                self.tone_projection = nn.Linear(adim + self.tone_embed_dim,
                                                 adim)

        # Speaker classifier fed through a gradient reversal layer; built only
        # for multi-speaker setups that explicitly enable it.
        if self.spk_num and self.enable_speaker_classifier:
            # set lambda = 1
            self.grad_reverse = GradientReversalLayer(1)
            self.speaker_classifier = SpeakerClassifier(
                idim=adim, hidden_sc_dim=self.hidden_sc_dim, spk_num=spk_num)

        # define duration predictor
        self.duration_predictor = DurationPredictor(
            idim=adim,
            n_layers=duration_predictor_layers,
            n_chans=duration_predictor_chans,
            kernel_size=duration_predictor_kernel_size,
            dropout_rate=duration_predictor_dropout_rate, )

        # define pitch predictor
        self.pitch_predictor = VariancePredictor(
            idim=adim,
            n_layers=pitch_predictor_layers,
            n_chans=pitch_predictor_chans,
            kernel_size=pitch_predictor_kernel_size,
            dropout_rate=pitch_predictor_dropout, )
        # We use continuous pitch + FastPitch style avg
        # Conv1D maps the single pitch channel to adim channels.
        self.pitch_embed = nn.Sequential(
            nn.Conv1D(
                in_channels=1,
                out_channels=adim,
                kernel_size=pitch_embed_kernel_size,
                padding=(pitch_embed_kernel_size - 1) // 2, ),
            nn.Dropout(pitch_embed_dropout), )

        # define energy predictor
        self.energy_predictor = VariancePredictor(
            idim=adim,
            n_layers=energy_predictor_layers,
            n_chans=energy_predictor_chans,
            kernel_size=energy_predictor_kernel_size,
            dropout_rate=energy_predictor_dropout, )
        # We use continuous energy + FastPitch style avg
        # Conv1D maps the single energy channel to adim channels.
        self.energy_embed = nn.Sequential(
            nn.Conv1D(
                in_channels=1,
                out_channels=adim,
                kernel_size=energy_embed_kernel_size,
                padding=(energy_embed_kernel_size - 1) // 2, ),
            nn.Dropout(energy_embed_dropout), )

        # define length regulator
        self.length_regulator = LengthRegulator()

        # define decoder
        # NOTE: we use encoder as decoder
        # because fastspeech's decoder is the same as encoder
        if decoder_type == "transformer":
            # NOTE(review): activation_type below is conformer_activation_type,
            # whereas the transformer encoder above receives
            # transformer_activation_type. This looks like a copy/paste slip,
            # but changing it would alter the outputs of checkpoints trained
            # with the current behavior - confirm before touching.
            self.decoder = TransformerEncoder(
                idim=0,
                attention_dim=adim,
                attention_heads=aheads,
                linear_units=dunits,
                num_blocks=dlayers,
                # in decoder, don't need layer before pos_enc_class (we use embedding here in encoder)
                input_layer=None,
                dropout_rate=transformer_dec_dropout_rate,
                positional_dropout_rate=transformer_dec_positional_dropout_rate,
                attention_dropout_rate=transformer_dec_attn_dropout_rate,
                pos_enc_layer_type=transformer_pos_enc_layer_type,
                normalize_before=decoder_normalize_before,
                concat_after=decoder_concat_after,
                positionwise_layer_type=positionwise_layer_type,
                positionwise_conv_kernel_size=positionwise_conv_kernel_size,
                activation_type=conformer_activation_type, )
        elif decoder_type == "conformer":
            # The conformer decoder reuses the transformer_dec_* dropout rates;
            # zero_triu is not forwarded here (unlike the conformer encoder).
            self.decoder = ConformerEncoder(
                idim=0,
                attention_dim=adim,
                attention_heads=aheads,
                linear_units=dunits,
                num_blocks=dlayers,
                input_layer=None,
                dropout_rate=transformer_dec_dropout_rate,
                positional_dropout_rate=transformer_dec_positional_dropout_rate,
                attention_dropout_rate=transformer_dec_attn_dropout_rate,
                normalize_before=decoder_normalize_before,
                concat_after=decoder_concat_after,
                positionwise_layer_type=positionwise_layer_type,
                positionwise_conv_kernel_size=positionwise_conv_kernel_size,
                macaron_style=use_macaron_style_in_conformer,
                pos_enc_layer_type=conformer_pos_enc_layer_type,
                selfattention_layer_type=conformer_self_attn_layer_type,
                activation_type=conformer_activation_type,
                use_cnn_module=use_cnn_in_conformer,
                cnn_module_kernel=conformer_dec_kernel_size, )
        elif decoder_type == 'cnndecoder':
            # The CNN decoder takes its kernel settings from the cnn_postnet_*
            # arguments and is given odim directly.
            self.decoder = CNNDecoder(
                emb_dim=adim,
                odim=odim,
                kernel_size=cnn_postnet_kernel_size,
                dropout_rate=cnn_dec_dropout_rate,
                resblock_kernel_sizes=cnn_postnet_resblock_kernel_sizes)
        else:
            raise ValueError(f"{decoder_type} is not supported.")

        # define final projection
        # Created for every decoder type.
        self.feat_out = nn.Linear(adim, odim * reduction_factor)

        # define postnet
        if decoder_type == 'cnndecoder':
            self.postnet = CNNPostnet(
                odim=odim,
                kernel_size=cnn_postnet_kernel_size,
                dropout_rate=cnn_postnet_dropout_rate,
                resblock_kernel_sizes=cnn_postnet_resblock_kernel_sizes)
        else:
            # postnet_layers == 0 disables the postnet entirely.
            self.postnet = (None if postnet_layers == 0 else Postnet(
                idim=idim,
                odim=odim,
                n_layers=postnet_layers,
                n_chans=postnet_chans,
                n_filts=postnet_filts,
                use_batch_norm=use_batch_norm,
                dropout_rate=postnet_dropout_rate, ))

        # Clear the global initializer so it does not affect layers created
        # after this model.
        nn.initializer.set_global_initializer(None)

        self._reset_parameters(
            init_enc_alpha=init_enc_alpha,
            init_dec_alpha=init_dec_alpha, )

    def forward(
            self,
            text: paddle.Tensor,
            text_lengths: paddle.Tensor,
            speech: paddle.Tensor,
            speech_lengths: paddle.Tensor,
            durations: paddle.Tensor,
            pitch: paddle.Tensor,
            energy: paddle.Tensor,
            tone_id: paddle.Tensor=None,
            spk_emb: paddle.Tensor=None,
            spk_id: paddle.Tensor=None
    ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor, paddle.Tensor,
               paddle.Tensor, paddle.Tensor, paddle.Tensor,
               Optional[paddle.Tensor]]:
        """Calculate forward propagation.

        Casts the integer inputs to int64, runs the training-mode
        ``_forward`` pass with the ground-truth durations, pitch and energy,
        and returns the network outputs together with the (possibly trimmed)
        targets needed by the loss.

        Args:
            text(Tensor(int64)):
                Batch of padded token ids (B, Tmax).
            text_lengths(Tensor(int64)):
                Batch of lengths of each input (B,).
            speech(Tensor):
                Batch of padded target features (B, Lmax, odim).
            speech_lengths(Tensor(int64)):
                Batch of the lengths of each target (B,).
            durations(Tensor(int64)):
                Batch of padded durations (B, Tmax).
            pitch(Tensor):
                Batch of padded token-averaged pitch (B, Tmax, 1).
            energy(Tensor):
                Batch of padded token-averaged energy (B, Tmax, 1).
            tone_id(Tensor, optional(int64)):
                Batch of padded tone ids  (B, Tmax).
            spk_emb(Tensor, optional):
                Batch of speaker embeddings (B, spk_embed_dim).
            spk_id(Tensor, optional(int64)):
                Batch of speaker ids (B,)

        Returns:
            Tuple of eight elements, in this order:

            - before_outs (Tensor): Decoder output before the postnet.
            - after_outs (Tensor): Output after the postnet residual
              (same as before_outs when there is no postnet).
            - d_outs (Tensor): Duration predictor output.
            - p_outs (Tensor): Pitch predictor output.
            - e_outs (Tensor): Energy predictor output.
            - ys (Tensor): Target features; truncated along the time axis to
              the maximum adjusted length when reduction_factor > 1.
            - olens (Tensor(int64)): Target lengths; rounded down to a
              multiple of reduction_factor when reduction_factor > 1.
            - spk_logits (Tensor or None): Speaker classifier logits, or None
              when the speaker classifier is not enabled.
        """

        # input of embedding must be int64
        xs = paddle.cast(text, 'int64')
        ilens = paddle.cast(text_lengths, 'int64')
        ds = paddle.cast(durations, 'int64')
        olens = paddle.cast(speech_lengths, 'int64')
        ys = speech
        ps = pitch
        es = energy
        if spk_id is not None:
            spk_id = paddle.cast(spk_id, 'int64')
        if tone_id is not None:
            tone_id = paddle.cast(tone_id, 'int64')
        # forward propagation
        before_outs, after_outs, d_outs, p_outs, e_outs, spk_logits = self._forward(
            xs,
            ilens,
            olens,
            ds,
            ps,
            es,
            is_inference=False,
            spk_emb=spk_emb,
            spk_id=spk_id,
            tone_id=tone_id)
        # modify mod part of groundtruth
        # With a reduction factor the lengths are rounded down to a multiple
        # of it and the targets are cut to the longest adjusted length.
        if self.reduction_factor > 1:
            olens = olens - olens % self.reduction_factor
            max_olen = max(olens)
            ys = ys[:, :max_olen]

        return before_outs, after_outs, d_outs, p_outs, e_outs, ys, olens, spk_logits

    def _forward(self,
                 xs: paddle.Tensor,
                 ilens: paddle.Tensor,
                 olens: paddle.Tensor=None,
                 ds: paddle.Tensor=None,
                 ps: paddle.Tensor=None,
                 es: paddle.Tensor=None,
                 is_inference: bool=False,
                 return_after_enc=False,
                 alpha: float=1.0,
                 spk_emb=None,
                 spk_id=None,
                 tone_id=None) -> Sequence[paddle.Tensor]:
        """Shared forward pass used for both training and inference.

        Pipeline: encoder -> optional speaker classifier (training only) ->
        optional speaker / tone embedding integration -> pitch, energy and
        duration prediction -> pitch / energy embedding added to the hidden
        states -> length regulation -> decoder -> optional postnet.

        Args:
            xs (Tensor): Padded token ids fed to the encoder.
            ilens (Tensor): Length of each input sequence.
            olens (Tensor, optional): Length of each target sequence. Only used
                to build the decoder mask when not in inference mode.
            ds (Tensor, optional): Durations. Ground truth in training; in
                inference, if given, they replace the predicted durations.
            ps (Tensor, optional): Pitch. Ground truth in training; in
                inference, if given, it replaces the predicted pitch.
            es (Tensor, optional): Energy. Ground truth in training; in
                inference, if given, it replaces the predicted energy.
            is_inference (bool): Selects the inference branch.
            return_after_enc (bool): If True, return ``(hs, h_masks)`` right
                after length regulation, before the decoder is run.
            alpha (float): Passed to the length regulator in inference mode
                only.
            spk_emb (Tensor, optional): Speaker embedding; takes priority over
                spk_id.
            spk_id (Tensor, optional): Speaker ids looked up in
                spk_embedding_table when spk_emb is not given.
            tone_id (Tensor, optional): Tone ids looked up in
                tone_embedding_table.

        Returns:
            ``(hs, h_masks)`` when return_after_enc is True; otherwise
            ``(before_outs, after_outs, d_outs, p_outs, e_outs, spk_logits)``
            where spk_logits is None unless the speaker classifier is enabled
            and this is a training pass.
        """
        # forward encoder
        x_masks = self._source_mask(ilens)
        # (B, Tmax, adim)
        hs, _ = self.encoder(xs, x_masks)

        # The speaker classifier sees the encoder output through the gradient
        # reversal layer and is only evaluated during training.
        if self.spk_num and self.enable_speaker_classifier and not is_inference:
            hs_for_spk_cls = self.grad_reverse(hs)
            spk_logits = self.speaker_classifier(hs_for_spk_cls, ilens)
        else:
            spk_logits = None

        # integrate speaker embedding
        if self.spk_embed_dim is not None:
            # spk_emb has a higher priority than spk_id
            if spk_emb is not None:
                hs = self._integrate_with_spk_embed(hs, spk_emb)
            elif spk_id is not None:
                spk_emb = self.spk_embedding_table(spk_id)
                hs = self._integrate_with_spk_embed(hs, spk_emb)

        # integrate tone embedding
        if self.tone_embed_dim is not None:
            if tone_id is not None:
                tone_embs = self.tone_embedding_table(tone_id)
                hs = self._integrate_with_tone_embed(hs, tone_embs)
        # forward duration predictor and variance predictors
        d_masks = make_pad_mask(ilens)

        # hs.detach() keeps the predictor's gradient from reaching the encoder.
        if self.stop_gradient_from_pitch_predictor:
            p_outs = self.pitch_predictor(hs.detach(), d_masks.unsqueeze(-1))
        else:
            p_outs = self.pitch_predictor(hs, d_masks.unsqueeze(-1))
        if self.stop_gradient_from_energy_predictor:
            e_outs = self.energy_predictor(hs.detach(), d_masks.unsqueeze(-1))
        else:
            e_outs = self.energy_predictor(hs, d_masks.unsqueeze(-1))

        if is_inference:
            # (B, Tmax)
            # Caller-supplied durations / pitch / energy override predictions.
            if ds is not None:
                d_outs = ds
            else:
                d_outs = self.duration_predictor.inference(hs, d_masks)
            if ps is not None:
                p_outs = ps
            if es is not None:
                e_outs = es

            # use prediction in inference
            # (B, Tmax, 1)

            # Transposed to channel-first for Conv1D and back afterwards.
            p_embs = self.pitch_embed(p_outs.transpose((0, 2, 1))).transpose(
                (0, 2, 1))
            e_embs = self.energy_embed(e_outs.transpose((0, 2, 1))).transpose(
                (0, 2, 1))
            hs = hs + e_embs + p_embs

            # (B, Lmax, adim)
            hs = self.length_regulator(hs, d_outs, alpha, is_inference=True)
        else:
            d_outs = self.duration_predictor(hs, d_masks)
            # use groundtruth in training
            # p_outs / e_outs computed above are still returned for the loss.
            p_embs = self.pitch_embed(ps.transpose((0, 2, 1))).transpose(
                (0, 2, 1))
            e_embs = self.energy_embed(es.transpose((0, 2, 1))).transpose(
                (0, 2, 1))
            hs = hs + e_embs + p_embs

            # (B, Lmax, adim)
            hs = self.length_regulator(hs, ds, is_inference=False)

        # forward decoder
        # A decoder mask is only built in training; inference runs unmasked.
        if olens is not None and not is_inference:
            if self.reduction_factor > 1:
                # Decoder frames are grouped by the reduction factor, so the
                # mask lengths are divided accordingly.
                olens_in = paddle.to_tensor(
                    [olen // self.reduction_factor for olen in olens.numpy()])
            else:
                olens_in = olens
            # (B, 1, T)
            h_masks = self._source_mask(olens_in)
        else:
            h_masks = None
        if return_after_enc:
            return hs, h_masks

        if self.decoder_type == 'cnndecoder':
            # remove output masks for dygraph to static graph
            # The CNN decoder output is used directly; feat_out is skipped.
            zs = self.decoder(hs, h_masks)
            before_outs = zs
        else:
            # (B, Lmax, adim)
            zs, _ = self.decoder(hs, h_masks)
            # (B, Lmax, odim)
            before_outs = self.feat_out(zs).reshape(
                (paddle.shape(zs)[0], -1, self.odim))

        # postnet -> (B, Lmax//r * r, odim)
        if self.postnet is None:
            after_outs = before_outs
        else:
            # The postnet output is added as a residual; it runs channel-first.
            after_outs = before_outs + self.postnet(
                before_outs.transpose((0, 2, 1))).transpose((0, 2, 1))

        return before_outs, after_outs, d_outs, p_outs, e_outs, spk_logits

    def encoder_infer(
            self,
            text: paddle.Tensor,
            spk_id=None,
            alpha: float=1.0,
            spk_emb=None,
            tone_id=None,
    ) -> paddle.Tensor:
        """Run a single utterance through `_forward` and stop before the decoder.

        The utterance is given a leading batch axis and passed to `_forward`
        with `is_inference=True` and `return_after_enc=True`, so the value
        returned here is the hidden sequence `_forward` holds right after the
        length regulator (pitch/energy embeddings already added).

        Args:
            text(Tensor):
                Input sequence of characters (T,). Cast to int64 internally.
            spk_id(Tensor, optional):
                Speaker id, forwarded unchanged to `_forward`. (Default value = None)
            alpha(float, optional):
                Alpha forwarded to the length regulator to control the speed.
            spk_emb(Tensor, optional):
                Speaker embedding vector; a batch axis is prepended before use.
                (Default value = None)
            tone_id(Tensor, optional):
                Tone ids; a batch axis is prepended before use. (Default value = None)

        Returns:
            Tensor: Hidden states before the decoder, with the batch axis kept.
        """
        # input of embedding must be int64
        x = paddle.cast(text, 'int64')
        # setup batch axis
        # NOTE(review): `inference` uses `paddle.shape(x)[0:1]` (keeps a length-1
        # axis) while this method indexes with `[0]` -- confirm the difference is
        # intentional.
        ilens = paddle.shape(x)[0]

        xs = x.unsqueeze(0)

        if spk_emb is not None:
            spk_emb = spk_emb.unsqueeze(0)

        if tone_id is not None:
            tone_id = tone_id.unsqueeze(0)

        # (1, Lmax, adim): output of the length regulator, not of the decoder
        # use *_ to avoid bug in dygraph to static graph
        hs, *_ = self._forward(
            xs,
            ilens,
            is_inference=True,
            return_after_enc=True,
            alpha=alpha,
            spk_emb=spk_emb,
            spk_id=spk_id,
            tone_id=tone_id)
        return hs

    def inference(
            self,
            text: paddle.Tensor,
            durations: paddle.Tensor=None,
            pitch: paddle.Tensor=None,
            energy: paddle.Tensor=None,
            alpha: float=1.0,
            use_teacher_forcing: bool=False,
            spk_emb=None,
            spk_id=None,
            tone_id=None,
    ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor, paddle.Tensor]:
        """Generate the sequence of features given the sequences of characters.

        Args:
            text(Tensor(int64)): 
                Input sequence of characters (T,).
            durations(Tensor, optional (int64)): 
                Groundtruth of duration (T,).
            pitch(Tensor, optional): 
                Groundtruth of token-averaged pitch (T, 1).
            energy(Tensor, optional): 
                Groundtruth of token-averaged energy (T, 1).
            alpha(float, optional): 
                Alpha to control the speed. Only forwarded when
                `use_teacher_forcing` is False.
            use_teacher_forcing(bool, optional): 
                Whether to use teacher forcing.
                If true, groundtruth of duration, pitch and energy will be used
                (each one only when it is not None).
            spk_emb(Tensor, optional): 
                Speaker embedding vector (spk_embed_dim,). (Default value = None)
            spk_id(Tensor(int64), optional): 
                spk ids (1,). (Default value = None)
            tone_id(Tensor(int64), optional): 
                tone ids (T,). (Default value = None)

        Returns:
            Tuple of four tensors, each with the batch axis removed:
            the output features after the postnet, the durations, the pitch
            and the energy returned by `_forward`.

        """
        # input of embedding must be int64
        x = paddle.cast(text, 'int64')
        d, p, e = durations, pitch, energy
        # setup batch axis
        # the `[0:1]` slice keeps ilens as a length-1 tensor (a batch of one length)
        ilens = paddle.shape(x)[0:1]

        xs = x.unsqueeze(0)

        if spk_emb is not None:
            spk_emb = spk_emb.unsqueeze(0)

        if tone_id is not None:
            tone_id = tone_id.unsqueeze(0)

        if use_teacher_forcing:
            # use groundtruth of duration, pitch, and energy
            ds = d.unsqueeze(0) if d is not None else None
            ps = p.unsqueeze(0) if p is not None else None
            es = e.unsqueeze(0) if e is not None else None

            # (1, L, odim)
            # NOTE: `alpha` is not passed on this path, so `_forward` uses its default
            _, outs, d_outs, p_outs, e_outs, _ = self._forward(
                xs,
                ilens,
                ds=ds,
                ps=ps,
                es=es,
                spk_emb=spk_emb,
                spk_id=spk_id,
                tone_id=tone_id,
                is_inference=True)
        else:
            # (1, L, odim)
            _, outs, d_outs, p_outs, e_outs, _ = self._forward(
                xs,
                ilens,
                is_inference=True,
                alpha=alpha,
                spk_emb=spk_emb,
                spk_id=spk_id,
                tone_id=tone_id)

        # drop the batch axis that was added above
        return outs[0], d_outs[0], p_outs[0], e_outs[0]

    def _integrate_with_spk_embed(self, hs, spk_emb):
        """Integrate speaker embedding with hidden states.

        Args:
            hs(Tensor): 
                Batch of hidden state sequences (B, Tmax, adim).
            spk_emb(Tensor): 
                Batch of speaker embeddings (B, spk_embed_dim).

        Returns:

        
        """
        if self.spk_embed_integration_type == "add":
            # apply projection and then add to hidden states
            spk_emb = self.spk_projection(F.normalize(spk_emb))
            hs = hs + spk_emb.unsqueeze(1)
        elif self.spk_embed_integration_type == "concat":
            # one wave `spk_emb` under synthesize, the dim is `1`
            if spk_emb.dim() == 1:
                spk_emb = spk_emb.unsqueeze(0)
            # concat hidden states with spk embeds and then apply projection
            spk_emb = F.normalize(spk_emb).unsqueeze(1).expand(
                shape=[-1, paddle.shape(hs)[1], -1])
            hs = self.spk_projection(paddle.concat([hs, spk_emb], axis=-1))
        else:
            raise NotImplementedError("support only add or concat.")

        return hs

    def _integrate_with_tone_embed(self, hs, tone_embs):
        """Integrate speaker embedding with hidden states.

        Args:
            hs(Tensor): 
                Batch of hidden state sequences (B, Tmax, adim).
            tone_embs(Tensor): 
                Batch of speaker embeddings (B, Tmax, tone_embed_dim).

        Returns:

        """
        if self.tone_embed_integration_type == "add":
            # apply projection and then add to hidden states
            tone_embs = self.tone_projection(F.normalize(tone_embs))
            hs = hs + tone_embs

        elif self.tone_embed_integration_type == "concat":
            # concat hidden states with tone embeds and then apply projection
            tone_embs = F.normalize(tone_embs).expand(
                shape=[-1, hs.shape[1], -1])
            hs = self.tone_projection(paddle.concat([hs, tone_embs], axis=-1))
        else:
            raise NotImplementedError("support only add or concat.")
        return hs

    def _source_mask(self, ilens: paddle.Tensor) -> paddle.Tensor:
        """Build the self-attention mask for a batch of sequence lengths.

        The non-pad mask returned by `make_non_pad_mask` gets an extra axis
        inserted before its last one.

        Args:
            ilens(Tensor): 
                Batch of lengths (B,).

        Returns:
            Tensor: 
                Mask tensor for self-attention (B, 1, Tmax). dtype=paddle.bool

        Examples:
            >>> ilens = [5, 3]
            >>> self._source_mask(ilens)
            tensor([[[1, 1, 1, 1, 1]],
                    [[1, 1, 1, 0, 0]]]) bool
        """
        return make_non_pad_mask(ilens).unsqueeze(-2)

    def _reset_parameters(self, init_enc_alpha: float, init_dec_alpha: float):

        # initialize alpha in scaled positional encoding
        if self.encoder_type == "transformer" and self.use_scaled_pos_enc:
            init_enc_alpha = paddle.to_tensor(init_enc_alpha).reshape([1])
            self.encoder.embed[-1].alpha = paddle.create_parameter(
                shape=init_enc_alpha.shape,
                dtype=str(init_enc_alpha.numpy().dtype),
                default_initializer=paddle.nn.initializer.Assign(
                    init_enc_alpha))
        if self.decoder_type == "transformer" and self.use_scaled_pos_enc:
            init_dec_alpha = paddle.to_tensor(init_dec_alpha).reshape([1])
            self.decoder.embed[-1].alpha = paddle.create_parameter(
                shape=init_dec_alpha.shape,
                dtype=str(init_dec_alpha.numpy().dtype),
                default_initializer=paddle.nn.initializer.Assign(
                    init_dec_alpha))


class FastSpeech2Inference(nn.Layer):
    """Inference wrapper: acoustic model followed by feature de-normalization."""

    def __init__(self, normalizer, model):
        """Store the two collaborators.

        Args:
            normalizer: 
                Object whose `inverse` method maps normalized features back.
            model: 
                Acoustic model exposing `inference(text, spk_id=..., spk_emb=...)`.
        """
        super().__init__()
        self.normalizer = normalizer
        self.acoustic_model = model

    def forward(self, text, spk_id=None, spk_emb=None):
        """Synthesize features for `text` and undo the normalization.

        Args:
            text: 
                Input passed as-is to the acoustic model.
            spk_id: 
                Optional speaker id passed to the acoustic model.
            spk_emb: 
                Optional speaker embedding passed to the acoustic model.

        Returns:
            The first output of the acoustic model after `normalizer.inverse`.
        """
        # the acoustic model returns four values; only the first one is used
        mel_norm, _durs, _pitch, _energy = self.acoustic_model.inference(
            text, spk_id=spk_id, spk_emb=spk_emb)
        return self.normalizer.inverse(mel_norm)


class StyleFastSpeech2Inference(FastSpeech2Inference):
    """FastSpeech2 inference wrapper with controllable duration, pitch and energy.

    The acoustic model is run twice: once freely to obtain its own
    duration / pitch / energy predictions, and once with teacher forcing
    using the (possibly modified) values.
    """

    def __init__(self,
                 normalizer,
                 model,
                 pitch_stats_path=None,
                 energy_stats_path=None):
        """
        Args:
            normalizer: 
                Object whose `inverse` method maps normalized features back.
            model: 
                Acoustic model exposing `inference`.
            pitch_stats_path(optional): 
                File loaded with `np.load` that unpacks into (mean, std) of
                the pitch. Needed when `pitch_scale` / `pitch_bias` are used.
            energy_stats_path(optional): 
                File loaded with `np.load` that unpacks into (mean, std) of
                the energy. Needed when `energy_scale` / `energy_bias` are used.
        """
        super().__init__(normalizer, model)
        # the statistics attributes only exist when the matching path is given
        if pitch_stats_path:
            pitch_mean, pitch_std = np.load(pitch_stats_path)
            self.pitch_mean = paddle.to_tensor(pitch_mean)
            self.pitch_std = paddle.to_tensor(pitch_std)
        if energy_stats_path:
            energy_mean, energy_std = np.load(energy_stats_path)
            self.energy_mean = paddle.to_tensor(energy_mean)
            self.energy_std = paddle.to_tensor(energy_std)

    def denorm(self, data, mean, std):
        """Map normalized `data` back with `data * std + mean`."""
        return data * std + mean

    def norm(self, data, mean, std):
        """Normalize `data` with `(data - mean) / std`."""
        return (data - mean) / std

    def forward(self,
                text: paddle.Tensor,
                durations: Union[paddle.Tensor, np.ndarray]=None,
                durations_scale: Union[int, float]=None,
                durations_bias: Union[int, float]=None,
                pitch: Union[paddle.Tensor, np.ndarray]=None,
                pitch_scale: Union[int, float]=None,
                pitch_bias: Union[int, float]=None,
                energy: Union[paddle.Tensor, np.ndarray]=None,
                energy_scale: Union[int, float]=None,
                energy_bias: Union[int, float]=None,
                robot: bool=False,
                spk_emb=None,
                spk_id=None):
        """Synthesize features with optional control over prosody.

        For each of duration, pitch and energy the priority is:
        explicit groundtruth > scale/bias > the model's own prediction.
        For pitch, `robot` sits between the two first levels: an explicit
        `pitch` wins over `robot`, and `robot` wins over `pitch_scale` /
        `pitch_bias`.

        Args:
            text(Tensor(int64)): 
                Input sequence of characters (T,).
            durations(paddle.Tensor/np.ndarray, optional (int64)): 
                Groundtruth of duration (T,), this will overwrite the set of durations_scale and durations_bias
            durations_scale(int/float, optional): 
                Factor applied to the predicted durations.
            durations_bias(int/float, optional): 
                Offset added to the (scaled) predicted durations.
            pitch(paddle.Tensor/np.ndarray, optional): 
                Groundtruth of token-averaged pitch (T, 1), this will overwrite the set of pitch_scale and pitch_bias
            pitch_scale(int/float, optional): 
                In denormed HZ domain.
            pitch_bias(int/float, optional): 
                In denormed HZ domain.
            energy(paddle.Tensor/np.ndarray, optional): 
                Groundtruth of token-averaged energy (T, 1), this will overwrite the set of energy_scale and energy_bias
            energy_scale(int/float, optional): 
                In denormed domain.
            energy_bias(int/float, optional): 
                In denormed domain.
            robot(bool) (Default value = False):
                If True and no explicit `pitch` is given, the normalized
                pitch is set to zeros.
            spk_emb(Default value = None):
                Speaker embedding forwarded to the acoustic model.
            spk_id(Default value = None):
                Speaker id forwarded to the acoustic model.

        Returns:
            Tensor: logmel

        """
        # first pass: let the model predict durations, pitch and energy itself
        normalized_mel, d_outs, p_outs, e_outs = self.acoustic_model.inference(
            text,
            durations=None,
            pitch=None,
            energy=None,
            spk_emb=spk_emb,
            spk_id=spk_id)

        # priority: groundtruth > scale/bias > previous output
        # set durations
        if isinstance(durations, np.ndarray):
            durations = paddle.to_tensor(durations)
        elif not isinstance(durations, paddle.Tensor):
            if durations_scale or durations_bias:
                durations_scale = durations_scale if durations_scale is not None else 1
                durations_bias = durations_bias if durations_bias is not None else 0
                durations = durations_scale * d_outs + durations_bias
            else:
                durations = d_outs

        # Robot voice: only applied when the caller gave no explicit pitch, so
        # that a groundtruth pitch really overrides it. Previously the zeros
        # silently replaced a caller-supplied pitch.
        if robot and pitch is None:
            # set normed pitch to zeros have the same effect with set denormd ones to mean
            pitch = paddle.zeros(p_outs.shape)

        # set pitch (an explicit pitch or the robot zeros skip scale/bias)
        if isinstance(pitch, np.ndarray):
            pitch = paddle.to_tensor(pitch)
        elif not isinstance(pitch, paddle.Tensor):
            if pitch_scale or pitch_bias:
                pitch_scale = pitch_scale if pitch_scale is not None else 1
                pitch_bias = pitch_bias if pitch_bias is not None else 0
                # scale and shift in the denormalized Hz domain, then go back
                pitch_hz = paddle.exp(
                    self.denorm(p_outs, self.pitch_mean, self.pitch_std))
                pitch_hz = pitch_scale * pitch_hz + pitch_bias
                pitch = self.norm(
                    paddle.log(pitch_hz), self.pitch_mean, self.pitch_std)
            else:
                pitch = p_outs

        # set energy
        if isinstance(energy, np.ndarray):
            energy = paddle.to_tensor(energy)
        elif not isinstance(energy, paddle.Tensor):
            if energy_scale or energy_bias:
                energy_scale = energy_scale if energy_scale is not None else 1
                energy_bias = energy_bias if energy_bias is not None else 0
                # scale and shift in the denormalized domain, then go back
                e_dnorm = self.denorm(e_outs, self.energy_mean,
                                      self.energy_std)
                e_dnorm = energy_scale * e_dnorm + energy_bias
                energy = self.norm(e_dnorm, self.energy_mean, self.energy_std)
            else:
                energy = e_outs

        # second pass: teacher-force the chosen durations, pitch and energy
        normalized_mel, d_outs, p_outs, e_outs = self.acoustic_model.inference(
            text,
            durations=durations,
            pitch=pitch,
            energy=energy,
            use_teacher_forcing=True,
            spk_emb=spk_emb,
            spk_id=spk_id)

        logmel = self.normalizer.inverse(normalized_mel)
        return logmel


class FastSpeech2Loss(nn.Layer):
    """Loss function module for FastSpeech2."""

    @typechecked
    def __init__(self, use_masking: bool=True,
                 use_weighted_masking: bool=False):
        """Initialize feed-forward Transformer loss module.
        Args:
            use_masking (bool): 
                Whether to apply masking for padded part in loss calculation.
            use_weighted_masking (bool): 
                Whether to weighted masking in loss calculation.
        """
        super().__init__()

        # the two masking modes are mutually exclusive
        assert (use_masking != use_weighted_masking) or not use_masking
        self.use_masking = use_masking
        self.use_weighted_masking = use_weighted_masking

        # define criterions
        # weighted masking needs the element-wise losses to apply the weights
        reduction = "none" if self.use_weighted_masking else "mean"
        self.l1_criterion = nn.L1Loss(reduction=reduction)
        self.mse_criterion = nn.MSELoss(reduction=reduction)
        self.duration_criterion = DurationPredictorLoss(reduction=reduction)
        self.ce_criterion = nn.CrossEntropyLoss()

    def forward(
            self,
            after_outs: paddle.Tensor,
            before_outs: paddle.Tensor,
            d_outs: paddle.Tensor,
            p_outs: paddle.Tensor,
            e_outs: paddle.Tensor,
            ys: paddle.Tensor,
            ds: paddle.Tensor,
            ps: paddle.Tensor,
            es: paddle.Tensor,
            ilens: paddle.Tensor,
            olens: paddle.Tensor,
            spk_logits: paddle.Tensor=None,
            spk_ids: paddle.Tensor=None,
    ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor, paddle.Tensor,
               paddle.Tensor, ]:
        """Calculate forward propagation.

        Args:
            after_outs(Tensor):  
                Batch of outputs after postnets (B, Lmax, odim).
            before_outs(Tensor): 
                Batch of outputs before postnets (B, Lmax, odim).
            d_outs(Tensor): 
                Batch of outputs of duration predictor (B, Tmax).
            p_outs(Tensor): 
                Batch of outputs of pitch predictor (B, Tmax, 1).
            e_outs(Tensor): 
                Batch of outputs of energy predictor (B, Tmax, 1).
            ys(Tensor): 
                Batch of target features (B, Lmax, odim).
            ds(Tensor): 
                Batch of durations (B, Tmax).
            ps(Tensor): 
                Batch of target token-averaged pitch (B, Tmax, 1).
            es(Tensor): 
                Batch of target token-averaged energy (B, Tmax, 1).
            ilens(Tensor): 
                Batch of the lengths of each input (B,).
            olens(Tensor): 
                Batch of the lengths of each target (B,).
            spk_logits(Option[Tensor]):
                Batch of outputs after speaker classifier (B, Lmax, num_spk)
            spk_ids(Option[Tensor]):
                Batch of target spk_id (B,)

        Returns:
            Tuple of (l1_loss, duration_loss, pitch_loss, energy_loss,
            speaker_loss). `speaker_loss` is the float 0.0 when `spk_logits`
            or `spk_ids` is None.
        """
        speaker_loss = 0.0
        use_speaker_loss = spk_logits is not None and spk_ids is not None

        # apply mask to remove padded part
        if self.use_masking:
            out_masks = make_non_pad_mask(olens).unsqueeze(-1)
            before_outs = before_outs.masked_select(
                out_masks.broadcast_to(before_outs.shape))
            if after_outs is not None:
                after_outs = after_outs.masked_select(
                    out_masks.broadcast_to(after_outs.shape))
            ys = ys.masked_select(out_masks.broadcast_to(ys.shape))
            duration_masks = make_non_pad_mask(ilens)
            d_outs = d_outs.masked_select(
                duration_masks.broadcast_to(d_outs.shape))
            ds = ds.masked_select(duration_masks.broadcast_to(ds.shape))
            pitch_masks = make_non_pad_mask(ilens).unsqueeze(-1)
            p_outs = p_outs.masked_select(
                pitch_masks.broadcast_to(p_outs.shape))
            e_outs = e_outs.masked_select(
                pitch_masks.broadcast_to(e_outs.shape))
            ps = ps.masked_select(pitch_masks.broadcast_to(ps.shape))
            es = es.masked_select(pitch_masks.broadcast_to(es.shape))

        # Prepare the speaker-classifier inputs. This used to live inside the
        # `use_masking` branch, which left `batch_size` undefined (and the
        # logits un-flattened) whenever `use_masking` was False.
        if use_speaker_loss:
            batch_size = spk_ids.shape[0]
            # one label per frame: repeat every id Lmax times
            spk_ids = paddle.repeat_interleave(spk_ids, spk_logits.shape[1],
                                               None)
            # (B, Lmax, num_spk) -> (B * Lmax, num_spk)
            spk_logits = paddle.reshape(spk_logits,
                                        [-1, spk_logits.shape[-1]])
            # keep only the frames whose logits are not all zero
            mask_index = spk_logits.abs().sum(axis=1) != 0
            spk_ids = spk_ids[mask_index]
            spk_logits = spk_logits[mask_index]

        # calculate loss
        l1_loss = self.l1_criterion(before_outs, ys)
        if after_outs is not None:
            l1_loss += self.l1_criterion(after_outs, ys)
        duration_loss = self.duration_criterion(d_outs, ds)
        pitch_loss = self.mse_criterion(p_outs, ps)
        energy_loss = self.mse_criterion(e_outs, es)

        if use_speaker_loss:
            speaker_loss = self.ce_criterion(spk_logits, spk_ids) / batch_size

        # make weighted mask and apply it
        if self.use_weighted_masking:
            out_masks = make_non_pad_mask(olens).unsqueeze(-1)
            # every utterance contributes equally, whatever its length
            out_weights = out_masks.cast(dtype=paddle.float32) / out_masks.cast(
                dtype=paddle.float32).sum(
                    axis=1, keepdim=True)
            out_weights /= ys.shape[0] * ys.shape[2]
            duration_masks = make_non_pad_mask(ilens)
            duration_weights = (duration_masks.cast(dtype=paddle.float32) /
                                duration_masks.cast(dtype=paddle.float32).sum(
                                    axis=1, keepdim=True))
            duration_weights /= ds.shape[0]

            # apply weight

            l1_loss = l1_loss.multiply(out_weights)
            l1_loss = l1_loss.masked_select(
                out_masks.broadcast_to(l1_loss.shape)).sum()
            duration_loss = (duration_loss.multiply(duration_weights)
                             .masked_select(duration_masks).sum())
            # pitch and energy are per token, so they reuse the duration masks
            pitch_masks = duration_masks.unsqueeze(-1)
            pitch_weights = duration_weights.unsqueeze(-1)
            pitch_loss = pitch_loss.multiply(pitch_weights)
            pitch_loss = pitch_loss.masked_select(
                pitch_masks.broadcast_to(pitch_loss.shape)).sum()
            energy_loss = energy_loss.multiply(pitch_weights)
            energy_loss = energy_loss.masked_select(
                pitch_masks.broadcast_to(energy_loss.shape)).sum()

        return l1_loss, duration_loss, pitch_loss, energy_loss, speaker_loss


================================================
FILE: paddlespeech/t2s/models/fastspeech2/fastspeech2_updater.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from pathlib import Path

from paddle import DataParallel
from paddle import distributed as dist
from paddle.io import DataLoader
from paddle.nn import Layer
from paddle.optimizer import Optimizer

from paddlespeech.t2s.models.fastspeech2 import FastSpeech2Loss
from paddlespeech.t2s.training.extensions.evaluator import StandardEvaluator
from paddlespeech.t2s.training.reporter import report
from paddlespeech.t2s.training.updaters.standard_updater import StandardUpdater

# Module-level logging setup, executed once at import time.
# `basicConfig` sets the format of the root logger's handler; per the standard
# library it does nothing when the root logger already has handlers.
logging.basicConfig(
    format='%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(message)s',
    datefmt='[%Y-%m-%d %H:%M:%S]')
# Logger shared by the updater and the evaluator defined in this module; each
# of them attaches its own file handler to it.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class FastSpeech2Updater(StandardUpdater):
    """Training updater for FastSpeech2: one forward/backward/step per batch."""

    def __init__(
            self,
            model: Layer,
            optimizer: Optimizer,
            dataloader: DataLoader,
            init_state=None,
            use_masking: bool=False,
            spk_loss_scale: float=0.02,
            use_weighted_masking: bool=False,
            output_dir: Path=None,
            enable_spk_cls: bool=False, ):
        """
        Args:
            model(Layer): 
                Model to train.
            optimizer(Optimizer): 
                Optimizer stepping the model parameters.
            dataloader(DataLoader): 
                Source of training batches.
            init_state(optional): 
                Initial updater state, forwarded to `StandardUpdater`.
            use_masking(bool): 
                Forwarded to `FastSpeech2Loss`.
            spk_loss_scale(float): 
                Factor applied to the speaker loss before it is added to the
                total loss.
            use_weighted_masking(bool): 
                Forwarded to `FastSpeech2Loss`.
            output_dir(Path, optional): 
                Directory receiving `worker_<rank>.log`. When None, no file
                handler is attached.
            enable_spk_cls(bool): 
                Whether the speaker losses are reported and logged.
        """
        # forward the caller's state (it used to be dropped by a hard-coded None)
        super().__init__(model, optimizer, dataloader, init_state=init_state)

        self.criterion = FastSpeech2Loss(
            use_masking=use_masking,
            use_weighted_masking=use_weighted_masking, )

        # `output_dir` defaults to None; only log to a file when one is given
        if output_dir is not None:
            log_file = Path(output_dir) / 'worker_{}.log'.format(
                dist.get_rank())
            self.filehandler = logging.FileHandler(str(log_file))
            logger.addHandler(self.filehandler)
        else:
            self.filehandler = None
        self.logger = logger
        self.msg = ""
        self.spk_loss_scale = spk_loss_scale
        self.enable_spk_cls = enable_spk_cls

    def update_core(self, batch):
        """Run one optimization step on `batch` and report the losses.

        Args:
            batch(dict): 
                Must provide "text", "text_lengths", "speech",
                "speech_lengths", "durations", "pitch" and "energy";
                "spk_id" and "spk_emb" are optional.
        """
        self.msg = "Rank: {}, ".format(dist.get_rank())
        losses_dict = {}
        # spk_id!=None in multiple spk fastspeech2 
        spk_id = batch["spk_id"] if "spk_id" in batch else None
        spk_emb = batch["spk_emb"] if "spk_emb" in batch else None
        # No explicit speaker identifier labels are used during voice cloning training.
        if spk_emb is not None:
            spk_id = None

        model_inputs = dict(
            text=batch["text"],
            text_lengths=batch["text_lengths"],
            speech=batch["speech"],
            speech_lengths=batch["speech_lengths"],
            durations=batch["durations"],
            pitch=batch["pitch"],
            energy=batch["energy"],
            spk_id=spk_id,
            spk_emb=spk_emb)

        # `_layers` only exists on the DataParallel wrapper, hence the type
        # check comes first in the short-circuit chain
        run_without_sync = (
            type(self.model) == DataParallel and
            self.model._layers.spk_num and
            self.model._layers.enable_speaker_classifier)
        if run_without_sync:
            with self.model.no_sync():
                outputs = self.model(**model_inputs)
        else:
            outputs = self.model(**model_inputs)
        before_outs, after_outs, d_outs, p_outs, e_outs, ys, olens, spk_logits = outputs

        l1_loss, duration_loss, pitch_loss, energy_loss, speaker_loss = self.criterion(
            after_outs=after_outs,
            before_outs=before_outs,
            d_outs=d_outs,
            p_outs=p_outs,
            e_outs=e_outs,
            ys=ys,
            ds=batch["durations"],
            ps=batch["pitch"],
            es=batch["energy"],
            ilens=batch["text_lengths"],
            olens=olens,
            spk_logits=spk_logits,
            spk_ids=spk_id, )

        scaled_speaker_loss = self.spk_loss_scale * speaker_loss
        loss = l1_loss + duration_loss + pitch_loss + energy_loss + scaled_speaker_loss

        optimizer = self.optimizer
        optimizer.clear_grad()
        loss.backward()
        optimizer.step()

        report("train/loss", float(loss))
        report("train/l1_loss", float(l1_loss))
        report("train/duration_loss", float(duration_loss))
        report("train/pitch_loss", float(pitch_loss))
        report("train/energy_loss", float(energy_loss))
        if self.enable_spk_cls:
            report("train/speaker_loss", float(speaker_loss))
            report("train/scaled_speaker_loss", float(scaled_speaker_loss))

        # insertion order defines the order of the fields in the log message
        losses_dict["l1_loss"] = float(l1_loss)
        losses_dict["duration_loss"] = float(duration_loss)
        losses_dict["pitch_loss"] = float(pitch_loss)
        losses_dict["energy_loss"] = float(energy_loss)
        if self.enable_spk_cls:
            losses_dict["speaker_loss"] = float(speaker_loss)
            losses_dict["scaled_speaker_loss"] = float(scaled_speaker_loss)
        losses_dict["loss"] = float(loss)
        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())


class FastSpeech2Evaluator(StandardEvaluator):
    """Evaluator for FastSpeech2: computes and reports the losses of a batch."""

    def __init__(self,
                 model: Layer,
                 dataloader: DataLoader,
                 use_masking: bool=False,
                 use_weighted_masking: bool=False,
                 spk_loss_scale: float=0.02,
                 output_dir: Path=None,
                 enable_spk_cls: bool=False):
        """
        Args:
            model(Layer): 
                Model to evaluate.
            dataloader(DataLoader): 
                Source of evaluation batches.
            use_masking(bool): 
                Forwarded to `FastSpeech2Loss`.
            use_weighted_masking(bool): 
                Forwarded to `FastSpeech2Loss`.
            spk_loss_scale(float): 
                Factor applied to the speaker loss before it is added to the
                total loss.
            output_dir(Path, optional): 
                Directory receiving `worker_<rank>.log`. When None, no file
                handler is attached.
            enable_spk_cls(bool): 
                Whether the speaker losses are reported and logged.
        """
        super().__init__(model, dataloader)

        # `output_dir` defaults to None; only log to a file when one is given
        if output_dir is not None:
            log_file = Path(output_dir) / 'worker_{}.log'.format(
                dist.get_rank())
            self.filehandler = logging.FileHandler(str(log_file))
            logger.addHandler(self.filehandler)
        else:
            self.filehandler = None
        self.logger = logger
        self.msg = ""
        self.spk_loss_scale = spk_loss_scale
        self.enable_spk_cls = enable_spk_cls

        self.criterion = FastSpeech2Loss(
            use_masking=use_masking, use_weighted_masking=use_weighted_masking)

    def evaluate_core(self, batch):
        """Compute the losses of `batch`, report them and log a summary line.

        Args:
            batch(dict): 
                Must provide "text", "text_lengths", "speech",
                "speech_lengths", "durations", "pitch" and "energy";
                "spk_id" and "spk_emb" are optional.
        """
        self.msg = "Evaluate: "
        losses_dict = {}
        # spk_id!=None in multiple spk fastspeech2 
        spk_id = batch["spk_id"] if "spk_id" in batch else None
        spk_emb = batch["spk_emb"] if "spk_emb" in batch else None
        # a speaker embedding takes over: no speaker id labels are used then
        if spk_emb is not None:
            spk_id = None

        model_inputs = dict(
            text=batch["text"],
            text_lengths=batch["text_lengths"],
            speech=batch["speech"],
            speech_lengths=batch["speech_lengths"],
            durations=batch["durations"],
            pitch=batch["pitch"],
            energy=batch["energy"],
            spk_id=spk_id,
            spk_emb=spk_emb)

        # `_layers` only exists on the DataParallel wrapper, hence the type
        # check comes first in the short-circuit chain
        run_without_sync = (
            type(self.model) == DataParallel and
            self.model._layers.spk_num and
            self.model._layers.enable_speaker_classifier)
        if run_without_sync:
            with self.model.no_sync():
                outputs = self.model(**model_inputs)
        else:
            outputs = self.model(**model_inputs)
        before_outs, after_outs, d_outs, p_outs, e_outs, ys, olens, spk_logits = outputs

        l1_loss, duration_loss, pitch_loss, energy_loss, speaker_loss = self.criterion(
            after_outs=after_outs,
            before_outs=before_outs,
            d_outs=d_outs,
            p_outs=p_outs,
            e_outs=e_outs,
            ys=ys,
            ds=batch["durations"],
            ps=batch["pitch"],
            es=batch["energy"],
            ilens=batch["text_lengths"],
            olens=olens,
            spk_logits=spk_logits,
            spk_ids=spk_id, )

        scaled_speaker_loss = self.spk_loss_scale * speaker_loss
        loss = l1_loss + duration_loss + pitch_loss + energy_loss + scaled_speaker_loss

        report("eval/loss", float(loss))
        report("eval/l1_loss", float(l1_loss))
        report("eval/duration_loss", float(duration_loss))
        report("eval/pitch_loss", float(pitch_loss))
        report("eval/energy_loss", float(energy_loss))
        if self.enable_spk_cls:
            # evaluation metrics belong under "eval/", like the ones above
            # (they were reported under the "train/" keys by mistake)
            report("eval/speaker_loss", float(speaker_loss))
            report("eval/scaled_speaker_loss", float(scaled_speaker_loss))

        # insertion order defines the order of the fields in the log message
        losses_dict["l1_loss"] = float(l1_loss)
        losses_dict["duration_loss"] = float(duration_loss)
        losses_dict["pitch_loss"] = float(pitch_loss)
        losses_dict["energy_loss"] = float(energy_loss)
        if self.enable_spk_cls:
            losses_dict["speaker_loss"] = float(speaker_loss)
            losses_dict["scaled_speaker_loss"] = float(scaled_speaker_loss)
        losses_dict["loss"] = float(loss)
        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())
        self.logger.info(self.msg)


================================================
FILE: paddlespeech/t2s/models/hifigan/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .hifigan import *
from .hifigan_updater import *


================================================
FILE: paddlespeech/t2s/models/hifigan/hifigan.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This code is based on https://github.com/jik876/hifi-gan.
import copy
from typing import Any
from typing import Dict
from typing import List
from typing import Optional

import paddle
import paddle.nn.functional as F
from paddle import nn

from paddlespeech.t2s.modules.activation import get_activation
from paddlespeech.t2s.modules.nets_utils import initialize
from paddlespeech.t2s.modules.residual_block import HiFiGANResidualBlock as ResidualBlock


class HiFiGANGenerator(nn.Layer):
    """HiFiGAN generator module.

    The network is an input convolution followed by ``num_upsamples``
    transposed-convolution upsampling stages. Each stage is followed by
    ``num_blocks`` residual blocks whose outputs are averaged. When
    ``use_istft`` is enabled only the first ``istft_layer_id`` upsampling
    stages are built, and the waveform is produced by an inverse STFT applied
    to a predicted magnitude / phase pair.
    """

    def __init__(
            self,
            in_channels: int=80,
            out_channels: int=1,
            channels: int=512,
            global_channels: int=-1,
            kernel_size: int=7,
            upsample_scales: List[int]=(5, 5, 4, 3),
            upsample_kernel_sizes: List[int]=(10, 10, 8, 6),
            resblock_kernel_sizes: List[int]=(3, 7, 11),
            resblock_dilations: List[List[int]]=[(1, 3, 5), (1, 3, 5),
                                                 (1, 3, 5)],
            use_additional_convs: bool=True,
            bias: bool=True,
            nonlinear_activation: str="leakyrelu",
            nonlinear_activation_params: Dict[str, Any]={"negative_slope": 0.1},
            use_weight_norm: bool=True,
            init_type: str="xavier_uniform",
            use_istft: bool=False,
            istft_layer_id: int=2,
            n_fft: int=2048,
            win_length: int=1200, ):
        """Initialize HiFiGANGenerator module.

        Args:
            in_channels (int): 
                Number of input channels.
            out_channels (int): 
                Number of output channels.
            channels (int): 
                Number of hidden representation channels.
            global_channels (int): 
                Number of global conditioning channels. The global
                conditioning convolution is only created when this is > 0.
            kernel_size (int): 
                Kernel size of initial and final conv layer. Must be odd.
            upsample_scales (list): 
                List of upsampling scales.
            upsample_kernel_sizes (list): 
                List of kernel sizes for upsampling layers. Each entry must be
                exactly twice the corresponding upsampling scale.
            resblock_kernel_sizes (list): 
                List of kernel sizes for residual blocks.
            resblock_dilations (list): 
                List of dilation list for residual blocks.
            use_additional_convs (bool): 
                Whether to use additional conv layers in residual blocks.
            bias (bool): 
                Whether to add bias parameter in the residual blocks.
            nonlinear_activation (str): 
                Activation function module name.
            nonlinear_activation_params (dict): 
                Hyperparameters for activation function.
            use_weight_norm (bool): 
                Whether to use weight norm.
                If set to true, it will be applied to all of the conv layers.
            init_type (str):
                Initialization scheme name forwarded to ``initialize``.
            use_istft (bool):
                If set to true, it will be a iSTFTNet based on hifigan.
            istft_layer_id (int):
                Use istft after istft_layer_id layers of upsample layer if use_istft=True
            n_fft (int):
                Number of fft points in feature extraction
            win_length (int):
                Window length in feature extraction
        """
        super().__init__()

        # initialize parameters
        initialize(self, init_type)

        # check hyperparameters are valid
        assert kernel_size % 2 == 1, "Kernel size must be odd number."
        assert len(upsample_scales) == len(upsample_kernel_sizes)
        assert len(resblock_dilations) == len(resblock_kernel_sizes)
        assert len(upsample_scales) >= istft_layer_id if use_istft else True

        # define modules
        # With iSTFT only the first ``istft_layer_id`` upsampling stages are
        # built; the remaining upsampling factor is handled by the iSTFT hop.
        self.num_upsamples = len(
            upsample_kernel_sizes) if not use_istft else istft_layer_id
        self.num_blocks = len(resblock_kernel_sizes)
        self.input_conv = nn.Conv1D(
            in_channels,
            channels,
            kernel_size,
            1,
            padding=(kernel_size - 1) // 2, )
        self.upsamples = nn.LayerList()
        self.blocks = nn.LayerList()
        for i in range(self.num_upsamples):
            assert upsample_kernel_sizes[i] == 2 * upsample_scales[i]
            # Each stage halves the channel count while upsampling in time.
            self.upsamples.append(
                nn.Sequential(
                    get_activation(nonlinear_activation, **
                                   nonlinear_activation_params),
                    nn.Conv1DTranspose(
                        channels // (2**i),
                        channels // (2**(i + 1)),
                        upsample_kernel_sizes[i],
                        upsample_scales[i],
                        padding=upsample_scales[i] // 2 + upsample_scales[i] %
                        2,
                        output_padding=upsample_scales[i] % 2, ), ))
            for j in range(len(resblock_kernel_sizes)):
                self.blocks.append(
                    ResidualBlock(
                        kernel_size=resblock_kernel_sizes[j],
                        channels=channels // (2**(i + 1)),
                        dilations=resblock_dilations[j],
                        bias=bias,
                        use_additional_convs=use_additional_convs,
                        nonlinear_activation=nonlinear_activation,
                        nonlinear_activation_params=nonlinear_activation_params,
                    ))

        # Channel count after the last upsampling stage. It is derived from
        # ``num_upsamples`` instead of the loop index so that a configuration
        # that builds no upsampling stage does not fail with a NameError.
        # For one or more stages this equals ``channels // (2**(i + 1))``.
        last_channels = channels // (2**self.num_upsamples)

        self.use_istft = use_istft
        if self.use_istft:
            # Hop size is the product of the upsampling scales that were NOT
            # turned into upsampling stages.
            self.istft_hop_size = 1
            for j in range(istft_layer_id, len(upsample_scales)):
                self.istft_hop_size *= upsample_scales[j]
            # ``s`` is the total upsampling factor of the stages that were built.
            s = 1
            for j in range(istft_layer_id):
                s *= upsample_scales[j]
            # n_fft / s and win_length / s are rounded up to an even integer.
            self.istft_n_fft = int(n_fft / s) if (
                n_fft / s) % 2 == 0 else int((n_fft / s + 2) - n_fft / s % 2)
            self.istft_win_length = int(win_length / s) if (
                win_length /
                s) % 2 == 0 else int((win_length / s + 2) - win_length / s % 2)
            # Adds one reflected frame on the left before the output conv.
            self.reflection_pad = nn.Pad1D(padding=[1, 0], mode='reflect')
            # Output holds (n_fft // 2 + 1) magnitude channels followed by the
            # same number of phase channels.
            self.output_conv = nn.Conv1D(
                last_channels,
                (self.istft_n_fft // 2 + 1) * 2,
                kernel_size,
                1,
                padding=(kernel_size - 1) // 2, )
        else:
            self.output_conv = nn.Sequential(
                nn.LeakyReLU(),
                nn.Conv1D(
                    last_channels,
                    out_channels,
                    kernel_size,
                    1,
                    padding=(kernel_size - 1) // 2, ),
                nn.Tanh(), )

        if global_channels > 0:
            self.global_conv = nn.Conv1D(global_channels, channels, 1)

        # Clear the global initializer (presumably installed by
        # ``initialize`` above) so it does not affect layers created later.
        nn.initializer.set_global_initializer(None)

        # apply weight norm
        if use_weight_norm:
            self.apply_weight_norm()

        # reset parameters
        self.reset_parameters()

    def forward(self, c, g: Optional[paddle.Tensor]=None):
        """Calculate forward propagation.
        
        Args:
            c (Tensor): Input tensor (B, in_channels, T).
            g (Optional[Tensor]): Global conditioning tensor (B, global_channels, 1).
        Returns:
            Tensor: Output tensor (B, out_channels, T).
        """
        c = self.input_conv(c)
        if g is not None:
            c = c + self.global_conv(g)
        for i in range(self.num_upsamples):
            c = self.upsamples[i](c)
            # Average the outputs of this stage's residual blocks.
            cs = 0.0
            for j in range(self.num_blocks):
                cs += self.blocks[i * self.num_blocks + j](c)
            c = cs / self.num_blocks

        if self.use_istft:
            c = F.leaky_relu(c)
            c = self.reflection_pad(c)
            c = self.output_conv(c)
            # paddle.exp only accepts real tensors (float32 / float64 /
            # float16), see
            # https://www.paddlepaddle.org.cn/documentation/docs/en/api/paddle/exp_en.html
            # so spec * exp(1j * phase) is built with Euler's formula from
            # separate real and imaginary parts.
            spec = paddle.exp(c[:, :self.istft_n_fft // 2 + 1, :])
            phase = paddle.sin(c[:, self.istft_n_fft // 2 + 1:, :])

            c = paddle.complex(spec * (paddle.cos(phase)),
                               spec * (paddle.sin(phase)))
            c = paddle.signal.istft(
                c,
                n_fft=self.istft_n_fft,
                hop_length=self.istft_hop_size,
                win_length=self.istft_win_length)
            # Restore the channel axis removed by the iSTFT.
            c = c.unsqueeze(1)
        else:
            c = self.output_conv(c)

        return c

    def reset_parameters(self):
        """Reset parameters.
        This initialization follows official implementation manner.
        https://github.com/jik876/hifi-gan/blob/master/models.py
        """
        # Normal distribution with float parameters (mean 0.0, std 0.01).
        dist = paddle.distribution.Normal(loc=0.0, scale=0.01)

        def _reset_parameters(m):
            if isinstance(m, nn.Conv1D) or isinstance(m, nn.Conv1DTranspose):
                w = dist.sample(m.weight.shape)
                m.weight.set_value(w)

        self.apply(_reset_parameters)

    def apply_weight_norm(self):
        """Recursively apply weight normalization to all the Convolution layers
        in the sublayers.
        """

        def _apply_weight_norm(layer):
            if isinstance(layer, (nn.Conv1D, nn.Conv2D, nn.Conv1DTranspose)):
                nn.utils.weight_norm(layer)

        self.apply(_apply_weight_norm)

    def remove_weight_norm(self):
        """Recursively remove weight normalization from all the Convolution 
        layers in the sublayers.
        """

        def _remove_weight_norm(layer):
            # Best effort: a ValueError raised for a layer is ignored so the
            # traversal continues over the remaining sublayers.
            try:
                nn.utils.remove_weight_norm(layer)
            except ValueError:
                pass

        self.apply(_remove_weight_norm)

    def inference(self, c, g: Optional[paddle.Tensor]=None):
        """Perform inference.
        Args:
            c (Tensor): 
                Input tensor (T, in_channels).
            g (Optional[Tensor]): 
                Global conditioning tensor (global_channels, 1).
        Returns:
            Tensor:
                Output tensor (T * prod(upsample_scales), out_channels).
        """
        if g is not None:
            g = g.unsqueeze(0)
        # Add a batch axis and move channels first for forward(), then undo
        # both on the way out.
        c = self.forward(c.transpose([1, 0]).unsqueeze(0), g=g)
        return c.squeeze(0).transpose([1, 0])


class HiFiGANPeriodDiscriminator(nn.Layer):
    """HiFiGAN period discriminator module.

    The 1-D input is folded into a 2-D map of width ``period`` and passed
    through a stack of strided 2-D convolutions followed by an output
    convolution. ``forward`` returns the output of every layer.
    """

    def __init__(
            self,
            in_channels: int=1,
            out_channels: int=1,
            period: int=3,
            kernel_sizes: List[int]=[5, 3],
            channels: int=32,
            downsample_scales: List[int]=[3, 3, 3, 3, 1],
            max_downsample_channels: int=1024,
            bias: bool=True,
            nonlinear_activation: str="leakyrelu",
            nonlinear_activation_params: Dict[str, Any]={"negative_slope": 0.1},
            use_weight_norm: bool=True,
            use_spectral_norm: bool=False,
            init_type: str="xavier_uniform", ):
        """Initialize HiFiGANPeriodDiscriminator module.

        Args:
            in_channels (int): 
                Number of input channels.
            out_channels (int): 
                Number of output channels.
            period (int): 
                Period.
            kernel_sizes (list): 
                Kernel sizes of initial conv layers and the final conv layer.
                Must contain exactly two odd values.
            channels (int): 
                Number of initial channels.
            downsample_scales (list): 
                List of downsampling scales.
            max_downsample_channels (int): 
                Number of maximum downsampling channels.
            bias (bool): 
                Accepted for configuration compatibility; it is not forwarded
                to any convolution in this module.
            nonlinear_activation (str): 
                Activation function module name.
            nonlinear_activation_params (dict): 
                Hyperparameters for activation function.
            use_weight_norm (bool): 
                Whether to use weight norm.
                If set to true, it will be applied to all of the conv layers.
            use_spectral_norm (bool): 
                Whether to use spectral norm.
                If set to true, it will be applied to all of the conv layers.
            init_type (str):
                Initialization scheme name forwarded to ``initialize``.

        Raises:
            ValueError: If both use_weight_norm and use_spectral_norm are set.
        """
        super().__init__()

        # initialize parameters
        initialize(self, init_type)

        assert len(kernel_sizes) == 2
        assert kernel_sizes[0] % 2 == 1, "Kernel size must be odd number."
        assert kernel_sizes[1] % 2 == 1, "Kernel size must be odd number."

        self.period = period
        self.convs = nn.LayerList()
        in_chs = in_channels
        out_chs = channels
        for downsample_scale in downsample_scales:
            # Kernel and stride only act along the folded time axis; the
            # period axis is left untouched (size-1 kernel, stride 1).
            self.convs.append(
                nn.Sequential(
                    nn.Conv2D(
                        in_chs,
                        out_chs,
                        (kernel_sizes[0], 1),
                        (downsample_scale, 1),
                        padding=((kernel_sizes[0] - 1) // 2, 0), ),
                    get_activation(nonlinear_activation, **
                                   nonlinear_activation_params), ))
            in_chs = out_chs
            # NOTE: Use downsample_scale + 1?
            out_chs = min(out_chs * 4, max_downsample_channels)
        # After the loop ``in_chs`` is the channel count actually produced by
        # the last conv stage, while ``out_chs`` has already advanced to the
        # width a further stage would have. The output conv must consume
        # ``in_chs``; using ``out_chs`` only worked when the width had
        # saturated at ``max_downsample_channels``. With the default
        # arguments both values are 1024, so parameter shapes are unchanged.
        # NOTE(review): the kernel height is kernel_sizes[1] - 1 rather than
        # kernel_sizes[1]; left as-is because changing it would change the
        # weight shape - confirm whether this is intentional.
        self.output_conv = nn.Conv2D(
            in_chs,
            out_channels,
            (kernel_sizes[1] - 1, 1),
            1,
            padding=((kernel_sizes[1] - 1) // 2, 0), )

        if use_weight_norm and use_spectral_norm:
            raise ValueError("Either use use_weight_norm or use_spectral_norm.")

        # apply weight norm
        if use_weight_norm:
            self.apply_weight_norm()

        # apply spectral norm
        if use_spectral_norm:
            self.apply_spectral_norm()

    def forward(self, x):
        """Calculate forward propagation.

        Args:
            x (Tensor): 
                Input tensor (B, in_channels, T).
        Returns:
            list: List of each layer's tensors. The last entry is the output
                conv result flattened over every axis except the batch axis.
        """
        # transform 1d to 2d -> (B, C, T/P, P)
        b, c, t = paddle.shape(x)
        if t % self.period != 0:
            # Right-pad (reflect) so that T becomes a multiple of the period.
            n_pad = self.period - (t % self.period)
            x = F.pad(x, (0, n_pad), "reflect", data_format="NCL")
            t += n_pad
        x = x.reshape([b, c, t // self.period, self.period])

        # forward conv
        outs = []
        for layer in self.convs:
            x = layer(x)
            outs += [x]
        x = self.output_conv(x)
        x = paddle.flatten(x, 1, -1)
        outs += [x]

        return outs

    def apply_weight_norm(self):
        """Recursively apply weight normalization to all the Convolution layers
        in the sublayers.
        """

        def _apply_weight_norm(layer):
            if isinstance(layer, (nn.Conv1D, nn.Conv2D, nn.Conv1DTranspose)):
                nn.utils.weight_norm(layer)

        self.apply(_apply_weight_norm)

    def apply_spectral_norm(self):
        """Apply spectral normalization to all of the Conv2D sublayers."""

        def _apply_spectral_norm(m):
            if isinstance(m, nn.Conv2D):
                nn.utils.spectral_norm(m)

        self.apply(_apply_spectral_norm)


class HiFiGANMultiPeriodDiscriminator(nn.Layer):
    """Bank of HiFiGAN period discriminators, one per configured period."""

    def __init__(
            self,
            periods: List[int]=[2, 3, 5, 7, 11],
            discriminator_params: Dict[str, Any]={
                "in_channels": 1,
                "out_channels": 1,
                "kernel_sizes": [5, 3],
                "channels": 32,
                "downsample_scales": [3, 3, 3, 3, 1],
                "max_downsample_channels": 1024,
                "bias": True,
                "nonlinear_activation": "leakyrelu",
                "nonlinear_activation_params": {
                    "negative_slope": 0.1
                },
                "use_weight_norm": True,
                "use_spectral_norm": False,
            },
            init_type: str="xavier_uniform", ):
        """Build one period discriminator for every entry of ``periods``.

        Args:
            periods (list): 
                Periods to build discriminators for.
            discriminator_params (dict): 
                Keyword arguments for each HiFiGANPeriodDiscriminator.
                Any ``period`` entry is replaced by the value from ``periods``.
            init_type (str):
                Initialization scheme name forwarded to ``initialize``.
        """
        super().__init__()
        # set up parameter initialization
        initialize(self, init_type)

        self.discriminators = nn.LayerList()
        for current_period in periods:
            # Work on a private copy so the shared config is never modified.
            disc_kwargs = copy.deepcopy(discriminator_params)
            disc_kwargs["period"] = current_period
            sub_discriminator = HiFiGANPeriodDiscriminator(**disc_kwargs)
            self.discriminators.append(sub_discriminator)

    def forward(self, x):
        """Run every period discriminator on the same input.

        Args:
            x (Tensor): 
                Input noise signal (B, 1, T).
        Returns:
            List: One entry per discriminator; each entry is that
                discriminator's list of per-layer output tensors.
        """
        return [discriminator(x) for discriminator in self.discriminators]


class HiFiGANScaleDiscriminator(nn.Layer):
    """HiFi-GAN scale discriminator module.

    A stack of 1-D convolutions: one input layer, a series of strided grouped
    downsampling layers, and two final layers. ``forward`` returns the output
    of every layer.
    """

    def __init__(
            self,
            in_channels: int=1,
            out_channels: int=1,
            kernel_sizes: List[int]=[15, 41, 5, 3],
            channels: int=128,
            max_downsample_channels: int=1024,
            max_groups: int=16,
            bias: bool=True,
            downsample_scales: List[int]=[2, 2, 4, 4, 1],
            nonlinear_activation: str="leakyrelu",
            nonlinear_activation_params: Dict[str, Any]={"negative_slope": 0.1},
            use_weight_norm: bool=True,
            use_spectral_norm: bool=False,
            init_type: str="xavier_uniform", ):
        """Initialize HiFiGAN scale discriminator module.

        Args:
            in_channels (int): 
                Number of input channels.
            out_channels (int): 
                Number of output channels.
            kernel_sizes (list): 
                List of four kernel sizes. The first will be used for the first conv layer,
                and the second is for downsampling part, and the remaining two are for output layers.
                All four must be odd.
            channels (int): 
                Initial number of channels for conv layer.
            max_downsample_channels (int): 
                Maximum number of channels for downsampling layers.
            max_groups (int):
                Maximum number of groups used by the downsampling conv layers.
            bias (bool): 
                Whether to add bias parameter in convolution layers.
            downsample_scales (list): 
                List of downsampling scales.
            nonlinear_activation (str): 
                Activation function module name.
            nonlinear_activation_params (dict): 
                Hyperparameters for activation function.
            use_weight_norm (bool): Whether to use weight norm.
                If set to true, it will be applied to all of the conv layers.
            use_spectral_norm (bool): Whether to use spectral norm.
                If set to true, it will be applied to all of the conv layers.
            init_type (str):
                Initialization scheme name forwarded to ``initialize``.

        Raises:
            ValueError: If both use_weight_norm and use_spectral_norm are set.
        """
        super().__init__()

        # initialize parameters
        initialize(self, init_type)

        self.layers = nn.LayerList()

        # check kernel size is valid
        assert len(kernel_sizes) == 4
        for ks in kernel_sizes:
            assert ks % 2 == 1

        # add first layer
        self.layers.append(
            nn.Sequential(
                nn.Conv1D(
                    in_channels,
                    channels,
                    # NOTE: Use always the same kernel size
                    kernel_sizes[0],
                    bias_attr=bias,
                    padding=(kernel_sizes[0] - 1) // 2, ),
                get_activation(nonlinear_activation, **
                               nonlinear_activation_params), ))

        # add downsample layers
        in_chs = channels
        out_chs = channels
        # NOTE(kan-bayashi): Remove hard coding?
        groups = 4
        for downsample_scale in downsample_scales:
            self.layers.append(
                nn.Sequential(
                    nn.Conv1D(
                        in_chs,
                        out_chs,
                        kernel_size=kernel_sizes[1],
                        stride=downsample_scale,
                        padding=(kernel_sizes[1] - 1) // 2,
                        groups=groups,
                        bias_attr=bias, ),
                    get_activation(nonlinear_activation, **
                                   nonlinear_activation_params), ))
            in_chs = out_chs
            # NOTE: Remove hard coding?
            # Channels double per stage, capped at max_downsample_channels.
            out_chs = min(in_chs * 2, max_downsample_channels)
            # NOTE: Remove hard coding?
            # Groups quadruple per stage, capped at max_groups.
            groups = min(groups * 4, max_groups)

        # add final layers
        out_chs = min(in_chs * 2, max_downsample_channels)
        self.layers.append(
            nn.Sequential(
                nn.Conv1D(
                    in_chs,
                    out_chs,
                    kernel_size=kernel_sizes[2],
                    stride=1,
                    padding=(kernel_sizes[2] - 1) // 2,
                    bias_attr=bias, ),
                get_activation(nonlinear_activation, **
                               nonlinear_activation_params), ))
        self.layers.append(
            nn.Conv1D(
                out_chs,
                out_channels,
                kernel_size=kernel_sizes[3],
                stride=1,
                padding=(kernel_sizes[3] - 1) // 2,
                bias_attr=bias, ), )

        if use_weight_norm and use_spectral_norm:
            raise ValueError("Either use use_weight_norm or use_spectral_norm.")

        # apply weight norm
        if use_weight_norm:
            self.apply_weight_norm()

        # apply spectral norm
        if use_spectral_norm:
            self.apply_spectral_norm()

    def forward(self, x):
        """Calculate forward propagation.

        Args:
            x (Tensor): Input noise signal (B, 1, T).
        Returns:
            List: List of output tensors of each layer.
        """
        outs = []
        for f in self.layers:
            x = f(x)
            outs += [x]

        return outs

    def apply_weight_norm(self):
        """Recursively apply weight normalization to all the Convolution layers
        in the sublayers.
        """

        def _apply_weight_norm(layer):
            if isinstance(layer, (nn.Conv1D, nn.Conv2D, nn.Conv1DTranspose)):
                nn.utils.weight_norm(layer)

        self.apply(_apply_weight_norm)

    def apply_spectral_norm(self):
        """Apply spectral normalization to all of the convolution sublayers."""

        def _apply_spectral_norm(m):
            # Every convolution built by this class is an nn.Conv1D, so the
            # check has to include Conv1D. Matching nn.Conv2D alone made
            # ``use_spectral_norm=True`` a silent no-op.
            # NOTE(review): enabling the wrapper changes the parameters held
            # by these layers, so state dicts saved while the check was a
            # no-op may not line up key-for-key - confirm before resuming
            # training from such checkpoints.
            if isinstance(m, (nn.Conv1D, nn.Conv2D)):
                nn.utils.spectral_norm(m)

        self.apply(_apply_spectral_norm)


class HiFiGANMultiScaleDiscriminator(nn.Layer):
    """Bank of HiFi-GAN scale discriminators fed with progressively pooled input."""

    def __init__(
            self,
            scales: int=3,
            downsample_pooling: str="AvgPool1D",
            # follow the official implementation setting
            downsample_pooling_params: Dict[str, Any]={
                "kernel_size": 4,
                "stride": 2,
                "padding": 2,
            },
            discriminator_params: Dict[str, Any]={
                "in_channels": 1,
                "out_channels": 1,
                "kernel_sizes": [15, 41, 5, 3],
                "channels": 128,
                "max_downsample_channels": 1024,
                "max_groups": 16,
                "bias": True,
                "downsample_scales": [2, 2, 4, 4, 1],
                "nonlinear_activation": "leakyrelu",
                "nonlinear_activation_params": {
                    "negative_slope": 0.1
                },
            },
            follow_official_norm: bool=False,
            init_type: str="xavier_uniform", ):
        """Build ``scales`` scale discriminators plus the shared pooling layer.

        Args:
            scales (int): Number of scale discriminators to build.
            downsample_pooling (str): Name of the ``paddle.nn`` pooling class
                used to downsample the input between discriminators.
            downsample_pooling_params (dict): Keyword arguments for that
                pooling class.
            discriminator_params (dict): Keyword arguments for each
                HiFiGANScaleDiscriminator.
            follow_official_norm (bool): Whether to follow the norm setting of
                the official implementation: the first discriminator requests
                spectral norm and the remaining ones request weight norm.
            init_type (str): Initialization scheme name forwarded to
                ``initialize``.
        """
        super().__init__()

        # set up parameter initialization
        initialize(self, init_type)

        self.discriminators = nn.LayerList()

        # build the discriminators, one per scale
        for scale_idx in range(scales):
            disc_kwargs = copy.deepcopy(discriminator_params)
            if follow_official_norm:
                # Only the first discriminator asks for spectral norm; every
                # other one asks for weight norm.
                is_first = scale_idx == 0
                disc_kwargs["use_weight_norm"] = not is_first
                disc_kwargs["use_spectral_norm"] = is_first
            sub_discriminator = HiFiGANScaleDiscriminator(**disc_kwargs)
            self.discriminators.append(sub_discriminator)

        # resolve the pooling class by name and instantiate it
        pooling_cls = getattr(nn, downsample_pooling)
        self.pooling = pooling_cls(**downsample_pooling_params)

    def forward(self, x):
        """Run each discriminator, pooling the signal after every one.

        Args:
            x (Tensor): 
                Input noise signal (B, 1, T).
        Returns:
            List: One entry per discriminator; each entry is that
                discriminator's list of per-layer output tensors.
        """
        collected = []
        signal = x
        for discriminator in self.discriminators:
            collected.append(discriminator(signal))
            # Pooling also runs after the last discriminator.
            signal = self.pooling(signal)

        return collected


class HiFiGANMultiScaleMultiPeriodDiscriminator(nn.Layer):
    """Combined HiFi-GAN multi-scale and multi-period discriminator."""

    def __init__(
            self,
            # Multi-scale discriminator related
            scales: int=3,
            scale_downsample_pooling: str="AvgPool1D",
            scale_downsample_pooling_params: Dict[str, Any]={
                "kernel_size": 4,
                "stride": 2,
                "padding": 2,
            },
            scale_discriminator_params: Dict[str, Any]={
                "in_channels": 1,
                "out_channels": 1,
                "kernel_sizes": [15, 41, 5, 3],
                "channels": 128,
                "max_downsample_channels": 1024,
                "max_groups": 16,
                "bias": True,
                "downsample_scales": [2, 2, 4, 4, 1],
                "nonlinear_activation": "leakyrelu",
                "nonlinear_activation_params": {
                    "negative_slope": 0.1
                },
            },
            follow_official_norm: bool=True,
            # Multi-period discriminator related
            periods: List[int]=[2, 3, 5, 7, 11],
            period_discriminator_params: Dict[str, Any]={
                "in_channels": 1,
                "out_channels": 1,
                "kernel_sizes": [5, 3],
                "channels": 32,
                "downsample_scales": [3, 3, 3, 3, 1],
                "max_downsample_channels": 1024,
                "bias": True,
                "nonlinear_activation": "leakyrelu",
                "nonlinear_activation_params": {
                    "negative_slope": 0.1
                },
                "use_weight_norm": True,
                "use_spectral_norm": False,
            },
            init_type: str="xavier_uniform", ):
        """Build the multi-scale and the multi-period sub-discriminators.

        Args:
            scales (int): 
                Number of scale discriminators.
            scale_downsample_pooling (str): 
                Pooling module name for downsampling of the inputs.
            scale_downsample_pooling_params (dict): 
                Keyword arguments for the above pooling module.
            scale_discriminator_params (dict): 
                Keyword arguments for each scale discriminator.
            follow_official_norm (bool): 
                Whether to follow the norm setting of the official implementation. 
                The first scale discriminator requests spectral norm and the
                other scale discriminators request weight norm.
            periods (list): 
                List of periods.
            period_discriminator_params (dict): 
                Keyword arguments for each period discriminator.
                The period parameter will be overwritten.
            init_type (str):
                Initialization scheme name forwarded to ``initialize``.
        """
        super().__init__()

        # set up parameter initialization
        initialize(self, init_type)

        # multi-scale branch (built first)
        msd_kwargs = dict(
            scales=scales,
            downsample_pooling=scale_downsample_pooling,
            downsample_pooling_params=scale_downsample_pooling_params,
            discriminator_params=scale_discriminator_params,
            follow_official_norm=follow_official_norm, )
        self.msd = HiFiGANMultiScaleDiscriminator(**msd_kwargs)

        # multi-period branch
        mpd_kwargs = dict(
            periods=periods,
            discriminator_params=period_discriminator_params, )
        self.mpd = HiFiGANMultiPeriodDiscriminator(**mpd_kwargs)

    def forward(self, x):
        """Run both branches and concatenate their output lists.

        Args:
            x (Tensor): 
                Input noise signal (B, 1, T).
        Returns:
            List:
                Outputs of the multi-scale discriminators followed by those
                of the multi-period discriminators; each entry is a list of
                per-layer output tensors.
        """
        scale_outputs = self.msd(x)
        period_outputs = self.mpd(x)
        combined = scale_outputs + period_outputs
        return combined


class HiFiGANInference(nn.Layer):
    """Inference wrapper that normalizes its input and then runs the generator."""

    def __init__(self, normalizer, hifigan_generator):
        """Store the normalizer and the generator used by ``forward``.

        Args:
            normalizer: Callable applied to the input before generation.
            hifigan_generator: Object whose ``inference`` method produces the
                waveform.
        """
        super().__init__()
        self.normalizer = normalizer
        self.hifigan_generator = hifigan_generator

    def forward(self, logmel):
        """Normalize ``logmel`` and return the generator's ``inference`` output."""
        return self.hifigan_generator.inference(self.normalizer(logmel))


================================================
FILE: paddlespeech/t2s/models/hifigan/hifigan_updater.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from typing import Dict

import paddle
from paddle import distributed as dist
from paddle.io import DataLoader
from paddle.nn import Layer
from paddle.optimizer import Optimizer
from paddle.optimizer.lr import LRScheduler

from paddlespeech.t2s.training.extensions.evaluator import StandardEvaluator
from paddlespeech.t2s.training.reporter import report
from paddlespeech.t2s.training.updaters.standard_updater import StandardUpdater
from paddlespeech.t2s.training.updaters.standard_updater import UpdaterState
# Set the format used by the root logger's default handler: timestamp, level,
# source file/line and message.
logging.basicConfig(
    format='%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(message)s',
    datefmt='[%Y-%m-%d %H:%M:%S]')
# Module-level logger shared by the updater and the evaluator defined below;
# both attach their per-worker file handlers to it.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class HiFiGANUpdater(StandardUpdater):
    """Updater that runs one HiFiGAN training step (generator, then discriminator).

    Args:
        models (Dict[str, Layer]):
            Must contain the keys ``'generator'`` and ``'discriminator'``.
        optimizers (Dict[str, Optimizer]):
            Must contain the keys ``'generator'`` and ``'discriminator'``.
        criterions (Dict[str, Layer]):
            Must contain the keys ``'feat_match'``, ``'mel'``, ``'gen_adv'``
            and ``'dis_adv'``.
        schedulers (Dict[str, LRScheduler]):
            Must contain the keys ``'generator'`` and ``'discriminator'``.
        dataloader (DataLoader):
            Training dataloader; each batch is unpacked as ``(wav, mel)``.
        generator_train_start_steps (int):
            The generator is trained only while
            ``state.iteration > generator_train_start_steps``.
        discriminator_train_start_steps (int):
            The adversarial terms and the discriminator update are enabled
            only while ``state.iteration > discriminator_train_start_steps``.
        lambda_adv (float):
            Weight of the adversarial (plus feature matching) term.
        lambda_aux (float):
            Weight of the auxiliary mel spectrogram loss.
        lambda_feat_match (float):
            Weight of the feature matching loss inside the adversarial term.
        output_dir (Optional[Union[str, os.PathLike]]):
            Directory receiving ``worker_<rank>.log``. When ``None`` no file
            handler is attached and ``self.filehandler`` is ``None``.
    """

    def __init__(self,
                 models: Dict[str, Layer],
                 optimizers: Dict[str, Optimizer],
                 criterions: Dict[str, Layer],
                 schedulers: Dict[str, LRScheduler],
                 dataloader: DataLoader,
                 generator_train_start_steps: int=0,
                 discriminator_train_start_steps: int=100000,
                 lambda_adv: float=1.0,
                 lambda_aux: float=1.0,
                 lambda_feat_match: float=1.0,
                 output_dir=None):
        self.models = models
        self.generator: Layer = models['generator']
        self.discriminator: Layer = models['discriminator']

        self.optimizers = optimizers
        self.optimizer_g: Optimizer = optimizers['generator']
        self.optimizer_d: Optimizer = optimizers['discriminator']

        self.criterions = criterions
        self.criterion_feat_match = criterions['feat_match']
        self.criterion_mel = criterions['mel']

        self.criterion_gen_adv = criterions["gen_adv"]
        self.criterion_dis_adv = criterions["dis_adv"]

        self.schedulers = schedulers
        self.scheduler_g = schedulers['generator']
        self.scheduler_d = schedulers['discriminator']

        self.dataloader = dataloader

        self.generator_train_start_steps = generator_train_start_steps
        self.discriminator_train_start_steps = discriminator_train_start_steps
        self.lambda_adv = lambda_adv
        self.lambda_aux = lambda_aux
        self.lambda_feat_match = lambda_feat_match

        self.state = UpdaterState(iteration=0, epoch=0)
        self.train_iterator = iter(self.dataloader)

        self.filehandler = self._attach_file_handler(output_dir)
        self.logger = logger
        self.msg = ""

    @staticmethod
    def _attach_file_handler(output_dir):
        """Attach (or reuse) the per-worker log file handler on the module logger.

        Returns ``None`` when `output_dir` is ``None``. If the module logger
        already owns a ``FileHandler`` writing to the same file, that handler
        is returned instead of adding a second one, so records are not
        written to the file twice.
        """
        if output_dir is None:
            return None
        # Local import: `os` is not among this module's top-level imports.
        import os

        log_file = os.path.join(
            os.fspath(output_dir), 'worker_{}.log'.format(dist.get_rank()))
        # FileHandler stores the absolute path of its file in `baseFilename`.
        target = os.path.abspath(log_file)
        for handler in logger.handlers:
            if isinstance(handler, logging.FileHandler) and getattr(
                    handler, "baseFilename", None) == target:
                return handler
        handler = logging.FileHandler(log_file)
        logger.addHandler(handler)
        return handler

    @staticmethod
    def _record_loss(losses_dict, name, value):
        """Report ``train/<name>`` and store it in `losses_dict`.

        The value is converted to a Python float only once.
        """
        scalar = float(value)
        report("train/" + name, scalar)
        losses_dict[name] = scalar

    def update_core(self, batch):
        """Run one training step on `batch` and build the log message.

        Args:
            batch: Pair ``(wav, mel)`` of target waveform and input mel
                features.

        The generator loss is
        ``lambda_aux * mel_loss + lambda_adv * (adv_loss + lambda_feat_match * fm_loss)``,
        where the adversarial part is only added once the discriminator has
        started training. The per-loss values are reported and also joined
        into ``self.msg``.
        """
        self.msg = "Rank: {}, ".format(dist.get_rank())
        losses_dict = {}
        # parse batch
        wav, mel = batch

        # Generator
        if self.state.iteration > self.generator_train_start_steps:
            # (B, out_channels, T * prod(upsample_scales))
            wav_ = self.generator(mel)

            # initialize
            gen_loss = 0.0
            aux_loss = 0.0

            # mel spectrogram loss
            mel_loss = self.criterion_mel(wav_, wav)
            aux_loss += mel_loss
            self._record_loss(losses_dict, "mel_loss", mel_loss)

            gen_loss += aux_loss * self.lambda_aux

            # adversarial loss
            if self.state.iteration > self.discriminator_train_start_steps:
                p_ = self.discriminator(wav_)
                adv_loss = self.criterion_gen_adv(p_)
                # Recorded before the feature matching term is added below.
                self._record_loss(losses_dict, "adversarial_loss", adv_loss)

                # feature matching loss
                # no need to track gradients
                with paddle.no_grad():
                    p = self.discriminator(wav)
                fm_loss = self.criterion_feat_match(p_, p)
                self._record_loss(losses_dict, "feature_matching_loss",
                                  fm_loss)

                adv_loss += self.lambda_feat_match * fm_loss

                gen_loss += self.lambda_adv * adv_loss

            self._record_loss(losses_dict, "generator_loss", gen_loss)

            self.optimizer_g.clear_grad()
            gen_loss.backward()

            self.optimizer_g.step()
            self.scheduler_g.step()

        # Discriminator
        if self.state.iteration > self.discriminator_train_start_steps:
            # re-compute wav_ which leads better quality
            with paddle.no_grad():
                wav_ = self.generator(mel)

            p = self.discriminator(wav)
            p_ = self.discriminator(wav_.detach())
            real_loss, fake_loss = self.criterion_dis_adv(p_, p)
            dis_loss = real_loss + fake_loss
            self._record_loss(losses_dict, "real_loss", real_loss)
            self._record_loss(losses_dict, "fake_loss", fake_loss)
            self._record_loss(losses_dict, "discriminator_loss", dis_loss)

            self.optimizer_d.clear_grad()
            dis_loss.backward()

            self.optimizer_d.step()
            self.scheduler_d.step()

        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())


class HiFiGANEvaluator(StandardEvaluator):
    """Evaluator that computes the HiFiGAN generator and discriminator losses.

    Args:
        models (Dict[str, Layer]):
            Must contain the keys ``'generator'`` and ``'discriminator'``.
        criterions (Dict[str, Layer]):
            Must contain the keys ``'feat_match'``, ``'mel'``, ``'gen_adv'``
            and ``'dis_adv'``.
        dataloader (DataLoader):
            Evaluation dataloader; each batch is unpacked as ``(wav, mel)``.
        lambda_adv (float):
            Weight of the adversarial (plus feature matching) term.
        lambda_aux (float):
            Weight of the auxiliary mel spectrogram loss.
        lambda_feat_match (float):
            Weight of the feature matching loss inside the adversarial term.
        output_dir (Optional[Union[str, os.PathLike]]):
            Directory receiving ``worker_<rank>.log``. When ``None`` no file
            handler is attached and ``self.filehandler`` is ``None``.
    """

    def __init__(self,
                 models: Dict[str, Layer],
                 criterions: Dict[str, Layer],
                 dataloader: DataLoader,
                 lambda_adv: float=1.0,
                 lambda_aux: float=1.0,
                 lambda_feat_match: float=1.0,
                 output_dir=None):
        self.models = models
        self.generator = models['generator']
        self.discriminator = models['discriminator']

        self.criterions = criterions
        self.criterion_feat_match = criterions['feat_match']
        self.criterion_mel = criterions['mel']
        self.criterion_gen_adv = criterions["gen_adv"]
        self.criterion_dis_adv = criterions["dis_adv"]

        self.dataloader = dataloader

        self.lambda_adv = lambda_adv
        self.lambda_aux = lambda_aux
        self.lambda_feat_match = lambda_feat_match

        self.filehandler = self._attach_file_handler(output_dir)
        self.logger = logger
        self.msg = ""

    @staticmethod
    def _attach_file_handler(output_dir):
        """Attach (or reuse) the per-worker log file handler on the module logger.

        Returns ``None`` when `output_dir` is ``None``. If the module logger
        already owns a ``FileHandler`` writing to the same file, that handler
        is returned instead of adding a second one, so records are not
        written to the file twice.
        """
        if output_dir is None:
            return None
        # Local import: `os` is not among this module's top-level imports.
        import os

        log_file = os.path.join(
            os.fspath(output_dir), 'worker_{}.log'.format(dist.get_rank()))
        # FileHandler stores the absolute path of its file in `baseFilename`.
        target = os.path.abspath(log_file)
        for handler in logger.handlers:
            if isinstance(handler, logging.FileHandler) and getattr(
                    handler, "baseFilename", None) == target:
                return handler
        handler = logging.FileHandler(log_file)
        logger.addHandler(handler)
        return handler

    @staticmethod
    def _record_loss(losses_dict, name, value):
        """Report ``eval/<name>`` and store it in `losses_dict`.

        The value is converted to a Python float only once.
        """
        scalar = float(value)
        report("eval/" + name, scalar)
        losses_dict[name] = scalar

    def evaluate_core(self, batch):
        """Compute and report all losses for one evaluation batch.

        Args:
            batch: Pair ``(wav, mel)`` of target waveform and input mel
                features.

        The generator loss is
        ``lambda_adv * (adv_loss + lambda_feat_match * fm_loss) + lambda_aux * mel_loss``.
        The resulting message is stored in ``self.msg`` and logged at INFO
        level.
        """
        # logging.debug("Evaluate: ")
        self.msg = "Evaluate: "
        losses_dict = {}
        wav, mel = batch

        # Generator
        # (B, out_channels, T * prod(upsample_scales))
        wav_ = self.generator(mel)

        # initialize
        gen_loss = 0.0
        aux_loss = 0.0

        ## Adversarial loss
        p_ = self.discriminator(wav_)
        adv_loss = self.criterion_gen_adv(p_)
        # Recorded before the feature matching term is added below.
        self._record_loss(losses_dict, "adversarial_loss", adv_loss)

        # feature matching loss
        p = self.discriminator(wav)
        fm_loss = self.criterion_feat_match(p_, p)
        self._record_loss(losses_dict, "feature_matching_loss", fm_loss)
        adv_loss += self.lambda_feat_match * fm_loss

        gen_loss += self.lambda_adv * adv_loss

        # mel spectrogram loss
        mel_loss = self.criterion_mel(wav_, wav)
        aux_loss += mel_loss
        self._record_loss(losses_dict, "mel_loss", mel_loss)

        gen_loss += aux_loss * self.lambda_aux

        self._record_loss(losses_dict, "generator_loss", gen_loss)

        # Discriminator
        # NOTE(review): this repeats the discriminator pass on `wav` done for
        # the feature matching loss above; kept to preserve behavior --
        # confirm the discriminator is deterministic in eval before reusing.
        p = self.discriminator(wav)
        real_loss, fake_loss = self.criterion_dis_adv(p_, p)
        dis_loss = real_loss + fake_loss
        self._record_loss(losses_dict, "real_loss", real_loss)
        self._record_loss(losses_dict, "fake_loss", fake_loss)
        self._record_loss(losses_dict, "discriminator_loss", dis_loss)

        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())
        self.logger.info(self.msg)


================================================
FILE: paddlespeech/t2s/models/jets/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .jets import *
from .jets_updater import *


================================================
FILE: paddlespeech/t2s/models/jets/alignments.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Alignment module and duration utilities in JETS.

This code is based on https://github.com/imdanboy/jets.

"""
import numpy as np
import paddle
import paddle.nn.functional as F
from numba import jit
from paddle import nn

from paddlespeech.t2s.modules.masked_fill import masked_fill


class AlignmentModule(nn.Layer):
    """Alignment Learning Framework for parallel TTS models.

    Reference: https://arxiv.org/abs/2108.10447
    """

    def __init__(self, adim, odim):
        super().__init__()
        # Text branch: two 1-D convolutions (adim -> adim).
        self.t_conv1 = nn.Conv1D(adim, adim, kernel_size=3, padding=1)
        self.t_conv2 = nn.Conv1D(adim, adim, kernel_size=1, padding=0)

        # Acoustic feature branch: three 1-D convolutions (odim -> adim).
        self.f_conv1 = nn.Conv1D(odim, adim, kernel_size=3, padding=1)
        self.f_conv2 = nn.Conv1D(adim, adim, kernel_size=3, padding=1)
        self.f_conv3 = nn.Conv1D(adim, adim, kernel_size=1, padding=0)

    def forward(self, text, feats, x_masks=None):
        """Score every (feature frame, text token) pair.

        Args:
            text (Tensor): Batched text embedding (B, T_text, adim)
            feats (Tensor): Batched acoustic feature (B, T_feats, odim)
            x_masks (Tensor): Mask tensor (B, T_text)

        Returns:
            Tensor: log probability of attention matrix (B, T_feats, T_text)
            Tensor: the score the log-softmax was taken over, i.e. the
                negative L2 distance between projected frames and tokens,
                with `x_masks` applied through `masked_fill` when given.
        """
        # The convolutions run on the transposed layout; transpose back after.
        keys = text.transpose((0, 2, 1))
        keys = self.t_conv2(F.relu(self.t_conv1(keys)))
        keys = keys.transpose((0, 2, 1))

        queries = feats.transpose((0, 2, 1))
        queries = F.relu(self.f_conv1(queries))
        queries = F.relu(self.f_conv2(queries))
        queries = self.f_conv3(queries).transpose((0, 2, 1))

        # Pairwise difference between every frame and every token.
        pairwise = queries.unsqueeze(2) - keys.unsqueeze(1)
        score = -paddle.linalg.norm(pairwise, p=2, axis=3)

        if x_masks is not None:
            score = masked_fill(score, x_masks.unsqueeze(-2), -np.inf)
        return F.log_softmax(score, axis=-1), score


@jit(nopython=True)
def _monotonic_alignment_search(log_p_attn):
    """Monotonic alignment search (https://arxiv.org/abs/2005.11129).

    `log_p_attn` is indexed as (frame, token). Returns an array with one
    token index per frame.
    """
    n_frames = log_p_attn.shape[0]
    n_tokens = log_p_attn.shape[1]
    scores = np.full((n_tokens, n_frames), fill_value=-np.inf)

    token_major = log_p_attn.transpose(1, 0)  # -> (n_tokens, n_frames)
    # 1. First row: every frame up to `frame` is assigned to token 0.
    for frame in range(n_frames):
        scores[0, frame] = token_major[0, :frame + 1].sum()

    # 2. Forward pass: either stay on the same token or advance by one.
    for frame in range(1, n_frames):
        upper = min(frame + 1, n_tokens)
        for tok in range(1, upper):
            best_prev = max(scores[tok - 1, frame - 1], scores[tok, frame - 1])
            scores[tok, frame] = best_prev + token_major[tok, frame]

    # 3. Backtrack from the last frame, which starts on the last token.
    path = np.full((n_frames, ), fill_value=n_tokens - 1)
    for frame in range(n_frames - 2, -1, -1):  # n_frames-2, ..., 0
        stay = path[frame + 1]
        step_back = stay - 1
        # Step back one token only if possible and at least as good as staying.
        if stay != 0 and scores[step_back, frame] >= scores[stay, frame]:
            path[frame] = step_back
        else:
            path[frame] = stay
    return path


def viterbi_decode(log_p_attn, text_lengths, feats_lengths):
    """Extract token durations and the binarization loss from `log_p_attn`.

    Args:
        log_p_attn (Tensor):
            Batched log probability of attention matrix (B, T_feats, T_text)
        text_lengths (Tensor):
            Text length tensor (B,)
        feats_lengths (Tensor):
            Feature length tensor (B,)
    Returns:
        Tensor:
            Batched token duration extracted from `log_p_attn` (B,T_text)
        Tensor:
            binarization loss tensor ()
    """
    batch_size = log_p_attn.shape[0]
    max_text_len = log_p_attn.shape[2]
    place = log_p_attn.place

    durations = paddle.zeros((batch_size, max_text_len), dtype="int32")
    loss_sum = 0
    for idx in range(batch_size):
        n_feats = feats_lengths[idx]
        n_text = text_lengths[idx]
        # Drop the padded frames / tokens of this sample.
        sample = log_p_attn[idx, :n_feats, :n_text]
        path = _monotonic_alignment_search(sample.numpy())
        # Frames assigned to each token index give that token's duration.
        counts = np.bincount(path)
        durations[idx, :len(counts)] = paddle.to_tensor(
            counts, place=place, dtype="int32")

        frame_idx = paddle.arange(n_feats)
        loss_sum = loss_sum - sample[frame_idx, path].mean()
    return durations, loss_sum / batch_size


@jit(nopython=True)
def _average_by_duration(ds, xs, text_lengths, feats_lengths):
    """Average `xs` over the consecutive frame spans given by the durations `ds`.

    Tokens whose span is empty get 0. The result has the shape of `ds` and
    dtype float32.
    """
    n_batch = ds.shape[0]
    averaged = np.zeros(shape=ds.shape, dtype=np.float32)
    durations = ds.astype(np.int32)
    for b in range(n_batch):
        tok_durs = durations[b, :text_lengths[b]]
        frames = xs[b, :feats_lengths[b]]
        # Running frame offset: each token covers frames [begin, finish).
        begin = 0
        for tok in range(tok_durs.shape[0]):
            finish = begin + tok_durs[tok]
            segment = frames[begin:finish]
            if len(segment) != 0:
                averaged[b, tok] = segment.mean()
            else:
                averaged[b, tok] = 0
            begin = finish
    return averaged


def average_by_duration(ds, xs, text_lengths, feats_lengths):
    """Tensor front-end of `_average_by_duration`.

    Args:
        ds (Tensor):
            Batched token duration (B,T_text)
        xs (Tensor):
            Batched feature sequences to be averaged (B,T_feats)
        text_lengths (Tensor):
            Text length tensor (B,)
        feats_lengths (Tensor):
            Feature length tensor (B,)
    Returns:
        Tensor: Batched feature averaged according to the token duration (B, T_text)
    """
    # Remember where `ds` lives so the result is created on the same place.
    place = ds.place
    averaged = _average_by_duration(ds.numpy(),
                                    xs.numpy(),
                                    text_lengths.numpy(),
                                    feats_lengths.numpy())
    return paddle.to_tensor(averaged, place=place)


================================================
FILE: paddlespeech/t2s/models/jets/generator.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generator module in JETS.

This code is based on https://github.com/imdanboy/jets.

"""
import logging
import math
from typing import Any
from typing import Dict
from typing import List
from typing import Optional
from typing import Sequence
from typing import Tuple

import numpy as np
import paddle
from paddle import nn

from paddlespeech.t2s.models.hifigan import HiFiGANGenerator
from paddlespeech.t2s.models.jets.alignments import AlignmentModule
from paddlespeech.t2s.models.jets.alignments import average_by_duration
from paddlespeech.t2s.models.jets.alignments import viterbi_decode
from paddlespeech.t2s.models.jets.length_regulator import GaussianUpsampling
from paddlespeech.t2s.modules.nets_utils import get_random_segments
from paddlespeech.t2s.modules.nets_utils import initialize
from paddlespeech.t2s.modules.nets_utils import make_non_pad_mask
from paddlespeech.t2s.modules.nets_utils import make_pad_mask
from paddlespeech.t2s.modules.predictor.duration_predictor import DurationPredictor
from paddlespeech.t2s.modules.predictor.length_regulator import LengthRegulator
from paddlespeech.t2s.modules.predictor.variance_predictor import VariancePredictor
from paddlespeech.t2s.modules.style_encoder import StyleEncoder
from paddlespeech.t2s.modules.transformer.embedding import PositionalEncoding
from paddlespeech.t2s.modules.transformer.embedding import ScaledPositionalEncoding
from paddlespeech.t2s.modules.transformer.encoder import ConformerEncoder
from paddlespeech.t2s.modules.transformer.encoder import TransformerEncoder


class JETSGenerator(nn.Layer):
    """Generator module in JETS.
    """

    def __init__(
            self,
            idim: int,
            odim: int,
            adim: int=256,
            aheads: int=2,
            elayers: int=4,
            eunits: int=1024,
            dlayers: int=4,
            dunits: int=1024,
            positionwise_layer_type: str="conv1d",
            positionwise_conv_kernel_size: int=1,
            use_scaled_pos_enc: bool=True,
            use_batch_norm: bool=True,
            encoder_normalize_before: bool=True,
            decoder_normalize_before: bool=True,
            encoder_concat_after: bool=False,
            decoder_concat_after: bool=False,
            reduction_factor: int=1,
            encoder_type: str="transformer",
            decoder_type: str="transformer",
            transformer_enc_dropout_rate: float=0.1,
            transformer_enc_positional_dropout_rate: float=0.1,
            transformer_enc_attn_dropout_rate: float=0.1,
            transformer_dec_dropout_rate: float=0.1,
            transformer_dec_positional_dropout_rate: float=0.1,
            transformer_dec_attn_dropout_rate: float=0.1,
            transformer_activation_type: str="relu",
            # only for conformer
            conformer_rel_pos_type: str="legacy",
            conformer_pos_enc_layer_type: str="rel_pos",
            conformer_self_attn_layer_type: str="rel_selfattn",
            conformer_activation_type: str="swish",
            use_macaron_style_in_conformer: bool=True,
            use_cnn_in_conformer: bool=True,
            zero_triu: bool=False,
            conformer_enc_kernel_size: int=7,
            conformer_dec_kernel_size: int=31,
            # duration predictor
            duration_predictor_layers: int=2,
            duration_predictor_chans: int=384,
            duration_predictor_kernel_size: int=3,
            duration_predictor_dropout_rate: float=0.1,
            # energy predictor
            energy_predictor_layers: int=2,
            energy_predictor_chans: int=384,
            energy_predictor_kernel_size: int=3,
            energy_predictor_dropout: float=0.5,
            energy_embed_kernel_size: int=9,
            energy_embed_dropout: float=0.5,
            stop_gradient_from_energy_predictor: bool=False,
            # pitch predictor
            pitch_predictor_layers: int=2,
            pitch_predictor_chans: int=384,
            pitch_predictor_kernel_size: int=3,
            pitch_predictor_dropout: float=0.5,
            pitch_embed_kernel_size: int=9,
            pitch_embed_dropout: float=0.5,
            stop_gradient_from_pitch_predictor: bool=False,
            # extra embedding related
            spks: Optional[int]=None,
            langs: Optional[int]=None,
            spk_embed_dim: Optional[int]=None,
            spk_embed_integration_type: str="add",
            use_gst: bool=False,
            gst_tokens: int=10,
            gst_heads: int=4,
            gst_conv_layers: int=6,
            gst_conv_chans_list: Sequence[int]=(32, 32, 64, 64, 128, 128),
            gst_conv_kernel_size: int=3,
            gst_conv_stride: int=2,
            gst_gru_layers: int=1,
            gst_gru_units: int=128,
            # training related
            init_type: str="xavier_uniform",
            init_enc_alpha: float=1.0,
            init_dec_alpha: float=1.0,
            use_masking: bool=False,
            use_weighted_masking: bool=False,
            segment_size: int=64,
            # hifigan generator
            generator_out_channels: int=1,
            generator_channels: int=512,
            generator_global_channels: int=-1,
            generator_kernel_size: int=7,
            generator_upsample_scales: List[int]=[8, 8, 2, 2],
            generator_upsample_kernel_sizes: List[int]=[16, 16, 4, 4],
            generator_resblock_kernel_sizes: List[int]=[3, 7, 11],
            generator_resblock_dilations: List[List[int]]=[[1, 3, 5], [1, 3, 5],
                                                           [1, 3, 5]],
            generator_use_additional_convs: bool=True,
            generator_bias: bool=True,
            generator_nonlinear_activation: str="LeakyReLU",
            generator_nonlinear_activation_params: Dict[
                str, Any]={"negative_slope": 0.1},
            generator_use_weight_norm: bool=True, ):
        """Initialize JETS generator module.

        Args:
            idim (int): 
                Dimension of the inputs.
            odim (int): 
                Dimension of the outputs.
            adim (int): 
                Attention dimension.
            aheads (int): 
                Number of attention heads.
            elayers (int): 
                Number of encoder layers.
            eunits (int): 
                Number of encoder hidden units.
            dlayers (int): 
                Number of decoder layers.
            dunits (int): 
                Number of decoder hidden units.
            use_scaled_pos_enc (bool): 
                Whether to use trainable scaled pos encoding.
            use_batch_norm (bool): 
                Whether to use batch normalization in encoder prenet.
            encoder_normalize_before (bool): 
                Whether to apply layernorm layer before encoder block.
            decoder_normalize_before (bool): 
                Whether to apply layernorm layer before decoder block.
            encoder_concat_after (bool): 
                Whether to concatenate attention layer's input and output in encoder.
            decoder_concat_after (bool): 
                Whether to concatenate attention layer's input and output in decoder.
            reduction_factor (int): 
                Reduction factor.
            encoder_type (str): 
                Encoder type ("transformer" or "conformer").
            decoder_type (str): 
                Decoder type ("transformer" or "conformer").
            transformer_enc_dropout_rate (float): 
                Dropout rate in encoder except attention and positional encoding.
            transformer_enc_positional_dropout_rate (float): 
                Dropout rate after encoder positional encoding.
            transformer_enc_attn_dropout_rate (float): 
                Dropout rate in encoder self-attention module.
            transformer_dec_dropout_rate (float): 
                Dropout rate in decoder except attention & positional encoding.
            transformer_dec_positional_dropout_rate (float): 
                Dropout rate after decoder positional encoding.
            transformer_dec_attn_dropout_rate (float): 
                Dropout rate in decoder self-attention module.
            conformer_rel_pos_type (str): 
                Relative pos encoding type in conformer.
            conformer_pos_enc_layer_type (str): 
                Pos encoding layer type in conformer.
            conformer_self_attn_layer_type (str): 
                Self-attention layer type in conformer
            conformer_activation_type (str): 
                Activation function type in conformer.
            use_macaron_style_in_conformer: 
                Whether to use macaron style FFN.
            use_cnn_in_conformer: 
                Whether to use CNN in conformer.
            zero_triu: 
                Whether to use zero triu in relative self-attention module.
            conformer_enc_kernel_size: 
                Kernel size of encoder conformer.
            conformer_dec_kernel_size: 
                Kernel size of decoder conformer.
            duration_predictor_layers (int): 
                Number of duration predictor layers.
            duration_predictor_chans (int): 
                Number of duration predictor channels.
            duration_predictor_kernel_size (int): 
                Kernel size of duration predictor.
            duration_predictor_dropout_rate (float): 
                Dropout rate in duration predictor.
            pitch_predictor_layers (int): 
                Number of pitch predictor layers.
            pitch_predictor_chans (int): 
                Number of pitch predictor channels.
            pitch_predictor_kernel_size (int): 
                Kernel size of pitch predictor.
            pitch_predictor_dropout_rate (float): 
                Dropout rate in pitch predictor.
            pitch_embed_kernel_size (float): 
                Kernel size of pitch embedding.
            pitch_embed_dropout_rate (float): 
                Dropout rate for pitch embedding.
            stop_gradient_from_pitch_predictor: 
                Whether to stop gradient from pitch predictor to encoder.
            energy_predictor_layers (int): 
                Number of energy predictor layers.
            energy_predictor_chans (int): 
                Number of energy predictor channels.
            energy_predictor_kernel_size (int): 
                Kernel size of energy predictor.
            energy_predictor_dropout_rate (float): 
                Dropout rate in energy predictor.
            energy_embed_kernel_size (float): 
                Kernel size of energy embedding.
            energy_embed_dropout_rate (float): 
                Dropout rate for energy embedding.
            stop_gradient_from_energy_predictor: 
                Whether to stop gradient from energy predictor to encoder.
            spks (Optional[int]): 
                Number of speakers. If set to > 1, assume that the sids will be provided as the input and use sid embedding layer.
            langs (Optional[int]): 
                Number of languages. If set to > 1, assume that the lids will be provided as the input and use sid embedding layer.
            spk_embed_dim (Optional[int]): 
                Speaker embedding dimension. If set to > 0, assume that spembs will be provided as the input.
            spk_embed_integration_type: 
                How to integrate speaker embedding.
            use_gst (str): 
                Whether to use global style token.
            gst_tokens (int): 
                The number of GST embeddings.
            gst_heads (int): 
                The number of heads in GST multihead attention.
            gst_conv_layers (int): 
                The number of conv layers in GST.
            gst_conv_chans_list: (Sequence[int]):
                List of the number of channels of conv layers in GST.
            gst_conv_kernel_size (int): 
                Kernel size of conv layers in GST.
            gst_conv_stride (int): 
                Stride size of conv layers in GST.
            gst_gru_layers (int): 
                The number of GRU layers in GST.
            gst_gru_units (int): 
                The number of GRU units in GST.
            init_type (str): 
                How to initialize transformer parameters.
            init_enc_alpha (float): 
                Initial value of alpha in scaled pos encoding of the encoder.
            init_dec_alpha (float): 
                Initial value of alpha in scaled pos encoding of the decoder.
            use_masking (bool): 
                Whether to apply masking for padded part in loss calculation.
            use_weighted_masking (bool): 
                Whether to apply weighted masking in loss calculation.
            segment_size (int): 
                Segment size for random windowed discriminator
            generator_out_channels (int): 
                Number of output channels.
            generator_channels (int): 
                Number of hidden representation channels.
            generator_global_channels (int): 
                Number of global conditioning channels.
            generator_kernel_size (int): 
                Kernel size of initial and final conv layer.
            generator_upsample_scales (List[int]): 
                List of upsampling scales.
            generator_upsample_kernel_sizes (List[int]): 
                List of kernel sizes for upsample layers.
            generator_resblock_kernel_sizes (List[int]): 
                List of kernel sizes for residual blocks.
            generator_resblock_dilations (List[List[int]]): 
                List of list of dilations for residual blocks.
            generator_use_additional_convs (bool): 
                Whether to use additional conv layers in residual blocks.
            generator_bias (bool): 
                Whether to add bias parameter in convolution layers.
            generator_nonlinear_activation (str): 
                Activation function module name.
            generator_nonlinear_activation_params (Dict[str, Any]): 
                Hyperparameters for activation function.
            generator_use_weight_norm (bool): 
                Whether to use weight norm. If set to true, it will be applied to all of the conv layers.

        """
        super().__init__()
        self.segment_size = segment_size
        self.upsample_factor = int(np.prod(generator_upsample_scales))
        self.idim = idim
        self.odim = odim
        self.reduction_factor = reduction_factor
        self.encoder_type = encoder_type
        self.decoder_type = decoder_type
        self.stop_gradient_from_pitch_predictor = stop_gradient_from_pitch_predictor
        self.stop_gradient_from_energy_predictor = stop_gradient_from_energy_predictor
        self.use_scaled_pos_enc = use_scaled_pos_enc
        self.use_gst = use_gst

        # use idx 0 as padding idx
        self.padding_idx = 0

        # get positional encoding layer type
        transformer_pos_enc_layer_type = "scaled_abs_pos" if self.use_scaled_pos_enc else "abs_pos"

        # check relative positional encoding compatibility
        if "conformer" in [encoder_type, decoder_type]:
            if conformer_rel_pos_type == "legacy":
                if conformer_pos_enc_layer_type == "rel_pos":
                    conformer_pos_enc_layer_type = "legacy_rel_pos"
                    logging.warning(
                        "Fallback to conformer_pos_enc_layer_type = 'legacy_rel_pos' "
                        "due to the compatibility. If you want to use the new one, "
                        "please use conformer_pos_enc_layer_type = 'latest'.")
                if conformer_self_attn_layer_type == "rel_selfattn":
                    conformer_self_attn_layer_type = "legacy_rel_selfattn"
                    logging.warning(
                        "Fallback to "
                        "conformer_self_attn_layer_type = 'legacy_rel_selfattn' "
                        "due to the compatibility. If you want to use the new one, "
                        "please use conformer_pos_enc_layer_type = 'latest'.")
            elif conformer_rel_pos_type == "latest":
                assert conformer_pos_enc_layer_type != "legacy_rel_pos"
                assert conformer_self_attn_layer_type != "legacy_rel_selfattn"
            else:
                raise ValueError(
                    f"Unknown rel_pos_type: {conformer_rel_pos_type}")

        # define encoder
        encoder_input_layer = nn.Embedding(
            num_embeddings=idim,
            embedding_dim=adim,
            padding_idx=self.padding_idx)
        if encoder_type == "transformer":
            self.encoder = TransformerEncoder(
                idim=idim,
                attention_dim=adim,
                attention_heads=aheads,
                linear_units=eunits,
                num_blocks=elayers,
                input_layer=encoder_input_layer,
                dropout_rate=transformer_enc_dropout_rate,
                positional_dropout_rate=transformer_enc_positional_dropout_rate,
                attention_dropout_rate=transformer_enc_attn_dropout_rate,
                pos_enc_layer_type=transformer_pos_enc_layer_type,
                normalize_before=encoder_normalize_before,
                concat_after=encoder_concat_after,
                positionwise_layer_type=positionwise_layer_type,
                positionwise_conv_kernel_size=positionwise_conv_kernel_size,
                activation_type=transformer_activation_type)
        elif encoder_type == "conformer":
            self.encoder = ConformerEncoder(
                idim=idim,
                attention_dim=adim,
                attention_heads=aheads,
                linear_units=eunits,
                num_blocks=elayers,
                input_layer=encoder_input_layer,
                dropout_rate=transformer_enc_dropout_rate,
                positional_dropout_rate=transformer_enc_positional_dropout_rate,
                attention_dropout_rate=transformer_enc_attn_dropout_rate,
                normalize_before=encoder_normalize_before,
                concat_after=encoder_concat_after,
                positionwise_layer_type=positionwise_layer_type,
                positionwise_conv_kernel_size=positionwise_conv_kernel_size,
                macaron_style=use_macaron_style_in_conformer,
                pos_enc_layer_type=conformer_pos_enc_layer_type,
                selfattention_layer_type=conformer_self_attn_layer_type,
                activation_type=conformer_activation_type,
                use_cnn_module=use_cnn_in_conformer,
                cnn_module_kernel=conformer_enc_kernel_size,
                zero_triu=zero_triu, )
        else:
            raise ValueError(f"{encoder_type} is not supported.")

        # define GST
        if self.use_gst:
            self.gst = StyleEncoder(
                idim=odim,  # the input is mel-spectrogram
                gst_tokens=gst_tokens,
                gst_token_dim=adim,
                gst_heads=gst_heads,
                conv_layers=gst_conv_layers,
                conv_chans_list=gst_conv_chans_list,
                conv_kernel_size=gst_conv_kernel_size,
                conv_stride=gst_conv_stride,
                gru_layers=gst_gru_layers,
                gru_units=gst_gru_units, )

        # define spk and lang embedding
        self.spks = None
        if spks is not None and spks > 1:
            self.spks = spks
            self.sid_emb = nn.Embedding(spks, adim)
        self.langs = None
        if langs is not None and langs > 1:
            self.langs = langs
            self.lid_emb = nn.Embedding(langs, adim)

        # define additional projection for speaker embedding
        self.spk_embed_dim = None
        if spk_embed_dim is not None and spk_embed_dim > 0:
            self.spk_embed_dim = spk_embed_dim
            self.spk_embed_integration_type = spk_embed_integration_type
        if self.spk_embed_dim is not None:
            if self.spk_embed_integration_type == "add":
                self.projection = nn.Linear(self.spk_embed_dim, adim)
            else:
                self.projection = nn.Linear(adim + self.spk_embed_dim, adim)

        # define duration predictor
        self.duration_predictor = DurationPredictor(
            idim=adim,
            n_layers=duration_predictor_layers,
            n_chans=duration_predictor_chans,
            kernel_size=duration_predictor_kernel_size,
            dropout_rate=duration_predictor_dropout_rate, )

        # define pitch predictor
        self.pitch_predictor = VariancePredictor(
            idim=adim,
            n_layers=pitch_predictor_layers,
            n_chans=pitch_predictor_chans,
            kernel_size=pitch_predictor_kernel_size,
            dropout_rate=pitch_predictor_dropout, )
        # NOTE(kan-bayashi): We use continuous pitch + FastPitch style avg
        self.pitch_embed = nn.Sequential(
            nn.Conv1D(
                in_channels=1,
                out_channels=adim,
                kernel_size=pitch_embed_kernel_size,
                padding=(pitch_embed_kernel_size - 1) // 2, ),
            nn.Dropout(pitch_embed_dropout), )

        # define energy predictor
        self.energy_predictor = VariancePredictor(
            idim=adim,
            n_layers=energy_predictor_layers,
            n_chans=energy_predictor_chans,
            kernel_size=energy_predictor_kernel_size,
            dropout_rate=energy_predictor_dropout, )
        # NOTE(kan-bayashi): We use continuous energy + FastPitch style avg
        self.energy_embed = nn.Sequential(
            nn.Conv1D(
                in_channels=1,
                out_channels=adim,
                kernel_size=energy_embed_kernel_size,
                padding=(energy_embed_kernel_size - 1) // 2, ),
            nn.Dropout(energy_embed_dropout), )

        # define length regulator
        self.length_regulator = GaussianUpsampling()

        # define decoder
        # NOTE: we use encoder as decoder
        # because fastspeech's decoder is the same as encoder
        if decoder_type == "transformer":
            self.decoder = TransformerEncoder(
                idim=0,
                attention_dim=adim,
                attention_heads=aheads,
                linear_units=dunits,
                num_blocks=dlayers,
                # in decoder, don't need layer before pos_enc_class (we use embedding here in encoder)
                input_layer=None,
                dropout_rate=transformer_dec_dropout_rate,
                positional_dropout_rate=transformer_dec_positional_dropout_rate,
                attention_dropout_rate=transformer_dec_attn_dropout_rate,
                pos_enc_layer_type=transformer_pos_enc_layer_type,
                normalize_before=decoder_normalize_before,
                concat_after=decoder_concat_after,
                positionwise_layer_type=positionwise_layer_type,
                positionwise_conv_kernel_size=positionwise_conv_kernel_size,
                activation_type=conformer_activation_type, )

        elif decoder_type == "conformer":
            self.decoder = ConformerEncoder(
                idim=0,
                attention_dim=adim,
                attention_heads=aheads,
                linear_units=dunits,
                num_blocks=dlayers,
                input_layer=None,
                dropout_rate=transformer_dec_dropout_rate,
                positional_dropout_rate=transformer_dec_positional_dropout_rate,
                attention_dropout_rate=transformer_dec_attn_dropout_rate,
                normalize_before=decoder_normalize_before,
                concat_after=decoder_concat_after,
                positionwise_layer_type=positionwise_layer_type,
                positionwise_conv_kernel_size=positionwise_conv_kernel_size,
                macaron_style=use_macaron_style_in_conformer,
                pos_enc_layer_type=conformer_pos_enc_layer_type,
                selfattention_layer_type=conformer_self_attn_layer_type,
                activation_type=conformer_activation_type,
                use_cnn_module=use_cnn_in_conformer,
                cnn_module_kernel=conformer_dec_kernel_size, )
        else:
            raise ValueError(f"{decoder_type} is not supported.")

        self.generator = HiFiGANGenerator(
            in_channels=adim,
            out_channels=generator_out_channels,
            channels=generator_channels,
            global_channels=generator_global_channels,
            kernel_size=generator_kernel_size,
            upsample_scales=generator_upsample_scales,
            upsample_kernel_sizes=generator_upsample_kernel_sizes,
            resblock_kernel_sizes=generator_resblock_kernel_sizes,
            resblock_dilations=generator_resblock_dilations,
            use_additional_convs=generator_use_additional_convs,
            bias=generator_bias,
            nonlinear_activation=generator_nonlinear_activation,
            nonlinear_activation_params=generator_nonlinear_activation_params,
            use_weight_norm=generator_use_weight_norm, )

        self.alignment_module = AlignmentModule(adim, odim)

        # initialize parameters
        self._reset_parameters(
            init_type=init_type,
            init_enc_alpha=init_enc_alpha,
            init_dec_alpha=init_dec_alpha, )

    def forward(
            self,
            text: paddle.Tensor,
            text_lengths: paddle.Tensor,
            feats: paddle.Tensor,
            feats_lengths: paddle.Tensor,
            durations: paddle.Tensor,
            durations_lengths: paddle.Tensor,
            pitch: paddle.Tensor,
            energy: paddle.Tensor,
            sids: Optional[paddle.Tensor]=None,
            spembs: Optional[paddle.Tensor]=None,
            lids: Optional[paddle.Tensor]=None,
            use_alignment_module: bool=False,
    ) -> Tuple[paddle.Tensor, Optional[paddle.Tensor], Optional[paddle.Tensor],
               paddle.Tensor, paddle.Tensor, paddle.Tensor, paddle.Tensor,
               paddle.Tensor, paddle.Tensor, paddle.Tensor]:
        """Calculate forward propagation.
        Args:
            text (Tensor):
                Text index tensor (B, T_text).
            text_lengths (Tensor):
                Text length tensor (B,).
            feats (Tensor):
                Feature tensor. It is sliced along axis 1 with
                ``feats_lengths.max()``, i.e. time is expected on axis 1
                (B, T_feats, aux_channels).
            feats_lengths (Tensor):
                Feature length tensor (B,).
            durations (Tensor):
                Externally provided durations, used as the ground truth
                when ``use_alignment_module`` is False (presumably
                (B, T_text) -- confirm against the caller).
            durations_lengths (Tensor):
                Lengths of ``durations``; only used to trim ``pitch`` and
                ``energy`` when ``use_alignment_module`` is True.
            pitch (Tensor): 
                Batch of padded pitch. Used as-is (token-averaged,
                (B, T_text, 1)) without the alignment module; averaged by
                the decoded durations when the alignment module is used.
            energy (Tensor):
                Batch of padded energy, handled the same way as ``pitch``.
            sids (Optional[Tensor]):
                Speaker index tensor (B,) or (B, 1).
            spembs (Optional[Tensor]):
                Speaker embedding tensor (B, spk_embed_dim).
            lids (Optional[Tensor]):
                Language index tensor (B,) or (B, 1).
            use_alignment_module (bool):
                Whether to use alignment module.
                
        Returns:
            Tensor: 
                Waveform tensor (B, 1, segment_size * upsample_factor).
            Optional[Tensor]: 
                binarization loss (), None without the alignment module
            Optional[Tensor]: 
                log probability attention matrix (B,T_feats,T_text),
                None without the alignment module
            Tensor: 
                Segments start index tensor (B,).
            Tensor: 
                predicted duration (B,T_text)
            Tensor: 
                ground-truth duration (B,T_text): obtained from the alignment
                module, or ``durations`` when it is not used
            Tensor: 
                predicted pitch (B,T_text,1)
            Tensor: 
                ground-truth averaged pitch (B,T_text,1)
            Tensor: 
                predicted energy (B,T_text,1)
            Tensor: 
                ground-truth averaged energy (B,T_text,1)
        """
        # trim padding to the longest item of this batch (for data-parallel)
        text = text[:, :text_lengths.max()]
        feats = feats[:, :feats_lengths.max()]
        # NOTE(review): the bound used for pitch/energy differs per branch and
        # is kept exactly as before -- confirm it matches their time axis.
        if use_alignment_module:
            pitch = pitch[:, :durations_lengths.max()]
            energy = energy[:, :durations_lengths.max()]
        else:
            pitch = pitch[:, :feats_lengths.max()]
            energy = energy[:, :feats_lengths.max()]

        # forward encoder
        x_masks = self._source_mask(text_lengths)
        hs, _ = self.encoder(text, x_masks)  # (B, T_text, adim)

        # integrate with GST
        if self.use_gst:
            # the style encoder consumes the target features; the previous
            # code referenced an undefined name here
            style_embs = self.gst(feats)
            hs = hs + style_embs.unsqueeze(1)

        # integrate with SID and LID embeddings
        if self.spks is not None:
            sid_embs = self.sid_emb(sids.reshape([-1]))
            hs = hs + sid_embs.unsqueeze(1)
        if self.langs is not None:
            lid_embs = self.lid_emb(lids.reshape([-1]))
            hs = hs + lid_embs.unsqueeze(1)

        # integrate speaker embedding
        if self.spk_embed_dim is not None:
            hs = self._integrate_with_spk_embed(hs, spembs)

        # forward alignment module and obtain duration, averaged pitch, energy
        h_masks = make_pad_mask(text_lengths)
        if use_alignment_module:
            # NOTE(review): inference() unpacks two values from the same
            # call -- confirm what AlignmentModule actually returns.
            log_p_attn = self.alignment_module(hs, feats, h_masks)
            ds, bin_loss = viterbi_decode(log_p_attn, text_lengths,
                                          feats_lengths)
            ps = average_by_duration(ds,
                                     pitch.squeeze(-1), text_lengths,
                                     feats_lengths).unsqueeze(-1)
            es = average_by_duration(ds,
                                     energy.squeeze(-1), text_lengths,
                                     feats_lengths).unsqueeze(-1)
        else:
            # rely on the externally provided targets
            ds = durations
            ps = pitch
            es = energy
            log_p_attn = bin_loss = None

        # forward duration predictor and variance predictors
        if self.stop_gradient_from_pitch_predictor:
            p_outs = self.pitch_predictor(hs.detach(), h_masks.unsqueeze(-1))
        else:
            p_outs = self.pitch_predictor(hs, h_masks.unsqueeze(-1))
        if self.stop_gradient_from_energy_predictor:
            e_outs = self.energy_predictor(hs.detach(), h_masks.unsqueeze(-1))
        else:
            e_outs = self.energy_predictor(hs, h_masks.unsqueeze(-1))

        d_outs = self.duration_predictor(hs, h_masks)

        # use groundtruth in training
        p_embs = self.pitch_embed(ps.transpose([0, 2, 1])).transpose([0, 2, 1])
        e_embs = self.energy_embed(es.transpose([0, 2, 1])).transpose([0, 2, 1])
        hs = hs + e_embs + p_embs

        # upsampling
        h_masks = make_non_pad_mask(feats_lengths)
        d_masks = make_non_pad_mask(text_lengths)
        hs = self.length_regulator(hs, ds, h_masks,
                                   d_masks)  # (B, T_feats, adim)

        # forward decoder
        h_masks = self._source_mask(feats_lengths)
        zs, _ = self.decoder(hs, h_masks)  # (B, T_feats, adim)

        # get random segments
        z_segments, z_start_idxs = get_random_segments(
            zs.transpose([0, 2, 1]),
            feats_lengths,
            self.segment_size, )
        # forward generator
        wav = self.generator(z_segments)

        # bin_loss / log_p_attn are None when the alignment module is unused
        return wav, bin_loss, log_p_attn, z_start_idxs, d_outs, ds, p_outs, ps, e_outs, es

    def inference(
            self,
            text: paddle.Tensor,
            text_lengths: paddle.Tensor,
            feats: Optional[paddle.Tensor]=None,
            feats_lengths: Optional[paddle.Tensor]=None,
            pitch: Optional[paddle.Tensor]=None,
            energy: Optional[paddle.Tensor]=None,
            sids: Optional[paddle.Tensor]=None,
            spembs: Optional[paddle.Tensor]=None,
            lids: Optional[paddle.Tensor]=None,
            use_alignment_module: bool=False,
    ) -> Tuple[paddle.Tensor, paddle.Tensor]:
        """Run inference.

        Args:
            text (Tensor): Input text index tensor (B, T_text,).
            text_lengths (Tensor): Text length tensor (B,).
            feats (Tensor): Feature tensor (B, T_feats, aux_channels).
                Required when GST or the alignment module is used.
            feats_lengths (Tensor): Feature length tensor (B,).
                When None, no masks are passed to the length regulator
                and the decoder.
            pitch (Tensor): Pitch tensor (B, T_feats, 1).
                Only read when ``use_alignment_module`` is True.
            energy (Tensor): Energy tensor (B, T_feats, 1).
                Only read when ``use_alignment_module`` is True.
            sids (Optional[Tensor]): Speaker index tensor (B,) or (B, 1).
            spembs (Optional[Tensor]): Speaker embedding tensor (B, spk_embed_dim).
            lids (Optional[Tensor]): Language index tensor (B,) or (B, 1).
            use_alignment_module (bool): Whether to use alignment module.

        Returns:
            Tensor: Generated waveform tensor (B, T_wav).
            Tensor: Duration tensor (B, T_text).

        Raises:
            ValueError: If GST is enabled but ``feats`` is not given.

        """
        # forward encoder
        x_masks = self._source_mask(text_lengths)
        hs, _ = self.encoder(text, x_masks)  # (B, T_text, adim)

        # integrate with GST
        if self.use_gst:
            # the style encoder needs reference features; the previous code
            # referenced an undefined name here
            if feats is None:
                raise ValueError("feats must be provided when use_gst is True.")
            style_embs = self.gst(feats)
            hs = hs + style_embs.unsqueeze(1)

        # integrate with SID and LID embeddings
        if self.spks is not None:
            sid_embs = self.sid_emb(sids.reshape([-1]))
            hs = hs + sid_embs.unsqueeze(1)
        if self.langs is not None:
            lid_embs = self.lid_emb(lids.reshape([-1]))
            hs = hs + lid_embs.unsqueeze(1)

        # integrate speaker embedding
        if self.spk_embed_dim is not None:
            hs = self._integrate_with_spk_embed(hs, spembs)

        h_masks = make_pad_mask(text_lengths)
        if use_alignment_module:
            # forward alignment module and obtain duration, averaged pitch, energy
            # NOTE(review): forward() binds the result of this same call to a
            # single name -- confirm what AlignmentModule actually returns.
            log_p_attn, attn = self.alignment_module(hs, feats, h_masks)
            d_outs, _ = viterbi_decode(log_p_attn, text_lengths, feats_lengths)
            p_outs = average_by_duration(d_outs,
                                         pitch.squeeze(-1), text_lengths,
                                         feats_lengths).unsqueeze(-1)
            e_outs = average_by_duration(d_outs,
                                         energy.squeeze(-1), text_lengths,
                                         feats_lengths).unsqueeze(-1)
        else:
            # forward duration predictor and variance predictors
            p_outs = self.pitch_predictor(hs, h_masks.unsqueeze(-1))
            e_outs = self.energy_predictor(hs, h_masks.unsqueeze(-1))
            d_outs = self.duration_predictor.inference(hs, h_masks)

        p_embs = self.pitch_embed(p_outs.transpose([0, 2, 1])).transpose(
            [0, 2, 1])
        e_embs = self.energy_embed(e_outs.transpose([0, 2, 1])).transpose(
            [0, 2, 1])
        hs = hs + e_embs + p_embs

        # upsampling
        if feats_lengths is not None:
            h_masks = make_non_pad_mask(feats_lengths)
        else:
            h_masks = None
        d_masks = make_non_pad_mask(text_lengths)
        hs = self.length_regulator(hs, d_outs, h_masks,
                                   d_masks)  # (B, T_feats, adim)

        # forward decoder
        if feats_lengths is not None:
            h_masks = self._source_mask(feats_lengths)
        else:
            h_masks = None
        zs, _ = self.decoder(hs, h_masks)  # (B, T_feats, adim)

        # forward generator
        wav = self.generator(zs.transpose([0, 2, 1]))

        return wav.squeeze(1), d_outs

    def _integrate_with_spk_embed(self,
                                  hs: paddle.Tensor,
                                  spembs: paddle.Tensor) -> paddle.Tensor:
        """Integrate speaker embedding with hidden states.

        Args:
            hs (Tensor): Batch of hidden state sequences (B, T_text, adim).
            spembs (Tensor): Batch of speaker embeddings (B, spk_embed_dim).

        Returns:
            Tensor: Batch of integrated hidden state sequences (B, T_text, adim).

        Raises:
            NotImplementedError: If ``spk_embed_integration_type`` is neither
                "add" nor "concat".

        """
        if self.spk_embed_integration_type == "add":
            # apply projection and then add to hidden states
            spembs = self.projection(F.normalize(spembs))
            hs = hs + spembs.unsqueeze(1)
        elif self.spk_embed_integration_type == "concat":
            # concat hidden states with spk embeds and then apply projection
            # Paddle's expand takes the target shape as ONE sequence (unlike
            # the PyTorch varargs form); -1 keeps the corresponding dimension.
            spembs = F.normalize(spembs).unsqueeze(1).expand(
                shape=[-1, hs.shape[1], -1])
            hs = self.projection(paddle.concat([hs, spembs], axis=-1))
        else:
            raise NotImplementedError("support only add or concat.")

        return hs

    def _generate_path(self, dur: paddle.Tensor,
                       mask: paddle.Tensor) -> paddle.Tensor:
        """Generate path a.k.a. monotonic attention.
        Args:
            dur (Tensor):
                Duration tensor (B, 1, T_text).
            mask (Tensor):
                Attention mask tensor (B, 1, T_feats, T_text).
        Returns:
            Tensor:
                Path tensor (B, 1, T_feats, T_text).
        """
        b, _, t_y, t_x = paddle.shape(mask)
        cum_dur = paddle.cumsum(dur, -1)
        cum_dur_flat = paddle.reshape(cum_dur, [b * t_x])

        # frame index < cumulative duration  ->  cumulative (staircase) rows
        path = paddle.arange(t_y, dtype=dur.dtype)
        path = path.unsqueeze(0) < cum_dur_flat.unsqueeze(1)
        path = paddle.reshape(path, [b, t_x, t_y])
        '''
        path will be like (t_x = 3, t_y = 5):
        [[[1., 1., 0., 0., 0.],      [[[1., 1., 0., 0., 0.],
          [1., 1., 1., 1., 0.],  -->   [0., 0., 1., 1., 0.],
          [1., 1., 1., 1., 1.]]]       [0., 0., 0., 0., 1.]]]
        '''

        path = paddle.cast(path, dtype='float32')
        # Subtract from every token row the row of the previous token (zeros
        # for the first token) so that each row keeps only its own frames.
        # The shift is built explicitly: the constructor of this class does
        # not create the ``self.pad1d`` layer that was referenced here before.
        prev_rows = paddle.concat(
            [paddle.zeros_like(path[:, :1]), path[:, :-1]], axis=1)
        path = path - prev_rows
        return path.unsqueeze(1).transpose([0, 1, 3, 2]) * mask

    def _source_mask(self, ilens: paddle.Tensor) -> paddle.Tensor:
        """Build the self-attention mask for a batch of sequence lengths.

        The non-padding mask returned by ``make_non_pad_mask`` is converted
        to a tensor and a singleton axis is inserted before its last axis.

        Args:
            ilens (LongTensor): Batch of lengths (B,).

        Returns:
            Tensor: Mask tensor for self-attention, with one extra axis of
                size 1 in the second-to-last position (presumably
                (B, 1, Tmax) -- depends on ``make_non_pad_mask``).

        """
        non_pad = make_non_pad_mask(ilens)
        mask = paddle.to_tensor(non_pad)
        return mask.unsqueeze(-2)

    def _reset_parameters(self,
                          init_type: str,
                          init_enc_alpha: float,
                          init_dec_alpha: float):
        """Initialize the parameters of the whole module.

        Args:
            init_type (str): Initialization scheme forwarded to ``initialize``.
            init_enc_alpha (float): Initial value of ``alpha`` in the
                encoder's last embedding sub-layer.
            init_dec_alpha (float): Initial value of ``alpha`` in the
                decoder's last embedding sub-layer.

        """
        initialize(self, init_type)

        # alpha only exists for the scaled positional encoding
        if not self.use_scaled_pos_enc:
            return

        if self.encoder_type == "transformer":
            enc_pos = self.encoder.embed[-1]
            enc_pos.alpha.data = paddle.to_tensor(init_enc_alpha)
        if self.decoder_type == "transformer":
            dec_pos = self.decoder.embed[-1]
            dec_pos.alpha.data = paddle.to_tensor(init_dec_alpha)


================================================
FILE: paddlespeech/t2s/models/jets/jets.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""JETS module (generator + discriminator).

This code is based on https://github.com/imdanboy/jets.

"""
import math
from typing import Any
from typing import Dict
from typing import Optional

import paddle
from paddle import nn
from typeguard import typechecked

from paddlespeech.t2s.models.hifigan import HiFiGANMultiPeriodDiscriminator
from paddlespeech.t2s.models.hifigan import HiFiGANMultiScaleDiscriminator
from paddlespeech.t2s.models.hifigan import HiFiGANMultiScaleMultiPeriodDiscriminator
from paddlespeech.t2s.models.hifigan import HiFiGANPeriodDiscriminator
from paddlespeech.t2s.models.hifigan import HiFiGANScaleDiscriminator
from paddlespeech.t2s.models.jets.generator import JETSGenerator
from paddlespeech.utils.initialize import _calculate_fan_in_and_fan_out
from paddlespeech.utils.initialize import kaiming_uniform_
from paddlespeech.utils.initialize import normal_
from paddlespeech.utils.initialize import ones_
from paddlespeech.utils.initialize import uniform_
from paddlespeech.utils.initialize import zeros_

# Registry: generator type name -> generator class.
AVAILABLE_GENERATERS = {"jets_generator": JETSGenerator}

# Registry: discriminator type name -> discriminator class.
AVAILABLE_DISCRIMINATORS = {
    "hifigan_period_discriminator": HiFiGANPeriodDiscriminator,
    "hifigan_scale_discriminator": HiFiGANScaleDiscriminator,
    "hifigan_multi_period_discriminator": HiFiGANMultiPeriodDiscriminator,
    "hifigan_multi_scale_discriminator": HiFiGANMultiScaleDiscriminator,
    "hifigan_multi_scale_multi_period_discriminator": HiFiGANMultiScaleMultiPeriodDiscriminator,
}


class JETS(nn.Layer):
    """JETS module (generator + discriminator).

    This is a module of JETS described in `JETS: Jointly Training FastSpeech2
    and HiFi-GAN for End to End Text to Speech`_.

    .. _`JETS: Jointly Training FastSpeech2 and HiFi-GAN for End to End Text to Speech`:
        https://arxiv.org/abs/2203.16852v1
    """

    @typechecked
    def __init__(
            self,
            # generator related
            idim: int,
            odim: int,
            sampling_rate: int=22050,
            generator_type: str="jets_generator",
            generator_params: Dict[str, Any]={
                "adim": 256,
                "aheads": 2,
                "elayers": 4,
                "eunits": 1024,
                "dlayers": 4,
                "dunits": 1024,
                "positionwise_layer_type": "conv1d",
                "positionwise_conv_kernel_size": 1,
                "use_scaled_pos_enc": True,
                "use_batch_norm": True,
                "encoder_normalize_before": True,
                "decoder_normalize_before": True,
                "encoder_concat_after": False,
                "decoder_concat_after": False,
                "reduction_factor": 1,
                "encoder_type": "transformer",
                "decoder_type": "transformer",
                "transformer_enc_dropout_rate": 0.1,
                "transformer_enc_positional_dropout_rate": 0.1,
                "transformer_enc_attn_dropout_rate": 0.1,
                "transformer_dec_dropout_rate": 0.1,
                "transformer_dec_positional_dropout_rate": 0.1,
                "transformer_dec_attn_dropout_rate": 0.1,
                "conformer_rel_pos_type": "latest",
                "conformer_pos_enc_layer_type": "rel_pos",
                "conformer_self_attn_layer_type": "rel_selfattn",
                "conformer_activation_type": "swish",
                "use_macaron_style_in_conformer": True,
                "use_cnn_in_conformer": True,
                "zero_triu": False,
                "conformer_enc_kernel_size": 7,
                "conformer_dec_kernel_size": 31,
                "duration_predictor_layers": 2,
                "duration_predictor_chans": 384,
                "duration_predictor_kernel_size": 3,
                "duration_predictor_dropout_rate": 0.1,
                "energy_predictor_layers": 2,
                "energy_predictor_chans": 384,
                "energy_predictor_kernel_size": 3,
                "energy_predictor_dropout": 0.5,
                "energy_embed_kernel_size": 1,
                "energy_embed_dropout": 0.5,
                "stop_gradient_from_energy_predictor": False,
                "pitch_predictor_layers": 5,
                "pitch_predictor_chans": 384,
                "pitch_predictor_kernel_size": 5,
                "pitch_predictor_dropout": 0.5,
                "pitch_embed_kernel_size": 1,
                "pitch_embed_dropout": 0.5,
                "stop_gradient_from_pitch_predictor": True,
                "generator_out_channels": 1,
                "generator_channels": 512,
                "generator_global_channels": -1,
                "generator_kernel_size": 7,
                "generator_upsample_scales": [8, 8, 2, 2],
                "generator_upsample_kernel_sizes": [16, 16, 4, 4],
                "generator_resblock_kernel_sizes": [3, 7, 11],
                "generator_resblock_dilations":
                [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
                "generator_use_additional_convs": True,
                "generator_bias": True,
                "generator_nonlinear_activation": "LeakyReLU",
                "generator_nonlinear_activation_params": {
                    "negative_slope": 0.1
                },
                "generator_use_weight_norm": True,
                "segment_size": 64,
                "spks": -1,
                "langs": -1,
                "spk_embed_dim": None,
                "spk_embed_integration_type": "add",
                "use_gst": False,
                "gst_tokens": 10,
                "gst_heads": 4,
                "gst_conv_layers": 6,
                "gst_conv_chans_list": [32, 32, 64, 64, 128, 128],
                "gst_conv_kernel_size": 3,
                "gst_conv_stride": 2,
                "gst_gru_layers": 1,
                "gst_gru_units": 128,
                "init_type": "xavier_uniform",
                "init_enc_alpha": 1.0,
                "init_dec_alpha": 1.0,
                "use_masking": False,
                "use_weighted_masking": False,
            },
            # discriminator related
            discriminator_type: str="hifigan_multi_scale_multi_period_discriminator",
            discriminator_params: Dict[str, Any]={
                "scales": 1,
                "scale_downsample_pooling": "AvgPool1D",
                "scale_downsample_pooling_params": {
                    "kernel_size": 4,
                    "stride": 2,
                    "padding": 2,
                },
                "scale_discriminator_params": {
                    "in_channels": 1,
                    "out_channels": 1,
                    "kernel_sizes": [15, 41, 5, 3],
                    "channels": 128,
                    "max_downsample_channels": 1024,
                    "max_groups": 16,
                    "bias": True,
                    "downsample_scales": [2, 2, 4, 4, 1],
                    "nonlinear_activation": "leakyrelu",
                    "nonlinear_activation_params": {
                        "negative_slope": 0.1
                    },
                    "use_weight_norm": True,
                    "use_spectral_norm": False,
                },
                "follow_official_norm": False,
                "periods": [2, 3, 5, 7, 11],
                "period_discriminator_params": {
                    "in_channels": 1,
                    "out_channels": 1,
                    "kernel_sizes": [5, 3],
                    "channels": 32,
                    "downsample_scales": [3, 3, 3, 3, 1],
                    "max_downsample_channels": 1024,
                    "bias": True,
                    "nonlinear_activation": "leakyrelu",
                    "nonlinear_activation_params": {
                        "negative_slope": 0.1
                    },
                    "use_weight_norm": True,
                    "use_spectral_norm": False,
                },
            },
            cache_generator_outputs: bool=True, ):
        """Initialize JETS module.
        Args:
            idim (int):
                Input vocabulary size.
            odim (int):
                Acoustic feature dimension. The actual output channels will
                be 1 since JETS is the end-to-end text-to-wave model but for the
                compatibility odim is used to indicate the acoustic feature dimension.
            sampling_rate (int):
                Sampling rate, not used for the training but it will
                be referred in saving waveform during the inference.
            generator_type (str):
                Generator type, a key of ``AVAILABLE_GENERATERS``.
            generator_params (Dict[str, Any]):
                Parameter dict for generator. It is copied before use, so
                neither the caller's dict nor the default is modified. It must
                contain ``init_type``, ``init_enc_alpha`` and ``init_dec_alpha``.
            discriminator_type (str):
                Discriminator type, a key of ``AVAILABLE_DISCRIMINATORS``.
            discriminator_params (Dict[str, Any]):
                Parameter dict for discriminator.
            cache_generator_outputs (bool):
                Whether to cache generator outputs.
        Raises:
            KeyError:
                If ``generator_type`` / ``discriminator_type`` is not registered
                or ``generator_params`` lacks one of the ``init_*`` entries.
        """
        super().__init__()

        # Work on a shallow copy: the default value of `generator_params` is a
        # single dict object shared by every call, and the `update` below would
        # otherwise leak `idim` / `odim` into later constructions (and into the
        # caller's own config dict).
        generator_params = dict(generator_params)

        # define modules
        generator_class = AVAILABLE_GENERATERS[generator_type]
        if generator_type == "jets_generator":
            # NOTE: Update parameters for the compatibility.
            #   The idim and odim is automatically decided from input data,
            #   where idim represents #vocabularies and odim represents
            #   the input acoustic feature dimension.
            generator_params.update(idim=idim, odim=odim)
        self.generator = generator_class(
            **generator_params, )
        discriminator_class = AVAILABLE_DISCRIMINATORS[discriminator_type]
        self.discriminator = discriminator_class(
            **discriminator_params, )

        # cache
        self.cache_generator_outputs = cache_generator_outputs
        self._cache = None

        # store sampling rate for saving wav file
        # (not used for the training)
        self.fs = sampling_rate

        # store parameters for test compatibility
        self.spks = self.generator.spks
        self.langs = self.generator.langs
        self.spk_embed_dim = self.generator.spk_embed_dim

        # Flags set by the two forward branches: True when the last call was
        # served from `self._cache` instead of running the generator.
        self.reuse_cache_gen = True
        self.reuse_cache_dis = True

        # Generic initialisation of every sub-layer first, then the
        # generator's own scheme on top of it.
        self.reset_parameters()
        self.generator._reset_parameters(
            init_type=generator_params["init_type"],
            init_enc_alpha=generator_params["init_enc_alpha"],
            init_dec_alpha=generator_params["init_dec_alpha"], )

    def forward(
            self,
            text: paddle.Tensor,
            text_lengths: paddle.Tensor,
            feats: paddle.Tensor,
            feats_lengths: paddle.Tensor,
            durations: paddle.Tensor,
            durations_lengths: paddle.Tensor,
            pitch: paddle.Tensor,
            energy: paddle.Tensor,
            sids: Optional[paddle.Tensor]=None,
            spembs: Optional[paddle.Tensor]=None,
            lids: Optional[paddle.Tensor]=None,
            forward_generator: bool=True,
            use_alignment_module: bool=False,
            **kwargs, ) -> Dict[str, Any]:
        """Dispatch to the generator-turn or discriminator-turn forward.
        Args:
            text (Tensor):
                Text index tensor (B, T_text).
            text_lengths (Tensor):
                Text length tensor (B,).
            feats (Tensor):
                Feature tensor (B, T_feats, aux_channels).
            feats_lengths (Tensor):
                Feature length tensor (B,).
            durations(Tensor(int64)): 
                Batch of padded durations (B, Tmax).
            durations_lengths (Tensor):
                durations length tensor (B,).
            pitch(Tensor): 
                Batch of padded token-averaged pitch (B, Tmax, 1).
            energy(Tensor): 
                Batch of padded token-averaged energy (B, Tmax, 1).
            sids (Optional[Tensor]):
                Speaker index tensor (B,) or (B, 1).
            spembs (Optional[Tensor]):
                Speaker embedding tensor (B, spk_embed_dim).
            lids (Optional[Tensor]):
                Language index tensor (B,) or (B, 1).
            forward_generator (bool):
                Whether to forward generator.
            use_alignment_module (bool):
                Whether to use alignment module.
            **kwargs:
                Accepted for interface compatibility; not forwarded.
        Returns:
            The generator outputs, exactly as returned by ``self.generator``
            (or the cached copy of them).
        """
        if forward_generator:
            return self._forward_generator(
                text=text,
                text_lengths=text_lengths,
                feats=feats,
                feats_lengths=feats_lengths,
                durations=durations,
                durations_lengths=durations_lengths,
                pitch=pitch,
                energy=energy,
                sids=sids,
                spembs=spembs,
                lids=lids,
                use_alignment_module=use_alignment_module, )
        else:
            return self._forward_discrminator(
                text=text,
                text_lengths=text_lengths,
                feats=feats,
                feats_lengths=feats_lengths,
                durations=durations,
                durations_lengths=durations_lengths,
                pitch=pitch,
                energy=energy,
                sids=sids,
                spembs=spembs,
                lids=lids,
                use_alignment_module=use_alignment_module, )

    def _forward_generator(
            self,
            text: paddle.Tensor,
            text_lengths: paddle.Tensor,
            feats: paddle.Tensor,
            feats_lengths: paddle.Tensor,
            durations: paddle.Tensor,
            durations_lengths: paddle.Tensor,
            pitch: paddle.Tensor,
            energy: paddle.Tensor,
            sids: Optional[paddle.Tensor]=None,
            spembs: Optional[paddle.Tensor]=None,
            lids: Optional[paddle.Tensor]=None,
            use_alignment_module: bool=False,
            **kwargs, ) -> Dict[str, Any]:
        """Perform generator forward.
        Args:
            text (Tensor):
                Text index tensor (B, T_text).
            text_lengths (Tensor):
                Text length tensor (B,).
            feats (Tensor):
                Feature tensor (B, T_feats, aux_channels).
            feats_lengths (Tensor):
                Feature length tensor (B,).
            durations(Tensor(int64)): 
                Batch of padded durations (B, Tmax).
            durations_lengths (Tensor):
                durations length tensor (B,).
            pitch(Tensor): 
                Batch of padded token-averaged pitch (B, Tmax, 1).
            energy(Tensor): 
                Batch of padded token-averaged energy (B, Tmax, 1).
            sids (Optional[Tensor]):
                Speaker index tensor (B,) or (B, 1).
            spembs (Optional[Tensor]):
                Speaker embedding tensor (B, spk_embed_dim).
            lids (Optional[Tensor]):
                Language index tensor (B,) or (B, 1).
            use_alignment_module (bool):
                Whether to use alignment module.
            **kwargs:
                Ignored by this branch.
        Returns:
            The generator outputs. They are freshly computed when caching is
            disabled or the cache is empty; otherwise the cached outputs are
            returned and ``self.reuse_cache_gen`` is left ``True``.
        """
        # setup
        # calculate generator outputs
        self.reuse_cache_gen = True
        if not self.cache_generator_outputs or self._cache is None:
            self.reuse_cache_gen = False
            outs = self.generator(
                text=text,
                text_lengths=text_lengths,
                feats=feats,
                feats_lengths=feats_lengths,
                durations=durations,
                durations_lengths=durations_lengths,
                pitch=pitch,
                energy=energy,
                sids=sids,
                spembs=spembs,
                lids=lids,
                use_alignment_module=use_alignment_module, )
        else:
            outs = self._cache

        # store cache (only while training, and only for fresh outputs)
        if self.training and self.cache_generator_outputs and not self.reuse_cache_gen:
            self._cache = outs

        return outs

    def _forward_discrminator(
            self,
            text: paddle.Tensor,
            text_lengths: paddle.Tensor,
            feats: paddle.Tensor,
            feats_lengths: paddle.Tensor,
            durations: paddle.Tensor,
            durations_lengths: paddle.Tensor,
            pitch: paddle.Tensor,
            energy: paddle.Tensor,
            sids: Optional[paddle.Tensor]=None,
            spembs: Optional[paddle.Tensor]=None,
            lids: Optional[paddle.Tensor]=None,
            use_alignment_module: bool=False,
            **kwargs, ) -> Dict[str, Any]:
        """Perform discriminator forward.

        Only the generator is run here; the caller is expected to feed the
        returned outputs to ``self.discriminator`` itself.

        Args:
            text (Tensor):
                Text index tensor (B, T_text).
            text_lengths (Tensor):
                Text length tensor (B,).
            feats (Tensor):
                Feature tensor (B, T_feats, aux_channels).
            feats_lengths (Tensor):
                Feature length tensor (B,).
            durations(Tensor(int64)): 
                Batch of padded durations (B, Tmax).
            durations_lengths (Tensor):
                durations length tensor (B,).
            pitch(Tensor): 
                Batch of padded token-averaged pitch (B, Tmax, 1).
            energy(Tensor): 
                Batch of padded token-averaged energy (B, Tmax, 1).
            sids (Optional[Tensor]):
                Speaker index tensor (B,) or (B, 1).
            spembs (Optional[Tensor]):
                Speaker embedding tensor (B, spk_embed_dim).
            lids (Optional[Tensor]):
                Language index tensor (B,) or (B, 1).
            use_alignment_module (bool):
                Whether to use alignment module.
            **kwargs:
                Forwarded to the generator when it is actually run.
        Returns:
            The generator outputs. They are freshly computed when caching is
            disabled or the cache is empty; otherwise the cached outputs are
            returned and ``self.reuse_cache_dis`` is left ``True``.
        """
        # setup
        # calculate generator outputs
        self.reuse_cache_dis = True
        if not self.cache_generator_outputs or self._cache is None:
            self.reuse_cache_dis = False
            outs = self.generator(
                text=text,
                text_lengths=text_lengths,
                feats=feats,
                feats_lengths=feats_lengths,
                durations=durations,
                durations_lengths=durations_lengths,
                pitch=pitch,
                energy=energy,
                sids=sids,
                spembs=spembs,
                lids=lids,
                use_alignment_module=use_alignment_module,
                **kwargs, )
        else:
            outs = self._cache

        # store cache (unlike the generator branch this does not check
        # `self.training`)
        if self.cache_generator_outputs and not self.reuse_cache_dis:
            self._cache = outs

        return outs

    def inference(self,
                  text: paddle.Tensor,
                  feats: Optional[paddle.Tensor]=None,
                  pitch: Optional[paddle.Tensor]=None,
                  energy: Optional[paddle.Tensor]=None,
                  use_alignment_module: bool=False,
                  **kwargs) -> Dict[str, paddle.Tensor]:
        """Run inference.
        Args:
            text (Tensor):
                Input text index tensor (T_text,).
            feats (Tensor):
                Feature tensor (T_feats, aux_channels).
                Required when ``use_alignment_module`` is True.
            pitch (Tensor):
                Pitch tensor (T_feats, 1).
                Required when ``use_alignment_module`` is True.
            energy (Tensor): 
                Energy tensor (T_feats, 1).
                Required when ``use_alignment_module`` is True.
            use_alignment_module (bool):
                Whether to use alignment module.
            **kwargs:
                Extra keyword arguments forwarded to ``self.generator.inference``.
        Returns:
            Dict[str, Tensor]:
                * wav (Tensor):
                    Generated waveform tensor (T_wav,).
                * duration (Tensor):
                    Predicted duration tensor (T_text,).
        """
        # setup: add a batch axis and derive the length from the shape
        text = text[None]
        text_lengths = paddle.to_tensor(paddle.shape(text)[1])

        # inference
        if use_alignment_module:
            assert feats is not None
            # pitch / energy are indexed below exactly like feats, so fail
            # with the same kind of error instead of `None[None]`.
            assert pitch is not None and energy is not None
            feats = feats[None]
            feats_lengths = paddle.to_tensor(paddle.shape(feats)[1])
            pitch = pitch[None]
            energy = energy[None]
            wav, dur = self.generator.inference(
                text=text,
                text_lengths=text_lengths,
                feats=feats,
                feats_lengths=feats_lengths,
                pitch=pitch,
                energy=energy,
                use_alignment_module=use_alignment_module,
                **kwargs)
        else:
            wav, dur = self.generator.inference(
                text=text,
                text_lengths=text_lengths,
                **kwargs, )
        # flatten the waveform and drop the batch axis of the durations
        return dict(wav=paddle.reshape(wav, [-1]), duration=dur[0])

    def reset_parameters(self):
        """Re-initialise the parameters of every sub-layer of this module.

        * Conv1D / Conv1DTranspose / Conv2D / Conv2DTranspose and Linear:
          Kaiming-uniform weights with ``a=sqrt(5)`` and biases drawn
          uniformly from ``[-1/sqrt(fan_in), 1/sqrt(fan_in)]``.
        * BatchNorm1D / BatchNorm2D / GroupNorm / LayerNorm: weight set to
          ones, bias set to zeros.
        * Embedding: ``normal_`` weights, with the padding row (if any)
          zeroed.

        NOTE(review): this presumably mirrors PyTorch's default layer
        initialisation so that results match the reference implementation -
        confirm.
        """

        def _reset_parameters(module):
            if isinstance(
                    module,
                (nn.Conv1D, nn.Conv1DTranspose, nn.Conv2D, nn.Conv2DTranspose)):
                kaiming_uniform_(module.weight, a=math.sqrt(5))
                if module.bias is not None:
                    fan_in, _ = _calculate_fan_in_and_fan_out(module.weight)
                    # a zero fan-in leaves the bias untouched
                    if fan_in != 0:
                        bound = 1 / math.sqrt(fan_in)
                        uniform_(module.bias, -bound, bound)

            if isinstance(
                    module,
                (nn.BatchNorm1D, nn.BatchNorm2D, nn.GroupNorm, nn.LayerNorm)):
                ones_(module.weight)
                zeros_(module.bias)

            if isinstance(module, nn.Linear):
                kaiming_uniform_(module.weight, a=math.sqrt(5))
                if module.bias is not None:
                    fan_in, _ = _calculate_fan_in_and_fan_out(module.weight)
                    # a non-positive fan-in collapses the range to [0, 0]
                    bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
                    uniform_(module.bias, -bound, bound)

            if isinstance(module, nn.Embedding):
                normal_(module.weight)
                if module._padding_idx is not None:
                    with paddle.no_grad():
                        module.weight[module._padding_idx] = 0

        self.apply(_reset_parameters)


class JETSInference(nn.Layer):
    """Thin inference wrapper that exposes only the synthesized waveform.

    Args:
        model: Object providing an ``inference(text)`` method whose result is
            indexable with the key ``'wav'`` (e.g. a ``JETS`` instance).
    """

    def __init__(self, model):
        super().__init__()
        self.acoustic_model = model

    def forward(self, text, sids=None):
        """Synthesize ``text`` and return the ``'wav'`` entry of the result.

        NOTE(review): ``sids`` is accepted but not passed on to the wrapped
        model - confirm whether multi-speaker inference should forward it.
        """
        return self.acoustic_model.inference(text)['wav']


================================================
FILE: paddlespeech/t2s/models/jets/jets_updater.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Updater and evaluator for JETS training.

This code is based on https://github.com/imdanboy/jets.

"""
import logging
from typing import Dict

import paddle
from paddle import distributed as dist
from paddle.io import DataLoader
from paddle.nn import Layer
from paddle.optimizer import Optimizer
from paddle.optimizer.lr import LRScheduler

from paddlespeech.t2s.modules.nets_utils import get_segments
from paddlespeech.t2s.training.extensions.evaluator import StandardEvaluator
from paddlespeech.t2s.training.reporter import report
from paddlespeech.t2s.training.updaters.standard_updater import StandardUpdater
from paddlespeech.t2s.training.updaters.standard_updater import UpdaterState

# Record layout and timestamp layout handed to the root logger configuration.
_LOG_RECORD_FORMAT = '%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(message)s'
_LOG_DATE_FORMAT = '[%Y-%m-%d %H:%M:%S]'
logging.basicConfig(format=_LOG_RECORD_FORMAT, datefmt=_LOG_DATE_FORMAT)

# Module-level logger; the updater / evaluator below attach per-worker file
# handlers to it.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class JETSUpdater(StandardUpdater):
    """Updater running one generator step and one discriminator step per batch.

    The order of the two steps is controlled by ``generator_first``. Both
    steps obtain the generator outputs through ``model(...)``, so the model's
    output cache lets the second step reuse what the first one computed.
    """

    def __init__(self,
                 model: Layer,
                 optimizers: Dict[str, Optimizer],
                 criterions: Dict[str, Layer],
                 schedulers: Dict[str, LRScheduler],
                 dataloader: DataLoader,
                 generator_train_start_steps: int=0,
                 discriminator_train_start_steps: int=100000,
                 lambda_adv: float=1.0,
                 lambda_mel: float=45.0,
                 lambda_feat_match: float=2.0,
                 lambda_var: float=1.0,
                 lambda_align: float=2.0,
                 generator_first: bool=False,
                 use_alignment_module: bool=False,
                 output_dir=None):
        """Initialize the updater.

        Args:
            model (Layer):
                JETS model, optionally wrapped in ``paddle.DataParallel``.
            optimizers (Dict[str, Optimizer]):
                Must contain the keys ``'generator'`` and ``'discriminator'``.
            criterions (Dict[str, Layer]):
                Must contain the keys ``'mel'``, ``'feat_match'``,
                ``'gen_adv'``, ``'dis_adv'``, ``'var'`` and ``'forwardsum'``.
            schedulers (Dict[str, LRScheduler]):
                Must contain the keys ``'generator'`` and ``'discriminator'``.
            dataloader (DataLoader):
                Training data loader.
            generator_train_start_steps (int):
                Stored on the instance; not consulted by ``update_core``.
            discriminator_train_start_steps (int):
                Stored on the instance; not consulted by ``update_core``.
            lambda_adv (float):
                Weight of the generator adversarial loss.
            lambda_mel (float):
                Weight of the mel loss.
            lambda_feat_match (float):
                Weight of the feature matching loss.
            lambda_var (float):
                Weight of the summed duration / pitch / energy loss.
            lambda_align (float):
                Weight of the alignment (forward-sum + bin) loss.
            generator_first (bool):
                Run the generator step before the discriminator step.
            use_alignment_module (bool):
                Passed to the model; also enables the alignment loss.
            output_dir:
                Directory receiving ``worker_<rank>.log``. When ``None`` no
                log file handler is installed.
        """
        from pathlib import Path

        # it is designed to hold multiple models
        # A single model is passed in and the parent class's __init__() is
        # not called, so the attributes it would set up are created here.
        models = {"main": model}
        self.models: Dict[str, Layer] = models

        # Unwrap DataParallel so the model's sub-layers and attributes can be
        # accessed directly in `update_core`.
        self.model = model._layers if isinstance(model,
                                                 paddle.DataParallel) else model

        self.optimizers = optimizers
        self.optimizer_g: Optimizer = optimizers['generator']
        self.optimizer_d: Optimizer = optimizers['discriminator']

        self.criterions = criterions
        self.criterion_mel = criterions['mel']
        self.criterion_feat_match = criterions['feat_match']
        self.criterion_gen_adv = criterions["gen_adv"]
        self.criterion_dis_adv = criterions["dis_adv"]
        self.criterion_var = criterions["var"]
        self.criterion_forwardsum = criterions["forwardsum"]

        self.schedulers = schedulers
        self.scheduler_g = schedulers['generator']
        self.scheduler_d = schedulers['discriminator']

        self.dataloader = dataloader

        self.generator_train_start_steps = generator_train_start_steps
        self.discriminator_train_start_steps = discriminator_train_start_steps

        self.lambda_adv = lambda_adv
        self.lambda_mel = lambda_mel
        self.lambda_feat_match = lambda_feat_match
        self.lambda_var = lambda_var
        self.lambda_align = lambda_align

        self.use_alignment_module = use_alignment_module

        if generator_first:
            self.turns = ["generator", "discriminator"]
        else:
            self.turns = ["discriminator", "generator"]

        self.state = UpdaterState(iteration=0, epoch=0)
        self.train_iterator = iter(self.dataloader)

        # `output_dir` defaults to None; only attach a per-worker log file
        # when a directory was actually given (str and Path both accepted).
        self.filehandler = None
        if output_dir is not None:
            log_file = Path(output_dir) / 'worker_{}.log'.format(
                dist.get_rank())
            self.filehandler = logging.FileHandler(str(log_file))
            logger.addHandler(self.filehandler)
        self.logger = logger
        self.msg = ""

    def _real_segments(self, speech, start_idxs):
        """Cut from ``speech`` the waveform segments matching the generated ones."""
        generator = self.model.generator
        return get_segments(
            x=speech,
            start_idxs=start_idxs * generator.upsample_factor,
            segment_size=generator.segment_size * generator.upsample_factor, )

    def update_core(self, batch):
        """Run one generator step and one discriminator step on ``batch``.

        Every loss is reported under ``train/<name>`` and appended to
        ``self.msg``. When ``use_alignment_module`` is enabled the weighted
        alignment loss is part of the back-propagated generator loss.

        Args:
            batch: Mapping with the keys ``speech``, ``text``,
                ``text_lengths``, ``feats``, ``feats_lengths``, ``durations``,
                ``durations_lengths``, ``pitch``, ``energy`` and optionally
                ``spk_id`` / ``spk_emb``.
        """
        self.msg = "Rank: {}, ".format(dist.get_rank())
        losses_dict = {}

        # These do not depend on the turn, so compute them once.
        speech = batch["speech"].unsqueeze(1)
        text_lengths = batch["text_lengths"]
        feats_lengths = batch["feats_lengths"]

        for turn in self.turns:
            outs = self.model(
                text=batch["text"],
                text_lengths=batch["text_lengths"],
                feats=batch["feats"],
                feats_lengths=batch["feats_lengths"],
                durations=batch["durations"],
                durations_lengths=batch["durations_lengths"],
                pitch=batch["pitch"],
                energy=batch["energy"],
                sids=batch.get("spk_id", None),
                spembs=batch.get("spk_emb", None),
                forward_generator=turn == "generator",
                use_alignment_module=self.use_alignment_module)
            # Generator
            if turn == "generator":
                # parse outputs
                speech_hat_, bin_loss, log_p_attn, start_idxs, d_outs, ds, p_outs, ps, e_outs, es = outs
                speech_ = self._real_segments(speech, start_idxs)

                # calculate discriminator outputs
                p_hat = self.model.discriminator(speech_hat_)
                with paddle.no_grad():
                    # do not store discriminator gradient in generator turn
                    p = self.model.discriminator(speech_)

                # calculate losses
                mel_loss = self.criterion_mel(speech_hat_,
                                              speech_) * self.lambda_mel
                adv_loss = self.criterion_gen_adv(p_hat) * self.lambda_adv
                feat_match_loss = self.criterion_feat_match(
                    p_hat, p) * self.lambda_feat_match
                dur_loss, pitch_loss, energy_loss = self.criterion_var(
                    d_outs, ds, p_outs, ps, e_outs, es, text_lengths)

                g_loss = mel_loss + adv_loss + feat_match_loss
                var_loss = (
                    dur_loss + pitch_loss + energy_loss) * self.lambda_var
                gen_loss = g_loss + var_loss

                alignment_report = {}
                if self.use_alignment_module:
                    forwardsum_loss = self.criterion_forwardsum(
                        log_p_attn, text_lengths, feats_lengths)
                    align_loss = (
                        forwardsum_loss + bin_loss) * self.lambda_align
                    # The alignment loss must be part of the optimised
                    # objective, not only of the report.
                    gen_loss = gen_loss + align_loss
                    alignment_report = {
                        "generator_alignment_loss": float(align_loss),
                        "generator_alignment_forwardsum_loss":
                        float(forwardsum_loss),
                        "generator_alignment_bin_loss": float(bin_loss),
                    }

                # Convert each loss to a Python float once and reuse it for
                # both the reporter and the log message.
                generator_report = {
                    "generator_loss": float(gen_loss),
                    "generator_generator_loss": float(g_loss),
                    "generator_variance_loss": float(var_loss),
                    "generator_generator_mel_loss": float(mel_loss),
                    "generator_generator_adv_loss": float(adv_loss),
                    "generator_generator_feat_match_loss":
                    float(feat_match_loss),
                    "generator_variance_dur_loss": float(dur_loss),
                    "generator_variance_pitch_loss": float(pitch_loss),
                    "generator_variance_energy_loss": float(energy_loss),
                }
                generator_report.update(alignment_report)
                for name, value in generator_report.items():
                    report("train/" + name, value)
                losses_dict.update(generator_report)

                self.optimizer_g.clear_grad()
                gen_loss.backward()

                self.optimizer_g.step()
                self.scheduler_g.step()

                # reset cache
                if self.model.reuse_cache_gen or not self.model.training:
                    self.model._cache = None

            # Discriminator
            elif turn == "discriminator":
                # parse outputs
                speech_hat_, _, _, start_idxs, *_ = outs
                speech_ = self._real_segments(speech, start_idxs)

                # calculate discriminator outputs; the generated audio is
                # detached so this step does not back-propagate into the
                # generator
                p_hat = self.model.discriminator(speech_hat_.detach())
                p = self.model.discriminator(speech_)

                # calculate losses
                real_loss, fake_loss = self.criterion_dis_adv(p_hat, p)
                dis_loss = real_loss + fake_loss

                discriminator_report = {
                    "real_loss": float(real_loss),
                    "fake_loss": float(fake_loss),
                    "discriminator_loss": float(dis_loss),
                }
                for name, value in discriminator_report.items():
                    report("train/" + name, value)
                losses_dict.update(discriminator_report)

                self.optimizer_d.clear_grad()
                dis_loss.backward()

                self.optimizer_d.step()
                self.scheduler_d.step()

                # reset cache
                if self.model.reuse_cache_dis or not self.model.training:
                    self.model._cache = None

        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())


class JETSEvaluator(StandardEvaluator):
    """Evaluator that computes and logs JETS generator/discriminator losses.

    For every batch the wrapped model is run once per turn ("generator" and
    "discriminator"). The losses of each turn are reported under the
    ``eval/`` prefix and collected into a single log message. No backward
    pass or optimizer step is performed here.
    """

    def __init__(self,
                 model,
                 criterions: Dict[str, Layer],
                 dataloader: DataLoader,
                 lambda_adv: float=1.0,
                 lambda_mel: float=45.0,
                 lambda_feat_match: float=2.0,
                 lambda_var: float=1.0,
                 lambda_align: float=2.0,
                 generator_first: bool=False,
                 use_alignment_module: bool=False,
                 output_dir=None):
        """Initialize the evaluator.

        Args:
            model:
                The JETS model; a ``paddle.DataParallel`` wrapper is unwrapped.
            criterions (Dict[str, Layer]):
                Loss layers. Must contain the keys ``'mel'``, ``'feat_match'``,
                ``'gen_adv'``, ``'dis_adv'``, ``'var'`` and ``'forwardsum'``.
            dataloader (DataLoader):
                Loader that yields the evaluation batches.
            lambda_adv (float):
                Multiplier applied to the generator adversarial loss.
            lambda_mel (float):
                Multiplier applied to the mel loss.
            lambda_feat_match (float):
                Multiplier applied to the feature matching loss.
            lambda_var (float):
                Multiplier applied to the summed duration/pitch/energy losses.
            lambda_align (float):
                Multiplier applied to the summed forwardsum and bin losses.
            generator_first (bool):
                If True the generator turn is evaluated before the
                discriminator turn, otherwise the order is reversed.
            use_alignment_module (bool):
                Forwarded to the model; also enables the alignment losses.
            output_dir:
                Directory in which ``worker_<rank>.log`` is created. Accepts a
                ``pathlib.Path`` or a string. When None, no log file is
                attached and ``self.filehandler`` is None.
        """
        # The parent's __init__() is not used because a single model is passed
        # in, so the attributes it would normally set are assigned here.
        models = {"main": model}
        self.models: Dict[str, Layer] = models
        # self.model = model
        self.model = model._layers if isinstance(model,
                                                 paddle.DataParallel) else model

        self.criterions = criterions
        self.criterion_mel = criterions['mel']
        self.criterion_feat_match = criterions['feat_match']
        self.criterion_gen_adv = criterions["gen_adv"]
        self.criterion_dis_adv = criterions["dis_adv"]
        self.criterion_var = criterions["var"]
        self.criterion_forwardsum = criterions["forwardsum"]

        self.dataloader = dataloader

        self.lambda_adv = lambda_adv
        self.lambda_mel = lambda_mel
        self.lambda_feat_match = lambda_feat_match
        self.lambda_var = lambda_var
        self.lambda_align = lambda_align
        self.use_alignment_module = use_alignment_module

        if generator_first:
            self.turns = ["generator", "discriminator"]
        else:
            self.turns = ["discriminator", "generator"]

        # ``output_dir`` defaults to None; previously that (or a plain str)
        # crashed on the ``/`` operator. Only attach a file handler when a
        # directory was actually given.
        self.filehandler = None
        if output_dir is not None:
            from pathlib import Path
            log_file = Path(output_dir) / 'worker_{}.log'.format(
                dist.get_rank())
            self.filehandler = logging.FileHandler(str(log_file))
            logger.addHandler(self.filehandler)
        self.logger = logger
        self.msg = ""

    @staticmethod
    def _record(losses_dict, name, value):
        """Report ``value`` as ``eval/<name>`` and store it in ``losses_dict``."""
        value = float(value)
        report("eval/" + name, value)
        losses_dict[name] = value

    def _real_segments(self, speech, start_idxs):
        """Cut the ground-truth waveform segments matching the generated ones."""
        upsample_factor = self.model.generator.upsample_factor
        return get_segments(
            x=speech,
            start_idxs=start_idxs * upsample_factor,
            segment_size=self.model.generator.segment_size * upsample_factor, )

    def evaluate_core(self, batch):
        """Compute and log the evaluation losses for one batch.

        Args:
            batch:
                Mapping that provides ``speech``, ``text``, ``text_lengths``,
                ``feats``, ``feats_lengths``, ``durations``,
                ``durations_lengths``, ``pitch`` and ``energy``; ``spk_id``
                and ``spk_emb`` are optional.
        """
        # logging.debug("Evaluate: ")
        self.msg = "Evaluate: "
        losses_dict = {}

        # These inputs do not depend on the turn, so prepare them once.
        speech = batch["speech"].unsqueeze(1)
        text_lengths = batch["text_lengths"]
        feats_lengths = batch["feats_lengths"]

        for turn in self.turns:
            outs = self.model(
                text=batch["text"],
                text_lengths=text_lengths,
                feats=batch["feats"],
                feats_lengths=feats_lengths,
                durations=batch["durations"],
                durations_lengths=batch["durations_lengths"],
                pitch=batch["pitch"],
                energy=batch["energy"],
                sids=batch.get("spk_id", None),
                spembs=batch.get("spk_emb", None),
                forward_generator=turn == "generator",
                use_alignment_module=self.use_alignment_module)
            # Generator
            if turn == "generator":
                # parse outputs
                (speech_hat_, bin_loss, log_p_attn, start_idxs, d_outs, ds,
                 p_outs, ps, e_outs, es) = outs
                speech_ = self._real_segments(speech, start_idxs)

                # calculate discriminator outputs
                p_hat = self.model.discriminator(speech_hat_)
                with paddle.no_grad():
                    # do not store discriminator gradient in generator turn
                    p = self.model.discriminator(speech_)

                # calculate losses
                mel_loss = self.criterion_mel(speech_hat_, speech_)

                adv_loss = self.criterion_gen_adv(p_hat)
                feat_match_loss = self.criterion_feat_match(p_hat, p)
                dur_loss, pitch_loss, energy_loss = self.criterion_var(
                    d_outs, ds, p_outs, ps, e_outs, es, text_lengths)

                mel_loss = mel_loss * self.lambda_mel
                adv_loss = adv_loss * self.lambda_adv
                feat_match_loss = feat_match_loss * self.lambda_feat_match
                g_loss = mel_loss + adv_loss + feat_match_loss
                var_loss = (
                    dur_loss + pitch_loss + energy_loss) * self.lambda_var

                # NOTE(review): the alignment loss is reported separately
                # below and is not added to the total here - confirm this is
                # intended.
                gen_loss = g_loss + var_loss  #+ align_loss

                # The insertion order below fixes the order of the log message.
                self._record(losses_dict, "generator_loss", gen_loss)
                self._record(losses_dict, "generator_generator_loss", g_loss)
                self._record(losses_dict, "generator_variance_loss", var_loss)
                self._record(losses_dict, "generator_generator_mel_loss",
                             mel_loss)
                self._record(losses_dict, "generator_generator_adv_loss",
                             adv_loss)
                self._record(losses_dict,
                             "generator_generator_feat_match_loss",
                             feat_match_loss)
                self._record(losses_dict, "generator_variance_dur_loss",
                             dur_loss)
                self._record(losses_dict, "generator_variance_pitch_loss",
                             pitch_loss)
                self._record(losses_dict, "generator_variance_energy_loss",
                             energy_loss)

                if self.use_alignment_module:
                    forwardsum_loss = self.criterion_forwardsum(
                        log_p_attn, text_lengths, feats_lengths)
                    align_loss = (
                        forwardsum_loss + bin_loss) * self.lambda_align
                    self._record(losses_dict, "generator_alignment_loss",
                                 align_loss)
                    self._record(losses_dict,
                                 "generator_alignment_forwardsum_loss",
                                 forwardsum_loss)
                    self._record(losses_dict, "generator_alignment_bin_loss",
                                 bin_loss)

                # reset cache
                if self.model.reuse_cache_gen or not self.model.training:
                    self.model._cache = None

            # Discriminator
            elif turn == "discriminator":
                # parse outputs
                speech_hat_, _, _, start_idxs, *_ = outs
                speech_ = self._real_segments(speech, start_idxs)

                # calculate discriminator outputs
                p_hat = self.model.discriminator(speech_hat_.detach())
                p = self.model.discriminator(speech_)

                # calculate losses
                real_loss, fake_loss = self.criterion_dis_adv(p_hat, p)
                dis_loss = real_loss + fake_loss

                self._record(losses_dict, "real_loss", real_loss)
                self._record(losses_dict, "fake_loss", fake_loss)
                self._record(losses_dict, "discriminator_loss", dis_loss)

                # reset cache
                if self.model.reuse_cache_dis or not self.model.training:
                    self.model._cache = None

        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())
        self.logger.info(self.msg)


================================================
FILE: paddlespeech/t2s/models/jets/length_regulator.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generator module in JETS.

This code is based on https://github.com/imdanboy/jets.

"""
import paddle
import paddle.nn.functional as F
from paddle import nn

from paddlespeech.t2s.modules.masked_fill import masked_fill


class GaussianUpsampling(nn.Layer):
    """
    Gaussian upsampling with a fixed temperature, following
    https://arxiv.org/abs/2010.04301
    """

    def __init__(self, delta=0.1):
        super().__init__()
        # Scale applied to the squared frame-to-center distance.
        self.delta = delta

    def forward(self, hs, ds, h_masks=None, d_masks=None):
        """Expand token-level states to frame level with soft attention.

        Args:
            hs (Tensor): Batched hidden state to be expanded (B, T_text, adim)
            ds (Tensor): Batched token duration (B, T_text)
            h_masks (Tensor): Mask tensor (B,T_feats)
            d_masks (Tensor): Mask tensor (B,T_text)
        Returns:
            Tensor: Expanded hidden state (B, T_feat, adim)
        """
        batch_size = ds.shape[0]

        # Output length: taken from the frame mask when one is given,
        # otherwise from the sum of all durations.
        if h_masks is not None:
            T_feats = h_masks.shape[-1]
        else:
            T_feats = paddle.to_tensor(ds.sum(), dtype="int32")

        # Frame positions 0..T_feats-1, repeated for every batch item.
        frame_idx = paddle.arange(0, T_feats).unsqueeze(0).tile(
            [batch_size, 1])
        t = paddle.to_tensor(frame_idx, dtype="float32")
        if h_masks is not None:
            t = t * paddle.to_tensor(h_masks, dtype="float32")

        # Center of each token: cumulative duration minus half its own length.
        half = ds / 2
        centers = ds.cumsum(axis=-1).astype(half.dtype) - half
        distance = t.unsqueeze(-1) - centers.unsqueeze(1)
        energy = -1 * self.delta * distance**2

        if d_masks is not None:
            # Positions where the inverted d_masks is True get -inf energy.
            invalid = ~(d_masks.unsqueeze(1))
            invalid.stop_gradient = True
            invalid = invalid.tile([1, T_feats, 1])
            energy = masked_fill(energy, invalid, -float("inf"))

        # (B, T_feats, T_text)
        attn = F.softmax(energy, axis=2)
        return paddle.matmul(attn, hs)


================================================
FILE: paddlespeech/t2s/models/melgan/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .melgan import *
from .multi_band_melgan_updater import *
from .style_melgan import *
from .style_melgan_updater import *


================================================
FILE: paddlespeech/t2s/models/melgan/melgan.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""MelGAN Modules."""
from typing import Any
from typing import Dict
from typing import List

import numpy as np
import paddle
from paddle import nn

from paddlespeech.t2s.modules.activation import get_activation
from paddlespeech.t2s.modules.causal_conv import CausalConv1D
from paddlespeech.t2s.modules.causal_conv import CausalConv1DTranspose
from paddlespeech.t2s.modules.nets_utils import initialize
from paddlespeech.t2s.modules.pqmf import PQMF
from paddlespeech.t2s.modules.residual_stack import ResidualStack


class MelGANGenerator(nn.Layer):
    """MelGAN generator module."""

    def __init__(
            self,
            in_channels: int=80,
            out_channels: int=1,
            kernel_size: int=7,
            channels: int=512,
            bias: bool=True,
            upsample_scales: List[int]=[8, 8, 2, 2],
            stack_kernel_size: int=3,
            stacks: int=3,
            nonlinear_activation: str="leakyrelu",
            nonlinear_activation_params: Dict[str, Any]={"negative_slope": 0.2},
            pad: str="Pad1D",
            pad_params: Dict[str, Any]={"mode": "reflect"},
            use_final_nonlinear_activation: bool=True,
            use_weight_norm: bool=True,
            use_causal_conv: bool=False,
            init_type: str="xavier_uniform", ):
        """Initialize MelGANGenerator module.

        Args:
            in_channels (int): 
                Number of input channels.
            out_channels (int): 
                Number of output channels,
                the number of sub-band is out_channels in multi-band melgan.
            kernel_size (int): 
                Kernel size of initial and final conv layer.
            channels (int): 
                Initial number of channels for conv layer.
            bias (bool): 
                Whether to add bias parameter in convolution layers.
            upsample_scales (List[int]): 
                List of upsampling scales.
            stack_kernel_size (int): 
                Kernel size of dilated conv layers in residual stack.
            stacks (int): 
                Number of stacks in a single residual stack.
            nonlinear_activation (str): 
                Name of the non-linear activation used in the upsample
                network; it is lower-cased before use.
            nonlinear_activation_params (Dict[str, Any]): 
                Parameters passed to the non-linear activation.
            pad (str): 
                Padding function module name before dilated convolution layer.
            pad_params (dict): 
                Hyperparameters for padding function.
            use_final_nonlinear_activation (bool): 
                Whether to append a Tanh after the final conv layer.
            use_weight_norm (bool): 
                Whether to use weight norm.
                If set to true, it will be applied to all of the conv layers.
            use_causal_conv (bool):
                Whether to use causal convolution.
            init_type (str):
                Initialization name passed to ``initialize``.
        """
        super().__init__()

        # for compatibility
        if nonlinear_activation:
            nonlinear_activation = nonlinear_activation.lower()

        # Check the hyper parameters before ``initialize`` is called, so an
        # invalid configuration cannot leave global state modified.
        assert channels >= np.prod(upsample_scales)
        assert channels % (2**len(upsample_scales)) == 0
        if not use_causal_conv:
            assert (kernel_size - 1
                    ) % 2 == 0, "Not support even number kernel size."

        # Channel count after all upsampling stages. Computed from the list
        # length rather than a leaked loop index, so an empty
        # ``upsample_scales`` also works.
        final_channels = channels // (2**len(upsample_scales))

        # initialize parameters
        # NOTE(review): initialize() presumably installs a global initializer,
        # since it is cleared again below - confirm against nets_utils.
        initialize(self, init_type)
        try:
            layers = []
            if not use_causal_conv:
                layers += [
                    getattr(paddle.nn, pad)((kernel_size - 1) // 2,
                                            **pad_params),
                    nn.Conv1D(
                        in_channels, channels, kernel_size, bias_attr=bias),
                ]
            else:
                layers += [
                    CausalConv1D(
                        in_channels,
                        channels,
                        kernel_size,
                        bias=bias,
                        pad=pad,
                        pad_params=pad_params, ),
                ]

            for i, upsample_scale in enumerate(upsample_scales):
                # add upsampling layer
                layers += [
                    get_activation(nonlinear_activation,
                                   **nonlinear_activation_params)
                ]
                if not use_causal_conv:
                    layers += [
                        nn.Conv1DTranspose(
                            channels // (2**i),
                            channels // (2**(i + 1)),
                            upsample_scale * 2,
                            stride=upsample_scale,
                            padding=upsample_scale // 2 + upsample_scale % 2,
                            output_padding=upsample_scale % 2,
                            bias_attr=bias, )
                    ]
                else:
                    layers += [
                        CausalConv1DTranspose(
                            channels // (2**i),
                            channels // (2**(i + 1)),
                            upsample_scale * 2,
                            stride=upsample_scale,
                            bias=bias, )
                    ]

                # add residual stack
                for j in range(stacks):
                    layers += [
                        ResidualStack(
                            kernel_size=stack_kernel_size,
                            channels=channels // (2**(i + 1)),
                            dilation=stack_kernel_size**j,
                            bias=bias,
                            nonlinear_activation=nonlinear_activation,
                            nonlinear_activation_params=nonlinear_activation_params,
                            pad=pad,
                            pad_params=pad_params,
                            use_causal_conv=use_causal_conv, )
                    ]

            # add final layer
            layers += [
                get_activation(nonlinear_activation,
                               **nonlinear_activation_params)
            ]
            if not use_causal_conv:
                layers += [
                    getattr(nn, pad)((kernel_size - 1) // 2, **pad_params),
                    nn.Conv1D(
                        final_channels,
                        out_channels,
                        kernel_size,
                        bias_attr=bias),
                ]
            else:
                layers += [
                    CausalConv1D(
                        final_channels,
                        out_channels,
                        kernel_size,
                        bias=bias,
                        pad=pad,
                        pad_params=pad_params, ),
                ]
            if use_final_nonlinear_activation:
                layers += [nn.Tanh()]

            # define the model as a single function
            self.melgan = nn.Sequential(*layers)
        finally:
            # Always clear the global initializer, even if building a layer
            # raised, so later layers are unaffected.
            nn.initializer.set_global_initializer(None)

        # apply weight norm
        if use_weight_norm:
            self.apply_weight_norm()

        # reset parameters
        self.reset_parameters()

        # initialize pqmf for multi-band melgan inference
        if out_channels > 1:
            self.pqmf = PQMF(subbands=out_channels)
        else:
            self.pqmf = None

    def forward(self, c):
        """Calculate forward propagation.

        Args:
            c (Tensor): 
                Input tensor (B, in_channels, T).
        Returns:
            Tensor: Output tensor (B, out_channels, T * prod(upsample_scales)).
        """
        out = self.melgan(c)
        return out

    def apply_weight_norm(self):
        """Recursively apply weight normalization to all the Convolution layers
        in the sublayers.
        """

        def _apply_weight_norm(layer):
            if isinstance(layer, (nn.Conv1D, nn.Conv2D, nn.Conv1DTranspose)):
                nn.utils.weight_norm(layer)

        self.apply(_apply_weight_norm)

    def remove_weight_norm(self):
        """Recursively remove weight normalization from all the Convolution 
        layers in the sublayers.
        """

        def _remove_weight_norm(layer):
            # Deliberately best-effort: a ValueError from a layer is ignored
            # (presumably raised for layers without weight norm).
            try:
                nn.utils.remove_weight_norm(layer)
            except ValueError:
                pass

        self.apply(_remove_weight_norm)

    def reset_parameters(self):
        """Reset parameters.
        This initialization follows official implementation manner.
        https://github.com/descriptinc/melgan-neurips/blob/master/mel2wav/modules.py
        """
        # Normal distribution with float parameters (mean 0.0, std 0.02).
        dist = paddle.distribution.Normal(loc=0.0, scale=0.02)

        def _reset_parameters(m):
            if isinstance(m, (nn.Conv1D, nn.Conv1DTranspose)):
                w = dist.sample(m.weight.shape)
                m.weight.set_value(w)

        self.apply(_reset_parameters)

    def inference(self, c):
        """Perform inference.

        Args:
            c (Tensor): 
                Input tensor (T, in_channels).
        Returns:
            Tensor: Output tensor (out_channels * T * prod(upsample_scales), 1).
        """
        # pseudo batch
        c = c.transpose([1, 0]).unsqueeze(0)
        # (B, out_channels, T * prod(upsample_scales))
        out = self.melgan(c)
        if self.pqmf is not None:
            # (B, 1, out_channels * T * prod(upsample_scales))
            out = self.pqmf(out)
        out = out.squeeze(0).transpose([1, 0])
        return out


class MelGANDiscriminator(nn.Layer):
    """Single-scale MelGAN discriminator."""

    def __init__(
            self,
            in_channels: int=1,
            out_channels: int=1,
            kernel_sizes: List[int]=[5, 3],
            channels: int=16,
            max_downsample_channels: int=1024,
            bias: bool=True,
            downsample_scales: List[int]=[4, 4, 4, 4],
            nonlinear_activation: str="leakyrelu",
            nonlinear_activation_params: Dict[str, Any]={"negative_slope": 0.2},
            pad: str="Pad1D",
            pad_params: Dict[str, Any]={"mode": "reflect"},
            init_type: str="xavier_uniform", ):
        """Build the discriminator layer stack.

        Args:
            in_channels (int):
                Number of input channels.
            out_channels (int):
                Number of output channels.
            kernel_sizes (List[int]):
                Exactly two odd kernel sizes. Their product is the kernel size
                of the first conv layer; the two values themselves are the
                kernel sizes of the last two conv layers.
                For example, with kernel_sizes = [5, 3] the first layer uses
                5 * 3 = 15 and the last two layers use 5 and 3.
            channels (int):
                Initial number of channels for conv layer.
            max_downsample_channels (int):
                Maximum number of channels for downsampling layers.
            bias (bool):
                Whether to add bias parameter in convolution layers.
            downsample_scales (List[int]):
                List of downsampling scales.
            nonlinear_activation (str):
                Activation function module name.
            nonlinear_activation_params (dict):
                Hyperparameters for activation function.
            pad (str):
                Padding function module name used before the first conv layer.
            pad_params (dict):
                Hyperparameters for padding function.
            init_type (str):
                Initialization name passed to ``initialize``.
        """
        super().__init__()

        # for compatibility
        if nonlinear_activation:
            nonlinear_activation = nonlinear_activation.lower()

        # initialize parameters
        initialize(self, init_type)

        self.layers = nn.LayerList()

        # exactly two odd kernel sizes are required
        assert len(kernel_sizes) == 2
        assert all(size % 2 == 1 for size in kernel_sizes)

        def _activation():
            # A fresh activation layer for each place it is used.
            return get_activation(nonlinear_activation,
                                  **nonlinear_activation_params)

        # first layer: padding + wide conv + activation
        first_kernel = np.prod(kernel_sizes)
        stem = nn.Sequential(
            getattr(nn, pad)((first_kernel - 1) // 2, **pad_params),
            nn.Conv1D(
                in_channels, channels, int(first_kernel), bias_attr=bias),
            _activation(), )
        self.layers.append(stem)

        # downsample layers: strided grouped convs, width capped at the maximum
        prev_chs = channels
        for scale in downsample_scales:
            next_chs = min(prev_chs * scale, max_downsample_channels)
            down_conv = nn.Conv1D(
                prev_chs,
                next_chs,
                kernel_size=scale * 10 + 1,
                stride=scale,
                padding=scale * 5,
                groups=prev_chs // 4,
                bias_attr=bias, )
            self.layers.append(nn.Sequential(down_conv, _activation()))
            prev_chs = next_chs

        # final layers: one conv + activation, then a bare output conv
        penultimate_kernel, last_kernel = kernel_sizes
        last_hidden = min(prev_chs * 2, max_downsample_channels)
        penultimate_conv = nn.Conv1D(
            prev_chs,
            last_hidden,
            penultimate_kernel,
            padding=(penultimate_kernel - 1) // 2,
            bias_attr=bias, )
        self.layers.append(nn.Sequential(penultimate_conv, _activation()))
        self.layers.append(
            nn.Conv1D(
                last_hidden,
                out_channels,
                last_kernel,
                padding=(last_kernel - 1) // 2,
                bias_attr=bias, ))

    def forward(self, x):
        """Run the input through every layer, keeping each intermediate output.

        Args:
            x (Tensor):
                Input noise signal (B, 1, T).
        Returns:
            List: List of output tensors of each layer (for feat_match_loss).
        """
        feats = []
        for layer in self.layers:
            x = layer(x)
            feats.append(x)
        return feats


class MelGANMultiScaleDiscriminator(nn.Layer):
    """MelGAN multi-scale discriminator module."""

    def __init__(
            self,
            in_channels: int=1,
            out_channels: int=1,
            scales: int=3,
            downsample_pooling: str="AvgPool1D",
            # follow the official implementation setting
            downsample_pooling_params: Dict[str, Any]={
                "kernel_size": 4,
                "stride": 2,
                "padding": 1,
                "exclusive": True,
            },
            kernel_sizes: List[int]=[5, 3],
            channels: int=16,
            max_downsample_channels: int=1024,
            bias: bool=True,
            downsample_scales: List[int]=[4, 4, 4, 4],
            nonlinear_activation: str="leakyrelu",
            nonlinear_activation_params: Dict[str, Any]={"negative_slope": 0.2},
            pad: str="Pad1D",
            pad_params: Dict[str, Any]={"mode": "reflect"},
            use_weight_norm: bool=True,
            init_type: str="xavier_uniform", ):
        """Initialize MelGAN multi-scale discriminator module.

        Args:
            in_channels (int): 
                Number of input channels.
            out_channels (int): 
                Number of output channels.
            scales (int): 
                Number of multi-scales.
            downsample_pooling (str): 
                Pooling module name for downsampling of the inputs.
            downsample_pooling_params (dict): 
                Parameters for the above pooling module.
            kernel_sizes (List[int]): 
                List of two kernel sizes. The product will be used for the first conv layer,
                and the first and the second kernel sizes will be used for the last two layers.
            channels (int): 
                Initial number of channels for conv layer.
            max_downsample_channels (int): 
                Maximum number of channels for downsampling layers.
            bias (bool): 
                Whether to add bias parameter in convolution layers.
            downsample_scales (List[int]): 
                List of downsampling scales.
            nonlinear_activation (str): 
                Activation function module name.
            nonlinear_activation_params (dict): 
                Hyperparameters for activation function.
            pad (str): 
                Padding function module name before dilated convolution layer.
            pad_params (dict): 
                Hyperparameters for padding function.
            use_weight_norm (bool):
                Whether to apply weight norm to the convolution layers.
            init_type (str):
                Initialization name passed to ``initialize``; it is also
                forwarded to every sub-discriminator.
        """
        super().__init__()

        # initialize parameters
        initialize(self, init_type)

        # for compatibility
        if nonlinear_activation:
            nonlinear_activation = nonlinear_activation.lower()

        try:
            self.discriminators = nn.LayerList()

            # add discriminators
            for _ in range(scales):
                self.discriminators.append(
                    MelGANDiscriminator(
                        in_channels=in_channels,
                        out_channels=out_channels,
                        kernel_sizes=kernel_sizes,
                        channels=channels,
                        max_downsample_channels=max_downsample_channels,
                        bias=bias,
                        downsample_scales=downsample_scales,
                        nonlinear_activation=nonlinear_activation,
                        nonlinear_activation_params=nonlinear_activation_params,
                        pad=pad,
                        pad_params=pad_params,
                        # Forward the requested initialization. Without this
                        # each sub-discriminator re-initializes with its own
                        # default and overrides the caller's ``init_type``.
                        init_type=init_type, ))
            self.pooling = getattr(nn, downsample_pooling)(
                **downsample_pooling_params)
        finally:
            # Always clear the global initializer, even if building a layer
            # raised, so later layers are unaffected.
            nn.initializer.set_global_initializer(None)

        # apply weight norm
        if use_weight_norm:
            self.apply_weight_norm()

        # reset parameters
        self.reset_parameters()

    def forward(self, x):
        """Calculate forward propagation.
        Args:
            x (Tensor):
                Input noise signal (B, 1, T).
        Returns:
            List: List of list of each discriminator outputs, which consists of each layer output tensors.
        """
        outs = []
        for f in self.discriminators:
            outs += [f(x)]
            # each following discriminator sees a further pooled input
            x = self.pooling(x)

        return outs

    def apply_weight_norm(self):
        """Recursively apply weight normalization to all the Convolution layers
        in the sublayers.
        """

        def _apply_weight_norm(layer):
            if isinstance(layer, (nn.Conv1D, nn.Conv2D, nn.Conv1DTranspose)):
                nn.utils.weight_norm(layer)

        self.apply(_apply_weight_norm)

    def remove_weight_norm(self):
        """Recursively remove weight normalization from all the Convolution 
        layers in the sublayers.
        """

        def _remove_weight_norm(layer):
            # Deliberately best-effort: a ValueError from a layer is ignored
            # (presumably raised for layers without weight norm).
            try:
                nn.utils.remove_weight_norm(layer)
            except ValueError:
                pass

        self.apply(_remove_weight_norm)

    def reset_parameters(self):
        """Reset parameters.
        This initialization follows official implementation manner.
        https://github.com/descriptinc/melgan-neurips/blob/master/mel2wav/modules.py
        """

        # Normal distribution with float parameters (mean 0.0, std 0.02).
        dist = paddle.distribution.Normal(loc=0.0, scale=0.02)

        def _reset_parameters(m):
            if isinstance(m, (nn.Conv1D, nn.Conv1DTranspose)):
                w = dist.sample(m.weight.shape)
                m.weight.set_value(w)

        self.apply(_reset_parameters)


class MelGANInference(nn.Layer):
    """Inference wrapper that chains a normalizer and a MelGAN generator."""

    def __init__(self, normalizer, melgan_generator):
        """Keep references to the two stages of the pipeline.

        Args:
            normalizer:
                Callable applied to the input log-mel features first.
            melgan_generator:
                Object whose ``inference`` method consumes the normalized
                features.
        """
        super().__init__()
        self.normalizer = normalizer
        self.melgan_generator = melgan_generator

    def forward(self, logmel):
        """Normalize ``logmel`` and return the generator's ``inference`` output."""
        return self.melgan_generator.inference(self.normalizer(logmel))


================================================
FILE: paddlespeech/t2s/models/melgan/multi_band_melgan_updater.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from pathlib import Path
from typing import Dict

import paddle
from paddle import distributed as dist
from paddle.io import DataLoader
from paddle.nn import Layer
from paddle.optimizer import Optimizer
from paddle.optimizer.lr import LRScheduler

from paddlespeech.t2s.training.extensions.evaluator import StandardEvaluator
from paddlespeech.t2s.training.reporter import report
from paddlespeech.t2s.training.updaters.standard_updater import StandardUpdater
from paddlespeech.t2s.training.updaters.standard_updater import UpdaterState
# Module-level logger; records below INFO are dropped by this logger.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
# Configure the root logger's output format (timestamp, level, file:line,
# message). basicConfig does nothing if the root logger already has handlers.
logging.basicConfig(
    datefmt='[%Y-%m-%d %H:%M:%S]',
    format='%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(message)s')


class MBMelGANUpdater(StandardUpdater):
    """Training updater for Multi-Band MelGAN.

    Each ``update_core`` call runs, depending on the current iteration, a
    generator step (full-band and sub-band multi-resolution STFT losses, plus
    the adversarial loss once discriminator training has started) followed by
    a discriminator step.
    """

    def __init__(self,
                 models: Dict[str, Layer],
                 optimizers: Dict[str, Optimizer],
                 criterions: Dict[str, Layer],
                 schedulers: Dict[str, LRScheduler],
                 dataloader: DataLoader,
                 generator_train_start_steps: int=0,
                 discriminator_train_start_steps: int=100000,
                 lambda_aux: float=1.0,
                 lambda_adv: float=1.0,
                 output_dir: Path=None):
        """Initialize the updater.

        Args:
            models (Dict[str, Layer]):
                Must contain the keys ``'generator'`` and ``'discriminator'``.
            optimizers (Dict[str, Optimizer]):
                Must contain the keys ``'generator'`` and ``'discriminator'``.
            criterions (Dict[str, Layer]):
                Must contain the keys ``'stft'``, ``'sub_stft'``, ``'pqmf'``,
                ``'gen_adv'`` and ``'dis_adv'``.
            schedulers (Dict[str, LRScheduler]):
                Must contain the keys ``'generator'`` and ``'discriminator'``.
            dataloader (DataLoader):
                Source of training batches; an iterator over it is created
                here.
            generator_train_start_steps (int):
                The generator step runs only while the iteration counter is
                strictly greater than this value.
            discriminator_train_start_steps (int):
                The adversarial loss and the discriminator step are enabled
                only while the iteration counter is strictly greater than
                this value.
            lambda_aux (float):
                Weight of the auxiliary (STFT) loss in the generator loss.
            lambda_adv (float):
                Weight of the adversarial loss in the generator loss.
            output_dir (Path):
                Directory that receives ``worker_<rank>.log``. When ``None``
                no file handler is attached and ``self.filehandler`` is
                ``None``.
        """
        self.models = models
        self.generator: Layer = models['generator']
        self.discriminator: Layer = models['discriminator']

        self.optimizers = optimizers
        self.optimizer_g: Optimizer = optimizers['generator']
        self.optimizer_d: Optimizer = optimizers['discriminator']

        self.criterions = criterions
        self.criterion_stft = criterions['stft']
        self.criterion_sub_stft = criterions['sub_stft']
        self.criterion_pqmf = criterions['pqmf']
        self.criterion_gen_adv = criterions["gen_adv"]
        self.criterion_dis_adv = criterions["dis_adv"]

        self.schedulers = schedulers
        self.scheduler_g = schedulers['generator']
        self.scheduler_d = schedulers['discriminator']

        self.dataloader = dataloader

        self.generator_train_start_steps = generator_train_start_steps
        self.discriminator_train_start_steps = discriminator_train_start_steps
        self.lambda_adv = lambda_adv
        self.lambda_aux = lambda_aux

        self.state = UpdaterState(iteration=0, epoch=0)
        self.train_iterator = iter(self.dataloader)

        # ``output_dir`` defaults to None, and ``None / 'worker_...'`` would
        # raise TypeError, so the per-rank file handler is attached only when
        # a directory was actually supplied. ``Path(...)`` also lets callers
        # pass a plain string.
        self.filehandler = None
        if output_dir is not None:
            log_file = Path(output_dir) / 'worker_{}.log'.format(
                dist.get_rank())
            self.filehandler = logging.FileHandler(str(log_file))
            logger.addHandler(self.filehandler)
        self.logger = logger
        self.msg = ""

    def update_core(self, batch):
        """Run one training step on ``batch``.

        Args:
            batch:
                A pair ``(wav, mel)``: the target waveform and the features
                fed to the generator.

        Every scalar loss is passed to ``report`` under ``train/<name>`` and
        appended to ``self.msg`` as ``<name>: <value>``.
        """
        self.msg = "Rank: {}, ".format(dist.get_rank())
        losses_dict = {}
        # parse batch
        wav, mel = batch

        # Generator
        if self.state.iteration > self.generator_train_start_steps:
            # multi-band generator output: (B, out_channels, T * prod(upsample_scales))
            wav_ = self.generator(mel)
            wav_mb_ = wav_
            # PQMF synthesis merges the sub-bands into a full-band signal
            wav_ = self.criterion_pqmf.synthesis(wav_mb_)

            # initialize
            gen_loss = 0.0
            aux_loss = 0.0

            # full band Multi-resolution stft loss
            sc_loss, mag_loss = self.criterion_stft(wav_, wav)
            # for balancing with subband stft loss
            # Eq.(9) in paper
            aux_loss += 0.5 * (sc_loss + mag_loss)
            report("train/spectral_convergence_loss", float(sc_loss))
            report("train/log_stft_magnitude_loss", float(mag_loss))
            losses_dict["spectral_convergence_loss"] = float(sc_loss)
            losses_dict["log_stft_magnitude_loss"] = float(mag_loss)

            # sub band Multi-resolution stft loss
            # PQMF analysis of the target: (B, subbands, T // subbands)
            wav_mb = self.criterion_pqmf.analysis(wav)
            sub_sc_loss, sub_mag_loss = self.criterion_sub_stft(wav_mb_, wav_mb)
            # Eq.(9) in paper
            aux_loss += 0.5 * (sub_sc_loss + sub_mag_loss)
            report("train/sub_spectral_convergence_loss", float(sub_sc_loss))
            report("train/sub_log_stft_magnitude_loss", float(sub_mag_loss))
            losses_dict["sub_spectral_convergence_loss"] = float(sub_sc_loss)
            losses_dict["sub_log_stft_magnitude_loss"] = float(sub_mag_loss)

            gen_loss += aux_loss * self.lambda_aux

            # adversarial loss (only once the discriminator is being trained)
            if self.state.iteration > self.discriminator_train_start_steps:
                p_ = self.discriminator(wav_)
                adv_loss = self.criterion_gen_adv(p_)
                report("train/adversarial_loss", float(adv_loss))
                losses_dict["adversarial_loss"] = float(adv_loss)

                gen_loss += self.lambda_adv * adv_loss

            report("train/generator_loss", float(gen_loss))
            losses_dict["generator_loss"] = float(gen_loss)

            self.optimizer_g.clear_grad()
            gen_loss.backward()

            self.optimizer_g.step()
            self.scheduler_g.step()

        # Discriminator
        if self.state.iteration > self.discriminator_train_start_steps:
            # re-compute wav_ which leads better quality
            with paddle.no_grad():
                wav_ = self.generator(mel)
            wav_ = self.criterion_pqmf.synthesis(wav_)
            p = self.discriminator(wav)
            # detach so the discriminator loss does not reach the generator
            p_ = self.discriminator(wav_.detach())
            real_loss, fake_loss = self.criterion_dis_adv(p_, p)
            dis_loss = real_loss + fake_loss
            report("train/real_loss", float(real_loss))
            report("train/fake_loss", float(fake_loss))
            report("train/discriminator_loss", float(dis_loss))
            losses_dict["real_loss"] = float(real_loss)
            losses_dict["fake_loss"] = float(fake_loss)
            losses_dict["discriminator_loss"] = float(dis_loss)

            self.optimizer_d.clear_grad()
            dis_loss.backward()

            self.optimizer_d.step()
            self.scheduler_d.step()

        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())


class MBMelGANEvaluator(StandardEvaluator):
    """Evaluator for Multi-Band MelGAN.

    ``evaluate_core`` computes the generator and discriminator losses for one
    batch, reports them under ``eval/<name>`` and logs a summary line.
    """

    def __init__(self,
                 models: Dict[str, Layer],
                 criterions: Dict[str, Layer],
                 dataloader: DataLoader,
                 lambda_aux: float=1.0,
                 lambda_adv: float=1.0,
                 output_dir: Path=None):
        """Initialize the evaluator.

        Args:
            models (Dict[str, Layer]):
                Must contain the keys ``'generator'`` and ``'discriminator'``.
            criterions (Dict[str, Layer]):
                Must contain the keys ``'stft'``, ``'sub_stft'``, ``'pqmf'``,
                ``'gen_adv'`` and ``'dis_adv'``.
            dataloader (DataLoader):
                Source of evaluation batches.
            lambda_aux (float):
                Weight of the auxiliary (STFT) loss in the generator loss.
            lambda_adv (float):
                Weight of the adversarial loss in the generator loss.
            output_dir (Path):
                Directory that receives ``worker_<rank>.log``. When ``None``
                no file handler is attached and ``self.filehandler`` is
                ``None``.
        """
        self.models = models
        self.generator = models['generator']
        self.discriminator = models['discriminator']

        self.criterions = criterions
        self.criterion_stft = criterions['stft']
        self.criterion_sub_stft = criterions['sub_stft']
        self.criterion_pqmf = criterions['pqmf']
        self.criterion_gen_adv = criterions["gen_adv"]
        self.criterion_dis_adv = criterions["dis_adv"]

        self.dataloader = dataloader

        self.lambda_adv = lambda_adv
        self.lambda_aux = lambda_aux

        # ``output_dir`` defaults to None, and ``None / 'worker_...'`` would
        # raise TypeError, so the per-rank file handler is attached only when
        # a directory was actually supplied. ``Path(...)`` also lets callers
        # pass a plain string.
        self.filehandler = None
        if output_dir is not None:
            log_file = Path(output_dir) / 'worker_{}.log'.format(
                dist.get_rank())
            self.filehandler = logging.FileHandler(str(log_file))
            logger.addHandler(self.filehandler)
        self.logger = logger
        self.msg = ""

    def evaluate_core(self, batch):
        """Compute and report all losses for one evaluation batch.

        Args:
            batch:
                A pair ``(wav, mel)``: the target waveform and the features
                fed to the generator.
        """
        # logging.debug("Evaluate: ")
        self.msg = "Evaluate: "
        losses_dict = {}
        wav, mel = batch

        # Generator
        # multi-band generator output: (B, out_channels, T * prod(upsample_scales))
        wav_ = self.generator(mel)
        wav_mb_ = wav_
        # PQMF synthesis merges the sub-bands into a full-band signal
        wav_ = self.criterion_pqmf.synthesis(wav_mb_)

        # initialize
        gen_loss = 0.0
        aux_loss = 0.0

        # adversarial loss
        p_ = self.discriminator(wav_)
        adv_loss = self.criterion_gen_adv(p_)
        report("eval/adversarial_loss", float(adv_loss))
        losses_dict["adversarial_loss"] = float(adv_loss)

        gen_loss += self.lambda_adv * adv_loss

        # Multi-resolution stft loss
        sc_loss, mag_loss = self.criterion_stft(wav_, wav)
        # Eq.(9) in paper
        aux_loss += 0.5 * (sc_loss + mag_loss)
        report("eval/spectral_convergence_loss", float(sc_loss))
        report("eval/log_stft_magnitude_loss", float(mag_loss))
        losses_dict["spectral_convergence_loss"] = float(sc_loss)
        losses_dict["log_stft_magnitude_loss"] = float(mag_loss)

        # sub band Multi-resolution stft loss
        # PQMF analysis of the target: (B, subbands, T // subbands)
        wav_mb = self.criterion_pqmf.analysis(wav)
        sub_sc_loss, sub_mag_loss = self.criterion_sub_stft(wav_mb_, wav_mb)
        # Eq.(9) in paper
        aux_loss += 0.5 * (sub_sc_loss + sub_mag_loss)
        report("eval/sub_spectral_convergence_loss", float(sub_sc_loss))
        report("eval/sub_log_stft_magnitude_loss", float(sub_mag_loss))
        losses_dict["sub_spectral_convergence_loss"] = float(sub_sc_loss)
        losses_dict["sub_log_stft_magnitude_loss"] = float(sub_mag_loss)

        gen_loss += aux_loss * self.lambda_aux

        report("eval/generator_loss", float(gen_loss))
        losses_dict["generator_loss"] = float(gen_loss)

        # Discriminator: the fake-side outputs ``p_`` computed above are reused
        p = self.discriminator(wav)
        real_loss, fake_loss = self.criterion_dis_adv(p_, p)
        dis_loss = real_loss + fake_loss
        report("eval/real_loss", float(real_loss))
        report("eval/fake_loss", float(fake_loss))
        report("eval/discriminator_loss", float(dis_loss))

        losses_dict["real_loss"] = float(real_loss)
        losses_dict["fake_loss"] = float(fake_loss)
        losses_dict["discriminator_loss"] = float(dis_loss)

        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())
        self.logger.info(self.msg)


================================================
FILE: paddlespeech/t2s/models/melgan/style_melgan.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""StyleMelGAN Modules."""
import copy
from typing import Any
from typing import Dict
from typing import List

import numpy as np
import paddle
import paddle.nn.functional as F
from paddle import nn

from paddlespeech.t2s.models.melgan import MelGANDiscriminator as BaseDiscriminator
from paddlespeech.t2s.modules.activation import get_activation
from paddlespeech.t2s.modules.nets_utils import initialize
from paddlespeech.t2s.modules.pqmf import PQMF
from paddlespeech.t2s.modules.tade_res_block import TADEResBlock


class StyleMelGANGenerator(nn.Layer):
    """Style MelGAN generator module.

    The network upsamples a noise tensor with a stack of transposed
    convolutions, passes it through a list of ``TADEResBlock`` layers that are
    conditioned on the auxiliary features, and finishes with a convolution
    followed by ``Tanh``.
    """

    def __init__(
            self,
            in_channels: int=128,
            aux_channels: int=80,
            channels: int=64,
            out_channels: int=1,
            kernel_size: int=9,
            dilation: int=2,
            bias: bool=True,
            noise_upsample_scales: List[int]=[11, 2, 2, 2],
            noise_upsample_activation: str="leakyrelu",
            noise_upsample_activation_params: Dict[str,
                                                   Any]={"negative_slope": 0.2},
            upsample_scales: List[int]=[2, 2, 2, 2, 2, 2, 2, 2, 1],
            upsample_mode: str="linear",
            gated_function: str="softmax",
            use_weight_norm: bool=True,
            init_type: str="xavier_uniform", ):
        """Initialize Style MelGAN generator.

        Args:
            in_channels (int): 
                Number of input noise channels.
            aux_channels (int): 
                Number of auxiliary input channels.
            channels (int): 
                Number of channels for conv layer.
            out_channels (int): 
                Number of output channels.
            kernel_size (int): 
                Kernel size of conv layers.
            dilation (int): 
                Dilation factor for conv layers.
            bias (bool): 
                Whether to add bias parameter in convolution layers.
            noise_upsample_scales (list): 
                List of noise upsampling scales.
            noise_upsample_activation (str): 
                Activation function module name for noise upsampling.
            noise_upsample_activation_params (dict): 
                Hyperparameters for the above activation function.
            upsample_scales (list): 
                List of upsampling scales.
            upsample_mode (str): 
                Upsampling mode in TADE layer.
            gated_function (str): 
                Gated function in TADEResBlock ("softmax" or "sigmoid").
            use_weight_norm (bool): 
                Whether to use weight norm.
                If set to true, it will be applied to all of the conv layers.
            init_type (str):
                Initializer name forwarded to ``initialize``.
        """
        super().__init__()

        # initialize parameters
        # NOTE(review): `initialize` presumably installs a global paddle
        # initializer for the layers built below (it is reset to None after
        # construction) -- confirm against nets_utils.initialize.
        initialize(self, init_type)

        self.in_channels = in_channels
        # Noise upsampling stack: one transposed convolution (kernel 2*scale,
        # stride scale) plus one activation per entry of noise_upsample_scales.
        noise_upsample = []
        in_chs = in_channels
        for noise_upsample_scale in noise_upsample_scales:
            noise_upsample.append(
                nn.Conv1DTranspose(
                    in_chs,
                    channels,
                    noise_upsample_scale * 2,
                    stride=noise_upsample_scale,
                    padding=noise_upsample_scale // 2 + noise_upsample_scale %
                    2,
                    output_padding=noise_upsample_scale % 2,
                    bias_attr=bias, ))
            noise_upsample.append(
                get_activation(noise_upsample_activation, **
                               noise_upsample_activation_params))
            # every stage after the first takes `channels` input channels
            in_chs = channels
        self.noise_upsample = nn.Sequential(*noise_upsample)
        # product of all noise upsampling scales
        self.noise_upsample_factor = np.prod(noise_upsample_scales)

        # One TADEResBlock per entry of upsample_scales. The first block is
        # built with aux_channels conditioning channels, all later ones with
        # `channels`.
        self.blocks = nn.LayerList()
        aux_chs = aux_channels
        for upsample_scale in upsample_scales:
            self.blocks.append(
                TADEResBlock(
                    in_channels=channels,
                    aux_channels=aux_chs,
                    kernel_size=kernel_size,
                    dilation=dilation,
                    bias=bias,
                    upsample_factor=upsample_scale,
                    upsample_mode=upsample_mode,
                    gated_function=gated_function, ), )
            aux_chs = channels
        # product of all upsampling scales
        self.upsample_factor = np.prod(upsample_scales)

        # Output head: stride-1 convolution padded with (kernel_size - 1) // 2
        # on each side, followed by Tanh.
        self.output_conv = nn.Sequential(
            nn.Conv1D(
                channels,
                out_channels,
                kernel_size,
                1,
                bias_attr=bias,
                padding=(kernel_size - 1) // 2, ),
            nn.Tanh(), )

        # clear the global initializer again now that all layers are built
        nn.initializer.set_global_initializer(None)

        # apply weight norm
        if use_weight_norm:
            self.apply_weight_norm()

        # reset parameters
        self.reset_parameters()

    def forward(self, c, z=None):
        """Calculate forward propagation.

        Args:
            c (Tensor): Auxiliary input tensor (B, channels, T).
            z (Tensor): Input noise tensor (B, in_channels, 1).
                When None, it is sampled with ``paddle.randn`` using the batch
                size of ``c``.
        Returns:
            Tensor: Output tensor (B, out_channels, T ** prod(upsample_scales)).
        """
        # batch_max_steps(24000) == noise_upsample_factor(80) * upsample_factor(300)
        # NOTE(review): the numbers above do not match this class's default
        # scales (11*2*2*2 = 88 and 2**8 = 256); presumably they come from a
        # training config.
        if z is None:
            z = paddle.randn([paddle.shape(c)[0], self.in_channels, 1])
        # (B, in_channels, noise_upsample_factor).
        x = self.noise_upsample(z)
        # each block returns the updated signal and the updated conditioning
        for block in self.blocks:
            x, c = block(x, c)
        x = self.output_conv(x)
        return x

    def apply_weight_norm(self):
        """Recursively apply weight normalization to all the Convolution layers
        in the sublayers.

        Only ``nn.Conv1D`` and ``nn.Conv1DTranspose`` instances are wrapped.
        """

        def _apply_weight_norm(layer):
            if isinstance(layer, (nn.Conv1D, nn.Conv1DTranspose)):
                nn.utils.weight_norm(layer)

        self.apply(_apply_weight_norm)

    def remove_weight_norm(self):
        """Recursively remove weight normalization from all the Convolution 
        layers in the sublayers.

        Falsy layers are skipped, and ``ValueError`` / ``AttributeError``
        raised by ``nn.utils.remove_weight_norm`` are ignored.
        """

        def _remove_weight_norm(layer):
            try:
                if layer:
                    nn.utils.remove_weight_norm(layer)
            # add AttributeError to bypass https://github.com/PaddlePaddle/Paddle/issues/38532 temporarily
            except (ValueError, AttributeError):
                pass

        self.apply(_remove_weight_norm)

    def reset_parameters(self):
        """Reset parameters.
        This initialization follows official implementation manner.
        https://github.com/descriptinc/melgan-neurips/blob/master/mel2wav/modules.py

        The weights of ``nn.Conv1D`` and ``nn.Conv1DTranspose`` layers are
        overwritten with samples from a normal distribution (loc 0.0,
        scale 0.02).
        """
        # Normal distribution defined with float parameters.
        dist = paddle.distribution.Normal(loc=0.0, scale=0.02)

        def _reset_parameters(m):
            if isinstance(m, nn.Conv1D) or isinstance(m, nn.Conv1DTranspose):
                w = dist.sample(m.weight.shape)
                m.weight.set_value(w)

        self.apply(_reset_parameters)

    def inference(self, c):
        """Perform inference.
        Args:
            c (Tensor): 
                Input tensor (T, in_channels).
                NOTE(review): ``c`` is used as the conditioning input of the
                blocks, so the second axis is presumably aux_channels --
                confirm.
        Returns:
            Tensor: Output tensor (T ** prod(upsample_scales), out_channels).
        """
        # (1, in_channels, T)
        c = c.transpose([1, 0]).unsqueeze(0)
        c_shape = paddle.shape(c)
        # prepare noise input
        # there is a bug in Paddle int division, we must convert a int tensor to int here
        # number of noise frames: ceil(T / noise_upsample_factor)
        noise_T = paddle.cast(
            paddle.ceil(c_shape[2] / int(self.noise_upsample_factor)),
            dtype='int64')
        noise_size = (1, self.in_channels, noise_T)
        # (1, in_channels, T/noise_upsample_factor)
        noise = paddle.randn(noise_size)
        # (1, in_channels, T)
        x = self.noise_upsample(noise)
        x_shape = paddle.shape(x)
        # length the output is trimmed to at the end
        total_length = c_shape[2] * self.upsample_factor
        # Dygraph to Static Graph bug here, 2021.12.15
        # pad the end of c in "replicate" mode so its length matches x
        c = F.pad(
            c, (0, x_shape[2] - c_shape[2]), "replicate", data_format="NCL")
        # c.shape[2] == x.shape[2] here
        # (1, in_channels, T*prod(upsample_scales))
        for block in self.blocks:
            x, c = block(x, c)
        # drop the samples produced from the padded tail
        x = self.output_conv(x)[..., :total_length]
        return x.squeeze(0).transpose([1, 0])


class StyleMelGANDiscriminator(nn.Layer):
    """Style MelGAN discriminator module.

    For every configured window size a random window is cut from the input,
    passed through the matching PQMF analysis filterbank (or left untouched
    when the filterbank is ``nn.Identity``) and scored by its own base
    discriminator. The whole procedure is repeated ``repeats`` times.
    """

    def __init__(
            self,
            repeats: int=2,
            window_sizes: List[int]=[512, 1024, 2048, 4096],
            pqmf_params: List[List[int]]=[
                [1, None, None, None],
                [2, 62, 0.26700, 9.0],
                [4, 62, 0.14200, 9.0],
                [8, 62, 0.07949, 9.0],
            ],
            discriminator_params: Dict[str, Any]={
                "out_channels": 1,
                "kernel_sizes": [5, 3],
                "channels": 16,
                "max_downsample_channels": 512,
                "bias": True,
                "downsample_scales": [4, 4, 4, 1],
                "nonlinear_activation": "leakyrelu",
                "nonlinear_activation_params": {
                    "negative_slope": 0.2
                },
                "pad": "Pad1D",
                "pad_params": {
                    "mode": "reflect"
                },
            },
            use_weight_norm: bool=True,
            init_type: str="xavier_uniform", ):
        """Initialize Style MelGAN discriminator.

        Args:
            repeats (int): 
                Number of repetitions to apply RWD.
            window_sizes (list): 
                List of random window sizes.
            pqmf_params (list): 
                List of list of Parameters for PQMF modules. The first item
                of each entry is the number of sub-bands; an entry whose
                first item is 1 gets ``nn.Identity`` instead of a PQMF.
            discriminator_params (dict): 
                Parameters for base discriminator module. ``in_channels`` is
                filled in per discriminator from the sub-band count.
            use_weight_norm (bool): 
                Whether to apply weight normalization.
            init_type (str):
                Initializer name forwarded to ``initialize``.
        """
        super().__init__()

        # initialize parameters
        initialize(self, init_type)

        # window size check: one PQMF per window, and every window must have
        # the same length once divided by its sub-band count
        assert len(window_sizes) == len(pqmf_params)
        sizes = [ws // p[0] for ws, p in zip(window_sizes, pqmf_params)]
        assert all(size == sizes[0] for size in sizes)

        self.repeats = repeats
        self.window_sizes = window_sizes
        self.pqmfs = nn.LayerList()
        self.discriminators = nn.LayerList()
        for pqmf_param in pqmf_params:
            # deep copy so the shared (default) dict is never mutated
            d_params = copy.deepcopy(discriminator_params)
            d_params["in_channels"] = pqmf_param[0]
            if pqmf_param[0] == 1:
                self.pqmfs.append(nn.Identity())
            else:
                self.pqmfs.append(PQMF(*pqmf_param))
            self.discriminators.append(BaseDiscriminator(**d_params))

        nn.initializer.set_global_initializer(None)

        # apply weight norm
        if use_weight_norm:
            self.apply_weight_norm()

        # reset parameters
        self.reset_parameters()

    def forward(self, x):
        """Calculate forward propagation.
        Args:
            x (Tensor): 
                Input tensor (B, 1, T).
        Returns:
            List: List of discriminator outputs, #items in the list will be
                equal to repeats * #discriminators.
        """
        outs = []
        for _ in range(self.repeats):
            outs += self._forward(x)
        return outs

    def _forward(self, x):
        """Score one random window per (window size, PQMF, discriminator) triple.

        Args:
            x (Tensor):
                Input tensor whose last axis is time and is at least as long
                as the largest window size.
        Returns:
            List: One discriminator output per configured window size.
        Raises:
            ValueError: Raised by ``np.random.randint`` when the input is
                shorter than a window.
        """
        outs = []
        for ws, pqmf, disc in zip(self.window_sizes, self.pqmfs,
                                  self.discriminators):
            # Valid start offsets are 0 .. T - ws inclusive, hence the "+ 1":
            # without it the last window is never drawn and T == ws raises.
            start_idx = int(np.random.randint(paddle.shape(x)[-1] - ws + 1))
            x_ = x[:, :, start_idx:start_idx + ws]
            if isinstance(pqmf, nn.Identity):
                # single-band entry: the window is used as is
                x_ = pqmf(x_)
            else:
                x_ = pqmf.analysis(x_)
            outs += [disc(x_)]
        return outs

    def apply_weight_norm(self):
        """Recursively apply weight normalization to all the Convolution layers
        in the sublayers.

        Only ``nn.Conv1D`` and ``nn.Conv1DTranspose`` instances are wrapped.
        """

        def _apply_weight_norm(layer):
            if isinstance(layer, (nn.Conv1D, nn.Conv1DTranspose)):
                nn.utils.weight_norm(layer)

        self.apply(_apply_weight_norm)

    def remove_weight_norm(self):
        """Recursively remove weight normalization from all the Convolution 
        layers in the sublayers.

        A ``ValueError`` raised by ``nn.utils.remove_weight_norm`` is ignored.
        """

        def _remove_weight_norm(layer):
            try:
                nn.utils.remove_weight_norm(layer)
            except ValueError:
                pass

        self.apply(_remove_weight_norm)

    def reset_parameters(self):
        """Reset parameters.
        This initialization follows official implementation manner.
        https://github.com/descriptinc/melgan-neurips/blob/master/mel2wav/modules.py
        """
        # Normal distribution defined with float parameters.
        dist = paddle.distribution.Normal(loc=0.0, scale=0.02)

        def _reset_parameters(m):
            if isinstance(m, (nn.Conv1D, nn.Conv1DTranspose)):
                w = dist.sample(m.weight.shape)
                m.weight.set_value(w)

        self.apply(_reset_parameters)


class StyleMelGANInference(nn.Layer):
    """Inference wrapper that chains a normalizer and a Style MelGAN generator."""

    def __init__(self, normalizer, style_melgan_generator):
        """Keep references to the two stages of the pipeline.

        Args:
            normalizer:
                Callable applied to the input log-mel features first.
            style_melgan_generator:
                Object whose ``inference`` method consumes the normalized
                features.
        """
        super().__init__()
        self.normalizer = normalizer
        self.style_melgan_generator = style_melgan_generator

    def forward(self, logmel):
        """Normalize ``logmel`` and return the generator's ``inference`` output."""
        return self.style_melgan_generator.inference(self.normalizer(logmel))


================================================
FILE: paddlespeech/t2s/models/melgan/style_melgan_updater.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from pathlib import Path
from typing import Dict

import paddle
from paddle import distributed as dist
from paddle.io import DataLoader
from paddle.nn import Layer
from paddle.optimizer import Optimizer
from paddle.optimizer.lr import LRScheduler

from paddlespeech.t2s.training.extensions.evaluator import StandardEvaluator
from paddlespeech.t2s.training.reporter import report
from paddlespeech.t2s.training.updaters.standard_updater import StandardUpdater
from paddlespeech.t2s.training.updaters.standard_updater import UpdaterState
# Module-level logger; records below INFO are dropped by this logger.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
# Configure the root logger's output format (timestamp, level, file:line,
# message). basicConfig does nothing if the root logger already has handlers.
logging.basicConfig(
    datefmt='[%Y-%m-%d %H:%M:%S]',
    format='%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(message)s')


class StyleMelGANUpdater(StandardUpdater):
    """Training updater for Style MelGAN.

    Each ``update_core`` call runs, depending on the current iteration, a
    generator step (multi-resolution STFT loss, plus the adversarial loss
    once discriminator training has started) followed by a discriminator
    step.
    """

    def __init__(self,
                 models: Dict[str, Layer],
                 optimizers: Dict[str, Optimizer],
                 criterions: Dict[str, Layer],
                 schedulers: Dict[str, LRScheduler],
                 dataloader: DataLoader,
                 generator_train_start_steps: int=0,
                 discriminator_train_start_steps: int=100000,
                 lambda_adv: float=1.0,
                 lambda_aux: float=1.0,
                 output_dir: Path=None):
        """Initialize the updater.

        Args:
            models (Dict[str, Layer]):
                Must contain the keys ``'generator'`` and ``'discriminator'``.
            optimizers (Dict[str, Optimizer]):
                Must contain the keys ``'generator'`` and ``'discriminator'``.
            criterions (Dict[str, Layer]):
                Must contain the keys ``'stft'``, ``'gen_adv'`` and
                ``'dis_adv'``.
            schedulers (Dict[str, LRScheduler]):
                Must contain the keys ``'generator'`` and ``'discriminator'``.
            dataloader (DataLoader):
                Source of training batches; an iterator over it is created
                here.
            generator_train_start_steps (int):
                The generator step runs only while the iteration counter is
                strictly greater than this value.
            discriminator_train_start_steps (int):
                The adversarial loss and the discriminator step are enabled
                only while the iteration counter is strictly greater than
                this value.
            lambda_adv (float):
                Weight of the adversarial loss in the generator loss.
            lambda_aux (float):
                Weight of the auxiliary (STFT) loss in the generator loss.
            output_dir (Path):
                Directory that receives ``worker_<rank>.log``. When ``None``
                no file handler is attached and ``self.filehandler`` is
                ``None``.
        """
        self.models = models
        self.generator: Layer = models['generator']
        self.discriminator: Layer = models['discriminator']

        self.optimizers = optimizers
        self.optimizer_g: Optimizer = optimizers['generator']
        self.optimizer_d: Optimizer = optimizers['discriminator']

        self.criterions = criterions
        self.criterion_stft = criterions['stft']
        self.criterion_gen_adv = criterions["gen_adv"]
        self.criterion_dis_adv = criterions["dis_adv"]

        self.schedulers = schedulers
        self.scheduler_g = schedulers['generator']
        self.scheduler_d = schedulers['discriminator']

        self.dataloader = dataloader

        self.generator_train_start_steps = generator_train_start_steps
        self.discriminator_train_start_steps = discriminator_train_start_steps
        self.lambda_adv = lambda_adv
        self.lambda_aux = lambda_aux

        self.state = UpdaterState(iteration=0, epoch=0)
        self.train_iterator = iter(self.dataloader)

        # ``output_dir`` defaults to None, and ``None / 'worker_...'`` would
        # raise TypeError, so the per-rank file handler is attached only when
        # a directory was actually supplied. ``Path(...)`` also lets callers
        # pass a plain string.
        self.filehandler = None
        if output_dir is not None:
            log_file = Path(output_dir) / 'worker_{}.log'.format(
                dist.get_rank())
            self.filehandler = logging.FileHandler(str(log_file))
            logger.addHandler(self.filehandler)
        self.logger = logger
        self.msg = ""

    def update_core(self, batch):
        """Run one training step on ``batch``.

        Args:
            batch:
                A pair ``(wav, mel)``: the target waveform and the features
                fed to the generator.

        Every scalar loss is passed to ``report`` under ``train/<name>`` and
        appended to ``self.msg`` as ``<name>: <value>``.
        """
        self.msg = "Rank: {}, ".format(dist.get_rank())
        losses_dict = {}
        # parse batch
        wav, mel = batch

        # Generator
        if self.state.iteration > self.generator_train_start_steps:
            # generator output: (B, out_channels, T * prod(upsample_scales))
            wav_ = self.generator(mel)

            # initialize
            gen_loss = 0.0
            aux_loss = 0.0

            # full band multi-resolution stft loss
            sc_loss, mag_loss = self.criterion_stft(wav_, wav)
            aux_loss += sc_loss + mag_loss
            report("train/spectral_convergence_loss", float(sc_loss))
            report("train/log_stft_magnitude_loss", float(mag_loss))
            losses_dict["spectral_convergence_loss"] = float(sc_loss)
            losses_dict["log_stft_magnitude_loss"] = float(mag_loss)

            gen_loss += aux_loss * self.lambda_aux

            # adversarial loss (only once the discriminator is being trained)
            if self.state.iteration > self.discriminator_train_start_steps:
                p_ = self.discriminator(wav_)
                adv_loss = self.criterion_gen_adv(p_)
                report("train/adversarial_loss", float(adv_loss))
                losses_dict["adversarial_loss"] = float(adv_loss)

                gen_loss += self.lambda_adv * adv_loss

            report("train/generator_loss", float(gen_loss))
            losses_dict["generator_loss"] = float(gen_loss)

            self.optimizer_g.clear_grad()
            gen_loss.backward()

            self.optimizer_g.step()
            self.scheduler_g.step()

        # Discriminator
        if self.state.iteration > self.discriminator_train_start_steps:
            # re-compute wav_ which leads better quality
            with paddle.no_grad():
                wav_ = self.generator(mel)

            p = self.discriminator(wav)
            # detach so the discriminator loss does not reach the generator
            p_ = self.discriminator(wav_.detach())
            real_loss, fake_loss = self.criterion_dis_adv(p_, p)
            dis_loss = real_loss + fake_loss
            report("train/real_loss", float(real_loss))
            report("train/fake_loss", float(fake_loss))
            report("train/discriminator_loss", float(dis_loss))
            losses_dict["real_loss"] = float(real_loss)
            losses_dict["fake_loss"] = float(fake_loss)
            losses_dict["discriminator_loss"] = float(dis_loss)

            self.optimizer_d.clear_grad()
            dis_loss.backward()

            self.optimizer_d.step()
            self.scheduler_d.step()

        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())


class StyleMelGANEvaluator(StandardEvaluator):
    """Evaluator that reports StyleMelGAN generator and discriminator losses.

    Args:
        models (Dict[str, Layer]):
            Must provide the keys 'generator' and 'discriminator'.
        criterions (Dict[str, Layer]):
            Must provide the keys 'stft', 'gen_adv' and 'dis_adv'.
        dataloader (DataLoader):
            Stored as `self.dataloader`; presumably iterated by the base
            class (not visible here).
        lambda_adv (float, optional):
            Weight of the adversarial term in the generator loss, by default 1.0
        lambda_aux (float, optional):
            Weight of the STFT term in the generator loss, by default 1.0
        output_dir (Path, optional):
            Directory that receives 'worker_{rank}.log'. The default is None,
            but the value is used with the `/` operator, so a path is needed.
    """

    def __init__(self,
                 models: Dict[str, Layer],
                 criterions: Dict[str, Layer],
                 dataloader: DataLoader,
                 lambda_adv: float=1.0,
                 lambda_aux: float=1.0,
                 output_dir: Path=None):
        # data source and loss weights
        self.dataloader = dataloader
        self.lambda_adv, self.lambda_aux = lambda_adv, lambda_aux

        # networks
        self.models = models
        self.generator = models['generator']
        self.discriminator = models['discriminator']

        # loss functions
        self.criterions = criterions
        self.criterion_stft = criterions['stft']
        self.criterion_gen_adv = criterions["gen_adv"]
        self.criterion_dis_adv = criterions["dis_adv"]

        # one log file per distributed worker, attached to the module logger
        worker_log = output_dir / 'worker_{}.log'.format(dist.get_rank())
        handler = logging.FileHandler(str(worker_log))
        logger.addHandler(handler)
        self.filehandler = handler
        self.logger = logger
        self.msg = ""

    def evaluate_core(self, batch):
        """Compute, report and log every loss for one evaluation batch.

        Args:
            batch: A pair `(wav, mel)`; `mel` is fed to the generator and
                `wav` is the reference waveform.
        """
        wav, mel = batch
        self.msg = "Evaluate: "
        losses_dict = {}

        # Generator
        # presumably (B, out_channels, T * prod(upsample_scales)) -- TODO confirm
        wav_ = self.generator(mel)

        # accumulators start as Python floats
        gen_loss = 0.0
        aux_loss = 0.0

        # adversarial loss
        p_ = self.discriminator(wav_)
        adv_loss = self.criterion_gen_adv(p_)
        adv_value = float(adv_loss)
        report("eval/adversarial_loss", adv_value)
        losses_dict["adversarial_loss"] = adv_value

        gen_loss += self.lambda_adv * adv_loss

        # multi-resolution stft loss
        sc_loss, mag_loss = self.criterion_stft(wav_, wav)
        aux_loss += sc_loss + mag_loss
        stft_terms = (
            ("eval/spectral_convergence_loss", "spectral_convergence_loss",
             sc_loss),
            ("eval/log_stft_magnitude_loss", "log_stft_magnitude_loss",
             mag_loss), )
        for tag, key, loss in stft_terms:
            value = float(loss)
            report(tag, value)
            losses_dict[key] = value

        gen_loss += aux_loss * self.lambda_aux

        gen_value = float(gen_loss)
        report("eval/generator_loss", gen_value)
        losses_dict["generator_loss"] = gen_value

        # Discriminator: the fake logits p_ computed above are reused
        p = self.discriminator(wav)
        real_loss, fake_loss = self.criterion_dis_adv(p_, p)
        dis_loss = real_loss + fake_loss
        dis_terms = (
            ("eval/real_loss", "real_loss", real_loss),
            ("eval/fake_loss", "fake_loss", fake_loss),
            ("eval/discriminator_loss", "discriminator_loss", dis_loss), )
        for tag, key, loss in dis_terms:
            value = float(loss)
            report(tag, value)
            losses_dict[key] = value

        # one "name: value" entry per loss, in insertion order
        entries = [
            '{}: {:>.6f}'.format(name, value)
            for name, value in losses_dict.items()
        ]
        self.msg = self.msg + ', '.join(entries)
        self.logger.info(self.msg)


================================================
FILE: paddlespeech/t2s/models/parallel_wavegan/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .parallel_wavegan import *
from .parallel_wavegan_updater import *


================================================
FILE: paddlespeech/t2s/models/parallel_wavegan/parallel_wavegan.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
import math
from typing import Any
from typing import Dict
from typing import List
from typing import Optional

import numpy as np
import paddle
from paddle import nn

from paddlespeech.t2s.modules.activation import get_activation
from paddlespeech.t2s.modules.nets_utils import initialize
from paddlespeech.t2s.modules.residual_block import WaveNetResidualBlock as ResidualBlock
from paddlespeech.t2s.modules.upsample import ConvInUpsampleNet


class PWGGenerator(nn.Layer):
    """Parallel WaveGAN generator.

    A 1x1 convolution lifts the input waveform to `residual_channels`, a stack
    of WaveNet residual blocks conditioned on upsampled auxiliary features
    accumulates skip outputs, and two 1x1 convolutions produce the result.

    Args:
        in_channels (int, optional): 
            Number of channels of the input waveform, by default 1
        out_channels (int, optional): 
            Number of channels of the output waveform, by default 1
        kernel_size (int, optional): 
            Kernel size of the residual blocks, by default 3
        layers (int, optional): 
            Number of residual blocks, by default 30
        stacks (int, optional):
            Number of groups the residual blocks are split into, by default 3.
            `layers` must be divisible by `stacks`; inside a group the
            dilation is 1, 2, 4, ...
        residual_channels (int, optional): 
            Residual channels of the residual blocks, by default 64
        gate_channels (int, optional): 
            Gate channels of the residual blocks, by default 128
        skip_channels (int, optional): 
            Skip channels of the residual blocks, by default 64
        aux_channels (int, optional): 
            Channels of the auxiliary input, by default 80
        aux_context_window (int, optional): 
            Context window passed to the upsample network; also the amount
            of replicate padding applied in `inference`, by default 2
        dropout (float, optional): 
            Dropout of the residual blocks, by default 0.
        bias (bool, optional): 
            Whether the residual blocks use bias, by default True
        use_weight_norm (bool, optional): 
            Whether to apply weight norm to all Conv1D/Conv2D sublayers, by default True
        use_causal_conv (bool, optional): 
            Passed to the upsample network and the residual blocks, by default False
        upsample_scales (List[int], optional): 
            Upsample scales of the upsample network, by default [4, 4, 4, 4]
        nonlinear_activation (Optional[str], optional): 
            Nonlinear activation in the upsample network (lower-cased
            before use), by default None
        nonlinear_activation_params (Dict[str, Any], optional): 
            Parameters passed to the nonlinear activation in the upsample network, by default {}
        interpolate_mode (str, optional): 
            Interpolation mode of the upsample network, by default "nearest"
        freq_axis_kernel_size (int, optional): 
            Kernel size along the frequency axis of the upsample network, by default 1
        init_type (str, optional):
            Forwarded to `initialize(self, init_type)`, by default "xavier_uniform"
    """

    def __init__(
            self,
            in_channels: int=1,
            out_channels: int=1,
            kernel_size: int=3,
            layers: int=30,
            stacks: int=3,
            residual_channels: int=64,
            gate_channels: int=128,
            skip_channels: int=64,
            aux_channels: int=80,
            aux_context_window: int=2,
            dropout: float=0.,
            bias: bool=True,
            use_weight_norm: bool=True,
            use_causal_conv: bool=False,
            upsample_scales: List[int]=[4, 4, 4, 4],
            nonlinear_activation: Optional[str]=None,
            nonlinear_activation_params: Dict[str, Any]={},
            interpolate_mode: str="nearest",
            freq_axis_kernel_size: int=1,
            init_type: str="xavier_uniform", ):
        super().__init__()

        # set up parameter initialization before any sublayer is built
        initialize(self, init_type)

        # for compatibility: accept any capitalization of the activation name
        nonlinear_activation = (nonlinear_activation.lower()
                                if nonlinear_activation else
                                nonlinear_activation)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.aux_channels = aux_channels
        self.aux_context_window = aux_context_window
        self.layers = layers
        self.stacks = stacks
        self.kernel_size = kernel_size

        assert layers % stacks == 0
        layers_per_stack = layers // stacks

        # input projection
        self.first_conv = nn.Conv1D(
            in_channels, residual_channels, 1, bias_attr=True)

        # auxiliary feature upsampler
        self.upsample_net = ConvInUpsampleNet(
            upsample_scales=upsample_scales,
            nonlinear_activation=nonlinear_activation,
            nonlinear_activation_params=nonlinear_activation_params,
            interpolate_mode=interpolate_mode,
            freq_axis_kernel_size=freq_axis_kernel_size,
            aux_channels=aux_channels,
            aux_context_window=aux_context_window,
            use_causal_conv=use_causal_conv)
        self.upsample_factor = np.prod(upsample_scales)

        # residual blocks; dilation restarts at 1 at the start of each stack
        self.conv_layers = nn.LayerList()
        for index in range(layers):
            self.conv_layers.append(
                ResidualBlock(
                    kernel_size=kernel_size,
                    residual_channels=residual_channels,
                    gate_channels=gate_channels,
                    skip_channels=skip_channels,
                    aux_channels=aux_channels,
                    dilation=2**(index % layers_per_stack),
                    dropout=dropout,
                    bias=bias,
                    use_causal_conv=use_causal_conv))

        # output head applied to the accumulated skip connections
        output_head = [
            nn.ReLU(),
            nn.Conv1D(skip_channels, skip_channels, 1, bias_attr=True),
            nn.ReLU(),
            nn.Conv1D(skip_channels, out_channels, 1, bias_attr=True),
        ]
        self.last_conv_layers = nn.Sequential(*output_head)

        if use_weight_norm:
            self.apply_weight_norm()

    def forward(self, x, c):
        """Generate waveform.

        Args:
            x(Tensor): 
                Shape (N, C_in, T), the input waveform.
            c(Tensor): 
                Shape (N, C_aux, T'), the auxiliary input (e.g. spectrogram).
                After upsampling its last dimension must equal T.

        Returns:
            Tensor: Shape (N, C_out, T), the generated waveform.
        """
        c = self.upsample_net(c)
        assert c.shape[-1] == x.shape[-1]

        x = self.first_conv(x)
        skip_sum = 0
        for block in self.conv_layers:
            x, skip = block(x, c)
            skip_sum += skip
        # scale the sum by 1 / sqrt(number of blocks)
        scale = math.sqrt(1.0 / len(self.conv_layers))
        skip_sum *= scale

        return self.last_conv_layers(skip_sum)

    def apply_weight_norm(self):
        """Apply weight normalization to every Conv1D and Conv2D sublayer."""

        def _wrap(sublayer):
            if isinstance(sublayer, (nn.Conv1D, nn.Conv2D)):
                nn.utils.weight_norm(sublayer)

        self.apply(_wrap)

    def remove_weight_norm(self):
        """Remove weight normalization from every sublayer.

        A ValueError raised by `nn.utils.remove_weight_norm` for a sublayer
        is ignored.
        """

        def _unwrap(sublayer):
            try:
                nn.utils.remove_weight_norm(sublayer)
            except ValueError:
                return

        self.apply(_unwrap)

    def inference(self, c=None):
        """Waveform generation for a single (unbatched) instance.

        The input noise is sampled inside this method rather than passed in.

        Args:
            c(Tensor, optional): 
                Shape (T', C_aux), the auxiliary input. The default is None,
                but a tensor is required in practice.

        Returns:
            Tensor: Shape (T, C_out), the generated waveform
        """
        # when to static, can not input x, see https://github.com/PaddlePaddle/Parakeet/pull/132/files
        num_samples = paddle.shape(c)[0] * self.upsample_factor
        x = paddle.randn([1, self.in_channels, num_samples])

        # (T', C_aux) -> (1, C_aux, T'): pseudo batch
        c = paddle.transpose(c, [1, 0]).unsqueeze(0)
        pad = nn.Pad1D(self.aux_context_window, mode='replicate')
        c = pad(c)

        generated = self(x, c)
        return generated.squeeze(0).transpose([1, 0])


class PWGDiscriminator(nn.Layer):
    """A convolutional discriminator for audio.

    Args:
        in_channels (int, optional): 
            Number of channels of the input audio, by default 1
        out_channels (int, optional): 
            Output feature size, by default 1
        kernel_size (int, optional): 
            Kernel size of convolutional sublayers (must be odd), by default 3
        layers (int, optional): 
            Number of convolutional layers, including the final one, by default 10
        conv_channels (int, optional): 
            Feature size of the convolutional sublayers, by default 64
        dilation_factor (int, optional): 
            The factor with which dilation of each convolutional sublayers grows 
            exponentially if it is greater than 1, else the dilation of each convolutional sublayers grows linearly, 
            by default 1
        nonlinear_activation (str, optional): 
            The activation after each convolutional sublayer, by default "leakyrelu"
        nonlinear_activation_params (Dict[str, Any], optional): 
            The parameters passed to the activation's initializer, by default {"negative_slope": 0.2}
        bias (bool, optional): 
            Whether to use bias in convolutional sublayers, by default True
        use_weight_norm (bool, optional): 
            Whether to use weight normalization at all convolutional sublayers, by default True
        init_type (str, optional):
            Forwarded to `initialize(self, init_type)`, by default "xavier_uniform"
    """

    def __init__(
            self,
            in_channels: int=1,
            out_channels: int=1,
            kernel_size: int=3,
            layers: int=10,
            conv_channels: int=64,
            dilation_factor: int=1,
            nonlinear_activation: str="leakyrelu",
            nonlinear_activation_params: Dict[str, Any]={"negative_slope": 0.2},
            bias: bool=True,
            use_weight_norm: bool=True,
            init_type: str="xavier_uniform", ):
        super().__init__()

        # initialize parameters
        initialize(self, init_type)
        # for compatibility
        if nonlinear_activation:
            nonlinear_activation = nonlinear_activation.lower()

        assert kernel_size % 2 == 1
        assert dilation_factor > 0
        conv_layers = []
        conv_in_channels = in_channels
        for i in range(layers - 1):
            if i == 0:
                # the linear schedule would give dilation 0 for the first layer
                dilation = 1
            else:
                dilation = i if dilation_factor == 1 else dilation_factor**i
            padding = (kernel_size - 1) // 2 * dilation
            conv_layer = nn.Conv1D(
                conv_in_channels,
                conv_channels,
                kernel_size,
                padding=padding,
                dilation=dilation,
                bias_attr=bias)
            nonlinear = get_activation(nonlinear_activation,
                                       **nonlinear_activation_params)
            conv_layers.append(conv_layer)
            conv_layers.append(nonlinear)
            # Every layer after this one consumes conv_channels features.
            # Updating here (rather than only when i > 0) keeps the final
            # convolution's input size correct when layers == 2.
            conv_in_channels = conv_channels
        padding = (kernel_size - 1) // 2
        last_conv = nn.Conv1D(
            conv_in_channels,
            out_channels,
            kernel_size,
            padding=padding,
            bias_attr=bias)
        conv_layers.append(last_conv)
        self.conv_layers = nn.Sequential(*conv_layers)

        if use_weight_norm:
            self.apply_weight_norm()

    def forward(self, x):
        """Run the convolutional stack on the input audio.

        Args:
            x (Tensor): 
                Shape (N, in_channels, num_samples), the input audio.

        Returns:
            Tensor: Shape (N, out_channels, num_samples), the predicted logits.
        """
        return self.conv_layers(x)

    def apply_weight_norm(self):
        """Apply weight normalization to every Conv1D and Conv2D sublayer."""

        def _apply_weight_norm(layer):
            if isinstance(layer, (nn.Conv1D, nn.Conv2D)):
                nn.utils.weight_norm(layer)

        self.apply(_apply_weight_norm)

    def remove_weight_norm(self):
        """Remove weight normalization from every sublayer.

        A ValueError raised by `nn.utils.remove_weight_norm` for a sublayer
        is deliberately ignored.
        """

        def _remove_weight_norm(layer):
            try:
                nn.utils.remove_weight_norm(layer)
            except ValueError:
                pass

        self.apply(_remove_weight_norm)


class ResidualPWGDiscriminator(nn.Layer):
    """A wavenet-style discriminator for audio, built from residual blocks
    that receive no auxiliary input.

    Args:
        in_channels (int, optional): 
            Number of channels of the input audio, by default 1
        out_channels (int, optional): 
            Output feature size, by default 1
        kernel_size (int, optional): 
            Kernel size of residual blocks (must be odd), by default 3
        layers (int, optional): 
            Number of residual blocks, by default 30
        stacks (int, optional): 
            Number of groups of residual blocks; `layers` must be divisible
            by it. Inside a group the dilation is 1, 2, 4, ..., by default 3
        residual_channels (int, optional): 
            Residual channels of residual blocks, by default 64
        gate_channels (int, optional): 
            Gate channels of residual blocks, by default 128
        skip_channels (int, optional): 
            Skip channels of residual blocks, by default 64
        dropout (float, optional): 
            Dropout probability of residual blocks, by default 0.
        bias (bool, optional): 
            Whether to use bias in residual blocks, by default True
        use_weight_norm (bool, optional): 
            Whether to apply weight normalization to all Conv1D/Conv2D sublayers, by default True
        use_causal_conv (bool, optional): 
            Whether to use causal convolution in residual blocks, by default False
        nonlinear_activation (str, optional): 
            Activation used outside the residual blocks (lower-cased before
            use), by default "leakyrelu"
        nonlinear_activation_params (Dict[str, Any], optional): 
            Parameters to pass to the activation, by default {"negative_slope": 0.2}
        init_type (str, optional):
            Forwarded to `initialize(self, init_type)`, by default "xavier_uniform"
    """

    def __init__(
            self,
            in_channels: int=1,
            out_channels: int=1,
            kernel_size: int=3,
            layers: int=30,
            stacks: int=3,
            residual_channels: int=64,
            gate_channels: int=128,
            skip_channels: int=64,
            dropout: float=0.,
            bias: bool=True,
            use_weight_norm: bool=True,
            use_causal_conv: bool=False,
            nonlinear_activation: str="leakyrelu",
            nonlinear_activation_params: Dict[str, Any]={"negative_slope": 0.2},
            init_type: str="xavier_uniform", ):
        super().__init__()

        # set up parameter initialization before any sublayer is built
        initialize(self, init_type)

        # for compatibility: accept any capitalization of the activation name
        nonlinear_activation = (nonlinear_activation.lower()
                                if nonlinear_activation else
                                nonlinear_activation)

        def _activation():
            # a fresh activation layer for each position that needs one
            return get_activation(nonlinear_activation,
                                  **nonlinear_activation_params)

        assert kernel_size % 2 == 1
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.layers = layers
        self.stacks = stacks
        self.kernel_size = kernel_size

        assert layers % stacks == 0
        layers_per_stack = layers // stacks

        # input projection followed by the activation
        self.first_conv = nn.Sequential(
            nn.Conv1D(in_channels, residual_channels, 1, bias_attr=True),
            _activation())

        # residual blocks; dilation restarts at 1 at the start of each stack
        self.conv_layers = nn.LayerList()
        for index in range(layers):
            self.conv_layers.append(
                ResidualBlock(
                    kernel_size=kernel_size,
                    residual_channels=residual_channels,
                    gate_channels=gate_channels,
                    skip_channels=skip_channels,
                    aux_channels=None,  # no auxiliary input
                    dropout=dropout,
                    dilation=2**(index % layers_per_stack),
                    bias=bias,
                    use_causal_conv=use_causal_conv))

        # output head applied to the accumulated skip connections
        output_head = [
            _activation(),
            nn.Conv1D(skip_channels, skip_channels, 1, bias_attr=True),
            _activation(),
            nn.Conv1D(skip_channels, out_channels, 1, bias_attr=True),
        ]
        self.last_conv_layers = nn.Sequential(*output_head)

        if use_weight_norm:
            self.apply_weight_norm()

    def forward(self, x):
        """Score the input audio.

        Args:
            x(Tensor): 
                Shape (N, in_channels, num_samples), the input audio.

        Returns:
            Tensor: Shape (N, out_channels, num_samples), the predicted logits.
        """
        hidden = self.first_conv(x)
        skip = 0
        for block in self.conv_layers:
            hidden, branch = block(hidden, None)
            skip += branch
        # scale the sum by 1 / sqrt(number of blocks)
        skip *= math.sqrt(1 / len(self.conv_layers))

        return self.last_conv_layers(skip)

    def apply_weight_norm(self):
        """Apply weight normalization to every Conv1D and Conv2D sublayer."""

        def _wrap(sublayer):
            if isinstance(sublayer, (nn.Conv1D, nn.Conv2D)):
                nn.utils.weight_norm(sublayer)

        self.apply(_wrap)

    def remove_weight_norm(self):
        """Remove weight normalization from every sublayer.

        A ValueError raised by `nn.utils.remove_weight_norm` for a sublayer
        is ignored.
        """

        def _unwrap(sublayer):
            try:
                nn.utils.remove_weight_norm(sublayer)
            except ValueError:
                return

        self.apply(_unwrap)


class PWGInference(nn.Layer):
    """Inference wrapper that chains a normalizer and a PWG generator.

    Args:
        normalizer: Callable applied to the input log-mel features.
        pwg_generator: Object whose `inference` method turns the normalized
            features into a waveform.
    """

    def __init__(self, normalizer, pwg_generator):
        super().__init__()
        self.normalizer = normalizer
        self.pwg_generator = pwg_generator

    def forward(self, logmel):
        """Normalize `logmel` and return the generator's `inference` output."""
        return self.pwg_generator.inference(self.normalizer(logmel))


================================================
FILE: paddlespeech/t2s/models/parallel_wavegan/parallel_wavegan_updater.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from pathlib import Path
from typing import Dict

import paddle
from paddle import distributed as dist
from paddle.io import DataLoader
from paddle.nn import Layer
from paddle.optimizer import Optimizer
from paddle.optimizer.lr import LRScheduler

from paddlespeech.t2s.training.extensions.evaluator import StandardEvaluator
from paddlespeech.t2s.training.reporter import report
from paddlespeech.t2s.training.updaters.standard_updater import StandardUpdater
from paddlespeech.t2s.training.updaters.standard_updater import UpdaterState

# Module-level logger shared by PWGUpdater and PWGEvaluator, which attach
# their per-worker file handlers to it.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Root logging configuration: timestamp, level and source location per record.
logging.basicConfig(
    datefmt='[%Y-%m-%d %H:%M:%S]',
    format='%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(message)s')


class PWGUpdater(StandardUpdater):
    """Updater that alternates Parallel WaveGAN generator and discriminator
    training steps.

    Args:
        models (Dict[str, Layer]):
            Must provide the keys 'generator' and 'discriminator'.
        optimizers (Dict[str, Optimizer]):
            Must provide the keys 'generator' and 'discriminator'.
        criterions (Dict[str, Layer]):
            Must provide the keys 'stft' and 'mse'.
        schedulers (Dict[str, LRScheduler]):
            Must provide the keys 'generator' and 'discriminator'.
        dataloader (DataLoader):
            Source of training batches; an iterator over it is created here.
        generator_train_start_steps (int, optional):
            The generator is updated only while `state.iteration` is greater
            than this value, by default 0
        discriminator_train_start_steps (int, optional):
            The discriminator (and the adversarial generator term) is used
            only while `state.iteration` is greater than this value,
            by default 100000
        lambda_adv (float, optional):
            Weight of the adversarial term in the generator loss, by default 1.0
        lambda_aux (float, optional):
            Weight of the STFT term in the generator loss, by default 1.0
        output_dir (Path, optional):
            Directory that receives 'worker_{rank}.log'. The default is None,
            but the value is used with the `/` operator, so a path is needed.
    """

    def __init__(self,
                 models: Dict[str, Layer],
                 optimizers: Dict[str, Optimizer],
                 criterions: Dict[str, Layer],
                 schedulers: Dict[str, LRScheduler],
                 dataloader: DataLoader,
                 generator_train_start_steps: int=0,
                 discriminator_train_start_steps: int=100000,
                 lambda_adv: float=1.0,
                 lambda_aux: float=1.0,
                 output_dir: Path=None):
        self.models = models
        self.generator: Layer = models['generator']
        self.discriminator: Layer = models['discriminator']

        self.optimizers = optimizers
        self.optimizer_g: Optimizer = optimizers['generator']
        self.optimizer_d: Optimizer = optimizers['discriminator']

        self.criterions = criterions
        self.criterion_stft = criterions['stft']
        self.criterion_mse = criterions['mse']

        self.schedulers = schedulers
        self.scheduler_g = schedulers['generator']
        self.scheduler_d = schedulers['discriminator']

        self.dataloader = dataloader

        self.generator_train_start_steps = generator_train_start_steps
        self.discriminator_train_start_steps = discriminator_train_start_steps
        self.lambda_adv = lambda_adv
        self.lambda_aux = lambda_aux

        self.state = UpdaterState(iteration=0, epoch=0)
        self.train_iterator = iter(self.dataloader)

        # one log file per distributed worker, attached to the module logger
        log_file = output_dir / 'worker_{}.log'.format(dist.get_rank())
        self.filehandler = logging.FileHandler(str(log_file))
        logger.addHandler(self.filehandler)
        self.logger = logger
        self.msg = ""

    def update_core(self, batch):
        """Run one training step on `batch` and record the losses.

        Args:
            batch: A pair `(wav, mel)`; `wav` is the reference waveform and
                `mel` conditions the generator.
        """
        self.msg = "Rank: {}, ".format(dist.get_rank())
        losses_dict = {}
        # parse batch
        wav, mel = batch

        # Sampled in the generator step and reused by the discriminator step.
        # It stays None when the generator step is skipped, in which case the
        # discriminator step samples it itself instead of failing on an
        # unbound name.
        noise = None

        # Generator
        if self.state.iteration > self.generator_train_start_steps:
            noise = paddle.randn(wav.shape)
            wav_ = self.generator(noise, mel)

            # initialize
            gen_loss = 0.0
            aux_loss = 0.0

            # multi-resolution stft loss
            sc_loss, mag_loss = self.criterion_stft(wav_, wav)
            aux_loss += sc_loss + mag_loss
            report("train/spectral_convergence_loss", float(sc_loss))
            report("train/log_stft_magnitude_loss", float(mag_loss))

            gen_loss += aux_loss * self.lambda_aux

            losses_dict["spectral_convergence_loss"] = float(sc_loss)
            losses_dict["log_stft_magnitude_loss"] = float(mag_loss)

            # adversarial loss
            if self.state.iteration > self.discriminator_train_start_steps:
                p_ = self.discriminator(wav_)
                adv_loss = self.criterion_mse(p_, paddle.ones_like(p_))
                report("train/adversarial_loss", float(adv_loss))
                losses_dict["adversarial_loss"] = float(adv_loss)

                gen_loss += self.lambda_adv * adv_loss

            report("train/generator_loss", float(gen_loss))
            losses_dict["generator_loss"] = float(gen_loss)

            self.optimizer_g.clear_grad()
            gen_loss.backward()

            self.optimizer_g.step()
            self.scheduler_g.step()

        # Discriminator
        if self.state.iteration > self.discriminator_train_start_steps:
            if noise is None:
                noise = paddle.randn(wav.shape)
            # regenerate the fake waveform without tracking gradients
            with paddle.no_grad():
                wav_ = self.generator(noise, mel)
            p = self.discriminator(wav)
            p_ = self.discriminator(wav_.detach())
            real_loss = self.criterion_mse(p, paddle.ones_like(p))
            fake_loss = self.criterion_mse(p_, paddle.zeros_like(p_))
            dis_loss = real_loss + fake_loss
            report("train/real_loss", float(real_loss))
            report("train/fake_loss", float(fake_loss))
            report("train/discriminator_loss", float(dis_loss))
            losses_dict["real_loss"] = float(real_loss)
            losses_dict["fake_loss"] = float(fake_loss)
            losses_dict["discriminator_loss"] = float(dis_loss)

            self.optimizer_d.clear_grad()
            dis_loss.backward()

            self.optimizer_d.step()
            self.scheduler_d.step()

        # one "name: value" entry per recorded loss, in insertion order
        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())


class PWGEvaluator(StandardEvaluator):
    """Evaluator that reports Parallel WaveGAN generator and discriminator
    losses.

    Args:
        models (Dict[str, Layer]):
            Must provide the keys 'generator' and 'discriminator'.
        criterions (Dict[str, Layer]):
            Must provide the keys 'stft' and 'mse'.
        dataloader (DataLoader):
            Stored as `self.dataloader`; presumably iterated by the base
            class (not visible here).
        lambda_adv (float, optional):
            Weight of the adversarial term in the generator loss, by default 1.0
        lambda_aux (float, optional):
            Weight of the STFT term in the generator loss, by default 1.0
        output_dir (Path, optional):
            Directory that receives 'worker_{rank}.log'. The default is None,
            but the value is used with the `/` operator, so a path is needed.
    """

    def __init__(self,
                 models: Dict[str, Layer],
                 criterions: Dict[str, Layer],
                 dataloader: DataLoader,
                 lambda_adv: float=1.0,
                 lambda_aux: float=1.0,
                 output_dir: Path=None):
        # data source and loss weights
        self.dataloader = dataloader
        self.lambda_adv, self.lambda_aux = lambda_adv, lambda_aux

        # networks
        self.models = models
        self.generator = models['generator']
        self.discriminator = models['discriminator']

        # loss functions
        self.criterions = criterions
        self.criterion_stft = criterions['stft']
        self.criterion_mse = criterions['mse']

        # one log file per distributed worker, attached to the module logger
        worker_log = output_dir / 'worker_{}.log'.format(dist.get_rank())
        handler = logging.FileHandler(str(worker_log))
        logger.addHandler(handler)
        self.filehandler = handler
        self.logger = logger
        self.msg = ""

    def evaluate_core(self, batch):
        """Compute, report and log every loss for one evaluation batch.

        Args:
            batch: A pair `(wav, mel)`; `wav` is the reference waveform and
                `mel` conditions the generator.
        """
        self.msg = "Evaluate: "
        losses_dict = {}
        wav, mel = batch

        # Generator, driven by noise with the same shape as the reference
        noise = paddle.randn(wav.shape)
        wav_ = self.generator(noise, mel)

        # accumulators start as Python floats
        gen_loss = 0.0
        aux_loss = 0.0

        # adversarial loss: fake logits are compared against all-ones
        p_ = self.discriminator(wav_)
        adv_loss = self.criterion_mse(p_, paddle.ones_like(p_))
        adv_value = float(adv_loss)
        report("eval/adversarial_loss", adv_value)
        losses_dict["adversarial_loss"] = adv_value

        gen_loss += self.lambda_adv * adv_loss

        # multi-resolution stft loss
        sc_loss, mag_loss = self.criterion_stft(wav_, wav)
        stft_terms = (
            ("eval/spectral_convergence_loss", "spectral_convergence_loss",
             sc_loss),
            ("eval/log_stft_magnitude_loss", "log_stft_magnitude_loss",
             mag_loss), )
        for tag, key, loss in stft_terms:
            value = float(loss)
            report(tag, value)
            losses_dict[key] = value
        aux_loss += sc_loss + mag_loss

        gen_loss += aux_loss * self.lambda_aux

        gen_value = float(gen_loss)
        report("eval/generator_loss", gen_value)
        losses_dict["generator_loss"] = gen_value

        # Discriminator: real logits target ones, the fake logits p_ computed
        # above target zeros
        p = self.discriminator(wav)
        real_loss = self.criterion_mse(p, paddle.ones_like(p))
        fake_loss = self.criterion_mse(p_, paddle.zeros_like(p_))
        dis_loss = real_loss + fake_loss
        dis_terms = (
            ("eval/real_loss", "real_loss", real_loss),
            ("eval/fake_loss", "fake_loss", fake_loss),
            ("eval/discriminator_loss", "discriminator_loss", dis_loss), )
        for tag, key, loss in dis_terms:
            value = float(loss)
            report(tag, value)
            losses_dict[key] = value

        # one "name: value" entry per loss, in insertion order
        entries = [
            '{}: {:>.6f}'.format(name, value)
            for name, value in losses_dict.items()
        ]
        self.msg = self.msg + ', '.join(entries)
        self.logger.info(self.msg)


================================================
FILE: paddlespeech/t2s/models/speedyspeech/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .speedyspeech import *
from .speedyspeech_updater import *


================================================
FILE: paddlespeech/t2s/models/speedyspeech/speedyspeech.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List

import paddle
from paddle import nn

from paddlespeech.t2s.modules.nets_utils import initialize
from paddlespeech.t2s.modules.predictor.length_regulator import LengthRegulator
from paddlespeech.t2s.modules.transformer.embedding import ScaledPositionalEncoding


class ResidualBlock(nn.Layer):
    """Residual stack of dilated 1D convolutions.

    The block chains ``n`` units of Conv1D -> ReLU -> BatchNorm1D and adds
    the result of the chain to its input.
    """

    def __init__(self,
                 channels: int=128,
                 kernel_size: int=3,
                 dilation: int=3,
                 n: int=2):
        """Build the residual block.
        Args:
            channels (int, optional):
                Feature size of the input, which is also the output size.
            kernel_size (int, optional):
                Kernel size of every 1D convolution.
            dilation (int, optional):
                Dilation of every 1D convolution.
            n (int):
                Number of Conv1D -> ReLU -> BatchNorm1D units to chain.
        """

        super().__init__()
        # Explicit per-side padding is used instead of padding='same',
        # because onnx does not support dilation together with 'same'.
        pad_total = dilation * (kernel_size - 1)
        pad_left = pad_total // 2
        pad_right = pad_total - pad_left

        units = []
        for _ in range(n):
            conv = nn.Conv1D(
                channels,
                channels,
                kernel_size,
                dilation=dilation,
                # keeps the output length equal to the input length
                padding=((0, 0), (0, 0), (pad_left, pad_right)))
            units.append(
                nn.Sequential(conv, nn.ReLU(), nn.BatchNorm1D(channels)))
        self.blocks = nn.Sequential(*units)

    def forward(self, x: paddle.Tensor):
        """Apply the convolution stack and add the skip connection.
        Args:
            x(Tensor):
                Batch of input sequences (B, hidden_size, Tmax).
        Returns:
            Tensor: Input plus stack output (B, hidden_size, Tmax).
        """
        stacked = self.blocks(x)
        return x + stacked


class TextEmbedding(nn.Layer):
    """Token embedding with an optional tone embedding.

    When tone ids are supplied, the tone embedding is either added to the
    token embedding or concatenated with it along the last axis.
    """

    def __init__(self,
                 vocab_size: int,
                 embedding_size: int,
                 tone_vocab_size: int=None,
                 tone_embedding_size: int=None,
                 padding_idx: int=None,
                 tone_padding_idx: int=None,
                 concat: bool=False):
        """Build the embedding tables.
        Args:
            vocab_size (int):
                Number of token ids.
            embedding_size (int):
                Size of the token embedding.
            tone_vocab_size (int, optional):
                Number of tone ids. When falsy, no tone table is created.
            tone_embedding_size (int, optional):
                Size of the tone embedding. Falls back to ``embedding_size``.
            padding_idx (int, optional):
                Padding index passed to the token embedding.
            tone_padding_idx (int, optional):
                Padding index passed to the tone embedding.
            concat (bool):
                Concatenate the two embeddings instead of summing them.
        Raises:
            ValueError: If the two embedding sizes differ and ``concat`` is
                False (embeddings of different sizes cannot be summed).
        """
        super().__init__()
        self.text_embedding = nn.Embedding(vocab_size, embedding_size,
                                           padding_idx)
        if tone_vocab_size:
            tone_embedding_size = tone_embedding_size or embedding_size
            if tone_embedding_size != embedding_size and not concat:
                raise ValueError(
                    "embedding size != tone_embedding size, only concat is available."
                )
            self.tone_embedding = nn.Embedding(
                tone_vocab_size, tone_embedding_size, tone_padding_idx)
        else:
            # Always define the attribute so that forward() can detect a
            # missing tone table instead of failing with AttributeError.
            self.tone_embedding = None
        self.concat = concat

    def forward(self, text: paddle.Tensor, tone: paddle.Tensor=None):
        """Calculate forward propagation.
        Args:
            text(Tensor(int64)):
                Batch of padded token ids (B, Tmax).
            tone(Tensor, optional(int64)):
                Batch of padded tone ids (B, Tmax).
        Returns:
            Tensor: The embedded sequence. Its last axis is embedding_size,
                or embedding_size + tone_embedding_size when tones are
                given and ``concat`` is True.
        Raises:
            ValueError: If ``tone`` is given but the layer was built
                without a tone vocabulary.
        """

        text_embed = self.text_embedding(text)
        if tone is None:
            return text_embed
        if self.tone_embedding is None:
            raise ValueError(
                "tone ids were given, but the layer was built without tone_vocab_size."
            )
        tone_embed = self.tone_embedding(tone)
        if self.concat:
            embed = paddle.concat([text_embed, tone_embed], -1)
        else:
            embed = text_embed + tone_embed
        return embed


class SpeedySpeechEncoder(nn.Layer):
    """Encoder of SpeedySpeech.

    Embeds phones (and optionally tones and a speaker id), then runs a
    prenet, a stack of dilated residual convolution blocks and a postnet.

    Args:
        vocab_size (int):
            Number of phone ids.
        tone_size (Optional[int]):
            Number of tones.
        hidden_size (int):
            Number of encoder hidden units.
        kernel_size (int):
            Kernel size of the residual blocks.
        dilations (List[int]):
            Dilation of each residual block, one block per entry.
        spk_num (Optional[int]):
            Number of speakers. When falsy, no speaker embedding is built.
    """

    def __init__(self,
                 vocab_size: int,
                 tone_size: int,
                 hidden_size: int=128,
                 kernel_size: int=3,
                 dilations: List[int]=[1, 3, 9, 27, 1, 3, 9, 27, 1, 1],
                 spk_num=None):

        super().__init__()
        self.embedding = TextEmbedding(
            vocab_size,
            hidden_size,
            tone_size,
            padding_idx=0,
            tone_padding_idx=0)

        # Speaker table exists only for multi-speaker models.
        self.spk_emb = nn.Embedding(
            num_embeddings=spk_num, embedding_dim=hidden_size,
            padding_idx=0) if spk_num else None

        self.prenet = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(), )

        conv_stack = []
        for rate in dilations:
            conv_stack.append(
                ResidualBlock(hidden_size, kernel_size, rate, n=2))
        self.res_blocks = nn.Sequential(*conv_stack)

        self.postnet1 = nn.Sequential(nn.Linear(hidden_size, hidden_size))
        self.postnet2 = nn.Sequential(
            nn.ReLU(),
            nn.BatchNorm1D(hidden_size), )
        self.linear = nn.Linear(hidden_size, hidden_size)

    def forward(self,
                text: paddle.Tensor,
                tones: paddle.Tensor,
                spk_id: paddle.Tensor=None):
        """Encode the input sequence.
        Args:
            text(Tensor(int64)):
                Batch of padded token ids (B, Tmax).
            tones(Tensor, optional(int64)):
                Batch of padded tone ids (B, Tmax).
            spk_id(Tensor, optional(int64)):
                Batch of speaker ids (B,)

        Returns:
            Tensor: Output tensor (B, Tmax, hidden_size).
        """
        embedded = self.embedding(text, tones)
        if self.spk_emb:
            # one speaker vector per utterance, broadcast over time
            embedded += self.spk_emb(spk_id).unsqueeze(1)
        prenet_out = self.prenet(embedded)

        # the convolution stack works channel-first: (B, dim, T)
        conv_in = prenet_out.transpose([0, 2, 1])
        conv_out = self.res_blocks(conv_in).transpose([0, 2, 1])

        # back to (B, T, dim); skip connection around the conv stack
        hidden = prenet_out + self.postnet1(conv_out)
        hidden = self.postnet2(hidden.transpose([0, 2, 1]))
        hidden = hidden.transpose([0, 2, 1])
        return self.linear(hidden)


class DurationPredictor(nn.Layer):
    """Predicts one duration value per input position.

    Three single-unit residual blocks (kernel sizes 4, 3 and 1) are
    followed by a linear projection to a scalar.
    """

    def __init__(self, hidden_size: int=128):
        super().__init__()
        conv_layers = [
            ResidualBlock(hidden_size, 4, 1, n=1),
            ResidualBlock(hidden_size, 3, 1, n=1),
            ResidualBlock(hidden_size, 1, 1, n=1),
        ]
        self.layers = nn.Sequential(*conv_layers)
        self.linear = nn.Linear(hidden_size, 1)

    def forward(self, x: paddle.Tensor):
        """Calculate forward propagation.
        Args:
            x(Tensor):
                Batch of input sequences (B, Tmax, hidden_size).

        Returns:
            Tensor: Batch of predicted durations in log domain (B, Tmax).
        """
        # residual blocks expect channel-first input
        channel_first = x.transpose([0, 2, 1])
        features = self.layers(channel_first).transpose([0, 2, 1])
        projected = self.linear(features)
        # drop the trailing singleton axis left by the projection
        return paddle.squeeze(projected, -1)


class SpeedySpeechDecoder(nn.Layer):
    """Decoder of SpeedySpeech: dilated residual stack plus postnet."""

    def __init__(self,
                 hidden_size: int=128,
                 output_size: int=80,
                 kernel_size: int=3,
                 dilations: List[int]=[
                     1, 3, 9, 27, 1, 3, 9, 27, 1, 3, 9, 27, 1, 3, 9, 27, 1, 1
                 ]):
        """Build the decoder.
        Args:
            hidden_size (int):
                Number of decoder hidden units.
            output_size (int):
                Dimension of the outputs.
            kernel_size (int):
                Kernel size of the residual blocks.
            dilations (List[int]):
                Dilation of each residual block, one block per entry.
        """
        super().__init__()
        conv_stack = []
        for rate in dilations:
            conv_stack.append(
                ResidualBlock(hidden_size, kernel_size, rate, n=2))
        self.res_blocks = nn.Sequential(*conv_stack)

        self.postnet1 = nn.Sequential(nn.Linear(hidden_size, hidden_size))
        self.postnet2 = ResidualBlock(hidden_size, kernel_size, 1, n=2)
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Decode the input sequence.
        Args:
            x(Tensor):
                Input tensor (B, time, hidden_size).

        Returns:
            Tensor: Output tensor (B, time, output_size).
        """
        # the residual stack runs channel-first
        conv_out = self.res_blocks(x.transpose([0, 2, 1]))
        conv_out = conv_out.transpose([0, 2, 1])
        # skip connection around the stack
        hidden = x + self.postnet1(conv_out)
        refined = self.postnet2(hidden.transpose([0, 2, 1]))
        refined = refined.transpose([0, 2, 1])
        return self.linear(refined)


class SpeedySpeech(nn.Layer):
    """SpeedySpeech acoustic model.

    Combines an encoder, a duration predictor, a length regulator, a scaled
    positional encoding and a decoder.
    """

    def __init__(
            self,
            vocab_size,
            encoder_hidden_size: int=128,
            encoder_kernel_size: int=3,
            encoder_dilations: List[int]=[1, 3, 9, 27, 1, 3, 9, 27, 1, 1],
            duration_predictor_hidden_size: int=128,
            decoder_hidden_size: int=128,
            decoder_output_size: int=80,
            decoder_kernel_size: int=3,
            decoder_dilations: List[
                int]=[1, 3, 9, 27, 1, 3, 9, 27, 1, 3, 9, 27, 1, 3, 9, 27, 1, 1],
            tone_size: int=None,
            spk_num: int=None,
            init_type: str="xavier_uniform",
            positional_dropout_rate: float=0.1):
        """Initialize SpeedySpeech module.
        Args:
            vocab_size (int):
                Dimension of the inputs.
            encoder_hidden_size (int):
                Number of encoder hidden units.
            encoder_kernel_size (int):
                Kernel size of encoder.
            encoder_dilations (List[int]):
                Dilations of encoder.
            duration_predictor_hidden_size (int):
                Number of duration predictor hidden units.
            decoder_hidden_size (int):
                Number of decoder hidden units.
            decoder_kernel_size (int):
                Kernel size of decoder.
            decoder_dilations (List[int]):
                Dilations of decoder.
            decoder_output_size (int):
                Dimension of the outputs.
            tone_size (Optional[int]):
                Number of tones.
            spk_num (Optional[int]):
                Number of speakers.
            init_type (str):
                How to initialize the parameters.
            positional_dropout_rate (float):
                Dropout rate passed to the scaled positional encoding.

        """
        super().__init__()

        # initialize parameters
        # NOTE(review): this runs before any sublayer exists and is paired
        # with set_global_initializer(None) below, so it presumably installs
        # a global initializer -- confirm against nets_utils.initialize.
        initialize(self, init_type)

        encoder = SpeedySpeechEncoder(vocab_size, tone_size,
                                      encoder_hidden_size, encoder_kernel_size,
                                      encoder_dilations, spk_num)
        duration_predictor = DurationPredictor(duration_predictor_hidden_size)
        decoder = SpeedySpeechDecoder(decoder_hidden_size, decoder_output_size,
                                      decoder_kernel_size, decoder_dilations)
        self.position_enc = ScaledPositionalEncoding(encoder_hidden_size,
                                                     positional_dropout_rate)

        self.encoder = encoder
        self.duration_predictor = duration_predictor
        self.decoder = decoder
        # define length regulator
        self.length_regulator = LengthRegulator()

        # reset the global initializer so later layers are unaffected
        nn.initializer.set_global_initializer(None)

    def forward(self,
                text: paddle.Tensor,
                tones: paddle.Tensor,
                durations: paddle.Tensor,
                spk_id: paddle.Tensor=None):
        """Calculate forward propagation.
        Args:
            text(Tensor(int64)):
                Batch of padded token ids (B, Tmax).
            tones(Tensor, optional(int64)):
                Batch of padded tone ids (B, Tmax). May be None.
            durations(Tensor(int64)):
                Batch of padded durations (B, Tmax).
            spk_id(Tensor, optional(int64)):
                Batch of speaker ids (B,)

        Returns:
            Tensor:
                Output tensor (B, T_frames, decoder_output_size).
            Tensor:
                Predicted durations (B, Tmax).
        """
        # input of embedding must be int64
        text = paddle.cast(text, 'int64')
        # tones are optional (tone_size may be None): only cast when given,
        # the embedding layer already handles a missing tone input
        if tones is not None:
            tones = paddle.cast(tones, 'int64')
        if spk_id is not None:
            spk_id = paddle.cast(spk_id, 'int64')
        durations = paddle.cast(durations, 'int64')
        encodings = self.encoder(text, tones, spk_id)
        # detach: the duration predictor does not back-propagate into
        # the encoder
        pred_durations = self.duration_predictor(encodings.detach())
        # expand encodings with the ground-truth durations
        durations_to_expand = durations
        encodings = self.length_regulator(encodings, durations_to_expand)
        encodings = self.position_enc(encodings)
        # decode
        decoded = self.decoder(encodings)
        return decoded, pred_durations

    def inference(self,
                  text: paddle.Tensor,
                  tones: paddle.Tensor=None,
                  durations: paddle.Tensor=None,
                  spk_id: paddle.Tensor=None):
        """Generate the sequence of features given the sequences of characters.
        Args:
            text(Tensor(int64)):
                Input sequence of characters (T,).
            tones(Tensor, optional(int64)):
                Sequence of tone ids (T, ).
            durations(Tensor, optional (int64)):
                Groundtruth of duration (T,). When None, durations are
                predicted by the duration predictor.
            spk_id(Tensor, optional(int64)):
                spk ids (1,). (Default value = None)

        Returns:
            Tensor: decoder output of the single utterance
                (T_frames, decoder_output_size).
        """
        # input of embedding must be int64
        text = paddle.cast(text, 'int64')
        text = text.unsqueeze(0)
        if tones is not None:
            tones = paddle.cast(tones, 'int64')
            tones = tones.unsqueeze(0)
        # same dtype handling as in forward()
        if spk_id is not None:
            spk_id = paddle.cast(spk_id, 'int64')

        encodings = self.encoder(text, tones, spk_id)

        if durations is None:
            # (1, T)
            pred_durations = self.duration_predictor(encodings)
            # predictions are in the log domain: exponentiate, then round
            # to whole frame counts
            durations_to_expand = paddle.round(pred_durations.exp())
            durations_to_expand = durations_to_expand.astype(paddle.int64)
        else:
            # NOTE(review): user-supplied durations are passed through
            # without a cast or batch axis -- confirm what LengthRegulator
            # expects in inference mode.
            durations_to_expand = durations
        encodings = self.length_regulator(
            encodings, durations_to_expand, is_inference=True)
        encodings = self.position_enc(encodings)
        decoded = self.decoder(encodings)
        # drop the batch axis added above
        return decoded[0]


class SpeedySpeechInference(nn.Layer):
    """Inference wrapper: runs the acoustic model, then inverts the
    feature normalization on its output."""

    def __init__(self, normalizer, speedyspeech_model):
        super().__init__()
        self.normalizer = normalizer
        self.acoustic_model = speedyspeech_model

    def forward(self, phones, tones, spk_id=None, durations=None):
        """Return the de-normalized output of ``acoustic_model.inference``.
        Args:
            phones: Phone ids, forwarded as the ``text`` argument.
            tones: Tone ids.
            spk_id: Optional speaker id.
            durations: Optional durations to use instead of predicted ones.
        """
        model = self.acoustic_model
        mel_normalized = model.inference(
            phones, tones, durations=durations, spk_id=spk_id)
        return self.normalizer.inverse(mel_normalized)


================================================
FILE: paddlespeech/t2s/models/speedyspeech/speedyspeech_updater.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from pathlib import Path

import paddle
from paddle import distributed as dist
from paddle.io import DataLoader
from paddle.nn import functional as F
from paddle.nn import Layer
from paddle.optimizer import Optimizer

from paddlespeech.t2s.modules.losses import masked_l1_loss
from paddlespeech.t2s.modules.losses import ssim
from paddlespeech.t2s.modules.losses import weighted_mean
from paddlespeech.t2s.training.extensions.evaluator import StandardEvaluator
from paddlespeech.t2s.training.reporter import report
from paddlespeech.t2s.training.updaters.standard_updater import StandardUpdater
logging.basicConfig(
    format='%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(message)s',
    datefmt='[%Y-%m-%d %H:%M:%S]')
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class SpeedySpeechUpdater(StandardUpdater):
    """Training updater for SpeedySpeech.

    Each step optimizes the sum of a masked L1 spectrogram loss, an SSIM
    loss and a smooth-L1 duration loss, and reports every term.
    """

    def __init__(self,
                 model: Layer,
                 optimizer: Optimizer,
                 dataloader: DataLoader,
                 init_state=None,
                 output_dir: Path=None):
        """Build the updater and attach a per-rank log file.
        Args:
            model (Layer): The SpeedySpeech model to train.
            optimizer (Optimizer): Optimizer stepping the model parameters.
            dataloader (DataLoader): Source of training batches.
            init_state: Optional initial updater state, forwarded to
                ``StandardUpdater``.
            output_dir (Path): Directory that receives ``worker_<rank>.log``.
                It is required in practice: the default ``None`` cannot be
                joined with the log file name.
        """
        # Forward the caller's state; it used to be replaced by None.
        super().__init__(model, optimizer, dataloader, init_state=init_state)

        log_file = output_dir / 'worker_{}.log'.format(dist.get_rank())
        self.filehandler = logging.FileHandler(str(log_file))
        logger.addHandler(self.filehandler)
        self.logger = logger
        self.msg = ""

    def update_core(self, batch):
        """Run one optimization step on ``batch``.

        The batch is read by key: "phones", "tones", "durations", "feats",
        "num_frames", "num_phones" and optionally "spk_id". The loss
        summary is left in ``self.msg``; this method does not log it.
        """
        self.msg = "Rank: {}, ".format(dist.get_rank())
        losses_dict = {}

        # spk_id!=None in multiple spk speedyspeech
        spk_id = batch["spk_id"] if "spk_id" in batch else None

        decoded, predicted_durations = self.model(
            text=batch["phones"],
            tones=batch["tones"],
            durations=batch["durations"],
            spk_id=spk_id)

        target_mel = batch["feats"]
        # masks built from the true lengths so padding does not contribute
        spec_mask = F.sequence_mask(
            batch["num_frames"], dtype=target_mel.dtype).unsqueeze(-1)
        text_mask = F.sequence_mask(
            batch["num_phones"], dtype=predicted_durations.dtype)

        # spec loss
        l1_loss = masked_l1_loss(decoded, target_mel, spec_mask)

        # duration loss
        target_durations = batch["durations"]
        # clamp to >= 1 so the log below never sees zero
        target_durations = paddle.maximum(
            target_durations.astype(predicted_durations.dtype),
            paddle.to_tensor([1.0]))
        # the model predicts durations in the log domain
        duration_loss = weighted_mean(
            F.smooth_l1_loss(
                predicted_durations,
                paddle.log(target_durations),
                delta=1.0,
                reduction='none', ),
            text_mask, )

        # ssim loss
        ssim_loss = 1.0 - ssim((decoded * spec_mask).unsqueeze(1),
                               (target_mel * spec_mask).unsqueeze(1))

        loss = l1_loss + ssim_loss + duration_loss

        optimizer = self.optimizer
        optimizer.clear_grad()
        loss.backward()
        optimizer.step()

        report("train/loss", float(loss))
        report("train/l1_loss", float(l1_loss))
        report("train/duration_loss", float(duration_loss))
        report("train/ssim_loss", float(ssim_loss))

        losses_dict["l1_loss"] = float(l1_loss)
        losses_dict["duration_loss"] = float(duration_loss)
        losses_dict["ssim_loss"] = float(ssim_loss)
        losses_dict["loss"] = float(loss)
        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())


class SpeedySpeechEvaluator(StandardEvaluator):
    """Evaluator for SpeedySpeech.

    Computes the masked L1, SSIM and duration losses of a batch, reports
    them under the "eval/" prefix and logs a one-line summary.
    """

    def __init__(self,
                 model: Layer,
                 dataloader: DataLoader,
                 output_dir: Path=None):
        super().__init__(model, dataloader)

        # every rank writes to its own log file inside output_dir
        log_name = 'worker_{}.log'.format(dist.get_rank())
        log_path = output_dir / log_name
        self.filehandler = logging.FileHandler(str(log_path))
        logger.addHandler(self.filehandler)
        self.logger = logger
        self.msg = ""

    def evaluate_core(self, batch):
        """Evaluate one batch, then report and log its losses.

        The batch is read by key: "phones", "tones", "durations", "feats",
        "num_frames", "num_phones" and optionally "spk_id".
        """
        self.msg = "Evaluate: "

        # only multi-speaker batches carry a speaker id
        speaker = None
        if "spk_id" in batch:
            speaker = batch["spk_id"]

        mel_out, log_dur_out = self.model(
            text=batch["phones"],
            tones=batch["tones"],
            durations=batch["durations"],
            spk_id=speaker)

        mel_ref = batch["feats"]
        frame_mask = F.sequence_mask(batch["num_frames"], dtype=mel_ref.dtype)
        frame_mask = frame_mask.unsqueeze(-1)
        phone_mask = F.sequence_mask(
            batch["num_phones"], dtype=log_dur_out.dtype)

        # spectrogram reconstruction term
        l1_term = masked_l1_loss(mel_out, mel_ref, frame_mask)

        # duration term: compare in the log domain, with targets
        # clamped to at least 1
        dur_ref = batch["durations"].astype(log_dur_out.dtype)
        dur_ref = paddle.maximum(dur_ref, paddle.to_tensor([1.0]))
        per_phone = F.smooth_l1_loss(
            log_dur_out, paddle.log(dur_ref), delta=1.0, reduction='none')
        dur_term = weighted_mean(per_phone, phone_mask)

        # structural similarity term on the masked spectrograms
        similarity = ssim((mel_out * frame_mask).unsqueeze(1),
                          (mel_ref * frame_mask).unsqueeze(1))
        ssim_term = 1.0 - similarity

        total = l1_term + ssim_term + dur_term

        report("eval/loss", float(total))
        report("eval/l1_loss", float(l1_term))
        report("eval/duration_loss", float(dur_term))
        report("eval/ssim_loss", float(ssim_term))

        # insertion order fixes the order of the fields in the log line
        summary = {
            "l1_loss": float(l1_term),
            "duration_loss": float(dur_term),
            "ssim_loss": float(ssim_term),
            "loss": float(total),
        }
        fields = ['{}: {:>.6f}'.format(key, val) for key, val in summary.items()]
        self.msg += ', '.join(fields)
        self.logger.info(self.msg)


================================================
FILE: paddlespeech/t2s/models/starganv2_vc/AuxiliaryASR/__init__.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/models/starganv2_vc/AuxiliaryASR/config.yml
================================================
# Configuration for the StarGANv2-VC auxiliary ASR model.
# NOTE(review): the code that reads this file is not visible here; the
# remarks below are inferred from key names -- confirm against the loader.
log_dir: "logs"
save_freq: 20
device: "cuda"
epochs: 180
batch_size: 48
pretrained_model: ""
train_data: "asr_train_list.txt"
val_data: "asr_val_list.txt"

dataset_params:
  data_augmentation: true

# NOTE(review): "preprocess_parasm" looks like a misspelling of
# "preprocess_params". The key is left unchanged because a consumer may
# look it up by this exact name -- confirm before renaming.
preprocess_parasm:
  sr: 24000
  spect_params:
    n_fft: 2048
    win_length: 1200
    hop_length: 300
  mel_params:
    n_mels: 80

# input_dim equals mel_params.n_mels above (both 80).
model_params:
   input_dim: 80
   hidden_dim: 256
   n_token: 80
   token_embedding_dim: 256

optimizer_params:
  lr: 0.0005

================================================
FILE: paddlespeech/t2s/models/starganv2_vc/AuxiliaryASR/layers.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn.functional as F
from paddle import nn

from paddlespeech.audio.functional import create_dct
from paddlespeech.utils.initialize import _calculate_gain
from paddlespeech.utils.initialize import xavier_uniform_


def _get_activation_fn(activ):
    if activ == 'relu':
        return nn.ReLU()
    elif activ == 'lrelu':
        return nn.LeakyReLU(0.2)
    elif activ == 'swish':
        return nn.Swish()
    else:
        raise RuntimeError(
            'Unexpected activ type %s, expected [relu, lrelu, swish]' % activ)


class LinearNorm(nn.Layer):
    """Linear layer whose weight is Xavier-uniform initialized with a gain
    chosen by the name of the following non-linearity."""

    def __init__(self,
                 in_dim: int,
                 out_dim: int,
                 bias: bool=True,
                 w_init_gain: str='linear'):
        """
        Args:
            in_dim (int): Input feature size.
            out_dim (int): Output feature size.
            bias (bool): Passed to ``nn.Linear`` as ``bias_attr``.
            w_init_gain (str): Non-linearity name given to
                ``_calculate_gain`` to obtain the initialization gain.
        """
        super().__init__()
        self.linear_layer = nn.Linear(in_dim, out_dim, bias_attr=bias)
        gain = _calculate_gain(w_init_gain)
        xavier_uniform_(self.linear_layer.weight, gain=gain)

    def forward(self, x: paddle.Tensor):
        """Apply the linear projection to ``x``."""
        return self.linear_layer(x)


class ConvNorm(nn.Layer):
    """1D convolution whose weight is Xavier-uniform initialized with a
    gain chosen by the name of the following non-linearity."""

    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 kernel_size: int=1,
                 stride: int=1,
                 padding: int=None,
                 dilation: int=1,
                 bias: bool=True,
                 w_init_gain: str='linear',
                 param=None):
        """
        Args:
            in_channels (int): Number of input channels.
            out_channels (int): Number of output channels.
            kernel_size (int): Convolution kernel size.
            stride (int): Convolution stride.
            padding (int, optional): Padding on both sides. When None it is
                derived from ``dilation`` and ``kernel_size``, which then
                requires an odd kernel size.
            dilation (int): Convolution dilation.
            bias (bool): Passed to ``nn.Conv1D`` as ``bias_attr``.
            w_init_gain (str): Non-linearity name for ``_calculate_gain``.
            param: Extra argument forwarded to ``_calculate_gain``.
        """
        super().__init__()
        if padding is None:
            # symmetric padding is only derived for odd kernels
            assert kernel_size % 2 == 1
            padding = int(dilation * (kernel_size - 1) / 2)

        self.conv = nn.Conv1D(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            bias_attr=bias)

        gain = _calculate_gain(w_init_gain, param=param)
        xavier_uniform_(self.conv.weight, gain=gain)

    def forward(self, signal: paddle.Tensor):
        """Apply the convolution to ``signal``."""
        return self.conv(signal)


class ConvBlock(nn.Layer):
    """Stack of residual convolution units with growing dilation.

    Unit ``i`` uses dilation ``3**i`` in its first convolution; the input
    of every unit is added back to its output.
    """

    def __init__(self,
                 hidden_dim: int,
                 n_conv: int=3,
                 dropout_p: float=0.2,
                 activ: str='relu'):
        """
        Args:
            hidden_dim (int): Channel count of every convolution.
            n_conv (int): Number of residual units.
            dropout_p (float): Dropout probability inside each unit.
            activ (str): Activation name understood by
                ``_get_activation_fn``.
        """
        super().__init__()
        self._n_groups = 8
        units = []
        for idx in range(n_conv):
            units.append(
                self._get_conv(
                    hidden_dim=hidden_dim,
                    dilation=3**idx,
                    activ=activ,
                    dropout_p=dropout_p))
        self.blocks = nn.LayerList(units)

    def forward(self, x: paddle.Tensor):
        """Run every unit in turn, each with a skip connection."""
        for unit in self.blocks:
            skip = x
            x = unit(x)
            x += skip
        return x

    def _get_conv(self,
                  hidden_dim: int,
                  dilation: int,
                  activ: str='relu',
                  dropout_p: float=0.2):
        """Build one unit: dilated conv, activation, group norm, dropout,
        plain conv, activation, dropout."""
        dilated_conv = ConvNorm(
            in_channels=hidden_dim,
            out_channels=hidden_dim,
            kernel_size=3,
            padding=dilation,
            dilation=dilation)
        first_act = _get_activation_fn(activ)
        norm = nn.GroupNorm(num_groups=self._n_groups, num_channels=hidden_dim)
        first_drop = nn.Dropout(p=dropout_p)
        plain_conv = ConvNorm(
            hidden_dim, hidden_dim, kernel_size=3, padding=1, dilation=1)
        second_act = _get_activation_fn(activ)
        second_drop = nn.Dropout(p=dropout_p)
        return nn.Sequential(dilated_conv, first_act, norm, first_drop,
                             plain_conv, second_act, second_drop)


class LocationLayer(nn.Layer):
    """Convolution plus linear projection applied to the stacked
    attention weights."""

    def __init__(self,
                 attention_n_filters: int,
                 attention_kernel_size: int,
                 attention_dim: int):
        """
        Args:
            attention_n_filters (int): Output channels of the convolution.
            attention_kernel_size (int): Kernel size of the convolution.
            attention_dim (int): Output size of the linear projection.
        """
        super().__init__()
        half_kernel = int((attention_kernel_size - 1) / 2)
        # the input has two channels
        self.location_conv = ConvNorm(
            in_channels=2,
            out_channels=attention_n_filters,
            kernel_size=attention_kernel_size,
            stride=1,
            padding=half_kernel,
            dilation=1,
            bias=False)
        self.location_dense = LinearNorm(
            in_dim=attention_n_filters,
            out_dim=attention_dim,
            bias=False,
            w_init_gain='tanh')

    def forward(self, attention_weights_cat: paddle.Tensor):
        """Convolve the stacked weights, move channels to the last axis
        and project them to ``attention_dim``."""
        conv_out = self.location_conv(attention_weights_cat)
        channels_last = conv_out.transpose([0, 2, 1])
        return self.location_dense(channels_last)


class Attention(nn.Layer):
    """Additive attention that also uses the previous and cumulative
    attention weights (through ``LocationLayer``) to score the memory."""

    def __init__(self,
                 attention_rnn_dim: int,
                 embedding_dim: int,
                 attention_dim: int,
                 attention_location_n_filters: int,
                 attention_location_kernel_size: int):
        """
        Args:
            attention_rnn_dim (int): Size of the query vector.
            embedding_dim (int): Size of a memory vector.
            attention_dim (int): Size of the shared scoring space.
            attention_location_n_filters (int):
                Number of filters of the location convolution.
            attention_location_kernel_size (int):
                Kernel size of the location convolution.
        """
        super().__init__()
        self.query_layer = LinearNorm(
            in_dim=attention_rnn_dim,
            out_dim=attention_dim,
            bias=False,
            w_init_gain='tanh')
        self.memory_layer = LinearNorm(
            in_dim=embedding_dim,
            out_dim=attention_dim,
            bias=False,
            w_init_gain='tanh')
        self.v = LinearNorm(in_dim=attention_dim, out_dim=1, bias=False)
        self.location_layer = LocationLayer(
            attention_n_filters=attention_location_n_filters,
            attention_kernel_size=attention_location_kernel_size,
            attention_dim=attention_dim)
        # value written into masked positions before the softmax
        self.score_mask_value = -float("inf")

    def get_alignment_energies(self,
                               query: paddle.Tensor,
                               processed_memory: paddle.Tensor,
                               attention_weights_cat: paddle.Tensor):
        """Compute the unnormalized attention scores.
        Args:
            query:
                decoder output (B, n_mel_channels * n_frames_per_step)
            processed_memory:
                processed encoder outputs (B, T_in, attention_dim)
            attention_weights_cat:
                cumulative and prev. att weights (B, 2, max_time)
        Returns:
            Tensor:
                alignment (B, max_time)
        """

        # add a time axis to the query so it broadcasts over the memory
        query_proj = self.query_layer(query.unsqueeze(1))
        location_proj = self.location_layer(attention_weights_cat)
        combined = paddle.tanh(query_proj + location_proj + processed_memory)
        scores = self.v(combined)
        return scores.squeeze(-1)

    def forward(self,
                attention_hidden_state: paddle.Tensor,
                memory: paddle.Tensor,
                processed_memory: paddle.Tensor,
                attention_weights_cat: paddle.Tensor,
                mask: paddle.Tensor):
        """Compute the attention context and weights.
        Args:
            attention_hidden_state:
                attention rnn last output
            memory:
                encoder outputs
            processed_memory:
                processed encoder outputs
            attention_weights_cat:
                previous and cumulative attention weights
            mask:
                binary mask for padded data
        Returns:
            Tuple of the attention context and the attention weights.
        """
        alignment = self.get_alignment_energies(
            query=attention_hidden_state,
            processed_memory=processed_memory,
            attention_weights_cat=attention_weights_cat)

        if mask is not None:
            # in-place fill of the masked positions
            alignment.data.masked_fill_(mask, self.score_mask_value)

        weights = F.softmax(alignment, axis=1)
        # weighted sum over the memory; the query axis is then removed
        context = paddle.bmm(weights.unsqueeze(1), memory).squeeze(1)

        return context, weights


class MFCC(nn.Layer):
    """Project a mel spectrogram onto a DCT matrix built by ``create_dct``.

    Args:
        n_mfcc (int): first argument forwarded to ``create_dct``.
        n_mels (int): second argument forwarded to ``create_dct``.
    """

    def __init__(self, n_mfcc: int=40, n_mels: int=80):
        super().__init__()
        self.n_mfcc = n_mfcc
        self.n_mels = n_mels
        self.norm = 'ortho'
        # The matrix is kept as a buffer named 'dct_mat'.
        self.register_buffer(
            'dct_mat', create_dct(self.n_mfcc, self.n_mels, self.norm))

    def forward(self, mel_specgram: paddle.Tensor):
        """Apply the DCT matrix to ``mel_specgram``.

        A 2-D input gets a temporary leading axis that is removed again
        before returning; any other input is used as given.
        """
        was_2d = len(mel_specgram.shape) == 2
        if was_2d:
            mel_specgram = mel_specgram.unsqueeze(0)
        # (channel, n_mels, time) -> (channel, time, n_mels)
        time_major = mel_specgram.transpose([0, 2, 1])
        # (channel, time, n_mels) x (n_mels, n_mfcc) -> (channel, time, n_mfcc)
        projected = paddle.matmul(time_major, self.dct_mat)
        # back to (channel, n_mfcc, time)
        mfcc = projected.transpose([0, 2, 1])
        if was_2d:
            # drop the temporary leading axis
            mfcc = mfcc.squeeze(0)
        return mfcc


================================================
FILE: paddlespeech/t2s/models/starganv2_vc/AuxiliaryASR/model.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math

import paddle
import paddle.nn.functional as F
from paddle import nn

from .layers import Attention
from .layers import ConvBlock
from .layers import ConvNorm
from .layers import LinearNorm
from .layers import MFCC
from paddlespeech.t2s.modules.nets_utils import _reset_parameters
from paddlespeech.utils.initialize import uniform_


class ASRCNN(nn.Layer):
    """Convolutional ASR model with a CTC head and an attention S2S head.

    The input goes through ``MFCC``, a strided ``ConvNorm``, ``n_layers``
    stages of ``ConvBlock`` + ``GroupNorm`` and a projection ``ConvNorm``.
    The projected features feed a CTC classifier and, when target text is
    supplied, an ``ASRS2S`` decoder.

    Args:
        input_dim (int): the first convolution expects ``input_dim // 2`` channels.
        hidden_dim (int): width of the convolutional stack.
        n_token (int): number of output tokens.
        n_layers (int): number of ``ConvBlock`` + ``GroupNorm`` stages.
        token_embedding_dim (int): embedding size used by the S2S decoder.
    """

    def __init__(
            self,
            input_dim: int=80,
            hidden_dim: int=256,
            n_token: int=35,
            n_layers: int=6,
            token_embedding_dim: int=256, ):
        super().__init__()
        half_hidden = hidden_dim // 2
        self.n_token = n_token
        self.n_down = 1
        self.to_mfcc = MFCC()
        self.init_cnn = ConvNorm(
            in_channels=input_dim // 2,
            out_channels=hidden_dim,
            kernel_size=7,
            padding=3,
            stride=2)
        # Build the stages one by one, in order.
        stages = []
        for _ in range(n_layers):
            stages.append(
                nn.Sequential(
                    ConvBlock(hidden_dim),
                    nn.GroupNorm(num_groups=1, num_channels=hidden_dim)))
        self.cnns = nn.Sequential(*stages)
        self.projection = ConvNorm(
            in_channels=hidden_dim, out_channels=half_hidden)
        self.ctc_linear = nn.Sequential(
            LinearNorm(in_dim=half_hidden, out_dim=hidden_dim),
            nn.ReLU(), LinearNorm(in_dim=hidden_dim, out_dim=n_token))
        self.asr_s2s = ASRS2S(
            embedding_dim=token_embedding_dim,
            hidden_dim=half_hidden,
            n_token=n_token)

        self.reset_parameters()
        self.asr_s2s.reset_parameters()

    def _encode(self, mfcc: paddle.Tensor):
        """Run the convolutional stack and the projection on MFCC features."""
        hidden = self.init_cnn(mfcc)
        hidden = self.cnns(hidden)
        return self.projection(hidden)

    def forward(self,
                x: paddle.Tensor,
                src_key_padding_mask: paddle.Tensor=None,
                text_input: paddle.Tensor=None):
        """Compute CTC logits and, when ``text_input`` is given, S2S outputs.

        Returns:
            ``ctc_logit`` alone when ``text_input`` is None, otherwise the
            tuple ``(ctc_logit, s2s_logit, s2s_attn)``.
        """
        # Swap the last two axes before the linear heads.
        x = self._encode(self.to_mfcc(x)).transpose([0, 2, 1])
        ctc_logit = self.ctc_linear(x)
        if text_input is None:
            return ctc_logit
        _, s2s_logit, s2s_attn = self.asr_s2s(
            memory=x,
            memory_mask=src_key_padding_mask,
            text_input=text_input)
        return ctc_logit, s2s_logit, s2s_attn

    def get_feature(self, x: paddle.Tensor):
        """Return the projected features of ``x`` after squeezing axis 1."""
        return self._encode(self.to_mfcc(x.squeeze(1)))

    def length_to_mask(self, lengths: paddle.Tensor):
        """Build a boolean mask from per-item lengths.

        Returns:
            Tensor of shape (len(lengths), max(lengths)) where
            ``mask[b, t]`` is True when ``t + 1 > lengths[b]``.
        """
        batch = lengths.shape[0]
        steps = paddle.arange(lengths.max()).unsqueeze(0)
        steps = steps.expand((batch, -1)).astype(lengths.dtype)
        return paddle.greater_than(steps + 1, lengths.unsqueeze(1))

    def get_future_mask(self, out_length: int, unmask_future_steps: int=0):
        """
        Args:
            out_length (int):
                returned mask shape is (out_length, out_length).
            unmask_future_steps (int): 
                unmasking future step size.
        Return:
            Tensor (paddle.Tensor(bool)): 
                mask future timesteps, mask[i, j] = True if j > i + unmask_future_steps else False
        """
        # columns[i, j] == j, and its transpose holds the row index i.
        columns = paddle.arange(out_length).unsqueeze(0).expand(
            [out_length, -1])
        rows = columns.T
        return paddle.greater_than(columns, rows + unmask_future_steps)

    def reset_parameters(self):
        """Apply ``_reset_parameters`` to this layer and all its sublayers."""
        self.apply(_reset_parameters)


class ASRS2S(nn.Layer):
    """Attention-based sequence-to-sequence decoder over encoder memory.

    Decoding state (LSTM hidden/cell state, attention weights, cumulative
    attention weights, attention context, memory and mask) is stored on the
    instance. ``initialize_decoder_states`` resets it at the start of every
    ``forward`` call, and ``decode`` advances it by one step.

    Args:
        embedding_dim (int): size of the token embedding.
        hidden_dim (int): size of the decoder LSTM state, the attention
            space and the projected hidden output.
        n_location_filters (int): passed to ``Attention`` as
            ``attention_location_n_filters``.
        location_kernel_size (int): passed to ``Attention`` as
            ``attention_location_kernel_size``.
        n_token (int): vocabulary size.
    """

    def __init__(self,
                 embedding_dim: int=256,
                 hidden_dim: int=512,
                 n_location_filters: int=32,
                 location_kernel_size: int=63,
                 n_token: int=40):
        super().__init__()
        self.embedding = nn.Embedding(n_token, embedding_dim)
        # Bound of the uniform embedding init applied in reset_parameters().
        self.val_range = math.sqrt(6 / hidden_dim)

        self.decoder_rnn_dim = hidden_dim
        self.project_to_n_symbols = nn.Linear(self.decoder_rnn_dim, n_token)
        self.attention_layer = Attention(
            attention_rnn_dim=self.decoder_rnn_dim,
            embedding_dim=hidden_dim,
            attention_dim=hidden_dim,
            attention_location_n_filters=n_location_filters,
            attention_location_kernel_size=location_kernel_size)
        self.decoder_rnn = nn.LSTMCell(self.decoder_rnn_dim + embedding_dim,
                                       self.decoder_rnn_dim)
        self.project_to_hidden = nn.Sequential(
            LinearNorm(in_dim=self.decoder_rnn_dim * 2, out_dim=hidden_dim),
            nn.Tanh())
        self.sos = 1
        self.eos = 2

    def initialize_decoder_states(self,
                                  memory: paddle.Tensor,
                                  mask: paddle.Tensor):
        """Reset all per-utterance decoding state.

        memory.shape = (B, L, H) = (Batchsize, Maxtimestep, Hiddendim)
        """
        B, L, H = memory.shape
        dtype = memory.dtype
        self.decoder_hidden = paddle.zeros(
            (B, self.decoder_rnn_dim)).astype(dtype)
        self.decoder_cell = paddle.zeros(
            (B, self.decoder_rnn_dim)).astype(dtype)
        self.attention_weights = paddle.zeros((B, L)).astype(dtype)
        self.attention_weights_cum = paddle.zeros((B, L)).astype(dtype)
        self.attention_context = paddle.zeros((B, H)).astype(dtype)
        self.memory = memory
        # The memory projection is computed once and reused by every step.
        self.processed_memory = self.attention_layer.memory_layer(memory)
        self.mask = mask
        # Token id and probability used by forward() to randomly replace
        # input tokens.
        self.unk_index = 3
        self.random_mask = 0.1

    def forward(self,
                memory: paddle.Tensor,
                memory_mask: paddle.Tensor,
                text_input: paddle.Tensor):
        """Teacher-forced decoding over ``text_input``.

        memory.shape = (B, L, H) = (Batchsize, Maxtimestep, Hiddendim)
        memory_mask.shape = (B, L, )
        text_input.shape = (B, T)

        Returns:
            Tuple ``(hidden_outputs, logit_outputs, alignments)``, each
            stacked over the T + 1 decoding steps with batch as axis 0.
        """
        self.initialize_decoder_states(memory, memory_mask)
        # Randomly replace input tokens with unk_index; text_input itself is
        # left untouched because paddle.where returns a new tensor.
        random_mask = (paddle.rand(text_input.shape) < self.random_mask)
        masked_text = paddle.where(
            condition=random_mask,
            x=paddle.full(
                shape=text_input.shape,
                fill_value=self.unk_index,
                dtype=text_input.dtype),
            y=text_input)
        decoder_inputs = self.embedding(masked_text).transpose(
            [1, 0, 2])  # -> [T, B, channel]
        # paddle.int64 is used because `paddle.long` is not a native Paddle
        # dtype alias.
        start_embedding = self.embedding(
            paddle.to_tensor(
                [self.sos] * decoder_inputs.shape[1], dtype=paddle.int64))
        decoder_inputs = paddle.concat(
            (start_embedding.unsqueeze(0), decoder_inputs), axis=0)

        hidden_outputs, logit_outputs, alignments = [], [], []
        for step in range(decoder_inputs.shape[0]):
            hidden, logit, attention_weights = self.decode(
                decoder_inputs[step])
            hidden_outputs.append(hidden)
            logit_outputs.append(logit)
            alignments.append(attention_weights)

        hidden_outputs, logit_outputs, alignments = self.parse_decoder_outputs(
            hidden_outputs, logit_outputs, alignments)

        return hidden_outputs, logit_outputs, alignments

    def decode(self, decoder_input: paddle.Tensor):
        """Advance the decoder by one step.

        Args:
            decoder_input: embedding of the current input token.
        Returns:
            Tuple ``(hidden, logit, attention_weights)`` for this step.
        """
        cell_input = paddle.concat((decoder_input, self.attention_context), -1)
        # Only call flatten_parameters() when the cell actually provides it,
        # so its absence cannot abort the step.
        flatten = getattr(self.decoder_rnn, 'flatten_parameters', None)
        if callable(flatten):
            flatten()
        # paddle.nn.LSTMCell returns (outputs, (h, c)); unpack the state tuple
        # so decoder_cell stays a tensor for the next step.
        _, (self.decoder_hidden, self.decoder_cell) = self.decoder_rnn(
            cell_input, (self.decoder_hidden, self.decoder_cell))

        attention_weights_cat = paddle.concat(
            (self.attention_weights.unsqueeze(1),
             self.attention_weights_cum.unsqueeze(1)),
            axis=1)

        self.attention_context, self.attention_weights = self.attention_layer(
            self.decoder_hidden, self.memory, self.processed_memory,
            attention_weights_cat, self.mask)

        self.attention_weights_cum += self.attention_weights

        hidden_and_context = paddle.concat(
            (self.decoder_hidden, self.attention_context), -1)
        hidden = self.project_to_hidden(hidden_and_context)

        # Dropout before the output projection. `training` must be passed by
        # keyword: the third positional parameter of paddle's F.dropout is
        # `axis`, not `training`.
        logit = self.project_to_n_symbols(
            F.dropout(hidden, p=0.5, training=self.training))

        return hidden, logit, self.attention_weights

    def parse_decoder_outputs(self,
                              hidden: paddle.Tensor,
                              logit: paddle.Tensor,
                              alignments: paddle.Tensor):
        """Stack per-step outputs and move the batch axis first.

        Each argument is the sequence of per-step tensors collected by
        ``forward``; each result is the stacked tensor with axes 0 and 1
        swapped.
        """
        # -> [B, T_out + 1, max_time]
        alignments = paddle.stack(alignments).transpose([1, 0, 2])
        # [T_out + 1, B, n_symbols] -> [B, T_out + 1,  n_symbols]
        logit = paddle.stack(logit).transpose([1, 0, 2])
        hidden = paddle.stack(hidden).transpose([1, 0, 2])

        return hidden, logit, alignments

    def reset_parameters(self):
        """Re-initialize the embedding uniformly in [-val_range, val_range]."""
        uniform_(self.embedding.weight, -self.val_range, self.val_range)


================================================
FILE: paddlespeech/t2s/models/starganv2_vc/JDCNet/__init__.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/models/starganv2_vc/JDCNet/model.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Implementation of model from:
Kum et al. - "Joint Detection and Classification of Singing Voice Melody Using
Convolutional Recurrent Neural Networks" (2019)
Link: https://www.semanticscholar.org/paper/Joint-Detection-and-Classification-of-Singing-Voice-Kum-Nam/60a2ad4c7db43bace75805054603747fcd062c0d
"""
import paddle
from paddle import nn


class JDCNet(nn.Layer):
    """
    Joint Detection and Classification Network model for singing voice melody.

    Args:
        num_class (int): output size of the final linear classifier.
        seq_len (int): number of frames the classifier input is reshaped to.
        leaky_relu_slope (float): negative slope of the LeakyReLU layers
            built directly in this class.
    """

    def __init__(self,
                 num_class: int=722,
                 seq_len: int=31,
                 leaky_relu_slope: float=0.01):
        super().__init__()
        self.seq_len = seq_len
        self.num_class = num_class
        # input: (B, 1, T, n_mels)
        self.conv_block = nn.Sequential(
            # output: (B, 64, T, n_mels)
            nn.Conv2D(
                in_channels=1,
                out_channels=64,
                kernel_size=3,
                padding=1,
                bias_attr=False),
            nn.BatchNorm2D(num_features=64),
            nn.LeakyReLU(leaky_relu_slope),
            # output: (B, 64, T, n_mels)
            nn.Conv2D(64, 64, 3, padding=1, bias_attr=False), )
        # output: (B, 128, T, n_mels // 2)
        self.res_block1 = ResBlock(in_channels=64, out_channels=128)
        # output: (B, 192, T, n_mels // 4)
        self.res_block2 = ResBlock(in_channels=128, out_channels=192)
        # output: (B, 256, T, n_mels // 8)
        self.res_block3 = ResBlock(in_channels=192, out_channels=256)
        # pool block; note that forward() only uses entries 0..2 of it
        self.pool_block = nn.Sequential(
            nn.BatchNorm2D(num_features=256),
            nn.LeakyReLU(leaky_relu_slope),
            # (B, 256, T, 2)
            nn.MaxPool2D(kernel_size=(1, 4)),
            nn.Dropout(p=0.5), )
        # input: (B, T, 512), reshaped from (B, 256, T, 2)
        # output: (B, T, 512)
        self.bilstm_classifier = nn.LSTM(
            input_size=512,
            hidden_size=256,
            time_major=False,
            direction='bidirectional')
        # input: (B * T, 512)
        # output: (B * T, num_class)
        self.classifier = nn.Linear(
            in_features=512, out_features=self.num_class)

        # initialize weights
        self.apply(self.init_weights)

    @staticmethod
    def _swap_last_two(t: paddle.Tensor):
        """Swap the last two axes of a 4-D tensor (or of a 3-D one)."""
        perm = [0, 1, 3, 2] if len(t.shape) == 4 else [0, 2, 1]
        return t.transpose(perm)

    def _activated_features(self, x: paddle.Tensor):
        """Shared backbone: conv block, three ResBlocks, then BN + LeakyReLU."""
        x = self._swap_last_two(x.astype(paddle.float32))
        feats = self.conv_block(x)
        for block in (self.res_block1, self.res_block2, self.res_block3):
            feats = block(feats)
        # Only the BatchNorm and LeakyReLU entries of pool_block run here.
        feats = self.pool_block[0](feats)
        return self.pool_block[1](feats)

    def get_feature_GAN(self, x: paddle.Tensor):
        """Calculate feature_GAN.
        Args:
            x(Tensor(float32)): 
                Shape (B, 1, n_mels, T).
        Returns:
            Tensor:
                Shape (B, 256, n_mels // 8, T).
        """
        return self._swap_last_two(self._activated_features(x))

    def forward(self, x: paddle.Tensor):
        """Calculate forward propagation.
        Args:
            x(Tensor(float32)): 
                Shape (B, 1, n_mels, seq_len).
        Returns:
            Tensor:
                absolute value of the classifier output, computed from a
                (B, seq_len, num_class) tensor after ``squeeze()`` removed
                every size-1 axis.
            Tensor:
                GAN_feature. Shape: (B, 256, n_mels // 8, seq_len)
            Tensor:
                poolblock_out, the max-pooled feature map, i.e.
                (B, 256, seq_len, 2) according to the shape comments in
                this class.
        """
        activated = self._activated_features(x)
        GAN_feature = self._swap_last_two(activated)
        # Max pooling only; the trailing Dropout entry is not applied.
        poolblock_out = self.pool_block[2](activated)

        # (B, 256, seq_len, 2) => (B, seq_len, 256, 2) => (B, seq_len, 512)
        frames = poolblock_out.transpose([0, 2, 1, 3])
        frames = frames.reshape((-1, self.seq_len, 512))
        self.bilstm_classifier.flatten_parameters()
        # the LSTM's final states are discarded
        lstm_out, _ = self.bilstm_classifier(frames)
        # (B * seq_len, 512) -> (B * seq_len, num_class)
        scores = self.classifier(lstm_out.reshape((-1, 512)))
        # (B, seq_len, num_class)
        scores = scores.reshape((-1, self.seq_len, self.num_class))
        return paddle.abs(scores.squeeze()), GAN_feature, poolblock_out

    @staticmethod
    def init_weights(m):
        """Initializer applied to every sublayer via ``self.apply``."""
        if isinstance(m, nn.Linear):
            nn.initializer.KaimingUniform()(m.weight)
            if m.bias is not None:
                nn.initializer.Constant(0)(m.bias)
        elif isinstance(m, nn.Conv2D):
            nn.initializer.XavierNormal()(m.weight)
        elif isinstance(m, (nn.LSTM, nn.LSTMCell)):
            for p in m.parameters():
                # tensors with 2+ dims get Orthogonal init, the rest Normal
                if len(p.shape) >= 2:
                    initializer = nn.initializer.Orthogonal()
                else:
                    initializer = nn.initializer.Normal()
                initializer(p)


class ResBlock(nn.Layer):
    """Pre-activation residual block that halves the last input axis.

    Args:
        in_channels (int): channels of the input.
        out_channels (int): channels of the output.
        leaky_relu_slope (float): negative slope of the LeakyReLU layers.
    """

    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 leaky_relu_slope: float=0.01):
        super().__init__()
        # A learned shortcut is only needed when the channel count changes.
        self.downsample = in_channels != out_channels
        # BN / LReLU / MaxPool layer before the conv layer - see Figure 1b in the paper
        self.pre_conv = nn.Sequential(
            nn.BatchNorm2D(num_features=in_channels),
            nn.LeakyReLU(leaky_relu_slope),
            # pooling window of 1 on axis 2 and 2 on axis 3
            nn.MaxPool2D(kernel_size=(1, 2)), )

        # main branch: conv -> BN -> LReLU -> conv
        self.conv = nn.Sequential(
            nn.Conv2D(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=3,
                padding=1,
                bias_attr=False),
            nn.BatchNorm2D(out_channels),
            nn.LeakyReLU(leaky_relu_slope),
            nn.Conv2D(
                in_channels=out_channels,
                out_channels=out_channels,
                kernel_size=3,
                padding=1,
                bias_attr=False), )
        # 1 x 1 convolution layer to match the feature dimensions
        self.conv1by1 = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            bias_attr=False) if self.downsample else None

    def forward(self, x: paddle.Tensor):
        """Calculate forward propagation.
        Args:
            x(Tensor(float32)): Shape (B, in_channels, T, n_mels).
        Returns:
            Tensor:
                The residual output, Shape (B, out_channels, T, n_mels // 2).
        """
        pooled = self.pre_conv(x)
        main = self.conv(pooled)
        # Identity shortcut unless the channel count changes.
        shortcut = self.conv1by1(pooled) if self.downsample else pooled
        return main + shortcut


================================================
FILE: paddlespeech/t2s/models/starganv2_vc/__init__.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .starganv2_vc import *
from .starganv2_vc_updater import *
from .AuxiliaryASR.model import *
from .JDCNet.model import *


================================================
FILE: paddlespeech/t2s/models/starganv2_vc/losses.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any
from typing import Dict

import paddle
import paddle.nn.functional as F

from .transforms import build_transforms

# These are all to be written into the updater.


def compute_d_loss(
        nets: Dict[str, Any],
        x_real: paddle.Tensor,
        y_org: paddle.Tensor,
        y_trg: paddle.Tensor,
        z_trg: paddle.Tensor=None,
        x_ref: paddle.Tensor=None,
        # TODO: should be True here, but r1_reg has some bug now 
        use_r1_reg: bool=False,
        use_adv_cls: bool=False,
        use_con_reg: bool=False,
        lambda_reg: float=1.,
        lambda_adv_cls: float=0.1,
        lambda_con_reg: float=10.):
    """Compute the discriminator loss.

    Exactly one of ``z_trg`` and ``x_ref`` must be given; it selects whether
    the target style comes from ``nets['mapping_network']`` or from
    ``nets['style_encoder']``.

    Args:
        nets: networks looked up by the keys 'discriminator',
            'mapping_network', 'style_encoder', 'F0_model' and 'generator'.
        x_real: real input; its ``stop_gradient`` flag is set to False here.
        y_org: labels passed to the discriminator together with ``x_real``.
        y_trg: target labels used for the fake sample.
        z_trg: input of the mapping network, or None.
        x_ref: input of the style encoder, or None.
        use_r1_reg: add the R1 gradient penalty.
        use_adv_cls: add the adversarial classifier loss.
        use_con_reg: add the consistency regularization terms.
        lambda_reg, lambda_adv_cls, lambda_con_reg: weights of those terms.
    Returns:
        Tensor: the weighted sum of all enabled terms.
    """
    assert (z_trg is None) != (x_ref is None)

    # ---- real audios ----
    x_real.stop_gradient = False
    out = nets['discriminator'](x_real, y_org)
    loss_real = adv_loss(out, 1)

    # R1 regularization (https://arxiv.org/abs/1801.04406v4)
    loss_reg = r1_reg(out, x_real) if use_r1_reg else paddle.zeros([1])

    # consistency regularization (bCR-GAN: https://arxiv.org/abs/2002.04724)
    loss_con_reg = paddle.zeros([1])
    if use_con_reg:
        t = build_transforms()
        out_aug = nets['discriminator'](t(x_real).detach(), y_org)
        loss_con_reg += F.smooth_l1_loss(out, out_aug)

    # ---- fake audios (generated without tracking gradients) ----
    with paddle.no_grad():
        if z_trg is None:
            # x_ref is not None
            s_trg = nets['style_encoder'](x_ref, y_trg)
        else:
            s_trg = nets['mapping_network'](z_trg, y_trg)

        F0 = nets['F0_model'].get_feature_GAN(x_real)
        x_fake = nets['generator'](x_real, s_trg, masks=None, F0=F0)
    out = nets['discriminator'](x_fake, y_trg)
    loss_fake = adv_loss(out, 0)
    if use_con_reg:
        out_aug = nets['discriminator'](t(x_fake).detach(), y_trg)
        loss_con_reg += F.smooth_l1_loss(out, out_aug)

    # ---- adversarial classifier loss ----
    if not use_adv_cls:
        loss_real_adv_cls = paddle.zeros([1]).mean()
    else:
        out_de = nets['discriminator'].classifier(x_fake)
        # only samples whose target label differs from the original one
        converted = y_org != y_trg
        loss_real_adv_cls = F.cross_entropy(out_de[converted],
                                            y_org[converted])

        if use_con_reg:
            out_de_aug = nets['discriminator'].classifier(t(x_fake).detach())
            loss_con_reg += F.smooth_l1_loss(out_de, out_de_aug)

    loss = (loss_real + loss_fake + lambda_reg * loss_reg +
            lambda_adv_cls * loss_real_adv_cls +
            lambda_con_reg * loss_con_reg)

    return loss


def compute_g_loss(nets: Dict[str, Any],
                   x_real: paddle.Tensor,
                   y_org: paddle.Tensor,
                   y_trg: paddle.Tensor,
                   z_trgs: paddle.Tensor=None,
                   x_refs: paddle.Tensor=None,
                   use_adv_cls: bool=False,
                   lambda_sty: float=1.,
                   lambda_cyc: float=5.,
                   lambda_ds: float=1.,
                   lambda_norm: float=1.,
                   lambda_asr: float=10.,
                   lambda_f0: float=5.,
                   lambda_f0_sty: float=0.1,
                   lambda_adv: float=2.,
                   lambda_adv_cls: float=0.5,
                   norm_bias: float=0.5):
    """Compute the generator loss.

    Exactly one of ``z_trgs`` and ``x_refs`` must be given. Whichever is
    given is unpacked into two items: the first produces the main target
    style, the second the style used by the diversity term.

    Args:
        nets: networks looked up by the keys 'mapping_network',
            'style_encoder', 'F0_model', 'asr_model', 'generator' and
            'discriminator'.
        x_real: real input.
        y_org: labels of ``x_real``.
        y_trg: target labels.
        z_trgs: pair of mapping-network inputs, or None.
        x_refs: pair of style-encoder inputs, or None.
        use_adv_cls: add the adversarial classifier loss.
        lambda_*: weights of the individual terms.
        norm_bias: margin subtracted in the norm consistency loss.
    Returns:
        Tensor: the weighted sum of all terms (the diversity term enters
        with a negative sign).
    """
    assert (z_trgs is None) != (x_refs is None)
    from_latent = z_trgs is not None

    # compute style vectors
    if from_latent:
        z_trg, z_trg2 = z_trgs
        s_trg = nets['mapping_network'](z_trg, y_trg)
    else:
        x_ref, x_ref2 = x_refs
        s_trg = nets['style_encoder'](x_ref, y_trg)

    # compute ASR/F0 features (real)
    # The original source did not use .eval(); it used no_grad() instead.
    # Here .eval() is used, and enabling `with paddle.no_grad()` raises an error.
    F0_real, GAN_F0_real, cyc_F0_real = nets['F0_model'](x_real)
    ASR_real = nets['asr_model'].get_feature(x_real)

    # adversarial loss
    x_fake = nets['generator'](x_real, s_trg, masks=None, F0=GAN_F0_real)
    out = nets['discriminator'](x_fake, y_trg)
    loss_adv = adv_loss(out, 1)

    # compute ASR/F0 features (fake)
    F0_fake, GAN_F0_fake, _ = nets['F0_model'](x_fake)
    ASR_fake = nets['asr_model'].get_feature(x_fake)

    # norm consistency loss: squared hinge on the log-norm difference
    norm_gap = paddle.abs(log_norm(x_fake) - log_norm(x_real)) - norm_bias
    loss_norm = (F.relu(norm_gap)**2).mean()

    # F0 loss
    loss_f0 = f0_loss(F0_fake, F0_real)

    # style F0 loss (style initialization)
    if x_refs is not None and lambda_f0_sty > 0 and not use_adv_cls:
        F0_sty, _, _ = nets['F0_model'](x_ref)
        loss_f0_sty = F.l1_loss(
            compute_mean_f0(F0_fake), compute_mean_f0(F0_sty))
    else:
        loss_f0_sty = paddle.zeros([1]).mean()

    # ASR loss
    loss_asr = F.smooth_l1_loss(ASR_fake, ASR_real)

    # style reconstruction loss
    s_pred = nets['style_encoder'](x_fake, y_trg)
    loss_sty = paddle.mean(paddle.abs(s_pred - s_trg))

    # diversity sensitive loss
    if from_latent:
        s_trg2 = nets['mapping_network'](z_trg2, y_trg)
    else:
        s_trg2 = nets['style_encoder'](x_ref2, y_trg)
    x_fake2 = nets['generator'](x_real, s_trg2, masks=None, F0=GAN_F0_real)
    # the second fake is detached
    x_fake2 = x_fake2.detach()
    _, GAN_F0_fake2, _ = nets['F0_model'](x_fake2)
    loss_ds = paddle.mean(paddle.abs(x_fake - x_fake2))
    loss_ds += F.smooth_l1_loss(GAN_F0_fake, GAN_F0_fake2.detach())

    # cycle-consistency loss
    s_org = nets['style_encoder'](x_real, y_org)
    x_rec = nets['generator'](x_fake, s_org, masks=None, F0=GAN_F0_fake)
    loss_cyc = paddle.mean(paddle.abs(x_rec - x_real))
    # F0 loss in cycle-consistency loss
    if lambda_f0 > 0:
        _, _, cyc_F0_rec = nets['F0_model'](x_rec)
        loss_cyc += F.smooth_l1_loss(cyc_F0_rec, cyc_F0_real)
    # ASR loss in cycle-consistency loss
    if lambda_asr > 0:
        ASR_recon = nets['asr_model'].get_feature(x_rec)
        loss_cyc += F.smooth_l1_loss(ASR_recon, ASR_real)

    # adversarial classifier loss
    if not use_adv_cls:
        loss_adv_cls = paddle.zeros([1]).mean()
    else:
        out_de = nets['discriminator'].classifier(x_fake)
        # only samples whose target label differs from the original one
        converted = y_org != y_trg
        loss_adv_cls = F.cross_entropy(out_de[converted], y_trg[converted])

    loss = (lambda_adv * loss_adv + lambda_sty * loss_sty -
            lambda_ds * loss_ds + lambda_cyc * loss_cyc +
            lambda_norm * loss_norm +
            lambda_asr * loss_asr +
            lambda_f0 * loss_f0 +
            lambda_f0_sty * loss_f0_sty +
            lambda_adv_cls * loss_adv_cls)

    return loss


# for norm consistency loss
def log_norm(x: paddle.Tensor, mean: float=-4, std: float=4, axis: int=2):
    """
    normalized log mel -> mel -> norm -> log(norm)

    Args:
        x: normalized log-mel input.
        mean: offset added after scaling by ``std``.
        std: scale applied to ``x``.
        axis: axis along which the norm is taken.
    Returns:
        Tensor: log of the norm of ``exp(x * std + mean)`` along ``axis``.
    """
    # undo the normalization and leave the log domain
    mel = paddle.exp(x * std + mean)
    return paddle.log(mel.norm(axis=axis))


# for adversarial loss
def adv_loss(logits: paddle.Tensor, target: float):
    """Binary cross-entropy (with logits) against a constant 0/1 target.

    Args:
        logits: discriminator outputs; flattened when they have more than
            one axis.
        target: must be 0 or 1.
    Returns:
        Tensor: the loss value.
    """
    assert target in [1, 0]
    flat = logits.reshape([-1]) if len(logits.shape) > 1 else logits
    targets = paddle.full_like(flat, fill_value=target)
    # prevent nan
    clipped = flat.clip(min=-10, max=10)
    return F.binary_cross_entropy_with_logits(clipped, targets)


# for R1 regularization loss
def r1_reg(d_out: paddle.Tensor, x_in: paddle.Tensor):
    """Zero-centered gradient penalty on the discriminator input.

    Args:
        d_out: discriminator output computed from ``x_in``.
        x_in: discriminator input the gradient is taken with respect to.
    Returns:
        Tensor: half of the batch mean of the per-sample sum of squared
        gradients.
    """
    gradients = paddle.grad(
        outputs=d_out.sum(),
        inputs=x_in,
        create_graph=True,
        retain_graph=True,
        only_inputs=True)
    squared = gradients[0].pow(2)
    assert (squared.shape == x_in.shape)
    # sum over everything but the batch axis, then average over the batch
    per_sample = squared.reshape((x_in.shape[0], -1)).sum(1)
    return 0.5 * per_sample.mean(0)


# for F0 consistency loss
def compute_mean_f0(f0: paddle.Tensor):
    """Average ``f0`` over its last axis and repeat the result along it.

    Returns:
        Tensor: shape (B, M), where M is the size of the last axis of ``f0``
        and B is the length of the averaged tensor.
    """
    averaged = f0.mean(-1)
    # expand to (M, B) first, then swap to (B, M)
    repeated = averaged.expand((f0.shape[-1], averaged.shape[0]))
    return repeated.transpose((1, 0))


def f0_loss(x_f0: paddle.Tensor, y_f0: paddle.Tensor):
    """L1 loss between two F0 tensors, each divided by its own mean.

    x_f0: predict
    y_f0: target

    NOTE(review): the previous docstring gave the shapes as (B, 1, M, L),
    but ``compute_mean_f0`` builds a 2-D (B, M) tensor, so 2-D inputs look
    like the intended case - confirm against the callers.
    """
    # normalize each tensor by its own mean before comparing
    x_normalized = x_f0 / compute_mean_f0(x_f0)
    y_normalized = y_f0 / compute_mean_f0(y_f0)
    return F.l1_loss(x_normalized, y_normalized)


================================================
FILE: paddlespeech/t2s/models/starganv2_vc/starganv2_vc.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
StarGAN v2
Copyright (c) 2020-present NAVER Corp.
This work is licensed under the Creative Commons Attribution-NonCommercial
4.0 International License. To view a copy of this license, visit
http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to
Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
"""
import math

import paddle
import paddle.nn.functional as F
from paddle import nn

from paddlespeech.t2s.modules.nets_utils import _reset_parameters


class DownSample(nn.Layer):
    """Average-pooling downsampler selected by ``layer_type``.

    Args:
        layer_type (str): one of 'none', 'timepreserve' or 'half'.
    """

    def __init__(self, layer_type: str):
        super().__init__()
        self.layer_type = layer_type

    def forward(self, x: paddle.Tensor):
        """Calculate forward propagation.
        Args:
            x(Tensor(float32)): Shape (B, dim_in, n_mels, T).
        Returns:
            Tensor:
                layer_type == 'none': Shape (B, dim_in, n_mels, T)
                layer_type == 'timepreserve': Shape (B, dim_in, n_mels // 2, T)
                layer_type == 'half': Shape (B, dim_in, n_mels // 2, T // 2)
        Raises:
            RuntimeError: if ``layer_type`` is none of the three modes.
        """
        mode = self.layer_type
        if mode == 'none':
            return x
        if mode == 'timepreserve':
            # pool axis 2 only
            return F.avg_pool2d(x, (2, 1))
        if mode == 'half':
            # pool both trailing axes
            return F.avg_pool2d(x, 2)
        raise RuntimeError(
            'Got unexpected donwsampletype %s, expected is [none, timepreserve, half]'
            % mode)


class UpSample(nn.Layer):
    """Parameter-free nearest-neighbour up-sampling selected by ``layer_type``.

    Args:
        layer_type (str):
            'none' (identity), 'timepreserve' (double axis 2 only) or
            'half' (double axes 2 and 3). The value is only validated when
            ``forward`` is called.
    """

    def __init__(self, layer_type: str):
        super().__init__()
        self.layer_type = layer_type

    def forward(self, x: paddle.Tensor):
        """Up-sample the input according to ``self.layer_type``.
        Args:
            x(Tensor(float32)): Shape (B, dim_in, n_mels, T).
        Returns:
            Tensor:
                layer_type == 'none': Shape (B, dim_in, n_mels, T)
                layer_type == 'timepreserve': Shape (B, dim_in, n_mels * 2, T)
                layer_type == 'half': Shape (B, dim_in, n_mels * 2, T * 2)
        Raises:
            RuntimeError: if ``layer_type`` is not one of the three modes.
        """
        mode = self.layer_type
        if mode == 'none':
            return x
        if mode == 'timepreserve':
            # scale (2, 1): stretch axis 2, leave the last axis untouched
            return F.interpolate(x, scale_factor=(2, 1), mode='nearest')
        if mode == 'half':
            return F.interpolate(x, scale_factor=2, mode='nearest')
        raise RuntimeError(
            'Got unexpected upsampletype %s, expected is [none, timepreserve, half]'
            % mode)


class ResBlk(nn.Layer):
    """Pre-activation residual block with optional instance norm and down-sampling.

    Args:
        dim_in (int): Number of input channels.
        dim_out (int): Number of output channels.
        actv (nn.Layer): Activation applied before each 3x3 convolution.
        normalize (bool): Whether to apply InstanceNorm2D before each activation.
        downsample (str): Mode forwarded to ``DownSample``.
    """

    def __init__(self,
                 dim_in: int,
                 dim_out: int,
                 actv: nn.LeakyReLU=nn.LeakyReLU(0.2),
                 normalize: bool=False,
                 downsample: str='none'):
        super().__init__()
        self.actv = actv
        self.normalize = normalize
        self.downsample = DownSample(layer_type=downsample)
        # a 1x1 projection is only needed when the channel count changes
        self.learned_sc = dim_in != dim_out
        self._build_weights(dim_in, dim_out)

    def _build_weights(self, dim_in: int, dim_out: int):
        """Create the convolution / normalisation sub-layers."""
        conv3x3 = dict(kernel_size=3, stride=1, padding=1)
        self.conv1 = nn.Conv2D(
            in_channels=dim_in, out_channels=dim_in, **conv3x3)
        self.conv2 = nn.Conv2D(
            in_channels=dim_in, out_channels=dim_out, **conv3x3)
        if self.normalize:
            # both norms see dim_in channels: conv1 keeps the channel count
            self.norm1 = nn.InstanceNorm2D(dim_in)
            self.norm2 = nn.InstanceNorm2D(dim_in)
        if self.learned_sc:
            self.conv1x1 = nn.Conv2D(
                in_channels=dim_in,
                out_channels=dim_out,
                kernel_size=1,
                stride=1,
                padding=0,
                bias_attr=False)

    def _shortcut(self, x: paddle.Tensor):
        """Skip path: optional 1x1 projection followed by down-sampling."""
        skip = self.conv1x1(x) if self.learned_sc else x
        if self.downsample:
            skip = self.downsample(skip)
        return skip

    def _residual(self, x: paddle.Tensor):
        """Main path: (norm) -> actv -> conv1 -> downsample -> (norm) -> actv -> conv2."""
        h = self.norm1(x) if self.normalize else x
        h = self.conv1(self.actv(h))
        h = self.downsample(h)
        if self.normalize:
            h = self.norm2(h)
        return self.conv2(self.actv(h))

    def forward(self, x: paddle.Tensor):
        """Calculate forward propagation.
        Args:
            x(Tensor(float32)): Shape (B, dim_in, n_mels, T).
        Returns:
            Tensor:
                downsample == 'none': Shape (B, dim_out, n_mels, T).
                downsample == 'timepreserve': Shape (B, dim_out, n_mels // 2, T).
                downsample == 'half': Shape (B, dim_out, n_mels // 2, T // 2).
        """
        skip = self._shortcut(x)
        residual = self._residual(x)
        # divide by sqrt(2) so the sum of two branches keeps unit variance
        return (skip + residual) / math.sqrt(2)


class AdaIN(nn.Layer):
    """Adaptive instance normalisation driven by a style vector.

    Args:
        style_dim (int): Size of the style vector fed to the linear layer.
        num_features (int): Number of channels of the feature map to normalise.
    """

    def __init__(self, style_dim: int, num_features: int):
        super().__init__()
        # affine parameters are disabled: scale/shift come from the style code
        self.norm = nn.InstanceNorm2D(
            num_features=num_features, weight_attr=False, bias_attr=False)
        self.fc = nn.Linear(style_dim, num_features * 2)

    def forward(self, x: paddle.Tensor, s: paddle.Tensor):
        """Calculate forward propagation.
        Args:
            x(Tensor(float32)): Shape (B, num_features, n_mels, T).
            s(Tensor(float32)): Shape (B, style_dim) or (style_dim, ).
        Returns:
            Tensor:
                Shape (B, num_features, n_mels, T).
        """
        style = s[None] if len(s.shape) == 1 else s
        affine = self.fc(style)
        n_batch, n_coeff = affine.shape[0], affine.shape[1]
        # trailing singleton axes let the coefficients broadcast over the map
        affine = affine.reshape((n_batch, n_coeff, 1, 1))
        scale, shift = paddle.split(affine, 2, axis=1)
        return (1 + scale) * self.norm(x) + shift


class AdainResBlk(nn.Layer):
    """Residual block whose normalisation layers are style-conditioned (AdaIN).

    Args:
        dim_in (int): Number of input channels.
        dim_out (int): Number of output channels.
        style_dim (int): Size of the style vector.
        w_hpf (int): When non-zero the skip connection is not added in ``forward``.
        actv (nn.Layer): Activation applied after each AdaIN.
        upsample (str): Mode forwarded to ``UpSample``.
    """

    def __init__(self,
                 dim_in: int,
                 dim_out: int,
                 style_dim: int=64,
                 w_hpf: int=0,
                 actv: nn.Layer=nn.LeakyReLU(0.2),
                 upsample: str='none'):
        super().__init__()
        self.w_hpf = w_hpf
        self.actv = actv
        self.upsample = UpSample(layer_type=upsample)
        # a 1x1 projection is only needed when the channel count changes
        self.learned_sc = dim_in != dim_out
        self._build_weights(dim_in, dim_out, style_dim)
        self.layer_type = upsample

    def _build_weights(self, dim_in: int, dim_out: int, style_dim: int=64):
        """Create the convolution / AdaIN sub-layers."""
        conv3x3 = dict(kernel_size=3, stride=1, padding=1)
        self.conv1 = nn.Conv2D(
            in_channels=dim_in, out_channels=dim_out, **conv3x3)
        self.conv2 = nn.Conv2D(
            in_channels=dim_out, out_channels=dim_out, **conv3x3)
        self.norm1 = AdaIN(style_dim=style_dim, num_features=dim_in)
        self.norm2 = AdaIN(style_dim=style_dim, num_features=dim_out)
        if self.learned_sc:
            self.conv1x1 = nn.Conv2D(
                in_channels=dim_in,
                out_channels=dim_out,
                kernel_size=1,
                stride=1,
                padding=0,
                bias_attr=False)

    def _shortcut(self, x: paddle.Tensor):
        """Skip path: up-sampling followed by the optional 1x1 projection."""
        skip = self.upsample(x)
        return self.conv1x1(skip) if self.learned_sc else skip

    def _residual(self, x: paddle.Tensor, s: paddle.Tensor):
        """Main path: AdaIN -> actv -> upsample -> conv1 -> AdaIN -> actv -> conv2."""
        h = self.actv(self.norm1(x, s))
        h = self.conv1(self.upsample(h))
        h = self.actv(self.norm2(h, s))
        return self.conv2(h)

    def forward(self, x: paddle.Tensor, s: paddle.Tensor):
        """Calculate forward propagation.
        Args:
            x(Tensor(float32)):
                Shape (B, dim_in, n_mels, T).
            s(Tensor(float32)):
                Style code, Shape (B, style_dim) or (style_dim, ).
        Returns:
            Tensor:
                upsample == 'none': Shape (B, dim_out, n_mels, T).
                upsample == 'timepreserve': Shape (B, dim_out, n_mels * 2, T).
                upsample == 'half': Shape (B, dim_out, n_mels * 2, T * 2).
        """
        residual = self._residual(x, s)
        if self.w_hpf != 0:
            # with a high-pass filter configured the skip path is not used
            return residual
        return (residual + self._shortcut(x)) / math.sqrt(2)


class HighPass(nn.Layer):
    """Depth-wise 3x3 high-pass filter with a fixed kernel.

    Args:
        w_hpf (int): Divisor applied to the fixed 3x3 kernel.
    """

    def __init__(self, w_hpf: int):
        super().__init__()
        kernel = paddle.to_tensor([[-1, -1, -1], [-1, 8., -1], [-1, -1, -1]])
        # stored as a plain attribute, exactly as before (not a parameter)
        self.filter = kernel / w_hpf

    def forward(self, x: paddle.Tensor):
        """Filter every channel of ``x`` independently; spatial size is kept (padding=1)."""
        n_channels = x.shape[1]
        # (3, 3) -> (1, 1, 3, 3) -> (C, 1, 3, 3): one kernel copy per channel
        kernel = self.filter.unsqueeze(0).unsqueeze(1)
        kernel = kernel.tile([n_channels, 1, 1, 1])
        return F.conv2d(x, kernel, padding=1, groups=n_channels)


class Generator(nn.Layer):
    """Encoder/decoder generator built from ``ResBlk`` and ``AdainResBlk`` blocks.

    The encoder is a stack of normalised ``ResBlk`` layers; the decoder mirrors
    it with style-conditioned ``AdainResBlk`` layers. Optionally an F0 feature
    map is concatenated to the bottleneck and a high-pass filtered skip
    connection is added when masks are supplied to ``forward``.

    Args:
        dim_in (int): Number of channels produced by the stem convolution.
        style_dim (int): Size of the style code consumed by the decoder blocks.
        max_conv_dim (int): Upper bound on the channel count of any block.
        w_hpf (int): High-pass filter weight. When > 0 an extra down/up-sampling
            stage is added and ``self.hpf`` is created.
        F0_channel (int): Channel count of the F0 feature map; 0 disables the
            F0 branch.
    """

    def __init__(self,
                 dim_in: int=48,
                 style_dim: int=48,
                 max_conv_dim: int=48 * 8,
                 w_hpf: int=1,
                 F0_channel: int=0):
        super().__init__()

        # single-channel input -> dim_in feature maps
        self.stem = nn.Conv2D(
            in_channels=1,
            out_channels=dim_in,
            kernel_size=3,
            stride=1,
            padding=1)
        self.encode = nn.LayerList()
        self.decode = nn.LayerList()
        # output head: dim_in feature maps -> single-channel output
        self.to_out = nn.Sequential(
            nn.InstanceNorm2D(dim_in),
            nn.LeakyReLU(0.2),
            nn.Conv2D(
                in_channels=dim_in,
                out_channels=1,
                kernel_size=1,
                stride=1,
                padding=0))
        self.F0_channel = F0_channel
        # down/up-sampling blocks
        # int(np.log2(img_size)) - 4
        repeat_num = 4
        if w_hpf > 0:
            repeat_num += 1

        for lid in range(repeat_num):
            # stages 1 and 3 only shrink axis 2; all others shrink both axes
            if lid in [1, 3]:
                _downtype = 'timepreserve'
            else:
                _downtype = 'half'

            dim_out = min(dim_in * 2, max_conv_dim)
            self.encode.append(
                ResBlk(
                    dim_in=dim_in,
                    dim_out=dim_out,
                    normalize=True,
                    downsample=_downtype))
            # The mirrored decoder block is pushed to the FRONT of self.decode.
            # For the very first block (lid == 0, empty list) append() is used
            # instead of insert(0, ...) -- presumably to work around
            # LayerList.insert on an empty list; TODO confirm.
            (self.decode.insert if lid else
             lambda i, sublayer: self.decode.append(sublayer))(0, AdainResBlk(
                 dim_in=dim_out,
                 dim_out=dim_in,
                 style_dim=style_dim,
                 w_hpf=w_hpf,
                 upsample=_downtype))  # stack-like
            dim_in = dim_out
        # bottleneck blocks (encoder)
        for _ in range(2):
            self.encode.append(
                ResBlk(dim_in=dim_out, dim_out=dim_out, normalize=True))
        # F0 blocks: reduce (encoder channels + F0 channels) back to dim_out
        if F0_channel != 0:
            self.decode.insert(0,
                               AdainResBlk(
                                   dim_in=dim_out + int(F0_channel / 2),
                                   dim_out=dim_out,
                                   style_dim=style_dim,
                                   w_hpf=w_hpf))
        # bottleneck blocks (decoder)
        for _ in range(2):
            self.decode.insert(0,
                               AdainResBlk(
                                   dim_in=dim_out + int(F0_channel / 2),
                                   dim_out=dim_out + int(F0_channel / 2),
                                   style_dim=style_dim,
                                   w_hpf=w_hpf))
        if F0_channel != 0:
            # halves the F0 channel count and both spatial axes
            self.F0_conv = nn.Sequential(
                ResBlk(
                    dim_in=F0_channel,
                    dim_out=int(F0_channel / 2),
                    normalize=True,
                    downsample="half"), )
        if w_hpf > 0:
            self.hpf = HighPass(w_hpf)

        self.reset_parameters()

    def forward(self,
                x: paddle.Tensor,
                s: paddle.Tensor,
                masks: paddle.Tensor=None,
                F0: paddle.Tensor=None):
        """Calculate forward propagation.
        Args:
            x(Tensor(float32)):
                Shape (B, 1, n_mels, T).
            s(Tensor(float32)):
                Style code passed to every decoder block,
                Shape (B, style_dim) or (style_dim, ).
            masks:
                Optional. When given it is indexed as masks[0] / masks[1] and
                enables the high-pass filtered skip connections below; this
                path reads self.hpf, which only exists when w_hpf > 0.
            F0:
                Optional F0 feature map with F0_channel channels. It requires
                F0_channel != 0 at construction (self.F0_conv); its spatial
                size is adapted to the bottleneck by adaptive average pooling.
        Returns:
            Tensor:
                output of generator with a single channel, i.e. (B, 1, ...).
                The spatial size follows the encoder/decoder sampling stages --
                presumably close to the input size; TODO confirm for sizes not
                divisible by the total down-sampling factor.
        """
        x = self.stem(x)
        # encoder activations kept for the masked skip connections
        cache = {}
        # output: (B, max_conv_dim, n_mels // 16, T // 4) for the 4-stage (w_hpf == 0) encoder
        for block in self.encode:
            # only activations whose axis-2 size is 32 / 64 / 128 are cached
            if (masks is not None) and (x.shape[2] in [32, 64, 128]):
                cache[x.shape[2]] = x
            x = block(x)
        if F0 is not None:
            # F0_conv halves the channel count and both spatial axes
            F0 = self.F0_conv(F0)
            # match the bottleneck's spatial size before concatenation
            F0 = F.adaptive_avg_pool2d(F0, [x.shape[-2], x.shape[-1]])
            x = paddle.concat([x, F0], axis=1)
        # decoder: bottleneck features (plus F0 channels, if any) -> dim_in feature maps
        for block in self.decode:
            x = block(x, s)
            if (masks is not None) and (x.shape[2] in [32, 64, 128]):
                # masks[0] is used at axis-2 size 32, masks[1] otherwise
                mask = masks[0] if x.shape[2] in [32] else masks[1]
                mask = F.interpolate(mask, size=x.shape[2], mode='bilinear')
                # add the high-pass filtered, masked encoder activation
                x = x + self.hpf(mask * cache[x.shape[2]])
        out = self.to_out(x)
        return out

    def reset_parameters(self):
        """Apply ``_reset_parameters`` to this layer and all of its sub-layers."""
        self.apply(_reset_parameters)


class MappingNetwork(nn.Layer):
    """MLP mapping a latent code to a style code for a selected domain.

    Args:
        latent_dim (int): Size of the latent input vector.
        style_dim (int): Size of the produced style code.
        num_domains (int): Number of domain-specific output heads.
        hidden_dim (int): Width of all hidden linear layers.
    """

    def __init__(self,
                 latent_dim: int=16,
                 style_dim: int=48,
                 num_domains: int=2,
                 hidden_dim: int=384):
        super().__init__()
        # shared trunk: one input layer plus three hidden layers, all with ReLU
        trunk = [nn.Linear(latent_dim, hidden_dim), nn.ReLU()]
        for _ in range(3):
            trunk.append(nn.Linear(hidden_dim, hidden_dim))
            trunk.append(nn.ReLU())
        self.shared = nn.Sequential(*trunk)

        # one independent head per domain
        self.unshared = nn.LayerList()
        for _ in range(num_domains):
            head = nn.Sequential(
                nn.Linear(hidden_dim, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, hidden_dim),
                nn.ReLU(),
                nn.Linear(hidden_dim, hidden_dim),
                nn.ReLU(), nn.Linear(hidden_dim, style_dim))
            self.unshared.append(head)

        self.reset_parameters()

    def forward(self, z: paddle.Tensor, y: paddle.Tensor):
        """Calculate forward propagation.
        Args:
            z(Tensor(float32)):
                Shape (B, latent_dim).
            y(Tensor):
                speaker (domain) label used as an index. Shape (B, ).
        Returns:
            Tensor:
                Style code of the requested domain per sample, Shape (B, style_dim).
        """
        hidden = self.shared(z)
        # (B, num_domains, style_dim)
        per_domain = paddle.stack(
            [head(hidden) for head in self.unshared], axis=1)
        rows = paddle.arange(y.shape[0])
        # pick, for every sample, the head output of its own domain
        return per_domain[rows, y]

    def reset_parameters(self):
        """Apply ``_reset_parameters`` to this layer and all of its sub-layers."""
        self.apply(_reset_parameters)


class StyleEncoder(nn.Layer):
    """Convolutional encoder extracting a per-domain style code from an input map.

    Args:
        dim_in (int): Number of channels produced by the first convolution.
        style_dim (int): Size of the produced style code.
        num_domains (int): Number of domain-specific output heads.
        max_conv_dim (int): Upper bound on the channel count of any block.
    """

    def __init__(self,
                 dim_in: int=48,
                 style_dim: int=48,
                 num_domains: int=2,
                 max_conv_dim: int=384):
        super().__init__()
        stack = [
            nn.Conv2D(
                in_channels=1,
                out_channels=dim_in,
                kernel_size=3,
                stride=1,
                padding=1)
        ]
        repeat_num = 4
        # four 'half' residual blocks, doubling channels up to max_conv_dim
        for _ in range(repeat_num):
            dim_out = min(dim_in * 2, max_conv_dim)
            stack.append(
                ResBlk(dim_in=dim_in, dim_out=dim_out, downsample='half'))
            dim_in = dim_out

        stack.append(nn.LeakyReLU(0.2))
        # un-padded 5x5 convolution, then global average pooling to 1x1
        stack.append(
            nn.Conv2D(
                in_channels=dim_out,
                out_channels=dim_out,
                kernel_size=5,
                stride=1,
                padding=0))
        stack.append(nn.AdaptiveAvgPool2D(1))
        stack.append(nn.LeakyReLU(0.2))
        self.shared = nn.Sequential(*stack)

        # one linear head per domain
        self.unshared = nn.LayerList()
        for _ in range(num_domains):
            self.unshared.append(nn.Linear(dim_out, style_dim))

        self.reset_parameters()

    def forward(self, x: paddle.Tensor, y: paddle.Tensor):
        """Calculate forward propagation.
        Args:
            x(Tensor(float32)):
                Shape (B, 1, n_mels, T).
            y(Tensor):
                speaker (domain) label used as an index. Shape (B, ).
        Returns:
            Tensor:
                Style code of the requested domain per sample, Shape (B, style_dim).
        """
        feat = self.shared(x)
        feat = feat.reshape((feat.shape[0], -1))
        # (B, num_domains, style_dim)
        per_domain = paddle.stack(
            [head(feat) for head in self.unshared], axis=1)
        rows = paddle.arange(y.shape[0])
        # pick, for every sample, the head output of its own domain
        return per_domain[rows, y]

    def reset_parameters(self):
        """Apply ``_reset_parameters`` to this layer and all of its sub-layers."""
        self.apply(_reset_parameters)


class Discriminator(nn.Layer):
    """Pair of ``Discriminator2D`` networks: a real/fake critic and a domain classifier.

    Args:
        dim_in (int): Number of channels produced by the first convolution.
        num_domains (int): Number of domains (outputs per network).
        max_conv_dim (int): Upper bound on the channel count of any block.
        repeat_num (int): Number of down-sampling residual blocks.
    """

    def __init__(self,
                 dim_in: int=48,
                 num_domains: int=2,
                 max_conv_dim: int=384,
                 repeat_num: int=4):
        super().__init__()
        shared_cfg = dict(
            dim_in=dim_in,
            num_domains=num_domains,
            max_conv_dim=max_conv_dim,
            repeat_num=repeat_num)
        # real/fake discriminator
        self.dis = Discriminator2D(**shared_cfg)
        # adversarial classifier
        self.cls = Discriminator2D(**shared_cfg)
        self.num_domains = num_domains

        self.reset_parameters()

    def forward(self, x: paddle.Tensor, y: paddle.Tensor):
        """Calculate forward propagation.
        Args:
            x(Tensor(float32)):
                Shape (B, 1, n_mels, T).
            y(Tensor):
                Domain label used as an index. Shape (B, ).
        Returns:
            Tensor:
                Real/fake score of domain ``y`` for each sample. Shape (B, )
        """
        return self.dis(x, y)

    def classifier(self, x: paddle.Tensor):
        """Return the adversarial classifier's per-domain outputs, Shape (B, num_domains)."""
        return self.cls.get_feature(x)

    def reset_parameters(self):
        """Apply ``_reset_parameters`` to this layer and all of its sub-layers."""
        self.apply(_reset_parameters)


class Discriminator2D(nn.Layer):
    """2-D convolutional network producing one output per domain.

    Args:
        dim_in (int): Number of channels produced by the first convolution.
        num_domains (int): Number of output channels of the final 1x1 convolution.
        max_conv_dim (int): Upper bound on the channel count of any block.
        repeat_num (int): Number of 'half' down-sampling residual blocks.
    """

    def __init__(self,
                 dim_in: int=48,
                 num_domains: int=2,
                 max_conv_dim: int=384,
                 repeat_num: int=4):
        super().__init__()
        stack = [
            nn.Conv2D(
                in_channels=1,
                out_channels=dim_in,
                kernel_size=3,
                stride=1,
                padding=1)
        ]

        # each block halves both spatial axes and doubles channels (capped)
        for _ in range(repeat_num):
            dim_out = min(dim_in * 2, max_conv_dim)
            stack.append(ResBlk(dim_in, dim_out, downsample='half'))
            dim_in = dim_out

        stack.append(nn.LeakyReLU(0.2))
        # un-padded 5x5 convolution
        stack.append(
            nn.Conv2D(
                in_channels=dim_out,
                out_channels=dim_out,
                kernel_size=5,
                stride=1,
                padding=0))
        stack.append(nn.LeakyReLU(0.2))
        stack.append(nn.AdaptiveAvgPool2D(1))
        # 1x1 convolution mapping to one logit per domain
        stack.append(
            nn.Conv2D(
                in_channels=dim_out,
                out_channels=num_domains,
                kernel_size=1,
                stride=1,
                padding=0))
        self.main = nn.Sequential(*stack)

    def get_feature(self, x: paddle.Tensor):
        """Run the network and flatten the result to Shape (B, num_domains)."""
        logits = self.main(x)
        return logits.reshape((logits.shape[0], -1))

    def forward(self, x: paddle.Tensor, y: paddle.Tensor):
        """Return, for each sample, the output belonging to its domain ``y``. Shape (B, )."""
        logits = self.get_feature(x)
        rows = paddle.arange(y.shape[0])
        return logits[rows, y]


================================================
FILE: paddlespeech/t2s/models/starganv2_vc/starganv2_vc_updater.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from typing import Any
from typing import Dict

from paddle import distributed as dist
from paddle.io import DataLoader
from paddle.nn import Layer
from paddle.optimizer import Optimizer
from paddle.optimizer.lr import LRScheduler

from paddlespeech.t2s.models.starganv2_vc.losses import compute_d_loss
from paddlespeech.t2s.models.starganv2_vc.losses import compute_g_loss
from paddlespeech.t2s.training.extensions.evaluator import StandardEvaluator
from paddlespeech.t2s.training.reporter import report
from paddlespeech.t2s.training.updaters.standard_updater import StandardUpdater
from paddlespeech.t2s.training.updaters.standard_updater import UpdaterState

# Configure the root logger's format (timestamp, level, file:line, message)
# at import time, then create this module's logger and let it emit INFO and above.
logging.basicConfig(
    format='%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(message)s',
    datefmt='[%Y-%m-%d %H:%M:%S]')
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class StarGANv2VCUpdater(StandardUpdater):
    """Training updater for StarGANv2-VC.

    Each call to ``update_core`` performs four optimisation steps on one batch:
    discriminator (latent-code style), discriminator (reference style),
    generator/mapping network/style encoder (latent-code style) and generator
    (reference style).

    Args:
        models (Dict[str, Layer]): Networks passed unchanged to the loss functions.
        optimizers (Dict[str, Optimizer]): Must contain the keys 'generator',
            'style_encoder', 'mapping_network' and 'discriminator'.
        schedulers (Dict[str, LRScheduler]): Same keys as ``optimizers``.
        dataloader (DataLoader): Training data loader.
        g_loss_params (Dict[str, Any]): Extra keyword arguments for ``compute_g_loss``.
        d_loss_params (Dict[str, Any]): Extra keyword arguments for ``compute_d_loss``.
        adv_cls_epoch (int): First epoch at which ``use_adv_cls`` becomes True.
        con_reg_epoch (int): First epoch at which ``use_con_reg`` becomes True.
        use_r1_reg (bool): Stored on the instance; not forwarded by ``update_core``.
        output_dir: Directory (path-like supporting ``/``) for the per-rank log
            file. When None no log file handler is attached.
    """

    def __init__(self,
                 models: Dict[str, Layer],
                 optimizers: Dict[str, Optimizer],
                 schedulers: Dict[str, LRScheduler],
                 dataloader: DataLoader,
                 g_loss_params: Dict[str, Any]={
                     'lambda_sty': 1.,
                     'lambda_cyc': 5.,
                     'lambda_ds': 1.,
                     'lambda_norm': 1.,
                     'lambda_asr': 10.,
                     'lambda_f0': 5.,
                     'lambda_f0_sty': 0.1,
                     'lambda_adv': 2.,
                     'lambda_adv_cls': 0.5,
                     'norm_bias': 0.5,
                 },
                 d_loss_params: Dict[str, Any]={
                     'lambda_reg': 1.,
                     'lambda_adv_cls': 0.1,
                     'lambda_con_reg': 10.,
                 },
                 adv_cls_epoch: int=50,
                 con_reg_epoch: int=30,
                 use_r1_reg: bool=False,
                 output_dir=None):
        self.models = models

        self.optimizers = optimizers
        self.optimizer_g = optimizers['generator']
        self.optimizer_s = optimizers['style_encoder']
        self.optimizer_m = optimizers['mapping_network']
        self.optimizer_d = optimizers['discriminator']

        self.schedulers = schedulers
        self.scheduler_g = schedulers['generator']
        self.scheduler_s = schedulers['style_encoder']
        self.scheduler_m = schedulers['mapping_network']
        self.scheduler_d = schedulers['discriminator']

        self.dataloader = dataloader

        self.g_loss_params = g_loss_params
        self.d_loss_params = d_loss_params

        self.use_r1_reg = use_r1_reg
        self.con_reg_epoch = con_reg_epoch
        self.adv_cls_epoch = adv_cls_epoch

        self.state = UpdaterState(iteration=0, epoch=0)
        self.train_iterator = iter(self.dataloader)

        # The default output_dir=None used to crash on `None / str`; a log
        # file is now only attached when a directory was actually supplied.
        self.filehandler = None
        if output_dir is not None:
            log_file = output_dir / 'worker_{}.log'.format(dist.get_rank())
            self.filehandler = logging.FileHandler(str(log_file))
            logger.addHandler(self.filehandler)
        self.logger = logger
        self.msg = ""

    def zero_grad(self):
        """Clear the gradients held by all four optimizers."""
        self.optimizer_d.clear_grad()
        self.optimizer_g.clear_grad()
        self.optimizer_m.clear_grad()
        self.optimizer_s.clear_grad()

    def scheduler(self):
        """Advance all four learning-rate schedulers by one step."""
        self.scheduler_d.step()
        self.scheduler_g.step()
        self.scheduler_m.step()
        self.scheduler_s.step()

    def update_core(self, batch):
        """Run one training iteration on ``batch`` and record the four losses.

        Args:
            batch: Mapping with the keys 'x_real', 'y_org', 'x_ref', 'x_ref2',
                'y_trg', 'z_trg' and 'z_trg2'.
        """
        self.msg = "Rank: {}, ".format(dist.get_rank())
        losses_dict = {}
        # parse batch
        x_real = batch['x_real']
        y_org = batch['y_org']
        x_ref = batch['x_ref']
        x_ref2 = batch['x_ref2']
        y_trg = batch['y_trg']
        z_trg = batch['z_trg']
        z_trg2 = batch['z_trg2']

        # both extra loss terms are switched on once their start epoch is reached
        use_con_reg = (self.state.epoch >= self.con_reg_epoch)
        use_adv_cls = (self.state.epoch >= self.adv_cls_epoch)

        # NOTE(review): self.use_r1_reg is not forwarded to compute_d_loss
        # here, so the loss function's own default applies -- confirm intent.

        # Discriminator loss
        # train the discriminator (by random reference)
        self.zero_grad()
        random_d_loss = compute_d_loss(
            nets=self.models,
            x_real=x_real,
            y_org=y_org,
            y_trg=y_trg,
            z_trg=z_trg,
            use_adv_cls=use_adv_cls,
            use_con_reg=use_con_reg,
            **self.d_loss_params)
        random_d_loss.backward()
        self.optimizer_d.step()
        # train the discriminator (by target reference)
        self.zero_grad()
        target_d_loss = compute_d_loss(
            nets=self.models,
            x_real=x_real,
            y_org=y_org,
            y_trg=y_trg,
            x_ref=x_ref,
            use_adv_cls=use_adv_cls,
            use_con_reg=use_con_reg,
            **self.d_loss_params)
        target_d_loss.backward()
        self.optimizer_d.step()
        report("train/random_d_loss", float(random_d_loss))
        report("train/target_d_loss", float(target_d_loss))
        losses_dict["random_d_loss"] = float(random_d_loss)
        losses_dict["target_d_loss"] = float(target_d_loss)

        # Generator
        # train the generator (by random reference)
        self.zero_grad()
        random_g_loss = compute_g_loss(
            nets=self.models,
            x_real=x_real,
            y_org=y_org,
            y_trg=y_trg,
            z_trgs=[z_trg, z_trg2],
            use_adv_cls=use_adv_cls,
            **self.g_loss_params)
        random_g_loss.backward()
        self.optimizer_g.step()
        self.optimizer_m.step()
        self.optimizer_s.step()

        # train the generator (by target reference)
        self.zero_grad()
        target_g_loss = compute_g_loss(
            nets=self.models,
            x_real=x_real,
            y_org=y_org,
            y_trg=y_trg,
            x_refs=[x_ref, x_ref2],
            use_adv_cls=use_adv_cls,
            **self.g_loss_params)
        target_g_loss.backward()
        # Open question: should optimizer_g, optimizer_m and optimizer_s all
        # be stepped here? The reference implementation does not step the
        # latter two -- it is unclear whether that is an oversight.
        self.optimizer_g.step()
        # self.optimizer_m.step()
        # self.optimizer_s.step()
        report("train/random_g_loss", float(random_g_loss))
        report("train/target_g_loss", float(target_g_loss))
        losses_dict["random_g_loss"] = float(random_g_loss)
        losses_dict["target_g_loss"] = float(target_g_loss)

        self.scheduler()

        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())


class StarGANv2VCEvaluator(StandardEvaluator):
    """Evaluator computing the StarGANv2-VC discriminator and generator losses.

    Args:
        models (Dict[str, Layer]): Networks passed unchanged to the loss functions.
        dataloader (DataLoader): Evaluation data loader.
        g_loss_params (Dict[str, Any]): Extra keyword arguments for ``compute_g_loss``.
        d_loss_params (Dict[str, Any]): Extra keyword arguments for ``compute_d_loss``.
        adv_cls_epoch (int): Stored on the instance (the evaluator itself does
            not track the current epoch).
        con_reg_epoch (int): Stored on the instance.
        use_r1_reg (bool): Forwarded to ``compute_d_loss``.
        output_dir: Directory (path-like supporting ``/``) for the per-rank log
            file. When None no log file handler is attached.
    """

    def __init__(self,
                 models: Dict[str, Layer],
                 dataloader: DataLoader,
                 g_loss_params: Dict[str, Any]={
                     'lambda_sty': 1.,
                     'lambda_cyc': 5.,
                     'lambda_ds': 1.,
                     'lambda_norm': 1.,
                     'lambda_asr': 10.,
                     'lambda_f0': 5.,
                     'lambda_f0_sty': 0.1,
                     'lambda_adv': 2.,
                     'lambda_adv_cls': 0.5,
                     'norm_bias': 0.5,
                 },
                 d_loss_params: Dict[str, Any]={
                     'lambda_reg': 1.,
                     'lambda_adv_cls': 0.1,
                     'lambda_con_reg': 10.,
                 },
                 adv_cls_epoch: int=50,
                 con_reg_epoch: int=30,
                 use_r1_reg: bool=False,
                 output_dir=None):
        self.models = models

        self.dataloader = dataloader

        self.g_loss_params = g_loss_params
        self.d_loss_params = d_loss_params

        self.use_r1_reg = use_r1_reg
        self.con_reg_epoch = con_reg_epoch
        self.adv_cls_epoch = adv_cls_epoch

        # evaluate_core used to read an undefined local `use_adv_cls` and
        # raised NameError. The evaluator has no epoch counter to compare with
        # adv_cls_epoch, so the flag lives on the instance (off by default)
        # and may be switched on by the training script.
        self.use_adv_cls = False

        # The default output_dir=None used to crash on `None / str`; a log
        # file is now only attached when a directory was actually supplied.
        self.filehandler = None
        if output_dir is not None:
            log_file = output_dir / 'worker_{}.log'.format(dist.get_rank())
            self.filehandler = logging.FileHandler(str(log_file))
            logger.addHandler(self.filehandler)
        self.logger = logger
        self.msg = ""

    def evaluate_core(self, batch):
        """Compute, report and log the four losses for one evaluation batch.

        Args:
            batch: Mapping with the keys 'x_real', 'y_org', 'x_ref', 'x_ref2',
                'y_trg', 'z_trg' and 'z_trg2'.
        """
        # logging.debug("Evaluate: ")
        self.msg = "Evaluate: "
        losses_dict = {}

        x_real = batch['x_real']
        y_org = batch['y_org']
        x_ref = batch['x_ref']
        x_ref2 = batch['x_ref2']
        y_trg = batch['y_trg']
        z_trg = batch['z_trg']
        z_trg2 = batch['z_trg2']

        # getattr keeps subclasses that bypass __init__ working
        use_adv_cls = getattr(self, 'use_adv_cls', False)

        # eval the discriminator

        random_d_loss = compute_d_loss(
            nets=self.models,
            x_real=x_real,
            y_org=y_org,
            y_trg=y_trg,
            z_trg=z_trg,
            use_r1_reg=self.use_r1_reg,
            use_adv_cls=use_adv_cls,
            **self.d_loss_params)

        target_d_loss = compute_d_loss(
            nets=self.models,
            x_real=x_real,
            y_org=y_org,
            y_trg=y_trg,
            x_ref=x_ref,
            use_r1_reg=self.use_r1_reg,
            use_adv_cls=use_adv_cls,
            **self.d_loss_params)

        report("eval/random_d_loss", float(random_d_loss))
        report("eval/target_d_loss", float(target_d_loss))
        losses_dict["random_d_loss"] = float(random_d_loss)
        losses_dict["target_d_loss"] = float(target_d_loss)

        # eval the generator

        random_g_loss = compute_g_loss(
            nets=self.models,
            x_real=x_real,
            y_org=y_org,
            y_trg=y_trg,
            z_trgs=[z_trg, z_trg2],
            use_adv_cls=use_adv_cls,
            **self.g_loss_params)

        target_g_loss = compute_g_loss(
            nets=self.models,
            x_real=x_real,
            y_org=y_org,
            y_trg=y_trg,
            x_refs=[x_ref, x_ref2],
            use_adv_cls=use_adv_cls,
            **self.g_loss_params)

        report("eval/random_g_loss", float(random_g_loss))
        report("eval/target_g_loss", float(target_g_loss))
        losses_dict["random_g_loss"] = float(random_g_loss)
        losses_dict["target_g_loss"] = float(target_g_loss)

        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())
        self.logger.info(self.msg)


================================================
FILE: paddlespeech/t2s/models/starganv2_vc/transforms.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random

import numpy as np
import paddle
import paddle.nn.functional as F
from paddle import nn


## 1. RandomTimeStrech
class TimeStrech(nn.Layer):
    """Stretch the last axis by ``scale`` and restore the original length.

    Args:
        scale: Scale factor applied to the last axis by bilinear interpolation.
    """

    def __init__(self, scale):
        super().__init__()
        self.scale = scale

    def forward(self, x: paddle.Tensor):
        """Resample ``x`` along its last axis, then crop or pad back.

        When the stretched signal is too short, a randomly chosen slice of the
        stretched signal itself is appended as filler. All size-1 axes are
        squeezed after interpolation and a new axis 1 is inserted on return.
        """
        target_len = x.shape[-1]

        stretched = F.interpolate(
            x,
            scale_factor=(1, self.scale),
            align_corners=False,
            mode='bilinear').squeeze()
        cur_len = stretched.shape[-1]

        if cur_len >= target_len:
            # long enough: keep the leading target_len frames
            return stretched[..., :target_len].unsqueeze(1)

        missing = target_len - cur_len
        # random start of the filler slice, clamped at the left edge
        begin = max(random.randint(0, cur_len) - missing, 0)
        filler = stretched[..., begin:begin + missing]
        padded = paddle.concat([stretched, filler], axis=-1)
        return padded.unsqueeze(1)


## 2. PitchShift
class PitchShift(nn.Layer):
    """Shift the pitch by resampling the mel (frequency) axis.

    Args:
        shift: Number of mel bins by which the frequency axis is stretched
            (positive) or compressed (negative) before being cropped or
            zero-padded back to its original size.
    """

    def __init__(self, shift):
        super().__init__()
        self.shift = shift

    def forward(self, x: paddle.Tensor):
        """Apply the pitch shift.

        Args:
            x(Tensor): Shape (B, 1, n_mels, T), (B, n_mels, T) or (n_mels, T).
        Returns:
            Tensor: Shape (B, 1, n_mels, T) (B == 1 for a 2-D input).
        """
        # Drop singleton axes first and only then restore a batch axis.
        # The previous order (unsqueeze, then squeeze) cancelled itself, so
        # 2-D and batch-size-1 inputs reached the bilinear interpolate as 3-D
        # tensors, which it rejects.
        x = x.squeeze()
        if len(x.shape) == 2:
            x = x.unsqueeze(0)
        mel_size = x.shape[1]
        shift_scale = (mel_size + self.shift) / mel_size
        x = F.interpolate(
            x.unsqueeze(1),
            scale_factor=(shift_scale, 1.),
            align_corners=False,
            mode='bilinear').squeeze(1)

        # crop when stretched, zero-pad when compressed
        x = x[:, :mel_size]
        if x.shape[1] < mel_size:
            pad_size = mel_size - x.shape[1]
            # paddle has no `cat`, and `zeros` takes the shape as one list
            padding = paddle.zeros(
                [x.shape[0], pad_size, x.shape[2]], dtype=x.dtype)
            x = paddle.concat([x, padding], axis=1)
        # x is (B, n_mels, T) here; re-insert the channel axis. No final
        # squeeze, so a batch of one keeps its batch axis.
        return x.unsqueeze(1)


## 3. ShiftBias
class ShiftBias(nn.Layer):
    """Add a constant offset to the input.

    Args:
        bias: Value added to every element of the input.
    """

    def __init__(self, bias):
        super().__init__()
        self.bias = bias

    def forward(self, x: paddle.Tensor):
        """Return ``x`` shifted by ``self.bias``."""
        shifted = x + self.bias
        return shifted


## 4. Scaling
class SpectScaling(nn.Layer):
    """Multiply the input by a constant factor.

    Args:
        scale: Factor applied to every input passed to ``forward``.
    """

    def __init__(self, scale):
        super().__init__()
        self.scale = scale

    def forward(self, x: paddle.Tensor):
        """Return ``x * scale``."""
        scaled = x * self.scale
        return scaled


## 5. Time Flip
class TimeFlip(nn.Layer):
    """Reverse a randomly placed window along the last axis of the input.

    Args:
        length: Window size; it is rounded to the nearest integer. Windows
            of size 1 or less leave the input untouched.
    """

    def __init__(self, length):
        super().__init__()
        self.length = round(length)

    def forward(self, x: paddle.Tensor):
        """Return a copy of ``x`` with one window of the last axis reversed.

        The window start is drawn with
        ``np.random.randint(0, x.shape[-1] - self.length)``.
        """
        if self.length <= 1:
            # Nothing to flip.
            return x

        begin = np.random.randint(0, x.shape[-1] - self.length)
        end = begin + self.length

        # Work on a clone so the caller's tensor is not modified in place.
        flipped = x.clone()
        flipped[..., begin:end] = paddle.flip(x[..., begin:end], axis=[-1])
        return flipped


class PhaseShuffle2D(nn.Layer):
    """Rotate the last axis of a 4-D tensor by a (random) offset.

    Args:
        n (int): Largest absolute offset drawn when ``move`` is not given.
    """

    def __init__(self, n: int=2):
        super().__init__()
        self.n = n
        # Private generator with a fixed seed, independent of the global
        # ``random`` module state.
        self.random = random.Random(1)

    def forward(self, x: paddle.Tensor, move=None):
        """Return ``x`` rotated along axis 3.

        Args:
            x: Input indexed as ``x[:, :, :, i]`` (the original comment
                gives its size as (B, C, M, L)).
            move: Offset to rotate by; drawn uniformly from ``[-n, n]``
                when ``None``.
        """
        offset = self.random.randint(-self.n, self.n) if move is None else move

        if offset == 0:
            return x

        # Split at the offset and swap the two parts.
        head = x[:, :, :, :offset]
        tail = x[:, :, :, offset:]
        return paddle.concat([tail, head], axis=3)


def build_transforms():
    """Build a random ``nn.Sequential`` chain of spectrogram transforms.

    A magnitude ``M`` is drawn from ``np.random.random()``. Three factories
    (time stretch, scaling, phase shuffle) are then sampled with
    ``np.random.choice`` -- as many draws as there are factories -- and each
    sampled factory is instantiated with ``M``.

    Returns:
        nn.Sequential: The composed transforms.
    """

    def _make_stretch(M):
        return TimeStrech(1 + (np.random.random() - 0.5) * M * 0.2)

    def _make_scaling(M):
        return SpectScaling(1 + (np.random.random() - 1) * M * 0.1)

    def _make_shuffle(M):
        return PhaseShuffle2D(192)

    factories = [_make_stretch, _make_scaling, _make_shuffle]
    count = len(factories)
    magnitude = np.random.random()

    layers = []
    for factory in np.random.choice(factories, count):
        layers.append(factory(magnitude))
    return nn.Sequential(*layers)


================================================
FILE: paddlespeech/t2s/models/tacotron2/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .tacotron2 import *
from .tacotron2_updater import *


================================================
FILE: paddlespeech/t2s/models/tacotron2/tacotron2.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""Tacotron 2 related modules for paddle"""
import logging
from typing import Dict
from typing import Optional
from typing import Tuple

import paddle
import paddle.nn.functional as F
from paddle import nn
from typeguard import typechecked

from paddlespeech.t2s.modules.nets_utils import initialize
from paddlespeech.t2s.modules.nets_utils import make_pad_mask
from paddlespeech.t2s.modules.tacotron2.attentions import AttForward
from paddlespeech.t2s.modules.tacotron2.attentions import AttForwardTA
from paddlespeech.t2s.modules.tacotron2.attentions import AttLoc
from paddlespeech.t2s.modules.tacotron2.decoder import Decoder
from paddlespeech.t2s.modules.tacotron2.encoder import Encoder


class Tacotron2(nn.Layer):
    """Tacotron2 module for end-to-end text-to-speech.

    This is a module of Spectrogram prediction network in Tacotron2 described
    in `Natural TTS Synthesis by Conditioning WaveNet on Mel Spectrogram Predictions`_,
    which converts the sequence of characters into the sequence of Mel-filterbanks.

    .. _`Natural TTS Synthesis by Conditioning WaveNet on Mel Spectrogram Predictions`:
       https://arxiv.org/abs/1712.05884

    """

    @typechecked
    def __init__(
            self,
            # network structure related
            idim: int,
            odim: int,
            embed_dim: int=512,
            elayers: int=1,
            eunits: int=512,
            econv_layers: int=3,
            econv_chans: int=512,
            econv_filts: int=5,
            atype: str="location",
            adim: int=512,
            aconv_chans: int=32,
            aconv_filts: int=15,
            cumulate_att_w: bool=True,
            dlayers: int=2,
            dunits: int=1024,
            prenet_layers: int=2,
            prenet_units: int=256,
            postnet_layers: int=5,
            postnet_chans: int=512,
            postnet_filts: int=5,
            output_activation: Optional[str]=None,
            use_batch_norm: bool=True,
            use_concate: bool=True,
            use_residual: bool=False,
            reduction_factor: int=1,
            # extra embedding related
            spk_num: Optional[int]=None,
            lang_num: Optional[int]=None,
            spk_embed_dim: Optional[int]=None,
            spk_embed_integration_type: str="concat",
            dropout_rate: float=0.5,
            zoneout_rate: float=0.1,
            # training related
            init_type: str="xavier_uniform", ):
        """Initialize Tacotron2 module.

        Args:
            idim (int):
                Dimension of the inputs.
            odim (int):
                Dimension of the outputs.
            embed_dim (int):
                Dimension of the token embedding.
            elayers (int):
                Number of encoder blstm layers.
            eunits (int):
                Number of encoder blstm units.
            econv_layers (int):
                Number of encoder conv layers.
            econv_filts (int):
                Number of encoder conv filter size.
            econv_chans (int):
                Number of encoder conv filter channels.
            atype (str):
                Attention type: "location", "forward" or "forward_ta".
            dlayers (int):
                Number of decoder lstm layers.
            dunits (int):
                Number of decoder lstm units.
            prenet_layers (int):
                Number of prenet layers.
            prenet_units (int):
                Number of prenet units.
            postnet_layers (int):
                Number of postnet layers.
            postnet_filts (int):
                Number of postnet filter size.
            postnet_chans (int):
                Number of postnet filter channels.
            output_activation (Optional[str]):
                Name of activation function for outputs; it is looked up
                as an attribute of ``paddle.nn.functional``.
            adim (int):
                Number of dimension of mlp in attention.
            aconv_chans (int):
                Number of attention conv filter channels.
            aconv_filts (int):
                Number of attention conv filter size.
            cumulate_att_w (bool):
                Whether to cumulate previous attention weight. It is forced
                to False for the "forward" and "forward_ta" attentions.
            use_batch_norm (bool):
                Whether to use batch normalization.
            use_concate (bool):
                Whether to concat enc outputs w/ dec lstm outputs.
            use_residual (bool):
                Forwarded to the encoder as ``use_residual``.
            reduction_factor (int):
                Reduction factor.
            spk_num (Optional[int]):
                Number of speakers. If set to > 1, assume that the
                sids will be provided as the input and use sid embedding layer.
            lang_num (Optional[int]):
                Number of languages. If set to > 1, assume that the
                lids will be provided as the input and use lid embedding layer.
            spk_embed_dim (Optional[int]):
                Speaker embedding dimension. If set to > 0,
                assume that spk_emb will be provided as the input.
            spk_embed_integration_type (str):
                How to integrate speaker embedding: "concat" or "add".
            dropout_rate (float):
                Dropout rate.
            zoneout_rate (float):
                Zoneout rate.
            init_type (str):
                Initialization scheme name passed to ``initialize``.

        Raises:
            ValueError: If ``output_activation`` is not found in
                ``paddle.nn.functional`` or ``spk_embed_integration_type``
                is not supported.
            NotImplementedError: If ``atype`` is not supported.
        """
        super().__init__()

        # store hyperparameters
        self.idim = idim
        self.odim = odim
        # the last input index is used as the end-of-sequence token
        self.eos = idim - 1
        self.cumulate_att_w = cumulate_att_w
        self.reduction_factor = reduction_factor

        # define activation function for the final output
        if output_activation is None:
            self.output_activation_fn = None
        elif hasattr(F, output_activation):
            self.output_activation_fn = getattr(F, output_activation)
        else:
            raise ValueError(f"there is no such an activation function. "
                             f"({output_activation})")

        # set padding idx
        padding_idx = 0
        self.padding_idx = padding_idx

        # initialize parameters
        # NOTE(review): called before the sub-layers exist; presumably it
        # installs a global initializer, which is reset at the end of this
        # constructor -- confirm in nets_utils.initialize.
        initialize(self, init_type)

        # define network modules
        self.enc = Encoder(
            idim=idim,
            embed_dim=embed_dim,
            elayers=elayers,
            eunits=eunits,
            econv_layers=econv_layers,
            econv_chans=econv_chans,
            econv_filts=econv_filts,
            use_batch_norm=use_batch_norm,
            use_residual=use_residual,
            dropout_rate=dropout_rate,
            padding_idx=padding_idx, )

        # optional speaker-id / language-id embeddings, added to the
        # encoder outputs
        self.spk_num = None
        if spk_num is not None and spk_num > 1:
            self.spk_num = spk_num
            self.sid_emb = nn.Embedding(spk_num, eunits)
        self.lang_num = None
        if lang_num is not None and lang_num > 1:
            self.lang_num = lang_num
            self.lid_emb = nn.Embedding(lang_num, eunits)

        # optional external speaker embedding; "concat" widens the decoder
        # input, "add" projects the embedding to the encoder width
        self.spk_embed_dim = None
        if spk_embed_dim is not None and spk_embed_dim > 0:
            self.spk_embed_dim = spk_embed_dim
            self.spk_embed_integration_type = spk_embed_integration_type
        if self.spk_embed_dim is None:
            dec_idim = eunits
        elif self.spk_embed_integration_type == "concat":
            dec_idim = eunits + spk_embed_dim
        elif self.spk_embed_integration_type == "add":
            dec_idim = eunits
            self.projection = nn.Linear(self.spk_embed_dim, eunits)
        else:
            raise ValueError(f"{spk_embed_integration_type} is not supported.")

        if atype == "location":
            att = AttLoc(dec_idim, dunits, adim, aconv_chans, aconv_filts)
        elif atype == "forward":
            att = AttForward(dec_idim, dunits, adim, aconv_chans, aconv_filts)
            if self.cumulate_att_w:
                logging.warning("cumulation of attention weights is disabled "
                                "in forward attention.")
                self.cumulate_att_w = False
        elif atype == "forward_ta":
            att = AttForwardTA(dec_idim, dunits, adim, aconv_chans, aconv_filts,
                               odim)
            if self.cumulate_att_w:
                logging.warning("cumulation of attention weights is disabled "
                                "in forward attention.")
                self.cumulate_att_w = False
        else:
            raise NotImplementedError("Support only location or forward")
        self.dec = Decoder(
            idim=dec_idim,
            odim=odim,
            att=att,
            dlayers=dlayers,
            dunits=dunits,
            prenet_layers=prenet_layers,
            prenet_units=prenet_units,
            postnet_layers=postnet_layers,
            postnet_chans=postnet_chans,
            postnet_filts=postnet_filts,
            output_activation_fn=self.output_activation_fn,
            cumulate_att_w=self.cumulate_att_w,
            use_batch_norm=use_batch_norm,
            use_concate=use_concate,
            dropout_rate=dropout_rate,
            zoneout_rate=zoneout_rate,
            reduction_factor=reduction_factor, )

        # reset the global initializer so later layers are unaffected
        nn.initializer.set_global_initializer(None)

    def forward(
            self,
            text: paddle.Tensor,
            text_lengths: paddle.Tensor,
            speech: paddle.Tensor,
            speech_lengths: paddle.Tensor,
            spk_emb: Optional[paddle.Tensor]=None,
            spk_id: Optional[paddle.Tensor]=None,
            lang_id: Optional[paddle.Tensor]=None
    ) -> Tuple[paddle.Tensor, ...]:
        """Calculate forward propagation.

        Args:
            text (Tensor(int64)):
                Batch of padded character ids (B, T_text).
            text_lengths (Tensor(int64)):
                Batch of lengths of each input batch (B,).
            speech (Tensor):
                Batch of padded target features (B, T_feats, odim).
            speech_lengths (Tensor(int64)):
                Batch of the lengths of each target (B,).
            spk_emb (Optional[Tensor]):
                Batch of speaker embeddings (B, spk_embed_dim).
            spk_id (Optional[Tensor]):
                Batch of speaker IDs (B, 1).
            lang_id (Optional[Tensor]):
                Batch of language IDs (B, 1).

        Returns:
            tuple: ``(after_outs, before_outs, logits, ys, stop_labels,
            olens, att_ws, olens_in)`` where the first three and ``att_ws``
            are the decoder outputs, ``ys`` / ``stop_labels`` / ``olens``
            are the targets, stop labels and target lengths (trimmed to a
            multiple of the reduction factor when it is > 1), and
            ``olens_in`` is ``olens`` divided by the reduction factor.
            The losses are not computed here.

        """
        # drop padding beyond the longest sequence in the batch
        text = text[:, :text_lengths.max()]
        speech = speech[:, :speech_lengths.max()]

        batch_size = paddle.shape(text)[0]

        # Add eos at the last of sequence
        xs = F.pad(text, [0, 0, 0, 1], "constant", self.padding_idx)
        for i, l in enumerate(text_lengths):
            xs[i, l] = self.eos
        ilens = text_lengths + 1

        ys = speech
        olens = speech_lengths

        # make labels for stop prediction
        stop_labels = make_pad_mask(olens - 1)
        # bool tensors cannot be sliced, so cast to float32 first
        stop_labels = paddle.cast(stop_labels, dtype='float32')
        stop_labels = F.pad(stop_labels, [0, 0, 0, 1], "constant", 1.0)

        # calculate tacotron2 outputs
        after_outs, before_outs, logits, att_ws = self._forward(
            xs=xs,
            ilens=ilens,
            ys=ys,
            olens=olens,
            spk_emb=spk_emb,
            spk_id=spk_id,
            lang_id=lang_id, )

        # modify mod part of groundtruth
        if self.reduction_factor > 1:
            assert (olens >= self.reduction_factor).all(
            ), "Output length must be greater than or equal to reduction factor."
            olens = olens - olens % self.reduction_factor
            max_out = olens.max()
            ys = ys[:, :max_out]
            stop_labels = stop_labels[:, :max_out]
            # Mark the last kept frame of every utterance as a stop frame.
            # paddle.scatter takes (x, index, updates, overwrite) and has no
            # axis argument, so assign per utterance (same pattern as the
            # eos insertion above).
            for i, l in enumerate(olens):
                stop_labels[i, l - 1] = 1.0
            olens_in = olens // self.reduction_factor
        else:
            olens_in = olens
        return after_outs, before_outs, logits, ys, stop_labels, olens, att_ws, olens_in

    def _forward(
            self,
            xs: paddle.Tensor,
            ilens: paddle.Tensor,
            ys: paddle.Tensor,
            olens: paddle.Tensor,
            spk_emb: paddle.Tensor,
            spk_id: paddle.Tensor,
            lang_id: paddle.Tensor,
    ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor, paddle.Tensor]:
        """Encode ``xs``, mix in speaker/language information and decode.

        Args:
            xs (Tensor): Batch of padded token ids with eos appended.
            ilens (Tensor): Lengths of ``xs``.
            ys (Tensor): Batch of padded target features fed to the decoder.
            olens (Tensor): Target lengths (not used by this method).
            spk_emb (Tensor): Speaker embeddings; used when
                ``spk_embed_dim`` is set.
            spk_id (Tensor): Speaker ids; used when ``spk_num`` is set.
            lang_id (Tensor): Language ids; used when ``lang_num`` is set.

        Returns:
            The decoder outputs, which callers in this class unpack as
            ``(after_outs, before_outs, logits, att_ws)``.
        """

        hs, hlens = self.enc(xs, ilens)
        if self.spk_num is not None:
            sid_embs = self.sid_emb(spk_id.reshape([-1]))
            hs = hs + sid_embs.unsqueeze(1)
        if self.lang_num is not None:
            lid_embs = self.lid_emb(lang_id.reshape([-1]))
            hs = hs + lid_embs.unsqueeze(1)
        if self.spk_embed_dim is not None:
            hs = self._integrate_with_spk_embed(hs, spk_emb)

        return self.dec(hs, hlens, ys)

    def inference(
            self,
            text: paddle.Tensor,
            speech: Optional[paddle.Tensor]=None,
            spk_emb: Optional[paddle.Tensor]=None,
            spk_id: Optional[paddle.Tensor]=None,
            lang_id: Optional[paddle.Tensor]=None,
            threshold: float=0.5,
            minlenratio: float=0.0,
            maxlenratio: float=10.0,
            use_att_constraint: bool=False,
            backward_window: int=1,
            forward_window: int=3,
            use_teacher_forcing: bool=False, ) -> Dict[str, paddle.Tensor]:
        """Generate the sequence of features given the sequences of characters.

        Args:
            text (Tensor(int64)):
                Input sequence of characters (T_text,).
            speech (Optional[Tensor]):
                Target feature sequence; required (and only used) when
                ``use_teacher_forcing`` is True.
                NOTE(review): presumably (T_feats, odim) -- confirm.
            spk_emb (Optional[Tensor]):
                Speaker embedding (spk_embed_dim,).
            spk_id (Optional[Tensor]):
                Speaker ID (1,).
            lang_id (Optional[Tensor]):
                Language ID (1,).
            threshold (float):
                Threshold in inference.
            minlenratio (float):
                Minimum length ratio in inference.
            maxlenratio (float):
                Maximum length ratio in inference.
            use_att_constraint (bool):
                Whether to apply attention constraint.
            backward_window (int):
                Backward window in attention constraint.
            forward_window (int):
                Forward window in attention constraint.
            use_teacher_forcing (bool):
                Whether to use teacher forcing.

        Returns:
            Dict[str, Tensor]
            Output dict including the following items:
                * feat_gen (Tensor): Output sequence of features (T_feats, odim).
                * prob (Tensor): Output sequence of stop probabilities (T_feats,).
                  Not included when teacher forcing is used.
                * att_w (Tensor): Attention weights (T_feats, T).

        """
        x = text
        y = speech

        # add eos at the last of sequence
        x = F.pad(x, [0, 1], "constant", self.eos)

        # inference with teacher forcing
        if use_teacher_forcing:
            assert speech is not None, "speech must be provided with teacher forcing."

            # add a batch axis of size 1 and reuse the training path
            xs, ys = x.unsqueeze(0), y.unsqueeze(0)
            spk_emb = None if spk_emb is None else spk_emb.unsqueeze(0)
            ilens = paddle.shape(xs)[1]
            olens = paddle.shape(ys)[1]
            outs, _, _, att_ws = self._forward(
                xs=xs,
                ilens=ilens,
                ys=ys,
                olens=olens,
                spk_emb=spk_emb,
                spk_id=spk_id,
                lang_id=lang_id, )

            return dict(feat_gen=outs[0], att_w=att_ws[0])

        # inference
        h = self.enc.inference(x)

        if self.spk_num is not None:
            sid_emb = self.sid_emb(spk_id.reshape([-1]))
            h = h + sid_emb
        if self.lang_num is not None:
            lid_emb = self.lid_emb(lang_id.reshape([-1]))
            h = h + lid_emb
        if self.spk_embed_dim is not None:
            # the integration helper works on batches: add and strip a
            # batch axis of size 1 around the call
            hs, spk_emb = h.unsqueeze(0), spk_emb.unsqueeze(0)
            h = self._integrate_with_spk_embed(hs, spk_emb)[0]
        out, prob, att_w = self.dec.inference(
            h,
            threshold=threshold,
            minlenratio=minlenratio,
            maxlenratio=maxlenratio,
            use_att_constraint=use_att_constraint,
            backward_window=backward_window,
            forward_window=forward_window, )

        return dict(feat_gen=out, prob=prob, att_w=att_w)

    def _integrate_with_spk_embed(self,
                                  hs: paddle.Tensor,
                                  spk_emb: paddle.Tensor) -> paddle.Tensor:
        """Integrate speaker embedding with hidden states.

        Args:
            hs (Tensor): Batch of hidden state sequences (B, Tmax, eunits).
            spk_emb (Tensor): Batch of speaker embeddings (B, spk_embed_dim).

        Returns:
            Tensor: Batch of integrated hidden state sequences (B, Tmax, eunits) if
                integration_type is "add" else (B, Tmax, eunits + spk_embed_dim).

        Raises:
            NotImplementedError: If the integration type is neither "add"
                nor "concat".

        """
        if self.spk_embed_integration_type == "add":
            # apply projection and then add to hidden states
            spk_emb = self.projection(F.normalize(spk_emb))
            hs = hs + spk_emb.unsqueeze(1)
        elif self.spk_embed_integration_type == "concat":
            # concat hidden states with spk embeds
            spk_emb = F.normalize(spk_emb).unsqueeze(1).expand(
                shape=[-1, paddle.shape(hs)[1], -1])
            hs = paddle.concat([hs, spk_emb], axis=-1)
        else:
            raise NotImplementedError("support only add or concat.")

        return hs


class Tacotron2Inference(nn.Layer):
    """Bundle an acoustic model with a normalizer for inference.

    Args:
        normalizer: Object whose ``inverse`` method is applied to the
            generated features.
        model: Acoustic model exposing an ``inference`` method that returns
            a dict with a ``"feat_gen"`` entry.
    """

    def __init__(self, normalizer, model):
        super().__init__()
        self.normalizer = normalizer
        self.acoustic_model = model

    def forward(self, text, spk_id=None, spk_emb=None):
        """Run the acoustic model and undo the feature normalization.

        Returns:
            The result of ``normalizer.inverse`` on the generated features.
        """
        outputs = self.acoustic_model.inference(
            text, spk_id=spk_id, spk_emb=spk_emb)
        return self.normalizer.inverse(outputs["feat_gen"])


================================================
FILE: paddlespeech/t2s/models/tacotron2/tacotron2_updater.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from pathlib import Path

from paddle import distributed as dist
from paddle.io import DataLoader
from paddle.nn import Layer
from paddle.optimizer import Optimizer

from paddlespeech.t2s.modules.losses import GuidedAttentionLoss
from paddlespeech.t2s.modules.losses import Tacotron2Loss
from paddlespeech.t2s.training.extensions.evaluator import StandardEvaluator
from paddlespeech.t2s.training.reporter import report
from paddlespeech.t2s.training.updaters.standard_updater import StandardUpdater
logging.basicConfig(
    format='%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(message)s',
    datefmt='[%Y-%m-%d %H:%M:%S]')
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class Tacotron2Updater(StandardUpdater):
    """Training-step driver for Tacotron2.

    Each call to ``update_core`` runs the model on one batch, combines the
    Tacotron2 losses (and optionally the guided attention loss), performs
    one optimizer step and reports the scalar losses.
    """

    def __init__(self,
                 model: Layer,
                 optimizer: Optimizer,
                 dataloader: DataLoader,
                 init_state=None,
                 use_masking: bool=True,
                 use_weighted_masking: bool=False,
                 bce_pos_weight: float=5.0,
                 loss_type: str="L1+L2",
                 use_guided_attn_loss: bool=True,
                 guided_attn_loss_sigma: float=0.4,
                 guided_attn_loss_lambda: float=1.0,
                 output_dir: Path=None):
        """Create the updater.

        Args:
            model: Tacotron2 model to train.
            optimizer: Optimizer stepping the model parameters.
            dataloader: Training data loader.
            init_state: Initial updater state, forwarded to
                ``StandardUpdater``.
            use_masking: Forwarded to ``Tacotron2Loss``.
            use_weighted_masking: Forwarded to ``Tacotron2Loss``.
            bce_pos_weight: Forwarded to ``Tacotron2Loss``.
            loss_type: One of "L1+L2", "L1" or "L2".
            use_guided_attn_loss: Whether to add the guided attention loss.
            guided_attn_loss_sigma: ``sigma`` of ``GuidedAttentionLoss``.
            guided_attn_loss_lambda: ``alpha`` of ``GuidedAttentionLoss``.
            output_dir: Directory receiving ``worker_<rank>.log``. When
                ``None`` no log file is attached.
        """
        # Forward the caller's state instead of discarding it.
        super().__init__(model, optimizer, dataloader, init_state=init_state)

        self.loss_type = loss_type
        self.use_guided_attn_loss = use_guided_attn_loss

        self.taco2_loss = Tacotron2Loss(
            use_masking=use_masking,
            use_weighted_masking=use_weighted_masking,
            bce_pos_weight=bce_pos_weight, )
        if self.use_guided_attn_loss:
            self.attn_loss = GuidedAttentionLoss(
                sigma=guided_attn_loss_sigma,
                alpha=guided_attn_loss_lambda, )

        # Attach a per-rank log file only when a directory was given; the
        # declared default (None) would otherwise fail on ``None / str``.
        self.filehandler = None
        if output_dir is not None:
            log_file = Path(output_dir) / 'worker_{}.log'.format(
                dist.get_rank())
            self.filehandler = logging.FileHandler(str(log_file))
            logger.addHandler(self.filehandler)
        self.logger = logger
        self.msg = ""

    def update_core(self, batch):
        """Run one optimization step on ``batch`` and report the losses.

        Args:
            batch: Mapping with the keys "text", "text_lengths", "speech"
                and "speech_lengths", and optionally "spk_id" / "spk_emb".

        Raises:
            ValueError: If ``self.loss_type`` is not a known loss type.
        """
        self.msg = "Rank: {}, ".format(dist.get_rank())
        losses_dict = {}
        # spk_id / spk_emb are only present for multi-speaker Tacotron2;
        # when a speaker embedding is available it takes precedence.
        spk_id = batch["spk_id"] if "spk_id" in batch else None
        spk_emb = batch["spk_emb"] if "spk_emb" in batch else None
        if spk_emb is not None:
            spk_id = None

        after_outs, before_outs, logits, ys, stop_labels, olens, att_ws, olens_in = self.model(
            text=batch["text"],
            text_lengths=batch["text_lengths"],
            speech=batch["speech"],
            speech_lengths=batch["speech_lengths"],
            spk_id=spk_id,
            spk_emb=spk_emb)

        # calculate taco2 loss
        l1_loss, mse_loss, bce_loss = self.taco2_loss(
            after_outs=after_outs,
            before_outs=before_outs,
            logits=logits,
            ys=ys,
            stop_labels=stop_labels,
            olens=olens)

        if self.loss_type == "L1+L2":
            loss = l1_loss + mse_loss + bce_loss
        elif self.loss_type == "L1":
            loss = l1_loss + bce_loss
        elif self.loss_type == "L2":
            loss = mse_loss + bce_loss
        else:
            raise ValueError(f"unknown --loss-type {self.loss_type}")

        # calculate attention loss
        if self.use_guided_attn_loss:
            # NOTE: length of output for auto-regressive
            # input will be changed when r > 1
            # (+1 on the input lengths accounts for the eos the model
            # appends to the text)
            attn_loss = self.attn_loss(
                att_ws=att_ws, ilens=batch["text_lengths"] + 1, olens=olens_in)
            loss = loss + attn_loss

        optimizer = self.optimizer
        optimizer.clear_grad()
        loss.backward()
        optimizer.step()

        if self.use_guided_attn_loss:
            report("train/attn_loss", float(attn_loss))
            losses_dict["attn_loss"] = float(attn_loss)

        report("train/l1_loss", float(l1_loss))
        report("train/mse_loss", float(mse_loss))
        report("train/bce_loss", float(bce_loss))
        report("train/loss", float(loss))

        losses_dict["l1_loss"] = float(l1_loss)
        losses_dict["mse_loss"] = float(mse_loss)
        losses_dict["bce_loss"] = float(bce_loss)
        losses_dict["loss"] = float(loss)
        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())


class Tacotron2Evaluator(StandardEvaluator):
    """Evaluation-step driver for Tacotron2.

    Each call to ``evaluate_core`` runs the model on one batch, computes
    the same losses as the updater (without any optimizer step), reports
    them under the ``eval/`` prefix and logs a summary line.
    """

    def __init__(self,
                 model: Layer,
                 dataloader: DataLoader,
                 use_masking: bool=True,
                 use_weighted_masking: bool=False,
                 bce_pos_weight: float=5.0,
                 loss_type: str="L1+L2",
                 use_guided_attn_loss: bool=True,
                 guided_attn_loss_sigma: float=0.4,
                 guided_attn_loss_lambda: float=1.0,
                 output_dir=None):
        """Create the evaluator.

        Args:
            model: Tacotron2 model to evaluate.
            dataloader: Evaluation data loader.
            use_masking: Forwarded to ``Tacotron2Loss``.
            use_weighted_masking: Forwarded to ``Tacotron2Loss``.
            bce_pos_weight: Forwarded to ``Tacotron2Loss``.
            loss_type: One of "L1+L2", "L1" or "L2".
            use_guided_attn_loss: Whether to add the guided attention loss.
            guided_attn_loss_sigma: ``sigma`` of ``GuidedAttentionLoss``.
            guided_attn_loss_lambda: ``alpha`` of ``GuidedAttentionLoss``.
            output_dir: Directory receiving ``worker_<rank>.log``. When
                ``None`` no log file is attached.
        """
        super().__init__(model, dataloader)

        self.loss_type = loss_type
        self.use_guided_attn_loss = use_guided_attn_loss

        self.taco2_loss = Tacotron2Loss(
            use_masking=use_masking,
            use_weighted_masking=use_weighted_masking,
            bce_pos_weight=bce_pos_weight, )
        if self.use_guided_attn_loss:
            self.attn_loss = GuidedAttentionLoss(
                sigma=guided_attn_loss_sigma,
                alpha=guided_attn_loss_lambda, )

        # Attach a per-rank log file only when a directory was given; the
        # declared default (None) would otherwise fail on ``None / str``.
        self.filehandler = None
        if output_dir is not None:
            log_file = Path(output_dir) / 'worker_{}.log'.format(
                dist.get_rank())
            self.filehandler = logging.FileHandler(str(log_file))
            logger.addHandler(self.filehandler)
        self.logger = logger
        self.msg = ""

    def evaluate_core(self, batch):
        """Compute, report and log the losses for one evaluation batch.

        Args:
            batch: Mapping with the keys "text", "text_lengths", "speech"
                and "speech_lengths", and optionally "spk_id" / "spk_emb".

        Raises:
            ValueError: If ``self.loss_type`` is not a known loss type.
        """
        self.msg = "Evaluate: "
        losses_dict = {}
        # spk_id / spk_emb are only present for multi-speaker Tacotron2;
        # when a speaker embedding is available it takes precedence.
        spk_id = batch["spk_id"] if "spk_id" in batch else None
        spk_emb = batch["spk_emb"] if "spk_emb" in batch else None
        if spk_emb is not None:
            spk_id = None

        after_outs, before_outs, logits, ys, stop_labels, olens, att_ws, olens_in = self.model(
            text=batch["text"],
            text_lengths=batch["text_lengths"],
            speech=batch["speech"],
            speech_lengths=batch["speech_lengths"],
            spk_id=spk_id,
            spk_emb=spk_emb)

        # calculate taco2 loss
        l1_loss, mse_loss, bce_loss = self.taco2_loss(
            after_outs=after_outs,
            before_outs=before_outs,
            logits=logits,
            ys=ys,
            stop_labels=stop_labels,
            olens=olens)

        if self.loss_type == "L1+L2":
            loss = l1_loss + mse_loss + bce_loss
        elif self.loss_type == "L1":
            loss = l1_loss + bce_loss
        elif self.loss_type == "L2":
            loss = mse_loss + bce_loss
        else:
            raise ValueError(f"unknown --loss-type {self.loss_type}")

        # calculate attention loss
        if self.use_guided_attn_loss:
            # NOTE: length of output for auto-regressive
            # input will be changed when r > 1
            # (+1 on the input lengths accounts for the eos the model
            # appends to the text)
            attn_loss = self.attn_loss(
                att_ws=att_ws, ilens=batch["text_lengths"] + 1, olens=olens_in)
            loss = loss + attn_loss

        if self.use_guided_attn_loss:
            report("eval/attn_loss", float(attn_loss))
            losses_dict["attn_loss"] = float(attn_loss)

        report("eval/l1_loss", float(l1_loss))
        report("eval/mse_loss", float(mse_loss))
        report("eval/bce_loss", float(bce_loss))
        report("eval/loss", float(loss))

        losses_dict["l1_loss"] = float(l1_loss)
        losses_dict["mse_loss"] = float(mse_loss)
        losses_dict["bce_loss"] = float(bce_loss)
        losses_dict["loss"] = float(loss)
        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())
        self.logger.info(self.msg)


================================================
FILE: paddlespeech/t2s/models/transformer_tts/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .transformer_tts import *
from .transformer_tts_updater import *


================================================
FILE: paddlespeech/t2s/models/transformer_tts/transformer_tts.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""Fastspeech2 related modules for paddle"""
from optparse import Option
from typing import Dict
from typing import Optional
from typing import Sequence
from typing import Tuple

import numpy
import paddle
import paddle.nn.functional as F
from paddle import nn
from typeguard import typechecked

from paddlespeech.t2s.modules.nets_utils import initialize
from paddlespeech.t2s.modules.nets_utils import make_non_pad_mask
from paddlespeech.t2s.modules.nets_utils import make_pad_mask
from paddlespeech.t2s.modules.style_encoder import StyleEncoder
from paddlespeech.t2s.modules.tacotron2.decoder import Postnet
from paddlespeech.t2s.modules.tacotron2.decoder import Prenet as DecoderPrenet
from paddlespeech.t2s.modules.tacotron2.encoder import Encoder as EncoderPrenet
from paddlespeech.t2s.modules.transformer.attention import MultiHeadedAttention
from paddlespeech.t2s.modules.transformer.decoder import Decoder
from paddlespeech.t2s.modules.transformer.embedding import PositionalEncoding
from paddlespeech.t2s.modules.transformer.embedding import ScaledPositionalEncoding
from paddlespeech.t2s.modules.transformer.encoder import TransformerEncoder
from paddlespeech.t2s.modules.transformer.mask import subsequent_mask


class TransformerTTS(nn.Layer):
    """TTS-Transformer module.

    This is a module of text-to-speech Transformer described in `Neural Speech Synthesis
    with Transformer Network`_, which convert the sequence of tokens into the sequence
    of Mel-filterbanks.

    .. _`Neural Speech Synthesis with Transformer Network`:
        https://arxiv.org/pdf/1809.08895.pdf

    Args:
        idim (int): 
            Dimension of the inputs.
        odim (int): 
            Dimension of the outputs.
        embed_dim (int, optional): 
            Dimension of character embedding.
        eprenet_conv_layers (int, optional): 
            Number of encoder prenet convolution layers.
        eprenet_conv_chans (int, optional): 
            Number of encoder prenet convolution channels.
        eprenet_conv_filts (int, optional): 
            Filter size of encoder prenet convolution.
        dprenet_layers (int, optional): 
            Number of decoder prenet layers.
        dprenet_units (int, optional): 
            Number of decoder prenet hidden units.
        elayers (int, optional): 
            Number of encoder layers.
        eunits (int, optional): 
            Number of encoder hidden units.
        adim (int, optional): 
            Number of attention transformation dimensions.
        aheads (int, optional): 
            Number of heads for multi head attention.
        dlayers (int, optional): 
            Number of decoder layers.
        dunits (int, optional): 
            Number of decoder hidden units.
        postnet_layers (int, optional): 
            Number of postnet layers.
        postnet_chans (int, optional): 
            Number of postnet channels.
        postnet_filts (int, optional): 
            Filter size of postnet.
        use_scaled_pos_enc (bool, optional):
            Whether to use trainable scaled positional encoding.
        use_batch_norm (bool, optional): 
            Whether to use batch normalization in encoder prenet.
        encoder_normalize_before (bool, optional): 
            Whether to perform layer normalization before encoder block.
        decoder_normalize_before (bool, optional): 
            Whether to perform layer normalization before decoder block.
        encoder_concat_after (bool, optional): 
            Whether to concatenate attention layer's input and output in encoder.
        decoder_concat_after (bool, optional): 
            Whether to concatenate attention layer's input and output in decoder.
        positionwise_layer_type (str, optional): 
            Position-wise operation type.
        positionwise_conv_kernel_size (int, optional): 
            Kernel size in position wise conv 1d.
        reduction_factor (int, optional): 
            Reduction factor.
        spk_embed_dim (int, optional): 
            Number of speaker embedding dimensions.
        spk_embed_integration_type (str, optional): 
            How to integrate speaker embedding.
        use_gst (bool, optional):
            Whether to use global style token.
        gst_tokens (int, optional): 
            The number of GST embeddings.
        gst_heads (int, optional): 
            The number of heads in GST multihead attention.
        gst_conv_layers (int, optional): 
            The number of conv layers in GST.
        gst_conv_chans_list (Sequence[int], optional): 
            List of the number of channels of conv layers in GST.
        gst_conv_kernel_size (int, optional): 
            Kernel size of conv layers in GST.
        gst_conv_stride (int, optional): 
            Stride size of conv layers in GST.
        gst_gru_layers (int, optional): 
            The number of GRU layers in GST.
        gst_gru_units (int, optional): 
            The number of GRU units in GST.
        transformer_lr (float, optional): 
            Initial value of learning rate.
        transformer_warmup_steps (int, optional): 
            Optimizer warmup steps.
        transformer_enc_dropout_rate (float, optional): 
            Dropout rate in encoder except attention and positional encoding.
        transformer_enc_positional_dropout_rate (float, optional): 
            Dropout rate after encoder positional encoding.
        transformer_enc_attn_dropout_rate (float, optional):
            Dropout rate in encoder self-attention module.
        transformer_dec_dropout_rate (float, optional): 
            Dropout rate in decoder except attention & positional encoding.
        transformer_dec_positional_dropout_rate (float, optional): 
            Dropout rate after decoder positional encoding.
        transformer_dec_attn_dropout_rate (float, optional):
            Dropout rate in decoder self-attention module.
        transformer_enc_dec_attn_dropout_rate (float, optional):
            Dropout rate in encoder-decoder attention module.
        init_type (str, optional): 
            How to initialize transformer parameters.
        init_enc_alpha (float, optional):
            Initial value of alpha in scaled pos encoding of the encoder.
        init_dec_alpha (float, optional): 
            Initial value of alpha in scaled pos encoding of the decoder.
        eprenet_dropout_rate (float, optional): 
            Dropout rate in encoder prenet.
        dprenet_dropout_rate (float, optional): 
            Dropout rate in decoder prenet.
        postnet_dropout_rate (float, optional): 
            Dropout rate in postnet.
        use_masking (bool, optional): 
            Whether to apply masking for padded part in loss calculation.
        use_weighted_masking (bool, optional): 
            Whether to apply weighted masking in loss calculation.
        bce_pos_weight (float, optional): 
            Positive sample weight in bce calculation (only for use_masking=true).
        loss_type (str, optional): 
            How to calculate loss.
        use_guided_attn_loss (bool, optional): 
            Whether to use guided attention loss.
        num_heads_applied_guided_attn (int, optional):
            Number of heads in each layer to apply guided attention loss.
        num_layers_applied_guided_attn (int, optional): 
            Number of layers to apply guided attention loss.
    """

    @typechecked
    def __init__(
            self,
            # network structure related
            idim: int,
            odim: int,
            embed_dim: int=512,
            eprenet_conv_layers: int=3,
            eprenet_conv_chans: int=256,
            eprenet_conv_filts: int=5,
            dprenet_layers: int=2,
            dprenet_units: int=256,
            elayers: int=6,
            eunits: int=1024,
            adim: int=512,
            aheads: int=4,
            dlayers: int=6,
            dunits: int=1024,
            postnet_layers: int=5,
            postnet_chans: int=256,
            postnet_filts: int=5,
            positionwise_layer_type: str="conv1d",
            positionwise_conv_kernel_size: int=1,
            use_scaled_pos_enc: bool=True,
            use_batch_norm: bool=True,
            encoder_normalize_before: bool=True,
            decoder_normalize_before: bool=True,
            encoder_concat_after: bool=False,
            decoder_concat_after: bool=False,
            reduction_factor: int=1,
            spk_embed_dim: Optional[int]=None,
            spk_embed_integration_type: str="add",
            use_gst: bool=False,
            gst_tokens: int=10,
            gst_heads: int=4,
            gst_conv_layers: int=6,
            gst_conv_chans_list: Sequence[int]=(32, 32, 64, 64, 128, 128),
            gst_conv_kernel_size: int=3,
            gst_conv_stride: int=2,
            gst_gru_layers: int=1,
            gst_gru_units: int=128,
            # training related
            transformer_enc_dropout_rate: float=0.1,
            transformer_enc_positional_dropout_rate: float=0.1,
            transformer_enc_attn_dropout_rate: float=0.1,
            transformer_dec_dropout_rate: float=0.1,
            transformer_dec_positional_dropout_rate: float=0.1,
            transformer_dec_attn_dropout_rate: float=0.1,
            transformer_enc_dec_attn_dropout_rate: float=0.1,
            eprenet_dropout_rate: float=0.5,
            dprenet_dropout_rate: float=0.5,
            postnet_dropout_rate: float=0.5,
            init_type: str="xavier_uniform",
            init_enc_alpha: float=1.0,
            init_dec_alpha: float=1.0,
            use_guided_attn_loss: bool=True,
            num_heads_applied_guided_attn: int=2,
            num_layers_applied_guided_attn: int=2, ):
        """Initialize Transformer module.

        Builds, in this order: the encoder (with an optional conv prenet),
        the optional GST style encoder, the optional speaker-embedding
        projection, the decoder (with an optional prenet), the output
        projections and the optional postnet. The meaning of each argument
        is described in the class docstring.

        For ``num_heads_applied_guided_attn`` and
        ``num_layers_applied_guided_attn`` the value ``-1`` is replaced by
        ``aheads`` and ``elayers`` respectively.
        """

        super().__init__()

        # store hyperparameters
        self.idim = idim
        self.odim = odim
        # the last token id is used as <eos>
        self.eos = idim - 1
        self.spk_embed_dim = spk_embed_dim
        self.reduction_factor = reduction_factor
        self.use_gst = use_gst
        self.use_scaled_pos_enc = use_scaled_pos_enc
        self.use_guided_attn_loss = use_guided_attn_loss
        # These two settings are stored unconditionally: forward() exports
        # them in its returned dict regardless of use_guided_attn_loss, so
        # leaving them undefined when the loss is disabled would make
        # forward() fail with AttributeError.
        if num_layers_applied_guided_attn == -1:
            self.num_layers_applied_guided_attn = elayers
        else:
            self.num_layers_applied_guided_attn = num_layers_applied_guided_attn
        if num_heads_applied_guided_attn == -1:
            self.num_heads_applied_guided_attn = aheads
        else:
            self.num_heads_applied_guided_attn = num_heads_applied_guided_attn
        if self.spk_embed_dim is not None:
            self.spk_embed_integration_type = spk_embed_integration_type

        # use idx 0 as padding idx
        self.padding_idx = 0
        # set_global_initializer affects everything created afterwards,
        # globally, including create_parameter
        initialize(self, init_type)

        # get positional encoding layer type
        transformer_pos_enc_layer_type = "scaled_abs_pos" if self.use_scaled_pos_enc else "abs_pos"

        # define transformer encoder
        if eprenet_conv_layers != 0:
            # encoder prenet
            encoder_input_layer = nn.Sequential(
                EncoderPrenet(
                    idim=idim,
                    embed_dim=embed_dim,
                    elayers=0,
                    econv_layers=eprenet_conv_layers,
                    econv_chans=eprenet_conv_chans,
                    econv_filts=eprenet_conv_filts,
                    use_batch_norm=use_batch_norm,
                    dropout_rate=eprenet_dropout_rate,
                    padding_idx=self.padding_idx, ),
                nn.Linear(eprenet_conv_chans, adim), )
        else:
            encoder_input_layer = nn.Embedding(
                num_embeddings=idim,
                embedding_dim=adim,
                padding_idx=self.padding_idx)
        self.encoder = TransformerEncoder(
            idim=idim,
            attention_dim=adim,
            attention_heads=aheads,
            linear_units=eunits,
            num_blocks=elayers,
            input_layer=encoder_input_layer,
            dropout_rate=transformer_enc_dropout_rate,
            positional_dropout_rate=transformer_enc_positional_dropout_rate,
            attention_dropout_rate=transformer_enc_attn_dropout_rate,
            pos_enc_layer_type=transformer_pos_enc_layer_type,
            normalize_before=encoder_normalize_before,
            concat_after=encoder_concat_after,
            positionwise_layer_type=positionwise_layer_type,
            positionwise_conv_kernel_size=positionwise_conv_kernel_size, )

        # define GST
        if self.use_gst:
            self.gst = StyleEncoder(
                idim=odim,  # the input is mel-spectrogram
                gst_tokens=gst_tokens,
                gst_token_dim=adim,
                gst_heads=gst_heads,
                conv_layers=gst_conv_layers,
                conv_chans_list=gst_conv_chans_list,
                conv_kernel_size=gst_conv_kernel_size,
                conv_stride=gst_conv_stride,
                gru_layers=gst_gru_layers,
                gru_units=gst_gru_units, )

        # define projection layer
        if self.spk_embed_dim is not None:
            if self.spk_embed_integration_type == "add":
                self.projection = nn.Linear(self.spk_embed_dim, adim)
            else:
                self.projection = nn.Linear(adim + self.spk_embed_dim, adim)

        # define transformer decoder
        if dprenet_layers != 0:
            # decoder prenet
            decoder_input_layer = nn.Sequential(
                DecoderPrenet(
                    idim=odim,
                    n_layers=dprenet_layers,
                    n_units=dprenet_units,
                    dropout_rate=dprenet_dropout_rate, ),
                nn.Linear(dprenet_units, adim), )
        else:
            decoder_input_layer = "linear"
        # get positional encoding class
        pos_enc_class = (ScaledPositionalEncoding
                         if self.use_scaled_pos_enc else PositionalEncoding)
        self.decoder = Decoder(
            odim=odim,  # odim is needed when no prenet is used
            attention_dim=adim,
            attention_heads=aheads,
            linear_units=dunits,
            num_blocks=dlayers,
            dropout_rate=transformer_dec_dropout_rate,
            positional_dropout_rate=transformer_dec_positional_dropout_rate,
            self_attention_dropout_rate=transformer_dec_attn_dropout_rate,
            src_attention_dropout_rate=transformer_enc_dec_attn_dropout_rate,
            input_layer=decoder_input_layer,
            use_output_layer=False,
            pos_enc_class=pos_enc_class,
            normalize_before=decoder_normalize_before,
            concat_after=decoder_concat_after, )

        # define final projection
        self.feat_out = nn.Linear(adim, odim * reduction_factor)
        self.prob_out = nn.Linear(adim, reduction_factor)

        # define postnet
        self.postnet = (None if postnet_layers == 0 else Postnet(
            idim=idim,
            odim=odim,
            n_layers=postnet_layers,
            n_chans=postnet_chans,
            n_filts=postnet_filts,
            use_batch_norm=use_batch_norm,
            dropout_rate=postnet_dropout_rate, ))

        # Close the scope of the set_global_initializer call made inside
        # initialize(), so that it does not affect self._reset_parameters()
        nn.initializer.set_global_initializer(None)

        self._reset_parameters(
            init_enc_alpha=init_enc_alpha,
            init_dec_alpha=init_dec_alpha, )

    def _reset_parameters(self, init_enc_alpha: float, init_dec_alpha: float):
        """Re-create the ``alpha`` parameter of the scaled positional encodings.

        Does nothing unless ``self.use_scaled_pos_enc`` is set. Otherwise the
        ``alpha`` attribute of the last element of ``encoder.embed`` and of
        ``decoder.embed`` is replaced by a new parameter initialised with the
        given value.

        Args:
            init_enc_alpha (float): Initial alpha for the encoder side.
            init_dec_alpha (float): Initial alpha for the decoder side.
        """
        if not self.use_scaled_pos_enc:
            return

        # encoder first, then decoder (same creation order as before)
        targets = ((self.encoder, init_enc_alpha),
                   (self.decoder, init_dec_alpha))
        for module, init_value in targets:
            alpha_init = paddle.to_tensor(init_value)
            new_alpha = paddle.create_parameter(
                shape=alpha_init.shape,
                dtype=str(alpha_init.numpy().dtype),
                default_initializer=paddle.nn.initializer.Assign(alpha_init))
            module.embed[-1].alpha = new_alpha

    def forward(
            self,
            text: paddle.Tensor,
            text_lengths: paddle.Tensor,
            speech: paddle.Tensor,
            speech_lengths: paddle.Tensor,
            spk_emb: paddle.Tensor=None,
    ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor, paddle.Tensor,
               paddle.Tensor, paddle.Tensor, paddle.Tensor, Dict]:
        """Calculate forward propagation.

        No loss is computed here; the method returns the network outputs
        together with the (possibly trimmed) targets, and the caller is
        expected to compute the losses from them.

        Args:
            text(Tensor(int64)): Batch of padded character ids (B, Tmax).
            text_lengths(Tensor(int64)): Batch of lengths of each input batch (B,).
            speech(Tensor): Batch of padded target features (B, Lmax, odim).
            speech_lengths(Tensor(int64)): Batch of the lengths of each target (B,).
            spk_emb(Tensor, optional): Batch of speaker embeddings (B, spk_embed_dim).

        Returns:
            Tensor: Outputs after the postnet (``after_outs``).
            Tensor: Outputs before the postnet (``before_outs``).
            Tensor: Stop-token logits (``logits``).
            Tensor: Target features, trimmed to a multiple of the reduction
                factor when ``reduction_factor > 1`` (``ys``).
            Tensor: Stop-token labels as float32 (``stop_labels``).
            Tensor: Target lengths, rounded down to a multiple of the
                reduction factor when ``reduction_factor > 1`` (``olens``).
            Tensor: Target lengths divided by the reduction factor
                (``olens_in``).
            Dict: ``encoder``, ``decoder``, ``num_heads_applied_guided_attn``,
                ``num_layers_applied_guided_attn`` and ``use_scaled_pos_enc``.
                The two guided-attention entries are ``None`` if the model
                does not define them.

        """
        # input of embedding must be int64
        text_lengths = paddle.cast(text_lengths, 'int64')

        # Add eos at the last of sequence: pad one extra column, then write
        # the eos id right after the last valid token of every sample
        text = numpy.pad(text.numpy(), ((0, 0), (0, 1)), 'constant')
        xs = paddle.to_tensor(text, dtype='int64')
        for i, l in enumerate(text_lengths):
            xs[i, l] = self.eos
        ilens = text_lengths + 1

        ys = speech
        olens = paddle.cast(speech_lengths, 'int64')

        # make labels for stop prediction
        stop_labels = make_pad_mask(olens - 1)
        # bool tensors cannot be sliced, so convert to float first
        stop_labels = paddle.cast(stop_labels, dtype='float32')
        # pad with the constant 1.0 (pad spec [0, 0, 0, 1])
        stop_labels = F.pad(stop_labels, [0, 0, 0, 1], "constant", 1.0)

        # calculate transformer outputs
        after_outs, before_outs, logits = self._forward(xs, ilens, ys, olens,
                                                        spk_emb)

        # modify mod part of groundtruth
        if self.reduction_factor > 1:
            olens = olens - olens % self.reduction_factor
            max_olen = max(olens)
            ys = ys[:, :max_olen]
            stop_labels = stop_labels[:, :max_olen]
            stop_labels[:, -1] = 1.0  # make sure at least one frame has 1
            olens_in = olens // self.reduction_factor
        else:
            olens_in = olens

        # Objects and settings handed back to the caller. The guided-attention
        # settings are read defensively: the constructor may not have defined
        # them when use_guided_attn_loss is False, and a plain attribute
        # access would then raise AttributeError.
        need_dict = {}
        need_dict['encoder'] = self.encoder
        need_dict['decoder'] = self.decoder
        need_dict['num_heads_applied_guided_attn'] = getattr(
            self, 'num_heads_applied_guided_attn', None)
        need_dict['num_layers_applied_guided_attn'] = getattr(
            self, 'num_layers_applied_guided_attn', None)
        need_dict['use_scaled_pos_enc'] = self.use_scaled_pos_enc

        return after_outs, before_outs, logits, ys, stop_labels, olens, olens_in, need_dict

    def _forward(
            self,
            xs: paddle.Tensor,
            ilens: paddle.Tensor,
            ys: paddle.Tensor,
            olens: paddle.Tensor,
            spk_emb: paddle.Tensor,
    ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor]:
        """Run encoder, decoder and postnet with teacher-forced targets.

        Args:
            xs(Tensor): Source token ids (eos already appended by the caller).
            ilens(Tensor): Source lengths.
            ys(Tensor): Target features; fed to the decoder shifted by one
                frame, and to the style encoder when ``use_gst`` is set.
            olens(Tensor): Target lengths.
            spk_emb(Tensor): Speaker embeddings; only used when
                ``self.spk_embed_dim`` is not None.

        Returns:
            Tuple of ``(after_outs, before_outs, logits)``.
        """
        # encoder
        enc_out, enc_masks = self.encoder(xs, self._source_mask(ilens))

        # add the global style embedding to every encoder position
        if self.use_gst:
            enc_out = enc_out + self.gst(ys).unsqueeze(1)

        # mix in the speaker embedding
        if self.spk_embed_dim is not None:
            enc_out = self._integrate_with_spk_embed(enc_out, spk_emb)

        # keep every r-th frame: (B, Lmax, odim) -> (B, Lmax//r, odim)
        r = self.reduction_factor
        if r > 1:
            dec_in, dec_lens = ys[:, r - 1::r], olens // r
        else:
            dec_in, dec_lens = ys, olens

        # shift right by one frame for auto-regressive decoding
        dec_in = self._add_first_frame_and_remove_last_frame(dec_in)

        # decoder
        dec_masks = self._target_mask(dec_lens)
        dec_out, _ = self.decoder(dec_in, dec_masks, enc_out, enc_masks)
        batch_size = dec_out.shape[0]
        # (B, Lmax//r, odim * r) -> (B, Lmax//r * r, odim)
        before_outs = self.feat_out(dec_out).reshape(
            [batch_size, -1, self.odim])
        # (B, Lmax//r, r) -> (B, Lmax//r * r)
        logits = self.prob_out(dec_out).reshape([batch_size, -1])

        # without a postnet the refined output is just the raw output
        if self.postnet is None:
            return before_outs, before_outs, logits

        # the postnet works channel-first, hence the transposes
        residual = self.postnet(before_outs.transpose([0, 2, 1])).transpose(
            [0, 2, 1])
        after_outs = before_outs + residual
        return after_outs, before_outs, logits

    def inference(
            self,
            text: paddle.Tensor,
            speech: paddle.Tensor=None,
            spk_emb: paddle.Tensor=None,
            threshold: float=0.5,
            minlenratio: float=0.0,
            maxlenratio: float=10.0,
            use_teacher_forcing: bool=False,
    ) -> Tuple[paddle.Tensor, Optional[paddle.Tensor], paddle.Tensor]:
        """Generate the sequence of features given the sequences of characters.

        Args:
            text(Tensor(int64)): Input sequence of characters (T,).
            speech(Tensor, optional): Feature sequence to extract style from
                (N, odim). Required with teacher forcing and when the model
                was built with ``use_gst=True``.
            spk_emb(Tensor, optional): Speaker embedding vector (spk_embed_dim,).
                Ignored in the auto-regressive path when the model was built
                without ``spk_embed_dim``.
            threshold(float, optional): Threshold in inference.
            minlenratio(float, optional): Minimum length ratio in inference.
            maxlenratio(float, optional): Maximum length ratio in inference.
            use_teacher_forcing(bool, optional): Whether to use teacher forcing.

        Returns:
            Tensor: Output sequence of features (L, odim).
            Tensor: Output sequence of stop probabilities (L,);
                ``None`` when teacher forcing is used.
            Tensor: Encoder-decoder (source) attention weights (#layers, #heads, L, T).

        """
        y = speech

        # add eos at the last of sequence
        text = numpy.pad(
            text.numpy(), (0, 1), 'constant', constant_values=self.eos)
        # input of embedding must be int64
        x = paddle.to_tensor(text, dtype='int64')

        # inference with teacher forcing
        if use_teacher_forcing:
            assert speech is not None, "speech must be provided with teacher forcing."

            # get teacher forcing outputs
            xs, ys = x.unsqueeze(0), y.unsqueeze(0)
            spk_emb = None if spk_emb is None else spk_emb.unsqueeze(0)
            ilens = paddle.to_tensor(
                [xs.shape[1]], dtype=paddle.int64, place=xs.place)
            olens = paddle.to_tensor(
                [ys.shape[1]], dtype=paddle.int64, place=ys.place)
            outs, *_ = self._forward(xs, ilens, ys, olens, spk_emb)

            # get attention weights
            att_ws = []
            for i in range(len(self.decoder.decoders)):
                att_ws += [self.decoder.decoders[i].src_attn.attn]
            # (B, L, H, T_out, T_in)
            att_ws = paddle.stack(att_ws, axis=1)

            return outs[0], None, att_ws[0]

        # forward encoder
        xs = x.unsqueeze(0)
        hs, _ = self.encoder(xs, None)

        # integrate GST
        if self.use_gst:
            # fail with a clear message instead of an AttributeError on None
            assert speech is not None, "speech must be provided when use_gst is enabled."
            style_embs = self.gst(y.unsqueeze(0))
            hs = hs + style_embs.unsqueeze(1)

        # integrate speaker embedding
        # Gate on the model configuration as _forward() does: the projection
        # layer only exists when spk_embed_dim was given at construction.
        if self.spk_embed_dim is not None and spk_emb is not None:
            spk_emb = spk_emb.unsqueeze(0)
            hs = self._integrate_with_spk_embed(hs, spk_emb)

        # set limits of length
        maxlen = int(hs.shape[1] * maxlenratio / self.reduction_factor)
        minlen = int(hs.shape[1] * minlenratio / self.reduction_factor)

        # The set of source-attention layers does not change while decoding,
        # so collect it once instead of walking all sublayers on every step.
        src_attn_layers = [
            m for name, m in self.named_sublayers()
            if isinstance(m, MultiHeadedAttention) and "src" in name
        ]

        # initialize
        idx = 0
        ys = paddle.zeros([1, 1, self.odim])
        outs, probs = [], []

        # forward decoder step-by-step
        z_cache = None
        while True:
            # update index
            idx += 1

            # calculate output and stop prob at idx-th step
            y_masks = subsequent_mask(idx).unsqueeze(0)
            z, z_cache = self.decoder.forward_one_step(
                ys, y_masks, hs, cache=z_cache)  # (B, adim)
            outs += [
                self.feat_out(z).reshape([self.reduction_factor, self.odim])
            ]  # [(r, odim), ...]
            probs += [F.sigmoid(self.prob_out(z))[0]]  # [(r), ...]

            # update next inputs: feed back the last generated frame
            ys = paddle.concat(
                (ys, outs[-1][-1].reshape([1, 1, self.odim])),
                axis=1)  # (1, idx + 1, odim)

            # get attention weights of the newest step
            # [(#heads, 1, T),...]
            att_ws_ = [m.attn[0, :, -1].unsqueeze(1) for m in src_attn_layers]
            if idx == 1:
                att_ws = att_ws_
            else:
                # [(#heads, l, T), ...]
                att_ws = [
                    paddle.concat([att_w, att_w_], axis=1)
                    for att_w, att_w_ in zip(att_ws, att_ws_)
                ]

            # check whether to finish generation
            if sum(paddle.cast(probs[-1] >= threshold,
                               'int64')) > 0 or idx >= maxlen:
                # check minimum length
                if idx < minlen:
                    continue
                # (L, odim) -> (1, L, odim) -> (1, odim, L)
                outs = (paddle.concat(outs, axis=0).unsqueeze(0).transpose(
                    [0, 2, 1]))
                if self.postnet is not None:
                    # (1, odim, L)
                    outs = outs + self.postnet(outs)
                # (L, odim)
                outs = outs.transpose([0, 2, 1]).squeeze(0)
                probs = paddle.concat(probs, axis=0)
                break

        # concatenate attention weights -> (#layers, #heads, L, T)
        att_ws = paddle.stack(att_ws, axis=0)

        return outs, probs, att_ws

    def _add_first_frame_and_remove_last_frame(
            self, ys: paddle.Tensor) -> paddle.Tensor:
        """Shift a feature sequence right by one frame along axis 1.

        An all-zero frame is prepended and the last frame is dropped, so the
        length along axis 1 is unchanged.

        Args:
            ys(Tensor): 3-D feature tensor (indexed as ``ys.shape[0]``,
                ``ys.shape[2]`` below).

        Returns:
            Tensor: Shifted tensor with the same shape as ``ys``.
        """
        # Create the zero frame in the dtype of ``ys``: paddle.zeros would
        # otherwise use the default dtype, and concatenating it with features
        # of a different dtype fails.
        first_frame = paddle.zeros(
            [ys.shape[0], 1, ys.shape[2]], dtype=ys.dtype)
        ys_in = paddle.concat([first_frame, ys[:, :-1]], axis=1)
        return ys_in

    def _source_mask(self, ilens: paddle.Tensor) -> paddle.Tensor:
        """Make masks for self-attention.

        The mask produced by ``make_non_pad_mask`` gets an extra axis
        inserted before its last one.

        Args:
            ilens(Tensor): Batch of lengths (B,).

        Returns:
            Tensor: Mask tensor for self-attention. dtype=paddle.bool

        Examples:
            Assuming ``make_non_pad_mask`` yields one row per sequence
            (TODO confirm against its implementation):

            >>> ilens = [5, 3]
            >>> self._source_mask(ilens)
            tensor([[[1, 1, 1, 1, 1]],
                    [[1, 1, 1, 0, 0]]]) bool

        """
        return make_non_pad_mask(ilens).unsqueeze(-2)

    def _target_mask(self, olens: paddle.Tensor) -> paddle.Tensor:
        """Make masks for masked self-attention.

        Combines, with a logical AND, the non-padding mask of the targets
        and the mask returned by ``subsequent_mask`` for the same length.

        Args:
            olens (Tensor(int64)): Batch of lengths (B,).

        Returns:
            Tensor: Mask tensor for masked self-attention.

        Examples:
            >>> olens = [5, 3]
            >>> self._target_mask(olens)
            tensor([[[1, 0, 0, 0, 0],
                     [1, 1, 0, 0, 0],
                     [1, 1, 1, 0, 0],
                     [1, 1, 1, 1, 0],
                     [1, 1, 1, 1, 1]],
                    [[1, 0, 0, 0, 0],
                     [1, 1, 0, 0, 0],
                     [1, 1, 1, 0, 0],
                     [1, 1, 1, 0, 0],
                     [1, 1, 1, 0, 0]]])

        """
        # non-padding mask with an extra axis inserted before the last one
        non_pad = make_non_pad_mask(olens).unsqueeze(-2)
        # look-ahead mask of matching length, with a leading axis added
        look_ahead = subsequent_mask(non_pad.shape[-1]).unsqueeze(0)
        return paddle.logical_and(non_pad, look_ahead)

    def _integrate_with_spk_embed(self,
                                  hs: paddle.Tensor,
                                  spk_emb: paddle.Tensor) -> paddle.Tensor:
        """Integrate speaker embedding with hidden states.

        Args:
            hs(Tensor): Batch of hidden state sequences (B, Tmax, adim).
            spk_emb(Tensor): Batch of speaker embeddings (B, spk_embed_dim).

        Returns:
            Tensor: Batch of integrated hidden state sequences (B, Tmax, adim).

        Raises:
            NotImplementedError: If ``self.spk_embed_integration_type`` is
                neither ``"add"`` nor ``"concat"``.

        """
        if self.spk_embed_integration_type == "add":
            # apply projection and then add to hidden states
            spk_emb = self.projection(F.normalize(spk_emb))
            hs = hs + spk_emb.unsqueeze(1)
        elif self.spk_embed_integration_type == "concat":
            # concat hidden states with spk embeds and then apply projection
            # Paddle's expand() takes the target shape as ONE list argument
            # (-1 keeps a dimension); the torch-style call expand(-1, T, -1)
            # is rejected because of the extra positional arguments.
            spk_emb = F.normalize(spk_emb).unsqueeze(1).expand(
                [-1, hs.shape[1], -1])
            hs = self.projection(paddle.concat([hs, spk_emb], axis=-1))
        else:
            raise NotImplementedError("support only add or concat.")

        return hs


class TransformerTTSInference(nn.Layer):
    """Inference wrapper pairing an acoustic model with a feature normalizer.

    Args:
        normalizer: Object providing ``inverse(...)``; applied to the
            acoustic model's output.
        model: Acoustic model providing ``inference(text)``.
    """

    def __init__(self, normalizer, model):
        super().__init__()
        self.normalizer = normalizer
        self.acoustic_model = model

    def forward(self, text, spk_id=None):
        """Synthesize features for ``text`` and undo the normalization.

        Args:
            text: Input passed unchanged to ``acoustic_model.inference``.
            spk_id: Accepted but not used by this method.

        Returns:
            The result of ``normalizer.inverse`` applied to the first
            element returned by ``acoustic_model.inference``.
        """
        inference_outputs = self.acoustic_model.inference(text)
        # only the first returned element (the features) is needed
        return self.normalizer.inverse(inference_outputs[0])


================================================
FILE: paddlespeech/t2s/models/transformer_tts/transformer_tts_updater.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from pathlib import Path
from typing import Sequence

import paddle
from paddle import distributed as dist
from paddle.io import DataLoader
from paddle.nn import Layer
from paddle.optimizer import Optimizer

from paddlespeech.t2s.modules.losses import GuidedMultiHeadAttentionLoss
from paddlespeech.t2s.modules.losses import Tacotron2Loss as TransformerTTSLoss
from paddlespeech.t2s.training.extensions.evaluator import StandardEvaluator
from paddlespeech.t2s.training.reporter import report
from paddlespeech.t2s.training.updaters.standard_updater import StandardUpdater
# Configure the root logger's record layout (timestamp, level, source file and
# line, message) and the timestamp format. Runs at import time.
logging.basicConfig(
    format='%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(message)s',
    datefmt='[%Y-%m-%d %H:%M:%S]')
# Module-level logger shared by the updater below; records below INFO are
# dropped by this logger.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class TransformerTTSUpdater(StandardUpdater):
    """Training updater for Transformer-TTS.

    For every batch it runs the model, computes the reconstruction and stop
    token losses, optionally adds guided-attention losses, performs the
    backward pass and steps the optimizer. All scalar values are published
    through ``report`` and collected into ``self.msg``.
    """

    def __init__(
            self,
            model: Layer,
            optimizer: Optimizer,
            dataloader: DataLoader,
            init_state=None,
            use_masking: bool=False,
            use_weighted_masking: bool=False,
            output_dir: Path=None,
            bce_pos_weight: float=5.0,
            loss_type: str="L1",
            use_guided_attn_loss: bool=True,
            modules_applied_guided_attn: Sequence[str]=("encoder-decoder", ),
            guided_attn_loss_sigma: float=0.4,
            guided_attn_loss_lambda: float=1.0, ):
        """Initialize the updater.

        Args:
            model (Layer):
                Model called with ``text``, ``text_lengths``, ``speech`` and
                ``speech_lengths``.
            optimizer (Optimizer):
                Optimizer stepped once per batch.
            dataloader (DataLoader):
                Training data loader, handed to ``StandardUpdater``.
            init_state:
                Initial updater state, handed to ``StandardUpdater``.
            use_masking (bool):
                Forwarded to the loss criterion.
            use_weighted_masking (bool):
                Forwarded to the loss criterion.
            output_dir (Path):
                Directory receiving ``worker_<rank>.log``. When ``None`` no
                file handler is attached.
            bce_pos_weight (float):
                Forwarded to the loss criterion.
            loss_type (str):
                One of ``"L1"``, ``"L2"`` or ``"L1+L2"``.
            use_guided_attn_loss (bool):
                Whether guided-attention losses are added.
            modules_applied_guided_attn (Sequence[str]):
                Any of ``"encoder"``, ``"decoder"``, ``"encoder-decoder"``.
                A bare string is treated as a single module name.
            guided_attn_loss_sigma (float):
                ``sigma`` of ``GuidedMultiHeadAttentionLoss``.
            guided_attn_loss_lambda (float):
                ``alpha`` of ``GuidedMultiHeadAttentionLoss``.
        """
        # Hand the caller's state to the base class instead of dropping it.
        super().__init__(model, optimizer, dataloader, init_state=init_state)

        self.loss_type = loss_type
        self.use_guided_attn_loss = use_guided_attn_loss
        # A bare string would turn the membership tests in update_core into
        # substring tests ("encoder" in "encoder-decoder" is True), silently
        # enabling every guided-attention loss. Wrap it as one module name.
        if isinstance(modules_applied_guided_attn, str):
            modules_applied_guided_attn = (modules_applied_guided_attn, )
        self.modules_applied_guided_attn = modules_applied_guided_attn

        self.criterion = TransformerTTSLoss(
            use_masking=use_masking,
            use_weighted_masking=use_weighted_masking,
            bce_pos_weight=bce_pos_weight)

        if self.use_guided_attn_loss:
            self.attn_criterion = GuidedMultiHeadAttentionLoss(
                sigma=guided_attn_loss_sigma,
                alpha=guided_attn_loss_lambda, )

        # Per-rank log file; skipped when no output directory is given so the
        # default ``output_dir=None`` no longer raises a TypeError.
        self.filehandler = None
        if output_dir is not None:
            log_file = Path(output_dir) / 'worker_{}.log'.format(
                dist.get_rank())
            self.filehandler = logging.FileHandler(str(log_file))
            logger.addHandler(self.filehandler)
        self.logger = logger
        self.msg = ""

    @staticmethod
    def _stack_attention(layers, attn_name, need_dict):
        """Concatenate attention maps of the last layers along axis 1.

        Layers are visited from the last one backwards; the walk stops once
        ``need_dict['num_layers_applied_guided_attn']`` layers were taken.
        Only the first ``need_dict['num_heads_applied_guided_attn']`` heads
        of each map are kept.
        """
        num_heads = need_dict['num_heads_applied_guided_attn']
        num_layers = need_dict['num_layers_applied_guided_attn']
        att_ws = []
        for idx, layer_idx in enumerate(reversed(range(len(layers)))):
            attention = getattr(layers[layer_idx], attn_name)
            att_ws.append(attention.attn[:, :num_heads])
            if idx + 1 == num_layers:
                break
        return paddle.concat(att_ws, axis=1)

    def update_core(self, batch):
        """Run one training step on ``batch``.

        Args:
            batch:
                Mapping with keys ``text``, ``text_lengths``, ``speech`` and
                ``speech_lengths``.
        Raises:
            ValueError: If ``self.loss_type`` is not a supported value.
        """
        self.msg = "Rank: {}, ".format(dist.get_rank())
        losses_dict = {}

        def record(name, tensor):
            # Convert once, then both report the scalar and keep it for msg.
            value = float(tensor)
            report("train/" + name, value)
            losses_dict[name] = value

        (after_outs, before_outs, logits, ys, stop_labels, olens, olens_in,
         need_dict) = self.model(
             text=batch["text"],
             text_lengths=batch["text_lengths"],
             speech=batch["speech"],
             speech_lengths=batch["speech_lengths"], )

        l1_loss, l2_loss, bce_loss = self.criterion(
            after_outs=after_outs,
            before_outs=before_outs,
            logits=logits,
            ys=ys,
            stop_labels=stop_labels,
            olens=olens)

        record("bce_loss", bce_loss)
        record("l1_loss", l1_loss)
        record("l2_loss", l2_loss)

        # calculate loss values
        if self.loss_type == "L1":
            loss = l1_loss + bce_loss
        elif self.loss_type == "L2":
            loss = l2_loss + bce_loss
        elif self.loss_type == "L1+L2":
            loss = l1_loss + l2_loss + bce_loss
        else:
            raise ValueError("unknown --loss-type " + self.loss_type)

        # calculate guided attention loss
        if self.use_guided_attn_loss:
            # calculate for encoder
            if "encoder" in self.modules_applied_guided_attn:
                # (B, H*L, T_in, T_in)
                att_ws = self._stack_attention(
                    need_dict['encoder'].encoders, "self_attn", need_dict)
                enc_attn_loss = self.attn_criterion(
                    att_ws=att_ws,
                    ilens=batch["text_lengths"] + 1,
                    olens=batch["text_lengths"] + 1)
                loss = loss + enc_attn_loss
                record("enc_attn_loss", enc_attn_loss)
            # calculate for decoder
            if "decoder" in self.modules_applied_guided_attn:
                # (B, H*L, T_out, T_out)
                att_ws = self._stack_attention(
                    need_dict['decoder'].decoders, "self_attn", need_dict)
                dec_attn_loss = self.attn_criterion(
                    att_ws=att_ws, ilens=olens_in, olens=olens_in)
                record("dec_attn_loss", dec_attn_loss)
                loss = loss + dec_attn_loss
            # calculate for encoder-decoder
            if "encoder-decoder" in self.modules_applied_guided_attn:
                # (B, H*L, T_out, T_in)
                att_ws = self._stack_attention(
                    need_dict['decoder'].decoders, "src_attn", need_dict)
                enc_dec_attn_loss = self.attn_criterion(
                    att_ws=att_ws,
                    ilens=batch["text_lengths"] + 1,
                    olens=olens_in)
                record("enc_dec_attn_loss", enc_dec_attn_loss)
                loss = loss + enc_dec_attn_loss

        if need_dict['use_scaled_pos_enc']:
            record("encoder_alpha", need_dict['encoder'].embed[-1].alpha)
            record("decoder_alpha", need_dict['decoder'].embed[-1].alpha)

        optimizer = self.optimizer
        optimizer.clear_grad()
        loss.backward()
        optimizer.step()

        record("loss", loss)
        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())


class TransformerTTSEvaluator(StandardEvaluator):
    """Evaluator for Transformer-TTS.

    For every batch it runs the model, computes the same losses as the
    training updater (without any backward pass or optimizer step), publishes
    them through ``report`` under the ``eval/`` prefix and logs a summary
    line.
    """

    def __init__(
            self,
            model: Layer,
            dataloader: DataLoader,
            init_state=None,
            use_masking: bool=False,
            use_weighted_masking: bool=False,
            output_dir: Path=None,
            bce_pos_weight: float=5.0,
            loss_type: str="L1",
            use_guided_attn_loss: bool=True,
            modules_applied_guided_attn: Sequence[str]=("encoder-decoder", ),
            guided_attn_loss_sigma: float=0.4,
            guided_attn_loss_lambda: float=1.0, ):
        """Initialize the evaluator.

        Args:
            model (Layer):
                Model called with ``text``, ``text_lengths``, ``speech`` and
                ``speech_lengths``.
            dataloader (DataLoader):
                Evaluation data loader, handed to ``StandardEvaluator``.
            init_state:
                Accepted for signature parity; not used by this class.
            use_masking (bool):
                Forwarded to the loss criterion.
            use_weighted_masking (bool):
                Forwarded to the loss criterion.
            output_dir (Path):
                Directory receiving ``worker_<rank>.log``. When ``None`` no
                file handler is attached.
            bce_pos_weight (float):
                Forwarded to the loss criterion.
            loss_type (str):
                One of ``"L1"``, ``"L2"`` or ``"L1+L2"``.
            use_guided_attn_loss (bool):
                Whether guided-attention losses are added.
            modules_applied_guided_attn (Sequence[str]):
                Any of ``"encoder"``, ``"decoder"``, ``"encoder-decoder"``.
                A bare string is treated as a single module name.
            guided_attn_loss_sigma (float):
                ``sigma`` of ``GuidedMultiHeadAttentionLoss``.
            guided_attn_loss_lambda (float):
                ``alpha`` of ``GuidedMultiHeadAttentionLoss``.
        """
        super().__init__(model, dataloader)

        self.loss_type = loss_type
        self.use_guided_attn_loss = use_guided_attn_loss
        # A bare string would turn the membership tests in evaluate_core into
        # substring tests ("encoder" in "encoder-decoder" is True), silently
        # enabling every guided-attention loss. Wrap it as one module name.
        if isinstance(modules_applied_guided_attn, str):
            modules_applied_guided_attn = (modules_applied_guided_attn, )
        self.modules_applied_guided_attn = modules_applied_guided_attn

        self.criterion = TransformerTTSLoss(
            use_masking=use_masking,
            use_weighted_masking=use_weighted_masking,
            bce_pos_weight=bce_pos_weight)

        if self.use_guided_attn_loss:
            self.attn_criterion = GuidedMultiHeadAttentionLoss(
                sigma=guided_attn_loss_sigma,
                alpha=guided_attn_loss_lambda, )

        # Per-rank log file; skipped when no output directory is given so the
        # default ``output_dir=None`` no longer raises a TypeError.
        self.filehandler = None
        if output_dir is not None:
            log_file = Path(output_dir) / 'worker_{}.log'.format(
                dist.get_rank())
            self.filehandler = logging.FileHandler(str(log_file))
            logger.addHandler(self.filehandler)
        self.logger = logger
        self.msg = ""

    @staticmethod
    def _stack_attention(layers, attn_name, need_dict):
        """Concatenate attention maps of the last layers along axis 1.

        Layers are visited from the last one backwards; the walk stops once
        ``need_dict['num_layers_applied_guided_attn']`` layers were taken.
        Only the first ``need_dict['num_heads_applied_guided_attn']`` heads
        of each map are kept.
        """
        num_heads = need_dict['num_heads_applied_guided_attn']
        num_layers = need_dict['num_layers_applied_guided_attn']
        att_ws = []
        for idx, layer_idx in enumerate(reversed(range(len(layers)))):
            attention = getattr(layers[layer_idx], attn_name)
            att_ws.append(attention.attn[:, :num_heads])
            if idx + 1 == num_layers:
                break
        return paddle.concat(att_ws, axis=1)

    def evaluate_core(self, batch):
        """Compute and report the evaluation losses for ``batch``.

        Args:
            batch:
                Mapping with keys ``text``, ``text_lengths``, ``speech`` and
                ``speech_lengths``.
        Raises:
            ValueError: If ``self.loss_type`` is not a supported value.
        """
        self.msg = "Evaluate: "
        losses_dict = {}

        def record(name, tensor):
            # Convert once, then both report the scalar and keep it for msg.
            value = float(tensor)
            report("eval/" + name, value)
            losses_dict[name] = value

        (after_outs, before_outs, logits, ys, stop_labels, olens, olens_in,
         need_dict) = self.model(
             text=batch["text"],
             text_lengths=batch["text_lengths"],
             speech=batch["speech"],
             speech_lengths=batch["speech_lengths"])

        l1_loss, l2_loss, bce_loss = self.criterion(
            after_outs=after_outs,
            before_outs=before_outs,
            logits=logits,
            ys=ys,
            stop_labels=stop_labels,
            olens=olens)

        record("bce_loss", bce_loss)
        record("l1_loss", l1_loss)
        record("l2_loss", l2_loss)

        # calculate loss values
        if self.loss_type == "L1":
            loss = l1_loss + bce_loss
        elif self.loss_type == "L2":
            loss = l2_loss + bce_loss
        elif self.loss_type == "L1+L2":
            loss = l1_loss + l2_loss + bce_loss
        else:
            raise ValueError("unknown --loss-type " + self.loss_type)

        # calculate guided attention loss
        if self.use_guided_attn_loss:
            # calculate for encoder
            if "encoder" in self.modules_applied_guided_attn:
                # (B, H*L, T_in, T_in)
                att_ws = self._stack_attention(
                    need_dict['encoder'].encoders, "self_attn", need_dict)
                enc_attn_loss = self.attn_criterion(
                    att_ws=att_ws,
                    ilens=batch["text_lengths"] + 1,
                    olens=batch["text_lengths"] + 1)
                loss = loss + enc_attn_loss
                # Reported under "eval/", not "train/", so evaluation values
                # do not overwrite the training metric of the same name.
                record("enc_attn_loss", enc_attn_loss)
            # calculate for decoder
            if "decoder" in self.modules_applied_guided_attn:
                # (B, H*L, T_out, T_out)
                att_ws = self._stack_attention(
                    need_dict['decoder'].decoders, "self_attn", need_dict)
                dec_attn_loss = self.attn_criterion(
                    att_ws=att_ws, ilens=olens_in, olens=olens_in)
                record("dec_attn_loss", dec_attn_loss)
                loss = loss + dec_attn_loss
            # calculate for encoder-decoder
            if "encoder-decoder" in self.modules_applied_guided_attn:
                # (B, H*L, T_out, T_in)
                att_ws = self._stack_attention(
                    need_dict['decoder'].decoders, "src_attn", need_dict)
                enc_dec_attn_loss = self.attn_criterion(
                    att_ws=att_ws,
                    ilens=batch["text_lengths"] + 1,
                    olens=olens_in)
                record("enc_dec_attn_loss", enc_dec_attn_loss)
                loss = loss + enc_dec_attn_loss

        if need_dict['use_scaled_pos_enc']:
            record("encoder_alpha", need_dict['encoder'].embed[-1].alpha)
            record("decoder_alpha", need_dict['decoder'].embed[-1].alpha)

        record("loss", loss)
        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())
        self.logger.info(self.msg)


================================================
FILE: paddlespeech/t2s/models/vits/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .vits import *
from .vits_updater import *


================================================
FILE: paddlespeech/t2s/models/vits/duration_predictor.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Stochastic duration predictor modules in VITS.

This code is based on https://github.com/jaywalnut310/vits.

"""
import math
from typing import Optional

import paddle
import paddle.nn.functional as F
from paddle import nn

from paddlespeech.t2s.models.vits.flow import ConvFlow
from paddlespeech.t2s.models.vits.flow import DilatedDepthSeparableConv
from paddlespeech.t2s.models.vits.flow import ElementwiseAffineFlow
from paddlespeech.t2s.models.vits.flow import FlipFlow
from paddlespeech.t2s.models.vits.flow import LogFlow


class StochasticDurationPredictor(nn.Layer):
    """Flow-based stochastic duration predictor used by VITS.

    Implements the duration predictor of `Conditional Variational Autoencoder
    with Adversarial Learning for End-to-End Text-to-Speech`_.
    .. _`Conditional Variational Autoencoder with Adversarial Learning for End-to-End
        Text-to-Speech`: https://arxiv.org/abs/2106.06103
    """

    def __init__(
            self,
            channels: int=192,
            kernel_size: int=3,
            dropout_rate: float=0.5,
            flows: int=4,
            dds_conv_layers: int=3,
            global_channels: int=-1, ):
        """Build the conditioning stacks and the two flow stacks.
        Args:
            channels (int):
                Number of channels.
            kernel_size (int):
                Kernel size.
            dropout_rate (float):
                Dropout rate.
            flows (int):
                Number of ConvFlow/FlipFlow pairs in each flow stack.
            dds_conv_layers (int):
                Number of conv layers in DDS conv.
            global_channels (int):
                Number of global conditioning channels; the global
                projection is only created when this is positive.
        """
        super().__init__()

        # Conditioning path for the text representation.
        self.pre = nn.Conv1D(channels, channels, 1)
        self.dds = DilatedDepthSeparableConv(
            channels,
            kernel_size,
            layers=dds_conv_layers,
            dropout_rate=dropout_rate, )
        self.proj = nn.Conv1D(channels, channels, 1)

        # Main flow stack.
        self.log_flow = LogFlow()
        self.flows = self._build_flow_stack(flows, channels, kernel_size,
                                            dds_conv_layers)

        # Conditioning path and flow stack for the posterior over durations.
        self.post_pre = nn.Conv1D(1, channels, 1)
        self.post_dds = DilatedDepthSeparableConv(
            channels,
            kernel_size,
            layers=dds_conv_layers,
            dropout_rate=dropout_rate, )
        self.post_proj = nn.Conv1D(channels, channels, 1)
        self.post_flows = self._build_flow_stack(flows, channels, kernel_size,
                                                 dds_conv_layers)

        if global_channels > 0:
            self.global_conv = nn.Conv1D(global_channels, channels, 1)

    @staticmethod
    def _build_flow_stack(num_flows, channels, kernel_size, dds_conv_layers):
        """Return an affine flow followed by ``num_flows`` ConvFlow/FlipFlow pairs."""
        stack = nn.LayerList()
        stack.append(ElementwiseAffineFlow(2))
        for _ in range(num_flows):
            stack.append(
                ConvFlow(
                    2,
                    channels,
                    kernel_size,
                    layers=dds_conv_layers, ))
            stack.append(FlipFlow())
        return stack

    def forward(
            self,
            x: paddle.Tensor,
            x_mask: paddle.Tensor,
            w: Optional[paddle.Tensor]=None,
            g: Optional[paddle.Tensor]=None,
            inverse: bool=False,
            noise_scale: float=1.0, ) -> paddle.Tensor:
        """Compute the duration NLL, or sample log-durations when inverting.
        Args:
            x (Tensor):
                Input tensor (B, channels, T_text).
            x_mask (Tensor):
                Mask tensor (B, 1, T_text).
            w (Optional[Tensor]):
                Duration tensor (B, 1, T_text); required when not inverse.
            g (Optional[Tensor]):
                Global conditioning tensor; it is detached before use.
            inverse (bool):
                Whether to inverse the flow.
            noise_scale (float):
                Scale applied to the sampled noise in the inverse pass.
        Returns:
            Tensor:
                If not inverse, negative log-likelihood (NLL) tensor (B,).
                If inverse, log-duration tensor (B, 1, T_text).
        """
        # stop gradient
        # x = x.detach()
        x = self.pre(x)
        if g is not None:
            # stop gradient
            x = x + self.global_conv(g.detach())
        x = self.dds(x, x_mask)
        x = self.proj(x) * x_mask

        if inverse:
            reversed_flows = list(reversed(self.flows))
            # remove a useless vflow
            reversed_flows = reversed_flows[:-2] + [reversed_flows[-1]]
            noise = paddle.randn([paddle.shape(x)[0], 2, paddle.shape(x)[2]])
            z = noise * noise_scale
            for flow in reversed_flows:
                z = flow(z, x_mask, g=x, inverse=inverse)
            z0, z1 = paddle.split(z, 2, axis=1)
            return z0

        assert w is not None, "w must be provided."
        log_2pi = math.log(2 * math.pi)

        # Posterior branch conditioned on the observed durations.
        h_w = self.post_pre(w)
        h_w = self.post_dds(h_w, x_mask)
        h_w = self.post_proj(h_w) * x_mask
        e_q = (paddle.randn([paddle.shape(w)[0], 2, paddle.shape(w)[2]]) *
               x_mask)
        z_q = e_q
        logdet_tot_q = 0.0
        for flow in self.post_flows:
            z_q, logdet_q = flow(z_q, x_mask, g=(x + h_w))
            logdet_tot_q += logdet_q
        z_u, z1 = paddle.split(z_q, [1, 1], 1)
        u = F.sigmoid(z_u) * x_mask
        z0 = (w - u) * x_mask
        sigmoid_logdet = (F.log_sigmoid(z_u) + F.log_sigmoid(-z_u)) * x_mask
        logdet_tot_q += paddle.sum(sigmoid_logdet, [1, 2])
        noise_log_prob = -0.5 * (log_2pi + (e_q**2)) * x_mask
        logq = paddle.sum(noise_log_prob, [1, 2]) - logdet_tot_q

        # Main flow stack applied to the dequantized durations.
        logdet_tot = 0
        z0, logdet = self.log_flow(z0, x_mask)
        logdet_tot += logdet
        z = paddle.concat([z0, z1], 1)
        for flow in self.flows:
            z, logdet = flow(z, x_mask, g=x, inverse=inverse)
            logdet_tot = logdet_tot + logdet
        neg_log_prob = 0.5 * (log_2pi + (z**2)) * x_mask
        nll = paddle.sum(neg_log_prob, [1, 2]) - logdet_tot
        # (B,)
        return nll + logq


================================================
FILE: paddlespeech/t2s/models/vits/flow.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Basic Flow modules used in VITS.

This code is based on https://github.com/jaywalnut310/vits.

"""
import math
from typing import Optional
from typing import Tuple
from typing import Union

import paddle
from paddle import nn

from paddlespeech.t2s.models.vits.transform import piecewise_rational_quadratic_transform


class FlipFlow(nn.Layer):
    """Flow that reverses the channel axis."""

    def forward(self, x: paddle.Tensor, *args, inverse: bool=False, **kwargs
                ) -> Union[paddle.Tensor, Tuple[paddle.Tensor, paddle.Tensor]]:
        """Flip ``x`` along axis 1.
        Args:
            x (Tensor):
                Input tensor (B, channels, T).
            inverse (bool):
                Whether to inverse the flow.
        Returns:
            Tensor:
                Flipped tensor (B, channels, T).
            Tensor:
                All-zero log-determinant tensor (B,), only if not inverse.
        """
        flipped = paddle.flip(x, [1])
        if inverse:
            return flipped
        # The flip only reorders channels, so the log-determinant is zero.
        batch_size = paddle.shape(flipped)[0]
        return flipped, paddle.zeros(batch_size, dtype=flipped.dtype)


class LogFlow(nn.Layer):
    """Flow applying an elementwise logarithm (exponential when inverted)."""

    def forward(self,
                x: paddle.Tensor,
                x_mask: paddle.Tensor,
                inverse: bool=False,
                eps: float=1e-5,
                **kwargs
                ) -> Union[paddle.Tensor, Tuple[paddle.Tensor, paddle.Tensor]]:
        """Apply the masked log transform or its inverse.
        Args:
            x (Tensor):
                Input tensor (B, channels, T).
            x_mask (Tensor):
                Mask tensor (B, 1, T).
            inverse (bool):
                Whether to inverse the flow.
            eps (float):
                Lower clipping bound applied before taking the log.
        Returns:
            Tensor:
                Output tensor (B, channels, T).
            Tensor:
                Log-determinant tensor for NLL (B,) if not inverse.
        """
        if inverse:
            return paddle.exp(x) * x_mask
        # Clip from below so the logarithm stays finite.
        log_x = paddle.log(paddle.clip(x, min=eps)) * x_mask
        return log_x, paddle.sum(-log_x, [1, 2])


class ElementwiseAffineFlow(nn.Layer):
    """Per-channel affine flow with learnable shift and log-scale."""

    def __init__(self, channels: int):
        """Create the zero-initialized shift ``m`` and log-scale ``logs``.
        Args:
            channels (int):
                Number of channels.
        """
        super().__init__()
        self.channels = channels

        # Both parameters have shape (channels, 1) and start at zero.
        self.m = self._zero_parameter(channels)
        self.logs = self._zero_parameter(channels)

    @staticmethod
    def _zero_parameter(channels: int):
        """Return a (channels, 1) parameter initialized to zeros."""
        init_value = paddle.zeros([channels, 1])
        return paddle.create_parameter(
            shape=init_value.shape,
            dtype=str(init_value.numpy().dtype),
            default_initializer=paddle.nn.initializer.Assign(init_value))

    def forward(self,
                x: paddle.Tensor,
                x_mask: paddle.Tensor,
                inverse: bool=False,
                **kwargs
                ) -> Union[paddle.Tensor, Tuple[paddle.Tensor, paddle.Tensor]]:
        """Apply the affine transform or its inverse.
        Args:
            x (Tensor):
                Input tensor (B, channels, T).
            x_mask (Tensor):
                Mask tensor (B, 1, T).
            inverse (bool):
                Whether to inverse the flow.
        Returns:
            Tensor:
                Output tensor (B, channels, T).
            Tensor:
                Log-determinant tensor for NLL (B,) if not inverse.
        """
        if inverse:
            return (x - self.m) * paddle.exp(-self.logs) * x_mask

        shifted = self.m + paddle.exp(self.logs) * x
        out = shifted * x_mask
        return out, paddle.sum(self.logs * x_mask, [1, 2])


class Transpose(nn.Layer):
    """Layer swapping two axes, usable inside paddle.nn.Sequential()."""

    def __init__(self, dim1: int, dim2: int):
        """Remember the two axes to exchange."""
        super().__init__()
        self.dim1 = dim1
        self.dim2 = dim2

    def forward(self, x: paddle.Tensor) -> paddle.Tensor:
        """Return ``x`` with axes ``dim1`` and ``dim2`` exchanged."""
        # Start from the identity permutation and swap the two entries.
        perm = list(range(len(x.shape)))
        perm[self.dim1], perm[self.dim2] = perm[self.dim2], perm[self.dim1]
        return paddle.transpose(x, perm)


class DilatedDepthSeparableConv(nn.Layer):
    """Stack of residual dilated depth-separable convolution blocks."""

    def __init__(
            self,
            channels: int,
            kernel_size: int,
            layers: int,
            dropout_rate: float=0.0,
            eps: float=1e-5, ):
        """Build ``layers`` blocks with dilation ``kernel_size**i``.
        Args:
            channels (int):
                Number of channels.
            kernel_size (int):
                Kernel size.
            layers (int):
                Number of layers.
            dropout_rate (float):
                Dropout rate.
            eps (float):
                Epsilon for layer norm.
        """
        super().__init__()

        self.convs = nn.LayerList()
        for layer_idx in range(layers):
            dilation = kernel_size**layer_idx
            padding = (kernel_size - 1) * dilation // 2
            # Grouped dilated conv, then 1x1 conv; each is followed by a
            # layer norm over the channel axis (hence the transposes) and GELU.
            block = [
                nn.Conv1D(
                    channels,
                    channels,
                    kernel_size,
                    groups=channels,
                    dilation=dilation,
                    padding=padding, ),
                Transpose(1, 2),
                nn.LayerNorm(channels, epsilon=eps),
                Transpose(1, 2),
                nn.GELU(),
                nn.Conv1D(
                    channels,
                    channels,
                    1, ),
                Transpose(1, 2),
                nn.LayerNorm(channels, epsilon=eps),
                Transpose(1, 2),
                nn.GELU(),
                nn.Dropout(dropout_rate),
            ]
            self.convs.append(nn.Sequential(*block))

    def forward(self,
                x: paddle.Tensor,
                x_mask: paddle.Tensor,
                g: Optional[paddle.Tensor]=None) -> paddle.Tensor:
        """Run the residual block stack on the masked input.
        Args:
            x (Tensor):
                Input tensor (B, in_channels, T).
            x_mask (Tensor):
                Mask tensor (B, 1, T).
            g (Optional[Tensor]):
                Optional conditioning tensor added to ``x`` before the blocks.
        Returns:
            Tensor:
                Output tensor (B, channels, T).
        """
        hidden = x if g is None else x + g
        for block in self.convs:
            # Residual connection around each block; input is masked first.
            hidden = hidden + block(hidden * x_mask)
        return hidden * x_mask


class ConvFlow(nn.Layer):
    """Coupling flow whose spline parameters come from a conv network."""

    def __init__(
            self,
            in_channels: int,
            hidden_channels: int,
            kernel_size: int,
            layers: int,
            bins: int=10,
            tail_bound: float=5.0, ):
        """Build the conditioning network and zero-initialize its projection.
        Args:
            in_channels (int):
                Number of input channels.
            hidden_channels (int):
                Number of hidden channels.
            kernel_size (int):
                Kernel size.
            layers (int):
                Number of layers.
            bins (int):
                Number of bins.
            tail_bound (float):
                Tail bound value.
        """
        super().__init__()
        self.half_channels = in_channels // 2
        self.hidden_channels = hidden_channels
        self.bins = bins
        self.tail_bound = tail_bound

        self.input_conv = nn.Conv1D(
            self.half_channels,
            hidden_channels,
            1, )
        self.dds_conv = DilatedDepthSeparableConv(
            hidden_channels,
            kernel_size,
            layers,
            dropout_rate=0.0, )
        self.proj = nn.Conv1D(
            hidden_channels,
            self.half_channels * (bins * 3 - 1),
            1, )

        # Replace the projection's weight and bias with all-zero parameters
        # of the same shape.
        for attr_name in ("weight", "bias"):
            zero_init = paddle.zeros(
                paddle.shape(getattr(self.proj, attr_name)))
            setattr(self.proj, attr_name,
                    paddle.create_parameter(
                        shape=zero_init.shape,
                        dtype=str(zero_init.numpy().dtype),
                        default_initializer=paddle.nn.initializer.Assign(
                            zero_init)))

    def forward(
            self,
            x: paddle.Tensor,
            x_mask: paddle.Tensor,
            g: Optional[paddle.Tensor]=None,
            inverse: bool=False,
    ) -> Union[paddle.Tensor, Tuple[paddle.Tensor, paddle.Tensor]]:
        """Transform the second half of ``x`` conditioned on the first half.
        Args:
            x (Tensor):
                Input tensor (B, channels, T).
            x_mask (Tensor):
                Mask tensor (B, 1, T).
            g (Optional[Tensor]):
                Optional conditioning tensor passed to the DDS conv.
            inverse (bool):
                Whether to inverse the flow.
        Returns:
            Tensor:
                Output tensor (B, channels, T).
            Tensor:
                Log-determinant tensor for NLL (B,) if not inverse.
        """
        xa, xb = x.split(2, 1)

        # (B, half_channels * (bins * 3 - 1), T)
        params = self.proj(
            self.dds_conv(self.input_conv(xa), x_mask, g=g)) * x_mask

        batch, half, frames = xa.shape
        # (B, half_channels, bins * 3 - 1, T) -> (B, half_channels, T, bins * 3 - 1)
        params = params.reshape([batch, half, -1, frames])
        params = params.transpose([0, 1, 3, 2])

        # Split the last axis into widths, heights and derivatives; widths
        # and heights are scaled by 1 / sqrt(hidden_channels).
        scale = math.sqrt(self.hidden_channels)
        widths = params[..., :self.bins] / scale
        heights = params[..., self.bins:2 * self.bins] / scale
        derivatives = params[..., 2 * self.bins:]

        xb, logdet_abs = piecewise_rational_quadratic_transform(
            inputs=xb,
            unnormalized_widths=widths,
            unnormalized_heights=heights,
            unnormalized_derivatives=derivatives,
            inverse=inverse,
            tails="linear",
            tail_bound=self.tail_bound, )
        out = paddle.concat([xa, xb], 1) * x_mask
        logdet = paddle.sum(logdet_abs * x_mask, [1, 2])
        if inverse:
            return out
        return out, logdet


================================================
FILE: paddlespeech/t2s/models/vits/generator.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generator module in VITS.

This code is based on https://github.com/jaywalnut310/vits.

"""
import math
from typing import List
from typing import Optional
from typing import Tuple

import numpy as np
import paddle
import paddle.nn.functional as F
from paddle import nn

from paddlespeech.t2s.models.hifigan import HiFiGANGenerator
from paddlespeech.t2s.models.vits.duration_predictor import StochasticDurationPredictor
from paddlespeech.t2s.models.vits.posterior_encoder import PosteriorEncoder
from paddlespeech.t2s.models.vits.residual_coupling import ResidualAffineCouplingBlock
from paddlespeech.t2s.models.vits.text_encoder import TextEncoder
from paddlespeech.t2s.modules.nets_utils import get_random_segments
from paddlespeech.t2s.modules.nets_utils import make_non_pad_mask


class VITSGenerator(nn.Layer):
    """Generator module in VITS.
    This is a module of VITS described in `Conditional Variational Autoencoder
    with Adversarial Learning for End-to-End Text-to-Speech`_.
    As text encoder, we use conformer architecture instead of the relative positional
    Transformer, which contains additional convolution layers.
    .. _`Conditional Variational Autoencoder with Adversarial Learning for End-to-End
        Text-to-Speech`: https://arxiv.org/abs/2106.06103
    """

    def __init__(
            self,
            vocabs: int,
            aux_channels: int=513,
            hidden_channels: int=192,
            spks: Optional[int]=None,
            langs: Optional[int]=None,
            spk_embed_dim: Optional[int]=None,
            global_channels: int=-1,
            segment_size: int=32,
            text_encoder_attention_heads: int=2,
            text_encoder_ffn_expand: int=4,
            text_encoder_blocks: int=6,
            text_encoder_positionwise_layer_type: str="conv1d",
            text_encoder_positionwise_conv_kernel_size: int=1,
            text_encoder_positional_encoding_layer_type: str="rel_pos",
            text_encoder_self_attention_layer_type: str="rel_selfattn",
            text_encoder_activation_type: str="swish",
            text_encoder_normalize_before: bool=True,
            text_encoder_dropout_rate: float=0.1,
            text_encoder_positional_dropout_rate: float=0.0,
            text_encoder_attention_dropout_rate: float=0.0,
            text_encoder_conformer_kernel_size: int=7,
            use_macaron_style_in_text_encoder: bool=True,
            use_conformer_conv_in_text_encoder: bool=True,
            decoder_kernel_size: int=7,
            decoder_channels: int=512,
            decoder_upsample_scales: List[int]=[8, 8, 2, 2],
            decoder_upsample_kernel_sizes: List[int]=[16, 16, 4, 4],
            decoder_resblock_kernel_sizes: List[int]=[3, 7, 11],
            decoder_resblock_dilations: List[List[int]]=[[1, 3, 5], [1, 3, 5],
                                                         [1, 3, 5]],
            use_weight_norm_in_decoder: bool=True,
            posterior_encoder_kernel_size: int=5,
            posterior_encoder_layers: int=16,
            posterior_encoder_stacks: int=1,
            posterior_encoder_base_dilation: int=1,
            posterior_encoder_dropout_rate: float=0.0,
            use_weight_norm_in_posterior_encoder: bool=True,
            flow_flows: int=4,
            flow_kernel_size: int=5,
            flow_base_dilation: int=1,
            flow_layers: int=4,
            flow_dropout_rate: float=0.0,
            use_weight_norm_in_flow: bool=True,
            use_only_mean_in_flow: bool=True,
            stochastic_duration_predictor_kernel_size: int=3,
            stochastic_duration_predictor_dropout_rate: float=0.5,
            stochastic_duration_predictor_flows: int=4,
            stochastic_duration_predictor_dds_conv_layers: int=3, ):
        """Initialize VITS generator module.

        Builds the text encoder, HiFi-GAN decoder, posterior encoder, flow and
        stochastic duration predictor, plus the optional global-conditioning
        layers (speaker-id embedding, speaker-embedding projection and
        language-id embedding).

        Args:
            vocabs (int):
                Input vocabulary size.
            aux_channels (int):
                Number of acoustic feature channels.
            hidden_channels (int):
                Number of hidden channels.
            spks (Optional[int]):
                Number of speakers. If set to > 1, assume that the
                sids will be provided as the input and use sid embedding layer.
            langs (Optional[int]):
                Number of languages. If set to > 1, assume that the
                lids will be provided as the input and use lid embedding layer.
            spk_embed_dim (Optional[int]):
                Speaker embedding dimension. If set to > 0,
                assume that spembs will be provided as the input.
            global_channels (int):
                Number of global conditioning channels. Must be > 0 whenever
                spks > 1, langs > 1 or spk_embed_dim > 0 (checked by assert).
            segment_size (int):
                Segment size for decoder.
            text_encoder_attention_heads (int):
                Number of heads in conformer block of text encoder.
            text_encoder_ffn_expand (int):
                Expansion ratio of FFN in conformer block of text encoder.
            text_encoder_blocks (int):
                Number of conformer blocks in text encoder.
            text_encoder_positionwise_layer_type (str):
                Position-wise layer type in conformer block of text encoder.
            text_encoder_positionwise_conv_kernel_size (int):
                Position-wise convolution kernel size in conformer block of text encoder.
                Only used when the above layer type is conv1d or conv1d-linear.
            text_encoder_positional_encoding_layer_type (str):
                Positional encoding layer type in conformer block of text encoder.
            text_encoder_self_attention_layer_type (str):
                Self-attention layer type in conformer block of text encoder.
            text_encoder_activation_type (str):
                Activation function type in conformer block of text encoder.
            text_encoder_normalize_before (bool):
                Whether to apply layer norm before self-attention in conformer block of text encoder.
            text_encoder_dropout_rate (float):
                Dropout rate in conformer block of text encoder.
            text_encoder_positional_dropout_rate (float):
                Dropout rate for positional encoding in conformer block of text encoder.
            text_encoder_attention_dropout_rate (float):
                Dropout rate for attention in conformer block of text encoder.
            text_encoder_conformer_kernel_size (int):
                Conformer conv kernel size. Only used when use_conformer_conv_in_text_encoder = True.
            use_macaron_style_in_text_encoder (bool):
                Whether to use macaron style FFN in conformer block of text encoder.
            use_conformer_conv_in_text_encoder (bool):
                Whether to use convolution in conformer block of text encoder.
            decoder_kernel_size (int):
                Decoder kernel size.
            decoder_channels (int):
                Number of decoder initial channels.
            decoder_upsample_scales (List[int]):
                List of upsampling scales in decoder.
            decoder_upsample_kernel_sizes (List[int]):
                List of kernel size for upsampling layers in decoder.
            decoder_resblock_kernel_sizes (List[int]):
                List of kernel size for resblocks in decoder.
            decoder_resblock_dilations (List[List[int]]):
                List of list of dilations for resblocks in decoder.
            use_weight_norm_in_decoder (bool):
                Whether to apply weight normalization in decoder.
            posterior_encoder_kernel_size (int):
                Posterior encoder kernel size.
            posterior_encoder_layers (int):
                Number of layers of posterior encoder.
            posterior_encoder_stacks (int):
                Number of stacks of posterior encoder.
            posterior_encoder_base_dilation (int):
                Base dilation of posterior encoder.
            posterior_encoder_dropout_rate (float):
                Dropout rate for posterior encoder.
            use_weight_norm_in_posterior_encoder (bool):
                Whether to apply weight normalization in posterior encoder.
            flow_flows (int):
                Number of flows in flow.
            flow_kernel_size (int):
                Kernel size in flow.
            flow_base_dilation (int):
                Base dilation in flow.
            flow_layers (int):
                Number of layers in flow.
            flow_dropout_rate (float):
                Dropout rate in flow.
            use_weight_norm_in_flow (bool):
                Whether to apply weight normalization in flow.
            use_only_mean_in_flow (bool):
                Whether to use only mean in flow.
            stochastic_duration_predictor_kernel_size (int):
                Kernel size in stochastic duration predictor.
            stochastic_duration_predictor_dropout_rate (float):
                Dropout rate in stochastic duration predictor.
            stochastic_duration_predictor_flows (int):
                Number of flows in stochastic duration predictor.
            stochastic_duration_predictor_dds_conv_layers (int):
                Number of DDS conv layers in stochastic duration predictor.
        """
        super().__init__()
        self.segment_size = segment_size
        # Text (prior) encoder; the FFN width is hidden_channels * ffn_expand.
        self.text_encoder = TextEncoder(
            vocabs=vocabs,
            attention_dim=hidden_channels,
            attention_heads=text_encoder_attention_heads,
            linear_units=hidden_channels * text_encoder_ffn_expand,
            blocks=text_encoder_blocks,
            positionwise_layer_type=text_encoder_positionwise_layer_type,
            positionwise_conv_kernel_size=text_encoder_positionwise_conv_kernel_size,
            positional_encoding_layer_type=text_encoder_positional_encoding_layer_type,
            self_attention_layer_type=text_encoder_self_attention_layer_type,
            activation_type=text_encoder_activation_type,
            normalize_before=text_encoder_normalize_before,
            dropout_rate=text_encoder_dropout_rate,
            positional_dropout_rate=text_encoder_positional_dropout_rate,
            attention_dropout_rate=text_encoder_attention_dropout_rate,
            conformer_kernel_size=text_encoder_conformer_kernel_size,
            use_macaron_style=use_macaron_style_in_text_encoder,
            use_conformer_conv=use_conformer_conv_in_text_encoder, )
        # Waveform decoder: consumes hidden_channels latents, emits 1 channel.
        self.decoder = HiFiGANGenerator(
            in_channels=hidden_channels,
            out_channels=1,
            channels=decoder_channels,
            global_channels=global_channels,
            kernel_size=decoder_kernel_size,
            upsample_scales=decoder_upsample_scales,
            upsample_kernel_sizes=decoder_upsample_kernel_sizes,
            resblock_kernel_sizes=decoder_resblock_kernel_sizes,
            resblock_dilations=decoder_resblock_dilations,
            use_weight_norm=use_weight_norm_in_decoder, )
        # Posterior encoder: maps aux_channels acoustic features to latents.
        self.posterior_encoder = PosteriorEncoder(
            in_channels=aux_channels,
            out_channels=hidden_channels,
            hidden_channels=hidden_channels,
            kernel_size=posterior_encoder_kernel_size,
            layers=posterior_encoder_layers,
            stacks=posterior_encoder_stacks,
            base_dilation=posterior_encoder_base_dilation,
            global_channels=global_channels,
            dropout_rate=posterior_encoder_dropout_rate,
            use_weight_norm=use_weight_norm_in_posterior_encoder, )
        # Flow applied to the latents (called with inverse=True at synthesis).
        self.flow = ResidualAffineCouplingBlock(
            in_channels=hidden_channels,
            hidden_channels=hidden_channels,
            flows=flow_flows,
            kernel_size=flow_kernel_size,
            base_dilation=flow_base_dilation,
            layers=flow_layers,
            global_channels=global_channels,
            dropout_rate=flow_dropout_rate,
            use_weight_norm=use_weight_norm_in_flow,
            use_only_mean=use_only_mean_in_flow, )
        # TODO: Add deterministic version as an option
        self.duration_predictor = StochasticDurationPredictor(
            channels=hidden_channels,
            kernel_size=stochastic_duration_predictor_kernel_size,
            dropout_rate=stochastic_duration_predictor_dropout_rate,
            flows=stochastic_duration_predictor_flows,
            dds_conv_layers=stochastic_duration_predictor_dds_conv_layers,
            global_channels=global_channels, )

        # Total upsampling factor of the decoder (product of all scales).
        self.upsample_factor = int(np.prod(decoder_upsample_scales))
        # Optional global conditioning. Each attribute stays None when the
        # corresponding conditioning is disabled; the other methods test
        # `self.spks` / `self.spk_embed_dim` / `self.langs` against None.
        self.spks = None
        if spks is not None and spks > 1:
            assert global_channels > 0
            self.spks = spks
            self.global_emb = nn.Embedding(spks, global_channels)
        self.spk_embed_dim = None
        if spk_embed_dim is not None and spk_embed_dim > 0:
            assert global_channels > 0
            self.spk_embed_dim = spk_embed_dim
            self.spemb_proj = nn.Linear(spk_embed_dim, global_channels)
        self.langs = None
        if langs is not None and langs > 1:
            assert global_channels > 0
            self.langs = langs
            self.lang_emb = nn.Embedding(langs, global_channels)

        # delayed import: monotonic_align is only imported when a generator
        # is actually constructed, not when this module is imported.
        from paddlespeech.t2s.models.vits.monotonic_align import maximum_path

        self.maximum_path = maximum_path
        # Pads one zero entry at the front of axis 1 ('NLC' layout); used by
        # `_generate_path` to build a shifted copy of the cumulative path.
        self.pad1d = nn.Pad1D(
            padding=[1, 0],
            mode='constant',
            data_format='NLC', )

    def forward(
            self,
            text: paddle.Tensor,
            text_lengths: paddle.Tensor,
            feats: paddle.Tensor,
            feats_lengths: paddle.Tensor,
            sids: Optional[paddle.Tensor]=None,
            spembs: Optional[paddle.Tensor]=None,
            lids: Optional[paddle.Tensor]=None,
    ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor, paddle.Tensor,
               paddle.Tensor, paddle.Tensor,
               Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor, paddle.Tensor,
                     paddle.Tensor, paddle.Tensor, ], ]:
        """Run the training-time forward pass of the generator.

        Encodes text and acoustic features, aligns them with monotonic
        alignment search, computes the duration NLL and decodes a random
        latent segment into a waveform.

        Args:
            text (Tensor):
                Text index tensor (B, T_text).
            text_lengths (Tensor):
                Text length tensor (B,).
            feats (Tensor):
                Feature tensor (B, aux_channels, T_feats).
            feats_lengths (Tensor):
                Feature length tensor (B,).
            sids (Optional[Tensor]):
                Speaker index tensor (B,) or (B, 1).
            spembs (Optional[Tensor]):
                Speaker embedding tensor (B, spk_embed_dim).
            lids (Optional[Tensor]):
                Language index tensor (B,) or (B, 1).
        Returns:
            Tensor:
                Waveform tensor (B, 1, segment_size * upsample_factor).
            Tensor:
                Duration negative log-likelihood (NLL) tensor (B,).
            Tensor:
                Monotonic attention weight tensor (B, 1, T_feats, T_text).
            Tensor:
                Segments start index tensor (B,).
            Tensor:
                Text mask tensor (B, 1, T_text).
            Tensor:
                Feature mask tensor (B, 1, T_feats).
            tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]:
                - Tensor: Posterior encoder hidden representation (B, H, T_feats).
                - Tensor: Flow hidden representation (B, H, T_feats).
                - Tensor: Expanded text encoder projected mean (B, H, T_feats).
                - Tensor: Expanded text encoder projected scale (B, H, T_feats).
                - Tensor: Posterior encoder projected mean (B, H, T_feats).
                - Tensor: Posterior encoder projected scale (B, H, T_feats).
        """
        # --- text encoder ---
        x, m_p, logs_p, x_mask = self.text_encoder(text, text_lengths)

        # --- global conditioning: sum of every enabled source ---
        g = None
        if self.spks is not None:
            # speaker-id embedding: (B, global_channels, 1)
            spk_ids = paddle.reshape(sids, [-1])
            g = self.global_emb(spk_ids).unsqueeze(-1)
        if self.spk_embed_dim is not None:
            # projected pretrained speaker embedding (e.g. X-vector):
            # (B, global_channels, 1)
            spk_cond = self.spemb_proj(F.normalize(spembs)).unsqueeze(-1)
            g = spk_cond if g is None else g + spk_cond
        if self.langs is not None:
            # language-id embedding: (B, global_channels, 1)
            lang_ids = paddle.reshape(lids, [-1])
            lang_cond = self.lang_emb(lang_ids).unsqueeze(-1)
            g = lang_cond if g is None else g + lang_cond

        # --- posterior encoder and flow, both (B, H, T_feats) ---
        z, m_q, logs_q, y_mask = self.posterior_encoder(
            feats, feats_lengths, g=g)
        z_p = self.flow(z, y_mask, g=g)

        # --- monotonic alignment search (no gradient) ---
        with paddle.no_grad():
            # inverse variance of the prior: (B, H, T_text)
            inv_var = paddle.exp(-2 * logs_p)
            # The negative cross-entropy is assembled from four terms.
            # (B, 1, T_text)
            term_const = paddle.sum(
                -0.5 * math.log(2 * math.pi) - logs_p, [1], keepdim=True)
            # (B, T_feats, H) x (B, H, T_text) = (B, T_feats, T_text)
            term_quad = paddle.matmul(-0.5 * (z_p**2).transpose([0, 2, 1]),
                                      inv_var)
            # (B, T_feats, H) x (B, H, T_text) = (B, T_feats, T_text)
            term_cross = paddle.matmul(
                z_p.transpose([0, 2, 1]), (m_p * inv_var))
            # (B, 1, T_text)
            term_mean = paddle.sum(
                -0.5 * (m_p**2) * inv_var, [1], keepdim=True)
            # (B, T_feats, T_text)
            neg_x_ent = term_const + term_quad + term_cross + term_mean

            # (B, 1, T_feats, T_text)
            attn_mask = paddle.unsqueeze(x_mask, 2) * paddle.unsqueeze(
                y_mask, -1)
            best_path = self.maximum_path(neg_x_ent, attn_mask.squeeze(1))
            # monotonic attention weight: (B, 1, T_feats, T_text)
            attn = best_path.unsqueeze(1).detach()

        # --- duration predictor ---
        # per-token durations from the alignment: (B, 1, T_text)
        w = attn.sum(2)
        dur_nll = self.duration_predictor(x, x_mask, w=w, g=g)
        dur_nll = dur_nll / paddle.sum(x_mask)

        # --- expand the prior statistics to the feature length ---
        # (B, T_feats, T_text) x (B, T_text, H) -> (B, H, T_feats)
        align = attn.squeeze(1)
        m_p = paddle.matmul(align, m_p.transpose([0, 2, 1]))
        m_p = m_p.transpose([0, 2, 1])
        logs_p = paddle.matmul(align, logs_p.transpose([0, 2, 1]))
        logs_p = logs_p.transpose([0, 2, 1])

        # --- decode a random latent segment ---
        z_segments, z_start_idxs = get_random_segments(z, feats_lengths,
                                                       self.segment_size)
        wav = self.decoder(z_segments, g=g)

        latents = (z, z_p, m_p, logs_p, m_q, logs_q)
        return wav, dur_nll, attn, z_start_idxs, x_mask, y_mask, latents

    def inference(
            self,
            text: paddle.Tensor,
            text_lengths: paddle.Tensor,
            feats: Optional[paddle.Tensor]=None,
            feats_lengths: Optional[paddle.Tensor]=None,
            sids: Optional[paddle.Tensor]=None,
            spembs: Optional[paddle.Tensor]=None,
            lids: Optional[paddle.Tensor]=None,
            dur: Optional[paddle.Tensor]=None,
            noise_scale: float=0.667,
            noise_scale_dur: float=0.8,
            alpha: float=1.0,
            max_len: Optional[int]=None,
            use_teacher_forcing: bool=False,
    ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor]:
        """Run inference.
        Args:
            text (Tensor):
                Input text index tensor (B, T_text,).
            text_lengths (Tensor):
                Text length tensor (B,).
            feats (Tensor):
                Feature tensor (B, aux_channels, T_feats,).
                Only used when use_teacher_forcing is True.
            feats_lengths (Tensor):
                Feature length tensor (B,).
                Only used when use_teacher_forcing is True.
            sids (Optional[Tensor]):
                Speaker index tensor (B,) or (B, 1).
            spembs (Optional[Tensor]):
                Speaker embedding tensor, either batched (B, spk_embed_dim)
                or a single un-batched vector (spk_embed_dim,).
            lids (Optional[Tensor]):
                Language index tensor (B,) or (B, 1).
            dur (Optional[Tensor]):
                Ground-truth duration (B, 1, T_text). If provided,
                skip the prediction of durations. Ignored (overwritten by
                the aligned durations) when use_teacher_forcing is True.
            noise_scale (float):
                Noise scale parameter for flow.
            noise_scale_dur (float):
                Noise scale parameter for duration predictor.
            alpha (float):
                Alpha parameter to control the speed of generated speech.
            max_len (Optional[int]):
                Maximum length of acoustic feature sequence.
            use_teacher_forcing (bool):
                Whether to use teacher forcing.
        Returns:
            Tensor:
                Generated waveform tensor (B, T_wav).
            Tensor:
                Monotonic attention weight tensor (B, T_feats, T_text).
            Tensor:
                Duration tensor (B, T_text).
        """
        # encoder
        x, m_p, logs_p, x_mask = self.text_encoder(text, text_lengths)
        g = None
        if self.spks is not None:
            # (B, global_channels, 1)
            g = self.global_emb(paddle.reshape(sids, [-1])).unsqueeze(-1)
        if self.spk_embed_dim is not None:
            # A single un-batched embedding (spk_embed_dim,) gets a batch
            # axis; an already batched (B, spk_embed_dim) tensor is used
            # as is. Unsqueezing unconditionally would turn a batched input
            # into (1, B, D), normalise over the wrong axis and yield a 4-D g.
            if spembs.ndim == 1:
                spembs = spembs.unsqueeze(0)
            # (B, global_channels, 1)
            g_ = self.spemb_proj(F.normalize(spembs)).unsqueeze(-1)
            if g is None:
                g = g_
            else:
                g = g + g_
        if self.langs is not None:
            # (B, global_channels, 1)
            g_ = self.lang_emb(paddle.reshape(lids, [-1])).unsqueeze(-1)
            if g is None:
                g = g_
            else:
                g = g + g_

        if use_teacher_forcing:
            # forward posterior encoder
            z, m_q, logs_q, y_mask = self.posterior_encoder(
                feats, feats_lengths, g=g)

            # forward flow
            # (B, H, T_feats)
            z_p = self.flow(z, y_mask, g=g)

            # monotonic alignment search
            # (B, H, T_text)
            s_p_sq_r = paddle.exp(-2 * logs_p)
            # (B, 1, T_text)
            tmp3 = -0.5 * math.log(2 * math.pi) - logs_p
            neg_x_ent_1 = paddle.sum(
                tmp3,
                [1],
                keepdim=True, )
            # (B, T_feats, H) x (B, H, T_text) = (B, T_feats, T_text)
            neg_x_ent_2 = paddle.matmul(
                -0.5 * (z_p**2).transpose([0, 2, 1]),
                s_p_sq_r, )
            # (B, T_feats, H) x (B, H, T_text) = (B, T_feats, T_text)
            neg_x_ent_3 = paddle.matmul(
                z_p.transpose([0, 2, 1]),
                (m_p * s_p_sq_r), )
            # (B, 1, T_text)
            tmp4 = -0.5 * (m_p**2) * s_p_sq_r
            neg_x_ent_4 = paddle.sum(
                tmp4,
                [1],
                keepdim=True, )
            # (B, T_feats, T_text)
            neg_x_ent = neg_x_ent_1 + neg_x_ent_2 + neg_x_ent_3 + neg_x_ent_4
            # (B, 1, T_feats, T_text)
            attn_mask = paddle.unsqueeze(x_mask, 2) * paddle.unsqueeze(y_mask,
                                                                       -1)
            # monotonic attention weight: (B, 1, T_feats, T_text)
            attn = self.maximum_path(
                neg_x_ent,
                attn_mask.squeeze(1), ).unsqueeze(1)
            # (B, 1, T_text)
            dur = attn.sum(2)

            # decode the whole masked posterior latent sequence
            # (no random segmenting and no max_len truncation in this branch)
            wav = self.decoder(z * y_mask, g=g)
        else:
            # duration
            if dur is None:
                logw = self.duration_predictor(
                    x,
                    x_mask,
                    g=g,
                    inverse=True,
                    noise_scale=noise_scale_dur, )
                w = paddle.exp(logw) * x_mask * alpha
                dur = paddle.ceil(w)
            # total number of frames per utterance, at least 1
            y_lengths = paddle.cast(
                paddle.clip(paddle.sum(dur, [1, 2]), min=1), dtype='int64')
            y_mask = make_non_pad_mask(y_lengths).unsqueeze(1)
            tmp_a = paddle.cast(paddle.unsqueeze(x_mask, 2), dtype='int64')
            tmp_b = paddle.cast(paddle.unsqueeze(y_mask, -1), dtype='int64')
            attn_mask = tmp_a * tmp_b
            attn = self._generate_path(dur, attn_mask)

            # expand the length to match with the feature sequence
            # (B, T_feats, T_text) x (B, T_text, H) -> (B, H, T_feats)
            m_p = paddle.matmul(
                attn.squeeze(1),
                m_p.transpose([0, 2, 1]), ).transpose([0, 2, 1])
            # (B, T_feats, T_text) x (B, T_text, H) -> (B, H, T_feats)
            logs_p = paddle.matmul(
                attn.squeeze(1),
                logs_p.transpose([0, 2, 1]), ).transpose([0, 2, 1])

            # decoder: sample from the expanded prior, invert the flow and
            # decode at most max_len frames
            z_p = m_p + paddle.randn(
                paddle.shape(m_p)) * paddle.exp(logs_p) * noise_scale
            z = self.flow(z_p, y_mask.astype(z_p.dtype), g=g, inverse=True)
            wav = self.decoder(
                (z * y_mask.astype(z.dtype))[:, :, :max_len], g=g)

        return wav.squeeze(1), attn.squeeze(1), dur.squeeze(1)

    def voice_conversion(
            self,
            feats: paddle.Tensor=None,
            feats_lengths: paddle.Tensor=None,
            sids_src: Optional[paddle.Tensor]=None,
            sids_tgt: Optional[paddle.Tensor]=None,
            spembs_src: Optional[paddle.Tensor]=None,
            spembs_tgt: Optional[paddle.Tensor]=None,
            lids: Optional[paddle.Tensor]=None, ) -> paddle.Tensor:
        """Run voice conversion.

        The source features are encoded with the source conditioning, passed
        through the flow, then through the inverse flow and the decoder with
        the target conditioning.

        Args:
            feats (Tensor):
                Feature tensor (B, aux_channels, T_feats,).
            feats_lengths (Tensor):
                Feature length tensor (B,).
            sids_src (Optional[Tensor]):
                Speaker index tensor of source feature (B,) or (B, 1).
            sids_tgt (Optional[Tensor]):
                Speaker index tensor of target feature (B,) or (B, 1).
            spembs_src (Optional[Tensor]):
                Speaker embedding tensor of source feature, either batched
                (B, spk_embed_dim) or a single vector (spk_embed_dim,).
            spembs_tgt (Optional[Tensor]):
                Speaker embedding tensor of target feature, either batched
                (B, spk_embed_dim) or a single vector (spk_embed_dim,).
            lids (Optional[Tensor]):
                Language index tensor (B,) or (B, 1).
        Returns:
            Tensor:
                Generated waveform tensor (B, T_wav).
        """
        # encoder
        g_src = None
        g_tgt = None
        if self.spks is not None:
            # (B, global_channels, 1)
            g_src = self.global_emb(
                paddle.reshape(sids_src, [-1])).unsqueeze(-1)
            g_tgt = self.global_emb(
                paddle.reshape(sids_tgt, [-1])).unsqueeze(-1)

        if self.spk_embed_dim is not None:
            # Only an un-batched embedding (spk_embed_dim,) gets a batch
            # axis. Unsqueezing a batched (B, spk_embed_dim) tensor would
            # produce (1, B, D), normalise over the wrong axis and yield a
            # 4-D conditioning tensor.
            if spembs_src.ndim == 1:
                spembs_src = spembs_src.unsqueeze(0)
            if spembs_tgt.ndim == 1:
                spembs_tgt = spembs_tgt.unsqueeze(0)

            # (B, global_channels, 1)
            g_src_ = self.spemb_proj(F.normalize(spembs_src)).unsqueeze(-1)
            if g_src is None:
                g_src = g_src_
            else:
                g_src = g_src + g_src_

            # (B, global_channels, 1)
            g_tgt_ = self.spemb_proj(F.normalize(spembs_tgt)).unsqueeze(-1)
            if g_tgt is None:
                g_tgt = g_tgt_
            else:
                g_tgt = g_tgt + g_tgt_

        if self.langs is not None:
            # the same language embedding conditions both source and target
            # (B, global_channels, 1)
            g_ = self.lang_emb(paddle.reshape(lids, [-1])).unsqueeze(-1)

            if g_src is None:
                g_src = g_
            else:
                g_src = g_src + g_

            if g_tgt is None:
                g_tgt = g_
            else:
                g_tgt = g_tgt + g_

        # forward posterior encoder
        z, m_q, logs_q, y_mask = self.posterior_encoder(
            feats, feats_lengths, g=g_src)

        # forward flow with the source conditioning
        # (B, H, T_feats)
        z_p = self.flow(z, y_mask, g=g_src)

        # inverse flow and decoder with the target conditioning
        z_hat = self.flow(z_p, y_mask, g=g_tgt, inverse=True)
        wav = self.decoder(z_hat * y_mask, g=g_tgt)

        return wav.squeeze(1)

    def _generate_path(self, dur: paddle.Tensor,
                       mask: paddle.Tensor) -> paddle.Tensor:
        """Build a hard monotonic alignment path from token durations.

        Args:
            dur (Tensor):
                Duration tensor (B, 1, T_text).
            mask (Tensor):
                Attention mask tensor (B, 1, T_feats, T_text).
        Returns:
            Tensor:
                Path tensor (B, 1, T_feats, T_text).
        """
        b, _, t_y, t_x = paddle.shape(mask)

        # Cumulative end frame of every token, flattened over batch and token.
        end_frames = paddle.reshape(paddle.cumsum(dur, -1), [b * t_x])

        # A frame is "covered" by a token while it lies before that token's
        # cumulative end: (B * T_text, T_feats) -> (B, T_text, T_feats).
        frame_ids = paddle.arange(t_y, dtype=dur.dtype)
        covered = frame_ids.unsqueeze(0) < end_frames.unsqueeze(1)
        covered = paddle.reshape(covered, [b, t_x, t_y])
        covered = paddle.cast(covered, dtype='float32')

        # Subtracting the coverage of the previous token (coverage shifted by
        # one along the token axis, zero for the first token) leaves only the
        # frames owned by each token, e.g. for t_x = 3, t_y = 5:
        #   [[1., 1., 0., 0., 0.],        [[1., 1., 0., 0., 0.],
        #    [1., 1., 1., 1., 0.],  -->    [0., 0., 1., 1., 0.],
        #    [1., 1., 1., 1., 1.]]         [0., 0., 0., 0., 1.]]
        previous = self.pad1d(covered)[:, :-1]
        path = covered - previous

        # (B, T_text, T_feats) -> (B, 1, T_feats, T_text), then apply the mask.
        path = path.unsqueeze(1).transpose([0, 1, 3, 2])
        return path * mask.astype(path.dtype)


================================================
FILE: paddlespeech/t2s/models/vits/monotonic_align/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Maximum path calculation module.

This code is based on https://github.com/jaywalnut310/vits.

"""
import warnings

import numpy as np
import paddle
from numba import njit
from numba import prange

# Prefer the compiled Cython kernel. When the extension has not been built,
# record that in `is_cython_avalable` (spelling kept: `maximum_path` reads this
# name) and warn that the numba implementation will be used instead.
try:
    from .core import maximum_path_c

    is_cython_avalable = True
except ImportError:
    is_cython_avalable = False
    warnings.warn(
        "Cython version is not available. Fallback to 'EXPERIMENTAL' numba version. "
        "If you want to use the cython version, please build it as follows: "
        "`cd paddlespeech/t2s/models/vits/monotonic_align; python setup.py build_ext --inplace`"
    )


def maximum_path(neg_x_ent: paddle.Tensor,
                 attn_mask: paddle.Tensor) -> paddle.Tensor:
    """Find the best monotonic path for every item of a batch.

    The search runs on NumPy copies of the inputs, using the Cython kernel
    when it was importable and the numba kernel otherwise.

    Args:
        neg_x_ent (Tensor): Negative X entropy tensor (B, T_feats, T_text).
        attn_mask (Tensor): Attention mask (B, T_feats, T_text).

    Returns:
        Tensor: Maximum path tensor (B, T_feats, T_text), cast to the dtype
            of ``neg_x_ent``.

    """
    out_dtype = neg_x_ent.dtype

    # float32 score copy (the kernels accumulate into it in place) and an
    # int32 output buffer of the same shape.
    scores = neg_x_ent.numpy().astype(np.float32)
    path = np.zeros(scores.shape, dtype=np.int32)

    # Valid extent of every batch item, read from the first column / row of
    # the mask sums.
    t_t_max = attn_mask.sum(1)[:, 0].cpu().numpy().astype(np.int32)
    t_s_max = attn_mask.sum(2)[:, 0].cpu().numpy().astype(np.int32)

    # Only the selected name is evaluated, so a missing `maximum_path_c`
    # is never touched when the Cython build is unavailable.
    kernel = maximum_path_c if is_cython_avalable else maximum_path_numba
    kernel(path, scores, t_t_max, t_s_max)

    return paddle.cast(paddle.to_tensor(path), dtype=out_dtype)


@njit
def maximum_path_each_numba(path, value, t_y, t_x, max_neg_val=-np.inf):
    """Calculate a single maximum path with numba.

    Both arrays are modified in place: ``value`` is overwritten with running
    best cumulative scores and ``path`` receives a 1 in one column of every
    processed row.

    Args:
        path: 2-D array indexed as ``[y, x]``; selected cells are set to 1.
        value: 2-D score array indexed as ``[y, x]``; accumulated in place.
        t_y: Number of rows to process.
        t_x: Number of columns to process.
        max_neg_val: Score assigned to predecessors that are not allowed.

    """
    # Backtracking starts from the last processed column.
    index = t_x - 1
    # Forward pass: add to each cell the better of its two predecessors in the
    # previous row. Only columns in [max(0, t_x + y - t_y), min(t_x, y + 1))
    # are visited for row y.
    for y in range(t_y):
        for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)):
            # v_cur: predecessor in the same column, ruled out when x == y.
            if x == y:
                v_cur = max_neg_val
            else:
                v_cur = value[y - 1, x]
            # v_prev: predecessor one column to the left. Column 0 has none;
            # only the origin cell (0, 0) starts from a score of 0.
            if x == 0:
                if y == 0:
                    v_prev = 0.0
                else:
                    v_prev = max_neg_val
            else:
                v_prev = value[y - 1, x - 1]
            value[y, x] += max(v_prev, v_cur)

    # Backward pass: walk the rows from last to first, marking one cell per
    # row. Step one column left when forced to (index == y) or when the
    # left-hand predecessor has the higher accumulated score.
    for y in range(t_y - 1, -1, -1):
        path[y, index] = 1
        if index != 0 and (index == y or
                           value[y - 1, index] < value[y - 1, index - 1]):
            index = index - 1


@njit(parallel=True)
def maximum_path_numba(paths, values, t_ys, t_xs):
    """Calculate batch maximum path with numba.

    Calls ``maximum_path_each_numba`` once per index of the leading axis of
    ``paths``, passing the matching slices of ``paths`` and ``values`` and the
    matching entries of ``t_ys`` and ``t_xs``. The loop is a ``prange`` loop
    inside an ``njit(parallel=True)`` function.
    """
    for i in prange(paths.shape[0]):
        maximum_path_each_numba(paths[i], values[i], t_ys[i], t_xs[i])


================================================
FILE: paddlespeech/t2s/models/vits/monotonic_align/core.pyx
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Maximum path calculation module with cython optimization.

This code is copied from https://github.com/jaywalnut310/vits with modified code formatting.

"""

cimport cython

from cython.parallel import prange


# Maximum path search for a single item; runs without the GIL.
#   path        : int array indexed [y, x]; selected cells are set to 1.
#   value       : float array indexed [y, x]; overwritten in place with running
#                 best cumulative scores.
#   t_y, t_x    : number of rows / columns to process.
#   max_neg_val : score assigned to predecessors that are not allowed.
# Bounds checking and negative-index wraparound are both switched off below.
# NOTE(review): the `value[y - 1, ...]` reads in the backtracking loop are only
# skipped at y == 0 when index has already reached 0, which holds for
# 1 <= t_x <= t_y -- confirm callers guarantee this.
@cython.boundscheck(False)
@cython.wraparound(False)
cdef void maximum_path_each(int[:, ::1] path, float[:, ::1] value, int t_y, int t_x, float max_neg_val=-1e9) nogil:
    cdef int x
    cdef int y
    cdef float v_prev
    cdef float v_cur
    # tmp is declared but never used in this function.
    cdef float tmp
    # Backtracking starts from the last processed column.
    cdef int index = t_x - 1

    # Forward pass: add to each cell the better of its two predecessors in the
    # previous row. Only columns in [max(0, t_x + y - t_y), min(t_x, y + 1))
    # are visited for row y.
    for y in range(t_y):
        for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)):
            # v_cur: predecessor in the same column, ruled out when x == y.
            if x == y:
                v_cur = max_neg_val
            else:
                v_cur = value[y - 1, x]
            # v_prev: predecessor one column to the left. Column 0 has none;
            # only the origin cell (0, 0) starts from a score of 0.
            if x == 0:
                if y == 0:
                    v_prev = 0.0
                else:
                    v_prev = max_neg_val
            else:
                v_prev = value[y - 1, x - 1]
            value[y, x] += max(v_prev, v_cur)

    # Backward pass: mark one cell per row, stepping one column left when
    # forced to (index == y) or when the left-hand predecessor scored higher.
    for y in range(t_y - 1, -1, -1):
        path[y, index] = 1
        if index != 0 and (index == y or value[y - 1, index] < value[y - 1, index - 1]):
            index = index - 1


# Batched entry point callable from Python: runs maximum_path_each once per
# index of the leading axis of `paths`, inside a prange loop with the GIL
# released. `paths` and `values` are written in place; nothing is returned.
@cython.boundscheck(False)
@cython.wraparound(False)
cpdef void maximum_path_c(int[:, :, ::1] paths, float[:, :, ::1] values, int[::1] t_ys, int[::1] t_xs) nogil:
    # Batch size, taken from the leading axis of `paths`.
    cdef int b = paths.shape[0]
    cdef int i
    for i in prange(b, nogil=True):
        maximum_path_each(paths[i], values[i], t_ys[i], t_xs[i])


================================================
FILE: paddlespeech/t2s/models/vits/monotonic_align/setup.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Setup cython code."""
from Cython.Build import cythonize
from setuptools import Extension
from setuptools import setup
from setuptools.command.build_ext import build_ext as _build_ext


class build_ext(_build_ext):
    """``build_ext`` command that adds the NumPy headers to the include path."""

    def finalize_options(self):
        """Finalize the options and register ``numpy.get_include()``.

        NumPy is imported lazily here, after the parent class has finalized
        its options, and ``__NUMPY_SETUP__`` is cleared first to prevent numpy
        from thinking it is still in its setup process.
        """
        _build_ext.finalize_options(self)
        # ``__builtins__`` is the builtins module only inside ``__main__``; in
        # an imported module it is a plain dict, where attribute assignment
        # raises AttributeError. Going through the real module works in both.
        import builtins

        builtins.__NUMPY_SETUP__ = False
        import numpy

        self.include_dirs.append(numpy.get_include())


# A single extension module named "core", compiled from core.pyx.
exts = [Extension(
    name="core",
    sources=["core.pyx"], )]
# Run the build: the extension list goes through cythonize() with
# language_level=3, and the build_ext subclass defined in this file replaces
# the stock build_ext command.
setup(
    name="monotonic_align",
    ext_modules=cythonize(exts, language_level=3),
    cmdclass={"build_ext": build_ext}, )


================================================
FILE: paddlespeech/t2s/models/vits/posterior_encoder.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Posterior encoder module in VITS.

This code is based on https://github.com/jaywalnut310/vits.

"""
from typing import Optional
from typing import Tuple

import paddle
from paddle import nn

from paddlespeech.t2s.models.vits.wavenet.wavenet import WaveNet
from paddlespeech.t2s.modules.nets_utils import make_non_pad_mask


class PosteriorEncoder(nn.Layer):
    """Posterior encoder of VITS.

    Implements the posterior encoder described in `Conditional Variational
    Autoencoder with Adversarial Learning for End-to-End Text-to-Speech`_: a
    1x1 input convolution, a WaveNet stack and a 1x1 projection that yields the
    statistics from which the latent variable is sampled.

    .. _`Conditional Variational Autoencoder with Adversarial Learning for End-to-End
        Text-to-Speech`: https://arxiv.org/abs/2106.06103
    """

    def __init__(
            self,
            in_channels: int=513,
            out_channels: int=192,
            hidden_channels: int=192,
            kernel_size: int=5,
            layers: int=16,
            stacks: int=1,
            base_dilation: int=1,
            global_channels: int=-1,
            dropout_rate: float=0.0,
            bias: bool=True,
            use_weight_norm: bool=True, ):
        """Initialize PosteriorEncoder module.

        Args:
            in_channels (int):
                Number of input channels.
            out_channels (int):
                Number of output channels.
            hidden_channels (int):
                Number of hidden channels.
            kernel_size (int):
                Kernel size in WaveNet.
            layers (int):
                Number of layers of WaveNet.
            stacks (int):
                Number of repeat stacking of WaveNet.
            base_dilation (int):
                Base dilation factor.
            global_channels (int):
                Number of global conditioning channels.
            dropout_rate (float):
                Dropout rate.
            bias (bool):
                Whether to use bias parameters in conv.
            use_weight_norm (bool):
                Whether to apply weight norm.

        """
        super().__init__()

        # 1x1 convolution from the input features to the hidden width
        self.input_conv = nn.Conv1D(in_channels, hidden_channels, 1)

        # WaveNet body; its own first/last convolutions are switched off
        # because this module supplies input_conv and proj itself.
        wavenet_options = dict(
            in_channels=-1,
            out_channels=-1,
            aux_channels=-1,
            residual_channels=hidden_channels,
            gate_channels=hidden_channels * 2,
            skip_channels=hidden_channels,
            global_channels=global_channels,
            kernel_size=kernel_size,
            layers=layers,
            stacks=stacks,
            base_dilation=base_dilation,
            dropout_rate=dropout_rate,
            bias=bias,
            use_weight_norm=use_weight_norm,
            use_first_conv=False,
            use_last_conv=False,
            scale_residual=False,
            scale_skip_connect=True, )
        self.encoder = WaveNet(**wavenet_options)

        # 1x1 projection producing two statistics per output channel
        self.proj = nn.Conv1D(hidden_channels, out_channels * 2, 1)

    def forward(
            self,
            x: paddle.Tensor,
            x_lengths: paddle.Tensor,
            g: Optional[paddle.Tensor]=None
    ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor, paddle.Tensor]:
        """Calculate forward propagation.

        Args:
            x (Tensor):
                Input tensor (B, in_channels, T_feats).
            x_lengths (Tensor):
                Length tensor (B,).
            g (Optional[Tensor]):
                Global conditioning tensor (B, global_channels, 1).

        Returns:
            Tensor:
                Encoded hidden representation tensor (B, out_channels, T_feats).
            Tensor:
                Projected mean tensor (B, out_channels, T_feats).
            Tensor:
                Projected scale tensor (B, out_channels, T_feats).
            Tensor:
                Mask tensor for input tensor (B, 1, T_feats).

        """
        # non-padding mask in the dtype of the input
        mask = make_non_pad_mask(x_lengths).unsqueeze(1).astype(x.dtype)

        hidden = self.input_conv(x) * mask
        hidden = self.encoder(hidden, mask, g=g)

        # split the projection into mean and log-scale halves
        mean, log_scale = paddle.split(self.proj(hidden) * mask, 2, axis=1)

        # sample: mean + noise * exp(log_scale), masked again
        noise = paddle.randn(paddle.shape(mean))
        latent = (mean + noise * paddle.exp(log_scale)) * mask

        return latent, mean, log_scale, mask


================================================
FILE: paddlespeech/t2s/models/vits/residual_coupling.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Residual affine coupling modules in VITS.

This code is based on https://github.com/jaywalnut310/vits.

"""
from typing import Optional
from typing import Tuple
from typing import Union

import paddle
from paddle import nn

from paddlespeech.t2s.models.vits.flow import FlipFlow
from paddlespeech.t2s.models.vits.wavenet.wavenet import WaveNet


class ResidualAffineCouplingBlock(nn.Layer):
    """Stack of residual affine coupling layers.

    This block is what is used as the "Flow" in `Conditional Variational
    Autoencoder with Adversarial Learning for End-to-End Text-to-Speech`_. It
    alternates a ``ResidualAffineCouplingLayer`` with a ``FlipFlow``.

    .. _`Conditional Variational Autoencoder with Adversarial Learning for End-to-End
        Text-to-Speech`: https://arxiv.org/abs/2106.06103

    """

    def __init__(
            self,
            in_channels: int=192,
            hidden_channels: int=192,
            flows: int=4,
            kernel_size: int=5,
            base_dilation: int=1,
            layers: int=4,
            global_channels: int=-1,
            dropout_rate: float=0.0,
            use_weight_norm: bool=True,
            bias: bool=True,
            use_only_mean: bool=True, ):
        """Initialize ResidualAffineCouplingBlock module.

        Args:
            in_channels (int):
                Number of input channels.
            hidden_channels (int):
                Number of hidden channels.
            flows (int):
                Number of flows.
            kernel_size (int):
                Kernel size for WaveNet.
            base_dilation (int):
                Base dilation factor for WaveNet.
            layers (int):
                Number of layers of WaveNet.
            global_channels (int):
                Number of global channels.
            dropout_rate (float):
                Dropout rate.
            use_weight_norm (bool):
                Whether to use weight normalization in WaveNet.
            bias (bool):
                Whether to use bias parameters in WaveNet.
            use_only_mean (bool):
                Whether to estimate only mean.

        """
        super().__init__()

        # every coupling layer gets the same configuration; the number of
        # WaveNet stacks is fixed to 1 here
        coupling_options = dict(
            in_channels=in_channels,
            hidden_channels=hidden_channels,
            kernel_size=kernel_size,
            base_dilation=base_dilation,
            layers=layers,
            stacks=1,
            global_channels=global_channels,
            dropout_rate=dropout_rate,
            use_weight_norm=use_weight_norm,
            bias=bias,
            use_only_mean=use_only_mean, )

        self.flows = nn.LayerList()
        for _ in range(flows):
            self.flows.append(ResidualAffineCouplingLayer(**coupling_options))
            self.flows.append(FlipFlow())

    def forward(
            self,
            x: paddle.Tensor,
            x_mask: paddle.Tensor,
            g: Optional[paddle.Tensor]=None,
            inverse: bool=False, ) -> paddle.Tensor:
        """Calculate forward propagation.

        Args:
            x (Tensor):
                Input tensor (B, in_channels, T).
            x_mask (Tensor):
                Mask tensor (B, 1, T).
            g (Optional[Tensor]):
                Global conditioning tensor (B, global_channels, 1).
            inverse (bool):
                Whether to inverse the flow.

        Returns:
            Tensor: Output tensor (B, in_channels, T).

        """
        if inverse:
            # inverse direction: traverse the flows back to front; each flow
            # returns only the tensor
            for flow in reversed(self.flows):
                x = flow(x, x_mask, g=g, inverse=inverse)
        else:
            # forward direction: each flow returns a pair, of which only the
            # first element is kept
            for flow in self.flows:
                x, _ = flow(x, x_mask, g=g, inverse=inverse)
        return x


class ResidualAffineCouplingLayer(nn.Layer):
    """Single residual affine coupling layer.

    The input is split in two along the channel axis; the first half passes
    through unchanged and conditions an affine transform of the second half.
    """

    def __init__(
            self,
            in_channels: int=192,
            hidden_channels: int=192,
            kernel_size: int=5,
            base_dilation: int=1,
            layers: int=5,
            stacks: int=1,
            global_channels: int=-1,
            dropout_rate: float=0.0,
            use_weight_norm: bool=True,
            bias: bool=True,
            use_only_mean: bool=True, ):
        """Initialize ResidualAffineCouplingLayer module.

        Args:
            in_channels (int):
                Number of input channels.
            hidden_channels (int):
                Number of hidden channels.
            kernel_size (int):
                Kernel size for WaveNet.
            base_dilation (int):
                Base dilation factor for WaveNet.
            layers (int):
                Number of layers of WaveNet.
            stacks (int):
                Number of stacks of WaveNet.
            global_channels (int):
                Number of global channels.
            dropout_rate (float):
                Dropout rate.
            use_weight_norm (bool):
                Whether to use weight normalization in WaveNet.
            bias (bool):
                Whether to use bias parameters in WaveNet.
            use_only_mean (bool):
                Whether to estimate only mean.

        """
        assert in_channels % 2 == 0, "in_channels should be divisible by 2"
        super().__init__()
        self.half_channels = in_channels // 2
        self.use_only_mean = use_only_mean

        # 1x1 convolution from the conditioning half to the hidden width
        self.input_conv = nn.Conv1D(self.half_channels, hidden_channels, 1)

        wavenet_options = dict(
            in_channels=-1,
            out_channels=-1,
            aux_channels=-1,
            residual_channels=hidden_channels,
            gate_channels=hidden_channels * 2,
            skip_channels=hidden_channels,
            global_channels=global_channels,
            kernel_size=kernel_size,
            layers=layers,
            stacks=stacks,
            base_dilation=base_dilation,
            dropout_rate=dropout_rate,
            bias=bias,
            use_weight_norm=use_weight_norm,
            use_first_conv=False,
            use_last_conv=False,
            scale_residual=False,
            scale_skip_connect=True, )
        self.encoder = WaveNet(**wavenet_options)

        # the projection emits the mean only, or mean and log-scale
        if use_only_mean:
            proj_channels = self.half_channels
        else:
            proj_channels = self.half_channels * 2
        self.proj = nn.Conv1D(hidden_channels, proj_channels, 1)

        # replace the projection's weight and bias with all-zero parameters
        zero_weight = paddle.zeros(paddle.shape(self.proj.weight))
        self.proj.weight = paddle.create_parameter(
            shape=zero_weight.shape,
            dtype=str(zero_weight.numpy().dtype),
            default_initializer=paddle.nn.initializer.Assign(zero_weight))

        zero_bias = paddle.zeros(paddle.shape(self.proj.bias))
        self.proj.bias = paddle.create_parameter(
            shape=zero_bias.shape,
            dtype=str(zero_bias.numpy().dtype),
            default_initializer=paddle.nn.initializer.Assign(zero_bias))

    def forward(
            self,
            x: paddle.Tensor,
            x_mask: paddle.Tensor,
            g: Optional[paddle.Tensor]=None,
            inverse: bool=False,
    ) -> Union[paddle.Tensor, Tuple[paddle.Tensor, paddle.Tensor]]:
        """Calculate forward propagation.

        Args:
            x (Tensor):
                Input tensor (B, in_channels, T).
            x_mask (Tensor):
                Mask tensor (B, 1, T).
            g (Optional[Tensor]):
                Global conditioning tensor (B, global_channels, 1).
            inverse (bool):
                Whether to inverse the flow.

        Returns:
            Tensor:
                Output tensor (B, in_channels, T).
            Tensor:
                Log-determinant tensor for NLL (B,) if not inverse.

        """
        xa, xb = paddle.split(x, 2, axis=1)

        # statistics are predicted from the untouched half
        hidden = self.input_conv(xa) * x_mask
        hidden = self.encoder(hidden, x_mask, g=g)
        stats = self.proj(hidden) * x_mask

        if self.use_only_mean:
            # mean-only mode: the log-scale is all zeros
            m = stats
            logs = paddle.zeros(paddle.shape(m))
        else:
            m, logs = paddle.split(stats, 2, axis=1)

        if not inverse:
            transformed = m + xb * paddle.exp(logs) * x_mask
            out = paddle.concat([xa, transformed], 1)
            logdet = paddle.sum(logs, [1, 2])
            return out, logdet
        else:
            restored = (xb - m) * paddle.exp(-logs) * x_mask
            out = paddle.concat([xa, restored], 1)
            return out


================================================
FILE: paddlespeech/t2s/models/vits/text_encoder.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Text encoder module in VITS.

This code is based on https://github.com/jaywalnut310/vits.

"""
import math
from typing import Tuple

import paddle
from paddle import nn

from paddlespeech.t2s.modules.nets_utils import make_non_pad_mask
from paddlespeech.t2s.modules.transformer.encoder import ConformerEncoder as Encoder
from paddlespeech.utils.initialize import normal_


class TextEncoder(nn.Layer):
    """Text encoder of VITS.

    Implements the text encoder described in `Conditional Variational
    Autoencoder with Adversarial Learning for End-to-End Text-to-Speech`_.

    Instead of the relative positional Transformer, the conformer architecture
    is used as the encoder module, which contains additional convolution layers.

    .. _`Conditional Variational Autoencoder with Adversarial Learning for End-to-End
        Text-to-Speech`: https://arxiv.org/abs/2106.06103

    """

    def __init__(
            self,
            vocabs: int,
            attention_dim: int=192,
            attention_heads: int=2,
            linear_units: int=768,
            blocks: int=6,
            positionwise_layer_type: str="conv1d",
            positionwise_conv_kernel_size: int=3,
            positional_encoding_layer_type: str="rel_pos",
            self_attention_layer_type: str="rel_selfattn",
            activation_type: str="swish",
            normalize_before: bool=True,
            use_macaron_style: bool=False,
            use_conformer_conv: bool=False,
            conformer_kernel_size: int=7,
            dropout_rate: float=0.1,
            positional_dropout_rate: float=0.0,
            attention_dropout_rate: float=0.0, ):
        """Initialize TextEncoder module.

        Args:
            vocabs (int):
                Vocabulary size.
            attention_dim (int):
                Attention dimension.
            attention_heads (int):
                Number of attention heads.
            linear_units (int):
                Number of linear units of positionwise layers.
            blocks (int):
                Number of encoder blocks.
            positionwise_layer_type (str):
                Positionwise layer type.
            positionwise_conv_kernel_size (int):
                Positionwise layer's kernel size.
            positional_encoding_layer_type (str):
                Positional encoding layer type.
            self_attention_layer_type (str):
                Self-attention layer type.
            activation_type (str):
                Activation function type.
            normalize_before (bool):
                Whether to apply LayerNorm before attention.
            use_macaron_style (bool):
                Whether to use macaron style components.
            use_conformer_conv (bool):
                Whether to use conformer conv layers.
            conformer_kernel_size (int):
                Conformer's conv kernel size.
            dropout_rate (float):
                Dropout rate.
            positional_dropout_rate (float):
                Dropout rate for positional encoding.
            attention_dropout_rate (float):
                Dropout rate for attention.

        """
        super().__init__()
        # kept because forward() and reset_parameters() both read it
        self.attention_dim = attention_dim

        # token embedding
        self.emb = nn.Embedding(vocabs, attention_dim)

        # conformer encoder; it is built without an input layer because the
        # embedding above is applied by this module
        encoder_options = dict(
            idim=-1,
            input_layer=None,
            attention_dim=attention_dim,
            attention_heads=attention_heads,
            linear_units=linear_units,
            num_blocks=blocks,
            normalize_before=normalize_before,
            activation_type=activation_type,
            macaron_style=use_macaron_style,
            use_cnn_module=use_conformer_conv,
            cnn_module_kernel=conformer_kernel_size,
            positionwise_layer_type=positionwise_layer_type,
            positionwise_conv_kernel_size=positionwise_conv_kernel_size,
            pos_enc_layer_type=positional_encoding_layer_type,
            selfattention_layer_type=self_attention_layer_type,
            dropout_rate=dropout_rate,
            positional_dropout_rate=positional_dropout_rate,
            attention_dropout_rate=attention_dropout_rate, )
        self.encoder = Encoder(**encoder_options)

        # 1x1 projection producing two statistics per channel
        self.proj = nn.Conv1D(attention_dim, attention_dim * 2, 1)

        self.reset_parameters()

    def forward(
            self,
            x: paddle.Tensor,
            x_lengths: paddle.Tensor,
    ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor, paddle.Tensor]:
        """Calculate forward propagation.

        Args:
            x (Tensor):
                Input index tensor (B, T_text).
            x_lengths (Tensor):
                Length tensor (B,).

        Returns:
            Tensor:
                Encoded hidden representation (B, attention_dim, T_text).
            Tensor:
                Projected mean tensor (B, attention_dim, T_text).
            Tensor:
                Projected scale tensor (B, attention_dim, T_text).
            Tensor:
                Mask tensor for input tensor (B, 1, T_text).

        """
        # embeddings are scaled by sqrt(attention_dim)
        embedded = self.emb(x) * math.sqrt(self.attention_dim)
        mask = make_non_pad_mask(x_lengths).unsqueeze(1).astype(embedded.dtype)

        # the encoder works channel-last (B, T_text, attention_dim) while the
        # mask stays (B, 1, T_text)
        encoded, _ = self.encoder(embedded, mask)

        # back to channel-first (B, attention_dim, T_text) for the projection
        encoded = paddle.transpose(encoded, [0, 2, 1])
        m, logs = paddle.split(self.proj(encoded) * mask, 2, axis=1)

        return encoded, m, logs, mask

    def reset_parameters(self):
        """Re-initialize the embedding table.

        Weights are drawn with ``normal_`` using mean 0 and standard deviation
        ``attention_dim ** -0.5``; if the embedding reports a padding index,
        that row is then set to zero.
        """
        normal_(self.emb.weight, mean=0.0, std=self.attention_dim**-0.5)
        padding_idx = self.emb._padding_idx
        if padding_idx is None:
            return
        with paddle.no_grad():
            self.emb.weight[padding_idx] = 0


================================================
FILE: paddlespeech/t2s/models/vits/transform.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Flow-related transformation.

This code is based on https://github.com/bayesiains/nflows.

"""
import numpy as np
import paddle
from paddle import nn
from paddle.nn import functional as F

from paddlespeech.t2s.modules.nets_utils import paddle_gather

# Module-level 1e-3 defaults. The spline functions in this file spell the same
# value as literal keyword defaults (annotated "for dygraph-to-static") instead
# of referencing these names.
DEFAULT_MIN_BIN_WIDTH = 1e-3
DEFAULT_MIN_BIN_HEIGHT = 1e-3
DEFAULT_MIN_DERIVATIVE = 1e-3


def piecewise_rational_quadratic_transform(
        inputs,
        unnormalized_widths,
        unnormalized_heights,
        unnormalized_derivatives,
        inverse=False,
        tails=None,
        tail_bound=1.0,
        # for dygraph-to-static
        min_bin_width=1e-3,
        min_bin_height=1e-3,
        min_derivative=1e-3, ):
    """Apply a piecewise rational-quadratic spline transform.

    Dispatches to ``rational_quadratic_spline`` when ``tails`` is None and to
    ``unconstrained_rational_quadratic_spline`` otherwise; only the latter
    receives ``tails`` and ``tail_bound``.

    Returns:
        The ``(outputs, logabsdet)`` pair produced by the selected spline
        function.
    """
    # arguments common to both spline implementations
    shared_kwargs = {
        "inputs": inputs,
        "unnormalized_widths": unnormalized_widths,
        "unnormalized_heights": unnormalized_heights,
        "unnormalized_derivatives": unnormalized_derivatives,
        "inverse": inverse,
        "min_bin_width": min_bin_width,
        "min_bin_height": min_bin_height,
        "min_derivative": min_derivative,
    }

    if tails is not None:
        outputs, logabsdet = unconstrained_rational_quadratic_spline(
            tails=tails, tail_bound=tail_bound, **shared_kwargs)
    else:
        outputs, logabsdet = rational_quadratic_spline(**shared_kwargs)
    return outputs, logabsdet


def mask_preprocess(x, mask):
    """Collect the entries of ``x`` selected by ``mask`` into a 2-D tensor.

    ``x`` is unpacked as four axes whose last one is ``bins``. For each index
    ``i`` of that last axis, ``x[:, :, :, i][mask]`` is written into column
    ``i`` of the result, whose shape is ``[number of selected entries, bins]``.
    """
    # bins.dtype = int32
    # B, C and T are unpacked but not used below.
    B, C, T, bins = paddle.shape(x)
    mask_int = paddle.cast(mask, dtype='int64')
    # When the input of paddle.sum is int32 or bool, its output is int64.
    # The shape given to paddle.zeros (fill_constant) is forcibly cast to int32.
    new_x = paddle.zeros([paddle.sum(mask_int), bins])
    for i in range(bins):
        new_x[:, i] = x[:, :, :, i][mask]
    return new_x


def unconstrained_rational_quadratic_spline(
        inputs,
        unnormalized_widths,
        unnormalized_heights,
        unnormalized_derivatives,
        inverse=False,
        tails="linear",
        tail_bound=1.0,
        # for dygraph-to-static
        min_bin_width=1e-3,
        min_bin_height=1e-3,
        min_derivative=1e-3, ):
    """Rational-quadratic spline on ``[-tail_bound, tail_bound]`` with tails.

    Inputs inside the interval are transformed by ``rational_quadratic_spline``
    over the box ``[-tail_bound, tail_bound]`` on both axes. With
    ``tails="linear"`` inputs outside the interval are copied to the output
    unchanged and get a ``logabsdet`` of 0.

    Returns:
        Tuple ``(outputs, logabsdet)``, each with the shape of ``inputs``.

    Raises:
        RuntimeError: If ``tails`` is anything other than ``"linear"``.
    """
    inside_interval_mask = (inputs >= -tail_bound) & (inputs <= tail_bound)
    outside_interval_mask = ~inside_interval_mask
    # for dygraph to static
    # Calling zeros with paddle.shape(x) here would yield a var whose shape is
    # all -1; using x.shape instead keeps the dimensions that are known.
    outputs = paddle.zeros(inputs.shape)
    logabsdet = paddle.zeros(inputs.shape)
    if tails == "linear":
        # Mind the argument order of padding.
        pad2d = nn.Pad2D(padding=[1, 1, 0, 0], mode='constant')
        unnormalized_derivatives = pad2d(unnormalized_derivatives)
        # softplus(constant) == 1 - min_derivative, so once the spline adds
        # min_derivative back the two boundary derivatives equal 1.
        constant = np.log(np.exp(1 - min_derivative) - 1)
        unnormalized_derivatives[..., 0] = constant
        unnormalized_derivatives[..., -1] = constant
        # for dygraph to static
        tmp = inputs[outside_interval_mask]
        outputs[outside_interval_mask] = tmp
        logabsdet[outside_interval_mask] = 0
    else:
        raise RuntimeError("{} tails are not implemented.".format(tails))

    # Keep only the spline parameters that belong to in-interval inputs.
    unnormalized_widths = mask_preprocess(unnormalized_widths,
                                          inside_interval_mask)
    unnormalized_heights = mask_preprocess(unnormalized_heights,
                                           inside_interval_mask)
    unnormalized_derivatives = mask_preprocess(unnormalized_derivatives,
                                               inside_interval_mask)

    # Transform the in-interval inputs and write the results back into the
    # matching positions of the full-size outputs.
    (outputs[inside_interval_mask],
     logabsdet[inside_interval_mask], ) = rational_quadratic_spline(
         inputs=inputs[inside_interval_mask],
         unnormalized_widths=unnormalized_widths,
         unnormalized_heights=unnormalized_heights,
         unnormalized_derivatives=unnormalized_derivatives,
         inverse=inverse,
         left=-tail_bound,
         right=tail_bound,
         bottom=-tail_bound,
         top=tail_bound,
         min_bin_width=min_bin_width,
         min_bin_height=min_bin_height,
         min_derivative=min_derivative, )

    return outputs, logabsdet


def rational_quadratic_spline(
        inputs,
        unnormalized_widths,
        unnormalized_heights,
        unnormalized_derivatives,
        inverse=False,
        left=0.0,
        right=1.0,
        bottom=0.0,
        top=1.0,
        # for dygraph-to-static
        min_bin_width=1e-3,
        min_bin_height=1e-3,
        min_derivative=1e-3, ):
    """Evaluate a piecewise rational-quadratic spline, or its inverse.

    Bin widths and heights are obtained from a softmax over the last axis of
    the unnormalized parameters, floored at ``min_bin_width`` /
    ``min_bin_height``, accumulated into knot positions and rescaled to
    ``[left, right]`` (x axis) and ``[bottom, top]`` (y axis). Knot
    derivatives are ``min_derivative + softplus(unnormalized_derivatives)``.

    Args:
        inputs: Values to transform. Range checks against the domain are
            commented out (see "for dygraph to static" notes below), so the
            caller is responsible for passing in-domain values.
        unnormalized_widths: Unnormalized bin widths; the size of the last
            axis is taken as the number of bins.
        unnormalized_heights: Unnormalized bin heights, bins on the last axis.
        unnormalized_derivatives: Unnormalized knot derivatives, knots on the
            last axis (presumably num_bins + 1 entries, since both index
            ``bin_idx`` and ``bin_idx + 1`` are gathered -- TODO confirm).
        inverse: If True, map from the y axis back to the x axis.
        left, right: Bounds of the x axis.
        bottom, top: Bounds of the y axis.
        min_bin_width: Lower bound added to every normalized bin width.
        min_bin_height: Lower bound added to every normalized bin height.
        min_derivative: Lower bound added to every knot derivative.

    Returns:
        Tuple ``(outputs, logabsdet)``. In the forward direction
        ``logabsdet`` is ``log(derivative_numerator) - 2 * log(denominator)``;
        in the inverse direction the negated value is returned.

    Raises:
        AssertionError: In the inverse direction, if any discriminant of the
            per-element quadratic is negative.
    """
    # for dygraph to static
    # if paddle.min(inputs) < left or paddle.max(inputs) > right:
    #     raise ValueError("Input to a transform is not within its domain")
    # Pads one constant element on the left of the last axis (NCL layout);
    # used to prepend the first knot to the cumulative sums.
    pad1d = nn.Pad1D(
        padding=[1, 0],
        mode='constant',
        data_format='NCL', )

    num_bins = unnormalized_widths.shape[-1]
    # for dygraph to static
    # if min_bin_width * num_bins > 1.0:
    #     raise ValueError("Minimal bin width too large for the number of bins")
    # if min_bin_height * num_bins > 1.0:
    #     raise ValueError("Minimal bin height too large for the number of bins")

    # Normalized widths: softmax, then squeezed so each bin keeps at least
    # min_bin_width while the total still sums to 1.
    widths = F.softmax(unnormalized_widths, axis=-1)
    widths = min_bin_width + (1 - min_bin_width * num_bins) * widths
    cumwidths = paddle.cumsum(widths, axis=-1)

    # unsqueeze(0) adds a leading axis before Pad1D (assumes cumwidths is 2-D
    # so the padded tensor is NCL -- TODO confirm).
    # NOTE(review): squeeze() without an axis drops every size-1 dimension,
    # not only the one added here -- confirm inputs never have such a dim.
    cumwidths = pad1d(cumwidths.unsqueeze(0)).squeeze()
    cumwidths = (right - left) * cumwidths + left
    # Pin the outermost knots exactly to the interval bounds.
    cumwidths[..., 0] = left
    cumwidths[..., -1] = right
    # Recompute widths from the final knot positions.
    widths = cumwidths[..., 1:] - cumwidths[..., :-1]

    derivatives = min_derivative + F.softplus(unnormalized_derivatives)

    # Same construction for the y axis.
    heights = F.softmax(unnormalized_heights, axis=-1)
    heights = min_bin_height + (1 - min_bin_height * num_bins) * heights
    cumheights = paddle.cumsum(heights, axis=-1)
    cumheights = pad1d(cumheights.unsqueeze(0)).squeeze()
    cumheights = (top - bottom) * cumheights + bottom
    cumheights[..., 0] = bottom
    cumheights[..., -1] = top
    heights = cumheights[..., 1:] - cumheights[..., :-1]

    # Locate the bin of every input: search the y knots when inverting,
    # the x knots otherwise. Note that _searchsorted adds eps to the last
    # knot of the tensor it receives in place, so the gathers below see the
    # adjusted knot (widths/heights were already computed above).
    if inverse:
        bin_idx = _searchsorted(cumheights, inputs)[..., None]
    else:
        bin_idx = _searchsorted(cumwidths, inputs)[..., None]
    # Per-input parameters of the selected bin.
    input_cumwidths = paddle_gather(cumwidths, -1, bin_idx)[..., 0]
    input_bin_widths = paddle_gather(widths, -1, bin_idx)[..., 0]

    input_cumheights = paddle_gather(cumheights, -1, bin_idx)[..., 0]
    # Slope of the straight line across each bin.
    delta = heights / widths
    input_delta = paddle_gather(delta, -1, bin_idx)[..., 0]

    # Derivatives at the left and right knot of the selected bin.
    input_derivatives = paddle_gather(derivatives, -1, bin_idx)[..., 0]
    input_derivatives_plus_one = paddle_gather(derivatives[..., 1:], -1,
                                               bin_idx)[..., 0]

    input_heights = paddle_gather(heights, -1, bin_idx)[..., 0]

    if inverse:
        # Coefficients of the quadratic a * root^2 + b * root + c = 0 whose
        # solution is the relative position inside the bin.
        a = (inputs - input_cumheights) * (
            input_derivatives + input_derivatives_plus_one - 2 * input_delta
        ) + input_heights * (input_delta - input_derivatives)
        b = input_heights * input_derivatives - (inputs - input_cumheights) * (
            input_derivatives + input_derivatives_plus_one - 2 * input_delta)
        c = -input_delta * (inputs - input_cumheights)

        discriminant = b.pow(2) - 4 * a * c
        assert (discriminant >= 0).all()

        # Root written as 2c / (-b - sqrt(disc)) rather than the textbook
        # (-b + sqrt(disc)) / 2a form.
        root = (2 * c) / (-b - paddle.sqrt(discriminant))
        outputs = root * input_bin_widths + input_cumwidths

        theta_one_minus_theta = root * (1 - root)
        denominator = input_delta + (
            (input_derivatives + input_derivatives_plus_one - 2 * input_delta
             ) * theta_one_minus_theta)
        derivative_numerator = input_delta.pow(2) * (
            input_derivatives_plus_one * root.pow(2) + 2 * input_delta *
            theta_one_minus_theta + input_derivatives * (1 - root).pow(2))
        logabsdet = paddle.log(derivative_numerator) - 2 * paddle.log(
            denominator)

        # The inverse map has the negated log-determinant of the forward map.
        return outputs, -logabsdet
    else:
        # Relative position of the input inside its bin.
        theta = (inputs - input_cumwidths) / input_bin_widths
        theta_one_minus_theta = theta * (1 - theta)

        numerator = input_heights * (input_delta * theta.pow(2) +
                                     input_derivatives * theta_one_minus_theta)
        denominator = input_delta + (
            (input_derivatives + input_derivatives_plus_one - 2 * input_delta
             ) * theta_one_minus_theta)
        outputs = input_cumheights + numerator / denominator

        derivative_numerator = input_delta.pow(2) * (
            input_derivatives_plus_one * theta.pow(2) + 2 * input_delta *
            theta_one_minus_theta + input_derivatives * (1 - theta).pow(2))
        logabsdet = paddle.log(derivative_numerator) - 2 * paddle.log(
            denominator)

        return outputs, logabsdet


def _searchsorted(bin_locations, inputs, eps=1e-6):
    """Return, for each input, the index of the bin it falls into.

    The index is the number of entries of ``bin_locations`` (along the last
    axis) that are less than or equal to the input, minus one.

    Note:
        ``bin_locations`` is modified in place: ``eps`` is added to its last
        entry so that an input equal to the upper bound still lands in the
        final bin. Callers observe this change.

    Args:
        bin_locations: Bin edges along the last axis.
        inputs: Values to locate; compared against the edges after gaining
            a trailing axis.
        eps: Amount added to the last edge.

    Returns:
        int64 tensor of bin indices.
    """
    # In-place bump of the right-most edge (visible to the caller).
    bin_locations[..., -1] += eps
    edges_not_above = paddle.cast(
        inputs[..., None] >= bin_locations, dtype='int64')
    return paddle.sum(edges_not_above, axis=-1) - 1


================================================
FILE: paddlespeech/t2s/models/vits/vits.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""VITS module"""
import math
from typing import Any
from typing import Dict
from typing import Optional

import paddle
from paddle import nn
from typeguard import typechecked

from paddlespeech.t2s.models.hifigan import HiFiGANMultiPeriodDiscriminator
from paddlespeech.t2s.models.hifigan import HiFiGANMultiScaleDiscriminator
from paddlespeech.t2s.models.hifigan import HiFiGANMultiScaleMultiPeriodDiscriminator
from paddlespeech.t2s.models.hifigan import HiFiGANPeriodDiscriminator
from paddlespeech.t2s.models.hifigan import HiFiGANScaleDiscriminator
from paddlespeech.t2s.models.vits.generator import VITSGenerator
from paddlespeech.utils.initialize import _calculate_fan_in_and_fan_out
from paddlespeech.utils.initialize import kaiming_uniform_
from paddlespeech.utils.initialize import normal_
from paddlespeech.utils.initialize import ones_
from paddlespeech.utils.initialize import uniform_
from paddlespeech.utils.initialize import zeros_

# Registry mapping the ``generator_type`` string accepted by ``VITS`` to the
# generator class to instantiate.
AVAILABLE_GENERATERS = {
    "vits_generator": VITSGenerator,
}
# Registry mapping the ``discriminator_type`` string accepted by ``VITS`` to
# the HiFiGAN discriminator class to instantiate.
AVAILABLE_DISCRIMINATORS = {
    "hifigan_period_discriminator":
    HiFiGANPeriodDiscriminator,
    "hifigan_scale_discriminator":
    HiFiGANScaleDiscriminator,
    "hifigan_multi_period_discriminator":
    HiFiGANMultiPeriodDiscriminator,
    "hifigan_multi_scale_discriminator":
    HiFiGANMultiScaleDiscriminator,
    "hifigan_multi_scale_multi_period_discriminator":
    HiFiGANMultiScaleMultiPeriodDiscriminator,
}


class VITS(nn.Layer):
    """VITS module (generator + discriminator).
    This is a module of VITS described in `Conditional Variational Autoencoder
    with Adversarial Learning for End-to-End Text-to-Speech`_.
    .. _`Conditional Variational Autoencoder with Adversarial Learning for End-to-End
        Text-to-Speech`: https://arxiv.org/abs/2106.06103
    """

    @typechecked
    def __init__(
            self,
            # generator related
            idim: int,
            odim: int,
            sampling_rate: int=22050,
            generator_type: str="vits_generator",
            generator_params: Dict[str, Any]={
                "hidden_channels": 192,
                "spks": None,
                "langs": None,
                "spk_embed_dim": None,
                "global_channels": -1,
                "segment_size": 32,
                "text_encoder_attention_heads": 2,
                "text_encoder_ffn_expand": 4,
                "text_encoder_blocks": 6,
                "text_encoder_positionwise_layer_type": "conv1d",
                "text_encoder_positionwise_conv_kernel_size": 1,
                "text_encoder_positional_encoding_layer_type": "rel_pos",
                "text_encoder_self_attention_layer_type": "rel_selfattn",
                "text_encoder_activation_type": "swish",
                "text_encoder_normalize_before": True,
                "text_encoder_dropout_rate": 0.1,
                "text_encoder_positional_dropout_rate": 0.0,
                "text_encoder_attention_dropout_rate": 0.0,
                "text_encoder_conformer_kernel_size": 7,
                "use_macaron_style_in_text_encoder": True,
                "use_conformer_conv_in_text_encoder": True,
                "decoder_kernel_size": 7,
                "decoder_channels": 512,
                "decoder_upsample_scales": [8, 8, 2, 2],
                "decoder_upsample_kernel_sizes": [16, 16, 4, 4],
                "decoder_resblock_kernel_sizes": [3, 7, 11],
                "decoder_resblock_dilations": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
                "use_weight_norm_in_decoder": True,
                "posterior_encoder_kernel_size": 5,
                "posterior_encoder_layers": 16,
                "posterior_encoder_stacks": 1,
                "posterior_encoder_base_dilation": 1,
                "posterior_encoder_dropout_rate": 0.0,
                "use_weight_norm_in_posterior_encoder": True,
                "flow_flows": 4,
                "flow_kernel_size": 5,
                "flow_base_dilation": 1,
                "flow_layers": 4,
                "flow_dropout_rate": 0.0,
                "use_weight_norm_in_flow": True,
                "use_only_mean_in_flow": True,
                "stochastic_duration_predictor_kernel_size": 3,
                "stochastic_duration_predictor_dropout_rate": 0.5,
                "stochastic_duration_predictor_flows": 4,
                "stochastic_duration_predictor_dds_conv_layers": 3,
            },
            # discriminator related
            discriminator_type: str="hifigan_multi_scale_multi_period_discriminator",
            discriminator_params: Dict[str, Any]={
                "scales": 1,
                "scale_downsample_pooling": "AvgPool1D",
                "scale_downsample_pooling_params": {
                    "kernel_size": 4,
                    "stride": 2,
                    "padding": 2,
                },
                "scale_discriminator_params": {
                    "in_channels": 1,
                    "out_channels": 1,
                    "kernel_sizes": [15, 41, 5, 3],
                    "channels": 128,
                    "max_downsample_channels": 1024,
                    "max_groups": 16,
                    "bias": True,
                    "downsample_scales": [2, 2, 4, 4, 1],
                    "nonlinear_activation": "leakyrelu",
                    "nonlinear_activation_params": {
                        "negative_slope": 0.1
                    },
                    "use_weight_norm": True,
                    "use_spectral_norm": False,
                },
                "follow_official_norm": False,
                "periods": [2, 3, 5, 7, 11],
                "period_discriminator_params": {
                    "in_channels": 1,
                    "out_channels": 1,
                    "kernel_sizes": [5, 3],
                    "channels": 32,
                    "downsample_scales": [3, 3, 3, 3, 1],
                    "max_downsample_channels": 1024,
                    "bias": True,
                    "nonlinear_activation": "leakyrelu",
                    "nonlinear_activation_params": {
                        "negative_slope": 0.1
                    },
                    "use_weight_norm": True,
                    "use_spectral_norm": False,
                },
            },
            cache_generator_outputs: bool=True, ):
        """Initialize VITS module.
        Args:
            idim (int):
                Input vocabulary size.
            odim (int):
                Acoustic feature dimension. The actual output channels will
                be 1 since VITS is the end-to-end text-to-wave model but for the
                compatibility odim is used to indicate the acoustic feature dimension.
            sampling_rate (int):
                Sampling rate, not used for the training but it will
                be referred in saving waveform during the inference.
            generator_type (str):
                Generator type, a key of ``AVAILABLE_GENERATERS``.
            generator_params (Dict[str, Any]):
                Parameter dict for generator. It is not modified; a copy is
                extended with ``vocabs`` and ``aux_channels``.
            discriminator_type (str):
                Discriminator type, a key of ``AVAILABLE_DISCRIMINATORS``.
            discriminator_params (Dict[str, Any]):
                Parameter dict for discriminator.
            cache_generator_outputs (bool):
                Whether to cache generator outputs.
        Raises:
            KeyError: If ``generator_type`` or ``discriminator_type`` is not
                a registered key.
        """
        super().__init__()

        # define modules
        generator_class = AVAILABLE_GENERATERS[generator_type]
        if generator_type == "vits_generator":
            # NOTE: Update parameters for the compatibility.
            #   The idim and odim is automatically decided from input data,
            #   where idim represents #vocabularies and odim represents
            #   the input acoustic feature dimension.
            # Work on a shallow copy so that neither the shared default dict
            # of this signature nor a caller-owned dict is mutated.
            generator_params = dict(generator_params)
            generator_params.update(vocabs=idim, aux_channels=odim)
        self.generator = generator_class(
            **generator_params, )
        discriminator_class = AVAILABLE_DISCRIMINATORS[discriminator_type]
        self.discriminator = discriminator_class(
            **discriminator_params, )

        # cache
        self.cache_generator_outputs = cache_generator_outputs
        self._cache = None

        # store sampling rate for saving wav file
        # (not used for the training)
        self.fs = sampling_rate

        # store parameters for test compatibility
        self.spks = self.generator.spks
        self.langs = self.generator.langs
        self.spk_embed_dim = self.generator.spk_embed_dim

        # Flags recording whether the latest generator / discriminator
        # forward pass reused the cached generator outputs.
        self.reuse_cache_gen = True
        self.reuse_cache_dis = True

        # Generic initialization first, then the sub-module specific resets
        # of the decoder and the text encoder.
        self.reset_parameters()
        self.generator.decoder.reset_parameters()
        self.generator.text_encoder.reset_parameters()

    def forward(
            self,
            text: paddle.Tensor,
            text_lengths: paddle.Tensor,
            feats: paddle.Tensor,
            feats_lengths: paddle.Tensor,
            sids: Optional[paddle.Tensor]=None,
            spembs: Optional[paddle.Tensor]=None,
            lids: Optional[paddle.Tensor]=None,
            forward_generator: bool=True, ) -> Dict[str, Any]:
        """Perform generator forward.
        Args:
            text (Tensor):
                Text index tensor (B, T_text).
            text_lengths (Tensor):
                Text length tensor (B,).
            feats (Tensor):
                Feature tensor (B, T_feats, aux_channels).
            feats_lengths (Tensor):
                Feature length tensor (B,).
            sids (Optional[Tensor]):
                Speaker index tensor (B,) or (B, 1).
            spembs (Optional[Tensor]):
                Speaker embedding tensor (B, spk_embed_dim).
            lids (Optional[Tensor]):
                Language index tensor (B,) or (B, 1).
            forward_generator (bool):
                    Whether to forward generator.
        Returns:
            The generator outputs, returned unchanged from
            ``_forward_generator`` or ``_forward_discrminator`` depending on
            ``forward_generator``.
        """
        if forward_generator:
            return self._forward_generator(
                text=text,
                text_lengths=text_lengths,
                feats=feats,
                feats_lengths=feats_lengths,
                sids=sids,
                spembs=spembs,
                lids=lids, )
        else:
            return self._forward_discrminator(
                text=text,
                text_lengths=text_lengths,
                feats=feats,
                feats_lengths=feats_lengths,
                sids=sids,
                spembs=spembs,
                lids=lids, )

    def _forward_generator(
            self,
            text: paddle.Tensor,
            text_lengths: paddle.Tensor,
            feats: paddle.Tensor,
            feats_lengths: paddle.Tensor,
            sids: Optional[paddle.Tensor]=None,
            spembs: Optional[paddle.Tensor]=None,
            lids: Optional[paddle.Tensor]=None, ) -> Dict[str, Any]:
        """Perform generator forward.
        Args:
            text (Tensor):
                Text index tensor (B, T_text).
            text_lengths (Tensor):
                Text length tensor (B,).
            feats (Tensor):
                Feature tensor (B, T_feats, aux_channels).
            feats_lengths (Tensor):
                Feature length tensor (B,).
            sids (Optional[Tensor]):
                Speaker index tensor (B,) or (B, 1).
            spembs (Optional[Tensor]):
                Speaker embedding tensor (B, spk_embed_dim).
            lids (Optional[Tensor]):
                Language index tensor (B,) or (B, 1).
        Returns:
            The generator outputs: freshly computed when caching is disabled
            or the cache is empty, otherwise the cached outputs.
        """
        # setup
        # (B, T_feats, aux_channels) -> (B, aux_channels, T_feats)
        feats = feats.transpose([0, 2, 1])

        # calculate generator outputs
        self.reuse_cache_gen = True
        if not self.cache_generator_outputs or self._cache is None:
            self.reuse_cache_gen = False
            outs = self.generator(
                text=text,
                text_lengths=text_lengths,
                feats=feats,
                feats_lengths=feats_lengths,
                sids=sids,
                spembs=spembs,
                lids=lids, )
        else:
            outs = self._cache

        # store cache
        # (only in training mode, and only for freshly computed outputs)
        if self.training and self.cache_generator_outputs and not self.reuse_cache_gen:
            self._cache = outs

        return outs

    def _forward_discrminator(
            self,
            text: paddle.Tensor,
            text_lengths: paddle.Tensor,
            feats: paddle.Tensor,
            feats_lengths: paddle.Tensor,
            sids: Optional[paddle.Tensor]=None,
            spembs: Optional[paddle.Tensor]=None,
            lids: Optional[paddle.Tensor]=None, ) -> Dict[str, Any]:
        """Perform discriminator forward.
        Args:
            text (Tensor):
                Text index tensor (B, T_text).
            text_lengths (Tensor):
                Text length tensor (B,).
            feats (Tensor):
                Feature tensor (B, T_feats, aux_channels).
            feats_lengths (Tensor):
                Feature length tensor (B,).
            sids (Optional[Tensor]):
                Speaker index tensor (B,) or (B, 1).
            spembs (Optional[Tensor]):
                Speaker embedding tensor (B, spk_embed_dim).
            lids (Optional[Tensor]):
                Language index tensor (B,) or (B, 1).
        Returns:
            The generator outputs: freshly computed when caching is disabled
            or the cache is empty, otherwise the cached outputs. The
            discriminator itself is not called here.
        """
        # setup
        # (B, T_feats, aux_channels) -> (B, aux_channels, T_feats)
        feats = feats.transpose([0, 2, 1])

        # calculate generator outputs
        self.reuse_cache_dis = True
        if not self.cache_generator_outputs or self._cache is None:
            self.reuse_cache_dis = False
            outs = self.generator(
                text=text,
                text_lengths=text_lengths,
                feats=feats,
                feats_lengths=feats_lengths,
                sids=sids,
                spembs=spembs,
                lids=lids, )
        else:
            outs = self._cache

        # store cache
        # (unlike the generator path, this does not check self.training)
        if self.cache_generator_outputs and not self.reuse_cache_dis:
            self._cache = outs

        return outs

    def inference(
            self,
            text: paddle.Tensor,
            feats: Optional[paddle.Tensor]=None,
            sids: Optional[paddle.Tensor]=None,
            spembs: Optional[paddle.Tensor]=None,
            lids: Optional[paddle.Tensor]=None,
            durations: Optional[paddle.Tensor]=None,
            noise_scale: float=0.667,
            noise_scale_dur: float=0.8,
            alpha: float=1.0,
            max_len: Optional[int]=None,
            use_teacher_forcing: bool=False, ) -> Dict[str, paddle.Tensor]:
        """Run inference.
        Args:
            text (Tensor):
                Input text index tensor (T_text,).
            feats (Tensor):
                Feature tensor (T_feats, aux_channels).
                Required when ``use_teacher_forcing`` is True.
            sids (Tensor):
                Speaker index tensor (1,).
            spembs (Optional[Tensor]):
                Speaker embedding tensor (spk_embed_dim,).
            lids (Tensor):
                Language index tensor (1,).
            durations (Tensor):
                Ground-truth duration tensor (T_text,).
            noise_scale (float):
                Noise scale value for flow.
            noise_scale_dur (float):
                Noise scale value for duration predictor.
            alpha (float):
                Alpha parameter to control the speed of generated speech.
            max_len (Optional[int]):
                Maximum length.
            use_teacher_forcing (bool):
                Whether to use teacher forcing.
        Returns:
            Dict[str, Tensor]:
                * wav (Tensor):
                    Generated waveform tensor (T_wav,).
                * att_w (Tensor):
                    Monotonic attention weight tensor (T_feats, T_text).
                * duration (Tensor):
                    Predicted duration tensor (T_text,).
        """
        # setup
        # add the batch axis and derive the text length from the shape
        text = text[None]
        text_lengths = paddle.to_tensor(paddle.shape(text)[1])

        if durations is not None:
            durations = paddle.reshape(durations, [1, 1, -1])

        # inference
        if use_teacher_forcing:
            assert feats is not None
            # (T_feats, aux_channels) -> (1, aux_channels, T_feats)
            feats = feats[None].transpose([0, 2, 1])
            feats_lengths = paddle.to_tensor(paddle.shape(feats)[2])
            wav, att_w, dur = self.generator.inference(
                text=text,
                text_lengths=text_lengths,
                feats=feats,
                feats_lengths=feats_lengths,
                sids=sids,
                spembs=spembs,
                lids=lids,
                max_len=max_len,
                use_teacher_forcing=use_teacher_forcing, )
        else:
            wav, att_w, dur = self.generator.inference(
                text=text,
                text_lengths=text_lengths,
                sids=sids,
                spembs=spembs,
                lids=lids,
                dur=durations,
                noise_scale=noise_scale,
                noise_scale_dur=noise_scale_dur,
                alpha=alpha,
                max_len=max_len, )
        # flatten the waveform and drop the batch axis of the other outputs
        return dict(
            wav=paddle.reshape(wav, [-1]), att_w=att_w[0], duration=dur[0])

    def voice_conversion(
            self,
            feats: paddle.Tensor,
            sids_src: Optional[paddle.Tensor]=None,
            sids_tgt: Optional[paddle.Tensor]=None,
            spembs_src: Optional[paddle.Tensor]=None,
            spembs_tgt: Optional[paddle.Tensor]=None,
            lids: Optional[paddle.Tensor]=None, ) -> Dict[str, paddle.Tensor]:
        """Run voice conversion.
        Args:
            feats (Tensor):
                Feature tensor (T_feats, aux_channels).
            sids_src (Optional[Tensor]):
                Speaker index tensor of source feature (1,).
            sids_tgt (Optional[Tensor]):
                Speaker index tensor of target feature (1,).
            spembs_src (Optional[Tensor]):
                Speaker embedding tensor of source feature (spk_embed_dim,).
            spembs_tgt (Optional[Tensor]):
                Speaker embedding tensor of target feature (spk_embed_dim,).
            lids (Optional[Tensor]):
                Language index tensor (1,).
        Returns:
            Dict[str, Tensor]:
                * wav (Tensor):
                    Generated waveform tensor (T_wav,).
        Raises:
            AssertionError: If ``feats`` is None, or if all speaker indices
                and all speaker embeddings are None.
        """
        assert feats is not None
        # (T_feats, aux_channels) -> (1, aux_channels, T_feats)
        feats = feats[None].transpose([0, 2, 1])
        feats_lengths = paddle.to_tensor(paddle.shape(feats)[2])

        sids_none = sids_src is None and sids_tgt is None
        spembs_none = spembs_src is None and spembs_tgt is None

        # at least one kind of speaker conditioning must be provided
        assert not sids_none or not spembs_none

        wav = self.generator.voice_conversion(
            feats,
            feats_lengths,
            sids_src,
            sids_tgt,
            spembs_src,
            spembs_tgt,
            lids, )

        return dict(wav=paddle.reshape(wav, [-1]))

    def reset_parameters(self):
        """Re-initialize the parameters of every sub-layer by layer type.

        * Conv1D / Conv1DTranspose / Conv2D / Conv2DTranspose and Linear:
          ``kaiming_uniform_`` weights with ``a=sqrt(5)``; bias drawn
          uniformly from ``[-1/sqrt(fan_in), 1/sqrt(fan_in)]``.
        * BatchNorm1D / BatchNorm2D / GroupNorm / LayerNorm: weight set to
          ones and bias to zeros.
        * Embedding: ``normal_`` weights, with the padding row zeroed when a
          padding index is configured.
        """

        def _reset_parameters(module):
            if isinstance(
                    module,
                (nn.Conv1D, nn.Conv1DTranspose, nn.Conv2D, nn.Conv2DTranspose)):
                kaiming_uniform_(module.weight, a=math.sqrt(5))
                if module.bias is not None:
                    fan_in, _ = _calculate_fan_in_and_fan_out(module.weight)
                    # bias is left untouched when fan_in is zero
                    if fan_in != 0:
                        bound = 1 / math.sqrt(fan_in)
                        uniform_(module.bias, -bound, bound)

            if isinstance(
                    module,
                (nn.BatchNorm1D, nn.BatchNorm2D, nn.GroupNorm, nn.LayerNorm)):
                ones_(module.weight)
                zeros_(module.bias)

            if isinstance(module, nn.Linear):
                kaiming_uniform_(module.weight, a=math.sqrt(5))
                if module.bias is not None:
                    fan_in, _ = _calculate_fan_in_and_fan_out(module.weight)
                    # a non-positive fan_in degenerates to a zero bound
                    bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
                    uniform_(module.bias, -bound, bound)

            if isinstance(module, nn.Embedding):
                normal_(module.weight)
                if module._padding_idx is not None:
                    with paddle.no_grad():
                        module.weight[module._padding_idx] = 0

        self.apply(_reset_parameters)


class VITSInference(nn.Layer):
    """Thin wrapper exposing only the waveform of a VITS-style model.

    Args:
        model: Object providing ``inference(text, sids=...)`` that returns a
            mapping containing the key ``'wav'``.
    """

    def __init__(self, model):
        super().__init__()
        self.acoustic_model = model

    def forward(self, text, sids=None):
        """Run the wrapped model's ``inference`` and return its ``'wav'`` entry.

        Args:
            text: Text input forwarded positionally to ``inference``.
            sids: Optional speaker ids forwarded as ``sids``.

        Returns:
            The value stored under ``'wav'`` in the inference result.
        """
        result = self.acoustic_model.inference(text, sids=sids)
        return result['wav']


================================================
FILE: paddlespeech/t2s/models/vits/vits_updater.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from typing import Dict

import paddle
from paddle import distributed as dist
from paddle.io import DataLoader
from paddle.nn import Layer
from paddle.optimizer import Optimizer
from paddle.optimizer.lr import LRScheduler

from paddlespeech.t2s.modules.nets_utils import get_segments
from paddlespeech.t2s.training.extensions.evaluator import StandardEvaluator
from paddlespeech.t2s.training.reporter import report
from paddlespeech.t2s.training.updaters.standard_updater import StandardUpdater
from paddlespeech.t2s.training.updaters.standard_updater import UpdaterState

# Configure the record format of the root logger (timestamp, level,
# file:line, message) at import time.
logging.basicConfig(
    format='%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(message)s',
    datefmt='[%Y-%m-%d %H:%M:%S]')
# Module-level logger shared by the updater and evaluator below; each of
# them attaches its own file handler to it.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class VITSUpdater(StandardUpdater):
    """Training updater that alternates VITS generator and discriminator steps.

    The parent ``__init__`` is intentionally not called; all state used by
    this class is set up in ``__init__`` below.
    """

    def __init__(self,
                 model: Layer,
                 optimizers: Dict[str, Optimizer],
                 criterions: Dict[str, Layer],
                 schedulers: Dict[str, LRScheduler],
                 dataloader: DataLoader,
                 generator_train_start_steps: int=0,
                 discriminator_train_start_steps: int=100000,
                 lambda_adv: float=1.0,
                 lambda_mel: float=45.0,
                 lambda_feat_match: float=2.0,
                 lambda_dur: float=1.0,
                 lambda_kl: float=1.0,
                 generator_first: bool=False,
                 output_dir=None):
        """Store the model, optimizers, losses and logging setup.

        Args:
            model: The VITS model; if it is a ``paddle.DataParallel`` the
                wrapped ``_layers`` object is used for the forward passes.
            optimizers: Must contain the keys ``'generator'`` and
                ``'discriminator'``.
            criterions: Must contain the keys ``'mel'``, ``'feat_match'``,
                ``'gen_adv'``, ``'dis_adv'`` and ``'kl'``.
            schedulers: Must contain the keys ``'generator'`` and
                ``'discriminator'``.
            dataloader: Training data loader; an iterator over it is created
                immediately.
            generator_train_start_steps: Stored on the instance; not read
                anywhere in this class.
            discriminator_train_start_steps: Stored on the instance; not read
                anywhere in this class.
            lambda_adv: Weight of the generator adversarial loss.
            lambda_mel: Weight of the mel loss.
            lambda_feat_match: Weight of the feature matching loss.
            lambda_dur: Weight of the duration loss.
            lambda_kl: Weight of the KL loss.
            generator_first: If True the generator is updated before the
                discriminator in each step, otherwise after it.
            output_dir: Directory for the per-rank log file; combined with
                the file name using the ``/`` operator.
        """
        # it is designed to hold multiple models
        # A single model is passed in and the parent class's __init__() is
        # not used, so this part has to be written again here.
        models = {"main": model}
        self.models: Dict[str, Layer] = models
        # self.model = model

        # Unwrap DataParallel so sub-modules and attributes such as
        # ``generator`` and ``_cache`` are reachable directly.
        self.model = model._layers if isinstance(model,
                                                 paddle.DataParallel) else model

        self.optimizers = optimizers
        self.optimizer_g: Optimizer = optimizers['generator']
        self.optimizer_d: Optimizer = optimizers['discriminator']

        self.criterions = criterions
        self.criterion_mel = criterions['mel']
        self.criterion_feat_match = criterions['feat_match']
        self.criterion_gen_adv = criterions["gen_adv"]
        self.criterion_dis_adv = criterions["dis_adv"]
        self.criterion_kl = criterions["kl"]

        self.schedulers = schedulers
        self.scheduler_g = schedulers['generator']
        self.scheduler_d = schedulers['discriminator']

        self.dataloader = dataloader

        self.generator_train_start_steps = generator_train_start_steps
        self.discriminator_train_start_steps = discriminator_train_start_steps

        self.lambda_adv = lambda_adv
        self.lambda_mel = lambda_mel
        self.lambda_feat_match = lambda_feat_match
        self.lambda_dur = lambda_dur
        self.lambda_kl = lambda_kl

        # Order in which the two sub-networks are updated within one step.
        if generator_first:
            self.turns = ["generator", "discriminator"]
        else:
            self.turns = ["discriminator", "generator"]

        self.state = UpdaterState(iteration=0, epoch=0)
        self.train_iterator = iter(self.dataloader)

        # NOTE(review): with the default output_dir=None (or a plain str)
        # the ``/`` below raises TypeError -- callers presumably always pass
        # a pathlib.Path; confirm.
        log_file = output_dir / 'worker_{}.log'.format(dist.get_rank())
        self.filehandler = logging.FileHandler(str(log_file))
        logger.addHandler(self.filehandler)
        self.logger = logger
        self.msg = ""

    def update_core(self, batch):
        """Run one training step: one update per entry of ``self.turns``.

        Args:
            batch: Mapping with the keys ``"speech"``, ``"text"``,
                ``"text_lengths"``, ``"feats"``, ``"feats_lengths"`` and
                optionally ``"spk_id"`` / ``"spk_emb"``.

        Side effects:
            Steps both optimizers (and the schedulers at epoch boundaries),
            reports the losses through ``report`` and rebuilds ``self.msg``
            with the loss values of this step.
        """
        self.msg = "Rank: {}, ".format(dist.get_rank())
        losses_dict = {}

        for turn in self.turns:
            speech = batch["speech"]
            # insert an axis at position 1 of the waveform tensor
            speech = speech.unsqueeze(1)
            outs = self.model(
                text=batch["text"],
                text_lengths=batch["text_lengths"],
                feats=batch["feats"],
                feats_lengths=batch["feats_lengths"],
                sids=batch.get("spk_id", None),
                spembs=batch.get("spk_emb", None),
                forward_generator=turn == "generator")
            # Generator
            if turn == "generator":
                # parse outputs
                speech_hat_, dur_nll, _, start_idxs, _, z_mask, outs_ = outs
                _, z_p, m_p, logs_p, _, logs_q = outs_
                # Cut the ground-truth waveform segment matching the
                # generated one; indices are scaled by the generator's
                # upsample factor.
                speech_ = get_segments(
                    x=speech,
                    start_idxs=start_idxs *
                    self.model.generator.upsample_factor,
                    segment_size=self.model.generator.segment_size *
                    self.model.generator.upsample_factor, )

                # calculate discriminator outputs
                p_hat = self.model.discriminator(speech_hat_)
                with paddle.no_grad():
                    # do not store discriminator gradient in generator turn
                    p = self.model.discriminator(speech_)

                # calculate losses
                mel_loss = self.criterion_mel(speech_hat_, speech_)
                kl_loss = self.criterion_kl(z_p, logs_q, m_p, logs_p, z_mask)
                dur_loss = paddle.sum(dur_nll)
                adv_loss = self.criterion_gen_adv(p_hat)
                feat_match_loss = self.criterion_feat_match(p_hat, p)

                # apply the per-loss weights before summing
                mel_loss = mel_loss * self.lambda_mel
                kl_loss = kl_loss * self.lambda_kl
                dur_loss = dur_loss * self.lambda_dur
                adv_loss = adv_loss * self.lambda_adv
                feat_match_loss = feat_match_loss * self.lambda_feat_match
                gen_loss = mel_loss + kl_loss + dur_loss + adv_loss + feat_match_loss

                report("train/generator_loss", float(gen_loss))
                report("train/generator_mel_loss", float(mel_loss))
                report("train/generator_kl_loss", float(kl_loss))
                report("train/generator_dur_loss", float(dur_loss))
                report("train/generator_adv_loss", float(adv_loss))
                report("train/generator_feat_match_loss",
                       float(feat_match_loss))

                losses_dict["generator_loss"] = float(gen_loss)
                losses_dict["generator_mel_loss"] = float(mel_loss)
                losses_dict["generator_kl_loss"] = float(kl_loss)
                losses_dict["generator_dur_loss"] = float(dur_loss)
                losses_dict["generator_adv_loss"] = float(adv_loss)
                losses_dict["generator_feat_match_loss"] = float(
                    feat_match_loss)

                self.optimizer_g.clear_grad()
                gen_loss.backward()

                self.optimizer_g.step()
                # learning rate updates on each epoch.
                # (updates_per_epoch is not defined in this class; presumably
                # provided by StandardUpdater -- TODO confirm)
                if self.state.iteration % self.updates_per_epoch == 0:
                    self.scheduler_g.step()

                # reset cache
                # (drop it once this turn has consumed the cached outputs,
                # or whenever the model is not in training mode)
                if self.model.reuse_cache_gen or not self.model.training:
                    self.model._cache = None

            # Discriminator
            elif turn == "discriminator":
                # parse outputs
                speech_hat_, _, _, start_idxs, *_ = outs
                speech_ = get_segments(
                    x=speech,
                    start_idxs=start_idxs *
                    self.model.generator.upsample_factor,
                    segment_size=self.model.generator.segment_size *
                    self.model.generator.upsample_factor, )

                # calculate discriminator outputs
                # (the generated waveform is detached so this loss does not
                # back-propagate into the generator)
                p_hat = self.model.discriminator(speech_hat_.detach())
                p = self.model.discriminator(speech_)

                # calculate losses
                real_loss, fake_loss = self.criterion_dis_adv(p_hat, p)
                dis_loss = real_loss + fake_loss

                report("train/real_loss", float(real_loss))
                report("train/fake_loss", float(fake_loss))
                report("train/discriminator_loss", float(dis_loss))
                losses_dict["real_loss"] = float(real_loss)
                losses_dict["fake_loss"] = float(fake_loss)
                losses_dict["discriminator_loss"] = float(dis_loss)

                self.optimizer_d.clear_grad()
                dis_loss.backward()

                self.optimizer_d.step()
                # learning rate updates on each epoch.
                if self.state.iteration % self.updates_per_epoch == 0:
                    self.scheduler_d.step()

                # reset cache
                # (drop it once this turn has consumed the cached outputs,
                # or whenever the model is not in training mode)
                if self.model.reuse_cache_dis or not self.model.training:
                    self.model._cache = None

        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())


class VITSEvaluator(StandardEvaluator):
    """Evaluator that reports VITS generator and discriminator losses.

    For every batch the model is run once per turn (generator and
    discriminator, in the configured order). The weighted losses are reported
    under the ``eval/`` prefix and a one-line summary is written to the logger.
    """

    def __init__(self,
                 model,
                 criterions: Dict[str, Layer],
                 dataloader: DataLoader,
                 lambda_adv: float=1.0,
                 lambda_mel: float=45.0,
                 lambda_feat_match: float=2.0,
                 lambda_dur: float=1.0,
                 lambda_kl: float=1.0,
                 generator_first: bool=False,
                 output_dir=None):
        """Store the model, the loss layers and the loss weights.

        Args:
            model:
                VITS model, optionally wrapped in ``paddle.DataParallel``.
            criterions (Dict[str, Layer]):
                Loss layers, looked up with the keys ``'mel'``,
                ``'feat_match'``, ``'gen_adv'``, ``'dis_adv'`` and ``'kl'``.
            dataloader (DataLoader):
                Data loader, stored as ``self.dataloader``.
            lambda_adv (float):
                Weight of the generator adversarial loss.
            lambda_mel (float):
                Weight of the mel loss.
            lambda_feat_match (float):
                Weight of the feature matching loss.
            lambda_dur (float):
                Weight of the duration loss.
            lambda_kl (float):
                Weight of the KL loss.
            generator_first (bool):
                If True the generator turn is evaluated before the
                discriminator turn, otherwise the order is reversed.
            output_dir:
                Directory receiving ``worker_<rank>.log``. When ``None`` no
                file handler is attached to the logger.
        """
        # The parent's __init__() is not called here, so the attributes needed
        # for a single-model evaluator are set up by hand.
        models = {"main": model}
        self.models: Dict[str, Layer] = models
        # unwrap DataParallel so that sub-modules can be accessed directly
        self.model = model._layers if isinstance(model,
                                                 paddle.DataParallel) else model

        self.criterions = criterions
        self.criterion_mel = criterions['mel']
        self.criterion_feat_match = criterions['feat_match']
        self.criterion_gen_adv = criterions["gen_adv"]
        self.criterion_dis_adv = criterions["dis_adv"]
        self.criterion_kl = criterions["kl"]

        self.dataloader = dataloader

        self.lambda_adv = lambda_adv
        self.lambda_mel = lambda_mel
        self.lambda_feat_match = lambda_feat_match
        self.lambda_dur = lambda_dur
        self.lambda_kl = lambda_kl

        if generator_first:
            self.turns = ["generator", "discriminator"]
        else:
            self.turns = ["discriminator", "generator"]

        # ``output_dir`` defaults to None; only attach a file handler when a
        # directory is actually given instead of failing on ``None / str``.
        self.filehandler = None
        if output_dir is not None:
            from pathlib import Path

            log_file = Path(output_dir) / 'worker_{}.log'.format(
                dist.get_rank())
            self.filehandler = logging.FileHandler(str(log_file))
            logger.addHandler(self.filehandler)
        self.logger = logger
        self.msg = ""

    def evaluate_core(self, batch):
        """Compute and report the losses of every turn for one batch.

        Args:
            batch (dict):
                Must contain ``"speech"``, ``"text"``, ``"text_lengths"``,
                ``"feats"`` and ``"feats_lengths"``; ``"spk_id"`` and
                ``"spk_emb"`` are optional.
        """
        self.msg = "Evaluate: "
        losses_dict = {}

        for turn in self.turns:
            speech = batch["speech"]
            speech = speech.unsqueeze(1)
            outs = self.model(
                text=batch["text"],
                text_lengths=batch["text_lengths"],
                feats=batch["feats"],
                feats_lengths=batch["feats_lengths"],
                sids=batch.get("spk_id", None),
                spembs=batch.get("spk_emb", None),
                forward_generator=turn == "generator")
            if turn == "generator":
                self._evaluate_generator(speech, outs, losses_dict)
            elif turn == "discriminator":
                self._evaluate_discriminator(speech, outs, losses_dict)

        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())
        self.logger.info(self.msg)

    def _real_segments(self, speech, start_idxs):
        # Slice the reference waveform at the positions of the generated segments.
        generator = self.model.generator
        return get_segments(
            x=speech,
            start_idxs=start_idxs * generator.upsample_factor,
            segment_size=generator.segment_size * generator.upsample_factor, )

    @staticmethod
    def _record(losses_dict, named_losses):
        # Report each loss as ``eval/<name>`` and store it in ``losses_dict``.
        for name, value in named_losses:
            value = float(value)
            report("eval/" + name, value)
            losses_dict[name] = value

    def _evaluate_generator(self, speech, outs, losses_dict):
        # Compute the weighted generator losses from the model outputs.
        speech_hat_, dur_nll, _, start_idxs, _, z_mask, outs_ = outs
        _, z_p, m_p, logs_p, _, logs_q = outs_
        speech_ = self._real_segments(speech, start_idxs)

        # calculate discriminator outputs
        p_hat = self.model.discriminator(speech_hat_)
        with paddle.no_grad():
            # do not store discriminator gradient in generator turn
            p = self.model.discriminator(speech_)

        # calculate losses
        mel_loss = self.criterion_mel(speech_hat_, speech_)
        kl_loss = self.criterion_kl(z_p, logs_q, m_p, logs_p, z_mask)
        dur_loss = paddle.sum(dur_nll)
        adv_loss = self.criterion_gen_adv(p_hat)
        feat_match_loss = self.criterion_feat_match(p_hat, p)

        # apply the loss weights
        mel_loss = mel_loss * self.lambda_mel
        kl_loss = kl_loss * self.lambda_kl
        dur_loss = dur_loss * self.lambda_dur
        adv_loss = adv_loss * self.lambda_adv
        feat_match_loss = feat_match_loss * self.lambda_feat_match
        gen_loss = mel_loss + kl_loss + dur_loss + adv_loss + feat_match_loss

        self._record(losses_dict, (
            ("generator_loss", gen_loss),
            ("generator_mel_loss", mel_loss),
            ("generator_kl_loss", kl_loss),
            ("generator_dur_loss", dur_loss),
            ("generator_adv_loss", adv_loss),
            ("generator_feat_match_loss", feat_match_loss), ))

        # reset cache
        if self.model.reuse_cache_gen or not self.model.training:
            self.model._cache = None

    def _evaluate_discriminator(self, speech, outs, losses_dict):
        # Compute the discriminator losses from the model outputs.
        speech_hat_, _, _, start_idxs, *_ = outs
        speech_ = self._real_segments(speech, start_idxs)

        # calculate discriminator outputs
        p_hat = self.model.discriminator(speech_hat_.detach())
        p = self.model.discriminator(speech_)

        # calculate losses
        real_loss, fake_loss = self.criterion_dis_adv(p_hat, p)
        dis_loss = real_loss + fake_loss

        self._record(losses_dict, (
            ("real_loss", real_loss),
            ("fake_loss", fake_loss),
            ("discriminator_loss", dis_loss), ))

        # reset cache
        if self.model.reuse_cache_dis or not self.model.training:
            self.model._cache = None


================================================
FILE: paddlespeech/t2s/models/vits/wavenet/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/models/vits/wavenet/residual_block.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
import math
from typing import Optional
from typing import Tuple

import paddle
import paddle.nn.functional as F
from paddle import nn


class ResidualBlock(nn.Layer):
    """Gated residual unit of the WaveNet stack.

    A dilated convolution feeds a tanh/sigmoid gate, optionally biased by
    local and global conditioning, followed by a single 1x1 convolution that
    produces both the residual and the skip outputs.
    """

    def __init__(
            self,
            kernel_size: int=3,
            residual_channels: int=64,
            gate_channels: int=128,
            skip_channels: int=64,
            aux_channels: int=80,
            global_channels: int=-1,
            dropout_rate: float=0.0,
            dilation: int=1,
            bias: bool=True,
            scale_residual: bool=False, ):
        """Build the layers of the block.

        Args:
            kernel_size (int):
                Kernel size of the dilated convolution; must be odd.
            residual_channels (int):
                Number of channels of the residual connection.
            gate_channels (int):
                Number of channels produced for the gate; must be even
                because it is split into a tanh half and a sigmoid half.
            skip_channels (int):
                Number of channels of the skip connection.
            aux_channels (int):
                Number of local conditioning channels. No local conditioning
                projection is created when it is not positive.
            global_channels (int):
                Number of global conditioning channels. No global
                conditioning projection is created when it is not positive.
            dropout_rate (float):
                Dropout probability applied to the block input.
            dilation (int):
                Dilation factor of the convolution.
            bias (bool):
                Whether the dilated and the output convolutions use a bias.
            scale_residual (bool):
                Whether to scale the residual output by ``sqrt(0.5)``.

        """
        super().__init__()
        self.dropout_rate = dropout_rate
        self.residual_channels = residual_channels
        self.skip_channels = skip_channels
        self.scale_residual = scale_residual

        # sanity checks on the configuration
        assert (
            kernel_size - 1) % 2 == 0, "Not support even number kernel size."
        assert gate_channels % 2 == 0

        # dilated convolution, padded so that the time length is kept
        self.conv = nn.Conv1D(
            residual_channels,
            gate_channels,
            kernel_size,
            padding=(kernel_size - 1) // 2 * dilation,
            dilation=dilation,
            bias_attr=bias, )

        # optional projection of the local conditioning
        self.conv1x1_aux = nn.Conv1D(
            aux_channels, gate_channels, kernel_size=1,
            bias_attr=False) if aux_channels > 0 else None

        # optional projection of the global conditioning
        self.conv1x1_glo = nn.Conv1D(
            global_channels, gate_channels, kernel_size=1,
            bias_attr=False) if global_channels > 0 else None

        # NOTE: the residual 1x1 conv and the skip 1x1 conv are fused into a
        #   single conv for efficiency; its input is one half of the gate.
        self.conv1x1_out = nn.Conv1D(
            gate_channels // 2,
            residual_channels + skip_channels,
            kernel_size=1,
            bias_attr=bias)

    def forward(
            self,
            x: paddle.Tensor,
            x_mask: Optional[paddle.Tensor]=None,
            c: Optional[paddle.Tensor]=None,
            g: Optional[paddle.Tensor]=None,
    ) -> Tuple[paddle.Tensor, paddle.Tensor]:
        """Run the block.

        Args:
            x (Tensor): Input tensor (B, residual_channels, T).
            x_mask Optional[paddle.Tensor]: Mask tensor (B, 1, T).
            c (Optional[Tensor]): Local conditioning tensor (B, aux_channels, T).
            g (Optional[Tensor]): Global conditioning tensor (B, global_channels, 1).

        Returns:
            Tensor: Output tensor for residual connection (B, residual_channels, T).
            Tensor: Output tensor for skip connection (B, skip_channels, T).

        """
        shortcut = x
        hidden = F.dropout(x, p=self.dropout_rate, training=self.training)
        hidden = self.conv(hidden)

        # the two channel halves drive the tanh and the sigmoid respectively
        channel_axis = 1
        tanh_in, sigmoid_in = paddle.split(hidden, 2, axis=channel_axis)

        # add local conditioning first, then global conditioning
        for cond, projection in ((c, self.conv1x1_aux),
                                 (g, self.conv1x1_glo)):
            if cond is None:
                continue
            cond_a, cond_b = paddle.split(
                projection(cond), 2, axis=channel_axis)
            tanh_in, sigmoid_in = tanh_in + cond_a, sigmoid_in + cond_b

        gated = paddle.tanh(tanh_in) * F.sigmoid(sigmoid_in)

        # fused residual + skip 1x1 conv
        fused = self.conv1x1_out(gated)
        if x_mask is not None:
            fused = fused * x_mask

        # undo the fusion
        res, skip = paddle.split(
            fused, [self.residual_channels, self.skip_channels], axis=1)

        # residual connection
        res = res + shortcut
        if self.scale_residual:
            res = res * math.sqrt(0.5)

        return res, skip


================================================
FILE: paddlespeech/t2s/models/vits/wavenet/wavenet.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
import math
from typing import Optional

import paddle
from paddle import nn

from paddlespeech.t2s.models.vits.wavenet.residual_block import ResidualBlock


class WaveNet(nn.Layer):
    """Stack of gated residual blocks with optional local/global conditioning."""

    def __init__(
            self,
            in_channels: int=1,
            out_channels: int=1,
            kernel_size: int=3,
            layers: int=30,
            stacks: int=3,
            base_dilation: int=2,
            residual_channels: int=64,
            aux_channels: int=-1,
            gate_channels: int=128,
            skip_channels: int=64,
            global_channels: int=-1,
            dropout_rate: float=0.0,
            bias: bool=True,
            use_weight_norm: bool=True,
            use_first_conv: bool=False,
            use_last_conv: bool=False,
            scale_residual: bool=False,
            scale_skip_connect: bool=False, ):
        """Build the WaveNet.

        Args:
            in_channels (int):
                Number of input channels (used by the first conv only).
            out_channels (int):
                Number of output channels (used by the last conv only).
            kernel_size (int):
                Kernel size of dilated convolution.
            layers (int):
                Number of residual block layers; must be a multiple of ``stacks``.
            stacks (int):
                Number of stacks i.e., dilation cycles.
            base_dilation (int):
                Base dilation factor; layer ``i`` of a stack uses ``base_dilation ** i``.
            residual_channels (int):
                Number of channels in residual conv.
            aux_channels (int):
                Number of channels for local conditioning feature.
            gate_channels (int):
                Number of channels in gated conv.
            skip_channels (int):
                Number of channels in skip conv.
            global_channels (int):
                Number of channels for global conditioning feature.
            dropout_rate (float):
                Dropout rate. 0.0 means no dropout applied.
            bias (bool):
                Whether to use bias parameter in the residual block convs.
            use_weight_norm (bool):
                Whether to use weight norm. If set to true, it will be applied to all of the conv layers.
            use_first_conv (bool):
                Whether to use the first conv layers.
            use_last_conv (bool):
                Whether to use the last conv layers.
            scale_residual (bool):
                Whether to scale the residual outputs.
            scale_skip_connect (bool):
                Whether to scale the skip connection outputs.

        """
        super().__init__()
        self.layers = layers
        self.stacks = stacks
        self.kernel_size = kernel_size
        self.base_dilation = base_dilation
        self.use_first_conv = use_first_conv
        self.use_last_conv = use_last_conv
        self.scale_skip_connect = scale_skip_connect

        # every stack must hold the same number of layers
        assert layers % stacks == 0
        layers_per_stack = layers // stacks

        # optional input projection
        if self.use_first_conv:
            self.first_conv = nn.Conv1D(
                in_channels, residual_channels, kernel_size=1, bias_attr=True)

        # residual blocks; the dilation restarts at the beginning of each stack
        self.conv_layers = nn.LayerList()
        for index in range(layers):
            self.conv_layers.append(
                ResidualBlock(
                    kernel_size=kernel_size,
                    residual_channels=residual_channels,
                    gate_channels=gate_channels,
                    skip_channels=skip_channels,
                    aux_channels=aux_channels,
                    global_channels=global_channels,
                    dilation=base_dilation**(index % layers_per_stack),
                    dropout_rate=dropout_rate,
                    bias=bias,
                    scale_residual=scale_residual, ))

        # optional output head
        if self.use_last_conv:
            self.last_conv = nn.Sequential(
                nn.ReLU(),
                nn.Conv1D(
                    skip_channels, skip_channels, kernel_size=1,
                    bias_attr=True),
                nn.ReLU(),
                nn.Conv1D(
                    skip_channels, out_channels, kernel_size=1, bias_attr=True),
            )

        # weight normalisation is applied last so that it covers every conv
        if use_weight_norm:
            self.apply_weight_norm()

    def forward(
            self,
            x: paddle.Tensor,
            x_mask: Optional[paddle.Tensor]=None,
            c: Optional[paddle.Tensor]=None,
            g: Optional[paddle.Tensor]=None, ) -> paddle.Tensor:
        """Run the network.

        Args:
            x (Tensor):
                Input noise signal (B, 1, T) if use_first_conv else (B, residual_channels, T).
            x_mask (Optional[Tensor]):
                Mask tensor (B, 1, T).
            c (Optional[Tensor]):
                Local conditioning features (B, aux_channels, T).
            g (Optional[Tensor]):
                Global conditioning features (B, global_channels, 1).

        Returns:
            Tensor:
                Output tensor (B, out_channels, T) if use_last_conv, otherwise
                the sum of the skip outputs of all blocks (B, skip_channels, T).

        """
        hidden = self.first_conv(x) if self.use_first_conv else x

        # accumulate the skip outputs of all residual blocks
        skip_total = 0.0
        for block in self.conv_layers:
            hidden, skip = block(hidden, x_mask=x_mask, c=c, g=g)
            skip_total = skip_total + skip

        out = skip_total
        if self.scale_skip_connect:
            out = out * math.sqrt(1.0 / len(self.conv_layers))

        if self.use_last_conv:
            out = self.last_conv(out)

        return out

    def apply_weight_norm(self):
        """Apply weight normalisation to every Conv1D / Conv2D sublayer."""

        def _wrap(sublayer):
            if isinstance(sublayer, (nn.Conv1D, nn.Conv2D)):
                nn.utils.weight_norm(sublayer)

        self.apply(_wrap)

    def remove_weight_norm(self):
        """Remove weight normalisation from every sublayer that has it."""

        def _unwrap(sublayer):
            # a ValueError from a sublayer is ignored on purpose (best effort)
            try:
                nn.utils.remove_weight_norm(sublayer)
            except ValueError:
                pass

        self.apply(_unwrap)


================================================
FILE: paddlespeech/t2s/models/waveflow.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import time
from typing import List
from typing import Tuple
from typing import Union

import numpy as np
import paddle
from paddle import nn
from paddle.nn import functional as F
from paddle.nn import initializer as I

from paddlespeech.t2s.modules import geometry as geo
from paddlespeech.t2s.utils import checkpoint

__all__ = ["WaveFlow", "ConditionalWaveFlow", "WaveFlowLoss"]


def fold(x, n_group):
    """Split the last (time) axis of ``x`` into rows of ``n_group`` steps.

    Args:
        x(Tensor):
            The input tensor. shape=(*, time_steps)
        n_group(int):
            The size of a group.

    Returns:
        Tensor: Folded tensor. shape=(*, time_steps // n_group, group)
    """
    leading_dims = list(x.shape[:-1])
    # the time length is read through paddle.shape, as in the original code
    n_steps = paddle.shape(x)[-1]
    n_rows = n_steps // n_group
    return paddle.reshape(x, leading_dims + [n_rows, n_group])


class UpsampleNet(nn.LayerList):
    """Upsample a mel spectrogram along time to the resolution of the
    corresponding waveform.

    The network is a list of weight-normalised Conv2DTranspose layers, one per
    upsampling factor, each followed by a leaky ReLU in ``forward``.

    Args:
        upsample_factors(List[int]):
            Time upsampling factor of each Conv2DTranspose layer. The
            ``UpsampleNet`` contains ``len(upsample_factors)`` layers and each
            factor is used as the time ``stride`` of the corresponding layer.
            For example ``[16, 16]`` gives an overall factor of 256.

    Notes:
        ``np.prod(upsample_factors)`` should equal the ``hop_length`` of the
        stft transformation used to extract spectrogram features from audio.

        For example, ``16 * 16 = 256``, then the spectrogram extracted with a
        stft transformation whose ``hop_length`` equals 256 is suitable.

        See Also

        ``librosa.core.stft``
    """

    def __init__(self, upsample_factors):
        super().__init__()
        for time_stride in upsample_factors:
            # uniform bound derived from the kernel size (3, 2 * time_stride)
            bound = math.sqrt(1 / (3 * 2 * time_stride))
            uniform = I.Uniform(-bound, bound)
            deconv = nn.Conv2DTranspose(
                1,
                1, (3, 2 * time_stride),
                padding=(1, time_stride // 2),
                stride=(1, time_stride),
                weight_attr=uniform,
                bias_attr=uniform)
            self.append(nn.utils.weight_norm(deconv))

        # overall factor and the per-layer factors
        self.upsample_factor = np.prod(upsample_factors)
        self.upsample_factors = upsample_factors

    def forward(self, x, trim_conv_artifact=False):
        """Upsample ``x`` along the time axis.

        Args:
            x(Tensor):
                The input spectrogram. shape=(batch_size, input_channels, time_steps)
            trim_conv_artifact(bool, optional):
                Trim deconvolution artifact at each layer. Defaults to False.

        Returns:
           Tensor: The upsampled spectrogram. shape=(batch_size, input_channels, time_steps * upsample_factor)

        Notes:
            If trim_conv_artifact is ``True``, the output time steps is less
            than ``time_steps * upsample_factor``.
        """
        out = paddle.unsqueeze(x, 1)  # (B, C, T) -> (B, 1, C, T)
        for deconv in self:
            out = deconv(out)
            if trim_conv_artifact:
                # drop the trailing (kernel - stride) time steps
                n_trim = deconv._kernel_size[1] - deconv._stride[1]
                out = out[:, :, :, :-n_trim]
            out = F.leaky_relu(out, 0.4)
        return paddle.squeeze(out, 1)  # back to (B, C, T)


class ResidualBlock(nn.Layer):
    """ResidualBlock, the basic unit of ResidualNet used in WaveFlow.

    It has a conv2d layer, which has causal padding in height dimension and
    same padding in width dimension. It also has projection for the condition
    and output.

    Args:
        channels (int):
            Feature size of the input.
        cond_channels (int):
            Feature size of the condition.
        kernel_size (Tuple[int]):
            Kernel size (height, width) of the Convolution2d applied to the input.
        dilations (Tuple[int]):
            Dilations (height, width) of the Convolution2d applied to the input.
    """

    def __init__(self, channels, cond_channels, kernel_size, dilations):
        super().__init__()
        # input conv
        # Uniform bound sqrt(1 / fan_in) with fan_in = channels * prod(kernel).
        # The parentheses matter: ``1 / channels * prod`` would evaluate to
        # ``prod / channels`` and disagree with the other initialisers below.
        std = math.sqrt(1 / (channels * np.prod(kernel_size)))
        init = I.Uniform(-std, std)
        receptive_field = [
            1 + (k - 1) * d for (k, d) in zip(kernel_size, dilations)
        ]
        rh, rw = receptive_field
        # pad only the top in height (causal) and both sides in width (same)
        paddings = [rh - 1, 0, rw // 2, (rw - 1) // 2]  # causal & same
        conv = nn.Conv2D(
            channels,
            2 * channels,
            kernel_size,
            padding=paddings,
            dilation=dilations,
            weight_attr=init,
            bias_attr=init)
        self.conv = nn.utils.weight_norm(conv)
        self.rh = rh
        self.rw = rw
        self.dilations = dilations

        # condition projection
        std = math.sqrt(1 / cond_channels)
        init = I.Uniform(-std, std)
        condition_proj = nn.Conv2D(
            cond_channels,
            2 * channels, (1, 1),
            weight_attr=init,
            bias_attr=init)
        self.condition_proj = nn.utils.weight_norm(condition_proj)

        # parametric residual & skip connection
        std = math.sqrt(1 / channels)
        init = I.Uniform(-std, std)
        out_proj = nn.Conv2D(
            channels, 2 * channels, (1, 1), weight_attr=init, bias_attr=init)
        self.out_proj = nn.utils.weight_norm(out_proj)

    def forward(self, x, condition):
        """Compute output for a whole folded sequence.

        Args:
            x (Tensor):
                The input. [shape=(batch_size, channel, height, width)]
            condition (Tensor [shape=(batch_size, condition_channel, height, width)]):
                The local condition.

        Returns:
            res (Tensor):
                The residual output. [shape=(batch_size, channel, height, width)]
            skip (Tensor):
                The skip output. [shape=(batch_size, channel, height, width)]
        """
        x_in = x
        x = self.conv(x)
        x += self.condition_proj(condition)

        # gated activation: one half is the content, the other the gate
        content, gate = paddle.chunk(x, 2, axis=1)
        x = paddle.tanh(content) * F.sigmoid(gate)

        # one projection yields both the residual and the skip branch
        x = self.out_proj(x)
        res, skip = paddle.chunk(x, 2, axis=1)
        res = x_in + res
        return res, skip

    def start_sequence(self):
        """Prepare the layer for incremental computation of causal
        convolution. Reset the buffer for causal convolution.

        Raises:
            ValueError: If not in evaluation mode.
        """
        if self.training:
            raise ValueError("Only use start sequence at evaluation mode.")
        # 1-D placeholder; ``add_input`` replaces it with a real buffer on the
        # first row it receives
        self._conv_buffer = paddle.zeros([1])

        # NOTE: call self.conv's weight norm hook explicitly since
        # its weight will be visited directly in `add_input` without
        # calling its `__call__` method. If we do not trigger the weight
        # norm hook, the weight may be outdated. e.g. after loading from
        # a saved checkpoint
        # see also: https://github.com/pytorch/pytorch/issues/47588
        for hook in self.conv._forward_pre_hooks.values():
            hook(self.conv, None)

    def add_input(self, x_row, condition_row):
        """Compute the output for a row and update the buffer.

        ``start_sequence`` must have been called before the first row.

        Args:
            x_row (Tensor):
                A row of the input. shape=(batch_size, channel, 1, width)
            condition_row (Tensor):
                A row of the condition. shape=(batch_size, condition_channel, 1, width)

        Returns:
            res (Tensor):
                A row of the residual output. shape=(batch_size, channel, 1, width)
            skip (Tensor):
                A row of the skip output. shape=(batch_size, channel, 1, width)

        """
        x_row_in = x_row
        # the placeholder set by ``start_sequence`` is 1-D: allocate the buffer
        if len(paddle.shape(self._conv_buffer)) == 1:
            self._init_buffer(x_row)
        self._update_buffer(x_row)
        rw = self.rw
        # no height padding here: the buffer already holds the past rows
        x_row = F.conv2d(
            self._conv_buffer,
            self.conv.weight,
            self.conv.bias,
            padding=[0, 0, rw // 2, (rw - 1) // 2],
            dilation=self.dilations)
        x_row += self.condition_proj(condition_row)
        content, gate = paddle.chunk(x_row, 2, axis=1)
        x_row = paddle.tanh(content) * F.sigmoid(gate)

        x_row = self.out_proj(x_row)
        res, skip = paddle.chunk(x_row, 2, axis=1)
        res = x_row_in + res
        return res, skip

    def _init_buffer(self, input):
        # Allocate a zero buffer of ``rh`` rows matching the input row.
        batch_size, channels, _, width = input.shape
        self._conv_buffer = paddle.zeros(
            [batch_size, channels, self.rh, width], dtype=input.dtype)

    def _update_buffer(self, input):
        # Drop the oldest row and append the new one along the height axis.
        self._conv_buffer = paddle.concat(
            [self._conv_buffer[:, :, 1:, :], input], axis=2)


class ResidualNet(nn.LayerList):
    """A list of ResidualBlocks that all receive the same condition; the
    network output is the sum of the skip outputs of every block.

    Args:
        n_layer (int):
            Number of ResidualBlocks in the ResidualNet.
        residual_channels (int):
            Feature size of each ResidualBlocks.
        condition_channels (int):
            Feature size of the condition.
        kernel_size (Tuple[int]):
            Kernel size of each ResidualBlock.
        dilations_h (List[int]):
            Dilation in height dimension of every ResidualBlock. The width
            dilation of block ``i`` is ``2 ** i``.

    Raises:
        ValueError: If the length of dilations_h does not equals n_layers.
    """

    def __init__(self,
                 n_layer: int,
                 residual_channels: int,
                 condition_channels: int,
                 kernel_size: Tuple[int],
                 dilations_h: List[int]):
        if n_layer != len(dilations_h):
            raise ValueError(
                "number of dilations_h should equals num of layers")
        super().__init__()
        for index in range(n_layer):
            block = ResidualBlock(residual_channels, condition_channels,
                                  kernel_size, (dilations_h[index], 2**index))
            self.append(block)

    def forward(self, x, condition):
        """Compute the output for a whole folded sequence.

        Args:
            x (Tensor):
                The input. shape=(batch_size, channel, height, width)
            condition (Tensor):
                The local condition. shape=(batch_size, condition_channel, height, width)

        Returns:
            Tensor: The output, which is an aggregation of all the skip outputs. shape=(batch_size, channel, height, width)

        """
        hidden = x
        skips = []
        for block in self:
            hidden, skip = block(hidden, condition)
            skips.append(skip)
        return paddle.sum(paddle.stack(skips, 0), 0)

    def start_sequence(self):
        """Prepare every block for incremental computation.
        """
        for block in self:
            block.start_sequence()

    def add_input(self, x_row, condition_row):
        """Compute the output for a row and update the buffers.

        Args:
            x_row (Tensor):
                A row of the input. shape=(batch_size, channel, 1, width)
            condition_row (Tensor):
                A row of the condition. shape=(batch_size, condition_channel, 1, width)

        Returns:
            Tensor: A row of the output, i.e. the sum of the skip outputs of
                all blocks. shape=(batch_size, channel, 1, width)

        """
        hidden_row = x_row
        skips = []
        for block in self:
            hidden_row, skip = block.add_input(hidden_row, condition_row)
            skips.append(skip)
        return paddle.sum(paddle.stack(skips, 0), 0)


class Flow(nn.Layer):
    """A bijection (reversible layer) that transforms a density of latent
    variables p(Z) into a complex data distribution p(X).

    It is an autoregressive flow along the height axis. ``forward``
    implements probability density estimation (x -> z) and ``inverse``
    implements sampling (z -> x).

    Args:
        n_layers (int):
            Number of ResidualBlocks in the Flow.
        channels (int):
            Feature size of the ResidualBlocks.
        mel_bands (int):
            Feature size of the mel spectrogram (mel bands).
        kernel_size (Tuple[int]):
            Kernel size of each ResidualBlock in the Flow.
        n_group (int):
            Number of timesteps to be folded into a group. Must be a key of
            ``dilations_dict``.
    """
    # Dilations handed to the ResidualNet, looked up by ``n_group``.
    dilations_dict = {
        8: [1, 1, 1, 1, 1, 1, 1, 1],
        16: [1, 1, 1, 1, 1, 1, 1, 1],
        32: [1, 2, 4, 1, 2, 4, 1, 2],
        64: [1, 2, 4, 8, 16, 1, 2, 4],
        128: [1, 2, 4, 8, 16, 32, 64, 1]
    }

    def __init__(self, n_layers, channels, mel_bands, kernel_size, n_group):
        super().__init__()
        # input projection: weight-normalized 1x1 convolution, 1 -> channels
        in_conv = nn.Conv2D(
            1,
            channels, (1, 1),
            weight_attr=I.Uniform(-1., 1.),
            bias_attr=I.Uniform(-1., 1.))
        self.input_proj = nn.utils.weight_norm(in_conv)

        # residual net
        group_dilations = self.dilations_dict[n_group]
        self.resnet = ResidualNet(n_layers, channels, mel_bands, kernel_size,
                                  group_dilations)

        # output projection: zero-initialized 1x1 convolution producing the
        # two transformation parameters (log scale and shift)
        self.output_proj = nn.Conv2D(
            channels,
            2, (1, 1),
            weight_attr=I.Constant(0.),
            bias_attr=I.Constant(0.))

        # specs
        self.n_group = n_group

    def _predict_parameters(self, x, condition):
        """Predict the log scale and shift from the input and the condition."""
        hidden = self.input_proj(x)
        hidden = self.resnet(hidden, condition)
        params = self.output_proj(hidden)
        # the two output channels are (logs, b)
        logs, b = paddle.chunk(params, 2, axis=1)
        return logs, b

    def _transform(self, x, logs, b):
        """Apply the affine transform to every row but the first."""
        first_row = x[:, :, :1, :]  # the first row is copied unchanged
        other_rows = x[:, :, 1:, :] * paddle.exp(logs) + b
        return paddle.concat([first_row, other_rows], axis=2)

    def forward(self, x, condition):
        """Probability density estimation. It is done by inversely
        transforming a sample from p(X) into a sample from p(Z).

        Args:
            x (Tensor):
                An input sample of the distribution p(X). shape=(batch, 1, height, width)
            condition (Tensor):
                The local condition. shape=(batch, condition_channel, height, width)

        Returns:
            z (Tensor):
                shape(batch, 1, height, width), the transformed sample.
            Tuple[Tensor, Tensor]:
                The parameters of the transformation.
                logs (Tensor): shape(batch, 1, height - 1, width), the log scale of the transformation from x to z.
                b (Tensor): shape(batch, 1, height - 1, width), the shift of the transformation from x to z.
        """
        # rows 0..H-2 of x (with condition rows 1..H-1) parameterize the
        # transform of rows 1..H-1 -> parameters are (B, C, H-1, W)
        x_context = x[:, :, :-1, :]
        condition_target = condition[:, :, 1:, :]
        logs, b = self._predict_parameters(x_context, condition_target)
        z = self._transform(x, logs, b)
        return z, (logs, b)

    def _predict_row_parameters(self, x_row, condition_row):
        """Incremental counterpart of ``_predict_parameters`` for one row."""
        hidden_row = self.input_proj(x_row)
        hidden_row = self.resnet.add_input(hidden_row, condition_row)
        params = self.output_proj(hidden_row)
        logs, b = paddle.chunk(params, 2, axis=1)
        return logs, b

    def _inverse_transform_row(self, z_row, logs, b):
        """Invert the affine transform for one row."""
        return (z_row - b) * paddle.exp(-logs)

    def _inverse_row(self, z_row, x_row, condition_row):
        """Compute the next row of x from the previous row of x and the
        current rows of z and the condition."""
        logs, b = self._predict_row_parameters(x_row, condition_row)
        next_row = self._inverse_transform_row(z_row, logs, b)
        return next_row, (logs, b)

    def _start_sequence(self):
        """Prepare the residual net for incremental computation."""
        self.resnet.start_sequence()

    def inverse(self, z, condition):
        """Sampling from the distribution p(X). It is done by transforming a
        sample from p(Z) row by row, in an autoregressive manner.

        Args:
            z (Tensor):
                A sample of the distribution p(Z). shape=(batch, 1, height, width)
            condition (Tensor):
                The local condition. shape=(batch, condition_channel, height, width)
        Returns:
            Tensor:
                The transformed sample. shape=(batch, 1, height, width)
        """
        x = paddle.zeros_like(z)
        # the first row is not transformed
        x[:, :, :1, :] = z[:, :, :1, :]

        self._start_sequence()

        # the loop bound is kept as a tensor, exactly as in the original code
        num_step = paddle.ones([1], dtype='int32') * (self.n_group)
        for i in range(1, num_step):
            prev_x_row = x[:, :, i - 1:i, :]
            cur_z_row = z[:, :, i:i + 1, :]
            cur_condition_row = condition[:, :, i:i + 1, :]
            new_x_row, (logs, b) = self._inverse_row(cur_z_row, prev_x_row,
                                                     cur_condition_row)
            x[:, :, i:i + 1, :] = new_x_row

        return x


class WaveFlow(nn.LayerList):
    """A deep reversible layer that is composed of several autoregressive
    flows.

    Args:
        n_flows (int):
            Number of flows in the WaveFlow model.
        n_layers (int):
            Number of ResidualBlocks in each Flow.
        n_group (int):
            Number of timesteps to fold as a group.
        channels (int):
            Feature size of each ResidualBlock.
        mel_bands (int):
            Feature size of mel spectrogram (mel bands).
        kernel_size (Union[int, List[int]]):
            Kernel size of the convolution layer in each ResidualBlock.

    Raises:
        ValueError: If ``n_group`` or ``n_flows`` is odd.
    """

    def __init__(self, n_flows, n_layers, n_group, channels, mel_bands,
                 kernel_size):
        if n_group % 2 != 0 or n_flows % 2 != 0:
            raise ValueError(
                "number of flows and number of group must be even "
                "since a permutation along group among flows is used.")
        super().__init__()
        for _ in range(n_flows):
            flow = Flow(n_layers, channels, mel_bands, kernel_size, n_group)
            self.append(flow)

        # permutations in h
        self.perms = self._create_perm(n_group, n_flows)

        # specs
        self.n_group = n_group
        self.n_flows = n_flows

    def _create_perm(self, n_group, n_flows):
        """Build one height permutation per flow and register each as a buffer.

        The first half of the flows reverses the whole group; the second
        half reverses each half of the group separately.
        """
        indices = list(range(n_group))
        half = n_group // 2
        reverse_all = indices[::-1]
        reverse_halves = list(reversed(indices[:half])) + list(
            reversed(indices[half:]))

        perms = []
        for flow_idx in range(n_flows):
            order = reverse_all if flow_idx < n_flows // 2 else reverse_halves
            perm = paddle.to_tensor(order)
            self.register_buffer(perm.name, perm)
            perms.append(perm)
        return perms

    def _trim(self, x, condition):
        """Cut ``x`` and ``condition`` along the last axis to the largest
        multiple of ``n_group`` that does not exceed the length of ``x``."""
        assert condition.shape[-1] >= x.shape[-1]
        usable_len = int(paddle.shape(x)[-1] // self.n_group * self.n_group)

        if x.shape[-1] > usable_len:
            x = x[:, :usable_len]
        if condition.shape[-1] > usable_len:
            condition = condition[:, :, :usable_len]
        return x, condition

    def forward(self, x, condition):
        """Probability density estimation of random variable x given the
        condition.

        Args:
            x (Tensor):
                The audio. shape=(batch_size, time_steps)
            condition (Tensor):
                The local condition (mel spectrogram here). shape=(batch_size, condition channel, time_steps)

        Returns:
            Tensor:
                The transformed random variable. shape=(batch_size, time_steps)
            Tensor:
                The log determinant of the jacobian of the transformation from x to z. shape=(1,)
        """
        # x: (B, T)
        # condition: (B, C, T) upsampled condition
        x, condition = self._trim(x, condition)

        # to (B, C, h, T//h) layout
        folded_x = fold(x, self.n_group)
        x = paddle.unsqueeze(paddle.transpose(folded_x, [0, 2, 1]), 1)
        folded_condition = fold(condition, self.n_group)
        condition = paddle.transpose(folded_condition, [0, 1, 3, 2])

        # flows
        all_logs = []
        for flow_idx, flow in enumerate(self):
            x, (logs, b) = flow(x, condition)
            all_logs.append(logs)
            # permute the height axis of both tensors with this flow's perm
            perm = self.perms[flow_idx]
            x = geo.shuffle_dim(x, 2, perm=perm)
            condition = geo.shuffle_dim(condition, 2, perm=perm)

        z = paddle.squeeze(x, 1)  # (B, H, W)
        batch_size = z.shape[0]
        z = paddle.transpose(z, [0, 2, 1])
        z = paddle.reshape(z, [batch_size, -1])

        log_det_jacobian = paddle.sum(paddle.stack(all_logs))
        return z, log_det_jacobian

    def inverse(self, z, condition):
        """Sampling from the distribution p(X).

        It is done by sampling a ``z`` from p(Z) and transforming it into
        ``x``. The flows are applied in reverse order, each one inverted in
        an autoregressive manner.

        Args:
            z (Tensor):
                A sample of the distribution p(Z). shape=(batch_size, time_steps)
            condition (Tensor):
                The local condition. shape=(batch_size, condition_channel, time_steps)

        Returns:
            Tensor: The transformed sample (audio here). shape=(batch_size, time_steps)
        """

        z, condition = self._trim(z, condition)
        # to (B, C, h, T//h) layout
        folded_z = fold(z, self.n_group)
        z = paddle.unsqueeze(paddle.transpose(folded_z, [0, 2, 1]), 1)
        folded_condition = fold(condition, self.n_group)
        condition = paddle.transpose(folded_condition, [0, 1, 3, 2])

        # reverse it flow by flow
        for flow_idx in reversed(range(self.n_flows)):
            perm = self.perms[flow_idx]
            z = geo.shuffle_dim(z, 2, perm=perm)
            condition = geo.shuffle_dim(condition, 2, perm=perm)
            z = self[flow_idx].inverse(z, condition)

        x = paddle.squeeze(z, 1)  # (B, H, W)
        batch_size = x.shape[0]
        x = paddle.transpose(x, [0, 2, 1])
        x = paddle.reshape(x, [batch_size, -1])
        return x


class ConditionalWaveFlow(nn.LayerList):
    """ConditionalWaveFlow, an UpsampleNet (``encoder``) followed by a
    WaveFlow model (``decoder``).

    Args:
        upsample_factors (List[int]):
            Upsample factors for the upsample net.
        n_flows (int):
            Number of flows in the WaveFlow model.
        n_layers (int):
            Number of ResidualBlocks in each Flow.
        n_group (int):
            Number of timesteps to fold as a group.
        channels (int):
            Feature size of each ResidualBlock.
        n_mels (int):
            Feature size of mel spectrogram (mel bands).
        kernel_size (Union[int, List[int]]):
            Kernel size of the convolution layer in each ResidualBlock.
    """

    def __init__(self,
                 upsample_factors: List[int],
                 n_flows: int,
                 n_layers: int,
                 n_group: int,
                 channels: int,
                 n_mels: int,
                 kernel_size: Union[int, List[int]]):
        super().__init__()
        # mel spectrogram -> upsampled local condition
        self.encoder = UpsampleNet(upsample_factors)
        # the flow model itself; ``n_mels`` is its condition feature size
        self.decoder = WaveFlow(
            n_flows=n_flows,
            n_layers=n_layers,
            n_group=n_group,
            channels=channels,
            mel_bands=n_mels,
            kernel_size=kernel_size)

    def forward(self, audio, mel):
        """Compute the transformed random variable z (x to z) and the log of
        the determinant of the jacobian of the transformation from x to z.

        Args:
            audio(Tensor):
                The audio. shape=(B, T)
            mel(Tensor):
                The mel spectrogram. shape=(B, C_mel, T_mel)

        Returns:
            Tensor:
                The inversely transformed random variable z (x to z). shape=(B, T)
            Tensor:
                the log of the determinant of the jacobian of the transformation from x to z. shape=(1,)
        """
        upsampled = self.encoder(mel)
        z, log_det_jacobian = self.decoder(audio, upsampled)
        return z, log_det_jacobian

    @paddle.no_grad()
    def infer(self, mel):
        """Generate raw audio given a batch of mel spectrograms.

        Gaussian noise with the same length as the upsampled condition is
        drawn and pushed through the inverse of the decoder. The elapsed
        wall-clock time is printed.

        Args:
            mel(Tensor):
                Mel spectrogram (in log-magnitude). shape=(B, C_mel, T_mel);
                ``predict`` adds the batch axis before calling this method.

        Returns:
            Tensor:
                The synthesized audio, where``T <= T_mel * upsample_factors``. shape=(B, T)
        """
        tic = time.time()
        condition = self.encoder(mel, trim_conv_artifact=True)  # (B, C, T)
        batch_size, _, time_steps = condition.shape
        noise = paddle.randn([batch_size, time_steps], dtype=mel.dtype)
        audio = self.decoder.inverse(noise, condition)
        toc = time.time()
        print("time: {}s".format(toc - tic))
        return audio

    @paddle.no_grad()
    def predict(self, mel):
        """Generate raw audio given the mel spectrogram of one utterance.

        Args:
            mel(np.ndarray):
                Mel spectrogram of an utterance(in log-magnitude). shape=(C_mel, T_mel)

        Returns:
            np.ndarray: The synthesized audio. shape=(T,)
        """
        # to tensor, then add the batch axis expected by ``infer``
        batched_mel = paddle.unsqueeze(paddle.to_tensor(mel), 0)
        batched_audio = self.infer(batched_mel)
        return batched_audio[0].numpy()

    @classmethod
    def from_pretrained(cls, config, checkpoint_path):
        """Build a ConditionalWaveFlow model from a pretrained model.

        Args:
            config(yacs.config.CfgNode):
                model configs
            checkpoint_path(Path or str):
                the path of pretrained model checkpoint, without extension name

        Returns:
            ConditionalWaveFlow The model built from pretrained result.
        """
        model_cfg = config.model
        model = cls(
            upsample_factors=model_cfg.upsample_factors,
            n_flows=model_cfg.n_flows,
            n_layers=model_cfg.n_layers,
            n_group=model_cfg.n_group,
            channels=model_cfg.channels,
            n_mels=config.data.n_mels,
            kernel_size=model_cfg.kernel_size)
        checkpoint.load_parameters(model, checkpoint_path=checkpoint_path)
        return model


class WaveFlowLoss(nn.Layer):
    """Criterion of a WaveFlow model.

    Args:
        sigma (float):
            The standard deviation of the gaussian noise used in WaveFlow, by default 1.0.
    """

    def __init__(self, sigma=1.0):
        super().__init__()
        self.sigma = sigma
        # constant term added to the per-element loss:
        # 0.5 * log(2 * pi) + log(sigma)
        self.const = 0.5 * np.log(2 * np.pi) + np.log(self.sigma)

    def forward(self, z, log_det_jacobian):
        """Compute the loss given the transformed random variable z and the
        log_det_jacobian of transformation from x to z.

        The summed quadratic term minus the log determinant is divided by
        the number of elements of ``z`` and the constant term is added.

        Args:
            z(Tensor):
                The transformed random variable (x to z). shape=(B, T)
            log_det_jacobian(Tensor):
                The log of the determinant of the jacobian matrix of the
                transformation from x to z.  shape=(1,)

        Returns:
            Tensor: The loss. shape=(1,)
        """
        quadratic = paddle.sum(z * z) / (2 * self.sigma * self.sigma)
        total = quadratic - log_det_jacobian
        n_elements = np.prod(z.shape)
        mean_loss = total / n_elements
        return mean_loss + self.const


class ConditionalWaveFlow2Infer(ConditionalWaveFlow):
    """Inference wrapper whose ``forward`` delegates to ``predict``."""

    def forward(self, mel):
        """Generate raw audio given mel spectrogram.

        Args:
            mel (np.ndarray):
                Mel spectrogram of an utterance(in log-magnitude). shape=(C_mel, T_mel)

        Returns:
            np.ndarray: The synthesized audio. shape=(T,)
        """
        return self.predict(mel)


================================================
FILE: paddlespeech/t2s/models/wavernn/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .wavernn import *
from .wavernn_updater import *


================================================
FILE: paddlespeech/t2s/models/wavernn/wavernn.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from https://github.com/fatchord/WaveRNN
import sys
import time
from typing import List

import numpy as np
import paddle
from paddle import nn
from paddle.nn import functional as F

from paddlespeech.t2s.audio.codec import decode_mu_law
from paddlespeech.t2s.modules.losses import sample_from_discretized_mix_logistic
from paddlespeech.t2s.modules.nets_utils import initialize
from paddlespeech.t2s.modules.upsample import Stretch2D


class ResBlock(nn.Layer):
    """Residual block built from two bias-free kernel-size-1 convolutions,
    each followed by batch normalization.

    Args:
        dims (int): Number of input and output channels.
    """

    def __init__(self, dims):
        super().__init__()
        # layers are created in the same order as before so that parameter
        # creation order is unchanged
        self.conv1 = nn.Conv1D(dims, dims, kernel_size=1, bias_attr=False)
        self.conv2 = nn.Conv1D(dims, dims, kernel_size=1, bias_attr=False)
        self.batch_norm1 = nn.BatchNorm1D(dims)
        self.batch_norm2 = nn.BatchNorm1D(dims)

    def forward(self, x):
        '''
        conv -> bn -> relu -> conv -> bn, then add the input back
        (residual connection).
        '''
        out = F.relu(self.batch_norm1(self.conv1(x)))
        out = self.batch_norm2(self.conv2(out))
        return out + x


class MelResNet(nn.Layer):
    """Residual network applied to the auxiliary (mel) features.

    Args:
        res_blocks (int): Number of ``ResBlock`` layers.
        compute_dims (int): Channels used inside the network.
        res_out_dims (int): Output channels.
        aux_channels (int): Input channels.
        aux_context_window (int): The input convolution uses kernel size
            ``2 * aux_context_window + 1``.
    """

    def __init__(self,
                 res_blocks: int=10,
                 compute_dims: int=128,
                 res_out_dims: int=128,
                 aux_channels: int=80,
                 aux_context_window: int=0):
        super().__init__()
        # pay attention here: the kernel spans the whole context window and
        # no padding is passed, so the time dim reduces by
        # aux_context_window * 2
        window_size = aux_context_window * 2 + 1
        self.conv_in = nn.Conv1D(
            aux_channels,
            compute_dims,
            kernel_size=window_size,
            bias_attr=False)
        self.batch_norm = nn.BatchNorm1D(compute_dims)
        self.layers = nn.LayerList(
            [ResBlock(compute_dims) for _ in range(res_blocks)])
        self.conv_out = nn.Conv1D(compute_dims, res_out_dims, kernel_size=1)

    def forward(self, x):
        '''
        Args:
            x (Tensor):
                Input tensor (B, in_dims, T).
        Returns:
            Tensor:
                Output tensor (B, res_out_dims, T - 2 * aux_context_window).
        '''
        hidden = F.relu(self.batch_norm(self.conv_in(x)))
        for block in self.layers:
            hidden = block(hidden)
        return self.conv_out(hidden)


class UpsampleNetwork(nn.Layer):
    """Upsample the auxiliary (mel) features to the waveform rate.

    Two outputs are produced from the same input: the mel features
    upsampled by a chain of stretch + averaging-convolution layers, and the
    output of a ``MelResNet`` stretched by the total upsampling factor.

    Args:
        aux_channels (int): Channels of the auxiliary input.
        upsample_scales (List[int]): Upsample scale of each stage.
        compute_dims (int): Channels used inside the ``MelResNet``.
        res_blocks (int): Number of residual blocks of the ``MelResNet``.
        res_out_dims (int): Output channels of the ``MelResNet``.
        aux_context_window (int): Context window of the first convolution of
            the ``MelResNet``; that many frames are trimmed from both ends
            of the upsampled mel.
    """

    def __init__(self,
                 aux_channels: int=80,
                 upsample_scales: List[int]=[4, 5, 3, 5],
                 compute_dims: int=128,
                 res_blocks: int=10,
                 res_out_dims: int=128,
                 aux_context_window: int=2):
        super().__init__()
        # total_scale is the total Up sampling multiple
        total_scale = np.prod(upsample_scales)
        # aux_context_window * total_scale is a numpy.int64, cast it to a
        # plain python int; this is the number of upsampled steps trimmed
        # from each end in forward()
        self.indent = int(aux_context_window * total_scale)
        self.resnet = MelResNet(
            res_blocks=res_blocks,
            aux_channels=aux_channels,
            compute_dims=compute_dims,
            res_out_dims=res_out_dims,
            aux_context_window=aux_context_window)
        self.resnet_stretch = Stretch2D(total_scale, 1)
        self.up_layers = nn.LayerList()
        for scale in upsample_scales:
            k_size = (1, scale * 2 + 1)
            padding = (0, scale)
            stretch = Stretch2D(scale, 1)

            conv = nn.Conv2D(
                1, 1, kernel_size=k_size, padding=padding, bias_attr=False)
            # initialize the kernel as a moving average over the time axis
            weight_ = paddle.full_like(conv.weight, 1. / k_size[1])
            conv.weight.set_value(weight_)
            self.up_layers.append(stretch)
            self.up_layers.append(conv)

    def forward(self, m):
        '''
        Args:
            m (Tensor):
                Input tensor (B, C_aux, T).
        Returns:
            Tensor:
                Output tensor (B, (T - 2 * pad) * prod(upsample_scales), C_aux).
            Tensor:
                Output tensor (B, (T - 2 * pad) * prod(upsample_scales), res_out_dims).
        '''
        # aux: [B, C_aux, T]
        # -> [B, res_out_dims, T - 2 * aux_context_window]
        # -> [B, 1, res_out_dims, T - 2 * aux_context_window]
        aux = self.resnet(m).unsqueeze(1)
        # aux: [B, 1, res_out_dims, T - 2 * aux_context_window]
        # -> [B, 1, res_out_dims, (T - 2 * pad) * prod(upsample_scales)]
        aux = self.resnet_stretch(aux)
        # -> [B, res_out_dims, (T - 2 * pad) * prod(upsample_scales)]
        aux = aux.squeeze(1)
        # m: [B, C_aux, T] -> [B, 1, C_aux, T]
        m = m.unsqueeze(1)
        for f in self.up_layers:
            m = f(m)
        # m: [B, 1, C_aux, T * prod(upsample_scales)]
        # -> [B, C_aux, T * prod(upsample_scales)]
        m = m.squeeze(1)
        # Trim the context frames from both ends:
        # -> [B, C_aux, (T - 2 * pad) * prod(upsample_scales)]
        # With indent == 0 the slice ``[0:-0]`` would be ``[0:0]`` and drop
        # every time step, so the trim is skipped in that case.
        if self.indent > 0:
            m = m[:, :, self.indent:-self.indent]
        # m: [B, (T - 2 * pad) * prod(upsample_scales), C_aux]
        # aux: [B, (T - 2 * pad) * prod(upsample_scales), res_out_dims]
        return m.transpose([0, 2, 1]), aux.transpose([0, 2, 1])


class WaveRNN(nn.Layer):
    def __init__(
            self,
            rnn_dims: int=512,
            fc_dims: int=512,
            bits: int=9,
            aux_context_window: int=2,
            upsample_scales: List[int]=[4, 5, 3, 5],
            aux_channels: int=80,
            compute_dims: int=128,
            res_out_dims: int=128,
            res_blocks: int=10,
            hop_length: int=300,
            sample_rate: int=24000,
            mode='RAW',
            init_type: str="xavier_uniform", ):
        '''
        Args:
            rnn_dims (int, optional): 
                Hidden dims of RNN Layers.
            fc_dims (int, optional): 
                Dims of FC Layers.
            bits (int, optional): 
                bit depth of signal.
            aux_context_window (int, optional): 
                The context window size of the first convolution applied to the auxiliary input, by default 2
            upsample_scales (List[int], optional): 
                Upsample scales of the upsample network.
            aux_channels (int, optional): 
                Auxiliary channel of the residual blocks.
            compute_dims (int, optional): 
                Dims of Conv1D in MelResNet.
            res_out_dims (int, optional): 
                Dims of output in MelResNet.
            res_blocks (int, optional): 
                Number of residual blocks.
            hop_length (int, optional):
                Stored on the instance; ``generate`` uses it to compute the
                output length and the fade-out length.
            sample_rate (int, optional):
                Stored on the instance as ``self.sample_rate``.
            mode (str, optional): 
                Output mode of the WaveRNN vocoder. 
                `MOL` for Mixture of Logistic Distribution, and `RAW` for quantized bits as the model's output.
            init_type (str): 
                How to initialize parameters.

        Raises:
            RuntimeError: If ``mode`` is neither `RAW` nor `MOL`.
        '''
        super().__init__()
        self.mode = mode
        self.aux_context_window = aux_context_window
        if self.mode == 'RAW':
            self.n_classes = 2**bits
        elif self.mode == 'MOL':
            self.n_classes = 10 * 3
        else:
            # Fail right away: without this ``raise`` the exception object
            # was built and discarded, and construction only failed later
            # with an AttributeError on the missing ``self.n_classes``.
            raise RuntimeError('Unknown model mode value - ', self.mode)

        # List of rnns to call 'flatten_parameters()' on
        self._to_flatten = []

        self.rnn_dims = rnn_dims
        # the MelResNet output is split into 4 equal parts along the feature
        # axis in forward()/generate()
        self.aux_dims = res_out_dims // 4
        self.hop_length = hop_length
        self.sample_rate = sample_rate

        # initialize parameters
        initialize(self, init_type)

        self.upsample = UpsampleNetwork(
            aux_channels=aux_channels,
            upsample_scales=upsample_scales,
            compute_dims=compute_dims,
            res_blocks=res_blocks,
            res_out_dims=res_out_dims,
            aux_context_window=aux_context_window)
        # input: 1 sample value + upsampled mel + first aux part
        self.I = nn.Linear(aux_channels + self.aux_dims + 1, rnn_dims)

        self.rnn1 = nn.GRU(rnn_dims, rnn_dims)
        self.rnn2 = nn.GRU(rnn_dims + self.aux_dims, rnn_dims)

        self._to_flatten += [self.rnn1, self.rnn2]

        self.fc1 = nn.Linear(rnn_dims + self.aux_dims, fc_dims)
        self.fc2 = nn.Linear(fc_dims + self.aux_dims, fc_dims)
        self.fc3 = nn.Linear(fc_dims, self.n_classes)

        # Avoid fragmentation of RNN parameters and associated warning
        self._flatten_parameters()

        # reset the global initializer once all layers are created
        nn.initializer.set_global_initializer(None)

    def forward(self, x, c):
        '''
        Teacher-forced pass over a whole sequence.

        Args:
            x (Tensor): 
                wav sequence, [B, T]
            c (Tensor): 
                mel spectrogram [B, C_aux, T']

            T = (T' - 2 * aux_context_window ) * hop_length
        Returns:
            Tensor: [B, T, n_classes]
        '''
        # Although we `_flatten_parameters()` on init, when using DataParallel
        # the model gets replicated, making it no longer guaranteed that the
        # weights are contiguous in GPU memory. Hence, we must call it again
        self._flatten_parameters()

        bsize = paddle.shape(x)[0]
        h1 = paddle.zeros([1, bsize, self.rnn_dims])
        h2 = paddle.zeros([1, bsize, self.rnn_dims])
        # c: [B, T, C_aux]
        # aux: [B, T, res_out_dims]
        c, aux = self.upsample(c)

        # split aux into four consecutive parts of aux_dims features each
        d = self.aux_dims
        a1 = aux[:, :, 0:d]
        a2 = aux[:, :, d:2 * d]
        a3 = aux[:, :, 2 * d:3 * d]
        a4 = aux[:, :, 3 * d:4 * d]

        # input projection of [sample, mel, a1]
        rnn_in = self.I(paddle.concat([x.unsqueeze(-1), c, a1], axis=2))

        # first GRU with residual connection
        rnn1_out, _ = self.rnn1(rnn_in, h1)
        hidden = rnn1_out + rnn_in

        # second GRU (conditioned on a2) with residual connection
        rnn2_out, _ = self.rnn2(paddle.concat([hidden, a2], axis=2), h2)
        hidden = rnn2_out + hidden

        # two fully connected layers conditioned on a3 and a4
        hidden = F.relu(self.fc1(paddle.concat([hidden, a3], axis=2)))
        hidden = F.relu(self.fc2(paddle.concat([hidden, a4], axis=2)))

        return self.fc3(hidden)

    @paddle.no_grad()
    def generate(self,
                 c,
                 batched: bool=True,
                 target: int=12000,
                 overlap: int=600,
                 mu_law: bool=True,
                 gen_display: bool=False):
        """Autoregressively synthesize a waveform from a mel spectrogram.

        The model is switched to eval mode for the duration of the call and
        back to train mode before returning.

        Args:
            c(Tensor): 
                input mels, (T', C_aux)
            batched(bool): 
                generate in batch or not
            target(int): 
                target number of samples to be generated in each batch entry
            overlap(int): 
                number of samples for crossfading between batches
            mu_law(bool):
                whether to mu-law decode the generated samples; only
                honoured when ``self.mode == 'RAW'``
            gen_display(bool):
                whether to call ``self.gen_display`` every 1000 steps
        Returns: 
            wav sequence:
                The generated waveform, cut to at most T' * hop_length
                samples, with one trailing axis added by ``unsqueeze(-1)``.
        """

        self.eval()

        # mu-law decoding only applies to the quantized ('RAW') output mode
        mu_law = mu_law if self.mode == 'RAW' else False

        output = []
        start = time.time()

        # pseudo batch
        # (T, C_aux) -> (1, C_aux, T)
        c = paddle.transpose(c, [1, 0]).unsqueeze(0)
        T = paddle.shape(c)[-1]
        # number of samples kept at the end (computed before padding)
        wave_len = T * self.hop_length
        # TODO remove two transpose op by modifying function pad_tensor
        c = self.pad_tensor(
            c.transpose([0, 2, 1]), pad=self.aux_context_window,
            side='both').transpose([0, 2, 1])

        c, aux = self.upsample(c)

        if batched:
            # (num_folds, target + 2 * overlap, features)
            c = self.fold_with_overlap(c, target, overlap)
            aux = self.fold_with_overlap(aux, target, overlap)

        # for dygraph to static graph, if use seq_len of `b_size, seq_len, _ = paddle.shape(c)` in for
        # will not get TensorArray
        # see https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/04_dygraph_to_static/case_analysis_cn.html#list-lodtensorarray
        # b_size, seq_len, _ = paddle.shape(c)
        b_size = paddle.shape(c)[0]
        seq_len = paddle.shape(c)[1]

        # initial GRU states and the initial (zero) previous sample
        h1 = paddle.zeros([b_size, self.rnn_dims])
        h2 = paddle.zeros([b_size, self.rnn_dims])
        x = paddle.zeros([b_size, 1])

        # split aux into four consecutive parts of aux_dims features each
        d = self.aux_dims
        aux_split = [aux[:, :, d * i:d * (i + 1)] for i in range(4)]

        for i in range(seq_len):
            # conditioning features of the current time step
            m_t = c[:, i, :]
            # for dygraph to static graph
            # a1_t, a2_t, a3_t, a4_t = (a[:, i, :] for a in aux_split)
            a1_t = aux_split[0][:, i, :]
            a2_t = aux_split[1][:, i, :]
            a3_t = aux_split[2][:, i, :]
            a4_t = aux_split[3][:, i, :]
            # x holds the previously generated sample at this point
            x = paddle.concat([x, m_t, a1_t], axis=1)
            x = self.I(x)
            # use GRUCell here
            h1, _ = self.rnn1[0].cell(x, h1)
            x = x + h1
            inp = paddle.concat([x, a2_t], axis=1)
            # use GRUCell here
            h2, _ = self.rnn2[0].cell(inp, h2)

            x = x + h2
            x = paddle.concat([x, a3_t], axis=1)
            x = F.relu(self.fc1(x))

            x = paddle.concat([x, a4_t], axis=1)
            x = F.relu(self.fc2(x))

            logits = self.fc3(x)

            if self.mode == 'MOL':
                sample = sample_from_discretized_mix_logistic(
                    logits.unsqueeze(0).transpose([0, 2, 1]))
                output.append(sample.reshape([-1]))
                # the sample becomes the input of the next step
                x = sample.transpose([1, 0, 2])

            elif self.mode == 'RAW':
                # fix bug for paddle 2.3, see https://github.com/PaddlePaddle/Paddle/commit/01f606b4f1ca3e184a59111084ed460ee0798a5a
                # posterior = F.softmax(logits, axis=1)
                posterior = logits
                distrib = paddle.distribution.Categorical(posterior)
                # corresponding operate [np.floor((fx + 1) / 2 * mu + 0.5)] in enocde_mu_law
                # distrib.sample([1])[0].cast('float32'): [0, 2**bits-1]
                # sample: [-1, 1]
                sample = 2 * distrib.sample([1])[0].cast('float32') / (
                    self.n_classes - 1.) - 1.
                output.append(sample)
                # the sample becomes the input of the next step
                x = sample.unsqueeze(-1)
            else:
                raise RuntimeError('Unknown model mode value - ', self.mode)

            if gen_display:
                if i % 1000 == 0:
                    self.gen_display(i, int(seq_len), int(b_size), start)

        # list of per-step samples -> one row per batch entry
        output = paddle.stack(output).transpose([1, 0])

        if mu_law:
            output = decode_mu_law(output, self.n_classes, False)

        if batched:
            output = self.xfade_and_unfold(output, target, overlap)
        else:
            output = output[0]

        # Fade-out at the end to avoid signal cutting out suddenly
        fade_out = paddle.linspace(1, 0, 10 * self.hop_length)
        output = output[:wave_len]
        # NOTE(review): this appears to assume at least 10 * hop_length
        # samples were generated - confirm behavior for very short inputs
        output[-10 * self.hop_length:] *= fade_out

        # NOTE(review): train mode is set unconditionally, regardless of the
        # mode the model was in before this call
        self.train()

        # add the C_out dimension
        return output.unsqueeze(-1)

    def _flatten_parameters(self):
        [m.flatten_parameters() for m in self._to_flatten]

    def pad_tensor(self, x, pad, side='both'):
        '''
        Zero-pad a 3-D tensor along its second (time) axis.

        Args:
            x(Tensor):
                Input of shape [B, T, C], e.g. mel, [1, n_frames, 80]
            pad(int):
                Number of zero time steps added on each padded side
            side(str, optional):
                'before', 'after' or 'both'. (Default value = 'both')

        Returns:
            Tensor:
                Zero-initialised tensor of shape [B, T + pad, C]
                ([B, T + 2 * pad, C] when side == 'both') with ``x`` copied
                in. For any other ``side`` value nothing is copied and the
                zeros are returned as-is.
        '''
        batch, steps, _ = paddle.shape(x)
        # for dygraph to static graph
        channels = x.shape[-1]
        out_len = steps + 2 * pad if side == 'both' else steps + pad
        out = paddle.zeros([batch, out_len, channels])
        if side in ('before', 'both'):
            # leave `pad` zero steps in front of the data
            out[:, pad:pad + steps, :] = x
        elif side == 'after':
            # data first, zeros trail behind it
            out[:, :steps, :] = x
        return out

    def fold_with_overlap(self, x, target, overlap):
        '''
        Fold the tensor with overlap for quick batched inference.
        Overlap will be used for crossfading in xfade_and_unfold()

        Only the first batch entry ``x[0]`` is read when building the folds.

        Args:
            x(Tensor): 
                Upsampled conditioning features. mels or aux
                shape=(1, T, features)
                mels: [1, T, 80]
                aux: [1, T, 128]
            target(int): 
                Target timesteps for each index of batch
            overlap(int): 
                Timesteps for both xfade and rnn warmup

        Returns:
            Tensor: 
                shape=(num_folds, target + 2 * overlap, features)
                num_folds = (T - overlap) // (target + overlap),
                plus one extra zero-padded fold when the division
                leaves time steps over
                mel: [num_folds, target + 2 * overlap, 80]
                aux: [num_folds, target + 2 * overlap, 128]

        Details:
            x = [[h1, h2, ... hn]]
            Where each h is a vector of conditioning features
            Eg: target=2, overlap=1 with x.size(1)=10

            folded = [[h1, h2, h3, h4],
                    [h4, h5, h6, h7],
                    [h7, h8, h9, h10]]
        '''

        # paddle.shape() yields tensors, so the sizes below stay symbolic
        _, total_len, features = paddle.shape(x)

        # Calculate variables needed
        num_folds = (total_len - overlap) // (target + overlap)
        # length that num_folds complete folds cover
        extended_len = num_folds * (overlap + target) + overlap
        remaining = total_len - extended_len

        # Pad if some time steps are left over after the last complete fold:
        # add one more fold and zero-pad x at the end so that fold is full
        if remaining != 0:
            num_folds += 1
            padding = target + 2 * overlap - remaining
            x = self.pad_tensor(x, padding, side='after')

        folded = paddle.zeros([num_folds, target + 2 * overlap, features])

        # Get the values for the folded tensor; consecutive folds start
        # (target + overlap) steps apart, so they share `overlap` steps
        for i in range(num_folds):
            start = i * (target + overlap)
            end = start + target + 2 * overlap
            folded[i] = x[0][start:end, :]
        return folded

    def xfade_and_unfold(self, y, target: int=12000, overlap: int=600):
        ''' Applies a crossfade and unfolds into a 1d array.

        Note that ``y`` is modified in place: its first and last ``overlap``
        samples of every row are scaled by the fade curves.

        Args:
            y (Tensor): 
                Batched sequences of audio samples
                shape=(num_folds, target + 2 * overlap)
                dtype=paddle.float32
            target (int): 
                Not used as passed in; it is recomputed from the width of
                ``y`` as ``y.shape[1] - 2 * overlap``
            overlap (int): Timesteps for both xfade and rnn warmup

        Returns:
            Tensor
                audio samples in a 1d array
                shape=(total_len), where
                total_len = num_folds * (target + overlap) + overlap
                dtype=paddle.float32

        Details:
            y = [[seq1],
                [seq2],
                [seq3]]

            Apply a gain envelope at both ends of the sequences

            y = [[seq1_in, seq1_target, seq1_out],
                [seq2_in, seq2_target, seq2_out],
                [seq3_in, seq3_target, seq3_out]]

            Stagger and add up the groups of samples:

            [seq1_in, seq1_target, (seq1_out + seq2_in), seq2_target, ...]

        '''
        # num_folds = (total_len - overlap) // (target + overlap)
        num_folds, length = paddle.shape(y)
        # the `target` argument is overwritten with the value implied by y
        target = length - 2 * overlap
        total_len = num_folds * (target + overlap) + overlap

        # Silence for the rnn warmup; its length is 0 here, so `slience` and
        # `linear` are empty and the fade spans the whole overlap
        slience_len = 0
        linear_len = slience_len
        fade_len = overlap - slience_len
        slience = paddle.zeros([slience_len], dtype=paddle.float32)
        linear = paddle.ones([linear_len], dtype=paddle.float32)

        # Crossfade curves
        # fade_in increases and fade_out decreases along the overlap, and
        # fade_in + fade_out == 1 at every step. With sigmoid_scale = 2.3 the
        # ramps run from about 0.09 to about 0.91 rather than exactly 0 to 1
        sigmoid_scale = 2.3
        t = paddle.linspace(
            -sigmoid_scale, sigmoid_scale, fade_len, dtype=paddle.float32)
        # a sigmoid curve should work better
        fade_in = paddle.nn.functional.sigmoid(t)
        fade_out = 1 - paddle.nn.functional.sigmoid(t)
        # Concat the silence to the fades
        fade_out = paddle.concat([linear, fade_out])
        fade_in = paddle.concat([slience, fade_in])

        # Apply the gain to the overlap samples (in place on y)
        y[:, :overlap] *= fade_in
        y[:, -overlap:] *= fade_out

        unfolded = paddle.zeros([total_len], dtype=paddle.float32)

        # Loop to add up all the samples; rows start (target + overlap)
        # samples apart, so each row's tail is summed with the next row's head
        for i in range(num_folds):
            start = i * (target + overlap)
            end = start + target + 2 * overlap
            unfolded[start:end] += y[i]

        return unfolded

    def gen_display(self, i, seq_len, b_size, start):
        """Write a one-line generation progress report to stdout.

        The line starts with a carriage return so that successive calls
        overwrite each other on a terminal.

        Args:
            i (int): index of the current generation step.
            seq_len (int): total number of generation steps.
            b_size (int): batch size used for generation.
            start (float): ``time.time()`` value taken when generation began.
        """
        elapsed = time.time() - start
        # (i + 1) steps done for b_size sequences, reported in kHz
        gen_rate = (i + 1) / elapsed * b_size / 1000
        pbar = self.progbar(i, seq_len)
        msg = f'| {pbar} {i*b_size}/{seq_len*b_size} | Batch Size: {b_size} | Gen Rate: {gen_rate:.1f}kHz | '
        sys.stdout.write("\r" + msg)

    def progbar(self, i, n, size=16):
        """Render a text progress bar.

        Args:
            i (int): current step.
            n (int): total number of steps.
            size (int): number of characters in the bar. (Default value = 16)

        Returns:
            str: ``size`` characters; positions up to and including
                ``int(i * size) // n`` are drawn filled, the rest empty.
        """
        done = int(i * size) // n
        return ''.join('█' if pos <= done else '░' for pos in range(size))


class WaveRNNInference(nn.Layer):
    """Inference wrapper chaining a feature normalizer and a WaveRNN model.

    Args:
        normalizer: callable applied to the input log-mel features.
        wavernn: model whose ``generate()`` method produces the waveform.
    """

    def __init__(self, normalizer, wavernn):
        super().__init__()
        self.normalizer = normalizer
        self.wavernn = wavernn

    def forward(self,
                logmel,
                batched: bool=True,
                target: int=12000,
                overlap: int=600,
                mu_law: bool=True,
                gen_display: bool=False):
        """Normalize ``logmel`` and generate a waveform from it.

        ``batched``, ``target``, ``overlap``, ``mu_law`` and ``gen_display``
        are accepted but currently NOT forwarded: ``generate()`` is called
        with the normalized features only and therefore runs with its own
        default options.

        Args:
            logmel (Tensor): input log-mel features.

        Returns:
            Whatever ``self.wavernn.generate()`` returns for the normalized
            features.
        """
        # Generation options are intentionally left at generate()'s defaults.
        return self.wavernn.generate(self.normalizer(logmel))


================================================
FILE: paddlespeech/t2s/models/wavernn/wavernn_updater.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
from pathlib import Path

import paddle
import soundfile as sf
from paddle import distributed as dist
from paddle.io import DataLoader
from paddle.nn import Layer
from paddle.optimizer import Optimizer

from paddlespeech.t2s.training.extensions.evaluator import StandardEvaluator
from paddlespeech.t2s.training.reporter import report
from paddlespeech.t2s.training.updaters.standard_updater import StandardUpdater
# Set the record format of the root logger once at import time, then create
# this module's logger and let it emit records of level INFO and above.
logging.basicConfig(
    format='%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(message)s',
    datefmt='[%Y-%m-%d %H:%M:%S]')
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


def calculate_grad_norm(parameters, norm_type: float=2):
    '''
    Calculate the overall gradient norm of a model's parameters.

    Args:
        parameters (Iterable[Tensor]):
            The model's parameters. Parameters whose ``grad`` is None are
            skipped.
        norm_type (float or str):
            Order of the norm computed for each individual gradient; passed
            to ``paddle.norm``. (Default value = 2)

    Returns:
        Tensor:
            Norm (``paddle.norm`` with its default order) taken over the
            per-parameter gradient norms. A zero tensor of shape [1] when no
            parameter has a gradient.
    '''

    grad_list = [
        paddle.to_tensor(p.grad) for p in parameters if p.grad is not None
    ]
    if not grad_list:
        # paddle.stack() cannot stack an empty list; report a zero norm
        # instead of crashing when nothing has a gradient yet.
        return paddle.zeros([1], dtype='float32')
    norm_list = paddle.stack(
        [paddle.norm(grad, norm_type) for grad in grad_list])
    total_norm = paddle.norm(norm_list)
    return total_norm


# Latest training iteration, shared through module state: it is written by
# WaveRNNUpdater.update_core() and read by WaveRNNEvaluator.__call__(), which
# uses it to name the files saved in gen_valid_samples().
ITERATION = 0


class WaveRNNUpdater(StandardUpdater):
    """Updater that runs one WaveRNN optimisation step per batch.

    Args:
        model (Layer): the WaveRNN model, called as ``model(wav, mel)``.
        optimizer (Optimizer): optimizer stepping the model's parameters.
        criterion (Layer): loss, called as ``criterion(y_hat, y)``.
        dataloader (DataLoader): training data; batches unpack to
            ``(wav, y, mel)``.
        init_state: initial updater state, forwarded to ``StandardUpdater``.
            (Default value = None)
        output_dir (Path): directory that receives ``worker_<rank>.log``.
            Required in practice: the default ``None`` cannot be joined
            with the log file name.
        mode (str): 'RAW' or 'MOL'; selects how the model output and the
            target are prepared for the loss. (Default value = 'RAW')
    """

    def __init__(self,
                 model: Layer,
                 optimizer: Optimizer,
                 criterion: Layer,
                 dataloader: DataLoader,
                 init_state=None,
                 output_dir: Path=None,
                 mode='RAW'):
        # Hand the caller's init_state to the base class; it used to be
        # dropped and replaced by None.
        super().__init__(model, optimizer, dataloader, init_state=init_state)

        self.criterion = criterion
        # self.scheduler = scheduler

        # Every rank also logs to its own file inside output_dir.
        log_file = output_dir / 'worker_{}.log'.format(dist.get_rank())
        self.filehandler = logging.FileHandler(str(log_file))
        logger.addHandler(self.filehandler)
        self.logger = logger
        self.msg = ""
        self.mode = mode

    def update_core(self, batch):
        """Run forward, backward and optimizer step for one batch.

        Reports ``train/loss`` and ``train/grad_norm``, stores a summary in
        ``self.msg`` and publishes the next iteration number through the
        module-level ``ITERATION``.

        Args:
            batch: a ``(wav, y, mel)`` triple.
        """

        self.msg = "Rank: {}, ".format(dist.get_rank())
        losses_dict = {}
        # parse batch
        self.model.train()
        self.optimizer.clear_grad()

        wav, y, mel = batch

        y_hat = self.model(wav, mel)
        if self.mode == 'RAW':
            y_hat = y_hat.transpose([0, 2, 1]).unsqueeze(-1)
        elif self.mode == 'MOL':
            # Cast the target, not the prediction: assigning the cast target
            # to y_hat threw the model output away, so the loss compared the
            # target with itself.
            y = paddle.cast(y, dtype='float32')

        y = y.unsqueeze(-1)
        loss = self.criterion(y_hat, y)
        loss.backward()
        grad_norm = float(
            calculate_grad_norm(self.model.parameters(), norm_type=2))

        self.optimizer.step()

        report("train/loss", float(loss))
        report("train/grad_norm", float(grad_norm))

        losses_dict["loss"] = float(loss)
        losses_dict["grad_norm"] = float(grad_norm)
        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())
        # Expose the iteration count to WaveRNNEvaluator through module state.
        global ITERATION
        ITERATION = self.state.iteration + 1


class WaveRNNEvaluator(StandardEvaluator):
    """Evaluator that computes the validation loss and saves sample audio.

    Args:
        model (Layer): the WaveRNN model.
        criterion (Layer): loss, called as ``criterion(y_hat, y)``.
        dataloader (DataLoader): validation data; batches unpack to
            ``(wav, y, mel)``.
        output_dir (Path): directory for ``worker_<rank>.log`` and the
            ``valid_samples`` sub-directory. Required in practice: the
            default ``None`` cannot be joined with a file name.
        valid_generate_loader: iterable of items with ``'feats'`` and
            ``'wave'`` entries, used by ``gen_valid_samples()``.
        config: configuration object. Required in practice:
            ``config.model.mode`` is read in the constructor; ``generate_num``,
            ``fs``, ``mu_law``, ``gen_eval_samples_interval_steps`` and
            ``inference.{gen_batched, target, overlap}`` are read later.
    """

    def __init__(self,
                 model: Layer,
                 criterion: Layer,
                 dataloader: DataLoader,
                 output_dir: Path=None,
                 valid_generate_loader=None,
                 config=None):
        super().__init__(model, dataloader)

        # Every rank also logs to its own file inside output_dir.
        log_file = output_dir / 'worker_{}.log'.format(dist.get_rank())
        self.filehandler = logging.FileHandler(str(log_file))
        logger.addHandler(self.filehandler)
        self.logger = logger
        self.msg = ""

        self.criterion = criterion
        self.valid_generate_loader = valid_generate_loader
        self.config = config
        self.mode = config.model.mode

        self.valid_samples_dir = output_dir / "valid_samples"
        self.valid_samples_dir.mkdir(parents=True, exist_ok=True)

    def evaluate_core(self, batch):
        """Compute, report and log the loss of one validation batch.

        Args:
            batch: a ``(wav, y, mel)`` triple.
        """
        self.msg = "Evaluate: "
        losses_dict = {}
        # parse batch
        wav, y, mel = batch
        y_hat = self.model(wav, mel)

        if self.mode == 'RAW':
            y_hat = y_hat.transpose([0, 2, 1]).unsqueeze(-1)
        elif self.mode == 'MOL':
            # Cast the target, not the prediction: assigning the cast target
            # to y_hat threw the model output away, so the loss compared the
            # target with itself.
            y = paddle.cast(y, dtype='float32')

        y = y.unsqueeze(-1)
        loss = self.criterion(y_hat, y)
        report("eval/loss", float(loss))

        losses_dict["loss"] = float(loss)

        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())
        self.logger.info(self.msg)

    def gen_valid_samples(self):
        """Save target and generated audio for the first validation items.

        At most ``config.generate_num`` items are processed. For each one
        the reference waveform and the waveform generated from its features
        are written as wav files into ``self.valid_samples_dir``, with the
        current iteration number in the file names.
        """

        for i, item in enumerate(self.valid_generate_loader):
            if i >= self.config.generate_num:
                break
            print(
                '\n| Generating: {}/{}'.format(i + 1, self.config.generate_num))

            mel = item['feats']
            wav = item['wave']
            wav = wav.squeeze(0)

            # Pass a str path, the same way the generated sample is saved.
            origin_save_path = str(
                self.valid_samples_dir /
                '{}_steps_{}_target.wav'.format(self.iteration, i))
            sf.write(origin_save_path, wav.numpy(), samplerate=self.config.fs)

            if self.config.inference.gen_batched:
                batch_str = 'gen_batched_target{}_overlap{}'.format(
                    self.config.inference.target, self.config.inference.overlap)
            else:
                batch_str = 'gen_not_batched'
            gen_save_path = str(self.valid_samples_dir /
                                '{}_steps_{}_{}.wav'.format(self.iteration, i,
                                                            batch_str))
            # (1, T, C_aux) -> (T, C_aux)
            mel = mel.squeeze(0)
            gen_sample = self.model.generate(
                mel, self.config.inference.gen_batched,
                self.config.inference.target, self.config.inference.overlap,
                self.config.mu_law)
            sf.write(
                gen_save_path, gen_sample.numpy(), samplerate=self.config.fs)

    def __call__(self, trainer=None):
        """Evaluate, report the summary and periodically generate samples.

        Samples are generated when the iteration published through the
        module-level ``ITERATION`` is a multiple of
        ``config.gen_eval_samples_interval_steps``.
        """
        summary = self.evaluate()
        for k, v in summary.items():
            report(k, v)
        # gen samples at the end of evaluate
        self.iteration = ITERATION
        if self.iteration % self.config.gen_eval_samples_interval_steps == 0:
            self.gen_valid_samples()


================================================
FILE: paddlespeech/t2s/modules/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .conv import *
from .fftconv1d import *
from .geometry import *
from .losses import *
from .positional_encoding import *


================================================
FILE: paddlespeech/t2s/modules/activation.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn.functional as F
from paddle import nn


class GLU(nn.Layer):
    """Gated Linear Units (GLU) layer.

    Layer wrapper around ``paddle.nn.functional.glu``.

    Args:
        dim (int): axis handed to ``F.glu`` as ``axis``. (Default value = -1)
    """

    def __init__(self, dim: int=-1):
        super().__init__()
        self.dim = dim

    def forward(self, xs):
        """Apply GLU to ``xs`` along ``self.dim`` and return the result."""
        gated = F.glu(xs, axis=self.dim)
        return gated


def get_activation(act, **kwargs):
    """Build an activation layer from its name.

    Args:
        act (str): one of "hardtanh", "tanh", "relu", "selu", "leakyrelu",
            "swish", "glu" or "gelu".
        **kwargs: forwarded unchanged to the layer's constructor.

    Returns:
        A new instance of the selected activation layer.

    Raises:
        KeyError: if ``act`` is not one of the supported names.
    """

    name_to_layer = {
        "hardtanh": paddle.nn.Hardtanh,
        "tanh": paddle.nn.Tanh,
        "relu": paddle.nn.ReLU,
        "selu": paddle.nn.SELU,
        "leakyrelu": paddle.nn.LeakyReLU,
        "swish": paddle.nn.Swish,
        "glu": GLU,
        "gelu": paddle.nn.GELU,
    }

    layer_cls = name_to_layer[act]
    return layer_cls(**kwargs)


================================================
FILE: paddlespeech/t2s/modules/adversarial_loss/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/modules/adversarial_loss/gradient_reversal.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
from paddle.autograd import PyLayer


class GradientReversalFunction(PyLayer):
    """Gradient Reversal Layer from:
    Unsupervised Domain Adaptation by Backpropagation (Ganin & Lempitsky, 2015)

    The forward pass returns a copy of the input. In the backward pass the
    upstream gradients are multiplied by -lambda (i.e. the gradient is
    reversed) and then clipped element-wise to [-0.5, 0.5].
    """

    @staticmethod
    def forward(ctx, x, lambda_=1):
        """Return a clone of ``x`` and remember ``lambda_`` for backward."""
        ctx.save_for_backward(lambda_)
        return x.clone()

    @staticmethod
    def backward(ctx, grads):
        """Return the reversed, scaled and clipped upstream gradients."""
        (scale, ) = ctx.saved_tensor()
        reversed_grads = -scale * grads
        return paddle.clip(reversed_grads, min=-0.5, max=0.5)


class GradientReversalLayer(nn.Layer):
    """Layer form of ``GradientReversalFunction``, from:
    Unsupervised Domain Adaptation by Backpropagation (Ganin & Lempitsky, 2015)

    The forward pass is the identity function; the backward pass is the one
    defined by ``GradientReversalFunction``.

    Args:
        lambda_: factor handed to ``GradientReversalFunction``.
            (Default value = 1)
    """

    def __init__(self, lambda_=1):
        super().__init__()
        self.lambda_ = lambda_

    def forward(self, x):
        """Pass ``x`` through ``GradientReversalFunction`` with ``self.lambda_``."""
        reversed_x = GradientReversalFunction.apply(x, self.lambda_)
        return reversed_x


================================================
FILE: paddlespeech/t2s/modules/adversarial_loss/speaker_classifier.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from Cross-Lingual-Voice-Cloning(https://github.com/deterministic-algorithms-lab/Cross-Lingual-Voice-Cloning)
import paddle
from paddle import nn
from typeguard import typechecked


class SpeakerClassifier(nn.Layer):
    """Two stacked linear layers producing per-step speaker scores.

    Args:
        idim (int): size of the input features.
        hidden_sc_dim (int): size of the hidden linear layer.
        spk_num (int): number of speakers, i.e. size of the output.
    """

    @typechecked
    def __init__(
            self,
            idim: int,
            hidden_sc_dim: int,
            spk_num: int, ):
        super().__init__()
        # store hyperparameters
        self.idim = idim
        self.hidden_sc_dim = hidden_sc_dim
        self.spk_num = spk_num

        # idim -> hidden_sc_dim -> spk_num, no activation in between
        self.model = nn.Sequential(
            nn.Linear(self.idim, self.hidden_sc_dim),
            nn.Linear(self.hidden_sc_dim, self.spk_num))

    def parse_outputs(self, out, text_lengths):
        """Zero the scores of every step at or beyond each sequence's length.

        Args:
            out (Tensor): scores, [batch_size, seq_len, spk_num]
            text_lengths (Tensor): valid lengths, [batch_size]

        Returns:
            Tensor: masked scores, [batch_size, seq_len, spk_num]
        """
        batch, steps = out.shape[0], out.shape[1]
        positions = paddle.arange(steps).expand([batch, steps])
        # True where the step index lies inside the sequence
        mask = positions < text_lengths.unsqueeze(1)
        # move spk_num to the front so the [batch, steps] mask broadcasts
        masked = paddle.transpose(out, perm=[2, 0, 1]) * mask
        return paddle.transpose(masked, perm=[1, 2, 0])

    def forward(self, encoder_outputs, text_lengths):
        """Score speakers for every encoder step.

        Args:
            encoder_outputs (Tensor):
                [batch_size, seq_len, encoder_embedding_size]
            text_lengths (Tensor): [batch_size]

        Returns:
            Tensor: outputs of the linear stack with padded steps zeroed,
                [batch_size, seq_len, spk_num]. No softmax or log-softmax
                is applied here.
        """

        scores = self.model(encoder_outputs)
        return self.parse_outputs(scores, text_lengths)


================================================
FILE: paddlespeech/t2s/modules/causal_conv.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Causal convolution layer modules."""
import paddle
from paddle import nn


class CausalConv1D(nn.Layer):
    """CausalConv1D module with customized initialization.

    The input is padded by ``(kernel_size - 1) * dilation`` before the
    convolution, and the output is cut back to the input length.
    """

    def __init__(
            self,
            in_channels,
            out_channels,
            kernel_size,
            dilation=1,
            bias=True,
            pad="Pad1D",
            pad_params={"value": 0.0}, ):
        """Initialize CausalConv1d module.

        Args:
            in_channels (int): number of input channels.
            out_channels (int): number of output channels.
            kernel_size (int): size of the convolution kernel.
            dilation (int): dilation of the convolution. (Default value = 1)
            bias (bool): passed to the convolution as ``bias_attr``.
            pad (str): name of the padding layer class in ``paddle.nn``.
            pad_params (dict): extra keyword arguments for the padding layer.
        """
        super().__init__()
        pad_size = (kernel_size - 1) * dilation
        pad_cls = getattr(paddle.nn, pad)
        self.pad = pad_cls(pad_size, **pad_params)
        self.conv = nn.Conv1D(
            in_channels,
            out_channels,
            kernel_size,
            dilation=dilation,
            bias_attr=bias)

    def forward(self, x):
        """Calculate forward propagation.
        Args:
            x (Tensor):
                Input tensor (B, in_channels, T).
        Returns:
            Tensor: Output tensor (B, out_channels, T).
        """
        n_steps = x.shape[2]
        convolved = self.conv(self.pad(x))
        # keep only the first T steps
        return convolved[:, :, :n_steps]


class CausalConv1DTranspose(nn.Layer):
    """CausalConv1DTranspose module with customized initialization.

    Runs a transposed 1-D convolution and drops the last ``stride`` steps
    of its output.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 bias=True):
        """Initialize CausalConvTranspose1d module.

        Args:
            in_channels (int): number of input channels.
            out_channels (int): number of output channels.
            kernel_size (int): size of the convolution kernel.
            stride (int): stride of the transposed convolution.
            bias (bool): passed to the convolution as ``bias_attr``.
        """
        super().__init__()
        self.deconv = nn.Conv1DTranspose(
            in_channels, out_channels, kernel_size, stride, bias_attr=bias)
        self.stride = stride

    def forward(self, x):
        """Calculate forward propagation.
        Args:
            x (Tensor):
                Input tensor (B, in_channels, T_in).
        Returns:
            Tensor: Output tensor (B, out_channels, T_out).
        """
        upsampled = self.deconv(x)
        # discard the trailing `stride` steps
        return upsampled[:, :, :-self.stride]


================================================
FILE: paddlespeech/t2s/modules/conformer/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/modules/conformer/convolution.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""ConvolutionModule definition."""
from paddle import nn


class ConvolutionModule(nn.Layer):
    """ConvolutionModule in Conformer model.

    Pointwise conv + GLU, depthwise conv, batch norm + activation, and a
    final pointwise conv, all applied along the time axis.

    Args:
        channels (int):
            The number of channels of conv layers.
        kernel_size (int):
            Kernel size of the depthwise conv layer; must be odd.
        activation (nn.Layer):
            Activation applied after the batch norm.
        bias (bool):
            Passed to every conv layer as ``bias_attr``.
    """

    def __init__(self, channels, kernel_size, activation=nn.ReLU(), bias=True):
        """Construct an ConvolutionModule object."""
        super().__init__()
        # kernel_size should be an odd number for 'SAME' padding
        assert (kernel_size - 1) % 2 == 0

        # settings shared by both 1x1 (pointwise) convolutions
        pointwise_cfg = dict(kernel_size=1, stride=1, padding=0, bias_attr=bias)

        self.pointwise_conv1 = nn.Conv1D(channels, 2 * channels,
                                         **pointwise_cfg)
        self.depthwise_conv = nn.Conv1D(
            channels,
            channels,
            kernel_size,
            stride=1,
            padding=(kernel_size - 1) // 2,
            groups=channels,
            bias_attr=bias, )
        self.norm = nn.BatchNorm1D(channels)
        self.pointwise_conv2 = nn.Conv1D(channels, channels, **pointwise_cfg)
        self.activation = activation

    def forward(self, x):
        """Compute convolution module.

        Args:
            x (Tensor):
                Input tensor (#batch, time, channels).
        Returns:
            Tensor: Output tensor (#batch, time, channels).
        """
        # (batch, time, channel) -> (batch, channel, time)
        feats = x.transpose([0, 2, 1])

        # GLU mechanism: (batch, 2*channel, time) -> (batch, channel, time)
        feats = nn.functional.glu(self.pointwise_conv1(feats), axis=1)

        # 1D depthwise conv, then norm and activation
        feats = self.depthwise_conv(feats)
        feats = self.norm(feats)
        feats = self.activation(feats)

        feats = self.pointwise_conv2(feats)

        # back to (batch, time, channel)
        return feats.transpose([0, 2, 1])


================================================
FILE: paddlespeech/t2s/modules/conformer/encoder_layer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""Encoder self-attention layer definition."""
import paddle
from paddle import nn

from paddlespeech.t2s.modules.layer_norm import LayerNorm


class EncoderLayer(nn.Layer):
    """Encoder layer module.

    Args:
        size (int):
            Input dimension.
        self_attn (nn.Layer):
            Self-attention module instance.
            `MultiHeadedAttention` or `RelPositionMultiHeadedAttention` instance
            can be used as the argument.
        feed_forward (nn.Layer):
            Feed-forward module instance.
            `PositionwiseFeedForward`, `MultiLayeredConv1d`, or `Conv1dLinear` instance
            can be used as the argument.
        feed_forward_macaron (nn.Layer):
            Additional feed-forward module instance.
            `PositionwiseFeedForward`, `MultiLayeredConv1d`, or `Conv1dLinear` instance
            can be used as the argument.
        conv_module (nn.Layer):
            Convolution module instance.
            `ConvolutionModule` instance can be used as the argument.
        dropout_rate (float):
            Dropout rate.
        normalize_before (bool):
            Whether to use layer_norm before the first block.
        concat_after (bool):
            Whether to concat attention layer's input and output.
            if True, additional linear will be applied.
            i.e. x -> x + linear(concat(x, att(x)))
            if False, no additional linear will be applied. i.e. x -> x + att(x)
        stochastic_depth_rate (float):
            Probability to skip this layer.
            During training, the layer may skip residual computation and return input
            as-is with given probability.
    """

    def __init__(
            self,
            size,
            self_attn,
            feed_forward,
            feed_forward_macaron,
            conv_module,
            dropout_rate,
            normalize_before=True,
            concat_after=False,
            stochastic_depth_rate=0.0, ):
        """Construct an EncoderLayer object."""
        super().__init__()
        self.self_attn = self_attn
        self.feed_forward = feed_forward
        self.feed_forward_macaron = feed_forward_macaron
        self.conv_module = conv_module
        self.norm_ff = LayerNorm(size)  # for the FFN module
        self.norm_mha = LayerNorm(size)  # for the MHA module
        if feed_forward_macaron is not None:
            self.norm_ff_macaron = LayerNorm(size)
            # macaron style: each of the two feed-forward modules adds half
            self.ff_scale = 0.5
        else:
            self.ff_scale = 1.0
        if self.conv_module is not None:
            self.norm_conv = LayerNorm(size)  # for the CNN module
            self.norm_final = LayerNorm(
                size)  # for the final output of the block
        self.dropout = nn.Dropout(dropout_rate)
        self.size = size
        self.normalize_before = normalize_before
        self.concat_after = concat_after
        if self.concat_after:
            self.concat_linear = nn.Linear(size + size, size)
        self.stochastic_depth_rate = stochastic_depth_rate

    def forward(self, x_input, mask, cache=None):
        """Compute encoded features.

        Args:
            x_input(Union[Tuple, Tensor]):
                Input tensor w/ or w/o pos emb.
                - w/ pos emb: Tuple of tensors [(#batch, time, size), (1, time, size)].
                - w/o pos emb: Tensor (#batch, time, size).
            mask(Tensor):
                Mask tensor for the input (#batch, time).
            cache (Tensor):
                Previous output of this layer, (#batch, time - 1, size).
                When given, only the last time step is computed and the
                result is appended to the cache. (Default value = None)

        Returns:
            Tensor:
                Output tensor (#batch, time, size); returned as a tuple
                together with the pos emb when the input carried one.
            Tensor:
                Mask tensor (#batch, time).
        """
        if isinstance(x_input, tuple):
            x, pos_emb = x_input[0], x_input[1]
        else:
            x, pos_emb = x_input, None
        skip_layer = False
        # with stochastic depth, residual connection `x + f(x)` becomes
        # `x <- x + 1 / (1 - p) * f(x)` at training time.
        stoch_layer_coeff = 1.0
        if self.training and self.stochastic_depth_rate > 0:
            skip_layer = paddle.rand(1).item() < self.stochastic_depth_rate
            stoch_layer_coeff = 1.0 / (1 - self.stochastic_depth_rate)
        if skip_layer:
            # layer skipped: pass the input through unchanged
            if cache is not None:
                x = paddle.concat([cache, x], axis=1)
            if pos_emb is not None:
                return (x, pos_emb), mask
            return x, mask
        # whether to use macaron style
        if self.feed_forward_macaron is not None:
            residual = x
            if self.normalize_before:
                x = self.norm_ff_macaron(x)
            x = residual + stoch_layer_coeff * self.ff_scale * self.dropout(
                self.feed_forward_macaron(x))
            if not self.normalize_before:
                x = self.norm_ff_macaron(x)
        # multi-headed self-attention module
        residual = x
        if self.normalize_before:
            x = self.norm_mha(x)

        if cache is None:
            x_q = x
        else:
            # Compare as lists: Paddle reports Tensor.shape as a list, and a
            # list never equals a tuple, so the previous tuple comparison
            # rejected every cache.
            expected_shape = [x.shape[0], x.shape[1] - 1, self.size]
            assert list(cache.shape) == expected_shape
            # only the newest step is used as query
            x_q = x[:, -1:, :]
            residual = residual[:, -1:, :]
            mask = None if mask is None else mask[:, -1:, :]

        if pos_emb is not None:
            x_att = self.self_attn(x_q, x, x, pos_emb, mask)
        else:
            x_att = self.self_attn(x_q, x, x, mask)

        if self.concat_after:
            x_concat = paddle.concat((x, x_att), axis=-1)
            x = residual + stoch_layer_coeff * self.concat_linear(x_concat)
        else:
            x = residual + stoch_layer_coeff * self.dropout(x_att)
        if not self.normalize_before:
            x = self.norm_mha(x)

        # convolution module
        if self.conv_module is not None:
            residual = x
            if self.normalize_before:
                x = self.norm_conv(x)
            x = residual + stoch_layer_coeff * self.dropout(self.conv_module(x))
            if not self.normalize_before:
                x = self.norm_conv(x)

        # feed forward module
        residual = x
        if self.normalize_before:
            x = self.norm_ff(x)
        x = residual + stoch_layer_coeff * self.ff_scale * self.dropout(
            self.feed_forward(x))
        if not self.normalize_before:
            x = self.norm_ff(x)

        if self.conv_module is not None:
            x = self.norm_final(x)

        if cache is not None:
            # append the newly computed step to the cached ones
            x = paddle.concat([cache, x], axis=1)

        if pos_emb is not None:
            return (x, pos_emb), mask

        return x, mask


================================================
FILE: paddlespeech/t2s/modules/conv.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle import nn

# Public names of this module, i.e. what ``from <module> import *`` exports.
__all__ = [
    "Conv1dCell",
    "Conv1dBatchNorm",
]


class Conv1dCell(nn.Conv1D):
    """A subclass of Conv1D layer, which can be used in an autoregressive
    decoder like an RNN cell.

    When used in autoregressive decoding, it performs causal temporal
    convolution incrementally. At each time step, it takes a step input and
    returns a step output.

    Notes
    ------
    It is done by caching an internal buffer that holds the latest
    ``receptive_field`` step inputs. When a step input is added, the buffer is
    shifted by one step: the latest input is appended to the buffer and the
    oldest step is discarded. Then a step output is returned. For the single
    step case, convolution is equivalent to a linear transformation.
    That it can be used as a cell depends on several restrictions:
    1. stride must be 1;
    2. padding must be a causal padding (receptive_field - 1, 0).
    Thus, these arguments are removed from the ``__init__`` method of this
    class.

    Args:
        in_channels (int):
            The feature size of the input.
        out_channels (int):
            The feature size of the output.
        kernel_size (int or Tuple[int]):
            The size of the kernel.
        dilation (int or Tuple[int]):
            The dilation of the convolution, by default 1
        weight_attr (ParamAttr, Initializer, str or bool, optional):
            The parameter attribute of the convolution kernel,
            by default None.
        bias_attr (ParamAttr, Initializer, str or bool, optional):
            The parameter attribute of the bias.
            If ``False``, this layer does not have a bias, by default None.

    Examples:
        >>> cell = Conv1dCell(3, 4, kernel_size=5)
        >>> inputs = [paddle.randn([4, 3]) for _ in range(16)]
        >>> outputs = []
        >>> cell.eval()
        >>> cell.start_sequence()
        >>> for xt in inputs:
        ...     outputs.append(cell.add_input(xt))
        >>> len(outputs)
        16
        >>> outputs[0].shape
        [4, 4]
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 dilation=1,
                 weight_attr=None,
                 bias_attr=None):
        _dilation = dilation[0] if isinstance(dilation,
                                              (tuple, list)) else dilation
        _kernel_size = kernel_size[0] if isinstance(kernel_size, (
            tuple, list)) else kernel_size
        # Receptive field of a dilated kernel: 1 + (k - 1) * d.
        self._r = 1 + (_kernel_size - 1) * _dilation
        super().__init__(
            in_channels,
            out_channels,
            kernel_size,
            # causal padding: pad only on the left
            padding=(self._r - 1, 0),
            dilation=dilation,
            weight_attr=weight_attr,
            bias_attr=bias_attr,
            data_format="NCL")

    @property
    def receptive_field(self):
        """The receptive field of the Conv1dCell.
        """
        return self._r

    def start_sequence(self):
        """Prepare the layer for a series of incremental forward.

        Warnings:
            This method should be called before a sequence of calls to
            ``add_input``.

        Raises:
            Exception
                If this method is called when the layer is in training mode.
        """
        if self.training:
            raise Exception("only use start_sequence in evaluation")
        self._buffer = None

        # NOTE: call self's weight norm hook explicitly since self.weight
        # is visited directly in this method without calling self.__call__
        # method. If we do not trigger the weight norm hook, the weight
        # may be outdated. e.g. after loading from a saved checkpoint
        # see also: https://github.com/pytorch/pytorch/issues/47588
        for hook in self._forward_pre_hooks.values():
            hook(self, None)
        # Flatten the kernel to (out_channels, in_channels * kernel_size) so
        # that a step output can be computed with a single matmul.
        self._reshaped_weight = paddle.reshape(self.weight,
                                               (self._out_channels, -1))

    def initialize_buffer(self, x_t):
        """Initialize the buffer for the step input.

        The buffer is zero-filled with
        shape=(batch_size, in_channels, receptive_field).

        Args:
            x_t (Tensor):
                The step input. shape=(batch_size, in_channels)

        """
        batch_size, _ = x_t.shape
        self._buffer = paddle.zeros(
            (batch_size, self._in_channels, self.receptive_field),
            dtype=x_t.dtype)

    def update_buffer(self, x_t):
        """Shift the buffer by one step.

        The oldest step is dropped and ``x_t`` is appended as the newest one.

        Args:
            x_t (Tensor):
                The step input. shape=(batch_size, in_channels)

        """
        self._buffer = paddle.concat(
            [self._buffer[:, :, 1:], paddle.unsqueeze(x_t, -1)], -1)

    def add_input(self, x_t):
        """Add step input and compute step output.

        Args:
            x_t (Tensor):
                The step input. shape=(batch_size, in_channels)

        Returns:
            y_t (Tensor):
                The step output. shape=(batch_size, out_channels)

        """
        batch_size = x_t.shape[0]
        if self.receptive_field > 1:
            if self._buffer is None:
                self.initialize_buffer(x_t)

            # update buffer
            self.update_buffer(x_t)
            if self._dilation[0] > 1:
                # keep only the taps the dilated kernel actually looks at
                step_input = self._buffer[:, :, ::self._dilation[0]]
            else:
                step_input = self._buffer
            step_input = paddle.reshape(step_input, (batch_size, -1))
        else:
            step_input = x_t
        y_t = paddle.matmul(
            step_input, self._reshaped_weight, transpose_y=True)
        # With ``bias_attr=False`` the layer has no bias (``self.bias`` is
        # None); skip the addition instead of failing on ``Tensor + None``.
        if self.bias is not None:
            y_t = y_t + self.bias
        return y_t


class Conv1dBatchNorm(nn.Layer):
    """Conv1D immediately followed by BatchNorm1D.

    Args:
        in_channels (int):
            Feature size of the input.
        out_channels (int):
            Feature size of the output.
        kernel_size (int):
            Size of the convolution kernel.
        stride (int, optional):
            Stride of the convolution, by default 1.
        padding (int, str or Tuple[int], optional):
            Padding of the convolution.
            An int means symmetrical padding applied before convolution;
            a str must be "same" or "valid";
            a Tuple[int] must have length 2 and means
            ``(pad_before, pad_after)``, by default 0.
        weight_attr (ParamAttr, Initializer, str or bool, optional):
            Parameter attribute of the convolution kernel,
            by default None.
        bias_attr (ParamAttr, Initializer, str or bool, optional):
            Parameter attribute of the convolution bias,
            by default None.
        data_format (str ["NCL" or "NLC"], optional):
            Data layout of the input, by default "NCL"
        momentum (float, optional):
            Momentum of the BatchNorm1D layer, by default 0.9
        epsilon (float, optional):
            Epsilon of the BatchNorm1D layer, by default 1e-05
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 weight_attr=None,
                 bias_attr=None,
                 data_format="NCL",
                 momentum=0.9,
                 epsilon=1e-05):
        super().__init__()
        # Options forwarded to the convolution.
        conv_options = dict(
            padding=padding,
            weight_attr=weight_attr,
            bias_attr=bias_attr,
            data_format=data_format)
        self.conv = nn.Conv1D(in_channels, out_channels, kernel_size, stride,
                              **conv_options)
        # The normalization works on the convolution output, hence
        # ``out_channels`` features and the same data layout.
        norm_options = dict(
            momentum=momentum, epsilon=epsilon, data_format=data_format)
        self.bn = nn.BatchNorm1D(out_channels, **norm_options)

    def forward(self, x):
        """Apply the convolution and then the batch normalization.

        Args:
            x (Tensor):
                Input tensor, laid out according to ``data_format``.
                shape=(B, C_in, T_in) or (B, T_in, C_in)

        Returns:
            Tensor:
                Output tensor. shape=(B, C_out, T_out) or (B, T_out, C_out)

        """
        return self.bn(self.conv(x))


================================================
FILE: paddlespeech/t2s/modules/diffnet.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math

import paddle
import paddle.nn.functional as F
from paddle import nn

from paddlespeech.utils.initialize import _calculate_fan_in_and_fan_out
from paddlespeech.utils.initialize import kaiming_normal_
from paddlespeech.utils.initialize import kaiming_uniform_
from paddlespeech.utils.initialize import uniform_
from paddlespeech.utils.initialize import zeros_


def Conv1D(*args, **kwargs):
    """Create an ``nn.Conv1D`` and re-initialize its parameters.

    All arguments are forwarded unchanged to ``nn.Conv1D``. The weight is
    re-initialized with ``kaiming_normal_`` (to match the official
    implementation); when the layer has a bias and the weight's fan-in is
    non-zero, the bias is drawn uniformly from
    ``[-1/sqrt(fan_in), 1/sqrt(fan_in)]`` (to match torch).

    Returns:
        nn.Conv1D: the initialized layer.
    """
    conv = nn.Conv1D(*args, **kwargs)
    # Weight initialization consistent with the official implementation.
    kaiming_normal_(conv.weight)

    # Layers built without a bias need nothing more.
    if conv.bias is None:
        return conv

    # Bias initialization consistent with torch.
    fan_in, _ = _calculate_fan_in_and_fan_out(conv.weight)
    if fan_in == 0:
        # No meaningful bound can be derived; keep the default bias.
        return conv
    limit = 1 / math.sqrt(fan_in)
    uniform_(conv.bias, -limit, limit)
    return conv


# Initialization is consistent with torch
def Linear(*args, **kwargs):
    """Create an ``nn.Linear`` and re-initialize its parameters like torch.

    All arguments are forwarded unchanged to ``nn.Linear``. The weight is
    re-initialized with ``kaiming_uniform_(a=sqrt(5))``; when the layer has a
    bias it is drawn uniformly from ``[-bound, bound]`` where
    ``bound = 1/sqrt(fan_in)`` (``0`` when the fan-in is not positive).

    Returns:
        nn.Linear: the initialized layer.
    """
    fc = nn.Linear(*args, **kwargs)
    # NOTE(review): paddle's nn.Linear weight is documented as
    # [in_features, out_features]; confirm that the initialization helpers
    # read the intended axis as fan-in for this layout.
    kaiming_uniform_(fc.weight, a=math.sqrt(5))
    if fc.bias is None:
        return fc

    fan_in, _ = _calculate_fan_in_and_fan_out(fc.weight)
    if fan_in > 0:
        bound = 1 / math.sqrt(fan_in)
    else:
        bound = 0
    uniform_(fc.bias, -bound, bound)
    return fc


class ResidualBlock(nn.Layer):
    """ResidualBlock

    A gated, dilated 1D convolution block conditioned on a diffusion-step
    embedding and on an auxiliary feature sequence.

    Notes:
        The gated activation halves ``gate_channels`` and feeds the result to
        a projection that expects ``residual_channels`` inputs, and the
        projection output is split in two halves of which one is added to the
        input. The shapes therefore only line up when
        ``gate_channels == 2 * residual_channels``.

    Args:
        encoder_hidden (int, optional):
            Feature size of the conditioning input ``cond``, by default 256
        residual_channels (int, optional):
            Feature size of the residual output(and also the input), by default 256
        gate_channels (int, optional):
            Output feature size of the 1D convolution, by default 512
        kernel_size (int, optional):
            Kernel size of the 1D convolution, by default 3
        dilation (int, optional):
            Dilation of the 1D convolution, by default 4
    """

    def __init__(self,
                 encoder_hidden: int=256,
                 residual_channels: int=256,
                 gate_channels: int=512,
                 kernel_size: int=3,
                 dilation: int=4):
        super().__init__()
        # Length-preserving padding for an odd kernel is
        # (kernel_size - 1) // 2 * dilation. For the default kernel_size=3
        # this equals ``dilation`` (the previously hard-coded value); other
        # odd kernel sizes now keep the time axis aligned with ``cond`` too.
        self.dilated_conv = Conv1D(
            residual_channels,
            gate_channels,
            kernel_size,
            padding=(kernel_size - 1) // 2 * dilation,
            dilation=dilation)
        self.diffusion_projection = Linear(residual_channels, residual_channels)
        self.conditioner_projection = Conv1D(encoder_hidden, gate_channels, 1)
        self.output_projection = Conv1D(residual_channels, gate_channels, 1)

    def forward(
            self,
            x: paddle.Tensor,
            diffusion_step: paddle.Tensor,
            cond: paddle.Tensor, ):
        """Calculate forward propagation.
        Args:
            x (Tensor(float32)): input feature. (B, residual_channels, T)
            diffusion_step (Tensor(float32)): The embedded timestep
                (adding noise step). (B, residual_channels)
            cond (Tensor(float32)): The auxiliary input (e.g. fastspeech2 encoder output). (B, encoder_hidden, T)

        Returns:
            Tuple[Tensor, Tensor]:
                the residual output ``(x + residual) / sqrt(2)`` with shape
                (B, residual_channels, T), and the skip connection with shape
                (B, gate_channels // 2, T).

        """
        # (B, C) -> (B, C, 1) so that it broadcasts along the time axis.
        diffusion_step = self.diffusion_projection(diffusion_step).unsqueeze(-1)
        cond = self.conditioner_projection(cond)
        y = x + diffusion_step

        y = self.dilated_conv(y) + cond

        # Gated activation: split the channels into a gate and a filter half.
        gate, filter_ = paddle.chunk(y, 2, axis=1)
        y = F.sigmoid(gate) * paddle.tanh(filter_)

        y = self.output_projection(y)
        residual, skip = paddle.chunk(y, 2, axis=1)
        return (x + residual) / math.sqrt(2.0), skip


class SinusoidalPosEmb(nn.Layer):
    """Sinusoidal positional embedding.

    Maps each scalar position to the concatenation of ``dim // 2`` sines and
    ``dim // 2`` cosines evaluated at geometrically spaced frequencies.
    """

    def __init__(self, dim: int=256):
        super().__init__()
        self.dim = dim

    def forward(self, x: paddle.Tensor):
        # A 0-dim tensor gets a leading axis so the indexing below works.
        if x.ndim == 0:
            x = x.unsqueeze(0)
        x = paddle.cast(x, 'float32')

        half_dim = self.dim // 2
        # Log-spacing between consecutive frequencies (base 10000).
        log_step = math.log(10000) / (half_dim - 1)
        freqs = paddle.exp(paddle.arange(half_dim) * -log_step)
        # Outer product: one row per position, one column per frequency.
        angles = x[:, None] * freqs[None, :]
        return paddle.concat([angles.sin(), angles.cos()], axis=-1)


class DiffNet(nn.Layer):
    """A Mel-Spectrogram Denoiser

    Args:
        in_channels (int, optional):
            Number of channels of the input mel-spectrogram, by default 80
        out_channels (int, optional):
            Number of channels of the output mel-spectrogram, by default 80
        kernel_size (int, optional):
            Kernel size of the residual blocks inside, by default 3
        layers (int, optional):
            Number of residual blocks inside, by default 20
        stacks (int, optional):
            The number of groups to split the residual blocks into, by default 5
            Within each group, the dilation of the residual block grows exponentially.
        residual_channels (int, optional):
            Residual channel of the residual blocks, by default 256
        gate_channels (int, optional):
            Gate channel of the residual blocks, by default 512
        skip_channels (int, optional):
            Output channels of the skip projection, by default 256
        aux_channels (int, optional):
            Auxiliary channel of the residual blocks, by default 256
        dropout (float, optional):
            Accepted for interface compatibility; not used by this
            implementation, by default 0.
        bias (bool, optional):
            Accepted for interface compatibility; not used by this
            implementation, by default True
        use_weight_norm (bool, optional):
            Accepted for interface compatibility; not used by this
            implementation, by default False
        init_type (str, optional):
            Accepted for interface compatibility; not used by this
            implementation, by default "kaiming_normal"
    """

    def __init__(
            self,
            in_channels: int=80,
            out_channels: int=80,
            kernel_size: int=3,
            layers: int=20,
            stacks: int=5,
            residual_channels: int=256,
            gate_channels: int=512,
            skip_channels: int=256,
            aux_channels: int=256,
            dropout: float=0.,
            bias: bool=True,
            use_weight_norm: bool=False,
            init_type: str="kaiming_normal", ):
        super().__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.layers = layers
        self.aux_channels = aux_channels
        self.residual_channels = residual_channels
        self.gate_channels = gate_channels
        self.kernel_size = kernel_size
        # Number of blocks after which the dilation restarts from 1.
        self.dilation_cycle_length = layers // stacks
        self.skip_channels = skip_channels

        self.input_projection = Conv1D(self.in_channels, self.residual_channels,
                                       1)
        self.diffusion_embedding = SinusoidalPosEmb(self.residual_channels)
        dim = self.residual_channels
        self.mlp = nn.Sequential(
            Linear(dim, dim * 4), nn.Mish(), Linear(dim * 4, dim))
        self.residual_layers = nn.LayerList([
            ResidualBlock(
                encoder_hidden=self.aux_channels,
                residual_channels=self.residual_channels,
                gate_channels=self.gate_channels,
                kernel_size=self.kernel_size,
                dilation=2**(i % self.dilation_cycle_length))
            for i in range(self.layers)
        ])
        self.skip_projection = Conv1D(self.residual_channels,
                                      self.skip_channels, 1)
        # The output projection consumes the output of ``skip_projection``,
        # so its input size must be ``skip_channels``. (It was previously
        # ``residual_channels``, which only worked when both were equal.)
        self.output_projection = Conv1D(self.skip_channels, self.out_channels,
                                        1)
        # Start from a zero output projection weight.
        zeros_(self.output_projection.weight)

    def forward(
            self,
            spec: paddle.Tensor,
            diffusion_step: paddle.Tensor,
            cond: paddle.Tensor, ):
        """Calculate forward propagation.
        Args:
            spec (Tensor(float32)): The input mel-spectrogram. (B, n_mel, T)
            diffusion_step (Tensor(int64)):  The timestep input (adding noise step). (B,)
            cond (Tensor(float32)): The auxiliary input (e.g. fastspeech2 encoder output). (B, D_enc_out, T)

        Returns:
            x (Tensor(float32)): pred noise (B, n_mel, T)

        """
        x = spec
        x = self.input_projection(x)  # x [B, residual_channel, T]

        x = F.relu(x)
        # Scalar timesteps -> sinusoidal embedding -> MLP.
        diffusion_step = self.diffusion_embedding(diffusion_step)
        diffusion_step = self.mlp(diffusion_step)
        skip = []
        for layer in self.residual_layers:
            x, skip_connection = layer(
                x=x,
                diffusion_step=diffusion_step,
                cond=cond, )
            skip.append(skip_connection)
        # Only the summed skip connections (scaled by 1/sqrt(num_layers)) are
        # used from here on; the residual stream ``x`` is discarded.
        x = paddle.sum(
            paddle.stack(skip), axis=0) / math.sqrt(len(self.residual_layers))
        x = self.skip_projection(x)
        x = F.relu(x)
        x = self.output_projection(x)  # [B, out_channels, T]
        return x


================================================
FILE: paddlespeech/t2s/modules/diffusion.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Diffusion denoising related modules for paddle"""
from typing import Callable
from typing import Optional
from typing import Tuple

import numpy as np
import paddle
import ppdiffusers
from paddle import nn
from ppdiffusers.schedulers import DDPMScheduler


class GaussianDiffusion(nn.Layer):
    """Common Gaussian Diffusion Denoising Model Module 

    Args:
        denoiser (Layer, optional): 
            The model used for denoising noises.
        num_train_timesteps (int, optional): 
            The number of timesteps between the noise and the real during training, by default 1000.
        beta_start (float, optional): 
            beta start parameter for the scheduler, by default 0.0001.
        beta_end (float, optional): 
            beta end parameter for the scheduler, by default 0.02.
        beta_schedule (str, optional): 
            beta schedule parameter for the scheduler, by default 'squaredcos_cap_v2' (cosine schedule).
        num_max_timesteps (int, optional): 
            The max timestep transition from real to noise, by default None.
        stretch (bool, optional): 
            Whether to stretch before diffusion, by default True.
            When enabled, ``min_values`` and ``max_values`` are required by
            ``norm_spec`` / ``denorm_spec``.
        min_values: (paddle.Tensor):
            The minimum value of the feature to stretch.
        max_values: (paddle.Tensor):
            The maximum value of the feature to stretch.
    
    
    Examples: 
        >>> import paddle
        >>> import paddle.nn.functional as F
        >>> from tqdm import tqdm
        >>> 
        >>> denoiser = WaveNetDenoiser()
        >>> diffusion = GaussianDiffusion(denoiser, num_train_timesteps=1000, num_max_timesteps=100)
        >>> x = paddle.ones([4, 80, 192]) # [B, mel_ch, T] # real mel input
        >>> c = paddle.randn([4, 256, 192]) # [B, fs2_encoder_out_ch, T] # fastspeech2 encoder output
        >>> loss = F.mse_loss(*diffusion(x, c))
        >>> loss.backward()
        >>> print('MSE Loss:', loss.item())
        MSE Loss: 1.6669728755950928 
        >>> def create_progress_callback():
        >>>     pbar = None
        >>>     def callback(index, timestep, num_timesteps, sample):
        >>>         nonlocal pbar
        >>>         if pbar is None:
        >>>             pbar = tqdm(total=num_timesteps)
        >>>             pbar.update(index)
        >>>         pbar.update()
        >>> 
        >>>     return callback
        >>> 
        >>> # ds=1000, K_step=60, scheduler=ddpm, from aux fs2 mel output
        >>> ds = 1000
        >>> infer_steps = 1000
        >>> K_step = 60
        >>> scheduler_type = 'ddpm'
        >>> x_in = x
        >>> diffusion = GaussianDiffusion(denoiser, num_train_timesteps=ds, num_max_timesteps=K_step)
        >>> with paddle.no_grad():
        >>>     sample = diffusion.inference(
        >>>         paddle.randn(x.shape), c, ref_x=x_in, 
        >>>         num_inference_steps=infer_steps,
        >>>         scheduler_type=scheduler_type,
        >>>         callback=create_progress_callback())
        100%|█████| 60/60 [00:03<00:00, 18.36it/s] 
        >>> 
        >>> # ds=100, K_step=100, scheduler=ddpm, from gaussian noise
        >>> ds = 100
        >>> infer_steps = 100
        >>> K_step = 100
        >>> scheduler_type = 'ddpm'
        >>> x_in = None
        >>> diffusion = GaussianDiffusion(denoiser, num_train_timesteps=ds, num_max_timesteps=K_step)
        >>> with paddle.no_grad():
        >>>     sample = diffusion.inference(
        >>>         paddle.randn(x.shape), c, ref_x=x_in, 
        >>>         num_inference_steps=infer_steps,
        >>>         scheduler_type=scheduler_type,
        >>>         callback=create_progress_callback())
        100%|█████| 100/100 [00:05<00:00, 18.29it/s] 
        >>> 
        >>> # ds=1000, K_step=1000, scheduler=pndm, infer_step=25, from gaussian noise
        >>> ds = 1000
        >>> infer_steps = 25
        >>> K_step = 1000
        >>> scheduler_type = 'pndm'
        >>> x_in = None
        >>> diffusion = GaussianDiffusion(denoiser, num_train_timesteps=ds, num_max_timesteps=K_step)
        >>> with paddle.no_grad():
        >>>     sample = diffusion.inference(
        >>>         paddle.randn(x.shape), c, ref_x=x_in, 
        >>>         num_inference_steps=infer_steps,
        >>>         scheduler_type=scheduler_type,
        >>>         callback=create_progress_callback())
        100%|█████| 34/34 [00:01<00:00, 19.75it/s]
        >>> 
        >>> # ds=1000, K_step=100, scheduler=pndm, infer_step=50, from aux fs2 mel output
        >>> ds = 1000
        >>> infer_steps = 50
        >>> K_step = 100
        >>> scheduler_type = 'pndm'
        >>> x_in = x
        >>> diffusion = GaussianDiffusion(denoiser, num_train_timesteps=ds, num_max_timesteps=K_step)
        >>> with paddle.no_grad():
        >>>     sample = diffusion.inference(
        >>>         paddle.randn(x.shape), c, ref_x=x_in, 
        >>>         num_inference_steps=infer_steps,
        >>>         scheduler_type=scheduler_type,
        >>>         callback=create_progress_callback())
        100%|█████| 14/14 [00:00<00:00, 23.80it/s]

    """

    def __init__(
            self,
            denoiser: nn.Layer,
            num_train_timesteps: Optional[int]=1000,
            beta_start: Optional[float]=0.0001,
            beta_end: Optional[float]=0.02,
            beta_schedule: Optional[str]="squaredcos_cap_v2",
            num_max_timesteps: Optional[int]=None,
            stretch: bool=True,
            min_values: paddle.Tensor=None,
            max_values: paddle.Tensor=None, ):
        super().__init__()

        # Kept so that ``inference`` can build a scheduler of another type
        # with the same noise schedule.
        self.num_train_timesteps = num_train_timesteps
        self.beta_start = beta_start
        self.beta_end = beta_end
        self.beta_schedule = beta_schedule

        self.denoiser = denoiser
        self.noise_scheduler = DDPMScheduler(
            num_train_timesteps=num_train_timesteps,
            beta_start=beta_start,
            beta_end=beta_end,
            beta_schedule=beta_schedule)
        self.num_max_timesteps = num_max_timesteps
        self.stretch = stretch
        self.min_values = min_values
        self.max_values = max_values

    def norm_spec(self, x):
        """
        Linearly map x to [-1, 1]
        Args:
            x: [B, T, N]
        """
        return (x - self.min_values) / (self.max_values - self.min_values
                                        ) * 2 - 1

    def denorm_spec(self, x):
        """
        Inverse of ``norm_spec``: linearly map x from [-1, 1] back to
        [min_values, max_values].
        """
        return (x + 1) / 2 * (self.max_values - self.min_values
                              ) + self.min_values

    def forward(self, x: paddle.Tensor, cond: Optional[paddle.Tensor]=None
                ) -> Tuple[paddle.Tensor, paddle.Tensor]:
        """Generate random timesteps noised x.

        Args:
            x (Tensor): 
                The input for adding noises.
            cond (Tensor, optional):
                Conditional input for compute noises.
          
        Returns: 
            y (Tensor): 
                The denoiser output for the noised input.
            target (Tensor):
                The noises which is added to the input.

        """
        if self.stretch:
            # norm_spec works on [B, T, N]; transpose there and back.
            x = x.transpose((0, 2, 1))
            x = self.norm_spec(x)
            x = x.transpose((0, 2, 1))

        noise_scheduler = self.noise_scheduler

        # Sample noise that we'll add to the mel-spectrograms
        target = noise = paddle.randn(x.shape)

        # Sample a random timestep for each mel-spectrogram
        num_timesteps = self.num_train_timesteps
        if self.num_max_timesteps is not None:
            num_timesteps = self.num_max_timesteps
        timesteps = paddle.randint(0, num_timesteps, (x.shape[0], ))

        # Add noise to the clean mel-spectrograms according to the noise magnitude at each timestep
        # (this is the forward diffusion process)
        noisy_images = noise_scheduler.add_noise(x, noise, timesteps)

        y = self.denoiser(noisy_images, timesteps, cond)

        # then compute loss use output y and noisy target for prediction_type == "epsilon"
        return y, target

    def inference(self,
                  noise: paddle.Tensor,
                  cond: Optional[paddle.Tensor]=None,
                  ref_x: Optional[paddle.Tensor]=None,
                  num_inference_steps: Optional[int]=1000,
                  strength: Optional[float]=None,
                  scheduler_type: Optional[str]="ddpm",
                  clip_noise: Optional[bool]=False,
                  clip_noise_range: Optional[Tuple[float, float]]=(-1, 1),
                  callback: Optional[Callable[[int, int, int, paddle.Tensor],
                                              None]]=None,
                  callback_steps: Optional[int]=1):
        """Denoising input from noises. Refer to ppdiffusers img2img pipeline.

        Args:
            noise (Tensor): 
                The input tensor as a starting point for denoising. 
            cond (Tensor, optional):
                Conditional input for compute noises. (N, C_aux, T)
            ref_x (Tensor, optional):
                The real output for the denoising process to refer.
                When given, denoising starts from ``ref_x`` noised at the
                first (largest) inference timestep instead of from ``noise``.
            num_inference_steps (int, optional):
                The number of timesteps between the noise and the real during inference, by default 1000.
            strength (float, optional):
                Mixing strength of ref_x with noise. Range [0,1], by default None.
                Currently accepted but not used by this implementation.
            scheduler_type (str, optional):
                Noise scheduler for generate noises. 
                Choose a great scheduler can skip many denoising step, by default 'ddpm'.
                only support 'ddpm' now !
            clip_noise (bool, optional):
                Whether to clip each denoised output, by default False.
            clip_noise_range (tuple, optional):
                denoised output min and max value range after clip, by default (-1, 1).
            callback (Callable[[int,int,int,Tensor], None], optional):
                Callback function during denoising steps.
                Currently accepted but not invoked by this implementation.
            callback_steps (int, optional):
                The step to call the callback function.
                Currently accepted but not used by this implementation.
          
        Returns: 
            denoised_output (Tensor): 
                The denoised output tensor.

        Raises:
            ValueError: 
                If ``ppdiffusers.schedulers`` has no class named
                ``<scheduler_type>scheduler`` (case-insensitive).

        """
        # Look up the scheduler class by its lower-cased name.
        scheduler_cls = None
        for clsname in dir(ppdiffusers.schedulers):
            if clsname.lower() == scheduler_type + "scheduler":
                scheduler_cls = getattr(ppdiffusers.schedulers, clsname)
                break

        if scheduler_cls is None:
            raise ValueError(f"No such scheduler type named {scheduler_type}")

        scheduler = scheduler_cls(
            num_train_timesteps=self.num_train_timesteps,
            beta_start=self.beta_start,
            beta_end=self.beta_end,
            beta_schedule=self.beta_schedule)

        # set timesteps
        scheduler.set_timesteps(num_inference_steps)

        # for ddpm: num_inference_steps - 1, ..., 1, 0.
        # This must be defined on every path: the denoising loop below always
        # iterates over it, also when no ``ref_x`` is given.
        timesteps = paddle.to_tensor(np.flipud(np.arange(num_inference_steps)))

        noisy_input = noise
        if ref_x is not None:
            if self.stretch:
                # norm_spec works on [B, T, N]; transpose there and back.
                ref_x = ref_x.transpose((0, 2, 1))
                ref_x = self.norm_spec(ref_x)
                ref_x = ref_x.transpose((0, 2, 1))
            # Start from the reference noised at the first timestep.
            noisy_input = scheduler.add_noise(ref_x, noise, timesteps[0])

        denoised_output = noisy_input
        if clip_noise:
            n_min, n_max = clip_noise_range
            denoised_output = paddle.clip(denoised_output, n_min, n_max)
        for i, t in enumerate(timesteps):
            denoised_output = scheduler.scale_model_input(denoised_output, t)
            noise_pred = self.denoiser(denoised_output, t, cond)
            # compute the previous noisy sample x_t -> x_t-1
            denoised_output = scheduler.step(noise_pred, t,
                                             denoised_output).prev_sample
            if clip_noise:
                denoised_output = paddle.clip(denoised_output, n_min, n_max)

        if self.stretch:
            # Map the result from [-1, 1] back to the original feature range.
            denoised_output = denoised_output.transpose((0, 2, 1))
            denoised_output = self.denorm_spec(denoised_output)
            denoised_output = denoised_output.transpose((0, 2, 1))

        return denoised_output


================================================
FILE: paddlespeech/t2s/modules/fftconv1d.py
================================================
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import typing
from typing import Optional
from typing import Sequence

import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from ...utils import satisfy_paddle_version

__all__ = [
    "fft_conv1d",
    "FFTConv1D",
]


def __unfold(x, kernel_size: int, stride: int):
    """1D only unfolding similar to the one from Paddlepaddle.

    Notes
    ------
    Given a tensor `x` of size `[*, T]` this will return
    a tensor `[*, F, K]` with `K` the kernel size, and `F` the number
    of frames. The i-th frame is a view onto `i * stride: i * stride + kernel_size`.
    This will automatically pad `x` (on the right) to cover at least once all
    entries in `x`.

    Args:
        x (Tensor):
            tensor for which to return the frames.
        kernel_size (int):
            size of each frame.
        stride (int):
            stride between each frame.

    Returns:
        Tensor: strided view of the padded input with shape `[*, F, K]`.

    Raises:
        RuntimeError: if the innermost stride of the padded tensor is not 1.
    """
    shape = list(x.shape)
    length = shape.pop(-1)
    # Number of frames needed so that every input sample is covered at least once.
    n_frames = math.ceil((max(length, kernel_size) - kernel_size) / stride) + 1
    tgt_length = (n_frames - 1) * stride + kernel_size
    padded = F.pad(x, (0, tgt_length - length), data_format="NCL")
    strides: typing.List[int] = []
    for dim in range(padded.dim()):
        strides.append(padded.strides[dim])
    # Drop the innermost stride *outside* of any `assert`: an assert with a
    # side effect is stripped under `python -O`, which would leave one stride
    # too many and make `as_strided` build a wrong view.
    innermost_stride = strides.pop(-1)
    if innermost_stride != 1:
        raise RuntimeError("data should be contiguous")
    # Frame axis advances by `stride` samples, the in-frame axis by one sample.
    strides = strides + [stride, 1]
    return padded.as_strided(shape + [n_frames, kernel_size], strides)


def fft_conv1d(
        x: paddle.Tensor,
        weight: paddle.Tensor,
        bias: Optional[paddle.Tensor]=None,
        stride: int=1,
        padding: int=0,
        block_ratio: float=5, ):
    """
    Same as `paddle.nn.functional.conv1d` but using FFT for the convolution.
    Please check PaddlePaddle documentation for more information.

    Notes
    ------
    This function is faster than `paddle.nn.functional.conv1d` only in specific cases.
    Typically, the kernel size should be of the order of 256 to see any real gain,
    for a stride of 1.
    Dilation and groups are not supported at the moment. This function might use
    more memory than the default Conv1d implementation.

    Args:
        x (Tensor):
            x signal of shape `[B, C, T]`.
        weight (Tensor):
            weight of the convolution `[D, C, K]` with `D` the number of output channels.
        bias (Tensor or None):
            if not None, bias term for the convolution.
        stride (int):
            stride of convolution.
        padding (int):
            padding to apply to x (same amount on both sides).
        block_ratio (float):
            can be tuned for speed. x is split in chunks with a size of
            `int(block_ratio * kernel_size)`. Must be at least 1.

    Returns:
        Tensor: convolution output of shape `[B, D, (T + 2 * padding - K) // stride + 1]`.

    Raises:
        RuntimeError: if the padded input is shorter than the kernel, or if
            `block_ratio` is smaller than 1.
    """
    x = F.pad(x, (padding, padding), data_format="NCL")
    batch, _, length = x.shape
    out_channels, _, kernel_size = weight.shape

    if length < kernel_size:
        raise RuntimeError(
            f"Input should be at least as large as the kernel size {kernel_size}, "
            f"but it is only {length} samples long.")
    if block_ratio < 1:
        raise RuntimeError("Block ratio must be greater than or equal to 1.")

    block_size: int = min(int(kernel_size * block_ratio), length)
    # Number of valid (non wrapped-around) output samples produced per block.
    fold_stride = block_size - kernel_size + 1

    # Zero-pad the kernel to the block size so both FFTs have the same length.
    weight = F.pad(
        weight, (0, block_size - weight.shape[-1]),
        mode="constant",
        value=0.0,
        data_format="NCL")

    weight_z = paddle.fft.rfft(weight, axis=-1)

    # We pad `x` and get the different frames, on which the convolution is
    # evaluated independently in the frequency domain.
    frames = __unfold(x, block_size, fold_stride)

    frames_z = paddle.fft.rfft(frames, axis=-1)
    # Multiplying by the conjugate gives a cross-correlation, which is what
    # `conv1d` computes.
    weight_z_coml = paddle.conj(weight_z)
    out_z = paddle.einsum("bcft,dct->bdft", frames_z, weight_z_coml)
    out = paddle.fft.irfft(out_z, n=block_size, axis=-1)

    # The last bit is invalid, because FFT will do a circular convolution.
    # Keep the first `fold_stride` samples of each block. This equals
    # `out[..., :-kernel_size + 1]` for kernel_size > 1, but unlike that
    # expression it does not collapse to an empty slice when kernel_size == 1.
    out = out[..., :fold_stride]
    out = out.reshape([batch, out_channels, -1])
    out = out[..., ::stride]
    target_length = (length - kernel_size) // stride + 1
    out = out[..., :target_length]
    if bias is not None:
        out += bias[:, None]
    return out


class FFTConv1D(paddle.nn.Layer):
    """
    Drop-in counterpart of `paddle.nn.Conv1D` that evaluates the convolution
    with `fft_conv1d`. See the PaddlePaddle documentation of `paddle.nn.Conv1D`
    for the meaning of the shared arguments.

    Notes
    ------
    This module is faster than `paddle.nn.Conv1D` only in specific cases.
    Typically, `kernel_size` should be of the order of 256 to see any real gain,
    for a stride of 1.
    Dilation and groups are not supported at the moment. This module might use
    more memory than the default Conv1D implementation.

    Args:
        in_channels (int):
            number of `x` channels.
        out_channels (int):
            number of output channels.
        kernel_size (int):
            kernel size of convolution.
        stride (int):
            stride of convolution.
        padding (int):
            padding to apply to `x`.
        bias_attr (bool):
            if True, use a bias term.

    Examples:
        >>> fftconv = FFTConv1D(12, 24, 128, 4)
        >>> x = paddle.randn([4, 12, 1024])
        >>> print(list(fftconv(x).shape))
        [4, 24, 225]
    """

    def __init__(
            self,
            in_channels: int,
            out_channels: int,
            kernel_size: int,
            stride: int=1,
            padding: int=0,
            bias_attr: bool=True, ):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding

        # A throwaway Conv1D is built only to borrow its initialised
        # parameters; the layer itself is not kept.
        reference_conv = paddle.nn.Conv1D(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            bias_attr=bias_attr)
        self.weight = reference_conv.weight
        self.bias = reference_conv.bias if bias_attr else None

    def forward(self, x: paddle.Tensor):
        """Apply the FFT-based convolution to `x` using the stored parameters."""
        return fft_conv1d(
            x,
            self.weight,
            self.bias,
            self.stride,
            self.padding, )


# Paddle's built-in unfold API is currently extremely slow, so `__unfold` is
# built on the `.strides` / `.as_strided` tensor APIs instead. Those are only
# available from Paddle 2.6 onwards; on older versions the FFT-based
# implementations are replaced by the regular convolution function and layer.
if not satisfy_paddle_version('2.6'):
    fft_conv1d, FFTConv1D = F.conv1d, nn.Conv1D


================================================
FILE: paddlespeech/t2s/modules/geometry.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle


def shuffle_dim(x, axis, perm=None):
    """Reorder a tensor along one axis, either with a given or a random permutation.

    Args:
        x (Tensor):
            The input tensor.
        axis (int):
            The axis to shuffle.
        perm (List[int], ndarray, optional):
            The new order of the entries along the ``axis``-th dimension.
            It is a permutation of ``[0, d)``, where d is the size of the
            ``axis``-th dimension of the input tensor. If not provided,
            a random permutation drawn with numpy is used. Defaults to None.

    Returns:
        Tensor: The shuffled tensor, which has the same shape as x does.

    Raises:
        ValueError: If ``perm`` is given and its length differs from the size
            of the ``axis``-th dimension.
    """
    axis_len = x.shape[axis]

    if perm is None:
        order = np.random.permutation(axis_len)
    else:
        if len(perm) != axis_len:
            raise ValueError("length of permutation should equals the input "
                             "tensor's axis-th dimension's size")
        order = np.array(perm)

    index = paddle.to_tensor(order)
    return paddle.gather(x, index, axis)


================================================
FILE: paddlespeech/t2s/modules/layer_norm.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Layer normalization module."""
import paddle
from paddle import nn


class LayerNorm(nn.LayerNorm):
    """Layer normalization module.

    Thin wrapper around ``paddle.nn.LayerNorm`` that can normalize over an
    axis other than the last one by swapping that axis with the last axis
    before and after the normalization.

    Args:
        nout (int):
            Output dim size.
        dim (int):
            Dimension to be normalized.
    """

    def __init__(self, nout, dim=-1):
        """Construct an LayerNorm object."""
        super().__init__(nout)
        self.dim = dim

    def forward(self, x):
        """Apply layer normalization.

        Args:
            x (Tensor):
                Input tensor.

        Returns:
            Tensor: Normalized tensor.
        """

        if self.dim == -1:
            return super(LayerNorm, self).forward(x)
        else:
            len_dim = len(x.shape)
            # Resolve a negative axis into a local variable. Writing the result
            # back to `self.dim` would freeze the axis for the rank of the
            # first input and silently pick the wrong axis for later inputs of
            # a different rank.
            dim = self.dim
            if dim < 0:
                dim = len_dim + dim
            assert dim >= 0

            orig_perm = list(range(len_dim))
            new_perm = orig_perm[:]
            # Python style item change is not able when converting dygraph to static graph.
            # new_perm[dim], new_perm[len_dim -1] = new_perm[len_dim -1], new_perm[dim]
            # use C++ style item change here
            temp = new_perm[dim]
            new_perm[dim] = new_perm[len_dim - 1]
            new_perm[len_dim - 1] = temp

            # Swapping two axes is its own inverse, so the same permutation
            # restores the original layout after normalization.
            return paddle.transpose(
                super(LayerNorm, self).forward(paddle.transpose(x, new_perm)),
                new_perm)


================================================
FILE: paddlespeech/t2s/modules/losses.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
from typing import Callable
from typing import List
from typing import Optional
from typing import Tuple
from typing import Union

import librosa
import numpy as np
import paddle
from paddle import nn
from paddle.nn import functional as F
from scipy import signal
from scipy.stats import betabinom
from typeguard import typechecked

from paddlespeech.audiotools.core.audio_signal import AudioSignal
from paddlespeech.audiotools.core.audio_signal import STFTParams
from paddlespeech.t2s.modules.nets_utils import make_non_pad_mask
from paddlespeech.t2s.modules.predictor.duration_predictor import (
    DurationPredictorLoss,  # noqa: H301
)


# Losses for WaveRNN
def log_sum_exp(x):
    """Numerically stable log-sum-exp over the last axis of ``x``.

    The per-row maximum is subtracted before exponentiating so that the
    exponentials cannot overflow, and is added back afterwards.
    """
    # TF ordering: the reduced axis is the last one.
    last_axis = len(x.shape) - 1
    row_max = paddle.max(x, axis=last_axis)
    row_max_keepdim = paddle.max(x, axis=last_axis, keepdim=True)
    shifted_sum = paddle.sum(paddle.exp(x - row_max_keepdim), axis=last_axis)
    return row_max + paddle.log(shifted_sum)


# It is adapted from https://github.com/r9y9/wavenet_vocoder/blob/master/wavenet_vocoder/mixture.py
def discretized_mix_logistic_loss(y_hat,
                                  y,
                                  num_classes=65536,
                                  log_scale_min=None,
                                  reduce=True):
    """Negative log-likelihood under a discretized mixture of logistics.

    Args:
        y_hat (Tensor):
            Mixture parameters with the channel axis last, i.e. (B, T, C),
            where C must be a multiple of 3. The channels are split into
            mixture logits, means and log scales, in that order.
        y (Tensor):
            Target values; expanded to the shape of the means, so
            (B, T, 1) per the inline comment below.
            NOTE(review): the +/-0.999 edge thresholds suggest targets are
            expected in [-1, 1] -- confirm against the caller.
        num_classes (int, optional):
            Number of quantization levels; sets the half bin width
            ``1 / (num_classes - 1)``. (Default value = 65536)
        log_scale_min (float, optional):
            Lower bound applied to the log scales. Defaults to ``log(1e-14)``.
        reduce (bool, optional):
            If True return the mean loss, otherwise the per-position loss
            with a trailing singleton axis. (Default value = True)

    Returns:
        Tensor: The loss value(s).
    """
    if log_scale_min is None:
        log_scale_min = float(np.log(1e-14))
    # Temporarily move channels to axis 1 so the checks below run on (B, C, T).
    y_hat = y_hat.transpose([0, 2, 1])
    assert y_hat.dim() == 3
    assert y_hat.shape[1] % 3 == 0
    nr_mix = y_hat.shape[1] // 3

    # back to (B x T x C)
    y_hat = y_hat.transpose([0, 2, 1])

    # unpack parameters. (B, T, num_mixtures) x 3
    logit_probs = y_hat[:, :, :nr_mix]
    means = y_hat[:, :, nr_mix:2 * nr_mix]
    log_scales = paddle.clip(
        y_hat[:, :, 2 * nr_mix:3 * nr_mix], min=log_scale_min)

    # B x T x 1 -> B x T x num_mixtures
    y = y.expand_as(means)
    centered_y = paddle.cast(y, dtype=paddle.get_default_dtype()) - means
    inv_stdv = paddle.exp(-log_scales)
    # Logistic CDF evaluated at the upper and lower edge of the target's bin.
    plus_in = inv_stdv * (centered_y + 1. / (num_classes - 1))
    cdf_plus = F.sigmoid(plus_in)
    min_in = inv_stdv * (centered_y - 1. / (num_classes - 1))
    cdf_min = F.sigmoid(min_in)

    # log probability for the lowest-bin edge case (before scaling)
    # equivalent: log(sigmoid(plus_in))
    # with softplus(x) = log(1 + e^{x})
    log_cdf_plus = plus_in - F.softplus(plus_in)

    # log probability for the highest-bin edge case (before scaling)
    # equivalent: log(1 - sigmoid(min_in))
    log_one_minus_cdf_min = -F.softplus(min_in)

    # probability for all other cases
    cdf_delta = cdf_plus - cdf_min

    mid_in = inv_stdv * centered_y
    # log probability density at the center of the bin; used below as a
    # fallback when cdf_delta is too small to take its log reliably
    log_pdf_mid = mid_in - log_scales - 2. * F.softplus(mid_in)

    # TODO: cdf_delta <= 1e-5 actually can happen. How can we choose the value
    # for num_classes=65536 case? 1e-7? not sure..
    inner_inner_cond = cdf_delta > 1e-5

    # Boolean conditions are cast to floats so they can be used as 0/1
    # blending weights instead of a `where`.
    inner_inner_cond = paddle.cast(
        inner_inner_cond, dtype=paddle.get_default_dtype())

    inner_inner_out = inner_inner_cond * paddle.log(
        paddle.clip(cdf_delta, min=1e-12)) + (1. - inner_inner_cond) * (
            log_pdf_mid - np.log((num_classes - 1) / 2))

    # Targets above 0.999 use the upper-edge formula.
    inner_cond = y > 0.999

    inner_cond = paddle.cast(inner_cond, dtype=paddle.get_default_dtype())

    inner_out = inner_cond * log_one_minus_cdf_min + (1. - inner_cond
                                                      ) * inner_inner_out
    # Targets below -0.999 use the lower-edge formula.
    cond = y < -0.999
    cond = paddle.cast(cond, dtype=paddle.get_default_dtype())

    log_probs = cond * log_cdf_plus + (1. - cond) * inner_out
    # Add the log mixture weights before marginalising over components.
    log_probs = log_probs + F.log_softmax(logit_probs, -1)

    if reduce:
        return -paddle.mean(log_sum_exp(log_probs))
    else:
        return -log_sum_exp(log_probs).unsqueeze(-1)


def sample_from_discretized_mix_logistic(y, log_scale_min=None):
    """
    Sample from discretized mixture of logistic distributions

    Args:
        y(Tensor): (B, C, T), where C is a multiple of 3 holding the mixture
            logits, means and log scales, in that order.
        log_scale_min(float, optional): Lower bound applied to the log scales.
            Defaults to ``log(1e-14)``. (Default value = None)

    Returns:
        Tensor: sample in range of [-1, 1].
    """
    if log_scale_min is None:
        log_scale_min = float(np.log(1e-14))

    assert y.shape[1] % 3 == 0
    nr_mix = y.shape[1] // 3

    # (B, T, C)
    y = y.transpose([0, 2, 1])
    logit_probs = y[:, :, :nr_mix]

    # sample mixture indicator from softmax (Gumbel-max trick)
    temp = paddle.uniform(
        logit_probs.shape, dtype=logit_probs.dtype, min=1e-5, max=1.0 - 1e-5)
    temp = logit_probs - paddle.log(-paddle.log(temp))
    argmax = paddle.argmax(temp, axis=-1)

    # (B, T) -> (B, T, nr_mix)
    one_hot = F.one_hot(argmax, nr_mix)
    one_hot = paddle.cast(one_hot, dtype=paddle.get_default_dtype())

    # select logistic parameters of the chosen component
    means = paddle.sum(y[:, :, nr_mix:2 * nr_mix] * one_hot, axis=-1)
    log_scales = paddle.clip(
        paddle.sum(y[:, :, 2 * nr_mix:3 * nr_mix] * one_hot, axis=-1),
        min=log_scale_min)
    # sample from logistic & clip to interval
    # we don't actually round to the nearest 8bit value when sampling
    u = paddle.uniform(means.shape, min=1e-5, max=1.0 - 1e-5)
    x = means + paddle.exp(log_scales) * (paddle.log(u) - paddle.log(1. - u))
    # Clip to [-1, 1]. The upper bound used to be -1., which forced every
    # sample to exactly -1 regardless of the distribution parameters.
    x = paddle.clip(x, min=-1., max=1.)

    return x


# Loss for Tacotron2
class GuidedAttentionLoss(nn.Layer):
    """Guided attention loss function module.

    Implements the guided attention loss from `Efficiently Trainable
    Text-to-Speech System Based on Deep Convolutional Networks with Guided
    Attention`_, which penalizes attention weights that lie far from the
    diagonal.

    .. _`Efficiently Trainable Text-to-Speech System
        Based on Deep Convolutional Networks with Guided Attention`:
        https://arxiv.org/abs/1710.08969

    """

    def __init__(self, sigma=0.4, alpha=1.0, reset_always=True):
        """Initialize guided attention loss module.

        Args:
            sigma (float, optional): Standard deviation to control how close attention to a diagonal.
            alpha (float, optional): Scaling coefficient (lambda).
            reset_always (bool, optional): Whether to always reset masks.

        """
        super().__init__()
        self.sigma = sigma
        self.alpha = alpha
        self.reset_always = reset_always
        # Both caches start out empty and are filled lazily in forward().
        self._reset_masks()

    def _reset_masks(self):
        """Drop the cached penalty and padding masks."""
        self.guided_attn_masks = None
        self.masks = None

    def forward(self, att_ws, ilens, olens):
        """Calculate forward propagation.

        Args:
            att_ws(Tensor): Batch of attention weights (B, T_max_out, T_max_in).
            ilens(Tensor(int64)): Batch of input lengths (B,).
            olens(Tensor(int64)): Batch of output lengths (B,).

        Returns:
            Tensor: Guided attention loss value.

        """
        # Build whichever mask is not cached yet.
        if self.guided_attn_masks is None:
            self.guided_attn_masks = self._make_guided_attention_masks(ilens,
                                                                       olens)
        if self.masks is None:
            self.masks = self._make_masks(ilens, olens)

        penalised = self.guided_attn_masks * att_ws
        valid = self.masks.broadcast_to(penalised.shape)
        # Average over non-padded positions only.
        loss = paddle.mean(penalised.masked_select(valid))

        if self.reset_always:
            self._reset_masks()
        return self.alpha * loss

    def _make_guided_attention_masks(self, ilens, olens):
        """Stack the per-utterance penalty masks into one zero-padded batch."""
        batch_size = len(ilens)
        longest_in = max(ilens)
        longest_out = max(olens)
        batch_masks = paddle.zeros((batch_size, longest_out, longest_in))

        for idx, lengths in enumerate(zip(ilens, olens)):
            ilen, olen = lengths
            batch_masks[idx, :olen, :ilen] = self._make_guided_attention_mask(
                ilen, olen, self.sigma)
        return batch_masks

    @staticmethod
    def _make_guided_attention_mask(ilen, olen, sigma):
        """Make guided attention mask.

        Examples
        ----------
        >>> guided_attn_mask = _make_guided_attention_mask(5, 5, 0.4)
        >>> guided_attn_mask.shape
        [5, 5]
        >>> guided_attn_mask
        tensor([[0.0000, 0.1175, 0.3935, 0.6753, 0.8647],
                [0.1175, 0.0000, 0.1175, 0.3935, 0.6753],
                [0.3935, 0.1175, 0.0000, 0.1175, 0.3935],
                [0.6753, 0.3935, 0.1175, 0.0000, 0.1175],
                [0.8647, 0.6753, 0.3935, 0.1175, 0.0000]])
        >>> guided_attn_mask = _make_guided_attention_mask(3, 6, 0.4)
        >>> guided_attn_mask.shape
        [6, 3]
        >>> guided_attn_mask
        tensor([[0.0000, 0.2934, 0.7506],
                [0.0831, 0.0831, 0.5422],
                [0.2934, 0.0000, 0.2934],
                [0.5422, 0.0831, 0.0831],
                [0.7506, 0.2934, 0.0000],
                [0.8858, 0.5422, 0.0831]])

        """
        out_pos, in_pos = paddle.meshgrid(
            paddle.arange(olen), paddle.arange(ilen))
        out_pos = out_pos.cast(dtype=paddle.float32)
        in_pos = in_pos.cast(dtype=paddle.float32)
        # Distance between the relative input and output positions.
        offset = in_pos / ilen - out_pos / olen
        return 1.0 - paddle.exp(-(offset**2) / (2 * (sigma**2)))

    @staticmethod
    def _make_masks(ilens, olens):
        """Make masks indicating non-padded part.

        Args:
            ilens(Tensor(int64) or List):
                Batch of lengths (B,).
            olens(Tensor(int64) or List):
                Batch of lengths (B,).

        Returns:
            Tensor: Mask tensor indicating non-padded part.

        Examples:
            >>> ilens, olens = [5, 2], [8, 5]
            >>> _make_masks(ilens, olens)
            tensor([[[1, 1, 1, 1, 1],
                    [1, 1, 1, 1, 1],
                    [1, 1, 1, 1, 1],
                    [1, 1, 1, 1, 1],
                    [1, 1, 1, 1, 1],
                    [1, 1, 1, 1, 1],
                    [1, 1, 1, 1, 1],
                    [1, 1, 1, 1, 1]],

                    [[1, 1, 0, 0, 0],
                    [1, 1, 0, 0, 0],
                    [1, 1, 0, 0, 0],
                    [1, 1, 0, 0, 0],
                    [1, 1, 0, 0, 0],
                    [0, 0, 0, 0, 0],
                    [0, 0, 0, 0, 0],
                    [0, 0, 0, 0, 0]]], dtype=paddle.uint8)

        """
        # (B, T_in) -> (B, 1, T_in)
        input_valid = make_non_pad_mask(ilens).unsqueeze(-2)
        # (B, T_out) -> (B, T_out, 1)
        output_valid = make_non_pad_mask(olens).unsqueeze(-1)
        # (B, T_out, T_in): a cell is valid only if both positions are valid
        return paddle.logical_and(output_valid, input_valid)


class GuidedMultiHeadAttentionLoss(GuidedAttentionLoss):
    """Guided attention loss function module for multi head attention.

    Args:
        sigma (float, optional): Standard deviation to control
            how close attention to a diagonal.
        alpha (float, optional): Scaling coefficient (lambda).
        reset_always (bool, optional): Whether to always reset masks.

    """

    def forward(self, att_ws, ilens, olens):
        """Calculate forward propagation.

        Args:
            att_ws(Tensor):
                Batch of multi head attention weights (B, H, T_max_out, T_max_in).
            ilens(Tensor):
                Batch of input lengths (B,).
            olens(Tensor):
                Batch of output lengths (B,).

        Returns:
            Tensor: Guided attention loss value.

        """
        # Both masks get a singleton axis at position 1 so that they
        # broadcast against the head axis of `att_ws`.
        if self.guided_attn_masks is None:
            penalty = self._make_guided_attention_masks(ilens, olens)
            self.guided_attn_masks = penalty.unsqueeze(1)
        if self.masks is None:
            non_pad = self._make_masks(ilens, olens)
            self.masks = non_pad.unsqueeze(1)

        penalised = self.guided_attn_masks * att_ws
        valid = self.masks.broadcast_to(penalised.shape)
        loss = paddle.mean(penalised.masked_select(valid))

        if self.reset_always:
            self._reset_masks()

        return self.alpha * loss


class Tacotron2Loss(nn.Layer):
    """Loss function module for Tacotron2."""

    def __init__(self,
                 use_masking=True,
                 use_weighted_masking=False,
                 bce_pos_weight=20.0):
        """Initialize Tactoron2 loss module.

        Args:
            use_masking (bool):
                Whether to apply masking for padded part in loss calculation.
            use_weighted_masking (bool):
                Whether to apply weighted masking in loss calculation.
                Must not be enabled together with ``use_masking``.
            bce_pos_weight (float):
                Weight of positive sample of stop token.
        """
        super().__init__()
        assert (use_masking != use_weighted_masking) or not use_masking
        self.use_masking = use_masking
        self.use_weighted_masking = use_weighted_masking

        # define criterions
        # Weighted masking needs element-wise losses so that the weights can
        # be applied before the final reduction.
        reduction = "none" if self.use_weighted_masking else "mean"
        self.l1_criterion = nn.L1Loss(reduction=reduction)
        self.mse_criterion = nn.MSELoss(reduction=reduction)
        self.bce_criterion = nn.BCEWithLogitsLoss(
            reduction=reduction, pos_weight=paddle.to_tensor(bce_pos_weight))

    def forward(self, after_outs, before_outs, logits, ys, stop_labels, olens):
        """Calculate forward propagation.

        Args:
            after_outs(Tensor):
                Batch of outputs after postnets (B, Lmax, odim).
            before_outs(Tensor):
                Batch of outputs before postnets (B, Lmax, odim).
            logits(Tensor):
                Batch of stop logits (B, Lmax).
            ys(Tensor):
                Batch of padded target features (B, Lmax, odim).
            stop_labels(Tensor(int64)):
                Batch of the sequences of stop token labels (B, Lmax).
            olens(Tensor(int64)):
                Batch of output lengths (B,), used to build the
                non-padding mask.

        Returns:
            Tensor:
                L1 loss value.
            Tensor:
                Mean square error loss value.
            Tensor:
                Binary cross entropy loss value.
        """
        # make mask and apply it
        if self.use_masking:
            masks = make_non_pad_mask(olens).unsqueeze(-1)
            ys = ys.masked_select(masks.broadcast_to(ys.shape))
            after_outs = after_outs.masked_select(
                masks.broadcast_to(after_outs.shape))
            before_outs = before_outs.masked_select(
                masks.broadcast_to(before_outs.shape))
            stop_labels = stop_labels.masked_select(
                masks[:, :, 0].broadcast_to(stop_labels.shape))
            logits = logits.masked_select(
                masks[:, :, 0].broadcast_to(logits.shape))

        # calculate loss
        l1_loss = self.l1_criterion(after_outs, ys) + self.l1_criterion(
            before_outs, ys)
        mse_loss = self.mse_criterion(after_outs, ys) + self.mse_criterion(
            before_outs, ys)
        bce_loss = self.bce_criterion(logits, stop_labels)

        # make weighted mask and apply it
        if self.use_weighted_masking:
            masks = make_non_pad_mask(olens).unsqueeze(-1)
            # Explicit casts instead of `Tensor.float()`, which may not be
            # available on every Paddle build.
            float_masks = paddle.cast(masks, dtype=ys.dtype)
            # Every frame of an utterance gets weight 1 / (its length).
            weights = float_masks / float_masks.sum(axis=1, keepdim=True)
            # Shape entries are integer tensors; cast them so the divisions
            # below do not mix integer and floating point operands.
            batch_size = paddle.cast(paddle.shape(ys)[0], dtype=weights.dtype)
            odim = paddle.cast(paddle.shape(ys)[2], dtype=weights.dtype)
            out_weights = weights / (batch_size * odim)
            logit_weights = weights / batch_size

            # apply weight
            # `broadcast_to` expects a target *shape*, not the loss tensor.
            l1_loss = l1_loss.multiply(out_weights)
            l1_loss = l1_loss.masked_select(
                masks.broadcast_to(l1_loss.shape)).sum()
            mse_loss = mse_loss.multiply(out_weights)
            mse_loss = mse_loss.masked_select(
                masks.broadcast_to(mse_loss.shape)).sum()
            bce_loss = bce_loss.multiply(logit_weights.squeeze(-1))
            bce_loss = bce_loss.masked_select(
                masks.squeeze(-1).broadcast_to(bce_loss.shape)).sum()

        return l1_loss, mse_loss, bce_loss


# Losses for GAN Vocoder
def stft(x,
         fft_size,
         hop_length=None,
         win_length=None,
         window='hann',
         center=True,
         pad_mode='reflect'):
    """Perform STFT and convert to magnitude spectrogram.
    Args:
        x(Tensor):
            Input signal tensor (B, T).
        fft_size(int):
            FFT size.
        hop_length(int, optional):
            Hop size, forwarded to `paddle.signal.stft`. (Default value = None)
        win_length(int, optional):
            Window length. If None, `fft_size` is used. (Default value = None)
        window(str, optional):
            Name of window function, see `scipy.signal.get_window` for more details. Defaults to "hann".
        center(bool, optional):
            Whether to pad `x` so that the t-th frame is centered at
            `t * hop_length`. Default: `True`.
        pad_mode(str, optional):
            Padding mode used when `center` is True. (Default value = 'reflect')

    Returns:
        Tensor: Magnitude spectrogram (B, #frames, fft_size // 2 + 1).
    """
    # `scipy.signal.get_window` needs a concrete length, so resolve the
    # documented default here instead of passing None through.
    if win_length is None:
        win_length = fft_size

    # calculate window
    window = signal.get_window(window, win_length, fftbins=True)
    window = paddle.to_tensor(window, dtype=x.dtype)
    x_stft = paddle.signal.stft(
        x,
        fft_size,
        hop_length,
        win_length,
        window=window,
        center=center,
        pad_mode=pad_mode)

    real = x_stft.real()
    imag = x_stft.imag()

    # Clip the power before the square root so the result never drops to
    # exactly zero, then move the frame axis before the frequency axis.
    return paddle.sqrt(paddle.clip(real**2 + imag**2, min=1e-7)).transpose(
        [0, 2, 1])

class SpectralConvergenceLoss(nn.Layer):
    """Spectral convergence loss module."""

    def __init__(self):
        """Initialize spectral convergence loss module."""
        super().__init__()

    def forward(self, x_mag, y_mag):
        """Calculate forward propagation.
        Args:
            x_mag (Tensor):
                Magnitude spectrogram of predicted signal (B, #frames, #freq_bins).
            y_mag (Tensor):
                Magnitude spectrogram of groundtruth signal (B, #frames, #freq_bins).
        Returns:
            Tensor: Spectral convergence loss value.
        """
        error_norm = paddle.norm(y_mag - x_mag, p="fro")
        # Lower-bound the denominator so an all-zero target cannot divide by zero.
        target_norm = paddle.clip(paddle.norm(y_mag, p="fro"), min=1e-10)
        return error_norm / target_norm


class LogSTFTMagnitudeLoss(nn.Layer):
    """Log STFT magnitude loss module."""

    def __init__(self, epsilon=1e-7):
        """Initialize log STFT magnitude loss module.

        Args:
            epsilon (float, optional):
                Lower bound applied to the magnitudes before taking the log.
        """
        super().__init__()
        self.epsilon = epsilon

    def forward(self, x_mag, y_mag):
        """Calculate forward propagation.
        Args:
            x_mag (Tensor):
                Magnitude spectrogram of predicted signal (B, #frames, #freq_bins).
            y_mag (Tensor):
                Magnitude spectrogram of groundtruth signal (B, #frames, #freq_bins).
        Returns:
            Tensor: Log STFT magnitude loss value.
        """
        log_target = paddle.log(paddle.clip(y_mag, min=self.epsilon))
        log_predicted = paddle.log(paddle.clip(x_mag, min=self.epsilon))
        return F.l1_loss(log_target, log_predicted)


class STFTLoss(nn.Layer):
    """STFT loss module."""

    def __init__(self,
                 fft_size=1024,
                 shift_size=120,
                 win_length=600,
                 window="hann"):
        """Initialize STFT loss module.

        Args:
            fft_size (int): FFT size.
            shift_size (int): Hop size between frames.
            win_length (int): Window length.
            window (str): Window function type.
        """
        super().__init__()
        self.fft_size = fft_size
        self.shift_size = shift_size
        self.win_length = win_length
        self.window = window
        self.spectral_convergence_loss = SpectralConvergenceLoss()
        self.log_stft_magnitude_loss = LogSTFTMagnitudeLoss()

    def forward(self, x, y):
        """Calculate forward propagation.
        Args:
            x (Tensor):
                Predicted signal (B, T).
            y (Tensor):
                Groundtruth signal (B, T).
        Returns:
            Tensor:
                Spectral convergence loss value.
            Tensor:
                Log STFT magnitude loss value.
        """
        # Both signals are analysed with the same STFT configuration.
        stft_args = (self.fft_size, self.shift_size, self.win_length,
                     self.window)
        x_mag = stft(x, *stft_args)
        y_mag = stft(y, *stft_args)

        sc_loss = self.spectral_convergence_loss(x_mag, y_mag)
        mag_loss = self.log_stft_magnitude_loss(x_mag, y_mag)
        return sc_loss, mag_loss


class MultiResolutionSTFTLoss(nn.Layer):
    """STFT loss averaged over several analysis resolutions."""

    def __init__(
            self,
            fft_sizes=[1024, 2048, 512],
            hop_sizes=[120, 240, 50],
            win_lengths=[600, 1200, 240],
            window="hann", ):
        """Build one ``STFTLoss`` per (fft size, hop size, window length) triple.

        Args:
            fft_sizes (list):
                List of FFT sizes.
            hop_sizes (list):
                List of hop sizes.
            win_lengths (list):
                List of window lengths.
            window (str):
                Window function type.
        """
        super().__init__()
        assert len(fft_sizes) == len(hop_sizes) == len(win_lengths)
        # The three lists are consumed in lockstep, one STFTLoss per position.
        self.stft_losses = nn.LayerList([
            STFTLoss(fft_size, hop_size, win_length, window)
            for fft_size, hop_size, win_length in zip(fft_sizes, hop_sizes,
                                                      win_lengths)
        ])

    def forward(self, x, y):
        """Average the per-resolution STFT losses.

        Args:
            x (Tensor):
                Predicted signal (B, T) or (B, #subband, T).
            y (Tensor):
                Groundtruth signal (B, T) or (B, #subband, T).
        Returns:
            Tensor:
                Multi resolution spectral convergence loss value.
            Tensor:
                Multi resolution log STFT magnitude loss value.
        """
        if len(x.shape) == 3:
            # Fold the sub-band axis into the batch axis: (B, C, T) -> (B x C, T)
            x = x.reshape([-1, x.shape[2]])
            y = y.reshape([-1, y.shape[2]])

        sc_loss = 0.0
        mag_loss = 0.0
        for stft_loss in self.stft_losses:
            sc_part, mag_part = stft_loss(x, y)
            sc_loss += sc_part
            mag_loss += mag_part

        num_resolutions = len(self.stft_losses)
        sc_loss /= num_resolutions
        mag_loss /= num_resolutions
        return sc_loss, mag_loss


class GeneratorAdversarialLoss(nn.Layer):
    """Adversarial loss used to train the generator."""

    def __init__(
            self,
            average_by_discriminators=True,
            loss_type="mse", ):
        """Select the criterion and the averaging behaviour.

        Args:
            average_by_discriminators (bool):
                Whether to divide the summed loss by the number of
                discriminator outputs.
            loss_type (str):
                Criterion to use, ``"mse"`` or ``"hinge"``.
        """
        super().__init__()
        self.average_by_discriminators = average_by_discriminators
        assert loss_type in ["mse", "hinge"], f"{loss_type} is not supported."
        self.criterion = (self._mse_loss
                          if loss_type == "mse" else self._hinge_loss)

    def forward(self, outputs):
        """Calculate generator adversarial loss.

        Args:
            outputs (Tensor or List):
                Discriminator outputs or list of discriminator outputs.
        Returns:
            Tensor:
                Generator adversarial loss value.
        """
        # A single discriminator output needs no accumulation.
        if not isinstance(outputs, (tuple, list)):
            return self.criterion(outputs)

        adv_loss = 0.0
        for idx, disc_out in enumerate(outputs):
            if isinstance(disc_out, (tuple, list)):
                # Feature maps are included; the final entry is the prediction.
                disc_out = disc_out[-1]
            adv_loss += self.criterion(disc_out)
        if self.average_by_discriminators:
            adv_loss /= idx + 1
        return adv_loss

    def _mse_loss(self, x):
        """Least-squares loss that pushes the discriminator output towards 1."""
        target = paddle.ones_like(x)
        return F.mse_loss(x, target)

    def _hinge_loss(self, x):
        """Hinge generator loss: the negated mean of the discriminator output."""
        return -x.mean()


class DiscriminatorAdversarialLoss(nn.Layer):
    """Adversarial loss used to train the discriminator."""

    def __init__(
            self,
            average_by_discriminators=True,
            loss_type="mse", ):
        """Select the criteria and the averaging behaviour.

        Args:
            average_by_discriminators (bool):
                Whether to divide the summed losses by the number of
                discriminator outputs.
            loss_type (str):
                Criterion to use; only ``"mse"`` is accepted.
        """
        super().__init__()
        self.average_by_discriminators = average_by_discriminators
        assert loss_type in ["mse"], f"{loss_type} is not supported."
        if loss_type == "mse":
            self.fake_criterion = self._mse_fake_loss
            self.real_criterion = self._mse_real_loss

    def forward(self, outputs_hat, outputs):
        """Calculate discriminator adversarial loss.

        Args:
            outputs_hat (Tensor or list):
                Discriminator outputs or list of discriminator outputs calculated from generator outputs.
            outputs (Tensor or list):
                Discriminator outputs or list of discriminator outputs calculated from groundtruth.
        Returns:
            Tensor:
                Discriminator real loss value.
            Tensor:
                Discriminator fake loss value.
        """
        # Only ``outputs`` decides between the single and the multi-discriminator path.
        if not isinstance(outputs, (tuple, list)):
            return (self.real_criterion(outputs),
                    self.fake_criterion(outputs_hat))

        real_loss = 0.0
        fake_loss = 0.0
        for idx, (fake_out, real_out) in enumerate(zip(outputs_hat, outputs)):
            if isinstance(fake_out, (tuple, list)):
                # Feature maps are included; the final entry is the prediction.
                fake_out = fake_out[-1]
                real_out = real_out[-1]
            real_loss += self.real_criterion(real_out)
            fake_loss += self.fake_criterion(fake_out)
        if self.average_by_discriminators:
            num_outputs = idx + 1
            fake_loss /= num_outputs
            real_loss /= num_outputs
        return real_loss, fake_loss

    def _mse_real_loss(self, x):
        """Least-squares loss that pushes outputs for real audio towards 1."""
        target = paddle.ones_like(x)
        return F.mse_loss(x, target)

    def _mse_fake_loss(self, x):
        """Least-squares loss that pushes outputs for generated audio towards 0."""
        target = paddle.zeros_like(x)
        return F.mse_loss(x, target)


# Losses for SpeedySpeech
# Structural Similarity Index Measure (SSIM)
def gaussian(window_size, sigma):
    """Build a normalized 1-D Gaussian kernel.

    Args:
        window_size (int): Number of taps; the bell is centred on ``window_size // 2``.
        sigma (float): Standard deviation of the Gaussian.

    Returns:
        Tensor: Kernel of length ``window_size`` whose entries sum to 1.
    """
    center = window_size // 2
    denominator = float(2 * sigma**2)
    taps = [
        math.exp(-(pos - center)**2 / denominator)
        for pos in range(window_size)
    ]
    kernel = paddle.to_tensor(taps)
    return kernel / kernel.sum()


def create_window(window_size, channel):
    """Build the 2-D Gaussian window used by SSIM, one copy per channel.

    Args:
        window_size (int): Side length of the square window.
        channel (int): Number of channels the window is expanded to.

    Returns:
        Tensor: Window of shape (channel, 1, window_size, window_size).
    """
    # Outer product of the 1-D kernel (sigma fixed to 1.5) with itself.
    column = gaussian(window_size, 1.5).unsqueeze(1)
    row = paddle.transpose(column, [1, 0])
    kernel_2d = paddle.matmul(column, row).unsqueeze([0, 1])
    return paddle.expand(kernel_2d, [channel, 1, window_size, window_size])


def _ssim(img1, img2, window, window_size, channel, size_average=True):
    """Compute SSIM between two image batches with a given smoothing window.

    Args:
        img1 (Tensor): First image batch.
        img2 (Tensor): Second image batch.
        window (Tensor): Convolution kernel used for the local statistics.
        window_size (int): Side length of ``window``; half of it is used as padding.
        channel (int): Number of groups for the grouped convolution.
        size_average (bool): If True return the global mean, otherwise reduce
            axis 1 three times in a row.

    Returns:
        Tensor: SSIM value(s).
    """
    half = window_size // 2

    def local_mean(t):
        # Grouped convolution keeps every channel separate.
        return F.conv2d(t, window, padding=half, groups=channel)

    mu1 = local_mean(img1)
    mu2 = local_mean(img2)

    mu1_sq = mu1.pow(2)
    mu2_sq = mu2.pow(2)
    mu1_mu2 = mu1 * mu2

    # Local (co)variances via E[xy] - E[x]E[y].
    sigma1_sq = local_mean(img1 * img1) - mu1_sq
    sigma2_sq = local_mean(img2 * img2) - mu2_sq
    sigma12 = local_mean(img1 * img2) - mu1_mu2

    # Stabilising constants of the SSIM formula.
    C1 = 0.01**2
    C2 = 0.03**2

    numerator = (2 * mu1_mu2 + C1) * (2 * sigma12 + C2)
    denominator = (mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2)
    ssim_map = numerator / denominator

    if not size_average:
        return ssim_map.mean(1).mean(1).mean(1)
    return ssim_map.mean()


def ssim(img1, img2, window_size=11, size_average=True):
    """Structural similarity between two 4-D image batches.

    Args:
        img1 (Tensor): First image batch; must have exactly four dimensions,
            the second one being the channel axis.
        img2 (Tensor): Second image batch.
        window_size (int): Side length of the Gaussian window.
        size_average (bool): Forwarded to ``_ssim``.

    Returns:
        Tensor: SSIM value(s) as returned by ``_ssim``.
    """
    # Unpacking enforces a 4-D input; only the channel count is needed.
    _batch, channel, _height, _width = img1.shape
    gaussian_window = create_window(window_size, channel)
    return _ssim(img1, img2, gaussian_window, window_size, channel,
                 size_average)


def weighted_mean(input, weight):
    """Weighted mean. It can also be used as masked mean.

    Args:
        input(Tensor): The input tensor.
        weight(Tensor): The weight tensor with broadcastable shape with the input.

    Returns:
        Tensor: Weighted mean tensor with the same dtype as input. shape=(1,)

    """
    weight = paddle.cast(weight, input.dtype)
    # paddle.Tensor.size is different with torch.size() and has been overrided in s2t.__init__
    # How many input elements each weight element is broadcast over.
    broadcast_ratio = input.numel() / weight.numel()
    weighted_total = paddle.sum(input * weight)
    effective_weight = paddle.sum(weight) * broadcast_ratio
    return weighted_total / effective_weight


def masked_l1_loss(prediction, target, mask):
    """Compute masked L1 loss.

    Args:
        prediction(Tensor):
            The prediction.
        target(Tensor):
            The target. The shape should be broadcastable to ``prediction``.
        mask(Tensor):
            The mask. The shape should be broadcastable to the broadcasted shape of
            ``prediction`` and ``target``.

    Returns:
        Tensor: The masked L1 loss. shape=(1,)

    """
    # Keep the element-wise errors so the mask can weight them.
    elementwise_error = F.l1_loss(prediction, target, reduction='none')
    return weighted_mean(elementwise_error, mask)


class MelSpectrogram(nn.Layer):
    """Compute a log-Mel spectrogram from a waveform."""

    def __init__(
            self,
            fs=22050,
            fft_size=1024,
            hop_size=256,
            win_length=None,
            window="hann",
            num_mels=80,
            fmin=80,
            fmax=7600,
            center=True,
            normalized=False,
            onesided=True,
            eps=1e-10,
            log_base=10.0, ):
        """Initialize MelSpectrogram module.

        Args:
            fs (int): Sampling rate used to build the Mel filterbank.
            fft_size (int): FFT size.
            hop_size (int): Hop size.
            win_length (int, optional): Window length; ``fft_size`` when None.
            window (str, optional): Name of a window in ``scipy.signal.windows``,
                or None to pass no window to the STFT.
            num_mels (int): Number of Mel bins.
            fmin (int, optional): Lowest filterbank frequency; 0 when None.
            fmax (int, optional): Highest filterbank frequency; ``fs / 2`` when None.
            center (bool): Forwarded to ``paddle.signal.stft``.
            normalized (bool): Forwarded to ``paddle.signal.stft``.
            onesided (bool): Forwarded to ``paddle.signal.stft``.
            eps (float): Lower clipping bound applied before sqrt and log.
            log_base (float, optional): None (natural log), 2.0 or 10.0.

        Raises:
            ValueError: If ``window`` is unknown or ``log_base`` is unsupported.
        """
        super().__init__()
        self.fft_size = fft_size
        self.win_length = fft_size if win_length is None else win_length
        self.hop_size = hop_size
        self.center = center
        self.normalized = normalized
        self.onesided = onesided

        if window is not None and not hasattr(signal.windows, f"{window}"):
            raise ValueError(f"{window} window is not implemented")
        self.window = window
        self.eps = eps

        if fmin is None:
            fmin = 0
        if fmax is None:
            fmax = fs / 2
        mel_basis = librosa.filters.mel(
            sr=fs,
            n_fft=fft_size,
            n_mels=num_mels,
            fmin=fmin,
            fmax=fmax, )

        # Stored transposed so that ``amplitude @ melmat`` maps freqs -> mels.
        self.melmat = paddle.to_tensor(mel_basis.T)
        self.stft_params = {
            "n_fft": self.fft_size,
            "win_length": self.win_length,
            "hop_length": self.hop_size,
            "center": self.center,
            "normalized": self.normalized,
            "onesided": self.onesided,
        }

        self.log_base = log_base
        if self.log_base is None:
            self.log = paddle.log
        elif self.log_base == 2.0:
            self.log = paddle.log2
        elif self.log_base == 10.0:
            self.log = paddle.log10
        else:
            raise ValueError(f"log_base: {log_base} is not supported.")

    def forward(self, x):
        """Calculate Mel-spectrogram.

        Args:
            x (Tensor): Input waveform tensor (B, T) or (B, 1, T).
        Returns:
            Tensor: Mel-spectrogram (B, #mels, #frames).
        """
        if len(x.shape) == 3:
            # (B, C, T) -> (B*C, T)
            x = x.reshape([-1, paddle.shape(x)[2]])

        window = None
        if self.window is not None:
            # Build the analysis window in the dtype of the input.
            window_values = signal.get_window(
                self.window, self.win_length, fftbins=True)
            window = paddle.to_tensor(window_values, dtype=x.dtype)

        spec = paddle.signal.stft(x, window=window, **self.stft_params)
        # (B, #freqs, #frames) -> (B, #frames, #freqs)
        real = spec.real().transpose([0, 2, 1])
        imag = spec.imag().transpose([0, 2, 1])

        power = real**2 + imag**2
        # Clip before sqrt and before log so neither sees a value below eps.
        amplitude = paddle.sqrt(paddle.clip(power, min=self.eps))
        mel = paddle.clip(paddle.matmul(amplitude, self.melmat), min=self.eps)

        return self.log(mel).transpose([0, 2, 1])


class MelSpectrogramLoss(nn.Layer):
    """L1 loss between log-Mel spectrograms of two waveforms."""

    def __init__(
            self,
            fs=22050,
            fft_size=1024,
            hop_size=256,
            win_length=None,
            window="hann",
            num_mels=80,
            fmin=80,
            fmax=7600,
            center=True,
            normalized=False,
            onesided=True,
            eps=1e-10,
            log_base=10.0, ):
        """Initialize Mel-spectrogram loss.

        All arguments are forwarded unchanged to ``MelSpectrogram``.
        """
        super().__init__()
        mel_kwargs = dict(
            fs=fs,
            fft_size=fft_size,
            hop_size=hop_size,
            win_length=win_length,
            window=window,
            num_mels=num_mels,
            fmin=fmin,
            fmax=fmax,
            center=center,
            normalized=normalized,
            onesided=onesided,
            eps=eps,
            log_base=log_base, )
        self.mel_spectrogram = MelSpectrogram(**mel_kwargs)

    def forward(self, y_hat, y):
        """Calculate Mel-spectrogram loss.

        Args:
            y_hat(Tensor):
                Generated single tensor (B, 1, T).
            y(Tensor):
                Groundtruth single tensor (B, 1, T).

        Returns:
            Tensor: Mel-spectrogram loss value.
        """
        generated_mel = self.mel_spectrogram(y_hat)
        reference_mel = self.mel_spectrogram(y)
        return F.l1_loss(generated_mel, reference_mel)


class FeatureMatchLoss(nn.Layer):
    """Feature matching loss between discriminator feature maps."""

    def __init__(
            self,
            average_by_layers=True,
            average_by_discriminators=True,
            include_final_outputs=False, ):
        """Initialize FeatureMatchLoss module.

        Args:
            average_by_layers (bool):
                Whether to divide each discriminator's loss by its number of layers.
            average_by_discriminators (bool):
                Whether to divide the total by the number of discriminators.
            include_final_outputs (bool):
                Whether the last entry of every feature list is also matched.
        """
        super().__init__()
        self.average_by_layers = average_by_layers
        self.average_by_discriminators = average_by_discriminators
        self.include_final_outputs = include_final_outputs

    def forward(self, feats_hat, feats):
        """Calculate feature matching loss.

        Args:
            feats_hat(list):
                List of list of discriminator outputs
                calculated from generator outputs.
            feats(list):
                List of list of discriminator outputs

        Returns:
            Tensor: Feature matching loss value.

        """
        total = 0.0
        for disc_idx, (layers_hat, layers_ref) in enumerate(
                zip(feats_hat, feats)):
            if not self.include_final_outputs:
                # Drop the final entry of each list.
                layers_hat = layers_hat[:-1]
                layers_ref = layers_ref[:-1]

            per_disc = 0.0
            for layer_idx, (feat_hat, feat_ref) in enumerate(
                    zip(layers_hat, layers_ref)):
                # The reference features are detached, so they receive no gradient.
                per_disc += F.l1_loss(feat_hat, feat_ref.detach())
            if self.average_by_layers:
                per_disc /= layer_idx + 1
            total += per_disc

        if self.average_by_discriminators:
            total /= disc_idx + 1

        return total


# loss for VITS
class KLDivergenceLoss(nn.Layer):
    """Masked KL divergence loss."""

    def forward(
            self,
            z_p: paddle.Tensor,
            logs_q: paddle.Tensor,
            m_p: paddle.Tensor,
            logs_p: paddle.Tensor,
            z_mask: paddle.Tensor, ) -> paddle.Tensor:
        """Calculate KL divergence loss.

        Args:
            z_p (Tensor):
                Flow hidden representation (B, H, T_feats).
            logs_q (Tensor):
                Posterior encoder projected scale (B, H, T_feats).
            m_p (Tensor):
                Expanded text encoder projected mean (B, H, T_feats).
            logs_p (Tensor):
                Expanded text encoder projected scale (B, H, T_feats).
            z_mask (Tensor):
                Mask tensor (B, 1, T_feats).

        Returns:
            Tensor: KL divergence loss.

        """
        # Every input is cast to float32 before the loss is evaluated.
        z_p, logs_q, m_p, logs_p, z_mask = (
            paddle.cast(tensor, 'float32')
            for tensor in (z_p, logs_q, m_p, logs_p, z_mask))

        log_scale_term = logs_p - logs_q - 0.5
        quadratic_term = 0.5 * ((z_p - m_p)**2) * paddle.exp(-2.0 * logs_p)
        kl = log_scale_term + quadratic_term

        # Sum over the masked positions, normalised by the mask sum.
        return paddle.sum(kl * z_mask) / paddle.sum(z_mask)


# loss for ERNIE SAT
class MLMLoss(nn.Layer):
    """Masked reconstruction loss for speech with an optional masked text loss."""

    def __init__(self,
                 odim: int,
                 vocab_size: int=0,
                 lsm_weight: float=0.1,
                 ignore_id: int=-1,
                 text_masking: bool=False):
        """Configure the speech criterion and, optionally, the text criterion.

        Args:
            odim (int): Feature dimension the speech tensors are reshaped to.
            vocab_size (int): Class dimension the text outputs are reshaped to.
            lsm_weight (float): Values above 50 select ``nn.MSELoss()`` instead of
                the element-wise L1 loss.
            ignore_id (int): ``ignore_index`` of the text cross-entropy loss.
            text_masking (bool): Whether the text loss is computed as well.
        """
        super().__init__()
        if text_masking:
            self.text_mlm_loss = nn.CrossEntropyLoss(ignore_index=ignore_id)
        # The attribute keeps the name ``l1_loss_func`` even when it holds MSELoss.
        self.l1_loss_func = (nn.MSELoss() if lsm_weight > 50 else
                             nn.L1Loss(reduction='none'))
        self.text_masking = text_masking
        self.odim = odim
        self.vocab_size = vocab_size

    def forward(
            self,
            speech: paddle.Tensor,
            before_outs: paddle.Tensor,
            after_outs: paddle.Tensor,
            masked_pos: paddle.Tensor,
            # for text_loss when text_masking == True
            text: paddle.Tensor=None,
            text_outs: paddle.Tensor=None,
            text_masked_pos: paddle.Tensor=None):
        """Compute the masked speech loss and, if enabled, the masked text loss.

        Args:
            speech (Tensor): Target speech features.
            before_outs (Tensor): Predicted features, compared against ``speech``.
            after_outs (Tensor): Second set of predicted features, or None to skip it.
            masked_pos (Tensor): Positions with a value > 0 contribute to the loss.
            text (Tensor): Text targets; required when ``text_masking`` is True.
            text_outs (Tensor): Text predictions; required when ``text_masking`` is True.
            text_masked_pos (Tensor): Text mask; required when ``text_masking`` is True.

        Returns:
            Tensor: Speech MLM loss.
            Tensor or None: Text MLM loss, None when ``text_masking`` is False.
        """
        flat_shape = (-1, self.odim)
        target = paddle.reshape(speech, flat_shape)

        loss = paddle.sum(
            self.l1_loss_func(paddle.reshape(before_outs, flat_shape), target),
            axis=-1)
        if after_outs is not None:
            loss = loss + paddle.sum(
                self.l1_loss_func(
                    paddle.reshape(after_outs, flat_shape), target),
                axis=-1)

        speech_mask = (masked_pos > 0).astype(loss.dtype)
        masked_total = paddle.sum(loss * paddle.reshape(speech_mask, [-1]))
        # The 1e-10 is added to every mask element before the sum.
        mlm_loss = masked_total / paddle.sum((speech_mask) + 1e-10)

        text_mlm_loss = None
        if self.text_masking:
            assert text is not None
            assert text_outs is not None
            assert text_masked_pos is not None
            flat_text_outs = paddle.reshape(text_outs, [-1, self.vocab_size])
            flat_text = paddle.reshape(text, [-1])
            text_loss = self.text_mlm_loss(flat_text_outs, flat_text)
            flat_text_mask = paddle.reshape(text_masked_pos, [-1])
            text_mlm_loss = paddle.sum(text_loss * flat_text_mask) / paddle.sum(
                (text_masked_pos) + 1e-10)

        return mlm_loss, text_mlm_loss


class VarianceLoss(nn.Layer):
    """Duration, pitch and energy predictor losses for JETS."""

    @typechecked
    def __init__(self, use_masking: bool=True,
                 use_weighted_masking: bool=False):
        """Initialize JETS variance loss module.
        Args:
            use_masking (bool): Whether to apply masking for padded part in loss
                calculation.
            use_weighted_masking (bool): Whether to weighted masking in loss
                calculation.

        """
        super().__init__()

        # The two masking modes are mutually exclusive (both may be False).
        assert (use_masking != use_weighted_masking) or not use_masking
        self.use_masking = use_masking
        self.use_weighted_masking = use_weighted_masking

        # define criterions
        # Weighted masking needs element-wise losses so that they can be weighted.
        reduction = "none" if self.use_weighted_masking else "mean"
        self.mse_criterion = nn.MSELoss(reduction=reduction)
        self.duration_criterion = DurationPredictorLoss(reduction=reduction)

    def forward(
            self,
            d_outs: paddle.Tensor,
            ds: paddle.Tensor,
            p_outs: paddle.Tensor,
            ps: paddle.Tensor,
            e_outs: paddle.Tensor,
            es: paddle.Tensor,
            ilens: paddle.Tensor,
    ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor]:
        """Calculate forward propagation.

        Args:
            d_outs (LongTensor): Batch of outputs of duration predictor (B, T_text).
            ds (LongTensor): Batch of durations (B, T_text).
            p_outs (Tensor): Batch of outputs of pitch predictor (B, T_text, 1).
            ps (Tensor): Batch of target token-averaged pitch (B, T_text, 1).
            e_outs (Tensor): Batch of outputs of energy predictor (B, T_text, 1).
            es (Tensor): Batch of target token-averaged energy (B, T_text, 1).
            ilens (LongTensor): Batch of the lengths of each input (B,).

        Returns:
            Tensor: Duration predictor loss value.
            Tensor: Pitch predictor loss value.
            Tensor: Energy predictor loss value.

        """
        # apply mask to remove padded part
        if self.use_masking:
            duration_masks = paddle.to_tensor(
                make_non_pad_mask(ilens), place=ds.place)
            d_outs = d_outs.masked_select(duration_masks)
            ds = ds.masked_select(duration_masks)
            pitch_masks = paddle.to_tensor(
                make_non_pad_mask(ilens).unsqueeze(-1), place=ds.place)
            p_outs = p_outs.masked_select(pitch_masks)
            e_outs = e_outs.masked_select(pitch_masks)
            ps = ps.masked_select(pitch_masks)
            es = es.masked_select(pitch_masks)

        # calculate loss
        duration_loss = self.duration_criterion(d_outs, ds)
        pitch_loss = self.mse_criterion(p_outs, ps)
        energy_loss = self.mse_criterion(e_outs, es)

        # make weighted mask and apply it
        if self.use_weighted_masking:
            duration_masks = paddle.to_tensor(
                make_non_pad_mask(ilens), place=ds.place)
            # Use native Paddle ops here: the torch-style tensor methods
            # (.float(), sum(dim=...), .size(0), .mul) are not part of
            # Paddle's Tensor API.
            float_masks = paddle.cast(duration_masks, "float32")
            # Each valid position is weighted by 1 / (length of its sequence),
            # then by 1 / batch size, so the weights over a batch sum to 1.
            duration_weights = float_masks / paddle.sum(
                float_masks, axis=1, keepdim=True)
            duration_weights = duration_weights / ds.shape[0]

            # apply weight
            duration_loss = paddle.sum(
                (duration_loss * duration_weights).masked_select(
                    duration_masks))
            pitch_masks = duration_masks.unsqueeze(-1)
            pitch_weights = duration_weights.unsqueeze(-1)
            pitch_loss = paddle.sum(
                (pitch_loss * pitch_weights).masked_select(pitch_masks))
            energy_loss = paddle.sum(
                (energy_loss * pitch_weights).masked_select(pitch_masks))

        return duration_loss, pitch_loss, energy_loss


class ForwardSumLoss(nn.Layer):
    """Forward-sum (CTC-based) alignment loss with a beta-binomial prior.

    https://openreview.net/forum?id=0NQwnnwAORi
    """

    def __init__(self, cache_prior: bool=True):
        """
        Args:
            cache_prior (bool): Whether to cache beta-binomial prior
        """
        super().__init__()
        self.cache_prior = cache_prior
        # Maps "T,N" -> numpy array of log prior values with shape (N, T).
        self._cache = {}

    def forward(
            self,
            log_p_attn: paddle.Tensor,
            ilens: paddle.Tensor,
            olens: paddle.Tensor,
            blank_prob: float=np.e**-1, ) -> paddle.Tensor:
        """
        Args:
            log_p_attn (Tensor): Batch of log probability of attention matrix (B, T_feats, T_text).
            ilens (Tensor): Batch of the lengths of each input (B,).
            olens (Tensor): Batch of the lengths of each target (B,).
            blank_prob (float): Blank symbol probability

        Returns:
            Tensor: forwardsum loss value.
        """

        B = log_p_attn.shape[0]
        # add beta-binomial prior (log domain, so it is added, not multiplied)
        bb_prior = self._generate_prior(ilens, olens)
        bb_prior = paddle.to_tensor(
            bb_prior, dtype=log_p_attn.dtype, place=log_p_attn.place)
        log_p_attn = log_p_attn + bb_prior

        # a column must be added to the attention matrix to account for blank token of CTC loss
        # (B,T_feats,T_text+1)
        log_p_attn_pd = F.pad(
            log_p_attn, (0, 0, 0, 0, 1, 0), value=np.log(blank_prob))
        loss = 0
        for bidx in range(B):
            # construct target sequence.
            # Every text token is mapped to a unique sequence number.
            target_seq = paddle.arange(
                1, ilens[bidx] + 1, dtype="int32").unsqueeze(0)
            cur_log_p_attn_pd = log_p_attn_pd[bidx, :olens[bidx], :ilens[
                bidx] + 1].unsqueeze(1)  # (T_feats,1,T_text+1)
            # The input of ctc_loss API need to be fixed
            loss += F.ctc_loss(
                log_probs=cur_log_p_attn_pd,
                labels=target_seq,
                input_lengths=olens[bidx:bidx + 1],
                label_lengths=ilens[bidx:bidx + 1])
        loss = loss / B

        return loss

    def _generate_prior(self, text_lengths, feats_lengths,
                        w=1) -> paddle.Tensor:
        """Generate alignment prior formulated as beta-binomial distribution

        The prior is returned in the log domain because the caller adds it to
        log attention probabilities; padded positions stay at ``-inf``.

        Args:
            text_lengths (Tensor): Batch of the lengths of each input (B,).
            feats_lengths (Tensor): Batch of the lengths of each target (B,).
            w (float): Scaling factor; lower -> wider the width

        Returns:
            Tensor: Batched 2d static log-prior matrix (B, T_feats, T_text)
        """
        B = len(text_lengths)
        T_text = text_lengths.max()
        T_feats = feats_lengths.max()

        bb_prior = paddle.full((B, T_feats, T_text), fill_value=-np.inf)
        for bidx in range(B):
            T = feats_lengths[bidx].item()
            N = text_lengths[bidx].item()

            key = str(T) + ',' + str(N)
            if self.cache_prior and key in self._cache:
                prob = self._cache[key]
            else:
                alpha = w * np.arange(1, T + 1, dtype=float)  # (T,)
                beta = w * np.array([T - t + 1 for t in alpha])
                k = np.arange(N)
                batched_k = k[..., None]  # (N,1)
                # logpmf, not pmf: the result is summed with log-probabilities.
                prob = betabinom.logpmf(batched_k, N, alpha, beta)  # (N,T)
                # store cache
                if self.cache_prior:
                    self._cache[key] = prob

            prob = paddle.to_tensor(
                prob, place=text_lengths.place, dtype="float32").transpose(
                    (1, 0))  # -> (T,N)
            bb_prior[bidx, :T, :N] = prob

        return bb_prior


class MultiScaleSTFTLoss(nn.Layer):
    """Computes the multi-scale STFT loss from [1].

    References
    ----------

    1.  Engel, Jesse, Chenjie Gu, and Adam Roberts.
        "DDSP: Differentiable Digital Signal Processing."
        International Conference on Learning Representations. 2019.

    Implementation copied from: https://github.com/descriptinc/audiotools/blob/master/audiotools/metrics/spectral.py
    """

    def __init__(
            self,
            window_lengths: List[int]=[2048, 512],
            loss_fn: Callable=nn.L1Loss(),
            clamp_eps: float=1e-5,
            mag_weight: float=1.0,
            log_weight: float=1.0,
            pow: float=2.0,
            weight: float=1.0,
            match_stride: bool=False,
            window_type: Optional[str]=None, ):
        """
        Args:
            window_lengths : List[int], optional
                Length of each window of each STFT, by default [2048, 512]
            loss_fn : typing.Callable, optional
                How to compare each loss, by default nn.L1Loss()
            clamp_eps : float, optional
                Clamp on the log magnitude, below, by default 1e-5
            mag_weight : float, optional
                Weight of raw magnitude portion of loss, by default 1.0
            log_weight : float, optional
                Weight of log magnitude portion of loss, by default 1.0
            pow : float, optional
                Power to raise magnitude to before taking log, by default 2.0
            weight : float, optional
                Weight of this loss, by default 1.0
            match_stride : bool, optional
                Whether to match the stride of convolutional layers, by default False
            window_type : str, optional
                Type of window to use, by default None.
        """
        super().__init__()

        # One STFT configuration per window length; the hop is a quarter window.
        self.stft_params = [
            STFTParams(
                window_length=w,
                hop_length=w // 4,
                match_stride=match_stride,
                window_type=window_type, ) for w in window_lengths
        ]
        self.loss_fn = loss_fn
        self.log_weight = log_weight
        self.mag_weight = mag_weight
        self.clamp_eps = clamp_eps
        # Stored only; ``forward`` does not apply it.
        self.weight = weight
        self.pow = pow

    def forward(self, x: AudioSignal, y: AudioSignal):
        """Computes multi-scale STFT between an estimate and a reference
        signal.

        Args:
            x : AudioSignal
                Estimate signal
            y : AudioSignal
                Reference signal

        Returns:
            paddle.Tensor
                Multi-scale STFT loss.
        
        Example:
            >>> from paddlespeech.audiotools.core.audio_signal import AudioSignal
            >>> import paddle

            >>> x = AudioSignal("https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav", 2_05)
            >>> y = x * 0.01
            >>> loss = MultiScaleSTFTLoss()
            >>> loss(x, y).numpy()
            7.562150
        """
        # The accumulator must exist before the first ``+=`` below.
        loss = 0.0
        for s in self.stft_params:
            # Recompute the STFT of both signals at this resolution.
            x.stft(s.window_length, s.hop_length, s.window_type)
            y.stft(s.window_length, s.hop_length, s.window_type)
            # Log-magnitude term (clipped from below so log10 stays finite).
            loss += self.log_weight * self.loss_fn(
                x.magnitude.clip(self.clamp_eps).pow(self.pow).log10(),
                y.magnitude.clip(self.clamp_eps).pow(self.pow).log10(), )
            # Raw-magnitude term.
            loss += self.mag_weight * self.loss_fn(x.magnitude, y.magnitude)
        return loss


class GANLoss(nn.Layer):
    """
    Computes a discriminator loss, given a discriminator on
    generated waveforms/spectrograms compared to ground truth
    waveforms/spectrograms. Computes the loss for both the
    discriminator and the generator in separate functions.

    Example:
    >>> from paddlespeech.audiotools.core.audio_signal import AudioSignal
    >>> import paddle

    >>> x = AudioSignal("https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav", 2_05)
    >>> y = x * 0.01
    >>> class My_discriminator0:
    >>>     def __call__(self, x):
    >>>         return x.sum()
    >>> loss = GANLoss(My_discriminator0())
    >>> [loss(x, y)[0].numpy(), loss(x, y)[1].numpy()]
    [-0.102722, -0.001027]

    >>> class My_discriminator1:
    >>>     def __call__(self, x):
    >>>         return x.sum()
    >>> loss = GANLoss(My_discriminator1())
    >>> [loss.generator_loss(x, y)[0].numpy(), loss.generator_loss(x, y)[1].numpy()]
    [1.00019, 0]

    >>> loss.discriminator_loss(x, y)
    1.000200
    """

    def __init__(self, discriminator):
        """
        Args:
            discriminator : paddle.nn.layer
                Discriminator model
        """
        super().__init__()
        self.discriminator = discriminator

    def forward(self,
                fake: Union[AudioSignal, paddle.Tensor],
                real: Union[AudioSignal, paddle.Tensor]):
        """Run the discriminator on the generated and the real input.

        Args:
            fake : AudioSignal or paddle.Tensor
                Generated input; ``audio_data`` is used for an AudioSignal.
            real : AudioSignal or paddle.Tensor
                Ground-truth input; ``audio_data`` is used for an AudioSignal.

        Returns:
            Tuple of the discriminator outputs for ``fake`` and for ``real``.
        """
        if isinstance(fake, AudioSignal):
            d_fake = self.discriminator(fake.audio_data)
        else:
            d_fake = self.discriminator(fake)

        if isinstance(real, AudioSignal):
            d_real = self.discriminator(real.audio_data)
        else:
            d_real = self.discriminator(real)
        return d_fake, d_real

    def discriminator_loss(self, fake, real):
        """Least-squares discriminator loss: fake towards 0, real towards 1.

        Only the last entry of every discriminator output is used.
        """
        d_fake, d_real = self.forward(fake, real)

        loss_d = 0
        for x_fake, x_real in zip(d_fake, d_real):
            loss_d += paddle.mean(x_fake[-1]**2)
            loss_d += paddle.mean((1 - x_real[-1])**2)
        return loss_d

    def generator_loss(self, fake, real):
        """Least-squares generator loss plus feature-matching loss.

        Returns:
            Tuple ``(loss_g, loss_feature)``: the adversarial term computed
            from the last entry of every discriminator output, and the L1
            distance between all remaining entries of the fake and real
            outputs.
        """
        d_fake, d_real = self.forward(fake, real)

        loss_g = 0
        for x_fake in d_fake:
            loss_g += paddle.mean((1 - x_fake[-1])**2)

        loss_feature = 0

        for i in range(len(d_fake)):
            # Every entry except the last one is treated as a feature map.
            for j in range(len(d_fake[i]) - 1):
                # Detach the real features so they act as a fixed target;
                # a tensor is not callable, so ``d_real[i][j]()`` would raise.
                loss_feature += F.l1_loss(d_fake[i][j], d_real[i][j].detach())
        return loss_g, loss_feature


class SISDRLoss(nn.Layer):
    """
    Computes the Scale-Invariant Source-to-Distortion Ratio between a batch
    of estimated and reference audio signals or aligned features.

    Implementation copied from: https://github.com/descriptinc/audiotools/blob/master/audiotools/metrics/distance.py

    Example:
    >>> from paddlespeech.audiotools.core.audio_signal import AudioSignal
    >>> import paddle

    >>> x = AudioSignal("https://paddlespeech.cdn.bcebos.com/PaddleAudio/en.wav", 2_05)
    >>> y = x * 0.01
    >>> sisdr = SISDRLoss()
    >>> sisdr(x, y).numpy()
    -145.377640
    """

    def __init__(
            self,
            scaling: bool=True,
            reduction: str="mean",
            zero_mean: bool=True,
            clip_min: Optional[int]=None,
            weight: float=1.0, ):
        """
        Args:
            scaling : bool, optional
                Whether to use scale-invariant (True) or
                signal-to-noise ratio (False), by default True
            reduction : str, optional
                How to reduce across the batch: 'mean' or 'sum'. Any other
                value leaves the per-item values unreduced. By default 'mean'
            zero_mean : bool, optional
                Zero mean the references and estimates before
                computing the loss, by default True
            clip_min : int, optional
                The minimum possible loss value. Helps network
                to not focus on making already good examples better, by default None
            weight : float, optional
                Weight of this loss, defaults to 1.0. It is only stored on the
                instance; ``forward`` does not apply it.
        """
        super().__init__()
        self.scaling = scaling
        self.reduction = reduction
        self.zero_mean = zero_mean
        self.clip_min = clip_min
        self.weight = weight

    def forward(self,
                x: Union[AudioSignal, paddle.Tensor],
                y: Union[AudioSignal, paddle.Tensor]):
        """Return the negative SI-SDR (in dB) between ``x`` and ``y``.

        Args:
            x: Reference signal(s); an ``AudioSignal`` or a tensor whose
                first axis is the batch.
            y: Estimated signal(s); an ``AudioSignal`` or a tensor that
                flattens to the same per-item size as ``x``.

        Returns:
            paddle.Tensor: the loss, reduced according to ``self.reduction``.
        """
        eps = 1e-8
        # Unwrap each argument on its own so mixed AudioSignal / tensor
        # inputs are accepted.
        references = x.audio_data if isinstance(x, AudioSignal) else x
        estimates = y.audio_data if isinstance(y, AudioSignal) else y

        # Flatten everything but the batch axis, then put samples on axis 1.
        nb = references.shape[0]
        references = references.reshape([nb, 1, -1]).transpose([0, 2, 1])
        estimates = estimates.reshape([nb, 1, -1]).transpose([0, 2, 1])

        if self.zero_mean:
            mean_reference = references.mean(axis=1, keepdim=True)
            mean_estimate = estimates.mean(axis=1, keepdim=True)
        else:
            mean_reference = 0
            mean_estimate = 0

        _references = references - mean_reference
        _estimates = estimates - mean_estimate

        references_projection = (_references**2).sum(axis=-2) + eps
        references_on_estimates = (_estimates * _references).sum(axis=-2) + eps

        # Optimal scaling of the reference onto the estimate (SI-SDR), or 1
        # for a plain signal-to-noise ratio.
        scale = (
            (references_on_estimates / references_projection).unsqueeze(axis=1)
            if self.scaling else 1)

        e_true = scale * _references
        e_res = _estimates - e_true

        signal = (e_true**2).sum(axis=1)
        noise = (e_res**2).sum(axis=1)
        sdr = -10 * paddle.log10(signal / noise + eps)

        # Identity check: `!= None` would be an element-wise comparison if a
        # tensor were ever passed as clip_min.
        if self.clip_min is not None:
            sdr = paddle.clip(sdr, min=self.clip_min)

        if self.reduction == "mean":
            sdr = sdr.mean()
        elif self.reduction == "sum":
            sdr = sdr.sum()
        return sdr


================================================
FILE: paddlespeech/t2s/modules/masked_fill.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Union

import paddle


def is_broadcastable(shp1, shp2):
    """Tell whether two shapes are broadcast-compatible.

    The shapes are compared from the trailing dimension backwards, over as
    many dimensions as the shorter shape has. A pair of dimensions is
    compatible when either is 1 or both are equal.
    """
    return all(a == 1 or b == 1 or a == b
               for a, b in zip(shp1[::-1], shp2[::-1]))


# assume that len(shp1) == len(shp2)
def broadcast_shape(shp1, shp2):
    """Element-wise maximum of two shapes, aligned on the trailing dimension.

    Both shapes are expected to have the same length. Plain ``int`` pairs are
    combined with ``max``; if either dimension is not an ``int`` (e.g. a
    tensor-valued dimension), the remaining int is promoted with
    ``paddle.full`` and ``paddle.maximum`` is used instead.

    Returns:
        list: the broadcast shape, in the original (leading-first) order.
    """
    merged = []
    for dim_a, dim_b in zip(shp1[::-1], shp2[::-1]):
        a_is_int = isinstance(dim_a, int)
        b_is_int = isinstance(dim_b, int)

        if a_is_int and b_is_int:
            merged.append(max(dim_a, dim_b))
            continue

        # The dtype of the second operand wins when both carry one.
        dtype = getattr(dim_b, 'dtype', getattr(dim_a, 'dtype', None))

        if a_is_int:
            dim_a = paddle.full((), dim_a, dtype=dtype)
        if b_is_int:
            dim_b = paddle.full((), dim_b, dtype=dtype)

        merged.append(paddle.maximum(dim_a, dim_b))

    merged.reverse()
    return merged


def masked_fill(xs: paddle.Tensor,
                mask: paddle.Tensor,
                value: Union[float, int]):
    """Return ``xs`` with ``value`` written wherever ``mask`` is true.

    ``mask`` is broadcast to the shape computed by ``broadcast_shape`` and
    cast to bool before selection. Note that ``stop_gradient`` is set on the
    caller's ``mask`` tensor as a side effect.
    """
    # The broadcastability assertion is intentionally left out so the
    # function can be converted from dygraph to static graph:
    # assert is_broadcastable(xs.shape, mask.shape) is True
    target_shape = broadcast_shape(xs.shape, mask.shape)
    mask.stop_gradient = True
    bool_mask = mask.broadcast_to(target_shape).cast(dtype=paddle.bool)
    fill_values = paddle.ones_like(xs) * value
    return paddle.where(bool_mask, fill_values, xs)


================================================
FILE: paddlespeech/t2s/modules/nets_utils.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
import math
from typing import Tuple

import numpy as np
import paddle
from paddle import nn
from typeguard import typechecked

from paddlespeech.utils.initialize import _calculate_fan_in_and_fan_out
from paddlespeech.utils.initialize import kaiming_uniform_
from paddlespeech.utils.initialize import normal_
from paddlespeech.utils.initialize import ones_
from paddlespeech.utils.initialize import uniform_
from paddlespeech.utils.initialize import zeros_


# default init method of torch
# copy from https://github.com/PaddlePaddle/PaddleSpeech/blob/9cf8c1985a98bb380c183116123672976bdfe5c9/paddlespeech/t2s/models/vits/vits.py#L506
def _reset_parameters(module):
    """Re-initialise ``module`` in place, following torch's default scheme.

    * Conv / transposed-conv layers and Linear layers: Kaiming-uniform
      weights with ``a=sqrt(5)`` and a uniform bias bounded by
      ``1 / sqrt(fan_in)``.
    * BatchNorm / GroupNorm / LayerNorm: weight set to ones, bias to zeros.
    * Embedding: normal weights, with the padding row zeroed when a padding
      index is set.

    Modules of any other type are left untouched.
    """
    conv_types = (nn.Conv1D, nn.Conv1DTranspose, nn.Conv2D,
                  nn.Conv2DTranspose)
    norm_types = (nn.BatchNorm1D, nn.BatchNorm2D, nn.GroupNorm, nn.LayerNorm)

    if isinstance(module, conv_types):
        kaiming_uniform_(module.weight, a=math.sqrt(5))
        if module.bias is not None:
            fan_in, _ = _calculate_fan_in_and_fan_out(module.weight)
            # A zero fan-in leaves the bias as it is.
            if fan_in != 0:
                limit = 1 / math.sqrt(fan_in)
                uniform_(module.bias, -limit, limit)

    if isinstance(module, norm_types):
        ones_(module.weight)
        zeros_(module.bias)

    if isinstance(module, nn.Linear):
        kaiming_uniform_(module.weight, a=math.sqrt(5))
        if module.bias is not None:
            fan_in, _ = _calculate_fan_in_and_fan_out(module.weight)
            limit = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
            uniform_(module.bias, -limit, limit)

    if isinstance(module, nn.Embedding):
        normal_(module.weight)
        pad_idx = module._padding_idx
        if pad_idx is not None:
            with paddle.no_grad():
                module.weight[pad_idx] = 0


def pad_list(xs, pad_value):
    """Stack variable-length tensors into one padded batch tensor.

    Args:
        xs (List[Tensor]):
            Tensors shaped [(T_1, `*`), (T_2, `*`), ..., (T_B, `*`)].
        pad_value (float):
            Fill value for the padded positions.

    Returns:
        Tensor: Padded tensor (B, Tmax, `*`), with the dtype of ``xs[0]``.

    Examples:
        >>> x = [paddle.ones([4]), paddle.ones([2]), paddle.ones([1])]
        >>> pad_list(x, 0)
        tensor([[1., 1., 1., 1.],
                [1., 1., 0., 0.],
                [1., 0., 0., 0.]])
    """
    batch_size = len(xs)
    longest = max(item.shape[0] for item in xs)
    trailing_dims = xs[0].shape[1:]
    padded = paddle.full(
        [batch_size, longest, *trailing_dims], pad_value, dtype=xs[0].dtype)

    # Copy each sequence into the head of its row; the tail keeps pad_value.
    for row in range(batch_size):
        item = xs[row]
        padded[row, :item.shape[0]] = item

    return padded


def make_pad_mask(lengths, xs=None, length_dim=-1):
    """Make mask tensor containing indices of padded part.

    Args:
        lengths (Tensor(int64)): 
            Batch of lengths (B,). Must be a tensor: the implementation uses
            ``lengths.ndim``, ``lengths.max()`` and ``lengths.unsqueeze``.
        xs (Tensor, optional): 
            The reference tensor.
            If set, masks will be the same shape as this tensor.
        length_dim (int, optional): 
            Dimension indicator of the above tensor.
            See the example. Must not be 0.

    Returns:
        Tensor(bool): Mask tensor containing indices of padded part bool.

    Raises:
        ValueError: If ``length_dim`` is 0.

    Examples:
        With only lengths.

        >>> lengths = paddle.to_tensor([5, 3, 2])
        >>> make_pad_mask(lengths)
        masks = [[0, 0, 0, 0 ,0],
                 [0, 0, 0, 1, 1],
                 [0, 0, 1, 1, 1]]

        With the reference tensor.

        >>> xs = paddle.zeros((3, 2, 4))
        >>> make_pad_mask(lengths, xs)
        tensor([[[0, 0, 0, 0],
                 [0, 0, 0, 0]],
                [[0, 0, 0, 1],
                 [0, 0, 0, 1]],
                [[0, 0, 1, 1],
                 [0, 0, 1, 1]]])
        >>> xs = paddle.zeros((3, 2, 6))
        >>> make_pad_mask(lengths, xs)
        tensor([[[0, 0, 0, 0, 0, 1],
                 [0, 0, 0, 0, 0, 1]],
                [[0, 0, 0, 1, 1, 1],
                 [0, 0, 0, 1, 1, 1]],
                [[0, 0, 1, 1, 1, 1],
                 [0, 0, 1, 1, 1, 1]]])

        With the reference tensor and dimension indicator.

        >>> xs = paddle.zeros((3, 6, 6))
        >>> make_pad_mask(lengths, xs, 1)
        tensor([[[0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0],
                 [1, 1, 1, 1, 1, 1]],
                [[0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0],
                 [1, 1, 1, 1, 1, 1],
                 [1, 1, 1, 1, 1, 1],
                 [1, 1, 1, 1, 1, 1]],
                [[0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0],
                 [1, 1, 1, 1, 1, 1],
                 [1, 1, 1, 1, 1, 1],
                 [1, 1, 1, 1, 1, 1],
                 [1, 1, 1, 1, 1, 1]]])
        >>> make_pad_mask(lengths, xs, 2)
        tensor([[[0, 0, 0, 0, 0, 1],
                 [0, 0, 0, 0, 0, 1],
                 [0, 0, 0, 0, 0, 1],
                 [0, 0, 0, 0, 0, 1],
                 [0, 0, 0, 0, 0, 1],
                 [0, 0, 0, 0, 0, 1]],
                [[0, 0, 0, 1, 1, 1],
                 [0, 0, 0, 1, 1, 1],
                 [0, 0, 0, 1, 1, 1],
                 [0, 0, 0, 1, 1, 1],
                 [0, 0, 0, 1, 1, 1],
                 [0, 0, 0, 1, 1, 1]],
                [[0, 0, 1, 1, 1, 1],
                 [0, 0, 1, 1, 1, 1],
                 [0, 0, 1, 1, 1, 1],
                 [0, 0, 1, 1, 1, 1],
                 [0, 0, 1, 1, 1, 1],
                 [0, 0, 1, 1, 1, 1]]])

    """
    if length_dim == 0:
        raise ValueError("length_dim cannot be 0: {}".format(length_dim))

    # check if lengths is 0-dim tensor, if so, add a dimension
    if lengths.ndim == 0:
        bs = paddle.shape(lengths.unsqueeze(0))
    else:
        bs = paddle.shape(lengths)

    # Without a reference tensor the mask is as wide as the longest length;
    # otherwise it follows the reference tensor's size along length_dim.
    if xs is None:
        maxlen = paddle.cast(lengths.max(), dtype=bs.dtype)
    else:
        maxlen = paddle.shape(xs)[length_dim]

    seq_range = paddle.arange(0, maxlen, dtype=paddle.int64)
    # NOTE(review): original note says this is the position of the last
    # `expand` for VITS — presumably relevant to static-graph export; confirm.
    seq_range_expand = seq_range.unsqueeze(0).expand([bs, maxlen])
    seq_length_expand = lengths.unsqueeze(-1)
    # True where the position index is at or beyond the sequence length.
    mask = seq_range_expand >= seq_length_expand.cast(seq_range_expand.dtype)

    if xs is not None:
        assert paddle.shape(xs)[0] == bs, (paddle.shape(xs)[0], bs)
        if length_dim < 0:
            length_dim = len(paddle.shape(xs)) + length_dim
        # ind = (:, None, ..., None, :, , None, ..., None)
        # i.e. keep the batch axis and length_dim, insert new axes elsewhere.
        ind = tuple(
            slice(None) if i in (0, length_dim) else None
            for i in range(len(paddle.shape(xs))))
        mask = paddle.expand(mask[ind], paddle.shape(xs))
    return mask


def make_non_pad_mask(lengths, xs=None, length_dim=-1):
    """Make mask tensor containing indices of non-padded part.

    This is the logical negation of ``make_pad_mask`` called with the same
    arguments.

    Args:
        lengths (Tensor(int64)):
            Batch of lengths (B,).
        xs (Tensor, optional):
            The reference tensor.
            If set, masks will be the same shape as this tensor.
        length_dim (int, optional):
            Dimension indicator of the above tensor.
            See the example.

    Returns:
        Tensor(bool):
            mask tensor, true at the non-padded positions.

    Examples:
        With only lengths.

        >>> lengths = paddle.to_tensor([5, 3, 2])
        >>> make_non_pad_mask(lengths)
        masks = [[1, 1, 1, 1 ,1],
                 [1, 1, 1, 0, 0],
                 [1, 1, 0, 0, 0]]

        With the reference tensor.

        >>> xs = paddle.zeros((3, 2, 4))
        >>> make_non_pad_mask(lengths, xs)
        tensor([[[1, 1, 1, 1],
                 [1, 1, 1, 1]],
                [[1, 1, 1, 0],
                 [1, 1, 1, 0]],
                [[1, 1, 0, 0],
                 [1, 1, 0, 0]]])
        >>> xs = paddle.zeros((3, 2, 6))
        >>> make_non_pad_mask(lengths, xs)
        tensor([[[1, 1, 1, 1, 1, 0],
                 [1, 1, 1, 1, 1, 0]],
                [[1, 1, 1, 0, 0, 0],
                 [1, 1, 1, 0, 0, 0]],
                [[1, 1, 0, 0, 0, 0],
                 [1, 1, 0, 0, 0, 0]]])

        With the reference tensor and dimension indicator.

        >>> xs = paddle.zeros((3, 6, 6))
        >>> make_non_pad_mask(lengths, xs, 1)
        tensor([[[1, 1, 1, 1, 1, 1],
                 [1, 1, 1, 1, 1, 1],
                 [1, 1, 1, 1, 1, 1],
                 [1, 1, 1, 1, 1, 1],
                 [1, 1, 1, 1, 1, 1],
                 [0, 0, 0, 0, 0, 0]],
                [[1, 1, 1, 1, 1, 1],
                 [1, 1, 1, 1, 1, 1],
                 [1, 1, 1, 1, 1, 1],
                 [0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0]],
                [[1, 1, 1, 1, 1, 1],
                 [1, 1, 1, 1, 1, 1],
                 [0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0, 0]]])
        >>> make_non_pad_mask(lengths, xs, 2)
        tensor([[[1, 1, 1, 1, 1, 0],
                 [1, 1, 1, 1, 1, 0],
                 [1, 1, 1, 1, 1, 0],
                 [1, 1, 1, 1, 1, 0],
                 [1, 1, 1, 1, 1, 0],
                 [1, 1, 1, 1, 1, 0]],
                [[1, 1, 1, 0, 0, 0],
                 [1, 1, 1, 0, 0, 0],
                 [1, 1, 1, 0, 0, 0],
                 [1, 1, 1, 0, 0, 0],
                 [1, 1, 1, 0, 0, 0],
                 [1, 1, 1, 0, 0, 0]],
                [[1, 1, 0, 0, 0, 0],
                 [1, 1, 0, 0, 0, 0],
                 [1, 1, 0, 0, 0, 0],
                 [1, 1, 0, 0, 0, 0],
                 [1, 1, 0, 0, 0, 0],
                 [1, 1, 0, 0, 0, 0]]])

    """
    pad_mask = make_pad_mask(lengths, xs, length_dim)
    return paddle.logical_not(pad_mask)


@typechecked
def initialize(model: nn.Layer, init: str):
    """Select the global weight initializer by name.

    The chosen initializer is installed through
    ``nn.initializer.set_global_initializer`` together with a ``Constant``
    bias initializer. ``model`` itself is not modified by this function.

    Args:
        model (nn.Layer):
            Target.
        init (str):
            Method of initialization: one of "xavier_uniform",
            "xavier_normal", "kaiming_uniform" or "kaiming_normal".

    Raises:
        ValueError: If ``init`` is not one of the names above.
    """
    weight_initializers = {
        "xavier_uniform": nn.initializer.XavierUniform,
        "xavier_normal": nn.initializer.XavierNormal,
        "kaiming_uniform": nn.initializer.KaimingUniform,
        "kaiming_normal": nn.initializer.KaimingNormal,
    }
    if init not in weight_initializers:
        raise ValueError("Unknown initialization: " + init)

    nn.initializer.set_global_initializer(weight_initializers[init](),
                                          nn.initializer.Constant())


# for VITS
def get_random_segments(
        x: paddle.Tensor,
        x_lengths: paddle.Tensor,
        segment_size: int, ) -> Tuple[paddle.Tensor, paddle.Tensor]:
    """Get random segments.

    A start index is drawn uniformly for every batch item from
    ``[0, x_lengths - segment_size)`` and the matching window is cut out with
    ``get_segments``.

    Args:
        x (Tensor):
            Input tensor (B, C, T).
        x_lengths (Tensor):
            Length tensor (B,).
        segment_size (int):
            Segment size.
    Returns:
        Tensor:
            Segmented tensor (B, C, segment_size).
        Tensor:
            Start index tensor (B,).
    """
    # Only the batch size is needed; the unpacking still expects a 3-D input.
    b, _, _ = paddle.shape(x)
    max_start_idx = x_lengths - segment_size
    rand_number = paddle.rand([b])
    # Scale the uniform [0, 1) draws to the valid start range and truncate
    # to integer indices.
    start_idxs = paddle.cast(rand_number *
                             max_start_idx.astype(rand_number.dtype), 'int64')
    segments = get_segments(x, start_idxs, segment_size)

    return segments, start_idxs


def get_segments(
        x: paddle.Tensor,
        start_idxs: paddle.Tensor,
        segment_size: int, ) -> paddle.Tensor:
    """Cut one fixed-size window out of every batch item.

    Args:
        x (Tensor):
            Input tensor (B, C, T).
        start_idxs (Tensor):
            Start index tensor (B,).
        segment_size (int):
            Segment size.
    Returns:
        Tensor: Segmented tensor (B, C, segment_size).
    """
    b, c, t = paddle.shape(x)
    out = paddle.zeros([b, c, segment_size], dtype=x.dtype)
    for row, begin in enumerate(start_idxs):
        # Copy the window [begin, begin + segment_size) along the last axis.
        out[row] = x[row, :, begin:begin + segment_size]
    return out


# see https://github.com/PaddlePaddle/X2Paddle/blob/develop/docs/pytorch_project_convertor/API_docs/ops/torch.gather.md
def paddle_gather(x, dim, index):
    """Emulate ``torch.gather(x, dim, index)`` using ``paddle.gather_nd``.

    For every element of ``index`` a full N-d coordinate is assembled: the
    coordinate along ``dim`` comes from ``index`` itself, the others from an
    ``arange`` over that axis of ``x`` expanded to the shape of ``index``.
    The gathered values are returned with the shape of ``index``.
    """
    out_shape = index.shape
    flat_index = index.flatten()
    rank = len(x.shape)
    if dim < 0:
        dim = rank + dim

    coords = []
    for axis in range(rank):
        if axis == dim:
            coords.append(flat_index)
            continue
        # An arange over this axis, shaped so it expands to index's shape.
        view_shape = [1] * rank
        view_shape[axis] = x.shape[axis]
        axis_range = paddle.arange(x.shape[axis], dtype=index.dtype)
        axis_range = axis_range.reshape(view_shape)
        coords.append(paddle.expand(axis_range, out_shape).flatten())

    nd_index = paddle.transpose(paddle.stack(coords), [1, 0]).astype("int64")
    return paddle.gather_nd(x, nd_index).reshape(out_shape)


# for ERNIE SAT
# mask phones
def phones_masking(xs_pad: paddle.Tensor,
                   src_mask: paddle.Tensor,
                   align_start: paddle.Tensor,
                   align_end: paddle.Tensor,
                   align_start_lens: paddle.Tensor,
                   mlm_prob: float=0.8,
                   mean_phn_span: int=8,
                   span_bdy: paddle.Tensor=None):
    '''Build the masked-position map of the speech input.

    Args:
        xs_pad (paddle.Tensor): 
            input speech (B, Tmax, D).
        src_mask (paddle.Tensor): 
            mask of speech (B, 1, Tmax).
        align_start (paddle.Tensor): 
            frame level phone alignment start (B, Tmax2).
        align_end (paddle.Tensor): 
            frame level phone alignment end (B, Tmax2).
        align_start_lens (paddle.Tensor): 
            length of align_start (B, ).
        mlm_prob (float):
            masking ratio passed to ``random_spans_noise_mask``; a value of
            exactly 1.0 masks every position.
        mean_phn_span (int):
            mean span length passed to ``random_spans_noise_mask``; 0 selects
            the speech-only branch, where the span length is derived from the
            sequence length instead.
        span_bdy (paddle.Tensor): 
            masked mel boundary of input speech (B, 2). Each row is read as
            alternating (start, end) values. When given, it is used instead of
            random masking.
    Returns:
        paddle.Tensor[bool]: masked position of input speech (B, Tmax).
    '''
    bz, sent_len, _ = paddle.shape(xs_pad)
    masked_pos = paddle.zeros((bz, sent_len))
    if mlm_prob == 1.0:
        # mask everything
        masked_pos += 1
    elif mean_phn_span == 0:
        # only speech
        length = sent_len
        mean_phn_span = min(length * mlm_prob // 3, 50)
        # NOTE(review): random_spans_noise_mask returns a numpy bool array, so
        # nonzero() presumably yields a tuple of index arrays — confirm the
        # indexing below behaves as intended with the installed paddle.
        masked_phn_idxs = random_spans_noise_mask(
            length=length, mlm_prob=mlm_prob,
            mean_phn_span=mean_phn_span).nonzero()
        # the same frame indices are masked for every item in the batch
        masked_pos[:, masked_phn_idxs] = 1
    else:
        for idx in range(bz):
            # for inference
            if span_bdy is not None:
                # even entries are starts, odd entries are ends
                for s, e in zip(span_bdy[idx][::2], span_bdy[idx][1::2]):
                    masked_pos[idx, s:e] = 1
            # for training
            else:
                length = align_start_lens[idx]
                # fewer than 2 phones: nothing is masked for this item
                if length < 2:
                    continue
                masked_phn_idxs = random_spans_noise_mask(
                    length=length,
                    mlm_prob=mlm_prob,
                    mean_phn_span=mean_phn_span).nonzero()
                # translate the masked phone indices into frame ranges
                masked_start = align_start[idx][masked_phn_idxs].tolist()
                masked_end = align_end[idx][masked_phn_idxs].tolist()
                for s, e in zip(masked_start, masked_end):
                    masked_pos[idx, s:e] = 1
    # zero out positions that src_mask marks as invalid
    non_eos_mask = paddle.reshape(src_mask, paddle.shape(xs_pad)[:2])
    masked_pos = masked_pos * non_eos_mask.astype(masked_pos.dtype)
    masked_pos = paddle.cast(masked_pos, 'bool')

    return masked_pos


# mask speech and phones
def phones_text_masking(xs_pad: paddle.Tensor,
                        src_mask: paddle.Tensor,
                        text_pad: paddle.Tensor,
                        text_mask: paddle.Tensor,
                        align_start: paddle.Tensor,
                        align_end: paddle.Tensor,
                        align_start_lens: paddle.Tensor,
                        mlm_prob: float=0.8,
                        mean_phn_span: int=8,
                        span_bdy: paddle.Tensor=None):
    '''Build the masked-position maps of the speech and the text input.

    Text positions are only masked in the random (training) branch; in the
    other branches the returned text mask is all False.

    Args:
        xs_pad (paddle.Tensor): 
            input speech (B, Tmax, D).
        src_mask (paddle.Tensor): 
            mask of speech (B, 1, Tmax).
        text_pad (paddle.Tensor): 
            input text (B, Tmax2).
        text_mask (paddle.Tensor):
            mask of text (B, 1, Tmax2).
        align_start (paddle.Tensor): 
            frame level phone alignment start (B, Tmax2).
        align_end (paddle.Tensor): 
            frame level phone alignment end (B, Tmax2).
        align_start_lens (paddle.Tensor): 
            length of align_start (B, ).
        mlm_prob (float):
            masking ratio passed to ``random_spans_noise_mask``; a value of
            exactly 1.0 masks every speech position.
        mean_phn_span (int):
            mean span length passed to ``random_spans_noise_mask``; 0 selects
            the speech-only branch.
        span_bdy (paddle.Tensor): 
            masked mel boundary of input speech (B, 2). Each row is read as
            alternating (start, end) values.
    Returns:
        paddle.Tensor[bool]: 
            masked position of input speech (B, Tmax).
        paddle.Tensor[bool]: 
            masked position of input text (B, Tmax2).
    '''
    bz, sent_len, _ = paddle.shape(xs_pad)
    masked_pos = paddle.zeros((bz, sent_len))
    _, text_len = paddle.shape(text_pad)
    # number of text tokens to mask per item: half of the unmasked share
    text_mask_num_lower = math.ceil(text_len * (1 - mlm_prob) * 0.5)
    text_masked_pos = paddle.zeros((bz, text_len))

    if mlm_prob == 1.0:
        # mask every speech position
        masked_pos += 1
    elif mean_phn_span == 0:
        # only speech 
        length = sent_len
        mean_phn_span = min(length * mlm_prob // 3, 50)
        masked_phn_idxs = random_spans_noise_mask(
            length=length, mlm_prob=mlm_prob,
            mean_phn_span=mean_phn_span).nonzero()
        masked_pos[:, masked_phn_idxs] = 1
    else:
        for idx in range(bz):
            # for inference
            if span_bdy is not None:
                # even entries are starts, odd entries are ends
                for s, e in zip(span_bdy[idx][::2], span_bdy[idx][1::2]):
                    masked_pos[idx, s:e] = 1
            # for training
            else:
                length = align_start_lens[idx]
                # fewer than 2 phones: nothing is masked for this item
                if length < 2:
                    continue
                masked_phn_idxs = random_spans_noise_mask(
                    length=length,
                    mlm_prob=mlm_prob,
                    mean_phn_span=mean_phn_span).nonzero()
                # text masking draws only from phones whose speech is NOT masked
                unmasked_phn_idxs = list(
                    set(range(length)) - set(masked_phn_idxs[0].tolist()))
                np.random.shuffle(unmasked_phn_idxs)
                masked_text_idxs = unmasked_phn_idxs[:text_mask_num_lower]
                text_masked_pos[idx, masked_text_idxs] = 1
                # translate the masked phone indices into frame ranges
                masked_start = align_start[idx][masked_phn_idxs].tolist()
                masked_end = align_end[idx][masked_phn_idxs].tolist()
                for s, e in zip(masked_start, masked_end):
                    masked_pos[idx, s:e] = 1
    # zero out positions that the source / text masks mark as invalid
    non_eos_mask = paddle.reshape(src_mask, shape=paddle.shape(xs_pad)[:2])
    masked_pos = masked_pos * non_eos_mask.astype(masked_pos.dtype)
    non_eos_text_mask = paddle.reshape(
        text_mask, shape=paddle.shape(text_pad)[:2])
    text_masked_pos = text_masked_pos * non_eos_text_mask.astype(
        text_masked_pos.dtype)
    masked_pos = paddle.cast(masked_pos, 'bool')
    text_masked_pos = paddle.cast(text_masked_pos, 'bool')

    return masked_pos, text_masked_pos


def get_seg_pos(speech_pad: paddle.Tensor,
                text_pad: paddle.Tensor,
                align_start: paddle.Tensor,
                align_end: paddle.Tensor,
                align_start_lens: paddle.Tensor,
                seg_emb: bool=False):
    '''Compute segment (phone index) positions for speech frames and text.

    When ``seg_emb`` is False both outputs are all zeros.

    Args:
        speech_pad (paddle.Tensor): 
            input speech (B, Tmax, D).
        text_pad (paddle.Tensor): 
            input text (B, Tmax2).
        align_start (paddle.Tensor): 
            frame level phone alignment start (B, Tmax2).
        align_end (paddle.Tensor): 
            frame level phone alignment end (B, Tmax2).
        align_start_lens (paddle.Tensor): 
            length of align_start (B, ).
        seg_emb (bool): 
            whether to use segment embedding.
    Returns:
        paddle.Tensor[int]: n-th phone of each mel, 0<=n<=Tmax2 (B, Tmax).
            eg: 
            Tensor(shape=[1, 328], dtype=int64, place=Place(gpu:0), stop_gradient=True,
            [[0 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
            1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
            1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
            1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
            1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 2 , 2 , 2 , 3 , 3 , 3 , 4 , 4 , 4 ,
            5 , 5 , 5 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 6 , 7 , 7 , 7 , 7 , 7 , 7 , 7 ,
            7 , 8 , 8 , 8 , 8 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 9 , 10, 10, 10, 10, 10,
            10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13,
            13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15,
            15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17,
            17, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20,
            20, 20, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 23, 23,
            23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25,
            25, 26, 26, 26, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 29,
            29, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 32,
            32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35,
            35, 35, 35, 35, 35, 35, 36, 36, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37,
            37, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,
            38, 38, 0 , 0 ]])
        paddle.Tensor[int]: n-th phone of each phone, 0<=n<=Tmax2 (B, Tmax2).
            eg: 
            Tensor(shape=[1, 38], dtype=int64, place=Place(gpu:0), stop_gradient=True,
                [[1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10, 11, 12, 13, 14, 15, 16, 17, 
                18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 
                36, 37, 38]])
    '''

    bz, speech_len, _ = paddle.shape(speech_pad)
    _, text_len = paddle.shape(text_pad)

    text_seg_pos = paddle.zeros((bz, text_len), dtype='int64')
    speech_seg_pos = paddle.zeros((bz, speech_len), dtype='int64')

    # segment embedding disabled: return the all-zero position maps
    if not seg_emb:
        return speech_seg_pos, text_seg_pos
    for idx in range(bz):
        align_length = align_start_lens[idx]
        for j in range(align_length):
            s, e = align_start[idx][j], align_end[idx][j]
            # phone ids are 1-based; 0 is left for frames/tokens not covered
            # by any alignment entry
            speech_seg_pos[idx, s:e] = j + 1
            text_seg_pos[idx, j] = j + 1

    return speech_seg_pos, text_seg_pos


# randomly select the range of speech and text to mask during training
def random_spans_noise_mask(length: int,
                            mlm_prob: float=0.8,
                            mean_phn_span: float=8):
    """Random span mask, adapted from T5's `random_spans_helper
    <https://github.com/google-research/text-to-text-transfer-transformer/blob/84f8bcc14b5f2c03de51bd3587609ba8f6bbd1cd/t5/data/preprocessors.py#L2682>`__ .

    The counts are fixed by the arguments:
    num_noise_tokens = round(length * mlm_prob), clamped to [1, length - 1]
    num_noise_spans = max(round(num_noise_tokens / mean_phn_span), 1)
    Spans alternate between non-noise and noise, beginning with non-noise;
    only the individual span lengths are random (drawn via ``np.random``).

    Args:
        length: length of the incoming token sequence
        mlm_prob: approximate density of the output mask
        mean_phn_span: target mean length of a noise span
    Returns:
        np.ndarray: a boolean array with shape [length], True on noise tokens
    """
    n_noise = int(np.round(length * mlm_prob))
    # keep at least one noise token and at least one non-noise token
    n_noise = min(max(n_noise, 1), length - 1)
    # keep at least one noise span
    n_spans = max(int(np.round(n_noise / mean_phn_span)), 1)
    n_clean = length - n_noise

    def _random_seg(num_items, num_segs):
        """Randomly split ``num_items`` items into ``num_segs`` non-empty
        consecutive segments and return the segment lengths."""
        # num_segs - 1 boundary flags spread randomly over the item gaps
        boundaries = np.arange(num_items - 1) < (num_segs - 1)
        np.random.shuffle(boundaries)
        starts_segment = np.pad(boundaries, [[1, 0]])
        seg_ids = np.cumsum(starts_segment)
        # the ids are sorted, so the counts are the segment lengths
        return np.unique(seg_ids, return_counts=True)[1]

    # The noise spans are drawn first, then the non-noise spans.
    noise_lens = _random_seg(n_noise, n_spans)
    clean_lens = _random_seg(n_clean, n_spans)

    # interleave as [clean_0, noise_0, clean_1, noise_1, ...]
    paired = np.stack([clean_lens, noise_lens], axis=1)
    interleaved = np.reshape(paired, [n_spans * 2])
    span_starts = np.cumsum(interleaved)[:-1]

    start_flags = np.zeros((length, ), dtype=np.int8)
    start_flags[span_starts] = True
    # odd span numbers are the noise spans
    span_index = np.cumsum(start_flags)
    noise_mask = np.equal(span_index % 2, 1)

    return noise_mask[:length]


================================================
FILE: paddlespeech/t2s/modules/normalizer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle import nn


class ZScore(nn.Layer):
    """Z-score normalizer holding fixed statistics ``mu`` and ``sigma``.

    The statistics are registered as buffers of the layer. The original
    author notes the layout is "feature last"; presumably ``mu`` and ``sigma``
    broadcast against the last axis of the input -- confirm with the caller.

    Args:
        mu (Tensor):
            Mean subtracted in ``forward`` and added back in ``inverse``.
        sigma (Tensor):
            Scale divided by in ``forward`` and multiplied in ``inverse``.
    """

    def __init__(self, mu, sigma):
        super().__init__()
        for buffer_name, statistic in (("mu", mu), ("sigma", sigma)):
            self.register_buffer(buffer_name, statistic)

    def forward(self, x):
        """Normalize ``x``: ``(x - mu) / sigma``."""
        # NOTE: explicit paddle.subtract / paddle.divide calls are used
        # instead of the - and / operators to stay compatible with
        # paddle's to_static.
        centered = paddle.subtract(x, self.mu)
        return paddle.divide(centered, self.sigma)

    def inverse(self, x):
        """Undo the normalization: ``x * sigma + mu``."""
        # NOTE: explicit paddle.multiply / paddle.add calls are used
        # instead of the * and + operators to stay compatible with
        # paddle's to_static.
        rescaled = paddle.multiply(x, self.sigma)
        return paddle.add(rescaled, self.mu)


================================================
FILE: paddlespeech/t2s/modules/positional_encoding.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle import Tensor

__all__ = ["sinusoid_position_encoding", "scaled_position_encoding"]


def sinusoid_position_encoding(num_positions: int,
                               feature_size: int,
                               omega: float=1.0,
                               start_pos: int=0,
                               dtype=None) -> paddle.Tensor:
    """Build a sinusoidal position encoding table.

    Even feature channels receive ``sin`` and odd channels receive ``cos`` of
    ``position * omega / 10000 ** (channel / feature_size)``.

    Args:
        num_positions (int):
            Number of positions to encode.
        feature_size (int):
            Size of each encoding. It should be even; this is NOT checked here
            (see the note in the body).
        omega (float, optional):
            Multiplier applied to the position index.
        start_pos (int, optional):
            Index of the first encoded position.
        dtype (optional):
            Data type of the result. Falls back to paddle's default dtype.

    Returns:
        Tensor: Encodings with shape (num_positions, feature_size).
    """
    # NOTE: to be compatible with paddle's to_static, we cannot raise
    # an exception here, take care of it by yourself
    # if (feature_size % 2 != 0):
    #     raise ValueError("size should be divisible by 2")
    dtype = dtype or paddle.get_default_dtype()

    channel = paddle.arange(0, feature_size, 2, dtype=dtype)
    index = paddle.arange(start_pos, start_pos + num_positions, 1, dtype=dtype)
    exponent = channel / float(feature_size)
    # Create the base in the requested dtype so that every operand of the
    # power/division below shares one dtype (it used to be hard-coded to
    # float32, which mixed dtypes whenever dtype was not float32).
    denominator = paddle.to_tensor([10000.0], dtype=dtype)**exponent
    p = (paddle.unsqueeze(index, -1) * omega) / denominator
    encodings = paddle.zeros([num_positions, feature_size], dtype=dtype)
    encodings[:, 0::2] = paddle.sin(p)
    encodings[:, 1::2] = paddle.cos(p)
    return encodings


def scaled_position_encoding(num_positions: int,
                             feature_size: int,
                             omega: Tensor,
                             start_pos: int=0,
                             dtype=None) -> Tensor:
    """Batched sinusoidal position encoding with one scale per batch item.

    Args:
        num_positions (int):
            Number of positions to encode.
        feature_size (int):
            Size of each encoding; must be even.
        omega (Tensor):
            Per-item position multiplier, shape (batch_size, ).
        start_pos (int, optional):
            Index of the first encoded position.
        dtype (optional):
            Data type of the result. Falls back to paddle's default dtype.

    Returns:
        Tensor: Encodings with shape (batch_size, num_positions, feature_size).

    Raises:
        ValueError: If ``feature_size`` is odd.
    """
    if feature_size % 2 != 0:
        raise ValueError("size should be divisible by 2")
    if not dtype:
        dtype = paddle.get_default_dtype()

    n_batch = omega.shape[0]
    # positions are created in omega's dtype, channels in the output dtype
    positions = paddle.arange(
        start_pos, start_pos + num_positions, 1, dtype=omega.dtype)
    half_channels = paddle.arange(0, feature_size, 2, dtype=dtype)

    # (batch_size, ) -> (batch_size, 1, 1) so it broadcasts over positions
    omega_b = paddle.unsqueeze(omega, [1, 2])
    scaled_positions = paddle.unsqueeze(positions, -1) * omega_b
    wavelengths = 10000.0**(half_channels / float(feature_size))
    phase = scaled_positions / wavelengths

    table = paddle.zeros(
        [n_batch, num_positions, feature_size], dtype=dtype)
    # even channels hold the sine, odd channels the cosine
    table[:, :, 0::2] = paddle.sin(phase)
    table[:, :, 1::2] = paddle.cos(phase)
    return table


================================================
FILE: paddlespeech/t2s/modules/pqmf.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""Pseudo QMF modules."""
import numpy as np
import paddle
import paddle.nn.functional as F
from paddle import nn
from scipy.signal.windows import kaiser


def design_prototype_filter(taps=62, cutoff_ratio=0.142, beta=9.0):
    """Design the low-pass prototype filter used by PQMF.

    A truncated ideal low-pass impulse response is multiplied by a Kaiser
    window, following `A Kaiser window approach for the design of prototype
    filters of cosine modulated filterbanks`_.

    Args:
        taps (int):
            The number of filter taps (must be even).
        cutoff_ratio (float):
            Cut-off frequency ratio, strictly between 0.0 and 1.0.
        beta (float):
            Beta coefficient for the Kaiser window.
    Returns:
        ndarray:
            Impulse response of the prototype filter (taps + 1,).
        .. _`A Kaiser window approach for the design of prototype filters of cosine modulated filterbanks`:
            https://ieeexplore.ieee.org/abstract/document/681427
    """
    # validate the arguments
    assert taps % 2 == 0, "The number of taps mush be even number."
    assert 0.0 < cutoff_ratio < 1.0, "Cutoff ratio must be > 0.0 and < 1.0."

    n_coeffs = taps + 1
    center = taps // 2
    # sample offsets relative to the filter centre: -taps/2 ... +taps/2
    offsets = np.arange(n_coeffs) - 0.5 * taps
    omega_c = np.pi * cutoff_ratio

    # ideal low-pass response sin(omega_c * n) / (pi * n); the centre sample
    # evaluates 0 / 0, so the resulting "invalid" warning is silenced and the
    # NaN is overwritten with the limit value right after.
    with np.errstate(invalid="ignore"):
        ideal = np.sin(omega_c * offsets) / (np.pi * offsets)
    ideal[center] = np.cos(0) * cutoff_ratio

    # shape the truncated response with a Kaiser window
    window = kaiser(n_coeffs, beta)
    return ideal * window


class PQMF(nn.Layer):
    """Pseudo-QMF analysis / synthesis filter bank.

    This module is based on `Near-perfect-reconstruction pseudo-QMF banks`_.
    .. _`Near-perfect-reconstruction pseudo-QMF banks`:
        https://ieeexplore.ieee.org/document/258122
    """

    def __init__(self, subbands=4, taps=62, cutoff_ratio=0.142, beta=9.0):
        """Initialize PQMF module.
        The cutoff_ratio and beta parameters are optimized for #subbands = 4.
        See discussion in https://github.com/kan-bayashi/ParallelWaveGAN/issues/195.

        Args:
            subbands (int):
                The number of subbands.
            taps (int):
                The number of filter taps.
            cutoff_ratio (float):
                Cut-off frequency ratio.
            beta (float):
                Beta coefficient for kaiser window.
        """
        super().__init__()

        prototype = design_prototype_filter(taps, cutoff_ratio, beta)
        n_coeffs = len(prototype)
        analysis_bank = np.zeros((subbands, n_coeffs))
        synthesis_bank = np.zeros((subbands, n_coeffs))
        # cosine-modulate the prototype once per band; the analysis and the
        # synthesis filters only differ in the sign of the phase offset
        for band in range(subbands):
            modulation = (2 * band + 1) * (np.pi / (2 * subbands)) * (
                np.arange(taps + 1) - (taps / 2))
            phase = (-1)**band * np.pi / 4
            analysis_bank[band] = 2 * prototype * np.cos(modulation + phase)
            synthesis_bank[band] = 2 * prototype * np.cos(modulation - phase)

        # convert to tensors shaped as conv1d kernels:
        # analysis (subbands, 1, taps + 1), synthesis (1, subbands, taps + 1)
        analysis_kernel = paddle.to_tensor(analysis_bank, dtype="float32")
        self.analysis_filter = analysis_kernel.unsqueeze(1)
        synthesis_kernel = paddle.to_tensor(synthesis_bank, dtype="float32")
        self.synthesis_filter = synthesis_kernel.unsqueeze(0)

        # kernel used for downsampling & upsampling: a single 1.0 at
        # [k, k, 0] for every band k, zeros elsewhere
        selector = paddle.zeros(
            (subbands, subbands, subbands), dtype="float32")
        for band in range(subbands):
            selector[band, band, 0] = 1.0
        self.updown_filter = selector
        self.subbands = subbands
        # zero padding of taps // 2 samples on both sides
        self.pad_fn = nn.Pad1D(taps // 2, mode='constant', value=0.0)

    def analysis(self, x):
        """Analysis with PQMF.
        Args:
            x (Tensor):
                Input tensor (B, 1, T).
        Returns:
            Tensor: Output tensor (B, subbands, T // subbands).
        """
        padded = self.pad_fn(x)
        banded = F.conv1d(padded, self.analysis_filter)
        # strided convolution with the selector kernel decimates each band
        return F.conv1d(banded, self.updown_filter, stride=self.subbands)

    def synthesis(self, x):
        """Synthesis with PQMF.
        Args:
            x (Tensor):
                Input tensor (B, subbands, T // subbands).
        Returns:
            Tensor: Output tensor (B, 1, T).
        """
        # the upsampling kernel is scaled by the number of subbands
        upsample_kernel = self.updown_filter * self.subbands
        upsampled = F.conv1d_transpose(
            x, upsample_kernel, stride=self.subbands)
        padded = self.pad_fn(upsampled)
        return F.conv1d(padded, self.synthesis_filter)

    # when converting dygraph to static graph, can not use self.pqmf.synthesis directly
    def forward(self, x):
        """Run ``synthesis`` on ``x`` (entry point for static-graph export)."""
        merged = self.synthesis(x)
        return merged


================================================
FILE: paddlespeech/t2s/modules/predictor/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/modules/predictor/duration_predictor.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""Duration predictor related modules."""
import paddle
from paddle import nn

from paddlespeech.t2s.modules.layer_norm import LayerNorm
from paddlespeech.t2s.modules.masked_fill import masked_fill


class DurationPredictor(nn.Layer):
    """Duration predictor module.

    This is the duration predictor described in
    `FastSpeech: Fast, Robust and Controllable Text to Speech`_.
    It predicts, from the encoder hidden embeddings, one duration per input
    position.

    .. _`FastSpeech: Fast, Robust and Controllable Text to Speech`:
        https://arxiv.org/pdf/1905.09263.pdf

    Note
    ----------
    `forward` and `inference` work in different domains: `forward` returns
    values in log domain, `inference` converts them to linear domain.

    """

    def __init__(self,
                 idim,
                 n_layers=2,
                 n_chans=384,
                 kernel_size=3,
                 dropout_rate=0.1,
                 offset=1.0):
        """Initialize duration predictor module.

        Args:
            idim (int):
                Input dimension.
            n_layers (int, optional):
                Number of convolutional layers.
            n_chans (int, optional):
                Number of channels of convolutional layers.
            kernel_size (int, optional):
                Kernel size of convolutional layers.
            dropout_rate (float, optional):
                Dropout rate.
            offset (float, optional):
                Offset value to avoid nan in log domain.

        """
        super().__init__()
        self.offset = offset
        self.conv = nn.LayerList()
        same_padding = (kernel_size - 1) // 2
        # only the first layer sees idim channels, the others see n_chans
        in_chans = idim
        for _ in range(n_layers):
            conv_block = nn.Sequential(
                nn.Conv1D(
                    in_chans,
                    n_chans,
                    kernel_size,
                    stride=1,
                    padding=same_padding),
                nn.ReLU(),
                LayerNorm(n_chans, dim=1),
                nn.Dropout(dropout_rate))
            self.conv.append(conv_block)
            in_chans = n_chans
        self.linear = nn.Linear(n_chans, 1, bias_attr=True)

    def _forward(self, xs, x_masks=None, is_inference=False):
        """Shared implementation of `forward` and `inference`."""
        # (B, Tmax, idim) -> (B, idim, Tmax) for the 1D convolutions
        hidden = xs.transpose([0, 2, 1])
        for conv_block in self.conv:
            # (B, C, Tmax)
            hidden = conv_block(hidden)

        # NOTE: the projection output is treated as log-domain duration
        # (B, C, Tmax) -> (B, Tmax, C) -> (B, Tmax, 1) -> (B, Tmax)
        projected = self.linear(hidden.transpose([0, 2, 1]))
        durations = projected.squeeze(-1)

        if is_inference:
            # NOTE: convert to linear domain: exp, remove the offset, round
            # and clip negative values to 0
            linear_domain = durations.exp() - self.offset
            durations = paddle.clip(paddle.round(linear_domain), min=0)

        if x_masks is not None:
            # presumably masked (padded) positions are set to 0.0 -- see
            # masked_fill for the exact semantics
            durations = masked_fill(durations, x_masks, 0.0)

        return durations

    def forward(self, xs, x_masks=None):
        """Calculate forward propagation.
        Args:
            xs(Tensor):
                Batch of input sequences (B, Tmax, idim).
            x_masks(Tensor(bool), optional):
                Batch of masks indicating padded part (B, Tmax). (Default value = None)

        Returns:
            Tensor: Batch of predicted durations in log domain (B, Tmax).
        """
        return self._forward(xs, x_masks=x_masks, is_inference=False)

    def inference(self, xs, x_masks=None):
        """Inference duration.
        Args:
            xs(Tensor):
                Batch of input sequences (B, Tmax, idim).
            x_masks(Tensor(bool), optional):
                Batch of masks indicating padded part (B, Tmax). (Default value = None)

        Returns:
            Tensor: Batch of predicted durations in linear domain (B, Tmax),
                rounded and clipped at 0. No integer cast is applied here.
        """
        return self._forward(xs, x_masks=x_masks, is_inference=True)


class DurationPredictorLoss(nn.Layer):
    """Loss function module for duration predictor.

    The loss value is calculated in log domain to make it Gaussian.

    """

    def __init__(self, offset=1.0, reduction="mean"):
        """Initialize duration predictor loss module.
        Args:
            offset (float, optional): Offset value to avoid nan in log domain.
            reduction (str): Reduction type in loss calculation.
        """
        super().__init__()
        self.offset = offset
        self.criterion = nn.MSELoss(reduction=reduction)

    def forward(self, outputs, targets):
        """Calculate forward propagation.

        Args:
            outputs(Tensor):
                Batch of prediction durations in log domain (B, T)
            targets(Tensor):
                Batch of groundtruth durations in linear domain (B, T)

        Returns:
            Tensor: Mean squared error loss value.

        Note:
            `outputs` is in log domain but `targets` is in linear domain.
        """
        # NOTE: move the linear-domain targets into the log domain of the
        # predictions: log(targets + offset)
        linear_targets = targets.cast(dtype='float32')
        log_targets = paddle.log(linear_targets + self.offset)
        return self.criterion(outputs, log_targets)


================================================
FILE: paddlespeech/t2s/modules/predictor/length_regulator.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""Length regulator related modules."""
import numpy as np
import paddle
from paddle import nn


class LengthRegulator(nn.Layer):
    """Length regulator module for feed-forward Transformer.

    This is a module of length regulator described in
    `FastSpeech: Fast, Robust and Controllable Text to Speech`_.
    The length regulator expands char or
    phoneme-level embedding features to frame-level by repeating each
    feature based on the corresponding predicted durations.

    The expansion is done by building a 0/1 alignment matrix ``M`` of shape
    (B, T_dec, T_enc) and computing ``M @ encodings``.

    .. _`FastSpeech: Fast, Robust and Controllable Text to Speech`:
        https://arxiv.org/pdf/1905.09263.pdf

    """

    def __init__(self, pad_value=0.0):
        """Initialize length regulator module.

        Args:
            pad_value (float, optional):
                Value used for padding. It is only stored; none of the active
                code paths below reads it (rows of the alignment matrix past
                a sequence's length are all zero, so padded frames are 0).

        """
        super().__init__()
        self.pad_value = pad_value

    # expand_numpy is faster than expand
    def expand_numpy(self, encodings: paddle.Tensor,
                     durations: paddle.Tensor) -> paddle.Tensor:
        """Expand encodings with an alignment matrix built in NumPy.

        Args:
            encodings (Tensor):
                Input features (B, T, C).
            durations (Tensor):
                Integer number of output frames per input position (B, T).

        Returns:
            Tensor: Expanded features (B, T_dec, C), where T_dec is the
                largest per-item sum of durations.
        """
        durations = durations.numpy()
        # Take the sizes from the NumPy array: they are plain Python ints, so
        # np.zeros / range below do not rely on converting paddle shape
        # tensors to ints.
        batch_size, t_enc = durations.shape
        slens = np.sum(durations, -1)
        t_dec = np.max(slens)
        M = np.zeros([batch_size, t_dec, t_enc])
        for i in range(batch_size):
            # k is the first output frame owned by input position j
            k = 0
            for j in range(t_enc):
                d = durations[i, j]
                M[i, k:k + d, j] = 1
                k += d
        M = paddle.to_tensor(M, dtype=encodings.dtype)
        encodings = paddle.matmul(M, encodings)
        return encodings

    def expand(self, encodings: paddle.Tensor,
               durations: paddle.Tensor) -> paddle.Tensor:
        """Expand encodings with an alignment matrix built from paddle ops.

        Args:
            encodings (Tensor):
                Input features (B, T, C).
            durations (Tensor):
                Integer number of output frames per input position (B, T).

        Returns:
            Tensor: Expanded features (B, T_dec, C).

        Note:
            The cumulative sum below runs over the durations flattened to
            (B * T, ), i.e. it is not restarted per batch item. As written
            the construction therefore only lines up for batch size 1
            (NOTE(review): confirm no caller uses a larger batch here).
        """
        batch_size, t_enc = paddle.shape(durations)
        slens = paddle.sum(durations, -1)
        t_dec = paddle.max(slens)
        # one extra leading row is used so that every column starts with at
        # least one 1; that row is dropped again after the loop
        t_dec_1 = t_dec + 1
        flatten_duration = paddle.cumsum(
            paddle.reshape(durations, [batch_size * t_enc])) + 1
        init = paddle.zeros(t_dec_1)
        m_batch = batch_size * t_enc
        M = paddle.zeros([t_dec_1, m_batch])
        for i in range(m_batch):
            d = flatten_duration[i]
            # m is a step function: 1 for the first d rows, 0 afterwards
            m = paddle.concat(
                [paddle.ones(d), paddle.zeros(t_dec_1 - d)], axis=0)
            # the difference of two consecutive steps marks exactly the rows
            # (frames) that belong to position i
            M[:, i] = m - init
            init = m
        M = paddle.reshape(M, shape=[t_dec_1, batch_size, t_enc])
        # drop the extra leading row
        M = M[1:t_dec_1, :, :]
        # (T_dec, B, T_enc) -> (B, T_dec, T_enc)
        M = paddle.transpose(M, (1, 0, 2))
        encodings = paddle.matmul(M, encodings)
        return encodings

    def forward(self, xs, ds, alpha=1.0, is_inference=False):
        """Calculate forward propagation.

        Args:
            xs (Tensor):
                Batch of sequences of char or phoneme embeddings (B, Tmax, D).
            ds (Tensor(int64)):
                Batch of durations of each frame (B, T).
            alpha (float, optional):
                Alpha value to control speed of speech. Durations are
                multiplied by it and rounded when it differs from 1.0.
            is_inference (bool, optional):
                If True use the paddle-only `expand`, otherwise use the
                NumPy based `expand_numpy`.

        Returns:
            Tensor: replicated input tensor based on durations (B, T*, D).
        """

        if alpha != 1.0:
            assert alpha > 0
            ds = paddle.round(ds.cast(dtype=paddle.float32) * alpha)
        ds = ds.cast(dtype=paddle.int64)
        # NOTE: a repeat_interleave based implementation is kept here for
        # reference only; dynamic-to-static conversion of it does not work
        # with paddle 2.2.2:
        #
        #     from paddlespeech.t2s.modules.nets_utils import pad_list
        #     if hasattr(paddle, 'repeat_interleave'):
        #         repeat = [paddle.repeat_interleave(x, d, axis=0)
        #                   for x, d in zip(xs, ds)]
        #         return pad_list(repeat, self.pad_value)
        if is_inference:
            return self.expand(xs, ds)
        else:
            return self.expand_numpy(xs, ds)


================================================
FILE: paddlespeech/t2s/modules/predictor/variance_predictor.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""Variance predictor related modules."""
import paddle
from paddle import nn
from typeguard import typechecked

from paddlespeech.t2s.modules.layer_norm import LayerNorm
from paddlespeech.t2s.modules.masked_fill import masked_fill


class VariancePredictor(nn.Layer):
    """Variance predictor module.

    This is a module of variance predictor described in `FastSpeech 2:
    Fast and High-Quality End-to-End Text to Speech`_.

    .. _`FastSpeech 2: Fast and High-Quality End-to-End Text to Speech`:
        https://arxiv.org/abs/2006.04558

    """

    @typechecked
    def __init__(
            self,
            idim: int,
            n_layers: int=2,
            n_chans: int=384,
            kernel_size: int=3,
            bias: bool=True,
            dropout_rate: float=0.5, ):
        """Initialize variance predictor module.

        Args:
            idim (int):
                Input dimension.
            n_layers (int, optional):
                Number of convolutional layers.
            n_chans (int, optional):
                Number of channels of convolutional layers.
            kernel_size (int, optional):
                Kernel size of convolutional layers.
            bias (bool, optional):
                Whether the convolutional layers use a bias parameter.
            dropout_rate (float, optional):
                Dropout rate.
        """
        super().__init__()
        self.conv = nn.LayerList()
        for idx in range(n_layers):
            # only the first layer sees idim channels
            in_chans = idim if idx == 0 else n_chans
            self.conv.append(
                nn.Sequential(
                    nn.Conv1D(
                        in_chans,
                        n_chans,
                        kernel_size,
                        stride=1,
                        padding=(kernel_size - 1) // 2,
                        # honour the `bias` argument (it used to be ignored
                        # because True was hard-coded here)
                        bias_attr=bias, ),
                    nn.ReLU(),
                    LayerNorm(n_chans, dim=1),
                    nn.Dropout(dropout_rate), ))

        self.linear = nn.Linear(n_chans, 1, bias_attr=True)

    def forward(self, xs: paddle.Tensor,
                x_masks: paddle.Tensor=None) -> paddle.Tensor:
        """Calculate forward propagation.

        Args:
            xs (Tensor):
                Batch of input sequences (B, Tmax, idim).
            x_masks (Tensor(bool), optional):
                Batch of masks indicating padded part (B, Tmax, 1).

        Returns:
            Tensor:
                Batch of predicted sequences (B, Tmax, 1).
        """
        # (B, Tmax, idim) -> (B, idim, Tmax)
        xs = xs.transpose([0, 2, 1])
        for f in self.conv:
            # (B, C, Tmax)
            xs = f(xs)
        # (B, C, Tmax) -> (B, Tmax, C) -> (B, Tmax, 1)
        xs = self.linear(xs.transpose([0, 2, 1]))

        if x_masks is not None:
            # presumably masked (padded) positions are set to 0.0 -- see
            # masked_fill for the exact semantics
            xs = masked_fill(xs, x_masks, 0.0)
        return xs


================================================
FILE: paddlespeech/t2s/modules/residual_block.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
from typing import Any
from typing import Dict
from typing import List

import paddle
from paddle import nn
from paddle.nn import functional as F

from paddlespeech.t2s.modules.activation import get_activation


class WaveNetResidualBlock(nn.Layer):
    """A gated activation unit composed of an 1D convolution, a gated tanh
    unit and parametric residual and skip connections. For more details, 
    refer to `WaveNet: A Generative Model for Raw Audio <https://arxiv.org/abs/1609.03499>`_.

    Args:
        kernel_size (int, optional): 
            Kernel size of the 1D convolution, by default 3
        residual_channels (int, optional): 
            Feature size of the residual output(and also the input), by default 64
        gate_channels (int, optional): 
            Output feature size of the 1D convolution, by default 128
        skip_channels (int, optional): 
            Feature size of the skip output, by default 64
        aux_channels (int, optional): 
            Feature size of the auxiliary input (e.g. spectrogram), by default 80.
            If None, no projection for the auxiliary input is created.
        dropout (float, optional): 
            Probability of the dropout before the 1D convolution, by default 0.
        dilation (int, optional): 
            Dilation of the 1D convolution, by default 1
        bias (bool, optional): 
            Whether to use bias in the 1D convolution, by default True
        use_causal_conv (bool, optional): 
            Whether to use causal padding for the 1D convolution, by default False
    """

    def __init__(self,
                 kernel_size: int=3,
                 residual_channels: int=64,
                 gate_channels: int=128,
                 skip_channels: int=64,
                 aux_channels: int=80,
                 dropout: float=0.,
                 dilation: int=1,
                 bias: bool=True,
                 use_causal_conv: bool=False):
        super().__init__()
        self.dropout = dropout
        if use_causal_conv:
            # full receptive field as padding; the surplus output steps are
            # cut off again in forward
            padding = (kernel_size - 1) * dilation
        else:
            assert kernel_size % 2 == 1
            padding = (kernel_size - 1) // 2 * dilation
        self.use_causal_conv = use_causal_conv

        self.conv = nn.Conv1D(
            residual_channels,
            gate_channels,
            kernel_size,
            padding=padding,
            dilation=dilation,
            bias_attr=bias)
        if aux_channels is not None:
            self.conv1x1_aux = nn.Conv1D(
                aux_channels, gate_channels, kernel_size=1, bias_attr=False)
        else:
            self.conv1x1_aux = None

        # the gate splits the channels into a tanh half and a sigmoid half
        gate_out_channels = gate_channels // 2
        self.conv1x1_out = nn.Conv1D(
            gate_out_channels, residual_channels, kernel_size=1, bias_attr=bias)
        self.conv1x1_skip = nn.Conv1D(
            gate_out_channels, skip_channels, kernel_size=1, bias_attr=bias)

    def forward(self, x, c):
        """
        Args:
            x (Tensor): 
                the input features. Shape (N, C_res, T)
            c (Tensor):
                the auxiliary input. Shape (N, C_aux, T). May be None; when it
                is given the block must have been built with `aux_channels`.

        Returns:
            res (Tensor): 
                Shape (N, C_res, T), the residual output, which is used as the 
                input of the next ResidualBlock in a stack of ResidualBlocks.
            skip (Tensor): 
                Shape (N, C_skip, T), the skip output, which is collected among
                each layer in a stack of ResidualBlocks.
        """
        x_input = x
        x = F.dropout(x, self.dropout, training=self.training)
        x = self.conv(x)
        # With causal padding the convolution output is longer than the
        # input; keep only the first T time steps. This must be a slice:
        # a plain index would select a single step and drop the time axis.
        x = x[:, :, :x_input.shape[-1]] if self.use_causal_conv else x
        if c is not None:
            c = self.conv1x1_aux(c)
            x += c

        # gated activation unit: tanh(a) * sigmoid(b)
        a, b = paddle.chunk(x, 2, axis=1)
        x = paddle.tanh(a) * F.sigmoid(b)

        skip = self.conv1x1_skip(x)
        # scale the residual sum by sqrt(0.5)
        res = (self.conv1x1_out(x) + x_input) * math.sqrt(0.5)
        return res, skip


class HiFiGANResidualBlock(nn.Layer):
    """Residual block module in HiFiGAN.

    For every dilation factor the block applies an activation followed by a
    dilated convolution, optionally a second activation + undilated
    convolution, and adds the result to its input.
    """

    def __init__(
            self,
            kernel_size: int=3,
            channels: int=512,
            dilations: List[int]=(1, 3, 5),
            bias: bool=True,
            use_additional_convs: bool=True,
            nonlinear_activation: str="leakyrelu",
            nonlinear_activation_params: Dict[str, Any]={"negative_slope": 0.1},
    ):
        """Initialize HiFiGANResidualBlock module.
        Args:
            kernel_size (int):
                Kernel size of dilation convolution layer (must be odd).
            channels (int):
                Number of channels for convolution layer.
            dilations (List[int]):
                List of dilation factors.
            use_additional_convs (bool):
                Whether to use additional convolution layers.
            bias (bool):
                Whether to add bias parameter in convolution layers.
            nonlinear_activation (str):
                Activation function module name.
            nonlinear_activation_params (dict):
                Hyperparameters for activation function.
        """
        super().__init__()

        self.use_additional_convs = use_additional_convs
        self.convs1 = nn.LayerList()
        if use_additional_convs:
            self.convs2 = nn.LayerList()
        assert kernel_size % 2 == 1, "Kernel size must be odd number."

        half_kernel = (kernel_size - 1) // 2
        for dilation in dilations:
            # activation + dilated convolution, padded by
            # (kernel_size - 1) // 2 * dilation
            dilated_branch = nn.Sequential(
                get_activation(nonlinear_activation,
                               **nonlinear_activation_params),
                nn.Conv1D(
                    channels,
                    channels,
                    kernel_size,
                    stride=1,
                    dilation=dilation,
                    bias_attr=bias,
                    padding=half_kernel * dilation))
            self.convs1.append(dilated_branch)
            if not use_additional_convs:
                continue
            # activation + plain (dilation 1) convolution
            plain_branch = nn.Sequential(
                get_activation(nonlinear_activation,
                               **nonlinear_activation_params),
                nn.Conv1D(
                    channels,
                    channels,
                    kernel_size,
                    stride=1,
                    dilation=1,
                    bias_attr=bias,
                    padding=half_kernel))
            self.convs2.append(plain_branch)

    def forward(self, x):
        """Calculate forward propagation.
        Args:
            x (Tensor):
                Input tensor (B, channels, T).
        Returns:
            Tensor: Output tensor (B, channels, T).
        """
        for idx, dilated_branch in enumerate(self.convs1):
            branch_out = dilated_branch(x)
            if self.use_additional_convs:
                branch_out = self.convs2[idx](branch_out)
            # residual connection around each dilation stage
            x = branch_out + x
        return x


================================================
FILE: paddlespeech/t2s/modules/residual_stack.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""Residual stack module in MelGAN."""
from typing import Any
from typing import Dict

from paddle import nn

from paddlespeech.t2s.modules.activation import get_activation
from paddlespeech.t2s.modules.causal_conv import CausalConv1D


class ResidualStack(nn.Layer):
    """Residual stack module introduced in MelGAN.

    The block sums two branches computed from the same input: a stack of
    ``activation -> dilated conv -> activation -> 1x1 conv`` and a single
    1x1 convolution used as the skip connection.
    """

    def __init__(
            self,
            kernel_size: int=3,
            channels: int=32,
            dilation: int=1,
            bias: bool=True,
            nonlinear_activation: str="leakyrelu",
            nonlinear_activation_params: Dict[str, Any]={"negative_slope": 0.2},
            pad: str="Pad1D",
            pad_params: Dict[str, Any]={"mode": "reflect"},
            use_causal_conv: bool=False, ):
        """Build the residual stack.

        Args:
            kernel_size (int):
                Kernel size of the dilated convolution layer. Must be odd
                unless ``use_causal_conv`` is True.
            channels (int):
                Number of input and output channels of every convolution.
            dilation (int):
                Dilation factor of the dilated convolution.
            bias (bool):
                Whether the convolution layers carry a bias parameter.
            nonlinear_activation (str):
                Activation function module name (lower-cased before use).
            nonlinear_activation_params (Dict[str,Any]):
                Keyword arguments forwarded to the activation factory.
            pad (str):
                Name of the ``paddle.nn`` padding layer placed before the
                dilated convolution.
            pad_params (Dict[str, Any]):
                Keyword arguments forwarded to the padding layer.
            use_causal_conv (bool):
                Whether to use a causal convolution instead of the
                symmetric pad + convolution pair.
        """
        super().__init__()
        # for compatibility: activation names are matched in lower case
        if nonlinear_activation:
            nonlinear_activation = nonlinear_activation.lower()

        # symmetric padding only works with an odd kernel size
        if not use_causal_conv:
            assert (kernel_size - 1
                    ) % 2 == 0, "Not support even number kernel size."

        # define residual stack part; layers are created in the order in
        # which they are applied
        layers = [
            get_activation(nonlinear_activation,
                           **nonlinear_activation_params)
        ]
        if use_causal_conv:
            layers.append(
                CausalConv1D(
                    channels,
                    channels,
                    kernel_size,
                    dilation=dilation,
                    bias=bias,
                    pad=pad,
                    pad_params=pad_params, ))
        else:
            pad_layer_cls = getattr(nn, pad)
            layers.append(
                pad_layer_cls((kernel_size - 1) // 2 * dilation, **pad_params))
            layers.append(
                nn.Conv1D(
                    channels,
                    channels,
                    kernel_size,
                    dilation=dilation,
                    bias_attr=bias))
        layers.append(
            get_activation(nonlinear_activation,
                           **nonlinear_activation_params))
        layers.append(nn.Conv1D(channels, channels, 1, bias_attr=bias))
        self.stack = nn.Sequential(*layers)

        # define extra layer for skip connection
        self.skip_layer = nn.Conv1D(channels, channels, 1, bias_attr=bias)

    def forward(self, c):
        """Calculate forward propagation.

        Args:
            c (Tensor):
                Input tensor (B, channels, T).
        Returns:
            Tensor: Output tensor (B, channels, T).
        """
        stacked = self.stack(c)
        skipped = self.skip_layer(c)
        return stacked + skipped


================================================
FILE: paddlespeech/t2s/modules/style_encoder.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""Style encoder of GST-Tacotron."""
from typing import Sequence

import paddle
from paddle import nn
from typeguard import typechecked

from paddlespeech.t2s.modules.transformer.attention import MultiHeadedAttention as BaseMultiHeadedAttention


class StyleEncoder(nn.Layer):
    """Style encoder.

    This module is the style encoder introduced in `Style Tokens: Unsupervised
    Style Modeling, Control and Transfer in End-to-End Speech Synthesis`.
    It chains a reference encoder and a style token layer.

    .. _`Style Tokens: Unsupervised Style Modeling, Control and Transfer in End-to-End
        Speech Synthesis`: https://arxiv.org/abs/1803.09017

    Args:
        idim (int, optional):
            Dimension of the input mel-spectrogram.
        gst_tokens (int, optional):
            The number of GST embeddings.
        gst_token_dim (int, optional):
            Dimension of each GST embedding.
        gst_heads (int, optional):
            The number of heads in GST multihead attention.
        conv_layers (int, optional):
            The number of conv layers in the reference encoder.
        conv_chans_list (Sequence[int], optional):
            List of the number of channels of conv layers in the reference encoder.
        conv_kernel_size (int, optional):
            Kernel size of conv layers in the reference encoder.
        conv_stride (int, optional):
            Stride size of conv layers in the reference encoder.
        gru_layers (int, optional):
            The number of GRU layers in the reference encoder.
        gru_units (int, optional):
            The number of GRU units in the reference encoder.

    Todo:
        * Support manual weight specification in inference.

    """

    @typechecked
    def __init__(
            self,
            idim: int=80,
            gst_tokens: int=10,
            gst_token_dim: int=256,
            gst_heads: int=4,
            conv_layers: int=6,
            conv_chans_list: Sequence[int]=(32, 32, 64, 64, 128, 128),
            conv_kernel_size: int=3,
            conv_stride: int=2,
            gru_layers: int=1,
            gru_units: int=128, ):
        """Initialize global style encoder module."""
        super().__init__()

        # reference encoder: consumes the spectrogram
        ref_enc_kwargs = dict(
            idim=idim,
            conv_layers=conv_layers,
            conv_chans_list=conv_chans_list,
            conv_kernel_size=conv_kernel_size,
            conv_stride=conv_stride,
            gru_layers=gru_layers,
            gru_units=gru_units, )
        self.ref_enc = ReferenceEncoder(**ref_enc_kwargs)

        # style token layer: its query size is the GRU width of the
        # reference encoder
        stl_kwargs = dict(
            ref_embed_dim=gru_units,
            gst_tokens=gst_tokens,
            gst_token_dim=gst_token_dim,
            gst_heads=gst_heads, )
        self.stl = StyleTokenLayer(**stl_kwargs)

    def forward(self, speech: paddle.Tensor) -> paddle.Tensor:
        """Calculate forward propagation.

        Args:
            speech (Tensor):
                Batch of padded target features (B, Lmax, odim).

        Returns:
            Tensor: Style token embeddings (B, token_dim).

        """
        return self.stl(self.ref_enc(speech))


class ReferenceEncoder(nn.Layer):
    """Reference encoder module.

    This module is the reference encoder introduced in `Style Tokens:
    Unsupervised Style Modeling, Control and Transfer in End-to-End Speech
    Synthesis`. A stack of ``Conv2D -> BatchNorm2D -> ReLU`` blocks is applied
    to the spectrogram (treated as a one-channel image), and the result is
    summarised by a GRU whose final hidden state is returned.

    .. _`Style Tokens: Unsupervised Style Modeling, Control and Transfer in End-to-End
        Speech Synthesis`: https://arxiv.org/abs/1803.09017

    Args:
        idim (int, optional):
            Dimension of the input mel-spectrogram.
        conv_layers (int, optional):
            The number of conv layers in the reference encoder.
        conv_chans_list: (Sequence[int], optional):
            List of the number of channels of conv layers in the reference encoder.
            Its length must equal ``conv_layers``.
        conv_kernel_size (int, optional):
            Kernel size of conv layers in the reference encoder. Must be odd.
        conv_stride (int, optional):
            Stride size of conv layers in the reference encoder.
        gru_layers (int, optional):
            The number of GRU layers in the reference encoder.
        gru_units (int, optional):
            The number of GRU units in the reference encoder.

    """

    @typechecked
    def __init__(
            self,
            idim=80,
            conv_layers: int=6,
            conv_chans_list: Sequence[int]=(32, 32, 64, 64, 128, 128),
            conv_kernel_size: int=3,
            conv_stride: int=2,
            gru_layers: int=1,
            gru_units: int=128, ):
        """Initialize reference encoder module."""
        super().__init__()

        # check hyperparameters are valid
        assert conv_kernel_size % 2 == 1, "kernel size must be odd."
        assert (
            len(conv_chans_list) == conv_layers
        ), "the number of conv layers and length of channels list must be the same."

        convs = []
        padding = (conv_kernel_size - 1) // 2
        # Channel count of the tensor entering the next conv layer. The
        # spectrogram enters as a single-channel image, so start at 1; this
        # also keeps the value defined when ``conv_layers`` is 0.
        conv_out_chans = 1
        for i in range(conv_layers):
            conv_in_chans = conv_out_chans
            conv_out_chans = conv_chans_list[i]
            convs += [
                nn.Conv2D(
                    conv_in_chans,
                    conv_out_chans,
                    kernel_size=conv_kernel_size,
                    stride=conv_stride,
                    padding=padding,
                    # Do not use bias due to the following batch norm
                    bias_attr=False, ),
                nn.BatchNorm2D(conv_out_chans),
                nn.ReLU(),
            ]
        self.convs = nn.Sequential(*convs)

        self.conv_layers = conv_layers
        self.kernel_size = conv_kernel_size
        self.stride = conv_stride
        self.padding = padding

        # get the number of GRU input units: apply the conv output-size
        # formula once per layer to the feature axis, then multiply by the
        # channel count of the last conv layer
        gru_in_units = idim
        for _ in range(conv_layers):
            gru_in_units = (gru_in_units - conv_kernel_size + 2 * padding
                            ) // conv_stride + 1
        gru_in_units *= conv_out_chans
        self.gru = nn.GRU(gru_in_units, gru_units, gru_layers, time_major=False)

    def forward(self, speech: paddle.Tensor) -> paddle.Tensor:
        """Calculate forward propagation.
        Args:
            speech (Tensor):
                Batch of padded target features (B, Lmax, idim).

        Returns:
            Tensor: Reference embedding (B, gru_units)

        """
        batch_size = speech.shape[0]
        # (B, 1, Lmax, idim)
        xs = speech.unsqueeze(1)
        # (B, Lmax', conv_out_chans, idim')
        hs = self.convs(xs).transpose([0, 2, 1, 3])
        time_length = hs.shape[1]
        # (B, Lmax', conv_out_chans * idim')
        hs = hs.reshape(shape=[batch_size, time_length, -1])
        self.gru.flatten_parameters()
        # final hidden states: (gru_layers, batch_size, gru_units)
        _, ref_embs = self.gru(hs)
        # keep only the last layer's state: (batch_size, gru_units)
        ref_embs = ref_embs[-1]

        return ref_embs


class StyleTokenLayer(nn.Layer):
    """Style token layer module.

    This module is the style token layer introduced in `Style Tokens:
    Unsupervised Style Modeling, Control and Transfer in End-to-End Speech
    Synthesis`. A learnable bank of style tokens is attended to with the
    reference embedding as the query.

    .. _`Style Tokens: Unsupervised Style Modeling, Control and Transfer in End-to-End
        Speech Synthesis`: https://arxiv.org/abs/1803.09017
    Args:
        ref_embed_dim (int, optional):
            Dimension of the input reference embedding.
        gst_tokens (int, optional):
            The number of GST embeddings.
        gst_token_dim (int, optional):
            Dimension of each GST embedding.
        gst_heads (int, optional):
            The number of heads in GST multihead attention.
        dropout_rate (float, optional):
            Dropout rate in multi-head attention.

    """

    @typechecked
    def __init__(
            self,
            ref_embed_dim: int=128,
            gst_tokens: int=10,
            gst_token_dim: int=256,
            gst_heads: int=4,
            dropout_rate: float=0.0, ):
        """Initialize style token layer module."""
        super().__init__()

        # every stored token has gst_token_dim // gst_heads features
        per_head_dim = gst_token_dim // gst_heads

        # token bank, initialised from a standard normal sample
        init_tokens = paddle.randn(shape=[gst_tokens, per_head_dim])
        self.gst_embs = paddle.create_parameter(
            shape=init_tokens.shape,
            dtype=str(init_tokens.numpy().dtype),
            default_initializer=paddle.nn.initializer.Assign(init_tokens))

        self.mha = MultiHeadedAttention(
            q_dim=ref_embed_dim,
            k_dim=per_head_dim,
            v_dim=per_head_dim,
            n_head=gst_heads,
            n_feat=gst_token_dim,
            dropout_rate=dropout_rate, )

    def forward(self, ref_embs: paddle.Tensor) -> paddle.Tensor:
        """Calculate forward propagation.

        Args:
            ref_embs (Tensor):
                Reference embeddings (B, ref_embed_dim).

        Returns:
            Tensor: Style token embeddings (B, gst_token_dim).

        """
        batch_size = ref_embs.shape[0]
        # squash the token bank with tanh, then replicate it per utterance:
        # (num_tokens, token_dim) -> (batch_size, num_tokens, token_dim)
        tokens = paddle.tanh(self.gst_embs)
        tokens = tokens.unsqueeze(0).expand([batch_size, -1, -1])
        # a single query step per utterance: (batch_size, 1, ref_embed_dim)
        query = ref_embs.unsqueeze(1)
        # tokens act as both keys and values; no mask is applied
        style_embs = self.mha(query, tokens, tokens, None)

        return style_embs.squeeze(1)


class MultiHeadedAttention(BaseMultiHeadedAttention):
    """Multi head attention module with different input dimension.

    Only ``__init__`` is overridden: the query, key and value projections
    each get their own input size, while every other method is inherited
    from ``BaseMultiHeadedAttention``.
    """

    def __init__(self, q_dim, k_dim, v_dim, n_head, n_feat, dropout_rate=0.0):
        """Initialize multi head attention module.

        Args:
            q_dim (int):
                Input size of the query projection.
            k_dim (int):
                Input size of the key projection.
            v_dim (int):
                Input size of the value projection.
            n_head (int):
                The number of heads. Must divide ``n_feat``.
            n_feat (int):
                Output size of all projections.
            dropout_rate (float, optional):
                Probability passed to the dropout layer.
        """
        # Do not use super().__init__() here since we want to
        # overwrite BaseMultiHeadedAttention.__init__() method.
        nn.Layer.__init__(self)
        assert n_feat % n_head == 0
        # We assume d_v always equals d_k
        self.d_k = n_feat // n_head
        self.h = n_head
        self.linear_q = nn.Linear(q_dim, n_feat)
        self.linear_k = nn.Linear(k_dim, n_feat)
        self.linear_v = nn.Linear(v_dim, n_feat)
        self.linear_out = nn.Linear(n_feat, n_feat)
        # initialised to None here; NOTE(review): presumably filled with the
        # attention weights by the inherited forward pass - confirm in base
        self.attn = None
        self.dropout = nn.Dropout(p=dropout_rate)


================================================
FILE: paddlespeech/t2s/modules/tacotron2/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/modules/tacotron2/attentions.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Attention modules for RNN."""
import paddle
import paddle.nn.functional as F
from paddle import nn

from paddlespeech.t2s.modules.masked_fill import masked_fill
from paddlespeech.t2s.modules.nets_utils import make_pad_mask


def _apply_attention_constraint(e,
                                last_attended_idx,
                                backward_window=1,
                                forward_window=3):
    """Apply monotonic attention constraint.

    This function applies the monotonic attention constraint
    introduced in `Deep Voice 3: Scaling
    Text-to-Speech with Convolutional Sequence Learning`_.

    Energies outside the window
    ``[last_attended_idx - backward_window, last_attended_idx + forward_window)``
    are overwritten with ``-inf``. The input tensor ``e`` is modified in
    place by slice assignment and the same tensor is returned.

    Args:
        e(Tensor):
            Attention energy before applying softmax (1, T).
        last_attended_idx(int):
            The index of the inputs of the last attended [0, T].
            NOTE(review): the value is passed through ``paddle.cast`` after
            the window arithmetic, so callers presumably supply a Tensor
            rather than a plain int - confirm against callers.
        backward_window(int, optional):
            Backward window size in attention constraint. (Default value = 1)
        forward_window(int, optional):
            Forward window size in attention constraint. (Default value = 3)

    Returns:
        Tensor: Monotonic constrained attention energy (1, T).

    .. _`Deep Voice 3: Scaling Text-to-Speech with Convolutional Sequence Learning`:
        https://arxiv.org/abs/1710.07654

    """
    # The batch-size check below is disabled for dygraph to static graph
    # conversion:
    # if e.shape[0] != 1:
    #     raise NotImplementedError(
    #         "Batch attention constraining is not yet supported.")
    backward_idx = paddle.cast(
        last_attended_idx - backward_window, dtype='int64')
    forward_idx = paddle.cast(last_attended_idx + forward_window, dtype='int64')
    # mask everything before the backward edge of the window
    if backward_idx > 0:
        e[:, :backward_idx] = -float("inf")
    # mask everything from the forward edge of the window onwards
    if forward_idx < paddle.shape(e)[1]:
        e[:, forward_idx:] = -float("inf")
    return e


class AttLoc(nn.Layer):
    """location-aware attention module.

    Reference: Attention-Based Models for Speech Recognition
        (https://arxiv.org/pdf/1506.07503.pdf)

    The module caches the encoder states, their projection and the padding
    mask between calls to ``forward``; call ``reset`` before processing a new
    batch.

    Args:
        eprojs (int): 
            projection-units of encoder
        dunits (int): 
            units of decoder
        att_dim (int): 
            attention dimension
        aconv_chans (int): 
            channels of attention convolution
        aconv_filts (int): 
            filter size of attention convolution
        han_mode (bool): 
            flag to switch on mode of hierarchical attention and not store pre_compute_enc_h
            (the encoder projection is then recomputed on every forward call)
    """

    def __init__(self,
                 eprojs,
                 dunits,
                 att_dim,
                 aconv_chans,
                 aconv_filts,
                 han_mode=False):
        super().__init__()
        self.mlp_enc = nn.Linear(eprojs, att_dim)
        self.mlp_dec = nn.Linear(dunits, att_dim, bias_attr=False)
        self.mlp_att = nn.Linear(aconv_chans, att_dim, bias_attr=False)
        # convolution over the previous attention weights; the kernel spans
        # 2 * aconv_filts + 1 frames and the padding keeps the frame count
        self.loc_conv = nn.Conv2D(
            1,
            aconv_chans,
            (1, 2 * aconv_filts + 1),
            padding=(0, aconv_filts),
            bias_attr=False, )
        self.gvec = nn.Linear(att_dim, 1)

        self.dunits = dunits
        self.eprojs = eprojs
        self.att_dim = att_dim
        # per-batch cache, filled lazily in forward() and cleared by reset()
        self.h_length = None
        self.enc_h = None
        self.pre_compute_enc_h = None
        self.mask = None
        self.han_mode = han_mode

    def reset(self):
        """reset states"""
        self.h_length = None
        self.enc_h = None
        self.pre_compute_enc_h = None
        self.mask = None

    def forward(
            self,
            enc_hs_pad,
            enc_hs_len,
            dec_z,
            att_prev,
            scaling=2.0,
            last_attended_idx=-1,
            backward_window=1,
            forward_window=3, ):
        """Calculate AttLoc forward propagation.
        Args:
            enc_hs_pad(Tensor): 
                padded encoder hidden state (B, T_max, D_enc)
            enc_hs_len(Tensor): 
                padded encoder hidden state length (B)
            dec_z(Tensor): 
                decoder hidden state (B, D_dec); zeros are used when None
            att_prev(Tensor): 
                previous attention weight (B, T_max); a tensor summing to
                zero is replaced by a uniform distribution over the
                non-padded frames
            scaling(float, optional): 
                scaling parameter before applying softmax (Default value = 2.0)
            last_attended_idx(int, optional): 
                index of the inputs of the last attended; -1 disables the
                attention constraint (Default value = -1)
            backward_window(int, optional): 
                backward window size in attention constraint (Default value = 1)
            forward_window(int, optional): 
                forward window size in attention constraint (Default value = 3)
        Returns:
            Tensor: 
                attention weighted encoder state (B, D_enc)
            Tensor: 
                attention weights of the current step (B, T_max)
        """
        batch = paddle.shape(enc_hs_pad)[0]
        # pre-compute all h outside the decoder loop
        if self.pre_compute_enc_h is None or self.han_mode:
            # (utt, frame, hdim)
            self.enc_h = enc_hs_pad
            self.h_length = paddle.shape(self.enc_h)[1]
            # (utt, frame, att_dim)
            self.pre_compute_enc_h = self.mlp_enc(self.enc_h)

        if dec_z is None:
            dec_z = paddle.zeros([batch, self.dunits])
        else:
            dec_z = dec_z.reshape([batch, self.dunits])

        # initialize attention weight with uniform dist.
        # (an all-zero att_prev is the signal that there is no previous step)
        if paddle.sum(att_prev) == 0:
            # if no bias, 0 0-pad goes 0
            att_prev = 1.0 - make_pad_mask(enc_hs_len)
            att_prev = att_prev / enc_hs_len.unsqueeze(-1).astype(
                att_prev.dtype)
        else:
            # the extra axis is absorbed by the reshape just below
            att_prev = att_prev.unsqueeze(-1)

        # att_prev: (utt, frame) -> (utt, 1, 1, frame)
        # -> (utt, att_conv_chans, 1, frame)
        att_conv = self.loc_conv(att_prev.reshape([batch, 1, 1, self.h_length]))
        # att_conv: (utt, att_conv_chans, 1, frame) -> (utt, frame, att_conv_chans)
        att_conv = att_conv.squeeze(2).transpose([0, 2, 1])
        # att_conv: (utt, frame, att_conv_chans) -> (utt, frame, att_dim)
        att_conv = self.mlp_att(att_conv)
        # dec_z_tiled: (utt, 1, att_dim), broadcast over frames in the sum below
        dec_z_tiled = self.mlp_dec(dec_z).reshape([batch, 1, self.att_dim])

        # dot with gvec
        # (utt, frame, att_dim) -> (utt, frame)
        e = paddle.tanh(att_conv + self.pre_compute_enc_h + dec_z_tiled)
        e = self.gvec(e).squeeze(2)

        # NOTE: consider zero padding when compute w.
        if self.mask is None:
            self.mask = make_pad_mask(enc_hs_len)

        e = masked_fill(e, self.mask, -float("inf"))
        # apply monotonic attention constraint (mainly for TTS)
        if last_attended_idx != -1:
            e = _apply_attention_constraint(e, last_attended_idx,
                                            backward_window, forward_window)

        w = F.softmax(scaling * e, axis=1)

        # weighted sum over frames
        # utt x hdim
        c = paddle.sum(
            self.enc_h * w.reshape([batch, self.h_length, 1]), axis=1)
        return c, w


class AttForward(nn.Layer):
    """Forward attention module.
    Reference
    ----------
    Forward attention in sequence-to-sequence acoustic modeling for speech synthesis
        (https://arxiv.org/pdf/1807.06736.pdf)

    The module caches the encoder states, their projection and the padding
    mask between calls to ``forward``; call ``reset`` before processing a new
    batch.

    Args:
        eprojs (int): 
            projection-units of encoder
        dunits (int): 
            units of decoder
        att_dim (int): 
            attention dimension
        aconv_chans (int): 
            channels of attention convolution
        aconv_filts (int): 
            filter size of attention convolution
    """

    def __init__(self, eprojs, dunits, att_dim, aconv_chans, aconv_filts):
        super().__init__()
        self.mlp_enc = nn.Linear(eprojs, att_dim)
        self.mlp_dec = nn.Linear(dunits, att_dim, bias_attr=False)
        self.mlp_att = nn.Linear(aconv_chans, att_dim, bias_attr=False)
        # convolution over the previous attention weights; the kernel spans
        # 2 * aconv_filts + 1 frames and the padding keeps the frame count
        self.loc_conv = nn.Conv2D(
            1,
            aconv_chans,
            (1, 2 * aconv_filts + 1),
            padding=(0, aconv_filts),
            bias_attr=False, )
        self.gvec = nn.Linear(att_dim, 1)
        self.dunits = dunits
        self.eprojs = eprojs
        self.att_dim = att_dim
        # per-batch cache, filled lazily in forward() and cleared by reset()
        self.h_length = None
        self.enc_h = None
        self.pre_compute_enc_h = None
        self.mask = None

    def reset(self):
        """reset states"""
        self.h_length = None
        self.enc_h = None
        self.pre_compute_enc_h = None
        self.mask = None

    def forward(
            self,
            enc_hs_pad,
            enc_hs_len,
            dec_z,
            att_prev,
            scaling=1.0,
            last_attended_idx=None,
            backward_window=1,
            forward_window=3, ):
        """Calculate AttForward forward propagation.

        Args:
            enc_hs_pad(Tensor): 
                padded encoder hidden state (B, T_max, D_enc)
            enc_hs_len(list): 
                padded encoder hidden state length (B,)
            dec_z(Tensor): 
                decoder hidden state (B, D_dec); zeros are used when None
            att_prev(Tensor): 
                attention weights of previous step (B, T_max); when None,
                the initial attention [1, 0, 0, ...] is used
            scaling(float, optional): 
                scaling parameter before applying softmax (Default value = 1.0)
            last_attended_idx(int, optional): 
                index of the inputs of the last attended; None disables the
                attention constraint (Default value = None)
            backward_window(int, optional): 
                backward window size in attention constraint (Default value = 1)
            forward_window(int, optional):  
                forward window size in attention constraint (Default value = 3)

        Returns:
            Tensor: 
                attention weighted encoder state (B, D_enc)
            Tensor: 
                attention weights of the current step (B, T_max)
        """
        batch = len(enc_hs_pad)
        # pre-compute all h outside the decoder loop
        if self.pre_compute_enc_h is None:
            self.enc_h = enc_hs_pad  # utt x frame x hdim
            self.h_length = paddle.shape(self.enc_h)[1]
            # utt x frame x att_dim
            self.pre_compute_enc_h = self.mlp_enc(self.enc_h)

        if dec_z is None:
            dec_z = paddle.zeros([batch, self.dunits])
        else:
            dec_z = dec_z.reshape([batch, self.dunits])

        if att_prev is None:
            # initial attention will be [1, 0, 0, ...]
            att_prev = paddle.zeros([*paddle.shape(enc_hs_pad)[:2]])
            att_prev[:, 0] = 1.0

        # att_prev: utt x frame -> utt x 1 x 1 x frame
        # -> utt x att_conv_chans x 1 x frame
        att_conv = self.loc_conv(att_prev.reshape([batch, 1, 1, self.h_length]))
        # att_conv: utt x att_conv_chans x 1 x frame -> utt x frame x att_conv_chans
        att_conv = att_conv.squeeze(2).transpose([0, 2, 1])
        # att_conv: utt x frame x att_conv_chans -> utt x frame x att_dim
        att_conv = self.mlp_att(att_conv)

        # dec_z_tiled: utt x 1 x att_dim, broadcast over frames in the sum below
        dec_z_tiled = self.mlp_dec(dec_z).unsqueeze(1)

        # dot with gvec
        # utt x frame x att_dim -> utt x frame
        e = self.gvec(
            paddle.tanh(self.pre_compute_enc_h + dec_z_tiled +
                        att_conv)).squeeze(2)

        # NOTE: consider zero padding when compute w.
        if self.mask is None:
            self.mask = make_pad_mask(enc_hs_len)
        e = masked_fill(e, self.mask, -float("inf"))

        # apply monotonic attention constraint (mainly for TTS)
        if last_attended_idx is not None:
            e = _apply_attention_constraint(e, last_attended_idx,
                                            backward_window, forward_window)

        w = F.softmax(scaling * e, axis=1)

        # forward attention: pad one frame and drop the last column to get
        # the previous weights moved one frame along the time axis
        # NOTE(review): relies on F.pad applying the 4-element pad list
        # starting from the first axis of a 2-D input - confirm for the
        # Paddle version in use
        att_prev_shift = F.pad(att_prev, (0, 0, 1, 0))[:, :-1]

        w = (att_prev + att_prev_shift) * w
        # NOTE: clip is needed to avoid nan gradient
        w = F.normalize(paddle.clip(w, 1e-6), p=1, axis=1)

        # weighted sum over frames
        # utt x hdim
        # NOTE use bmm instead of sum(*)
        c = paddle.sum(self.enc_h * w.unsqueeze(-1), axis=1)

        return c, w


class AttForwardTA(nn.Layer):
    """Forward attention with transition agent module.
    Reference:
        Forward attention in sequence-to-sequence acoustic modeling for speech synthesis
            (https://arxiv.org/pdf/1807.06736.pdf)

    The module caches the encoder states, their projection, the padding mask
    and the transition agent probability between calls to ``forward``; call
    ``reset`` before processing a new batch.

    Args:
        eunits (int): 
            units of encoder
        dunits (int): 
            units of decoder
        att_dim (int): 
            attention dimension
        aconv_chans (int):  
            channels of attention convolution
        aconv_filts (int): 
            filter size of attention convolution
        odim (int): 
            output dimension
    """

    def __init__(self, eunits, dunits, att_dim, aconv_chans, aconv_filts, odim):
        super().__init__()
        self.mlp_enc = nn.Linear(eunits, att_dim)
        self.mlp_dec = nn.Linear(dunits, att_dim, bias_attr=False)
        # transition agent: maps [context, previous output, decoder state]
        # to a single logit
        self.mlp_ta = nn.Linear(eunits + dunits + odim, 1)
        self.mlp_att = nn.Linear(aconv_chans, att_dim, bias_attr=False)
        # convolution over the previous attention weights; the kernel spans
        # 2 * aconv_filts + 1 frames and the padding keeps the frame count
        self.loc_conv = nn.Conv2D(
            1,
            aconv_chans,
            (1, 2 * aconv_filts + 1),
            padding=(0, aconv_filts),
            bias_attr=False, )
        self.gvec = nn.Linear(att_dim, 1)
        self.dunits = dunits
        self.eunits = eunits
        self.att_dim = att_dim
        # per-batch cache, filled lazily in forward() and cleared by reset()
        self.h_length = None
        self.enc_h = None
        self.pre_compute_enc_h = None
        self.mask = None
        # initial transition agent probability; overwritten by every forward()
        self.trans_agent_prob = 0.5

    def reset(self):
        """reset states, including the transition agent probability"""
        self.h_length = None
        self.enc_h = None
        self.pre_compute_enc_h = None
        self.mask = None
        self.trans_agent_prob = 0.5

    def forward(
            self,
            enc_hs_pad,
            enc_hs_len,
            dec_z,
            att_prev,
            out_prev,
            scaling=1.0,
            last_attended_idx=None,
            backward_window=1,
            forward_window=3, ):
        """Calculate AttForwardTA forward propagation.

        Args:
            enc_hs_pad(Tensor): 
                padded encoder hidden state (B, Tmax, eunits)
            enc_hs_len(list Tensor): 
                padded encoder hidden state length (B,)
            dec_z(Tensor): 
                decoder hidden state (B, dunits); zeros are used when None
            att_prev(Tensor): 
                attention weights of previous step (B, T_max); when None,
                the initial attention [1, 0, 0, ...] is used
            out_prev(Tensor): 
                decoder outputs of previous step (B, odim)
            scaling(float, optional): 
                scaling parameter before applying softmax (Default value = 1.0)
            last_attended_idx(int, optional): 
                index of the inputs of the last attended; None disables the
                attention constraint (Default value = None)
            backward_window(int, optional): 
                backward window size in attention constraint (Default value = 1)
            forward_window(int, optional):  
                forward window size in attention constraint (Default value = 3)

        Returns:
            Tensor: 
                attention weighted encoder state (B, eunits)
            Tensor: 
                attention weights of the current step (B, Tmax)
        """
        batch = len(enc_hs_pad)
        # pre-compute all h outside the decoder loop
        if self.pre_compute_enc_h is None:
            self.enc_h = enc_hs_pad  # utt x frame x hdim
            self.h_length = paddle.shape(self.enc_h)[1]
            # utt x frame x att_dim
            self.pre_compute_enc_h = self.mlp_enc(self.enc_h)

        if dec_z is None:
            dec_z = paddle.zeros([batch, self.dunits])
        else:
            dec_z = dec_z.reshape([batch, self.dunits])

        if att_prev is None:
            # initial attention will be [1, 0, 0, ...]
            att_prev = paddle.zeros([*paddle.shape(enc_hs_pad)[:2]])
            att_prev[:, 0] = 1.0

        # att_prev: utt x frame -> utt x 1 x 1 x frame
        # -> utt x att_conv_chans x 1 x frame
        att_conv = self.loc_conv(att_prev.reshape([batch, 1, 1, self.h_length]))
        # att_conv: utt x att_conv_chans x 1 x frame -> utt x frame x att_conv_chans
        att_conv = att_conv.squeeze(2).transpose([0, 2, 1])
        # att_conv: utt x frame x att_conv_chans -> utt x frame x att_dim
        att_conv = self.mlp_att(att_conv)

        # dec_z_tiled: utt x 1 x att_dim, broadcast over frames in the sum below
        dec_z_tiled = self.mlp_dec(dec_z).reshape([batch, 1, self.att_dim])

        # dot with gvec
        # utt x frame x att_dim -> utt x frame
        e = self.gvec(
            paddle.tanh(att_conv + self.pre_compute_enc_h +
                        dec_z_tiled)).squeeze(2)

        # NOTE consider zero padding when compute w.
        if self.mask is None:
            self.mask = make_pad_mask(enc_hs_len)
        e = masked_fill(e, self.mask, -float("inf"))

        # apply monotonic attention constraint (mainly for TTS)
        if last_attended_idx is not None:
            e = _apply_attention_constraint(e, last_attended_idx,
                                            backward_window, forward_window)

        w = F.softmax(scaling * e, axis=1)

        # forward attention: pad one frame and drop the last column to get
        # the previous weights moved one frame along the time axis
        # (earlier variant kept for reference:)
        # att_prev_shift = F.pad(att_prev.unsqueeze(0), (1, 0), data_format='NCL').squeeze(0)[:, :-1]
        att_prev_shift = F.pad(att_prev, (0, 0, 1, 0))[:, :-1]
        # the transition agent probability mixes "stay" and "move on"
        w = (self.trans_agent_prob * att_prev +
             (1 - self.trans_agent_prob) * att_prev_shift) * w
        # NOTE: clip is needed to avoid nan gradient
        w = F.normalize(paddle.clip(w, 1e-6), p=1, axis=1)

        # weighted sum over frames
        # utt x hdim
        # NOTE use bmm instead of sum(*)
        c = paddle.sum(
            self.enc_h * w.reshape([batch, self.h_length, 1]), axis=1)

        # update transition agent prob; the new value is kept on the module
        # and used by the next forward() call
        self.trans_agent_prob = F.sigmoid(
            self.mlp_ta(paddle.concat([c, out_prev, dec_z], axis=1)))

        return c, w


================================================
FILE: paddlespeech/t2s/modules/tacotron2/decoder.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""Tacotron2 decoder related modules."""
import paddle
import paddle.nn.functional as F
from paddle import nn

from paddlespeech.t2s.modules.tacotron2.attentions import AttForwardTA


class Prenet(nn.Layer):
    """Prenet module for decoder of Spectrogram prediction network.

    This is a module of Prenet in the decoder of Spectrogram prediction network,
    which described in `Natural TTS
    Synthesis by Conditioning WaveNet on Mel Spectrogram Predictions`_.
    The Prenet performs nonlinear conversion
    of inputs before input to auto-regressive lstm,
    which helps to learn diagonal attentions.

    Notes
    ----------
    This module always applies dropout even in evaluation.
    See the detail in `Natural TTS Synthesis by
    Conditioning WaveNet on Mel Spectrogram Predictions`_.

    .. _`Natural TTS Synthesis by Conditioning WaveNet on Mel Spectrogram Predictions`:
       https://arxiv.org/abs/1712.05884

    """

    def __init__(self, idim, n_layers=2, n_units=256, dropout_rate=0.5):
        """Initialize prenet module.

        Args:
            idim (int):
                Dimension of the inputs.
            n_layers (int, optional):
                The number of prenet layers.
            n_units (int, optional):
                The number of prenet units; also the output dimension
                when ``n_layers > 0``.
            dropout_rate (float, optional):
                Dropout probability applied after every prenet layer.
        """
        super().__init__()
        self.dropout_rate = dropout_rate
        self.prenet = nn.LayerList()
        for layer in range(n_layers):
            # only the first layer sees the raw input dimension
            n_inputs = idim if layer == 0 else n_units
            self.prenet.append(
                nn.Sequential(nn.Linear(n_inputs, n_units), nn.ReLU()))

    def forward(self, x):
        """Calculate forward propagation.

        Args:
            x (Tensor):
                Batch of input tensors (B, ..., idim).

        Returns:
            Tensor: Batch of output tensors (B, ..., n_units).

        """
        for i in range(len(self.prenet)):
            # F.dropout introduces randomness on purpose: the Tacotron2 prenet
            # dropout must not be removed, so it is applied with training=True
            # regardless of the layer's train/eval mode. The configured
            # dropout_rate is honoured instead of F.dropout's default p.
            x = F.dropout(
                self.prenet[i](x), p=self.dropout_rate, training=True)
        return x


class Postnet(nn.Layer):
    """Postnet module for Spectrogram prediction network.

    This is a module of Postnet in Spectrogram prediction network,
    which described in `Natural TTS Synthesis by
    Conditioning WaveNet on Mel Spectrogram Predictions`_.
    The Postnet refines the Mel-filterbank predicted by the decoder,
    which helps to compensate the detail structure of spectrogram.

    .. _`Natural TTS Synthesis by Conditioning WaveNet on Mel Spectrogram Predictions`:
       https://arxiv.org/abs/1712.05884

    """

    def __init__(
            self,
            idim,
            odim,
            n_layers=5,
            n_chans=512,
            n_filts=5,
            dropout_rate=0.5,
            use_batch_norm=True, ):
        """Initialize postnet module.

        Args:
            idim (int): Dimension of the inputs (not used when building the layers).
            odim (int): Dimension of the outputs; also the channel count fed to the first conv.
            n_layers (int, optional): The number of layers.
            n_chans (int, optional): The number of filter channels of the hidden layers.
            n_filts (int, optional): The filter (kernel) size.
            dropout_rate (float, optional): Dropout rate.
            use_batch_norm (bool, optional): Whether to use batch normalization.
        """
        super().__init__()
        same_pad = (n_filts - 1) // 2
        self.postnet = nn.LayerList()

        # hidden layers: conv -> [batch norm] -> tanh -> dropout
        for layer in range(n_layers - 1):
            in_chans = n_chans if layer != 0 else odim
            stages = [
                nn.Conv1D(
                    in_chans,
                    n_chans,
                    n_filts,
                    stride=1,
                    padding=same_pad,
                    bias_attr=False, )
            ]
            if use_batch_norm:
                stages.append(nn.BatchNorm1D(n_chans))
            stages.append(nn.Tanh())
            stages.append(nn.Dropout(dropout_rate))
            self.postnet.append(nn.Sequential(*stages))

        # output layer: conv -> [batch norm] -> dropout (no tanh)
        in_chans = odim if n_layers == 1 else n_chans
        stages = [
            nn.Conv1D(
                in_chans,
                odim,
                n_filts,
                stride=1,
                padding=same_pad,
                bias_attr=False, )
        ]
        if use_batch_norm:
            stages.append(nn.BatchNorm1D(odim))
        stages.append(nn.Dropout(dropout_rate))
        self.postnet.append(nn.Sequential(*stages))

    def forward(self, xs):
        """Calculate forward propagation.

        Args:
            xs (Tensor): Batch of the sequences of padded input tensors (B, idim, Tmax).
        Returns:
            Tensor: Batch of padded output tensor. (B, odim, Tmax).
        """
        out = xs
        for idx in range(len(self.postnet)):
            block = self.postnet[idx]
            out = block(out)
        return out


class ZoneOutCell(nn.Layer):
    """ZoneOut Cell module.
    Wraps a recurrent cell and applies zoneout as described in
    `Zoneout: Regularizing RNNs by Randomly Preserving Hidden Activations`_.
    This code is modified from `eladhoffer/seq2seq.pytorch`_.
    Examples
    ----------
        >>> lstm = paddle.nn.LSTMCell(16, 32)
        >>> lstm = ZoneOutCell(lstm, 0.5)
    .. _`Zoneout: Regularizing RNNs by Randomly Preserving Hidden Activations`:
        https://arxiv.org/abs/1606.01305
    .. _`eladhoffer/seq2seq.pytorch`:
        https://github.com/eladhoffer/seq2seq.pytorch
    """

    def __init__(self, cell, zoneout_rate=0.1):
        """Initialize zone out cell module.

        Args:
            cell (nn.Layer): Paddle recurrent cell module
                e.g. `paddle.nn.LSTMCell`.
            zoneout_rate (float, optional): Probability of zoneout from 0.0 to 1.0.

        Raises:
            ValueError: If ``zoneout_rate`` lies outside [0.0, 1.0].
        """
        super().__init__()
        self.cell = cell
        self.hidden_size = cell.hidden_size
        self.zoneout_rate = zoneout_rate
        if zoneout_rate < 0.0 or zoneout_rate > 1.0:
            raise ValueError(
                "zoneout probability must be in the range from 0.0 to 1.0.")

    def forward(self, inputs, hidden):
        """Calculate forward propagation.

        Args:
            inputs (Tensor):
                Batch of input tensor (B, input_size).
            hidden (tuple):
                - Tensor: Batch of initial hidden states (B, hidden_size).
                - Tensor: Batch of initial cell states (B, hidden_size).
        Returns:
            Tensor:
                Batch of next hidden states (B, hidden_size).
            tuple:
                - Tensor: Batch of next hidden states (B, hidden_size).
                - Tensor: Batch of next cell states (B, hidden_size).
        """
        # the wrapped cell's first output is discarded; only its state tuple is used
        _unused, candidate = self.cell(inputs, hidden)
        blended = self._zoneout(hidden, candidate, self.zoneout_rate)
        # mirror the (output, states) return layout of the wrapped cell
        return blended[0], blended

    def _zoneout(self, h, next_h, prob):
        """Blend previous state ``h`` and candidate ``next_h`` with rate ``prob``."""
        if not isinstance(h, tuple):
            if not self.training:
                # evaluation: deterministic interpolation weighted by prob
                return prob * h + (1 - prob) * next_h
            # training: sample a 0/1 mask; where it is 1 the previous value is kept
            keep = paddle.bernoulli(paddle.ones([*paddle.shape(h)]) * prob)
            return keep * h + (1 - keep) * next_h

        # tuple of states: recurse element-wise, broadcasting a scalar prob
        count = len(h)
        probs = prob if isinstance(prob, tuple) else tuple([prob] * count)
        return tuple(
            [self._zoneout(h[k], next_h[k], probs[k]) for k in range(count)])


class Decoder(nn.Layer):
    """Decoder module of Spectrogram prediction network.
    This is a module of decoder of Spectrogram prediction network in Tacotron2,
    which described in `Natural TTS
    Synthesis by Conditioning WaveNet on Mel Spectrogram Predictions`_.
    The decoder generates the sequence of
    features from the sequence of the hidden states.
    .. _`Natural TTS Synthesis by Conditioning WaveNet on Mel Spectrogram Predictions`:
       https://arxiv.org/abs/1712.05884
    """

    def __init__(
            self,
            idim,
            odim,
            att,
            dlayers=2,
            dunits=1024,
            prenet_layers=2,
            prenet_units=256,
            postnet_layers=5,
            postnet_chans=512,
            postnet_filts=5,
            output_activation_fn=None,
            cumulate_att_w=True,
            use_batch_norm=True,
            use_concate=True,
            dropout_rate=0.5,
            zoneout_rate=0.1,
            reduction_factor=1, ):
        """Initialize Tacotron2 decoder module.

        Args:
            idim (int): 
                Dimension of the inputs.
            odim (int): 
                Dimension of the outputs.
            att (nn.Layer): 
                Instance of attention class.
            dlayers (int, optional): 
                The number of decoder lstm layers.
            dunits (int, optional): 
                The number of decoder lstm units.
            prenet_layers (int, optional): 
                The number of prenet layers.
            prenet_units (int, optional): 
                The number of prenet units.
            postnet_layers (int, optional): 
                The number of postnet layers.
            postnet_filts (int, optional): 
                The number of postnet filter size.
            postnet_chans (int, optional): 
                The number of postnet filter channels.
            output_activation_fn (nn.Layer, optional): 
                Activation function for outputs.
            cumulate_att_w (bool, optional): 
                Whether to cumulate previous attention weight.
            use_batch_norm (bool, optional): 
                Whether to use batch normalization.
            use_concate (bool, optional):
                Whether to concatenate encoder embedding with decoder lstm outputs.
            dropout_rate (float, optional):
                Dropout rate.
            zoneout_rate (float, optional):
                Zoneout rate.
            reduction_factor (int, optional):
                Reduction factor.
        """
        super().__init__()

        # store the hyperparameters
        self.idim = idim
        self.odim = odim
        self.att = att
        self.output_activation_fn = output_activation_fn
        self.cumulate_att_w = cumulate_att_w
        self.use_concate = use_concate
        self.reduction_factor = reduction_factor

        # AttForwardTA additionally receives the previous output frame
        self.use_att_extra_inputs = isinstance(self.att, AttForwardTA)

        # define lstm network
        # without a prenet the previous output frame (odim) is fed directly
        prenet_units = prenet_units if prenet_layers != 0 else odim
        self.lstm = nn.LayerList()
        for layer in range(dlayers):
            iunits = idim + prenet_units if layer == 0 else dunits
            lstm = nn.LSTMCell(iunits, dunits)
            if zoneout_rate > 0.0:
                lstm = ZoneOutCell(lstm, zoneout_rate)
            self.lstm.append(lstm)

        # define prenet
        if prenet_layers > 0:
            self.prenet = Prenet(
                idim=odim,
                n_layers=prenet_layers,
                n_units=prenet_units,
                dropout_rate=dropout_rate, )
        else:
            self.prenet = None

        # define postnet
        if postnet_layers > 0:
            self.postnet = Postnet(
                idim=idim,
                odim=odim,
                n_layers=postnet_layers,
                n_chans=postnet_chans,
                n_filts=postnet_filts,
                use_batch_norm=use_batch_norm,
                dropout_rate=dropout_rate, )
        else:
            self.postnet = None

        # define projection layers
        iunits = idim + dunits if use_concate else dunits
        self.feat_out = nn.Linear(
            iunits, odim * reduction_factor, bias_attr=False)
        self.prob_out = nn.Linear(iunits, reduction_factor)

    def _zero_state(self, hs):
        """Return an all-zero LSTM state of shape (B, hidden_size), B taken from ``hs``."""
        init_hs = paddle.zeros([paddle.shape(hs)[0], self.lstm[0].hidden_size])
        return init_hs

    def forward(self, hs, hlens, ys):
        """Calculate forward propagation.

        Args:
            hs (Tensor): 
                Batch of the sequences of padded hidden states (B, Tmax, idim).
            hlens (Tensor(int64) padded): 
                Batch of lengths of each input batch (B,).
            ys (Tensor): 
                Batch of the sequences of padded target features (B, Lmax, odim).

        Returns:
            Tensor: 
                Batch of output tensors after postnet (B, Lmax, odim).
            Tensor: 
                Batch of output tensors before postnet (B, Lmax, odim).
            Tensor: 
                Batch of logits of stop prediction (B, Lmax).
            Tensor: 
                Batch of attention weights (B, Lmax, Tmax).
            
        Note: 
            This computation is performed in teacher-forcing manner.
        """
        # thin out frames (B, Lmax, odim) ->  (B, Lmax/r, odim)
        if self.reduction_factor > 1:
            ys = ys[:, self.reduction_factor - 1::self.reduction_factor]

        # length list should be list of int
        # hlens = list(map(int, hlens))

        # initialize hidden states of decoder
        c_list = [self._zero_state(hs)]
        z_list = [self._zero_state(hs)]
        for _ in range(1, len(self.lstm)):
            c_list.append(self._zero_state(hs))
            z_list.append(self._zero_state(hs))
        prev_out = paddle.zeros([paddle.shape(hs)[0], self.odim])

        # initialize attention
        # an all-zero tensor stands in for "no previous attention weights"
        prev_att_ws = []
        prev_att_w = paddle.zeros(paddle.shape(hlens))
        prev_att_ws.append(prev_att_w)
        self.att.reset()

        # loop for an output sequence
        outs, logits, att_ws = [], [], []
        for y in ys.transpose([1, 0, 2]):
            if self.use_att_extra_inputs:
                att_c, att_w = self.att(hs, hlens, z_list[0], prev_att_ws[-1],
                                        prev_out)
            else:
                att_c, att_w = self.att(hs, hlens, z_list[0], prev_att_ws[-1])
            prenet_out = self.prenet(
                prev_out) if self.prenet is not None else prev_out
            xs = paddle.concat([att_c, prenet_out], axis=1)
            # we only use the second output of LSTMCell in paddle
            _, next_hidden = self.lstm[0](xs, (z_list[0], c_list[0]))
            z_list[0], c_list[0] = next_hidden
            for i in range(1, len(self.lstm)):
                # we only use the second output of LSTMCell in paddle
                _, next_hidden = self.lstm[i](z_list[i - 1],
                                              (z_list[i], c_list[i]))
                z_list[i], c_list[i] = next_hidden
            zcs = (paddle.concat([z_list[-1], att_c], axis=1)
                   if self.use_concate else z_list[-1])
            outs.append(
                self.feat_out(zcs).reshape([paddle.shape(hs)[0], self.odim, -1
                                            ]))
            logits.append(self.prob_out(zcs))
            att_ws.append(att_w)
            # teacher forcing
            prev_out = y
            if self.cumulate_att_w and paddle.sum(prev_att_w) != 0:
                prev_att_w = prev_att_w + att_w  # Note: error when use +=
            else:
                prev_att_w = att_w
            prev_att_ws.append(prev_att_w)
        # (B, Lmax)
        logits = paddle.concat(logits, axis=1)
        # (B, odim, Lmax) 
        before_outs = paddle.concat(outs, axis=2)
        # (B, Lmax, Tmax)
        att_ws = paddle.stack(att_ws, axis=1)

        if self.reduction_factor > 1:
            # (B, odim, Lmax)
            before_outs = before_outs.reshape(
                [paddle.shape(before_outs)[0], self.odim, -1])

        if self.postnet is not None:
            # (B, odim, Lmax)
            after_outs = before_outs + self.postnet(before_outs)
        else:
            after_outs = before_outs
        # (B, Lmax, odim)
        before_outs = before_outs.transpose([0, 2, 1])
        # (B, Lmax, odim)
        after_outs = after_outs.transpose([0, 2, 1])

        # apply activation function for scaling
        if self.output_activation_fn is not None:
            before_outs = self.output_activation_fn(before_outs)
            after_outs = self.output_activation_fn(after_outs)

        return after_outs, before_outs, logits, att_ws

    def inference(
            self,
            h,
            threshold=0.5,
            minlenratio=0.0,
            maxlenratio=10.0,
            use_att_constraint=False,
            backward_window=None,
            forward_window=None, ):
        """Generate the sequence of features given the sequences of characters.
        Args:
            h(Tensor): 
                Input sequence of encoder hidden states (T, C).
            threshold(float, optional): 
                Threshold to stop generation. (Default value = 0.5)
            minlenratio(float, optional): 
                Minimum length ratio. If set to 1.0 and the length of input is 10,
                the minimum length of outputs will be 10 * 1 = 10. (Default value = 0.0)
            maxlenratio(float, optional):
                Maximum length ratio. If set to 10 and the length of input is 10,
                the maximum length of outputs will be 10 * 10 = 100. (Default value = 10.0)
            use_att_constraint(bool, optional): 
                Whether to apply attention constraint introduced in `Deep Voice 3`_. (Default value = False)
            backward_window(int, optional): 
                Backward window size in attention constraint. (Default value = None)
            forward_window(int, optional):  
                Forward window size in attention constraint. (Default value = None)

        Returns:
            Tensor: 
                Output sequence of features (L, odim).
            Tensor: 
                Output sequence of stop probabilities (L,).
            Tensor: 
                Attention weights (L, T).

        Note: 
            This computation is performed in auto-regressive manner.
    .. _`Deep Voice 3`: https://arxiv.org/abs/1710.07654
        """
        # setup

        assert len(paddle.shape(h)) == 2
        hs = h.unsqueeze(0)
        ilens = paddle.shape(h)[0]
        # maxlen and minlen were originally wrapped in int(); the cast was
        # removed here to avoid problems in dynamic-to-static conversion
        maxlen = paddle.shape(h)[0] * maxlenratio
        minlen = paddle.shape(h)[0] * minlenratio
        # threshold was originally used directly; it is turned into a tensor
        # here to avoid problems in dynamic-to-static conversion
        threshold = paddle.ones([1]) * threshold

        # initialize hidden states of decoder
        c_list = [self._zero_state(hs)]
        z_list = [self._zero_state(hs)]
        for _ in range(1, len(self.lstm)):
            c_list.append(self._zero_state(hs))
            z_list.append(self._zero_state(hs))
        prev_out = paddle.zeros([1, self.odim])

        # initialize attention
        prev_att_ws = []
        prev_att_w = paddle.zeros([ilens])
        prev_att_ws.append(prev_att_w)

        self.att.reset()

        # setup for attention constraint
        # -1 is passed (rather than None) when the constraint is disabled
        if use_att_constraint:
            last_attended_idx = 0
        else:
            last_attended_idx = -1

        # loop for an output sequence
        idx = 0
        outs, att_ws, probs = [], [], []
        prob = paddle.zeros([1])
        while paddle.to_tensor(True):
            # NOTE(review): these self-assignments are no-ops in eager mode;
            # presumably they make the lists visible as loop variables to the
            # dynamic-to-static converter - confirm before removing.
            z_list = z_list
            c_list = c_list
            # updated index
            idx += self.reduction_factor

            # decoder calculation
            if self.use_att_extra_inputs:
                att_c, att_w = self.att(
                    hs,
                    ilens,
                    z_list[0],
                    prev_att_ws[-1],
                    prev_out,
                    last_attended_idx=last_attended_idx,
                    backward_window=backward_window,
                    forward_window=forward_window, )
            else:
                att_c, att_w = self.att(
                    hs,
                    ilens,
                    z_list[0],
                    prev_att_ws[-1],
                    last_attended_idx=last_attended_idx,
                    backward_window=backward_window,
                    forward_window=forward_window, )

            att_ws.append(att_w)
            prenet_out = self.prenet(
                prev_out) if self.prenet is not None else prev_out
            xs = paddle.concat([att_c, prenet_out], axis=1)
            # we only use the second output of LSTMCell in paddle
            _, next_hidden = self.lstm[0](xs, (z_list[0], c_list[0]))

            z_list[0], c_list[0] = next_hidden
            for i in range(1, len(self.lstm)):
                # we only use the second output of LSTMCell in paddle
                _, next_hidden = self.lstm[i](z_list[i - 1],
                                              (z_list[i], c_list[i]))
                z_list[i], c_list[i] = next_hidden
            zcs = (paddle.concat([z_list[-1], att_c], axis=1)
                   if self.use_concate else z_list[-1])
            # [(1, odim, r), ...]
            outs.append(self.feat_out(zcs).reshape([1, self.odim, -1]))

            prob = F.sigmoid(self.prob_out(zcs))[0]
            probs.append(prob)

            # feed back the last generated frame of this step
            if self.output_activation_fn is not None:
                prev_out = self.output_activation_fn(
                    outs[-1][:, :, -1])  # (1, odim)
            else:
                prev_out = outs[-1][:, :, -1]  # (1, odim)
            if self.cumulate_att_w and paddle.sum(prev_att_w) != 0:
                prev_att_w = prev_att_w + att_w  # Note: error when use +=
            else:
                prev_att_w = att_w
            prev_att_ws.append(prev_att_w)
            if use_att_constraint:
                last_attended_idx = int(att_w.argmax())

            # The tacotron2 ljspeech dynamic-to-static problem is probably
            # caused by `prob >= threshold` not being evaluated correctly here.
            if prob >= threshold or idx >= maxlen:
                # check minimum length
                if idx < minlen:
                    continue
                break
            # Debugging notes on dynamic-to-static (dy2static) conversion.
            # (The original note cited file line numbers that are now stale;
            # they appear to refer to the two commented-out branches below.)
            # - Enabling only the `if prob >= threshold` branch makes the
            #   dy2static model hang, while the dygraph model still generates
            #   correct audio, which shows the model itself is fine.
            # - Enabling both the `if prob >= threshold` and the
            #   `elif idx >= maxlen` branches avoids the hang, but the
            #   generated audio has extra noise at the end.
            # - So the dy2static model never enters the `prob >= threshold`
            #   branch, although the static graph can enter it and leave the
            #   loop. The dy2static model leaves the loop through
            #   `idx >= maxlen` (without that check it loops forever, i.e.
            #   hangs): it stops at the maximum length rather than when the
            #   model predicts the end, hence the long trailing noise.
            # - With the condition `prob <= threshold` the dy2static model
            #   does exit the loop (with a wrong result), so the operand types
            #   are fine; the problem is probably in `prob` itself.
            # if prob >= threshold:
            #     print("prob >= threshold")
            #     break
            # elif idx >= maxlen:
            #     print("idx >= maxlen")
            #     break

        # (1, odim, L)
        outs = paddle.concat(outs, axis=2)
        if self.postnet is not None:
            # (1, odim, L)
            outs = outs + self.postnet(outs)
        # (L, odim)
        outs = outs.transpose([0, 2, 1]).squeeze(0)
        probs = paddle.concat(probs, axis=0)
        att_ws = paddle.concat(att_ws, axis=0)

        if self.output_activation_fn is not None:
            outs = self.output_activation_fn(outs)

        return outs, probs, att_ws

    def calculate_all_attentions(self, hs, hlens, ys):
        """Calculate all of the attention weights.

        Args:
            hs (Tensor): 
                Batch of the sequences of padded hidden states (B, Tmax, idim).
            hlens (Tensor(int64)): 
                Batch of lengths of each input batch (B,).
            ys (Tensor): 
                Batch of the sequences of padded target features (B, Lmax, odim).

        Returns:
            Tensor:
                Batch of attention weights (B, Lmax, Tmax).
    
        Note:
            This computation is performed in teacher-forcing manner.
        """
        # thin out frames (B, Lmax, odim) ->  (B, Lmax/r, odim)
        if self.reduction_factor > 1:
            ys = ys[:, self.reduction_factor - 1::self.reduction_factor]

        # length list should be list of int
        # NOTE(review): forward() keeps hlens as a Tensor and starts from an
        # all-zero prev_att_w, whereas this method passes a list of ints and
        # None - confirm the attention module accepts both forms.
        hlens = list(map(int, hlens))

        # initialize hidden states of decoder
        c_list = [self._zero_state(hs)]
        z_list = [self._zero_state(hs)]
        for _ in range(1, len(self.lstm)):
            c_list.append(self._zero_state(hs))
            z_list.append(self._zero_state(hs))
        prev_out = paddle.zeros([paddle.shape(hs)[0], self.odim])

        # initialize attention
        prev_att_w = None
        self.att.reset()

        # loop for an output sequence
        att_ws = []
        for y in ys.transpose([1, 0, 2]):
            if self.use_att_extra_inputs:
                att_c, att_w = self.att(hs, hlens, z_list[0], prev_att_w,
                                        prev_out)
            else:
                att_c, att_w = self.att(hs, hlens, z_list[0], prev_att_w)
            att_ws.append(att_w)
            prenet_out = self.prenet(
                prev_out) if self.prenet is not None else prev_out
            xs = paddle.concat([att_c, prenet_out], axis=1)
            # we only use the second output of LSTMCell in paddle
            _, next_hidden = self.lstm[0](xs, (z_list[0], c_list[0]))
            z_list[0], c_list[0] = next_hidden
            for i in range(1, len(self.lstm)):
                # we only use the second output of LSTMCell in paddle; unpacking
                # the raw return value would store the whole (h, c) tuple in
                # c_list[i] and corrupt the state passed to the next step
                _, next_hidden = self.lstm[i](z_list[i - 1],
                                              (z_list[i], c_list[i]))
                z_list[i], c_list[i] = next_hidden
            # teacher forcing
            prev_out = y
            if self.cumulate_att_w and prev_att_w is not None:
                # Note: error when use +=
                prev_att_w = prev_att_w + att_w
            else:
                prev_att_w = att_w
        # (B, Lmax, Tmax)
        att_ws = paddle.stack(att_ws, axis=1)

        return att_ws


================================================
FILE: paddlespeech/t2s/modules/tacotron2/encoder.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""Tacotron2 encoder related modules."""
import paddle
from paddle import nn


class Encoder(nn.Layer):
    """Encoder module of Spectrogram prediction network.

    This is a module of encoder of Spectrogram prediction network in Tacotron2,
    which described in `Natural TTS Synthesis by Conditioning WaveNet on Mel
    Spectrogram Predictions`_. This is the encoder which converts either a sequence
    of characters or acoustic features into the sequence of hidden states.

    .. _`Natural TTS Synthesis by Conditioning WaveNet on Mel Spectrogram Predictions`:
       https://arxiv.org/abs/1712.05884

    """

    def __init__(
            self,
            idim,
            input_layer="embed",
            embed_dim=512,
            elayers=1,
            eunits=512,
            econv_layers=3,
            econv_chans=512,
            econv_filts=5,
            use_batch_norm=True,
            use_residual=False,
            dropout_rate=0.5,
            padding_idx=0, ):
        """Initialize Tacotron2 encoder module.
        Args:
            idim (int): 
                Dimension of the inputs.
            input_layer (str): 
                Input layer type, either "embed" or "linear".
            embed_dim (int, optional): 
                Dimension of character embedding.
            elayers (int, optional): 
                The number of encoder blstm layers.
            eunits (int, optional): 
                The number of encoder blstm units.
            econv_layers (int, optional): 
                The number of encoder conv layers.
            econv_filts (int, optional): 
                The number of encoder conv filter size.
            econv_chans (int, optional): 
                The number of encoder conv filter channels.
            use_batch_norm (bool, optional): 
                Whether to use batch normalization.
            use_residual (bool, optional): 
                Whether to use residual connection.
            dropout_rate (float, optional): 
                Dropout rate.
            padding_idx (int, optional):
                Padding index handed to ``nn.Embedding`` when
                ``input_layer == "embed"``.

        Raises:
            ValueError: If ``input_layer`` is neither "linear" nor "embed".

        """
        super().__init__()
        # store the hyperparameters
        self.idim = idim
        self.use_residual = use_residual

        # define network layer modules
        if input_layer == "linear":
            self.embed = nn.Linear(idim, econv_chans)
        elif input_layer == "embed":
            self.embed = nn.Embedding(idim, embed_dim, padding_idx=padding_idx)
        else:
            raise ValueError("unknown input_layer: " + input_layer)

        if econv_layers > 0:
            self.convs = nn.LayerList()
            for layer in range(econv_layers):
                ichans = (embed_dim if layer == 0 and input_layer == "embed"
                          else econv_chans)
                # conv -> [batch norm] -> relu -> dropout; sublayer order (and
                # therefore parameter naming) is the same as before
                stages = [
                    nn.Conv1D(
                        ichans,
                        econv_chans,
                        econv_filts,
                        stride=1,
                        padding=(econv_filts - 1) // 2,
                        bias_attr=False, )
                ]
                if use_batch_norm:
                    stages.append(nn.BatchNorm1D(econv_chans))
                stages.append(nn.ReLU())
                stages.append(nn.Dropout(dropout_rate))
                # use append() in both configurations (the batch-norm branch
                # already did) instead of relying on `+=` for nn.LayerList
                self.convs.append(nn.Sequential(*stages))
        else:
            self.convs = None
        if elayers > 0:
            iunits = econv_chans if econv_layers != 0 else embed_dim
            # batch_first=True, bidirectional=True
            self.blstm = nn.LSTM(
                iunits,
                eunits // 2,
                elayers,
                time_major=False,
                direction='bidirectional',
                bias_ih_attr=True,
                bias_hh_attr=True)
            self.blstm.flatten_parameters()
        else:
            self.blstm = None

        # # initialize
        # self.apply(encoder_init)

    def forward(self, xs, ilens=None):
        """Calculate forward propagation.

        Args:
            xs (Tensor): 
                Batch of the padded sequence. Either character ids (B, Tmax)
                or acoustic feature (B, Tmax, idim * encoder_reduction_factor). 
                Padded value should be 0.
            ilens (Tensor(int64)): 
                Batch of lengths of each input batch (B,).

        Returns:
            Tensor: 
                Batch of the sequences of encoder states(B, Tmax, eunits).
            Tensor(int64): 
                Batch of lengths of each sequence (B,)

        Note:
            When the encoder was built without BLSTM layers (``elayers == 0``)
            only the first tensor is returned, not a tuple.
        """
        # (B, Tmax, C) -> (B, C, Tmax) for the 1-D convolutions
        xs = self.embed(xs).transpose([0, 2, 1])
        if self.convs is not None:
            for i in range(len(self.convs)):
                if self.use_residual:
                    # out-of-place add, matching the decoder's
                    # "error when use +=" note for accumulated tensors
                    xs = xs + self.convs[i](xs)
                else:
                    xs = self.convs[i](xs)
        if self.blstm is None:
            return xs.transpose([0, 2, 1])
        if not isinstance(ilens, paddle.Tensor):
            ilens = paddle.to_tensor(ilens)
        # a scalar length (single utterance) is promoted to shape (1,)
        if ilens.ndim == 0:
            ilens = ilens.unsqueeze(0)
        xs = xs.transpose([0, 2, 1])
        # for dygraph to static graph
        # self.blstm.flatten_parameters()
        # (B, Tmax, C)
        # see https://www.paddlepaddle.org.cn/documentation/docs/zh/faq/train_cn.html#paddletorch-nn-utils-rnn-pack-padded-sequencetorch-nn-utils-rnn-pad-packed-sequenceapi
        xs, _ = self.blstm(xs, sequence_length=ilens)
        hlens = ilens

        return xs, hlens

    def inference(self, x):
        """Inference.

        Args:
            x (Tensor): 
                The sequence of character ids (T,) or acoustic feature (T, idim * encoder_reduction_factor).

        Returns:
            Tensor: The sequences of encoder states(T, eunits).

        """
        xs = x.unsqueeze(0)
        ilens = paddle.shape(x)[0]

        out = self.forward(xs, ilens)
        if self.blstm is None:
            # forward() returned a bare (1, T, C) tensor: drop only the batch
            # axis (indexing twice would also drop the time axis)
            return out[0]
        # forward() returned (states, lengths): take states, then drop batch
        return out[0][0]


================================================
FILE: paddlespeech/t2s/modules/tade_res_block.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""StyleMelGAN's TADEResBlock Modules."""
from functools import partial

import paddle.nn.functional as F
from paddle import nn


class TADELayer(nn.Layer):
    """TADE layer: instance-normalises the input and modulates it with a
    scale and a shift predicted from the (upsampled) auxiliary features."""

    def __init__(
            self,
            in_channels: int=64,
            aux_channels: int=80,
            kernel_size: int=9,
            bias: bool=True,
            upsample_factor: int=2,
            upsample_mode: str="nearest", ):
        """Initialize TADE layer.

        Args:
            in_channels (int): 
                Number of channels of the main input.
            aux_channels (int): 
                Number of channels of the auxiliary input.
            kernel_size (int): 
                Kernel size of both convolutions.
            bias (bool): 
                Whether the convolutions use a bias term.
            upsample_factor (int): 
                Scale factor passed to the upsampling layer.
            upsample_mode (str): 
                Interpolation mode passed to the upsampling layer.
        """
        super().__init__()
        # padding that keeps the time length unchanged for stride 1
        same_padding = (kernel_size - 1) // 2

        # normalisation without learnable affine parameters
        self.norm = nn.InstanceNorm1D(
            in_channels,
            momentum=0.1,
            data_format="NCL",
            weight_attr=False,
            bias_attr=False)

        # aux_channels -> in_channels
        aux_projection = nn.Conv1D(
            aux_channels,
            in_channels,
            kernel_size,
            stride=1,
            padding=same_padding,
            bias_attr=bias, )
        self.aux_conv = nn.Sequential(aux_projection)

        # in_channels -> 2 * in_channels (scale half and shift half)
        modulation = nn.Conv1D(
            in_channels,
            in_channels * 2,
            kernel_size,
            stride=1,
            padding=same_padding,
            bias_attr=bias, )
        self.gated_conv = nn.Sequential(modulation)

        self.upsample = nn.Upsample(
            scale_factor=upsample_factor, mode=upsample_mode)

    def _upsample_3d(self, feats):
        """Run ``self.upsample`` on a 3-D tensor.

        'bilinear', 'bicubic' and 'nearest' only support 4-D tensor, so a
        trailing axis is appended before upsampling and index 0 of that axis
        is taken afterwards.
        """
        return self.upsample(feats.unsqueeze(-1))[:, :, :, 0]

    def forward(self, x, c):
        """Calculate forward propagation.
        Args:
            x (Tensor): 
                Input tensor (B, in_channels, T).
            c (Tensor): 
                Auxiliary input tensor (B, aux_channels, T).
        Returns:
            Tensor: 
                Output tensor (B, in_channels, T * upsample_factor).
            Tensor:
                Upsampled aux tensor (B, in_channels, T * upsample_factor).
        """
        normed = self.norm(x)

        # upsample the conditioning first, then project it to in_channels
        c = self.aux_conv(self._upsample_3d(c))

        # split the modulation along the channel axis into scale and shift
        scale, shift = self.gated_conv(c).split(2, axis=1)

        modulated = scale * self._upsample_3d(normed) + shift
        return modulated, c


class TADEResBlock(nn.Layer):
    """Residual block made of two TADE layers, each followed by a gated
    convolution; the skip connection is upsampled to match the output."""

    def __init__(
            self,
            in_channels: int=64,
            aux_channels: int=80,
            kernel_size: int=9,
            dilation: int=2,
            bias: bool=True,
            upsample_factor: int=2,
            # Paddle-specific: the input is expanded to 4-D before upsampling
            # because 'nearest' is not available for 3-D input.
            upsample_mode: str="nearest",
            gated_function: str="softmax", ):
        """Initialize TADEResBlock module.

        Args:
            in_channels (int): 
                Number of channels of the main input.
            aux_channels (int): 
                Number of channels of the auxiliary input.
            kernel_size (int): 
                Kernel size of all convolutions.
            dilation (int): 
                Dilation of the second gated convolution.
            bias (bool): 
                Whether the convolutions use a bias term.
            upsample_factor (int): 
                Scale factor of the second TADE layer and of the skip path.
            upsample_mode (str): 
                Interpolation mode of the upsampling layers.
            gated_function (str): 
                Gate activation, "softmax" (over channels) or "sigmoid".

        Raises:
            ValueError: If ``gated_function`` is neither "softmax" nor "sigmoid".
        """
        super().__init__()
        base_padding = (kernel_size - 1) // 2

        # first stage: no temporal upsampling
        self.tade1 = TADELayer(
            in_channels=in_channels,
            aux_channels=aux_channels,
            kernel_size=kernel_size,
            bias=bias,
            upsample_factor=1,
            upsample_mode=upsample_mode, )
        self.gated_conv1 = nn.Conv1D(
            in_channels,
            in_channels * 2,
            kernel_size,
            stride=1,
            padding=base_padding,
            bias_attr=bias, )

        # second stage: the aux input is the in_channels-wide output of tade1
        self.tade2 = TADELayer(
            in_channels=in_channels,
            aux_channels=in_channels,
            kernel_size=kernel_size,
            bias=bias,
            upsample_factor=upsample_factor,
            upsample_mode=upsample_mode, )
        self.gated_conv2 = nn.Conv1D(
            in_channels,
            in_channels * 2,
            kernel_size,
            stride=1,
            padding=base_padding * dilation,
            dilation=dilation,
            bias_attr=bias, )

        self.upsample = nn.Upsample(
            scale_factor=upsample_factor, mode=upsample_mode)

        if gated_function not in ("softmax", "sigmoid"):
            raise ValueError(f"{gated_function} is not supported.")
        self.gated_function = (partial(F.softmax, axis=1)
                               if gated_function == "softmax" else F.sigmoid)

    def _gated_activation(self, feats):
        """Split ``feats`` in two along channels and return gate(a) * tanh(b)."""
        gate_in, filter_in = feats.split(2, axis=1)
        return self.gated_function(gate_in) * F.tanh(filter_in)

    def forward(self, x, c):
        """Calculate forward propagation.
        Args:

            x (Tensor): 
                Input tensor (B, in_channels, T).
            c (Tensor): 
                Auxiliary input tensor (B, aux_channels, T).
        Returns:
            Tensor: 
                Output tensor (B, in_channels, T * upsample_factor).
            Tensor: 
                Upsampled auxiliary tensor (B, in_channels, T * upsample_factor).
        """
        skip = x

        hidden, c = self.tade1(x, c)
        hidden = self._gated_activation(self.gated_conv1(hidden))

        hidden, c = self.tade2(hidden, c)
        hidden = self._gated_activation(self.gated_conv2(hidden))

        # 'bilinear', 'bicubic' and 'nearest' only support 4-D tensor.
        skip = self.upsample(skip.unsqueeze(-1))[:, :, :, 0]
        return skip + hidden, c


================================================
FILE: paddlespeech/t2s/modules/transformer/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/modules/transformer/attention.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""Multi-Head Attention layer definition."""
import math

import numpy
import paddle
from paddle import nn

from paddlespeech.t2s.modules.masked_fill import masked_fill


class MultiHeadedAttention(nn.Layer):
    """Multi-Head Attention layer.
    Args:
        n_head (int): 
            The number of heads.
        n_feat (int): 
            The number of features.
        dropout_rate (float): 
            Dropout rate.
    """

    def __init__(self, n_head, n_feat, dropout_rate):
        """Construct a MultiHeadedAttention object."""
        super().__init__()
        # the feature dimension is split evenly across the heads
        assert n_feat % n_head == 0
        self.h = n_head
        # d_v is assumed to always equal d_k
        self.d_k = n_feat // n_head
        self.linear_q = nn.Linear(n_feat, n_feat, bias_attr=True)
        self.linear_k = nn.Linear(n_feat, n_feat, bias_attr=True)
        self.linear_v = nn.Linear(n_feat, n_feat, bias_attr=True)
        self.linear_out = nn.Linear(n_feat, n_feat, bias_attr=True)
        # attention weights of the most recent forward pass
        self.attn = None
        self.dropout = nn.Dropout(p=dropout_rate)

    def forward_qkv(self, query, key, value):
        """Project query, key and value and split them into heads.

        Args:
            query(Tensor): 
                query tensor (#batch, time1, size).
            key(Tensor): 
                Key tensor (#batch, time2, size).
            value(Tensor): 
                Value tensor (#batch, time2, size).

        Returns:
            Tensor: 
                Transformed query tensor (#batch, n_head, time1, d_k).
            Tensor: 
                Transformed key tensor (#batch, n_head, time2, d_k).
            Tensor: 
                Transformed value tensor (#batch, n_head, time2, d_k).
        """
        batch = paddle.shape(query)[0]
        # (batch, time, head, d_k) -> (batch, head, time, d_k)
        heads_first = (0, 2, 1, 3)

        q = paddle.reshape(
            self.linear_q(query),
            [batch, -1, self.h, self.d_k]).transpose(heads_first)
        k = paddle.reshape(
            self.linear_k(key),
            [batch, -1, self.h, self.d_k]).transpose(heads_first)
        v = paddle.reshape(
            self.linear_v(value),
            [batch, -1, self.h, self.d_k]).transpose(heads_first)
        return q, k, v

    def forward_attention(self, value, scores, mask=None):
        """Compute attention context vector.

        Args:
            value(Tensor): 
                Transformed value (#batch, n_head, time2, d_k).
            scores(Tensor): 
                Attention score (#batch, n_head, time1, time2).
            mask(Tensor, optional): 
                Mask (#batch, 1, time2) or (#batch, time1, time2). (Default value = None)

        Returns:
            Tensor: Transformed value (#batch, time1, d_model) weighted by the attention score (#batch, time1, time2).
        """
        batch = paddle.shape(value)[0]

        if mask is None:
            # (batch, head, time1, time2)
            self.attn = paddle.nn.functional.softmax(scores, axis=-1)
        else:
            # add a head axis and invert: True now marks positions to hide
            hidden_pos = paddle.logical_not(mask.unsqueeze(1))
            # scores.dtype is assumed to be a float type numpy knows (float32)
            dtype_name = str(scores.dtype).split(".")[-1]
            lowest = float(numpy.finfo(dtype_name).min)
            masked_scores = masked_fill(scores, hidden_pos, lowest)
            # (batch, head, time1, time2); hidden positions are reset to
            # exactly zero after the softmax
            weights = paddle.nn.functional.softmax(masked_scores, axis=-1)
            self.attn = masked_fill(weights, hidden_pos, 0.0)

        # (batch, head, time1, time2) x (batch, head, time2, d_k)
        #   -> (batch, head, time1, d_k)
        context = paddle.matmul(self.dropout(self.attn), value)
        # merge the heads back: (batch, time1, d_model)
        merged = paddle.reshape(
            context.transpose((0, 2, 1, 3)), (batch, -1, self.h * self.d_k))
        return self.linear_out(merged)

    def forward(self, query, key, value, mask=None):
        """Compute scaled dot product attention.

        Args:
            query(Tensor): 
                Query tensor (#batch, time1, size).
            key(Tensor): 
                Key tensor (#batch, time2, size).
            value(Tensor): 
                Value tensor (#batch, time2, size).
            mask(Tensor, optional): 
                Mask tensor (#batch, 1, time2) or (#batch, time1, time2). (Default value = None)

        Returns:
            Tensor: Output tensor (#batch, time1, d_model).
        """
        q, k, v = self.forward_qkv(query, key, value)
        # (batch, head, d_k, time2)
        k_t = k.transpose((0, 1, 3, 2))
        scores = paddle.matmul(q, k_t) / math.sqrt(self.d_k)
        return self.forward_attention(v, scores, mask)


class RelPositionMultiHeadedAttention(MultiHeadedAttention):
    """Multi-Head Attention layer with relative position encoding (new implementation).
    Details can be found in https://github.com/espnet/espnet/pull/2816.
    Paper: https://arxiv.org/abs/1901.02860

    Args:
        n_head (int): 
            The number of heads.
        n_feat (int): 
            The number of features.
        dropout_rate (float): 
            Dropout rate.
        zero_triu (bool): 
            Whether to zero the upper triangular part of attention matrix.
    """

    def __init__(self, n_head, n_feat, dropout_rate, zero_triu=False):
        """Construct an RelPositionMultiHeadedAttention object."""
        super().__init__(n_head, n_feat, dropout_rate)
        self.zero_triu = zero_triu
        # linear transformation for positional encoding
        self.linear_pos = nn.Linear(n_feat, n_feat, bias_attr=False)
        # these two learnable bias are used in matrix c and matrix d
        # as described in https://arxiv.org/abs/1901.02860 Section 3.3

        self.pos_bias_u = paddle.create_parameter(
            shape=(self.h, self.d_k),
            dtype='float32',
            default_initializer=paddle.nn.initializer.XavierUniform())
        self.pos_bias_v = paddle.create_parameter(
            shape=(self.h, self.d_k),
            dtype='float32',
            default_initializer=paddle.nn.initializer.XavierUniform())

    def rel_shift(self, x):
        """Compute relative positional encoding.
        Args:
            x(Tensor): 
                Input tensor (batch, head, time1, 2*time1-1).

        Returns:
            Tensor: Output tensor (batch, head, time1, (2*time1-1) // 2 + 1).
        """
        b, h, t1, t2 = paddle.shape(x)
        # prepend one zero column, then reinterpret the buffer so that every
        # row ends up shifted relative to the previous one
        zero_pad = paddle.zeros((b, h, t1, 1), dtype=x.dtype)
        x_padded = paddle.concat([zero_pad, x], axis=-1)
        x_padded = x_padded.reshape([b, h, t2 + 1, t1])
        # only keep the positions from 0 to time2
        new_t = paddle.cast(paddle.floor(t2 / 2) + 1, dtype='int32')
        x = x_padded[:, :, 1:].reshape([b, h, t1, t2])[:, :, :, :new_t]
        if self.zero_triu:
            # The mask has to match x *after* the slice above; building it
            # from the pre-slice width t2 cannot be multiplied with x.
            kept_shape = paddle.shape(x)
            kept_t1, kept_t2 = kept_shape[2], kept_shape[3]
            ones = paddle.ones((kept_t1, kept_t2), dtype=x.dtype)
            x = x * paddle.tril(ones, kept_t2 - kept_t1)[None, None, :, :]
        return x

    def forward(self, query, key, value, pos_emb, mask):
        """Compute 'Scaled Dot Product Attention' with rel. positional encoding.

        Args:
            query(Tensor): 
                Query tensor (#batch, time1, size).
            key(Tensor): 
                Key tensor (#batch, time2, size).
            value(Tensor): 
                Value tensor (#batch, time2, size).
            pos_emb(Tensor): 
                Positional embedding tensor (#batch, 2*time1-1, size).
            mask(Tensor): 
                Mask tensor (#batch, 1, time2) or (#batch, time1, time2).

        Returns:
            Tensor: Output tensor (#batch, time1, d_model).
        """
        q, k, v = self.forward_qkv(query, key, value)
        # (batch, time1, head, d_k) so the (head, d_k) biases broadcast
        q = q.transpose([0, 2, 1, 3])
        n_batch_pos = paddle.shape(pos_emb)[0]
        p = self.linear_pos(pos_emb).reshape(
            [n_batch_pos, -1, self.h, self.d_k])
        # (batch, head, 2*time1-1, d_k)
        p = p.transpose([0, 2, 1, 3])
        # (batch, head, time1, d_k)
        q_with_bias_u = (q + self.pos_bias_u).transpose([0, 2, 1, 3])
        # (batch, head, time1, d_k)
        q_with_bias_v = (q + self.pos_bias_v).transpose([0, 2, 1, 3])

        # compute attention score
        # first compute matrix a and matrix c
        # as described in https://arxiv.org/abs/1901.02860 Section 3.3
        # (batch, head, time1, time2)
        matrix_ac = paddle.matmul(q_with_bias_u, k.transpose([0, 1, 3, 2]))

        # compute matrix b and matrix d
        # (batch, head, time1, 2*time1-1)
        matrix_bd = paddle.matmul(q_with_bias_v, p.transpose([0, 1, 3, 2]))
        matrix_bd = self.rel_shift(matrix_bd)
        # (batch, head, time1, time2)
        scores = (matrix_ac + matrix_bd) / math.sqrt(self.d_k)

        return self.forward_attention(v, scores, mask)


class LegacyRelPositionMultiHeadedAttention(MultiHeadedAttention):
    """Multi-Head Attention layer with relative position encoding (old version).
    Details can be found in https://github.com/espnet/espnet/pull/2816.
    Paper: https://arxiv.org/abs/1901.02860

    Args:
        n_head (int): 
            The number of heads.
        n_feat (int): 
            The number of features.
        dropout_rate (float): 
            Dropout rate.
        zero_triu (bool): 
            Whether to zero the upper triangular part of attention matrix.
    """

    def __init__(self, n_head, n_feat, dropout_rate, zero_triu=False):
        """Construct a LegacyRelPositionMultiHeadedAttention object."""
        super().__init__(n_head, n_feat, dropout_rate)
        self.zero_triu = zero_triu
        # projection applied to the positional embedding (no bias)
        self.linear_pos = nn.Linear(n_feat, n_feat, bias_attr=False)

        # two learnable biases, used in matrix c and matrix d
        # as described in https://arxiv.org/abs/1901.02860 Section 3.3
        bias_shape = (self.h, self.d_k)
        self.pos_bias_u = paddle.create_parameter(
            shape=bias_shape,
            dtype='float32',
            default_initializer=paddle.nn.initializer.XavierUniform())
        self.pos_bias_v = paddle.create_parameter(
            shape=bias_shape,
            dtype='float32',
            default_initializer=paddle.nn.initializer.XavierUniform())

    def rel_shift(self, x):
        """Compute relative positional encoding.
        Args:
            x(Tensor): 
                Input tensor (batch, head, time1, time2).
        Returns:
            Tensor: Output tensor with the same shape as the input.
        """
        b, h, t1, t2 = paddle.shape(x)
        # prepend one zero column and reinterpret the buffer with the last
        # two axes swapped in size
        padded = paddle.concat([paddle.zeros((b, h, t1, 1)), x], axis=-1)
        padded = paddle.reshape(padded, [b, h, t2 + 1, t1])
        # drop the first row and restore the original layout
        shifted = paddle.reshape(padded[:, :, 1:], [b, h, t1, t2])

        if not self.zero_triu:
            return shifted

        lower = paddle.tril(paddle.ones((t1, t2)), t2 - t1)
        return shifted * lower[None, None, :, :]

    def forward(self, query, key, value, pos_emb, mask):
        """Compute 'Scaled Dot Product Attention' with rel. positional encoding.

        Args:
            query(Tensor): Query tensor (#batch, time1, size).
            key(Tensor): Key tensor (#batch, time2, size).
            value(Tensor): Value tensor (#batch, time2, size).
            pos_emb(Tensor): Positional embedding tensor (#batch, time1, size).
            mask(Tensor): Mask tensor (#batch, 1, time2) or (#batch, time1, time2).

        Returns:
            Tensor: Output tensor (#batch, time1, d_model).
        """
        heads_swap = [0, 2, 1, 3]
        last_two_swap = [0, 1, 3, 2]

        q, k, v = self.forward_qkv(query, key, value)
        # (batch, time1, head, d_k) so the (head, d_k) biases broadcast
        q = q.transpose(heads_swap)

        pos_batch = paddle.shape(pos_emb)[0]
        # (batch, time1, head, d_k) -> (batch, head, time1, d_k)
        p = paddle.reshape(
            self.linear_pos(pos_emb),
            [pos_batch, -1, self.h, self.d_k]).transpose(heads_swap)

        # both (batch, head, time1, d_k)
        q_u = (q + self.pos_bias_u).transpose(heads_swap)
        q_v = (q + self.pos_bias_v).transpose(heads_swap)

        # matrix a and matrix c
        # as described in https://arxiv.org/abs/1901.02860 Section 3.3
        # (batch, head, time1, time2)
        matrix_ac = paddle.matmul(q_u, k.transpose(last_two_swap))

        # matrix b and matrix d, (batch, head, time1, time1)
        matrix_bd = self.rel_shift(
            paddle.matmul(q_v, p.transpose(last_two_swap)))

        # (batch, head, time1, time2)
        scores = (matrix_ac + matrix_bd) / math.sqrt(self.d_k)
        return self.forward_attention(v, scores, mask)


================================================
FILE: paddlespeech/t2s/modules/transformer/decoder.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
# Dynamic convolution has been temporarily removed.
"""Decoder definition."""
import logging
from typing import Any
from typing import List
from typing import Tuple

import paddle
import paddle.nn.functional as F
from paddle import nn

from paddlespeech.t2s.modules.layer_norm import LayerNorm
from paddlespeech.t2s.modules.transformer.attention import MultiHeadedAttention
from paddlespeech.t2s.modules.transformer.decoder_layer import DecoderLayer
from paddlespeech.t2s.modules.transformer.embedding import PositionalEncoding
from paddlespeech.t2s.modules.transformer.lightconv import LightweightConvolution
from paddlespeech.t2s.modules.transformer.mask import subsequent_mask
from paddlespeech.t2s.modules.transformer.positionwise_feed_forward import PositionwiseFeedForward
from paddlespeech.t2s.modules.transformer.repeat import repeat


class Decoder(nn.Layer):
    """Transformer decoder module.

    Args:
        odim (int): 
            Output dimension.
        selfattention_layer_type (str): 
            Self-attention layer type, "selfattn" or "lightconv".
        attention_dim (int): 
            Dimension of attention.
        attention_heads (int): 
            The number of heads of multi head attention.
        conv_wshare (int):
            The number of kernel of convolution. Only used in
            selfattention_layer_type == "lightconv".
        conv_kernel_length (Union[int, str]):
            Kernel size of convolution. Either a single int shared by every block,
            or a "_"-joined str with one entry per block (e.g. 71_71_71_71_71_71).
            Only used in selfattention_layer_type == "lightconv".
        conv_usebias (bool): 
            Whether to use bias in convolution. Only used in
            selfattention_layer_type == "lightconv".
        linear_units(int): 
            The number of units of position-wise feed forward.
        num_blocks (int): 
            The number of decoder blocks.
        dropout_rate (float): 
            Dropout rate.
        positional_dropout_rate (float): 
            Dropout rate after adding positional encoding.
        self_attention_dropout_rate (float): 
            Dropout rate in self-attention.
        src_attention_dropout_rate (float): 
            Dropout rate in source-attention.
        input_layer (Union[str, nn.Layer]): 
            Input layer type.
        use_output_layer (bool): 
            Whether to use output layer.
        pos_enc_class (nn.Layer): 
            Positional encoding module class.
            `PositionalEncoding `or `ScaledPositionalEncoding`
        normalize_before (bool): 
            Whether to use layer_norm before the first block.
        concat_after (bool): 
            Whether to concat attention layer's input and output.
            if True, additional linear will be applied.
            i.e. x -> x + linear(concat(x, att(x)))
            if False, no additional linear will be applied. i.e. x -> x + att(x)

    """

    def __init__(
            self,
            odim,
            selfattention_layer_type="selfattn",
            attention_dim=256,
            attention_heads=4,
            conv_wshare=4,
            conv_kernel_length=11,
            conv_usebias=False,
            linear_units=2048,
            num_blocks=6,
            dropout_rate=0.1,
            positional_dropout_rate=0.1,
            self_attention_dropout_rate=0.0,
            src_attention_dropout_rate=0.0,
            input_layer="embed",
            use_output_layer=True,
            pos_enc_class=PositionalEncoding,
            normalize_before=True,
            concat_after=False, ):
        """Construct an Decoder object.

        Raises:
            NotImplementedError: If ``input_layer`` or
                ``selfattention_layer_type`` is not supported.
        """
        nn.Layer.__init__(self)
        if input_layer == "embed":
            self.embed = nn.Sequential(
                nn.Embedding(odim, attention_dim),
                pos_enc_class(attention_dim, positional_dropout_rate), )
        elif input_layer == "linear":
            self.embed = nn.Sequential(
                nn.Linear(odim, attention_dim),
                nn.LayerNorm(attention_dim),
                nn.Dropout(dropout_rate),
                nn.ReLU(),
                pos_enc_class(attention_dim, positional_dropout_rate), )
        elif isinstance(input_layer, nn.Layer):
            self.embed = nn.Sequential(
                input_layer,
                pos_enc_class(attention_dim, positional_dropout_rate))
        else:
            raise NotImplementedError("only `embed` or nn.Layer is supported.")
        self.normalize_before = normalize_before

        # self-attention module definition
        if selfattention_layer_type == "selfattn":
            logging.info("decoder self-attention layer type = self-attention")
            decoder_selfattn_layer = MultiHeadedAttention
            decoder_selfattn_layer_args = [
                (attention_heads, attention_dim, self_attention_dropout_rate, )
            ] * num_blocks
        elif selfattention_layer_type == "lightconv":
            logging.info(
                "decoder self-attention layer type = lightweight convolution")
            decoder_selfattn_layer = LightweightConvolution
            if isinstance(conv_kernel_length, int):
                # a plain int (the default) is shared by every block; calling
                # .split() on it would raise AttributeError
                kernel_lengths = [conv_kernel_length] * num_blocks
            else:
                kernel_pieces = conv_kernel_length.split("_")
                kernel_lengths = [
                    int(kernel_pieces[lnum]) for lnum in range(num_blocks)
                ]
            decoder_selfattn_layer_args = [(
                conv_wshare, attention_dim, self_attention_dropout_rate,
                kernel_lengths[lnum], True, conv_usebias, )
                                           for lnum in range(num_blocks)]
        else:
            # fail here with a clear message instead of a NameError raised
            # later from inside the layer factory below
            raise NotImplementedError(
                f"unsupported selfattention_layer_type: {selfattention_layer_type}"
            )

        self.decoders = repeat(
            num_blocks,
            lambda lnum: DecoderLayer(
                attention_dim,
                decoder_selfattn_layer(*decoder_selfattn_layer_args[lnum]),
                MultiHeadedAttention(attention_heads, attention_dim, src_attention_dropout_rate),
                PositionwiseFeedForward(attention_dim, linear_units, dropout_rate),
                dropout_rate,
                normalize_before,
                concat_after, ), )
        self.selfattention_layer_type = selfattention_layer_type
        if self.normalize_before:
            self.after_norm = LayerNorm(attention_dim)
        if use_output_layer:
            self.output_layer = nn.Linear(attention_dim, odim)
        else:
            self.output_layer = None

    def forward(self, tgt, tgt_mask, memory, memory_mask):
        """Forward decoder.
        Args:
            tgt(Tensor): 
                Input token ids, int64 (#batch, maxlen_out) if input_layer == "embed".
                In the other case, input tensor (#batch, maxlen_out, odim).
            tgt_mask(Tensor): 
                Input token mask (#batch, maxlen_out).
            memory(Tensor): 
                Encoded memory, float32 (#batch, maxlen_in, feat).
            memory_mask(Tensor): 
                Encoded memory mask (#batch, maxlen_in).

        Returns:
            Tensor:
                Decoded token score before softmax (#batch, maxlen_out, odim) if use_output_layer is True. 
                In the other case,final block outputs (#batch, maxlen_out, attention_dim).
            Tensor: 
                Score mask before softmax (#batch, maxlen_out).

        """
        x = self.embed(tgt)
        x, tgt_mask, memory, memory_mask = self.decoders(x, tgt_mask, memory,
                                                         memory_mask)
        if self.normalize_before:
            x = self.after_norm(x)
        if self.output_layer is not None:
            x = self.output_layer(x)
        return x, tgt_mask

    def forward_one_step(self, tgt, tgt_mask, memory, cache=None):
        """Forward one step.

        Args:
            tgt(Tensor): 
                Input token ids, int64 (#batch, maxlen_out).
            tgt_mask(Tensor): 
                Input token mask (#batch, maxlen_out).
            memory(Tensor): 
                Encoded memory, float32 (#batch, maxlen_in, feat).
            cache((List[Tensor]), optional): 
                List of cached tensors, one per decoder layer. (Default value = None)

        Returns:
            Tensor: 
                Result for the last position only, indexed as ``[:, -1]``.
                Log-probabilities over the output layer's units when the output layer
                is used, otherwise the (optionally normalized) last hidden state.
            List[Tensor]: 
                List of cache tensors of each decoder layer.

        """
        x = self.embed(tgt)
        if cache is None:
            cache = [None] * len(self.decoders)
        new_cache = []
        for c, decoder in zip(cache, self.decoders):
            # no memory mask is used during step-wise decoding
            x, tgt_mask, memory, memory_mask = decoder(
                x, tgt_mask, memory, None, cache=c)
            # the output of each layer becomes that layer's cache for the next step
            new_cache.append(x)

        # only the newest position is needed to predict the next token
        if self.normalize_before:
            y = self.after_norm(x[:, -1])
        else:
            y = x[:, -1]
        if self.output_layer is not None:
            y = F.log_softmax(self.output_layer(y), axis=-1)

        return y, new_cache

    # beam search API (see ScorerInterface)
    def score(self, ys, state, x):
        """Score a single hypothesis.

        Args:
            ys(Tensor): 
                Prefix tokens of one hypothesis; a batch axis is added before decoding.
            state: 
                Cache from the previous call, passed on to ``forward_one_step``.
                Discarded when the self-attention type is not "selfattn".
            x(Tensor): 
                Encoder output of one utterance; a batch axis is added before decoding.

        Returns:
            Tensor: 
                Output of ``forward_one_step`` with the batch axis removed.
            List[Tensor]: 
                New cache.
        """
        ys_mask = subsequent_mask(len(ys)).unsqueeze(0)
        if self.selfattention_layer_type != "selfattn":
            # TODO(karita): implement cache
            logging.warning(
                f"{self.selfattention_layer_type} does not support cached decoding."
            )
            state = None
        logp, state = self.forward_one_step(
            ys.unsqueeze(0), ys_mask, x.unsqueeze(0), cache=state)
        return logp.squeeze(0), state

    # batch beam search API (see BatchScorerInterface)
    def batch_score(self,
                    ys: paddle.Tensor,
                    states: List[Any],
                    xs: paddle.Tensor) -> Tuple[paddle.Tensor, List[Any]]:
        """Score new token batch (required).

        Args:
            ys(Tensor): 
                paddle.int64 prefix tokens (n_batch, ylen).
            states(List[Any]): 
                Scorer states for prefix tokens.
            xs(Tensor): 
                The encoder feature that generates ys (n_batch, xlen, n_feat).

        Returns:
            tuple[Tensor, List[Any]]:
                Tuple of batchified scores for next token with shape of `(n_batch, n_vocab)` and next state list for ys.

        """
        # merge states
        n_batch = len(ys)
        n_layers = len(self.decoders)
        if states[0] is None:
            batch_state = None
        else:
            # transpose state of [batch, layer] into [layer, batch]
            batch_state = [
                paddle.stack([states[b][i] for b in range(n_batch)])
                for i in range(n_layers)
            ]

        # batch decoding
        ys_mask = subsequent_mask(ys.shape[-1]).unsqueeze(0)
        logp, states = self.forward_one_step(ys, ys_mask, xs, cache=batch_state)

        # transpose state of [layer, batch] into [batch, layer]
        state_list = [[states[i][b] for i in range(n_layers)]
                      for b in range(n_batch)]
        return logp, state_list


================================================
FILE: paddlespeech/t2s/modules/transformer/decoder_layer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""Decoder self-attention layer definition."""
import paddle
from paddle import nn

from paddlespeech.t2s.modules.layer_norm import LayerNorm


class DecoderLayer(nn.Layer):
    """One decoder block: target attention, memory attention, feed-forward.

    Each of the three sub-layers is wrapped in a residual connection and
    has its own layer normalisation, applied either before the sub-layer
    (``normalize_before=True``) or after the residual sum.

    Args:
        size (int):
            Input dimension.
        self_attn (nn.Layer):
            Attention module applied to the target sequence; it is called
            as ``self_attn(query, tgt, tgt, mask)``.
            `MultiHeadedAttention` instance can be used as the argument.
        src_attn (nn.Layer):
            Attention module applied between the target and the encoder
            memory; it is called as ``src_attn(x, memory, memory, memory_mask)``.
            `MultiHeadedAttention` instance can be used as the argument.
        feed_forward (nn.Layer):
            Feed-forward module instance.
            `PositionwiseFeedForward`, `MultiLayeredConv1d`, or `Conv1dLinear` instance can be used as the argument.
        dropout_rate (float):
            Dropout rate.
        normalize_before (bool):
            Whether to apply layer normalisation before each sub-layer
            instead of after the residual sum.
        concat_after (bool):
            Whether to concat attention layer's input and output.
            if True, additional linear will be applied.
            i.e. x -> x + linear(concat(x, att(x)))
            if False, no additional linear will be applied. i.e. x -> x + att(x)

    """

    def __init__(
            self,
            size,
            self_attn,
            src_attn,
            feed_forward,
            dropout_rate,
            normalize_before=True,
            concat_after=False, ):
        """Store the sub-layers and create the norms / optional projections."""
        super().__init__()
        # plain configuration values
        self.size = size
        self.normalize_before = normalize_before
        self.concat_after = concat_after
        # sub-layers (kept in their original registration order)
        self.self_attn = self_attn
        self.src_attn = src_attn
        self.feed_forward = feed_forward
        self.norm1 = LayerNorm(size)
        self.norm2 = LayerNorm(size)
        self.norm3 = LayerNorm(size)
        self.dropout = nn.Dropout(dropout_rate)
        if concat_after:
            # project concat(input, attention_output) back to `size`
            self.concat_linear1 = nn.Linear(size + size, size)
            self.concat_linear2 = nn.Linear(size + size, size)

    def forward(self, tgt, tgt_mask, memory, memory_mask, cache=None):
        """Run the decoder block.

        Args:
            tgt(Tensor):
                Input tensor (#batch, maxlen_out, size).
            tgt_mask(Tensor):
                Mask for the input tensor, or None. When `cache` is given
                it is sliced with three indices, so it is presumably
                (#batch, maxlen_out, maxlen_out) -- TODO confirm against caller.
            memory(Tensor):
                Encoded memory, float32 (#batch, maxlen_in, size).
            memory_mask(Tensor):
                Encoded memory mask (#batch, maxlen_in).
            cache(Tensor, optional):
                Output of this layer for the previous frames; its shape
                must equal [#batch, maxlen_out - 1, size]. (Default value = None)
        Returns:
            Tensor
                Output tensor(#batch, maxlen_out, size).
            Tensor
                The target mask. When both `cache` and `tgt_mask` are
                given this is the int64-cast copy of the mask.
            Tensor
                Encoded memory (#batch, maxlen_in, size).
            Tensor
                Encoded memory mask (#batch, maxlen_in).

        """
        pre_norm = self.normalize_before
        use_concat = self.concat_after

        # ---- sub-layer 1: attention over the target sequence ----
        shortcut = tgt
        if pre_norm:
            tgt = self.norm1(tgt)

        if cache is not None:
            # incremental decoding: only the last frame is used as query,
            # earlier frames are taken from `cache`
            assert cache.shape == [
                tgt.shape[0],
                tgt.shape[1] - 1,
                self.size,
            ], f"{cache.shape} == {(tgt.shape[0], tgt.shape[1] - 1, self.size)}"
            query = tgt[:, -1:, :]
            shortcut = shortcut[:, -1:, :]
            query_mask = None
            if tgt_mask is not None:
                # the mask is cast to int64 for slicing and the slice is
                # cast back to bool; `tgt_mask` itself stays int64
                tgt_mask = paddle.cast(tgt_mask, dtype="int64")
                query_mask = paddle.cast(tgt_mask[:, -1:, :], dtype="bool")
        else:
            query = tgt
            query_mask = tgt_mask

        attended = self.self_attn(query, tgt, tgt, query_mask)
        if use_concat:
            merged = paddle.concat((query, attended), axis=-1)
            x = shortcut + self.concat_linear1(merged)
        else:
            x = shortcut + self.dropout(attended)
        if not pre_norm:
            x = self.norm1(x)

        # ---- sub-layer 2: attention over the encoder memory ----
        shortcut = x
        if pre_norm:
            x = self.norm2(x)
        attended = self.src_attn(x, memory, memory, memory_mask)
        if use_concat:
            merged = paddle.concat((x, attended), axis=-1)
            x = shortcut + self.concat_linear2(merged)
        else:
            x = shortcut + self.dropout(attended)
        if not pre_norm:
            x = self.norm2(x)

        # ---- sub-layer 3: feed-forward ----
        shortcut = x
        if pre_norm:
            x = self.norm3(x)
        x = shortcut + self.dropout(self.feed_forward(x))
        if not pre_norm:
            x = self.norm3(x)

        if cache is not None:
            # prepend the cached frames so the output covers the full length
            x = paddle.concat([cache, x], axis=1)

        return x, tgt_mask, memory, memory_mask


================================================
FILE: paddlespeech/t2s/modules/transformer/embedding.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""Positional Encoding Module."""
import math

import paddle
from paddle import nn


class PositionalEncoding(nn.Layer):
    """Sinusoidal absolute positional encoding.

    Args:
        d_model (int):
            Embedding dimension.
        dropout_rate (float):
            Dropout rate.
        max_len (int):
            Length of the table built at construction time.
        dtype (str):
            dtype used for the position indices and the frequency terms.
        reverse (bool):
            Whether to reverse the input position.
    """

    def __init__(self,
                 d_model,
                 dropout_rate,
                 max_len=5000,
                 dtype="float32",
                 reverse=False):
        """Store the settings and build the initial table of length `max_len`."""
        super().__init__()
        self.d_model = d_model
        self.reverse = reverse
        self.xscale = math.sqrt(d_model)
        self.dropout = nn.Dropout(p=dropout_rate)
        self.pe = None
        self.dtype = dtype
        # only the second dimension of this placeholder is read by extend_pe
        placeholder = paddle.expand(paddle.zeros([1]), (1, max_len))
        self.extend_pe(placeholder)

    def extend_pe(self, x):
        """Rebuild `self.pe` so that it covers `x.shape[1]` positions."""
        length = paddle.shape(x)[1]
        table = paddle.zeros([length, self.d_model])
        if self.reverse:
            # positions count down: length - 1, ..., 0
            steps = paddle.arange(length - 1, -1, -1.0, dtype=self.dtype)
        else:
            steps = paddle.arange(0, length, dtype=self.dtype)
        position = steps.unsqueeze(1)
        even_dims = paddle.arange(0, self.d_model, 2, dtype=self.dtype)
        div_term = paddle.exp(even_dims * -(math.log(10000.0) / self.d_model))
        angles = position * div_term
        # sine on even feature indices, cosine on odd ones
        table[:, 0::2] = paddle.sin(angles)
        table[:, 1::2] = paddle.cos(angles)
        self.pe = paddle.assign(table.unsqueeze(0))

    def forward(self, x: paddle.Tensor):
        """Scale the input by sqrt(d_model), add the encoding, apply dropout.

        Args:
            x (Tensor):
                Input tensor (batch, time, `*`).

        Returns:
            Tensor: Encoded tensor (batch, time, `*`).
        """
        # the table is rebuilt for the current input on every call
        self.extend_pe(x)
        num_steps = paddle.shape(x)[1]
        encoded = x * self.xscale + self.pe[:, :num_steps]
        return self.dropout(encoded)


class ScaledPositionalEncoding(PositionalEncoding):
    """Scaled positional encoding module.
    See Sec. 3.2  https://arxiv.org/abs/1809.08895

    The positional table is multiplied by a learnable scalar ``alpha``
    (initialised to one) before it is added to the input. Unlike the
    parent class, the input itself is not multiplied by ``xscale``.

    Args:
        d_model (int):
            Embedding dimension.
        dropout_rate (float):
            Dropout rate.
        max_len (int):
            Maximum input length.
        dtype (str):
            dtype of param
    """

    def __init__(self, d_model, dropout_rate, max_len=5000, dtype="float32"):
        """Build the table via the parent class and create ``alpha``."""
        super().__init__(
            d_model=d_model,
            dropout_rate=dropout_rate,
            max_len=max_len,
            dtype=dtype)
        x = paddle.ones([1], dtype=self.dtype)
        self.alpha = paddle.create_parameter(
            shape=x.shape,
            dtype=self.dtype,
            default_initializer=nn.initializer.Assign(x))

    def reset_parameters(self):
        """Reset ``alpha`` to one, keeping it a trainable parameter."""
        # The value is written in place instead of rebinding `self.alpha`
        # to a plain tensor: rebinding would drop the parameter created in
        # __init__.
        # NOTE(review): paddle's Layer is expected to reject assigning a
        # non-Parameter tensor to a registered parameter name -- confirm
        # against the installed Paddle version.
        self.alpha.set_value(
            paddle.ones(self.alpha.shape, dtype=self.alpha.dtype))

    def forward(self, x):
        """Add positional encoding.

        Args:
            x (Tensor):
                Input tensor (batch, time, `*`).
        Returns:
            Tensor: Encoded tensor (batch, time, `*`).
        """
        # the table is rebuilt for the current input on every call
        self.extend_pe(x)
        T = paddle.shape(x)[1]
        x = x + self.alpha * self.pe[:, :T]
        return self.dropout(x)


class RelPositionalEncoding(nn.Layer):
    """Relative positional encoding module (new implementation).
    Details can be found in https://github.com/espnet/espnet/pull/2816.
    See : Appendix B in https://arxiv.org/abs/1901.02860

    Args:
        d_model (int):
            Embedding dimension.
        dropout_rate (float):
            Dropout rate.
        max_len (int):
            Maximum input length.
        dtype (str):
            dtype used for the position indices and the frequency terms.
    """

    def __init__(self, d_model, dropout_rate, max_len=5000, dtype="float32"):
        """Store the settings and build the initial table for `max_len` positions."""
        super().__init__()
        self.d_model = d_model
        self.xscale = math.sqrt(d_model)
        self.dropout = nn.Dropout(p=dropout_rate)
        self.pe = None
        self.dtype = dtype
        # only the second dimension of this placeholder is read by extend_pe
        placeholder = paddle.expand(paddle.zeros([1]), (1, max_len))
        self.extend_pe(placeholder)

    def extend_pe(self, x):
        """Build `self.pe` unless the stored table is already long enough."""
        if self.pe is not None:
            # the stored table holds the positive and the negative half,
            # i.e. 2 * length - 1 entries; keep it when it still fits
            if paddle.shape(self.pe)[1] >= paddle.shape(x)[1] * 2 - 1:
                return
        # With `i` the query position and `j` the key position, positive
        # relative positions are used when the key is to the left (i>j)
        # and negative relative positions otherwise (i<j).
        length = paddle.shape(x)[1]
        positive_half = paddle.zeros([length, self.d_model])
        negative_half = paddle.zeros([length, self.d_model])
        position = paddle.arange(0, length, dtype=self.dtype).unsqueeze(1)
        even_dims = paddle.arange(0, self.d_model, 2, dtype=self.dtype)
        div_term = paddle.exp(even_dims * -(math.log(10000.0) / self.d_model))
        positive_half[:, 0::2] = paddle.sin(position * div_term)
        positive_half[:, 1::2] = paddle.cos(position * div_term)
        negative_half[:, 0::2] = paddle.sin(-1 * position * div_term)
        negative_half[:, 1::2] = paddle.cos(-1 * position * div_term)

        # Flip the positive half and append the negative half without its
        # first row (offset zero is already in the positive half). This
        # layout supports the shifting trick of
        # https://arxiv.org/abs/1901.02860
        positive_half = paddle.flip(positive_half, [0]).unsqueeze(0)
        negative_half = negative_half[1:].unsqueeze(0)
        self.pe = paddle.concat([positive_half, negative_half], axis=1)

    def forward(self, x: paddle.Tensor):
        """Scale the input and cut the matching window from the table.
        Args:
            x (Tensor):
                Input tensor (batch, time, `*`).
        Returns:
            Tensor: Scaled input after dropout (batch, time, `*`).
            Tensor: Slice of the table after dropout; the slice spans
                2 * time - 1 entries around the table centre.
        """
        self.extend_pe(x)
        x = x * self.xscale
        num_steps = paddle.shape(x)[1]
        table_len = paddle.shape(self.pe)[1]
        # index of the table centre
        center = paddle.cast(paddle.floor(table_len / 2), dtype='int32')
        pos_emb = self.pe[:, center - num_steps + 1:center + num_steps]
        x_out = self.dropout(x)
        pos_out = self.dropout(pos_emb)
        return x_out, pos_out


class LegacyRelPositionalEncoding(PositionalEncoding):
    """Relative positional encoding module (old version).

    Details can be found in https://github.com/espnet/espnet/pull/2816.

    See : Appendix B in https://arxiv.org/abs/1901.02860

    The parent class is always constructed with ``reverse=True``. The
    table is computed in float32 and is only rebuilt when the input is
    longer than the stored table.

    Args:
        d_model (int):
            Embedding dimension.
        dropout_rate (float):
            Dropout rate.
        max_len (int):
            Maximum input length.

    """

    def __init__(self, d_model: int, dropout_rate: float, max_len: int=5000):
        """
        Args:
            d_model (int):
                Embedding dimension.
            dropout_rate (float):
                Dropout rate.
            max_len (int, optional):
                Maximum input length. Defaults to 5000.
        """
        super().__init__(d_model, dropout_rate, max_len, reverse=True)

    def extend_pe(self, x):
        """Build `self.pe` unless the stored table already covers `x`."""
        length = paddle.shape(x)[1]
        if self.pe is not None:
            if paddle.shape(self.pe)[1] >= length:
                return
        table = paddle.zeros((length, self.d_model))
        if self.reverse:
            # positions count down: length - 1, ..., 0
            steps = paddle.arange(length - 1, -1, -1.0, dtype=paddle.float32)
        else:
            steps = paddle.arange(0, length, dtype=paddle.float32)
        position = steps.unsqueeze(1)
        even_dims = paddle.arange(0, self.d_model, 2, dtype=paddle.float32)
        div_term = paddle.exp(even_dims * -(math.log(10000.0) / self.d_model))
        angles = position * div_term
        # sine on even feature indices, cosine on odd ones
        table[:, 0::2] = paddle.sin(angles)
        table[:, 1::2] = paddle.cos(angles)
        self.pe = table.unsqueeze(0)

    def forward(self, x: paddle.Tensor):
        """Scale the input and return it together with the positional table.
        Args:
            x (Tensor):
                Input tensor (batch, time, `*`).
        Returns:
            Tensor:
                Scaled input after dropout (batch, time, `*`).
            Tensor:
                Positional embedding tensor after dropout (1, time, `*`).
        """
        self.extend_pe(x)
        x = x * self.xscale
        num_steps = paddle.shape(x)[1]
        pos_emb = self.pe[:, :num_steps]
        x_out = self.dropout(x)
        pos_out = self.dropout(pos_emb)
        return x_out, pos_out


================================================
FILE: paddlespeech/t2s/modules/transformer/encoder.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
from typing import List
from typing import Union

import paddle
from paddle import nn

from paddlespeech.t2s.modules.activation import get_activation
from paddlespeech.t2s.modules.conformer.convolution import ConvolutionModule
from paddlespeech.t2s.modules.conformer.encoder_layer import EncoderLayer as ConformerEncoderLayer
from paddlespeech.t2s.modules.layer_norm import LayerNorm
from paddlespeech.t2s.modules.transformer.attention import MultiHeadedAttention
from paddlespeech.t2s.modules.transformer.attention import RelPositionMultiHeadedAttention
from paddlespeech.t2s.modules.transformer.embedding import PositionalEncoding
from paddlespeech.t2s.modules.transformer.embedding import RelPositionalEncoding
from paddlespeech.t2s.modules.transformer.embedding import ScaledPositionalEncoding
from paddlespeech.t2s.modules.transformer.encoder_layer import EncoderLayer
from paddlespeech.t2s.modules.transformer.multi_layer_conv import Conv1dLinear
from paddlespeech.t2s.modules.transformer.multi_layer_conv import MultiLayeredConv1d
from paddlespeech.t2s.modules.transformer.positionwise_feed_forward import PositionwiseFeedForward
from paddlespeech.t2s.modules.transformer.repeat import repeat
from paddlespeech.t2s.modules.transformer.subsampling import Conv2dSubsampling


class BaseEncoder(nn.Layer):
    """Base Encoder module.

    Builds the input embedding, a stack of `num_blocks` encoder layers
    (transformer or conformer) and, when `normalize_before` is set, a
    final layer normalisation.

    Args:
        idim (int):
            Input dimension.
        attention_dim (int):
            Dimension of attention.
        attention_heads (int):
            The number of heads of multi head attention.
        linear_units (int):
            The number of units of position-wise feed forward.
        num_blocks (int):
            The number of encoder blocks.
        dropout_rate (float):
            Dropout rate.
        positional_dropout_rate (float):
            Dropout rate after adding positional encoding.
        attention_dropout_rate (float):
            Dropout rate in attention.
        input_layer (Union[str, nn.Layer, None]):
            Input layer type: "linear", "conv2d", "embed", a layer
            instance, or None for positional encoding only.
        normalize_before (bool):
            Whether to use layer_norm before the first block.
        concat_after (bool):
            Whether to concat attention layer's input and output.
            if True, additional linear will be applied.
            i.e. x -> x + linear(concat(x, att(x)))
            if False, no additional linear will be applied. i.e. x -> x + att(x)
        positionwise_layer_type (str):
            "linear", "conv1d", or "conv1d-linear".
        positionwise_conv_kernel_size (int):
            Kernel size of positionwise conv1d layer.
        macaron_style (bool):
            Whether to use macaron style for positionwise layer.
        pos_enc_layer_type (str):
            Encoder positional encoding layer type:
            "abs_pos", "scaled_abs_pos", or "rel_pos".
        selfattention_layer_type (str):
            Encoder attention layer type: "selfattn" or "rel_selfattn".
        activation_type (str):
            Encoder activation function type.
        use_cnn_module (bool):
            Whether to use convolution module.
        zero_triu (bool):
            Whether to zero the upper triangular part of attention matrix.
        cnn_module_kernel (int):
            Kernel size of convolution module.
        padding_idx (int):
            Padding idx for input_layer=embed.
        stochastic_depth_rate (float):
            Maximum probability to skip the encoder layer.
        intermediate_layers (Union[List[int], None]):
            indices of intermediate CTC layer.
            indices start from 1.
            if not None, intermediate outputs are returned (which changes return type
            signature.)
            Only stored on the instance when encoder_type is "conformer".
        encoder_type (str): "transformer", or "conformer".

    Raises:
        NotImplementedError: If `encoder_type` or `positionwise_layer_type`
            is not supported.
        ValueError: If `input_layer`, `pos_enc_layer_type` or
            `selfattention_layer_type` is unknown.
    """

    def __init__(self,
                 idim: int,
                 attention_dim: int=256,
                 attention_heads: int=4,
                 linear_units: int=2048,
                 num_blocks: int=6,
                 dropout_rate: float=0.1,
                 positional_dropout_rate: float=0.1,
                 attention_dropout_rate: float=0.0,
                 input_layer: Union[str, nn.Layer, None]="conv2d",
                 normalize_before: bool=True,
                 concat_after: bool=False,
                 positionwise_layer_type: str="linear",
                 positionwise_conv_kernel_size: int=1,
                 macaron_style: bool=False,
                 pos_enc_layer_type: str="abs_pos",
                 selfattention_layer_type: str="selfattn",
                 activation_type: str="swish",
                 use_cnn_module: bool=False,
                 zero_triu: bool=False,
                 cnn_module_kernel: int=31,
                 padding_idx: int=-1,
                 stochastic_depth_rate: float=0.0,
                 intermediate_layers: Union[List[int], None]=None,
                 encoder_type: str="transformer"):
        """Construct an Base Encoder object."""
        super().__init__()
        activation = get_activation(activation_type)
        pos_enc_class = self.get_pos_enc_class(pos_enc_layer_type,
                                               selfattention_layer_type)
        self.encoder_type = encoder_type

        # get_embed overwrites this with 4 for input_layer="conv2d"
        self.conv_subsampling_factor = 1
        self.embed = self.get_embed(
            idim=idim,
            input_layer=input_layer,
            attention_dim=attention_dim,
            pos_enc_class=pos_enc_class,
            dropout_rate=dropout_rate,
            positional_dropout_rate=positional_dropout_rate,
            padding_idx=padding_idx)

        self.normalize_before = normalize_before

        # self-attention module definition
        encoder_selfattn_layer, encoder_selfattn_layer_args = self.get_encoder_selfattn_layer(
            selfattention_layer_type=selfattention_layer_type,
            attention_heads=attention_heads,
            attention_dim=attention_dim,
            attention_dropout_rate=attention_dropout_rate,
            zero_triu=zero_triu,
            pos_enc_layer_type=pos_enc_layer_type)
        # feed-forward module definition
        positionwise_layer, positionwise_layer_args = self.get_positionwise_layer(
            positionwise_layer_type, attention_dim, linear_units, dropout_rate,
            positionwise_conv_kernel_size, activation)

        # convolution module definition
        convolution_layer = ConvolutionModule
        convolution_layer_args = (attention_dim, cnn_module_kernel, activation)

        # each lambda call builds fresh sub-modules for one block
        if self.encoder_type == "transformer":
            self.encoders = repeat(
                num_blocks,
                lambda lnum: EncoderLayer(
                    attention_dim,
                    encoder_selfattn_layer(*encoder_selfattn_layer_args),
                    positionwise_layer(*positionwise_layer_args),
                    dropout_rate,
                    normalize_before,
                    concat_after, ), )

        elif self.encoder_type == "conformer":
            self.encoders = repeat(
                num_blocks,
                lambda lnum: ConformerEncoderLayer(
                    attention_dim,
                    encoder_selfattn_layer(*encoder_selfattn_layer_args),
                    positionwise_layer(*positionwise_layer_args),
                    positionwise_layer(*positionwise_layer_args) if macaron_style else None,
                    convolution_layer(*convolution_layer_args) if use_cnn_module else None,
                    dropout_rate,
                    normalize_before,
                    concat_after,
                    # skip probability grows linearly with the block index
                    stochastic_depth_rate * float(1 + lnum) / num_blocks, ), )
            self.intermediate_layers = intermediate_layers
        else:
            # the message now names the option that was actually rejected
            raise NotImplementedError(
                "Support only transformer or conformer.")

        if self.normalize_before:
            self.after_norm = LayerNorm(attention_dim)

    def get_positionwise_layer(self,
                               positionwise_layer_type: str="linear",
                               attention_dim: int=256,
                               linear_units: int=2048,
                               dropout_rate: float=0.1,
                               positionwise_conv_kernel_size: int=1,
                               activation: nn.Layer=nn.ReLU()):
        """Define positionwise layer.

        Returns:
            tuple: The feed-forward class and the positional arguments to
            construct it with. `activation` is only forwarded for the
            "linear" type.

        Raises:
            NotImplementedError: If `positionwise_layer_type` is unknown.
        """
        if positionwise_layer_type == "linear":
            positionwise_layer = PositionwiseFeedForward
            positionwise_layer_args = (attention_dim, linear_units,
                                       dropout_rate, activation)
        elif positionwise_layer_type == "conv1d":
            positionwise_layer = MultiLayeredConv1d
            positionwise_layer_args = (attention_dim, linear_units,
                                       positionwise_conv_kernel_size,
                                       dropout_rate, )
        elif positionwise_layer_type == "conv1d-linear":
            positionwise_layer = Conv1dLinear
            positionwise_layer_args = (attention_dim, linear_units,
                                       positionwise_conv_kernel_size,
                                       dropout_rate, )
        else:
            raise NotImplementedError(
                "Support only linear, conv1d, or conv1d-linear.")
        return positionwise_layer, positionwise_layer_args

    def get_encoder_selfattn_layer(self,
                                   selfattention_layer_type: str="selfattn",
                                   attention_heads: int=4,
                                   attention_dim: int=256,
                                   attention_dropout_rate: float=0.0,
                                   zero_triu: bool=False,
                                   pos_enc_layer_type: str="abs_pos"):
        """Define the self-attention layer.

        Returns:
            tuple: The attention class and the positional arguments to
            construct it with.

        Raises:
            ValueError: If `selfattention_layer_type` is unknown.
        """
        if selfattention_layer_type == "selfattn":
            encoder_selfattn_layer = MultiHeadedAttention
            encoder_selfattn_layer_args = (attention_heads, attention_dim,
                                           attention_dropout_rate, )
        elif selfattention_layer_type == "rel_selfattn":
            # relative attention is only accepted with relative positions
            assert pos_enc_layer_type == "rel_pos"
            encoder_selfattn_layer = RelPositionMultiHeadedAttention
            encoder_selfattn_layer_args = (attention_heads, attention_dim,
                                           attention_dropout_rate, zero_triu, )
        else:
            raise ValueError("unknown encoder_attn_layer: " +
                             str(selfattention_layer_type))
        return encoder_selfattn_layer, encoder_selfattn_layer_args

    def get_pos_enc_class(self,
                          pos_enc_layer_type: str="abs_pos",
                          selfattention_layer_type: str="selfattn"):
        """Select the positional encoding class.

        Raises:
            ValueError: If `pos_enc_layer_type` is unknown.
        """
        if pos_enc_layer_type == "abs_pos":
            pos_enc_class = PositionalEncoding
        elif pos_enc_layer_type == "scaled_abs_pos":
            pos_enc_class = ScaledPositionalEncoding
        elif pos_enc_layer_type == "rel_pos":
            # relative positions are only accepted with relative attention
            assert selfattention_layer_type == "rel_selfattn"
            pos_enc_class = RelPositionalEncoding
        else:
            raise ValueError("unknown pos_enc_layer: " +
                             str(pos_enc_layer_type))
        return pos_enc_class

    def get_embed(self,
                  idim,
                  input_layer="conv2d",
                  attention_dim: int=256,
                  pos_enc_class=PositionalEncoding,
                  dropout_rate: float=0.1,
                  positional_dropout_rate: float=0.1,
                  padding_idx: int=-1):
        """Build the input embedding followed by the positional encoding.

        Sets `self.conv_subsampling_factor` to 4 for `input_layer="conv2d"`.

        Raises:
            ValueError: If `input_layer` is not a supported value.
        """

        if input_layer == "linear":
            embed = nn.Sequential(
                nn.Linear(idim, attention_dim),
                nn.LayerNorm(attention_dim),
                nn.Dropout(dropout_rate),
                nn.ReLU(),
                pos_enc_class(attention_dim, positional_dropout_rate), )
        elif input_layer == "conv2d":
            embed = Conv2dSubsampling(
                idim,
                attention_dim,
                dropout_rate,
                pos_enc_class(attention_dim, positional_dropout_rate), )
            self.conv_subsampling_factor = 4
        elif input_layer == "embed":
            embed = nn.Sequential(
                nn.Embedding(idim, attention_dim, padding_idx=padding_idx),
                pos_enc_class(attention_dim, positional_dropout_rate), )
        elif isinstance(input_layer, nn.Layer):
            embed = nn.Sequential(
                input_layer,
                pos_enc_class(attention_dim, positional_dropout_rate), )
        elif input_layer is None:
            embed = nn.Sequential(
                pos_enc_class(attention_dim, positional_dropout_rate))
        else:
            # str() keeps this a ValueError when input_layer is not a string
            raise ValueError("unknown input_layer: " + str(input_layer))

        return embed

    def forward(self, xs, masks):
        """Encode input sequence.

        Args:
            xs (Tensor):
                Input tensor (#batch, time, idim).
            masks (Tensor):
                Mask tensor (#batch, 1, time).

        Returns:
            Tensor:
                Output tensor (#batch, time, attention_dim).
            Tensor:
                Mask tensor (#batch, 1, time).
        """
        xs = self.embed(xs)
        xs, masks = self.encoders(xs, masks)
        if self.normalize_before:
            xs = self.after_norm(xs)
        return xs, masks


class TransformerEncoder(BaseEncoder):
    """Transformer encoder module.

    A `BaseEncoder` fixed to ``encoder_type="transformer"``; every other
    constructor argument is passed through to the base class.

    Args:
        idim (int):
            Input dimension.
        attention_dim (int):
            Dimention of attention.
        attention_heads (int):
            The number of heads of multi head attention.
        linear_units (int):
            The number of units of position-wise feed forward.
        num_blocks (int):
            The number of encoder blocks.
        dropout_rate (float):
            Dropout rate.
        positional_dropout_rate (float):
            Dropout rate after adding positional encoding.
        attention_dropout_rate (float):
            Dropout rate in attention.
        input_layer (Union[str, paddle.nn.Layer]):
            Input layer type.
        pos_enc_layer_type (str):
            Encoder positional encoding layer type.
        normalize_before (bool):
            Whether to use layer_norm before the first block.
        concat_after (bool):
            Whether to concat attention layer's input and output.
            if True, additional linear will be applied.
            i.e. x -> x + linear(concat(x, att(x)))
            if False, no additional linear will be applied. i.e. x -> x + att(x)
        positionwise_layer_type (str):
            "linear", "conv1d", or "conv1d-linear".
        positionwise_conv_kernel_size (int):
            Kernel size of positionwise conv1d layer.
        selfattention_layer_type (str):
            Encoder attention layer type.
        activation_type (str):
            Encoder activation function type.
        padding_idx (int):
            Padding idx for input_layer=embed.
    """

    def __init__(
            self,
            idim,
            attention_dim: int=256,
            attention_heads: int=4,
            linear_units: int=2048,
            num_blocks: int=6,
            dropout_rate: float=0.1,
            positional_dropout_rate: float=0.1,
            attention_dropout_rate: float=0.0,
            input_layer: str="conv2d",
            pos_enc_layer_type: str="abs_pos",
            normalize_before: bool=True,
            concat_after: bool=False,
            positionwise_layer_type: str="linear",
            positionwise_conv_kernel_size: int=1,
            selfattention_layer_type: str="selfattn",
            activation_type: str="relu",
            padding_idx: int=-1, ):
        """Forward every option to the base class as a transformer encoder."""
        base_options = dict(
            attention_dim=attention_dim,
            attention_heads=attention_heads,
            linear_units=linear_units,
            num_blocks=num_blocks,
            dropout_rate=dropout_rate,
            positional_dropout_rate=positional_dropout_rate,
            attention_dropout_rate=attention_dropout_rate,
            input_layer=input_layer,
            pos_enc_layer_type=pos_enc_layer_type,
            normalize_before=normalize_before,
            concat_after=concat_after,
            positionwise_layer_type=positionwise_layer_type,
            positionwise_conv_kernel_size=positionwise_conv_kernel_size,
            selfattention_layer_type=selfattention_layer_type,
            activation_type=activation_type,
            padding_idx=padding_idx,
            encoder_type="transformer")
        super().__init__(idim, **base_options)

    def forward(self,
                xs: paddle.Tensor,
                masks: paddle.Tensor,
                note_emb: paddle.Tensor=None,
                note_dur_emb: paddle.Tensor=None,
                is_slur_emb: paddle.Tensor=None,
                scale: int=16):
        """Encode the input sequence, optionally mixing in note embeddings.

        Args:
            xs(Tensor):
                Input tensor (#batch, time, idim).
            masks(Tensor):
                Mask tensor (#batch, 1, time).
            note_emb(Tensor):
                Input tensor (#batch, time, attention_dim). When it is
                None the three extra embeddings are ignored.
            note_dur_emb(Tensor):
                Input tensor (#batch, time, attention_dim).
            is_slur_emb(Tensor):
                Input tensor (#batch, time, attention_dim).
            scale(int):
                Factor applied to the embedded input before the three
                extra embeddings are added; only used when `note_emb`
                is given.

        Returns:
            Tensor:
                Output tensor (#batch, time, attention_dim).
            Tensor:
                Mask tensor (#batch, 1, time).
        """
        hidden = self.embed(xs)
        if note_emb is not None:
            hidden = scale * hidden + note_emb + note_dur_emb + is_slur_emb
        hidden, masks = self.encoders(hidden, masks)
        if self.normalize_before:
            hidden = self.after_norm(hidden)
        return hidden, masks

    def forward_one_step(self, xs, masks, cache=None):
        """Encode the input layer by layer, collecting each layer's output.

        Args:
            xs (Tensor):
                Input tensor.
            masks (Tensor):
                Mask tensor.
            cache (List[Tensor]):
                One cache entry per encoder layer; None means no cache
                for any layer.

        Returns:
            Tensor:
                 Output tensor.
            Tensor:
                 Mask tensor.
            List[Tensor]:
                The output of every layer (taken before the final
                normalisation), usable as `cache` for the next call.
        """
        hidden = self.embed(xs)
        if cache is None:
            cache = [None] * len(self.encoders)
        new_cache = []
        for layer_cache, layer in zip(cache, self.encoders):
            hidden, masks = layer(hidden, masks, cache=layer_cache)
            new_cache.append(hidden)
        if self.normalize_before:
            hidden = self.after_norm(hidden)
        return hidden, masks, new_cache


class ConformerEncoder(BaseEncoder):
    """Encoder that configures ``BaseEncoder`` with ``encoder_type="conformer"``.

    Args:
        idim (int):
            Input dimension.
        attention_dim (int):
            Dimension of attention.
        attention_heads (int):
            The number of heads of multi head attention.
        linear_units (int):
            The number of units of position-wise feed forward.
        num_blocks (int):
            The number of encoder blocks.
        dropout_rate (float):
            Dropout rate.
        positional_dropout_rate (float):
            Dropout rate after adding positional encoding.
        attention_dropout_rate (float):
            Dropout rate in attention.
        input_layer (Union[str, nn.Layer]):
            Input layer type.
        normalize_before (bool):
            Whether to use layer_norm before the first block.
        concat_after (bool):
            Whether to concat attention layer's input and output.
            If True, an additional linear is applied,
            i.e. x -> x + linear(concat(x, att(x))).
            If False, no additional linear is applied, i.e. x -> x + att(x).
        positionwise_layer_type (str):
            "linear", "conv1d", or "conv1d-linear".
        positionwise_conv_kernel_size (int):
            Kernel size of positionwise conv1d layer.
        macaron_style (bool):
            Whether to use macaron style for positionwise layer.
        pos_enc_layer_type (str):
            Encoder positional encoding layer type.
        selfattention_layer_type (str):
            Encoder attention layer type.
        activation_type (str):
            Encoder activation function type.
        use_cnn_module (bool):
            Whether to use convolution module.
        zero_triu (bool):
            Whether to zero the upper triangular part of attention matrix.
        cnn_module_kernel (int):
            Kernel size of convolution module.
        padding_idx (int):
            Padding idx for input_layer=embed.
        stochastic_depth_rate (float):
            Maximum probability to skip the encoder layer.
        intermediate_layers (Union[List[int], None]):
            1-based indices of the layers whose outputs are also returned.
            When not None, ``forward`` returns a third element holding them.
    """

    def __init__(
            self,
            idim: int,
            attention_dim: int=256,
            attention_heads: int=4,
            linear_units: int=2048,
            num_blocks: int=6,
            dropout_rate: float=0.1,
            positional_dropout_rate: float=0.1,
            attention_dropout_rate: float=0.0,
            input_layer: str="conv2d",
            normalize_before: bool=True,
            concat_after: bool=False,
            positionwise_layer_type: str="linear",
            positionwise_conv_kernel_size: int=1,
            macaron_style: bool=False,
            pos_enc_layer_type: str="rel_pos",
            selfattention_layer_type: str="rel_selfattn",
            activation_type: str="swish",
            use_cnn_module: bool=False,
            zero_triu: bool=False,
            cnn_module_kernel: int=31,
            padding_idx: int=-1,
            stochastic_depth_rate: float=0.0,
            intermediate_layers: Union[List[int], None]=None, ):
        """Forward every option to ``BaseEncoder`` and select the conformer variant."""
        super().__init__(
            encoder_type="conformer",
            idim=idim,
            attention_dim=attention_dim,
            attention_heads=attention_heads,
            linear_units=linear_units,
            num_blocks=num_blocks,
            dropout_rate=dropout_rate,
            positional_dropout_rate=positional_dropout_rate,
            attention_dropout_rate=attention_dropout_rate,
            input_layer=input_layer,
            normalize_before=normalize_before,
            concat_after=concat_after,
            positionwise_layer_type=positionwise_layer_type,
            positionwise_conv_kernel_size=positionwise_conv_kernel_size,
            macaron_style=macaron_style,
            pos_enc_layer_type=pos_enc_layer_type,
            selfattention_layer_type=selfattention_layer_type,
            activation_type=activation_type,
            use_cnn_module=use_cnn_module,
            zero_triu=zero_triu,
            cnn_module_kernel=cnn_module_kernel,
            padding_idx=padding_idx,
            stochastic_depth_rate=stochastic_depth_rate,
            intermediate_layers=intermediate_layers)

    def forward(self, xs, masks):
        """Encode an input sequence.

        Args:
            xs (Tensor):
                Input tensor (#batch, time, idim).
            masks (Tensor):
                Mask tensor (#batch, 1, time).
        Returns:
            Tensor:
                Output tensor (#batch, time, attention_dim).
            Tensor:
                Mask tensor (#batch, 1, time).
            List[Tensor]:
                Only when ``intermediate_layers`` is not None: the outputs
                collected after the requested layers.
        """
        # Conv2dSubsampling also transforms the mask; other input layers do not.
        if isinstance(self.embed, Conv2dSubsampling):
            xs, masks = self.embed(xs, masks)
        else:
            xs = self.embed(xs)

        collect = self.intermediate_layers is not None
        if not collect:
            xs, masks = self.encoders(xs, masks)
        else:
            intermediate_outputs = []
            # `position` is the 1-based layer index used by intermediate_layers.
            for position, block in enumerate(self.encoders, start=1):
                xs, masks = block(xs, masks)
                if position not in self.intermediate_layers:
                    continue
                tapped = xs
                if isinstance(tapped, tuple):
                    # Tuple outputs carry the hidden states in their first slot;
                    # only in that case is the final normalization applied here.
                    tapped = tapped[0]
                    if self.normalize_before:
                        tapped = self.after_norm(tapped)
                intermediate_outputs.append(tapped)

        if isinstance(xs, tuple):
            xs = xs[0]
        if self.normalize_before:
            xs = self.after_norm(xs)

        if collect:
            return xs, masks, intermediate_outputs
        return xs, masks


class Conv1dResidualBlock(nn.Layer):
    """Residual 1-D convolution block used by the simplified CNN decoder/postnet.

    The main branch is Conv1D -> ReLU -> BatchNorm1D -> Dropout; the shortcut
    branch is a kernel-size-1 Conv1D that maps ``idim`` channels to ``odim``.
    """

    def __init__(self,
                 idim: int=256,
                 odim: int=256,
                 kernel_size: int=5,
                 dropout_rate: float=0.2):
        super().__init__()
        half_kernel = kernel_size // 2
        main_conv = nn.Conv1D(
            idim, odim, kernel_size=kernel_size, padding=half_kernel)
        self.main_block = nn.Sequential(main_conv,
                                        nn.ReLU(),
                                        nn.BatchNorm1D(odim),
                                        nn.Dropout(p=dropout_rate))
        # 1x1 convolution so the shortcut has the same channel count as the
        # main branch.
        self.conv1d_residual = nn.Conv1D(idim, odim, kernel_size=1)

    def forward(self, xs):
        """Apply the main branch and add the projected shortcut.

        Args:
            xs (Tensor):
                Input tensor (#batch, idim, T).
        Returns:
            Tensor: Output tensor (#batch, odim, T).
        """
        transformed = self.main_block(xs)
        shortcut = self.conv1d_residual(xs)
        return shortcut + transformed


class CNNDecoder(nn.Layer):
    """
    Much simplified decoder than the original one with Prenet.

    A stack of ``Conv1dResidualBlock`` layers followed by a kernel-size-1
    Conv1D that projects to ``odim`` channels.

    Args:
        emb_dim (int):
            Number of input channels of the first residual block.
        odim (int):
            Number of output channels.
        kernel_size (int):
            Kernel size of every residual block.
        dropout_rate (float):
            Dropout rate of every residual block.
        resblock_kernel_sizes (List[int]):
            Output channel count of each residual block. One extra block with
            the last channel count is always appended. The list itself is
            not modified.
    """

    def __init__(
            self,
            emb_dim: int=256,
            odim: int=80,
            kernel_size: int=5,
            dropout_rate: float=0.2,
            resblock_kernel_sizes: List[int]=[256, 256], ):

        super().__init__()

        input_shape = emb_dim
        # Copy before extending: appending to the argument itself would grow
        # the shared default list on every construction (so later instances
        # would get more blocks) and would also modify a caller-owned list.
        out_sizes = list(resblock_kernel_sizes)
        out_sizes.append(out_sizes[-1])

        # Block i maps in_sizes[i] channels to out_sizes[i] channels.
        in_sizes = [input_shape] + out_sizes[:-1]
        self.residual_blocks = nn.LayerList([
            Conv1dResidualBlock(
                idim=in_channels,
                odim=out_channels,
                kernel_size=kernel_size,
                dropout_rate=dropout_rate, )
            for in_channels, out_channels in zip(in_sizes, out_sizes)
        ])
        self.conv1d = nn.Conv1D(
            in_channels=out_sizes[-1], out_channels=odim, kernel_size=1)

    def forward(self, xs, masks=None):
        """Decode an input sequence.

        Args:
            xs (Tensor):
                Input tensor (#batch, time, idim).
            masks (Tensor):
                Mask tensor (#batch, 1, time).
        Returns:
            Tensor: Output tensor (#batch, time, odim).
        """
        # exchange the temporal dimension and the feature dimension
        xs = xs.transpose([0, 2, 1])
        if masks is not None:
            xs = xs * masks

        for layer in self.residual_blocks:
            xs = layer(xs)
            if masks is not None:
                # input_mask B * 1 * T
                xs = xs * masks
        outputs = self.conv1d(xs)
        if masks is not None:
            outputs = outputs * masks
        # back to (#batch, time, odim)
        outputs = outputs.transpose([0, 2, 1])
        return outputs


class CNNPostnet(nn.Layer):
    """Stack of ``Conv1dResidualBlock`` layers followed by a kernel-size-1
    Conv1D that projects back to ``odim`` channels.
    """

    def __init__(
            self,
            odim: int=80,
            kernel_size: int=5,
            dropout_rate: float=0.2,
            resblock_kernel_sizes: List[int]=[256, 256], ):
        super().__init__()
        channel_sizes = resblock_kernel_sizes
        # The first block consumes `odim` channels; each later block consumes
        # the previous block's output channels.
        input_sizes = [odim] + channel_sizes[:-1]
        blocks = []
        for n_in, n_out in zip(input_sizes, channel_sizes):
            blocks.append(
                Conv1dResidualBlock(
                    idim=n_in,
                    odim=n_out,
                    kernel_size=kernel_size,
                    dropout_rate=dropout_rate))
        self.residual_blocks = nn.LayerList(blocks)
        self.conv1d = nn.Conv1D(
            in_channels=channel_sizes[-1], out_channels=odim, kernel_size=1)

    def forward(self, xs, masks=None):
        """Run the residual stack and the output projection.

        Args:
            xs (Tensor):
                Input tensor (#batch, odim, time).
            masks (Tensor):
                Mask tensor (#batch, 1, time).
        Returns:
            Tensor: Output tensor (#batch, odim, time).
        """
        use_mask = masks is not None
        for block in self.residual_blocks:
            outputs = block(xs)
            if use_mask:
                # mask is B * 1 * T and is multiplied into the block output
                outputs = outputs * masks
            xs = outputs
        projected = self.conv1d(outputs)
        if use_mask:
            projected = projected * masks
        return projected


================================================
FILE: paddlespeech/t2s/modules/transformer/encoder_layer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""Encoder self-attention layer definition."""
import paddle
from paddle import nn


class EncoderLayer(nn.Layer):
    """Encoder layer module.

    Args:
        size (int): 
            Input dimension.
        self_attn (nn.Layer): 
            Self-attention module instance.
            `MultiHeadedAttention`  instance can be used as the argument.
        feed_forward (nn.Layer): 
            Feed-forward module instance.
            `PositionwiseFeedForward`, `MultiLayeredConv1d`, or `Conv1dLinear` instance can be used as the argument.
        dropout_rate (float): 
            Dropout rate.
        normalize_before (bool): 
            Whether to use layer_norm before the first block.
        concat_after (bool): 
            Whether to concat attention layer's input and output.
            if True, additional linear will be applied.
            i.e. x -> x + linear(concat(x, att(x)))
            if False, no additional linear will be applied. i.e. x -> x + att(x)
    """

    def __init__(
            self,
            size,
            self_attn,
            feed_forward,
            dropout_rate,
            normalize_before=True,
            concat_after=False, ):
        """Construct an EncoderLayer object."""
        super().__init__()
        self.self_attn = self_attn
        self.feed_forward = feed_forward
        self.norm1 = nn.LayerNorm(size)
        self.norm2 = nn.LayerNorm(size)
        self.dropout = nn.Dropout(dropout_rate)
        self.size = size
        self.normalize_before = normalize_before
        self.concat_after = concat_after
        if self.concat_after:
            # Maps concat(input, attention output) back to `size` features.
            self.concat_linear = nn.Linear(size + size, size, bias_attr=True)

    def forward(self, x, mask, cache=None):
        """Compute encoded features.

        Args:
            x(Tensor): 
                Input tensor (#batch, time, size).
            mask(Tensor): 
                Mask tensor for the input. It is indexed as a 3-D tensor
                (``mask[:, -1:, :]``) when ``cache`` is given.
            cache(Tensor, optional): 
                Cache tensor of the input (#batch, time - 1, size). 

        Returns:
            Tensor: 
                Output tensor (#batch, time, size).
            Tensor: 
                Mask tensor (reduced to its last row along axis 1 when
                ``cache`` is given).

        Raises:
            AssertionError: If ``cache`` does not have shape
                (#batch, time - 1, size).
        """
        residual = x
        if self.normalize_before:
            x = self.norm1(x)

        if cache is None:
            x_q = x
        else:
            # Paddle's dynamic-graph ``Tensor.shape`` is a list, and a list
            # never compares equal to a tuple, so normalise it before
            # comparing.
            expected_shape = (x.shape[0], x.shape[1] - 1, self.size)
            assert tuple(cache.shape) == expected_shape, (
                f"{tuple(cache.shape)} != {expected_shape}")
            # Only the newest frame is used as query; keys/values stay full.
            x_q = x[:, -1:, :]
            residual = residual[:, -1:, :]
            mask = None if mask is None else mask[:, -1:, :]

        if self.concat_after:
            # Concatenate with the query frames (x_q is x when there is no
            # cache) so the time axis matches the attention output.
            x_concat = paddle.concat(
                (x_q, self.self_attn(x_q, x, x, mask)), axis=-1)
            x = residual + self.concat_linear(x_concat)
        else:
            x = residual + self.dropout(self.self_attn(x_q, x, x, mask))
        if not self.normalize_before:
            x = self.norm1(x)

        residual = x
        if self.normalize_before:
            x = self.norm2(x)
        x = residual + self.dropout(self.feed_forward(x))
        if not self.normalize_before:
            x = self.norm2(x)

        if cache is not None:
            # Prepend the cached frames so the output covers the whole sequence.
            x = paddle.concat([cache, x], axis=1)

        return x, mask


================================================
FILE: paddlespeech/t2s/modules/transformer/lightconv.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""Lightweight Convolution Module."""
import numpy
import paddle
import paddle.nn.functional as F
from paddle import nn

from paddlespeech.t2s.modules.activation import get_activation
from paddlespeech.t2s.modules.masked_fill import masked_fill

# Most negative finite float32 value, converted to a built-in Python float.
MIN_VALUE = float(numpy.finfo(numpy.float32).min)


class LightweightConvolution(nn.Layer):
    """Lightweight Convolution layer.

    This implementation is based on
    https://github.com/pytorch/fairseq/tree/master/fairseq

    The forward pass is: linear -> GLU -> grouped 1-D convolution whose
    kernels are softmax-normalized -> linear.

    Args:
        wshare (int):
            the number of kernels of the convolution; must divide ``n_feat``
        n_feat (int):
            the number of features
        dropout_rate (float):
            dropout rate applied to the convolution kernel weights
        kernel_size (int):
            kernel size (length)
        use_kernel_mask (bool):
            Use causal mask or not for convolution kernel
        use_bias (bool):
            Use bias term or not.

    """

    def __init__(
            self,
            wshare,
            n_feat,
            dropout_rate,
            kernel_size,
            use_kernel_mask=False,
            use_bias=False, ):
        """Construct Lightweight Convolution layer."""
        super().__init__()

        # The features are later regrouped into `wshare` channels per sample.
        assert n_feat % wshare == 0
        self.wshare = wshare
        self.use_kernel_mask = use_kernel_mask
        self.dropout_rate = dropout_rate
        self.kernel_size = kernel_size
        # Half the kernel length (truncated); used as conv padding in forward().
        self.padding_size = int(kernel_size / 2)

        # linear -> GLU -> lightconv -> linear
        # linear1 doubles the feature size; linear2 consumes n_feat features,
        # so the GLU activation presumably halves the last axis.
        self.linear1 = nn.Linear(n_feat, n_feat * 2)
        self.linear2 = nn.Linear(n_feat, n_feat)
        self.act = get_activation("glu")

        # lightconv related
        # The kernel weights start as a [wshare, 1, kernel_size] float32 tensor
        # drawn by numpy, are passed through the Uniform initializer object,
        # and are then re-created as a trainable parameter initialized from
        # that tensor. `self.weight` is deliberately assigned twice.
        self.uniform_ = nn.initializer.Uniform()
        self.weight = paddle.to_tensor(
            numpy.random.uniform(0, 1, size=[self.wshare, 1, kernel_size]),
            dtype="float32")
        self.uniform_(self.weight)
        self.weight = paddle.create_parameter(
            shape=self.weight.shape,
            dtype=str(self.weight.numpy().dtype),
            default_initializer=paddle.nn.initializer.Assign(self.weight))
        self.use_bias = use_bias
        if self.use_bias:
            # NOTE(review): `paddle.Tensor(n_feat)` looks like a direct port of
            # `torch.Tensor(n_feat)`; confirm that it yields a length-n_feat
            # tensor in Paddle before relying on use_bias=True.
            self.bias = paddle.Tensor(n_feat)
            self.bias = paddle.create_parameter(
                shape=self.bias.shape,
                dtype=str(self.bias.numpy().dtype),
                default_initializer=paddle.nn.initializer.Assign(self.bias))

        # mask of kernel
        # Ones for the first int(kernel_size / 2 + 1) taps, zeros for the
        # remaining int(kernel_size / 2) taps; the total width equals
        # kernel_size when kernel_size is odd. Final shape: [wshare, 1, width].
        kernel_mask0 = paddle.zeros([self.wshare, int(kernel_size / 2)])
        kernel_mask1 = paddle.ones([self.wshare, int(kernel_size / 2 + 1)])
        self.kernel_mask = paddle.concat(
            (kernel_mask1, kernel_mask0), axis=-1).unsqueeze(1)

    def forward(self, query, key, value, mask):
        """Forward of 'Lightweight Convolution'.

        This function takes query, key and value but uses only query.
        This is just for compatibility with self-attention layer (attention.py)

        Args:
            query (Tensor):
                input tensor. (batch, time1, d_model)
            key (Tensor):
                NOT USED. (batch, time2, d_model)
            value (Tensor):
                NOT USED. (batch, time2, d_model)
            mask (Tensor):
                (batch, time1, time2) mask. It is only applied when
                ``use_kernel_mask`` is False; it may be None.

        Return:
            Tensor: output. (batch, time1, d_model)

        """
        # linear -> GLU -> lightconv -> linear
        x = query
        B, T, C = x.shape
        H = self.wshare

        # first linear layer
        x = self.linear1(x)

        # GLU activation
        x = self.act(x)

        # lightconv
        # B x C x T, then regrouped so that each row holds H channels
        x = x.transpose([0, 2, 1]).reshape([-1, H, T])
        # Dropout is applied to the kernel weights, only in training mode.
        weight = F.dropout(
            self.weight, self.dropout_rate, training=self.training)
        if self.use_kernel_mask:
            # Taps where kernel_mask is 0 are set to -inf before the softmax
            # (presumably so they receive zero weight - depends on the
            # project's masked_fill helper).
            weight = masked_fill(weight, self.kernel_mask == 0.0, float("-inf"))
            # weight = weight.masked_fill(self.kernel_mask == 0.0, float("-inf"))
        # Normalize each kernel over its taps.
        weight = F.softmax(weight, axis=-1)
        # Grouped convolution with one kernel per group, then back to B x C x T.
        x = F.conv1d(
            x, weight, padding=self.padding_size,
            groups=self.wshare).reshape([B, C, T])
        if self.use_bias:
            x = x + self.bias.reshape([1, -1, 1])
        # B x T x C
        x = x.transpose([0, 2, 1])

        if mask is not None and not self.use_kernel_mask:
            # The mask's last two axes are swapped and positions where it is 0
            # are filled with 0.0 in the output.
            mask = mask.transpose([0, 2, 1])
            # x = x.masked_fill(mask == 0, 0.0)
            x = masked_fill(x, mask == 0, 0.0)

        # second linear layer
        x = self.linear2(x)
        return x


================================================
FILE: paddlespeech/t2s/modules/transformer/mask.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Mask module."""
import paddle


def subsequent_mask(size, dtype=paddle.bool):
    """Build a lower-triangular (size, size) mask for subsequent steps.

    Args:
        size (int):
            size of mask
        dtype (paddle.dtype):
            result dtype
    Return:
        Tensor:
            >>> subsequent_mask(3)
            [[1, 0, 0],
            [1, 1, 0],
            [1, 1, 1]]
    """
    # Start from an all-ones square and keep only the lower triangle.
    all_ones = paddle.ones([size, size], dtype=dtype)
    lower_triangle = paddle.tril(all_ones)
    return lower_triangle


def target_mask(ys_in_pad, ignore_id, dtype=paddle.bool):
    """Create mask for decoder self-attention.

    Combines the padding mask (positions not equal to ``ignore_id``) with the
    lower-triangular subsequent-step mask.

    Args:
        ys_in_pad (Tensor):
            batch of padded target sequences (B, Lmax)
        ignore_id (int):
            index of padding
        dtype (paddle.dtype):
            result dtype
    Return:
        Tensor: (B, Lmax, Lmax)
    """
    ys_mask = ys_in_pad != ignore_id
    # The masks are combined in bool, since `&` is applied to boolean masks.
    m = subsequent_mask(ys_mask.shape[-1]).unsqueeze(0)
    mask = ys_mask.unsqueeze(-2) & m
    # Honor the documented `dtype`; the default (bool) path is unchanged.
    if dtype != paddle.bool:
        mask = paddle.cast(mask, dtype)
    return mask


================================================
FILE: paddlespeech/t2s/modules/transformer/multi_layer_conv.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Layer modules for FFT block in FastSpeech (Feed-forward Transformer)."""
from paddle import nn


class MultiLayeredConv1d(nn.Layer):
    """Two stacked 1-D convolutions used in place of the position-wise
    feed-forward network of a Transformer block.

    Introduced in `FastSpeech: Fast, Robust and Controllable Text to Speech`_.

    .. _`FastSpeech: Fast, Robust and Controllable Text to Speech`:
        https://arxiv.org/pdf/1905.09263.pdf

    """

    def __init__(self, in_chans, hidden_chans, kernel_size, dropout_rate):
        """Initialize MultiLayeredConv1d module.

        Args:
            in_chans (int):
                Number of input channels.
            hidden_chans (int):
                Number of hidden channels.
            kernel_size (int):
                Kernel size of conv1d.
            dropout_rate (float):
                Dropout rate.

        """
        super().__init__()
        pad = (kernel_size - 1) // 2
        # in_chans -> hidden_chans
        self.w_1 = nn.Conv1D(
            in_chans, hidden_chans, kernel_size, stride=1, padding=pad)
        # hidden_chans -> in_chans
        self.w_2 = nn.Conv1D(
            hidden_chans, in_chans, kernel_size, stride=1, padding=pad)
        self.dropout = nn.Dropout(dropout_rate)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Calculate forward propagation.

        Args:
            x (Tensor):
                Batch of input tensors (B, T, in_chans).

        Returns:
            Tensor: Batch of output tensors (B, T, in_chans).
        """
        # Conv1D works on (B, C, T), hence the transposes around each conv.
        channels_first = x.transpose([0, 2, 1])
        hidden = self.relu(self.w_1(channels_first)).transpose([0, 2, 1])
        hidden = self.dropout(hidden)
        projected = self.w_2(hidden.transpose([0, 2, 1]))
        return projected.transpose([0, 2, 1])


class Conv1dLinear(nn.Layer):
    """Conv1D followed by Linear for a Transformer block.

    Same as MultiLayeredConv1d except that the second conv layer is a linear
    layer.

    """

    def __init__(self, in_chans, hidden_chans, kernel_size, dropout_rate):
        """Initialize Conv1dLinear module.

        Args:
            in_chans (int):
                Number of input channels.
            hidden_chans (int):
                Number of hidden channels.
            kernel_size (int):
                Kernel size of conv1d.
            dropout_rate (float):
                Dropout rate.
        """
        super().__init__()
        pad = (kernel_size - 1) // 2
        self.w_1 = nn.Conv1D(
            in_chans, hidden_chans, kernel_size, stride=1, padding=pad)
        self.w_2 = nn.Linear(hidden_chans, in_chans, bias_attr=True)
        self.dropout = nn.Dropout(dropout_rate)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Calculate forward propagation.

        Args:
            x (Tensor):
                Batch of input tensors (B, T, in_chans).

        Returns:
            Tensor: Batch of output tensors (B, T, in_chans).

        """
        # The convolution runs on (B, C, T); the linear layer on (B, T, C).
        conv_out = self.w_1(x.transpose([0, 2, 1]))
        hidden = self.relu(conv_out).transpose([0, 2, 1])
        dropped = self.dropout(hidden)
        return self.w_2(dropped)


================================================
FILE: paddlespeech/t2s/modules/transformer/positionwise_feed_forward.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""Positionwise feed forward layer definition."""
import paddle
from paddle import nn


class PositionwiseFeedForward(nn.Layer):
    """Position-wise feed-forward layer: linear -> activation -> dropout -> linear.

    Args:
        idim (int):
            Input dimension.
        hidden_units (int):
            The number of hidden units.
        dropout_rate (float):
            Dropout rate.
        activation (nn.Layer):
            Activation applied after the first linear layer.
    """

    def __init__(self,
                 idim,
                 hidden_units,
                 dropout_rate,
                 activation=paddle.nn.ReLU()):
        """Construct a PositionwiseFeedForward object."""
        super().__init__()
        # idim -> hidden_units
        self.w_1 = paddle.nn.Linear(idim, hidden_units, bias_attr=True)
        # hidden_units -> idim
        self.w_2 = paddle.nn.Linear(hidden_units, idim, bias_attr=True)
        self.dropout = paddle.nn.Dropout(dropout_rate)
        self.activation = activation

    def forward(self, x):
        """Forward function."""
        hidden = self.w_1(x)
        hidden = self.activation(hidden)
        hidden = self.dropout(hidden)
        return self.w_2(hidden)


================================================
FILE: paddlespeech/t2s/modules/transformer/repeat.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""Repeat the same layer definition."""
import paddle


class MultiSequential(paddle.nn.Sequential):
    """paddle.nn.Sequential variant whose layers take and return several values."""

    def forward(self, *args):
        """Feed the outputs of each layer, unpacked, into the next one."""
        current = args
        for layer in self:
            current = layer(*current)
        return current


def repeat(N, fn):
    """Build a MultiSequential from N modules produced by ``fn``.

    Args:
        N (int):
            Number of repeat time.
        fn (Callable):
            Function to generate module; called with the index 0..N-1.

    Returns:
        MultiSequential: Repeated model instance.
    """
    modules = [fn(index) for index in range(N)]
    return MultiSequential(*modules)


================================================
FILE: paddlespeech/t2s/modules/transformer/subsampling.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""Subsampling layer definition."""
import paddle
from paddle import nn

from paddlespeech.t2s.modules.transformer.embedding import PositionalEncoding


class Conv2dSubsampling(nn.Layer):
    """Convolutional 2D subsampling (to 1/4 length).

    Two Conv2D layers (kernel 3, stride 2), each followed by ReLU, then a
    linear projection and a positional encoding.

    Args:
        idim (int):
            Input dimension.
        odim (int):
            Output dimension.
        dropout_rate (float):
            Dropout rate.
        pos_enc (nn.Layer):
            Custom position encoding layer.
    """

    def __init__(self, idim, odim, dropout_rate, pos_enc=None):
        """Construct a Conv2dSubsampling object."""
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2D(1, odim, 3, 2),
            nn.ReLU(),
            nn.Conv2D(odim, odim, 3, 2),
            nn.ReLU(), )
        # Feature-axis length after the two stride-2 convolutions.
        reduced_dim = ((idim - 1) // 2 - 1) // 2
        projection = nn.Linear(odim * reduced_dim, odim)
        if pos_enc is not None:
            position_layer = pos_enc
        else:
            position_layer = PositionalEncoding(odim, dropout_rate)
        self.out = nn.Sequential(projection, position_layer)

    def forward(self, x, x_mask):
        """Subsample x.
        Args:
            x (Tensor):
                Input tensor (#batch, time, idim).
            x_mask (Tensor):
                Input mask (#batch, 1, time).
        Returns:
            Tensor:
                Subsampled tensor (#batch, time', odim), where time' = time // 4.
            Tensor:
                Subsampled mask (#batch, 1, time'), where time' = time // 4.
        """
        # add a channel axis: (b, c, t, f)
        features = self.conv(x.unsqueeze(1))
        batch, channels, frames, freq = paddle.shape(features)
        # merge channel and feature axes: (b, t, c * f)
        flattened = features.transpose([0, 2, 1, 3]).reshape(
            [batch, frames, channels * freq])
        output = self.out(flattened)
        if x_mask is None:
            return output, None
        # One strided slice per convolution.
        mask_after_first = x_mask[:, :, :-2:2]
        return output, mask_after_first[:, :, :-2:2]

    def __getitem__(self, key):
        """Get item.
        When reset_parameters() is called, if use_scaled_pos_enc is used,
            return the positioning encoding.
        """
        if key != -1:
            message = "Support only `-1` (for `reset_parameters`)."
            raise NotImplementedError(message)
        return self.out[key]


================================================
FILE: paddlespeech/t2s/modules/upsample.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
from typing import Any
from typing import Dict
from typing import List
from typing import Optional

from paddle import nn
from paddle.nn import functional as F

from paddlespeech.t2s.modules.activation import get_activation


class Stretch2D(nn.Layer):
    def __init__(self, w_scale: int, h_scale: int, mode: str="nearest"):
        """Stretch an image (or image-like tensor) by interpolation.

        Args:
            w_scale (int):
                Scale factor applied to the width (last axis).
            h_scale (int):
                Scale factor applied to the height (second to last axis).
            mode (str, optional):
                Interpolation mode forwarded to ``F.interpolate``,
                by default "nearest".
        For the list of supported modes, see
            `paddle.nn.functional.interpolate <https://www.paddlepaddle.org.cn/documentation/docs/en/api/paddle/nn/functional/interpolate_en.html>`_.
        """
        super().__init__()
        self.mode = mode
        self.h_scale = h_scale
        self.w_scale = w_scale

    def forward(self, x):
        """Interpolate ``x`` by the configured height and width factors.

        Args:
            x (Tensor):
                Shape (N, C, H, W)

        Returns:
            Tensor:
                The stretched image. Shape (N, C, H', W'), where
                ``H'=h_scale * H``, ``W'=w_scale * W``.
        """
        # F.interpolate expects the factors in (height, width) order.
        factors = (self.h_scale, self.w_scale)
        return F.interpolate(x, scale_factor=factors, mode=self.mode)


class UpsampleNet(nn.Layer):
    """A Layer to upsample spectrogram by applying consecutive stretch and
    convolutions.

    For every entry of ``upsample_scales`` the layer appends a ``Stretch2D``
    (time axis only), a single-channel ``Conv2D`` and, optionally, an
    activation.

    Args:
        upsample_scales (List[int]):
            Upsampling factors for each stretch.
        nonlinear_activation (Optional[str], optional):
            Activation after each convolution, by default None
        nonlinear_activation_params (Dict[str, Any], optional):
            Parameters passed to construct the activation, by default {}
            (the dict is only unpacked, never mutated).
        interpolate_mode (str, optional):
            Interpolation mode of the stretch, by default "nearest"
        freq_axis_kernel_size (int, optional):
            Convolution kernel size along the frequency axis, by default 1.
            Must be odd.
        use_causal_conv (bool, optional):
            Whether to use causal padding before convolution, by default False
            If True, the time axis is padded by ``2 * scale`` on both sides and
            the trailing ``2 * scale`` output frames are dropped in ``forward``,
            so each output frame only depends on current and past frames.
            If False, "same" padding is used along the time axis.
    """

    def __init__(self,
                 upsample_scales: List[int],
                 nonlinear_activation: Optional[str]=None,
                 nonlinear_activation_params: Dict[str, Any]={},
                 interpolate_mode: str="nearest",
                 freq_axis_kernel_size: int=1,
                 use_causal_conv: bool=False):
        super().__init__()
        self.use_causal_conv = use_causal_conv
        self.up_layers = nn.LayerList()

        for scale in upsample_scales:
            # Stretch only the time (width) axis; frequency is left untouched.
            stretch = Stretch2D(scale, 1, interpolate_mode)
            assert freq_axis_kernel_size % 2 == 1
            freq_axis_padding = (freq_axis_kernel_size - 1) // 2
            kernel_size = (freq_axis_kernel_size, scale * 2 + 1)
            if use_causal_conv:
                # Full receptive field minus one on each side; the surplus
                # frames at the end are cut off in forward().
                padding = (freq_axis_padding, scale * 2)
            else:
                padding = (freq_axis_padding, scale)
            conv = nn.Conv2D(
                1, 1, kernel_size, padding=padding, bias_attr=False)
            self.up_layers.extend([stretch, conv])
            if nonlinear_activation is not None:
                # for compatibility
                nonlinear_activation = nonlinear_activation.lower()

                nonlinear = get_activation(nonlinear_activation,
                                           **nonlinear_activation_params)
                self.up_layers.append(nonlinear)

    def forward(self, c):
        """
        Args:
            c (Tensor):
                spectrogram. Shape (N, F, T)

        Returns:
            Tensor: upsampled spectrogram.
                Shape (N, F, T'), where ``T' = upsample_factor * T``,
        """
        c = c.unsqueeze(1)
        for f in self.up_layers:
            if self.use_causal_conv and isinstance(f, nn.Conv2D):
                # The causal convolution returns 2 * scale extra frames.
                # Keep the first `n_frames` with a SLICE so the time axis
                # survives; an integer index here would pick one column and
                # drop the axis.
                n_frames = c.shape[-1]
                c = f(c)[:, :, :, :n_frames]
            else:
                c = f(c)
        return c.squeeze(1)


class ConvInUpsampleNet(nn.Layer):
    """Upsample a spectrogram with an input 1D convolution followed by an
    UpsampleNet.

    Args:
        upsample_scales (List[int]):
            Upsampling factors for each stretch.
        nonlinear_activation (Optional[str], optional):
            Activation after each convolution, by default None
        nonlinear_activation_params (Dict[str, Any], optional):
            Parameters passed to construct the activation, by default {}
        interpolate_mode (str, optional):
            Interpolation mode of the stretch, by default "nearest"
        freq_axis_kernel_size (int, optional):
            Convolution kernel size along the frequency axis, by default 1
        aux_channels (int, optional):
            Feature size of the input, by default 80
        aux_context_window (int, optional):
            Context window of the first 1D convolution applied to the input,
            by default 0. It determines the kernel size of that convolution:
            ``window + 1`` with causal convolution, ``2 * window + 1``
            otherwise.
        use_causal_conv (bool, optional):
            Whether to use causal padding before convolution, by default False
            If True, Causal padding is used along the time axis, i.e. padding
            amount is ``receptive field - 1`` and 0 for before and after, respectively.
            If False, "same" padding is used along the time axis.
    """

    def __init__(self,
                 upsample_scales: List[int],
                 nonlinear_activation: Optional[str]=None,
                 nonlinear_activation_params: Dict[str, Any]={},
                 interpolate_mode: str="nearest",
                 freq_axis_kernel_size: int=1,
                 aux_channels: int=80,
                 aux_context_window: int=0,
                 use_causal_conv: bool=False):
        super().__init__()
        self.aux_context_window = aux_context_window
        # Trailing frames are only trimmed in forward() when the convolution
        # is causal AND there is a non-empty context window.
        self.use_causal_conv = use_causal_conv and aux_context_window > 0

        if use_causal_conv:
            kernel_size = aux_context_window + 1
        else:
            kernel_size = 2 * aux_context_window + 1

        # No padding argument is given to this convolution.
        self.conv_in = nn.Conv1D(
            aux_channels, aux_channels, kernel_size=kernel_size, bias_attr=False)

        upsample_options = dict(
            upsample_scales=upsample_scales,
            nonlinear_activation=nonlinear_activation,
            nonlinear_activation_params=nonlinear_activation_params,
            interpolate_mode=interpolate_mode,
            freq_axis_kernel_size=freq_axis_kernel_size,
            use_causal_conv=use_causal_conv)
        self.upsample = UpsampleNet(**upsample_options)

    def forward(self, c):
        """
        Args:
            c (Tensor):
                spectrogram. Shape (N, F, T)

        Returns:
            Tensors: upsampled spectrogram. Shape (N, F, T'), where ``T' = upsample_factor * T``,
        """
        hidden = self.conv_in(c)
        if self.use_causal_conv:
            # Drop the last `aux_context_window` frames of the conv output.
            hidden = hidden[:, :, :-self.aux_context_window]
        return self.upsample(hidden)


================================================
FILE: paddlespeech/t2s/modules/wavenet_denoiser.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math

import paddle
from paddle import nn
from ppdiffusers.models.embeddings import Timesteps

from paddlespeech.t2s.modules.nets_utils import initialize
from paddlespeech.t2s.modules.residual_block import WaveNetResidualBlock


class WaveNetDenoiser(nn.Layer):
    """A Mel-Spectrogram Denoiser modified from WaveNet

    Args:
        in_channels (int, optional):
            Number of channels of the input mel-spectrogram, by default 80
        out_channels (int, optional):
            Number of channels of the output mel-spectrogram, by default 80
        kernel_size (int, optional):
            Kernel size of the residual blocks inside, by default 3
        layers (int, optional):
            Number of residual blocks inside, by default 20
        stacks (int, optional):
            The number of groups to split the residual blocks into, by default 5
            Within each group, the dilation of the residual block grows exponentially.
            ``layers`` must be divisible by ``stacks``.
        residual_channels (int, optional):
            Residual channel of the residual blocks, by default 256
        gate_channels (int, optional):
            Gate channel of the residual blocks, by default 512
        skip_channels (int, optional):
            Skip channel of the residual blocks, by default 256
        aux_channels (int, optional):
            Auxiliary channel of the residual blocks, by default 256
        dropout (float, optional):
            Dropout of the residual blocks, by default 0.
        bias (bool, optional):
            Whether to use bias in residual blocks, by default True
        use_weight_norm (bool, optional):
            Whether to use weight norm in all convolutions, by default False
        init_type (str, optional):
            Initialization scheme name passed to ``initialize``,
            by default "kaiming_normal"
    """

    def __init__(
            self,
            in_channels: int=80,
            out_channels: int=80,
            kernel_size: int=3,
            layers: int=20,
            stacks: int=5,
            residual_channels: int=256,
            gate_channels: int=512,
            skip_channels: int=256,
            aux_channels: int=256,
            dropout: float=0.,
            bias: bool=True,
            use_weight_norm: bool=False,
            init_type: str="kaiming_normal", ):
        super().__init__()

        # initialize parameters
        initialize(self, init_type)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.aux_channels = aux_channels
        self.layers = layers
        self.stacks = stacks
        self.kernel_size = kernel_size

        assert layers % stacks == 0
        layers_per_stack = layers // stacks

        # Timestep embedding: Timesteps module followed by a small MLP.
        timestep_encoder = Timesteps(
            residual_channels, flip_sin_to_cos=False, downscale_freq_shift=1)
        self.first_t_emb = nn.Sequential(
            timestep_encoder,
            nn.Linear(residual_channels, residual_channels * 4),
            nn.Mish(),
            nn.Linear(residual_channels * 4, residual_channels))

        # One linear projection of the timestep embedding per residual block.
        per_block_projections = []
        for _ in range(layers):
            per_block_projections.append(
                nn.Linear(residual_channels, residual_channels))
        self.t_emb_layers = nn.LayerList(per_block_projections)

        self.first_conv = nn.Conv1D(
            in_channels, residual_channels, 1, bias_attr=True)
        self.first_act = nn.ReLU()

        # Residual blocks; the dilation restarts from 1 at each stack boundary.
        self.conv_layers = nn.LayerList()
        for block_idx in range(layers):
            self.conv_layers.append(
                WaveNetResidualBlock(
                    kernel_size=kernel_size,
                    residual_channels=residual_channels,
                    gate_channels=gate_channels,
                    skip_channels=skip_channels,
                    aux_channels=aux_channels,
                    dilation=2**(block_idx % layers_per_stack),
                    dropout=dropout,
                    bias=bias))

        # Output head; the weight of the last convolution is set to zero.
        final_conv = nn.Conv1D(skip_channels, out_channels, 1, bias_attr=True)
        zero_init = nn.initializer.Constant(0.0)
        zero_init(final_conv.weight)
        self.last_conv_layers = nn.Sequential(
            nn.ReLU(),
            nn.Conv1D(skip_channels, skip_channels, 1, bias_attr=True),
            nn.ReLU(),
            final_conv)

        if use_weight_norm:
            self.apply_weight_norm()

    def forward(self, x: paddle.Tensor, t: paddle.Tensor, c: paddle.Tensor):
        """Denoise mel-spectrogram.

        Args:
            x(Tensor):
                Shape (B, C_in, T), The input mel-spectrogram.
            t(Tensor):
                Shape (B), The timestep input. If its first dimension differs
                from the batch size of ``x`` it is tiled ``B`` times.
            c(Tensor):
                Shape (B, C_aux, T'). The auxiliary input (e.g. fastspeech2
                encoder output). Its last dimension must equal that of ``x``.

        Returns:
            Tensor: Shape (B, C_out, T), the pred noise.
        """
        assert c.shape[-1] == x.shape[-1]

        batch_size = x.shape[0]
        if t.shape[0] != batch_size:
            t = t.tile([batch_size])

        shared_emb = self.first_t_emb(t)
        # Project once per block and append a trailing axis so the result
        # broadcasts over the time axis of the hidden features.
        block_embs = []
        for projection in self.t_emb_layers:
            block_embs.append(projection(shared_emb)[..., None])

        x = self.first_act(self.first_conv(x))

        skips = 0
        for block, block_emb in zip(self.conv_layers, block_embs):
            x = x + block_emb
            x, skip = block(x, c)
            skips += skip
        # Scale the accumulated skip connections by 1 / sqrt(number of blocks).
        skips *= math.sqrt(1.0 / len(self.conv_layers))

        return self.last_conv_layers(skips)

    def apply_weight_norm(self):
        """Recursively apply weight normalization to all the Convolution layers
        in the sublayers.
        """
        conv_types = (nn.Conv1D, nn.Conv2D)

        def _wrap(sublayer):
            if not isinstance(sublayer, conv_types):
                return
            nn.utils.weight_norm(sublayer)

        self.apply(_wrap)

    def remove_weight_norm(self):
        """Recursively remove weight normalization from all the Convolution
        layers in the sublayers.
        """

        def _unwrap(sublayer):
            # A ValueError from remove_weight_norm is ignored on purpose so
            # the traversal continues over the remaining sublayers.
            try:
                nn.utils.remove_weight_norm(sublayer)
            except ValueError:
                return

        self.apply(_unwrap)


================================================
FILE: paddlespeech/t2s/training/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .cli import *
from .experiment import *


================================================
FILE: paddlespeech/t2s/training/cli.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse


def default_argument_parser():
    r"""A simple yet general argument parser for experiments with t2s.

    This is used in examples with t2s. And it is intended to be used by
    other experiments with t2s. It requires a minimal set of command line
    arguments to start a training script.

    The ``--config`` and ``--opts`` are used to overwrite the default
    configuration. ``--opts`` collects all remaining command line tokens, so
    it must be the last option on the command line.

    The ``--data`` and ``--output`` specify the data path and output path.
    Resuming training from existing progress at the output directory is the
    intended default behavior.

    The ``--checkpoint_path`` specifies the checkpoint to load from.

    The ``--ngpu`` specifies how to run the training (default 1).

    See Also
    --------
    paddlespeech.t2s.training.experiment

    Returns
    -------
    argparse.ArgumentParser
        the parser
    """
    parser = argparse.ArgumentParser()

    # yapf: disable
    # data and output
    parser.add_argument("--config", metavar="FILE", help="path of the config file to overwrite to default config with.")
    parser.add_argument("--data", metavar="DATA_DIR", help="path to the dataset.")
    parser.add_argument("--output", metavar="OUTPUT_DIR", help="path to save checkpoint and logs.")

    # load from saved checkpoint
    parser.add_argument("--checkpoint_path", type=str, help="path of the checkpoint to load")

    # running
    parser.add_argument("--ngpu", type=int, default=1, help="if ngpu == 0, use cpu.")

    # overwrite extra config and default config
    parser.add_argument("--opts", nargs=argparse.REMAINDER, help="options to overwrite --config file and the default config, passing in KEY VALUE pairs")
    # yapf: enable

    return parser


================================================
FILE: paddlespeech/t2s/training/default_config.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from yacs.config import CfgNode

# Default training schedule, expressed in iterations.
_C = CfgNode({
    "valid_interval": 1000,  # validation
    "save_interval": 10000,  # checkpoint
    "max_iteration": 900000,  # max iteration to train
})


def get_default_training_config():
    """Return a clone of the default training configuration node."""
    defaults = _C.clone()
    return defaults


================================================
FILE: paddlespeech/t2s/training/experiment.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import sys
from pathlib import Path

import paddle
from paddle import distributed as dist
from paddle.io import DistributedBatchSampler
from visualdl import LogWriter

from paddlespeech.t2s.utils import checkpoint
from paddlespeech.t2s.utils import mp_tools

__all__ = ["ExperimentBase"]


class ExperimentBase(object):
    """
    An experiment template in order to structure the training code and take
    care of saving, loading, logging, visualization stuffs. It's intended to
    be flexible and simple.

    So it only handles output directory (create directory for the output,
    create a checkpoint directory, dump the config in use and create
    visualizer and logger) in a standard way without enforcing any
    input-output protocols to the model and dataloader. It leaves the main
    part for the user to implement their own (setup the model, criterion,
    optimizer, define a training step, define a validation function and
    customize all the text and visual logs).

    It does not save too much boilerplate code. The users still have to write
    the forward/backward/update manually, but they are free to add
    non-standard behaviors if needed.

    We have some conventions to follow.
    1. Experiment should have ``model``, ``optimizer``, ``train_loader`` and
    ``valid_loader``, ``config`` and ``args`` attributes.
    2. The config should have a ``training`` field, which has
    ``valid_interval``, ``save_interval`` and ``max_iteration`` keys. It is
    used as the trigger to invoke validation, checkpointing and stop of the
    experiment.
    3. There are four methods, namely ``train_batch``, ``valid``,
    ``setup_model`` and ``setup_dataloader`` that should be implemented.

    Feel free to add/overwrite other methods and standalone functions if you
    need.

    Args:
        config (yacs.config.CfgNode):
            The configuration used for the experiment.
        args (argparse.Namespace):
            The parsed command line arguments.

    Examples:
        >>> def main_sp(config, args):
        >>>     exp = Experiment(config, args)
        >>>     exp.setup()
        >>>     exp.resume_or_load()
        >>>     exp.run()
        >>>
        >>> config = get_cfg_defaults()
        >>> parser = default_argument_parser()
        >>> args = parser.parse_args()
        >>> if args.config:
        >>>     config.merge_from_file(args.config)
        >>> if args.opts:
        >>>     config.merge_from_list(args.opts)
        >>> config.freeze()
        >>>
        >>> if args.ngpu > 1:
        >>>     dist.spawn(main_sp, args=(config, args), nprocs=args.ngpu)
        >>> else:
        >>>     main_sp(config, args)
    """

    def __init__(self, config, args):
        self.config = config
        self.args = args

        # Everything below is filled in by setup() and its helpers.
        self.model = None
        self.optimizer = None
        self.iteration = 0
        self.epoch = 0
        self.train_loader = None
        self.valid_loader = None
        self.iterator = None
        self.logger = None
        self.visualizer = None
        self.output_dir = None
        self.checkpoint_dir = None

    def setup(self):
        """Setup the experiment.

        Selects the device from ``args.ngpu``, initializes the parallel
        environment when needed, prepares the output directory, config dump,
        visualizer, logger and checkpoint directory, then builds the
        dataloaders and the model, and resets the iteration/epoch counters.
        """
        if self.args.ngpu == 0:
            paddle.set_device("cpu")
        elif self.args.ngpu > 0:
            paddle.set_device("gpu")
        else:
            # A negative value is only reported; setup continues without
            # selecting a device explicitly.
            print("ngpu should >= 0 !")
        if self.parallel:
            self.init_parallel()

        self.setup_output_dir()
        self.dump_config()
        self.setup_visualizer()
        self.setup_logger()
        self.setup_checkpointer()

        self.setup_dataloader()
        self.setup_model()

        self.iteration = 0
        self.epoch = 0

    @property
    def parallel(self):
        """A flag indicating whether the experiment should run with
        multiprocessing.
        """
        return self.args.ngpu > 1

    def init_parallel(self):
        """Init environment for multiprocess training.
        """
        dist.init_parallel_env()

    @mp_tools.rank_zero_only
    def save(self):
        """Save checkpoint (model parameters and optimizer states).
        """
        checkpoint.save_parameters(self.checkpoint_dir, self.iteration,
                                   self.model, self.optimizer)

    def resume_or_load(self):
        """Resume from latest checkpoint at checkpoints in the output
        directory or load a specified checkpoint.

        If ``args.checkpoint_path`` is not None, load the checkpoint, else
        resume training. ``self.iteration`` is set to the value returned by
        the checkpoint loader.
        """
        iteration = checkpoint.load_parameters(
            self.model,
            self.optimizer,
            checkpoint_dir=self.checkpoint_dir,
            checkpoint_path=self.args.checkpoint_path)
        self.iteration = iteration

    def read_batch(self):
        """Read a batch from the train_loader.

        When the current iterator is exhausted a new epoch is started and the
        first batch of that epoch is returned.

        Returns
        -------
        List[Tensor]
            A batch.
        """
        try:
            batch = next(self.iterator)
        except StopIteration:
            self.new_epoch()
            batch = next(self.iterator)
        return batch

    def new_epoch(self):
        """Reset the train loader and increment ``epoch``.
        """
        self.epoch += 1
        if self.parallel and isinstance(self.train_loader.batch_sampler,
                                        DistributedBatchSampler):
            # Pass the new epoch number to the distributed sampler.
            self.train_loader.batch_sampler.set_epoch(self.epoch)
        self.iterator = iter(self.train_loader)

    def train(self):
        """The training process.

        It includes forward/backward/update and periodical validation and
        saving.
        """
        self.new_epoch()
        while self.iteration < self.config.training.max_iteration:
            self.iteration += 1
            self.train_batch()

            if self.iteration % self.config.training.valid_interval == 0:
                self.valid()

            if self.iteration % self.config.training.save_interval == 0:
                self.save()

    def run(self):
        """The routine of the experiment after setup. This method is intended
        to be used by the user.

        The visualizer is closed exactly once, whether training finishes,
        fails or is interrupted.
        """
        try:
            self.train()
        except KeyboardInterrupt as exception:
            # No checkpoint is written here: an interrupted step cannot
            # produce a complete model. close() is left to the ``finally``
            # clause, which also runs while SystemExit propagates.
            sys.exit(exception)
        finally:
            self.close()

    def setup_output_dir(self):
        """Create a directory used for output.
        """
        # output dir
        output_dir = Path(self.args.output).expanduser()
        output_dir.mkdir(parents=True, exist_ok=True)

        self.output_dir = output_dir

    def setup_checkpointer(self):
        """Create a directory used to save checkpoints into.

        It is "checkpoints" inside the output directory.
        """
        # checkpoint dir
        checkpoint_dir = self.output_dir / "checkpoints"
        checkpoint_dir.mkdir(exist_ok=True)

        self.checkpoint_dir = checkpoint_dir

    @mp_tools.rank_zero_only
    def close(self):
        """Close visualizer to avoid hanging after training"""
        # https://github.com/pytorch/fairseq/issues/2357
        # The visualizer is still None if setup() never ran or failed early;
        # skipping it keeps the original error from being masked.
        if self.visualizer is not None:
            self.visualizer.close()

    @mp_tools.rank_zero_only
    def setup_visualizer(self):
        """Initialize a visualizer to log the experiment.

        The visual log is saved in the output directory.

        Notes
        ------
        Only the main process has a visualizer with it. Use multiple
        visualizers in multiprocess to write to a same log file may cause
        unexpected behaviors.
        """
        # visualizer
        visualizer = LogWriter(logdir=str(self.output_dir))

        self.visualizer = visualizer

    def setup_logger(self):
        """Initialize a text logger to log the experiment.

        Each process has its own log file named ``worker_n.log`` in the
        output directory, where ``n`` means the rank of the process. Only a
        file handler is attached here; whether messages also reach the
        console depends on handlers configured on ancestor loggers.
        """
        logger = logging.getLogger(__name__)
        logger.setLevel("INFO")
        log_file = self.output_dir / 'worker_{}.log'.format(dist.get_rank())
        logger.addHandler(logging.FileHandler(str(log_file)))

        self.logger = logger

    @mp_tools.rank_zero_only
    def dump_config(self):
        """Save the configuration used for this experiment.

        It is saved in to ``config.yaml`` in the output directory at the
        beginning of the experiment.
        """
        with open(self.output_dir / "config.yaml", 'wt') as f:
            print(self.config, file=f)

    def train_batch(self):
        """The training loop. A subclass should implement this method.
        """
        raise NotImplementedError("train_batch should be implemented.")

    @mp_tools.rank_zero_only
    @paddle.no_grad()
    def valid(self):
        """The validation. A subclass should implement this method.
        """
        raise NotImplementedError("valid should be implemented.")

    def setup_model(self):
        """Setup model, criterion and optimizer, etc. A subclass should
        implement this method.
        """
        raise NotImplementedError("setup_model should be implemented.")

    def setup_dataloader(self):
        """Setup training dataloader and validation dataloader. A subclass
        should implement this method.
        """
        raise NotImplementedError("setup_dataloader should be implemented.")


================================================
FILE: paddlespeech/t2s/training/extension.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from chainer(https://github.com/chainer/chainer)
from typing import Callable

# Priority levels used to order extensions; larger values are more important.
PRIORITY_WRITER = 300
PRIORITY_EDITOR = 200
PRIORITY_READER = 100


class Extension(object):
    """Base class for hooks that customize the behavior of Trainer."""
    # Class-level defaults; subclasses or instances may override them.
    trigger = (1, 'iteration')
    priority = PRIORITY_READER
    name = None

    @property
    def default_name(self):
        """Name used when none is given: the name of the concrete class."""
        concrete_class = type(self)
        return concrete_class.__name__

    def __call__(self, trainer):
        """Main action of the extension, executed after an update whenever
        the trigger fires. Subclasses must override it."""
        raise NotImplementedError(
            'Extension implementation must override __call__.')

    def initialize(self, trainer):
        """Hook executed once to pick up the correct trainer state.

        It is normally called before training; if the trainer restores its
        state with a Snapshot extension, it should be called again.
        The default implementation does nothing.
        """

    def on_error(self, trainer, exc, tb):
        """Hook for an error raised during training, run before finalization.
        The default implementation does nothing.
        """

    def finalize(self, trainer):
        """Hook executed when training is done, e.g. to close visualizers.
        The default implementation does nothing.
        """


def make_extension(trigger: Callable=None,
                   default_name: str=None,
                   priority: int=None,
                   finalizer: Callable=None,
                   initializer: Callable=None,
                   on_error: Callable=None):
    """Make an Extension-like object by injecting required attributes to it.

    Args:
        trigger: Trigger stored on the decorated object. Defaults to
            ``Extension.trigger``.
        default_name (str): Name stored as ``default_name``. Defaults to the
            ``__name__`` of the decorated object.
        priority (int): Priority stored on the decorated object. Defaults to
            ``Extension.priority``.
        finalizer (Callable): Stored as ``finalize``.
        initializer (Callable): Stored as ``initialize``.
        on_error (Callable): Stored as ``on_error``.

    Returns:
        Callable: A decorator that sets the attributes on the object it
        receives and returns that same object.
    """
    if trigger is None:
        trigger = Extension.trigger
    if priority is None:
        priority = Extension.priority

    def _noop(*args, **kwargs):
        """Stand-in for a lifecycle hook that was not supplied."""
        return None

    def decorator(ext):
        ext.trigger = trigger
        ext.default_name = default_name or ext.__name__
        ext.priority = priority
        # A hook that was not supplied must still be callable: the trainer
        # only checks that the attribute exists before calling it, so storing
        # None here would raise "'NoneType' object is not callable".
        ext.finalize = finalizer if finalizer is not None else _noop
        ext.on_error = on_error if on_error is not None else _noop
        ext.initialize = initializer if initializer is not None else _noop
        return ext

    return decorator


================================================
FILE: paddlespeech/t2s/training/extensions/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/training/extensions/evaluator.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from chainer(https://github.com/chainer/chainer)
from typing import Dict

import paddle
from paddle.io import DataLoader
from paddle.nn import Layer

from paddlespeech.t2s.training import extension
from paddlespeech.t2s.training.reporter import DictSummary
from paddlespeech.t2s.training.reporter import report
from paddlespeech.t2s.training.reporter import scope


class StandardEvaluator(extension.Extension):
    """Extension that runs the model over a dataloader and reports the mean
    of every value reported while doing so.

    Args:
        model (Layer): The model to evaluate.
        dataloader (DataLoader): Source of the evaluation batches.
    """

    trigger = (1, 'epoch')
    default_name = 'validation'
    priority = extension.PRIORITY_WRITER

    name = None

    def __init__(self, model: Layer, dataloader: DataLoader):
        # kept as a name -> layer mapping so that more than one model can be
        # held; `evaluate` switches every entry to eval mode
        self.models: Dict[str, Layer] = {"main": model}
        self.model = model

        # batches to evaluate on
        self.dataloader = dataloader

    def evaluate_core(self, batch):
        """Evaluate a single batch. Values may be reported from here."""
        self.model(batch)

    def evaluate(self):
        """Evaluate every batch of the dataloader with gradients disabled.

        Returns:
            dict: Mean of each reported value over all batches.
        """
        # switch every registered model to eval mode
        for module in self.models.values():
            module.eval()

        # accumulates the per-batch observations so they can be averaged
        accumulator = DictSummary()
        for batch in self.dataloader:
            batch_observation = {}
            # anything reported inside lands in `batch_observation`
            with scope(batch_observation), paddle.no_grad():
                self.evaluate_core(batch)
            accumulator.add(batch_observation)
        return accumulator.compute_mean()

    def __call__(self, trainer=None):
        """Evaluate and report the averaged metrics to the current report
        target: the trainer's observation when used as a trainer extension,
        or whatever observation the caller has put in scope otherwise."""
        for key, mean_value in self.evaluate().items():
            report(key, mean_value)


================================================
FILE: paddlespeech/t2s/training/extensions/snapshot.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from chainer(https://github.com/chainer/chainer)
import logging
import os
from datetime import datetime
from pathlib import Path
from typing import Any
from typing import Dict
from typing import List

import jsonlines

from paddlespeech.t2s.training import extension
from paddlespeech.t2s.training.trainer import Trainer
from paddlespeech.t2s.utils.mp_tools import rank_zero_only


def load_records(records_fp):
    """Read a json-lines record file and return its entries as a list."""
    with jsonlines.open(records_fp, 'r') as reader:
        return [entry for entry in reader]


class Snapshot(extension.Extension):
    """An extension to make snapshot of the updater object inside
    the trainer. It is done by calling the updater's `save` method.

    An Updater save its state_dict by default, which contains the
    updater state, (i.e. epoch and iteration) and all the model
    parameters and optimizer states. If the updater inside the trainer
    subclasses StandardUpdater, everything is good to go.

    Snapshots are written to ``<trainer.out>/checkpoints`` and tracked in a
    ``records.jsonl`` file inside that directory.

    Args:
        max_size (int): Maximum number of snapshots to keep. ``-1`` keeps
            every snapshot. Defaults to 5.
        snapshot_on_error (bool): Whether to save a snapshot when an
            exception is raised in the training loop. Defaults to False.
    """

    trigger = (1, 'epoch')
    priority = -100
    default_name = "snapshot"

    def __init__(self, max_size: int=5, snapshot_on_error: bool=False):
        self.records: List[Dict[str, Any]] = []
        self.max_size = max_size
        self._snapshot_on_error = snapshot_on_error
        self._save_all = (max_size == -1)
        # resolved in `initialize`, once the trainer's output dir is known
        self.checkpoint_dir = None

    def initialize(self, trainer: Trainer):
        """Set up the checkpoint directory and, when a record file already
        exists there, restore the updater from the latest recorded snapshot.
        """
        self.checkpoint_dir = trainer.out / "checkpoints"

        # load existing records
        record_path: Path = self.checkpoint_dir / "records.jsonl"
        if record_path.exists():
            logging.debug("Loading from an existing checkpoint dir")
            self.records = load_records(record_path)
            # A record file may exist but be empty (it is truncated before
            # being rewritten); there is nothing to restore in that case.
            if self.records:
                trainer.updater.load(self.records[-1]['path'])

    def on_error(self, trainer, exc, tb):
        """Save a snapshot on error when `snapshot_on_error` was requested."""
        if self._snapshot_on_error:
            self.save_checkpoint_and_update(trainer)

    def __call__(self, trainer: Trainer):
        """Save a snapshot of the current updater state."""
        self.save_checkpoint_and_update(trainer)

    def full(self):
        """Whether the number of snapshots it keeps track of is greater
        than the max_size."""
        return (not self._save_all) and len(self.records) > self.max_size

    @rank_zero_only
    def save_checkpoint_and_update(self, trainer: Trainer):
        """Save a new snapshot and remove the oldest snapshot if needed."""
        iteration = trainer.updater.state.iteration
        path = self.checkpoint_dir / f"snapshot_iter_{iteration}.pdz"

        # add the new one
        trainer.updater.save(path)
        record = {
            "time": str(datetime.now()),
            'path': str(path.resolve()),  # use absolute path
            'iteration': iteration
        }
        self.records.append(record)

        # remove the earliest
        if self.full():
            earliest_record = self.records[0]
            try:
                os.remove(earliest_record["path"])
            except FileNotFoundError:
                # The file is already gone (e.g. removed by hand); that is
                # the state we want, so do not abort training over it.
                logging.debug("Snapshot %s is already removed.",
                              earliest_record["path"])
            self.records.pop(0)

        # update the record file
        record_path = self.checkpoint_dir / "records.jsonl"
        with jsonlines.open(record_path, 'w') as writer:
            for record in self.records:
                # jsonlines.open may return a Writer or a Reader
                writer.write(record)  # pylint: disable=no-member


================================================
FILE: paddlespeech/t2s/training/extensions/visualizer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from visualdl import LogWriter

from paddlespeech.t2s.training import extension
from paddlespeech.t2s.training.trainer import Trainer


class VisualDL(extension.Extension):
    """Thin wrapper around a visualdl ``LogWriter``.

    Every entry of the trainer's ``observation`` dictionary is written as a
    scalar, so all observed values are expected to be scalars. The updater's
    ``iteration`` is used as the global step of each record.
    """
    trigger = (1, 'iteration')
    default_name = 'visualdl'
    priority = extension.PRIORITY_READER

    def __init__(self, logdir):
        log_path = str(logdir)
        self.writer = LogWriter(log_path)

    def __call__(self, trainer: Trainer):
        """Write every observed value at the current iteration."""
        for tag, scalar in trainer.observation.items():
            current_step = trainer.updater.state.iteration
            self.writer.add_scalar(tag, scalar, step=current_step)

    def finalize(self, trainer):
        """Close the underlying log writer."""
        self.writer.close()


================================================
FILE: paddlespeech/t2s/training/optimizer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle import nn

# Learning rate scheduler classes, looked up by name.
scheduler_classes = {
    "ReduceOnPlateau": paddle.optimizer.lr.ReduceOnPlateau,
    "lambda_decay": paddle.optimizer.lr.LambdaDecay,
    "step_decay": paddle.optimizer.lr.StepDecay,
    "multistep_decay": paddle.optimizer.lr.MultiStepDecay,
    "exponential_decay": paddle.optimizer.lr.ExponentialDecay,
    "CosineAnnealingDecay": paddle.optimizer.lr.CosineAnnealingDecay,
}

# Optimizer classes, looked up by name.
optim_classes = {
    "adadelta": paddle.optimizer.Adadelta,
    "adagrad": paddle.optimizer.Adagrad,
    "adam": paddle.optimizer.Adam,
    "adamax": paddle.optimizer.Adamax,
    "adamw": paddle.optimizer.AdamW,
    "lamb": paddle.optimizer.Lamb,
    "momentum": paddle.optimizer.Momentum,
    "rmsprop": paddle.optimizer.RMSProp,
    "sgd": paddle.optimizer.SGD,
}


def build_optimizers(
        model: nn.Layer,
        optim='adadelta',
        max_grad_norm=None,
        learning_rate=0.01,
        weight_decay=None,
        epsilon=1.0e-6, ) -> paddle.optimizer.Optimizer:
    """Build an optimizer for all parameters of `model`.

    Args:
        model (nn.Layer): Model whose parameters are optimized.
        optim (str): Name of the optimizer, one of the keys of
            ``optim_classes``. Defaults to 'adadelta'.
        max_grad_norm: When truthy, gradients are clipped by global norm
            with this value. Defaults to None (no clipping).
        learning_rate: Learning rate passed to the optimizer.
        weight_decay: Weight decay passed to the optimizer.
        epsilon: Passed as ``epsilon`` to every optimizer except
            'momentum' and 'sgd'.

    Returns:
        The constructed optimizer.

    Raises:
        ValueError: If `optim` is not a known optimizer name.
    """
    optim_class = optim_classes.get(optim)
    if optim_class is None:
        raise ValueError(f"must be one of {list(optim_classes)}: {optim}")

    grad_clip = None
    if max_grad_norm:
        grad_clip = paddle.nn.ClipGradByGlobalNorm(max_grad_norm)

    optim_dict = {
        'parameters': model.parameters(),
        'learning_rate': learning_rate,
        'grad_clip': grad_clip,
    }
    if optim == 'lamb':
        # Lamb names its weight decay argument `lamb_weight_decay` and does
        # not accept `weight_decay`; keep its own default when none is given.
        if weight_decay is not None:
            optim_dict['lamb_weight_decay'] = weight_decay
    else:
        optim_dict['weight_decay'] = weight_decay
    # Compare the optimizer *name*: comparing the class against these strings
    # is always unequal, which would pass `epsilon` to Momentum and SGD even
    # though they do not accept it.
    if optim not in {'momentum', 'sgd'}:
        optim_dict['epsilon'] = epsilon

    return optim_class(**optim_dict)


================================================
FILE: paddlespeech/t2s/training/reporter.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from chainer(https://github.com/chainer/chainer)
import contextlib
import math
from collections import defaultdict

# The dictionary that `report` currently writes into. It stays None until a
# `scope` context installs a target; `report` is a no-op while it is None.
OBSERVATIONS = None


@contextlib.contextmanager
def scope(observations):
    """Context manager that makes `observations` the target of `report`.

    `observations` is basically a dictionary collecting temporary reported
    values. The previous target is restored on exit, even when the body
    raises, so scopes can be nested.
    """
    global OBSERVATIONS
    previous, OBSERVATIONS = OBSERVATIONS, observations

    try:
        yield
    finally:
        OBSERVATIONS = previous


def get_observations():
    """Return the current report target (a dictionary, or None)."""
    return OBSERVATIONS


def report(name, value):
    """Record `value` under `name` in the current report target.

    It can be called from anywhere, much like writing to standard output:
    the value goes to whatever target is currently in scope. Nothing happens
    when there is no target.
    """
    target = get_observations()
    if target is not None:
        target[name] = value


class Summary(object):
    """Online summarization of a sequence of scalars.
    Summary computes the statistics of given scalars online.
    """

    def __init__(self):
        self._x = 0.0  # weighted sum of the values
        self._x2 = 0.0  # weighted sum of the squared values
        self._n = 0  # sum of the weights

    def add(self, value, weight=1):
        """Adds a scalar value.

        Args:
            value: Scalar value to accumulate.
            weight: An optional weight for the value.
                Default is 1 (integer).

        """
        self._x += weight * value
        self._x2 += weight * value * value
        self._n += weight

    def compute_mean(self):
        """Computes the (weighted) mean of the values added so far."""
        x, n = self._x, self._n
        return x / n

    def make_statistics(self):
        """Computes and returns the mean and standard deviation values.

        Returns:
            tuple: Mean and standard deviation values.

        """
        x, n = self._x, self._n
        mean = x / n
        var = self._x2 / n - mean * mean
        # Floating point rounding can make the variance of (nearly) constant
        # values slightly negative, which math.sqrt rejects with a
        # "math domain error"; clamp it to zero.
        std = math.sqrt(max(var, 0.0))
        return mean, std


class DictSummary(object):
    """Online summarization of a sequence of dictionaries.

    One ``Summary`` is kept per key, so the statistics of every named scalar
    are tracked independently as dictionaries are added.

    """

    def __init__(self):
        # a Summary is created on first use of a key
        self._summaries = defaultdict(Summary)

    def add(self, d):
        """Accumulates every entry of a dictionary of scalars.

        Args:
            d (dict): Dictionary of scalars to accumulate. A tuple value is
                read as ``(value, weight)``; any other value gets weight 1.

        """
        for key, item in d.items():
            if isinstance(item, tuple):
                value, weight = item[0], item[1]
            else:
                value, weight = item, 1
            self._summaries[key].add(value, weight=weight)

    def compute_mean(self):
        """Creates a dictionary of mean values.

        Returns:
            dict: The mean of each entry added so far, keyed by entry name.

        """
        means = {}
        for key, summary in self._summaries.items():
            means[key] = summary.compute_mean()
        return means

    def make_statistics(self):
        """Creates a dictionary of statistics.

        For an entry named ``'key'`` the mean is stored under ``'key'`` and
        the standard deviation under ``'key.std'``.

        Returns:
            dict: Dictionary of statistics of all entries.

        """
        stats = {}
        for key, summary in self._summaries.items():
            stats[key], stats[key + '.std'] = summary.make_statistics()
        return stats


================================================
FILE: paddlespeech/t2s/training/seeding.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import random

import numpy as np
import paddle


def seed_everything(seed: int):
    """Seed paddle, random and np.random with the same value to help
    reproducibility."""
    for seeder in (paddle.seed, random.seed, np.random.seed):
        seeder(seed)
    logging.debug(f"Set the seed of paddle, random, np.random to {seed}.")


================================================
FILE: paddlespeech/t2s/training/trainer.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import traceback
from collections import OrderedDict
from pathlib import Path
from typing import Callable
from typing import List
from typing import Union

import six
import paddle
from paddlespeech.t2s.training.extension import Extension
from paddlespeech.t2s.training.extension import PRIORITY_READER
from paddlespeech.t2s.training.reporter import scope
from paddlespeech.t2s.training.trigger import get_trigger
from paddlespeech.t2s.training.triggers.limit_trigger import LimitTrigger
from paddlespeech.t2s.training.updater import UpdaterBase
from paddlespeech.t2s.utils import profiler


class _ExtensionEntry(object):
    def __init__(self, extension, trigger, priority):
        self.extension = extension
        self.trigger = trigger
        self.priority = priority


class Trainer(object):
    """Runs the training loop: it repeatedly calls the updater and invokes
    the registered extensions until the stop trigger fires.

    Args:
        updater (UpdaterBase): Object that performs one training step per
            call of its ``update`` method. The loop also reads its ``state``,
            ``batch_read_time``, ``batch_time``, ``batch_size``, ``msg``,
            ``logger`` and ``filehandler`` attributes.
        stop_trigger: Either a ``(limit, unit)`` pair used to build a
            ``LimitTrigger``, or a callable that takes the trainer and
            returns True when training should stop. With None, no stop
            condition is installed and the loop never stops on its own.
        out (Union[str, Path]): Output directory; it is created by ``run``.
        extensions (List[Extension]): Extensions to register right away.
        profiler_options (str): When set, a profiler step is added after
            every update with these options.
    """

    def __init__(self,
                 updater: UpdaterBase,
                 stop_trigger: Union[Callable, tuple]=None,
                 out: Union[str, Path]='result',
                 extensions: List[Extension]=None,
                 profiler_options: str=None):
        self.updater = updater
        self.extensions = OrderedDict()
        # Only a (limit, unit) pair can be unpacked into a LimitTrigger;
        # None and ready-made callables go through get_trigger, which maps
        # None to a trigger that never fires and returns callables as-is.
        if stop_trigger is None or callable(stop_trigger):
            self.stop_trigger = get_trigger(stop_trigger)
        else:
            self.stop_trigger = LimitTrigger(*stop_trigger)
        self.out = Path(out)
        self.observation = None
        self.profiler_options = profiler_options
        self._done = False
        # bookkeeping for `elapsed_time`
        self._start_at = None
        self._final_elapsed_time = None
        if extensions:
            for ext in extensions:
                self.extend(ext)

    @property
    def is_before_training(self):
        """Whether no training iteration has been performed yet."""
        return self.updater.state.iteration == 0

    @property
    def elapsed_time(self):
        """Seconds elapsed since ``run`` was started.

        It is 0.0 before ``run`` is called and frozen once training has
        finished normally. Time based triggers read this attribute.
        """
        import time  # local import keeps this block self-contained

        if self._done:
            return self._final_elapsed_time
        if self._start_at is None:
            return 0.0
        return time.monotonic() - self._start_at

    def extend(self, extension, name=None, trigger=None, priority=None):
        """Register an extension.

        Args:
            extension: The extension, called with the trainer as its only
                argument when its trigger fires.
            name (str): Name to register it under. When omitted, the first
                non-None value of the extension's ``name``, ``default_name``
                and ``__name__`` attributes is used. A numeric suffix is
                appended if the name is already taken.
            trigger: Trigger to use. Defaults to the extension's ``trigger``
                attribute, or ``(1, 'iteration')`` if it has none.
            priority (int): Priority to use. Defaults to the extension's
                ``priority`` attribute, or ``PRIORITY_READER`` if it has none.

        Raises:
            ValueError: If no name can be determined or if the name is the
                reserved name 'training'.
        """
        if name is None:
            for attr in ('name', 'default_name', '__name__'):
                name = getattr(extension, attr, None)
                if name is not None:
                    break
            else:
                raise ValueError("Name is not given for the extension.")
        if name == 'training':
            raise ValueError("training is a reserved name.")

        if trigger is None:
            trigger = getattr(extension, 'trigger', (1, 'iteration'))
        trigger = get_trigger(trigger)

        if priority is None:
            priority = getattr(extension, 'priority', PRIORITY_READER)

        # add suffix to avoid naming conflict
        ordinal = 0
        modified_name = name
        while modified_name in self.extensions:
            ordinal += 1
            modified_name = f"{name}_{ordinal}"
        extension.name = modified_name

        self.extensions[modified_name] = _ExtensionEntry(extension, trigger,
                                                         priority)

    def get_extension(self, name):
        """Get a registered extension by name.

        Raises:
            ValueError: If no extension is registered under `name`.
        """
        extensions = self.extensions
        if name in extensions:
            return extensions[name].extension
        else:
            raise ValueError(f'extension {name} not found')

    def run(self):
        """Run the training loop until the stop trigger fires.

        Extensions are initialized first, invoked after every update for
        which their trigger fires (highest priority first), given the chance
        to handle an exception raised in the loop, and always finalized.

        Raises:
            RuntimeError: If training has already been completed.
        """
        import time  # local import keeps this block self-contained

        if self._done:
            raise RuntimeError("Training is already done!.")

        self.out.mkdir(parents=True, exist_ok=True)

        # sort extensions by priorities once
        extension_order = sorted(
            self.extensions.keys(),
            key=lambda name: self.extensions[name].priority,
            reverse=True)
        extensions = [(name, self.extensions[name]) for name in extension_order]

        self._start_at = time.monotonic()

        # initializing all extensions; a hook may be present but set to None
        # (e.g. on function based extensions), so check that it is callable
        for name, entry in extensions:
            initialize = getattr(entry.extension, "initialize", None)
            if callable(initialize):
                initialize(self)

        update = self.updater.update  # training step

        stop_trigger = self.stop_trigger

        # total number of iterations shown in the log; None when unknown
        max_iteration = None
        if isinstance(stop_trigger, LimitTrigger):
            if stop_trigger.unit == 'epoch':
                max_epoch = stop_trigger.limit
                updates_per_epoch = getattr(self.updater, "updates_per_epoch",
                                            None)
                max_iteration = max_epoch * updates_per_epoch if updates_per_epoch else None
            else:
                max_iteration = stop_trigger.limit

        try:
            while not stop_trigger(self):
                self.observation = {}
                # set observation as the report target
                # you can use report freely in Updater.update()

                # updating parameters and state
                with scope(self.observation):

                    update()
                    if self.profiler_options:
                        profiler.add_profiler_step(self.profiler_options)
                    batch_read_time = self.updater.batch_read_time
                    batch_time = self.updater.batch_time
                    avg_batch_cost = batch_read_time + batch_time
                    logger = self.updater.logger
                    logger.removeHandler(self.updater.filehandler)
                    msg = self.updater.msg
                    msg = " iter: {}/{}, ".format(self.updater.state.iteration,
                                                  max_iteration) + msg
                    msg += ", avg_reader_cost: {:.5f} sec, ".format(
                        batch_read_time
                    ) + "avg_batch_cost: {:.5f} sec, ".format(avg_batch_cost)
                    msg += "avg_samples: {}, ".format(
                        self.updater.
                        batch_size) + "avg_ips: {:.5f} sequences/sec,".format(
                            self.updater.batch_size / avg_batch_cost)
                    if paddle.device.is_compiled_with_cuda():
                        max_mem_reserved_str = f" max_mem_reserved: {paddle.device.cuda.max_memory_reserved() // (1024 ** 2)} MB"
                        max_mem_allocated_str = f" max_mem_allocated: {paddle.device.cuda.max_memory_allocated() // (1024 ** 2)} MB"
                        msg += max_mem_reserved_str + "," + max_mem_allocated_str

                    logger.info(msg)

                    # execute extension when necessary
                    for name, entry in extensions:
                        if entry.trigger(self):
                            entry.extension(self)

        except Exception as e:
            f = sys.stderr
            f.write(f"Exception in main training loop: {e}\n")
            f.write("Traceback (most recent call last):\n")
            traceback.print_tb(sys.exc_info()[2])
            f.write(
                "Trainer extensions will try to handle the exception. Then all extensions will finalize.\n"
            )

            # capture the exception in the main training loop
            exc_info = sys.exc_info()

            # try to handle it
            for name, entry in extensions:
                on_error = getattr(entry.extension, "on_error", None)
                if callable(on_error):
                    try:
                        on_error(self, e, sys.exc_info()[2])
                    except Exception as ee:
                        f.write(f"Exception in error handler: {ee}\n")
                        f.write('Traceback (most recent call last):\n')
                        traceback.print_tb(sys.exc_info()[2])

            # raise exception in main training loop
            six.reraise(*exc_info)
        finally:
            for name, entry in extensions:
                finalize = getattr(entry.extension, "finalize", None)
                if callable(finalize):
                    finalize(self)

        # Only reached when the loop ended without an exception: freeze the
        # elapsed time and make the guard at the top of this method effective.
        self._final_elapsed_time = self.elapsed_time
        self._done = True


================================================
FILE: paddlespeech/t2s/training/trigger.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddlespeech.t2s.training.triggers.interval_trigger import IntervalTrigger


def never_fail_trigger(trainer):
    """Trigger that never fires: returns False for any trainer."""
    return False


def get_trigger(trigger):
    """Normalize a trigger specification into a callable trigger.

    None maps to a trigger that never fires, a callable is returned as it
    is, and anything else is unpacked into the arguments of IntervalTrigger.
    """
    if trigger is None:
        return never_fail_trigger
    return trigger if callable(trigger) else IntervalTrigger(*trigger)


================================================
FILE: paddlespeech/t2s/training/triggers/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/training/triggers/interval_trigger.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Reference chainer MIT (https://opensource.org/licenses/MIT)


class IntervalTrigger(object):
    """A predicate that fires every `period` iterations or epochs.

    Args:
        period (int): Length of the interval; must be positive.
        unit (str): Either 'iteration' or 'epoch'.
    """

    def __init__(self, period: int, unit: str):
        valid_units = ("iteration", "epoch")
        if unit not in valid_units:
            raise ValueError("unit should be 'iteration' or 'epoch'")
        if period <= 0:
            raise ValueError("period should be a positive integer.")
        self.period, self.unit = period, unit
        # value of the counter at the previous call; None before the first
        self.last_index = None

    def __call__(self, trainer):
        """Return True when the counter has entered a new interval since the
        previous call. The first call only records the counter and never
        fires."""
        current = getattr(trainer.updater.state, self.unit)
        previous = current if self.last_index is None else self.last_index
        self.last_index = current
        return current // self.period != previous // self.period


================================================
FILE: paddlespeech/t2s/training/triggers/limit_trigger.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Reference chainer MIT (https://opensource.org/licenses/MIT)


class LimitTrigger(object):
    """A predicate that tells whether a limit of iterations or epochs has
    been reached, i.e. whether to stop.

    Args:
        limit (int): The limit; must be positive.
        unit (str): Either 'iteration' or 'epoch'.
    """

    def __init__(self, limit: int, unit: str):
        valid_units = ("iteration", "epoch")
        if unit not in valid_units:
            raise ValueError("unit should be 'iteration' or 'epoch'")
        if limit <= 0:
            raise ValueError("limit should be a positive integer.")
        self.limit, self.unit = limit, unit

    def __call__(self, trainer):
        """Return whether the updater's counter has reached the limit."""
        progress = getattr(trainer.updater.state, self.unit)
        return progress >= self.limit


================================================
FILE: paddlespeech/t2s/training/triggers/time_trigger.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Reference chainer MIT (https://opensource.org/licenses/MIT)


class TimeTrigger(object):
    """Trigger that fires at a fixed wall-clock interval.

    The trigger keeps a deadline that starts at ``period``. Each call compares
    the trainer's elapsed time against that deadline; when the deadline has
    been passed the trigger fires and the deadline moves forward by one
    period.

    Args:
        period (float): Interval between firings, in seconds.

    """

    def __init__(self, period):
        self._period = period
        # First deadline is one full period after the start of training.
        self._next_time = self._period

    def __call__(self, trainer):
        """Return True (and advance the deadline) if the deadline has passed."""
        if not (self._next_time < trainer.elapsed_time):
            return False
        # Advance by exactly one period per firing, however far behind we are.
        self._next_time += self._period
        return True


================================================
FILE: paddlespeech/t2s/training/updater.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from chainer(https://github.com/chainer/chainer)
import logging
from dataclasses import dataclass

import paddle


@dataclass
class UpdaterState:
    """Progress counters held by an updater (see ``UpdaterBase.state``)."""
    # Iteration counter; starts at 0.
    iteration: int = 0
    # Epoch counter; starts at 0.
    epoch: int = 0


class UpdaterBase(object):
    """Abstraction of how a model is trained from a dataloader and an optimizer.

    An updater owns one training step and nothing more: fetch a batch, run the
    forward and backward passes, and update the parameters. Visualization,
    saving, loading and periodic validation/evaluation are left to extensions.

    Even this minimal step is hard to standardize. One could require only a
    model and a dataset and automate everything else, but that costs
    flexibility:

    * If a batch is assumed to be exactly the model's input, models that need
      extra positional or keyword arguments no longer fit.
    * A batch often carries the targets as well as the inputs, while the
      model's forward method may only need the inputs. Passing the model a
      dict or a very long tuple and letting it pick what it needs abuses a
      lazy interface.

    What matters here is how a model is trained, not how it is used for
    inference, and that training logic should stay free of auxiliary code.
    The recommended practice is therefore to define a model and then define a
    dedicated updater for it.
    """

    # Names of the counters that are copied to and from a state dict.
    _STATE_KEYS = ("epoch", "iteration")

    def __init__(self, init_state=None):
        # Start from fresh counters unless the caller supplies a state.
        self.state = UpdaterState() if init_state is None else init_state

    def update(self, batch):
        """Run one training step; subclasses must provide this."""
        raise NotImplementedError(
            "Implement your own `update` method for training a step.")

    def state_dict(self):
        """Return the epoch and iteration counters as a plain dict."""
        return {key: getattr(self.state, key) for key in self._STATE_KEYS}

    def set_state_dict(self, state_dict):
        """Restore the epoch and iteration counters from ``state_dict``."""
        for key in self._STATE_KEYS:
            setattr(self.state, key, state_dict[key])

    def save(self, path):
        """Serialize ``state_dict()`` to ``path`` with ``paddle.save``."""
        logging.debug(f"Saving to {path}.")
        paddle.save(self.state_dict(), str(path))

    def load(self, path):
        """Load an archive from ``path`` and apply it via ``set_state_dict``."""
        logging.debug(f"Loading from {path}.")
        self.set_state_dict(paddle.load(str(path)))


================================================
FILE: paddlespeech/t2s/training/updaters/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/t2s/training/updaters/standard_updater.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from chainer(https://github.com/chainer/chainer)
import logging
import time
from typing import Dict
from typing import Optional

from paddle import Tensor
from paddle.io import DataLoader
from paddle.io import DistributedBatchSampler
from paddle.nn import Layer
from paddle.optimizer import Optimizer
from timer import timer

from paddlespeech.t2s.datasets.sampler import ErnieSATSampler
from paddlespeech.t2s.training.reporter import report
from paddlespeech.t2s.training.updater import UpdaterBase
from paddlespeech.t2s.training.updater import UpdaterState


class StandardUpdater(UpdaterBase):
    """A deliberately simple updater: one model, one optimizer, one dataloader.

    Real training setups are often more complicated; subclass this and
    override ``update_core`` (and whatever else is needed) to fit them.

    Args:
        model (Layer): The model to train.
        optimizer (Optimizer): The optimizer that updates ``model``.
        dataloader (DataLoader): Source of training batches.
        init_state (Optional[UpdaterState]): Counters to resume from. A fresh
            ``UpdaterState`` is created when omitted.
    """

    def __init__(self,
                 model: Layer,
                 optimizer: Optimizer,
                 dataloader: DataLoader,
                 init_state: Optional[UpdaterState]=None):
        # The base class installs ``self.state`` (fresh or resumed).
        super().__init__(init_state)

        # it is designed to hold multiple models
        self.models: Dict[str, Layer] = {"main": model}
        self.model = model

        # it is designed to hold multiple optimizers
        self.optimizers: Dict[str, Optimizer] = {"main": optimizer}
        self.optimizer = optimizer

        # dataloaders
        self.dataloader = dataloader
        self.train_iterator = iter(dataloader)

        # Per-step statistics, refreshed by every call to ``update``.
        # ``batch_size`` is initialised here so that it always exists, even
        # when a batch is neither a dict nor a list.
        self.batch_read_time = 0
        self.batch_time = 0
        self.batch_size = 0

    def update(self):
        """Run one training step and advance the iteration/epoch counters."""
        # The iteration index is increased after updating and before any
        # extension is executed. Here are the reasons.
        #
        # 1. Snapshotting (as well as other extensions, like the visualizer)
        #    is executed after a step of updating.
        # 2. We do not increase the iteration after the extensions because we
        #    prefer a consistent resume behavior: when loading from
        #    `snapshot_iter_100.pdz`, the next step to train is naturally
        #    `101`. If the iteration were increased after the extensions
        #    (including the snapshot), a `snapshot_iter_99` would be loaded
        #    instead, and an extra increment of the iteration index would be
        #    needed before training to avoid repeating iteration `99`, which
        #    had already been done before snapshotting.
        # 3. Thus the iteration index represents "how many iterations have
        #    been done so far".
        #
        # NOTE: use `report` to capture the correct value. If you want to
        # report the learning rate used for a step, you must report it before
        # the learning rate scheduler's step() has been called. In paddle's
        # convention, we do not use an extension to change the learning rate,
        # so if you want to report it, do it in the updater.
        #
        # When is the proper time to increase the epoch index? Since all
        # extensions are executed after updating, right after updating is the
        # proper time.
        #
        # 1. If the epoch index were increased before updating, an extension
        #    based on epochs would miss the correct timing: it could only be
        #    triggered after an extra update.
        # 2. Conceptually, the epoch index should be increased when an epoch
        #    is done, i.e. after updating.
        # 3. Thus the epoch index represents "how many epochs have been done
        #    so far", and it starts from 0.

        # switch to training mode
        for layer in self.models.values():
            layer.train()

        # training for a step is implemented here
        time_before_read = time.time()
        batch = self.read_batch()
        time_before_core = time.time()
        self.update_core(batch)
        self.batch_time = time.time() - time_before_core
        self.batch_read_time = time_before_core - time_before_read
        if isinstance(batch, dict):
            # length of the value stored under the first key
            self.batch_size = len(list(batch.items())[0][-1])
        # for pwg
        elif isinstance(batch, list):
            self.batch_size = batch[0].shape[0]

        self.state.iteration += 1
        # Query the dataloader length once per step instead of twice.
        updates_per_epoch = self.updates_per_epoch
        if updates_per_epoch is not None:
            if self.state.iteration % updates_per_epoch == 0:
                self.state.epoch += 1

    def update_core(self, batch):
        """A simple case for a training step. Basic assumptions are:
        Single model;
        Single optimizer;
        A batch from the dataloader is just the input of the model;
        The model returns a single loss, or a dict containing several losses.
        Parameters are updated at every batch, no gradient accumulation.
        """
        loss = self.model(*batch)

        if isinstance(loss, Tensor):
            loss_dict = {"main": loss}
        else:
            # Dict[str, Tensor]; copy it so that adding the "main" entry does
            # not modify the object returned by the model.
            loss_dict = dict(loss)
            if "main" not in loss_dict:
                main_loss = 0
                for loss_item in loss.values():
                    main_loss += loss_item
                loss_dict["main"] = main_loss

        for name, loss_item in loss_dict.items():
            report(name, float(loss_item))

        # paddle.optimizer.Optimizer exposes `clear_grad()` and `step()`.
        self.optimizer.clear_grad()
        loss_dict["main"].backward()
        self.optimizer.step()

    @property
    def updates_per_epoch(self):
        """Number of updates per epoch, determined by the length of the
        dataloader. ``None`` when the length cannot be determined."""
        try:
            return len(self.dataloader)
        except Exception:
            # Best effort: a dataloader without a usable length simply
            # disables epoch counting. Unlike a `return` inside `finally`,
            # this does not swallow KeyboardInterrupt/SystemExit.
            logging.debug("This dataloader has no __len__.")
            return None

    def new_epoch(self):
        """Start a new epoch."""
        # NOTE: all batch sampler for distributed training should
        # subclass DistributedBatchSampler and implement `set_epoch` method
        batch_sampler = self.dataloader.batch_sampler
        if isinstance(batch_sampler,
                      (DistributedBatchSampler, ErnieSATSampler)):
            batch_sampler.set_epoch(self.state.epoch)
        self.train_iterator = iter(self.dataloader)

    def read_batch(self):
        """Read a batch from the data loader, auto renew when data is exhausted."""
        with timer() as t:
            try:
                batch = next(self.train_iterator)
            except StopIteration:
                # The current pass over the data is finished: restart the
                # iterator and read the first batch of the next pass.
                self.new_epoch()
                batch = next(self.train_iterator)
            logging.debug(
                f"Read a batch takes {t.elapse}s.")  # replace it with logging
        return batch

    def state_dict(self):
        """State dict of a Updater, model, optimizer and updater state are included."""
        state_dict = super().state_dict()
        for name, layer in self.models.items():
            state_dict[f"{name}_params"] = layer.state_dict()
        for name, optim in self.optimizers.items():
            state_dict[f"{name}_optimizer"] = optim.state_dict()
        return state_dict

    def set_state_dict(self, state_dict):
        """Set state dict for a Updater. Parameters of models, states for
        optimizers and UpdaterState are restored."""
        for name, layer in self.models.items():
            layer.set_state_dict(state_dict[f"{name}_params"])
        for name, optim in self.optimizers.items():
            optim.set_state_dict(state_dict[f"{name}_optimizer"])
        super().set_state_dict(state_dict)


================================================
FILE: paddlespeech/t2s/utils/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import checkpoint
from . import display
from . import layer_tools
from . import mp_tools
from . import scheduler


def str2bool(str):
    """Return True only when ``str`` equals ``'true'``, ignoring case."""
    lowered = str.lower()
    return lowered == 'true'


================================================
FILE: paddlespeech/t2s/utils/checkpoint.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import paddle
from paddle import distributed as dist

from paddlespeech.t2s.utils import mp_tools

__all__ = ["load_parameters", "save_parameters"]


def _load_latest_checkpoint(checkpoint_dir: str) -> int:
    """Get the iteration number corresponding to the latest saved checkpoint.

    Args:
        checkpoint_dir (str):
            the directory where checkpoint is saved.

    Returns:
        int: the latest iteration number.
    """
    checkpoint_record = os.path.join(checkpoint_dir, "checkpoint")
    if (not os.path.isfile(checkpoint_record)):
        return 0

    # Fetch the latest checkpoint index.
    with open(checkpoint_record, "rt") as handle:
        latest_checkpoint = handle.readline().split()[-1]
        iteration = int(latest_checkpoint.split("-")[-1])

    return iteration


def _save_checkpoint(checkpoint_dir: str, iteration: int):
    """Save the iteration number of the latest model to be checkpointed.

    Args:
        checkpoint_dir (str): 
            the directory where checkpoint is saved.
        iteration (int): 
            the latest iteration number.

    Returns:
        None
    """
    checkpoint_record = os.path.join(checkpoint_dir, "checkpoint")
    # Update the latest checkpoint index.
    with open(checkpoint_record, "wt") as handle:
        handle.write("model_checkpoint_path: step-{}".format(iteration))


def load_parameters(model,
                    optimizer=None,
                    checkpoint_dir=None,
                    checkpoint_path=None):
    """Restore model parameters (and optionally optimizer state) from disk.

    Args:
        model (Layer):
            model to load parameters.
        optimizer (Optimizer, optional):
            optimizer to load states if needed. Defaults to None.
        checkpoint_dir (str, optional):
            the directory where checkpoint is saved. The latest checkpoint
            recorded in that directory is used.
        checkpoint_path (str, optional):
            checkpoint path without the file extension. If specified, this
            checkpoint is loaded and 'checkpoint_dir' is ignored.
            Defaults to None.

    Returns:
        iteration (int): number of iterations that the loaded checkpoint has
            been trained. 0 is returned, and nothing is loaded, when
            'checkpoint_dir' holds no checkpoint record.

    Raises:
        ValueError: if neither 'checkpoint_dir' nor 'checkpoint_path' is given.
    """
    if checkpoint_path is None and checkpoint_dir is None:
        raise ValueError(
            "At least one of 'checkpoint_dir' and 'checkpoint_path' should be specified!"
        )

    if checkpoint_path is not None:
        # An explicit path wins; the iteration is the suffix after the last "-".
        step_suffix = os.path.basename(checkpoint_path).split("-")[-1]
        iteration = int(step_suffix)
    else:
        iteration = _load_latest_checkpoint(checkpoint_dir)
        if iteration == 0:
            return iteration
        checkpoint_path = os.path.join(checkpoint_dir,
                                       "step-{}".format(iteration))

    local_rank = dist.get_rank()

    params_path = checkpoint_path + ".pdparams"
    model.set_state_dict(paddle.load(params_path))
    print("[checkpoint] Rank {}: loaded model from {}".format(local_rank,
                                                              params_path))

    # The optimizer state is optional: it is restored only when an optimizer
    # was passed in and a matching state file exists.
    optimizer_path = checkpoint_path + ".pdopt"
    if optimizer and os.path.isfile(optimizer_path):
        optimizer.set_state_dict(paddle.load(optimizer_path))
        print("[checkpoint] Rank {}: loaded optimizer state from {}".format(
            local_rank, optimizer_path))

    return iteration


@mp_tools.rank_zero_only
def save_parameters(checkpoint_dir, iteration, model, optimizer=None):
    """Write a checkpoint of the model (and optionally the optimizer).

    Files are named ``step-<iteration>.pdparams`` and, when an optimizer is
    given, ``step-<iteration>.pdopt``. The checkpoint record in
    ``checkpoint_dir`` is then updated to point at this iteration.

    Args:
        checkpoint_dir (str):
            the directory where checkpoint is saved.
        iteration (int):
            the latest iteration number.
        model (Layer):
            model to be checkpointed.
        optimizer (Optimizer, optional):
            optimizer to be checkpointed. Defaults to None.

    Returns:
        None
    """
    path_prefix = os.path.join(checkpoint_dir, "step-{}".format(iteration))

    params_path = path_prefix + ".pdparams"
    paddle.save(model.state_dict(), params_path)
    print("[checkpoint] Saved model to {}".format(params_path))

    if optimizer:
        optimizer_path = path_prefix + ".pdopt"
        paddle.save(optimizer.state_dict(), optimizer_path)
        print("[checkpoint] Saved optimzier state to {}".format(optimizer_path))

    # Update the record last, after the checkpoint files have been written.
    _save_checkpoint(checkpoint_dir, iteration)


================================================
FILE: paddlespeech/t2s/utils/display.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import librosa.display
import matplotlib.pylab as plt

__all__ = [
    "plot_alignment",
    "plot_spectrogram",
    "plot_waveform",
    "plot_multihead_alignments",
    "plot_multilayer_multihead_alignments",
]


def plot_alignment(alignment, title=None):
    """Draw a single attention alignment as a heat map and return the figure.

    Args:
        alignment: 2-D alignment, laid out as [encoder_steps, decoder_steps].
        title (str, optional): Text appended below the x-axis label.

    Returns:
        The matplotlib figure that was created.
    """
    fig, ax = plt.subplots(figsize=(6, 4))
    heatmap = ax.imshow(
        alignment, aspect='auto', origin='lower', interpolation='none')
    fig.colorbar(heatmap, ax=ax)

    # The title, when given, is shown as extra lines under the axis label.
    label_parts = ['Decoder timestep']
    if title is not None:
        label_parts.append(title)
    plt.xlabel('\n\n'.join(label_parts))
    plt.ylabel('Encoder timestep')
    plt.tight_layout()
    return fig


def plot_multihead_alignments(alignments, title=None):
    """Draw the alignments of several attention heads side by side.

    Args:
        alignments: Array laid out as [N, encoder_steps, decoder_steps]; one
            subplot is drawn per entry along the first axis.
        title (str, optional): Text appended below every x-axis label.

    Returns:
        The matplotlib figure that was created.
    """
    # alignments: [N, encoder_steps, decoder_steps)
    num_subplots = alignments.shape[0]

    # squeeze=False always yields a 2-D array of Axes. With squeeze=True a
    # single head (N == 1) would return a bare Axes object, which cannot be
    # iterated over.
    fig, axes = plt.subplots(
        figsize=(6 * num_subplots, 4),
        ncols=num_subplots,
        sharey=True,
        squeeze=False)

    # The label is the same for every subplot, so build it once.
    xlabel = 'Decoder timestep'
    if title is not None:
        xlabel += '\n\n' + title

    for i, ax in enumerate(axes[0]):
        im = ax.imshow(
            alignments[i], aspect='auto', origin='lower', interpolation='none')
        fig.colorbar(im, ax=ax)
        ax.set_xlabel(xlabel)
        if i == 0:
            # The y axis is shared, so only the leftmost plot is labelled.
            ax.set_ylabel('Encoder timestep')
    plt.tight_layout()
    return fig


def plot_multilayer_multihead_alignments(alignments, title=None):
    """Draw a grid of alignments: one row per layer, one column per head.

    Args:
        alignments: Array laid out as
            [num_layers, num_heads, encoder_steps, decoder_steps].
        title (str, optional): Text appended below the x-axis labels.

    Returns:
        The matplotlib figure that was created.
    """
    # alignments: [num_layers, num_heads, encoder_steps, decoder_steps)
    num_layers, num_heads, *_ = alignments.shape

    # squeeze=False keeps `axes` two-dimensional even when there is only one
    # layer or one head. With squeeze=True those cases collapse to a 1-D array
    # or a bare Axes, and the nested iteration below fails.
    fig, axes = plt.subplots(
        figsize=(6 * num_heads, 4 * num_layers),
        nrows=num_layers,
        ncols=num_heads,
        sharex=True,
        sharey=True,
        squeeze=False)

    # The label is the same for every subplot, so build it once.
    xlabel = 'Decoder timestep'
    if title is not None:
        xlabel += '\n\n' + title

    for i, row in enumerate(axes):
        for j, ax in enumerate(row):
            im = ax.imshow(
                alignments[i, j],
                aspect='auto',
                origin='lower',
                interpolation='none')
            fig.colorbar(im, ax=ax)
            # Axes are shared: label x only on the bottom row and y only on
            # the leftmost column.
            if i == num_layers - 1:
                ax.set_xlabel(xlabel)
            if j == 0:
                ax.set_ylabel('Encoder timestep')
    plt.tight_layout()
    return fig


def plot_spectrogram(spec):
    """Render a spectrogram as an image with a colorbar and return the figure.

    Args:
        spec: 2-D array laid out as [C, T] (librosa convention).

    Returns:
        The matplotlib figure that was created.
    """
    figure, axis = plt.subplots(figsize=(12, 3))
    image = axis.imshow(
        spec, aspect="auto", origin="lower", interpolation='none')
    plt.colorbar(image, ax=axis)

    plt.xlabel("Frames")
    plt.ylabel("Channels")
    plt.tight_layout()
    return figure


def plot_waveform(wav, sr=22050):
    """Plot a waveform and return the figure.

    Args:
        wav: Audio samples to plot.
        sr (int, optional): Sampling rate of ``wav``. Defaults to 22050.

    Returns:
        The matplotlib figure that was created.
    """
    fig, ax = plt.subplots(figsize=(12, 3))
    # Forward the caller's sampling rate; a hard-coded 22050 here made the
    # `sr` argument a no-op and mislabelled the time axis for other rates.
    im = librosa.display.waveplot(wav, sr=sr)
    plt.colorbar(im, ax=ax)
    plt.tight_layout()
    return fig


================================================
FILE: paddlespeech/t2s/utils/error_rate.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This module provides functions to calculate error rates at different levels,
e.g. WER at the word level and CER at the character level.
"""
import numpy as np

__all__ = ['word_errors', 'char_errors', 'wer', 'cer']


def _levenshtein_distance(ref, hyp):
    """Levenshtein distance is a string metric for measuring the difference
    between two sequences. Informally, the levenshtein disctance is defined as
    the minimum number of single-character edits (substitutions, insertions or
    deletions) required to change one word into the other. We can naturally
    extend the edits to word level when calculate levenshtein disctance for
    two sentences.
    """
    m = len(ref)
    n = len(hyp)

    # special case
    if ref == hyp:
        return 0
    if m == 0:
        return n
    if n == 0:
        return m

    if m < n:
        ref, hyp = hyp, ref
        m, n = n, m

    # use O(min(m, n)) space
    distance = np.zeros((2, n + 1), dtype=np.int32)

    # initialize distance matrix
    for j in range(n + 1):
        distance[0][j] = j

    # calculate levenshtein distance
    for i in range(1, m + 1):
        prev_row_idx = (i - 1) % 2
        cur_row_idx = i % 2
        distance[cur_row_idx][0] = i
        for j in range(1, n + 1):
            if ref[i - 1] == hyp[j - 1]:
                distance[cur_row_idx][j] = distance[prev_row_idx][j - 1]
            else:
                s_num = distance[prev_row_idx][j - 1] + 1
                i_num = distance[cur_row_idx][j - 1] + 1
                d_num = distance[prev_row_idx][j] + 1
                distance[cur_row_idx][j] = min(s_num, i_num, d_num)

    return distance[m % 2][n]


def word_errors(reference, hypothesis, ignore_case=False, delimiter=' '):
    """Word-level Levenshtein distance between a reference and a hypothesis.

    Both sentences are split on ``delimiter`` and empty items are dropped
    before the distance is computed.

    Args:
        reference (str):
            The reference sentence.
        hypothesis (str):
            The hypothesis sentence.
        ignore_case (bool):
            If True, both sentences are lower-cased before comparison.
        delimiter (char(str)):
            Delimiter of input sentences.

    Returns:
        tuple: Levenshtein distance (float) and word number of the reference
            sentence (int).
    """
    if ignore_case:
        reference, hypothesis = reference.lower(), hypothesis.lower()

    ref_words = [word for word in reference.split(delimiter) if word]
    hyp_words = [word for word in hypothesis.split(delimiter) if word]

    num_edits = _levenshtein_distance(ref_words, hyp_words)
    return float(num_edits), len(ref_words)


def char_errors(reference, hypothesis, ignore_case=False, remove_space=False):
    """Char-level Levenshtein distance between a reference and a hypothesis.

    Both sentences are split on single spaces, empty items are dropped, and
    the remaining pieces are re-joined (with one space, or with nothing when
    ``remove_space`` is set) before the distance is computed.

    Args:
        reference (str): The reference sentence.
        hypothesis (str): The hypothesis sentence.
        ignore_case (bool): If True, both sentences are lower-cased first.
        remove_space (bool): Whether remove internal space characters

    Returns:
        tuple: Levenshtein distance (float) and length of the normalized
            reference sentence (int).
    """
    if ignore_case:
        reference, hypothesis = reference.lower(), hypothesis.lower()

    separator = '' if remove_space else ' '

    ref_text = separator.join(piece for piece in reference.split(' ') if piece)
    hyp_text = separator.join(
        piece for piece in hypothesis.split(' ') if piece)

    num_edits = _levenshtein_distance(ref_text, hyp_text)
    return float(num_edits), len(ref_text)


def wer(reference, hypothesis, ignore_case=False, delimiter=' '):
    """Calculate word error rate (WER). WER compares reference text and
    hypothesis text at the word level. WER is defined as:
    .. math::
        WER = (Sw + Dw + Iw) / Nw
    where
    .. code-block:: text
        Sw is the number of words substituted,
        Dw is the number of words deleted,
        Iw is the number of words inserted,
        Nw is the number of words in the reference
    The numerator is obtained as a Levenshtein distance. Note that empty
    items are removed when the sentences are split by the delimiter.

    Args:
        reference (str): The reference sentence.
        hypothesis (str): The hypothesis sentence.
        ignore_case (bool): If True, the comparison is case-insensitive.
        delimiter (char): Delimiter of input sentences.

    Returns:
        float: Word error rate.

    Raises:
        ValueError: If word number of reference is zero.
    """
    num_edits, num_ref_words = word_errors(reference, hypothesis, ignore_case,
                                           delimiter)

    # An empty reference would make the rate undefined.
    if num_ref_words == 0:
        raise ValueError("Reference's word number should be greater than 0.")

    return float(num_edits) / num_ref_words


def cer(reference, hypothesis, ignore_case=False, remove_space=False):
    """Calculate character error rate (CER). CER compares reference text and
    hypothesis text at the character level. CER is defined as:
    .. math::
        CER = (Sc + Dc + Ic) / Nc
    where
    .. code-block:: text
        Sc is the number of characters substituted,
        Dc is the number of characters deleted,
        Ic is the number of characters inserted
        Nc is the number of characters in the reference
    The numerator is obtained as a Levenshtein distance. Chinese input should
    be encoded to unicode. Note that leading and trailing space characters
    are removed, and multiple consecutive space characters inside a sentence
    are replaced by a single space character.

    Args:
        reference (str): The reference sentence.
        hypothesis (str): The hypothesis sentence.
        ignore_case (bool): If True, the comparison is case-insensitive.
        remove_space (bool): Whether remove internal space characters

    Returns:
        float: Character error rate.

    Raises:
        ValueError: If the reference length is zero.
    """
    num_edits, num_ref_chars = char_errors(reference, hypothesis, ignore_case,
                                           remove_space)

    # An empty reference would make the rate undefined.
    if num_ref_chars == 0:
        raise ValueError("Length of reference should be greater than 0.")

    return float(num_edits) / num_ref_chars


if __name__ == "__main__":
    # Small demo: the two phone sequences differ in a single item
    # ('iang5' vs 'iang4'), and the resulting word error rate is printed.
    ref_phones = [
        'j', 'iou4', 'zh', 'e4', 'iang5', 'x', 'v2', 'b', 'o1', 'k', 'ai1',
        'sh', 'iii3', 'l', 'e5', 'b', 'ei3', 'p', 'iao1', 'sh', 'eng1', 'ia2'
    ]
    hyp_phones = [
        'j', 'iou4', 'zh', 'e4', 'iang4', 'x', 'v2', 'b', 'o1', 'k', 'ai1',
        'sh', 'iii3', 'l', 'e5', 'b', 'ei3', 'p', 'iao1', 'sh', 'eng1', 'ia2'
    ]
    reference, hypothesis = " ".join(ref_phones), " ".join(hyp_phones)
    print(wer(reference, hypothesis))


================================================
FILE: paddlespeech/t2s/utils/h5_utils.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import sys
from pathlib import Path
from typing import Any
from typing import Union

import h5py
import numpy as np


def read_hdf5(filename: Union[Path, str], dataset_name: str) -> Any:
    """Read a dataset from a HDF5 file.

    The process exits with status 1 (via ``sys.exit``) when the file or the
    dataset does not exist.

    Args:
        filename (Union[Path, str]):
            Path of the HDF5 file.
        dataset_name (str):
            Name of the dataset to read.

    Returns:
        Any: The retrieved dataset.
    """
    filename = Path(filename)

    if not filename.exists():
        logging.error(f"There is no such a hdf5 file ({filename}).")
        sys.exit(1)

    # The context manager closes the file on every path, including the
    # early exit below and any error raised while reading.
    with h5py.File(filename, "r") as hdf5_file:
        if dataset_name not in hdf5_file:
            logging.error(
                f"There is no such a data in hdf5 file. ({dataset_name})")
            sys.exit(1)

        # [()]: a special syntax of h5py to get the dataset as-is
        return hdf5_file[dataset_name][()]


def write_hdf5(filename: Union[Path, str],
               dataset_name: str,
               write_data: np.ndarray,
               is_overwrite: bool=True) -> None:
    """Write dataset to HDF5 file.

    The parent directory is created when missing. An existing file is opened
    in ``r+`` mode so that its other datasets are kept; otherwise a new file
    is created.

    Args:
        filename (Union[Path, str]): Path of the HDF5 file.
        dataset_name (str): Name of the dataset to write to.
        write_data (np.ndarray): The data to write (converted with np.array).
        is_overwrite (bool, optional): Whether to overwrite, by default True

    Note:
        If the dataset already exists and ``is_overwrite`` is False, an error
        is logged and the process is terminated through ``sys.exit(1)``.
    """
    # convert to numpy array
    filename = Path(filename)
    write_data = np.array(write_data)

    # check folder existence
    filename.parent.mkdir(parents=True, exist_ok=True)

    # if the file already exists, open with r+ mode, otherwise with w mode
    mode = "r+" if filename.exists() else "w"

    # The context manager guarantees the file is closed even when deleting
    # or creating the dataset raises, or when we exit early below.
    with h5py.File(filename, mode) as hdf5_file:
        # check dataset existence (never true for a freshly created file)
        if dataset_name in hdf5_file:
            if is_overwrite:
                logging.warning("Dataset in hdf5 file already exists. "
                                "recreate dataset in hdf5.")
                del hdf5_file[dataset_name]
            else:
                logging.error(
                    "Dataset in hdf5 file already exists. "
                    "if you want to overwrite, please set is_overwrite = True.")
                sys.exit(1)

        # write data to hdf5
        hdf5_file.create_dataset(dataset_name, data=write_data)
        hdf5_file.flush()


================================================
FILE: paddlespeech/t2s/utils/internals.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from paddle.framework import core

# Names exported by `from ... import *` for this module.
__all__ = ["convert_dtype_to_np_dtype_"]


def convert_dtype_to_np_dtype_(dtype):
    """
    Map a paddle data type onto the corresponding numpy data type.

    Args:
        dtype:
            the data type in paddle; it is compared by identity against the
            members of ``core.VarDesc.VarType``.

    Returns:
        type: the data type in numpy.

    Raises:
        ValueError: if ``dtype`` is none of the handled paddle types.

    """
    # (VarType member name, numpy type) pairs, probed in this order.
    # Members are looked up lazily, one per iteration, while scanning.
    # BF16 is mapped to np.uint16.
    candidates = (
        ("FP32", np.float32),
        ("FP64", np.float64),
        ("FP16", np.float16),
        ("BOOL", np.bool_),
        ("INT32", np.int32),
        ("INT64", np.int64),
        ("INT16", np.int16),
        ("INT8", np.int8),
        ("UINT8", np.uint8),
        ("BF16", np.uint16),
    )
    var_type = core.VarDesc.VarType
    for member_name, numpy_type in candidates:
        if dtype is getattr(var_type, member_name):
            return numpy_type
    raise ValueError("Not supported dtype %s" % dtype)


================================================
FILE: paddlespeech/t2s/utils/layer_tools.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from paddle import nn

# Names exported by `from ... import *`. Note that
# recursively_remove_weight_norm is defined in this module but not listed.
__all__ = ["summary", "gradient_norm", "freeze", "unfreeze"]


def summary(layer: nn.Layer):
    """Print one line per ``state_dict`` entry of ``layer`` and the totals.

    Each line has the form ``name|shape|number_of_elements``; the final line
    reports how many entries were listed and their summed element count.

    Args:
        layer (nn.Layer): the layer whose ``state_dict()`` is inspected.
    """
    print("layer summary:")
    entry_count = 0
    element_total = 0
    for key, tensor in layer.state_dict().items():
        size = np.prod(tensor.shape)
        print("{}|{}|{}".format(key, tensor.shape, size))
        element_total += size
        entry_count += 1
    print("layer has {} parameters, {} elements.".format(entry_count,
                                                         element_total))


def gradient_norm(layer: nn.Layer):
    """Collect a size-normalised gradient norm for each trainable entry.

    For every trainable entry of ``layer.state_dict()`` the value stored is
    ``np.linalg.norm(grad) / grad.size``, i.e. the norm divided by the number
    of elements of the gradient.

    Args:
        layer (nn.Layer): the layer to inspect.

    Returns:
        dict: maps the state-dict name to the normalised gradient norm.
            Entries whose gradient is not available are omitted.
    """
    grad_norm_dict = {}
    for name, param in layer.state_dict().items():
        if not param.trainable:
            continue
        grad = param.gradient()
        if grad is None:
            # No gradient has been computed for this parameter (for example
            # it did not take part in the last backward pass); skipping it
            # avoids failing on `grad.size`.
            continue
        grad_norm_dict[name] = np.linalg.norm(grad) / grad.size
    return grad_norm_dict


def recursively_remove_weight_norm(layer: nn.Layer):
    """Best-effort removal of weight normalization from the sublayers.

    ``nn.utils.remove_weight_norm`` is tried on every item returned by
    ``layer.sublayers()``; any exception it raises is ignored.

    Args:
        layer (nn.Layer): the layer whose sublayers are processed.
    """
    for sublayer in layer.sublayers():
        try:
            nn.utils.remove_weight_norm(sublayer)
        except Exception:
            # there is no weight norm hook in this sublayer
            continue


def freeze(layer: nn.Layer):
    """Set ``trainable = False`` on every parameter of ``layer``.

    Args:
        layer (nn.Layer): the layer whose ``parameters()`` are updated.
    """
    for weight in layer.parameters():
        weight.trainable = False


def unfreeze(layer: nn.Layer):
    """Set ``trainable = True`` on every parameter of ``layer``.

    Args:
        layer (nn.Layer): the layer whose ``parameters()`` are updated.
    """
    for weight in layer.parameters():
        weight.trainable = True


================================================
FILE: paddlespeech/t2s/utils/mp_tools.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from functools import wraps

from paddle import distributed as dist

# Names exported by `from ... import *` for this module.
__all__ = ["rank_zero_only"]


def rank_zero_only(func):
    """Decorator that runs ``func`` only where ``dist.get_rank()`` is 0.

    On every other rank the wrapped call does nothing and returns None.
    The metadata of ``func`` is preserved through ``functools.wraps``.

    Args:
        func (callable): the function to guard.

    Returns:
        callable: the guarded function.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        if dist.get_rank() == 0:
            return func(*args, **kwargs)
        return None

    return wrapper


================================================
FILE: paddlespeech/t2s/utils/profiler.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import paddle.profiler as profiler

# A global variable to record the number of calling times for profiler
# functions. It is used to specify the tracing range of training steps.
_profiler_step_id = 0

# A global variable to avoid parsing from string every time.
_profiler_options = None
# The active profiler instance: None until add_profiler_step creates one,
# and reset to None after it has been stopped.
_prof = None

class ProfilerOptions(object):
    '''
    Use a string to initialize a ProfilerOptions.
    The string should be in the format: "key1=value1;key2=value;key3=value3".
    Spaces are removed before parsing; empty items (an empty string, a
    trailing ';') are ignored and unknown keys are silently skipped.
    For example:
      "profile_path=model.profile"
      "batch_range=[50, 60]; profile_path=model.profile"
      "batch_range=[50, 60]; tracer_option=OpDetail; profile_path=model.profile"

    ProfilerOptions supports following key-value pair:
      batch_range      - a integer list, e.g. [100, 110]. It is only accepted
                         when 0 <= start < end, otherwise the default is kept.
      state            - a string, the optional values are 'CPU', 'GPU' or 'All'.
      sorted_key       - a string, the optional values are 'calls', 'total',
                         'max', 'min' or 'ave'.
      tracer_option    - a string, the optional values are 'Default', 'OpDetail',
                         'AllOpDetail'.
      profile_path     - a string, the path to save the serialized profile data,
                         which can be used to generate a timeline.
      exit_on_finished - a boolean.
      timer_only       - a boolean.

    Boolean values are true for 'yes', 'true', 't' or '1' (case-insensitive)
    and false for anything else.
    '''

    # Spellings (compared in lower case) that are read as boolean True.
    _TRUE_STRINGS = ("yes", "true", "t", "1")

    def __init__(self, options_str):
        assert isinstance(options_str, str)

        self._options = {
            'batch_range': [10, 20],
            'state': 'All',
            'sorted_key': 'total',
            'tracer_option': 'Default',
            'profile_path': '/tmp/profile',
            'exit_on_finished': True,
            'timer_only': True
        }
        self._parse_from_string(options_str)

    def _parse_from_string(self, options_str):
        '''Overwrite the defaults with the key=value pairs of options_str.

        Raises:
          ValueError - when a non-empty item has no '=' or a batch_range
                       element is not an integer.
        '''
        for kv in options_str.replace(' ', '').split(';'):
            if not kv:
                # Empty input, a trailing ';' or ';;' - nothing to parse.
                continue
            # Split on the first '=' only so that values may contain '='.
            key, sep, value = kv.partition('=')
            if not sep:
                raise ValueError(
                    "Invalid profiler option '%s', expected key=value." % kv)
            if key == 'batch_range':
                value_list = value.replace('[', '').replace(']', '').split(',')
                value_list = list(map(int, value_list))
                if len(value_list) >= 2 and value_list[0] >= 0 and value_list[
                        1] > value_list[0]:
                    self._options[key] = value_list
            elif key in ('exit_on_finished', 'timer_only'):
                # Both flags default to a bool, so keep them as bool.
                self._options[key] = value.lower() in self._TRUE_STRINGS
            elif key in [
                    'state', 'sorted_key', 'tracer_option', 'profile_path'
            ]:
                self._options[key] = value

    def __getitem__(self, name):
        '''Return the option called name; raise ValueError if it is unknown.'''
        if self._options.get(name, None) is None:
            raise ValueError(
                "ProfilerOptions does not have an option named %s." % name)
        return self._options[name]


def add_profiler_step(options_str=None):
    '''
    Enable the operator-level timing using PaddlePaddle's profiler.
    The profiler uses a independent variable to count the profiler steps.
    One call of this function is treated as a profiler step.

    The first profiled call creates and starts the profiler, every later call
    advances it by one step. When the step counter reaches the end of
    ``batch_range`` the profiler is stopped, its summary is printed and, if
    ``exit_on_finished`` is set, the process exits through ``sys.exit(0)``.
    Calls made after that point do nothing.

    Args:
      options_str - a string to initialize the ProfilerOptions.
                    Default is None, and the profiler is disabled.
                    Only the string of the first profiled call is parsed.
    '''
    if options_str is None:
        return

    global _prof
    global _profiler_step_id
    global _profiler_options

    if _profiler_options is None:
        _profiler_options = ProfilerOptions(options_str)

    start_step, end_step = _profiler_options['batch_range'][:2]

    if _profiler_step_id > end_step:
        # The profiling window is over and the profiler was already stopped.
        # Without this guard a fresh profiler would be created below (because
        # _prof is None again) and would then never be stopped.
        return

    # profile : https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/performance_improving/profiling_model.html#chakanxingnengshujudetongjibiaodan
    # timer_only = True  only the model's throughput and time overhead are displayed
    # timer_only = False calling summary can print a statistical form that presents performance data from different perspectives.
    # timer_only = False the output Timeline information can be found in the profiler_log directory
    if _prof is None:
        _timer_only = str(_profiler_options['timer_only']) == str(True)
        _prof = profiler.Profiler(
            scheduler=(start_step, end_step),
            on_trace_ready=profiler.export_chrome_tracing('./profiler_log'),
            timer_only=_timer_only)
        _prof.start()
    else:
        _prof.step()

    if _profiler_step_id == end_step:
        _prof.stop()
        _prof.summary(op_detail=True, thread_sep=False, time_unit='ms')
        _prof = None
        if _profiler_options['exit_on_finished']:
            sys.exit(0)

    _profiler_step_id += 1


================================================
FILE: paddlespeech/t2s/utils/scheduler.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Names exported by `from ... import *` for this module.
__all__ = ["SchedulerBase", "Constant", "PieceWise", "StepWise"]


class SchedulerBase(object):
    """Base class of the schedulers: callables that map a step to a value."""

    def __call__(self, step):
        """Return the scheduled value at ``step``; subclasses override this."""
        message = "You should implement the __call__ method."
        raise NotImplementedError(message)


class Constant(SchedulerBase):
    """Scheduler that yields the same value at every step."""

    def __init__(self, value):
        # the value handed back by every call
        self.value = value

    def __call__(self, step):
        """Return the stored value; ``step`` is ignored."""
        constant = self.value
        return constant


class PieceWise(SchedulerBase):
    """Piecewise-linear scheduler defined by ``(step, value)`` anchors.

    The anchors are sorted by step and the first one must be at step 0.
    Between two neighbouring anchors the value is linearly interpolated;
    from the last anchor on, the last value is returned.
    """

    def __init__(self, anchors):
        ordered = sorted(list(anchors), key=lambda pair: pair[0])
        assert ordered[0][0] == 0, "it must start from zero"
        self.xs = [pair[0] for pair in ordered]
        self.ys = [pair[1] for pair in ordered]
        self.num_anchors = len(self.xs)

    def __call__(self, step):
        """Return the interpolated value at ``step``."""
        # number of anchors whose step has already been reached
        reached = sum(1 for x in self.xs if step >= x)
        if reached == 0:
            return self.ys[0]
        if reached == self.num_anchors:
            return self.ys[-1]
        left, right = reached - 1, reached
        # slope of the segment that contains `step`
        k = (self.ys[right] - self.ys[left]) / (self.xs[right] - self.xs[left])
        return self.ys[left] + (step - self.xs[left]) * k


class StepWise(SchedulerBase):
    """Step-function scheduler defined by ``(step, value)`` anchors.

    The anchors are sorted by step and the first one must be at step 0.
    The value of the latest anchor whose step has been reached is returned.
    """

    def __init__(self, anchors):
        ordered = sorted(list(anchors), key=lambda pair: pair[0])
        assert ordered[0][0] == 0, "it must start from zero"
        self.xs = [pair[0] for pair in ordered]
        self.ys = [pair[1] for pair in ordered]
        self.num_anchors = len(self.xs)

    def __call__(self, step):
        """Return the value that is active at ``step``."""
        # number of anchors whose step has already been reached
        reached = sum(1 for x in self.xs if step >= x)
        if reached == 0:
            # before the first anchor: fall back to the first value
            return self.ys[0]
        # also covers reached == num_anchors, where this is the last value
        return self.ys[reached - 1]


================================================
FILE: paddlespeech/text/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/text/exps/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/text/exps/ernie_linear/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/text/exps/ernie_linear/avg_model.py
================================================
#!/usr/bin/env python3
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import glob
import json
import os

import numpy as np
import paddle


def main(args):
    """Average ``args.num`` checkpoints and save the result.

    With ``args.val_best`` the checkpoints are chosen by the highest ``F1``
    found in the ``*.json`` files of ``args.ckpt_dir`` (restricted to epochs
    in ``[args.min_epoch, args.max_epoch]``); otherwise the most recently
    modified ``*.pdparams`` files are used. The averaged state is saved to
    ``args.dst_model`` and a ``<dst_model stem>.avg.json`` file describing
    the selection is written next to it.

    Args:
        args: parsed command-line arguments providing ``ckpt_dir``,
            ``dst_model``, ``val_best``, ``num``, ``min_epoch`` and
            ``max_epoch``.
    """
    paddle.set_device('cpu')

    val_scores = []
    beat_val_scores = []
    selected_epochs = []
    if args.val_best:
        # NOTE: `[!train]` is a glob character class, so this skips every
        # file whose first character is one of t/r/a/i/n.
        jsons = glob.glob(f'{args.ckpt_dir}/[!train]*.json')
        for y in jsons:
            with open(y, 'r') as f:
                dict_json = json.load(f)
            # the selection score is the F1 value (higher is better)
            score = dict_json['F1']
            epoch = dict_json['epoch']
            if epoch >= args.min_epoch and epoch <= args.max_epoch:
                val_scores.append((epoch, score))

        val_scores = np.array(val_scores)
        # sort by score, best (largest) first
        sort_idx = np.argsort(-val_scores[:, 1])
        sorted_val_scores = val_scores[sort_idx]
        path_list = [
            args.ckpt_dir + '/{}.pdparams'.format(int(epoch))
            for epoch in sorted_val_scores[:args.num, 0]
        ]

        beat_val_scores = sorted_val_scores[:args.num, 1]
        selected_epochs = sorted_val_scores[:args.num, 0].astype(np.int64)
        print("best val scores = " + str(beat_val_scores))
        print("selected epochs = " + str(selected_epochs))
    else:
        # NOTE: `[!avg][!final]` are two single-character glob classes
        # applied to the first and second character of the file name.
        path_list = glob.glob(f'{args.ckpt_dir}/[!avg][!final]*.pdparams')
        path_list = sorted(path_list, key=os.path.getmtime)
        path_list = path_list[-args.num:]

    print(path_list)

    avg = None
    num = args.num
    assert num == len(path_list)
    for path in path_list:
        print(f'Processing {path}')
        states = paddle.load(path)
        if avg is None:
            avg = states
        else:
            for k in avg.keys():
                avg[k] += states[k]
    # average
    for k in avg.keys():
        if avg[k] is not None:
            avg[k] /= num

    paddle.save(avg, args.dst_model)
    print(f'Saving to {args.dst_model}')

    meta_path = os.path.splitext(args.dst_model)[0] + '.avg.json'
    with open(meta_path, 'w') as f:
        # np.asarray(...).tolist() works for both cases: the numpy arrays
        # built with --val_best and the plain empty lists used without it
        # (calling .tolist() on a list raised AttributeError).
        data = json.dumps({
            "avg_ckpt": args.dst_model,
            "ckpt": path_list,
            "epoch": np.asarray(selected_epochs).tolist(),
            "val_loss": np.asarray(beat_val_scores).tolist(),
        })
        f.write(data + "\n")


if __name__ == '__main__':
    # Command-line interface of the checkpoint-averaging script.
    parser = argparse.ArgumentParser(description='average model')
    parser.add_argument('--dst_model', required=True, help='averaged model')
    parser.add_argument(
        '--ckpt_dir', required=True, help='ckpt model dir for average')
    # The help text used to be a copy of the --dst_model one.
    parser.add_argument(
        '--val_best',
        action="store_true",
        help='average the checkpoints with the best validation F1 '
        'instead of the most recently modified ones')
    parser.add_argument(
        '--num', default=5, type=int, help='nums for averaged model')
    parser.add_argument(
        '--min_epoch',
        default=0,
        type=int,
        help='min epoch used for averaging model')
    parser.add_argument(
        '--max_epoch',
        default=65536,  # Big enough
        type=int,
        help='max epoch used for averaging model')

    args = parser.parse_args()
    print(args)

    main(args)


================================================
FILE: paddlespeech/text/exps/ernie_linear/punc_restore.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import re

import paddle
import yaml
from paddlenlp.transformers import ErnieTokenizer
from yacs.config import CfgNode

from paddlespeech.text.models.ernie_linear import ErnieLinear

# Maps the `model_type` string of the config file to the model class.
DefinedClassifier = {
    'ErnieLinear': ErnieLinear,
}


def _clean_text(text, punc_list):
    text = text.lower()
    text = re.sub('[^A-Za-z0-9\u4e00-\u9fa5]', '', text)
    text = re.sub(f'[{"".join([p for p in punc_list][1:])}]', '', text)
    return text


def preprocess(text, punc_list, tokenizer):
    """Clean ``text`` and tokenize it character by character.

    Args:
        text (str): the raw input text.
        punc_list (list): punctuation list forwarded to ``_clean_text``.
        tokenizer: callable tokenizer; it is invoked with the list of
            characters, ``return_length=True`` and
            ``is_split_into_words=True``.

    Returns:
        dict: with the keys ``input_ids``, ``seg_ids`` (the tokenizer's
            ``token_type_ids``) and ``seq_len``.
    """
    cleaned = _clean_text(text, punc_list)
    assert len(cleaned) > 0, f'Invalid input string: {text}'
    encoded = tokenizer(
        list(cleaned), return_length=True, is_split_into_words=True)
    return {
        'input_ids': encoded['input_ids'],
        'seg_ids': encoded['token_type_ids'],
        'seq_len': encoded['seq_len'],
    }


def test(args):
    """Restore the punctuation of ``args.text`` with a trained model.

    Loads the config from ``args.config`` and the weights from
    ``args.checkpoint`` (its ``main_params`` entry), runs the model on the
    cleaned text and inserts the predicted punctuation after each token.

    Args:
        args: parsed command-line arguments providing ``config``,
            ``checkpoint`` and ``text``.

    Returns:
        str: the text with punctuation inserted (it is also printed).
    """
    with open(args.config) as cfg_file:
        config = CfgNode(yaml.safe_load(cfg_file))
    print("========Args========")
    print(yaml.safe_dump(vars(args), allow_unicode=True))
    print("========Config========")
    print(config)

    # one punctuation entry per line of the punctuation file
    with open(config["data_params"]["punc_path"], 'r') as punc_file:
        punc_list = [line.strip() for line in punc_file]

    model_cls = DefinedClassifier[config["model_type"]]
    model = model_cls(**config["model"])

    tokenizer = ErnieTokenizer.from_pretrained(
        config['data_params']['pretrained_token'])

    checkpoint = paddle.load(args.checkpoint)
    model.set_state_dict(checkpoint["main_params"])
    model.eval()

    features = preprocess(args.text, punc_list, tokenizer)
    seq_len = features['seq_len']
    # add the batch axis expected by the model
    input_ids = paddle.to_tensor(features['input_ids']).unsqueeze(0)
    seg_ids = paddle.to_tensor(features['seg_ids']).unsqueeze(0)
    logits, _ = model(input_ids, seg_ids)
    preds = paddle.argmax(logits, axis=-1).squeeze(0)

    # the first and the last position are dropped on both sides
    tokens = tokenizer.convert_ids_to_tokens(
        features['input_ids'][1:seq_len - 1])
    labels = preds[1:seq_len - 1].tolist()
    assert len(tokens) == len(labels)

    # add 0 for non punc, so that label ids index the list directly
    punc_list = [0] + punc_list
    text = ''.join(
        token if label == 0 else token + punc_list[label]
        for token, label in zip(tokens, labels))
    print("Punctuation Restoration Result:", text)
    return text


def main():
    """Command-line entry point: parse the arguments, select the device
    and run ``test``."""
    cli = argparse.ArgumentParser(description="Run Punctuation Restoration.")
    cli.add_argument("--config", type=str, help="ErnieLinear config file.")
    cli.add_argument("--checkpoint", type=str, help="snapshot to load.")
    cli.add_argument("--text", type=str, help="raw text to be restored.")
    cli.add_argument(
        "--ngpu", type=int, default=1, help="if ngpu=0, use cpu.")

    args = cli.parse_args()

    if args.ngpu < 0:
        # only reported: no device is selected and test() still runs
        print("ngpu should >= 0 !")
    elif args.ngpu == 0:
        paddle.set_device("cpu")
    else:
        paddle.set_device("gpu")

    test(args)


# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/text/exps/ernie_linear/test.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse

import numpy as np
import paddle
import pandas as pd
import yaml
from paddle import nn
from paddle.io import DataLoader
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_fscore_support
from yacs.config import CfgNode

from paddlespeech.t2s.utils import str2bool
from paddlespeech.text.models.ernie_linear import ErnieLinear
from paddlespeech.text.models.ernie_linear import PuncDataset
from paddlespeech.text.models.ernie_linear import PuncDatasetFromErnieTokenizer

# Lookup tables that resolve names to classes. `model_type` and
# `dataset_type` from the config file are looked up in DefinedClassifier
# and DefinedDataset.
DefinedClassifier = {
    'ErnieLinear': ErnieLinear,
}

# NOTE(review): DefinedLoss is not referenced by the functions of this script.
DefinedLoss = {
    "ce": nn.CrossEntropyLoss,
}

DefinedDataset = {
    'Punc': PuncDataset,
    'Ernie': PuncDatasetFromErnieTokenizer,
}


def evaluation(y_pred, y_test):
    """Build a precision / recall / F1 table for the label ids 1, 2 and 3.

    Args:
        y_pred: predicted label ids.
        y_test: ground-truth label ids (passed to scikit-learn as y_true).

    Returns:
        pd.DataFrame: rows ``Precision``, ``Recall`` and ``F1``; columns
            ``COMMA``, ``PERIOD`` and ``QUESTION`` with the per-label
            scores plus ``OVERALL`` with the macro average.
    """
    punc_labels = [1, 2, 3]
    per_label = precision_recall_fscore_support(
        y_test, y_pred, average=None, labels=punc_labels)
    macro = precision_recall_fscore_support(
        y_test, y_pred, average='macro', labels=punc_labels)

    # per_label is (precision, recall, f1, support); support is not shown
    precision, recall, f1 = per_label[:3]
    table = pd.DataFrame(
        np.array([precision, recall, f1]),
        columns=['COMMA', 'PERIOD', 'QUESTION'],
        index=['Precision', 'Recall', 'F1'])
    table['OVERALL'] = macro[:3]
    return table


def test(args):
    """Evaluate a trained model on the test set and print the reports.

    Loads the config from ``args.config`` and the weights from
    ``args.checkpoint`` (its ``main_params`` entry), predicts every batch of
    the test set, prints scikit-learn's classification report and, when
    ``args.print_eval`` is set, the table returned by ``evaluation``.

    Args:
        args: parsed command-line arguments providing ``config``,
            ``checkpoint`` and ``print_eval``.
    """
    with open(args.config) as f:
        config = CfgNode(yaml.safe_load(f))
    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(config)

    test_dataset = DefinedDataset[config["dataset_type"]](
        train_path=config["test_path"], **config["data_params"])
    test_loader = DataLoader(
        test_dataset,
        batch_size=config.batch_size,
        shuffle=False,
        drop_last=False)
    model = DefinedClassifier[config["model_type"]](**config["model"])
    state_dict = paddle.load(args.checkpoint)
    model.set_state_dict(state_dict["main_params"])
    model.eval()

    # punctuation names ordered by label id, used as report row names
    id2punc = test_loader.dataset.id2punc
    punc_list = [id2punc[i] for i in range(len(id2punc))]

    test_total_label = []
    test_total_predict = []

    for batch in test_loader:
        inputs, label = batch
        label = paddle.reshape(label, shape=[-1])
        _, logit = model(inputs)
        pred = paddle.argmax(logit, axis=1)
        test_total_label.extend(label.numpy().tolist())
        test_total_predict.extend(pred.numpy().tolist())
    t = classification_report(
        test_total_label, test_total_predict, target_names=punc_list)
    print(t)
    if args.print_eval:
        # evaluation() is declared as (y_pred, y_test). Passing the labels
        # first, as before, exchanged precision and recall in the table.
        t2 = evaluation(y_pred=test_total_predict, y_test=test_total_label)
        print('=========================================================')
        print(t2)


def main():
    """Command-line entry point: parse the arguments, select the device
    and run ``test``."""
    cli = argparse.ArgumentParser(description="Test a ErnieLinear model.")
    cli.add_argument("--config", type=str, help="ErnieLinear config file.")
    cli.add_argument("--checkpoint", type=str, help="snapshot to load.")
    cli.add_argument("--print_eval", type=str2bool, default=True)
    cli.add_argument(
        "--ngpu", type=int, default=1, help="if ngpu=0, use cpu.")

    args = cli.parse_args()

    if args.ngpu < 0:
        # only reported: no device is selected and test() still runs
        print("ngpu should >= 0 !")
    elif args.ngpu == 0:
        paddle.set_device("cpu")
    else:
        paddle.set_device("gpu")

    test(args)


# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/text/exps/ernie_linear/train.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
import os
import shutil
from pathlib import Path

import paddle
import yaml
from paddle import DataParallel
from paddle import distributed as dist
from paddle import nn
from paddle.io import DataLoader
from paddle.optimizer import Adam
from paddle.optimizer.lr import ExponentialDecay
from yacs.config import CfgNode

from paddlespeech.t2s.training.extensions.snapshot import Snapshot
from paddlespeech.t2s.training.extensions.visualizer import VisualDL
from paddlespeech.t2s.training.seeding import seed_everything
from paddlespeech.t2s.training.trainer import Trainer
from paddlespeech.text.models.ernie_linear import ErnieLinear
from paddlespeech.text.models.ernie_linear import ErnieLinearEvaluator
from paddlespeech.text.models.ernie_linear import ErnieLinearUpdater
from paddlespeech.text.models.ernie_linear import PuncDataset
from paddlespeech.text.models.ernie_linear import PuncDatasetFromErnieTokenizer

# Lookup tables that resolve the string names used in the config file
# (`model_type`, `loss_type`, `dataset_type`) to the classes to instantiate.
DefinedClassifier = {
    'ErnieLinear': ErnieLinear,
}

# "ce" is also the fallback when the config has no `loss_type`.
DefinedLoss = {
    "ce": nn.CrossEntropyLoss,
}

DefinedDataset = {
    'Punc': PuncDataset,
    'Ernie': PuncDatasetFromErnieTokenizer,
}


def train_sp(args, config):
    """Train an ErnieLinear model in the current process.

    Sets up the device and, when the world size is larger than one, the
    parallel environment; then builds the datasets, dataloaders, model,
    loss, optimizer and the trainer with its extensions, and runs training.

    Args:
        args: parsed command-line arguments; ``ngpu``, ``config`` and
            ``output_dir`` are read here.
        config: the loaded configuration (a CfgNode built in ``main``).
    """
    # decides device type and whether to run in parallel
    # setup running environment correctly
    if (not paddle.is_compiled_with_cuda()) or args.ngpu == 0:
        paddle.set_device("cpu")
    else:
        paddle.set_device("gpu")
    world_size = paddle.distributed.get_world_size()
    if world_size > 1:
        paddle.distributed.init_parallel_env()

    # set the random seed, it is a must for multiprocess training
    seed_everything(config.seed)

    print(
        f"rank:{dist.get_rank()}, pid: {os.getpid()}, parent_pid: {os.getppid()}"
    )
    # dataloader has been too verbose
    logging.getLogger("DataLoader").disabled = True
    # Both datasets use the same class and data_params; only the path
    # differs (the keyword is `train_path` for the dev set as well).
    train_dataset = DefinedDataset[config["dataset_type"]](
        train_path=config["train_path"], **config["data_params"])
    dev_dataset = DefinedDataset[config["dataset_type"]](
        train_path=config["dev_path"], **config["data_params"])
    train_dataloader = DataLoader(
        train_dataset,
        shuffle=True,
        num_workers=config.num_workers,
        batch_size=config.batch_size)

    dev_dataloader = DataLoader(
        dev_dataset,
        batch_size=config.batch_size,
        shuffle=False,
        drop_last=False,
        num_workers=config.num_workers)

    print("dataloaders done!")

    model = DefinedClassifier[config["model_type"]](**config["model"])

    # wrap the model for data-parallel training when several processes run
    if world_size > 1:
        model = DataParallel(model)
    print("model done!")

    # use the configured loss if `loss_type` is given, otherwise fall back
    # to cross entropy with default arguments
    criterion = DefinedLoss[config["loss_type"]](
        **config["loss"]) if "loss_type" in config else DefinedLoss["ce"]()

    print("criterions done!")

    # Adam with an exponentially decayed learning rate and L2 weight decay
    lr_schedule = ExponentialDecay(**config["scheduler_params"])
    optimizer = Adam(
        learning_rate=lr_schedule,
        parameters=model.parameters(),
        weight_decay=paddle.regularizer.L2Decay(
            config["optimizer_params"]["weight_decay"]))

    print("optimizer done!")

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    # only rank 0 copies the config file
    if dist.get_rank() == 0:
        # file name of the config: the part after the last "/"
        config_name = args.config.split("/")[-1]
        # copy conf to output_dir
        shutil.copyfile(args.config, output_dir / config_name)

    updater = ErnieLinearUpdater(
        model=model,
        criterion=criterion,
        scheduler=lr_schedule,
        optimizer=optimizer,
        dataloader=train_dataloader,
        output_dir=output_dir)

    # train for `config.max_epoch` epochs
    trainer = Trainer(updater, (config.max_epoch, 'epoch'), output_dir)

    evaluator = ErnieLinearEvaluator(
        model=model,
        criterion=criterion,
        dataloader=dev_dataloader,
        output_dir=output_dir)

    # evaluation (every epoch) and VisualDL logging (every iteration) are
    # registered on rank 0 only; the snapshot extension on every rank
    if dist.get_rank() == 0:
        trainer.extend(evaluator, trigger=(1, "epoch"))
        trainer.extend(VisualDL(output_dir), trigger=(1, "iteration"))
    trainer.extend(
        Snapshot(max_size=config.num_snapshots), trigger=(1, 'epoch'))
    trainer.run()


def main():
    """Command-line entry point for ErnieLinear training.

    Parses ``--config``, ``--output-dir`` and ``--ngpu``, loads the YAML file
    named by ``--config`` into a ``CfgNode``, echoes both the arguments and the
    config, and then hands over to ``train_sp``: through ``dist.spawn`` when
    more than one GPU is requested, or directly in this process otherwise.
    """
    cli = argparse.ArgumentParser(description="Train a ErnieLinear model.")
    cli.add_argument("--config", type=str, help="ErnieLinear config file.")
    cli.add_argument("--output-dir", type=str, help="output dir.")
    cli.add_argument(
        "--ngpu", type=int, default=1, help="if ngpu=0, use cpu.")
    args = cli.parse_args()

    # Load the YAML document first, then wrap it once the file is closed.
    with open(args.config) as config_file:
        raw_config = yaml.safe_load(config_file)
    config = CfgNode(raw_config)

    print("========Args========")
    print(yaml.safe_dump(vars(args)))
    print("========Config========")
    print(config)
    world_size = dist.get_world_size()
    pid = os.getpid()
    print(f"master see the word size: {world_size}, from pid: {pid}")

    # Single-process case first; anything above one GPU is spawned.
    if args.ngpu <= 1:
        train_sp(args, config)
        return
    dist.spawn(train_sp, (args, config), nprocs=args.ngpu)


# Run the training entry point only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


================================================
FILE: paddlespeech/text/models/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .ernie_crf import ErnieCrf
from .ernie_linear import ErnieLinear


================================================
FILE: paddlespeech/text/models/ernie_crf/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .model import ErnieCrf


================================================
FILE: paddlespeech/text/models/ernie_crf/model.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
from paddlenlp.layers.crf import LinearChainCrf
from paddlenlp.layers.crf import LinearChainCrfLoss
from paddlenlp.layers.crf import ViterbiDecoder
from paddlenlp.transformers import ErnieForTokenClassification


class ErnieCrf(nn.Layer):
    """ERNIE token classifier followed by a linear-chain CRF.

    The ERNIE model produces per-token scores; a ``LinearChainCrf`` supplies
    the transition parameters used both by the CRF loss and by the Viterbi
    decoder that produces the predicted tag sequence.
    """

    def __init__(self,
                 num_classes,
                 pretrained_token='ernie-1.0',
                 crf_lr=100,
                 **kwargs):
        """Build the ERNIE backbone, the CRF layer, its loss and the decoder.

        Args:
            num_classes: number of labels; forwarded as ``num_labels`` to the
                pretrained model and used as the CRF tag count.
            pretrained_token: name passed to ``from_pretrained``.
            crf_lr: forwarded to ``LinearChainCrf`` as ``crf_lr``.
            **kwargs: extra keyword arguments for ``from_pretrained``.
        """
        super().__init__()
        self.ernie = ErnieForTokenClassification.from_pretrained(
            pretrained_token, num_labels=num_classes, **kwargs)
        self.num_classes = num_classes
        self.crf = LinearChainCrf(
            self.num_classes, crf_lr=crf_lr, with_start_stop_tag=False)
        self.crf_loss = LinearChainCrfLoss(self.crf)
        # The decoder is built from the CRF's own transition parameters.
        self.viterbi_decoder = ViterbiDecoder(
            self.crf.transitions, with_start_stop_tag=False)

    def forward(self,
                input_ids,
                token_type_ids=None,
                position_ids=None,
                attention_mask=None,
                lengths=None,
                labels=None):
        """Decode tags and, when labels are given, also compute the CRF loss.

        Returns:
            ``prediction`` (the decoded tags flattened to 1-D) when ``labels``
            is None, otherwise the tuple ``(avg_loss, prediction)``.
        """
        emissions = self.ernie(
            input_ids,
            token_type_ids=token_type_ids,
            attention_mask=attention_mask,
            position_ids=position_ids)

        # Without explicit lengths every row is treated as full length.
        if lengths is None:
            lengths = paddle.ones(
                shape=[input_ids.shape[0]],
                dtype=paddle.int64) * input_ids.shape[1]

        _scores, decoded = self.viterbi_decoder(emissions, lengths)
        prediction = decoded.reshape([-1])

        if labels is None:
            return prediction

        labels = labels.reshape([input_ids.shape[0], -1])
        per_sequence_loss = self.crf_loss(emissions, lengths, labels)
        avg_loss = paddle.mean(per_sequence_loss)
        return avg_loss, prediction


================================================
FILE: paddlespeech/text/models/ernie_linear/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .dataset import *
from .ernie_linear import *
from .ernie_linear_updater import *


================================================
FILE: paddlespeech/text/models/ernie_linear/dataset.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle
from paddle.io import Dataset
from paddlenlp.transformers import ErnieTokenizer

# Names exported by `from ... import *`: the two dataset classes defined below.
__all__ = ["PuncDataset", "PuncDatasetFromErnieTokenizer"]


class PuncDataset(Dataset):
    """Punctuation dataset over a whitespace-tokenized text file.

    Every non-punctuation token becomes one input id (looked up in the word
    vocabulary, falling back to ``<UNK>``) and is labelled with the id of the
    punctuation token that immediately follows it, or with the ``" "`` label
    when the next token is not punctuation. The resulting streams are cut into
    rows of ``seq_len`` items; a trailing remainder is dropped.

    Args:
        train_path: UTF-8 text file with whitespace-separated tokens.
        vocab_path: word vocabulary file, one word per line.
        punc_path: punctuation vocabulary file, one symbol per line.
        seq_len: number of tokens per sample.
    """

    def __init__(self, train_path, vocab_path, punc_path, seq_len=100):
        self.seq_len = seq_len

        self.word2id = self.load_vocab(
            vocab_path, extra_word_list=['<UNK>', '<END>'])
        self.id2word = {v: k for k, v in self.word2id.items()}
        self.punc2id = self.load_vocab(punc_path, extra_word_list=[" "])
        self.id2punc = {k: v for (v, k) in self.punc2id.items()}

        # Use a context manager so the file handle is closed deterministically.
        with open(train_path, encoding='utf-8') as train_file:
            self.txt_seqs = [
                token for line in train_file for token in line.split()
            ]
        self.preprocess(self.txt_seqs)

    def __len__(self):
        """Return the number of ``seq_len``-long samples."""
        return self.in_len

    def __getitem__(self, index):
        """Return the ``(input_ids, labels)`` pair of row ``index``."""
        return self.input_data[index], self.label[index]

    def load_vocab(self, vocab_path, extra_word_list=None, encoding='utf-8'):
        """Load a one-entry-per-line vocabulary into a ``{word: id}`` dict.

        The entries of ``extra_word_list`` take ids ``0..n-1``; words from the
        file are numbered from ``n`` in file order. An extra word that also
        appears in the file keeps its extra-word id.

        Args:
            vocab_path: path of the vocabulary file.
            extra_word_list: words to reserve the first ids for.
            encoding: text encoding used to read ``vocab_path``.
        """
        extra_words = [] if extra_word_list is None else extra_word_list
        n = len(extra_words)
        with open(vocab_path, encoding=encoding) as vf:
            vocab = {word.strip(): i + n for i, word in enumerate(vf)}
        for i, word in enumerate(extra_words):
            vocab[word] = i
        return vocab

    def preprocess(self, txt_seqs: list):
        """Convert the token stream into ``input_data`` / ``label`` tensors.

        Sets ``self.in_len``, ``self.input_data`` and ``self.label``. The last
        token of ``txt_seqs`` has no successor and is therefore never emitted
        as an input.
        """
        input_data = []
        label = []
        blank_id = self.punc2id[" "]
        unk_id = self.word2id["<UNK>"]

        # Pair every token with its successor; the successor decides the label.
        for token, following in zip(txt_seqs, txt_seqs[1:]):
            if token in self.punc2id:
                continue
            input_data.append(self.word2id.get(token, unk_id))
            label.append(self.punc2id.get(following, blank_id))

        # A real invariant check (asserting a non-empty string never fails).
        assert len(input_data) == len(
            label), 'error: length input_data != label'

        # Keep only whole rows of seq_len items.
        self.in_len = len(input_data) // self.seq_len
        len_tmp = self.in_len * self.seq_len
        input_data = input_data[:len_tmp]
        label = label[:len_tmp]

        self.input_data = paddle.to_tensor(
            np.array(input_data, dtype='int64').reshape(-1, self.seq_len))
        self.label = paddle.to_tensor(
            np.array(label, dtype='int64').reshape(-1, self.seq_len))


class PuncDatasetFromErnieTokenizer(Dataset):
    """Punctuation dataset whose inputs are ERNIE tokenizer ids.

    Each non-punctuation word of the whitespace-tokenized training file is
    tokenized with ``ErnieTokenizer``. All of its ids except the last are
    labelled with the ``" "`` label; the last id is labelled with the
    punctuation token that follows the word (or ``" "`` if none follows).
    The streams are cut into rows of ``seq_len`` items; a trailing remainder
    is dropped.

    Args:
        train_path: UTF-8 text file with whitespace-separated tokens.
        punc_path: punctuation vocabulary file, one symbol per line.
        pretrained_token: tokenizer name passed to ``from_pretrained``.
        seq_len: number of token ids per sample.
    """

    def __init__(self,
                 train_path,
                 punc_path,
                 pretrained_token='ernie-1.0',
                 seq_len=100):
        self.tokenizer = ErnieTokenizer.from_pretrained(pretrained_token)
        self.paddingID = self.tokenizer.pad_token_id
        self.seq_len = seq_len
        self.punc2id = self.load_vocab(punc_path, extra_word_list=[" "])
        self.id2punc = {k: v for (v, k) in self.punc2id.items()}
        # Use a context manager so the file handle is closed deterministically.
        with open(train_path, encoding='utf-8') as train_file:
            self.txt_seqs = [
                token for line in train_file for token in line.split()
            ]
        self.preprocess(self.txt_seqs)

    def __len__(self):
        """Return the number of ``seq_len``-long samples."""
        return self.in_len

    def __getitem__(self, index):
        """Return the ``(input_ids, labels)`` pair of row ``index``."""
        return self.input_data[index], self.label[index]

    def load_vocab(self, vocab_path, extra_word_list=None, encoding='utf-8'):
        """Load a one-entry-per-line vocabulary into a ``{word: id}`` dict.

        The entries of ``extra_word_list`` take ids ``0..n-1``; words from the
        file are numbered from ``n`` in file order. An extra word that also
        appears in the file keeps its extra-word id.

        Args:
            vocab_path: path of the vocabulary file.
            extra_word_list: words to reserve the first ids for.
            encoding: text encoding used to read ``vocab_path``.
        """
        extra_words = [] if extra_word_list is None else extra_word_list
        n = len(extra_words)
        with open(vocab_path, encoding=encoding) as vf:
            vocab = {word.strip(): i + n for i, word in enumerate(vf)}
        for i, word in enumerate(extra_words):
            vocab[word] = i
        return vocab

    def preprocess(self, txt_seqs: list):
        """Convert the token stream into ``input_data`` / ``label`` arrays.

        Sets ``self.in_len``, ``self.input_data`` and ``self.label`` (NumPy
        int64 arrays of shape ``(in_len, seq_len)``). The last token of
        ``txt_seqs`` has no successor and is never emitted as an input.
        """
        input_data = []
        label = []
        blank_id = self.punc2id[" "]
        print("Preprocessing in PuncDatasetFromErnieTokenizer...")
        for word, punc in zip(txt_seqs, txt_seqs[1:]):
            if word in self.punc2id:
                continue

            # Drop the first and last id — presumably the special start/end
            # markers the tokenizer wraps around the text (TODO confirm).
            ids = self.tokenizer(word)["input_ids"][1:-1]
            if not ids:
                # Nothing was produced for this word; emitting a label anyway
                # would shift every following label by one position.
                continue
            input_data.extend(ids)

            # Only the final sub-token of a word carries the punctuation label.
            label.extend([blank_id] * (len(ids) - 1))
            label.append(self.punc2id.get(punc, blank_id))

        # A real invariant check (asserting a non-empty string never fails).
        assert len(input_data) == len(
            label), 'error: length input_data != label'

        # Keep only whole rows of seq_len items.
        self.in_len = len(input_data) // self.seq_len
        len_tmp = self.in_len * self.seq_len
        input_data = input_data[:len_tmp]
        label = label[:len_tmp]
        self.input_data = np.array(
            input_data, dtype='int64').reshape(-1, self.seq_len)
        self.label = np.array(label, dtype='int64').reshape(-1, self.seq_len)


================================================
FILE: paddlespeech/text/models/ernie_linear/ernie_linear.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import paddle
import paddle.nn as nn
from paddlenlp.transformers import ErnieForTokenClassification


class ErnieLinear(nn.Layer):
    """ERNIE token-classification model with a softmax over its outputs.

    The backbone is loaded either from the directory containing ``cfg_path``
    (when both ``cfg_path`` and ``ckpt_path`` are given) or by name from
    ``pretrained_token`` with ``num_classes`` labels.
    """

    def __init__(self,
                 num_classes=None,
                 pretrained_token='ernie-1.0',
                 cfg_path=None,
                 ckpt_path=None,
                 **kwargs):
        """Create the backbone and the softmax layer.

        Args:
            num_classes: positive integer label count; required unless both
                ``cfg_path`` and ``ckpt_path`` are given.
            pretrained_token: model name used when loading by name.
            cfg_path: path to a config file; its directory is what gets loaded.
            ckpt_path: path to a checkpoint file; only checked for existence
                here.
            **kwargs: extra keyword arguments for ``from_pretrained`` when
                loading by name.
        """
        super(ErnieLinear, self).__init__()

        load_by_name = cfg_path is None or ckpt_path is None
        if load_by_name:
            assert isinstance(
                num_classes, int
            ) and num_classes > 0, 'Argument `num_classes` must be an integer.'
            self.ernie = ErnieForTokenClassification.from_pretrained(
                pretrained_token, num_labels=num_classes, **kwargs)
        else:
            # Normalize both paths before validating them.
            cfg_path, ckpt_path = (os.path.abspath(os.path.expanduser(path))
                                   for path in (cfg_path, ckpt_path))

            assert os.path.isfile(
                cfg_path), 'Config file is not valid: {}'.format(cfg_path)
            assert os.path.isfile(
                ckpt_path), 'Checkpoint file is not valid: {}'.format(ckpt_path)

            model_dir = os.path.dirname(cfg_path)
            self.ernie = ErnieForTokenClassification.from_pretrained(model_dir)

        self.num_classes = self.ernie.num_labels
        self.softmax = nn.Softmax()

    def forward(self,
                input_ids,
                token_type_ids=None,
                position_ids=None,
                attention_mask=None):
        """Run the backbone and return its flattened output and its softmax.

        Returns:
            A tuple ``(y, logits)``: the backbone output reshaped to
            ``[-1, num_classes]`` and the softmax of that reshaped output.
        """
        backbone_out = self.ernie(
            input_ids,
            token_type_ids=token_type_ids,
            attention_mask=attention_mask,
            position_ids=position_ids)

        flat_scores = paddle.reshape(
            backbone_out, shape=[-1, self.num_classes])
        return flat_scores, self.softmax(flat_scores)


================================================
FILE: paddlespeech/text/models/ernie_linear/ernie_linear_updater.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging

import paddle
from paddle import distributed as dist
from paddle.io import DataLoader
from paddle.nn import Layer
from paddle.optimizer import Optimizer
from paddle.optimizer.lr import LRScheduler
from sklearn.metrics import f1_score

from paddlespeech.t2s.training.extensions.evaluator import StandardEvaluator
from paddlespeech.t2s.training.reporter import report
from paddlespeech.t2s.training.updaters.standard_updater import StandardUpdater
# Configure the root logger's record format when this module is imported.
logging.basicConfig(
    format='%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(message)s',
    datefmt='[%Y-%m-%d %H:%M:%S]')
# Module-level logger used by both the updater and the evaluator in this file;
# INFO and above are emitted.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class ErnieLinearUpdater(StandardUpdater):
    """Training updater for the ErnieLinear model.

    One call to ``update_core`` runs a forward pass, computes the loss with
    ``criterion``, back-propagates, steps the optimizer and the LR scheduler,
    and reports the loss and the macro-averaged F1 score of the batch.

    Args:
        model: layer returning a ``(y, logit)`` pair for an input batch.
        criterion: loss layer called as ``criterion(y, label)``.
        scheduler: learning-rate scheduler stepped once per batch.
        optimizer: optimizer stepped once per batch.
        dataloader: training data loader.
        output_dir: directory for the per-rank log file; must support the
            ``/`` operator (e.g. ``pathlib.Path``), so the ``None`` default is
            not usable as-is.
    """

    def __init__(self,
                 model: Layer,
                 criterion: Layer,
                 scheduler: LRScheduler,
                 optimizer: Optimizer,
                 dataloader: DataLoader,
                 output_dir=None):
        super().__init__(model, optimizer, dataloader, init_state=None)
        self.model = model
        self.dataloader = dataloader

        self.criterion = criterion
        self.scheduler = scheduler
        self.optimizer = optimizer

        log_file = output_dir / 'worker_{}.log'.format(dist.get_rank())
        handler = logging.FileHandler(str(log_file))
        # The module logger is shared; if a handler for this very file is
        # already attached, reuse it so records are not written twice.
        for attached in logger.handlers:
            if isinstance(attached, logging.FileHandler) and \
                    attached.baseFilename == handler.baseFilename:
                handler.close()
                handler = attached
                break
        else:
            logger.addHandler(handler)
        self.filehandler = handler
        self.logger = logger
        self.msg = ""

    def update_core(self, batch):
        """Run one optimization step on ``batch`` (an ``(input, label)`` pair).

        Reports ``train/loss`` and ``train/F1_score`` and stores a formatted
        summary in ``self.msg``.
        """
        self.msg = "Rank: {}, ".format(dist.get_rank())
        losses_dict = {}

        inputs, label = batch
        label = paddle.reshape(label, shape=[-1])
        y, logit = self.model(inputs)
        pred = paddle.argmax(logit, axis=1)

        loss = self.criterion(y, label)

        self.optimizer.clear_grad()
        loss.backward()

        self.optimizer.step()
        self.scheduler.step()

        # Macro F1 over the flattened labels of this batch.
        F1_score = f1_score(
            label.numpy().tolist(), pred.numpy().tolist(), average="macro")

        loss_value = float(loss)
        f1_value = float(F1_score)
        report("train/loss", loss_value)
        losses_dict["loss"] = loss_value
        report("train/F1_score", f1_value)
        losses_dict["F1_score"] = f1_value

        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())


class ErnieLinearEvaluator(StandardEvaluator):
    """Evaluator for the ErnieLinear model.

    One call to ``evaluate_core`` runs a forward pass on a batch, computes the
    loss with ``criterion`` and the macro-averaged F1 score, reports both and
    logs a one-line summary.

    Args:
        model: layer returning a ``(y, logit)`` pair for an input batch.
        criterion: loss layer called as ``criterion(y, label)``.
        dataloader: evaluation data loader.
        output_dir: directory for the per-rank log file; must support the
            ``/`` operator (e.g. ``pathlib.Path``), so the ``None`` default is
            not usable as-is.
    """

    def __init__(self,
                 model: Layer,
                 criterion: Layer,
                 dataloader: DataLoader,
                 output_dir=None):
        super().__init__(model, dataloader)
        self.model = model
        self.criterion = criterion
        self.dataloader = dataloader

        log_file = output_dir / 'worker_{}.log'.format(dist.get_rank())
        handler = logging.FileHandler(str(log_file))
        # The module logger is shared; if a handler for this very file is
        # already attached, reuse it so records are not written twice.
        for attached in logger.handlers:
            if isinstance(attached, logging.FileHandler) and \
                    attached.baseFilename == handler.baseFilename:
                handler.close()
                handler = attached
                break
        else:
            logger.addHandler(handler)
        self.filehandler = handler
        self.logger = logger
        self.msg = ""

    def evaluate_core(self, batch):
        """Evaluate one ``(input, label)`` batch.

        Reports ``eval/loss`` and ``eval/F1_score`` and logs the formatted
        summary through ``self.logger``.
        """
        self.msg = "Evaluate: "
        losses_dict = {}

        inputs, label = batch
        label = paddle.reshape(label, shape=[-1])
        y, logit = self.model(inputs)
        pred = paddle.argmax(logit, axis=1)

        loss = self.criterion(y, label)

        # Macro F1 over the flattened labels of this batch.
        F1_score = f1_score(
            label.numpy().tolist(), pred.numpy().tolist(), average="macro")

        loss_value = float(loss)
        f1_value = float(F1_score)
        report("eval/loss", loss_value)
        losses_dict["loss"] = loss_value
        report("eval/F1_score", f1_value)
        losses_dict["F1_score"] = f1_value

        self.msg += ', '.join('{}: {:>.6f}'.format(k, v)
                              for k, v in losses_dict.items())
        self.logger.info(self.msg)


================================================
FILE: paddlespeech/utils/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from packaging.version import Version


def satisfy_version(source: str, target: str, dev_allowed: bool=True) -> bool:
    """Tell whether version ``source`` is at least the required version.

    Args:
        source: version string to check.
        target: minimum required version string.
        dev_allowed: when True and ``source`` starts with ``'0.0.0'``, the
            requirement is replaced by ``0.0.0`` instead of ``target``.

    Returns:
        True if ``Version(source) >= `` the effective requirement.
    """
    is_dev_build = dev_allowed and source.startswith('0.0.0')
    required = Version('0.0.0') if is_dev_build else Version(target)
    installed = Version(source)
    return installed >= required


def satisfy_paddle_version(target: str, dev_allowed: bool=True) -> bool:
    """Check the installed paddle version against ``target``.

    paddle is imported inside the function, so importing this module does not
    import paddle. The comparison itself is delegated to ``satisfy_version``.
    """
    import paddle
    installed = paddle.__version__
    return satisfy_version(installed, target, dev_allowed)


================================================
FILE: paddlespeech/utils/argparse.py
================================================
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import hashlib
import os
import sys
from typing import Text

import distutils

# Public helpers of this module, as exported by `from ... import *`.
__all__ = [
    "print_arguments", "add_arguments", "get_commandline_args", "strtobool"
]


def get_commandline_args():
    """Return the current command line as one shell-pasteable string.

    The result is ``sys.executable`` followed by every entry of ``sys.argv``,
    separated by single spaces. An argument is wrapped in single quotes when
    it is empty, contains a single quote, or contains any of the shell
    metacharacters listed below; embedded single quotes are written as
    ``'\\''``. All other arguments are emitted unchanged.

    Returns:
        str: the reconstructed command line.
    """
    extra_chars = [
        " ",
        ";",
        "&",
        "(",
        ")",
        "|",
        "^",
        "<",
        ">",
        "?",
        "*",
        "[",
        "]",
        "$",
        "`",
        '"',
        "\\",
        "!",
        "{",
        "}",
    ]

    def _quote(arg):
        # The '\'' escape is only valid inside a single-quoted word, and an
        # empty argument must be written as '' or it vanishes from the line.
        needs_quotes = (not arg or "'" in arg or
                        any(char in arg for char in extra_chars))
        if not needs_quotes:
            return arg
        return "'" + arg.replace("'", "'\\''") + "'"

    # Escape the extra characters for shell
    argv = [_quote(arg) for arg in sys.argv]

    return sys.executable + " " + " ".join(argv)


def print_arguments(args, info=None):
    """Print every argparse argument as a ``name: value`` line.

    Usage:

    .. code-block:: python

        parser = argparse.ArgumentParser()
        parser.add_argument("name", default="John", type=str, help="User name.")
        args = parser.parse_args()
        print_arguments(args, globals())

    :param args: Namespace whose attributes are printed, sorted by name.
    :type args: argparse.Namespace
    :param info: optional mapping; when truthy, the base name of its
        ``"__file__"`` entry is shown in the header line.
    """
    source_path = info["__file__"] if info else ""
    filename = os.path.basename(source_path)
    print(f"----------- {filename} Configuration Arguments -----------")
    entries = list(vars(args).items())
    entries.sort()
    for name, value in entries:
        print("%s: %s" % (name, value))
    print("-----------------------------------------------------------")


def strtobool(value):
    """Convert a string value to an integer boolean (1 for True, 0 for False).

    The function recognizes the following strings as True (case insensitive,
    surrounding whitespace ignored), i.e. the same truthy spellings that
    ``distutils.util.strtobool`` accepted:
    - "y", "yes"
    - "t", "true"
    - "on"
    - "1"

    All other values are considered False (no exception is raised). A ``bool``
    input is converted with ``int`` directly.

    NOTE: After Python 3.10, the distutils module, particularly distutils.util, has been partially deprecated. To maintain compatibility with existing code, the strtobool function is implemented here.
    """
    if isinstance(value, bool):
        return int(value)
    truthy = ('y', 'yes', 't', 'true', 'on', '1')
    return 1 if value.strip().lower() in truthy else 0


def add_arguments(argname, type, default, help, argparser, **kwargs):
    """Register the ``--<argname>`` option on ``argparser``.

    A ``bool`` type is swapped for ``strtobool`` so that textual values are
    parsed, and `` Default: %(default)s.`` is appended to the help text.
    Remaining keyword arguments are passed to ``add_argument`` untouched.

    Usage:

    .. code-block:: python

        parser = argparse.ArgumentParser()
        add_arguments("name", str, "John", "User name.", parser)
        args = parser.parse_args()
    """
    converter = type
    if converter == bool:
        converter = strtobool
    option = "--" + argname
    help_text = help + ' Default: %(default)s.'
    argparser.add_argument(
        option, default=default, type=converter, help=help_text, **kwargs)


================================================
FILE: paddlespeech/utils/dynamic_import.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
import importlib

# Only `dynamic_import` is exported by `from ... import *`.
__all__ = ["dynamic_import"]


def dynamic_import(import_path, alias=dict()):
    """Import a module and return one attribute of it.

    :param str import_path: either ``'module_name:object_name'``,
        e.g. ``'paddlespeech.s2t.models.u2:U2Model'``, or a key of ``alias``
    :param dict alias: maps short names to ``'module_name:object_name'`` paths
    :return: the attribute ``object_name`` of the imported module
    :raises ValueError: if ``import_path`` has no ``":"`` and is not an alias
    """
    is_full_path = ":" in import_path
    if not is_full_path:
        if import_path not in alias:
            raise ValueError(
                "import_path should be one of {} or "
                'include ":", e.g. "paddlespeech.s2t.models.u2:U2Model" : '
                "{}".format(set(alias), import_path))
        # A registered shortcut: swap in the full path it stands for.
        import_path = alias[import_path]

    module_name, objname = import_path.split(":")
    module = importlib.import_module(module_name)
    return getattr(module, objname)


================================================
FILE: paddlespeech/utils/env.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os


def _get_user_home():
    return os.path.expanduser('~')


def _get_paddlespcceh_home():
    if 'PPSPEECH_HOME' in os.environ:
        home_path = os.environ['PPSPEECH_HOME']
        if os.path.exists(home_path):
            if os.path.isdir(home_path):
                return home_path
            else:
                raise RuntimeError(
                    'The environment variable PPSPEECH_HOME {} is not a directory.'.
                    format(home_path))
        else:
            return home_path
    return os.path.join(_get_user_home(), '.paddlespeech')


def _get_sub_home(directory):
    """Return ``<paddlespeech home>/<directory>``, creating it if missing.

    Args:
        directory: name of the sub-directory under the PaddleSpeech home.

    Returns:
        str: the joined path.
    """
    home = os.path.join(_get_paddlespcceh_home(), directory)
    if not os.path.exists(home):
        # exist_ok closes the window in which another process creates the
        # directory between the check above and this call.
        os.makedirs(home, exist_ok=True)
    return home


# Resolved once, when this module is imported. The three sub-directories are
# obtained through _get_sub_home, which creates them on disk if they are missing.
PPSPEECH_HOME = _get_paddlespcceh_home()
MODEL_HOME = _get_sub_home('models')
CONF_HOME = _get_sub_home('conf')
DATA_HOME = _get_sub_home('datasets')


================================================
FILE: paddlespeech/utils/initialize.py
================================================
#   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/pytorch/pytorch/blob/master/torch/nn/init.py
The copyright of pytorch/pytorch is a BSD-style license, as found in the LICENSE file.
"""
import math

import numpy as np
import paddle
import paddle.nn as nn

# Names exported by `from ... import *`. Note that the underscore-prefixed
# `_calculate_fan_in_and_fan_out` is exported on purpose by being listed here.
__all__ = [
    "uniform_",
    "normal_",
    "constant_",
    "ones_",
    "zeros_",
    "xavier_uniform_",
    "xavier_normal_",
    "kaiming_uniform_",
    "kaiming_normal_",
    "linear_init_",
    "conv_init_",
    "reset_initialized_parameter",
    "_calculate_fan_in_and_fan_out",
]


def _no_grad_uniform_(tensor, a, b):
    """Overwrite ``tensor`` with uniform samples between ``a`` and ``b``.

    The sample has the tensor's shape and dtype and is written with
    ``set_value`` inside ``paddle.no_grad()``. Returns the same ``tensor``.
    """
    with paddle.no_grad():
        sample = paddle.uniform(
            shape=tensor.shape, dtype=tensor.dtype, min=a, max=b)
        tensor.set_value(sample)
    return tensor


def _no_grad_normal_(tensor, mean=0.0, std=1.0):
    """Overwrite ``tensor`` with samples from a normal distribution.

    Args:
        tensor: tensor to overwrite; its shape and dtype are kept.
        mean: mean of the distribution.
        std: standard deviation of the distribution.

    Returns:
        The same ``tensor`` object.
    """
    with paddle.no_grad():
        sample = paddle.normal(mean=mean, std=std, shape=tensor.shape)
        # paddle.normal is called without a dtype here, so cast the sample to
        # the tensor's dtype, matching the uniform/fill helpers in this file.
        # NOTE(review): set_value is understood to reject a dtype mismatch —
        # confirm against the paddle version in use.
        tensor.set_value(sample.astype(tensor.dtype))
    return tensor


def _no_grad_fill_(tensor, value=0.0):
    """Overwrite every element of ``tensor`` with ``value``.

    The replacement is built with ``paddle.full_like`` using the tensor's
    dtype and written with ``set_value`` inside ``paddle.no_grad()``.
    Returns the same ``tensor``.
    """
    with paddle.no_grad():
        filled = paddle.full_like(tensor, value, dtype=tensor.dtype)
        tensor.set_value(filled)
    return tensor


def uniform_(tensor, a, b):
    """
    Fill the tensor in place with uniform samples.
    Args:
        tensor (paddle.Tensor): tensor to overwrite.
        a (float|int): lower bound.
        b (float|int): upper bound.
    Return:
        the same tensor
    """
    initialized = _no_grad_uniform_(tensor, a=a, b=b)
    return initialized


def normal_(tensor, mean=0.0, std=1.0):
    """
    Fill the tensor in place with normally distributed samples.
    Args:
        tensor (paddle.Tensor): tensor to overwrite.
        mean (float|int): mean of the distribution.
        std (float|int): standard deviation of the distribution.
    Return:
        the same tensor
    """
    initialized = _no_grad_normal_(tensor, mean=mean, std=std)
    return initialized


def constant_(tensor, value=0.0):
    """
    Fill the tensor in place with a constant.
    Args:
        tensor (paddle.Tensor): tensor to overwrite.
        value (float|int): value written to every element.
    Return:
        the same tensor
    """
    initialized = _no_grad_fill_(tensor, value=value)
    return initialized


def ones_(tensor):
    """
    Fill the tensor in place with ones.
    Args:
        tensor (paddle.Tensor): tensor to overwrite.
    Return:
        the same tensor
    """
    initialized = _no_grad_fill_(tensor, value=1)
    return initialized


def zeros_(tensor):
    """
    Fill the tensor in place with zeros.
    Args:
        tensor (paddle.Tensor): tensor to overwrite.
    Return:
        the same tensor
    """
    initialized = _no_grad_fill_(tensor, value=0)
    return initialized


def vector_(tensor, vector):
    """Overwrite ``tensor`` with the contents of ``vector``.

    ``vector`` is converted with ``paddle.to_tensor`` using the tensor's dtype
    and written with ``set_value`` inside ``paddle.no_grad()``. Returns the
    same ``tensor``.
    """
    with paddle.no_grad():
        replacement = paddle.to_tensor(vector, dtype=tensor.dtype)
        tensor.set_value(replacement)
    return tensor


def _calculate_fan_in_and_fan_out(tensor, reverse=False):
    """
    Calculate (fan_in, _fan_out) for tensor
    Args:
        tensor (Tensor): paddle.Tensor
        reverse (bool: False): tensor data format order, False by default as [fout, fin, ...]. e.g. : conv.weight [cout, cin, kh, kw] is False; linear.weight [cin, cout] is True
    Return:
        Tuple[fan_in, fan_out]
    """
    if tensor.ndim < 2:
        raise ValueError(
            "Fan in and fan out can not be computed for tensor with fewer than 2 dimensions"
        )

    if reverse:
        num_input_fmaps, num_output_fmaps = tensor.shape[0], tensor.shape[1]
    else:
        num_input_fmaps, num_output_fmaps = tensor.shape[1], tensor.shape[0]

    receptive_field_size = 1
    if tensor.ndim > 2:
        receptive_field_size = np.prod(tensor.shape[2:])

    fan_in = num_input_fmaps * receptive_field_size
    fan_out = num_output_fmaps * receptive_field_size

    return fan_in, fan_out


def xavier_uniform_(tensor, gain=1.0, reverse=False):
    """
    Fill the tensor in place with Xavier (Glorot) uniform values.
    Args:
        tensor (paddle.Tensor): tensor to overwrite.
        gain (float): scaling factor, 1.0 by default.
        reverse (bool): layout of the first two axes; False (default) means
            [fout, fin, ...].
    Return:
        the same tensor
    """
    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor, reverse=reverse)
    fan_sum = float(fan_in + fan_out)
    std = gain * math.sqrt(2.0 / fan_sum)
    # A uniform distribution on [-bound, bound] has standard deviation `std`.
    bound = math.sqrt(3.0) * std
    return _no_grad_uniform_(tensor, -bound, bound)


def xavier_normal_(tensor, gain=1.0, reverse=False):
    """
    Fill the tensor in place with Xavier (Glorot) normal values.
    Args:
        tensor (paddle.Tensor): tensor to overwrite.
        gain (float): scaling factor, 1.0 by default.
        reverse (bool): layout of the first two axes; False (default) means
            [fout, fin, ...].
    Return:
        the same tensor
    """
    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor, reverse=reverse)
    fan_sum = float(fan_in + fan_out)
    std = gain * math.sqrt(2.0 / fan_sum)
    # Zero-mean normal with the Xavier standard deviation.
    return _no_grad_normal_(tensor, 0, std)


# reference: https://pytorch.org/docs/stable/_modules/torch/nn/init.html
def _calculate_correct_fan(tensor, mode, reverse=False):
    mode = mode.lower()
    valid_modes = ["fan_in", "fan_out"]
    if mode not in valid_modes:
        raise ValueError("Mode {} not supported, please use one of {}".format(
            mode, valid_modes))

    fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor, reverse)

    return fan_in if mode == "fan_in" else fan_out


def _calculate_gain(nonlinearity, param=None):
    linear_fns = [
        "linear", "conv1d", "conv2d", "conv3d", "conv_transpose1d",
        "conv_transpose2d", "conv_transpose3d"
    ]
    if nonlinearity in linear_fns or nonlinearity == "sigmoid":
        return 1
    elif nonlinearity == "tanh":
        return 5.0 / 3
    elif nonlinearity == "relu":
        return math.sqrt(2.0)
    elif nonlinearity == "leaky_relu":
        if param is None:
            negative_slope = 0.01
        elif not isinstance(param, bool) and isinstance(
                param, int) or isinstance(param, float):
            # True/False are instances of int, hence check above
            negative_slope = param
        else:
            raise ValueError(
                "negative_slope {} not a valid number".format(param))
        return math.sqrt(2.0 / (1 + negative_slope**2))
    elif nonlinearity == "selu":
        return 3.0 / 4
    else:
        raise ValueError("Unsupported nonlinearity {}".format(nonlinearity))


def kaiming_uniform_(tensor,
                     a=0,
                     mode="fan_in",
                     nonlinearity="leaky_relu",
                     reverse=False):
    """
    Initialise `tensor` with the Kaiming (He) uniform scheme.

    The sampling range is [-bound, bound] with
    bound = sqrt(3) * gain / sqrt(fan); sampling is delegated to
    `_no_grad_uniform_`.
    Args:
        tensor (paddle.Tensor): tensor to initialise
        a: passed to `_calculate_gain` as its `param` (the negative slope
            when nonlinearity is "leaky_relu"), 0 by default
        mode (str): 'fan_in' (default) or 'fan_out'
        nonlinearity (str): nonlinearity name used to look up the gain
        reverse (bool): layout flag forwarded to the fan computation;
            False (default) means the shape is [fout, fin, ...].
    Return:
        the value returned by `_no_grad_uniform_`
    """
    fan = _calculate_correct_fan(tensor, mode, reverse)
    # std = gain / sqrt(fan); a uniform on [-sqrt(3)*std, sqrt(3)*std] has that std.
    bound = math.sqrt(3.0) * (_calculate_gain(nonlinearity, a) / math.sqrt(fan))
    return _no_grad_uniform_(tensor, -bound, bound)


def kaiming_normal_(tensor,
                    a=0,
                    mode="fan_in",
                    nonlinearity="leaky_relu",
                    reverse=False):
    """
    Initialise `tensor` with the Kaiming (He) normal scheme.

    Samples are requested from `_no_grad_normal_` with mean 0 and
    std = gain / sqrt(fan).
    Args:
        tensor (paddle.Tensor): tensor to initialise
        a: passed to `_calculate_gain` as its `param` (the negative slope
            when nonlinearity is "leaky_relu"), 0 by default
        mode (str): 'fan_in' (default) or 'fan_out'
        nonlinearity (str): nonlinearity name used to look up the gain
        reverse (bool): layout flag forwarded to the fan computation;
            False (default) means the shape is [fout, fin, ...].
    Return:
        the value returned by `_no_grad_normal_`
    """
    fan = _calculate_correct_fan(tensor, mode, reverse)
    scale = _calculate_gain(nonlinearity, a) / math.sqrt(fan)
    return _no_grad_normal_(tensor, 0, scale)


def linear_init_(module):
    """
    Uniformly initialise the weight (and bias, if present) of a linear layer.

    Both parameters are drawn from [-bound, bound] with
    bound = 1 / sqrt(module.weight.shape[0]).
    Args:
        module: layer exposing `weight` and `bias` attributes
    Return:
        None
    """
    bound = 1 / math.sqrt(module.weight.shape[0])
    uniform_(module.weight, -bound, bound)
    # A layer built without a bias exposes `bias` as None; skip it, as
    # conv_init_ already does.
    if module.bias is not None:
        uniform_(module.bias, -bound, bound)


def conv_init_(module):
    """
    Uniformly initialise the weight (and bias, if present) of a conv layer.

    Both parameters are drawn from [-bound, bound] with
    bound = 1 / sqrt(prod(module.weight.shape[1:])).
    Args:
        module: layer exposing `weight` and `bias` attributes
    Return:
        None
    """
    # Product of every weight dimension except the first one.
    fan = np.prod(module.weight.shape[1:])
    bound = 1 / np.sqrt(fan)
    params = [module.weight]
    if module.bias is not None:
        params.append(module.bias)
    for param in params:
        uniform_(param, -bound, bound)


def bias_init_with_prob(prior_prob=0.01):
    """
    Compute a conv/fc bias value from a prior probability.

    Args:
        prior_prob (float): prior probability, 0.01 by default
    Return:
        float: -log((1 - prior_prob) / prior_prob)
    """
    odds_against = (1 - prior_prob) / prior_prob
    return float(-np.log(odds_against))


@paddle.no_grad()
def reset_initialized_parameter(model, include_self=True):
    """
    Re-initialise the parameters of every Conv2D, Linear, Embedding,
    BatchNorm2D and LayerNorm sub-layer of `model`.

    * Conv2D: weight and bias uniform in [-k, k],
      k = sqrt(groups / (in_channels * kernel_h * kernel_w))
    * Linear: weight and bias uniform in [-k, k], k = sqrt(1 / weight.shape[0])
    * Embedding: weight normal with mean 0 and std 1
    * BatchNorm2D / LayerNorm: weight filled with 1, bias filled with 0
    A bias is only touched when the layer has a non-None `bias` attribute.
    Args:
        model (paddle.Layer): paddle Layer
        include_self (bool: True): forwarded to Layer.named_sublayers;
            whether the layer itself is included in the traversal
    Return:
        None
    """

    def _has_bias(layer):
        return hasattr(layer, "bias") and getattr(layer, "bias") is not None

    for _, layer in model.named_sublayers(include_self=include_self):
        if isinstance(layer, nn.Conv2D):
            kernel = layer._kernel_size
            fan_in = layer._in_channels * kernel[0] * kernel[1]
            bound = math.sqrt(float(layer._groups) / fan_in)
            _no_grad_uniform_(layer.weight, -bound, bound)
            if _has_bias(layer):
                _no_grad_uniform_(layer.bias, -bound, bound)
            continue

        if isinstance(layer, nn.Linear):
            bound = math.sqrt(1.0 / layer.weight.shape[0])
            _no_grad_uniform_(layer.weight, -bound, bound)
            if _has_bias(layer):
                _no_grad_uniform_(layer.bias, -bound, bound)
            continue

        if isinstance(layer, nn.Embedding):
            _no_grad_normal_(layer.weight, mean=0.0, std=1.0)
            continue

        if isinstance(layer, (nn.BatchNorm2D, nn.LayerNorm)):
            _no_grad_fill_(layer.weight, 1.0)
            if _has_bias(layer):
                _no_grad_fill_(layer.bias, 0)


================================================
FILE: paddlespeech/vector/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/vector/cluster/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/vector/cluster/diarization.py
================================================
# Copyright (c) 2022 PaddlePaddle and SpeechBrain Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from speechbrain(https://github.com/speechbrain/speechbrain)
"""
This script contains basic functions used for speaker diarization.
This script depends on the open-source scikit-learn (sklearn) library.
A few sklearn functions are modified in this script as per requirement.
"""
import argparse
import copy
import warnings

import numpy as np
import scipy
import sklearn
from scipy import linalg
from scipy import sparse
from scipy.sparse.csgraph import connected_components
from scipy.sparse.csgraph import laplacian as csgraph_laplacian
from scipy.sparse.linalg import eigsh
from sklearn.cluster import SpectralClustering
from sklearn.cluster._kmeans import k_means
from sklearn.neighbors import kneighbors_graph

from paddlespeech.utils.argparse import strtobool


def _graph_connected_component(graph, node_id):
    """
    Find the largest graph connected components that contains one
    given node.

    Arguments
    ---------
    graph : array-like, shape: (n_samples, n_samples)
        Adjacency matrix of the graph, non-zero weight means an edge
        between the nodes.
    node_id : int
        The index of the query node of the graph.

    Returns
    -------
    connected_components_matrix : array-like
        shape - (n_samples,).
        An array of bool value indicating the indexes of the nodes belonging
        to the largest connected components of the given query node.
    """

    n_node = graph.shape[0]
    if sparse.issparse(graph):
        # speed up row-wise access to boolean connection mask
        graph = graph.tocsr()
    connected_nodes = np.zeros(n_node, dtype=bool)
    nodes_to_explore = np.zeros(n_node, dtype=bool)
    nodes_to_explore[node_id] = True
    for _ in range(n_node):
        last_num_component = connected_nodes.sum()
        np.logical_or(connected_nodes, nodes_to_explore, out=connected_nodes)
        if last_num_component >= connected_nodes.sum():
            break
        indices = np.where(nodes_to_explore)[0]
        nodes_to_explore.fill(False)
        for i in indices:
            if sparse.issparse(graph):
                neighbors = graph[i].toarray().ravel()
            else:
                neighbors = graph[i]
            np.logical_or(nodes_to_explore, neighbors, out=nodes_to_explore)
    return connected_nodes


def _graph_is_connected(graph):
    """
    Return whether the graph is connected (True) or Not (False)

    Arguments
    ---------
    graph : array-like or sparse matrix, shape: (n_samples, n_samples)
        Adjacency matrix of the graph, non-zero weight means an edge between the nodes.

    Returns
    -------
    is_connected : bool
        True means the graph is fully connected and False means not.
    """

    if sparse.isspmatrix(graph):
        # sparse graph, find all the connected components
        n_connected_components, _ = connected_components(graph)
        return n_connected_components == 1
    else:
        # dense graph, find all connected components start from node 0
        return _graph_connected_component(graph, 0).sum() == graph.shape[0]


def _set_diag(laplacian, value, norm_laplacian):
    """
    Set the diagonal of the laplacian matrix and convert it to a sparse
    format well suited for eigenvalue decomposition.

    Arguments
    ---------
    laplacian : array or sparse matrix
        The graph laplacian.
    value : float
        The value of the diagonal.
    norm_laplacian : bool
        Whether the value of the diagonal should be changed or not.

    Returns
    -------
    laplacian : array or sparse matrix
        An array of matrix in a form that is well suited to fast eigenvalue
        decomposition, depending on the bandwidth of the matrix.
    """

    n_nodes = laplacian.shape[0]
    # We need all entries in the diagonal to values
    if not sparse.isspmatrix(laplacian):
        if norm_laplacian:
            laplacian.flat[::n_nodes + 1] = value
    else:
        laplacian = laplacian.tocoo()
        if norm_laplacian:
            diag_idx = laplacian.row == laplacian.col
            laplacian.data[diag_idx] = value
        # If the matrix has a small number of diagonals (as in the
        # case of structured matrices coming from images), the
        # dia format might be best suited for matvec products:
        n_diags = np.unique(laplacian.row - laplacian.col).size
        if n_diags <= 7:
            # 3 or less outer diagonals on each side
            laplacian = laplacian.todia()
        else:
            # csr has the fastest matvec and is thus best suited to
            # arpack
            laplacian = laplacian.tocsr()
    return laplacian


def _deterministic_vector_sign_flip(u):
    """
    Modify the sign of vectors for reproducibility. Flips the sign of
    elements of all the vectors (rows of u) such that the absolute
    maximum element of each vector is positive.

    Arguments
    ---------
    u : ndarray
        Array with vectors as its rows.

    Returns
    -------
    u_flipped : ndarray
        Array with the sign flipped vectors as its rows. The same shape as `u`.
    """

    max_abs_rows = np.argmax(np.abs(u), axis=1)
    signs = np.sign(u[range(u.shape[0]), max_abs_rows])
    u *= signs[:, np.newaxis]
    return u


def _check_random_state(seed):
    """
    Turn seed into a np.random.RandomState instance.

    Arguments
    ---------
    seed : None | int | instance of RandomState
        If seed is None, return the RandomState singleton used by np.random.
        If seed is an int, return a new RandomState instance seeded with seed.
        If seed is already a RandomState instance, return it.
        Otherwise raise ValueError.
    """

    if seed is None or seed is np.random:
        return np.random.mtrand._rand
    if isinstance(seed, numbers.Integral):
        return np.random.RandomState(seed)
    if isinstance(seed, np.random.RandomState):
        return seed
    raise ValueError("%r cannot be used to seed a np.random.RandomState"
                     " instance" % seed)


def spectral_embedding(
        adjacency,
        n_components=8,
        norm_laplacian=True,
        drop_first=True, ):
    """
    Computes spectral embeddings from the eigenvectors of the graph laplacian.

    Arguments
    ---------
    adjacency : array-like or sparse graph
        shape - (n_samples, n_samples)
        The adjacency matrix of the graph to embed.
    n_components : int
        The dimension of the projection subspace.
    norm_laplacian : bool
        If True, then compute normalized Laplacian.
    drop_first : bool
        Whether to drop the first eigenvector. When True, one extra
        eigenvector is computed so that `n_components` remain.

    Returns
    -------
    embedding : array
        Spectral embeddings for each sample.

    Example
    -------
    >>> import numpy as np
    >>> import diarization as diar
    >>> affinity = np.array([[1, 1, 1, 0.5, 0, 0, 0, 0, 0, 0.5],
    ... [1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
    ... [1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
    ... [0.5, 0, 0, 1, 1, 1, 0, 0, 0, 0],
    ... [0, 0, 0, 1, 1, 1, 0, 0, 0, 0],
    ... [0, 0, 0, 1, 1, 1, 0, 0, 0, 0],
    ... [0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
    ... [0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
    ... [0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
    ... [0.5, 0, 0, 0, 0, 0, 1, 1, 1, 1]])
    >>> embs = diar.spectral_embedding(affinity, 3)
    >>> # Notice similar embeddings
    >>> print(np.around(embs , decimals=3))
    [[ 0.075  0.244  0.285]
     [ 0.083  0.356 -0.203]
     [ 0.083  0.356 -0.203]
     [ 0.26  -0.149  0.154]
     [ 0.29  -0.218 -0.11 ]
     [ 0.29  -0.218 -0.11 ]
     [-0.198 -0.084 -0.122]
     [-0.198 -0.084 -0.122]
     [-0.198 -0.084 -0.122]
     [-0.167 -0.044  0.316]]
    """

    # One extra eigenvector is requested when the first one gets dropped.
    num_eig = n_components + 1 if drop_first else n_components

    if not _graph_is_connected(adjacency):
        warnings.warn("Graph is not fully connected, spectral embedding"
                      " may not work as expected.")

    laplacian, degree_diag = csgraph_laplacian(
        adjacency, normed=norm_laplacian, return_diag=True)
    laplacian = _set_diag(laplacian, 1, norm_laplacian)

    # Negate the laplacian, then run shift-invert eigsh around sigma=1.0.
    laplacian *= -1
    _, eig_vecs = eigsh(laplacian, k=num_eig, sigma=1.0, which="LM")

    # Eigenvectors as rows, in reversed order.
    embedding = eig_vecs.T[num_eig::-1]
    if norm_laplacian:
        embedding = embedding / degree_diag

    embedding = _deterministic_vector_sign_flip(embedding)

    first_row = 1 if drop_first else 0
    return embedding[first_row:num_eig].T


def spectral_clustering(
        affinity,
        n_clusters=8,
        n_components=None,
        random_state=None,
        n_init=10, ):
    """
    Clusters samples by running k-means on their spectral embeddings.

    Arguments
    ---------
    affinity : matrix
        Affinity matrix.
    n_clusters : int
        Number of clusters for kmeans.
    n_components : int
        Number of components to retain while estimating spectral embeddings.
        Defaults to `n_clusters` when None.
    random_state : int
        Seed (or RandomState) for the pseudo random number generator used by kmeans.
    n_init : int
        Number of time the k-means algorithm will be run with different centroid seeds.

    Returns
    -------
    labels : array
        Cluster label for each sample.

    Example
    -------
    >>> import numpy as np
    >>> import diarization as diar
    >>> affinity = np.array([[1, 1, 1, 0.5, 0, 0, 0, 0, 0, 0.5],
    ... [1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
    ... [1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
    ... [0.5, 0, 0, 1, 1, 1, 0, 0, 0, 0],
    ... [0, 0, 0, 1, 1, 1, 0, 0, 0, 0],
    ... [0, 0, 0, 1, 1, 1, 0, 0, 0, 0],
    ... [0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
    ... [0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
    ... [0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
    ... [0.5, 0, 0, 0, 0, 0, 1, 1, 1, 1]])
    >>> labs = diar.spectral_clustering(affinity, 3)
    >>> # print (labs) # [2 2 2 1 1 1 0 0 0 0]
    """

    rng = _check_random_state(random_state)
    if n_components is None:
        n_components = n_clusters

    # The first eigenvector is kept here (drop_first=False).
    embedding = spectral_embedding(
        affinity, n_components=n_components, drop_first=False)

    _centers, labels, _inertia = k_means(
        embedding, n_clusters, random_state=rng, n_init=n_init)
    return labels


class EmbeddingMeta:
    """
    A utility class to pack deep embeddings and meta-information in one object.

    Arguments
    ---------
    segset : list
        List of session IDs as an array of strings.
    modelset : list
        List of model IDs as an array of strings.
    stats : tensor
        An ndarray of float64. Each line contains embedding
        from the corresponding session.

    Besides the three arguments, the object carries `stat0`, the
    zero-order statistics: one row holding 1.0 per row of `stats`.
    """

    def __init__(
            self,
            segset=None,
            modelset=None,
            stats=None, ):

        if segset is None:
            # No data supplied: start from empty containers.
            self.segset = np.empty(0, dtype="|O")
            self.modelset = np.empty(0, dtype="|O")
            self.stats = np.array([], dtype=np.float64)
        else:
            self.segset = segset
            self.modelset = modelset
            self.stats = stats

        # Zero-order statistics: a single count of 1.0 for every session.
        self.stat0 = np.array([[1.0]] * self.stats.shape[0])

    def norm_stats(self):
        """
        Divide all first-order statistics by their Euclidean norm.
        """

        # Norms are clipped from below at 1e-08 to avoid division by zero.
        vect_norm = np.clip(np.linalg.norm(self.stats, axis=1), 1e-08, np.inf)
        self.stats = (self.stats.transpose() / vect_norm).transpose()

    def get_mean_stats(self):
        """
        Return the mean of first order statistics.
        """
        mu = np.mean(self.stats, axis=0)
        return mu

    def get_total_covariance_stats(self):
        """
        Compute and return the total covariance matrix of the first-order statistics.
        """
        C = self.stats - self.stats.mean(axis=0)
        return np.dot(C.transpose(), C) / self.stats.shape[0]

    def get_model_stat0(self, mod_id):
        """Return zero-order statistics of a given model

        Arguments
        ---------
        mod_id : str
            ID of the model which stat0 will be returned.
        """
        S = self.stat0[self.modelset == mod_id, :]
        return S

    def get_model_stats(self, mod_id):
        """Return first-order statistics of a given model.

        Arguments
        ---------
        mod_id : str
            ID of the model which stat1 will be returned.
        """
        return self.stats[self.modelset == mod_id, :]

    def sum_stat_per_model(self):
        """
        Sum the zero- and first-order statistics per model and store them
        in a new EmbeddingMeta.
        Returns a EmbeddingMeta object with the statistics summed per model
        and a numpy array with session_per_model.
        """

        sts_per_model = EmbeddingMeta()
        sts_per_model.modelset = np.unique(
            self.modelset)  # nd: get uniq spkr ids
        sts_per_model.segset = copy.deepcopy(sts_per_model.modelset)
        sts_per_model.stat0 = np.zeros(
            (sts_per_model.modelset.shape[0], self.stat0.shape[1]),
            dtype=np.float64, )
        sts_per_model.stats = np.zeros(
            (sts_per_model.modelset.shape[0], self.stats.shape[1]),
            dtype=np.float64, )

        session_per_model = np.zeros(np.unique(self.modelset).shape[0])

        # For each model sum the stats
        for idx, model in enumerate(sts_per_model.modelset):
            sts_per_model.stat0[idx, :] = self.get_model_stat0(model).sum(
                axis=0)
            sts_per_model.stats[idx, :] = self.get_model_stats(model).sum(
                axis=0)
            session_per_model[idx] += self.get_model_stats(model).shape[0]
        return sts_per_model, session_per_model

    def center_stats(self, mu):
        """
        Center first order statistics.

        Arguments
        ---------
        mu : array
            Array to center on.
        """

        # Integer division: np.repeat only accepts an integer repeat count
        # (a float count raises TypeError).
        dim = self.stats.shape[1] // self.stat0.shape[1]
        # Expand every stat0 column `dim` times so it lines up with stats.
        index_map = np.repeat(np.arange(self.stat0.shape[1]), dim)
        self.stats = self.stats - (self.stat0[:, index_map] *
                                   mu.astype(np.float64))

    def rotate_stats(self, R):
        """
        Rotate first-order statistics by a right-product.

        Arguments
        ---------
        R : ndarray
            Matrix to use for right product on the first order statistics.
        """
        self.stats = np.dot(self.stats, R)

    def whiten_stats(self, mu, sigma, isSqrInvSigma=False):
        """
        Whiten first-order statistics
        If sigma.ndim == 1, case of a diagonal covariance.
        If sigma.ndim == 2, case of a single Gaussian with full covariance.
        If sigma.ndim == 3, case of a full covariance UBM.

        Arguments
        ---------
        mu : array
            Mean vector to be subtracted from the statistics.
        sigma : narray
            Co-variance matrix or covariance super-vector.
        isSqrInvSigma : bool
            True if the input Sigma matrix is the inverse of the square root of a covariance matrix.
            Only consulted when sigma.ndim == 2.

        Raises
        ------
        Exception
            If sigma.ndim is not 1, 2 or 3.
        """

        if sigma.ndim == 1:
            self.center_stats(mu)
            self.stats = self.stats / np.sqrt(sigma.astype(np.float64))

        elif sigma.ndim == 2:
            # Compute the inverse square root of the co-variance matrix Sigma
            sqr_inv_sigma = sigma

            if not isSqrInvSigma:
                eigen_values, eigen_vectors = linalg.eigh(sigma)
                # Sort the eigen pairs by decreasing eigenvalue.
                ind = eigen_values.real.argsort()[::-1]
                eigen_values = eigen_values.real[ind]
                eigen_vectors = eigen_vectors.real[:, ind]

                sqr_inv_eval_sigma = 1 / np.sqrt(eigen_values.real)
                sqr_inv_sigma = np.dot(eigen_vectors,
                                       np.diag(sqr_inv_eval_sigma))

            # Whitening of the first-order statistics
            self.center_stats(mu)  # CENTERING
            self.rotate_stats(sqr_inv_sigma)

        elif sigma.ndim == 3:
            # we assume that sigma is a 3D ndarray of size D x n x n
            # where D is the number of distributions and n is the dimension of a single distribution
            n = self.stats.shape[1] // self.stat0.shape[1]
            sess_nb = self.stat0.shape[0]
            self.center_stats(mu)
            self.stats = (np.einsum("ikj,ikl->ilj",
                                    self.stats.T.reshape(-1, n, sess_nb), sigma)
                          .reshape(-1, sess_nb).T)

        else:
            raise Exception("Wrong dimension of Sigma, must be 1, 2 or 3")

    def align_models(self, model_list):
        """
        Align models of the current EmbeddingMeta to match a list of models
            provided as input parameter. The size of the EmbeddingMeta might be
            reduced to match the input list of models.

        Arguments
        ---------
        model_list : ndarray of strings
            List of models to match.
        """
        # For every requested model keep the first row carrying that ID.
        indx = np.array(
            [np.argwhere(self.modelset == v)[0][0] for v in model_list])
        self.segset = self.segset[indx]
        self.modelset = self.modelset[indx]
        self.stat0 = self.stat0[indx, :]
        self.stats = self.stats[indx, :]

    def align_segments(self, segment_list):
        """
        Align segments of the current EmbeddingMeta to match a list of segment
            provided as input parameter. The size of the EmbeddingMeta might be
            reduced to match the input list of segments.

        Arguments
        ---------
        segment_list: ndarray of strings
            list of segments to match
        """
        # For every requested segment keep the first row carrying that ID.
        indx = np.array(
            [np.argwhere(self.segset == v)[0][0] for v in segment_list])
        self.segset = self.segset[indx]
        self.modelset = self.modelset[indx]
        self.stat0 = self.stat0[indx, :]
        self.stats = self.stats[indx, :]


class SpecClustUnorm:
    """
    This class implements the spectral clustering with unnormalized affinity matrix.
    Useful when affinity matrix is based on cosine similarities.

    Reference
    ---------
    Von Luxburg, U. A tutorial on spectral clustering. Stat Comput 17, 395–416 (2007).
    https://doi.org/10.1007/s11222-007-9033-z

    Example
    -------
    >>> import diarization as diar
    >>> clust = diar.SpecClustUnorm(min_num_spkrs=2, max_num_spkrs=10)
    >>> emb = [[ 2.1, 3.1, 4.1, 4.2, 3.1],
    ... [ 2.2, 3.1, 4.2, 4.2, 3.2],
    ... [ 2.0, 3.0, 4.0, 4.1, 3.0],
    ... [ 8.0, 7.0, 7.0, 8.1, 9.0],
    ... [ 8.1, 7.1, 7.2, 8.1, 9.2],
    ... [ 8.3, 7.4, 7.0, 8.4, 9.0],
    ... [ 0.3, 0.4, 0.4, 0.5, 0.8],
    ... [ 0.4, 0.3, 0.6, 0.7, 0.8],
    ... [ 0.2, 0.3, 0.2, 0.3, 0.7],
    ... [ 0.3, 0.4, 0.4, 0.4, 0.7],]
    >>> # Estimating similarity matrix
    >>> sim_mat = clust.get_sim_mat(emb)
    >>> print (np.around(sim_mat[5:,5:], decimals=3))
    [[1.    0.957 0.961 0.904 0.966]
     [0.957 1.    0.977 0.982 0.997]
     [0.961 0.977 1.    0.928 0.972]
     [0.904 0.982 0.928 1.    0.976]
     [0.966 0.997 0.972 0.976 1.   ]]
    >>> # Pruning
    >>> prunned_sim_mat = clust.p_pruning(sim_mat, 0.3)
    >>> print (np.around(prunned_sim_mat[5:,5:], decimals=3))
    [[1.    0.    0.    0.    0.   ]
     [0.    1.    0.    0.982 0.997]
     [0.    0.977 1.    0.    0.972]
     [0.    0.982 0.    1.    0.976]
     [0.    0.997 0.    0.976 1.   ]]
    >>> # Symmetrization
    >>> sym_prund_sim_mat = 0.5 * (prunned_sim_mat + prunned_sim_mat.T)
    >>> print (np.around(sym_prund_sim_mat[5:,5:], decimals=3))
    [[1.    0.    0.    0.    0.   ]
     [0.    1.    0.489 0.982 0.997]
     [0.    0.489 1.    0.    0.486]
     [0.    0.982 0.    1.    0.976]
     [0.    0.997 0.486 0.976 1.   ]]
    >>> # Laplacian
    >>> laplacian = clust.get_laplacian(sym_prund_sim_mat)
    >>> print (np.around(laplacian[5:,5:], decimals=3))
    [[ 1.999  0.     0.     0.     0.   ]
     [ 0.     2.468 -0.489 -0.982 -0.997]
     [ 0.    -0.489  0.975  0.    -0.486]
     [ 0.    -0.982  0.     1.958 -0.976]
     [ 0.    -0.997 -0.486 -0.976  2.458]]
    >>> # Spectral Embeddings
    >>> spec_emb, num_of_spk = clust.get_spec_embs(laplacian, 3)
    >>> print(num_of_spk)
    3
    >>> # Clustering
    >>> clust.cluster_embs(spec_emb, num_of_spk)
    >>> # print (clust.labels_) # [0 0 0 2 2 2 1 1 1 1]
    >>> # Complete spectral clustering
    >>> clust.do_spec_clust(emb, k_oracle=3, p_val=0.3)
    >>> # print(clust.labels_) # [0 0 0 2 2 2 1 1 1 1]
    """

    def __init__(self, min_num_spkrs=2, max_num_spkrs=10):
        """
        Arguments
        ---------
        min_num_spkrs : int
            Lower bound applied to the estimated number of speakers.
        max_num_spkrs : int
            Upper limit on the eigenvalues considered when estimating
            the number of speakers.
        """

        self.min_num_spkrs = min_num_spkrs
        self.max_num_spkrs = max_num_spkrs

    def do_spec_clust(self, X, k_oracle, p_val):
        """
        Function for spectral clustering. The resulting labels are stored
        in `self.labels_` (see `cluster_embs`).

        Arguments
        ---------
        X : array
            (n_samples, n_features).
            Embeddings extracted from the model.
        k_oracle : int
            Number of speakers (when oracle number of speakers).
        p_val : float
            p percent value to prune the affinity matrix.
        """

        # Similarity matrix computation
        sim_mat = self.get_sim_mat(X)

        # Refining similarity matrix with p_val
        prunned_sim_mat = self.p_pruning(sim_mat, p_val)

        # Symmetrization
        sym_prund_sim_mat = 0.5 * (prunned_sim_mat + prunned_sim_mat.T)

        # Laplacian calculation
        laplacian = self.get_laplacian(sym_prund_sim_mat)

        # Get Spectral Embeddings
        emb, num_of_spk = self.get_spec_embs(laplacian, k_oracle)

        # Perform clustering
        self.cluster_embs(emb, num_of_spk)

    def get_sim_mat(self, X):
        """
        Returns the similarity matrix based on cosine similarities.

        Arguments
        ---------
        X : array
            (n_samples, n_features).
            Embeddings extracted from the model.

        Returns
        -------
        M : array
            (n_samples, n_samples).
            Similarity matrix with cosine similarities between each pair of embedding.
        """

        # Cosine similarities
        M = sklearn.metrics.pairwise.cosine_similarity(X, X)
        return M

    def p_pruning(self, A, pval):
        """
        Refine the affinity matrix by zeroing less similar values.
        `A` is modified in place and also returned.

        Arguments
        ---------
        A : array
            (n_samples, n_samples).
            Affinity matrix.
        pval : float
            Fraction of values to be retained in each row of the affinity matrix.

        Returns
        -------
        A : array
            (n_samples, n_samples).
            Pruned affinity matrix based on p_val.
        """

        # Number of smallest entries to zero out in every row.
        n_elems = int((1 - pval) * A.shape[0])

        # For each row in a affinity matrix
        for i in range(A.shape[0]):
            low_indexes = np.argsort(A[i, :])
            low_indexes = low_indexes[0:n_elems]

            # Replace smaller similarity values by 0s
            A[i, low_indexes] = 0

        return A

    def get_laplacian(self, M):
        """
        Returns the un-normalized laplacian for the given affinity matrix.
        The diagonal of `M` is set to zero in place before the computation.

        Arguments
        ---------
        M : array
            (n_samples, n_samples)
            Affinity matrix.

        Returns
        -------
        L : array
            (n_samples, n_samples)
            Laplacian matrix.
        """

        M[np.diag_indices(M.shape[0])] = 0
        # Degree matrix: row sums of absolute affinities on the diagonal.
        D = np.sum(np.abs(M), axis=1)
        D = np.diag(D)
        L = D - M
        return L

    def get_spec_embs(self, L, k_oracle=4):
        """
        Returns spectral embeddings and estimates the number of speakers
        using maximum Eigen gap.

        Arguments
        ---------
        L : array (n_samples, n_samples)
            Laplacian matrix.
        k_oracle : int
            Number of speakers when the condition is oracle number of speakers,
            else None.

        Returns
        -------
        emb : array (n_samples, n_components)
            Spectral embedding for each sample with n Eigen components.
        num_of_spk : int
            Estimated number of speakers. If the condition is set to the oracle
            number of speakers then returns k_oracle.
        """

        lambdas, eig_vecs = scipy.linalg.eigh(L)

        if k_oracle is not None:
            num_of_spk = k_oracle
        else:
            lambda_gap_list = self.get_eigen_gaps(lambdas[1:self.max_num_spkrs])

            if lambda_gap_list:
                best_gap = np.argmax(lambda_gap_list[:min(
                    self.max_num_spkrs, len(lambda_gap_list))])
            else:
                # Fewer than two candidate eigenvalues leave no gap to
                # rank; np.argmax would raise on the empty list, so fall
                # back to the first position.
                best_gap = 0

            num_of_spk = best_gap + 2

            if num_of_spk < self.min_num_spkrs:
                num_of_spk = self.min_num_spkrs

        emb = eig_vecs[:, 0:num_of_spk]

        return emb, num_of_spk

    def cluster_embs(self, emb, k):
        """
        Clusters the embeddings using kmeans.

        Arguments
        ---------
        emb : array (n_samples, n_components)
            Spectral embedding for each sample with n Eigen components.
        k : int
            Number of clusters to kmeans.

        Returns
        -------
        None. The labels for each sample embedding are stored in
        `self.labels_`.
        """
        _, self.labels_, _ = k_means(emb, k)

    def get_eigen_gaps(self, eig_vals):
        """
        Returns the difference (gaps) between the Eigen values.

        Arguments
        ---------
        eig_vals : list
            List of eigen values

        Returns
        -------
        eig_vals_gap_list : list
            List of differences (gaps) between adjacent Eigen values.
        """

        eig_vals_gap_list = []
        for i in range(len(eig_vals) - 1):
            gap = float(eig_vals[i + 1]) - float(eig_vals[i])
            eig_vals_gap_list.append(gap)

        return eig_vals_gap_list


class SpecCluster(SpectralClustering):
    """Spectral clustering driven by a k-nearest-neighbour affinity matrix."""

    def perform_sc(self, X, n_neighbors=10):
        """
        Clusters the embeddings with sklearn's spectral clustering.

        Arguments
        ---------
        X : array (n_samples, n_features)
            Embeddings to be clustered.
        n_neighbors : int
            Number of neighbors in estimating affinity matrix.

        Returns
        -------
        self
            The instance, with ``affinity_matrix_`` and ``labels_`` set.
        """

        # k-NN connectivity graph; every sample is counted as its own neighbour.
        knn_graph = kneighbors_graph(
            X, n_neighbors=n_neighbors, include_self=True)

        # Average the graph with its transpose to obtain a symmetric affinity.
        symmetric_affinity = 0.5 * (knn_graph + knn_graph.T)
        self.affinity_matrix_ = symmetric_affinity

        # Cluster on the precomputed affinity matrix.
        self.labels_ = spectral_clustering(
            symmetric_affinity, n_clusters=self.n_clusters)
        return self


def is_overlapped(end1, start2):
    """
    Tells whether the second segment starts before (or exactly when) the
    first one ends.

    Arguments
    ---------
    end1 : float
        End time of the first segment.
    start2 : float
        Start time of the second segment.

    Returns
    -------
    overlapped : bool
        True if the segments overlap (or touch), else False.

    Example
    -------
    >>> import diarization as diar
    >>> diar.is_overlapped(5.5, 3.4)
    True
    >>> diar.is_overlapped(5.5, 6.4)
    False
    """

    # Only a strictly later start counts as "no overlap".
    return not (start2 > end1)


def merge_ssegs_same_speaker(lol):
    """
    Merge adjacent sub-segs from the same speaker.

    Consecutive entries are merged when they overlap (see ``is_overlapped``)
    and carry the same speaker ID. The input is expected to be sorted by
    start time; it is not modified.

    Arguments
    ---------
    lol : list of list
        Each list contains [rec_id, seg_start, seg_end, spkr_id].

    Returns
    -------
    new_lol : list of list
        new_lol contains adjacent segments merged from the same speaker ID.
        An empty input yields an empty list.

    Example
    -------
    >>> import diarization as diar
    >>> lol=[['r1', 5.5, 7.0, 's1'],
    ... ['r1', 6.5, 9.0, 's1'],
    ... ['r1', 8.0, 11.0, 's1'],
    ... ['r1', 11.5, 13.0, 's2'],
    ... ['r1', 14.0, 15.0, 's2'],
    ... ['r1', 14.5, 15.0, 's1']]
    >>> diar.merge_ssegs_same_speaker(lol)
    [['r1', 5.5, 11.0, 's1'], ['r1', 11.5, 13.0, 's2'], ['r1', 14.0, 15.0, 's2'], ['r1', 14.5, 15.0, 's1']]
    """

    if not lol:
        return []

    new_lol = []

    # Work on a copy so that the caller's segments are left untouched.
    sseg = list(lol[0])
    for next_sseg in lol[1:]:
        # IF sub-segments overlap AND has same speaker THEN merge
        if is_overlapped(sseg[2], next_sseg[1]) and sseg[3] == next_sseg[3]:
            # Keep the later end time: the next sub-segment may finish
            # before the current one does, and the merge must not shrink it.
            sseg[2] = max(sseg[2], next_sseg[2])
        else:
            new_lol.append(sseg)
            sseg = list(next_sseg)

    # The segment still being built is always pending at this point.
    new_lol.append(sseg)

    return new_lol


def write_ders_file(ref_rttm, DER, out_der_file):
    """Write one DER line per recording, followed by the overall DER.

    Arguments
    ---------
    ref_rttm : str
        Reference RTTM file. Its ``SPKR-INFO`` rows supply the recording IDs,
        in order of first appearance.
    DER : array
        Array containing DER values of each recording; the entry after the
        last recording is written as the overall value.
    out_der_file : str
        File to write the DERs.
    """

    header_rows = [
        entry for entry in read_rttm(ref_rttm)
        if entry.startswith("SPKR-INFO")
    ]

    seen_rec_ids = set()
    der_idx = 0

    with open(out_der_file, "w") as f:
        for entry in header_rows:
            # The recording ID is the second space-separated field.
            rec_id = entry.split(" ")[1]
            if rec_id in seen_rec_ids:
                continue

            der_text = str(round(DER[der_idx], 2))
            seen_rec_ids.add(rec_id)
            f.write("%s\n" % " ".join([rec_id, der_text]))
            der_idx += 1

        # DER holds one extra trailing value: the overall score.
        overall_text = str(round(DER[der_idx], 2))
        f.write("%s\n" % " ".join(["OVERALL ", overall_text]))


def get_oracle_num_spkrs(rec_id, spkr_info):
    """
    Returns actual number of speakers in a recording from the ground-truth.
    This can be used when the condition is oracle number of speakers.

    A row is counted when its recording-ID field (the second
    whitespace-separated field) equals `rec_id` exactly, so a recording
    whose ID merely contains `rec_id` as a substring (e.g. ``utt10`` when
    asking for ``utt1``) is not counted.

    Arguments
    ---------
    rec_id : str
        Recording ID for which the number of speakers have to be obtained.
    spkr_info : list
        Header of the RTTM file. Starting with `SPKR-INFO`.

    Returns
    -------
    num_spkrs : int
        Number of rows of `spkr_info` that belong to `rec_id`.

    Example
    -------
    >>> import diarization as diar
    >>> spkr_info = ['SPKR-INFO ES2011a 0 <NA> <NA> <NA> unknown ES2011a.A <NA> <NA>',
    ... 'SPKR-INFO ES2011a 0 <NA> <NA> <NA> unknown ES2011a.B <NA> <NA>',
    ... 'SPKR-INFO ES2011a 0 <NA> <NA> <NA> unknown ES2011a.C <NA> <NA>',
    ... 'SPKR-INFO ES2011a 0 <NA> <NA> <NA> unknown ES2011a.D <NA> <NA>',
    ... 'SPKR-INFO ES2011b 0 <NA> <NA> <NA> unknown ES2011b.A <NA> <NA>',
    ... 'SPKR-INFO ES2011b 0 <NA> <NA> <NA> unknown ES2011b.B <NA> <NA>',
    ... 'SPKR-INFO ES2011b 0 <NA> <NA> <NA> unknown ES2011b.C <NA> <NA>']
    >>> diar.get_oracle_num_spkrs('ES2011a', spkr_info)
    4
    >>> diar.get_oracle_num_spkrs('ES2011b', spkr_info)
    3
    """

    num_spkrs = 0
    for line in spkr_info:
        fields = line.split()
        # Field 0 is the row type, field 1 the recording ID.
        if len(fields) > 1 and fields[1] == rec_id:
            num_spkrs += 1

    return num_spkrs


def distribute_overlap(lol):
    """
    Distributes the overlapped speech equally among the adjacent segments
    with different speakers.

    The input is expected to be sorted by start time and already processed
    by ``merge_ssegs_same_speaker()``, so overlapping neighbours always
    belong to different speakers. The input is not modified.

    Arguments
    ---------
    lol : list of list
        It has each list structure as [rec_id, seg_start, seg_end, spkr_id].

    Returns
    -------
    new_lol : list of list
        It contains the overlapped part equally divided among the adjacent
        segments with different speaker IDs. An empty input yields an empty
        list and a single segment is returned as is.

    Example
    -------
    >>> import diarization as diar
    >>> lol = [['r1', 5.5, 9.0, 's1'],
    ... ['r1', 8.0, 11.0, 's2'],
    ... ['r1', 11.5, 13.0, 's2'],
    ... ['r1', 12.0, 15.0, 's1']]
    >>> diar.distribute_overlap(lol)
    [['r1', 5.5, 8.5, 's1'], ['r1', 8.5, 11.0, 's2'], ['r1', 11.5, 12.5, 's2'], ['r1', 12.5, 15.0, 's1']]
    """

    if not lol:
        return []

    new_lol = []

    # Work on copies so that the caller's segments are left untouched.
    sseg = list(lol[0])

    for original_next in lol[1:]:
        next_sseg = list(original_next)

        # No need to check if they are different speakers: same-speaker
        # neighbours were already merged by "merge_ssegs_same_speaker()".
        if is_overlapped(sseg[2], next_sseg[1]):
            # The overlap is split at its mid-point between both segments.
            overlap = sseg[2] - next_sseg[1]
            sseg[2] = sseg[2] - (overlap / 2.0)
            next_sseg[1] = next_sseg[1] + (overlap / 2.0)

        # Skip a segment identical to the one just stored (duplicate entry).
        if not new_lol or new_lol[-1] != sseg:
            new_lol.append(sseg)

        # Current sub-segment is next sub-segment
        sseg = next_sseg

    # Add the remaining last sub-segment (the only one for 1-element input).
    new_lol.append(sseg)

    return new_lol


def read_rttm(rttm_file_path):
    """
    Reads and returns RTTM in list format.

    Arguments
    ---------
    rttm_file_path : str
        Path to the RTTM file to be read.

    Returns
    -------
    rttm : list
        List containing rows of RTTM file, without their line terminator.
    """

    with open(rttm_file_path, "r") as f:
        # Strip only the newline: slicing off the last character would
        # truncate a final row that is not newline-terminated.
        return [line.rstrip("\n") for line in f]


def write_rttm(segs_list, out_rttm_file):
    """
    Dumps the segments as ``SPEAKER`` rows of an RTTM file (A standard NIST format).

    Every row carries the recording ID of the first segment, the segment
    start and its duration (both rounded to 4 decimals) and the speaker ID.

    Arguments
    ---------
    segs_list : list of list
        Each list contains [rec_id, seg_start, seg_end, spkr_id].
    out_rttm_file : str
        Path of the output RTTM file.
    """

    # The recording ID of the first segment is used for all rows.
    rec_id = segs_list[0][0]

    rows = [[
        "SPEAKER",
        rec_id,
        "0",
        str(round(seg[1], 4)),  # onset
        str(round(seg[2] - seg[1], 4)),  # duration
        "<NA>",
        "<NA>",
        seg[3],
        "<NA>",
        "<NA>",
    ] for seg in segs_list]

    with open(out_rttm_file, "w") as f:
        f.writelines("%s\n" % " ".join(fields) for fields in rows)


def do_AHC(diary_obj, out_rttm_file, rec_id, k_oracle=4, p_val=0.3):
    """
    Performs Agglomerative Hierarchical Clustering on embeddings.

    Arguments
    ---------
    diary_obj : EmbeddingMeta type
        Contains embeddings in diary_obj.stats and segment IDs in diary_obj.segset.
        Its statistics are normalised in place through ``norm_stats()``.
    out_rttm_file : str
        Path of the output RTTM file.
    rec_id : str
        Recording ID for the recording under processing. It prefixes every
        speaker label written to the RTTM file.
    k_oracle : int
        Number of speaker (None, if it has to be estimated).
    p_val : float
        Distance threshold handed to the clustering when the number of
        speakers is unknown (`k_oracle` is None). Note that this is just for
        experiment. Prefer Spectral clustering for better clustering results.
    """

    from sklearn.cluster import AgglomerativeClustering

    def _build_ahc(**kwargs):
        # Newer scikit-learn releases name the distance argument `metric`
        # and no longer accept `affinity`; older ones only know `affinity`.
        # An unknown keyword raises TypeError, which selects the fallback.
        try:
            return AgglomerativeClustering(
                metric="cosine", linkage="average", **kwargs)
        except TypeError:
            return AgglomerativeClustering(
                affinity="cosine", linkage="average", **kwargs)

    # p_val is the threshold_val (for AHC)
    diary_obj.norm_stats()

    # processing
    if k_oracle is not None:
        clustering = _build_ahc(n_clusters=k_oracle)
    else:
        # Number of speakers unknown: cut the tree at distance `p_val`.
        # This is just for experimentation.
        clustering = _build_ahc(n_clusters=None, distance_threshold=p_val)
    labels = clustering.fit(diary_obj.stats).labels_

    # Convert labels to speaker boundaries
    subseg_ids = diary_obj.segset
    lol = []

    for i in range(labels.shape[0]):
        spkr_id = rec_id + "_" + str(labels[i])

        # Sub-segment IDs look like "<rec_id>_<start>_<end>". The parsed
        # recording ID gets its own name so the `rec_id` argument (used for
        # the speaker label above) is not overwritten between iterations.
        splitted = subseg_ids[i].rsplit("_", 2)
        seg_rec_id = str(splitted[0])
        sseg_start = float(splitted[1])
        sseg_end = float(splitted[2])

        lol.append([seg_rec_id, sseg_start, sseg_end, spkr_id])

    # Sorting based on start time of sub-segment
    lol.sort(key=lambda x: float(x[1]))

    # Merge and split in 2 simple steps: (i) Merge sseg of same speakers then (ii) split different speakers
    # Step 1: Merge adjacent sub-segments that belong to same speaker (or cluster)
    lol = merge_ssegs_same_speaker(lol)

    # Step 2: Distribute duration of adjacent overlapping sub-segments belonging to different speakers (or cluster)
    # Taking mid-point as the splitting time location.
    lol = distribute_overlap(lol)

    # logger.info("Completed diarizing " + rec_id)
    write_rttm(lol, out_rttm_file)


def do_spec_clustering(diary_obj, out_rttm_file, rec_id, k, pval, affinity_type,
                       n_neighbors):
    """
    Performs spectral clustering on embeddings. This function calls specific
    clustering algorithms as per affinity.

    Arguments
    ---------
    diary_obj : EmbeddingMeta type
        Contains embeddings in diary_obj.stats and segment IDs in diary_obj.segset.
    out_rttm_file : str
        Path of the output RTTM file.
    rec_id : str
        Recording ID for the recording under processing. It prefixes every
        speaker label written to the RTTM file.
    k : int
        Number of speaker (None, if it has to be estimated).
    pval : float
        `pval` for pruning affinity matrix. Only used with the "cos" affinity.
    affinity_type : str
        Type of similarity to be used to get affinity matrix (cos or nn).
        Any value other than "cos" selects the nearest-neighbour variant.
    n_neighbors : int
        Number of neighbors used to build the affinity matrix. Only used
        when `affinity_type` is not "cos".
    """

    if affinity_type == "cos":
        clust_obj = SpecClustUnorm(min_num_spkrs=2, max_num_spkrs=10)
        k_oracle = k  # use it only when oracle num of speakers
        clust_obj.do_spec_clust(diary_obj.stats, k_oracle, pval)
        labels = clust_obj.labels_
    else:
        clust_obj = SpecCluster(
            n_clusters=k,
            assign_labels="kmeans",
            random_state=1234,
            affinity="nearest_neighbors", )
        clust_obj.perform_sc(diary_obj.stats, n_neighbors)
        labels = clust_obj.labels_

    # Convert labels to speaker boundaries
    subseg_ids = diary_obj.segset
    lol = []

    for i in range(labels.shape[0]):
        spkr_id = rec_id + "_" + str(labels[i])

        # Sub-segment IDs look like "<rec_id>_<start>_<end>". The parsed
        # recording ID gets its own name so the `rec_id` argument (used for
        # the speaker label above) is not overwritten between iterations.
        splitted = subseg_ids[i].rsplit("_", 2)
        seg_rec_id = str(splitted[0])
        sseg_start = float(splitted[1])
        sseg_end = float(splitted[2])

        lol.append([seg_rec_id, sseg_start, sseg_end, spkr_id])

    # Sorting based on start time of sub-segment
    lol.sort(key=lambda x: float(x[1]))

    # Merge and split in 2 simple steps: (i) Merge sseg of same speakers then (ii) split different speakers
    # Step 1: Merge adjacent sub-segments that belong to same speaker (or cluster)
    lol = merge_ssegs_same_speaker(lol)

    # Step 2: Distribute duration of adjacent overlapping sub-segments belonging to different speakers (or cluster)
    # Taking mid-point as the splitting time location.
    lol = distribute_overlap(lol)

    # logger.info("Completed diarizing " + rec_id)
    write_rttm(lol, out_rttm_file)


if __name__ == '__main__':
    # Small self-contained demo: clusters 10 random 32-dim embeddings of a
    # fake recording and writes the result to ./out.rttm.

    parser = argparse.ArgumentParser(
        prog='python diarization.py --backend AHC', description='diarizing')
    parser.add_argument(
        '--sys_rttm_dir',
        required=False,
        help='Directory to store system RTTM files')
    parser.add_argument(
        '--ref_rttm_dir',
        required=False,
        help='Directory to store reference RTTM files')
    parser.add_argument(
        '--backend', default="AHC", help='type of backend, AHC or SC or kmeans')
    parser.add_argument(
        '--oracle_n_spkrs',
        default=True,
        type=strtobool,
        help='Oracle num of speakers')
    parser.add_argument(
        '--mic_type',
        default="Mix-Headset",
        help='Type of microphone to be used')
    parser.add_argument(
        '--affinity', default="cos", help='affinity matrix, cos or nn')
    parser.add_argument(
        '--max_subseg_dur',
        default=3.0,
        type=float,
        help='Duration in seconds of a subsegments to be prepared from larger segments'
    )
    parser.add_argument(
        '--overlap',
        default=1.5,
        type=float,
        help='Overlap duration in seconds between adjacent subsegments')

    args = parser.parse_args()

    pval = 0.3
    rec_id = "utt0001"
    n_neighbors = 10
    out_rttm_file = "./out.rttm"

    embeddings = np.empty(shape=[0, 32], dtype=np.float64)
    segset = []

    # Sub-segment IDs follow the "<rec_id>_<start>_<end>" convention.
    for i in range(10):
        seg = [rec_id + "_" + str(i) + "_" + str(i + 1)]
        segset = segset + seg
        emb = np.random.rand(1, 32)
        embeddings = np.concatenate((embeddings, emb), axis=0)

    segset = np.array(segset, dtype="|O")
    stat_obj = EmbeddingMeta(segset, embeddings)

    # The flag is the bool default when omitted and whatever `strtobool`
    # returns when given on the command line, so test its truthiness rather
    # than identity with True. Without the oracle count the number of
    # speakers is left to the clustering backend (None).
    num_spkrs = 2 if args.oracle_n_spkrs else None

    if args.backend == "SC":
        print("begin SC ")
        do_spec_clustering(
            stat_obj,
            out_rttm_file,
            rec_id,
            num_spkrs,
            pval,
            args.affinity,
            n_neighbors, )
    elif args.backend == "AHC":
        print("begin AHC ")
        do_AHC(stat_obj, out_rttm_file, rec_id, num_spkrs, pval)
    else:
        # Only the two backends above are wired up in this demo.
        print("unsupported backend: %s" % args.backend)


================================================
FILE: paddlespeech/vector/cluster/plda.py
================================================
# Copyright (c) 2022 PaddlePaddle and SpeechBrain Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""A popular speaker recognition/diarization model (LDA and PLDA).

Relevant Papers
 - This implementation of PLDA is based on the following papers.

 - PLDA model Training
    * Ye Jiang et. al, "PLDA Modeling in I-Vector and Supervector Space for Speaker Verification," in Interspeech, 2012.
    * Patrick Kenny et. al, "PLDA for speaker verification with utterances of arbitrary duration," in ICASSP, 2013.

 - PLDA scoring (fast scoring)
    * Daniel Garcia-Romero et. al, “Analysis of i-vector length normalization in speaker recognition systems,” in Interspeech, 2011.
    * Weiwei-LIN et. al, "Fast Scoring for PLDA with Uncertainty Propagation," in Odyssey, 2016.
    * Kong Aik Lee et. al, "Multi-session PLDA Scoring of I-vector for Partially Open-Set Speaker Detection," in Interspeech 2013.

Credits
    This code is adapted from: https://git-lium.univ-lemans.fr/Larcher/sidekit
"""
import copy
import pickle

import numpy
from scipy import linalg

from paddlespeech.vector.cluster.diarization import EmbeddingMeta


def ismember(list1, list2):
    """Returns, for each item of `list1`, whether `item in list2` holds.

    The result is a plain Python list with one entry per item of `list1`.
    """
    flags = []
    for element in list1:
        flags.append(element in list2)
    return flags


class Ndx:
    """
    A class that encodes trial index information.  It has a list of
    model names and a list of test segment names and a matrix
    indicating which combinations of model and test segment are
    trials of interest.

    Arguments
    ---------
    modelset : list
        List of unique models in a ndarray.
    segset : list
        List of unique test segments in a ndarray.
    trialmask : 2D ndarray of bool.
        Rows correspond to the models and columns to the test segments. True, if the trial is of interest.
    """

    def __init__(self,
                 ndx_file_name="",
                 models=numpy.array([]),
                 testsegs=numpy.array([])):
        """
        Initialize a Ndx object, either from parallel arrays of trials or by
        loading information from a file.

        Arguments
        ---------
        ndx_file_name : str
            Name of the file to load. When empty (the default) the object is
            built from `models` and `testsegs` instead.
        models : ndarray
            Model name of each trial.
        testsegs : ndarray
            Test segment name of each trial, parallel to `models`. The
            shorter of the two arrays is padded with its last element.
        """
        self.modelset = numpy.empty(0, dtype="|O")
        self.segset = numpy.empty(0, dtype="|O")
        self.trialmask = numpy.array([], dtype="bool")

        if ndx_file_name == "":
            # This is needed to make sizes same
            d = models.shape[0] - testsegs.shape[0]
            if d != 0:
                if d > 0:
                    last = str(testsegs[-1])
                    pad = numpy.array([last] * d)
                    testsegs = numpy.hstack((testsegs, pad))
                else:
                    d = abs(d)
                    last = str(models[-1])
                    pad = numpy.array([last] * d)
                    models = numpy.hstack((models, pad))

            modelset = numpy.unique(models)
            segset = numpy.unique(testsegs)

            trialmask = numpy.zeros(
                (modelset.shape[0], segset.shape[0]), dtype="bool")
            for m in range(modelset.shape[0]):
                # Select the trials of this model by equality. A membership
                # test against the model name itself would be a substring
                # test and would also pick e.g. "spk1" trials for "spk10".
                segs = testsegs[models == modelset[m]]
                trialmask[m, ] = ismember(segset, segs)  # noqa E231

            self.modelset = modelset
            self.segset = segset
            self.trialmask = trialmask
            assert self.validate(), "Wrong Ndx format"

        else:
            # NOTE(review): no `read` method is defined in the part of the
            # class visible here — confirm it exists before using this path.
            ndx = Ndx.read(ndx_file_name)
            self.modelset = ndx.modelset
            self.segset = ndx.segset
            self.trialmask = ndx.trialmask

    def save_ndx_object(self, output_file_name):
        """Pickles this object into `output_file_name`."""
        with open(output_file_name, "wb") as output:
            pickle.dump(self, output, pickle.HIGHEST_PROTOCOL)

    def filter(self, modlist, seglist, keep):
        """
        Removes some of the information in an Ndx. Useful for creating a
        gender specific Ndx from a pooled gender Ndx.  Depending on the
        value of \'keep\', the two input lists indicate the strings to
        retain or the strings to discard.

        Arguments
        ---------
        modlist : array
            A cell array of strings which will be compared with the modelset of 'inndx'.
        seglist : array
            A cell array of strings which will be compared with the segset of 'inndx'.
        keep : bool
            Indicating whether modlist and seglist are the models to keep or discard.

        Returns
        -------
        outndx : Ndx
            A new Ndx restricted to the retained models and segments.
        """
        if keep:
            keepmods = modlist
            keepsegs = seglist
        else:
            # Keep everything that is NOT listed.
            keepmods = [mod for mod in self.modelset if mod not in modlist]
            keepsegs = [seg for seg in self.segset if seg not in seglist]

        keepmodidx = numpy.array(ismember(self.modelset, keepmods))
        keepsegidx = numpy.array(ismember(self.segset, keepsegs))

        outndx = Ndx()
        outndx.modelset = self.modelset[keepmodidx]
        outndx.segset = self.segset[keepsegidx]
        tmp = self.trialmask[numpy.array(keepmodidx), :]
        outndx.trialmask = tmp[:, numpy.array(keepsegidx)]

        # validate must be called: the bound method itself is always truthy.
        assert outndx.validate(), "Wrong Ndx format"

        if self.modelset.shape[0] > outndx.modelset.shape[0]:
            print("Number of models reduced from %d to %d" % (
                self.modelset.shape[0],
                outndx.modelset.shape[0], ))
        if self.segset.shape[0] > outndx.segset.shape[0]:
            print("Number of test segments reduced from %d to %d" % (
                self.segset.shape[0],
                outndx.segset.shape[0], ))
        return outndx

    def validate(self):
        """
        Checks that an object of type Ndx obeys certain rules that
        must always be true. Returns a boolean value indicating whether the object is valid
        """
        ok = isinstance(self.modelset, numpy.ndarray)
        ok &= isinstance(self.segset, numpy.ndarray)
        ok &= isinstance(self.trialmask, numpy.ndarray)

        ok &= self.modelset.ndim == 1
        ok &= self.segset.ndim == 1
        ok &= self.trialmask.ndim == 2

        # One mask row per model and one column per test segment.
        ok &= self.trialmask.shape == (self.modelset.shape[0],
                                       self.segset.shape[0], )
        return ok


class Scores:
    """
    A class for storing scores for trials.  The modelset and segset
    fields are lists of model and test segment names respectively.
    The element i,j of scoremat and scoremask corresponds to the
    trial involving model i and test segment j.

    Arguments
    ---------
    modelset : list
        List of unique models in a ndarray.
    segset : list
        List of unique test segments in a ndarray.
    scoremask : 2D ndarray of bool
        Indicates the trials of interest, i.e.,
        the entry i,j in scoremat should be ignored if scoremask[i,j] is False.
    scoremat : 2D ndarray
        Scores matrix.
    """

    def __init__(self, scores_file_name=""):
        """
        Initialize a Scores object, empty by default or by loading
        information from a file.

        Arguments
        ---------
        scores_file_name : str
            Name of the file to load. An empty string (the default) leaves
            all fields empty.
        """
        self.modelset = numpy.empty(0, dtype="|O")
        self.segset = numpy.empty(0, dtype="|O")
        self.scoremask = numpy.array([], dtype="bool")
        self.scoremat = numpy.array([])

        if scores_file_name != "":
            # NOTE(review): no `read` method is defined in the part of the
            # class visible here — confirm it exists before using this path.
            tmp = Scores.read(scores_file_name)
            self.modelset = tmp.modelset
            self.segset = tmp.segset
            self.scoremask = tmp.scoremask
            self.scoremat = tmp.scoremat

    def __repr__(self):
        """Returns a multi-line text dump of the four fields."""
        # Each field is rendered through its own repr: adding an ndarray to
        # a str does not produce text, and __repr__ has to return a str.
        ch = "modelset:\n"
        ch += self.modelset.__repr__() + "\n"
        ch += "segset:\n"
        ch += self.segset.__repr__() + "\n"
        ch += "scoremask:\n"
        ch += self.scoremask.__repr__() + "\n"
        ch += "scoremat:\n"
        ch += self.scoremat.__repr__() + "\n"
        return ch


def fa_model_loop(
        batch_start,
        mini_batch_indices,
        factor_analyser,
        stat0,
        stats,
        e_h,
        e_hh, ):
    """
    Fills the accumulators `e_h` and `e_hh` for a mini-batch of models
    during PLDA estimation.

    Arguments
    ---------
    batch_start : int
        Index to start at in the list.
    mini_batch_indices : list
        Indices of the elements in the list (should start at zero).
    factor_analyser : instance of PLDA class
        PLDA class object; its `F` and `Sigma` attributes are read.
    stat0 : tensor
        Matrix of zero-order statistics.
    stats: tensor
        Matrix of first-order statistics.
    e_h : tensor
        An accumulator matrix, written row by row (in place).
    e_hh: tensor
        An accumulator matrix, written slice by slice (in place).
    """
    eigen_voice = factor_analyser.F
    rank = eigen_voice.shape[1]

    if factor_analyser.Sigma.ndim == 2:
        # Full Sigma: the inverse only depends on the value found in the
        # first column of stat0, so compute it once per distinct value.
        gram = eigen_voice.T.dot(eigen_voice)
        identity = numpy.eye(gram.shape[0])
        inv_by_count = {
            count: linalg.inv(count * gram + identity)
            for count in numpy.unique(stat0[:, 0])
        }

    # Scratch buffer receiving the outer product of every row.
    outer_buf = numpy.zeros((rank, rank), dtype=numpy.float64)

    for idx in mini_batch_indices:
        row = idx + batch_start

        if factor_analyser.Sigma.ndim == 1:
            weighted = (eigen_voice.T * stat0[row, :]).dot(eigen_voice)
            inv_lambda = linalg.inv(numpy.eye(rank) + weighted)
        else:
            inv_lambda = inv_by_count[stat0[row, 0]]

        projected = eigen_voice.T.dot(stats[row, :])
        # The product is written straight into row `idx` of e_h.
        numpy.dot(projected, inv_lambda, out=e_h[idx])
        e_hh[idx] = inv_lambda + numpy.outer(e_h[idx], e_h[idx], outer_buf)


def _check_missing_model(enroll, test, ndx):
    """Restricts `ndx` to the available models/segments and aligns both sets.

    `enroll` and `test` are re-aligned in place to the filtered index, which
    is returned.
    """
    # Keep only the trials whose model and test segment are both present.
    filtered_ndx = ndx.filter(enroll.modelset, test.segset, True)

    # Bring both EmbeddingMeta objects in line with the filtered index.
    enroll.align_models(filtered_ndx.modelset)
    test.align_segments(filtered_ndx.segset)

    return filtered_ndx


class PLDA:
    """
    A class to train PLDA model from embeddings.

    The input is in paddlespeech.vector.cluster.diarization.EmbeddingMeta format.
    Trains a simplified PLDA model no within-class covariance matrix but full residual covariance matrix.

    Arguments
    ---------
    mean : tensor
        Mean of the vectors.
    F : tensor
        Eigenvoice matrix.
    Sigma : tensor
        Residual matrix.
    """

    def __init__(
            self,
            mean=None,
            F=None,
            Sigma=None,
            rank_f=100,
            nb_iter=10,
            scaling_factor=1.0, ):
        self.mean = None
        self.F = None
        self.Sigma = None
        self.rank_f = rank_f
        self.nb_iter = nb_iter
        self.scaling_factor = scaling_factor

        if mean is not None:
            self.mean = mean
        if F is not None:
            self.F = F
        if Sigma is not None:
            self.Sigma = Sigma

    def plda(
            self,
            emb_meta=None,
            output_file_name=None, ):
        """
        Trains PLDA model with no within class covariance matrix but full residual covariance matrix.

        The rank of the eigenvoice matrix, the number of EM iterations and the
        scaling factor are not arguments of this method: they are read from
        ``self.rank_f``, ``self.nb_iter`` and ``self.scaling_factor``.

        Arguments
        ---------
        emb_meta : paddlespeech.vector.cluster.diarization.EmbeddingMeta
            Contains vectors and meta-information to perform PLDA. Although
            the default is None, ``emb_meta.stats`` is dereferenced on the
            first statement, so an actual object has to be passed.
        output_file_name : str
            Name of the output file where to store PLDA model.
            NOTE(review): this argument is not referenced anywhere in the
            body below, so nothing is written by this method — confirm
            whether saving is expected to happen elsewhere.

        Returns
        -------
        None
            ``self.mean``, ``self.Sigma`` and ``self.F`` are overwritten
            with the estimated values.
        """

        # Dimension of the vector (x-vectors stored in stats)
        vect_size = emb_meta.stats.shape[1]

        # Initialize mean and residual covariance from the training data
        self.mean = emb_meta.get_mean_stats()
        self.Sigma = emb_meta.get_total_covariance_stats()

        # Sum stat0 and stat1 for each speaker model
        model_shifted_stat, session_per_model = emb_meta.sum_stat_per_model()

        # Number of speakers (classes) in training set
        class_nb = model_shifted_stat.modelset.shape[0]

        # Multiply statistics by scaling_factor (all three are scaled in place)
        model_shifted_stat.stat0 *= self.scaling_factor
        model_shifted_stat.stats *= self.scaling_factor
        session_per_model *= self.scaling_factor

        # Covariance for stats
        sigma_obs = emb_meta.get_total_covariance_stats()
        evals, evecs = linalg.eigh(sigma_obs)

        # Initial F (eigen voice matrix) from rank:
        # the eigenvectors of the rank_f largest eigenvalues.
        idx = numpy.argsort(evals)[::-1]
        evecs = evecs.real[:, idx[:self.rank_f]]
        self.F = evecs[:, :self.rank_f]

        # Estimate PLDA model by iterating the EM algorithm
        # (`it` is only a counter, it is not used inside the loop).
        for it in range(self.nb_iter):

            # E-step

            # Copy stats as they will be whitened with a different Sigma for each iteration
            local_stat = copy.deepcopy(model_shifted_stat)

            # Whiten statistics (with the new mean and Sigma)
            local_stat.whiten_stats(self.mean, self.Sigma)

            # Whiten the EigenVoice matrix: eigen-decompose Sigma, sort the
            # eigenpairs by decreasing eigenvalue and scale each eigenvector
            # by 1 / sqrt(eigenvalue).
            eigen_values, eigen_vectors = linalg.eigh(self.Sigma)
            ind = eigen_values.real.argsort()[::-1]
            eigen_values = eigen_values.real[ind]
            eigen_vectors = eigen_vectors.real[:, ind]
            sqr_inv_eval_sigma = 1 / numpy.sqrt(eigen_values.real)
            sqr_inv_sigma = numpy.dot(eigen_vectors,
                                      numpy.diag(sqr_inv_eval_sigma))
            self.F = sqr_inv_sigma.T.dot(self.F)

            # Replicate self.stat0: the index map is all zeros, so column 0
            # of stat0 is repeated vect_size times.
            index_map = numpy.zeros(vect_size, dtype=int)
            _stat0 = local_stat.stat0[:, index_map]

            # Accumulators, filled in place by fa_model_loop below.
            e_h = numpy.zeros((class_nb, self.rank_f))
            e_hh = numpy.zeros((class_nb, self.rank_f, self.rank_f))

            # loop on model id's
            fa_model_loop(
                batch_start=0,
                mini_batch_indices=numpy.arange(class_nb),
                factor_analyser=self,
                stat0=_stat0,
                stats=local_stat.stats,
                e_h=e_h,
                e_hh=e_hh, )

            # Accumulate for minimum divergence step
            _R = numpy.sum(e_hh, axis=0) / session_per_model.shape[0]

            # Multiplying by the inverse of the whitening matrix presumably
            # maps the statistics back to the un-whitened space — TODO confirm.
            _C = e_h.T.dot(local_stat.stats).dot(linalg.inv(sqr_inv_sigma))
            # Sum of the e_hh slices, each weighted by its stat0 entry.
            _A = numpy.einsum("ijk,i->jk", e_hh, local_stat.stat0.squeeze())

            # M-step
            self.F = linalg.solve(_A, _C).T

            # Update the residual covariance
            self.Sigma = sigma_obs - self.F.dot(_C) / session_per_model.sum()

            # Minimum Divergence step
            self.F = self.F.dot(linalg.cholesky(_R))

    def scoring(
            self,
            enroll,
            test,
            ndx,
            test_uncertainty=None,
            Vtrans=None,
            p_known=0.0,
            scaling_factor=1.0,
            check_missing=True, ):
        """
        Score every enrollment vector against every test vector with the
        already-trained PLDA model (``self.mean``, ``self.F``, ``self.Sigma``).
        The inputs are deep-copied, so the caller's objects are not modified.

        Arguments
        ---------
        enroll : paddlespeech.vector.cluster.diarization.EmbeddingMeta
            Enrollment embeddings; ``stats`` holds one vector per row.
        test : paddlespeech.vector.cluster.diarization.EmbeddingMeta
            Test embeddings; ``stats`` holds one vector per row.
        ndx : paddlespeech.vector.cluster.plda.Ndx
            Trial definition. Its ``modelset``, ``segset`` and ``trialmask``
            are copied onto the returned score object.
        test_uncertainty : object
            Accepted for interface compatibility; not read by this method.
        Vtrans : object
            Accepted for interface compatibility; not read by this method.
        p_known : float
            Probability of having a known speaker for open-set
            identification (=1 for the verification task and =0 for the
            closed-set case). When 0 the open-set correction is skipped.
        scaling_factor : float
            Multiplies the inverse residual covariance used for the constant
            term and the final score matrix.
        check_missing : bool
            If True, ``_check_missing_model`` is used to filter the trials;
            otherwise ``ndx`` is used as given.

        Returns
        -------
        score : Scores
            Object whose ``scoremat`` has one row per enrollment vector and
            one column per test vector.
        """

        # Private copies: centering below mutates the containers.
        enroll_local = copy.deepcopy(enroll)
        test_local = copy.deepcopy(test)

        # Optionally drop models / segments that are missing.
        trials = (_check_missing_model(enroll_local, test_local, ndx)
                  if check_missing else ndx)

        # Remove the PLDA mean from both sides.
        enroll_local.center_stats(self.mean)
        test_local.center_stats(self.mean)

        # --- constant term of the PLDA log-likelihood ratio ---
        inv_sigma = linalg.inv(self.Sigma)
        eye_spk = numpy.eye(self.F.shape[1], dtype="float")
        K = self.F.T.dot(inv_sigma * scaling_factor).dot(self.F)

        log_det_k1 = numpy.linalg.slogdet(linalg.inv(K + eye_spk))[1]
        log_det_k2 = numpy.linalg.slogdet(linalg.inv(2 * K + eye_spk))[1]
        plda_cst = log_det_k2 / 2.0 - log_det_k1

        # --- covariance-derived matrices used by the quadratic forms ---
        sigma_between = numpy.dot(self.F, self.F.T)
        sigma_total = sigma_between + self.Sigma
        sigma_total_inv = linalg.inv(sigma_total)

        schur_inv = linalg.inv(sigma_total - sigma_between.dot(sigma_total_inv)
                               .dot(sigma_between))
        Phi = sigma_total_inv - schur_inv
        Psi = sigma_total_inv.dot(sigma_between).dot(schur_inv)

        # --- per-vector quadratic terms (diagonal of X Phi X^T) ---
        enroll_vecs = enroll_local.stats
        test_vecs = test_local.stats
        model_part = 0.5 * numpy.einsum("ij, ji->i",
                                        enroll_vecs.dot(Phi), enroll_vecs.T)
        seg_part = 0.5 * numpy.einsum("ij, ji->i",
                                      test_vecs.dot(Phi), test_vecs.T)

        # --- full score matrix: quadratic terms + constant + cross term ---
        score_matrix = model_part[:, numpy.newaxis] + seg_part + plda_cst
        score_matrix += enroll_vecs.dot(Psi).dot(test_vecs.T)
        score_matrix *= scaling_factor

        # Open-set identification: normalise each row by the average
        # likelihood of the *other* enrolled models plus the out-of-set mass.
        if p_known != 0:
            n_models = score_matrix.shape[0]
            likelihoods = numpy.exp(score_matrix)
            adjusted = numpy.empty(score_matrix.shape)
            for row in range(n_models):
                others = ~(numpy.arange(n_models) == row)
                known_term = p_known * likelihoods[others].sum(axis=0) / (
                    n_models - 1)
                adjusted[row, :] = score_matrix[row, :] - numpy.log(
                    known_term + (1 - p_known))
            score_matrix = adjusted

        score = Scores()  # noqa F821
        score.modelset = trials.modelset
        score.segset = trials.segset
        score.scoremask = trials.trialmask
        score.scoremat = score_matrix

        return score


if __name__ == '__main__':
    import random

    # Smoke test on random data: train a PLDA model, then score a random
    # enrollment set against a random test set.
    dim, N, n_spkrs = 10, 100, 10
    train_xv = numpy.random.rand(N, dim)

    # Speaker label per training vector (drawn from md1 .. md{n_spkrs - 1}).
    md = [f'md{random.randrange(1, n_spkrs, 1)}' for _ in range(N)]  # spk
    modelset = numpy.array(md, dtype="|O")
    # One unique utterance id per training vector.
    sg = [f'sg{idx}' for idx in range(N)]  # utt
    segset = numpy.array(sg, dtype="|O")
    # Zero-order statistics; built here but not passed to anything below.
    stat0 = numpy.array([[1.0]] * N)
    xvectors_stat = EmbeddingMeta(
        modelset=modelset, segset=segset, stats=train_xv)

    # Training PLDA model: M ~ (mean, F, Sigma)
    plda = PLDA(rank_f=5)
    plda.plda(xvectors_stat)
    print(plda.mean.shape)  #(10,)
    print(plda.F.shape)  #(10, 5)
    print(plda.Sigma.shape)  #(10, 10)

    # Enrollment (20 utts); each utterance is its own model.
    en_N = 20
    en_xv = numpy.random.rand(en_N, dim)
    en_sgs = [f'en{idx}' for idx in range(en_N)]
    en_sets = numpy.array(en_sgs, dtype="|O")
    en_stat = EmbeddingMeta(modelset=en_sets, segset=en_sets, stats=en_xv)

    # Test (30 utts)
    te_N = 30
    te_xv = numpy.random.rand(te_N, dim)
    te_sgs = [f'te{idx}' for idx in range(te_N)]
    te_sets = numpy.array(te_sgs, dtype="|O")
    te_stat = EmbeddingMeta(modelset=te_sets, segset=te_sets, stats=te_xv)

    # Trials: every enrollment model against every test segment.
    ndx = Ndx(models=en_sets, testsegs=te_sets)  # trials

    # PLDA Scoring
    scores_plda = plda.scoring(en_stat, te_stat, ndx)
    print(scores_plda.scoremat.shape)  #(20, 30)


================================================
FILE: paddlespeech/vector/exps/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/vector/exps/ecapa_tdnn/extract_emb.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import time

import paddle
from yacs.config import CfgNode

from paddlespeech.audio.backends import soundfile_load as load_audio
from paddlespeech.audio.compliance.librosa import melspectrogram
from paddlespeech.s2t.utils.log import Log
from paddlespeech.vector.io.batch import feature_normalize
from paddlespeech.vector.models.ecapa_tdnn import EcapaTdnn
from paddlespeech.vector.modules.sid_model import SpeakerIdetification
from paddlespeech.vector.training.seeding import seed_everything

logger = Log(__name__).getlog()


def extract_audio_embedding(args, config):
    """Extract the speaker embedding of a single audio file.

    Args:
        args (argparse.Namespace): command line arguments. Reads ``device``,
            ``load_checkpoint`` and ``audio_path``. ``load_checkpoint`` is
            rewritten in place to its absolute, user-expanded form.
        config (yacs.config.CfgNode): the yaml config. Reads ``seed``,
            ``model``, ``num_speakers``, ``sr``, ``n_mels``, ``window_size``
            and ``hop_size``.

    Returns:
        numpy.ndarray: the squeezed backbone output for the audio file.
    """
    # stage 0: set the device (cpu or gpu) and fix the random seed
    paddle.set_device(args.device)
    seed_everything(config.seed)

    # stage 1: build the dnn backbone and wrap it in the speaker
    #          identification model so that the checkpoint keys match
    ecapa_tdnn = EcapaTdnn(**config.model)
    model = SpeakerIdetification(
        backbone=ecapa_tdnn, num_class=config.num_speakers)

    # stage 2: load the pre-trained model
    args.load_checkpoint = os.path.abspath(
        os.path.expanduser(args.load_checkpoint))
    state_dict = paddle.load(
        os.path.join(args.load_checkpoint, 'model.pdparams'))
    model.set_state_dict(state_dict)
    logger.info(f'Checkpoint loaded from {args.load_checkpoint}')

    # stage 3: we must set the model to eval mode
    model.eval()

    # stage 4: read the audio data and extract the embedding
    # waveform is a one dimension numpy array
    waveform, sr = load_audio(args.audio_path)

    # feat type is numpy array, whose shape is [dim, time]
    # we convert it to a one-item batch, so the final shape is [1, dim, time]
    start_time = time.time()
    feat = melspectrogram(
        x=waveform,
        sr=config.sr,
        n_mels=config.n_mels,
        window_size=config.window_size,
        hop_length=config.hop_size)
    feat = paddle.to_tensor(feat).unsqueeze(0)

    # in inference period, the lengths is all one without padding
    lengths = paddle.ones([1])
    feat = feature_normalize(feat, mean_norm=True, std_norm=False)

    # Forward the backbone only. Gradients are never needed for inference,
    # so disable autograd bookkeeping (same as the batch evaluation script).
    with paddle.no_grad():
        embedding = model.backbone(
            feat, lengths).squeeze().numpy()  # (1, emb_size, 1) -> (emb_size)
    elapsed_time = time.time() - start_time
    audio_length = waveform.shape[0] / sr

    # stage 5: report the real-time factor (processing time / audio duration)
    rtf = elapsed_time / audio_length
    logger.info(f"{args.device} rft={rtf}")

    return embedding


if __name__ == "__main__":
    # yapf: disable
    # The first positional parameter of ArgumentParser is `prog`, so the module
    # docstring is passed explicitly as the description instead.
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('--device',
                        choices=['cpu', 'gpu'],
                        default="cpu",
                        help="Select which device to train model, defaults to cpu.")
    parser.add_argument("--config",
                        default=None,
                        type=str,
                        help="configuration file")
    parser.add_argument("--load-checkpoint",
                        type=str,
                        default='',
                        help="Directory to load model checkpoint to contiune trainning.")
    parser.add_argument("--audio-path",
                        default="./data/demo.wav",
                        type=str,
                        help="Single audio file path")
    args = parser.parse_args()
    # yapf: enable
    # https://yaml.org/type/float.html
    # new_allowed=True lets the yaml file introduce keys not declared here.
    config = CfgNode(new_allowed=True)
    if args.config:
        config.merge_from_file(args.config)

    # Make the config read-only before handing it to the extraction code.
    config.freeze()
    print(config)

    extract_audio_embedding(args, config)


================================================
FILE: paddlespeech/vector/exps/ecapa_tdnn/test.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os

import numpy as np
import paddle
from paddle.io import BatchSampler
from paddle.io import DataLoader
from sklearn.metrics import roc_curve
from tqdm import tqdm
from yacs.config import CfgNode

from paddlespeech.s2t.utils.log import Log
from paddlespeech.vector.io.batch import batch_feature_normalize
from paddlespeech.vector.io.dataset import CSVDataset
from paddlespeech.vector.io.embedding_norm import InputNormalization
from paddlespeech.vector.models.ecapa_tdnn import EcapaTdnn
from paddlespeech.vector.modules.sid_model import SpeakerIdetification
from paddlespeech.vector.training.seeding import seed_everything

logger = Log(__name__).getlog()


def compute_dataset_embedding(data_loader, model, mean_var_norm_emb, config,
                              id2embedding):
    """Run the backbone over a dataset and store one embedding per utterance.

    Args:
        data_loader (paddle.io.Dataloader): loader yielding batches with the
            keys ``ids``, ``feats`` and ``lengths``
        model (paddle.nn.Layer): the speaker verification model; only its
            ``backbone`` is called
        mean_var_norm_emb : callable applied to the embeddings when
            ``config.global_embedding_norm`` is set; pass None to skip it
        config (yacs.config.CfgNode): the yaml config
        id2embedding (dict): updated in place, utterance id -> embedding
    """
    logger.info(
        f'Computing embeddings on {data_loader.dataset.csv_path} dataset')
    with paddle.no_grad():
        for batch in tqdm(data_loader):
            utt_ids = batch['ids']

            # stage 8-1: extract the audio embedding
            backbone_out = model.backbone(batch['feats'], batch['lengths'])
            embeddings = backbone_out.squeeze(
                -1)  # (N, emb_size, 1) -> (N, emb_size)

            # Global embedding normalization; per the original authors it
            # reduces the EER by about 10% relative.
            if config.global_embedding_norm and mean_var_norm_emb:
                full_lengths = paddle.ones([embeddings.shape[0]])
                embeddings = mean_var_norm_emb(embeddings, full_lengths)

            # Record (or overwrite) the embedding of every utterance.
            for utt_id, embedding in zip(utt_ids, embeddings):
                id2embedding[utt_id] = embedding


def compute_verification_scores(id2embedding, train_cohort, config):
    """Compute the verification trial scores

    Args:
        id2embedding (dict): the utterance embedding
        train_cohort (paddle.tensor): the cohort dataset embedding
        config (yacs.config.CfgNode): the yaml config

    Returns:
        the scores and the trial labels, 
        1 refers the target and 0 refers the nontarget in labels
    """
    labels = []
    enroll_ids = []
    test_ids = []
    logger.info(f"read the trial from {config.verification_file}")
    cos_sim_func = paddle.nn.CosineSimilarity(axis=-1)
    scores = []
    with open(config.verification_file, 'r') as f:
        for line in f.readlines():
            label, enroll_id, test_id = line.strip().split(' ')
            enroll_id = enroll_id.split('.')[0].replace('/', '-')
            test_id = test_id.split('.')[0].replace('/', '-')
            labels.append(int(label))

            enroll_emb = id2embedding[enroll_id]
            test_emb = id2embedding[test_id]
            score = cos_sim_func(enroll_emb, test_emb).item()

            if "score_norm" in config:
                # Getting norm stats for enroll impostors
                enroll_rep = paddle.tile(
                    enroll_emb, repeat_times=[train_cohort.shape[0], 1])
                score_e_c = cos_sim_func(enroll_rep, train_cohort)
                if "cohort_size" in config:
                    score_e_c, _ = paddle.topk(
                        score_e_c, k=config.cohort_size, axis=0)
                mean_e_c = paddle.mean(score_e_c, axis=0)
                std_e_c = paddle.std(score_e_c, axis=0)

                # Getting norm stats for test impostors
                test_rep = paddle.tile(
                    test_emb, repeat_times=[train_cohort.shape[0], 1])
                score_t_c = cos_sim_func(test_rep, train_cohort)
                if "cohort_size" in config:
                    score_t_c, _ = paddle.topk(
                        score_t_c, k=config.cohort_size, axis=0)
                mean_t_c = paddle.mean(score_t_c, axis=0)
                std_t_c = paddle.std(score_t_c, axis=0)

                if config.score_norm == "s-norm":
                    score_e = (score - mean_e_c) / std_e_c
                    score_t = (score - mean_t_c) / std_t_c

                    score = 0.5 * (score_e + score_t)
                elif config.score_norm == "z-norm":
                    score = (score - mean_e_c) / std_e_c
                elif config.score_norm == "t-norm":
                    score = (score - mean_t_c) / std_t_c

            scores.append(score)

    return scores, labels


def compute_eer(labels: np.ndarray, scores: np.ndarray) -> tuple:
    """Compute EER and return score threshold.

    The operating point is the ROC point where the false negative rate and
    the false positive rate are closest to each other.

    Args:
        labels (np.ndarray): the trial label, shape: [N], one-dimension, N refer to the samples num
        scores (np.ndarray): the trial scores, shape: [N], one-dimension, N refer to the samples num

    Returns:
        tuple: ``(eer, eer_threshold)``, the false positive rate at the
        selected operating point and the score threshold that yields it
    """
    fpr, tpr, threshold = roc_curve(y_true=labels, y_score=scores)
    fnr = 1 - tpr
    # Locate the operating point once and reuse it for both outputs.
    eer_index = np.nanargmin(np.absolute(fnr - fpr))
    eer = fpr[eer_index]
    eer_threshold = threshold[eer_index]
    return eer, eer_threshold


def _build_eval_loader(csv_path, config, **dataset_kwargs):
    """Build a non-shuffled melspectrogram DataLoader over one csv manifest.

    Extra keyword arguments are forwarded to ``CSVDataset``.
    """
    dataset = CSVDataset(
        csv_path,
        feat_type='melspectrogram',
        random_chunk=False,
        n_mels=config.n_mels,
        window_size=config.window_size,
        hop_length=config.hop_size,
        **dataset_kwargs)
    sampler = BatchSampler(
        dataset, batch_size=config.batch_size, shuffle=False)
    return DataLoader(
        dataset,
        batch_sampler=sampler,
        collate_fn=lambda x: batch_feature_normalize(
            x, mean_norm=True, std_norm=False),
        num_workers=config.num_workers,
        return_list=True, )


def main(args, config):
    """The main process for test the speaker verification model

    Args:
        args (argparse.Namespace): the command line args namespace
        config (yacs.config.CfgNode): the yaml config
    """

    # stage0: set the training device, cpu or gpu
    #         if set the gpu, paddlespeech will select a gpu according the env CUDA_VISIBLE_DEVICES
    paddle.set_device(args.device)
    # set the random seed, it is the necessary measures for multiprocess training
    seed_everything(config.seed)

    # stage1: build the dnn backbone model network
    #         we will extract the audio embedding from the backbone model
    ecapa_tdnn = EcapaTdnn(**config.model)

    # stage2: build the speaker verification eval instance with backbone model
    #         because the checkpoint dict name has the SpeakerIdetification prefix
    #         so we need to create the SpeakerIdetification instance
    #         but we actually use the backbone model to extract the audio embedding
    model = SpeakerIdetification(
        backbone=ecapa_tdnn, num_class=config.num_speakers)

    # stage3: load the pre-trained model
    #         generally, we get the last model from the epoch
    args.load_checkpoint = os.path.abspath(
        os.path.expanduser(args.load_checkpoint))

    # load model checkpoint to sid model
    state_dict = paddle.load(
        os.path.join(args.load_checkpoint, 'model.pdparams'))
    model.set_state_dict(state_dict)
    logger.info(f'Checkpoint loaded from {args.load_checkpoint}')

    # stage4: construct the enroll and test dataloader
    #         the enroll dataset is read from {args.data_dir}/vox/csv/enroll.csv,
    #         and the test dataset from {args.data_dir}/vox/csv/test.csv
    enroll_loader = _build_eval_loader(
        os.path.join(args.data_dir, "vox/csv/enroll.csv"), config)
    test_loader = _build_eval_loader(
        os.path.join(args.data_dir, "vox/csv/test.csv"), config)

    # stage5: we must set the model to eval mode
    model.eval()

    # stage6: global embedding norm to improve the performance
    #         and we create the InputNormalization instance to process the embedding mean and std norm
    # The normalizer stays None when the feature is disabled, so the calls
    # below never touch an unbound name.
    logger.info(f"global embedding norm: {config.global_embedding_norm}")
    mean_var_norm_emb = None
    if config.global_embedding_norm:
        mean_var_norm_emb = InputNormalization(
            norm_type="global",
            mean_norm=config.embedding_mean_norm,
            std_norm=config.embedding_std_norm)

    # stage 7: score norm need the imposters dataset
    #          we select the train dataset as the idea imposters dataset
    #          and we select the config.n_train_snts utterance to as the final imposters dataset
    if "score_norm" in config:
        logger.info(f"we will do score norm: {config.score_norm}")
        train_loader = _build_eval_loader(
            os.path.join(args.data_dir, "vox/csv/train.csv"),
            config,
            n_train_snts=config.n_train_snts)

    # stage 8: Compute embeddings of audios in enrol and test dataset from model.
    id2embedding = {}
    logger.info("First loop for enroll and test dataset")
    compute_dataset_embedding(enroll_loader, model, mean_var_norm_emb, config,
                              id2embedding)
    compute_dataset_embedding(test_loader, model, mean_var_norm_emb, config,
                              id2embedding)

    if mean_var_norm_emb is not None:
        # Run a second time to make embedding normalization more stable.
        # Without a normalizer a second pass would only recompute the same
        # embeddings, so it is skipped together with the save.
        logger.info("Second loop for enroll and test dataset")
        compute_dataset_embedding(enroll_loader, model, mean_var_norm_emb,
                                  config, id2embedding)
        compute_dataset_embedding(test_loader, model, mean_var_norm_emb,
                                  config, id2embedding)
        mean_var_norm_emb.save(
            os.path.join(args.load_checkpoint, "mean_var_norm_emb"))

    # stage 9: Compute the cohort embeddings used by score normalization.
    train_cohort = None
    if "score_norm" in config:
        train_embeddings = {}
        # cohort embedding not do mean and std norm
        compute_dataset_embedding(train_loader, model, None, config,
                                  train_embeddings)
        train_cohort = paddle.stack(list(train_embeddings.values()))

    # stage 10: compute the scores
    scores, labels = compute_verification_scores(id2embedding, train_cohort,
                                                 config)

    # stage 11: compute the EER and threshold
    scores = paddle.to_tensor(scores)
    EER, threshold = compute_eer(np.asarray(labels), scores.numpy())
    logger.info(
        f'EER of verification test: {EER*100:.4f}%, score threshold: {threshold:.5f}'
    )


if __name__ == "__main__":
    # yapf: disable
    # The first positional parameter of ArgumentParser is `prog`, so the module
    # docstring is passed explicitly as the description instead.
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('--device',
                        choices=['cpu', 'gpu'],
                        default="gpu",
                        help="Select which device to train model, defaults to gpu.")
    parser.add_argument("--config",
                        default=None,
                        type=str,
                        help="configuration file")
    parser.add_argument("--data-dir",
                        default="./data/",
                        type=str,
                        help="data directory")
    parser.add_argument("--load-checkpoint",
                        type=str,
                        default='',
                        help="Directory to load model checkpoint to contiune trainning.")
    args = parser.parse_args()
    # yapf: enable
    # https://yaml.org/type/float.html
    # new_allowed=True lets the yaml file introduce keys not declared here.
    config = CfgNode(new_allowed=True)
    if args.config:
        config.merge_from_file(args.config)

    # Make the config read-only before running the evaluation.
    config.freeze()
    print(config)
    main(args, config)


================================================
FILE: paddlespeech/vector/exps/ecapa_tdnn/train.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import time

import numpy as np
import paddle
from paddle.io import BatchSampler
from paddle.io import DataLoader
from paddle.io import DistributedBatchSampler
from yacs.config import CfgNode

from paddlespeech.audio.compliance.librosa import melspectrogram
from paddlespeech.s2t.utils.log import Log
from paddlespeech.vector.io.augment import build_augment_pipeline
from paddlespeech.vector.io.augment import waveform_augment
from paddlespeech.vector.io.batch import batch_pad_right
from paddlespeech.vector.io.batch import feature_normalize
from paddlespeech.vector.io.batch import waveform_collate_fn
from paddlespeech.vector.io.dataset import CSVDataset
from paddlespeech.vector.models.ecapa_tdnn import EcapaTdnn
from paddlespeech.vector.modules.loss import AdditiveAngularMargin
from paddlespeech.vector.modules.loss import LogSoftmaxWrapper
from paddlespeech.vector.modules.sid_model import SpeakerIdetification
from paddlespeech.vector.training.scheduler import CyclicLRScheduler
from paddlespeech.vector.training.seeding import seed_everything
from paddlespeech.vector.utils.time import Timer

logger = Log(__name__).getlog()


def main(args, config):
    """The main process for test the speaker verification model

    Args:
        args (argparse.Namespace): the command line args namespace
        config (yacs.config.CfgNode): the yaml config
    """
    # stage0: set the training device, cpu or gpu
    paddle.set_device(args.device)

    # stage1: we must call the paddle.distributed.init_parallel_env() api at the beginning
    paddle.distributed.init_parallel_env()
    nranks = paddle.distributed.get_world_size()
    rank = paddle.distributed.get_rank()
    # set the random seed, it is the necessary measures for multiprocess training
    seed_everything(config.seed)

    # stage2: data prepare, such vox1 and vox2 data, and augment noise data and pipline
    # note: some operations must be done in rank==0
    train_dataset = CSVDataset(
        csv_path=os.path.join(args.data_dir, "vox/csv/train.csv"),
        label2id_path=os.path.join(args.data_dir, "vox/meta/label2id.txt"))
    dev_dataset = CSVDataset(
        csv_path=os.path.join(args.data_dir, "vox/csv/dev.csv"),
        label2id_path=os.path.join(args.data_dir, "vox/meta/label2id.txt"))

    # we will build the augment pipeline process list
    if config.augment:
        augment_pipeline = build_augment_pipeline(target_dir=args.data_dir)
    else:
        augment_pipeline = []

    # stage3: build the dnn backbone model network
    #         in speaker verification period, we use the backbone mode to extract the audio embedding
    ecapa_tdnn = EcapaTdnn(**config.model)

    # stage4: build the speaker verification train instance with backbone model
    model = SpeakerIdetification(
        backbone=ecapa_tdnn, num_class=config.num_speakers)

    # stage5: build the optimizer, we now only construct the AdamW optimizer
    #         140000 is single gpu steps
    #         so, in multi-gpu mode, wo reduce the step_size to 140000//nranks to enable CyclicLRScheduler
    lr_schedule = CyclicLRScheduler(
        base_lr=config.learning_rate,
        max_lr=config.max_lr,
        step_size=config.step_size // nranks)
    optimizer = paddle.optimizer.AdamW(
        learning_rate=lr_schedule, parameters=model.parameters())

    # stage6: build the loss function, we now only support LogSoftmaxWrapper
    criterion = LogSoftmaxWrapper(
        loss_fn=AdditiveAngularMargin(margin=config.margin, scale=config.scale))

    # stage7: confirm training start epoch
    #         if pre-trained model exists, start epoch confirmed by the pre-trained model
    start_epoch = 0
    if args.load_checkpoint:
        logger.info("load the check point")
        args.load_checkpoint = os.path.abspath(
            os.path.expanduser(args.load_checkpoint))
        try:
            # load model checkpoint
            state_dict = paddle.load(
                os.path.join(args.load_checkpoint, 'model.pdparams'))
            model.set_state_dict(state_dict)

            # load optimizer checkpoint
            state_dict = paddle.load(
                os.path.join(args.load_checkpoint, 'model.pdopt'))
            optimizer.set_state_dict(state_dict)
            if rank == 0:
                logger.info(f'Checkpoint loaded from {args.load_checkpoint}')
        except FileExistsError:
            if rank == 0:
                logger.info('Train from scratch.')

        try:
            start_epoch = int(args.load_checkpoint[-1])
            logger.info(f'Restore training from epoch {start_epoch}.')
        except ValueError:
            pass

    # stage8: we build the batch sampler for paddle.DataLoader
    train_sampler = DistributedBatchSampler(
        train_dataset,
        batch_size=config.batch_size,
        shuffle=True,
        drop_last=False)
    train_loader = DataLoader(
        train_dataset,
        batch_sampler=train_sampler,
        num_workers=config.num_workers,
        collate_fn=waveform_collate_fn,
        return_list=True,
        use_buffer_reader=True, )

    # stage9: start to train
    #         we will comment the training process
    steps_per_epoch = len(train_sampler)
    timer = Timer(steps_per_epoch * config.epochs)
    last_saved_epoch = ""
    timer.start()

    for epoch in range(start_epoch + 1, config.epochs + 1):
        # at the beginning, model must set to train mode
        model.train()

        avg_loss = 0
        num_corrects = 0
        num_samples = 0
        train_reader_cost = 0.0
        train_feat_cost = 0.0
        train_run_cost = 0.0

        reader_start = time.time()
        for batch_idx, batch in enumerate(train_loader):
            train_reader_cost += time.time() - reader_start

            # stage 9-1: batch data is audio sample points and speaker id label
            feat_start = time.time()
            waveforms, labels = batch['waveforms'], batch['labels']
            waveforms, lengths = batch_pad_right(waveforms.numpy())
            waveforms = paddle.to_tensor(waveforms)

            # stage 9-2: audio sample augment method, which is done on the audio sample point
            #            the original wavefrom and the augmented waveform is concatented in a batch
            #            eg. five augment method in the augment pipeline
            #                the final data nums is batch_size * [five + one] 
            #                -> five augmented waveform batch plus one original batch waveform
            if len(augment_pipeline) != 0:
                waveforms = waveform_augment(waveforms, augment_pipeline)
                labels = paddle.concat(
                    [labels for i in range(len(augment_pipeline) + 1)])

            # stage 9-3: extract the audio feats,such fbank, mfcc, spectrogram
            feats = []
            for waveform in waveforms.numpy():
                feat = melspectrogram(
                    x=waveform,
                    sr=config.sr,
                    n_mels=config.n_mels,
                    window_size=config.window_size,
                    hop_length=config.hop_size)
                feats.append(feat)
            feats = paddle.to_tensor(np.asarray(feats))

            # stage 9-4: feature normalize, which help converge and imporve the performance
            feats = feature_normalize(
                feats, mean_norm=True, std_norm=False)  # Features normalization
            train_feat_cost += time.time() - feat_start

            # stage 9-5: model forward, such ecapa-tdnn, x-vector
            train_start = time.time()
            logits = model(feats)

            # stage 9-6: loss function criterion, such AngularMargin, AdditiveAngularMargin
            loss = criterion(logits, labels)

            # stage 9-7: update the gradient and clear the gradient cache
            loss.backward()
            optimizer.step()
            if isinstance(optimizer._learning_rate,
                          paddle.optimizer.lr.LRScheduler):
                optimizer._learning_rate.step()
            optimizer.clear_grad()

            # stage 9-8: Calculate average loss per batch
            avg_loss = loss.item()

            # stage 9-9: Calculate metrics, which is one-best accuracy
            preds = paddle.argmax(logits, axis=1)
            num_corrects += (preds == labels).numpy().sum()
            num_samples += feats.shape[0]
            train_run_cost += time.time() - train_start
            timer.count()  # step plus one in timer

            # stage 9-10: print the log information only on 0-rank per log-freq batchs
            if (batch_idx + 1) % config.log_interval == 0 and rank == 0:
                lr = optimizer.get_lr()
                avg_loss /= config.log_interval
                avg_acc = num_corrects / num_samples

                print_msg = 'Train Epoch={}/{}, Step={}/{}'.format(
                    epoch, config.epochs, batch_idx + 1, steps_per_epoch)
                print_msg += ' loss={:.4f}'.format(avg_loss)
                print_msg += ' acc={:.4f}'.format(avg_acc)
                print_msg += ' avg_reader_cost: {:.5f} sec,'.format(
                    train_reader_cost / config.log_interval)
                print_msg += ' avg_feat_cost: {:.5f} sec,'.format(
                    train_feat_cost / config.log_interval)
                print_msg += ' avg_train_cost: {:.5f} sec,'.format(
                    train_run_cost / config.log_interval)

                print_msg += ' lr={:.4E} step/sec={:.2f} ips={:.5f}| ETA {}'.format(
                    lr, timer.timing, timer.ips, timer.eta)
                logger.info(print_msg)

                avg_loss = 0
                num_corrects = 0
                num_samples = 0
                train_reader_cost = 0.0
                train_feat_cost = 0.0
                train_run_cost = 0.0

            reader_start = time.time()

        # stage 9-11: save the model parameters only on 0-rank per save-freq batchs
        if epoch % config.save_interval == 0 and batch_idx + 1 == steps_per_epoch:
            if rank != 0:
                paddle.distributed.barrier(
                )  # Wait for valid step in main process
                continue  # Resume trainning on other process

            # stage 9-12: construct the valid dataset dataloader
            dev_sampler = BatchSampler(
                dev_dataset,
                batch_size=config.batch_size,
                shuffle=False,
                drop_last=False)
            dev_loader = DataLoader(
                dev_dataset,
                batch_sampler=dev_sampler,
                collate_fn=waveform_collate_fn,
                num_workers=config.num_workers,
                return_list=True, )

            # set the model to eval mode
            model.eval()
            num_corrects = 0
            num_samples = 0

            # stage 9-13: evaluation the valid dataset batch data
            logger.info('Evaluate on validation dataset')
            with paddle.no_grad():
                for batch_idx, batch in enumerate(dev_loader):
                    waveforms, labels = batch['waveforms'], batch['labels']

                    feats = []
                    for waveform in waveforms.numpy():
                        feat = melspectrogram(
                            x=waveform,
                            sr=config.sr,
                            n_mels=config.n_mels,
                            window_size=config.window_size,
                            hop_length=config.hop_size)
                        feats.append(feat)

                    feats = paddle.to_tensor(np.asarray(feats))
                    feats = feature_normalize(
                        feats, mean_norm=True, std_norm=False)
                    logits = model(feats)

                    preds = paddle.argmax(logits, axis=1)
                    num_corrects += (preds == labels).numpy().sum()
                    num_samples += feats.shape[0]

            print_msg = '[Evaluation result]'
            print_msg += ' dev_acc={:.4f}'.format(num_corrects / num_samples)
            logger.info(print_msg)

            # stage 9-14: Save model parameters
            save_dir = os.path.join(args.checkpoint_dir,
                                    'epoch_{}'.format(epoch))
            last_saved_epoch = os.path.join('epoch_{}'.format(epoch),
                                            "model.pdparams")
            logger.info('Saving model checkpoint to {}'.format(save_dir))
            paddle.save(model.state_dict(),
                        os.path.join(save_dir, 'model.pdparams'))
            paddle.save(optimizer.state_dict(),
                        os.path.join(save_dir, 'model.pdopt'))

            if nranks > 1:
                paddle.distributed.barrier()  # Main process

    # stage 10: create the final trained model.pdparams with soft link
    if rank == 0:
        final_model = os.path.join(args.checkpoint_dir, "model.pdparams")
        logger.info(f"we will create the final model: {final_model}")
        if os.path.islink(final_model):
            logger.info(
                f"An {final_model} already exists, we will rm is and create it again"
            )
            os.unlink(final_model)
        os.symlink(last_saved_epoch, final_model)


if __name__ == "__main__":
    # Command-line entry point: parse the arguments, load the configuration
    # file (if any) into a yacs CfgNode and hand both over to main().
    # yapf: disable
    parser = argparse.ArgumentParser(__doc__)
    # NOTE: the help text must agree with `default` below (it used to claim
    # "defaults to gpu" while the actual default is "cpu").
    parser.add_argument('--device',
                        choices=['cpu', 'gpu'],
                        default="cpu",
                        help="Select which device to train model, defaults to cpu.")
    parser.add_argument("--config",
                        default=None,
                        type=str,
                        help="configuration file")
    parser.add_argument("--data-dir",
                        default="./data/",
                        type=str,
                        help="data directory")
    parser.add_argument("--load-checkpoint",
                        type=str,
                        default=None,
                        help="Directory to load model checkpoint to contiune trainning.")
    parser.add_argument("--checkpoint-dir",
                        type=str,
                        default='./checkpoint',
                        help="Directory to save model checkpoints.")

    args = parser.parse_args()
    # yapf: enable

    # https://yaml.org/type/float.html
    # new_allowed=True lets the merged file introduce keys that are not
    # pre-declared on the (initially empty) node.
    config = CfgNode(new_allowed=True)
    if args.config:
        config.merge_from_file(args.config)

    # Freeze the configuration before training and echo it for the record.
    config.freeze()
    print(config)

    main(args, config)


================================================
FILE: paddlespeech/vector/exps/ge2e/__init__.py
================================================
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/vector/exps/ge2e/audio_processor.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import struct
from pathlib import Path
from warnings import warn

import librosa
import numpy as np
from scipy.ndimage.morphology import binary_dilation

# webrtcvad is an optional dependency: when it cannot be imported a warning is
# emitted and the name is bound to None so that callers can test for it.
try:
    import webrtcvad
except ModuleNotFoundError:
    warn("Unable to import 'webrtcvad'."
         "This package enables noise removal and is recommended.")
    webrtcvad = None

# Largest value of a signed 16-bit integer; used to scale float waveforms
# to 16-bit PCM before voice activity detection.
INT16_MAX = (2**15) - 1


def normalize_volume(wav, target_dBFS, increase_only=False,
                     decrease_only=False):
    """
    Loudness-normalize a waveform so that its mean power equals ``target_dBFS``.

    This is loudness normalization (based on the mean power of the signal),
    not peak normalization, see
    https://en.wikipedia.org/wiki/Audio_normalization.
    dBFS means "decibels relative to full scale", see
    https://en.wikipedia.org/wiki/DBFS (for 16 bit PCM audio the minimal
    level is about -96 dB).
    Parameters
    ----------
    wav : np.array
        the waveform as a numpy array of floats.
    target_dBFS : float
        the level the mean power of the waveform is moved to.
    increase_only : bool
        if True, the waveform is returned unchanged when reaching the target
        would require lowering the volume.
    decrease_only : bool
        if True, the waveform is returned unchanged when reaching the target
        would require raising the volume.
    Returns
    ----------
    np.array
        the rescaled waveform, or ``wav`` itself when no change is applied.
    Raises
    ----------
    ValueError
        if both ``increase_only`` and ``decrease_only`` are set.
    """
    if increase_only and decrease_only:
        raise ValueError("Both increase only and decrease only are set")

    # An empty or completely silent signal has no defined level: log10(0) is
    # -inf, the gain becomes infinite and 0 * inf turns every sample into NaN.
    # No gain can bring silence to the target, so return it untouched.
    if np.size(wav) == 0:
        return wav
    mean_power = np.mean(wav**2)
    if mean_power == 0:
        return wav

    dBFS_change = target_dBFS - 10 * np.log10(mean_power)
    if dBFS_change < 0 and increase_only:
        return wav
    if dBFS_change > 0 and decrease_only:
        return wav
    # dB refer to power, the gain is applied to the amplitude: hence / 20.
    gain = 10**(dBFS_change / 20)
    return wav * gain


def trim_long_silences(wav,
                       vad_window_length: int,
                       vad_moving_average_width: int,
                       vad_max_silence_length: int,
                       sampling_rate: int):
    """
    Ensures that segments without voice in the waveform remain no longer than a
    threshold determined by the VAD parameters.

    The waveform is cut into windows of ``vad_window_length`` milliseconds,
    every window is classified by webrtcvad, the decisions are smoothed with a
    moving average and the voiced regions are dilated so that short pauses
    survive. Only the samples of the retained windows are returned. Trailing
    samples that do not fill a whole window are dropped.
    Parameters
    ----------
    wav : np.array
        the raw waveform as a numpy array of floats. It is scaled by
        INT16_MAX for the VAD; values that fall outside the 16-bit range are
        clipped (for the VAD decision only, the returned samples are the
        original floats).
    vad_window_length : int
        length of a VAD window, in milliseconds.
    vad_moving_average_width : int
        number of VAD windows averaged together when smoothing the decisions.
    vad_max_silence_length : int
        the voiced mask is dilated with a structuring element of
        ``vad_max_silence_length + 1`` windows.
    sampling_rate : int
        sampling rate of ``wav``, in Hz.
    Returns
    ----------
    np.array
        the same waveform with silences trimmed away (length <= original wav length)
    """
    # Compute the voice detection window size
    samples_per_window = (vad_window_length * sampling_rate) // 1000

    # Trim the end of the audio to have a multiple of the window size
    wav = wav[:len(wav) - (len(wav) % samples_per_window)]

    # Convert the float waveform to 16-bit mono PCM. Clip before casting:
    # a float outside the int16 range does not saturate on astype(np.int16),
    # it overflows, which would corrupt the signal handed to the VAD.
    pcm_samples = np.clip(
        np.round(wav * INT16_MAX), -INT16_MAX - 1, INT16_MAX).astype(np.int16)
    pcm_wave = struct.pack("%dh" % len(pcm_samples), *pcm_samples)

    # Perform voice activation detection, one decision per window
    voice_flags = []
    vad = webrtcvad.Vad(mode=3)
    for window_start in range(0, len(wav), samples_per_window):
        window_end = window_start + samples_per_window
        # Each sample takes 2 bytes in pcm_wave, hence the "* 2"
        voice_flags.append(
            vad.is_speech(
                pcm_wave[window_start * 2:window_end * 2],
                sample_rate=sampling_rate))
    voice_flags = np.array(voice_flags)

    # Smooth the voice detection with a moving average
    def moving_average(array, width):
        # Zero-pad both sides so that the output has the same length as array
        array_padded = np.concatenate((np.zeros((width - 1) // 2), array,
                                       np.zeros(width // 2)))
        # Running sum over `width` elements via a cumulative sum
        ret = np.cumsum(array_padded, dtype=float)
        ret[width:] = ret[width:] - ret[:-width]
        return ret[width - 1:] / width

    audio_mask = moving_average(voice_flags, vad_moving_average_width)
    audio_mask = np.round(audio_mask).astype(bool)

    # Dilate the voiced regions
    audio_mask = binary_dilation(audio_mask,
                                 np.ones(vad_max_silence_length + 1))
    # Expand the per-window mask to a per-sample mask
    audio_mask = np.repeat(audio_mask, samples_per_window)

    return wav[audio_mask]


def compute_partial_slices(n_samples: int,
                           partial_utterance_n_frames: int,
                           hop_length: int,
                           min_pad_coverage: float=0.75,
                           overlap: float=0.5):
    """
    Decide how an utterance is cut into fixed-size, possibly overlapping
    partial utterances.

    Two parallel lists of slices are produced: one indexing the waveform
    (in samples) and one indexing its mel spectrogram (in frames), so that
    the i-th waveform slice corresponds to the i-th spectrogram slice.

    The last slices may reach beyond the end of the waveform; callers are
    expected to zero-pad the waveform up to ``wav_slices[-1].stop``.
    Parameters
    ----------
    n_samples : int
        number of samples in the waveform.
    partial_utterance_n_frames : int
        number of mel spectrogram frames in each partial utterance.
    hop_length : int
        number of waveform samples between two consecutive spectrogram frames.
    min_pad_coverage : float
        in (0, 1]. The last partial utterance is kept only if at least this
        fraction of it is covered by real samples; otherwise it is dropped.
        The check is skipped when there is a single partial utterance, so at
        least one slice is always returned.
    overlap : float
        in [0, 1). Fraction by which consecutive partial utterances overlap;
        0 makes them disjoint.
    Returns
    ----------
        (wav_slices, mel_slices), two lists of ``slice`` objects of the same
        length.
    """
    assert 0 <= overlap < 1
    assert 0 < min_pad_coverage <= 1

    # Frame count as librosa derives it from the number of samples
    total_frames = int(np.ceil((n_samples + 1) / hop_length))

    # Distance, in frames, between the starts of two adjacent partials
    stride = int(np.round(partial_utterance_n_frames * (1 - overlap)))
    if stride < 1:
        stride = 1

    span = total_frames - partial_utterance_n_frames + stride + 1
    if span < 1:
        span = 1

    # [start, stop) frame bounds of every partial utterance
    frame_bounds = [
        np.array([first, first + partial_utterance_n_frames])
        for first in range(0, span, stride)
    ]
    mel_slices = [slice(*bounds) for bounds in frame_bounds]
    wav_slices = [slice(*(bounds * hop_length)) for bounds in frame_bounds]

    # Drop the last partial when too little of it is backed by real audio
    tail = wav_slices[-1]
    covered = (n_samples - tail.start) / (tail.stop - tail.start)
    if len(mel_slices) > 1 and covered < min_pad_coverage:
        mel_slices.pop()
        wav_slices.pop()

    return wav_slices, mel_slices


class SpeakerVerificationPreprocessor(object):
    """Audio front-end used by the GE2E speaker verification scripts.

    It bundles waveform loading / resampling / loudness normalization /
    silence trimming (``preprocess_wav``), mel spectrogram extraction
    (``melspectrogram``) and the splitting of an utterance into fixed-size
    partial spectrograms (``extract_mel_partials``).
    """

    def __init__(self,
                 sampling_rate: int,
                 audio_norm_target_dBFS: float,
                 vad_window_length,
                 vad_moving_average_width,
                 vad_max_silence_length,
                 mel_window_length,
                 mel_window_step,
                 n_mels,
                 partial_n_frames: int,
                 min_pad_coverage: float=0.75,
                 partial_overlap_ratio: float=0.5):
        # Partial-utterance slicing
        self.partial_overlap_ratio = partial_overlap_ratio
        self.min_pad_coverage = min_pad_coverage
        self.partial_n_frames = partial_n_frames

        # Mel spectrogram geometry: the window length and step are given in
        # milliseconds and converted to a number of samples here.
        self.n_mels = n_mels
        self.hop_length = int(mel_window_step * sampling_rate / 1000)
        self.n_fft = int(mel_window_length * sampling_rate / 1000)

        # Voice activity detection
        self.vad_max_silence_length = vad_max_silence_length
        self.vad_moving_average_width = vad_moving_average_width
        self.vad_window_length = vad_window_length

        # Loading and loudness normalization
        self.audio_norm_target_dBFS = audio_norm_target_dBFS
        self.sampling_rate = sampling_rate

    def preprocess_wav(self, fpath_or_wav, source_sr=None):
        """Load (if a path is given), resample, normalize and trim a waveform.

        ``fpath_or_wav`` is either a ``str`` / ``Path`` to an audio file or an
        already loaded waveform; in the latter case ``source_sr`` gives its
        sampling rate (no resampling happens when it is None).
        """
        if not isinstance(fpath_or_wav, (str, Path)):
            waveform = fpath_or_wav
        else:
            # sr=None keeps the file's own sampling rate
            waveform, source_sr = librosa.load(str(fpath_or_wav), sr=None)

        # Bring the audio to the configured sampling rate when they differ
        rate_mismatch = (source_sr is not None and
                         source_sr != self.sampling_rate)
        if rate_mismatch:
            waveform = librosa.resample(
                waveform, orig_sr=source_sr, target_sr=self.sampling_rate)

        # Loudness normalization (volume is only ever raised)
        waveform = normalize_volume(
            waveform, self.audio_norm_target_dBFS, increase_only=True)

        # Silence trimming needs the optional webrtcvad package
        if not webrtcvad:
            return waveform
        return trim_long_silences(
            waveform, self.vad_window_length, self.vad_moving_average_width,
            self.vad_max_silence_length, self.sampling_rate)

    def melspectrogram(self, wav):
        """Return the mel spectrogram of ``wav`` as float32, time axis first."""
        spectrum = librosa.feature.melspectrogram(
            y=wav,
            sr=self.sampling_rate,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels)
        return spectrum.astype(np.float32).T

    def extract_mel_partials(self, wav):
        """Cut ``wav`` into partial utterances and return their spectrograms."""
        wav_slices, mel_slices = compute_partial_slices(
            len(wav), self.partial_n_frames, self.hop_length,
            self.min_pad_coverage, self.partial_overlap_ratio)

        # Zero-pad the audio so that the last slice is fully covered
        missing = wav_slices[-1].stop - len(wav)
        if missing >= 0:
            wav = np.pad(wav, (0, missing), "constant")

        # One spectrogram for the whole utterance, then slice it
        all_frames = self.melspectrogram(wav)
        return np.array([all_frames[part] for part in mel_slices])  # [B, T, C]


================================================
FILE: paddlespeech/vector/exps/ge2e/config.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from yacs.config import CfgNode

# Root node of the default configuration of the GE2E experiment.
_C = CfgNode()

# ---------------------------------------------------------------------------
# data: audio loading, voice activity detection and mel spectrogram settings
# ---------------------------------------------------------------------------
data_config = CfgNode()
_C.data = data_config

# Target level of the audio volume normalization.
data_config.audio_norm_target_dBFS = -30

# Sample rate of the audio, in Hz.
data_config.sampling_rate = 16000

# Voice activity detection (VAD).
# Window size of the VAD, in milliseconds; must be 10, 20 or 30. It sets the
# granularity of the VAD and should not need to be changed.
data_config.vad_window_length = 30
# Number of frames averaged together by the moving average smoothing. The
# larger the value, the larger a VAD variation must be to survive smoothing.
data_config.vad_moving_average_width = 8
# Maximum number of consecutive silent frames a segment may contain.
data_config.vad_max_silence_length = 6

# Mel filterbank.
data_config.mel_window_length = 25  # in milliseconds
data_config.mel_window_step = 10  # in milliseconds
data_config.n_mels = 40  # number of mel bands

# Partial utterances.
# Number of spectrogram frames in a partial utterance (160 frames = 1600 ms).
data_config.partial_n_frames = 160
# A partial is kept only if at least 75% of it is backed by valid audio.
data_config.min_pad_coverage = 0.75
# Overlap ratio between adjacent partials.
data_config.partial_overlap_ratio = 0.5

# ---------------------------------------------------------------------------
# model: speaker encoder dimensions
# ---------------------------------------------------------------------------
model_config = CfgNode()
_C.model = model_config
model_config.num_layers = 3
model_config.hidden_size = 256
model_config.embedding_size = 256  # size of the output embedding

# ---------------------------------------------------------------------------
# training: optimization schedule and batch composition
# ---------------------------------------------------------------------------
training_config = CfgNode()
_C.training = training_config
training_config.learning_rate_init = 1e-4
training_config.speakers_per_batch = 64
training_config.utterances_per_speaker = 10
training_config.max_iteration = 1560000
training_config.save_interval = 10000
training_config.valid_interval = 10000


def get_cfg_defaults():
    """Return a fresh copy of the default configuration.

    A clone is handed out so that callers can merge files or options into it
    without modifying the module-level defaults.
    """
    defaults = _C.clone()
    return defaults


================================================
FILE: paddlespeech/vector/exps/ge2e/dataset_processors.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import multiprocessing as mp
from functools import partial
from pathlib import Path
from typing import List

import numpy as np
from tqdm import tqdm

from paddlespeech.vector.exps.ge2e.audio_processor import SpeakerVerificationPreprocessor


def _process_utterance(path_pair, processor: SpeakerVerificationPreprocessor):
    """Preprocess one audio file and store its mel spectrogram as ``.npy``.

    ``path_pair`` is an ``(input_path, output_path)`` pair. Nothing is written
    when the preprocessed waveform is empty or when the spectrogram has fewer
    than ``processor.partial_n_frames`` frames.
    """
    src_path, dst_path = path_pair

    # Load and preprocess the waveform
    waveform = processor.preprocess_wav(src_path)
    if len(waveform) > 0:
        # Keep only spectrograms long enough to hold one partial utterance
        mel_frames = processor.melspectrogram(waveform)
        if len(mel_frames) >= processor.partial_n_frames:
            np.save(dst_path, mel_frames)


def _process_speaker(speaker_dir: Path,
                     processor: SpeakerVerificationPreprocessor,
                     datasets_root: Path,
                     output_dir: Path,
                     pattern: str,
                     skip_existing: bool=False):
    """Preprocess every utterance of one speaker.

    Args:
        speaker_dir: directory holding the speaker's audio files.
        processor: preprocessor applied to each utterance.
        datasets_root: reference path used to derive the speaker name. The
            path of ``speaker_dir`` relative to it, joined with "_", becomes
            the name, so the dataset name is prepended to the speaker id
            (several multi-speaker datasets are mixed together).
        output_dir: root of the processed dataset; a sub-directory named after
            the speaker is created in it.
        pattern: glob pattern (searched recursively) selecting the audio files.
        skip_existing: when True, utterances already listed in the speaker's
            ``_sources.txt`` are skipped and new entries are appended to it;
            otherwise the file is rewritten from scratch.
    """
    speaker_name = "_".join(speaker_dir.relative_to(datasets_root).parts)
    speaker_output_dir = output_dir / speaker_name
    speaker_output_dir.mkdir(parents=True, exist_ok=True)

    # Load the set of already processed outputs. It is only consulted when
    # skip_existing is set, so do not bother reading the file otherwise.
    sources_fpath = speaker_output_dir / "_sources.txt"
    existing_names = set()
    if skip_existing and sources_fpath.exists():
        try:
            with sources_fpath.open("rt") as sources_file:
                existing_names = {line.split(",")[0] for line in sources_file}
        except Exception:
            # Best effort: an unreadable index only means that everything
            # gets processed again.
            existing_names = set()

    # The context manager guarantees that the index is flushed and closed even
    # if the processing of an utterance raises.
    with sources_fpath.open("at" if skip_existing else "wt") as sources_file:
        for in_fpath in speaker_dir.rglob(pattern):
            # Flatten the relative path into a single file name
            out_name = "_".join(
                in_fpath.relative_to(speaker_dir).with_suffix(".npy").parts)
            if skip_existing and out_name in existing_names:
                continue
            out_fpath = speaker_output_dir / out_name
            _process_utterance((in_fpath, out_fpath), processor)
            sources_file.write(f"{out_name},{in_fpath}\n")


def _process_dataset(processor: SpeakerVerificationPreprocessor,
                     datasets_root: Path,
                     speaker_dirs: List[Path],
                     dataset_name: str,
                     output_dir: Path,
                     pattern: str,
                     skip_existing: bool=False):
    """Preprocess all speakers of a dataset with a pool of 16 processes.

    Every entry of ``speaker_dirs`` is handed to ``_process_speaker``; the
    remaining arguments are forwarded to it unchanged. Progress is reported
    with a tqdm bar labelled with ``dataset_name``.
    """
    n_speakers = len(speaker_dirs)
    print(f"{dataset_name}: Preprocessing data for {n_speakers} speakers.")

    # Bind everything but the speaker directory, which the pool iterates over
    worker = partial(
        _process_speaker,
        processor=processor,
        datasets_root=datasets_root,
        output_dir=output_dir,
        pattern=pattern,
        skip_existing=skip_existing)

    with mp.Pool(16) as pool:
        progress = tqdm(
            pool.imap(worker, speaker_dirs),
            desc=dataset_name,
            total=n_speakers,
            unit="speakers")
        # Drain the iterator: results are None, only completion matters
        for _ in progress:
            pass
    print(f"Done preprocessing {dataset_name}.")


def process_librispeech(processor,
                        datasets_root,
                        output_dir,
                        skip_existing=False):
    """Preprocess the ``*.flac`` files of LibriSpeech ``train-other-500``.

    Every entry directly under ``datasets_root / "LibriSpeech/train-other-500"``
    is treated as one speaker.
    """
    name = "LibriSpeech/train-other-500"
    speakers = [entry for entry in (datasets_root / name).glob("*")]
    _process_dataset(
        processor=processor,
        datasets_root=datasets_root,
        speaker_dirs=speakers,
        dataset_name=name,
        output_dir=output_dir,
        pattern="*.flac",
        skip_existing=skip_existing)


def process_voxceleb1(processor, datasets_root, output_dir,
                      skip_existing=False):
    """Preprocess the ``*.wav`` files of the anglophone VoxCeleb1 dev speakers.

    The speakers are selected from ``VoxCeleb1/vox1_meta.csv`` (tab separated,
    first line is a header): a speaker is kept when the last column is "dev"
    and the fourth column (nationality) is one of australia, canada, ireland,
    uk or usa, compared case-insensitively. Only the matching directories
    under ``VoxCeleb1/wav`` are processed.
    """
    dataset_name = "VoxCeleb1"
    dataset_root = datasets_root / dataset_name

    anglophone_nationalites = {"australia", "canada", "ireland", "uk", "usa"}
    with dataset_root.joinpath("vox1_meta.csv").open("rt") as metafile:
        # [1:] skips the header line
        metadata = [line.strip().split("\t") for line in metafile][1:]

    # speaker id -> nationality
    nationalities = {line[0]: line[3] for line in metadata if line[-1] == "dev"}
    # A set makes the per-directory membership test below O(1) instead of a
    # linear scan over all kept speaker ids.
    keep_speaker_ids = {
        speaker_id
        for speaker_id, nationality in nationalities.items()
        if nationality.lower() in anglophone_nationalites
    }
    print(
        "VoxCeleb1: using samples from {} (presumed anglophone) speakers out of {}."
        .format(len(keep_speaker_ids), len(nationalities)))

    speaker_dirs = [
        speaker_dir for speaker_dir in (dataset_root / "wav").glob("*")
        if speaker_dir.name in keep_speaker_ids
    ]
    _process_dataset(processor, datasets_root, speaker_dirs, dataset_name,
                     output_dir, "*.wav", skip_existing)


def process_voxceleb2(processor, datasets_root, output_dir,
                      skip_existing=False):
    """Preprocess the ``*.wav`` files of every speaker under ``VoxCeleb2/wav``.

    Unlike VoxCeleb1, the VoxCeleb2 meta data carries no nationality, so no
    speaker filtering is done.
    """
    name = "VoxCeleb2"
    wav_root = datasets_root / name / "wav"
    speakers = [entry for entry in wav_root.glob("*")]
    _process_dataset(
        processor=processor,
        datasets_root=datasets_root,
        speaker_dirs=speakers,
        dataset_name=name,
        output_dir=output_dir,
        pattern="*.wav",
        skip_existing=skip_existing)


def process_aidatatang_200zh(processor,
                             datasets_root,
                             output_dir,
                             skip_existing=False):
    """Preprocess the ``*.wav`` files of ``aidatatang_200zh/train``.

    Every entry directly under the train directory is treated as one speaker.
    """
    name = "aidatatang_200zh/train"
    speakers = [entry for entry in (datasets_root / name).glob("*")]
    _process_dataset(
        processor=processor,
        datasets_root=datasets_root,
        speaker_dirs=speakers,
        dataset_name=name,
        output_dir=output_dir,
        pattern="*.wav",
        skip_existing=skip_existing)


def process_magicdata(processor, datasets_root, output_dir,
                      skip_existing=False):
    """Preprocess the ``*.wav`` files of ``magicdata/train``.

    Every entry directly under the train directory is treated as one speaker.
    """
    name = "magicdata/train"
    speakers = [entry for entry in (datasets_root / name).glob("*")]
    _process_dataset(
        processor=processor,
        datasets_root=datasets_root,
        speaker_dirs=speakers,
        dataset_name=name,
        output_dir=output_dir,
        pattern="*.wav",
        skip_existing=skip_existing)


================================================
FILE: paddlespeech/vector/exps/ge2e/inference.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from pathlib import Path

import numpy as np
import paddle
import tqdm

from paddlespeech.vector.exps.ge2e.audio_processor import SpeakerVerificationPreprocessor
from paddlespeech.vector.exps.ge2e.config import get_cfg_defaults
from paddlespeech.vector.models.lstm_speaker_encoder import LSTMSpeakerEncoder


def embed_utterance(processor, model, fpath_or_wav):
    """Compute the speaker embedding of one utterance.

    Args:
        processor: audio processor providing ``preprocess_wav`` and
            ``extract_mel_partials``.
        model: speaker encoder providing ``eval`` and ``embed_utterance``.
            It is switched to evaluation mode as a side effect.
        fpath_or_wav: whatever ``processor.preprocess_wav`` accepts (a path to
            an audio file or a waveform).

    Returns:
        The result of ``model.embed_utterance`` converted with ``.numpy()``.
    """
    # audio processor
    wav = processor.preprocess_wav(fpath_or_wav)
    mel_partials = processor.extract_mel_partials(wav)

    model.eval()
    # speaker encoder; a single no_grad context is enough (it used to be
    # entered twice, the inner one being redundant)
    with paddle.no_grad():
        embed = model.embed_utterance(paddle.to_tensor(mel_partials))
    return embed.numpy()


def _process_utterance(ifpath: Path,
                       input_dir: Path,
                       output_dir: Path,
                       processor: SpeakerVerificationPreprocessor,
                       model: LSTMSpeakerEncoder):
    """Embed one audio file and save the embedding as ``.npy``.

    The output file mirrors, under ``output_dir``, the location of ``ifpath``
    relative to ``input_dir``, with the suffix replaced by ``.npy``. Missing
    parent directories are created.
    """
    relative = ifpath.relative_to(input_dir)
    target = (output_dir / relative).with_suffix(".npy")
    target.parent.mkdir(parents=True, exist_ok=True)
    np.save(target, embed_utterance(processor, model, ifpath))


def main(config, args):
    """Compute and save the embedding of every utterance of a folder.

    Args:
        config: configuration node; ``config.data`` and ``config.model`` are
            read to build the audio processor and the speaker encoder.
        args: parsed command line arguments. The fields used are ``ngpu``,
            ``checkpoint_path`` (without the ".pdparams" suffix), ``input``,
            ``pattern`` and ``output``.
    """
    if args.ngpu == 0:
        paddle.set_device("cpu")
    elif args.ngpu > 0:
        paddle.set_device("gpu")
    else:
        print("ngpu should >= 0 !")

    # load model
    model = LSTMSpeakerEncoder(config.data.n_mels, config.model.num_layers,
                               config.model.hidden_size,
                               config.model.embedding_size)
    weights_fpath = str(Path(args.checkpoint_path).expanduser())
    model_state_dict = paddle.load(weights_fpath + ".pdparams")
    model.set_state_dict(model_state_dict)
    model.eval()
    print(f"Loaded encoder {weights_fpath}")

    # create audio processor
    c = config.data
    processor = SpeakerVerificationPreprocessor(
        sampling_rate=c.sampling_rate,
        audio_norm_target_dBFS=c.audio_norm_target_dBFS,
        vad_window_length=c.vad_window_length,
        vad_moving_average_width=c.vad_moving_average_width,
        vad_max_silence_length=c.vad_max_silence_length,
        mel_window_length=c.mel_window_length,
        mel_window_step=c.mel_window_step,
        n_mels=c.n_mels,
        partial_n_frames=c.partial_n_frames,
        min_pad_coverage=c.min_pad_coverage,
        # Use the configured overlap ratio; min_pad_coverage used to be passed
        # here by mistake, silently changing the overlap between partials.
        partial_overlap_ratio=c.partial_overlap_ratio, )

    # input output preparation
    input_dir = Path(args.input).expanduser()
    ifpaths = list(input_dir.rglob(args.pattern))
    print(f"{len(ifpaths)} utterances in total")
    output_dir = Path(args.output).expanduser()
    output_dir.mkdir(parents=True, exist_ok=True)

    for ifpath in tqdm.tqdm(ifpaths, unit="utterance"):
        _process_utterance(ifpath, input_dir, output_dir, processor, model)


if __name__ == "__main__":
    # Start from the packaged defaults; --config and --opts override them.
    config = get_cfg_defaults()

    # Command line options, registered in this order: (flags, keyword
    # arguments passed to add_argument).
    cli_options = [
        (("--config", ), dict(
            metavar="FILE",
            help="path of the config file to overwrite to default config with."
        )),
        (("--input", ), dict(
            type=str, help="path of the audio_file folder.")),
        (("--pattern", ), dict(
            type=str, default="*.wav",
            help="pattern to filter audio files.")),
        (("--output", ), dict(
            metavar="OUTPUT_DIR", help="path to save checkpoint and logs.")),
        # load from saved checkpoint
        (("--checkpoint_path", ), dict(
            type=str, help="path of the checkpoint to load")),
        # overwrite extra config and default config
        (("--opts", ), dict(
            nargs=argparse.REMAINDER,
            help="options to overwrite --config file and the default config, passing in KEY VALUE pairs"
        )),
        (("--ngpu", ), dict(
            type=int, default=1, help="if ngpu=0, use cpu.")),
    ]

    arg_parser = argparse.ArgumentParser(
        description="compute utterance embed.")
    for flags, options in cli_options:
        arg_parser.add_argument(*flags, **options)
    args = arg_parser.parse_args()

    # Layer the overrides: config file first, then KEY VALUE pairs
    if args.config:
        config.merge_from_file(args.config)
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()

    print(config)
    print(args)

    main(config, args)


================================================
FILE: paddlespeech/vector/exps/ge2e/preprocess.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
from pathlib import Path

from paddlespeech.vector.exps.ge2e.audio_processor import SpeakerVerificationPreprocessor
from paddlespeech.vector.exps.ge2e.config import get_cfg_defaults
from paddlespeech.vector.exps.ge2e.dataset_processors import process_aidatatang_200zh
from paddlespeech.vector.exps.ge2e.dataset_processors import process_librispeech
from paddlespeech.vector.exps.ge2e.dataset_processors import process_magicdata
from paddlespeech.vector.exps.ge2e.dataset_processors import process_voxceleb1
from paddlespeech.vector.exps.ge2e.dataset_processors import process_voxceleb2

if __name__ == "__main__":
    # Command-line entry point: run the GE2E speaker-verification
    # preprocessing over one or more raw corpora found under --datasets_root
    # and write the results under --output_dir.
    parser = argparse.ArgumentParser(
        description="preprocess dataset for speaker verification task")
    parser.add_argument(
        "--datasets_root",
        type=Path,
        help="Path to the directory containing your LibriSpeech, LibriTTS and VoxCeleb datasets."
    )
    parser.add_argument(
        "--output_dir", type=Path, help="Path to save processed dataset.")
    parser.add_argument(
        "--dataset_names",
        type=str,
        default="librispeech_other,voxceleb1,voxceleb2",
        help="comma-separated list of names of the datasets you want to preprocess. only "
        "the train set of these datastes will be used. Possible names: librispeech_other, "
        "voxceleb1, voxceleb2, aidatatang_200zh, magicdata.")
    parser.add_argument(
        "--skip_existing",
        action="store_true",
        help="Whether to skip output files with the same name. Useful if this script was interrupted."
    )
    parser.add_argument(
        "--no_trim",
        action="store_true",
        help="Preprocess audio without trimming silences (not recommended).")

    args = parser.parse_args()

    if not args.no_trim:
        # Fail early, with an actionable message, when the VAD dependency that
        # silence trimming relies on cannot be imported.
        try:
            import webrtcvad
            print(webrtcvad.__version__)
        except Exception as e:
            # Chain the original exception so the real cause stays visible.
            raise ModuleNotFoundError(
                "Package 'webrtcvad' not found. This package enables "
                "noise removal and is recommended. Please install and "
                "try again. If installation fails, "
                "use --no_trim to disable this error message.") from e
    del args.no_trim

    # Every path below is derived from --datasets_root, so report its absence
    # as a usage error instead of an AttributeError on None.
    if args.datasets_root is None:
        parser.error("--datasets_root is required")

    args.datasets = [item.strip() for item in args.dataset_names.split(",")]
    # argparse always defines the attribute (as None when the flag is omitted),
    # so the fallback has to test for None rather than use hasattr().
    if args.output_dir is None:
        args.output_dir = args.datasets_root / "SV2TTS" / "encoder"

    args.output_dir = args.output_dir.expanduser()
    args.datasets_root = args.datasets_root.expanduser()
    assert args.datasets_root.exists(), (
        "datasets_root does not exist: %s" % args.datasets_root)
    args.output_dir.mkdir(exist_ok=True, parents=True)

    # Maps each supported --dataset_names entry to its processing function.
    preprocess_func = {
        "librispeech_other": process_librispeech,
        "voxceleb1": process_voxceleb1,
        "voxceleb2": process_voxceleb2,
        "aidatatang_200zh": process_aidatatang_200zh,
        "magicdata": process_magicdata,
    }

    # Reject unknown names before any (potentially long) processing starts,
    # rather than hitting a KeyError after earlier datasets were processed.
    unknown = [name for name in args.datasets if name not in preprocess_func]
    if unknown:
        parser.error("unknown dataset name(s): %s. Possible names: %s" %
                     (", ".join(repr(name) for name in unknown),
                      ", ".join(preprocess_func)))

    config = get_cfg_defaults()
    print(args)

    c = config.data
    processor = SpeakerVerificationPreprocessor(
        sampling_rate=c.sampling_rate,
        audio_norm_target_dBFS=c.audio_norm_target_dBFS,
        vad_window_length=c.vad_window_length,
        vad_moving_average_width=c.vad_moving_average_width,
        vad_max_silence_length=c.vad_max_silence_length,
        mel_window_length=c.mel_window_length,
        mel_window_step=c.mel_window_step,
        n_mels=c.n_mels,
        partial_n_frames=c.partial_n_frames,
        min_pad_coverage=c.min_pad_coverage,
        # NOTE(review): the overlap ratio is fed from `min_pad_coverage`; this
        # looks like a copy-paste slip. Confirm whether the config defines a
        # dedicated `partial_overlap_ratio` entry before changing it.
        partial_overlap_ratio=c.min_pad_coverage, )

    for dataset in args.datasets:
        print("Preprocessing %s" % dataset)
        preprocess_func[dataset](processor, args.datasets_root, args.output_dir,
                                 args.skip_existing)


================================================
FILE: paddlespeech/vector/exps/ge2e/random_cycle.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random


def cycle(iterable):
    """Yield the items of ``iterable`` in order, then repeat them forever.

    Example: cycle('ABCD') --> A B C D A B C D A B C D ...

    Items are cached while the input is consumed for the first time, so a
    one-shot iterator can be replayed. An empty input yields nothing.
    """
    cache = []
    for item in iterable:
        yield item
        cache.append(item)
    # Replay the cached items indefinitely; an empty cache ends the generator.
    while cache:
        yield from cache


def random_cycle(iterable):
    """Yield the items of ``iterable`` once in order, then forever in
    freshly shuffled passes.

    Example: random_cycle('ABCD') --> A B C D B C D A A D B C ...

    The first pass follows the input order; the cached items are reshuffled
    in place (with the global ``random`` state) before every later pass.
    An empty input yields nothing.
    """
    pool = []
    for item in iterable:
        yield item
        pool.append(item)
    random.shuffle(pool)
    while pool:
        yield from pool
        # New permutation for the next pass.
        random.shuffle(pool)


================================================
FILE: paddlespeech/vector/exps/ge2e/speaker_verification_dataset.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random
from pathlib import Path

import numpy as np
from paddle.io import BatchSampler
from paddle.io import Dataset

from paddlespeech.vector.exps.ge2e.random_cycle import random_cycle


class MultiSpeakerMelDataset(Dataset):
    """A 2 layer directory that contains mel spectrograms in *.npy format.
    An Example file structure tree is shown below. We prefer to preprocess
    raw datasets and organized them like this.

    dataset_root/
      speaker1/
        utterance1.npy
        utterance2.npy
        utterance3.npy
      speaker2/
        utterance1.npy
        utterance2.npy
        utterance3.npy

    Note that ``__getitem__`` is indexed by file path, not by integer; use
    ``get_example_by_index`` for (speaker, utterance) integer addressing.
    """

    def __init__(self, dataset_root: Path):
        """Scan ``dataset_root`` and index every speaker folder.

        Args:
            dataset_root: directory whose sub-directories each hold the
                ``*.npy`` files of one speaker. ``~`` is expanded.

        Raises:
            AssertionError: if a speaker folder contains no ``*.npy`` file.
        """
        self.root = Path(dataset_root).expanduser()
        speaker_dirs = []
        speaker_utterances = {}
        for f in self.root.glob("*"):
            if f.is_dir():
                # Glob each speaker folder once and reuse the listing for
                # both the emptiness check and the index.
                utterances = list(f.glob("*.npy"))
                assert utterances, "This folder NOT includes any npy data file."
                speaker_dirs.append(f)
                speaker_utterances[f] = utterances

        self.speaker_dirs = speaker_dirs
        self.speaker_to_utterances = speaker_utterances

        # meta data
        self.num_speakers = len(self.speaker_dirs)
        # Builtin sum: passing a generator to np.sum is deprecated in NumPy.
        self.num_utterances = sum(
            len(utterances)
            for utterances in self.speaker_to_utterances.values())

    def get_example_by_index(self, speaker_index, utterance_index):
        """Load one example addressed by speaker and utterance position."""
        speaker_dir = self.speaker_dirs[speaker_index]
        fpath = self.speaker_to_utterances[speaker_dir][utterance_index]
        return self[fpath]

    def __getitem__(self, fpath):
        """Load and return the array stored at ``fpath``."""
        return np.load(fpath)

    def __len__(self):
        """Total number of utterance files over all speakers."""
        return int(self.num_utterances)


class MultiSpeakerSampler(BatchSampler):
    """Two-level batch sampler for speaker verification.

    Each batch is built by drawing N speakers and then, for every drawn
    speaker, M of that speaker's utterance paths. Both levels are driven by
    ``random_cycle`` streams, and the iterator never terminates.
    """

    def __init__(self,
                 dataset: MultiSpeakerMelDataset,
                 speakers_per_batch: int,
                 utterances_per_speaker: int):
        self.speakers_per_batch = speakers_per_batch
        self.utterances_per_speaker = utterances_per_speaker

        # Own copy of the speaker list; the utterance mapping is shared.
        self._speakers = list(dataset.speaker_dirs)
        self._speaker_to_utterances = dataset.speaker_to_utterances

    def __iter__(self):
        # Each yielded batch is a flat list of utterance Paths, grouped
        # speaker by speaker.
        speaker_stream = iter(random_cycle(self._speakers))
        utterance_streams = {}
        for speaker, paths in self._speaker_to_utterances.items():
            utterance_streams[speaker] = iter(random_cycle(paths))

        while True:
            chosen = [
                next(speaker_stream) for _ in range(self.speakers_per_batch)
            ]
            batch = [
                next(utterance_streams[speaker])
                for speaker in chosen
                for _ in range(self.utterances_per_speaker)
            ]
            yield batch


class RandomClip(object):
    """Callable that cuts a random window of ``frames`` rows out of a
    2-D array whose first axis is time ([T, C])."""

    def __init__(self, frames):
        self.frames = frames

    def __call__(self, spec):
        # Last admissible start keeps the whole window inside the T rows.
        num_rows = spec.shape[0]
        last_start = num_rows - self.frames
        begin = random.randint(0, last_start)
        stop = begin + self.frames
        return spec[begin:stop, :]


class Collate(object):
    """Batch collation: randomly crop every example to ``num_frames`` rows
    and stack the crops into a single array."""

    def __init__(self, num_frames):
        self.random_crop = RandomClip(num_frames)

    def __call__(self, examples):
        # One independent random crop per example, stacked along a new
        # leading axis.
        crops = []
        for mel in examples:
            crops.append(self.random_crop(mel))
        return np.stack(crops)


================================================
FILE: paddlespeech/vector/exps/ge2e/train.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time

from paddle import DataParallel
from paddle import distributed as dist
from paddle.io import DataLoader
from paddle.nn.clip import ClipGradByGlobalNorm
from paddle.optimizer import Adam

from paddlespeech.t2s.training import default_argument_parser
from paddlespeech.t2s.training import ExperimentBase
from paddlespeech.vector.exps.ge2e.config import get_cfg_defaults
from paddlespeech.vector.exps.ge2e.speaker_verification_dataset import Collate
from paddlespeech.vector.exps.ge2e.speaker_verification_dataset import MultiSpeakerMelDataset
from paddlespeech.vector.exps.ge2e.speaker_verification_dataset import MultiSpeakerSampler
from paddlespeech.vector.models.lstm_speaker_encoder import LSTMSpeakerEncoder


class Ge2eExperiment(ExperimentBase):
    """Training experiment for the LSTM speaker encoder (GE2E).

    Supplies the model/optimizer setup, the data pipeline and the
    per-iteration training step on top of ``ExperimentBase``.
    """

    def setup_model(self):
        """Create the speaker encoder and its Adam optimizer."""
        config = self.config
        model = LSTMSpeakerEncoder(config.data.n_mels, config.model.num_layers,
                                   config.model.hidden_size,
                                   config.model.embedding_size)
        # Gradients are clipped by global norm with a threshold of 3.
        optimizer = Adam(
            config.training.learning_rate_init,
            parameters=model.parameters(),
            grad_clip=ClipGradByGlobalNorm(3))
        # `self.model` may be the DataParallel wrapper; `self.model_core`
        # always refers to the unwrapped encoder.
        self.model = DataParallel(model) if self.parallel else model
        self.model_core = model
        self.optimizer = optimizer

    def setup_dataloader(self):
        """Create the training dataset, batch sampler and data loader."""
        config = self.config
        train_dataset = MultiSpeakerMelDataset(self.args.data)
        sampler = MultiSpeakerSampler(train_dataset,
                                      config.training.speakers_per_batch,
                                      config.training.utterances_per_speaker)
        # Every example is randomly cropped to `partial_n_frames` frames
        # during collation.
        train_loader = DataLoader(
            train_dataset,
            batch_sampler=sampler,
            collate_fn=Collate(config.data.partial_n_frames),
            num_workers=16)

        self.train_dataset = train_dataset
        self.train_loader = train_loader

    def train_batch(self):
        """Run one optimization step and log loss, EER and timing."""
        start = time.time()
        batch = self.read_batch()
        # Time spent only on fetching the batch.
        data_loader_time = time.time() - start

        self.optimizer.clear_grad()
        self.model.train()
        specs = batch
        loss, eer = self.model(specs, self.config.training.speakers_per_batch)
        loss.backward()
        # Model-specific gradient post-processing applied before the
        # optimizer step (defined by the encoder; not visible here).
        self.model_core.do_gradient_ops()
        self.optimizer.step()
        # Whole iteration time, including the batch fetch above.
        iteration_time = time.time() - start

        # logging
        loss_value = float(loss)
        msg = "Rank: {}, ".format(dist.get_rank())
        msg += "step: {}, ".format(self.iteration)
        msg += "time: {:>.3f}s/{:>.3f}s, ".format(data_loader_time,
                                                  iteration_time)
        msg += 'loss: {:>.6f} err: {:>.6f}'.format(loss_value, eer)
        self.logger.info(msg)

        # Scalars are written to the visualizer by rank 0 only.
        if dist.get_rank() == 0:
            self.visualizer.add_scalar("train/loss", loss_value, self.iteration)
            self.visualizer.add_scalar("train/eer", eer, self.iteration)
            self.visualizer.add_scalar("param/w",
                                       float(self.model_core.similarity_weight),
                                       self.iteration)
            self.visualizer.add_scalar("param/b",
                                       float(self.model_core.similarity_bias),
                                       self.iteration)

    def valid(self):
        """No validation is performed for this experiment."""
        pass


def main_sp(config, args):
    """Single-process entry point: build the experiment, set it up,
    resume or load a checkpoint, then run it."""
    experiment = Ge2eExperiment(config, args)
    experiment.setup()
    experiment.resume_or_load()
    experiment.run()


def main(config, args):
    """Run in the current process, or spawn ``args.ngpu`` worker
    processes when more than one GPU is requested."""
    if args.ngpu <= 1:
        main_sp(config, args)
        return
    dist.spawn(main_sp, args=(config, args), nprocs=args.ngpu)


if __name__ == "__main__":
    # Effective configuration = defaults, overridden by the optional config
    # file, overridden in turn by KEY VALUE pairs from the command line.
    config = get_cfg_defaults()
    args = default_argument_parser().parse_args()

    if args.config:
        config.merge_from_file(args.config)
    if args.opts:
        config.merge_from_list(args.opts)

    # No further changes to the configuration are allowed after this point.
    config.freeze()

    print(config)
    print(args)

    main(config, args)


================================================
FILE: paddlespeech/vector/io/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/vector/io/augment.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# this is modified from SpeechBrain
# https://github.com/speechbrain/speechbrain/blob/085be635c07f16d42cd1295045bc46c407f1e15b/speechbrain/lobes/augment.py
import math
import os
from typing import List

import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from paddlespeech.s2t.utils.log import Log
from paddlespeech.vector.io.dataset import CSVDataset
from paddlespeech.vector.io.signal_processing import compute_amplitude
from paddlespeech.vector.io.signal_processing import convolve1d
from paddlespeech.vector.io.signal_processing import dB_to_amplitude
from paddlespeech.vector.io.signal_processing import notch_filter
from paddlespeech.vector.io.signal_processing import reverberate

logger = Log(__name__).getlog()


# TODO: Complete type-hint and doc string.
class DropFreq(nn.Layer):
    """Augmentation layer that removes randomly chosen frequency bands.

    A random number of notch filters is composed into a single filter, which
    is then convolved with the input waveforms.
    """

    def __init__(
            self,
            drop_freq_low=1e-14,
            drop_freq_high=1,
            drop_count_low=1,
            drop_count_high=2,
            drop_width=0.05,
            drop_prob=1, ):
        """Store the augmentation settings.

        Args:
            drop_freq_low: lower bound of the range notch frequencies are
                drawn from.
            drop_freq_high: upper bound of that range.
            drop_count_low: minimum number of frequencies to drop.
            drop_count_high: maximum number of frequencies to drop
                (inclusive).
            drop_width: width argument forwarded to ``notch_filter``.
            drop_prob: probability threshold for applying the drop to a
                batch.
        """
        super(DropFreq, self).__init__()
        self.drop_freq_low = drop_freq_low
        self.drop_freq_high = drop_freq_high
        self.drop_count_low = drop_count_low
        self.drop_count_high = drop_count_high
        self.drop_width = drop_width
        self.drop_prob = drop_prob

    def forward(self, waveforms):
        """Return a copy of ``waveforms`` with random frequencies notched out.

        A 2-D input gets a trailing dimension added before filtering; the
        result always has a trailing size-1 dimension squeezed away.
        """
        # Don't drop (return early) 1-`drop_prob` portion of the batches
        dropped_waveform = waveforms.clone()
        if paddle.rand([1]) > self.drop_prob:
            return dropped_waveform

        # Add channels dimension
        if len(waveforms.shape) == 2:
            dropped_waveform = dropped_waveform.unsqueeze(-1)

        # Pick number of frequencies to drop
        drop_count = paddle.randint(
            low=self.drop_count_low, high=self.drop_count_high + 1, shape=[1])

        # Pick a frequency to drop
        drop_range = self.drop_freq_high - self.drop_freq_low
        drop_frequency = (
            paddle.rand([drop_count]) * drop_range + self.drop_freq_low)

        # Filter parameters
        filter_length = 101
        pad = filter_length // 2

        # Start with delta function
        drop_filter = paddle.zeros([1, filter_length, 1])
        drop_filter[0, pad, 0] = 1

        # Subtract each frequency
        # (each notch kernel is folded into the running filter by convolution)
        for frequency in drop_frequency:
            notch_kernel = notch_filter(frequency, filter_length,
                                        self.drop_width)
            drop_filter = convolve1d(drop_filter, notch_kernel, pad)

        # Apply filter
        dropped_waveform = convolve1d(dropped_waveform, drop_filter, pad)

        # Remove channels dimension if added
        # NOTE(review): the squeeze is unconditional, so an input that already
        # had a trailing dimension of size 1 would lose it too - confirm.
        return dropped_waveform.squeeze(-1)


class DropChunk(nn.Layer):
    """Augmentation layer that overwrites random chunks of each waveform.

    Chunks are set to zero, or filled with uniform noise when
    ``noise_factor`` is non-zero.
    """

    def __init__(
            self,
            drop_length_low=100,
            drop_length_high=1000,
            drop_count_low=1,
            drop_count_high=10,
            drop_start=0,
            drop_end=None,
            drop_prob=1,
            noise_factor=0.0, ):
        """Store and validate the augmentation settings.

        Args:
            drop_length_low: minimum chunk length, in samples.
            drop_length_high: maximum chunk length (inclusive), in samples.
            drop_count_low: minimum number of chunks per waveform.
            drop_count_high: maximum number of chunks per waveform
                (inclusive).
            drop_start: lowest chunk start position; a negative value is
                offset by the waveform length in ``forward``.
            drop_end: end of the region chunks may be placed in; ``None``
                means the waveform length, a negative value is offset by the
                waveform length in ``forward``.
            drop_prob: probability threshold for applying the drop to a
                batch.
            noise_factor: scale of the noise written into dropped chunks;
                a falsy value writes zeros instead.

        Raises:
            ValueError: if a low limit exceeds the matching high limit, or
                ``drop_start`` exceeds a non-negative ``drop_end``.
        """
        super(DropChunk, self).__init__()
        self.drop_length_low = drop_length_low
        self.drop_length_high = drop_length_high
        self.drop_count_low = drop_count_low
        self.drop_count_high = drop_count_high
        self.drop_start = drop_start
        self.drop_end = drop_end
        self.drop_prob = drop_prob
        self.noise_factor = noise_factor

        # Validate low <= high
        if drop_length_low > drop_length_high:
            raise ValueError("Low limit must not be more than high limit")
        if drop_count_low > drop_count_high:
            raise ValueError("Low limit must not be more than high limit")

        # Make sure the length doesn't exceed end - start
        if drop_end is not None and drop_end >= 0:
            if drop_start > drop_end:
                raise ValueError("Low limit must not be more than high limit")

            drop_range = drop_end - drop_start
            self.drop_length_low = min(drop_length_low, drop_range)
            self.drop_length_high = min(drop_length_high, drop_range)

    def forward(self, waveforms, lengths):
        """Return a copy of ``waveforms`` with random chunks overwritten.

        Args:
            waveforms: batch of waveforms; the second dimension is used as
                the sample axis.
            lengths: per-item lengths, multiplied by ``waveforms.shape[1]``
                to obtain sample counts (presumably relative lengths in
                [0, 1] - confirm with callers).
        """
        # Reading input list
        lengths = (lengths * waveforms.shape[1]).astype('int64')
        batch_size = waveforms.shape[0]
        dropped_waveform = waveforms.clone()

        # Don't drop (return early) 1-`drop_prob` portion of the batches
        if paddle.rand([1]) > self.drop_prob:
            return dropped_waveform

        # Store original amplitude for computing white noise amplitude
        clean_amplitude = compute_amplitude(waveforms, lengths.unsqueeze(1))

        # Pick a number of times to drop
        drop_times = paddle.randint(
            low=self.drop_count_low,
            high=self.drop_count_high + 1,
            shape=[batch_size], )

        # Iterate batch to set mask
        for i in range(batch_size):
            if drop_times[i] == 0:
                continue

            # Pick lengths
            length = paddle.randint(
                low=self.drop_length_low,
                high=self.drop_length_high + 1,
                shape=[drop_times[i]], )

            # Compute range of starting locations
            # (negative bounds are offset by this item's length)
            start_min = self.drop_start
            if start_min < 0:
                start_min += lengths[i]
            start_max = self.drop_end
            if start_max is None:
                start_max = lengths[i]
            if start_max < 0:
                start_max += lengths[i]
            # Leave room for the longest chunk drawn for this item.
            start_max = max(0, start_max - length.max())

            # Pick starting locations
            start = paddle.randint(
                low=start_min,
                high=start_max + 1,
                shape=[drop_times[i]], )

            end = start + length

            # Update waveform
            if not self.noise_factor:
                for j in range(drop_times[i]):
                    if start[j] < end[j]:
                        dropped_waveform[i, start[j]:end[j]] = 0.0
            else:
                # Uniform distribution of -2 to +2 * avg amplitude should
                # preserve the average for normalization
                noise_max = 2 * clean_amplitude[i] * self.noise_factor
                for j in range(drop_times[i]):
                    # zero-center the noise distribution
                    noise_vec = paddle.rand([length[j]], dtype='float32')

                    noise_vec = 2 * noise_max * noise_vec - noise_max
                    dropped_waveform[i, int(start[j]):int(end[j])] = noise_vec

        return dropped_waveform


class Resample(nn.Layer):
    """Resample waveforms from ``orig_freq`` to ``new_freq``.

    The filter bank is a set of windowed-sinc low-pass kernels, one per
    output phase, applied with strided convolutions.
    """

    def __init__(
            self,
            orig_freq=16000,
            new_freq=16000,
            lowpass_filter_width=6, ):
        """Store the frequencies and derive the convolution strides.

        Args:
            orig_freq: sampling rate of the input.
            new_freq: sampling rate of the output.
            lowpass_filter_width: width setting of the low-pass filter; it
                determines the window width used for the kernels.
        """
        super(Resample, self).__init__()
        self.orig_freq = orig_freq
        self.new_freq = new_freq
        self.lowpass_filter_width = lowpass_filter_width

        # Compute rate for striding
        self._compute_strides()
        assert self.orig_freq % self.conv_stride == 0
        assert self.new_freq % self.conv_transpose_stride == 0

    def _compute_strides(self):
        """Set ``output_samples``, ``conv_stride`` and
        ``conv_transpose_stride`` from the gcd-reduced frequency ratio."""
        # Compute new unit based on ratio of in/out frequencies
        base_freq = math.gcd(self.orig_freq, self.new_freq)
        input_samples_in_unit = self.orig_freq // base_freq
        self.output_samples = self.new_freq // base_freq

        # Store the appropriate stride based on the new units
        self.conv_stride = input_samples_in_unit
        self.conv_transpose_stride = self.output_samples

    def forward(self, waveforms):
        """Resample a batch of waveforms.

        A 2-D input gets a dimension inserted at axis 1 for the computation
        and removed afterwards; a 3-D input is transposed with ``[0, 2, 1]``
        before and after. The input object itself is returned when both
        frequencies are equal.

        Raises:
            ValueError: if the input is neither 2-D nor 3-D.
        """
        # The filter bank is built lazily on the first call and then reused.
        if not hasattr(self, "first_indices"):
            self._indices_and_weights(waveforms)

        # Don't do anything if the frequencies are the same
        if self.orig_freq == self.new_freq:
            return waveforms

        unsqueezed = False
        if len(waveforms.shape) == 2:
            waveforms = waveforms.unsqueeze(1)
            unsqueezed = True
        elif len(waveforms.shape) == 3:
            waveforms = waveforms.transpose([0, 2, 1])
        else:
            raise ValueError("Input must be 2 or 3 dimensions")

        # Do resampling
        resampled_waveform = self._perform_resample(waveforms)

        if unsqueezed:
            resampled_waveform = resampled_waveform.squeeze(1)
        else:
            resampled_waveform = resampled_waveform.transpose([0, 2, 1])

        return resampled_waveform

    def _perform_resample(self, waveforms):
        """Apply the filter bank to a ``(batch, channels, length)`` tensor
        and return the accumulated output of all phases."""
        # Compute output size and initialize
        batch_size, num_channels, wave_len = waveforms.shape
        window_size = self.weights.shape[1]
        tot_output_samp = self._output_samples(wave_len)
        resampled_waveform = paddle.zeros((batch_size, num_channels,
                                           tot_output_samp))

        # eye size: (num_channels, num_channels, 1)
        eye = paddle.eye(num_channels).unsqueeze(2)

        # Iterate over the phases in the polyphase filter
        for i in range(self.first_indices.shape[0]):
            wave_to_conv = waveforms
            first_index = int(self.first_indices[i].item())
            if first_index >= 0:
                # trim the signal as the filter will not be applied
                # before the first_index
                wave_to_conv = wave_to_conv[:, :, first_index:]

            # pad the right of the signal to allow partial convolutions
            # meaning compute values for partial windows (e.g. end of the
            # window is outside the signal length)
            max_index = (tot_output_samp - 1) // self.output_samples
            end_index = max_index * self.conv_stride + window_size
            current_wave_len = wave_len - first_index
            right_padding = max(0, end_index + 1 - current_wave_len)
            left_padding = max(0, -first_index)
            wave_to_conv = paddle.nn.functional.pad(
                wave_to_conv, [left_padding, right_padding], data_format='NCL')
            # Grouped convolution: the same kernel is applied to every
            # channel independently.
            conv_wave = paddle.nn.functional.conv1d(
                x=wave_to_conv,
                # weight=self.weights[i].repeat(num_channels, 1, 1),
                weight=self.weights[i].expand((num_channels, 1, -1)),
                stride=self.conv_stride,
                groups=num_channels, )

            # we want conv_wave[:, i] to be at
            # output[:, i + n*conv_transpose_stride]
            dilated_conv_wave = paddle.nn.functional.conv1d_transpose(
                conv_wave, eye, stride=self.conv_transpose_stride)

            # pad dilated_conv_wave so it reaches the output length if needed.
            left_padding = i
            previous_padding = left_padding + dilated_conv_wave.shape[-1]
            right_padding = max(0, tot_output_samp - previous_padding)
            dilated_conv_wave = paddle.nn.functional.pad(
                dilated_conv_wave, [left_padding, right_padding],
                data_format='NCL')
            dilated_conv_wave = dilated_conv_wave[:, :, :tot_output_samp]

            resampled_waveform += dilated_conv_wave

        return resampled_waveform

    def _output_samples(self, input_num_samp):
        """Return how many output samples ``input_num_samp`` input samples
        produce (0 for a non-positive input length)."""
        samp_in = int(self.orig_freq)
        samp_out = int(self.new_freq)

        # Least common multiple of the two rates.
        tick_freq = abs(samp_in * samp_out) // math.gcd(samp_in, samp_out)
        ticks_per_input_period = tick_freq // samp_in

        # work out the number of ticks in the time interval
        # [ 0, input_num_samp/samp_in ).
        interval_length = input_num_samp * ticks_per_input_period
        if interval_length <= 0:
            return 0
        ticks_per_output_period = tick_freq // samp_out

        # Get the last output-sample in the closed interval,
        # i.e. replacing [ ) with [ ]. Note: integer division rounds down.
        # See http://en.wikipedia.org/wiki/Interval_(mathematics) for an
        # explanation of the notation.
        last_output_samp = interval_length // ticks_per_output_period

        # We need the last output-sample in the open interval, so if it
        # takes us to the end of the interval exactly, subtract one.
        if last_output_samp * ticks_per_output_period == interval_length:
            last_output_samp -= 1

        # First output-sample index is zero, so the number of output samples
        # is the last output-sample plus one.
        num_output_samp = last_output_samp + 1

        return num_output_samp

    def _indices_and_weights(self, waveforms):
        """Build the filter bank and store it on the layer.

        Sets ``self.first_indices`` (first input index used by each output
        phase) and ``self.weights`` (one kernel per output phase). The
        ``waveforms`` argument is not used in the body.
        """
        # Lowpass filter frequency depends on smaller of two frequencies
        min_freq = min(self.orig_freq, self.new_freq)
        lowpass_cutoff = 0.99 * 0.5 * min_freq

        assert lowpass_cutoff * 2 <= min_freq
        window_width = self.lowpass_filter_width / (2.0 * lowpass_cutoff)

        assert lowpass_cutoff < min(self.orig_freq, self.new_freq) / 2
        # NOTE(review): `output_t` is created as int64 and then divided in
        # place; the fractional times needed below depend on how the framework
        # promotes integer division - confirm on the targeted Paddle version.
        output_t = paddle.arange(
            start=0.0, end=self.output_samples, dtype='int64')
        output_t /= self.new_freq
        min_t = output_t - window_width
        max_t = output_t + window_width

        min_input_index = paddle.ceil(min_t * self.orig_freq)
        max_input_index = paddle.floor(max_t * self.orig_freq)
        num_indices = max_input_index - min_input_index + 1

        max_weight_width = num_indices.max()
        j = paddle.arange(max_weight_width, dtype='float32')
        input_index = min_input_index.unsqueeze(1) + j.unsqueeze(0)
        # Time offset of every input tap relative to its output sample.
        delta_t = (input_index / self.orig_freq) - output_t.unsqueeze(1)

        weights = paddle.zeros_like(delta_t)
        inside_window_indices = delta_t.abs().less_than(
            paddle.to_tensor(window_width))

        # raised-cosine (Hanning) window with width `window_width`
        weights[inside_window_indices] = 0.5 * (1 + paddle.cos(
            2 * math.pi * lowpass_cutoff / self.lowpass_filter_width *
            delta_t.masked_select(inside_window_indices)))

        t_eq_zero_indices = delta_t.equal(paddle.zeros_like(delta_t))
        t_not_eq_zero_indices = delta_t.not_equal(paddle.zeros_like(delta_t))

        # sinc filter function
        weights = paddle.where(
            t_not_eq_zero_indices,
            weights * paddle.sin(2 * math.pi * lowpass_cutoff * delta_t) /
            (math.pi * delta_t), weights)

        # limit of the function at t = 0
        weights = paddle.where(t_eq_zero_indices, weights * 2 * lowpass_cutoff,
                               weights)

        # size (output_samples, max_weight_width)
        weights /= self.orig_freq

        self.first_indices = min_input_index
        self.weights = weights


class SpeedPerturb(nn.Layer):
    """Augmentation layer that changes playback speed by resampling.

    One ``Resample`` layer is prepared per entry of ``speeds``; each forward
    call picks one of them at random.
    """

    def __init__(
            self,
            orig_freq,
            speeds=(90, 100, 110),
            perturb_prob=1.0, ):
        """Prepare one resampler per speed.

        Args:
            orig_freq: sampling rate of the input waveforms.
            speeds: speeds expressed as percentages of ``orig_freq``; each
                one maps to a target rate of ``orig_freq * speed // 100``.
            perturb_prob: probability threshold for perturbing a batch.
        """
        super(SpeedPerturb, self).__init__()
        self.orig_freq = orig_freq
        # Keep a per-instance copy: the default must not be a shared mutable
        # object, and later edits to a caller's list must not silently
        # disagree with the resamplers built below.
        self.speeds = list(speeds)
        self.perturb_prob = perturb_prob

        # Initialize index of perturbation
        self.samp_index = 0

        # Initialize resamplers
        self.resamplers = []
        for speed in self.speeds:
            config = {
                "orig_freq": self.orig_freq,
                "new_freq": self.orig_freq * speed // 100,
            }
            self.resamplers.append(Resample(**config))

    def forward(self, waveform):
        """Return ``waveform`` resampled with a randomly chosen speed, or an
        unmodified copy when the batch is not perturbed."""
        # Don't perturb (return early) 1-`perturb_prob` portion of the batches
        if paddle.rand([1]) > self.perturb_prob:
            return waveform.clone()

        # Perform a random perturbation
        # (the chosen index is kept on the instance as `samp_index`)
        self.samp_index = paddle.randint(len(self.speeds), shape=[1]).item()
        perturbed_waveform = self.resamplers[self.samp_index](waveform)

        return perturbed_waveform


class AddNoise(nn.Layer):
    """Mix noise into a batch of waveforms at a randomly drawn SNR.

    When `noise_dataset` is None, Gaussian white noise is generated;
    otherwise noise clips are drawn from a lazily created DataLoader over the
    dataset.

    Args:
        noise_dataset: dataset whose items provide `'utt_id'` and `'feat'`
            entries (both are read by the collate function), or None for
            white noise.
        num_workers: forwarded to the noise `paddle.io.DataLoader`.
        snr_low: lower bound of the uniformly drawn SNR.
        snr_high: upper bound of the uniformly drawn SNR.
        mix_prob: a batch is returned unchanged (as a copy) whenever a uniform
            draw from `paddle.rand` exceeds this value.
        start_index: fixed start offset into the noise clips; None selects a
            random offset for every batch.
        normalize: if True, rows whose absolute peak exceeds 1 are scaled
            down so that the peak becomes 1.
    """

    def __init__(
            self,
            noise_dataset=None,  # None for white noise
            num_workers=0,
            snr_low=0,
            snr_high=0,
            mix_prob=1.0,
            start_index=None,
            normalize=False, ):
        super(AddNoise, self).__init__()

        self.num_workers = num_workers
        self.snr_low = snr_low
        self.snr_high = snr_high
        self.mix_prob = mix_prob
        self.start_index = start_index
        self.normalize = normalize
        self.noise_dataset = noise_dataset
        # Created on the first call of `_load_noise`.
        self.noise_dataloader = None

    def forward(self, waveforms, lengths=None):
        """Return a noisy copy of `waveforms`.

        Args:
            waveforms: batch of waveforms; axis 1 is used as the time axis
                (assumes shape (batch, time) -- TODO confirm with callers).
            lengths: relative length of each waveform; it is multiplied by
                `waveforms.shape[1]` to obtain sample counts. Defaults to all
                ones.

        Returns:
            The noisy waveforms, same shape as the input.
        """
        if lengths is None:
            lengths = paddle.ones([len(waveforms)])

        # Copy clean waveform to initialize noisy waveform
        noisy_waveform = waveforms.clone()
        lengths = (lengths * waveforms.shape[1]).astype('int64').unsqueeze(1)

        # Don't add noise (return early) 1-`mix_prob` portion of the batches
        if paddle.rand([1]) > self.mix_prob:
            return noisy_waveform

        # Compute the average amplitude of the clean waveforms
        clean_amplitude = compute_amplitude(waveforms, lengths)

        # Pick an SNR and use it to compute the mixture amplitude factors
        SNR = paddle.rand((len(waveforms), 1))
        SNR = SNR * (self.snr_high - self.snr_low) + self.snr_low
        noise_amplitude_factor = 1 / (dB_to_amplitude(SNR) + 1)
        new_noise_amplitude = noise_amplitude_factor * clean_amplitude

        # Scale clean signal appropriately
        noisy_waveform *= 1 - noise_amplitude_factor

        # Loop through clean samples and create mixture
        if self.noise_dataset is None:
            white_noise = paddle.normal(shape=waveforms.shape)
            noisy_waveform += new_noise_amplitude * white_noise
        else:
            tensor_length = waveforms.shape[1]
            noise_waveform, noise_length = self._load_noise(
                lengths,
                tensor_length, )

            # Rescale and add
            noise_amplitude = compute_amplitude(noise_waveform, noise_length)
            noise_waveform *= new_noise_amplitude / (noise_amplitude + 1e-14)
            noisy_waveform += noise_waveform

        # Normalizing to prevent clipping
        if self.normalize:
            # paddle.max returns a single Tensor (unlike torch.max, which
            # returns a (values, indices) pair), so it must not be unpacked.
            abs_max = paddle.max(
                paddle.abs(noisy_waveform), axis=1, keepdim=True)
            noisy_waveform = noisy_waveform / abs_max.clip(min=1.0)

        return noisy_waveform

    def _load_noise(self, lengths, max_length):
        """
        Load a batch of noises

        args
        lengths(Paddle.Tensor): Num samples of waveforms with shape (N, 1).
        max_length(int): Width of a batch.

        returns
        The noise batch cut to at most `max_length` samples and the matching
        noise lengths with shape (N, 1).
        """
        lengths = lengths.squeeze(1)
        batch_size = len(lengths)

        # Load a noise batch
        if self.noise_dataloader is None:

            # NOTE(review): the loader is built only once, so this closure
            # keeps the `max_length` (and the loader keeps the `batch_size`)
            # of the first call -- confirm that batch widths never grow later.
            def noise_collate_fn(batch):
                def pad(x, target_length, mode='constant', **kwargs):
                    x = np.asarray(x)
                    w = target_length - x.shape[0]
                    assert w >= 0, f'Target length {target_length} is less than origin length {x.shape[0]}'
                    return np.pad(x, [0, w], mode=mode, **kwargs)

                ids = [item['utt_id'] for item in batch]
                lengths = np.asarray([item['feat'].shape[0] for item in batch])
                waveforms = list(
                    map(lambda x: pad(x, max(max_length, lengths.max().item())),
                        [item['feat'] for item in batch]))
                waveforms = np.stack(waveforms)
                return {'ids': ids, 'feats': waveforms, 'lengths': lengths}

            # Create noise data loader.
            self.noise_dataloader = paddle.io.DataLoader(
                self.noise_dataset,
                batch_size=batch_size,
                shuffle=True,
                num_workers=self.num_workers,
                collate_fn=noise_collate_fn,
                return_list=True, )
            self.noise_data = iter(self.noise_dataloader)

        noise_batch, noise_len = self._load_noise_batch_of_size(batch_size)

        # Select a random starting location in the waveform
        start_index = self.start_index
        if self.start_index is None:
            max_chop = (noise_len - lengths).min().clip(min=1)
            start_index = paddle.randint(high=max_chop, shape=[1])

        # Truncate noise_batch to max_length
        noise_batch = noise_batch[:, start_index:start_index + max_length]
        noise_len = (noise_len - start_index).clip(max=max_length).unsqueeze(1)
        return noise_batch, noise_len

    def _load_noise_batch_of_size(self, batch_size):
        """Concatenate noise batches, then chop to correct size"""
        noise_batch, noise_lens = self._load_noise_batch()

        # Expand: keep doubling the batch until it is large enough
        while len(noise_batch) < batch_size:
            noise_batch = paddle.concat((noise_batch, noise_batch))
            noise_lens = paddle.concat((noise_lens, noise_lens))

        # Contract
        if len(noise_batch) > batch_size:
            noise_batch = noise_batch[:batch_size]
            noise_lens = noise_lens[:batch_size]

        return noise_batch, noise_lens

    def _load_noise_batch(self):
        """Load a batch of noises, restarting iteration if necessary."""
        try:
            batch = next(self.noise_data)
        except StopIteration:
            self.noise_data = iter(self.noise_dataloader)
            batch = next(self.noise_data)

        noises, lens = batch['feats'], batch['lengths']
        return noises, lens


class AddReverb(nn.Layer):
    """Convolve waveforms with room impulse responses (RIRs) from a dataset.

    Args:
        rir_dataset: dataset whose items provide `'utt_id'` and `'feat'`
            entries (read by the collate function); its `sample_rate`
            attribute is read in `forward`.
        reverb_prob: a batch is returned unchanged (as a copy) whenever a
            uniform draw from `paddle.rand` exceeds this value.
        rir_scale_factor: when different from 1 the RIR is linearly
            interpolated by this factor before it is applied.
        num_workers: forwarded to the RIR `paddle.io.DataLoader`.
    """

    def __init__(
            self,
            rir_dataset,
            reverb_prob=1.0,
            rir_scale_factor=1.0,
            num_workers=0, ):
        super(AddReverb, self).__init__()
        self.rir_dataset = rir_dataset
        self.reverb_prob = reverb_prob
        self.rir_scale_factor = rir_scale_factor

        def rir_collate_fn(batch):
            """Right-pad every RIR of the batch to the longest one and stack."""

            def pad(x, target_length, mode='constant', **kwargs):
                x = np.asarray(x)
                w = target_length - x.shape[0]
                assert w >= 0, f'Target length {target_length} is less than origin length {x.shape[0]}'
                return np.pad(x, [0, w], mode=mode, **kwargs)

            ids = [item['utt_id'] for item in batch]
            lengths = np.asarray([item['feat'].shape[0] for item in batch])
            padded = [
                pad(item['feat'], lengths.max().item()) for item in batch
            ]
            return {
                'ids': ids,
                'feats': np.stack(padded),
                'lengths': lengths,
            }

        # Shuffled loader over the RIR dataset plus the iterator consumed by
        # `_load_rir`.
        self.rir_dataloader = paddle.io.DataLoader(
            self.rir_dataset,
            collate_fn=rir_collate_fn,
            num_workers=num_workers,
            shuffle=True,
            return_list=True, )
        self.rir_data = iter(self.rir_dataloader)

    def forward(self, waveforms, lengths=None):
        """Apply a randomly drawn RIR to `waveforms`.

        Args:
            waveforms: tensor of shape `[batch, time]` or
                `[batch, time, channels]`.
            lengths: tensor with a single dimension, `[batch]`.

        Returns:
            Tensor of shape `[batch, time]` or `[batch, time, channels]`,
            matching the rank of the input.
        """
        if lengths is None:
            lengths = paddle.ones([len(waveforms)])

        # Leave roughly (1 - reverb_prob) of the batches untouched.
        if paddle.rand([1]) > self.reverb_prob:
            return waveforms.clone()

        # Work on [batch, time, channels]; remember whether we added the axis.
        channel_added = len(waveforms.shape) == 2
        if channel_added:
            waveforms = waveforms.unsqueeze(-1)

        rir_waveform = self._load_rir()

        # Compress or dilate the RIR along time when requested.
        if self.rir_scale_factor != 1:
            # (N, L, C) -> (N, C, L) for the 'NCW' interpolation ...
            rir_ncw = F.interpolate(
                rir_waveform.transpose([0, 2, 1]),
                scale_factor=self.rir_scale_factor,
                mode="linear",
                align_corners=False,
                data_format='NCW', )
            # ... and back to (N, L, C).
            rir_waveform = rir_ncw.transpose([0, 2, 1])

        rev_waveform = reverberate(
            waveforms,
            rir_waveform,
            self.rir_dataset.sample_rate,
            rescale_amp="avg")

        # Drop the channel axis again if it was added above.
        return rev_waveform.squeeze(-1) if channel_added else rev_waveform

    def _load_rir(self):
        """Fetch the next RIR batch, restarting the iterator when exhausted."""
        try:
            rir_batch = next(self.rir_data)
        except StopIteration:
            self.rir_data = iter(self.rir_dataloader)
            rir_batch = next(self.rir_data)

        rir_waveform = rir_batch['feats']

        # Guarantee a trailing channel axis.
        if len(rir_waveform.shape) == 2:
            rir_waveform = rir_waveform.unsqueeze(-1)

        return rir_waveform


class AddBabble(nn.Layer):
    """Mix each waveform with other waveforms of the same batch ("babble").

    Args:
        speaker_count: number of rolled copies of the batch summed to form
            the babble signal.
        snr_low: lower bound of the uniformly drawn SNR.
        snr_high: upper bound of the uniformly drawn SNR.
        mix_prob: a batch is returned unchanged (as a copy) whenever a uniform
            draw from `paddle.rand` exceeds this value.
    """

    def __init__(
            self,
            speaker_count=3,
            snr_low=0,
            snr_high=0,
            mix_prob=1, ):
        super(AddBabble, self).__init__()
        self.speaker_count = speaker_count
        self.snr_low = snr_low
        self.snr_high = snr_high
        self.mix_prob = mix_prob

    def forward(self, waveforms, lengths=None):
        """Return a copy of `waveforms` with babble noise mixed in.

        `lengths` holds relative lengths; it is multiplied by
        `waveforms.shape[1]` and defaults to all ones.
        """
        if lengths is None:
            lengths = paddle.ones([len(waveforms)])

        babbled_waveform = waveforms.clone()
        lengths = (lengths * waveforms.shape[1]).unsqueeze(1)
        batch_size = len(waveforms)

        # Leave roughly (1 - mix_prob) of the batches untouched.
        if paddle.rand([1]) > self.mix_prob:
            return babbled_waveform

        # Draw one SNR per utterance and derive the target noise amplitude.
        clean_amplitude = compute_amplitude(waveforms, lengths)
        snr_span = self.snr_high - self.snr_low
        SNR = paddle.rand((batch_size, 1)) * snr_span + self.snr_low
        noise_amplitude_factor = 1 / (dB_to_amplitude(SNR) + 1)
        new_noise_amplitude = noise_amplitude_factor * clean_amplitude

        # Attenuate the clean part so that the mixture keeps its level.
        babbled_waveform *= 1 - noise_amplitude_factor

        # Sum rolled copies of the batch; track the longest contributor.
        babble_waveform = waveforms.roll((1, ), axis=0)
        babble_len = lengths.roll((1, ), axis=0)
        for shift in range(2, self.speaker_count + 1):
            babble_waveform += waveforms.roll((shift, ), axis=0)
            len_pair = paddle.concat(
                [babble_len, babble_len.roll((1, ), axis=0)], axis=-1)
            babble_len = len_pair.max(axis=-1, keepdim=True)

        # Rescale the babble to the target amplitude and add it in.
        babble_amplitude = compute_amplitude(babble_waveform, babble_len)
        babble_waveform *= new_noise_amplitude / (babble_amplitude + 1e-14)
        babbled_waveform += babble_waveform

        return babbled_waveform


class TimeDomainSpecAugment(nn.Layer):
    """Chain of speed perturbation, frequency dropping and chunk dropping.

    The constructor arguments are forwarded to `SpeedPerturb`
    (`perturb_prob`, `speeds`, `sample_rate`), `DropFreq` (`drop_freq_*`)
    and `DropChunk` (`drop_chunk_*`).
    """

    def __init__(
            self,
            perturb_prob=1.0,
            drop_freq_prob=1.0,
            drop_chunk_prob=1.0,
            speeds=[95, 100, 105],
            sample_rate=16000,
            drop_freq_count_low=0,
            drop_freq_count_high=3,
            drop_chunk_count_low=0,
            drop_chunk_count_high=5,
            drop_chunk_length_low=1000,
            drop_chunk_length_high=2000,
            drop_chunk_noise_factor=0, ):
        super(TimeDomainSpecAugment, self).__init__()

        # Stage 1: random resampling.
        self.speed_perturb = SpeedPerturb(
            orig_freq=sample_rate,
            speeds=speeds,
            perturb_prob=perturb_prob, )

        # Stage 2: frequency dropping.
        self.drop_freq = DropFreq(
            drop_count_low=drop_freq_count_low,
            drop_count_high=drop_freq_count_high,
            drop_prob=drop_freq_prob, )

        # Stage 3: chunk dropping.
        self.drop_chunk = DropChunk(
            drop_length_low=drop_chunk_length_low,
            drop_length_high=drop_chunk_length_high,
            drop_count_low=drop_chunk_count_low,
            drop_count_high=drop_chunk_count_high,
            noise_factor=drop_chunk_noise_factor,
            drop_prob=drop_chunk_prob, )

    def forward(self, waveforms, lengths=None):
        """Run the three stages in order, without gradient tracking.

        `lengths` defaults to all ones and is only passed to the chunk-drop
        stage.
        """
        if lengths is None:
            lengths = paddle.ones([len(waveforms)])

        with paddle.no_grad():
            perturbed = self.speed_perturb(waveforms)
            freq_dropped = self.drop_freq(perturbed)
            augmented = self.drop_chunk(freq_dropped, lengths)

        return augmented


class EnvCorrupt(nn.Layer):
    """Environmental corruption: optional reverb, babble and additive noise.

    A corrupter is only created when its inputs make it meaningful:

    * `add_reverb` needs `rir_dataset` and `reverb_prob > 0`;
    * `add_babble` needs `babble_speaker_count > 0` and `babble_prob > 0`;
    * `add_noise` needs `noise_dataset` and `noise_prob > 0`.

    Args:
        reverb_prob: probability passed to `AddReverb`.
        babble_prob: probability passed to `AddBabble` as `mix_prob`.
        noise_prob: probability passed to `AddNoise` as `mix_prob`.
        rir_dataset: dataset of room impulse responses, or None.
        noise_dataset: dataset of noise clips, or None.
        num_workers: DataLoader workers for the reverb and noise corrupters.
        babble_speaker_count: speaker count passed to `AddBabble`.
        babble_snr_low: lower SNR bound for babble.
        babble_snr_high: upper SNR bound for babble.
        noise_snr_low: lower SNR bound for noise.
        noise_snr_high: upper SNR bound for noise.
        rir_scale_factor: scale factor passed to `AddReverb`.
    """

    def __init__(
            self,
            reverb_prob=1.0,
            babble_prob=1.0,
            noise_prob=1.0,
            rir_dataset=None,
            noise_dataset=None,
            num_workers=0,
            babble_speaker_count=0,
            babble_snr_low=0,
            babble_snr_high=0,
            noise_snr_low=0,
            noise_snr_high=0,
            rir_scale_factor=1.0, ):
        super(EnvCorrupt, self).__init__()

        # Set once the first reverb failure has been reported, so that a
        # persistent failure does not flood the log on every batch.
        self._reverb_failure_logged = False

        # Initialize corrupters
        if rir_dataset is not None and reverb_prob > 0.0:
            self.add_reverb = AddReverb(
                rir_dataset=rir_dataset,
                num_workers=num_workers,
                reverb_prob=reverb_prob,
                rir_scale_factor=rir_scale_factor, )

        if babble_speaker_count > 0 and babble_prob > 0.0:
            self.add_babble = AddBabble(
                speaker_count=babble_speaker_count,
                snr_low=babble_snr_low,
                snr_high=babble_snr_high,
                mix_prob=babble_prob, )

        if noise_dataset is not None and noise_prob > 0.0:
            self.add_noise = AddNoise(
                noise_dataset=noise_dataset,
                num_workers=num_workers,
                snr_low=noise_snr_low,
                snr_high=noise_snr_high,
                mix_prob=noise_prob, )

    def forward(self, waveforms, lengths=None):
        """Apply the configured corrupters in order: reverb, babble, noise.

        Reverb is best effort: if it raises, the waveforms continue through
        the remaining stages without reverb and the failure is logged once.

        Args:
            waveforms: batch of waveforms.
            lengths: relative lengths, defaults to all ones.

        Returns:
            The corrupted waveforms.
        """
        if lengths is None:
            lengths = paddle.ones([len(waveforms)])

        # Augmentation
        with paddle.no_grad():
            if hasattr(self, "add_reverb"):
                try:
                    waveforms = self.add_reverb(waveforms, lengths)
                except Exception as e:
                    # Keep the best-effort behaviour, but do not hide the
                    # problem: a reverb stage that always fails (for example
                    # a RIR dataset without the `sample_rate` attribute that
                    # AddReverb reads) would otherwise go unnoticed.
                    if not self._reverb_failure_logged:
                        logger.warning(
                            "AddReverb failed, waveforms are passed on "
                            f"without reverb: {type(e).__name__}: {e}")
                        self._reverb_failure_logged = True
            if hasattr(self, "add_babble"):
                waveforms = self.add_babble(waveforms, lengths)
            if hasattr(self, "add_noise"):
                waveforms = self.add_noise(waveforms, lengths)

        return waveforms


def build_augment_pipeline(target_dir=None) -> List[paddle.nn.Layer]:
    """Build the list of augmentation layers.

    Note: this pipeline cannot be used in the paddle.DataLoader

    Args:
        target_dir: directory holding `rir_noise/csv/noise.csv` and
            `rir_noise/csv/rir.csv`. It is passed straight to `os.path.join`,
            so the default None is not usable in practice.

    Returns:
        List[paddle.nn.Layer]: the augmentations, in the order wave drop,
            speed perturbation, additive noise, reverb, reverb plus noise.
    """
    logger.info("start to build the augment pipeline")

    # Datasets backing the environmental corruptions.
    noise_csv = os.path.join(target_dir, "rir_noise/csv/noise.csv")
    noise_dataset = CSVDataset(csv_path=noise_csv)
    rir_csv = os.path.join(target_dir, "rir_noise/csv/rir.csv")
    rir_dataset = CSVDataset(csv_path=rir_csv)

    # Time-domain augmentations: a single 100% speed vs. three speeds.
    wavedrop = TimeDomainSpecAugment(sample_rate=16000, speeds=[100])
    speed_perturb = TimeDomainSpecAugment(
        sample_rate=16000, speeds=[95, 100, 105])

    # Settings shared by the two noise-based corruptions.
    noise_kwargs = dict(
        noise_dataset=noise_dataset,
        noise_prob=1.0,
        noise_snr_low=0,
        noise_snr_high=15, )

    add_noise = EnvCorrupt(
        reverb_prob=0.0, rir_scale_factor=1.0, **noise_kwargs)
    add_rev = EnvCorrupt(
        rir_dataset=rir_dataset,
        reverb_prob=1.0,
        noise_prob=0.0,
        rir_scale_factor=1.0, )
    add_rev_noise = EnvCorrupt(
        rir_dataset=rir_dataset,
        reverb_prob=1.0,
        rir_scale_factor=1.0,
        **noise_kwargs)

    return [wavedrop, speed_perturb, add_noise, add_rev, add_rev_noise]


def waveform_augment(waveforms: paddle.Tensor,
                     augment_pipeline: List[paddle.nn.Layer]) -> paddle.Tensor:
    """Run every augmentation on the batch and stack all results.

    Each augmented batch is cut or right-padded along axis 1 so that its
    width matches the original batch.

    Args:
        waveforms (paddle.Tensor): original batch waveform
        augment_pipeline (List[paddle.nn.Layer]): augment pipeline process

    Returns:
        paddle.Tensor: the original waveforms followed by one augmented copy
            per pipeline entry, concatenated along axis 0
    """
    # The untouched batch always comes first.
    collected = [waveforms]

    for augment in augment_pipeline:
        augmented = augment(waveforms)  # (N, L)

        missing = waveforms.shape[1] - augmented.shape[1]
        if missing > 0:
            # Too short: pad on the right of the time axis.
            augmented = F.pad(
                augmented.unsqueeze(-1), [0, missing],
                data_format='NLC').squeeze(-1)
        else:
            # Same width or longer: keep the leading samples only.
            augmented = augmented[:, :waveforms.shape[1]]

        collected.append(augmented)

    return paddle.concat(collected, axis=0)


================================================
FILE: paddlespeech/vector/io/batch.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy
import numpy as np
import paddle


def waveform_collate_fn(batch):
    """Stack the waveforms and the labels of a list of samples.

    Args:
        batch (list): samples from the dataloader; every sample is a mapping
                      that provides at least
                      feat: the utterance waveform data
                      label: the utterance label encoding data

    Returns:
        dict: `{'waveforms': ..., 'labels': ...}`, each value produced by
              `np.stack` over the batch
    """
    collated = {}
    # (output key, per-sample key); waveforms are stacked before labels.
    for out_key, item_key in (('waveforms', 'feat'), ('labels', 'label')):
        collated[out_key] = np.stack([sample[item_key] for sample in batch])

    return collated


def feature_normalize(feats: paddle.Tensor,
                      mean_norm: bool=True,
                      std_norm: bool=True,
                      convert_to_numpy: bool=False):
    """Normalize the features of one utterance along the last axis.

    Args:
        feats (paddle.Tensor): the original utterance feat, such as fbank, mfcc
        mean_norm (bool, optional): subtract the mean. Defaults to True.
        std_norm (bool, optional): divide by the standard deviation.
                                   Defaults to True.
        convert_to_numpy (bool, optional): compute the statistics with numpy
                                   and convert the result back to a tensor of
                                   the input dtype. Defaults to False.

    Returns:
        paddle.Tensor : the normalized feats
    """
    # numpy.mean and paddle.mean differ slightly (about 1e-6), which is why
    # both code paths exist.
    if not convert_to_numpy:
        offset = feats.mean(axis=-1, keepdim=True) if mean_norm else 0
        scale = feats.std(axis=-1, keepdim=True) if std_norm else 1
        return (feats - offset) / scale

    feats_np = feats.numpy()
    offset = feats_np.mean(axis=-1, keepdims=True) if mean_norm else 0
    scale = feats_np.std(axis=-1, keepdims=True) if std_norm else 1
    normalized = (feats_np - offset) / scale
    return paddle.to_tensor(normalized, dtype=feats.dtype)


def pad_right_2d(x, target_length, axis=-1, mode='constant', **kwargs):
    """Right-pad one axis of a 2D array up to `target_length`.

    Args:
        x: array-like, converted with `np.asarray`; must be 2D.
        target_length (int): wanted size of `axis` after padding.
        axis (int, optional): axis to pad; any valid index of a 2D array
            (0, 1, -1 or -2). Defaults to -1.
        mode (str, optional): padding mode of `np.pad`. Defaults to 'constant'.
        **kwargs: forwarded to `np.pad`.

    Returns:
        numpy.ndarray: the padded array.

    Raises:
        AssertionError: if `x` is not 2D or is already longer than
            `target_length` along `axis`.
    """
    x = np.asarray(x)
    assert len(
        x.shape) == 2, f'Only 2D arrays supported, but got shape: {x.shape}'

    w = target_length - x.shape[axis]
    assert w >= 0, f'Target length {target_length} is less than origin length {x.shape[axis]}'

    # Index the pad spec with `axis` itself so that negative axes select the
    # same dimension whose size was measured above (axis=-2 used to measure
    # axis 0 but pad axis 1).
    pad_width = [[0, 0], [0, 0]]
    pad_width[axis][1] = w

    return np.pad(x, pad_width, mode=mode, **kwargs)


def batch_feature_normalize(batch, mean_norm: bool=True, std_norm: bool=True):
    """Pad a batch of 2D features to a common width and normalize each one.

    Args:
        batch (list): samples from the dataloader, each providing `'utt_id'`
                      and a 2D `'feat'` whose axis 1 is padded
        mean_norm (bool, optional): mean normalization flag. Defaults to True.
        std_norm (bool, optional): std normalization flag. Defaults to True.

    Returns:
        dict: `'ids'`, the stacked and normalized `'feats'`, and `'lengths'`
              expressed as float32 ratios of the longest utterance
    """
    ids = [sample['utt_id'] for sample in batch]
    lengths = np.asarray([sample['feat'].shape[1] for sample in batch])

    # Right-pad every feature to the longest one, then stack.
    padded = [
        pad_right_2d(sample['feat'], lengths.max()) for sample in batch
    ]
    feats = np.stack(padded)

    # Normalize in place, using the unpadded frames only.
    for padded_feat, n_valid in zip(feats, lengths):
        valid = padded_feat[:, :n_valid]
        offset = valid.mean(axis=-1, keepdims=True) if mean_norm else 0
        scale = valid.std(axis=-1, keepdims=True) if std_norm else 1
        padded_feat[:, :n_valid] = (valid - offset) / scale
        # The padding must still be all zeros afterwards.
        assert padded_feat[:, n_valid:].sum() == 0

    # Express the lengths relative to the longest utterance: the longest one
    # needs no padding and maps to 1.0, all others to a ratio below 1.
    lengths = (lengths / lengths.max()).astype(np.float32)

    return {'ids': ids, 'feats': feats, 'lengths': lengths}


def pad_right_to(array, target_shape, mode="constant", value=0):
    """
    This function takes a numpy array of arbitrary shape and pads it to target
    shape by appending values on the right.

    Args:
        array: input numpy array. Input array whose dimension we need to pad.
        target_shape : (list, tuple). Target shape we want for the target
            array, its len must be equal to array.ndim
        mode : str. Pad mode, please refer to numpy.pad documentation.
        value : float. Pad value, only used when mode is "constant", please
            refer to numpy.pad documentation.

    Returns:
        array: numpy.array. Padded array.
        valid_vals : list. List containing proportion for each dimension of original, non-padded values.
    """
    assert len(target_shape) == array.ndim
    pads = []  # this contains the abs length of the padding for each dimension.
    valid_vals = []  # this contains the relative lengths for each dimension.
    for current, target in zip(array.shape, target_shape):
        assert (target >= current
                ), "Target shape must be >= original shape for every dim"
        pads.append([0, target - current])
        valid_vals.append(current / target)

    # numpy.pad rejects `constant_values` for every other built-in mode
    # ("edge", "reflect", ...), so only omit it for those string modes.
    # "constant" and callable modes still receive it, exactly as before.
    if isinstance(mode, str) and mode != "constant":
        pad_kwargs = {}
    else:
        pad_kwargs = {"constant_values": value}

    array = numpy.pad(array, pads, mode=mode, **pad_kwargs)

    return array, valid_vals


def batch_pad_right(arrays, mode="constant", value=0):
    """Given a list of numpy arrays it batches them together by padding to the right
    on each dimension in order to get same length for all.

    Args:
        arrays : list. List of array we wish to pad together.
        mode : str. Padding mode see numpy.pad documentation.
        value : float. Padding value see numpy.pad documentation.

    Returns:
        array : numpy.array. Padded array.
        valid_vals : numpy.array. Proportion of original, non-padded values
            along the last dimension, one entry per input array.

    Raises:
        IndexError: if `arrays` is empty or the arrays do not all have the
            same number of dimensions.
        EnvironmentError: if the arrays differ in a dimension other than the
            last one.
    """

    if not len(arrays):
        raise IndexError("arrays list must not be empty")

    if len(arrays) == 1:
        # if there is only one array in the batch we simply unsqueeze it.
        return numpy.expand_dims(arrays[0], axis=0), numpy.array([1.0])

    # Every array has to match the rank of the first one (`any` used to let
    # a mismatching array through as soon as one other array matched).
    if not all(arr.ndim == arrays[0].ndim for arr in arrays[1:]):
        raise IndexError("All arrays must have same number of dimensions")

    # FIXME we limit the support here: we allow padding of only the last dimension
    # need to remove this when feat extraction is updated to handle multichannel.
    max_shape = []
    for dim in range(arrays[0].ndim):
        if dim != (arrays[0].ndim - 1):
            if not all(
                [x.shape[dim] == arrays[0].shape[dim] for x in arrays[1:]]):
                raise EnvironmentError(
                    "arrays should have same dimensions except for last one")
        max_shape.append(max([x.shape[dim] for x in arrays]))

    batched = []
    valid = []
    for t in arrays:
        # for each array we apply pad_right_to
        padded, valid_percent = pad_right_to(
            t, max_shape, mode=mode, value=value)
        batched.append(padded)
        # only the last dimension is ever padded, keep its ratio
        valid.append(valid_percent[-1])

    batched = numpy.stack(batched)

    return batched, numpy.array(valid)


================================================
FILE: paddlespeech/vector/io/dataset.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random
from dataclasses import dataclass
from dataclasses import fields

from paddle.io import Dataset

from paddlespeech.audio.backends import soundfile_load as load_audio
from paddlespeech.audio.compliance.librosa import melspectrogram
from paddlespeech.s2t.utils.log import Log
logger = Log(__name__).getlog()

# the audio meta info in the vector CSVDataset
# utt_id: the utterance segment name
# duration: utterance segment time
# wav: utterance file path
# start: start point in the original wav file
# stop: stop point in the original wav file
# label: the utterance segment's label id


@dataclass
class meta_info:
    """the audio meta info in the vector CSVDataset

    One instance is created per data row of the csv file.

    Args:
        utt_id (str): the utterance segment name
        duration (float): utterance segment time
        wav (str): utterance file path
        start (int): start point in the original wav file
        stop (int): stop point in the original wav file
        label (str): the utterance segment's label id
    """
    utt_id: str
    duration: float
    wav: str
    start: int
    stop: int
    label: str


# Feature types supported by the csv dataset, keyed by `feat_type` name.
# The value is the function used to compute the feature from the waveform.
# raw: no function, the pcm sample points are returned as they are
# melspectrogram: fbank feature
feat_funcs = {
    'raw': None,
    'melspectrogram': melspectrogram,
}


class CSVDataset(Dataset):
    """Dataset of utterance segments described by a csv file."""

    def __init__(self,
                 csv_path,
                 label2id_path=None,
                 config=None,
                 random_chunk=True,
                 feat_type: str="raw",
                 n_train_snts: int=-1,
                 **kwargs):
        """Implement the CSV Dataset

        Args:
            csv_path (str): csv dataset file path
            label2id_path (str): the utterance label to integer id map file path
            config (CfgNode): yaml config; `random_chunk` and `chunk_duration`
                              are read from it when a record is built
            random_chunk (bool): stored on the instance.
            feat_type (str): dataset feature type. if it is raw, it return pcm data.
            n_train_snts (int): select the n_train_snts sample from the dataset. 
                                if n_train_snts = -1, dataset will load all the sample.
                                Default value is -1.
            kwargs : feature type args
        """
        super().__init__()
        self.csv_path = csv_path
        self.label2id_path = label2id_path
        self.config = config
        # NOTE(review): stored but not consulted by convert_to_record, which
        # checks `config.random_chunk` instead -- confirm which one is meant.
        self.random_chunk = random_chunk
        self.feat_type = feat_type
        self.n_train_snts = n_train_snts
        self.feat_config = kwargs
        self.id2label = {}
        self.label2id = {}
        self.data = self.load_data_csv()
        self.load_speaker_to_label()

    def load_data_csv(self):
        """Load the csv dataset content and store them in the data property
        the csv dataset's format has six fields, 
        that is audio_id or utt_id, audio duration, wav file path,
        segment start point, segment stop point and utterance label.
        The first line of the file is treated as a header and skipped.
        Note in training period, the utterance label must has a map to integer id in label2id_path 

        Returns:
            list: the csv data with meta_info type
        """
        data = []

        with open(self.csv_path, 'r') as rf:
            for line in rf.readlines()[1:]:
                line = line.strip()
                # tolerate blank lines, e.g. at the end of the file
                if not line:
                    continue
                audio_id, duration, wav, start, stop, spk_id = line.split(',')
                data.append(
                    meta_info(audio_id,
                              float(duration), wav,
                              int(start), int(stop), spk_id))
        if self.n_train_snts > 0:
            sample_num = min(self.n_train_snts, len(data))
            data = data[0:sample_num]

        return data

    def load_speaker_to_label(self):
        """Load the utterance label map content.
        In vector domain, we call the utterance label as speaker label.
        The speaker label is real speaker label in speaker verification domain,
        and in language identification is language label.

        Every line of the map file holds a label name and its integer id,
        separated by a single space.
        """
        if not self.label2id_path:
            logger.warning("No speaker id to label file")
            return

        with open(self.label2id_path, 'r') as f:
            for line in f.readlines():
                label_name, label_id = line.strip().split(' ')
                self.label2id[label_name] = int(label_id)
                self.id2label[int(label_id)] = label_name

    def convert_to_record(self, idx: int):
        """convert the dataset sample to training record the CSV Dataset

        Args:
            idx (int) : the request index in all the dataset

        Returns:
            dict: the meta_info fields of the sample plus `feat`; `label` is
                  replaced by its integer id when a label map was loaded

        Raises:
            ValueError: if a random chunk is requested from a wav that is
                        shorter than the chunk
        """
        sample = self.data[idx]

        # copy every dataclass field of the sample into a plain dict
        record = {}
        for field in fields(sample):
            record[field.name] = getattr(sample, field.name)

        waveform, sr = load_audio(record['wav'])

        # random select a chunk audio samples from the audio
        if self.config and self.config.random_chunk:
            num_wav_samples = waveform.shape[0]
            num_chunk_samples = int(self.config.chunk_duration * sr)
            spare_samples = num_wav_samples - num_chunk_samples
            if spare_samples < 0:
                raise ValueError(
                    f"wav {record['wav']} has {num_wav_samples} samples, "
                    f"less than the chunk size {num_chunk_samples}")
            # same start range as before whenever there is room to move the
            # chunk; a wav of exactly one chunk starts at 0 instead of making
            # random.randint fail on an empty range.
            start = random.randint(0, max(0, spare_samples - 1))
            stop = start + num_chunk_samples
        else:
            start = record['start']
            stop = record['stop']

        # we only return the waveform as feat
        waveform = waveform[start:stop]

        # all available feature types are in feat_funcs
        assert self.feat_type in feat_funcs.keys(), \
            f"Unknown feat_type: {self.feat_type}, it must be one in {list(feat_funcs.keys())}"
        feat_func = feat_funcs[self.feat_type]
        feat = feat_func(
            waveform, sr=sr, **self.feat_config) if feat_func else waveform

        record.update({'feat': feat})
        if self.label2id:
            record.update({'label': self.label2id[record['label']]})

        return record

    def __getitem__(self, idx):
        """Return the specific index sample

        Args:
            idx (int) : the request index in all the dataset
        """
        return self.convert_to_record(idx)

    def __len__(self):
        """Return the dataset length

        Returns:
            int: the length num of the dataset
        """
        return len(self.data)


================================================
FILE: paddlespeech/vector/io/dataset_from_json.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
from dataclasses import dataclass
from dataclasses import fields

from paddle.io import Dataset

from paddlespeech.audio.backends import soundfile_load as load_audio
from paddlespeech.audio.compliance.librosa import melspectrogram
from paddlespeech.audio.compliance.librosa import mfcc


@dataclass
class meta_info:
    """Metadata of one audio segment in the vector JSONDataset.

    Args:
        utt_id (str): the segment name
        duration (float): segment time
        wav (str): wav file path
        start (int): start point of the segment in the original wav file
        stop (int): stop point of the segment in the original wav file
        record_id (str): the id of the record the segment belongs to
    """
    utt_id: str
    duration: float
    wav: str
    start: int
    stop: int
    record_id: str


# Feature extractors supported by JSONDataset, keyed by the `feat_type` name.
# A value of None ('raw') means no extractor is applied and the waveform
# itself is used as the feature.
feat_funcs = {
    'raw': None,
    'melspectrogram': melspectrogram,
    'mfcc': mfcc,
}


class JSONDataset(Dataset):
    """Dataset whose segments are listed in a data-prep JSON file.

    Each top-level key of the JSON file is an utterance id mapped to an entry
    with a ``"wav"`` dict holding the ``file``, ``duration``, ``start`` and
    ``stop`` fields of the segment.
    """

    def __init__(self, json_file: str, feat_type: str='raw', **kwargs):
        """
        Args:
            json_file (:obj:`str`): Data prep JSON file.
            feat_type (:obj:`str`, `optional`, defaults to `raw`):
                Name of the feature to extract from each audio segment;
                it must be one of the keys of ``feat_funcs``.
            **kwargs: Extra keyword arguments forwarded to the feature function.

        Raises:
            RuntimeError: If ``feat_type`` is not a key of ``feat_funcs``.
        """
        supported_types = feat_funcs.keys()
        if feat_type not in supported_types:
            raise RuntimeError(
                f"Unknown feat_type: {feat_type}, it must be one in {list(feat_funcs.keys())}"
            )

        self.json_file = json_file
        self.feat_type = feat_type
        self.feat_config = kwargs
        self._data = self._get_data()
        super(JSONDataset, self).__init__()

    def _get_data(self):
        """Parse the JSON file into a list of ``meta_info`` entries."""
        with open(self.json_file, "r") as f:
            meta_data = json.load(f)

        def _build_entry(utt_key):
            segment = meta_data[utt_key]["wav"]
            wav_path = segment["file"]
            seg_duration = segment["duration"]
            seg_start = segment["start"]
            seg_stop = segment["stop"]
            # The record id is the key with its last two "_"-separated
            # fields removed.
            record_name = str(utt_key).rsplit("_", 2)[0]
            return meta_info(
                str(utt_key),
                float(seg_duration), wav_path,
                int(seg_start), int(seg_stop), str(record_name))

        return [_build_entry(utt_key) for utt_key in meta_data]

    def _convert_to_record(self, idx: int):
        """Load the segment at ``idx`` and return it as a dict with its feature."""
        sample = self._data[idx]

        # Copy every dataclass field of the sample into a plain dict.
        record = {
            field.name: getattr(sample, field.name)
            for field in fields(sample)
        }

        waveform, sr = load_audio(record['wav'])
        # Keep only the [start, stop) part of the loaded waveform.
        waveform = waveform[record['start']:record['stop']]

        extractor = feat_funcs[self.feat_type]
        if extractor:
            feat = extractor(waveform, sr=sr, **self.feat_config)
        else:
            # 'raw' feature type: the waveform itself is the feature.
            feat = waveform

        record['feat'] = feat
        return record

    def __getitem__(self, idx):
        """Return the record (metadata plus ``feat``) of the sample at ``idx``."""
        return self._convert_to_record(idx)

    def __len__(self):
        """Return the number of segments read from the JSON file."""
        return len(self._data)


================================================
FILE: paddlespeech/vector/io/embedding_norm.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Dict

import paddle


class InputNormalization:
    """Mean / standard-deviation normalization with running global statistics.

    Only ``norm_type == "global"`` modifies the input in ``__call__``; for any
    other value the input is returned unchanged. The per-speaker dictionaries
    are stored, saved and loaded, but are not updated by ``__call__``.
    """
    spk_dict_mean: Dict[int, paddle.Tensor]
    spk_dict_std: Dict[int, paddle.Tensor]
    spk_dict_count: Dict[int, int]

    def __init__(
            self,
            mean_norm=True,
            std_norm=True,
            norm_type="global", ):
        """Do feature or embedding mean and std norm

        Args:
            mean_norm (bool, optional): mean norm flag. Defaults to True.
            std_norm (bool, optional): std norm flag. Defaults to True.
            norm_type (str, optional): norm type. Defaults to "global".
        """
        super().__init__()
        self.training = True
        self.mean_norm = mean_norm
        self.std_norm = std_norm
        self.norm_type = norm_type
        self.glob_mean = paddle.to_tensor([0], dtype="float32")
        self.glob_std = paddle.to_tensor([0], dtype="float32")
        self.spk_dict_mean = {}
        self.spk_dict_std = {}
        self.spk_dict_count = {}
        self.weight = 1.0
        self.count = 0
        self.eps = 1e-10

    def __call__(self,
                 x,
                 lengths,
                 spk_ids=paddle.to_tensor([], dtype="float32")):
        """Normalize a batch with the running global statistics.

        Args:
            x (paddle.Tensor): A batch of tensors.
            lengths (paddle.Tensor): A batch of tensors containing the relative length of each
                                    sentence (e.g, [0.7, 0.9, 1.0]). It is used to avoid
                                    computing stats on zero-padded steps.
            spk_ids (paddle.Tensor, optional): tensor containing the ids of each speaker (e.g, [0 10 6]).
                                        It is currently not used by this method.
                                        Defaults to paddle.to_tensor([], dtype="float32").
        Returns:
            paddle.Tensor: The normalized feature or embedding when
            ``norm_type == "global"``; otherwise ``x`` unchanged.
        """
        N_batches = x.shape[0]

        current_means = []
        current_stds = []

        for snt_id in range(N_batches):

            # Avoiding padded time steps
            # actual size is the actual time data length
            actual_size = paddle.round(lengths[snt_id] *
                                       x.shape[1]).astype("int32")
            # Indexing with snt_id drops the batch axis, so it is restored
            # with unsqueeze(0) before the statistics are computed.
            current_mean, current_std = self._compute_current_stats(
                x[snt_id, 0:actual_size, ...].unsqueeze(0))
            current_means.append(current_mean)
            current_stds.append(current_std)

        if self.norm_type == "global":
            current_mean = paddle.mean(paddle.stack(current_means), axis=0)
            current_std = paddle.mean(paddle.stack(current_stds), axis=0)

            if self.training:
                if self.count == 0:
                    self.glob_mean = current_mean
                    self.glob_std = current_std

                else:
                    # Running average: the new batch gets weight 1/(count+1).
                    self.weight = 1 / (self.count + 1)

                    self.glob_mean = (
                        1 - self.weight
                    ) * self.glob_mean + self.weight * current_mean

                    self.glob_std = (
                        1 - self.weight
                    ) * self.glob_std + self.weight * current_std

                # detach() returns a new tensor, so the result must be
                # assigned back for the statistics to leave the graph.
                self.glob_mean = self.glob_mean.detach()
                self.glob_std = self.glob_std.detach()

                self.count = self.count + 1
            x = (x - self.glob_mean) / (self.glob_std)
        return x

    def _compute_current_stats(self, x):
        """Compute the mean and standard deviation of ``x`` along axis 0.

        Args:
            x (paddle.Tensor): A batch of tensors.

        Returns:
            tuple: ``(current_mean, current_std)``. When ``mean_norm`` is False
            the mean is a tensor holding 0.0; when ``std_norm`` is False the
            std is a tensor holding 1.0. The std is never smaller than
            ``self.eps``.
        """
        # Compute current mean
        if self.mean_norm:
            current_mean = paddle.mean(x, axis=0).detach()
        else:
            current_mean = paddle.to_tensor([0.0], dtype="float32")

        # Compute current std
        if self.std_norm:
            current_std = paddle.std(x, axis=0).detach()
        else:
            current_std = paddle.to_tensor([1.0], dtype="float32")

        # Improving numerical stability of std
        current_std = paddle.maximum(current_std,
                                     self.eps * paddle.ones_like(current_std))

        return current_mean, current_std

    def _statistics_dict(self):
        """Return a dictionary containing the normalization statistics.
        """
        state = {}
        state["count"] = self.count
        state["glob_mean"] = self.glob_mean
        state["glob_std"] = self.glob_std
        state["spk_dict_mean"] = self.spk_dict_mean
        state["spk_dict_std"] = self.spk_dict_std
        state["spk_dict_count"] = self.spk_dict_count

        return state

    def _load_statistics_dict(self, state):
        """Loads the dictionary containing the statistics.

        Args:
            state (dict): A dictionary containing the normalization statistics,
                with the keys written by ``_statistics_dict``.

        Returns:
            dict: the ``state`` argument, unchanged.
        """
        self.count = state["count"]
        # The global statistics are stored as-is, whatever their type.
        self.glob_mean = state["glob_mean"]
        self.glob_std = state["glob_std"]

        # Copy the per-speaker statistics into fresh dictionaries.
        self.spk_dict_mean = {
            spk: state["spk_dict_mean"][spk]
            for spk in state["spk_dict_mean"]
        }
        self.spk_dict_std = {
            spk: state["spk_dict_std"][spk]
            for spk in state["spk_dict_std"]
        }

        self.spk_dict_count = state["spk_dict_count"]

        return state

    @staticmethod
    def _to_device(value, device):
        """Move ``value`` to ``device`` when it offers a ``to`` method."""
        return value.to(device) if hasattr(value, "to") else value

    def to(self, device):
        """Puts the needed tensors in the right device.

        Args:
            device: target device, forwarded to each tensor's ``to`` method.

        Returns:
            InputNormalization: ``self``, to allow call chaining.
        """
        # This class derives from ``object``, which has no ``to`` method, so
        # there is no parent implementation to call here.
        self.glob_mean = self._to_device(self.glob_mean, device)
        self.glob_std = self._to_device(self.glob_std, device)
        for spk in self.spk_dict_mean:
            self.spk_dict_mean[spk] = self._to_device(self.spk_dict_mean[spk],
                                                      device)
            self.spk_dict_std[spk] = self._to_device(self.spk_dict_std[spk],
                                                     device)
        return self

    def save(self, path):
        """Save statistic dictionary.

        Args:
            path (str): A path where to save the dictionary.
        """
        stats = self._statistics_dict()
        paddle.save(stats, path)

    def _load(self, path, end_of_epoch=False, device=None):
        """Load statistic dictionary.

        Args:
            path (str): The path of the statistic dictionary.
            end_of_epoch (bool, optional): Unused here. Defaults to False.
            device (str, optional): When given, the loaded statistics are moved
                to this device after loading. Defaults to None.
        """
        del end_of_epoch  # Unused here.
        # ``map_location`` is not one of the configs accepted by paddle.load,
        # so the device placement is done explicitly after loading.
        stats = paddle.load(path)
        self._load_statistics_dict(stats)
        if device is not None:
            self.to(device)


================================================
FILE: paddlespeech/vector/io/signal_processing.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle

# TODO: Complete type-hint and doc string.


def blackman_window(win_len, dtype=np.float32):
    """Build a Blackman window of ``win_len`` points as a paddle tensor.

    The k-th coefficient is
    ``0.42 - 0.5 * cos(2 * pi * k / win_len) + 0.08 * cos(4 * pi * k / win_len)``.

    Args:
        win_len (int): number of points of the window.
        dtype: numpy dtype of the coefficients. Defaults to np.float32.

    Returns:
        paddle.Tensor: the window coefficients.
    """

    def _coefficient(phase):
        return 0.42 - 0.5 * np.cos(2 * phase) + 0.08 * np.cos(4 * phase)

    phases = np.pi * np.arange(win_len) / float(win_len)
    coefficients = np.asarray(
        [_coefficient(phase) for phase in phases], dtype=dtype)
    return paddle.to_tensor(coefficients)


def compute_amplitude(waveforms, lengths=None, amp_type="avg", scale="linear"):
    """Compute the amplitude of a batch of waveforms along axis 1.

    Args:
        waveforms (paddle.Tensor): the waveforms; a 1-D input is treated as a
            batch holding a single waveform.
        lengths (paddle.Tensor or number, optional): divisor used for the
            "avg" amplitude instead of the size of axis 1. Both tensors and
            plain Python numbers are accepted. Defaults to None.
        amp_type (str, optional): "avg" for the mean absolute value, "peak"
            for the maximum absolute value. Defaults to "avg".
        scale (str, optional): "linear" or "dB". Defaults to "linear".

    Returns:
        paddle.Tensor: the amplitude, with axis 1 kept as a size-1 axis. In
        "dB" scale the value is ``20 * log10(amplitude)`` clipped below at -80.
    """
    if len(waveforms.shape) == 1:
        waveforms = waveforms.unsqueeze(0)

    assert amp_type in ["avg", "peak"]
    assert scale in ["linear", "dB"]

    if amp_type == "avg":
        if lengths is None:
            out = paddle.mean(paddle.abs(waveforms), axis=1, keepdim=True)
        else:
            wav_sum = paddle.sum(paddle.abs(waveforms), axis=1, keepdim=True)
            # Callers may pass a plain int (e.g. ``waveforms.shape[1]``), which
            # has no ``astype``; only cast values that support it.
            if hasattr(lengths, "astype"):
                lengths = lengths.astype(wav_sum.dtype)
            out = wav_sum / lengths
    elif amp_type == "peak":
        out = paddle.max(paddle.abs(waveforms), axis=1, keepdim=True)
    else:
        raise NotImplementedError

    if scale == "linear":
        return out
    elif scale == "dB":
        return paddle.clip(20 * paddle.log10(out), min=-80)
    else:
        raise NotImplementedError


def dB_to_amplitude(SNR):
    """Convert a level in decibels to a linear amplitude ratio.

    Args:
        SNR: the level in dB.

    Returns:
        ``10 ** (SNR / 20)``.
    """
    exponent = SNR / 20
    return 10**exponent


def convolve1d(
        waveform,
        kernel,
        padding=0,
        pad_type="constant",
        stride=1,
        groups=1, ):
    """Run a 1-d convolution over a 3-d input whose time axis is axis 1.

    Args:
        waveform (paddle.Tensor): 3-d input, transposed to put axis 1 last
            before the convolution.
        kernel (paddle.Tensor): convolution kernel, transposed the same way.
        padding (int or list, optional): when it is a list, the input is padded
            explicitly with ``pad_type`` before the convolution and the
            convolution itself uses no padding; any other value is passed to
            the convolution unchanged. Defaults to 0.
        pad_type (str, optional): padding mode used for a list ``padding``.
            Defaults to "constant".
        stride (int, optional): convolution stride. Defaults to 1.
        groups (int, optional): convolution groups. Defaults to 1.

    Raises:
        ValueError: if ``waveform`` is not 3-dimensional.

    Returns:
        paddle.Tensor: the convolved signal, with the time axis back on axis 1.
    """
    if len(waveform.shape) != 3:
        raise ValueError("Convolve1D expects a 3-dimensional tensor")

    # A list padding is applied by hand; conv1d then gets no padding at all.
    pad_by_hand = isinstance(padding, list)
    if pad_by_hand:
        waveform = paddle.nn.functional.pad(
            x=waveform,
            pad=padding,
            mode=pad_type,
            data_format='NLC', )
    conv_padding = 0 if pad_by_hand else padding

    # (N, L, C) -> (N, C, L): conv1d expects the time axis last.
    signal = waveform.transpose([0, 2, 1])
    weight = kernel.transpose([0, 2, 1])

    result = paddle.nn.functional.conv1d(
        x=signal,
        weight=weight,
        stride=stride,
        groups=groups,
        padding=conv_padding, )

    # (N, C, L) -> (N, L, C)
    return result.transpose([0, 2, 1])


def notch_filter(notch_freq, filter_width=101, notch_width=0.05):
    """Build a notch filter as the sum of a low-pass and a high-pass filter.

    Args:
        notch_freq (float): frequency of the notch; must satisfy
            ``0 < notch_freq <= 1``.
        filter_width (int, optional): number of filter taps; must be odd.
            Defaults to 101.
        notch_width (float, optional): width of the notch. Defaults to 0.05.

    Returns:
        paddle.Tensor: the filter taps, reshaped to ``[1, filter_width, 1]``.
    """
    # Check inputs
    assert 0 < notch_freq <= 1
    assert filter_width % 2 != 0
    pad = filter_width // 2
    # Tap positions centred on zero: -pad, ..., 0, ..., pad
    inputs = paddle.arange(filter_width, dtype='float32') - pad

    # Avoid frequencies that are too low
    # After this shift the low-pass below uses the original notch_freq and
    # the high-pass uses the original notch_freq + 2 * notch_width.
    notch_freq += notch_width

    # Define sinc function, avoiding division by zero
    def sinc(x):
        def _sinc(x):
            return paddle.sin(x) / x

        # The zero is at the middle index
        # so that tap is set to 1 directly instead of computing sin(0) / 0.
        res = paddle.concat(
            [_sinc(x[:pad]), paddle.ones([1]), _sinc(x[pad + 1:])])
        return res

    # Compute a low-pass filter with cutoff frequency notch_freq.
    hlpf = sinc(3 * (notch_freq - notch_width) * inputs)
    # import torch
    # hlpf *= paddle.to_tensor(torch.blackman_window(filter_width).detach().numpy())
    # Window the taps, then normalize them so that they sum to 1.
    hlpf *= blackman_window(filter_width)
    hlpf /= paddle.sum(hlpf)

    # Compute a high-pass filter with cutoff frequency notch_freq.
    hhpf = sinc(3 * (notch_freq + notch_width) * inputs)
    # hhpf *= paddle.to_tensor(torch.blackman_window(filter_width).detach().numpy())
    hhpf *= blackman_window(filter_width)
    # Normalize with a sign flip (taps sum to -1), then add 1 to the centre tap.
    hhpf /= -paddle.sum(hhpf)
    hhpf[pad] += 1

    # Adding filters creates notch filter
    return (hlpf + hhpf).reshape([1, -1, 1])


def reverberate(waveforms,
                rir_waveform,
                sample_rate,
                impulse_duration=0.3,
                rescale_amp="avg"):
    """Convolve waveforms with a room impulse response and restore their level.

    Args:
        waveforms (paddle.Tensor): the clean signal, with 1, 2 or 3 dimensions.
        rir_waveform (paddle.Tensor): the impulse response, with 1, 2 or 3
            dimensions.
        sample_rate (int): sample rate, used to turn ``impulse_duration`` into
            a number of samples.
        impulse_duration (float, optional): length of the impulse response that
            is kept after its strongest sample. Defaults to 0.3.
        rescale_amp (str, optional): amplitude type used to measure the clean
            level and to rescale the result. Defaults to "avg".

    Raises:
        NotImplementedError: if either input has more than 3 dimensions.

    Returns:
        paddle.Tensor: the reverberated signal, with as many dimensions as
        the input ``waveforms``.
    """
    input_shape = waveforms.shape

    if len(waveforms.shape) > 3 or len(rir_waveform.shape) > 3:
        raise NotImplementedError

    def _as_3d(signal):
        # convolve1d expects a 3d tensor: add the missing batch and/or
        # channel axes.
        rank = len(signal.shape)
        if rank == 1:
            return signal.unsqueeze(0).unsqueeze(-1)
        if rank == 2:
            return signal.unsqueeze(-1)
        return signal

    waveforms = _as_3d(waveforms)
    rir_waveform = _as_3d(rir_waveform)

    # Amplitude of the clean signal, measured before the convolution.
    clean_level = compute_amplitude(waveforms, waveforms.shape[1],
                                    rescale_amp)

    # Keep the impulse response from its strongest sample onwards, so the
    # alignment with the direct signal is preserved.
    direct_index = rir_waveform.abs().argmax(axis=1).item()
    last_index = min(
        direct_index + int(sample_rate * impulse_duration),
        rir_waveform.shape[1])
    rir_waveform = rir_waveform[:, direct_index:last_index, :]
    rir_waveform = rir_waveform / paddle.norm(rir_waveform, p=2)
    rir_waveform = paddle.flip(rir_waveform, [1])

    # Left-pad by the kernel length minus one so the output keeps its length.
    waveforms = convolve1d(
        waveform=waveforms,
        kernel=rir_waveform,
        padding=[rir_waveform.shape[1] - 1, 0], )

    # Bring the result back to the amplitude of the clean waveform.
    waveforms = rescale(waveforms, waveforms.shape[1], clean_level,
                        rescale_amp)

    # Drop the axes that were added to reach 3 dimensions.
    input_rank = len(input_shape)
    if input_rank == 1:
        waveforms = waveforms.squeeze(0).squeeze(-1)
    if input_rank == 2:
        waveforms = waveforms.squeeze(-1)

    return waveforms


def rescale(waveforms, lengths, target_lvl, amp_type="avg", scale="linear"):
    """Normalize waveforms and scale them to a target level.

    Args:
        waveforms (paddle.Tensor): the waveforms; a 1-D input gets a batch axis
            for the computation, which is removed again from the result.
        lengths: lengths forwarded to ``normalize``.
        target_lvl: the target level, linear or in dB according to ``scale``.
        amp_type (str, optional): "avg" or "peak". Defaults to "avg".
        scale (str, optional): "linear" or "dB". Defaults to "linear".

    Raises:
        NotImplementedError: if ``scale`` is neither "linear" nor "dB".

    Returns:
        paddle.Tensor: the rescaled waveforms.
    """
    assert amp_type in ["peak", "avg"]
    assert scale in ["linear", "dB"]

    was_unbatched = len(waveforms.shape) == 1
    if was_unbatched:
        waveforms = waveforms.unsqueeze(0)

    waveforms = normalize(waveforms, lengths, amp_type)

    if scale == "linear":
        gain = target_lvl
    elif scale == "dB":
        gain = dB_to_amplitude(target_lvl)
    else:
        raise NotImplementedError("Invalid scale, choose between dB and linear")

    out = gain * waveforms
    return out.squeeze(0) if was_unbatched else out


def normalize(waveforms, lengths=None, amp_type="avg", eps=1e-14):
    """Divide waveforms by their amplitude.

    Args:
        waveforms (paddle.Tensor): the waveforms; a 1-D input is given a batch
            axis for the amplitude computation only.
        lengths (optional): lengths forwarded to ``compute_amplitude``.
            Defaults to None.
        amp_type (str, optional): "avg" or "peak". Defaults to "avg".
        eps (float, optional): value added to the amplitude to avoid a division
            by zero. Defaults to 1e-14.

    Returns:
        paddle.Tensor: ``waveforms`` divided by ``amplitude + eps``.
    """
    assert amp_type in ["avg", "peak"]

    unbatched = len(waveforms.shape) == 1
    batched = waveforms.unsqueeze(0) if unbatched else waveforms

    den = compute_amplitude(batched, lengths, amp_type) + eps
    # The division is done on the waveforms in their original layout.
    return waveforms / den


================================================
FILE: paddlespeech/vector/models/__init__.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/vector/models/ecapa_tdnn.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math

import paddle
import paddle.nn as nn
import paddle.nn.functional as F


def length_to_mask(length, max_len=None, dtype=None):
    """Turn a 1-D tensor of lengths into a mask of valid positions.

    Args:
        length (paddle.Tensor): 1-D tensor holding one length per row.
        max_len (int, optional): number of columns of the mask. Defaults to
            the largest value of ``length``.
        dtype (optional): dtype of the returned mask. Defaults to the dtype
            of ``length``.

    Returns:
        paddle.Tensor: mask of shape ``(len(length), max_len)`` where entry
        ``[i, j]`` is set when ``j < length[i]``.
    """
    assert len(length.shape) == 1

    if max_len is None:
        max_len = length.max().astype('int').item()

    # Compare every position index 0..max_len-1 with the length of its row.
    positions = paddle.arange(
        max_len, dtype=length.dtype).expand((len(length), max_len))
    mask = positions < length.unsqueeze(1)

    out_dtype = length.dtype if dtype is None else dtype
    return paddle.to_tensor(mask, dtype=out_dtype)


class Conv1d(nn.Layer):
    def __init__(
            self,
            in_channels,
            out_channels,
            kernel_size,
            stride=1,
            padding="same",
            dilation=1,
            groups=1,
            bias=True,
            padding_mode="reflect", ):
        """1-d convolution that pads its input explicitly before convolving.

        The wrapped ``nn.Conv1D`` is created with ``padding=0``; the padding is
        applied in ``forward`` with ``F.pad`` and ``padding_mode``.

        Args:
            in_channels (int): intput channel or input data dimensions
            out_channels (int): output channel or output data dimensions
            kernel_size (int): kernel size of 1-d convolution
            stride (int, optional): strid in 1-d convolution . Defaults to 1.
            padding (str, optional): padding type; only "same" is supported by
                ``forward``. Defaults to "same".
            dilation (int, optional): dilation in 1-d convolution. Defaults to 1.
            groups (int, optional): groups in 1-d convolution. Defaults to 1.
            bias (bool, optional): bias in 1-d convolution . Defaults to True.
            padding_mode (str, optional): padding mode. Defaults to "reflect".
        """
        super().__init__()

        self.kernel_size = kernel_size
        self.stride = stride
        self.dilation = dilation
        self.padding = padding

        # padding_mode is forcibly set to 'constant' when using the npu device because npu only support mode=constant right now
        if paddle.get_device().startswith('npu'):
            self.padding_mode = 'constant'
        else:
            self.padding_mode = padding_mode

        self.conv = nn.Conv1D(
            in_channels,
            out_channels,
            self.kernel_size,
            stride=self.stride,
            padding=0,
            dilation=self.dilation,
            groups=groups,
            bias_attr=bias, )

    def forward(self, x):
        """Do conv1d forward

        Args:
            x (paddle.Tensor): [N, C, L] input data, 
                                N is the batch,
                                C is the data dimension, 
                                L is the time

        Raises:
            ValueError: only support the same padding type

        Returns:
            paddle.Tensor: the value of conv1d
        """
        if self.padding == "same":
            x = self._manage_padding(x, self.kernel_size, self.dilation,
                                     self.stride)
        else:
            # f-string so that the offending value really shows in the message.
            raise ValueError(f"Padding must be 'same'. Got {self.padding}")

        return self.conv(x)

    def _manage_padding(self, x, kernel_size: int, dilation: int, stride: int):
        """Padding the input data

        Args:
            x (paddle.Tensor): [N, C, L] input data
                                N is the batch,
                                C is the data dimension, 
                                L is the time
            kernel_size (int): 1-d convolution kernel size
            dilation (int): 1-d convolution dilation
            stride (int): 1-d convolution stride

        Returns:
            paddle.Tensor: the padded input data
        """
        L_in = x.shape[-1]  # Detecting input shape
        padding = self._get_padding_elem(L_in, stride, kernel_size,
                                         dilation)  # Time padding
        x = F.pad(
            x, padding, mode=self.padding_mode,
            data_format="NCL")  # Applying padding
        return x

    def _get_padding_elem(self,
                          L_in: int,
                          stride: int,
                          kernel_size: int,
                          dilation: int):
        """Calculate the padding value in same mode

        Args:
            L_in (int): the times of the input data, 
            stride (int): 1-d convolution stride
            kernel_size (int): 1-d convolution kernel size
            dilation (int): 1-d convolution dilation

        Returns:
            list: ``[left, right]`` padding sizes in same mode
        """
        if stride > 1:
            # With a stride only half of the kernel is padded on each side.
            half_kernel = kernel_size // 2
            return [half_kernel, half_kernel]

        # Length the un-padded convolution would produce; half of the missing
        # samples is added on each side. The output length then equals L_in
        # when dilation * (kernel_size - 1) is even.
        L_out = (L_in - dilation * (kernel_size - 1) - 1) // stride + 1
        half_gap = (L_in - L_out) // 2
        return [half_gap, half_gap]


class BatchNorm1d(nn.Layer):
    def __init__(
            self,
            input_size,
            eps=1e-05,
            momentum=0.9,
            weight_attr=None,
            bias_attr=None,
            data_format='NCL',
            use_global_stats=None, ):
        """Thin wrapper around ``nn.BatchNorm1D``.

        Args:
            input_size (int): number of features passed to ``nn.BatchNorm1D``.
            eps (float, optional): passed as ``epsilon``. Defaults to 1e-05.
            momentum (float, optional): passed as ``momentum``. Defaults to 0.9.
            weight_attr (optional): passed as ``weight_attr``. Defaults to None.
            bias_attr (optional): passed as ``bias_attr``. Defaults to None.
            data_format (str, optional): passed as ``data_format``.
                Defaults to 'NCL'.
            use_global_stats (optional): passed as ``use_global_stats``.
                Defaults to None.
        """
        super().__init__()

        norm_options = dict(
            epsilon=eps,
            momentum=momentum,
            weight_attr=weight_attr,
            bias_attr=bias_attr,
            data_format=data_format,
            use_global_stats=use_global_stats, )
        self.norm = nn.BatchNorm1D(input_size, **norm_options)

    def forward(self, x):
        """Apply the wrapped batch normalization to ``x``."""
        return self.norm(x)


class TDNNBlock(nn.Layer):
    def __init__(
            self,
            in_channels,
            out_channels,
            kernel_size,
            dilation,
            activation=nn.ReLU, ):
        """TDNN block: 1-d convolution, then activation, then batch norm.

        Args:
            in_channels (int): input channels or input embedding dimensions
            out_channels (int): output channels or output embedding dimensions
            kernel_size (int): the kernel size of the TDNN network block
            dilation (int): the dilation of the TDNN network block
            activation (paddle class, optional): the activation layer class; it
                is instantiated without arguments. Defaults to nn.ReLU.
        """
        super().__init__()
        conv_options = dict(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            dilation=dilation, )
        self.conv = Conv1d(**conv_options)
        self.activation = activation()
        self.norm = BatchNorm1d(input_size=out_channels)

    def forward(self, x):
        """Return ``norm(activation(conv(x)))``."""
        hidden = self.conv(x)
        hidden = self.activation(hidden)
        return self.norm(hidden)


class Res2NetBlock(nn.Layer):
    def __init__(self, in_channels, out_channels, scale=8, dilation=1):
        """Res2Net block with dilation.

           The paper is "Res2Net: A New Multi-scale Backbone Architecture",
           whose url is https://arxiv.org/abs/1904.01169
        Args:
            in_channels (int): input channels or input dimensions; must be
                divisible by ``scale``.
            out_channels (int): output channels or output dimensions; must be
                divisible by ``scale``.
            scale (int, optional): number of channel groups in the res2net
                block. Defaults to 8.
            dilation (int, optional): dilation of 1-d convolution in TDNN block. Defaults to 1.
        """
        super().__init__()
        assert in_channels % scale == 0
        assert out_channels % scale == 0

        group_in = in_channels // scale
        group_out = out_channels // scale

        # One TDNN block per channel group, except the first group which is
        # passed through unchanged.
        tdnn_blocks = [
            TDNNBlock(group_in, group_out, kernel_size=3, dilation=dilation)
            for _ in range(scale - 1)
        ]
        self.blocks = nn.LayerList(tdnn_blocks)
        self.scale = scale

    def forward(self, x):
        """Process the channel groups of ``x`` and concatenate the results.

        The first group is kept as-is, the second goes through its block, and
        each later group is added to the previous output before its block.
        """
        groups = paddle.chunk(x, self.scale, axis=1)

        previous = groups[0]
        outputs = [previous]
        for index in range(1, len(groups)):
            block = self.blocks[index - 1]
            if index == 1:
                previous = block(groups[index])
            else:
                previous = block(groups[index] + previous)
            outputs.append(previous)

        return paddle.concat(outputs, axis=1)


class SEBlock(nn.Layer):
    def __init__(self, in_channels, se_channels, out_channels):
        """Squeeze-and-excitation block.

           The paper is "Squeeze-and-Excitation Networks"
           whose url is https://arxiv.org/abs/1709.01507
        Args:
            in_channels (int): input channels or input data dimensions
            se_channels (int): output channels of the first 1x1 convolution
                and input channels of the second one
            out_channels (int): output channels or output data dimensions
        """
        super().__init__()

        self.conv1 = Conv1d(
            in_channels=in_channels, out_channels=se_channels, kernel_size=1)
        self.relu = paddle.nn.ReLU()
        self.conv2 = Conv1d(
            in_channels=se_channels, out_channels=out_channels, kernel_size=1)
        self.sigmoid = paddle.nn.Sigmoid()

    def forward(self, x, lengths=None):
        """Scale ``x`` by gates computed from its mean over the last axis.

        Args:
            x (paddle.Tensor): input whose last axis (axis 2) is averaged.
            lengths (paddle.Tensor, optional): relative lengths; when given,
                only the positions inside each length enter the mean.
                Defaults to None.

        Returns:
            paddle.Tensor: the gates multiplied by ``x``.
        """
        n_frames = x.shape[-1]
        if lengths is None:
            s = x.mean(axis=2, keepdim=True)
        else:
            # Masked mean: sum over the valid frames divided by their count.
            mask = length_to_mask(lengths * n_frames, max_len=n_frames)
            mask = mask.unsqueeze(1)
            total = mask.sum(axis=2, keepdim=True)
            s = (x * mask).sum(axis=2, keepdim=True) / total

        squeezed = self.relu(self.conv1(s))
        gates = self.sigmoid(self.conv2(squeezed))

        return gates * x


class AttentiveStatisticsPooling(nn.Layer):
    def __init__(self, channels, attention_channels=128, global_context=True):
        """Attention-weighted mean and std pooling over the time axis.

           NOTE(review): the previous docstring pointed to section 3.1 of
           https://arxiv.org/pdf/1709.01507.pdf for details; confirm that this
           is the intended reference.
        Args:
            channels (int): input data channel or data dimension
            attention_channels (int, optional): attention dimension. Defaults to 128.
            global_context (bool, optional): If use the global context information. Defaults to True.
        """
        super().__init__()

        self.eps = 1e-12
        self.global_context = global_context
        # With global context the input is concatenated with its mean and std,
        # which triples the number of channels fed to the TDNN block.
        tdnn_in_channels = channels * 3 if global_context else channels
        self.tdnn = TDNNBlock(tdnn_in_channels, attention_channels, 1, 1)
        self.tanh = nn.Tanh()
        self.conv = Conv1d(
            in_channels=attention_channels,
            out_channels=channels,
            kernel_size=1)

    def forward(self, x, lengths=None):
        """Pool ``x`` over time into attention-weighted mean and std.

        Args:
            x (paddle.Tensor): input of shape (N, C, L).
            lengths (paddle.Tensor, optional): relative length of each item;
                defaults to ones, i.e. every frame is used.

        Returns:
            paddle.Tensor: mean and std concatenated on axis 1, with a
            trailing axis of size 1.
        """
        n_channels, n_frames = x.shape[1], x.shape[2]  # x is (N, C, L)

        def _weighted_stats(feats, weights, axis=2, eps=self.eps):
            mean = (weights * feats).sum(axis)
            squared_dev = (feats - mean.unsqueeze(axis)).pow(2)
            std = paddle.sqrt((weights * squared_dev).sum(axis).clip(eps))
            return mean, std

        if lengths is None:
            lengths = paddle.ones([x.shape[0]])

        # Binary mask of shape [N, 1, L]
        mask = length_to_mask(lengths * n_frames, max_len=n_frames)
        mask = mask.unsqueeze(1)

        # Let the self-attention look at global properties of the utterance
        # by appending the masked mean and std to every frame.
        if self.global_context:
            total = mask.sum(axis=2, keepdim=True).astype('float32')
            ctx_mean, ctx_std = _weighted_stats(x, mask / total)
            ctx_mean = ctx_mean.unsqueeze(2).tile((1, 1, n_frames))
            ctx_std = ctx_std.unsqueeze(2).tile((1, 1, n_frames))
            attn = paddle.concat([x, ctx_mean, ctx_std], axis=1)
        else:
            attn = x

        # Apply layers
        attn = self.conv(self.tanh(self.tdnn(attn)))

        if paddle.get_device().startswith('npu'):
            # The following way is designed to fix the 'Broadcast dimension mismatch' error
            # that occurs when using the npu device and setting padding_mode to 'constant'.
            inf_tensor = paddle.full_like(attn, float("-inf"))
        else:
            # the default way
            inf_tensor = paddle.ones_like(attn) * float("-inf")

        # Padded frames get -inf so that they receive no softmax weight.
        padded = mask.tile((1, n_channels, 1)) == 0
        attn = paddle.where(padded, inf_tensor, attn)

        attn = F.softmax(attn, axis=2)
        mean, std = _weighted_stats(x, attn)

        # Append mean and std of the batch
        return paddle.concat((mean, std), axis=1).unsqueeze(2)


class SERes2NetBlock(nn.Layer):
    def __init__(
            self,
            in_channels,
            out_channels,
            res2net_scale=8,
            se_channels=128,
            kernel_size=1,
            dilation=1,
            activation=nn.ReLU, ):
        """Implementation of the Squeeze-Excitation Res2Net block in ECAPA-TDNN network model
           The paper is refered "Squeeze-and-Excitation Networks"
           whose url is: https://arxiv.org/pdf/1709.01507.pdf
        Args:
            in_channels (int): input channels or input data dimensions
            out_channels (int): output channels or output data dimensions
            res2net_scale (int, optional): scale in the res2net block. Defaults to 8.
            se_channels (int, optional): embedding dimensions of the SE block. Defaults to 128.
            kernel_size (int, optional): accepted for interface compatibility; it is not
                read by this constructor (both TDNN blocks use kernel_size=1). Defaults to 1.
            dilation (int, optional): dilation forwarded to the Res2Net block. Defaults to 1.
            activation (paddle.nn.class, optional): activation function. Defaults to nn.ReLU.
        """
        super().__init__()
        self.out_channels = out_channels
        self.tdnn1 = TDNNBlock(
            in_channels,
            out_channels,
            kernel_size=1,
            dilation=1,
            activation=activation, )
        # NOTE(review): kernel_size is not forwarded here; confirm against the
        # Res2NetBlock signature whether that is intended.
        self.res2net_block = Res2NetBlock(out_channels, out_channels,
                                          res2net_scale, dilation)
        self.tdnn2 = TDNNBlock(
            out_channels,
            out_channels,
            kernel_size=1,
            dilation=1,
            activation=activation, )
        self.se_block = SEBlock(out_channels, se_channels, out_channels)

        # A 1x1 projection is only needed when the residual branch has to
        # change its channel count to match the main branch.
        self.shortcut = None
        if in_channels != out_channels:
            self.shortcut = Conv1d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=1, )

    def forward(self, x, lengths=None):
        """Run tdnn1 -> res2net -> tdnn2 -> SE and add the residual branch.

        Args:
            x (paddle.Tensor): input features.
            lengths (paddle.Tensor, optional): forwarded to the SE block. Defaults to None.

        Returns:
            paddle.Tensor: block output plus the (optionally projected) input.
        """
        # Test against None explicitly instead of relying on the truthiness
        # of a Layer object.
        residual = x if self.shortcut is None else self.shortcut(x)

        x = self.tdnn1(x)
        x = self.res2net_block(x)
        x = self.tdnn2(x)
        x = self.se_block(x, lengths)

        return x + residual


class EcapaTdnn(nn.Layer):
    def __init__(
            self,
            input_size,
            lin_neurons=192,
            activation=nn.ReLU,
            channels=[512, 512, 512, 512, 1536],
            kernel_sizes=[5, 3, 3, 3, 1],
            dilations=[1, 2, 3, 4, 1],
            attention_channels=128,
            res2net_scale=8,
            se_channels=128,
            global_context=True, ):
        """Implementation of ECAPA-TDNN backbone model network
           The paper is refered as "ECAPA-TDNN: Emphasized Channel Attention, Propagation and Aggregation in TDNN Based Speaker Verification"
           whose url is: https://arxiv.org/abs/2005.07143
        Args:
            input_size (int): input feature dimension
            lin_neurons (int, optional): speaker embedding size. Defaults to 192.
            activation (paddle.nn.class, optional): activation function. Defaults to nn.ReLU.
            channels (list, optional): inter embedding dimension. Defaults to [512, 512, 512, 512, 1536].
            kernel_sizes (list, optional): kernel size of 1-d convolution in TDNN block . Defaults to [5, 3, 3, 3, 1].
            dilations (list, optional): dilations of 1-d convolution in TDNN block. Defaults to [1, 2, 3, 4, 1].
            attention_channels (int, optional): attention dimensions. Defaults to 128.
            res2net_scale (int, optional): scale value in res2net. Defaults to 8.
            se_channels (int, optional): dimensions of squeeze-excitation block. Defaults to 128.
            global_context (bool, optional): global context flag. Defaults to True.
        """
        super().__init__()
        assert len(channels) == len(kernel_sizes)
        assert len(channels) == len(dilations)
        # Store a copy so the instance never aliases the shared default list.
        self.channels = list(channels)
        self.blocks = nn.LayerList()
        self.emb_size = lin_neurons

        # The initial TDNN layer
        self.blocks.append(
            TDNNBlock(
                input_size,
                channels[0],
                kernel_sizes[0],
                dilations[0],
                activation, ))

        # SE-Res2Net layers
        for i in range(1, len(channels) - 1):
            self.blocks.append(
                SERes2NetBlock(
                    channels[i - 1],
                    channels[i],
                    res2net_scale=res2net_scale,
                    se_channels=se_channels,
                    kernel_size=kernel_sizes[i],
                    dilation=dilations[i],
                    activation=activation, ))

        # Multi-layer feature aggregation. Its input is the channel-wise
        # concatenation of every SE-Res2Net output (see forward), so its width
        # is the sum of those channel counts; for the default configuration
        # this equals channels[-1].
        self.mfa = TDNNBlock(
            sum(channels[1:-1]),
            channels[-1],
            kernel_sizes[-1],
            dilations[-1],
            activation, )

        # Attentive Statistical Pooling
        self.asp = AttentiveStatisticsPooling(
            channels[-1],
            attention_channels=attention_channels,
            global_context=global_context, )
        self.asp_bn = BatchNorm1d(input_size=channels[-1] * 2)

        # Final linear transformation
        self.fc = Conv1d(
            in_channels=channels[-1] * 2,
            out_channels=self.emb_size,
            kernel_size=1, )

    def forward(self, x, lengths=None):
        """
        Compute embeddings.

        Args:
            x (paddle.Tensor): Input log-fbanks with shape (N, n_mels, T).
            lengths (paddle.Tensor, optional): Length proportions of batch length with shape (N). Defaults to None.

        Returns:
            paddle.Tensor: Output embeddings with shape (N, self.emb_size, 1)
        """
        xl = []
        for layer in self.blocks:
            # Only the SE-Res2Net blocks take `lengths`. Dispatching on the
            # layer type (rather than catching TypeError) keeps a genuine
            # TypeError raised inside a block from being swallowed and the
            # block from being silently re-run without lengths.
            if isinstance(layer, SERes2NetBlock):
                x = layer(x, lengths=lengths)
            else:
                x = layer(x)
            xl.append(x)

        # Multi-layer feature aggregation (the initial TDNN output is skipped)
        x = paddle.concat(xl[1:], axis=1)
        x = self.mfa(x)

        # Attentive Statistical Pooling
        x = self.asp(x, lengths=lengths)
        x = self.asp_bn(x)

        # Final linear transformation
        x = self.fc(x)

        return x


================================================
FILE: paddlespeech/vector/models/lstm_speaker_encoder.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle
from paddle import nn
from paddle.nn import functional as F
from paddle.nn import initializer as I
from scipy.interpolate import interp1d
from scipy.optimize import brentq
from sklearn.metrics import roc_curve


class LSTMSpeakerEncoder(nn.Layer):
    """LSTM speaker encoder with a GE2E-style similarity loss.

    An LSTM followed by a linear projection and ReLU produces L2-normalized
    utterance embeddings. A learnable scale and bias are applied to the
    cosine similarities used by the loss.
    """

    def __init__(self, n_mels, num_layers, hidden_size, output_size):
        """
        Args:
            n_mels (int): input feature size of the LSTM.
            num_layers (int): number of LSTM layers.
            hidden_size (int): LSTM hidden size.
            output_size (int): embedding size produced by the linear layer.
        """
        super().__init__()
        self.lstm = nn.LSTM(n_mels, hidden_size, num_layers)
        self.linear = nn.Linear(hidden_size, output_size)
        self.similarity_weight = self.create_parameter(
            [1], default_initializer=I.Constant(10.))
        self.similarity_bias = self.create_parameter(
            [1], default_initializer=I.Constant(-5.))

    def forward(self, utterances, num_speakers, initial_states=None):
        """Embed a batch of utterances and compute the loss and EER.

        Args:
            utterances (paddle.Tensor): batch of utterance features, grouped
                by speaker (all utterances of a speaker are contiguous).
            num_speakers (int): number of speakers in the batch.
            initial_states (optional): initial LSTM states. Defaults to None.

        Returns:
            tuple: (loss, eer) as returned by `loss`.
        """
        normalized_embeds = self.embed_sequences(utterances, initial_states)
        # Group as (speakers, utterances_per_speaker, embed_dim). The last
        # axis must stay the embedding dimension, which is what `loss` and
        # `similarity_matrix` expect.
        embed_dim = normalized_embeds.shape[-1]
        embeds = normalized_embeds.reshape([num_speakers, -1, embed_dim])
        loss, eer = self.loss(embeds)
        return loss, eer

    def embed_sequences(self, utterances, initial_states=None, reduce=False):
        """Return L2-normalized embeddings, optionally averaged over the batch.

        Args:
            utterances (paddle.Tensor): batch of utterance features.
            initial_states (optional): initial LSTM states. Defaults to None.
            reduce (bool, optional): if True, average the embeddings over
                axis 0 and re-normalize the result. Defaults to False.
        """
        out, (h, c) = self.lstm(utterances, initial_states)
        # Use the final hidden state of the last LSTM layer
        embeds = F.relu(self.linear(h[-1]))
        normalized_embeds = F.normalize(embeds)
        if reduce:
            embed = paddle.mean(normalized_embeds, 0)
            embed = F.normalize(embed, axis=0)
            return embed
        return normalized_embeds

    def embed_utterance(self, utterances, initial_states=None):
        """Embed several sequences and reduce them to a single embedding."""
        # utterances: [B, T, C] -> embed [C']
        embed = self.embed_sequences(utterances, initial_states, reduce=True)
        return embed

    def similarity_matrix(self, embeds):
        """Compute the scaled similarity of every utterance to every centroid.

        Args:
            embeds (paddle.Tensor): embeddings of shape (N, M, C), i.e.
                (speakers, utterances per speaker, embedding size).

        Returns:
            tuple: (p, p1, p2) where p has shape (N * M, N) and holds the
                scaled similarities, p1 holds the flattened similarities to
                the inclusive centroids, and p2 the similarities to each
                utterance's own exclusive centroid.
        """
        # (N, M, C)
        speakers_per_batch, utterances_per_speaker, embed_dim = embeds.shape

        # Inclusive centroids (1 per speaker)
        centroids_incl = paddle.mean(embeds, axis=1)
        centroids_incl_norm = paddle.norm(
            centroids_incl, p=2, axis=1, keepdim=True)
        normalized_centroids_incl = centroids_incl / centroids_incl_norm

        # Exclusive centroids (1 per utterance): the speaker's sum minus the
        # utterance itself, averaged over the remaining utterances.
        centroids_excl = paddle.broadcast_to(
            paddle.sum(embeds, axis=1, keepdim=True), embeds.shape) - embeds
        centroids_excl /= (utterances_per_speaker - 1)
        centroids_excl_norm = paddle.norm(
            centroids_excl, p=2, axis=2, keepdim=True)
        normalized_centroids_excl = centroids_excl / centroids_excl_norm

        p1 = paddle.matmul(
            embeds.reshape([-1, embed_dim]),
            normalized_centroids_incl,
            transpose_y=True)  # (NMN)
        p1 = p1.reshape([-1])
        p2 = paddle.bmm(
            embeds.reshape([-1, 1, embed_dim]),
            normalized_centroids_excl.reshape([-1, embed_dim, 1]))  # (NM, 1, 1)
        p2 = p2.reshape([-1])  # (NM)

        # begin: alternative implementation for scatter
        # `index` holds the flat positions, inside the (N * M, N) matrix, of
        # each utterance's similarity to its own speaker.
        with paddle.no_grad():
            index = paddle.arange(
                0, speakers_per_batch * utterances_per_speaker,
                dtype="int64").reshape(
                    [speakers_per_batch, utterances_per_speaker])
            index = index * speakers_per_batch + paddle.arange(
                0, speakers_per_batch, dtype="int64").unsqueeze(-1)
            index = paddle.reshape(index, [-1])
        ones = paddle.ones(
            [speakers_per_batch * utterances_per_speaker * speakers_per_batch])
        zeros = paddle.zeros_like(index, dtype=ones.dtype)
        # mask_p1 is 0 at own-speaker positions and 1 elsewhere, so p takes
        # p2 at own-speaker positions and p1 everywhere else.
        mask_p1 = paddle.scatter(ones, index, zeros)
        p = p1 * mask_p1 + (1 - mask_p1) * paddle.scatter(ones, index, p2)
        # end: alternative implementation for scatter
        # p = paddle.scatter(p1, index, p2)

        p = p * self.similarity_weight + self.similarity_bias  # neg
        p = p.reshape(
            [speakers_per_batch * utterances_per_speaker, speakers_per_batch])
        return p, p1, p2

    def do_gradient_ops(self):
        """Gradient post-processing hook for the similarity scale and bias."""
        # NOTE(review): the scaled gradient is only bound to the local name
        # `g`; nothing here writes it back to the parameter, so as written
        # this does not appear to change the stored gradients. Confirm the
        # intended behavior before relying on it.
        for p in [self.similarity_weight, self.similarity_bias]:
            g = p._grad_ivar()
            g = g * 0.01

    def inv_argmax(self, i, num):
        """Return a length-`num` one-hot integer vector with a 1 at index `i`."""
        return np.eye(1, num, i, dtype=int)[0]

    def loss(self, embeds):
        """
        Computes the softmax loss according the section 2.1 of GE2E.

        :param embeds: the embeddings as a tensor of shape (speakers_per_batch,
        utterances_per_speaker, embedding_size)
        :return: the loss and the EER for this batch of embeddings.
        """
        speakers_per_batch, utterances_per_speaker = embeds.shape[:2]

        # Loss
        sim_matrix, *_ = self.similarity_matrix(embeds)
        sim_matrix = sim_matrix.reshape(
            [speakers_per_batch * utterances_per_speaker, speakers_per_batch])
        # Target for every utterance is the index of its own speaker
        target = paddle.arange(
            0, speakers_per_batch, dtype="int64").unsqueeze(-1)
        target = paddle.expand(target,
                               [speakers_per_batch, utterances_per_speaker])
        target = paddle.reshape(target, [-1])

        loss = nn.CrossEntropyLoss()(sim_matrix, target)

        # EER (not backpropagated)
        with paddle.no_grad():
            ground_truth = target.numpy()
            labels = np.array(
                [self.inv_argmax(i, speakers_per_batch) for i in ground_truth])
            preds = sim_matrix.numpy()

            # Snippet from https://yangcha.github.io/EER-ROC/
            fpr, tpr, thresholds = roc_curve(labels.flatten(), preds.flatten())
            eer = brentq(lambda x: 1. - x - interp1d(fpr, tpr)(x), 0., 1.)

        return loss, eer


================================================
FILE: paddlespeech/vector/modules/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/vector/modules/layer.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
from paddle.autograd import PyLayer


class GradientReversalFunction(PyLayer):
    """Gradient reversal operation, as introduced in
    "Unsupervised Domain Adaptation by Backpropagation" (Ganin & Lempitsky, 2015).

    The forward pass returns a copy of the input; the backward pass multiplies
    the incoming gradient by -lambda.
    """

    @staticmethod
    def forward(ctx, x, lambda_=1):
        """Return a clone of ``x`` and stash ``lambda_`` for the backward pass.
        """
        ctx.save_for_backward(lambda_)
        return x.clone()

    @staticmethod
    def backward(ctx, grads):
        """Return the upstream gradient scaled by ``-lambda_``.
        """
        (scale, ) = ctx.saved_tensor()
        return -scale * grads


class GradientReversalLayer(nn.Layer):
    """Layer wrapper around ``GradientReversalFunction``
    (Ganin & Lempitsky, 2015, "Unsupervised Domain Adaptation by Backpropagation").

    Acts as the identity in the forward pass; in the backward pass the
    gradient is multiplied by ``-lambda_``.
    """

    def __init__(self, lambda_=1):
        super().__init__()
        # Factor applied (negated) to the gradient during backpropagation
        self.lambda_ = lambda_

    def forward(self, x):
        """Apply the gradient reversal function to ``x``.
        """
        reversed_x = GradientReversalFunction.apply(x, self.lambda_)
        return reversed_x


if __name__ == "__main__":
    # Smoke test: print the input gradient for lambda = 1 and lambda = -1.
    paddle.set_device("cpu")

    for lambda_ in (1, -1):
        data = paddle.randn([2, 3], dtype="float64")
        data.stop_gradient = False
        grl = GradientReversalLayer(lambda_)
        out = grl(data)
        out.mean().backward()
        print(data.grad)


================================================
FILE: paddlespeech/vector/modules/loss.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This is modified from SpeechBrain
# https://github.com/speechbrain/speechbrain/blob/085be635c07f16d42cd1295045bc46c407f1e15b/speechbrain/nnet/losses.py
import math

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import initializer as I


class AngularMargin(nn.Layer):
    def __init__(self, margin=0.0, scale=1.0):
        """Angular Margin (AM) as described in
           '''Margin Matters: Towards More Discriminative Deep Neural Network
           Embeddings for Speaker Recognition''' (https://arxiv.org/abs/1906.07317)

        Args:
            margin (float, optional): value subtracted from the target-class cosine similarity. Defaults to 0.0.
            scale (float, optional): factor applied to the margin-adjusted similarities. Defaults to 1.0.
        """
        super().__init__()
        self.margin = margin
        self.scale = scale

    def forward(self, outputs, targets):
        """Subtract ``margin * targets`` from ``outputs`` and scale the result.

        Args:
            outputs: similarity scores.
            targets: tensor multiplied by the margin; the wrapper in this
                file passes one-hot labels here.
        """
        penalty = self.margin * targets
        adjusted = outputs - penalty
        return self.scale * adjusted


class AdditiveAngularMargin(AngularMargin):
    def __init__(self, margin=0.0, scale=1.0, easy_margin=False):
        """The Implementation of Additive Angular Margin (AAM) proposed
       in the following paper: '''Margin Matters: Towards More Discriminative Deep Neural Network Embeddings for Speaker Recognition'''
       (https://arxiv.org/abs/1906.07317)

        Args:
            margin (float, optional): margin factor. Defaults to 0.0.
            scale (float, optional): scale factor. Defaults to 1.0.
            easy_margin (bool, optional): easy_margin flag. Defaults to False.
        """
        super(AdditiveAngularMargin, self).__init__(margin, scale)
        self.easy_margin = easy_margin

        # Constants of the angle-addition formula, computed once.
        self.cos_m = math.cos(self.margin)
        self.sin_m = math.sin(self.margin)
        # Threshold on cos(theta) and the linear penalty used below it.
        self.th = math.cos(math.pi - self.margin)
        self.mm = math.sin(math.pi - self.margin) * self.margin

    def forward(self, outputs, targets):
        """Apply the additive angular margin to the target-class cosines.

        Args:
            outputs: cosine similarities.
            targets: weights selecting where the margin is applied (the
                wrapper in this file passes one-hot labels).

        Returns:
            The margin-adjusted similarities multiplied by ``scale``.
        """
        cosine = outputs.astype('float32')
        # Rounding can push |cosine| marginally above 1, which would make the
        # radicand negative and turn the whole output into NaN; clip at 0.
        sine = paddle.sqrt(
            paddle.clip(1.0 - paddle.pow(cosine, 2), min=0.0))
        phi = cosine * self.cos_m - sine * self.sin_m  # cos(theta + m)
        if self.easy_margin:
            phi = paddle.where(cosine > 0, phi, cosine)
        else:
            phi = paddle.where(cosine > self.th, phi, cosine - self.mm)
        # phi where targets is 1, the plain cosine where it is 0
        outputs = (targets * phi) + ((1.0 - targets) * cosine)
        return self.scale * outputs


class LogSoftmaxWrapper(nn.Layer):
    def __init__(self, loss_fn):
        """Wrapper that turns a score transformation into a training loss:
           transform the scores, take log-softmax, then compare against
           one-hot targets with a summed KL divergence.
        Args:
            loss_fn (callable): transformation applied to the raw outputs,
                called as ``loss_fn(outputs, targets)`` or ``loss_fn(outputs)``
        """
        super().__init__()
        self.loss_fn = loss_fn
        self.criterion = paddle.nn.KLDivLoss(reduction="sum")

    def forward(self, outputs, targets, length=None):
        """Compute the loss for a batch.

        Args:
            outputs: scores whose second axis indexes the classes.
            targets: integer class labels; one-hot encoded internally.
            length: not used by this method.
        """
        one_hot = F.one_hot(targets, outputs.shape[1])

        # Fall back to the single-argument form when loss_fn rejects targets
        try:
            scores = self.loss_fn(outputs, one_hot)
        except TypeError:
            scores = self.loss_fn(outputs)

        log_probs = F.log_softmax(scores, axis=1)
        # Summed divergence divided by the sum of the one-hot targets
        return self.criterion(log_probs, one_hot) / one_hot.sum()


class NCELoss(nn.Layer):
    """Noise Contrastive Estimation loss funtion

    Noise Contrastive Estimation (NCE) is an approximation method that is used to
    work around the huge computational cost of large softmax layer.
    The basic idea is to convert the prediction problem into classification problem
    at training stage. It has been proved that these two criterions converges to
    the same minimal point as long as noise distribution is close enough to real one.

    NCE bridges the gap between generative models and discriminative models,
    rather than simply speedup the softmax layer.
    With NCE, you can turn almost anything into posterior with less effort (I think).

    Refs:
    NCE: http://www.cs.helsinki.fi/u/ahyvarin/papers/Gutmann10AISTATS.pdf
    Thanks: https://github.com/mingen-pan/easy-to-use-NCE-RNN-for-Pytorch/blob/master/nce.py

    Examples:
    Q = Q_from_tokens(output_dim)
    NCELoss(Q)
    """

    def __init__(self, Q, noise_ratio=100, Z_offset=9.5):
        """Noise Contrastive Estimation loss funtion

        Args:
            Q (tensor): prior model, uniform or guassian
            noise_ratio (int, optional): noise sampling times. Defaults to 100.
            Z_offset (float, optional): offset subtracted from the scores before exponentiation. Defaults to 9.5.
        """
        super(NCELoss, self).__init__()
        assert type(noise_ratio) is int
        self.Q = paddle.to_tensor(Q, stop_gradient=False)
        self.N = self.Q.shape[0]
        self.K = noise_ratio
        self.Z_offset = Z_offset

    def forward(self, output, target):
        """Forward inference

        Args:
            output (tensor): the model output, which is the input of loss function
            target (tensor): target class index of every sample

        Returns:
            tensor: mean NCE loss over the batch
        """
        output = paddle.reshape(output, [-1, self.N])
        B = output.shape[0]
        noise_idx = self.get_noise(B)
        # Column 0 holds the target index, columns 1..K the noise indices
        idx = self.get_combined_idx(target, noise_idx)
        P_target, P_noise = self.get_prob(idx, output, sep_target=True)
        Q_target, Q_noise = self.get_Q(idx)
        loss = self.nce_loss(P_target, P_noise, Q_noise, Q_target)
        return loss.mean()

    def get_Q(self, idx, sep_target=True):
        """Get prior model of batchsize data

        Looks up the prior probability Q for every index in ``idx``. When
        ``sep_target`` is True the first column (target) and the remaining
        columns (noise) are returned separately.
        """
        prob_model = paddle.to_tensor(
            self.Q.numpy()[paddle.reshape(idx, [-1]).numpy()])
        prob_model = paddle.reshape(prob_model, [idx.shape[0], idx.shape[1]])
        if sep_target:
            return prob_model[:, 0], prob_model[:, 1:]
        else:
            return prob_model

    def get_prob(self, idx, scores, sep_target=True):
        """Post processing the score of post model(output of nn) of batchsize data

        Gathers the scores at ``idx``, subtracts ``Z_offset`` and
        exponentiates. When ``sep_target`` is True the target column and the
        noise columns are returned separately.
        """
        scores = self.get_scores(idx, scores)
        scale = paddle.to_tensor([self.Z_offset], dtype='float64')
        scores = paddle.add(scores, -scale)
        prob = paddle.exp(scores)
        if sep_target:
            return prob[:, 0], prob[:, 1:]
        else:
            return prob

    def get_scores(self, idx, scores):
        """Get the score of post model(output of nn) of batchsize data

        For every row b, picks ``scores[b, idx[b, k]]`` by indexing into the
        flattened score matrix.
        """
        B, N = scores.shape
        K = idx.shape[1]
        # Offset of every row inside the flattened [B * N] score vector. Built
        # directly in int64 (and broadcast over the K columns) so that large
        # offsets are never rounded by a float32 intermediate.
        row_offset = paddle.reshape(paddle.arange(B, dtype="int64"),
                                    [B, 1]) * N
        new_idx = row_offset + idx
        new_scores = paddle.index_select(
            paddle.reshape(scores, [-1]), paddle.reshape(new_idx, [-1]))

        return paddle.reshape(new_scores, [B, K])

    def get_noise(self, batch_size, uniform=True):
        """Select noise sample

        Draws ``K`` noise class indices per sample, either uniformly or
        following the prior ``Q``.
        """
        # Imported locally: this module does not import numpy at file level,
        # so relying on a global `np` fails when the module is imported.
        import numpy as np

        if uniform:
            noise = np.random.randint(self.N, size=self.K * batch_size)
        else:
            # numpy expects an array of probabilities, as in get_Q
            noise = np.random.choice(
                self.N, self.K * batch_size, replace=True, p=self.Q.numpy())
        noise = paddle.to_tensor(noise, dtype='int64', stop_gradient=False)
        noise_idx = paddle.reshape(noise, [batch_size, self.K])
        return noise_idx

    def get_combined_idx(self, target_idx, noise_idx):
        """Combined target and noise

        Returns a [B, 1 + K] index tensor with the target in column 0.
        """
        target_idx = paddle.reshape(target_idx, [-1, 1])
        return paddle.concat((target_idx, noise_idx), 1)

    def nce_loss(self, prob_model, prob_noise_in_model, prob_noise,
                 prob_target_in_noise):
        """Combined the loss of target and noise

        Args:
            prob_model: model probability of the target.
            prob_noise_in_model: model probability of each noise sample.
            prob_noise: prior probability of each noise sample.
            prob_target_in_noise: prior probability of the target.

        Returns:
            tensor: per-sample loss, flattened to one dimension.
        """

        def safe_log(tensor):
            """Safe log
            """
            # Small constant keeps log() finite when the argument is zero
            EPSILON = 1e-10
            return paddle.log(EPSILON + tensor)

        model_loss = safe_log(prob_model /
                              (prob_model + self.K * prob_target_in_noise))
        model_loss = paddle.reshape(model_loss, [-1])

        noise_loss = paddle.sum(
            safe_log((self.K * prob_noise) /
                     (prob_noise_in_model + self.K * prob_noise)), -1)
        noise_loss = paddle.reshape(noise_loss, [-1])

        loss = -(model_loss + noise_loss)

        return loss


class FocalLoss(nn.Layer):
    r"""This criterion is a implemenation of Focal Loss, which is proposed in
    Focal Loss for Dense Object Detection.

        Loss(x, class) = - \alpha (1-softmax(x)[class])^gamma \log(softmax(x)[class])

    The losses are averaged across observations for each minibatch.

    Args:
        alpha(1D Tensor, Variable) : the scalar factor for this criterion
        gamma(float, double) : gamma > 0; reduces the relative loss for well-classified examples (p > .5),
                                putting more focus on hard, misclassified examples
        size_average(bool): By default, the losses are averaged over observations for each minibatch.
                            However, if the field size_average is set to False, the losses are
                            instead summed for each minibatch.
        ignore_index(int): forwarded to the underlying cross-entropy loss. Defaults to -100.
    """

    # The class docstring is a raw string so that the backslashes in the
    # formula are kept literally instead of being parsed as escape sequences.

    def __init__(self, alpha=1, gamma=0, size_average=True, ignore_index=-100):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.size_average = size_average
        # Per-sample cross entropy; the reduction is applied in forward
        self.ce = nn.CrossEntropyLoss(
            ignore_index=ignore_index, reduction="none")

    def forward(self, outputs, targets):
        """Forword inference.

        Args:
            outputs: input tensor
            targets: target label tensor

        Returns:
            The mean of the focal loss when ``size_average`` is True,
            otherwise its sum.
        """
        ce_loss = self.ce(outputs, targets)
        # ce_loss = -log(p_t), so exp(-ce_loss) recovers p_t
        pt = paddle.exp(-ce_loss)
        focal_loss = self.alpha * (1 - pt)**self.gamma * ce_loss
        if self.size_average:
            return focal_loss.mean()
        else:
            return focal_loss.sum()


class GE2ELoss(nn.Layer):
    """Generalized end-to-end loss which defined in the paper "GENERALIZED END-TO-END LOSS FOR SPEAKER VERIFICATION"
    """

    def __init__(self, init_w=10.0, init_b=-5.0, loss_method="softmax"):
        """
        Args:
            init_w (float, optional): initial value of the learnable similarity scale. Defaults to 10.0.
            init_b (float, optional): initial value of the learnable similarity bias. Defaults to -5.0.
            loss_method (str, optional): "softmax" or "contrast" (case-insensitive). Defaults to "softmax".
        """
        super(GE2ELoss, self).__init__()
        self.loss_method = loss_method.lower()
        self.w = self.create_parameter(
            [1], default_initializer=I.Constant(init_w))
        self.b = self.create_parameter(
            [1], default_initializer=I.Constant(init_b))
        assert self.loss_method in ["softmax", "contrast"]

    def get_cossim(self, embeddings_list, centroids):
        """Compute cosine similarity for each speaker

        Args:
            embeddings_list (list): one tensor of embeddings per speaker.
            centroids (tensor): one centroid per speaker, stacked on axis 0.

        Returns:
            list: per speaker, the scaled and shifted similarity of each of
                its embeddings to every centroid.
        """
        cossims = []
        for s_idx, embeddings in enumerate(embeddings_list):
            cossim = F.linear(embeddings, centroids.t())
            e_num = len(embeddings)
            if embeddings.ndim > 1 and e_num > 1:
                # For the speaker's own column, compare each embedding with
                # the centroid recomputed without that embedding.
                expand_centroids = paddle.expand(
                    centroids[s_idx], shape=[e_num, embeddings.shape[1]])
                new_centroids = (expand_centroids * e_num - embeddings) / (
                    e_num - 1)
                sims = F.cosine_similarity(embeddings, new_centroids)
                cossim[:, s_idx] = sims
            cossims.append(self.w * cossim + self.b)

        return cossims

    def cal_softmax_loss(self, cossims):
        """Calculate softmax loss

        Averages, over all embeddings, the negative log-softmax score of the
        embedding's own speaker.
        """
        loss = 0.0
        n = 0
        for s_idx, cossim in enumerate(cossims):
            loss += -F.log_softmax(cossim, axis=1)[:, s_idx].sum()
            n += cossim.shape[0]

        return loss / n

    def cal_contrast_loss(self, cossims):
        """Calculate contrast loss

        For every embedding: one minus the sigmoid similarity to its own
        speaker, plus the largest sigmoid similarity to any other speaker.
        The result is averaged over all embeddings.
        """
        loss = 0.0
        n = 0
        for s_idx, cossim in enumerate(cossims):
            cossim = F.sigmoid(cossim)
            col_loss = 1. - cossim[:, s_idx]
            if len(cossims) > 1:
                # Similarities to every speaker except the embedding's own
                if s_idx == 0:
                    excl_centroids_sigmoids = cossim[:, s_idx + 1:]
                elif s_idx == (len(cossims) - 1):
                    excl_centroids_sigmoids = cossim[:, :s_idx]
                else:
                    excl_centroids_sigmoids = paddle.concat(
                        (cossim[:, :s_idx], cossim[:, s_idx + 1:]), axis=1)
                # paddle.max with an axis returns only the values tensor, so
                # it is used as-is: indexing it with [0] would pick the first
                # embedding's maximum and broadcast it to all embeddings.
                col_loss = col_loss + paddle.max(
                    excl_centroids_sigmoids, axis=1)
            loss += col_loss.sum()
            n += cossim.shape[0]

        return loss / n

    def forward(self, output, target):
        """Forward inference

            Args:
                output: input tensor
                target: target label tensor
        """
        spkers = paddle.unique(target)

        # Group the embeddings by speaker label
        embeddings_list = []
        for spkid in spkers:
            index = (target == spkid).nonzero().reshape([-1])
            embeddings_list.append(output[index])
        # cal centroid
        centroids = []
        for embeddings in embeddings_list:
            if (embeddings.ndim > 1):
                spker_centroid = paddle.mean(embeddings, axis=0)
            else:
                spker_centroid = embeddings
            # L2-normalize every centroid
            centroids.append(spker_centroid.clone() / paddle.norm(
                spker_centroid, axis=0, keepdim=True))
        centroids = paddle.stack(centroids)
        # cal cosine similarity
        cossims = self.get_cossim(embeddings_list, centroids)

        # cal loss
        if self.loss_method == "softmax":
            loss = self.cal_softmax_loss(cossims)
        else:
            loss = self.cal_contrast_loss(cossims)

        return loss


if __name__ == "__main__":
    # Smoke test: evaluate every loss in this module once on random data.
    import numpy as np
    from paddlespeech.vector.utils.vector_utils import Q_from_tokens
    paddle.set_device("cpu")

    # 32 samples with 100 features each, labelled with classes 0..3
    raw_feats = paddle.uniform([32, 100], dtype="float64")
    raw_labels = np.random.randint(0, 4, size=(32)).astype(np.int64)
    feats = paddle.to_tensor(raw_feats)
    labels = paddle.to_tensor(raw_labels)

    ge2e_softmax = GE2ELoss(loss_method="softmax")
    value = ge2e_softmax.forward(feats, labels)
    print("GE2ELoss softmax-loss: %.5f" % (value[0]))

    ge2e_contrast = GE2ELoss(loss_method="contrast")
    value = ge2e_contrast.forward(feats, labels)
    print("GE2ELoss contrast-loss: %.5f" % (value[0]))

    focal = FocalLoss()
    value = focal.forward(feats, labels)
    print("FocalLoss loss: %.5f" % (value))

    prior = Q_from_tokens(100)
    nce = NCELoss(prior)
    value = nce.forward(feats, labels)
    print("NCELoss loss: %.5f" % (value))


================================================
FILE: paddlespeech/vector/modules/sid_model.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F


class SpeakerIdetification(nn.Layer):
    def __init__(
            self,
            backbone,
            num_class,
            lin_blocks=0,
            lin_neurons=192,
            dropout=0.1, ):
        """Speaker identification model: an embedding backbone followed by a
           cosine-similarity classifier over the training speakers.

        Args:
            backbone (paddle.nn.Layer): the speaker embedding network; it must expose an ``emb_size`` attribute
            num_class (int): the number of speaker classes in the training dataset
            lin_blocks (int, optional): how many (BatchNorm1D, Linear) pairs are inserted between the embedding and the classifier. Defaults to 0.
            lin_neurons (int, optional): the output dimension of every inserted Linear layer. Defaults to 192.
            dropout (float, optional): the dropout probability on the embedding; a value <= 0 disables dropout. Defaults to 0.1.
        """
        super(SpeakerIdetification, self).__init__()
        # the backbone output is the speaker embedding
        self.backbone = backbone
        self.dropout = nn.Dropout(dropout) if dropout > 0 else None

        # optional stack of BatchNorm1D + Linear blocks on top of the embedding
        feat_dim = self.backbone.emb_size
        self.blocks = nn.LayerList()
        for _ in range(lin_blocks):
            self.blocks.append(nn.BatchNorm1D(feat_dim))
            self.blocks.append(
                nn.Linear(in_features=feat_dim, out_features=lin_neurons))
            feat_dim = lin_neurons

        # classifier weight, one column per speaker class
        self.weight = paddle.create_parameter(
            shape=(feat_dim, num_class),
            dtype='float32',
            attr=paddle.ParamAttr(initializer=nn.initializer.XavierUniform()), )

    def forward(self, x, lengths=None):
        """Run the backbone and the classifier on a batch of features.

        Args:
            x (paddle.Tensor): input audio feats,
                               shape=[batch, dimension, times]
            lengths (paddle.Tensor, optional): input audio length, forwarded
                                        to the backbone unchanged.
                                        Defaults to None.

        Returns:
            paddle.Tensor: the logits, computed as the product of the
                           normalized features and the normalized class weights
        """
        # the backbone is expected to return (N, emb_size, 1); drop the last axis
        embedding = self.backbone(x, lengths)
        embedding = embedding.squeeze(-1)

        if self.dropout is not None:
            embedding = self.dropout(embedding)

        for layer in self.blocks:
            embedding = layer(embedding)

        # normalize both operands so the linear product is a cosine similarity
        unit_embedding = F.normalize(embedding)
        unit_weight = F.normalize(self.weight, axis=0)
        return F.linear(unit_embedding, unit_weight)


================================================
FILE: paddlespeech/vector/training/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/vector/training/scheduler.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.optimizer.lr import LRScheduler


class CyclicLRScheduler(LRScheduler):
    """Triangular cyclic learning-rate schedule.

    One cycle lasts ``2 * step_size`` calls to :meth:`step`: the rate rises
    linearly from ``base_lr`` to ``max_lr`` during the first half and falls
    linearly back towards ``base_lr`` during the second half, then repeats.

    Args:
        base_lr (float): learning rate at the bottom of the cycle.
        max_lr (float): learning rate at the top of the cycle.
        step_size (int): number of steps in each half of the cycle.
    """

    def __init__(self,
                 base_lr: float=1e-8,
                 max_lr: float=1e-3,
                 step_size: int=10000):

        # The parent constructor stores its ``learning_rate`` argument as the
        # initial ``last_lr`` and then calls ``self.step()``, which is a no-op
        # here because ``current_step`` does not exist yet. Passing ``base_lr``
        # makes the rate reported before the first real ``step()`` the bottom
        # of the cycle instead of the parent's default learning rate.
        super(CyclicLRScheduler, self).__init__(learning_rate=base_lr)

        self.current_step = -1
        self.base_lr = base_lr
        self.max_lr = max_lr
        self.step_size = step_size

    def step(self):
        """Advance one step inside the cycle and refresh ``last_lr``."""
        # Called once from LRScheduler.__init__ before our attributes exist.
        if not hasattr(self, 'current_step'):
            return

        self.current_step += 1
        # Wrap around at the end of a full (up + down) cycle.
        if self.current_step >= 2 * self.step_size:
            self.current_step %= 2 * self.step_size

        self.last_lr = self.get_lr()

    def get_lr(self):
        """Return the learning rate for the current position in the cycle."""
        p = self.current_step / (2 * self.step_size)  # Proportion in one cycle.
        if p < 0.5:  # Increase
            return self.base_lr + p / 0.5 * (self.max_lr - self.base_lr)
        else:  # Decrease
            return self.max_lr - (p / 0.5 - 1) * (self.max_lr - self.base_lr)


================================================
FILE: paddlespeech/vector/training/seeding.py
================================================
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddlespeech.s2t.utils.log import Log

logger = Log(__name__).getlog()
import random

import numpy as np
import paddle


def seed_everything(seed: int):
    """Apply the same seed to paddle, random and np.random for reproducibility."""
    # Seeding order is paddle -> random -> numpy.
    for apply_seed in (paddle.seed, random.seed, np.random.seed):
        apply_seed(seed)
    logger.info(f"Set the seed of paddle, random, np.random to {seed}.")


================================================
FILE: paddlespeech/vector/utils/__init__.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


================================================
FILE: paddlespeech/vector/utils/time.py
================================================
# Copyright (c) 2021  PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import time


class Timer(object):
    '''Calculate running speed and estimated time of arrival (ETA).

    Typical use: call ``start()`` once, ``count()`` after every step, and read
    ``timing`` / ``ips`` / ``eta`` whenever progress is logged.
    '''

    def __init__(self, total_step: int):
        self.total_step = total_step
        self.last_start_step = 0
        self.current_step = 0
        self._is_running = True
        self.cur_ips = 0

    def start(self):
        '''Record the start of the run and of the first measurement window.'''
        now = time.time()
        self.last_time = now
        self.start_time = now

    def stop(self):
        '''Mark the timer as finished and record the end time.'''
        self._is_running = False
        self.end_time = time.time()

    def count(self) -> int:
        '''Advance the step counter (capped at ``total_step``) and return it.'''
        if self.current_step < self.total_step:
            self.current_step += 1
        return self.current_step

    @property
    def timing(self) -> float:
        '''Average seconds per step since the previous read.

        Reading this property closes the current measurement window and
        refreshes ``ips``. When no step was counted inside the window there is
        nothing to average, so 0.0 is returned instead of dividing by zero.
        '''
        now = time.time()
        run_steps = self.current_step - self.last_start_step
        time_used = now - self.last_time
        self.last_start_step = self.current_step
        self.last_time = now
        if run_steps <= 0:
            self.cur_ips = 0
            return 0.0
        # A zero-length window can occur with a coarse clock; report 0 ips
        # rather than raising.
        self.cur_ips = run_steps / time_used if time_used > 0 else 0
        return time_used / run_steps

    @property
    def is_running(self) -> bool:
        return self._is_running

    @property
    def ips(self) -> float:
        '''Steps per second measured by the latest ``timing`` read.'''
        return self.cur_ips

    @property
    def eta(self) -> str:
        '''Estimated remaining time as ``hh:mm:ss``.

        The estimate extrapolates the average time per completed step over the
        steps that are still left. Returns ``'00:00:00'`` once the timer is
        stopped and ``'--:--:--'`` while no step has been completed yet.
        '''
        if not self.is_running:
            return '00:00:00'
        if self.current_step <= 0:
            # No progress yet, so there is no rate to extrapolate from.
            return '--:--:--'
        elapsed = time.time() - self.start_time
        steps_left = max(self.total_step - self.current_step, 0)
        remaining_time = elapsed / self.current_step * steps_left
        return seconds_to_hms(remaining_time)


def seconds_to_hms(seconds: int) -> str:
    '''Convert the number of seconds to hh:mm:ss (fractions are dropped).'''
    whole_seconds = math.floor(seconds)
    h, rest = divmod(whole_seconds, 3600)
    m, s = divmod(rest, 60)
    hms_str = '{:0>2}:{:0>2}:{:0>2}'.format(h, m, s)
    return hms_str


================================================
FILE: paddlespeech/vector/utils/vector_utils.py
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle


def get_chunks(seg_dur, audio_id, audio_duration):
    """Split an utterance into consecutive fixed-length chunk names.

    Each chunk is named ``<audio_id>_<start>_<end>``; a trailing remainder
    shorter than ``seg_dur`` is dropped.

    Args:
        seg_dur (float): segment chunk duration, seconds
        audio_id (str): utterance name
        audio_duration (float): utterance duration, seconds

    Returns:
        List: the names of all the chunk segments
    """
    chunk_count = int(audio_duration / seg_dur)  # all in seconds
    segments = []
    for idx in range(chunk_count):
        begin = idx * seg_dur
        end = begin + seg_dur
        segments.append("_".join((audio_id, str(begin), str(end))))
    return segments


def Q_from_tokens(token_num):
    """Build the prior distribution over ``token_num`` tokens.

    Only a uniform prior is produced for now (every token has the same
    weight); other priors such as Gaussian may be supported in the future.
    """
    counts = paddle.to_tensor([1] * token_num, dtype='float64')
    total = counts.sum()
    return counts / total


================================================
FILE: runtime/.clang-format
================================================
# This file is used by clang-format to autoformat paddle source code
#
# clang-format is part of the llvm toolchain.
# You need to install llvm and clang to format the source code.
#
# The basic usage is,
#   clang-format -i -style=file PATH/TO/SOURCE/CODE
#
# The -style=file option implicitly uses the ".clang-format" file located in
# one of the parent directories.
# The -i option means the file is changed in place.
#
# The document of clang-format is
#   http://clang.llvm.org/docs/ClangFormat.html
#   http://clang.llvm.org/docs/ClangFormatStyleOptions.html
---
Language:        Cpp
BasedOnStyle:  Google
IndentWidth:     4
TabWidth:        4
ContinuationIndentWidth: 4
MaxEmptyLinesToKeep: 2
AccessModifierOffset: -2  # The private/protected/public has no indent in class
Standard:  Cpp11
AllowAllParametersOfDeclarationOnNextLine: true
BinPackParameters: false
BinPackArguments: false
...


================================================
FILE: runtime/.gitignore
================================================
engine/common/base/flags.h
engine/common/base/log.h

tools/valgrind*
*log
fc_patch/*
test


================================================
FILE: runtime/CMakeLists.txt
================================================
# >=3.17 support -DCMAKE_FIND_DEBUG_MODE=ON
cmake_minimum_required(VERSION 3.17 FATAL_ERROR)

# Include cmake/EnableCMP0077.cmake (which sets policy CMP0077 to NEW) before
# every project() call.
set(CMAKE_PROJECT_INCLUDE_BEFORE "${CMAKE_CURRENT_SOURCE_DIR}/cmake/EnableCMP0077.cmake")

# Make the helper modules under ./cmake available to include().
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")

include(system)

# NOTE(review): the project() version (0.1) differs from PPS_VERSION (1.0.0)
# defined right below - confirm which one is authoritative.
project(paddlespeech VERSION 0.1)

set(PPS_VERSION_MAJOR 1)
set(PPS_VERSION_MINOR 0)
set(PPS_VERSION_PATCH 0)
set(PPS_VERSION "${PPS_VERSION_MAJOR}.${PPS_VERSION_MINOR}.${PPS_VERSION_PATCH}")

# compiler option
# Keep the same with openfst, -fPIC or -fpic
# NOTE(review): the base flags carry -O0 and -g for every build type; the
# per-configuration flags below are appended after them, so presumably the
# later -O3 wins for Release - confirm.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --std=c++14 -pthread -fPIC -O0 -Wall -g -ldl")
SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} --std=c++14 -pthread -fPIC -O0 -Wall -g -ggdb")
SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} --std=c++14 -pthread -fPIC -O3 -Wall")

set(CMAKE_VERBOSE_MAKEFILE ON)
set(CMAKE_FIND_DEBUG_MODE OFF)
set(PPS_CXX_STANDARD 14)

# set std-14
set(CMAKE_CXX_STANDARD ${PPS_CXX_STANDARD})

# Default to a Release build when no build type was requested.
# Ninja Generator will set CMAKE_BUILD_TYPE to Debug
if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel" FORCE)
endif()

# find_* e.g. find_library work when Cross-Compiling
if(ANDROID)
    set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM BOTH)
    set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
    set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH)
    set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE BOTH)
endif()

# BUILD_IN_MACOS is passed by build_ios.sh; it defines OS_MACOSX for all sources.
if(BUILD_IN_MACOS)
    add_definitions("-DOS_MACOSX")
endif()

# install dir into `build/install`
set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/install)

include(FetchContent)
include(ExternalProject)

# fc_patch dir
# FetchContent downloads are kept in <source dir>/fc_patch, and its output is
# not silenced.
set(FETCHCONTENT_QUIET off)
get_filename_component(fc_patch "fc_patch" REALPATH BASE_DIR "${CMAKE_SOURCE_DIR}")
set(FETCHCONTENT_BASE_DIR ${fc_patch})

###############################################################################
# Option Configurations
###############################################################################
# https://github.com/google/brotli/pull/655
option(BUILD_SHARED_LIBS "Build shared libraries" ON)

# WITH_PPS_DEBUG defines PPS_DEBUG for all sources.
option(WITH_PPS_DEBUG "debug option" OFF)
if (WITH_PPS_DEBUG)
    add_definitions("-DPPS_DEBUG")
endif()

# Which engines to build.
option(WITH_ASR "build asr" ON)
option(WITH_CLS "build cls" ON)
option(WITH_VAD "build vad" ON)

option(WITH_GPU "NNet using GPU." OFF)

option(WITH_PROFILING "enable c++ profling" OFF)
option(WITH_TESTING "unit test" ON)

option(WITH_ONNX "u2 support onnx runtime" OFF)

###############################################################################
# Include Third Party
###############################################################################
# Each include() below pulls in the matching module from ./cmake.
include(gflags)

include(glog)

include(pybind)

#onnx
# WITH_ONNX defines USE_ONNX for all sources.
if(WITH_ONNX)
    add_definitions(-DUSE_ONNX)
endif()

# gtest
if(WITH_TESTING)
    include(gtest) # download, build, install gtest
endif()

# fastdeploy
include(fastdeploy)

if(WITH_ASR)
    # openfst
    include(openfst)
    # Build openfst only after the gflags and extern_glog targets.
    add_dependencies(openfst gflags extern_glog)
endif()

###############################################################################
# Find Package
###############################################################################
# https://github.com/Kitware/CMake/blob/v3.1.0/Modules/FindThreads.cmake#L207
find_package(Threads REQUIRED)

if(WITH_ASR)
    # https://cmake.org/cmake/help/latest/module/FindPython3.html#module:FindPython3
    find_package(Python3 COMPONENTS Interpreter Development)
    find_package(pybind11 CONFIG)

    if(Python3_FOUND)
        message(STATUS "Python3_FOUND = ${Python3_FOUND}")
        message(STATUS "Python3_EXECUTABLE = ${Python3_EXECUTABLE}")
        message(STATUS "Python3_LIBRARIES = ${Python3_LIBRARIES}")
        message(STATUS "Python3_INCLUDE_DIRS = ${Python3_INCLUDE_DIRS}")
        message(STATUS "Python3_LINK_OPTIONS = ${Python3_LINK_OPTIONS}")
        set(PYTHON_LIBRARIES ${Python3_LIBRARIES} CACHE STRING "python lib" FORCE)
        set(PYTHON_INCLUDE_DIR ${Python3_INCLUDE_DIRS} CACHE STRING "python inc" FORCE)
    endif()

    message(STATUS "PYTHON_LIBRARIES = ${PYTHON_LIBRARIES}")
    message(STATUS "PYTHON_INCLUDE_DIR = ${PYTHON_INCLUDE_DIR}")
    include_directories(${PYTHON_INCLUDE_DIR})

    if(pybind11_FOUND)
        message(STATUS "pybind11_INCLUDES = ${pybind11_INCLUDE_DIRS}")
        message(STATUS "pybind11_LIBRARIES=${pybind11_LIBRARIES}")
        message(STATUS "pybind11_DEFINITIONS=${pybind11_DEFINITIONS}")
    endif()


    # The three queries below run the `python` found on PATH (the active venv)
    # and ask the installed paddle package where its headers and libraries are.

    # paddle libpaddle.so
    # paddle include and link option
    # -L/workspace/DeepSpeech-2.x/engine/venv/lib/python3.7/site-packages/paddle/libs -L/workspace/DeepSpeech-2.x/speechx/venv/lib/python3.7/site-packages/paddle/fluid -l:libpaddle.so -l:libdnnl.so.2 -l:libiomp5.so
    set(EXECUTE_COMMAND "import os"
        "import paddle"
        "include_dir = paddle.sysconfig.get_include()"
        "paddle_dir=os.path.split(include_dir)[0]"
        "libs_dir=os.path.join(paddle_dir, 'libs')"
        "fluid_dir=os.path.join(paddle_dir, 'fluid')"
        "out=' '.join([\"-L\" + libs_dir, \"-L\" + fluid_dir])"
        "out += \" -l:libpaddle.so -l:libdnnl.so.2 -l:libiomp5.so\"; print(out)"
    )
    # The result variable keeps its original (misspelled) name.
    execute_process(
        COMMAND python -c "${EXECUTE_COMMAND}"
        OUTPUT_VARIABLE PADDLE_LINK_FLAGS
        RESULT_VARIABLE SUCESS)
    # Fail with an actionable message instead of the cryptic
    # "string sub-command STRIP requires two arguments" that an empty output
    # used to trigger further down.
    if(NOT SUCESS EQUAL 0)
        message(FATAL_ERROR
            "Failed to query the PaddlePaddle link flags with `python -c` "
            "(result: ${SUCESS}). Is paddlepaddle installed in the active python environment?")
    endif()

    message(STATUS PADDLE_LINK_FLAGS= ${PADDLE_LINK_FLAGS})
    # Quoted so that an empty value is still passed as one argument.
    string(STRIP "${PADDLE_LINK_FLAGS}" PADDLE_LINK_FLAGS)

    # paddle compile option
    # -I/workspace/DeepSpeech-2.x/engine/venv/lib/python3.7/site-packages/paddle/include
    set(EXECUTE_COMMAND "import paddle"
        "include_dir = paddle.sysconfig.get_include()"
        "print(f\"-I{include_dir}\")"
    )
    execute_process(
        COMMAND python -c "${EXECUTE_COMMAND}"
        OUTPUT_VARIABLE PADDLE_COMPILE_FLAGS
        RESULT_VARIABLE PADDLE_COMPILE_FLAGS_RESULT)
    if(NOT PADDLE_COMPILE_FLAGS_RESULT EQUAL 0)
        message(FATAL_ERROR
            "Failed to query the PaddlePaddle include dir with `python -c` "
            "(result: ${PADDLE_COMPILE_FLAGS_RESULT}). Is paddlepaddle installed in the active python environment?")
    endif()
    message(STATUS PADDLE_COMPILE_FLAGS= ${PADDLE_COMPILE_FLAGS})
    string(STRIP "${PADDLE_COMPILE_FLAGS}" PADDLE_COMPILE_FLAGS)

    # for LD_LIBRARY_PATH
    # set(PADDLE_LIB_DIRS /workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/paddle/fluid:/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/paddle/libs/)
    set(EXECUTE_COMMAND "import os"
        "import paddle"
        "include_dir=paddle.sysconfig.get_include()"
        "paddle_dir=os.path.split(include_dir)[0]"
        "libs_dir=os.path.join(paddle_dir, 'libs')"
        "fluid_dir=os.path.join(paddle_dir, 'fluid')"
        "out=':'.join([libs_dir, fluid_dir]); print(out)"
        )
    # NOTE(review): this value is not stripped, so it keeps the trailing
    # newline printed by python - confirm that its consumers expect that.
    execute_process(
        COMMAND python -c "${EXECUTE_COMMAND}"
        OUTPUT_VARIABLE PADDLE_LIB_DIRS)
    message(STATUS PADDLE_LIB_DIRS= ${PADDLE_LIB_DIRS})
endif()

# Pull in the `summary` module from ./cmake.
include(summary)

###############################################################################
# Add local library
###############################################################################
set(ENGINE_ROOT ${CMAKE_SOURCE_DIR}/engine)

add_subdirectory(engine)


###############################################################################
# CPack library
###############################################################################
# build a CPack driven installer package
include (InstallRequiredSystemLibraries)
set(CPACK_PACKAGE_NAME "paddlespeech_library")
set(CPACK_PACKAGE_VENDOR "paddlespeech")
# NOTE(review): the package version is hard-coded here and currently equals
# PPS_VERSION (1.0.0) defined near the top of this file - keep them in sync.
set(CPACK_PACKAGE_VERSION_MAJOR 1)
set(CPACK_PACKAGE_VERSION_MINOR 0)
set(CPACK_PACKAGE_VERSION_PATCH 0)
set(CPACK_PACKAGE_DESCRIPTION "paddlespeech library")
set(CPACK_PACKAGE_CONTACT "paddlespeech@baidu.com")
# Source packages are produced as .tar.gz archives.
set(CPACK_SOURCE_GENERATOR "TGZ")
include (CPack)


================================================
FILE: runtime/README.md
================================================

## Environment

We develop under:
* python - >=3.8
* docker - `registry.baidubce.com/paddlepaddle/paddle:2.2.2-gpu-cuda10.2-cudnn7`
* os - Ubuntu 16.04.7 LTS
* gcc/g++/gfortran - 8.2.0
* cmake - >=3.17 (the top-level `CMakeLists.txt` requires at least 3.17)

> Please use `tools/venv.sh` to create a python `venv`, then run `source venv/bin/activate` before building the engine.

> We make sure everything works well under docker, and recommend using it to develop and deploy.

* [How to Install Docker](https://docs.docker.com/engine/install/)
* [A Docker Tutorial for Beginners](https://docker-curriculum.com/)
* [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/overview.html)

## Build

1. First, launch the docker container.

```
docker run --privileged  --net=host --ipc=host -it --rm -v /path/to/paddlespeech:/workspace --name=dev registry.baidubce.com/paddlepaddle/paddle:2.2.2-gpu-cuda10.2-cudnn7 /bin/bash
```

* More `Paddle` docker images you can see [here](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/docker/linux-docker.html).

2. Create python environment.

```
bash tools/venv.sh
```

3. Build `engine` and `examples`.

For now we are using features from the `develop` branch of paddle, so we need to install a `paddlepaddle` nightly build version.
For example: 
```
source venv/bin/activate
python -m pip install paddlepaddle==2.4.2 -i https://mirror.baidu.com/pypi/simple
./build.sh
```


4. Go to `examples` to have fun.

More details please see `README.md` under `examples`.


## Valgrind (Optional)

> If using docker please check `--privileged` is set when `docker run`.

* Fatal error at startup: `a function redirection which is mandatory for this platform-tool combination cannot be set up`
```bash
apt-get install libc6-dbg
```

* Install

```bash
pushd tools
./setup_valgrind.sh
popd
```

## TODO

### Deepspeech2 with linear feature
* DecibelNormalizer: there is a small difference between the offline and online db norm. The online db norm reads features chunk by chunk, so the feature size differs from that of the offline db norm. In `normalizer.cc:73`, `samples.size()` is different, which leads to a different result.

## FAQ

1. No module named `paddle`.

```
CMake Error at CMakeLists.txt:119 (string):
  string sub-command STRIP requires two arguments.


Traceback (most recent call last):
  File "<string>", line 1, in <module>
ModuleNotFoundError: No module named 'paddle'
-- PADDLE_COMPILE_FLAGS=
CMake Error at CMakeLists.txt:131 (string):
  string sub-command STRIP requires two arguments.


  File "<string>", line 1
    import os; import paddle; include_dir=paddle.sysconfig.get_include(); paddle_dir=os.path.split(include_dir)[0]; libs_dir=os.path.join(paddle_dir, 'libs'); fluid_dir=os.path.join(paddle_dir, 'fluid'); out=':'.join([libs_dir, fluid_dir]); print(out);     
    ^
```

Please install paddlepaddle >= 2.4rc.

2. `u2_recognizer_main: error while loading shared libraries: liblibpaddle.so: cannot open shared object file: No such file or directory`


```
cd $YOUR_ENV_PATH/lib/python3.8/site-packages/paddle/fluid
patchelf --set-soname libpaddle.so libpaddle.so
```

3. `u2_recognizer_main: error while loading shared libraries: libgfortran.so.5: cannot open shared object file: No such file or directory`

```
# my gcc version is 8.2
apt-get install gfortran-8
```

4. `Undefined reference to '_gfortran_concat_string'`

Use gcc 8.2 and gfortran 8.2.

5. `./boost/python/detail/wrap_python.hpp:57:11: fatal error: pyconfig.h: No such file or directory`

```
apt-get install python3-dev
```

For more info, please see [here](https://github.com/okfn/piati/issues/65).


================================================
FILE: runtime/build.sh
================================================
#!/usr/bin/env bash
# Configure and build the runtime for Linux x86_64 under build/Linux/x86_64.
set -xe

BUILD_ROOT=build/Linux
BUILD_DIR=${BUILD_ROOT}/x86_64

mkdir -p "${BUILD_DIR}"

# Build switches forwarded to CMake below.
BUILD_TYPE=Release
#BUILD_TYPE=Debug
BUILD_SO=OFF
BUILD_ONNX=ON
BUILD_ASR=ON
BUILD_CLS=ON
BUILD_VAD=ON
PPS_DEBUG=OFF
FASTDEPLOY_INSTALL_DIR=""

# the build script had verified in the paddlepaddle docker image.
# please follow the instruction below to install PaddlePaddle image.
# https://www.paddlepaddle.org.cn/documentation/docs/zh/install/docker/linux-docker.html 
#cmake -B build -DBUILD_SHARED_LIBS=OFF -DWITH_ASR=OFF -DWITH_CLS=OFF -DWITH_VAD=ON -DFASTDEPLOY_INSTALL_DIR=/workspace/zhanghui/paddle/FastDeploy/build/Android/arm64-v8a-api-21/install
configure_args=(
    -B "${BUILD_DIR}"
    "-DCMAKE_BUILD_TYPE=${BUILD_TYPE}"
    "-DBUILD_SHARED_LIBS=${BUILD_SO}"
    "-DWITH_ONNX=${BUILD_ONNX}"
    "-DWITH_ASR=${BUILD_ASR}"
    "-DWITH_CLS=${BUILD_CLS}"
    "-DWITH_VAD=${BUILD_VAD}"
    "-DFASTDEPLOY_INSTALL_DIR=${FASTDEPLOY_INSTALL_DIR}"
    "-DWITH_PPS_DEBUG=${PPS_DEBUG}"
)

# Configure step.
cmake "${configure_args[@]}"

# Build step.
cmake --build "${BUILD_DIR}" -j


================================================
FILE: runtime/build_android.sh
================================================
#!/bin/bash
# Cross-compile the runtime (VAD only) for Android with the NDK CMake toolchain.
#
# Machine-specific paths can be overridden from the environment:
#   ANDROID_NDK             root of the Android NDK
#   FASTDEPLOY_INSTALL_DIR  install dir of a FastDeploy Android build

set -ex

# Defaults are the paths this script was originally written for.
ANDROID_NDK=${ANDROID_NDK:-/mnt/masimeng/workspace/software/android-ndk-r25b/}

# Setting up Android toolchain
ANDROID_ABI=arm64-v8a  # 'arm64-v8a', 'armeabi-v7a'
ANDROID_PLATFORM="android-21"  # API >= 21
ANDROID_STL=c++_shared  # 'c++_shared', 'c++_static'
ANDROID_TOOLCHAIN=clang  # 'clang' only
TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake

# Fail early with a clear message when the NDK path is wrong.
if [ ! -f "${TOOLCHAIN_FILE}" ]; then
    echo "Android toolchain file not found: ${TOOLCHAIN_FILE} (set ANDROID_NDK)" >&2
    exit 1
fi

# Create build directory
# The API level in the directory name is derived from ANDROID_PLATFORM
# ("android-21" -> "21") so the two cannot drift apart.
BUILD_ROOT=build/Android
BUILD_DIR=${BUILD_ROOT}/${ANDROID_ABI}-api-${ANDROID_PLATFORM#android-}
FASTDEPLOY_INSTALL_DIR=${FASTDEPLOY_INSTALL_DIR:-/mnt/masimeng/workspace/FastDeploy/build/Android/arm64-v8a-api-21/install}

mkdir -p "${BUILD_DIR}"
cd "${BUILD_DIR}"

# CMake configuration with Android toolchain
cmake -DCMAKE_TOOLCHAIN_FILE="${TOOLCHAIN_FILE}" \
      -DCMAKE_BUILD_TYPE=MinSizeRel \
      -DANDROID_ABI="${ANDROID_ABI}" \
      -DANDROID_NDK="${ANDROID_NDK}" \
      -DANDROID_PLATFORM="${ANDROID_PLATFORM}" \
      -DANDROID_STL="${ANDROID_STL}" \
      -DANDROID_TOOLCHAIN="${ANDROID_TOOLCHAIN}" \
      -DBUILD_SHARED_LIBS=OFF \
      -DWITH_ASR=OFF \
      -DWITH_CLS=OFF \
      -DWITH_VAD=ON \
      -DFASTDEPLOY_INSTALL_DIR="${FASTDEPLOY_INSTALL_DIR}" \
      -DCMAKE_FIND_DEBUG_MODE=OFF \
      -Wno-dev ../../..

# Build the runtime with the generated Makefiles
make


================================================
FILE: runtime/build_ios.sh
================================================
#!/bin/bash
# https://www.jianshu.com/p/33672fb819f5
#
# Build the VAD interface for iOS.
#
# Usage:
#   build_ios.sh <tools_dir>   <tools_dir> must contain ios-cmake-4.2.0/ and
#                              fastdeploy-ort-mac-build/
#   build_ios.sh clean         only remove the build/ and output/ directories

PATH="/Applications/CMake.app/Contents/bin":"$PATH"
tools_dir=$1
ios_toolchain_cmake=${tools_dir}/"/ios-cmake-4.2.0/ios.toolchain.cmake"
fastdeploy_dir=${tools_dir}"/fastdeploy-ort-mac-build/"
build_targets=("OS64")
build_type_array=("Release")

#static_name="libocr"
#lib_name="libocr"

# Switch to workpath (the directory that holds this script)
current_path=$(cd "$(dirname "$0")" && pwd)
work_path=${current_path}/
build_path=${current_path}/build/
output_path=${current_path}/output/
cd "${work_path}" || exit 1

# Clean
rm -rf "${build_path}"
rm -rf "${output_path}"

if [ "$1"x = "clean"x ]; then
    exit 0
fi

# Without a tools dir the toolchain and FastDeploy paths would resolve
# relative to "/", so stop here with a usage message.
if [ -z "${tools_dir}" ]; then
    echo "usage: $0 <tools_dir> | clean" >&2
    exit 1
fi

# Build Every Target
for target in "${build_targets[@]}"
do
    for build_type in "${build_type_array[@]}"
    do
        echo -e "\033[1;36;40mBuilding ${build_type} ${target} ... \033[0m"
        target_build_path=${build_path}/${target}/${build_type}/
        mkdir -p "${target_build_path}"

        cd "${target_build_path}"
        if [ $? -ne 0 ];then
            echo -e "\033[1;31;40mcd ${target_build_path} failed \033[0m"
            exit -1
        fi

        # Only the arm64 device target has a matching FastDeploy build.
        if [ "${target}" == "OS64" ];then
            fastdeploy_install_dir=${fastdeploy_dir}/arm64
        else
            fastdeploy_install_dir=""
            echo "fastdeploy_install_dir is null"
            exit -1
        fi

        # Stop on a failed configure/build instead of copying stale or
        # missing artifacts afterwards.
        cmake -DCMAKE_TOOLCHAIN_FILE="${ios_toolchain_cmake}" \
            -DBUILD_IN_MACOS=ON \
            -DBUILD_SHARED_LIBS=OFF \
            -DWITH_ASR=OFF \
            -DWITH_CLS=OFF \
            -DWITH_VAD=ON \
            -DFASTDEPLOY_INSTALL_DIR="${fastdeploy_install_dir}" \
            -DPLATFORM="${target}" ../../../ || { echo "cmake configure failed for ${target}" >&2; exit 1; }

        cmake --build . --config "${build_type}" || { echo "cmake build failed for ${target}" >&2; exit 1; }

        # Collect the build products inside the target build directory.
        mkdir -p output
        cp engine/vad/interface/libpps_vad_interface.a output
        cp engine/vad/interface/vad_interface_main.app/vad_interface_main output
        cp "${fastdeploy_install_dir}/lib/libfastdeploy.dylib" output
        cp "${fastdeploy_install_dir}/third_libs/install/onnxruntime/lib/libonnxruntime.dylib" output

    done
done

## combine all ios libraries
#DEVROOT=/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/
#LIPO_TOOL=${DEVROOT}/usr/bin/lipo
#LIBRARY_PATH=${build_path}
#LIBRARY_OUTPUT_PATH=${output_path}/IOS
#mkdir -p ${LIBRARY_OUTPUT_PATH}
#
#${LIPO_TOOL}    \
#    -arch i386 ${LIBRARY_PATH}/ios_x86/Release/${lib_name}.a          \
#    -arch x86_64 ${LIBRARY_PATH}/ios_x86_64/Release/${lib_name}.a     \
#    -arch armv7 ${LIBRARY_PATH}/ios_armv7/Release/${lib_name}.a       \
#    -arch armv7s ${LIBRARY_PATH}/ios_armv7s/Release/${lib_name}.a     \
#    -arch arm64 ${LIBRARY_PATH}/ios_armv8/Release/${lib_name}.a       \
#    -output ${LIBRARY_OUTPUT_PATH}/${lib_name}.a -create
#
#cp ${work_path}/lib/houyi/lib/ios/libhouyi_score.a ${LIBRARY_OUTPUT_PATH}/
#cp ${work_path}/interface/ocr-interface.h ${output_path}
#cp ${work_path}/version/release.v ${output_path}
#
#echo -e "\033[1;36;40mBuild All Target Success At:\n${output_path}\033[0m"
#exit 0


================================================
FILE: runtime/cmake/EnableCMP0048.cmake
================================================
# Opt in to the NEW behavior of policy CMP0048 for whichever project includes
# this snippet (CMP0048: project() manages the *_VERSION variables).
cmake_policy(SET CMP0048 NEW)

================================================
FILE: runtime/cmake/EnableCMP0077.cmake
================================================
# Opt in to the NEW behavior of policy CMP0077 for whichever project includes
# this snippet (CMP0077: option() honors an existing normal variable).
cmake_policy(SET CMP0077 NEW)


================================================
FILE: runtime/cmake/FindGFortranLibs.cmake
================================================
#.rst:
# FindGFortranLibs
# --------
#  https://github.com/Argonne-National-Laboratory/PIPS/blob/master/cmake/Modules/FindGFortranLibs.cmake
#  https://enccs.github.io/cmake-workshop/cxx-fortran/
#
# Find gcc Fortran compiler & library paths
#
# The module defines the following variables:
#
# ::
#
#
#   GFORTRANLIBS_FOUND - true if system has gfortran
#   LIBGFORTRAN_LIBRARIES - path to libgfortran
#   LIBQUADMATH_LIBRARIES - path to libquadmath
#   GFORTRAN_LIBRARIES_DIR - directory containing libgfortran, libquadmath
#   GFORTRAN_INCLUDE_DIR - directory containing gfortran/gcc headers
#   LIBGOMP_LIBRARIES - path to libgomp
#   LIBGOMP_INCLUDE_DIR - directory containing omp.h header
#   GFORTRAN_VERSION_STRING - version of gfortran found
#
# Honor find_package(GFortranLibs QUIET): the quiet flag of *this* module is
# GFortranLibs_FIND_QUIETLY.
set(CMAKE_REQUIRED_QUIET ${GFortranLibs_FIND_QUIETLY})

if(NOT CMAKE_REQUIRED_QUIET)
  message(STATUS "Looking for gfortran related libraries...")
endif()

enable_language(Fortran)
if(CMAKE_Fortran_COMPILER_ID MATCHES "GNU")

  # Basically, call "gfortran -v" to dump compiler info to the string
  # GFORTRAN_VERBOSE_STR, which will be used to get necessary paths.
  # gfortran prints this information on stderr, hence ERROR_VARIABLE.
  message(STATUS "Extracting library and header information by calling 'gfortran -v'...")
  execute_process(COMMAND "${CMAKE_Fortran_COMPILER}" "-v" ERROR_VARIABLE
    GFORTRAN_VERBOSE_STR RESULT_VARIABLE FLAG)

  # For debugging
  message(STATUS "'gfortran -v' returned:")
  message(STATUS "${GFORTRAN_VERBOSE_STR}")

  # Detect gfortran version
  string(REGEX MATCH "gcc version [^\t\n ]+" GFORTRAN_VER_STR "${GFORTRAN_VERBOSE_STR}")
  string(REGEX REPLACE "gcc version ([^\t\n ]+)" "\\1" GFORTRAN_VERSION_STRING "${GFORTRAN_VER_STR}")
  message(STATUS "Detected gfortran version ${GFORTRAN_VERSION_STRING}")
  unset(GFORTRAN_VER_STR)

  # Kept for backward compatibility; not referenced below.
  set(MATCH_REGEX "[^\t\n ]+[\t\n ]+")
  set(REPLACE_REGEX "([^\t\n ]+)")

  # Find architecture for compiler
  string(REGEX MATCH "Target: [^\t\n ]+"
    GFORTRAN_ARCH_STR "${GFORTRAN_VERBOSE_STR}")
  message(STATUS "Architecture string: ${GFORTRAN_ARCH_STR}")
  string(REGEX REPLACE "Target: ([^\t\n ]+)" "\\1"
    GFORTRAN_ARCH "${GFORTRAN_ARCH_STR}")
  message(STATUS "Detected gfortran architecture: ${GFORTRAN_ARCH}")
  unset(GFORTRAN_ARCH_STR)

  # Find install prefix, if it exists; if not, use default.
  # The match deliberately excludes the whitespace that follows the flag so
  # that the extracted directory carries no trailing blanks/newlines.
  string(REGEX MATCH "--prefix=[^\t\n ]+"
    GFORTRAN_PREFIX_STR "${GFORTRAN_VERBOSE_STR}")
  if(NOT GFORTRAN_PREFIX_STR)
    message(STATUS "Detected default gfortran prefix")
    set(GFORTRAN_PREFIX_DIR "/usr/local") # default prefix for gcc install
  else()
    string(REGEX REPLACE "--prefix=([^\t\n ]+)" "\\1"
      GFORTRAN_PREFIX_DIR "${GFORTRAN_PREFIX_STR}")
  endif()
  message(STATUS "Detected gfortran prefix: ${GFORTRAN_PREFIX_DIR}")
  unset(GFORTRAN_PREFIX_STR)

  # Find install exec-prefix, if it exists; if not, use default.
  # string(REGEX MATCH <regex> <out-var> <input>...) takes no replacement
  # expression; a stray one would be interpreted as the output variable.
  string(REGEX MATCH "--exec-prefix=[^\t\n ]+"
    GFORTRAN_EXEC_PREFIX_STR "${GFORTRAN_VERBOSE_STR}")
  if(NOT GFORTRAN_EXEC_PREFIX_STR)
    message(STATUS "Detected default gfortran exec-prefix")
    set(GFORTRAN_EXEC_PREFIX_DIR "${GFORTRAN_PREFIX_DIR}")
  else()
    string(REGEX REPLACE "--exec-prefix=([^\t\n ]+)" "\\1"
      GFORTRAN_EXEC_PREFIX_DIR "${GFORTRAN_EXEC_PREFIX_STR}")
  endif()
  message(STATUS "Detected gfortran exec-prefix: ${GFORTRAN_EXEC_PREFIX_DIR}")
  unset(GFORTRAN_EXEC_PREFIX_STR)

  # Find library directory and include directory, if library directory specified
  string(REGEX MATCH "--libdir=[^\t\n ]+"
    GFORTRAN_LIB_DIR_STR "${GFORTRAN_VERBOSE_STR}")
  if(NOT GFORTRAN_LIB_DIR_STR)
    message(STATUS "Found --libdir flag -- not found")
    message(STATUS "Using default gfortran library & include directory paths")
    set(GFORTRAN_LIBRARIES_DIR
      "${GFORTRAN_EXEC_PREFIX_DIR}/lib/gcc/${GFORTRAN_ARCH}/${GFORTRAN_VERSION_STRING}")
    string(CONCAT GFORTRAN_INCLUDE_DIR "${GFORTRAN_LIBRARIES_DIR}" "/include")
  else()
    message(STATUS "Found --libdir flag -- yes")
    string(REGEX REPLACE "--libdir=([^\t\n ]+)" "\\1"
      GFORTRAN_LIBRARIES_DIR "${GFORTRAN_LIB_DIR_STR}")
    string(CONCAT GFORTRAN_INCLUDE_DIR "${GFORTRAN_LIBRARIES_DIR}" "/gcc/" "${GFORTRAN_ARCH}" "/" "${GFORTRAN_VERSION_STRING}" "/include")
  endif()
  message(STATUS "gfortran libraries path: ${GFORTRAN_LIBRARIES_DIR}")
  message(STATUS "gfortran include path dir: ${GFORTRAN_INCLUDE_DIR}")
  unset(GFORTRAN_LIB_DIR_STR)

  # There are lots of other build options for gcc & gfortran. For now, the
  # options implemented above should cover a lot of common use cases.

  # Clean up by deleting the output string from "gfortran -v"
  unset(GFORTRAN_VERBOSE_STR)

  # Find paths for libgfortran, libquadmath, libgomp
  # libgomp needed for OpenMP support without Clang
  find_library(LIBGFORTRAN_LIBRARIES NAMES gfortran libgfortran
    HINTS ${GFORTRAN_LIBRARIES_DIR})
  find_library(LIBQUADMATH_LIBRARIES NAMES quadmath libquadmath
    HINTS ${GFORTRAN_LIBRARIES_DIR})
  find_library(LIBGOMP_LIBRARIES NAMES gomp libgomp
    HINTS ${GFORTRAN_LIBRARIES_DIR})

  # Find OpenMP headers
  find_path(LIBGOMP_INCLUDE_DIR NAMES omp.h HINTS ${GFORTRAN_INCLUDE_DIR})

else()
  message(STATUS "CMAKE_Fortran_COMPILER_ID does not match 'GNU'!")
endif()

include(FindPackageHandleStandardArgs)

# Required: libgfortran, libquadmath, path for gfortran libraries
# Optional: libgomp, path for OpenMP headers, path for gcc/gfortran headers
find_package_handle_standard_args(GFortranLibs
  REQUIRED_VARS LIBGFORTRAN_LIBRARIES LIBQUADMATH_LIBRARIES GFORTRAN_LIBRARIES_DIR
  VERSION_VAR GFORTRAN_VERSION_STRING)

if(GFORTRANLIBS_FOUND)
  message(STATUS "Looking for gfortran libraries -- found")
  message(STATUS "gfortran version: ${GFORTRAN_VERSION_STRING}")
else()
  message(STATUS "Looking for gfortran libraries -- not found")
endif()

mark_as_advanced(LIBGFORTRAN_LIBRARIES LIBQUADMATH_LIBRARIES
  LIBGOMP_LIBRARIES LIBGOMP_INCLUDE_DIR
  GFORTRAN_LIBRARIES_DIR GFORTRAN_INCLUDE_DIR)
# FindGFortranLIBS.cmake ends here

================================================
FILE: runtime/cmake/absl.cmake
================================================
include(FetchContent)

# Declare the abseil-cpp dependency (pinned to the 20210324.1 release tag).
FetchContent_Declare(absl
  GIT_REPOSITORY "https://github.com/abseil/abseil-cpp.git"
  GIT_TAG        "20210324.1")

# Options consumed by the abseil sub-build; they must be set before the
# content is made available.
set(BUILD_SHARED_LIBS OFF)   # static libraries; change if shared libs are wanted
set(BUILD_TESTING OFF)       # skip abseil's own tests, otherwise gtest fails
set(ABSL_ENABLE_INSTALL ON)  # keep install rules even when used as a subproject

FetchContent_MakeAvailable(absl)

# NOTE(review): this Eigen variable is unrelated to abseil; it looks carried
# over from kenlm.cmake -- confirm whether anything relies on it being set here.
set(EIGEN3_INCLUDE_DIR ${Eigen3_SOURCE_DIR})

# Expose abseil headers (absl/...) to every target in this directory tree.
include_directories(${absl_SOURCE_DIR})

================================================
FILE: runtime/cmake/boost.cmake
================================================
include(FetchContent)
set(Boost_DEBUG ON)

# Download/extract locations below the shared FetchContent patch directory.
set(Boost_PREFIX_DIR ${fc_patch}/boost)
set(Boost_SOURCE_DIR ${fc_patch}/boost-src)

FetchContent_Declare(
  Boost
  URL      https://boostorg.jfrog.io/artifactory/main/release/1.75.0/source/boost_1_75_0.tar.gz
  URL_HASH SHA256=aeb26f80e80945e82ee93e5939baebdca47b9dee80a07d3144be1e1a6a66dd6a
  PREFIX            ${Boost_PREFIX_DIR}
  SOURCE_DIR        ${Boost_SOURCE_DIR}
)

# The sources must be populated *before* bootstrap.sh / b2 can be run:
# on a fresh configure ${Boost_SOURCE_DIR} does not exist until this call.
FetchContent_MakeAvailable(Boost)

# Run the Boost build tools by absolute path. A bare command name is looked up
# via PATH, not in WORKING_DIRECTORY, so the scripts would not be found.
execute_process(
  COMMAND ${Boost_SOURCE_DIR}/bootstrap.sh
  WORKING_DIRECTORY ${Boost_SOURCE_DIR}
  RESULT_VARIABLE boost_bootstrap_result)
if(boost_bootstrap_result EQUAL 0)
  execute_process(
    COMMAND ${Boost_SOURCE_DIR}/b2
    WORKING_DIRECTORY ${Boost_SOURCE_DIR}
    RESULT_VARIABLE boost_b2_result)
  if(NOT boost_b2_result EQUAL 0)
    # Only a warning: header-only use of Boost still works without b2 output.
    message(WARNING "boost b2 failed: ${boost_b2_result}")
  endif()
else()
  message(WARNING "boost bootstrap.sh failed: ${boost_bootstrap_result}")
endif()

message(STATUS "boost src dir: ${Boost_SOURCE_DIR}")
message(STATUS "boost inc dir: ${Boost_INCLUDE_DIR}")
message(STATUS "boost bin dir: ${Boost_BINARY_DIR}")

set(BOOST_ROOT ${Boost_SOURCE_DIR})
message(STATUS "boost root dir: ${BOOST_ROOT}")

# Boost headers live directly under the source root (boost/...).
include_directories(${Boost_SOURCE_DIR})

================================================
FILE: runtime/cmake/eigen.cmake
================================================
include(FetchContent)

# update eigen to the commit id f612df27 on 03/16/2021
# NOTE(review): GIT_TAG below tracks `master`, not the commit mentioned above,
# so the fetched revision is not reproducible -- confirm which one is intended.
set(EIGEN_PREFIX_DIR ${fc_patch}/eigen3)

FetchContent_Declare(
  Eigen3
  GIT_REPOSITORY https://gitlab.com/libeigen/eigen.git
  GIT_TAG master
  PREFIX            ${EIGEN_PREFIX_DIR}
  GIT_SHALLOW TRUE
  GIT_PROGRESS TRUE)

set(EIGEN_BUILD_DOC OFF)
# note: To disable eigen tests,
# you should put this code in a add_subdirectory to avoid to change
# BUILD_TESTING for your own project too since variables are directory
# scoped
set(BUILD_TESTING OFF)
set(EIGEN_BUILD_PKGCONFIG OFF)
# A malformed `set( OFF)` (variable name missing) used to follow here; it only
# unset a variable literally named `OFF` and has been dropped.
FetchContent_MakeAvailable(Eigen3)

message(STATUS "eigen src dir: ${Eigen3_SOURCE_DIR}")
message(STATUS "eigen bin dir: ${Eigen3_BINARY_DIR}")
#include_directories(${Eigen3_SOURCE_DIR})
#link_directories(${Eigen3_BINARY_DIR})

================================================
FILE: runtime/cmake/fastdeploy.cmake
================================================
# Locate (or download) a prebuilt FastDeploy SDK, import its CMake config and
# register install rules for the dynamic libraries it ships.
include(FetchContent)

# Logging options forwarded to the FetchContent_Declare() calls below.
set(EXTERNAL_PROJECT_LOG_ARGS
    LOG_DOWNLOAD 1 # Wrap download in script to log output
    LOG_UPDATE 1 # Wrap update in script to log output
    LOG_PATCH 1
    LOG_CONFIGURE 1# Wrap configure in script to log output
    LOG_BUILD 1 # Wrap build in script to log output
    LOG_INSTALL 1
    LOG_TEST 1 # Wrap test in script to log output
    LOG_MERGED_STDOUTERR 1
    LOG_OUTPUT_ON_FAILURE 1
)

# Only download a prebuilt package when the caller did not point
# FASTDEPLOY_INSTALL_DIR at an existing installation.
if(NOT FASTDEPLOY_INSTALL_DIR)
    if(ANDROID)
        FetchContent_Declare(
            fastdeploy
            URL      https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-1.0.4-shared.tgz
            URL_HASH MD5=2a15301158e9eb157a4f11283689e7ba
            ${EXTERNAL_PROJECT_LOG_ARGS}
        )
        # NOTE(review): the macro name is spelled "BAKEND"; it is left as-is
        # because source files presumably test this exact spelling -- confirm.
        add_definitions("-DUSE_PADDLE_LITE_BAKEND")
        set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -mfloat-abi=softfp -mfpu=vfpv3 -mfpu=neon -fPIC -pie -fPIE")
        set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -g0 -O3 -mfloat-abi=softfp -mfpu=vfpv3 -mfpu=neon -fPIC -pie -fPIE")
    else() # Linux
        FetchContent_Declare(
            fastdeploy
            URL      https://paddlespeech.cdn.bcebos.com/speechx/fastdeploy/fastdeploy-1.0.5-x86_64-onnx.tar.gz 
            URL_HASH MD5=33900d986ea71aa78635e52f0733227c
            ${EXTERNAL_PROJECT_LOG_ARGS}
        )
        set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -msse -msse2")
        set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -msse -msse2 -mavx -O3")
    endif()

    FetchContent_MakeAvailable(fastdeploy)

    # Presumably ${fc_patch} is the FetchContent base dir, so this is where
    # the package above was extracted -- TODO confirm against the top-level
    # CMakeLists.txt.
    set(FASTDEPLOY_INSTALL_DIR ${fc_patch}/fastdeploy-src)
endif()

# Import the SDK's own CMake configuration (expected to provide
# FASTDEPLOY_INCS / FASTDEPLOY_LIBS, which are used below and in the summary).
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)

# fix compiler flags conflict, since fastdeploy using c++11 for project
# this line must after `include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)`
set(CMAKE_CXX_STANDARD ${PPS_CXX_STANDARD})

include_directories(${FASTDEPLOY_INCS})

# install fastdeploy and dependents lib
# install_fastdeploy_libraries(${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR})
# No dynamic libs need to install while using
# FastDeploy static lib.
if(ANDROID AND WITH_ANDROID_STATIC_LIB)
    # return() ends processing of this included file; nothing below runs.
    return()
endif()

# Glob pattern matching the platform's shared-library file names.
set(DYN_LIB_SUFFIX "*.so*")
if(WIN32)
    set(DYN_LIB_SUFFIX "*.dll")
elseif(APPLE)
    set(DYN_LIB_SUFFIX "*.dylib*")
endif()

# Root directory that is searched for shared libraries; FastDeploy_DIR takes
# precedence over FASTDEPLOY_INSTALL_DIR.
if(FastDeploy_DIR)
    set(DYN_SEARCH_DIR ${FastDeploy_DIR})
elseif(FASTDEPLOY_INSTALL_DIR)
    set(DYN_SEARCH_DIR ${FASTDEPLOY_INSTALL_DIR})
else()
    message(FATAL_ERROR "Please set FastDeploy_DIR/FASTDEPLOY_INSTALL_DIR before call install_fastdeploy_libraries.")
endif()

# Collect FastDeploy's own shared libs and those of its bundled third parties.
file(GLOB_RECURSE ALL_NEED_DYN_LIBS ${DYN_SEARCH_DIR}/lib/${DYN_LIB_SUFFIX})
file(GLOB_RECURSE ALL_DEPS_DYN_LIBS ${DYN_SEARCH_DIR}/third_libs/${DYN_LIB_SUFFIX})

if(ENABLE_VISION)
    # OpenCV
    if(ANDROID)
        file(GLOB_RECURSE ALL_OPENCV_DYN_LIBS ${OpenCV_NATIVE_DIR}/libs/${DYN_LIB_SUFFIX})
    else()
        file(GLOB_RECURSE ALL_OPENCV_DYN_LIBS ${OpenCV_DIR}/../../${DYN_LIB_SUFFIX})
    endif()
   
    # OpenCV libs are installed separately below, so drop them from the
    # generic third-party list to avoid installing them twice.
    list(REMOVE_ITEM ALL_DEPS_DYN_LIBS ${ALL_OPENCV_DYN_LIBS})

    if(WIN32)
        file(GLOB OPENCV_DYN_LIBS ${OpenCV_DIR}/x64/vc15/bin/${DYN_LIB_SUFFIX})
        install(FILES ${OPENCV_DYN_LIBS} DESTINATION lib)
    elseif(ANDROID AND (NOT WITH_ANDROID_OPENCV_STATIC))
        file(GLOB OPENCV_DYN_LIBS ${OpenCV_NATIVE_DIR}/libs/${ANDROID_ABI}/${DYN_LIB_SUFFIX})
        install(FILES ${OPENCV_DYN_LIBS} DESTINATION lib)
    else() # linux/mac
        file(GLOB OPENCV_DYN_LIBS ${OpenCV_DIR}/lib/${DYN_LIB_SUFFIX})
        install(FILES ${OPENCV_DYN_LIBS} DESTINATION lib)
    endif()

    # FlyCV
    if(ENABLE_FLYCV)
        file(GLOB_RECURSE ALL_FLYCV_DYN_LIBS ${FLYCV_LIB_DIR}/${DYN_LIB_SUFFIX})
        list(REMOVE_ITEM ALL_DEPS_DYN_LIBS ${ALL_FLYCV_DYN_LIBS})
        if(ANDROID AND (NOT WITH_ANDROID_FLYCV_STATIC))
        install(FILES ${ALL_FLYCV_DYN_LIBS} DESTINATION lib)
        endif()
    endif()
endif()

if(ENABLE_OPENVINO_BACKEND)
    # need plugins.xml for openvino backend
    set(OPENVINO_RUNTIME_BIN_DIR ${OPENVINO_DIR}/bin)
    file(GLOB OPENVINO_PLUGIN_XML ${OPENVINO_RUNTIME_BIN_DIR}/*.xml)
    install(FILES ${OPENVINO_PLUGIN_XML} DESTINATION lib)
endif()

# Install other libraries
install(FILES ${ALL_NEED_DYN_LIBS} DESTINATION lib)
install(FILES ${ALL_DEPS_DYN_LIBS} DESTINATION lib)


================================================
FILE: runtime/cmake/gflags.cmake
================================================
include(FetchContent)

# Fetch the gflags 2.2.2 source archive (checksum-verified) and add it to the
# build as a sub-project.
FetchContent_Declare(gflags
  URL https://paddleaudio.bj.bcebos.com/build/gflag-2.2.2.zip
  URL_HASH SHA256=19713a36c9f32b33df59d1c79b4958434cb005b5b47dc5400a7a4b078111d9b5)
FetchContent_MakeAvailable(gflags)

# The header and library locations in the gflags build tree are exported
# globally because openfst needs them.
include_directories(${gflags_BINARY_DIR}/include)
link_directories(${gflags_BINARY_DIR})

#install(FILES ${gflags_BINARY_DIR}/libgflags_nothreads.a DESTINATION lib)


================================================
FILE: runtime/cmake/glog.cmake
================================================
include(FetchContent)

# Provide the `extern_glog` target on every platform:
#   * Android  - an empty INTERFACE target (glog itself is not fetched);
#   * otherwise - glog 0.4.0 is fetched, built, and aliased as `extern_glog`.
if(ANDROID)
  add_library(extern_glog INTERFACE)
  add_dependencies(extern_glog gflags)
else() # UNIX
  add_definitions(-DWITH_GLOG)

  # NOTE(review): per the FetchContent documentation, configure-step options
  # such as CMAKE_ARGS are ignored by FetchContent_Declare -- confirm whether
  # these flags (e.g. WITH_GFLAGS=OFF) are expected to take effect.
  FetchContent_Declare(
    glog
    URL      https://paddleaudio.bj.bcebos.com/build/glog-0.4.0.zip
    URL_HASH SHA256=9e1b54eb2782f53cd8af107ecf08d2ab64b8d0dc2b7f5594472f3bd63ca85cdc
    CMAKE_ARGS      -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
                    -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
                    -DCMAKE_CXX_FLAGS=${GLOG_CMAKE_CXX_FLAGS}
                    -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
                    -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
                    -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
                    -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
                    -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
                    -DCMAKE_POSITION_INDEPENDENT_CODE=ON
                    -DWITH_GFLAGS=OFF
                    -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
                    ${EXTERNAL_OPTIONAL_ARGS}
  )

  # Do not build glog's own tests as part of this project.
  set(BUILD_TESTING OFF)
  FetchContent_MakeAvailable(glog)

  # Generated headers live in the binary dir, the rest under <src>/src.
  include_directories(${glog_BINARY_DIR} ${glog_SOURCE_DIR}/src)

  add_library(extern_glog ALIAS glog)
  add_dependencies(glog gflags)
endif()

================================================
FILE: runtime/cmake/gtest.cmake
================================================

include(FetchContent)

# Provide the `extern_gtest` target on every platform:
#   * Android  - an empty INTERFACE target (googletest is not fetched);
#   * otherwise - googletest 1.11.0 is fetched and aliased as `extern_gtest`.
if(ANDROID)
  add_library(extern_gtest INTERFACE)
else() # UNIX
  FetchContent_Declare(
    gtest
    URL      https://paddleaudio.bj.bcebos.com/build/gtest-release-1.11.0.zip
    URL_HASH SHA256=353571c2440176ded91c2de6d6cd88ddd41401d14692ec1f99e35d013feda55a
  )
  FetchContent_MakeAvailable(gtest)

  include_directories(${gtest_BINARY_DIR} ${gtest_SOURCE_DIR}/src)

  # Build gflags/glog before gtest, then expose gtest under the common name.
  add_dependencies(gtest gflags extern_glog)
  add_library(extern_gtest ALIAS gtest)
endif()

# CTest integration is opt-in.
if(WITH_TESTING)
  enable_testing()
endif()


================================================
FILE: runtime/cmake/kenlm.cmake
================================================
include(FetchContent)

# kenlm locates Eigen through EIGEN3_INCLUDE_DIR, see
# https://github.com/kpu/kenlm/blob/master/cmake/modules/FindEigen3.cmake
# so it has to be set before kenlm's CMakeLists.txt is processed.
set(EIGEN3_INCLUDE_DIR ${Eigen3_SOURCE_DIR})

# kenlm is pinned to a fixed commit.
FetchContent_Declare(kenlm
  GIT_REPOSITORY "https://github.com/kpu/kenlm.git"
  GIT_TAG        "df2d717e95183f79a90b2fa6e4307083a351ca6a")
FetchContent_MakeAvailable(kenlm)

# kenlm headers are included relative to its source root.
include_directories(${kenlm_SOURCE_DIR})

================================================
FILE: runtime/cmake/libsndfile.cmake
================================================
# Fetch libsndfile (or reuse a local checkout) and add it to the build as a
# sub-directory with its optional components switched off.
include(FetchContent)

# https://github.com/pongasoft/vst-sam-spl-64/blob/master/libsndfile.cmake
# https://github.com/popojan/goban/blob/master/CMakeLists.txt#L38
# https://github.com/ddiakopoulos/libnyquist/blob/master/CMakeLists.txt

if(LIBSNDFILE_ROOT_DIR)
  # instructs FetchContent to not download or update but use the location instead
  set(FETCHCONTENT_SOURCE_DIR_LIBSNDFILE ${LIBSNDFILE_ROOT_DIR})
else()
  set(FETCHCONTENT_SOURCE_DIR_LIBSNDFILE "")
endif()

# FORCE: these cache entries are overwritten on every configure, so a value
# passed with -D does not survive.
set(LIBSNDFILE_GIT_REPO "https://github.com/libsndfile/libsndfile.git" CACHE STRING "libsndfile git repository url" FORCE)
set(LIBSNDFILE_GIT_TAG 1.0.31 CACHE STRING "libsndfile git tag" FORCE)

# Only the sources are needed here; the configure/build/install/test commands
# are given as empty strings.
FetchContent_Declare(libsndfile
      GIT_REPOSITORY    ${LIBSNDFILE_GIT_REPO}
      GIT_TAG           ${LIBSNDFILE_GIT_TAG}
      GIT_CONFIG        advice.detachedHead=false
#      GIT_SHALLOW       true
      CONFIGURE_COMMAND ""
      BUILD_COMMAND     ""
      INSTALL_COMMAND   ""
      TEST_COMMAND      ""
      )

# Populate manually (instead of FetchContent_MakeAvailable) so that the
# add_subdirectory() call can be made from libsndfile_build() below.
FetchContent_GetProperties(libsndfile)
if(NOT libsndfile_POPULATED)
  if(FETCHCONTENT_SOURCE_DIR_LIBSNDFILE)
    message(STATUS "Using libsndfile from local ${FETCHCONTENT_SOURCE_DIR_LIBSNDFILE}")
  else()
    message(STATUS "Fetching libsndfile ${LIBSNDFILE_GIT_REPO}/tree/${LIBSNDFILE_GIT_TAG}")
  endif()
  FetchContent_Populate(libsndfile)
endif()

set(LIBSNDFILE_ROOT_DIR ${libsndfile_SOURCE_DIR})
# Headers are taken from the build tree (presumably because sndfile.h is
# generated at configure time -- TODO confirm for the pinned tag).
set(LIBSNDFILE_INCLUDE_DIR "${libsndfile_BINARY_DIR}/src")

# Adds libsndfile as a sub-directory (EXCLUDE_FROM_ALL) after declaring its
# optional components OFF. Takes no arguments.
function(libsndfile_build)
  option(BUILD_PROGRAMS "Build programs" OFF)
  option(BUILD_EXAMPLES "Build examples" OFF)
  # NOTE(review): the help text below says "Build examples" but the option is
  # BUILD_TESTING -- looks like a copy/paste slip.
  option(BUILD_TESTING "Build examples" OFF)
  option(ENABLE_CPACK "Enable CPack support" OFF)
  option(ENABLE_PACKAGE_CONFIG "Generate and install package config file" OFF)
  option(BUILD_REGTEST "Build regtest" OFF)
  # finally we include libsndfile itself
  add_subdirectory(${libsndfile_SOURCE_DIR} ${libsndfile_BINARY_DIR} EXCLUDE_FROM_ALL)
  # copying .hh for c++ support
  #file(COPY "${libsndfile_SOURCE_DIR}/src/sndfile.hh" DESTINATION ${LIBSNDFILE_INCLUDE_DIR})
endfunction()

libsndfile_build()

include_directories(${LIBSNDFILE_INCLUDE_DIR})

================================================
FILE: runtime/cmake/openblas.cmake
================================================
include(FetchContent)
# ExternalProject_Add()/ExternalProject_Get_Property() used below are defined
# by the ExternalProject module; include it explicitly instead of relying on
# another file having done so.
include(ExternalProject)

set(OpenBLAS_SOURCE_DIR ${fc_patch}/openblas-src)
set(OpenBLAS_PREFIX ${fc_patch}/openblas-prefix)

# ######################################################################################################################
# OPENBLAS  https://github.com/lattice/quda/blob/develop/CMakeLists.txt#L575
# ######################################################################################################################
enable_language(Fortran)

include(FortranCInterface)

# # Clang doesn't have a Fortran compiler in its suite (yet),
# # so detect libraries for gfortran; we need equivalents to
# # libgfortran and libquadmath, which are implicitly
# # linked by flags in CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES
# include(FindGFortranLibs REQUIRED)
# # Add directory containing libgfortran and libquadmath to
# # linker. Should also contain libgomp, if not using
# # Intel OpenMP runtime
# link_directories(${GFORTRAN_LIBRARIES_DIR})
# # gfortan dir in the docker.
# link_directories(/usr/local/gcc-8.2/lib64)
# # if you are working with C and Fortran
# FortranCInterface_VERIFY()

# # if you are working with C++ and Fortran
# FortranCInterface_VERIFY(CXX)


#TODO: switch to CPM
include(GNUInstallDirs)
# Build OpenBLAS v0.3.10 at build time and install it into the external
# project's own <INSTALL_DIR>.
ExternalProject_Add(
    OPENBLAS
    GIT_REPOSITORY https://github.com/xianyi/OpenBLAS.git
    GIT_TAG v0.3.10
    GIT_SHALLOW YES
    PREFIX ${OpenBLAS_PREFIX}
    SOURCE_DIR  ${OpenBLAS_SOURCE_DIR}
    CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>
    CMAKE_GENERATOR "Unix Makefiles")


# https://cmake.org/cmake/help/latest/module/ExternalProject.html?highlight=externalproject_get_property#external-project-definition
ExternalProject_Get_Property(OPENBLAS INSTALL_DIR)
message(STATUS "OPENBLAS install dir: ${INSTALL_DIR}")
set(OpenBLAS_INSTALL_PREFIX ${INSTALL_DIR})
# Imported target wrapping the static library produced by the external build;
# the dependency makes consumers wait for the OPENBLAS project to finish.
add_library(openblas STATIC IMPORTED)
add_dependencies(openblas OPENBLAS)
set_target_properties(openblas PROPERTIES IMPORTED_LINK_INTERFACE_LANGUAGES Fortran)
# ${CMAKE_INSTALL_LIBDIR}  lib
set_target_properties(openblas PROPERTIES IMPORTED_LOCATION ${OpenBLAS_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/libopenblas.a)


# https://cmake.org/cmake/help/latest/command/install.html?highlight=cmake_install_libdir#installing-targets
# ${CMAKE_INSTALL_LIBDIR}  lib
# ${CMAKE_INSTALL_INCLUDEDIR}  include
link_directories(${OpenBLAS_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR})
# include_directories(${OpenBLAS_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR})
# fix for can not find `cblas.h`
include_directories(${OpenBLAS_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR}/openblas)

================================================
FILE: runtime/cmake/openfst.cmake
================================================
set(openfst_PREFIX_DIR ${fc_patch}/openfst)
set(openfst_SOURCE_DIR ${fc_patch}/openfst-src)
set(openfst_BINARY_DIR ${fc_patch}/openfst-build)

include(FetchContent)
# ExternalProject_Add() used below is defined by the ExternalProject module;
# include it explicitly instead of relying on another file having done so.
include(ExternalProject)
# openfst Acknowledgments:
#Cyril Allauzen, Michael Riley, Johan Schalkwyk, Wojciech Skut and Mehryar Mohri, 
#"OpenFst: A General and Efficient Weighted Finite-State Transducer Library", 
#Proceedings of the Ninth International Conference on Implementation and 
#Application of Automata, (CIAA 2007), volume 4783 of Lecture Notes in 
#Computer Science, pages 11-23. Springer, 2007. http://www.openfst.org.

set(EXTERNAL_PROJECT_LOG_ARGS
    LOG_DOWNLOAD 1 # Wrap download in script to log output
    LOG_UPDATE 1 # Wrap update in script to log output
    LOG_CONFIGURE 1 # Wrap configure in script to log output
    LOG_BUILD 1 # Wrap build in script to log output
    LOG_TEST 1 # Wrap test in script to log output
    LOG_INSTALL 1 # Wrap install in script to log output
)

# Build OpenFst 1.7.2 with its autotools build, pointing it at the gflags and
# glog trees produced by gflags.cmake / glog.cmake.
# The COMMAND following CONFIGURE_COMMAND is appended to the configure step:
# it copies the project's patched files over the extracted sources.
ExternalProject_Add(openfst
  URL               https://paddleaudio.bj.bcebos.com/build/openfst_1.7.2.zip
  URL_HASH          SHA256=ffc56931025579a8af3515741c0f3b0fc3a854c023421472c07ca0c6389c75e6
  ${EXTERNAL_PROJECT_LOG_ARGS}
  PREFIX            ${openfst_PREFIX_DIR}
  SOURCE_DIR        ${openfst_SOURCE_DIR}
  BINARY_DIR        ${openfst_BINARY_DIR}
  BUILD_ALWAYS      0
  CONFIGURE_COMMAND ${openfst_SOURCE_DIR}/configure --prefix=${openfst_PREFIX_DIR}
                      "CPPFLAGS=-I${gflags_BINARY_DIR}/include -I${glog_SOURCE_DIR}/src -I${glog_BINARY_DIR}"
                      "LDFLAGS=-L${gflags_BINARY_DIR} -L${glog_BINARY_DIR}"
                      "LIBS=-lgflags_nothreads -lglog -lpthread -fPIC"
  COMMAND           ${CMAKE_COMMAND} -E copy_directory ${PROJECT_SOURCE_DIR}/patch/openfst ${openfst_SOURCE_DIR}
  BUILD_COMMAND     make -j 4
)
link_directories(${openfst_PREFIX_DIR}/lib)
include_directories(${openfst_PREFIX_DIR}/include)


message(STATUS "OpenFST inc dir: ${openfst_PREFIX_DIR}/include")
message(STATUS "OpenFST lib dir: ${openfst_PREFIX_DIR}/lib")


================================================
FILE: runtime/cmake/paddleinference.cmake
================================================
# Download the prebuilt Paddle Inference C++ package (2.2.2, Linux CPU,
# gcc8.2 + AVX + MKL per the URL) and export its include/link directories and
# the DEPS library list.
set(paddle_SOURCE_DIR ${fc_patch}/paddle-lib)
set(paddle_PREFIX_DIR ${fc_patch}/paddle-lib-prefix)

include(FetchContent)
# Prebuilt binaries only: the configure/build/install commands are given as
# empty strings.
FetchContent_Declare(
  paddle
  URL      https://paddle-inference-lib.bj.bcebos.com/2.2.2/cxx_c/Linux/CPU/gcc8.2_avx_mkl/paddle_inference.tgz
  URL_HASH SHA256=7c6399e778c6554a929b5a39ba2175e702e115145e8fa690d2af974101d98873
  PREFIX            ${paddle_PREFIX_DIR} 
  SOURCE_DIR        ${paddle_SOURCE_DIR}
  CONFIGURE_COMMAND ""
  BUILD_COMMAND     ""
  INSTALL_COMMAND   ""
)
FetchContent_MakeAvailable(paddle)

# Note the trailing slash: the paths below append "<lib>/..." directly.
set(PADDLE_LIB_THIRD_PARTY_PATH "${paddle_SOURCE_DIR}/third_party/install/")

# Headers of Paddle Inference and of the third-party libs bundled with it.
include_directories("${paddle_SOURCE_DIR}/paddle/include")
include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}protobuf/include")
include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}xxhash/include")
include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}cryptopp/include")

# Library search paths for the same packages.
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}protobuf/lib")
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}xxhash/lib")
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}cryptopp/lib")
link_directories("${paddle_SOURCE_DIR}/paddle/lib")
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}mklml/lib")
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}mkldnn/lib")

##paddle with mkl
# -fopenmp is appended to the global C++ flags for every target.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
set(MATH_LIB_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}mklml")
include_directories("${MATH_LIB_PATH}/include")
set(MATH_LIB ${MATH_LIB_PATH}/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX}
                 ${MATH_LIB_PATH}/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX})

set(MKLDNN_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}mkldnn")
include_directories("${MKLDNN_PATH}/include")
# Hard-coded ".so.0" soname, unlike the other libraries above which use
# CMAKE_SHARED_LIBRARY_SUFFIX.
set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0)
set(EXTERNAL_LIB "-lrt -ldl -lpthread")

# global vars
# DEPS is stored as an INTERNAL cache entry so it is visible from every
# directory; the second set() extends the value written by the first.
set(DEPS ${paddle_SOURCE_DIR}/paddle/lib/libpaddle_inference${CMAKE_SHARED_LIBRARY_SUFFIX} CACHE INTERNAL "deps")
set(DEPS ${DEPS}
      ${MATH_LIB} ${MKLDNN_LIB}
      glog gflags protobuf xxhash cryptopp
      ${EXTERNAL_LIB} CACHE INTERNAL "deps")
message(STATUS "Deps libraries: ${DEPS}")


================================================
FILE: runtime/cmake/pybind.cmake
================================================
#the pybind11 is from:https://github.com/pybind/pybind11
# Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>, All rights reserved.

# Download the pybind11 v2.10.0 archive into FETCHCONTENT_BASE_DIR (once),
# unpack it to ${FETCHCONTENT_BASE_DIR}/pybind11 and expose its headers.
SET(PYBIND_ZIP "v2.10.0.zip")
SET(LOCAL_PYBIND_ZIP ${FETCHCONTENT_BASE_DIR}/${PYBIND_ZIP})
SET(PYBIND_SRC ${FETCHCONTENT_BASE_DIR}/pybind11)
SET(DOWNLOAD_URL "https://paddleaudio.bj.bcebos.com/build/v2.10.0.zip")
SET(PYBIND_TIMEOUT 600 CACHE STRING "Timeout in seconds when downloading pybind.")

IF(NOT EXISTS ${LOCAL_PYBIND_ZIP})
    FILE(DOWNLOAD ${DOWNLOAD_URL}
      ${LOCAL_PYBIND_ZIP}
      TIMEOUT ${PYBIND_TIMEOUT}
      STATUS ERR
      SHOW_PROGRESS
    )

    # STATUS yields a 2-element list: <numeric code>;<message>. Compare the
    # code explicitly instead of the whole list.
    LIST(GET ERR 0 PYBIND_DOWNLOAD_CODE)
    LIST(GET ERR 1 PYBIND_DOWNLOAD_MSG)

    IF(PYBIND_DOWNLOAD_CODE EQUAL 0)
        MESSAGE(STATUS "download pybind success")
    ELSE()
        # Remove the partial/empty file, otherwise the EXISTS guard above
        # would skip the download on the next configure run.
        FILE(REMOVE ${LOCAL_PYBIND_ZIP})
        MESSAGE(FATAL_ERROR "download pybind fail: ${PYBIND_DOWNLOAD_MSG}")
    ENDIF()
ENDIF()

IF(NOT EXISTS ${PYBIND_SRC})
    EXECUTE_PROCESS(
      COMMAND ${CMAKE_COMMAND} -E tar xfz ${LOCAL_PYBIND_ZIP}
       WORKING_DIRECTORY ${FETCHCONTENT_BASE_DIR}
       RESULT_VARIABLE tar_result
    )

    # Check the extraction result numerically (a regex MATCHES 0 would also
    # accept e.g. "10") and before touching the extracted directory.
    IF(tar_result EQUAL 0)
        MESSAGE(STATUS "unzip pybind success")
    ELSE()
        MESSAGE(FATAL_ERROR "unzip pybind fail")
    ENDIF()

    # The archive unpacks to pybind11-2.10.0; give it a version-less name.
    file(RENAME ${FETCHCONTENT_BASE_DIR}/pybind11-2.10.0 ${PYBIND_SRC})
ENDIF()

include_directories(${PYBIND_SRC}/include)


================================================
FILE: runtime/cmake/summary.cmake
================================================
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# pps_summary()
#
# Prints a human-readable overview of the current build configuration
# (toolchain, paths, WITH_* feature switches, FastDeploy settings, plus
# GPU/Android/Python details when the matching switch is on) via
# message(STATUS ...). Takes no arguments and sets no variables in the
# caller's scope.
function(pps_summary)
  message(STATUS "")
  message(STATUS "*************PaddleSpeech Building Summary**********")
  # --- toolchain and generic CMake settings ---
  message(STATUS "  PPS_VERSION               : ${PPS_VERSION}")
  message(STATUS "  CMake version             : ${CMAKE_VERSION}")
  message(STATUS "  CMake command             : ${CMAKE_COMMAND}")
  message(STATUS "  UNIX                      : ${UNIX}")
  message(STATUS "  ANDROID                   : ${ANDROID}")
  message(STATUS "  System                    : ${CMAKE_SYSTEM_NAME}")
  message(STATUS "  C++ compiler              : ${CMAKE_CXX_COMPILER}")
  message(STATUS "  C++ compiler version      : ${CMAKE_CXX_COMPILER_VERSION}")
  message(STATUS "  CXX flags                 : ${CMAKE_CXX_FLAGS}")
  message(STATUS "  Build type                : ${CMAKE_BUILD_TYPE}")
  message(STATUS "  BUILD_SHARED_LIBS         : ${BUILD_SHARED_LIBS}")
  # Compile definitions registered on the project's top-level directory.
  get_directory_property(tmp DIRECTORY ${PROJECT_SOURCE_DIR} COMPILE_DEFINITIONS)
  message(STATUS "  Compile definitions       : ${tmp}")
  message(STATUS "  CMAKE_PREFIX_PATH         : ${CMAKE_PREFIX_PATH}")
  message(STATUS "  CMAKE_CURRENT_BINARY_DIR  : ${CMAKE_CURRENT_BINARY_DIR}")
  message(STATUS "  CMAKE_INSTALL_PREFIX      : ${CMAKE_INSTALL_PREFIX}")
  message(STATUS "  CMAKE_INSTALL_LIBDIR      : ${CMAKE_INSTALL_LIBDIR}")
  message(STATUS "  CMAKE_MODULE_PATH         : ${CMAKE_MODULE_PATH}")
  # CMAKE_SYSTEM_NAME is printed a second time here (also shown as "System").
  message(STATUS "  CMAKE_SYSTEM_NAME         : ${CMAKE_SYSTEM_NAME}")
  message(STATUS "")

  # --- project feature switches and FastDeploy configuration ---
  message(STATUS "  WITH_ASR                  : ${WITH_ASR}")
  message(STATUS "  WITH_CLS                  : ${WITH_CLS}")
  message(STATUS "  WITH_VAD                  : ${WITH_VAD}")
  message(STATUS "  WITH_GPU                  : ${WITH_GPU}")
  message(STATUS "  WITH_TESTING              : ${WITH_TESTING}")
  message(STATUS "  WITH_PROFILING            : ${WITH_PROFILING}")
  message(STATUS "  FASTDEPLOY_INSTALL_DIR    : ${FASTDEPLOY_INSTALL_DIR}")
  message(STATUS "  FASTDEPLOY_INCS           : ${FASTDEPLOY_INCS}")
  message(STATUS "  FASTDEPLOY_LIBS           : ${FASTDEPLOY_LIBS}")
  if(WITH_GPU)
    message(STATUS "  CUDA_DIRECTORY            : ${CUDA_DIRECTORY}")
  endif()

  # --- Android cross-compilation details ---
  if(ANDROID)
    message(STATUS "  ANDROID_ABI               : ${ANDROID_ABI}")
    message(STATUS "  ANDROID_PLATFORM          : ${ANDROID_PLATFORM}")
    message(STATUS "  ANDROID_NDK               : ${ANDROID_NDK}")
    message(STATUS "  ANDROID_NDK_VERSION       : ${CMAKE_ANDROID_NDK_VERSION}")
  endif() 
  # --- Python is only reported for ASR builds ---
  if (WITH_ASR)
    message(STATUS "  Python executable         : ${PYTHON_EXECUTABLE}")
    message(STATUS "  Python includes           : ${PYTHON_INCLUDE_DIR}")
  endif()
endfunction()

# Print the summary as soon as this file is included.
pps_summary()

================================================
FILE: runtime/cmake/system.cmake
================================================
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Detects the OS and sets appropriate variables.
# CMAKE_SYSTEM_NAME only give us a coarse-grained name of the OS CMake is
# building for, but the host processor name like centos is necessary
# in some scenes to distinguish system for customization.
#
# for instance, protobuf libs path is <install_dir>/lib64
# on CentOS, but <install_dir>/lib on other systems.

if(UNIX AND NOT APPLE)
  # Treat every Unix-like platform except Apple's as Linux.
  set(LINUX TRUE)
endif()

# Derive HOST_SYSTEM (and HOST_SYSTEM_VERSION where it can be determined).
if(WIN32)
  set(HOST_SYSTEM "win32")
else()
  if(APPLE)
    set(HOST_SYSTEM "macosx")
    # execute_process() replaces the deprecated exec_program() command.
    execute_process(
      COMMAND sw_vers -productVersion
      OUTPUT_VARIABLE HOST_SYSTEM_VERSION
      OUTPUT_STRIP_TRAILING_WHITESPACE)
    # Keep only "<major>.<minor>"; the dot is escaped so it matches literally.
    string(REGEX MATCH "[0-9]+\\.[0-9]+" MACOS_VERSION "${HOST_SYSTEM_VERSION}")
    # Test the environment variable itself. The previous `$ENV{...}` form
    # expanded the value first and then asked whether a CMake variable with
    # that name existed, so the guard did not test what it was meant to.
    if(NOT DEFINED ENV{MACOSX_DEPLOYMENT_TARGET})
      # Set cache variable - end user may change this during ccmake or cmake-gui configure.
      set(CMAKE_OSX_DEPLOYMENT_TARGET
          ${MACOS_VERSION}
          CACHE
            STRING
            "Minimum OS X version to target for deployment (at runtime); newer APIs weak linked. Set to empty string for default value."
      )
    endif()
    # NOTE(review): this overwrites any previously set executable linker flags
    # instead of appending to them - confirm that is intended.
    set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security")
  else()

    # First guess the distribution from the login banner.
    if(EXISTS "/etc/issue")
      file(READ "/etc/issue" LINUX_ISSUE)
      if(LINUX_ISSUE MATCHES "CentOS")
        set(HOST_SYSTEM "centos")
      elseif(LINUX_ISSUE MATCHES "Debian")
        set(HOST_SYSTEM "debian")
      elseif(LINUX_ISSUE MATCHES "Ubuntu")
        set(HOST_SYSTEM "ubuntu")
      elseif(LINUX_ISSUE MATCHES "Red Hat")
        set(HOST_SYSTEM "redhat")
      elseif(LINUX_ISSUE MATCHES "Fedora")
        set(HOST_SYSTEM "fedora")
      endif()

      # First dotted number sequence found in the banner, e.g. "20.04".
      string(REGEX MATCH "(([0-9]+)\\.)+([0-9]+)" HOST_SYSTEM_VERSION
                   "${LINUX_ISSUE}")
    endif()

    # /etc/redhat-release takes precedence for recognizing CentOS.
    if(EXISTS "/etc/redhat-release")
      file(READ "/etc/redhat-release" LINUX_ISSUE)
      if(LINUX_ISSUE MATCHES "CentOS")
        set(HOST_SYSTEM "centos")
      endif()
    endif()

    # Unknown distribution: fall back to CMake's coarse system name.
    if(NOT HOST_SYSTEM)
      set(HOST_SYSTEM ${CMAKE_SYSTEM_NAME})
    endif()

  endif()
endif()

# Ask CMake for the number of logical cores on the host and store it in
# CPU_CORES.
cmake_host_system_information(
  RESULT CPU_CORES
  QUERY NUMBER_OF_LOGICAL_CORES)

mark_as_advanced(
  HOST_SYSTEM
  CPU_CORES)

# Report what was detected.
message(STATUS "Found Paddle host system: ${HOST_SYSTEM}, version: ${HOST_SYSTEM_VERSION}")
message(STATUS "Found Paddle host system's CPU: ${CPU_CORES} cores")

# Log switches for external dependencies, one LOG_<STEP> <0|1> pair per step
# (presumably forwarded to ExternalProject_Add() - confirm at the call sites).
# 1 wraps the step in a script to log its output, 0 does not.
set(EXTERNAL_PROJECT_LOG_ARGS
    LOG_DOWNLOAD 0
    LOG_UPDATE 1
    LOG_CONFIGURE 1
    LOG_BUILD 0
    LOG_TEST 1
    LOG_INSTALL 0)

================================================
FILE: runtime/docker/.gitkeep
================================================


================================================
FILE: runtime/engine/CMakeLists.txt
================================================
# Build script of the speechx engine: sets the shared include roots and pulls
# in the component sub-directories.
project(speechx LANGUAGES CXX)

# Include roots, registered before any sub-directory is added.
include_directories(
  ${CMAKE_CURRENT_SOURCE_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}/kaldi
  ${CMAKE_CURRENT_SOURCE_DIR}/common)

# Components that are always built.
add_subdirectory(kaldi)
add_subdirectory(common)

# Optional components, each behind its WITH_* switch.
if(WITH_ASR)
  add_subdirectory(asr)
endif()

if(WITH_CLS)
  add_subdirectory(audio_classification)
endif()

if(WITH_VAD)
  add_subdirectory(vad)
endif()

add_subdirectory(codelab)


================================================
FILE: runtime/engine/asr/CMakeLists.txt
================================================
# Build script of the ASR engine component.
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)

project(ASR LANGUAGES CXX)

# Include roots for the ASR sub-directories added below.
include_directories(
  ${CMAKE_CURRENT_SOURCE_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}/server)

# Sub-components, added in the original order.
add_subdirectory(decoder)
add_subdirectory(recognizer)
add_subdirectory(nnet)
add_subdirectory(server)


================================================
FILE: runtime/engine/asr/decoder/CMakeLists.txt
================================================
# Sources of the static `decoder` library.
set(srcs
  ctc_prefix_beam_search_decoder.cc
  ctc_tlg_decoder.cc)

add_library(decoder STATIC ${srcs})
target_link_libraries(decoder PUBLIC utils fst frontend nnet kaldi-decoder)

# Test executables: each entry <name> is built from <name>.cc in this directory.
set(TEST_BINS
  ctc_prefix_beam_search_decoder_main
  ctc_tlg_decoder_main)

foreach(bin_name IN LISTS TEST_BINS)
  add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
  target_include_directories(${bin_name} PRIVATE
    ${SPEECHX_ROOT}
    ${SPEECHX_ROOT}/kaldi
    ${pybind11_INCLUDE_DIRS}
    ${PROJECT_SOURCE_DIR})
  target_compile_options(${bin_name} PRIVATE ${PADDLE_COMPILE_FLAGS})
  target_link_libraries(${bin_name}
    nnet decoder fst utils gflags glog kaldi-base kaldi-matrix kaldi-util
    ${PYTHON_LIBRARIES} ${PADDLE_LINK_FLAGS} -ldl)
endforeach()


================================================
FILE: runtime/engine/asr/decoder/common.h
================================================
// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "base/common.h"

// Plain record describing one decoding result.
// NOTE(review): this struct lives in the global namespace, unlike the
// similarly named ppspeech::DecodeResult declared further down in this file.
struct DecoderResult {
    // presumably the acoustic-model score of the hypothesis - TODO confirm
    BaseFloat acoustic_score;
    // presumably the ids of the decoded words/tokens - TODO confirm
    std::vector<int32> words_idx;
    // presumably one (start, end) pair per entry of words_idx; the unit is
    // not stated here - TODO confirm
    std::vector<std::pair<int32, int32>> time_stamp;
};


namespace ppspeech {

// One piece of decoded text together with its start/end positions.
// The unit of `start`/`end` is not specified in this header.
struct WordPiece {
    std::string word;
    int start = -1;
    int end = -1;

    // Takes ownership of `text` and records the two positions as given.
    WordPiece(std::string text, int begin, int finish)
        : word(std::move(text)), start(begin), end(finish) {}
};

// A decoded sentence with its score and word pieces.
struct DecodeResult {
    // Starts at the lowest representable score until a real one is assigned.
    float score = -kBaseFloatMax;
    std::string sentence;
    std::vector<WordPiece> word_pieces;

    // Ordering predicate for descending-score sorts: true when `a` has the
    // strictly higher score.
    static bool CompareFunc(const DecodeResult& a, const DecodeResult& b) {
        return b.score < a.score;
    }
};

}  // namespace ppspeech


================================================
FILE: runtime/engine/asr/decoder/ctc_beam_search_opt.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include "base/common.h"
#include "util/parse-options.h"

namespace ppspeech {


// Options of the CTC beam search decoders, with their command-line
// registration.
struct CTCBeamSearchOptions {
    // common
    int blank;                      // id of the CTC blank token
    std::string word_symbol_table;  // path of the vocabulary file

    // u2
    int first_beam_size;
    int second_beam_size;

    // Defaults: blank id 0, "vocab.txt", both beam sizes 10.
    CTCBeamSearchOptions()
        : blank{0},
          word_symbol_table{"vocab.txt"},
          first_beam_size{10},
          second_beam_size{10} {}

    // Registers every option with `opts` so it can be set by the option parser.
    // NOTE(review): "word_symbol_table" is spelled with underscores while the
    // beam-size options use hyphens, and the "blank" help text carries no
    // module prefix - kept as-is to preserve the existing interface.
    void Register(kaldi::OptionsItf* opts) {
        const std::string prefix("CTCBeamSearchOptions: ");
        opts->Register(
            "word_symbol_table", &word_symbol_table, prefix + "vocab file path.");
        opts->Register("blank", &blank, "blank id, default is 0.");
        opts->Register(
            "first-beam-size", &first_beam_size, prefix + "first beam size.");
        opts->Register(
            "second-beam-size", &second_beam_size, prefix + "second beam size.");
    }
};

}  // namespace ppspeech


================================================
FILE: runtime/engine/asr/decoder/ctc_prefix_beam_search_decoder.cc
================================================
// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu)
//               2022 Binbin Zhang (binbzha@qq.com)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include "decoder/ctc_prefix_beam_search_decoder.h"

#include "base/common.h"
#include "decoder/ctc_beam_search_opt.h"
#include "decoder/ctc_prefix_beam_search_score.h"
#include "utils/math.h"

#ifdef WITH_PROFILING
#include "paddle/fluid/platform/profiler.h"
using paddle::platform::RecordEvent;
using paddle::platform::TracerEventType;
#endif

namespace ppspeech {

// Stores the options, loads the unit symbol table from
// opts.word_symbol_table and puts the decoder into its initial state.
// Aborts (CHECK) when the symbol table cannot be loaded.
CTCPrefixBeamSearch::CTCPrefixBeamSearch(const CTCBeamSearchOptions& opts)
    : opts_(opts) {
    // Take shared ownership of the table returned by ReadText.
    unit_table_.reset(fst::SymbolTable::ReadText(opts.word_symbol_table));
    CHECK(unit_table_ != nullptr);

    Reset();
}

void CTCPrefixBeamSearch::Reset() {
    num_frame_decoded_ = 0;

    cur_hyps_.clear();

    hypotheses_.clear();
    likelihood_.clear();
    viterbi_likelihood_.clear();
    times_.clear();
    outputs_.clear();

    // empty hyp with Score
    std::vector<int> empty;
    PrefixScore prefix_score;
    prefix_score.InitEmpty();
    cur_hyps_[empty] = prefix_score;

    outputs_.emplace_back(empty);
    hypotheses_.emplace_back(empty);
    likelihood_.emplace_back(prefix_score.TotalScore());
    times_.emplace_back(empty);
}

// Initializing the decoder is the same as resetting it.
void CTCPrefixBeamSearch::InitDecoder() {
    Reset();
}

// Pulls frames from `decodable` one at a time, starting at
// num_frame_decoded_, and feeds each to the beam search until
// FrameLikelihood() reports that no frame is available. The time spent
// fetching frames and the time spent searching are logged at VLOG(2).
void CTCPrefixBeamSearch::AdvanceDecode(
    const std::shared_ptr<kaldi::DecodableInterface>& decodable) {
    double fetch_seconds = 0.0;
    double search_seconds = 0.0;

    for (;;) {
        // Fetch the scores of the next frame.
        kaldi::Timer stopwatch;
        std::vector<kaldi::BaseFloat> frame_scores;
        const bool has_frame =
            decodable->FrameLikelihood(num_frame_decoded_, &frame_scores);
        fetch_seconds += stopwatch.Elapsed();
        if (!has_frame) {
            VLOG(2) << "decoder advance decode exit." << frame_scores.size();
            break;
        }

        // Run the search on a batch holding just this frame.
        stopwatch.Reset();
        std::vector<std::vector<kaldi::BaseFloat>> single_frame;
        single_frame.push_back(std::move(frame_scores));
        AdvanceDecoding(single_frame);
        search_seconds += stopwatch.Elapsed();

        VLOG(1) << "num_frame_decoded_: " << num_frame_decoded_;
    }

    VLOG(2) << "AdvanceDecode feat + forward  cost: " << fetch_seconds
            << " sec.";
    VLOG(2) << "AdvanceDecode search  cost: " << search_seconds << " sec.";
}

// Ordering predicate for (prefix, score) pairs: true when `a` has the
// strictly larger total score (log domain), i.e. sorts best-first.
static bool PrefixScoreCompare(
    const std::pair<std::vector<int>, PrefixScore>& a,
    const std::pair<std::vector<int>, PrefixScore>& b) {
    const auto total_a = a.second.TotalScore();
    const auto total_b = b.second.TotalScore();
    return total_b < total_a;
}


// Runs CTC prefix beam search over the frames in `logp` and advances
// num_frame_decoded_ by one per frame.
//
// logp: one score vector per frame, indexed by token id. The scores are
//       combined with LogSumExp, i.e. they are treated as log-domain values.
//
// Per frame: (1) keep the best `first_beam_size` tokens, (2) extend every
// current prefix with each kept token, (3) keep the best
// `opts_.second_beam_size` resulting prefixes, (4) publish them through
// UpdateHypotheses().
void CTCPrefixBeamSearch::AdvanceDecoding(
    const std::vector<std::vector<kaldi::BaseFloat>>& logp) {
#ifdef WITH_PROFILING
    RecordEvent event("CtcPrefixBeamSearch::AdvanceDecoding",
                      TracerEventType::UserDefined,
                      1);
#endif

    if (logp.size() == 0) return;

    // The first beam width is derived from the first frame's vocabulary size
    // and reused for every frame of this call.
    int first_beam_size =
        std::min(static_cast<int>(logp[0].size()), opts_.first_beam_size);

    for (int t = 0; t < logp.size(); ++t, ++num_frame_decoded_) {
        const std::vector<kaldi::BaseFloat>& logp_t = logp[t];
        std::unordered_map<std::vector<int>, PrefixScore, PrefixScoreHash>
            next_hyps;

        // 1. first beam prune, only select topk candidates
        // NOTE(review): the VLOG(2) below dereferences max_element(logp_t) and
        // topk_score[0], so it assumes a non-empty frame - confirm.
        std::vector<kaldi::BaseFloat> topk_score;
        std::vector<int32_t> topk_index;
        TopK(logp_t, first_beam_size, &topk_score, &topk_index);
        VLOG(2) << "topk: " << num_frame_decoded_ << " "
                << *std::max_element(logp_t.begin(), logp_t.end()) << " "
                << topk_score[0];
        for (int i = 0; i < topk_score.size(); i++) {
            VLOG(2) << "topk: " << num_frame_decoded_ << " " << topk_score[i];
        }

        // 2. token passing
        for (int i = 0; i < topk_index.size(); ++i) {
            int id = topk_index[i];
            auto prob = topk_score[i];

            for (const auto& it : cur_hyps_) {
                const std::vector<int>& prefix = it.first;
                const PrefixScore& prefix_score = it.second;

                // If prefix doesn't exist in next_hyps, next_hyps[prefix]
                // inserts a default-constructed PrefixScore, whose fields
                // b (blank ending score) and nb (non-blank ending score)
                // both start at -kBaseFloatMax.

                if (id == opts_.blank) {
                    // case 0: *a + <blank> => *a, *a<blank> + <blank> => *a,
                    // prefix not changed
                    PrefixScore& next_score = next_hyps[prefix];
                    next_score.b =
                        LogSumExp(next_score.b, prefix_score.Score() + prob);

                    // timestamp: blank is silence, it does not affect the
                    // timestamps
                    next_score.v_b = prefix_score.ViterbiScore() + prob;
                    next_score.times_b = prefix_score.Times();

                    // Prefix not changed, copy the context from prefix
                    if (context_graph_ && !next_score.has_context) {
                        next_score.CopyContext(prefix_score);
                        next_score.has_context = true;
                    }

                } else if (!prefix.empty() && id == prefix.back()) {
                    // case 1: *a + a => *a, prefix not changed
                    PrefixScore& next_score1 = next_hyps[prefix];
                    next_score1.nb =
                        LogSumExp(next_score1.nb, prefix_score.nb + prob);

                    // timestamp: a non-blank symbol affects the timestamps
                    if (next_score1.v_nb < prefix_score.v_nb + prob) {
                        // compute viterbi score
                        next_score1.v_nb = prefix_score.v_nb + prob;
                        if (next_score1.cur_token_prob < prob) {
                            // store max token prob
                            next_score1.cur_token_prob = prob;
                            // move the last token's timestamp to this frame,
                            // where the token scored highest so far
                            next_score1.times_nb = prefix_score.times_nb;
                            assert(next_score1.times_nb.size() > 0);
                            next_score1.times_nb.back() = num_frame_decoded_;
                        }
                    }

                    // Prefix not changed, copy the context from prefix
                    if (context_graph_ && !next_score1.has_context) {
                        next_score1.CopyContext(prefix_score);
                        next_score1.has_context = true;
                    }

                    // case 2: *a<blank> + a => *aa, prefix changed.
                    std::vector<int> new_prefix(prefix);
                    new_prefix.emplace_back(id);
                    PrefixScore& next_score2 = next_hyps[new_prefix];
                    next_score2.nb =
                        LogSumExp(next_score2.nb, prefix_score.b + prob);

                    // timestamp: a non-blank symbol affects the timestamps
                    if (next_score2.v_nb < prefix_score.v_b + prob) {
                        // compute viterbi score
                        next_score2.v_nb = prefix_score.v_b + prob;
                        // new token added
                        next_score2.cur_token_prob = prob;
                        next_score2.times_nb = prefix_score.times_b;
                        next_score2.times_nb.emplace_back(num_frame_decoded_);
                    }

                    // Prefix changed, calculate the context score.
                    if (context_graph_ && !next_score2.has_context) {
                        next_score2.UpdateContext(
                            context_graph_, prefix_score, id, prefix.size());
                        next_score2.has_context = true;
                    }

                } else {
                    // id != prefix.back()
                    // case 3: *a + b => *ab, *a<blank> +b => *ab
                    std::vector<int> new_prefix(prefix);
                    new_prefix.emplace_back(id);
                    PrefixScore& next_score = next_hyps[new_prefix];
                    next_score.nb =
                        LogSumExp(next_score.nb, prefix_score.Score() + prob);

                    // timestamp: a non-blank symbol affects the timestamps
                    if (next_score.v_nb < prefix_score.ViterbiScore() + prob) {
                        next_score.v_nb = prefix_score.ViterbiScore() + prob;

                        next_score.cur_token_prob = prob;
                        next_score.times_nb = prefix_score.Times();
                        next_score.times_nb.emplace_back(num_frame_decoded_);
                    }

                    // Prefix changed, calculate the context score.
                    if (context_graph_ && !next_score.has_context) {
                        next_score.UpdateContext(
                            context_graph_, prefix_score, id, prefix.size());
                        next_score.has_context = true;
                    }
                }
            }  // end for (const auto& it : cur_hyps_)
        }      // end for (int i = 0; i < topk_index.size(); ++i)

        // 3. second beam prune, only keep top n best paths
        // nth_element moves the best second_beam_size entries to the front;
        // after truncation they are sorted best-first.
        std::vector<std::pair<std::vector<int>, PrefixScore>> arr(
            next_hyps.begin(), next_hyps.end());
        int second_beam_size =
            std::min(static_cast<int>(arr.size()), opts_.second_beam_size);
        std::nth_element(arr.begin(),
                         arr.begin() + second_beam_size,
                         arr.end(),
                         PrefixScoreCompare);
        arr.resize(second_beam_size);
        std::sort(arr.begin(), arr.end(), PrefixScoreCompare);

        // 4. update cur_hyps by next_hyps, and get new result
        UpdateHypotheses(arr);
    }  // end for (int t = 0; t < logp.size(); ++t, ++num_frame_decoded_)
}


void CTCPrefixBeamSearch::UpdateHypotheses(
    const std::vector<std::pair<std::vector<int>, PrefixScore>>& hyps) {
    cur_hyps_.clear();

    outputs_.clear();
    hypotheses_.clear();
    likelihood_.clear();
    viterbi_likelihood_.clear();
    times_.clear();

    for (auto& item : hyps) {
        cur_hyps_[item.first] = item.second;

        UpdateOutputs(item);
        hypotheses_.emplace_back(std::move(item.first));
        likelihood_.emplace_back(item.second.TotalScore());
        viterbi_likelihood_.emplace_back(item.second.ViterbiScore());
        times_.emplace_back(item.second.Times());
    }
}

void CTCPrefixBeamSearch::UpdateOutputs(
    const std::pair<std::vector<int>, PrefixScore>& prefix) {
    const std::vector<int>& input = prefix.first;
    const std::vector<int>& start_boundaries = prefix.second.start_boundaries;
    const std::vector<int>& end_boundaries = prefix.second.end_boundaries;

    // add <context> </context> tag
    std::vector<int> output;
    int s = 0;
    int e = 0;
    for (int i = 0; i < input.size(); ++i) {
        output.emplace_back(input[i]);
    }

    outputs_.emplace_back(output);
}

// Finishes the search for the current utterance: applies the final context
// update, then logs every hypothesis (rank, length, score and token ids) at
// VLOG(2).
void CTCPrefixBeamSearch::FinalizeSearch() {
    UpdateFinalContext();

    VLOG(2) << "num_frame_decoded_: " << num_frame_decoded_;
    for (size_t i = 0; i < hypotheses_.size(); ++i) {
        // Log the real rank; the old counter was never incremented, so every
        // hypothesis was reported as "hyp 0".
        VLOG(2) << "hyp " << i << " len: " << hypotheses_[i].size()
                << " ctc score: " << likelihood_[i];
        for (const int token : hypotheses_[i]) {
            VLOG(2) << token;
        }
    }
}

void CTCPrefixBeamSearch::UpdateFinalContext() {
    if (context_graph_ == nullptr) return;

    CHECK(hypotheses_.size() == cur_hyps_.size());
    CHECK(hypotheses_.size() == likelihood_.size());

    // We should backoff the context Score/state when the context is
    // not fully matched at the last time.
    for (const auto& prefix : hypotheses_) {
        PrefixScore& prefix_score = cur_hyps_[prefix];
        if (prefix_score.context_score != 0) {
            prefix_score.UpdateContext(
                context_graph_, prefix_score, 0, prefix.size());
        }
    }
    std::vector<std::pair<std::vector<int>, PrefixScore>> arr(cur_hyps_.begin(),
                                                              cur_hyps_.end());
    std::sort(arr.begin(), arr.end(), PrefixScoreCompare);

    // Update cur_hyps_ and get new result
    UpdateHypotheses(arr);
}

std::string CTCPrefixBeamSearch::GetBestPath(int index) {
    int n_hyps = Outputs().size();
    CHECK_GT(n_hyps, 0);
    CHECK_LT(index, n_hyps);
    std::vector<int> one = Outputs()[index];
    std::string sentence;
    for (int i = 0; i < one.size(); i++) {
        sentence += unit_table_->Find(one[i]);
    }
    return sentence;
}

// Text of the top-ranked hypothesis.
std::string CTCPrefixBeamSearch::GetBestPath() {
    return GetBestPath(0);
}

// Returns up to `n` best hypotheses as (likelihood, text) pairs, best first.
//
// n: number of hypotheses wanted. A negative value (canonically -1) selects
//    all of them; values above the number of hypotheses are clamped.
//
// Aborts (CHECK) when there are no hypotheses.
std::vector<std::pair<double, std::string>> CTCPrefixBeamSearch::GetNBestPath(
    int n) {
    const int hyps_size = static_cast<int>(hypotheses_.size());
    CHECK_GT(hyps_size, 0);

    // Previously only -1 meant "all"; any other negative n reached reserve()
    // as a huge unsigned size.
    const int count = n < 0 ? hyps_size : std::min(n, hyps_size);

    std::vector<std::pair<double, std::string>> n_best;
    n_best.reserve(count);

    const std::vector<float>& scores = Likelihood();
    for (int i = 0; i < count; ++i) {
        n_best.emplace_back(scores[i], GetBestPath(i));
    }
    return n_best;
}

// All hypotheses as (likelihood, text) pairs; -1 requests the full list.
std::vector<std::pair<double, std::string>>
CTCPrefixBeamSearch::GetNBestPath() {
    const int kAllHypotheses = -1;
    return GetNBestPath(kAllHypotheses);
}

// Final result: the text of the top-ranked hypothesis.
std::string CTCPrefixBeamSearch::GetFinalBestPath() {
    return GetBestPath();
}

// Partial result: the text of the currently top-ranked hypothesis.
std::string CTCPrefixBeamSearch::GetPartialResult() {
    return GetBestPath();
}


}  // namespace ppspeech


================================================
FILE: runtime/engine/asr/decoder/ctc_prefix_beam_search_decoder.h
================================================
// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// modified from
// https://github.com/wenet-e2e/wenet/blob/main/runtime/core/decoder/ctc_prefix_beam_search.cc

#pragma once

#include "decoder/ctc_beam_search_opt.h"
#include "decoder/ctc_prefix_beam_search_score.h"
#include "decoder/decoder_itf.h"
#include "fst/symbol-table.h"

namespace ppspeech {
class ContextGraph;
// CTC prefix beam search decoder.
//
// Frames are consumed through AdvanceDecode(); after each frame the n-best
// list is available, best first, through the accessors below.
class CTCPrefixBeamSearch : public DecoderBase {
  public:
    // Loads the symbol table named by opts.word_symbol_table and resets the
    // decoder; aborts when the table cannot be read.
    CTCPrefixBeamSearch(const CTCBeamSearchOptions& opts);
    ~CTCPrefixBeamSearch() {}

    SearchType Type() const { return SearchType::kPrefixBeamSearch; }

    // Same as Reset().
    void InitDecoder() override;

    // Clears all search state and seeds the search with the empty prefix.
    void Reset() override;

    // Decodes every frame currently obtainable from `decodable`.
    void AdvanceDecode(
        const std::shared_ptr<kaldi::DecodableInterface>& decodable) override;

    // Both return the text of the top-ranked hypothesis.
    std::string GetFinalBestPath() override;
    std::string GetPartialResult() override;

    // Applies the final context update and logs the hypotheses at VLOG(2).
    void FinalizeSearch();

    const std::shared_ptr<fst::SymbolTable> WordSymbolTable() const override {
        return unit_table_;
    }

    // Read-only views of the current n-best list; entry i of each view
    // belongs to the same hypothesis.
    const std::vector<std::vector<int>>& Inputs() const { return hypotheses_; }
    const std::vector<std::vector<int>>& Outputs() const { return outputs_; }
    const std::vector<float>& Likelihood() const { return likelihood_; }
    const std::vector<float>& ViterbiLikelihood() const {
        return viterbi_likelihood_;
    }
    const std::vector<std::vector<int>>& Times() const { return times_; }

  protected:
    // Text of the best hypothesis.
    std::string GetBestPath() override;
    // (likelihood, text) pairs, best first; the overload without arguments
    // returns all hypotheses, the other one at most n.
    std::vector<std::pair<double, std::string>> GetNBestPath() override;
    std::vector<std::pair<double, std::string>> GetNBestPath(int n) override;

  private:
    // Text of the hypothesis at rank `index`.
    std::string GetBestPath(int index);

    // Runs the beam search over the given per-frame scores.
    void AdvanceDecoding(
        const std::vector<std::vector<kaldi::BaseFloat>>& logp);

    // Appends the output token sequence of one hypothesis to outputs_.
    void UpdateOutputs(const std::pair<std::vector<int>, PrefixScore>& prefix);
    // Replaces cur_hyps_ and all n-best views with the given hypotheses.
    void UpdateHypotheses(
        const std::vector<std::pair<std::vector<int>, PrefixScore>>& prefix);
    // Context update at the end of an utterance; no-op without context graph.
    void UpdateFinalContext();


  private:
    CTCBeamSearchOptions opts_;
    // Maps token ids to their text.
    std::shared_ptr<fst::SymbolTable> unit_table_{nullptr};

    // Live hypotheses: prefix (token ids) -> score.
    std::unordered_map<std::vector<int>, PrefixScore, PrefixScoreHash>
        cur_hyps_;

    // n-best list and corresponding likelihood, in sorted order
    std::vector<std::vector<int>> hypotheses_;
    std::vector<float> likelihood_;

    // Timestamps and Viterbi scores of the n-best list.
    std::vector<std::vector<int>> times_;
    std::vector<float> viterbi_likelihood_;

    // Outputs contain the hypotheses_ and tags like: <context> and </context>
    std::vector<std::vector<int>> outputs_;

    // NOTE(review): nothing in this class declaration assigns the context
    // graph, so it stays null unless it is set elsewhere - confirm.
    std::shared_ptr<ContextGraph> context_graph_{nullptr};

    DISALLOW_COPY_AND_ASSIGN(CTCPrefixBeamSearch);
};


}  // namespace ppspeech


================================================
FILE: runtime/engine/asr/decoder/ctc_prefix_beam_search_decoder_main.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "base/common.h"
#include "decoder/ctc_prefix_beam_search_decoder.h"
#include "frontend/data_cache.h"
#include "fst/symbol-table.h"
#include "kaldi/util/table-types.h"
#include "nnet/decodable.h"
#include "nnet/nnet_producer.h"
#include "nnet/u2_nnet.h"

// Command-line flags of the CTC prefix beam search test program.

// Input features / output results, given as Kaldi table specifiers.
DEFINE_string(feature_rspecifier, "", "test feature rspecifier");
DEFINE_string(result_wspecifier, "", "test result wspecifier");
// Vocabulary used to turn token ids into text.
DEFINE_string(word_symbol_table, "", "vocab path");

DEFINE_string(model_path, "", "paddle nnet model");

// The three flags below determine the size and stride, in feature frames, of
// the chunks fed to the network.
DEFINE_int32(receptive_field_length,
             7,
             "receptive field of two CNN(kernel=3) downsampling module.");
DEFINE_int32(subsampling_rate,
             4,
             "two CNN(kernel=3) module downsampling rate.");

DEFINE_int32(nnet_decoder_chunk, 16, "paddle nnet forward chunk");

using kaldi::BaseFloat;
using kaldi::Matrix;
using std::vector;

// Tests the u2 online decoder by feeding it speech features.
//
// Every utterance read from --feature_rspecifier is cut into chunks, pushed
// through the network and the CTC prefix beam search decoder, and its 1-best
// text is written to --result_wspecifier.
//
// Returns 0 when at least one utterance produced a non-empty result,
// 1 otherwise.
int main(int argc, char* argv[]) {
    gflags::SetUsageMessage("Usage:");
    gflags::ParseCommandLineFlags(&argc, &argv, false);
    google::InitGoogleLogging(argv[0]);
    google::InstallFailureSignalHandler();
    FLAGS_logtostderr = 1;

    int32 num_done = 0, num_err = 0;

    // All four flags are mandatory.
    CHECK_NE(FLAGS_result_wspecifier, "");
    CHECK_NE(FLAGS_feature_rspecifier, "");
    CHECK_NE(FLAGS_word_symbol_table, "");
    CHECK_NE(FLAGS_model_path, "");
    LOG(INFO) << "model path: " << FLAGS_model_path;
    LOG(INFO) << "Reading vocab table " << FLAGS_word_symbol_table;

    kaldi::SequentialBaseFloatMatrixReader feature_reader(
        FLAGS_feature_rspecifier);
    kaldi::TokenWriter result_writer(FLAGS_result_wspecifier);

    // nnet
    ppspeech::ModelOptions model_opts;
    model_opts.model_path = FLAGS_model_path;
    std::shared_ptr<ppspeech::U2Nnet> nnet =
        std::make_shared<ppspeech::U2Nnet>(model_opts);

    // decodable: raw feature cache -> nnet producer -> decodable
    std::shared_ptr<ppspeech::DataCache> raw_data =
        std::make_shared<ppspeech::DataCache>();
    std::shared_ptr<ppspeech::NnetProducer> nnet_producer =
        std::make_shared<ppspeech::NnetProducer>(nnet, raw_data, 1.0);
    std::shared_ptr<ppspeech::Decodable> decodable =
        std::make_shared<ppspeech::Decodable>(nnet_producer);

    // decoder
    ppspeech::CTCBeamSearchOptions opts;
    opts.blank = 0;
    opts.first_beam_size = 10;
    opts.second_beam_size = 10;
    opts.word_symbol_table = FLAGS_word_symbol_table;
    ppspeech::CTCPrefixBeamSearch decoder(opts);


    // Chunk geometry in feature frames: consecutive chunks start chunk_stride
    // frames apart and are at most chunk_size frames long.
    int32 chunk_size = FLAGS_receptive_field_length +
                       (FLAGS_nnet_decoder_chunk - 1) * FLAGS_subsampling_rate;
    int32 chunk_stride = FLAGS_subsampling_rate * FLAGS_nnet_decoder_chunk;
    int32 receptive_field_length = FLAGS_receptive_field_length;
    LOG(INFO) << "chunk size (frame): " << chunk_size;
    LOG(INFO) << "chunk stride (frame): " << chunk_stride;
    LOG(INFO) << "receptive field (frame): " << receptive_field_length;

    decoder.InitDecoder();

    kaldi::Timer timer;
    for (; !feature_reader.Done(); feature_reader.Next()) {
        string utt = feature_reader.Key();
        kaldi::Matrix<BaseFloat> feature = feature_reader.Value();

        int nframes = feature.NumRows();
        int feat_dim = feature.NumCols();
        raw_data->SetDim(feat_dim);
        LOG(INFO) << "utt: " << utt;
        LOG(INFO) << "feat shape: " << nframes << ", " << feat_dim;

        // NOTE(review): SetDim was already called above with the same value;
        // this second call looks redundant - confirm.
        raw_data->SetDim(feat_dim);

        int32 ori_feature_len = feature.NumRows();
        int32 num_chunks = feature.NumRows() / chunk_stride + 1;
        LOG(INFO) << "num_chunks: " << num_chunks;

        for (int chunk_idx = 0; chunk_idx < num_chunks; ++chunk_idx) {
            // Frames left from this chunk's start, capped at chunk_size.
            int32 this_chunk_size = 0;
            if (ori_feature_len > chunk_idx * chunk_stride) {
                this_chunk_size = std::min(
                    ori_feature_len - chunk_idx * chunk_stride, chunk_size);
            }
            // NOTE(review): when this break is taken, the SetFinished() call
            // further down is never reached for this utterance - confirm that
            // this is intended.
            if (this_chunk_size < receptive_field_length) {
                LOG(WARNING)
                    << "utt: " << utt << " skip last " << this_chunk_size
                    << " frames, expect is " << receptive_field_length;
                break;
            }


            // Copy the chunk's rows into one contiguous row-major buffer.
            std::vector<kaldi::BaseFloat> feature_chunk(this_chunk_size *
                                                        feat_dim);
            int32 start = chunk_idx * chunk_stride;
            for (int row_id = 0; row_id < this_chunk_size; ++row_id) {
                kaldi::SubVector<kaldi::BaseFloat> feat_row(feature, start);
                std::memcpy(feature_chunk.data() + row_id * feat_dim,
                            feat_row.Data(),
                            feat_dim * sizeof(kaldi::BaseFloat));
                ++start;
            }

            // feat to frontend pipeline cache
            raw_data->Accept(feature_chunk);

            // send data finish signal
            if (chunk_idx == num_chunks - 1) {
                raw_data->SetFinished();
            }

            // forward nnet
            decoder.AdvanceDecode(decodable);

            LOG(INFO) << "Partial result: " << decoder.GetPartialResult();
        }

        decoder.FinalizeSearch();

        // get 1-best result
        std::string result = decoder.GetFinalBestPath();

        // after processing one utt, reset the state.
        decodable->Reset();
        decoder.Reset();

        if (result.empty()) {
            // the TokenWriter can not write an empty string.
            ++num_err;
            LOG(INFO) << " the result of " << utt << " is empty";
            continue;
        }

        LOG(INFO) << " the result of " << utt << " is " << result;
        result_writer.Write(utt, result);

        ++num_done;
    }

    double elapsed = timer.Elapsed();
    LOG(INFO) << "Program cost:" << elapsed << " sec";

    LOG(INFO) << "Done " << num_done << " utterances, " << num_err
              << " with errors.";
    return (num_done != 0 ? 0 : 1);
}


================================================
FILE: runtime/engine/asr/decoder/ctc_prefix_beam_search_score.h
================================================
// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// modified from
// https://github.com/wenet-e2e/wenet/blob/main/runtime/core/decoder/ctc_prefix_beam_search.h

#pragma once

#include "base/common.h"
#include "utils/math.h"

namespace ppspeech {

class ContextGraph;

// Scores and bookkeeping attached to one prefix hypothesis during CTC prefix
// beam search. Every score field defaults to -kBaseFloatMax.
struct PrefixScore {
    // decoding, unit in log scale
    float b = -kBaseFloatMax;   // blank ending score
    float nb = -kBaseFloatMax;  // non-blank ending score

    // decoding score: b and nb combined through LogSumExp
    float Score() const { return LogSumExp(b, nb); }

    // timestamp, unit in log scale
    float v_b = -kBaseFloatMax;             // viterbi blank ending score
    float v_nb = -kBaseFloatMax;            // viterbi non-blank ending score
    float cur_token_prob = -kBaseFloatMax;  // prob of current token
    std::vector<int> times_b;               // times of viterbi blank path
    std::vector<int> times_nb;              // times of viterbi non-blank path


    // timestamp score: the larger of the two viterbi scores
    float ViterbiScore() const { return std::max(v_b, v_nb); }

    // get timestamp: times_b when v_b is strictly larger than v_nb,
    // otherwise times_nb (ties go to the non-blank path)
    const std::vector<int>& Times() const {
        return v_b > v_nb ? times_b : times_nb;
    }

    // context state
    bool has_context = false;
    int context_state = 0;
    float context_score = 0;
    std::vector<int> start_boundaries;
    std::vector<int> end_boundaries;


    // decoding score with context bias
    float TotalScore() const { return Score() + context_score; }

    // Copies context_state, context_score and both boundary vectors from
    // prefix_score. has_context is not copied.
    void CopyContext(const PrefixScore& prefix_score) {
        context_state = prefix_score.context_state;
        context_score = prefix_score.context_score;
        start_boundaries = prefix_score.start_boundaries;
        end_boundaries = prefix_score.end_boundaries;
    }

    // Not implemented: the body unconditionally trips CHECK(false) and none
    // of the arguments are used.
    void UpdateContext(const std::shared_ptr<ContextGraph>& constext_graph,
                       const PrefixScore& prefix_score,
                       int word_id,
                       int prefix_len) {
        CHECK(false);
    }

    // Sets the scores used for the empty prefix. Fields not listed here
    // (cur_token_prob, times_*, context state) are left untouched.
    void InitEmpty() {
        b = 0.0f;             // log(1)
        nb = -kBaseFloatMax;  // log(0)
        v_b = 0.0f;           // log(1)
        v_nb = 0.0f;          // log(1)
    }
};

// Hash functor for a token-id prefix, so std::vector<int> can be used as the
// key of an unordered container.
struct PrefixScoreHash {
    // https://stackoverflow.com/questions/20511347/a-good-hash-function-for-a-vector
    std::size_t operator()(const std::vector<int>& prefix) const {
        // Seed with the length, then fold each token id into the running
        // value.
        std::size_t hash = prefix.size();
        for (std::size_t pos = 0; pos < prefix.size(); ++pos) {
            const int token = prefix[pos];
            const std::size_t mix =
                token + 0x9e3779b9 + (hash << 6) + (hash >> 2);
            hash = hash ^ mix;
        }
        return hash;
    }
};

// Pair of a token-id prefix and a PrefixScoreHash.
// NOTE(review): the second element is the hash functor, not PrefixScore; this
// looks unintended given the alias name -- confirm against users of the alias.
using PrefixWithScoreType = std::pair<std::vector<int>, PrefixScoreHash>;

}  // namespace ppspeech


================================================
FILE: runtime/engine/asr/decoder/ctc_tlg_decoder.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "decoder/ctc_tlg_decoder.h"

namespace ppspeech {

// Builds a decoder from `opts`.
//
// Requirements enforced with CHECK:
//   - opts.fst_ptr must hold a loaded decoding graph;
//   - opts.word_symbol_table must be a non-empty path that can be read as a
//     text symbol table.
// The decoder is left in the freshly reset state (see Reset()).
TLGDecoder::TLGDecoder(TLGDecoderOptions opts) : opts_(opts) {
    fst_ = opts.fst_ptr;
    CHECK(fst_ != nullptr);

    CHECK(!opts.word_symbol_table.empty());
    word_symbol_table_.reset(
        fst::SymbolTable::ReadText(opts.word_symbol_table));
    // ReadText yields a null pointer when the file cannot be read or parsed.
    // Fail here with a clear message instead of dereferencing null later in
    // GetPartialResult()/GetFinalBestPath().
    CHECK(word_symbol_table_ != nullptr)
        << "failed to read word symbol table: " << opts.word_symbol_table;

    decoder_.reset(new kaldi::LatticeFasterOnlineDecoder(*fst_, opts.opts));

    Reset();
}

// Drops every per-utterance result and restarts the underlying Kaldi decoder.
void TLGDecoder::Reset() {
    // cached n-best results
    hypotheses_.clear();
    olabels_.clear();
    likelihood_.clear();
    times_.clear();

    // search state
    num_frame_decoded_ = 0;
    decoder_->InitDecoding();
}

// Initialising the decoder is the same operation as resetting it.
void TLGDecoder::InitDecoder() {
    Reset();
}

// Decodes frame by frame until `decodable` reports that the next frame to
// decode (num_frame_decoded_) is the last one.
void TLGDecoder::AdvanceDecode(
    const std::shared_ptr<kaldi::DecodableInterface>& decodable) {
    for (;;) {
        if (decodable->IsLastFrame(num_frame_decoded_)) {
            break;
        }
        AdvanceDecoding(decodable.get());
    }
}

// Advances the Kaldi decoder by exactly one frame and counts it.
void TLGDecoder::AdvanceDecoding(kaldi::DecodableInterface* decodable) {
    const int kFramesPerStep = 1;
    decoder_->AdvanceDecoding(decodable, kFramesPerStep);
    ++num_frame_decoded_;
}


// Returns the words on the current best path, concatenated without a
// separator. GetBestPath is called with `false` as its second argument.
// Returns an empty string when no frame has been decoded yet.
std::string TLGDecoder::GetPartialResult() {
    // Guard against the Kaldi assertion:
    // Assertion failed: (this->NumFramesDecoded() > 0 && "You cannot call
    // BestPathEnd if no frames were decoded.")
    if (num_frame_decoded_ == 0) {
        return std::string("");
    }

    kaldi::Lattice best_path;
    decoder_->GetBestPath(&best_path, false);

    std::vector<int> ilabels;
    std::vector<int> olabels;
    kaldi::LatticeWeight path_weight;
    fst::GetLinearSymbolSequence(best_path, &ilabels, &olabels, &path_weight);

    std::string text;
    for (const int word_id : olabels) {
        text += word_symbol_table_->Find(word_id);
    }
    return text;
}

// Finalizes decoding and fills the n-best caches returned by Inputs(),
// Outputs(), Likelihood() and Times().
//
// For each of the (at most opts_.nbest) shortest paths of the lattice:
//   - hypotheses_ gets the input labels with consecutive repeats collapsed,
//     each shifted by -1;
//   - times_      gets the alignment index at which each label was emitted
//                 (placeholder timing, see the "fake time" notes);
//   - olabels_    gets the output (word) label sequence;
//   - likelihood_ gets -(Value1 + Value2) of the path weight.
// All four vectors are rebuilt from scratch, so calling this twice without an
// intervening Reset() does not leave stale entries behind.
void TLGDecoder::FinalizeSearch() {
    decoder_->FinalizeDecoding();
    kaldi::CompactLattice clat;
    decoder_->GetLattice(&clat, true);
    kaldi::Lattice lat, nbest_lat;
    fst::ConvertLattice(clat, &lat);
    fst::ShortestPath(lat, &nbest_lat, opts_.nbest);
    std::vector<kaldi::Lattice> nbest_lats;
    fst::ConvertNbestToVector(nbest_lat, &nbest_lats);

    hypotheses_.clear();
    hypotheses_.reserve(nbest_lats.size());
    // olabels_ is filled in lock-step with the other three vectors, so it has
    // to be cleared with them as well.
    olabels_.clear();
    olabels_.reserve(nbest_lats.size());
    likelihood_.clear();
    likelihood_.reserve(nbest_lats.size());
    times_.clear();
    times_.reserve(nbest_lats.size());

    // Iterate by reference: copying every lattice is unnecessary.
    for (const auto& path : nbest_lats) {
        kaldi::LatticeWeight weight;
        std::vector<int> hypothese;
        std::vector<int> time;
        std::vector<int> alignment;
        std::vector<int> words_id;
        fst::GetLinearSymbolSequence(path, &alignment, &words_id, &weight);

        // `idx + 1 < num_labels` instead of `idx < size() - 1`: the latter
        // underflows for an empty alignment and reads out of bounds.
        const std::size_t num_labels = alignment.size();
        for (std::size_t idx = 0; idx + 1 < num_labels; ++idx) {
            if (alignment[idx] == 0) continue;
            if (alignment[idx] != alignment[idx + 1]) {
                hypothese.push_back(alignment[idx] - 1);
                time.push_back(static_cast<int>(idx));  // fake time, todo later
            }
        }
        // The last label always closes a run; an empty alignment has none, so
        // that path contributes an empty hypothesis.
        if (num_labels > 0) {
            hypothese.push_back(alignment[num_labels - 1] - 1);
            time.push_back(
                static_cast<int>(num_labels - 1));  // fake time, todo later
        }

        hypotheses_.push_back(hypothese);
        times_.push_back(time);
        olabels_.push_back(words_id);
        likelihood_.push_back(-(weight.Value2() + weight.Value1()));
    }
}

// Returns the words on the best path, concatenated without a separator.
// GetBestPath is called with `true` as its second argument. Returns an empty
// string when no frame has been decoded.
std::string TLGDecoder::GetFinalBestPath() {
    // Guard against the Kaldi assertion:
    // Assertion failed: (this->NumFramesDecoded() > 0 && "You cannot call
    // BestPathEnd if no frames were decoded.")
    if (num_frame_decoded_ == 0) {
        return std::string("");
    }

    kaldi::Lattice best_path;
    decoder_->GetBestPath(&best_path, true);

    std::vector<int> ilabels;
    std::vector<int> olabels;
    kaldi::LatticeWeight path_weight;
    fst::GetLinearSymbolSequence(best_path, &ilabels, &olabels, &path_weight);

    std::string text;
    for (const int word_id : olabels) {
        text += word_symbol_table_->Find(word_id);
    }
    return text;
}

}  // namespace ppspeech


================================================
FILE: runtime/engine/asr/decoder/ctc_tlg_decoder.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "base/common.h"
#include "decoder/decoder_itf.h"
#include "kaldi/decoder/lattice-faster-online-decoder.h"
#include "util/parse-options.h"
#include "utils/file_utils.h"

DECLARE_string(word_symbol_table);
DECLARE_string(graph_path);
DECLARE_int32(max_active);
DECLARE_double(beam);
DECLARE_double(lattice_beam);
DECLARE_int32(nbest);

namespace ppspeech {

struct TLGDecoderOptions {
    kaldi::LatticeFasterDecoderConfig opts{};
    // todo remove later, add into decode resource
    std::string word_symbol_table;
    std::string fst_path;
    std::shared_ptr<fst::Fst<fst::StdArc>> fst_ptr;
    int nbest;

    TLGDecoderOptions() : word_symbol_table(""), fst_path(""), fst_ptr(nullptr), nbest(10) {}

    static TLGDecoderOptions InitFromFlags() {
        TLGDecoderOptions decoder_opts;
        decoder_opts.word_symbol_table = FLAGS_word_symbol_table;
        decoder_opts.fst_path = FLAGS_graph_path;
        LOG(INFO) << "fst path: " << decoder_opts.fst_path;
        LOG(INFO) << "symbole table: " << decoder_opts.word_symbol_table;

        if (!decoder_opts.fst_path.empty()) {
            CHECK(FileExists(decoder_opts.fst_path));
            decoder_opts.fst_ptr.reset(fst::Fst<fst::StdArc>::Read(FLAGS_graph_path));
        }

        decoder_opts.opts.max_active = FLAGS_max_active;
        decoder_opts.opts.beam = FLAGS_beam;
        decoder_opts.opts.lattice_beam = FLAGS_lattice_beam;
        decoder_opts.nbest = FLAGS_nbest;
        LOG(INFO) << "LatticeFasterDecoder max active: "
                  << decoder_opts.opts.max_active;
        LOG(INFO) << "LatticeFasterDecoder beam: " << decoder_opts.opts.beam;
        LOG(INFO) << "LatticeFasterDecoder lattice_beam: "
                  << decoder_opts.opts.lattice_beam;

        return decoder_opts;
    }
};

class TLGDecoder : public DecoderBase {
  public:
    explicit TLGDecoder(TLGDecoderOptions opts);
    ~TLGDecoder() = default;

    void InitDecoder() override;
    void Reset() override;

    void AdvanceDecode(
        const std::shared_ptr<kaldi::DecodableInterface>& decodable) override;

    void Decode();

    std::string GetFinalBestPath() override;
    std::string GetPartialResult() override;

    const std::shared_ptr<fst::SymbolTable> WordSymbolTable() const override {
        return word_symbol_table_;
    }

    int DecodeLikelihoods(const std::vector<std::vector<BaseFloat>>& probs,
                          const std::vector<std::string>& nbest_words);

    void FinalizeSearch() override;
    const std::vector<std::vector<int>>& Inputs() const override {
        return hypotheses_;
    }
    const std::vector<std::vector<int>>& Outputs() const override {
        return olabels_;
    }  // outputs_; }
    const std::vector<float>& Likelihood() const override {
        return likelihood_;
    }
    const std::vector<std::vector<int>>& Times() const override {
        return times_;
    }

  protected:
    std::string GetBestPath() override {
        CHECK(false);
        return {};
    }
    std::vector<std::pair<double, std::string>> GetNBestPath() override {
        CHECK(false);
        return {};
    }
    std::vector<std::pair<double, std::string>> GetNBestPath(int n) override {
        CHECK(false);
        return {};
    }

  private:
    void AdvanceDecoding(kaldi::DecodableInterface* decodable);

    int num_frame_decoded_;
    std::vector<std::vector<int>> hypotheses_;
    std::vector<std::vector<int>> olabels_;
    std::vector<float> likelihood_;
    std::vector<std::vector<int>> times_;

    std::shared_ptr<kaldi::LatticeFasterOnlineDecoder> decoder_;
    std::shared_ptr<fst::Fst<fst::StdArc>> fst_;
    std::shared_ptr<fst::SymbolTable> word_symbol_table_;
    TLGDecoderOptions opts_;
};


}  // namespace ppspeech


================================================
FILE: runtime/engine/asr/decoder/ctc_tlg_decoder_main.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// todo refactor, replace with gtest

#include "base/common.h"
#include "decoder/ctc_tlg_decoder.h"
#include "decoder/param.h"
#include "frontend/data_cache.h"
#include "kaldi/util/table-types.h"
#include "nnet/decodable.h"
#include "nnet/nnet_producer.h"


DEFINE_string(nnet_prob_rspecifier, "", "test feature rspecifier");
DEFINE_string(result_wspecifier, "", "test result wspecifier");


using kaldi::BaseFloat;
using kaldi::Matrix;
using std::vector;

// test TLG decoder by feeding speech feature.
int main(int argc, char* argv[]) {
    gflags::SetUsageMessage("Usage:");
    gflags::ParseCommandLineFlags(&argc, &argv, false);
    google::InitGoogleLogging(argv[0]);
    google::InstallFailureSignalHandler();
    FLAGS_logtostderr = 1;

    kaldi::SequentialBaseFloatMatrixReader nnet_prob_reader(
        FLAGS_nnet_prob_rspecifier);
    kaldi::TokenWriter result_writer(FLAGS_result_wspecifier);

    int32 num_done = 0, num_err = 0;

    ppspeech::TLGDecoderOptions opts =
        ppspeech::TLGDecoderOptions::InitFromFlags();
    opts.opts.beam = 15.0;
    opts.opts.lattice_beam = 7.5;
    ppspeech::TLGDecoder decoder(opts);

    ppspeech::ModelOptions model_opts = ppspeech::ModelOptions::InitFromFlags();

    std::shared_ptr<ppspeech::NnetProducer> nnet_producer =
        std::make_shared<ppspeech::NnetProducer>(nullptr, nullptr, 1.0);
    std::shared_ptr<ppspeech::Decodable> decodable(
        new ppspeech::Decodable(nnet_producer, FLAGS_acoustic_scale));

    decoder.InitDecoder();
    kaldi::Timer timer;

    for (; !nnet_prob_reader.Done(); nnet_prob_reader.Next()) {
        string utt = nnet_prob_reader.Key();
        kaldi::Matrix<BaseFloat> prob = nnet_prob_reader.Value();
        decodable->Acceptlikelihood(prob);
        decoder.AdvanceDecode(decodable);
        std::string result;
        result = decoder.GetFinalBestPath();
        decodable->Reset();
        decoder.Reset();
        if (result.empty()) {
            // the TokenWriter can not write empty string.
            ++num_err;
            KALDI_LOG << " the result of " << utt << " is empty";
            continue;
        }
        KALDI_LOG << " the result of " << utt << " is " << result;
        result_writer.Write(utt, result);
        ++num_done;
    }

    double elapsed = timer.Elapsed();
    KALDI_LOG << " cost:" << elapsed << " s";

    KALDI_LOG << "Done " << num_done << " utterances, " << num_err
              << " with errors.";
    return (num_done != 0 ? 0 : 1);
}


================================================
FILE: runtime/engine/asr/decoder/decoder_itf.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "base/common.h"
#include "fst/symbol-table.h"
#include "kaldi/decoder/decodable-itf.h"

namespace ppspeech {

// Identifies the search algorithm a decoder implements.
enum SearchType {
    kPrefixBeamSearch = 0,  // CTC prefix beam search
    kWfstBeamSearch = 1,    // beam search over a WFST graph
};
// Abstract interface shared by the decoders. Every member is pure virtual.
class DecoderInterface {
  public:
    virtual ~DecoderInterface() {}

    // Prepare the decoder for use.
    virtual void InitDecoder() = 0;

    // Discard per-utterance state.
    virtual void Reset() = 0;

    // call AdvanceDecoding
    virtual void AdvanceDecode(
        const std::shared_ptr<kaldi::DecodableInterface>& decodable) = 0;

    // call GetBestPath
    virtual std::string GetFinalBestPath() = 0;

    // Text of the current best hypothesis while decoding is in progress.
    virtual std::string GetPartialResult() = 0;

    virtual const std::shared_ptr<fst::SymbolTable> WordSymbolTable() const = 0;

    // Finish the search; the four accessors below expose its results.
    virtual void FinalizeSearch() = 0;

    virtual const std::vector<std::vector<int>>& Inputs() const = 0;
    virtual const std::vector<std::vector<int>>& Outputs() const = 0;
    virtual const std::vector<float>& Likelihood() const = 0;
    virtual const std::vector<std::vector<int>>& Times() const = 0;

  protected:
    // virtual void AdvanceDecoding(kaldi::DecodableInterface* decodable) = 0;

    // virtual void Decode() = 0;

    virtual std::string GetBestPath() = 0;

    virtual std::vector<std::pair<double, std::string>> GetNBestPath() = 0;

    virtual std::vector<std::pair<double, std::string>> GetNBestPath(int n) = 0;
};

// Common base of the concrete decoders: owns the decoded-frame counter.
class DecoderBase : public DecoderInterface {
  protected:
    // Decoded-frame count, start from one (i.e. num_frame_decoded_ + 1).
    int NumFrameDecoded() { return num_frame_decoded_ + 1; }

  protected:
    // current decoding frame number, abs_time_step_
    // Zero-initialised so NumFrameDecoded() never reads an indeterminate
    // value, even if a subclass has not run its own Reset() yet.
    int32 num_frame_decoded_ = 0;
};

}  // namespace ppspeech

================================================
FILE: runtime/engine/asr/decoder/param.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "base/common.h"

// Command-line flags shared by the ASR test binaries.
// NOTE(review): these are flag *definitions* in a header, so this header can
// only be included from one translation unit per binary -- confirm.

// feature
// NOTE(review): the help text of use_fbank reads ambiguously ("False for
// fbank") -- confirm the intended meaning before rewording it.
DEFINE_bool(use_fbank, false, "False for fbank; or linear feature");
DEFINE_bool(fill_zero,
            false,
            "fill zero at last chunk, when chunk < chunk_size");
// DEFINE_bool(to_float32, true, "audio convert to pcm32. True for linear
// feature, or fbank");
DEFINE_int32(num_bins, 161, "num bins of mel");
DEFINE_string(cmvn_file, "", "read cmvn");

// feature sliding window
DEFINE_int32(receptive_field_length,
             7,
             "receptive field of two CNN(kernel=3) downsampling module.");
DEFINE_int32(subsampling_rate,
             4,
             "two CNN(kernel=3) module downsampling rate.");
DEFINE_int32(nnet_decoder_chunk, 1, "paddle nnet forward chunk");

// nnet
DEFINE_string(model_path, "avg_1.jit.pdmodel", "paddle nnet model");
#ifdef USE_ONNX
DEFINE_bool(with_onnx_model, false, "True mean the model path is onnx model path");
#endif

// decoder
DEFINE_double(acoustic_scale, 1.0, "acoustic scale");
DEFINE_string(graph_path, "", "decoder graph");
DEFINE_string(word_symbol_table, "", "word symbol table");
DEFINE_int32(max_active, 7500, "max active");
DEFINE_double(beam, 15.0, "decoder beam");
// Help text used to repeat "decoder beam", which made --help ambiguous.
DEFINE_double(lattice_beam, 7.5, "decoder lattice beam");
DEFINE_double(blank_threshold, 0.98, "blank skip threshold");

// DecodeOptions flags
DEFINE_int32(num_left_chunks, -1, "left chunks in decoding");
DEFINE_double(ctc_weight,
              0.5,
              "ctc weight when combining ctc score and rescoring score");
DEFINE_double(rescoring_weight,
              1.0,
              "rescoring weight when combining ctc score and rescoring score");
// Adjacent string literals are concatenated verbatim, so each piece needs its
// own trailing space (the help used to read "isconventional", "0.0dose").
DEFINE_double(reverse_weight,
              0.3,
              "used for bitransformer rescoring. it must be 0.0 if decoder is "
              "conventional transformer decoder, and only when reverse_weight "
              "> 0.0 will the right to left decoder be calculated and used");
DEFINE_int32(nbest, 10, "nbest for ctc wfst or prefix search");
DEFINE_int32(blank, 0, "blank id in vocab");


================================================
FILE: runtime/engine/asr/nnet/CMakeLists.txt
================================================
# Static library `nnet`: decodable + nnet producer + model back-ends.
set(srcs decodable.cc nnet_producer.cc)

# The u2 nnet is always built; the ONNX variant only when WITH_ONNX is set.
list(APPEND srcs u2_nnet.cc)
if(WITH_ONNX)
    list(APPEND srcs u2_onnx_nnet.cc)
endif()
add_library(nnet STATIC ${srcs})
target_link_libraries(nnet utils)
# The ONNX build additionally links ${FASTDEPLOY_LIBS}.
if(WITH_ONNX)
    target_link_libraries(nnet ${FASTDEPLOY_LIBS})
endif()

# PUBLIC: flags and include dirs propagate to targets that link `nnet`.
target_compile_options(nnet  PUBLIC ${PADDLE_COMPILE_FLAGS})
target_include_directories(nnet  PUBLIC ${pybind11_INCLUDE_DIRS} ${PROJECT_SOURCE_DIR})

# test bin (currently disabled)
#set(bin_name u2_nnet_main)
#add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
#target_compile_options(${bin_name}  PRIVATE ${PADDLE_COMPILE_FLAGS})
#target_include_directories(${bin_name}  PRIVATE ${pybind11_INCLUDE_DIRS} ${PROJECT_SOURCE_DIR})
#target_link_libraries(${bin_name}  ${PYTHON_LIBRARIES} ${PADDLE_LINK_FLAGS})

================================================
FILE: runtime/engine/asr/nnet/decodable.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "nnet/decodable.h"

namespace ppspeech {

using kaldi::BaseFloat;
using kaldi::Matrix;
using kaldi::Vector;
using std::vector;

// Wraps `nnet_producer` as a decodable whose log-likelihoods are scaled by
// `acoustic_scale`. No frame is ready after construction.
//
// Every scalar member is initialised here, including the two bookkeeping
// fields this file never assigns, so the object holds no indeterminate
// values. The initialiser order follows the declaration order in the class.
Decodable::Decodable(const std::shared_ptr<NnetProducer>& nnet_producer,
                     kaldi::BaseFloat acoustic_scale)
    : nnet_producer_(nnet_producer),
      frame_offset_(0),
      frames_ready_(0),
      current_log_post_subsampled_offset_(0),
      num_chunk_computed_(0),
      acoustic_scale_(acoustic_scale) {}

// for debug
void Decodable::Acceptlikelihood(const Matrix<BaseFloat>& likelihood) {
    nnet_producer_->Acceptlikelihood(likelihood);
}

// Number of frames that have been computed so far.
int32 Decodable::NumFramesReady() const {
    return frames_ready_;
}


// Valid frame indices run from 0 to frames_ready_ - 1.
// Tries to compute `frame` first; returns true when it is still not available
// afterwards.
bool Decodable::IsLastFrame(int32 frame) {
    // The return value is not needed: success shows up in frames_ready_.
    EnsureFrameHaveComputed(frame);
    const bool frame_available = frame < frames_ready_;
    return !frame_available;
}

// Always reports 0.
int32 Decodable::NumIndices() const {
    return 0;
}

// The ilabel (TokenId) of the wfst (TLG) has <eps> (id = 0) inserted in front
// of the nnet prob ids, so the nnet id is the token id shifted down by one.
int32 Decodable::TokenId2NnetId(int32 token_id) {
    const int32 nnet_id = token_id - 1;
    return nnet_id;
}


// Returns true when `frame` is already available; otherwise tries to advance
// by one chunk and returns whether that succeeded.
bool Decodable::EnsureFrameHaveComputed(int32 frame) {
    const bool already_ready = frame < frames_ready_;
    if (already_ready) {
        return true;
    }
    // decoding frame
    return AdvanceChunk();
}

// Reads the next frame of nnet output from the producer into
// framelikelihood_. On success the new frame becomes the only buffered one
// (frame_offset_ points at it) and frames_ready_ grows by one.
// Returns false, leaving the counters untouched, when nothing could be read.
bool Decodable::AdvanceChunk() {
    kaldi::Timer timer;
    if (!nnet_producer_->Read(&framelikelihood_)) {
        return false;
    }
    frame_offset_ = frames_ready_;
    ++frames_ready_;
    VLOG(1) << "AdvanceChunk feat + forward cost: " << timer.Elapsed()
            << " sec.";
    return true;
}

// Advances by one frame and copies that frame into `logprobs`, storing its
// length in `*vocab_dim`. Returns false when no frame could be read or the
// frame that was read is empty.
bool Decodable::AdvanceChunk(kaldi::Vector<kaldi::BaseFloat>* logprobs,
                             int* vocab_dim) {
    if (!AdvanceChunk()) {
        return false;
    }

    if (framelikelihood_.empty()) {
        LOG(WARNING) << "No new nnet out in cache.";
        return false;
    }

    const size_t num_values = framelikelihood_.size();
    logprobs->Resize(num_values);
    std::memcpy(logprobs->Data(),
                framelikelihood_.data(),
                num_values * sizeof(kaldi::BaseFloat));
    *vocab_dim = num_values;
    return true;
}

// Copies the likelihood of `frame` into `*likelihood`.
// Returns false when the frame could not be computed.
bool Decodable::FrameLikelihood(int32 frame, vector<BaseFloat>* likelihood) {
    const bool available = EnsureFrameHaveComputed(frame);
    if (!available) {
        VLOG(3) << "framelikehood exit.";
        return false;
    }

    // Exactly one frame is buffered at any time.
    CHECK_EQ(1, (frames_ready_ - frame_offset_));
    likelihood->operator=(framelikelihood_);
    return true;
}

// Returns acoustic_scale_ times the buffered nnet output for wfst input label
// `index` at `frame`.
//
// `index` is a TLG ilabel: 0 is <eps>, so valid values are
// 1..framelikelihood_.size() and the nnet column is index - 1.
// `frame` must be the single frame currently buffered.
// Returns 0.0 when the frame cannot be computed.
BaseFloat Decodable::LogLikelihood(int32 frame, int32 index) {
    if (!EnsureFrameHaveComputed(frame)) {
        // Same value the old `return false` converted to, spelled as a float.
        return 0.0;
    }

    // index 0 (<eps>) would map to nnet id -1 and read out of bounds.
    CHECK_GT(index, 0);
    CHECK_LE(static_cast<size_t>(index), framelikelihood_.size());
    CHECK_LE(frame, frames_ready_);

    // Only one frame is buffered, so the requested frame must be that one.
    const int32 frame_idx = frame - frame_offset_;
    CHECK_EQ(frame_idx, 0);

    // NOTE(review): the previous comment here said the nnet output is prob
    // rather than log prob, while the value is used as a log-likelihood --
    // confirm which one the producer delivers.
    const BaseFloat logprob = framelikelihood_[TokenId2NnetId(index)];
    return acoustic_scale_ * logprob;
}

// Returns to the state right after construction: resets the producer (when
// there is one), the frame counters and the buffered frame.
void Decodable::Reset() {
    if (nnet_producer_ != nullptr) {
        nnet_producer_->Reset();
    }
    framelikelihood_.clear();
    frames_ready_ = 0;
    frame_offset_ = 0;
}

void Decodable::AttentionRescoring(const std::vector<std::vector<int>>& hyps,
                                   float reverse_weight,
                                   std::vector<float>* rescoring_score) {
    kaldi::Timer timer;
    nnet_producer_->AttentionRescoring(hyps, reverse_weight, rescoring_score);
    VLOG(1) << "Attention Rescoring cost:  " << timer.Elapsed() << " sec.";
}

}  // namespace ppspeech


================================================
FILE: runtime/engine/asr/nnet/decodable.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "base/common.h"
#include "kaldi/decoder/decodable-itf.h"
#include "matrix/kaldi-matrix.h"
#include "nnet/nnet_itf.h"
#include "nnet/nnet_producer.h"

namespace ppspeech {

struct DecodableOpts;

// kaldi::DecodableInterface implementation backed by an NnetProducer.
// It buffers one frame of nnet output at a time (framelikelihood_).
class Decodable : public kaldi::DecodableInterface {
  public:
    explicit Decodable(const std::shared_ptr<NnetProducer>& nnet_producer,
                       kaldi::BaseFloat acoustic_scale = 1.0);

    // nnet logprob output, used by wfst
    // `index` is the wfst input label; it is mapped with TokenId2NnetId().
    virtual kaldi::BaseFloat LogLikelihood(int32 frame, int32 index);

    // nnet output
    // Copies the buffered frame into `likelihood`; false if unavailable.
    virtual bool FrameLikelihood(int32 frame,
                                 std::vector<kaldi::BaseFloat>* likelihood);

    // forward nnet with feats
    // Reads one more frame from the producer; false when nothing was read.
    bool AdvanceChunk();

    // forward nnet with feats, and get nnet output
    bool AdvanceChunk(kaldi::Vector<kaldi::BaseFloat>* logprobs,
                      int* vocab_dim);

    void AttentionRescoring(const std::vector<std::vector<int>>& hyps,
                            float reverse_weight,
                            std::vector<float>* rescoring_score);

    // True when `frame` is still unavailable after trying to compute it.
    virtual bool IsLastFrame(int32 frame);

    // nnet output dim, e.g. vocab size
    // NOTE(review): the implementation in decodable.cc always returns 0.
    virtual int32 NumIndices() const;

    virtual int32 NumFramesReady() const;

    void Reset();

    bool IsInputFinished() const { return nnet_producer_->IsFinished(); }

    // True if `frame` is available, computing one more frame if necessary.
    bool EnsureFrameHaveComputed(int32 frame);

    // Maps a wfst input label to an nnet output index (token_id - 1).
    int32 TokenId2NnetId(int32 token_id);

    // for offline test
    void Acceptlikelihood(const kaldi::Matrix<kaldi::BaseFloat>& likelihood);

  private:
    std::shared_ptr<NnetProducer> nnet_producer_;

    // the frame is nnet prob frame rather than audio feature frame
    // nnet frame subsample the feature frame
    // eg: 35 frame features output 8 frame inferences
    int32 frame_offset_;
    int32 frames_ready_;

    // todo: feature frame mismatch with nnet inference frame
    // so use subsampled_frame
    // NOTE(review): the next two members are not referenced in decodable.cc.
    int32 current_log_post_subsampled_offset_;
    int32 num_chunk_computed_;
    // the single frame of nnet output currently buffered
    std::vector<kaldi::BaseFloat> framelikelihood_;

    kaldi::BaseFloat acoustic_scale_;
};

}  // namespace ppspeech


================================================
FILE: runtime/engine/asr/nnet/nnet_itf.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include "base/basic_types.h"
#include "kaldi/base/kaldi-types.h"
#include "kaldi/util/options-itf.h"

DECLARE_int32(subsampling_rate);
DECLARE_string(model_path);
DECLARE_string(param_path);
DECLARE_string(model_input_names);
DECLARE_string(model_output_names);
DECLARE_string(model_cache_names);
DECLARE_string(model_cache_shapes);
#ifdef USE_ONNX
DECLARE_bool(with_onnx_model);
#endif

namespace ppspeech {

// Model-related options, filled from the command-line flags.
struct ModelOptions {
    // common
    int subsample_rate{1};
    bool use_gpu{false};
    std::string model_path;
#ifdef USE_ONNX
    bool with_onnx_model{false};
#endif

    // Reads --subsampling_rate, --model_path and (ONNX builds only)
    // --with_onnx_model, logging each value. use_gpu keeps its default.
    static ModelOptions InitFromFlags() {
        ModelOptions from_flags;

        from_flags.subsample_rate = FLAGS_subsampling_rate;
        from_flags.model_path = FLAGS_model_path;
        LOG(INFO) << "subsampling rate: " << from_flags.subsample_rate;
        LOG(INFO) << "model path: " << from_flags.model_path;
#ifdef USE_ONNX
        from_flags.with_onnx_model = FLAGS_with_onnx_model;
        LOG(INFO) << "with onnx model: " << from_flags.with_onnx_model;
#endif
        return from_flags;
    }
};

// Result of one nnet forward pass.
struct NnetOut {
    // nnet out. May be logprob or prob; most of the time this is logprob.
    std::vector<kaldi::BaseFloat> logprobs;
    int32 vocab_dim;

    // nnet state. Only used by the Attention model.
    std::vector<std::vector<kaldi::BaseFloat>> encoder_outs;

    // Starts empty, with vocab_dim = -1 marking "not set".
    NnetOut() : logprobs(), vocab_dim(-1), encoder_outs() {}
};


// Abstract interface of an acoustic model. Every member is pure virtual.
class NnetInterface {
  public:
    virtual ~NnetInterface() {}

    // forward feat with nnet.
    // nnet does not cache feats; feats are cached by the frontend.
    // nnet caches model state, i.e. encoder_outs, att_cache, cnn_cache,
    // frame_offset.
    virtual void FeedForward(const std::vector<kaldi::BaseFloat>& features,
                             const int32& feature_dim,
                             NnetOut* out) = 0;

    // Rescores the hypotheses `hyps`; results go to `rescoring_score`.
    virtual void AttentionRescoring(const std::vector<std::vector<int>>& hyps,
                                    float reverse_weight,
                                    std::vector<float>* rescoring_score) = 0;

    // reset nnet state, e.g. nnet_logprob_cache_, offset_, encoder_outs_.
    virtual void Reset() = 0;

    // true if the nnet output is logprob; otherwise it is prob.
    virtual bool IsLogProb() = 0;

    // used to get encoder outs, e.g. seq2seq with Attention model.
    virtual void EncoderOuts(
        std::vector<std::vector<kaldi::BaseFloat>>* encoder_out) const = 0;
};


// Common base for concrete nnets: adds the subsampling rate and cloning on
// top of NnetInterface.
class NnetBase : public NnetInterface {
  public:
    // Produce another nnet instance from this one.
    virtual std::shared_ptr<NnetBase> Clone() const = 0;

    // Subsampling rate of the model; 1 unless a subclass overwrites it.
    int SubsamplingRate() const { return subsampling_rate_; }

  protected:
    int subsampling_rate_ = 1;
};

}  // namespace ppspeech


================================================
FILE: runtime/engine/asr/nnet/nnet_producer.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "nnet/nnet_producer.h"

#include "matrix/kaldi-matrix.h"

namespace ppspeech {

using kaldi::BaseFloat;
using std::vector;

// Stores the nnet, the frontend and the blank threshold, then calls Reset()
// so the producer starts from a clean state (Reset() also sets finished_).
// The initializer list follows the member declaration order of the class.
NnetProducer::NnetProducer(std::shared_ptr<NnetBase> nnet,
                           std::shared_ptr<FrontendInterface> frontend,
                           float blank_threshold)
    : frontend_(frontend),
      nnet_(nnet),
      blank_threshold_(blank_threshold) {
    Reset();
}

// Passes `inputs` straight through to the frontend's Accept(); no nnet work
// happens here. frontend_ is dereferenced without a null check.
void NnetProducer::Accept(const std::vector<kaldi::BaseFloat>& inputs) {
    frontend_->Accept(inputs);
}

// Pushes a precomputed likelihood matrix into the output cache, one entry
// per matrix row, bypassing the frontend and the nnet.
void NnetProducer::Acceptlikelihood(
    const kaldi::Matrix<BaseFloat>& likelihood) {
    const size_t num_rows = likelihood.NumRows();
    const size_t num_cols = likelihood.NumCols();

    // Scratch row that is refilled for every frame before being queued.
    std::vector<BaseFloat> frame(num_cols);
    for (size_t row = 0; row < num_rows; ++row) {
        for (size_t col = 0; col < num_cols; ++col) {
            frame[col] = likelihood(row, col);
        }
        cache_.push_back(frame);
    }
}

// Pops the next cached nnet output row into `nnet_prob` and returns whatever
// the cache's pop() reports.
bool NnetProducer::Read(std::vector<kaldi::BaseFloat>* nnet_prob) {
    return cache_.pop(nnet_prob);
}

bool NnetProducer::Compute() {
    vector<BaseFloat> features;
    if (frontend_ == NULL || frontend_->Read(&features) == false) {
        // no feat or frontend_ not init.
        if (frontend_->IsFinished() == true) {
            finished_ = true;
        }
        return false;
    }
    CHECK_GE(frontend_->Dim(), 0);
    VLOG(1) << "Forward in " << features.size() / frontend_->Dim() << " feats.";

    NnetOut out;
    nnet_->FeedForward(features, frontend_->Dim(), &out);
    int32& vocab_dim = out.vocab_dim;
    size_t nframes = out.logprobs.size() / vocab_dim;
    VLOG(1) << "Forward out " << nframes << " decoder frames.";
    for (size_t idx = 0; idx < nframes; ++idx) {
        std::vector<BaseFloat> logprob(
            out.logprobs.data() + idx * vocab_dim,
            out.logprobs.data() + (idx + 1) * vocab_dim);
        // process blank prob
        float blank_prob = std::exp(logprob[0]);
        if (blank_prob > blank_threshold_) {
            last_frame_logprob_ = logprob;
            is_last_frame_skip_ = true;
            continue;
        } else {
            int cur_max = std::max(logprob.begin(), logprob.end()) - logprob.begin();
            if (cur_max == last_max_elem_ && cur_max != 0 && is_last_frame_skip_) {
                cache_.push_back(last_frame_logprob_);
                last_max_elem_ = cur_max;
            }
            last_max_elem_ = cur_max;
            is_last_frame_skip_ = false; 
            cache_.push_back(logprob);
        }
    }
    return true;
}

// Thin forwarder to the wrapped nnet's AttentionRescoring(); all three
// arguments are passed through unchanged. nnet_ is dereferenced without a
// null check.
void NnetProducer::AttentionRescoring(const std::vector<std::vector<int>>& hyps,
                                      float reverse_weight,
                                      std::vector<float>* rescoring_score) {
    nnet_->AttentionRescoring(hyps, reverse_weight, rescoring_score);
}

}  // namespace ppspeech


================================================
FILE: runtime/engine/asr/nnet/nnet_producer.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "base/common.h"
#include "base/safe_queue.h"
#include "frontend/frontend_itf.h"
#include "nnet/nnet_itf.h"

namespace ppspeech {

// Connects a feature frontend to an nnet: input goes to the frontend,
// Compute() runs the nnet on what the frontend yields, and the per-frame
// outputs are queued in cache_ until Read() pops them.
class NnetProducer {
  public:
    explicit NnetProducer(std::shared_ptr<NnetBase> nnet,
                          std::shared_ptr<FrontendInterface> frontend,
                          float blank_threshold);

    ~NnetProducer() = default;

    // Feed feats or waves
    void Accept(const std::vector<kaldi::BaseFloat>& inputs);

    // Queue precomputed likelihood rows directly into the cache.
    void Acceptlikelihood(const kaldi::Matrix<BaseFloat>& likelihood);

    // Run one forward step; see the definition for the return value.
    bool Compute();

    // Pop one nnet output row from the cache.
    bool Read(std::vector<kaldi::BaseFloat>* nnet_prob);

    // True when no output row is waiting in the cache.
    bool Empty() const { return cache_.empty(); }

    // Tell the frontend that no more input will arrive.
    void SetInputFinished() {
        LOG(INFO) << "set finished";
        frontend_->SetFinished();
    }

    // the compute thread exit
    bool IsFinished() const {
        if (!frontend_->IsFinished()) {
            return false;
        }
        return finished_;
    }

    // Reset the frontend and the nnet (when present), drop all cached output
    // rows and clear the finished flag.
    void Reset() {
        if (frontend_) frontend_->Reset();
        if (nnet_) nnet_->Reset();
        cache_.clear();
        finished_ = false;
    }

    void AttentionRescoring(const std::vector<std::vector<int>>& hyps,
                            float reverse_weight,
                            std::vector<float>* rescoring_score);

  private:
    std::shared_ptr<FrontendInterface> frontend_;
    std::shared_ptr<NnetBase> nnet_;
    // Output rows waiting to be read.
    SafeQueue<std::vector<kaldi::BaseFloat>> cache_;
    // Bookkeeping for blank-frame skipping in Compute().
    std::vector<BaseFloat> last_frame_logprob_;
    bool is_last_frame_skip_ = false;
    int last_max_elem_ = -1;
    float blank_threshold_ = 0.0;
    // Assigned in Reset(), which the constructor calls.
    bool finished_;

    DISALLOW_COPY_AND_ASSIGN(NnetProducer);
};

}  // namespace ppspeech


================================================
FILE: runtime/engine/asr/nnet/u2_nnet.cc
================================================
// Copyright 2022 Horizon Robotics. All Rights Reserved.
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// modified from
// https://github.com/wenet-e2e/wenet/blob/main/runtime/core/decoder/asr_model.cc

#include "nnet/u2_nnet.h"
#include <type_traits>

#ifdef WITH_PROFILING
#include "paddle/fluid/platform/profiler.h"
using paddle::platform::RecordEvent;
using paddle::platform::TracerEventType;
#endif  // end WITH_PROFILING

namespace ppspeech {


void U2Nnet::LoadModel(const std::string& model_path_w_prefix) {
    paddle::jit::utils::InitKernelSignatureMap();

#ifdef WITH_GPU
    dev_ = phi::GPUPlace();
#else
    dev_ = phi::CPUPlace();
#endif
    paddle::jit::Layer model = paddle::jit::Load(model_path_w_prefix, dev_);
    model_ = std::make_shared<paddle::jit::Layer>(std::move(model));

    subsampling_rate_ = model_->Attribute<int>("subsampling_rate");
    right_context_ = model_->Attribute<int>("right_context");
    sos_ = model_->Attribute<int>("sos_symbol");
    eos_ = model_->Attribute<int>("eos_symbol");
    is_bidecoder_ = model_->Attribute<int>("is_bidirectional_decoder");

    forward_encoder_chunk_ = model_->Function("forward_encoder_chunk");
    forward_attention_decoder_ = model_->Function("forward_attention_decoder");
    ctc_activation_ = model_->Function("ctc_activation");
    CHECK(forward_encoder_chunk_.IsValid());
    CHECK(forward_attention_decoder_.IsValid());
    CHECK(ctc_activation_.IsValid());

    LOG(INFO) << "Paddle Model Info: ";
    LOG(INFO) << "\tsubsampling_rate " << subsampling_rate_;
    LOG(INFO) << "\tright context " << right_context_;
    LOG(INFO) << "\tsos " << sos_;
    LOG(INFO) << "\teos " << eos_;
    LOG(INFO) << "\tis bidecoder " << is_bidecoder_ << std::endl;

    Warmup();
}

void U2Nnet::Warmup() {
#ifdef WITH_PROFILING
    RecordEvent event("warmup", TracerEventType::UserDefined, 1);
#endif

    {
#ifdef WITH_PROFILING
        RecordEvent event(
            "warmup-encoder-ctc", TracerEventType::UserDefined, 1);
#endif
        int feat_dim = 80;
        int frame_num = 16 * 4 + 3;  // chunk_size * downsample_rate +
                                     // (receptive_field - downsample_rate)
        paddle::Tensor feats = paddle::full(
            {1, frame_num, feat_dim}, 0.12f, paddle::DataType::FLOAT32);
        paddle::Tensor offset = paddle::zeros({1}, paddle::DataType::INT32);
        paddle::Tensor att_cache =
            paddle::zeros({0, 0, 0, 0}, paddle::DataType::FLOAT32);
        paddle::Tensor cnn_cache =
            paddle::zeros({0, 0, 0, 0}, paddle::DataType::FLOAT32);
        std::vector<paddle::Tensor> inputs = {
            feats, offset, /*required_cache_size, */ att_cache, cnn_cache};
        std::vector<paddle::Tensor> outputs = forward_encoder_chunk_(inputs);

        auto chunk_out = outputs[0];
        inputs = std::move(std::vector<paddle::Tensor>({chunk_out}));
        outputs = ctc_activation_(inputs);
    }

    {
#ifdef WITH_PROFILING
        RecordEvent event("warmup-decoder", TracerEventType::UserDefined, 1);
#endif
        auto hyps =
            paddle::full({10, 8}, 10, paddle::DataType::INT64, phi::CPUPlace());
        auto hyps_lens =
            paddle::full({10}, 8, paddle::DataType::INT64, phi::CPUPlace());
        auto encoder_out = paddle::ones(
            {1, 20, 512}, paddle::DataType::FLOAT32, phi::CPUPlace());

        std::vector<paddle::Tensor> inputs{
            hyps, hyps_lens, encoder_out};

        std::vector<paddle::Tensor> outputs =
            forward_attention_decoder_(inputs);
    }

    Reset();
}

// Keeps a copy of `opts` and loads the model from opts_.model_path.
// LoadModel() also runs Warmup(), so construction performs dummy forward
// passes.
U2Nnet::U2Nnet(const ModelOptions& opts) : opts_(opts) {
    LoadModel(opts_.model_path);
}

// Copy constructor used by Clone().
// This is not a shallow copy of the model: the jit layer is loaded again from
// the model path ("hack, fix later" in place of model_->Clone()). Only the
// options and the chunking meta data are taken from `other`; inner decoding
// state (caches, encoder outputs) is not copied.
//
// opts_ must be copied as well: without it a copy of a copy would call
// paddle::jit::Load() with an empty model path.
U2Nnet::U2Nnet(const U2Nnet& other) : opts_(other.opts_) {
    // copy meta
    chunk_size_ = other.chunk_size_;
    num_left_chunks_ = other.num_left_chunks_;
    offset_ = other.offset_;

    // copy model ptr
    // model_ = other.model_->Clone();
    // hack, fix later
#ifdef WITH_GPU
    dev_ = phi::GPUPlace();
#else
    dev_ = phi::CPUPlace();
#endif
    paddle::jit::Layer model = paddle::jit::Load(opts_.model_path, dev_);
    model_ = std::make_shared<paddle::jit::Layer>(std::move(model));

    subsampling_rate_ = model_->Attribute<int>("subsampling_rate");
    right_context_ = model_->Attribute<int>("right_context");
    sos_ = model_->Attribute<int>("sos_symbol");
    eos_ = model_->Attribute<int>("eos_symbol");
    is_bidecoder_ = model_->Attribute<int>("is_bidirectional_decoder");

    // Each exported function is looked up exactly once.
    forward_encoder_chunk_ = model_->Function("forward_encoder_chunk");
    forward_attention_decoder_ = model_->Function("forward_attention_decoder");
    ctc_activation_ = model_->Function("ctc_activation");
    CHECK(forward_encoder_chunk_.IsValid());
    CHECK(forward_attention_decoder_.IsValid());
    CHECK(ctc_activation_.IsValid());

    LOG(INFO) << "Paddle Model Info: ";
    LOG(INFO) << "\tsubsampling_rate " << subsampling_rate_;
    LOG(INFO) << "\tright context " << right_context_;
    LOG(INFO) << "\tsos " << sos_;
    LOG(INFO) << "\teos " << eos_;
    LOG(INFO) << "\tis bidecoder " << is_bidecoder_ << std::endl;

    // ignore inner states
}

std::shared_ptr<NnetBase> U2Nnet::Clone() const {
    auto asr_model = std::make_shared<U2Nnet>(*this);
    // reset inner state for new decoding
    asr_model->Reset();
    return asr_model;
}

// Clears the per-utterance state: frame offset, attention/cnn caches and the
// collected encoder outputs. cost_time_ is only logged here, not cleared.
void U2Nnet::Reset() {
    offset_ = 0;

    // Empty 4-d tensors stand for "no cache yet".
    att_cache_ = paddle::zeros({0, 0, 0, 0}, paddle::DataType::FLOAT32);
    cnn_cache_ = paddle::zeros({0, 0, 0, 0}, paddle::DataType::FLOAT32);

    encoder_outs_.clear();

    VLOG(1) << "FeedForward cost: " << cost_time_ << " sec. ";
    VLOG(3) << "u2nnet reset";
}

// Debug API
// Replaces whatever encoder outputs were collected so far with the single
// tensor `encoder_out`.
void U2Nnet::FeedEncoderOuts(const paddle::Tensor& encoder_out) {
    // encoder_out (T,D)
    encoder_outs_.assign(1, encoder_out);
}


void U2Nnet::FeedForward(const std::vector<BaseFloat>& features,
                         const int32& feature_dim,
                         NnetOut* out) {
    kaldi::Timer timer;

    std::vector<kaldi::BaseFloat> ctc_probs;
    ForwardEncoderChunkImpl(
        features, feature_dim, &out->logprobs, &out->vocab_dim);
    float forward_chunk_time = timer.Elapsed();
    VLOG(1) << "FeedForward cost: " << forward_chunk_time << " sec. "
            << features.size() / feature_dim << " frames.";
    cost_time_ += forward_chunk_time;
}


// Runs one chunk of features through the streaming encoder and the ctc
// activation.
//
// chunk_feats: flattened features, feat_dim values per frame. Its size must
//              be a whole number of frames.
// feat_dim:    width of one frame, must be positive.
// out_prob:    receives the flattened ctc log-probabilities (T * D values).
// vocab_dim:   receives D, the width of one log-probability row.
//
// Side effects: att_cache_ / cnn_cache_ are replaced by the encoder's new
// caches, offset_ advances by the number of encoder output frames and the
// encoder output of this chunk is appended to encoder_outs_.
void U2Nnet::ForwardEncoderChunkImpl(
    const std::vector<kaldi::BaseFloat>& chunk_feats,
    const int32& feat_dim,
    std::vector<kaldi::BaseFloat>* out_prob,
    int32* vocab_dim) {
#ifdef WITH_PROFILING
    RecordEvent event(
        "ForwardEncoderChunkImpl", TracerEventType::UserDefined, 1);
#endif

    CHECK(out_prob != nullptr);
    CHECK(vocab_dim != nullptr);
    // feat_dim is a divisor below; a partial trailing frame would make the
    // memcpy into `feats` write past the end of the tensor.
    CHECK_GT(feat_dim, 0);
    CHECK_EQ(chunk_feats.size() % static_cast<size_t>(feat_dim), 0u);

    // 1. splice cached_feature, and chunk_feats
    //  First dimension is B, which is 1.
    // int num_frames = cached_feats_.size() + chunk_feats.size();

    int num_frames = chunk_feats.size() / feat_dim;
    VLOG(3) << "num_frames: " << num_frames;
    VLOG(3) << "feat_dim: " << feat_dim;

    // feats (B=1,T,D)
    paddle::Tensor feats =
        paddle::zeros({1, num_frames, feat_dim}, paddle::DataType::FLOAT32);
    float* feats_ptr = feats.mutable_data<float>();

    // not cache feature in nnet
    CHECK_EQ(cached_feats_.size(), 0);
    // The raw memcpy below is only valid when BaseFloat is float.
    CHECK_EQ((std::is_same<float, kaldi::BaseFloat>::value), true);
    std::memcpy(feats_ptr,
                chunk_feats.data(),
                chunk_feats.size() * sizeof(kaldi::BaseFloat));

    VLOG(3) << "feats shape: " << feats.shape()[0] << ", " << feats.shape()[1]
            << ", " << feats.shape()[2];

#ifdef PPS_DEBUG
    {
        std::stringstream path("feat", std::ios_base::app | std::ios_base::out);
        path << offset_;
        std::ofstream feat_fobj(path.str().c_str(), std::ios::out);
        CHECK(feat_fobj.is_open());
        // feat_fobj << feats.shape()[0] << " " << feats.shape()[1] << " "
        //           << feats.shape()[2] << "\n";
        for (int i = 0; i < feats.numel(); i++) {
            feat_fobj << std::setprecision(18) << feats_ptr[i] << " ";
            if ((i + 1) % feat_dim == 0) {
                feat_fobj << "\n";
            }
        }
        feat_fobj << "\n";
    }
#endif

// Encoder chunk forward
#ifdef WITH_GPU
    feats = feats.copy_to(paddle::GPUPlace(), /*blocking*/ false);
    att_cache_ = att_cache_.copy_to(paddle::GPUPlace(), /*blocking*/ false);
    cnn_cache_ = cnn_cache_.copy_to(paddle::GPUPlace(), /*blocking*/ false);
#endif

    // `required_cache_size` (num_left_chunks_ * chunk_size_, e.g. -1 * 16) is
    // frozen in the graph, so it is not passed in the function call.
    // must be scalar, but paddle do not have scalar.
    paddle::Tensor offset = paddle::full({1}, offset_, paddle::DataType::INT32);
    std::vector<paddle::Tensor> inputs = {
        feats, offset, /*required_cache_size, */ att_cache_, cnn_cache_};
    CHECK_EQ(inputs.size(), 4);
    std::vector<paddle::Tensor> outputs = forward_encoder_chunk_(inputs);
    CHECK_EQ(outputs.size(), 3);

    // outputs: [0] encoder output of this chunk, [1] new att cache,
    // [2] new cnn cache.
#ifdef WITH_GPU
    paddle::Tensor chunk_out = outputs[0].copy_to(paddle::CPUPlace());
    att_cache_ = outputs[1].copy_to(paddle::CPUPlace());
    cnn_cache_ = outputs[2].copy_to(paddle::CPUPlace());
#else
    paddle::Tensor chunk_out = outputs[0];
    att_cache_ = outputs[1];
    cnn_cache_ = outputs[2];
#endif

#ifdef PPS_DEBUG
    {
        std::stringstream path("encoder_logits",
                               std::ios_base::app | std::ios_base::out);
        auto i = offset_ - chunk_out.shape()[1];
        path << std::max(i, 0L);
        std::ofstream logits_fobj(path.str().c_str(), std::ios::out);
        CHECK(logits_fobj.is_open());
        logits_fobj << chunk_out.shape()[0] << " " << chunk_out.shape()[1]
                    << " " << chunk_out.shape()[2] << "\n";
        const float* chunk_out_ptr = chunk_out.data<float>();
        logits_fobj << chunk_out_ptr << std::endl;
        for (int i = 0; i < chunk_out.numel(); i++) {
            logits_fobj << chunk_out_ptr[i] << " ";
        }
        logits_fobj << "\n";
    }
#endif  // end PPS_DEBUG

    // current offset in decoder frame
    // not used in nnet
    offset_ += chunk_out.shape()[1];
    VLOG(2) << "encoder out chunk size: " << chunk_out.shape()[1]
            << " total: " << offset_;


    // collects encoder outs.
    encoder_outs_.push_back(chunk_out);
    VLOG(2) << "encoder_outs_ size: " << encoder_outs_.size();

#ifdef PPS_DEBUG
    {
        std::stringstream path("encoder_logits_list",
                               std::ios_base::app | std::ios_base::out);
        path << offset_ - encoder_outs_[0].shape()[1];
        std::ofstream logits_out_fobj(path.str().c_str(), std::ios::out);
        CHECK(logits_out_fobj.is_open());
        logits_out_fobj << encoder_outs_[0].shape()[0] << " "
                        << encoder_outs_[0].shape()[1] << " "
                        << encoder_outs_[0].shape()[2] << "\n";
        const float* encoder_outs_ptr = encoder_outs_[0].data<float>();
        logits_out_fobj << encoder_outs_ptr << std::endl;
        for (int i = 0; i < encoder_outs_[0].numel(); i++) {
            logits_out_fobj << encoder_outs_ptr[i] << " ";
        }
        logits_out_fobj << "\n";
    }
#endif  // end PPS_DEBUG

#ifdef WITH_GPU

#error "Not implementation."

#else
    // compute ctc_activation == log_softmax
    inputs.clear();
    outputs.clear();
    inputs.push_back(chunk_out);
    CHECK_EQ(inputs.size(), 1);
    outputs = ctc_activation_(inputs);
    CHECK_EQ(outputs.size(), 1);
    paddle::Tensor ctc_log_probs = outputs[0];

#ifdef PPS_DEBUG
    {
        std::stringstream path("encoder_logprob",
                               std::ios_base::app | std::ios_base::out);
        path << offset_ - chunk_out.shape()[1];

        std::ofstream logprob_fobj(path.str().c_str(), std::ios::out);
        CHECK(logprob_fobj.is_open());
        logprob_fobj << ctc_log_probs.shape()[0] << " "
                     << ctc_log_probs.shape()[1] << " "
                     << ctc_log_probs.shape()[2] << "\n";
        const float* logprob_ptr = ctc_log_probs.data<float>();
        for (int i = 0; i < ctc_log_probs.numel(); i++) {
            logprob_fobj << logprob_ptr[i] << " ";
            if ((i + 1) % ctc_log_probs.shape()[2] == 0) {
                logprob_fobj << "\n";
            }
        }
        logprob_fobj << "\n";
    }
#endif  // end PPS_DEBUG

#endif  // end WITH_GPU

    // Copy to output, (B=1,T,D)
    std::vector<int64_t> ctc_log_probs_shape = ctc_log_probs.shape();
    CHECK_EQ(ctc_log_probs_shape.size(), 3);
    int B = ctc_log_probs_shape[0];
    CHECK_EQ(B, 1);
    int T = ctc_log_probs_shape[1];
    int D = ctc_log_probs_shape[2];
    *vocab_dim = D;

    float* ctc_log_probs_ptr = ctc_log_probs.data<float>();

    // Element count computed in size_t so the product cannot overflow int.
    const size_t num_probs = static_cast<size_t>(T) * static_cast<size_t>(D);
    out_prob->resize(num_probs);
    if (num_probs > 0) {
        std::memcpy(out_prob->data(),
                    ctc_log_probs_ptr,
                    num_probs * sizeof(kaldi::BaseFloat));
    }

#ifdef PPS_DEBUG
    {
        std::stringstream path("encoder_logits_list_ctc",
                               std::ios_base::app | std::ios_base::out);
        path << offset_ - encoder_outs_[0].shape()[1];
        std::ofstream logits_out_fobj(path.str().c_str(), std::ios::out);
        CHECK(logits_out_fobj.is_open());
        logits_out_fobj << encoder_outs_[0].shape()[0] << " "
                        << encoder_outs_[0].shape()[1] << " "
                        << encoder_outs_[0].shape()[2] << "\n";
        const float* encoder_outs_ptr = encoder_outs_[0].data<float>();
        logits_out_fobj << encoder_outs_ptr << std::endl;
        for (int i = 0; i < encoder_outs_[0].numel(); i++) {
            logits_out_fobj << encoder_outs_ptr[i] << " ";
        }
        logits_out_fobj << "\n";
    }
#endif  // end PPS_DEBUG

    return;
}

// Sums the scores of the path `hyp` + eos in `prob`.
//
// prob: (1, Umax, V) tensor of float scores.
// hyp:  (U,) token ids; token i is looked up in row i.
// eos:  id looked up in row U, right after the last token.
//
// Returns the sum of prob[0][i][hyp[i]] for all i plus prob[0][U][eos].
// Aborts (CHECK) when `prob` has fewer than U + 1 rows or an id falls
// outside [0, V), instead of reading outside the tensor.
float U2Nnet::ComputePathScore(const paddle::Tensor& prob,
                               const std::vector<int>& hyp,
                               int eos) {
    float score = 0.0f;
    std::vector<int64_t> dims = prob.shape();
    CHECK_EQ(dims.size(), 3);
    VLOG(2) << "prob shape: " << dims[0] << ", " << dims[1] << ", " << dims[2];
    CHECK_EQ(dims[0], 1);
    int vocab_dim = static_cast<int>(dims[2]);

    // One row per token plus one row for eos.
    CHECK_GT(dims[1], static_cast<int64_t>(hyp.size()));
    CHECK_GE(eos, 0);
    CHECK_LT(eos, vocab_dim);

    const float* prob_ptr = prob.data<float>();
    for (size_t i = 0; i < hyp.size(); ++i) {
        CHECK_GE(hyp[i], 0);
        CHECK_LT(hyp[i], vocab_dim);
        const float* row = prob_ptr + i * vocab_dim;
        score += row[hyp[i]];
    }
    const float* row = prob_ptr + hyp.size() * vocab_dim;
    score += row[eos];
    return score;
}


void U2Nnet::AttentionRescoring(const std::vector<std::vector<int>>& hyps,
                                float reverse_weight,
                                std::vector<float>* rescoring_score) {
#ifdef WITH_PROFILING
    RecordEvent event("AttentionRescoring", TracerEventType::UserDefined, 1);
#endif
    CHECK(rescoring_score != nullptr);

    int num_hyps = hyps.size();
    rescoring_score->resize(num_hyps, 0.0f);

    if (num_hyps == 0) return;
    VLOG(2) << "num hyps: " << num_hyps;

    if (encoder_outs_.size() == 0) {
        // no encoder outs
        std::cerr << "encoder_outs_.size() is zero. Please check it."
                  << std::endl;
        return;
    }

    // prepare input
    paddle::Tensor hyps_lens =
        paddle::zeros({num_hyps}, paddle::DataType::INT64);
    int64_t* hyps_len_ptr = hyps_lens.mutable_data<int64_t>();
    int max_hyps_len = 0;
    for (size_t i = 0; i < num_hyps; ++i) {
        int len = hyps[i].size() + 1;  // eos
        max_hyps_len = std::max(max_hyps_len, len);
        hyps_len_ptr[i] = static_cast<int64_t>(len);
    }
    VLOG(2) << "max_hyps_len: " << max_hyps_len;

    paddle::Tensor hyps_tensor =
        paddle::full({num_hyps, max_hyps_len}, eos_, paddle::DataType::INT64);
    int64_t* hyps_ptr = hyps_tensor.mutable_data<int64_t>();
    for (size_t i = 0; i < num_hyps; ++i) {
        const std::vector<int>& hyp = hyps[i];
        int64_t* row = hyps_ptr + max_hyps_len * i;
        row[0] = sos_;
        for (size_t j = 0; j < hyp.size(); ++j) {
            row[j + 1] = hyp[j];
        }
    }

#ifdef PPS_DEBUG
    {
        std::stringstream path("encoder_logits_concat",
                               std::ios_base::app | std::ios_base::out);
        for (int j = 0; j < encoder_outs_.size(); j++) {
            path << j;
            std::ofstream logits_out_fobj(path.str().c_str(), std::ios::out);
            CHECK(logits_out_fobj.is_open());
            logits_out_fobj << encoder_outs_[j].shape()[0] << " "
                            << encoder_outs_[j].shape()[1] << " "
                            << encoder_outs_[j].shape()[2] << "\n";
            const float* encoder_outs_ptr = encoder_outs_[j].data<float>();
            for (int i = 0; i < encoder_outs_[j].numel(); i++) {
                logits_out_fobj << encoder_outs_ptr[i] << " ";
            }
            logits_out_fobj << "\n";
        }
    }
#endif  // end TEST_DEBUG

    // forward attention decoder by hyps and correspoinding encoder_outs_
    paddle::Tensor encoder_out = paddle::concat(encoder_outs_, 1);
    VLOG(2) << "encoder_outs_ size: " << encoder_outs_.size();

#ifdef PPS_DEBUG
    {
        std::stringstream path("encoder_out0",
                               std::ios_base::app | std::ios_base::out);
        std::ofstream encoder_out_fobj(path.str().c_str(), std::ios::out);
        CHECK(encoder_out_fobj.is_open());

        encoder_out_fobj << encoder_outs_[0].shape()[0] << " "
                         << encoder_outs_[0].shape()[1] << " "
                         << encoder_outs_[0].shape()[2] << "\n";
        const float* enc_logprob_ptr = encoder_outs_[0].data<float>();

        size_t size = encoder_outs_[0].numel();
        for (int i = 0; i < size; i++) {
            encoder_out_fobj << enc_logprob_ptr[i] << "\n";
        }
    }
#endif  // end TEST_DEBUG

#ifdef PPS_DEBUG
    {
        std::stringstream path("encoder_out",
                               std::ios_base::app | std::ios_base::out);
        std::ofstream encoder_out_fobj(path.str().c_str(), std::ios::out);
        CHECK(encoder_out_fobj.is_open());

        encoder_out_fobj << encoder_out.shape()[0] << " "
                         << encoder_out.shape()[1] << " "
                         << encoder_out.shape()[2] << "\n";
        const float* enc_logprob_ptr = encoder_out.data<float>();

        size_t size = encoder_out.numel();
        for (int i = 0; i < size; i++) {
            encoder_out_fobj << enc_logprob_ptr[i] << "\n";
        }
    }
#endif  // end TEST_DEBUG

    std::vector<paddle::Tensor> inputs{
        hyps_tensor, hyps_lens, encoder_out};
    std::vector<paddle::Tensor> outputs = forward_attention_decoder_(inputs);
    CHECK_EQ(outputs.size(), 2);

    // (B, Umax, V)
    paddle::Tensor probs = outputs[0];
    std::vector<int64_t> probs_shape = probs.shape();
    CHECK_EQ(probs_shape.size(), 3);
    CHECK_EQ(probs_shape[0], num_hyps);
    CHECK_EQ(probs_shape[1], max_hyps_len);

#ifdef PPS_DEBUG
    {
        std::stringstream path("decoder_logprob",
                               std::ios_base::app | std::ios_base::out);
        std::ofstream dec_logprob_fobj(path.str().c_str(), std::ios::out);
        CHECK(dec_logprob_fobj.is_open());

        dec_logprob_fobj << probs.shape()[0] << " " << probs.shape()[1] << " "
                         << probs.shape()[2] << "\n";
        const float* dec_logprob_ptr = probs.data<float>();

        size_t size = probs.numel();
        for (int i = 0; i < size; i++) {
            dec_logprob_fobj << dec_logprob_ptr[i] << "\n";
        }
    }
#endif  // end TEST_DEBUG

#ifdef PPS_DEBUG
    {
        std::stringstream path("hyps_lens",
                               std::ios_base::app | std::ios_base::out);
        std::ofstream hyps_len_fobj(path.str().c_str(), std::ios::out);
        CHECK(hyps_len_fobj.is_open());

        const int64_t* hyps_lens_ptr = hyps_lens.data<int64_t>();

        size_t size = hyps_lens.numel();
        for (int i = 0; i < size; i++) {
            hyps_len_fobj << hyps_lens_ptr[i] << "\n";
        }
    }
#endif  // end TEST_DEBUG

#ifdef PPS_DEBUG
    {
        std::stringstream path("hyps_tensor",
                               std::ios_base::app | std::ios_base::out);
        std::ofstream hyps_tensor_fobj(path.str().c_str(), std::ios::out);
        CHECK(hyps_tensor_fobj.is_open());

        const int64_t* hyps_tensor_ptr = hyps_tensor.data<int64_t>();

        size_t size = hyps_tensor.numel();
        for (int i = 0; i < size; i++) {
            hyps_tensor_fobj << hyps_tensor_ptr[i] << "\n";
        }
    }
#endif  // end TEST_DEBUG

    paddle::Tensor r_probs = outputs[1];
    std::vector<int64_t> r_probs_shape = r_probs.shape();
    if (is_bidecoder_ && reverse_weight > 0) {
        CHECK_EQ(r_probs_shape.size(), 3);
        CHECK_EQ(r_probs_shape[0], num_hyps);
        CHECK_EQ(r_probs_shape[1], max_hyps_len);
    } else {
        // dump r_probs
        CHECK_EQ(r_probs_shape.size(), 1);
        //CHECK_EQ(r_probs_shape[0], 1) << r_probs_shape[0];
    }

    // compute rescoring score
    using IntArray = paddle::experimental::IntArray;
    std::vector<paddle::Tensor> probs_v =
        paddle::experimental::split_with_num(probs, num_hyps, 0);
    VLOG(2) << "split prob: " << probs_v.size() << " "
            << probs_v[0].shape().size() << " 0: " << probs_v[0].shape()[0]
            << ", " << probs_v[0].shape()[1] << ", " << probs_v[0].shape()[2];
    //CHECK(static_cast<int>(probs_v.size()) == num_hyps)
     //   << ": is " << probs_v.size() << " expect: " << num_hyps;

    std::vector<paddle::Tensor> r_probs_v;
    if (is_bidecoder_ && reverse_weight > 0) {
        r_probs_v = paddle::experimental::split_with_num(r_probs, num_hyps, 0);
        //CHECK(static_cast<int>(r_probs_v.size()) == num_hyps)
         //   << "r_probs_v size: is " << r_probs_v.size()
          //  << " expect: " << num_hyps;
    }

    for (int i = 0; i < num_hyps; ++i) {
        const std::vector<int>& hyp = hyps[i];

        // left-to-right decoder score
        float score = 0.0f;
        score = ComputePathScore(probs_v[i], hyp, eos_);

        // right-to-left decoder score
        float r_score = 0.0f;
        if (is_bidecoder_ && reverse_weight > 0) {
            std::vector<int> r_hyp(hyp.size());
            std::reverse_copy(hyp.begin(), hyp.end(), r_hyp.begin());
            r_score = ComputePathScore(r_probs_v[i], r_hyp, eos_);
        }

        // combinded left-to-right and right-to-lfet score
        (*rescoring_score)[i] =
            score * (1 - reverse_weight) + r_score * reverse_weight;
        VLOG(3) << "hyp " << i << " " << hyp.size() << " score: " << score
                << " r_score: " << r_score
                << " reverse_weight: " << reverse_weight
                << " final score: " << (*rescoring_score)[i];
    }
}


// Appends the collected encoder outputs to `encoder_out`, one vector of D
// floats per frame.
//
// Every tensor in encoder_outs_ must be 3-d (B, T, D). Only the first T rows
// of the tensor data are read, i.e. batch 0; the B == 1 check is disabled.
// `encoder_out` must not be null and is appended to, not cleared.
void U2Nnet::EncoderOuts(
    std::vector<std::vector<kaldi::BaseFloat>>* encoder_out) const {
    CHECK(encoder_out != nullptr);

    // list of (B=1,T,D)
    int size = encoder_outs_.size();
    VLOG(3) << "encoder_outs_ size: " << size;

    for (int i = 0; i < size; i++) {
        const paddle::Tensor& item = encoder_outs_[i];
        const std::vector<int64_t> shape = item.shape();
        CHECK_EQ(shape.size(), 3);
        // Plain ints instead of const int& bound to int64_t temporaries.
        const int B = static_cast<int>(shape[0]);
        const int T = static_cast<int>(shape[1]);
        const int D = static_cast<int>(shape[2]);
        // CHECK(B == 1) << "Only support batch one.";
        VLOG(3) << "encoder out " << i << " shape: (" << B << "," << T << ","
                << D << ")";

        const float* this_tensor_ptr = item.data<float>();
        for (int j = 0; j < T; j++) {
            const float* cur = this_tensor_ptr + j * D;
            // Build the frame in place from the row's [cur, cur + D) range.
            encoder_out->emplace_back(cur, cur + D);
        }
    }
}

}  // namespace ppspeech


================================================
FILE: runtime/engine/asr/nnet/u2_nnet.h
================================================
// Copyright 2022 Horizon Robotics. All Rights Reserved.
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// modified from
// https://github.com/wenet-e2e/wenet/blob/main/runtime/core/decoder/asr_model.h
#pragma once

#include "base/common.h"
#include "matrix/kaldi-matrix.h"
#include "nnet/nnet_itf.h"
#include "paddle/extension.h"
#include "paddle/jit/all.h"
#include "paddle/phi/api/all.h"

namespace ppspeech {


// Abstract base holding the model metadata and decoding-state fields shared
// by the U2 nnet backends (U2Nnet, and U2OnnxNnet which also derives from it).
// Concrete backends must implement Clone() and ForwardEncoderChunkImpl().
class U2NnetBase : public NnetBase {
  public:
    // Returns right_context_ + 1.
    virtual int Context() const { return right_context_ + 1; }
    virtual int RightContext() const { return right_context_; }

    virtual int EOS() const { return eos_; }
    virtual int SOS() const { return sos_; }
    // NOTE(review): returns the bool is_bidecoder_ converted to int.
    virtual int IsBidecoder() const { return is_bidecoder_; }
    // current offset in decoder frame
    virtual int Offset() const { return offset_; }
    virtual void SetChunkSize(int chunk_size) { chunk_size_ = chunk_size; }
    virtual void SetNumLeftChunks(int num_left_chunks) {
        num_left_chunks_ = num_left_chunks;
    }

    // Creates a new model instance from this one; semantics are defined by
    // the concrete backend.
    virtual std::shared_ptr<NnetBase> Clone() const = 0;

  protected:
    // Backend hook that runs one chunk of features through the encoder.
    //   chunk_feats: flattened feature values for the chunk.
    //   feat_dim:    number of values per feature frame.
    //   ctc_probs:   output, flattened per-frame scores.
    //   vocab_dim:   output, number of scores per output frame.
    virtual void ForwardEncoderChunkImpl(
        const std::vector<kaldi::BaseFloat>& chunk_feats,
        const int32& feat_dim,
        std::vector<kaldi::BaseFloat>* ctc_probs,
        int32* vocab_dim) = 0;

  protected:
    // model specification
    int right_context_{0};

    // start-of-sentence / end-of-sentence token ids
    int sos_{0};
    int eos_{0};

    bool is_bidecoder_{false};

    int chunk_size_{16};  // num of decoder frames. If chunk_size > 0, streaming
                          // case. Otherwise, none streaming case
    int num_left_chunks_{-1};  // -1 means all left chunks

    // asr decoder state, not used in nnet
    int offset_{0};  // current offset in encoder output time stamp. Used by
                     // position embedding.
    std::vector<std::vector<float>> cached_feats_{};  // features cache
};


// U2 nnet backend built on a paddle::jit::Layer. Holds the loaded model, the
// jit functions looked up from it, and the per-utterance encoder caches.
class U2Nnet : public U2NnetBase {
  public:
    // Builds the network from `opts`.
    explicit U2Nnet(const ModelOptions& opts);
    // Copy constructor (used by Clone(); see the .cc for what is shared).
    U2Nnet(const U2Nnet& other);

    // Runs `features` (flattened, `feature_dim` values per frame) through the
    // network and stores the result in `out`.
    void FeedForward(const std::vector<kaldi::BaseFloat>& features,
                     const int32& feature_dim,
                     NnetOut* out) override;

    // Clears per-utterance decoding state.
    void Reset() override;

    // The outputs of this backend are reported as log-probabilities.
    bool IsLogProb() override { return true; }

    void Dim();

    void LoadModel(const std::string& model_path_w_prefix);
    void Warmup();

    // Returns the shared handle to the underlying jit layer.
    std::shared_ptr<paddle::jit::Layer> model() const { return model_; }

    std::shared_ptr<NnetBase> Clone() const override;

    // See U2NnetBase::ForwardEncoderChunkImpl.
    void ForwardEncoderChunkImpl(
        const std::vector<kaldi::BaseFloat>& chunk_feats,
        const int32& feat_dim,
        std::vector<kaldi::BaseFloat>* ctc_probs,
        int32* vocab_dim) override;

    // Scores hypothesis `hyp` against `prob`, using `eos` as the end token id.
    float ComputePathScore(const paddle::Tensor& prob,
                           const std::vector<int>& hyp,
                           int eos);

    // Writes one rescoring score per hypothesis in `hyps` into
    // `rescoring_score`; `reverse_weight` weights the reverse-direction score.
    void AttentionRescoring(const std::vector<std::vector<int>>& hyps,
                            float reverse_weight,
                            std::vector<float>* rescoring_score) override;

    // debug
    void FeedEncoderOuts(const paddle::Tensor& encoder_out);

    // Appends each frame of the cached encoder outputs to `encoder_out`, one
    // inner vector per frame.
    void EncoderOuts(
        std::vector<std::vector<kaldi::BaseFloat>>* encoder_out) const;

    // NOTE: intentionally public for now (see original "hack" remark).
    ModelOptions opts_; // hack, fix later
  private:

    phi::Place dev_;
    std::shared_ptr<paddle::jit::Layer> model_{nullptr};
    // Encoder output tensors accumulated so far; read by EncoderOuts().
    std::vector<paddle::Tensor> encoder_outs_;
    // transformer/conformer attention cache
    // (starts as an empty 4-D tensor: every dimension is 0)
    paddle::Tensor att_cache_ = paddle::full({0, 0, 0, 0}, 0.0);
    // conformer-only conv_module cache
    // (starts as an empty 4-D tensor: every dimension is 0)
    paddle::Tensor cnn_cache_ = paddle::full({0, 0, 0, 0}, 0.0);

    // jit functions held by this instance.
    paddle::jit::Function forward_encoder_chunk_;
    paddle::jit::Function forward_attention_decoder_;
    paddle::jit::Function ctc_activation_;
    float cost_time_ = 0.0;
};

}  // namespace ppspeech

================================================
FILE: runtime/engine/asr/nnet/u2_nnet_main.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include "base/common.h"
#include "decoder/param.h"
#include "frontend/assembler.h"
#include "frontend/data_cache.h"
#include "kaldi/util/table-types.h"
#include "nnet/decodable.h"
#include "nnet/u2_nnet.h"


// Command-line flags specific to this tool. The help text of
// nnet_prob_wspecifier previously read "porb"; it is shown to users by
// --help, so the typo is corrected here.
DEFINE_string(feature_rspecifier, "", "test feature rspecifier");
DEFINE_string(nnet_prob_wspecifier, "", "nnet prob wspecifier");
DEFINE_string(nnet_encoder_outs_wspecifier, "", "nnet encoder outs wspecifier");

using kaldi::BaseFloat;
using kaldi::Matrix;
using std::vector;

// Offline test driver: reads feature matrices from --feature_rspecifier, feeds
// them chunk by chunk through a U2Nnet wrapped in a Decodable, and writes the
// per-frame nnet outputs (and the encoder outputs) as Kaldi matrices.
// Returns 0 if at least one utterance was written, 1 otherwise.
int main(int argc, char* argv[]) {
    gflags::SetUsageMessage("Usage:");
    gflags::ParseCommandLineFlags(&argc, &argv, false);
    google::InitGoogleLogging(argv[0]);
    google::InstallFailureSignalHandler();
    FLAGS_logtostderr = 1;

    int32 num_done = 0, num_err = 0;

    // NOTE(review): nnet_encoder_outs_wspecifier is not checked for being
    // non-empty although its writer is used unconditionally below.
    CHECK_GT(FLAGS_feature_rspecifier.size(), 0);
    CHECK_GT(FLAGS_nnet_prob_wspecifier.size(), 0);
    CHECK_GT(FLAGS_model_path.size(), 0);
    LOG(INFO) << "input rspecifier: " << FLAGS_feature_rspecifier;
    LOG(INFO) << "output wspecifier: " << FLAGS_nnet_prob_wspecifier;
    LOG(INFO) << "model path: " << FLAGS_model_path;

    kaldi::SequentialBaseFloatMatrixReader feature_reader(
        FLAGS_feature_rspecifier);
    kaldi::BaseFloatMatrixWriter nnet_out_writer(FLAGS_nnet_prob_wspecifier);
    kaldi::BaseFloatMatrixWriter nnet_encoder_outs_writer(
        FLAGS_nnet_encoder_outs_wspecifier);

    ppspeech::ModelOptions model_opts = ppspeech::ModelOptions::InitFromFlags();

    // Chunk geometry in feature frames: a chunk spans chunk_size frames and
    // consecutive chunks start chunk_stride frames apart.
    int32 chunk_size = (FLAGS_nnet_decoder_chunk - 1) * FLAGS_subsampling_rate +
                       FLAGS_receptive_field_length;
    int32 chunk_stride = FLAGS_subsampling_rate * FLAGS_nnet_decoder_chunk;
    int32 receptive_field_length = FLAGS_receptive_field_length;
    LOG(INFO) << "chunk size (frame): " << chunk_size;
    LOG(INFO) << "chunk stride (frame): " << chunk_stride;
    LOG(INFO) << "receptive field (frame): " << receptive_field_length;

    std::shared_ptr<ppspeech::U2Nnet> nnet(new ppspeech::U2Nnet(model_opts));
    std::shared_ptr<ppspeech::DataCache> raw_data(new ppspeech::DataCache());
    std::shared_ptr<ppspeech::Decodable> decodable(
        new ppspeech::Decodable(nnet, raw_data, FLAGS_acoustic_scale));
    // Wall-clock timer for the whole program (reported at the end).
    kaldi::Timer timer;

    for (; !feature_reader.Done(); feature_reader.Next()) {
        string utt = feature_reader.Key();
        kaldi::Matrix<BaseFloat> feature = feature_reader.Value();

        int nframes = feature.NumRows();
        int feat_dim = feature.NumCols();
        raw_data->SetDim(feat_dim);
        LOG(INFO) << "utt: " << utt;
        LOG(INFO) << "feat shape: " << nframes << ", " << feat_dim;

        // NOTE(review): frame_idx is never modified after this point, so the
        // VLOG below always prints 0.
        int32 frame_idx = 0;
        int vocab_dim = 0;
        std::vector<kaldi::Vector<kaldi::BaseFloat>> prob_vec;
        std::vector<kaldi::Vector<kaldi::BaseFloat>> encoder_out_vec;
        int32 ori_feature_len = feature.NumRows();
        int32 num_chunks = feature.NumRows() / chunk_stride + 1;
        LOG(INFO) << "num_chunks: " << num_chunks;

        for (int chunk_idx = 0; chunk_idx < num_chunks; ++chunk_idx) {
            // Frames available for this chunk, capped at chunk_size; 0 when
            // the chunk would start at or past the end of the utterance.
            int32 this_chunk_size = 0;
            if (ori_feature_len > chunk_idx * chunk_stride) {
                this_chunk_size = std::min(
                    ori_feature_len - chunk_idx * chunk_stride, chunk_size);
            }
            // A chunk shorter than the receptive field is dropped and ends
            // the utterance.
            // NOTE(review): when this break is taken, SetFinished() below is
            // not reached for this utterance — confirm that is intended.
            if (this_chunk_size < receptive_field_length) {
                LOG(WARNING)
                    << "utt: " << utt << " skip last " << this_chunk_size
                    << " frames, expect is " << receptive_field_length;
                break;
            }

            // Flatten rows [start, start + this_chunk_size) of the feature
            // matrix into one contiguous vector.
            kaldi::Vector<kaldi::BaseFloat> feature_chunk(this_chunk_size *
                                                          feat_dim);
            int32 start = chunk_idx * chunk_stride;
            for (int row_id = 0; row_id < this_chunk_size; ++row_id) {
                kaldi::SubVector<kaldi::BaseFloat> feat_row(feature, start);
                kaldi::SubVector<kaldi::BaseFloat> feature_chunk_row(
                    feature_chunk.Data() + row_id * feat_dim, feat_dim);

                feature_chunk_row.CopyFromVec(feat_row);
                ++start;
            }

            // feat to frontend pipeline cache
            raw_data->Accept(feature_chunk);

            // send data finish signal
            if (chunk_idx == num_chunks - 1) {
                raw_data->SetFinished();
            }

            // get nnet outputs
            // (this timer shadows the program-level one and times one chunk)
            kaldi::Timer timer;
            kaldi::Vector<kaldi::BaseFloat> logprobs;
            bool isok = decodable->AdvanceChunk(&logprobs, &vocab_dim);
            CHECK(isok == true);
            // Split the flat logprobs buffer into rows of vocab_dim values.
            for (int row_idx = 0; row_idx < logprobs.Dim() / vocab_dim;
                 row_idx++) {
                kaldi::Vector<kaldi::BaseFloat> vec_tmp(vocab_dim);
                std::memcpy(vec_tmp.Data(),
                            logprobs.Data() + row_idx * vocab_dim,
                            sizeof(kaldi::BaseFloat) * vocab_dim);
                prob_vec.push_back(vec_tmp);
            }

            VLOG(2) << "frame_idx: " << frame_idx
                    << " elapsed: " << timer.Elapsed() << " sec.";
        }

        // get encoder out
        // NOTE(review): encoder_out_vec holds kaldi::Vector rows, while
        // U2Nnet::EncoderOuts takes std::vector<std::vector<BaseFloat>>*; the
        // type returned by decodable->Nnet() is not visible here — confirm
        // this call matches the current interface.
        decodable->Nnet()->EncoderOuts(&encoder_out_vec);

        // after process one utt, then reset decoder state.
        decodable->Reset();

        if (prob_vec.size() == 0 || encoder_out_vec.size() == 0) {
            // the TokenWriter can not write empty string.
            ++num_err;
            LOG(WARNING) << " the nnet prob/encoder_out of " << utt
                         << " is empty";
            continue;
        }

        {
            // write nnet output: stack the per-frame rows into one matrix
            kaldi::MatrixIndexT nrow = prob_vec.size();
            kaldi::MatrixIndexT ncol = prob_vec[0].Dim();
            LOG(INFO) << "nnet out shape: " << nrow << ", " << ncol;
            kaldi::Matrix<kaldi::BaseFloat> nnet_out(nrow, ncol);
            for (int32 row_idx = 0; row_idx < nrow; ++row_idx) {
                for (int32 col_idx = 0; col_idx < ncol; ++col_idx) {
                    nnet_out(row_idx, col_idx) = prob_vec[row_idx](col_idx);
                }
            }
            nnet_out_writer.Write(utt, nnet_out);
        }


        {
            // write nnet encoder outs: stack the per-frame rows into one
            // matrix
            kaldi::MatrixIndexT nrow = encoder_out_vec.size();
            kaldi::MatrixIndexT ncol = encoder_out_vec[0].Dim();
            LOG(INFO) << "nnet encoder outs shape: " << nrow << ", " << ncol;
            kaldi::Matrix<kaldi::BaseFloat> encoder_outs(nrow, ncol);
            for (int32 row_idx = 0; row_idx < nrow; ++row_idx) {
                for (int32 col_idx = 0; col_idx < ncol; ++col_idx) {
                    encoder_outs(row_idx, col_idx) =
                        encoder_out_vec[row_idx](col_idx);
                }
            }
            nnet_encoder_outs_writer.Write(utt, encoder_outs);
        }

        ++num_done;
    }


    double elapsed = timer.Elapsed();
    LOG(INFO) << "Program cost:" << elapsed << " sec";

    LOG(INFO) << "Done " << num_done << " utterances, " << num_err
              << " with errors.";
    // Success only if at least one utterance was written.
    return (num_done != 0 ? 0 : 1);
}


================================================
FILE: runtime/engine/asr/nnet/u2_nnet_thread_main.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef USE_ONNX
    #include "nnet/u2_nnet.h"
#else
    #include "nnet/u2_onnx_nnet.h"
#endif
#include "base/common.h"
#include "decoder/param.h"
#include "frontend/feature_pipeline.h"
#include "frontend/wave-reader.h"
#include "kaldi/util/table-types.h"
#include "nnet/decodable.h"
#include "nnet/nnet_producer.h"
#include "nnet/u2_nnet.h"

// Command-line flags specific to this tool. The help text of
// nnet_prob_wspecifier previously read "porb"; it is shown to users by
// --help, so the typo is corrected here.
DEFINE_string(wav_rspecifier, "", "test wav rspecifier");
DEFINE_string(nnet_prob_wspecifier, "", "nnet prob wspecifier");
DEFINE_double(streaming_chunk, 0.36, "streaming feature chunk size");
DEFINE_int32(sample_rate, 16000, "sample rate");

using kaldi::BaseFloat;
using kaldi::Matrix;
using std::vector;

int main(int argc, char* argv[]) {
    gflags::SetUsageMessage("Usage:");
    gflags::ParseCommandLineFlags(&argc, &argv, false);
    google::InitGoogleLogging(argv[0]);
    google::InstallFailureSignalHandler();
    FLAGS_logtostderr = 1;

    int32 num_done = 0, num_err = 0;
    int sample_rate = FLAGS_sample_rate;
    float streaming_chunk = FLAGS_streaming_chunk;
    int chunk_sample_size = streaming_chunk * sample_rate;

    CHECK_GT(FLAGS_wav_rspecifier.size(), 0);
    CHECK_GT(FLAGS_nnet_prob_wspecifier.size(), 0);
    CHECK_GT(FLAGS_model_path.size(), 0);
    LOG(INFO) << "input rspecifier: " << FLAGS_wav_rspecifier;
    LOG(INFO) << "output wspecifier: " << FLAGS_nnet_prob_wspecifier;
    LOG(INFO) << "model path: " << FLAGS_model_path;

    kaldi::SequentialTableReader<kaldi::WaveHolder> wav_reader(
        FLAGS_wav_rspecifier);
    kaldi::BaseFloatMatrixWriter nnet_out_writer(FLAGS_nnet_prob_wspecifier);

    ppspeech::ModelOptions model_opts = ppspeech::ModelOptions::InitFromFlags();
    ppspeech::FeaturePipelineOptions feature_opts =
        ppspeech::FeaturePipelineOptions::InitFromFlags();
    feature_opts.assembler_opts.fill_zero = false;

#ifndef USE_ONNX
    std::shared_ptr<ppspeech::U2Nnet> nnet(new ppspeech::U2Nnet(model_opts));
#else
    std::shared_ptr<ppspeech::U2OnnxNnet> nnet(new ppspeech::U2OnnxNnet(model_opts));
#endif
    std::shared_ptr<ppspeech::FeaturePipeline> feature_pipeline(
        new ppspeech::FeaturePipeline(feature_opts));
    std::shared_ptr<ppspeech::NnetProducer> nnet_producer(
        new ppspeech::NnetProducer(nnet, feature_pipeline));
    kaldi::Timer timer;
    float tot_wav_duration = 0;

    for (; !wav_reader.Done(); wav_reader.Next()) {
        std::string utt = wav_reader.Key();
        const kaldi::WaveData& wave_data = wav_reader.Value();
        LOG(INFO) << "utt: " << utt;
        LOG(INFO) << "wav dur: " << wave_data.Duration() << " sec.";
        double dur = wave_data.Duration();
        tot_wav_duration += dur;

        int32 this_channel = 0;
        kaldi::SubVector<kaldi::BaseFloat> waveform(wave_data.Data(),
                                                    this_channel);
        int tot_samples = waveform.Dim();
        LOG(INFO) << "wav len (sample): " << tot_samples;

        int sample_offset = 0;
        kaldi::Timer timer;

        while (sample_offset < tot_samples) {
            int cur_chunk_size =
                std::min(chunk_sample_size, tot_samples - sample_offset);

            std::vector<kaldi::BaseFloat> wav_chunk(cur_chunk_size);
            for (int i = 0; i < cur_chunk_size; ++i) {
                wav_chunk[i] = waveform(sample_offset + i);
            }

            nnet_producer->Accept(wav_chunk);
            if (cur_chunk_size < chunk_sample_size) {
                nnet_producer->SetInputFinished();
            }

            // no overlap
            sample_offset += cur_chunk_size;
        }
        CHECK(sample_offset == tot_samples);

        std::vector<std::vector<kaldi::BaseFloat>> prob_vec;
        while (1) {
            std::vector<kaldi::BaseFloat> logprobs;
            bool isok = nnet_producer->Read(&logprobs);
            if (nnet_producer->IsFinished()) break;
            if (isok == false) continue;
            prob_vec.push_back(logprobs);
        }
        {
            // writer nnet output
            kaldi::MatrixIndexT nrow = prob_vec.size();
            kaldi::MatrixIndexT ncol = prob_vec[0].size();
            LOG(INFO) << "nnet out shape: " << nrow << ", " << ncol;
            kaldi::Matrix<kaldi::BaseFloat> nnet_out(nrow, ncol);
            for (int32 row_idx = 0; row_idx < nrow; ++row_idx) {
                for (int32 col_idx = 0; col_idx < ncol; ++col_idx) {
                    nnet_out(row_idx, col_idx) = prob_vec[row_idx][col_idx];
                }
            }
            nnet_out_writer.Write(utt, nnet_out);
        }
        nnet_producer->Reset();
    }

    nnet_producer->Wait();
    double elapsed = timer.Elapsed();
    LOG(INFO) << "Program cost:" << elapsed << " sec";

    LOG(INFO) << "Done " << num_done << " utterances, " << num_err
              << " with errors.";
    return (num_done != 0 ? 0 : 1);
}


================================================
FILE: runtime/engine/asr/nnet/u2_onnx_nnet.cc
================================================
// Copyright 2022 Horizon Robotics. All Rights Reserved.
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// modified from
// https://github.com/wenet-e2e/wenet/blob/main/runtime/core/decoder/onnx_asr_model.cc

#include "nnet/u2_onnx_nnet.h"
#include "common/base/config.h"

namespace ppspeech {

void U2OnnxNnet::LoadModel(const std::string& model_dir) {
    std::string encoder_onnx_path = model_dir + "/encoder.onnx";
    std::string rescore_onnx_path = model_dir + "/decoder.onnx";
    std::string ctc_onnx_path = model_dir + "/ctc.onnx";
    std::string param_path = model_dir + "/param.onnx";
    // 1. Load sessions
    try {
        encoder_ = std::make_shared<fastdeploy::Runtime>();
        ctc_ = std::make_shared<fastdeploy::Runtime>();
        rescore_ = std::make_shared<fastdeploy::Runtime>();
        fastdeploy::RuntimeOption runtime_option;
        runtime_option.UseOrtBackend();
        runtime_option.UseCpu();
        runtime_option.SetCpuThreadNum(1);
        runtime_option.SetModelPath(encoder_onnx_path.c_str(), "", fastdeploy::ModelFormat::ONNX);
        assert(encoder_->Init(runtime_option));
        runtime_option.SetModelPath(rescore_onnx_path.c_str(), "", fastdeploy::ModelFormat::ONNX);
        assert(rescore_->Init(runtime_option));
        runtime_option.SetModelPath(ctc_onnx_path.c_str(), "", fastdeploy::ModelFormat::ONNX);
        assert(ctc_->Init(runtime_option));
    } catch (std::exception const& e) {
        LOG(ERROR) << "error when load onnx model: " << e.what();
        exit(0);
    }

    Config conf(param_path);
    encoder_output_size_ = conf.Read("output_size", encoder_output_size_);
    num_blocks_ = conf.Read("num_blocks", num_blocks_);
    head_ = conf.Read("head", head_);
    cnn_module_kernel_ = conf.Read("cnn_module_kernel", cnn_module_kernel_);
    subsampling_rate_ = conf.Read("subsampling_rate", subsampling_rate_);
    right_context_ = conf.Read("right_context", right_context_);
    sos_= conf.Read("sos_symbol", sos_);
    eos_= conf.Read("eos_symbol", eos_);
    is_bidecoder_= conf.Read("is_bidirectional_decoder", is_bidecoder_);
    chunk_size_= conf.Read("chunk_size", chunk_size_);
    num_left_chunks_ = conf.Read("left_chunks", num_left_chunks_);
    
    LOG(INFO) << "Onnx Model Info:";
    LOG(INFO) << "\tencoder_output_size " << encoder_output_size_;
    LOG(INFO) << "\tnum_blocks " << num_blocks_;
    LOG(INFO) << "\thead " << head_;
    LOG(INFO) << "\tcnn_module_kernel " << cnn_module_kernel_;
    LOG(INFO) << "\tsubsampling_rate " << subsampling_rate_;
    LOG(INFO) << "\tright_context " << right_context_;
    LOG(INFO) << "\tsos " << sos_;
    LOG(INFO) << "\teos " << eos_;
    LOG(INFO) << "\tis bidirectional decoder " << is_bidecoder_;
    LOG(INFO) << "\tchunk_size " << chunk_size_;
    LOG(INFO) << "\tnum_left_chunks " << num_left_chunks_;

    // 3. Read model nodes
    LOG(INFO) << "Onnx Encoder:";
    GetInputOutputInfo(encoder_, &encoder_in_names_, &encoder_out_names_);
    LOG(INFO) << "Onnx CTC:";
    GetInputOutputInfo(ctc_, &ctc_in_names_, &ctc_out_names_);
    LOG(INFO) << "Onnx Rescore:";
    GetInputOutputInfo(rescore_, &rescore_in_names_, &rescore_out_names_);
}

// Keeps a copy of the options and immediately loads the ONNX models from the
// directory named by its model_path.
U2OnnxNnet::U2OnnxNnet(const ModelOptions& opts)
    : opts_(opts) {
    const std::string& model_dir = opts_.model_path;
    LoadModel(model_dir);
}

// Shallow copy: the new object shares the three runtime sessions with `other`
// (shared_ptr copies) and duplicates the model metadata and node names.
// The encoder caches, cached features and accumulated encoder outputs are not
// copied here; Clone() calls Reset() on the copy to rebuild them.
//
// opts_ is copied as well so that a cloned model reports the same options as
// the one it was created from instead of default-constructed ones.
U2OnnxNnet::U2OnnxNnet(const U2OnnxNnet& other) : opts_(other.opts_) {
    // metadatas
    encoder_output_size_ = other.encoder_output_size_;
    num_blocks_ = other.num_blocks_;
    head_ = other.head_;
    cnn_module_kernel_ = other.cnn_module_kernel_;
    right_context_ = other.right_context_;
    subsampling_rate_ = other.subsampling_rate_;
    sos_ = other.sos_;
    eos_ = other.eos_;
    is_bidecoder_ = other.is_bidecoder_;
    chunk_size_ = other.chunk_size_;
    num_left_chunks_ = other.num_left_chunks_;
    offset_ = other.offset_;

    // session
    encoder_ = other.encoder_;
    ctc_ = other.ctc_;
    rescore_ = other.rescore_;

    // node names
    encoder_in_names_ = other.encoder_in_names_;
    encoder_out_names_ = other.encoder_out_names_;
    ctc_in_names_ = other.ctc_in_names_;
    ctc_out_names_ = other.ctc_out_names_;
    rescore_in_names_ = other.rescore_in_names_;
    rescore_out_names_ = other.rescore_out_names_;
}

// Logs the name, dtype and dims of every input and output tensor of `runtime`
// and stores the tensor names, in order, in `in_names` / `out_names`.
void U2OnnxNnet::GetInputOutputInfo(const std::shared_ptr<fastdeploy::Runtime>& runtime,
                                std::vector<std::string>* in_names, std::vector<std::string>* out_names) {
    // Shared worker for both directions: `label` is "Input" or "Output".
    auto describe = [](const std::vector<fastdeploy::TensorInfo>& infos,
                       const char* label,
                       std::vector<std::string>* names) {
        names->resize(infos.size());
        for (size_t idx = 0; idx < infos.size(); ++idx) {
            const fastdeploy::TensorInfo& tensor = infos[idx];

            // Space-separated dims, with a trailing space after each one.
            std::stringstream dims;
            for (size_t d = 0; d < tensor.shape.size(); ++d) {
                dims << tensor.shape[d] << " ";
            }
            LOG(INFO) << "\t" << label << " " << idx
                      << " : name=" << tensor.name
                      << " type=" << tensor.dtype
                      << " dims=" << dims.str();
            (*names)[idx] = tensor.name;
        }
    };

    describe(runtime->GetInputInfos(), "Input", in_names);
    describe(runtime->GetOutputInfos(), "Output", out_names);
}

std::shared_ptr<NnetBase> U2OnnxNnet::Clone() const {
    auto asr_model = std::make_shared<U2OnnxNnet>(*this);
    // reset inner state for new decoding
    asr_model->Reset();
    return asr_model;
}

void U2OnnxNnet::Reset() {
    offset_ = 0;
    encoder_outs_.clear();
    cached_feats_.clear();
    // Reset att_cache
    if (num_left_chunks_ > 0) {
        int required_cache_size = chunk_size_ * num_left_chunks_;
        offset_ = required_cache_size;
        att_cache_.resize(num_blocks_ * head_ * required_cache_size *
                            encoder_output_size_ / head_ * 2,
                        0.0);
        const std::vector<int64_t> att_cache_shape = {num_blocks_, head_, required_cache_size,
                                        encoder_output_size_ / head_ * 2};
        att_cache_ort_.SetExternalData(att_cache_shape, fastdeploy::FDDataType::FP32, att_cache_.data());
    } else {
        att_cache_.resize(0, 0.0);
        const std::vector<int64_t> att_cache_shape = {num_blocks_, head_, 0,
                                        encoder_output_size_ / head_ * 2};
        att_cache_ort_.SetExternalData(att_cache_shape, fastdeploy::FDDataType::FP32, att_cache_.data());
    }

    // Reset cnn_cache
    cnn_cache_.resize(
        num_blocks_ * encoder_output_size_ * (cnn_module_kernel_ - 1), 0.0);
    const std::vector<int64_t> cnn_cache_shape = {num_blocks_, 1, encoder_output_size_,
                                        cnn_module_kernel_ - 1};
    cnn_cache_ort_.SetExternalData(cnn_cache_shape, fastdeploy::FDDataType::FP32, cnn_cache_.data());
}

void U2OnnxNnet::FeedForward(const std::vector<BaseFloat>& features,
                         const int32& feature_dim,
                         NnetOut* out) {
    kaldi::Timer timer;

    std::vector<kaldi::BaseFloat> ctc_probs;
    ForwardEncoderChunkImpl(
        features, feature_dim, &out->logprobs, &out->vocab_dim);
    VLOG(1) << "FeedForward cost: " << timer.Elapsed() << " sec. "
            << features.size() / feature_dim << " frames.";
}

// Runs one feature chunk through the ONNX encoder and ctc models.
//   chunk_feats: flattened features, `feat_dim` values per frame.
//   feat_dim:    number of values per frame; must be positive.
//   out_prob:    receives the ctc output, flattened to T * D values.
//   vocab_dim:   receives D, the last dimension of the ctc output.
// Side effects: advances offset_ by the number of encoder output frames,
// replaces the attention/cnn cache tensors with the ones returned by the
// encoder, and appends the encoder output to encoder_outs_.
void U2OnnxNnet::ForwardEncoderChunkImpl(
        const std::vector<kaldi::BaseFloat>& chunk_feats,
        const int32& feat_dim,
        std::vector<kaldi::BaseFloat>* out_prob,
        int32* vocab_dim) {
    CHECK(out_prob != nullptr);
    CHECK(vocab_dim != nullptr);
    CHECK_GT(feat_dim, 0);

    // 1. Prepare onnx required data, splice cached_feature_ and chunk_feats
    // chunk
    int num_frames = chunk_feats.size() / feat_dim;
    VLOG(3) << "num_frames: " << num_frames;
    VLOG(3) << "feat_dim: " << feat_dim;
    const int feature_dim = feat_dim;
    // Mutable copy: SetExternalData is handed a non-const buffer.
    std::vector<float> feats;
    feats.insert(feats.end(), chunk_feats.begin(), chunk_feats.end());
    fastdeploy::FDTensor feats_ort;
    const std::vector<int64_t> feats_shape = {1, num_frames, feature_dim};
    feats_ort.SetExternalData(feats_shape, fastdeploy::FDDataType::FP32, feats.data());

    // offset
    int64_t offset_int64 = static_cast<int64_t>(offset_);
    fastdeploy::FDTensor offset_ort;
    offset_ort.SetExternalData({}, fastdeploy::FDDataType::INT64, &offset_int64);

    // required_cache_size
    int64_t required_cache_size = chunk_size_ * num_left_chunks_;
    fastdeploy::FDTensor required_cache_size_ort("");
    required_cache_size_ort.SetExternalData({}, fastdeploy::FDDataType::INT64, &required_cache_size);

    // att_mask
    fastdeploy::FDTensor att_mask_ort;
    std::vector<uint8_t> att_mask(required_cache_size + chunk_size_, 1);
    if (num_left_chunks_ > 0) {
        // Mask out the part of the left cache that has not been filled yet.
        int chunk_idx = offset_ / chunk_size_ - num_left_chunks_;
        if (chunk_idx < num_left_chunks_) {
            for (int i = 0; i < (num_left_chunks_ - chunk_idx) * chunk_size_; ++i) {
                att_mask[i] = 0;
            }
        }
        const std::vector<int64_t> att_mask_shape = {1, 1, required_cache_size + chunk_size_};
        att_mask_ort.SetExternalData(att_mask_shape, fastdeploy::FDDataType::BOOL, reinterpret_cast<bool*>(att_mask.data()));
    }

    // 2. Encoder chunk forward: bind each prepared tensor to the encoder
    // input with the matching name.
    std::vector<fastdeploy::FDTensor> inputs(encoder_in_names_.size());
    for (size_t i = 0; i < encoder_in_names_.size(); ++i) {
        const std::string& name = encoder_in_names_[i];
        if (name == "chunk") {
            inputs[i] = std::move(feats_ort);
            inputs[i].name = "chunk";
        } else if (name == "offset") {
            inputs[i] = std::move(offset_ort);
            inputs[i].name = "offset";
        } else if (name == "required_cache_size") {
            inputs[i] = std::move(required_cache_size_ort);
            inputs[i].name = "required_cache_size";
        } else if (name == "att_cache") {
            inputs[i] = std::move(att_cache_ort_);
            inputs[i].name = "att_cache";
        } else if (name == "cnn_cache") {
            inputs[i] = std::move(cnn_cache_ort_);
            inputs[i].name = "cnn_cache";
        } else if (name == "att_mask") {
            inputs[i] = std::move(att_mask_ort);
            inputs[i].name = "att_mask";
        }
    }

    std::vector<fastdeploy::FDTensor> ort_outputs;
    // Infer() must not be wrapped in assert(): with NDEBUG the call itself
    // would be compiled out and no inference would run.
    CHECK(encoder_->Infer(inputs, &ort_outputs)) << "onnx encoder infer failed";
    // Expected outputs: [0] encoder out, [1] att cache, [2] cnn cache.
    CHECK_GE(ort_outputs.size(), 3u);
    CHECK_GE(ort_outputs[0].shape.size(), 2u);

    offset_ += static_cast<int>(ort_outputs[0].shape[1]);
    att_cache_ort_ = std::move(ort_outputs[1]);
    cnn_cache_ort_ = std::move(ort_outputs[2]);

    // 3. CTC forward on the encoder output.
    CHECK(!ctc_in_names_.empty());
    std::vector<fastdeploy::FDTensor> ctc_inputs;
    ctc_inputs.emplace_back(std::move(ort_outputs[0]));
    ctc_inputs[0].name = ctc_in_names_[0];

    std::vector<fastdeploy::FDTensor> ctc_ort_outputs;
    CHECK(ctc_->Infer(ctc_inputs, &ctc_ort_outputs)) << "onnx ctc infer failed";
    CHECK(!ctc_ort_outputs.empty());
    // Keep the encoder output for later attention rescoring.
    encoder_outs_.emplace_back(std::move(ctc_inputs[0]));

    float* logp_data = reinterpret_cast<float*>(ctc_ort_outputs[0].Data());

    // Copy to output, (B=1,T,D)
    std::vector<int64_t> ctc_log_probs_shape = ctc_ort_outputs[0].shape;
    CHECK_EQ(ctc_log_probs_shape.size(), 3);
    int B = ctc_log_probs_shape[0];
    CHECK_EQ(B, 1);
    int T = ctc_log_probs_shape[1];
    int D = ctc_log_probs_shape[2];
    *vocab_dim = D;

    out_prob->resize(T * D);
    std::memcpy(
        out_prob->data(), logp_data, T * D * sizeof(kaldi::BaseFloat));
    return;
}

// Sums the scores of one hypothesis from a row-major score table.
// `prob` is read as rows of `decode_out_len` values: row j contributes the
// entry at column hyp[j], and the row following the last token contributes the
// entry at column `eos`.
float U2OnnxNnet::ComputeAttentionScore(const float* prob,
                                          const std::vector<int>& hyp, int eos,
                                          int decode_out_len) {
    float total = 0.0f;
    const float* row = prob;
    for (std::vector<int>::const_iterator tok = hyp.begin(); tok != hyp.end();
         ++tok) {
        total += row[*tok];
        row += decode_out_len;  // advance to the next step's scores
    }
    // `row` now points at the step right after the last token.
    total += row[eos];
    return total;
}

void U2OnnxNnet::AttentionRescoring(const std::vector<std::vector<int>>& hyps,
                                float reverse_weight,
                                std::vector<float>* rescoring_score) {
    CHECK(rescoring_score != nullptr);
    int num_hyps = hyps.size();
    rescoring_score->resize(num_hyps, 0.0f);

    if (num_hyps == 0) {
        return;
    }
    // No encoder output
    if (encoder_outs_.size() == 0) {
        return;
    }

    std::vector<int64_t> hyps_lens;
    int max_hyps_len = 0;
    for (size_t i = 0; i < num_hyps; ++i) {
        int length = hyps[i].size() + 1;
        max_hyps_len = std::max(length, max_hyps_len);
        hyps_lens.emplace_back(static_cast<int64_t>(length));
    }

    std::vector<float> rescore_input;
    int encoder_len = 0;
    for (int i = 0; i < encoder_outs_.size(); i++) {
        float* encoder_outs_data = reinterpret_cast<float*>(encoder_outs_[i].Data());
        for (int j = 0; j < encoder_outs_[i].Numel(); j++) {
            rescore_input.emplace_back(encoder_outs_data[j]);
        }
        encoder_len += encoder_outs_[i].shape[1];
    }

    std::vector<int64_t> hyps_pad;

    for (size_t i = 0; i < num_hyps; ++i) {
        const std::vector<int>& hyp = hyps[i];
        hyps_pad.emplace_back(sos_);
        size_t j = 0;
        for (; j < hyp.size(); ++j) {
            hyps_pad.emplace_back(hyp[j]);
        }
        if (j == max_hyps_len - 1) {
            continue;
        }
        for (; j < max_hyps_len - 1; ++j) {
            hyps_pad.emplace_back(0);
        }
    }

    const std::vector<int64_t> hyps_pad_shape = {num_hyps, max_hyps_len};
    const std::vector<int64_t> hyps_lens_shape = {num_hyps};
    const std::vector<int64_t> decode_input_shape = {1, encoder_len, encoder_output_size_};

    fastdeploy::FDTensor hyps_pad_tensor_;
    hyps_pad_tensor_.SetExternalData(hyps_pad_shape, fastdeploy::FDDataType::INT64, hyps_pad.data());
    fastdeploy::FDTensor hyps_lens_tensor_;
    hyps_lens_tensor_.SetExternalData(hyps_lens_shape, fastdeploy::FDDataType::INT64, hyps_lens.data());
    fastdeploy::FDTensor decode_input_tensor_;
    decode_input_tensor_.SetExternalData(decode_input_shape, fastdeploy::FDDataType::FP32, rescore_input.data());

    std::vector<fastdeploy::FDTensor> rescore_inputs(3);

    rescore_inputs[0] = std::move(hyps_pad_tensor_);
    rescore_inputs[0].name = rescore_in_names_[0];
    rescore_inputs[1] = std::move(hyps_lens_tensor_);
    rescore_inputs[1].name = rescore_in_names_[1];
    rescore_inputs[2] = std::move(decode_input_tensor_);
    rescore_inputs[2].name = rescore_in_names_[2];

    std::vector<fastdeploy::FDTensor> rescore_outputs;
    assert(rescore_->Infer(rescore_inputs, &rescore_outputs));

    float* decoder_outs_data = reinterpret_cast<float*>(rescore_outputs[0].Data());
    float* r_decoder_outs_data = reinterpret_cast<float*>(rescore_outputs[1].Data());

    int decode_out_len = rescore_outputs[0].shape[2];

    for (size_t i = 0; i < num_hyps; ++i) {
        const std::vector<int>& hyp = hyps[i];
        float score = 0.0f;
        // left to right decoder score
        score = ComputeAttentionScore(
            decoder_outs_data + max_hyps_len * decode_out_len * i, hyp, eos_,
            decode_out_len);
        // Optional: Used for right to left score
        float r_score = 0.0f;
        if (is_bidecoder_ && reverse_weight > 0) {
        std::vector<int> r_hyp(hyp.size());
        std::reverse_copy(hyp.begin(), hyp.end(), r_hyp.begin());
        // right to left decoder score
        r_score = ComputeAttentionScore(
            r_decoder_outs_data + max_hyps_len * decode_out_len * i, r_hyp, eos_,
            decode_out_len);
        }
        // combined left-to-right and right-to-left score
        (*rescoring_score)[i] =
            score * (1 - reverse_weight) + r_score * reverse_weight;
    }
}

// Currently a no-op: the body is empty, so `encoder_out` is left exactly as
// the caller passed it in.
// NOTE(review): presumably this should copy the cached encoder outputs into
// `encoder_out` like the non-ONNX backend does -- confirm before relying on it.
void U2OnnxNnet::EncoderOuts(
    std::vector<std::vector<kaldi::BaseFloat>>* encoder_out) const {
}

}  // namespace ppspeech

================================================
FILE: runtime/engine/asr/nnet/u2_onnx_nnet.h
================================================
// Copyright 2022 Horizon Robotics. All Rights Reserved.
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// modified from
// https://github.com/wenet-e2e/wenet/blob/main/runtime/core/decoder/onnx_asr_model.h

#pragma once

#include "base/common.h"
#include "matrix/kaldi-matrix.h"
#include "nnet/nnet_itf.h"
#include "nnet/u2_nnet.h"

#include "fastdeploy/runtime.h"

namespace ppspeech {

// U2 acoustic model backend that runs its sub-models through
// fastdeploy::Runtime sessions (encoder, ctc and rescore, see the members
// below). Adapted from wenet's onnx_asr_model.h.
class U2OnnxNnet : public U2NnetBase {

  public:
    explicit U2OnnxNnet(const ModelOptions& opts);
    U2OnnxNnet(const U2OnnxNnet& other);

    void FeedForward(const std::vector<kaldi::BaseFloat>& features,
                     const int32& feature_dim,
                     NnetOut* out) override;

    void Reset() override;

    // This backend always reports that its outputs are log-probabilities.
    bool IsLogProb() override { return true; }

    void Dim();

    void LoadModel(const std::string& model_dir);

    std::shared_ptr<NnetBase> Clone() const override;

    void ForwardEncoderChunkImpl(
        const std::vector<kaldi::BaseFloat>& chunk_feats,
        const int32& feat_dim,
        std::vector<kaldi::BaseFloat>* ctc_probs,
        int32* vocab_dim) override;

    // Scores one hypothesis `hyp` against the decoder output block starting at
    // `prob`; `decode_out_len` is the size of the last output dimension.
    float ComputeAttentionScore(const float* prob, const std::vector<int>& hyp,
                              int eos, int decode_out_len);

    // Runs the rescore session on `hyps` and writes one score per hypothesis
    // into `rescoring_score`. The left-to-right and right-to-left decoder
    // scores are mixed as score * (1 - reverse_weight) + r_score * reverse_weight.
    void AttentionRescoring(const std::vector<std::vector<int>>& hyps,
                            float reverse_weight,
                            std::vector<float>* rescoring_score) override;

    // NOTE(review): the definition in u2_onnx_nnet.cc is an empty body, so
    // `encoder_out` is not filled in by this backend.
    void EncoderOuts(
        std::vector<std::vector<kaldi::BaseFloat>>* encoder_out) const;

    // Collects the input and output node names of `runtime` into `in_names`
    // and `out_names` (presumably in the runtime's own order -- confirm).
    void GetInputOutputInfo(const std::shared_ptr<fastdeploy::Runtime>& runtime,
                          std::vector<std::string>* in_names,
                          std::vector<std::string>* out_names);
  private:
    ModelOptions opts_;

    // Model hyper-parameters; all default to 0 until set elsewhere.
    int encoder_output_size_ = 0;
    int num_blocks_ = 0;
    int cnn_module_kernel_ = 0;
    int head_ = 0;

    // Inference sessions, one per exported sub-model.
    std::shared_ptr<fastdeploy::Runtime> encoder_ = nullptr;
    std::shared_ptr<fastdeploy::Runtime> rescore_ = nullptr;
    std::shared_ptr<fastdeploy::Runtime> ctc_ = nullptr;


    // Input/output node names of each session.
    std::vector<std::string> encoder_in_names_, encoder_out_names_;
    std::vector<std::string> ctc_in_names_, ctc_out_names_;
    std::vector<std::string> rescore_in_names_, rescore_out_names_;

    // Streaming caches held as tensors, plus accumulated encoder outputs.
    fastdeploy::FDTensor att_cache_ort_;
    fastdeploy::FDTensor cnn_cache_ort_;
    std::vector<fastdeploy::FDTensor> encoder_outs_;

    // Float buffers for the caches (presumably the storage the tensors above
    // point at -- confirm against the .cc file).
    std::vector<float> att_cache_;
    std::vector<float> cnn_cache_;
};

}  // namespace ppspeech

================================================
FILE: runtime/engine/asr/recognizer/CMakeLists.txt
================================================
# Sources of the static `recognizer` library.
set(srcs)

list(APPEND srcs
  recognizer_controller.cc
  recognizer_controller_impl.cc
  recognizer_instance.cc
  recognizer.cc
)

# The library re-exports `decoder` to everything that links against it.
add_library(recognizer STATIC ${srcs})
target_link_libraries(recognizer PUBLIC decoder)

# Command-line drivers; each <name> is built from <name>.cc in this directory.
set(TEST_BINS 
  recognizer_batch_main
  recognizer_batch_main2
  recognizer_main
)

# Every driver gets the same include paths, compile flags and link line.
foreach(bin_name IN LISTS TEST_BINS)
  add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
  target_include_directories(${bin_name} PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
  target_link_libraries(${bin_name} recognizer nnet decoder fst utils gflags glog kaldi-base kaldi-matrix kaldi-util)
  target_compile_options(${bin_name}  PRIVATE ${PADDLE_COMPILE_FLAGS})
  target_include_directories(${bin_name}  PRIVATE ${pybind11_INCLUDE_DIRS} ${PROJECT_SOURCE_DIR})
  target_link_libraries(${bin_name}  ${PYTHON_LIBRARIES} ${PADDLE_LINK_FLAGS} -ldl)
endforeach()


================================================
FILE: runtime/engine/asr/recognizer/recognizer.cc
================================================
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "recognizer/recognizer.h"
#include "recognizer/recognizer_instance.h"

// Forwards the model, symbol-table and FST paths together with the requested
// instance count to the RecognizerInstance singleton and returns its result.
bool InitRecognizer(const std::string& model_file,
                    const std::string& word_symbol_table_file,
                    const std::string& fst_file,
                    int num_instance) {
    const bool initialized = ppspeech::RecognizerInstance::GetInstance().Init(
        model_file, word_symbol_table_file, fst_file, num_instance);
    return initialized;
}

// Asks the RecognizerInstance singleton for a recognizer id and returns it
// unchanged (the batch driver treats -1 as "none available, retry").
int GetRecognizerInstanceId() {
    const int instance_id =
        ppspeech::RecognizerInstance::GetInstance().GetRecognizerInstanceId();
    return instance_id;
}

// Forwards to RecognizerInstance::InitDecoder for the recognizer identified
// by `instance_id`.
void InitDecoder(int instance_id) {
    ppspeech::RecognizerInstance::GetInstance().InitDecoder(instance_id);
}

// Hands a chunk of audio samples to the recognizer identified by
// `instance_id`.
void AcceptData(const std::vector<float>& waves, int instance_id) {
    ppspeech::RecognizerInstance::GetInstance().Accept(waves, instance_id);
}

// Tells the recognizer identified by `instance_id` that no more audio will
// be supplied for the current utterance.
void SetInputFinished(int instance_id) {
    ppspeech::RecognizerInstance::GetInstance().SetInputFinished(instance_id);
}

// Returns the text produced by RecognizerInstance::GetResult for the
// recognizer identified by `instance_id`.
std::string GetFinalResult(int instance_id) {
    std::string text =
        ppspeech::RecognizerInstance::GetInstance().GetResult(instance_id);
    return text;
}

================================================
FILE: runtime/engine/asr/recognizer/recognizer.h
================================================
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>
#include <vector>

// Free-function facade over the ppspeech::RecognizerInstance singleton.
// Call order used by the batch driver: InitRecognizer once, then per
// utterance GetRecognizerInstanceId -> InitDecoder -> AcceptData (repeated)
// -> SetInputFinished -> GetFinalResult.

// Initializes the singleton with the model, word symbol table and FST paths
// and the number of recognizer instances; returns the singleton's status.
bool InitRecognizer(const std::string& model_file, 
                    const std::string& word_symbol_table_file,
                    const std::string& fst_file,
                    int num_instance);
// Returns the id of a recognizer instance (the batch driver retries on -1).
int GetRecognizerInstanceId();
// Prepares the decoder of instance `instance_id` for a new utterance.
void InitDecoder(int instance_id);
// Feeds a chunk of audio samples to instance `instance_id`.
void AcceptData(const std::vector<float>& waves, int instance_id);
// Signals that instance `instance_id` will receive no more audio.
void SetInputFinished(int instance_id);
// Returns the recognition result of instance `instance_id`.
std::string GetFinalResult(int instance_id);

================================================
FILE: runtime/engine/asr/recognizer/recognizer_batch_main.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "common/base/thread_pool.h"
#include "common/utils/file_utils.h"
#include "common/utils/strings.h"
#include "decoder/param.h"
#include "frontend/wave-reader.h"
#include "kaldi/util/table-types.h"
#include "nnet/u2_nnet.h"
#include "recognizer/recognizer_controller.h"

// Path of a text file with one "<utt-id> <wav-path>" pair per line; it is
// read by SplitUtt rather than being opened as a Kaldi rspecifier.
DEFINE_string(wav_rspecifier, "", "test feature rspecifier");
// Kaldi wspecifier handed to the TokenWriter that stores the results.
DEFINE_string(result_wspecifier, "", "test result wspecifier");
// Chunk length in seconds; multiplied by sample_rate to get samples per chunk.
DEFINE_double(streaming_chunk, 0.36, "streaming feature chunk size");
DEFINE_int32(sample_rate, 16000, "sample rate");
// Number of recognizer workers and of thread-pool threads.
DEFINE_int32(njob, 3, "njob");

using std::string;
using std::vector;

// Reads `wavlist_file` (one "<utt-id> <wav-path>" pair per line, separated by
// spaces or tabs) and distributes the entries round-robin over `njob` buckets.
//
// wavlist_file: path of the list file.
// uttlists:     output, uttlists->at(j) receives the utterance ids of job j.
// wavlists:     output, wavlists->at(j) receives the matching wav paths.
// njob:         number of buckets; must be positive.
//
// Aborts (CHECK) when njob is not positive or a line does not have exactly
// two fields.
void SplitUtt(string wavlist_file,
              vector<vector<string>>* uttlists,
              vector<vector<string>>* wavlists,
              int njob) {
    // A non-positive njob would make `idx % njob` divide by zero or turn the
    // resize below into a huge allocation.
    CHECK_GT(njob, 0);
    vector<string> wavlist;
    wavlists->resize(njob);
    uttlists->resize(njob);
    ppspeech::ReadFileToVector(wavlist_file, &wavlist);
    for (size_t idx = 0; idx < wavlist.size(); ++idx) {
        const string& utt_str = wavlist[idx];
        vector<string> utt_wav = ppspeech::StrSplit(utt_str, " \t");
        // Validate the field count before touching utt_wav[0]; a blank line
        // yields no fields at all.
        CHECK_EQ(utt_wav.size(), size_t(2));
        LOG(INFO) << utt_wav[0];
        uttlists->at(idx % njob).push_back(utt_wav[0]);
        wavlists->at(idx % njob).push_back(utt_wav[1]);
    }
}

// Worker body run by one thread of the pool: decodes every wav in `wavlist`
// in streaming fashion through `recognizer_controller` and appends one
// result string per utterance to `results` (same order as `wavlist`).
//
// recognizer_controller: shared controller that owns the recognizer workers.
// wavlist / uttlist:     parallel lists of wav paths and utterance ids.
// results:               output; an empty recognition result is stored as " "
//                        because the TokenWriter cannot write empty strings.
//
// Aborts (CHECK) when the chunk size is not positive or a wav file cannot be
// opened.
void recognizer_func(ppspeech::RecognizerController* recognizer_controller,
                     std::vector<string> wavlist,
                     std::vector<string> uttlist,
                     std::vector<string>* results) {
    int32 num_done = 0, num_err = 0;
    double tot_wav_duration = 0.0;
    double tot_decode_time = 0.0;
    int chunk_sample_size = FLAGS_streaming_chunk * FLAGS_sample_rate;
    if (wavlist.empty()) return;
    // A zero-sized chunk would never advance sample_offset below.
    CHECK_GT(chunk_sample_size, 0);

    results->reserve(wavlist.size());
    for (size_t idx = 0; idx < wavlist.size(); ++idx) {
        std::string utt = uttlist[idx];
        std::string wav_file = wavlist[idx];
        // Wave files are binary data; text mode corrupts them on platforms
        // that translate line endings.
        std::ifstream infile(wav_file, std::ios::in | std::ios::binary);
        CHECK(infile.is_open()) << "Failed to open wav file: " << wav_file;
        kaldi::WaveData wave_data;
        wave_data.Read(infile);
        // Spin until the controller hands out an idle worker (-1 = none yet).
        int32 recog_id = -1;
        while (recog_id == -1) {
            recog_id = recognizer_controller->GetRecognizerInstanceId();
        }
        recognizer_controller->InitDecoder(recog_id);
        LOG(INFO) << "utt: " << utt;
        LOG(INFO) << "wav dur: " << wave_data.Duration() << " sec.";
        double dur = wave_data.Duration();
        tot_wav_duration += dur;

        // Only the first channel is decoded.
        int32 this_channel = 0;
        kaldi::SubVector<kaldi::BaseFloat> waveform(wave_data.Data(),
                                                    this_channel);
        int tot_samples = waveform.Dim();
        LOG(INFO) << "wav len (sample): " << tot_samples;

        int sample_offset = 0;
        kaldi::Timer local_timer;

        while (sample_offset < tot_samples) {
            int cur_chunk_size =
                std::min(chunk_sample_size, tot_samples - sample_offset);

            std::vector<kaldi::BaseFloat> wav_chunk(cur_chunk_size);
            for (int i = 0; i < cur_chunk_size; ++i) {
                wav_chunk[i] = waveform(sample_offset + i);
            }

            recognizer_controller->Accept(wav_chunk, recog_id);
            // no overlap
            sample_offset += cur_chunk_size;
        }
        recognizer_controller->SetInputFinished(recog_id);
        CHECK(sample_offset == tot_samples);
        std::string result = recognizer_controller->GetFinalResult(recog_id);
        if (result.empty()) {
            // the TokenWriter can not write empty string.
            ++num_err;
            LOG(INFO) << " the result of " << utt << " is empty";
            result = " ";
        }

        tot_decode_time += local_timer.Elapsed();
        LOG(INFO) << utt << " " << result;
        LOG(INFO) << " RTF: " << local_timer.Elapsed() / dur << " dur: " << dur
                  << " cost: " << local_timer.Elapsed();

        results->push_back(result);
        ++num_done;
    }
    LOG(INFO) << "Done " << num_done << " out of " << (num_err + num_done);
    LOG(INFO) << "total wav duration is: " << tot_wav_duration << " sec";
    LOG(INFO) << "total decode cost:" << tot_decode_time << " sec";
    LOG(INFO) << "RTF is: " << tot_decode_time / tot_wav_duration;
}

int main(int argc, char* argv[]) {
    gflags::SetUsageMessage("Usage:");
    gflags::ParseCommandLineFlags(&argc, &argv, false);
    google::InitGoogleLogging(argv[0]);
    google::InstallFailureSignalHandler();
    FLAGS_logtostderr = 1;

    int sample_rate = FLAGS_sample_rate;
    float streaming_chunk = FLAGS_streaming_chunk;
    int chunk_sample_size = streaming_chunk * sample_rate;
    kaldi::TokenWriter result_writer(FLAGS_result_wspecifier);
    int njob = FLAGS_njob;
    LOG(INFO) << "sr: " << sample_rate;
    LOG(INFO) << "chunk size (s): " << streaming_chunk;
    LOG(INFO) << "chunk size (sample): " << chunk_sample_size;

    ppspeech::RecognizerResource resource =
        ppspeech::RecognizerResource::InitFromFlags();
    ppspeech::RecognizerController recognizer_controller(njob, resource);
    ThreadPool threadpool(njob);
    vector<vector<string>> wavlist;
    vector<vector<string>> uttlist;
    vector<vector<string>> resultlist(njob);
    vector<std::future<void>> futurelist;
    SplitUtt(FLAGS_wav_rspecifier, &uttlist, &wavlist, njob);
    for (size_t i = 0; i < njob; ++i) {
        std::future<void> f = threadpool.enqueue(recognizer_func,
                                                 &recognizer_controller,
                                                 wavlist[i],
                                                 uttlist[i],
                                                 &resultlist[i]);
        futurelist.push_back(std::move(f));
    }

    for (size_t i = 0; i < njob; ++i) {
        futurelist[i].get();
    }

    for (size_t idx = 0; idx < njob; ++idx) {
        for (size_t utt_idx = 0; utt_idx < uttlist[idx].size(); ++utt_idx) {
            string utt = uttlist[idx][utt_idx];
            string result = resultlist[idx][utt_idx];
            result_writer.Write(utt, result);
        }
    }
    return 0;
}


================================================
FILE: runtime/engine/asr/recognizer/recognizer_batch_main2.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "common/base/thread_pool.h"
#include "common/utils/file_utils.h"
#include "common/utils/strings.h"
#include "decoder/param.h"
#include "frontend/wave-reader.h"
#include "kaldi/util/table-types.h"
#include "nnet/u2_nnet.h"
#include "recognizer/recognizer.h"

// Path of a text file with one "<utt-id> <wav-path>" pair per line; it is
// read by SplitUtt rather than being opened as a Kaldi rspecifier.
DEFINE_string(wav_rspecifier, "", "test feature rspecifier");
// Kaldi wspecifier handed to the TokenWriter that stores the results.
DEFINE_string(result_wspecifier, "", "test result wspecifier");
// Chunk length in seconds; multiplied by sample_rate to get samples per chunk.
DEFINE_double(streaming_chunk, 0.36, "streaming feature chunk size");
DEFINE_int32(sample_rate, 16000, "sample rate");
// Number of recognizer instances and of thread-pool threads.
DEFINE_int32(njob, 3, "njob");

using std::string;
using std::vector;

// Reads `wavlist_file` (one "<utt-id> <wav-path>" pair per line, separated by
// spaces or tabs) and distributes the entries round-robin over `njob` buckets.
//
// wavlist_file: path of the list file.
// uttlists:     output, uttlists->at(j) receives the utterance ids of job j.
// wavlists:     output, wavlists->at(j) receives the matching wav paths.
// njob:         number of buckets; must be positive.
//
// Aborts (CHECK) when njob is not positive or a line does not have exactly
// two fields.
void SplitUtt(string wavlist_file,
              vector<vector<string>>* uttlists,
              vector<vector<string>>* wavlists,
              int njob) {
    // A non-positive njob would make `idx % njob` divide by zero or turn the
    // resize below into a huge allocation.
    CHECK_GT(njob, 0);
    vector<string> wavlist;
    wavlists->resize(njob);
    uttlists->resize(njob);
    ppspeech::ReadFileToVector(wavlist_file, &wavlist);
    for (size_t idx = 0; idx < wavlist.size(); ++idx) {
        const string& utt_str = wavlist[idx];
        vector<string> utt_wav = ppspeech::StrSplit(utt_str, " \t");
        // Validate the field count before touching utt_wav[0]; a blank line
        // yields no fields at all.
        CHECK_EQ(utt_wav.size(), size_t(2));
        LOG(INFO) << utt_wav[0];
        uttlists->at(idx % njob).push_back(utt_wav[0]);
        wavlists->at(idx % njob).push_back(utt_wav[1]);
    }
}

void recognizer_func(std::vector<string> wavlist,
                     std::vector<string> uttlist,
                     std::vector<string>* results) {
    int32 num_done = 0, num_err = 0;
    double tot_wav_duration = 0.0;
    double tot_attention_rescore_time = 0.0;
    double tot_decode_time = 0.0;
    int chunk_sample_size = FLAGS_streaming_chunk * FLAGS_sample_rate;
    if (wavlist.empty()) return;

    results->reserve(wavlist.size());
    for (size_t idx = 0; idx < wavlist.size(); ++idx) {
        std::string utt = uttlist[idx];
        std::string wav_file = wavlist[idx];
        std::ifstream infile;
        infile.open(wav_file, std::ifstream::in);
        kaldi::WaveData wave_data;
        wave_data.Read(infile);
        int32 recog_id = -1;
        while (recog_id == -1) {
            recog_id = GetRecognizerInstanceId();
        }
        InitDecoder(recog_id);
        LOG(INFO) << "utt: " << utt;
        LOG(INFO) << "wav dur: " << wave_data.Duration() << " sec.";
        double dur = wave_data.Duration();
        tot_wav_duration += dur;

        int32 this_channel = 0;
        kaldi::SubVector<kaldi::BaseFloat> waveform(wave_data.Data(),
                                                    this_channel);
        int tot_samples = waveform.Dim();
        LOG(INFO) << "wav len (sample): " << tot_samples;

        int sample_offset = 0;
        kaldi::Timer local_timer;

        while (sample_offset < tot_samples) {
            int cur_chunk_size =
                std::min(chunk_sample_size, tot_samples - sample_offset);

            std::vector<kaldi::BaseFloat> wav_chunk(cur_chunk_size);
            for (int i = 0; i < cur_chunk_size; ++i) {
                wav_chunk[i] = waveform(sample_offset + i);
            }

            AcceptData(wav_chunk, recog_id);
            // no overlap
            sample_offset += cur_chunk_size;
        }
        SetInputFinished(recog_id);
        CHECK(sample_offset == tot_samples);
        std::string result = GetFinalResult(recog_id);
        if (result.empty()) {
            // the TokenWriter can not write empty string.
            ++num_err;
            LOG(INFO) << " the result of " << utt << " is empty";
            result = " ";
        }

        tot_decode_time += local_timer.Elapsed();
        LOG(INFO) << utt << " " << result;
        LOG(INFO) << " RTF: " << local_timer.Elapsed() / dur << " dur: " << dur
                  << " cost: " << local_timer.Elapsed();

        results->push_back(result);
        ++num_done;
    }
    LOG(INFO) << "Done " << num_done << " out of " << (num_err + num_done);
    LOG(INFO) << "total wav duration is: " << tot_wav_duration << " sec";
    LOG(INFO) << "total decode cost:" << tot_decode_time << " sec";
    LOG(INFO) << "RTF is: " << tot_decode_time / tot_wav_duration;
}

int main(int argc, char* argv[]) {
    gflags::SetUsageMessage("Usage:");
    gflags::ParseCommandLineFlags(&argc, &argv, false);
    google::InitGoogleLogging(argv[0]);
    google::InstallFailureSignalHandler();
    FLAGS_logtostderr = 1;

    int sample_rate = FLAGS_sample_rate;
    float streaming_chunk = FLAGS_streaming_chunk;
    int chunk_sample_size = streaming_chunk * sample_rate;
    kaldi::TokenWriter result_writer(FLAGS_result_wspecifier);
    int njob = FLAGS_njob;
    LOG(INFO) << "sr: " << sample_rate;
    LOG(INFO) << "chunk size (s): " << streaming_chunk;
    LOG(INFO) << "chunk size (sample): " << chunk_sample_size;

    InitRecognizer(FLAGS_model_path, FLAGS_word_symbol_table, FLAGS_graph_path, njob);
    ThreadPool threadpool(njob);
    vector<vector<string>> wavlist;
    vector<vector<string>> uttlist;
    vector<vector<string>> resultlist(njob);
    vector<std::future<void>> futurelist;
    SplitUtt(FLAGS_wav_rspecifier, &uttlist, &wavlist, njob);
    for (size_t i = 0; i < njob; ++i) {
        std::future<void> f = threadpool.enqueue(recognizer_func,
                                                 wavlist[i],
                                                 uttlist[i],
                                                 &resultlist[i]);
        futurelist.push_back(std::move(f));
    }

    for (size_t i = 0; i < njob; ++i) {
        futurelist[i].get();
    }

    for (size_t idx = 0; idx < njob; ++idx) {
        for (size_t utt_idx = 0; utt_idx < uttlist[idx].size(); ++utt_idx) {
            string utt = uttlist[idx][utt_idx];
            string result = resultlist[idx][utt_idx];
            result_writer.Write(utt, result);
        }
    }
    return 0;
}


================================================
FILE: runtime/engine/asr/recognizer/recognizer_controller.cc
================================================
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "recognizer/recognizer_controller.h"
#include "nnet/u2_nnet.h"

namespace ppspeech {

// Creates `num_worker` RecognizerControllerImpl workers from `resource` and
// registers every worker index as idle.
RecognizerController::RecognizerController(int num_worker, RecognizerResource resource) {
    recognizer_workers.resize(num_worker);
    int worker_id = 0;
    for (auto& worker : recognizer_workers) {
        worker.reset(new ppspeech::RecognizerControllerImpl(resource));
        waiting_workers.push(worker_id);
        ++worker_id;
    }
}

// Hands out the index of an idle worker, or -1 when every worker is busy.
// The index stays reserved until GetFinalResult(idx) gives it back.
int RecognizerController::GetRecognizerInstanceId() {
    // The emptiness test and the pop must happen under the same lock:
    // otherwise two threads can both see one remaining entry and the slower
    // one calls front()/pop() on an empty queue.
    std::lock_guard<std::mutex> lock(mutex_);
    if (waiting_workers.empty()) {
        return -1;
    }
    const int idx = waiting_workers.front();
    waiting_workers.pop();
    return idx;
}

// Calls WaitFinished() on every worker, in index order, before the workers
// themselves are destroyed.
RecognizerController::~RecognizerController() {
    for (auto& worker : recognizer_workers) {
        worker->WaitFinished();
    }
}

// Prepares worker `idx` for a new utterance.
void RecognizerController::InitDecoder(int idx) {
    auto& worker = recognizer_workers[idx];
    worker->InitDecoder();
}

// Waits for worker `idx` to finish decoding, runs attention rescoring,
// fetches the final text and then returns the worker to the idle queue.
std::string RecognizerController::GetFinalResult(int idx) {
    auto& worker = recognizer_workers[idx];
    worker->WaitDecoderFinished();
    worker->AttentionRescoring();
    std::string final_text = worker->GetFinalResult();
    {
        // Only the queue update needs the lock.
        std::unique_lock<std::mutex> guard(mutex_);
        waiting_workers.push(idx);
    }
    return final_text;
}

// Forwards a chunk of audio samples to worker `idx`.
void RecognizerController::Accept(std::vector<float> data, int idx) {
    auto& worker = recognizer_workers[idx];
    worker->Accept(data);
}

// Tells worker `idx` that no more audio will arrive for this utterance.
void RecognizerController::SetInputFinished(int idx) {
    auto& worker = recognizer_workers[idx];
    worker->SetInputFinished();
}

}


================================================
FILE: runtime/engine/asr/recognizer/recognizer_controller.h
================================================
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <queue>
#include <memory>

#include "recognizer/recognizer_controller_impl.h"

namespace ppspeech {

// Owns a fixed pool of RecognizerControllerImpl workers and hands them out
// by index. Per-utterance call order: GetRecognizerInstanceId -> InitDecoder
// -> Accept (repeated) -> SetInputFinished -> GetFinalResult.
class RecognizerController {
  public:
    // Creates `num_worker` workers from `resource`; all start out idle.
    explicit RecognizerController(int num_worker, RecognizerResource resource);  
    // Calls WaitFinished() on every worker.
    ~RecognizerController();
    // Returns the index of an idle worker, or -1 when none is available.
    int GetRecognizerInstanceId();
    // Prepares worker `idx` for a new utterance.
    void InitDecoder(int idx);
    // Feeds a chunk of audio samples to worker `idx`.
    void Accept(std::vector<float> data, int idx);
    // Signals that worker `idx` will receive no more audio.
    void SetInputFinished(int idx);
    // Waits for decoding, rescoring and the final text of worker `idx`, then
    // puts `idx` back into the idle queue.
    std::string GetFinalResult(int idx);
    
  private:
    // Indices of workers that are currently idle.
    std::queue<int> waiting_workers;  
    // Taken around the pop in GetRecognizerInstanceId and the push in
    // GetFinalResult.
    std::mutex mutex_;
    std::vector<std::unique_ptr<ppspeech::RecognizerControllerImpl>> recognizer_workers;
  
    DISALLOW_COPY_AND_ASSIGN(RecognizerController);
};

}

================================================
FILE: runtime/engine/asr/recognizer/recognizer_controller_impl.cc
================================================
// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang, Di Wu)
//               2022 Binbin Zhang (binbzha@qq.com)
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "recognizer/recognizer_controller_impl.h"
#include "decoder/ctc_prefix_beam_search_decoder.h"
#include "common/utils/strings.h"

namespace ppspeech {

// Builds one recognizer worker from `resource`: feature pipeline, acoustic
// model, nnet producer (plus its evaluation thread), decodable and decoder.
RecognizerControllerImpl::RecognizerControllerImpl(const RecognizerResource& resource)
: opts_(resource) {
    BaseFloat am_scale = resource.acoustic_scale;
    BaseFloat blank_threshold = resource.blank_threshold;
    const FeaturePipelineOptions& feature_opts = resource.feature_pipeline_opts;
    std::shared_ptr<FeaturePipeline> feature_pipeline(
        new FeaturePipeline(feature_opts));
    std::shared_ptr<NnetBase> nnet;
    // Without ONNX support the shared nnet is always cloned; with it, a fresh
    // U2OnnxNnet is built when the model options ask for an ONNX model.
#ifndef USE_ONNX
    nnet = resource.nnet->Clone();
#else
    if (resource.model_opts.with_onnx_model){
        nnet.reset(new U2OnnxNnet(resource.model_opts));
    } else {
        nnet = resource.nnet->Clone();
    }
#endif
    nnet_producer_.reset(new NnetProducer(nnet, feature_pipeline, blank_threshold));
    // The evaluation thread starts here, before decodable_ and decoder_ exist;
    // it lives until WaitFinished() joins it.
    nnet_thread_ = std::thread(RunNnetEvaluation, this);

    decodable_.reset(new Decodable(nnet_producer_, am_scale));
    // An empty FST path selects CTC prefix beam search, otherwise TLG decoding.
    if (resource.decoder_opts.tlg_decoder_opts.fst_path.empty()) {
        LOG(INFO) << "Init PrefixBeamSearch Decoder";
        decoder_ = std::make_unique<CTCPrefixBeamSearch>(
            resource.decoder_opts.ctc_prefix_search_opts);
    } else {
        LOG(INFO) << "Init TLGDecoder";
        decoder_ = std::make_unique<TLGDecoder>(
            resource.decoder_opts.tlg_decoder_opts);
    }

    symbol_table_ = decoder_->WordSymbolTable();
    global_frame_offset_ = 0;
    input_finished_ = false;
    num_frames_ = 0;
    result_.clear(); 
}

// Stops and joins the nnet evaluation thread via WaitFinished().
// NOTE(review): decoder_thread_ is not joined here -- confirm callers always
// go through WaitDecoderFinished() first.
RecognizerControllerImpl::~RecognizerControllerImpl() {
    WaitFinished();
}

// Resets only the nnet producer; decoder and decodable are reset by
// InitDecoder().
void RecognizerControllerImpl::Reset() {
    nnet_producer_->Reset();
}

// Thread entry point for decoder_thread_; forwards to the member function
// on `me`.
void RecognizerControllerImpl::RunDecoder(RecognizerControllerImpl* me) {
    me->RunDecoderInternal();
}

// Decoder thread body: keeps waking the nnet thread and advancing the decoder
// until the producer reports it is finished, then performs one last advance
// and refreshes result_ (without time stamps).
void RecognizerControllerImpl::RunDecoderInternal() {
    LOG(INFO) << "DecoderInternal begin";
    for (;;) {
        if (nnet_producer_->IsFinished()) {
            break;
        }
        nnet_condition_.notify_one();
        decoder_->AdvanceDecode(decodable_);
    }
    // Consume whatever became available right before the producer finished.
    decoder_->AdvanceDecode(decodable_);
    UpdateResult(false);
    LOG(INFO) << "DecoderInternal exit";
}

// Blocks until the decoder thread has exited; returns immediately when there
// is no joinable decoder thread.
void RecognizerControllerImpl::WaitDecoderFinished() {
    if (!decoder_thread_.joinable()) {
        return;
    }
    decoder_thread_.join();
}

// Thread entry point for nnet_thread_; forwards to the member function on
// `me`.
void RecognizerControllerImpl::RunNnetEvaluation(RecognizerControllerImpl* me) {
    me->RunNnetEvaluationInternal();
}

// Marks the producer's input as finished and wakes the nnet thread so it can
// process the remaining data.
void RecognizerControllerImpl::SetInputFinished() {
    nnet_producer_->SetInputFinished();
    // Notify after the flag is set so the woken thread observes it.
    nnet_condition_.notify_one();
    LOG(INFO) << "Set Input Finished";
}

// Requests the nnet evaluation thread to stop (abort_ flag plus a wake-up)
// and joins it if it is still joinable.
void RecognizerControllerImpl::WaitFinished() {
    abort_ = true;
    LOG(INFO) << "nnet wait finished";
    nnet_condition_.notify_one();
    if (!nnet_thread_.joinable()) {
        return;
    }
    nnet_thread_.join();
}

// Nnet thread body: sleeps on nnet_condition_ and, each time it is woken,
// keeps calling Compute() on the producer until it returns false. The loop
// ends once abort_ has been set (see WaitFinished()).
// NOTE(review): wait() is used without a predicate, so a notify_one() issued
// while this thread is not yet waiting is not remembered -- confirm that the
// repeated notifications from the decoder thread and WaitFinished() make
// this safe.
void RecognizerControllerImpl::RunNnetEvaluationInternal() {
    bool result = false;
    LOG(INFO) << "NnetEvaluationInteral begin";
    while (!abort_) {
        std::unique_lock<std::mutex> lock(nnet_mutex_);
        nnet_condition_.wait(lock);
        do {
            result = nnet_producer_->Compute();
            // Signal decoder_condition_ after every Compute() call.
            decoder_condition_.notify_one();
        } while (result);
    }
    LOG(INFO) << "NnetEvaluationInteral exit";    
}

// Passes a chunk of audio samples to the nnet producer and wakes the nnet
// thread to process it.
void RecognizerControllerImpl::Accept(std::vector<float> data) {
    nnet_producer_->Accept(data);
    nnet_condition_.notify_one();
}

// Prepares this worker for a new utterance: clears the per-utterance state,
// resets decodable and decoder, and starts a fresh decoder thread.
void RecognizerControllerImpl::InitDecoder() {
    // A previous decoder thread must be joined first: assigning to a joinable
    // std::thread calls std::terminate(), and resetting the decoder while the
    // old thread still uses it would be a data race. This is a no-op when the
    // caller already went through WaitDecoderFinished().
    if (decoder_thread_.joinable()) {
        decoder_thread_.join();
    }

    global_frame_offset_ = 0;
    input_finished_ = false;
    num_frames_ = 0;
    result_.clear();

    decodable_->Reset();
    decoder_->Reset();
    decoder_thread_ = std::thread(RunDecoder, this);
}

// Finalizes the search, refreshes result_ and, unless the rescoring weight is
// zero, re-ranks the N-best list with attention-decoder scores:
//   score = rescoring_weight * attention_score + ctc_weight * ctc_score
// result_ is sorted with DecodeResult::CompareFunc afterwards.
void RecognizerControllerImpl::AttentionRescoring() {
    decoder_->FinalizeSearch();
    UpdateResult(false);

    // No need to do rescoring
    if (0.0 == opts_.decoder_opts.rescoring_weight) {
        LOG_EVERY_N(WARNING, 3) << "Not do AttentionRescoring!";
        return;
    }
    LOG_EVERY_N(WARNING, 3) << "Do AttentionRescoring!";

    // Inputs() returns N-best input ids, which is the basic unit for rescoring
    // In CtcPrefixBeamSearch, inputs are the same to outputs
    const auto& hypotheses = decoder_->Inputs();
    const size_t num_hyps = hypotheses.size();
    if (num_hyps == 0) {
        return;
    }

    std::vector<float> rescoring_score;
    decodable_->AttentionRescoring(
        hypotheses, opts_.decoder_opts.reverse_weight, &rescoring_score);

    // combine ctc score and rescoring score
    for (size_t i = 0; i < num_hyps; i++) {
        VLOG(3) << "hyp " << i << " rescoring_score: " << rescoring_score[i]
                << " ctc_score: " << result_[i].score
                << " rescoring_weight: " << opts_.decoder_opts.rescoring_weight
                << " ctc_weight: " << opts_.decoder_opts.ctc_weight;
        result_[i].score =
            opts_.decoder_opts.rescoring_weight * rescoring_score[i] +
            opts_.decoder_opts.ctc_weight * result_[i].score;

        // Log the hypothesis that was just rescored, not always the first one.
        VLOG(3) << "hyp: " << result_[i].sentence
                << " score: " << result_[i].score;
    }

    std::sort(result_.begin(), result_.end(), DecodeResult::CompareFunc);
    VLOG(3) << "result: " << result_[0].sentence
            << " score: " << result_[0].score;
}

// Returns the sentence of the first entry in result_, or an empty string
// when there is no hypothesis (indexing an empty vector is undefined).
std::string RecognizerControllerImpl::GetFinalResult() {
    if (result_.empty()) {
        return std::string();
    }
    return result_[0].sentence;
}

// Returns the sentence of the first entry in result_, or an empty string
// when no hypothesis has been produced yet (indexing an empty vector is
// undefined).
std::string RecognizerControllerImpl::GetPartialResult() {
    if (result_.empty()) {
        return std::string();
    }
    return result_[0].sentence;
}

void RecognizerControllerImpl::UpdateResult(bool finish) {
    const auto& hypotheses = decoder_->Outputs();
    const auto& inputs = decoder_->Inputs();
    const auto& likelihood = decoder_->Likelihood();
    const auto& times = decoder_->Times();
    result_.clear();

    CHECK_EQ(inputs.size(), likelihood.size());
    for (size_t i = 0; i < hypotheses.size(); i++) {
        const std::vector<int>& hypothesis = hypotheses[i];

        DecodeResult path;
        path.score = likelihood[i];
        for (size_t j = 0; j < hypothesis.size(); j++) {
            std::string word = symbol_table_->Find(hypothesis[j]);
            path.sentence += (" " + word);
        }
        path.sentence = DelBlank(path.sentence);

        // TimeStamp is only supported in final result
        // TimeStamp of the output of CtcWfstBeamSearch may be inaccurate due to
        // various FST operations when building the decoding graph. So here we
        // use time stamp of the input(e2e model unit), which is more accurate,
        // and it requires the symbol table of the e2e model used in training.
        if (symbol_table_ != nullptr && finish) {
            int offset = global_frame_offset_ * FrameShiftInMs();

            const std::vector<int>& input = inputs[i];
            const std::vector<int> time_stamp = times[i];
            CHECK_EQ(input.size(), time_stamp.size());

            for (size_t j = 0; j < input.size(); j++) {
                std::string word = symbol_table_->Find(input[j]);

                int start =
                    time_stamp[j] * FrameShiftInMs() - time_stamp_gap_ > 0
                        ? time_stamp[j] * FrameShiftInMs() - time_stamp_gap_
                        : 0;
                if (j > 0) {
                    start =
                        (time_stamp[j] - time_stamp[j - 1]) * FrameShiftInMs() <
                                time_stamp_gap_
                            ? (time_stamp[j - 1] + time_stamp[j]) / 2 *
                                  FrameShiftInMs()
                            : start;
                }

                int end = time_stamp[j] * FrameShiftInMs();
                if (j < input.size() - 1) {
                    end =
                        (time_stamp[j + 1] - time_stamp[j]) * FrameShiftInMs() <
                                time_stamp_gap_
                            ? (time_stamp[j + 1] + time_stamp[j]) / 2 *
                                  FrameShiftInMs()
                            : end;
                }

                WordPiece word_piece(word, offset + start, offset + end);
                path.word_pieces.emplace_back(word_piece);
            }
        }

        // if (post_processor_ != nullptr) {
        //   path.sentence = post_processor_->Process(path.sentence, finish);
        // }

        result_.emplace_back(path);
    }

    if (DecodedSomething()) {
        VLOG(1) << "Partial CTC result " << result_[0].sentence;
    }
}

}  // namespace ppspeech


================================================
FILE: runtime/engine/asr/recognizer/recognizer_controller_impl.h
================================================
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "decoder/common.h"
#include "fst/fstlib.h"
#include "fst/symbol-table.h"
#include "nnet/u2_nnet.h"
#include "nnet/nnet_producer.h"
#ifdef USE_ONNX
#include "nnet/u2_onnx_nnet.h"
#endif
#include "nnet/decodable.h"
#include "recognizer/recognizer_resource.h"

#include <memory>

namespace ppspeech {

// Drives one streaming recognition session: audio is pushed in with
// Accept(), decoding runs on a dedicated decoder thread started by
// InitDecoder(), and results are read back through GetPartialResult() /
// GetFinalResult().
class RecognizerControllerImpl {
  public:
    explicit RecognizerControllerImpl(const RecognizerResource& resource);
    ~RecognizerControllerImpl();
    // Feeds a chunk of audio samples to the recognizer.
    void Accept(std::vector<float> data);
    // Resets per-utterance state and starts the decoder thread.
    void InitDecoder();
    // Signals that no more audio will be supplied for the current utterance.
    void SetInputFinished();
    // Sentence of the best hypothesis currently held in the result list.
    std::string GetFinalResult();
    std::string GetPartialResult();
    void Rescoring();
    void Reset();
    void WaitDecoderFinished();
    void WaitFinished();
    // Finalizes the search and re-ranks the N-best list with attention scores.
    void AttentionRescoring();
    // True when the best hypothesis exists and has a non-empty sentence.
    bool DecodedSomething() const {
      return !result_.empty() && !result_[0].sentence.empty();
    }
    // Frame shift in milliseconds; currently a placeholder value.
    int FrameShiftInMs() const {
      return 1; //todo
    }

  private:

    static void RunNnetEvaluation(RecognizerControllerImpl* me);
    void RunNnetEvaluationInternal();
    static void RunDecoder(RecognizerControllerImpl* me);
    void RunDecoderInternal();
    void UpdateResult(bool finish = false);

    std::shared_ptr<Decodable> decodable_;
    std::unique_ptr<DecoderBase> decoder_;
    std::shared_ptr<NnetProducer> nnet_producer_;

    // e2e unit symbol table
    std::shared_ptr<fst::SymbolTable> symbol_table_ = nullptr;
    std::vector<DecodeResult> result_;

    RecognizerResource opts_;
    bool abort_ = false;
    // global decoded frame offset
    // (default-initialized so it is never indeterminate before InitDecoder())
    int global_frame_offset_ = 0;
    // cur decoded frame num
    int num_frames_ = 0;
    // timestamp gap between words in a sentence
    const int time_stamp_gap_ = 100;
    bool input_finished_ = false;

    std::mutex nnet_mutex_;
    std::mutex decoder_mutex_;
    std::condition_variable nnet_condition_;
    std::condition_variable decoder_condition_;
    std::thread nnet_thread_;
    std::thread decoder_thread_;

    DISALLOW_COPY_AND_ASSIGN(RecognizerControllerImpl);
};

}


================================================
FILE: runtime/engine/asr/recognizer/recognizer_instance.cc
================================================
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "recognizer/recognizer_instance.h"


namespace ppspeech {

// Accessor for the process-wide RecognizerInstance, which is created the
// first time this function is called.
RecognizerInstance& RecognizerInstance::GetInstance() {
    static RecognizerInstance singleton;
    return singleton;
}

// Builds the recognizer controller.
//
// model_file              path stored into the model options.
// word_symbol_table_file  symbol table used by whichever decoder is selected.
// fst_file                decoding graph; when non-empty the TLG decoder
//                         options are filled in, otherwise the CTC prefix
//                         beam search options are used.
// num_instance            number of recognizer instances handed to the
//                         controller.
// Always returns true.
bool RecognizerInstance::Init(const std::string& model_file, 
                              const std::string& word_symbol_table_file,
                              const std::string& fst_file,
                              int num_instance) {
    RecognizerResource resource = RecognizerResource::InitFromFlags();
    // NOTE(review): InitFromFlags() already constructs resource.nnet from the
    // flag-derived model options, so this override happens after the nnet
    // was built -- confirm the controller re-reads model_opts.
    resource.model_opts.model_path = model_file;
    //resource.vocab_path = word_symbol_table_file;
    if (!fst_file.empty()) {
        resource.decoder_opts.tlg_decoder_opts.fst_path = fst_file;
        // Previously this line assigned to fst_path a second time, which
        // replaced the graph path with the symbol table path.
        resource.decoder_opts.tlg_decoder_opts.word_symbol_table =
            word_symbol_table_file;
    } else {
        resource.decoder_opts.ctc_prefix_search_opts.word_symbol_table = 
            word_symbol_table_file;
    }
    recognizer_controller_ = std::make_unique<RecognizerController>(num_instance, resource);
    return true;
}

void RecognizerInstance::InitDecoder(int idx) {
    recognizer_controller_->InitDecoder(idx);
    return;
}

// Forwards to the controller and returns the recognizer id it hands out.
int RecognizerInstance::GetRecognizerInstanceId() {
    const int instance_id = recognizer_controller_->GetRecognizerInstanceId();
    return instance_id;
}

void RecognizerInstance::Accept(const std::vector<float>& waves, int idx) const {
    recognizer_controller_->Accept(waves, idx);
    return;
} 

void RecognizerInstance::SetInputFinished(int idx) const {
    recognizer_controller_->SetInputFinished(idx);
    return;
}

std::string RecognizerInstance::GetResult(int idx) const {
    return recognizer_controller_->GetFinalResult(idx);
}

}

================================================
FILE: runtime/engine/asr/recognizer/recognizer_instance.h
================================================
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "base/common.h"
#include "recognizer/recognizer_controller.h"

namespace ppspeech {

// Thin facade over a RecognizerController. Every call that takes `idx`
// is forwarded to the controller for the recognizer with that index.
class RecognizerInstance {
  public:
    RecognizerInstance() {}
    ~RecognizerInstance() {}

    // Shared instance accessor.
    static RecognizerInstance& GetInstance();

    // Creates the controller; must precede every other call because they
    // all dereference the controller pointer.
    bool Init(const std::string& model_file, 
              const std::string& word_symbol_table_file,
              const std::string& fst_file,
              int num_instance);

    // Per-recognizer session control.
    int GetRecognizerInstanceId();
    void InitDecoder(int idx);

    // Audio input.
    void Accept(const std::vector<float>& waves, int idx) const; 
    void SetInputFinished(int idx) const;

    // Final recognition result of recognizer `idx`.
    std::string GetResult(int idx) const;

  private:
    // Owned controller, created by Init().
    std::unique_ptr<RecognizerController> recognizer_controller_;
};


}  // namespace ppspeech


================================================
FILE: runtime/engine/asr/recognizer/recognizer_main.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "decoder/param.h"
#include "frontend/wave-reader.h"
#include "kaldi/util/table-types.h"
#include "recognizer/recognizer_controller.h"

// Command-line flags of the offline recognizer driver.
// Read specifier for the input waves (used to open the wave table reader).
DEFINE_string(wav_rspecifier, "", "test feature rspecifier");
// Write specifier for the recognized text (used to open the token writer).
DEFINE_string(result_wspecifier, "", "test result wspecifier");
// Chunk length in seconds; multiplied by sample_rate to get samples per chunk.
DEFINE_double(streaming_chunk, 0.36, "streaming feature chunk size");
// Sample rate used to convert the chunk length into a sample count.
DEFINE_int32(sample_rate, 16000, "sample rate");

int main(int argc, char* argv[]) {
    gflags::SetUsageMessage("Usage:");
    gflags::ParseCommandLineFlags(&argc, &argv, false);
    google::InitGoogleLogging(argv[0]);
    google::InstallFailureSignalHandler();
    FLAGS_logtostderr = 1;

    int32 num_done = 0, num_err = 0;
    double tot_wav_duration = 0.0;
    double tot_attention_rescore_time = 0.0;
    double tot_decode_time = 0.0;

    kaldi::SequentialTableReader<kaldi::WaveHolder> wav_reader(
        FLAGS_wav_rspecifier);
    kaldi::TokenWriter result_writer(FLAGS_result_wspecifier);

    int sample_rate = FLAGS_sample_rate;
    float streaming_chunk = FLAGS_streaming_chunk;
    int chunk_sample_size = streaming_chunk * sample_rate;
    LOG(INFO) << "sr: " << sample_rate;
    LOG(INFO) << "chunk size (s): " << streaming_chunk;
    LOG(INFO) << "chunk size (sample): " << chunk_sample_size;

    ppspeech::RecognizerResource resource =
        ppspeech::RecognizerResource::InitFromFlags();
    std::shared_ptr<ppspeech::RecognizerControllerImpl> recognizer_ptr(
        new ppspeech::RecognizerControllerImpl(resource));

    for (; !wav_reader.Done(); wav_reader.Next()) {
        recognizer_ptr->InitDecoder();
        std::string utt = wav_reader.Key();
        const kaldi::WaveData& wave_data = wav_reader.Value();
        LOG(INFO) << "utt: " << utt;
        LOG(INFO) << "wav dur: " << wave_data.Duration() << " sec.";
        double dur = wave_data.Duration();
        tot_wav_duration += dur;

        int32 this_channel = 0;
        kaldi::SubVector<kaldi::BaseFloat> waveform(wave_data.Data(),
                                                    this_channel);
        int tot_samples = waveform.Dim();
        LOG(INFO) << "wav len (sample): " << tot_samples;

        int sample_offset = 0;
        kaldi::Timer local_timer;

        while (sample_offset < tot_samples) {
            int cur_chunk_size =
                std::min(chunk_sample_size, tot_samples - sample_offset);

            std::vector<kaldi::BaseFloat> wav_chunk(cur_chunk_size);
            for (int i = 0; i < cur_chunk_size; ++i) {
                wav_chunk[i] = waveform(sample_offset + i);
            }

            recognizer_ptr->Accept(wav_chunk);

            // no overlap
            sample_offset += cur_chunk_size;
        }
        CHECK(sample_offset == tot_samples);
        recognizer_ptr->SetInputFinished();
        recognizer_ptr->WaitDecoderFinished();

        kaldi::Timer timer;
        recognizer_ptr->AttentionRescoring();
        float rescore_time = timer.Elapsed();
        tot_attention_rescore_time += rescore_time;

        std::string result = recognizer_ptr->GetFinalResult();
        if (result.empty()) {
            // the TokenWriter can not write empty string.
            ++num_err;
            LOG(INFO) << " the result of " << utt << " is empty";
            continue;
        }

        tot_decode_time += local_timer.Elapsed();
        LOG(INFO) << utt << " " << result;
        LOG(INFO) << " RTF: " << local_timer.Elapsed() / dur << " dur: " << dur
                  << " cost: " << local_timer.Elapsed() << " rescore:" << rescore_time;

        result_writer.Write(utt, result);

        ++num_done;
    }
    recognizer_ptr->WaitFinished();

    LOG(INFO) << "Done " << num_done << " out of " << (num_err + num_done);
    LOG(INFO) << "total wav duration is: " << tot_wav_duration << " sec";
    LOG(INFO) << "total decode cost:" << tot_decode_time << " sec";
    LOG(INFO) << "total rescore cost:" << tot_attention_rescore_time << " sec";
    LOG(INFO) << "RTF is: " << tot_decode_time / tot_wav_duration;
}


================================================
FILE: runtime/engine/asr/recognizer/recognizer_resource.h
================================================
#pragma once

#include "decoder/ctc_beam_search_opt.h"
#include "decoder/ctc_tlg_decoder.h"
#include "frontend/feature_pipeline.h"

// Flags defined in another translation unit; they are read by the
// InitFromFlags() helpers of DecodeOptions and RecognizerResource below.
DECLARE_int32(nnet_decoder_chunk);
DECLARE_int32(num_left_chunks);
DECLARE_double(ctc_weight);
DECLARE_double(rescoring_weight);
DECLARE_double(reverse_weight);
DECLARE_int32(nbest);
DECLARE_int32(blank);
DECLARE_double(acoustic_scale);
DECLARE_double(blank_threshold);
DECLARE_string(word_symbol_table);

namespace ppspeech {

// Decoder-side configuration: chunking, score combination weights and the
// option sets of the two supported search back-ends.
struct DecodeOptions {
    // Number of frames in one chunk, counted after subsampling.
    // Example: with a subsample rate of 4 and chunk_size = 16, one chunk
    // spans 67 = 16*4 + 3 frames and the stride is 64 = 16*4.
    int chunk_size{16};
    int num_left_chunks{-1};

    // final_score     = rescoring_weight * rescoring_score
    //                 + ctc_weight * ctc_score
    // rescoring_score = left_to_right_score * (1 - reverse_weight)
    //                 + right_to_left_score * reverse_weight
    //
    // The meaning of ctc_score depends on the search method:
    //   - CtcPrefixBeamSearch: sum(prefix) score + context score
    //   - CtcWfstBeamSearch:   max(viterbi) path score + context score
    // so ctc_weight has to be chosen according to the search method in use.
    float ctc_weight{0.0};
    float rescoring_weight{1.0};
    float reverse_weight{0.0};

    // CtcEndpointConfig ctc_endpoint_opts;
    CTCBeamSearchOptions ctc_prefix_search_opts{};
    TLGDecoderOptions tlg_decoder_opts{};

    // Builds the options from the command-line flags and logs the values.
    static DecodeOptions InitFromFlags() {
        DecodeOptions opts;

        // Chunking.
        opts.chunk_size = FLAGS_nnet_decoder_chunk;
        opts.num_left_chunks = FLAGS_num_left_chunks;

        // Score combination.
        opts.ctc_weight = FLAGS_ctc_weight;
        opts.rescoring_weight = FLAGS_rescoring_weight;
        opts.reverse_weight = FLAGS_reverse_weight;

        // CTC prefix beam search: both beams share the nbest flag.
        opts.ctc_prefix_search_opts.blank = FLAGS_blank;
        opts.ctc_prefix_search_opts.first_beam_size = FLAGS_nbest;
        opts.ctc_prefix_search_opts.second_beam_size = FLAGS_nbest;
        opts.ctc_prefix_search_opts.word_symbol_table = FLAGS_word_symbol_table;

        // TLG (WFST) decoder.
        opts.tlg_decoder_opts = ppspeech::TLGDecoderOptions::InitFromFlags();

        LOG(INFO) << "chunk_size: " << opts.chunk_size;
        LOG(INFO) << "num_left_chunks: " << opts.num_left_chunks;
        LOG(INFO) << "ctc_weight: " << opts.ctc_weight;
        LOG(INFO) << "rescoring_weight: " << opts.rescoring_weight;
        LOG(INFO) << "reverse_weight: " << opts.reverse_weight;
        LOG(INFO) << "blank: " << FLAGS_blank;
        LOG(INFO) << "first_beam_size: " << FLAGS_nbest;
        LOG(INFO) << "second_beam_size: " << FLAGS_nbest;
        return opts;
    }
};

struct RecognizerResource {
    // decodable opt 
    kaldi::BaseFloat acoustic_scale{1.0};
    kaldi::BaseFloat blank_threshold{0.98};

    FeaturePipelineOptions feature_pipeline_opts{};
    ModelOptions model_opts{};
    DecodeOptions decoder_opts{};
    std::shared_ptr<NnetBase> nnet;

    static RecognizerResource InitFromFlags() {
        RecognizerResource resource;
        resource.acoustic_scale = FLAGS_acoustic_scale;
        resource.blank_threshold = FLAGS_blank_threshold;
        LOG(INFO) << "acoustic_scale: " << resource.acoustic_scale;

        resource.feature_pipeline_opts =
            ppspeech::FeaturePipelineOptions::InitFromFlags();
        resource.feature_pipeline_opts.assembler_opts.fill_zero = false;
        LOG(INFO) << "u2 need fill zero be false: "
                  << resource.feature_pipeline_opts.assembler_opts.fill_zero;
        resource.model_opts = ppspeech::ModelOptions::InitFromFlags();
        resource.decoder_opts = ppspeech::DecodeOptions::InitFromFlags();
        #ifndef USE_ONNX
            resource.nnet.reset(new U2Nnet(resource.model_opts));
        #else
            if (resource.model_opts.with_onnx_model){
                resource.nnet.reset(new U2OnnxNnet(resource.model_opts));
            } else {
                resource.nnet.reset(new U2Nnet(resource.model_opts));
            }
        #endif
        return resource;
    }
};

} //namespace ppspeech

================================================
FILE: runtime/engine/asr/server/CMakeLists.txt
================================================
#add_subdirectory(websocket)


================================================
FILE: runtime/engine/asr/server/websocket/CMakeLists.txt
================================================
# Static library holding the websocket server and client implementations.
add_library(websocket STATIC websocket_server.cc websocket_client.cc)
target_link_libraries(websocket PUBLIC frontend nnet decoder recognizer)

# Command-line entry points; both are built and linked the same way.
foreach(ws_binary websocket_server_main websocket_client_main)
  add_executable(${ws_binary} ${CMAKE_CURRENT_SOURCE_DIR}/${ws_binary}.cc)
  target_include_directories(${ws_binary} PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
  target_link_libraries(${ws_binary} PUBLIC fst websocket ${DEPS})
endforeach()

================================================
FILE: runtime/engine/asr/server/websocket/websocket_client.cc
================================================
// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang)
//               2022 PaddlePaddle Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "websocket/websocket_client.h"

#include "boost/json/src.hpp"

// Short alias for Boost.JSON used throughout this file.
// NOTE(review): "boost/json/src.hpp" is included both here and in
// websocket_server.cc, and both files are compiled into the same library;
// Boost.JSON expects src.hpp in exactly one translation unit -- confirm this
// links once the websocket directory is enabled in the build.
namespace json = boost::json;

namespace ppspeech {

// Connects to host:port right away and then spawns the background thread
// that runs ReadLoopFunc() to consume server messages.
WebSocketClient::WebSocketClient(const std::string& host, int port)
    : host_(host), port_(port) {
    Connect();
    t_ = std::make_unique<std::thread>(&WebSocketClient::ReadLoopFunc, this);
}

void WebSocketClient::Connect() {
    tcp::resolver resolver{ioc_};
    // Look up the domain name
    auto const results = resolver.resolve(host_, std::to_string(port_));
    // Make the connection on the IP address we get from a lookup
    auto ep = asio::connect(ws_.next_layer(), results);
    // Update the host_ string. This will provide the value of the
    // Host HTTP header during the WebSocket handshake.
    // See https://tools.ietf.org/html/rfc7230#section-5.4
    std::string host = host_ + ":" + std::to_string(ep.port());
    // Perform the websocket handshake
    ws_.handshake(host, "/");
}

// Sends `data` to the server as a text message.
void WebSocketClient::SendTextData(const std::string& data) {
    ws_.text(true);  // switch the stream to text mode for this write
    auto payload = asio::buffer(data);
    ws_.write(payload);
}

// Sends `size` bytes starting at `data` to the server as a binary message.
void WebSocketClient::SendBinaryData(const void* data, size_t size) {
    ws_.binary(true);  // switch the stream to binary mode for this write
    auto payload = asio::buffer(data, size);
    ws_.write(payload);
}

// Closes the websocket with the "normal" close code.
void WebSocketClient::Close() {
    ws_.close(websocket::close_code::normal);
}

// Body of the reader thread: consumes JSON text messages from the server
// until a "speech_end" message arrives, the server reports a non-"ok"
// status, or the connection fails.
//
// "final_result" / "partial_result" messages update result_ /
// partial_result_. done_ is set on EVERY exit path handled here, so a caller
// polling Done() is released even when the session ends with an error
// (previously it was only set on "speech_end" and such callers would wait
// forever).
void WebSocketClient::ReadLoopFunc() {
    try {
        while (true) {
            beast::flat_buffer buffer;
            ws_.read(buffer);
            std::string message = beast::buffers_to_string(buffer.data());
            LOG(INFO) << message;
            CHECK(ws_.got_text());
            json::object obj = json::parse(message).as_object();
            if (obj["status"] != "ok") {
                break;
            }
            if (obj["type"] == "final_result") {
                result_ = obj["result"].as_string().c_str();
            }
            if (obj["type"] == "partial_result") {
                partial_result_ = obj["result"].as_string().c_str();
            }
            if (obj["type"] == "speech_end") {
                break;
            }
        }
    } catch (beast::system_error const& se) {
        // This indicates that the session was closed
        if (se.code() != websocket::error::closed) {
            LOG(ERROR) << se.code().message();
        }
    } catch (std::exception const& e) {
        LOG(ERROR) << e.what();
    }
    // Reached after a normal end, a failed status and a caught exception.
    done_ = true;
}

// Waits for the reader thread to finish. Safe to call more than once:
// joining a thread that is not joinable would throw std::system_error.
void WebSocketClient::Join() {
    if (t_ != nullptr && t_->joinable()) {
        t_->join();
    }
}

void WebSocketClient::SendStartSignal() {
    json::value start_tag = {{"signal", "start"}};
    std::string start_message = json::serialize(start_tag);
    this->SendTextData(start_message);
}

void WebSocketClient::SendDataEnd() {
    json::value end_tag = {{"data", "end"}};
    std::string end_message = json::serialize(end_tag);
    this->SendTextData(end_message);
}

void WebSocketClient::SendEndSignal() {
    json::value end_tag = {{"signal", "end"}};
    std::string end_message = json::serialize(end_tag);
    this->SendTextData(end_message);
}

}  // namespace ppspeech


================================================
FILE: runtime/engine/asr/server/websocket/websocket_client.h
================================================
// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang)
//               2022 PaddlePaddle Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <atomic>
#include <memory>
#include <string>
#include <thread>

#include "base/common.h"
#include "boost/asio/connect.hpp"
#include "boost/asio/ip/tcp.hpp"
#include "boost/beast/core.hpp"
#include "boost/beast/websocket.hpp"

// Shorthand names for the Boost.Beast / Boost.Asio entities used by the
// websocket client. They are declared at global scope, so every file that
// includes this header sees them as well.
namespace beast = boost::beast;          // from <boost/beast.hpp>
namespace http = beast::http;            // from <boost/beast/http.hpp>
namespace websocket = beast::websocket;  // from <boost/beast/websocket.hpp>
namespace asio = boost::asio;            // from <boost/asio.hpp>
using tcp = boost::asio::ip::tcp;        // from <boost/asio/ip/tcp.hpp>

namespace ppspeech {

// Blocking websocket client for the speech server. The constructor connects
// and starts a background thread running ReadLoopFunc(); the owner sends
// audio / control messages and polls Done() for completion.
class WebSocketClient {
  public:
    // Connects to host:port and starts the reader thread.
    WebSocketClient(const std::string& host, int port);

    void SendTextData(const std::string& data);
    void SendBinaryData(const void* data, size_t size);
    // Reader-thread body; stores results received from the server.
    void ReadLoopFunc();
    void Close();
    // Waits for the reader thread.
    void Join();
    void SendStartSignal();
    void SendEndSignal();
    void SendDataEnd();
    // True once the reader thread has set the completion flag. Safe to poll
    // from another thread because the flag is atomic.
    bool Done() const { return done_; }
    // NOTE(review): the result strings are written by the reader thread
    // without locking; read them only after Done() returns true.
    std::string GetResult() const { return result_; }
    std::string GetPartialResult() const { return partial_result_; }

  private:
    void Connect();
    std::string host_;
    std::string result_;
    std::string partial_result_;
    int port_;
    // Written by the reader thread, polled by the owner: must be atomic,
    // otherwise a `while (!Done()) {}` loop is a data race and the load may
    // legally be hoisted out of the loop.
    std::atomic<bool> done_{false};
    // ioc_ must be declared before ws_, which is constructed from it.
    asio::io_context ioc_;
    websocket::stream<tcp::socket> ws_{ioc_};
    std::unique_ptr<std::thread> t_{nullptr};
};
}  // namespace ppspeech

================================================
FILE: runtime/engine/asr/server/websocket/websocket_client_main.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "kaldi/feat/wave-reader.h"
#include "kaldi/util/kaldi-io.h"
#include "kaldi/util/table-types.h"
#include "websocket/websocket_client.h"

// Command-line flags of the websocket test client.
// Address and port passed to the WebSocketClient constructor.
DEFINE_string(host, "127.0.0.1", "host of websocket server");
DEFINE_int32(port, 8082, "port of websocket server");
// Read specifier used to open the wave table reader.
DEFINE_string(wav_rspecifier, "", "test wav scp path");
// Chunk length in seconds; multiplied by the sample rate to get the number
// of samples sent per binary message.
DEFINE_double(streaming_chunk, 0.1, "streaming feature chunk size");

// Samples are sent to the server as 16-bit integers.
using kaldi::int16;
int main(int argc, char* argv[]) {
    gflags::ParseCommandLineFlags(&argc, &argv, false);
    google::InitGoogleLogging(argv[0]);

    kaldi::SequentialTableReader<kaldi::WaveHolder> wav_reader(
        FLAGS_wav_rspecifier);

    const int sample_rate = 16000;
    const float streaming_chunk = FLAGS_streaming_chunk;
    const int chunk_sample_size = streaming_chunk * sample_rate;

    for (; !wav_reader.Done(); wav_reader.Next()) {
        ppspeech::WebSocketClient client(FLAGS_host, FLAGS_port);

        client.SendStartSignal();
        std::string utt = wav_reader.Key();
        const kaldi::WaveData& wave_data = wav_reader.Value();
        CHECK_EQ(wave_data.SampFreq(), sample_rate);

        int32 this_channel = 0;
        kaldi::SubVector<kaldi::BaseFloat> waveform(wave_data.Data(),
                                                    this_channel);
        const int tot_samples = waveform.Dim();
        int sample_offset = 0;

        while (sample_offset < tot_samples) {
            int cur_chunk_size =
                std::min(chunk_sample_size, tot_samples - sample_offset);

            std::vector<int16> wav_chunk(cur_chunk_size);
            for (int i = 0; i < cur_chunk_size; ++i) {
                wav_chunk[i] = static_cast<int16>(waveform(sample_offset + i));
            }
            client.SendBinaryData(wav_chunk.data(),
                                  wav_chunk.size() * sizeof(int16));

            sample_offset += cur_chunk_size;
            LOG(INFO) << "Send " << cur_chunk_size << " samples";
            std::this_thread::sleep_for(
                std::chrono::milliseconds(static_cast<int>(1 * 1000)));

            if (cur_chunk_size < chunk_sample_size) {
                client.SendEndSignal();
            }
        }

        while (!client.Done()) {
        }
        std::string result = client.GetResult();
        LOG(INFO) << "utt: " << utt << " " << result;

        client.Join();
    }

    return 0;
}


================================================
FILE: runtime/engine/asr/server/websocket/websocket_server.cc
================================================
// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang)
//               2022 PaddlePaddle Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include "websocket/websocket_server.h"

#include "base/common.h"
#include "boost/json/src.hpp"

// Short alias for Boost.JSON used by the message builders and parsers below.
namespace json = boost::json;

namespace ppspeech {

// Takes ownership of the accepted socket (moved into the websocket stream)
// and keeps a copy of the recognizer resource for later sessions.
ConnectionHandler::ConnectionHandler(tcp::socket&& socket,
                                     const RecognizerResource& recognizer_resource)
    : ws_(std::move(socket)),
      recognizer_resource_(recognizer_resource) {
}

void ConnectionHandler::OnSpeechStart() {
    recognizer_ = std::make_shared<Recognizer>(recognizer_resource_);
    // Start decoder thread
    decode_thread_ = std::make_shared<std::thread>(
        &ConnectionHandler::DecodeThreadFunc, this);
    got_start_tag_ = true;
    LOG(INFO) << "Server: Received speech start signal, start reading speech";
    json::value rv = {{"status", "ok"}, {"type", "server_ready"}};
    ws_.text(true);
    ws_.write(asio::buffer(json::serialize(rv)));
}

// Handles the "end" signal: marks the recognizer (if one exists) as finished
// and records that the end tag was received.
void ConnectionHandler::OnSpeechEnd() {
    LOG(INFO) << "Server: Received speech end signal";
    if (recognizer_) {
        recognizer_->SetFinished();
    }
    got_end_tag_ = true;
}

void ConnectionHandler::OnFinalResult(const std::string& result) {
    LOG(INFO) << "Server: Final result: " << result;
    json::value rv = {
        {"status", "ok"}, {"type", "final_result"}, {"result", result}};
    ws_.text(true);
    ws_.write(asio::buffer(json::serialize(rv)));
}

void ConnectionHandler::OnFinish() {
    // Send finish tag
    json::value rv = {{"status", "ok"}, {"type", "speech_end"}};
    ws_.text(true);
    ws_.write(asio::buffer(json::serialize(rv)));
}

// Handles one binary message: interprets the payload as 16-bit PCM samples,
// converts them to floats, feeds them to the recognizer and replies with the
// current partial result.
void ConnectionHandler::OnSpeechData(const beast::flat_buffer& buffer) {
    // The payload is a packed array of int16 samples.
    const int num_samples = buffer.size() / sizeof(int16_t);
    const int16_t* samples =
        static_cast<const int16_t*>(buffer.data().data());

    kaldi::Vector<kaldi::BaseFloat> pcm_data(num_samples);
    for (int idx = 0; idx < num_samples; ++idx) {
        pcm_data(idx) = static_cast<float>(samples[idx]);
    }
    VLOG(2) << "Server: Received " << num_samples << " samples";
    LOG(INFO) << "Server: Received " << num_samples << " samples";

    // Audio must only arrive after a recognizer has been created.
    CHECK(recognizer_ != nullptr);
    recognizer_->Accept(pcm_data);

    // Report what has been recognized so far.
    std::string partial_result = recognizer_->GetPartialResult();
    json::value reply = {{"status", "ok"},
                         {"type", "partial_result"},
                         {"result", partial_result}};
    const std::string payload = json::serialize(reply);
    ws_.text(true);
    ws_.write(asio::buffer(payload));
}

// Body of the decoder thread: keeps calling Decode() until the recognizer
// reports it is finished, then runs one last Decode(), sends the final result
// and the finish tag, and sets stop_recognition_. Any std::exception is
// logged and ends the thread.
void ConnectionHandler::DecodeThreadFunc() {
    try {
        for (;;) {
            recognizer_->Decode();
            if (!recognizer_->IsFinished()) {
                continue;
            }
            LOG(INFO) << "Server: enter finish";
            // One more pass after the finished flag is observed.
            recognizer_->Decode();
            LOG(INFO) << "Server: finish";
            const std::string final_text = recognizer_->GetFinalResult();
            OnFinalResult(final_text);
            OnFinish();
            stop_recognition_ = true;
            return;
        }
    } catch (std::exception const& e) {
        LOG(ERROR) << e.what();
    }
}

void ConnectionHandler::OnError(const std::string& message) {
    json::value rv = {{"status", "failed"}, {"message", message}};
    ws_.text(true);
    ws_.write(asio::buffer(json::serialize(rv)));
    // Close websocket
    ws_.close(websocket::close_code::normal);
}

void ConnectionHandler::OnText(const std::string& message) {
    json::value v = json::parse(message);
    if (v.is_object()) {
        json::object obj = v.get_object();
        if (obj.find("signal") != obj.end()) {
            json::string signal = obj["signal"].as_string();
            if (signal == "start") {
                OnSpeechStart();
            } else if (signal == "end") {
                OnSpeechEnd();
            } else {
                OnError("Unexpected signal type");
            }
        } else {
            OnError("Wrong message header");
        }
    } else {
        OnError("Wrong protocol");
    }
}

// Runs one websocket session on the calling thread.
//
// Accepts the handshake, then reads frames in a loop:
//   - text frames are dispatched through OnText(); the loop ends once the
//     end tag has been received;
//   - binary frames are rejected with OnError() until a start signal was
//     seen, otherwise they are forwarded to OnSpeechData() unless the decoder
//     thread has already set stop_recognition_.
// After the loop the decoder thread (if any) is joined.
//
// On any exception the recognizer is first told that input is finished and
// only then is the decoder thread joined. The decoder loop exits only after
// the recognizer reports it is finished, so joining before signalling could
// block forever; and leaving the thread un-joined would destroy a joinable
// std::thread when this handler goes away.
void ConnectionHandler::operator()() {
    // Signals end-of-input (once) and waits for the decoder thread to exit.
    auto stop_decoder = [this]() {
        if (!got_end_tag_) {
            OnSpeechEnd();
        }
        if (decode_thread_ != nullptr && decode_thread_->joinable()) {
            decode_thread_->join();
        }
    };

    try {
        // Accept the websocket handshake
        ws_.accept();
        for (;;) {
            // This buffer will hold the incoming message
            beast::flat_buffer buffer;
            // Read a message
            ws_.read(buffer);
            if (ws_.got_text()) {
                std::string message = beast::buffers_to_string(buffer.data());
                LOG(INFO) << "Server: Text: " << message;
                OnText(message);
                if (got_end_tag_) {
                    break;
                }
            } else {
                if (!got_start_tag_) {
                    OnError("Start signal is expected before binary data");
                } else {
                    if (stop_recognition_) {
                        break;
                    }
                    OnSpeechData(buffer);
                }
            }
        }

        LOG(INFO) << "Server: finished to wait for decoding thread join.";
        if (decode_thread_ != nullptr && decode_thread_->joinable()) {
            decode_thread_->join();
        }
    } catch (beast::system_error const& se) {
        // websocket::error::closed just means the peer closed the session;
        // everything else is a real error worth logging.
        if (se.code() != websocket::error::closed) {
            LOG(ERROR) << se.code().message();
        }
        stop_decoder();
    } catch (std::exception const& e) {
        LOG(ERROR) << e.what();
        stop_decoder();
    }
}

void WebSocketServer::Start() {
    try {
        auto const address = asio::ip::make_address("0.0.0.0");
        tcp::acceptor acceptor{ioc_, {address, static_cast<uint16_t>(port_)}};
        for (;;) {
            // This will receive the new connection
            tcp::socket socket{ioc_};
            // Block until we get a connection
            acceptor.accept(socket);
            // Launch the session, transferring ownership of the socket
            ConnectionHandler handler(std::move(socket), recognizer_resource_);
            std::thread t(std::move(handler));
            t.detach();
        }
    } catch (const std::exception& e) {
        LOG(FATAL) << e.what();
    }
}

}  // namespace ppspeech


================================================
FILE: runtime/engine/asr/server/websocket/websocket_server.h
================================================
// Copyright (c) 2020 Mobvoi Inc (Binbin Zhang)
//               2022 PaddlePaddle Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "base/common.h"
#include "boost/asio/connect.hpp"
#include "boost/asio/ip/tcp.hpp"
#include "boost/beast/core.hpp"
#include "boost/beast/websocket.hpp"
#include "frontend/audio/feature_pipeline.h"
#include "recognizer/recognizer.h"

namespace beast = boost::beast;          // from <boost/beast.hpp>
namespace http = beast::http;            // from <boost/beast/http.hpp>
namespace websocket = beast::websocket;  // from <boost/beast/websocket.hpp>
namespace asio = boost::asio;            // from <boost/asio.hpp>
using tcp = boost::asio::ip::tcp;        // from <boost/asio/ip/tcp.hpp>

namespace ppspeech {
// Serves a single websocket client: reads control (text) and audio (binary)
// frames, feeds audio to a Recognizer and writes partial/final results back.
// Instances are callable so they can be run directly as a thread body
// (see WebSocketServer::Start).
class ConnectionHandler {
  public:
    // Takes ownership of the accepted socket; `recognizer_resource_` is the
    // resource bundle used to build the per-connection Recognizer.
    ConnectionHandler(tcp::socket&& socket,
                      const RecognizerResource& recognizer_resource_);
    // Runs the whole session (handshake, read loop, decoder join).
    void operator()();

  private:
    // Handles the "start" signal: creates the recognizer, starts the decoder
    // thread and acknowledges with a "server_ready" message.
    void OnSpeechStart();
    // Handles the "end" signal: marks recognizer input as finished and sets
    // got_end_tag_.
    void OnSpeechEnd();
    // Parses a text frame and dispatches on its "signal" member.
    void OnText(const std::string& message);
    // Sends the "speech_end" message to the client.
    void OnFinish();
    // Feeds one binary frame of int16 PCM to the recognizer and replies with
    // the partial result.
    void OnSpeechData(const beast::flat_buffer& buffer);
    // Sends a failure message and closes the websocket.
    void OnError(const std::string& message);
    // Sends the final recognition result to the client.
    void OnFinalResult(const std::string& result);
    // Decoder thread body: decodes until the recognizer is finished, then
    // emits the final result and sets stop_recognition_.
    void DecodeThreadFunc();
    // NOTE(review): no definition is visible in the part of the .cc reviewed
    // here — confirm it is defined/used or remove it.
    std::string SerializeResult(bool finish);

    // NOTE(review): continuous_decoding_ and nbest_ are not referenced by the
    // handler code reviewed here — confirm whether they are still needed.
    bool continuous_decoding_ = false;
    int nbest_ = 1;
    websocket::stream<tcp::socket> ws_;
    RecognizerResource recognizer_resource_;

    // Set by OnSpeechStart() once the "start" signal was received.
    bool got_start_tag_ = false;
    // Set by OnSpeechEnd().
    bool got_end_tag_ = false;
    // When endpoint is detected, stop recognition, and stop receiving data.
    // Written by the decoder thread and read by the session thread.
    bool stop_recognition_ = false;
    // Created in OnSpeechStart(); null until then.
    std::shared_ptr<ppspeech::Recognizer> recognizer_ = nullptr;
    // Thread running DecodeThreadFunc(); null until OnSpeechStart().
    std::shared_ptr<std::thread> decode_thread_ = nullptr;
};

// Blocking websocket ASR server: accepts TCP connections on a port and runs
// one ConnectionHandler per connection.
class WebSocketServer {
  public:
    // `port` is the TCP port to listen on; `recognizer_resource` is copied
    // and handed to every ConnectionHandler.
    WebSocketServer(int port, const RecognizerResource& recognizer_resource)
        : port_(port), recognizer_resource_(recognizer_resource) {}

    // Accepts connections forever; does not return under normal operation.
    void Start();

  private:
    int port_;
    RecognizerResource recognizer_resource_;
    // The io_context is required for all I/O
    asio::io_context ioc_{1};
};

}  // namespace ppspeech


================================================
FILE: runtime/engine/asr/server/websocket/websocket_server_main.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "decoder/param.h"
#include "websocket/websocket_server.h"

// --port: TCP port the websocket server listens on (default 8082).
DEFINE_int32(port, 8082, "websocket listening port");

// Builds the RecognizerResource for the server from command-line flags:
// the acoustic scale plus the feature-pipeline, model and TLG decoder
// option groups, each initialised through its own InitFromFlags().
ppspeech::RecognizerResource InitRecognizerResoure() {
    using ppspeech::FeaturePipelineOptions;
    using ppspeech::ModelOptions;
    using ppspeech::TLGDecoderOptions;

    ppspeech::RecognizerResource from_flags;
    from_flags.acoustic_scale = FLAGS_acoustic_scale;
    from_flags.feature_pipeline_opts = FeaturePipelineOptions::InitFromFlags();
    from_flags.model_opts = ModelOptions::InitFromFlags();
    from_flags.tlg_opts = TLGDecoderOptions::InitFromFlags();
    return from_flags;
}

int main(int argc, char *argv[]) {
    gflags::ParseCommandLineFlags(&argc, &argv, false);
    google::InitGoogleLogging(argv[0]);

    ppspeech::RecognizerResource resource = InitRecognizerResoure();

    ppspeech::WebSocketServer server(FLAGS_port, resource);
    LOG(INFO) << "Listening at port " << FLAGS_port;
    server.Start();
    return 0;
}


================================================
FILE: runtime/engine/audio_classification/CMakeLists.txt
================================================
# Inference backend selection for the audio classification engine.
# nnet/panns_nnet.cc picks its FastDeploy backend from exactly one of the
# USE_PADDLE_INFERENCE_BACKEND / USE_ORT_BACKEND / USE_PADDLE_LITE_BACKEND
# macros; ONNX Runtime is the one enabled here.
# add_definitions("-DUSE_PADDLE_INFERENCE_BACKEND")
add_definitions("-DUSE_ORT_BACKEND")
add_subdirectory(nnet)

================================================
FILE: runtime/engine/audio_classification/nnet/CMakeLists.txt
================================================
# Sources of the PANNs audio classification library.
set(srcs 
    panns_nnet.cc 
    panns_interface.cc
)

# Shared library `cls`; its dependencies are linked PRIVATE, so they are not
# propagated to targets that link against `cls`.
add_library(cls SHARED ${srcs})
target_link_libraries(cls PRIVATE ${FASTDEPLOY_LIBS} kaldi-matrix kaldi-base frontend utils )

# Command-line driver built from panns_nnet_main.cc.
set(bin_name panns_nnet_main)
add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
target_link_libraries(${bin_name} gflags glog cls)


================================================
FILE: runtime/engine/audio_classification/nnet/panns_interface.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "audio_classification/nnet/panns_interface.h"

#include "audio_classification/nnet/panns_nnet.h"
#include "common/base/config.h"

namespace ppspeech {

void* ClsCreateInstance(const char* conf_path) {
    Config conf(conf_path);
    // cls init
    ppspeech::ClsNnetConf cls_nnet_conf;
    cls_nnet_conf.wav_normal_ = conf.Read("wav_normal", true);
    cls_nnet_conf.wav_normal_type_ =
        conf.Read("wav_normal_type", std::string("linear"));
    cls_nnet_conf.wav_norm_mul_factor_ = conf.Read("wav_norm_mul_factor", 1.0);
    cls_nnet_conf.model_file_path_ = conf.Read("model_path", std::string(""));
    cls_nnet_conf.param_file_path_ = conf.Read("param_path", std::string(""));
    cls_nnet_conf.dict_file_path_ = conf.Read("dict_path", std::string(""));
    cls_nnet_conf.num_cpu_thread_ = conf.Read("num_cpu_thread", 12);
    cls_nnet_conf.samp_freq = conf.Read("samp_freq", 32000);
    cls_nnet_conf.frame_length_ms = conf.Read("frame_length_ms", 32);
    cls_nnet_conf.frame_shift_ms = conf.Read("frame_shift_ms", 10);
    cls_nnet_conf.num_bins = conf.Read("num_bins", 64);
    cls_nnet_conf.low_freq = conf.Read("low_freq", 50);
    cls_nnet_conf.high_freq = conf.Read("high_freq", 14000);
    cls_nnet_conf.dither = conf.Read("dither", 0.0);

    ppspeech::ClsNnet* cls_model = new ppspeech::ClsNnet();
    int ret = cls_model->Init(cls_nnet_conf);
    return static_cast<void*>(cls_model);
}

// Destroys an instance created by ClsCreateInstance. A null handle is
// accepted and ignored. Always returns 0.
int ClsDestroyInstance(void* instance) {
    // `delete` on a null pointer is a no-op, so no explicit check is needed.
    delete static_cast<ppspeech::ClsNnet*>(instance);
    return 0;
}

// Classifies the wav file at `wav_path` and writes the top-`topk` result
// string into `result` (a caller-owned buffer of `result_max_len` bytes).
//
// Returns -1 when `instance`, `wav_path` or `result` is null; otherwise
// returns the status reported by ClsNnet::Forward (0 on success) instead of
// unconditionally reporting success.
int ClsFeedForward(void* instance,
                   const char* wav_path,
                   int topk,
                   char* result,
                   int result_max_len) {
    ppspeech::ClsNnet* cls_model = static_cast<ppspeech::ClsNnet*>(instance);
    if (cls_model == NULL) {
        printf("instance is null\n");
        return -1;
    }
    if (wav_path == NULL || result == NULL) {
        printf("wav_path or result is null\n");
        return -1;
    }
    return cls_model->Forward(wav_path, topk, result, result_max_len);
}

int ClsReset(void* instance) {
    ppspeech::ClsNnet* cls_model = static_cast<ppspeech::ClsNnet*>(instance);
    if (cls_model == NULL) {
        printf("instance is null\n");
        return -1;
    }
    cls_model->Reset();
    return 0;
}
}  // namespace ppspeech

================================================
FILE: runtime/engine/audio_classification/nnet/panns_interface.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

// Handle-based interface to the PANNs audio classifier. The handle returned
// by ClsCreateInstance is opaque to callers.
namespace ppspeech {

// Creates a classifier from the configuration file at `conf_path` and
// returns its handle.
void* ClsCreateInstance(const char* conf_path);
// Destroys a handle created by ClsCreateInstance; a null handle is ignored.
// Returns 0.
int ClsDestroyInstance(void* instance);
// Classifies the wav file at `wav_path` and writes the top-`topk` result
// string into `result`, a caller-owned buffer of `result_max_len` bytes.
// Returns -1 if `instance` is null.
int ClsFeedForward(void* instance,
                   const char* wav_path,
                   int topk,
                   char* result,
                   int result_max_len);
// Clears the accumulated result text of `instance`; call it between
// utterances. Returns -1 if `instance` is null, 0 otherwise.
int ClsReset(void* instance);
}  // namespace ppspeech

================================================
FILE: runtime/engine/audio_classification/nnet/panns_nnet.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "audio_classification/nnet/panns_nnet.h"
#ifdef WITH_PROFILING
#include "kaldi/base/timer.h"
#endif

namespace ppspeech {

// Constructs an uninitialised classifier; the inference runtime stays null
// until Init() is called.
ClsNnet::ClsNnet() : runtime_(nullptr) {
    // wav_reader_ = NULL;
}

// Drops the result text accumulated in ss_ so the next Forward() starts from
// an empty string.
void ClsNnet::Reset() {
    // wav_reader_->Clear();
    ss_.str(std::string());
}

// Initialises the classifier from `conf`: stores the configuration, fills in
// the fbank options, loads the label dictionary when a path is given, and
// creates the FastDeploy runtime for the backend selected at compile time.
// Returns 0 on success and -1 when the runtime fails to initialise.
int ClsNnet::Init(const ClsNnetConf& conf) {
    conf_ = conf;

    // fbank: framing options
    auto& frame_opts = fbank_opts_.frame_opts;
    frame_opts.samp_freq = conf.samp_freq;
    frame_opts.frame_length_ms = conf.frame_length_ms;
    frame_opts.frame_shift_ms = conf.frame_shift_ms;
    frame_opts.dither = conf.dither;
    // fbank: mel filterbank options
    auto& mel_opts = fbank_opts_.mel_opts;
    mel_opts.num_bins = conf.num_bins;
    mel_opts.low_freq = conf.low_freq;
    mel_opts.high_freq = conf.high_freq;
    // Forward() applies PowerTodb to the features itself.
    fbank_opts_.use_log_fbank = false;

    // label dictionary (optional)
    if (!conf.dict_file_path_.empty()) {
        ReadFileToVector(conf.dict_file_path_, &dict_);
    }

    // model
    fastdeploy::RuntimeOption runtime_option;

#ifdef USE_PADDLE_INFERENCE_BACKEND
    runtime_option.SetModelPath(conf.model_file_path_,
                                conf.param_file_path_,
                                fastdeploy::ModelFormat::PADDLE);
    runtime_option.UsePaddleInferBackend();
#elif defined(USE_ORT_BACKEND)
    runtime_option.SetModelPath(
        conf.model_file_path_, "", fastdeploy::ModelFormat::ONNX);  // onnx
    runtime_option.UseOrtBackend();                                 // onnx
#elif defined(USE_PADDLE_LITE_BACKEND)
    runtime_option.SetModelPath(conf.model_file_path_,
                                conf.param_file_path_,
                                fastdeploy::ModelFormat::PADDLE);
    runtime_option.UseLiteBackend();
#endif

    runtime_option.SetCpuThreadNum(conf.num_cpu_thread_);
    // runtime_option.DeletePaddleBackendPass("simplify_with_basic_ops_pass");
    runtime_.reset(new fastdeploy::Runtime());
    const bool init_ok = runtime_->Init(runtime_option);
    if (!init_ok) {
        std::cerr << "--- Init FastDeploy Runitme Failed! "
                  << "\n--- Model:  " << conf.model_file_path_ << std::endl;
        return -1;
    }
    std::cout << "--- Init FastDeploy Runitme Done! "
              << "\n--- Model:  " << conf.model_file_path_ << std::endl;

    Reset();
    return 0;
}

// Runs the full classification pipeline on one wav file.
//
// Steps: read the wav (channel 0 only), normalise the waveform, extract
// fbank features and convert them with PowerTodb, run the model, then append
// "{<topk entries>}" to ss_ and copy the text into `result`.
//
// Parameters:
//   wav_path        path of the wav file to classify.
//   topk            number of best classes to report.
//   result          caller-owned output buffer.
//   result_max_len  size of `result` in bytes; longer text is truncated
//                   (a message is printed, the call still returns 0).
// Returns 0 on success, -1 on invalid arguments, when the file cannot be
// opened, when no feature frame could be extracted, or when inference fails.
//
// Note: the result text is appended to ss_; call Reset() between utterances.
int ClsNnet::Forward(const char* wav_path,
                     int topk,
                     char* result,
                     int result_max_len) {
    if (wav_path == NULL || result == NULL || result_max_len <= 0) {
        printf("invalid argument for Forward\n");
        return -1;
    }
#ifdef WITH_PROFILING
    kaldi::Timer timer;
    timer.Reset();
#endif
    // read wav; wav data is binary, so open the stream in binary mode and
    // fail early instead of handing a bad stream to the reader
    std::ifstream infile(wav_path, std::ifstream::in | std::ifstream::binary);
    if (!infile.is_open()) {
        printf("failed to open wav file: %s\n", wav_path);
        return -1;
    }
    kaldi::WaveData wave_data;
    wave_data.Read(infile);
    kaldi::Matrix<float> wavform_kaldi = wave_data.Data();
    // only get channel 0
    int wavform_len = wavform_kaldi.NumCols();
    std::vector<float> wavform(wavform_kaldi.Data(),
                               wavform_kaldi.Data() + wavform_len);
    WaveformFloatNormal(&wavform);
    WaveformNormal(&wavform,
                   conf_.wav_normal_,
                   conf_.wav_normal_type_,
                   conf_.wav_norm_mul_factor_);
#ifdef PPS_DEBUG
    {
        std::ofstream fp("cls.wavform", std::ios::out);
        for (int i = 0; i < wavform.size(); ++i) {
            fp << std::setprecision(18) << wavform[i] << " ";
        }
        fp << "\n";
    }
#endif
#ifdef WITH_PROFILING
    printf("wav read consume: %fs\n", timer.Elapsed());
#endif

#ifdef WITH_PROFILING
    timer.Reset();
#endif

    std::vector<float> feats;
    std::unique_ptr<ppspeech::FrontendInterface> data_source(
        new ppspeech::DataCache());
    ppspeech::Fbank fbank(fbank_opts_, std::move(data_source));
    fbank.Accept(wavform);
    fbank.SetFinished();
    fbank.Read(&feats);

    int feat_dim = fbank_opts_.mel_opts.num_bins;
    if (feat_dim <= 0) {
        printf("invalid fbank num_bins: %d\n", feat_dim);
        return -1;
    }
    int num_frames = feats.size() / feat_dim;
    if (num_frames <= 0) {
        // Too little audio to produce a single frame: nothing to classify.
        printf("no feature frame extracted from: %s\n", wav_path);
        return -1;
    }

    for (int i = 0; i < num_frames; ++i) {
        for (int j = 0; j < feat_dim; ++j) {
            feats[i * feat_dim + j] = PowerTodb(feats[i * feat_dim + j]);
        }
    }
#ifdef PPS_DEBUG
    {
        std::ofstream fp("cls.feat", std::ios::out);
        for (int i = 0; i < num_frames; ++i) {
            for (int j = 0; j < feat_dim; ++j) {
                fp << std::setprecision(18) << feats[i * feat_dim + j] << " ";
            }
            fp << "\n";
        }
    }
#endif
#ifdef WITH_PROFILING
    printf("extract fbank consume: %fs\n", timer.Elapsed());
#endif

    // infer
    std::vector<float> model_out;
#ifdef WITH_PROFILING
    timer.Reset();
#endif
    if (ModelForward(feats.data(), num_frames, feat_dim, &model_out) != 0) {
        printf("model forward failed for: %s\n", wav_path);
        return -1;
    }
#ifdef WITH_PROFILING
    printf("fast deploy infer consume: %fs\n", timer.Elapsed());
#endif
#ifdef PPS_DEBUG
    {
        std::ofstream fp("cls.logits", std::ios::out);
        for (int i = 0; i < model_out.size(); ++i) {
            fp << std::setprecision(18) << model_out[i] << "\n";
        }
    }
#endif

    // construct result str
    ss_ << "{";
    GetTopkResult(topk, model_out);
    ss_ << "}";

    const std::string result_str = ss_.str();
    // result_max_len is known to be positive here, so the cast is safe and
    // avoids a signed/unsigned comparison.
    if (static_cast<size_t>(result_max_len) <= result_str.size()) {
        printf("result_max_len is short than result len\n");
    }
    snprintf(result, result_max_len, "%s", result_str.c_str());
    return 0;
}

int ClsNnet::ModelForward(float* features,
                          const int num_frames,
                          const int feat_dim,
                          std::vector<float>* model_out) {
    // init input tensor shape
    fastdeploy::TensorInfo info = runtime_->GetInputInfo(0);
    info.shape = {1, num_frames, feat_dim};

    std::vector<fastdeploy::FDTensor> input_tensors(1);
    std::vector<fastdeploy::FDTensor> output_tensors(1);

    input_tensors[0].SetExternalData({1, num_frames, feat_dim},
                                     fastdeploy::FDDataType::FP32,
                                     static_cast<void*>(features));

    // get input name
    input_tensors[0].name = info.name;

    runtime_->Infer(input_tensors, &output_tensors);

    // output_tensors[0].PrintInfo();
    std::vector<int64_t> output_shape = output_tensors[0].Shape();
    model_out->resize(output_shape[0] * output_shape[1]);
    memcpy(static_cast<void*>(model_out->data()),
           output_tensors[0].Data(),
           output_shape[0] * output_shape[1] * sizeof(float));
    return 0;
}

// Appends the `k` best classes to ss_ as comma-separated
// "label":"score" pairs (the caller adds the surrounding braces).
//
// `k` is clamped to the number of available scores, so asking for more
// classes than the model outputs no longer reads past the end of the result
// vectors. When no dictionary entry exists for a class index (no dict file
// configured, or a dict shorter than the model output) the numeric index is
// used as the label. Always returns 0.
int ClsNnet::GetTopkResult(int k, const std::vector<float>& model_out) {
    const int num_scores = static_cast<int>(model_out.size());
    if (k > num_scores) {
        k = num_scores;
    }
    if (k <= 0) {
        return 0;
    }

    std::vector<float> values;
    std::vector<int> indics;
    TopK(model_out, k, &values, &indics);

    // Never trust the helper to return exactly k entries.
    const size_t count = values.size() < indics.size() ? values.size()
                                                       : indics.size();
    for (size_t i = 0; i < count && i < static_cast<size_t>(k); ++i) {
        if (i != 0) {
            ss_ << ",";
        }
        const int class_id = indics[i];
        ss_ << "\"";
        if (class_id >= 0 && static_cast<size_t>(class_id) < dict_.size()) {
            ss_ << dict_[class_id];
        } else {
            ss_ << class_id;
        }
        ss_ << "\":\"" << values[i] << "\"";
    }
    return 0;
}

}  // namespace ppspeech

================================================
FILE: runtime/engine/audio_classification/nnet/panns_nnet.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "common/frontend/data_cache.h"
#include "common/frontend/fbank.h"
#include "common/frontend/feature-fbank.h"
#include "common/frontend/frontend_itf.h"
#include "common/frontend/wave-reader.h"
#include "common/utils/audio_process.h"
#include "common/utils/file_utils.h"
#include "fastdeploy/runtime.h"
#include "kaldi/util/kaldi-io.h"
#include "kaldi/util/table-types.h"

namespace ppspeech {
// Configuration of the PANNs classifier (ClsNnet). The members have no
// default initializers: callers must set every field before Init().
// ClsCreateInstance() fills them from a config file.
struct ClsNnetConf {
    // wav
    // Waveform normalisation settings, passed to WaveformNormal() in
    // ClsNnet::Forward.
    bool wav_normal_;
    std::string wav_normal_type_;
    float wav_norm_mul_factor_;
    // model
    // Model file; also the only file used by the ONNX Runtime backend.
    std::string model_file_path_;
    // Parameter file; used by the Paddle Inference / Paddle Lite backends.
    std::string param_file_path_;
    // Label dictionary; skipped when empty.
    std::string dict_file_path_;
    // CPU thread count passed to the FastDeploy runtime option.
    int num_cpu_thread_;
    // fbank
    // Copied into the fbank frame/mel options by ClsNnet::Init.
    float samp_freq;
    float frame_length_ms;
    float frame_shift_ms;
    int num_bins;
    float low_freq;
    float high_freq;
    float dither;
};

// PANNs audio classifier: wav file -> fbank features -> FastDeploy model ->
// top-k result string.
class ClsNnet {
  public:
    ClsNnet();
    // Stores `conf`, prepares the fbank options, loads the dictionary and
    // creates the inference runtime. Returns 0 on success, -1 on failure.
    int Init(const ClsNnetConf& conf);
    // Classifies the wav file at `wav_path` and writes the top-`topk` result
    // text into `result` (at most `result_max_len` bytes).
    int Forward(const char* wav_path,
                int topk,
                char* result,
                int result_max_len);
    // Clears the accumulated result text.
    void Reset();

  private:
    // Runs inference on `num_frames` x `feat_dim` features and copies the
    // first output tensor into `model_out`.
    int ModelForward(float* features,
                     const int num_frames,
                     const int feat_dim,
                     std::vector<float>* model_out);
    // NOTE(review): no definition is visible in panns_nnet.cc — confirm it is
    // implemented elsewhere or remove the declaration.
    int ModelForwardStream(std::vector<float>* feats);
    // Appends the k best "label":"score" pairs to ss_.
    int GetTopkResult(int k, const std::vector<float>& model_out);

    ClsNnetConf conf_;
    knf::FbankOptions fbank_opts_;
    std::unique_ptr<fastdeploy::Runtime> runtime_;
    // Class labels read from conf.dict_file_path_; indexed by class id.
    std::vector<std::string> dict_;
    // Accumulates the result text across Forward() calls until Reset().
    std::stringstream ss_;
};

}  // namespace ppspeech

================================================
FILE: runtime/engine/audio_classification/nnet/panns_nnet_main.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <fstream>
#include <string>

#include "gflags/gflags.h"
#include "glog/logging.h"
#include "audio_classification/nnet/panns_interface.h"

// Command-line flags; main() requires all three to be non-empty.
// --conf_path: classifier configuration file passed to ClsCreateInstance.
DEFINE_string(conf_path, "", "config path");
// --scp_path: text file read line by line; each line is used as a wav path.
DEFINE_string(scp_path, "", "wav scp path");
// --topk: number of results to print; a string flag converted with atoi.
DEFINE_string(topk, "", "print topk results");

int main(int argc, char* argv[]) {
    gflags::SetUsageMessage("Usage:");
    gflags::ParseCommandLineFlags(&argc, &argv, false);
    google::InitGoogleLogging(argv[0]);
    google::InstallFailureSignalHandler();
    FLAGS_logtostderr = 1;
    CHECK_GT(FLAGS_conf_path.size(), 0);
    CHECK_GT(FLAGS_scp_path.size(), 0);
    CHECK_GT(FLAGS_topk.size(), 0);
    void* instance = ppspeech::ClsCreateInstance(FLAGS_conf_path.c_str());
    int ret = 0;
    // read wav
    std::ifstream ifs(FLAGS_scp_path);
    std::string line = "";
    int topk = std::atoi(FLAGS_topk.c_str());
    while (getline(ifs, line)) {
        // read wav
        char result[1024] = {0};
        ret = ppspeech::ClsFeedForward(
            instance, line.c_str(), topk, result, 1024);
        printf("%s %s\n", line.c_str(), result);
        ret = ppspeech::ClsReset(instance);
    }
    ret = ppspeech::ClsDestroyInstance(instance);
    return 0;
}


================================================
FILE: runtime/engine/codelab/CMakeLists.txt
================================================
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)

# The glog codelab is built on every platform except Android.
if(ANDROID)
else() #Unix
    add_subdirectory(glog)
endif()

================================================
FILE: runtime/engine/codelab/README.md
================================================

## For Developer  

> Reminder: This directory is intended for developers only.

* codelab - used by speechx developers for testing.


================================================
FILE: runtime/engine/common/CMakeLists.txt
================================================
# Make this directory and its parent available as include roots.
include_directories(
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/../
)
add_subdirectory(base)
add_subdirectory(utils)
add_subdirectory(matrix)

# The frontend include root is added only after base/utils/matrix, so it
# applies to the frontend subdirectory but not to the ones added above.
include_directories(
${CMAKE_CURRENT_SOURCE_DIR}/frontend
)
add_subdirectory(frontend)

# `common` is an INTERFACE target that bundles the four component libraries
# for consumers; it has no sources of its own.
add_library(common INTERFACE)
target_link_libraries(common  INTERFACE base utils kaldi-matrix frontend)
install(TARGETS base DESTINATION lib)
install(TARGETS utils DESTINATION lib)
install(TARGETS kaldi-matrix DESTINATION lib)
install(TARGETS frontend DESTINATION lib)

================================================
FILE: runtime/engine/common/base/CMakeLists.txt
================================================


# Choose which header provides command-line flags: the OpenFst one when ASR
# is enabled, gflags otherwise.
if(WITH_ASR)
  add_compile_options(-DWITH_ASR)
  set(PPS_FLAGS_LIB "fst/flags.h")
else()
  set(PPS_FLAGS_LIB "gflags/gflags.h")
endif()

# Choose which header provides logging: the in-tree implementation on
# Android, otherwise the OpenFst log header (ASR) or glog.
if(ANDROID)
  set(PPS_GLOG_LIB "base/log_impl.h")
else() #UNIX
  if(WITH_ASR)
    set(PPS_GLOG_LIB "fst/log.h")
  else()
    set(PPS_GLOG_LIB "glog/logging.h")
  endif()
endif()

# Generate flags.h and log.h from their .in templates. @ONLY restricts
# substitution to @VAR@ references. Note that the generated headers are
# written into the source directory, not the build directory.
# NOTE(review): presumably the templates reference @PPS_FLAGS_LIB@ and
# @PPS_GLOG_LIB@ — confirm against flags.h.in / log.h.in.
configure_file(
    ${CMAKE_CURRENT_SOURCE_DIR}/flags.h.in
    ${CMAKE_CURRENT_SOURCE_DIR}/flags.h @ONLY
  )
message(STATUS "Generated ${CMAKE_CURRENT_SOURCE_DIR}/flags.h")

configure_file(
    ${CMAKE_CURRENT_SOURCE_DIR}/log.h.in
    ${CMAKE_CURRENT_SOURCE_DIR}/log.h @ONLY
  )
message(STATUS "Generated ${CMAKE_CURRENT_SOURCE_DIR}/log.h")


# On Android `base` is a real library containing the logging implementation;
# elsewhere it is a header-only INTERFACE target.
if(ANDROID)
  set(csrc
    log_impl.cc
    glog_utils.cc
  )
  add_library(base ${csrc})
  target_link_libraries(base gflags)
else() # UNIX
  set(csrc)
  add_library(base INTERFACE)
endif()

================================================
FILE: runtime/engine/common/base/basic_types.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <limits>

#include "kaldi/base/kaldi-types.h"

// Floating-point aliases used throughout the runtime.
typedef float BaseFloat;
typedef double double64;

// Fixed-width style integer aliases built from the native integer types.
typedef signed char int8;
typedef short int16;  // NOLINT
typedef int int32;    // NOLINT

// On LP64 platforms (except macOS/OpenBSD) `long` is used for int64;
// everywhere else `long long` is used.
#if defined(__LP64__) && !defined(OS_MACOSX) && !defined(OS_OPENBSD)
typedef long int64;  // NOLINT
#else
typedef long long int64;            // NOLINT
#endif

typedef unsigned char uint8;    // NOLINT
typedef unsigned short uint16;  // NOLINT
typedef unsigned int uint32;    // NOLINT

// Same platform split as int64 above.
#if defined(__LP64__) && !defined(OS_MACOSX) && !defined(OS_OPENBSD)
typedef unsigned long uint64;  // NOLINT
#else
typedef unsigned long long uint64;  // NOLINT
#endif

typedef signed int char32;

// Limits of the integer aliases above, written as bit patterns.
const uint8 kuint8max = static_cast<uint8>(0xFF);
const uint16 kuint16max = static_cast<uint16>(0xFFFF);
const uint32 kuint32max = static_cast<uint32>(0xFFFFFFFF);
const uint64 kuint64max = static_cast<uint64>(0xFFFFFFFFFFFFFFFFLL);
const int8 kint8min = static_cast<int8>(0x80);
const int8 kint8max = static_cast<int8>(0x7F);
const int16 kint16min = static_cast<int16>(0x8000);
const int16 kint16max = static_cast<int16>(0x7FFF);
const int32 kint32min = static_cast<int32>(0x80000000);
const int32 kint32max = static_cast<int32>(0x7FFFFFFF);
const int64 kint64min = static_cast<int64>(0x8000000000000000LL);
const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);

// Note: for floating-point types std::numeric_limits<T>::min() is the
// smallest positive normalized value, not the most negative value, so
// kBaseFloatMin is a tiny positive number.
const BaseFloat kBaseFloatMax = std::numeric_limits<BaseFloat>::max();
const BaseFloat kBaseFloatMin = std::numeric_limits<BaseFloat>::min();


================================================
FILE: runtime/engine/common/base/common.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <algorithm>
#include <cassert>
#include <cmath>
#include <condition_variable>
#include <cstring>
#include <deque>
#include <fstream>
#include <functional>
#include <future>
#include <iomanip>
#include <iostream>
#include <istream>
#include <map>
#include <memory>
#include <mutex>
#include <numeric>
#include <ostream>
#include <queue>
#include <set>
#include <sstream>
#include <stack>
#include <stdexcept>
#include <string>
#include <thread>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "base/basic_types.h"
#include "base/flags.h"
#include "base/log.h"
#include "base/macros.h"
#include "utils/file_utils.h"
#include "utils/math.h"
#include "utils/timer.h"

================================================
FILE: runtime/engine/common/base/config.h
================================================
// Copyright (c) code is from
// https://blog.csdn.net/huixingshao/article/details/45969887.

#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
using namespace std;

#pragma once

#ifdef _MSC_VER
#pragma region ParseIniFile
#endif

/*
 * \brief Generic configuration Class
 *
 * Stores string key/value pairs in m_Contents. The pairs are filled in by
 * operator>> (used by the file constructor and ReadFile) or by Add, and are
 * converted to typed values on access through the Read / ReadInto templates.
 */
class Config {
    // Data
  protected:
    std::string m_Delimiter;  //!< separator between key and value
    std::string m_Comment;    //!< separator between value and comments
    std::map<std::string, std::string>
        m_Contents;  //!< extracted keys and values

    typedef std::map<std::string, std::string>::iterator mapi;
    typedef std::map<std::string, std::string>::const_iterator mapci;
    // Methods
  public:
    // Parse `filename` into this object; throws File_not_found when the file
    // cannot be opened.
    Config(std::string filename,
           std::string delimiter = "=",
           std::string comment = "#");
    // Empty configuration using "=" as delimiter and "#" as comment marker.
    Config();
    template <class T>
    T Read(const std::string& in_key) const;  //!< Search for key and read value
    //! or optional default value, call
    //! as read<T>
    // Throws Key_not_found when in_key is absent.
    // Overload below returns in_value instead of throwing when in_key is
    // absent.
    template <class T>
    T Read(const std::string& in_key, const T& in_value) const;
    // Store the converted value in *out_var; returns whether in_key was found.
    template <class T>
    bool ReadInto(T* out_var, const std::string& in_key) const;
    // Same as above, but *out_var receives in_value when in_key is absent.
    template <class T>
    bool ReadInto(T* out_var,
                  const std::string& in_key,
                  const T& in_value) const;
    // True when `filename` can be opened for reading.
    bool FileExist(std::string filename);
    // Replace delimiter/comment settings, then parse `filename` into this
    // object; throws File_not_found when the file cannot be opened.
    void ReadFile(std::string filename,
                  std::string delimiter = "=",
                  std::string comment = "#");

    // Check whether key exists in configuration
    bool KeyExists(const std::string& in_key) const;

    // Modify keys and values
    // Add stores the trimmed textual form of in_value under the trimmed key,
    // overwriting any previous value.
    template <class T>
    void Add(const std::string& in_key, const T& in_value);
    void Remove(const std::string& in_key);

    // Check or change configuration syntax
    // The setters return the previous delimiter / comment marker.
    std::string GetDelimiter() const { return m_Delimiter; }
    std::string GetComment() const { return m_Comment; }
    std::string SetDelimiter(const std::string& in_s) {
        std::string old = m_Delimiter;
        m_Delimiter = in_s;
        return old;
    }
    std::string SetComment(const std::string& in_s) {
        std::string old = m_Comment;
        m_Comment = in_s;
        return old;
    }

    // Write or read configuration
    friend std::ostream& operator<<(std::ostream& os, const Config& cf);
    friend std::istream& operator>>(std::istream& is, Config& cf);

  protected:
    // Conversion helpers between T and its textual form (via << and >>).
    template <class T>
    static std::string T_as_string(const T& t);
    template <class T>
    static T string_as_T(const std::string& s);
    // Strip leading and trailing whitespace in place.
    static void Trim(std::string* inout_s);


    // Exception types
  public:
    // Thrown when a configuration file cannot be opened.
    struct File_not_found {
        std::string filename;
        explicit File_not_found(const std::string& filename_ = std::string())
            : filename(filename_) {}
    };
    struct Key_not_found {  // thrown only by T read(key) variant of read()
        std::string key;
        explicit Key_not_found(const std::string& key_ = std::string())
            : key(key_) {}
    };
};

/* static */
template <class T>
std::string Config::T_as_string(const T& t) {
    // Render t as text by streaming it into an in-memory buffer.
    // T has to provide operator<< for std::ostream.
    std::ostringstream buffer;
    buffer << t;
    return buffer.str();
}


/* static */
template <class T>
T Config::string_as_T(const std::string& s) {
    // Convert from a string to a T
    // Type T must support >> operator
    //
    // The result is value-initialized first: when extraction fails before
    // anything is parsed (e.g. an empty value), operator>> leaves its target
    // untouched, and a default-initialized arithmetic T would then be
    // returned with an indeterminate value.
    T t = T();
    std::istringstream ist(s);
    ist >> t;
    return t;
}


/* static */
template <>
inline std::string Config::string_as_T<std::string>(const std::string& s) {
    // String-to-string needs no parsing: hand back a copy of the whole input
    // instead of going through operator>>.
    return std::string(s);
}


/* static */
template <>
inline bool Config::string_as_T<bool>(const std::string& s) {
    // Convert from a string to a bool (case-insensitive)
    // Interpret "false", "F", "no", "n", "0", "none" as false
    // Interpret "true", "T", "yes", "y", "1", "-1", or anything else as true
    std::string sup = s;
    for (std::string::iterator p = sup.begin(); p != sup.end(); ++p) {
        // toupper() requires a value representable as unsigned char; passing
        // a negative plain char (non-ASCII byte) is undefined behavior.
        *p = static_cast<char>(toupper(static_cast<unsigned char>(*p)));
    }
    const bool is_false = sup == "FALSE" || sup == "F" || sup == "NO" ||
                          sup == "N" || sup == "0" || sup == "NONE";
    return !is_false;
}


template <class T>
T Config::Read(const std::string& key) const {
    // Read the value corresponding to key
    mapci p = m_Contents.find(key);
    if (p == m_Contents.end()) throw Key_not_found(key);
    return string_as_T<T>(p->second);
}


template <class T>
T Config::Read(const std::string& key, const T& value) const {
    // Return the value corresponding to key or given default value
    // if key is not found
    mapci p = m_Contents.find(key);
    if (p == m_Contents.end()) {
        printf("%s = %s(default)\n", key.c_str(), T_as_string(value).c_str());
        return value;
    } else {
        printf("%s = %s\n", key.c_str(), T_as_string(p->second).c_str());
        return string_as_T<T>(p->second);
    }
}


template <class T>
bool Config::ReadInto(T* var, const std::string& key) const {
    // Get the value corresponding to key and store in var
    // Return true if key is found
    // Otherwise leave var untouched
    mapci p = m_Contents.find(key);
    bool found = (p != m_Contents.end());
    if (found) *var = string_as_T<T>(p->second);
    return found;
}


template <class T>
bool Config::ReadInto(T* var, const std::string& key, const T& value) const {
    // Get the value corresponding to key and store it in *var.
    // Returns true if key is found; otherwise *var is set to the given
    // default `value` and false is returned.
    mapci p = m_Contents.find(key);
    bool found = (p != m_Contents.end());
    if (found)
        *var = string_as_T<T>(p->second);
    else
        *var = value;  // write through the pointer, not to the pointer itself
    return found;
}


template <class T>
void Config::Add(const std::string& in_key, const T& value) {
    // Insert (or overwrite) an entry. Both the key and the textual form of
    // the value are stored with surrounding whitespace removed.
    std::string clean_key = in_key;
    std::string clean_value = T_as_string(value);
    Trim(&clean_key);
    Trim(&clean_value);
    m_Contents[clean_key] = clean_value;
}

// Construct a Config, getting keys and values from the given file.
// Throws File_not_found when the file cannot be opened.
// Declared inline because this definition lives in a header: without it,
// including the header from more than one translation unit produces
// multiple-definition link errors.
inline Config::Config(std::string filename,
                      std::string delimiter,
                      std::string comment)
    : m_Delimiter(delimiter), m_Comment(comment) {
    std::ifstream in(filename.c_str());

    if (!in) throw File_not_found(filename);

    in >> (*this);
}


// Construct an empty Config (no file) with "=" as delimiter and "#" as
// comment marker. Inline because the definition lives in a header.
inline Config::Config()
    : m_Delimiter(std::string(1, '=')), m_Comment(std::string(1, '#')) {}


bool Config::KeyExists(const string& key) const {
    // Indicate whether key is found
    mapci p = m_Contents.find(key);
    return (p != m_Contents.end());
}


/* static */
// Remove leading and trailing whitespace from *inout_s in place.
// Inline because the definition lives in a header.
inline void Config::Trim(std::string* inout_s) {
    static const char whitespace[] = " \n\t\v\r\f";
    // An all-whitespace string yields npos here, which erases everything.
    inout_s->erase(0, inout_s->find_first_not_of(whitespace));
    // On an empty string find_last_not_of returns npos; npos + 1 wraps to 0,
    // so the erase below is a harmless no-op.
    inout_s->erase(inout_s->find_last_not_of(whitespace) + 1U);
}


// Save a Config to os, one "key <delimiter> value" line per entry, in the
// key order of the underlying map.
// Inline because the definition lives in a header.
inline std::ostream& operator<<(std::ostream& os, const Config& cf) {
    for (Config::mapci p = cf.m_Contents.begin(); p != cf.m_Contents.end();
         ++p) {
        os << p->first << " " << cf.m_Delimiter << " ";
        os << p->second << std::endl;
    }
    return os;
}

// Remove key and its value; removing a key that is not present is a no-op.
// Erasing by key avoids erase(end()), which is undefined behavior when the
// lookup fails. Inline because the definition lives in a header.
inline void Config::Remove(const std::string& key) {
    m_Contents.erase(key);
}

std::istream& operator>>(std::istream& is, Config& cf) {
    // Load a Config from is
    // Read in keys and values, keeping internal whitespace
    typedef string::size_type pos;
    const string& delim = cf.m_Delimiter;  // separator
    const string& comm = cf.m_Comment;     // comment
    const pos skip = delim.length();       // length of separator

    string nextline = "";  // might need to read ahead to see where value ends

    while (is || nextline.length() > 0) {
        // Read an entire line at a time
        string line;
        if (nextline.length() > 0) {
            line = nextline;  // we read ahead; use it now
            nextline = "";
        } else {
            std::getline(is, line);
        }

        // Ignore comments
        line = line.substr(0, line.find(comm));

        // Parse the line if it contains a delimiter
        pos delimPos = line.find(delim);
        if (delimPos < string::npos) {
            // Extract the key
            string key = line.substr(0, delimPos);
            line.replace(0, delimPos + skip, "");

            // See if value continues on the next line
            // Stop at blank line, next line with a key, end of stream,
            // or end of file sentry
            bool terminate = false;
            while (!terminate && is) {
                std::getline(is, nextline);
                terminate = true;

                string nlcopy = nextline;
                Config::Trim(&nlcopy);
                if (nlcopy == "") continue;

                nextline = nextline.substr(0, nextline.find(comm));
                if (nextline.find(delim) != string::npos) continue;

                nlcopy = nextline;
                Config::Trim(&nlcopy);
                if (nlcopy != "") line += "\n";
                line += nextline;
                terminate = false;
            }

            // Store key and value
            Config::Trim(&key);
            Config::Trim(&line);
            cf.m_Contents[key] = line;  // overwrites if key is repeated
        }
    }

    return is;
}
// Return true when `filename` can be opened for reading.
// Inline because the definition lives in a header.
inline bool Config::FileExist(std::string filename) {
    std::ifstream in(filename.c_str());
    return in.good();
}

void Config::ReadFile(string filename, string delimiter, string comment) {
    m_Delimiter = delimiter;
    m_Comment = comment;
    std::ifstream in(filename.c_str());

    if (!in) throw File_not_found(filename);

    in >> (*this);
}

#ifdef _MSC_VER
#pragma endregion ParseIniFIle
#endif


================================================
FILE: runtime/engine/common/base/flags.h.in
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "@PPS_FLAGS_LIB@"


================================================
FILE: runtime/engine/common/base/glog_utils.cc
================================================

#include "base/glog_utils.h"

namespace google {

// Stand-in for glog's initializer: it only emits a log line.
void InitGoogleLogging(const char* name) {
    (void)name;  // the program name is not used by this stub
    LOG(INFO) << "dummpy InitGoogleLogging.";
}

// Stand-in for glog's signal-handler installer: it only emits a log line.
void InstallFailureSignalHandler() {
    LOG(INFO) << "dummpy InstallFailureSignalHandler.";
}

}  // namespace google


================================================
FILE: runtime/engine/common/base/glog_utils.h
================================================
#pragma once

#include "base/common.h"

// Declarations of glog-named entry points in namespace google, implemented
// in base/glog_utils.cc, where each one only writes a log line.
namespace google {
// `name` is accepted for signature compatibility; the implementation in
// glog_utils.cc does not read it.
void InitGoogleLogging(const char* name);

void InstallFailureSignalHandler();
}  // namespace google

================================================
FILE: runtime/engine/common/base/log.h.in
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "@PPS_GLOG_LIB@"


================================================
FILE: runtime/engine/common/base/log_impl.cc
================================================
#include "base/log.h"

DEFINE_int32(logtostderr, 0, "logging to stderr");

namespace ppspeech {

static char __progname[] = "paddlespeech";

namespace log {

// Storage for LogMessage's static members. The per-severity file names start
// out empty and are filled in lazily by LogMessage::init().
std::mutex LogMessage::lock_;
std::string LogMessage::s_debug_logfile_;
std::string LogMessage::s_info_logfile_;
std::string LogMessage::s_warning_logfile_;
std::string LogMessage::s_error_logfile_;
std::string LogMessage::s_fatal_logfile_;

// Report the current process id (as decimal text) and the fixed program name
// used when building log file names.
void LogMessage::get_curr_proc_info(std::string* pid, std::string* proc_name) {
    std::stringstream pid_text;
    pid_text << getpid();
    pid_text >> *pid;

    *proc_name = ::ppspeech::__progname;
}

// Select the destination stream for one log statement.
//   FLAGS_logtostderr == 0 -> std::cout
//   FLAGS_logtostderr == 1 -> std::cerr
//   any other value        -> per-severity log file when out_to_file is set,
//                             otherwise std::cerr
// `file` and `line` are only used for the prefix written in file mode.
//
// Two defects of the previous version are addressed:
//  * stream_ was left uninitialized when the flag was neither 0 nor 1 and
//    out_to_file was false, so stream() dereferenced a garbage pointer;
//  * out_to_file_ was recorded as true even when the mutex was not taken, so
//    the destructor unlocked a mutex this thread did not own. It is now true
//    only when lock_ is actually held.
LogMessage::LogMessage(const char* file,
                       int line,
                       Severity level,
                       bool verbose,
                       bool out_to_file /* = false */)
    : stream_(&std::cerr),
      null_stream_(nullptr),
      level_(level),
      verbose_(verbose),
      out_to_file_(false) {
    if (FLAGS_logtostderr == 0) {
        stream_ = static_cast<std::ostream*>(&std::cout);
    } else if (FLAGS_logtostderr == 1) {
        stream_ = static_cast<std::ostream*>(&std::cerr);
    } else if (out_to_file) {
        // logfile: serialize writers until the destructor releases the lock
        lock_.lock();
        out_to_file_ = true;
        init(file, line);
    }
}

// Finish the log line, release the file lock if this message holds it, and
// abort the process for an enabled FATAL message.
LogMessage::~LogMessage() {
    std::ostream& out = stream();
    out << std::endl;

    if (out_to_file_) lock_.unlock();

    const bool fatal_and_enabled = verbose_ && (level_ == FATAL);
    if (fatal_and_enabled) std::abort();
}

// Per-thread sink for disabled messages: an unopened ofstream whose badbit is
// set once, on first use in each thread.
std::ostream* LogMessage::nullstream() {
    thread_local static std::ofstream sink;
    thread_local static const bool marked_bad =
        (sink.setstate(std::ios_base::badbit), true);
    (void)marked_bad;
    return &sink;
}

void LogMessage::init(const char* file, int line) {
    time_t t = time(0);
    char tmp[100];
    strftime(tmp, sizeof(tmp), "%Y%m%d-%H%M%S", localtime(&t));

    if (s_info_logfile_.empty()) {
        std::string pid;
        std::string proc_name;
        get_curr_proc_info(&pid, &proc_name);

        s_debug_logfile_ =
            std::string("log." + proc_name + ".log.DEBUG." + tmp + "." + pid);
        s_info_logfile_ =
            std::string("log." + proc_name + ".log.INFO." + tmp + "." + pid);
        s_warning_logfile_ =
            std::string("log." + proc_name + ".log.WARNING." + tmp + "." + pid);
        s_error_logfile_ =
            std::string("log." + proc_name + ".log.ERROR." + tmp + "." + pid);
        s_fatal_logfile_ =
            std::string("log." + proc_name + ".log.FATAL." + tmp + "." + pid);
    }

    thread_local static std::ofstream ofs;
    if (level_ == DEBUG) {
        ofs.open(s_debug_logfile_.c_str(), std::ios::out | std::ios::app);
    } else if (level_ == INFO) {
        ofs.open(s_info_logfile_.c_str(), std::ios::out | std::ios::app);
    } else if (level_ == WARNING) {
        ofs.open(s_warning_logfile_.c_str(), std::ios::out | std::ios::app);
    } else if (level_ == ERROR) {
        ofs.open(s_error_logfile_.c_str(), std::ios::out | std::ios::app);
    } else {
        ofs.open(s_fatal_logfile_.c_str(), std::ios::out | std::ios::app);
    }

    stream_ = &ofs;

    stream() << tmp << " " << file << " line " << line << "; ";
    stream() << std::flush;
}
}  // namespace log
}  // namespace ppspeech

================================================
FILE: runtime/engine/common/base/log_impl.h
================================================
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// modified from https://github.com/Dounm/dlog
// modified from
// https://android.googlesource.com/platform/art/+/806defa/src/logging.h

#pragma once

#include <stdlib.h>
#include <unistd.h>

#include <fstream>
#include <iostream>
#include <mutex>
#include <sstream>
#include <string>
#include <thread>

#include "base/common.h"
#include "base/macros.h"
#ifndef WITH_GLOG
#include "base/glog_utils.h"
#endif

DECLARE_int32(logtostderr);

namespace ppspeech {

namespace log {

// Log severities in increasing order. The numeric values 0..4 are what the
// LOG_0 .. LOG_4 macros (used by VLOG) map onto.
enum Severity {
    DEBUG,
    INFO,
    WARNING,
    ERROR,
    FATAL,
    NUM_SEVERITIES,  // count of the severities above, not a level itself
};

// One log statement. The logging macros create a temporary LogMessage,
// stream the message into stream(), and the destructor ends the line.
class LogMessage {
  public:
    // Fills *pid with the current process id and *proc_name with the program
    // name used in log file names.
    static void get_curr_proc_info(std::string* pid, std::string* proc_name);

    // file/line: call site. verbose: when false, everything streamed into
    // this message goes to the null stream. out_to_file: request file output
    // (the constructor decides based on FLAGS_logtostderr).
    LogMessage(const char* file,
               int line,
               Severity level,
               bool verbose,
               bool out_to_file = false);

    // Writes the trailing newline; see log_impl.cc for the unlock/abort
    // handling.
    ~LogMessage();

    // Destination for the message text: the selected stream when verbose_,
    // otherwise the null stream.
    std::ostream& stream() { return verbose_ ? *stream_ : *nullstream(); }

  private:
    void init(const char* file, int line);
    std::ostream* nullstream();

  private:
    std::ostream* stream_;
    // NOTE(review): null_stream_ is not referenced by the code in
    // log_impl.cc; nullstream() is used instead.
    std::ostream* null_stream_;
    Severity level_;
    bool verbose_;
    bool out_to_file_;

    static std::mutex lock_;  // stream write lock
    static std::string s_debug_logfile_;
    static std::string s_info_logfile_;
    static std::string s_warning_logfile_;
    static std::string s_error_logfile_;
    static std::string s_fatal_logfile_;

    DISALLOW_COPY_AND_ASSIGN(LogMessage);
};


}  // namespace log

}  // namespace ppspeech


// Debug-only logging. Without PPS_DEBUG the messages are created with
// verbose == false, so their text goes to the null stream and DLOG_FATAL does
// not abort. With PPS_DEBUG they behave like the LOG_* macros.
#ifndef PPS_DEBUG
#define DLOG_INFO \
    ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::INFO, false)
#define DLOG_WARNING \
    ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::WARNING, false)
#define DLOG_ERROR \
    ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::ERROR, false)
#define DLOG_FATAL \
    ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::FATAL, false)
#else
#define DLOG_INFO \
    ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::INFO, true)
#define DLOG_WARNING \
    ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::WARNING, true)
#define DLOG_ERROR \
    ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::ERROR, true)
#define DLOG_FATAL \
    ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::FATAL, true)
#endif


// Always-enabled logging, one macro per severity. Each expands to a temporary
// LogMessage; LOG(level) appends .stream() so callers can use operator<<.
// LOG_DEBUG was missing although LOG_0 expands to it, which made LOG(0) /
// VLOG(0) fail to compile.
#define LOG_DEBUG \
    ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::DEBUG, true)
#define LOG_INFO \
    ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::INFO, true)
#define LOG_WARNING \
    ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::WARNING, true)
#define LOG_ERROR \
    ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::ERROR, true)
#define LOG_FATAL \
    ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::FATAL, true)


// Numeric aliases so that VLOG(n) can be expressed through LOG(n).
// Only levels 0..4 are defined; note that VLOG(4) maps to LOG_FATAL.
#define LOG_0 LOG_DEBUG
#define LOG_1 LOG_INFO
#define LOG_2 LOG_WARNING
#define LOG_3 LOG_ERROR
#define LOG_4 LOG_FATAL

// LOG(INFO) -> LOG_INFO.stream(), LOG(1) -> LOG_1.stream(), ...
#define LOG(level) LOG_##level.stream()

#define DLOG(level) DLOG_##level.stream()

#define VLOG(verboselevel) LOG(verboselevel)

// CHECK(exp): builds a FATAL LogMessage that is enabled only when `exp` is
// false. In that case the text "Check Failed: <exp>" plus anything streamed
// after the macro is written, and the message destructor aborts. When `exp`
// is true the text goes to the null stream and nothing happens.
#define CHECK(exp)                                        \
    ppspeech::log::LogMessage(                            \
        __FILE__, __LINE__, ppspeech::log::FATAL, !(exp)) \
            .stream()                                     \
        << "Check Failed: " #exp

#define CHECK_EQ(x, y) CHECK((x) == (y))
#define CHECK_NE(x, y) CHECK((x) != (y))
#define CHECK_LE(x, y) CHECK((x) <= (y))
#define CHECK_LT(x, y) CHECK((x) < (y))
#define CHECK_GE(x, y) CHECK((x) >= (y))
#define CHECK_GT(x, y) CHECK((x) > (y))
// C-string equality. Previously DCHECK_STREQ referred to this macro without
// it being defined. Both arguments must be non-null.
#define CHECK_STREQ(str1, str2) CHECK(std::strcmp((str1), (str2)) == 0)

// DCHECK*: identical to CHECK* when PPS_DEBUG is defined; otherwise the check
// is still compiled (so it stays well-formed) but sits behind `while (false)`
// and is never evaluated.
#ifdef PPS_DEBUG
#define DCHECK(x) CHECK(x)
#define DCHECK_EQ(x, y) CHECK_EQ(x, y)
#define DCHECK_NE(x, y) CHECK_NE(x, y)
#define DCHECK_LE(x, y) CHECK_LE(x, y)
#define DCHECK_LT(x, y) CHECK_LT(x, y)
#define DCHECK_GE(x, y) CHECK_GE(x, y)
#define DCHECK_GT(x, y) CHECK_GT(x, y)
#define DCHECK_STREQ(str1, str2) CHECK_STREQ(str1, str2)
#else
#define DCHECK(condition) \
    while (false) CHECK(condition)
#define DCHECK_EQ(val1, val2) \
    while (false) CHECK_EQ(val1, val2)
#define DCHECK_NE(val1, val2) \
    while (false) CHECK_NE(val1, val2)
#define DCHECK_LE(val1, val2) \
    while (false) CHECK_LE(val1, val2)
#define DCHECK_LT(val1, val2) \
    while (false) CHECK_LT(val1, val2)
#define DCHECK_GE(val1, val2) \
    while (false) CHECK_GE(val1, val2)
#define DCHECK_GT(val1, val2) \
    while (false) CHECK_GT(val1, val2)
#define DCHECK_STREQ(str1, str2) \
    while (false) CHECK_STREQ(str1, str2)
#endif

================================================
FILE: runtime/engine/common/base/macros.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <limits>
#include <string>

// Deletes the copy constructor and copy assignment operator of TypeName.
// Place it inside the class body and follow it with a semicolon (the macro
// itself does not end with one). Guarded so that an existing definition of
// the same macro is not redefined.
#ifndef DISALLOW_COPY_AND_ASSIGN
#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
    TypeName(const TypeName&) = delete;    \
    void operator=(const TypeName&) = delete
#endif

namespace ppspeech {

// kSpaceSymbol in UTF-8 is: ▁
// (the three bytes E2 96 81, i.e. U+2581). The constant's identifier is
// spelled "kSpaceSymbo", without the trailing 'l'.
const char kSpaceSymbo[] = "\xe2\x96\x81";

}  // namespace ppspeech


================================================
FILE: runtime/engine/common/base/safe_queue.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "base/common.h"

namespace ppspeech {

// Mutex-protected FIFO queue with an optional capacity bound.
//  * capacity == 0: unbounded, push_back never blocks.
//  * capacity  > 0: push_back blocks while the queue holds `capacity` items.
// pop() never blocks; it returns false when the queue is empty.
template <typename T>
class SafeQueue {
  public:
    explicit SafeQueue(size_t capacity = 0);

    // Append a copy of `in`, waiting for free space on a bounded queue.
    void push_back(const T& in);

    // Move the oldest element into *out. Returns false (leaving *out
    // untouched) when the queue is empty.
    bool pop(T* out);

    // Snapshot queries; taken under the lock so they do not race with
    // concurrent push_back/pop/clear.
    bool empty() const {
        std::lock_guard<std::mutex> lock(mutex_);
        return buffer_.empty();
    }
    size_t size() const {
        std::lock_guard<std::mutex> lock(mutex_);
        return buffer_.size();
    }

    // Drop all queued elements and release every blocked producer.
    void clear();


  private:
    mutable std::mutex mutex_;  // mutable: locked by const observers
    std::condition_variable condition_;  // signalled when space is freed
    std::deque<T> buffer_;
    size_t capacity_;
};

template <typename T>
SafeQueue<T>::SafeQueue(size_t capacity) : capacity_(capacity) {}

template <typename T>
void SafeQueue<T>::push_back(const T& in) {
    std::unique_lock<std::mutex> lock(mutex_);
    if (capacity_ > 0) {
        // Wait for a free slot. The previous predicate
        // (capacity_ >= buffer_.size()) was already true on a full queue, so
        // the bound was never enforced.
        condition_.wait(lock, [this] { return buffer_.size() < capacity_; });
    }

    buffer_.push_back(in);
}

template <typename T>
bool SafeQueue<T>::pop(T* out) {
    // Check emptiness under the lock: the former unlocked check raced with
    // writers and could be followed by an unbounded wait if another consumer
    // took the last element in between.
    std::unique_lock<std::mutex> lock(mutex_);
    if (buffer_.empty()) {
        return false;
    }

    *out = std::move(buffer_.front());
    buffer_.pop_front();
    lock.unlock();
    condition_.notify_one();  // one slot was freed: wake one producer
    return true;
}

template <typename T>
void SafeQueue<T>::clear() {
    {
        std::lock_guard<std::mutex> lock(mutex_);
        buffer_.clear();
    }
    // Many slots may have been freed at once, so wake all waiting producers.
    condition_.notify_all();
}
}  // namespace ppspeech


================================================
FILE: runtime/engine/common/base/safe_queue_inl.h
================================================


================================================
FILE: runtime/engine/common/base/thread_pool.h
================================================
// Copyright (c) 2012 Jakob Progsch, Václav Zeman

// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.

// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:

//   1. The origin of this software must not be misrepresented; you must not
//   claim that you wrote the original software. If you use this software
//   in a product, an acknowledgment in the product documentation would be
//   appreciated but is not required.

//   2. Altered source versions must be plainly marked as such, and must not be
//   misrepresented as being the original software.

//   3. This notice may not be removed or altered from any source
//   distribution.
// this code is from https://github.com/progschj/ThreadPool

#ifndef BASE_THREAD_POOL_H
#define BASE_THREAD_POOL_H

#include <condition_variable>
#include <functional>
#include <future>
#include <memory>
#include <mutex>
#include <queue>
#include <stdexcept>
#include <thread>
#include <vector>

// Fixed-size pool of worker threads that run queued tasks in FIFO order.
class ThreadPool {
  public:
    // Start the given number of worker threads.
    explicit ThreadPool(size_t);

    // Queue f(args...) for execution; the returned future yields its result.
    // Throws std::runtime_error if the pool is already stopping.
    template <class F, class... Args>
    auto enqueue(F&& f, Args&&... args)
        -> std::future<typename std::result_of<F(Args...)>::type>;

    // Let the workers drain the queue, then join them.
    ~ThreadPool();

  private:
    // worker threads, kept so the destructor can join them
    std::vector<std::thread> workers;
    // pending tasks
    std::queue<std::function<void()>> tasks;

    // queue_mutex guards `tasks` and `stop`; `condition` signals changes
    std::mutex queue_mutex;
    std::condition_variable condition;
    bool stop;
};

// Launch the workers; each one repeatedly takes a task and runs it.
inline ThreadPool::ThreadPool(size_t threads) : stop(false) {
    const auto worker_loop = [this] {
        while (true) {
            std::function<void()> job;

            {
                std::unique_lock<std::mutex> guard(queue_mutex);
                while (!stop && tasks.empty()) {
                    condition.wait(guard);
                }
                // Leave only once stopping AND no work is left.
                if (stop && tasks.empty()) return;
                job = std::move(tasks.front());
                tasks.pop();
            }

            // Run outside the lock so other workers can dequeue meanwhile.
            job();
        }
    };

    for (size_t n = 0; n != threads; ++n) {
        workers.emplace_back(worker_loop);
    }
}

// Wrap the call in a packaged_task, queue it, and wake one worker.
template <class F, class... Args>
auto ThreadPool::enqueue(F&& f, Args&&... args)
    -> std::future<typename std::result_of<F(Args...)>::type> {
    typedef typename std::result_of<F(Args...)>::type return_type;

    auto bound_call =
        std::bind(std::forward<F>(f), std::forward<Args>(args)...);
    // shared_ptr because std::function requires a copyable callable while
    // packaged_task is move-only.
    auto job = std::make_shared<std::packaged_task<return_type()>>(
        std::move(bound_call));

    std::future<return_type> pending = job->get_future();
    {
        std::lock_guard<std::mutex> guard(queue_mutex);

        // no new work once the pool is shutting down
        if (stop) throw std::runtime_error("enqueue on stopped ThreadPool");

        tasks.emplace([job]() { (*job)(); });
    }
    condition.notify_one();
    return pending;
}

// Flag the stop, wake every worker, and wait for all of them to finish.
inline ThreadPool::~ThreadPool() {
    {
        std::lock_guard<std::mutex> guard(queue_mutex);
        stop = true;
    }
    condition.notify_all();
    for (auto& w : workers) {
        w.join();
    }
}

#endif


================================================
FILE: runtime/engine/common/frontend/CMakeLists.txt
================================================
# Core fbank feature-extraction sources (windowing, FFT, mel filterbank).
add_library(kaldi-native-fbank-core 
  feature-fbank.cc
  feature-functions.cc
  feature-window.cc
  fftsg.c
  mel-computations.cc
  rfft.cc
)
target_link_libraries(kaldi-native-fbank-core PUBLIC utils base)
# PUBLIC: -fPIC is also applied to targets that link against this library.
target_compile_options(kaldi-native-fbank-core PUBLIC "-fPIC")

# Streaming frontend pipeline (cmvn, caches, assembler, wave reader) built as
# a static library on top of the fbank core.
add_library(frontend STATIC
  cmvn.cc
  audio_cache.cc
  feature_cache.cc
  feature_pipeline.cc
  assembler.cc
  wave-reader.cc
)
target_link_libraries(frontend PUBLIC kaldi-native-fbank-core utils base)

# Command-line tools; each name maps to <name>.cc in this directory.
set(BINS 
  compute_fbank_main
)

foreach(bin_name IN LISTS BINS)
  add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
  target_include_directories(${bin_name} PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
  # https://github.com/Kitware/CMake/blob/v3.1.0/Modules/FindThreads.cmake#L207
  # NOTE(review): gflags is linked by shared-object file name
  # (libgflags_nothreads.so) rather than through a CMake target.
  target_link_libraries(${bin_name} PUBLIC frontend base utils kaldi-util libgflags_nothreads.so Threads::Threads extern_glog)
endforeach()


================================================
FILE: runtime/engine/common/frontend/assembler.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "frontend/assembler.h"

namespace ppspeech {

using kaldi::BaseFloat;
using std::unique_ptr;
using std::vector;

// Derive the chunking geometry from the options and take ownership of the
// upstream extractor.
//   stride = subsampling_rate * nnet_decoder_chunk
//   size   = (nnet_decoder_chunk - 1) * subsampling_rate + receptive field
//   cache  = size - stride  (frames carried over between chunks)
Assembler::Assembler(AssemblerOptions opts,
                     unique_ptr<FrontendInterface> base_extractor) {
    // Take the extractor first; the feature dimension is read from it.
    base_extractor_ = std::move(base_extractor);
    dim_ = base_extractor_->Dim();

    fill_zero_ = opts.fill_zero;
    receptive_filed_length_ = opts.receptive_filed_length;

    frame_chunk_stride_ = opts.subsampling_rate * opts.nnet_decoder_chunk;
    frame_chunk_size_ = (opts.nnet_decoder_chunk - 1) * opts.subsampling_rate +
                        opts.receptive_filed_length;
    cache_size_ = frame_chunk_size_ - frame_chunk_stride_;
}

// Forward incoming data unchanged to the wrapped extractor; nothing is
// buffered at this level.
void Assembler::Accept(const std::vector<BaseFloat>& inputs) {
    base_extractor_->Accept(inputs);
}

// pop feature chunk
bool Assembler::Read(std::vector<BaseFloat>* feats) {
    kaldi::Timer timer;
    bool result = Compute(feats);
    VLOG(1) << "Assembler::Read cost: " << timer.Elapsed() << " sec.";
    return result;
}

// Assemble one chunk of frames into *feats (row-major, dim_ values per
// frame).
// Frames are pulled one at a time from base_extractor_ into feature_cache_
// until frame_chunk_size_ frames are available. Returns false when a full
// chunk cannot be produced yet, or when fewer than receptive_filed_length_
// frames remain. On success the last cache_size_ frames of the chunk are kept
// in feature_cache_ so that the next chunk overlaps with this one.
bool Assembler::Compute(vector<BaseFloat>* feats) {
    // compute and feed frame by frame
    while (feature_cache_.size() < frame_chunk_size_) {
        vector<BaseFloat> feature;
        bool result = base_extractor_->Read(&feature);
        if (result == false || feature.size() == 0) {
            VLOG(1) << "result: " << result
                    << " feature dim: " << feature.size();
            // No frame available right now. If the input is not finished,
            // give up for this call (more data may arrive later); if it is
            // finished, fall through and emit what is cached.
            if (IsFinished() == false) {
                VLOG(1) << "finished reading feature. cache size: "
                        << feature_cache_.size();
                return false;
            } else {
                VLOG(1) << "break";
                break;
            }
        }
        feature_cache_.push(feature);
        nframes_ += 1;
        VLOG(1) << "nframes: " << nframes_;
    }

    // Too few frames left to form even one receptive field.
    if (feature_cache_.size() < receptive_filed_length_) {
        VLOG(3) << "feature_cache less than receptive_filed_length. "
                << feature_cache_.size() << ": " << receptive_filed_length_;
        return false;
    }

    // Optionally pad the final, short chunk with zero frames up to the full
    // chunk size.
    if (fill_zero_) {
        while (feature_cache_.size() < frame_chunk_size_) {
            vector<BaseFloat> feature(dim_, kaldi::kSetZero);
            nframes_ += 1;
            feature_cache_.push(feature);
        }
    }

    int32 this_chunk_size =
        std::min(static_cast<int32>(feature_cache_.size()), frame_chunk_size_);
    feats->resize(dim_ * this_chunk_size);
    VLOG(3) << "read " << this_chunk_size << " feat.";

    int32 counter = 0;
    while (counter < this_chunk_size) {
        vector<BaseFloat>& val = feature_cache_.front();
        CHECK(val.size() == dim_) << val.size();

        // Copy frame `counter` into its row of the output buffer.
        int32 start = counter * dim_;
        std::memcpy(
            feats->data() + start, val.data(), val.size() * sizeof(BaseFloat));

        // The last cache_size_ frames of this chunk are re-queued at the back
        // so they are seen again at the start of the next chunk.
        if (this_chunk_size - counter <= cache_size_) {
            feature_cache_.push(val);
        }

        // val is reference, so we should pop here
        feature_cache_.pop();

        counter++;
    }
    // After the rotation only the overlap frames may remain.
    CHECK(feature_cache_.size() == cache_size_);

    return true;
}


// Returns the assembler to its initial state: every cached frame is dropped,
// the frame counter is zeroed and the upstream extractor is reset as well.
void Assembler::Reset() {
    // std::queue offers no clear(); assigning a fresh queue releases the old
    // contents.
    feature_cache_ = std::queue<std::vector<BaseFloat>>();
    nframes_ = 0;
    base_extractor_->Reset();
}

}  // namespace ppspeech


================================================
FILE: runtime/engine/common/frontend/assembler.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "base/common.h"
#include "frontend/frontend_itf.h"

namespace ppspeech {

// Configuration consumed by the Assembler constructor. All fields default to
// 1 / false.
// NOTE(review): the constructor is not part of this header; how these values
// are combined into the chunk window/stride should be confirmed against
// assembler.cc.
struct AssemblerOptions {
    // refer:https://github.com/PaddlePaddle/PaddleSpeech/blob/develop/paddlespeech/s2t/exps/deepspeech2/model.py
    // the nnet batch forward
    // ("filed" is a historical misspelling of "field"; the name is kept
    // because callers reference it.)
    int32 receptive_filed_length{1};
    int32 subsampling_rate{1};
    int32 nnet_decoder_chunk{1};
    bool fill_zero{false};  // whether to pad the last chunk with zero frames
                            // when it is shorter than frame_chunk_size_
};

// Frontend stage that groups the frames produced by an upstream extractor
// into chunks. Frames are buffered in feature_cache_ and handed out by Read().
class Assembler : public FrontendInterface {
  public:
    explicit Assembler(
        AssemblerOptions opts,
        std::unique_ptr<FrontendInterface> base_extractor = NULL);

    // Feed feats or waves
    void Accept(const std::vector<kaldi::BaseFloat>& inputs) override;

    // feats size = num_frames * feat_dim
    bool Read(std::vector<kaldi::BaseFloat>* feats) override;

    // feat dim
    size_t Dim() const override { return dim_; }

    // The finished state lives in the upstream extractor; both calls are
    // forwarded to it.
    void SetFinished() override { base_extractor_->SetFinished(); }

    bool IsFinished() const override { return base_extractor_->IsFinished(); }

    void Reset() override;

  private:
    bool Compute(std::vector<kaldi::BaseFloat>* feats);

    bool fill_zero_{false};

    // Every scalar member carries an in-class default so that no code path
    // can read an indeterminate value, whatever the constructor assigns.
    int32 dim_{0};                 // feat dim
    int32 frame_chunk_size_{0};    // window
    int32 frame_chunk_stride_{0};  // stride
    int32 cache_size_{0};          // window - stride
    int32 receptive_filed_length_{0};
    std::queue<std::vector<kaldi::BaseFloat>> feature_cache_;
    std::unique_ptr<FrontendInterface> base_extractor_;

    int32 nframes_{0};  // num frame computed
    DISALLOW_COPY_AND_ASSIGN(Assembler);
};

}  // namespace ppspeech


================================================
FILE: runtime/engine/common/frontend/audio_cache.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "frontend/audio_cache.h"

#include "kaldi/base/timer.h"

namespace ppspeech {

using kaldi::BaseFloat;
using std::vector;

// Builds a ring buffer able to hold `buffer_size` samples.
//   buffer_size: capacity of the ring buffer, in samples.
//   to_float32:  when true, Accept() rescales every sample through
//                Convert2PCM32() before storing it.
// The initializers follow the member declaration order, and nsamples_ is now
// zeroed: Read() accumulates into it, so it must not start indeterminate.
AudioCache::AudioCache(int buffer_size, bool to_float32)
    : offset_(0),
      size_(0),
      capacity_(buffer_size),  // unit: sample
      finished_(false),
      timeout_(1),  // ms
      to_float32_(to_float32),
      nsamples_(0) {
    ring_buffer_.resize(capacity_);
}

// Rescales one sample from the int16 range by multiplying with 1 / 2^15.
BaseFloat AudioCache::Convert2PCM32(BaseFloat val) {
    // sample type int16, int16->float32
    const double inv_full_scale = 1. / std::pow(2.0, 15);
    return val * inv_full_scale;
}

void AudioCache::Accept(const vector<BaseFloat>& waves) {
    kaldi::Timer timer;
    std::unique_lock<std::mutex> lock(mutex_);
    while (size_ + waves.size() > ring_buffer_.size()) {
        ready_feed_condition_.wait(lock);
    }
    for (size_t idx = 0; idx < waves.size(); ++idx) {
        int32 buffer_idx = (idx + offset_ + size_) % ring_buffer_.size();
        ring_buffer_[buffer_idx] = waves[idx];
        if (to_float32_) ring_buffer_[buffer_idx] = Convert2PCM32(waves[idx]);
    }
    size_ += waves.size();
    VLOG(1) << "AudioCache::Accept cost: " << timer.Elapsed() << " sec. "
            << waves.size() << " samples.";
}

// Pops samples from the ring buffer into `waves`. The requested amount is
// waves->size(). If fewer samples are buffered the call fails, unless the
// stream is finished, in which case `waves` is shrunk to what is left.
// Returns true when `waves` was filled (possibly with zero samples).
bool AudioCache::Read(vector<BaseFloat>* waves) {
    kaldi::Timer timer;
    size_t wanted = waves->size();
    std::unique_lock<std::mutex> lock(mutex_);

    if (wanted > size_) {
        if (!finished_) {
            return false;
        }
        // read last chunk data
        wanted = size_;
        waves->resize(wanted);
    }

    for (size_t i = 0; i < wanted; ++i) {
        const size_t src = (offset_ + i) % ring_buffer_.size();
        (*waves)[i] = ring_buffer_[src];
    }

    // Consume what was copied out.
    offset_ = (offset_ + wanted) % ring_buffer_.size();
    size_ -= wanted;

    nsamples_ += wanted;
    VLOG(3) << "nsamples readed: " << nsamples_;

    // Space was freed: let a blocked Accept() retry.
    ready_feed_condition_.notify_one();
    VLOG(1) << "AudioCache::Read cost: " << timer.Elapsed() << " sec. "
            << wanted << " samples.";
    return true;
}

}  // namespace ppspeech


================================================
FILE: runtime/engine/common/frontend/audio_cache.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#pragma once

#include "base/common.h"
#include "frontend/frontend_itf.h"

namespace ppspeech {

// waves cache
class AudioCache : public FrontendInterface {
  public:
    explicit AudioCache(int buffer_size = 1000 * kint16max,
                        bool to_float32 = false);

    virtual void Accept(const std::vector<BaseFloat>& waves);

    virtual bool Read(std::vector<kaldi::BaseFloat>* waves);

    // the audio dim is 1, one sample, which is useless,
    // so we return size_(cache samples) instead.
    virtual size_t Dim() const { return size_; }

    virtual void SetFinished() {
        std::lock_guard<std::mutex> lock(mutex_);
        finished_ = true;
    }

    virtual bool IsFinished() const { return finished_ && (size_ == 0); }

    void Reset() override {
        offset_ = 0;
        size_ = 0;
        finished_ = false;
        nsamples_ = 0;
    }

  private:
    kaldi::BaseFloat Convert2PCM32(kaldi::BaseFloat val);

    std::vector<kaldi::BaseFloat> ring_buffer_;
    size_t offset_;    // offset in ring_buffer_, begin of data
    size_t size_;      // samples in ring_buffer_, size of valid data
    size_t capacity_;  // capacity of ring_buffer_, full size of data buffer,
                       // unit: sample
    bool finished_;    // reach audio end
    std::mutex mutex_;
    std::condition_variable ready_feed_condition_;
    kaldi::int32 timeout_;  // millisecond
    bool to_float32_;       // int16 -> float32. used in linear_spectrogram

    int32 nsamples_;  // number samples readed.
    DISALLOW_COPY_AND_ASSIGN(AudioCache);
};

}  // namespace ppspeech


================================================
FILE: runtime/engine/common/frontend/cmvn.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include "frontend/cmvn.h"

#include "utils/file_utils.h"
#include "utils/picojson.h"

namespace ppspeech {

using kaldi::BaseFloat;
using std::unique_ptr;
using std::vector;


// Takes ownership of the upstream extractor and loads the global statistics
// from the JSON file `cmvn_file`, which must not be an empty path.
CMVN::CMVN(std::string cmvn_file, unique_ptr<FrontendInterface> base_extractor)
    : base_extractor_(std::move(base_extractor)), var_norm_(true) {
    CHECK_NE(cmvn_file, "");
    ReadCMVNFromJson(cmvn_file);
    // ReadCMVNFromJson appends the frame count as the last entry of
    // mean_stats_, so the feature dim is one less than its size.
    dim_ = mean_stats_.size() - 1;
}

void CMVN::ReadCMVNFromJson(std::string cmvn_file) {
    std::string json_str = ppspeech::ReadFile2String(cmvn_file);
    picojson::value value;
    std::string err;
    const char* json_end = picojson::parse(
        value, json_str.c_str(), json_str.c_str() + json_str.size(), &err);
    if (!value.is<picojson::object>()) {
        LOG(ERROR) << "Input json file format error.";
    }
    const picojson::value::array& mean_stat =
        value.get("mean_stat").get<picojson::array>();
    for (auto it = mean_stat.begin(); it != mean_stat.end(); it++) {
        mean_stats_.push_back((*it).get<double>());
    }

    const picojson::value::array& var_stat =
        value.get("var_stat").get<picojson::array>();
    for (auto it = var_stat.begin(); it != var_stat.end(); it++) {
        var_stats_.push_back((*it).get<double>());
    }

    kaldi::int32 frame_num = value.get("frame_num").get<int64_t>();
    LOG(INFO) << "nframe: " << frame_num;
    mean_stats_.push_back(frame_num);
    var_stats_.push_back(0);
}

// CMVN buffers nothing itself: incoming waves/feats go straight to the
// upstream extractor, which computes the raw feature.
void CMVN::Accept(const std::vector<kaldi::BaseFloat>& inputs) {
    base_extractor_->Accept(inputs);
}

bool CMVN::Read(std::vector<BaseFloat>* feats) {
    // compute feature
    if (base_extractor_->Read(feats) == false || feats->size() == 0) {
        return false;
    }

    // apply cmvn
    kaldi::Timer timer;
    Compute(feats);
    VLOG(1) << "CMVN::Read cost: " << timer.Elapsed() << " sec.";
    return true;
}

// Applies global mean (and, when var_norm_ is set, variance) normalization
// in place. `feats` holds num_frames frames of dim_ values each, flattened
// row by row. For every dimension d:
//     x(d) <-- x(d) * scale(d) + offset(d)
// with scale = 1 / sqrt(var) and offset = -mean * scale. When var_norm_ is
// false the scale is 1, i.e. only the mean is subtracted.
// Problems with the statistics are logged; processing still continues.
void CMVN::Compute(vector<BaseFloat>* feats) const {
    KALDI_ASSERT(feats != NULL);

    if (feats->size() % dim_ != 0) {
        LOG(ERROR) << "Dim mismatch: cmvn " << mean_stats_.size() << ','
                   << var_stats_.size() - 1 << ", feats " << feats->size()
                   << 'x';
    }
    if (var_stats_.size() == 0 && var_norm_) {
        LOG(ERROR)
            << "You requested variance normalization but no variance stats_ "
            << "are supplied.";
    }

    // The frame count is stored after the dim_ per-dimension sums.
    const double count = mean_stats_[dim_];
    // Do not change the threshold of 1.0 here: in the balanced-cmvn code, when
    // computing an offset and representing it as stats_, we use a count of one.
    if (count < 1.0)
        LOG(ERROR) << "Insufficient stats_ for cepstral mean and variance "
                      "normalization: "
                   << "count = " << count;

    // The transform depends only on the dimension, so build it once per
    // dimension instead of once per feature value.
    vector<BaseFloat> offsets(dim_);
    vector<BaseFloat> scales(dim_);
    for (size_t d = 0; d < dim_; ++d) {
        const double mean = mean_stats_[d] / count;
        double scale = 1.0;
        if (var_norm_) {
            double var = (var_stats_[d] / count) - mean * mean;
            const double floor = 1.0e-20;
            if (var < floor) {
                LOG(WARNING) << "Flooring cepstral variance from " << var
                             << " to " << floor;
                var = floor;
            }
            scale = 1.0 / std::sqrt(var);
            if (scale != scale || 1 / scale == 0.0)
                LOG(ERROR)
                    << "NaN or infinity in cepstral mean/variance computation";
        }
        scales[d] = scale;
        offsets[d] = -(mean * scale);
    }

    // Apply the normalization.
    for (size_t idx = 0; idx < feats->size(); ++idx) {
        const size_t d = idx % dim_;
        BaseFloat& x = (*feats)[idx];
        if (var_norm_) {
            x *= scales[d];
        }
        x += offsets[d];
    }
}

}  // namespace ppspeech


================================================
FILE: runtime/engine/common/frontend/cmvn.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "base/common.h"
#include "frontend/frontend_itf.h"
#include "kaldi/util/options-itf.h"

namespace ppspeech {

class CMVN : public FrontendInterface {
  public:
    explicit CMVN(std::string cmvn_file,
                  std::unique_ptr<FrontendInterface> base_extractor);
    virtual void Accept(const std::vector<kaldi::BaseFloat>& inputs);

    // the length of feats = feature_row * feature_dim,
    // the Matrix is squashed into Vector
    virtual bool Read(std::vector<kaldi::BaseFloat>* feats);
    // the dim_ is the feature dim.
    virtual size_t Dim() const { return dim_; }
    virtual void SetFinished() { base_extractor_->SetFinished(); }
    virtual bool IsFinished() const { return base_extractor_->IsFinished(); }
    virtual void Reset() { base_extractor_->Reset(); }

  private:
    void ReadCMVNFromJson(std::string cmvn_file);
    void Compute(std::vector<kaldi::BaseFloat>* feats) const;
    std::vector<double> mean_stats_;
    std::vector<double> var_stats_;
    std::unique_ptr<FrontendInterface> base_extractor_;
    size_t dim_;
    bool var_norm_;
};

}  // namespace ppspeech

================================================
FILE: runtime/engine/common/frontend/compute_fbank_main.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// TODO: refactor, replace with gtest

#include "base/flags.h"
#include "base/log.h"
#include "frontend/audio_cache.h"
#include "frontend/data_cache.h"
#include "frontend/fbank.h"
#include "frontend/feature_cache.h"
#include "frontend/frontend_itf.h"
#include "frontend/normalizer.h"
#include "frontend/wave-reader.h"
#include "kaldi/util/kaldi-io.h"
#include "kaldi/util/table-types.h"

// Command-line flags of the streaming fbank extraction tool.
// wav_rspecifier / feature_wspecifier must be non-empty (checked in main).
// cmvn_file is handed to ppspeech::CMVN, which rejects an empty path.
// streaming_chunk is multiplied by sample_rate to get the number of samples
// fed per step, i.e. it is expressed in seconds.
DEFINE_string(wav_rspecifier, "", "test wav scp path");
DEFINE_string(feature_wspecifier, "", "output feats wspecifier");
DEFINE_string(cmvn_file, "", "read cmvn");
DEFINE_double(streaming_chunk, 0.36, "streaming feature chunk size");
DEFINE_int32(num_bins, 161, "fbank num bins");
DEFINE_int32(sample_rate, 16000, "sampe rate: 16k, 8k.");

int main(int argc, char* argv[]) {
    gflags::SetUsageMessage("Usage:");
    gflags::ParseCommandLineFlags(&argc, &argv, false);
    google::InitGoogleLogging(argv[0]);
    google::InstallFailureSignalHandler();
    FLAGS_logtostderr = 1;

    CHECK_GT(FLAGS_wav_rspecifier.size(), 0);
    CHECK_GT(FLAGS_feature_wspecifier.size(), 0);
    kaldi::SequentialTableReader<kaldi::WaveHolder> wav_reader(
        FLAGS_wav_rspecifier);
    kaldi::SequentialTableReader<kaldi::WaveInfoHolder> wav_info_reader(
        FLAGS_wav_rspecifier);
    kaldi::BaseFloatMatrixWriter feat_writer(FLAGS_feature_wspecifier);

    int32 num_done = 0, num_err = 0;

    // feature pipeline: wave cache --> povey window
    // -->fbank --> global cmvn -> feat cache

    std::unique_ptr<ppspeech::FrontendInterface> data_source(
        new ppspeech::AudioCache(3600 * 1600, false));

    knf::FbankOptions opt;
    opt.frame_opts.frame_length_ms = 25;
    opt.frame_opts.frame_shift_ms = 10;
    opt.mel_opts.num_bins = FLAGS_num_bins;
    opt.frame_opts.dither = 0.0;
    LOG(INFO) << "frame_length_ms: " << opt.frame_opts.frame_length_ms;
    LOG(INFO) << "frame_shift_ms: " << opt.frame_opts.frame_shift_ms;
    LOG(INFO) << "num_bins: " << opt.mel_opts.num_bins;
    LOG(INFO) << "dither: " << opt.frame_opts.dither;

    std::unique_ptr<ppspeech::FrontendInterface> fbank(
        new ppspeech::Fbank(opt, std::move(data_source)));

    std::unique_ptr<ppspeech::FrontendInterface> cmvn(
        new ppspeech::CMVN(FLAGS_cmvn_file, std::move(fbank)));

    // the feature cache output feature chunk by chunk.
    ppspeech::FeatureCache feature_cache(kint16max, std::move(cmvn));
    LOG(INFO) << "fbank: " << true;
    LOG(INFO) << "feat dim: " << feature_cache.Dim();


    float streaming_chunk = FLAGS_streaming_chunk;
    int chunk_sample_size = streaming_chunk * FLAGS_sample_rate;
    LOG(INFO) << "sr: " << FLAGS_sample_rate;
    LOG(INFO) << "chunk size (sec): " << streaming_chunk;
    LOG(INFO) << "chunk size (sample): " << chunk_sample_size;

    for (; !wav_reader.Done() && !wav_info_reader.Done();
         wav_reader.Next(), wav_info_reader.Next()) {
        const std::string& utt = wav_reader.Key();
        const kaldi::WaveData& wave_data = wav_reader.Value();

        const std::string& utt2 = wav_info_reader.Key();
        const kaldi::WaveInfo& wave_info = wav_info_reader.Value();

        CHECK(utt == utt2)
            << "wav reader and wav info reader using diff rspecifier!!!";
        LOG(INFO) << "utt: " << utt;
        LOG(INFO) << "samples: " << wave_info.SampleCount();
        LOG(INFO) << "dur: " << wave_info.Duration() << " sec";
        CHECK(wave_info.SampFreq() == FLAGS_sample_rate)
            << "need " << FLAGS_sample_rate << " get " << wave_info.SampFreq();

        // load first channel wav
        int32 this_channel = 0;
        kaldi::SubVector<kaldi::BaseFloat> waveform(wave_data.Data(),
                                                    this_channel);

        // compute feat chunk by chunk
        int tot_samples = waveform.Dim();
        int sample_offset = 0;
        std::vector<kaldi::Vector<BaseFloat>> feats;
        int feature_rows = 0;
        while (sample_offset < tot_samples) {
            // cur chunk size
            int cur_chunk_size =
                std::min(chunk_sample_size, tot_samples - sample_offset);

            // get chunk wav
            std::vector<kaldi::BaseFloat> wav_chunk(cur_chunk_size);
            for (int i = 0; i < cur_chunk_size; ++i) {
                wav_chunk[i] = waveform(sample_offset + i);
            }

            // compute feat
            feature_cache.Accept(wav_chunk);

            // send finish signal
            if (cur_chunk_size < chunk_sample_size) {
                feature_cache.SetFinished();
            }

            // read feat
            kaldi::Vector<BaseFloat> features(feature_cache.Dim());
            bool flag = true;
            do {
                std::vector<BaseFloat> tmp;
                flag = feature_cache.Read(&tmp);
                std::memcpy(features.Data(),
                            tmp.data(),
                            tmp.size() * sizeof(BaseFloat));
                if (flag && features.Dim() != 0) {
                    feats.push_back(features);
                    feature_rows += features.Dim() / feature_cache.Dim();
                }
            } while (flag == true && features.Dim() != 0);

            // forward offset
            sample_offset += cur_chunk_size;
        }

        int cur_idx = 0;
        kaldi::Matrix<kaldi::BaseFloat> features(feature_rows,
                                                 feature_cache.Dim());
        for (auto feat : feats) {
            int num_rows = feat.Dim() / feature_cache.Dim();
            for (int row_idx = 0; row_idx < num_rows; ++row_idx) {
                for (size_t col_idx = 0; col_idx < feature_cache.Dim();
                     ++col_idx) {
                    features(cur_idx, col_idx) =
                        feat(row_idx * feature_cache.Dim() + col_idx);
                }
                ++cur_idx;
            }
        }
        LOG(INFO) << "feat shape: " << features.NumRows() << " , "
                  << features.NumCols();
        feat_writer.Write(utt, features);

        // reset frontend pipeline state
        feature_cache.Reset();

        if (num_done % 50 == 0 && num_done != 0)
            VLOG(2) << "Processed " << num_done << " utterances";

        num_done++;
    }

    LOG(INFO) << "Done " << num_done << " utterances, " << num_err
              << " with errors.";
    return (num_done != 0 ? 0 : 1);
}


================================================
FILE: runtime/engine/common/frontend/compute_linear_spectrogram_main.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "base/flags.h"
#include "base/log.h"
#include "frontend/audio/audio_cache.h"
#include "frontend/audio/data_cache.h"
#include "frontend/audio/feature_cache.h"
#include "frontend/audio/frontend_itf.h"
#include "frontend/audio/linear_spectrogram.h"
#include "frontend/audio/normalizer.h"
#include "kaldi/feat/wave-reader.h"
#include "kaldi/util/kaldi-io.h"
#include "kaldi/util/table-types.h"

// Command-line flags of the streaming linear-spectrogram extraction tool.
// cmvn_file is handed to ppspeech::CMVN. streaming_chunk is multiplied by
// the fixed 16000 Hz sample rate in main, i.e. it is expressed in seconds.
DEFINE_string(wav_rspecifier, "", "test wav scp path");
DEFINE_string(feature_wspecifier, "", "output feats wspecifier");
DEFINE_string(cmvn_file, "./cmvn.ark", "read cmvn");
DEFINE_double(streaming_chunk, 0.36, "streaming feature chunk size");

int main(int argc, char* argv[]) {
    gflags::SetUsageMessage("Usage:");
    gflags::ParseCommandLineFlags(&argc, &argv, false);
    google::InitGoogleLogging(argv[0]);
    google::InstallFailureSignalHandler();
    FLAGS_logtostderr = 1;

    kaldi::SequentialTableReader<kaldi::WaveHolder> wav_reader(
        FLAGS_wav_rspecifier);
    kaldi::BaseFloatMatrixWriter feat_writer(FLAGS_feature_wspecifier);

    int32 num_done = 0, num_err = 0;

    // feature pipeline: wave cache --> hanning window
    // -->linear_spectrogram --> global cmvn -> feat cache

    std::unique_ptr<ppspeech::FrontendInterface> data_source(
        new ppspeech::AudioCache(3600 * 1600, true));

    ppspeech::LinearSpectrogramOptions opt;
    opt.frame_opts.frame_length_ms = 20;
    opt.frame_opts.frame_shift_ms = 10;
    opt.frame_opts.dither = 0.0;
    opt.frame_opts.remove_dc_offset = false;
    opt.frame_opts.window_type = "hanning";
    opt.frame_opts.preemph_coeff = 0.0;
    LOG(INFO) << "linear feature: " << true;
    LOG(INFO) << "frame length (ms): " << opt.frame_opts.frame_length_ms;
    LOG(INFO) << "frame shift (ms): " << opt.frame_opts.frame_shift_ms;

    std::unique_ptr<ppspeech::FrontendInterface> linear_spectrogram(
        new ppspeech::LinearSpectrogram(opt, std::move(data_source)));

    std::unique_ptr<ppspeech::FrontendInterface> cmvn(
        new ppspeech::CMVN(FLAGS_cmvn_file, std::move(linear_spectrogram)));

    ppspeech::FeatureCacheOptions feat_cache_opts;
    // the feature cache output feature chunk by chunk.
    ppspeech::FeatureCache feature_cache(feat_cache_opts, std::move(cmvn));
    LOG(INFO) << "feat dim: " << feature_cache.Dim();

    int sample_rate = 16000;
    float streaming_chunk = FLAGS_streaming_chunk;
    int chunk_sample_size = streaming_chunk * sample_rate;
    LOG(INFO) << "sample rate: " << sample_rate;
    LOG(INFO) << "chunk size (s): " << streaming_chunk;
    LOG(INFO) << "chunk size (sample): " << chunk_sample_size;


    for (; !wav_reader.Done(); wav_reader.Next()) {
        std::string utt = wav_reader.Key();
        const kaldi::WaveData& wave_data = wav_reader.Value();
        LOG(INFO) << "process utt: " << utt;

        int32 this_channel = 0;
        kaldi::SubVector<kaldi::BaseFloat> waveform(wave_data.Data(),
                                                    this_channel);
        int tot_samples = waveform.Dim();
        LOG(INFO) << "wav len (sample): " << tot_samples;

        int sample_offset = 0;
        std::vector<kaldi::Vector<BaseFloat>> feats;
        int feature_rows = 0;
        while (sample_offset < tot_samples) {
            int cur_chunk_size =
                std::min(chunk_sample_size, tot_samples - sample_offset);

            kaldi::Vector<kaldi::BaseFloat> wav_chunk(cur_chunk_size);
            for (int i = 0; i < cur_chunk_size; ++i) {
                wav_chunk(i) = waveform(sample_offset + i);
            }

            kaldi::Vector<BaseFloat> features;
            feature_cache.Accept(wav_chunk);
            if (cur_chunk_size < chunk_sample_size) {
                feature_cache.SetFinished();
            }
            bool flag = true;
            do {
                flag = feature_cache.Read(&features);
                feats.push_back(features);
                feature_rows += features.Dim() / feature_cache.Dim();
            } while (flag == true && features.Dim() != 0);
            sample_offset += cur_chunk_size;
        }

        int cur_idx = 0;
        kaldi::Matrix<kaldi::BaseFloat> features(feature_rows,
                                                 feature_cache.Dim());
        for (auto feat : feats) {
            int num_rows = feat.Dim() / feature_cache.Dim();
            for (int row_idx = 0; row_idx < num_rows; ++row_idx) {
                for (size_t col_idx = 0; col_idx < feature_cache.Dim();
                     ++col_idx) {
                    features(cur_idx, col_idx) =
                        feat(row_idx * feature_cache.Dim() + col_idx);
                }
                ++cur_idx;
            }
        }
        feat_writer.Write(utt, features);
        feature_cache.Reset();

        if (num_done % 50 == 0 && num_done != 0)
            KALDI_VLOG(2) << "Processed " << num_done << " utterances";
        num_done++;
    }
    KALDI_LOG << "Done " << num_done << " utterances, " << num_err
              << " with errors.";
    return (num_done != 0 ? 0 : 1);
}


================================================
FILE: runtime/engine/common/frontend/data_cache.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#pragma once

#include "base/common.h"
#include "frontend/frontend_itf.h"

using std::vector;

namespace ppspeech {

// Simulates audio/feature input, by returning data from a Vector.
// This class is mostly meant to be used for online decoder testing using
// pre-recorded audio/feature
class DataCache : public FrontendInterface {
  public:
    DataCache() : finished_{false}, dim_{0} {}

    // accept waves/feats
    void Accept(const std::vector<kaldi::BaseFloat>& inputs) override {
        data_ = std::move(inputs);
    }

    bool Read(vector<kaldi::BaseFloat>* feats) override {
        if (data_.size() == 0) {
            return false;
        }
        (*feats) = std::move(data_);
        data_.resize(0);
        return true;
    }

    void SetFinished() override { finished_ = true; }
    bool IsFinished() const override { return finished_; }
    size_t Dim() const override { return dim_; }
    void SetDim(int32 dim) { dim_ = dim; }
    void Reset() override {
        finished_ = true;
        dim_ = 0;
    }

  private:
    std::vector<kaldi::BaseFloat> data_;
    bool finished_;
    int32 dim_;

    DISALLOW_COPY_AND_ASSIGN(DataCache);
};
}  // namespace ppspeech

================================================
FILE: runtime/engine/common/frontend/db_norm.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include "frontend/audio/db_norm.h"

#include "kaldi/feat/cmvn.h"
#include "kaldi/util/kaldi-io.h"

namespace ppspeech {

using kaldi::BaseFloat;
using kaldi::SubVector;
using kaldi::Vector;
using kaldi::VectorBase;
using std::unique_ptr;
using std::vector;

// Stores the options and takes ownership of the upstream extractor. The
// reported dimension is fixed to 1.
DecibelNormalizer::DecibelNormalizer(
    const DecibelNormalizerOptions& opts,
    std::unique_ptr<FrontendInterface> base_extractor) {
    dim_ = 1;
    opts_ = opts;
    base_extractor_ = std::move(base_extractor);
}

// Nothing is buffered here: the samples are forwarded to the upstream
// extractor untouched.
void DecibelNormalizer::Accept(const kaldi::VectorBase<BaseFloat>& waves) {
    base_extractor_->Accept(waves);
}

// Reads samples from the upstream extractor and normalizes them in place.
// Returns false when upstream delivered nothing. The outcome of Compute() is
// not propagated: a chunk that could not be normalized is still returned.
bool DecibelNormalizer::Read(kaldi::Vector<BaseFloat>* waves) {
    const bool upstream_ok = base_extractor_->Read(waves);
    if (!upstream_ok || waves->Dim() == 0) {
        return false;
    }
    Compute(waves);
    return true;
}

// Scales `waves` in place so that its RMS level (in dB) equals
// opts_.target_db.
//
// Steps:
//   1. copy the samples; when opts_.convert_int_float is set, multiply each
//      one by 1 / 2^15,
//   2. compute the mean square and derive the RMS level in dB,
//   3. multiply every sample by 10^(gain / 20), gain = target_db - rms_db.
//
// Returns false and leaves `waves` untouched when the required gain exceeds
// opts_.max_gain_db (this includes all-zero input, whose gain is +inf).
// Returns true otherwise. An empty input is a no-op that returns true.
bool DecibelNormalizer::Compute(VectorBase<BaseFloat>* waves) const {
    const size_t num_samples = waves->Dim();
    if (num_samples == 0) {
        // Nothing to normalize; also avoids the 0 / 0 in the mean below.
        return true;
    }

    const BaseFloat wave_float_normlization = 1.0f / (std::pow(2, 16 - 1));

    // Work on a private copy so `waves` stays intact if we bail out.
    vector<BaseFloat> samples(num_samples);
    for (size_t i = 0; i < num_samples; ++i) {
        samples[i] = (*waves)(i);
    }

    // Sum of squares (after the optional int16 -> float rescale).
    BaseFloat mean_square = 0.0;
    for (auto& d : samples) {
        if (opts_.convert_int_float) {
            d = d * wave_float_normlization;
        }
        mean_square += d * d;
    }

    // Mean, RMS level in dB, and the gain needed to reach the target.
    mean_square /= num_samples;
    const BaseFloat rms_db = 10 * std::log10(mean_square);
    const BaseFloat gain = opts_.target_db - rms_db;

    if (gain > opts_.max_gain_db) {
        LOG(ERROR)
            << "Unable to normalize segment to " << opts_.target_db
            << " dB, because the probable gain has exceeded "
            << "opts_.max_gain_db (" << opts_.max_gain_db << " dB).";
        return false;
    }

    // python: item *= 10.0 ** (gain / 20.0)
    // The factor does not depend on the sample, so compute it once.
    const double scale = std::pow(10.0, gain / 20.0);
    for (auto& item : samples) {
        item *= scale;
    }

    std::memcpy(
        waves->Data(), samples.data(), sizeof(BaseFloat) * samples.size());
    return true;
}


}  // namespace ppspeech


================================================
FILE: runtime/engine/common/frontend/db_norm.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#pragma once

#include "base/common.h"
#include "frontend/audio/frontend_itf.h"
#include "kaldi/matrix/kaldi-matrix.h"
#include "kaldi/util/options-itf.h"

namespace ppspeech {

// Configuration for DecibelNormalizer.
struct DecibelNormalizerOptions {
    // Level, in dB, that the normalizer aims for.
    float target_db = -20;
    // Largest gain, in dB, the normalizer is allowed to apply.
    float max_gain_db = 300.0;
    // Whether samples are rescaled from integer range to float first.
    bool convert_int_float = false;

    DecibelNormalizerOptions() {}

    // Exposes every field as a command-line/config option on `opts`.
    void Register(kaldi::OptionsItf* opts) {
        opts->Register("target-db",
                       &target_db,
                       "target db for db normalization");
        opts->Register("max-gain-db",
                       &max_gain_db,
                       "max gain db for db normalization");
        opts->Register("convert-int-float",
                       &convert_int_float,
                       "if convert int samples to float");
    }
};

// Frontend stage that normalizes the level of the audio it reads from a
// wrapped FrontendInterface. Accept(), SetFinished(), IsFinished() and
// Reset() are forwarded to the wrapped extractor; Read() additionally runs
// Compute() on the data it obtained.
class DecibelNormalizer : public FrontendInterface {
  public:
    // Takes ownership of `base_extractor`; `opts` is copied.
    explicit DecibelNormalizer(
        const DecibelNormalizerOptions& opts,
        std::unique_ptr<FrontendInterface> base_extractor);
    // Passes `waves` on to the wrapped extractor.
    virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& waves);
    // Reads from the wrapped extractor into `waves` and normalizes it in
    // place. Returns false if nothing was read or the result is empty.
    virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* waves);
    // normalize audio, the dim is 1.
    virtual size_t Dim() const { return dim_; }
    virtual void SetFinished() { base_extractor_->SetFinished(); }
    virtual bool IsFinished() const { return base_extractor_->IsFinished(); }
    virtual void Reset() { base_extractor_->Reset(); }

  private:
    // Applies the dB normalization to `waves` in place; returns false when
    // the required gain exceeds opts_.max_gain_db.
    bool Compute(kaldi::VectorBase<kaldi::BaseFloat>* waves) const;
    DecibelNormalizerOptions opts_;
    size_t dim_;  // always 1, set in the constructor
    std::unique_ptr<FrontendInterface> base_extractor_;
    // NOTE(review): not referenced by any method defined in db_norm.cc.
    kaldi::Vector<kaldi::BaseFloat> waveform_;
};


}  // namespace ppspeech

================================================
FILE: runtime/engine/common/frontend/fbank.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "base/common.h"
#include "frontend/feature-fbank.h"
#include "frontend/feature_common.h"

namespace ppspeech {

// Streaming filter-bank extractor: StreamingFeatureTpl instantiated with the
// knf fbank computer.
using Fbank = StreamingFeatureTpl<knf::FbankComputer>;

}  // namespace ppspeech


================================================
FILE: runtime/engine/common/frontend/feature-fbank.cc
================================================
/**
 * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
 *
 * See LICENSE for clarification regarding multiple authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// This file is copied/modified from kaldi/src/feat/feature-fbank.cc
//
#include "frontend/feature-fbank.h"

#include <cmath>

#include "frontend/feature-functions.h"

namespace knf {

// Replaces each of the first `n` values of `in_out` with its square root.
static void Sqrt(float *in_out, int32_t n) {
    float *const stop = in_out + n;
    for (float *cur = in_out; cur != stop; ++cur) {
        *cur = std::sqrt(*cur);
    }
}

// Writes the text produced by FbankOptions::ToString() to `os`.
std::ostream &operator<<(std::ostream &os, const FbankOptions &opts) {
    return os << opts.ToString();
}

// Stores the options, sizes the real FFT for the padded window, and
// pre-computes the mel filter banks for the unwarped case.
//
// log_energy_floor_ is only consulted when opts.energy_floor > 0, but it is
// still given a defined value (0) otherwise so the object never carries an
// indeterminate float.
FbankComputer::FbankComputer(const FbankOptions &opts)
    : opts_(opts),
      log_energy_floor_(0.0f),
      rfft_(opts.frame_opts.PaddedWindowSize()) {
    if (opts.energy_floor > 0.0f) {
        log_energy_floor_ = logf(opts.energy_floor);
    }

    // We'll definitely need the filterbanks info for VTLN warping factor 1.0.
    // [note: this call caches it.]
    GetMelBanks(1.0f);
}

// Frees every MelBanks object cached in mel_banks_.
FbankComputer::~FbankComputer() {
    for (auto &entry : mel_banks_) {
        delete entry.second;
    }
}

// Returns the mel banks for `vtln_warp`, building and caching them in
// mel_banks_ on first use. The returned pointer stays owned by this object.
const MelBanks *FbankComputer::GetMelBanks(float vtln_warp) {
    auto cached = mel_banks_.find(vtln_warp);
    if (cached != mel_banks_.end()) {
        return cached->second;
    }

    MelBanks *created =
        new MelBanks(opts_.mel_opts, opts_.frame_opts, vtln_warp);
    mel_banks_[vtln_warp] = created;
    return created;
}

void FbankComputer::Compute(float signal_raw_log_energy,
                            float vtln_warp,
                            std::vector<float> *signal_frame,
                            float *feature) {
    const MelBanks &mel_banks = *(GetMelBanks(vtln_warp));

    CHECK_EQ(signal_frame->size(), opts_.frame_opts.PaddedWindowSize());

    // Compute energy after window function (not the raw one).
    if (opts_.use_energy && !opts_.raw_energy) {
        signal_raw_log_energy =
            std::log(std::max<float>(InnerProduct(signal_frame->data(),
                                                  signal_frame->data(),
                                                  signal_frame->size()),
                                     std::numeric_limits<float>::epsilon()));
    }
    rfft_.Compute(signal_frame->data());  // signal_frame is modified in-place
    ComputePowerSpectrum(signal_frame);

    // Use magnitude instead of power if requested.
    if (!opts_.use_power) {
        Sqrt(signal_frame->data(), signal_frame->size() / 2 + 1);
    }

    int32_t mel_offset = ((opts_.use_energy && !opts_.htk_compat) ? 1 : 0);

    // Its length is opts_.mel_opts.num_bins
    float *mel_energies = feature + mel_offset;

    // Sum with mel filter banks over the power spectrum
    mel_banks.Compute(signal_frame->data(), mel_energies);

    if (opts_.use_log_fbank) {
        // Avoid log of zero (which should be prevented anyway by dithering).
        for (int32_t i = 0; i != opts_.mel_opts.num_bins; ++i) {
            auto t = std::max(mel_energies[i],
                              std::numeric_limits<float>::epsilon());
            mel_energies[i] = std::log(t);
        }
    }

    // Copy energy as first value (or the last, if htk_compat == true).
    if (opts_.use_energy) {
        if (opts_.energy_floor > 0.0 &&
            signal_raw_log_energy < log_energy_floor_) {
            signal_raw_log_energy = log_energy_floor_;
        }
        int32_t energy_index = opts_.htk_compat ? opts_.mel_opts.num_bins : 0;
        feature[energy_index] = signal_raw_log_energy;
    }
}

}  // namespace knf


================================================
FILE: runtime/engine/common/frontend/feature-fbank.h
================================================
/**
 * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
 *
 * See LICENSE for clarification regarding multiple authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// This file is copied/modified from kaldi/src/feat/feature-fbank.h

#ifndef KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
#define KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_

#include <limits>
#include <map>

#include "frontend/feature-window.h"
#include "frontend/mel-computations.h"
#include "frontend/rfft.h"

namespace knf {

// Options for FbankComputer.
struct FbankOptions {
    FrameExtractionOptions frame_opts;
    MelBanksOptions mel_opts;

    // Append an extra dimension with energy to the filter banks.
    bool use_energy = false;

    // Lower bound applied to the energy; active iff use_energy == true.
    float energy_floor = 0.0f;

    // If true, compute log_energy before preemphasis and windowing.
    // If false, compute log_energy after preemphasis and windowing.
    // Active iff use_energy == true.
    bool raw_energy = true;

    // If true, put energy last (if using energy); otherwise put it first.
    // Active iff use_energy == true.
    bool htk_compat = false;

    // If true (default), produce log-filterbank, else linear.
    bool use_log_fbank = true;

    // If true (default), use power in filterbank analysis, else magnitude.
    bool use_power = true;

    FbankOptions() { mel_opts.num_bins = 23; }

    // Renders all options as human-readable "name: value" lines.
    std::string ToString() const {
        std::ostringstream os;
        os << "frame_opts: \n" << frame_opts << "\n" << "\n";
        os << "mel_opts: \n" << mel_opts << "\n";
        os << "use_energy: " << use_energy << "\n"
           << "energy_floor: " << energy_floor << "\n"
           << "raw_energy: " << raw_energy << "\n"
           << "htk_compat: " << htk_compat << "\n"
           << "use_log_fbank: " << use_log_fbank << "\n"
           << "use_power: " << use_power << "\n";
        return os.str();
    }
};

std::ostream &operator<<(std::ostream &os, const FbankOptions &opts);

// Computes filter-bank features one frame at a time.
//
// NOTE(review): the destructor deletes the MelBanks pointers held in
// mel_banks_, but no copy constructor / copy assignment is declared here, so
// the implicitly generated ones would share those pointers between copies.
// Avoid copying instances unless that is addressed.
class FbankComputer {
  public:
    using Options = FbankOptions;

    explicit FbankComputer(const FbankOptions &opts);
    ~FbankComputer();

    // Number of values written per frame: the mel bins, plus one slot for
    // the energy when use_energy is set.
    int32_t Dim() const {
        return opts_.mel_opts.num_bins + (opts_.use_energy ? 1 : 0);
    }

    // if true, compute log_energy_pre_window but after dithering and dc removal
    bool NeedRawLogEnergy() const {
        return opts_.use_energy && opts_.raw_energy;
    }

    const FrameExtractionOptions &GetFrameOptions() const {
        return opts_.frame_opts;
    }

    const FbankOptions &GetOptions() const { return opts_; }

    /**
       Function that computes one frame of features from
       one frame of signal.

       @param [in] signal_raw_log_energy The log-energy of the frame of the
           signal prior to windowing and pre-emphasis, or
           log(numeric_limits<float>::min()), whichever is greater.  Must be
           ignored by this function if this class returns false from
           this->NeedRawLogEnergy().
       @param [in] vtln_warp  The VTLN warping factor that the user wants
           to be applied when computing features for this utterance.  Will
           normally be 1.0, meaning no warping is to be done.  The value will
           be ignored for feature types that don't support VTLN, such as
           spectrogram features.
       @param [in] signal_frame  One frame of the signal,
         as extracted using the function ExtractWindow() using the options
         returned by this->GetFrameOptions().  The function will use the
         vector as a workspace, which is why it's a non-const pointer.
       @param [out] feature  Pointer to a vector of size this->Dim(), to which
           the computed feature will be written. It should be pre-allocated.
    */
    void Compute(float signal_raw_log_energy,
                 float vtln_warp,
                 std::vector<float> *signal_frame,
                 float *feature);

  private:
    // Returns the (cached) mel banks for the given warp factor, creating
    // them on first request.
    const MelBanks *GetMelBanks(float vtln_warp);

    FbankOptions opts_;
    // log(opts_.energy_floor); assigned by the constructor only when
    // opts_.energy_floor > 0.
    float log_energy_floor_;
    std::map<float, MelBanks *> mel_banks_;  // float is VTLN coefficient.
    Rfft rfft_;
};

}  // namespace knf

#endif  // KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_


================================================
FILE: runtime/engine/common/frontend/feature-functions.cc
================================================
/**
 * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
 *
 * See LICENSE for clarification regarding multiple authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// This file is copied/modified from kaldi/src/feat/feature-functions.cc

#include "frontend/feature-functions.h"

#include <cstdint>
#include <vector>

namespace knf {

// Converts a packed real-FFT output into a power spectrum, in place.
//
// On input `complex_fft` holds the first half of the complex spectrum packed
// as [real0, realN/2, real1, im1, real2, im2, ...]. On output the first
// (size / 2) + 1 entries hold the energies of the bins from zero up to the
// Nyquist frequency; the remaining entries are left with unspecified
// contents.
//
// Degenerate sizes are handled without touching memory out of range: an
// empty vector is left alone, and a single-element vector is treated as a
// lone DC bin and squared.
void ComputePowerSpectrum(std::vector<float> *complex_fft) {
    const int32_t dim = complex_fft->size();
    float *p = complex_fft->data();

    if (dim < 2) {
        // The packed layout needs at least [real0, realN/2].
        if (dim == 1) {
            p[0] = p[0] * p[0];
        }
        return;
    }

    const int32_t half_dim = dim / 2;
    const float first_energy = p[0] * p[0];
    const float last_energy = p[1] * p[1];  // handle this special case

    // Writing p[i] is safe: it only overwrites slots (index <= i) whose
    // values have already been consumed, since the reads are at 2i, 2i + 1.
    for (int32_t i = 1; i < half_dim; ++i) {
        const float real = p[i * 2];
        const float im = p[i * 2 + 1];
        p[i] = real * real + im * im;
    }
    p[0] = first_energy;
    p[half_dim] = last_energy;  // Will actually never be used, and anyway
    // if the signal has been bandlimited sensibly this should be zero.
}

}  // namespace knf


================================================
FILE: runtime/engine/common/frontend/feature-functions.h
================================================
/**
 * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
 *
 * See LICENSE for clarification regarding multiple authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// This file is copied/modified from kaldi/src/feat/feature-functions.h
#ifndef KALDI_NATIVE_FBANK_CSRC_FEATURE_FUNCTIONS_H
#define KALDI_NATIVE_FBANK_CSRC_FEATURE_FUNCTIONS_H

#include <vector>
namespace knf {

// ComputePowerSpectrum converts a complex FFT (as produced by the FFT
// functions in csrc/rfft.h) into a power spectrum, in place.  If the complex
// FFT is a vector of size n (representing half of the complex FFT of a real
// signal of size n, as described there), this function stores in its first
// (n/2) + 1 elements the energies of the FFT bins from zero to the Nyquist
// frequency.  The contents of the remaining (n/2) - 1 elements are undefined
// at output.

void ComputePowerSpectrum(std::vector<float> *complex_fft);

}  // namespace knf

#endif  // KALDI_NATIVE_FBANK_CSRC_FEATURE_FUNCTIONS_H


================================================
FILE: runtime/engine/common/frontend/feature-window.cc
================================================
// kaldi-native-fbank/csrc/feature-window.cc
//
// Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)

// This file is copied/modified from kaldi/src/feat/feature-window.cc

#include "frontend/feature-window.h"

#include <cmath>
#include <limits>
#include <vector>

#ifndef M_2PI
#define M_2PI 6.283185307179586476925286766559005
#endif

namespace knf {

// Writes the text produced by FrameExtractionOptions::ToString() to `os`.
std::ostream &operator<<(std::ostream &os, const FrameExtractionOptions &opts) {
    return os << opts.ToString();
}

// Pre-computes the window coefficients for opts.WindowSize() samples.
//
// Supported opts.window_type values: "hanning", "sine", "hamming", "povey",
// "rectangular" and "blackman"; anything else is a fatal error.
FeatureWindowFunction::FeatureWindowFunction(const FrameExtractionOptions &opts)
    : window_(opts.WindowSize()) {
    const int32_t frame_length = opts.WindowSize();
    CHECK_GT(frame_length, 0);

    const std::string &type = opts.window_type;
    const double a = M_2PI / (frame_length - 1);
    float *coeffs = window_.data();

    for (int32_t i = 0; i < frame_length; i++) {
        const double i_fl = static_cast<double>(i);
        float &w = coeffs[i];

        if (type == "hanning") {
            w = 0.5 - 0.5 * cos(a * i_fl);
        } else if (type == "sine") {
            // When comparing against Wikipedia, note that
            // 0.5 * a = M_PI / (frame_length - 1).
            w = sin(0.5 * a * i_fl);
        } else if (type == "hamming") {
            w = 0.54 - 0.46 * cos(a * i_fl);
        } else if (type == "povey") {
            // Like hamming but goes to zero at the edges.
            w = pow(0.5 - 0.5 * cos(a * i_fl), 0.85);
        } else if (type == "rectangular") {
            w = 1.0;
        } else if (type == "blackman") {
            w = opts.blackman_coeff - 0.5 * cos(a * i_fl) +
                (0.5 - opts.blackman_coeff) * cos(2 * a * i_fl);
        } else {
            LOG(FATAL) << "Invalid window type " << opts.window_type;
        }
    }
}

// Multiplies wave[k] by the k-th window coefficient for every coefficient in
// window_; `wave` must provide at least window_.size() samples.
void FeatureWindowFunction::Apply(float *wave) const {
    float *sample = wave;
    for (const float coeff : window_) {
        *sample *= coeff;
        ++sample;
    }
}

// Returns the index of the first sample belonging to frame `frame`.
//
// With opts.snip_edges, frames start at multiples of the frame shift.
// Otherwise the frame is centred on (frame_shift * frame + frame_shift / 2),
// so the returned index can be negative for the first frames.
int64_t FirstSampleOfFrame(int32_t frame, const FrameExtractionOptions &opts) {
    const int64_t frame_shift = opts.WindowShift();
    if (!opts.snip_edges) {
        const int64_t centre = frame_shift * frame + frame_shift / 2;
        return centre - opts.WindowSize() / 2;
    }
    return frame * frame_shift;
}

// Returns how many frames can be extracted from `num_samples` samples.
//
// `flush` only matters when opts.snip_edges is false: with flush == false,
// frames that would extend past the end of the signal are not counted.
int32_t NumFrames(int64_t num_samples,
                  const FrameExtractionOptions &opts,
                  bool flush /*= true*/) {
    const int64_t frame_shift = opts.WindowShift();
    const int64_t frame_length = opts.WindowSize();

    if (opts.snip_edges) {
        // HTK-like approach (--snip-edges=true, the default): every frame has
        // to fit completely into the waveform and the first frame begins at
        // sample zero.  'num_samples - frame_length' is the room available
        // for shifting the frame, 'frame_shift' is the step, and the integer
        // ratio is how many times the frame can be shifted.
        if (num_samples >= frame_length) {
            return (1 + ((num_samples - frame_length) / frame_shift));
        }
        return 0;
    }

    // --snip-edges=false: the number of frames is (file-length / frame-shift)
    // rounded to the nearest integer, which makes it an obvious and
    // predictable function of the frame shift and signal length.  Integer
    // division rounds toward zero, so half the frame shift is added before
    // dividing to get round-to-nearest.
    int32_t num_frames = (num_samples + (frame_shift / 2)) / frame_shift;
    if (flush) {
        return num_frames;
    }

    // 'end' always means one past the last sample.
    int64_t end_of_last_frame =
        FirstSampleOfFrame(num_frames - 1, opts) + frame_length;

    // Without flushing we cannot emit frames that run past the end of the
    // signal, so drop trailing frames until the last one fits.  Written for
    // clarity rather than speed.
    for (; num_frames > 0 && end_of_last_frame > num_samples; --num_frames) {
        end_of_last_frame -= frame_shift;
    }
    return num_frames;
}

// Extracts frame `f` from `wave` into `window` and runs ProcessWindow() on
// it.
//
// `sample_offset` is the number of samples that precede `wave` and have
// already been discarded. `window` is resized to opts.PaddedWindowSize();
// its first opts.WindowSize() entries receive the frame and the remaining
// padding entries are set to zero. If `log_energy_pre_window` is non-null it
// is filled in by ProcessWindow().
void ExtractWindow(int64_t sample_offset,
                   const std::vector<float> &wave,
                   int32_t f,
                   const FrameExtractionOptions &opts,
                   const FeatureWindowFunction &window_function,
                   std::vector<float> *window,
                   float *log_energy_pre_window /*= nullptr*/) {
    CHECK(sample_offset >= 0 && wave.size() != 0);

    int32_t frame_length = opts.WindowSize();
    int32_t frame_length_padded = opts.PaddedWindowSize();

    int64_t num_samples = sample_offset + wave.size();
    int64_t start_sample = FirstSampleOfFrame(f, opts);
    int64_t end_sample = start_sample + frame_length;

    if (opts.snip_edges) {
        CHECK(start_sample >= sample_offset && end_sample <= num_samples);
    } else {
        CHECK(sample_offset == 0 || start_sample >= sample_offset);
    }

    if (window->size() != frame_length_padded) {
        window->resize(frame_length_padded);
    }

    // wave_start and wave_end are start and end indexes into 'wave', for the
    // piece of wave that we're trying to extract.
    int32_t wave_start = int32_t(start_sample - sample_offset);
    int32_t wave_end = wave_start + frame_length;

    if (wave_start >= 0 && wave_end <= wave.size()) {
        // the normal case-- no edge effects to consider.
        std::copy(wave.begin() + wave_start,
                  wave.begin() + wave_start + frame_length,
                  window->data());
    } else {
        // Deal with any end effects by reflection, if needed.  This code will
        // only be reached for about two frames per utterance, so we don't
        // concern ourselves excessively with efficiency.
        int32_t wave_dim = wave.size();
        for (int32_t s = 0; s < frame_length; ++s) {
            int32_t s_in_wave = s + wave_start;
            while (s_in_wave < 0 || s_in_wave >= wave_dim) {
                // reflect around the beginning or end of the wave.
                // e.g. -1 -> 0, -2 -> 1.
                // dim -> dim - 1, dim + 1 -> dim - 2.
                // the code supports repeated reflections, although this
                // would only be needed in pathological cases.
                if (s_in_wave < 0)
                    s_in_wave = -s_in_wave - 1;
                else
                    s_in_wave = 2 * wave_dim - 1 - s_in_wave;
            }
            (*window)[s] = wave[s_in_wave];
        }
    }

    // Clear the padding tail.  resize() only zero-fills newly added
    // elements, so a `window` buffer that the caller reuses across frames
    // (and that a previous in-place FFT has overwritten) would otherwise
    // leak stale values into the next transform.
    for (int32_t s = frame_length; s < frame_length_padded; ++s) {
        (*window)[s] = 0.0f;
    }

    ProcessWindow(opts, window_function, window->data(), log_energy_pre_window);
}

// Subtracts the mean of the first `n` values of `d` from each of them.
static void RemoveDcOffset(float *d, int32_t n) {
    float *const stop = d + n;

    float total = 0;
    for (const float *cur = d; cur != stop; ++cur) {
        total += *cur;
    }

    const float mean = total / n;

    for (float *cur = d; cur != stop; ++cur) {
        *cur -= mean;
    }
}

// Returns the sum of a[i] * b[i] over the first `n` elements, accumulated in
// single precision from index 0 upwards.
float InnerProduct(const float *a, const float *b, int32_t n) {
    float acc = 0;
    const float *const a_stop = a + n;
    while (a != a_stop) {
        acc += (*a) * (*b);
        ++a;
        ++b;
    }
    return acc;
}

// Applies pre-emphasis in place: d[i] -= preemph_coeff * d[i - 1], with d[0]
// using itself as its predecessor. A coefficient of 0 is a no-op; otherwise
// the coefficient must lie in [0, 1].
static void Preemphasize(float *d, int32_t n, float preemph_coeff) {
    if (preemph_coeff == 0.0) {
        return;
    }

    CHECK(preemph_coeff >= 0.0 && preemph_coeff <= 1.0);

    // Walk backwards so each step still sees the original predecessor.
    int32_t idx = n - 1;
    while (idx > 0) {
        d[idx] -= preemph_coeff * d[idx - 1];
        --idx;
    }
    d[0] -= preemph_coeff * d[0];
}

// Runs the per-frame processing on the first opts.WindowSize() samples of
// `window`: optional DC-offset removal, optional log-energy measurement,
// optional pre-emphasis, then multiplication by the window function.
// Dithering is not supported; opts.dither must be 0.
void ProcessWindow(const FrameExtractionOptions &opts,
                   const FeatureWindowFunction &window_function,
                   float *window,
                   float *log_energy_pre_window /*= nullptr*/) {
    const int32_t frame_length = opts.WindowSize();

    // TODO(fangjun): Remove dither
    CHECK_EQ(opts.dither, 0);

    if (opts.remove_dc_offset) {
        RemoveDcOffset(window, frame_length);
    }

    if (log_energy_pre_window != nullptr) {
        // Floor the energy at epsilon so the log stays finite.
        const float raw_energy = InnerProduct(window, window, frame_length);
        const float floored_energy = std::max<float>(
            raw_energy, std::numeric_limits<float>::epsilon());
        *log_energy_pre_window = std::log(floored_energy);
    }

    if (opts.preemph_coeff != 0.0) {
        Preemphasize(window, frame_length, opts.preemph_coeff);
    }

    window_function.Apply(window);
}

}  // namespace knf


================================================
FILE: runtime/engine/common/frontend/feature-window.h
================================================
// kaldi-native-fbank/csrc/feature-window.h
//
// Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)

// This file is copied/modified from kaldi/src/feat/feature-window.h

#ifndef KALDI_NATIVE_FEAT_CSRC_FEATURE_WINDOW_H_
#define KALDI_NATIVE_FEAT_CSRC_FEATURE_WINDOW_H_

#include <sstream>
#include <string>
#include <vector>

#include "base/log.h"

namespace knf {

// Returns the smallest power of two that is >= n; n must be positive.
// Adapted from kaldi/src/base/kaldi-math.cc.
inline int32_t RoundUpToNearestPowerOfTwo(int32_t n) {
    CHECK_GT(n, 0);
    int32_t bits = n - 1;
    // Smear the highest set bit into every lower position.
    for (int32_t shift = 1; shift <= 16; shift *= 2) {
        bits |= bits >> shift;
    }
    return bits + 1;
}

// Options describing how frames are cut out of a waveform.
struct FrameExtractionOptions {
    float samp_freq = 16000;
    float frame_shift_ms = 10.0f;   // in milliseconds.
    float frame_length_ms = 25.0f;  // in milliseconds.
    float dither = 1.0f;            // Amount of dithering, 0.0 means no dither.
    float preemph_coeff = 0.97f;    // Preemphasis coefficient.
    bool remove_dc_offset = true;   // Subtract mean of wave before FFT.

    // One of "hamming", "rectangular", "povey", "hanning", "sine",
    // "blackman".  "povey" is similar to Hamming but goes to zero at the
    // edges: pow((0.5 - 0.5*cos(n/N*2*pi)), 0.85).
    std::string window_type = "povey";

    bool round_to_power_of_two = true;
    float blackman_coeff = 0.42f;
    bool snip_edges = true;
    // bool allow_downsample = false;
    // bool allow_upsample = false;

    // Used for streaming feature extraction. It indicates the number
    // of feature frames to keep in the recycling vector. -1 means to
    // keep all feature frames.
    int32_t max_feature_vectors = -1;

    // Frame shift expressed in samples (truncated).
    int32_t WindowShift() const {
        return static_cast<int32_t>(samp_freq * 0.001f * frame_shift_ms);
    }

    // Frame length expressed in samples (truncated).
    int32_t WindowSize() const {
        return static_cast<int32_t>(samp_freq * 0.001f * frame_length_ms);
    }

    // Frame length after optional rounding up to a power of two.
    int32_t PaddedWindowSize() const {
        if (round_to_power_of_two) {
            return RoundUpToNearestPowerOfTwo(WindowSize());
        }
        return WindowSize();
    }

    // Renders every option as a "name: value" line.
    std::string ToString() const {
        std::ostringstream os;
        os << "samp_freq: " << samp_freq << "\n";
        os << "frame_shift_ms: " << frame_shift_ms << "\n";
        os << "frame_length_ms: " << frame_length_ms << "\n";
        os << "dither: " << dither << "\n";
        os << "preemph_coeff: " << preemph_coeff << "\n";
        os << "remove_dc_offset: " << remove_dc_offset << "\n";
        os << "window_type: " << window_type << "\n";
        os << "round_to_power_of_two: " << round_to_power_of_two << "\n";
        os << "blackman_coeff: " << blackman_coeff << "\n";
        os << "snip_edges: " << snip_edges << "\n";
        os << "max_feature_vectors: " << max_feature_vectors << "\n";
        return os.str();
    }
};

std::ostream &operator<<(std::ostream &os, const FrameExtractionOptions &opts);

// Holds the pre-computed coefficients of a windowing function and applies
// them to a frame of samples.
class FeatureWindowFunction {
  public:
    // Creates an empty window (no coefficients).
    FeatureWindowFunction() = default;
    // Computes opts.WindowSize() coefficients for opts.window_type.
    explicit FeatureWindowFunction(const FrameExtractionOptions &opts);
    /**
     * @param wave Pointer to a 1-D array of shape [window_size].
     *             It is modified in-place: wave[i] = wave[i] * window_[i].
     */
    void Apply(float *wave) const;

  private:
    std::vector<float> window_;  // of size opts.WindowSize()
};

// Returns the index of the first sample of frame `frame`.  With
// opts.snip_edges == true this is frame * opts.WindowShift(); otherwise the
// frame is centred on (shift * frame + shift / 2) and the result may be
// negative.
int64_t FirstSampleOfFrame(int32_t frame, const FrameExtractionOptions &opts);

/**
   This function returns the number of frames that we can extract from a wave
   file with the given number of samples in it (assumed to have the same
   sampling rate as specified in 'opts').

      @param [in] num_samples  The number of samples in the wave file.
      @param [in] opts     The frame-extraction options class

      @param [in] flush   True if we are asserting that this number of samples
   is 'all there is', false if we are expecting more data to possibly come in.
   This only makes a difference to the answer if opts.snip_edges == false.
   For offline feature extraction you always want flush == true.  In an
   online-decoding context, once you know (or decide) that no more data is
   coming in, you'd call it with flush == true at the end to flush out any
   remaining data.
*/
int32_t NumFrames(int64_t num_samples,
                  const FrameExtractionOptions &opts,
                  bool flush = true);

/*
  ExtractWindow() extracts a windowed frame of waveform (possibly with a
  power-of-two, padded size, depending on the config), including all the
  processing done by ProcessWindow().

  @param [in] sample_offset  If 'wave' is not the entire waveform, but
                   part of it to the left has been discarded, then the
                   number of samples prior to 'wave' that we have
                   already discarded.  Set this to zero if you are
                   processing the entire waveform in one piece, or
                   if you get 'no matching function' compilation
                   errors when updating the code.
  @param [in] wave  The waveform; must not be empty.
  @param [in] f     The frame index to be extracted, with
                    0 <= f < NumFrames(sample_offset + wave.size(), opts, true)
  @param [in] opts  The options class to be used
  @param [in] window_function  The windowing function, as derived from the
                    options class.
  @param [out] window  The windowed, possibly-padded waveform to be
                     extracted.  Will be resized as needed.
  @param [out] log_energy_pre_window  If non-NULL, the log-energy of
                   the signal prior to pre-emphasis and multiplying by
                   the windowing function will be written to here.
*/
void ExtractWindow(int64_t sample_offset,
                   const std::vector<float> &wave,
                   int32_t f,
                   const FrameExtractionOptions &opts,
                   const FeatureWindowFunction &window_function,
                   std::vector<float> *window,
                   float *log_energy_pre_window = nullptr);

/**
  This function does all the windowing steps after actually
  extracting the windowed signal: depending on the
  configuration, it does dithering, dc offset removal,
  preemphasis, and multiplication by the windowing function.
   @param [in] opts  The options class to be used
   @param [in] window_function  The windowing function-- should have
                    been initialized using 'opts'.
   @param [in,out] window  A vector of size opts.WindowSize().  Note:
      it will typically be a sub-vector of a larger vector of size
      opts.PaddedWindowSize(), with the remaining samples zero,
      as the FFT code is more efficient if it operates on data with
      power-of-two size.
   @param [out]   log_energy_pre_window If non-NULL, then after dithering and
      DC offset removal, this function will write to this pointer the log of
      the total energy (i.e. sum-squared) of the frame.
 */
void ProcessWindow(const FrameExtractionOptions &opts,
                   const FeatureWindowFunction &window_function,
                   float *window,
                   float *log_energy_pre_window = nullptr);

// Compute the inner product of two vectors
float InnerProduct(const float *a, const float *b, int32_t n);

}  // namespace knf

#endif  // KALDI_NATIVE_FEAT_CSRC_FEATURE_WINDOW_H_


================================================
FILE: runtime/engine/common/frontend/feature_cache.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "frontend/feature_cache.h"

namespace ppspeech {

using kaldi::BaseFloat;
using std::unique_ptr;
using std::vector;

// Build a cache that sits on top of `base_extractor` and takes ownership
// of it.
//
// @param max_size        stored as the cache capacity
// @param base_extractor  upstream frontend stage; it is dereferenced here to
//                        query Dim(), so it must not be null
//
// All scalar members are initialised explicitly: nframe_ is logged and
// incremented in Compute(), so leaving it uninitialised would read an
// indeterminate value.
FeatureCache::FeatureCache(size_t max_size,
                           unique_ptr<FrontendInterface> base_extractor)
    : dim_(0),
      max_size_(max_size),
      base_extractor_(std::move(base_extractor)),
      nframe_(0) {
    // dim_ is declared before base_extractor_, so it cannot be computed in
    // the initializer list (members are initialised in declaration order).
    dim_ = base_extractor_->Dim();
}

// Forward the incoming data to the wrapped base extractor. Nothing is
// added to cache_ here; frames are only pulled into the cache by Read().
void FeatureCache::Accept(const std::vector<kaldi::BaseFloat>& inputs) {
    // hand the inputs straight to the upstream stage
    base_extractor_->Accept(inputs);
}

// Pop the oldest cached frame into *feats.
// If the cache is empty it is refilled first by calling Compute() until it
// reports that nothing more was produced. Returns false when no frame is
// available even after that refill attempt, true otherwise.
bool FeatureCache::Read(std::vector<kaldi::BaseFloat>* feats) {
    kaldi::Timer timer;
    std::unique_lock<std::mutex> lock(mutex_);

    if (cache_.empty()) {
        // drain the base extractor into the cache
        while (Compute()) {
        }
        // still nothing to hand out
        if (cache_.empty()) return false;
    }

    // hand out the front frame and drop it from the queue
    *feats = cache_.front();
    cache_.pop();
    VLOG(2) << "FeatureCache::Read cost: " << timer.Elapsed() << " sec.";
    VLOG(1) << "FeatureCache::size : " << cache_.size();
    return true;
}

// read all data from base_feature_extractor_ into cache_
bool FeatureCache::Compute() {
    // compute and feed
    vector<BaseFloat> feature;
    bool result = base_extractor_->Read(&feature);
    if (result == false || feature.size() == 0) return false;

    kaldi::Timer timer;

    int32 num_chunk = feature.size() / dim_;
    VLOG(3) << "nframe computed: " << nframe_;

    for (int chunk_idx = 0; chunk_idx < num_chunk; ++chunk_idx) {
        int32 start = chunk_idx * dim_;
        vector<BaseFloat> feature_chunk(feature.data() + start,
                                        feature.data() + start + dim_);
        // feed cache
        cache_.push(feature_chunk);
        ++nframe_;
    }

    VLOG(2) << "FeatureCache::Compute cost: " << timer.Elapsed() << " sec. "
            << num_chunk << " feats.";
    return true;
}

}  // namespace ppspeech


================================================
FILE: runtime/engine/common/frontend/feature_cache.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "base/common.h"
#include "frontend/frontend_itf.h"

namespace ppspeech {

class FeatureCache : public FrontendInterface {
  public:
    explicit FeatureCache(
        size_t max_size = kint16max,
        std::unique_ptr<FrontendInterface> base_extractor = NULL);

    // Feed feats or waves
    virtual void Accept(const std::vector<kaldi::BaseFloat>& inputs);

    // feats size = num_frames * feat_dim
    virtual bool Read(std::vector<kaldi::BaseFloat>* feats);

    // feat dim
    virtual size_t Dim() const { return dim_; }

    virtual void SetFinished() {
        std::unique_lock<std::mutex> lock(mutex_);
        base_extractor_->SetFinished();
    }

    virtual bool IsFinished() const {
      return base_extractor_->IsFinished() && cache_.empty(); 
    }

    void Reset() override {
        std::queue<std::vector<BaseFloat>> empty;
        VLOG(1) << "feature cache size: " << cache_.size();
        std::swap(cache_, empty);
        nframe_ = 0;
        base_extractor_->Reset();
    }

  private:
    bool Compute();

    int32 dim_;
    size_t max_size_;  // cache capacity
    std::unique_ptr<FrontendInterface> base_extractor_;

    std::queue<std::vector<BaseFloat>> cache_;  // feature cache
    std::mutex mutex_;

    int32 nframe_;  // num of feature computed
    DISALLOW_COPY_AND_ASSIGN(FeatureCache);
};

}  // namespace ppspeech


================================================
FILE: runtime/engine/common/frontend/feature_common.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "frontend/feature-window.h"
#include "frontend_itf.h"

namespace ppspeech {

// Streaming feature extractor built around a per-frame feature computer F.
//
// F must provide a nested `Options` type (with a `frame_opts` member), be
// constructible from it, and expose Dim(), GetFrameOptions(),
// NeedRawLogEnergy() and Compute(); see feature_common_inl.h for how they are
// used.
template <class F>
class StreamingFeatureTpl : public FrontendInterface {
  public:
    typedef typename F::Options Options;
    // Takes ownership of `base_extractor`, the upstream stage that supplies
    // the waveform samples.
    StreamingFeatureTpl(const Options& opts,
                        std::unique_ptr<FrontendInterface> base_extractor);
    // Forward the samples to the base extractor.
    virtual void Accept(const std::vector<kaldi::BaseFloat>& waves);
    // Read samples from the base extractor and compute features into *feats.
    virtual bool Read(std::vector<kaldi::BaseFloat>* feats);

    // Dimension of a single feature frame, as reported by the computer.
    virtual size_t Dim() const { return computer_.Dim(); }

    // End-of-input and finished state are delegated to the base extractor.
    virtual void SetFinished() { base_extractor_->SetFinished(); }

    virtual bool IsFinished() const { return base_extractor_->IsFinished(); }

    // Reset the base extractor and discard any samples carried over from the
    // previous Read().
    virtual void Reset() {
        base_extractor_->Reset();
        remained_wav_.resize(0);
    }

  private:
    // Compute the features of every frame found in `waves` into *feats.
    bool Compute(const std::vector<kaldi::BaseFloat>& waves,
                 std::vector<kaldi::BaseFloat>* feats);
    Options opts_;
    std::unique_ptr<FrontendInterface> base_extractor_;
    knf::FeatureWindowFunction window_function_;
    // samples left over after the last Read(), prepended to the next chunk
    std::vector<kaldi::BaseFloat> remained_wav_;
    F computer_;
};

}  // namespace ppspeech

#include "frontend/feature_common_inl.h"


================================================
FILE: runtime/engine/common/frontend/feature_common_inl.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


namespace ppspeech {

// Construct the extractor from the computer options and take ownership of the
// upstream stage.
//
// The initializer list follows the member declaration order (opts_,
// base_extractor_, window_function_, computer_), which is the order in which
// the members are actually initialised; this avoids a misleading list and the
// corresponding -Wreorder warning.
template <class F>
StreamingFeatureTpl<F>::StreamingFeatureTpl(
    const Options& opts, std::unique_ptr<FrontendInterface> base_extractor)
    : opts_(opts),
      base_extractor_(std::move(base_extractor)),
      window_function_(opts.frame_opts),
      computer_(opts) {}

// Forward the samples to the base extractor; no feature computation happens
// until Read() is called.
template <class F>
void StreamingFeatureTpl<F>::Accept(
    const std::vector<kaldi::BaseFloat>& waves) {
    base_extractor_->Accept(waves);
}

// Read one chunk of samples from the base extractor, prepend the samples left
// over from the previous call, compute features for every frame in the
// combined buffer and keep the samples not yet consumed for the next call.
//
// @param [out] feats  receives the computed features (see Compute())
// @return false if the base extractor has no data; true otherwise
template <class F>
bool StreamingFeatureTpl<F>::Read(std::vector<kaldi::BaseFloat>* feats) {
    // Pre-sized to the base extractor's Dim(); presumably the base extractor
    // uses the incoming size as the requested chunk length — TODO confirm.
    std::vector<kaldi::BaseFloat> wav(base_extractor_->Dim());
    if (!base_extractor_->Read(&wav) || wav.empty()) return false;

    // Prepend the remained samples. Range insertion is used instead of memcpy
    // because remained_wav_ may be empty, and passing the data() pointer of an
    // empty vector (possibly null) to memcpy is undefined even for length 0.
    std::vector<kaldi::BaseFloat> waves;
    waves.reserve(remained_wav_.size() + wav.size());
    waves.insert(waves.end(), remained_wav_.begin(), remained_wav_.end());
    waves.insert(waves.end(), wav.begin(), wav.end());

    // compute speech feature
    Compute(waves, feats);

    // Cache the samples that lie beyond the frames just counted.
    const knf::FrameExtractionOptions& frame_opts = computer_.GetFrameOptions();
    const int32 total_samples = waves.size();
    const int32 num_frames = knf::NumFrames(waves.size(), frame_opts);
    int32 consumed = frame_opts.WindowShift() * num_frames;
    // Guard against a frame count that covers more than the buffer holds
    // (may happen when edges are not snipped — TODO confirm); previously this
    // produced a negative length that was passed to resize().
    if (consumed > total_samples) consumed = total_samples;
    remained_wav_.assign(waves.begin() + consumed, waves.end());
    return true;
}

// Compute the features of every frame contained in `waves`.
//
// @param [in]  waves  samples to process
// @param [out] feats  resized to num_frames * Dim() and filled frame by
//                     frame; emptied when `waves` is shorter than one window
// @return always true
template <class F>
bool StreamingFeatureTpl<F>::Compute(const std::vector<kaldi::BaseFloat>& waves,
                                     std::vector<kaldi::BaseFloat>* feats) {
    const knf::FrameExtractionOptions& frame_opts = computer_.GetFrameOptions();
    const int32 num_samples = waves.size();
    const int32 frame_length = frame_opts.WindowSize();
    if (num_samples < frame_length) {
        // Not enough samples for a single window: report "no frames" instead
        // of leaving whatever the caller had in *feats.
        feats->clear();
        return true;
    }

    const size_t feat_dim = Dim();
    const int32 num_frames = knf::NumFrames(num_samples, frame_opts);
    feats->resize(num_frames * feat_dim);

    const bool need_raw_log_energy = computer_.NeedRawLogEnergy();
    std::vector<kaldi::BaseFloat> window;
    // One scratch buffer reused for all frames, zeroed before each use.
    std::vector<kaldi::BaseFloat> this_feature(feat_dim);
    for (int32 frame = 0; frame < num_frames; frame++) {
        std::fill(window.begin(), window.end(), 0);
        std::fill(this_feature.begin(), this_feature.end(), 0);
        kaldi::BaseFloat raw_log_energy = 0.0;
        kaldi::BaseFloat vtln_warp = 1.0;
        knf::ExtractWindow(0,
                           waves,
                           frame,
                           frame_opts,
                           window_function_,
                           &window,
                           need_raw_log_energy ? &raw_log_energy : NULL);

        computer_.Compute(
            raw_log_energy, vtln_warp, &window, this_feature.data());
        std::copy(this_feature.begin(),
                  this_feature.end(),
                  feats->begin() + frame * feat_dim);
    }
    return true;
}

}  // namespace ppspeech


================================================
FILE: runtime/engine/common/frontend/feature_pipeline.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "frontend/feature_pipeline.h"

namespace ppspeech {

using std::unique_ptr;

// Assemble the frontend chain:
//   AudioCache -> Fbank -> [CMVN, only when a cmvn file is configured]
//   -> FeatureCache -> Assembler
// Each stage takes ownership of the one before it; the Assembler becomes
// base_extractor_.
FeaturePipeline::FeaturePipeline(const FeaturePipelineOptions& opts)
    : opts_(opts) {
    unique_ptr<FrontendInterface> audio_source(
        new ppspeech::AudioCache(1000 * kint16max, false));

    unique_ptr<FrontendInterface> feature_source(
        new ppspeech::Fbank(opts.fbank_opts, std::move(audio_source)));

    // CMVN is optional: it is skipped when no cmvn file is given.
    if (!opts.cmvn_file.empty()) {
        unique_ptr<FrontendInterface> normalized(
            new ppspeech::CMVN(opts.cmvn_file, std::move(feature_source)));
        feature_source = std::move(normalized);
    }

    unique_ptr<FrontendInterface> cache(
        new ppspeech::FeatureCache(kint16max, std::move(feature_source)));

    base_extractor_.reset(
        new ppspeech::Assembler(opts.assembler_opts, std::move(cache)));
}

}  // namespace ppspeech


================================================
FILE: runtime/engine/common/frontend/feature_pipeline.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// todo refactor later (SGoat)

#pragma once

#include "frontend/assembler.h"
#include "frontend/audio_cache.h"
#include "frontend/cmvn.h"
#include "frontend/data_cache.h"
#include "frontend/fbank.h"
#include "frontend/feature_cache.h"
#include "frontend/frontend_itf.h"

// feature
DECLARE_bool(fill_zero);
DECLARE_int32(num_bins);
DECLARE_string(cmvn_file);

// feature sliding window
DECLARE_int32(receptive_field_length);
DECLARE_int32(subsampling_rate);
DECLARE_int32(nnet_decoder_chunk);

namespace ppspeech {

struct FeaturePipelineOptions {
    std::string cmvn_file{};
    knf::FbankOptions fbank_opts{};
    AssemblerOptions assembler_opts{};

    static FeaturePipelineOptions InitFromFlags() {
        FeaturePipelineOptions opts;
        opts.cmvn_file = FLAGS_cmvn_file;
        LOG(INFO) << "cmvn file: " << opts.cmvn_file;

        // frame options
        knf::FrameExtractionOptions frame_opts;
        frame_opts.dither = 0.0;
        LOG(INFO) << "dither: " << frame_opts.dither;
        frame_opts.frame_shift_ms = 10;
        LOG(INFO) << "frame shift ms: " << frame_opts.frame_shift_ms;
        frame_opts.window_type = "povey";
        frame_opts.frame_length_ms = 25;
        opts.fbank_opts.mel_opts.num_bins = FLAGS_num_bins;
        LOG(INFO) << "num bins: " << opts.fbank_opts.mel_opts.num_bins;

        opts.fbank_opts.frame_opts = frame_opts;
        LOG(INFO) << "frame length ms: " << frame_opts.frame_length_ms;

        // assembler opts
        opts.assembler_opts.subsampling_rate = FLAGS_subsampling_rate;
        opts.assembler_opts.receptive_filed_length =
            FLAGS_receptive_field_length;
        opts.assembler_opts.nnet_decoder_chunk = FLAGS_nnet_decoder_chunk;
        opts.assembler_opts.fill_zero = FLAGS_fill_zero;
        LOG(INFO) << "subsampling rate: "
                  << opts.assembler_opts.subsampling_rate;
        LOG(INFO) << "nnet receptive filed length: "
                  << opts.assembler_opts.receptive_filed_length;
        LOG(INFO) << "nnet chunk size: "
                  << opts.assembler_opts.nnet_decoder_chunk;
        LOG(INFO) << "frontend fill zeros: " << opts.assembler_opts.fill_zero;
        return opts;
    }
};


// Facade over the complete frontend chain built in the constructor. Every
// FrontendInterface call is forwarded to the last stage of that chain.
class FeaturePipeline : public FrontendInterface {
  public:
    // Build the chain described by `opts`; a copy of the options is kept.
    explicit FeaturePipeline(const FeaturePipelineOptions& opts);

    // Feed waveform samples into the pipeline.
    virtual void Accept(const std::vector<kaldi::BaseFloat>& waves) {
        base_extractor_->Accept(waves);
    }
    // Read the next output of the pipeline into *feats.
    virtual bool Read(std::vector<kaldi::BaseFloat>* feats) {
        return base_extractor_->Read(feats);
    }
    virtual size_t Dim() const { return base_extractor_->Dim(); }
    virtual void SetFinished() { base_extractor_->SetFinished(); }
    virtual bool IsFinished() const { return base_extractor_->IsFinished(); }
    virtual void Reset() { base_extractor_->Reset(); }

    // Options the pipeline was built with. Const-qualified so that it can
    // also be called through a const reference.
    const FeaturePipelineOptions& Config() const { return opts_; }

    // The accessors below return by value; a top-level const on such a
    // return type has no effect, so it is omitted.

    // Configured frame shift (frame_shift_ms of the fbank frame options).
    BaseFloat FrameShift() const {
        return opts_.fbank_opts.frame_opts.frame_shift_ms;
    }
    // Configured frame length (frame_length_ms of the fbank frame options).
    BaseFloat FrameLength() const {
        return opts_.fbank_opts.frame_opts.frame_length_ms;
    }
    // Configured sample rate (samp_freq of the fbank frame options).
    BaseFloat SampleRate() const {
        return opts_.fbank_opts.frame_opts.samp_freq;
    }

  private:
    FeaturePipelineOptions opts_;
    std::unique_ptr<FrontendInterface> base_extractor_;
};

}  // namespace ppspeech


================================================
FILE: runtime/engine/common/frontend/fftsg.c
================================================
/* This file is copied from
 * https://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
 */
/*
Fast Fourier/Cosine/Sine Transform
    dimension   :one
    data length :power of 2
    decimation  :frequency
    radix       :split-radix
    data        :inplace
    table       :use
functions
    cdft: Complex Discrete Fourier Transform
    rdft: Real Discrete Fourier Transform
    ddct: Discrete Cosine Transform
    ddst: Discrete Sine Transform
    dfct: Cosine Transform of RDFT (Real Symmetric DFT)
    dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
function prototypes
    void cdft(int, int, double *, int *, double *);
    void rdft(int, int, double *, int *, double *);
    void ddct(int, int, double *, int *, double *);
    void ddst(int, int, double *, int *, double *);
    void dfct(int, double *, double *, int *, double *);
    void dfst(int, double *, double *, int *, double *);
macro definitions
    USE_CDFT_PTHREADS : default=not defined
        CDFT_THREADS_BEGIN_N  : must be >= 512, default=8192
        CDFT_4THREADS_BEGIN_N : must be >= 512, default=65536
    USE_CDFT_WINTHREADS : default=not defined
        CDFT_THREADS_BEGIN_N  : must be >= 512, default=32768
        CDFT_4THREADS_BEGIN_N : must be >= 512, default=524288


-------- Complex DFT (Discrete Fourier Transform) --------
    [definition]
        <case1>
            X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
        <case2>
            X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
        (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
    [usage]
        <case1>
            ip[0] = 0; // first time only
            cdft(2*n, 1, a, ip, w);
        <case2>
            ip[0] = 0; // first time only
            cdft(2*n, -1, a, ip, w);
    [parameters]
        2*n            :data length (int)
                        n >= 1, n = power of 2
        a[0...2*n-1]   :input/output data (double *)
                        input data
                            a[2*j] = Re(x[j]),
                            a[2*j+1] = Im(x[j]), 0<=j<n
                        output data
                            a[2*k] = Re(X[k]),
                            a[2*k+1] = Im(X[k]), 0<=k<n
        ip[0...*]      :work area for bit reversal (int *)
                        length of ip >= 2+sqrt(n)
                        strictly,
                        length of ip >=
                            2+(1<<(int)(log(n+0.5)/log(2))/2).
                        ip[0],ip[1] are pointers of the cos/sin table.
        w[0...n/2-1]   :cos/sin table (double *)
                        w[],ip[] are initialized if ip[0] == 0.
    [remark]
        Inverse of
            cdft(2*n, -1, a, ip, w);
        is
            cdft(2*n, 1, a, ip, w);
            for (j = 0; j <= 2 * n - 1; j++) {
                a[j] *= 1.0 / n;
            }
        .


-------- Real DFT / Inverse of Real DFT --------
    [definition]
        <case1> RDFT
            R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
            I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
        <case2> IRDFT (excluding scale)
            a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
                   sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
                   sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
    [usage]
        <case1>
            ip[0] = 0; // first time only
            rdft(n, 1, a, ip, w);
        <case2>
            ip[0] = 0; // first time only
            rdft(n, -1, a, ip, w);
    [parameters]
        n              :data length (int)
                        n >= 2, n = power of 2
        a[0...n-1]     :input/output data (double *)
                        <case1>
                            output data
                                a[2*k] = R[k], 0<=k<n/2
                                a[2*k+1] = I[k], 0<k<n/2
                                a[1] = R[n/2]
                        <case2>
                            input data
                                a[2*j] = R[j], 0<=j<n/2
                                a[2*j+1] = I[j], 0<j<n/2
                                a[1] = R[n/2]
        ip[0...*]      :work area for bit reversal (int *)
                        length of ip >= 2+sqrt(n/2)
                        strictly,
                        length of ip >=
                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
                        ip[0],ip[1] are pointers of the cos/sin table.
        w[0...n/2-1]   :cos/sin table (double *)
                        w[],ip[] are initialized if ip[0] == 0.
    [remark]
        Inverse of
            rdft(n, 1, a, ip, w);
        is
            rdft(n, -1, a, ip, w);
            for (j = 0; j <= n - 1; j++) {
                a[j] *= 2.0 / n;
            }
        .


-------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
    [definition]
        <case1> IDCT (excluding scale)
            C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
        <case2> DCT
            C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
    [usage]
        <case1>
            ip[0] = 0; // first time only
            ddct(n, 1, a, ip, w);
        <case2>
            ip[0] = 0; // first time only
            ddct(n, -1, a, ip, w);
    [parameters]
        n              :data length (int)
                        n >= 2, n = power of 2
        a[0...n-1]     :input/output data (double *)
                        output data
                            a[k] = C[k], 0<=k<n
        ip[0...*]      :work area for bit reversal (int *)
                        length of ip >= 2+sqrt(n/2)
                        strictly,
                        length of ip >=
                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
                        ip[0],ip[1] are pointers of the cos/sin table.
        w[0...n*5/4-1] :cos/sin table (double *)
                        w[],ip[] are initialized if ip[0] == 0.
    [remark]
        Inverse of
            ddct(n, -1, a, ip, w);
        is
            a[0] *= 0.5;
            ddct(n, 1, a, ip, w);
            for (j = 0; j <= n - 1; j++) {
                a[j] *= 2.0 / n;
            }
        .


-------- DST (Discrete Sine Transform) / Inverse of DST --------
    [definition]
        <case1> IDST (excluding scale)
            S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
        <case2> DST
            S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
    [usage]
        <case1>
            ip[0] = 0; // first time only
            ddst(n, 1, a, ip, w);
        <case2>
            ip[0] = 0; // first time only
            ddst(n, -1, a, ip, w);
    [parameters]
        n              :data length (int)
                        n >= 2, n = power of 2
        a[0...n-1]     :input/output data (double *)
                        <case1>
                            input data
                                a[j] = A[j], 0<j<n
                                a[0] = A[n]
                            output data
                                a[k] = S[k], 0<=k<n
                        <case2>
                            output data
                                a[k] = S[k], 0<k<n
                                a[0] = S[n]
        ip[0...*]      :work area for bit reversal (int *)
                        length of ip >= 2+sqrt(n/2)
                        strictly,
                        length of ip >=
                            2+(1<<(int)(log(n/2+0.5)/log(2))/2).
                        ip[0],ip[1] are pointers of the cos/sin table.
        w[0...n*5/4-1] :cos/sin table (double *)
                        w[],ip[] are initialized if ip[0] == 0.
    [remark]
        Inverse of
            ddst(n, -1, a, ip, w);
        is
            a[0] *= 0.5;
            ddst(n, 1, a, ip, w);
            for (j = 0; j <= n - 1; j++) {
                a[j] *= 2.0 / n;
            }
        .


-------- Cosine Transform of RDFT (Real Symmetric DFT) --------
    [definition]
        C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
    [usage]
        ip[0] = 0; // first time only
        dfct(n, a, t, ip, w);
    [parameters]
        n              :data length - 1 (int)
                        n >= 2, n = power of 2
        a[0...n]       :input/output data (double *)
                        output data
                            a[k] = C[k], 0<=k<=n
        t[0...n/2]     :work area (double *)
        ip[0...*]      :work area for bit reversal (int *)
                        length of ip >= 2+sqrt(n/4)
                        strictly,
                        length of ip >=
                            2+(1<<(int)(log(n/4+0.5)/log(2))/2).
                        ip[0],ip[1] are pointers of the cos/sin table.
        w[0...n*5/8-1] :cos/sin table (double *)
                        w[],ip[] are initialized if ip[0] == 0.
    [remark]
        Inverse of
            a[0] *= 0.5;
            a[n] *= 0.5;
            dfct(n, a, t, ip, w);
        is
            a[0] *= 0.5;
            a[n] *= 0.5;
            dfct(n, a, t, ip, w);
            for (j = 0; j <= n; j++) {
                a[j] *= 2.0 / n;
            }
        .


-------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
    [definition]
        S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
    [usage]
        ip[0] = 0; // first time only
        dfst(n, a, t, ip, w);
    [parameters]
        n              :data length + 1 (int)
                        n >= 2, n = power of 2
        a[0...n-1]     :input/output data (double *)
                        output data
                            a[k] = S[k], 0<k<n
                        (a[0] is used for work area)
        t[0...n/2-1]   :work area (double *)
        ip[0...*]      :work area for bit reversal (int *)
                        length of ip >= 2+sqrt(n/4)
                        strictly,
                        length of ip >=
                            2+(1<<(int)(log(n/4+0.5)/log(2))/2).
                        ip[0],ip[1] are pointers of the cos/sin table.
        w[0...n*5/8-1] :cos/sin table (double *)
                        w[],ip[] are initialized if ip[0] == 0.
    [remark]
        Inverse of
            dfst(n, a, t, ip, w);
        is
            dfst(n, a, t, ip, w);
            for (j = 1; j <= n - 1; j++) {
                a[j] *= 2.0 / n;
            }
        .


Appendix :
    The cos/sin table is recalculated when a larger table is required.
    w[] and ip[] are compatible with all routines.
*/


/*
 * Complex DFT, computed in place on a[0...n-1] (interleaved real/imaginary
 * parts, n = 2 * number of complex points).
 *   isgn >= 0 : forward sub-transform (cftfsub)
 *   isgn <  0 : backward sub-transform (cftbsub)
 * ip[0] holds the size of the cos/sin table currently stored in w; the table
 * is rebuilt with makewt() when it is too small for this n.
 */
void cdft(int n, int isgn, double *a, int *ip, double *w) {
    void makewt(int nw, int *ip, double *w);
    void cftfsub(int n, double *a, int *ip, int nw, double *w);
    void cftbsub(int n, double *a, int *ip, int nw, double *w);
    int table_len = ip[0];

    /* grow the cos/sin table if the cached one is too small */
    if ((table_len << 2) < n) {
        table_len = n >> 2;
        makewt(table_len, ip, w);
    }

    if (isgn < 0) {
        cftbsub(n, a, ip, table_len, w);
    } else {
        cftfsub(n, a, ip, table_len, w);
    }
}


/*
 * Real DFT (isgn >= 0) or its inverse without scaling (isgn < 0), computed
 * in place on a[0...n-1].
 * Packed spectrum layout, as described in the header of this file:
 *   a[2*k] = R[k], a[2*k+1] = I[k] for 0 < k < n/2, a[0] = R[0],
 *   a[1] = R[n/2].
 * ip[0] / ip[1] hold the sizes of the two cos/sin tables stored back to back
 * in w (the second one starts at w + nw); each is rebuilt when too small.
 */
void rdft(int n, int isgn, double *a, int *ip, double *w) {
    void makewt(int nw, int *ip, double *w);
    void makect(int nc, int *ip, double *c);
    void cftfsub(int n, double *a, int *ip, int nw, double *w);
    void cftbsub(int n, double *a, int *ip, int nw, double *w);
    void rftfsub(int n, double *a, int nc, double *c);
    void rftbsub(int n, double *a, int nc, double *c);
    int nw, nc;
    double xi;

    /* make sure the table used by the complex sub-transforms is big enough */
    nw = ip[0];
    if (n > (nw << 2)) {
        nw = n >> 2;
        makewt(nw, ip, w);
    }
    /* same for the table used by rftfsub/rftbsub, stored right after it */
    nc = ip[1];
    if (n > (nc << 2)) {
        nc = n >> 2;
        makect(nc, ip, w + nw);
    }
    if (isgn >= 0) {
        /* forward: complex transform first, then the real post-processing */
        if (n > 4) {
            cftfsub(n, a, ip, nw, w);
            rftfsub(n, a, nc, w + nw);
        } else if (n == 4) {
            cftfsub(n, a, ip, nw, w);
        }
        /* combine a[0] and a[1] into their sum and difference */
        xi = a[0] - a[1];
        a[0] += a[1];
        a[1] = xi;
    } else {
        /* inverse: undo the a[0]/a[1] packing, then run the steps backwards */
        a[1] = 0.5 * (a[0] - a[1]);
        a[0] -= a[1];
        if (n > 4) {
            rftbsub(n, a, nc, w + nw);
            cftbsub(n, a, ip, nw, w);
        } else if (n == 4) {
            cftbsub(n, a, ip, nw, w);
        }
    }
}


/*
 * Discrete cosine transform, computed in place on a[0...n-1].
 * Per the header of this file: isgn = -1 selects the DCT and isgn = 1 the
 * IDCT (excluding scale). The code branches on isgn < 0 versus isgn >= 0.
 * ip[0] / ip[1] hold the sizes of the two cos/sin tables stored back to back
 * in w (the second one starts at w + nw); each is rebuilt when too small.
 */
void ddct(int n, int isgn, double *a, int *ip, double *w) {
    void makewt(int nw, int *ip, double *w);
    void makect(int nc, int *ip, double *c);
    void cftfsub(int n, double *a, int *ip, int nw, double *w);
    void cftbsub(int n, double *a, int *ip, int nw, double *w);
    void rftfsub(int n, double *a, int nc, double *c);
    void rftbsub(int n, double *a, int nc, double *c);
    void dctsub(int n, double *a, int nc, double *c);
    int j, nw, nc;
    double xr;

    /* make sure both cos/sin tables are large enough for this n */
    nw = ip[0];
    if (n > (nw << 2)) {
        nw = n >> 2;
        makewt(nw, ip, w);
    }
    nc = ip[1];
    if (n > nc) {
        nc = n;
        makect(nc, ip, w + nw);
    }
    if (isgn < 0) {
        /* pre-processing: walk downwards so that each a[j - 1] is read
           before it is overwritten by the next iteration */
        xr = a[n - 1];
        for (j = n - 2; j >= 2; j -= 2) {
            a[j + 1] = a[j] - a[j - 1];
            a[j] += a[j - 1];
        }
        a[1] = a[0] - xr;
        a[0] += xr;
        if (n > 4) {
            rftbsub(n, a, nc, w + nw);
            cftbsub(n, a, ip, nw, w);
        } else if (n == 4) {
            cftbsub(n, a, ip, nw, w);
        }
    }
    /* step shared by both directions */
    dctsub(n, a, nc, w + nw);
    if (isgn >= 0) {
        if (n > 4) {
            cftfsub(n, a, ip, nw, w);
            rftfsub(n, a, nc, w + nw);
        } else if (n == 4) {
            cftfsub(n, a, ip, nw, w);
        }
        /* post-processing: walk upwards; a[0] - a[1] is saved first because
           a[1] is overwritten by the first iteration */
        xr = a[0] - a[1];
        a[0] += a[1];
        for (j = 2; j < n; j += 2) {
            a[j - 1] = a[j] - a[j + 1];
            a[j] += a[j + 1];
        }
        a[n - 1] = xr;
    }
}


/*
 * ddst -- in-place transform of a[0..n-1]; the direction is chosen by isgn.
 * (Presumably the discrete sine transform of this FFT package, going by the
 * routine name and the dstsub helper -- confirm against the upstream docs.)
 *
 *   n    : number of entries of a to transform
 *   isgn : >= 0 runs dstsub, then cftfsub (plus rftfsub when n > 4), then an
 *               output butterfly pass;
 *          <  0 runs an input butterfly pass, then rftbsub/cftbsub, then
 *               dstsub
 *   a    : input data, overwritten with the result
 *   ip   : integer work area; ip[0] and ip[1] cache the sizes of the two
 *          tables held in w (written by makewt / makect)
 *   w    : table area; the first nw entries are filled by makewt and the
 *          entries that follow by makect
 */
void ddst(int n, int isgn, double *a, int *ip, double *w) {
    void makewt(int nw, int *ip, double *w);
    void makect(int nc, int *ip, double *c);
    void cftfsub(int n, double *a, int *ip, int nw, double *w);
    void cftbsub(int n, double *a, int *ip, int nw, double *w);
    void rftfsub(int n, double *a, int nc, double *c);
    void rftbsub(int n, double *a, int nc, double *c);
    void dstsub(int n, double *a, int nc, double *c);
    int idx, wt_len, ct_len;
    double edge;

    /* Rebuild the makewt table when the cached one is too small for n. */
    wt_len = ip[0];
    if ((wt_len << 2) < n) {
        wt_len = n >> 2;
        makewt(wt_len, ip, w);
    }
    /* Same for the makect table, which is stored right behind it in w.
       ip[1] must be read after makewt, because makewt resets it. */
    ct_len = ip[1];
    if (ct_len < n) {
        ct_len = n;
        makect(ct_len, ip, w + wt_len);
    }

    if (isgn < 0) {
        /* Input butterflies (with sign flips), from the top downwards. */
        edge = a[n - 1];
        idx = n - 2;
        while (idx >= 2) {
            const double below = a[idx - 1];
            a[idx + 1] = -a[idx] - below;
            a[idx] -= below;
            idx -= 2;
        }
        a[1] = a[0] + edge;
        a[0] -= edge;
        if (n > 4) {
            rftbsub(n, a, ct_len, w + wt_len);
        }
        if (n >= 4) {
            cftbsub(n, a, ip, wt_len, w);
        }
    }

    dstsub(n, a, ct_len, w + wt_len);

    if (isgn >= 0) {
        if (n >= 4) {
            cftfsub(n, a, ip, wt_len, w);
        }
        if (n > 4) {
            rftfsub(n, a, ct_len, w + wt_len);
        }
        /* Output butterflies (with sign flips), walking upwards; a[n - 1]
           is stored last because the loop still reads its old value. */
        edge = a[0] - a[1];
        a[0] += a[1];
        idx = 2;
        while (idx < n) {
            const double above = a[idx + 1];
            a[idx - 1] = -a[idx] - above;
            a[idx] -= above;
            idx += 2;
        }
        a[n - 1] = -edge;
    }
}


/*
 * dfct -- in-place transform of a[0..n] using t as scratch space.
 * (Presumably the "cosine transform of a real symmetric DFT" routine of this
 * FFT package, going by the name and the dctsub helper -- confirm against
 * the upstream docs.)
 *
 *   n  : transform size; note that a[n] is read and written, so a must
 *        hold n + 1 entries
 *   a  : input data, overwritten with the result
 *   t  : scratch array; indices up to t[n >> 1] are used here
 *   ip : integer work area; ip[0] and ip[1] cache the sizes of the two
 *        tables held in w (written by makewt / makect)
 *   w  : table area; the first nw entries are filled by makewt and the
 *        entries that follow by makect
 */
void dfct(int n, double *a, double *t, int *ip, double *w) {
    void makewt(int nw, int *ip, double *w);
    void makect(int nc, int *ip, double *c);
    void cftfsub(int n, double *a, int *ip, int nw, double *w);
    void rftfsub(int n, double *a, int nc, double *c);
    void dctsub(int n, double *a, int nc, double *c);
    int j, k, l, m, mh, nw, nc;
    double xr, xi, yr, yi;

    /* Rebuild the makewt table (size n / 8) if the cached one is too small. */
    nw = ip[0];
    if (n > (nw << 3)) {
        nw = n >> 3;
        makewt(nw, ip, w);
    }
    /* Rebuild the makect table (size n / 2) if the cached one is too small.
       ip[1] is read after makewt because makewt resets it. */
    nc = ip[1];
    if (n > (nc << 1)) {
        nc = n >> 1;
        makect(nc, ip, w + nw);
    }
    /* Fold the end points a[0], a[n] and the midpoint a[m]: the difference
       of the end points stays in a[0], the sum is combined with the
       midpoint into t[0] and t[m]. */
    m = n >> 1;
    yi = a[m];
    xi = a[0] + a[n];
    a[0] -= a[n];
    t[0] = xi - yi;
    t[m] = xi + yi;
    if (n > 2) {
        mh = m >> 1;
        /* For each mirrored pair j, k = m - j: a keeps the differences
           a[.] - a[n - .], t receives the combined sums. */
        for (j = 1; j < mh; j++) {
            k = m - j;
            xr = a[j] - a[n - j];
            xi = a[j] + a[n - j];
            yr = a[k] - a[n - k];
            yi = a[k] + a[n - k];
            a[j] = xr;
            a[k] = yr;
            t[j] = xi - yi;
            t[k] = xi + yi;
        }
        t[mh] = a[mh] + a[n - mh];
        a[mh] -= a[n - mh];
        /* Length-m pass over the difference half held in a[0..m-1]. */
        dctsub(m, a, nc, w + nw);
        if (m > 4) {
            cftfsub(m, a, ip, nw, w);
            rftfsub(m, a, nc, w + nw);
        } else if (m == 4) {
            cftfsub(m, a, ip, nw, w);
        }
        /* Spread the results onto the odd-indexed entries of a; the loop
           runs downwards so sources are read before being overwritten. */
        a[n - 1] = a[0] - a[1];
        a[1] = a[0] + a[1];
        for (j = m - 2; j >= 2; j -= 2) {
            a[2 * j + 1] = a[j] + a[j + 1];
            a[2 * j - 1] = a[j] - a[j + 1];
        }
        /* Repeatedly halve the problem held in t; l is the output stride
           in a and doubles on every pass. */
        l = 2;
        m = mh;
        while (m >= 2) {
            dctsub(m, t, nc, w + nw);
            if (m > 4) {
                cftfsub(m, t, ip, nw, w);
                rftfsub(m, t, nc, w + nw);
            } else if (m == 4) {
                cftfsub(m, t, ip, nw, w);
            }
            /* Scatter this pass into a[l], a[n - l] and a[k -/+ l], where
               k advances in steps of 4 * l. */
            a[n - l] = t[0] - t[1];
            a[l] = t[0] + t[1];
            k = 0;
            for (j = 2; j < m; j += 2) {
                k += l << 2;
                a[k - l] = t[j] - t[j + 1];
                a[k + l] = t[j] + t[j + 1];
            }
            l <<= 1;
            /* Fold t[m..2m] down into t[0..m] as input for the next pass. */
            mh = m >> 1;
            for (j = 0; j < mh; j++) {
                k = m - j;
                t[j] = t[m + k] - t[m + j];
                t[k] = t[m + k] + t[m + j];
            }
            t[mh] = t[m + mh];
            m = mh;
        }
        /* Remaining three outputs come from what is left in t[0..2]. */
        a[l] = t[0];
        a[n] = t[2] - t[1];
        a[0] = t[2] + t[1];
    } else {
        /* n <= 2: the result is assembled directly from the initial fold. */
        a[1] = a[0];
        a[2] = t[0];
        a[0] = t[1];
    }
}


/*
 * dfst -- in-place transform of a[1..n-1] using t as scratch space; a[0] is
 * always set to 0 on return.
 * (Presumably the "sine transform of a real anti-symmetric DFT" routine of
 * this FFT package, going by the name and the dstsub helper -- confirm
 * against the upstream docs.)
 *
 *   n  : transform size
 *   a  : input data, overwritten with the result
 *   t  : scratch array; indices below n >> 1 are used here
 *   ip : integer work area; ip[0] and ip[1] cache the sizes of the two
 *        tables held in w (written by makewt / makect)
 *   w  : table area; the first nw entries are filled by makewt and the
 *        entries that follow by makect
 */
void dfst(int n, double *a, double *t, int *ip, double *w) {
    void makewt(int nw, int *ip, double *w);
    void makect(int nc, int *ip, double *c);
    void cftfsub(int n, double *a, int *ip, int nw, double *w);
    void rftfsub(int n, double *a, int nc, double *c);
    void dstsub(int n, double *a, int nc, double *c);
    int j, k, l, m, mh, nw, nc;
    double xr, xi, yr, yi;

    /* Rebuild the makewt table (size n / 8) if the cached one is too small. */
    nw = ip[0];
    if (n > (nw << 3)) {
        nw = n >> 3;
        makewt(nw, ip, w);
    }
    /* Rebuild the makect table (size n / 2) if the cached one is too small.
       ip[1] is read after makewt because makewt resets it. */
    nc = ip[1];
    if (n > (nc << 1)) {
        nc = n >> 1;
        makect(nc, ip, w + nw);
    }
    if (n > 2) {
        m = n >> 1;
        mh = m >> 1;
        /* For each mirrored pair j, k = m - j: a keeps the sums
           a[.] + a[n - .], t receives the combined differences. */
        for (j = 1; j < mh; j++) {
            k = m - j;
            xr = a[j] + a[n - j];
            xi = a[j] - a[n - j];
            yr = a[k] + a[n - k];
            yi = a[k] - a[n - k];
            a[j] = xr;
            a[k] = yr;
            t[j] = xi + yi;
            t[k] = xi - yi;
        }
        t[0] = a[mh] - a[n - mh];
        a[mh] += a[n - mh];
        /* The midpoint value is moved into slot 0 before the length-m pass. */
        a[0] = a[m];
        dstsub(m, a, nc, w + nw);
        if (m > 4) {
            cftfsub(m, a, ip, nw, w);
            rftfsub(m, a, nc, w + nw);
        } else if (m == 4) {
            cftfsub(m, a, ip, nw, w);
        }
        /* Spread the results onto the odd-indexed entries of a; the loop
           runs downwards so sources are read before being overwritten. */
        a[n - 1] = a[1] - a[0];
        a[1] = a[0] + a[1];
        for (j = m - 2; j >= 2; j -= 2) {
            a[2 * j + 1] = a[j] - a[j + 1];
            a[2 * j - 1] = -a[j] - a[j + 1];
        }
        /* Repeatedly halve the problem held in t; l is the output stride
           in a and doubles on every pass. */
        l = 2;
        m = mh;
        while (m >= 2) {
            dstsub(m, t, nc, w + nw);
            if (m > 4) {
                cftfsub(m, t, ip, nw, w);
                rftfsub(m, t, nc, w + nw);
            } else if (m == 4) {
                cftfsub(m, t, ip, nw, w);
            }
            /* Scatter this pass into a[l], a[n - l] and a[k -/+ l], where
               k advances in steps of 4 * l. */
            a[n - l] = t[1] - t[0];
            a[l] = t[0] + t[1];
            k = 0;
            for (j = 2; j < m; j += 2) {
                k += l << 2;
                a[k - l] = -t[j] - t[j + 1];
                a[k + l] = t[j] - t[j + 1];
            }
            l <<= 1;
            /* Fold the upper part of t down as input for the next pass. */
            mh = m >> 1;
            for (j = 1; j < mh; j++) {
                k = m - j;
                t[j] = t[m + k] + t[m + j];
                t[k] = t[m + k] - t[m + j];
            }
            t[0] = t[m + mh];
            m = mh;
        }
        a[l] = t[0];
    }
    /* Executed for every n, including n <= 2. */
    a[0] = 0;
}


/* -------- initializing routines -------- */


#include <math.h>

/*
 * makewt -- fill w with the cos/sin table used by the complex FFT stages and
 * record its size in ip.
 *
 *   nw : table size; stored in ip[0]
 *   ip : work area; ip[0] = nw and ip[1] = 1 are always written, and
 *        makeipt(nw, ip) fills further entries when nw / 2 > 4
 *   w  : receives the table. The first nw / 2 entries are computed with
 *        cos()/sin(); every following sub-table is half as long and is
 *        derived from the previous one by copying every second group of
 *        four values.
 *
 * Nothing beyond ip[0] and ip[1] is written when nw <= 2.
 */
void makewt(int nw, int *ip, double *w) {
    void makeipt(int nw, int *ip);
    int k, t, half, prev, cur;
    double step, diag;

    ip[0] = nw;
    ip[1] = 1;
    if (nw <= 2) {
        return;
    }

    half = nw >> 1;
    step = atan(1.0) / half; /* (pi / 4) / half */
    diag = cos(step * half); /* cos(pi / 4) */
    w[0] = 1;
    w[1] = diag;
    if (half == 4) {
        w[2] = cos(step * 2);
        w[3] = sin(step * 2);
    } else if (half > 4) {
        makeipt(nw, ip);
        w[2] = 0.5 / cos(step * 2);
        w[3] = 0.5 / cos(step * 6);
        for (k = 4; k < half; k += 4) {
            w[k] = cos(step * k);
            w[k + 1] = sin(step * k);
            w[k + 2] = cos(3 * step * k);
            w[k + 3] = -sin(3 * step * k);
        }
    }

    /* Append successively halved sub-tables; prev / cur are the offsets of
       the previous and the current sub-table inside w. */
    for (prev = 0; half > 2; prev = cur) {
        cur = prev + half;
        half >>= 1;
        w[cur] = 1;
        w[cur + 1] = diag;
        if (half == 4) {
            w[cur + 2] = w[prev + 4];
            w[cur + 3] = w[prev + 5];
        } else if (half > 4) {
            w[cur + 2] = 0.5 / w[prev + 4];
            w[cur + 3] = 0.5 / w[prev + 6];
            for (k = 4; k < half; k += 4) {
                /* Source and destination ranges never overlap, so the four
                   values can be copied one by one. */
                for (t = 0; t < 4; t++) {
                    w[cur + k + t] = w[prev + 2 * k + t];
                }
            }
        }
    }
}


/*
 * makeipt -- build the integer offset table stored in ip[2..].
 *
 * ip[2] and ip[3] are seeded with 0 and 16. Then, once for every factor of
 * four by which nw exceeds 32, the current group ip[lo..2*lo-1] is expanded
 * into a group of twice the length placed directly behind it: each entry is
 * multiplied by 4, and the second half additionally gets the offset 16 * lo.
 * ip[0] and ip[1] are not touched.
 */
void makeipt(int nw, int *ip) {
    int span, lo, hi, idx, scaled, shift;

    ip[2] = 0;
    ip[3] = 16;
    lo = 2;
    span = nw;
    while (span > 32) {
        hi = lo << 1;
        shift = hi << 3;
        for (idx = lo; idx < hi; ++idx) {
            scaled = ip[idx] << 2;
            ip[lo + idx] = scaled;
            ip[hi + idx] = scaled + shift;
        }
        lo = hi;
        span >>= 2;
    }
}


void makect(int nc, int *ip, double *c) {
    int j, nch;
    double delta;

    ip[1] = nc;
    if (nc > 1) {
        nch = nc >> 1;
        delta = atan(1.0) / nch;
        c[0] = cos(delta * nch);
        c[nch] = 0.5 * c[0];
        for (j = 1; j < nch; j++) {
            c[j] = 0.5 * cos(delta * j);
            c[nc - j] = 0.5 * sin(delta * j);
        }
    }
}


/* -------- child routines -------- */


/*
 * Optional POSIX-threads backend, enabled by defining USE_CDFT_PTHREADS.
 * It defines USE_CDFT_THREADS, which cftfsub/cftbsub test to decide whether
 * the cftrec4_th path is compiled in.
 * NOTE(review): the USE_CDFT_WINTHREADS section defines the same cdft_thread_*
 * macros, so presumably only one of the two switches is meant to be set.
 */
#ifdef USE_CDFT_PTHREADS
#define USE_CDFT_THREADS
/* Transforms with n above this value take the cftrec4_th path (overridable). */
#ifndef CDFT_THREADS_BEGIN_N
#define CDFT_THREADS_BEGIN_N 8192
#endif
/* Second threshold (overridable); its use is outside this part of the file,
   presumably the size from which four threads are used -- TODO confirm. */
#ifndef CDFT_4THREADS_BEGIN_N
#define CDFT_4THREADS_BEGIN_N 65536
#endif
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#define cdft_thread_t pthread_t
/* Start func(argp) in a new thread whose handle is stored through thp; on
   failure print "cdft thread error" to stderr and terminate with exit(1). */
#define cdft_thread_create(thp, func, argp)                       \
    {                                                             \
        if (pthread_create(thp, NULL, func, (void *)argp) != 0) { \
            fprintf(stderr, "cdft thread error\n");               \
            exit(1);                                              \
        }                                                         \
    }
/* Join thread th, discarding its return value; on failure print
   "cdft thread error" to stderr and terminate with exit(1). */
#define cdft_thread_wait(th)                        \
    {                                               \
        if (pthread_join(th, NULL) != 0) {          \
            fprintf(stderr, "cdft thread error\n"); \
            exit(1);                                \
        }                                           \
    }
#endif /* USE_CDFT_PTHREADS */


/*
 * Optional Win32-threads backend, enabled by defining USE_CDFT_WINTHREADS.
 * It defines USE_CDFT_THREADS, which cftfsub/cftbsub test to decide whether
 * the cftrec4_th path is compiled in.
 * NOTE(review): the USE_CDFT_PTHREADS section defines the same cdft_thread_*
 * macros, so presumably only one of the two switches is meant to be set.
 */
#ifdef USE_CDFT_WINTHREADS
#define USE_CDFT_THREADS
/* Transforms with n above this value take the cftrec4_th path (overridable). */
#ifndef CDFT_THREADS_BEGIN_N
#define CDFT_THREADS_BEGIN_N 32768
#endif
/* Second threshold (overridable); its use is outside this part of the file,
   presumably the size from which four threads are used -- TODO confirm. */
#ifndef CDFT_4THREADS_BEGIN_N
#define CDFT_4THREADS_BEGIN_N 524288
#endif
#include <stdio.h>
#include <stdlib.h>
#include <windows.h>
#define cdft_thread_t HANDLE
/* Start func(argp) in a new thread whose handle is stored through thp; if
   CreateThread returns 0, print "cdft thread error" to stderr and terminate
   with exit(1). The thread id is received in a local and not used further. */
#define cdft_thread_create(thp, func, argp)                                 \
    {                                                                       \
        DWORD thid;                                                         \
        *(thp) = CreateThread(                                              \
            NULL, 0, (LPTHREAD_START_ROUTINE)func, (LPVOID)argp, 0, &thid); \
        if (*(thp) == 0) {                                                  \
            fprintf(stderr, "cdft thread error\n");                         \
            exit(1);                                                        \
        }                                                                   \
    }
/* Wait without timeout for thread th and close its handle. Unlike the
   create macro, the return values are not checked here. */
#define cdft_thread_wait(th)               \
    {                                      \
        WaitForSingleObject(th, INFINITE); \
        CloseHandle(th);                   \
    }
#endif /* USE_CDFT_WINTHREADS */


/*
 * cftfsub -- size dispatcher for the complex FFT stage on a (n doubles).
 * (Presumably the forward-direction counterpart of cftbsub, going by the
 * names of the routines it calls -- confirm against the upstream docs.)
 *
 *   n == 4       : cftx020
 *   n == 8       : cftf040
 *   8 < n < 32   : cftf081 + bitrv208
 *   n == 32      : cftf161 + bitrv216
 *   n > 32       : cftf1st, then cftrec4_th (threaded builds, n above
 *                  CDFT_THREADS_BEGIN_N), cftrec4 (n > 512), cftleaf
 *                  (n > 128) or cftfx41, followed by bitrv2
 *   anything else: no work is done
 *
 * ip and w/nw are the work area and table prepared by makewt.
 */
void cftfsub(int n, double *a, int *ip, int nw, double *w) {
    void bitrv2(int n, int *ip, double *a);
    void bitrv216(double *a);
    void bitrv208(double *a);
    void cftf1st(int n, double *a, double *w);
    void cftrec4(int n, double *a, int nw, double *w);
    void cftleaf(int n, int isplt, double *a, int nw, double *w);
    void cftfx41(int n, double *a, int nw, double *w);
    void cftf161(double *a, double *w);
    void cftf081(double *a, double *w);
    void cftf040(double *a);
    void cftx020(double *a);
#ifdef USE_CDFT_THREADS
    void cftrec4_th(int n, double *a, int nw, double *w);
#endif /* USE_CDFT_THREADS */

    /* Small fixed-size kernels first. */
    if (n == 4) {
        cftx020(a);
        return;
    }
    if (n == 8) {
        cftf040(a);
        return;
    }
    if (n < 8) {
        return;
    }
    if (n < 32) {
        cftf081(a, w);
        bitrv208(a);
        return;
    }
    if (n == 32) {
        cftf161(a, &w[nw - 8]);
        bitrv216(a);
        return;
    }

    /* General case, n > 32. */
    cftf1st(n, a, &w[nw - (n >> 2)]);
#ifdef USE_CDFT_THREADS
    if (n > CDFT_THREADS_BEGIN_N) {
        cftrec4_th(n, a, nw, w);
        bitrv2(n, ip, a);
        return;
    }
#endif /* USE_CDFT_THREADS */
    if (n > 512) {
        cftrec4(n, a, nw, w);
    } else if (n > 128) {
        cftleaf(n, 1, a, nw, w);
    } else {
        cftfx41(n, a, nw, w);
    }
    bitrv2(n, ip, a);
}


/*
 * cftbsub -- size dispatcher for the complex FFT stage on a (n doubles).
 * (Presumably the backward-direction counterpart of cftfsub, going by the
 * names of the routines it calls -- confirm against the upstream docs.)
 *
 *   n == 4       : cftx020
 *   n == 8       : cftb040
 *   8 < n < 32   : cftf081 + bitrv208neg
 *   n == 32      : cftf161 + bitrv216neg
 *   n > 32       : cftb1st, then cftrec4_th (threaded builds, n above
 *                  CDFT_THREADS_BEGIN_N), cftrec4 (n > 512), cftleaf
 *                  (n > 128) or cftfx41, followed by bitrv2conj
 *   anything else: no work is done
 *
 * ip and w/nw are the work area and table prepared by makewt.
 */
void cftbsub(int n, double *a, int *ip, int nw, double *w) {
    void bitrv2conj(int n, int *ip, double *a);
    void bitrv216neg(double *a);
    void bitrv208neg(double *a);
    void cftb1st(int n, double *a, double *w);
    void cftrec4(int n, double *a, int nw, double *w);
    void cftleaf(int n, int isplt, double *a, int nw, double *w);
    void cftfx41(int n, double *a, int nw, double *w);
    void cftf161(double *a, double *w);
    void cftf081(double *a, double *w);
    void cftb040(double *a);
    void cftx020(double *a);
#ifdef USE_CDFT_THREADS
    void cftrec4_th(int n, double *a, int nw, double *w);
#endif /* USE_CDFT_THREADS */

    /* Small fixed-size kernels first. */
    if (n == 4) {
        cftx020(a);
        return;
    }
    if (n == 8) {
        cftb040(a);
        return;
    }
    if (n < 8) {
        return;
    }
    if (n < 32) {
        cftf081(a, w);
        bitrv208neg(a);
        return;
    }
    if (n == 32) {
        cftf161(a, &w[nw - 8]);
        bitrv216neg(a);
        return;
    }

    /* General case, n > 32. */
    cftb1st(n, a, &w[nw - (n >> 2)]);
#ifdef USE_CDFT_THREADS
    if (n > CDFT_THREADS_BEGIN_N) {
        cftrec4_th(n, a, nw, w);
        bitrv2conj(n, ip, a);
        return;
    }
#endif /* USE_CDFT_THREADS */
    if (n > 512) {
        cftrec4(n, a, nw, w);
    } else if (n > 128) {
        cftleaf(n, 1, a, nw, w);
    } else {
        cftfx41(n, a, nw, w);
    }
    bitrv2conj(n, ip, a);
}


/* Exchange the complex values stored at a[p], a[p + 1] and a[q], a[q + 1]. */
static void bitrv2_exchange(double *a, int p, int q) {
    const double p_re = a[p];
    const double p_im = a[p + 1];
    const double q_re = a[q];
    const double q_im = a[q + 1];

    a[p] = q_re;
    a[p + 1] = q_im;
    a[q] = p_re;
    a[q + 1] = p_im;
}

/*
 * bitrv2 -- in-place bit-reversal reordering of the n / 2 complex values
 * stored interleaved (re, im) in a[0..n-1].
 *
 *   n  : number of doubles in a
 *   ip : offset table; entries ip[m..2m-1] are read, where m is derived
 *        from n below (the table layout matches what makeipt produces)
 *   a  : data, permuted in place
 *
 * Two unrolled variants exist, selected by what remains of n / 4 after
 * repeatedly dividing by four: 8 (sixteen exchanges per off-diagonal table
 * pair, six per diagonal entry) or anything else (eight and two).
 */
void bitrv2(int n, int *ip, double *a) {
    int row, col, p, q, rem, m, half, stride;

    m = 1;
    rem = n >> 2;
    while (rem > 8) {
        m <<= 1;
        rem >>= 2;
    }
    half = n >> 1;
    stride = 4 * m;

    if (rem == 8) {
        for (row = 0; row < m; row++) {
            /* Off-diagonal table pairs (col < row). */
            for (col = 0; col < row; col++) {
                p = 4 * col + 2 * ip[m + row];
                q = 4 * row + 2 * ip[m + col];
                bitrv2_exchange(a, p, q);
                p += stride;
                q += 2 * stride;
                bitrv2_exchange(a, p, q);
                p += stride;
                q -= stride;
                bitrv2_exchange(a, p, q);
                p += stride;
                q += 2 * stride;
                bitrv2_exchange(a, p, q);
                p += half;
                q += 2;
                bitrv2_exchange(a, p, q);
                p -= stride;
                q -= 2 * stride;
                bitrv2_exchange(a, p, q);
                p -= stride;
                q += stride;
                bitrv2_exchange(a, p, q);
                p -= stride;
                q -= 2 * stride;
                bitrv2_exchange(a, p, q);
                p += 2;
                q += half;
                bitrv2_exchange(a, p, q);
                p += stride;
                q += 2 * stride;
                bitrv2_exchange(a, p, q);
                p += stride;
                q -= stride;
                bitrv2_exchange(a, p, q);
                p += stride;
                q += 2 * stride;
                bitrv2_exchange(a, p, q);
                p -= half;
                q -= 2;
                bitrv2_exchange(a, p, q);
                p -= stride;
                q -= 2 * stride;
                bitrv2_exchange(a, p, q);
                p -= stride;
                q += stride;
                bitrv2_exchange(a, p, q);
                p -= stride;
                q -= 2 * stride;
                bitrv2_exchange(a, p, q);
            }
            /* Diagonal entry (col == row). */
            q = 4 * row + 2 * ip[m + row];
            p = q + 2;
            q += half;
            bitrv2_exchange(a, p, q);
            p += stride;
            q += 2 * stride;
            bitrv2_exchange(a, p, q);
            p += stride;
            q -= stride;
            bitrv2_exchange(a, p, q);
            p -= 2;
            q -= half;
            bitrv2_exchange(a, p, q);
            p += half + 2;
            q += half + 2;
            bitrv2_exchange(a, p, q);
            p -= half - stride;
            q += 2 * stride - 2;
            bitrv2_exchange(a, p, q);
        }
    } else {
        for (row = 0; row < m; row++) {
            /* Off-diagonal table pairs (col < row). */
            for (col = 0; col < row; col++) {
                p = 4 * col + ip[m + row];
                q = 4 * row + ip[m + col];
                bitrv2_exchange(a, p, q);
                p += stride;
                q += stride;
                bitrv2_exchange(a, p, q);
                p += half;
                q += 2;
                bitrv2_exchange(a, p, q);
                p -= stride;
                q -= stride;
                bitrv2_exchange(a, p, q);
                p += 2;
                q += half;
                bitrv2_exchange(a, p, q);
                p += stride;
                q += stride;
                bitrv2_exchange(a, p, q);
                p -= half;
                q -= 2;
                bitrv2_exchange(a, p, q);
                p -= stride;
                q -= stride;
                bitrv2_exchange(a, p, q);
            }
            /* Diagonal entry (col == row). */
            q = 4 * row + ip[m + row];
            p = q + 2;
            q += half;
            bitrv2_exchange(a, p, q);
            p += stride;
            q += stride;
            bitrv2_exchange(a, p, q);
        }
    }
}


/* Exchange the complex values stored at a[p], a[p + 1] and a[q], a[q + 1],
   negating both imaginary parts on the way. */
static void bitrv2conj_exchange(double *a, int p, int q) {
    const double p_re = a[p];
    const double p_im = -a[p + 1];
    const double q_re = a[q];
    const double q_im = -a[q + 1];

    a[p] = q_re;
    a[p + 1] = q_im;
    a[q] = p_re;
    a[q + 1] = p_im;
}

/*
 * bitrv2conj -- in-place bit-reversal reordering of the n / 2 complex values
 * stored interleaved (re, im) in a[0..n-1], combined with complex
 * conjugation: every imaginary part is negated, including those of the
 * values that stay in place.
 *
 *   n  : number of doubles in a
 *   ip : offset table; entries ip[m..2m-1] are read, where m is derived
 *        from n below (the table layout matches what makeipt produces)
 *   a  : data, permuted and conjugated in place
 *
 * Two unrolled variants exist, selected by what remains of n / 4 after
 * repeatedly dividing by four: 8 or anything else.
 */
void bitrv2conj(int n, int *ip, double *a) {
    int row, col, p, q, rem, m, half, stride;

    m = 1;
    rem = n >> 2;
    while (rem > 8) {
        m <<= 1;
        rem >>= 2;
    }
    half = n >> 1;
    stride = 4 * m;

    if (rem == 8) {
        for (row = 0; row < m; row++) {
            /* Off-diagonal table pairs (col < row). */
            for (col = 0; col < row; col++) {
                p = 4 * col + 2 * ip[m + row];
                q = 4 * row + 2 * ip[m + col];
                bitrv2conj_exchange(a, p, q);
                p += stride;
                q += 2 * stride;
                bitrv2conj_exchange(a, p, q);
                p += stride;
                q -= stride;
                bitrv2conj_exchange(a, p, q);
                p += stride;
                q += 2 * stride;
                bitrv2conj_exchange(a, p, q);
                p += half;
                q += 2;
                bitrv2conj_exchange(a, p, q);
                p -= stride;
                q -= 2 * stride;
                bitrv2conj_exchange(a, p, q);
                p -= stride;
                q += stride;
                bitrv2conj_exchange(a, p, q);
                p -= stride;
                q -= 2 * stride;
                bitrv2conj_exchange(a, p, q);
                p += 2;
                q += half;
                bitrv2conj_exchange(a, p, q);
                p += stride;
                q += 2 * stride;
                bitrv2conj_exchange(a, p, q);
                p += stride;
                q -= stride;
                bitrv2conj_exchange(a, p, q);
                p += stride;
                q += 2 * stride;
                bitrv2conj_exchange(a, p, q);
                p -= half;
                q -= 2;
                bitrv2conj_exchange(a, p, q);
                p -= stride;
                q -= 2 * stride;
                bitrv2conj_exchange(a, p, q);
                p -= stride;
                q += stride;
                bitrv2conj_exchange(a, p, q);
                p -= stride;
                q -= 2 * stride;
                bitrv2conj_exchange(a, p, q);
            }
            /* Diagonal entry (col == row). The lone negations handle the
               imaginary parts of values that keep their position. */
            q = 4 * row + 2 * ip[m + row];
            p = q + 2;
            q += half;
            a[p - 1] = -a[p - 1];
            bitrv2conj_exchange(a, p, q);
            a[q + 3] = -a[q + 3];
            p += stride;
            q += 2 * stride;
            bitrv2conj_exchange(a, p, q);
            p += stride;
            q -= stride;
            bitrv2conj_exchange(a, p, q);
            p -= 2;
            q -= half;
            bitrv2conj_exchange(a, p, q);
            p += half + 2;
            q += half + 2;
            bitrv2conj_exchange(a, p, q);
            p -= half - stride;
            q += 2 * stride - 2;
            a[p - 1] = -a[p - 1];
            bitrv2conj_exchange(a, p, q);
            a[q + 3] = -a[q + 3];
        }
    } else {
        for (row = 0; row < m; row++) {
            /* Off-diagonal table pairs (col < row). */
            for (col = 0; col < row; col++) {
                p = 4 * col + ip[m + row];
                q = 4 * row + ip[m + col];
                bitrv2conj_exchange(a, p, q);
                p += stride;
                q += stride;
                bitrv2conj_exchange(a, p, q);
                p += half;
                q += 2;
                bitrv2conj_exchange(a, p, q);
                p -= stride;
                q -= stride;
                bitrv2conj_exchange(a, p, q);
                p += 2;
                q += half;
                bitrv2conj_exchange(a, p, q);
                p += stride;
                q += stride;
                bitrv2conj_exchange(a, p, q);
                p -= half;
                q -= 2;
                bitrv2conj_exchange(a, p, q);
                p -= stride;
                q -= stride;
                bitrv2conj_exchange(a, p, q);
            }
            /* Diagonal entry (col == row). The lone negations handle the
               imaginary parts of values that keep their position. */
            q = 4 * row + ip[m + row];
            p = q + 2;
            q += half;
            a[p - 1] = -a[p - 1];
            bitrv2conj_exchange(a, p, q);
            a[q + 3] = -a[q + 3];
            p += stride;
            q += stride;
            a[p - 1] = -a[p - 1];
            bitrv2conj_exchange(a, p, q);
            a[q + 3] = -a[q + 3];
        }
    }
}


/*
 * Reorder 16 complex values (a[0..31], interleaved re/im) in place by
 * exchanging each complex index with its 4-bit bit-reversed partner.
 * Indices that are their own reversal (0, 6, 9, 15) are left untouched.
 */
void bitrv216(double *a) {
    /* complex-index pairs that trade places */
    static const int exchange[6][2] = {
        {1, 8}, {2, 4}, {3, 12}, {5, 10}, {7, 14}, {11, 13}};
    int p;

    for (p = 0; p < 6; p++) {
        double *first = a + 2 * exchange[p][0];
        double *second = a + 2 * exchange[p][1];
        double keep_re = first[0];
        double keep_im = first[1];

        first[0] = second[0];
        first[1] = second[1];
        second[0] = keep_re;
        second[1] = keep_im;
    }
}


/*
 * Permute 16 complex values (a[0..31], interleaved re/im) in place.
 * Complex element 0 stays where it is; every other output slot i receives
 * the input element listed in origin[i].
 */
void bitrv216neg(double *a) {
    /* origin[i] = complex index of the input value that ends up in slot i */
    static const int origin[16] = {
        0, 15, 7, 11, 3, 13, 5, 9, 1, 14, 6, 10, 2, 12, 4, 8};
    double snapshot[32];
    int i;

    /* take a copy first so the gather below never reads an overwritten slot */
    for (i = 2; i < 32; i++) {
        snapshot[i] = a[i];
    }
    for (i = 1; i < 16; i++) {
        a[2 * i] = snapshot[2 * origin[i]];
        a[2 * i + 1] = snapshot[2 * origin[i] + 1];
    }
}


/*
 * Reorder 8 complex values (a[0..15], interleaved re/im) in place:
 * complex elements 1 <-> 4 and 3 <-> 6 trade places, all others stay put.
 */
void bitrv208(double *a) {
    double hold_re, hold_im;

    /* complex element 1 <-> complex element 4 */
    hold_re = a[2];
    hold_im = a[3];
    a[2] = a[8];
    a[3] = a[9];
    a[8] = hold_re;
    a[9] = hold_im;

    /* complex element 3 <-> complex element 6 */
    hold_re = a[6];
    hold_im = a[7];
    a[6] = a[12];
    a[7] = a[13];
    a[12] = hold_re;
    a[13] = hold_im;
}


/*
 * Permute 8 complex values (a[0..15], interleaved re/im) in place.
 * Complex element 0 stays where it is; every other output slot i receives
 * the input element listed in origin[i].
 */
void bitrv208neg(double *a) {
    /* origin[i] = complex index of the input value that ends up in slot i */
    static const int origin[8] = {0, 7, 3, 5, 1, 6, 2, 4};
    double snapshot[16];
    int i;

    /* copy first so the gather below never reads an overwritten slot */
    for (i = 2; i < 16; i++) {
        snapshot[i] = a[i];
    }
    for (i = 1; i < 8; i++) {
        a[2 * i] = snapshot[2 * origin[i]];
        a[2 * i + 1] = snapshot[2 * origin[i] + 1];
    }
}


/*
 * cftf1st: one in-place butterfly pass over the interleaved complex array a.
 *
 * With mh = n >> 3 and m = 2 * mh, every butterfly combines the four complex
 * values located at offsets j, j + m, j + 2m and j + 3m of a (real part at
 * the even index, imaginary part at the following odd index).  The two
 * "sum" outputs are stored unscaled; the two "difference" outputs are
 * multiplied by complex factors taken from / derived from w.
 *
 *   n : length parameter; only n >> 3 is used here
 *   a : data array, read and overwritten in place
 *   w : coefficient table; w[1], w[2], w[3] and w[4 ...] are read
 *
 * NOTE(review): judging by the name this is presumably the first stage of
 * the forward complex FFT; the exact layout of w is produced elsewhere in
 * the file and should be confirmed there.
 */
void cftf1st(int n, double *a, double *w) {
    int j, j0, j1, j2, j3, k, m, mh;
    double wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i;
    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r, y2i,
        y3r, y3i;

    mh = n >> 3;
    m = 2 * mh;
    /* butterfly at offset 0: no multiplication by a table factor needed */
    j1 = m;
    j2 = j1 + m;
    j3 = j2 + m;
    x0r = a[0] + a[j2];
    x0i = a[1] + a[j2 + 1];
    x1r = a[0] - a[j2];
    x1i = a[1] - a[j2 + 1];
    x2r = a[j1] + a[j3];
    x2i = a[j1 + 1] + a[j3 + 1];
    x3r = a[j1] - a[j3];
    x3i = a[j1 + 1] - a[j3 + 1];
    a[0] = x0r + x2r;
    a[1] = x0i + x2i;
    a[j1] = x0r - x2r;
    a[j1 + 1] = x0i - x2i;
    /* x1 + i*x3 and x1 - i*x3 */
    a[j2] = x1r - x3i;
    a[j2 + 1] = x1i + x3r;
    a[j3] = x1r + x3i;
    a[j3 + 1] = x1i - x3r;
    wn4r = w[1];
    csc1 = w[2];
    csc3 = w[3];
    /* wd1 / wd3 start at 1 + 0i and then track the latest table entry */
    wd1r = 1;
    wd1i = 0;
    wd3r = 1;
    wd3i = 0;
    k = 0;
    /*
     * Each iteration handles two adjacent complex positions (j and j + 2)
     * plus their mirrored positions (m - j and m - j - 2).
     */
    for (j = 2; j < mh - 2; j += 4) {
        k += 4;
        /*
         * wk1 / wk3 are built from the previous and the current table entry,
         * scaled by csc1 / csc3; they are applied at position j, while the
         * table entry itself (wd1 / wd3) is applied at position j + 2.
         * NOTE(review): presumably this derives the factor lying between two
         * tabulated ones - confirm against the table generator.
         */
        wk1r = csc1 * (wd1r + w[k]);
        wk1i = csc1 * (wd1i + w[k + 1]);
        wk3r = csc3 * (wd3r + w[k + 2]);
        wk3i = csc3 * (wd3i + w[k + 3]);
        wd1r = w[k];
        wd1i = w[k + 1];
        wd3r = w[k + 2];
        wd3i = w[k + 3];
        /* forward pair: x* for position j, y* for position j + 2 */
        j1 = j + m;
        j2 = j1 + m;
        j3 = j2 + m;
        x0r = a[j] + a[j2];
        x0i = a[j + 1] + a[j2 + 1];
        x1r = a[j] - a[j2];
        x1i = a[j + 1] - a[j2 + 1];
        y0r = a[j + 2] + a[j2 + 2];
        y0i = a[j + 3] + a[j2 + 3];
        y1r = a[j + 2] - a[j2 + 2];
        y1i = a[j + 3] - a[j2 + 3];
        x2r = a[j1] + a[j3];
        x2i = a[j1 + 1] + a[j3 + 1];
        x3r = a[j1] - a[j3];
        x3i = a[j1 + 1] - a[j3 + 1];
        y2r = a[j1 + 2] + a[j3 + 2];
        y2i = a[j1 + 3] + a[j3 + 3];
        y3r = a[j1 + 2] - a[j3 + 2];
        y3i = a[j1 + 3] - a[j3 + 3];
        a[j] = x0r + x2r;
        a[j + 1] = x0i + x2i;
        a[j + 2] = y0r + y2r;
        a[j + 3] = y0i + y2i;
        a[j1] = x0r - x2r;
        a[j1 + 1] = x0i - x2i;
        a[j1 + 2] = y0r - y2r;
        a[j1 + 3] = y0i - y2i;
        /* (x1 + i*x3) * wk1 */
        x0r = x1r - x3i;
        x0i = x1i + x3r;
        a[j2] = wk1r * x0r - wk1i * x0i;
        a[j2 + 1] = wk1r * x0i + wk1i * x0r;
        /* (y1 + i*y3) * wd1 */
        x0r = y1r - y3i;
        x0i = y1i + y3r;
        a[j2 + 2] = wd1r * x0r - wd1i * x0i;
        a[j2 + 3] = wd1r * x0i + wd1i * x0r;
        /* (x1 - i*x3) * conj(wk3) */
        x0r = x1r + x3i;
        x0i = x1i - x3r;
        a[j3] = wk3r * x0r + wk3i * x0i;
        a[j3 + 1] = wk3r * x0i - wk3i * x0r;
        /* (y1 - i*y3) * conj(wd3) */
        x0r = y1r + y3i;
        x0i = y1i - y3r;
        a[j3 + 2] = wd3r * x0r + wd3i * x0i;
        a[j3 + 3] = wd3r * x0i - wd3i * x0r;
        /*
         * mirrored pair: x* for position m - j, y* for position m - j - 2;
         * the same factors are reused with real and imaginary parts swapped
         */
        j0 = m - j;
        j1 = j0 + m;
        j2 = j1 + m;
        j3 = j2 + m;
        x0r = a[j0] + a[j2];
        x0i = a[j0 + 1] + a[j2 + 1];
        x1r = a[j0] - a[j2];
        x1i = a[j0 + 1] - a[j2 + 1];
        y0r = a[j0 - 2] + a[j2 - 2];
        y0i = a[j0 - 1] + a[j2 - 1];
        y1r = a[j0 - 2] - a[j2 - 2];
        y1i = a[j0 - 1] - a[j2 - 1];
        x2r = a[j1] + a[j3];
        x2i = a[j1 + 1] + a[j3 + 1];
        x3r = a[j1] - a[j3];
        x3i = a[j1 + 1] - a[j3 + 1];
        y2r = a[j1 - 2] + a[j3 - 2];
        y2i = a[j1 - 1] + a[j3 - 1];
        y3r = a[j1 - 2] - a[j3 - 2];
        y3i = a[j1 - 1] - a[j3 - 1];
        a[j0] = x0r + x2r;
        a[j0 + 1] = x0i + x2i;
        a[j0 - 2] = y0r + y2r;
        a[j0 - 1] = y0i + y2i;
        a[j1] = x0r - x2r;
        a[j1 + 1] = x0i - x2i;
        a[j1 - 2] = y0r - y2r;
        a[j1 - 1] = y0i - y2i;
        x0r = x1r - x3i;
        x0i = x1i + x3r;
        a[j2] = wk1i * x0r - wk1r * x0i;
        a[j2 + 1] = wk1i * x0i + wk1r * x0r;
        x0r = y1r - y3i;
        x0i = y1i + y3r;
        a[j2 - 2] = wd1i * x0r - wd1r * x0i;
        a[j2 - 1] = wd1i * x0i + wd1r * x0r;
        x0r = x1r + x3i;
        x0i = x1i - x3r;
        a[j3] = wk3i * x0r + wk3r * x0i;
        a[j3 + 1] = wk3i * x0i - wk3r * x0r;
        x0r = y1r + y3i;
        x0i = y1i - y3r;
        a[j3 - 2] = wd3i * x0r + wd3r * x0i;
        a[j3 - 1] = wd3i * x0i - wd3r * x0r;
    }
    /*
     * Remaining three positions around the middle (mh - 2, mh, mh + 2).
     * The last derived factors combine the final table entry with wn4r.
     */
    wk1r = csc1 * (wd1r + wn4r);
    wk1i = csc1 * (wd1i + wn4r);
    wk3r = csc3 * (wd3r - wn4r);
    wk3i = csc3 * (wd3i - wn4r);
    j0 = mh;
    j1 = j0 + m;
    j2 = j1 + m;
    j3 = j2 + m;
    /* position mh - 2 */
    x0r = a[j0 - 2] + a[j2 - 2];
    x0i = a[j0 - 1] + a[j2 - 1];
    x1r = a[j0 - 2] - a[j2 - 2];
    x1i = a[j0 - 1] - a[j2 - 1];
    x2r = a[j1 - 2] + a[j3 - 2];
    x2i = a[j1 - 1] + a[j3 - 1];
    x3r = a[j1 - 2] - a[j3 - 2];
    x3i = a[j1 - 1] - a[j3 - 1];
    a[j0 - 2] = x0r + x2r;
    a[j0 - 1] = x0i + x2i;
    a[j1 - 2] = x0r - x2r;
    a[j1 - 1] = x0i - x2i;
    x0r = x1r - x3i;
    x0i = x1i + x3r;
    a[j2 - 2] = wk1r * x0r - wk1i * x0i;
    a[j2 - 1] = wk1r * x0i + wk1i * x0r;
    x0r = x1r + x3i;
    x0i = x1i - x3r;
    a[j3 - 2] = wk3r * x0r + wk3i * x0i;
    a[j3 - 1] = wk3r * x0i - wk3i * x0r;
    /* position mh: both factors reduce to +/- wn4r * (1 +/- i) forms */
    x0r = a[j0] + a[j2];
    x0i = a[j0 + 1] + a[j2 + 1];
    x1r = a[j0] - a[j2];
    x1i = a[j0 + 1] - a[j2 + 1];
    x2r = a[j1] + a[j3];
    x2i = a[j1 + 1] + a[j3 + 1];
    x3r = a[j1] - a[j3];
    x3i = a[j1 + 1] - a[j3 + 1];
    a[j0] = x0r + x2r;
    a[j0 + 1] = x0i + x2i;
    a[j1] = x0r - x2r;
    a[j1 + 1] = x0i - x2i;
    x0r = x1r - x3i;
    x0i = x1i + x3r;
    a[j2] = wn4r * (x0r - x0i);
    a[j2 + 1] = wn4r * (x0i + x0r);
    x0r = x1r + x3i;
    x0i = x1i - x3r;
    a[j3] = -wn4r * (x0r + x0i);
    a[j3 + 1] = -wn4r * (x0i - x0r);
    /* position mh + 2: wk1 / wk3 with real and imaginary parts swapped */
    x0r = a[j0 + 2] + a[j2 + 2];
    x0i = a[j0 + 3] + a[j2 + 3];
    x1r = a[j0 + 2] - a[j2 + 2];
    x1i = a[j0 + 3] - a[j2 + 3];
    x2r = a[j1 + 2] + a[j3 + 2];
    x2i = a[j1 + 3] + a[j3 + 3];
    x3r = a[j1 + 2] - a[j3 + 2];
    x3i = a[j1 + 3] - a[j3 + 3];
    a[j0 + 2] = x0r + x2r;
    a[j0 + 3] = x0i + x2i;
    a[j1 + 2] = x0r - x2r;
    a[j1 + 3] = x0i - x2i;
    x0r = x1r - x3i;
    x0i = x1i + x3r;
    a[j2 + 2] = wk1i * x0r - wk1r * x0i;
    a[j2 + 3] = wk1i * x0i + wk1r * x0r;
    x0r = x1r + x3i;
    x0i = x1i - x3r;
    a[j3 + 2] = wk3i * x0r + wk3r * x0i;
    a[j3 + 3] = wk3i * x0i - wk3r * x0r;
}


/*
 * cftb1st: one in-place butterfly pass over the interleaved complex array a,
 * structured like cftf1st but with the imaginary parts of the values at
 * offsets j and j + 2m negated as they are read, and with the sign of the
 * x2i / x3i contributions flipped accordingly.
 *
 * With mh = n >> 3 and m = 2 * mh, every butterfly combines the four complex
 * values located at offsets j, j + m, j + 2m and j + 3m of a (real part at
 * the even index, imaginary part at the following odd index).
 *
 *   n : length parameter; only n >> 3 is used here
 *   a : data array, read and overwritten in place
 *   w : coefficient table; w[1], w[2], w[3] and w[4 ...] are read
 *
 * NOTE(review): judging by the name this is presumably the first stage of
 * the backward (inverse) complex FFT; confirm against the callers.
 */
void cftb1st(int n, double *a, double *w) {
    int j, j0, j1, j2, j3, k, m, mh;
    double wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i;
    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r, y2i,
        y3r, y3i;

    mh = n >> 3;
    m = 2 * mh;
    /* butterfly at offset 0: no multiplication by a table factor needed */
    j1 = m;
    j2 = j1 + m;
    j3 = j2 + m;
    x0r = a[0] + a[j2];
    /* imaginary parts of a[0..1] and a[j2..j2+1] enter with a minus sign */
    x0i = -a[1] - a[j2 + 1];
    x1r = a[0] - a[j2];
    x1i = -a[1] + a[j2 + 1];
    x2r = a[j1] + a[j3];
    x2i = a[j1 + 1] + a[j3 + 1];
    x3r = a[j1] - a[j3];
    x3i = a[j1 + 1] - a[j3 + 1];
    a[0] = x0r + x2r;
    a[1] = x0i - x2i;
    a[j1] = x0r - x2r;
    a[j1 + 1] = x0i + x2i;
    a[j2] = x1r + x3i;
    a[j2 + 1] = x1i + x3r;
    a[j3] = x1r - x3i;
    a[j3 + 1] = x1i - x3r;
    wn4r = w[1];
    csc1 = w[2];
    csc3 = w[3];
    /* wd1 / wd3 start at 1 + 0i and then track the latest table entry */
    wd1r = 1;
    wd1i = 0;
    wd3r = 1;
    wd3i = 0;
    k = 0;
    /*
     * Each iteration handles two adjacent complex positions (j and j + 2)
     * plus their mirrored positions (m - j and m - j - 2).
     */
    for (j = 2; j < mh - 2; j += 4) {
        k += 4;
        /*
         * wk1 / wk3 are built from the previous and the current table entry,
         * scaled by csc1 / csc3; they are applied at position j, while the
         * table entry itself (wd1 / wd3) is applied at position j + 2.
         * NOTE(review): presumably this derives the factor lying between two
         * tabulated ones - confirm against the table generator.
         */
        wk1r = csc1 * (wd1r + w[k]);
        wk1i = csc1 * (wd1i + w[k + 1]);
        wk3r = csc3 * (wd3r + w[k + 2]);
        wk3i = csc3 * (wd3i + w[k + 3]);
        wd1r = w[k];
        wd1i = w[k + 1];
        wd3r = w[k + 2];
        wd3i = w[k + 3];
        /* forward pair: x* for position j, y* for position j + 2 */
        j1 = j + m;
        j2 = j1 + m;
        j3 = j2 + m;
        x0r = a[j] + a[j2];
        x0i = -a[j + 1] - a[j2 + 1];
        x1r = a[j] - a[j2];
        x1i = -a[j + 1] + a[j2 + 1];
        y0r = a[j + 2] + a[j2 + 2];
        y0i = -a[j + 3] - a[j2 + 3];
        y1r = a[j + 2] - a[j2 + 2];
        y1i = -a[j + 3] + a[j2 + 3];
        x2r = a[j1] + a[j3];
        x2i = a[j1 + 1] + a[j3 + 1];
        x3r = a[j1] - a[j3];
        x3i = a[j1 + 1] - a[j3 + 1];
        y2r = a[j1 + 2] + a[j3 + 2];
        y2i = a[j1 + 3] + a[j3 + 3];
        y3r = a[j1 + 2] - a[j3 + 2];
        y3i = a[j1 + 3] - a[j3 + 3];
        a[j] = x0r + x2r;
        a[j + 1] = x0i - x2i;
        a[j + 2] = y0r + y2r;
        a[j + 3] = y0i - y2i;
        a[j1] = x0r - x2r;
        a[j1 + 1] = x0i + x2i;
        a[j1 + 2] = y0r - y2r;
        a[j1 + 3] = y0i + y2i;
        /* complex multiply of (x0r, x0i) by wk1 */
        x0r = x1r + x3i;
        x0i = x1i + x3r;
        a[j2] = wk1r * x0r - wk1i * x0i;
        a[j2 + 1] = wk1r * x0i + wk1i * x0r;
        /* complex multiply of (x0r, x0i) by wd1 */
        x0r = y1r + y3i;
        x0i = y1i + y3r;
        a[j2 + 2] = wd1r * x0r - wd1i * x0i;
        a[j2 + 3] = wd1r * x0i + wd1i * x0r;
        /* complex multiply of (x0r, x0i) by conj(wk3) */
        x0r = x1r - x3i;
        x0i = x1i - x3r;
        a[j3] = wk3r * x0r + wk3i * x0i;
        a[j3 + 1] = wk3r * x0i - wk3i * x0r;
        /* complex multiply of (x0r, x0i) by conj(wd3) */
        x0r = y1r - y3i;
        x0i = y1i - y3r;
        a[j3 + 2] = wd3r * x0r + wd3i * x0i;
        a[j3 + 3] = wd3r * x0i - wd3i * x0r;
        /*
         * mirrored pair: x* for position m - j, y* for position m - j - 2;
         * the same factors are reused with real and imaginary parts swapped
         */
        j0 = m - j;
        j1 = j0 + m;
        j2 = j1 + m;
        j3 = j2 + m;
        x0r = a[j0] + a[j2];
        x0i = -a[j0 + 1] - a[j2 + 1];
        x1r = a[j0] - a[j2];
        x1i = -a[j0 + 1] + a[j2 + 1];
        y0r = a[j0 - 2] + a[j2 - 2];
        y0i = -a[j0 - 1] - a[j2 - 1];
        y1r = a[j0 - 2] - a[j2 - 2];
        y1i = -a[j0 - 1] + a[j2 - 1];
        x2r = a[j1] + a[j3];
        x2i = a[j1 + 1] + a[j3 + 1];
        x3r = a[j1] - a[j3];
        x3i = a[j1 + 1] - a[j3 + 1];
        y2r = a[j1 - 2] + a[j3 - 2];
        y2i = a[j1 - 1] + a[j3 - 1];
        y3r = a[j1 - 2] - a[j3 - 2];
        y3i = a[j1 - 1] - a[j3 - 1];
        a[j0] = x0r + x2r;
        a[j0 + 1] = x0i - x2i;
        a[j0 - 2] = y0r + y2r;
        a[j0 - 1] = y0i - y2i;
        a[j1] = x0r - x2r;
        a[j1 + 1] = x0i + x2i;
        a[j1 - 2] = y0r - y2r;
        a[j1 - 1] = y0i + y2i;
        x0r = x1r + x3i;
        x0i = x1i + x3r;
        a[j2] = wk1i * x0r - wk1r * x0i;
        a[j2 + 1] = wk1i * x0i + wk1r * x0r;
        x0r = y1r + y3i;
        x0i = y1i + y3r;
        a[j2 - 2] = wd1i * x0r - wd1r * x0i;
        a[j2 - 1] = wd1i * x0i + wd1r * x0r;
        x0r = x1r - x3i;
        x0i = x1i - x3r;
        a[j3] = wk3i * x0r + wk3r * x0i;
        a[j3 + 1] = wk3i * x0i - wk3r * x0r;
        x0r = y1r - y3i;
        x0i = y1i - y3r;
        a[j3 - 2] = wd3i * x0r + wd3r * x0i;
        a[j3 - 1] = wd3i * x0i - wd3r * x0r;
    }
    /*
     * Remaining three positions around the middle (mh - 2, mh, mh + 2).
     * The last derived factors combine the final table entry with wn4r.
     */
    wk1r = csc1 * (wd1r + wn4r);
    wk1i = csc1 * (wd1i + wn4r);
    wk3r = csc3 * (wd3r - wn4r);
    wk3i = csc3 * (wd3i - wn4r);
    j0 = mh;
    j1 = j0 + m;
    j2 = j1 + m;
    j3 = j2 + m;
    /* position mh - 2 */
    x0r = a[j0 - 2] + a[j2 - 2];
    x0i = -a[j0 - 1] - a[j2 - 1];
    x1r = a[j0 - 2] - a[j2 - 2];
    x1i = -a[j0 - 1] + a[j2 - 1];
    x2r = a[j1 - 2] + a[j3 - 2];
    x2i = a[j1 - 1] + a[j3 - 1];
    x3r = a[j1 - 2] - a[j3 - 2];
    x3i = a[j1 - 1] - a[j3 - 1];
    a[j0 - 2] = x0r + x2r;
    a[j0 - 1] = x0i - x2i;
    a[j1 - 2] = x0r - x2r;
    a[j1 - 1] = x0i + x2i;
    x0r = x1r + x3i;
    x0i = x1i + x3r;
    a[j2 - 2] = wk1r * x0r - wk1i * x0i;
    a[j2 - 1] = wk1r * x0i + wk1i * x0r;
    x0r = x1r - x3i;
    x0i = x1i - x3r;
    a[j3 - 2] = wk3r * x0r + wk3i * x0i;
    a[j3 - 1] = wk3r * x0i - wk3i * x0r;
    /* position mh: both factors reduce to +/- wn4r * (1 +/- i) forms */
    x0r = a[j0] + a[j2];
    x0i = -a[j0 + 1] - a[j2 + 1];
    x1r = a[j0] - a[j2];
    x1i = -a[j0 + 1] + a[j2 + 1];
    x2r = a[j1] + a[j3];
    x2i = a[j1 + 1] + a[j3 + 1];
    x3r = a[j1] - a[j3];
    x3i = a[j1 + 1] - a[j3 + 1];
    a[j0] = x0r + x2r;
    a[j0 + 1] = x0i - x2i;
    a[j1] = x0r - x2r;
    a[j1 + 1] = x0i + x2i;
    x0r = x1r + x3i;
    x0i = x1i + x3r;
    a[j2] = wn4r * (x0r - x0i);
    a[j2 + 1] = wn4r * (x0i + x0r);
    x0r = x1r - x3i;
    x0i = x1i - x3r;
    a[j3] = -wn4r * (x0r + x0i);
    a[j3 + 1] = -wn4r * (x0i - x0r);
    /* position mh + 2: wk1 / wk3 with real and imaginary parts swapped */
    x0r = a[j0 + 2] + a[j2 + 2];
    x0i = -a[j0 + 3] - a[j2 + 3];
    x1r = a[j0 + 2] - a[j2 + 2];
    x1i = -a[j0 + 3] + a[j2 + 3];
    x2r = a[j1 + 2] + a[j3 + 2];
    x2i = a[j1 + 3] + a[j3 + 3];
    x3r = a[j1 + 2] - a[j3 + 2];
    x3i = a[j1 + 3] - a[j3 + 3];
    a[j0 + 2] = x0r + x2r;
    a[j0 + 3] = x0i - x2i;
    a[j1 + 2] = x0r - x2r;
    a[j1 + 3] = x0i + x2i;
    x0r = x1r + x3i;
    x0i = x1i + x3r;
    a[j2 + 2] = wk1i * x0r - wk1r * x0i;
    a[j2 + 3] = wk1i * x0i + wk1r * x0r;
    x0r = x1r - x3i;
    x0i = x1i - x3r;
    a[j3 + 2] = wk3i * x0r + wk3r * x0i;
    a[j3 + 3] = wk3i * x0i - wk3r * x0r;
}


#ifdef USE_CDFT_THREADS
/*
 * Argument bundle handed to each worker thread started by cftrec4_th
 * (consumed by cftrec1_th / cftrec2_th).
 */
struct cdft_arg_st {
    int n0;    /* length of the whole transform as passed to cftrec4_th */
    int n;     /* length of the slice this worker processes */
    double *a; /* start of this worker's slice inside the data array */
    int nw;    /* forwarded unchanged from cftrec4_th's nw argument */
    double *w; /* forwarded unchanged from cftrec4_th's w argument */
};
typedef struct cdft_arg_st cdft_arg_t;


/*
 * Threaded driver: splits a[0 .. n-1] into equal slices and runs one worker
 * thread per slice, then waits for all of them.
 *
 * Two workers (half the array each) are used by default; when n exceeds
 * CDFT_4THREADS_BEGIN_N four workers (a quarter each) are used instead.
 * Exactly one worker runs cftrec2_th - slice 0 in the two-thread case,
 * slice 1 in the four-thread case - and every other worker runs cftrec1_th.
 */
void cftrec4_th(int n, double *a, int nw, double *w) {
    void *cftrec1_th(void *p);
    void *cftrec2_th(void *p);
    cdft_thread_t th[4];
    cdft_arg_t ag[4];
    void *(*entry)(void *);
    int t, workers, rec2_slot, slice;

    slice = n >> 1;
    if (n > CDFT_4THREADS_BEGIN_N) {
        workers = 4;
        rec2_slot = 1;
        slice >>= 1;
    } else {
        workers = 2;
        rec2_slot = 0;
    }

    for (t = 0; t < workers; t++) {
        ag[t].n0 = n;
        ag[t].n = slice;
        ag[t].a = &a[t * slice];
        ag[t].nw = nw;
        ag[t].w = w;
        entry = (t == rec2_slot) ? cftrec2_th : cftrec1_th;
        cdft_thread_create(&th[t], entry, &ag[t]);
    }

    /* join in creation order */
    for (t = 0; t < workers; t++) {
        cdft_thread_wait(th[t]);
    }
}


void *cftrec1_th(void *p) {
    int cfttree(int n, int j, int k, double *a, int nw, double *w);
    void cftleaf(int n, int isplt, double *a, int nw, double *w);
    void cftmdl1(int n, double *a, double *w);
    int isplt, j, k, m, n, n0, nw;
    double *a, *w;

    n0 = ((cdft_arg_t *)p)->n0;
    n = ((cdft_arg_t *)p)->n;
    a = ((cdft_arg_t *)p)->a;
    nw = ((cdft_arg_t *)p)->nw;
    w = ((cdft_arg_t *)p)->w;
    m = n0;
    while (m > 512) {
        m >>= 2;
        cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]);
    }
    cftleaf(m, 1, &a[n - m], nw, w);
    k = 0;
    for (j = n - m; j > 0; j -= m) {
        k++;
        isplt = cfttree(m, j, k, a, nw, w);
        cftleaf(m, isplt, &a[j - m], nw, w);
    }
    return (void *)0;
}


void *cftrec2_th(void *p) {
    int cfttree(int n, int j, int k, double *a, int nw, double *w);
    void cftleaf(int n, int isplt, double *a, int nw, double *w);
    void cftmdl2(int n, double *a, double *w);
    int isplt, j, k, m, n, n0, nw;
    double *a, *w;

    n0 = ((cdft_arg_t *)p)->n0;
    n = ((cdft_arg_t *)p)->n;
    a = ((cdft_arg_t *)p)->a;
    nw = ((cdft_arg_t *)p)->nw;
    w = ((cdft_arg_t *)p)->w;
    k = 1;
    m = n0;
    while (m > 512) {
        m >>= 2;
        k <<= 2;
        cftmdl2(m, &a[n - m], &w[nw - m]);
    }
    cftleaf(m, 0, &a[n - m], nw, w);
    k >>= 1;
    for (j = n - m; j > 0; j -= m) {
        k++;
        isplt = cfttree(m, j, k, a, nw, w);
        cftleaf(m, isplt, &a[j - m], nw, w);
    }
    return (void *)0;
}
#endif /* USE_CDFT_THREADS */


/*
 * Single-threaded driver over a[0 .. n-1].
 *
 * The working length starts at n and is divided by 4 (applying cftmdl1 to
 * the tail of the array each time) until it is at most 512.  The array is
 * then processed in blocks of that length from the end towards the start:
 * the last block goes straight to cftleaf, every earlier block is first
 * prepared by cfttree, whose return value selects the cftleaf variant.
 */
void cftrec4(int n, double *a, int nw, double *w) {
    int cfttree(int n, int j, int k, double *a, int nw, double *w);
    void cftleaf(int n, int isplt, double *a, int nw, double *w);
    void cftmdl1(int n, double *a, double *w);
    int len = n;
    int count = 0;
    int pos, split;

    while (len > 512) {
        len >>= 2;
        cftmdl1(len, a + (n - len), w + (nw - (len >> 1)));
    }
    cftleaf(len, 1, a + (n - len), nw, w);

    pos = n - len;
    while (pos > 0) {
        count++;
        split = cfttree(len, pos, count, a, nw, w);
        cftleaf(len, split, a + (pos - len), nw, w);
        pos -= len;
    }
}


/*
 * Applies the cftmdl1 / cftmdl2 passes needed for the block that ends at
 * a[j], chosen from the block counter k.
 *
 * - k not a multiple of 4: a single pass of length n; cftmdl1 when k is odd,
 *   cftmdl2 when k is even.
 * - k a multiple of 4: the length is multiplied by 4 for every trailing
 *   base-4 zero digit of k, then passes of decreasing length (divided by 4
 *   each time) run while the length exceeds 128; the lowest bit of the
 *   remaining part of k selects cftmdl1 (1) or cftmdl2 (0).
 *
 * Returns 1 when cftmdl1 was selected, 0 when cftmdl2 was selected.
 * Callers in this file always pass k >= 1.
 */
int cfttree(int n, int j, int k, double *a, int nw, double *w) {
    void cftmdl1(int n, double *a, double *w);
    void cftmdl2(int n, double *a, double *w);
    int len, digits, use_mdl1;

    if ((k & 3) != 0) {
        use_mdl1 = k & 1;
        if (use_mdl1 != 0) {
            cftmdl1(n, a + (j - n), w + (nw - (n >> 1)));
        } else {
            cftmdl2(n, a + (j - n), w + (nw - n));
        }
        return use_mdl1;
    }

    /* strip trailing base-4 zeros of k, growing the length to match */
    len = n;
    digits = k;
    while ((digits & 3) == 0) {
        len <<= 2;
        digits >>= 2;
    }
    use_mdl1 = digits & 1;

    for (; len > 128; len >>= 2) {
        if (use_mdl1 != 0) {
            cftmdl1(len, a + (j - len), w + (nw - (len >> 1)));
        } else {
            cftmdl2(len, a + (j - len), w + (nw - len));
        }
    }
    return use_mdl1;
}


/*
 * Processes one leaf block of the array in place.
 *
 * When n == 512 the block is handled as four quarters of 128 doubles using
 * the cftf161 / cftf162 kernels on pieces of 32 doubles; for any other n it
 * is handled as four quarters of 64 doubles using the cftf081 / cftf082
 * kernels on pieces of 16 doubles.  Both cases follow the same call
 * sequence, so they share one parameterised body here.
 *
 * isplt selects the treatment of the last quarter: non-zero uses cftmdl1
 * and the first kernel on its final piece, zero uses cftmdl2 and the
 * second kernel.
 */
void cftleaf(int n, int isplt, double *a, int nw, double *w) {
    void cftmdl1(int n, double *a, double *w);
    void cftmdl2(int n, double *a, double *w);
    void cftf161(double *a, double *w);
    void cftf162(double *a, double *w);
    void cftf081(double *a, double *w);
    void cftf082(double *a, double *w);
    void (*kern1)(double *, double *);
    void (*kern2)(double *, double *);
    double *w_mdl1, *w_mdl2, *w_k1, *w_k2;
    double *q0, *q1, *q2, *q3;
    int quarter, piece;

    if (n == 512) {
        quarter = 128;
        piece = 32;
        kern1 = cftf161;
        kern2 = cftf162;
        w_mdl1 = &w[nw - 64];
        w_mdl2 = &w[nw - 128];
        w_k1 = &w[nw - 8];
        w_k2 = &w[nw - 32];
    } else {
        quarter = 64;
        piece = 16;
        kern1 = cftf081;
        kern2 = cftf082;
        w_mdl1 = &w[nw - 32];
        w_mdl2 = &w[nw - 64];
        w_k1 = &w[nw - 8];
        w_k2 = &w[nw - 8];
    }
    q0 = a;
    q1 = a + quarter;
    q2 = a + 2 * quarter;
    q3 = a + 3 * quarter;

    /* first quarter */
    cftmdl1(quarter, q0, w_mdl1);
    kern1(q0, w_k1);
    kern2(q0 + piece, w_k2);
    kern1(q0 + 2 * piece, w_k1);
    kern1(q0 + 3 * piece, w_k1);

    /* second quarter */
    cftmdl2(quarter, q1, w_mdl2);
    kern1(q1, w_k1);
    kern2(q1 + piece, w_k2);
    kern1(q1 + 2 * piece, w_k1);
    kern2(q1 + 3 * piece, w_k2);

    /* third quarter */
    cftmdl1(quarter, q2, w_mdl1);
    kern1(q2, w_k1);
    kern2(q2 + piece, w_k2);
    kern1(q2 + 2 * piece, w_k1);
    kern1(q2 + 3 * piece, w_k1);

    /* fourth quarter: pass and final-piece kernel depend on isplt */
    if (isplt != 0) {
        cftmdl1(quarter, q3, w_mdl1);
        kern1(q3 + 3 * piece, w_k1);
    } else {
        cftmdl2(quarter, q3, w_mdl2);
        kern2(q3 + 3 * piece, w_k2);
    }
    kern1(q3, w_k1);
    kern2(q3 + piece, w_k2);
    kern1(q3 + 2 * piece, w_k1);
}


/*
 * cftmdl1: one in-place butterfly pass over the interleaved complex array a.
 *
 * With mh = n >> 3 and m = 2 * mh, every butterfly combines the four complex
 * values located at offsets j, j + m, j + 2m and j + 3m of a (real part at
 * the even index, imaginary part at the following odd index).  The two
 * "sum" outputs are stored unscaled; the two "difference" outputs are
 * multiplied by factors read from w.
 *
 *   n : length parameter; only n >> 3 is used here
 *   a : data array, read and overwritten in place
 *   w : coefficient table; w[1] and w[4 .. ] (four values per step) are read
 *
 * NOTE(review): the layout of w is defined by the table generator elsewhere
 * in the file - confirm there.
 */
void cftmdl1(int n, double *a, double *w) {
    int j, j0, j1, j2, j3, k, m, mh;
    double wn4r, wk1r, wk1i, wk3r, wk3i;
    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;

    mh = n >> 3;
    m = 2 * mh;
    /* butterfly at offset 0: no multiplication by a table factor needed */
    j1 = m;
    j2 = j1 + m;
    j3 = j2 + m;
    x0r = a[0] + a[j2];
    x0i = a[1] + a[j2 + 1];
    x1r = a[0] - a[j2];
    x1i = a[1] - a[j2 + 1];
    x2r = a[j1] + a[j3];
    x2i = a[j1 + 1] + a[j3 + 1];
    x3r = a[j1] - a[j3];
    x3i = a[j1 + 1] - a[j3 + 1];
    a[0] = x0r + x2r;
    a[1] = x0i + x2i;
    a[j1] = x0r - x2r;
    a[j1 + 1] = x0i - x2i;
    /* x1 + i*x3 and x1 - i*x3 */
    a[j2] = x1r - x3i;
    a[j2 + 1] = x1i + x3r;
    a[j3] = x1r + x3i;
    a[j3 + 1] = x1i - x3r;
    wn4r = w[1];
    k = 0;
    /* each iteration handles position j and its mirror position m - j */
    for (j = 2; j < mh; j += 2) {
        k += 4;
        wk1r = w[k];
        wk1i = w[k + 1];
        wk3r = w[k + 2];
        wk3i = w[k + 3];
        j1 = j + m;
        j2 = j1 + m;
        j3 = j2 + m;
        x0r = a[j] + a[j2];
        x0i = a[j + 1] + a[j2 + 1];
        x1r = a[j] - a[j2];
        x1i = a[j + 1] - a[j2 + 1];
        x2r = a[j1] + a[j3];
        x2i = a[j1 + 1] + a[j3 + 1];
        x3r = a[j1] - a[j3];
        x3i = a[j1 + 1] - a[j3 + 1];
        a[j] = x0r + x2r;
        a[j + 1] = x0i + x2i;
        a[j1] = x0r - x2r;
        a[j1 + 1] = x0i - x2i;
        /* (x1 + i*x3) * wk1 */
        x0r = x1r - x3i;
        x0i = x1i + x3r;
        a[j2] = wk1r * x0r - wk1i * x0i;
        a[j2 + 1] = wk1r * x0i + wk1i * x0r;
        /* (x1 - i*x3) * conj(wk3) */
        x0r = x1r + x3i;
        x0i = x1i - x3r;
        a[j3] = wk3r * x0r + wk3i * x0i;
        a[j3 + 1] = wk3r * x0i - wk3i * x0r;
        /* mirror position: same factors with re/im parts swapped */
        j0 = m - j;
        j1 = j0 + m;
        j2 = j1 + m;
        j3 = j2 + m;
        x0r = a[j0] + a[j2];
        x0i = a[j0 + 1] + a[j2 + 1];
        x1r = a[j0] - a[j2];
        x1i = a[j0 + 1] - a[j2 + 1];
        x2r = a[j1] + a[j3];
        x2i = a[j1 + 1] + a[j3 + 1];
        x3r = a[j1] - a[j3];
        x3i = a[j1 + 1] - a[j3 + 1];
        a[j0] = x0r + x2r;
        a[j0 + 1] = x0i + x2i;
        a[j1] = x0r - x2r;
        a[j1 + 1] = x0i - x2i;
        x0r = x1r - x3i;
        x0i = x1i + x3r;
        a[j2] = wk1i * x0r - wk1r * x0i;
        a[j2 + 1] = wk1i * x0i + wk1r * x0r;
        x0r = x1r + x3i;
        x0i = x1i - x3r;
        a[j3] = wk3i * x0r + wk3r * x0i;
        a[j3 + 1] = wk3i * x0i - wk3r * x0r;
    }
    /* middle position mh: factors reduce to +/- wn4r * (1 +/- i) forms */
    j0 = mh;
    j1 = j0 + m;
    j2 = j1 + m;
    j3 = j2 + m;
    x0r = a[j0] + a[j2];
    x0i = a[j0 + 1] + a[j2 + 1];
    x1r = a[j0] - a[j2];
    x1i = a[j0 + 1] - a[j2 + 1];
    x2r = a[j1] + a[j3];
    x2i = a[j1 + 1] + a[j3 + 1];
    x3r = a[j1] - a[j3];
    x3i = a[j1 + 1] - a[j3 + 1];
    a[j0] = x0r + x2r;
    a[j0 + 1] = x0i + x2i;
    a[j1] = x0r - x2r;
    a[j1 + 1] = x0i - x2i;
    x0r = x1r - x3i;
    x0i = x1i + x3r;
    a[j2] = wn4r * (x0r - x0i);
    a[j2 + 1] = wn4r * (x0i + x0r);
    x0r = x1r + x3i;
    x0i = x1i - x3r;
    a[j3] = -wn4r * (x0r + x0i);
    a[j3 + 1] = -wn4r * (x0i - x0r);
}


/*
 * cftmdl2: one in-place butterfly pass over the interleaved complex array a,
 * counterpart of cftmdl1 in which the inputs are first combined as
 * a[j] +/- i*a[j + 2m] and a[j + m] +/- i*a[j + 3m], each combination is
 * multiplied by a factor from w, and the products are then added and
 * subtracted.
 *
 * With mh = n >> 3 and m = 2 * mh, every butterfly touches the four complex
 * values at offsets j, j + m, j + 2m and j + 3m of a (real part at the even
 * index, imaginary part at the following odd index).
 *
 *   n : length parameter; only n >> 3 is used here
 *   a : data array, read and overwritten in place
 *   w : coefficient table; w[1], w[4 .. 2m - 1] and w[m], w[m + 1] are read
 *
 * NOTE(review): the layout of w is defined by the table generator elsewhere
 * in the file - confirm there.
 */
void cftmdl2(int n, double *a, double *w) {
    int j, j0, j1, j2, j3, k, kr, m, mh;
    double wn4r, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i;
    double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y2r, y2i;

    mh = n >> 3;
    m = 2 * mh;
    wn4r = w[1];
    /* butterfly at offset 0 */
    j1 = m;
    j2 = j1 + m;
    j3 = j2 + m;
    /* x0 = a[0] + i*a[j2], x1 = a[0] - i*a[j2] */
    x0r = a[0] - a[j2 + 1];
    x0i = a[1] + a[j2];
    x1r = a[0] + a[j2 + 1];
    x1i = a[1] - a[j2];
    /* x2 = a[j1] + i*a[j3], x3 = a[j1] - i*a[j3] */
    x2r = a[j1] - a[j3 + 1];
    x2i = a[j1 + 1] + a[j3];
    x3r = a[j1] + a[j3 + 1];
    x3i = a[j1 + 1] - a[j3];
    y0r = wn4r * (x2r - x2i);
    y0i = wn4r * (x2i + x2r);
    a[0] = x0r + y0r;
    a[1] = x0i + y0i;
    a[j1] = x0r - y0r;
    a[j1 + 1] = x0i - y0i;
    y0r = wn4r * (x3r - x3i);
    y0i = wn4r * (x3i + x3r);
    a[j2] = x1r - y0i;
    a[j2 + 1] = x1i + y0r;
    a[j3] = x1r + y0i;
    a[j3 + 1] = x1i - y0r;
    k = 0;
    kr = 2 * m;
    /* each iteration handles position j and its mirror position m - j */
    for (j = 2; j < mh; j += 2) {
        /* wk1 / wk3: read walking forward through w from index 4 */
        k += 4;
        wk1r = w[k];
        wk1i = w[k + 1];
        wk3r = w[k + 2];
        wk3i = w[k + 3];
        /*
         * wd1 / wd3: read walking backward through w from index 2m - 4,
         * with the real and imaginary slots taken in swapped order
         */
        kr -= 4;
        wd1i = w[kr];
        wd1r = w[kr + 1];
        wd3i = w[kr + 2];
        wd3r = w[kr + 3];
        j1 = j + m;
        j2 = j1 + m;
        j3 = j2 + m;
        x0r = a[j] - a[j2 + 1];
        x0i = a[j + 1] + a[j2];
        x1r = a[j] + a[j2 + 1];
        x1i = a[j + 1] - a[j2];
        x2r = a[j1] - a[j3 + 1];
        x2i = a[j1 + 1] + a[j3];
        x3r = a[j1] + a[j3 + 1];
        x3i = a[j1 + 1] - a[j3];
        /* y0 = x0 * wk1, y2 = x2 * wd1 */
        y0r = wk1r * x0r - wk1i * x0i;
        y0i = wk1r * x0i + wk1i * x0r;
        y2r = wd1r * x2r - wd1i * x2i;
        y2i = wd1r * x2i + wd1i * x2r;
        a[j] = y0r + y2r;
        a[j + 1] = y0i + y2i;
        a[j1] = y0r - y2r;
        a[j1 + 1] = y0i - y2i;
        /* y0 = x1 * conj(wk3), y2 = x3 * conj(wd3) */
        y0r = wk3r * x1r + wk3i * x1i;
        y0i = wk3r * x1i - wk3i * x1r;
        y2r = wd3r * x3r + wd3i * x3i;
        y2i = wd3r * x3i - wd3i * x3r;
        a[j2] = y0r + y2r;
        a[j2 + 1] = y0i + y2i;
        a[j3] = y0r - y2r;
        a[j3 + 1] = y0i - y2i;
        /*
         * mirror position: the roles of wk* and wd* are exchanged and each
         * factor is used with its real and imaginary parts swapped
         */
        j0 = m - j;
        j1 = j0 + m;
        j2 = j1 + m;
        j3 = j2 + m;
        x0r = a[j0] - a[j2 + 1];
        x0i = a[j0 + 1] + a[j2];
        x1r = a[j0] + a[j2 + 1];
        x1i = a[j0 + 1] - a[j2];
        x2r = a[j1] - a[j3 + 1];
        x2i = a[j1 + 1] + a[j3];
        x3r = a[j1] + a[j3 + 1];
        x3i = a[j1 + 1] - a[j3];
        y0r = wd1i * x0r - wd1r * x0i;
        y0i = wd1i * x0i + wd1r * x0r;
        y2r = wk1i * x2r - wk1r * x2i;
        y2i = wk1i * x2i + wk1r * x2r;
        a[j0] = y0r + y2r;
        a[j0 + 1] = y0i + y2i;
        a[j1] = y0r - y2r;
        a[j1 + 1] = y0i - y2i;
        y0r = wd3i * x1r + wd3r * x1i;
        y0i = wd3i * x1i - wd3r * x1r;
        y2r = wk3i * x3r + wk3r * x3i;
        y2i = wk3i * x3i - wk3r * x3r;
        a[j2] = y0r + y2r;
        a[j2 + 1] = y0i + y2i;
        a[j3] = y0r - y2r;
        a[j3 + 1] = y0i - y2i;
    }
    /* middle position mh: a single factor (w[m], w[m + 1]) serves all four */
    wk1r = w[m];
    wk1i = w[m + 1];
    j0 = mh;
    j1 = j0 + m;
    j2 = j1 + m;
    j3 = j2 + m;
    x0r = a[j0] - a[j2 + 1];
    x0i = a[j0 + 1] + a[j2];
    x1r = a[j0] + a[j2 + 1];
    x1i = a[j0 + 1] - a[j2];
    x2r = a[j1] - a[j3 + 1];
    x2i = a[j1 + 1] + a[j3];
    x3r = a[j1] + a[j3 + 1];
    x3i = a[j1 + 1] - a[j3];
    y0r = wk1r * x0r - wk1i * x0i;
    y0i = wk1r * x0i + wk1i * x0r;
    y2r = wk1i * x2r - wk1r * x2i;
    y2i = wk1i * x2i + wk1r * x2r;
    a[j0] = y0r + y2r;
    a[j0 + 1] = y0i + y2i;
    a[j1] = y0r - y2r;
    a[j1 + 1] = y0i - y2i;
    y0r = wk1i * x1r - wk1r * x1i;
    y0i = wk1i * x1i + wk1r * x1r;
    y2r = wk1r * x3r - wk1i * x3i;
    y2i = wk1r * x3i + wk1i * x3r;
    a[j2] = y0r - y2r;
    a[j2 + 1] = y0i - y2i;
    a[j3] = y0r + y2r;
    a[j3 + 1] = y0i + y2i;
}


/*
 * Runs four fixed-size kernels over consecutive pieces of a.
 *
 * n == 128: pieces of 32 doubles, kernels cftf161, cftf162, cftf161, cftf161.
 * otherwise: pieces of 16 doubles, kernels cftf081, cftf082, cftf081, cftf081.
 *
 * Every kernel receives the table tail &w[nw - 8], except cftf162, which
 * receives &w[nw - 32].
 */
void cftfx41(int n, double *a, int nw, double *w) {
    void cftf161(double *a, double *w);
    void cftf162(double *a, double *w);
    void cftf081(double *a, double *w);
    void cftf082(double *a, double *w);
    double *w_tail8 = &w[nw - 8];

    if (n != 128) {
        cftf081(a, w_tail8);
        cftf082(a + 16, w_tail8);
        cftf081(a + 32, w_tail8);
        cftf081(a + 48, w_tail8);
        return;
    }
    cftf161(a, w_tail8);
    cftf162(a + 32, &w[nw - 32]);
    cftf161(a + 64, w_tail8);
    cftf161(a + 96, w_tail8);
}


/*
 * cftf161: fully unrolled in-place kernel over 16 complex values stored
 * interleaved in a[0..31] (real part at the even index, imaginary part at
 * the following odd index).
 *
 * The first half computes sixteen intermediate complex values y0..y15 from
 * groups of four inputs spaced 8 doubles apart; the second half combines
 * those intermediates in groups of four and writes the results back to a.
 *
 *   a : 32 doubles, read and overwritten in place
 *   w : coefficient table; only w[1], w[2] and w[3] are read
 *
 * NOTE(review): the meaning of w[1..3] is fixed by the table generator
 * elsewhere in the file - confirm there.
 */
void cftf161(double *a, double *w) {
    double wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i,
        y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i,
        y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, y12r, y12i, y13r, y13i,
        y14r, y14i, y15r, y15i;

    wn4r = w[1];
    wk1r = w[2];
    wk1i = w[3];
    /* inputs a[0], a[8], a[16], a[24] -> y0, y4, y8, y12 (no factor) */
    x0r = a[0] + a[16];
    x0i = a[1] + a[17];
    x1r = a[0] - a[16];
    x1i = a[1] - a[17];
    x2r = a[8] + a[24];
    x2i = a[9] + a[25];
    x3r = a[8] - a[24];
    x3i = a[9] - a[25];
    y0r = x0r + x2r;
    y0i = x0i + x2i;
    y4r = x0r - x2r;
    y4i = x0i - x2i;
    y8r = x1r - x3i;
    y8i = x1i + x3r;
    y12r = x1r + x3i;
    y12i = x1i - x3r;
    /* inputs a[2], a[10], a[18], a[26] -> y1, y5, y9, y13 (factor wk1) */
    x0r = a[2] + a[18];
    x0i = a[3] + a[19];
    x1r = a[2] - a[18];
    x1i = a[3] - a[19];
    x2r = a[10] + a[26];
    x2i = a[11] + a[27];
    x3r = a[10] - a[26];
    x3i = a[11] - a[27];
    y1r = x0r + x2r;
    y1i = x0i + x2i;
    y5r = x0r - x2r;
    y5i = x0i - x2i;
    x0r = x1r - x3i;
    x0i = x1i + x3r;
    y9r = wk1r * x0r - wk1i * x0i;
    y9i = wk1r * x0i + wk1i * x0r;
    x0r = x1r + x3i;
    x0i = x1i - x3r;
    y13r = wk1i * x0r - wk1r * x0i;
    y13i = wk1i * x0i + wk1r * x0r;
    /* inputs a[4], a[12], a[20], a[28] -> y2, y6, y10, y14 (factor wn4r) */
    x0r = a[4] + a[20];
    x0i = a[5] + a[21];
    x1r = a[4] - a[20];
    x1i = a[5] - a[21];
    x2r = a[12] + a[28];
    x2i = a[13] + a[29];
    x3r = a[12] - a[28];
    x3i = a[13] - a[29];
    y2r = x0r + x2r;
    y2i = x0i + x2i;
    y6r = x0r - x2r;
    y6i = x0i - x2i;
    x0r = x1r - x3i;
    x0i = x1i + x3r;
    y10r = wn4r * (x0r - x0i);
    y10i = wn4r * (x0i + x0r);
    x0r = x1r + x3i;
    x0i = x1i - x3r;
    y14r = wn4r * (x0r + x0i);
    y14i = wn4r * (x0i - x0r);
    /* inputs a[6], a[14], a[22], a[30] -> y3, y7, y11, y15 (factor wk1) */
    x0r = a[6] + a[22];
    x0i = a[7] + a[23];
    x1r = a[6] - a[22];
    x1i = a[7] - a[23];
    x2r = a[14] + a[30];
    x2i = a[15] + a[31];
    x3r = a[14] - a[30];
    x3i = a[15] - a[31];
    y3r = x0r + x2r;
    y3i = x0i + x2i;
    y7r = x0r - x2r;
    y7i = x0i - x2i;
    x0r = x1r - x3i;
    x0i = x1i + x3r;
    y11r = wk1i * x0r - wk1r * x0i;
    y11i = wk1i * x0i + wk1r * x0r;
    x0r = x1r + x3i;
    x0i = x1i - x3r;
    y15r = wk1r * x0r - wk1i * x0i;
    y15i = wk1r * x0i + wk1i * x0r;
    /* y12..y15 -> a[24..31] */
    x0r = y12r - y14r;
    x0i = y12i - y14i;
    x1r = y12r + y14r;
    x1i = y12i + y14i;
    x2r = y13r - y15r;
    x2i = y13i - y15i;
    x3r = y13r + y15r;
    x3i = y13i + y15i;
    a[24] = x0r + x2r;
    a[25] = x0i + x2i;
    a[26] = x0r - x2r;
    a[27] = x0i - x2i;
    a[28] = x1r - x3i;
    a[29] = x1i + x3r;
    a[30] = x1r + x3i;
    a[31] = x1i - x3r;
    /* y8..y11 -> a[16..23] */
    x0r = y8r + y10r;
    x0i = y8i + y10i;
    x1r = y8r - y10r;
    x1i = y8i - y10i;
    x2r = y9r + y11r;
    x2i = y9i + y11i;
    x3r = y9r - y11r;
    x3i = y9i - y11i;
    a[16] = x0r + x2r;
    a[17] = x0i + x2i;
    a[18] = x0r - x2r;
    a[19] = x0i - x2i;
    a[20] = x1r - x3i;
    a[21] = x1i + x3r;
    a[22] = x1r + x3i;
    a[23] = x1i - x3r;
    /* y4..y7 -> a[8..15]; the y5/y7 combinations are scaled by wn4r */
    x0r = y5r - y7i;
    x0i = y5i + y7r;
    x2r = wn4r * (x0r - x0i);
    x2i = wn4r * (x0i + x0r);
    x0r = y5r + y7i;
    x0i = y5i - y7r;
    x3r = wn4r * (x0r - x0i);
    x3i = wn4r * (x0i + x0r);
    x0r = y4r - y6i;
    x0i = y4i + y6r;
    x1r = y4r + y6i;
    x1i = y4i - y6r;
    a[8] = x0r + x2r;
    a[9] = x0i + x2i;
    a[10] = x0r - x2r;
    a[11] = x0i - x2i;
    a[12] = x1r - x3i;
    a[13] = x1i + x3r;
    a[14] = x1r + x3i;
    a[15] = x1i - x3r;
    /* y0..y3 -> a[0..7] (no factor) */
    x0r = y0r + y2r;
    x0i = y0i + y2i;
    x1r = y0r - y2r;
    x1i = y0i - y2i;
    x2r = y1r + y3r;
    x2i = y1i + y3i;
    x3r = y1r - y3r;
    x3i = y1i - y3i;
    a[0] = x0r + x2r;
    a[1] = x0i + x2i;
    a[2] = x0r - x2r;
    a[3] = x0i - x2i;
    a[4] = x1r - x3i;
    a[5] = x1i + x3r;
    a[6] = x1r + x3i;
    a[7] = x1i - x3r;
}


/*
 * In-place butterfly over the 32 doubles a[0..31].
 *
 * Twiddle values are taken from w[1] and w[4]..w[9] (w[7] is negated when it
 * is loaded into wk3i).  The routine works in two phases: every element of
 * `a` is first read and folded into the temporaries y0..y15, and only then
 * are the 32 outputs written back, so no input is clobbered before it is
 * consumed.
 *
 * NOTE(review): judging by the r/i suffixes of the temporaries, even indices
 * of `a` hold real parts and odd indices hold imaginary parts (16 complex
 * values) -- confirm against the caller.
 */
void cftf162(double *a, double *w) {
    double wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i, x0r, x0i, x1r, x1i, x2r,
        x2i, y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r,
        y6i, y7r, y7i, y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, y12r, y12i,
        y13r, y13i, y14r, y14i, y15r, y15i;

    /* Load the twiddle factors used below. */
    wn4r = w[1];
    wk1r = w[4];
    wk1i = w[5];
    wk3r = w[6];
    wk3i = -w[7];
    wk2r = w[8];
    wk2i = w[9];
    /* Phase 1: read-only pass over `a`, producing y0..y15.  x0/x1/x2 are
     * scratch values that are reused for every group. */
    x1r = a[0] - a[17];
    x1i = a[1] + a[16];
    x0r = a[8] - a[25];
    x0i = a[9] + a[24];
    x2r = wn4r * (x0r - x0i);
    x2i = wn4r * (x0i + x0r);
    y0r = x1r + x2r;
    y0i = x1i + x2i;
    y4r = x1r - x2r;
    y4i = x1i - x2i;
    x1r = a[0] + a[17];
    x1i = a[1] - a[16];
    x0r = a[8] + a[25];
    x0i = a[9] - a[24];
    x2r = wn4r * (x0r - x0i);
    x2i = wn4r * (x0i + x0r);
    y8r = x1r - x2i;
    y8i = x1i + x2r;
    y12r = x1r + x2i;
    y12i = x1i - x2r;
    x0r = a[2] - a[19];
    x0i = a[3] + a[18];
    x1r = wk1r * x0r - wk1i * x0i;
    x1i = wk1r * x0i + wk1i * x0r;
    x0r = a[10] - a[27];
    x0i = a[11] + a[26];
    x2r = wk3i * x0r - wk3r * x0i;
    x2i = wk3i * x0i + wk3r * x0r;
    y1r = x1r + x2r;
    y1i = x1i + x2i;
    y5r = x1r - x2r;
    y5i = x1i - x2i;
    x0r = a[2] + a[19];
    x0i = a[3] - a[18];
    x1r = wk3r * x0r - wk3i * x0i;
    x1i = wk3r * x0i + wk3i * x0r;
    x0r = a[10] + a[27];
    x0i = a[11] - a[26];
    x2r = wk1r * x0r + wk1i * x0i;
    x2i = wk1r * x0i - wk1i * x0r;
    y9r = x1r - x2r;
    y9i = x1i - x2i;
    y13r = x1r + x2r;
    y13i = x1i + x2i;
    x0r = a[4] - a[21];
    x0i = a[5] + a[20];
    x1r = wk2r * x0r - wk2i * x0i;
    x1i = wk2r * x0i + wk2i * x0r;
    x0r = a[12] - a[29];
    x0i = a[13] + a[28];
    x2r = wk2i * x0r - wk2r * x0i;
    x2i = wk2i * x0i + wk2r * x0r;
    y2r = x1r + x2r;
    y2i = x1i + x2i;
    y6r = x1r - x2r;
    y6i = x1i - x2i;
    x0r = a[4] + a[21];
    x0i = a[5] - a[20];
    x1r = wk2i * x0r - wk2r * x0i;
    x1i = wk2i * x0i + wk2r * x0r;
    x0r = a[12] + a[29];
    x0i = a[13] - a[28];
    x2r = wk2r * x0r - wk2i * x0i;
    x2i = wk2r * x0i + wk2i * x0r;
    y10r = x1r - x2r;
    y10i = x1i - x2i;
    y14r = x1r + x2r;
    y14i = x1i + x2i;
    x0r = a[6] - a[23];
    x0i = a[7] + a[22];
    x1r = wk3r * x0r - wk3i * x0i;
    x1i = wk3r * x0i + wk3i * x0r;
    x0r = a[14] - a[31];
    x0i = a[15] + a[30];
    x2r = wk1i * x0r - wk1r * x0i;
    x2i = wk1i * x0i + wk1r * x0r;
    y3r = x1r + x2r;
    y3i = x1i + x2i;
    y7r = x1r - x2r;
    y7i = x1i - x2i;
    x0r = a[6] + a[23];
    x0i = a[7] - a[22];
    x1r = wk1i * x0r + wk1r * x0i;
    x1i = wk1i * x0i - wk1r * x0r;
    x0r = a[14] + a[31];
    x0i = a[15] - a[30];
    x2r = wk3i * x0r - wk3r * x0i;
    x2i = wk3i * x0i + wk3r * x0r;
    y11r = x1r + x2r;
    y11i = x1i + x2i;
    y15r = x1r - x2r;
    y15i = x1i - x2i;
    /* Phase 2: combine the y values and write a[0..31], eight doubles at a
     * time. */
    x1r = y0r + y2r;
    x1i = y0i + y2i;
    x2r = y1r + y3r;
    x2i = y1i + y3i;
    a[0] = x1r + x2r;
    a[1] = x1i + x2i;
    a[2] = x1r - x2r;
    a[3] = x1i - x2i;
    x1r = y0r - y2r;
    x1i = y0i - y2i;
    x2r = y1r - y3r;
    x2i = y1i - y3i;
    a[4] = x1r - x2i;
    a[5] = x1i + x2r;
    a[6] = x1r + x2i;
    a[7] = x1i - x2r;
    x1r = y4r - y6i;
    x1i = y4i + y6r;
    x0r = y5r - y7i;
    x0i = y5i + y7r;
    x2r = wn4r * (x0r - x0i);
    x2i = wn4r * (x0i + x0r);
    a[8] = x1r + x2r;
    a[9] = x1i + x2i;
    a[10] = x1r - x2r;
    a[11] = x1i - x2i;
    x1r = y4r + y6i;
    x1i = y4i - y6r;
    x0r = y5r + y7i;
    x0i = y5i - y7r;
    x2r = wn4r * (x0r - x0i);
    x2i = wn4r * (x0i + x0r);
    a[12] = x1r - x2i;
    a[13] = x1i + x2r;
    a[14] = x1r + x2i;
    a[15] = x1i - x2r;
    x1r = y8r + y10r;
    x1i = y8i + y10i;
    x2r = y9r - y11r;
    x2i = y9i - y11i;
    a[16] = x1r + x2r;
    a[17] = x1i + x2i;
    a[18] = x1r - x2r;
    a[19] = x1i - x2i;
    x1r = y8r - y10r;
    x1i = y8i - y10i;
    x2r = y9r + y11r;
    x2i = y9i + y11i;
    a[20] = x1r - x2i;
    a[21] = x1i + x2r;
    a[22] = x1r + x2i;
    a[23] = x1i - x2r;
    x1r = y12r - y14i;
    x1i = y12i + y14r;
    x0r = y13r + y15i;
    x0i = y13i - y15r;
    x2r = wn4r * (x0r - x0i);
    x2i = wn4r * (x0i + x0r);
    a[24] = x1r + x2r;
    a[25] = x1i + x2i;
    a[26] = x1r - x2r;
    a[27] = x1i - x2i;
    x1r = y12r + y14i;
    x1i = y12i - y14r;
    x0r = y13r - y15i;
    x0i = y13i + y15r;
    x2r = wn4r * (x0r - x0i);
    x2i = wn4r * (x0i + x0r);
    a[28] = x1r - x2i;
    a[29] = x1i + x2r;
    a[30] = x1r + x2i;
    a[31] = x1i - x2r;
}


/*
 * In-place butterfly over the 16 doubles a[0..15].
 *
 * The only table value used is w[1] (kept in wn4r).  All inputs are first
 * reduced to the temporaries y0..y7; the 16 outputs are written afterwards,
 * so `a` is never read after it has been partially overwritten.
 *
 * NOTE(review): the r/i suffixes suggest that even indices of `a` are real
 * parts and odd indices imaginary parts (8 complex values) -- confirm
 * against the caller.
 */
void cftf081(double *a, double *w) {
    double wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i,
        y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;

    wn4r = w[1];
    /* First group: elements at offsets 0, 4, 8 and 12 -> y0..y3. */
    x0r = a[0] + a[8];
    x0i = a[1] + a[9];
    x1r = a[0] - a[8];
    x1i = a[1] - a[9];
    x2r = a[4] + a[12];
    x2i = a[5] + a[13];
    x3r = a[4] - a[12];
    x3i = a[5] - a[13];
    y0r = x0r + x2r;
    y0i = x0i + x2i;
    y2r = x0r - x2r;
    y2i = x0i - x2i;
    y1r = x1r - x3i;
    y1i = x1i + x3r;
    y3r = x1r + x3i;
    y3i = x1i - x3r;
    /* Second group: elements at offsets 2, 6, 10 and 14 -> y4..y7. */
    x0r = a[2] + a[10];
    x0i = a[3] + a[11];
    x1r = a[2] - a[10];
    x1i = a[3] - a[11];
    x2r = a[6] + a[14];
    x2i = a[7] + a[15];
    x3r = a[6] - a[14];
    x3i = a[7] - a[15];
    y4r = x0r + x2r;
    y4i = x0i + x2i;
    y6r = x0r - x2r;
    y6i = x0i - x2i;
    x0r = x1r - x3i;
    x0i = x1i + x3r;
    x2r = x1r + x3i;
    x2i = x1i - x3r;
    /* y5 and y7 are the only values scaled by wn4r. */
    y5r = wn4r * (x0r - x0i);
    y5i = wn4r * (x0r + x0i);
    y7r = wn4r * (x2r - x2i);
    y7i = wn4r * (x2r + x2i);
    /* Write-back: upper half (a[8..15]) first, then lower half (a[0..7]). */
    a[8] = y1r + y5r;
    a[9] = y1i + y5i;
    a[10] = y1r - y5r;
    a[11] = y1i - y5i;
    a[12] = y3r - y7i;
    a[13] = y3i + y7r;
    a[14] = y3r + y7i;
    a[15] = y3i - y7r;
    a[0] = y0r + y4r;
    a[1] = y0i + y4i;
    a[2] = y0r - y4r;
    a[3] = y0i - y4i;
    a[4] = y2r - y6i;
    a[5] = y2i + y6r;
    a[6] = y2r + y6i;
    a[7] = y2i - y6r;
}


/*
 * In-place butterfly over the 16 doubles a[0..15] (second variant).
 *
 * Uses the table values w[1], w[2] and w[3].  Inputs are paired as
 * a[j] -/+ a[j + 9] and a[j + 1] +/- a[j + 8] and reduced to y0..y7 before
 * any element of `a` is overwritten; the outputs are then written four
 * doubles at a time.
 *
 * NOTE(review): the r/i suffixes suggest that even indices of `a` are real
 * parts and odd indices imaginary parts (8 complex values) -- confirm
 * against the caller.
 */
void cftf082(double *a, double *w) {
    double wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i, y0r, y0i, y1r, y1i, y2r, y2i,
        y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;

    wn4r = w[1];
    wk1r = w[2];
    wk1i = w[3];
    /* Phase 1: read-only pass over `a`, producing y0..y7. */
    y0r = a[0] - a[9];
    y0i = a[1] + a[8];
    y1r = a[0] + a[9];
    y1i = a[1] - a[8];
    x0r = a[4] - a[13];
    x0i = a[5] + a[12];
    y2r = wn4r * (x0r - x0i);
    y2i = wn4r * (x0i + x0r);
    x0r = a[4] + a[13];
    x0i = a[5] - a[12];
    y3r = wn4r * (x0r - x0i);
    y3i = wn4r * (x0i + x0r);
    x0r = a[2] - a[11];
    x0i = a[3] + a[10];
    y4r = wk1r * x0r - wk1i * x0i;
    y4i = wk1r * x0i + wk1i * x0r;
    x0r = a[2] + a[11];
    x0i = a[3] - a[10];
    y5r = wk1i * x0r - wk1r * x0i;
    y5i = wk1i * x0i + wk1r * x0r;
    x0r = a[6] - a[15];
    x0i = a[7] + a[14];
    y6r = wk1i * x0r - wk1r * x0i;
    y6i = wk1i * x0i + wk1r * x0r;
    x0r = a[6] + a[15];
    x0i = a[7] - a[14];
    y7r = wk1r * x0r - wk1i * x0i;
    y7i = wk1r * x0i + wk1i * x0r;
    /* Phase 2: combine the y values and write a[0..15]. */
    x0r = y0r + y2r;
    x0i = y0i + y2i;
    x1r = y4r + y6r;
    x1i = y4i + y6i;
    a[0] = x0r + x1r;
    a[1] = x0i + x1i;
    a[2] = x0r - x1r;
    a[3] = x0i - x1i;
    x0r = y0r - y2r;
    x0i = y0i - y2i;
    x1r = y4r - y6r;
    x1i = y4i - y6i;
    a[4] = x0r - x1i;
    a[5] = x0i + x1r;
    a[6] = x0r + x1i;
    a[7] = x0i - x1r;
    x0r = y1r - y3i;
    x0i = y1i + y3r;
    x1r = y5r - y7r;
    x1i = y5i - y7i;
    a[8] = x0r + x1r;
    a[9] = x0i + x1i;
    a[10] = x0r - x1r;
    a[11] = x0i - x1i;
    x0r = y1r + y3i;
    x0i = y1i - y3r;
    x1r = y5r + y7r;
    x1i = y5i + y7i;
    a[12] = x0r - x1i;
    a[13] = x0i + x1r;
    a[14] = x0r + x1i;
    a[15] = x0i - x1r;
}


/*
 * In-place 4-point butterfly over the 8 doubles a[0..7] (no twiddle table).
 *
 * Elements at offsets 0/4 and 2/6 (and their odd-index partners) are summed
 * and differenced; the cross terms enter a[2], a[3] with signs (-, +) and
 * a[6], a[7] with signs (+, -).
 */
void cftf040(double *a) {
    /* Sums and differences of the (0, 4) and (2, 6) pairs, taken before any
     * element is overwritten. */
    double sum04r = a[0] + a[4], sum04i = a[1] + a[5];
    double dif04r = a[0] - a[4], dif04i = a[1] - a[5];
    double sum26r = a[2] + a[6], sum26i = a[3] + a[7];
    double dif26r = a[2] - a[6], dif26i = a[3] - a[7];

    a[0] = sum04r + sum26r;
    a[1] = sum04i + sum26i;
    a[4] = sum04r - sum26r;
    a[5] = sum04i - sum26i;

    a[2] = dif04r - dif26i;
    a[3] = dif04i + dif26r;
    a[6] = dif04r + dif26i;
    a[7] = dif04i - dif26r;
}


/*
 * In-place 4-point butterfly over the 8 doubles a[0..7] (no twiddle table).
 *
 * Same sums and differences as cftf040, but the cross terms are applied with
 * the opposite signs: a[2], a[3] use (+, -) and a[6], a[7] use (-, +).
 */
void cftb040(double *a) {
    /* Sums and differences of the (0, 4) and (2, 6) pairs, taken before any
     * element is overwritten. */
    double sum04r = a[0] + a[4], sum04i = a[1] + a[5];
    double dif04r = a[0] - a[4], dif04i = a[1] - a[5];
    double sum26r = a[2] + a[6], sum26i = a[3] + a[7];
    double dif26r = a[2] - a[6], dif26i = a[3] - a[7];

    a[0] = sum04r + sum26r;
    a[1] = sum04i + sum26i;
    a[4] = sum04r - sum26r;
    a[5] = sum04i - sum26i;

    a[2] = dif04r + dif26i;
    a[3] = dif04i - dif26r;
    a[6] = dif04r - dif26i;
    a[7] = dif04i + dif26r;
}


/*
 * In-place 2-point butterfly over a[0..3]:
 *   (a[0], a[1]) <- (a[0] + a[2], a[1] + a[3])
 *   (a[2], a[3]) <- (a[0] - a[2], a[1] - a[3])
 * using the values held on entry.
 */
void cftx020(double *a) {
    /* Snapshot the inputs so the sums and differences use original values. */
    double first_r = a[0], first_i = a[1];
    double second_r = a[2], second_i = a[3];

    a[0] = first_r + second_r;
    a[1] = first_i + second_i;
    a[2] = first_r - second_r;
    a[3] = first_i - second_i;
}


/*
 * Pairwise in-place correction of a[j], a[j + 1] against a[n - j],
 * a[n - j + 1] for j = 2, 4, ... < n / 2.
 *
 * n  : number of doubles in `a` that take part
 * a  : data, modified in place
 * nc : extent of the coefficient table; c[0..nc] may be indexed
 * c  : coefficient table, read at c[kk] and c[nc - kk] with
 *      kk = (j / 2) * (2 * nc / (n / 2))
 *
 * For each pair the weights are (0.5 - c[nc - kk], c[kk]); the weighted
 * combination (yr, yi) is subtracted from the low element and applied with
 * signs (+, -) to the high element.
 */
void rftfsub(int n, double *a, int nc, double *c) {
    int half = n >> 1;
    int stride = 2 * nc / half;
    int lo, hi, tab;

    for (lo = 2, tab = stride; lo < half; lo += 2, tab += stride) {
        double wr, wi, dr, di, tr, ti;

        hi = n - lo;
        wr = 0.5 - c[nc - tab];
        wi = c[tab];
        dr = a[lo] - a[hi];
        di = a[lo + 1] + a[hi + 1];
        tr = wr * dr - wi * di;
        ti = wr * di + wi * dr;
        a[lo] = a[lo] - tr;
        a[lo + 1] = a[lo + 1] - ti;
        a[hi] = a[hi] + tr;
        a[hi + 1] = a[hi + 1] - ti;
    }
}


/*
 * Counterpart of rftfsub: same traversal and table look-ups, but the
 * weighted combination uses the opposite sign on the c[kk] terms
 * (yr = wkr * xr + wki * xi, yi = wkr * xi - wki * xr).
 *
 * n  : number of doubles in `a` that take part
 * a  : data, modified in place
 * nc : extent of the coefficient table; c[0..nc] may be indexed
 * c  : coefficient table, read at c[kk] and c[nc - kk]
 */
void rftbsub(int n, double *a, int nc, double *c) {
    int half = n >> 1;
    int stride = 2 * nc / half;
    int lo, hi, tab;

    for (lo = 2, tab = stride; lo < half; lo += 2, tab += stride) {
        double wr, wi, dr, di, tr, ti;

        hi = n - lo;
        wr = 0.5 - c[nc - tab];
        wi = c[tab];
        dr = a[lo] - a[hi];
        di = a[lo + 1] + a[hi + 1];
        tr = wr * dr + wi * di;
        ti = wr * di - wi * dr;
        a[lo] = a[lo] - tr;
        a[lo + 1] = a[lo + 1] - ti;
        a[hi] = a[hi] + tr;
        a[hi + 1] = a[hi + 1] - ti;
    }
}


/*
 * In-place rotation of the pairs (a[j], a[n - j]) for j = 1 .. n / 2 - 1,
 * followed by scaling of the middle element a[n / 2] by c[0].
 *
 * For each pair, with kk = j * (nc / n):
 *   wkr = c[kk] - c[nc - kk],  wki = c[kk] + c[nc - kk]
 *   a[j]     <- wkr * a[j] + wki * a[n - j]
 *   a[n - j] <- wki * a[j] - wkr * a[n - j]     (using the old a[j])
 */
void dctsub(int n, double *a, int nc, double *c) {
    int half = n >> 1;
    int stride = nc / n;
    int lo, hi, tab;

    for (lo = 1, tab = stride; lo < half; lo++, tab += stride) {
        double diff, sum, new_hi;

        hi = n - lo;
        diff = c[tab] - c[nc - tab];
        sum = c[tab] + c[nc - tab];
        /* Compute the new high element first: it needs the old a[lo]. */
        new_hi = sum * a[lo] - diff * a[hi];
        a[lo] = diff * a[lo] + sum * a[hi];
        a[hi] = new_hi;
    }
    a[half] = a[half] * c[0];
}


/*
 * Mirror image of dctsub: the roles of a[j] and a[n - j] are exchanged.
 *
 * For each j = 1 .. n / 2 - 1, with kk = j * (nc / n):
 *   wkr = c[kk] - c[nc - kk],  wki = c[kk] + c[nc - kk]
 *   a[n - j] <- wkr * a[n - j] + wki * a[j]
 *   a[j]     <- wki * a[n - j] - wkr * a[j]     (using the old a[n - j])
 * Finally a[n / 2] is scaled by c[0].
 */
void dstsub(int n, double *a, int nc, double *c) {
    int half = n >> 1;
    int stride = nc / n;
    int lo, hi, tab;

    for (lo = 1, tab = stride; lo < half; lo++, tab += stride) {
        double diff, sum, new_lo;

        hi = n - lo;
        diff = c[tab] - c[nc - tab];
        sum = c[tab] + c[nc - tab];
        /* Compute the new low element first: it needs the old a[hi]. */
        new_lo = sum * a[hi] - diff * a[lo];
        a[hi] = diff * a[hi] + sum * a[lo];
        a[lo] = new_lo;
    }
    a[half] = a[half] * c[0];
}


================================================
FILE: runtime/engine/common/frontend/frontend_itf.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "base/basic_types.h"
#include "matrix/kaldi-vector.h"

namespace ppspeech {

// Abstract interface shared by the front-end processing stages: data is
// pushed in with Accept() and pulled out with Read().
class FrontendInterface {
  public:
    // Virtual destructor: implementations are meant to be used through this
    // interface, and deleting a derived object via a FrontendInterface
    // pointer without it is undefined behaviour (the derived destructor
    // would not run).
    virtual ~FrontendInterface() = default;

    // Feed inputs: features(2D saved in 1D) or waveforms(1D).
    virtual void Accept(const std::vector<float>& inputs) = 0;

    // Fetch processed data: features or waveforms.
    // For features(2D saved in 1D), the matrix is flattened into a vector,
    //    so the length of the output = feature_row * feature_dim.
    // For waveforms(1D), the samples are stored in the vector.
    virtual bool Read(std::vector<float>* outputs) = 0;

    // Dim is the feature dimension. For waveforms(1D) Dim is zero; otherwise
    // it is specific to the feature type, e.g. 80 for fbank.
    virtual size_t Dim() const = 0;

    // Mark the end of the streaming data.
    virtual void SetFinished() = 0;

    // Whether the end of the streaming data has been reached.
    virtual bool IsFinished() const = 0;

    // Reset to the start state.
    virtual void Reset() = 0;
};

}  // namespace ppspeech


================================================
FILE: runtime/engine/common/frontend/linear_spectrogram.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "frontend/audio/linear_spectrogram.h"

#include "kaldi/base/kaldi-math.h"
#include "kaldi/feat/feature-common.h"
#include "kaldi/feat/feature-functions.h"
#include "kaldi/matrix/matrix-functions.h"

namespace ppspeech {

using kaldi::BaseFloat;
using kaldi::int32;
using kaldi::Matrix;
using kaldi::SubVector;
using kaldi::Vector;
using kaldi::VectorBase;
using std::vector;

// Caches everything Compute() needs from the frame options:
//   frame_length_ : window size in samples
//   dim_          : number of output bins, window_size / 2 + 1
//   scale_        : 2 / (window energy * sample rate), where the window
//                   energy is the dot product of the window with itself.
LinearSpectrogramComputer::LinearSpectrogramComputer(const Options& opts)
    : opts_(opts) {
    kaldi::FeatureWindowFunction window_fn(opts.frame_opts);

    const int32 num_samples = opts.frame_opts.WindowSize();
    frame_length_ = num_samples;
    dim_ = num_samples / 2 + 1;

    const BaseFloat window_energy =
        kaldi::VecVec(window_fn.window, window_fn.window);
    // The sample rate is deliberately taken as an integer here.
    const int32 rate = opts.frame_opts.samp_freq;
    scale_ = 2.0 / (window_energy * rate);
}

// Computes the log power spectrum of one frame.
//
// `window` is resized to frame_length_ (keeping its data) and is overwritten
// by the FFT / power-spectrum steps. The first dim_ values are scaled by
// scale_, the first and last bins are halved, 1e-14 is added as a floor and
// the natural log is applied. The result is copied into `feat`.
// Always returns true.
bool LinearSpectrogramComputer::Compute(Vector<BaseFloat>* window,
                                        Vector<BaseFloat>* feat) {
    window->Resize(frame_length_, kaldi::kCopyData);
    RealFft(window, true);
    kaldi::ComputePowerSpectrum(window);

    const int32 last_bin = dim_ - 1;
    SubVector<BaseFloat> spectrum(*window, 0, dim_);
    spectrum.Scale(scale_);
    spectrum(0) /= 2;
    spectrum(last_bin) /= 2;
    spectrum.Add(1e-14);  // avoid log(0)
    spectrum.ApplyLog();

    feat->CopyFromVec(spectrum);
    return true;
}

}  // namespace ppspeech

================================================
FILE: runtime/engine/common/frontend/linear_spectrogram.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#pragma once

#include "base/common.h"
#include "frontend/audio/feature_common.h"
#include "frontend/audio/frontend_itf.h"
#include "kaldi/feat/feature-window.h"

namespace ppspeech {

// Options for LinearSpectrogramComputer. Only the framing options are
// configurable; they are value-initialized by the default constructor.
struct LinearSpectrogramOptions {
    // Framing options (window size, sample rate, ...) read by the computer.
    kaldi::FrameExtractionOptions frame_opts;
    LinearSpectrogramOptions() : frame_opts() {}
};

// Computes a log power spectrum ("linear spectrogram") for a single frame.
// Instances are non-copyable.
class LinearSpectrogramComputer {
  public:
    typedef LinearSpectrogramOptions Options;
    // Copies `opts` and precomputes the frame length, output dimension and
    // spectrum scale from opts.frame_opts.
    explicit LinearSpectrogramComputer(const Options& opts);

    // Mutable access to the framing options stored in this computer.
    kaldi::FrameExtractionOptions& GetFrameOptions() {
        return opts_.frame_opts;
    }

    // Computes the feature for one frame. `window` is modified in place;
    // the result is copied into `feat`.
    // NOTE(review): `feat` is presumably expected to already have Dim()
    // elements -- confirm against the caller.
    bool Compute(kaldi::Vector<kaldi::BaseFloat>* window,
                 kaldi::Vector<kaldi::BaseFloat>* feat);

    // Number of values produced per frame.
    int32 Dim() const { return dim_; }

    // This computer never asks for the raw log energy of the frame.
    bool NeedRawLogEnergy() { return false; }

  private:
    // Factor applied to the power spectrum before taking the log.
    kaldi::BaseFloat scale_;
    Options opts_;
    // Frame (window) length in samples.
    int32 frame_length_;
    // Output dimension returned by Dim().
    int32 dim_;
    DISALLOW_COPY_AND_ASSIGN(LinearSpectrogramComputer);
};

// LinearSpectrogramComputer plugged into the StreamingFeatureTpl wrapper
// (declared elsewhere in the project).
typedef StreamingFeatureTpl<LinearSpectrogramComputer> LinearSpectrogram;


}  // namespace ppspeech

================================================
FILE: runtime/engine/common/frontend/mel-computations.cc
================================================
/**
 * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
 *
 * See LICENSE for clarification regarding multiple authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// This file is copied/modified from kaldi/src/feat/mel-computations.cc

#include "frontend/mel-computations.h"

#include <algorithm>
#include <sstream>

#include "frontend/feature-window.h"

namespace knf {

// Streams the textual form produced by MelBanksOptions::ToString().
std::ostream &operator<<(std::ostream &os, const MelBanksOptions &opts) {
    return os << opts.ToString();
}

// VTLN frequency warp F(freq): a continuous, three-piece linear map on
// [low_freq, high_freq]. It is not identical to HTK's warp but takes similar
// inputs, and it never produces empty bins.
//
//   * F(low_freq) == low_freq and F(high_freq) == high_freq.
//   * Between the two inflection points l and h (both expressed as
//     unwarped frequencies) the map is F(f) = f / vtln_warp_factor.
//   * h satisfies max(h, F(h)) == vtln_high_cutoff, i.e.
//       h = vtln_high_cutoff * min(1, vtln_warp_factor).
//   * l satisfies min(l, F(l)) == vtln_low_cutoff, i.e.
//       l = vtln_low_cutoff * max(1, vtln_warp_factor).
//
// Frequencies outside [low_freq, high_freq] are returned unchanged.
float MelBanks::VtlnWarpFreq(
    float vtln_low_cutoff,  // lower frequency cutoff for VTLN
    float vtln_high_cutoff,  // upper frequency cutoff for VTLN
    float low_freq,  // lower frequency cutoff of the mel computation
    float high_freq,  // upper frequency cutoff of the mel computation
    float vtln_warp_factor,
    float freq) {
    const bool out_of_range = freq < low_freq || freq > high_freq;
    if (out_of_range) {
        // Nothing to warp; hand the frequency back untouched.
        return freq;
    }

    CHECK_GT(vtln_low_cutoff, low_freq);
    CHECK_LT(vtln_high_cutoff, high_freq);

    // Inflection points, in unwarped frequency.
    const float l = vtln_low_cutoff * std::max(1.0f, vtln_warp_factor);
    const float h = vtln_high_cutoff * std::min(1.0f, vtln_warp_factor);

    // Slope of the centre segment, and the warped inflection points.
    const float centre_slope = 1.0f / vtln_warp_factor;
    const float warped_l = centre_slope * l;
    const float warped_h = centre_slope * h;
    CHECK(l > low_freq && h < high_freq);

    // Slopes of the outer segments, chosen so the end points stay fixed.
    const float left_slope = (warped_l - low_freq) / (l - low_freq);
    const float right_slope = (high_freq - warped_h) / (high_freq - h);

    if (freq < l) {
        return low_freq + left_slope * (freq - low_freq);
    }
    if (freq < h) {
        return centre_slope * freq;
    }
    // freq >= h
    return high_freq + right_slope * (freq - high_freq);
}

// Same warp as VtlnWarpFreq, but with input and output on the mel scale:
// the mel value is converted to Hz, warped, and converted back to mel.
// The cutoff arguments remain in Hz.
float MelBanks::VtlnWarpMelFreq(
    float vtln_low_cutoff,  // lower frequency cutoff for VTLN
    float vtln_high_cutoff,  // upper frequency cutoff for VTLN
    float low_freq,  // lower frequency cutoff of the mel computation
    float high_freq,  // upper frequency cutoff of the mel computation
    float vtln_warp_factor,
    float mel_freq) {
    const float freq_hz = InverseMelScale(mel_freq);
    const float warped_hz = VtlnWarpFreq(vtln_low_cutoff,
                                         vtln_high_cutoff,
                                         low_freq,
                                         high_freq,
                                         vtln_warp_factor,
                                         freq_hz);
    return MelScale(warped_hz);
}

// Builds the triangular mel filter bank.
//
// For each of opts.num_bins bins, the constructor stores in bins_ the index
// of the first FFT bin with a non-zero weight and the contiguous run of
// weights starting there. FFT bins 0 .. PaddedWindowSize() / 2 - 1 are
// considered. Bin edges are spaced evenly on the mel scale between
// opts.low_freq and the effective high frequency, and are passed through the
// VTLN warp when vtln_warp_factor != 1.
//
// Aborts via LOG(FATAL) / CHECK on fewer than 3 bins, an odd padded window
// length, inconsistent frequency limits, or a mel bin that covers no FFT bin.
MelBanks::MelBanks(const MelBanksOptions &opts,
                   const FrameExtractionOptions &frame_opts,
                   float vtln_warp_factor)
    : htk_mode_(opts.htk_mode) {
    int32_t num_bins = opts.num_bins;
    if (num_bins < 3) LOG(FATAL) << "Must have at least 3 mel bins";

    float sample_freq = frame_opts.samp_freq;
    int32_t window_length_padded = frame_opts.PaddedWindowSize();
    CHECK_EQ(window_length_padded % 2, 0);

    int32_t num_fft_bins = window_length_padded / 2;
    float nyquist = 0.5f * sample_freq;

    // A positive high_freq is used as is; zero or a negative value is
    // interpreted as an offset from the Nyquist frequency.
    float low_freq = opts.low_freq, high_freq;
    if (opts.high_freq > 0.0f)
        high_freq = opts.high_freq;
    else
        high_freq = nyquist + opts.high_freq;

    if (low_freq < 0.0f || low_freq >= nyquist || high_freq <= 0.0f ||
        high_freq > nyquist || high_freq <= low_freq) {
        LOG(FATAL) << "Bad values in options: low-freq " << low_freq
                   << " and high-freq " << high_freq << " vs. nyquist "
                   << nyquist;
    }

    float fft_bin_width = sample_freq / window_length_padded;
    // fft-bin width [think of it as Nyquist-freq / half-window-length]

    float mel_low_freq = MelScale(low_freq);
    float mel_high_freq = MelScale(high_freq);

    debug_ = opts.debug_mel;

    // divide by num_bins+1 in next line because of end-effects where the bins
    // spread out to the sides.
    float mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins + 1);

    // A negative vtln_high is an offset from the Nyquist frequency.
    float vtln_low = opts.vtln_low, vtln_high = opts.vtln_high;
    if (vtln_high < 0.0f) {
        vtln_high += nyquist;
    }

    // The VTLN cutoffs are only validated when warping is actually applied.
    if (vtln_warp_factor != 1.0f &&
        (vtln_low < 0.0f || vtln_low <= low_freq || vtln_low >= high_freq ||
         vtln_high <= 0.0f || vtln_high >= high_freq ||
         vtln_high <= vtln_low)) {
        LOG(FATAL) << "Bad values in options: vtln-low " << vtln_low
                   << " and vtln-high " << vtln_high << ", versus "
                   << "low-freq " << low_freq << " and high-freq " << high_freq;
    }

    bins_.resize(num_bins);
    center_freqs_.resize(num_bins);

    for (int32_t bin = 0; bin < num_bins; ++bin) {
        // Left edge, peak and right edge of this triangle, in mel.
        float left_mel = mel_low_freq + bin * mel_freq_delta,
              center_mel = mel_low_freq + (bin + 1) * mel_freq_delta,
              right_mel = mel_low_freq + (bin + 2) * mel_freq_delta;

        if (vtln_warp_factor != 1.0f) {
            left_mel = VtlnWarpMelFreq(vtln_low,
                                       vtln_high,
                                       low_freq,
                                       high_freq,
                                       vtln_warp_factor,
                                       left_mel);
            center_mel = VtlnWarpMelFreq(vtln_low,
                                         vtln_high,
                                         low_freq,
                                         high_freq,
                                         vtln_warp_factor,
                                         center_mel);
            right_mel = VtlnWarpMelFreq(vtln_low,
                                        vtln_high,
                                        low_freq,
                                        high_freq,
                                        vtln_warp_factor,
                                        right_mel);
        }
        center_freqs_[bin] = InverseMelScale(center_mel);

        // this_bin will be a vector of coefficients that is only
        // nonzero where this mel bin is active.
        std::vector<float> this_bin(num_fft_bins);

        int32_t first_index = -1, last_index = -1;
        for (int32_t i = 0; i < num_fft_bins; ++i) {
            float freq = (fft_bin_width * i);  // Center frequency of this fft
                                               // bin.
            float mel = MelScale(freq);
            // Strictly inside the triangle: weight rises linearly up to the
            // centre and falls linearly after it.
            if (mel > left_mel && mel < right_mel) {
                float weight;
                if (mel <= center_mel)
                    weight = (mel - left_mel) / (center_mel - left_mel);
                else
                    weight = (right_mel - mel) / (right_mel - center_mel);
                this_bin[i] = weight;
                if (first_index == -1) first_index = i;
                last_index = i;
            }
        }
        CHECK(first_index != -1 && last_index >= first_index &&
              "You may have set num_mel_bins too large.");

        // Keep only the non-zero run [first_index, last_index].
        bins_[bin].first = first_index;
        int32_t size = last_index + 1 - first_index;
        bins_[bin].second.insert(bins_[bin].second.end(),
                                 this_bin.begin() + first_index,
                                 this_bin.begin() + first_index + size);

        // Replicate a bug in HTK, for testing purposes.
        if (opts.htk_mode && bin == 0 && mel_low_freq != 0.0f) {
            bins_[bin].second[0] = 0.0;
        }
    }  // for (int32_t bin = 0; bin < num_bins; ++bin) {

    // Optional dump of every bin's offset and weights.
    if (debug_) {
        std::ostringstream os;
        for (size_t i = 0; i < bins_.size(); i++) {
            os << "bin " << i << ", offset = " << bins_[i].first << ", vec = ";
            for (auto k : bins_[i].second) os << k << ", ";
            os << "\n";
        }
        LOG(INFO) << os.str();
    }
}

// "power_spectrum" contains fft energies.
void MelBanks::Compute(const float *power_spectrum,
                       float *mel_energies_out) const {
    // One output value per mel bin: the weighted sum of the FFT energies
    // that the bin covers, starting at the bin's stored offset.
    const int32_t num_bins = bins_.size();

    for (int32_t bin = 0; bin < num_bins; ++bin) {
        const std::vector<float> &weights = bins_[bin].second;
        const float *spectrum = power_spectrum + bins_[bin].first;

        float energy = 0;
        for (size_t k = 0; k < weights.size(); ++k) {
            energy += weights[k] * spectrum[k];
        }

        // HTK-like flooring- for testing purposes (we prefer dither)
        if (htk_mode_ && energy < 1.0) {
            energy = 1.0;
        }

        mel_energies_out[bin] = energy;

        // Guard against NaN: it once showed up because of a bug in OpenBlas,
        // and catching it here surfaces such problems early.
        CHECK_EQ(energy, energy);
    }

    if (!debug_) {
        return;
    }

    // Debug dump of all mel energies on stderr.
    fprintf(stderr, "MEL BANKS:\n");
    for (int32_t bin = 0; bin < num_bins; ++bin) {
        fprintf(stderr, " %f", mel_energies_out[bin]);
    }
    fprintf(stderr, "\n");
}

}  // namespace knf


================================================
FILE: runtime/engine/common/frontend/mel-computations.h
================================================
/**
 * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
 *
 * See LICENSE for clarification regarding multiple authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// This file is copied/modified from kaldi/src/feat/mel-computations.h
#ifndef KALDI_NATIVE_FBANK_CSRC_MEL_COMPUTATIONS_H_
#define KALDI_NATIVE_FBANK_CSRC_MEL_COMPUTATIONS_H_

#include <cmath>
#include <string>

#include "frontend/feature-window.h"

namespace knf {

// Configuration of the mel filter bank.
struct MelBanksOptions {
    // Number of triangular bins, e.g. 25.
    int32_t num_bins = 25;

    // Lower frequency cutoff, e.g. 20.
    float low_freq = 20;

    // Upper frequency cutoff: 0 means no cutoff; a negative value is added
    // to the Nyquist frequency to obtain the cutoff.
    float high_freq = 0;

    // Lower cutoff of the VTLN warping function.
    float vtln_low = 100;

    // Upper cutoff of the VTLN warping function; a negative value is added
    // to the Nyquist frequency to obtain the cutoff.
    float vtln_high = -500;

    // Print the filter bank and the computed mel energies for debugging.
    bool debug_mel = false;

    // "Hidden" config that is not exposed on the command line. Enables more
    // exact compatibility with HTK for testing purposes: it affects
    // mel-energy flooring and reproduces a bug in HTK.
    bool htk_mode = false;

    // Returns all options as "name: value" lines, one option per line.
    std::string ToString() const {
        std::ostringstream os;
        os << "num_bins: " << num_bins << "\n"
           << "low_freq: " << low_freq << "\n"
           << "high_freq: " << high_freq << "\n"
           << "vtln_low: " << vtln_low << "\n"
           << "vtln_high: " << vtln_high << "\n"
           << "debug_mel: " << debug_mel << "\n"
           << "htk_mode: " << htk_mode << "\n";
        return os.str();
    }
};

std::ostream &operator<<(std::ostream &os, const MelBanksOptions &opts);

// Triangular mel filter bank: built once from the options, then applied to
// FFT energies with Compute().
class MelBanks {
  public:
    // Converts a mel value back to a frequency in Hz (inverse of MelScale).
    static inline float InverseMelScale(float mel_freq) {
        return 700.0f * (expf(mel_freq / 1127.0f) - 1.0f);
    }

    // Converts a frequency in Hz to the mel scale.
    static inline float MelScale(float freq) {
        return 1127.0f * logf(1.0f + freq / 700.0f);
    }

    // Piecewise-linear VTLN warp of `freq` (in Hz). Frequencies outside
    // [low_freq, high_freq] are returned unchanged.
    static float VtlnWarpFreq(
        float vtln_low_cutoff,
        float vtln_high_cutoff,  // discontinuities in warp func
        float low_freq,
        float high_freq,  // upper+lower frequency cutoffs in
        // the mel computation
        float vtln_warp_factor,
        float freq);

    // Same warp as VtlnWarpFreq, but `mel_freq` and the return value are on
    // the mel scale.
    static float VtlnWarpMelFreq(float vtln_low_cutoff,
                                 float vtln_high_cutoff,
                                 float low_freq,
                                 float high_freq,
                                 float vtln_warp_factor,
                                 float mel_freq);

    // TODO(fangjun): Remove vtln_warp_factor
    // A vtln_warp_factor of 1.0 disables warping.
    MelBanks(const MelBanksOptions &opts,
             const FrameExtractionOptions &frame_opts,
             float vtln_warp_factor);

    /// Compute Mel energies (note: not log energies).
    /// At input, "fft_energies" contains the FFT energies (not log).
    ///
    /// @param fft_energies 1-D array of FFT energies. Only bins
    ///        0 .. PaddedWindowSize()/2 - 1 can carry a weight, so the array
    ///        must hold at least PaddedWindowSize()/2 values.
    /// @param mel_energies_out  1-D array of size num_mel_bins
    void Compute(const float *fft_energies, float *mel_energies_out) const;

    // Number of mel bins in this filter bank.
    int32_t NumBins() const { return bins_.size(); }

  private:
    // center frequencies of bins, numbered from 0 ... num_bins-1.
    // Needed by GetCenterFreqs().
    std::vector<float> center_freqs_;

    // the "bins_" vector is a vector, one for each bin, of a pair:
    // (the first nonzero fft-bin), (the vector of weights).
    std::vector<std::pair<int32_t, std::vector<float>>> bins_;

    // TODO(fangjun): Remove debug_ and htk_mode_
    // debug_: print the filter bank and the computed energies.
    bool debug_;
    // htk_mode_: floor every mel energy at 1.0 in Compute().
    bool htk_mode_;
};

}  // namespace knf

#endif  // KALDI_NATIVE_FBANK_CSRC_MEL_COMPUTATIONS_H_


================================================
FILE: runtime/engine/common/frontend/normalizer.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "frontend/cmvn.h"

================================================
FILE: runtime/engine/common/frontend/rfft.cc
================================================
/**
 * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
 *
 * See LICENSE for clarification regarding multiple authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "frontend/rfft.h"

#include <cmath>
#include <memory>
#include <vector>

#include "base/log.h"

// Declaration of the real discrete Fourier transform routine; the
// implementation lives in fftsg.c and is declared with C linkage when this
// file is compiled as C++.
// NOTE(review): as used below, n is the transform length, isgn == 1 selects
// the forward transform, `a` is transformed in place, and ip / w are work
// buffers that RfftImpl sizes to 2 + sqrt(n / 2) and n / 2 elements
// respectively -- confirm the exact contract against fftsg.c.
#ifdef __cplusplus
extern "C" void rdft(int n, int isgn, double *a, int *ip, double *w);
#else
void rdft(int n, int isgn, double *a, int *ip, double *w);
#endif

namespace knf {
// Private implementation of Rfft. It stores the transform length together
// with the two work buffers that are handed to rdft() on every call.
class Rfft::RfftImpl {
  public:
    // @param n Transform length. Must be a power of two and at least 2.
    //
    // The length is validated while n_ is being initialized, i.e. before the
    // work buffers are sized from it. Members are initialized in declaration
    // order (n_, ip_, w_), so an invalid n (zero, one, negative, or not a
    // power of two) is rejected before it can reach std::sqrt or be converted
    // into a vector size.
    explicit RfftImpl(int32_t n)
        : n_(CheckedSize(n)),
          ip_(2 + static_cast<int32_t>(std::sqrt(n_ / 2))),
          w_(n_ / 2) {}

    // Single-precision entry point: widens the n_ input samples to double,
    // runs the transform, and narrows the n_ results back into in_out.
    void Compute(float *in_out) {
        std::vector<double> d(in_out, in_out + n_);

        Compute(d.data());

        std::copy(d.begin(), d.end(), in_out);
    }

    // Double-precision entry point: transforms the n_ values in place.
    void Compute(double *in_out) {
        // 1 means forward fft
        rdft(n_, 1, in_out, ip_.data(), w_.data());
    }

  private:
    // Returns n unchanged after checking that it is a power of two >= 2.
    static int32_t CheckedSize(int32_t n) {
        CHECK_EQ(n < 2, false);
        CHECK_EQ(n & (n - 1), 0);
        return n;
    }

    int32_t n_;                // transform length
    std::vector<int32_t> ip_;  // integer work buffer passed to rdft()
    std::vector<double> w_;    // floating-point work buffer passed to rdft()
};

// Creates the implementation object for an n-point transform. The argument is
// checked inside RfftImpl's constructor.
Rfft::Rfft(int32_t n) : impl_(std::make_unique<RfftImpl>(n)) {}

// Defined in this translation unit rather than in the header: impl_ is a
// std::unique_ptr<RfftImpl>, RfftImpl is only forward-declared in rfft.h, and
// it is a complete type at this point in the file.
Rfft::~Rfft() = default;

// Both overloads simply forward to the implementation object.
void Rfft::Compute(float *in_out) { impl_->Compute(in_out); }
void Rfft::Compute(double *in_out) { impl_->Compute(in_out); }

}  // namespace knf


================================================
FILE: runtime/engine/common/frontend/rfft.h
================================================
/**
 * Copyright (c)  2022  Xiaomi Corporation (authors: Fangjun Kuang)
 *
 * See LICENSE for clarification regarding multiple authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef KALDI_NATIVE_FBANK_CSRC_RFFT_H_
#define KALDI_NATIVE_FBANK_CSRC_RFFT_H_

#include <memory>

namespace knf {

// n-point real discrete Fourier transform,
// where n is a power of 2 and n >= 2.
//
//  R[k] = sum_j=0^n-1 in[j]*cos(2*pi*j*k/n), 0<=k<=n/2
//  I[k] = sum_j=0^n-1 in[j]*sin(2*pi*j*k/n), 0<k<n/2
//
// The implementation is hidden behind a forward-declared RfftImpl that is
// owned through a std::unique_ptr.
class Rfft {
  public:
    // @param n Number of fft bins. It should be a power of 2.
    explicit Rfft(int32_t n);
    ~Rfft();

    /** Computes the transform in place.
     *
     *  @param in_out A 1-D array of size n.
     *             On return:
     *               in_out[0] = R[0]
     *               in_out[1] = R[n/2]
     *               for 0 < k < n/2,
     *                 in_out[2*k] = R[k]
     *                 in_out[2*k+1] = I[k]
     *
     */
    void Compute(float *in_out);
    void Compute(double *in_out);

  private:
    class RfftImpl;
    std::unique_ptr<RfftImpl> impl_;
};

}  // namespace knf

#endif  // KALDI_NATIVE_FBANK_CSRC_RFFT_H_


================================================
FILE: runtime/engine/common/frontend/wave-reader.cc
================================================
// feat/wave-reader.cc

// Copyright 2009-2011  Karel Vesely;  Petr Motlicek
//                2013  Florent Masson
//                2013  Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "frontend/wave-reader.h"

#include <algorithm>
#include <cstdio>
#include <limits>
#include <sstream>
#include <vector>

#include "base/kaldi-error.h"
#include "base/kaldi-utils.h"

namespace kaldi {

// A utility class for reading the fields of a wave header from a stream.
//
// Every reader reports problems through KALDI_ERR. Multi-byte integers are
// byte-swapped after reading when `swap` is true.
struct WaveHeaderReadGofer {
    std::istream &is;  // stream the header is read from
    bool swap;         // when true, reverse the bytes of each integer read
    char tag[5];       // last 4-byte tag read, always NUL-terminated

    WaveHeaderReadGofer(std::istream &is) : is(is), swap(false) {
        memset(tag, '\0', sizeof tag);
    }

    // Reads a 4-byte tag and raises an error unless it equals `expected`.
    void Expect4ByteTag(const char *expected) {
        is.read(tag, 4);
        if (is.fail())
            KALDI_ERR << "WaveData: expected " << expected
                      << ", failed to read anything";
        if (strcmp(tag, expected))
            KALDI_ERR << "WaveData: expected " << expected << ", got " << tag;
    }

    // Reads a 4-byte tag into `tag` without checking its value.
    void Read4ByteTag() {
        is.read(tag, 4);
        if (is.fail())
            KALDI_ERR << "WaveData: expected 4-byte chunk-name, got read error";
    }

    // Reads a 32-bit unsigned integer.
    uint32 ReadUint32() {
        char bytes[4] = {0, 0, 0, 0};
        is.read(bytes, 4);
        // Check for failure before touching the bytes: on a short read their
        // contents are not meaningful.
        if (is.fail())
            KALDI_ERR << "WaveData: unexpected end of file or read error";
        if (swap) KALDI_SWAP4(bytes);
        // Copy instead of reading an inactive union member.
        uint32 ans;
        std::memcpy(&ans, bytes, sizeof(ans));
        return ans;
    }

    // Reads a 16-bit unsigned integer.
    uint16 ReadUint16() {
        char bytes[2] = {0, 0};
        is.read(bytes, 2);
        if (is.fail())
            KALDI_ERR << "WaveData: unexpected end of file or read error";
        if (swap) KALDI_SWAP2(bytes);
        // The value is assembled directly as uint16, matching the return type.
        uint16 ans;
        std::memcpy(&ans, bytes, sizeof(ans));
        return ans;
    }
};

// Writes the 4 bytes of `i` to `os`; the bytes are reversed first when
// __BIG_ENDIAN__ is defined. Reports a stream failure through KALDI_ERR.
static void WriteUint32(std::ostream &os, int32 i) {
    int word = i;
#ifdef __BIG_ENDIAN__
    KALDI_SWAP4(word);
#endif
    const char *raw = reinterpret_cast<const char *>(&word);
    os.write(raw, 4);
    if (os.fail()) KALDI_ERR << "WaveData: error writing to stream.";
}

static void WriteUint16(std::ostream &os, int16 i) {
    union {
        char buf[2];
        int16 i;
    } u;
    u.i = i;
#ifdef __BIG_ENDIAN__
    KALDI_SWAP2(u.buf);
#endif
    os.write(u.buf, 2);
    if (os.fail()) KALDI_ERR << "WaveData: error writing to stream.";
}

// Parses a RIFF/RIFX wave header from `is` and fills in the sample rate,
// channel count, byte-order flag and sample count of this object.
//
// The stream is consumed up to and including the size field of the first
// "data" chunk. Chunks that appear before "fmt " and between "fmt " and
// "data" are skipped. Only 16-bit samples are accepted, stored either as plain
// PCM (format id 1) or as WAVE_FORMAT_EXTENSIBLE with the PCM sub-format.
// Malformed or unsupported headers are reported through KALDI_ERR.
//
// @param is  Input stream positioned at the start of the wave header.
void WaveInfo::Read(std::istream &is) {
    WaveHeaderReadGofer reader(is);
    // The first tag selects the byte order of the file.
    reader.Read4ByteTag();
    if (strcmp(reader.tag, "RIFF") == 0)
        reverse_bytes_ = false;
    else if (strcmp(reader.tag, "RIFX") == 0)
        reverse_bytes_ = true;
    else
        KALDI_ERR << "WaveData: expected RIFF or RIFX, got " << reader.tag;

    // When __BIG_ENDIAN__ is defined the meaning of the two tags is inverted.
#ifdef __BIG_ENDIAN__
    reverse_bytes_ = !reverse_bytes_;
#endif
    reader.swap = reverse_bytes_;

    uint32 riff_chunk_size = reader.ReadUint32();
    reader.Expect4ByteTag("WAVE");

    // Running count of bytes consumed after the RIFF size field; it is
    // compared against riff_chunk_size at the end.
    uint32 riff_chunk_read = 0;
    riff_chunk_read += 4;  // WAVE included in riff_chunk_size.

    // Possibly skip any RIFF tags between 'WAVE' and 'fmt '.
    // Apple devices produce a filler tag 'JUNK' for memory alignment.
    reader.Read4ByteTag();
    riff_chunk_read += 4;
    while (strcmp(reader.tag, "fmt ") != 0) {
        uint32 filler_size = reader.ReadUint32();
        riff_chunk_read += 4;
        // The chunk body is discarded one byte at a time.
        for (uint32 i = 0; i < filler_size; i++) {
            is.get();  // read 1 byte,
        }
        riff_chunk_read += filler_size;
        // get next RIFF tag,
        reader.Read4ByteTag();
        riff_chunk_read += 4;
    }

    KALDI_ASSERT(strcmp(reader.tag, "fmt ") == 0);
    uint32 subchunk1_size = reader.ReadUint32();
    uint16 audio_format = reader.ReadUint16();
    // NOTE(review): num_channels_ is declared as uint8 in wave-reader.h, so
    // the 16-bit value read here is narrowed on assignment.
    num_channels_ = reader.ReadUint16();
    uint32 sample_rate = reader.ReadUint32(), byte_rate = reader.ReadUint32(),
           block_align = reader.ReadUint16(),
           bits_per_sample = reader.ReadUint16();
    samp_freq_ = static_cast<BaseFloat>(sample_rate);

    // Bytes of the fmt chunk consumed so far: 2 + 2 + 4 + 4 + 2 + 2.
    uint32 fmt_chunk_read = 16;
    if (audio_format == 1) {
        if (subchunk1_size < 16) {
            KALDI_ERR << "WaveData: expect PCM format data to have fmt chunk "
                      << "of at least size 16.";
        }
    } else if (audio_format == 0xFFFE) {  // WAVE_FORMAT_EXTENSIBLE
        uint16 extra_size = reader.ReadUint16();
        if (subchunk1_size < 40 || extra_size < 22) {
            KALDI_ERR
                << "WaveData: malformed WAVE_FORMAT_EXTENSIBLE format data.";
        }
        reader.ReadUint16();  // Unused for PCM.
        reader.ReadUint32();  // Channel map: we do not care.
        // The 16-byte sub-format GUID, read as four 32-bit words.
        uint32 guid1 = reader.ReadUint32(), guid2 = reader.ReadUint32(),
               guid3 = reader.ReadUint32(), guid4 = reader.ReadUint32();
        // 16 bytes of common fields + 2 + 2 + 4 + 16 bytes read above.
        fmt_chunk_read = 40;

        // Support only KSDATAFORMAT_SUBTYPE_PCM for now. Interesting formats:
        // ("00000001-0000-0010-8000-00aa00389b71", KSDATAFORMAT_SUBTYPE_PCM)
        // ("00000003-0000-0010-8000-00aa00389b71",
        // KSDATAFORMAT_SUBTYPE_IEEE_FLOAT)
        // ("00000006-0000-0010-8000-00aa00389b71", KSDATAFORMAT_SUBTYPE_ALAW)
        // ("00000007-0000-0010-8000-00aa00389b71", KSDATAFORMAT_SUBTYPE_MULAW)
        if (guid1 != 0x00000001 || guid2 != 0x00100000 || guid3 != 0xAA000080 ||
            guid4 != 0x719B3800) {
            KALDI_ERR << "WaveData: unsupported WAVE_FORMAT_EXTENSIBLE format.";
        }
    } else {
        KALDI_ERR << "WaveData: can read only PCM data, format id in file is: "
                  << audio_format;
    }

    for (uint32 i = fmt_chunk_read; i < subchunk1_size; ++i)
        is.get();  // use up extra data.

    // Consistency checks on the format fields.
    if (num_channels_ == 0) KALDI_ERR << "WaveData: no channels present";
    if (bits_per_sample != 16)
        KALDI_ERR << "WaveData: unsupported bits_per_sample = "
                  << bits_per_sample;
    if (byte_rate != sample_rate * bits_per_sample / 8 * num_channels_)
        KALDI_ERR << "Unexpected byte rate " << byte_rate << " vs. "
                  << sample_rate << " * " << (bits_per_sample / 8) << " * "
                  << num_channels_;
    if (block_align != num_channels_ * bits_per_sample / 8)
        KALDI_ERR << "Unexpected block_align: " << block_align << " vs. "
                  << num_channels_ << " * " << (bits_per_sample / 8);

    riff_chunk_read += 4 + subchunk1_size;
    // size of what we just read, 4 for subchunk1_size + subchunk1_size itself.

    // We support an optional "fact" chunk (which is useless but which
    // we encountered), and then a single "data" chunk.

    reader.Read4ByteTag();
    riff_chunk_read += 4;

    // Skip any subchunks between "fmt" and "data".  Usually there will
    // be a single "fact" subchunk, but on Windows there can also be a
    // "list" subchunk.
    while (strcmp(reader.tag, "data") != 0) {
        // We will just ignore the data in these chunks.
        uint32 chunk_sz = reader.ReadUint32();
        if (chunk_sz != 4 && strcmp(reader.tag, "fact") == 0)
            KALDI_WARN << "Expected fact chunk to be 4 bytes long.";
        for (uint32 i = 0; i < chunk_sz; i++) is.get();
        riff_chunk_read +=
            4 + chunk_sz;  // for chunk_sz (4) + chunk contents (chunk-sz)

        // Now read the next chunk name.
        reader.Read4ByteTag();
        riff_chunk_read += 4;
    }

    KALDI_ASSERT(strcmp(reader.tag, "data") == 0);
    uint32 data_chunk_size = reader.ReadUint32();
    riff_chunk_read += 4;

    // Figure out if the file is going to be read to the end. Values as
    // observed in the wild:
    bool is_stream_mode =
        riff_chunk_size == 0 || riff_chunk_size == 0xFFFFFFFF ||
        data_chunk_size == 0 || data_chunk_size == 0xFFFFFFFF ||
        data_chunk_size == 0x7FFFF000;  // This value is used by SoX.

    if (is_stream_mode)
        KALDI_VLOG(1) << "Read in RIFF chunk size: " << riff_chunk_size
                      << ", data chunk size: " << data_chunk_size
                      << ". Assume 'stream mode' (reading data to EOF).";

    if (!is_stream_mode && std::abs(static_cast<int64>(riff_chunk_read) +
                                    static_cast<int64>(data_chunk_size) -
                                    static_cast<int64>(riff_chunk_size)) > 1) {
        // We allow the size to be off by one without warning, because there
        // is a weirdness in the format of RIFF files that means that the
        // input may sometimes be padded with 1 unused byte to make the total
        // size even.
        KALDI_WARN << "Expected " << riff_chunk_size
                   << " bytes in RIFF chunk, but "
                   << "after first data block there will be " << riff_chunk_read
                   << " + " << data_chunk_size << " bytes "
                   << "(we do not support reading multiple data chunks).";
    }

    // A negative sample count is what IsStreamed() tests for.
    if (is_stream_mode)
        samp_count_ = -1;
    else
        samp_count_ = data_chunk_size / block_align;
}

void WaveData::Read(std::istream &is) {
    const uint32 kBlockSize = 1024 * 1024;

    WaveInfo header;
    header.Read(is);

    data_.Resize(0, 0);  // clear the data.
    samp_freq_ = header.SampFreq();

    std::vector<char> buffer;
    uint32 bytes_to_go = header.IsStreamed() ? kBlockSize : header.DataBytes();

    // Once in a while header.DataBytes() will report an insane value;
    // read the file to the end
    while (is && bytes_to_go > 0) {
        uint32 block_bytes = std::min(bytes_to_go, kBlockSize);
        uint32 offset = buffer.size();
        buffer.resize(offset + block_bytes);
        is.read(&buffer[offset], block_bytes);
        uint32 bytes_read = is.gcount();
        buffer.resize(offset + bytes_read);
        if (!header.IsStreamed()) bytes_to_go -= bytes_read;
    }

    if (is.bad()) KALDI_ERR << "WaveData: file read error";

    if (buffer.size() == 0) KALDI_ERR << "WaveData: empty file (no data)";

    if (!header.IsStreamed() && buffer.size() < header.DataBytes()) {
        KALDI_WARN << "Expected " << header.DataBytes()
                   << " bytes of wave data, "
                   << "but read only " << buffer.size() << " bytes. "
                   << "Truncated file?";
    }

    uint16 *data_ptr = reinterpret_cast<uint16 *>(&buffer[0]);

    // The matrix is arranged row per channel, column per sample.
    data_.Resize(header.NumChannels(), buffer.size() / header.BlockAlign());
    for (uint32 i = 0; i < data_.NumCols(); ++i) {
        for (uint32 j = 0; j < data_.NumRows(); ++j) {
            int16 k = *data_ptr++;
            if (header.ReverseBytes()) KALDI_SWAP2(k);
            data_(j, i) = k;
        }
    }
}


// Write 16-bit PCM.

// note: the WAVE chunk contains 2 subchunks.
//
// subchunk2size = data.NumRows() * data.NumCols() * 2.


void WaveData::Write(std::ostream &os) const {
    os << "RIFF";
    if (data_.NumRows() == 0)
        KALDI_ERR << "Error: attempting to write empty WAVE file";

    int32 num_chan = data_.NumRows(), num_samp = data_.NumCols(),
          bytes_per_samp = 2;

    int32 subchunk2size = (num_chan * num_samp * bytes_per_samp);
    int32 chunk_size = 36 + subchunk2size;
    WriteUint32(os, chunk_size);
    os << "WAVE";
    os << "fmt ";
    WriteUint32(os, 16);
    WriteUint16(os, 1);
    WriteUint16(os, num_chan);
    KALDI_ASSERT(samp_freq_ > 0);
    WriteUint32(os, static_cast<int32>(samp_freq_));
    WriteUint32(os, static_cast<int32>(samp_freq_) * num_chan * bytes_per_samp);
    WriteUint16(os, num_chan * bytes_per_samp);
    WriteUint16(os, 8 * bytes_per_samp);
    os << "data";
    WriteUint32(os, subchunk2size);

    const BaseFloat *data_ptr = data_.Data();
    int32 stride = data_.Stride();

    int num_clipped = 0;
    for (int32 i = 0; i < num_samp; i++) {
        for (int32 j = 0; j < num_chan; j++) {
            int32 elem = static_cast<int32>(trunc(data_ptr[j * stride + i]));
            int16 elem_16 = static_cast<int16>(elem);
            if (elem < std::numeric_limits<int16>::min()) {
                elem_16 = std::numeric_limits<int16>::min();
                ++num_clipped;
            } else if (elem > std::numeric_limits<int16>::max()) {
                elem_16 = std::numeric_limits<int16>::max();
                ++num_clipped;
            }
#ifdef __BIG_ENDIAN__
            KALDI_SWAP2(elem_16);
#endif
            os.write(reinterpret_cast<char *>(&elem_16), 2);
        }
    }
    if (os.fail()) KALDI_ERR << "Error writing wave data to stream.";
    if (num_clipped > 0)
        KALDI_WARN << "WARNING: clipped " << num_clipped
                   << " samples out of total " << num_chan * num_samp
                   << ". Reduce volume?";
}


}  // end namespace kaldi


================================================
FILE: runtime/engine/common/frontend/wave-reader.h
================================================
// feat/wave-reader.h

// Copyright 2009-2011  Karel Vesely;  Microsoft Corporation
//                2013  Florent Masson
//                2013  Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


/*
// THE WAVE FORMAT IS SPECIFIED IN:
// https://ccrma.stanford.edu/courses/422/projects/WaveFormat/
//
//
//
//  RIFF
//  |
//  WAVE
//  |    \    \   \
//  fmt_ data ... data
//
//
//  Riff is a general container, which usually contains one WAVE chunk
//  each WAVE chunk has header sub-chunk 'fmt_'
//  and one or more data sub-chunks 'data'
//
//  [Note from Dan: to say that the wave format was ever "specified" anywhere is
//   not quite right.  The guy who invented the wave format attempted to create
//   a formal specification but it did not completely make sense.  And there
//   doesn't seem to be a consensus on what makes a valid wave file,
//   particularly where the accuracy of header information is concerned.]
*/


#ifndef KALDI_FEAT_WAVE_READER_H_
#define KALDI_FEAT_WAVE_READER_H_

#include <cstring>

#include "base/kaldi-types.h"
#include "matrix/kaldi-matrix.h"
#include "matrix/kaldi-vector.h"


namespace kaldi {

/// For historical reasons, we scale waveforms to the range
/// (2^15-1)*[-1, 1], not the usual default DSP range [-1, 1].
/// NOTE(review): the constant below is 2^15 (32768), not the 2^15-1 quoted
/// above -- confirm which bound is intended before relying on it.
const BaseFloat kWaveSampleMax = 32768.0;

/// Reads and holds the header information of a wave file.
class WaveInfo {
  public:
    WaveInfo()
        : samp_freq_(0),
          samp_count_(0),
          num_channels_(0),
          reverse_bytes_(false) {}

    /// True when the stream size is unknown (negative sample count).
    /// SampleCount(), Duration() and DataBytes() are not valid in that case.
    bool IsStreamed() const { return samp_count_ < 0; }

    /// Sample frequency, Hz.
    BaseFloat SampFreq() const { return samp_freq_; }

    /// Number of samples in the stream. Invalid if IsStreamed() is true.
    uint32 SampleCount() const { return static_cast<uint32>(samp_count_); }

    /// Approximate duration in seconds. Invalid if IsStreamed() is true.
    BaseFloat Duration() const {
        return static_cast<BaseFloat>(samp_count_) / samp_freq_;
    }

    /// Number of channels.
    int32 NumChannels() const { return num_channels_; }

    /// Bytes occupied by one sample across all channels (2 bytes each).
    size_t BlockAlign() const { return static_cast<size_t>(2 * num_channels_); }

    /// Wave data size in bytes. Invalid if IsStreamed() is true.
    size_t DataBytes() const {
        return static_cast<size_t>(samp_count_) * BlockAlign();
    }

    /// Is the byte order of the data file different from the machine's?
    bool ReverseBytes() const { return reverse_bytes_; }

    /// 'is' should be opened in binary mode. Read() will throw on error.
    /// On success 'is' will be positioned at the beginning of wave data.
    void Read(std::istream &is);

  private:
    BaseFloat samp_freq_;  // Sample frequency, Hz.
    int32 samp_count_;     // 0 if empty, -1 if undefined length.
    uint8 num_channels_;   // Channel count.
    bool reverse_bytes_;   // File endianness differs from host.
};

/// Holds the samples and sample rate of a wave file, and can read and write
/// them from/to a stream.
class WaveData {
  public:
    /// Creates an empty object with a sample frequency of zero.
    WaveData() : samp_freq_(0.0) {}

    /// Creates an object holding a copy of 'data' at the given sample rate.
    WaveData(BaseFloat samp_freq, const MatrixBase<BaseFloat> &data)
        : data_(data), samp_freq_(samp_freq) {}

    /// Read() will throw on error.  It's valid to call Read() more than once--
    /// in this case it will destroy what was there before.
    /// "is" should be opened in binary mode.
    void Read(std::istream &is);

    /// Write() will throw on error.   os should be opened in binary mode.
    void Write(std::ostream &os) const;

    // Returns the wave data as a matrix, because there may be multiple
    // channels.  In the normal case there's just one channel, so Data() will
    // have one row.
    const Matrix<BaseFloat> &Data() const { return data_; }

    // Returns the sample frequency.
    BaseFloat SampFreq() const { return samp_freq_; }

    // Returns the duration in seconds
    BaseFloat Duration() const {
        return static_cast<BaseFloat>(data_.NumCols()) / samp_freq_;
    }

    // Copies both the samples and the sample frequency from 'other'.
    void CopyFrom(const WaveData &other) {
        data_.CopyFromMat(other.data_);
        samp_freq_ = other.samp_freq_;
    }

    // Resets this object to the empty state.
    void Clear() {
        samp_freq_ = 0.0;
        data_.Resize(0, 0);
    }

    // Exchanges the contents of this object with those of 'other'.
    void Swap(WaveData *other) {
        std::swap(samp_freq_, other->samp_freq_);
        data_.Swap(&(other->data_));
    }

  private:
    static const uint32 kBlockSize = 1024 * 1024;  // Use 1M bytes.
    Matrix<BaseFloat> data_;
    BaseFloat samp_freq_;
};


// Holder class for .wav files. c.f. util/kaldi-holder.h: we don't use the
// KaldiObjectHolder template because we don't want to check for the \0B binary
// header. We could have faked it by pretending to read in the wave data in
// text mode after failing to find the \0B header, but that would have been a
// little ugly.
class WaveHolder {
  public:
    typedef WaveData T;

    WaveHolder() {}

    WaveHolder(const WaveHolder &other) : t_(other.t_) {}

    WaveHolder &operator=(const WaveHolder &other) {
        t_.CopyFrom(other.t_);
        return *this;
    }

    // Writes 't' to 'os'. Returns false (after logging a warning) when the
    // write throws; a request for text mode is reported through KALDI_ERR.
    static bool Write(std::ostream &os, bool binary, const T &t) {
        // We don't write the binary-mode header here [always binary].
        if (!binary)
            KALDI_ERR << "Wave data can only be written in binary mode.";
        bool written = true;
        try {
            t.Write(os);  // throws exception on failure.
        } catch (const std::exception &e) {
            KALDI_WARN << "Exception caught in WaveHolder object (writing). "
                       << e.what();
            written = false;  // write failure.
        }
        return written;
    }

    // Reads a wave file from 'is'. Returns false (after logging a warning)
    // when the read throws.
    bool Read(std::istream &is) {
        // We don't look for the binary-mode header here [always binary]
        bool loaded = true;
        try {
            t_.Read(is);  // Throws exception on failure.
        } catch (const std::exception &e) {
            KALDI_WARN << "Exception caught in WaveHolder::Read(). "
                       << e.what();
            loaded = false;
        }
        return loaded;
    }

    static bool IsReadInBinary() { return true; }

    void Copy(const T &t) { t_.CopyFrom(t); }

    void Clear() { t_.Clear(); }

    void Swap(WaveHolder *other) { t_.Swap(&(other->t_)); }

    T &Value() { return t_; }

    // Not supported for this holder: always reports an error.
    bool ExtractRange(const WaveHolder &other, const std::string &range) {
        KALDI_ERR << "ExtractRange is not defined for this type of holder.";
        return false;
    }

  private:
    T t_;
};

// Like WaveHolder, but for when only the metadata is wanted: it reads the
// header through WaveInfo and does not read the actual sample data.
class WaveInfoHolder {
  public:
    typedef WaveInfo T;

    static bool IsReadInBinary() { return true; }

    // Reads the wave header from 'is'. Returns false (after logging a
    // warning) when the read throws.
    bool Read(std::istream &is) {
        bool loaded = true;
        try {
            info_.Read(is);  // Throws exception on failure.
        } catch (const std::exception &e) {
            KALDI_WARN << "Exception caught in WaveInfoHolder::Read(). "
                       << e.what();
            loaded = false;
        }
        return loaded;
    }

    T &Value() { return info_; }

    void Clear() { info_ = WaveInfo(); }

    void Swap(WaveInfoHolder *other) { std::swap(info_, other->info_); }

    // Not supported for this holder: always reports an error.
    bool ExtractRange(const WaveInfoHolder &other, const std::string &range) {
        KALDI_ERR << "ExtractRange is not defined for this type of holder.";
        return false;
    }

  private:
    WaveInfo info_;
};


}  // namespace kaldi

#endif  // KALDI_FEAT_WAVE_READER_H_


================================================
FILE: runtime/engine/common/matrix/CMakeLists.txt
================================================

# Matrix/vector library built from the two sources in this directory.
add_library(kaldi-matrix
    kaldi-matrix.cc
    kaldi-vector.cc
)

# kaldi-matrix is linked against kaldi-base.
target_link_libraries(kaldi-matrix kaldi-base)


================================================
FILE: runtime/engine/common/matrix/kaldi-matrix-inl.h
================================================
// matrix/kaldi-matrix-inl.h

// Copyright 2009-2011  Microsoft Corporation;  Haihua Xu

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_MATRIX_KALDI_MATRIX_INL_H_
#define KALDI_MATRIX_KALDI_MATRIX_INL_H_ 1

#include "matrix/kaldi-vector.h"

namespace kaldi {

/// Empty constructor: forwards a null data pointer and three zeros to the
/// MatrixBase constructor.
template <typename Real>
Matrix<Real>::Matrix() : MatrixBase<Real>(NULL, 0, 0, 0) {}

/*
template<>
template<>
void MatrixBase<float>::AddVecVec(const float alpha, const VectorBase<float>
&ra, const VectorBase<float> &rb);

template<>
template<>
void MatrixBase<double>::AddVecVec(const double alpha, const VectorBase<double>
&ra, const VectorBase<double> &rb);
*/

/// Stream insertion for matrices: delegates to MatrixBase::Write() with its
/// second argument set to false (presumably the `binary` flag -- confirm
/// against MatrixBase::Write) and returns the stream for chaining.
template <typename Real>
inline std::ostream& operator<<(std::ostream& os, const MatrixBase<Real>& M) {
    const bool write_flag = false;
    M.Write(os, write_flag);
    return os;
}

/// Stream extraction into a Matrix: delegates to Matrix::Read() with its
/// second argument set to false (presumably the `binary` flag -- confirm
/// against Matrix::Read) and returns the stream for chaining.
template <typename Real>
inline std::istream& operator>>(std::istream& is, Matrix<Real>& M) {
    const bool read_flag = false;
    M.Read(is, read_flag);
    return is;
}


/// Stream extraction into a MatrixBase: delegates to MatrixBase::Read() with
/// its second argument set to false (presumably the `binary` flag -- confirm
/// against MatrixBase::Read) and returns the stream for chaining.
template <typename Real>
inline std::istream& operator>>(std::istream& is, MatrixBase<Real>& M) {
    const bool read_flag = false;
    M.Read(is, read_flag);
    return is;
}

}  // namespace kaldi


#endif  // KALDI_MATRIX_KALDI_MATRIX_INL_H_


================================================
FILE: runtime/engine/common/matrix/kaldi-matrix.cc
================================================
// matrix/kaldi-matrix.cc

// Copyright 2009-2011   Lukas Burget;  Ondrej Glembek;  Go Vivace Inc.;
//                       Microsoft Corporation;  Saarland University;
//                       Yanmin Qian;  Petr Schwarz;  Jan Silovsky;
//                       Haihua Xu
//           2017        Shiyin Kang
//           2019        Yiwen Shao

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "matrix/kaldi-matrix.h"

namespace kaldi {
/*
template<typename Real>
void MatrixBase<Real>::Invert(Real *log_det, Real *det_sign,
                              bool inverse_needed) {
  KALDI_ASSERT(num_rows_ == num_cols_);
  if (num_rows_ == 0) {
    if (det_sign) *det_sign = 1;
    if (log_det) *log_det = 0.0;
    return;
  }
#ifndef HAVE_ATLAS
  KaldiBlasInt *pivot = new KaldiBlasInt[num_rows_];
  KaldiBlasInt M = num_rows_;
  KaldiBlasInt N = num_cols_;
  KaldiBlasInt LDA = stride_;
  KaldiBlasInt result = -1;
  KaldiBlasInt l_work = std::max<KaldiBlasInt>(1, N);
  Real *p_work;
  void *temp;
  if ((p_work = static_cast<Real*>(
          KALDI_MEMALIGN(16, sizeof(Real)*l_work, &temp))) == NULL) {
    delete[] pivot;
    throw std::bad_alloc();
  }

  clapack_Xgetrf2(&M, &N, data_, &LDA, pivot, &result);
  const int pivot_offset = 1;
#else
  int *pivot = new int[num_rows_];
  int result;
  clapack_Xgetrf(num_rows_, num_cols_, data_, stride_, pivot, &result);
  const int pivot_offset = 0;
#endif
  KALDI_ASSERT(result >= 0 && "Call to CLAPACK sgetrf_ or ATLAS clapack_sgetrf "
               "called with wrong arguments");
  if (result > 0) {
    if (inverse_needed) {
      KALDI_ERR << "Cannot invert: matrix is singular";
    } else {
      if (log_det) *log_det = -std::numeric_limits<Real>::infinity();
      if (det_sign) *det_sign = 0;
      delete[] pivot;
#ifndef HAVE_ATLAS
      KALDI_MEMALIGN_FREE(p_work);
#endif
      return;
    }
  }
  if (det_sign != NULL) {
    int sign = 1;
    for (MatrixIndexT i = 0; i < num_rows_; i++)
      if (pivot[i] != static_cast<int>(i) + pivot_offset) sign *= -1;
    *det_sign = sign;
  }
  if (log_det != NULL || det_sign != NULL) {  // Compute log determinant.
    if (log_det != NULL) *log_det = 0.0;
    Real prod = 1.0;
    for (MatrixIndexT i = 0; i < num_rows_; i++) {
      prod *= (*this)(i, i);
      if (i == num_rows_ - 1 || std::fabs(prod) < 1.0e-10 ||
          std::fabs(prod) > 1.0e+10) {
        if (log_det != NULL) *log_det += kaldi::Log(std::fabs(prod));
        if (det_sign != NULL) *det_sign *= (prod > 0 ? 1.0 : -1.0);
        prod = 1.0;
      }
    }
  }
#ifndef HAVE_ATLAS
  if (inverse_needed) clapack_Xgetri2(&M, data_, &LDA, pivot, p_work, &l_work,
                              &result);
  delete[] pivot;
  KALDI_MEMALIGN_FREE(p_work);
#else
  if (inverse_needed)
    clapack_Xgetri(num_rows_, data_, stride_, pivot, &result);
  delete [] pivot;
#endif
  KALDI_ASSERT(result == 0 && "Call to CLAPACK sgetri_ or ATLAS clapack_sgetri "
               "called with wrong arguments");
}

template<>
template<>
void MatrixBase<float>::AddVecVec(const float alpha,
                                  const VectorBase<float> &a,
                                  const VectorBase<float> &rb) {
  KALDI_ASSERT(a.Dim() == num_rows_ && rb.Dim() == num_cols_);
  cblas_Xger(a.Dim(), rb.Dim(), alpha, a.Data(), 1, rb.Data(),
             1, data_, stride_);
}

template<typename Real>
template<typename OtherReal>
void MatrixBase<Real>::AddVecVec(const Real alpha,
                                 const VectorBase<OtherReal> &a,
                                 const VectorBase<OtherReal> &b) {
  KALDI_ASSERT(a.Dim() == num_rows_ && b.Dim() == num_cols_);
  if (num_rows_ * num_cols_ > 100) { // It's probably worth it to allocate
    // temporary vectors of the right type and use BLAS.
    Vector<Real> temp_a(a), temp_b(b);
    cblas_Xger(num_rows_, num_cols_, alpha, temp_a.Data(), 1,
               temp_b.Data(), 1, data_, stride_);
  } else {
    const OtherReal *a_data = a.Data(), *b_data = b.Data();
    Real *row_data = data_;
    for (MatrixIndexT i = 0; i < num_rows_; i++, row_data += stride_) {
      BaseFloat alpha_ai = alpha * a_data[i];
      for (MatrixIndexT j = 0; j < num_cols_; j++)
        row_data[j] += alpha_ai * b_data[j];
    }
  }
}

// instantiate the template above.
template
void MatrixBase<float>::AddVecVec(const float alpha,
                                  const VectorBase<double> &a,
                                  const VectorBase<double> &b);
template
void MatrixBase<double>::AddVecVec(const double alpha,
                                   const VectorBase<float> &a,
                                   const VectorBase<float> &b);

template<>
template<>
void MatrixBase<double>::AddVecVec(const double alpha,
                                   const VectorBase<double> &a,
                                   const VectorBase<double> &rb) {
  KALDI_ASSERT(a.Dim() == num_rows_ && rb.Dim() == num_cols_);
  if (num_rows_ == 0) return;
  cblas_Xger(a.Dim(), rb.Dim(), alpha, a.Data(), 1, rb.Data(),
             1, data_, stride_);
}

template<typename Real>
void MatrixBase<Real>::AddMatMat(const Real alpha,
                                  const MatrixBase<Real>& A,
                                  MatrixTransposeType transA,
                                  const MatrixBase<Real>& B,
                                  MatrixTransposeType transB,
                                  const Real beta) {
  KALDI_ASSERT((transA == kNoTrans && transB == kNoTrans && A.num_cols_ ==
B.num_rows_ && A.num_rows_ == num_rows_ && B.num_cols_ == num_cols_)
               || (transA == kTrans && transB == kNoTrans && A.num_rows_ ==
B.num_rows_ && A.num_cols_ == num_rows_ && B.num_cols_ == num_cols_)
               || (transA == kNoTrans && transB == kTrans && A.num_cols_ ==
B.num_cols_ && A.num_rows_ == num_rows_ && B.num_rows_ == num_cols_)
               || (transA == kTrans && transB == kTrans && A.num_rows_ ==
B.num_cols_ && A.num_cols_ == num_rows_ && B.num_rows_ == num_cols_));
  KALDI_ASSERT(&A !=  this && &B != this);
  if (num_rows_ == 0) return;
  cblas_Xgemm(alpha, transA, A.data_, A.num_rows_, A.num_cols_, A.stride_,
              transB, B.data_, B.stride_, beta, data_, num_rows_, num_cols_,
stride_);

}

template<typename Real>
void MatrixBase<Real>::SetMatMatDivMat(const MatrixBase<Real>& A,
                                       const MatrixBase<Real>& B,
                                       const MatrixBase<Real>& C) {
  KALDI_ASSERT(A.NumRows() == B.NumRows() && A.NumCols() == B.NumCols());
  KALDI_ASSERT(A.NumRows() == C.NumRows() && A.NumCols() == C.NumCols());
  for (int32 r = 0; r < A.NumRows(); r++) { // each frame...
    for (int32 c = 0; c < A.NumCols(); c++) {
      BaseFloat i = C(r, c), o = B(r, c), od = A(r, c),
          id;
      if (i != 0.0) {
        id = od * (o / i); /// o / i is either zero or "scale".
      } else {
        id = od; /// Just imagine the scale was 1.0.  This is somehow true in
        /// expectation; anyway, this case should basically never happen so it
doesn't
        /// really matter.
      }
      (*this)(r, c) = id;
    }
  }
}
*/

// template<typename Real>
// void MatrixBase<Real>::CopyLowerToUpper() {
// KALDI_ASSERT(num_rows_ == num_cols_);
// Real *data = data_;
// MatrixIndexT num_rows = num_rows_, stride = stride_;
// for (int32 i = 0; i < num_rows; i++)
// for (int32 j = 0; j < i; j++)
// data[j * stride + i ] = data[i * stride + j];
//}


// template<typename Real>
// void MatrixBase<Real>::CopyUpperToLower() {
// KALDI_ASSERT(num_rows_ == num_cols_);
// Real *data = data_;
// MatrixIndexT num_rows = num_rows_, stride = stride_;
// for (int32 i = 0; i < num_rows; i++)
// for (int32 j = 0; j < i; j++)
// data[i * stride + j] = data[j * stride + i];
//}

/*
template<typename Real>
void MatrixBase<Real>::SymAddMat2(const Real alpha,
                                  const MatrixBase<Real> &A,
                                  MatrixTransposeType transA,
                                  Real beta) {
  KALDI_ASSERT(num_rows_ == num_cols_ &&
               ((transA == kNoTrans && A.num_rows_ == num_rows_) ||
                (transA == kTrans && A.num_cols_ == num_cols_)));
  KALDI_ASSERT(A.data_ != data_);
  if (num_rows_ == 0) return;

  /// When the matrix dimension(this->num_rows_) is not less than 56
  /// and the transpose type transA == kTrans, the cblas_Xsyrk(...)
  /// function will produce NaN in the output. This is a bug in the
  /// ATLAS library. To overcome this, the AddMatMat function, which calls
  /// cblas_Xgemm(...) rather than cblas_Xsyrk(...), is used in this special
  /// situation.
  /// Wei Shi: Note this bug is observed for single precision matrix
  /// on a 64-bit machine
#ifdef HAVE_ATLAS
  if (transA == kTrans && num_rows_ >= 56) {
    this->AddMatMat(alpha, A, kTrans, A, kNoTrans, beta);
    return;
  }
#endif // HAVE_ATLAS

  MatrixIndexT A_other_dim = (transA == kNoTrans ? A.num_cols_ : A.num_rows_);

  // This function call is hard-coded to update the lower triangle.
  cblas_Xsyrk(transA, num_rows_, A_other_dim, alpha, A.Data(),
              A.Stride(), beta, this->data_, this->stride_);
}


template<typename Real>
void MatrixBase<Real>::AddMatSmat(const Real alpha,
                                  const MatrixBase<Real> &A,
                                  MatrixTransposeType transA,
                                  const MatrixBase<Real> &B,
                                  MatrixTransposeType transB,
                                  const Real beta) {
  KALDI_ASSERT((transA == kNoTrans && transB == kNoTrans && A.num_cols_ ==
B.num_rows_ && A.num_rows_ == num_rows_ && B.num_cols_ == num_cols_)
               || (transA == kTrans && transB == kNoTrans && A.num_rows_ ==
B.num_rows_ && A.num_cols_ == num_rows_ && B.num_cols_ == num_cols_)
               || (transA == kNoTrans && transB == kTrans && A.num_cols_ ==
B.num_cols_ && A.num_rows_ == num_rows_ && B.num_rows_ == num_cols_)
               || (transA == kTrans && transB == kTrans && A.num_rows_ ==
B.num_cols_ && A.num_cols_ == num_rows_ && B.num_rows_ == num_cols_));
  KALDI_ASSERT(&A !=  this && &B != this);

  // We iterate over the columns of B.

  MatrixIndexT Astride = A.stride_, Bstride = B.stride_, stride = this->stride_,
      Arows = A.num_rows_, Acols = A.num_cols_;
  Real *data = this->data_, *Adata = A.data_, *Bdata = B.data_;
  MatrixIndexT num_cols = this->num_cols_;
  if (transB == kNoTrans) {
    // Iterate over the columns of *this and of B.
    for (MatrixIndexT c = 0; c < num_cols; c++) {
      // for each column of *this, do
      // [this column] = [alpha * A * this column of B] + [beta * this column]
      Xgemv_sparsevec(transA, Arows, Acols, alpha, Adata, Astride,
                      Bdata + c, Bstride, beta, data + c, stride);
    }
  } else {
    // Iterate over the columns of *this and the rows of B.
    for (MatrixIndexT c = 0; c < num_cols; c++) {
      // for each column of *this, do
      // [this column] = [alpha * A * this row of B] + [beta * this column]
      Xgemv_sparsevec(transA, Arows, Acols, alpha, Adata, Astride,
                      Bdata + (c * Bstride), 1, beta, data + c, stride);
    }
  }
}

template<typename Real>
void MatrixBase<Real>::AddSmatMat(const Real alpha,
                                  const MatrixBase<Real> &A,
                                  MatrixTransposeType transA,
                                  const MatrixBase<Real> &B,
                                  MatrixTransposeType transB,
                                  const Real beta) {
  KALDI_ASSERT((transA == kNoTrans && transB == kNoTrans && A.num_cols_ ==
B.num_rows_ && A.num_rows_ == num_rows_ && B.num_cols_ == num_cols_)
               || (transA == kTrans && transB == kNoTrans && A.num_rows_ ==
B.num_rows_ && A.num_cols_ == num_rows_ && B.num_cols_ == num_cols_)
               || (transA == kNoTrans && transB == kTrans && A.num_cols_ ==
B.num_cols_ && A.num_rows_ == num_rows_ && B.num_rows_ == num_cols_)
               || (transA == kTrans && transB == kTrans && A.num_rows_ ==
B.num_cols_ && A.num_cols_ == num_rows_ && B.num_rows_ == num_cols_));
  KALDI_ASSERT(&A !=  this && &B != this);

  MatrixIndexT Astride = A.stride_, Bstride = B.stride_, stride = this->stride_,
      Brows = B.num_rows_, Bcols = B.num_cols_;
  MatrixTransposeType invTransB = (transB == kTrans ? kNoTrans : kTrans);
  Real *data = this->data_, *Adata = A.data_, *Bdata = B.data_;
  MatrixIndexT num_rows = this->num_rows_;
  if (transA == kNoTrans) {
    // Iterate over the rows of *this and of A.
    for (MatrixIndexT r = 0; r < num_rows; r++) {
      // for each row of *this, do
      // [this row] = [alpha * (this row of A) * B^T] + [beta * this row]
      Xgemv_sparsevec(invTransB, Brows, Bcols, alpha, Bdata, Bstride,
                      Adata + (r * Astride), 1, beta, data + (r * stride), 1);
    }
  } else {
    // Iterate over the rows of *this and the columns of A.
    for (MatrixIndexT r = 0; r < num_rows; r++) {
      // for each row of *this, do
      // [this row] = [alpha * (this column of A) * B^T] + [beta * this row]
      Xgemv_sparsevec(invTransB, Brows, Bcols, alpha, Bdata, Bstride,
                      Adata + r, Astride, beta, data + (r * stride), 1);
    }
  }
}

template<typename Real>
void MatrixBase<Real>::AddSpSp(const Real alpha, const SpMatrix<Real> &A_in,
                                const SpMatrix<Real> &B_in, const Real beta) {
  MatrixIndexT sz = num_rows_;
  KALDI_ASSERT(sz == num_cols_ && sz == A_in.NumRows() && sz == B_in.NumRows());

  Matrix<Real> A(A_in), B(B_in);
  // CblasLower or CblasUpper would work below as symmetric matrix is copied
  // fully (to save work, we used the matrix constructor from SpMatrix).
  // CblasLeft means A is on the left: C <-- alpha A B + beta C
  if (sz == 0) return;
  cblas_Xsymm(alpha, sz, A.data_, A.stride_, B.data_, B.stride_, beta, data_,
stride_);
}

template<typename Real>
void MatrixBase<Real>::AddMat(const Real alpha, const MatrixBase<Real>& A,
                               MatrixTransposeType transA) {
  if (&A == this) {
    if (transA == kNoTrans) {
      Scale(alpha + 1.0);
    } else {
      KALDI_ASSERT(num_rows_ == num_cols_ && "AddMat: adding to self
(transposed): not symmetric.");
      Real *data = data_;
      if (alpha == 1.0) {  // common case-- handle separately.
        for (MatrixIndexT row = 0; row < num_rows_; row++) {
          for (MatrixIndexT col = 0; col < row; col++) {
            Real *lower = data + (row * stride_) + col, *upper = data + (col
                                                                          *
stride_) + row;
            Real sum = *lower + *upper;
            *lower = *upper = sum;
          }
          *(data + (row * stride_) + row) *= 2.0;  // diagonal.
        }
      } else {
        for (MatrixIndexT row = 0; row < num_rows_; row++) {
          for (MatrixIndexT col = 0; col < row; col++) {
            Real *lower = data + (row * stride_) + col, *upper = data + (col
                                                                          *
stride_) + row;
            Real lower_tmp = *lower;
            *lower += alpha * *upper;
            *upper += alpha * lower_tmp;
          }
          *(data + (row * stride_) + row) *= (1.0 + alpha);  // diagonal.
        }
      }
    }
  } else {
    int aStride = (int) A.stride_, stride = stride_;
    Real *adata = A.data_, *data = data_;
    if (transA == kNoTrans) {
      KALDI_ASSERT(A.num_rows_ == num_rows_ && A.num_cols_ == num_cols_);
      if (num_rows_ == 0) return;
      for (MatrixIndexT row = 0; row < num_rows_; row++, adata += aStride,
               data += stride) {
        cblas_Xaxpy(num_cols_, alpha, adata, 1, data, 1);
      }
    } else {
      KALDI_ASSERT(A.num_cols_ == num_rows_ && A.num_rows_ == num_cols_);
      if (num_rows_ == 0) return;
      for (MatrixIndexT row = 0; row < num_rows_; row++, adata++, data +=
stride)
        cblas_Xaxpy(num_cols_, alpha, adata, aStride, data, 1);
    }
  }
}

template<typename Real>
void MatrixBase<Real>::AddSmat(Real alpha, const SparseMatrix<Real> &A,
                               MatrixTransposeType trans) {
  if (trans == kNoTrans) {
    KALDI_ASSERT(NumRows() == A.NumRows());
    KALDI_ASSERT(NumCols() == A.NumCols());
    MatrixIndexT a_num_rows = A.NumRows();
    for (MatrixIndexT i = 0; i < a_num_rows; ++i) {
      const SparseVector<Real> &row = A.Row(i);
      MatrixIndexT num_elems = row.NumElements();
      for (MatrixIndexT id = 0; id < num_elems; ++id) {
        (*this)(i, row.GetElement(id).first) += alpha
            * row.GetElement(id).second;
      }
    }
  } else {
    KALDI_ASSERT(NumRows() == A.NumCols());
    KALDI_ASSERT(NumCols() == A.NumRows());
    MatrixIndexT a_num_rows = A.NumRows();
    for (MatrixIndexT i = 0; i < a_num_rows; ++i) {
      const SparseVector<Real> &row = A.Row(i);
      MatrixIndexT num_elems = row.NumElements();
      for (MatrixIndexT id = 0; id < num_elems; ++id) {
        (*this)(row.GetElement(id).first, i) += alpha
            * row.GetElement(id).second;
      }
    }
  }
}

template<typename Real>
void MatrixBase<Real>::AddSmatMat(Real alpha, const SparseMatrix<Real> &A,
                                  MatrixTransposeType transA,
                                  const MatrixBase<Real> &B, Real beta) {
  if (transA == kNoTrans) {
    KALDI_ASSERT(NumRows() == A.NumRows());
    KALDI_ASSERT(NumCols() == B.NumCols());
    KALDI_ASSERT(A.NumCols() == B.NumRows());

    this->Scale(beta);
    MatrixIndexT a_num_rows = A.NumRows(),
        this_num_cols = this->NumCols();
    for (MatrixIndexT i = 0; i < a_num_rows; ++i) {
      Real *this_row_i = this->RowData(i);
      const SparseVector<Real> &A_row_i = A.Row(i);
      MatrixIndexT num_elems = A_row_i.NumElements();
      for (MatrixIndexT e = 0; e < num_elems; ++e) {
        const std::pair<MatrixIndexT, Real> &p = A_row_i.GetElement(e);
        MatrixIndexT k = p.first;
        Real alpha_A_ik = alpha * p.second;
        const Real *b_row_k = B.RowData(k);
        cblas_Xaxpy(this_num_cols, alpha_A_ik, b_row_k, 1,
                    this_row_i, 1);
        //for (MatrixIndexT j = 0; j < this_num_cols; ++j)
        //  this_row_i[j] += alpha_A_ik * b_row_k[j];
      }
    }
  } else {
    KALDI_ASSERT(NumRows() == A.NumCols());
    KALDI_ASSERT(NumCols() == B.NumCols());
    KALDI_ASSERT(A.NumRows() == B.NumRows());

    this->Scale(beta);
    Matrix<Real> buf(NumRows(), NumCols(), kSetZero);
    MatrixIndexT a_num_rows = A.NumRows(),
        this_num_cols = this->NumCols();
    for (int k = 0; k < a_num_rows; ++k) {
      const Real *b_row_k = B.RowData(k);
      const SparseVector<Real> &A_row_k = A.Row(k);
      MatrixIndexT num_elems = A_row_k.NumElements();
      for (MatrixIndexT e = 0; e < num_elems; ++e) {
        const std::pair<MatrixIndexT, Real> &p = A_row_k.GetElement(e);
        MatrixIndexT i = p.first;
        Real alpha_A_ki = alpha * p.second;
        Real *this_row_i = this->RowData(i);
        cblas_Xaxpy(this_num_cols, alpha_A_ki, b_row_k, 1,
                    this_row_i, 1);
        //for (MatrixIndexT j = 0; j < this_num_cols; ++j)
        // this_row_i[j] += alpha_A_ki * b_row_k[j];
      }
    }
  }
}

template<typename Real>
void MatrixBase<Real>::AddMatSmat(Real alpha, const MatrixBase<Real> &A,
                                  const SparseMatrix<Real> &B,
                                  MatrixTransposeType transB, Real beta) {
  if (transB == kNoTrans) {
    KALDI_ASSERT(NumRows() == A.NumRows());
    KALDI_ASSERT(NumCols() == B.NumCols());
    KALDI_ASSERT(A.NumCols() == B.NumRows());

    this->Scale(beta);
    MatrixIndexT b_num_rows = B.NumRows(),
        this_num_rows = this->NumRows();
    // Iterate over the rows of sparse matrix B and columns of A.
    for (MatrixIndexT k = 0; k < b_num_rows; ++k) {
      const SparseVector<Real> &B_row_k = B.Row(k);
      MatrixIndexT num_elems = B_row_k.NumElements();
      const Real *a_col_k = A.Data() + k;
      for (MatrixIndexT e = 0; e < num_elems; ++e) {
        const std::pair<MatrixIndexT, Real> &p = B_row_k.GetElement(e);
        MatrixIndexT j = p.first;
        Real alpha_B_kj = alpha * p.second;
        Real *this_col_j = this->Data() + j;
        // Add to entire 'j'th column of *this at once using cblas_Xaxpy.
        // pass stride to write a column as matrices are stored in row-major
        // order.
        cblas_Xaxpy(this_num_rows, alpha_B_kj, a_col_k, A.stride_,
                    this_col_j, this->stride_);
        //for (MatrixIndexT i = 0; i < this_num_rows; ++i)
        // this_col_j[i*this->stride_] +=  alpha_B_kj * a_col_k[i*A.stride_];
      }
    }
  } else {
    KALDI_ASSERT(NumRows() == A.NumRows());
    KALDI_ASSERT(NumCols() == B.NumRows());
    KALDI_ASSERT(A.NumCols() == B.NumCols());

    this->Scale(beta);
    MatrixIndexT b_num_rows = B.NumRows(),
        this_num_rows = this->NumRows();
    // Iterate over the rows of sparse matrix B and columns of *this.
    for (MatrixIndexT j = 0; j < b_num_rows; ++j) {
      const SparseVector<Real> &B_row_j = B.Row(j);
      MatrixIndexT num_elems = B_row_j.NumElements();
      Real *this_col_j = this->Data() + j;
      for (MatrixIndexT e = 0; e < num_elems; ++e) {
        const std::pair<MatrixIndexT, Real> &p = B_row_j.GetElement(e);
        MatrixIndexT k = p.first;
        Real alpha_B_jk = alpha * p.second;
        const Real *a_col_k = A.Data() + k;
        // Add to entire 'j'th column of *this at once using cblas_Xaxpy.
        // pass stride to write a column as matrices are stored in row major
order.
        cblas_Xaxpy(this_num_rows, alpha_B_jk, a_col_k, A.stride_,
                    this_col_j, this->stride_);
        //for (MatrixIndexT i = 0; i < this_num_rows; ++i)
        // this_col_j[i*this->stride_] +=  alpha_B_jk * a_col_k[i*A.stride_];
      }
    }
  }
}

template<typename Real>
template<typename OtherReal>
void MatrixBase<Real>::AddSp(const Real alpha, const SpMatrix<OtherReal> &S) {
  KALDI_ASSERT(S.NumRows() == NumRows() && S.NumRows() == NumCols());
  Real *data = data_; const OtherReal *sdata = S.Data();
  MatrixIndexT num_rows = NumRows(), stride = Stride();
  for (MatrixIndexT i = 0; i < num_rows; i++) {
    for (MatrixIndexT j = 0; j < i; j++, sdata++) {
      data[i*stride + j] += alpha * *sdata;
      data[j*stride + i] += alpha * *sdata;
    }
    data[i*stride + i] += alpha * *sdata++;
  }
}

// instantiate the template above.
template
void MatrixBase<float>::AddSp(const float alpha, const SpMatrix<float> &S);
template
void MatrixBase<double>::AddSp(const double alpha, const SpMatrix<double> &S);
template
void MatrixBase<float>::AddSp(const float alpha, const SpMatrix<double> &S);
template
void MatrixBase<double>::AddSp(const double alpha, const SpMatrix<float> &S);


template<typename Real>
void MatrixBase<Real>::AddDiagVecMat(
    const Real alpha, const VectorBase<Real> &v,
    const MatrixBase<Real> &M,
    MatrixTransposeType transM,
    Real beta) {
  if (beta != 1.0) this->Scale(beta);

  if (transM == kNoTrans) {
    KALDI_ASSERT(SameDim(*this, M));
  } else {
    KALDI_ASSERT(M.NumRows() == NumCols() && M.NumCols() == NumRows());
  }
  KALDI_ASSERT(v.Dim() == this->NumRows());

  MatrixIndexT M_row_stride = M.Stride(), M_col_stride = 1, stride = stride_,
      num_rows = num_rows_, num_cols = num_cols_;
  if (transM == kTrans) std::swap(M_row_stride, M_col_stride);
  Real *data = data_;
  const Real *Mdata = M.Data(), *vdata = v.Data();
  if (num_rows_ == 0) return;
  for (MatrixIndexT i = 0; i < num_rows; i++, data += stride, Mdata +=
M_row_stride, vdata++)
    cblas_Xaxpy(num_cols, alpha * *vdata, Mdata, M_col_stride, data, 1);
}

template<typename Real>
void MatrixBase<Real>::AddMatDiagVec(
    const Real alpha,
    const MatrixBase<Real> &M, MatrixTransposeType transM,
    VectorBase<Real> &v,
    Real beta) {

  if (beta != 1.0) this->Scale(beta);

  if (transM == kNoTrans) {
    KALDI_ASSERT(SameDim(*this, M));
  } else {
    KALDI_ASSERT(M.NumRows() == NumCols() && M.NumCols() == NumRows());
  }
  KALDI_ASSERT(v.Dim() == this->NumCols());

  MatrixIndexT M_row_stride = M.Stride(),
               M_col_stride = 1,
               stride = stride_,
               num_rows = num_rows_,
               num_cols = num_cols_;

  if (transM == kTrans)
    std::swap(M_row_stride, M_col_stride);

  Real *data = data_;
  const Real *Mdata = M.Data(), *vdata = v.Data();
  if (num_rows_ == 0) return;
  for (MatrixIndexT i = 0; i < num_rows; i++){
      for(MatrixIndexT j = 0; j < num_cols; j ++ ){
          data[i*stride + j] += alpha * vdata[j] * Mdata[i*M_row_stride +
j*M_col_stride];
      }
  }
}

template<typename Real>
void MatrixBase<Real>::AddMatMatElements(const Real alpha,
                                         const MatrixBase<Real>& A,
                                         const MatrixBase<Real>& B,
                                         const Real beta) {
    KALDI_ASSERT(A.NumRows() == B.NumRows() && A.NumCols() == B.NumCols());
    KALDI_ASSERT(A.NumRows() == NumRows() && A.NumCols() == NumCols());
    Real *data = data_;
    const Real *dataA = A.Data();
    const Real *dataB = B.Data();

    for (MatrixIndexT i = 0; i < num_rows_; i++) {
        for (MatrixIndexT j = 0; j < num_cols_; j++) {
            data[j] = beta*data[j] + alpha*dataA[j]*dataB[j];
        }
        data += Stride();
        dataA += A.Stride();
        dataB += B.Stride();
    }
}

#if !defined(HAVE_ATLAS) && !defined(USE_KALDI_SVD)
// ****************************************************************************
// ****************************************************************************
template<typename Real>
void MatrixBase<Real>::LapackGesvd(VectorBase<Real> *s, MatrixBase<Real> *U_in,
                                   MatrixBase<Real> *V_in) {
  KALDI_ASSERT(s != NULL && U_in != this && V_in != this);

  Matrix<Real> tmpU, tmpV;
  if (U_in == NULL) tmpU.Resize(this->num_rows_, 1);  // work-space if U_in
empty.
  if (V_in == NULL) tmpV.Resize(1, this->num_cols_);  // work-space if V_in
empty.

  /// Implementation notes:
  /// Lapack works in column-order, therefore the dimensions of *this are
  /// swapped as well as the U and V matrices.

  KaldiBlasInt M   = num_cols_;
  KaldiBlasInt N   = num_rows_;
  KaldiBlasInt LDA = Stride();

  KALDI_ASSERT(N>=M);  // NumRows >= columns.

  if (U_in) {
    KALDI_ASSERT((int)U_in->num_rows_ == N && (int)U_in->num_cols_ == M);
  }
  if (V_in) {
    KALDI_ASSERT((int)V_in->num_rows_ == M && (int)V_in->num_cols_ == M);
  }
  KALDI_ASSERT((int)s->Dim() == std::min(M, N));

  MatrixBase<Real> *U = (U_in ? U_in : &tmpU);
  MatrixBase<Real> *V = (V_in ? V_in : &tmpV);

  KaldiBlasInt V_stride      = V->Stride();
  KaldiBlasInt U_stride      = U->Stride();

  // Original LAPACK recipe
  // KaldiBlasInt l_work = std::max(std::max<long int>
  //   (1, 3*std::min(M, N)+std::max(M, N)), 5*std::min(M, N))*2;
  KaldiBlasInt l_work = -1;
  Real   work_query;
  KaldiBlasInt result;

  // query for work space
  char *u_job = const_cast<char*>(U_in ? "s" : "N");  // "s" == skinny, "N" ==
"none."
  char *v_job = const_cast<char*>(V_in ? "s" : "N");  // "s" == skinny, "N" ==
"none."
  clapack_Xgesvd(v_job, u_job,
                 &M, &N, data_, &LDA,
                 s->Data(),
                 V->Data(), &V_stride,
                 U->Data(), &U_stride,
                 &work_query, &l_work,
                 &result);

  KALDI_ASSERT(result >= 0 && "Call to CLAPACK dgesvd_ called with wrong
arguments");

  l_work = static_cast<KaldiBlasInt>(work_query);
  Real *p_work;
  void *temp;
  if ((p_work = static_cast<Real*>(
          KALDI_MEMALIGN(16, sizeof(Real)*l_work, &temp))) == NULL)
    throw std::bad_alloc();

  // perform svd
  clapack_Xgesvd(v_job, u_job,
                 &M, &N, data_, &LDA,
                 s->Data(),
                 V->Data(), &V_stride,
                 U->Data(), &U_stride,
                 p_work, &l_work,
                 &result);

  KALDI_ASSERT(result >= 0 && "Call to CLAPACK dgesvd_ called with wrong
arguments");

  if (result != 0) {
    KALDI_WARN << "CLAPACK sgesvd_ : some weird convergence not satisfied";
  }
  KALDI_MEMALIGN_FREE(p_work);
}

#endif
*/
// Copy constructor with optional transpose.  The new matrix is resized to the
// (possibly swapped) dimensions of M and M's elements are then copied into it.
template <typename Real>
Matrix<Real>::Matrix(const MatrixBase<Real> &M,
                     MatrixTransposeType trans /*=kNoTrans*/)
    : MatrixBase<Real>() {
    if (trans != kNoTrans) {
        // Transposed copy: rows and columns of M trade places.
        Resize(M.num_cols_, M.num_rows_);
        this->CopyFromMat(M, kTrans);
        return;
    }
    // Plain copy: same shape as M.
    Resize(M.num_rows_, M.num_cols_);
    this->CopyFromMat(M);
}

// Copy constructor.  Resizes the new matrix to the shape of M and then copies
// M's elements into it, so the two matrices never share storage.
template <typename Real>
Matrix<Real>::Matrix(const Matrix<Real> &M) : MatrixBase<Real>() {
    const MatrixIndexT src_rows = M.num_rows_;
    const MatrixIndexT src_cols = M.num_cols_;
    Resize(src_rows, src_cols);
    this->CopyFromMat(M);
}

/// Converting copy constructor: builds a Matrix<Real> from a matrix whose
/// element type is OtherReal, optionally transposing it on the way.
template <typename Real>
template <typename OtherReal>
Matrix<Real>::Matrix(const MatrixBase<OtherReal> &M, MatrixTransposeType trans)
    : MatrixBase<Real>() {
    if (trans != kNoTrans) {
        // Transposed copy: the result has M's dimensions swapped.
        Resize(M.NumCols(), M.NumRows());
        this->CopyFromMat(M, kTrans);
        return;
    }
    // Plain copy: the result has the same shape as M.
    Resize(M.NumRows(), M.NumCols());
    this->CopyFromMat(M);
}

// Explicit instantiations of the converting constructor above for the two
// cross-precision cases: a float matrix built from a double matrix, and a
// double matrix built from a float matrix.  These force the template
// definition to be emitted here for exactly those combinations.
template Matrix<float>::Matrix(const MatrixBase<double> &M,
                               MatrixTransposeType trans);
template Matrix<double>::Matrix(const MatrixBase<float> &M,
                                MatrixTransposeType trans);

/// Allocates storage for a rows x cols matrix and sets the dimension fields.
///
/// An empty matrix (rows == 0 and cols == 0) gets no storage: data_ is set to
/// NULL and all dimensions, including the stride, are set to zero.  Having
/// exactly one of the two dimensions equal to zero is rejected by assertion.
///
/// For a non-empty matrix the row length is padded up to a multiple of
/// 16 bytes and a 16-byte aligned block of rows * padded_stride elements is
/// requested.  The stored stride is the padded one when stride_type is
/// kDefaultStride and plain cols otherwise.  The contents are left
/// uninitialised.
///
/// @param rows         Number of rows (>= 0).
/// @param cols         Number of columns (>= 0).
/// @param stride_type  Selects padded stride (kDefaultStride) or stride==cols.
/// @throws std::bad_alloc if the aligned allocation fails.
template <typename Real>
inline void Matrix<Real>::Init(const MatrixIndexT rows,
                               const MatrixIndexT cols,
                               const MatrixStrideType stride_type) {
    // Test each dimension on its own instead of the product rows * cols: the
    // product of two large dimensions can overflow the index type (undefined
    // behaviour for a signed type), and a product that wraps to exactly zero
    // would be mistaken for an empty matrix.
    if (rows == 0 || cols == 0) {
        KALDI_ASSERT(rows == 0 && cols == 0);
        this->num_rows_ = 0;
        this->num_cols_ = 0;
        this->stride_ = 0;
        this->data_ = NULL;
        return;
    }
    KALDI_ASSERT(rows > 0 && cols > 0);
    MatrixIndexT skip, stride;
    size_t size;
    void *data;  // aligned memory block
    void *temp;  // memory block to be really freed

    // Number of padding elements needed so that every row occupies a whole
    // number of 16-byte units.
    skip = ((16 / sizeof(Real)) - cols % (16 / sizeof(Real))) %
           (16 / sizeof(Real));
    stride = cols + skip;
    // The byte count is computed in size_t so that it does not overflow the
    // (narrower) index type for large matrices.
    size =
        static_cast<size_t>(rows) * static_cast<size_t>(stride) * sizeof(Real);

    // allocate the memory and set the right dimensions and parameters
    if (NULL != (data = KALDI_MEMALIGN(16, size, &temp))) {
        MatrixBase<Real>::data_ = static_cast<Real *>(data);
        MatrixBase<Real>::num_rows_ = rows;
        MatrixBase<Real>::num_cols_ = cols;
        MatrixBase<Real>::stride_ =
            (stride_type == kDefaultStride ? stride : cols);
    } else {
        throw std::bad_alloc();
    }
}

/// Changes the dimensions of the matrix to rows x cols.
///
/// With resize_type == kCopyData the overlapping top-left region of the old
/// contents is preserved (and any newly exposed area is zeroed); if there is
/// nothing to preserve the request degrades to kSetZero.  With kSetZero the
/// resulting matrix is zeroed; with any other value the contents are left as
/// Init() produced them.  When the requested shape already matches, existing
/// storage is reused.
template <typename Real>
void Matrix<Real>::Resize(const MatrixIndexT rows,
                          const MatrixIndexT cols,
                          MatrixResizeType resize_type,
                          MatrixStrideType stride_type) {
    if (resize_type == kCopyData) {
        const bool nothing_to_keep = (this->data_ == NULL || rows == 0);
        if (nothing_to_keep) {
            // No old data worth preserving: treat as a zeroing resize below.
            resize_type = kSetZero;
        } else {
            const bool shape_unchanged =
                (rows == this->num_rows_ && cols == this->num_cols_);
            const bool stride_acceptable =
                (stride_type == kDefaultStride ||
                 this->stride_ == this->num_cols_);
            if (shape_unchanged && stride_acceptable) {
                return;  // already in the requested layout.
            }

            // Build the replacement in a scratch matrix.  It only needs
            // zeroing when it is larger than *this in some dimension, since
            // otherwise every element gets overwritten by the copy.
            const bool grows =
                (rows > this->num_rows_ || cols > this->num_cols_);
            Matrix<Real> scratch(
                rows, cols, grows ? kSetZero : kUndefined, stride_type);
            const MatrixIndexT kept_rows = std::min(rows, this->num_rows_);
            const MatrixIndexT kept_cols = std::min(cols, this->num_cols_);
            scratch.Range(0, kept_rows, 0, kept_cols)
                .CopyFromMat(this->Range(0, kept_rows, 0, kept_cols));
            // After the swap, scratch owns the old storage and releases it
            // when it goes out of scope.
            scratch.Swap(this);
            return;
        }
    }

    // From here on resize_type is kSetZero or kUndefined.
    if (MatrixBase<Real>::data_ != NULL) {
        const bool same_dims = (rows == MatrixBase<Real>::num_rows_ &&
                                cols == MatrixBase<Real>::num_cols_);
        if (same_dims) {
            // Reuse the existing allocation.
            if (resize_type == kSetZero) this->SetZero();
            return;
        }
        Destroy();
    }
    Init(rows, cols, stride_type);
    if (resize_type == kSetZero) MatrixBase<Real>::SetZero();
}

/// Copies the elements of M (optionally transposed) into *this, converting
/// from OtherReal to Real where the types differ.
///
/// *this must already have the matching dimensions: the same shape as M for
/// kNoTrans, or M's shape with rows and columns swapped otherwise; this is
/// enforced by assertion.  Copying a matrix onto its own storage is accepted
/// as a no-op, but only for kNoTrans with identical dimensions and stride.
///
/// @param M      Source matrix.
/// @param Trans  kNoTrans for a plain copy, otherwise a transposed copy.
template <typename Real>
template <typename OtherReal>
void MatrixBase<Real>::CopyFromMat(const MatrixBase<OtherReal> &M,
                                   MatrixTransposeType Trans) {
    // Aliasing check.  Empty matrices have no storage (Matrix::Init sets
    // data_ to NULL for them), so two unrelated empty matrices would compare
    // equal here; requiring a non-NULL pointer keeps e.g. a transposed copy
    // of an empty matrix from tripping the self-copy assertion below.
    if (sizeof(Real) == sizeof(OtherReal) && this->Data() != NULL &&
        static_cast<const void *>(M.Data()) ==
            static_cast<const void *>(this->Data())) {
        // CopyFromMat called on same data.  Nothing to do (except sanity
        // checks).
        KALDI_ASSERT(Trans == kNoTrans && M.NumRows() == NumRows() &&
                     M.NumCols() == NumCols() && M.Stride() == Stride());
        return;
    }
    if (Trans == kNoTrans) {
        KALDI_ASSERT(num_rows_ == M.NumRows() && num_cols_ == M.NumCols());
        for (MatrixIndexT i = 0; i < num_rows_; i++)
            (*this).Row(i).CopyFromVec(M.Row(i));
    } else {
        KALDI_ASSERT(num_cols_ == M.NumRows() && num_rows_ == M.NumCols());
        // Element offsets are computed in size_t: row * stride can exceed
        // the range of a 32-bit index for very large matrices.
        const size_t this_stride = static_cast<size_t>(stride_),
                     other_stride = static_cast<size_t>(M.Stride());
        Real *this_data = data_;
        const OtherReal *other_data = M.Data();
        for (MatrixIndexT i = 0; i < num_rows_; i++) {
            Real *this_row = this_data + static_cast<size_t>(i) * this_stride;
            for (MatrixIndexT j = 0; j < num_cols_; j++)
                this_row[j] = other_data[static_cast<size_t>(j) * other_stride +
                                         static_cast<size_t>(i)];
        }
    }
}

// Explicit instantiations of CopyFromMat for all four combinations of
// destination and source element type (float/double x float/double), so that
// the template definition above is emitted here for each of them.
template void MatrixBase<float>::CopyFromMat(const MatrixBase<double> &M,
                                             MatrixTransposeType Trans);
template void MatrixBase<double>::CopyFromMat(const MatrixBase<float> &M,
                                              MatrixTransposeType Trans);
template void MatrixBase<float>::CopyFromMat(const MatrixBase<float> &M,
                                             MatrixTransposeType Trans);
template void MatrixBase<double>::CopyFromMat(const MatrixBase<double> &M,
                                              MatrixTransposeType Trans);

/*
// Specialize the template for CopyFromSp for float, float.
template<>
template<>
void MatrixBase<float>::CopyFromSp(const SpMatrix<float> & M) {
  KALDI_ASSERT(num_rows_ == M.NumRows() && num_cols_ == num_rows_);
  MatrixIndexT num_rows = num_rows_, stride = stride_;
  const float *Mdata = M.Data();
  float *row_data = data_, *col_data = data_;
  for (MatrixIndexT i = 0; i < num_rows; i++) {
    cblas_scopy(i+1, Mdata, 1, row_data, 1); // copy to the row.
    cblas_scopy(i, Mdata, 1, col_data, stride); // copy to the column.
    Mdata += i+1;
    row_data += stride;
    col_data += 1;
  }
}

// Specialize the template for CopyFromSp for double, double.
template<>
template<>
void MatrixBase<double>::CopyFromSp(const SpMatrix<double> & M) {
  KALDI_ASSERT(num_rows_ == M.NumRows() && num_cols_ == num_rows_);
  MatrixIndexT num_rows = num_rows_, stride = stride_;
  const double *Mdata = M.Data();
  double *row_data = data_, *col_data = data_;
  for (MatrixIndexT i = 0; i < num_rows; i++) {
    cblas_dcopy(i+1, Mdata, 1, row_data, 1); // copy to the row.
    cblas_dcopy(i, Mdata, 1, col_data, stride); // copy to the column.
    Mdata += i+1;
    row_data += stride;
    col_data += 1;
  }
}


template<typename Real>
template<typename OtherReal>
void MatrixBase<Real>::CopyFromSp(const SpMatrix<OtherReal> & M) {
  KALDI_ASSERT(num_rows_ == M.NumRows() && num_cols_ == num_rows_);
  // MORE EFFICIENT IF LOWER TRIANGULAR!  Reverse code otherwise.
  for (MatrixIndexT i = 0; i < num_rows_; i++) {
    for (MatrixIndexT j = 0; j < i; j++) {
      (*this)(j, i)  = (*this)(i, j) = M(i, j);
    }
    (*this)(i, i) = M(i, i);
  }
}

// Instantiate this function
template
void MatrixBase<float>::CopyFromSp(const SpMatrix<double> & M);
template
void MatrixBase<double>::CopyFromSp(const SpMatrix<float> & M);


template<typename Real>
template<typename OtherReal>
void MatrixBase<Real>::CopyFromTp(const TpMatrix<OtherReal> & M,
                                  MatrixTransposeType Trans) {
  if (Trans == kNoTrans) {
    KALDI_ASSERT(num_rows_ == M.NumRows() && num_cols_ == num_rows_);
    SetZero();
    Real *out_i = data_;
    const OtherReal *in_i = M.Data();
    for (MatrixIndexT i = 0; i < num_rows_; i++, out_i += stride_, in_i += i) {
      for (MatrixIndexT j = 0; j <= i; j++)
        out_i[j] = in_i[j];
    }
  } else {
    SetZero();
    KALDI_ASSERT(num_rows_ == M.NumRows() && num_cols_ == num_rows_);
    MatrixIndexT stride = stride_;
    Real *out_i = data_;
    const OtherReal *in_i = M.Data();
    for (MatrixIndexT i = 0; i < num_rows_; i++, out_i ++, in_i += i) {
      for (MatrixIndexT j = 0; j <= i; j++)
        out_i[j*stride] = in_i[j];
    }
  }
}

template
void MatrixBase<float>::CopyFromTp(const TpMatrix<float> & M,
                                   MatrixTransposeType trans);
template
void MatrixBase<float>::CopyFromTp(const TpMatrix<double> & M,
                                   MatrixTransposeType trans);
template
void MatrixBase<double>::CopyFromTp(const TpMatrix<float> & M,
                                    MatrixTransposeType trans);
template
void MatrixBase<double>::CopyFromTp(const TpMatrix<double> & M,
                                    MatrixTransposeType trans);

*/
// Fills this matrix from the vector `rv`. Two layouts are accepted:
//   * rv.Dim() == NumRows() * NumCols(): rv holds the whole matrix, one row
//     after another;
//   * rv.Dim() == NumCols(): rv is a single row that is replicated into every
//     row of the matrix.
// Any other dimension is reported through KALDI_ERR.
template <typename Real>
void MatrixBase<Real>::CopyRowsFromVec(const VectorBase<Real> &rv) {
    const Real *src = rv.Data();

    if (rv.Dim() == num_rows_ * num_cols_) {
        if (stride_ == num_cols_) {
            // Rows are laid out back to back: one block copy is enough.
            std::memcpy(data_, src, sizeof(Real) * num_rows_ * num_cols_);
            return;
        }
        // Padded rows: copy row by row, advancing through rv as we go.
        for (MatrixIndexT row = 0; row < num_rows_; row++, src += num_cols_) {
            Real *dst = RowData(row);
            for (MatrixIndexT col = 0; col < num_cols_; col++)
                dst[col] = src[col];
        }
        return;
    }

    if (rv.Dim() == num_cols_) {
        // Broadcast the single row held by rv into each matrix row.
        for (MatrixIndexT row = 0; row < num_rows_; row++)
            std::memcpy(RowData(row), src, sizeof(Real) * num_cols_);
        return;
    }

    KALDI_ERR << "Wrong sized arguments";
}

// Same contract as the same-type CopyRowsFromVec, but the source vector has a
// different floating-point type, so every element is converted to Real.
//   * rv.Dim() == NumRows() * NumCols(): rv holds the matrix row after row;
//   * rv.Dim() == NumCols(): rv is one row, replicated into every row.
// Any other dimension is reported through KALDI_ERR.
template <typename Real>
template <typename OtherReal>
void MatrixBase<Real>::CopyRowsFromVec(const VectorBase<OtherReal> &rv) {
    if (rv.Dim() == num_rows_ * num_cols_) {
        const OtherReal *src = rv.Data();
        for (MatrixIndexT row = 0; row < num_rows_; row++, src += num_cols_) {
            Real *dst = RowData(row);
            for (MatrixIndexT col = 0; col < num_cols_; col++)
                dst[col] = static_cast<Real>(src[col]);
        }
        return;
    }

    if (rv.Dim() == num_cols_) {
        // Convert once into row 0, then duplicate that already-converted row
        // into the remaining rows with plain byte copies.
        const OtherReal *src = rv.Data();
        Real *converted_row = RowData(0);
        for (MatrixIndexT col = 0; col < num_cols_; col++)
            converted_row[col] = src[col];
        const size_t row_bytes = sizeof(Real) * num_cols_;
        for (MatrixIndexT row = 1; row < num_rows_; row++)
            std::memcpy(RowData(row), converted_row, row_bytes);
        return;
    }

    KALDI_ERR << "Wrong sized arguments.";
}


// Explicit instantiations of the mixed-precision CopyRowsFromVec overload
// (double -> float and float -> double).
template void MatrixBase<float>::CopyRowsFromVec(const VectorBase<double> &rv);
template void MatrixBase<double>::CopyRowsFromVec(const VectorBase<float> &rv);

// Fills this matrix from the vector `rv`. Two layouts are accepted:
//   * rv.Dim() == NumRows() * NumCols(): rv holds the whole matrix, one
//     column after another;
//   * rv.Dim() == NumRows(): rv is a single column that is replicated into
//     every column of the matrix.
// Any other dimension is reported through KALDI_ERR.
template <typename Real>
void MatrixBase<Real>::CopyColsFromVec(const VectorBase<Real> &rv) {
    if (rv.Dim() == num_rows_ * num_cols_) {
        const Real *col_src = rv.Data();
        for (MatrixIndexT col = 0; col < num_cols_; col++) {
            // Top element of the destination column; successive rows are
            // stride_ elements apart.
            Real *col_dst = data_ + col;
            for (MatrixIndexT row = 0; row < num_rows_; row++)
                col_dst[row * stride_] = col_src[row];
            col_src += num_rows_;
        }
        return;
    }

    if (rv.Dim() == num_rows_) {
        const Real *next_value = rv.Data();
        Real *row_dst = data_;
        for (MatrixIndexT row = 0; row < num_rows_; row++) {
            // Every element of this row receives the row's entry of rv.
            const Real fill = *next_value;
            ++next_value;
            for (MatrixIndexT col = 0; col < num_cols_; col++)
                row_dst[col] = fill;
            row_dst += stride_;
        }
        return;
    }

    KALDI_ERR << "Wrong size of arguments.";
}

// Overwrites row `row` of this matrix with the contents of `rv`.
// Requires rv.Dim() == NumCols() and 0 <= row < NumRows(); the unsigned
// comparison in the assertion rejects negative indices as well.
template <typename Real>
void MatrixBase<Real>::CopyRowFromVec(const VectorBase<Real> &rv,
                                      const MatrixIndexT row) {
    KALDI_ASSERT(rv.Dim() == num_cols_ &&
                 static_cast<UnsignedMatrixIndexT>(row) <
                     static_cast<UnsignedMatrixIndexT>(num_rows_));

    // A row is contiguous in memory, so one block copy does the job.
    Real *const dst = RowData(row);
    const Real *const src = rv.Data();
    const size_t row_bytes = num_cols_ * sizeof(Real);
    std::memcpy(dst, src, row_bytes);
}
/*
template<typename Real>
void MatrixBase<Real>::CopyDiagFromVec(const VectorBase<Real> &rv) {
  KALDI_ASSERT(rv.Dim() == std::min(num_cols_, num_rows_));
  const Real *rv_data = rv.Data(), *rv_end = rv_data + rv.Dim();
  Real *my_data = this->Data();
  for (; rv_data != rv_end; rv_data++, my_data += (this->stride_+1))
    *my_data = *rv_data;
}*/

// Overwrites column `col` of this matrix with the contents of `rv`.
// Requires rv.Dim() == NumRows() and 0 <= col < NumCols(); the unsigned
// comparison in the assertion rejects negative indices as well.
template <typename Real>
void MatrixBase<Real>::CopyColFromVec(const VectorBase<Real> &rv,
                                      const MatrixIndexT col) {
    KALDI_ASSERT(rv.Dim() == num_rows_ &&
                 static_cast<UnsignedMatrixIndexT>(col) <
                     static_cast<UnsignedMatrixIndexT>(num_cols_));

    // Column elements are stride_ apart, so they are written one at a time.
    Real *const column_top = data_ + col;
    const Real *const values = rv.Data();
    MatrixIndexT row = 0;
    while (row < num_rows_) {
        column_top[row * stride_] = values[row];
        ++row;
    }
}


// Deletes row `i` by shifting every later row up by one position and then
// decrementing the row count. The underlying storage is not reallocated.
template <typename Real>
void Matrix<Real>::RemoveRow(MatrixIndexT i) {
    KALDI_ASSERT(
        static_cast<UnsignedMatrixIndexT>(i) <
            static_cast<UnsignedMatrixIndexT>(MatrixBase<Real>::num_rows_) &&
        "Access out of matrix");

    // Row `dst` is overwritten by the row just below it.
    for (MatrixIndexT dst = i; dst + 1 < this->num_rows_; dst++)
        this->Row(dst).CopyFromVec(this->Row(dst + 1));

    this->num_rows_ -= 1;
}

// Releases the data block (if one is held) and resets the matrix to the
// empty state: null data pointer and zero rows, columns and stride.
template <typename Real>
void Matrix<Real>::Destroy() {
    if (this->data_ != NULL) {
        KALDI_MEMALIGN_FREE(this->data_);
    }
    this->data_ = NULL;
    this->stride_ = 0;
    this->num_cols_ = 0;
    this->num_rows_ = 0;
}


/*
template<typename Real>
void MatrixBase<Real>::MulElements(const MatrixBase<Real> &a) {
  KALDI_ASSERT(a.NumRows() == num_rows_ && a.NumCols() == num_cols_);

  if (num_cols_ == stride_ && num_cols_ == a.stride_) {
    mul_elements(num_rows_ * num_cols_, a.data_, data_);
  } else {
    MatrixIndexT a_stride = a.stride_, stride = stride_;
    Real *data = data_, *a_data = a.data_;
    for (MatrixIndexT i = 0; i < num_rows_; i++) {
      mul_elements(num_cols_, a_data, data);
      a_data += a_stride;
      data += stride;
    }
  }
}

template<typename Real>
void MatrixBase<Real>::DivElements(const MatrixBase<Real> &a) {
  KALDI_ASSERT(a.NumRows() == num_rows_ && a.NumCols() == num_cols_);
  MatrixIndexT i;
  MatrixIndexT j;

  for (i = 0; i < num_rows_; i++) {
    for (j = 0; j < num_cols_; j++) {
      (*this)(i, j) /= a(i, j);
    }
  }
}

template<typename Real>
Real MatrixBase<Real>::Sum() const {
  double sum = 0.0;

  for (MatrixIndexT i = 0; i < num_rows_; i++) {
    for (MatrixIndexT j = 0; j < num_cols_; j++) {
      sum += (*this)(i, j);
    }
  }

  return (Real)sum;
}

template<typename Real> void MatrixBase<Real>::Max(const MatrixBase<Real> &A) {
  KALDI_ASSERT(A.NumRows() == NumRows() && A.NumCols() == NumCols());
  for (MatrixIndexT row = 0; row < num_rows_; row++) {
    Real *row_data = RowData(row);
    const Real *other_row_data = A.RowData(row);
    MatrixIndexT num_cols = num_cols_;
    for (MatrixIndexT col = 0; col < num_cols; col++) {
      row_data[col] = std::max(row_data[col],
                               other_row_data[col]);
    }
  }
}

template<typename Real> void MatrixBase<Real>::Min(const MatrixBase<Real> &A) {
  KALDI_ASSERT(A.NumRows() == NumRows() && A.NumCols() == NumCols());
  for (MatrixIndexT row = 0; row < num_rows_; row++) {
    Real *row_data = RowData(row);
    const Real *other_row_data = A.RowData(row);
    MatrixIndexT num_cols = num_cols_;
    for (MatrixIndexT col = 0; col < num_cols; col++) {
      row_data[col] = std::min(row_data[col],
                               other_row_data[col]);
    }
  }
}


template<typename Real> void MatrixBase<Real>::Scale(Real alpha) {
  if (alpha == 1.0) return;
  if (num_rows_ == 0) return;
  if (num_cols_ == stride_) {
    cblas_Xscal(static_cast<size_t>(num_rows_) * static_cast<size_t>(num_cols_),
                alpha, data_,1);
  } else {
    Real *data = data_;
    for (MatrixIndexT i = 0; i < num_rows_; ++i, data += stride_) {
      cblas_Xscal(num_cols_, alpha, data,1);
    }
  }
}

template<typename Real>  // scales each row by scale[i].
void MatrixBase<Real>::MulRowsVec(const VectorBase<Real> &scale) {
  KALDI_ASSERT(scale.Dim() == num_rows_);
  MatrixIndexT M = num_rows_, N = num_cols_;

  for (MatrixIndexT i = 0; i < M; i++) {
    Real this_scale = scale(i);
    for (MatrixIndexT j = 0; j < N; j++) {
      (*this)(i, j) *= this_scale;
    }
  }
}


template<typename Real>
void MatrixBase<Real>::MulRowsGroupMat(const MatrixBase<Real> &src) {
  KALDI_ASSERT(src.NumRows() == this->NumRows() &&
               this->NumCols() % src.NumCols() == 0);
  int32 group_size = this->NumCols() / src.NumCols(),
      num_groups = this->NumCols() / group_size,
      num_rows = this->NumRows();

  for (MatrixIndexT i = 0; i < num_rows; i++) {
    Real *data = this->RowData(i);
    for (MatrixIndexT j = 0; j < num_groups; j++, data += group_size) {
      Real scale = src(i, j);
      cblas_Xscal(group_size, scale, data, 1);
    }
  }
}

template<typename Real>
void MatrixBase<Real>::GroupPnormDeriv(const MatrixBase<Real> &input,
                                       const MatrixBase<Real> &output,
                                       Real power) {
  KALDI_ASSERT(input.NumCols() == this->NumCols() && input.NumRows() ==
this->NumRows());
  KALDI_ASSERT(this->NumCols() % output.NumCols() == 0 &&
               this->NumRows() == output.NumRows());

  int group_size = this->NumCols() / output.NumCols(),
    num_rows = this->NumRows(), num_cols = this->NumCols();

  if (power == 1.0) {
    for (MatrixIndexT i = 0; i < num_rows; i++) {
      for (MatrixIndexT j = 0; j < num_cols; j++) {
        Real input_val = input(i, j);
        (*this)(i, j) = (input_val == 0 ? 0 : (input_val > 0 ? 1 : -1));
      }
    }
  } else if (power == std::numeric_limits<Real>::infinity()) {
    for (MatrixIndexT i = 0; i < num_rows; i++) {
      for (MatrixIndexT j = 0; j < num_cols; j++) {
        Real output_val = output(i, j / group_size), input_val = input(i, j);
        if (output_val == 0)
          (*this)(i, j) = 0;
        else
          (*this)(i, j) = (std::abs(input_val) == output_val ? 1.0 : 0.0)
              * (input_val >= 0 ? 1 : -1);
      }
    }
  } else {
    for (MatrixIndexT i = 0; i < num_rows; i++) {
      for (MatrixIndexT j = 0; j < num_cols; j++) {
        Real output_val = output(i, j / group_size),
          input_val = input(i, j);
        if (output_val == 0)
          (*this)(i, j) = 0;
         else
            (*this)(i, j) = pow(std::abs(input_val), power - 1) *
              pow(output_val, 1 - power) * (input_val >= 0 ? 1 : -1) ;
      }
    }
  }
}

template<typename Real>
void MatrixBase<Real>::GroupMaxDeriv(const MatrixBase<Real> &input,
                                     const MatrixBase<Real> &output) {
  KALDI_ASSERT(input.NumCols() == this->NumCols() &&
              input.NumRows() == this->NumRows());
  KALDI_ASSERT(this->NumCols() % output.NumCols() == 0 &&
               this->NumRows() == output.NumRows());

  int group_size = this->NumCols() / output.NumCols(),
      num_rows = this->NumRows(), num_cols = this->NumCols();

  for (MatrixIndexT i = 0; i < num_rows; i++) {
    for (MatrixIndexT j = 0; j < num_cols; j++) {
      Real input_val = input(i, j);
      Real output_val = output(i, j / group_size);
      (*this)(i, j) = (input_val == output_val ? 1 : 0);
    }
  }
}

template<typename Real>  // scales each column by scale[i].
void MatrixBase<Real>::MulColsVec(const VectorBase<Real> &scale) {
  KALDI_ASSERT(scale.Dim() == num_cols_);
  for (MatrixIndexT i = 0; i < num_rows_; i++) {
    for (MatrixIndexT j = 0; j < num_cols_; j++) {
      Real this_scale = scale(j);
      (*this)(i, j) *= this_scale;
    }
  }
}
*/

// Sets every element of the matrix to zero by clearing its bytes.
template <typename Real>
void MatrixBase<Real>::SetZero() {
    const size_t row_bytes = sizeof(Real) * num_cols_;

    if (num_cols_ == stride_) {
        // No padding between rows: clear the whole block in one call.
        memset(data_, 0, row_bytes * num_rows_);
        return;
    }

    // Padded rows: clear only the num_cols_ elements that belong to each row.
    for (MatrixIndexT r = 0; r < num_rows_; r++) {
        memset(data_ + r * stride_, 0, row_bytes);
    }
}

// Assigns `value` to every element of the matrix.
template <typename Real>
void MatrixBase<Real>::Set(Real value) {
    for (MatrixIndexT r = 0; r < num_rows_; r++) {
        Real *row_start = RowData(r);
        for (MatrixIndexT c = 0; c < num_cols_; c++) row_start[c] = value;
    }
}

/*
template<typename Real>
void MatrixBase<Real>::SetUnit() {
  SetZero();
  for (MatrixIndexT row = 0; row < std::min(num_rows_, num_cols_); row++)
    (*this)(row, row) = 1.0;
}

template<typename Real>
void MatrixBase<Real>::SetRandn() {
  kaldi::RandomState rstate;
  for (MatrixIndexT row = 0; row < num_rows_; row++) {
    Real *row_data = this->RowData(row);
    MatrixIndexT nc = (num_cols_ % 2 == 1) ? num_cols_ - 1 : num_cols_;
    for (MatrixIndexT col = 0; col < nc; col += 2) {
      kaldi::RandGauss2(row_data + col, row_data + col + 1, &rstate);
    }
    if (nc != num_cols_) row_data[nc] =
static_cast<Real>(kaldi::RandGauss(&rstate));
  }
}

template<typename Real>
void MatrixBase<Real>::SetRandUniform() {
  kaldi::RandomState rstate;
  for (MatrixIndexT row = 0; row < num_rows_; row++) {
    Real *row_data = this->RowData(row);
    for (MatrixIndexT col = 0; col < num_cols_; col++, row_data++) {
      *row_data = static_cast<Real>(kaldi::RandUniform(&rstate));
    }
  }
}
*/

// Serializes the matrix to `os`.
//
// Binary mode: a type token ("FM" when sizeof(Real) == 4, otherwise "DM"),
// the row and column counts as 32-bit integers, then the element data with
// any per-row padding left out.
// Text mode: " [", one line per row with space-separated values, then "]";
// a matrix without columns is written as " [ ]".
//
// Stream failures are reported through KALDI_ERR.
template <typename Real>
void MatrixBase<Real>::Write(std::ostream &os, bool binary) const {
    if (!os.good()) {
        KALDI_ERR << "Failed to write matrix to stream: stream not good";
    }

    if (!binary) {  // text mode.
        if (num_cols_ == 0) {
            os << " [ ]\n";
            return;
        }
        os << " [";
        for (MatrixIndexT r = 0; r < num_rows_; r++) {
            os << "\n  ";
            for (MatrixIndexT c = 0; c < num_cols_; c++) {
                os << (*this)(r, c) << " ";
            }
        }
        os << "]\n";
        return;
    }

    // Binary mode: the token records whether the payload is float or double,
    // because the reader needs that to interpret the raw bytes.
    std::string my_token = (sizeof(Real) == 4 ? "FM" : "DM");
    WriteToken(os, binary, my_token);
    {
        int32 rows = this->num_rows_;  // make the size 32-bit on disk.
        int32 cols = this->num_cols_;
        KALDI_ASSERT(this->num_rows_ == (MatrixIndexT)rows);
        KALDI_ASSERT(this->num_cols_ == (MatrixIndexT)cols);
        WriteBasicType(os, binary, rows);
        WriteBasicType(os, binary, cols);
    }

    if (Stride() != NumCols()) {
        // Rows are padded in memory: emit each row separately so that the
        // padding never reaches the stream.
        const size_t row_bytes = sizeof(Real) * num_cols_;
        for (MatrixIndexT r = 0; r < num_rows_; r++) {
            os.write(reinterpret_cast<const char *>(RowData(r)), row_bytes);
        }
    } else {
        const size_t total_bytes = sizeof(Real) *
                                   static_cast<size_t>(num_rows_) *
                                   static_cast<size_t>(num_cols_);
        os.write(reinterpret_cast<const char *>(Data()), total_bytes);
    }

    if (!os.good()) {
        KALDI_ERR << "Failed to write matrix to stream";
    }
}


// Reads a matrix from `is` into this (fixed-size) object.
// A MatrixBase cannot be resized, so the data is first read into a temporary
// Matrix; if its dimensions differ from ours the mismatch is reported through
// KALDI_ERR, otherwise the contents are copied over.
template <typename Real>
void MatrixBase<Real>::Read(std::istream &is, bool binary) {
    Matrix<Real> loaded;
    loaded.Read(is, binary);

    const bool same_shape =
        loaded.NumRows() == NumRows() && loaded.NumCols() == NumCols();
    if (!same_shape) {
        KALDI_ERR << "MatrixBase<Real>::Read, size mismatch " << NumRows()
                  << " x " << NumCols() << " versus " << loaded.NumRows()
                  << " x " << loaded.NumCols();
    }
    CopyFromMat(loaded);
}


// Reads a matrix from `is`, resizing this object to match the stored data.
//
// Binary mode:
//   * a stream starting with 'C' (compressed matrix) is rejected with an
//     error, because the CompressedMatrix reader is not available here;
//   * a stream holding the other floating-point type ("DM" when Real is
//     float, "FM" when Real is double) is read into a temporary matrix of
//     that type and converted;
//   * otherwise the expected token, the 32-bit row/column counts and the raw
//     element data are read.
// Text mode:
//   * expects "[" (or "[]" for an empty matrix), then numbers separated by
//     whitespace, with rows ended by a newline or ';', then "]";
//   * "inf"/"infinity"/"nan" (case-insensitive) are accepted with a warning.
//
// All failures end up in a single KALDI_ERR that reports the stream position
// at entry and at the point of failure.
template <typename Real>
void Matrix<Real>::Read(std::istream &is, bool binary) {
    // now assume add == false.
    // Keep the position in the stream's own position type so that offsets
    // beyond the range of MatrixIndexT are reported correctly.
    const std::istream::pos_type pos_at_start = is.tellg();
    std::ostringstream specific_error;

    if (binary) {  // Read in binary mode.
        int peekval = Peek(is, binary);
        if (peekval == 'C') {
            // Reading a CompressedMatrix as a regular matrix is disabled in
            // this build. Fail loudly: returning here would leave the matrix
            // untouched and the stream unconsumed while looking like success.
            specific_error << ": CompressedMatrix input is not supported";
            goto bad;
        }
        const char *my_token = (sizeof(Real) == 4 ? "FM" : "DM");
        char other_token_start = (sizeof(Real) == 4 ? 'D' : 'F');
        if (peekval == other_token_start) {
            // The data was written with the other precision: read it with a
            // matrix of that type and convert. If Real == float, OtherType ==
            // double, and vice versa.
            typedef typename OtherReal<Real>::Real OtherType;
            Matrix<OtherType> other(this->num_rows_, this->num_cols_);
            other.Read(is, binary);  // add is false at this point anyway.
            this->Resize(other.NumRows(), other.NumCols());
            this->CopyFromMat(other);
            return;
        }
        std::string token;
        ReadToken(is, binary, &token);
        if (token != my_token) {
            // Shorten over-long tokens so the error message stays readable.
            if (token.length() > 20) token = token.substr(0, 17) + "...";
            specific_error << ": Expected token " << my_token << ", got "
                           << token;
            goto bad;
        }
        int32 rows, cols;
        ReadBasicType(is, binary, &rows);  // throws on error.
        ReadBasicType(is, binary, &cols);  // throws on error.
        if ((MatrixIndexT)rows != this->num_rows_ ||
            (MatrixIndexT)cols != this->num_cols_) {
            this->Resize(rows, cols);
        }
        if (this->Stride() == this->NumCols() && rows * cols != 0) {
            // Contiguous storage: read everything in one call.
            is.read(reinterpret_cast<char *>(this->Data()),
                    sizeof(Real) * rows * cols);
            if (is.fail()) goto bad;
        } else {
            // Padded rows (or an empty matrix): read row by row.
            for (MatrixIndexT i = 0; i < (MatrixIndexT)rows; i++) {
                is.read(reinterpret_cast<char *>(this->RowData(i)),
                        sizeof(Real) * cols);
                if (is.fail()) goto bad;
            }
        }
        if (is.eof()) return;
        if (is.fail()) goto bad;
        return;
    } else {  // Text mode.
        std::string str;
        is >> str;  // get a token
        if (is.fail()) {
            specific_error << ": Expected \"[\", got EOF";
            goto bad;
        }
        if (str == "[]") {  // Be tolerant of variants.
            Resize(0, 0);
            return;
        } else if (str != "[") {
            if (str.length() > 20) str = str.substr(0, 17) + "...";
            specific_error << ": Expected \"[\", got \"" << str << '"';
            goto bad;
        }
        // At this point, we have read "[".
        // Rows are held by value so that every exit path, including an
        // exception thrown by Resize(), releases them automatically.
        std::vector<std::vector<Real> > data;
        std::vector<Real> cur_row;
        while (1) {
            int i = is.peek();
            if (i == -1) {
                specific_error << "Got EOF while reading matrix data";
                goto bad;
            } else if (static_cast<char>(i) ==
                       ']') {  // Finished reading matrix.
                is.get();      // eat the "]".
                i = is.peek();
                if (static_cast<char>(i) == '\r') {
                    is.get();
                    is.get();  // get \r\n (must eat what we wrote)
                } else if (static_cast<char>(i) == '\n') {
                    is.get();
                }  // get \n (must eat what we wrote)
                if (is.fail()) {
                    KALDI_WARN << "After end of matrix data, read error.";
                    // we got the data we needed, so just warn for this error.
                }
                // Now process the data. A trailing row without a terminating
                // newline or ';' still counts if it holds any values.
                if (!cur_row.empty()) data.push_back(cur_row);
                if (data.empty()) {
                    this->Resize(0, 0);
                    return;
                }
                int32 num_rows = data.size(), num_cols = data[0].size();
                this->Resize(num_rows, num_cols);
                for (int32 row = 0; row < num_rows; row++) {
                    if (static_cast<int32>(data[row].size()) != num_cols) {
                        specific_error
                            << "Matrix has inconsistent #cols: " << num_cols
                            << " vs." << data[row].size()
                            << " (processing row" << row << ")";
                        goto bad;
                    }
                    for (int32 col = 0; col < num_cols; col++)
                        (*this)(row, col) = data[row][col];
                }
                return;
            } else if (static_cast<char>(i) == '\n' ||
                       static_cast<char>(i) == ';') {
                // End of matrix row; blank lines do not create empty rows.
                is.get();
                if (cur_row.size() != 0) {
                    data.push_back(cur_row);
                    // clear() keeps the capacity, so the next row starts
                    // with room for as many values as the previous one.
                    cur_row.clear();
                }
            } else if ((i >= '0' && i <= '9') || i == '-') {  // A number...
                Real r;
                is >> r;
                if (is.fail()) {
                    specific_error
                        << "Stream failure/EOF while reading matrix data.";
                    goto bad;
                }
                cur_row.push_back(r);
            } else if (isspace(i)) {
                is.get();  // eat the space and do nothing.
            } else {       // NaN or inf or error.
                std::string word;
                is >> word;
                if (!KALDI_STRCASECMP(word.c_str(), "inf") ||
                    !KALDI_STRCASECMP(word.c_str(), "infinity")) {
                    cur_row.push_back(std::numeric_limits<Real>::infinity());
                    KALDI_WARN << "Reading infinite value into matrix.";
                } else if (!KALDI_STRCASECMP(word.c_str(), "nan")) {
                    cur_row.push_back(std::numeric_limits<Real>::quiet_NaN());
                    KALDI_WARN << "Reading NaN value into matrix.";
                } else {
                    if (word.length() > 20) word = word.substr(0, 17) + "...";
                    specific_error << "Expecting numeric matrix data, got "
                                   << word;
                    goto bad;
                }
            }
        }
        // Note, we never leave the while () loop normally: every path either
        // returns or jumps to "bad" below.
    }
bad:
    KALDI_ERR << "Failed to read matrix from stream.  " << specific_error.str()
              << " File position at start is " << pos_at_start << ", currently "
              << is.tellg();
}


// Constructs a view onto the window of M that starts at row `ro`, column `co`
// and spans `r` rows and `c` columns. No elements are copied: the view points
// into M's storage and reuses M's stride.
//
// Note that this is not const-safe: M is taken by const reference, yet the
// contents can be changed through the resulting SubMatrix. Implementing a
// "const SubMatrix" class that forbids this would be quite complicated.
template <typename Real>
SubMatrix<Real>::SubMatrix(const MatrixBase<Real> &M,
                           const MatrixIndexT ro,
                           const MatrixIndexT r,
                           const MatrixIndexT co,
                           const MatrixIndexT c) {
    const bool is_empty = (r == 0 || c == 0);
    if (!is_empty) {
        // The window must lie entirely inside M; the unsigned comparisons
        // also reject negative offsets and sizes.
        KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(ro) <
                         static_cast<UnsignedMatrixIndexT>(M.num_rows_) &&
                     static_cast<UnsignedMatrixIndexT>(co) <
                         static_cast<UnsignedMatrixIndexT>(M.num_cols_) &&
                     static_cast<UnsignedMatrixIndexT>(r) <=
                         static_cast<UnsignedMatrixIndexT>(M.num_rows_ - ro) &&
                     static_cast<UnsignedMatrixIndexT>(c) <=
                         static_cast<UnsignedMatrixIndexT>(M.num_cols_ - co));
        // Offset, in elements, of the beginning of the window within M.
        const size_t window_offset =
            static_cast<size_t>(co) +
            static_cast<size_t>(ro) * static_cast<size_t>(M.Stride());
        this->data_ = M.Data_workaround() + window_offset;
        this->stride_ = M.Stride();
        this->num_cols_ = c;
        this->num_rows_ = r;
        return;
    }

    // The empty sub-matrix is supported as a special case, but only when
    // both dimensions are zero.
    KALDI_ASSERT(c == 0 && r == 0);
    this->data_ = NULL;
    this->stride_ = 0;
    this->num_cols_ = 0;
    this->num_rows_ = 0;
}


// Wraps an externally owned buffer as a num_rows x num_cols matrix view with
// the given stride. A NULL buffer is accepted only for an empty matrix, in
// which case all dimensions are reset to zero.
template <typename Real>
SubMatrix<Real>::SubMatrix(Real *data,
                           MatrixIndexT num_rows,
                           MatrixIndexT num_cols,
                           MatrixIndexT stride)
    // caution: the base constructor is passed columns before rows!
    : MatrixBase<Real>(data, num_cols, num_rows, stride) {
    if (data != NULL) {
        // A row cannot be wider than the distance between row starts.
        KALDI_ASSERT(this->stride_ >= this->num_cols_);
        return;
    }
    KALDI_ASSERT(num_rows * num_cols == 0);
    this->stride_ = 0;
    this->num_cols_ = 0;
    this->num_rows_ = 0;
}

/*
template<typename Real>
void MatrixBase<Real>::Add(const Real alpha) {
  Real *data = data_;
  MatrixIndexT stride = stride_;
  for (MatrixIndexT r = 0; r < num_rows_; r++)
    for (MatrixIndexT c = 0; c < num_cols_; c++)
      data[c + stride*r] += alpha;
}

template<typename Real>
void MatrixBase<Real>::AddToDiag(const Real alpha) {
  Real *data = data_;
  MatrixIndexT this_stride = stride_ + 1,
      num_to_add = std::min(num_rows_, num_cols_);
  for (MatrixIndexT r = 0; r < num_to_add; r++)
    data[r * this_stride] += alpha;
}


template<typename Real>
Real MatrixBase<Real>::Cond() const {
  KALDI_ASSERT(num_rows_ > 0&&num_cols_ > 0);
  Vector<Real> singular_values(std::min(num_rows_, num_cols_));
  Svd(&singular_values);  // Get singular values...
  Real min = singular_values(0), max = singular_values(0);  // both absolute
values...
  for (MatrixIndexT i = 1;i < singular_values.Dim();i++) {
    min = std::min((Real)std::abs(singular_values(i)), min); max =
std::max((Real)std::abs(singular_values(i)), max);
  }
  if (min > 0) return max/min;
  else return std::numeric_limits<Real>::infinity();
}

template<typename Real>
Real MatrixBase<Real>::Trace(bool check_square) const  {
  KALDI_ASSERT(!check_square || num_rows_ == num_cols_);
  Real ans = 0.0;
  for (MatrixIndexT r = 0;r < std::min(num_rows_, num_cols_);r++) ans += data_
[r + stride_*r];
  return ans;
}

template<typename Real>
Real MatrixBase<Real>::Max() const {
  KALDI_ASSERT(num_rows_ > 0 && num_cols_ > 0);
  Real ans= *data_;
  for (MatrixIndexT r = 0; r < num_rows_; r++)
    for (MatrixIndexT c = 0; c < num_cols_; c++)
      if (data_[c + stride_*r] > ans)
        ans = data_[c + stride_*r];
  return ans;
}

template<typename Real>
Real MatrixBase<Real>::Min() const {
  KALDI_ASSERT(num_rows_ > 0 && num_cols_ > 0);
  Real ans= *data_;
  for (MatrixIndexT r = 0; r < num_rows_; r++)
    for (MatrixIndexT c = 0; c < num_cols_; c++)
      if (data_[c + stride_*r] < ans)
        ans = data_[c + stride_*r];
  return ans;
}


template <typename Real>
void MatrixBase<Real>::AddMatMatMat(Real alpha,
                                    const MatrixBase<Real> &A,
MatrixTransposeType transA,
                                    const MatrixBase<Real> &B,
MatrixTransposeType transB,
                                    const MatrixBase<Real> &C,
MatrixTransposeType transC,
                                    Real beta) {
  // Note on time taken with different orders of computation.  Assume not
transposed in this /
  // discussion. Firstly, normalize expressions using A.NumCols == B.NumRows and
B.NumCols == C.NumRows, prefer
  // rows where there is a choice.
  // time taken for (AB) is:  A.NumRows*B.NumRows*C.Rows
  // time taken for (AB)C is A.NumRows*C.NumRows*C.Cols
  // so this order is A.NumRows*B.NumRows*C.NumRows +
A.NumRows*C.NumRows*C.NumCols.

  // time taken for (BC) is: B.NumRows*C.NumRows*C.Cols
  // time taken for A(BC) is: A.NumRows*B.NumRows*C.Cols
  // so this order is B.NumRows*C.NumRows*C.NumCols + A.NumRows*B.NumRows*C.Cols

  MatrixIndexT ARows = A.num_rows_, ACols = A.num_cols_, BRows = B.num_rows_,
BCols = B.num_cols_,
      CRows = C.num_rows_, CCols = C.num_cols_;
  if (transA == kTrans) std::swap(ARows, ACols);
  if (transB == kTrans) std::swap(BRows, BCols);
  if (transC == kTrans) std::swap(CRows, CCols);

  MatrixIndexT AB_C_time = ARows*BRows*CRows + ARows*CRows*CCols;
  MatrixIndexT A_BC_time = BRows*CRows*CCols + ARows*BRows*CCols;

  if (AB_C_time < A_BC_time) {
    Matrix<Real> AB(ARows, BCols);
    AB.AddMatMat(1.0, A, transA, B, transB, 0.0);  // AB = A * B.
    (*this).AddMatMat(alpha, AB, kNoTrans, C, transC, beta);
  } else {
    Matrix<Real> BC(BRows, CCols);
    BC.AddMatMat(1.0, B, transB, C, transC, 0.0);  // BC = B * C.
    (*this).AddMatMat(alpha, A, transA, BC, kNoTrans, beta);
  }
}


template<typename Real>
void MatrixBase<Real>::DestructiveSvd(VectorBase<Real> *s, MatrixBase<Real> *U,
MatrixBase<Real> *Vt) {
  // Svd, *this = U*diag(s)*Vt.
  // With (*this).num_rows_ == m, (*this).num_cols_ == n,
  // Support only skinny Svd with m>=n (NumRows>=NumCols); a NULL pointer for
  // U or Vt means we do not want that output.  We expect that s.Dim() == n,
  // U is either NULL or m by n, and Vt is either NULL or n by n.
  // Throws exception on error.

  KALDI_ASSERT(num_rows_>=num_cols_ && "Svd requires that #rows by >= #cols.");
// For compatibility with JAMA code.
  KALDI_ASSERT(s->Dim() == num_cols_);  // s should be the smaller dim.
  KALDI_ASSERT(U == NULL || (U->num_rows_ == num_rows_&&U->num_cols_ ==
num_cols_));
  KALDI_ASSERT(Vt == NULL || (Vt->num_rows_ == num_cols_&&Vt->num_cols_ ==
num_cols_));

  Real prescale = 1.0;
  if ( std::abs((*this)(0, 0) ) < 1.0e-30) {  // Very tiny value... can cause
problems in Svd.
    Real max_elem = LargestAbsElem();
    if (max_elem != 0) {
      prescale = 1.0 / max_elem;
      if (std::abs(prescale) == std::numeric_limits<Real>::infinity()) {
prescale = 1.0e+40; }
      (*this).Scale(prescale);
    }
  }

#if !defined(HAVE_ATLAS) && !defined(USE_KALDI_SVD)
  // "S" == skinny Svd (only one we support because of compatibility with Jama
one which is only skinny),
  // "N"== no eigenvectors wanted.
  LapackGesvd(s, U, Vt);
#else
  /*  if (num_rows_ > 1 && num_cols_ > 1 && (*this)(0, 0) == (*this)(1, 1)
      && Max() == Min() && (*this)(0, 0) != 0.0) { // special case that JamaSvd
sometimes crashes on.
      KALDI_WARN << "Jama SVD crashes on this type of matrix, perturbing it to
prevent crash.";
      for(int32 i = 0; i < NumRows(); i++)
      (*this)(i, i)  *= 1.00001;
      }*/
//  bool ans = JamaSvd(s, U, Vt);
// if (Vt != NULL) Vt->Transpose();  // possibly to do: change this and also the
// transpose inside the JamaSvd routine.  note, Vt is square.
// if (!ans) {
// KALDI_ERR << "Error doing Svd";  // This one will be caught.
//}
//#endif
// if (prescale != 1.0) s->Scale(1.0/prescale);
//}
/*
template<typename Real>
void MatrixBase<Real>::Svd(VectorBase<Real> *s, MatrixBase<Real> *U,
MatrixBase<Real> *Vt) const {
  try {
    if (num_rows_ >= num_cols_) {
      Matrix<Real> tmp(*this);
      tmp.DestructiveSvd(s, U, Vt);
    } else {
      Matrix<Real> tmp(*this, kTrans);  // transpose of *this.
      // rVt will have different dim so cannot transpose in-place --> use a temp
matrix.
      Matrix<Real> Vt_Trans(Vt ? Vt->num_cols_ : 0, Vt ? Vt->num_rows_ : 0);
      // U will be transpose
      tmp.DestructiveSvd(s, Vt ? &Vt_Trans : NULL, U);
      if (U) U->Transpose();
      if (Vt) Vt->CopyFromMat(Vt_Trans, kTrans);  // copy with transpose.
    }
  } catch (...) {
    KALDI_ERR << "Error doing Svd (did not converge), first part of matrix is\n"
              << SubMatrix<Real>(*this, 0, std::min((MatrixIndexT)10,
num_rows_),
                                 0, std::min((MatrixIndexT)10, num_cols_))
              << ", min and max are: " << Min() << ", " << Max();
  }
}

template<typename Real>
bool MatrixBase<Real>::IsSymmetric(Real cutoff) const {
  MatrixIndexT R = num_rows_, C = num_cols_;
  if (R != C) return false;
  Real bad_sum = 0.0, good_sum = 0.0;
  for (MatrixIndexT i = 0;i < R;i++) {
    for (MatrixIndexT j = 0;j < i;j++) {
      Real a = (*this)(i, j), b = (*this)(j, i), avg = 0.5*(a+b), diff =
0.5*(a-b);
      good_sum += std::abs(avg); bad_sum += std::abs(diff);
    }
    good_sum += std::abs((*this)(i, i));
  }
  if (bad_sum > cutoff*good_sum) return false;
  return true;
}

template<typename Real>
bool MatrixBase<Real>::IsDiagonal(Real cutoff) const{
  MatrixIndexT R = num_rows_, C = num_cols_;
  Real bad_sum = 0.0, good_sum = 0.0;
  for (MatrixIndexT i = 0;i < R;i++) {
    for (MatrixIndexT j = 0;j < C;j++) {
      if (i == j) good_sum += std::abs((*this)(i, j));
      else bad_sum += std::abs((*this)(i, j));
    }
  }
  return (!(bad_sum > good_sum * cutoff));
}

// This does nothing, it's designed to trigger Valgrind errors
// if any memory is uninitialized.
template<typename Real>
void MatrixBase<Real>::TestUninitialized() const {
  MatrixIndexT R = num_rows_, C = num_cols_, positive = 0;
  for (MatrixIndexT i = 0; i < R; i++)
    for (MatrixIndexT j = 0; j < C; j++)
      if ((*this)(i, j) > 0.0) positive++;
  if (positive > R * C)
    KALDI_ERR << "Error....";
}


template<typename Real>
bool MatrixBase<Real>::IsUnit(Real cutoff) const {
  MatrixIndexT R = num_rows_, C = num_cols_;
  Real bad_max = 0.0;
  for (MatrixIndexT i = 0; i < R;i++)
    for (MatrixIndexT j = 0; j < C;j++)
      bad_max = std::max(bad_max, static_cast<Real>(std::abs( (*this)(i, j) - (i
== j?1.0:0.0))));
  return (bad_max <= cutoff);
}

template<typename Real>
bool MatrixBase<Real>::IsZero(Real cutoff)const {
  MatrixIndexT R = num_rows_, C = num_cols_;
  Real bad_max = 0.0;
  for (MatrixIndexT i = 0;i < R;i++)
    for (MatrixIndexT j = 0;j < C;j++)
      bad_max = std::max(bad_max, static_cast<Real>(std::abs( (*this)(i, j) )));
  return (bad_max <= cutoff);
}

template<typename Real>
Real MatrixBase<Real>::FrobeniusNorm() const{
  return std::sqrt(TraceMatMat(*this, *this, kTrans));
}

template<typename Real>
bool MatrixBase<Real>::ApproxEqual(const MatrixBase<Real> &other, float tol)
const {
  if (num_rows_ != other.num_rows_ || num_cols_ != other.num_cols_)
    KALDI_ERR << "ApproxEqual: size mismatch.";
  Matrix<Real> tmp(*this);
  tmp.AddMat(-1.0, other);
  return (tmp.FrobeniusNorm() <= static_cast<Real>(tol) *
          this->FrobeniusNorm());
}

template<typename Real>
bool MatrixBase<Real>::Equal(const MatrixBase<Real> &other) const {
  if (num_rows_ != other.num_rows_ || num_cols_ != other.num_cols_)
    KALDI_ERR << "Equal: size mismatch.";
  for (MatrixIndexT i = 0; i < num_rows_; i++)
    for (MatrixIndexT j = 0; j < num_cols_; j++)
      if ( (*this)(i, j) != other(i, j))
        return false;
  return true;
}


template<typename Real>
Real MatrixBase<Real>::LargestAbsElem() const{
  MatrixIndexT R = num_rows_, C = num_cols_;
  Real largest = 0.0;
  for (MatrixIndexT i = 0;i < R;i++)
    for (MatrixIndexT j = 0;j < C;j++)
      largest = std::max(largest, (Real)std::abs((*this)(i, j)));
  return largest;
}


template<typename Real>
void MatrixBase<Real>::OrthogonalizeRows() {
  KALDI_ASSERT(NumRows() <= NumCols());
  MatrixIndexT num_rows = num_rows_;
  for (MatrixIndexT i = 0; i < num_rows; i++) {
    int32 counter = 0;
    while (1) {
      Real start_prod = VecVec(this->Row(i), this->Row(i));
      if (start_prod - start_prod != 0.0 || start_prod == 0.0) {
        KALDI_WARN << "Self-product of row " << i << " of matrix is "
                   << start_prod << ", randomizing.";
        this->Row(i).SetRandn();
        counter++;
        continue;  // loop again.
      }
      for (MatrixIndexT j = 0; j < i; j++) {
        Real prod = VecVec(this->Row(i), this->Row(j));
        this->Row(i).AddVec(-prod, this->Row(j));
      }
      Real end_prod = VecVec(this->Row(i), this->Row(i));
      if (end_prod <= 0.01 * start_prod) { // We removed
        // almost all of the vector during orthogonalization,
        // so we have reason to doubt (for roundoff reasons)
        // that it's still orthogonal to the other vectors.
        // We need to orthogonalize again.
        if (end_prod == 0.0) { // Row is exactly zero:
          // generate random direction.
          this->Row(i).SetRandn();
        }
        counter++;
        if (counter > 100)
          KALDI_ERR << "Loop detected while orthogalizing matrix.";
      } else {
        this->Row(i).Scale(1.0 / std::sqrt(end_prod));
        break;
      }
    }
  }
}


// Uses Svd to compute the eigenvalue decomposition of a symmetric positive
// semidefinite matrix:
// (*this) = rU * diag(rs) * rU^T, with rU an orthogonal matrix so rU^{-1} =
// rU^T.
// Does this by computing svd (*this) = U diag(rs) V^T ... answer is just U
// diag(rs) U^T.
// Logs a warning and continues if the result does not reproduce *this to
// within the supplied precision check_thresh (typically because *this was not
// symmetric positive semidefinite).

template<typename Real>
void MatrixBase<Real>::SymPosSemiDefEig(VectorBase<Real> *rs, MatrixBase<Real>
*rU, Real check_thresh) // e.g. check_thresh = 0.001
{
  const MatrixIndexT D = num_rows_;

  KALDI_ASSERT(num_rows_ == num_cols_);
  KALDI_ASSERT(IsSymmetric() && "SymPosSemiDefEig: expecting input to be
symmetrical.");
  KALDI_ASSERT(rU->num_rows_ == D && rU->num_cols_ == D && rs->Dim() == D);

  Matrix<Real>  Vt(D, D);
  Svd(rs, rU, &Vt);

  // First just zero any singular values if the column of U and V do not have
+ve dot product--
  // this may mean we have small negative eigenvalues, and if we zero them the
result will be closer to correct.
  for (MatrixIndexT i = 0;i < D;i++) {
    Real sum = 0.0;
    for (MatrixIndexT j = 0;j < D;j++) sum += (*rU)(j, i) * Vt(i, j);
    if (sum < 0.0) (*rs)(i) = 0.0;
  }

  {
    Matrix<Real> tmpU(*rU); Vector<Real> tmps(*rs); tmps.ApplyPow(0.5);
    tmpU.MulColsVec(tmps);
    SpMatrix<Real> tmpThis(D);
    tmpThis.AddMat2(1.0, tmpU, kNoTrans, 0.0);
    Matrix<Real> tmpThisFull(tmpThis);
    float new_norm = tmpThisFull.FrobeniusNorm();
    float old_norm = (*this).FrobeniusNorm();
    tmpThisFull.AddMat(-1.0, (*this));

    if (!(old_norm == 0 && new_norm == 0)) {
      float diff_norm = tmpThisFull.FrobeniusNorm();
      if (std::abs(new_norm-old_norm) > old_norm*check_thresh || diff_norm >
old_norm*check_thresh) {
        KALDI_WARN << "SymPosSemiDefEig seems to have failed " << diff_norm << "
!<< "
                   << check_thresh << "*" << old_norm << ", maybe matrix was not
"
                   << "positive semi definite.  Continuing anyway.";
      }
    }
  }
}


template<typename Real>
Real MatrixBase<Real>::LogDet(Real *det_sign) const {
  Real log_det;
  Matrix<Real> tmp(*this);
  tmp.Invert(&log_det, det_sign, false);  // false== output not needed (saves
some computation).
  return log_det;
}

template<typename Real>
void MatrixBase<Real>::InvertDouble(Real *log_det, Real *det_sign,
                                    bool inverse_needed) {
  double log_det_tmp, det_sign_tmp;
  Matrix<double> dmat(*this);
  dmat.Invert(&log_det_tmp, &det_sign_tmp, inverse_needed);
  if (inverse_needed) (*this).CopyFromMat(dmat);
  if (log_det) *log_det = log_det_tmp;
  if (det_sign) *det_sign = det_sign_tmp;
}
*/

// template<class Real>
// void MatrixBase<Real>::CopyFromMat(const CompressedMatrix &mat) {
// mat.CopyToMat(this);
//}

// template<class Real>
// Matrix<Real>::Matrix(const CompressedMatrix &M): MatrixBase<Real>() {
// Resize(M.NumRows(), M.NumCols(), kUndefined);
// M.CopyToMat(this);
//}


// Replaces every element x of this matrix with its reciprocal 1/x, in place.
// The quotient is formed with the double literal 1.0 and then narrowed back
// to Real.  No check is made for zero-valued elements.
template <typename Real>
void MatrixBase<Real>::InvertElements() {
    const MatrixIndexT row_count = num_rows_;
    const MatrixIndexT col_count = num_cols_;
    for (MatrixIndexT i = 0; i < row_count; ++i) {
        for (MatrixIndexT j = 0; j < col_count; ++j) {
            // Bind the element once, then overwrite it with its reciprocal.
            Real &elem = (*this)(i, j);
            elem = static_cast<Real>(1.0 / elem);
        }
    }
}
/*
template<typename Real>
void MatrixBase<Real>::Transpose() {
  KALDI_ASSERT(num_rows_ == num_cols_);
  MatrixIndexT M = num_rows_;
  for (MatrixIndexT i = 0;i < M;i++)
    for (MatrixIndexT j = 0;j < i;j++) {
      Real &a = (*this)(i, j), &b = (*this)(j, i);
      std::swap(a, b);
    }
}


template<typename Real>
void Matrix<Real>::Transpose() {
  if (this->num_rows_ != this->num_cols_) {
    Matrix<Real> tmp(*this, kTrans);
    Resize(this->num_cols_, this->num_rows_);
    this->CopyFromMat(tmp);
  } else {
    (static_cast<MatrixBase<Real>&>(*this)).Transpose();
  }
}

template<typename Real>
void MatrixBase<Real>::Heaviside(const MatrixBase<Real> &src) {
  KALDI_ASSERT(SameDim(*this, src));
  MatrixIndexT num_rows = num_rows_, num_cols = num_cols_;
  Real *row_data = data_;
  const Real *src_row_data = src.Data();
  for (MatrixIndexT row = 0; row < num_rows;
       row++,row_data += stride_, src_row_data += src.stride_) {
    for (MatrixIndexT col = 0; col < num_cols; col++)
      row_data[col] = (src_row_data[col] > 0 ? 1.0 : 0.0);
  }
}

template<typename Real>
void MatrixBase<Real>::Exp(const MatrixBase<Real> &src) {
  KALDI_ASSERT(SameDim(*this, src));
  MatrixIndexT num_rows = num_rows_, num_cols = num_cols_;
  Real *row_data = data_;
  const Real *src_row_data = src.Data();
  for (MatrixIndexT row = 0; row < num_rows;
       row++,row_data += stride_, src_row_data += src.stride_) {
    for (MatrixIndexT col = 0; col < num_cols; col++)
      row_data[col] = kaldi::Exp(src_row_data[col]);
  }
}

template<typename Real>
void MatrixBase<Real>::Pow(const MatrixBase<Real> &src, Real power) {
  KALDI_ASSERT(SameDim(*this, src));
  MatrixIndexT num_rows = num_rows_, num_cols = num_cols_;
  Real *row_data = data_;
  const Real *src_row_data = src.Data();
  for (MatrixIndexT row = 0; row < num_rows;
       row++,row_data += stride_, src_row_data += src.stride_) {
    for (MatrixIndexT col = 0; col < num_cols; col++) {
      row_data[col] = pow(src_row_data[col], power);
    }
  }
}

template<typename Real>
void MatrixBase<Real>::PowAbs(const MatrixBase<Real> &src, Real power, bool
include_sign) {
  KALDI_ASSERT(SameDim(*this, src));
  MatrixIndexT num_rows = num_rows_, num_cols = num_cols_;
  Real *row_data = data_;
  const Real *src_row_data = src.Data();
  for (MatrixIndexT row = 0; row < num_rows;
       row++,row_data += stride_, src_row_data += src.stride_) {
    for (MatrixIndexT col = 0; col < num_cols; col ++) {
      if (include_sign == true && src_row_data[col] < 0) {
    row_data[col] = -pow(std::abs(src_row_data[col]), power);
      } else {
    row_data[col] = pow(std::abs(src_row_data[col]), power);
      }
    }
  }
}

template<typename Real>
void MatrixBase<Real>::Floor(const MatrixBase<Real> &src, Real floor_val) {
  KALDI_ASSERT(SameDim(*this, src));
  MatrixIndexT num_rows = num_rows_, num_cols = num_cols_;
  Real *row_data = data_;
  const Real *src_row_data = src.Data();
  for (MatrixIndexT row = 0; row < num_rows;
       row++,row_data += stride_, src_row_data += src.stride_) {
    for (MatrixIndexT col = 0; col < num_cols; col++)
      row_data[col] = (src_row_data[col] < floor_val ? floor_val :
src_row_data[col]);
  }
}

template<typename Real>
void MatrixBase<Real>::Ceiling(const MatrixBase<Real> &src, Real ceiling_val) {
  KALDI_ASSERT(SameDim(*this, src));
  MatrixIndexT num_rows = num_rows_, num_cols = num_cols_;
  Real *row_data = data_;
  const Real *src_row_data = src.Data();
  for (MatrixIndexT row = 0; row < num_rows;
       row++,row_data += stride_, src_row_data += src.stride_) {
    for (MatrixIndexT col = 0; col < num_cols; col++)
      row_data[col] = (src_row_data[col] > ceiling_val ? ceiling_val :
src_row_data[col]);
  }
}

template<typename Real>
void MatrixBase<Real>::Log(const MatrixBase<Real> &src) {
  KALDI_ASSERT(SameDim(*this, src));
  MatrixIndexT num_rows = num_rows_, num_cols = num_cols_;
  Real *row_data = data_;
  const Real *src_row_data = src.Data();
  for (MatrixIndexT row = 0; row < num_rows;
       row++,row_data += stride_, src_row_data += src.stride_) {
    for (MatrixIndexT col = 0; col < num_cols; col++)
      row_data[col] = kaldi::Log(src_row_data[col]);
  }
}

template<typename Real>
void MatrixBase<Real>::ExpSpecial(const MatrixBase<Real> &src) {
  KALDI_ASSERT(SameDim(*this, src));
  MatrixIndexT num_rows = num_rows_, num_cols = num_cols_;
  Real *row_data = data_;
  const Real *src_row_data = src.Data();
  for (MatrixIndexT row = 0; row < num_rows;
       row++,row_data += stride_, src_row_data += src.stride_) {
    for (MatrixIndexT col = 0; col < num_cols; col++)
      row_data[col] = (src_row_data[col] < Real(0) ?
kaldi::Exp(src_row_data[col]) : (src_row_data[col] + Real(1)));
  }
}

template<typename Real>
void MatrixBase<Real>::ExpLimited(const MatrixBase<Real> &src, Real lower_limit,
Real upper_limit) {
  KALDI_ASSERT(SameDim(*this, src));
  MatrixIndexT num_rows = num_rows_, num_cols = num_cols_;
  Real *row_data = data_;
  const Real *src_row_data = src.Data();
  for (MatrixIndexT row = 0; row < num_rows;
       row++,row_data += stride_, src_row_data += src.stride_) {
    for (MatrixIndexT col = 0; col < num_cols; col++) {
      const Real x = src_row_data[col];
      if (!(x >= lower_limit))
    row_data[col] = kaldi::Exp(lower_limit);
      else if (x > upper_limit)
    row_data[col] = kaldi::Exp(upper_limit);
      else
    row_data[col] = kaldi::Exp(x);
    }
  }
}

template<typename Real>
bool MatrixBase<Real>::Power(Real power) {
  KALDI_ASSERT(num_rows_ > 0 && num_rows_ == num_cols_);
  MatrixIndexT n = num_rows_;
  Matrix<Real> P(n, n);
  Vector<Real> re(n), im(n);
  this->Eig(&P, &re, &im);
  // Now attempt to take the complex eigenvalues to this power.
  for (MatrixIndexT i = 0; i < n; i++)
    if (!AttemptComplexPower(&(re(i)), &(im(i)), power))
      return false;  // e.g. real and negative, or zero, eigenvalues.

  Matrix<Real> D(n, n);  // D to the power.
  CreateEigenvalueMatrix(re, im, &D);

  Matrix<Real> tmp(n, n);  // P times D
  tmp.AddMatMat(1.0, P, kNoTrans, D, kNoTrans, 0.0);  // tmp := P*D
  P.Invert();
  // next line is: *this = tmp * P^{-1} = P * D * P^{-1}
  (*this).AddMatMat(1.0, tmp, kNoTrans, P, kNoTrans, 0.0);
  return true;
}
*/
// Exchanges the contents of this matrix with *other by swapping the four
// bookkeeping members (data_, num_rows_, num_cols_, stride_); the individual
// elements are not touched.  `other` is dereferenced unconditionally, so it
// must not be NULL.
template <typename Real>
void Matrix<Real>::Swap(Matrix<Real> *other) {
    Matrix<Real> &rhs = *other;
    // Dimensions and stride first, then the data pointer; the four swaps are
    // independent of one another.
    std::swap(this->num_rows_, rhs.num_rows_);
    std::swap(this->num_cols_, rhs.num_cols_);
    std::swap(this->stride_, rhs.stride_);
    std::swap(this->data_, rhs.data_);
}
/*
// Repeating this comment that appeared in the header:
// Eigenvalue Decomposition of a square NxN matrix into the form (*this) = P D
// P^{-1}.  Be careful: the relationship of D to the eigenvalues we output is
// slightly complicated, due to the need for P to be real.  In the symmetric
// case D is diagonal and real, but in
// the non-symmetric case there may be complex-conjugate pairs of eigenvalues.
// In this case, for the equation (*this) = P D P^{-1} to hold, D must actually
// be block diagonal, with 2x2 blocks corresponding to any such pairs.  If a
// pair is lambda +- i*mu, D will have a corresponding 2x2 block
// [lambda, mu; -mu, lambda].
// Note that if the input matrix (*this) is non-invertible, P may not be
// invertible, so in this case instead of the equation (*this) = P D P^{-1}
// holding, we have instead (*this) P = P D.
//
// By making the pointer arguments non-NULL or NULL, the user can choose
// whether or not to take the eigenvalues directly, and/or the matrix D which
// is block-diagonal with 2x2 blocks.
template<typename Real>
void MatrixBase<Real>::Eig(MatrixBase<Real> *P,
                           VectorBase<Real> *r,
                           VectorBase<Real> *i) const {
  EigenvalueDecomposition<Real>  eig(*this);
  if (P) eig.GetV(P);
  if (r) eig.GetRealEigenvalues(r);
  if (i) eig.GetImagEigenvalues(i);
}


// Begin non-member function definitions.

//  /**
//   * @brief Extension of the HTK header
//  */
// struct HtkHeaderExt
//  {
// INT_32 mHeaderSize;
// INT_32 mVersion;
// INT_32 mSampSize;
// };
/*
template<typename Real>
bool ReadHtk(std::istream &is, Matrix<Real> *M_ptr, HtkHeader *header_ptr)
{
  // check instantiated with double or float.
  KALDI_ASSERT_IS_FLOATING_TYPE(Real);
  Matrix<Real> &M = *M_ptr;
  HtkHeader htk_hdr;

  // TODO(arnab): this fails if the HTK file has CRC checksum or is compressed.
  is.read((char*)&htk_hdr, sizeof(htk_hdr));  // we're being really POSIX here!
  if (is.fail()) {
    KALDI_WARN << "Could not read header from HTK feature file ";
    return false;
  }

  KALDI_SWAP4(htk_hdr.mNSamples);
  KALDI_SWAP4(htk_hdr.mSamplePeriod);
  KALDI_SWAP2(htk_hdr.mSampleSize);
  KALDI_SWAP2(htk_hdr.mSampleKind);

  bool has_checksum = false;
  {
    // See HParm.h in HTK code for sources of these things.
    enum BaseParmKind{
      Waveform, Lpc, Lprefc, Lpcepstra, Lpdelcep,
      Irefc, Mfcc, Fbank, Melspec, User, Discrete, Plp, Anon };

    const int32 IsCompressed = 02000, HasChecksum = 010000, HasVq = 040000,
        Problem = IsCompressed | HasVq;
    int32 base_parm = htk_hdr.mSampleKind & (077);
    has_checksum = (base_parm & HasChecksum) != 0;
    htk_hdr.mSampleKind &= ~HasChecksum; // We don't support writing with
                                         // checksum so turn it off.
    if (htk_hdr.mSampleKind & Problem)
      KALDI_ERR << "Code to read HTK features does not support compressed "
          "features, or features with VQ.";
    if (base_parm == Waveform || base_parm == Irefc || base_parm == Discrete)
      KALDI_ERR << "Attempting to read HTK features from unsupported type "
          "(e.g. waveform or discrete features.";
  }

  KALDI_VLOG(3) << "HTK header: Num Samples: " << htk_hdr.mNSamples
                << "; Sample period: " << htk_hdr.mSamplePeriod
                << "; Sample size: " << htk_hdr.mSampleSize
                << "; Sample kind: " << htk_hdr.mSampleKind;

  M.Resize(htk_hdr.mNSamples, htk_hdr.mSampleSize / sizeof(float));

  MatrixIndexT i;
  MatrixIndexT j;
  if (sizeof(Real) == sizeof(float)) {
    for (i = 0; i< M.NumRows(); i++) {
      is.read((char*)M.RowData(i), sizeof(float)*M.NumCols());
      if (is.fail()) {
        KALDI_WARN << "Could not read data from HTK feature file ";
        return false;
      }
      if (MachineIsLittleEndian()) {
        MatrixIndexT C = M.NumCols();
        for (j = 0; j < C; j++) {
          KALDI_SWAP4((M(i, j)));  // The HTK standard is big-endian!
        }
      }
    }
  } else {
    float *pmem = new float[M.NumCols()];
    for (i = 0; i < M.NumRows(); i++) {
      is.read((char*)pmem, sizeof(float)*M.NumCols());
      if (is.fail()) {
        KALDI_WARN << "Could not read data from HTK feature file ";
        delete [] pmem;
        return false;
      }
      MatrixIndexT C = M.NumCols();
      for (j = 0; j < C; j++) {
        if (MachineIsLittleEndian())  // HTK standard is big-endian!
          KALDI_SWAP4(pmem[j]);
        M(i, j) = static_cast<Real>(pmem[j]);
      }
    }
    delete [] pmem;
  }
  if (header_ptr) *header_ptr = htk_hdr;
  if (has_checksum) {
    int16 checksum;
    is.read((char*)&checksum, sizeof(checksum));
    if (is.fail())
      KALDI_WARN << "Could not read checksum from HTK feature file ";
    // We ignore the checksum.
  }
  return true;
}


template
bool ReadHtk(std::istream &is, Matrix<float> *M, HtkHeader *header_ptr);

template
bool ReadHtk(std::istream &is, Matrix<double> *M, HtkHeader *header_ptr);

template<typename Real>
bool WriteHtk(std::ostream &os, const MatrixBase<Real> &M, HtkHeader htk_hdr) //
header may be derived from a previous call to ReadHtk.  Must be in binary mode.
{
  KALDI_ASSERT(M.NumRows() == static_cast<MatrixIndexT>(htk_hdr.mNSamples));
  KALDI_ASSERT(M.NumCols() == static_cast<MatrixIndexT>(htk_hdr.mSampleSize) /
               static_cast<MatrixIndexT>(sizeof(float)));

  KALDI_SWAP4(htk_hdr.mNSamples);
  KALDI_SWAP4(htk_hdr.mSamplePeriod);
  KALDI_SWAP2(htk_hdr.mSampleSize);
  KALDI_SWAP2(htk_hdr.mSampleKind);

  os.write((char*)&htk_hdr, sizeof(htk_hdr));
  if (os.fail())  goto bad;

  MatrixIndexT i;
  MatrixIndexT j;
  if (sizeof(Real) == sizeof(float) && !MachineIsLittleEndian()) {
    for (i = 0; i< M.NumRows(); i++) {  // Unlikely to reach here ever!
      os.write((char*)M.RowData(i), sizeof(float)*M.NumCols());
      if (os.fail()) goto bad;
    }
  } else {
    float *pmem = new float[M.NumCols()];

    for (i = 0; i < M.NumRows(); i++) {
      const Real *rowData = M.RowData(i);
      for (j = 0;j < M.NumCols();j++)
        pmem[j] =  static_cast<float> ( rowData[j] );
      if (MachineIsLittleEndian())
        for (j = 0;j < M.NumCols();j++)
          KALDI_SWAP4(pmem[j]);
      os.write((char*)pmem, sizeof(float)*M.NumCols());
      if (os.fail()) {
        delete [] pmem;
        goto bad;
      }
    }
    delete [] pmem;
  }
  return true;
bad:
  KALDI_WARN << "Could not write to HTK feature file ";
  return false;
}

template
bool WriteHtk(std::ostream &os, const MatrixBase<float> &M, HtkHeader htk_hdr);

template
bool WriteHtk(std::ostream &os, const MatrixBase<double> &M, HtkHeader htk_hdr);

template<class Real>
bool WriteSphinx(std::ostream &os, const MatrixBase<Real> &M)
{
  // CMUSphinx mfc file header contains count of the floats, followed
  // by the data in float little endian format.

  int size = M.NumRows() * M.NumCols();
  os.write((char*)&size, sizeof(int));
  if (os.fail())  goto bad;

  MatrixIndexT i;
  MatrixIndexT j;
  if (sizeof(Real) == sizeof(float) && MachineIsLittleEndian()) {
    for (i = 0; i< M.NumRows(); i++) {  // Unlikely to reach here ever!
      os.write((char*)M.RowData(i), sizeof(float)*M.NumCols());
      if (os.fail()) goto bad;
    }
  } else {
    float *pmem = new float[M.NumCols()];

    for (i = 0; i < M.NumRows(); i++) {
      const Real *rowData = M.RowData(i);
      for (j = 0;j < M.NumCols();j++)
        pmem[j] =  static_cast<float> ( rowData[j] );
      if (!MachineIsLittleEndian())
        for (j = 0;j < M.NumCols();j++)
          KALDI_SWAP4(pmem[j]);
      os.write((char*)pmem, sizeof(float)*M.NumCols());
      if (os.fail()) {
        delete [] pmem;
        goto bad;
      }
    }
    delete [] pmem;
  }
  return true;
bad:
  KALDI_WARN << "Could not write to Sphinx feature file";
  return false;
}

template
bool WriteSphinx(std::ostream &os, const MatrixBase<float> &M);

template
bool WriteSphinx(std::ostream &os, const MatrixBase<double> &M);

template <typename Real>
Real TraceMatMatMat(const MatrixBase<Real> &A, MatrixTransposeType transA,
                    const MatrixBase<Real> &B, MatrixTransposeType transB,
                    const MatrixBase<Real> &C, MatrixTransposeType transC) {
  MatrixIndexT ARows = A.NumRows(), ACols = A.NumCols(), BRows = B.NumRows(),
BCols = B.NumCols(),
      CRows = C.NumRows(), CCols = C.NumCols();
  if (transA == kTrans) std::swap(ARows, ACols);
  if (transB == kTrans) std::swap(BRows, BCols);
  if (transC == kTrans) std::swap(CRows, CCols);
  KALDI_ASSERT( CCols == ARows && ACols == BRows && BCols == CRows &&
"TraceMatMatMat: args have mismatched dimensions.");
  if (ARows*BCols < std::min(BRows*CCols, CRows*ACols)) {
    Matrix<Real> AB(ARows, BCols);
    AB.AddMatMat(1.0, A, transA, B, transB, 0.0);  // AB = A * B.
    return TraceMatMat(AB, C, transC);
  } else if ( BRows*CCols < CRows*ACols) {
    Matrix<Real> BC(BRows, CCols);
    BC.AddMatMat(1.0, B, transB, C, transC, 0.0);  // BC = B * C.
    return TraceMatMat(BC, A, transA);
  } else {
    Matrix<Real> CA(CRows, ACols);
    CA.AddMatMat(1.0, C, transC, A, transA, 0.0);  // CA = C * A
    return TraceMatMat(CA, B, transB);
  }
}

template
float TraceMatMatMat(const MatrixBase<float> &A, MatrixTransposeType transA,
                     const MatrixBase<float> &B, MatrixTransposeType transB,
                     const MatrixBase<float> &C, MatrixTransposeType transC);

template
double TraceMatMatMat(const MatrixBase<double> &A, MatrixTransposeType transA,
                      const MatrixBase<double> &B, MatrixTransposeType transB,
                      const MatrixBase<double> &C, MatrixTransposeType transC);


template <typename Real>
Real TraceMatMatMatMat(const MatrixBase<Real> &A, MatrixTransposeType transA,
                       const MatrixBase<Real> &B, MatrixTransposeType transB,
                       const MatrixBase<Real> &C, MatrixTransposeType transC,
                       const MatrixBase<Real> &D, MatrixTransposeType transD) {
  MatrixIndexT ARows = A.NumRows(), ACols = A.NumCols(), BRows = B.NumRows(),
BCols = B.NumCols(),
      CRows = C.NumRows(), CCols = C.NumCols(), DRows = D.NumRows(), DCols =
D.NumCols();
  if (transA == kTrans) std::swap(ARows, ACols);
  if (transB == kTrans) std::swap(BRows, BCols);
  if (transC == kTrans) std::swap(CRows, CCols);
  if (transD == kTrans) std::swap(DRows, DCols);
  KALDI_ASSERT( DCols == ARows && ACols == BRows && BCols == CRows && CCols ==
DRows && "TraceMatMatMat: args have mismatched dimensions.");
  if (ARows*BCols < std::min(BRows*CCols, std::min(CRows*DCols, DRows*ACols))) {
    Matrix<Real> AB(ARows, BCols);
    AB.AddMatMat(1.0, A, transA, B, transB, 0.0);  // AB = A * B.
    return TraceMatMatMat(AB, kNoTrans, C, transC, D, transD);
  } else if ((BRows*CCols) < std::min(CRows*DCols, DRows*ACols)) {
    Matrix<Real> BC(BRows, CCols);
    BC.AddMatMat(1.0, B, transB, C, transC, 0.0);  // BC = B * C.
    return TraceMatMatMat(BC, kNoTrans, D, transD, A, transA);
  } else if (CRows*DCols < DRows*ACols) {
    Matrix<Real> CD(CRows, DCols);
    CD.AddMatMat(1.0, C, transC, D, transD, 0.0);  // CD = C * D
    return TraceMatMatMat(CD, kNoTrans, A, transA, B, transB);
  } else {
    Matrix<Real> DA(DRows, ACols);
    DA.AddMatMat(1.0, D, transD, A, transA, 0.0);  // DA = D * A
    return TraceMatMatMat(DA, kNoTrans, B, transB, C, transC);
  }
}

template
float TraceMatMatMatMat(const MatrixBase<float> &A, MatrixTransposeType transA,
                        const MatrixBase<float> &B, MatrixTransposeType transB,
                        const MatrixBase<float> &C, MatrixTransposeType transC,
                        const MatrixBase<float> &D, MatrixTransposeType transD);

template
double TraceMatMatMatMat(const MatrixBase<double> &A, MatrixTransposeType
transA,
                         const MatrixBase<double> &B, MatrixTransposeType
transB,
                         const MatrixBase<double> &C, MatrixTransposeType
transC,
                         const MatrixBase<double> &D, MatrixTransposeType
transD);

template<typename Real> void  SortSvd(VectorBase<Real> *s, MatrixBase<Real> *U,
                                   MatrixBase<Real> *Vt, bool
sort_on_absolute_value) {
  /// Makes sure the Svd is sorted (from greatest to least absolute value).
  MatrixIndexT num_singval = s->Dim();
  KALDI_ASSERT(U == NULL || U->NumCols() == num_singval);
  KALDI_ASSERT(Vt == NULL || Vt->NumRows() == num_singval);

  std::vector<std::pair<Real, MatrixIndexT> > vec(num_singval);
  // negative because we want reverse order.
  for (MatrixIndexT d = 0; d < num_singval; d++) {
    Real val = (*s)(d),
        sort_val = -(sort_on_absolute_value ? std::abs(val) : val);
    vec[d] = std::pair<Real, MatrixIndexT>(sort_val, d);
  }
  std::sort(vec.begin(), vec.end());
  Vector<Real> s_copy(*s);
  for (MatrixIndexT d = 0; d < num_singval; d++)
    (*s)(d) = s_copy(vec[d].second);
  if (U != NULL) {
    Matrix<Real> Utmp(*U);
    MatrixIndexT dim = Utmp.NumRows();
    for (MatrixIndexT d = 0; d < num_singval; d++) {
      MatrixIndexT oldidx = vec[d].second;
      for (MatrixIndexT e = 0; e < dim; e++)
        (*U)(e, d) = Utmp(e, oldidx);
    }
  }
  if (Vt != NULL) {
    Matrix<Real> Vttmp(*Vt);
    for (MatrixIndexT d = 0; d < num_singval; d++)
      (*Vt).Row(d).CopyFromVec(Vttmp.Row(vec[d].second));
  }
}

template
void SortSvd(VectorBase<float> *s, MatrixBase<float> *U,
             MatrixBase<float> *Vt, bool);

template
void SortSvd(VectorBase<double> *s, MatrixBase<double> *U,
             MatrixBase<double> *Vt, bool);

template<typename Real>
void CreateEigenvalueMatrix(const VectorBase<Real> &re, const VectorBase<Real>
&im,
                            MatrixBase<Real> *D) {
  MatrixIndexT n = re.Dim();
  KALDI_ASSERT(im.Dim() == n && D->NumRows() == n && D->NumCols() == n);

  MatrixIndexT j = 0;
  D->SetZero();
  while (j < n) {
    if (im(j) == 0) {  // Real eigenvalue
      (*D)(j, j) = re(j);
      j++;
    } else {  // First of a complex pair
      KALDI_ASSERT(j+1 < n && ApproxEqual(im(j+1), -im(j))
                   && ApproxEqual(re(j+1), re(j)));
      /// if (im(j) < 0.0) KALDI_WARN << "Negative first im part of pair";  //
TEMP
      Real lambda = re(j), mu = im(j);
      // create 2x2 block [lambda, mu; -mu, lambda]
      (*D)(j, j) = lambda;
      (*D)(j, j+1) = mu;
      (*D)(j+1, j) = -mu;
      (*D)(j+1, j+1) = lambda;
      j += 2;
    }
  }
}

template
void CreateEigenvalueMatrix(const VectorBase<float> &re, const VectorBase<float>
&im,
                            MatrixBase<float> *D);
template
void CreateEigenvalueMatrix(const VectorBase<double> &re, const
VectorBase<double> &im,
                            MatrixBase<double> *D);


template<typename Real>
bool AttemptComplexPower(Real *x_re, Real *x_im, Real power) {
  // Used in Matrix<Real>::Power().
  // Attempts to take the complex value x to the power "power",
  // assuming that power is fractional (i.e. we don't treat integers as a
  // special case).  Returns false if this is not possible, either
  // because x is negative and real (hence there is no obvious answer
  // that is "closest to 1", and anyway this case does not make sense
  // in the Matrix<Real>::Power() routine);
  // or because power is negative, and x is zero.

  // First solve for r and theta in
  // x_re = r*cos(theta), x_im = r*sin(theta)
  if (*x_re < 0.0 && *x_im == 0.0) return false;  // can't do
  // it for negative real values.
  Real r = std::sqrt((*x_re * *x_re) + (*x_im * *x_im));  // r == radius.
  if (power < 0.0 && r == 0.0) return false;
  Real theta = std::atan2(*x_im, *x_re);
  // Take the power.
  r = std::pow(r, power);
  theta *= power;
  *x_re = r * std::cos(theta);
  *x_im = r * std::sin(theta);
  return true;
}

template
bool AttemptComplexPower(float *x_re, float *x_im, float power);
template
bool AttemptComplexPower(double *x_re, double *x_im, double power);


template <typename Real>
Real TraceMatMat(const MatrixBase<Real> &A,
                  const MatrixBase<Real> &B,
                  MatrixTransposeType trans) {  // tr(A B), equivalent to sum of
each element of A times same element in B'
  MatrixIndexT aStride = A.stride_, bStride = B.stride_;
  if (trans == kNoTrans) {
    KALDI_ASSERT(A.NumRows() == B.NumCols() && A.NumCols() == B.NumRows());
    Real ans = 0.0;
    Real *adata = A.data_, *bdata = B.data_;
    MatrixIndexT arows = A.NumRows(), acols = A.NumCols();
    for (MatrixIndexT row = 0;row < arows;row++, adata+=aStride, bdata++)
      ans += cblas_Xdot(acols, adata, 1, bdata, bStride);
    return ans;
  } else {
    KALDI_ASSERT(A.NumRows() == B.NumRows() && A.NumCols() == B.NumCols());
    Real ans = 0.0;
    Real *adata = A.data_, *bdata = B.data_;
    MatrixIndexT arows = A.NumRows(), acols = A.NumCols();
    for (MatrixIndexT row = 0;row < arows;row++, adata+=aStride, bdata+=bStride)
      ans += cblas_Xdot(acols, adata, 1, bdata, 1);
    return ans;
  }
}


// Instantiate the template above for float and double.
template
float TraceMatMat(const MatrixBase<float> &A,
                  const MatrixBase<float> &B,
                  MatrixTransposeType trans);
template
double TraceMatMat(const MatrixBase<double> &A,
                  const MatrixBase<double> &B,
                  MatrixTransposeType trans);


template<typename Real>
Real MatrixBase<Real>::LogSumExp(Real prune) const {
  Real sum;
  if (sizeof(sum) == 8) sum = kLogZeroDouble;
  else sum = kLogZeroFloat;
  Real max_elem = Max(), cutoff;
  if (sizeof(Real) == 4) cutoff = max_elem + kMinLogDiffFloat;
  else cutoff = max_elem + kMinLogDiffDouble;
  if (prune > 0.0 && max_elem - prune > cutoff) // explicit pruning...
    cutoff = max_elem - prune;

  double sum_relto_max_elem = 0.0;

  for (MatrixIndexT i = 0; i < num_rows_; i++) {
    for (MatrixIndexT j = 0; j < num_cols_; j++) {
      BaseFloat f = (*this)(i, j);
      if (f >= cutoff)
        sum_relto_max_elem += kaldi::Exp(f - max_elem);
    }
  }
  return max_elem + kaldi::Log(sum_relto_max_elem);
}

template<typename Real>
Real MatrixBase<Real>::ApplySoftMax() {
  Real max = this->Max(), sum = 0.0;
  // the 'max' helps to get in good numeric range.
  for (MatrixIndexT i = 0; i < num_rows_; i++)
    for (MatrixIndexT j = 0; j < num_cols_; j++)
      sum += ((*this)(i, j) = kaldi::Exp((*this)(i, j) - max));
  this->Scale(1.0 / sum);
  return max + kaldi::Log(sum);
}

template<typename Real>
void MatrixBase<Real>::Tanh(const MatrixBase<Real> &src) {
  KALDI_ASSERT(SameDim(*this, src));

  if (num_cols_ == stride_ && src.num_cols_ == src.stride_) {
    SubVector<Real> src_vec(src.data_, num_rows_ * num_cols_),
        dst_vec(this->data_, num_rows_ * num_cols_);
    dst_vec.Tanh(src_vec);
  } else {
    for (MatrixIndexT r = 0; r < num_rows_; r++) {
      SubVector<Real> src_vec(src, r), dest_vec(*this, r);
      dest_vec.Tanh(src_vec);
    }
  }
}

template<typename Real>
void MatrixBase<Real>::SoftHinge(const MatrixBase<Real> &src) {
  KALDI_ASSERT(SameDim(*this, src));
  int32 num_rows = num_rows_, num_cols = num_cols_;
  for (MatrixIndexT r = 0; r < num_rows; r++) {
    Real *row_data = this->RowData(r);
    const Real *src_row_data = src.RowData(r);
    for (MatrixIndexT c = 0; c < num_cols; c++) {
      Real x = src_row_data[c], y;
      if (x > 10.0) y = x; // avoid exponentiating large numbers; function
      // approaches y=x.
      else y = Log1p(kaldi::Exp(x)); // these defined in kaldi-math.h
      row_data[c] = y;
    }
  }
}

template<typename Real>
void MatrixBase<Real>::GroupPnorm(const MatrixBase<Real> &src, Real power) {
  KALDI_ASSERT(src.NumCols() % this->NumCols() == 0 &&
               src.NumRows() == this->NumRows());
  int group_size = src.NumCols() / this->NumCols(),
    num_rows = this->NumRows(), num_cols = this->NumCols();
  for (MatrixIndexT i = 0; i < num_rows; i++)
    for (MatrixIndexT j = 0; j < num_cols; j++)
      (*this)(i, j) = src.Row(i).Range(j * group_size,  group_size).Norm(power);
}

template<typename Real>
void MatrixBase<Real>::GroupMax(const MatrixBase<Real> &src) {
  KALDI_ASSERT(src.NumCols() % this->NumCols() == 0 &&
               src.NumRows() == this->NumRows());
  int group_size = src.NumCols() / this->NumCols(),
      num_rows = this->NumRows(), num_cols = this->NumCols();
  for (MatrixIndexT i = 0; i < num_rows; i++) {
    const Real *src_row_data = src.RowData(i);
    for (MatrixIndexT j = 0; j < num_cols; j++) {
      Real max_val = -1e20;
      for (MatrixIndexT k = 0; k < group_size; k++) {
        Real src_data = src_row_data[j * group_size + k];
        if (src_data > max_val)
          max_val = src_data;
      }
      (*this)(i, j) = max_val;
    }
  }
}
*/
// Fills every column c of *this with column indices[c] of src; a negative
// index zeroes column c instead.  "indices" is read for c in [0, NumCols()),
// and src must have the same number of rows as *this.  When compiled with
// KALDI_PARANOID every index is additionally checked to lie in
// [-1, src.NumCols()-1].
template <typename Real>
void MatrixBase<Real>::CopyCols(const MatrixBase<Real> &src,
                                const MatrixIndexT *indices) {
    KALDI_ASSERT(NumRows() == src.NumRows());
    const MatrixIndexT row_count = num_rows_, col_count = num_cols_;
    const MatrixIndexT dst_stride = stride_, src_stride = src.stride_;
#ifdef KALDI_PARANOID
    const MatrixIndexT src_cols = src.NumCols();
    for (MatrixIndexT c = 0; c < col_count; c++)
        KALDI_ASSERT(indices[c] >= -1 && indices[c] < src_cols);
#endif

    // Walk the matrices one row at a time (better memory locality than a
    // column-wise copy with cblas_Xcopy).
    Real *dst_row = this->data_;
    const Real *src_row = src.data_;
    for (MatrixIndexT r = 0; r < row_count; r++) {
        for (MatrixIndexT c = 0; c < col_count; c++) {
            const MatrixIndexT from = indices[c];
            if (from >= 0)
                dst_row[c] = src_row[from];
            else
                dst_row[c] = 0;
        }
        dst_row += dst_stride;
        src_row += src_stride;
    }
}

/*
template<typename Real>
void MatrixBase<Real>::AddCols(const MatrixBase<Real> &src,
                               const MatrixIndexT *indices) {
  KALDI_ASSERT(NumRows() == src.NumRows());
  MatrixIndexT num_rows = num_rows_, num_cols = num_cols_,
      this_stride = stride_, src_stride = src.stride_;
  Real *this_data = this->data_;
  const Real *src_data = src.data_;
#ifdef KALDI_PARANOID
  MatrixIndexT src_cols = src.NumCols();
  for (MatrixIndexT i = 0; i < num_cols; i++)
    KALDI_ASSERT(indices[i] >= -1 && indices[i] < src_cols);
#endif

  // For the sake of memory locality we do this row by row, rather
  // than doing it column-wise using cublas_Xcopy
  for (MatrixIndexT r = 0; r < num_rows; r++, this_data += this_stride, src_data
+= src_stride) {
    const MatrixIndexT *index_ptr = &(indices[0]);
    for (MatrixIndexT c = 0; c < num_cols; c++, index_ptr++) {
      if (*index_ptr >= 0)
        this_data[c] += src_data[*index_ptr];
    }
  }
}*/

/*
template<typename Real>
void MatrixBase<Real>::CopyRows(const MatrixBase<Real> &src,
                                const MatrixIndexT *indices) {
  KALDI_ASSERT(NumCols() == src.NumCols());
  MatrixIndexT num_rows = num_rows_, num_cols = num_cols_,
      this_stride = stride_;
  Real *this_data = this->data_;

  for (MatrixIndexT r = 0; r < num_rows; r++, this_data += this_stride) {
    MatrixIndexT index = indices[r];
    if (index < 0) memset(this_data, 0, sizeof(Real) * num_cols_);
    else cblas_Xcopy(num_cols, src.RowData(index), 1, this_data, 1);
  }
}

template<typename Real>
void MatrixBase<Real>::CopyRows(const Real *const *src) {
  MatrixIndexT num_rows = num_rows_,
      num_cols = num_cols_, this_stride = stride_;
  Real *this_data = this->data_;

  for (MatrixIndexT r = 0; r < num_rows; r++, this_data += this_stride) {
    const Real *const src_data = src[r];
    if (src_data == NULL) memset(this_data, 0, sizeof(Real) * num_cols);
    else cblas_Xcopy(num_cols, src_data, 1, this_data, 1);
  }
}

template<typename Real>
void MatrixBase<Real>::CopyToRows(Real *const *dst) const {
  MatrixIndexT num_rows = num_rows_,
      num_cols = num_cols_, this_stride = stride_;
  const Real *this_data = this->data_;

  for (MatrixIndexT r = 0; r < num_rows; r++, this_data += this_stride) {
    Real *const dst_data = dst[r];
    if (dst_data != NULL)
      cblas_Xcopy(num_cols, this_data, 1, dst_data, 1);
  }
}

template<typename Real>
void MatrixBase<Real>::AddRows(Real alpha,
                               const MatrixBase<Real> &src,
                               const MatrixIndexT *indexes) {
  KALDI_ASSERT(NumCols() == src.NumCols());
  MatrixIndexT num_rows = num_rows_,
      num_cols = num_cols_, this_stride = stride_;
  Real *this_data = this->data_;

  for (MatrixIndexT r = 0; r < num_rows; r++, this_data += this_stride) {
    MatrixIndexT index = indexes[r];
    KALDI_ASSERT(index >= -1 && index < src.NumRows());
    if (index != -1)
      cblas_Xaxpy(num_cols, alpha, src.RowData(index), 1, this_data, 1);
  }
}

template<typename Real>
void MatrixBase<Real>::AddRows(Real alpha, const Real *const *src) {
  MatrixIndexT num_rows = num_rows_,
      num_cols = num_cols_, this_stride = stride_;
  Real *this_data = this->data_;

  for (MatrixIndexT r = 0; r < num_rows; r++, this_data += this_stride) {
    const Real *const src_data = src[r];
    if (src_data != NULL)
      cblas_Xaxpy(num_cols, alpha, src_data, 1, this_data, 1);
  }
}

template<typename Real>
void MatrixBase<Real>::AddToRows(Real alpha,
                                 const MatrixIndexT *indexes,
                                 MatrixBase<Real> *dst) const {
  KALDI_ASSERT(NumCols() == dst->NumCols());
  MatrixIndexT num_rows = num_rows_,
      num_cols = num_cols_, this_stride = stride_;
  Real *this_data = this->data_;

  for (MatrixIndexT r = 0; r < num_rows; r++, this_data += this_stride) {
    MatrixIndexT index = indexes[r];
    KALDI_ASSERT(index >= -1 && index < dst->NumRows());
    if (index != -1)
      cblas_Xaxpy(num_cols, alpha, this_data, 1, dst->RowData(index), 1);
  }
}

template<typename Real>
void MatrixBase<Real>::AddToRows(Real alpha, Real *const *dst) const {
  MatrixIndexT num_rows = num_rows_,
      num_cols = num_cols_, this_stride = stride_;
  const Real *this_data = this->data_;

  for (MatrixIndexT r = 0; r < num_rows; r++, this_data += this_stride) {
    Real *const dst_data = dst[r];
    if (dst_data != NULL)
      cblas_Xaxpy(num_cols, alpha, this_data, 1, dst_data, 1);
  }
}

template<typename Real>
void MatrixBase<Real>::Sigmoid(const MatrixBase<Real> &src) {
  KALDI_ASSERT(SameDim(*this, src));

  if (num_cols_ == stride_ && src.num_cols_ == src.stride_) {
    SubVector<Real> src_vec(src.data_, num_rows_ * num_cols_),
        dst_vec(this->data_, num_rows_ * num_cols_);
    dst_vec.Sigmoid(src_vec);
  } else {
    for (MatrixIndexT r = 0; r < num_rows_; r++) {
      SubVector<Real> src_vec(src, r), dest_vec(*this, r);
      dest_vec.Sigmoid(src_vec);
    }
  }
}

template<typename Real>
void MatrixBase<Real>::DiffSigmoid(const MatrixBase<Real> &value,
                                   const MatrixBase<Real> &diff) {
  KALDI_ASSERT(SameDim(*this, value) && SameDim(*this, diff));
  MatrixIndexT num_rows = num_rows_, num_cols = num_cols_,
      stride = stride_, value_stride = value.stride_, diff_stride =
diff.stride_;
  Real *data = data_;
  const Real *value_data = value.data_, *diff_data = diff.data_;
  for (MatrixIndexT r = 0; r < num_rows; r++) {
    for (MatrixIndexT c = 0; c < num_cols; c++)
      data[c] = diff_data[c] * value_data[c] * (1.0 - value_data[c]);
    data += stride;
    value_data += value_stride;
    diff_data += diff_stride;
  }
}

template<typename Real>
void MatrixBase<Real>::DiffTanh(const MatrixBase<Real> &value,
                                   const MatrixBase<Real> &diff) {
  KALDI_ASSERT(SameDim(*this, value) && SameDim(*this, diff));
  MatrixIndexT num_rows = num_rows_, num_cols = num_cols_,
      stride = stride_, value_stride = value.stride_, diff_stride =
diff.stride_;
  Real *data = data_;
  const Real *value_data = value.data_, *diff_data = diff.data_;
  for (MatrixIndexT r = 0; r < num_rows; r++) {
    for (MatrixIndexT c = 0; c < num_cols; c++)
      data[c] = diff_data[c] * (1.0 - (value_data[c] * value_data[c]));
    data += stride;
    value_data += value_stride;
    diff_data += diff_stride;
  }
}*/

/*
template<typename Real>
template<typename OtherReal>
void MatrixBase<Real>::AddVecToRows(const Real alpha, const
VectorBase<OtherReal> &v) {
  const MatrixIndexT num_rows = num_rows_, num_cols = num_cols_,
      stride = stride_;
  KALDI_ASSERT(v.Dim() == num_cols);
  if(num_cols <= 64) {
    Real *data = data_;
    const OtherReal *vdata = v.Data();
    for (MatrixIndexT i = 0; i < num_rows; i++, data += stride) {
      for (MatrixIndexT j = 0; j < num_cols; j++)
        data[j] += alpha * vdata[j];
    }

  } else {
    Vector<OtherReal> ones(num_rows);
    ones.Set(1.0);
    this->AddVecVec(alpha, ones, v);
   }
}

template void MatrixBase<float>::AddVecToRows(const float alpha,
                                              const VectorBase<float> &v);
template void MatrixBase<float>::AddVecToRows(const float alpha,
                                              const VectorBase<double> &v);
template void MatrixBase<double>::AddVecToRows(const double alpha,
                                               const VectorBase<float> &v);
template void MatrixBase<double>::AddVecToRows(const double alpha,
                                               const VectorBase<double> &v);


template<typename Real>
template<typename OtherReal>
void MatrixBase<Real>::AddVecToCols(const Real alpha, const
VectorBase<OtherReal> &v) {
  const MatrixIndexT num_rows = num_rows_, num_cols = num_cols_,
      stride = stride_;
  KALDI_ASSERT(v.Dim() == num_rows);

  if (num_rows <= 64) {
    Real *data = data_;
    const OtherReal *vdata = v.Data();
    for (MatrixIndexT i = 0; i < num_rows; i++, data += stride) {
      Real to_add = alpha * vdata[i];
      for (MatrixIndexT j = 0; j < num_cols; j++)
        data[j] += to_add;
    }

  } else {
    Vector<OtherReal> ones(num_cols);
    ones.Set(1.0);
    this->AddVecVec(alpha, v, ones);
  }
}

template void MatrixBase<float>::AddVecToCols(const float alpha,
                                              const VectorBase<float> &v);
template void MatrixBase<float>::AddVecToCols(const float alpha,
                                              const VectorBase<double> &v);
template void MatrixBase<double>::AddVecToCols(const double alpha,
                                               const VectorBase<float> &v);
template void MatrixBase<double>::AddVecToCols(const double alpha,
                                               const VectorBase<double> &v);
*/
// Explicit instantiation of the class templates for float and double.
// These are deliberately placed at the end of the file: an explicit
// instantiation definition of a class template only instantiates the member
// functions that have already been defined at that point, so placing them
// earlier would leave the members defined afterwards uninstantiated.

template class Matrix<float>;
template class Matrix<double>;
template class MatrixBase<float>;
template class MatrixBase<double>;
template class SubMatrix<float>;
template class SubMatrix<double>;

}  // namespace kaldi


================================================
FILE: runtime/engine/common/matrix/kaldi-matrix.h
================================================
// matrix/kaldi-matrix.h

// Copyright 2009-2011  Ondrej Glembek;  Microsoft Corporation;  Lukas Burget;
//                      Saarland University;  Petr Schwarz;  Yanmin Qian;
//                      Karel Vesely;  Go Vivace Inc.;  Haihua Xu
//           2017       Shiyin Kang
//           2019       Yiwen Shao

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_MATRIX_KALDI_MATRIX_H_
#define KALDI_MATRIX_KALDI_MATRIX_H_ 1

#include <algorithm>

#include "matrix/matrix-common.h"

namespace kaldi {

/// @{ \addtogroup matrix_funcs_scalar

/// \addtogroup matrix_group
/// @{

/// Base class which provides matrix operations not involving resizing
/// or allocation.   Classes Matrix and SubMatrix inherit from it and take care
/// of allocation and resizing.
template <typename Real>
class MatrixBase {
  public:
    // so this child can access protected members of other instances.
    friend class Matrix<Real>;
    friend class SubMatrix<Real>;
    // friend declarations for CUDA matrices (see ../cudamatrix/)

    /// Number of rows in the matrix (zero for an empty matrix).
    inline MatrixIndexT NumRows() const {
        return this->num_rows_;
    }

    /// Number of columns in the matrix (zero for an empty matrix).
    inline MatrixIndexT NumCols() const {
        return this->num_cols_;
    }

    /// Row stride: the distance in memory, counted in elements, between the
    /// starts of consecutive rows.  Will be >= NumCols.
    inline MatrixIndexT Stride() const {
        return this->stride_;
    }

    /// Size in bytes of the data held by the matrix, computed as
    /// rows * stride * sizeof(Real).
    size_t SizeInBytes() const {
        // Widen to size_t first so the product is not evaluated in
        // MatrixIndexT.
        const size_t row_count = static_cast<size_t>(num_rows_);
        const size_t row_pitch = static_cast<size_t>(stride_);
        return row_count * row_pitch * sizeof(Real);
    }

    /// Read-only pointer to the first element of the raw data.
    inline const Real *Data() const {
        return this->data_;
    }

    /// Writable pointer to the first element of the raw data.
    inline Real *Data() {
        return this->data_;
    }

    /// Returns pointer to data for one row (non-const).
    /// @param i  Row index; asserted to lie in [0, NumRows()).
    /// @return   Pointer to the first element of row i.
    inline Real *RowData(MatrixIndexT i) {
        KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
                     static_cast<UnsignedMatrixIndexT>(num_rows_));
        // Widen before multiplying (as SizeInBytes() does): evaluating
        // i * stride_ in MatrixIndexT could overflow for matrices with very
        // many elements -- presumably MatrixIndexT is a 32-bit signed type.
        return data_ + static_cast<size_t>(i) * static_cast<size_t>(stride_);
    }

    /// Returns pointer to data for one row (const).
    /// @param i  Row index; asserted to lie in [0, NumRows()).
    /// @return   Read-only pointer to the first element of row i.
    inline const Real *RowData(MatrixIndexT i) const {
        KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
                     static_cast<UnsignedMatrixIndexT>(num_rows_));
        // Widen before multiplying (as SizeInBytes() does): evaluating
        // i * stride_ in MatrixIndexT could overflow for matrices with very
        // many elements -- presumably MatrixIndexT is a 32-bit signed type.
        return data_ + static_cast<size_t>(i) * static_cast<size_t>(stride_);
    }

    /// Indexing operator, non-const
    /// (only checks sizes if compiled with -DKALDI_PARANOID).
    /// @param r  Row index.
    /// @param c  Column index.
    /// @return   Reference to the element at (r, c).
    inline Real &operator()(MatrixIndexT r, MatrixIndexT c) {
        KALDI_PARANOID_ASSERT(
            static_cast<UnsignedMatrixIndexT>(r) <
                static_cast<UnsignedMatrixIndexT>(num_rows_) &&
            static_cast<UnsignedMatrixIndexT>(c) <
                static_cast<UnsignedMatrixIndexT>(num_cols_));
        // Widen before multiplying (as SizeInBytes() does): evaluating
        // r * stride_ + c in MatrixIndexT could overflow for matrices with
        // very many elements -- presumably MatrixIndexT is 32-bit signed.
        const size_t offset =
            static_cast<size_t>(r) * static_cast<size_t>(stride_) +
            static_cast<size_t>(c);
        return *(data_ + offset);
    }
    /// Named equivalent of the non-const operator()(r, c), provided for ease
    /// of debugging (gdb doesn't work with the parenthesis operator).
    Real &Index(MatrixIndexT r, MatrixIndexT c) {
        return this->operator()(r, c);
    }

    /// Indexing operator, const
    /// (only checks sizes if compiled with -DKALDI_PARANOID).
    /// @param r  Row index.
    /// @param c  Column index.
    /// @return   Copy of the element at (r, c).
    inline const Real operator()(MatrixIndexT r, MatrixIndexT c) const {
        KALDI_PARANOID_ASSERT(
            static_cast<UnsignedMatrixIndexT>(r) <
                static_cast<UnsignedMatrixIndexT>(num_rows_) &&
            static_cast<UnsignedMatrixIndexT>(c) <
                static_cast<UnsignedMatrixIndexT>(num_cols_));
        // Widen before multiplying (as SizeInBytes() does): evaluating
        // r * stride_ + c in MatrixIndexT could overflow for matrices with
        // very many elements -- presumably MatrixIndexT is 32-bit signed.
        const size_t offset =
            static_cast<size_t>(r) * static_cast<size_t>(stride_) +
            static_cast<size_t>(c);
        return *(data_ + offset);
    }

    /*   Basic setting-to-special values functions. */

    /// Sets matrix to zero.
    void SetZero();
    /// Sets all elements to a specific value.
    void Set(Real);
    /// Sets to zero, except ones along diagonal [for non-square matrices too]

    /// Copy given matrix. (no resize is done).
    template <typename OtherReal>
    void CopyFromMat(const MatrixBase<OtherReal> &M,
                     MatrixTransposeType trans = kNoTrans);

    /// Copy from compressed matrix.
    // void CopyFromMat(const CompressedMatrix &M);

    /// Copy given tpmatrix. (no resize is done).
    // template<typename OtherReal>
    // void CopyFromTp(const TpMatrix<OtherReal> &M,
    // MatrixTransposeType trans = kNoTrans);

    /// Copy from CUDA matrix.  Implemented in ../cudamatrix/cu-matrix.h
    // template<typename OtherReal>
    // void CopyFromMat(const CuMatrixBase<OtherReal> &M,
    // MatrixTransposeType trans = kNoTrans);

    /// This function has two modes of operation.  If v.Dim() == NumRows() *
    /// NumCols(), then treats the vector as a row-by-row concatenation of a
    /// matrix and copies to *this.
    /// if v.Dim() == NumCols(), it sets each row of *this to a copy of v.
    void CopyRowsFromVec(const VectorBase<Real> &v);

    /// This version of CopyRowsFromVec is implemented in
    /// ../cudamatrix/cu-vector.cc
    // void CopyRowsFromVec(const CuVectorBase<Real> &v);

    template <typename OtherReal>
    void CopyRowsFromVec(const VectorBase<OtherReal> &v);

    /// Copies vector into matrix, column-by-column.
    /// Note that rv.Dim() must either equal NumRows()*NumCols() or NumRows();
    /// this has two modes of operation.
    void CopyColsFromVec(const VectorBase<Real> &v);

    /// Copy vector into specific column of matrix.
    void CopyColFromVec(const VectorBase<Real> &v, const MatrixIndexT col);
    /// Copy vector into specific row of matrix.
    void CopyRowFromVec(const VectorBase<Real> &v, const MatrixIndexT row);
    /// Copy vector into diagonal of matrix.
    void CopyDiagFromVec(const VectorBase<Real> &v);

    /* Accessing of sub-parts of the matrix. */

    /// Return specific row of matrix [const].
    /// @param i  Row index; asserted to lie in [0, NumRows()).
    /// @return   SubVector of length NumCols() starting at the first element
    ///           of row i.
    inline const SubVector<Real> Row(MatrixIndexT i) const {
        KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
                     static_cast<UnsignedMatrixIndexT>(num_rows_));
        // Widen before multiplying (as SizeInBytes() does): evaluating
        // i * stride_ in MatrixIndexT could overflow for matrices with very
        // many elements -- presumably MatrixIndexT is a 32-bit signed type.
        return SubVector<Real>(
            data_ + static_cast<size_t>(i) * static_cast<size_t>(stride_),
            NumCols());
    }

    /// Return specific row of matrix.
    /// @param i  Row index; asserted to lie in [0, NumRows()).
    /// @return   SubVector of length NumCols() starting at the first element
    ///           of row i.
    inline SubVector<Real> Row(MatrixIndexT i) {
        KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
                     static_cast<UnsignedMatrixIndexT>(num_rows_));
        // Widen before multiplying (as SizeInBytes() does): evaluating
        // i * stride_ in MatrixIndexT could overflow for matrices with very
        // many elements -- presumably MatrixIndexT is a 32-bit signed type.
        return SubVector<Real>(
            data_ + static_cast<size_t>(i) * static_cast<size_t>(stride_),
            NumCols());
    }

    /// Return a sub-part of the matrix as a SubMatrix.  The four arguments
    /// (row offset, row count, column offset, column count) are forwarded
    /// unchanged to the SubMatrix constructor together with *this.
    inline SubMatrix<Real> Range(const MatrixIndexT row_offset,
                                 const MatrixIndexT num_rows,
                                 const MatrixIndexT col_offset,
                                 const MatrixIndexT num_cols) const {
        const MatrixBase<Real> &self = *this;
        return SubMatrix<Real>(
            self, row_offset, num_rows, col_offset, num_cols);
    }
    /// Return a SubMatrix covering num_rows rows starting at row_offset,
    /// with all columns of this matrix.
    inline SubMatrix<Real> RowRange(const MatrixIndexT row_offset,
                                    const MatrixIndexT num_rows) const {
        const MatrixIndexT first_col = 0;
        const MatrixIndexT all_cols = num_cols_;
        return SubMatrix<Real>(
            *this, row_offset, num_rows, first_col, all_cols);
    }
    /// Return a SubMatrix covering num_cols columns starting at col_offset,
    /// with all rows of this matrix.
    inline SubMatrix<Real> ColRange(const MatrixIndexT col_offset,
                                    const MatrixIndexT num_cols) const {
        const MatrixIndexT first_row = 0;
        const MatrixIndexT all_rows = num_rows_;
        return SubMatrix<Real>(
            *this, first_row, all_rows, col_offset, num_cols);
    }

    /*
      /// Returns sum of all elements in matrix.
      Real Sum() const;
      /// Returns trace of matrix.
      Real Trace(bool check_square = true) const;
      // If check_square = true, will crash if matrix is not square.

      /// Returns maximum element of matrix.
      Real Max() const;
      /// Returns minimum element of matrix.
      Real Min() const;

      /// Element by element multiplication with a given matrix.
      void MulElements(const MatrixBase<Real> &A);

      /// Divide each element by the corresponding element of a given matrix.
      void DivElements(const MatrixBase<Real> &A);

      /// Multiply each element with a scalar value.
      void Scale(Real alpha);

      /// Set, element-by-element, *this = max(*this, A)
      void Max(const MatrixBase<Real> &A);
      /// Set, element-by-element, *this = min(*this, A)
      void Min(const MatrixBase<Real> &A);

      /// Equivalent to (*this) = (*this) * diag(scale).  Scaling
      /// each column by a scalar taken from that dimension of the vector.
      void MulColsVec(const VectorBase<Real> &scale);

      /// Equivalent to (*this) = diag(scale) * (*this).  Scaling
      /// each row by a scalar taken from that dimension of the vector.
      void MulRowsVec(const VectorBase<Real> &scale);

      /// Divide each row into src.NumCols() equal groups, and then scale i'th
      row's
      /// j'th group of elements by src(i, j).  Requires src.NumRows() ==
      /// this->NumRows() and this->NumCols() % src.NumCols() == 0.
      void MulRowsGroupMat(const MatrixBase<Real> &src);

      /// Returns logdet of matrix.
      Real LogDet(Real *det_sign = NULL) const;

      /// matrix inverse.
      /// if inverse_needed = false, will fill matrix with garbage.
      /// (only useful if logdet wanted).
      void Invert(Real *log_det = NULL, Real *det_sign = NULL,
                  bool inverse_needed = true);
      /// matrix inverse [double].
      /// if inverse_needed = false, will fill matrix with garbage
      /// (only useful if logdet wanted).
      /// Does inversion in double precision even if matrix was not double.
      void InvertDouble(Real *LogDet = NULL, Real *det_sign = NULL,
                          bool inverse_needed = true);
    */
    /// Inverts all the elements of the matrix
    void InvertElements();
    /*
      /// Transpose the matrix.  This one is only
      /// applicable to square matrices (the one in the
      /// Matrix child class works also for non-square.
      void Transpose();

    */
    /// Copies column r from column indices[r] of src.
    /// As a special case, if indices[r] == -1, sets column r to zero.
    /// All elements of "indices" must be in [-1, src.NumCols()-1],
    /// and src.NumRows() must equal this.NumRows().
    void CopyCols(const MatrixBase<Real> &src, const MatrixIndexT *indices);

    /// Copies row r from row indices[r] of src.
    /// As a special case, if indices[r] == -1, sets row r to zero.
    /// All elements of "indices" must be in [-1, src.NumRows()-1],
    /// and src.NumCols() must equal this.NumCols().
    void CopyRows(const MatrixBase<Real> &src, const MatrixIndexT *indices);

    /// Add column indices[r] of src to column r.
    /// As a special case, if indexes[i] == -1, skip column i
    /// indices.size() must equal this->NumCols(),
    /// all elements of "reorder" must be in [-1, src.NumCols()-1],
    /// and src.NumRows() must equal this.NumRows()
    // void AddCols(const MatrixBase<Real> &src,
    //            const MatrixIndexT *indices);

    /// Copies row r of this matrix from an array of floats at the location
    /// given by src[r]. If any src[r] is NULL then this.Row(r) will be set to
    /// zero.
    /// Note: we are using "pointer to const pointer to const object" for "src",
    ///       because we may create "src" by calling Data() of const CuArray.
    void CopyRows(const Real *const *src);

    /// Copies row r of this matrix to the array of floats at the location given
    /// by dst[r]. If dst[r] is NULL, does not copy anywhere.  Requires that
    /// none of the memory regions pointed to by the pointers in "dst" overlap
    /// (e.g. none of the pointers should be the same).
    void CopyToRows(Real *const *dst) const;

    /// Does for each row r, this.Row(r) += alpha * src.row(indexes[r]).
    /// If indexes[r] < 0, does not add anything. all elements of "indexes" must
    /// be in [-1, src.NumRows()-1], and src.NumCols() must equal
    /// this.NumCols().
    // void AddRows(Real alpha,
    //             const MatrixBase<Real> &src,
    //            const MatrixIndexT *indexes);

    /// Does for each row r, this.Row(r) += alpha * src[r], treating src[r] as
    /// the
    /// beginning of a region of memory representing a vector of floats, of the
    /// same length as this.NumCols(). If src[r] is NULL, does not add anything.
    // void AddRows(Real alpha, const Real *const *src);

    /// For each row r of this matrix, adds it (times alpha) to the array of
    /// floats at the location given by dst[r]. If dst[r] is NULL, does not do
    /// anything for that row. Requires that none of the memory regions pointed
    /// to by the pointers in "dst" overlap (e.g. none of the pointers should be
    /// the same).
    // void AddToRows(Real alpha, Real *const *dst) const;

    /// For each row i of *this, adds this->Row(i) to
    /// dst->Row(indexes(i)) if indexes(i) >= 0, else do nothing.
    /// Requires that all the indexes[i] that are >= 0
    /// be distinct, otherwise the behavior is undefined.
    // void AddToRows(Real alpha,
    //              const MatrixIndexT *indexes,
    //             MatrixBase<Real> *dst) const;
    /*
      inline void ApplyPow(Real power) {
        this -> Pow(*this, power);
      }


      inline void ApplyPowAbs(Real power, bool include_sign=false) {
        this -> PowAbs(*this, power, include_sign);
      }

      inline void ApplyHeaviside() {
        this -> Heaviside(*this);
      }

      inline void ApplyFloor(Real floor_val) {
        this -> Floor(*this, floor_val);
      }

      inline void ApplyCeiling(Real ceiling_val) {
        this -> Ceiling(*this, ceiling_val);
      }

      inline void ApplyExp() {
        this -> Exp(*this);
      }

      inline void ApplyExpSpecial() {
        this -> ExpSpecial(*this);
      }

      inline void ApplyExpLimited(Real lower_limit, Real upper_limit) {
        this -> ExpLimited(*this, lower_limit, upper_limit);
      }

      inline void ApplyLog() {
        this -> Log(*this);
      }
    */
    /// Eigenvalue Decomposition of a square NxN matrix into the form
    /// (*this) = P D P^{-1}.  Be careful: the relationship of D to the
    /// eigenvalues we output is slightly complicated, due to the need for P to
    /// be real.  In the symmetric case D is diagonal and real, but in the
    /// non-symmetric case there may be complex-conjugate pairs of eigenvalues.
    /// In this case, for the equation (*this) = P D P^{-1} to hold, D must
    /// actually be block diagonal, with 2x2 blocks corresponding to any such
    /// pairs.  If a pair is lambda +- i*mu, D will have a corresponding 2x2
    /// block [lambda, mu; -mu, lambda].
    /// Note that if the input matrix (*this) is non-invertible, P may not be
    /// invertible, so in this case instead of the equation
    /// (*this) = P D P^{-1} holding, we have instead (*this) P = P D.
    ///
    /// The non-member function CreateEigenvalueMatrix creates D from eigs_real
    /// and eigs_imag.
    // void Eig(MatrixBase<Real> *P,
    //        VectorBase<Real> *eigs_real,
    //       VectorBase<Real> *eigs_imag) const;

    /// The Power method attempts to take the matrix to a power using a method
    /// that works in general for fractional and negative powers.  The input
    /// matrix must be invertible and have reasonable condition (otherwise we
    /// don't guarantee the results).  The method is based on the eigenvalue
    /// decomposition.  It will return false and leave the matrix unchanged if,
    /// at entry, the matrix had real negative eigenvalues (or if it had zero
    /// eigenvalues and the power was negative).
    //  bool Power(Real pow);

    /** Singular value decomposition
       Major limitations:
       For nonsquare matrices, we assume m>=n (NumRows >= NumCols), and we
       return
       the "skinny" Svd, i.e. the matrix in the middle is diagonal, and the
       one on the left is rectangular.

       In Svd, *this = U*diag(S)*Vt.
       Null pointers for U and/or Vt at input mean we do not want that output.
       We
       expect that S.Dim() == m, U is either NULL or m by n,
       and v is either NULL or n by n.
       The singular values are not sorted (use SortSvd for that).  */
    // void DestructiveSvd(VectorBase<Real> *s, MatrixBase<Real> *U,
    //                   MatrixBase<Real> *Vt);  // Destroys calling matrix.

    /// Compute SVD (*this) = U diag(s) Vt.   Note that the V in the call is
    /// already
    /// transposed; the normal formulation is U diag(s) V^T.
    /// Null pointers for U or V mean we don't want that output (this saves
    /// compute).  The singular values are not sorted (use SortSvd for that).
    // void Svd(VectorBase<Real> *s, MatrixBase<Real> *U,
    //        MatrixBase<Real> *Vt) const;
    /// Compute SVD but only retain the singular values.
    // void Svd(VectorBase<Real> *s) const { Svd(s, NULL, NULL); }


    /// Returns smallest singular value.
    // Real MinSingularValue() const {
    // Vector<Real> tmp(std::min(NumRows(), NumCols()));
    // Svd(&tmp);
    // return tmp.Min();
    //}

    // void TestUninitialized() const;  // This function is designed so that if
    // any element of the matrix is uninitialized memory, valgrind will
    // complain.

    /// Returns condition number by computing Svd.  Works even if cols > rows.
    /// Returns infinity if all singular values are zero.
    /*
    Real Cond() const;

    /// Returns true if matrix is Symmetric.
    bool IsSymmetric(Real cutoff = 1.0e-05) const;  // replace magic number

    /// Returns true if matrix is Diagonal.
    bool IsDiagonal(Real cutoff = 1.0e-05) const;  // replace magic number

    /// Returns true if the matrix is all zeros, except for ones on diagonal.
    (it
    /// does not have to be square).  More specifically, this function returns
    /// false if for any i, j, (*this)(i, j) differs by more than cutoff from
    the
    /// expression (i == j ? 1 : 0).
    bool IsUnit(Real cutoff = 1.0e-05) const;     // replace magic number

    /// Returns true if matrix is all zeros.
    bool IsZero(Real cutoff = 1.0e-05) const;     // replace magic number

    /// Frobenius norm, which is the sqrt of sum of square elements.  Same as
    Schatten 2-norm,
    /// or just "2-norm".
    Real FrobeniusNorm() const;

    /// Returns true if ((*this)-other).FrobeniusNorm()
    /// <= tol * (*this).FrobeniusNorm().
    bool ApproxEqual(const MatrixBase<Real> &other, float tol = 0.01) const;

    /// Tests for exact equality.  It's usually preferable to use ApproxEqual.
    bool Equal(const MatrixBase<Real> &other) const;

    /// largest absolute value.
    Real LargestAbsElem() const;  // largest absolute value.

    /// Returns log(sum(exp())) without exp overflow
    /// If prune > 0.0, it uses a pruning beam, discarding
    /// terms less than (max - prune).  Note: in future
    /// we may change this so that if prune = 0.0, it takes
    /// the max, so use -1 if you don't want to prune.
    Real LogSumExp(Real prune = -1.0) const;

    /// Apply soft-max to the collection of all elements of the
    /// matrix and return normalizer (log sum of exponentials).
    Real ApplySoftMax();

    /// Set each element to the sigmoid of the corresponding element of "src".
    void Sigmoid(const MatrixBase<Real> &src);

    /// Sets each element to the Heaviside step function (x > 0 ? 1 : 0) of the
    /// corresponding element in "src".  Note: in general you can make different
    /// choices for x = 0, but for now please leave it as it (i.e. returning
    zero)
    /// because it affects the RectifiedLinearComponent in the neural net code.
    void Heaviside(const MatrixBase<Real> &src);

    void Exp(const MatrixBase<Real> &src);

    void Pow(const MatrixBase<Real> &src, Real power);

    void Log(const MatrixBase<Real> &src);

    /// Apply power to the absolute value of each element.
    /// If include_sign is true, the result will be multiplied with
    /// the sign of the input value.
    /// If the power is negative and the input to the power is zero,
    /// The output will be set zero. If include_sign is true, it will
    /// multiply the result by the sign of the input.
    void PowAbs(const MatrixBase<Real> &src, Real power, bool
    include_sign=false);

    void Floor(const MatrixBase<Real> &src, Real floor_val);

    void Ceiling(const MatrixBase<Real> &src, Real ceiling_val);

    /// For each element x of the matrix, set it to
    /// (x < 0 ? exp(x) : x + 1).  This function is used
    /// in our RNNLM training.
    void ExpSpecial(const MatrixBase<Real> &src);

    /// This is equivalent to running:
    /// Floor(src, lower_limit);
    /// Ceiling(src, upper_limit);
    /// Exp(src)
    void ExpLimited(const MatrixBase<Real> &src, Real lower_limit, Real
    upper_limit);

    /// Set each element to y = log(1 + exp(x))
    void SoftHinge(const MatrixBase<Real> &src);

    /// Apply the function y(i) = (sum_{j = i*G}^{(i+1)*G-1} x_j^(power))^(1 /
    p).
    /// Requires src.NumRows() == this->NumRows() and  src.NumCols() %
    this->NumCols() == 0.
    void GroupPnorm(const MatrixBase<Real> &src, Real power);

    /// Calculate derivatives for the GroupPnorm function above...
    /// if "input" is the input to the GroupPnorm function above (i.e. the "src"
    variable),
    /// and "output" is the result of the computation (i.e. the "this" of that
    function
    /// call), and *this has the same dimension as "input", then it sets each
    element
    /// of *this to the derivative d(output-elem)/d(input-elem) for each element
    of "input", where
    /// "output-elem" is whichever element of output depends on that input
    element.
    void GroupPnormDeriv(const MatrixBase<Real> &input, const MatrixBase<Real>
    &output,
                         Real power);

    /// Apply the function y(i) = (max_{j = i*G}^{(i+1)*G-1} x_j
    /// Requires src.NumRows() == this->NumRows() and  src.NumCols() %
    this->NumCols() == 0.
    void GroupMax(const MatrixBase<Real> &src);

    /// Calculate derivatives for the GroupMax function above, where
    /// "input" is the input to the GroupMax function above (i.e. the "src"
    variable),
    /// and "output" is the result of the computation (i.e. the "this" of that
    function
    /// call), and *this must have the same dimension as "input". Each element
    /// of *this will be set to 1 if the corresponding input equals the output
    of
    /// the group, and 0 otherwise. The equals the function derivative where it
    is
    /// defined (it's not defined where multiple inputs in the group are equal
    to the output).
    void GroupMaxDeriv(const MatrixBase<Real> &input, const MatrixBase<Real>
    &output);

    /// Set each element to the tanh of the corresponding element of "src".
    void Tanh(const MatrixBase<Real> &src);

    // Function used in backpropagating derivatives of the sigmoid function:
    // element-by-element, set *this = diff * value * (1.0 - value).
    void DiffSigmoid(const MatrixBase<Real> &value,
                     const MatrixBase<Real> &diff);

    // Function used in backpropagating derivatives of the tanh function:
    // element-by-element, set *this = diff * (1.0 - value^2).
    void DiffTanh(const MatrixBase<Real> &value,
                  const MatrixBase<Real> &diff);
  */
    /** Uses Svd to compute the eigenvalue decomposition of a symmetric positive
     * semi-definite matrix: (*this) = rP * diag(rS) * rP^T, with rP an
     * orthogonal matrix so rP^{-1} = rP^T.   Throws exception if input was not
     * positive semi-definite (check_thresh controls how stringent the check is;
     * set it to 2 to ensure it won't ever complain, but it will zero out
     * negative
     * dimensions in your matrix.
     *
     * Caution: if you want the eigenvalues, it may make more sense to convert
     * to
     * SpMatrix and use Eig() function there, which uses eigenvalue
     * decomposition
     * directly rather than SVD.
     */

    /// Stream read: fills this matrix from the stream `in`.
    /// `binary` presumably selects the binary (true) or text (false) on-disk
    /// format -- confirm against the definition.
    /// NOTE(review): the earlier wording ("use instead of stream<<*this, if you
    /// want to add to existing contents") appears to describe an "add" option
    /// that this signature does not have; verify before relying on it.
    // Will throw exception on failure.
    void Read(std::istream &in, bool binary);
    /// Stream write: writes this matrix to the stream `out`, using the format
    /// selected by `binary` (see Read()).
    void Write(std::ostream &out, bool binary) const;

    // Below is internal methods for Svd, user does not have to know about this.
  protected:
    /// Protected initializer for derived classes: records an existing data
    /// pointer together with its dimensions and stride.  No memory is
    /// allocated and nothing is copied here.
    /// Note the argument order: the column count comes before the row count.
    explicit MatrixBase(Real *data,
                        MatrixIndexT cols,
                        MatrixIndexT rows,
                        MatrixIndexT stride)
        : data_(data),
          num_cols_(cols),
          num_rows_(rows),
          stride_(stride) {
        // Compile-time guard on the element type.
        KALDI_ASSERT_IS_FLOATING_TYPE(Real);
    }

    /// Empty initializer, callable only from derived classes.
    /// Produces an empty matrix: null data pointer and zero rows, columns and
    /// stride.  The dimension members are zeroed explicitly so that reading
    /// them before the derived class has allocated storage is well-defined
    /// (previously only data_ was initialized).
    explicit MatrixBase()
        : data_(NULL), num_cols_(0), num_rows_(0), stride_(0) {
        KALDI_ASSERT_IS_FLOATING_TYPE(Real);
    }

    // Make sure pointers to MatrixBase cannot be deleted: the destructor sits
    // in the protected section, so code outside the class hierarchy cannot
    // invoke `delete` on a MatrixBase pointer.  The body is empty; this class
    // releases no memory itself.
    ~MatrixBase() {}

    /// Returns the raw, non-const data pointer even when called on a const
    /// object.  This exists so that SubMatrix can obtain writable data from a
    /// const Matrix: C++ offers no "public const" inheritance, and making
    /// SubMatrix fully const-correct would mean overriding many of the Matrix
    /// functions.
    inline Real *Data_workaround() const {
        return this->data_;
    }

    /// Pointer to the start of the matrix data.
    Real *data_;

    /// These attributes store the real matrix size as it is stored in memory,
    /// including memory alignment.
    MatrixIndexT num_cols_;  ///< Number of columns
    MatrixIndexT num_rows_;  ///< Number of rows
    /** True number of columns for the internal matrix. This number may differ
     * from num_cols_ as memory alignment might be used. */
    MatrixIndexT stride_;

  private:
    // Disables copying of MatrixBase itself.  Presumably the macro declares
    // the copy constructor and copy-assignment operator without defining
    // them -- confirm against the macro's definition.
    KALDI_DISALLOW_COPY_AND_ASSIGN(MatrixBase);
};

/// Resizable matrix class layered on MatrixBase.  Unlike the base class it
/// offers public constructors, Resize() and a destructor that calls Destroy().
template <typename Real>
class Matrix : public MatrixBase<Real> {
  public:
    /// Default constructor (defined out of line).
    Matrix();

    /// Size constructor: starts from an empty base and delegates to Resize()
    /// with the given dimensions, fill policy and stride policy.
    Matrix(const MatrixIndexT r,
           const MatrixIndexT c,
           MatrixResizeType resize_type = kSetZero,
           MatrixStrideType stride_type = kDefaultStride)
        : MatrixBase<Real>() {
        this->Resize(r, c, resize_type, stride_type);
    }

    /// Exchanges the contents of *this and *other (shallow swap).
    void Swap(Matrix<Real> *other);

    /// Constructs from any MatrixBase, optionally transposing.
    /// Allocates new memory.
    explicit Matrix(const MatrixBase<Real> &M,
                    MatrixTransposeType trans = kNoTrans);

    /// Copy constructor; declared explicitly so the compiler-generated one is
    /// not used.  (Cannot be made explicit.)
    Matrix(const Matrix<Real> &M);

    /// Converting copy constructor from a matrix with a different element
    /// type, optionally transposing.
    template <typename OtherReal>
    explicit Matrix(const MatrixBase<OtherReal> &M,
                    MatrixTransposeType trans = kNoTrans);

    /// Copy constructor taking TpMatrix...
    // template <typename OtherReal>
    // explicit Matrix(const TpMatrix<OtherReal> & M,
    // MatrixTransposeType trans = kNoTrans) : MatrixBase<Real>() {
    // if (trans == kNoTrans) {
    // Resize(M.NumRows(), M.NumCols(), kUndefined);
    // this->CopyFromTp(M);
    //} else {
    // Resize(M.NumCols(), M.NumRows(), kUndefined);
    // this->CopyFromTp(M, kTrans);
    //}
    //}

    /// Reads the matrix from a stream.
    // Unlike the version in the base class, this one allows resizing.
    void Read(std::istream &in, bool binary);

    /// Removes the row with the given index.
    void RemoveRow(MatrixIndexT i);

    /// Transpose the matrix.  Works for non-square
    /// matrices as well as square ones.
    // void Transpose();

    /// Destructor: releases the matrix storage via Destroy().
    ~Matrix() { Destroy(); }

    /// Sets the matrix to the requested size (zero is OK as long as both r
    /// and c are zero).  The contents afterwards depend on resize_type:
    ///   - kSetZero:   the new data will be zero
    ///   - kUndefined: the new data will be undefined
    ///   - kCopyData:  the new data will match the old data in any shared
    ///                 positions, and be zero elsewhere.
    ///
    /// Pass stride_type == kStrideEqualNumCols to force the stride to equal
    /// the number of columns; by default the stride is chosen so that the
    /// stride in bytes is a multiple of 16.
    ///
    /// This function takes time proportional to the number of data elements.
    void Resize(const MatrixIndexT r,
                const MatrixIndexT c,
                MatrixResizeType resize_type = kSetZero,
                MatrixStrideType stride_type = kDefaultStride);

    /// Assignment from any MatrixBase: resizes (contents undefined) only when
    /// the dimensions differ, then copies the elements.
    Matrix<Real> &operator=(const MatrixBase<Real> &other) {
        const bool same_shape =
            MatrixBase<Real>::NumRows() == other.NumRows() &&
            MatrixBase<Real>::NumCols() == other.NumCols();
        if (!same_shape) {
            Resize(other.NumRows(), other.NumCols(), kUndefined);
        }
        MatrixBase<Real>::CopyFromMat(other);
        return *this;
    }

    /// Copy assignment with the same resize-then-copy behaviour as above.
    /// Needed for inclusion in std::vector.
    Matrix<Real> &operator=(const Matrix<Real> &other) {
        const bool same_shape =
            MatrixBase<Real>::NumRows() == other.NumRows() &&
            MatrixBase<Real>::NumCols() == other.NumCols();
        if (!same_shape) {
            Resize(other.NumRows(), other.NumCols(), kUndefined);
        }
        MatrixBase<Real>::CopyFromMat(other);
        return *this;
    }


  private:
    /// Deallocates memory and sets to empty matrix (dimension 0, 0).
    void Destroy();

    /// Init assumes the current class contents are invalid (i.e. junk or
    /// already freed) and points the matrix at newly allocated memory with
    /// the specified number of rows and columns.  r == c == 0 is acceptable.
    /// The contents of the new memory are undefined.
    void Init(const MatrixIndexT r,
              const MatrixIndexT c,
              const MatrixStrideType stride_type);
};
/// @} end "addtogroup matrix_group"

/// \addtogroup matrix_funcs_io
/// @{

/// Sub-matrix representation: a MatrixBase that refers to part of another
/// matrix (or to a caller-supplied data pointer).  Its destructor is empty,
/// so it never frees the memory it refers to.
/// Note that SubMatrix is not const-correct: it can be built from a const
/// MatrixBase and still be used to modify the contents.  Be careful!

template <typename Real>
class SubMatrix : public MatrixBase<Real> {
  public:
    // Initialize a SubMatrix from part of a matrix; this is
    // a bit like A(b:c, d:e) in Matlab.
    // This initializer is against the proper semantics of "const", since
    // SubMatrix can change its contents.  It would be hard to implement
    // a "const-safe" version of this class.
    // NOTE(review): the offset comments below say "0 < offset"; an offset of
    // 0 is presumably valid as well -- confirm against the definition.
    SubMatrix(const MatrixBase<Real> &T,
              const MatrixIndexT ro,  // row offset, 0 < ro < NumRows()
              const MatrixIndexT r,   // number of rows, r > 0
              const MatrixIndexT co,  // column offset, 0 < co < NumCols()
              const MatrixIndexT c);  // number of columns, c > 0

    // This initializer is mostly intended for use in CuMatrix and related
    // classes.  Be careful!
    SubMatrix(Real *data,
              MatrixIndexT num_rows,
              MatrixIndexT num_cols,
              MatrixIndexT stride);

    // Written with the injected class name rather than `~SubMatrix<Real>()`:
    // a template-id is not a valid declarator for a constructor or
    // destructor in C++20, and newer compilers diagnose it.
    ~SubMatrix() {}

    /// This type of constructor is needed for Range() to work [in Matrix base
    /// class]. Cannot make it explicit.
    /// The copy refers to the same data as `other`.  The base-class
    /// initializer takes the column count before the row count.
    SubMatrix(const SubMatrix &other)
        : MatrixBase<Real>(
              other.data_, other.num_cols_, other.num_rows_, other.stride_) {}

  private:
    /// Disallow assignment.
    SubMatrix<Real> &operator=(const SubMatrix<Real> &other);
};

/// @} End of "addtogroup matrix_funcs_io".

/// \addtogroup matrix_funcs_scalar
/// @{

// Some declarations.  These are traces of products.

/************************
template<typename Real>
bool ApproxEqual(const MatrixBase<Real> &A,
                 const MatrixBase<Real> &B, Real tol = 0.01) {
  return A.ApproxEqual(B, tol);
}

template<typename Real>
inline void AssertEqual(const MatrixBase<Real> &A, const MatrixBase<Real> &B,
                        float tol = 0.01) {
  KALDI_ASSERT(A.ApproxEqual(B, tol));
}

/// Returns trace of matrix.
template <typename Real>
double TraceMat(const MatrixBase<Real> &A) { return A.Trace(); }


/// Returns tr(A B C)
template <typename Real>
Real TraceMatMatMat(const MatrixBase<Real> &A, MatrixTransposeType transA,
                      const MatrixBase<Real> &B, MatrixTransposeType transB,
                      const MatrixBase<Real> &C, MatrixTransposeType transC);

/// Returns tr(A B C D)
template <typename Real>
Real TraceMatMatMatMat(const MatrixBase<Real> &A, MatrixTransposeType transA,
                         const MatrixBase<Real> &B, MatrixTransposeType transB,
                         const MatrixBase<Real> &C, MatrixTransposeType transC,
                         const MatrixBase<Real> &D, MatrixTransposeType transD);

/// @} end "addtogroup matrix_funcs_scalar"


/// \addtogroup matrix_funcs_misc
/// @{


/// Function to ensure that SVD is sorted.  This function is made as generic as
/// possible, to be applicable to other types of problems.  s->Dim() should be
/// the same as U->NumCols(), and we sort s from greatest to least absolute
/// value (if sort_on_absolute_value == true) or greatest to least value
/// otherwise, moving the columns of U, if it exists, and the rows of Vt, if it
/// exists, around in the same way.  Note: the "absolute value" part won't
matter
/// if this is an actual SVD, since singular values are non-negative.
template<typename Real> void SortSvd(VectorBase<Real> *s, MatrixBase<Real> *U,
                                     MatrixBase<Real>* Vt = NULL,
                                     bool sort_on_absolute_value = true);

/// Creates the eigenvalue matrix D that is part of the decomposition used
Matrix::Eig.
/// D will be block-diagonal with blocks of size 1 (for real eigenvalues) or 2x2
/// for complex pairs.  If a complex pair is lambda +- i*mu, D will have a
corresponding
/// 2x2 block [lambda, mu; -mu, lambda].
/// This function will throw if any complex eigenvalues are not in complex
conjugate
/// pairs (or the members of such pairs are not consecutively numbered).
template<typename Real>
void CreateEigenvalueMatrix(const VectorBase<Real> &real, const VectorBase<Real>
&imag,
                            MatrixBase<Real> *D);

/// The following function is used in Matrix::Power, and separately tested, so
we
/// declare it here mainly for the testing code to see.  It takes a complex
value to
/// a power using a method that will work for noninteger powers (but will fail
if the
/// complex value is real and negative).
template<typename Real>
bool AttemptComplexPower(Real *x_re, Real *x_im, Real power);

**********/

/// @} end of addtogroup matrix_funcs_misc

/// \addtogroup matrix_funcs_io
/// @{
// Stream insertion for any MatrixBase.  Only the declaration lives here; the
// definition is presumably in the "-inl.h" file included at the end of this
// header -- confirm.
template <typename Real>
std::ostream &operator<<(std::ostream &Out, const MatrixBase<Real> &M);

// Stream extraction into a MatrixBase.
template <typename Real>
std::istream &operator>>(std::istream &In, MatrixBase<Real> &M);

// The Matrix read allows resizing, so we override the MatrixBase one.
template <typename Real>
std::istream &operator>>(std::istream &In, Matrix<Real> &M);

// Returns true iff M and N have the same number of rows and the same number
// of columns.
template <typename Real>
bool SameDim(const MatrixBase<Real> &M, const MatrixBase<Real> &N) {
    if (M.NumRows() != N.NumRows()) return false;
    return M.NumCols() == N.NumCols();
}

/// @} end of \addtogroup matrix_funcs_io


}  // namespace kaldi


// we need to include the implementation and some
// template specializations.
#include "matrix/kaldi-matrix-inl.h"


#endif  // KALDI_MATRIX_KALDI_MATRIX_H_


================================================
FILE: runtime/engine/common/matrix/kaldi-vector-inl.h
================================================
// matrix/kaldi-vector-inl.h

// Copyright 2009-2011   Ondrej Glembek;  Microsoft Corporation;
//                       Haihua Xu

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

// This is an internal header file, included by other library headers.
// You should not attempt to use it directly.

#ifndef KALDI_MATRIX_KALDI_VECTOR_INL_H_
#define KALDI_MATRIX_KALDI_VECTOR_INL_H_ 1

namespace kaldi {

// Stream insertion for vectors: forwards to VectorBase::Write with the
// binary flag set to false, then returns the stream for chaining.
template <typename Real>
std::ostream &operator<<(std::ostream &os, const VectorBase<Real> &rv) {
    const bool binary = false;
    rv.Write(os, binary);
    return os;
}

// Stream extraction into a VectorBase: forwards to VectorBase::Read with the
// binary flag set to false, then returns the stream for chaining.
template <typename Real>
std::istream &operator>>(std::istream &is, VectorBase<Real> &rv) {
    const bool binary = false;
    rv.Read(is, binary);
    return is;
}

// Stream extraction into a Vector: forwards to Vector::Read with the binary
// flag set to false, then returns the stream for chaining.
template <typename Real>
std::istream &operator>>(std::istream &is, Vector<Real> &rv) {
    const bool binary = false;
    rv.Read(is, binary);
    return is;
}

// template<>
// template<>
// void VectorBase<float>::AddVec(const float alpha, const VectorBase<float>
// &rv);

// template<>
// template<>
// void VectorBase<double>::AddVec<double>(const double alpha,
// const VectorBase<double> &rv);

}  // namespace kaldi

#endif  // KALDI_MATRIX_KALDI_VECTOR_INL_H_


================================================
FILE: runtime/engine/common/matrix/kaldi-vector.cc
================================================
// matrix/kaldi-vector.cc

// Copyright 2009-2011  Microsoft Corporation;  Lukas Burget;
//                      Saarland University;   Go Vivace Inc.;  Ariya Rastrow;
//                      Petr Schwarz;  Yanmin Qian;  Jan Silovsky;
//                      Haihua Xu; Wei Shi
//                2015  Guoguo Chen
//                2017  Daniel Galvez
//                2019  Yiwen Shao

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "matrix/kaldi-vector.h"

#include <algorithm>
#include <string>

#include "matrix/kaldi-matrix.h"

namespace kaldi {

/// Allocates storage for `dim` elements, assuming the current contents of the
/// object are invalid (nothing is freed here).  The new memory is left
/// uninitialized.
///
/// For dim == 0 the vector becomes empty (NULL data, dimension 0).
/// Throws std::bad_alloc if the aligned allocation fails.
template <typename Real>
inline void Vector<Real>::Init(const MatrixIndexT dim) {
    KALDI_ASSERT(dim >= 0);
    if (dim == 0) {
        this->dim_ = 0;
        this->data_ = NULL;
        return;
    }
    void *data;
    void *free_data;

    // Compute the byte count in size_t.  Storing it in MatrixIndexT (as the
    // code used to) truncates the product for large vectors, so the allocator
    // would be asked for the wrong amount of memory.
    const size_t size = static_cast<size_t>(dim) * sizeof(Real);

    if ((data = KALDI_MEMALIGN(16, size, &free_data)) != NULL) {
        this->data_ = static_cast<Real *>(data);
        this->dim_ = dim;
    } else {
        throw std::bad_alloc();
    }
}


/// Changes the dimension of the vector to `dim`.
///   - kCopyData:  keeps the leading elements that fit and zero-fills any
///                 newly added tail; a no-op if the size is unchanged.
///   - kSetZero:   the result is all zeros.
///   - kUndefined: the contents are left unspecified.
template <typename Real>
void Vector<Real>::Resize(const MatrixIndexT dim,
                          MatrixResizeType resize_type) {
    if (resize_type == kCopyData) {
        if (this->data_ == NULL || dim == 0) {
            // Nothing to preserve: fall through as a plain zeroing resize.
            resize_type = kSetZero;
        } else if (this->dim_ == dim) {
            // Same size and data must be kept: nothing to do.
            return;
        } else {
            // Build the result in a scratch vector, then swap it in.
            Vector<Real> resized(dim, kUndefined);
            const MatrixIndexT num_kept = std::min(dim, this->dim_);
            memcpy(resized.data_, this->data_, sizeof(Real) * num_kept);
            if (dim > num_kept) {
                // Growing: zero the elements beyond the old contents.
                memset(resized.data_ + num_kept,
                       0,
                       sizeof(Real) * (dim - num_kept));
            }
            resized.Swap(this);
            // `resized` now holds the old buffer and frees it on scope exit.
            return;
        }
    }

    // From here on resize_type is kSetZero or kUndefined.
    const bool zero_fill = (resize_type == kSetZero);

    if (this->data_ != NULL) {
        if (this->dim_ == dim) {
            // Reuse the existing allocation.
            if (zero_fill) this->SetZero();
            return;
        }
        Destroy();
    }
    Init(dim);
    if (zero_fill) this->SetZero();
}


/// Copies all elements of `v` into this vector.  The dimensions must already
/// match (asserted); copying a vector onto its own buffer is a no-op.
template <typename Real>
void VectorBase<Real>::CopyFromVec(const VectorBase<Real> &v) {
    KALDI_ASSERT(Dim() == v.Dim());
    // Same underlying buffer: skip the copy entirely.
    if (data_ == v.data_) return;
    std::memcpy(this->data_, v.data_, dim_ * sizeof(Real));
}

/*
template<typename Real>
template<typename OtherReal>
void VectorBase<Real>::CopyFromPacked(const PackedMatrix<OtherReal>& M) {
  SubVector<OtherReal> v(M);
  this->CopyFromVec(v);
}
// instantiate the template.
template void VectorBase<float>::CopyFromPacked(const PackedMatrix<double>
&other);
template void VectorBase<float>::CopyFromPacked(const PackedMatrix<float>
&other);
template void VectorBase<double>::CopyFromPacked(const PackedMatrix<double>
&other);
template void VectorBase<double>::CopyFromPacked(const PackedMatrix<float>
&other);

/// Load data into the vector
template<typename Real>
void VectorBase<Real>::CopyFromPtr(const Real *data, MatrixIndexT sz) {
  KALDI_ASSERT(dim_ == sz);
  std::memcpy(this->data_, data, Dim() * sizeof(Real));
}*/

/// Copies from a vector with a different element type, converting each
/// element to Real.  The dimensions must match (asserted).
template <typename Real>
template <typename OtherReal>
void VectorBase<Real>::CopyFromVec(const VectorBase<OtherReal> &other) {
    KALDI_ASSERT(dim_ == other.Dim());
    const OtherReal *__restrict__ src = other.Data();
    Real *__restrict__ dst = data_;
    const MatrixIndexT count = dim_;
    for (MatrixIndexT k = 0; k < count; ++k) {
        dst[k] = static_cast<Real>(src[k]);
    }
}

// Explicit instantiations for the float <-> double conversions.
template void VectorBase<float>::CopyFromVec(const VectorBase<double> &other);
template void VectorBase<double>::CopyFromVec(const VectorBase<float> &other);

/// Removes element `i` from the vector by shifting all later elements down by
/// one position and decrementing the dimension.  The vector is not
/// reallocated.
///
/// `i` must satisfy 0 <= i < Dim().  The lower bound is now asserted too:
/// MatrixIndexT is signed, and a negative index would otherwise pass the
/// check and make the shift loop write before the start of the buffer.
template <typename Real>
void Vector<Real>::RemoveElement(MatrixIndexT i) {
    KALDI_ASSERT(i >= 0 && i < this->dim_ && "Access out of vector");
    for (MatrixIndexT j = i + 1; j < this->dim_; j++)
        this->data_[j - 1] = this->data_[j];
    this->dim_--;
}


/// Releases the data block (if one is held) and resets the object to an empty
/// vector: NULL data pointer and dimension 0.
template <typename Real>
void Vector<Real>::Destroy() {
    // Only free when a block was actually allocated.
    if (this->data_ != NULL) {
        KALDI_MEMALIGN_FREE(this->data_);
    }
    this->dim_ = 0;
    this->data_ = NULL;
}

/// Sets every element to zero by clearing the bytes of the data buffer.
template <typename Real>
void VectorBase<Real>::SetZero() {
    Real *const first = data_;
    std::memset(first, 0, dim_ * sizeof(Real));
}

/// Returns true if the largest absolute element value does not exceed
/// `cutoff` (an empty vector yields a maximum of 0).
template <typename Real>
bool VectorBase<Real>::IsZero(Real cutoff) const {
    const MatrixIndexT count = Dim();
    Real largest = 0.0;
    MatrixIndexT k = 0;
    while (k < count) {
        // Argument order is kept as (|x|, running max) on purpose.
        largest = std::max(std::abs(data_[k]), largest);
        ++k;
    }
    return largest <= cutoff;
}

/*
template<typename Real>
void VectorBase<Real>::SetRandn() {
  kaldi::RandomState rstate;
  MatrixIndexT last = (Dim() % 2 == 1) ? Dim() - 1 : Dim();
  for (MatrixIndexT i = 0; i < last; i += 2) {
    kaldi::RandGauss2(data_ + i, data_ + i + 1, &rstate);
  }
  if (Dim() != last) data_[last] = static_cast<Real>(kaldi::RandGauss(&rstate));
}

template<typename Real>
void VectorBase<Real>::SetRandUniform() {
  kaldi::RandomState rstate;
  for (MatrixIndexT i = 0; i < Dim(); i++) {
    *(data_+i) = RandUniform(&rstate);
  }
}

template<typename Real>
MatrixIndexT VectorBase<Real>::RandCategorical() const {
  kaldi::RandomState rstate;
  Real sum = this->Sum();
  KALDI_ASSERT(this->Min() >= 0.0 && sum > 0.0);
  Real r = RandUniform(&rstate) * sum;
  Real *data = this->data_;
  MatrixIndexT dim = this->dim_;
  Real running_sum = 0.0;
  for (MatrixIndexT i = 0; i < dim; i++) {
    running_sum += data[i];
    if (r < running_sum) return i;
  }
  return dim_ - 1; // Should only happen if RandUniform()
                   // returns exactly 1, or due to roundoff.
}*/

/// Sets every element of the vector to `f`.
template <typename Real>
void VectorBase<Real>::Set(Real f) {
    // memset works byte by byte, so it can only be used for the value zero;
    // any other value has to be written element by element.
    if (f != 0) {
        std::fill(data_, data_ + dim_, f);
        return;
    }
    this->SetZero();  // calls std::memset
}

/// Fills this vector with the rows of `mat` laid end to end (row 0 first).
/// Requires Dim() == mat.NumRows() * mat.NumCols() (asserted).
template <typename Real>
void VectorBase<Real>::CopyRowsFromMat(const MatrixBase<Real> &mat) {
    KALDI_ASSERT(dim_ == mat.NumCols() * mat.NumRows());

    const MatrixIndexT num_cols = mat.NumCols(), num_rows = mat.NumRows();

    // Stride equal to the column count: a single bulk copy covers all rows.
    if (mat.Stride() == mat.NumCols()) {
        memcpy(data_, mat.Data(), num_cols * num_rows * sizeof(Real));
        return;
    }

    // Otherwise copy row by row, advancing the destination by one row each
    // time.
    Real *dst = data_;
    for (MatrixIndexT r = 0; r < num_rows; ++r, dst += num_cols) {
        memcpy(dst, mat.RowData(r), num_cols * sizeof(Real));
    }
}

/// Same as the same-type CopyRowsFromMat, but for a matrix with a different
/// element type: each element is converted to Real while copying.
/// Requires Dim() == mat.NumRows() * mat.NumCols() (asserted).
template <typename Real>
template <typename OtherReal>
void VectorBase<Real>::CopyRowsFromMat(const MatrixBase<OtherReal> &mat) {
    KALDI_ASSERT(dim_ == mat.NumCols() * mat.NumRows());
    const MatrixIndexT num_cols = mat.NumCols(), num_rows = mat.NumRows();

    Real *dst = data_;
    for (MatrixIndexT r = 0; r < num_rows; ++r) {
        const OtherReal *src_row = mat.RowData(r);
        for (MatrixIndexT c = 0; c < num_cols; ++c) {
            *dst++ = static_cast<Real>(src_row[c]);
        }
    }
}

// Explicit instantiations for the float <-> double conversions.
template void VectorBase<float>::CopyRowsFromMat(const MatrixBase<double> &mat);
template void VectorBase<double>::CopyRowsFromMat(const MatrixBase<float> &mat);


/// Fills this vector with the columns of `mat` laid end to end (column 0
/// first).  Requires Dim() == mat.NumRows() * mat.NumCols() (asserted).
template <typename Real>
void VectorBase<Real>::CopyColsFromMat(const MatrixBase<Real> &mat) {
    KALDI_ASSERT(dim_ == mat.NumCols() * mat.NumRows());

    const MatrixIndexT num_cols = mat.NumCols(), num_rows = mat.NumRows(),
                       stride = mat.Stride();
    const Real *src = mat.Data();
    Real *dst = data_;

    for (MatrixIndexT c = 0; c < num_cols; ++c) {
        // Element (r, c) of the matrix is at offset r * stride + c.
        const Real *column = src + c;
        for (MatrixIndexT r = 0; r < num_rows; ++r) {
            *dst++ = column[r * stride];
        }
    }
}

/// Copies row `row` of `mat` into this vector.
/// Requires row < mat.NumRows() and Dim() == mat.NumCols() (both asserted).
template <typename Real>
void VectorBase<Real>::CopyRowFromMat(const MatrixBase<Real> &mat,
                                      MatrixIndexT row) {
    KALDI_ASSERT(row < mat.NumRows());
    KALDI_ASSERT(dim_ == mat.NumCols());
    memcpy(data_, mat.RowData(row), sizeof(Real) * dim_);
}

/// Copies row `row` of a matrix with a different element type into this
/// vector, converting each element to Real.
/// Requires row < mat.NumRows() and Dim() == mat.NumCols() (both asserted).
template <typename Real>
template <typename OtherReal>
void VectorBase<Real>::CopyRowFromMat(const MatrixBase<OtherReal> &mat,
                                      MatrixIndexT row) {
    KALDI_ASSERT(row < mat.NumRows());
    KALDI_ASSERT(dim_ == mat.NumCols());
    const OtherReal *src = mat.RowData(row);
    const MatrixIndexT count = dim_;
    for (MatrixIndexT k = 0; k < count; ++k) {
        data_[k] = static_cast<Real>(src[k]);
    }
}

// Explicit instantiations for the float <-> double conversions.
template void VectorBase<float>::CopyRowFromMat(const MatrixBase<double> &mat,
                                                MatrixIndexT row);
template void VectorBase<double>::CopyRowFromMat(const MatrixBase<float> &mat,
                                                 MatrixIndexT row);

/*
template<typename Real>
template<typename OtherReal>
void VectorBase<Real>::CopyRowFromSp(const SpMatrix<OtherReal> &sp, MatrixIndexT
row) {
  KALDI_ASSERT(row < sp.NumRows());
  KALDI_ASSERT(dim_ == sp.NumCols());

  const OtherReal *sp_data = sp.Data();

  sp_data += (row*(row+1)) / 2; // takes us to beginning of this row.
  MatrixIndexT i;
  for (i = 0; i < row; i++) // copy consecutive elements.
    data_[i] = static_cast<Real>(*(sp_data++));
  for(; i < dim_; ++i, sp_data += i)
    data_[i] = static_cast<Real>(*sp_data);
}

template
void VectorBase<float>::CopyRowFromSp(const SpMatrix<double> &mat, MatrixIndexT
row);
template
void VectorBase<double>::CopyRowFromSp(const SpMatrix<float> &mat, MatrixIndexT
row);
template
void VectorBase<float>::CopyRowFromSp(const SpMatrix<float> &mat, MatrixIndexT
row);
template
void VectorBase<double>::CopyRowFromSp(const SpMatrix<double> &mat, MatrixIndexT
row);

// takes absolute value of the elements to a power.
// Throws exception if could not (but only for power != 1 and power != 2).
template<typename Real>
void VectorBase<Real>::ApplyPowAbs(Real power, bool include_sign) {
  if (power == 1.0)
    for (MatrixIndexT i = 0; i < dim_; i++)
      data_[i] = (include_sign && data_[i] < 0 ? -1 : 1) * std::abs(data_[i]);
  if (power == 2.0) {
    for (MatrixIndexT i = 0; i < dim_; i++)
      data_[i] = (include_sign && data_[i] < 0 ? -1 : 1) * data_[i] * data_[i];
  } else if (power == 0.5) {
    for (MatrixIndexT i = 0; i < dim_; i++) {
      data_[i] = (include_sign && data_[i] < 0 ? -1 : 1) *
std::sqrt(std::abs(data_[i]));
    }
  } else if (power < 0.0) {
    for (MatrixIndexT i = 0; i < dim_; i++) {
      data_[i] = (data_[i] == 0.0 ? 0.0 : pow(std::abs(data_[i]), power));
      data_[i] *= (include_sign && data_[i] < 0 ? -1 : 1);
      if (data_[i] == HUGE_VAL) {  // HUGE_VAL is what errno returns on error.
        KALDI_ERR << "Could not raise element "  << i << "to power "
                  << power << ": returned value = " << data_[i];
      }
    }
  } else {
    for (MatrixIndexT i = 0; i < dim_; i++) {
      data_[i] = (include_sign && data_[i] < 0 ? -1 : 1) *
pow(std::abs(data_[i]), power);
      if (data_[i] == HUGE_VAL) {  // HUGE_VAL is what errno returns on error.
        KALDI_ERR << "Could not raise element "  << i << "to power "
                  << power << ": returned value = " << data_[i];
      }
    }
  }
}

// Computes the p-th norm. Throws exception if could not.
template<typename Real>
Real VectorBase<Real>::Norm(Real p) const {
  KALDI_ASSERT(p >= 0.0);
  Real sum = 0.0;
  if (p == 0.0) {
    for (MatrixIndexT i = 0; i < dim_; i++)
      if (data_[i] != 0.0) sum += 1.0;
    return sum;
  } else if (p == 1.0) {
    for (MatrixIndexT i = 0; i < dim_; i++)
      sum += std::abs(data_[i]);
    return sum;
  } else if (p == 2.0) {
    for (MatrixIndexT i = 0; i < dim_; i++)
      sum += data_[i] * data_[i];
    return std::sqrt(sum);
  } else if (p == std::numeric_limits<Real>::infinity()){
    for (MatrixIndexT i = 0; i < dim_; i++)
      sum = std::max(sum, std::abs(data_[i]));
    return sum;
  } else {
    Real tmp;
    bool ok = true;
    for (MatrixIndexT i = 0; i < dim_; i++) {
      tmp = pow(std::abs(data_[i]), p);
      if (tmp == HUGE_VAL) // HUGE_VAL is what pow returns on error.
        ok = false;
      sum += tmp;
    }
    tmp = pow(sum, static_cast<Real>(1.0/p));
    KALDI_ASSERT(tmp != HUGE_VAL); // should not happen here.
    if (ok) {
      return tmp;
    } else {
      Real maximum = this->Max(), minimum = this->Min(),
          max_abs = std::max(maximum, -minimum);
      KALDI_ASSERT(max_abs > 0); // Or should not have reached here.
      Vector<Real> tmp(*this);
      tmp.Scale(1.0 / max_abs);
      return tmp.Norm(p) * max_abs;
    }
  }
}

template<typename Real>
bool VectorBase<Real>::ApproxEqual(const VectorBase<Real> &other, float tol)
const {
  if (dim_ != other.dim_) KALDI_ERR << "ApproxEqual: size mismatch "
                                    << dim_ << " vs. " << other.dim_;
  KALDI_ASSERT(tol >= 0.0);
  if (tol != 0.0) {
    Vector<Real> tmp(*this);
    tmp.AddVec(-1.0, other);
    return (tmp.Norm(2.0) <= static_cast<Real>(tol) * this->Norm(2.0));
  } else { // Test for exact equality.
    const Real *data = data_;
    const Real *other_data = other.data_;
    for (MatrixIndexT dim = dim_, i = 0; i < dim; i++)
      if (data[i] != other_data[i]) return false;
    return true;
  }
}

template<typename Real>
Real VectorBase<Real>::Max() const {
  Real ans = - std::numeric_limits<Real>::infinity();
  const Real *data = data_;
  MatrixIndexT i, dim = dim_;
  for (i = 0; i + 4 <= dim; i += 4) {
    Real a1 = data[i], a2 = data[i+1], a3 = data[i+2], a4 = data[i+3];
    if (a1 > ans || a2 > ans || a3 > ans || a4 > ans) {
      Real b1 = (a1 > a2 ? a1 : a2), b2 = (a3 > a4 ? a3 : a4);
      if (b1 > ans) ans = b1;
      if (b2 > ans) ans = b2;
    }
  }
  for (; i < dim; i++)
    if (data[i] > ans) ans = data[i];
  return ans;
}

template<typename Real>
Real VectorBase<Real>::Max(MatrixIndexT *index_out) const {
  if (dim_ == 0) KALDI_ERR << "Empty vector";
  Real ans = - std::numeric_limits<Real>::infinity();
  MatrixIndexT index = 0;
  const Real *data = data_;
  MatrixIndexT i, dim = dim_;
  for (i = 0; i + 4 <= dim; i += 4) {
    Real a1 = data[i], a2 = data[i+1], a3 = data[i+2], a4 = data[i+3];
    if (a1 > ans || a2 > ans || a3 > ans || a4 > ans) {
      if (a1 > ans) { ans = a1; index = i; }
      if (a2 > ans) { ans = a2; index = i + 1; }
      if (a3 > ans) { ans = a3; index = i + 2; }
      if (a4 > ans) { ans = a4; index = i + 3; }
    }
  }
  for (; i < dim; i++)
    if (data[i] > ans) { ans = data[i]; index = i; }
  *index_out = index;
  return ans;
}

template<typename Real>
Real VectorBase<Real>::Min() const {
  Real ans = std::numeric_limits<Real>::infinity();
  const Real *data = data_;
  MatrixIndexT i, dim = dim_;
  for (i = 0; i + 4 <= dim; i += 4) {
    Real a1 = data[i], a2 = data[i+1], a3 = data[i+2], a4 = data[i+3];
    if (a1 < ans || a2 < ans || a3 < ans || a4 < ans) {
      Real b1 = (a1 < a2 ? a1 : a2), b2 = (a3 < a4 ? a3 : a4);
      if (b1 < ans) ans = b1;
      if (b2 < ans) ans = b2;
    }
  }
  for (; i < dim; i++)
    if (data[i] < ans) ans = data[i];
  return ans;
}

template<typename Real>
Real VectorBase<Real>::Min(MatrixIndexT *index_out) const {
  if (dim_ == 0) KALDI_ERR << "Empty vector";
  Real ans = std::numeric_limits<Real>::infinity();
  MatrixIndexT index = 0;
  const Real *data = data_;
  MatrixIndexT i, dim = dim_;
  for (i = 0; i + 4 <= dim; i += 4) {
    Real a1 = data[i], a2 = data[i+1], a3 = data[i+2], a4 = data[i+3];
    if (a1 < ans || a2 < ans || a3 < ans || a4 < ans) {
      if (a1 < ans) { ans = a1; index = i; }
      if (a2 < ans) { ans = a2; index = i + 1; }
      if (a3 < ans) { ans = a3; index = i + 2; }
      if (a4 < ans) { ans = a4; index = i + 3; }
    }
  }
  for (; i < dim; i++)
    if (data[i] < ans) { ans = data[i]; index = i; }
  *index_out = index;
  return ans;
}*/


// Copies column `col` of `mat` into this vector, converting each element
// from OtherReal to Real. The vector length must equal mat.NumRows().
// Elements are fetched one at a time through the matrix indexing operator;
// no attempt is made at a faster strided copy.
// Note: only the upper bound of `col` is asserted here.
template <typename Real>
template <typename OtherReal>
void VectorBase<Real>::CopyColFromMat(const MatrixBase<OtherReal> &mat,
                                      MatrixIndexT col) {
    KALDI_ASSERT(col < mat.NumCols());
    KALDI_ASSERT(dim_ == mat.NumRows());
    for (MatrixIndexT r = 0; r < dim_; r++) {
        data_[r] = static_cast<Real>(mat(r, col));
    }
}
// Explicit instantiations for every float/double combination.
template void VectorBase<float>::CopyColFromMat(const MatrixBase<float> &mat,
                                                MatrixIndexT col);
template void VectorBase<float>::CopyColFromMat(const MatrixBase<double> &mat,
                                                MatrixIndexT col);
template void VectorBase<double>::CopyColFromMat(const MatrixBase<float> &mat,
                                                 MatrixIndexT col);
template void VectorBase<double>::CopyColFromMat(const MatrixBase<double> &mat,
                                                 MatrixIndexT col);

// template<typename Real>
// void VectorBase<Real>::CopyDiagFromMat(const MatrixBase<Real> &M) {
// KALDI_ASSERT(dim_ == std::min(M.NumRows(), M.NumCols()));
// cblas_Xcopy(dim_, M.Data(), M.Stride() + 1, data_, 1);
//}

// template<typename Real>
// void VectorBase<Real>::CopyDiagFromPacked(const PackedMatrix<Real> &M) {
// KALDI_ASSERT(dim_ == M.NumCols());
// for (MatrixIndexT i = 0; i < dim_; i++)
// data_[i] = M(i, i);
//// could make this more efficient.
//}

// template<typename Real>
// Real VectorBase<Real>::Sum() const {
//// Do a dot-product with a size-1 array with a stride of 0 to
//// implement sum. This allows us to access SIMD operations in a
//// cross-platform way via your BLAS library.
// Real one(1);
// return cblas_Xdot(dim_, data_, 1, &one, 0);
//}

// template<typename Real>
// Real VectorBase<Real>::SumLog() const {
// double sum_log = 0.0;
// double prod = 1.0;
// for (MatrixIndexT i = 0; i < dim_; i++) {
// prod *= data_[i];
//// Possible future work (arnab): change these magic values to pre-defined
//// constants
// if (prod < 1.0e-10 || prod > 1.0e+10) {
// sum_log += Log(prod);
// prod = 1.0;
//}
//}
// if (prod != 1.0) sum_log += Log(prod);
// return sum_log;
//}

// template<typename Real>
// void VectorBase<Real>::AddRowSumMat(Real alpha, const MatrixBase<Real> &M,
// Real beta) {
// KALDI_ASSERT(dim_ == M.NumCols());
// MatrixIndexT num_rows = M.NumRows(), stride = M.Stride(), dim = dim_;
// Real *data = data_;

//// implement the function according to a dimension cutoff for computation
/// efficiency
// if (num_rows <= 64) {
// cblas_Xscal(dim, beta, data, 1);
// const Real *m_data = M.Data();
// for (MatrixIndexT i = 0; i < num_rows; i++, m_data += stride)
// cblas_Xaxpy(dim, alpha, m_data, 1, data, 1);

//} else {
// Vector<Real> ones(M.NumRows());
// ones.Set(1.0);
// this->AddMatVec(alpha, M, kTrans, ones, beta);
//}
//}

// template<typename Real>
// void VectorBase<Real>::AddColSumMat(Real alpha, const MatrixBase<Real> &M,
// Real beta) {
// KALDI_ASSERT(dim_ == M.NumRows());
// MatrixIndexT num_cols = M.NumCols();

//// implement the function according to a dimension cutoff for computation
/// efficiency
// if (num_cols <= 64) {
// for (MatrixIndexT i = 0; i < dim_; i++) {
// double sum = 0.0;
// const Real *src = M.RowData(i);
// for (MatrixIndexT j = 0; j < num_cols; j++)
// sum += src[j];
// data_[i] = alpha * sum + beta * data_[i];
//}
//} else {
// Vector<Real> ones(M.NumCols());
// ones.Set(1.0);
// this->AddMatVec(alpha, M, kNoTrans, ones, beta);
//}
//}

// template<typename Real>
// Real VectorBase<Real>::LogSumExp(Real prune) const {
// Real sum;
// if (sizeof(sum) == 8) sum = kLogZeroDouble;
// else sum = kLogZeroFloat;
// Real max_elem = Max(), cutoff;
// if (sizeof(Real) == 4) cutoff = max_elem + kMinLogDiffFloat;
// else cutoff = max_elem + kMinLogDiffDouble;
// if (prune > 0.0 && max_elem - prune > cutoff) // explicit pruning...
// cutoff = max_elem - prune;

// double sum_relto_max_elem = 0.0;

// for (MatrixIndexT i = 0; i < dim_; i++) {
// BaseFloat f = data_[i];
// if (f >= cutoff)
// sum_relto_max_elem += Exp(f - max_elem);
//}
// return max_elem + Log(sum_relto_max_elem);
//}

// template<typename Real>
// void VectorBase<Real>::InvertElements() {
// for (MatrixIndexT i = 0; i < dim_; i++) {
// data_[i] = static_cast<Real>(1 / data_[i]);
//}
//}

// template<typename Real>
// void VectorBase<Real>::ApplyLog() {
// for (MatrixIndexT i = 0; i < dim_; i++) {
// if (data_[i] < 0.0)
// KALDI_ERR << "Trying to take log of a negative number.";
// data_[i] = Log(data_[i]);
//}
//}

// template<typename Real>
// void VectorBase<Real>::ApplyLogAndCopy(const VectorBase<Real> &v) {
// KALDI_ASSERT(dim_ == v.Dim());
// for (MatrixIndexT i = 0; i < dim_; i++) {
// data_[i] = Log(v(i));
//}
//}

// template<typename Real>
// void VectorBase<Real>::ApplyExp() {
// for (MatrixIndexT i = 0; i < dim_; i++) {
// data_[i] = Exp(data_[i]);
//}
//}

// template<typename Real>
// void VectorBase<Real>::ApplyAbs() {
// for (MatrixIndexT i = 0; i < dim_; i++) { data_[i] = std::abs(data_[i]); }
//}

// template<typename Real>
// void VectorBase<Real>::Floor(const VectorBase<Real> &v, Real floor_val,
// MatrixIndexT *floored_count) {
// KALDI_ASSERT(dim_ == v.dim_);
// if (floored_count == nullptr) {
// for (MatrixIndexT i = 0; i < dim_; i++) {
// data_[i] = std::max(v.data_[i], floor_val);
//}
//} else {
// MatrixIndexT num_floored = 0;
// for (MatrixIndexT i = 0; i < dim_; i++) {
// if (v.data_[i] < floor_val) {
// data_[i] = floor_val;
// num_floored++;
//} else {
// data_[i] = v.data_[i];
//}
//}
//*floored_count = num_floored;
//}
//}

// template<typename Real>
// void VectorBase<Real>::Ceiling(const VectorBase<Real> &v, Real ceil_val,
// MatrixIndexT *ceiled_count) {
// KALDI_ASSERT(dim_ == v.dim_);
// if (ceiled_count == nullptr) {
// for (MatrixIndexT i = 0; i < dim_; i++) {
// data_[i] = std::min(v.data_[i], ceil_val);
//}
//} else {
// MatrixIndexT num_changed = 0;
// for (MatrixIndexT i = 0; i < dim_; i++) {
// if (v.data_[i] > ceil_val) {
// data_[i] = ceil_val;
// num_changed++;
//} else {
// data_[i] = v.data_[i];
//}
//}
//*ceiled_count = num_changed;
//}
//}

// template<typename Real>
// MatrixIndexT VectorBase<Real>::ApplyFloor(const VectorBase<Real> &floor_vec)
// {
// KALDI_ASSERT(floor_vec.Dim() == dim_);
// MatrixIndexT num_floored = 0;
// for (MatrixIndexT i = 0; i < dim_; i++) {
// if (data_[i] < floor_vec(i)) {
// data_[i] = floor_vec(i);
// num_floored++;
//}
//}
// return num_floored;
//}

// template<typename Real>
// Real VectorBase<Real>::ApplySoftMax() {
// Real max = this->Max(), sum = 0.0;
// for (MatrixIndexT i = 0; i < dim_; i++) {
// sum += (data_[i] = Exp(data_[i] - max));
//}
// this->Scale(1.0 / sum);
// return max + Log(sum);
//}

// template<typename Real>
// Real VectorBase<Real>::ApplyLogSoftMax() {
// Real max = this->Max(), sum = 0.0;
// for (MatrixIndexT i = 0; i < dim_; i++) {
// sum += Exp((data_[i] -= max));
//}
// sum = Log(sum);
// this->Add(-1.0 * sum);
// return max + sum;
//}

//#ifdef HAVE_MKL
// template<>
// void VectorBase<float>::Tanh(const VectorBase<float> &src) {
// KALDI_ASSERT(dim_ == src.dim_);
// vsTanh(dim_, src.data_, data_);
//}
// template<>
// void VectorBase<double>::Tanh(const VectorBase<double> &src) {
// KALDI_ASSERT(dim_ == src.dim_);
// vdTanh(dim_, src.data_, data_);
//}
//#else
// template<typename Real>
// void VectorBase<Real>::Tanh(const VectorBase<Real> &src) {
// KALDI_ASSERT(dim_ == src.dim_);
// for (MatrixIndexT i = 0; i < dim_; i++) {
// Real x = src.data_[i];
// if (x > 0.0) {
// Real inv_expx = Exp(-x);
// x = -1.0 + 2.0 / (1.0 + inv_expx * inv_expx);
//} else {
// Real expx = Exp(x);
// x = 1.0 - 2.0 / (1.0 + expx * expx);
//}
// data_[i] = x;
//}
//}
//#endif

//#ifdef HAVE_MKL
//// Implementing sigmoid based on tanh.
// template<>
// void VectorBase<float>::Sigmoid(const VectorBase<float> &src) {
// KALDI_ASSERT(dim_ == src.dim_);
// this->CopyFromVec(src);
// this->Scale(0.5);
// vsTanh(dim_, data_, data_);
// this->Add(1.0);
// this->Scale(0.5);
//}
// template<>
// void VectorBase<double>::Sigmoid(const VectorBase<double> &src) {
// KALDI_ASSERT(dim_ == src.dim_);
// this->CopyFromVec(src);
// this->Scale(0.5);
// vdTanh(dim_, data_, data_);
// this->Add(1.0);
// this->Scale(0.5);
//}
//#else
// template<typename Real>
// void VectorBase<Real>::Sigmoid(const VectorBase<Real> &src) {
// KALDI_ASSERT(dim_ == src.dim_);
// for (MatrixIndexT i = 0; i < dim_; i++) {
// Real x = src.data_[i];
//// We aim to avoid floating-point overflow here.
// if (x > 0.0) {
// x = 1.0 / (1.0 + Exp(-x));
//} else {
// Real ex = Exp(x);
// x = ex / (ex + 1.0);
//}
// data_[i] = x;
//}
//}
//#endif


// template<typename Real>
// void VectorBase<Real>::Add(Real c) {
// for (MatrixIndexT i = 0; i < dim_; i++) {
// data_[i] += c;
//}
//}

// template<typename Real>
// void VectorBase<Real>::Scale(Real alpha) {
// cblas_Xscal(dim_, alpha, data_, 1);
//}

// template<typename Real>
// void VectorBase<Real>::MulElements(const VectorBase<Real> &v) {
// KALDI_ASSERT(dim_ == v.dim_);
// for (MatrixIndexT i = 0; i < dim_; i++) {
// data_[i] *= v.data_[i];
//}
//}

// template<typename Real>  // Set each element to y = (x == orig ? changed :
// x).
// void VectorBase<Real>::ReplaceValue(Real orig, Real changed) {
// Real *data = data_;
// for (MatrixIndexT i = 0; i < dim_; i++)
// if (data[i] == orig) data[i] = changed;
//}


// template<typename Real>
// template<typename OtherReal>
// void VectorBase<Real>::MulElements(const VectorBase<OtherReal> &v) {
// KALDI_ASSERT(dim_ == v.Dim());
// const OtherReal *other_ptr = v.Data();
// for (MatrixIndexT i = 0; i < dim_; i++) {
// data_[i] *= other_ptr[i];
//}
//}
//// instantiate template.
// template
// void VectorBase<float>::MulElements(const VectorBase<double> &v);
// template
// void VectorBase<double>::MulElements(const VectorBase<float> &v);


// template<typename Real>
// void VectorBase<Real>::AddVecVec(Real alpha, const VectorBase<Real> &v,
// const VectorBase<Real> &r, Real beta) {
// KALDI_ASSERT(v.data_ != this->data_ && r.data_ != this->data_);
//// We pretend that v is a band-diagonal matrix.
// KALDI_ASSERT(dim_ == v.dim_ && dim_ == r.dim_);
// cblas_Xgbmv(kNoTrans, dim_, dim_, 0, 0, alpha, v.data_, 1,
// r.data_, 1, beta, this->data_, 1);
//}


// template<typename Real>
// void VectorBase<Real>::DivElements(const VectorBase<Real> &v) {
// KALDI_ASSERT(dim_ == v.dim_);
// for (MatrixIndexT i = 0; i < dim_; i++) {
// data_[i] /= v.data_[i];
//}
//}

// template<typename Real>
// template<typename OtherReal>
// void VectorBase<Real>::DivElements(const VectorBase<OtherReal> &v) {
// KALDI_ASSERT(dim_ == v.Dim());
// const OtherReal *other_ptr = v.Data();
// for (MatrixIndexT i = 0; i < dim_; i++) {
// data_[i] /= other_ptr[i];
//}
//}
//// instantiate template.
// template
// void VectorBase<float>::DivElements(const VectorBase<double> &v);
// template
// void VectorBase<double>::DivElements(const VectorBase<float> &v);

// template<typename Real>
// void VectorBase<Real>::AddVecDivVec(Real alpha, const VectorBase<Real> &v,
// const VectorBase<Real> &rr, Real beta) {
// KALDI_ASSERT((dim_ == v.dim_ && dim_ == rr.dim_));
// for (MatrixIndexT i = 0; i < dim_; i++) {
// data_[i] = alpha * v.data_[i]/rr.data_[i] + beta * data_[i] ;
//}
//}

// template<typename Real>
// template<typename OtherReal>
// void VectorBase<Real>::AddVec(const Real alpha, const VectorBase<OtherReal>
// &v) {
// KALDI_ASSERT(dim_ == v.dim_);
//// remove __restrict__ if it causes compilation problems.
// Real *__restrict__ data = data_;
// OtherReal *__restrict__ other_data = v.data_;
// MatrixIndexT dim = dim_;
// if (alpha != 1.0)
// for (MatrixIndexT i = 0; i < dim; i++)
// data[i] += alpha * other_data[i];
// else
// for (MatrixIndexT i = 0; i < dim; i++)
// data[i] += other_data[i];
//}

// template
// void VectorBase<float>::AddVec(const float alpha, const VectorBase<double>
// &v);
// template
// void VectorBase<double>::AddVec(const double alpha, const VectorBase<float>
// &v);

// template<typename Real>
// template<typename OtherReal>
// void VectorBase<Real>::AddVec2(const Real alpha, const VectorBase<OtherReal>
// &v) {
// KALDI_ASSERT(dim_ == v.dim_);
//// remove __restrict__ if it causes compilation problems.
// Real *__restrict__ data = data_;
// OtherReal *__restrict__ other_data = v.data_;
// MatrixIndexT dim = dim_;
// if (alpha != 1.0)
// for (MatrixIndexT i = 0; i < dim; i++)
// data[i] += alpha * other_data[i] * other_data[i];
// else
// for (MatrixIndexT i = 0; i < dim; i++)
// data[i] += other_data[i] * other_data[i];
//}

// template
// void VectorBase<float>::AddVec2(const float alpha, const VectorBase<double>
// &v);
// template
// void VectorBase<double>::AddVec2(const double alpha, const VectorBase<float>
// &v);


// Reads a vector from `is` into this fixed-size object.
// The parsing itself is delegated to Vector<Real>::Read on a temporary; the
// result is copied over only if its dimension matches Dim(), otherwise a
// size-mismatch error is raised via KALDI_ERR.
template <typename Real>
void VectorBase<Real>::Read(std::istream &is, bool binary) {
    Vector<Real> loaded;
    loaded.Read(is, binary);
    if (loaded.Dim() != Dim()) {
        KALDI_ERR << "VectorBase<Real>::Read, size mismatch " << Dim()
                  << " vs. " << loaded.Dim();
    }
    CopyFromVec(loaded);
}


// Reads a vector from `is`, resizing *this to the size found in the stream.
//
// Binary mode: expects the token "FV" (when sizeof(Real) == 4) or "DV"
// (otherwise), then a 32-bit element count, then count * sizeof(Real) raw
// bytes. If the next character indicates the other precision, the data is
// read through a Vector of that type and converted.
//
// Text mode: expects the form " [ 1.1 2.0 3.4 ]\n". The variant "[]" is
// accepted as an empty vector, and the words "inf", "infinity" and "nan"
// (compared with KALDI_STRCASECMP) are accepted as elements, with a warning.
// A newline before the closing ']' is an error, since that would be valid
// only for a matrix.
//
// Every parse failure funnels into the `bad:` label, which raises a
// KALDI_ERR carrying the specific reason plus the stream offset at entry and
// at the point of failure.
template <typename Real>
void Vector<Real>::Read(std::istream &is, bool binary) {
    std::ostringstream specific_error;
    // Hold the start offset in the stream's own offset type. Narrowing it to
    // MatrixIndexT (presumably a 32-bit index type — confirm in
    // matrix-common.h) could misreport the position for very large streams.
    const std::streamoff pos_at_start = is.tellg();

    if (binary) {
        int peekval = Peek(is, binary);
        const char *my_token = (sizeof(Real) == 4 ? "FV" : "DV");
        char other_token_start = (sizeof(Real) == 4 ? 'D' : 'F');
        if (peekval == other_token_start) {  // need to instantiate the other
                                             // type to read it.
            typedef typename OtherReal<Real>::Real OtherType;  // if Real ==
                                                               // float,
                                                               // OtherType ==
                                                               // double, and
                                                               // vice versa.
            Vector<OtherType> other(this->Dim());
            other.Read(is, binary);
            if (this->Dim() != other.Dim()) this->Resize(other.Dim());
            this->CopyFromVec(other);
            return;
        }
        std::string token;
        ReadToken(is, binary, &token);
        if (token != my_token) {
            // Keep the diagnostic short if the stream contains garbage.
            if (token.length() > 20) token = token.substr(0, 17) + "...";
            specific_error << ": Expected token " << my_token << ", got "
                           << token;
            goto bad;
        }
        int32 size;
        ReadBasicType(is, binary, &size);  // throws on error.
        if (size < 0) {
            // A negative count can only come from a corrupt or malformed
            // header; report it as a read failure instead of handing it to
            // Resize().
            specific_error << "Invalid (negative) vector size " << size
                           << " in binary header";
            goto bad;
        }
        if ((MatrixIndexT)size != this->Dim()) this->Resize(size);
        if (size > 0)
            is.read(reinterpret_cast<char *>(this->data_), sizeof(Real) * size);
        if (is.fail()) {
            specific_error
                << "Error reading vector data (binary mode); truncated "
                   "stream? (size = "
                << size << ")";
            goto bad;
        }
        return;
    } else {  // Text mode reading; format is " [ 1.1 2.0 3.4 ]\n"
        std::string s;
        is >> s;
        // if ((s.compare("DV") == 0) || (s.compare("FV") == 0)) {  // Back
        // compatibility.
        //  is >> s;  // get dimension
        //  is >> s;  // get "["
        // }
        if (is.fail()) {
            specific_error << "EOF while trying to read vector.";
            goto bad;
        }
        if (s.compare("[]") == 0) {
            Resize(0);
            return;
        }  // tolerate this variant.
        if (s.compare("[")) {
            if (s.length() > 20) s = s.substr(0, 17) + "...";
            specific_error << "Expected \"[\" but got " << s;
            goto bad;
        }
        // Elements are accumulated here because the count is not known until
        // the closing ']' is seen.
        std::vector<Real> data;
        while (1) {
            int i = is.peek();
            if (i == '-' || (i >= '0' && i <= '9')) {  // common cases first.
                Real r;
                is >> r;
                if (is.fail()) {
                    specific_error << "Failed to read number.";
                    goto bad;
                }
                if (!std::isspace(is.peek()) && is.peek() != ']') {
                    specific_error << "Expected whitespace after number.";
                    goto bad;
                }
                data.push_back(r);
                // But don't eat whitespace... we want to check that it's not
                // newlines
                // which would be valid only for a matrix.
            } else if (i == ' ' || i == '\t') {
                is.get();
            } else if (i == ']') {
                is.get();  // eat the ']'
                this->Resize(data.size());
                for (size_t j = 0; j < data.size(); j++)
                    this->data_[j] = data[j];
                i = is.peek();
                if (static_cast<char>(i) == '\r') {
                    is.get();
                    is.get();  // get \r\n (must eat what we wrote)
                } else if (static_cast<char>(i) == '\n') {
                    is.get();
                }  // get \n (must eat what we wrote)
                if (is.fail()) {
                    KALDI_WARN << "After end of vector data, read error.";
                    // we got the data we needed, so just warn for this error.
                }
                return;  // success.
            } else if (i == -1) {
                specific_error << "EOF while reading vector data.";
                goto bad;
            } else if (i == '\n' || i == '\r') {
                specific_error << "Newline found while reading vector (maybe "
                                  "it's a matrix?)";
                goto bad;
            } else {
                is >> s;  // read string.
                if (!KALDI_STRCASECMP(s.c_str(), "inf") ||
                    !KALDI_STRCASECMP(s.c_str(), "infinity")) {
                    data.push_back(std::numeric_limits<Real>::infinity());
                    KALDI_WARN << "Reading infinite value into vector.";
                } else if (!KALDI_STRCASECMP(s.c_str(), "nan")) {
                    data.push_back(std::numeric_limits<Real>::quiet_NaN());
                    KALDI_WARN << "Reading NaN value into vector.";
                } else {
                    if (s.length() > 20) s = s.substr(0, 17) + "...";
                    specific_error << "Expecting numeric vector data, got "
                                   << s;
                    goto bad;
                }
            }
        }
    }
// This point is reached only through `goto bad` (every successful path, and
// the text-mode loop, returns directly).
bad:
    KALDI_ERR << "Failed to read vector from stream.  " << specific_error.str()
              << " File position at start is " << pos_at_start << ", currently "
              << is.tellg();
}


// Writes the vector to `os`.
// Binary mode emits the token "FV" (when sizeof(Real) == 4) or "DV"
// (otherwise), the dimension as a 32-bit integer, then the raw element
// bytes. Text mode emits " [ e0 e1 ... ]\n".
// A KALDI_ERR is raised if the stream is not good on entry or after writing.
template <typename Real>
void VectorBase<Real>::Write(std::ostream &os, bool binary) const {
    if (!os.good()) {
        KALDI_ERR << "Failed to write vector to stream: stream not good";
    }
    if (!binary) {
        os << " [ ";
        for (MatrixIndexT k = 0; k < Dim(); k++) {
            os << (*this)(k) << " ";
        }
        os << "]\n";
    } else {
        std::string header = (sizeof(Real) == 4 ? "FV" : "DV");
        WriteToken(os, binary, header);

        // The on-disk dimension is always 32 bits wide; verify nothing was
        // lost when narrowing.
        int32 num_elems = Dim();
        KALDI_ASSERT(Dim() == (MatrixIndexT)num_elems);
        WriteBasicType(os, binary, num_elems);
        os.write(reinterpret_cast<const char *>(Data()),
                 sizeof(Real) * num_elems);
    }
    if (!os.good()) {
        KALDI_ERR << "Failed to write vector to stream";
    }
}


// template<typename Real>
// void VectorBase<Real>::AddVec2(const Real alpha, const VectorBase<Real> &v) {
// KALDI_ASSERT(dim_ == v.dim_);
// for (MatrixIndexT i = 0; i < dim_; i++)
// data_[i] += alpha * v.data_[i] * v.data_[i];
//}

//// this <-- beta*this + alpha*M*v.
// template<typename Real>
// void VectorBase<Real>::AddTpVec(const Real alpha, const TpMatrix<Real> &M,
// const MatrixTransposeType trans,
// const VectorBase<Real> &v,
// const Real beta) {
// KALDI_ASSERT(dim_ == v.dim_ && dim_ == M.NumRows());
// if (beta == 0.0) {
// if (&v != this) CopyFromVec(v);
// MulTp(M, trans);
// if (alpha != 1.0) Scale(alpha);
//} else {
// Vector<Real> tmp(v);
// tmp.MulTp(M, trans);
// if (beta != 1.0) Scale(beta);  // *this <-- beta * *this
// AddVec(alpha, tmp);          // *this += alpha * M * v
//}
//}

// template<typename Real>
// Real VecMatVec(const VectorBase<Real> &v1, const MatrixBase<Real> &M,
// const VectorBase<Real> &v2) {
// KALDI_ASSERT(v1.Dim() == M.NumRows() && v2.Dim() == M.NumCols());
// Vector<Real> vtmp(M.NumRows());
// vtmp.AddMatVec(1.0, M, kNoTrans, v2, 0.0);
// return VecVec(v1, vtmp);
//}

// template
// float VecMatVec(const VectorBase<float> &v1, const MatrixBase<float> &M,
// const VectorBase<float> &v2);
// template
// double VecMatVec(const VectorBase<double> &v1, const MatrixBase<double> &M,
// const VectorBase<double> &v2);

// Exchanges the contents of this vector with `*other` by swapping the
// dimension and the data pointer; no elements are copied.
template <typename Real>
void Vector<Real>::Swap(Vector<Real> *other) {
    std::swap(this->dim_, other->dim_);
    std::swap(this->data_, other->data_);
}


// template<typename Real>
// void VectorBase<Real>::AddDiagMat2(
// Real alpha, const MatrixBase<Real> &M,
// MatrixTransposeType trans, Real beta) {
// if (trans == kNoTrans) {
// KALDI_ASSERT(this->dim_ == M.NumRows());
// MatrixIndexT rows = this->dim_, cols = M.NumCols(),
// mat_stride = M.Stride();
// Real *data = this->data_;
// const Real *mat_data = M.Data();
// for (MatrixIndexT i = 0; i < rows; i++, mat_data += mat_stride, data++)
//*data = beta * *data + alpha * cblas_Xdot(cols,mat_data,1,mat_data,1);
//} else {
// KALDI_ASSERT(this->dim_ == M.NumCols());
// MatrixIndexT rows = M.NumRows(), cols = this->dim_,
// mat_stride = M.Stride();
// Real *data = this->data_;
// const Real *mat_data = M.Data();
// for (MatrixIndexT i = 0; i < cols; i++, mat_data++, data++)
//*data = beta * *data + alpha * cblas_Xdot(rows, mat_data, mat_stride,
// mat_data, mat_stride);
//}
//}

// template<typename Real>
// void VectorBase<Real>::AddDiagMatMat(
// Real alpha,
// const MatrixBase<Real> &M, MatrixTransposeType transM,
// const MatrixBase<Real> &N, MatrixTransposeType transN,
// Real beta) {
// MatrixIndexT dim = this->dim_,
// M_col_dim = (transM == kTrans ? M.NumRows() : M.NumCols()),
// N_row_dim = (transN == kTrans ? N.NumCols() : N.NumRows());
// KALDI_ASSERT(M_col_dim == N_row_dim); // this is the dimension we sum over
// MatrixIndexT M_row_stride = M.Stride(), M_col_stride = 1;
// if (transM == kTrans) std::swap(M_row_stride, M_col_stride);
// MatrixIndexT N_row_stride = N.Stride(), N_col_stride = 1;
// if (transN == kTrans) std::swap(N_row_stride, N_col_stride);

// Real *data = this->data_;
// const Real *Mdata = M.Data(), *Ndata = N.Data();
// for (MatrixIndexT i = 0; i < dim; i++, Mdata += M_row_stride, Ndata +=
// N_col_stride, data++) {
//*data = beta * *data + alpha * cblas_Xdot(M_col_dim, Mdata, M_col_stride,
// Ndata, N_row_stride);
//}
//}


// Explicit instantiations: generate the float and double versions of both
// vector classes in this translation unit, so the member definitions above
// are available to code that only includes the header.
template class Vector<float>;
template class Vector<double>;
template class VectorBase<float>;
template class VectorBase<double>;

}  // namespace kaldi


================================================
FILE: runtime/engine/common/matrix/kaldi-vector.h
================================================
// matrix/kaldi-vector.h

// Copyright 2009-2012   Ondrej Glembek;  Microsoft Corporation;  Lukas Burget;
//                       Saarland University (Author: Arnab Ghoshal);
//                       Ariya Rastrow;  Petr Schwarz;  Yanmin Qian;
//                       Karel Vesely;  Go Vivace Inc.;  Arnab Ghoshal
//                       Wei Shi;
//                2015   Guoguo Chen
//                2017   Daniel Galvez
//                2019   Yiwen Shao

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_MATRIX_KALDI_VECTOR_H_
#define KALDI_MATRIX_KALDI_VECTOR_H_ 1

#include "matrix/matrix-common.h"

namespace kaldi {

/// \addtogroup matrix_group
/// @{

///  Provides a vector abstraction class.
///  This class provides a way to work with vectors in kaldi.
///  It encapsulates basic operations and memory optimizations.
///
///  The class stores only a data pointer and a dimension.  Its constructor
///  and destructor are protected and the destructor does not free data_, so
///  objects are created and destroyed through derived classes such as
///  Vector and SubVector.
template <typename Real>
class VectorBase {
  public:
    /// Set vector to all zeros.
    void SetZero();

    /// Returns true if the vector is all zeros.
    /// NOTE(review): `cutoff` is presumably the tolerance within which an
    /// element still counts as zero -- confirm against the definition.
    bool IsZero(Real cutoff = 1.0e-06) const;  // replace magic number

    /// Set all members of a vector to a specified value.
    void Set(Real f);

    /// Returns the  dimension of the vector.
    inline MatrixIndexT Dim() const { return dim_; }

    /// Returns the size in memory of the vector, in bytes.
    /// The product dim_ * sizeof(Real) is returned as a MatrixIndexT, i.e. it
    /// is narrowed to the index type.
    inline MatrixIndexT SizeInBytes() const { return (dim_ * sizeof(Real)); }

    /// Returns a pointer to the start of the vector's data.
    inline Real *Data() { return data_; }

    /// Returns a pointer to the start of the vector's data (const).
    inline const Real *Data() const { return data_; }

    /// Indexing  operator (const).
    /// The index is range-checked only through KALDI_PARANOID_ASSERT; the
    /// unsigned casts let a single comparison reject negative indices too.
    inline Real operator()(MatrixIndexT i) const {
        KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
                              static_cast<UnsignedMatrixIndexT>(dim_));
        return *(data_ + i);
    }

    /// Indexing operator (non-const).
    /// Same range check as the const overload; returns a writable reference.
    inline Real &operator()(MatrixIndexT i) {
        KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
                              static_cast<UnsignedMatrixIndexT>(dim_));
        return *(data_ + i);
    }

    /** @brief Returns a sub-vector of a vector (a range of elements).
     *  @param o [in] Origin, 0 <= o <= Dim()
     *  @param l [in] Length, 0 <= l <= Dim()-o
     *  @return A SubVector object that aliases the data of the Vector object.
     *  The bounds are the ones asserted by the SubVector constructor.
     *  See @c SubVector class for details   */
    SubVector<Real> Range(const MatrixIndexT o, const MatrixIndexT l) {
        return SubVector<Real>(*this, o, l);
    }

    /** @brief Returns a const sub-vector of a vector (a range of elements).
     *  @param o [in] Origin, 0 <= o <= Dim()
     *  @param l [in] Length, 0 <= l <= Dim()-o
     *  @return A SubVector object that aliases the data of the Vector object.
     *  The bounds are the ones asserted by the SubVector constructor.
     *  See @c SubVector class for details   */
    const SubVector<Real> Range(const MatrixIndexT o,
                                const MatrixIndexT l) const {
        return SubVector<Real>(*this, o, l);
    }

    /// Copy data from another vector (must match own size).
    void CopyFromVec(const VectorBase<Real> &v);

    /// Copy data from another vector of different type (double vs. float)
    template <typename OtherReal>
    void CopyFromVec(const VectorBase<OtherReal> &v);

    /// Performs a row stack of the matrix M
    void CopyRowsFromMat(const MatrixBase<Real> &M);
    /// Row stack of the matrix M with type conversion.
    template <typename OtherReal>
    void CopyRowsFromMat(const MatrixBase<OtherReal> &M);

    /// Performs a column stack of the matrix M
    void CopyColsFromMat(const MatrixBase<Real> &M);

    /// Extracts a row of the matrix M.  Could also do this with
    /// this->Copy(M[row]).
    void CopyRowFromMat(const MatrixBase<Real> &M, MatrixIndexT row);
    /// Extracts a row of the matrix M with type conversion.
    template <typename OtherReal>
    void CopyRowFromMat(const MatrixBase<OtherReal> &M, MatrixIndexT row);

    /// Extracts a column of the matrix M.
    template <typename OtherReal>
    void CopyColFromMat(const MatrixBase<OtherReal> &M, MatrixIndexT col);

    /// Reads from C++ stream; `binary` presumably selects binary vs. text
    /// format, mirroring Write() -- confirm against the definition.
    /// Throws exception on failure
    void Read(std::istream &in, bool binary);

    /// Writes to C++ stream (option to write in binary).
    void Write(std::ostream &Out, bool binary) const;

    // The float and double instantiations are friends of every instantiation
    // (presumably so that the cross-type copy functions can reach the
    // protected members of the other type).
    friend class VectorBase<double>;
    friend class VectorBase<float>;

  protected:
    /// Destructor;  does not deallocate memory, this is handled by child
    /// classes.
    /// This destructor is protected so this object can only be
    /// deleted via a child.
    ~VectorBase() {}

    /// Empty initializer, corresponds to vector of zero size.
    explicit VectorBase() : data_(NULL), dim_(0) {
        KALDI_ASSERT_IS_FLOATING_TYPE(Real);
    }

    /// data memory area
    Real *data_;
    /// dimension of vector
    MatrixIndexT dim_;
    KALDI_DISALLOW_COPY_AND_ASSIGN(VectorBase);
};  // class VectorBase

/** @brief A class representing a vector.
 *
 *  This class provides a way to work with vectors in kaldi.
 *  It encapsulates basic operations and memory optimizations.
 *  Unlike VectorBase and SubVector, a Vector can be resized and its
 *  destructor calls Destroy() to release its memory.  */
template <typename Real>
class Vector : public VectorBase<Real> {
  public:
    /// Constructor that takes no arguments.  Initializes to empty.
    Vector() : VectorBase<Real>() {}

    /// Constructor with specific size.  Sets to all-zero by default;
    /// if resize_type == kUndefined, memory contents are undefined.
    explicit Vector(const MatrixIndexT s,
                    MatrixResizeType resize_type = kSetZero)
        : VectorBase<Real>() {
        Resize(s, resize_type);
    }

    /// Copy constructor from CUDA vector
    /// This is defined in ../cudamatrix/cu-vector.h
    /// (currently disabled: the declaration below is commented out)
    // template<typename OtherReal>
    // explicit Vector(const CuVectorBase<OtherReal> &cu);

    /// Copy constructor.  The need for this is controversial.
    /// Resizes to v.Dim() without initializing, then copies the elements.
    Vector(const Vector<Real> &v)
        : VectorBase<Real>() {  //  (cannot be explicit)
        Resize(v.Dim(), kUndefined);
        this->CopyFromVec(v);
    }

    /// Copy-constructor from base-class, needed to copy from SubVector.
    explicit Vector(const VectorBase<Real> &v) : VectorBase<Real>() {
        Resize(v.Dim(), kUndefined);
        this->CopyFromVec(v);
    }

    /// Type conversion constructor.
    /// Copies from a vector with a different element type (float vs. double).
    template <typename OtherReal>
    explicit Vector(const VectorBase<OtherReal> &v) : VectorBase<Real>() {
        Resize(v.Dim(), kUndefined);
        this->CopyFromVec(v);
    }

    // Took this out since it is unsafe : Arnab
    //  /// Constructor from a pointer and a size; copies the data to a location
    //  /// it owns.
    //  Vector(const Real* Data, const MatrixIndexT s): VectorBase<Real>() {
    //    Resize(s);
    //    CopyFromPtr(Data, s);
    //  }


    /// Swaps the contents of *this and *other.  Shallow swap.
    void Swap(Vector<Real> *other);

    /// Destructor.  Deallocates memory.
    ~Vector() { Destroy(); }

    /// Read function using C++ streams.
    /// NOTE(review): the signature has no flag for adding to existing
    /// contents; whether the vector is resized to fit the input has to be
    /// confirmed against the definition.
    void Read(std::istream &in, bool binary);

    /// Set vector to a specified size (can be zero).
    /// The value of the new data depends on resize_type:
    ///   -if kSetZero, the new data will be zero
    ///   -if kUndefined, the new data will be undefined
    ///   -if kCopyData, the new data will be the same as the old data in any
    ///      shared positions, and zero elsewhere.
    /// This function takes time proportional to the number of data elements.
    void Resize(MatrixIndexT length, MatrixResizeType resize_type = kSetZero);

    /// Remove one element and shifts later elements down.
    void RemoveElement(MatrixIndexT i);

    /// Assignment operator.
    /// Resizes *this to other.Dim() (contents undefined) and then copies.
    Vector<Real> &operator=(const Vector<Real> &other) {
        Resize(other.Dim(), kUndefined);
        this->CopyFromVec(other);
        return *this;
    }

    /// Assignment operator that takes VectorBase.
    /// Resizes *this to other.Dim() (contents undefined) and then copies.
    Vector<Real> &operator=(const VectorBase<Real> &other) {
        Resize(other.Dim(), kUndefined);
        this->CopyFromVec(other);
        return *this;
    }

  private:
    /// Init assumes the current contents of the class are invalid (i.e. junk or
    /// has already been freed), and it sets the vector to newly allocated
    /// memory
    /// with the specified dimension.  dim == 0 is acceptable.  The memory
    /// contents
    /// pointed to by data_ will be undefined.
    void Init(const MatrixIndexT dim);

    /// Destroy function, called internally.
    void Destroy();
};


/// Represents a non-allocating general vector which can be defined
/// as a sub-vector of higher-level vector [or as the row of a matrix].
/// A SubVector only stores a pointer into memory owned elsewhere together
/// with a length; it never allocates or frees that memory.
template <typename Real>
class SubVector : public VectorBase<Real> {
  public:
    /// Constructor from a Vector or SubVector.
    /// SubVectors are not const-safe and it's very hard to make them
    /// so for now we just give up.  This function contains const_cast.
    /// @param t       vector whose data is aliased
    /// @param origin  index of the first aliased element
    /// @param length  number of aliased elements
    SubVector(const VectorBase<Real> &t,
              const MatrixIndexT origin,
              const MatrixIndexT length)
        : VectorBase<Real>() {
        // following assert equiv to origin>=0 && length>=0 &&
        // origin+length <= rt.dim_
        KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(origin) +
                         static_cast<UnsignedMatrixIndexT>(length) <=
                     static_cast<UnsignedMatrixIndexT>(t.Dim()));
        VectorBase<Real>::data_ = const_cast<Real *>(t.Data() + origin);
        VectorBase<Real>::dim_ = length;
    }

    /// This constructor initializes the vector to point at the contents
    /// of this packed matrix (SpMatrix or TpMatrix).
    // SubVector(const PackedMatrix<Real> &M) {
    // VectorBase<Real>::data_ = const_cast<Real*> (M.Data());
    // VectorBase<Real>::dim_   = (M.NumRows()*(M.NumRows()+1))/2;
    //}

    /// Copy constructor; the copy aliases the same memory as `other`.
    SubVector(const SubVector &other) : VectorBase<Real>() {
        // this copy constructor needed for Range() to work in base class.
        VectorBase<Real>::data_ = other.data_;
        VectorBase<Real>::dim_ = other.dim_;
    }

    /// Constructor from a pointer to memory and a length.  Keeps a pointer
    /// to the data but does not take ownership (will never delete).
    /// Caution: this constructor enables you to evade const constraints.
    SubVector(const Real *data, MatrixIndexT length) : VectorBase<Real>() {
        VectorBase<Real>::data_ = const_cast<Real *>(data);
        VectorBase<Real>::dim_ = length;
    }

    /// Aliases row `row` of `matrix`; the length is matrix.NumCols().
    /// This operation does not preserve const-ness, so be careful.
    SubVector(const MatrixBase<Real> &matrix, MatrixIndexT row)
        : VectorBase<Real>() {
        VectorBase<Real>::data_ = const_cast<Real *>(matrix.RowData(row));
        VectorBase<Real>::dim_ = matrix.NumCols();
    }

    ~SubVector() {}  ///< Destructor (does nothing; no pointers are owned here).

  private:
    /// Disallow assignment operator.
    /// Declared but intentionally never defined.  An empty body here would
    /// flow off the end of a function that returns a reference, which is
    /// undefined behaviour if the operator is ever invoked.
    SubVector &operator=(const SubVector &other);
};

/// @} end of "addtogroup matrix_group"
/// \addtogroup matrix_funcs_io
/// @{
/// Output to a C++ stream.  Non-binary by default (use Write for
/// binary output).
template <typename Real>
std::ostream &operator<<(std::ostream &out, const VectorBase<Real> &v);

/// Input from a C++ stream.  Will automatically read text or
/// binary data from the stream.
/// This overload takes the base class, which declares no Resize().
template <typename Real>
std::istream &operator>>(std::istream &in, VectorBase<Real> &v);

/// Input from a C++ stream. Will automatically read text or
/// binary data from the stream.
/// This overload takes a Vector.
/// NOTE(review): presumably the vector is resized to fit the input --
/// confirm against the definition.
template <typename Real>
std::istream &operator>>(std::istream &in, Vector<Real> &v);
/// @} end of \addtogroup matrix_funcs_io

/// \addtogroup matrix_funcs_scalar
/// @{


// template<typename Real>
// bool ApproxEqual(const VectorBase<Real> &a,
// const VectorBase<Real> &b, Real tol = 0.01) {
// return a.ApproxEqual(b, tol);
//}

// template<typename Real>
// inline void AssertEqual(VectorBase<Real> &a, VectorBase<Real> &b,
// float tol = 0.01) {
// KALDI_ASSERT(a.ApproxEqual(b, tol));
//}


}  // namespace kaldi

// we need to include the implementation
#include "matrix/kaldi-vector-inl.h"


#endif  // KALDI_MATRIX_KALDI_VECTOR_H_


================================================
FILE: runtime/engine/common/matrix/matrix-common.h
================================================
// matrix/matrix-common.h

// Copyright 2009-2011  Microsoft Corporation

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//  http://www.apache.org/licenses/LICENSE-2.0

// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_MATRIX_MATRIX_COMMON_H_
#define KALDI_MATRIX_MATRIX_COMMON_H_

// This file contains some #includes, forward declarations
// and typedefs that are needed by all the main header
// files in this directory.

#include "base/kaldi-common.h"

namespace kaldi {
// The values of this enum are equal to the CblasTrans and CblasNoTrans
// constants from the CBLAS library.  They are written as literals because we
// do not want to include matrix/kaldi-blas.h here: that header puts many
// symbols (like "real") into the global scope via the header f2c.h.
typedef enum {
    kTrans = 112,   // = CblasTrans
    kNoTrans = 111  // = CblasNoTrans
} MatrixTransposeType;

// Selects what a resize does with the data:
//   kSetZero   - the new data is zero
//   kUndefined - the new data is undefined
//   kCopyData  - old data is kept in shared positions, zero elsewhere
typedef enum { kSetZero, kUndefined, kCopyData } MatrixResizeType;


// Stride selection for matrices.
// NOTE(review): no user of this enum is visible in this header; the names
// suggest a choice between an implementation-chosen row stride and a stride
// equal to the number of columns -- confirm against the matrix classes.
typedef enum {
    kDefaultStride,
    kStrideEqualNumCols,
} MatrixStrideType;

// Copy mode selector.
// NOTE(review): no user of this enum is visible in this header; the names
// suggest it selects which triangle (or the mean of both) is taken when
// copying into a symmetric matrix -- confirm against the callers.
typedef enum {
    kTakeLower,
    kTakeUpper,
    kTakeMean,
    kTakeMeanAndCheck
} SpCopyType;

// Forward declarations of the vector and matrix class templates, so that
// headers including this file can refer to them before they are defined.
template <typename Real>
class VectorBase;
template <typename Real>
class Vector;
template <typename Real>
class SubVector;
template <typename Real>
class MatrixBase;
template <typename Real>
class SubMatrix;
template <typename Real>
class Matrix;


/// Type trait that maps one floating-point type to the other one:
/// OtherReal<float>::Real is double and OtherReal<double>::Real is float.
/// The primary template is empty, so any other T has no nested Real.
/// Useful in reading+writing routines to switch double and float.
template <typename T>
class OtherReal {};

/// float -> double.
template <>
class OtherReal<float> {
  public:
    using Real = double;
};

/// double -> float.
template <>
class OtherReal<double> {
  public:
    using Real = float;
};


// Index types used for vector and matrix dimensions and element indices.
// All three are 32 bits wide; the unsigned one has the same width as the
// signed ones.
typedef int32 MatrixIndexT;
typedef int32 SignedMatrixIndexT;
typedef uint32 UnsignedMatrixIndexT;

// If you want to use size_t for the index type, do as follows instead:
// typedef size_t MatrixIndexT;
// typedef ssize_t SignedMatrixIndexT;
// typedef size_t UnsignedMatrixIndexT;
}  // namespace kaldi


#endif  // KALDI_MATRIX_MATRIX_COMMON_H_


================================================
FILE: runtime/engine/common/utils/CMakeLists.txt
================================================


# Source files that make up the `utils` library.
set(csrc   
  file_utils.cc
  math.cc
  strings.cc
  audio_process.cc
  timer.cc
)

# No STATIC/SHARED keyword is given, so the library type follows the
# BUILD_SHARED_LIBS setting.
add_library(utils ${csrc})

# Unit tests are only configured when WITH_TESTING is set.
if(WITH_TESTING)
  enable_testing()
  
  # Nothing is built for Android; the tests are only set up in the else()
  # branch.
  if(ANDROID)
  else() # UNIX
      # link_libraries() affects the targets created after this call, i.e.
      # the test executable below.
      link_libraries(gtest_main gmock)
  
      # strings_test is the only test registered here.
      add_executable(strings_test strings_test.cc)
      target_link_libraries(strings_test PUBLIC utils)
      add_test(
        NAME strings_test
        COMMAND strings_test
      )
  endif()
endif()


================================================
FILE: runtime/engine/common/utils/audio_process.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "utils/audio_process.h"

namespace ppspeech{

// Divides every sample of `waveform` by 32768 in place, which maps the
// 16-bit PCM value range onto [-1, 1).  Always returns 0.
int WaveformFloatNormal(std::vector<float>* waveform) {
    for (float& sample : *waveform) {
        sample = sample / 32768.0;
    }
    return 0;
}

// Normalizes `waveform` in place.
//
//   waveform             samples to normalize (modified in place)
//   wav_normal           when false nothing is done and 0 is returned
//   wav_normal_type      "linear":   divide by the peak absolute value
//                        "gaussian": subtract the mean and divide by the
//                                    sample standard deviation
//   wav_norm_mul_factor  gain applied to the normalized samples
//
// Returns 0 on success, -1 (after printing a message) when wav_normal_type
// is neither "linear" nor "gaussian".
int WaveformNormal(std::vector<float>* waveform,
                   bool wav_normal,
                   const std::string& wav_normal_type,
                   float wav_norm_mul_factor) {
    if (wav_normal == false) {
        return 0;
    }
    if (wav_normal_type == "linear") {
        // Peak magnitude.  Absolute values are never negative, so 0 is the
        // neutral starting point of the running maximum.
        float amax = 0.0f;
        for (const float sample : *waveform) {
            float tmp = std::abs(sample);
            amax = std::max(amax, tmp);
        }
        // The small epsilon keeps the division finite for an all-zero input.
        float factor = 1.0 / (amax + 1e-8);
        for (float& sample : *waveform) {
            sample = sample * factor * wav_norm_mul_factor;
        }
    } else if (wav_normal_type == "gaussian") {
        // Nothing to normalize; also avoids dividing by a zero sample count.
        if (waveform->empty()) {
            return 0;
        }
        double sum = std::accumulate(waveform->begin(), waveform->end(), 0.0);
        double mean = sum / waveform->size();  // mean

        double accum = 0.0;
        for (const double d : *waveform) {
            accum += (d - mean) * (d - mean);
        }

        // Sample variance uses n - 1.  With a single sample that divisor
        // would be 0 (0/0 = NaN for every output), so fall back to 1: the
        // deviation is then 0 and gets floored below.
        const double degrees_of_freedom =
            waveform->size() > 1 ? waveform->size() - 1 : 1;
        double stdev = sqrt(accum / degrees_of_freedom);  // standard deviation
        stdev = std::max(stdev, 1e-8);

        for (float& sample : *waveform) {
            sample = wav_norm_mul_factor * (sample - mean) / stdev;
        }
    } else {
        printf("don't support\n");
        return -1;
    }
    return 0;
}

// Converts a power value to decibels relative to `ref_value`:
//   10 * log10(max(amin, in)) - 10 * log10(max(ref_value, amin))
//
//   in         power value
//   ref_value  reference power, must be > 0
//   amin       lower bound applied before taking the logarithm, must be > 0
//   top_db     accepted for interface compatibility; not used here
//
// Returns the dB value, or -1 after printing a message when amin or
// ref_value is not strictly positive.
float PowerTodb(float in, float ref_value, float amin, float top_db) {
    if (amin <= 0) {
        printf("amin must be strictly positive\n");
        return -1;
    }

    if (ref_value <= 0) {
        printf("ref_value must be strictly positive\n");
        return -1;
    }

    // Clamp both operands from below so that log10 never sees a value
    // smaller than amin.
    const float floored_in = std::max(amin, in);
    const float floored_ref = std::max(ref_value, amin);

    float decibels = 10.0 * log10(floored_in);
    decibels -= 10.0 * log10(floored_ref);
    return decibels;
}

} // namespace ppspeech

================================================
FILE: runtime/engine/common/utils/audio_process.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Include guard: PowerTodb below is declared with default arguments, and a
// default argument may not be specified twice, so including this header
// more than once in one translation unit must be a no-op.
#ifndef PPSPEECH_UTILS_AUDIO_PROCESS_H_
#define PPSPEECH_UTILS_AUDIO_PROCESS_H_

#include <vector>
#include <string>
#include <algorithm>
#include <numeric>
#include <iomanip>
#include <math.h>

namespace ppspeech{
// Divides every sample by 32768 in place; always returns 0.
int WaveformFloatNormal(std::vector<float>* waveform);

// Normalizes `waveform` in place when `wav_normal` is true.
// `wav_normal_type` is "linear" (scale by the peak absolute value) or
// "gaussian" (subtract the mean, divide by the standard deviation); the
// result is multiplied by `wav_norm_mul_factor`.
// Returns 0 on success and -1 for an unsupported `wav_normal_type`.
int WaveformNormal(std::vector<float>* waveform,
                    bool wav_normal,
                    const std::string& wav_normal_type,
                    float wav_norm_mul_factor);

// Converts the power value `in` to decibels relative to `ref_value`, with
// both operands floored at `amin`.  Returns -1 when `amin` or `ref_value` is
// not strictly positive.
float PowerTodb(float in,
                float ref_value = 1.0,
                float amin = 1e-10,
                float top_db = 80.0);
} // namespace ppspeech

#endif  // PPSPEECH_UTILS_AUDIO_PROCESS_H_

================================================
FILE: runtime/engine/common/utils/blank_process_test.cc
================================================
#include "utils/blank_process.h"

#include <gtest/gtest.h>
#include <gmock/gmock.h>

// Checks ppspeech::BlankProcess on four inputs; each expectation compares
// the returned text with the expected string.
TEST(BlankProcess, BlankProcessTest) {
    // Chinese tokens separated by single blanks: the blanks are expected to
    // disappear.
    std::string chinese_in = "我 今天 去 了 超市 花了 120 元。";
    std::string chinese_out = ppspeech::BlankProcess(chinese_in);
    EXPECT_EQ(chinese_out.compare("我今天去了超市花了120元。"), 0);

    // Pure English text: expected to come back unchanged.
    std::string english_in = "how are you today";
    std::string english_out = ppspeech::BlankProcess(english_in);
    EXPECT_EQ(english_out.compare("how are you today"), 0);

    // Chinese with an embedded English word.
    std::string mixed_in = "我 的 paper 在 哪里？";
    std::string mixed_out = ppspeech::BlankProcess(mixed_in);
    EXPECT_EQ(mixed_out.compare("我的paper在哪里？"), 0);

    // Runs of several blanks between Chinese tokens.
    std::string padded_in = "我 今天     去 了 超市     花了   120 元。";
    std::string padded_out = ppspeech::BlankProcess(padded_in);
    EXPECT_EQ(padded_out.compare("我今天去了超市花了120元。"), 0);
}

================================================
FILE: runtime/engine/common/utils/file_utils.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "utils/file_utils.h"

#include <sys/stat.h>

namespace ppspeech {

// Reads `filename` line by line and appends every line to `*vocabulary`
// (existing elements are kept).
// Returns false, after printing a message to std::cerr, when the file cannot
// be opened; returns true otherwise.
bool ReadFileToVector(const std::string& filename,
                      std::vector<std::string>* vocabulary) {
    std::ifstream reader(filename);
    if (reader.fail()) {
        std::cerr << "please input a valid file" << std::endl;
        return false;
    }

    for (std::string entry; std::getline(reader, entry);) {
        vocabulary->emplace_back(entry);
    }

    return true;
}

// Returns the complete content of the file at `path` as one string.
// When the file cannot be opened a message is printed to std::cerr and the
// returned string is empty.
std::string ReadFile2String(const std::string& path) {
    std::ifstream source(path);
    if (!source.is_open()) {
        std::cerr << "please input a valid file" << std::endl;
    }

    // Reading from a stream that failed to open yields no characters, so the
    // same code path serves both cases.
    std::istreambuf_iterator<char> first(source);
    std::istreambuf_iterator<char> last;
    std::string content(first, last);
    return content;
}

// Returns true when stat() succeeds on `strFilename`, false otherwise.
// A failing stat() does not strictly prove that the file is absent: it can
// also mean that the containing folder is not accessible.  Inspect the error
// reported by stat() if that distinction is needed.
bool FileExists(const std::string& strFilename) {
    // this function is from:
    // https://github.com/kaldi-asr/kaldi/blob/master/src/fstext/deterministic-fst-test.cc
    struct stat attributes;
    return stat(strFilename.c_str(), &attributes) == 0;
}

}  // namespace ppspeech


================================================
FILE: runtime/engine/common/utils/file_utils.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "base/common.h"

namespace ppspeech {

// Appends every line of the file `filename` to `*data`.
// Returns false (after printing a message to std::cerr) when the file
// cannot be opened, true otherwise.
bool ReadFileToVector(const std::string& filename,
                      std::vector<std::string>* data);

// Returns the whole content of the file at `path` as one string.  When the
// file cannot be opened a message is printed to std::cerr and the result is
// empty.
std::string ReadFile2String(const std::string& path);

// Returns true when stat() succeeds on `filename`.
bool FileExists(const std::string& filename);

}  // namespace ppspeech


================================================
FILE: runtime/engine/common/utils/math.cc
================================================

// Copyright (c) 2021 Mobvoi Inc (Zhendong Peng)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "utils/math.h"
#include "base/basic_types.h"

#include <algorithm>
#include <cmath>
#include <queue>
#include <string>
#include <utility>
#include <vector>


namespace ppspeech {

// Sum in log scale
float LogSumExp(float x, float y) {
    // An operand at or below -kBaseFloatMax contributes nothing to the sum,
    // so the other operand is the result.
    if (x <= -kBaseFloatMax) {
        return y;
    }
    if (y <= -kBaseFloatMax) {
        return x;
    }

    // Subtract the larger operand before exponentiating so that exp() cannot
    // overflow, then add it back outside the logarithm.
    const float pivot = std::max(x, y);
    const float shifted_sum = std::exp(x - pivot) + std::exp(y - pivot);
    return pivot + std::log(shifted_sum);
}

// Comparator that turns std::priority_queue into a min-heap on the value:
// the pair with the smallest value is on top.  Among equal values the pair
// with the larger index is on top.
template <typename T>
struct ValGreaterComp {
    bool operator()(const std::pair<T, int32_t>& lhs,
                    const std::pair<T, int32_t>& rhs) const {
        return lhs.first > rhs.first ||
               (lhs.first == rhs.first && lhs.second < rhs.second);
    }
};

// Selects the k largest elements of `data`.
//
//   data     input values
//   k        number of elements wanted; clamped to data.size()
//   values   receives the selected values, largest first
//   indices  receives the position in `data` of each selected value
//
// When k <= 0 or `data` is empty both outputs are left empty.
template <typename T>
void TopK(const std::vector<T>& data,
          int32_t k,
          std::vector<T>* values,
          std::vector<int>* indices) {
    int n = data.size();
    int min_k_n = std::min(k, n);

    // Nothing to select.  Without this guard a non-positive k would make the
    // loop below call top() on an empty queue and index data[] out of range.
    if (min_k_n <= 0) {
        values->clear();
        indices->clear();
        return;
    }

    // smallest heap, (val, idx)
    std::vector<std::pair<T, int32_t>> smallest_heap;
    smallest_heap.reserve(min_k_n);
    for (int i = 0; i < min_k_n; i++) {
        smallest_heap.emplace_back(data[i], i);
    }

    // smallest priority_queue
    std::priority_queue<std::pair<T, int32_t>,
                        std::vector<std::pair<T, int32_t>>,
                        ValGreaterComp<T>>
        pq(ValGreaterComp<T>(), std::move(smallest_heap));

    // top k: replace the current minimum whenever a larger element shows up
    for (int i = min_k_n; i < n; i++) {
        if (pq.top().first < data[i]) {
            pq.pop();
            pq.emplace(data[i], i);
        }
    }

    values->resize(min_k_n);
    indices->resize(min_k_n);

    // The heap yields the smallest element first, so fill the outputs from
    // the back to get them ordered from largest to smallest.
    int cur = values->size() - 1;
    while (!pq.empty()) {
        const auto& item = pq.top();

        (*values)[cur] = item.first;
        (*indices)[cur] = item.second;

        // item is a reference into the queue, so pop only after reading it
        pq.pop();

        cur--;
    }
}

template void TopK<float>(const std::vector<float>& data,
                          int32_t k,
                          std::vector<float>* values,
                          std::vector<int>* indices);

}  // namespace ppspeech


================================================
FILE: runtime/engine/common/utils/math.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <cstdint>
#include <vector>

namespace ppspeech {

// Sum in log scale: returns log(exp(x) + exp(y)).
float LogSumExp(float x, float y);

// Writes the (at most) k largest elements of `data` to `*values`, largest
// first, and their positions in `data` to `*indices`.
// Only the declaration lives in this header; math.cc defines the template
// and explicitly instantiates it for T = float.
template <typename T>
void TopK(const std::vector<T>& data,
          int32_t k,
          std::vector<T>* values,
          std::vector<int>* indices);

}  // namespace ppspeech

================================================
FILE: runtime/engine/common/utils/picojson.h
================================================
/*
 * Copyright 2009-2010 Cybozu Labs, Inc.
 * Copyright 2011-2014 Kazuho Oku
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#ifndef picojson_h
#define picojson_h

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <iterator>
#include <limits>
#include <map>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#define PICOJSON_USE_INT64 1

// for isnan/isinf
#if __cplusplus >= 201103L
#include <cmath>
#else
extern "C" {
#ifdef _MSC_VER
#include <float.h>
#elif defined(__INTEL_COMPILER)
#include <mathimf.h>
#else
#include <math.h>
#endif
}
#endif

#ifndef PICOJSON_USE_RVALUE_REFERENCE
#if (defined(__cpp_rvalue_references) && __cpp_rvalue_references >= 200610) || \
    (defined(_MSC_VER) && _MSC_VER >= 1600)
#define PICOJSON_USE_RVALUE_REFERENCE 1
#else
#define PICOJSON_USE_RVALUE_REFERENCE 0
#endif
#endif  // PICOJSON_USE_RVALUE_REFERENCE

#ifndef PICOJSON_NOEXCEPT
#if PICOJSON_USE_RVALUE_REFERENCE
#define PICOJSON_NOEXCEPT noexcept
#else
#define PICOJSON_NOEXCEPT throw()
#endif
#endif

// experimental support for int64_t (see README.mkdn for detail)
#ifdef PICOJSON_USE_INT64
#define __STDC_FORMAT_MACROS
#include <cerrno>
#if __cplusplus >= 201103L
#include <cinttypes>
#else
extern "C" {
#include <inttypes.h>
}
#endif
#endif

// to disable the use of localeconv(3), set PICOJSON_USE_LOCALE to 0
#ifndef PICOJSON_USE_LOCALE
#define PICOJSON_USE_LOCALE 1
#endif
#if PICOJSON_USE_LOCALE
extern "C" {
#include <locale.h>
}
#endif

// Default assertion macro: throws std::runtime_error whose message is the
// stringified expression when `e` evaluates to false.  Define
// PICOJSON_ASSERT before including this header to replace it.
#ifndef PICOJSON_ASSERT
#define PICOJSON_ASSERT(e)                      \
    do {                                        \
        if (!(e)) throw std::runtime_error(#e); \
    } while (0)
#endif

#ifdef _MSC_VER
#define SNPRINTF _snprintf_s
#pragma warning(push)
#pragma warning(disable : 4244)  // conversion from int to char
#pragma warning(disable : 4127)  // conditional expression is constant
#pragma warning(disable : 4702)  // unreachable code
#pragma warning(disable : 4706)  // assignment within conditional expression
#else
#define SNPRINTF snprintf
#endif

namespace picojson {

// Type tags, one per kind of JSON value.  int64_type is only part of the
// enum when PICOJSON_USE_INT64 is defined.
// NOTE(review): presumably these are the values stored in value::type_ --
// confirm against the value implementation.
enum {
    null_type,
    boolean_type,
    number_type,
    string_type,
    array_type,
    object_type
#ifdef PICOJSON_USE_INT64
    ,
    int64_type
#endif
};

// Library-wide integer constants.
// NOTE(review): their use sites are outside this excerpt; the names suggest
// the indentation step of prettified output and a nesting-depth limit --
// confirm against the serializer and parser.
enum { INDENT_WIDTH = 2, DEFAULT_MAX_DEPTHS = 100 };

// Empty tag type.
// NOTE(review): presumably the C++ type standing for JSON null in
// value::is<T>() / value::get<T>() -- confirm against their specializations.
struct null {};

// A single JSON value: a type tag plus a union holding either an inline
// scalar or an owning pointer to a heap-allocated string/array/object.
class value {
  public:
    typedef std::vector<value> array;
    typedef std::map<std::string, value> object;
    // Payload storage; which member is active is determined by type_.
    union _storage {
        bool boolean_;
        double number_;
#ifdef PICOJSON_USE_INT64
        int64_t int64_;
#endif
        std::string *string_;
        array *array_;
        object *object_;
    };

  protected:
    int type_;    // one of the *_type enumerators
    _storage u_;  // payload matching type_

  public:
    // Constructs a null value.
    value();
    // Constructs an empty/zero value of the given type tag; the bool
    // argument is unused.
    value(int type, bool);
    explicit value(bool b);
#ifdef PICOJSON_USE_INT64
    explicit value(int64_t i);
#endif
    // Throws std::overflow_error when n is NaN or infinite.
    explicit value(double n);
    explicit value(const std::string &s);
    explicit value(const array &a);
    explicit value(const object &o);
#if PICOJSON_USE_RVALUE_REFERENCE
    explicit value(std::string &&s);
    explicit value(array &&a);
    explicit value(object &&o);
#endif
    explicit value(const char *s);
    value(const char *s, size_t len);
    ~value();
    // Deep copy: string/array/object payloads are duplicated.
    value(const value &x);
    value &operator=(const value &x);
#if PICOJSON_USE_RVALUE_REFERENCE
    value(value &&x) PICOJSON_NOEXCEPT;
    value &operator=(value &&x) PICOJSON_NOEXCEPT;
#endif
    // Exchanges type tag and payload with x.
    void swap(value &x) PICOJSON_NOEXCEPT;
    // Type test; specialized for each supported C++ type.
    template <typename T>
    bool is() const;
    // Typed accessors; they assert that is<T>() holds.
    template <typename T>
    const T &get() const;
    template <typename T>
    T &get();
    // Replaces the held value with a copy of the argument.
    template <typename T>
    void set(const T &);
#if PICOJSON_USE_RVALUE_REFERENCE
    // Replaces the held value by moving from the argument.
    template <typename T>
    void set(T &&);
#endif
    // Truthiness: null, false, 0 and "" are false; everything else is true.
    bool evaluate_as_boolean() const;
    // Element lookup; a reference to a static null value is returned when the
    // index/key is absent. Asserts that the value is an array/object.
    const value &get(const size_t idx) const;
    const value &get(const std::string &key) const;
    value &get(const size_t idx);
    value &get(const std::string &key);

    // Membership tests; assert that the value is an array/object.
    bool contains(const size_t idx) const;
    bool contains(const std::string &key) const;
    // Textual form of a scalar; arrays and objects yield "array"/"object".
    std::string to_str() const;
    // Serializes as JSON text, either to an output iterator or to a string.
    template <typename Iter>
    void serialize(Iter os, bool prettify = false) const;
    std::string serialize(bool prettify = false) const;

  private:
    template <typename T>
    value(const T *);  // intentionally defined to block implicit conversion of
                       // pointer to bool
    // Emits a newline followed by indent * INDENT_WIDTH spaces.
    template <typename Iter>
    static void _indent(Iter os, int indent);
    // Serialization workers; indent == -1 means compact output.
    template <typename Iter>
    void _serialize(Iter os, int indent) const;
    std::string _serialize(int indent) const;
    // Releases the heap payload (if any) without changing type_.
    void clear();
};

typedef value::array array;
typedef value::object object;

// A default-constructed value is JSON null with zeroed storage.
inline value::value() : type_(null_type), u_() {}

// Builds an empty/zero value for the requested type tag. The unnamed bool
// parameter is ignored; it only distinguishes this overload from the others.
inline value::value(int type, bool) : type_(type), u_() {
    switch (type) {
        case boolean_type:
            u_.boolean_ = false;
            break;
        case number_type:
            u_.number_ = 0.0;
            break;
#ifdef PICOJSON_USE_INT64
        case int64_type:
            u_.int64_ = 0;
            break;
#endif
        case string_type:
            u_.string_ = new std::string();
            break;
        case array_type:
            u_.array_ = new array();
            break;
        case object_type:
            u_.object_ = new object();
            break;
        default:
            // Any other tag keeps the zero-initialized storage.
            break;
    }
}

// Boolean constructor.
inline value::value(bool b) : type_(boolean_type), u_() {
    u_.boolean_ = b;
}

#ifdef PICOJSON_USE_INT64
// 64-bit integer constructor (experimental int64 support).
inline value::value(int64_t i) : type_(int64_type), u_() {
    u_.int64_ = i;
}
#endif

// Floating-point constructor. NaN and infinities cannot be represented in
// JSON, so they are rejected with std::overflow_error.
inline value::value(double n) : type_(number_type), u_() {
#ifdef _MSC_VER
    const bool representable = !(!_finite(n));
#elif __cplusplus >= 201103L
    const bool representable = !(std::isnan(n) || std::isinf(n));
#else
    const bool representable = !(isnan(n) || isinf(n));
#endif
    if (!representable) {
        throw std::overflow_error("");
    }
    u_.number_ = n;
}

// Copying constructors: each one heap-allocates a private copy of the
// argument and stores the owning pointer in the union.
inline value::value(const std::string &s) : type_(string_type), u_() {
    std::string *owned = new std::string(s);
    u_.string_ = owned;
}

inline value::value(const array &a) : type_(array_type), u_() {
    array *owned = new array(a);
    u_.array_ = owned;
}

inline value::value(const object &o) : type_(object_type), u_() {
    object *owned = new object(o);
    u_.object_ = owned;
}

#if PICOJSON_USE_RVALUE_REFERENCE
// Moving constructors: the argument's contents are moved into the new
// heap-allocated payload.
inline value::value(std::string &&s) : type_(string_type), u_() {
    std::string *owned = new std::string(std::move(s));
    u_.string_ = owned;
}

inline value::value(array &&a) : type_(array_type), u_() {
    array *owned = new array(std::move(a));
    u_.array_ = owned;
}

inline value::value(object &&o) : type_(object_type), u_() {
    object *owned = new object(std::move(o));
    u_.object_ = owned;
}
#endif

// String value from a NUL-terminated C string.
inline value::value(const char *s) : type_(string_type), u_() {
    std::string *owned = new std::string(s);
    u_.string_ = owned;
}

// String value from the first `len` characters at `s`.
inline value::value(const char *s, size_t len) : type_(string_type), u_() {
    std::string *owned = new std::string(s, len);
    u_.string_ = owned;
}

// Frees the heap payload owned by string/array/object values. Scalar and
// null values own nothing. The type tag and the pointer are left untouched.
inline void value::clear() {
    if (type_ == string_type) {
        delete u_.string_;
    } else if (type_ == array_type) {
        delete u_.array_;
    } else if (type_ == object_type) {
        delete u_.object_;
    }
}

// Destructor: releases whatever payload is owned.
inline value::~value() {
    clear();
}

// Copy constructor: heap payloads are deep-copied, scalar payloads are
// copied bitwise through the union.
inline value::value(const value &x) : type_(x.type_), u_() {
    if (type_ == string_type) {
        u_.string_ = new std::string(*x.u_.string_);
    } else if (type_ == array_type) {
        u_.array_ = new array(*x.u_.array_);
    } else if (type_ == object_type) {
        u_.object_ = new object(*x.u_.object_);
    } else {
        u_ = x.u_;
    }
}

// Copy assignment via copy-and-swap; self-assignment is a no-op.
inline value &value::operator=(const value &x) {
    if (this == &x) {
        return *this;
    }
    value duplicate(x);
    swap(duplicate);
    return *this;
}

#if PICOJSON_USE_RVALUE_REFERENCE
// Move constructor: takes over x's tag and payload and leaves x as null
// with zeroed storage.
inline value::value(value &&x) PICOJSON_NOEXCEPT : type_(x.type_), u_(x.u_) {
    x.type_ = null_type;
    x.u_ = _storage();
}
// Move assignment: exchanges contents with x, so the previous payload of
// *this is released when x is destroyed.
inline value &value::operator=(value &&x) PICOJSON_NOEXCEPT {
    swap(x);
    return *this;
}
#endif
// Exchanges type tag and payload with x.
inline void value::swap(value &x) PICOJSON_NOEXCEPT {
    const int held_type = type_;
    type_ = x.type_;
    x.type_ = held_type;

    const _storage held_storage = u_;
    u_ = x.u_;
    x.u_ = held_storage;
}

// is<T>() specializations: each one compares the stored type tag with the
// tag corresponding to T.
template <>
inline bool value::is<null>() const {
    return type_ == null_type;
}
template <>
inline bool value::is<bool>() const {
    return type_ == boolean_type;
}
#ifdef PICOJSON_USE_INT64
template <>
inline bool value::is<int64_t>() const {
    return type_ == int64_type;
}
#endif
template <>
inline bool value::is<std::string>() const {
    return type_ == string_type;
}
template <>
inline bool value::is<array>() const {
    return type_ == array_type;
}
template <>
inline bool value::is<object>() const {
    return type_ == object_type;
}
// A double is reported for number values and, when int64 support is
// compiled in, for int64 values as well.
template <>
inline bool value::is<double>() const {
#ifdef PICOJSON_USE_INT64
    return type_ == number_type || type_ == int64_type;
#else
    return type_ == number_type;
#endif
}

// GET(ctype, var) generates the const and non-const get<ctype>()
// specializations. Both assert (through PICOJSON_ASSERT) that is<ctype>()
// holds and then return a reference to `var`.
#define GET(ctype, var)                                                        \
    template <>                                                                \
    inline const ctype &value::get<ctype>() const {                            \
        PICOJSON_ASSERT("type mismatch! call is<type>() before get<type>()" && \
                        is<ctype>());                                          \
        return var;                                                            \
    }                                                                          \
    template <>                                                                \
    inline ctype &value::get<ctype>() {                                        \
        PICOJSON_ASSERT("type mismatch! call is<type>() before get<type>()" && \
                        is<ctype>());                                          \
        return var;                                                            \
    }
GET(bool, u_.boolean_)
GET(std::string, *u_.string_)
GET(array, *u_.array_)
GET(object, *u_.object_)
#ifdef PICOJSON_USE_INT64
// With int64 support, get<double>() on an int64 value first converts the
// value in place: the tag becomes number_type and the payload is rewritten
// as a double. This happens even through the const overload (const_cast),
// so after the call the value no longer reports is<int64_t>().
GET(double,
    (type_ == int64_type &&
         (const_cast<value *>(this)->type_ = number_type,
          (const_cast<value *>(this)->u_.number_ = u_.int64_)),
     u_.number_))
GET(int64_t, u_.int64_)
#else
GET(double, u_.number_)
#endif
#undef GET

// SET(ctype, jtype, setter) generates set<ctype>(const ctype &), which
// replaces the held value with a copy of the argument.
//
// The new payload is built in a scratch union (`_next`) BEFORE the old
// payload is released. This ordering matters for two reasons:
//   * the argument may alias the value's own payload, e.g.
//     v.set<std::string>(v.get<std::string>()); releasing first would make
//     the setter copy from freed memory;
//   * if the allocation in the setter throws, the value is left untouched
//     instead of holding a dangling pointer that the destructor would
//     delete a second time.
#define SET(ctype, jtype, setter)                      \
    template <>                                        \
    inline void value::set<ctype>(const ctype &_val) { \
        _storage _next = _storage();                   \
        setter                                         \
        clear();                                       \
        type_ = jtype##_type;                          \
        u_ = _next;                                    \
    }
SET(bool, boolean, _next.boolean_ = _val;)
SET(std::string, string, _next.string_ = new std::string(_val);)
SET(array, array, _next.array_ = new array(_val);)
SET(object, object, _next.object_ = new object(_val);)
SET(double, number, _next.number_ = _val;)
#ifdef PICOJSON_USE_INT64
SET(int64_t, int64, _next.int64_ = _val;)
#endif
#undef SET

#if PICOJSON_USE_RVALUE_REFERENCE
// MOVESET(ctype, jtype, setter) generates set<ctype>(ctype &&), which
// replaces the held value by moving from the argument.
//
// The new payload is move-constructed into a scratch union (`_next`) BEFORE
// the old payload is released, so that an argument referring to the value's
// own payload (e.g. v.set(std::move(v.get<std::string>()))) is still alive
// when it is moved from, and so that a throwing allocation leaves the value
// unchanged rather than holding a dangling pointer.
#define MOVESET(ctype, jtype, setter)              \
    template <>                                    \
    inline void value::set<ctype>(ctype && _val) { \
        _storage _next = _storage();               \
        setter                                     \
        clear();                                   \
        type_ = jtype##_type;                      \
        u_ = _next;                                \
    }
MOVESET(std::string, string, _next.string_ = new std::string(std::move(_val));)
MOVESET(array, array, _next.array_ = new array(std::move(_val));)
MOVESET(object, object, _next.object_ = new object(std::move(_val));)
#undef MOVESET
#endif

// Truthiness of the value: null is false, booleans are themselves, numbers
// are true when non-zero, strings are true when non-empty, and every other
// type (arrays, objects) is true.
inline bool value::evaluate_as_boolean() const {
    if (type_ == null_type) {
        return false;
    }
    if (type_ == boolean_type) {
        return u_.boolean_;
    }
    if (type_ == number_type) {
        return u_.number_ != 0;
    }
#ifdef PICOJSON_USE_INT64
    if (type_ == int64_type) {
        return u_.int64_ != 0;
    }
#endif
    if (type_ == string_type) {
        return !u_.string_->empty();
    }
    return true;
}

// Array element access (const). Asserts that this is an array; an index
// past the end yields a reference to a function-local static null value.
inline const value &value::get(const size_t idx) const {
    static value s_null;
    PICOJSON_ASSERT(is<array>());
    const array &items = *u_.array_;
    if (idx < items.size()) {
        return items[idx];
    }
    return s_null;
}

// Array element access (mutable). Same contract as the const overload; the
// fallback is a function-local static null value.
inline value &value::get(const size_t idx) {
    static value s_null;
    PICOJSON_ASSERT(is<array>());
    array &items = *u_.array_;
    if (idx < items.size()) {
        return items[idx];
    }
    return s_null;
}

// Object member access (const). Asserts that this is an object; a missing
// key yields a reference to a function-local static null value.
inline const value &value::get(const std::string &key) const {
    static value s_null;
    PICOJSON_ASSERT(is<object>());
    const object::const_iterator hit = u_.object_->find(key);
    if (hit == u_.object_->end()) {
        return s_null;
    }
    return hit->second;
}

// Object member access (mutable). Same contract as the const overload; no
// member is inserted when the key is missing.
inline value &value::get(const std::string &key) {
    static value s_null;
    PICOJSON_ASSERT(is<object>());
    const object::iterator hit = u_.object_->find(key);
    if (hit == u_.object_->end()) {
        return s_null;
    }
    return hit->second;
}

inline bool value::contains(const size_t idx) const {
    PICOJSON_ASSERT(is<array>());
    return idx < u_.array_->size();
}

inline bool value::contains(const std::string &key) const {
    PICOJSON_ASSERT(is<object>());
    object::const_iterator i = u_.object_->find(key);
    return i != u_.object_->end();
}

// Returns the textual form of a scalar value. Strings are returned as-is
// (unescaped); arrays and objects yield the literal words "array" and
// "object". An unknown type tag trips PICOJSON_ASSERT.
inline std::string value::to_str() const {
    switch (type_) {
        case null_type:
            return "null";
        case boolean_type: {
            if (u_.boolean_) {
                return "true";
            }
            return "false";
        }
#ifdef PICOJSON_USE_INT64
        case int64_type: {
            char digits[sizeof("-9223372036854775808")];
            SNPRINTF(digits, sizeof(digits), "%" PRId64, u_.int64_);
            return digits;
        }
#endif
        case number_type: {
            char text[256];
            double whole_part;
            // Numbers with no fractional part and magnitude below 2^53 are
            // printed without a decimal point; everything else uses 17
            // significant digits.
            const bool print_as_integer = fabs(u_.number_) < (1ULL << 53) &&
                                          modf(u_.number_, &whole_part) == 0;
            SNPRINTF(text,
                     sizeof(text),
                     print_as_integer ? "%.f" : "%.17g",
                     u_.number_);
#if PICOJSON_USE_LOCALE
            // snprintf honours the current locale; put "." back in place of
            // the locale's decimal separator when the two differ.
            const char *separator = localeconv()->decimal_point;
            if (strcmp(separator, ".") != 0) {
                const size_t separator_len = strlen(separator);
                char *p = text;
                while (*p != '\0') {
                    if (strncmp(p, separator, separator_len) == 0) {
                        std::string normalized(text, p);
                        normalized += ".";
                        normalized += (p + separator_len);
                        return normalized;
                    }
                    ++p;
                }
            }
#endif
            return text;
        }
        case string_type:
            return *u_.string_;
        case array_type:
            return "array";
        case object_type:
            return "object";
        default:
            PICOJSON_ASSERT(0);
#ifdef _MSC_VER
            __assume(0);
#endif
    }
    return std::string();
}

// Writes every character of `s` to the output iterator `oi`, in order.
template <typename Iter>
void copy(const std::string &s, Iter oi) {
    for (std::string::const_iterator it = s.begin(); it != s.end(); ++it) {
        *oi = *it;
        ++oi;
    }
}

// Function object that writes one character of a JSON string literal to the
// output iterator `oi`, applying JSON escaping:
//   * '"', '\\', '/', and the control characters \b \f \n \r \t use their
//     two-character escapes;
//   * any other byte below 0x20, and 0x7f, is written as \u00XX;
//   * everything else is copied through unchanged.
template <typename Iter>
struct serialize_str_char {
    Iter oi;
    void operator()(char c) {
        switch (c) {
#define MAP(val, sym)  \
    case val:          \
        copy(sym, oi); \
        break
            MAP('"', "\\\"");
            MAP('\\', "\\\\");
            MAP('/', "\\/");
            MAP('\b', "\\b");
            MAP('\f', "\\f");
            MAP('\n', "\\n");
            MAP('\r', "\\r");
            MAP('\t', "\\t");
#undef MAP
            default:
                if (static_cast<unsigned char>(c) < 0x20 || c == 0x7f) {
                    char buf[7];
                    SNPRINTF(buf, sizeof(buf), "\\u%04x", c & 0xff);
                    // Call std::copy explicitly: the unqualified
                    // three-argument form only resolved through
                    // argument-dependent lookup, i.e. only when Iter itself
                    // lives in namespace std.
                    std::copy(buf, buf + 6, oi);
                } else {
                    *oi++ = c;
                }
                break;
        }
    }
};

template <typename Iter>
void serialize_str(const std::string &s, Iter oi) {
    *oi++ = '"';
    serialize_str_char<Iter> process_char = {oi};
    std::for_each(s.begin(), s.end(), process_char);
    *oi++ = '"';
}

// Serializes this value to the output iterator. Pretty output starts at
// indent level 0; compact output is requested with the sentinel -1.
template <typename Iter>
void value::serialize(Iter oi, bool prettify) const {
    const int start_indent = prettify ? 0 : -1;
    return _serialize(oi, start_indent);
}

// Serializes this value into a freshly built string.
inline std::string value::serialize(bool prettify) const {
    const int start_indent = prettify ? 0 : -1;
    return _serialize(start_indent);
}

// Emits a line break followed by `indent * INDENT_WIDTH` spaces.
template <typename Iter>
void value::_indent(Iter oi, int indent) {
    *oi++ = '\n';
    int remaining = indent * INDENT_WIDTH;
    while (remaining > 0) {
        *oi++ = ' ';
        --remaining;
    }
}

template <typename Iter>
void value::_serialize(Iter oi, int indent) const {
    switch (type_) {
        case string_type:
            serialize_str(*u_.string_, oi);
            break;
        case array_type: {
            *oi++ = '[';
            if (indent != -1) {
                ++indent;
            }
            for (array::const_iterator i = u_.array_->begin();
                 i != u_.array_->end();
                 ++i) {
                if (i != u_.array_->begin()) {
                    *oi++ = ',';
                }
                if (indent != -1) {
                    _indent(oi, indent);
                }
                i->_serialize(oi, indent);
            }
            if (indent != -1) {
                --indent;
                if (!u_.array_->empty()) {
                    _indent(oi, indent);
                }
            }
            *oi++ = ']';
            break;
        }
        case object_type: {
            *oi++ = '{';
            if (indent != -1) {
                ++indent;
            }
            for (object::const_iterator i = u_.object_->begin();
                 i != u_.object_->end();
                 ++i) {
                if (i != u_.object_->begin()) {
                    *oi++ = ',';
                }
                if (indent != -1) {
                    _indent(oi, indent);
                }
                serialize_str(i->first, oi);
                *oi++ = ':';
                if (indent != -1) {
                    *oi++ = ' ';
                }
                i->second._serialize(oi, indent);
            }
            if (indent != -1) {
                --indent;
                if (!u_.object_->empty()) {
                    _indent(oi, indent);
                }
            }
            *oi++ = '}';
            break;
        }
        default:
            copy(to_str(), oi);
            break;
    }
    if (indent == 0) {
        *oi++ = '\n';
    }
}

inline std::string value::_serialize(int indent) const {
    std::string s;
    _serialize(std::back_inserter(s), indent);
    return s;
}

// Character source over the iterator range [first, last) with one character
// of push-back and line counting.
//
// `consumed_` records whether the character at `cur_` has already been
// handed out by getc(); the iterator is only advanced lazily by the next
// getc() or cur() call, which is what makes ungetc() possible.
template <typename Iter>
class input {
  protected:
    Iter cur_, end_;
    bool consumed_;
    int line_;

  public:
    input(const Iter &first, const Iter &last)
        : cur_(first), end_(last), consumed_(false), line_(1) {}

    // Returns the next character as a value in [0, 255], or -1 at the end
    // of the input.
    int getc() {
        if (consumed_) {
            // Step past the previously returned character, counting lines.
            line_ += (*cur_ == '\n') ? 1 : 0;
            ++cur_;
        }
        consumed_ = !(cur_ == end_);
        if (!consumed_) {
            return -1;
        }
        return *cur_ & 0xff;
    }

    // Pushes the most recently returned character back.
    void ungetc() { consumed_ = false; }

    // Iterator to the first character that has not been consumed. Commits a
    // pending advance, hence the const_cast.
    Iter cur() const {
        if (consumed_) {
            input<Iter> *mutable_self = const_cast<input<Iter> *>(this);
            mutable_self->consumed_ = false;
            ++mutable_self->cur_;
        }
        return cur_;
    }

    // Current line number (1-based).
    int line() const { return line_; }

    // Skips spaces, tabs, newlines and carriage returns.
    void skip_ws() {
        for (;;) {
            const int ch = getc();
            const bool is_space =
                ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r';
            if (!is_space) {
                ungetc();
                return;
            }
        }
    }

    // Skips whitespace, then consumes `expected` if it is the next
    // character. Returns whether it was consumed.
    bool expect(const int expected) {
        skip_ws();
        if (getc() == expected) {
            return true;
        }
        ungetc();
        return false;
    }

    // Consumes `pattern` character by character; stops and returns false at
    // the first mismatch (characters matched so far stay consumed).
    bool match(const std::string &pattern) {
        for (std::string::size_type k = 0; k < pattern.size(); ++k) {
            if (getc() != pattern[k]) {
                ungetc();
                return false;
            }
        }
        return true;
    }
};

// Reads exactly four hexadecimal digits and returns their value, or -1 when
// the input ends or a non-hex character is met (that character is pushed
// back).
template <typename Iter>
inline int _parse_quadhex(input<Iter> &in) {
    int code_unit = 0;
    for (int n = 0; n < 4; ++n) {
        const int ch = in.getc();
        if (ch == -1) {
            return -1;
        }
        int digit;
        if ('0' <= ch && ch <= '9') {
            digit = ch - '0';
        } else if ('A' <= ch && ch <= 'F') {
            digit = ch - 'A' + 0xa;
        } else if ('a' <= ch && ch <= 'f') {
            digit = ch - 'a' + 0xa;
        } else {
            in.ungetc();
            return -1;
        }
        code_unit = code_unit * 16 + digit;
    }
    return code_unit;
}

// Parses the hex digits following "\u" (plus a second "\uXXXX" when the
// first unit is a high surrogate) and appends the code point to `out`
// encoded as UTF-8. Returns false on malformed input, including a lone low
// surrogate or a high surrogate not followed by a low one.
template <typename String, typename Iter>
inline bool _parse_codepoint(String &out, input<Iter> &in) {
    int cp = _parse_quadhex(in);
    if (cp == -1) {
        return false;
    }

    const bool is_surrogate = 0xd800 <= cp && cp <= 0xdfff;
    if (is_surrogate) {
        if (0xdc00 <= cp) {
            // A low surrogate may not come first.
            return false;
        }
        // High surrogate: the low half must follow as another \uXXXX.
        if (in.getc() != '\\' || in.getc() != 'u') {
            in.ungetc();
            return false;
        }
        const int low = _parse_quadhex(in);
        if (low < 0xdc00 || 0xdfff < low) {
            return false;
        }
        cp = ((cp - 0xd800) << 10) | ((low - 0xdc00) & 0x3ff);
        cp += 0x10000;
    }

    // UTF-8 encode: 1 to 4 bytes depending on the magnitude.
    if (cp < 0x80) {
        out.push_back(static_cast<char>(cp));
    } else if (cp < 0x800) {
        out.push_back(static_cast<char>(0xc0 | (cp >> 6)));
        out.push_back(static_cast<char>(0x80 | (cp & 0x3f)));
    } else if (cp < 0x10000) {
        out.push_back(static_cast<char>(0xe0 | (cp >> 12)));
        out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3f)));
        out.push_back(static_cast<char>(0x80 | (cp & 0x3f)));
    } else {
        out.push_back(static_cast<char>(0xf0 | (cp >> 18)));
        out.push_back(static_cast<char>(0x80 | ((cp >> 12) & 0x3f)));
        out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3f)));
        out.push_back(static_cast<char>(0x80 | (cp & 0x3f)));
    }
    return true;
}

// Parses the body of a JSON string (the opening quote has already been
// consumed) and appends the decoded characters to `out`. Returns true once
// the closing quote is consumed; returns false on end of input, on a raw
// character below 0x20, or on an invalid escape sequence.
template <typename String, typename Iter>
inline bool _parse_string(String &out, input<Iter> &in) {
    for (;;) {
        int ch = in.getc();
        if (ch < ' ') {
            // Covers both control characters and end of input (-1).
            in.ungetc();
            return false;
        }
        if (ch == '"') {
            return true;
        }
        if (ch != '\\') {
            out.push_back(static_cast<char>(ch));
            continue;
        }

        // Escape sequence.
        ch = in.getc();
        if (ch == -1) {
            return false;
        }
        switch (ch) {
            case '"':
                out.push_back('\"');
                break;
            case '\\':
                out.push_back('\\');
                break;
            case '/':
                out.push_back('/');
                break;
            case 'b':
                out.push_back('\b');
                break;
            case 'f':
                out.push_back('\f');
                break;
            case 'n':
                out.push_back('\n');
                break;
            case 'r':
                out.push_back('\r');
                break;
            case 't':
                out.push_back('\t');
                break;
            case 'u':
                if (!_parse_codepoint(out, in)) {
                    return false;
                }
                break;
            default:
                return false;
        }
    }
    return false;
}

// Parses the remainder of an array (the '[' has already been consumed),
// reporting each element to the context together with its index.
template <typename Context, typename Iter>
inline bool _parse_array(Context &ctx, input<Iter> &in) {
    if (!ctx.parse_array_start()) {
        return false;
    }
    size_t count = 0;
    if (in.expect(']')) {
        // Empty array.
        return ctx.parse_array_stop(count);
    }
    for (;;) {
        if (!ctx.parse_array_item(in, count)) {
            return false;
        }
        ++count;
        if (!in.expect(',')) {
            break;
        }
    }
    if (!in.expect(']')) {
        return false;
    }
    return ctx.parse_array_stop(count);
}

// Parses the remainder of an object (the '{' has already been consumed),
// reporting each member to the context together with its key.
template <typename Context, typename Iter>
inline bool _parse_object(Context &ctx, input<Iter> &in) {
    if (!ctx.parse_object_start()) {
        return false;
    }
    if (in.expect('}')) {
        // Empty object.
        return ctx.parse_object_stop();
    }
    for (;;) {
        std::string key;
        const bool have_key =
            in.expect('"') && _parse_string(key, in) && in.expect(':');
        if (!have_key) {
            return false;
        }
        if (!ctx.parse_object_item(in, key)) {
            return false;
        }
        if (!in.expect(',')) {
            break;
        }
    }
    if (!in.expect('}')) {
        return false;
    }
    return ctx.parse_object_stop();
}

// Collects the longest run of characters that can belong to a number
// (digits, sign, exponent marker, '.') and returns it. With locale support
// enabled, '.' is replaced by the locale's decimal separator so that the
// text can be handed to strtod. The first non-number character is pushed
// back.
template <typename Iter>
inline std::string _parse_number(input<Iter> &in) {
    std::string text;
    for (;;) {
        const int ch = in.getc();
        const bool is_digit = '0' <= ch && ch <= '9';
        if (is_digit || ch == '+' || ch == '-' || ch == 'e' || ch == 'E') {
            text.push_back(static_cast<char>(ch));
            continue;
        }
        if (ch != '.') {
            in.ungetc();
            break;
        }
#if PICOJSON_USE_LOCALE
        text += localeconv()->decimal_point;
#else
        text.push_back('.');
#endif
    }
    return text;
}

// Parses one JSON value from `in`, dispatching on its first non-whitespace
// character and reporting the result to the context. Returns false on a
// syntax error or when the context rejects the value.
template <typename Context, typename Iter>
inline bool _parse(Context &ctx, input<Iter> &in) {
    in.skip_ws();
    const int ch = in.getc();

    // Literals: the first letter is already consumed, match the remainder.
    if (ch == 'n') {
        return in.match("ull") && ctx.set_null();
    }
    if (ch == 'f') {
        return in.match("alse") && ctx.set_bool(false);
    }
    if (ch == 't') {
        return in.match("rue") && ctx.set_bool(true);
    }
    if (ch == '"') {
        return ctx.parse_string(in);
    }
    if (ch == '[') {
        return _parse_array(ctx, in);
    }
    if (ch == '{') {
        return _parse_object(ctx, in);
    }

    const bool starts_number = ('0' <= ch && ch <= '9') || ch == '-';
    in.ungetc();
    if (!starts_number) {
        return false;
    }

    char *endp;
    const std::string num_str(_parse_number(in));
    if (num_str.empty()) {
        return false;
    }
#ifdef PICOJSON_USE_INT64
    {
        // Prefer an exact integer when the whole text parses as one.
        errno = 0;
        intmax_t ival = strtoimax(num_str.c_str(), &endp, 10);
        if (errno == 0 && std::numeric_limits<int64_t>::min() <= ival &&
            ival <= std::numeric_limits<int64_t>::max() &&
            endp == num_str.c_str() + num_str.size()) {
            ctx.set_int64(ival);
            return true;
        }
    }
#endif
    const double f = strtod(num_str.c_str(), &endp);
    if (endp != num_str.c_str() + num_str.size()) {
        // Trailing characters were not part of a valid number.
        return false;
    }
    ctx.set_number(f);
    return true;
}

// Parse context whose every callback returns false, so any value handed to
// it is rejected.
class deny_parse_context {
  public:
    // Scalars.
    bool set_null() {
        return false;
    }
    bool set_bool(bool) {
        return false;
    }
#ifdef PICOJSON_USE_INT64
    bool set_int64(int64_t) {
        return false;
    }
#endif
    bool set_number(double) {
        return false;
    }

    // Strings.
    template <typename Iter>
    bool parse_string(input<Iter> &) { return false; }

    // Arrays.
    bool parse_array_start() {
        return false;
    }
    template <typename Iter>
    bool parse_array_item(input<Iter> &, size_t) { return false; }
    bool parse_array_stop(size_t) {
        return false;
    }

    // Objects.
    bool parse_object_start() {
        return false;
    }
    template <typename Iter>
    bool parse_object_item(input<Iter> &, const std::string &) { return false; }
};

// Parse context that builds a picojson::value tree.
//
// `out_` is the value being filled in. `depths_` is the remaining nesting
// budget: it is decremented when an array or object is opened, passed on to
// the child contexts created for the elements, and restored when the
// container is closed. Opening a container with a budget of 0 fails, which
// bounds the recursion depth of the parser.
class default_parse_context {
  protected:
    value *out_;
    size_t depths_;

  public:
    default_parse_context(value *out, size_t depths = DEFAULT_MAX_DEPTHS)
        : out_(out), depths_(depths) {}
    bool set_null() {
        *out_ = value();
        return true;
    }
    bool set_bool(bool b) {
        *out_ = value(b);
        return true;
    }
#ifdef PICOJSON_USE_INT64
    bool set_int64(int64_t i) {
        *out_ = value(i);
        return true;
    }
#endif
    bool set_number(double f) {
        *out_ = value(f);
        return true;
    }
    // Replaces the target with an empty string and parses into it.
    template <typename Iter>
    bool parse_string(input<Iter> &in) {
        *out_ = value(string_type, false);
        return _parse_string(out_->get<std::string>(), in);
    }
    bool parse_array_start() {
        if (depths_ == 0) return false;
        --depths_;
        *out_ = value(array_type, false);
        return true;
    }
    // Appends a null element and parses the item into it with a child
    // context that inherits the (already decremented) budget.
    template <typename Iter>
    bool parse_array_item(input<Iter> &in, size_t) {
        array &a = out_->get<array>();
        a.push_back(value());
        default_parse_context ctx(&a.back(), depths_);
        return _parse(ctx, in);
    }
    bool parse_array_stop(size_t) {
        ++depths_;
        return true;
    }
    bool parse_object_start() {
        if (depths_ == 0) return false;
        // Consume one level of the nesting budget, mirroring
        // parse_array_start(); parse_object_stop() gives it back. Without
        // this decrement nested objects were not depth-limited at all and
        // every closed object increased the budget.
        --depths_;
        *out_ = value(object_type, false);
        return true;
    }
    // Parses the member value into o[key]; a repeated key overwrites the
    // earlier member.
    template <typename Iter>
    bool parse_object_item(input<Iter> &in, const std::string &key) {
        object &o = out_->get<object>();
        default_parse_context ctx(&o[key], depths_);
        return _parse(ctx, in);
    }
    bool parse_object_stop() {
        ++depths_;
        return true;
    }

  private:
    // Non-copyable.
    default_parse_context(const default_parse_context &);
    default_parse_context &operator=(const default_parse_context &);
};

// Parse context that validates the input without building anything.
//
// The same context object is reused for nested values, so `depths_` is the
// remaining nesting budget at the current position: it is decremented when
// an array or object is opened and restored when that container is closed.
// Opening a container with a budget of 0 fails.
class null_parse_context {
  protected:
    size_t depths_;

  public:
    // String sink that discards every character.
    struct dummy_str {
        void push_back(int) {}
    };

  public:
    null_parse_context(size_t depths = DEFAULT_MAX_DEPTHS) : depths_(depths) {}
    bool set_null() { return true; }
    bool set_bool(bool) { return true; }
#ifdef PICOJSON_USE_INT64
    bool set_int64(int64_t) { return true; }
#endif
    bool set_number(double) { return true; }
    template <typename Iter>
    bool parse_string(input<Iter> &in) {
        dummy_str s;
        return _parse_string(s, in);
    }
    bool parse_array_start() {
        if (depths_ == 0) return false;
        --depths_;
        return true;
    }
    template <typename Iter>
    bool parse_array_item(input<Iter> &in, size_t) {
        return _parse(*this, in);
    }
    bool parse_array_stop(size_t) {
        ++depths_;
        return true;
    }
    bool parse_object_start() {
        if (depths_ == 0) return false;
        --depths_;
        return true;
    }
    // The budget must stay decremented while the member value is parsed;
    // restoring it here (once per member) made the object depth limit
    // ineffective and let the counter drift with the number of members.
    template <typename Iter>
    bool parse_object_item(input<Iter> &in, const std::string &) {
        return _parse(*this, in);
    }
    // Gives back the level taken by parse_object_start(), mirroring
    // parse_array_stop().
    bool parse_object_stop() {
        ++depths_;
        return true;
    }

  private:
    // Non-copyable.
    null_parse_context(const null_parse_context &);
    null_parse_context &operator=(const null_parse_context &);
};

// Obsolete interface kept for compatibility; prefer the overload that takes
// an error-string pointer. Parses one value starting at `pos`, advances
// `pos` past the consumed input and returns the error message (empty on
// success).
template <typename Iter>
inline std::string parse(value &out, Iter &pos, const Iter &last) {
    std::string message;
    pos = parse(out, pos, last, &message);
    return message;
}

// Parses one value from [first, last) using `ctx`. On failure, and when
// `err` is non-null, *err receives "syntax error at line N near: " followed
// by the rest of the offending line (control characters omitted). Returns
// the iterator just past the consumed input.
template <typename Context, typename Iter>
inline Iter _parse(Context &ctx,
                   const Iter &first,
                   const Iter &last,
                   std::string *err) {
    input<Iter> in(first, last);
    const bool parsed = _parse(ctx, in);
    if (!parsed && err != NULL) {
        char prefix[64];
        SNPRINTF(
            prefix, sizeof(prefix), "syntax error at line %d near: ", in.line());
        *err = prefix;
        // Append the remainder of the current line as context.
        for (int ch = in.getc(); ch != -1 && ch != '\n'; ch = in.getc()) {
            if (ch >= ' ') {
                err->push_back(static_cast<char>(ch));
            }
        }
    }
    return in.cur();
}

// Parses one value from [first, last) into `out` with the default context.
// Returns the iterator just past the consumed input; see _parse for `err`.
template <typename Iter>
inline Iter parse(value &out,
                  const Iter &first,
                  const Iter &last,
                  std::string *err) {
    default_parse_context builder(&out);
    return _parse(builder, first, last, err);
}

// Parses a value from a string; returns the error message, empty on success.
inline std::string parse(value &out, const std::string &s) {
    std::string message;
    parse(out, s.begin(), s.end(), &message);
    return message;
}

// Parses a value from a stream's buffer; returns the error message, empty
// on success.
inline std::string parse(value &out, std::istream &is) {
    std::string message;
    const std::istreambuf_iterator<char> stream_begin(is.rdbuf());
    const std::istreambuf_iterator<char> stream_end;
    parse(out, stream_begin, stream_end, &message);
    return message;
}

// Holder for the most recent parse error message. Making it a template
// lets the static member be defined in a header.
template <typename T>
struct last_error_t {
    static std::string s;
};
template <typename T>
std::string last_error_t<T>::s;

// Stores `s` as the most recent error message.
inline void set_last_error(const std::string &s) {
    std::string &slot = last_error_t<bool>::s;
    slot = s;
}

// Returns the most recently stored error message.
inline const std::string &get_last_error() {
    const std::string &slot = last_error_t<bool>::s;
    return slot;
}

// Deep equality: two values are equal when they have the same type and
// equal contents. The checks run in the order null, bool, double, string,
// array, object; reaching the end trips PICOJSON_ASSERT.
inline bool operator==(const value &x, const value &y) {
    if (x.is<null>()) {
        return y.is<null>();
    }
    if (x.is<bool>()) {
        return y.is<bool>() && x.get<bool>() == y.get<bool>();
    }
    if (x.is<double>()) {
        return y.is<double>() && x.get<double>() == y.get<double>();
    }
    if (x.is<std::string>()) {
        return y.is<std::string>() &&
               x.get<std::string>() == y.get<std::string>();
    }
    if (x.is<array>()) {
        return y.is<array>() && x.get<array>() == y.get<array>();
    }
    if (x.is<object>()) {
        return y.is<object>() && x.get<object>() == y.get<object>();
    }
    PICOJSON_ASSERT(0);
#ifdef _MSC_VER
    __assume(0);
#endif
    return false;
}

// Negation of operator==.
inline bool operator!=(const value &x, const value &y) {
    const bool equal = (x == y);
    return !equal;
}
}

// Without move support, specialize std::swap for picojson::value so that it
// forwards to value::swap instead of the generic copy-based swap.
#if !PICOJSON_USE_RVALUE_REFERENCE
namespace std {
template <>
inline void swap(picojson::value &x, picojson::value &y) {
    x.swap(y);
}
}
#endif

// Stream extraction: parses one JSON value from `is` into `x`. The last
// error is cleared first; on failure the message is recorded with
// picojson::set_last_error and failbit is set on the stream.
inline std::istream &operator>>(std::istream &is, picojson::value &x) {
    picojson::set_last_error(std::string());
    const std::string message(picojson::parse(x, is));
    if (message.empty()) {
        return is;
    }
    picojson::set_last_error(message);
    is.setstate(std::ios::failbit);
    return is;
}

// Stream insertion: writes the compact serialization of `x` to `os`.
inline std::ostream &operator<<(std::ostream &os, const picojson::value &x) {
    std::ostream_iterator<char> sink(os);
    x.serialize(sink);
    return os;
}
#ifdef _MSC_VER
#pragma warning(pop)
#endif

#endif

================================================
FILE: runtime/engine/common/utils/strings.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <sstream>

#include "utils/strings.h"

namespace ppspeech {

// Splits `str` at every character that appears in `delim`.
//
// @param str                the text to split.
// @param delim              NUL-terminated set of delimiter characters; any
//                           single character of the set ends a piece.
// @param omit_empty_string  when true, empty pieces (produced by leading,
//                           trailing or consecutive delimiters, or by an
//                           empty input) are dropped from the result.
// @return the pieces in their original order.
std::vector<std::string> StrSplit(const std::string& str,
                                  const char* delim,
                                  bool omit_empty_string) {
    std::vector<std::string> outs;
    // Positions stay in size_type: find_first_of() reports "not found" as
    // std::string::npos, which does not fit in an int.
    std::string::size_type start = 0;
    while (true) {
        const std::string::size_type found = str.find_first_of(delim, start);
        // The current piece ends at the delimiter, or at the end of the
        // string when no further delimiter exists.
        const std::string::size_type stop =
            (found == std::string::npos) ? str.size() : found;
        if (!omit_empty_string || stop != start) {
            outs.push_back(str.substr(start, stop - start));
        }
        if (found == std::string::npos) {
            break;
        }
        start = found + 1;
    }

    return outs;
}


// Concatenates `strs`, inserting `delim` between consecutive elements.
//
// @param strs   the pieces to join; an empty vector yields an empty string.
// @param delim  NUL-terminated separator placed between elements (never
//               before the first or after the last one).
// @return the joined string.
std::string StrJoin(const std::vector<std::string>& strs, const char* delim) {
    std::string joined;
    // size_type instead of the POSIX-only ssize_t: portable, and it avoids
    // signed/unsigned comparisons against strs.size().
    for (std::vector<std::string>::size_type i = 0; i < strs.size(); ++i) {
        if (i != 0) {
            joined += delim;
        }
        joined += strs[i];
    }
    return joined;
}

// Removes the space characters (' ') from `str`, except that a single space
// is kept between two alphabetic characters that were separated by one or
// more spaces in the input (so "how   are" becomes "how are").
//
// Only ' ' is treated as blank; other whitespace is copied through.
//
// @param str  input text (bytes outside the ASCII range are copied as-is).
// @return the text with blanks removed as described above.
std::string DelBlank(const std::string& str) {
    std::string out;
    out.reserve(str.size());
    // True when at least one space was skipped since the last copied char.
    bool skipped_space = false;
    for (const char c : str) {
        if (c == ' ') {
            skipped_space = true;
            continue;
        }
        // isalpha() is only defined for values representable as unsigned
        // char (or EOF); a plain char holding a UTF-8 byte may be negative.
        if (skipped_space && !out.empty() &&
            isalpha(static_cast<unsigned char>(c)) &&
            isalpha(static_cast<unsigned char>(out.back()))) {
            // keep one separator between two English letters
            out += ' ';
        }
        out += c;
        skipped_space = false;
    }
    return out;
}

// Surrounds every run of alphabetic characters in `str` with spaces.
//
// A space is inserted before a run unless the character immediately before
// it in the input is already a space; a space is always appended after the
// run.  All other characters are copied unchanged.
//
// @param str  input text (bytes outside the ASCII range are copied as-is).
// @return the text with the extra blanks inserted.
std::string AddBlank(const std::string& str) {
    std::string out;
    const std::string::size_type end = str.size();
    std::string::size_type ptr = 0;  // read position in the input
    while (ptr < end) {
        // isalpha() requires an unsigned-char value; plain char may be
        // negative for UTF-8 bytes.
        if (!isalpha(static_cast<unsigned char>(str[ptr]))) {
            out += str[ptr];
            ++ptr;
            continue;
        }
        if (ptr == 0 || str[ptr - 1] != ' ') {
            out += ' ';  // add pre-space for an English word
        }
        // Bounded explicitly instead of relying on the terminator at end.
        while (ptr < end && isalpha(static_cast<unsigned char>(str[ptr]))) {
            out += str[ptr];
            ++ptr;
        }
        out += ' ';  // add post-space for an English word
    }
    return out;
}

// Rewrites every "<tag>num1/num2<tag>" span in `str` as "num2/num1",
// dropping both tags.  Note that the closing marker is the same literal
// "<tag>" as the opening one.  Text outside such spans is copied unchanged.
//
// If an opening "<tag>" is not followed by a '/' and then a closing "<tag>",
// the span is malformed: processing stops there and the remainder of the
// input (including that opening tag) is copied verbatim.
//
// @param str  input text.
// @return the text with the tagged fractions reversed.
std::string ReverseFraction(const std::string& str) {
    static const char kTag[] = "<tag>";
    const std::string::size_type len_tag = sizeof(kTag) - 1;
    const std::string::size_type npos = std::string::npos;

    std::string out;
    std::string::size_type ptr = 0;  // first input position not yet emitted
    while (ptr < str.size()) {
        // locate the left tag, the '/', and the right tag, in that order
        const std::string::size_type left = str.find(kTag, ptr);
        if (left == npos) {
            break;
        }
        const std::string::size_type frac = str.find('/', left + len_tag);
        const std::string::size_type right =
            (frac == npos) ? npos : str.find(kTag, frac + 1);
        if (right == npos) {
            break;  // malformed span: leave the rest of the input untouched
        }
        out.append(str, ptr, left - ptr);            // content before left tag
        out.append(str, frac + 1, right - frac - 1);  // num2
        out += '/';
        out.append(str, left + len_tag, frac - left - len_tag);  // num1
        ptr = right + len_tag;
    }
    out.append(str, ptr, npos);  // whatever follows the last rewritten span
    return out;
}

#ifdef _MSC_VER
// Converts a multibyte string to a wide string using the C locale selected
// by the environment (setlocale(LC_CTYPE, "") is called on every invocation).
//
// @param str  multibyte text; conversion stops at the first NUL byte.
// @return the converted wide string, or an empty string when `str` is empty
//         or mbstowcs() rejects the input as an invalid multibyte sequence.
std::wstring ToWString(const std::string& str) {
    setlocale(LC_CTYPE, "");
    // A multibyte string never yields more wide characters than it has
    // bytes; the extra slot guarantees room for the terminator, including
    // for an empty input.
    std::vector<wchar_t> buf(str.size() + 1, L'\0');
    const size_t converted = mbstowcs(buf.data(), str.c_str(), buf.size());
    if (converted == static_cast<size_t>(-1)) {
        // invalid sequence: the buffer contents are unspecified
        return std::wstring();
    }
    return std::wstring(buf.data(), converted);
}
#endif

}  // namespace ppspeech


================================================
FILE: runtime/engine/common/utils/strings.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>
#include <vector>

namespace ppspeech {

// Splits `str` at every character contained in `delim` (each character of
// `delim` is a separate delimiter).  When `omit_empty_string` is true, empty
// pieces are left out of the result.
std::vector<std::string> StrSplit(const std::string& str,
                                  const char* delim,
                                  bool omit_empty_string = true);

// Concatenates `strs`, placing `delim` between consecutive elements.
std::string StrJoin(const std::vector<std::string>& strs, const char* delim);

// Removes the ' ' characters from `str`, keeping a single space between two
// alphabetic characters that were separated by space(s) in the input.
std::string DelBlank(const std::string& str);

// Surrounds each run of alphabetic characters with spaces: one before the
// run (unless the input already has a space there) and one after it.
std::string AddBlank(const std::string& str);

// Rewrites each "<tag>a/b<tag>" span of `str` as "b/a" (tags removed).
std::string ReverseFraction(const std::string& str);

#ifdef _MSC_VER
// Converts a multibyte string to a wide string via mbstowcs(); only built
// with MSVC.
std::wstring ToWString(const std::string& str);
#endif

}  // namespace ppspeech


================================================
FILE: runtime/engine/common/utils/strings_test.cc
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include "utils/strings.h"

#include <gmock/gmock.h>
#include <gtest/gtest.h>


// StrSplit with a multi-character delimiter set and the default
// omit_empty_string argument.
TEST(StringTest, StrSplitTest) {
    using ::testing::ElementsAre;

    const std::string input = "hello world";
    const std::vector<std::string> pieces = ppspeech::StrSplit(input, " \t");
    EXPECT_THAT(pieces, ElementsAre("hello", "world"));
}


// StrJoin places the delimiter only between elements.
TEST(StringTest, StrJoinTest) {
    const std::vector<std::string> words{"hello", "world"};
    const std::string joined = ppspeech::StrJoin(words, " ");
    EXPECT_THAT(joined, "hello world");
}

// DelBlank: blanks between non-alphabetic characters are dropped, while one
// blank is kept between English words.
// The suite is named StringTest like the other cases in this file (it was
// previously misspelled "StringText"), and strings are compared directly so
// a failure prints the actual and expected text.
TEST(StringTest, DelBlankTest) {
    std::string test_str = "我 今天     去 了 超市     花了   120 元。";
    std::string out_str = ppspeech::DelBlank(test_str);
    EXPECT_EQ(out_str, "我今天去了超市花了120元。");

    test_str = "how are you today";
    out_str = ppspeech::DelBlank(test_str);
    EXPECT_EQ(out_str, "how are you today");

    test_str = "我 的 paper 在 哪里？";
    out_str = ppspeech::DelBlank(test_str);
    EXPECT_EQ(out_str, "我的paper在哪里？");
}

// AddBlank: every English word gets a space on both sides.
TEST(StringTest, AddBlankTest) {
    const std::string english = ppspeech::AddBlank("how are you");
    EXPECT_EQ(english.compare(" how  are  you "), 0);

    const std::string mixed = ppspeech::AddBlank("欢迎来到China。");
    EXPECT_EQ(mixed.compare("欢迎来到 China 。"), 0);
}

// ReverseFraction: each "<tag>a/b<tag>" span becomes "b/a".
// Strings are compared directly so a failing expectation prints both values;
// the former std::cout debugging output (which relied on <iostream> being
// included transitively) is gone.
TEST(StringTest, ReverseFractionTest) {
    std::string test_str = "<tag>3/1<tag>";
    std::string out_str = ppspeech::ReverseFraction(test_str);
    EXPECT_EQ(out_str, "1/3");

    test_str = "<tag>3/1<tag> <tag>100/10000<tag>";
    out_str = ppspeech::ReverseFraction(test_str);
    EXPECT_EQ(out_str, "1/3 10000/100");
}


================================================
FILE: runtime/engine/common/utils/timer.cc
================================================
// Copyright      2020  Xiaomi Corporation (authors: Haowen Qiu)
//                      Mobvoi Inc.        (authors: Fangjun Kuang)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include <chrono>

#include "common/utils/timer.h"

namespace ppspeech{

// Abstract timing backend used by Timer through a pointer.
struct TimerImpl {
    TimerImpl() = default;
    virtual ~TimerImpl() = default;

    // Restarts the measurement.
    virtual void Reset() = 0;

    // Time in seconds, as reported by the concrete implementation.
    virtual double Elapsed() = 0;
};

// Wall-clock stopwatch.  The measurement starts at construction and restarts
// on every Reset().
class CpuTimerImpl : public TimerImpl {
 public:
  // steady_clock is monotonic, so an interval can never come out negative.
  // high_resolution_clock may be an alias of system_clock, which can jump
  // when the system time is adjusted.
  using Clock = std::chrono::steady_clock;

  CpuTimerImpl() { Reset(); }

  void Reset() override { begin_ = Clock::now(); }

  // time in seconds since construction or the last Reset()
  double Elapsed() override {
    // duration<double> keeps the clock's full resolution instead of
    // truncating to whole microseconds first.
    const std::chrono::duration<double> elapsed = Clock::now() - begin_;
    return elapsed.count();
  }

 private:
  Clock::time_point begin_;
};

// Creates the timer with a CpuTimerImpl backend (which starts timing in its
// own constructor).
Timer::Timer() : impl_(std::make_unique<CpuTimerImpl>()) {}

// Defined here rather than in timer.h: the header only forward-declares
// TimerImpl, and destroying std::unique_ptr<TimerImpl> needs the complete
// type, which is available in this translation unit.
Timer::~Timer() = default;

// Restarts the measurement by delegating to the implementation object.
void Timer::Reset() const {
    impl_->Reset();
}

// Time in seconds, as reported by the implementation object.
double Timer::Elapsed() const {
    const double seconds = impl_->Elapsed();
    return seconds;
}


} //namespace ppspeech

================================================
FILE: runtime/engine/common/utils/timer.h
================================================
// Copyright      2020  Xiaomi Corporation (authors: Haowen Qiu)
//                      Mobvoi Inc.        (authors: Fangjun Kuang)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>

namespace ppspeech {

struct TimerImpl;

// Stopwatch front end.  The actual time keeping lives in a TimerImpl object
// (forward-declared above, defined in timer.cc) owned through impl_.
class Timer {
    public:
        // Both are defined in timer.cc, where TimerImpl is a complete type.
        Timer();
        ~Timer();

        // Restarts the measurement.  Declared const: it changes the object
        // impl_ points to, not the pointer itself.
        void Reset() const;

        // time in seconds
        double Elapsed() const;

    private:
        // Owning pointer to the timing backend.
        std::unique_ptr<TimerImpl> impl_;
};

} //namespace ppspeech

================================================
FILE: runtime/engine/kaldi/CMakeLists.txt
================================================
# Make this directory an include root for the targets defined below it.
include_directories(
${CMAKE_CURRENT_SOURCE_DIR}
)

# Sub-directories that are always added.
add_subdirectory(base)
add_subdirectory(util)
# Sub-directories that are only added when WITH_ASR is enabled.
if(WITH_ASR)
    add_subdirectory(lat)
    add_subdirectory(fstext)
    add_subdirectory(decoder)
    add_subdirectory(lm)

    add_subdirectory(fstbin)
    add_subdirectory(lmbin)
endif()


================================================
FILE: runtime/engine/kaldi/base/CMakeLists.txt
================================================

# kaldi-base library target, built from the sources in this directory.
add_library(kaldi-base  
  io-funcs.cc
  kaldi-error.cc
  kaldi-math.cc
  kaldi-utils.cc
  timer.cc)

================================================
FILE: runtime/engine/kaldi/base/io-funcs-inl.h
================================================
// base/io-funcs-inl.h

// Copyright 2009-2011  Microsoft Corporation;  Saarland University;
//                      Jan Silovsky;   Yanmin Qian;
//                      Johns Hopkins University (Author: Daniel Povey)
//                2016  Xiaohui Zhang

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//  http://www.apache.org/licenses/LICENSE-2.0

// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_BASE_IO_FUNCS_INL_H_
#define KALDI_BASE_IO_FUNCS_INL_H_ 1

// Do not include this file directly.  It is included by base/io-funcs.h

#include <limits>
#include <vector>

namespace kaldi {

// Writes one integer to `os`.
// Binary mode: a one-byte tag (sizeof(T), negated for unsigned types)
// followed by the raw bytes of the value.  Text mode: the decimal value
// followed by a space; one-byte types are widened to int16 so they are
// printed as numbers rather than characters.
// Reports a stream failure through KALDI_ERR.
template<class T>  void WriteBasicType(std::ostream &os,
                                       bool binary, T t) {
  // Compile time assertion that this is not called with a wrong type.
  KALDI_ASSERT_IS_INTEGER_TYPE(T);
  if (!binary) {
    if (sizeof(t) == 1) {
      os << static_cast<int16>(t) << " ";
    } else {
      os << t << " ";
    }
  } else {
    const int sign = std::numeric_limits<T>::is_signed ? 1 : -1;
    char len_c = sign * static_cast<char>(sizeof(t));
    os.put(len_c);
    os.write(reinterpret_cast<const char *>(&t), sizeof(t));
  }
  if (os.fail()) {
    KALDI_ERR << "Write failure in WriteBasicType.";
  }
}

// Template that covers integers.
// Reads one integer from `is` into `*t`.
// Binary mode expects a one-byte tag equal to sizeof(T) (negated for
// unsigned types) followed by the raw bytes of the value; text mode reads a
// formatted number.  Problems are reported through KALDI_ERR.
template<class T> inline void ReadBasicType(std::istream &is,
                                            bool binary, T *t) {
  KALDI_PARANOID_ASSERT(t != NULL);
  // Compile time assertion that this is not called with a wrong type.
  KALDI_ASSERT_IS_INTEGER_TYPE(T);
  if (binary) {
    // First byte is the size/signedness tag.
    int len_c_in = is.get();
    if (len_c_in == -1)
      KALDI_ERR << "ReadBasicType: encountered end of stream.";
    // Tag actually read vs. the tag this instantiation expects.
    char len_c = static_cast<char>(len_c_in), len_c_expected
      = (std::numeric_limits<T>::is_signed ? 1 :  -1)
      * static_cast<char>(sizeof(*t));
    if (len_c !=  len_c_expected) {
      KALDI_ERR << "ReadBasicType: did not get expected integer type, "
                << static_cast<int>(len_c)
                << " vs. " << static_cast<int>(len_c_expected)
                << ".  You can change this code to successfully"
                << " read it later, if needed.";
      // insert code here to read "wrong" type.  Might have a switch statement.
    }
    is.read(reinterpret_cast<char *>(t), sizeof(*t));
  } else {
    if (sizeof(*t) == 1) {
      // One-byte types are read through int16 so the text is parsed as a
      // number rather than as a single character.
      int16 i;
      is >> i;
      *t = i;
    } else {
      is >> *t;
    }
  }
  if (is.fail()) {
    KALDI_ERR << "Read failure in ReadBasicType, file position is "
              << is.tellg() << ", next char is " << is.peek();
  }
}

// Writes a vector of integer pairs to `os`.
// Binary mode: one byte holding sizeof(T), the element count as int32, then
// the raw memory of the pairs.  Text mode: "[ a,b c,d ]" followed by a
// newline, with one-byte types widened to int16 so they print as numbers.
// Reports a stream failure through KALDI_ERR.
template<class T>
inline void WriteIntegerPairVector(std::ostream &os, bool binary,
                                   const std::vector<std::pair<T, T> > &v) {
  // Compile time assertion that this is not called with a wrong type.
  KALDI_ASSERT_IS_INTEGER_TYPE(T);
  if (binary) {
    char sz = sizeof(T);  // this is currently just a check.
    os.write(&sz, 1);
    int32 vecsz = static_cast<int32>(v.size());
    // the count must survive the conversion to int32
    KALDI_ASSERT((size_t)vecsz == v.size());
    os.write(reinterpret_cast<const char *>(&vecsz), sizeof(vecsz));
    if (vecsz != 0) {
      os.write(reinterpret_cast<const char *>(&(v[0])), sizeof(T) * vecsz * 2);
    }
  } else {
    // The text form favours readability over parsing speed; use binary mode
    // when efficiency matters.
    typedef typename std::vector<std::pair<T, T> >::size_type Index;
    os << "[ ";
    for (Index i = 0; i < v.size(); ++i) {
      if (sizeof(T) == 1) {
        os << static_cast<int16>(v[i].first) << ','
           << static_cast<int16>(v[i].second) << ' ';
      } else {
        os << v[i].first << ','
           << v[i].second << ' ';
      }
    }
    os << "]\n";
  }
  if (os.fail()) {
    KALDI_ERR << "Write failure in WriteIntegerPairVector.";
  }
}

// Template that covers integers.
// Reads a vector of integer pairs from `is` into `*v`.
// Binary mode expects one byte equal to sizeof(T), an int32 element count
// and then the raw memory of the pairs.  Text mode expects
// "[ a,b c,d ... ]".  Any problem is reported through KALDI_ERR.
template<class T>
inline void ReadIntegerPairVector(std::istream &is, bool binary,
                                  std::vector<std::pair<T, T> > *v) {
  KALDI_ASSERT_IS_INTEGER_TYPE(T);
  KALDI_ASSERT(v != NULL);
  if (binary) {
    // The size byte is only consumed when it matches sizeof(T).
    int sz = is.peek();
    if (sz == sizeof(T)) {
      is.get();
    } else {  // this is currently just a check.
      KALDI_ERR << "ReadIntegerPairVector: expected to see type of size "
                << sizeof(T) << ", saw instead " << sz << ", at file position "
                << is.tellg();
    }
    int32 vecsz;
    is.read(reinterpret_cast<char *>(&vecsz), sizeof(vecsz));
    if (is.fail() || vecsz < 0) goto bad;
    v->resize(vecsz);
    if (vecsz > 0) {
      // Pairs are read straight into the vector's storage.
      is.read(reinterpret_cast<char *>(&((*v)[0])), sizeof(T)*vecsz*2);
    }
  } else {
    std::vector<std::pair<T, T> > tmp_v;  // use temporary so v doesn't use extra memory
                           // due to resizing.
    is >> std::ws;
    if (is.peek() != static_cast<int>('[')) {
      KALDI_ERR << "ReadIntegerPairVector: expected to see [, saw "
                << is.peek() << ", at file position " << is.tellg();
    }
    is.get();  // consume the '['.
    is >> std::ws;  // consume whitespace.
    while (is.peek() != static_cast<int>(']')) {
      if (sizeof(T) == 1) {  // read/write chars as numbers.
        int16 next_t1, next_t2;
        is >> next_t1;
        if (is.fail()) goto bad;
        if (is.peek() != static_cast<int>(','))
          KALDI_ERR << "ReadIntegerPairVector: expected to see ',', saw "
                    << is.peek() << ", at file position " << is.tellg();
        is.get();  // consume the ','.
        is >> next_t2 >> std::ws;
        if (is.fail()) goto bad;
        else
            tmp_v.push_back(std::make_pair<T, T>((T)next_t1, (T)next_t2));
      } else {
        T next_t1, next_t2;
        is >> next_t1;
        if (is.fail()) goto bad;
        if (is.peek() != static_cast<int>(','))
          KALDI_ERR << "ReadIntegerPairVector: expected to see ',', saw "
                    << is.peek() << ", at file position " << is.tellg();
        is.get();  // consume the ','.
        is >> next_t2 >> std::ws;
        if (is.fail()) goto bad;
        else
            tmp_v.push_back(std::pair<T, T>(next_t1, next_t2));
      }
    }
    is.get();  // get the final ']'.
    *v = tmp_v;  // could use std::swap to use less temporary memory, but this
    // uses less permanent memory.
  }
  // Success path; a failed stream falls through to the shared error label.
  if (!is.fail()) return;
 bad:
  KALDI_ERR << "ReadIntegerPairVector: read failure at file position "
            << is.tellg();
}

// Writes a vector of integers to `os`.
// Binary mode: one byte holding sizeof(T), the element count as int32, then
// the raw memory of the elements.  Text mode: "[ a b c ]" followed by a
// newline, with one-byte types widened to int16 so they print as numbers.
// Reports a stream failure through KALDI_ERR.
template<class T> inline void WriteIntegerVector(std::ostream &os, bool binary,
                                                 const std::vector<T> &v) {
  // Compile time assertion that this is not called with a wrong type.
  KALDI_ASSERT_IS_INTEGER_TYPE(T);
  if (binary) {
    char sz = sizeof(T);  // this is currently just a check.
    os.write(&sz, 1);
    int32 vecsz = static_cast<int32>(v.size());
    // the count must survive the conversion to int32
    KALDI_ASSERT((size_t)vecsz == v.size());
    os.write(reinterpret_cast<const char *>(&vecsz), sizeof(vecsz));
    if (vecsz != 0) {
      os.write(reinterpret_cast<const char *>(&(v[0])), sizeof(T)*vecsz);
    }
  } else {
    // The text form favours readability over parsing speed; use binary mode
    // when efficiency matters.
    typedef typename std::vector<T>::size_type Index;
    os << "[ ";
    for (Index i = 0; i < v.size(); ++i) {
      if (sizeof(T) == 1) {
        os << static_cast<int16>(v[i]) << " ";
      } else {
        os << v[i] << " ";
      }
    }
    os << "]\n";
  }
  if (os.fail()) {
    KALDI_ERR << "Write failure in WriteIntegerVector.";
  }
}


// Reads a vector of integers from `is` into `*v`.
// Binary mode expects one byte equal to sizeof(T), an int32 element count
// and then the raw memory of the elements.  Text mode expects "[ a b ... ]".
// Any problem is reported through KALDI_ERR.
template<class T> inline void ReadIntegerVector(std::istream &is,
                                                bool binary,
                                                std::vector<T> *v) {
  KALDI_ASSERT_IS_INTEGER_TYPE(T);
  KALDI_ASSERT(v != NULL);
  if (binary) {
    // The size byte is only consumed when it matches sizeof(T).
    int sz = is.peek();
    if (sz == sizeof(T)) {
      is.get();
    } else {  // this is currently just a check.
      KALDI_ERR << "ReadIntegerVector: expected to see type of size "
                << sizeof(T) << ", saw instead " << sz << ", at file position "
                << is.tellg();
    }
    int32 vecsz;
    is.read(reinterpret_cast<char *>(&vecsz), sizeof(vecsz));
    if (is.fail() || vecsz < 0) goto bad;
    v->resize(vecsz);
    if (vecsz > 0) {
      // Elements are read straight into the vector's storage.
      is.read(reinterpret_cast<char *>(&((*v)[0])), sizeof(T)*vecsz);
    }
  } else {
    std::vector<T> tmp_v;  // use temporary so v doesn't use extra memory
                           // due to resizing.
    is >> std::ws;
    if (is.peek() != static_cast<int>('[')) {
      KALDI_ERR << "ReadIntegerVector: expected to see [, saw "
                << is.peek() << ", at file position " << is.tellg();
    }
    is.get();  // consume the '['.
    is >> std::ws;  // consume whitespace.
    while (is.peek() != static_cast<int>(']')) {
      if (sizeof(T) == 1) {  // read/write chars as numbers.
        int16 next_t;
        is >> next_t >> std::ws;
        if (is.fail()) goto bad;
        else
            tmp_v.push_back((T)next_t);
      } else {
        T next_t;
        is >> next_t >> std::ws;
        if (is.fail()) goto bad;
        else
            tmp_v.push_back(next_t);
      }
    }
    is.get();  // get the final ']'.
    *v = tmp_v;  // could use std::swap to use less temporary memory, but this
    // uses less permanent memory.
  }
  // Success path; a failed stream falls through to the shared error label.
  if (!is.fail()) return;
 bad:
  KALDI_ERR << "ReadIntegerVector: read failure at file position "
            << is.tellg();
}


// Prepares an already-open output stream: in binary mode the two-byte header
// "\0B" is written, and in either mode the floating-point precision is raised
// to at least 7 digits (slightly more than float precision).
// No error checking is done here and nothing is thrown.
inline void InitKaldiOutputStream(std::ostream &os, bool binary) {
  if (binary) {
    os.put('\0').put('B');
  }
  // A precision that is already higher is left untouched.
  const std::streamsize min_precision = 7;
  if (os.precision() < min_precision) {
    os.precision(min_precision);
  }
}

/// Prepares an already-open input stream by detecting the binary header.
/// If the stream starts with "\0B", both bytes are consumed, *binary is set
/// to true and true is returned.  If it starts with anything other than
/// '\0', nothing is consumed, *binary is set to false and true is returned.
/// If it starts with '\0' that is not followed by 'B', the '\0' has been
/// consumed, *binary is left untouched and false is returned.
inline bool InitKaldiInputStream(std::istream &is, bool *binary) {
  if (is.peek() != '\0') {  // no header: text mode
    *binary = false;
    return true;
  }
  is.get();  // consume the '\0'
  if (is.peek() != 'B') {
    return false;
  }
  is.get();  // consume the 'B'
  *binary = true;
  return true;
}

}  // end namespace kaldi.

#endif  // KALDI_BASE_IO_FUNCS_INL_H_


================================================
FILE: runtime/engine/kaldi/base/io-funcs.cc
================================================
// base/io-funcs.cc

// Copyright 2009-2011  Microsoft Corporation;  Saarland University

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//  http://www.apache.org/licenses/LICENSE-2.0

// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "base/io-funcs.h"
#include "base/kaldi-math.h"

namespace kaldi {

// bool specialization: writes "T" or "F" in both modes, followed by a space
// in text mode only.  Reports a stream failure through KALDI_ERR.
template<>
void WriteBasicType<bool>(std::ostream &os, bool binary, bool b) {
  const char *text = b ? "T" : "F";
  os << text;
  if (!binary) {
    os << " ";
  }
  if (os.fail()) {
    KALDI_ERR << "Write failure in WriteBasicType<bool>";
  }
}

// bool specialization: expects the next character to be 'T' or 'F' (after
// skipping whitespace in text mode), consumes it and stores the value in *b.
// Any other character is reported through KALDI_ERR and is not consumed.
template<>
void ReadBasicType<bool>(std::istream &is, bool binary, bool *b) {
  KALDI_PARANOID_ASSERT(b != NULL);
  if (!binary) is >> std::ws;  // eat up whitespace.
  char c = is.peek();
  switch (c) {
    case 'T':
      *b = true;
      is.get();
      break;
    case 'F':
      *b = false;
      is.get();
      break;
    default:
      KALDI_ERR << "Read failure in ReadBasicType<bool>, file position is "
                << is.tellg() << ", next char is " << CharToString(c);
  }
}

// float specialization.  Binary mode: one byte holding sizeof(float) and then
// the raw bytes of the value.  Text mode: the formatted value plus a space.
// The stream state is not checked here.
template<>
void WriteBasicType<float>(std::ostream &os, bool binary, float f) {
  if (!binary) {
    os << f << " ";
    return;
  }
  const char size_tag = sizeof(f);
  os.put(size_tag);
  os.write(reinterpret_cast<const char *>(&f), sizeof(f));
}

// double specialization.  Binary mode: one byte holding sizeof(double) and
// then the raw bytes of the value.  Text mode: the formatted value plus a
// space.  The stream state is not checked here.
template<>
void WriteBasicType<double>(std::ostream &os, bool binary, double f) {
  if (!binary) {
    os << f << " ";
    return;
  }
  const char size_tag = sizeof(f);
  os.put(size_tag);
  os.write(reinterpret_cast<const char *>(&f), sizeof(f));
}

// float specialization.  In binary mode the leading size byte decides the
// stored width: a float is read directly, a double is read through the
// double specialization and then narrowed.  Any other size byte, or a failed
// stream afterwards, is reported through KALDI_ERR.
template<>
void ReadBasicType<float>(std::istream &is, bool binary, float *f) {
  KALDI_PARANOID_ASSERT(f != NULL);
  if (!binary) {
    is >> *f;
  } else {
    const int size_tag = is.peek();
    if (size_tag == sizeof(*f)) {
      is.get();
      is.read(reinterpret_cast<char*>(f), sizeof(*f));
    } else if (size_tag == sizeof(double)) {
      // stored as double: let the double reader consume it
      double d;
      ReadBasicType(is, binary, &d);
      *f = d;
    } else {
      KALDI_ERR << "ReadBasicType: expected float, saw " << is.peek()
                << ", at file position " << is.tellg();
    }
  }
  if (is.fail()) {
    KALDI_ERR << "ReadBasicType: failed to read, at file position "
              << is.tellg();
  }
}

// double specialization.  In binary mode the leading size byte decides the
// stored width: a double is read directly, a float is read through the float
// specialization and then widened.  Any other size byte, or a failed stream
// afterwards, is reported through KALDI_ERR.
template<>
void ReadBasicType<double>(std::istream &is, bool binary, double *d) {
  KALDI_PARANOID_ASSERT(d != NULL);
  if (!binary) {
    is >> *d;
  } else {
    const int size_tag = is.peek();
    if (size_tag == sizeof(*d)) {
      is.get();
      is.read(reinterpret_cast<char*>(d), sizeof(*d));
    } else if (size_tag == sizeof(float)) {
      // stored as float: let the float reader consume it
      float f;
      ReadBasicType(is, binary, &f);
      *d = f;
    } else {
      KALDI_ERR << "ReadBasicType: expected float, saw " << is.peek()
                << ", at file position " << is.tellg();
    }
  }
  if (is.fail()) {
    KALDI_ERR << "ReadBasicType: failed to read, at file position "
              << is.tellg();
  }
}

// Validates that `token` can be written and read back as a single token:
// it must be non-empty and contain no whitespace character.
// Violations are reported through KALDI_ERR.
void CheckToken(const char *token) {
  if (*token == '\0')
    KALDI_ERR << "Token is empty (not a valid token)";
  for (const char *p = token; *p != '\0'; ++p) {
    // isspace() is only defined for unsigned-char values (or EOF); a plain
    // char holding a non-ASCII byte may be negative.
    if (::isspace(static_cast<unsigned char>(*p)))
      KALDI_ERR << "Token is not a valid token (contains space): '"
                << token << "'";
  }
}

// Writes `token` followed by a single space.  The `binary` flag is ignored:
// a space terminates the token in both modes.  The token is validated with
// CheckToken() first; a stream failure is reported through KALDI_ERR.
void WriteToken(std::ostream &os, bool binary, const char *token) {
  KALDI_ASSERT(token != NULL);
  CheckToken(token);  // make sure it's valid (can be read back)
  os << token;
  os << " ";
  if (!os.fail()) {
    return;
  }
  KALDI_ERR << "Write failure in WriteToken.";
}

// Returns the next character of `is` without consuming it.  In text mode
// leading whitespace is skipped (and consumed) first.
int Peek(std::istream &is, bool binary) {
  if (!binary) {
    is >> std::ws;  // eat up whitespace.
  }
  const int next = is.peek();
  return next;
}

// std::string convenience overload; forwards to the const char* version.
void WriteToken(std::ostream &os, bool binary, const std::string & token) {
  const char *c_token = token.c_str();
  WriteToken(os, binary, c_token);
}

// Reads one whitespace-delimited token into *str and consumes the single
// whitespace character that follows it.  In text mode leading whitespace is
// skipped first.  A failed read, or a token not followed by whitespace, is
// reported through KALDI_ERR.
void ReadToken(std::istream &is, bool binary, std::string *str) {
  KALDI_ASSERT(str != NULL);
  if (!binary) {
    is >> std::ws;  // consume whitespace.
  }
  is >> *str;
  if (is.fail()) {
    KALDI_ERR << "ReadToken, failed to read token at file position "
              << is.tellg();
  }
  const bool followed_by_space = isspace(is.peek()) != 0;
  if (!followed_by_space) {
    KALDI_ERR << "ReadToken, expected space after token, saw instead "
              << CharToString(static_cast<char>(is.peek()))
              << ", at file position " << is.tellg();
  }
  is.get();  // consume the space.
}

// Returns the first character of the upcoming token without consuming the
// token.  If the token starts with '<', the character after the '<' is
// returned instead, and the '<' is put back with unget().  In text mode
// leading whitespace is skipped (and consumed) first.
int PeekToken(std::istream &is, bool binary) {
  if (!binary) {
    is >> std::ws;  // consume whitespace.
  }
  const bool read_bracket = (static_cast<char>(is.peek()) == '<');
  if (read_bracket) {
    is.get();
  }
  int ans = is.peek();
  if (read_bracket && !is.unget()) {
    // Clear the bad bit. This code can be (and is in fact) reached, since the
    // C++ standard does not guarantee that a call to unget() must succeed.
    is.clear();
  }
  return ans;
}


// Reads the next token from `is` (plus the one character that follows it)
// and checks that it equals `token`.  In text mode leading whitespace is
// skipped first.  `token` is validated with CheckToken().
// A failed read or a mismatching token is reported through KALDI_ERR.
void ExpectToken(std::istream &is, bool binary, const char *token) {
  // Kept as std::streamoff: narrowing the position to int would misreport it
  // in the error message for streams larger than 2 GiB.
  const std::streamoff pos_at_start = is.tellg();
  KALDI_ASSERT(token != NULL);
  CheckToken(token);  // make sure it's valid (can be read back)
  if (!binary) is >> std::ws;  // consume whitespace.
  std::string str;
  is >> str;
  is.get();  // consume the space.
  if (is.fail()) {
    KALDI_ERR << "Failed to read token [started at file position "
              << pos_at_start << "], expected " << token;
  }
  // The second half of the '&&' expression below is so that if we're expecting
  // "<Foo>", we will accept "Foo>" instead.  This is so that the model-reading
  // code will tolerate errors in PeekToken where is.unget() failed; search for
  // is.clear() in PeekToken() for an explanation.
  if (strcmp(str.c_str(), token) != 0 &&
      !(token[0] == '<' && strcmp(str.c_str(), token + 1) == 0)) {
    KALDI_ERR << "Expected token \"" << token << "\", got instead \""
              << str <<"\".";
  }
}

// std::string convenience overload; forwards to the const char* version.
void ExpectToken(std::istream &is, bool binary, const std::string &token) {
  const char *c_token = token.c_str();
  ExpectToken(is, binary, c_token);
}

}  // end namespace kaldi


================================================
FILE: runtime/engine/kaldi/base/io-funcs.h
================================================
// base/io-funcs.h

// Copyright 2009-2011  Microsoft Corporation;  Saarland University;
//                      Jan Silovsky;   Yanmin Qian
//                2016  Xiaohui Zhang

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//  http://www.apache.org/licenses/LICENSE-2.0

// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_BASE_IO_FUNCS_H_
#define KALDI_BASE_IO_FUNCS_H_

// This header only contains some relatively low-level I/O functions.
// The full Kaldi I/O declarations are in ../util/kaldi-io.h
// and ../util/kaldi-table.h
// They were put in util/ in order to avoid making the Matrix library
// dependent on them.

#include <cctype>
#include <vector>
#include <string>

#include "base/kaldi-common.h"
#include "base/io-funcs-inl.h"

namespace kaldi {


/*
  This comment describes the Kaldi approach to I/O.  All objects can be written
  and read in two modes: binary and text.  In addition we want to make the I/O
  work if we redefine the typedef "BaseFloat" between floats and doubles.
  We also want to have control over whitespace in text mode without affecting
  the meaning of the file, for pretty-printing purposes.

  Errors are handled by throwing a KaldiFatalError exception.

  For integer and floating-point types (and boolean values):

   WriteBasicType(std::ostream &, bool binary, const T&);
   ReadBasicType(std::istream &, bool binary, T*);

  and we expect these functions to be defined in such a way that they work when
  the type T changes between float and double [so you can read float into double
  and vice versa].  Note that for efficiency and space-saving reasons, the Vector
  and Matrix classes do not use these functions [but they preserve the type
  interchangeability in their own way].

  For a class (or struct) C:
  class C {
  ..
    Write(std::ostream &, bool binary, [possibly extra optional args for specific classes]) const;
    Read(std::istream &, bool binary, [possibly extra optional args for specific classes]);
  ..
  }
  NOTE: The only actual optional args we used are the "add" arguments in
  Vector/Matrix classes, which specify whether we should sum the data already
  in the class with the data being read.

  For types which are typedef's involving stl classes, I/O is as follows:
  typedef std::vector<std::pair<A, B> > MyTypedefName;

  The user should define something like:

   WriteMyTypedefName(std::ostream &, bool binary, const MyTypedefName &t);
   ReadMyTypedefName(std::istream &, bool binary, MyTypedefName *t);

  The user would have to write these functions.

  For a type std::vector<T>:

   void WriteIntegerVector(std::ostream &os, bool binary, const std::vector<T> &v);
   void ReadIntegerVector(std::istream &is, bool binary, std::vector<T> *v);

  For other types, e.g. vectors of pairs, the user should create a routine of the
  type WriteMyTypedefName.  This is to avoid introducing confusing templated functions;
  we could easily create templated functions to handle most of these cases but they
  would have to share the same name.

  It also often happens that the user needs to write/read special tokens as part
  of a file.  These might be class headers, or separators/identifiers in the class.
  We provide special functions for manipulating these.  These special tokens must
  be nonempty and must not contain any whitespace.

    void WriteToken(std::ostream &os, bool binary, const char*);
    void WriteToken(std::ostream &os, bool binary, const std::string & token);
    int Peek(std::istream &is, bool binary);
    void ReadToken(std::istream &is, bool binary, std::string *str);
    int PeekToken(std::istream &is, bool binary);

  WriteToken writes the token and one space (whether in binary or text mode).

  Peek returns the first character of the next token, by consuming whitespace
  (in text mode) and then returning the peek() character.  It returns -1 at EOF;
  it doesn't throw.  It's useful if a class can have various forms based on
  typedefs and virtual classes, and wants to know which version to read.

  ReadToken allows the caller to obtain the next token.  PeekToken is like
  Peek, except that if the next token starts with '<' it skips over that
  character and returns the one after it; it then tries to put the '<' back so
  that a subsequent call to ReadToken reads the whole token.  This is useful
  when different object types are written to the same file; using PeekToken one
  can decide which of the objects to read.

  There is currently no special functionality for writing/reading strings (where the strings
  contain data rather than "special tokens" that are whitespace-free and nonempty).  This is
  because Kaldi is structured in such a way that strings don't appear, except as OpenFst symbol
  table entries (and these have their own format).


  NOTE: you should not call ReadBasicType and WriteBasicType with types,
  such as int and size_t, that are machine-dependent -- at least not
  if you want your file formats to port between machines.  Use int32 and
  int64 where necessary.  There is no way to detect this using compile-time
  assertions because C++ only keeps track of the internal representation of
  the type.
*/

/// \addtogroup io_funcs_basic
/// @{


/// WriteBasicType is the name of the write function for bool, integer types,
/// and floating-point types. They all throw on error.
template<class T> void WriteBasicType(std::ostream &os, bool binary, T t);

/// ReadBasicType is the name of the read function for bool, integer types,
/// and floating-point types. They all throw on error.
template<class T> void ReadBasicType(std::istream &is, bool binary, T *t);


// Declare specialization for bool.
template<>
void WriteBasicType<bool>(std::ostream &os, bool binary, bool b);

template <>
void ReadBasicType<bool>(std::istream &is, bool binary, bool *b);

// Declare specializations for float and double.
template<>
void WriteBasicType<float>(std::ostream &os, bool binary, float f);

template<>
void WriteBasicType<double>(std::ostream &os, bool binary, double f);

template<>
void ReadBasicType<float>(std::istream &is, bool binary, float *f);

template<>
void ReadBasicType<double>(std::istream &is, bool binary, double *f);

// Overload of ReadBasicType taking an extra "add" flag.  When add is false the
// value read from the stream overwrites *t; when add is true the value is read
// into a zero-initialized temporary and accumulated into *t with +=.
// Caution: if used in Read functions, be careful to initialize the parameters
// concerned to zero in the default constructor.
template<class T>
inline void ReadBasicType(std::istream &is, bool binary, T *t, bool add) {
  if (add) {
    T increment = T(0);
    ReadBasicType(is, binary, &increment);
    *t += increment;
    return;
  }
  ReadBasicType(is, binary, t);
}

/// Function for writing STL vectors of integer types.
template<class T> inline void WriteIntegerVector(std::ostream &os, bool binary,
                                                 const std::vector<T> &v);

/// Function for reading STL vector of integer types.
template<class T> inline void ReadIntegerVector(std::istream &is, bool binary,
                                                std::vector<T> *v);

/// Function for writing STL vectors of pairs of integer types.
template<class T>
inline void WriteIntegerPairVector(std::ostream &os, bool binary,
                                   const std::vector<std::pair<T, T> > &v);

/// Function for reading STL vector of pairs of integer types.
template<class T>
inline void ReadIntegerPairVector(std::istream &is, bool binary,
                                  std::vector<std::pair<T, T> > *v);

/// The WriteToken functions are for writing nonempty sequences of non-space
/// characters. They are not for general strings.
void WriteToken(std::ostream &os, bool binary, const char *token);
void WriteToken(std::ostream &os, bool binary, const std::string & token);

/// Peek consumes whitespace (if binary == false) and then returns the peek()
/// value of the stream.
int Peek(std::istream &is, bool binary);

/// ReadToken gets the next token and puts it in str (exception on failure). If
/// PeekToken() had been previously called, it is possible that the stream had
/// failed to unget the starting '<' character. In this case ReadToken() returns
/// the token string without the leading '<'. You must be prepared to handle
/// this case. ExpectToken() handles this internally, and is not affected.
void ReadToken(std::istream &is, bool binary, std::string *token);

/// PeekToken will return the first character of the next token, or -1 if end of
/// file.  It's the same as Peek(), except if the first character is '<' it will
/// skip over it and will return the next character. It will attempt to unget
/// the '<' so the stream is where it was before you did PeekToken(), however,
/// this is not guaranteed (see ReadToken()).
int PeekToken(std::istream &is, bool binary);

/// ExpectToken tries to read in the given token, and throws an exception
/// on failure.
void ExpectToken(std::istream &is, bool binary, const char *token);
void ExpectToken(std::istream &is, bool binary, const std::string & token);

/// ExpectPretty attempts to read the text in "token", but only in non-binary
/// mode.  Throws exception on failure.  It expects an exact match except that
/// arbitrary whitespace matches arbitrary whitespace.
void ExpectPretty(std::istream &is, bool binary, const char *token);
void ExpectPretty(std::istream &is, bool binary, const std::string & token);

/// @} end "addtogroup io_funcs_basic"


/// InitKaldiOutputStream initializes an opened stream for writing by writing an
/// optional binary header and modifying the floating-point precision; it will
/// typically not be called by users directly.
inline void InitKaldiOutputStream(std::ostream &os, bool binary);

/// InitKaldiInputStream initializes an opened stream for reading by detecting
/// the binary header and setting the "binary" value appropriately;
/// It will typically not be called by users directly.
inline bool InitKaldiInputStream(std::istream &is, bool *binary);

}  // end namespace kaldi.
#endif  // KALDI_BASE_IO_FUNCS_H_


================================================
FILE: runtime/engine/kaldi/base/kaldi-common.h
================================================
// base/kaldi-common.h

// Copyright 2009-2011 Microsoft Corporation

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_BASE_KALDI_COMMON_H_
#define KALDI_BASE_KALDI_COMMON_H_ 1

#include <cstddef>
#include <cstdlib>
#include <cstring>  // C string stuff like strcpy
#include <string>
#include <sstream>
#include <stdexcept>
#include <cassert>
#include <vector>
#include <iostream>
#include <fstream>

#include "base/kaldi-utils.h"
#include "base/kaldi-error.h"
#include "base/kaldi-types.h"
#include "base/io-funcs.h"
#include "base/kaldi-math.h"
#include "base/timer.h"

#endif  // KALDI_BASE_KALDI_COMMON_H_


================================================
FILE: runtime/engine/kaldi/base/kaldi-error.cc
================================================
// base/kaldi-error.cc

// Copyright 2019 LAIX (Yi Sun)
// Copyright 2019 SmartAction LLC (kkm)
// Copyright 2016 Brno University of Technology (author: Karel Vesely)
// Copyright 2009-2011  Microsoft Corporation;  Lukas Burget;  Ondrej Glembek

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifdef HAVE_EXECINFO_H
#include <execinfo.h> // To get stack trace in error messages.
// If this #include fails there is an error in the Makefile, it does not
// support your platform well. Make sure HAVE_EXECINFO_H is undefined,
// and the code will compile.
#ifdef HAVE_CXXABI_H
#include <cxxabi.h> // For name demangling.
// Useful to decode the stack trace, but only used if we have execinfo.h
#endif // HAVE_CXXABI_H
#endif // HAVE_EXECINFO_H

#include "base/kaldi-common.h"
#include "base/kaldi-error.h"
#include "base/version.h"

namespace kaldi {

/***** GLOBAL VARIABLES FOR LOGGING *****/

int32 g_kaldi_verbose_level = 0;
static std::string program_name;
static LogHandler log_handler = NULL;

void SetProgramName(const char *basename) {
  // The program name lives in a file-level 'static std::string'.  That is
  // mostly harmless, because (a) Kaldi logging is undefined before main(), and
  // (b) no stdc++ string implementation has been found in the wild that would
  // not be just an empty string when zero-initialized but not yet constructed.
  program_name.assign(basename);
}

/***** HELPER FUNCTIONS *****/

// Shorten a path so that it keeps at most one directory in front of the file
// name: "/a/b/c/d/e/f.cc" becomes "e/f.cc".  Both '/' and '\' are treated as
// path separators.  The result points inside 'path' (nothing is copied); a
// null 'path' yields the empty string.
static const char *GetShortFileName(const char *path) {
  if (path == nullptr) {
    return "";
  }

  // 'tail' is the start of the last component seen so far and 'parent' the
  // start of the component before it.
  const char *parent = path;
  const char *tail = path;
  for (const char *sep = std::strpbrk(path, "\\/"); sep != nullptr;
       sep = std::strpbrk(sep + 1, "\\/")) {
    parent = tail;
    tail = sep + 1;
  }
  return parent;
}

/***** STACK TRACE *****/

namespace internal {
// Locates a (mangled) symbol inside one line of backtrace output.
//
// The symbol is taken to start at the first '_' that directly follows a ' ' or
// a '(' and to end just before the next ' ' or '+'.  On success *begin and *end
// delimit the half-open range [begin, end) and true is returned.  If no start
// is found, *begin is set to npos, *end is left untouched and false is
// returned; if a start is found but no terminator follows, *end is npos and
// false is returned.
bool LocateSymbolRange(const std::string &trace_name, size_t *begin,
                       size_t *end) {
  *begin = std::string::npos;
  // Start searching at index 1: an underscore at index 0 has no predecessor.
  for (size_t pos = trace_name.find('_', 1); pos != std::string::npos;
       pos = trace_name.find('_', pos + 1)) {
    const char before = trace_name[pos - 1];
    if (before == ' ' || before == '(') {
      *begin = pos;
      break;
    }
  }
  if (*begin == std::string::npos) {
    return false;
  }
  *end = trace_name.find_first_of(" +", *begin);
  return *end != std::string::npos;
}
} // namespace internal

#ifdef HAVE_EXECINFO_H
// Returns 'trace_name' with the mangled C++ symbol it contains replaced by its
// demangled form.  When HAVE_CXXABI_H is not defined, when no symbol can be
// located, or when demangling fails, the line is returned unchanged.
static std::string Demangle(std::string trace_name) {
#ifdef HAVE_CXXABI_H
  // Try to demangle the symbol. We are trying to support the following formats
  // produced by different platforms:
  //
  // Linux:
  //   ./kaldi-error-test(_ZN5kaldi13UnitTestErrorEv+0xb) [0x804965d]
  //
  // Mac:
  //   0 server 0x000000010f67614d _ZNK5kaldi13MessageLogger10LogMessageEv + 813
  //
  // We want to extract the name, e.g. '_ZN5kaldi13UnitTestErrorEv', and
  // demangle it into a readable name like kaldi::UnitTestError.
  size_t sym_begin, sym_end;
  const bool located =
      internal::LocateSymbolRange(trace_name, &sym_begin, &sym_end);
  if (!located) {
    return trace_name;
  }

  std::string symbol = trace_name.substr(sym_begin, sym_end - sym_begin);
  int status;
  char *readable = abi::__cxa_demangle(symbol.c_str(), 0, 0, &status);
  if (status == 0 && readable != nullptr) {
    // The demangler hands back a malloc()ed buffer; copy it, then release it.
    symbol = readable;
    free(readable);
  }

  const std::string prefix = trace_name.substr(0, sym_begin);
  const std::string suffix = trace_name.substr(sym_end);
  return prefix + symbol + suffix;
#else  // !HAVE_CXXABI_H
  return trace_name;
#endif // HAVE_CXXABI_H
}
#endif // HAVE_EXECINFO_H

// Builds a human-readable stack trace of the calling thread, one demangled
// frame per line, preceded by a "[ Stack-Trace: ]" header.  Returns an empty
// string when HAVE_EXECINFO_H is not defined or when the symbol table cannot
// be obtained.
static std::string KaldiGetStackTrace() {
  std::string ans;
#ifdef HAVE_EXECINFO_H
  const size_t kMaxFrames = 50;   // Capacity of the raw trace buffer.
  const size_t kMaxPrinted = 50;  // Must be even.
  const size_t kHalfPrinted = kMaxPrinted / 2;

  // Capture the raw return addresses, then resolve them to symbol strings.
  void *frames[kMaxFrames];
  size_t num_frames = backtrace(frames, kMaxFrames);
  char **symbols = backtrace_symbols(frames, num_frames);
  if (symbols == NULL)
    return ans;

  // Appends the demangled frames in [first, last), one per line.
  auto append_frames = [&ans, symbols](size_t first, size_t last) {
    for (size_t i = first; i < last; ++i) {
      ans += Demangle(symbols[i]) + "\n";
    }
  };

  ans += "[ Stack-Trace: ]\n";
  if (num_frames > kMaxPrinted) {
    // Too many frames: print only the first and last halves, with an ellipsis
    // in between.
    // NOTE(review): backtrace() is documented to return at most the buffer
    // size, so with both limits equal to 50 this branch looks unreachable --
    // confirm before relying on the truncation markers.
    append_frames(0, kHalfPrinted);
    ans += ".\n.\n.\n";
    append_frames(num_frames - kHalfPrinted, num_frames);
    if (num_frames == kMaxFrames)
      ans += ".\n.\n.\n"; // Stack was too long, probably a bug.
  } else {
    append_frames(0, num_frames);
  }

  // backtrace_symbols() allocates only the array of pointers; the strings it
  // points to must not be freed individually.
  free(symbols);
#endif // HAVE_EXECINFO_H
  return ans;
}

/***** KALDI LOGGING *****/

MessageLogger::MessageLogger(LogMessageEnvelope::Severity severity,
                             const char *func, const char *file, int32 line) {
  // Only the pointers are recorded, so the caller's strings have to outlive
  // this logger.  The shortened file name is a pointer into 'file' itself.
  const char *short_file = GetShortFileName(file);
  envelope_.line = line;
  envelope_.file = short_file;
  envelope_.func = func;
  envelope_.severity = severity;
}

// Emits the accumulated message.  If a custom log handler is installed the
// envelope and message text are handed to it and nothing else happens;
// otherwise a single formatted line (plus a stack trace for errors and failed
// assertions, when available) is written to stderr.
void MessageLogger::LogMessage() const {
  const std::string message = GetMessage();

  // A third-party handler, when present, takes over completely.
  if (log_handler != NULL) {
    log_handler(envelope_, message.c_str());
    return;
  }

  // Default Kaldi logging: start with the severity tag.
  const int severity = envelope_.severity;
  std::stringstream full_message;
  if (severity > LogMessageEnvelope::kInfo) {
    full_message << "VLOG[" << severity << "] (";
  } else if (severity == LogMessageEnvelope::kInfo) {
    full_message << "LOG (";
  } else if (severity == LogMessageEnvelope::kWarning) {
    full_message << "WARNING (";
  } else if (severity == LogMessageEnvelope::kAssertFailed) {
    full_message << "ASSERTION_FAILED (";
  } else {
    // kError, and any other unrecognized negative value, is reported as an
    // error.
    full_message << "ERROR (";
  }

  // Program name, version, source location, then the message text itself.
  full_message << program_name.c_str() << "[" KALDI_VERSION "]" << ':'
               << envelope_.func << "():" << envelope_.file << ':'
               << envelope_.line << ") " << message.c_str();

  // Severities below kWarning (errors and assertion failures) also get a
  // stack trace, if one is available.
  if (severity < LogMessageEnvelope::kWarning) {
    const std::string stack_trace = KaldiGetStackTrace();
    if (!stack_trace.empty()) {
      full_message << "\n\n" << stack_trace;
    }
  }

  // The whole message is written to stderr with one insertion.
  full_message << "\n";
  std::cerr << full_message.str();
}

/***** KALDI ASSERTS *****/

void KaldiAssertFailure_(const char *func, const char *file, int32 line,
                         const char *cond_str) {
  MessageLogger::Log() =
      MessageLogger(LogMessageEnvelope::kAssertFailed, func, file, line)
      << "Assertion failed: (" << cond_str << ")";
  fflush(NULL); // Flush all pending buffers, abort() may not flush stderr.
  std::abort();
}

/***** THIRD-PARTY LOG-HANDLER *****/

// Installs 'handler' as the active log handler (NULL selects the default
// stderr logging) and returns the handler that was active before the call.
LogHandler SetLogHandler(LogHandler handler) {
  const LogHandler previous = log_handler;
  log_handler = handler;
  return previous;
}

} // namespace kaldi


================================================
FILE: runtime/engine/kaldi/base/kaldi-error.h
================================================
// base/kaldi-error.h

// Copyright 2019 LAIX (Yi Sun)
// Copyright 2019 SmartAction LLC (kkm)
// Copyright 2016 Brno University of Technology (author: Karel Vesely)
// Copyright 2009-2011  Microsoft Corporation;  Ondrej Glembek;  Lukas Burget;
//                      Saarland University

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_BASE_KALDI_ERROR_H_
#define KALDI_BASE_KALDI_ERROR_H_ 1

#include <cstdio>
#include <cstring>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#include "base/kaldi-types.h"
#include "base/kaldi-utils.h"
/* Important that this file does not depend on any other kaldi headers. */

#ifdef _MSC_VER
#define __func__ __FUNCTION__
#endif

namespace kaldi {

/// \addtogroup error_group
/// @{

/***** PROGRAM NAME AND VERBOSITY LEVEL *****/

/// Called by ParseOptions to set base name (no directory) of the executing
/// program. The name is printed in logging code along with every message,
/// because in our scripts, we often mix together the stderr of many programs.
/// This function is very thread-unsafe.
void SetProgramName(const char *basename);

/// Global verbosity threshold.  It is set by util/parse-options.{h,cc} when
/// the --verbose=? option is given.  Do not access it directly; prefer
/// {Get,Set}VerboseLevel().
extern int32 g_kaldi_verbose_level;

/// Returns the current verbosity level, usually set via the command line
/// '--verbose=' switch.
inline int32 GetVerboseLevel() {
  return g_kaldi_verbose_level;
}

/// Overrides the verbosity level.  Rarely needed, except by programs using
/// Kaldi as a library; command-line programs get the verbose level set
/// automatically from ParseOptions.
inline void SetVerboseLevel(int32 i) {
  g_kaldi_verbose_level = i;
}

/***** KALDI LOGGING *****/

/// Log message severity and source location info.
///
/// The string members are plain pointers: the envelope neither copies nor owns
/// the text they point to.
struct LogMessageEnvelope {
  /// Message severity. In addition to these levels, positive values (1 to 6)
  /// specify verbose logging level. Verbose messages are produced only when
  /// SetVerboseLevel() has been called to set logging level to at least the
  /// corresponding value.
  enum Severity {
    kAssertFailed = -3, //!< Assertion failure. abort() will be called.
    kError = -2,        //!< Fatal error. KaldiFatalError will be thrown.
    kWarning = -1,      //!< Indicates a recoverable but abnormal condition.
    kInfo = 0,          //!< Informational message.
  };
  int severity;     //!< A Severity value, or positive verbosity level.
  const char *func; //!< Name of the function invoking the logging.
  const char *file; //!< Source file name with up to 1 leading directory.
  int32 line;       //!< Line number in the source file.
};

/// Exception type for fatal Kaldi runtime errors.  It is thrown by every use of
/// the KALDI_ERR logging macro, after the logging function (either the one set
/// by SetLogHandler() or Kaldi's internal one) has returned.
class KaldiFatalError : public std::runtime_error {
public:
  explicit KaldiFatalError(const char *message)
      : std::runtime_error(message) {}
  explicit KaldiFatalError(const std::string &message)
      : std::runtime_error(message) {}

  /// Returns the fixed exception name, "kaldi::KaldiFatalError", rather than
  /// the logged text.
  const char *what() const noexcept override {
    return "kaldi::KaldiFatalError";
  }

  /// Returns the Kaldi error message that was logged by KALDI_ERR.
  const char *KaldiMessage() const {
    return std::runtime_error::what();
  }
};

// Class MessageLogger is the workhorse behind the KALDI_ASSERT, KALDI_ERR,
// KALDI_WARN, KALDI_LOG and KALDI_VLOG macros. It formats the message, then
// either prints it to stderr or passes to the custom logging handler if
// provided. Then, in case of the error, throws a KaldiFatalError exception, or
// in case of failed KALDI_ASSERT, calls std::abort().
//
// Typical use is through the macros: a temporary MessageLogger collects the
// streamed text, and assigning it to a Log or LogAndThrow object is what
// actually emits the message.
class MessageLogger {
public:
  /// The constructor stores the message's "envelope", a set of data which
  /// identifies the location in source which is sending the message to log.
  /// The pointers to strings are stored internally, and not owned or copied,
  /// so that their storage must outlive this object.
  MessageLogger(LogMessageEnvelope::Severity severity, const char *func,
                const char *file, int32 line);

  // The stream insertion operator, used in e.g. 'KALDI_LOG << "Message"'.
  // Appends 'val' to the internal buffer and returns *this for chaining.
  template <typename T> MessageLogger &operator<<(const T &val) {
    ss_ << val;
    return *this;
  }

  // When assigned a MessageLogger, log its contents.
  struct Log final {
    void operator=(const MessageLogger &logger) { logger.LogMessage(); }
  };

  // When assigned a MessageLogger, log its contents and then throw
  // a KaldiFatalError carrying the message text.
  struct LogAndThrow final {
    [[noreturn]] void operator=(const MessageLogger &logger) {
      logger.LogMessage();
      throw KaldiFatalError(logger.GetMessage());
    }
  };

private:
  // Returns a copy of the text streamed into this logger so far.
  std::string GetMessage() const { return ss_.str(); }
  // Sends the message to the custom handler if one is set, else to stderr.
  void LogMessage() const;

  LogMessageEnvelope envelope_;  // Severity and source location.
  std::ostringstream ss_;        // Accumulates the message text.
};

// Logging macros.
//
// Each macro expands to an assignment whose right-hand side is a temporary
// MessageLogger, so that text can be streamed into it with operator<<:
//
//   KALDI_WARN << "something odd: " << value;
//
// KALDI_ERR logs the message and then throws KaldiFatalError.  KALDI_VLOG(v)
// logs only when v is less than or equal to the current verbose level.
#define KALDI_ERR                                                              \
  ::kaldi::MessageLogger::LogAndThrow() = ::kaldi::MessageLogger(              \
      ::kaldi::LogMessageEnvelope::kError, __func__, __FILE__, __LINE__)
#define KALDI_WARN                                                             \
  ::kaldi::MessageLogger::Log() = ::kaldi::MessageLogger(                      \
      ::kaldi::LogMessageEnvelope::kWarning, __func__, __FILE__, __LINE__)
#define KALDI_LOG                                                              \
  ::kaldi::MessageLogger::Log() = ::kaldi::MessageLogger(                      \
      ::kaldi::LogMessageEnvelope::kInfo, __func__, __FILE__, __LINE__)
// KALDI_VLOG is written as "if (skip) ; else <log>" rather than a bare
// "if (enabled) <log>".  The macro's own 'if' therefore always has its 'else',
// so in caller code such as
//
//   if (cond)
//     KALDI_VLOG(1) << "message";
//   else
//     SomethingElse();
//
// the caller's 'else' pairs with the caller's 'if' and not with the one hidden
// inside the macro.
#define KALDI_VLOG(v)                                                          \
  if (!((v) <= ::kaldi::GetVerboseLevel()))                                    \
    (void)0;                                                                   \
  else                                                                         \
    ::kaldi::MessageLogger::Log() =                                            \
        ::kaldi::MessageLogger((::kaldi::LogMessageEnvelope::Severity)(v),     \
                               __func__, __FILE__, __LINE__)

/***** KALDI ASSERTS *****/

// Called by the assert macros below when a condition fails.  Logs the failed
// condition text together with the source location and does not return.
[[noreturn]] void KaldiAssertFailure_(const char *func, const char *file,
                                      int32 line, const char *cond_str);

// Note on KALDI_ASSERT and KALDI_PARANOID_ASSERT:
//
// A single block {} around if /else  does not work, because it causes
// syntax error (unmatched else block) in the following code:
//
// if (condition)
//   KALDI_ASSERT(condition2);
// else
//   SomethingElse();
//
// do {} while(0) -- note there is no semicolon at the end! -- works nicely,
// and compilers will be able to optimize the loop away (as the condition
// is always false).
//
// Also see KALDI_COMPILE_TIME_ASSERT, defined in base/kaldi-utils.h, and
// KALDI_ASSERT_IS_INTEGER_TYPE and KALDI_ASSERT_IS_FLOATING_TYPE, also defined
// there.
//
// KALDI_ASSERT is only active when PPS_DEBUG is defined.  Otherwise it expands
// to (void)0, so the condition expression is not evaluated at all: do not put
// code with required side effects inside KALDI_ASSERT().
#ifdef PPS_DEBUG
#define KALDI_ASSERT(cond)                                                     \
  do {                                                                         \
    if (cond)                                                                  \
      (void)0;                                                                 \
    else                                                                       \
      ::kaldi::KaldiAssertFailure_(__func__, __FILE__, __LINE__, #cond);       \
  } while (0)
#else
#define KALDI_ASSERT(cond) (void)0
#endif

// Some more expensive asserts only checked if this defined.
// As with KALDI_ASSERT, the condition is not evaluated when the macro is
// disabled (KALDI_PARANOID not defined).
#ifdef KALDI_PARANOID
#define KALDI_PARANOID_ASSERT(cond)                                            \
  do {                                                                         \
    if (cond)                                                                  \
      (void)0;                                                                 \
    else                                                                       \
      ::kaldi::KaldiAssertFailure_(__func__, __FILE__, __LINE__, #cond);       \
  } while (0)
#else
#define KALDI_PARANOID_ASSERT(cond) (void)0
#endif

/***** THIRD-PARTY LOG-HANDLER *****/

/// Type of third-party logging function.
typedef void (*LogHandler)(const LogMessageEnvelope &envelope,
                           const char *message);

/// Set logging handler. If called with a non-NULL function pointer, the
/// function pointed by it is called to send messages to a caller-provided log.
/// If called with a NULL pointer, restores default Kaldi error logging to
/// stderr. This function is obviously not thread safe; the log handler must be.
/// Returns a previously set logging handler pointer, or NULL.
LogHandler SetLogHandler(LogHandler);

/// @} end "addtogroup error_group"

// Functions within namespace internal are exported for testing only; do not
// use them elsewhere.
namespace internal {
bool LocateSymbolRange(const std::string &trace_name, size_t *begin,
                       size_t *end);
} // namespace internal
} // namespace kaldi

#endif // KALDI_BASE_KALDI_ERROR_H_


================================================
FILE: runtime/engine/kaldi/base/kaldi-math.cc
================================================
// base/kaldi-math.cc

// Copyright 2009-2011  Microsoft Corporation;  Yanmin Qian;
//                      Saarland University;  Jan Silovsky

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "base/kaldi-math.h"
#ifndef _MSC_VER
#include <stdlib.h>
#include <unistd.h>
#endif
#include <string>
#include <mutex>

namespace kaldi {
// These routines are tested in matrix/matrix-test.cc

// Returns the smallest power of two that is greater than or equal to n.
// n is expected to be positive (checked by KALDI_ASSERT when enabled).
int32 RoundUpToNearestPowerOfTwo(int32 n) {
  KALDI_ASSERT(n > 0);
  // Copy the highest set bit of (n - 1) into every lower bit position by
  // OR-ing in right shifts of 1, 2, 4, 8 and 16; adding one then gives the
  // next power of two.
  int32 smeared = n - 1;
  for (int shift = 1; shift <= 16; shift *= 2) {
    smeared |= smeared >> shift;
  }
  return smeared + 1;
}

// Serializes calls to the process-wide rand() made from Rand() when no
// per-caller state is supplied.
static std::mutex _RandMutex;

// Returns a pseudo-random integer.  Where _POSIX_THREAD_SAFE_FUNCTIONS is
// defined, a non-null 'state' selects rand_r() on the caller's own seed, and a
// null 'state' falls back to rand() under _RandMutex.  Elsewhere 'state' is
// ignored and rand() is called directly.
int Rand(struct RandomState* state) {
#if defined(_POSIX_THREAD_SAFE_FUNCTIONS)
  if (!state) {
    std::lock_guard<std::mutex> lock(_RandMutex);
    return rand();
  }
  return rand_r(&(state->seed));
#else
  // On Windows and Cygwin, just call rand()
  return rand();
#endif
}

// Seeds this state from the global generator.
RandomState::RandomState() {
  // we initialize it as Rand() + 27437 instead of just Rand(), because on some
  // systems, e.g. at the very least Mac OSX Yosemite and later, it seems to be
  // the case that rand_r when initialized with rand() will give you the exact
  // same sequence of numbers that rand() will give if you keep calling rand()
  // after that initial call.  This can cause problems with repeated sequences.
  // For example if you initialize two RandomState structs one after the other
  // without calling rand() in between, they would give you the same sequence
  // offset by one (if we didn't have the "+ 27437" in the code).  27437 is just
  // a randomly chosen prime number.
  //
  // The addition is done in unsigned arithmetic: Rand() can return values up
  // to RAND_MAX, which may equal INT_MAX, and adding the offset as a signed
  // int would then overflow (undefined behaviour).  Unsigned addition wraps
  // around instead, and gives the same seed whenever the signed sum would
  // have been representable.
  seed = static_cast<unsigned int>(Rand()) + 27437u;
}

// Returns true with probability 'prob' and false otherwise, drawing random
// numbers through Rand(state).  'prob' should lie in [0, 1]; values slightly
// above 1 are tolerated.
bool WithProb(BaseFloat prob, struct RandomState* state) {
  KALDI_ASSERT(prob >= 0 && prob <= 1.1);  // prob should be <= 1.0,
  // but we allow slightly larger values that could arise from roundoff in
  // previous calculations.
  KALDI_COMPILE_TIME_ASSERT(RAND_MAX > 128 * 128);
  if (prob == 0) {
    return false;
  } else if (prob == 1.0) {
    return true;
  } else if (prob * RAND_MAX < 128.0) {
    // prob is very small but nonzero, and the "main algorithm"
    // wouldn't work that well.  So: with probability 1/128, we
    // return WithProb (prob * 128), else return false.
    if (Rand(state) < RAND_MAX / 128) {  // with probability 1/128...
      // Note: we know that prob * 128.0 < 1.0, because
      // we asserted RAND_MAX > 128 * 128.
      // Pass 'state' on, so that the recursive draw uses the caller's
      // generator rather than falling back to the default one.
      return WithProb(prob * 128.0, state);
    } else {
      return false;
    }
  } else {
    return (Rand(state) < ((RAND_MAX + static_cast<BaseFloat>(1.0)) * prob));
  }
}

// Returns a pseudo-random integer in the inclusive range [min_val, max_val],
// drawing from Rand(state).  Requires max_val >= min_val.
int32 RandInt(int32 min_val, int32 max_val, struct RandomState* state) {
  // This is not exact: the result is obtained by reducing Rand() modulo the
  // size of the range, so the values are not perfectly equally likely.
  KALDI_ASSERT(max_val >= min_val);
  if (max_val == min_val) return min_val;

  // NOTE(review): the range size (max_val + 1 - min_val) is computed in int32
  // in both branches below; it would overflow for ranges wider than int32 can
  // hold -- confirm that callers never pass such ranges.
#ifdef _MSC_VER
  // RAND_MAX is quite small on Windows -> may need to handle larger numbers.
  if (RAND_MAX > (max_val-min_val)*8) {
        // *8 to avoid large inaccuracies in probability, from the modulus...
    return min_val +
      ((unsigned int)Rand(state) % (unsigned int)(max_val+1-min_val));
  } else {
    if ((unsigned int)(RAND_MAX*RAND_MAX) >
        (unsigned int)((max_val+1-min_val)*8)) {
        // *8 to avoid inaccuracies in probability, from the modulus...
        // Two draws are combined to cover a range larger than RAND_MAX.
      return min_val + ( (unsigned int)( (Rand(state)+RAND_MAX*Rand(state)))
                    % (unsigned int)(max_val+1-min_val));
    } else {
      // The range is too large even for two combined draws; KALDI_ERR throws.
      KALDI_ERR << "rand_int failed because we do not support such large "
          "random numbers. (Extend this function).";
    }
  }
#else
  return min_val +
      (static_cast<int32>(Rand(state)) % static_cast<int32>(max_val+1-min_val));
#endif
}

// Draws a Poisson-distributed integer with mean "lambda".
// Running time grows in proportion to lambda; faster algorithms exist but
// are more complex.
int32 RandPoisson(float lambda, struct RandomState* state) {
  // Knuth's algorithm: keep multiplying uniform draws together until the
  // running product is no longer above exp(-lambda); the sample is the number
  // of draws taken, minus one.
  KALDI_ASSERT(lambda >= 0);
  const float threshold = expf(-lambda);
  float product = 1.0;
  int32 num_draws = 0;
  for (;;) {
    ++num_draws;
    product *= RandUniform(state);
    if (!(product > threshold)) break;
  }
  return num_draws - 1;
}

// Writes a pair of Gaussian samples to *a and *b, produced from two uniform
// draws: the first supplies the radius, the second the angle.
// Both output pointers must be non-NULL (asserted).
void RandGauss2(float *a, float *b, RandomState *state) {
  KALDI_ASSERT(a);
  KALDI_ASSERT(b);
  // The two uniforms are drawn in this fixed order.
  const float radius_uniform = RandUniform(state);
  const float angle_uniform = RandUniform(state);
  const float radius = sqrtf(-2.0f * logf(radius_uniform));
  const float angle = 2.0f * M_PI * angle_uniform;
  *a = radius * cosf(angle);
  *b = radius * sinf(angle);
}

// Double-precision overload: delegates to the float version and widens the
// two results.  Both output pointers must be non-NULL (asserted).
void RandGauss2(double *a, double *b, RandomState *state) {
  KALDI_ASSERT(a);
  KALDI_ASSERT(b);
  // Using doubles does not mean we need super-high-quality random numbers,
  // so the samples are generated in single precision.
  float first_sample, second_sample;
  RandGauss2(&first_sample, &second_sample, state);
  *a = static_cast<double>(first_sample);
  *b = static_cast<double>(second_sample);
}


}  // end namespace kaldi


================================================
FILE: runtime/engine/kaldi/base/kaldi-math.h
================================================
// base/kaldi-math.h

// Copyright 2009-2011  Ondrej Glembek;  Microsoft Corporation;  Yanmin Qian;
//                      Jan Silovsky;  Saarland University
//
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_BASE_KALDI_MATH_H_
#define KALDI_BASE_KALDI_MATH_H_ 1

#ifdef _MSC_VER
#include <float.h>
#endif

#include <cmath>
#include <limits>
#include <vector>

#include "base/kaldi-types.h"
#include "base/kaldi-common.h"


// Fallback definitions for floating-point epsilons and math constants.  Each
// one is only defined here when the including translation unit does not
// already provide it.
#ifndef DBL_EPSILON
#define DBL_EPSILON 2.2204460492503131e-16
#endif
#ifndef FLT_EPSILON
#define FLT_EPSILON 1.19209290e-7f
#endif

// pi
#ifndef M_PI
#define M_PI 3.1415926535897932384626433832795
#endif

// sqrt(2)
#ifndef M_SQRT2
#define M_SQRT2 1.4142135623730950488016887
#endif

// 2 * pi
#ifndef M_2PI
#define M_2PI 6.283185307179586476925286766559005
#endif

// 1 / sqrt(2)
#ifndef M_SQRT1_2
#define M_SQRT1_2 0.7071067811865475244008443621048490
#endif

// log(2 * pi)
#ifndef M_LOG_2PI
#define M_LOG_2PI 1.8378770664093454835606594728112
#endif

// log(2)
#ifndef M_LN2
#define M_LN2 0.693147180559945309417232121458
#endif

// log(10)
#ifndef M_LN10
#define M_LN10 2.302585092994045684017991454684
#endif


// Thin aliases for the standard floating-point classification functions.
#define KALDI_ISNAN std::isnan
#define KALDI_ISINF std::isinf
#define KALDI_ISFINITE(x) std::isfinite(x)

// Squares its argument.  Note that the argument expression is expanded (and
// therefore evaluated) twice.
#if !defined(KALDI_SQR)
# define KALDI_SQR(x) ((x) * (x))
#endif

namespace kaldi {

// Exp(x): overloads of the exponential function for double and float.
// Which float implementation is used depends on the compiler:
//  - non-MSVC compilers and MSVC >= 1900: expf(), unless KALDI_NO_EXPF is
//    defined, in which case the double-precision exp() is used instead;
//  - older MSVC: expf(), except for the 64-bit _MSC_VER == 1800 build
//    described below, which goes through the double-precision exp().
#if !defined(_MSC_VER) || (_MSC_VER >= 1900)
inline double Exp(double x) { return exp(x); }
#ifndef KALDI_NO_EXPF
inline float Exp(float x) { return expf(x); }
#else
inline float Exp(float x) { return exp(static_cast<double>(x)); }
#endif  // KALDI_NO_EXPF
#else
inline double Exp(double x) { return exp(x); }
#if !defined(__INTEL_COMPILER) && _MSC_VER == 1800 && defined(_M_X64)
// Microsoft CL v18.0 buggy 64-bit implementation of
// expf() incorrectly returns -inf for exp(-inf).
inline float Exp(float x) { return exp(static_cast<double>(x)); }
#else
inline float Exp(float x) { return expf(x); }
#endif  // !defined(__INTEL_COMPILER) && _MSC_VER == 1800 && defined(_M_X64)
#endif  // !defined(_MSC_VER) || (_MSC_VER >= 1900)

// Natural logarithm, double precision.
inline double Log(double x) {
  return std::log(x);
}
// Natural logarithm, single precision.
inline float Log(float x) {
  return std::log(x);
}

// Log1p(x) returns log(1 + x), accurately for x close to zero.
// Compilers that provide log1p()/log1pf() use them directly.  Old MSVC
// versions (< 1700) get a fallback that uses the two-term series
// x - x^2/2 when |x| is tiny and Log(1 + x) otherwise.
#if !defined(_MSC_VER) || (_MSC_VER >= 1700)
inline double Log1p(double x) {  return log1p(x); }
inline float Log1p(float x) {  return log1pf(x); }
#else
inline double Log1p(double x) {
  const double cutoff = 1.0e-08;
  // The series is only valid near zero, so the test must be on |x|.  A plain
  // "x < cutoff" would also send every negative argument (e.g. -0.5) down
  // the series branch and return a wrong value.
  if (std::fabs(x) < cutoff)
    return x - 0.5 * x * x;
  else
    return Log(1.0 + x);
}

inline float Log1p(float x) {
  const float cutoff = 1.0e-07;
  // See the double overload: compare the magnitude, not the signed value.
  if (std::fabs(x) < cutoff)
    return x - 0.5 * x * x;
  else
    return Log(1.0 + x);
}
#endif

// Thresholds used by LogAdd(): when the (negative) difference between the two
// log-values is below this, the smaller term is ignored and the larger
// argument is returned unchanged.
static const double kMinLogDiffDouble = Log(DBL_EPSILON);  // negative!
static const float kMinLogDiffFloat = Log(FLT_EPSILON);  // negative!

// Representation of log(0), i.e. -infinity, for each floating-point type.
const float kLogZeroFloat = -std::numeric_limits<float>::infinity();
const double kLogZeroDouble = -std::numeric_limits<double>::infinity();
const BaseFloat kLogZeroBaseFloat = -std::numeric_limits<BaseFloat>::infinity();

// Returns a random integer between 0 and RAND_MAX, inclusive.
// "state" is optional.  On builds that use rand_r(), a non-NULL state supplies
// the seed that rand_r() updates; with a NULL state the call falls back to
// rand() guarded by a mutex.
int Rand(struct RandomState* state = NULL);

// State for thread-safe random number generator.
// Pass a pointer to one of these to Rand() and the functions built on it so
// that the draw uses this object's seed.
struct RandomState {
  // Initializes "seed" from the global generator (see kaldi-math.cc).
  RandomState();
  // Seed value handed to rand_r() by Rand().
  unsigned seed;
};

// Returns a random integer between first and last inclusive.
// Requires last >= first (asserted in the implementation).  The result is
// only approximately uniform, because it is obtained with a modulus.
int32 RandInt(int32 first, int32 last, struct RandomState* state = NULL);

// Returns true with probability "prob", with 0 <= prob <= 1 [we check this].
// Internally calls Rand().  This function is carefully implemented so
// that it should work even if prob is very small.
bool WithProb(BaseFloat prob, struct RandomState* state = NULL);

/// Returns a random number strictly between 0 and 1.
/// "state" is optional and is forwarded to Rand().
inline float RandUniform(struct RandomState* state = NULL) {
  // In double precision (Rand() + 1) / (RAND_MAX + 2) lies strictly inside
  // (0, 1).  Narrowing to float can however round a value just below one up
  // to exactly 1.0f when RAND_MAX is large, which would break the "strictly
  // less than 1" contract.  Clamp such results to the largest float below 1.
  const float u =
      static_cast<float>((Rand(state) + 1.0) / (RAND_MAX+2.0));
  return (u < 1.0f) ? u : std::nextafter(1.0f, 0.0f);
}

// Returns a single Gaussian sample computed from two RandUniform() draws as
// sqrt(-2 * log(u1)) * cos(2 * pi * u2).
// "state" is optional and is forwarded to RandUniform().
// Note: both draws happen inside one expression, so which of the two is taken
// first is left to the compiler.
inline float RandGauss(struct RandomState* state = NULL) {
  return static_cast<float>(sqrtf (-2 * Log(RandUniform(state)))
                            * cosf(2*M_PI*RandUniform(state)));
}

// Returns poisson-distributed random number.  Uses Knuth's algorithm.
// Take care: this takes time proportional
// to lambda.  Faster algorithms exist but are more complex.
// Requires lambda >= 0 (asserted in the implementation).
int32 RandPoisson(float lambda, struct RandomState* state = NULL);

// Returns a pair of gaussian random numbers. Uses Box-Muller transform.
// The results are written to *a and *b; both pointers must be non-NULL.
// The double overload generates its samples in single precision and widens
// them.
void RandGauss2(float *a, float *b, RandomState *state = NULL);
void RandGauss2(double *a, double *b, RandomState *state = NULL);

// Also see Vector<float,double>::RandCategorical().

// Randomized pruning that preserves expectations; typically applied to
// posteriors.  A value that is zero, or whose magnitude is at least
// prune_thresh, is returned unchanged.  Any other value becomes either
// +/- prune_thresh (with probability |post| / prune_thresh) or zero.
template<class Float>
inline Float RandPrune(Float post, BaseFloat prune_thresh,
                       struct RandomState* state = NULL) {
  KALDI_ASSERT(prune_thresh >= 0.0);
  if (post == 0.0 || std::abs(post) >= prune_thresh)
    return post;
  const double sign = (post >= 0 ? 1.0 : -1.0);
  const bool keep = (RandUniform(state) <= fabs(post)/prune_thresh);
  const double magnitude = (keep ? prune_thresh : 0.0);
  return sign * magnitude;
}

// Returns log(exp(x) + exp(y)), computed as
// max + Log1p(Exp(min - max)) to stay in the log domain.
inline double LogAdd(double x, double y) {
  const bool x_is_smaller = (x < y);
  const double larger = x_is_smaller ? y : x;
  // Non-positive gap between the smaller and the larger argument.
  const double diff = x_is_smaller ? (x - y) : (y - x);

  // When the gap is below the threshold (or is NaN, e.g. for two -infinity
  // arguments) the correction term is skipped and the larger value returned.
  if (diff >= kMinLogDiffDouble) {
    return larger + Log1p(Exp(diff));
  }
  return larger;
}


// Returns log(exp(x) + exp(y)) in single precision, computed as
// max + Log1p(Exp(min - max)) to stay in the log domain.
inline float LogAdd(float x, float y) {
  const bool x_is_smaller = (x < y);
  const float larger = x_is_smaller ? y : x;
  // Non-positive gap between the smaller and the larger argument.
  const float diff = x_is_smaller ? (x - y) : (y - x);

  // When the gap is below the threshold (or is NaN, e.g. for two -infinity
  // arguments) the correction term is skipped and the larger value returned.
  if (diff >= kMinLogDiffFloat) {
    return larger + Log1p(Exp(diff));
  }
  return larger;
}


// Returns log(exp(x) - exp(y)).
// Equal arguments give kLogZeroDouble; y > x is reported through KALDI_ERR.
inline double LogSub(double x, double y) {
  if (y == x) {
    return kLogZeroDouble;
  }
  if (y > x) {
    KALDI_ERR << "Cannot subtract a larger from a smaller number.";
  }

  const double gap = y - x;  // Negative from here on.
  const double result = x + Log(1.0 - Exp(gap));

  // The result can be NaN when gap ~ 0 and 1.0 - exp(gap) == 0 to machine
  // precision; treat that as log(0).
  return KALDI_ISNAN(result) ? kLogZeroDouble : result;
}


// Returns log(exp(x) - exp(y)) in single precision.
// Equal arguments give kLogZeroFloat; y > x is reported through KALDI_ERR.
inline float LogSub(float x, float y) {
  if (y >= x) {  // Throws exception if y>=x.
    if (y == x)
      // Use the float constant here, matching the NaN branch below, instead
      // of implicitly narrowing kLogZeroDouble.
      return kLogZeroFloat;
    else
      KALDI_ERR << "Cannot subtract a larger from a smaller number.";
  }

  float diff = y - x;  // Will be negative.
  float res = x + Log(1.0f - Exp(diff));

  // res might be NAN if diff ~0.0, and 1.0-exp(diff) == 0 to machine precision
  if (KALDI_ISNAN(res))
    return kLogZeroFloat;
  return res;
}

/// Returns true when abs(a - b) <= relative_tolerance * (abs(a) + abs(b)).
/// Exactly equal values (including equal infinities) compare equal; an
/// infinite or NaN difference compares unequal.
static inline bool ApproxEqual(float a, float b,
                               float relative_tolerance = 0.001) {
  if (a == b) {
    return true;  // Also covers two infinities of the same sign.
  }
  const float gap = std::abs(a-b);
  const bool gap_is_infinite =
      (gap == std::numeric_limits<float>::infinity());
  const bool gap_is_nan = (gap != gap);
  if (gap_is_infinite || gap_is_nan) {
    return false;
  }
  const float magnitude = std::abs(a)+std::abs(b);
  return gap <= relative_tolerance*magnitude;
}

/// assert abs(a - b) <= relative_tolerance * (abs(a)+abs(b))
/// Thin wrapper that passes the ApproxEqual() result to KALDI_ASSERT.
static inline void AssertEqual(float a, float b,
                               float relative_tolerance = 0.001) {
  // Exactly equal values, including equal infinities, are accepted by
  // ApproxEqual().
  KALDI_ASSERT(ApproxEqual(a, b, relative_tolerance));
}


// RoundUpToNearestPowerOfTwo does the obvious thing. It crashes if n <= 0.
// (Declaration only; the definition is not in this header.)
int32 RoundUpToNearestPowerOfTwo(int32 n);

/// Returns a / b, rounding towards negative infinity in all cases.
/// b must be nonzero (asserted).
static inline int32 DivideRoundingDown(int32 a, int32 b) {
  KALDI_ASSERT(b != 0);
  // C++ integer division truncates towards zero.  Start from that result and
  // step down by one when there is a remainder whose sign differs from the
  // divisor's, i.e. when the exact quotient is negative and not an integer.
  // This avoids forming the product a * b just to test the signs, which
  // could overflow int32 for large operands.
  int32 quotient = a / b;
  const int32 remainder = a % b;
  if (remainder != 0 && ((remainder < 0) != (b < 0)))
    --quotient;
  return quotient;
}

// Returns the greatest common divisor of m and n as a non-negative value.
// If exactly one argument is zero, the absolute value of the other is
// returned; if both are zero the GCD is undefined and an error is raised.
template<class I> I  Gcd(I m, I n) {
  const bool m_is_zero = (m == 0);
  const bool n_is_zero = (n == 0);
  if (m_is_zero || n_is_zero) {
    if (m_is_zero && n_is_zero) {  // every integer divides zero.
      KALDI_ERR << "Undefined GCD since m = 0, n = 0.";
    }
    // Absolute value of whichever argument is nonzero.
    if (m_is_zero)
      return (n > 0 ? n : -n);
    return (m > 0 ? m : -m);
  }
  // could use compile-time assertion
  // but involves messing with complex template stuff.
  KALDI_ASSERT(std::numeric_limits<I>::is_integer);
  // Euclid's algorithm, alternating which operand is reduced.
  for (;;) {
    m %= n;
    if (m == 0) return (n > 0 ? n : -n);
    n %= m;
    if (n == 0) return (m > 0 ? m : -m);
  }
}

/// Returns the least common multiple of two integers.
/// Both inputs must be positive (asserted).
template<class I> I  Lcm(I m, I n) {
  KALDI_ASSERT(m > 0 && n > 0);
  const I common = Gcd(m, n);
  // lcm = gcd * (m / gcd) * (n / gcd), multiplied left to right.
  const I m_cofactor = m/common;
  const I n_cofactor = n/common;
  return common * m_cofactor * n_cofactor;
}


// Splits m into its prime factors, in sorted order from least to greatest,
// with duplication, and stores them in *factors (which is cleared first).
// A simple trial-division algorithm, mainly intended for the mixed-radix FFT
// computation where most factors are expected to be small.
//
// m        Number to factorize; must be >= 1 (asserted).
// factors  Output vector; must be non-NULL (asserted).  Left empty for m == 1.
template<class I> void Factorize(I m, std::vector<I> *factors) {
  KALDI_ASSERT(factors != NULL);
  KALDI_ASSERT(m >= 1);  // Doesn't work for zero or negative numbers.
  factors->clear();
  I small_factors[10] = { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29 };

  // First try small factors.
  for (I i = 0; i < 10; i++) {
    if (m == 1) return;  // We're done.
    while (m % small_factors[i] == 0) {
      m /= small_factors[i];
      factors->push_back(small_factors[i]);
    }
  }
  // Next try all odd numbers starting from 31.
  for (I j = 31; m != 1; j += 2) {
    // Every factor below j has already been divided out, so once j exceeds
    // sqrt(m) the remaining m must itself be prime.  Stopping here yields the
    // same factor list as scanning all the way up to m, without the linear
    // cost for a large prime cofactor.  "j > m / j" is used instead of
    // "j * j > m" so that the test cannot overflow.
    if (j > m / j) {
      factors->push_back(m);
      return;
    }
    while (m % j == 0) {
      m /= j;
      factors->push_back(j);
    }
  }
}

// Length of the hypotenuse, sqrt(x^2 + y^2), in double precision.
inline double Hypot(double x, double y) {
  const double length = hypot(x, y);
  return length;
}
// Length of the hypotenuse, sqrt(x^2 + y^2), in single precision.
inline float Hypot(float x, float y) {
  const float length = hypotf(x, y);
  return length;
}


}  // namespace kaldi


#endif  // KALDI_BASE_KALDI_MATH_H_


================================================
FILE: runtime/engine/kaldi/base/kaldi-types.h
================================================
// base/kaldi-types.h

// Copyright 2009-2011  Microsoft Corporation;  Saarland University;
//                      Jan Silovsky;  Yanmin Qian

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_BASE_KALDI_TYPES_H_
#define KALDI_BASE_KALDI_TYPES_H_ 1

namespace kaldi {
// TYPEDEFS ..................................................................
// BaseFloat is the default floating-point type: double when
// KALDI_DOUBLEPRECISION is defined to a nonzero value, float otherwise
// (an undefined macro evaluates to 0 in the #if below).
#if (KALDI_DOUBLEPRECISION != 0)
typedef double  BaseFloat;
#else
typedef float   BaseFloat;
#endif
}

#ifdef _MSC_VER
#include <basetsd.h>
#define ssize_t SSIZE_T
#endif

// we can do this a different way if some platform
// we find in the future lacks stdint.h
#include <stdint.h>

// for discussion on what to do if you need compile kaldi
// without OpenFST, see the bottom of this file

// Fixed-width integer aliases.
// Default build (COMPILE_WITHOUT_OPENFST not defined): the aliases live in
// the global namespace and are then imported into namespace kaldi.  They come
// from <fst/types.h> when WITH_ASR is defined, and are declared here from the
// <stdint.h> types otherwise.
#ifndef COMPILE_WITHOUT_OPENFST

#ifdef WITH_ASR
#include <fst/types.h>
#else
using int8 = int8_t;
using int16 = int16_t;
using int32 = int32_t;
using int64 = int64_t;

using uint8 = uint8_t;
using uint16 = uint16_t;
using uint32 = uint32_t;
using uint64 = uint64_t;
#endif

namespace kaldi {
  // Note: int8 and uint8 are not imported into namespace kaldi here.
  using ::int16;
  using ::int32;
  using ::int64;
  using ::uint16;
  using ::uint32;
  using ::uint64;
  typedef float   float32;
  typedef double double64;
}  // end namespace kaldi

#else
// Build without OpenFST (COMPILE_WITHOUT_OPENFST defined): the integer types
// are declared directly inside namespace kaldi.

namespace kaldi {
  typedef int8_t   int8;
  typedef int16_t  int16;
  typedef int32_t  int32;
  typedef int64_t  int64;

  typedef uint8_t  uint8;
  typedef uint16_t uint16;
  typedef uint32_t uint32;
  typedef uint64_t uint64;
  typedef float    float32;
  typedef double   double64;
}  // end namespace kaldi
#endif

#endif  // KALDI_BASE_KALDI_TYPES_H_


================================================
FILE: runtime/engine/kaldi/base/kaldi-utils.cc
================================================
// base/kaldi-utils.cc
// Copyright 2009-2011   Karel Vesely;  Yanmin Qian;  Microsoft Corporation

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//  http://www.apache.org/licenses/LICENSE-2.0

// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifdef _WIN32_WINNT_WIN8
#include <Synchapi.h>
#elif defined(_WIN32) || defined(_MSC_VER) || defined(MINGW)
#include <Windows.h>
#if defined(_MSC_VER) && _MSC_VER < 1900
#define snprintf _snprintf
#endif /* _MSC_VER < 1900 */
#else
#include <unistd.h>
#endif

#include <string>
#include "base/kaldi-common.h"


namespace kaldi {

// Formats a character in a human-readable form for debugging: printable
// characters are shown in single quotes, anything else as
// "[character N]" where N is the numeric value of the char.
std::string CharToString(const char &c) {
  char buf[20];
  // std::isprint() requires a value representable as unsigned char (or EOF);
  // passing a negative plain char is undefined behaviour, so convert first.
  if (std::isprint(static_cast<unsigned char>(c)))
    snprintf(buf, sizeof(buf), "\'%c\'", c);
  else
    snprintf(buf, sizeof(buf), "[character %d]", static_cast<int>(c));
  return std::string(buf);
}

// Suspends the calling thread for roughly "seconds" seconds, using the
// sleep primitive available on the current platform.
void Sleep(float seconds) {
#if defined(_MSC_VER) || defined(MINGW)
  // The Windows API call takes whole milliseconds.
  const int milliseconds = static_cast<int>(seconds * 1000.0);
  ::Sleep(milliseconds);
#elif defined(__CYGWIN__)
  // Whole seconds only; the fractional part is truncated.
  const int whole_seconds = static_cast<int>(seconds);
  sleep(whole_seconds);
#else
  // usleep() takes whole microseconds.
  const int microseconds = static_cast<int>(seconds * 1000000.0);
  usleep(microseconds);
#endif
}

}  // end namespace kaldi


================================================
FILE: runtime/engine/kaldi/base/kaldi-utils.h
================================================
// base/kaldi-utils.h

// Copyright 2009-2011  Ondrej Glembek;  Microsoft Corporation;
//                      Saarland University;  Karel Vesely;  Yanmin Qian

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_BASE_KALDI_UTILS_H_
#define KALDI_BASE_KALDI_UTILS_H_ 1

#if defined(_MSC_VER)
# define WIN32_LEAN_AND_MEAN
# define NOMINMAX
# include <windows.h>
#endif

#ifdef _MSC_VER
#include <stdio.h>
#define unlink _unlink
#else
#include <unistd.h>
#endif

#include <limits>
#include <string>

#if defined(_MSC_VER)
#pragma warning(disable: 4244 4056 4305 4800 4267 4996 4756 4661)
#if _MSC_VER < 1400
#define __restrict__
#else
#define __restrict__ __restrict
#endif
#endif

// KALDI_MEMALIGN(align, size, pp_orig) allocates "size" bytes aligned to
// "align", stores the pointer in *pp_orig and evaluates to that pointer.
// In the posix_memalign() variant the expression evaluates to NULL when the
// allocation fails.  Memory obtained this way must be released with
// KALDI_MEMALIGN_FREE, which maps to the matching deallocation function.
#if defined(_MSC_VER)
#  define KALDI_MEMALIGN(align, size, pp_orig) \
  (*(pp_orig) = _aligned_malloc(size, align))
#  define KALDI_MEMALIGN_FREE(x) _aligned_free(x)
#elif defined(__CYGWIN__)
#  define KALDI_MEMALIGN(align, size, pp_orig) \
  (*(pp_orig) = aligned_alloc(align, size))
#  define KALDI_MEMALIGN_FREE(x) free(x)
#else
#  define KALDI_MEMALIGN(align, size, pp_orig) \
     (!posix_memalign(pp_orig, align, size) ? *(pp_orig) : NULL)
#  define KALDI_MEMALIGN_FREE(x) free(x)
#endif

#ifdef __ICC
#pragma warning(disable: 383)  // ICPC remark we don't want.
#pragma warning(disable: 810)  // ICPC remark we don't want.
#pragma warning(disable: 981)  // ICPC remark we don't want.
#pragma warning(disable: 1418)  // ICPC remark we don't want.
#pragma warning(disable: 444)  // ICPC remark we don't want.
#pragma warning(disable: 869)  // ICPC remark we don't want.
#pragma warning(disable: 1287)  // ICPC remark we don't want.
#pragma warning(disable: 279)  // ICPC remark we don't want.
#pragma warning(disable: 981)  // ICPC remark we don't want.
#endif


namespace kaldi {


// CharToString returns the character in a human-readable form, for debugging:
// printable characters are quoted, others are shown by numeric value.
std::string CharToString(const char &c);


// Returns 1 when the lowest-addressed byte of an int holding the value 1 is
// nonzero (little-endian byte order), 0 otherwise.
inline int MachineIsLittleEndian() {
  int probe = 1;
  const char *first_byte = reinterpret_cast<char*>(&probe);
  if (*first_byte != 0) {
    return 1;
  }
  return 0;
}

// This function kaldi::Sleep() provides a portable way
// to sleep for a possibly fractional
// number of seconds.  On Windows it's only accurate to milliseconds, and on
// Cygwin the fractional part is dropped (whole seconds only).
void Sleep(float seconds);
}

// KALDI_SWAP8 / KALDI_SWAP4 / KALDI_SWAP2 reverse, in place, the order of the
// first 8 / 4 / 2 bytes of the object "a".  The argument must be an lvalue
// (its address is taken) and each macro expands to a braced block, so it is
// used as a statement rather than as an expression.
#define KALDI_SWAP8(a) { \
  int t = (reinterpret_cast<char*>(&a))[0];\
          (reinterpret_cast<char*>(&a))[0]=(reinterpret_cast<char*>(&a))[7];\
          (reinterpret_cast<char*>(&a))[7]=t;\
      t = (reinterpret_cast<char*>(&a))[1];\
          (reinterpret_cast<char*>(&a))[1]=(reinterpret_cast<char*>(&a))[6];\
          (reinterpret_cast<char*>(&a))[6]=t;\
      t = (reinterpret_cast<char*>(&a))[2];\
          (reinterpret_cast<char*>(&a))[2]=(reinterpret_cast<char*>(&a))[5];\
          (reinterpret_cast<char*>(&a))[5]=t;\
      t = (reinterpret_cast<char*>(&a))[3];\
          (reinterpret_cast<char*>(&a))[3]=(reinterpret_cast<char*>(&a))[4];\
          (reinterpret_cast<char*>(&a))[4]=t;}
#define KALDI_SWAP4(a) { \
  int t = (reinterpret_cast<char*>(&a))[0];\
          (reinterpret_cast<char*>(&a))[0]=(reinterpret_cast<char*>(&a))[3];\
          (reinterpret_cast<char*>(&a))[3]=t;\
      t = (reinterpret_cast<char*>(&a))[1];\
          (reinterpret_cast<char*>(&a))[1]=(reinterpret_cast<char*>(&a))[2];\
          (reinterpret_cast<char*>(&a))[2]=t;}
#define KALDI_SWAP2(a) { \
  int t = (reinterpret_cast<char*>(&a))[0];\
          (reinterpret_cast<char*>(&a))[0]=(reinterpret_cast<char*>(&a))[1];\
          (reinterpret_cast<char*>(&a))[1]=t;}


// Declares (without defining) the copy constructor and operator= of "type".
// The declarations take the access level of the section the macro is placed
// in, so put it in the private section of the class to forbid copying.
#define KALDI_DISALLOW_COPY_AND_ASSIGN(type)    \
  type(const type&);                  \
  void operator = (const type&)

// Compile-time assertion helper.  Only the <true> specialization provides
// Check(), so KaldiCompileTimeAssert<cond>::Check() fails to compile when
// "cond" is false.
template<bool B> class KaldiCompileTimeAssert { };
template<> class KaldiCompileTimeAssert<true> {
 public:
  static inline void Check() { }
};

// Fails compilation unless the constant expression "b" is true.
#define KALDI_COMPILE_TIME_ASSERT(b) KaldiCompileTimeAssert<(b)>::Check()

// Fails compilation unless I is a type for which std::numeric_limits is
// specialized and reports is_integer.
#define KALDI_ASSERT_IS_INTEGER_TYPE(I) \
  KaldiCompileTimeAssert<std::numeric_limits<I>::is_specialized \
                 && std::numeric_limits<I>::is_integer>::Check()

// Fails compilation unless F is a type for which std::numeric_limits is
// specialized and does not report is_integer.
#define KALDI_ASSERT_IS_FLOATING_TYPE(F) \
  KaldiCompileTimeAssert<std::numeric_limits<F>::is_specialized \
                && !std::numeric_limits<F>::is_integer>::Check()

// KALDI_STRCASECMP: case-insensitive C-string comparison, mapped to the
// function each platform provides.
#if defined(_MSC_VER)
#define KALDI_STRCASECMP _stricmp
#elif defined(__CYGWIN__)
#include <strings.h>
#define KALDI_STRCASECMP strcasecmp
#else
#define KALDI_STRCASECMP strcasecmp
#endif
// KALDI_STRTOLL: base-10 string to 64-bit integer conversion.
// NOTE(review): both expansions end with a semicolon, so the macro can only
// be used where a full statement (or the end of one) is acceptable, not in
// the middle of a larger expression.
#ifdef _MSC_VER
#  define KALDI_STRTOLL(cur_cstr, end_cstr) _strtoi64(cur_cstr, end_cstr, 10);
#else
#  define KALDI_STRTOLL(cur_cstr, end_cstr) strtoll(cur_cstr, end_cstr, 10);
#endif

#endif  // KALDI_BASE_KALDI_UTILS_H_


================================================
FILE: runtime/engine/kaldi/base/timer.cc
================================================
// base/timer.cc

// Copyright 2018  Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//  http://www.apache.org/licenses/LICENSE-2.0

// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "base/timer.h"
#include "base/kaldi-error.h"
#include <algorithm>
#include <iomanip>
#include <map>
#include <unordered_map>

namespace kaldi {

// Accumulates elapsed time per profiled function and logs the totals, sorted
// from largest to smallest, when the object is destroyed.
class ProfileStats {
 public:
  // Adds "elapsed" seconds to the total recorded for "function_name".
  // The lookup key is the pointer value itself (see the note on map_), so the
  // string must stay alive and at the same address between calls.
  void AccStats(const char *function_name, double elapsed) {
    std::unordered_map<const char*, ProfileStatsEntry>::iterator
        iter = map_.find(function_name);
    if (iter == map_.end()) {
      // First time this key is seen: create a zero-initialized entry.  The
      // iterator returned by emplace() avoids any further lookups.
      iter = map_.emplace(function_name,
                          ProfileStatsEntry(function_name)).first;
    }
    iter->second.total_time += elapsed;
  }
  // Logs one line per distinct function name with the accumulated time.
  ~ProfileStats() {
    // This map makes sure we agglomerate the time if there were any duplicate
    // addresses of strings.
    std::unordered_map<std::string, double> total_time;
    for (auto iter = map_.begin(); iter != map_.end(); iter++)
      total_time[iter->second.name] += iter->second.total_time;

    ReverseSecondComparator comp;
    std::vector<std::pair<std::string, double> > pairs(total_time.begin(),
                                                       total_time.end());
    std::sort(pairs.begin(), pairs.end(), comp);
    for (size_t i = 0; i < pairs.size(); i++) {
      KALDI_LOG << "Time taken in " << pairs[i].first << " is "
                << std::fixed << std::setprecision(2) << pairs[i].second << "s.";
    }
  }
 private:

  // Name of a profiled function together with its accumulated time.
  struct ProfileStatsEntry {
    std::string name;
    double total_time;
    // total_time is always initialized, so an entry never carries an
    // indeterminate value when it is copied or accumulated into.
    ProfileStatsEntry(): total_time(0.0) { }
    ProfileStatsEntry(const char *name): name(name), total_time(0.0) { }
  };

  // Orders (name, time) pairs by decreasing time.
  struct ReverseSecondComparator {
    bool operator () (const std::pair<std::string, double> &a,
                      const std::pair<std::string, double> &b) const {
      return a.second > b.second;
    }
  };

  // Note: this map is keyed on the address of the string, there is no proper
  // hash function.  The assumption is that the strings are compile-time
  // constants.
  std::unordered_map<const char*, ProfileStatsEntry> map_;
};

// Global accumulator that Profiler destructors report into.  Its own
// destructor logs the collected totals.
ProfileStats g_profile_stats;

// Reports the time measured by this profiler's timer to the global
// statistics object, under the name given at construction.
Profiler::~Profiler() {
  const double elapsed_seconds = tim_.Elapsed();
  g_profile_stats.AccStats(name_, elapsed_seconds);
}

}  // namespace kaldi


================================================
FILE: runtime/engine/kaldi/base/timer.h
================================================
// base/timer.h

// Copyright 2009-2011  Ondrej Glembek;  Microsoft Corporation

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//  http://www.apache.org/licenses/LICENSE-2.0

// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_BASE_TIMER_H_
#define KALDI_BASE_TIMER_H_

#include "base/kaldi-utils.h"
#include "base/kaldi-error.h"


#if defined(_MSC_VER) || defined(MINGW)

namespace kaldi {
// Stopwatch built on the Windows performance counter.
class Timer {
 public:
  // Starts timing immediately.
  Timer() { Reset(); }

  // You can initialize with bool to control whether or not you want the time to
  // be set when the object is created.
  // When set_timer is false the start time is left unset until Reset() is
  // called.
  explicit Timer(bool set_timer) { if (set_timer) Reset(); }

  // Records the current performance-counter value as the start time.
  void Reset() {
    QueryPerformanceCounter(&time_start_);
  }
  // Returns the counter difference since the start time divided by the
  // counter frequency, or 0.0 when the frequency query reports failure.
  double Elapsed() const {
    LARGE_INTEGER time_end;
    LARGE_INTEGER freq;
    QueryPerformanceCounter(&time_end);

    if (QueryPerformanceFrequency(&freq) == 0) {
      //  Hardware does not support this.
      return 0.0;
    }
    return (static_cast<double>(time_end.QuadPart) -
            static_cast<double>(time_start_.QuadPart)) /
           (static_cast<double>(freq.QuadPart));
  }
 private:
  // Counter value captured by the most recent Reset().
  LARGE_INTEGER time_start_;
};


#else
#include <sys/time.h>
#include <unistd.h>

namespace kaldi {
// Stopwatch built on gettimeofday().
class Timer {
 public:
  // Starts timing immediately.
  Timer() { Reset(); }

  // You can initialize with bool to control whether or not you want the time to
  // be set when the object is created.
  // When set_timer is false the start time is zero-initialized, so that the
  // object never holds an indeterminate value; call Reset() before relying on
  // Elapsed().
  explicit Timer(bool set_timer) {
    if (set_timer) {
      Reset();
    } else {
      time_start_.tv_sec = 0;
      time_start_.tv_usec = 0;
    }
  }

  // Records the current time as the start time.  The timezone argument of
  // gettimeofday() is obsolete and is passed as a null pointer.
  void Reset() { gettimeofday(&this->time_start_, nullptr); }

  /// Returns time in seconds.
  double Elapsed() const {
    struct timeval time_end;
    gettimeofday(&time_end, nullptr);
    double t1, t2;
    t1 =  static_cast<double>(time_start_.tv_sec) +
          static_cast<double>(time_start_.tv_usec)/(1000*1000);
    t2 =  static_cast<double>(time_end.tv_sec) +
          static_cast<double>(time_end.tv_usec)/(1000*1000);
    return t2-t1;
  }

 private:
  // Time captured by the most recent Reset().
  struct timeval time_start_;
};

#endif

// Scope-based profiling helper: the timer member starts when the object is
// constructed, and the destructor (defined in timer.cc) reports the elapsed
// time under the given name.
class Profiler {
 public:
  // Caution: the 'const char' should always be a string constant; for speed,
  // internally the profiling code uses the address of it as a lookup key.
  Profiler(const char *function_name): name_(function_name) { }
  ~Profiler();
 private:
  Timer tim_;         // Default-constructed, so timing starts immediately.
  const char *name_;  // Not owned; must outlive this object.
};

//  To add timing info for a function, you just put
//  KALDI_PROFILE;
//  at the beginning of the function.  Caution: this doesn't
//  include the class name.
//  The macro declares a local Profiler named _profiler, keyed on __func__.
//  "Profiler" is not namespace-qualified in the expansion, so it has to be
//  visible at the point of use.
#define KALDI_PROFILE Profiler _profiler(__func__)


}  // namespace kaldi


#endif  // KALDI_BASE_TIMER_H_


================================================
FILE: runtime/engine/kaldi/base/version.h
================================================
// This file was automatically created by ./get_version.sh.
// It is only included by ./kaldi-error.cc.
#define KALDI_VERSION "5.5.544~2-f21d7"
#define KALDI_GIT_HEAD "f21d7e768635ca98aeeb43f30e2c6a9f14ab8f0f"


================================================
FILE: runtime/engine/kaldi/decoder/CMakeLists.txt
================================================

# Decoder library built from the two lattice-faster decoder sources.
add_library(kaldi-decoder
lattice-faster-decoder.cc
lattice-faster-online-decoder.cc
)
# kaldi-lat is linked PUBLIC, so targets that link kaldi-decoder get it too.
target_link_libraries(kaldi-decoder PUBLIC kaldi-lat)


================================================
FILE: runtime/engine/kaldi/decoder/decodable-itf.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// itf/decodable-itf.h

// Copyright 2009-2011  Microsoft Corporation;  Saarland University;
//                      Mirko Hannemann;  Go Vivace Inc.;
//                2013  Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_ITF_DECODABLE_ITF_H_
#define KALDI_ITF_DECODABLE_ITF_H_ 1
#include "base/kaldi-common.h"

namespace kaldi {
/// @ingroup Interfaces
/// @{


/**
    DecodableInterface provides a link between the (acoustic-modeling and
    feature-processing) code and the decoder.  The idea is to make this
    interface as small as possible, and to make it as agnostic as possible about
    the form of the acoustic model (e.g. don't assume the probabilities are a
    function of just a vector of floats), and about the decoder (e.g. don't
    assume it accesses frames in strict left-to-right order).  For normal
    models, without on-line operation, the "decodable" sub-class will just be a
    wrapper around a matrix of features and an acoustic model, and it will
    answer the question 'what is the acoustic likelihood for this index and this
    frame?'.

    For online decoding, where the features are coming in in real time, it is
    important to understand the IsLastFrame() and NumFramesReady() functions.
    There are two ways these are used: the old online-decoding code, in
    ../online/, and the new online-decoding code, in ../online2/.  In the old
    online-decoding code, the decoder would do:
    \code{.cc}
    for (int frame = 0; !decodable.IsLastFrame(frame); frame++) {
      // Process this frame
    }
    \endcode
    and the call to IsLastFrame would block if the features had not arrived yet.
    The decodable object would have to know when to terminate the decoding.  This
    online-decoding mode is still supported; it is what happens when you call,
    for example, LatticeFasterDecoder::Decode().

    We realized that this "blocking" mode of decoding is not very convenient
    because it forces the program to be multi-threaded and makes it complex to
    control endpointing.  In the "new" decoding code, you don't call (for
    example) LatticeFasterDecoder::Decode(); you call
    LatticeFasterDecoder::InitDecoding(), and then each time you get more
    features, you provide them to the decodable object, and you call
    LatticeFasterDecoder::AdvanceDecoding(), which does something like this:
    \code{.cc}
    while (num_frames_decoded_ < decodable.NumFramesReady()) {
      // Decode one more frame [increments num_frames_decoded_]
    }
    \endcode
    So the decodable object never has IsLastFrame() called.  For decoding where
    you are starting with a matrix of features, the NumFramesReady() function
    will always just return the number of frames in the file, and IsLastFrame()
    will return true for the last frame.

   For truly online decoding, the "old" online decodable objects in ../online/
   have a "blocking" IsLastFrame() and will crash if you call NumFramesReady().
   The "new" online decodable objects in ../online2/ return the number of frames
   currently accessible if you call NumFramesReady().  You will likely not need
   to call IsLastFrame(), but we implement it to only return true for the last
   frame of the file once we've decided to terminate decoding.
*/
class DecodableInterface {
  public:
    // Virtual so that subclasses are destroyed correctly through a base
    // pointer.
    virtual ~DecodableInterface() {}

    /// Returns the log likelihood (logprob), which will be negated in the
    /// decoder.  The "frame" starts from zero.  You should verify that
    /// NumFramesReady() > frame before calling this.
    virtual BaseFloat LogLikelihood(int32 frame, int32 index) = 0;

    /// Returns true if this is the last frame.  Frames are zero-based, so the
    /// first frame is zero.  IsLastFrame(-1) will return false, unless the file
    /// is empty (which is a case that I'm not sure all the code will handle, so
    /// be careful).  Caution: the behavior of this function in an online
    /// setting is being changed somewhat.  In future it may return false in
    /// cases where we haven't yet decided to terminate decoding, but later
    /// true if we decide to terminate decoding.  The plan in future is to rely
    /// more on NumFramesReady(), and in future, IsLastFrame() would always
    /// return false in an online-decoding setting, and would only return true
    /// in a decoding-from-matrix setting where we want to allow the last delta
    /// or LDA features to be flushed out for compatibility with the baseline
    /// setup.
    virtual bool IsLastFrame(int32 frame) = 0;

    /// The call NumFramesReady() will return the number of frames currently
    /// available for this decodable object.  This is for use in setups where
    /// you don't want the decoder to block while waiting for input.  This is
    /// newly added as of Jan 2014, and I hope, going forward, to rely on this
    /// mechanism more than IsLastFrame to know when to stop decoding.
    ///
    /// The base-class default reports an error through KALDI_ERR; subclasses
    /// that support this query override it.
    /// NOTE(review): the trailing `return -1` is presumably unreachable
    /// because KALDI_ERR does not return -- confirm in base/kaldi-error.h.
    virtual int32 NumFramesReady() const {
        KALDI_ERR
            << "NumFramesReady() not implemented for this decodable type.";
        return -1;
    }

    /// Returns the number of states in the acoustic model
    /// (they will be indexed one-based, i.e. from 1 to NumIndices();
    /// this is for compatibility with OpenFst).
    virtual int32 NumIndices() const = 0;

    /// Returns the likelihood (prob), which will be positive in the decoder.
    /// The "frame" starts from zero.  You should verify that
    /// NumFramesReady() > frame before calling this.
    /// The values are passed back through the `likelihood` output vector.
    /// NOTE(review): the meaning of the bool return value is not visible
    /// here -- presumably success/failure; confirm against implementations.
    virtual bool FrameLikelihood(
        int32 frame, std::vector<kaldi::BaseFloat>* likelihood) = 0;
};
/// @}
}  // namespace Kaldi

#endif  // KALDI_ITF_DECODABLE_ITF_H_


================================================
FILE: runtime/engine/kaldi/decoder/lattice-faster-decoder.cc
================================================
// decoder/lattice-faster-decoder.cc

// Copyright 2009-2012  Microsoft Corporation  Mirko Hannemann
//           2013-2018  Johns Hopkins University (Author: Daniel Povey)
//                2014  Guoguo Chen
//                2018  Zhehuai Chen

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "decoder/lattice-faster-decoder.h"
#include "lat/lattice-functions.h"

namespace kaldi {

// Constructs a decoder that borrows `fst`: delete_fst_ is false, so the
// destructor leaves the graph alone and the caller keeps ownership.
// Instantiate this class once for each thing you have to decode.
template <typename FST, typename Token>
LatticeFasterDecoderTpl<FST, Token>::LatticeFasterDecoderTpl(
    const FST &fst,
    const LatticeFasterDecoderConfig &config)
    : fst_(&fst), delete_fst_(false), config_(config), num_toks_(0),
      token_pool_(config.memory_pool_tokens_block_size),
      forward_link_pool_(config.memory_pool_links_block_size) {
  // Let the configuration check itself before decoding starts.
  config.Check();
  // Initial size of the token hash, just so that on the first frame we do
  // something reasonable.
  const int kInitialHashSize = 1000;
  toks_.SetSize(kInitialHashSize);
}

// Constructs a decoder that takes ownership of `fst`: delete_fst_ is true,
// so the destructor deletes the graph.
template <typename FST, typename Token>
LatticeFasterDecoderTpl<FST, Token>::LatticeFasterDecoderTpl(
    const LatticeFasterDecoderConfig &config,
    FST *fst)
    : fst_(fst), delete_fst_(true), config_(config), num_toks_(0),
      token_pool_(config.memory_pool_tokens_block_size),
      forward_link_pool_(config.memory_pool_links_block_size) {
  // Let the configuration check itself before decoding starts.
  config.Check();
  // Initial size of the token hash, just so that on the first frame we do
  // something reasonable.
  const int kInitialHashSize = 1000;
  toks_.SetSize(kInitialHashSize);
}

// Releases the token hash elements and all active tokens, then deletes the
// decoding graph if (and only if) this decoder owns it.
template <typename FST, typename Token>
LatticeFasterDecoderTpl<FST, Token>::~LatticeFasterDecoderTpl() {
  DeleteElems(toks_.Clear());
  ClearActiveTokens();
  if (delete_fst_) {
    delete fst_;
  }
}

// Resets the decoder for a new utterance: drops all state from the previous
// run, seeds a single token on the graph's start state (frame index 0), and
// follows non-emitting arcs from it using config_.beam.
template <typename FST, typename Token>
void LatticeFasterDecoderTpl<FST, Token>::InitDecoding() {
  // Discard whatever the previous utterance left behind.
  DeleteElems(toks_.Clear());
  cost_offsets_.clear();
  ClearActiveTokens();
  final_costs_.clear();
  num_toks_ = 0;
  warned_ = false;
  decoding_finalized_ = false;

  // The graph must have a start state.
  const StateId start_state = fst_->Start();
  KALDI_ASSERT(start_state != fst::kNoStateId);

  // One token list (index 0) holding the start token.
  active_toks_.resize(1);
  Token *root_tok =
      new (token_pool_.Allocate()) Token(0.0, 0.0, NULL, NULL, NULL);
  active_toks_[0].toks = root_tok;
  toks_.Insert(start_state, root_tok);
  ++num_toks_;

  ProcessNonemitting(config_.beam);
}

// Decodes everything `decodable` has ready: InitDecoding(), then
// AdvanceDecoding(), then FinalizeDecoding().
// Returns true if any kind of traceback is available (not necessarily from
// a final state; query ReachedFinal() for that).  It should only very rarely
// return false; this indicates an unusual search error.
template <typename FST, typename Token>
bool LatticeFasterDecoderTpl<FST, Token>::Decode(DecodableInterface *decodable) {
  InitDecoding();
  // Frames are 1-based inside this decoder (if you view them in terms of
  // features), whereas the decodable object numbers them from zero; the
  // correction is applied where the decodable object is called.
  AdvanceDecoding(decodable);
  FinalizeDecoding();

  // A traceback exists only if the last frame still has at least one token.
  if (active_toks_.empty()) return false;
  return active_toks_.back().toks != NULL;
}


// Writes to `olat` the single best path through the raw lattice.
// `use_final_probs` is forwarded to GetRawLattice().
// Returns true if the resulting path FST has at least one state.
template <typename FST, typename Token>
bool LatticeFasterDecoderTpl<FST, Token>::GetBestPath(
    Lattice *olat, bool use_final_probs) const {
  Lattice state_level_lat;
  GetRawLattice(&state_level_lat, use_final_probs);
  ShortestPath(state_level_lat, olat);
  const bool have_path = (olat->NumStates() != 0);
  return have_path;
}


// Outputs an FST corresponding to the raw, state-level lattice.
//
//  ofst             output lattice; any existing states are deleted first.
//  use_final_probs  if true, tokens on the last frame get the graph's
//                   final-probs as final weights (when any final state was
//                   reached); if false, or if no final state was reached,
//                   every token on the last frame is made final with
//                   weight One().
//
// Returns false (after a warning) if some frame has no active tokens;
// otherwise returns true iff the output has at least one state.
// It is an error to call this with use_final_probs == false after
// FinalizeDecoding().
template <typename FST, typename Token>
bool LatticeFasterDecoderTpl<FST, Token>::GetRawLattice(
    Lattice *ofst,
    bool use_final_probs) const {
  typedef LatticeArc Arc;
  typedef Arc::StateId StateId;
  typedef Arc::Weight Weight;

  // Note: you can't use the old interface (Decode()) if you want to
  // get the lattice with use_final_probs = false.  You'd have to do
  // InitDecoding() and then AdvanceDecoding().
  if (decoding_finalized_ && !use_final_probs)
    KALDI_ERR << "You cannot call FinalizeDecoding() and then call "
              << "GetRawLattice() with use_final_probs == false";

  // After finalization the cached final_costs_ are used; before it, they
  // are computed on the fly into a local map (only if they are wanted).
  unordered_map<Token*, BaseFloat> final_costs_local;

  const unordered_map<Token*, BaseFloat> &final_costs =
      (decoding_finalized_ ? final_costs_ : final_costs_local);
  if (!decoding_finalized_ && use_final_probs)
    ComputeFinalCosts(&final_costs_local, NULL, NULL);

  ofst->DeleteStates();
  // num-frames plus one (since frames are one-based, and we have
  // an extra frame for the start-state).
  int32 num_frames = active_toks_.size() - 1;
  KALDI_ASSERT(num_frames > 0);
  const int32 bucket_count = num_toks_/2 + 3;
  unordered_map<Token*, StateId> tok_map(bucket_count);
  // First create all states.
  std::vector<Token*> token_list;
  for (int32 f = 0; f <= num_frames; f++) {
    if (active_toks_[f].toks == NULL) {
      KALDI_WARN << "GetRawLattice: no tokens active on frame " << f
                 << ": not producing lattice.\n";
      return false;
    }
    TopSortTokens(active_toks_[f].toks, &token_list);
    for (size_t i = 0; i < token_list.size(); i++)
      if (token_list[i] != NULL)
        tok_map[token_list[i]] = ofst->AddState();
  }
  // The next statement sets the start state of the output FST.  Because we
  // topologically sorted the tokens, state zero must be the start-state.
  ofst->SetStart(0);

  KALDI_VLOG(4) << "init:" << bucket_count << " buckets:"
                << tok_map.bucket_count() << " load:" << tok_map.load_factor()
                << " max:" << tok_map.max_load_factor();
  // Now create all arcs.
  for (int32 f = 0; f <= num_frames; f++) {
    for (Token *tok = active_toks_[f].toks; tok != NULL; tok = tok->next) {
      StateId cur_state = tok_map[tok];
      for (ForwardLinkT *l = tok->links;
           l != NULL;
           l = l->next) {
        typename unordered_map<Token*, StateId>::const_iterator
            iter = tok_map.find(l->next_tok);
        // Check the lookup succeeded BEFORE dereferencing the iterator;
        // reading iter->second from end() would be undefined behavior.
        KALDI_ASSERT(iter != tok_map.end());
        StateId nextstate = iter->second;
        BaseFloat cost_offset = 0.0;
        if (l->ilabel != 0) {  // emitting..
          // Emitting arcs had a per-frame offset applied to their acoustic
          // cost during decoding; remove it again here.
          KALDI_ASSERT(f >= 0 &&
                       static_cast<size_t>(f) < cost_offsets_.size());
          cost_offset = cost_offsets_[f];
        }
        Arc arc(l->ilabel, l->olabel,
                Weight(l->graph_cost, l->acoustic_cost - cost_offset),
                nextstate);
        ofst->AddArc(cur_state, arc);
      }
      if (f == num_frames) {
        if (use_final_probs && !final_costs.empty()) {
          // Only tokens that sit on a graph-final state become final.
          typename unordered_map<Token*, BaseFloat>::const_iterator
              iter = final_costs.find(tok);
          if (iter != final_costs.end())
            ofst->SetFinal(cur_state, LatticeWeight(iter->second, 0));
        } else {
          // No final-probs requested or available: treat all as final.
          ofst->SetFinal(cur_state, LatticeWeight::One());
        }
      }
    }
  }
  return (ofst->NumStates() > 0);
}


// Deprecated: determinization is now done outside the LatticeFasterDecoder
// class.  Writes to `ofst` the lattice-determinized lattice (one path per
// word sequence), pruned with config_.lattice_beam.
// Returns true if the output has at least one state.
template <typename FST, typename Token>
bool LatticeFasterDecoderTpl<FST, Token>::GetLattice(
    CompactLattice *ofst, bool use_final_probs) const {
  Lattice state_lat;
  GetRawLattice(&state_lat, use_final_probs);

  // Put the word labels on the input side (in the phase where we get
  // backward-costs), then sort arcs on ilabel; the sorting makes
  // lattice-determinization more efficient.
  Invert(&state_lat);
  fst::ILabelCompare<LatticeArc> by_ilabel;
  ArcSort(&state_lat, by_ilabel);

  fst::DeterminizeLatticePrunedOptions prune_opts;
  prune_opts.max_mem = config_.det_opts.max_mem;
  DeterminizeLatticePruned(state_lat, config_.lattice_beam, ofst, prune_opts);

  // The raw lattice is no longer needed; free its memory.
  state_lat.DeleteStates();

  // Remove unreachable states... there might be a small number of these,
  // in some cases.
  Connect(ofst);

  // Note: if something went wrong and the raw lattice was empty, we should
  // still get to this point in the code without warnings or failures.
  const bool nonempty = (ofst->NumStates() != 0);
  return nonempty;
}

// Grows the token hash to num_toks * config_.hash_ratio buckets if that is
// more than its current size; never shrinks it.
template <typename FST, typename Token>
void LatticeFasterDecoderTpl<FST, Token>::PossiblyResizeHash(size_t num_toks) {
  const auto scaled = static_cast<BaseFloat>(num_toks) * config_.hash_ratio;
  const size_t wanted = static_cast<size_t>(scaled);
  if (toks_.Size() < wanted)
    toks_.SetSize(wanted);
}

/*
  A note on the definition of extra_cost.

  extra_cost is used in pruning tokens, to save memory.

  extra_cost can be thought of as a beta (backward) cost assuming
  we had set the betas on currently-active tokens to all be the negative
  of the alphas for those tokens.  (So all currently active tokens would
  be on (tied) best paths).

  We can use the extra_cost to accurately prune away tokens that we know will
  never appear in the lattice.  If the extra_cost is greater than the desired
  lattice beam, the token would provably never appear in the lattice, so we can
  prune away the token.

  (Note: we don't update all the extra_costs every time we update a frame; we
  only do it every 'config_.prune_interval' frames).
 */

// FindOrAddToken looks `state` up in the hash toks_.  If no token exists
// for it yet, a new, empty one (i.e. with no forward links) is created,
// stored in the hash, and pushed onto the front of the singly linked list of
// tokens for this frame (whose head is active_toks_[frame_plus_one].toks).
// If a token already exists and `tot_cost` is strictly better, its cost and
// backpointer are replaced in place.
//
// Returns the hash element holding the token.  Sets *changed (if `changed`
// is non-NULL) to true if the token was newly created or its cost changed,
// and to false otherwise.
template <typename FST, typename Token>
inline typename LatticeFasterDecoderTpl<FST, Token>::Elem*
LatticeFasterDecoderTpl<FST, Token>::FindOrAddToken(
      StateId state, int32 frame_plus_one, BaseFloat tot_cost,
      Token *backpointer, bool *changed) {
  KALDI_ASSERT(frame_plus_one < active_toks_.size());
  Token *&frame_head = active_toks_[frame_plus_one].toks;
  Elem *elem = toks_.Insert(state, NULL);

  if (elem->val == NULL) {
    // No token for this state yet.  Tokens on the currently final frame
    // have zero extra_cost, as any of them could end up on the winning
    // path.
    const BaseFloat extra_cost = 0.0;
    // The NULL argument: no forward links yet.
    Token *fresh_tok = new (token_pool_.Allocate())
        Token(tot_cost, extra_cost, NULL, frame_head, backpointer);
    frame_head = fresh_tok;
    num_toks_++;
    elem->val = fresh_tok;
    if (changed) *changed = true;
    return elem;
  }

  // There is an existing Token for this state.
  Token *existing = elem->val;
  const bool improved = (existing->tot_cost > tot_cost);
  if (improved) {
    // No new token is allocated: the old one stays linked in active_toks_
    // and only its tot_cost (and backpointer) are replaced.
    // SetBackpointer() just does tok->backpointer = backpointer in the case
    // where Token == BackpointerToken, else nothing.
    // In the current frame there are no forward links (and no extra_cost);
    // only in ProcessNonemitting do forward links have to be deleted when a
    // state is visited a second time.  Forward links that led to this
    // token before it was replaced remain, and will hopefully be pruned
    // later (PruneForwardLinks...).
    existing->tot_cost = tot_cost;
    existing->SetBackpointer(backpointer);
  }
  if (changed) *changed = improved;
  return elem;
}

// Prunes outgoing forward links for all tokens in
// active_toks_[frame_plus_one]: every link whose link_extra_cost exceeds
// config_.lattice_beam is unlinked and returned to forward_link_pool_, and
// each token's extra_cost is recomputed as the minimum link_extra_cost over
// its surviving links (+infinity if none survive).
// Called from PruneActiveTokens() and FinalizeDecoding().
//
//   frame_plus_one       index into active_toks_ (asserted to be in range).
//   extra_costs_changed  output: true if, in any pass, some token's
//                        extra_cost moved by more than `delta`.
//   links_pruned         output: true if any link of any token was pruned.
//   delta                the amount by which an extra_cost must change to
//                        count as "changed".
template <typename FST, typename Token>
void LatticeFasterDecoderTpl<FST, Token>::PruneForwardLinks(
    int32 frame_plus_one, bool *extra_costs_changed,
    bool *links_pruned, BaseFloat delta) {
  // delta is the amount by which the extra_costs must change
  // If delta is larger,  we'll tend to go back less far
  //    toward the beginning of the file.
  // extra_costs_changed is set to true if extra_cost was changed for any token
  // links_pruned is set to true if any link in any token was pruned

  *extra_costs_changed = false;
  *links_pruned = false;
  KALDI_ASSERT(frame_plus_one >= 0 && frame_plus_one < active_toks_.size());
  if (active_toks_[frame_plus_one].toks == NULL) {  // empty list; should not happen.
    // Warn only once per utterance (warned_ is reset in InitDecoding()).
    if (!warned_) {
      KALDI_WARN << "No tokens alive [doing pruning].. warning first "
          "time only for each utterance\n";
      warned_ = true;
    }
  }

  // We have to iterate until there is no more change, because the links
  // are not guaranteed to be in topological order.
  bool changed = true;  // difference new minus old extra cost >= delta ?
  while (changed) {
    changed = false;
    for (Token *tok = active_toks_[frame_plus_one].toks;
         tok != NULL; tok = tok->next) {
      // prev_link trails `link` so that a link can be unlinked from the
      // singly linked list in place.
      ForwardLinkT *link, *prev_link = NULL;
      // will recompute tok_extra_cost for tok.
      BaseFloat tok_extra_cost = std::numeric_limits<BaseFloat>::infinity();
      // tok_extra_cost is the best (min) of link_extra_cost of outgoing links
      for (link = tok->links; link != NULL; ) {
        // See if we need to excise this link...
        Token *next_tok = link->next_tok;
        BaseFloat link_extra_cost = next_tok->extra_cost +
            ((tok->tot_cost + link->acoustic_cost + link->graph_cost)
             - next_tok->tot_cost);  // difference in brackets is >= 0
        // link_extra_cost is the difference in score between the best paths
        // through link source state and through link destination state
        KALDI_ASSERT(link_extra_cost == link_extra_cost);  // check for NaN
        if (link_extra_cost > config_.lattice_beam) {  // excise link
          // Read the successor before the link is handed back to the pool.
          ForwardLinkT *next_link = link->next;
          if (prev_link != NULL) prev_link->next = next_link;
          else tok->links = next_link;
          forward_link_pool_.Free(link);
          link = next_link;  // advance link but leave prev_link the same.
          *links_pruned = true;
        } else {   // keep the link and update the tok_extra_cost if needed.
          if (link_extra_cost < 0.0) {  // this is just a precaution.
            // Slightly negative values are clamped silently; anything below
            // -0.01 is also reported.
            if (link_extra_cost < -0.01)
              KALDI_WARN << "Negative extra_cost: " << link_extra_cost;
            link_extra_cost = 0.0;
          }
          if (link_extra_cost < tok_extra_cost)
            tok_extra_cost = link_extra_cost;
          prev_link = link;  // move to next link
          link = link->next;
        }
      }  // for all outgoing links
      if (fabs(tok_extra_cost - tok->extra_cost) > delta)
        changed = true;   // difference new minus old is bigger than delta
      tok->extra_cost = tok_extra_cost;
      // will be +infinity or <= lattice_beam_.
      // infinity indicates that no forward link survived pruning
    }  // for all Token on active_toks_[frame]
    if (changed) *extra_costs_changed = true;

    // Note: it's theoretically possible that aggressive compiler
    // optimizations could cause an infinite loop here for small delta and
    // high-dynamic-range scores.
  } // while changed
}

// PruneForwardLinksFinal is a version of PruneForwardLinks that we call
// on the final frame.  If there are final tokens active, it uses
// the final-probs for pruning, otherwise it treats all tokens as final.
//
// Side effects visible in this function: fills final_costs_,
// final_relative_cost_ and final_best_cost_ via ComputeFinalCosts(), sets
// decoding_finalized_ to true, empties the token hash toks_, and updates
// extra_cost on every token of the last frame (+infinity marks a token to
// be removed later by PruneTokensForFrame()).
template <typename FST, typename Token>
void LatticeFasterDecoderTpl<FST, Token>::PruneForwardLinksFinal() {
  KALDI_ASSERT(!active_toks_.empty());
  // Index of the last token list, i.e. the final frame.
  int32 frame_plus_one = active_toks_.size() - 1;

  if (active_toks_[frame_plus_one].toks == NULL)  // empty list; should not happen.
    KALDI_WARN << "No tokens alive at end of file";

  typedef typename unordered_map<Token*, BaseFloat>::const_iterator IterType;
  // Must run before decoding_finalized_ is set: ComputeFinalCosts() asserts
  // that decoding has not been finalized, and it reads toks_, which is
  // cleared just below.
  ComputeFinalCosts(&final_costs_, &final_relative_cost_, &final_best_cost_);
  decoding_finalized_ = true;
  // We call DeleteElems() as a nicety, not because it's really necessary;
  // otherwise there would be a time, after calling PruneTokensForFrame() on the
  // final frame, when toks_.GetList() or toks_.Clear() would contain pointers
  // to nonexistent tokens.
  DeleteElems(toks_.Clear());

  // Now go through tokens on this frame, pruning forward links...  may have to
  // iterate a few times until there is no more change, because the list is not
  // in topological order.  This is a modified version of the code in
  // PruneForwardLinks, but here we also take account of the final-probs.
  bool changed = true;
  BaseFloat delta = 1.0e-05;
  while (changed) {
    changed = false;
    for (Token *tok = active_toks_[frame_plus_one].toks;
         tok != NULL; tok = tok->next) {
      // prev_link trails `link` so that a link can be unlinked in place.
      ForwardLinkT *link, *prev_link = NULL;
      // will recompute tok_extra_cost.  It has a term in it that corresponds
      // to the "final-prob", so instead of initializing tok_extra_cost to infinity
      // below we set it to the difference between the (score+final_prob) of this token,
      // and the best such (score+final_prob).
      BaseFloat final_cost;
      if (final_costs_.empty()) {
        // No final state was reached: treat every token as final.
        final_cost = 0.0;
      } else {
        IterType iter = final_costs_.find(tok);
        if (iter != final_costs_.end())
          final_cost = iter->second;
        else
          // Token has no entry in final_costs_: it cannot end a path directly.
          final_cost = std::numeric_limits<BaseFloat>::infinity();
      }
      BaseFloat tok_extra_cost = tok->tot_cost + final_cost - final_best_cost_;
      // tok_extra_cost will be a "min" over either directly being final, or
      // being indirectly final through other links, and the loop below may
      // decrease its value:
      for (link = tok->links; link != NULL; ) {
        // See if we need to excise this link...
        Token *next_tok = link->next_tok;
        BaseFloat link_extra_cost = next_tok->extra_cost +
            ((tok->tot_cost + link->acoustic_cost + link->graph_cost)
             - next_tok->tot_cost);
        if (link_extra_cost > config_.lattice_beam) {  // excise link
          // Read the successor before the link is handed back to the pool.
          ForwardLinkT *next_link = link->next;
          if (prev_link != NULL) prev_link->next = next_link;
          else tok->links = next_link;
          forward_link_pool_.Free(link);
          link = next_link; // advance link but leave prev_link the same.
        } else { // keep the link and update the tok_extra_cost if needed.
          if (link_extra_cost < 0.0) { // this is just a precaution.
            // Slightly negative values are clamped silently; anything below
            // -0.01 is also reported.
            if (link_extra_cost < -0.01)
              KALDI_WARN << "Negative extra_cost: " << link_extra_cost;
            link_extra_cost = 0.0;
          }
          if (link_extra_cost < tok_extra_cost)
            tok_extra_cost = link_extra_cost;
          prev_link = link;
          link = link->next;
        }
      }
      // prune away tokens worse than lattice_beam above best path.  This step
      // was not necessary in the non-final case because then, this case
      // showed up as having no forward links.  Here, the tok_extra_cost has
      // an extra component relating to the final-prob.
      if (tok_extra_cost > config_.lattice_beam)
        tok_extra_cost = std::numeric_limits<BaseFloat>::infinity();
      // to be pruned in PruneTokensForFrame

      if (!ApproxEqual(tok->extra_cost, tok_extra_cost, delta))
        changed = true;
      tok->extra_cost = tok_extra_cost; // will be +infinity or <= lattice_beam_.
    }
  } // while changed
}

// Returns the "final relative cost" as produced by ComputeFinalCosts().
// Before FinalizeDecoding() it is computed on demand; afterwards the value
// cached at finalization time is returned, because ComputeFinalCosts() may
// not be called once decoding has been finalized.
template <typename FST, typename Token>
BaseFloat LatticeFasterDecoderTpl<FST, Token>::FinalRelativeCost() const {
  if (decoding_finalized_) {
    return final_relative_cost_;
  }
  BaseFloat relative_cost;
  ComputeFinalCosts(NULL, &relative_cost, NULL);
  return relative_cost;
}


// Removes from active_toks_[frame_plus_one] every token whose extra_cost is
// +infinity, i.e. every token left without forward links, and returns it to
// the token pool.  [This is not done inside PruneForwardLinks because it
// would give us a problem with dangling pointers.]
// Called by PruneActiveTokens() when forward links have been pruned, and by
// FinalizeDecoding().
template <typename FST, typename Token>
void LatticeFasterDecoderTpl<FST, Token>::PruneTokensForFrame(int32 frame_plus_one) {
  KALDI_ASSERT(frame_plus_one >= 0 && frame_plus_one < active_toks_.size());
  Token *&head = active_toks_[frame_plus_one].toks;
  if (head == NULL)
    KALDI_WARN << "No tokens alive [doing pruning]";

  const BaseFloat kInfinity = std::numeric_limits<BaseFloat>::infinity();
  Token *last_kept = NULL;  // most recent token that stayed in the list
  Token *cur = head;
  while (cur != NULL) {
    // Remember the successor first: `cur` may be freed below.
    Token *following = cur->next;
    if (cur->extra_cost != kInfinity) {
      last_kept = cur;
    } else {
      // Token is unreachable from the end of the graph (no forward links
      // survived): unlink it from the list and free it.
      if (last_kept == NULL) head = following;
      else last_kept->next = following;
      token_pool_.Free(cur);
      num_toks_--;
    }
    cur = following;
  }
}

// Walks backwards through the still-alive tokens and prunes them, starting
// not from the current frame (where all tokens are kept) but from the frame
// before it.  Frames are only revisited while their
// must_prune_forward_links / must_prune_tokens flags are set; `delta`
// controls when an extra_cost is considered to have "not changed".
template <typename FST, typename Token>
void LatticeFasterDecoderTpl<FST, Token>::PruneActiveTokens(BaseFloat delta) {
  const int32 cur_frame_plus_one = NumFramesDecoded();
  const int32 num_toks_begin = num_toks_;
  // "f" is a "frame plus one" index: subtract one to get the corresponding
  // index for the decodable object.
  for (int32 f = cur_frame_plus_one - 1; f >= 0; f--) {
    // Forward links of frame f need pruning when either
    //  (1) they have never been pruned (new TokenList), or
    //  (2) tokens on the following frame changed their extra_cost since
    //      the links leading to them were last pruned.
    if (active_toks_[f].must_prune_forward_links) {
      bool costs_moved = false;
      bool some_link_removed = false;
      PruneForwardLinks(f, &costs_moved, &some_link_removed, delta);
      // A changed extra_cost here invalidates the links of the frame before.
      if (costs_moved && f > 0)
        active_toks_[f-1].must_prune_forward_links = true;
      // A removed link may have left some token without forward links.
      if (some_link_removed)
        active_toks_[f].must_prune_tokens = true;
      active_toks_[f].must_prune_forward_links = false;  // job done
    }

    // Prune tokens on the following frame, except when that is the current
    // (last) frame, whose tokens have no forward links.
    const int32 next_f = f + 1;
    if (next_f < cur_frame_plus_one && active_toks_[next_f].must_prune_tokens) {
      PruneTokensForFrame(next_f);
      active_toks_[next_f].must_prune_tokens = false;
    }
  }
  KALDI_VLOG(4) << "PruneActiveTokens: pruned tokens from " << num_toks_begin
                << " to " << num_toks_;
}

// Scans the tokens currently in the hash toks_ and reports final-cost
// statistics.  Must not be called after decoding has been finalized.
// Every output pointer may be NULL, in which case that output is skipped.
//
//   final_costs          cleared, then filled with token -> final cost of
//                        its graph state, for tokens whose final cost is
//                        not +infinity.
//   final_relative_cost  (best cost including final cost) minus (best cost
//                        without it); +infinity if both are +infinity.
//   final_best_cost      best cost including the final cost if that is
//                        finite, otherwise the best cost without it.
template <typename FST, typename Token>
void LatticeFasterDecoderTpl<FST, Token>::ComputeFinalCosts(
    unordered_map<Token*, BaseFloat> *final_costs,
    BaseFloat *final_relative_cost,
    BaseFloat *final_best_cost) const {
  KALDI_ASSERT(!decoding_finalized_);
  if (final_costs != NULL)
    final_costs->clear();

  const BaseFloat infinity = std::numeric_limits<BaseFloat>::infinity();
  BaseFloat best_cost = infinity;
  BaseFloat best_cost_with_final = infinity;

  for (const Elem *e = toks_.GetList(); e != NULL; e = e->tail) {
    StateId state = e->key;
    Token *tok = e->val;
    BaseFloat final_cost = fst_->Final(state).Value();
    BaseFloat cost = tok->tot_cost;
    BaseFloat cost_with_final = cost + final_cost;
    best_cost = std::min(cost, best_cost);
    best_cost_with_final = std::min(cost_with_final, best_cost_with_final);
    if (final_costs != NULL && final_cost != infinity)
      (*final_costs)[tok] = final_cost;
  }

  if (final_relative_cost != NULL) {
    if (best_cost != infinity || best_cost_with_final != infinity) {
      *final_relative_cost = best_cost_with_final - best_cost;
    } else {
      // Likely this will only happen if there are no tokens surviving.
      // This seems the least bad way to handle it.
      *final_relative_cost = infinity;
    }
  }

  if (final_best_cost != NULL) {
    // Use the with-final cost when a final state exists, else fall back.
    *final_best_cost =
        (best_cost_with_final != infinity) ? best_cost_with_final : best_cost;
  }
}

// Decodes further frames from `decodable`, up to the number it reports as
// ready.  InitDecoding() must have been called first, and FinalizeDecoding()
// must not have been called (both are asserted).
//
//   decodable       source of frames; its NumFramesReady() must be >= the
//                   number of frames already decoded.
//   max_num_frames  if >= 0, at most this many additional frames are decoded
//                   in this call; if negative, no limit is applied.  (Its
//                   default value is declared in the header, not shown here.)
template <typename FST, typename Token>
void LatticeFasterDecoderTpl<FST, Token>::AdvanceDecoding(DecodableInterface *decodable,
                                                int32 max_num_frames) {
  if (std::is_same<FST, fst::Fst<fst::StdArc> >::value) {
    // if the type 'FST' is the FST base-class, then see if the FST type of fst_
    // is actually VectorFst or ConstFst.  If so, call the AdvanceDecoding()
    // function after casting *this to the more specific type.
    // NOTE(review): this relies on the different instantiations of
    // LatticeFasterDecoderTpl having a compatible layout -- confirm against
    // the class definition in the header.
    if (fst_->Type() == "const") {
      LatticeFasterDecoderTpl<fst::ConstFst<fst::StdArc>, Token> *this_cast =
          reinterpret_cast<LatticeFasterDecoderTpl<fst::ConstFst<fst::StdArc>, Token>* >(this);
      this_cast->AdvanceDecoding(decodable, max_num_frames);
      return;
    } else if (fst_->Type() == "vector") {
      LatticeFasterDecoderTpl<fst::VectorFst<fst::StdArc>, Token> *this_cast =
          reinterpret_cast<LatticeFasterDecoderTpl<fst::VectorFst<fst::StdArc>, Token>* >(this);
      this_cast->AdvanceDecoding(decodable, max_num_frames);
      return;
    }
    // Any other FST type falls through to the generic code below.
  }


  KALDI_ASSERT(!active_toks_.empty() && !decoding_finalized_ &&
               "You must call InitDecoding() before AdvanceDecoding");
  int32 num_frames_ready = decodable->NumFramesReady();
  // num_frames_ready must be >= num_frames_decoded, or else
  // the number of frames ready must have decreased (which doesn't
  // make sense) or the decodable object changed between calls
  // (which isn't allowed).
  KALDI_ASSERT(num_frames_ready >= NumFramesDecoded());
  // Decode up to everything that is ready, capped by max_num_frames when
  // that is non-negative.
  int32 target_frames_decoded = num_frames_ready;
  if (max_num_frames >= 0)
    target_frames_decoded = std::min(target_frames_decoded,
                                     NumFramesDecoded() + max_num_frames);
  while (NumFramesDecoded() < target_frames_decoded) {
    // Prune tokens every config_.prune_interval frames.
    // NOTE(review): assumes prune_interval is non-zero (presumably enforced
    // by config.Check() in the constructor) -- confirm.
    if (NumFramesDecoded() % config_.prune_interval == 0) {
      PruneActiveTokens(config_.lattice_beam * config_.prune_scale);
    }
    // Process one frame: emitting arcs first, then non-emitting arcs using
    // the cutoff returned by ProcessEmitting().
    BaseFloat cost_cutoff = ProcessEmitting(decodable);
    ProcessNonemitting(cost_cutoff);
  }
}

// FinalizeDecoding() is the (optionally called) final-frame counterpart of
// PruneActiveTokens(): the last frame is pruned taking the final-probs of
// tokens into account, then pruning is swept back over all earlier frames.
// This function used to be called PruneActiveTokensFinal().
template <typename FST, typename Token>
void LatticeFasterDecoderTpl<FST, Token>::FinalizeDecoding() {
  const int32 num_frames = NumFramesDecoded();
  const int32 toks_before = num_toks_;

  // Prunes the final frame (with final-probs) and sets decoding_finalized_.
  PruneForwardLinksFinal();

  // Walk from the last decoded frame back to frame zero.
  for (int32 frame = num_frames; frame-- > 0; ) {
    bool ignored_a, ignored_b;   // outputs we have no use for here.
    BaseFloat zero_delta = 0.0;  // delta of zero means we must always update
    PruneForwardLinks(frame, &ignored_a, &ignored_b, zero_delta);
    PruneTokensForFrame(frame + 1);
  }
  PruneTokensForFrame(0);

  KALDI_VLOG(4) << "pruned tokens from " << toks_before
                << " to " << num_toks_;
}

/// Gets the weight cutoff.  Also counts the active tokens.
///
/// Scans the list of Elems starting at 'list_head' and works out the cost
/// above which tokens are to be pruned, honouring config_.beam,
/// config_.max_active and config_.min_active.
///
/// @param [in]  list_head      Head of the singly-linked list of Elems whose
///                             tokens are examined (may be NULL).
/// @param [out] tok_count      If non-NULL, receives the number of Elems in
///                             the list.
/// @param [out] adaptive_beam  If non-NULL, receives the beam to use when
///                             pruning tokens created from this list:
///                             config_.beam when the plain beam decides the
///                             cutoff, otherwise the distance from the best
///                             cost to the max-active / min-active cutoff
///                             plus config_.beam_delta.
/// @param [out] best_elem      If non-NULL, receives the Elem with the lowest
///                             cost.  Left unmodified when no Elem has a cost
///                             below +infinity (e.g. the list is empty).
/// @return The cost cutoff: best cost plus beam, or the tighter max-active
///         cutoff, or the looser min-active cutoff.
template <typename FST, typename Token>
BaseFloat LatticeFasterDecoderTpl<FST, Token>::GetCutoff(Elem *list_head, size_t *tok_count,
                                          BaseFloat *adaptive_beam, Elem **best_elem) {
  BaseFloat best_weight = std::numeric_limits<BaseFloat>::infinity();
  // positive == high cost == bad.
  size_t count = 0;
  if (config_.max_active == std::numeric_limits<int32>::max() &&
      config_.min_active == 0) {
    // Neither count-based constraint is in effect, so only the best cost is
    // needed and there is no reason to collect all the costs.
    for (Elem *e = list_head; e != NULL; e = e->tail, count++) {
      BaseFloat w = static_cast<BaseFloat>(e->val->tot_cost);
      if (w < best_weight) {
        best_weight = w;
        if (best_elem) *best_elem = e;
      }
    }
    if (tok_count != NULL) *tok_count = count;
    if (adaptive_beam != NULL) *adaptive_beam = config_.beam;
    return best_weight + config_.beam;
  } else {
    // Collect every cost so that the max_active-th / min_active-th best cost
    // can be found with nth_element below.
    tmp_array_.clear();
    for (Elem *e = list_head; e != NULL; e = e->tail, count++) {
      BaseFloat w = e->val->tot_cost;
      tmp_array_.push_back(w);
      if (w < best_weight) {
        best_weight = w;
        if (best_elem) *best_elem = e;
      }
    }
    if (tok_count != NULL) *tok_count = count;

    BaseFloat beam_cutoff = best_weight + config_.beam,
        min_active_cutoff = std::numeric_limits<BaseFloat>::infinity(),
        max_active_cutoff = std::numeric_limits<BaseFloat>::infinity();

    KALDI_VLOG(6) << "Number of tokens active on frame " << NumFramesDecoded()
                  << " is " << tmp_array_.size();

    if (tmp_array_.size() > static_cast<size_t>(config_.max_active)) {
      std::nth_element(tmp_array_.begin(),
                       tmp_array_.begin() + config_.max_active,
                       tmp_array_.end());
      max_active_cutoff = tmp_array_[config_.max_active];
    }
    if (max_active_cutoff < beam_cutoff) { // max_active is tighter than beam.
      if (adaptive_beam)
        *adaptive_beam = max_active_cutoff - best_weight + config_.beam_delta;
      return max_active_cutoff;
    }
    if (tmp_array_.size() > static_cast<size_t>(config_.min_active)) {
      if (config_.min_active == 0) min_active_cutoff = best_weight;
      else {
        // If the max_active partition was done above, everything from
        // position max_active onwards is already known to be no better than
        // the elements before it, so only the front part needs partitioning.
        std::nth_element(tmp_array_.begin(),
                         tmp_array_.begin() + config_.min_active,
                         tmp_array_.size() > static_cast<size_t>(config_.max_active) ?
                         tmp_array_.begin() + config_.max_active :
                         tmp_array_.end());
        min_active_cutoff = tmp_array_[config_.min_active];
      }
    }
    if (min_active_cutoff > beam_cutoff) { // min_active is looser than beam.
      if (adaptive_beam)
        *adaptive_beam = min_active_cutoff - best_weight + config_.beam_delta;
      return min_active_cutoff;
    } else {
      // 'adaptive_beam' is optional in every other branch of this function;
      // guard it here as well so that passing NULL is not a null dereference.
      if (adaptive_beam)
        *adaptive_beam = config_.beam;
      return beam_cutoff;
    }
  }
}

// Processes the emitting arcs (those with a nonzero ilabel) for one frame.
// The tokens currently held in toks_ are taken out of it; each one whose cost
// is within the cutoff returned by GetCutoff() is extended along every
// emitting arc of its state, and the resulting tokens are added through
// FindOrAddToken() for the next frame.  A ForwardLink is recorded on the
// source token for every extension that survives pruning.
//
// 'decodable' supplies the log-likelihoods for the frame being processed.
//
// Returns the cutoff that was applied to the newly created tokens (the best
// new cost seen plus the adaptive beam); AdvanceDecoding() passes it on to
// ProcessNonemitting().
template <typename FST, typename Token>
BaseFloat LatticeFasterDecoderTpl<FST, Token>::ProcessEmitting(
    DecodableInterface *decodable) {
  KALDI_ASSERT(active_toks_.size() > 0);
  int32 frame = active_toks_.size() - 1; // frame is the frame-index
                                         // (zero-based) used to get likelihoods
                                         // from the decodable object.
  // Make room in active_toks_ for the frame whose tokens we are about to create.
  active_toks_.resize(active_toks_.size() + 1);

  Elem *final_toks = toks_.Clear(); // analogous to swapping prev_toks_ / cur_toks_
                                   // in simple-decoder.h.   Removes the Elems from
                                   // being indexed in the hash in toks_.
  Elem *best_elem = NULL;
  BaseFloat adaptive_beam;
  size_t tok_cnt;
  // cur_cutoff prunes the tokens we are expanding *from*; adaptive_beam is the
  // beam used below to prune the tokens we are expanding *to*.
  BaseFloat cur_cutoff = GetCutoff(final_toks, &tok_cnt, &adaptive_beam, &best_elem);
  KALDI_VLOG(6) << "Adaptive beam on frame " << NumFramesDecoded() << " is "
                << adaptive_beam;

  PossiblyResizeHash(tok_cnt);  // This makes sure the hash is always big enough.

  BaseFloat next_cutoff = std::numeric_limits<BaseFloat>::infinity();
  // pruning "online" before having seen all tokens

  BaseFloat cost_offset = 0.0; // Used to keep probabilities in a good
                               // dynamic range.


  // First process the best token to get a hopefully
  // reasonably tight bound on the next cutoff.  The only
  // products of the next block are "next_cutoff" and "cost_offset".
  if (best_elem) {
    StateId state = best_elem->key;
    Token *tok = best_elem->val;
    // The offset is the negated cost of the best token; it is added to every
    // acoustic cost computed on this frame.
    cost_offset = - tok->tot_cost;
    for (fst::ArcIterator<FST> aiter(*fst_, state);
         !aiter.Done();
         aiter.Next()) {
      const Arc &arc = aiter.Value();
      if (arc.ilabel != 0) {  // propagate..
        BaseFloat new_weight = arc.weight.Value() + cost_offset -
            decodable->LogLikelihood(frame, arc.ilabel) + tok->tot_cost;
        if (new_weight + adaptive_beam < next_cutoff)
          next_cutoff = new_weight + adaptive_beam;
      }
    }
  }

  // Store the offset on the acoustic likelihoods that we're applying.
  // Could just do cost_offsets_.push_back(cost_offset), but we
  // do it this way as it's more robust to future code changes.
  cost_offsets_.resize(frame + 1, 0.0);
  cost_offsets_[frame] = cost_offset;

  // the tokens are now owned here, in final_toks, and the hash is empty.
  // 'owned' is a complex thing here; the point is we need to call DeleteElem
  // on each elem 'e' to let toks_ know we're done with them.
  for (Elem *e = final_toks, *e_tail; e != NULL; e = e_tail) {
    // loop this way because we delete "e" as we go.
    StateId state = e->key;
    Token *tok = e->val;
    // Only tokens within the current-frame cutoff are expanded.
    if (tok->tot_cost <= cur_cutoff) {
      for (fst::ArcIterator<FST> aiter(*fst_, state);
           !aiter.Done();
           aiter.Next()) {
        const Arc &arc = aiter.Value();
        if (arc.ilabel != 0) {  // propagate..
          BaseFloat ac_cost = cost_offset -
              decodable->LogLikelihood(frame, arc.ilabel),
              graph_cost = arc.weight.Value(),
              cur_cost = tok->tot_cost,
              tot_cost = cur_cost + ac_cost + graph_cost;
          // Drop the extension if it is already outside the running cutoff;
          // otherwise tighten the running cutoff if this one is good enough.
          if (tot_cost >= next_cutoff) continue;
          else if (tot_cost + adaptive_beam < next_cutoff)
            next_cutoff = tot_cost + adaptive_beam; // prune by best current token
          // Note: the frame indexes into active_toks_ are one-based,
          // hence the + 1.
          Elem *e_next = FindOrAddToken(arc.nextstate,
                                        frame + 1, tot_cost, tok, NULL);
          // NULL: no change indicator needed

          // Add ForwardLink from tok to next_tok (put on head of list tok->links)
          tok->links = new (forward_link_pool_.Allocate())
              ForwardLinkT(e_next->val, arc.ilabel, arc.olabel, graph_cost,
                           ac_cost, tok->links);
        }
      } // for all arcs
    }
    // Read the tail before the Elem is handed back to toks_.
    e_tail = e->tail;
    toks_.Delete(e); // delete Elem
  }
  return next_cutoff;
}

// Returns every ForwardLink hanging off 'tok' to forward_link_pool_ and
// leaves 'tok' with an empty (NULL) list of links.
template <typename FST, typename Token>
void LatticeFasterDecoderTpl<FST, Token>::DeleteForwardLinks(Token *tok) {
  for (ForwardLinkT *link = tok->links; link != NULL; ) {
    ForwardLinkT *following = link->next;  // read before the link is freed.
    forward_link_pool_.Free(link);
    link = following;
  }
  tok->links = NULL;
}


// Processes the nonemitting (input-epsilon) arcs for one frame, propagating
// tokens within toks_.  Tokens whose cost is not below 'cutoff' are neither
// expanded nor created.
template <typename FST, typename Token>
void LatticeFasterDecoderTpl<FST, Token>::ProcessNonemitting(BaseFloat cutoff) {
  KALDI_ASSERT(!active_toks_.empty());
  // Time-index of the frame we just processed, or -1 when handling the
  // nonemitting transitions before the first frame (called from
  // InitDecoding()).
  const int32 frame = static_cast<int32>(active_toks_.size()) - 2;

  // queue_ is a plain stack of Elems still to be expanded.  It is not very
  // optimal, as a state may be processed more than once, but in the baseline
  // code turning it into a set did not improve overall speed.
  KALDI_ASSERT(queue_.empty());

  if (toks_.GetList() == NULL && !warned_) {
    KALDI_WARN << "Error, no surviving tokens: frame is " << frame;
    warned_ = true;
  }

  // Seed the queue with every active state that has input-epsilon arcs.
  for (const Elem *elem = toks_.GetList(); elem != NULL; elem = elem->tail) {
    if (fst_->NumInputEpsilons(elem->key) != 0)
      queue_.push_back(elem);
  }

  while (!queue_.empty()) {
    const Elem *elem = queue_.back();
    queue_.pop_back();

    StateId src_state = elem->key;
    Token *src_tok = elem->val;  // elem is never NULL here.
    BaseFloat src_cost = src_tok->tot_cost;
    if (src_cost >= cutoff)  // Don't bother processing successors.
      continue;

    // Any forward links 'src_tok' already has are thrown away, since they
    // are regenerated below; this matters when a token is re-visited.  It is
    // a kind of non-optimality, but as most states are emitting it is not a
    // huge issue.
    DeleteForwardLinks(src_tok);
    src_tok->links = NULL;

    for (fst::ArcIterator<FST> aiter(*fst_, src_state);
         !aiter.Done();
         aiter.Next()) {
      const Arc &arc = aiter.Value();
      if (arc.ilabel != 0)  // propagate nonemitting only...
        continue;

      BaseFloat graph_cost = arc.weight.Value();
      BaseFloat tot_cost = src_cost + graph_cost;
      if (!(tot_cost < cutoff))
        continue;

      bool changed;
      Elem *dest = FindOrAddToken(arc.nextstate, frame + 1, tot_cost,
                                  src_tok, &changed);

      // Put the new link at the head of src_tok's list of links.
      src_tok->links = new (forward_link_pool_.Allocate()) ForwardLinkT(
          dest->val, 0, arc.olabel, graph_cost, 0, src_tok->links);

      // "changed" tells us whether the destination token is new or has a
      // different cost from before; if so it must be (re-)expanded.
      if (changed && fst_->NumInputEpsilons(arc.nextstate) != 0)
        queue_.push_back(dest);
    }  // for all arcs
  }  // while queue not empty
}


// Hands every Elem of the singly-linked list starting at 'list' back to
// toks_ via toks_.Delete().
template <typename FST, typename Token>
void LatticeFasterDecoderTpl<FST, Token>::DeleteElems(Elem *list) {
  Elem *cur = list;
  while (cur != NULL) {
    Elem *rest = cur->tail;  // fetch before 'cur' is handed back.
    toks_.Delete(cur);
    cur = rest;
  }
}

// A cleanup routine, used at utterance end/begin: frees every token on every
// frame (together with its forward links) and empties active_toks_.
template <typename FST, typename Token>
void LatticeFasterDecoderTpl<FST, Token>::ClearActiveTokens() {
  const size_t num_lists = active_toks_.size();
  for (size_t f = 0; f != num_lists; ++f) {
    // Walk the token list of this frame, releasing each token's forward
    // links and then the token itself.
    Token *tok = active_toks_[f].toks;
    while (tok != NULL) {
      DeleteForwardLinks(tok);
      Token *following = tok->next;  // read before the token is freed.
      token_pool_.Free(tok);
      --num_toks_;
      tok = following;
    }
  }
  active_toks_.clear();
  KALDI_ASSERT(num_toks_ == 0);
}

// static
// Produces in *topsorted_list an ordering of the tokens of one frame (the
// list starting at 'tok_list') in which every token comes before the tokens
// it reaches through epsilon links.  The output may contain NULL entries in
// between the tokens.
template <typename FST, typename Token>
void LatticeFasterDecoderTpl<FST, Token>::TopSortTokens(
    Token *tok_list, std::vector<Token*> *topsorted_list) {
  typedef unordered_map<Token*, int32> PosMap;
  PosMap token2pos;

  int32 num_toks = 0;
  for (Token *t = tok_list; t != NULL; t = t->next)
    ++num_toks;

  // Hand out the positions num_toks - 1, ... , 2, 1, 0 in list order.  As new
  // tokens are put at the front of the list, this is likely to be closer to
  // topological order than ascending numbering would be.
  int32 cur_pos = 0;
  for (Token *t = tok_list; t != NULL; t = t->next) {
    ++cur_pos;
    token2pos[t] = num_toks - cur_pos;
  }

  unordered_set<Token*> reprocess;

  // For a token sitting at position 'pos': every token of this frame that it
  // reaches via an epsilon link, and that currently sits before 'pos', is
  // moved to a fresh position at the end and marked for reprocessing.
  // Non-epsilon links are ignored since they lead to another frame.
  auto push_successors_back = [&token2pos, &cur_pos, &reprocess](Token *tok,
                                                                 int32 pos) {
    for (ForwardLinkT *link = tok->links; link != NULL; link = link->next) {
      if (link->ilabel != 0)
        continue;
      typename PosMap::iterator succ = token2pos.find(link->next_tok);
      if (succ == token2pos.end())  // not a token on this frame.
        continue;
      if (succ->second < pos) {
        succ->second = cur_pos++;
        reprocess.insert(link->next_tok);
      }
    }
  };

  for (typename PosMap::iterator it = token2pos.begin();
       it != token2pos.end(); ++it) {
    Token *tok = it->first;
    push_successors_back(tok, it->second);
    // If this token had been queued for reprocessing earlier, it no longer
    // needs it: it has just been processed.
    reprocess.erase(tok);
  }

  const size_t max_loop = 1000000;  // max_loop is to detect epsilon cycles.
  size_t loop_count = 0;
  while (!reprocess.empty() && loop_count < max_loop) {
    // Snapshot the pending set, since processing may add to it.
    std::vector<Token*> batch(reprocess.begin(), reprocess.end());
    reprocess.clear();
    for (size_t i = 0; i < batch.size(); ++i) {
      Token *tok = batch[i];
      push_successors_back(tok, token2pos[tok]);
    }
    ++loop_count;
  }
  KALDI_ASSERT(loop_count < max_loop && "Epsilon loops exist in your decoding "
               "graph (this is not allowed!)");

  topsorted_list->clear();
  topsorted_list->resize(cur_pos, NULL);  // create a list with NULLs in between.
  for (typename PosMap::iterator it = token2pos.begin();
       it != token2pos.end(); ++it)
    (*topsorted_list)[it->second] = it->first;
}

// Explicitly instantiate the template for every combination of token type and
// FST type that we'll need.  The member functions are defined in this file
// rather than in the header, so these instantiations are what make them
// available to other translation units.

// Decoders using decoder::StdToken, for the generic Fst base-class and for the
// two concrete FST types that AdvanceDecoding() re-dispatches to.
template class LatticeFasterDecoderTpl<fst::Fst<fst::StdArc>, decoder::StdToken>;
template class LatticeFasterDecoderTpl<fst::VectorFst<fst::StdArc>, decoder::StdToken >;
template class LatticeFasterDecoderTpl<fst::ConstFst<fst::StdArc>, decoder::StdToken >;


// The same three FST types, using decoder::BackpointerToken.
template class LatticeFasterDecoderTpl<fst::Fst<fst::StdArc> , decoder::BackpointerToken>;
template class LatticeFasterDecoderTpl<fst::VectorFst<fst::StdArc>, decoder::BackpointerToken >;
template class LatticeFasterDecoderTpl<fst::ConstFst<fst::StdArc>, decoder::BackpointerToken >;


} // end namespace kaldi.


================================================
FILE: runtime/engine/kaldi/decoder/lattice-faster-decoder.h
================================================
// decoder/lattice-faster-decoder.h

// Copyright 2009-2013  Microsoft Corporation;  Mirko Hannemann;
//           2013-2014  Johns Hopkins University (Author: Daniel Povey)
//                2014  Guoguo Chen
//                2018  Zhehuai Chen

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_DECODER_LATTICE_FASTER_DECODER_H_
#define KALDI_DECODER_LATTICE_FASTER_DECODER_H_

#include "fst/fstlib.h"
#include "fst/memory.h"
#include "fstext/fstext-lib.h"
#include "decoder/decodable-itf.h"
#include "lat/determinize-lattice-pruned.h"
#include "lat/kaldi-lattice.h"
#include "util/hash-list.h"
#include "util/stl-utils.h"

namespace kaldi {

// Configuration options for the lattice-generating decoder.  Register() makes
// most of them available as command-line options and Check() asserts that the
// values are mutually consistent.
struct LatticeFasterDecoderConfig {
  // Decoding beam.  Larger->slower, more accurate.
  BaseFloat beam;
  // Decoder max active states.  Larger->slower; more accurate.
  int32 max_active;
  // Decoder minimum number of active states.
  int32 min_active;
  // Lattice generation beam.  Larger->slower, and deeper lattices.
  BaseFloat lattice_beam;
  // Interval (in frames) at which to prune tokens.
  int32 prune_interval;
  bool determinize_lattice; // not inspected by this class... used in
                            // command-line program.
  // Increment used in decoding; relates to a speedup in the way the
  // max-active constraint is applied.  Larger is more accurate.
  BaseFloat beam_delta;
  // Setting used in the decoder to control hash behavior.
  BaseFloat hash_ratio;
  // Note: we don't make prune_scale configurable on the command line, it's not
  // a very important parameter.  It affects the algorithm that prunes the
  // tokens as we go.
  BaseFloat prune_scale;

  // Number of elements in the block for Token and ForwardLink memory
  // pool allocation.
  int32 memory_pool_tokens_block_size;
  int32 memory_pool_links_block_size;

  // Most of the options inside det_opts are not actually queried by the
  // LatticeFasterDecoder class itself, but by the code that calls it, for
  // example in the function DecodeUtteranceLatticeFaster.
  fst::DeterminizeLatticePhonePrunedOptions det_opts;

  // Sets the default value of every option.  Note that max_active defaults to
  // the largest int32, and that both memory-pool block sizes default to 256
  // elements.
  LatticeFasterDecoderConfig()
      : beam(16.0),
        max_active(std::numeric_limits<int32>::max()),
        min_active(200),
        lattice_beam(10.0),
        prune_interval(25),
        determinize_lattice(true),
        beam_delta(0.5),
        hash_ratio(2.0),
        prune_scale(0.1),
        memory_pool_tokens_block_size(1 << 8),
        memory_pool_links_block_size(1 << 8) {}
  // Registers the options (including those of det_opts) with 'opts'.
  // prune_scale is deliberately not registered.
  void Register(OptionsItf *opts) {
    det_opts.Register(opts);
    opts->Register("beam", &beam, "Decoding beam.  Larger->slower, more accurate.");
    opts->Register("max-active", &max_active, "Decoder max active states.  Larger->slower; "
                   "more accurate");
    opts->Register("min-active", &min_active, "Decoder minimum #active states.");
    opts->Register("lattice-beam", &lattice_beam, "Lattice generation beam.  Larger->slower, "
                   "and deeper lattices");
    opts->Register("prune-interval", &prune_interval, "Interval (in frames) at "
                   "which to prune tokens");
    opts->Register("determinize-lattice", &determinize_lattice, "If true, "
                   "determinize the lattice (lattice-determinization, keeping only "
                   "best pdf-sequence for each word-sequence).");
    opts->Register("beam-delta", &beam_delta, "Increment used in decoding-- this "
                   "parameter is obscure and relates to a speedup in the way the "
                   "max-active constraint is applied.  Larger is more accurate.");
    opts->Register("hash-ratio", &hash_ratio, "Setting used in decoder to "
                   "control hash behavior");
    opts->Register("memory-pool-tokens-block-size", &memory_pool_tokens_block_size,
                   "Memory pool block size suggestion for storing tokens (in elements). "
                   "Smaller uses less memory but increases cache misses.");
    opts->Register("memory-pool-links-block-size", &memory_pool_links_block_size,
                   "Memory pool block size suggestion for storing links (in elements). "
                   "Smaller uses less memory but increases cache misses.");
  }
  // Asserts that the option values are usable: positive beams, max_active of
  // at least 2 and not below min_active, a positive prune interval and
  // beam_delta, hash_ratio of at least 1, and prune_scale strictly between 0
  // and 1.  The memory-pool block sizes are not checked here.
  void Check() const {
    KALDI_ASSERT(beam > 0.0 && max_active > 1 && lattice_beam > 0.0
                 && min_active <= max_active
                 && prune_interval > 0 && beam_delta > 0.0 && hash_ratio >= 1.0
                 && prune_scale > 0.0 && prune_scale < 1.0);
  }
};

namespace decoder {
// We will template the decoder on the token type as well as the FST type; this
// is a mechanism so that we can use the same underlying decoder code for
// versions of the decoder that support quickly getting the best path
// (LatticeFasterOnlineDecoder, see lattice-faster-online-decoder.h) and also
// those that do not (LatticeFasterDecoder).


// A ForwardLink connects a token to a token on the next frame, or, for
// input-epsilon links, to another token on the current frame.
template <typename Token>
struct ForwardLink {
  using Label = fst::StdArc::Label;

  // Destination token [or NULL if this link represents a final-state].
  Token *next_tok;
  // Input label on the arc.
  Label ilabel;
  // Output label on the arc.
  Label olabel;
  // Graph cost of traversing the arc (contains LM, etc.).
  BaseFloat graph_cost;
  // Acoustic cost (pre-scaled) of traversing the arc.
  BaseFloat acoustic_cost;
  // Next entry in the singly-linked list of forward arcs (arcs in the
  // state-level lattice) leaving the same token.
  ForwardLink *next;

  // Stores the given values in the correspondingly named members.
  ForwardLink(Token *dest_tok, Label in_label, Label out_label,
              BaseFloat graph, BaseFloat acoustic,
              ForwardLink *following)
      : next_tok(dest_tok),
        ilabel(in_label),
        olabel(out_label),
        graph_cost(graph),
        acoustic_cost(acoustic),
        next(following) { }
};


struct StdToken {
  using ForwardLinkT = ForwardLink<StdToken>;
  using Token = StdToken;

  // Standard token type for LatticeFasterDecoder.  Each active HCLG
  // (decoding-graph) state on each frame has one token.

  // tot_cost is the total (LM + acoustic) cost from the beginning of the
  // utterance up to this point.  (but see cost_offset_, which is subtracted
  // to keep it in a good numerical range).
  BaseFloat tot_cost;

  // exta_cost is >= 0.  After calling PruneForwardLinks, this equals the
  // minimum difference between the cost of the best path that this link is a
  // part of, and the cost of the absolute best path, under the assumption that
  // any of the currently active states at the decoding front may eventually
  // succeed (e.g. if you were to take the currently active states one by one
  // and compute this difference, and then take the minimum).
  BaseFloat extra_cost;

  // 'links' is the head of singly-linked list of ForwardLinks, which is what we
  // use for lattice generation.
  ForwardLinkT *links;

  //'next' is the next in the singly-linked list of tokens for this frame.
  Token *next;

  // This function does nothing and should be optimized out; it's needed
  // so we can share the regular LatticeFasterDecoderTpl code and the code
  // for LatticeFasterOnlineDecoder that supports fast traceback.
  inline void SetBackpointer (Token *backpointer) { }

  // This constructor just ignores the 'backpointer' argument.  That argument is
  // needed so that we can use the same decoder code for LatticeFasterDecoderTpl
  // and LatticeFasterOnlineDecoderTpl (which needs backpointers to support a
  // fast way to obtain the best path).
  inline StdToken(BaseFloat tot_cost, BaseFloat extra_cost, ForwardLinkT *links,
                  Token *next, Token *backpointer):
      tot_cost(tot_cost), extra_cost(extra_cost), links(links), next(next) { }
};

struct BackpointerToken {
  using ForwardLinkT = ForwardLink<BackpointerToken>;
  using Token = BackpointerToken;

  // BackpointerToken is like Token but also
  // Standard token type for LatticeFasterDecoder.  Each active HCLG
  // (decoding-graph) state on each frame has one token.

  // tot_cost is the total (LM + acoustic) cost from the beginning of the
  // utterance up to this point.  (but see cost_offset_, which is subtracted
  // to keep it in a good numerical range).
  BaseFloat tot_cost;

  // exta_cost is >= 0.  After calling PruneForwardLinks, this equals
  // the minimum difference between the cost of the best path, and the cost of
  // this is on, and the cost of the absolute best path, under the assumption
  // that any of the currently active states at the decoding front may
  // eventually succeed (e.g. if you were to take the currently active states
  // one by one and compute this difference, and then take the minimum).
  BaseFloat extra_cost;

  // 'links' is the head of singly-linked list of ForwardLinks, which is what we
  // use for lattice generation.
  ForwardLinkT *links;

  //'next' is the next in the singly-linked list of tokens for this frame.
  BackpointerToken *next;

  // Best preceding BackpointerToken (could be a on this frame, connected to
  // this via an epsilon transition, or on a previous frame).  This is only
  // required for an efficient GetBestPath function in
  // LatticeFasterOnlineDecoderTpl; it plays no part in the lattice generation
  // (the "links" list is what stores the forward links, for that).
  Token *backpointer;

  inline void SetBackpointer (Token *backpointer) {
    this->backpointer = backpointer;
  }

  inline BackpointerToken(BaseFloat tot_cost, BaseFloat extra_cost, ForwardLinkT *links,
                          Token *next, Token *backpointer):
      tot_cost(tot_cost), extra_cost(extra_cost), links(links), next(next),
      backpointer(backpointer) { }
};

}  // namespace decoder


/** This is the "normal" lattice-generating decoder.
    See \ref lattices_generation \ref decoders_faster and \ref decoders_simple
     for more information.

   The decoder is templated on the FST type and the token type.  The token type
   will normally be StdToken, but also may be BackpointerToken which is to support
   quick lookup of the current best path (see lattice-faster-online-decoder.h)

   The FST type you invoke this decoder with is expected to be
   fst::Fst<fst::StdArc>, a.k.a. StdFst, or GrammarFst.  If you invoke it with
   FST == StdFst and it notices that the actual FST type is
   fst::VectorFst<fst::StdArc> or fst::ConstFst<fst::StdArc>, the decoder object
   will internally cast itself to one that is templated on those more specific
   types; this is an optimization for speed.
 */
template <typename FST, typename Token = decoder::StdToken>
class LatticeFasterDecoderTpl {
 public:
  using Arc = typename FST::Arc;
  using Label = typename Arc::Label;
  using StateId = typename Arc::StateId;
  using Weight = typename Arc::Weight;
  using ForwardLinkT = decoder::ForwardLink<Token>;

  // Instantiate this class once for each thing you have to decode.
  // This version of the constructor does not take ownership of
  // 'fst'.
  LatticeFasterDecoderTpl(const FST &fst,
                          const LatticeFasterDecoderConfig &config);

  // This version of the constructor takes ownership of the fst, and will delete
  // it when this object is destroyed.
  LatticeFasterDecoderTpl(const LatticeFasterDecoderConfig &config,
                          FST *fst);

  // Replaces the stored decoder configuration with 'config'.
  void SetOptions(const LatticeFasterDecoderConfig &config) {
    config_ = config;
  }

  // Returns a reference to the stored decoder configuration.
  const LatticeFasterDecoderConfig &GetOptions() const {
    return config_;
  }

  ~LatticeFasterDecoderTpl();

  /// Decodes until there are no more frames left in the "decodable" object.
  /// Note: this may block waiting for input if the "decodable" object blocks.
  /// Returns true if any kind of traceback is available (not necessarily from a
  /// final state).
  bool Decode(DecodableInterface *decodable);


  /// Says whether a final-state was active on the last frame.  If it was not, the
  /// lattice (or traceback) will end with states that are not final-states.
  /// Implemented as: FinalRelativeCost() is not infinity.
  bool ReachedFinal() const {
    return FinalRelativeCost() != std::numeric_limits<BaseFloat>::infinity();
  }

  /// Outputs an FST corresponding to the single best path through the lattice.
  /// Returns true if result is nonempty (using the return status is deprecated,
  /// it will become void).  If "use_final_probs" is true AND we reached the
  /// final-state of the graph then it will include those as final-probs, else
  /// it will treat all final-probs as one.  Note: this just calls GetRawLattice()
  /// and figures out the shortest path.
  bool GetBestPath(Lattice *ofst,
                   bool use_final_probs = true) const;

  /// Outputs an FST corresponding to the raw, state-level
  /// tracebacks.  Returns true if result is nonempty.
  /// If "use_final_probs" is true AND we reached the final-state
  /// of the graph then it will include those as final-probs, else
  /// it will treat all final-probs as one.
  /// The raw lattice will be topologically sorted.
  ///
  /// See also GetRawLatticePruned in lattice-faster-online-decoder.h,
  /// which also supports a pruning beam, in case for some reason
  /// you want it pruned tighter than the regular lattice beam.
  /// We could put that here in future if needed.
  bool GetRawLattice(Lattice *ofst, bool use_final_probs = true) const;


  /// [Deprecated, users should now use GetRawLattice and determinize it
  /// themselves, e.g. using DeterminizeLatticePhonePrunedWrapper].
  /// Outputs an FST corresponding to the lattice-determinized
  /// lattice (one path per word sequence).   Returns true if result is nonempty.
  /// If "use_final_probs" is true AND we reached the final-state of the graph
  /// then it will include those as final-probs, else it will treat all
  /// final-probs as one.
  bool GetLattice(CompactLattice *ofst,
                  bool use_final_probs = true) const;

  /// InitDecoding initializes the decoding, and should only be used if you
  /// intend to call AdvanceDecoding().  If you call Decode(), you don't need to
  /// call this.  You can also call InitDecoding if you have already decoded an
  /// utterance and want to start with a new utterance.
  void InitDecoding();

  /// This will decode until there are no more frames ready in the decodable
  /// object.  You can keep calling it each time more frames become available.
  /// If max_num_frames is specified, it specifies the maximum number of frames
  /// the function will decode before returning.
  void AdvanceDecoding(DecodableInterface *decodable,
                       int32 max_num_frames = -1);

  /// This function may be optionally called after AdvanceDecoding(), when you
  /// do not plan to decode any further.  It does an extra pruning step that
  /// will help to prune the lattices output by GetLattice and (particularly)
  /// GetRawLattice more completely, particularly toward the end of the
  /// utterance.  If you call this, you cannot call AdvanceDecoding again (it
  /// will fail), and you cannot call GetLattice() and related functions with
  /// use_final_probs = false.  Used to be called PruneActiveTokensFinal().
  void FinalizeDecoding();

  /// FinalRelativeCost() serves the same purpose as ReachedFinal(), but gives
  /// more information.  It returns the difference between the best (final-cost
  /// plus cost) of any token on the final frame, and the best cost of any token
  /// on the final frame.  If it is infinity it means no final-states were
  /// present on the final frame.  It will usually be nonnegative.  If it is not
  /// too positive (e.g. < 5 is my first guess, but this is not tested) you can
  /// take it as a good indication that we reached the final-state with
  /// reasonable likelihood.
  BaseFloat FinalRelativeCost() const;


  // Returns the number of frames decoded so far.  The value returned changes
  // whenever we call ProcessEmitting().  Computed as the size of active_toks_
  // minus one.
  inline int32 NumFramesDecoded() const { return active_toks_.size() - 1; }

 protected:
  // we make things protected instead of private, as code in
  // LatticeFasterOnlineDecoderTpl, which inherits from this, also uses the
  // internals.

  // Deletes the elements of the singly linked list tok->links.
  void DeleteForwardLinks(Token *tok);

  // head of per-frame list of Tokens (list is in topological order),
  // and something saying whether we ever pruned it using PruneForwardLinks.
  // Default-constructed with an empty list (toks == NULL) and both
  // must_prune_* flags set to true.
  struct TokenList {
    Token *toks;
    bool must_prune_forward_links;
    bool must_prune_tokens;
    TokenList(): toks(NULL), must_prune_forward_links(true),
                 must_prune_tokens(true) { }
  };

  using Elem = typename HashList<StateId, Token*>::Elem;
  // Equivalent to:
  //  struct Elem {
  //    StateId key;
  //    Token *val;
  //    Elem *tail;
  //  };

  void PossiblyResizeHash(size_t num_toks);

  // FindOrAddToken either locates a token in hash of toks_, or if necessary
  // inserts a new, empty token (i.e. with no forward links) for the current
  // frame.  [note: it's inserted if necessary into hash toks_ and also into the
  // singly linked list of tokens active on this frame (whose head is at
  // active_toks_[frame]).]  The frame_plus_one argument is the acoustic frame
  // index plus one, which is used to index into the active_toks_ array.
  // Returns the Token pointer.  Sets "changed" (if non-NULL) to true if the
  // token was newly created or the cost changed.
  // If Token == StdToken, the 'backpointer' argument has no purpose (and will
  // hopefully be optimized out).
  inline Elem *FindOrAddToken(StateId state, int32 frame_plus_one,
                              BaseFloat tot_cost, Token *backpointer,
                              bool *changed);

  // prunes outgoing links for all tokens in active_toks_[frame]
  // it's called by PruneActiveTokens
  // all links, that have link_extra_cost > lattice_beam are pruned
  // delta is the amount by which the extra_costs must change
  // before we set *extra_costs_changed = true.
  // If delta is larger,  we'll tend to go back less far
  //    toward the beginning of the file.
  // extra_costs_changed is set to true if extra_cost was changed for any token
  // links_pruned is set to true if any link in any token was pruned
  void PruneForwardLinks(int32 frame_plus_one, bool *extra_costs_changed,
                         bool *links_pruned,
                         BaseFloat delta);

  // This function computes the final-costs for tokens active on the final
  // frame.  It outputs to final-costs, if non-NULL, a map from the Token*
  // pointer to the final-prob of the corresponding state, for all Tokens
  // that correspond to states that have final-probs.  This map will be
  // empty if there were no final-probs.  It outputs to
  // final_relative_cost, if non-NULL, the difference between the best
  // forward-cost including the final-prob cost, and the best forward-cost
  // without including the final-prob cost (this will usually be positive), or
  // infinity if there were no final-probs.  [c.f. FinalRelativeCost(), which
  // outputs this quantity].  It outputs to final_best_cost, if
  // non-NULL, the lowest for any token t active on the final frame, of
  // forward-cost[t] + final-cost[t], where final-cost[t] is the final-cost in
  // the graph of the state corresponding to token t, or the best of
  // forward-cost[t] if there were no final-probs active on the final frame.
  // You cannot call this after FinalizeDecoding() has been called; in that
  // case you should get the answer from class-member variables.
  void ComputeFinalCosts(unordered_map<Token*, BaseFloat> *final_costs,
                         BaseFloat *final_relative_cost,
                         BaseFloat *final_best_cost) const;

  // PruneForwardLinksFinal is a version of PruneForwardLinks that we call
  // on the final frame.  If there are final tokens active, it uses
  // the final-probs for pruning, otherwise it treats all tokens as final.
  void PruneForwardLinksFinal();

  // Prune away any tokens on this frame that have no forward links.
  // [we don't do this in PruneForwardLinks because it would give us
  // a problem with dangling pointers].
  // It's called by PruneActiveTokens if any forward links have been pruned
  void PruneTokensForFrame(int32 frame_plus_one);


  // Go backwards through still-alive tokens, pruning them if the
  // forward+backward cost is more than lat_beam away from the best path.  It's
  // possible to prove that this is "correct" in the sense that we won't lose
  // anything outside of lat_beam, regardless of what happens in the future.
  // delta controls when it considers a cost to have changed enough to continue
  // going backward and propagating the change.  larger delta -> will recurse
  // less far.
  void PruneActiveTokens(BaseFloat delta);

  /// Gets the weight cutoff.  Also counts the active tokens.
  BaseFloat GetCutoff(Elem *list_head, size_t *tok_count,
                      BaseFloat *adaptive_beam, Elem **best_elem);

  /// Processes emitting arcs for one frame.  Propagates from prev_toks_ to
  /// cur_toks_.  Returns the cost cutoff for subsequent ProcessNonemitting() to
  /// use.
  BaseFloat ProcessEmitting(DecodableInterface *decodable);

  /// Processes nonemitting (epsilon) arcs for one frame.  Called after
  /// ProcessEmitting() on each frame.  The cost cutoff is computed by the
  /// preceding ProcessEmitting().
  void ProcessNonemitting(BaseFloat cost_cutoff);

  // HashList defined in ../util/hash-list.h.  It actually allows us to maintain
  // more than one list (e.g. for current and previous frames), but only one of
  // them at a time can be indexed by StateId.  It is indexed by frame-index
  // plus one, where the frame-index is zero-based, as used in decodable object.
  // That is, the emitting probs of frame t are accounted for in tokens at
  // toks_[t+1].  The zeroth frame is for nonemitting transition at the start of
  // the graph.
  HashList<StateId, Token*> toks_;

  std::vector<TokenList> active_toks_; // Lists of tokens, indexed by
  // frame (members of TokenList are toks, must_prune_forward_links,
  // must_prune_tokens).
  std::vector<const Elem* > queue_;  // temp variable used in ProcessNonemitting.
  std::vector<BaseFloat> tmp_array_;  // used in GetCutoff.

  // fst_ is a pointer to the FST we are decoding from.
  const FST *fst_;
  // delete_fst_ is true if the pointer fst_ needs to be deleted when this
  // object is destroyed.
  bool delete_fst_;

  std::vector<BaseFloat> cost_offsets_; // This contains, for each
  // frame, an offset that was added to the acoustic log-likelihoods on that
  // frame in order to keep everything in a nice dynamic range i.e.  close to
  // zero, to reduce roundoff errors.
  LatticeFasterDecoderConfig config_;
  int32 num_toks_; // current total #toks allocated...
  bool warned_;

  /// decoding_finalized_ is true if someone called FinalizeDecoding().  [note,
  /// calling this is optional].  If true, it's forbidden to decode more.  Also,
  /// if this is set, then the output of ComputeFinalCosts() is in the next
  /// three variables.  The reason we need to do this is that after
  /// FinalizeDecoding() calls PruneTokensForFrame() for the final frame, some
  /// of the tokens on the last frame are freed, so we free the list from toks_
  /// to avoid having dangling pointers hanging around.
  bool decoding_finalized_;
  /// For the meaning of the next 3 variables, see the comment for
  /// decoding_finalized_ above, and ComputeFinalCosts().
  unordered_map<Token*, BaseFloat> final_costs_;
  BaseFloat final_relative_cost_;
  BaseFloat final_best_cost_;

  // Memory pools for storing tokens and forward links.
  // We use it to decrease the work put on allocator and to move some of data
  // together. Too small block sizes will result in more work to allocator but
  // bigger ones increase the memory usage.
  fst::MemoryPool<Token> token_pool_;
  fst::MemoryPool<ForwardLinkT> forward_link_pool_;

  // There are various cleanup tasks... the toks_ structure contains
  // singly linked lists of Token pointers, where Elem is the list type.
  // It also indexes them in a hash, indexed by state (this hash is only
  // maintained for the most recent frame).  toks_.Clear()
  // deletes them from the hash and returns the list of Elems.  The
  // function DeleteElems calls toks_.Delete(elem) for each elem in
  // the list, which returns ownership of the Elem to the toks_ structure
  // for reuse, but does not delete the Token pointer.  The Token pointers
  // are reference-counted and are ultimately deleted in PruneTokensForFrame,
  // but are also linked together on each frame by their own linked-list,
  // using the "next" pointer.  We delete them manually.
  void DeleteElems(Elem *list);

  // This function takes a singly linked list of tokens for a single frame, and
  // outputs a list of them in topological order (it will crash if no such order
  // can be found, which will typically be due to decoding graphs with epsilon
  // cycles, which are not allowed).  Note: the output list may contain NULLs,
  // which the caller should pass over; it just happens to be more efficient for
  // the algorithm to output a list that contains NULLs.
  static void TopSortTokens(Token *tok_list,
                            std::vector<Token*> *topsorted_list);

  void ClearActiveTokens();

  KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeFasterDecoderTpl);
};

// Convenience name for the decoder instantiated on the generic fst::StdFst
// graph type with decoder::StdToken tokens.
typedef LatticeFasterDecoderTpl<fst::StdFst, decoder::StdToken> LatticeFasterDecoder;


} // end namespace kaldi.

#endif


================================================
FILE: runtime/engine/kaldi/decoder/lattice-faster-online-decoder.cc
================================================
// decoder/lattice-faster-online-decoder.cc

// Copyright 2009-2012  Microsoft Corporation  Mirko Hannemann
//           2013-2014  Johns Hopkins University (Author: Daniel Povey)
//                2014  Guoguo Chen
//                2014  IMSL, PKU-HKUST (author: Wei Shi)
//                2018  Zhehuai Chen

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

// see note at the top of lattice-faster-decoder.cc, about how to maintain this
// file in sync with lattice-faster-decoder.cc

#include "decoder/lattice-faster-online-decoder.h"
#include "lat/lattice-functions.h"

namespace kaldi {

// Self-test for GetBestPath(): compares its output against the shortest path
// extracted from the raw lattice.  Returns true when the two are judged
// equivalent by fst::RandEquivalent; otherwise warns and returns false.
template <typename FST>
bool LatticeFasterOnlineDecoderTpl<FST>::TestGetBestPath(
    bool use_final_probs) const {
  // Reference answer: shortest path through the full raw lattice.
  Lattice reference_path;
  {
    Lattice full_raw_lattice;
    this->GetRawLattice(&full_raw_lattice, use_final_probs);
    ShortestPath(full_raw_lattice, &reference_path);
  }

  // Answer under test: the backpointer-based traceback.
  Lattice traced_path;
  GetBestPath(&traced_path, use_final_probs);

  const BaseFloat delta = 0.1;
  const int32 num_paths = 1;
  const bool paths_agree = fst::RandEquivalent(reference_path, traced_path,
                                               num_paths, delta, rand());
  if (paths_agree)
    return true;

  KALDI_WARN << "Best-path test failed";
  return false;
}


// Writes the single best path to *olat as a linear FST.  The path is built
// backwards: the final state is created first, then one arc (and one new
// source state) is added per TraceBackBestPath() step, and the last state
// created becomes the start state.  Returns false (leaving *olat empty) if
// BestPathEnd() yields no token.
template <typename FST>
bool LatticeFasterOnlineDecoderTpl<FST>::GetBestPath(Lattice *olat,
                                                     bool use_final_probs) const {
  olat->DeleteStates();

  BaseFloat final_graph_cost;
  BestPathIterator iter = BestPathEnd(use_final_probs, &final_graph_cost);
  if (iter.Done()) {
    return false;  // would have printed warning.
  }

  // 'dest' is always the destination state of the next arc we add.
  StateId dest = olat->AddState();
  olat->SetFinal(dest, LatticeWeight(final_graph_cost, 0.0));

  // iter is known not to be Done() here, so at least one step is taken.
  do {
    LatticeArc arc;
    iter = TraceBackBestPath(iter, &arc);
    arc.nextstate = dest;
    const StateId src = olat->AddState();
    olat->AddArc(src, arc);
    dest = src;
  } while (!iter.Done());

  olat->SetStart(dest);
  return true;
}

// Picks the lowest-cost token on the last decoded frame and returns an
// iterator positioned on it (frame index NumFramesDecoded() - 1).
//
// If use_final_probs is true and the final-cost map is non-empty, a token's
// cost is tot_cost plus its final cost, and tokens without a final cost are
// given infinite cost; otherwise only tot_cost is used.  The final cost of
// the chosen token (0 if final costs were not applied or no token was chosen)
// is written to *final_cost_out when that pointer is non-NULL.
//
// It is an error to call this with use_final_probs == false after
// FinalizeDecoding(), and at least one frame must have been decoded.
template <typename FST>
typename LatticeFasterOnlineDecoderTpl<FST>::BestPathIterator LatticeFasterOnlineDecoderTpl<FST>::BestPathEnd(
    bool use_final_probs,
    BaseFloat *final_cost_out) const {
  if (!use_final_probs && this->decoding_finalized_)
    KALDI_ERR << "You cannot call FinalizeDecoding() and then call "
              << "BestPathEnd() with use_final_probs == false";
  KALDI_ASSERT(this->NumFramesDecoded() > 0 &&
               "You cannot call BestPathEnd if no frames were decoded.");

  // After FinalizeDecoding() the final costs are cached in final_costs_;
  // before that they are computed on demand into a local map.
  unordered_map<Token*, BaseFloat> local_final_costs;
  if (use_final_probs && !this->decoding_finalized_)
    this->ComputeFinalCosts(&local_final_costs, NULL, NULL);
  const unordered_map<Token*, BaseFloat> &final_costs =
      (this->decoding_finalized_ ? this->final_costs_ : local_final_costs);

  const BaseFloat kInfinity = std::numeric_limits<BaseFloat>::infinity();
  const bool apply_final_costs = use_final_probs && !final_costs.empty();

  Token *best_tok = NULL;
  BaseFloat best_cost = kInfinity;
  BaseFloat best_final_cost = 0;

  // Walk the singly linked list of tokens on the last frame (via "next").
  for (Token *tok = this->active_toks_.back().toks;
       tok != NULL; tok = tok->next) {
    BaseFloat final_cost = 0.0;
    BaseFloat cost = tok->tot_cost;
    if (apply_final_costs) {
      typename unordered_map<Token*, BaseFloat>::const_iterator
          entry = final_costs.find(tok);
      if (entry == final_costs.end()) {
        // Final tokens exist but this is not one of them: rule it out.
        cost = kInfinity;
      } else {
        final_cost = entry->second;
        cost += final_cost;
      }
    }
    if (cost < best_cost) {
      best_tok = tok;
      best_cost = cost;
      best_final_cost = final_cost;
    }
  }

  if (best_tok == NULL) {  // this should not happen, and is likely a code error or
    // caused by infinities in likelihoods, but I'm not making
    // it a fatal error for now.
    KALDI_WARN << "No final token found.";
  }
  if (final_cost_out != NULL)
    *final_cost_out = best_final_cost;
  return BestPathIterator(best_tok, this->NumFramesDecoded() - 1);
}


// Takes one step back along the best path.  Fills in oarc's ilabel, olabel
// and weight (its nextstate is left untouched) with the cheapest forward link
// from the token's backpointer to the token itself, and returns an iterator
// on the backpointer token.  The returned frame index is decremented only when
// the chosen link is emitting (ilabel != 0).  For a token with no backpointer
// the arc is an epsilon arc with unit weight and the returned iterator is
// Done().
template <typename FST>
typename LatticeFasterOnlineDecoderTpl<FST>::BestPathIterator LatticeFasterOnlineDecoderTpl<FST>::TraceBackBestPath(
    BestPathIterator iter, LatticeArc *oarc) const {
  KALDI_ASSERT(!iter.Done() && oarc != NULL);
  Token *tok = static_cast<Token*>(iter.tok);
  Token *prev_tok = tok->backpointer;
  const int32 cur_t = iter.frame;
  int32 step_t = 0;

  if (prev_tok == NULL) {
    // Start of the path: nothing to trace, emit an epsilon arc.
    oarc->ilabel = 0;
    oarc->olabel = 0;
    oarc->weight = LatticeWeight::One(); // zero costs.
    return BestPathIterator(prev_tok, cur_t + step_t);
  }

  // retrieve the correct forward link (with the best link cost)
  const BaseFloat kInfinity = std::numeric_limits<BaseFloat>::infinity();
  BaseFloat best_cost = kInfinity;
  for (ForwardLinkT *link = prev_tok->links;
       link != NULL; link = link->next) {
    if (link->next_tok != tok)
      continue;  // not a link to "tok"
    BaseFloat graph_cost = link->graph_cost;
    BaseFloat acoustic_cost = link->acoustic_cost;
    const BaseFloat link_cost = graph_cost + acoustic_cost;
    if (!(link_cost < best_cost))
      continue;  // no improvement over what we already have
    oarc->ilabel = link->ilabel;
    oarc->olabel = link->olabel;
    if (link->ilabel == 0) {
      step_t = 0;
    } else {
      // Emitting link: undo the per-frame offset stored in cost_offsets_.
      KALDI_ASSERT(static_cast<size_t>(cur_t) < this->cost_offsets_.size());
      acoustic_cost -= this->cost_offsets_[cur_t];
      step_t = -1;
    }
    oarc->weight = LatticeWeight(graph_cost, acoustic_cost);
    best_cost = link_cost;
  }

  if (best_cost == kInfinity) { // Did not find correct link.
    KALDI_ERR << "Error tracing best-path back (likely "
              << "bug in token-pruning algorithm)";
  }
  return BestPathIterator(prev_tok, cur_t + step_t);
}

// Builds a raw (state-level) lattice in *ofst by breadth-first traversal from
// the start token, following only forward links whose destination token has
// extra_cost < beam.  Returns false (after a warning) if any frame has no
// active tokens; otherwise returns whether the output has any states.
// Final weights are set only on tokens reached on the last frame: from the
// final-cost map when use_final_probs is true and that map is non-empty,
// otherwise unit weight.
template <typename FST>
bool LatticeFasterOnlineDecoderTpl<FST>::GetRawLatticePruned(
    Lattice *ofst,
    bool use_final_probs,
    BaseFloat beam) const {
  typedef LatticeArc Arc;
  typedef Arc::StateId StateId;
  typedef Arc::Weight Weight;
  typedef Arc::Label Label;
  typedef std::pair<Token*, int32> TokenAndFrame;

  // Note: you can't use the old interface (Decode()) if you want to
  // get the lattice with use_final_probs = false.  You'd have to do
  // InitDecoding() and then AdvanceDecoding().
  if (!use_final_probs && this->decoding_finalized_)
    KALDI_ERR << "You cannot call FinalizeDecoding() and then call "
              << "GetRawLattice() with use_final_probs == false";

  // Use the cached final costs after FinalizeDecoding(); otherwise compute
  // them on demand into a local map.
  unordered_map<Token*, BaseFloat> local_final_costs;
  if (use_final_probs && !this->decoding_finalized_)
    this->ComputeFinalCosts(&local_final_costs, NULL, NULL);
  const unordered_map<Token*, BaseFloat> &final_costs =
      (this->decoding_finalized_ ? this->final_costs_ : local_final_costs);

  ofst->DeleteStates();
  // active_toks_ has one entry per decoded frame plus an extra entry at
  // index 0 for the start of the graph.
  int32 num_frames = this->active_toks_.size() - 1;
  KALDI_ASSERT(num_frames > 0);
  for (int32 f = 0; f <= num_frames; f++) {
    if (this->active_toks_[f].toks == NULL) {
      KALDI_WARN << "No tokens active on frame " << f
                 << ": not producing lattice.\n";
      return false;
    }
  }

  unordered_map<Token*, StateId> tok_map;
  std::queue<TokenAndFrame> tok_queue;

  // The initial state is the last token in the list active_toks_[0].toks
  // (the list head was checked to be non-NULL just above).
  Token *start_tok = this->active_toks_[0].toks;
  while (start_tok->next != NULL)
    start_tok = start_tok->next;
  const StateId start_state = ofst->AddState();
  tok_map[start_tok] = start_state;
  ofst->SetStart(start_state);
  tok_queue.push(TokenAndFrame(start_tok, 0));  // #frame = 0

  // Next create states for "good" tokens
  while (!tok_queue.empty()) {
    const TokenAndFrame current = tok_queue.front();
    tok_queue.pop();
    Token *cur_tok = current.first;
    int32 cur_frame = current.second;
    KALDI_ASSERT(cur_frame >= 0 &&
                 cur_frame <= this->cost_offsets_.size());

    typename unordered_map<Token*, StateId>::const_iterator cur_entry =
        tok_map.find(cur_tok);
    KALDI_ASSERT(cur_entry != tok_map.end());
    const StateId cur_state = cur_entry->second;

    for (ForwardLinkT *l = cur_tok->links; l != NULL; l = l->next) {
      Token *next_tok = l->next_tok;
      if (!(next_tok->extra_cost < beam))
        continue;  // destination token is outside the beam

      // so both the current and the next token are good; create the arc
      const bool emitting = (l->ilabel != 0);
      StateId nextstate;
      typename unordered_map<Token*, StateId>::const_iterator next_entry =
          tok_map.find(next_tok);
      if (next_entry != tok_map.end()) {
        nextstate = next_entry->second;
      } else {
        // First visit: allocate a state and schedule the token.
        nextstate = ofst->AddState();
        tok_map[next_tok] = nextstate;
        const int32 next_frame = emitting ? cur_frame + 1 : cur_frame;
        tok_queue.push(TokenAndFrame(next_tok, next_frame));
      }

      BaseFloat cost_offset = 0;
      if (emitting)
        cost_offset = this->cost_offsets_[cur_frame];
      Arc arc(l->ilabel, l->olabel,
              Weight(l->graph_cost, l->acoustic_cost - cost_offset),
              nextstate);
      ofst->AddArc(cur_state, arc);
    }

    if (cur_frame != num_frames)
      continue;  // only last-frame tokens can be final

    if (use_final_probs && !final_costs.empty()) {
      typename unordered_map<Token*, BaseFloat>::const_iterator final_entry =
          final_costs.find(cur_tok);
      if (final_entry != final_costs.end())
        ofst->SetFinal(cur_state, LatticeWeight(final_entry->second, 0));
    } else {
      ofst->SetFinal(cur_state, LatticeWeight::One());
    }
  }
  return (ofst->NumStates() != 0);
}


// Instantiate the template for the FST types that we'll need.
// The member functions above are defined in this .cc file, so each FST type
// used with LatticeFasterOnlineDecoderTpl needs an explicit instantiation
// here.  The two GrammarFst instantiations are currently commented out.
template class LatticeFasterOnlineDecoderTpl<fst::Fst<fst::StdArc> >;
template class LatticeFasterOnlineDecoderTpl<fst::VectorFst<fst::StdArc> >;
template class LatticeFasterOnlineDecoderTpl<fst::ConstFst<fst::StdArc> >;
//template class LatticeFasterOnlineDecoderTpl<fst::ConstGrammarFst >;
//template class LatticeFasterOnlineDecoderTpl<fst::VectorGrammarFst >;


} // end namespace kaldi.


================================================
FILE: runtime/engine/kaldi/decoder/lattice-faster-online-decoder.h
================================================
// decoder/lattice-faster-online-decoder.h

// Copyright 2009-2013  Microsoft Corporation;  Mirko Hannemann;
//           2013-2014  Johns Hopkins University (Author: Daniel Povey)
//                2014  Guoguo Chen
//                2018  Zhehuai Chen

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

// see note at the top of lattice-faster-decoder.h, about how to maintain this
// file in sync with lattice-faster-decoder.h


#ifndef KALDI_DECODER_LATTICE_FASTER_ONLINE_DECODER_H_
#define KALDI_DECODER_LATTICE_FASTER_ONLINE_DECODER_H_

#include "util/stl-utils.h"
#include "util/hash-list.h"
#include "fst/fstlib.h"
#include "decoder/decodable-itf.h"
#include "fstext/fstext-lib.h"
#include "lat/determinize-lattice-pruned.h"
#include "lat/kaldi-lattice.h"
#include "decoder/lattice-faster-decoder.h"

namespace kaldi {


/** LatticeFasterOnlineDecoderTpl is as LatticeFasterDecoderTpl but also
    supports an efficient way to get the best path (see the function
    BestPathEnd()), which is useful in endpointing and in situations where you
    might want to frequently access the best path.

    This is only templated on the FST type, since the Token type is required to
    be BackpointerToken.  Actually it only makes sense to instantiate
    LatticeFasterDecoderTpl with Token == BackpointerToken if you do so indirectly via
    this child class.
 */
template <typename FST>
class LatticeFasterOnlineDecoderTpl:
      public LatticeFasterDecoderTpl<FST, decoder::BackpointerToken> {
 public:
  using Arc = typename FST::Arc;
  using Label = typename Arc::Label;
  using StateId = typename Arc::StateId;
  using Weight = typename Arc::Weight;
  using Token = decoder::BackpointerToken;
  using ForwardLinkT = decoder::ForwardLink<Token>;

  // Instantiate this class once for each thing you have to decode.
  // This version of the constructor does not take ownership of
  // 'fst'.
  LatticeFasterOnlineDecoderTpl(const FST &fst,
                                const LatticeFasterDecoderConfig &config):
      LatticeFasterDecoderTpl<FST, Token>(fst, config) { }

  // This version of the initializer takes ownership of 'fst', and will delete
  // it when this object is destroyed.
  LatticeFasterOnlineDecoderTpl(const LatticeFasterDecoderConfig &config,
                                FST *fst):
      LatticeFasterDecoderTpl<FST, Token>(config, fst) { }


  // Position on the best path, as returned by BestPathEnd() and advanced by
  // TraceBackBestPath().
  struct BestPathIterator {
    // Untyped pointer to the current token; TraceBackBestPath() casts it back
    // to Token*.  NULL means the traceback is finished (see Done()).
    void *tok;
    int32 frame;
    // note, "frame" is the frame-index of the frame you'll get the
    // transition-id for next time, if you call TraceBackBestPath on this
    // iterator (assuming it's not an epsilon transition).  Note that this
    // is one less than you might reasonably expect, e.g. it's -1 for
    // the nonemitting transitions before the first frame.
    BestPathIterator(void *t, int32 f): tok(t), frame(f) { }
    // True when there is no token left to trace back from.
    bool Done() const { return tok == NULL; }
  };


  /// Outputs an FST corresponding to the single best path through the lattice.
  /// This is quite efficient because it doesn't get the entire raw lattice and find
  /// the best path through it; instead, it uses the BestPathEnd and BestPathIterator
  /// so it basically traces it back through the lattice.
  /// Returns true if result is nonempty (using the return status is deprecated,
  /// it will become void).  If "use_final_probs" is true AND we reached the
  /// final-state of the graph then it will include those as final-probs, else
  /// it will treat all final-probs as one.
  bool GetBestPath(Lattice *ofst,
                   bool use_final_probs = true) const;


  /// This function does a self-test of GetBestPath().  Returns true on
  /// success; returns false and prints a warning on failure.
  bool TestGetBestPath(bool use_final_probs = true) const;


  /// This function returns an iterator that can be used to trace back
  /// the best path.  If use_final_probs == true and at least one final state
  /// survived till the end, it will use the final-probs in working out the best
  /// final Token, and will output the final cost to *final_cost (if non-NULL),
  /// else it will use only the forward likelihood, and will put zero in
  /// *final_cost (if non-NULL).
  /// Requires that NumFramesDecoded() > 0.
  /// It is an error to call this with use_final_probs == false after
  /// FinalizeDecoding() has been called.
  BestPathIterator BestPathEnd(bool use_final_probs,
                               BaseFloat *final_cost = NULL) const;


  /// This function can be used in conjunction with BestPathEnd() to trace back
  /// the best path one link at a time (e.g. this can be useful in endpoint
  /// detection).  By "link" we mean a link in the graph; not all links cross
  /// frame boundaries, but each time you see a nonzero ilabel you can interpret
  /// that as a frame.  The return value is the updated iterator.  It outputs
  /// the ilabel and olabel, and the (graph and acoustic) weight to the "arc" pointer,
  /// while leaving its "nextstate" variable unchanged.
  /// Requires that iter is not Done() and that arc is non-NULL.
  BestPathIterator TraceBackBestPath(
      BestPathIterator iter, LatticeArc *arc) const;


  /// Behaves the same as GetRawLattice but only processes tokens whose
  /// extra_cost is smaller than the best-cost plus the specified beam.
  /// It is only worthwhile to call this function if beam is less than
  /// the lattice_beam specified in the config; otherwise, it would
  /// return essentially the same thing as GetRawLattice, but more slowly.
  /// Returns true if the output lattice is nonempty.
  bool GetRawLatticePruned(Lattice *ofst,
                           bool use_final_probs,
                           BaseFloat beam) const;

  KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeFasterOnlineDecoderTpl);
};

// Convenience name for the online decoder instantiated on the generic
// fst::StdFst graph type.
typedef LatticeFasterOnlineDecoderTpl<fst::StdFst> LatticeFasterOnlineDecoder;


} // end namespace kaldi.

#endif


================================================
FILE: runtime/engine/kaldi/fstbin/CMakeLists.txt
================================================
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)

# Command-line FST tools; each name corresponds to a <name>.cc source file in
# this directory and becomes an executable of the same name.
set(BINS
  fstaddselfloops
  fstisstochastic
  fstminimizeencoded
  fstdeterminizestar
  fsttablecompose
)

# Every tool is built from its single source file, sees the SpeechX root and
# the bundled kaldi tree as include directories, and links the same libraries.
foreach(binary IN LISTS BINS)
  add_executable(${binary} ${CMAKE_CURRENT_SOURCE_DIR}/${binary}.cc)
  target_include_directories(${binary}
    PRIVATE
      ${SPEECHX_ROOT}
      ${SPEECHX_ROOT}/kaldi
  )
  target_link_libraries(${binary}
    PUBLIC
      kaldi-fstext
      glog
      gflags
      fst
      dl
  )
endforeach()


================================================
FILE: runtime/engine/kaldi/fstbin/fstaddselfloops.cc
================================================
// fstbin/fstaddselfloops.cc

// Copyright 2009-2011  Microsoft Corporation

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "base/kaldi-common.h"
#include "fst/fstlib.h"
#include "fstext/determinize-star.h"
#include "fstext/fstext-utils.h"
#include "fstext/kaldi-fst-io.h"
#include "util/parse-options.h"
#include "util/simple-io-funcs.h"

/* some test examples:
  pushd ~/tmpdir
 ( echo 3; echo  4) > in.list
 ( echo 5; echo  6) > out.list
 ( echo "0 0 0 0"; echo "0 0" ) | fstcompile |
   fstaddselfloops in.list out.list | fstprint
 ( echo "0 1 0 1"; echo " 0 2 1 0"; echo "1 0"; echo "2 0"; ) | fstcompile |
   fstaddselfloops in.list out.list | fstprint
*/

int main(int argc, char *argv[]) {
    try {
        using namespace kaldi;  // NOLINT
        using namespace fst;    // NOLINT
        using kaldi::int32;

        const char *usage =
            "Adds self-loops to states of an FST to propagate disambiguation "
            "symbols through it\n"
            "They are added on each final state and each state with "
            "non-epsilon "
            "output symbols\n"
            "on at least one arc out of the state.  Useful in conjunction with "
            "predeterminize\n"
            "\n"
            "Usage:  fstaddselfloops in-disambig-list out-disambig-list  "
            "[in.fst "
            "[out.fst] ]\n"
            "E.g:  fstaddselfloops in.list out.list < in.fst > withloops.fst\n"
            "in.list and out.list are lists of integers, one per line, of the\n"
            "same length.\n";

        ParseOptions po(usage);
        po.Read(argc, argv);

        if (po.NumArgs() < 2 || po.NumArgs() > 4) {
            po.PrintUsage();
            exit(1);
        }

        std::string disambig_in_rxfilename = po.GetArg(1),
                    disambig_out_rxfilename = po.GetArg(2),
                    fst_in_filename = po.GetOptArg(3),
                    fst_out_filename = po.GetOptArg(4);

        VectorFst<StdArc> *fst = ReadFstKaldi(fst_in_filename);

        std::vector<int32> disambig_in;
        if (!ReadIntegerVectorSimple(disambig_in_rxfilename, &disambig_in))
            KALDI_ERR << "fstaddselfloops: Could not read disambiguation "
                         "symbols from "
                      << kaldi::PrintableRxfilename(disambig_in_rxfilename);

        std::vector<int32> disambig_out;
        if (!ReadIntegerVectorSimple(disambig_out_rxfilename, &disambig_out))
            KALDI_ERR << "fstaddselfloops: Could not read disambiguation "
                         "symbols from "
                      << kaldi::PrintableRxfilename(disambig_out_rxfilename);

        if (disambig_in.size() != disambig_out.size())
            KALDI_ERR << "fstaddselfloops: mismatch in size of disambiguation "
                         "symbols";

        AddSelfLoops(fst, disambig_in, disambig_out);

        WriteFstKaldi(*fst, fst_out_filename);

        delete fst;

        return 0;
    } catch (const std::exception &e) {
        std::cerr << e.what();
        return -1;
    }
    return 0;
}


================================================
FILE: runtime/engine/kaldi/fstbin/fstdeterminizestar.cc
================================================
// fstbin/fstdeterminizestar.cc

// Copyright 2009-2011  Microsoft Corporation

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "base/kaldi-common.h"
#include "fst/fstlib.h"
#include "fstext/determinize-star.h"
#include "fstext/fstext-utils.h"
#include "fstext/kaldi-fst-io.h"
#include "util/parse-options.h"
#if !defined(_MSC_VER) && !defined(__APPLE__)
#include <signal.h>  // Comment this line and the call to signal below if
// it causes compilation problems.  It is only to enable a debugging procedure
// when determinization does not terminate.  We are disabling this code if
// compiling on Windows because signal.h is not available there, and on
// MacOS due to a problem with <signal.h> in the initial release of Sierra.
#endif

/* some test  examples:
 ( echo "0 0 0 0"; echo "0 0" ) | fstcompile | fstdeterminizestar | fstprint
 ( echo "0 0 1 0"; echo "0 0" ) | fstcompile | fstdeterminizestar | fstprint
 ( echo "0 0 1 0"; echo "0 1 1 0"; echo "0 0" ) | fstcompile |
   fstdeterminizestar | fstprint
 # this last one fails [correctly]:
 ( echo "0 0 0 1"; echo "0 0" ) | fstcompile | fstdeterminizestar | fstprint

  cd ~/tmpdir
  while true; do
    fstrand > 1.fst
    fstpredeterminize out.lst 1.fst | fstdeterminizestar |
      fstrmsymbols out.lst > 2.fst
    fstequivalent --random=true 1.fst 2.fst || echo "Test failed"
    echo -n "."
  done

 Test of debugging [with non-determinizable input]:
 ( echo " 0 0 1 0 1.0"; echo "0 1 1 0"; echo "1 1 1 0 0"; echo "0 2 2 0";
   echo "2"; echo "1" ) | fstcompile | fstdeterminizestar
 kill -SIGUSR1 [the process-id of fstdeterminizestar]
 # prints out a bunch of debugging output showing the mess it got itself into.
*/

// Flag whose address main() passes to DeterminizeStar() /
// DeterminizeStarInLog().  It starts out false and is only ever set to true,
// by signal_handler() below.
bool debug_location = false;
// Signal handler that main() installs for SIGUSR1 (on platforms where
// <signal.h> is included above).  It ignores the signal number and raises
// debug_location; presumably the determinization code polls the flag to print
// traceback information -- confirm in fstext/determinize-star.h.
void signal_handler(int) { debug_location = true; }

int main(int argc, char *argv[]) {
    try {
        using namespace kaldi;  // NOLINT
        using namespace fst;    // NOLINT
        using kaldi::int32;

        const char *usage =
            "Removes epsilons and determinizes in one step\n"
            "\n"
            "Usage:  fstdeterminizestar [in.fst [out.fst] ]\n"
            "\n"
            "See also: fstdeterminizelog, lattice-determinize\n";

        float delta = kDelta;
        int max_states = -1;
        bool use_log = false;
        ParseOptions po(usage);
        po.Register("use-log", &use_log, "Determinize in log semiring.");
        po.Register("delta",
                    &delta,
                    "Delta value used to determine equivalence of weights.");
        po.Register("max-states",
                    &max_states,
                    "Maximum number of states in determinized FST before it "
                    "will abort.");
        po.Read(argc, argv);

        if (po.NumArgs() > 2) {
            po.PrintUsage();
            exit(1);
        }

        std::string fst_in_str = po.GetOptArg(1), fst_out_str = po.GetOptArg(2);

// This enables us to get traceback info from determinization that is
// not seeming to terminate.
#if !defined(_MSC_VER) && !defined(__APPLE__)
        signal(SIGUSR1, signal_handler);
#endif
        // Normal case: just files.
        VectorFst<StdArc> *fst = ReadFstKaldi(fst_in_str);

        ArcSort(fst, ILabelCompare<StdArc>());  // improves speed.
        if (use_log) {
            DeterminizeStarInLog(fst, delta, &debug_location, max_states);
        } else {
            VectorFst<StdArc> det_fst;
            DeterminizeStar(*fst, &det_fst, delta, &debug_location, max_states);
            *fst = det_fst;  // will do shallow copy and then det_fst goes
                             // out of scope anyway.
        }
        WriteFstKaldi(*fst, fst_out_str);
        delete fst;
        return 0;
    } catch (const std::exception &e) {
        std::cerr << e.what();
        return -1;
    }
}


================================================
FILE: runtime/engine/kaldi/fstbin/fstisstochastic.cc
================================================
// fstbin/fstisstochastic.cc

// Copyright 2009-2011  Microsoft Corporation

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "base/kaldi-common.h"
#include "fst/fstlib.h"
#include "fstext/fstext-utils.h"
#include "fstext/kaldi-fst-io.h"
#include "util/kaldi-io.h"
#include "util/parse-options.h"

// e.g. of test:
// echo " 0 0" | fstcompile | fstisstochastic
// should return 0 and print "0 0" [meaning, min and
// max weight are one = exp(0)]
//
// echo " 0 1" | fstcompile | fstisstochastic
// should  return 1, not stochastic, and print 1 1
//
// (echo "0 0 0 0 0.693147 "; echo "0 1 0 0 0.693147 "; echo "1 0" ) |
//   fstcompile | fstisstochastic
// should return 0, stochastic; it prints "0 -1.78e-07" for me
//
// (echo "0 0 0 0 0.693147 "; echo "0 1 0 0 0.693147 "; echo "1 0" ) |
//   fstcompile | fstisstochastic --test-in-log=false
// should return 1, not stochastic in tropical; it prints "0 0.693147" for me
//
// (echo "0 0 0 0 0 "; echo "0 1 0 0 0 "; echo "1 0" ) |
//   fstcompile | fstisstochastic --test-in-log=false
// should return 0, stochastic in tropical; it prints "0 0" for me
//
// (echo "0 0 0 0 0.693147 "; echo "0 1 0 0 0.693147 "; echo "1 0" ) |
//   fstcompile | fstisstochastic --test-in-log=false --delta=1
// returns 0 even though not stochastic because we gave it an absurdly large
// delta.

int main(int argc, char *argv[]) {
    try {
        using namespace kaldi;  // NOLINT
        using namespace fst;    // NOLINT
        using kaldi::int32;

        const char *usage =
            "Checks whether an FST is stochastic and exits with success if "
            "so.\n"
            "Prints out maximum error (in log units).\n"
            "\n"
            "Usage:  fstisstochastic [ in.fst ]\n";

        float delta = 0.01;
        bool test_in_log = true;

        ParseOptions po(usage);
        po.Register("delta", &delta, "Maximum error to accept.");
        po.Register(
            "test-in-log", &test_in_log, "Test stochasticity in log semiring.");
        po.Read(argc, argv);

        if (po.NumArgs() > 1) {
            po.PrintUsage();
            exit(1);
        }

        std::string fst_in_filename = po.GetOptArg(1);

        Fst<StdArc> *fst = ReadFstKaldiGeneric(fst_in_filename);

        bool ans;
        StdArc::Weight min, max;
        if (test_in_log)
            ans = IsStochasticFstInLog(*fst, delta, &min, &max);
        else
            ans = IsStochasticFst(*fst, delta, &min, &max);

        std::cout << min.Value() << " " << max.Value() << '\n';
        delete fst;
        if (ans)
            return 0;  // success;
        else
            return 1;
    } catch (const std::exception &e) {
        std::cerr << e.what();
        return -1;
    }
}


================================================
FILE: runtime/engine/kaldi/fstbin/fstminimizeencoded.cc
================================================
// fstbin/fstminimizeencoded.cc

// Copyright 2009-2011  Microsoft Corporation

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "base/kaldi-common.h"
#include "fst/fstlib.h"
#include "fstext/determinize-star.h"
#include "fstext/fstext-utils.h"
#include "fstext/kaldi-fst-io.h"
#include "util/kaldi-io.h"
#include "util/parse-options.h"
#include "util/text-utils.h"

/* some test  examples:
 ( echo "0 0 0 0"; echo "0 0" ) | fstcompile | fstminimizeencoded | fstprint
 ( echo "0 1 0 0"; echo " 0 2 0 0"; echo "1 0"; echo "2 0"; ) | fstcompile |
 fstminimizeencoded | fstprint
*/

// Entry point of fstminimizeencoded.
//
// Reads an FST (optional first argument), calls MinimizeEncoded() on it with
// the --delta value, and writes the result (optional second argument).
//
// Returns 0 on success.  Exits with status 1 after printing the usage
// message when more than two arguments are supplied, and returns -1 after
// printing the message of any std::exception that reaches the handler.
int main(int argc, char *argv[]) {
    try {
        using namespace kaldi;  // NOLINT
        using namespace fst;    // NOLINT
        using kaldi::int32;

        const char *usage =
            "Minimizes FST after encoding [similar to fstminimize, but no "
            "weight-pushing]\n"
            "\n"
            "Usage:  fstminimizeencoded [in.fst [out.fst] ]\n";

        float delta = kDelta;

        ParseOptions po(usage);
        po.Register("delta",
                    &delta,
                    "Delta likelihood used for quantization of weights");
        po.Read(argc, argv);

        if (po.NumArgs() > 2) {
            po.PrintUsage();
            exit(1);
        }

        const std::string input_name = po.GetOptArg(1);
        const std::string output_name = po.GetOptArg(2);

        // Read, minimize in place, write, then release the FST.
        VectorFst<StdArc> *fst = ReadFstKaldi(input_name);
        MinimizeEncoded(fst, delta);
        WriteFstKaldi(*fst, output_name);
        delete fst;

        return 0;
    } catch (const std::exception &e) {
        std::cerr << e.what();
        return -1;
    }
}


================================================
FILE: runtime/engine/kaldi/fstbin/fsttablecompose.cc
================================================
// fstbin/fsttablecompose.cc

// Copyright 2009-2011  Microsoft Corporation
//                2013  Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "base/kaldi-common.h"
#include "fst/fstlib.h"
#include "fstext/fstext-utils.h"
#include "fstext/kaldi-fst-io.h"
#include "fstext/table-matcher.h"
#include "util/parse-options.h"

/*
  cd ~/tmpdir
  while true; do
    fstrand | fstarcsort --sort_type=olabel > 1.fst
    fstrand | fstarcsort > 2.fst
    fstcompose 1.fst 2.fst > 3a.fst
    fsttablecompose 1.fst 2.fst > 3b.fst
    fstequivalent --random=true 3a.fst 3b.fst || echo "Test failed"
    echo -n "."
  done

*/

int main(int argc, char *argv[]) {
    try {
        using namespace kaldi;  // NOLINT
        using namespace fst;    // NOLINT
        using kaldi::int32;
        /*
          fsttablecompose should always give equivalent results to compose,
          but it is more efficient for certain kinds of inputs.
          In particular, it is useful when, say, the left FST has states
          that typically either have epsilon olabels, or
          one transition out for each of the possible symbols (as the
          olabel).  The same with the input symbols of the right-hand FST
          is possible.
        */

        const char *usage =
            "Composition algorithm [between two FSTs of standard type, in "
            "tropical\n"
            "semiring] that is more efficient for certain cases-- in "
            "particular,\n"
            "where one of the FSTs (the left one, if --match-side=left) has "
            "large\n"
            "out-degree\n"
            "\n"
            "Usage:  fsttablecompose (fst1-rxfilename|fst1-rspecifier) "
            "(fst2-rxfilename|fst2-rspecifier) "
            "[(out-rxfilename|out-rspecifier)]\n";

        ParseOptions po(usage);

        TableComposeOptions opts;
        std::string match_side = "left";
        std::string compose_filter = "sequence";

        po.Register(
            "connect", &opts.connect, "If true, trim FST before output.");
        po.Register("match-side",
                    &match_side,
                    "Side of composition to do table "
                    "match, one of: \"left\" or \"right\".");
        po.Register(
            "compose-filter",
            &compose_filter,
            "Composition filter to use, "
            "one of: \"alt_sequence\", \"auto\", \"match\", \"sequence\"");

        po.Read(argc, argv);

        if (match_side == "left") {
            opts.table_match_type = MATCH_OUTPUT;
        } else if (match_side == "right") {
            opts.table_match_type = MATCH_INPUT;
        } else {
            KALDI_ERR << "Invalid match-side option: " << match_side;
        }

        if (compose_filter == "alt_sequence") {
            opts.filter_type = ALT_SEQUENCE_FILTER;
        } else if (compose_filter == "auto") {
            opts.filter_type = AUTO_FILTER;
        } else if (compose_filter == "match") {
            opts.filter_type = MATCH_FILTER;
        } else if (compose_filter == "sequence") {
            opts.filter_type = SEQUENCE_FILTER;
        } else {
            KALDI_ERR << "Invalid compose-filter option: " << compose_filter;
        }

        if (po.NumArgs() < 2 || po.NumArgs() > 3) {
            po.PrintUsage();
            exit(1);
        }

        std::string fst1_in_str = po.GetArg(1), fst2_in_str = po.GetArg(2),
                    fst_out_str = po.GetOptArg(3);

        VectorFst<StdArc> *fst1 = ReadFstKaldi(fst1_in_str);

        VectorFst<StdArc> *fst2 = ReadFstKaldi(fst2_in_str);

        // Checks if <fst1> is olabel sorted and <fst2> is ilabel sorted.
        if (fst1->Properties(fst::kOLabelSorted, true) == 0) {
            KALDI_WARN << "The first FST is not olabel sorted.";
        }
        if (fst2->Properties(fst::kILabelSorted, true) == 0) {
            KALDI_WARN << "The second FST is not ilabel sorted.";
        }

        VectorFst<StdArc> composed_fst;

        TableCompose(*fst1, *fst2, &composed_fst, opts);

        delete fst1;
        delete fst2;

        WriteFstKaldi(composed_fst, fst_out_str);
        return 0;
    } catch (const std::exception &e) {
        std::cerr << e.what();
        return -1;
    }
}


================================================
FILE: runtime/engine/kaldi/fstext/CMakeLists.txt
================================================

# kaldi-fstext: library built from kaldi-fst-io.cc.  kaldi-util is linked
# PUBLIC, so targets that link kaldi-fstext link it as well.
add_library(kaldi-fstext kaldi-fst-io.cc)
target_link_libraries(kaldi-fstext
  PUBLIC
    kaldi-util
)


================================================
FILE: runtime/engine/kaldi/fstext/determinize-lattice-inl.h
================================================
// fstext/determinize-lattice-inl.h

// Copyright 2009-2012  Microsoft Corporation
//           2012-2013  Johns Hopkins University (Author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_FSTEXT_DETERMINIZE_LATTICE_INL_H_
#define KALDI_FSTEXT_DETERMINIZE_LATTICE_INL_H_
// Do not include this file directly.  It is included by determinize-lattice.h

#include <algorithm>
#include <climits>
#include <deque>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

namespace fst {

// This class maps back and forth from/to integer id's to sequences of strings.
// used in determinization algorithm.  It is constructed in such a way that
// finding the string-id of the successor of (string, next-label) has constant
// time.

// Note: class IntType, typically int32, is the type of the element in the
// string (typically a template argument of the CompactLatticeWeightTpl).

// Repository of strings (sequences of IntType) in which every distinct string
// is represented by exactly one heap-allocated Entry.  A string is stored as a
// reverse-linked list: an Entry holds the last symbol of the string and a
// pointer to the Entry of the string without that symbol; the empty string is
// the NULL pointer.  Because entries are unique per (parent, symbol) pair, two
// strings obtained from the same repository are equal exactly when their
// Entry pointers are equal.
template <class IntType>
class LatticeStringRepository {
 public:
  struct Entry {
    const Entry *parent;  // NULL for empty string.
    IntType i;            // Last symbol of the string.
    inline bool operator==(const Entry &other) const {
      return (parent == other.parent && i == other.i);
    }
    // Leaves both members uninitialized; Successor() fills them in before
    // the entry is used.
    Entry() {}
    Entry(const Entry &e) : parent(e.parent), i(e.i) {}
  };
  // Note: all Entry* pointers returned in function calls are
  // owned by the repository itself, not by the caller!

  // Interface guarantees empty string is NULL.
  inline const Entry *EmptyString() { return NULL; }

  // Returns string of "parent" with i appended.  Pointer
  // owned by repository
  const Entry *Successor(const Entry *parent, IntType i) {
    // Destroy() releases the scratch entry.  Re-create it on demand so that
    // the repository can still be used (as an empty one) afterwards instead
    // of dereferencing NULL.
    if (new_entry_ == NULL) new_entry_ = new Entry();
    new_entry_->parent = parent;
    new_entry_->i = i;

    std::pair<typename SetType::iterator, bool> pr = set_.insert(new_entry_);
    if (pr.second) {  // Was successfully inserted (was not there).  The
                      // scratch entry now belongs to set_, so we need a
                      // fresh scratch entry for the next call.
      const Entry *ans = new_entry_;
      // Drop our alias first: if the allocation below throws, Destroy() must
      // not delete the entry a second time through new_entry_.
      new_entry_ = NULL;
      new_entry_ = new Entry();
      return ans;
    } else {  // Was not inserted because an equivalent Entry already
              // existed.
      return *pr.first;
    }
  }

  // Returns the string a followed by the string b.
  const Entry *Concatenate(const Entry *a, const Entry *b) {
    if (a == NULL)
      return b;
    else if (b == NULL)
      return a;
    std::vector<IntType> v;
    ConvertToVector(b, &v);
    const Entry *ans = a;
    for (size_t i = 0; i < v.size(); i++) ans = Successor(ans, v[i]);
    return ans;
  }

  // Returns the longest string that is a prefix of both a and b.
  const Entry *CommonPrefix(const Entry *a, const Entry *b) {
    std::vector<IntType> a_vec, b_vec;
    ConvertToVector(a, &a_vec);
    ConvertToVector(b, &b_vec);
    const Entry *ans = NULL;
    for (size_t i = 0;
         i < a_vec.size() && i < b_vec.size() && a_vec[i] == b_vec[i]; i++)
      ans = Successor(ans, a_vec[i]);
    return ans;
  }

  // removes any elements from b that are not part of
  // a common prefix with a.
  void ReduceToCommonPrefix(const Entry *a, std::vector<IntType> *b) {
    size_t a_size = Size(a), b_size = b->size();
    // Strip symbols from the end of a until it is no longer than b.
    while (a_size > b_size) {
      a = a->parent;
      a_size--;
    }
    if (b_size > a_size) b_size = a_size;
    typename std::vector<IntType>::iterator b_begin = b->begin();
    // Walk a from its end towards its start.  Each mismatch found shortens
    // the candidate prefix, so the earliest mismatch determines b_size.
    while (a_size != 0) {
      if (a->i != *(b_begin + a_size - 1)) b_size = a_size - 1;
      a = a->parent;
      a_size--;
    }
    if (b_size != b->size()) b->resize(b_size);
  }

  // removes the first n elements of a.
  const Entry *RemovePrefix(const Entry *a, size_t n) {
    if (n == 0) return a;
    std::vector<IntType> a_vec;
    ConvertToVector(a, &a_vec);
    assert(a_vec.size() >= n);
    const Entry *ans = NULL;
    for (size_t i = n; i < a_vec.size(); i++) ans = Successor(ans, a_vec[i]);
    return ans;
  }

  // Returns true if a is a prefix of b.  If a is prefix of b,
  // time taken is |b| - |a|.  Else, time taken is |b|.
  // Implemented as a loop over b's ancestors so that stack use does not
  // grow with the length of b.
  bool IsPrefixOf(const Entry *a, const Entry *b) const {
    if (a == NULL) return true;  // empty string prefix of all.
    for (; b != NULL; b = b->parent) {
      if (a == b) return true;
    }
    return false;
  }

  // Returns the number of symbols in the string.
  inline size_t Size(const Entry *entry) const {
    size_t ans = 0;
    while (entry != NULL) {
      ans++;
      entry = entry->parent;
    }
    return ans;
  }

  // Writes the symbols of the string, first symbol first, to *out (which is
  // resized to the length of the string).
  void ConvertToVector(const Entry *entry, std::vector<IntType> *out) const {
    size_t length = Size(entry);
    out->resize(length);
    if (entry != NULL) {
      // The list is linked from the last symbol backwards, so fill the
      // vector from its end.
      typename std::vector<IntType>::reverse_iterator iter = out->rbegin();
      while (entry != NULL) {
        *iter = entry->i;
        entry = entry->parent;
        ++iter;
      }
    }
  }

  // Returns the repository's entry for the string given as a vector.
  const Entry *ConvertFromVector(const std::vector<IntType> &vec) {
    const Entry *e = NULL;
    for (size_t i = 0; i < vec.size(); i++) e = Successor(e, vec[i]);
    return e;
  }

  LatticeStringRepository() { new_entry_ = new Entry; }

  // Frees every Entry, including the scratch entry, and empties the set.
  // All Entry pointers previously returned become invalid.
  void Destroy() {
    for (typename SetType::iterator iter = set_.begin(); iter != set_.end();
         ++iter)
      delete *iter;
    // Swap with an empty set so the set's own storage is released too.
    SetType tmp;
    tmp.swap(set_);
    if (new_entry_) {
      delete new_entry_;
      new_entry_ = NULL;
    }
  }

  // Rebuild will rebuild this object, guaranteeing only
  // to preserve the Entry values that are in the vector pointed
  // to (this list does not have to be unique).  The point of
  // this is to save memory.
  void Rebuild(const std::vector<const Entry *> &to_keep) {
    SetType tmp_set;
    // Collect every entry to keep together with all of its ancestors.
    for (typename std::vector<const Entry *>::const_iterator iter =
             to_keep.begin();
         iter != to_keep.end(); ++iter)
      RebuildHelper(*iter, &tmp_set);
    // Now delete all elems not in tmp_set.
    for (typename SetType::iterator iter = set_.begin(); iter != set_.end();
         ++iter) {
      if (tmp_set.count(*iter) == 0)
        delete (*iter);  // delete the Entry; not needed.
    }
    set_.swap(tmp_set);
  }

  ~LatticeStringRepository() { Destroy(); }
  int32 MemSize() const {
    return set_.size() * sizeof(Entry) * 2;  // this is a lower bound
    // on the size this structure might take.
  }

 private:
  class EntryKey {  // Hash function object.
   public:
    // Hashes on the same (parent pointer, symbol) pair that EntryEqual
    // compares.
    inline size_t operator()(const Entry *entry) const {
      size_t prime = 49109;
      return static_cast<size_t>(entry->i) +
             prime * reinterpret_cast<size_t>(entry->parent);
    }
  };
  class EntryEqual {
   public:
    // Compares the pointed-to entries by value, not the pointers.
    inline bool operator()(const Entry *e1, const Entry *e2) const {
      return (*e1 == *e2);
    }
  };
  typedef std::unordered_set<const Entry *, EntryKey, EntryEqual> SetType;

  // Inserts to_add and its ancestors into *tmp_set, stopping at the first
  // one that is already present (whose ancestors were added earlier).
  void RebuildHelper(const Entry *to_add, SetType *tmp_set) {
    while (true) {
      if (to_add == NULL) return;
      typename SetType::iterator iter = tmp_set->find(to_add);
      if (iter == tmp_set->end()) {  // not in tmp_set.
        tmp_set->insert(to_add);
        to_add = to_add->parent;  // and loop.
      } else {
        return;
      }
    }
  }

  KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeStringRepository);
  Entry *new_entry_;  // We always have a pre-allocated Entry ready to use,
                      // to avoid unnecessary news and deletes.  It is NULL
                      // only between a call to Destroy() and the next call
                      // to Successor().
  SetType set_;
};

// class LatticeDeterminizer is templated on the same types that
// CompactLatticeWeight is templated on: the base weight (Weight), typically
// LatticeWeightTpl<float> etc. but could also be e.g. TropicalWeight, and the
// IntType, typically int32, used for the output symbols in the compact
// representation of strings [note: the output symbols would usually be
// p.d.f. id's in the anticipated use of this code] It has a special requirement
// on the Weight type: that there should be a Compare function on the weights
// such that Compare(w1, w2) returns -1 if w1 < w2, 0 if w1 == w2, and +1 if w1
// > w2.  This requires that there be a total order on the weights.

template <class Weight, class IntType>
class LatticeDeterminizer {
 public:
  // Output to Gallic acceptor (so the strings go on weights, and there is a 1-1
  // correspondence between our states and the states in ofst).  If destroy ==
  // true, release memory as we go (but we cannot output again).

  typedef CompactLatticeWeightTpl<Weight, IntType> CompactWeight;
  typedef ArcTpl<CompactWeight>
      CompactArc;              // arc in compact, acceptor form of lattice
  typedef ArcTpl<Weight> Arc;  // arc in non-compact version of lattice

  // Output to standard FST with CompactWeightTpl<Weight> as its weight type
  // (the weight stores the original output-symbol strings).  If destroy ==
  // true, release memory as we go (but we cannot output again).
  void Output(MutableFst<CompactArc> *ofst, bool destroy = true) {
    // Writes the determinized result into *ofst as an acceptor whose weights
    // carry both the base weight and the output-symbol string.  Entry s of
    // output_arcs_ becomes state s of *ofst, and state 0 is the start state.
    //
    //   ofst:    destination FST; its previous states are deleted first.
    //   destroy: if true, memory held by this object is released while
    //            writing: the scratch structures up front, each state's arc
    //            list once it has been copied, and finally the string
    //            repository (matching the Output(MutableFst<Arc>*) overload).
    //            No further output is possible afterwards.
    assert(determinized_);
    typedef typename Arc::StateId StateId;
    StateId nStates = static_cast<StateId>(output_arcs_.size());
    if (destroy) FreeMostMemory();
    ofst->DeleteStates();
    ofst->SetStart(kNoStateId);
    if (nStates == 0) {
      return;
    }
    for (StateId s = 0; s < nStates; s++) {
      OutputStateId news = ofst->AddState();
      assert(news == s);
      (void)news;  // keeps the variable "used" when NDEBUG removes the assert.
    }
    ofst->SetStart(0);
    // now process transitions.
    for (StateId this_state = 0; this_state < nStates; this_state++) {
      std::vector<TempArc> &this_vec(output_arcs_[this_state]);
      typename std::vector<TempArc>::const_iterator iter = this_vec.begin(),
                                                    end = this_vec.end();

      for (; iter != end; ++iter) {
        const TempArc &temp_arc(*iter);
        CompactArc new_arc;
        std::vector<Label> seq;
        repository_.ConvertToVector(temp_arc.string, &seq);
        CompactWeight weight(temp_arc.weight, seq);
        if (temp_arc.nextstate == kNoStateId) {  // is really final weight.
          ofst->SetFinal(this_state, weight);
        } else {  // is really an arc.
          new_arc.nextstate = temp_arc.nextstate;
          new_arc.ilabel = temp_arc.ilabel;
          new_arc.olabel = temp_arc.ilabel;  // acceptor.  input == output.
          new_arc.weight = weight;           // includes string and weight.
          ofst->AddArc(this_state, new_arc);
        }
      }
      // Free up memory.  Do this inside the loop as ofst is also allocating
      // memory
      if (destroy) {
        std::vector<TempArc> temp;
        std::swap(temp, this_vec);
      }
    }
    if (destroy) {
      std::vector<std::vector<TempArc> > temp;
      std::swap(temp, output_arcs_);
      // Every string has been converted by now, so the repository can go too;
      // the other Output() overload releases it at the same point.
      repository_.Destroy();
    }
  }

  // Output to standard FST with Weight as its weight type.  We will create
  // extra states to handle sequences of symbols on the output.  If destroy ==
  // true, release memory as we go (but we cannot output again).
  void Output(MutableFst<Arc> *ofst, bool destroy = true) {
    // Writes the result as an ordinary FST over Arc.  A string of output
    // symbols attached to an arc or final weight is unrolled into a chain of
    // newly added states, one output symbol per arc.
    const OutputStateId nStates =
        static_cast<OutputStateId>(output_arcs_.size());
    ofst->DeleteStates();
    if (nStates == 0) {
      ofst->SetStart(kNoStateId);
      return;
    }
    if (destroy) FreeMostMemory();
    // First create one state per determinized state; the extra states needed
    // for multi-symbol strings are appended later, as they are required.
    for (OutputStateId s = 0; s < nStates; s++) {
      OutputStateId news = ofst->AddState();
      assert(news == s);
    }
    ofst->SetStart(0);
    for (OutputStateId this_state = 0; this_state < nStates; this_state++) {
      std::vector<TempArc> &arcs_here = output_arcs_[this_state];

      for (size_t a = 0; a < arcs_here.size(); a++) {
        const TempArc &temp_arc = arcs_here[a];
        std::vector<Label> seq;
        repository_.ConvertToVector(temp_arc.string, &seq);

        OutputStateId cur_state = this_state;
        if (temp_arc.nextstate == kNoStateId) {
          // A final weight.  Emit one epsilon-input arc per symbol, each
          // leading to a fresh state; the weight rides on the first arc and
          // the last state of the chain is made final.
          for (size_t i = 0; i < seq.size(); i++) {
            Arc arc;
            arc.nextstate = ofst->AddState();
            if (i == 0)
              arc.weight = temp_arc.weight;
            else
              arc.weight = Weight::One();
            arc.ilabel = 0;  // epsilon.
            arc.olabel = seq[i];
            ofst->AddArc(cur_state, arc);
            cur_state = arc.nextstate;
          }
          if (seq.empty())
            ofst->SetFinal(cur_state, temp_arc.weight);
          else
            ofst->SetFinal(cur_state, Weight::One());
        } else {
          // A real arc.  All symbols but the last get their own new state.
          // The bound is written as i + 1 < seq.size() because the size is
          // unsigned: seq.size() - 1 would wrap around for an empty sequence.
          for (size_t i = 0; i + 1 < seq.size(); i++) {
            Arc arc;
            arc.nextstate = ofst->AddState();
            if (i == 0) {
              // weight and input label go on the first arc of the chain.
              arc.weight = temp_arc.weight;
              arc.ilabel = temp_arc.ilabel;
            } else {
              arc.weight = Weight::One();
              arc.ilabel = 0;
            }
            arc.olabel = seq[i];
            ofst->AddArc(cur_state, arc);
            cur_state = arc.nextstate;
          }
          // Last arc of the chain; it is the only arc when seq has at most
          // one symbol, in which case it carries the weight and input label.
          Arc last_arc;
          last_arc.nextstate = temp_arc.nextstate;
          if (seq.size() <= 1) {
            last_arc.weight = temp_arc.weight;
            last_arc.ilabel = temp_arc.ilabel;
          } else {
            last_arc.weight = Weight::One();
            last_arc.ilabel = 0;
          }
          if (seq.size() > 0)
            last_arc.olabel = seq.back();
          else
            last_arc.olabel = 0;
          ofst->AddArc(cur_state, last_arc);
        }
      }
      // Release this state's arcs right away: ofst is allocating memory at
      // the same time.
      if (destroy) {
        std::vector<TempArc>().swap(arcs_here);
      }
    }
    if (destroy) {
      std::vector<std::vector<TempArc> >().swap(output_arcs_);
      repository_.Destroy();
    }
  }

  // Initializer.  After initializing the object you will typically
  // call Determinize() and then call one of the Output functions.
  // Note: ifst.Copy() will generally do a
  // shallow copy.  We do it like this for memory safety, rather than
  // keeping a reference or pointer to ifst_.
  LatticeDeterminizer(const Fst<Arc> &ifst, DeterminizeLatticeOptions opts)
      : num_arcs_(0),
        num_elems_(0),
        // Private copy of the input; it is deleted in FreeMostMemory().
        ifst_(ifst.Copy()),
        opts_(opts),
        // Reads the member opts_ rather than the parameter, so opts_ has to be
        // initialized before equal_.
        // NOTE(review): that depends on the member declaration order, which is
        // not visible here -- confirm.
        equal_(opts_.delta),
        determinized_(false),
        // Both hashes start with a bucket-count hint of 3 and share the same
        // hasher and equality objects.
        minimal_hash_(3, hasher_, equal_),
        initial_hash_(3, hasher_, equal_) {
    // The weight type must be idempotent; the algorithm won't work correctly
    // otherwise.
    KALDI_ASSERT(Weight::Properties() & kIdempotent);
  }

  // frees all except output_arcs_, which contains the important info
  // we need to output the FST.
  void FreeMostMemory() {
    if (ifst_) {
      delete ifst_;
      ifst_ = NULL;
    }
    for (typename MinimalSubsetHash::iterator iter = minimal_hash_.begin();
         iter != minimal_hash_.end(); ++iter)
      delete iter->first;
    {
      MinimalSubsetHash tmp;
      tmp.swap(minimal_hash_);
    }
    for (typename InitialSubsetHash::iterator iter = initial_hash_.begin();
         iter != initial_hash_.end(); ++iter)
      delete iter->first;
    {
      InitialSubsetHash tmp;
      tmp.swap(initial_hash_);
    }
    {
      std::vector<std::vector<Element> *> output_states_tmp;
      output_states_tmp.swap(output_states_);
    }
    {
      std::vector<char> tmp;
      tmp.swap(isymbol_or_final_);
    }
    {
      std::vector<OutputStateId> tmp;
      tmp.swap(queue_);
    }
    {
      std::vector<std::pair<Label, Element> > tmp;
      tmp.swap(all_elems_tmp_);
    }
  }

  // Frees what FreeMostMemory() covers; the remaining members are released
  // by their own destructors.
  ~LatticeDeterminizer() { FreeMostMemory(); }
  void RebuildRepository() {  // rebuild the string repository,
    // freeing stuff we don't need.. we call this when memory usage
    // passes a supplied threshold.  We need to accumulate all the
    // strings we need the repository to "remember", then tell it
    // to clean the repository.
    std::vector<StringId> needed_strings;
    for (size_t i = 0; i < output_arcs_.size(); i++)
      for (size_t j = 0; j < output_arcs_[i].size(); j++)
        needed_strings.push_back(output_arcs_[i][j].string);

    // the following loop covers strings present in minimal_hash_
    // which are also accessible via output_states_.
    for (size_t i = 0; i < output_states_.size(); i++)
      for (size_t j = 0; j < output_states_[i]->size(); j++)
        needed_strings.push_back((*(output_states_[i]))[j].string);

    // the following loop covers strings present in initial_hash_.
    for (typename InitialSubsetHash::const_iterator iter =
             initial_hash_.begin();
         iter != initial_hash_.end(); ++iter) {
      const std::vector<Element> &vec = *(iter->first);
      Element elem = iter->second;
      for (size_t i = 0; i < vec.size(); i++)
        needed_strings.push_back(vec[i].string);
      needed_strings.push_back(elem.string);
    }

    std::sort(needed_strings.begin(), needed_strings.end());
    needed_strings.erase(
        std::unique(needed_strings.begin(), needed_strings.end()),
        needed_strings.end());  // uniq the strings.
    repository_.Rebuild(needed_strings);
  }

  // Compares the estimated memory use (repository + arcs + elements) against
  // opts_.max_mem.  If the limit is exceeded, the string repository is
  // rebuilt; false is returned when the estimate afterwards is still above
  // 80% of the limit.  Returns true in every other case, including when
  // opts_.max_mem is not positive (no limit).
  bool CheckMemoryUsage() {
    const int32 repo_size = repository_.MemSize();
    const int32 arcs_size = num_arcs_ * sizeof(TempArc);
    const int32 elems_size = num_elems_ * sizeof(Element);
    const int32 total_size = repo_size + arcs_size + elems_size;

    const bool over_limit =
        (opts_.max_mem > 0 && total_size > opts_.max_mem);
    if (!over_limit) return true;

    // Past the threshold.  The repository is the usual culprit, so clean it
    // out and measure again.
    RebuildRepository();
    const int32 new_repo_size = repository_.MemSize();
    const int32 new_total_size = new_repo_size + arcs_size + elems_size;

    KALDI_VLOG(2) << "Rebuilt repository in determinize-lattice: repository "
                     "shrank from "
                  << repo_size << " to " << new_repo_size
                  << " bytes (approximately)";

    // A margin is required, otherwise we would be rebuilding too often.
    if (new_total_size > static_cast<int32>(opts_.max_mem * 0.8)) {
      KALDI_WARN << "Failure in determinize-lattice: size exceeds maximum "
                 << opts_.max_mem << " bytes; (repo,arcs,elems) = ("
                 << repo_size << "," << arcs_size << "," << elems_size
                 << "), after rebuilding, repo size was " << new_repo_size;
      return false;
    }
    return true;
  }

  // Returns true on success.  Can fail for out-of-memory
  // or max-states related reasons.
  bool Determinize(bool *debug_ptr) {
    assert(!determinized_);
    // This determinizes the input fst but leaves it in the "special format"
    // in "output_arcs_".  Must be called after Initialize().  To get the
    // output, call one of the Output routines.
    try {
      InitializeDeterminization();  // some start-up tasks.
      while (!queue_.empty()) {
        OutputStateId out_state = queue_.back();
        queue_.pop_back();
        ProcessState(out_state);
        if (debug_ptr && *debug_ptr) Debug();  // will exit.
        if (!CheckMemoryUsage()) return false;
      }
      return (determinized_ = true);
    } catch (const std::bad_alloc &) {
      int32 repo_size = repository_.MemSize(),
            arcs_size = num_arcs_ * sizeof(TempArc),
            elems_size = num_elems_ * sizeof(Element),
            total_size = repo_size + arcs_size + elems_size;
      KALDI_WARN
          << "Memory allocation error doing lattice determinization; using "
          << total_size << " bytes (max = " << opts_.max_mem
          << " (repo,arcs,elems) = (" << repo_size << "," << arcs_size << ","
          << elems_size << ")";
      return (determinized_ = false);
    } catch (const std::runtime_error &) {
      KALDI_WARN << "Caught exception doing lattice determinization";
      return (determinized_ = false);
    }
  }

 private:
  typedef typename Arc::Label Label;
  typedef typename Arc::StateId
      StateId;  // use this when we don't know if it's input or output.
  typedef typename Arc::StateId InputStateId;   // state in the input FST.
  typedef typename Arc::StateId OutputStateId;  // same as above but distinguish
                                                // states in output Fst.

  typedef LatticeStringRepository<IntType> StringRepositoryType;
  typedef const typename StringRepositoryType::Entry *StringId;

  // Element of a subset [of original states]
  struct Element {
    StateId state;  // declared as StateId because it is usually an
                    // InputStateId but in one case an OutputStateId.
    StringId string;
    Weight weight;

    // True if any of the three fields differs; the fields are checked in the
    // order state, string, weight.
    bool operator!=(const Element &other) const {
      if (state != other.state) return true;
      if (string != other.string) return true;
      return (weight != other.weight);
    }

    // Orders by state only.  Intended solely for the sort in EpsilonClosure().
    bool operator<(const Element &other) const {
      return (state < other.state);
    }
  };

  // Arcs in the format we temporarily create in this class (a representation,
  // essentially of a Gallic Fst).
  struct TempArc {
    // Input label of the arc; ProcessFinal() stores 0 here for final-weight
    // records.
    Label ilabel;
    StringId string;  // Handle into the string repository; the sequence of
                      // Labels is obtained with ConvertToVector().
    OutputStateId nextstate;  // Destination state, or kNoStateId when this
                              // record holds a final weight, not an arc.
    Weight weight;  // Weight of the arc (or the final weight itself).
  };

  // Hashing function used in hash of subsets.
  // A subset is a pointer to vector<Element>.
  // The Elements are in sorted order on state id, and without repeated states.
  // Because the order of Elements is fixed, we can use a hashing function that
  // is order-dependent.  However the weights are not included in the hashing
  // function-- we hash subsets that differ only in weight to the same key. This
  // is not optimal in terms of the O(N) performance but typically if we have a
  // lot of determinized states that differ only in weight then the input
  // probably was pathological in some way, or even non-determinizable.
  //   We don't quantize the weights, in order to avoid inexactness in simple
  //   cases.
  // Instead we apply the delta when comparing subsets for equality, and allow a
  // small difference.

  class SubsetKey {
   public:
    size_t operator()(const std::vector<Element> *subset)
        const {  // hashes only the state and string.
      size_t hash = 0, factor = 1;
      for (typename std::vector<Element>::const_iterator iter = subset->begin();
           iter != subset->end(); ++iter) {
        hash *= factor;
        hash += iter->state + reinterpret_cast<size_t>(iter->string);
        factor *= 23531;  // these numbers are primes.
      }
      return hash;
    }
  };

  // This is the equality operator on subsets.  It checks for exact match on
  // state-id and string, and approximate match on weights.
  class SubsetEqual {
   public:
    bool operator()(const std::vector<Element> *s1,
                    const std::vector<Element> *s2) const {
      size_t sz = s1->size();
      assert(sz >= 0);
      if (sz != s2->size()) return false;
      typename std::vector<Element>::const_iterator iter1 = s1->begin(),
                                                    iter1_end = s1->end(),
                                                    iter2 = s2->begin();
      for (; iter1 < iter1_end; ++iter1, ++iter2) {
        if (iter1->state != iter2->state || iter1->string != iter2->string ||
            !ApproxEqual(iter1->weight, iter2->weight, delta_))
          return false;
      }
      return true;
    }
    float delta_;
    explicit SubsetEqual(float delta) : delta_(delta) {}
    SubsetEqual() : delta_(kDelta) {}
  };

  // Operator that says whether two Elements have the same states.
  // Used only for debug.
  class SubsetEqualStates {
   public:
    bool operator()(const std::vector<Element> *s1,
                    const std::vector<Element> *s2) const {
      size_t sz = s1->size();
      assert(sz >= 0);
      if (sz != s2->size()) return false;
      typename std::vector<Element>::const_iterator iter1 = s1->begin(),
                                                    iter1_end = s1->end(),
                                                    iter2 = s2->begin();
      for (; iter1 < iter1_end; ++iter1, ++iter2) {
        if (iter1->state != iter2->state) return false;
      }
      return true;
    }
  };

  // Define the hash type we use to map subsets (in minimal
  // representation) to OutputStateId.
  typedef std::unordered_map<const std::vector<Element> *, OutputStateId,
                             SubsetKey, SubsetEqual>
      MinimalSubsetHash;

  // Define the hash type we use to map subsets (in initial
  // representation) to OutputStateId, together with an
  // extra weight. [note: we interpret the Element.state in here
  // as an OutputStateId even though it's declared as InputStateId;
  // these types are the same anyway].
  typedef std::unordered_map<const std::vector<Element> *, Element, SubsetKey,
                             SubsetEqual>
      InitialSubsetHash;

  // converts the representation of the subset from canonical (all states) to
  // minimal (only states with output symbols on arcs leaving them, and final
  // states).  Output is not necessarily normalized, even if input_subset was.
  void ConvertToMinimal(std::vector<Element> *subset) {
    assert(!subset->empty());
    typename std::vector<Element>::iterator cur_in = subset->begin(),
                                            cur_out = subset->begin(),
                                            end = subset->end();
    while (cur_in != end) {
      if (IsIsymbolOrFinal(cur_in->state)) {  // keep it...
        *cur_out = *cur_in;
        cur_out++;
      }
      cur_in++;
    }
    subset->resize(cur_out - subset->begin());
  }

  // Takes a minimal, normalized subset, and converts it to an OutputStateId.
  // Involves a hash lookup, and possibly adding a new OutputStateId.
  // If it creates a new OutputStateId, it adds it to the queue.
  OutputStateId MinimalToStateId(const std::vector<Element> &subset) {
    typename MinimalSubsetHash::const_iterator found =
        minimal_hash_.find(&subset);
    if (found != minimal_hash_.end()) {
      return found->second;  // an equal subset already has a state.
    }
    // Not seen before: the new state's id is the current number of output
    // states.  A heap copy of the subset is stored in output_states_ and also
    // used as the hash key.
    const OutputStateId new_id =
        static_cast<OutputStateId>(output_arcs_.size());
    std::vector<Element> *stored = new std::vector<Element>(subset);
    output_states_.push_back(stored);
    num_elems_ += stored->size();
    output_arcs_.push_back(std::vector<TempArc>());
    minimal_hash_[stored] = new_id;
    queue_.push_back(new_id);  // the new state still has to be processed.
    return new_id;
  }

  // Given a normalized initial subset of elements (i.e. before epsilon
  // closure), compute the corresponding output-state.
  OutputStateId InitialToStateId(const std::vector<Element> &subset_in,
                                 Weight *remaining_weight,
                                 StringId *common_prefix) {
    // Fast path: this initial subset was handled before, so the output state
    // together with the weight and string that were factored out can be read
    // straight from initial_hash_.
    typename InitialSubsetHash::const_iterator cached =
        initial_hash_.find(&subset_in);
    if (cached != initial_hash_.end()) {
      const Element &hit = cached->second;
      *remaining_weight = hit.weight;
      *common_prefix = hit.string;
      if (hit.weight == Weight::Zero()) KALDI_WARN << "Zero weight!";  // TEMP
      return hit.state;
    }

    // Slow path.  Work on a copy: follow epsilons (this adds no duplicate
    // states), keep only the emitting and final states, then normalize.  The
    // normalized canonical subset is never computed, only the normalized
    // minimal one.
    std::vector<Element> working(subset_in);
    EpsilonClosure(&working);
    ConvertToMinimal(&working);

    // "record" is what gets stored in initial_hash_: the common weight and
    // string removed by the normalization, plus the resulting OutputStateId.
    Element record;
    NormalizeSubset(&working, &record.weight, &record.string);
    record.state = MinimalToStateId(working);

    *remaining_weight = record.weight;
    *common_prefix = record.string;
    if (record.weight == Weight::Zero()) KALDI_WARN << "Zero weight!";  // TEMP

    // Remember the initial subset, so that the epsilon closure etc. can be
    // bypassed the next time the same initial subset comes up.
    std::vector<Element> *stored_key = new std::vector<Element>(subset_in);
    initial_hash_[stored_key] = record;
    num_elems_ += stored_key->size();  // keep track of memory usage.
    return record.state;
  }

  // returns the Compare value (-1 if a < b, 0 if a == b, 1 if a > b) according
  // to the ordering we defined on strings for the CompactLatticeWeightTpl.
  // see function
  // inline int Compare (const CompactLatticeWeightTpl<WeightType,IntType> &w1,
  //                     const CompactLatticeWeightTpl<WeightType,IntType> &w2)
  // in lattice-weight.h.
  // this is the same as that, but optimized for our data structures.
  inline int Compare(const Weight &a_w, StringId a_str, const Weight &b_w,
                     StringId b_str) const {
    // The weights decide first.
    const int weight_comp = fst::Compare(a_w, b_w);
    if (weight_comp != 0) return weight_comp;

    // Equal weights: fall back on the strings.  Identical handles mean
    // identical strings.
    if (a_str == b_str) return 0;
    std::vector<IntType> a_vec, b_vec;
    repository_.ConvertToVector(a_str, &a_vec);
    repository_.ConvertToVector(b_str, &b_vec);

    // Lengths are compared in the opposite order: the longer string gets -1
    // (c.f. Compare in lattice-weight.h).
    const int a_len = a_vec.size(), b_len = b_vec.size();
    if (a_len > b_len) return -1;
    if (a_len < b_len) return 1;

    // Same length: the first differing symbol decides.
    for (int i = 0; i < a_len; i++) {
      if (a_vec[i] < b_vec[i]) return -1;
      if (a_vec[i] > b_vec[i]) return 1;
    }
    // Should not get here, since a_str == b_str was already tested above.
    assert(0);
    return 0;
  }

  // This function computes epsilon closure of subset of states by following
  // epsilon links. Called by InitialToStateId and Initialize. Has no side
  // effects except on the string repository.  The "output_subset" is not
  // necessarily normalized (in the sense of there being no common substring),
  // unless input_subset was.
  void EpsilonClosure(std::vector<Element> *subset) {
    // On input, subset must contain at most one Element per StateId; the same
    // holds on output.  The function follows input-epsilon arcs and extends
    // the subset with the states reached that way.  On return *subset is
    // sorted by state id.

    // Work list of Elements whose outgoing epsilon arcs still need expanding.
    std::deque<Element> queue;
    // Current Element for each state reached so far.
    std::unordered_map<InputStateId, Element> cur_subset;
    typedef
        typename std::unordered_map<InputStateId, Element>::iterator MapIter;
    typedef typename std::vector<Element>::const_iterator VecIter;

    // Seed both the work list and the map with the input subset.
    for (VecIter iter = subset->begin(); iter != subset->end(); ++iter) {
      queue.push_back(*iter);
      cur_subset[iter->state] = *iter;
    }

    // find whether input fst is known to be sorted on input label.
    bool sorted =
        ((ifst_->Properties(kILabelSorted, false) & kILabelSorted) != 0);
    bool replaced_elems = false;  // relates to an optimization, see below.
    int counter =
        0;  // counts processed Elements; together with opts_.max_loop it
    // stops infinite loops on non-lattice-determinizable input (useful in
    // testing).
    while (queue.size() != 0) {
      Element elem = queue.front();
      queue.pop_front();

      // The next if-statement is a kind of optimization.  It's to prevent us
      // unnecessarily repeating the processing of a state.  "cur_subset" always
      // contains only one Element with a particular state.  The issue is that
      // whenever we modify the Element corresponding to that state in
      // "cur_subset", both the new (optimal) and old (less-optimal) Element
      // will still be in "queue".  The next if-statement stops us from wasting
      // compute by processing the old Element.
      if (replaced_elems && cur_subset[elem.state] != elem) continue;
      // The limit applies only when opts_.max_loop is positive.
      // NOTE(review): KALDI_ERR presumably throws, which would end this
      // function here -- confirm against its definition.
      if (opts_.max_loop > 0 && counter++ > opts_.max_loop) {
        KALDI_ERR << "Lattice determinization aborted since looped more than "
                  << opts_.max_loop << " times during epsilon closure";
      }
      for (ArcIterator<Fst<Arc> > aiter(*ifst_, elem.state); !aiter.Done();
           aiter.Next()) {
        const Arc &arc = aiter.Value();
        if (sorted && arc.ilabel != 0)
          break;  // Break from the loop: due to sorting there will be no
        // more transitions with epsilons as input labels.
        if (arc.ilabel == 0 &&
            arc.weight != Weight::Zero()) {  // Epsilon transition; arcs with
          // zero weight are ignored.
          Element next_elem;
          next_elem.state = arc.nextstate;
          next_elem.weight = Times(elem.weight, arc.weight);
          // Extend the string with the arc's output label, unless that label
          // is epsilon (0), in which case the string is unchanged.
          if (arc.olabel == 0)
            next_elem.string = elem.string;
          else
            next_elem.string = repository_.Successor(elem.string, arc.olabel);

          MapIter iter = cur_subset.find(next_elem.state);
          if (iter == cur_subset.end()) {
            // was no such StateId: insert and add to queue.
            cur_subset[next_elem.state] = next_elem;
            queue.push_back(next_elem);
          } else {
            // was not inserted because one already there.  In normal
            // determinization we'd add the weights.  Here, we find which one
            // has the better weight, and keep its corresponding string.
            int comp = Compare(next_elem.weight, next_elem.string,
                               iter->second.weight, iter->second.string);
            if (comp ==
                1) {  // next_elem is better, so use its (weight, string)
              iter->second.string = next_elem.string;
              iter->second.weight = next_elem.weight;
              // Re-queue so that the improvement is propagated onward; the
              // superseded Element still in "queue" is skipped by the check
              // at the top of the loop.
              queue.push_back(next_elem);
              replaced_elems = true;
            }
            // else it is the same or worse, so use original one.
          }
        }
      }
    }

    {  // copy cur_subset to subset.
      subset->clear();
      subset->reserve(cur_subset.size());
      MapIter iter = cur_subset.begin(), end = cur_subset.end();
      for (; iter != end; ++iter) subset->push_back(iter->second);
      // sort by state ID (Element::operator< compares the state only),
      // because the subset hash function is order-dependent (see SubsetKey)
      std::sort(subset->begin(), subset->end());
    }
  }

  // This function works out the final-weight of the determinized state.
  // called by ProcessSubset.
  // Has no side effects except on the variable repository_, and output_arcs_.

  void ProcessFinal(OutputStateId output_state) {
    // Picks the best (weight, string) final candidate among the elements of
    // this output state's minimal subset and, if there is one, records it in
    // output_arcs_ as a TempArc whose nextstate is kNoStateId.
    //
    // The subset may be empty (probably when the graph is not
    // connected/trimmed), so non-emptiness is deliberately not checked.
    const std::vector<Element> &minimal_subset =
        *(output_states_[output_state]);

    bool is_final = false;
    StringId final_string = NULL;  // = NULL to keep compiler happy.
    Weight final_weight = Weight::Zero();
    for (size_t k = 0; k < minimal_subset.size(); k++) {
      const Element &elem = minimal_subset[k];
      Weight this_final_weight = Times(elem.weight, ifst_->Final(elem.state));
      StringId this_final_string = elem.string;
      if (this_final_weight != Weight::Zero()) {
        // Take this candidate if it is the first one, or if it compares as
        // better than the best (weight, string) pair found so far.
        if (!is_final || Compare(this_final_weight, this_final_string,
                                 final_weight, final_string) == 1) {
          is_final = true;
          final_weight = this_final_weight;
          final_string = this_final_string;
        }
      }
    }
    if (!is_final) return;

    // Final weights are stored in a TempArc, just like a transition.
    TempArc temp_arc;
    temp_arc.ilabel = 0;
    temp_arc.nextstate = kNoStateId;  // special marker: "final weight".
    temp_arc.string = final_string;
    temp_arc.weight = final_weight;
    output_arcs_[output_state].push_back(temp_arc);
    num_arcs_++;
  }

  // NormalizeSubset normalizes the subset "elems" by
  // removing any common string prefix (putting it in common_str),
  // and dividing by the total weight (putting it in tot_weight).
  void NormalizeSubset(std::vector<Element> *elems, Weight *tot_weight,
                       StringId *common_str) {
    if (elems->empty()) {              // just set common_str, tot_weight
      KALDI_WARN << "[empty subset]";  // TEMP
      // to defaults and return...
      *common_str = repository_.EmptyString();
      *tot_weight = Weight::Zero();
      return;
    }
    size_t size = elems->size();
    std::vector<IntType> common_prefix;
    repository_.ConvertToVector((*elems)[0].string, &common_prefix);
    Weight weight = (*elems)[0].weight;
    for (size_t i = 1; i < size; i++) {
      weight = Plus(weight, (*elems)[i].weight);
      repository_.ReduceToCommonPrefix((*elems)[i].string, &common_prefix);
    }
    assert(weight != Weight::Zero());  // we made sure to ignore arcs with zero
    // weights on them, so we shouldn't have zero here.
    size_t prefix_len = common_prefix.size();
    for (size_t i = 0; i < size; i++) {
      (*elems)[i].weight = Divide((*elems)[i].weight, weight, DIVIDE_LEFT);
      (*elems)[i].string =
          repository_.RemovePrefix((*elems)[i].string, prefix_len);
    }
    *common_str = repository_.ConvertFromVector(common_prefix);
    *tot_weight = weight;
  }

  // Take a subset of Elements that is sorted on state, and
  // merge any Elements that have the same state (taking the best
  // (weight, string) pair in the semiring).
  void MakeSubsetUnique(std::vector<Element> *subset) {
    typedef typename std::vector<Element>::iterator IterType;

    // This assert is designed to fail (usually) if the subset is not sorted on
    // state.
    assert(subset->size() < 2 || (*subset)[0].state <= (*subset)[1].state);

    IterType cur_in = subset->begin(), cur_out = cur_in, end = subset->end();
    size_t num_out = 0;
    // Merge elements with same state-id
    while (cur_in != end) {  // while we have more elements to process.
      // At this point, cur_out points to location of next place we want to put
      // an element, cur_in points to location of next element we want to
      // process.
      if (cur_in != cur_out) *cur_out = *cur_in;
      cur_in++;
      while (cur_in != end && cur_in->state == cur_out->state) {
        if (Compare(cur_in->weight, cur_in->string, cur_out->weight,
                    cur_out->string) == 1) {
          // if *cur_in > *cur_out in semiring, then take *cur_in.
          cur_out->string = cur_in->string;
          cur_out->weight = cur_in->weight;
        }
        cur_in++;
      }
      cur_out++;
      num_out++;
    }
    subset->resize(num_out);
  }

  // ProcessTransition is called from "ProcessTransitions".  Broken out for
  // clarity.  Processes a transition from state "state".  The set of Elements
  // represents a set of next-states with associated weights and strings, each
  // one arising from an arc from some state in a determinized-state; the
  // next-states are not necessarily unique (i.e. there may be >1 entry
  // associated with each), and any such sets of Elements have to be merged
  // within this routine (we take the [weight, string] pair that's better in the
  // semiring).
  void ProcessTransition(OutputStateId state, Label ilabel,
                         std::vector<Element> *subset) {
    // Merge entries that share a next-state, then factor out the weight and
    // string common to the whole subset.
    MakeSubsetUnique(subset);
    Weight arc_weight;
    StringId arc_string;
    NormalizeSubset(subset, &arc_weight, &arc_string);

    // Map the normalized subset to an output state (created if necessary).
    // The lookup yields a further weight and string, which are appended to
    // what was factored out above.
    Weight extra_weight;
    StringId extra_string;
    const OutputStateId nextstate =
        InitialToStateId(*subset, &extra_weight, &extra_string);
    arc_string = repository_.Concatenate(arc_string, extra_string);
    arc_weight = Times(arc_weight, extra_weight);

    // Record the arc to that state.
    TempArc temp_arc;
    temp_arc.ilabel = ilabel;
    temp_arc.nextstate = nextstate;
    temp_arc.string = arc_string;
    temp_arc.weight = arc_weight;
    output_arcs_[state].push_back(temp_arc);
    num_arcs_++;
  }

  // "less than" operator for pair<Label, Element>.   Used in
  // ProcessTransitions. Lexicographical order, which only compares the state
  // when ordering the "Element" member of the pair.

  class PairComparator {
   public:
    // Strict weak ordering on (label, element) pairs: the label decides first;
    // ties are broken on the element's state only (weight and string are
    // ignored).
    inline bool operator()(const std::pair<Label, Element> &p1,
                           const std::pair<Label, Element> &p2) {
      if (p1.first < p2.first) return true;
      if (p2.first < p1.first) return false;
      return p1.second.state < p2.second.state;
    }
  };

  // ProcessTransitions processes emitting transitions (transitions
  // with ilabels) out of this subset of states.
  // Does not consider final states.  Breaks the emitting transitions up by
  // ilabel, and creates a new transition in the determinized FST for each
  // unique ilabel. Does this by creating a big vector of pairs <Label, Element>
  // and then sorting them using a lexicographical ordering, and calling
  // ProcessTransition for each range with the same ilabel. Side effects on
  // repository, and (via ProcessTransition) on Q_, hash_, and output_arcs_.

  void ProcessTransitions(OutputStateId output_state) {
    typedef std::pair<Label, Element> LabeledElement;

    // The subset may legitimately be empty (unreachable parts of the graph),
    // so no non-emptiness check is made here.
    const std::vector<Element> &src_subset = *(output_states_[output_state]);

    // Scratch buffer kept as a class member to avoid repeated allocation; it
    // is expected to be empty on entry and is cleared again before returning.
    std::vector<LabeledElement> &pending = all_elems_tmp_;

    // Phase 1: gather one (ilabel, next-element) pair for every arc with a
    // non-epsilon input label and non-zero weight leaving a state of the
    // subset.
    for (size_t idx = 0; idx < src_subset.size(); ++idx) {
      const Element &src = src_subset[idx];
      for (ArcIterator<Fst<Arc> > aiter(*ifst_, src.state); !aiter.Done();
           aiter.Next()) {
        const Arc &arc = aiter.Value();
        if (arc.ilabel != 0 && arc.weight != Weight::Zero()) {
          LabeledElement entry;
          entry.first = arc.ilabel;
          entry.second.state = arc.nextstate;
          entry.second.weight = Times(src.weight, arc.weight);
          if (arc.olabel != 0) {
            // Extend the output string by this arc's output label.
            entry.second.string =
                repository_.Successor(src.string, arc.olabel);
          } else {
            // Output epsilon: the string is carried over unchanged.
            entry.second.string = src.string;
          }
          pending.push_back(entry);
        }
      }
    }

    // Phase 2: order by input label, then by next-state.
    PairComparator by_label_then_state;
    std::sort(pending.begin(), pending.end(), by_label_then_state);

    // Phase 3: hand each run of equal input label to ProcessTransition.
    std::vector<Element> group;
    const size_t total = pending.size();
    size_t pos = 0;
    while (pos < total) {
      const Label ilabel = pending[pos].first;
      group.clear();
      for (; pos < total && pending[pos].first == ilabel; ++pos) {
        group.push_back(pending[pos].second);
      }
      assert(!group.empty());  // temp.
      ProcessTransition(output_state, ilabel, &group);
    }

    // Leave the shared scratch buffer empty for the next call.
    pending.clear();
  }

  // ProcessState does the processing of a determinized state, i.e. it creates
  // transitions out of it and the final-probability if any.
  void ProcessState(OutputStateId output_state) {
    // Two steps, in this order: ProcessFinal (presumably sets the
    // final-probability of the state, per the comment above -- its body is not
    // shown here), then ProcessTransitions, which creates the outgoing arcs for
    // the non-epsilon input labels.
    ProcessFinal(output_state);
    ProcessTransitions(output_state);
  }

  void Debug() {  // this function called if you send a signal
    // SIGUSR1 to the process (and it's caught by the handler in
    // fstdeterminizestar).  It prints out some traceback
    // info and exits.
    //
    // The traceback is a chain of (ilabel, output-string) pairs leading from
    // state 0 to a recently constructed output state, reported via KALDI_ERR.

    KALDI_WARN << "Debug function called (probably SIGUSR1 caught)";
    // free up memory from the hash as we need a little memory
    {
      MinimalSubsetHash hash_tmp;
      hash_tmp.swap(minimal_hash_);
    }

    if (output_arcs_.size() <= 2) {
      KALDI_ERR << "Nothing to trace back";
    }
    size_t max_state = output_arcs_.size() - 2;  // Don't take the last
    // one as we might be halfway into constructing it.

    // predecessor[s] is some lower-numbered state with an arc into s, or
    // kNoStateId if none was found.
    std::vector<OutputStateId> predecessor(max_state + 1, kNoStateId);
    for (size_t i = 0; i < max_state; i++) {
      for (size_t j = 0; j < output_arcs_[i].size(); j++) {
        OutputStateId nextstate = output_arcs_[i][j].nextstate;
        // Always find an earlier-numbered predecessor; this
        // is always possible because of the way the algorithm
        // works.
        if (nextstate <= max_state && nextstate > i) predecessor[nextstate] = i;
      }
    }
    std::vector<std::pair<Label, StringId> > traceback;
    // 'traceback' is a pair of (ilabel, olabel-seq).
    OutputStateId cur_state = max_state;  // A recently constructed state.

    while (cur_state != 0 && cur_state != kNoStateId) {
      OutputStateId last_state = predecessor[cur_state];
      if (last_state == kNoStateId) {
        // No predecessor was recorded for this state.  Stop here instead of
        // indexing output_arcs_ with kNoStateId (out of bounds); the warning
        // below reports the incomplete traceback.
        cur_state = kNoStateId;
        break;
      }
      std::pair<Label, StringId> p;
      size_t i;
      for (i = 0; i < output_arcs_[last_state].size(); i++) {
        if (output_arcs_[last_state][i].nextstate == cur_state) {
          p.first = output_arcs_[last_state][i].ilabel;
          p.second = output_arcs_[last_state][i].string;
          traceback.push_back(p);
          break;
        }
      }
      KALDI_ASSERT(i != output_arcs_[last_state].size());  // Or fell off loop.
      cur_state = last_state;
    }
    if (cur_state == kNoStateId)
      KALDI_WARN << "Traceback did not reach start state "
                 << "(possibly debug-code error)";

    // The traceback was collected end-to-start, so print it in reverse.
    std::stringstream ss;
    ss << "Traceback follows in format "
       << "ilabel (olabel olabel) ilabel (olabel) ... :";
    for (ssize_t i = traceback.size() - 1; i >= 0; i--) {
      ss << ' ' << traceback[i].first << " ( ";
      std::vector<Label> seq;
      repository_.ConvertToVector(traceback[i].second, &seq);
      for (size_t j = 0; j < seq.size(); j++) ss << seq[j] << ' ';
      ss << ')';
    }
    KALDI_ERR << ss.str();
  }

  bool IsIsymbolOrFinal(InputStateId state) {
    // Returns true if `state` of the input FST is final, or has at least one
    // outgoing arc with a non-epsilon input label and non-zero weight.  The
    // answer is memoized per state in isymbol_or_final_.
    assert(state >= 0);
    const char kYes = static_cast<char>(OSF_YES);
    const char kNo = static_cast<char>(OSF_NO);

    // Grow the cache on demand; new slots start out as "unknown".
    if (isymbol_or_final_.size() <= state)
      isymbol_or_final_.resize(state + 1, static_cast<char>(OSF_UNKNOWN));

    // Cache hit?
    if (isymbol_or_final_[state] == kNo) return false;
    if (isymbol_or_final_[state] == kYes) return true;

    // Cache miss: assume "no", then upgrade to "yes" on finality or on the
    // first qualifying arc.
    isymbol_or_final_[state] = kNo;
    if (ifst_->Final(state) != Weight::Zero()) isymbol_or_final_[state] = kYes;
    for (ArcIterator<Fst<Arc> > aiter(*ifst_, state); !aiter.Done();
         aiter.Next()) {
      const Arc &arc = aiter.Value();
      if (arc.ilabel != 0 && arc.weight != Weight::Zero()) {
        isymbol_or_final_[state] = kYes;
        return true;
      }
    }
    // No qualifying arc; the answer is whatever finality left in the cache.
    return isymbol_or_final_[state] == kYes;
  }

  void InitializeDeterminization() {
    if (ifst_->Properties(kExpanded, false) != 0) {  // if we know the number of
      // states in ifst_, it might be a bit more efficient
      // to pre-size the hashes so we're not constantly rebuilding them.
#if !(__GNUC__ == 4 && __GNUC_MINOR__ == 0)
      StateId num_states =
          down_cast<const ExpandedFst<Arc> *, const Fst<Arc> >(ifst_)
              ->NumStates();
      minimal_hash_.rehash(num_states / 2 + 3);
      initial_hash_.rehash(num_states / 2 + 3);
#endif
    }
    InputStateId start_id = ifst_->Start();
    if (start_id != kNoStateId) {
      /* Insert determinized-state corresponding to the start state into hash
         and queue.  Unlike all the other states, we don't "normalize" the
         representation of this determinized-state before we put it into
         minimal_hash_.  This is actually what we want, as otherwise we'd have
         problems dealing with any extra weight and string and might have to
         create a "super-initial" state which would make the output
         nondeterministic.  Normalization is only needed to make the
         determinized output more minimal anyway, it's not needed for
         correctness. Note, we don't put anything in the initial_hash_.  The
         initial_hash_ is only a lookaside buffer anyway, so this isn't a
         problem-- it will get populated later if it needs to be.
      */
      Element elem;
      elem.state = start_id;
      elem.weight = Weight::One();
      elem.string = repository_.EmptyString();  // Id of empty sequence.
      std::vector<Element> subset;
      subset.push_back(elem);
      EpsilonClosure(&subset);    // follow through epsilon-inputs links
      ConvertToMinimal(&subset);  // remove all but final states and
      // states with input-labels on arcs out of them.
      std::vector<Element> *subset_ptr = new std::vector<Element>(subset);
      assert(output_arcs_.empty() && output_states_.empty());
      // add the new state...
      output_states_.push_back(subset_ptr);
      output_arcs_.push_back(std::vector<TempArc>());
      OutputStateId initial_state = 0;
      minimal_hash_[subset_ptr] = initial_state;
      queue_.push_back(initial_state);
    }
  }

  KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeDeterminizer);

  // Maps from output state to its minimal representation [normalized].
  // View the pointers as owned by minimal_hash_.
  std::vector<std::vector<Element> *>
      output_states_;
  // Essentially an FST in our format: the arcs leaving each output state.
  std::vector<std::vector<TempArc> >
      output_arcs_;

  int num_arcs_;   // keep track of memory usage: number of arcs in output_arcs_
  int num_elems_;  // keep track of memory usage: number of elems in
                   // output_states_

  const Fst<Arc> *ifst_;
  DeterminizeLatticeOptions opts_;
  SubsetKey hasher_;  // object that computes keys-- has no data members.
  // Object that compares subsets-- only data member is delta_.
  SubsetEqual
      equal_;
  // Set to true when the user has called Determinize(); used to make sure
  // this object is used correctly.
  bool determinized_;
  // Hash from Subset to OutputStateId.  Subset is in "minimal representation"
  // (only includes final states and states with a nonzero ilabel on an arc
  // out of them).  Owns the pointers in its keys.
  MinimalSubsetHash
      minimal_hash_;
  // Hash from Subset to Element, which represents the OutputStateId together
  // with an extra weight and string.  Subset is in "initial representation".
  // The extra weight and string are needed because after we convert to
  // minimal representation and normalize, there may be an extra weight and
  // string.  Owns the pointers in its keys.
  InitialSubsetHash initial_hash_;
  // Queue of output-states to process.  Starts with state 0, and increases
  // and then (hopefully) decreases in length during determinization.  LIFO
  // queue (queue discipline doesn't really matter).
  std::vector<OutputStateId>
      queue_;

  // Temporary vector used in ProcessTransitions; kept empty between calls.
  std::vector<std::pair<Label, Element> >
      all_elems_tmp_;

  // Values stored (as char) in isymbol_or_final_.
  enum IsymbolOrFinal { OSF_UNKNOWN = 0, OSF_NO = 1, OSF_YES = 2 };

  // A kind of cache; it says whether each input state is (emitting or final),
  // where emitting means it has at least one outgoing arc with a non-epsilon
  // input label.  Only accessed by IsIsymbolOrFinal().
  std::vector<char> isymbol_or_final_;

  // Defines a compact and fast way of storing sequences of labels.
  LatticeStringRepository<IntType>
      repository_;
};

// normally Weight would be LatticeWeight<float> (which has two floats),
// or possibly TropicalWeightTpl<float>, and IntType would be int32.
template <class Weight, class IntType>
bool DeterminizeLattice(const Fst<ArcTpl<Weight> > &ifst,
                        MutableFst<ArcTpl<Weight> > *ofst,
                        DeterminizeLatticeOptions opts, bool *debug_ptr) {
  ofst->SetInputSymbols(ifst.InputSymbols());
  ofst->SetOutputSymbols(ifst.OutputSymbols());
  LatticeDeterminizer<Weight, IntType> det(ifst, opts);
  if (!det.Determinize(debug_ptr)) return false;
  det.Output(ofst);
  return true;
}

// normally Weight would be LatticeWeight<float> (which has two floats),
// or possibly TropicalWeightTpl<float>, and IntType would be int32.
template <class Weight, class IntType>
bool DeterminizeLattice(
    const Fst<ArcTpl<Weight> > &ifst,
    MutableFst<ArcTpl<CompactLatticeWeightTpl<Weight, IntType> > > *ofst,
    DeterminizeLatticeOptions opts, bool *debug_ptr) {
  ofst->SetInputSymbols(ifst.InputSymbols());
  ofst->SetOutputSymbols(ifst.OutputSymbols());
  LatticeDeterminizer<Weight, IntType> det(ifst, opts);
  if (!det.Determinize(debug_ptr)) return false;
  det.Output(ofst);
  return true;
}

}  // namespace fst

#endif  // KALDI_FSTEXT_DETERMINIZE_LATTICE_INL_H_


================================================
FILE: runtime/engine/kaldi/fstext/determinize-lattice.h
================================================
// fstext/determinize-lattice.h

// Copyright 2009-2011  Microsoft Corporation

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_FSTEXT_DETERMINIZE_LATTICE_H_
#define KALDI_FSTEXT_DETERMINIZE_LATTICE_H_
#include <fst/fst-decl.h>
#include <fst/fstlib.h>
#include <algorithm>
#include <map>
#include <set>
#include <vector>
#include "fstext/lattice-weight.h"

namespace fst {

/// \addtogroup fst_extensions
///  @{

// For example of usage, see test-determinize-lattice.cc

/*
   DeterminizeLattice implements a special form of determinization
   with epsilon removal, optimized for a phase of lattice generation.
   Its input is an FST with weight-type BaseWeightType (usually a pair of
   floats, with a lexicographical type of order, such as
   LatticeWeightTpl<float>). Typically this would be a state-level lattice, with
   input symbols equal to words, and output-symbols equal to p.d.f's (so like
   the inverse of HCLG).  Imagine representing this as an acceptor of type
   CompactLatticeWeightTpl<float>, in which the input/output symbols are words,
   and the weights contain the original weights together with strings (with zero
   or one symbol in them) containing the original output labels (the p.d.f.'s).
   We determinize this using acceptor determinization with epsilon removal.
   Remember (from lattice-weight.h) that CompactLatticeWeightTpl has a special
   kind of semiring where we always take the string corresponding to the best
   cost (of type BaseWeightType), and discard the other.  This corresponds to
   taking the best output-label sequence (of p.d.f.'s) for each input-label
   sequence (of words).  We couldn't use the Gallic weight for this, or it would
   die as soon as it detected that the input FST was non-functional.  In our
   case, any acyclic FST (and many cyclic ones) can be determinized. We assume
   that there is a function Compare(const BaseWeightType &a, const
   BaseWeightType &b) that returns (-1, 0, 1) according to whether (a < b, a ==
   b, a > b) in the total order on the BaseWeightType... this information should
   be the same as NaturalLess would give, but it's more efficient to do it this
   way. You can define this for things like TropicalWeight if you need to
   instantiate this class for that weight type.

   We implement this determinization in a special way to make it efficient for
   the types of FSTs that we will apply it to.  One issue is that if we
   explicitly represent the strings (in CompactLatticeWeightTpl) as vectors of
   type vector<IntType>, the algorithm takes time quadratic in the length of
   words (in states), because propagating each arc involves copying a whole
   vector (of integers representing p.d.f.'s).  Instead we use a hash structure
   where each string is a pointer (Entry*), and uses a hash from (Entry*,
   IntType), to the successor string (and a way to get the latest IntType and
   the ancestor Entry*).  [this is the class LatticeStringRepository].

   Another issue is that rather than representing a determinized-state as a
   collection of (state, weight), we represent it in a couple of reduced forms.
   Suppose a determinized-state is a collection of (state, weight) pairs; call
   this the "canonical representation".  Note: these collections are always
   normalized to remove any common weight and string part.  Define end-states as
   the subset of states that have an arc out of them with a label on, or are
   final.  If we represent a determinized-state as the set of just its
   (end-state, weight) pairs, this will be a valid and more compact
   representation, and will lead to a smaller set of determinized states (like
   early minimization).  Call this collection of (end-state, weight) pairs the
   "minimal representation".  As a mechanism to reduce compute, we can also
   consider another representation. In the determinization algorithm, we start
   off with a set of (begin-state, weight) pairs (where the "begin-states" are
   initial or have a label on the transition into them), and the "canonical
   representation" consists of the epsilon-closure of this set (i.e. follow
   epsilons).  Call this set of (begin-state, weight) pairs, appropriately
   normalized, the "initial representation".  If two initial representations are
   the same, the "canonical representation" and hence the "minimal
   representation" will be the same.  We can use this to reduce compute.  Note
   that if two initial representations are different, this does not preclude the
   other representations from being the same.

*/

// Tuning knobs passed (by value) to DeterminizeLattice().
struct DeterminizeLatticeOptions {
  float delta;  // A small offset used to measure equality of weights.
  int max_mem;  // If >0, determinization will fail and return false
  // when the algorithm's (approximate) memory consumption crosses this
  // threshold.
  int max_loop;  // If >0, can be used to detect non-determinizable input
  // (a case that wouldn't be caught by max_mem).

  // Defaults: delta = kDelta; max_mem and max_loop are -1, i.e. not >0, so
  // per the field comments above neither limit is active.
  DeterminizeLatticeOptions() : delta(kDelta), max_mem(-1), max_loop(-1) {}
};

/**
    This function implements the normal version of DeterminizeLattice, in which
    the output strings are represented using sequences of arcs, where all but
    the first one has an epsilon on the input side.  The debug_ptr argument is
    an optional pointer to a bool that, if it becomes true while the algorithm
    is executing, the algorithm will print a traceback and terminate (used in
    fstdeterminizestar.cc to debug non-terminating determinization).  More
    efficient if ifst is arc-sorted on input label.  If the number of arcs gets
    more than max_states, it will throw std::runtime_error (otherwise this code
    does not use exceptions).  This is mainly useful for debug.  */
template <class Weight, class IntType>
bool DeterminizeLattice(
    const Fst<ArcTpl<Weight> > &ifst, MutableFst<ArcTpl<Weight> > *ofst,
    DeterminizeLatticeOptions opts = DeterminizeLatticeOptions(),
    bool *debug_ptr = NULL);

/*  This is a version of DeterminizeLattice with a slightly more "natural"
   output format, where the output sequences are encoded using the
   CompactLatticeArcTpl template (i.e. the sequences of output symbols are
   represented directly as strings) More efficient if ifst is arc-sorted on
   input label. If the #arcs gets more than max_arcs, it will throw
   std::runtime_error (otherwise this code does not use exceptions).  This is
   mainly useful for debug.
*/
template <class Weight, class IntType>
bool DeterminizeLattice(
    const Fst<ArcTpl<Weight> > &ifst,
    MutableFst<ArcTpl<CompactLatticeWeightTpl<Weight, IntType> > > *ofst,
    DeterminizeLatticeOptions opts = DeterminizeLatticeOptions(),
    bool *debug_ptr = NULL);

/// @} end "addtogroup fst_extensions"

}  // end namespace fst

#include "fstext/determinize-lattice-inl.h"

#endif  // KALDI_FSTEXT_DETERMINIZE_LATTICE_H_


================================================
FILE: runtime/engine/kaldi/fstext/determinize-star-inl.h
================================================
// fstext/determinize-star-inl.h

// Copyright 2009-2011  Microsoft Corporation;  Jan Silovsky
//           2015 Hainan Xu

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_FSTEXT_DETERMINIZE_STAR_INL_H_
#define KALDI_FSTEXT_DETERMINIZE_STAR_INL_H_
// Do not include this file directly.  It is included by determinize-star.h

#include <algorithm>
#include <climits>
#include <deque>
#include <limits>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
using std::unordered_map;

#include "base/kaldi-error.h"

namespace fst {

// This class maps back and forth between integer ids and sequences of labels.
// It is used in the determinization algorithm.

template <class Label, class StringId>
class StringRepository {
  // Label and StringId are both integer types, possibly the same.
  // This is a utility that maps back and forth between a vector<Label> and
  // StringId representation of sequences of Labels.  It is to save memory, and
  // to save compute. We treat sequences of length zero and one separately, for
  // efficiency.
  //
  // Layout of the StringId space, with M = numeric_limits<StringId>::max():
  //   [0, M/2 - 1]   indices into vec_ (sequences stored explicitly);
  //   M/2            the empty sequence;
  //   [M/2 + 1, M]   a single label l in [0, single_symbol_range], stored
  //                  directly as l + single_symbol_start.

 public:
  class VectorKey {  // Hash function object.
   public:
    // Hashes the pointed-to sequence (not the pointer value).
    size_t operator()(const std::vector<Label> *vec) const {
      assert(vec != NULL);
      size_t hash = 0, factor = 1;
      for (typename std::vector<Label>::const_iterator it = vec->begin();
           it != vec->end(); ++it) {
        hash += factor * (*it);
        factor *= 103333;  // just an arbitrary prime number.
      }
      return hash;
    }
  };
  class VectorEqual {  // Equality-operator function object.
   public:
    // Compares the pointed-to sequences element-wise.
    bool operator()(const std::vector<Label> *vec1,
                    const std::vector<Label> *vec2) const {
      return *vec1 == *vec2;
    }
  };

  typedef std::unordered_map<const std::vector<Label> *, StringId, VectorKey,
                             VectorEqual>
      MapType;

  // Id of the empty sequence.
  StringId IdOfEmpty() { return no_symbol; }

  // Id of the length-one sequence [l].
  StringId IdOfLabel(Label l) {
    if (l >= 0 && l <= static_cast<Label>(single_symbol_range)) {
      return l + single_symbol_start;
    } else {
      // l is out of the allowed range so we have to treat it as a sequence of
      // length one.  Should be v. rare.
      std::vector<Label> v(1, l);
      return IdOfSeqInternal(v);
    }
  }

  // Id of an arbitrary sequence; also works for sizes 0 and 1.
  StringId IdOfSeq(const std::vector<Label> &v) {
    const size_t sz = v.size();
    if (sz == 0) return no_symbol;
    if (sz == 1) return IdOfLabel(v[0]);
    return IdOfSeqInternal(v);
  }

  inline bool IsEmptyString(StringId id) { return id == no_symbol; }

  // Writes the sequence identified by `id` into *v (overwriting its contents).
  // `id` must have been returned by this repository.
  void SeqOfId(StringId id, std::vector<Label> *v) {
    if (id == no_symbol) {
      v->clear();
    } else if (id >= single_symbol_start) {
      v->resize(1);
      (*v)[0] = id - single_symbol_start;
    } else {
      assert(static_cast<size_t>(id) < vec_.size());
      *v = *(vec_[id]);
    }
  }

  // Returns the id of the sequence `id` with its first `prefix_len` labels
  // dropped.  `prefix_len` must not exceed the sequence length.
  StringId RemovePrefix(StringId id, size_t prefix_len) {
    if (prefix_len == 0) return id;
    std::vector<Label> v;
    SeqOfId(id, &v);
    assert(v.size() >= prefix_len);
    std::vector<Label> v_noprefix(v.begin() + prefix_len, v.end());
    return IdOfSeq(v_noprefix);
  }

  StringRepository()
      : string_end((std::numeric_limits<StringId>::max() / 2) - 1),
        no_symbol(std::numeric_limits<StringId>::max() / 2),
        single_symbol_start((std::numeric_limits<StringId>::max() / 2) + 1),
        single_symbol_range(std::numeric_limits<StringId>::max() -
                            ((std::numeric_limits<StringId>::max() / 2) + 1)) {}

  // Frees every stored sequence and releases the memory held by the
  // containers.  Ids handed out earlier that index vec_ become invalid.
  void Destroy() {
    for (typename std::vector<std::vector<Label> *>::iterator iter =
             vec_.begin();
         iter != vec_.end(); ++iter)
      delete *iter;
    // Swap with empty temporaries so the capacity is actually released.
    std::vector<std::vector<Label> *>().swap(vec_);
    MapType().swap(map_);
  }
  ~StringRepository() { Destroy(); }

 private:
  // Non-copyable: vec_ owns its vectors through raw pointers, and map_ keys
  // alias those same pointers.
  StringRepository(const StringRepository &) = delete;
  StringRepository &operator=(const StringRepository &) = delete;

  // Looks `v` up in map_, adding a heap copy of it under a fresh id (its index
  // in vec_) when it is not present yet.
  StringId IdOfSeqInternal(const std::vector<Label> &v) {
    typename MapType::iterator iter = map_.find(&v);
    if (iter != map_.end()) return iter->second;
    // must add it to map.
    StringId this_id = static_cast<StringId>(vec_.size());
    assert(this_id < string_end);  // or we used up the labels.
    std::vector<Label> *v_new = new std::vector<Label>(v);
    vec_.push_back(v_new);
    map_[v_new] = this_id;
    return this_id;
  }

  std::vector<std::vector<Label> *> vec_;  // id -> sequence (owned).
  MapType map_;                            // sequence -> id (keys alias vec_).

  static const StringId string_start =
      (StringId)0;  // This must not change.  It's assumed.
  StringId string_end;  // = (numeric_limits<StringId>::max() / 2) - 1; all
                        // ids that index vec_ must be <= this.
  StringId no_symbol;   // = (numeric_limits<StringId>::max() / 2); reserved
                        // for empty sequence.
  StringId
      single_symbol_start;  // = (numeric_limits<StringId>::max() / 2) + 1;
  StringId single_symbol_range;  // = numeric_limits<StringId>::max() -
                                 // single_symbol_start;
};

// DeterminizerStar determinizes the FST passed to its constructor.  Each
// output state corresponds to a subset of (input state, output-string id,
// weight) Elements; epsilon-input arcs are handled by taking an epsilon
// closure of each subset before its non-epsilon arcs are followed.  The result
// is accumulated in an internal arc format (output_arcs_) and written out with
// one of the Output() overloads.
//
// Typical usage (see DeterminizeStar() in this file): construct, call
// Determinize(), call Output(), then optionally query IsPartial().
template <class F>
class DeterminizerStar {
  typedef typename F::Arc Arc;

 public:
  // Output to Gallic acceptor (so the strings go on weights, and there is a 1-1
  // correspondence between our states and the states in ofst.  If destroy ==
  // true, release memory as we go (but we cannot output again).
  void Output(MutableFst<GallicArc<Arc> > *ofst, bool destroy = true);

  // Output to standard FST.  We will create extra states to handle sequences of
  // symbols on the output.  If destroy == true, release memory as we go (but we
  // cannot output again).

  void Output(MutableFst<Arc> *ofst, bool destroy = true);

  // Initializer.  After initializing the object you will typically call
  // Determinize() and then one of the Output functions.
  //
  //   ifst          - input FST; a copy obtained from ifst.Copy() is owned by
  //                   this object and freed in FreeMostMemory().
  //   delta         - tolerance used when comparing weights of subsets
  //                   (SubsetEqual) and inside the epsilon closure.
  //   max_states    - if > 0, limit checked in Determinize() against the
  //                   number of output states, and in the epsilon closure
  //                   against the number of loop iterations.
  //   allow_partial - if true, exceeding max_states in Determinize() stops
  //                   with a warning and a partial result instead of an error.
  //
  // The first argument given to hash_ is NumStates()/2 + 3 when the input
  // reports kExpanded, and 20 otherwise (presumably the initial bucket-count
  // hint of the unordered_map -- confirm against the map type in use).
  DeterminizerStar(const Fst<Arc> &ifst, float delta = kDelta,
                   int max_states = -1, bool allow_partial = false)
      : ifst_(ifst.Copy()),
        delta_(delta),
        max_states_(max_states),
        determinized_(false),
        allow_partial_(allow_partial),
        is_partial_(false),
        equal_(delta),
        hash_(ifst.Properties(kExpanded, false)
                  ? down_cast<const ExpandedFst<Arc> *, const Fst<Arc> >(&ifst)
                                ->NumStates() /
                            2 +
                        3
                  : 20,
              hasher_, equal_),
        epsilon_closure_(ifst_, max_states, &repository_, delta) {}

  // Runs the determinization, filling output_arcs_.  Must only be called while
  // determinized_ is false (asserted).  If debug_ptr is non-NULL and *debug_ptr
  // is true after a subset has been processed, Debug() is called.
  void Determinize(bool *debug_ptr) {
    assert(!determinized_);
    // This determinizes the input fst but leaves it in the "special format"
    // in "output_arcs_".
    InputStateId start_id = ifst_->Start();
    if (start_id == kNoStateId) {
      determinized_ = true;
      return;  // Nothing to do.
    } else {   // Insert start state into hash and queue.
      Element elem;
      elem.state = start_id;
      elem.weight = Weight::One();
      elem.string = repository_.IdOfEmpty();  // Id of empty sequence.
      std::vector<Element> vec;
      vec.push_back(elem);
      // The first subset registered gets id 0 because output_arcs_ is still
      // empty at this point.
      OutputStateId cur_id = SubsetToStateId(vec);
      assert(cur_id == 0 && "Do not call Determinize twice.");
    }
    while (!Q_.empty()) {
      std::pair<std::vector<Element> *, OutputStateId> cur_pair = Q_.front();
      Q_.pop_front();
      ProcessSubset(cur_pair);
      if (debug_ptr && *debug_ptr) Debug();  // will exit.
      // The max_states_ > 0 guard also makes the signed/unsigned comparison
      // below safe.
      if (max_states_ > 0 && output_arcs_.size() > max_states_) {
        if (allow_partial_ == false) {
          KALDI_ERR << "Determinization aborted since passed " << max_states_
                    << " states";
        } else {
          KALDI_WARN << "Determinization terminated since passed "
                     << max_states_
                     << " states, partial results will be generated";
          is_partial_ = true;
          break;
        }
      }
    }
    determinized_ = true;
  }

  // True if Determinize() stopped early because max_states was exceeded with
  // allow_partial set.
  bool IsPartial() { return is_partial_; }

  // frees all except output_arcs_, which contains the important info
  // we need to output.
  // Specifically: deletes the copy of the input FST and every subset vector
  // stored as a key of hash_, then releases the hash storage via swap.
  // NOTE: Q_ is not cleared here, and its entries hold the same subset
  // pointers as hash_ (see SubsetToStateId), so anything still queued (e.g.
  // after a partial determinization) dangles afterwards.  Likewise
  // epsilon_closure_ keeps its own non-owning copy of the ifst_ pointer.
  void FreeMostMemory() {
    if (ifst_) {
      delete ifst_;
      ifst_ = NULL;
    }
    for (typename SubsetHash::iterator iter = hash_.begin();
         iter != hash_.end(); ++iter)
      delete iter->first;
    SubsetHash tmp;
    tmp.swap(hash_);
  }

  // Safe to run after an earlier FreeMostMemory(): ifst_ is NULL-checked and
  // hash_ is then empty.
  ~DeterminizerStar() { FreeMostMemory(); }

 private:
  typedef typename Arc::Label Label;
  typedef typename Arc::Weight Weight;
  typedef typename Arc::StateId InputStateId;
  typedef typename Arc::StateId
      OutputStateId;  // same as above but distinguish states in output Fst.
  typedef typename Arc::Label StringId;  // Id type used in the StringRepository
  typedef StringRepository<Label, StringId> StringRepositoryType;

  // Element of a subset [of original states]
  // `string` is an id in repository_ naming the output-label sequence
  // accumulated so far; `weight` is the weight accumulated so far.

  struct Element {
    InputStateId state;
    StringId string;
    Weight weight;
    bool operator!=(const Element &other) const {
      return (state != other.state || string != other.string ||
              weight != other.weight);
    }
  };

  // Arcs in the format we temporarily create in this class (a representation,
  // essentially of a Gallic Fst).
  struct TempArc {
    Label ilabel;
    StringId ostring;  // Look it up in the StringRepository, it's a sequence of
                       // Labels.
    OutputStateId nextstate;  // or kNoState for final weights.
    Weight weight;
  };

  // Hashing function used in hash of subsets.
  // A subset is a pointer to vector<Element>.
  // The Elements are in sorted order on state id, and without repeated states.
  // Because the order of Elements is fixed, we can use a hashing function that
  // is order-dependent.  However the weights are not included in the hashing
  // function-- we hash subsets that differ only in weight to the same key. This
  // is not optimal in terms of the O(N) performance but typically if we have a
  // lot of determinized states that differ only in weight then the input
  // probably was pathological in some way, or even non-determinizable.
  //   We don't quantize the weights, in order to avoid inexactness in simple
  //   cases.
  // Instead we apply the delta when comparing subsets for equality, and allow a
  // small difference.

  class SubsetKey {
   public:
    size_t operator()(const std::vector<Element> *subset)
        const {  // hashes only the state and string.
      size_t hash = 0, factor = 1;
      for (typename std::vector<Element>::const_iterator iter = subset->begin();
           iter != subset->end(); ++iter) {
        // The running hash is scaled by a factor that itself grows with the
        // position, so the result depends on element order.
        hash *= factor;
        hash += iter->state + 103333 * iter->string;
        factor *= 23531;  // these numbers are primes.
      }
      return hash;
    }
  };

  // This is the equality operator on subsets.  It checks for exact match on
  // state-id and string, and approximate match on weights.
  class SubsetEqual {
   public:
    bool operator()(const std::vector<Element> *s1,
                    const std::vector<Element> *s2) const {
      size_t sz = s1->size();
      assert(sz >= 0);  // always true: sz is unsigned.
      if (sz != s2->size()) return false;
      typename std::vector<Element>::const_iterator iter1 = s1->begin(),
                                                    iter1_end = s1->end(),
                                                    iter2 = s2->begin();
      for (; iter1 < iter1_end; ++iter1, ++iter2) {
        if (iter1->state != iter2->state || iter1->string != iter2->string ||
            !ApproxEqual(iter1->weight, iter2->weight, delta_))
          return false;
      }
      return true;
    }
    float delta_;  // tolerance passed to ApproxEqual for weights.
    explicit SubsetEqual(float delta) : delta_(delta) {}
    SubsetEqual() : delta_(kDelta) {}
  };

  // Operator that says whether two Elements have the same states.
  // Used only for debug.
  // (Compares sizes and state ids only; strings and weights are ignored.)
  class SubsetEqualStates {
   public:
    bool operator()(const std::vector<Element> *s1,
                    const std::vector<Element> *s2) const {
      size_t sz = s1->size();
      assert(sz >= 0);  // always true: sz is unsigned.
      if (sz != s2->size()) return false;
      typename std::vector<Element>::const_iterator iter1 = s1->begin(),
                                                    iter1_end = s1->end(),
                                                    iter2 = s2->begin();
      for (; iter1 < iter1_end; ++iter1, ++iter2) {
        if (iter1->state != iter2->state) return false;
      }
      return true;
    }
  };

  // Define the hash type we use to store subsets.
  // Keys are heap-allocated subsets owned through this map (they are deleted
  // in FreeMostMemory); values are the output state ids.
  typedef unordered_map<const std::vector<Element> *, OutputStateId, SubsetKey,
                        SubsetEqual>
      SubsetHash;

  // Helper that computes the epsilon closure of a subset.  It holds
  // non-owning pointers to the input FST and the string repository, plus
  // scratch containers that are reused between calls.
  class EpsilonClosure {
   public:
    EpsilonClosure(const Fst<Arc> *ifst, int max_states,
                   StringRepository<Label, StringId> *repository, float delta)
        : ifst_(ifst),
          max_states_(max_states),
          repository_(repository),
          delta_(delta) {}

    // This function computes epsilon closure of subset of states by following
    // epsilon links. Called by ProcessSubset. Has no side effects except on the
    // repository.
    // (It also uses this object's scratch members queue_, queue_2_,
    // id_to_index_ and ecinfo_.)
    void GetEpsilonClosure(const std::vector<Element> &input_subset,
                           std::vector<Element> *output_subset);

   private:
    struct EpsilonClosureInfo {
      EpsilonClosureInfo() {}
      EpsilonClosureInfo(const Element &e, const Weight &w, bool i)
          : element(e), weight_to_process(w), in_queue(i) {}
      // the weight in the Element struct is the total current weight
      // that has been processed already
      Element element;
      // this stores the weight that we haven't processed (propagated)
      Weight weight_to_process;
      // whether "this" struct is in the queue
      // we store the info here so that we don't have to look it up every time
      bool in_queue;
      // Orders by state id only; used to sort ecinfo_ before output.
      bool operator<(const EpsilonClosureInfo &other) const {
        return this->element.state < other.element.state;
      }
    };

    // to further speed up EpsilonClosure() computation, we have 2 queues
    // the 2nd queue is used when we first iterate over the input set -
    // if queue_2_.empty() then we directly set output_set equal to input_set
    // and return immediately
    // Since Epsilon arcs are relatively rare, this way we could efficiently
    // detect the epsilon-free case, without having to waste our computation
    // e.g. allocating the EpsilonClosureInfo structure; this also lets us do a
    // level-by-level traversal, which could avoid some (unfortunately not all)
    // duplicate computation if epsilons form a DAG that is not a tree
    //
    // We put the queues here for better efficiency for memory allocation
    std::deque<typename Arc::StateId> queue_;
    std::vector<Element> queue_2_;

    // the following 2 structures together form our *virtual "map"*
    // basically we need a map from state_id to EpsilonClosureInfo that operates
    // in O(1) time, while still takes relatively small mem, and this does it
    // well for efficiency we don't clear id_to_index_ of its outdated
    // information As a result each time we do a look-up, we need to check if
    // (ecinfo_[id_to_index_[id]].element.state == id) Yet this is still faster
    // than using a std::map<StateId, EpsilonClosureInfo>
    std::vector<int> id_to_index_;
    // unlike id_to_index_, we clear the content of ecinfo_ each time we call
    // EpsilonClosure(). This needed because we need an efficient way to
    // traverse the virtual map - it is just too costly to traverse the
    // id_to_index_ vector.
    std::vector<EpsilonClosureInfo> ecinfo_;

    // Add one element (elem) into cur_subset
    // it also adds the necessary stuff to queue_, set the correct weight
    void AddOneElement(const Element &elem, const Weight &unprocessed_weight);

    // Sub-routine that we call in EpsilonClosure()
    // It takes the current "unprocessed_weight" and propagate it to the
    // states accessible from elem.state by an epsilon arc
    // and add the results to cur_subset.
    // save_to_queue_2 is set true when we iterate over the initial subset
    // - then we save it to queue_2 s.t. if it's empty, we directly return
    // the input set
    // `sorted` tells the routine the input FST is ilabel-sorted, so it may
    // stop scanning a state's arcs at the first positive input label.
    void ExpandOneElement(const Element &elem, bool sorted,
                          const Weight &unprocessed_weight,
                          bool save_to_queue_2 = false);

    // no pointers below would take the ownership
    const Fst<Arc> *ifst_;
    int max_states_;
    StringRepository<Label, StringId> *repository_;
    float delta_;
  };

  // This function works out the final-weight of the determinized state.
  // called by ProcessSubset.
  // Has no side effects except on the variable repository_, and output_arcs_.
  // All final Elements of the subset must carry the same output string;
  // otherwise the FST is reported as non-functional via KALDI_ERR.  Their
  // weights (element weight times final weight) are summed with Plus.

  void ProcessFinal(const std::vector<Element> &closed_subset,
                    OutputStateId state) {
    // processes final-weights for this subset.
    bool is_final = false;
    StringId final_string = 0;  // = 0 to keep compiler happy.
    Weight final_weight =
        Weight::One();  // This value will never be accessed, and
    // we just set it to avoid spurious compiler warnings.  We avoid setting it
    // to Zero() because floating-point infinities can sometimes generate
    // interrupts and slow things down.
    typename std::vector<Element>::const_iterator iter = closed_subset.begin(),
                                                  end = closed_subset.end();
    for (; iter != end; ++iter) {
      const Element &elem = *iter;
      Weight this_final_weight = ifst_->Final(elem.state);
      if (this_final_weight != Weight::Zero()) {
        if (!is_final) {  // first final-weight
          final_string = elem.string;
          final_weight = Times(elem.weight, this_final_weight);
          is_final = true;
        } else {  // already have one.
          if (final_string != elem.string) {
            KALDI_ERR << "FST was not functional -> not determinizable";
          }
          final_weight =
              Plus(final_weight, Times(elem.weight, this_final_weight));
        }
      }
    }
    if (is_final) {
      // store final weights in TempArc structure, just like a transition.
      TempArc temp_arc;
      temp_arc.ilabel = 0;
      temp_arc.nextstate =
          kNoStateId;  // special marker meaning "final weight".
      temp_arc.ostring = final_string;
      temp_arc.weight = final_weight;
      output_arcs_[state].push_back(temp_arc);
    }
  }

  // ProcessTransition is called from "ProcessTransitions".  Broken out for
  // clarity.  Has side effects on output_arcs_, and (via SubsetToStateId), Q_
  // and hash_.
  void ProcessTransition(OutputStateId state, Label ilabel,
                         std::vector<Element> *subset);

  // "less than" operator for pair<Label, Element>.   Used in
  // ProcessTransitions. Lexicographical order, with comparing the state only
  // for "Element".

  class PairComparator {
   public:
    inline bool operator()(const std::pair<Label, Element> &p1,
                           const std::pair<Label, Element> &p2) {
      if (p1.first < p2.first) {
        return true;
      } else if (p1.first > p2.first) {
        return false;
      } else {
        return p1.second.state < p2.second.state;
      }
    }
  };

  // ProcessTransitions handles transitions out of this subset of states.
  // Ignores epsilon transitions (epsilon closure already handled that).
  // Does not consider final states.  Breaks the transitions up by ilabel,
  // and creates a new transition in determinized FST, for each ilabel.
  // Does this by creating a big vector of pairs <Label, Element> and then
  // sorting them using a lexicographical ordering, and calling
  // ProcessTransition for each range with the same ilabel. Side effects on
  // repository, and (via ProcessTransition) on Q_, hash_, and output_arcs_.
  void ProcessTransitions(const std::vector<Element> &closed_subset,
                          OutputStateId state) {
    std::vector<std::pair<Label, Element> > all_elems;
    {  // Push back into "all_elems", elements corresponding to all
       // non-epsilon-input transitions
      // out of all states in "closed_subset".
      typename std::vector<Element>::const_iterator iter =
                                                        closed_subset.begin(),
                                                    end = closed_subset.end();
      for (; iter != end; ++iter) {
        const Element &elem = *iter;
        for (ArcIterator<Fst<Arc> > aiter(*ifst_, elem.state); !aiter.Done();
             aiter.Next()) {
          const Arc &arc = aiter.Value();
          if (arc.ilabel !=
              0) {  // Non-epsilon transition -- ignore epsilons here.
            std::pair<Label, Element> this_pr;
            this_pr.first = arc.ilabel;
            Element &next_elem(this_pr.second);
            next_elem.state = arc.nextstate;
            next_elem.weight = Times(elem.weight, arc.weight);
            if (arc.olabel == 0) {  // output epsilon-- this is simple case so
                                    // handle separately for efficiency
              next_elem.string = elem.string;
            } else {
              // Extend the source element's output string by this arc's
              // output label and intern the result in the repository.
              std::vector<Label> seq;
              repository_.SeqOfId(elem.string, &seq);
              seq.push_back(arc.olabel);
              next_elem.string = repository_.IdOfSeq(seq);
            }
            all_elems.push_back(this_pr);
          }
        }
      }
    }
    PairComparator pc;
    std::sort(all_elems.begin(), all_elems.end(), pc);
    // now sorted first on input label, then on state.
    typedef typename std::vector<std::pair<Label, Element> >::const_iterator
        PairIter;
    PairIter cur = all_elems.begin(), end = all_elems.end();
    std::vector<Element> this_subset;
    while (cur != end) {
      // Process ranges that share the same input symbol.
      Label ilabel = cur->first;
      this_subset.clear();
      while (cur != end && cur->first == ilabel) {
        this_subset.push_back(cur->second);
        cur++;
      }
      // We now have a subset for this ilabel.
      ProcessTransition(state, ilabel, &this_subset);
    }
  }

  // SubsetToStateId converts a subset (vector of Elements) to a StateId in the
  // output fst.  This is a hash lookup; if no such state exists, it adds a new
  // state to the hash and adds a new pair to the queue. Side effects on hash_
  // and Q_, and on output_arcs_ [just affects the size].
  // A new state's id is the current size of output_arcs_.  The heap copy of
  // the subset is stored both as the hash_ key and in Q_; it is freed through
  // hash_ in FreeMostMemory().
  OutputStateId SubsetToStateId(
      const std::vector<Element> &subset) {  // may add the subset to the queue.
    typedef typename SubsetHash::iterator IterType;
    IterType iter = hash_.find(&subset);
    if (iter == hash_.end()) {  // was not there.
      std::vector<Element> *new_subset = new std::vector<Element>(subset);
      OutputStateId new_state_id = (OutputStateId)output_arcs_.size();
      bool ans =
          hash_
              .insert(std::pair<const std::vector<Element> *, OutputStateId>(
                  new_subset, new_state_id))
              .second;
      assert(ans);
      output_arcs_.push_back(std::vector<TempArc>());
      if (allow_partial_ == false) {
        // If --allow-partial is not requested, we do the old way.
        // (push_front: the newest subset is the next one Determinize() pops.)
        Q_.push_front(std::pair<std::vector<Element> *, OutputStateId>(
            new_subset, new_state_id));
      } else {
        // If --allow-partial is requested, we do breadth first search. This
        // ensures that when we return partial results, we return the states
        // that are reachable by the fewest steps from the start state.
        Q_.push_back(std::pair<std::vector<Element> *, OutputStateId>(
            new_subset, new_state_id));
      }
      return new_state_id;
    } else {
      return iter->second;  // the OutputStateId.
    }
  }

  // ProcessSubset does the processing of a determinized state, i.e. it creates
  // transitions out of it and adds new determinized states to the queue if
  // necessary. The first stage is "EpsilonClosure" (follow epsilons to get a
  // possibly larger set of (states, weights)).  After that we ignore epsilons.
  // We process the final-weight of the state, and then handle transitions out
  // (this may add more determinized states to the queue).
  void ProcessSubset(
      const std::pair<std::vector<Element> *, OutputStateId> &pair) {
    const std::vector<Element> *subset = pair.first;
    OutputStateId state = pair.second;

    std::vector<Element> closed_subset;  // subset after epsilon closure.
    epsilon_closure_.GetEpsilonClosure(*subset, &closed_subset);

    // Now follow non-epsilon arcs [and also process final states]
    ProcessFinal(closed_subset, state);

    // Now handle transitions out of these states.
    ProcessTransitions(closed_subset, state);
  }

  // Debugging aid invoked from Determinize(); defined elsewhere in this file.
  void Debug();

  KALDI_DISALLOW_COPY_AND_ASSIGN(DeterminizerStar);
  std::deque<std::pair<std::vector<Element> *, OutputStateId> >
      Q_;  // queue of subsets to be processed.

  std::vector<std::vector<TempArc> >
      output_arcs_;  // essentially an FST in our format.

  // NOTE: the constructor's initializer list relies on the declaration order
  // below (hasher_ and equal_ before hash_; ifst_ and repository_ before
  // epsilon_closure_).  Do not reorder these members.
  const Fst<Arc> *ifst_;  // owned copy of the input; NULL once freed.
  float delta_;
  int max_states_;
  bool determinized_;   // used to check usage.
  bool allow_partial_;  // output partial results or not
  bool is_partial_;     // if we get partial results or not
  SubsetKey hasher_;    // object that computes keys-- has no data members.
  SubsetEqual
      equal_;  // object that compares subsets-- only data member is delta_.
  SubsetHash hash_;  // hash from Subset to StateId in final Fst.

  StringRepository<Label, StringId>
      repository_;  // associate integer id's with sequences of labels.
  EpsilonClosure epsilon_closure_;
};

template <class F>
bool DeterminizeStar(F &ifst,  // NOLINT
                     MutableFst<typename F::Arc> *ofst, float delta,
                     bool *debug_ptr, int max_states, bool allow_partial) {
  ofst->SetOutputSymbols(ifst.OutputSymbols());
  ofst->SetInputSymbols(ifst.InputSymbols());
  DeterminizerStar<F> det(ifst, delta, max_states, allow_partial);
  det.Determinize(debug_ptr);
  det.Output(ofst);
  return det.IsPartial();
}

template <class F>
bool DeterminizeStar(F &ifst,  // NOLINT
                     MutableFst<GallicArc<typename F::Arc> > *ofst, float delta,
                     bool *debug_ptr, int max_states, bool allow_partial) {
  ofst->SetOutputSymbols(ifst.InputSymbols());
  ofst->SetInputSymbols(ifst.InputSymbols());
  DeterminizerStar<F> det(ifst, delta, max_states, allow_partial);
  det.Determinize(debug_ptr);
  det.Output(ofst);
  return det.IsPartial();
}

// Computes the epsilon closure of `input_subset` and writes it, sorted on
// state id, to `*output_subset`.
//
// Phase 1 expands every input element once, collecting the directly
// epsilon-reachable elements in queue_2_.  If that finds nothing, the input
// is already closed and is copied to the output unchanged.  Otherwise the
// input elements and the queue_2_ elements are entered into the
// ecinfo_/id_to_index_ "virtual map", and queue_ is drained, propagating each
// state's pending weight along its epsilon arcs until nothing is left queued.
// Any weight still pending on an entry at the end is folded into its total.
//
// Reports an error through KALDI_ERR if max_states_ > 0 and the propagation
// loop runs more than max_states_ times.
template <class F>
void DeterminizerStar<F>::EpsilonClosure::GetEpsilonClosure(
    const std::vector<Element> &input_subset,
    std::vector<Element> *output_subset) {
  typedef typename std::vector<Element>::const_iterator ElemIter;

  ecinfo_.resize(0);

  // Whether the input fst is known to be sorted on input label; lets
  // ExpandOneElement stop early when scanning arcs.
  const bool sorted =
      ((ifst_->Properties(kILabelSorted, false) & kILabelSorted) != 0);

  // Phase 1: one expansion step from each input element, saved in queue_2_.
  for (ElemIter it = input_subset.begin(); it != input_subset.end(); ++it) {
    ExpandOneElement(*it, sorted, it->weight, true);
  }

  const size_t num_first_level = queue_2_.size();
  if (num_first_level == 0) {
    // No epsilon arcs leave the subset: it is its own closure.
    *output_subset = input_subset;
    return;
  }

  // Seed the virtual map with the input elements.  Their weight has not been
  // propagated yet, so all of it goes into weight_to_process and the
  // processed weight starts from Zero.
  for (ElemIter it = input_subset.begin(); it != input_subset.end(); ++it) {
    EpsilonClosureInfo seed(*it, it->weight, false);
    seed.element.weight = Weight::Zero();
    ecinfo_.push_back(seed);

    if (id_to_index_.size() < it->state + 1) {
      id_to_index_.resize(2 * it->state + 1, -1);
    }
    id_to_index_[it->state] = ecinfo_.size() - 1;
  }

  // Feed the first-level results into the map / work queue.
  {
    Element pending_elem;
    pending_elem.weight = Weight::Zero();
    for (size_t k = 0; k < num_first_level; k++) {
      pending_elem.state = queue_2_[k].state;
      pending_elem.string = queue_2_[k].string;
      AddOneElement(pending_elem, queue_2_[k].weight);
    }
    queue_2_.resize(0);
  }

  int counter = 0;  // relates to max-states option, used for test.
  while (!queue_.empty()) {
    const InputStateId id = queue_.front();

    // Everything in the queue is guaranteed to be in the virtual map, so the
    // index needs no validity check.
    EpsilonClosureInfo &info = ecinfo_[id_to_index_[id]];
    const Weight pending = info.weight_to_process;

    // Move the pending weight into the processed total.
    info.element.weight = Plus(info.element.weight, pending);
    info.weight_to_process = Weight::Zero();
    info.in_queue = false;
    queue_.pop_front();

    if (max_states_ > 0 && counter++ > max_states_) {
      KALDI_ERR << "Determinization aborted since looped more than "
                << max_states_ << " times during epsilon closure";
    }

    // info.element is a reference into ecinfo_, which may be reallocated
    // while expanding; ExpandOneElement copies what it needs from the element
    // up front to cope with that.
    ExpandOneElement(info.element, sorted, pending);
  }

  // Emit the closure in state-id order (EpsilonClosureInfo::operator<).
  sort(ecinfo_.begin(), ecinfo_.end());

  output_subset->clear();
  output_subset->reserve(ecinfo_.size());
  for (typename std::vector<EpsilonClosureInfo>::iterator it = ecinfo_.begin();
       it != ecinfo_.end(); ++it) {
    // Small weight changes were parked rather than propagated; add them now.
    if (it->weight_to_process != Weight::Zero()) {
      it->element.weight = Plus(it->element.weight, it->weight_to_process);
    }
    output_subset->push_back(it->element);
  }
}

// Merges `elem` (with weight `unprocessed_weight` still to be propagated)
// into the closure being built.
//
// If elem.state has no live entry in the virtual map, a new entry is created,
// marked as queued, and the state is appended to queue_.  If an entry already
// exists, its output string must match elem.string (otherwise the FST is not
// functional and an error is reported through KALDI_ERR); the weight is added
// to the entry's pending weight, and the state is re-queued only when it is
// not already queued and the pending weight changes the total by more than
// delta_.
template <class F>
void DeterminizerStar<F>::EpsilonClosure::AddOneElement(
    const Element &elem, const Weight &unprocessed_weight) {
  // Look the state up.  id_to_index_ is never cleared, so a hit only counts
  // if it points inside ecinfo_ at an entry for this very state.
  int slot = (elem.state < id_to_index_.size()) ? id_to_index_[elem.state] : -1;
  if (slot != -1 &&
      (slot >= ecinfo_.size() || ecinfo_[slot].element.state != elem.state)) {
    slot = -1;  // stale information from an earlier closure.
  }

  if (slot == -1) {
    // was no such StateId: insert and add to queue.
    ecinfo_.push_back(EpsilonClosureInfo(elem, unprocessed_weight, true));
    if (id_to_index_.size() < elem.state + 1) {
      // double the size to reduce memory operations
      id_to_index_.resize(2 * elem.state + 1, -1);
    }
    id_to_index_[elem.state] = ecinfo_.size() - 1;
    queue_.push_back(elem.state);
    return;
  }

  // one is already there.  Add weights.
  EpsilonClosureInfo &existing = ecinfo_[slot];
  if (existing.element.string != elem.string) {
    // Non-functional FST.  Include both strings in the message; this can
    // help when debugging inputs that are mysteriously non-functional.
    std::ostringstream msg;
    msg << "FST was not functional -> not determinizable.";
    std::vector<Label> labels;
    repository_->SeqOfId(existing.element.string, &labels);
    msg << "\nFirst string:";
    for (size_t k = 0; k < labels.size(); k++) msg << ' ' << labels[k];
    msg << "\nSecond string:";
    repository_->SeqOfId(elem.string, &labels);
    for (size_t k = 0; k < labels.size(); k++) msg << ' ' << labels[k];
    KALDI_ERR << msg.str();
  }

  existing.weight_to_process =
      Plus(existing.weight_to_process, unprocessed_weight);

  if (existing.in_queue) return;  // will be picked up when it is dequeued.

  // The pending weight may already have been non-Zero before this call, so
  // compare the would-be total against the processed weight.  Only a change
  // larger than delta_ is worth propagating; smaller amounts stay parked in
  // weight_to_process and are folded in just before the closure is returned.
  const Weight would_be_total =
      Plus(existing.element.weight, existing.weight_to_process);
  if (!ApproxEqual(would_be_total, existing.element.weight, delta_)) {
    existing.in_queue = true;
    queue_.push_back(elem.state);
  }
}

// Propagates `unprocessed_weight` from `elem` across every epsilon-input arc
// leaving elem.state.
//
// For each such arc a successor element is formed: its state is the arc's
// destination, its string is elem's string extended by the arc's output
// label (if that label is non-epsilon), and the weight to hand on is
// Times(unprocessed_weight, arc.weight).  With `save_to_queue_2` the
// successor is appended to queue_2_ carrying that weight; otherwise it is
// merged through AddOneElement.  When `sorted` is true the arc scan stops at
// the first arc with a positive input label.
template <class F>
void DeterminizerStar<F>::EpsilonClosure::ExpandOneElement(
    const Element &elem, bool sorted, const Weight &unprocessed_weight,
    bool save_to_queue_2) {
  // Take a copy now: `elem` may refer into a container that gets reallocated
  // while successors are being added (this really happens for some FSTs).
  const StringId base_string = elem.string;

  for (ArcIterator<Fst<Arc> > aiter(*ifst_, elem.state); !aiter.Done();
       aiter.Next()) {
    const Arc &arc = aiter.Value();
    if (arc.ilabel != 0) {
      // Only epsilons are processed here.  With sorted arcs, a positive
      // label means no further epsilon-input arcs can follow.
      if (sorted && arc.ilabel > 0) break;
      continue;
    }

    const Weight weight_to_pass = Times(unprocessed_weight, arc.weight);

    Element successor;
    successor.state = arc.nextstate;
    if (arc.olabel == 0) {
      successor.string = base_string;
    } else {
      // Append the arc's output label to the string carried so far.
      std::vector<Label> seq;
      repository_->SeqOfId(base_string, &seq);
      seq.push_back(arc.olabel);
      successor.string = repository_->IdOfSeq(seq);
    }

    if (save_to_queue_2) {
      successor.weight = weight_to_pass;
      queue_2_.push_back(successor);
    } else {
      successor.weight = Weight::Zero();
      AddOneElement(successor, weight_to_pass);
    }
  }
}

// Writes the determinized result to the Gallic acceptor `*ofst`; output
// state i corresponds to internal state i, and each output string is carried
// on the arc (or final) weight.
//
// Requires a prior Determinize() (asserted).  With `destroy` set, the
// determinizer is marked as no longer determinized, FreeMostMemory() is
// called up front, and output_arcs_ is released state by state while
// writing, so Output cannot be called again afterwards.  An empty result
// leaves `*ofst` with no states and start state kNoStateId.
template <class F>
void DeterminizerStar<F>::Output(MutableFst<GallicArc<Arc> > *ofst,
                                 bool destroy) {
  typedef GallicWeight<Label, Weight> ThisGallicWeight;
  typedef typename Arc::StateId StateId;

  assert(determinized_);
  if (destroy) {
    determinized_ = false;
    FreeMostMemory();
  }

  const StateId num_states = static_cast<StateId>(output_arcs_.size());
  ofst->DeleteStates();
  ofst->SetStart(kNoStateId);
  if (num_states == 0) return;

  // Create all states first so ids line up one-to-one.
  for (StateId s = 0; s < num_states; s++) {
    OutputStateId news = ofst->AddState();
    assert(news == s);
  }
  ofst->SetStart(0);

  // now process transitions.
  for (StateId src = 0; src < num_states; src++) {
    std::vector<TempArc> &arcs = output_arcs_[src];
    for (size_t a = 0; a < arcs.size(); a++) {
      const TempArc &temp_arc = arcs[a];

      // Build the Gallic weight: (output label sequence, original weight).
      std::vector<Label> seq;
      repository_.SeqOfId(temp_arc.ostring, &seq);
      StringWeight<Label, STRING_LEFT> string_weight;
      for (typename std::vector<Label>::const_iterator lab = seq.begin();
           lab != seq.end(); ++lab) {
        string_weight.PushBack(*lab);
      }
      ThisGallicWeight gallic_weight(string_weight, temp_arc.weight);

      if (temp_arc.nextstate == kNoStateId) {
        // Not an arc at all: this entry encodes the state's final weight.
        ofst->SetFinal(src, gallic_weight);
        continue;
      }

      GallicArc<Arc> new_arc;
      new_arc.ilabel = temp_arc.ilabel;
      new_arc.olabel = temp_arc.ilabel;  // acceptor.  input == output.
      new_arc.weight = gallic_weight;    // includes string and weight.
      new_arc.nextstate = temp_arc.nextstate;
      ofst->AddArc(src, new_arc);
    }
    // Free up memory.  Do this inside the loop as ofst is also allocating
    // memory
    if (destroy) {
      std::vector<TempArc>().swap(arcs);
    }
  }
  if (destroy) {
    std::vector<std::vector<TempArc> >().swap(output_arcs_);
  }
}

// Writes the determinized result held in output_arcs_ to a plain
// MutableFst<Arc>.
//
// Each stored TempArc carries an input label, a weight and a StringId naming
// a sequence of output labels.  Because an ordinary arc has a single output
// label, a sequence of length k is expanded into a chain of arcs through
// freshly added intermediate states; only the first arc of a chain carries
// the input label and the weight, the rest have epsilon input and
// Weight::One().  A TempArc whose nextstate is kNoStateId encodes a final
// weight rather than a transition.
//
// @param ofst     Destination FST; its previous contents are deleted.
// @param destroy  If true, internal storage is released while writing and
//                 determinized_ is cleared, so Output() cannot be called a
//                 second time on this object.
template <class F>
void DeterminizerStar<F>::Output(MutableFst<Arc> *ofst, bool destroy) {
  assert(determinized_);
  if (destroy) determinized_ = false;
  // Outputs to standard fst.
  OutputStateId num_states = static_cast<OutputStateId>(output_arcs_.size());
  if (destroy) FreeMostMemory();
  ofst->DeleteStates();
  if (num_states == 0) {
    // Nothing was determinized: leave ofst empty with no start state.
    ofst->SetStart(kNoStateId);
    return;
  }
  // Add basic states-- but will add extra ones to account for strings on
  // output.  These are added first so that output state s corresponds to
  // output_arcs_[s]; chain states created below get higher ids.
  for (OutputStateId s = 0; s < num_states; s++) {
    OutputStateId news = ofst->AddState();
    assert(news == s);
  }
  ofst->SetStart(0);
  for (OutputStateId this_state = 0; this_state < num_states; this_state++) {
    std::vector<TempArc> &this_vec(output_arcs_[this_state]);

    typename std::vector<TempArc>::const_iterator iter = this_vec.begin(),
                                                  end = this_vec.end();
    for (; iter != end; ++iter) {
      const TempArc &temp_arc(*iter);
      // Expand the StringId into the explicit sequence of output labels.
      std::vector<Label> seq;
      repository_.SeqOfId(temp_arc.ostring, &seq);
      if (temp_arc.nextstate == kNoStateId) {  // Really a final weight.
        // Make a sequence of states going to a final state, with the strings as
        // labels. Put the weight on the first arc.
        OutputStateId cur_state = this_state;
        for (size_t i = 0; i < seq.size(); i++) {
          OutputStateId next_state = ofst->AddState();
          Arc arc;
          arc.nextstate = next_state;
          arc.weight = (i == 0 ? temp_arc.weight : Weight::One());
          arc.ilabel = 0;  // epsilon.
          arc.olabel = seq[i];
          ofst->AddArc(cur_state, arc);
          cur_state = next_state;
        }
        // With an empty string there is no chain, so the weight goes directly
        // on this_state; otherwise it was already placed on the first arc.
        ofst->SetFinal(cur_state,
                       (seq.size() == 0 ? temp_arc.weight : Weight::One()));
      } else {  // Really an arc.
        OutputStateId cur_state = this_state;
        // Have to be careful with this integer comparison (i+1 < seq.size())
        // because unsigned. i < seq.size()-1 could fail for zero-length
        // sequences.
        for (size_t i = 0; i + 1 < seq.size(); i++) {
          // for all but the last element of seq, create new state.
          OutputStateId next_state = ofst->AddState();
          Arc arc;
          arc.nextstate = next_state;
          arc.weight = (i == 0 ? temp_arc.weight : Weight::One());
          arc.ilabel = (i == 0 ? temp_arc.ilabel
                               : 0);  // put ilabel on first element of seq.
          arc.olabel = seq[i];
          ofst->AddArc(cur_state, arc);
          cur_state = next_state;
        }
        // Add the final arc in the sequence.  When seq has at most one
        // element the loop above added nothing, so this single arc carries
        // the input label and weight itself; an empty seq gives an epsilon
        // output label.
        Arc arc;
        arc.nextstate = temp_arc.nextstate;
        arc.weight = (seq.size() <= 1 ? temp_arc.weight : Weight::One());
        arc.ilabel = (seq.size() <= 1 ? temp_arc.ilabel : 0);
        arc.olabel = (seq.size() > 0 ? seq.back() : 0);
        ofst->AddArc(cur_state, arc);
      }
    }
    // Free up memory.  Do this inside the loop as ofst is also allocating
    // memory
    if (destroy) {
      // Swapping with an empty vector releases this state's arc storage.
      std::vector<TempArc> temp;
      temp.swap(this_vec);
    }
  }
  if (destroy) {
    // Release the outer container and the string repository as well.
    std::vector<std::vector<TempArc> > temp;
    temp.swap(output_arcs_);
    repository_.Destroy();
  }
}

// Records one output transition leaving `state` on input label `ilabel`.
//
// `subset` holds the destination Elements (state, string, weight) reached on
// that label.  The function
//   1. merges Elements that share a state, adding their weights with Plus()
//      (it is an error if their strings differ);
//   2. factors out the longest common string prefix and the total weight,
//      which become the output string and weight of the new arc;
//   3. maps the normalized subset to an output state id via SubsetToStateId
//      and appends the resulting TempArc to output_arcs_[state].
//
// `subset` is modified in place.  It is read unconditionally in step 2, so it
// is assumed to be non-empty -- NOTE(review): confirm against callers.
template <class F>
void DeterminizerStar<F>::ProcessTransition(OutputStateId state, Label ilabel,
                                            std::vector<Element> *subset) {
  // At input, "subset" may contain duplicates for a given dest state (but in
  // sorted order).  This function removes duplicates from "subset", normalizes
  // it, and adds a transition to the dest. state (possibly affecting Q_ and
  // hash_, if state did not exist).

  typedef typename std::vector<Element>::iterator IterType;
  {  // This block makes the subset have one unique Element per state, adding
     // the weights.
    IterType cur_in = subset->begin(), cur_out = cur_in, end = subset->end();
    size_t num_out = 0;
    // Merge elements with same state-id
    while (cur_in != end) {  // while we have more elements to process.
      // At this point, cur_out points to location of next place we want to put
      // an element, cur_in points to location of next element we want to
      // process.
      if (cur_in != cur_out) *cur_out = *cur_in;
      cur_in++;
      while (cur_in != end &&
             cur_in->state == cur_out->state) {  // merge elements.
        // Two paths to the same state with different pending strings cannot
        // be represented by a single Element.
        if (cur_in->string != cur_out->string) {
          KALDI_ERR << "FST was not functional -> not determinizable";
        }
        cur_out->weight = Plus(cur_out->weight, cur_in->weight);
        cur_in++;
      }
      cur_out++;
      num_out++;
    }
    // Drop the now-unused tail left over after compaction.
    subset->resize(num_out);
  }

  StringId common_str;
  Weight tot_weight;
  {  // This block computes common_str and tot_weight (essentially: the common
     // divisor)
    // and removes them from the elements.
    std::vector<Label> seq;

    IterType begin = subset->begin(), iter, end = subset->end();
    {  // This block computes "seq", which is the common prefix, and
       // "common_str",
      // which is the StringId version of "seq".
      std::vector<Label> tmp_seq;
      for (iter = begin; iter != end; ++iter) {
        if (iter == begin) {
          repository_.SeqOfId(iter->string, &seq);
        } else {
          repository_.SeqOfId(iter->string, &tmp_seq);
          if (tmp_seq.size() < seq.size())
            seq.resize(tmp_seq.size());  // size of shortest one.
          // Truncate seq at the first mismatch; the resize also ends the loop
          // because i is then no longer < seq.size().
          for (size_t i = 0; i < seq.size();
               i++)  // seq.size() is the shorter one at this point.
            if (tmp_seq[i] != seq[i]) seq.resize(i);
        }
        if (seq.size() == 0) break;  // will not get any prefix.
      }
      common_str = repository_.IdOfSeq(seq);
    }

    {  // This block computes "tot_weight".
      iter = begin;
      tot_weight = iter->weight;
      for (++iter; iter != end; ++iter)
        tot_weight = Plus(tot_weight, iter->weight);
    }

    // Now divide out common stuff from elements.
    size_t prefix_len = seq.size();
    for (iter = begin; iter != end; ++iter) {
      iter->weight = Divide(iter->weight, tot_weight);
      iter->string = repository_.RemovePrefix(iter->string, prefix_len);
    }
  }

  // Now add an arc to the state that the subset represents.
  // We may create a new state id for this (in SubsetToStateId).
  TempArc temp_arc;
  temp_arc.ilabel = ilabel;
  temp_arc.nextstate =
      SubsetToStateId(*subset);  // may or may not really add the subset.
  temp_arc.ostring = common_str;
  temp_arc.weight = tot_weight;
  output_arcs_[state].push_back(temp_arc);  // record the arc.
}

// Prints a traceback from a recently constructed output state to the start
// state and then reports it through KALDI_ERR.
//
// This function is called if you send a signal SIGUSR1 to the process (and
// it's caught by the handler in fstdeterminizestar).  The traceback lists,
// for each arc on one path from state 0, the input label followed by the
// output-label sequence of that arc.
//
// The subset hash is released first because little memory may be available
// at the point where this is invoked.
template <class F>
void DeterminizerStar<F>::Debug() {
  KALDI_WARN << "Debug function called (probably SIGUSR1 caught)";
  // free up memory from the hash as we need a little memory
  {
    SubsetHash hash_tmp;
    std::swap(hash_tmp, hash_);
  }

  if (output_arcs_.size() <= 2) {
    KALDI_ERR << "Nothing to trace back";
  }
  size_t max_state = output_arcs_.size() - 2;  // don't take the last
  // one as we might be halfway into constructing it.

  // predecessor[s] is some lower-numbered state with an arc into s, or
  // kNoStateId if none was found.
  std::vector<OutputStateId> predecessor(max_state + 1, kNoStateId);
  for (size_t i = 0; i < max_state; i++) {
    for (size_t j = 0; j < output_arcs_[i].size(); j++) {
      OutputStateId nextstate = output_arcs_[i][j].nextstate;
      // Always find an earlier-numbered predecessor; this
      // is always possible because of the way the algorithm
      // works.
      if (nextstate <= max_state && nextstate > i) predecessor[nextstate] = i;
    }
  }
  std::vector<std::pair<Label, StringId> > traceback;
  // 'traceback' is a pair of (ilabel, olabel-seq).
  OutputStateId cur_state = max_state;  // A recently constructed state.

  while (cur_state != 0 && cur_state != kNoStateId) {
    OutputStateId last_state = predecessor[cur_state];
    if (last_state == kNoStateId) {
      // No predecessor was recorded for cur_state.  Stop here instead of
      // indexing output_arcs_ with kNoStateId; the warning below reports
      // that the start state was not reached.
      cur_state = kNoStateId;
      break;
    }
    std::pair<Label, StringId> p;
    size_t i;
    for (i = 0; i < output_arcs_[last_state].size(); i++) {
      if (output_arcs_[last_state][i].nextstate == cur_state) {
        p.first = output_arcs_[last_state][i].ilabel;
        p.second = output_arcs_[last_state][i].ostring;
        traceback.push_back(p);
        break;
      }
    }
    KALDI_ASSERT(i != output_arcs_[last_state].size());  // Or fell off loop.
    cur_state = last_state;
  }
  if (cur_state == kNoStateId)
    KALDI_WARN << "Traceback did not reach start state "
               << "(possibly debug-code error)";

  // The traceback was collected from the end of the path backwards, so it is
  // printed in reverse to read from the start state forwards.
  std::stringstream ss;
  ss << "Traceback follows in format "
     << "ilabel (olabel olabel) ilabel (olabel) ... :";
  for (ssize_t i = traceback.size() - 1; i >= 0; i--) {
    ss << ' ' << traceback[i].first << " ( ";
    std::vector<Label> seq;
    repository_.SeqOfId(traceback[i].second, &seq);
    for (size_t j = 0; j < seq.size(); j++) ss << seq[j] << ' ';
    ss << ')';
  }
  KALDI_ERR << ss.str();
}

}  // namespace fst

#endif  // KALDI_FSTEXT_DETERMINIZE_STAR_INL_H_


================================================
FILE: runtime/engine/kaldi/fstext/determinize-star.h
================================================
// fstext/determinize-star.h

// Copyright 2009-2011  Microsoft Corporation
//                2014  Guoguo Chen
//                2015  Hainan Xu

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_FSTEXT_DETERMINIZE_STAR_H_
#define KALDI_FSTEXT_DETERMINIZE_STAR_H_
#include <fst/fst-decl.h>
#include <fst/fstlib.h>
#include <algorithm>
#include <map>
#include <set>
#include <stdexcept>  // this algorithm uses exceptions
#include <vector>

namespace fst {

/// \addtogroup fst_extensions
///  @{

// For example of usage, see test-determinize-star.cc

/*
   DeterminizeStar implements determinization with epsilon removal, which we
   distinguish with a star.

   We define a determinized* FST as one in which no state has more than one
   transition with the same input-label.  Epsilon input labels are not allowed
   except starting from states that have exactly one arc exiting them (and are
   not final).  [In the normal definition of determinized, epsilon-input labels
   are not allowed at all, whereas in Mohri's definition, epsilons are treated
   as ordinary symbols].  The determinized* definition is intended to simulate
   the effect of allowing strings of output symbols at each state.

   The algorithm implemented here takes an Fst<Arc>, and a pointer to a
   MutableFst<Arc> where it puts its output.  The weight type is assumed to be a
   float-weight.  It does epsilon removal and determinization.
   This algorithm may fail if the input has epsilon cycles under
   certain circumstances (i.e. the semiring is non-idempotent, e.g. the log
   semiring, or there are negative cost epsilon cycles).

   This implementation is much less fancy than the one in fst/determinize.h, and
   does not have an "on-demand" version.

   The algorithm is a fairly normal determinization algorithm.  We keep in
   memory the subsets of states, together with their leftover strings and their
   weights.  The only difference is we detect input epsilon transitions and
   treat them "specially".
*/

// This algorithm will be slightly faster if you sort the input fst on input
// label.

/**
    This function implements the normal version of DeterminizeStar, in which the
    output strings are represented using sequences of arcs, where all but the
    first one has an epsilon on the input side.

    The debug_ptr argument is an optional pointer to a bool; if the pointed-to
    value becomes true while the algorithm is executing, the algorithm will
    print a traceback and terminate (used in fstdeterminizestar.cc to debug
    non-terminating determinization).

    If max_states is positive, determinization stops and an exception is
    thrown as soon as max_states is reached.  This can be useful in tests.
    If allow_partial is true, the algorithm will instead output partial results
    when the specified max_states is reached (when larger than zero), rather
    than throwing an error.

    Caution, the return status is un-intuitive: this function will return
    false if determinization completed normally, and true if it was stopped
    early by reaching the 'max-states' limit, and a partial FST was generated.
*/
template <class F>
bool DeterminizeStar(F &ifst, MutableFst<typename F::Arc> *ofst,  // NOLINT
                     float delta = kDelta, bool *debug_ptr = NULL,
                     int max_states = -1, bool allow_partial = false);

/*  This is a version of DeterminizeStar with a slightly more "natural" output
    format, where the output sequences are encoded using the GallicArc (i.e.
    the output symbols are strings).

    If max_states is positive, determinization stops and an exception is
    thrown as soon as max_states is reached.  This can be useful in tests.
    If allow_partial is true, the algorithm will instead output partial results
    when the specified max_states is reached (when larger than zero), rather
    than throwing an error.

    Caution, the return status is un-intuitive: this function will return
    false if determinization completed normally, and true if it was stopped
    early by reaching the 'max-states' limit, and a partial FST was generated.
*/
template <class F>
bool DeterminizeStar(F &ifst,  // NOLINT
                     MutableFst<GallicArc<typename F::Arc> > *ofst,
                     float delta = kDelta, bool *debug_ptr = NULL,
                     int max_states = -1, bool allow_partial = false);

/// @} end "addtogroup fst_extensions"

}  // end namespace fst

#include "fstext/determinize-star-inl.h"

#endif  // KALDI_FSTEXT_DETERMINIZE_STAR_H_


================================================
FILE: runtime/engine/kaldi/fstext/fstext-lib.h
================================================
// fstext/fstext-lib.h

// Copyright 2009-2012  Microsoft Corporation  Johns Hopkins University (author:
// Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_FSTEXT_FSTEXT_LIB_H_
#define KALDI_FSTEXT_FSTEXT_LIB_H_

#include "fst/fstlib.h"
#include "fstext/determinize-lattice.h"
#include "fstext/determinize-star.h"
#include "fstext/fstext-utils.h"
#include "fstext/kaldi-fst-io.h"
#include "fstext/lattice-utils.h"
#include "fstext/lattice-weight.h"
#include "fstext/pre-determinize.h"
#include "fstext/table-matcher.h"

#endif  // KALDI_FSTEXT_FSTEXT_LIB_H_


================================================
FILE: runtime/engine/kaldi/fstext/fstext-utils-inl.h
================================================
// fstext/fstext-utils-inl.h

// Copyright 2009-2012  Microsoft Corporation  Johns Hopkins University (Author:
// Daniel Povey)
//                2014  Telepoint Global Hosting Service, LLC. (Author: David
//                Snyder)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_FSTEXT_FSTEXT_UTILS_INL_H_
#define KALDI_FSTEXT_FSTEXT_UTILS_INL_H_

#include <algorithm>
#include <cstring>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "base/kaldi-common.h"
#include "fstext/determinize-star.h"
#include "fstext/pre-determinize.h"
#include "util/const-integer-set.h"
#include "util/kaldi-io.h"
#include "util/stl-utils.h"
#include "util/text-utils.h"

namespace fst {

template <class Arc>
typename Arc::Label HighestNumberedOutputSymbol(const Fst<Arc> &fst) {
  typename Arc::Label ans = 0;
  for (StateIterator<Fst<Arc> > siter(fst); !siter.Done(); siter.Next()) {
    typename Arc::StateId s = siter.Value();
    for (ArcIterator<Fst<Arc> > aiter(fst, s); !aiter.Done(); aiter.Next()) {
      const Arc &arc = aiter.Value();
      ans = std::max(ans, arc.olabel);
    }
  }
  return ans;
}

template <class Arc>
typename Arc::Label HighestNumberedInputSymbol(const Fst<Arc> &fst) {
  typename Arc::Label ans = 0;
  for (StateIterator<Fst<Arc> > siter(fst); !siter.Done(); siter.Next()) {
    typename Arc::StateId s = siter.Value();
    for (ArcIterator<Fst<Arc> > aiter(fst, s); !aiter.Done(); aiter.Next()) {
      const Arc &arc = aiter.Value();
      ans = std::max(ans, arc.ilabel);
    }
  }
  return ans;
}

// Returns the total number of arcs in `fst`, summed over all of its states.
// The count is accumulated in (and returned as) Arc::StateId.
template <class Arc>
typename Arc::StateId NumArcs(const ExpandedFst<Arc> &fst) {
  typedef typename Arc::StateId StateId;
  StateId total = 0;
  StateId state = 0;
  while (state < fst.NumStates()) {
    total += fst.NumArcs(state);
    ++state;
  }
  return total;
}

// Collects the distinct output labels that occur on the arcs of `fst`.
//
// @param fst          FST whose arcs are scanned.
// @param include_eps  If false, label 0 (epsilon) is left out of the result.
// @param symbols      Output; must be non-NULL.  Filled from an ordered
//                     std::set via kaldi::CopySetToVector.
template <class Arc, class I>
void GetOutputSymbols(const Fst<Arc> &fst, bool include_eps,
                      std::vector<I> *symbols) {
  KALDI_ASSERT_IS_INTEGER_TYPE(I);
  KALDI_ASSERT(symbols != NULL);
  std::set<I> all_syms;
  for (StateIterator<Fst<Arc> > siter(fst); !siter.Done(); siter.Next()) {
    typename Arc::StateId s = siter.Value();
    for (ArcIterator<Fst<Arc> > aiter(fst, s); !aiter.Done(); aiter.Next()) {
      const Arc &arc = aiter.Value();
      all_syms.insert(arc.olabel);
    }
  }

  // Remove epsilon, if instructed.  Erase by key rather than testing the
  // smallest element: if a negative label is present, 0 is not at begin()
  // and would otherwise be left in.  erase() is a no-op when 0 is absent.
  if (!include_eps) all_syms.erase(0);
  kaldi::CopySetToVector(all_syms, symbols);
}

template <class Arc, class I>
void GetInputSymbols(const Fst<Arc> &fst, bool include_eps,
                     std::vector<I> *symbols) {
  KALDI_ASSERT_IS_INTEGER_TYPE(I);
  unordered_set<I> all_syms;
  for (StateIterator<Fst<Arc> > siter(fst); !siter.Done(); siter.Next()) {
    typename Arc::StateId s = siter.Value();
    for (ArcIterator<Fst<Arc> > aiter(fst, s); !aiter.Done(); aiter.Next()) {
      const Arc &arc = aiter.Value();
      all_syms.insert(arc.ilabel);
    }
  }
  // Remove epsilon, if instructed.
  if (!include_eps && all_syms.count(0) != 0) all_syms.erase(0);
  KALDI_ASSERT(symbols != NULL);
  kaldi::CopySetToVector(all_syms, symbols);
  std::sort(symbols->begin(), symbols->end());
}

// Arc mapper that replaces a chosen set of input labels with epsilon (0).
// Output labels, weights and destination states are passed through.
template <class Arc, class I>
class RemoveSomeInputSymbolsMapper {
 public:
  // `to_remove` lists the input labels to be turned into epsilon.  It must
  // not contain 0.
  explicit RemoveSomeInputSymbolsMapper(const std::vector<I> &to_remove)
      : to_remove_set_(to_remove) {
    KALDI_ASSERT_IS_INTEGER_TYPE(I);
    assert(to_remove_set_.count(0) == 0);  // makes no sense to remove epsilon.
  }

  // Returns a copy of `arc_in` whose input label is 0 if it was in the
  // removal set, and unchanged otherwise.
  Arc operator()(const Arc &arc_in) {
    Arc mapped(arc_in);
    const bool strip = (to_remove_set_.count(mapped.ilabel) != 0);
    if (strip) mapped.ilabel = 0;  // remove this symbol
    return mapped;
  }

  MapFinalAction FinalAction() { return MAP_NO_SUPERFINAL; }
  MapSymbolsAction InputSymbolsAction() { return MAP_CLEAR_SYMBOLS; }
  MapSymbolsAction OutputSymbolsAction() { return MAP_COPY_SYMBOLS; }

  // Clears every property bit whose truth may change once some input labels
  // have become epsilon; all other bits are kept.
  uint64 Properties(uint64 props) const {
    const uint64 no_longer_known = kAcceptor | kNotAcceptor | kIDeterministic |
                                   kNonIDeterministic | kNoEpsilons |
                                   kNoIEpsilons | kILabelSorted |
                                   kNotILabelSorted;
    return props & ~no_longer_known;
  }

 private:
  kaldi::ConstIntegerSet<I> to_remove_set_;
};

// Alias for an ArcMapFst (Arc -> Arc) whose mapper is
// RemoveSomeInputSymbolsMapper<Arc, I>; this is the type returned by
// LookaheadComposeFst.
template <class Arc, class I>
using LookaheadFst = ArcMapFst<Arc, Arc, RemoveSomeInputSymbolsMapper<Arc, I> >;

// Lookahead composition is used for optimized online
// composition of FSTs during decoding. See
// nnet3/nnet3-latgen-faster-lookahead.cc. For details of compose filters
// see DefaultLookAhead in fst/compose.h
// Builds ComposeFst(ifst1, ifst2) wrapped in an ArcMapFst that turns the
// input labels listed in `to_remove` into epsilon.
//
// @param ifst1, ifst2  FSTs to compose.
// @param to_remove     Input labels to replace with epsilon (must not
//                      contain 0, see RemoveSomeInputSymbolsMapper).
// @return  A LookaheadFst allocated with `new`; the caller is responsible
//          for deleting it.
//
// NOTE(review): the original also declared an unused
// `CacheOptions cache_opts_map(true, 0)`.  It has been removed because it was
// never read; if it was meant to configure the ArcMapFst cache instead of
// `cache_opts`, that would be a behavior change and needs confirmation.
template <class Arc, class I>
LookaheadFst<Arc, I> *LookaheadComposeFst(const Fst<Arc> &ifst1,
                                          const Fst<Arc> &ifst2,
                                          const std::vector<I> &to_remove) {
  // The same cache options (gc enabled, limit 1 << 25) are used for both the
  // composition and the arc-map wrapper.
  fst::CacheOptions cache_opts(true, 1 << 25LL);
  fst::ArcMapFstOptions arcmap_opts(cache_opts);
  RemoveSomeInputSymbolsMapper<Arc, I> mapper(to_remove);
  return new LookaheadFst<Arc, I>(ComposeFst<Arc>(ifst1, ifst2, cache_opts),
                                  mapper, arcmap_opts);
}

template <class Arc, class I>
void RemoveSomeInputSymbols(const std::vector<I> &to_remove,
                            MutableFst<Arc> *fst) {
  KALDI_ASSERT_IS_INTEGER_TYPE(I);
  RemoveSomeInputSymbolsMapper<Arc, I> mapper(to_remove);
  Map(fst, mapper);
}

// Arc mapper that rewrites input labels through a lookup table:
// a label l with 0 < l < table.size() becomes table[l]; every other label
// (including 0) is left unchanged.
//
// The table is either copied and owned by the mapper, or merely referenced.
// Copying a mapper that owns its table makes a deep copy, so each owning
// instance deletes its own vector exactly once.
template <class Arc, class I>
class MapInputSymbolsMapper {
 public:
  // Returns `arc_in` with its input label remapped as described above.
  Arc operator()(const Arc &arc_in) {
    Arc ans = arc_in;
    if (ans.ilabel > 0 && ans.ilabel < static_cast<typename Arc::Label>(
                                           (*symbol_mapping_).size()))
      ans.ilabel = (*symbol_mapping_)[ans.ilabel];
    return ans;
  }
  MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; }
  MapSymbolsAction InputSymbolsAction() const { return MAP_CLEAR_SYMBOLS; }
  MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS; }

  // Computes the property bits of the mapped FST from those of the input.
  // NOTE(review): `remove_epsilons` is derived from table entry 0, but
  // operator() only remaps labels > 0, so entry 0 is never applied; the
  // kNoEpsilons bits added below may therefore be over-claimed when
  // table[0] != 0.  Left as in the original ("Not tested") -- confirm.
  uint64 Properties(uint64 props) const {  // Not tested.
    bool remove_epsilons =
        (symbol_mapping_->size() > 0 && (*symbol_mapping_)[0] != 0);
    bool add_epsilons = (symbol_mapping_->size() > 1 &&
                         *std::min_element(symbol_mapping_->begin() + 1,
                                           symbol_mapping_->end()) == 0);

    // remove the following as we don't know now if any of them are true.
    uint64 props_to_remove = kAcceptor | kNotAcceptor | kIDeterministic |
                             kNonIDeterministic | kILabelSorted |
                             kNotILabelSorted;
    if (remove_epsilons) props_to_remove |= kEpsilons | kIEpsilons;
    if (add_epsilons) props_to_remove |= kNoEpsilons | kNoIEpsilons;
    uint64 props_to_add = 0;
    if (remove_epsilons && !add_epsilons)
      props_to_add |= kNoEpsilons | kNoIEpsilons;
    return (props & ~props_to_remove) | props_to_add;
  }

  // initialize with copy = false only if the "to_remove" argument will not be
  // deleted in the lifetime of this object.
  MapInputSymbolsMapper(const std::vector<I> &to_remove, bool copy) {
    KALDI_ASSERT_IS_INTEGER_TYPE(I);
    if (copy)
      symbol_mapping_ = new std::vector<I>(to_remove);
    else
      symbol_mapping_ = &to_remove;
    owned = copy;
  }

  // Copy constructor: duplicates the table when `other` owns it, otherwise
  // shares the same borrowed pointer.  Without this, the implicit copy would
  // leave two owners of one vector and the destructor would delete it twice.
  MapInputSymbolsMapper(const MapInputSymbolsMapper &other)
      : owned(other.owned),
        symbol_mapping_(other.owned
                            ? new std::vector<I>(*other.symbol_mapping_)
                            : other.symbol_mapping_) {}

  // Copy assignment with the same ownership rules as the copy constructor.
  MapInputSymbolsMapper &operator=(const MapInputSymbolsMapper &other) {
    if (this != &other) {
      // Build the replacement first so *this stays valid if `new` throws.
      const std::vector<I> *replacement =
          other.owned ? new std::vector<I>(*other.symbol_mapping_)
                      : other.symbol_mapping_;
      if (owned) delete symbol_mapping_;
      symbol_mapping_ = replacement;
      owned = other.owned;
    }
    return *this;
  }

  ~MapInputSymbolsMapper() {
    if (owned && symbol_mapping_ != NULL) delete symbol_mapping_;
  }

 private:
  bool owned;                             // true if we must delete the table.
  const std::vector<I> *symbol_mapping_;  // lookup table, indexed by label.
};

template <class Arc, class I>
void MapInputSymbols(const std::vector<I> &symbol_mapping,
                     MutableFst<Arc> *fst) {
  KALDI_ASSERT_IS_INTEGER_TYPE(I);
  // false == don't copy the "symbol_mapping", retain pointer--
  // safe since short-lived object.
  MapInputSymbolsMapper<Arc, I> mapper(symbol_mapping, false);
  Map(fst, mapper);
}

template <class Arc, class I>
bool GetLinearSymbolSequence(const Fst<Arc> &fst, std::vector<I> *isymbols_out,
                             std::vector<I> *osymbols_out,
                             typename Arc::Weight *tot_weight_out) {
  typedef typename Arc::StateId StateId;
  typedef typename Arc::Weight Weight;

  Weight tot_weight = Weight::One();
  std::vector<I> ilabel_seq;
  std::vector<I> olabel_seq;

  StateId cur_state = fst.Start();
  if (cur_state == kNoStateId) {  // empty sequence.
    if (isymbols_out != NULL) isymbols_out->clear();
    if (osymbols_out != NULL) osymbols_out->clear();
    if (tot_weight_out != NULL) *tot_weight_out = Weight::Zero();
    return true;
  }
  while (1) {
    Weight w = fst.Final(cur_state);
    if (w != Weight::Zero()) {  // is final..
      tot_weight = Times(w, tot_weight);
      if (fst.NumArcs(cur_state) != 0) return false;
      if (isymbols_out != NULL) *isymbols_out = ilabel_seq;
      if (osymbols_out != NULL) *osymbols_out = olabel_seq;
      if (tot_weight_out != NULL) *tot_weight_out = tot_weight;
      return true;
    } else {
      if (fst.NumArcs(cur_state) != 1) return false;

      ArcIterator<Fst<Arc> > iter(fst, cur_state);  // get the only arc.
      const Arc &arc = iter.Value();
      tot_weight = Times(arc.weight, tot_weight);
      if (arc.ilabel != 0) ilabel_seq.push_back(arc.ilabel);
      if (arc.olabel != 0) olabel_seq.push_back(arc.olabel);
      cur_state = arc.nextstate;
    }
  }
}

// see fstext-utils.h for comment.
template <class Arc>
void ConvertNbestToVector(const Fst<Arc> &fst,
                          std::vector<VectorFst<Arc> > *fsts_out) {
  typedef typename Arc::Weight Weight;
  typedef typename Arc::StateId StateId;
  fsts_out->clear();
  StateId start_state = fst.Start();
  if (start_state == kNoStateId) return;  // No output.
  size_t n_arcs = fst.NumArcs(start_state);
  bool start_is_final = (fst.Final(start_state) != Weight::Zero());
  fsts_out->reserve(n_arcs + (start_is_final ? 1 : 0));

  if (start_is_final) {
    fsts_out->resize(fsts_out->size() + 1);
    StateId start_state_out = fsts_out->back().AddState();
    fsts_out->back().SetFinal(start_state_out, fst.Final(start_state));
  }

  for (ArcIterator<Fst<Arc> > start_aiter(fst, start_state);
       !start_aiter.Done(); start_aiter.Next()) {
    fsts_out->resize(fsts_out->size() + 1);
    VectorFst<Arc> &ofst = fsts_out->back();
    const Arc &first_arc = start_aiter.Value();
    StateId cur_state = start_state, cur_ostate = ofst.AddState();
    ofst.SetStart(cur_ostate);
    StateId next_ostate = ofst.AddState();
    ofst.AddArc(cur_ostate, Arc(first_arc.ilabel, first_arc.olabel,
                                first_arc.weight, next_ostate));
    cur_state = first_arc.nextstate;
    cur_ostate = next_ostate;
    while (1) {
      size_t this_n_arcs = fst.NumArcs(cur_state);
      KALDI_ASSERT(this_n_arcs <= 1);  // or it violates our assumptions
                                       // about the input.
      if (this_n_arcs == 1) {
        KALDI_ASSERT(fst.Final(cur_state) == Weight::Zero());
        // or problem with ShortestPath.
        ArcIterator<Fst<Arc> > aiter(fst, cur_state);
        const Arc &arc = aiter.Value();
        next_ostate = ofst.AddState();
        ofst.AddArc(cur_ostate,
                    Arc(arc.ilabel, arc.olabel, arc.weight, next_ostate));
        cur_state = arc.nextstate;
        cur_ostate = next_ostate;
      } else {
        KALDI_ASSERT(fst.Final(cur_state) != Weight::Zero());
        // or problem with ShortestPath.
        ofst.SetFinal(cur_ostate, fst.Final(cur_state));
        break;
      }
    }
  }
}

// see fstext-utils.sh for comment.
template <class Arc>
void NbestAsFsts(const Fst<Arc> &fst, size_t n,
                 std::vector<VectorFst<Arc> > *fsts_out) {
  KALDI_ASSERT(n > 0);
  KALDI_ASSERT(fsts_out != NULL);
  VectorFst<Arc> nbest_fst;
  ShortestPath(fst, &nbest_fst, n);
  ConvertNbestToVector(nbest_fst, fsts_out);
}

// Builds in `ofst` a linear acceptor with parallel arcs: position i of the
// chain has one arc per label in labels[i], all joining the same pair of
// consecutive states, each with weight One() and the same input and output
// label.  Every labels[i] must be non-empty.  The last state is final with
// weight One().  Any previous contents of `ofst` are deleted.
template <class Arc, class I>
void MakeLinearAcceptorWithAlternatives(
    const std::vector<std::vector<I> > &labels, MutableFst<Arc> *ofst) {
  typedef typename Arc::StateId StateId;
  typedef typename Arc::Weight Weight;
  typedef typename std::vector<I>::const_iterator AltIter;

  ofst->DeleteStates();
  StateId tail = ofst->AddState();
  ofst->SetStart(tail);
  for (size_t i = 0; i < labels.size(); i++) {
    KALDI_ASSERT(labels[i].size() != 0);
    const StateId head = ofst->AddState();
    const std::vector<I> &alternatives = labels[i];
    for (AltIter alt = alternatives.begin(); alt != alternatives.end();
         ++alt) {
      ofst->AddArc(tail, Arc(*alt, *alt, Weight::One(), head));
    }
    tail = head;
  }
  ofst->SetFinal(tail, Weight::One());
}

// Builds in `ofst` a linear acceptor for the sequence `labels`: a chain of
// labels.size() + 1 states in which the i'th arc has labels[i] as both input
// and output label and weight One().  The last state is final with weight
// One().  Any previous contents of `ofst` are deleted.
template <class Arc, class I>
void MakeLinearAcceptor(const std::vector<I> &labels, MutableFst<Arc> *ofst) {
  typedef typename Arc::StateId StateId;
  typedef typename Arc::Weight Weight;
  typedef typename std::vector<I>::const_iterator LabelIter;

  ofst->DeleteStates();
  StateId tail = ofst->AddState();
  ofst->SetStart(tail);
  for (LabelIter label = labels.begin(); label != labels.end(); ++label) {
    const StateId head = ofst->AddState();
    ofst->AddArc(tail, Arc(*label, *label, Weight::One(), head));
    tail = head;
  }
  ofst->SetFinal(tail, Weight::One());
}

// Copies the integer ids of all symbols in `symtab` into `syms_out`, in the
// table's iteration order.  Id 0 is skipped unless `include_eps` is true.
// Asserts that each id survives conversion to type I unchanged.
template <class I>
void GetSymbols(const SymbolTable &symtab, bool include_eps,
                std::vector<I> *syms_out) {
  KALDI_ASSERT(syms_out != NULL);
  syms_out->clear();
  for (SymbolTableIterator iter(symtab); !iter.Done(); iter.Next()) {
    if (!include_eps && iter.Value() == 0) continue;  // skip epsilon.
    syms_out->push_back(iter.Value());
    KALDI_ASSERT(syms_out->back() ==
                 iter.Value());  // an integer-range thing.
  }
}

// Determinizes `ifst` into `ofst` via PreDeterminize + DeterminizeStar.
//
// PreDeterminize is given the first unused input label and reports the
// extra symbols it introduced; after DeterminizeStar those symbols are
// turned back into epsilons on the output.  `ifst` is modified.
template <class Arc>
void SafeDeterminizeWrapper(MutableFst<Arc> *ifst, MutableFst<Arc> *ofst,
                            float delta) {
  typedef typename Arc::Label Label;
  const Label highest_in_use = HighestNumberedInputSymbol(*ifst);
  const Label first_new_sym = static_cast<Label>(highest_in_use + 1);

  std::vector<Label> added_syms;
  PreDeterminize(ifst, first_new_sym, &added_syms);
  DeterminizeStar(*ifst, ofst, delta);
  RemoveSomeInputSymbols(added_syms, ofst);  // remove the extra symbols.
}

// Same as SafeDeterminizeWrapper, followed by RemoveEpsLocal and
// MinimizeEncoded on the result.  `ifst` is modified.
template <class Arc>
void SafeDeterminizeMinimizeWrapper(MutableFst<Arc> *ifst, VectorFst<Arc> *ofst,
                                    float delta) {
  typedef typename Arc::Label Label;
  const Label highest_in_use = HighestNumberedInputSymbol(*ifst);
  const Label first_new_sym = static_cast<Label>(highest_in_use + 1);

  std::vector<Label> added_syms;
  PreDeterminize(ifst, first_new_sym, &added_syms);
  DeterminizeStar(*ifst, ofst, delta);
  RemoveSomeInputSymbols(added_syms, ofst);  // remove the extra symbols.
  RemoveEpsLocal(ofst);  // this is "safe" and will never hurt.
  MinimizeEncoded(ofst, delta);
}

// Determinizes `fst` in place in the log semiring, using the DeterminizeStar
// algorithm (which also removes epsilons).
//
// The FST is arc-sorted on input label, cast to LogArc, determinized, and
// cast back into `fst`.  `delta`, `debug_ptr` and `max_states` are forwarded
// to DeterminizeStar; its return value is ignored.
//
// The temporaries are automatic objects so that they are released even if
// DeterminizeStar throws.  In that case `fst` has already been emptied.
inline void DeterminizeStarInLog(VectorFst<StdArc> *fst, float delta,
                                 bool *debug_ptr, int max_states) {
  ArcSort(fst, ILabelCompare<StdArc>());  // helps DeterminizeStar to be faster.
  VectorFst<LogArc> fst_log;  // Want to determinize in log semiring.
  Cast(*fst, &fst_log);
  VectorFst<StdArc> tmp;
  *fst = tmp;  // make fst empty to free up memory. [actually may make no
               // difference..]
  VectorFst<LogArc> fst_det_log;
  DeterminizeStar(fst_log, &fst_det_log, delta, debug_ptr, max_states);
  Cast(fst_det_log, fst);
}

// Determinizes `fst` in place in the log semiring using Determinize().
//
// The FST is arc-sorted on input label, cast to LogArc, determinized, and
// cast back into `fst`.
//
// The temporaries are automatic objects so that they are released even if
// an exception propagates out of Determinize.  In that case `fst` has
// already been emptied.
inline void DeterminizeInLog(VectorFst<StdArc> *fst) {
  ArcSort(fst, ILabelCompare<StdArc>());  // sort on input label first.
  VectorFst<LogArc> fst_log;  // Want to determinize in log semiring.
  Cast(*fst, &fst_log);
  VectorFst<StdArc> tmp;
  *fst = tmp;  // make fst empty to free up memory. [actually may make no
               // difference..]
  VectorFst<LogArc> fst_det_log;
  Determinize(fst_log, &fst_det_log);
  Cast(fst_det_log, fst);
}

// Determinizes 'ifst' in the log semiring (via SafeDeterminizeWrapper), writes
// the result to 'ofst' as a tropical FST, then removes epsilons locally and
// minimizes in the tropical semiring.
//
// Inline so that it does not need a .cc file.  The original comment describes
// this as a destructive algorithm (changes ifst as well as ofst).
//
//   ifst  - input FST in the tropical semiring.
//   ofst  - receives the determinized, minimized result.
//   delta - forwarded to SafeDeterminizeWrapper and MinimizeEncoded.
//
// The log-semiring copies are automatic objects confined to an inner scope:
// they are released before the tropical post-processing (as before), and also
// if SafeDeterminizeWrapper exits by throwing (previously they leaked).
inline void SafeDeterminizeMinimizeWrapperInLog(VectorFst<StdArc> *ifst,
                                                VectorFst<StdArc> *ofst,
                                                float delta) {
  {
    VectorFst<LogArc> ifst_log;  // Want to determinize in log semiring.
    Cast(*ifst, &ifst_log);
    VectorFst<LogArc> ofst_log;
    SafeDeterminizeWrapper(&ifst_log, &ofst_log, delta);
    Cast(ofst_log, ofst);
  }
  RemoveEpsLocal(ofst);  // this is "safe" and will never hurt.  Do this in
                         // tropical, which is important.
  MinimizeEncoded(ofst, delta);  // Non-deterministic minimization will fail in
                                 // log semiring so do it with StdArc.
}

// Runs SafeDeterminizeWrapper on a log-semiring cast of 'ifst' and casts the
// result back into 'ofst' (tropical semiring).
//
//   ifst  - input FST in the tropical semiring.
//   ofst  - receives the determinized result.
//   delta - forwarded to SafeDeterminizeWrapper.
//
// The log-semiring copies are automatic objects, so they are released on every
// exit path, including when SafeDeterminizeWrapper throws (the previous
// new/delete pairs leaked in that case).
inline void SafeDeterminizeWrapperInLog(VectorFst<StdArc> *ifst,
                                        VectorFst<StdArc> *ofst, float delta) {
  VectorFst<LogArc> ifst_log;  // Want to determinize in log semiring.
  Cast(*ifst, &ifst_log);
  VectorFst<LogArc> ofst_log;
  SafeDeterminizeWrapper(&ifst_log, &ofst_log, delta);
  Cast(ofst_log, ofst);
}

// Replaces every arc weight in *ifst with Weight::One(), and every final
// weight that is not Weight::Zero() with Weight::One().  Afterwards the
// kUnweighted property bit is set on the FST.
template <class Arc>
void RemoveWeights(MutableFst<Arc> *ifst) {
  typedef typename Arc::Weight Weight;
  typedef MutableFst<Arc> FstType;

  for (StateIterator<FstType> states(*ifst); !states.Done(); states.Next()) {
    const typename Arc::StateId state = states.Value();
    for (MutableArcIterator<FstType> arcs(ifst, state); !arcs.Done();
         arcs.Next()) {
      Arc unweighted(arcs.Value());
      unweighted.weight = Weight::One();
      arcs.SetValue(unweighted);
    }
    // Non-final states (final weight Zero) are left untouched.
    const bool is_final = (ifst->Final(state) != Weight::Zero());
    if (is_final) {
      ifst->SetFinal(state, Weight::One());
    }
  }
  ifst->SetProperties(kUnweighted, kUnweighted);
}

// Functor that maps a value to itself.  Used by the non-functor overloads
// (e.g. PrecedingInputSymbolsAreSame) to treat each label as its own class.
template <class T>
struct IdentityFunction {
  typedef T Arg;
  typedef T Result;

  // Returns a copy of 'value'.
  T operator()(const T &value) const {
    T same(value);
    return same;
  }
};

template <class Arc>
bool PrecedingInputSymbolsAreSame(bool start_is_epsilon, const Fst<Arc> &fst) {
  IdentityFunction<typename Arc::Label> f;
  return PrecedingInputSymbolsAreSameClass(start_is_epsilon, fst, f);
}

// Returns true if, for every state, all arcs entering that state carry input
// labels that 'f' maps to the same class.
//
//   start_is_epsilon - if true, the start state is treated as if it had an
//                      incoming epsilon arc, i.e. its class is seeded with
//                      f(0).
//   fst              - FST to inspect (not modified).
//   f                - functor from labels to classes; F::Result is the class
//                      type and f(kNoLabel) is used as the "no class yet"
//                      marker.
//
// Returns true for an FST with no start state when start_is_epsilon is set.
//
// The start state's class is f(0) rather than a literal 0, so that a
// non-identity functor is handled the same way as in
// MakePrecedingInputSymbolsSameClass.
template <class Arc, class F>  // F is functor type from labels to classes.
bool PrecedingInputSymbolsAreSameClass(bool start_is_epsilon,
                                       const Fst<Arc> &fst, const F &f) {
  typedef typename F::Result ClassType;
  typedef typename Arc::StateId StateId;
  std::vector<ClassType> classes;  // indexed by destination state.
  const ClassType noClass = f(kNoLabel);

  if (start_is_epsilon) {
    StateId start_state = fst.Start();
    if (start_state < 0 || start_state == kNoStateId)
      return true;  // empty fst-- doesn't matter.
    classes.resize(start_state + 1, noClass);
    classes[start_state] = f(0);  // class of epsilon.
  }

  for (StateIterator<Fst<Arc> > siter(fst); !siter.Done(); siter.Next()) {
    StateId s = siter.Value();
    for (ArcIterator<Fst<Arc> > aiter(fst, s); !aiter.Done(); aiter.Next()) {
      const Arc &arc = aiter.Value();
      const ClassType arc_class = f(arc.ilabel);
      // Explicit cast avoids a signed/unsigned comparison with size().
      const size_t dest = static_cast<size_t>(arc.nextstate);
      if (classes.size() <= dest) classes.resize(dest + 1, noClass);
      if (classes[dest] == noClass)
        classes[dest] = arc_class;  // first arc seen into this state.
      else if (classes[dest] != arc_class)
        return false;  // two different classes enter the same state.
    }
  }
  return true;
}

template <class Arc>
bool FollowingInputSymbolsAreSame(bool end_is_epsilon, const Fst<Arc> &fst) {
  IdentityFunction<typename Arc::Label> f;
  return FollowingInputSymbolsAreSameClass(end_is_epsilon, fst, f);
}

template <class Arc, class F>
bool FollowingInputSymbolsAreSameClass(bool end_is_epsilon, const Fst<Arc> &fst,
                                       const F &f) {
  typedef typename Arc::StateId StateId;
  typedef typename Arc::Weight Weight;
  typedef typename F::Result ClassType;
  const ClassType noClass = f(kNoLabel), epsClass = f(0);
  for (StateIterator<Fst<Arc> > siter(fst); !siter.Done(); siter.Next()) {
    StateId s = siter.Value();
    ClassType c = noClass;
    for (ArcIterator<Fst<Arc> > aiter(fst, s); !aiter.Done(); aiter.Next()) {
      const Arc &arc = aiter.Value();
      if (c == noClass)
        c = f(arc.ilabel);
      else if (c != f(arc.ilabel))
        return false;
    }
    if (end_is_epsilon && c != noClass && c != epsClass &&
        fst.Final(s) != Weight::Zero())
      return false;
  }
  return true;
}

template <class Arc>
void MakePrecedingInputSymbolsSame(bool start_is_epsilon,
                                   MutableFst<Arc> *fst) {
  IdentityFunction<typename Arc::Label> f;
  MakePrecedingInputSymbolsSameClass(start_is_epsilon, fst, f);
}

// Modifies *fst so that arcs entering any one state all have input labels in
// the same class under 'f'.  For each "bad" state (one entered by arcs of
// more than one class), every non-epsilon arc into it is redirected to a new
// intermediate state, one per (bad state, class) pair, which has a single
// epsilon/epsilon arc with weight One() to the original destination.
//
//   start_is_epsilon - if true, the start state is treated as already having
//                      an incoming arc of class f(0).
//   fst              - FST to modify in place; states and arcs may be added.
//   f                - functor from labels to classes (F::Result); f(kNoLabel)
//                      is used as the "no class yet" marker.
template <class Arc, class F>
void MakePrecedingInputSymbolsSameClass(bool start_is_epsilon,
                                        MutableFst<Arc> *fst, const F &f) {
  typedef typename F::Result ClassType;
  typedef typename Arc::StateId StateId;
  typedef typename Arc::Weight Weight;
  // classes[s] is the class of the first arc seen entering state s.
  std::vector<ClassType> classes;
  ClassType noClass = f(kNoLabel);
  ClassType epsClass = f(0);
  if (start_is_epsilon) {  // treat having-start-state as epsilon in-transition.
    StateId start_state = fst->Start();
    if (start_state < 0 || start_state == kNoStateId)  // empty FST.
      return;
    classes.resize(start_state + 1, noClass);
    classes[start_state] = epsClass;
  }

  // Find bad states (states with multiple input-symbols into them).
  std::set<StateId> bad_states;  // states that we need to change.
  for (StateIterator<Fst<Arc> > siter(*fst); !siter.Done(); siter.Next()) {
    StateId s = siter.Value();
    for (ArcIterator<Fst<Arc> > aiter(*fst, s); !aiter.Done(); aiter.Next()) {
      const Arc &arc = aiter.Value();
      if (classes.size() <= static_cast<size_t>(arc.nextstate))
        classes.resize(arc.nextstate + 1, noClass);
      if (classes[arc.nextstate] == noClass)
        classes[arc.nextstate] = f(arc.ilabel);
      else if (classes[arc.nextstate] != f(arc.ilabel))
        bad_states.insert(arc.nextstate);
    }
  }
  if (bad_states.empty()) return;  // Nothing to do.
  kaldi::ConstIntegerSet<StateId> bad_states_ciset(
      bad_states);  // faster lookup.

  // Work out list of arcs we have to change as (state, arc-offset).
  // Can't do the actual changes in this pass, since we have to add new
  // states which invalidates the iterators.
  std::vector<std::pair<StateId, size_t> > arcs_to_change;
  for (StateIterator<Fst<Arc> > siter(*fst); !siter.Done(); siter.Next()) {
    StateId s = siter.Value();
    for (ArcIterator<Fst<Arc> > aiter(*fst, s); !aiter.Done(); aiter.Next()) {
      const Arc &arc = aiter.Value();
      // Epsilon-input arcs into bad states are left pointing where they are.
      if (arc.ilabel != 0 && bad_states_ciset.count(arc.nextstate) != 0)
        arcs_to_change.push_back(std::make_pair(s, aiter.Position()));
    }
  }
  KALDI_ASSERT(!arcs_to_change.empty());  // since !bad_states.empty().

  std::map<std::pair<StateId, ClassType>, StateId> state_map;
  // state_map is a map from (bad-state, input-symbol-class) to dummy-state.

  for (size_t i = 0; i < arcs_to_change.size(); i++) {
    StateId s = arcs_to_change[i].first;
    ArcIterator<MutableFst<Arc> > aiter(*fst, s);
    aiter.Seek(arcs_to_change[i].second);
    // Copy the arc by value: the FST is modified below while aiter is alive.
    Arc arc = aiter.Value();

    // Transition is non-eps transition to "bad" state.  Introduce new state (or
    // find existing one).
    std::pair<StateId, ClassType> p(arc.nextstate, f(arc.ilabel));
    if (state_map.count(p) == 0) {
      StateId newstate = state_map[p] = fst->AddState();
      // The dummy state forwards to the original destination for free.
      fst->AddArc(newstate, Arc(0, 0, Weight::One(), arc.nextstate));
    }
    StateId dst_state = state_map[p];
    arc.nextstate = dst_state;

    // Initialize the MutableArcIterator only now, as the call to AddState()
    // may have invalidated the first arc iterator.
    MutableArcIterator<MutableFst<Arc> > maiter(fst, s);
    maiter.Seek(arcs_to_change[i].second);
    maiter.SetValue(arc);
  }
}

template <class Arc>
void MakeFollowingInputSymbolsSame(bool end_is_epsilon, MutableFst<Arc> *fst) {
  IdentityFunction<typename Arc::Label> f;
  MakeFollowingInputSymbolsSameClass(end_is_epsilon, fst, f);
}

// Modifies *fst so that arcs leaving any one state all have input labels in
// the same class under 'f'.  A state is "bad" if its outgoing arcs span more
// than one class, or (when end_is_epsilon is true) if it has a non-Zero final
// weight and an outgoing class that is neither f(kNoLabel) nor f(0).  For each
// bad state, every non-epsilon arc is split in two: the original arc keeps its
// output label and weight but gets input epsilon and points at a fresh state,
// and the fresh state carries the input label (output epsilon, weight One())
// on to the original destination.
//
//   end_is_epsilon - whether finality is treated like an outgoing epsilon.
//   fst            - FST to modify in place; states and arcs may be added.
//   f              - functor from labels to classes (F::Result).
template <class Arc, class F>
void MakeFollowingInputSymbolsSameClass(bool end_is_epsilon,
                                        MutableFst<Arc> *fst, const F &f) {
  typedef typename Arc::StateId StateId;
  typedef typename Arc::Weight Weight;
  typedef typename F::Result ClassType;
  std::vector<StateId> bad_states;
  ClassType noClass = f(kNoLabel);
  ClassType epsClass = f(0);
  // Pass 1: collect the bad states without modifying the FST.
  for (StateIterator<Fst<Arc> > siter(*fst); !siter.Done(); siter.Next()) {
    StateId s = siter.Value();
    ClassType c = noClass;  // class of the arcs seen so far out of s.
    bool bad = false;
    for (ArcIterator<Fst<Arc> > aiter(*fst, s); !aiter.Done(); aiter.Next()) {
      const Arc &arc = aiter.Value();
      if (c == noClass) {
        c = f(arc.ilabel);
      } else if (c != f(arc.ilabel)) {
        bad = true;
        break;
      }
    }
    if (end_is_epsilon && c != noClass && c != epsClass &&
        fst->Final(s) != Weight::Zero())
      bad = true;
    if (bad) bad_states.push_back(s);
  }
  // Pass 2: rewrite the arcs of each bad state.  my_arcs is a snapshot of the
  // state's arcs, so that adding states/arcs below does not disturb the walk.
  std::vector<Arc> my_arcs;
  for (size_t i = 0; i < bad_states.size(); i++) {
    StateId s = bad_states[i];
    my_arcs.clear();
    for (ArcIterator<MutableFst<Arc> > aiter(*fst, s); !aiter.Done();
         aiter.Next())
      my_arcs.push_back(aiter.Value());

    for (size_t j = 0; j < my_arcs.size(); j++) {
      Arc &arc = my_arcs[j];
      if (arc.ilabel != 0) {
        StateId newstate = fst->AddState();
        // Create a new state for each non-eps arc in original FST, out of each
        // bad state. Not as optimal as it could be, but does avoid some
        // complicated weight-pushing issues in which, to maintain
        // stochasticity, we would have to know which semiring we want to
        // maintain stochasticity in.
        fst->AddArc(newstate, Arc(arc.ilabel, 0, Weight::One(), arc.nextstate));
        // The iterator is created after AddState()/AddArc(), and positioned at
        // the j'th arc of s (same index as in the snapshot).
        MutableArcIterator<MutableFst<Arc> > maiter(fst, s);
        maiter.Seek(j);
        maiter.SetValue(Arc(0, arc.olabel, arc.weight, newstate));
      }
    }
  }
}

// Builds a new FST with a single start/final "loop" state (state 0) and, for
// each non-NULL, non-empty acceptor fsts[i], a copy of that acceptor attached
// to the loop state: the path into the copy carries output label i, and every
// final state of the copy gets an epsilon arc (with its final weight) back to
// the loop state.
//
//   fsts - vector indexed by output label; NULL entries and FSTs without a
//          start state are skipped.  Each FST is asserted to be an acceptor.
//
// Returns a newly allocated VectorFst that the caller owns.
template <class Arc>
VectorFst<Arc> *MakeLoopFst(const std::vector<const ExpandedFst<Arc> *> &fsts) {
  typedef typename Arc::Weight Weight;
  typedef typename Arc::StateId StateId;
  typedef typename Arc::Label Label;

  VectorFst<Arc> *ans = new VectorFst<Arc>;
  StateId loop_state = ans->AddState();  // = 0.
  ans->SetStart(loop_state);
  ans->SetFinal(loop_state, Weight::One());

  // "cache" is used as an optimization when some of the pointers in "fsts"
  // may have the same value.  It stores, per distinct FST pointer, the arc
  // leaving the loop state that enters the already-built copy of that FST.
  unordered_map<const ExpandedFst<Arc> *, Arc> cache;

  for (Label i = 0; i < static_cast<Label>(fsts.size()); i++) {
    const ExpandedFst<Arc> *fst = fsts[i];
    if (fst == NULL) continue;
    {  // optimization with cache: helpful if some members of "fsts" may
      // contain the same pointer value (e.g. in GetHTransducer).
      typename unordered_map<const ExpandedFst<Arc> *, Arc>::iterator iter =
          cache.find(fst);
      if (iter != cache.end()) {
        // Reuse the existing copy: only a new entry arc with olabel i is added.
        Arc arc = iter->second;
        arc.olabel = i;
        ans->AddArc(0, arc);
        continue;
      }
    }

    KALDI_ASSERT(fst->Properties(kAcceptor, true) ==
                 kAcceptor);  // expect acceptor.

    StateId fst_num_states = fst->NumStates();
    StateId fst_start_state = fst->Start();

    if (fst_start_state == kNoStateId) continue;  // empty fst.

    // If the start state has no incoming arcs (initial-acyclic), exactly one
    // outgoing arc and is not final, it is merged into the loop state instead
    // of being copied.
    bool share_start_state =
        fst->Properties(kInitialAcyclic, true) == kInitialAcyclic &&
        fst->NumArcs(fst_start_state) == 1 &&
        fst->Final(fst_start_state) == Weight::Zero();

    std::vector<StateId> state_map(fst_num_states);  // fst state -> ans state
    for (StateId s = 0; s < fst_num_states; s++) {
      if (s == fst_start_state && share_start_state)
        state_map[s] = loop_state;
      else
        state_map[s] = ans->AddState();
    }
    if (!share_start_state) {
      // Separate start state: enter the copy via an epsilon-input arc that
      // outputs i.
      Arc arc(0, i, Weight::One(), state_map[fst_start_state]);
      cache[fst] = arc;
      ans->AddArc(0, arc);
    }
    for (StateId s = 0; s < fst_num_states; s++) {
      // Add arcs out of state s.
      for (ArcIterator<ExpandedFst<Arc> > aiter(*fst, s); !aiter.Done();
           aiter.Next()) {
        const Arc &arc = aiter.Value();
        // Only the arc leaving a shared start state carries the output label.
        Label olabel = (s == fst_start_state && share_start_state ? i : 0);
        Arc newarc(arc.ilabel, olabel, arc.weight, state_map[arc.nextstate]);
        ans->AddArc(state_map[s], newarc);
        if (s == fst_start_state && share_start_state) cache[fst] = newarc;
      }
      if (fst->Final(s) != Weight::Zero()) {
        KALDI_ASSERT(!(s == fst_start_state && share_start_state));
        // Return to the loop state, paying the final weight.
        ans->AddArc(state_map[s], Arc(0, 0, fst->Final(s), loop_state));
      }
    }
  }
  return ans;
}

template <class Arc>
void ClearSymbols(bool clear_input, bool clear_output, MutableFst<Arc> *fst) {
  for (StateIterator<MutableFst<Arc> > siter(*fst); !siter.Done();
       siter.Next()) {
    typename Arc::StateId s = siter.Value();
    for (MutableArcIterator<MutableFst<Arc> > aiter(fst, s); !aiter.Done();
         aiter.Next()) {
      Arc arc = aiter.Value();
      bool change = false;
      if (clear_input && arc.ilabel != 0) {
        arc.ilabel = 0;
        change = true;
      }
      if (clear_output && arc.olabel != 0) {
        arc.olabel = 0;
        change = true;
      }
      if (change) {
        aiter.SetValue(arc);
      }
    }
  }
}

template <class Arc>
void ApplyProbabilityScale(float scale, MutableFst<Arc> *fst) {
  typedef typename Arc::Weight Weight;
  typedef typename Arc::StateId StateId;
  for (StateIterator<MutableFst<Arc> > siter(*fst); !siter.Done();
       siter.Next()) {
    StateId s = siter.Value();
    for (MutableArcIterator<MutableFst<Arc> > aiter(fst, s); !aiter.Done();
         aiter.Next()) {
      Arc arc = aiter.Value();
      arc.weight = Weight(arc.weight.Value() * scale);
      aiter.SetValue(arc);
    }
    if (fst->Final(s) != Weight::Zero())
      fst->SetFinal(s, Weight(fst->Final(s).Value() * scale));
  }
}

// return arc-offset of self-loop with ilabel (or -1 if none exists).
// if more than one such self-loop, pick first one.
template <class Arc>
ssize_t FindSelfLoopWithILabel(const Fst<Arc> &fst, typename Arc::StateId s) {
  for (ArcIterator<Fst<Arc> > aiter(fst, s); !aiter.Done(); aiter.Next())
    if (aiter.Value().nextstate == s && aiter.Value().ilabel != 0)
      return static_cast<ssize_t>(aiter.Position());
  return static_cast<ssize_t>(-1);
}

// Picks a random path through 'ifst' (never taking self-loops while choosing
// it) and then inserts self-loops along that path, spread as evenly as
// possible, so that the resulting linear FST written to 'ofst' has exactly
// 'length' non-epsilon input labels.
//
//   ifst        - input FST; asserted to be coaccessible.
//   length      - required number of non-epsilon input labels on the output.
//   rand_seed   - passed to srand() before any random choice is made.
//   ofst        - output; asserted to have no states on entry.
//   num_retries - upper bound on how many random paths are tried when the
//                 chosen path already has more than 'length' input labels.
//
// Returns false (after a warning) if ifst has no start state, if every tried
// path has more input labels than 'length', or if the path is too short and
// has no self-loop with a non-epsilon input label; returns true otherwise.
template <class Arc>
bool EqualAlign(const Fst<Arc> &ifst, typename Arc::StateId length,
                int rand_seed, MutableFst<Arc> *ofst, int num_retries) {
  srand(rand_seed);
  KALDI_ASSERT(ofst->NumStates() == 0);  // make sure ofst empty.
  // Make sure all states can reach a final state (otherwise the random walk
  // below may loop forever).
  KALDI_ASSERT(ifst.Properties(kCoAccessible, true) == kCoAccessible);

  typedef typename Arc::StateId StateId;
  typedef typename Arc::Weight Weight;

  if (ifst.Start() == kNoStateId) {
    KALDI_WARN << "Empty input fst.";
    return false;
  }
  // First select path through ifst.
  std::vector<StateId> path;
  std::vector<size_t> arc_offsets;  // arc taken out of each state.
  std::vector<int> nof_ilabels;     // input-label count of each attempt.

  StateId num_ilabels = 0;
  int retry_no = 0;

  // Under normal circumstances, this will be one-pass-only process
  // Multiple tries might be needed in special cases, typically when
  // the number of frames is close to number of transitions from
  // the start node to the final node. It usually happens for really
  // short utterances
  do {
    num_ilabels = 0;
    arc_offsets.clear();
    path.clear();
    path.push_back(ifst.Start());

    while (1) {
      // Select either an arc or final-prob.
      StateId s = path.back();
      size_t num_arcs = ifst.NumArcs(s);
      size_t num_arcs_tot = num_arcs;
      // A final state gets one extra "virtual" choice meaning "stop here".
      if (ifst.Final(s) != Weight::Zero()) num_arcs_tot++;
      // kaldi::RandInt is a bit like Rand(), but gets around situations
      // where RAND_MAX is very small.
      // Change this to Rand() % num_arcs_tot if compile issues arise
      size_t arc_offset =
          static_cast<size_t>(kaldi::RandInt(0, num_arcs_tot - 1));

      if (arc_offset < num_arcs) {  // an actual arc.
        ArcIterator<Fst<Arc> > aiter(ifst, s);
        aiter.Seek(arc_offset);
        const Arc &arc = aiter.Value();
        if (arc.nextstate == s) {
          continue;  // don't take this self-loop arc
        } else {
          arc_offsets.push_back(arc_offset);
          path.push_back(arc.nextstate);
          if (arc.ilabel != 0) num_ilabels++;
        }
      } else {
        break;  // Chose final-prob.
      }
    }

    nof_ilabels.push_back(num_ilabels);
  } while ((++retry_no < num_retries) && (num_ilabels > length));

  if (num_ilabels > length) {
    // Report the lengths of all attempted paths as a comma-separated list.
    std::stringstream ilabel_vec;
    std::copy(nof_ilabels.begin(), nof_ilabels.end(),
              std::ostream_iterator<int>(ilabel_vec, ","));
    std::string s = ilabel_vec.str();
    // Drop the trailing comma; nof_ilabels has at least one entry because the
    // do/while body above runs at least once.
    s.erase(s.end() - 1);
    KALDI_WARN << "EqualAlign: the randomly constructed paths lengths: " << s;
    KALDI_WARN << "EqualAlign: utterance has too few frames " << length
               << " to align.";
    return false;  // can't make it shorter by adding self-loops!.
  }

  // For each state on the path, find a usable self-loop (or -1 if none).
  StateId num_self_loops = 0;
  std::vector<ssize_t> self_loop_offsets(path.size());
  for (size_t i = 0; i < path.size(); i++)
    if ((self_loop_offsets[i] = FindSelfLoopWithILabel(ifst, path[i])) !=
        static_cast<ssize_t>(-1))
      num_self_loops++;

  if (num_self_loops == 0 && num_ilabels < length) {
    KALDI_WARN << "No self-loops on chosen path; cannot match length.";
    return false;  // no self-loops to make it longer.
  }

  StateId num_extra = length - num_ilabels;  // Number of self-loops we need.

  // Every state with a self-loop takes min_num_loops repetitions, and the
  // first num_with_one_more_loop of them take one more.
  StateId min_num_loops = 0;
  if (num_extra != 0)
    min_num_loops = num_extra / num_self_loops;  // prevent div by zero.
  StateId num_with_one_more_loop = num_extra - (min_num_loops * num_self_loops);
  KALDI_ASSERT(num_with_one_more_loop < num_self_loops || num_self_loops == 0);

  // Build the output as a linear chain of freshly added states.
  ofst->AddState();
  ofst->SetStart(0);
  StateId cur_state = 0;
  StateId counter = 0;  // tell us when we should stop adding one more loop.
  for (size_t i = 0; i < path.size(); i++) {
    // First, add any self-loops that are necessary.
    StateId num_loops = 0;
    if (self_loop_offsets[i] != static_cast<ssize_t>(-1)) {
      num_loops = min_num_loops + (counter < num_with_one_more_loop ? 1 : 0);
      counter++;
    }
    for (StateId j = 0; j < num_loops; j++) {
      ArcIterator<Fst<Arc> > aiter(ifst, path[i]);
      aiter.Seek(self_loop_offsets[i]);
      Arc arc = aiter.Value();
      KALDI_ASSERT(arc.nextstate == path[i] &&
                   arc.ilabel != 0);  // make sure self-loop with ilabel.
      // In the output the self-loop is unrolled into a forward arc.
      StateId next_state = ofst->AddState();
      ofst->AddArc(cur_state,
                   Arc(arc.ilabel, arc.olabel, arc.weight, next_state));
      cur_state = next_state;
    }
    if (i + 1 < path.size()) {  // add forward transition.
      ArcIterator<Fst<Arc> > aiter(ifst, path[i]);
      aiter.Seek(arc_offsets[i]);
      Arc arc = aiter.Value();
      KALDI_ASSERT(arc.nextstate == path[i + 1]);
      StateId next_state = ofst->AddState();
      ofst->AddArc(cur_state,
                   Arc(arc.ilabel, arc.olabel, arc.weight, next_state));
      cur_state = next_state;
    } else {  // add final-prob.
      Weight weight = ifst.Final(path[i]);
      KALDI_ASSERT(weight != Weight::Zero());
      ofst->SetFinal(cur_state, weight);
    }
  }
  return true;
}

// Removes two kinds of useless arcs from *fst:
//  - where several arcs go from state X to state Y with the same input
//    symbol, all but the best one according to NaturalLess are removed (an
//    arbitrary one is kept if they are equal);
//  - every arc from state X to state X with epsilon input symbol is removed,
//    including the case where there are several such arcs on one state.
//    (Previously, duplicated epsilon self-loops were treated as ordinary
//    duplicates, so one of them survived.)
// Only works for tropical (not log) semiring as it uses NaturalLess.
//
// Arcs are removed by redirecting them to a fresh state with no outgoing arcs
// and no final weight, and then calling Connect() to trim it away.
template <class Arc>
void RemoveUselessArcs(MutableFst<Arc> *fst) {
  typedef typename Arc::Label Label;
  typedef typename Arc::StateId StateId;
  typedef typename Arc::Weight Weight;
  typedef std::map<std::pair<Label, StateId>, std::vector<size_t> > PairToArcs;
  NaturalLess<Weight> nl;
  StateId non_coacc_state = kNoStateId;  // sink state, created on demand.
  size_t num_arcs_removed = 0, tot_arcs = 0;
  for (StateIterator<MutableFst<Arc> > siter(*fst); !siter.Done();
       siter.Next()) {
    std::vector<size_t> arcs_to_delete;
    std::vector<Arc> arcs;
    // pair2arclist maps (ilabel, nextstate) to the positions of the arcs of
    // this state that have that input label and destination.
    PairToArcs pair2arclist;
    StateId state = siter.Value();
    for (ArcIterator<MutableFst<Arc> > aiter(*fst, state); !aiter.Done();
         aiter.Next()) {
      size_t pos = arcs.size();
      const Arc &arc = aiter.Value();
      arcs.push_back(arc);
      pair2arclist[std::make_pair(arc.ilabel, arc.nextstate)].push_back(pos);
    }
    for (typename PairToArcs::iterator iter = pair2arclist.begin();
         iter != pair2arclist.end(); ++iter) {
      const std::vector<size_t> &poslist = iter->second;
      KALDI_ASSERT(!poslist.empty());
      const bool eps_self_loop =
          (iter->first.first == 0 && iter->first.second == state);
      if (eps_self_loop) {
        // Epsilon-input self-loops are all useless, however many there are.
        arcs_to_delete.insert(arcs_to_delete.end(), poslist.begin(),
                              poslist.end());
      } else if (poslist.size() > 1) {  // >1 arc with same ilabel, dest-state
        size_t best_pos = poslist[0];
        Weight best_weight = arcs[best_pos].weight;
        for (size_t j = 1; j < poslist.size(); j++) {
          size_t pos = poslist[j];
          Weight this_weight = arcs[pos].weight;
          // NaturalLess(a, b) holds when a is the better (lower-cost) weight
          // in the tropical semiring.
          if (nl(this_weight, best_weight)) {
            best_weight = this_weight;  // found a better one.
            best_pos = pos;
          }
        }
        for (size_t j = 0; j < poslist.size(); j++)
          if (poslist[j] != best_pos) arcs_to_delete.push_back(poslist[j]);
      }
    }
    tot_arcs += arcs.size();
    if (arcs_to_delete.size() != 0) {
      num_arcs_removed += arcs_to_delete.size();
      if (non_coacc_state == kNoStateId) non_coacc_state = fst->AddState();
      // Created after AddState(), so the iterator is valid for the edits.
      MutableArcIterator<MutableFst<Arc> > maiter(fst, state);
      for (size_t j = 0; j < arcs_to_delete.size(); j++) {
        size_t pos = arcs_to_delete[j];
        maiter.Seek(pos);
        arcs[pos].nextstate = non_coacc_state;
        maiter.SetValue(arcs[pos]);
      }
    }
  }
  // Trimming removes the sink state together with every arc leading to it.
  if (non_coacc_state != kNoStateId) Connect(fst);
  KALDI_VLOG(1) << "removed " << num_arcs_removed << " of " << tot_arcs
                << " arcs.";
}

template <class Arc>
void PhiCompose(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
                typename Arc::Label phi_label, MutableFst<Arc> *ofst) {
  KALDI_ASSERT(phi_label !=
               kNoLabel);  // just use regular compose in this case.
  typedef Fst<Arc> F;
  typedef PhiMatcher<SortedMatcher<F> > PM;
  CacheOptions base_opts;
  base_opts.gc_limit = 0;  // Cache only the last state for fastest copy.
  // ComposeFstImplOptions templated on matcher for fst1, matcher for fst2.
  // The matcher for fst1 doesn't matter; we'll use fst2's matcher.
  ComposeFstImplOptions<SortedMatcher<F>, PM> impl_opts(base_opts);

  // the false below is something called phi_loop which is something I don't
  // fully understand, but I don't think we want it.

  // These pointers are taken ownership of, by ComposeFst.
  PM *phi_matcher = new PM(fst2, MATCH_INPUT, phi_label, false);
  SortedMatcher<F> *sorted_matcher =
      new SortedMatcher<F>(fst1, MATCH_NONE);  // tell it
  // not to use this matcher, as this would mean we would
  // not follow phi transitions.
  impl_opts.matcher1 = sorted_matcher;
  impl_opts.matcher2 = phi_matcher;
  *ofst = ComposeFst<Arc>(fst1, fst2, impl_opts);
  Connect(ofst);
}

// Helper for PropagateFinal.  If state 's' has final weight Zero(), looks for
// an outgoing arc whose input label is phi_label, recursively processes that
// arc's destination, and, if the destination ends up with a non-Zero final
// weight, sets Final(s) to Times(Final(destination), arc weight).
// States that already have a non-Zero final weight are left alone.
//
//   phi_label - input label identifying phi (backoff) arcs.
//   s         - state to process.
//   fst       - FST to modify in place (only final weights are changed).
template <class Arc>
void PropagateFinalInternal(typename Arc::Label phi_label,
                            typename Arc::StateId s, MutableFst<Arc> *fst) {
  typedef typename Arc::Weight Weight;
  if (fst->Final(s) == Weight::Zero()) {
    // search for phi transition.  We assume there
    // is just one-- phi nondeterminism is not allowed
    // anyway.
    int num_phis = 0;
    for (ArcIterator<Fst<Arc> > aiter(*fst, s); !aiter.Done(); aiter.Next()) {
      const Arc &arc = aiter.Value();
      if (arc.ilabel == phi_label) {
        num_phis++;
        if (arc.nextstate == s) continue;  // don't expect
        // phi loops but ignore them anyway.

        // If this recurses infinitely, it means there
        // are loops of phi transitions, which there should
        // not be in a normal backoff LM.  We could make this
        // routine work for this case, but currently there is
        // no need.
        PropagateFinalInternal(phi_label, arc.nextstate, fst);
        if (fst->Final(arc.nextstate) != Weight::Zero())
          fst->SetFinal(s, Times(fst->Final(arc.nextstate), arc.weight));
      }
      // Checked after every arc, except when the 'continue' above skips it
      // for a phi self-loop.
      KALDI_ASSERT(num_phis <= 1 && "Phi nondeterminism found");
    }
  }
}

template <class Arc>
void PropagateFinal(typename Arc::Label phi_label, MutableFst<Arc> *fst) {
  typedef typename Arc::StateId StateId;
  if (fst->Properties(kIEpsilons, true))  // just warn.
    KALDI_WARN << "PropagateFinal: this may not work as desired "
                  "since your FST has input epsilons.";
  StateId num_states = fst->NumStates();
  for (StateId s = 0; s < num_states; s++)
    PropagateFinalInternal(phi_label, s, fst);
}

template <class Arc>
void RhoCompose(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
                typename Arc::Label rho_label, MutableFst<Arc> *ofst) {
  KALDI_ASSERT(rho_label !=
               kNoLabel);  // just use regular compose in this case.
  typedef Fst<Arc> F;
  typedef RhoMatcher<SortedMatcher<F> > RM;
  CacheOptions base_opts;
  base_opts.gc_limit = 0;  // Cache only the last state for fastest copy.
  // ComposeFstImplOptions templated on matcher for fst1, matcher for fst2.
  // The matcher for fst1 doesn't matter; we'll use fst2's matcher.
  ComposeFstImplOptions<SortedMatcher<F>, RM> impl_opts(base_opts);

  // the false below is something called rho_loop which is something I don't
  // fully understand, but I don't think we want it.

  // These pointers are taken ownership of, by ComposeFst.
  RM *rho_matcher = new RM(fst2, MATCH_INPUT, rho_label);
  SortedMatcher<F> *sorted_matcher =
      new SortedMatcher<F>(fst1, MATCH_NONE);  // tell it
  // not to use this matcher, as this would mean we would
  // not follow rho transitions.
  impl_opts.matcher1 = sorted_matcher;
  impl_opts.matcher2 = rho_matcher;
  *ofst = ComposeFst<Arc>(fst1, fst2, impl_opts);
  Connect(ofst);
}

// Forward declaration of the explicit specialization of IsStochasticFst for
// LogArc.  It is declared here, ahead of the generic template's definition
// below; the specialization itself is defined after the generic version.
template <>
inline bool IsStochasticFst(const Fst<LogArc> &fst, float delta,
                            LogArc::Weight *min_sum, LogArc::Weight *max_sum);

// Will override this for LogArc where NaturalLess will not work.
template <class Arc>
inline bool IsStochasticFst(const Fst<Arc> &fst, float delta,
                            typename Arc::Weight *min_sum,
                            typename Arc::Weight *max_sum) {
  typedef typename Arc::StateId StateId;
  typedef typename Arc::Weight Weight;
  NaturalLess<Weight> nl;
  bool first_time = true;
  bool ans = true;
  if (min_sum) *min_sum = Arc::Weight::One();
  if (max_sum) *max_sum = Arc::Weight::One();
  for (StateIterator<Fst<Arc> > siter(fst); !siter.Done(); siter.Next()) {
    StateId s = siter.Value();
    Weight sum = fst.Final(s);
    for (ArcIterator<Fst<Arc> > aiter(fst, s); !aiter.Done(); aiter.Next()) {
      const Arc &arc = aiter.Value();
      sum = Plus(sum, arc.weight);
    }
    if (!ApproxEqual(Weight::One(), sum, delta)) ans = false;
    if (first_time) {
      first_time = false;
      if (max_sum) *max_sum = sum;
      if (min_sum) *min_sum = sum;
    } else {
      if (max_sum && nl(*max_sum, sum)) *max_sum = sum;
      if (min_sum && nl(sum, *min_sum)) *min_sum = sum;
    }
  }
  if (first_time) {  // just avoid NaNs if FST was empty.
    if (max_sum) *max_sum = Weight::One();
    if (min_sum) *min_sum = Weight::One();
  }
  return ans;
}

// Specialization of IsStochasticFst for LogArc, where NaturalLess does not
// work: per-state sums are compared through their raw Value() instead.
// Returns true if every state's sum of final weight plus arc weights is
// ApproxEqual to One() within 'delta'.  If non-NULL, *min_sum and *max_sum
// receive the extreme sums; both are One() for an FST with no states.
template <>
inline bool IsStochasticFst(const Fst<LogArc> &fst, float delta,
                            LogArc::Weight *min_sum, LogArc::Weight *max_sum) {
  typedef LogArc Arc;
  typedef Arc::StateId StateId;
  typedef Arc::Weight Weight;
  bool seen_any_state = false;
  bool all_stochastic = true;
  if (min_sum) *min_sum = LogArc::Weight::One();
  if (max_sum) *max_sum = LogArc::Weight::One();

  for (StateIterator<Fst<Arc> > states(fst); !states.Done(); states.Next()) {
    const StateId state = states.Value();
    // Total weight leaving this state, including the final weight.
    Weight total = fst.Final(state);
    for (ArcIterator<Fst<Arc> > arcs(fst, state); !arcs.Done(); arcs.Next()) {
      total = Plus(total, arcs.Value().weight);
    }
    if (!ApproxEqual(Weight::One(), total, delta)) all_stochastic = false;

    if (!seen_any_state) {
      // The first state initialises both extremes.
      seen_any_state = true;
      if (max_sum) *max_sum = total;
      if (min_sum) *min_sum = total;
      continue;
    }
    // "max" and "min" refer to the underlying probabilities, so the
    // comparisons on Value() are reversed from their usual direction.
    if (max_sum && total.Value() < max_sum->Value()) *max_sum = total;
    if (min_sum && total.Value() > min_sum->Value()) *min_sum = total;
  }

  if (!seen_any_state) {  // just avoid NaNs if FST was empty.
    if (max_sum) *max_sum = Weight::One();
    if (min_sum) *min_sum = Weight::One();
  }
  return all_stochastic;
}

// Tests whether a tropical FST is stochastic in the log
// semiring. (casts it and does the check.)
// This function deals with the generic fst.
// This version currently supports ConstFst<StdArc> or VectorFst<StdArc>.
// Otherwise, it will be died with an error.
inline bool IsStochasticFstInLog(const Fst<StdArc> &fst, float delta,
                                 StdArc::Weight *min_sum,
                                 StdArc::Weight *max_sum) {
  bool ans = false;
  LogArc::Weight log_min = LogArc::Weight::One(),
                 log_max = LogArc::Weight::Zero();
  if (fst.Type() == "const") {
    ConstFst<LogArc> logfst;
    Cast(dynamic_cast<const ConstFst<StdArc> &>(fst), &logfst);
    ans = IsStochasticFst(logfst, delta, &log_min, &log_max);
  } else if (fst.Type() == "vector") {
    VectorFst<LogArc> logfst;
    Cast(dynamic_cast<const VectorFst<StdArc> &>(fst), &logfst);
    ans = IsStochasticFst(logfst, delta, &log_min, &log_max);
  } else {
    KALDI_ERR << "This version currently supports ConstFst<StdArc> "
              << "or VectorFst<StdArc>";
  }
  if (min_sum) *min_sum = StdArc::Weight(log_min.Value());
  if (max_sum) *max_sum = StdArc::Weight(log_max.Value());
  return ans;
}

}  // namespace fst.

#endif  // KALDI_FSTEXT_FSTEXT_UTILS_INL_H_


================================================
FILE: runtime/engine/kaldi/fstext/fstext-utils.h
================================================
// fstext/fstext-utils.h

// Copyright 2009-2011  Microsoft Corporation
//           2012-2013  Johns Hopkins University (Author: Daniel Povey)
//                2013  Guoguo Chen
//                2014  Telepoint Global Hosting Service, LLC. (Author: David
//                Snyder)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_FSTEXT_FSTEXT_UTILS_H_
#define KALDI_FSTEXT_FSTEXT_UTILS_H_

#include <fst/fst-decl.h>
#include <fst/fstlib.h>

#include <algorithm>
#include <map>
#include <set>
#include <vector>

#include "fstext/determinize-star.h"
#include "fstext/remove-eps-local.h"
#include "base/kaldi-common.h"  // for error reporting macros.
#include "util/text-utils.h"  // for SplitStringToVector
#include "fst/script/print-impl.h"

namespace fst {

/// Returns the highest numbered output symbol id of the FST (or zero
/// for an empty FST).
template <class Arc>
typename Arc::Label HighestNumberedOutputSymbol(const Fst<Arc> &fst);

/// Returns the highest numbered input symbol id of the FST (or zero
/// for an empty FST).
template <class Arc>
typename Arc::Label HighestNumberedInputSymbol(const Fst<Arc> &fst);

/// Returns the total number of arcs in an FST.
template <class Arc>
typename Arc::StateId NumArcs(const ExpandedFst<Arc> &fst);

/// GetInputSymbols gets the list of symbols on the input of fst
/// (including epsilon, if include_eps == true), as a sorted, unique
/// list.
template <class Arc, class I>
void GetInputSymbols(const Fst<Arc> &fst, bool include_eps,
                     std::vector<I> *symbols);

/// GetOutputSymbols gets the list of symbols on the output of fst
/// (including epsilon, if include_eps == true)
template <class Arc, class I>
void GetOutputSymbols(const Fst<Arc> &fst, bool include_eps,
                      std::vector<I> *symbols);

/// ClearSymbols sets all the symbols on the input and/or
/// output side of the FST to zero, as specified.
/// It does not alter the symbol tables.
template <class Arc>
void ClearSymbols(bool clear_input, bool clear_output, MutableFst<Arc> *fst);

template <class I>
void GetSymbols(const SymbolTable &symtab, bool include_eps,
                std::vector<I> *syms_out);

inline void DeterminizeStarInLog(VectorFst<StdArc> *fst, float delta = kDelta,
                                 bool *debug_ptr = NULL, int max_states = -1);

// e.g. of using this function: PushInLog<REWEIGHT_TO_INITIAL>(fst,
// kPushWeights|kPushLabels);

template <ReweightType rtype>  // == REWEIGHT_TO_{INITIAL, FINAL}
void PushInLog(VectorFst<StdArc> *fst, uint32 ptype, float delta = kDelta) {
  // Pushes *fst in place, doing the push in the log semiring: the FST is cast
  // to LogArc, pushed with Push<LogArc, rtype>(), and the result is cast back
  // into *fst.  Nothing is returned; *fst is overwritten with the result.
  //
  //  fst    [in, out] the FST to push; replaced by the pushed FST.
  //  ptype  push flags forwarded to Push(), e.g. kPushWeights|kPushLabels.
  //  delta  tolerance forwarded to Push().
  //
  // The temporaries are automatic objects (not new/delete) so that they are
  // released even if Cast() or Push() exits via an exception.
  VectorFst<LogArc> fst_log;  // Log-semiring view of the input.
  Cast(*fst, &fst_log);
  {
    VectorFst<StdArc> empty;
    *fst = empty;  // free up memory: drop *fst's hold on the original data.
  }
  VectorFst<LogArc> fst_pushed_log;
  Push<LogArc, rtype>(fst_log, &fst_pushed_log, ptype, delta);
  Cast(fst_pushed_log, fst);
}

// Minimizes after encoding; applicable to all FSTs.  It is like what you get
// from the Minimize() function, except it will not push the weights, or the
// symbols.  This is better for our recipes, as we avoid ever pushing the
// weights.  However, it will only minimize optimally if your graphs are such
// that the symbols are as far to the left as they can go, and the weights
// in combinable paths are the same... hard to formalize this, but it's
// something that is satisfied by our normal FSTs.
template <class Arc>
void MinimizeEncoded(VectorFst<Arc> *fst, float delta = kDelta) {
  // Minimizes *fst in place by the following fixed sequence of steps:
  //   1. map every arc through QuantizeMapper<Arc>(delta);
  //   2. encode labels and weights with an EncodeMapper;
  //   3. run acceptor minimization on the encoded FST;
  //   4. decode with the same EncodeMapper to restore labels and weights.
  QuantizeMapper<Arc> quantizer(delta);
  Map(fst, quantizer);

  EncodeMapper<Arc> label_weight_codec(kEncodeLabels | kEncodeWeights, ENCODE);
  Encode(fst, &label_weight_codec);

  internal::AcceptorMinimize(fst);

  Decode(fst, label_weight_codec);
}

/// GetLinearSymbolSequence gets the symbol sequence from a linear FST.
/// If the FST is not just a linear sequence, it returns false.   If it is
/// a linear sequence (including the empty FST), it returns true.  In this
/// case it outputs the symbol
/// sequences as "isymbols_out" and "osymbols_out" (removing epsilons), and
/// the total weight as "tot_weight". The total weight will be Weight::Zero()
/// if the FST is empty.  If any of the output pointers are NULL, it does not
/// create that output.
template <class Arc, class I>
bool GetLinearSymbolSequence(const Fst<Arc> &fst, std::vector<I> *isymbols_out,
                             std::vector<I> *osymbols_out,
                             typename Arc::Weight *tot_weight_out);

/// This function converts an FST with a special structure, which is
/// output by the OpenFst functions ShortestPath and RandGen, and converts
/// them into a std::vector of separate FSTs.  This special structure is that
/// the only state that has more than one (arcs-out or final-prob) is the
/// start state.  fsts_out is resized to the appropriate size.
template <class Arc>
void ConvertNbestToVector(const Fst<Arc> &fst,
                          std::vector<VectorFst<Arc> > *fsts_out);

/// Takes the n-shortest-paths (using ShortestPath), but outputs
/// the result as a vector of up to n fsts.  This function will
/// size the "fsts_out" vector to however many paths it got
/// (which will not exceed n).  n must be >= 1.
template <class Arc>
void NbestAsFsts(const Fst<Arc> &fst, size_t n,
                 std::vector<VectorFst<Arc> > *fsts_out);

/// Creates unweighted linear acceptor from symbol sequence.
template <class Arc, class I>
void MakeLinearAcceptor(const std::vector<I> &labels, MutableFst<Arc> *ofst);

/// Creates an unweighted acceptor with a linear structure, with alternatives
/// at each position.  Epsilon is treated like a normal symbol here.
/// Each position in "labels" must have at least one alternative.
template <class Arc, class I>
void MakeLinearAcceptorWithAlternatives(
    const std::vector<std::vector<I> > &labels, MutableFst<Arc> *ofst);

/// Does PreDeterminize and DeterminizeStar and then removes the disambiguation
/// symbols. This is a form of determinization that will never blow up. Note
/// that ifst is non-const and can be considered to be destroyed by this
/// operation.
/// Does not do epsilon removal (RemoveEpsLocal)-- this is so it's safe to cast
/// to log and do this, and maintain equivalence in tropical.

template <class Arc>
void SafeDeterminizeWrapper(MutableFst<Arc> *ifst, MutableFst<Arc> *ofst,
                            float delta = kDelta);

/// SafeDeterminizeMinimizeWrapper is as SafeDeterminizeWrapper except that it
/// also minimizes (encoded minimization, which is safe).  This algorithm will
/// destroy "ifst".
template <class Arc>
void SafeDeterminizeMinimizeWrapper(MutableFst<Arc> *ifst, VectorFst<Arc> *ofst,
                                    float delta = kDelta);

/// SafeDeterminizeMinimizeWrapperInLog is as SafeDeterminizeMinimizeWrapper
/// except it first casts to the log semiring.
void SafeDeterminizeMinimizeWrapperInLog(VectorFst<StdArc> *ifst,
                                         VectorFst<StdArc> *ofst,
                                         float delta = kDelta);

/// RemoveSomeInputSymbols removes any symbol that appears in "to_remove", from
/// the input side of the FST, replacing them with epsilon.
template <class Arc, class I>
void RemoveSomeInputSymbols(const std::vector<I> &to_remove,
                            MutableFst<Arc> *fst);

// MapInputSymbols will replace any input symbol i that is between 0 and
// symbol_map.size()-1, with symbol_map[i].  It removes the input symbol
// table of the FST.
template <class Arc, class I>
void MapInputSymbols(const std::vector<I> &symbol_map, MutableFst<Arc> *fst);

template <class Arc>
void RemoveWeights(MutableFst<Arc> *fst);

/// Returns true if and only if the FST is such that the input symbols
/// on arcs entering any given state all have the same value.
/// if "start_is_epsilon", treat start-state as an epsilon input arc
/// [i.e. ensure only epsilon can enter start-state].
template <class Arc>
bool PrecedingInputSymbolsAreSame(bool start_is_epsilon, const Fst<Arc> &fst);

/// This is as PrecedingInputSymbolsAreSame, but with a functor f that maps
/// labels to classes. The function tests whether the symbols preceding any
/// given state are in the same class. Formally, f is of a type F that has an
/// operator of type F::Result F::operator() (F::Arg a) const; where F::Result
/// is an integer type and F::Arg can be constructed from Arc::Label. This must
/// apply to valid labels and also to kNoLabel (so we can have a marker for the
/// invalid labels).
template <class Arc, class F>
bool PrecedingInputSymbolsAreSameClass(bool start_is_epsilon,
                                       const Fst<Arc> &fst, const F &f);

/// Returns true if and only if the FST is such that the input symbols
/// on arcs exiting any given state all have the same value.
/// If end_is_epsilon, treat end-state as an epsilon output arc [i.e. ensure
/// end-states cannot have non-epsilon output transitions.]
template <class Arc>
bool FollowingInputSymbolsAreSame(bool end_is_epsilon, const Fst<Arc> &fst);

template <class Arc, class F>
bool FollowingInputSymbolsAreSameClass(bool end_is_epsilon, const Fst<Arc> &fst,
                                       const F &f);

/// MakePrecedingInputSymbolsSame ensures that all arcs entering any given fst
/// state have the same input symbol.  It does this by detecting states
/// that have differing input symbols going in, and inserting, for each of
/// the preceding arcs with non-epsilon input symbol, a new dummy state that
/// has an epsilon link to the fst state.
/// If "start_is_epsilon", ensure that start-state can have only epsilon-links
/// into it.
template <class Arc>
void MakePrecedingInputSymbolsSame(bool start_is_epsilon, MutableFst<Arc> *fst);

/// As MakePrecedingInputSymbolsSame, but takes a functor object that maps
/// labels to classes.
template <class Arc, class F>
void MakePrecedingInputSymbolsSameClass(bool start_is_epsilon,
                                        MutableFst<Arc> *fst, const F &f);

/// MakeFollowingInputSymbolsSame ensures that all arcs exiting any given fst
/// state have the same input symbol.  It does this by detecting states that
/// have differing input symbols on arcs that exit it, and inserting, for each
/// of the following arcs with non-epsilon input symbol, a new dummy state that
/// has an input-epsilon link from the fst state.  The output symbol and weight
/// stay on the link to the dummy state (in order to keep the FST
/// output-deterministic and stochastic, if it already was). If end_is_epsilon,
/// treat "being a final-state" like having an epsilon output link.
template <class Arc>
void MakeFollowingInputSymbolsSame(bool end_is_epsilon, MutableFst<Arc> *fst);

/// As MakeFollowingInputSymbolsSame, but takes a functor object that maps
/// labels to classes.
template <class Arc, class F>
void MakeFollowingInputSymbolsSameClass(bool end_is_epsilon,
                                        MutableFst<Arc> *fst, const F &f);

/// MakeLoopFst creates an FST that has a state that is both initial and
/// final (weight == Weight::One()), and for each non-NULL pointer fsts[i],
/// it has an arc out whose output-symbol is i and which goes to a
/// sub-graph whose input language is equivalent to fsts[i], where the
/// final-state becomes a transition to the loop-state.  Each fst in "fsts"
/// should be an acceptor.  The fst MakeLoopFst returns is output-deterministic,
/// but not output-epsilon free necessarily, and arcs are sorted on output
/// label. Note: if some of the pointers in the input vector "fsts" have the
/// same value, "MakeLoopFst" uses this to speed up the computation.

/// Formally: suppose I is the set of indexes i such that fsts[i] != NULL.
/// Let L[i] be the language that the acceptor fsts[i] accepts.
/// Let the language K be the set of input-output pairs i:l such
/// that i in I and l in L[i].  Then the FST returned by MakeLoopFst
/// accepts the language K*, where * is the Kleene closure (CLOSURE_STAR)
/// of K.

/// We could have implemented this via a combination of "project",
/// "concat", "union" and "closure".  But that FST would have been
/// less well optimized and would have a lot of final-states.

template <class Arc>
VectorFst<Arc> *MakeLoopFst(const std::vector<const ExpandedFst<Arc> *> &fsts);

/// ApplyProbabilityScale is applicable to FSTs in the log or tropical semiring.
/// It multiplies the arc and final weights by "scale" [this is not the Mul
/// operation of the semiring, it's actual multiplication, which is equivalent
/// to taking a power in the semiring].
template <class Arc>
void ApplyProbabilityScale(float scale, MutableFst<Arc> *fst);

/// EqualAlign is similar to RandGen, but it generates a sequence with exactly
/// "length" input symbols.  It returns true on success, false on failure
/// (failure is partly random but should never happen in practice for normal
/// speech models.) It generates a random path through the input FST, finds out
/// which subset of the states it visits along the way have self-loops with
/// input symbols on them, and outputs a path with exactly enough self-loops to
/// have the requested number of input symbols. Note that EqualAlign does not
/// use the probabilities on the FST.  It just uses equal probabilities in the
/// first stage of selection (since the output will anyway not be a truly random
/// sample from the FST). The input fst "ifst" must be connected or this may
/// enter an infinite loop.
template <class Arc>
bool EqualAlign(const Fst<Arc> &ifst, typename Arc::StateId length,
                int rand_seed, MutableFst<Arc> *ofst, int num_retries = 10);

// RemoveUselessArcs removes arcs such that there is no input symbol
// sequence for which the best path through the FST would contain
// those arcs [for these purposes, epsilon is not treated as a real symbol].
// This is mainly geared towards decoding-graph FSTs which may contain
// transitions that have less likely words on them that would never be
// taken.  We do not claim that this algorithm removes all such arcs;
// it just does the best job it can.
// Only works for tropical (not log) semiring as it uses
// NaturalLess.
template <class Arc>
void RemoveUselessArcs(MutableFst<Arc> *fst);

// PhiCompose is a version of composition where
// the right hand FST (fst2) is treated as a backoff
// LM, with the phi symbol (e.g. #0) treated as a
// "failure transition", only taken when we don't
// have a match for the requested symbol.
template <class Arc>
void PhiCompose(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
                typename Arc::Label phi_label, MutableFst<Arc> *fst);

// PropagateFinal propagates final-probs through
// "phi" transitions (note that here, phi_label may
// be epsilon if you want).  If you have a backoff LM
// with special symbols ("phi") on the backoff arcs
// instead of epsilon, you may use PhiCompose to compose
// with it, but this won't do the right thing w.r.t.
// final probabilities.  You should first call PropagateFinal
// on the FST with phi's in it (fst2 in PhiCompose above),
// to fix this.  If a state does not have a final-prob,
// but has a phi transition, it makes the state's final-prob
// (phi-prob * final-prob-of-dest-state), and does this
// recursively i.e. follows phi transitions on the dest state
// first.  It behaves as if there were a super-final state
// with a special symbol leading to it, from each currently
// final state.  Note that this may not behave as desired
// if there are epsilons in your FST; it might be better
// to remove those before calling this function.

template <class Arc>
void PropagateFinal(typename Arc::Label phi_label, MutableFst<Arc> *fst);

// RhoCompose is a version of composition where
// the right hand FST (fst2) has special "rho transitions"
// which are taken whenever no normal transition matches; these
// transitions will be rewritten with whatever symbol was on
// the first FST.
template <class Arc>
void RhoCompose(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
                typename Arc::Label rho_label, MutableFst<Arc> *fst);

/** This function returns true if, in the semiring of the FST, the sum (within
    the semiring) of all the arcs out of each state in the FST is one, to within
    delta.  After MakeStochasticFst, this should be true (for a connected FST).

    @param fst [in] the FST that we are testing.
    @param delta [in] the tolerance to within which we test equality to 1.
    @param min_sum [out] if non-NULL, contents will be set to the minimum sum
   of weights.
    @param max_sum [out] if non-NULL, contents will be set to the maximum sum
   of weights.
    @return Returns true if the FST is stochastic, and false otherwise.
*/

template <class Arc>
bool IsStochasticFst(const Fst<Arc> &fst,
                     float delta = kDelta,  // kDelta = 1.0/1024.0 by default.
                     typename Arc::Weight *min_sum = NULL,
                     typename Arc::Weight *max_sum = NULL);

// IsStochasticFstInLog makes sure it's stochastic after casting to log.
inline bool IsStochasticFstInLog(
    const Fst<StdArc> &fst,
    float delta = kDelta,  // kDelta = 1.0/1024.0 by default.
    StdArc::Weight *min_sum = NULL, StdArc::Weight *max_sum = NULL);

}  // end namespace fst

#include "fstext/fstext-utils-inl.h"

#endif  // KALDI_FSTEXT_FSTEXT_UTILS_H_


================================================
FILE: runtime/engine/kaldi/fstext/kaldi-fst-io-inl.h
================================================
// fstext/kaldi-fst-io-inl.h

// Copyright 2009-2011  Microsoft Corporation
//           2012-2015  Johns Hopkins University (Author: Daniel Povey)
//                2013  Guoguo Chen

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_FSTEXT_KALDI_FST_IO_INL_H_
#define KALDI_FSTEXT_KALDI_FST_IO_INL_H_

#include <string>
#include <vector>

#include "util/text-utils.h"

namespace fst {

template <class Arc>
void WriteFstKaldi(std::ostream &os, bool binary, const VectorFst<Arc> &t) {
  bool ok;
  if (binary) {
    // Binary-mode writing.
    ok = t.Write(os, FstWriteOptions());
  } else {
    // Text-mode output.  Note: we expect that t.InputSymbols() and
    // t.OutputSymbols() would always return NULL.  The corresponding input
    // routine would not work if the FST actually had symbols attached.  Write a
    // newline to start the FST; in a table, the first line of the FST will
    // appear on its own line.
    os << '\n';
    bool acceptor = false, write_one = false;
    FstPrinter<Arc> printer(t, t.InputSymbols(), t.OutputSymbols(), NULL,
                            acceptor, write_one, "\t");
    printer.Print(&os, "<unknown>");
    if (os.fail()) KALDI_ERR << "Stream failure detected writing FST to stream";
    // Write another newline as a terminating character.  The read routine will
    // detect this [this is a Kaldi mechanism, not something in the original
    // OpenFst code].
    os << '\n';
    ok = os.good();
  }
  if (!ok) {
    KALDI_ERR << "Error writing FST to stream";
  }
}

// Utility function used in ReadFstKaldi
template <class W>
inline bool StrToWeight(const std::string &s, bool allow_zero, W *w) {
  // Parses `s` into *w using W's stream extraction operator.
  // Returns false if extraction fails, or if the parsed value equals
  // W::Zero() while allow_zero is false; returns true otherwise.
  std::istringstream parser(s);
  parser >> *w;
  if (parser.fail()) return false;
  if (allow_zero) return true;
  return !(*w == W::Zero());
}

// Reads a VectorFst from `is`, in OpenFst binary form (binary == true) or in
// the Kaldi text form (binary == false).  Errors are reported via KALDI_ERR.
//
// Text form: a leading newline, then one line per final state or arc, then an
// empty line that ends the FST.  Fields are split on
// FLAGS_fst_field_separator (plus "\r\n").  Per line, by number of columns:
//   1: "state"                             -> final state, weight One()
//   2: "state weight"                      -> final state with given weight
//   4: "src dest ilabel olabel"            -> arc with weight One()
//   5: "src dest ilabel olabel weight"     -> arc; a weight of Zero() is
//                                             rejected
//   3 or >5 columns                        -> error
// The state on the first line becomes the start state.  States are created
// on demand so that every referenced state id exists.  Negative state ids
// are rejected as bad lines: they would skip the on-demand state creation
// and then be used as out-of-range indices.
template <class Arc>
void ReadFstKaldi(std::istream &is, bool binary, VectorFst<Arc> *fst) {
  typedef typename Arc::Weight Weight;
  typedef typename Arc::StateId StateId;
  if (binary) {
    // We don't have access to the filename here, so write [unknown].
    VectorFst<Arc> *ans =
        VectorFst<Arc>::Read(is, fst::FstReadOptions(std::string("[unknown]")));
    if (ans == NULL) {
      KALDI_ERR << "Error reading FST from stream.";
    }
    *fst = *ans;  // shallow copy.
    delete ans;
  } else {
    // Consume the \r on Windows, the \n that the text-form FST format starts
    // with, and any extra spaces that might have got in there somehow.
    while (std::isspace(is.peek()) && is.peek() != '\n') is.get();
    if (is.peek() == '\n') {
      is.get();  // consume the newline.
    } else {     // saw spaces but no newline.. this is not expected.
      KALDI_ERR << "Reading FST: unexpected sequence of spaces "
                << " at file position " << is.tellg();
    }
    using kaldi::ConvertStringToInteger;
    using kaldi::SplitStringToIntegers;
    using std::string;
    using std::vector;
    fst->DeleteStates();
    string line;
    size_t nline = 0;
    string separator = FLAGS_fst_field_separator + "\r\n";
    while (std::getline(is, line)) {
      nline++;
      vector<string> col;
      // on Windows we'll write in text and read in binary mode.
      kaldi::SplitStringToVector(line, separator.c_str(), true, &col);
      if (col.size() == 0) break;  // Empty line is a signal to stop, in our
      // archive format.
      if (col.size() > 5) {
        KALDI_ERR << "Bad line in FST: " << line;
      }
      StateId s;
      // A negative source state would bypass the AddState() loop below and
      // then index out of range in SetFinal()/AddArc().
      if (!ConvertStringToInteger(col[0], &s) || s < 0) {
        KALDI_ERR << "Bad line in FST: " << line;
      }
      while (s >= fst->NumStates()) fst->AddState();
      if (nline == 1) fst->SetStart(s);

      bool ok = true;
      Arc arc;
      Weight w;
      StateId d = s;  // destination state; stays == s for final-state lines.
      switch (col.size()) {
        case 1:
          fst->SetFinal(s, Weight::One());
          break;
        case 2:
          if (!StrToWeight(col[1], true, &w))
            ok = false;
          else
            fst->SetFinal(s, w);
          break;
        case 3:  // 3 columns not ok for Lattice format; it's not an acceptor.
          ok = false;
          break;
        case 4:
          ok = ConvertStringToInteger(col[1], &arc.nextstate) &&
               ConvertStringToInteger(col[2], &arc.ilabel) &&
               ConvertStringToInteger(col[3], &arc.olabel) &&
               arc.nextstate >= 0;  // reject negative destination states.
          if (ok) {
            d = arc.nextstate;
            arc.weight = Weight::One();
            fst->AddArc(s, arc);
          }
          break;
        case 5:
          ok = ConvertStringToInteger(col[1], &arc.nextstate) &&
               ConvertStringToInteger(col[2], &arc.ilabel) &&
               ConvertStringToInteger(col[3], &arc.olabel) &&
               StrToWeight(col[4], false, &arc.weight) &&
               arc.nextstate >= 0;  // reject negative destination states.
          if (ok) {
            d = arc.nextstate;
            fst->AddArc(s, arc);
          }
          break;
        default:
          ok = false;
      }
      // Make sure the destination state of the arc just added exists.
      while (d >= fst->NumStates()) fst->AddState();
      if (!ok) KALDI_ERR << "Bad line in FST: " << line;
    }
  }
}

template <class Arc>  // static
bool VectorFstTplHolder<Arc>::Write(std::ostream &os, bool binary, const T &t) {
  // Forwards to WriteFstKaldi() and converts any exception it raises into a
  // `false` return value; returns true when the write completes normally.
  try {
    WriteFstKaldi(os, binary, t);
  } catch (...) {
    return false;
  }
  return true;
}

template <class Arc>  // static
bool VectorFstTplHolder<Arc>::Read(std::istream &is) {
  // Reads one FST from `is` into a freshly allocated t_, auto-detecting text
  // versus binary form from the first character.  Returns false (after
  // Clear()) on end of stream or if ReadFstKaldi() throws.
  Clear();
  const int next_char = is.peek();
  if (next_char == -1) {
    KALDI_WARN << "End of stream detected reading Fst";
    return false;
  }
  // The text form begins with whitespace (normally '\n'); the binary form
  // cannot, because it starts with the FST Type() string, which is not space.
  const bool is_binary = !isspace(next_char);
  try {
    t_ = new VectorFst<Arc>();
    ReadFstKaldi(is, is_binary, t_);
  } catch (...) {
    Clear();
    return false;
  }
  return true;
}

}  // namespace fst.

#endif  // KALDI_FSTEXT_KALDI_FST_IO_INL_H_


================================================
FILE: runtime/engine/kaldi/fstext/kaldi-fst-io.cc
================================================
// fstext/kaldi-fst-io.cc

// Copyright 2009-2011  Microsoft Corporation
//           2012-2015  Johns Hopkins University (Author: Daniel Povey)
//                2013  Guoguo Chen

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "fstext/kaldi-fst-io.h"

#include <string>

#include "base/kaldi-error.h"
#include "base/kaldi-math.h"
#include "util/kaldi-io.h"

namespace fst {

VectorFst<StdArc> *ReadFstKaldi(std::string rxfilename) {
  // Opens `rxfilename` through kaldi::Input, reads the FST header and then
  // the FST itself as a VectorFst<StdArc>.  Either failure is reported via
  // KALDI_ERR.  The returned object is heap-allocated by
  // VectorFst<StdArc>::Read(); callers in this file delete it when done.
  if (rxfilename.empty()) {
    // Interpret "" as stdin, for compatibility with OpenFst conventions.
    rxfilename = "-";
  }
  kaldi::Input ki(rxfilename);

  fst::FstHeader header;
  const bool header_ok = header.Read(ki.Stream(), rxfilename);
  if (!header_ok) {
    KALDI_ERR << "Reading FST: error reading FST header from "
              << kaldi::PrintableRxfilename(rxfilename);
  }

  // The header has already been consumed, so pass it along to Read().
  FstReadOptions ropts("<unspecified>", &header);
  VectorFst<StdArc> *result = VectorFst<StdArc>::Read(ki.Stream(), ropts);
  if (result == NULL) {
    KALDI_ERR << "Could not read fst from "
              << kaldi::PrintableRxfilename(rxfilename);
  }
  return result;
}

// Register the vector and const FST types (both over StdArc) so that they
// can be loaded automatically. Other types like olabel_lookahead or ngram
// or compact_fst should be registered through OpenFst registration API.
// NOTE(review): presumably this is what lets Fst<StdArc>::Read() in
// ReadFstKaldiGeneric() below build the right concrete type -- confirm.
static fst::FstRegisterer<VectorFst<StdArc>> VectorFst_StdArc_registerer;
static fst::FstRegisterer<ConstFst<StdArc>> ConstFst_StdArc_registerer;

// Reads an FST of any registered type over StdArc from `rxfilename`
// (opened through kaldi::Input; "" is treated as "-").
//
// Three things can fail: reading the FST header, the header's arc type not
// being StdArc, and reading the FST body.  For each of them:
//   - if throw_on_err is true, the error is reported via KALDI_ERR;
//   - otherwise a warning is logged via KALDI_WARN and NULL is returned.
// On success returns the pointer produced by Fst<StdArc>::Read().
Fst<StdArc> *ReadFstKaldiGeneric(std::string rxfilename, bool throw_on_err) {
  if (rxfilename == "") rxfilename = "-";  // interpret "" as stdin,
  // for compatibility with OpenFst conventions.
  kaldi::Input ki(rxfilename);
  fst::FstHeader hdr;
  // Read FstHeader which contains the type of FST
  if (!hdr.Read(ki.Stream(), rxfilename)) {
    if (throw_on_err) {
      KALDI_ERR << "Reading FST: error reading FST header from "
                << kaldi::PrintableRxfilename(rxfilename);
    } else {
      KALDI_WARN << "We fail to read FST header from "
                 << kaldi::PrintableRxfilename(rxfilename)
                 << ". A NULL pointer is returned.";
      return NULL;
    }
  }
  // Check the type of Arc
  if (hdr.ArcType() != fst::StdArc::Type()) {
    if (throw_on_err) {
      KALDI_ERR << "FST with arc type " << hdr.ArcType()
                << " is not supported.";
    } else {
      // Trailing space after "type" so the arc type is not glued to the
      // preceding word in the log output.
      KALDI_WARN << "Fst with arc type " << hdr.ArcType()
                 << " is not supported. A NULL pointer is returned.";
      return NULL;
    }
  }
  // Read the FST; the already-consumed header is handed over via the options.
  FstReadOptions ropts("<unspecified>", &hdr);
  Fst<StdArc> *fst = Fst<StdArc>::Read(ki.Stream(), ropts);
  if (!fst) {
    if (throw_on_err) {
      KALDI_ERR << "Could not read fst from "
                << kaldi::PrintableRxfilename(rxfilename);
    } else {
      KALDI_WARN << "Could not read fst from "
                 << kaldi::PrintableRxfilename(rxfilename)
                 << ". A NULL pointer is returned.";
      return NULL;
    }
  }
  return fst;
}

VectorFst<StdArc> *CastOrConvertToVectorFst(Fst<StdArc> *fst) {
  // Returns `fst` as a VectorFst<StdArc>.  Only FSTs whose Type() is "vector"
  // or "const" are accepted (enforced with KALDI_ASSERT).
  //  - "vector": the same pointer is returned, down-cast with dynamic_cast.
  //  - "const":  a new VectorFst<StdArc> is copy-constructed from *fst, `fst`
  //              is deleted, and the new object is returned.
  std::string real_type = fst->Type();
  KALDI_ASSERT(real_type == "vector" || real_type == "const");
  if (real_type != "vector") {
    // `fst` cannot be cast to VectorFst, so build a VectorFst from it and
    // release the original.
    VectorFst<StdArc> *converted = new VectorFst<StdArc>(*fst);
    delete fst;
    return converted;
  }
  return dynamic_cast<VectorFst<StdArc> *>(fst);
}

void ReadFstKaldi(std::string rxfilename, fst::StdVectorFst *ofst) {
  fst::StdVectorFst *fst = ReadFstKaldi(rxfilename);
  *ofst = *fst;
  delete fst;
}

void WriteFstKaldi(const VectorFst<StdArc> &fst, std::string wxfilename) {
  if (wxfilename == "") wxfilename = "-";  // interpret "" as stdout,
  // for compatibility with OpenFst conventions.
  bool write_binary = true, write_header = false;
  kaldi::Output ko(wxfilename, write_binary, write_header);
  FstWriteOptions wopts(kaldi::PrintableWxfilename(wxfilename));
  fst.Write(ko.Stream(), wopts);
}

fst::VectorFst<fst::StdArc> *ReadAndPrepareLmFst(std::string rxfilename) {
  // Loads an LM FST and normalizes it: if it is not an acceptor it is
  // projected onto its output labels, and if it is not sorted on input label
  // it is arc-sorted on ilabel.  Returns the FST read by ReadFstKaldi(),
  // which reports read failures itself via KALDI_ERR.
  fst::VectorFst<fst::StdArc> *lm = fst::ReadFstKaldi(rxfilename);

  const bool is_acceptor = lm->Properties(fst::kAcceptor, true) != 0;
  if (!is_acceptor) {
    // Copy olabels to ilabels.  Generally the G.fst's on disk will have the
    // disambiguation symbol #0 on the input side of the backoff arcs;
    // projection replaces those with the epsilons found on the output side.
    fst::Project(lm, fst::PROJECT_OUTPUT);
  }

  // Checked after the projection, which rewrites the input labels.
  const bool is_ilabel_sorted = lm->Properties(fst::kILabelSorted, true) != 0;
  if (!is_ilabel_sorted) {
    fst::ArcSort(lm, fst::ILabelCompare<fst::StdArc>());
  }
  return lm;
}

}  // end namespace fst


================================================
FILE: runtime/engine/kaldi/fstext/kaldi-fst-io.h
================================================
// fstext/kaldi-fst-io.h

// Copyright 2009-2011  Microsoft Corporation
//           2012-2015  Johns Hopkins University (Author: Daniel Povey)
//                2013  Guoguo Chen

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_FSTEXT_KALDI_FST_IO_H_
#define KALDI_FSTEXT_KALDI_FST_IO_H_

#include <string>
#include <utility>

#include "fst/fst-decl.h"
#include "fst/fstlib.h"
#include "fst/script/print-impl.h"

#include "base/kaldi-common.h"

// Some functions for writing Fsts.
// I/O for FSTs is a bit of a mess, and not very well integrated with Kaldi's
// generic I/O mechanisms, because we want files containing just FSTs to
// be readable by OpenFST's native binaries, which is not compatible
// with the normal \0B header that identifies Kaldi files as containing
// binary data.
// So use the functions here with your eyes open, and with caution!
namespace fst {

// Read a binary FST using Kaldi I/O mechanisms (pipes, etc.)
// On error, reports it via KALDI_ERR (it does not return NULL). Only
// supports VectorFst and exists mainly for backward code compatibility.
VectorFst<StdArc> *ReadFstKaldi(std::string rxfilename);

// Read a binary FST using Kaldi I/O mechanisms (pipes, etc.)
// If it can't read the FST, if throw_on_err == true it throws using KALDI_ERR;
// otherwise it prints a warning and returns NULL. Note: this
// doesn't support the text-mode option that we generally like to support.
// This version currently supports ConstFst<StdArc> or VectorFst<StdArc>
// (const-fst can give better performance for decoding). Other
// types could be also loaded if registered inside OpenFst.
Fst<StdArc> *ReadFstKaldiGeneric(std::string rxfilename,
                                 bool throw_on_err = true);

// This function attempts to dynamic_cast the pointer 'fst' (which will likely
// have been returned by ReadFstGeneric()), to the more derived
// type VectorFst<StdArc>. If this succeeds, it returns the same pointer;
// if it fails, it converts the FST type (by creating a new VectorFst<stdArc>
// initialized by 'fst'), prints a warning, and deletes 'fst'.
VectorFst<StdArc> *CastOrConvertToVectorFst(Fst<StdArc> *fst);

// Version of ReadFstKaldi() that writes to a pointer.  Assumes
// the FST is binary with no binary marker.  Crashes on error.
void ReadFstKaldi(std::string rxfilename, VectorFst<StdArc> *ofst);

// Write an FST using Kaldi I/O mechanisms (pipes, etc.)
// On error, throws using KALDI_ERR.  For use only in code in fstbin/,
// as it doesn't support the text-mode option.
void WriteFstKaldi(const VectorFst<StdArc> &fst, std::string wxfilename);

// This is a more general Kaldi-type-IO mechanism of writing FSTs to
// streams, supporting binary or text-mode writing.  (note: we just
// write the integers, symbol tables are not supported).
// On error, throws using KALDI_ERR.
template <class Arc>
void WriteFstKaldi(std::ostream &os, bool binary, const VectorFst<Arc> &fst);

// A generic Kaldi-type-IO mechanism of reading FSTs from streams,
// supporting binary or text-mode reading/writing.
template <class Arc>
void ReadFstKaldi(std::istream &is, bool binary, VectorFst<Arc> *fst);

// Read an FST file for LM (G.fst) and make it an acceptor,
// and make sure it is sorted on labels
fst::VectorFst<fst::StdArc> *ReadAndPrepareLmFst(std::string rxfilename);

// This is a Holder class with T = VectorFst<Arc>, that meets the requirements
// of a Holder class as described in ../util/kaldi-holder.h. This enables us to
// read/write collections of FSTs indexed by strings, using the Table concept (
// see ../util/kaldi-table.h).
// Originally it was only templated on T = VectorFst<StdArc>, but as the keyword
// spotting stuff introduced more types of FSTs, we made it also templated on
// the arc.
template <class Arc>
class VectorFstTplHolder {
 public:
  typedef VectorFst<Arc> T;

  VectorFstTplHolder() : t_(NULL) {}

  static bool Write(std::ostream &os, bool binary, const T &t);

  void Copy(const T &t) {  // copies it into the holder.
    Clear();
    t_ = new T(t);
  }

  // Reads into the holder.
  bool Read(std::istream &is);

  // It's potentially a binary format, so must read in binary mode (linefeed
  // translation will corrupt the file.  We don't know till we open the file if
  // it's really binary, so we need to read in binary mode to be on the safe
  // side.  Extra linefeeds won't matter, the text-mode reading code ignores
  // them.
  static bool IsReadInBinary() { return true; }

  T &Value() {
    // code error if !t_.
    if (!t_) KALDI_ERR << "VectorFstTplHolder::Value() called wrongly.";
    return *t_;
  }

  void Clear() {
    if (t_) {
      delete t_;
      t_ = NULL;
    }
  }

  void Swap(VectorFstTplHolder<Arc> *other) { std::swap(t_, other->t_); }

  bool ExtractRange(const VectorFstTplHolder<Arc> &other,
                    const std::string &range) {
    KALDI_ERR << "ExtractRange is not defined for this type of holder.";
    return false;
  }

  ~VectorFstTplHolder() { Clear(); }
  // No destructor.  Assignment and
  // copy constructor take their default implementations.
 private:
  KALDI_DISALLOW_COPY_AND_ASSIGN(VectorFstTplHolder);
  T *t_;
};

// Now make the original VectorFstHolder a typedef of
// VectorFstTplHolder<StdArc>.
typedef VectorFstTplHolder<StdArc> VectorFstHolder;

}  // end namespace fst

#include "fstext/kaldi-fst-io-inl.h"

#endif  // KALDI_FSTEXT_KALDI_FST_IO_H_


================================================
FILE: runtime/engine/kaldi/fstext/lattice-utils-inl.h
================================================
// fstext/lattice-utils-inl.h

// Copyright 2009-2012  Microsoft Corporation  Johns Hopkins University (Author:
// Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_FSTEXT_LATTICE_UTILS_INL_H_
#define KALDI_FSTEXT_LATTICE_UTILS_INL_H_
// Do not include this file directly.  It is included by lattice-utils.h

#include <utility>
#include <vector>

namespace fst {

/* Convert from FST with arc-type Weight, to one with arc-type
   CompactLatticeWeight.  Uses FactorFst to identify chains
   of states which can be turned into a single output arc. */

template <class Weight, class Int>
void ConvertLattice(
    const ExpandedFst<ArcTpl<Weight> > &ifst,
    MutableFst<ArcTpl<CompactLatticeWeightTpl<Weight, Int> > > *ofst,
    bool invert) {
  typedef ArcTpl<Weight> Arc;
  typedef typename Arc::StateId StateId;
  typedef CompactLatticeWeightTpl<Weight, Int> CompactWeight;
  typedef ArcTpl<CompactWeight> CompactArc;

  VectorFst<ArcTpl<Weight> > ffst;
  std::vector<std::vector<Int> > labels;
  if (invert) {  // normal case: want the ilabels as sequences on the arcs of
    Factor(ifst, &ffst, &labels);  // the output... Factor makes seqs of
                                   // ilabels.
  } else {
    VectorFst<ArcTpl<Weight> > invfst(ifst);
    Invert(&invfst);
    Factor(invfst, &ffst, &labels);
  }

  TopSort(&ffst);  // Put the states in ffst in topological order, which is
  // easier on the eye when reading the text-form lattices and corresponds to
  // what we get when we generate the lattices in the decoder.

  ofst->DeleteStates();

  // The states will be numbered exactly the same as the original FST.
  // Add the states to the new FST.
  StateId num_states = ffst.NumStates();
  for (StateId s = 0; s < num_states; s++) {
    StateId news = ofst->AddState();
    assert(news == s);
  }
  ofst->SetStart(ffst.Start());
  for (StateId s = 0; s < num_states; s++) {
    Weight final_weight = ffst.Final(s);
    if (final_weight != Weight::Zero()) {
      CompactWeight final_compact_weight(final_weight, std::vector<Int>());
      ofst->SetFinal(s, final_compact_weight);
    }
    for (ArcIterator<ExpandedFst<Arc> > iter(ffst, s); !iter.Done();
         iter.Next()) {
      const Arc &arc = iter.Value();
      KALDI_PARANOID_ASSERT(arc.weight != Weight::Zero());
      // note: zero-weight arcs not allowed anyway so weight should not be zero,
      // but no harm in checking.
      CompactArc compact_arc(arc.olabel, arc.olabel,
                             CompactWeight(arc.weight, labels[arc.ilabel]),
                             arc.nextstate);
      ofst->AddArc(s, compact_arc);
    }
  }
}

template <class Weight, class Int>
void ConvertLattice(
    const ExpandedFst<ArcTpl<CompactLatticeWeightTpl<Weight, Int> > > &ifst,
    MutableFst<ArcTpl<Weight> > *ofst, bool invert) {
  typedef ArcTpl<Weight> Arc;
  typedef typename Arc::StateId StateId;
  typedef typename Arc::Label Label;
  typedef CompactLatticeWeightTpl<Weight, Int> CompactWeight;
  typedef ArcTpl<CompactWeight> CompactArc;
  ofst->DeleteStates();
  // make the states in the new FST have the same numbers as
  // the original ones, and add chains of states as necessary
  // to encode the string-valued weights.
  StateId num_states = ifst.NumStates();
  for (StateId s = 0; s < num_states; s++) {
    StateId news = ofst->AddState();
    assert(news == s);
  }
  ofst->SetStart(ifst.Start());
  for (StateId s = 0; s < num_states; s++) {
    CompactWeight final_weight = ifst.Final(s);
    if (final_weight != CompactWeight::Zero()) {
      StateId cur_state = s;
      size_t string_length = final_weight.String().size();
      for (size_t n = 0; n < string_length; n++) {
        StateId next_state = ofst->AddState();
        Label ilabel = 0;
        Arc arc(ilabel, final_weight.String()[n],
                (n == 0 ? final_weight.Weight() : Weight::One()), next_state);
        if (invert) std::swap(arc.ilabel, arc.olabel);
        ofst->AddArc(cur_state, arc);
        cur_state = next_state;
      }
      ofst->SetFinal(cur_state,
                     string_length > 0 ? Weight::One() : final_weight.Weight());
    }
    for (ArcIterator<ExpandedFst<CompactArc> > iter(ifst, s); !iter.Done();
         iter.Next()) {
      const CompactArc &arc = iter.Value();
      size_t string_length = arc.weight.String().size();
      StateId cur_state = s;
      // for all but the last element in the string--
      // add a temporary state.
      for (size_t n = 0; n + 1 < string_length; n++) {
        StateId next_state = ofst->AddState();
        Label ilabel = (n == 0 ? arc.ilabel : 0),
              olabel = static_cast<Label>(arc.weight.String()[n]);
        Weight weight = (n == 0 ? arc.weight.Weight() : Weight::One());
        Arc new_arc(ilabel, olabel, weight, next_state);
        if (invert) std::swap(new_arc.ilabel, new_arc.olabel);
        ofst->AddArc(cur_state, new_arc);
        cur_state = next_state;
      }
      Label ilabel = (string_length <= 1 ? arc.ilabel : 0),
            olabel = (string_length > 0 ? arc.weight.String()[string_length - 1]
                                        : 0);
      Weight weight =
          (string_length <= 1 ? arc.weight.Weight() : Weight::One());
      Arc new_arc(ilabel, olabel, weight, arc.nextstate);
      if (invert) std::swap(new_arc.ilabel, new_arc.olabel);
      ofst->AddArc(cur_state, new_arc);
    }
  }
}

// This function converts lattices between float and double;
// it works for both CompactLatticeWeight and LatticeWeight.
template <class WeightIn, class WeightOut>
void ConvertLattice(const ExpandedFst<ArcTpl<WeightIn> > &ifst,
                    MutableFst<ArcTpl<WeightOut> > *ofst) {
  typedef ArcTpl<WeightIn> ArcIn;
  typedef ArcTpl<WeightOut> ArcOut;
  typedef typename ArcIn::StateId StateId;
  ofst->DeleteStates();
  // The states will be numbered exactly the same as the original FST.
  // Add the states to the new FST.
  StateId num_states = ifst.NumStates();
  for (StateId s = 0; s < num_states; s++) {
    StateId news = ofst->AddState();
    assert(news == s);
  }
  ofst->SetStart(ifst.Start());
  for (StateId s = 0; s < num_states; s++) {
    WeightIn final_iweight = ifst.Final(s);
    if (final_iweight != WeightIn::Zero()) {
      WeightOut final_oweight;
      ConvertLatticeWeight(final_iweight, &final_oweight);
      ofst->SetFinal(s, final_oweight);
    }
    for (ArcIterator<ExpandedFst<ArcIn> > iter(ifst, s); !iter.Done();
         iter.Next()) {
      ArcIn arc = iter.Value();
      KALDI_PARANOID_ASSERT(arc.weight != WeightIn::Zero());
      ArcOut oarc;
      ConvertLatticeWeight(arc.weight, &oarc.weight);
      oarc.ilabel = arc.ilabel;
      oarc.olabel = arc.olabel;
      oarc.nextstate = arc.nextstate;
      ofst->AddArc(s, oarc);
    }
  }
}

template <class Weight, class ScaleFloat>
void ScaleLattice(const std::vector<std::vector<ScaleFloat> > &scale,
                  MutableFst<ArcTpl<Weight> > *fst) {
  assert(scale.size() == 2 && scale[0].size() == 2 && scale[1].size() == 2);
  if (scale == DefaultLatticeScale())  // nothing to do.
    return;
  typedef ArcTpl<Weight> Arc;
  typedef MutableFst<Arc> Fst;
  typedef typename Arc::StateId StateId;
  StateId num_states = fst->NumStates();
  for (StateId s = 0; s < num_states; s++) {
    for (MutableArcIterator<Fst> aiter(fst, s); !aiter.Done(); aiter.Next()) {
      Arc arc = aiter.Value();
      arc.weight = Weight(ScaleTupleWeight(arc.weight, scale));
      aiter.SetValue(arc);
    }
    Weight final_weight = fst->Final(s);
    if (final_weight != Weight::Zero())
      fst->SetFinal(s, Weight(ScaleTupleWeight(final_weight, scale)));
  }
}

template <class Weight, class Int>
void RemoveAlignmentsFromCompactLattice(
    MutableFst<ArcTpl<CompactLatticeWeightTpl<Weight, Int> > > *fst) {
  typedef CompactLatticeWeightTpl<Weight, Int> W;
  typedef ArcTpl<W> Arc;
  typedef MutableFst<Arc> Fst;
  typedef typename Arc::StateId StateId;

  // Every weight keeps its Weight() part and gets this empty string instead
  // of its alignment.
  const std::vector<Int> no_alignment;

  const StateId total = fst->NumStates();
  for (StateId state = 0; state < total; ++state) {
    for (MutableArcIterator<Fst> it(fst, state); !it.Done(); it.Next()) {
      Arc stripped = it.Value();
      stripped.weight = W(stripped.weight.Weight(), no_alignment);
      it.SetValue(stripped);
    }
    // Non-final states (weight Zero) are left untouched.
    const W final_weight = fst->Final(state);
    if (final_weight != W::Zero()) {
      fst->SetFinal(state, W(final_weight.Weight(), no_alignment));
    }
  }
}

template <class Weight, class Int>
bool CompactLatticeHasAlignment(
    const ExpandedFst<ArcTpl<CompactLatticeWeightTpl<Weight, Int> > > &fst) {
  typedef CompactLatticeWeightTpl<Weight, Int> W;
  typedef ArcTpl<W> Arc;
  typedef ExpandedFst<Arc> Fst;
  typedef typename Arc::StateId StateId;
  StateId num_states = fst.NumStates();
  for (StateId s = 0; s < num_states; s++) {
    for (ArcIterator<Fst> aiter(fst, s); !aiter.Done(); aiter.Next()) {
      const Arc &arc = aiter.Value();
      if (!arc.weight.String().empty()) return true;
    }
    W final_weight = fst.Final(s);
    if (!final_weight.String().empty()) return true;
  }
  return false;
}

template <class Real>
void ConvertFstToLattice(const ExpandedFst<ArcTpl<TropicalWeight> > &ifst,
                         MutableFst<ArcTpl<LatticeWeightTpl<Real> > > *ofst) {
  int32 num_states_cache = 50000;
  fst::CacheOptions cache_opts(true, num_states_cache);
  fst::MapFstOptions mapfst_opts(cache_opts);
  StdToLatticeMapper<Real> mapper;
  MapFst<StdArc, ArcTpl<LatticeWeightTpl<Real> >, StdToLatticeMapper<Real> >
      map_fst(ifst, mapper, mapfst_opts);
  *ofst = map_fst;
}

}  // namespace fst

#endif  // KALDI_FSTEXT_LATTICE_UTILS_INL_H_


================================================
FILE: runtime/engine/kaldi/fstext/lattice-utils.h
================================================
// fstext/lattice-utils.h

// Copyright 2009-2011  Microsoft Corporation

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_FSTEXT_LATTICE_UTILS_H_
#define KALDI_FSTEXT_LATTICE_UTILS_H_

#include <vector>

#include "fst/fstlib.h"
#include "fstext/lattice-weight.h"

namespace fst {

// The ConvertLattice templates convert between LatticeWeight FSTs and
// CompactLatticeWeight FSTs, between float and double, and from
// LatticeWeight to TropicalWeight.  They are used in the I/O code for
// lattices, and for converting lattices to standard FSTs (e.g. for creating
// decoding graphs from lattices).

/**
   Convert lattice from a normal FST to a CompactLattice FST.
   This is a bit like converting to the Gallic semiring, except
   the semiring behaves in a different way (designed to take
   the best path).
   With invert=false, the ilabels of "ifst" end up as the symbols on the arcs
   of the output acceptor, and the olabels go to the strings.  With
   invert=true [the default] it is the other way around: the olabels become
   the arc symbols and the ilabels go to the strings (useful for the
   speech-recognition application).
   Any previous contents of "ofst" are deleted.
*/
template <class Weight, class Int>
void ConvertLattice(
    const ExpandedFst<ArcTpl<Weight> > &ifst,
    MutableFst<ArcTpl<CompactLatticeWeightTpl<Weight, Int> > > *ofst,
    bool invert = true);

/**
   Convert a lattice from CompactLattice format to Lattice format.  This is a
   bit like converting from the Gallic semiring.  As for any CompactLattice,
   "ifst" must be an acceptor (i.e., ilabels and olabels should be identical).
   If invert=false, the labels on "ifst" become the ilabels on "ofst" and the
   strings in the weights of "ifst" become the olabels.  If invert=true
   [default], this is reversed (useful for speech recognition lattices; our
   standard non-compact format has the words on the output side to match HCLG).
   Any previous contents of "ofst" are deleted.
   */
template <class Weight, class Int>
void ConvertLattice(
    const ExpandedFst<ArcTpl<CompactLatticeWeightTpl<Weight, Int> > > &ifst,
    MutableFst<ArcTpl<Weight> > *ofst, bool invert = true);

/**
  Convert between CompactLattices and Lattices of different floating point
  types... this works between any pair of weight types for which
  ConvertLatticeWeight is defined (cf. lattice-weight.h), and also includes
  conversion from LatticeWeight to TropicalWeight.
  States, labels and arc destinations are copied unchanged; only the weights
  are converted.
 */
template <class WeightIn, class WeightOut>
void ConvertLattice(const ExpandedFst<ArcTpl<WeightIn> > &ifst,
                    MutableFst<ArcTpl<WeightOut> > *ofst);

// Now define some ConvertLattice functions that require two phases of
// conversion (we don't bother coding these separately, as they will be used
// rarely).

// Lattice with float to CompactLattice with double.
template <class Int>
void ConvertLattice(
    const ExpandedFst<ArcTpl<LatticeWeightTpl<float> > > &ifst,
    MutableFst<ArcTpl<CompactLatticeWeightTpl<LatticeWeightTpl<double>, Int> > >
        *ofst) {
  VectorFst<ArcTpl<CompactLatticeWeightTpl<LatticeWeightTpl<float>, Int> > >
      fst;
  ConvertLattice(ifst, &fst);
  ConvertLattice(fst, ofst);
}

// Lattice with double to CompactLattice with float.
template <class Int>
void ConvertLattice(
    const ExpandedFst<ArcTpl<LatticeWeightTpl<double> > > &ifst,
    MutableFst<ArcTpl<CompactLatticeWeightTpl<LatticeWeightTpl<float>, Int> > >
        *ofst) {
  VectorFst<ArcTpl<CompactLatticeWeightTpl<LatticeWeightTpl<double>, Int> > >
      fst;
  ConvertLattice(ifst, &fst);
  ConvertLattice(fst, ofst);
}

/// Converts CompactLattice with double to Lattice with float.
template <class Int>
void ConvertLattice(
    const ExpandedFst<
        ArcTpl<CompactLatticeWeightTpl<LatticeWeightTpl<double>, Int> > > &ifst,
    MutableFst<ArcTpl<LatticeWeightTpl<float> > > *ofst) {
  VectorFst<ArcTpl<CompactLatticeWeightTpl<LatticeWeightTpl<float>, Int> > >
      fst;
  ConvertLattice(ifst, &fst);
  ConvertLattice(fst, ofst);
}

/// Converts CompactLattice with float to Lattice with double.
template <class Int>
void ConvertLattice(
    const ExpandedFst<
        ArcTpl<CompactLatticeWeightTpl<LatticeWeightTpl<float>, Int> > > &ifst,
    MutableFst<ArcTpl<LatticeWeightTpl<double> > > *ofst) {
  VectorFst<ArcTpl<CompactLatticeWeightTpl<LatticeWeightTpl<double>, Int> > >
      fst;
  ConvertLattice(ifst, &fst);
  ConvertLattice(fst, ofst);
}

/// Converts an FST with TropicalWeight into one with LatticeWeight (puts all
/// the weight on the first float in the lattice's pair; the second is set to
/// zero, except that an infinite, i.e. Zero(), tropical weight is mapped to
/// (infinity, infinity) -- see StdToLatticeMapper).
/// The result overwrites "*ofst".
template <class Real>
void ConvertFstToLattice(const ExpandedFst<ArcTpl<TropicalWeight> > &ifst,
                         MutableFst<ArcTpl<LatticeWeightTpl<Real> > > *ofst);

/** Returns the default 2x2 scaling matrix for LatticeWeight, i.e. the
    identity [ 1 0; 0 1 ], which leaves both weight components unchanged. */
inline std::vector<std::vector<double> > DefaultLatticeScale() {
  // Start from an all-zero 2x2 matrix, then fill in the diagonal.
  std::vector<std::vector<double> > identity(2, std::vector<double>(2, 0.0));
  identity[0][0] = 1.0;
  identity[1][1] = 1.0;
  return identity;
}

/** Returns the 2x2 scaling matrix [ 1 0; 0 acwt ], which leaves the first
    weight component unchanged and multiplies the second one by "acwt". */
inline std::vector<std::vector<double> > AcousticLatticeScale(double acwt) {
  // Start from an all-zero 2x2 matrix, then fill in the diagonal.
  std::vector<std::vector<double> > scale(2, std::vector<double>(2, 0.0));
  scale[0][0] = 1.0;
  scale[1][1] = acwt;
  return scale;
}

/** Returns the 2x2 scaling matrix [ lmwt 0; 0 1 ], which multiplies the first
    weight component by "lmwt" and leaves the second one unchanged. */
inline std::vector<std::vector<double> > GraphLatticeScale(double lmwt) {
  // Start from an all-zero 2x2 matrix, then fill in the diagonal.
  std::vector<std::vector<double> > scale(2, std::vector<double>(2, 0.0));
  scale[0][0] = lmwt;
  scale[1][1] = 1.0;
  return scale;
}

/** Returns the 2x2 scaling matrix [ lmwt 0; 0 acwt ], which multiplies the
    first weight component by "lmwt" and the second one by "acwt". */
inline std::vector<std::vector<double> > LatticeScale(double lmwt,
                                                      double acwt) {
  // Start from an all-zero 2x2 matrix, then fill in the diagonal.
  std::vector<std::vector<double> > scale(2, std::vector<double>(2, 0.0));
  scale[0][0] = lmwt;
  scale[1][1] = acwt;
  return scale;
}

/** Scales the pairs of weights in LatticeWeight or CompactLatticeWeight by
    viewing the pair (a, b) as a 2-vector and pre-multiplying by the 2x2 matrix
    in "scale".  E.g. typically scale would equal
     [ 1   0;
       0  acwt ]
    if we want to scale the acoustics by "acwt".
    "scale" must be 2x2 (this is checked with assert).  If it equals
    DefaultLatticeScale(), the FST is left untouched.  Otherwise all arc
    weights and all non-Zero final weights of "fst" are modified in place.
 */
template <class Weight, class ScaleFloat>
void ScaleLattice(const std::vector<std::vector<ScaleFloat> > &scale,
                  MutableFst<ArcTpl<Weight> > *fst);

/// Removes state-level alignments (the strings that are
/// part of the weights).  Modifies "fst" in place: every arc weight and
/// every non-Zero final weight keeps its Weight() part and gets an empty
/// string.
template <class Weight, class Int>
void RemoveAlignmentsFromCompactLattice(
    MutableFst<ArcTpl<CompactLatticeWeightTpl<Weight, Int> > > *fst);

/// Returns true if lattice has alignments, i.e. it has
/// any nonempty strings inside its weights (arc weights or final weights).
template <class Weight, class Int>
bool CompactLatticeHasAlignment(
    const ExpandedFst<ArcTpl<CompactLatticeWeightTpl<Weight, Int> > > &fst);

/// Arc mapper that turns a normal arc (StdArc) into a LatticeArc whose
/// LatticeWeight has the StdArc weight as its first element.  Useful when
/// doing LM rescoring.
template <class Real>
class StdToLatticeMapper {
  typedef LatticeWeightTpl<Real> LatticeWeight;
  typedef ArcTpl<LatticeWeight> LatticeArc;

 public:
  LatticeArc operator()(const StdArc &arc) {
    // A LatticeWeight must be either finite in both elements or infinite in
    // both (i.e. Zero()); (infinity, zero) would not be valid.  So a zero
    // (infinite) input weight is copied into the second element as well,
    // while any other weight gets 0.0 there.
    const bool input_is_zero = (arc.weight == StdArc::Weight::Zero());
    const LatticeWeight mapped(arc.weight.Value(),
                               input_is_zero ? arc.weight.Value() : 0.0);
    return LatticeArc(arc.ilabel, arc.olabel, mapped, arc.nextstate);
  }

  // No superfinal state is requested from the mapping.
  MapFinalAction FinalAction() { return MAP_NO_SUPERFINAL; }

  // Input and output symbol tables are both copied over.
  MapSymbolsAction InputSymbolsAction() { return MAP_COPY_SYMBOLS; }
  MapSymbolsAction OutputSymbolsAction() { return MAP_COPY_SYMBOLS; }

  // The property bits are passed through unchanged (the original author
  // believed all properties to be preserved).
  uint64 Properties(uint64 props) { return props; }
};

/// Arc mapper that turns a LatticeArc into a normal arc (StdArc) whose
/// weight is the sum of the two elements of the LatticeArc weight.
template <class Real>
class LatticeToStdMapper {
  typedef LatticeWeightTpl<Real> LatticeWeight;
  typedef ArcTpl<LatticeWeight> LatticeArc;

 public:
  StdArc operator()(const LatticeArc &arc) {
    // Collapse the pair (value1, value2) into a single cost.
    const StdArc::Weight total(arc.weight.Value1() + arc.weight.Value2());
    return StdArc(arc.ilabel, arc.olabel, total, arc.nextstate);
  }

  // No superfinal state is requested from the mapping.
  MapFinalAction FinalAction() { return MAP_NO_SUPERFINAL; }

  // Input and output symbol tables are both copied over.
  MapSymbolsAction InputSymbolsAction() { return MAP_COPY_SYMBOLS; }
  MapSymbolsAction OutputSymbolsAction() { return MAP_COPY_SYMBOLS; }

  // The property bits are passed through unchanged (the original author
  // believed all properties to be preserved).
  uint64 Properties(uint64 props) { return props; }
};

// Declaration only: takes a pruning "beam" and a CompactLattice that is
// modified through the pointer.
// NOTE(review): no definition of this template appears in
// lattice-utils-inl.h, which is the file this header includes; confirm where
// (or whether) it is defined before calling it, and what exactly is pruned
// relative to "beam".
template <class Weight, class Int>
void PruneCompactLattice(
    Weight beam,
    MutableFst<ArcTpl<CompactLatticeWeightTpl<Weight, Int> > > *fst);

}  // end namespace fst

#include "fstext/lattice-utils-inl.h"

#endif  // KALDI_FSTEXT_LATTICE_UTILS_H_


================================================
FILE: runtime/engine/kaldi/fstext/lattice-weight.h
================================================
// fstext/lattice-weight.h
// Copyright 2009-2012  Microsoft Corporation
//                      Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_FSTEXT_LATTICE_WEIGHT_H_
#define KALDI_FSTEXT_LATTICE_WEIGHT_H_

#include <algorithm>
#include <limits>
#include <string>
#include <vector>

#include "base/kaldi-common.h"
#include "fst/fstlib.h"

namespace fst {

// Declare weight type for lattice... will import to namespace kaldi.  It has
// two members, value1_ and value2_, of type FloatType (normally float).  It
// is basically the same as the tropical semiring on value1_+value2_, except it
// keeps track of value1_ and value2_ separately.  More precisely, it is
// equivalent to the lexicographic semiring on (value1_+value2_),
// (value1_-value2_).

// Forward declaration of the class, needed so that the stream operators
// below can be declared before the class body names them as friends.
template <class FloatType>
class LatticeWeightTpl;

// Text-form output of a LatticeWeightTpl (declared here, defined elsewhere).
template <class FloatType>
inline std::ostream &operator<<(std::ostream &strm,
                                const LatticeWeightTpl<FloatType> &w);

// Text-form input of a LatticeWeightTpl (declared here, defined elsewhere).
template <class FloatType>
inline std::istream &operator>>(std::istream &strm,
                                LatticeWeightTpl<FloatType> &w);

// Weight type holding a pair of floating-point values (value1_, value2_).
// Zero() is (+infinity, +infinity) and One() is (0, 0).
template <class FloatType>
class LatticeWeightTpl {
 public:
  typedef FloatType T;  // normally float.
  typedef LatticeWeightTpl ReverseWeight;

  inline T Value1() const { return value1_; }

  inline T Value2() const { return value2_; }

  inline void SetValue1(T f) { value1_ = f; }

  inline void SetValue2(T f) { value2_ = f; }

  // Value-initializes both elements (i.e. zero for floating-point T).
  LatticeWeightTpl() : value1_{}, value2_{} {}

  LatticeWeightTpl(T a, T b) : value1_(a), value2_(b) {}

  LatticeWeightTpl(const LatticeWeightTpl &other)
      : value1_(other.value1_), value2_(other.value2_) {}

  LatticeWeightTpl &operator=(const LatticeWeightTpl &w) {
    value1_ = w.value1_;
    value2_ = w.value2_;
    return *this;
  }

  // The reverse weight is the weight itself.
  LatticeWeightTpl<FloatType> Reverse() const { return *this; }

  // The semiring zero: both elements are +infinity.
  static const LatticeWeightTpl Zero() {
    return LatticeWeightTpl(std::numeric_limits<T>::infinity(),
                            std::numeric_limits<T>::infinity());
  }

  // The semiring one: both elements are 0.
  static const LatticeWeightTpl One() { return LatticeWeightTpl(0.0, 0.0); }

  // Type name: "lattice4" when T is 4 bytes wide, "lattice8" otherwise.
  static const std::string &Type() {
    static const std::string type = (sizeof(T) == 4 ? "lattice4" : "lattice8");
    return type;
  }

  // A non-member value: both elements are NaN (so Member() returns false).
  static const LatticeWeightTpl NoWeight() {
    return LatticeWeightTpl(std::numeric_limits<FloatType>::quiet_NaN(),
                            std::numeric_limits<FloatType>::quiet_NaN());
  }

  // Returns true if this is a valid weight: no element is NaN, no element is
  // -infinity, and either both elements are +infinity or neither is.
  bool Member() const {
    // x != x is true only for NaN.
    if (value1_ != value1_ || value2_ != value2_) return false;  // NaN
    if (value1_ == -std::numeric_limits<T>::infinity() ||
        value2_ == -std::numeric_limits<T>::infinity())
      return false;  // -infty not allowed
    if (value1_ == std::numeric_limits<T>::infinity() ||
        value2_ == std::numeric_limits<T>::infinity()) {
      if (value1_ != std::numeric_limits<T>::infinity() ||
          value2_ != std::numeric_limits<T>::infinity())
        return false;  // both must be +infty;
      // this is necessary so that the semiring has only one zero.
    }
    return true;
  }

  // Rounds each element to the nearest multiple of 'delta'.  If the sum of
  // the elements is -infinity, +infinity or NaN, both elements of the result
  // are set to that value instead.
  LatticeWeightTpl Quantize(float delta = kDelta) const {
    if (value1_ + value2_ == -std::numeric_limits<T>::infinity()) {
      return LatticeWeightTpl(-std::numeric_limits<T>::infinity(),
                              -std::numeric_limits<T>::infinity());
    } else if (value1_ + value2_ == std::numeric_limits<T>::infinity()) {
      return LatticeWeightTpl(std::numeric_limits<T>::infinity(),
                              std::numeric_limits<T>::infinity());
    } else if (value1_ + value2_ != value1_ + value2_) {  // NaN
      return LatticeWeightTpl(value1_ + value2_, value1_ + value2_);
    } else {
      return LatticeWeightTpl(floor(value1_ / delta + 0.5F) * delta,
                              floor(value2_ / delta + 0.5F) * delta);
    }
  }

  // Semiring properties advertised to OpenFst.
  static constexpr uint64 Properties() {
    return kLeftSemiring | kRightSemiring | kCommutative | kPath | kIdempotent;
  }

  // This is used in OpenFst for binary I/O.  This is OpenFst-style,
  // not Kaldi-style, I/O.
  // The two members are handed to ReadType as they are, i.e. with type T.
  // NOTE(review): an earlier comment here claimed the values are always
  // read/written as float even if T is double; nothing in this function does
  // such a conversion -- confirm the intended on-disk format.
  std::istream &Read(std::istream &strm) {
    ReadType(strm, &value1_);
    ReadType(strm, &value2_);
    return strm;
  }

  // This is used in OpenFst for binary I/O.  This is OpenFst-style,
  // not Kaldi-style, I/O.  Mirrors Read(): value1_ then value2_.
  std::ostream &Write(std::ostream &strm) const {
    WriteType(strm, value1_);
    WriteType(strm, value2_);
    return strm;
  }

  // Hash built by reinterpreting the bytes of each element as a size_t
  // (through a union) and adding the two results.
  size_t Hash() const {
    size_t ans;
    union {
      T f;
      size_t s;
    } u;
    u.s = 0;  // zero all bytes first, in case T is narrower than size_t.
    u.f = value1_;
    ans = u.s;
    u.f = value2_;
    ans += u.s;
    return ans;
  }

 protected:
  // Text-form output of one element; infinities and NaN are written as the
  // words "Infinity", "-Infinity" and "BadNumber".
  inline static void WriteFloatType(std::ostream &strm, const T &f) {
    if (f == std::numeric_limits<T>::infinity())
      strm << "Infinity";
    else if (f == -std::numeric_limits<T>::infinity())
      strm << "-Infinity";
    else if (f != f)
      strm << "BadNumber";
    else
      strm << f;
  }

  // Internal helper function, used in ReadNoParen.  Reads one
  // whitespace-delimited token from 'strm' and parses it into 'f'.  The words
  // written by WriteFloatType are recognized; anything else goes through
  // strtod, and if strtod does not consume the whole token, badbit is set on
  // 'strm'.
  inline static void ReadFloatType(std::istream &strm, T &f) {  // NOLINT
    std::string s;
    strm >> s;
    if (s == "Infinity") {
      f = std::numeric_limits<T>::infinity();
    } else if (s == "-Infinity") {
      f = -std::numeric_limits<T>::infinity();
    } else if (s == "BadNumber") {
      f = std::numeric_limits<T>::quiet_NaN();
    } else {
      char *p;
      f = strtod(s.c_str(), &p);
      if (p < s.c_str() + s.size()) strm.clear(std::ios::badbit);
    }
  }

  // Reads LatticeWeight when there are no parentheses around pair terms...
  // currently the only form supported.  The expected form is
  // <value1><separator><value2>, optionally preceded by whitespace.
  // On failure badbit is set on 'strm'.
  inline std::istream &ReadNoParen(std::istream &strm, char separator) {
    // Skip leading whitespace.
    int c;
    do {
      c = strm.get();
    } while (isspace(c));

    // Collect everything up to (not including) the separator.
    std::string s1;
    while (c != separator) {
      if (c == EOF) {  // input ended before the separator was seen.
        strm.clear(std::ios::badbit);
        return strm;
      }
      s1 += c;
      c = strm.get();
    }
    std::istringstream strm1(s1);
    ReadFloatType(strm1, value1_);  // ReadFloatType is class member function
    if (strm1.bad()) {
      // ReadFloatType reports a malformed number by setting badbit on the
      // stream it was given.  Here that is the temporary 'strm1', so forward
      // the failure to the caller's stream; otherwise a garbled first element
      // would be accepted silently.
      strm.clear(std::ios::badbit);
      return strm;
    }
    // read second element
    ReadFloatType(strm, value2_);
    return strm;
  }

  friend std::istream &operator>>
      <FloatType>(std::istream &, LatticeWeightTpl<FloatType> &);
  friend std::ostream &operator<<<FloatType>(
      std::ostream &, const LatticeWeightTpl<FloatType> &);

 private:
  T value1_;
  T value2_;
};

/* ScaleTupleWeight is a function defined for LatticeWeightTpl and
   CompactLatticeWeightTpl that mutliplies the pair (value1_, value2_) by a 2x2
   matrix.  Used, for example, in applying acoustic scaling.
 */
template <class FloatType, class ScaleFloatType>
inline LatticeWeightTpl<FloatType> ScaleTupleWeight(
    const LatticeWeightTpl<FloatType> &w,
    const std::vector<std::vector<ScaleFloatType> > &scale) {
  typedef LatticeWeightTpl<FloatType> Weight;
  // An infinite first value short-circuits to Zero(): evaluating the products
  // below would otherwise give NaN through infinity * 0.
  if (w.Value1() == std::numeric_limits<FloatType>::infinity()) {
    return Weight::Zero();
  }
  // Matrix-vector product: row r of "scale" dotted with (Value1, Value2)
  // becomes component r of the result.
  const std::vector<ScaleFloatType> &row0 = scale[0];
  const std::vector<ScaleFloatType> &row1 = scale[1];
  return Weight(row0[0] * w.Value1() + row0[1] * w.Value2(),
                row1[0] * w.Value1() + row1[1] * w.Value2());
}

/* For testing purposes and in case it's ever useful, we define a similar
   function to apply to LexicographicWeight and the like, templated on
   TropicalWeight<float> etc.; we use PairWeight which is the base class of
   LexicographicWeight.
*/
template <class FloatType, class ScaleFloatType>
inline PairWeight<TropicalWeightTpl<FloatType>, TropicalWeightTpl<FloatType> >
ScaleTupleWeight(const PairWeight<TropicalWeightTpl<FloatType>,
                                  TropicalWeightTpl<FloatType> > &w,
                 const std::vector<std::vector<ScaleFloatType> > &scale) {
  typedef TropicalWeightTpl<FloatType> Component;
  typedef PairWeight<Component, Component> Pair;
  const Component kZero = Component::Zero();
  // If either component is already the semiring zero, return (zero, zero)
  // rather than risk NaNs from infinity * 0 in the arithmetic below.
  const bool has_zero = (w.Value1() == kZero || w.Value2() == kZero);
  if (has_zero) {
    return Pair(kZero, kZero);
  }
  // Apply the 2x2 matrix to the raw float values of the two components.
  const FloatType a = w.Value1().Value();
  const FloatType b = w.Value2().Value();
  const Component scaled1(scale[0][0] * a + scale[0][1] * b);
  const Component scaled2(scale[1][0] * a + scale[1][1] * b);
  return Pair(scaled1, scaled2);
}

// Two lattice weights are equal when both components compare equal.
template <class FloatType>
inline bool operator==(const LatticeWeightTpl<FloatType> &wa,
                       const LatticeWeightTpl<FloatType> &wb) {
  // The copies are volatile on purpose: this keeps the compiler from
  // applying over-aggressive optimizations that cause trouble, especially
  // with NaturalLess().
  volatile FloatType a1 = wa.Value1();
  volatile FloatType a2 = wa.Value2();
  volatile FloatType b1 = wb.Value1();
  volatile FloatType b2 = wb.Value2();
  if (a1 != b1) return false;
  return a2 == b2;
}

// Two lattice weights differ when at least one component differs.
template <class FloatType>
inline bool operator!=(const LatticeWeightTpl<FloatType> &wa,
                       const LatticeWeightTpl<FloatType> &wb) {
  // The copies are volatile on purpose: this keeps the compiler from
  // applying over-aggressive optimizations that cause trouble, especially
  // with NaturalLess().
  volatile FloatType a1 = wa.Value1();
  volatile FloatType a2 = wa.Value2();
  volatile FloatType b1 = wb.Value1();
  volatile FloatType b2 = wb.Value2();
  if (a1 != b1) return true;
  return a2 != b2;
}

// We define a Compare function for LatticeWeightTpl even though it's
// not required by the semiring standard-- it's just more efficient
// to do it this way rather than using the NaturalLess template.

/// Compare returns -1 if w1 < w2, +1 if w1 > w2, and 0 if w1 == w2.

template <class FloatType>
inline int Compare(const LatticeWeightTpl<FloatType> &w1,
                   const LatticeWeightTpl<FloatType> &w2) {
  FloatType f1 = w1.Value1() + w1.Value2(), f2 = w2.Value1() + w2.Value2();
  if (f1 < f2) {  // having smaller cost means you're larger
    return 1;
  } else if (f1 > f2) {  // in the semiring [higher probability]
    return -1;
  } else if (w1.Value1() < w2.Value1()) {
  // mathematically we should be comparing (w1.value1_-w1.value2_ <
  // w2.value1_-w2.value2_) in the next line, but add w1.value1_+w1.value2_ =
  // w2.value1_+w2.value2_ to both sides and divide by two, and we get the
  // simpler equivalent form w1.value1_ < w2.value1_.
    return 1;
  } else if (w1.Value1() > w2.Value1()) {
    return -1;
  } else {
    return 0;
  }
}

// Semiring addition: returns whichever argument Compare ranks higher, with
// w1 winning ties.
template <class FloatType>
inline LatticeWeightTpl<FloatType> Plus(const LatticeWeightTpl<FloatType> &w1,
                                        const LatticeWeightTpl<FloatType> &w2) {
  if (Compare(w1, w2) < 0) {
    return w2;
  }
  return w1;
}

// For efficiency, override the NaturalLess template class.
template <class FloatType>
class NaturalLess<LatticeWeightTpl<FloatType> > {
 public:
  typedef LatticeWeightTpl<FloatType> Weight;

  NaturalLess() {}

  // NaturalLess is a negative order, i.e. the reverse of the normal one: its
  // "<" is the normal order's ">".  So this is true exactly when
  // Compare(w1, w2) reports 1.
  bool operator()(const Weight &w1, const Weight &w2) const {
    const int order = Compare(w1, w2);
    return order == 1;
  }
};
// Explicit specialization for single-precision lattice weights.
template <>
class NaturalLess<LatticeWeightTpl<float> > {
 public:
  typedef LatticeWeightTpl<float> Weight;

  NaturalLess() {}

  // NaturalLess is a negative order, i.e. the reverse of the normal one: its
  // "<" is the normal order's ">".  So this is true exactly when
  // Compare(w1, w2) reports 1.
  bool operator()(const Weight &w1, const Weight &w2) const {
    const int order = Compare(w1, w2);
    return order == 1;
  }
};
// Explicit specialization for double-precision lattice weights.
template <>
class NaturalLess<LatticeWeightTpl<double> > {
 public:
  typedef LatticeWeightTpl<double> Weight;

  NaturalLess() {}

  // NaturalLess is a negative order, i.e. the reverse of the normal one: its
  // "<" is the normal order's ">".  So this is true exactly when
  // Compare(w1, w2) reports 1.
  bool operator()(const Weight &w1, const Weight &w2) const {
    const int order = Compare(w1, w2);
    return order == 1;
  }
};

// Semiring multiplication: adds the two weights component by component.
template <class FloatType>
inline LatticeWeightTpl<FloatType> Times(
    const LatticeWeightTpl<FloatType> &w1,
    const LatticeWeightTpl<FloatType> &w2) {
  const FloatType sum1 = w1.Value1() + w2.Value1();
  const FloatType sum2 = w1.Value2() + w2.Value2();
  return LatticeWeightTpl<FloatType>(sum1, sum2);
}

// divide w1 by w2 (on left/right/any doesn't matter as
// commutative).
template <class FloatType>
inline LatticeWeightTpl<FloatType> Divide(const LatticeWeightTpl<FloatType> &w1,
                                          const LatticeWeightTpl<FloatType> &w2,
                                          DivideType typ = DIVIDE_ANY) {
  // Component-wise subtraction (the inverse of Times).  "typ" is not
  // consulted.
  typedef FloatType T;
  typedef LatticeWeightTpl<T> Weight;
  const T inf = std::numeric_limits<T>::infinity();
  const T a = w1.Value1() - w2.Value1();
  const T b = w1.Value2() - w2.Value2();

  // NaN or -infinity in either component is not a usable result: warn and
  // fall back to Zero().
  const bool has_nan = (a != a) || (b != b);
  const bool has_neg_inf = (a == -inf) || (b == -inf);
  if (has_nan || has_neg_inf) {
    KALDI_WARN << "LatticeWeightTpl::Divide, NaN or invalid number produced. "
               << "[dividing by zero?]  Returning zero";
    return Weight::Zero();
  }
  // A result with only one infinite component is not a valid number either,
  // so any +infinity also maps to Zero() (without a warning).
  if (a == inf || b == inf) {
    return Weight::Zero();
  }
  return Weight(a, b);
}

// Approximate equality: exact component-wise equality, or total costs
// (Value1() + Value2()) within "delta" of each other.
template <class FloatType>
inline bool ApproxEqual(const LatticeWeightTpl<FloatType> &w1,
                        const LatticeWeightTpl<FloatType> &w2,
                        float delta = kDelta) {
  // The exact-match test comes first because it is what handles Zero().
  const bool identical =
      (w1.Value1() == w2.Value1() && w1.Value2() == w2.Value2());
  if (identical) {
    return true;
  }
  const FloatType cost1 = w1.Value1() + w1.Value2();
  const FloatType cost2 = w2.Value1() + w2.Value2();
  return fabs(cost1 - cost2) <= delta;
}

template <class FloatType>
inline std::ostream &operator<<(std::ostream &strm,
                                const LatticeWeightTpl<FloatType> &w) {
  LatticeWeightTpl<FloatType>::WriteFloatType(strm, w.Value1());
  CHECK(FLAGS_fst_weight_separator.size() == 1);  // NOLINT
  strm << FLAGS_fst_weight_separator[0];  // comma by default;
  // may or may not be settable from Kaldi programs.
  LatticeWeightTpl<FloatType>::WriteFloatType(strm, w.Value2());
  return strm;
}

// Text input: hands the work to ReadNoParen, using the single-character
// weight separator taken from FLAGS_fst_weight_separator.
template <class FloatType>
inline std::istream &operator>>(std::istream &strm,
                                LatticeWeightTpl<FloatType> &w1) {
  CHECK(FLAGS_fst_weight_separator.size() == 1);  // NOLINT
  const char separator = FLAGS_fst_weight_separator[0];  // ',' by default
  return w1.ReadNoParen(strm, separator);
}

// CompactLattice will be an acceptor (accepting the words/output-symbols),
// with the weights and input-symbol-seqs on the arcs.
// There must be a total order on W.  We assume for the sake of efficiency
// that there is a function
// Compare(W w1, W w2) that returns -1 if w1 < w2, +1 if w1 > w2, and
// zero if w1 == w2, and Plus for type W returns (Compare(w1,w2) >= 0 ? w1 :
// w2).

template <class WeightType, class IntType>
class CompactLatticeWeightTpl {
 public:
  typedef WeightType W;

  typedef CompactLatticeWeightTpl<WeightType, IntType> ReverseWeight;

  // Plus is like LexicographicWeight on the pair (weight_, string_), but where
  // we use standard lexicographic order on string_ [this is not the same as
  // NaturalLess on the StringWeight equivalent, which does not define a
  // total order].
  // Times, Divide obvious... (support both left & right division..)
  // CommonDivisor would need to be coded separately.

  // Leaves weight_ default-constructed and string_ empty.
  CompactLatticeWeightTpl() {}

  CompactLatticeWeightTpl(const WeightType &w, const std::vector<IntType> &s)
      : weight_(w), string_(s) {}

  // Copy/move construction and assignment are intentionally left to the
  // compiler (rule of zero).  Both members are copyable, so the implicit
  // operations do the same member-wise work a hand-written copy assignment
  // would, and not declaring one keeps the implicit move operations
  // available.

  const W &Weight() const { return weight_; }

  const std::vector<IntType> &String() const { return string_; }

  void SetWeight(const W &w) { weight_ = w; }

  void SetString(const std::vector<IntType> &s) { string_ = s; }

  // Semiring zero: the base weight's Zero() with an empty string.
  static const CompactLatticeWeightTpl<WeightType, IntType> Zero() {
    return CompactLatticeWeightTpl<WeightType, IntType>(WeightType::Zero(),
                                                        std::vector<IntType>());
  }

  // Semiring one: the base weight's One() with an empty string.
  static const CompactLatticeWeightTpl<WeightType, IntType> One() {
    return CompactLatticeWeightTpl<WeightType, IntType>(WeightType::One(),
                                                        std::vector<IntType>());
  }

  // Returns sizeof(IntType) as a one-character string (e.g. "4"); used as
  // the suffix of Type().
  inline static std::string GetIntSizeString() {
    char buf[2];
    buf[0] = '0' + sizeof(IntType);
    buf[1] = '\0';
    return buf;
  }

  // Type name: "compact" + base weight type name + size of IntType.
  static const std::string &Type() {
    static const std::string type =
        "compact" + WeightType::Type() + GetIntSizeString();
    return type;
  }

  static const CompactLatticeWeightTpl<WeightType, IntType> NoWeight() {
    return CompactLatticeWeightTpl<WeightType, IntType>(WeightType::NoWeight(),
                                                        std::vector<IntType>());
  }

  // Returns a copy with the same weight_ and string_ in reverse order.
  CompactLatticeWeightTpl<WeightType, IntType> Reverse() const {
    return CompactLatticeWeightTpl<WeightType, IntType>(
        weight_, std::vector<IntType>(string_.rbegin(), string_.rend()));
  }

  bool Member() const {
    // a semiring has only one zero, this is the important property
    // we're trying to maintain here.  So force string_ to be empty if
    // w_ == zero.
    if (!weight_.Member()) return false;
    if (weight_ == WeightType::Zero())
      return string_.empty();
    else
      return true;
  }

  // Quantizes the weight part only; the string is carried over unchanged.
  CompactLatticeWeightTpl Quantize(float delta = kDelta) const {
    return CompactLatticeWeightTpl(weight_.Quantize(delta), string_);
  }

  static constexpr uint64 Properties() {
    return kLeftSemiring | kRightSemiring | kPath | kIdempotent;
  }

  // This is used in OpenFst for binary I/O.  This is OpenFst-style,
  // not Kaldi-style, I/O.
  // Layout: the weight, then an int32 element count, then the elements.
  std::istream &Read(std::istream &strm) {
    weight_.Read(strm);
    if (strm.fail()) {
      return strm;
    }
    int32 sz;
    ReadType(strm, &sz);
    if (strm.fail()) {
      return strm;
    }
    if (sz < 0) {
      KALDI_WARN << "Negative string size!  Read failure";
      strm.clear(std::ios::badbit);
      return strm;
    }
    string_.resize(sz);
    for (int32 i = 0; i < sz; i++) {
      ReadType(strm, &(string_[i]));
      // Once the stream has failed (e.g. truncated input) nothing further
      // can be read; stop instead of retrying for every remaining element.
      if (strm.fail()) {
        return strm;
      }
    }
    return strm;
  }

  // This is used in OpenFst for binary I/O.  This is OpenFst-style,
  // not Kaldi-style, I/O.
  // Layout: the weight, then an int32 element count, then the elements.
  std::ostream &Write(std::ostream &strm) const {
    weight_.Write(strm);
    if (strm.fail()) {
      return strm;
    }
    int32 sz = static_cast<int32>(string_.size());
    WriteType(strm, sz);
    for (int32 i = 0; i < sz; i++) WriteType(strm, string_[i]);
    return strm;
  }

  // Combines the weight's hash with a position-dependent mix of the string
  // elements.
  size_t Hash() const {
    size_t ans = weight_.Hash();
    // any weird numbers here are largish primes
    size_t sz = string_.size(), mult = 6967;
    for (size_t i = 0; i < sz; i++) {
      ans += string_[i] * mult;
      mult *= 7499;
    }
    return ans;
  }

 private:
  W weight_;
  std::vector<IntType> string_;
};

// Equal when both the weight parts and the strings are equal.
template <class WeightType, class IntType>
inline bool operator==(const CompactLatticeWeightTpl<WeightType, IntType> &w1,
                       const CompactLatticeWeightTpl<WeightType, IntType> &w2) {
  if (!(w1.Weight() == w2.Weight())) {
    return false;
  }
  return w1.String() == w2.String();
}

// Unequal when the weight parts differ or the strings differ.
template <class WeightType, class IntType>
inline bool operator!=(const CompactLatticeWeightTpl<WeightType, IntType> &w1,
                       const CompactLatticeWeightTpl<WeightType, IntType> &w2) {
  if (w1.Weight() != w2.Weight()) {
    return true;
  }
  return w1.String() != w2.String();
}

// Approximately equal when the weight parts are ApproxEqual within "delta"
// and the strings are exactly equal.
template <class WeightType, class IntType>
inline bool ApproxEqual(const CompactLatticeWeightTpl<WeightType, IntType> &w1,
                        const CompactLatticeWeightTpl<WeightType, IntType> &w2,
                        float delta = kDelta) {
  if (!ApproxEqual(w1.Weight(), w2.Weight(), delta)) {
    return false;
  }
  return w1.String() == w2.String();
}

// Compare is not part of the standard for weight types, but used internally for
// efficiency.  The comparison here first compares the weight; if this is the
// same, it compares the string.  The comparison on strings is: first compare
// the length, if this is the same, use lexicographical order.  We can't just
// use the lexicographical order because this would destroy the distributive
// property of multiplication over addition, taking into account that addition
// uses Compare.  The string element of "Compare" isn't super-important in
// practical terms; it's only needed to ensure that Plus always give consistent
// answers and is symmetric.  It's essentially for tie-breaking, but we need to
// make sure all the semiring axioms are satisfied otherwise OpenFst might
// break.

template <class WeightType, class IntType>
inline int Compare(const CompactLatticeWeightTpl<WeightType, IntType> &w1,
                   const CompactLatticeWeightTpl<WeightType, IntType> &w2) {
  int c1 = Compare(w1.Weight(), w2.Weight());
  if (c1 != 0) return c1;
  int l1 = w1.String().size(), l2 = w2.String().size();
  // Use opposite order on the string lengths, so that if the costs are the
  // same, the shorter string wins.
  if (l1 > l2)
    return -1;
  else if (l1 < l2)
    return 1;
  for (int i = 0; i < l1; i++) {
    if (w1.String()[i] < w2.String()[i])
      return -1;
    else if (w1.String()[i] > w2.String()[i])
      return 1;
  }
  return 0;
}

// For efficiency, override the NaturalLess template class.
template <class FloatType, class IntType>
class NaturalLess<
    CompactLatticeWeightTpl<LatticeWeightTpl<FloatType>, IntType> > {
 public:
  typedef CompactLatticeWeightTpl<LatticeWeightTpl<FloatType>, IntType> Weight;

  NaturalLess() {}

  // NaturalLess is a negative order, i.e. the reverse of the normal one: its
  // "<" is the normal order's ">".  So this is true exactly when
  // Compare(w1, w2) reports 1.
  bool operator()(const Weight &w1, const Weight &w2) const {
    const int order = Compare(w1, w2);
    return order == 1;
  }
};
// Explicit specialization for compact lattice weights over float / int32.
template <>
class NaturalLess<CompactLatticeWeightTpl<LatticeWeightTpl<float>, int32> > {
 public:
  typedef CompactLatticeWeightTpl<LatticeWeightTpl<float>, int32> Weight;

  NaturalLess() {}

  // NaturalLess is a negative order, i.e. the reverse of the normal one: its
  // "<" is the normal order's ">".  So this is true exactly when
  // Compare(w1, w2) reports 1.
  bool operator()(const Weight &w1, const Weight &w2) const {
    const int order = Compare(w1, w2);
    return order == 1;
  }
};
// Explicit specialization for compact lattice weights over double / int32.
template <>
class NaturalLess<CompactLatticeWeightTpl<LatticeWeightTpl<double>, int32> > {
 public:
  typedef CompactLatticeWeightTpl<LatticeWeightTpl<double>, int32> Weight;

  NaturalLess() {}

  // NaturalLess is a negative order, i.e. the reverse of the normal one: its
  // "<" is the normal order's ">".  So this is true exactly when
  // Compare(w1, w2) reports 1.
  bool operator()(const Weight &w1, const Weight &w2) const {
    const int order = Compare(w1, w2);
    return order == 1;
  }
};

// Make sure Compare is defined for TropicalWeight, so everything works
// if we substitute LatticeWeight for TropicalWeight.
inline int Compare(const TropicalWeight &w1, const TropicalWeight &w2) {
  float f1 = w1.Value(), f2 = w2.Value();
  if (f1 == f2)
    return 0;
  else if (f1 > f2)
    return -1;
  else
    return 1;
}

// Semiring addition: returns whichever argument Compare ranks higher, with
// w1 winning ties.
template <class WeightType, class IntType>
inline CompactLatticeWeightTpl<WeightType, IntType> Plus(
    const CompactLatticeWeightTpl<WeightType, IntType> &w1,
    const CompactLatticeWeightTpl<WeightType, IntType> &w2) {
  if (Compare(w1, w2) < 0) {
    return w2;
  }
  return w1;
}

// Semiring multiplication: Times on the weight parts, concatenation
// (w1's string followed by w2's) on the string parts.
template <class WeightType, class IntType>
inline CompactLatticeWeightTpl<WeightType, IntType> Times(
    const CompactLatticeWeightTpl<WeightType, IntType> &w1,
    const CompactLatticeWeightTpl<WeightType, IntType> &w2) {
  typedef CompactLatticeWeightTpl<WeightType, IntType> Result;
  const WeightType product = Times(w1.Weight(), w2.Weight());
  // Special case to ensure zero is unique: a zero weight part always comes
  // with an empty string.
  if (product == WeightType::Zero()) {
    return Result::Zero();
  }
  const std::vector<IntType> &head = w1.String();
  const std::vector<IntType> &tail = w2.String();
  std::vector<IntType> joined;
  joined.reserve(head.size() + tail.size());
  joined.insert(joined.end(), head.begin(), head.end());
  joined.insert(joined.end(), tail.begin(), tail.end());
  return Result(product, joined);
}

// Divides w1 by w2.  The weight parts are divided with Divide(); on the
// string parts w2's string is stripped from the front of w1's (DIVIDE_LEFT)
// or from its back (DIVIDE_RIGHT).  DIVIDE_ANY is not supported.  Errors
// (division by zero, w2's string longer than w1's or not a prefix/suffix of
// it, DIVIDE_ANY) are reported through KALDI_ERR.
template <class WeightType, class IntType>
inline CompactLatticeWeightTpl<WeightType, IntType> Divide(
    const CompactLatticeWeightTpl<WeightType, IntType> &w1,
    const CompactLatticeWeightTpl<WeightType, IntType> &w2,
    DivideType div = DIVIDE_ANY) {
  if (w1.Weight() == WeightType::Zero()) {
    if (w2.Weight() != WeightType::Zero()) {
      // 0 / nonzero is 0.
      return CompactLatticeWeightTpl<WeightType, IntType>::Zero();
    } else {
      KALDI_ERR << "Division by zero [0/0]";
    }
  } else if (w2.Weight() == WeightType::Zero()) {
    KALDI_ERR << "Error: division by zero";
  }
  WeightType w = Divide(w1.Weight(), w2.Weight());

  // Bind the strings by reference: they are only read here and w1/w2 outlive
  // this call, so copying both vectors on every division is wasted work.
  const std::vector<IntType> &v1 = w1.String(), &v2 = w2.String();
  if (v2.size() > v1.size()) {
    // NOTE(review): the iterator arithmetic below relies on KALDI_ERR not
    // returning (presumably it throws/aborts) -- confirm.
    KALDI_ERR << "Cannot divide, length mismatch";
  }
  typename std::vector<IntType>::const_iterator v1b = v1.begin(),
                                                v1e = v1.end(),
                                                v2b = v2.begin(),
                                                v2e = v2.end();
  if (div == DIVIDE_LEFT) {
    if (!std::equal(v2b, v2e,
                    v1b)) {  // v2 must be identical to first part of v1.
      KALDI_ERR << "Cannot divide, data mismatch";
    }
    return CompactLatticeWeightTpl<WeightType, IntType>(
        w, std::vector<IntType>(v1b + (v2e - v2b),
                                v1e));  // return last part of v1.
  } else if (div == DIVIDE_RIGHT) {
    if (!std::equal(
            v2b, v2e,
            v1e - (v2e - v2b))) {  // v2 must be identical to last part of v1.
      KALDI_ERR << "Cannot divide, data mismatch";
    }
    return CompactLatticeWeightTpl<WeightType, IntType>(
        w, std::vector<IntType>(
               v1b, v1e - (v2e - v2b)));  // return first part of v1.

  } else {
    KALDI_ERR << "Cannot divide CompactLatticeWeightTpl with DIVIDE_ANY";
  }
  return CompactLatticeWeightTpl<WeightType,
                                 IntType>::Zero();  // keep compiler happy.
}

// Text output: the weight, the weight separator, then the string elements
// joined by kStringSeparator.
template <class WeightType, class IntType>
inline std::ostream &operator<<(
    std::ostream &strm, const CompactLatticeWeightTpl<WeightType, IntType> &w) {
  strm << w.Weight();
  CHECK(FLAGS_fst_weight_separator.size() == 1);  // NOLINT
  const char separator = FLAGS_fst_weight_separator[0];  // comma by default.
  strm << separator;
  const std::vector<IntType> &labels = w.String();
  const size_t count = labels.size();
  for (size_t pos = 0; pos < count; ++pos) {
    // kStringSeparator is '_'; defined in string-weight.h in OpenFst code.
    // It goes between elements only, never after the last one.
    if (pos > 0) strm << kStringSeparator;
    strm << labels[pos];
  }
  return strm;
}

// Text input for one whitespace-delimited token of the form
// "<weight><separator><int>_<int>_...".  The split is made at the LAST
// occurrence of the weight separator in the token.  Malformed input sets the
// badbit on strm.
template <class WeightType, class IntType>
inline std::istream &operator>>(
    std::istream &strm, CompactLatticeWeightTpl<WeightType, IntType> &w) {
  std::string token;
  strm >> token;
  if (strm.fail()) {
    return strm;
  }
  CHECK(FLAGS_fst_weight_separator.size() == 1);  // NOLINT
  const size_t split =
      token.find_last_of(FLAGS_fst_weight_separator);  // normally ","
  if (split == std::string::npos) {
    strm.clear(std::ios::badbit);
    return strm;
  }
  // Text before the separator is the weight, text after it the string.
  const std::string weight_text = token.substr(0, split);
  const std::string labels_text = token.substr(split + 1);

  // The weight part must parse and must use up all of its text.
  std::istringstream weight_strm(weight_text);
  WeightType weight;
  weight_strm >> weight;
  w.SetWeight(weight);
  if (weight_strm.fail() || !weight_strm.eof()) {
    strm.clear(std::ios::badbit);
    return strm;
  }

  // read string part.
  std::vector<IntType> labels;
  const char *cursor = labels_text.c_str();
  while (*cursor != '\0') {
    if (*cursor == kStringSeparator) {  // '_'
      ++cursor;
    }
    char *next;
    int64_t value = strtol(cursor, &next, 10);
    // Reject text that is not a number and values that do not survive a
    // round trip through IntType.
    if (next == cursor ||
        static_cast<int64_t>(static_cast<IntType>(value)) != value) {
      strm.clear(std::ios::badbit);
      return strm;
    }
    labels.push_back(static_cast<IntType>(value));
    cursor = next;
  }
  w.SetString(labels);
  return strm;
}

// Functor computing a common divisor of two compact lattice weights: Plus of
// the weight parts, paired with the longest common prefix of the strings.
template <class BaseWeightType, class IntType>
class CompactLatticeWeightCommonDivisorTpl {
 public:
  typedef CompactLatticeWeightTpl<BaseWeightType, IntType> Weight;

  Weight operator()(const Weight &w1, const Weight &w2) const {
    const std::vector<IntType> &s1 = w1.String();
    const std::vector<IntType> &s2 = w2.String();
    // Count how many leading elements the two strings share.
    size_t shared = 0;
    while (shared < s1.size() && shared < s2.size() &&
           s1[shared] == s2[shared]) {
      ++shared;
    }
    const std::vector<IntType> prefix(s1.begin(), s1.begin() + shared);
    return Weight(Plus(w1.Weight(), w2.Weight()), prefix);
  }
};

/** Scales the pair (a, b) of floating-point weights inside a
    CompactLatticeWeight by premultiplying it (viewed as a vector)
    by a 2x2 matrix "scale".
    Assumes there is a ScaleTupleWeight function that applies to "Weight";
    this currently only works if Weight equals LatticeWeightTpl<FloatType>
    for some FloatType.
*/
template <class Weight, class IntType, class ScaleFloatType>
inline CompactLatticeWeightTpl<Weight, IntType> ScaleTupleWeight(
    const CompactLatticeWeightTpl<Weight, IntType> &w,
    const std::vector<std::vector<ScaleFloatType> > &scale) {
  // Only the weight part is scaled; the string is passed through untouched.
  const Weight scaled(ScaleTupleWeight(w.Weight(), scale));
  return CompactLatticeWeightTpl<Weight, IntType>(scaled, w.String());
}

/** Define some ConvertLatticeWeight functions that are used in various lattice
    conversions... make them all templates, some with no arguments, since some
    must be templates.*/
template <class Float1, class Float2>
inline void ConvertLatticeWeight(const LatticeWeightTpl<Float1> &w_in,
                                 LatticeWeightTpl<Float2> *w_out) {
  // Copy both components across; any Float1 -> Float2 conversion happens in
  // the setters.
  const Float1 first = w_in.Value1();
  const Float1 second = w_in.Value2();
  w_out->SetValue1(first);
  w_out->SetValue2(second);
}

template <class Float1, class Float2, class Int>
inline void ConvertLatticeWeight(
    const CompactLatticeWeightTpl<LatticeWeightTpl<Float1>, Int> &w_in,
    CompactLatticeWeightTpl<LatticeWeightTpl<Float2>, Int> *w_out) {
  LatticeWeightTpl<Float2> weight2(w_in.Weight().Value1(),
                                   w_in.Weight().Value2());
  w_out->SetWeight(weight2);
  w_out->SetString(w_in.String());
}

// to convert from Lattice to standard FST
template <class Float1, class Float2>
inline void ConvertLatticeWeight(const LatticeWeightTpl<Float1> &w_in,
                                 TropicalWeightTpl<Float2> *w_out) {
  TropicalWeightTpl<Float2> w1(w_in.Value1());
  TropicalWeightTpl<Float2> w2(w_in.Value2());
  *w_out = Times(w1, w2);
}

// Total cost of a lattice weight: Value1() + Value2(), summed in double.
template <class Float>
inline double ConvertToCost(const LatticeWeightTpl<Float> &w) {
  const double first = static_cast<double>(w.Value1());
  const double second = static_cast<double>(w.Value2());
  return first + second;
}

template <class Float, class Int>
inline double ConvertToCost(
    const CompactLatticeWeightTpl<LatticeWeightTpl<Float>, Int> &w) {
  return static_cast<double>(w.Weight().Value1()) +
         static_cast<double>(w.Weight().Value2());
}

// Cost of a tropical weight: its value, widened to double.
template <class Float>
inline double ConvertToCost(const TropicalWeightTpl<Float> &w) {
  const double cost = w.Value();
  return cost;
}

}  // namespace fst

#endif  // KALDI_FSTEXT_LATTICE_WEIGHT_H_


================================================
FILE: runtime/engine/kaldi/fstext/pre-determinize-inl.h
================================================
// fstext/pre-determinize-inl.h

// Copyright 2009-2011  Microsoft Corporation

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_FSTEXT_PRE_DETERMINIZE_INL_H_
#define KALDI_FSTEXT_PRE_DETERMINIZE_INL_H_

#include <algorithm>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

/* Do not include this file directly.  It is an implementation file included by
 * PreDeterminize.h */

/*
  Predeterminization

    This is a function that makes an FST compactly determinizable by inserting
  symbols on the input side as necessary for disambiguation.  Note that we do
  not treat epsilon as a real symbol when measuring determinizability in this
  sense.   The extra symbols are added to the vocabulary, on the input side;
  these are of the form (prefix)1, (prefix)2, and so on without limit, where
  (prefix) is some prefix the user provides, e.g. '#' (the function checks that
  this will not lead to conflicts with symbols already in the FST).  The
  function tells us how many such symbols it created.

   Note that there is a paper "Generalized optimization algorithm for speech
  recognition transducers" by Allauzen and Mohri, that deals with a similar
  issue, but this is a very different algorithm that only aims to ensure
  determinizability, but not *compact* determinizability.

   Our algorithm is slightly heuristic, and probably not optimal, but does
  ensure that the output is compactly determinizable, possibly at the expense of
  inserting unnecessary symbols.  We considered more sophisticated algorithms,
  but these were extremely complicated and would give the same output for the
  kinds of inputs that we envisage.

   Suppose the input FST is T.  We want to ensure that in det(T), if we consider
  the states of det(T) as weighted subsets of states of T, each state of T only
  appears once in any given subset.  This ensures that det(T) is no larger than
  T in an appropriate sense.  The way we do this is as follows.  We identify all
  states in T that have multiple input transitions (counting "being an initial
  state" as an input transition). Let's call these "problematic" states.  For a
  problematic state p we stipulate that it can never appear in any state of
  det(T) unless that state equals (p, \bar{1}) [i.e. p, unweighted].  In order
  to ensure this, we insert input symbols on the transitions to these
   problematic states (this may necessitate adding extra states).
      We also stipulate that the path through det(T) should always be sufficient
  to tell us the path through T (and we insert extra symbols sufficient to make
  this so).  This is to simplify the algorithm, so that we don't have to
  consider the output symbols or weights when predeterminizing.

   The algorithm is as follows.

    (A) Definitions

      (i)  Define a *problematic state* as a state that either has multiple
  input transitions, or is an initial state and has at least one input
  transition.

     (ii)  For an arc a, define:
            i[a] = input symbol on a
            o[a] = output symbol on a
            n[a] = dest-state of a
            p[a] = origin-state of a

           For a state q, define
            E[q] = set of transitions leaving q.
           For a set of states Q, define
            E[Q] = set of transitions leaving some q in Q

    (iii)  For a state s, define Closure(s) as the union of state s, and all
  states t that are reachable via sequences of arcs a such that i[a]=epsilon and
  n[a] is not problematic.

           For a set of states S, define Closure(S) as the union of the closures
  of states s in S.

    (B) Inputs and outputs.

     (i) Inputs and preconditions.  Input is an FST, which should have a symbol
  table compiled into it, and a prefix (e.g. #) for symbols to be added.  We
  check that the input FST is trim, and that it does not have any symbols that
  appear on its arcs, that are equal to the prefix followed by digits.

    (ii) Outputs: The algorithm modifies the FST that is given to it, and
  returns the number of the highest numbered "extra symbol" inserted.  The extra
  symbols are numbered #1, #2 and so on without limit (as integers).  They are
  inserted into the symbol table in a sequential way by calling AvailableKey()
         for each in turn (this is stipulated in case we need to keep other
  symbol tables in sync).

     (C) Sub-algorithm: Closure(S).  This requires the array p(s), defined
  below, which is true if s is problematic.  This also requires, for efficiency,
  that the arcs be sorted on input label.
          Input: a set of states S.  [plus, the fst and the array p].
          Output: a set of states T.
          Algorithm: set T <-- S, Q <-- S.
                     while Q is nonempty:
                        pop a state s from Q.
                        for each transition a from state s with epsilon on the
                            input label [we can find these efficiently using
                            the sorting on arcs]:
                           If p(n[a]) is false and n[a] is not in T:
                              Insert n[a] into T.
                              Add n[a] to Q.
                     return T.


     (D) Main algorithm.


       (i) (a) Check preconditions (FST is trim)
           (b) Make sure there is just one final state (insert epsilon
               transitions as necessary).
           (c) Sort arcs on input label (so epsilon arcs are at the start of
               arc lists).


      (ii) Work out the set of problematic states by constructing a boolean
  array indexed by states, i.e. p(s) which is true if the state is problematic.
  We can do this by constructing an array t(s) to store the number of
  transitions into each state [adding one for the initial state], and then
  setting p(s) = true if t(s) > 1.

           Also create a boolean array d(s), defined for states, and set d(s) =
  false. This array is purely for sanity-checking that we are processing each
  state exactly once.

     (iii) Set up an array of integers m(a), indexed by arcs (how exactly we
  store these is implementation-dependent, but this will probably be a hash from
  (state, arc-index) to integers.  m(a) will store the extra symbol, if any, to
  be added to that arc (or -1 if no such symbol; we can also simply have the arc
  not present in the hash).  The initial value of m(a) is -1 (if array), or
  undefined (if hash).

      (iv) Initialize a set of sets-of-states S, and a queue of pairs Q, as
  follows. The pairs in Q are a pair of (set-of-states, integer), where the
  integer is the number of "special symbols" already used up for that state.

            Note that we use a special indexing for the sets in both S and Q,
  rather than using std::set.  We use a sorted vector of StateId's.  And in S,
  we index them by the lowest-numbered state-id.  Because each state is supposed
  to only ever be a member of one set, if there is an attempt to add another,
  different set with the same lowest-numbered state-id, we detect an error.

            Let I be the single initial state (OpenFST only supports one).
            We set:
              S = { Closure(I) }
              Push (Closure(I), 0)  onto Q.
            Then for each state s such that p(s) = true, and s is not an initial
            state:
              S <-- S u { Closure(s) }
              Push (Closure(s), 0)  onto Q.

       (v) While Q is nonempty:

          (a) Pop pair (A, n) from Q (queue discipline is arbitrary).

          (b) For each state s in A, check that d(s) is false, and set d(s) to
  true. This is for sanity checking only.

          (c)
             Let S_\eps be the set of epsilon-transitions from members of A to
  problematic states (i.e. S_\eps = \{ a \in E[A]: i[a]=\epsilon, p(n[a]) = true
  \}).

             Next, we will define, for each t \neq \epsilon, S_t as the set of
               transitions from some state s in A with t as the input label,
               i.e.:
                 S_t = \{ a \in E[A]: i[a] = t \}
             We further define T_t and U_t as the subsets of S_t where the
               destination state is problematic and non-problematic
               respectively, i.e.:
                 T_t = \{ a \in E[A]: i[a] = t, p(n[a]) = true \}
                 U_t = \{ a \in E[A]: i[a] = t, p(n[a]) = false \}

             The easiest way to obtain these sets is probably to have a hash
  indexed by t that maps to a list of pairs (state, arc-offset) that stores S_t.
               From this we can work out the sizes of T_t and U_t on the fly.

         (d)
             for each transition a in S_\eps:
                m(a) <-- n # Will put symbol n on this transition.
                n <-- n+1  # Note, same n as in pair (A, n)

         (e)
             next,
             for each t\neq epsilon s.t. S_t is nonempty,

                if |S_t| > 1  # if-statement is because if |S_t|=|T_t|=1, no
                              # need for prefix.
                   k = 0
                   for each transition a in T_t:
                      set m(a) to k.
                      set k = k+1

                if |U_t| > 0
                   Let V_t be the set of destination-states of arcs in U_t.
                   if Closure(V_t) is not in S:
                     insert Closure(V_t) into S, and add the pair (Closure(V_t),
  k) to Q.

       (vi) Check that for each state in the FST, d(s) = true.

      (vii) Let n = max_a m(a).  This is the highest-numbered extra symbol
  (extra symbols start from zero, in this numbering which doesn't correspond to
  the symbol-table numbering).  Here we add n+1 extra symbols to the symbol
  table and store the mappings from 0, 1, ... n to the symbol-id.

     (viii) Set up a hash h from (state, int) to (state-id) such that
             t = h(s, k)
            will be the state-id of a newly-created state that has a transition
  to state s with input-label #k.

      (ix) For each arc a such that m(a) != 0:
             If i[a] = epsilon (the input label is epsilon):
                Change i[a] to #m(a). [i.e. prefix then digit m(a)]
             Otherwise:
                If t = h(n[a], m(a)) is not defined [where n[a] is the
                dest-state]:
                   create a new state t with a transition to n[a], with
                   input-label #m(a) and no output-label or weight.
                   Set h(n[a], m(a)) = t.
                Change n[a] to h(n[a], m(a)).


*/
namespace fst {

namespace pre_determinize_helpers {

// Returns true if the symbol table contains any symbol consisting of 'prefix'
// (possibly empty, though that is not encouraged) followed by a nonempty
// sequence of decimal digits (0 to 9) and nothing else.
//
//   symTable  symbol table to scan; must be non-NULL (asserted).
//   prefix    prefix to look for.
//   bad_sym   if non-NULL, receives the first offending symbol found.
//
// Declared inline so that it can live in this header alongside the templates
// rather than needing a .cc file of its own.
inline bool HasBannedPrefixPlusDigits(SymbolTable *symTable, std::string prefix,
                                      std::string *bad_sym) {
  assert(symTable != NULL);
  const char *prefix_ptr = prefix.c_str();
  const size_t prefix_len = strlen(prefix_ptr);
  for (SymbolTableIterator siter(*symTable); !siter.Done(); siter.Next()) {
    const std::string &sym = siter.Symbol();
    if (strncmp(prefix_ptr, sym.c_str(), prefix_len) != 0)
      continue;  // does not start with the prefix.

    // strncmp matched prefix_len non-NUL characters, so this points either at
    // the first character after the prefix or at the terminating '\0'.
    const char *digits_begin = sym.c_str() + prefix_len;
    const char *p = digits_begin;
    // The cast matters: passing a negative plain char (e.g. a byte of a UTF-8
    // encoded symbol) to isdigit() is undefined behaviour.
    while (isdigit(static_cast<unsigned char>(*p))) ++p;

    // Banned only if there was at least one digit and the digits run all the
    // way to the end of the symbol.
    if (p != digits_begin && *p == '\0') {
      if (bad_sym != NULL) *bad_sym = sym;
      return true;
    }
  }
  return false;  // doesn't have banned symbol.
}

// Replaces the contents of *v with the members of s, in ascending order
// (std::set iterates in sorted order).  v must be non-NULL (asserted).
// The set is taken by const reference so that it is not deep-copied on every
// call.
template <class T>
void CopySetToVector(const std::set<T> &s, std::vector<T> *v) {
  assert(v != NULL);
  v->assign(s.begin(), s.end());
}

// Records the nonempty vector m in the table *S, in the slot indexed by its
// first element m[0].
//
// Warning.  This function calls 'new': when it inserts, the returned pointer
// (which is also stored in (*S)[m[0]]) refers to a heap-allocated copy of m
// that the caller must eventually delete.
//
// Returns the newly allocated copy if the slot was empty, or NULL if the slot
// was already occupied (nothing is inserted in that case).
// Preconditions (asserted): m is nonempty and 0 <= m[0] < S->size().
//
// m is taken by const reference so that the only copy made is the one that is
// stored in S.
template <class T>
std::vector<T> *InsertMember(const std::vector<T> &m,
                             std::vector<std::vector<T> *> *S) {
  assert(m.size() > 0);
  T idx = m[0];
  assert(idx >= (T)0 && idx < (T)S->size());
  if ((*S)[idx] != NULL) {
    // The vectors should be the same.  Otherwise this is a bug in the
    // algorithm. It could either be a programming error or a deeper conceptual
    // bug.
    assert(*((*S)[idx]) == m);
    return NULL;  // nothing was inserted.
  }
  std::vector<T> *ret = new std::vector<T>(m);  // New copy of m.
  (*S)[idx] = ret;
  return ret;  // was inserted.
}

// Grows *S, in place, into Closure(S) as defined in item A(iii) of the comment
// above: every state reachable from a member of S through arcs a with
// i[a] = epsilon whose destination n[a] is not problematic (pVec[n[a]] is
// false).  The FST is assumed to be sorted on input label, so that the epsilon
// arcs of each state come first.  The algorithm is the one from section (C)
// above, with a single variable playing the role of both S and T.
template <class Arc>
void Closure(MutableFst<Arc> *fst, std::set<typename Arc::StateId> *S,
             const std::vector<bool> &pVec) {
  typedef typename Arc::StateId StateId;
  // Stack of states whose outgoing epsilon arcs still have to be examined,
  // seeded with the current members of *S in ascending order.
  std::vector<StateId> pending(S->begin(), S->end());
  while (!pending.empty()) {
    const StateId cur = pending.back();
    pending.pop_back();
    for (ArcIterator<MutableFst<Arc> > arcs(*fst, cur); !arcs.Done();
         arcs.Next()) {
      const Arc &arc = arcs.Value();
      // Thanks to the sorting, the first non-epsilon input label means there
      // are no more epsilon arcs leaving this state.
      if (arc.ilabel != 0) break;
      // Follow the arc only when its destination is not problematic, and
      // schedule the destination only if it was not already a member of *S.
      if (!pVec[arc.nextstate] && S->insert(arc.nextstate).second)
        pending.push_back(arc.nextstate);
    }
  }
}  // end function Closure.

}  // end namespace pre_determinize_helpers.

// Implements steps (D)(i)-(ix) of the algorithm described in the comment
// above: inserts extra symbols on the input side of *fst.
//
//   fst            FST to modify in place; must be non-NULL (asserted).  A
//                  super-final state is created and the arcs are sorted on
//                  input label before the main algorithm runs; step (ix) may
//                  relabel arcs and add new states and arcs.
//   first_new_sym  Must be > 0 (asserted).  The i'th added symbol gets the
//                  label first_new_sym + i.
//   symsOut        Must be non-NULL and empty (asserted); receives the labels
//                  of the added symbols.  Note that this check comes after the
//                  early return for an FST that has no start state.
//
// Calls KALDI_ERR if the FST is not both accessible and co-accessible.
template <class Arc, class Int>
void PreDeterminize(MutableFst<Arc> *fst, typename Arc::Label first_new_sym,
                    std::vector<Int> *symsOut) {
  typedef typename Arc::Label Label;
  typedef typename Arc::StateId StateId;
  typedef size_t ArcId;  // Our own typedef, not standard OpenFst.  Use size_t
  // for compatibility with argument of ArcIterator::Seek().
  typedef typename Arc::Weight Weight;
  assert(first_new_sym > 0);
  assert(fst != NULL);
  if (fst->Start() == kNoStateId) return;  // for empty FST, nothing to do.
  assert(symsOut != NULL &&
         symsOut->size() == 0);  // we will output the symbols we add into this.

  {  // (D)(i)(a): check is trim (i.e. connected, in OpenFST parlance).
    KALDI_VLOG(2) << "PreDeterminize: Checking FST properties";
    uint64 props = fst->Properties(
        kAccessible | kCoAccessible,
        true);  // true-> computes properties if unknown at time when called.
    if (props !=
        (kAccessible | kCoAccessible)) {  // All states are not both accessible
                                          // and co-accessible...
      KALDI_ERR << "PreDeterminize: FST is not trim";
    }
  }

  {  // (D)(i)(b): make single final state.
    KALDI_VLOG(2) << "PreDeterminize: creating single final state";
    CreateSuperFinal(fst);
  }

  {  // (D)(i)(c): sort arcs on input.
    KALDI_VLOG(2) << "PreDeterminize: sorting arcs on input";
    ILabelCompare<Arc> icomp;
    ArcSort(fst, icomp);
  }

  StateId n_states = 0,
          max_state =
              0;  // Compute n_states, max_state = highest-numbered state.
  {               // compute n_states, max_state.  n_states is only used in the
                  // log message below.
    for (StateIterator<MutableFst<Arc> > iter(*fst); !iter.Done();
         iter.Next()) {
      StateId state = iter.Value();
      assert(state >= 0);
      n_states++;
      if (state > max_state) max_state = state;
    }
    KALDI_VLOG(2) << "PreDeterminize: n_states = " << (n_states)
                  << ", max_state =" << (max_state);
  }

  std::vector<bool> p_vec(max_state + 1, false);  // compute this next.
  {  // D(ii): computing the array p. ["problematic states, i.e. states with >1
     // input transition,
    // counting being the initial state as an input transition"].
    std::vector<bool> seen_vec(
        max_state + 1,
        false);  // rather than counting incoming transitions we just have a
                 // bool that says we saw at least one.

    seen_vec[fst->Start()] = true;
    for (StateIterator<MutableFst<Arc> > siter(*fst); !siter.Done();
         siter.Next()) {
      for (ArcIterator<MutableFst<Arc> > aiter(*fst, siter.Value());
           !aiter.Done(); aiter.Next()) {
        const Arc &arc = aiter.Value();
        assert(arc.nextstate >= 0 && arc.nextstate < max_state + 1);
        if (seen_vec[arc.nextstate])
          p_vec[arc.nextstate] =
              true;  // now have >1 transition in, so problematic.
        else
          seen_vec[arc.nextstate] = true;
      }
    }
  }
  // D(iii): set up m(a)
  std::map<std::pair<StateId, ArcId>, size_t> m_map;
  // This is the array m, indexed by arcs.  It maps to the index of the symbol
  // we add.  An arc is identified by (source state, position of the arc among
  // the arcs leaving that state).

  // WARNING: we should be sure to clean up this memory before exiting.  Do not
  // return or throw an exception from this function, later than this point,
  // without cleaning up! Note that the vectors are shared between Q and S (they
  // "belong to" S).  The vectors are allocated by InsertMember and deleted in
  // the loop at the very end of this function.
  // NOTE(review): the KALDI_ASSERT calls in D(iv) below run after S has started
  // to be filled; if a failing KALDI_ASSERT throws, that final cleanup would be
  // skipped -- confirm against the KALDI_ASSERT definition.
  std::vector<std::vector<StateId> *> S(max_state + 1,
                                        (std::vector<StateId> *)(void *)0);
  std::vector<std::pair<std::vector<StateId> *, size_t> > Q;

  // D(iv): initialize S and Q.
  {
    std::vector<StateId>
        all_seed_states;  // all "problematic" states, plus initial state (if
                          // not problematic).
    if (!p_vec[fst->Start()]) all_seed_states.push_back(fst->Start());
    for (StateId s = 0; s <= max_state; s++)
      if (p_vec[s]) all_seed_states.push_back(s);

    for (size_t idx = 0; idx < all_seed_states.size(); idx++) {
      StateId s = all_seed_states[idx];
      std::set<StateId> closure_s;
      closure_s.insert(s);  // insert "seed" state.
      pre_determinize_helpers::Closure(
          fst, &closure_s,
          p_vec);  // follow epsilons to non-problematic states.
      // Closure in this case will usually not add anything, for typical
      // topologies in speech
      std::vector<StateId> closure_s_vec;
      pre_determinize_helpers::CopySetToVector(closure_s, &closure_s_vec);
      KALDI_ASSERT(closure_s_vec.size() != 0);
      std::vector<StateId> *ptr =
          pre_determinize_helpers::InsertMember(closure_s_vec, &S);
      KALDI_ASSERT(ptr != NULL);  // Or conceptual bug or programming error.
      Q.push_back(std::pair<std::vector<StateId> *, size_t>(ptr, 0));
    }
  }

  std::vector<bool> d_vec(max_state + 1,
                          false);  // "done vector".  Purely for debugging.

  size_t num_extra_det_states = 0;  // only ever incremented below; its value
                                    // is never read in this function.

  // (D)(v)
  while (Q.size() != 0) {
    // (D)(v)(a)
    std::pair<std::vector<StateId> *, size_t> cur_pair(Q.back());
    Q.pop_back();
    const std::vector<StateId> &A(*cur_pair.first);
    size_t n = cur_pair.second;  // next special symbol to add.

    // (D)(v)(b)
    for (size_t idx = 0; idx < A.size(); idx++) {
      assert(d_vec[A[idx]] == false &&
             "This state has been seen before.  Algorithm error.");
      d_vec[A[idx]] = true;
    }

    // From here is (D)(v)(c).  We work out S_\eps and S_t (for t\neq eps)
    // simultaneously at first.
    std::map<Label, std::set<std::pair<std::pair<StateId, ArcId>, StateId> > >
        arc_hash;
    // arc_hash holds info on every arc leaving a state in the set A (the block
    // below inserts all of them; the split into arcs to problematic and to
    // non-problematic states is done afterwards using p_vec).
    // It is a map from ilabel to a set of pair(pair(start-state, arc-offset),
    // end-state). Here, arc-offset reflects the order in which we accessed the
    // arc using the ArcIterator (zero for the first arc).

    {  // This block sets up arc_hash
      for (size_t idx = 0; idx < A.size(); idx++) {
        StateId s = A[idx];
        assert(s >= 0 && s <= max_state);
        ArcId arc_id = 0;
        for (ArcIterator<MutableFst<Arc> > aiter(*fst, s); !aiter.Done();
             aiter.Next(), ++arc_id) {
          const Arc &arc = aiter.Value();

          std::pair<std::pair<StateId, ArcId>, StateId> this_pair(
              std::pair<StateId, ArcId>(s, arc_id), arc.nextstate);
          bool inserted = (arc_hash[arc.ilabel].insert(this_pair)).second;
          assert(inserted);  // Otherwise we had a duplicate.
        }
      }
    }

    // (D)(v)(d)
    if (arc_hash.count(0) == 1) {  // We have epsilon transitions out.
      std::set<std::pair<std::pair<StateId, ArcId>, StateId> > &eps_set =
          arc_hash[0];
      typedef typename std::set<
          std::pair<std::pair<StateId, ArcId>, StateId> >::iterator set_iter_t;
      for (set_iter_t siter = eps_set.begin(); siter != eps_set.end();
           ++siter) {
        const std::pair<std::pair<StateId, ArcId>, StateId> &this_pr = *siter;
        if (p_vec[this_pr.second]) {  // Eps-transition to problematic state.
          assert(m_map.count(this_pr.first) == 0);
          m_map[this_pr.first] = n;
          n++;
        }
      }
    }

    // (D)(v)(e)
    {
      typedef typename std::map<
          Label,
          std::set<std::pair<std::pair<StateId, ArcId>, StateId> > >::iterator
          map_iter_t;
      typedef typename std::set<
          std::pair<std::pair<StateId, ArcId>, StateId> >::iterator set_iter_t2;
      for (map_iter_t miter = arc_hash.begin(); miter != arc_hash.end();
           ++miter) {
        Label t = miter->first;
        std::set<std::pair<std::pair<StateId, ArcId>, StateId> > &S_t =
            miter->second;
        if (t != 0) {             // For t != epsilon,
          std::set<StateId> V_t;  // set of destination non-problem states. Will
                                  // create this set now.

          // k counts the symbols handed out to arcs in T_t for this label; it
          // is also the starting symbol index passed on to Q with Closure(V_t).
          size_t k = 0;

          // First loop "for each transition a in T_t" (i.e. transitions to
          // problematic states) The if-statement if (|S_t|>1) is pushed inside
          // the loop, as the loop also computes the set V_t.
          for (set_iter_t2 siter = S_t.begin(); siter != S_t.end(); ++siter) {
            const std::pair<std::pair<StateId, ArcId>, StateId> &this_pr =
                *siter;
            if (p_vec[this_pr.second]) {  // only consider problematic states
                                          // (just set T_t)
              if (S_t.size() >
                  1) {  // This is where we pushed the if-statement in.
                assert(m_map.count(this_pr.first) == 0);
                m_map[this_pr.first] = k;
                k++;
                num_extra_det_states++;
              }
            } else {  // Create the set V_t.
              V_t.insert(this_pr.second);
            }
          }
          if (V_t.size() != 0) {
            pre_determinize_helpers::Closure(
                fst, &V_t,
                p_vec);  // follow epsilons to non-problematic states.
            std::vector<StateId> closure_V_t_vec;
            pre_determinize_helpers::CopySetToVector(V_t, &closure_V_t_vec);
            std::vector<StateId> *ptr =
                pre_determinize_helpers::InsertMember(closure_V_t_vec, &S);
            if (ptr != NULL) {  // was inserted.
              Q.push_back(std::pair<std::vector<StateId> *, size_t>(ptr, k));
            }
          }
        }
      }
    }
  }  // end while (Q.size() != 0)

  {  // (D)(vi): Check that for each state in the FST, d(s) = true.
    for (StateIterator<MutableFst<Arc> > siter(*fst); !siter.Done();
         siter.Next()) {
      StateId val = siter.Value();
      assert(d_vec[val] == true);
    }
  }

  {  // (D)(vii): compute symbol-table ID's.
    // sets up symsOut array.
    int64 n = -1;  // stays -1 if m_map is empty, so that no symbols are added.
    for (typename std::map<std::pair<StateId, ArcId>, size_t>::iterator m_iter =
             m_map.begin();
         m_iter != m_map.end(); ++m_iter) {
      n = std::max(n,
                   static_cast<int64>(
                       m_iter->second));  // m_iter->second is of type size_t.
    }
    // At this point n is the highest symbol-id (type size_t) of symbols we must
    // add.
    n++;  // This is now the number of symbols we must add.
    for (size_t i = 0; static_cast<int64>(i) < n; i++)
      symsOut->push_back(first_new_sym + i);
  }

  // (D)(viii): set up hash.
  // Maps (destination state, symbol index) to the state created in D(ix) that
  // has a single arc, labelled with that symbol, to the destination state.
  std::map<std::pair<StateId, size_t>, StateId> h_map;

  {  // D(ix): add extra symbols!  This is where the work gets done.
    // Core part of this is below, search for (*)
    // Every entry of m_map is processed here, including those whose symbol
    // index is zero.
    size_t n_states_added = 0;

    for (typename std::map<std::pair<StateId, ArcId>, size_t>::iterator m_iter =
             m_map.begin();
         m_iter != m_map.end(); ++m_iter) {
      StateId state = m_iter->first.first;
      ArcId arcpos = m_iter->first.second;
      size_t m_a = m_iter->second;

      MutableArcIterator<MutableFst<Arc> > aiter(fst, state);
      aiter.Seek(arcpos);
      Arc arc = aiter.Value();

      // (*) core part here.
      if (arc.ilabel == 0) {
        // Epsilon input: put the new symbol directly on this arc.
        arc.ilabel = (*symsOut)[m_a];
      } else {
        // Non-epsilon input: redirect the arc to an intermediate state that
        // reaches the old destination through an arc carrying the new symbol.
        // The intermediate state is shared by all arcs with the same
        // (destination, symbol index) pair.
        std::pair<StateId, size_t> pr(arc.nextstate, m_a);
        if (!h_map.count(pr)) {
          n_states_added++;
          StateId newstate = fst->AddState();
          assert(newstate >= 0);
          Arc new_arc((*symsOut)[m_a], (Label)0, Weight::One(), arc.nextstate);
          fst->AddArc(newstate, new_arc);
          h_map[pr] = newstate;
        }
        arc.nextstate = h_map[pr];
      }
      aiter.SetValue(arc);
    }

    KALDI_VLOG(2) << "Added " << (n_states_added)
                  << " new states and added/changed " << (m_map.size())
                  << " arcs";
  }
  // Now free up memory.  Slots that were never filled are null pointers, for
  // which delete does nothing.
  for (size_t i = 0; i < S.size(); i++) delete S[i];
}  // end function PreDeterminize

// Creates nSym new symbols named (prefix)0, (prefix)1 and so on in
// *input_sym_table, and appends their ids, in that order, to *symsOut.
//
//   input_sym_table  symbol table to extend; must be non-NULL (asserted).
//   nSym             number of symbols to create; nothing is done if <= 0.
//   prefix           text placed in front of the decimal index.
//   symsOut          must be non-NULL and empty (asserted).
//
// Reports an error through KALDI_ERR if any of the names is already present
// in the symbol table.  All names are checked before the first one is added,
// so that the table is left untouched when this happens.
template <class Label>
void CreateNewSymbols(SymbolTable *input_sym_table, int nSym,
                      std::string prefix, std::vector<Label> *symsOut) {
  assert(input_sym_table != NULL);
  assert(symsOut && symsOut->size() == 0);

  // Pass 1: build the names and make sure none of them is already taken.
  std::vector<std::string> names;
  for (int i = 0; i < nSym; i++) {
    std::stringstream ss;
    ss << prefix << i;
    std::string str = ss.str();
    if (input_sym_table->Find(str) != -1) {  // should not be present.
      KALDI_ERR << "CreateNewSymbols: symbol " << str
                << " is already present in the symbol table";
    }
    names.push_back(str);
  }

  // Pass 2: actually add the symbols.
  for (size_t i = 0; i < names.size(); i++)
    symsOut->push_back((Label)input_sym_table->AddSymbol(names[i]));
}

// Adds, to every state of *fst that is final or has at least one outgoing arc
// with a non-epsilon output label, one unit-weight self-loop per
// (isyms[i], osyms[i]) pair.  See pre-determinize.h for the full
// documentation.
// Preconditions (asserted): fst is non-NULL, isyms and osyms have the same
// size, all symbols are > 0, neither vector contains repeats, and none of the
// symbols already appears on the corresponding side of an arc of *fst.
template <class Arc>
void AddSelfLoops(MutableFst<Arc> *fst,
                  const std::vector<typename Arc::Label> &isyms,
                  const std::vector<typename Arc::Label> &osyms) {
  typedef typename Arc::Label Label;
  typedef typename Arc::StateId StateId;
  typedef typename Arc::Weight Weight;
  assert(fst != NULL);
  assert(isyms.size() == osyms.size());
  const size_t num_syms = isyms.size();
  if (num_syms == 0) return;  // Nothing to do.

  // Everything from here down to the uniqueness assert exists purely for the
  // debugging checks: the min/max values give a cheap range test that is tried
  // before the set lookup.
  const Label in_lo = *std::min_element(isyms.begin(), isyms.end());
  const Label in_hi = *std::max_element(isyms.begin(), isyms.end());
  const Label out_lo = *std::min_element(osyms.begin(), osyms.end());
  const Label out_hi = *std::max_element(osyms.begin(), osyms.end());
  std::set<Label> in_set, out_set;
  for (size_t k = 0; k < num_syms; ++k) {
    // should not have epsilon or invalid symbols.
    assert(isyms[k] > 0 && osyms[k] > 0);
    in_set.insert(isyms[k]);
    out_set.insert(osyms[k]);
  }
  assert(in_set.size() == num_syms && out_set.size() == num_syms);

  for (StateIterator<MutableFst<Arc> > states(*fst); !states.Done();
       states.Next()) {
    const StateId cur = states.Value();
    // Final states always get the loops.
    bool wants_loops = (fst->Final(cur) != Weight::Zero());
    for (ArcIterator<MutableFst<Arc> > arcs(*fst, cur); !arcs.Done();
         arcs.Next()) {
      const Arc &existing = arcs.Value();
      // A failure of either assert below means that the input FST already
      // contained one of the symbols we are about to insert, which violates
      // the preconditions of this algorithm.
      assert(existing.ilabel < in_lo || existing.ilabel > in_hi ||
             in_set.count(existing.ilabel) == 0);
      assert(existing.olabel < out_lo || existing.olabel > out_hi ||
             out_set.count(existing.olabel) == 0);
      // A non-epsilon output label on any outgoing arc also asks for loops.
      if (existing.olabel != 0) wants_loops = true;
    }
    if (!wants_loops) continue;
    for (size_t k = 0; k < num_syms; ++k) {
      Arc loop;
      loop.nextstate = cur;
      loop.weight = Weight::One();
      loop.ilabel = isyms[k];
      loop.olabel = osyms[k];
      fst->AddArc(cur, loop);
    }
  }
}

// Replaces with epsilon (label 0) the input label of every arc of *fst whose
// input label is one of the symbols in isyms, and returns the number of arcs
// that were changed.  Returns 0 without looking at the FST if isyms is empty.
//
// We could do this using the Mapper concept, but this is much easier to
// understand.
template <class Arc>
int64 DeleteISymbols(MutableFst<Arc> *fst,
                     std::vector<typename Arc::Label> isyms) {
  typedef typename Arc::Label Label;
  typedef typename Arc::StateId StateId;

  if (isyms.empty()) return 0;
  assert(fst != NULL);

  // De-duplicate first.  The "consecutive" shortcut below is only valid when
  // the width of the range is compared with the number of *distinct* symbols:
  // comparing with isyms.size() would, for an input such as {5, 5, 7},
  // wrongly conclude that 5..7 is fully covered and delete symbol 6 as well.
  const std::set<Label> isyms_set(isyms.begin(), isyms.end());
  const Label isyms_min = *isyms_set.begin(), isyms_max = *isyms_set.rbegin();
  // When the symbols form a contiguous range the range test alone decides
  // membership and the set lookup can be skipped.  The arithmetic is done in
  // int64 so that it cannot overflow Label.
  const bool isyms_consecutive =
      (static_cast<int64>(isyms_max) - static_cast<int64>(isyms_min) + 1 ==
       static_cast<int64>(isyms_set.size()));

  int64 num_deleted = 0;
  for (StateIterator<MutableFst<Arc> > siter(*fst); !siter.Done();
       siter.Next()) {
    StateId state = siter.Value();
    for (MutableArcIterator<MutableFst<Arc> > aiter(fst, state); !aiter.Done();
         aiter.Next()) {
      const Arc &arc = aiter.Value();
      if (arc.ilabel < isyms_min || arc.ilabel > isyms_max)
        continue;  // cheap range rejection.
      if (!isyms_consecutive && isyms_set.count(arc.ilabel) == 0)
        continue;  // inside the range but not one of the symbols.
      Arc mod_arc(arc);
      mod_arc.ilabel = 0;  // change label to epsilon.
      aiter.SetValue(mod_arc);
      num_deleted++;
    }
  }
  return num_deleted;
}

// Gives *fst a single final state that has unit final weight and no outgoing
// arcs, and returns the id of that state.
//
// If the FST already has exactly one final state and that state has final
// weight One() and no arcs out, nothing is changed and that state is returned.
// Otherwise a new state is added and made final with weight One(); every
// previously final state is made non-final and given an epsilon:epsilon arc to
// the new state carrying its old final weight.
// fst must be non-NULL (asserted).  States are visited as 0 .. NumStates()-1.
template <class Arc>
typename Arc::StateId CreateSuperFinal(MutableFst<Arc> *fst) {
  typedef typename Arc::StateId StateId;
  typedef typename Arc::Weight Weight;
  assert(fst != NULL);

  // Collect the states that are currently final.
  const StateId num_states = fst->NumStates();
  std::vector<StateId> final_states;
  for (StateId s = 0; s < num_states; s++) {
    if (fst->Final(s) != Weight::Zero()) final_states.push_back(s);
  }

  if (final_states.size() == 1 &&
      fst->Final(final_states[0]) == Weight::One()) {
    ArcIterator<MutableFst<Arc> > iter(*fst, final_states[0]);
    if (iter.Done()) {
      // We already have a final state w/ no transitions out and unit weight.
      // So we're done.
      return final_states[0];
    }
  }

  StateId final_state = fst->AddState();
  fst->SetFinal(final_state, Weight::One());
  for (size_t idx = 0; idx < final_states.size(); idx++) {
    StateId s = final_states[idx];
    // Move the final weight of s onto an epsilon arc into the new state.
    Weight weight = fst->Final(s);
    fst->SetFinal(s, Weight::Zero());
    Arc arc;
    arc.ilabel = 0;
    arc.olabel = 0;
    arc.nextstate = final_state;
    arc.weight = weight;
    fst->AddArc(s, arc);
  }
  return final_state;
}

}  // namespace fst

#endif  // KALDI_FSTEXT_PRE_DETERMINIZE_INL_H_


================================================
FILE: runtime/engine/kaldi/fstext/pre-determinize.h
================================================
// fstext/pre-determinize.h

// Copyright 2009-2011  Microsoft Corporation

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_FSTEXT_PRE_DETERMINIZE_H_
#define KALDI_FSTEXT_PRE_DETERMINIZE_H_

#include <fst/fst-decl.h>
#include <fst/fstlib.h>

#include <algorithm>
#include <map>
#include <set>
#include <string>
#include <vector>

#include "base/kaldi-common.h"

namespace fst {

/* PreDeterminize inserts extra symbols on the input side of an FST as necessary
   to ensure that, after epsilon removal, it will be compactly determinizable by
   the determinize* algorithm.  (By compactly determinizable we mean that no
   original FST state is represented in more than one determinized state.)

   Caution: this code is now only used in testing.

   The new symbols start from the value "first_new_symbol", which should be
   higher than the largest-numbered symbol currently in the FST.  The new
   symbols added are put in the array syms_out, which should be empty at start.
*/

template <class Arc, class Int>
void PreDeterminize(MutableFst<Arc> *fst, typename Arc::Label first_new_symbol,
                    std::vector<Int> *syms_out);

/* CreateNewSymbols is a helper function used inside PreDeterminize, and is also
   useful when you need to add a number of extra symbols to a different
   vocabulary from the one modified by PreDeterminize. */

template <class Label>
void CreateNewSymbols(SymbolTable *inputSymTable, int nSym, std::string prefix,
                      std::vector<Label> *syms_out);

/** AddSelfLoops is a function you will probably want to use alongside
   PreDeterminize, to add self-loops to any FSTs that you compose on the left
   hand side of the one modified by PreDeterminize.

    This function inserts loops with "special symbols" [e.g. \#0, \#1] into an
   FST. This is done at each final state and each state with non-epsilon output
   symbols on at least one arc out of it.  This is to ensure that these symbols,
   when inserted into the input side of an FST we will compose with on the
   right, can "pass through" this FST.

    At input, isyms and osyms must be vectors of the same size n, corresponding
    to symbols that currently do not exist in 'fst'.  For each state in 'fst'
    that has non-epsilon symbols on the output side of arcs leaving it, or which
    is a final state, this function inserts n self-loops with unit weight, one
    for each of the n (input, output) symbol pairs.
*/
template <class Arc>
void AddSelfLoops(MutableFst<Arc> *fst,
                  const std::vector<typename Arc::Label> &isyms,
                  const std::vector<typename Arc::Label> &osyms);

/* DeleteISymbols replaces any instances of symbols in the vector symsIn,
   appearing on the input side, with epsilon. */
/* It returns the number of instances of symbols deleted. */
template <class Arc>
int64 DeleteISymbols(MutableFst<Arc> *fst,
                     std::vector<typename Arc::Label> symsIn);

/* CreateSuperFinal takes an FST, and creates an equivalent FST with a single
   final state with no transitions out and unit final weight, by inserting
   epsilon transitions as necessary. */
template <class Arc>
typename Arc::StateId CreateSuperFinal(MutableFst<Arc> *fst);

}  // end namespace fst

#include "fstext/pre-determinize-inl.h"

#endif  // KALDI_FSTEXT_PRE_DETERMINIZE_H_


================================================
FILE: runtime/engine/kaldi/fstext/remove-eps-local-inl.h
================================================
// fstext/remove-eps-local-inl.h

// Copyright 2009-2011  Microsoft Corporation
//                2014  Johns Hopkins University (author: Daniel Povey

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_FSTEXT_REMOVE_EPS_LOCAL_INL_H_
#define KALDI_FSTEXT_REMOVE_EPS_LOCAL_INL_H_

#include <vector>

namespace fst {

// Function object that combines two weights by calling Plus() on them.  It is
// the default for the ReweightPlus template argument of RemoveEpsLocalClass.
template <class Weight>
struct ReweightPlusDefault {
  Weight operator()(const Weight &lhs, const Weight &rhs) {
    return Plus(lhs, rhs);
  }
};

// Function object that combines two TropicalWeights using the Plus() of
// LogWeight: both values are wrapped as LogWeights, added there, and the
// resulting value is wrapped as a TropicalWeight again.
struct ReweightPlusLogArc {
  TropicalWeight operator()(const TropicalWeight &lhs,
                            const TropicalWeight &rhs) {
    LogWeight lhs_log(lhs.Value());
    LogWeight rhs_log(rhs.Value());
    return TropicalWeight(Plus(lhs_log, rhs_log).Value());
  }
};

template <class Arc,
          class ReweightPlus = ReweightPlusDefault<typename Arc::Weight> >
class RemoveEpsLocalClass {
  typedef typename Arc::StateId StateId;
  typedef typename Arc::Label Label;
  typedef typename Arc::Weight Weight;

 public:
  explicit RemoveEpsLocalClass(MutableFst<Arc> *fst) : fst_(fst) {
    if (fst_->Start() == kNoStateId) return;  // empty.
    non_coacc_state_ = fst_->AddState();
    InitNumArcs();
    StateId num_states = fst_->NumStates();
    for (StateId s = 0; s < num_states; s++)
      for (size_t pos = 0; pos < fst_->NumArcs(s); pos++) RemoveEps(s, pos);
    assert(CheckNumArcs());
    Connect(fst);  // remove inaccessible states.
  }

 private:
  MutableFst<Arc> *fst_;
  StateId non_coacc_state_;  //  use this to delete arcs: make it nextstate
  std::vector<StateId> num_arcs_in_;  // The number of arcs into the state, plus
                                      // one if it's the start state.
  std::vector<StateId> num_arcs_out_;  // The number of arcs out of the state,
                                       // plus one if it's a final state.
  ReweightPlus reweight_plus_;

  bool CanCombineArcs(const Arc &a, const Arc &b, Arc *c) {
    if (a.ilabel != 0 && b.ilabel != 0) return false;
    if (a.olabel != 0 && b.olabel != 0) return false;
    c->weight = Times(a.weight, b.weight);
    c->ilabel = (a.ilabel != 0 ? a.ilabel : b.ilabel);
    c->olabel = (a.olabel != 0 ? a.olabel : b.olabel);
    c->nextstate = b.nextstate;
    return true;
  }

  static bool CanCombineFinal(const Arc &a, Weight final_prob,
                              Weight *final_prob_out) {
    if (a.ilabel != 0 || a.olabel != 0) {
      return false;
    } else {
      *final_prob_out = Times(a.weight, final_prob);
      return true;
    }
  }

  void InitNumArcs() {  // init num transitions in/out of each state.
    StateId num_states = fst_->NumStates();
    num_arcs_in_.resize(num_states);
    num_arcs_out_.resize(num_states);
    num_arcs_in_[fst_->Start()]++;  // count start as trans in.
    for (StateId s = 0; s < num_states; s++) {
      if (fst_->Final(s) != Weight::Zero())
        num_arcs_out_[s]++;  // count final as transition.
      for (ArcIterator<MutableFst<Arc> > aiter(*fst_, s); !aiter.Done();
           aiter.Next()) {
        num_arcs_in_[aiter.Value().nextstate]++;
        num_arcs_out_[s]++;
      }
    }
  }

  bool CheckNumArcs() {  // check num arcs in/out of each state, at end.  Debug.
    num_arcs_in_[fst_->Start()]--;  // count start as trans in.
    StateId num_states = fst_->NumStates();
    for (StateId s = 0; s < num_states; s++) {
      if (s == non_coacc_state_) continue;
      if (fst_->Final(s) != Weight::Zero())
        num_arcs_out_[s]--;  // count final as transition.
      for (ArcIterator<MutableFst<Arc> > aiter(*fst_, s); !aiter.Done();
           aiter.Next()) {
        if (aiter.Value().nextstate == non_coacc_state_) continue;
        num_arcs_in_[aiter.Value().nextstate]--;
        num_arcs_out_[s]--;
      }
    }
    for (StateId s = 0; s < num_states; s++) {
      assert(num_arcs_in_[s] == 0);
      assert(num_arcs_out_[s] == 0);
    }
    return true;  // always does this.  so we can assert it w/o warnings.
  }

  inline void GetArc(StateId s, size_t pos, Arc *arc) const {
    ArcIterator<MutableFst<Arc> > aiter(*fst_, s);
    aiter.Seek(pos);
    *arc = aiter.Value();
  }

  inline void SetArc(StateId s, size_t pos, const Arc &arc) {
    MutableArcIterator<MutableFst<Arc> > aiter(fst_, s);
    aiter.Seek(pos);
    aiter.SetValue(arc);
  }

  void Reweight(StateId s, size_t pos, Weight reweight) {
    // Reweight is called from RemoveEpsPattern1; it is a step we
    // do to preserve stochasticity.  This function multiplies the
    // arc at (s, pos) by reweight and divides all the arcs [+final-prob]
    // out of the next state by the same.  This is only valid if
    // the next state has only one arc in and is not the start state.
    assert(reweight != Weight::Zero());
    MutableArcIterator<MutableFst<Arc> > aiter(fst_, s);
    aiter.Seek(pos);
    Arc arc = aiter.Value();
    assert(num_arcs_in_[arc.nextstate] == 1);
    arc.weight = Times(arc.weight, reweight);
    aiter.SetValue(arc);

    for (MutableArcIterator<MutableFst<Arc> > aiter_next(fst_, arc.nextstate);
         !aiter_next.Done(); aiter_next.Next()) {
      Arc nextarc = aiter_next.Value();
      if (nextarc.nextstate != non_coacc_state_) {
        nextarc.weight = Divide(nextarc.weight, reweight, DIVIDE_LEFT);
        aiter_next.SetValue(nextarc);
      }
    }
    Weight final = fst_->Final(arc.nextstate);
    if (final != Weight::Zero()) {
      fst_->SetFinal(arc.nextstate, Divide(final, reweight, DIVIDE_LEFT));
    }
  }

  // RemoveEpsPattern1 applies where this arc, which is not a
  // self-loop, enters a state which has only one input transition
  // [and is not the start state], and has multiple output
  // transitions [counting being the final-state as a final-transition].

  void RemoveEpsPattern1(StateId s, size_t pos, Arc arc) {
    const StateId nextstate = arc.nextstate;
    Weight total_removed = Weight::Zero(),
           total_kept = Weight::Zero();  // totals out of nextstate.
    std::vector<Arc> arcs_to_add;        // to add to state s.
    for (MutableArcIterator<MutableFst<Arc> > aiter_next(fst_, nextstate);
         !aiter_next.Done(); aiter_next.Next()) {
      Arc nextarc = aiter_next.Value();
      if (nextarc.nextstate == non_coacc_state_) continue;  // deleted.
      Arc combined;
      if (CanCombineArcs(arc, nextarc, &combined)) {
        total_removed = reweight_plus_(total_removed, nextarc.weight);
        num_arcs_out_[nextstate]--;
        num_arcs_in_[nextarc.nextstate]--;
        nextarc.nextstate = non_coacc_state_;
        aiter_next.SetValue(nextarc);
        arcs_to_add.push_back(combined);
      } else {
        total_kept = reweight_plus_(total_kept, nextarc.weight);
      }
    }

    {  // now final-state.
      Weight next_final = fst_->Final(nextstate);
      if (next_final != Weight::Zero()) {
        Weight new_final;
        if (CanCombineFinal(arc, next_final, &new_final)) {
          total_removed = reweight_plus_(total_removed, next_final);
          if (fst_->Final(s) == Weight::Zero())
            num_arcs_out_[s]++;  // final is counted as arc.
          fst_->SetFinal(s, Plus(fst_->Final(s), new_final));
          num_arcs_out_[nextstate]--;
          fst_->SetFinal(nextstate, Weight::Zero());
        } else {
          total_kept = reweight_plus_(total_kept, next_final);
        }
      }
    }

    if (total_removed != Weight::Zero()) {  // did something...
      if (total_kept == Weight::Zero()) {   // removed everything: remove arc.
        num_arcs_out_[s]--;
        num_arcs_in_[arc.nextstate]--;
        arc.nextstate = non_coacc_state_;
        SetArc(s, pos, arc);
      } else {
        // Have to reweight.
        Weight total = reweight_plus_(total_removed, total_kept);
        Weight reweight = Divide(total_kept, total, DIVIDE_LEFT);  // <=1
        Reweight(s, pos, reweight);
      }
    }
    // Now add the arcs we were going to add.
    for (size_t i = 0; i < arcs_to_add.size(); i++) {
      num_arcs_out_[s]++;
      num_arcs_in_[arcs_to_add[i].nextstate]++;
      fst_->AddArc(s, arcs_to_add[i]);
    }
  }

  void RemoveEpsPattern2(StateId s, size_t pos, Arc arc) {
    // Pattern 2 is where "nextstate" has only one arc out, counting
    // being-the-final-state as an arc, but possibly multiple arcs in.
    // Also, nextstate != s.

    const StateId nextstate = arc.nextstate;
    bool can_delete_next = (num_arcs_in_[nextstate] == 1);  // if
    // we combine, can delete the corresponding out-arc/final-prob
    // of nextstate.
    bool delete_arc = false;  // set to true if this arc to be deleted.

    Weight next_final = fst_->Final(arc.nextstate);
    if (next_final !=
        Weight::Zero()) {  // nextstate has no actual arcs out, only final-prob.
      Weight new_final;
      if (CanCombineFinal(arc, next_final, &new_final)) {
        if (fst_->Final(s) == Weight::Zero())
          num_arcs_out_[s]++;  // final is counted as arc.
        fst_->SetFinal(s, Plus(fst_->Final(s), new_final));
        delete_arc = true;  // will delete "arc".
        if (can_delete_next) {
          num_arcs_out_[nextstate]--;
          fst_->SetFinal(nextstate, Weight::Zero());
        }
      }
    } else {  // has an arc but no final prob.
      MutableArcIterator<MutableFst<Arc> > aiter_next(fst_, nextstate);
      assert(!aiter_next.Done());
      while (aiter_next.Value().nextstate == non_coacc_state_) {
        aiter_next.Next();
        assert(!aiter_next.Done());
      }
      // now aiter_next points to a real arc out of nextstate.
      Arc nextarc = aiter_next.Value();
      Arc combined;
      if (CanCombineArcs(arc, nextarc, &combined)) {
        delete_arc = true;
        if (can_delete_next) {  // do it before we invalidate iterators
          num_arcs_out_[nextstate]--;
          num_arcs_in_[nextarc.nextstate]--;
          nextarc.nextstate = non_coacc_state_;
          aiter_next.SetValue(nextarc);
        }
        num_arcs_out_[s]++;
        num_arcs_in_[combined.nextstate]++;
        fst_->AddArc(s, combined);
      }
    }
    if (delete_arc) {
      num_arcs_out_[s]--;
      num_arcs_in_[nextstate]--;
      arc.nextstate = non_coacc_state_;
      SetArc(s, pos, arc);
    }
  }

  void RemoveEps(StateId s, size_t pos) {
    // Tries to do local epsilon-removal for arc sequences starting with this
    // arc
    Arc arc;
    GetArc(s, pos, &arc);
    StateId nextstate = arc.nextstate;
    if (nextstate == non_coacc_state_) return;  // deleted arc.
    if (nextstate == s) return;  // don't handle self-loops: too complex.

    if (num_arcs_in_[nextstate] == 1 && num_arcs_out_[nextstate] > 1) {
      RemoveEpsPattern1(s, pos, arc);
    } else if (num_arcs_out_[nextstate] == 1) {
      RemoveEpsPattern2(s, pos, arc);
    }
  }
};

/// Runs local epsilon removal on *fst in place, using the default
/// ReweightPlus policy of RemoveEpsLocalClass.
template <class Arc>
void RemoveEpsLocal(MutableFst<Arc> *fst) {
  typedef RemoveEpsLocalClass<Arc> Remover;
  // Constructing the helper object performs the whole algorithm.
  Remover remover(fst);
}

/// Same as RemoveEpsLocal for StdArc, except that weight totals used for
/// reweighting are accumulated with ReweightPlusLogArc.
///
/// This is a non-template function defined in a header, so it is marked
/// `inline` here.  That keeps the definition safe to include from several
/// translation units even when this -inl.h file is included without the
/// `inline` declaration in remove-eps-local.h having been seen first.
inline void RemoveEpsLocalSpecial(MutableFst<StdArc> *fst) {
  // work gets done in initializer.
  RemoveEpsLocalClass<StdArc, ReweightPlusLogArc> c(fst);
}

}  // end namespace fst.

#endif  // KALDI_FSTEXT_REMOVE_EPS_LOCAL_INL_H_


================================================
FILE: runtime/engine/kaldi/fstext/remove-eps-local.h
================================================
// fstext/remove-eps-local.h

// Copyright 2009-2011  Microsoft Corporation
//                2014  Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_FSTEXT_REMOVE_EPS_LOCAL_H_
#define KALDI_FSTEXT_REMOVE_EPS_LOCAL_H_

#include <fst/fst-decl.h>
#include <fst/fstlib.h>

namespace fst {

/// RemoveEpsLocal removes some (but not necessarily all) epsilons in an FST,
/// using an algorithm that is guaranteed to never increase the number of arcs
/// in the FST (and will also never increase the number of states).  The
/// algorithm is not optimal but is reasonably clever.  It does not just remove
/// epsilon arcs; it also combines pairs of input-epsilon and output-epsilon
/// arcs into one.
/// The algorithm preserves equivalence and stochasticity in the given semiring.
/// If you want to preserve stochasticity in a different semiring (e.g. log),
/// then use RemoveEpsLocalSpecial, which only works for StdArc but which
/// preserves stochasticity, where possible (*) in the LogArc sense.  The reason
/// that we can't just cast to a different semiring is that in that case we
/// would no longer be able to guarantee equivalence in the original semiring
/// (this arises from what happens when we combine identical arcs).
/// (*) by "where possible".. there are situations where we wouldn't be able to
/// preserve stochasticity in the LogArc sense while maintaining equivalence in
/// the StdArc sense, so in these situations we maintain equivalence.

template <class Arc>
void RemoveEpsLocal(MutableFst<Arc> *fst);

/// As RemoveEpsLocal but takes care to preserve stochasticity
/// when cast to LogArc.
inline void RemoveEpsLocalSpecial(MutableFst<StdArc> *fst);

}  // namespace fst

#include "fstext/remove-eps-local-inl.h"

#endif  // KALDI_FSTEXT_REMOVE_EPS_LOCAL_H_


================================================
FILE: runtime/engine/kaldi/fstext/table-matcher.h
================================================
// fstext/table-matcher.h

// Copyright 2009-2011  Microsoft Corporation

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_FSTEXT_TABLE_MATCHER_H_
#define KALDI_FSTEXT_TABLE_MATCHER_H_

#include <fst/fst-decl.h>
#include <fst/fstlib.h>

#include <memory>
#include <vector>

namespace fst {

/// TableMatcher is a matcher specialized for the case where the output
/// side of the left FST always has either all-epsilons coming out of
/// a state, or a majority of the symbol table.  Therefore we can
/// either store nothing (for the all-epsilon case) or store a lookup
/// table from Labels to arc offsets.  Since the TableMatcher has to
/// iterate over all arcs in each left-hand state the first time it sees
/// it, this matcher type is not efficient if you compose with
/// something very small on the right-- unless you do it multiple
/// times and keep the matcher around. To do this requires using the
/// most advanced form of ComposeFst in Compose.h, that initializes
/// with ComposeFstImplOptions.

/// Tuning parameters that decide, per state, whether TableMatcher builds a
/// label lookup table or falls back to its backoff matcher.
struct TableMatcherOptions {
  // We construct the table if it would be at least this full, i.e. unless
  // (highest label + 1) * table_ratio exceeds the number of arcs of the state.
  float table_ratio;
  // States with fewer arcs than this never get a table.
  int min_table_size;

  TableMatcherOptions() {
    table_ratio = 0.25;
    min_table_size = 4;
  }
};

// Introducing an "impl" class for TableMatcher because
// we need to do a shallow copy of the Matcher for when
// we want to cache tables for multiple compositions.
/// Implementation object behind TableMatcher.
///
/// For every state it is asked about, it either builds a table mapping a
/// label to the position of the first arc carrying that label, or (when the
/// state has too few arcs or the table would be too sparse) delegates to
/// `backoff_matcher_`.  Tables are built lazily in SetState() and kept for
/// the lifetime of the object.
///
/// Template arguments:
///   F               the FST type being matched against.
///   BackoffMatcher  matcher used for states that do not get a table.
template <class F, class BackoffMatcher = SortedMatcher<F> >
class TableMatcherImpl : public MatcherBase<typename F::Arc> {
 public:
  typedef F FST;
  typedef typename F::Arc Arc;
  typedef typename Arc::Label Label;
  typedef typename Arc::StateId StateId;
  typedef StateId
      ArcId;  // Use this type to store arc offsets [it's actually size_t
  // in the Seek function of ArcIterator, but StateId should be big enough].
  typedef typename Arc::Weight Weight;

 public:
  /// Creates a matcher over a private copy of `fst`.
  ///
  /// `match_type` must be MATCH_INPUT or MATCH_OUTPUT, and the FST must be
  /// sorted on the corresponding label (both are checked with assert).
  /// `opts.min_table_size` must be positive.
  TableMatcherImpl(const FST &fst, MatchType match_type,
                   const TableMatcherOptions &opts = TableMatcherOptions())
      : match_type_(match_type),
        fst_(fst.Copy()),
        // Give the per-Find() state a defined value so that Done(), Value()
        // and Next() never read indeterminate members if they are reached
        // before the first Find().
        current_loop_(false),
        match_label_(kNoLabel),
        loop_(match_type == MATCH_INPUT
                  ? Arc(kNoLabel, 0, Weight::One(), kNoStateId)
                  : Arc(0, kNoLabel, Weight::One(), kNoStateId)),
        aiter_(NULL),
        s_(kNoStateId),
        opts_(opts),
        backoff_matcher_(fst, match_type) {
    assert(opts_.min_table_size > 0);
    if (match_type == MATCH_INPUT)
      assert(fst_->Properties(kILabelSorted, true) == kILabelSorted);
    else if (match_type == MATCH_OUTPUT)
      assert(fst_->Properties(kOLabelSorted, true) == kOLabelSorted);
    else
      assert(0 && "Invalid FST properties");
  }

  /// Returns the matcher's own copy of the FST.
  virtual const FST &GetFst() const { return *fst_; }

  /// Frees every table that was actually allocated, the current arc iterator
  /// and the FST copy.
  virtual ~TableMatcherImpl() {
    std::vector<ArcId> *const empty =
        ((std::vector<ArcId> *)(NULL)) + 1;  // special marker.
    for (size_t i = 0; i < tables_.size(); i++) {
      if (tables_[i] != NULL && tables_[i] != empty) delete tables_[i];
    }
    delete aiter_;
    delete fst_;
  }

  /// Returns the match type given at construction; `test` is ignored.
  virtual MatchType Type(bool test) const { return match_type_; }

  /// Makes `s` the current state.
  ///
  /// tables_[s] is in one of three conditions: NULL (not examined yet), the
  /// special `empty` marker (examined, no table: use the backoff matcher), or
  /// a real table.  On return `aiter_` is non-NULL exactly when a real table
  /// is in use for `s`; all other member functions branch on that.
  void SetState(StateId s) {
    if (aiter_) {
      delete aiter_;
      aiter_ = NULL;
    }
    if (match_type_ == MATCH_NONE) LOG(FATAL) << "TableMatcher: bad match type";
    s_ = s;
    std::vector<ArcId> *const empty =
        ((std::vector<ArcId> *)(NULL)) + 1;  // special marker.
    if (static_cast<size_t>(s) >= tables_.size()) {
      assert(s >= 0);
      tables_.resize(s + 1, NULL);
    }
    std::vector<ArcId> *&this_table_ = tables_[s];  // note: ref to ptr.
    if (this_table_ == empty) {
      backoff_matcher_.SetState(s);
      return;
    } else if (this_table_ == NULL) {  // NULL means has not been set.
      ArcId num_arcs = fst_->NumArcs(s);
      if (num_arcs == 0 || num_arcs < opts_.min_table_size) {
        this_table_ = empty;
        backoff_matcher_.SetState(s);
        return;
      }
      ArcIterator<FST> aiter(*fst_, s);
      aiter.SetFlags(
          kArcNoCache |
              (match_type_ == MATCH_OUTPUT ? kArcOLabelValue : kArcILabelValue),
          kArcNoCache | kArcValueFlags);
      // the statement above, says: "Don't cache stuff; and I only need the
      // ilabel/olabel to be computed.
      // The arcs are label-sorted (checked in the constructor), so the last
      // arc carries the highest label.
      aiter.Seek(num_arcs - 1);
      Label highest_label =
          (match_type_ == MATCH_OUTPUT ? aiter.Value().olabel
                                       : aiter.Value().ilabel);
      if ((highest_label + 1) * opts_.table_ratio > num_arcs) {
        this_table_ = empty;
        backoff_matcher_.SetState(s);
        return;  // table would be too sparse.
      }
      // OK, now we are creating the table.
      this_table_ = new std::vector<ArcId>(highest_label + 1, kNoStateId);
      ArcId pos = 0;
      for (aiter.Seek(0); !aiter.Done(); aiter.Next(), pos++) {
        Label label = (match_type_ == MATCH_OUTPUT ? aiter.Value().olabel
                                                   : aiter.Value().ilabel);
        assert(static_cast<size_t>(label) <=
               static_cast<size_t>(highest_label));  // also checks >= 0.
        if ((*this_table_)[label] == kNoStateId) (*this_table_)[label] = pos;
        // set this_table_[label] to first position where arc has this
        // label.
      }
    }
    // At this point in the code, this_table_ != NULL and != empty.
    aiter_ = new ArcIterator<FST>(*fst_, s);
    aiter_->SetFlags(kArcNoCache,
                     kArcNoCache);  // don't need to cache arcs as may only
    // need a small subset.
    loop_.nextstate = s;
    // aiter_ = NULL;
    // backoff_matcher_.SetState(s);
  }

  /// Positions the matcher on the first match for `match_label` at the
  /// current state and returns true if there is at least one match.
  ///
  /// Label 0 additionally matches the implicit self-loop `loop_`, which is
  /// then reported first.  kNoLabel is looked up as label 0 but without the
  /// implicit self-loop.
  bool Find(Label match_label) {
    if (!aiter_) {
      return backoff_matcher_.Find(match_label);
    } else {
      match_label_ = match_label;
      current_loop_ = (match_label == 0);
      // kNoLabel means the implicit loop on the other FST --
      // matches real epsilons but not the self-loop.
      match_label_ = (match_label_ == kNoLabel ? 0 : match_label_);
      if (static_cast<size_t>(match_label_) < tables_[s_]->size() &&
          (*(tables_[s_]))[match_label_] != kNoStateId) {
        aiter_->Seek((*(tables_[s_]))[match_label_]);  // label exists.
        return true;
      }
      return current_loop_;
    }
  }

  /// Returns the current match: the implicit self-loop while it is pending,
  /// otherwise the arc the iterator is positioned on.
  const Arc &Value() const {
    if (aiter_)
      return current_loop_ ? loop_ : aiter_->Value();
    else
      return backoff_matcher_.Value();
  }

  /// Advances to the next match.  Consuming the implicit self-loop does not
  /// move the arc iterator.
  void Next() {
    if (aiter_) {
      if (current_loop_)
        current_loop_ = false;
      else
        aiter_->Next();
    } else {
      backoff_matcher_.Next();
    }
  }

  /// Returns true when there are no more matches: the implicit self-loop is
  /// not pending and the iterator is exhausted or has moved on to an arc with
  /// a different label.
  bool Done() const {
    if (aiter_ != NULL) {
      if (current_loop_) return false;
      if (aiter_->Done()) return true;
      Label label = (match_type_ == MATCH_OUTPUT ? aiter_->Value().olabel
                                                 : aiter_->Value().ilabel);
      return (label != match_label_);
    } else {
      return backoff_matcher_.Done();
    }
  }

  /// Non-const overload; behaves exactly like the const Value().
  const Arc &Value() {
    if (aiter_ != NULL) {
      return (current_loop_ ? loop_ : aiter_->Value());
    } else {
      return backoff_matcher_.Value();
    }
  }

  /// Not supported: asserts in debug builds and returns NULL otherwise.
  virtual TableMatcherImpl<FST> *Copy(bool safe = false) const {
    assert(0);  // shouldn't be called.  This is not a "real" matcher,
    // although we derive from MatcherBase for convenience.
    return NULL;
  }

  /// Returns `props` unchanged.
  virtual uint64 Properties(uint64 props) const {
    return props;
  }  // simple matcher that does
     // not change its FST, so properties are properties of FST it is applied to

 private:
  // MatcherBase virtual hooks; each forwards to the public member above.
  virtual void SetState_(StateId s) { SetState(s); }
  virtual bool Find_(Label label) { return Find(label); }
  virtual bool Done_() const { return Done(); }
  virtual const Arc &Value_() const { return Value(); }
  virtual void Next_() { Next(); }

  MatchType match_type_;     // MATCH_INPUT or MATCH_OUTPUT.
  FST *fst_;                 // owned copy of the FST passed to the constructor.
  bool current_loop_;        // true while the implicit self-loop is pending.
  Label match_label_;        // label of the last Find(), kNoLabel mapped to 0.
  Arc loop_;                 // the implicit self-loop arc.
  ArcIterator<FST> *aiter_;  // non-NULL iff a table is in use for state s_.
  StateId s_;                // state passed to the last SetState().
  std::vector<std::vector<ArcId> *> tables_;  // per state: NULL, marker or table.
  TableMatcherOptions opts_;
  BackoffMatcher backoff_matcher_;  // used for states without a table.
};

/// Matcher handle that forwards every operation to a TableMatcherImpl held
/// through a std::shared_ptr.  Copies made with the copy constructor or
/// Copy() share the same implementation object, and therefore the same cached
/// tables.
template <class F, class BackoffMatcher = SortedMatcher<F> >
class TableMatcher : public MatcherBase<typename F::Arc> {
 public:
  using FST = F;
  using Arc = typename F::Arc;
  using Label = typename Arc::Label;
  using StateId = typename Arc::StateId;
  // Type used to store arc offsets [it's actually size_t in the Seek function
  // of ArcIterator, but StateId should be big enough].
  using ArcId = StateId;
  using Weight = typename Arc::Weight;
  using Impl = TableMatcherImpl<F, BackoffMatcher>;

  /// Builds a fresh implementation object for `fst`.
  TableMatcher(const FST &fst, MatchType match_type,
               const TableMatcherOptions &opts = TableMatcherOptions())
      : impl_(std::make_shared<Impl>(fst, match_type, opts)) {}

  /// Shallow copy: the new matcher shares `matcher`'s implementation object.
  /// Requesting a "safe" copy is a fatal error.
  TableMatcher(const TableMatcher<FST, BackoffMatcher> &matcher,
               bool safe = false)
      : impl_(matcher.impl_) {
    if (safe) {
      LOG(FATAL) << "TableMatcher: Safe copy not supported";
    }
  }

  virtual const FST &GetFst() const { return impl_->GetFst(); }

  virtual MatchType Type(bool test) const { return impl_->Type(test); }

  void SetState(StateId s) { impl_->SetState(s); }

  bool Find(Label match_label) { return impl_->Find(match_label); }

  const Arc &Value() const { return impl_->Value(); }

  void Next() { impl_->Next(); }

  bool Done() const { return impl_->Done(); }

  const Arc &Value() { return impl_->Value(); }

  /// Returns a heap-allocated shallow copy (see the copy constructor).
  virtual TableMatcher<FST, BackoffMatcher> *Copy(bool safe = false) const {
    return new TableMatcher<FST, BackoffMatcher>(*this, safe);
  }

  // The implementation does not change its FST, so the properties are those
  // of the FST it is applied to.
  virtual uint64 Properties(uint64 props) const {
    return impl_->Properties(props);
  }

 private:
  std::shared_ptr<Impl> impl_;  // shared between all copies of this matcher.

  // MatcherBase virtual hooks, forwarded to the shared implementation.
  virtual void SetState_(StateId s) { impl_->SetState(s); }
  virtual bool Find_(Label label) { return impl_->Find(label); }
  virtual bool Done_() const { return impl_->Done(); }
  virtual const Arc &Value_() const { return impl_->Value(); }
  virtual void Next_() { impl_->Next(); }

  // Assignment is disabled.
  TableMatcher &operator=(const TableMatcher &) = delete;
};

/// Options for TableCompose: the inherited TableMatcherOptions plus settings
/// for the composition itself.
struct TableComposeOptions : public TableMatcherOptions {
  // If true, TableCompose calls Connect() on its output.
  bool connect;
  // Which pre-defined filter to use.  (Neither TableCompose overload in this
  // header reads this field.)
  ComposeFilter filter_type;
  // MATCH_OUTPUT puts the table matcher on the first FST, MATCH_INPUT on the
  // second one.
  MatchType table_match_type;

  /// Defaults: connect the output, sequence filter, table on the output side
  /// of the first FST; matcher options take their own defaults.
  TableComposeOptions()
      : TableMatcherOptions(),
        connect(true),
        filter_type(SEQUENCE_FILTER),
        table_match_type(MATCH_OUTPUT) {}

  /// Starts from the matcher options `mo`; the remaining arguments default to
  /// the same values as the default constructor.
  explicit TableComposeOptions(const TableMatcherOptions &mo, bool c = true,
                               ComposeFilter ft = SEQUENCE_FILTER,
                               MatchType tms = MATCH_OUTPUT)
      : TableMatcherOptions(mo),
        connect(c),
        filter_type(ft),
        table_match_type(tms) {}
};

template <class Arc>
void TableCompose(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2,
                  MutableFst<Arc> *ofst,
                  const TableComposeOptions &opts = TableComposeOptions()) {
  typedef Fst<Arc> F;
  CacheOptions nopts;
  nopts.gc_limit = 0;  // Cache only the last state for fastest copy.
  if (opts.table_match_type == MATCH_OUTPUT) {
    // ComposeFstImplOptions templated on matcher for fst1, matcher for fst2.
    ComposeFstImplOptions<TableMatcher<F>, SortedMatcher<F> > impl_opts(nopts);
    impl_opts.matcher1 = new TableMatcher<F>(ifst1, MATCH_OUTPUT, opts);
    *ofst = ComposeFst<Arc>(ifst1, ifst2, impl_opts);
  } else {
    assert(opts.table_match_type == MATCH_INPUT);
    // ComposeFstImplOptions templated on matcher for fst1, matcher for fst2.
    ComposeFstImplOptions<SortedMatcher<F>, TableMatcher<F> > impl_opts(nopts);
    impl_opts.matcher2 = new TableMatcher<F>(ifst2, MATCH_INPUT, opts);
    *ofst = ComposeFst<Arc>(ifst1, ifst2, impl_opts);
  }
  if (opts.connect) Connect(ofst);
}

/// TableComposeCache lets us do multiple compositions while caching the same
/// matcher.
/// Holds the TableMatcher that the cache-taking TableCompose overload creates
/// on first use, together with the options to compose with.
///
/// NOTE(review): the struct deletes `matcher` in its destructor but does not
/// define copy operations, so copying an instance whose matcher is set would
/// lead to the same pointer being deleted twice.
template <class F>
struct TableComposeCache {
  TableMatcher<F> *matcher;  // NULL until created; deleted by the destructor.
  TableComposeOptions opts;

  explicit TableComposeCache(
      const TableComposeOptions &opts = TableComposeOptions())
      : matcher(nullptr), opts(opts) {}

  ~TableComposeCache() { delete matcher; }
};

template <class Arc>
void TableCompose(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2,
                  MutableFst<Arc> *ofst, TableComposeCache<Fst<Arc> > *cache) {
  typedef Fst<Arc> F;
  assert(cache != NULL);
  CacheOptions nopts;
  nopts.gc_limit = 0;  // Cache only the last state for fastest copy.
  if (cache->opts.table_match_type == MATCH_OUTPUT) {
    ComposeFstImplOptions<TableMatcher<F>, SortedMatcher<F> > impl_opts(nopts);
    if (cache->matcher == NULL)
      cache->matcher = new TableMatcher<F>(ifst1, MATCH_OUTPUT, cache->opts);
    impl_opts.matcher1 = cache->matcher->Copy();  // not passing "safe": may not
    // be thread-safe-- anway I don't understand this part.
    *ofst = ComposeFst<Arc>(ifst1, ifst2, impl_opts);
  } else {
    assert(cache->opts.table_match_type == MATCH_INPUT);
    ComposeFstImplOptions<SortedMatcher<F>, TableMatcher<F> > impl_opts(nopts);
    if (cache->matcher == NULL)
      cache->matcher = new TableMatcher<F>(ifst2, MATCH_INPUT, cache->opts);
    impl_opts.matcher2 = cache->matcher->Copy();
    *ofst = ComposeFst<Arc>(ifst1, ifst2, impl_opts);
  }
  if (cache->opts.connect) Connect(ofst);
}

}  // namespace fst

#endif  // KALDI_FSTEXT_TABLE_MATCHER_H_


================================================
FILE: runtime/engine/kaldi/lat/CMakeLists.txt
================================================

# Build the kaldi-lat library from the two lattice sources in this directory.
add_library(kaldi-lat
determinize-lattice-pruned.cc
lattice-functions.cc
)
# PUBLIC: targets that link against kaldi-lat also link against kaldi-util.
target_link_libraries(kaldi-lat PUBLIC kaldi-util)

================================================
FILE: runtime/engine/kaldi/lat/determinize-lattice-pruned.cc
================================================
// lat/determinize-lattice-pruned.cc

// Copyright 2009-2012  Microsoft Corporation
//           2012-2013  Johns Hopkins University (Author: Daniel Povey)
//                2014  Guoguo Chen

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include <vector>
#include <climits>
#include "fstext/determinize-lattice.h" // for LatticeStringRepository
#include "fstext/fstext-utils.h"
#include "lat/lattice-functions.h"  // for PruneLattice
// #include "lat/minimize-lattice.h"   // for minimization
// #include "lat/push-lattice.h"       // for minimization
#include "lat/determinize-lattice-pruned.h"

namespace fst {

using std::vector;
using std::pair;
using std::greater;

// class LatticeDeterminizerPruned is templated on the same types that
// CompactLatticeWeight is templated on: the base weight (Weight), typically
// LatticeWeightTpl<float> etc. but could also be e.g. TropicalWeight, and the
// IntType, typically int32, used for the output symbols in the compact
// representation of strings [note: the output symbols would usually be
// p.d.f. id's in the anticipated use of this code] It has a special requirement
// on the Weight type: that there should be a Compare function on the weights
// such that Compare(w1, w2) returns -1 if w1 < w2, 0 if w1 == w2, and +1 if w1 >
// w2.  This requires that there be a total order on the weights.

template<class Weight, class IntType> class LatticeDeterminizerPruned {
 public:
  // Output to Gallic acceptor (so the strings go on weights, and there is a 1-1 correspondence
  // between our states and the states in ofst).  If destroy == true, release memory as we go
  // (but we cannot output again).

  typedef CompactLatticeWeightTpl<Weight, IntType> CompactWeight;
  typedef ArcTpl<CompactWeight> CompactArc; // arc in compact, acceptor form of lattice
  typedef ArcTpl<Weight> Arc; // arc in non-compact version of lattice

  // Output to standard FST with CompactWeightTpl<Weight> as its weight type (the
  // weight stores the original output-symbol strings).  If destroy == true,
  // release memory as we go (but we cannot output again).
  void Output(MutableFst<CompactArc>  *ofst, bool destroy = true) {
    // Writes the determinized result as a compact-lattice acceptor: one ofst
    // state per output state, with the symbol string carried in the weight.
    KALDI_ASSERT(determinized_);
    typedef typename Arc::StateId StateId;
    const StateId num_states = static_cast<StateId>(output_states_.size());
    if (destroy) FreeMostMemory();
    ofst->DeleteStates();
    ofst->SetStart(kNoStateId);
    if (num_states == 0) return;

    // Create the states up front so that ids in ofst match our own ids.
    for (StateId s = 0; s < num_states; ++s) {
      OutputStateId added = ofst->AddState();
      KALDI_ASSERT(added == s);
    }
    ofst->SetStart(0);

    // Emit the arcs / final weights of each state in turn.
    for (StateId src = 0; src < num_states; ++src) {
      vector<TempArc> &pending = output_states_[src]->arcs;
      const size_t num_pending = pending.size();
      for (size_t k = 0; k < num_pending; ++k) {
        const TempArc &t = pending[k];
        vector<Label> labels;
        repository_.ConvertToVector(t.string, &labels);
        CompactWeight w(t.weight, labels);
        if (t.nextstate == kNoStateId) {
          // A TempArc without a destination encodes a final weight.
          ofst->SetFinal(src, w);
          continue;
        }
        CompactArc out_arc;
        out_arc.ilabel = t.ilabel;
        out_arc.olabel = t.ilabel;  // acceptor: output label mirrors input.
        out_arc.weight = w;         // carries both the weight and the string.
        out_arc.nextstate = t.nextstate;
        ofst->AddArc(src, out_arc);
      }
      // Release this state's arcs right away: ofst is growing at the same
      // time, and we want to keep the peak allocation down.
      if (destroy) vector<TempArc>().swap(pending);
    }
    if (destroy) {
      FreeOutputStates();
      repository_.Destroy();
    }
  }

  // Output to standard FST with Weight as its weight type.  We will create extra
  // states to handle sequences of symbols on the output.  If destroy == true,
  // release memory as we go (but we cannot output again).
  void  Output(MutableFst<Arc> *ofst, bool destroy = true) {
    // Writes the determinized result as an ordinary transducer.  A string of
    // length > 1 is unrolled into a chain of freshly added states, one output
    // label per arc; the weight (and, for real arcs, the input label) is
    // placed on the first arc of the chain.
    const OutputStateId num_states =
        static_cast<OutputStateId>(output_states_.size());
    ofst->DeleteStates();
    if (num_states == 0) {
      ofst->SetStart(kNoStateId);
      return;
    }
    if (destroy) FreeMostMemory();

    // The "core" states come first so that their ids match ours; chain states
    // are appended after them as needed.
    for (OutputStateId s = 0; s < num_states; ++s) {
      OutputStateId added = ofst->AddState();
      KALDI_ASSERT(added == s);
    }
    ofst->SetStart(0);

    for (OutputStateId src = 0; src < num_states; ++src) {
      vector<TempArc> &pending = output_states_[src]->arcs;
      const size_t num_pending = pending.size();
      for (size_t k = 0; k < num_pending; ++k) {
        const TempArc &t = pending[k];
        vector<Label> labels;
        repository_.ConvertToVector(t.string, &labels);
        const size_t len = labels.size();
        OutputStateId tail = src;

        if (t.nextstate == kNoStateId) {
          // Final weight: every label gets its own epsilon-input arc, and the
          // last state of the chain is made final.
          for (size_t i = 0; i < len; ++i) {
            const OutputStateId fresh = ofst->AddState();
            Arc link;
            link.ilabel = 0;  // epsilon.
            link.olabel = labels[i];
            link.weight = (i == 0 ? t.weight : Weight::One());
            link.nextstate = fresh;
            ofst->AddArc(tail, link);
            tail = fresh;
          }
          // With an empty string no arc was added, so the weight has to sit
          // on the final state itself.
          ofst->SetFinal(tail, (len == 0 ? t.weight : Weight::One()));
        } else {
          // Real arc: all labels except the last get intermediate states.
          // "i + 1 < len" (rather than "i < len - 1") keeps the unsigned
          // comparison safe when len == 0.
          for (size_t i = 0; i + 1 < len; ++i) {
            const OutputStateId fresh = ofst->AddState();
            Arc link;
            link.ilabel = (i == 0 ? t.ilabel : 0);
            link.olabel = labels[i];
            link.weight = (i == 0 ? t.weight : Weight::One());
            link.nextstate = fresh;
            ofst->AddArc(tail, link);
            tail = fresh;
          }
          // Last arc of the chain; it is also the first one when len <= 1,
          // in which case it must carry the weight and the input label.
          const bool is_first = (len <= 1);
          Arc last;
          last.ilabel = (is_first ? t.ilabel : 0);
          last.olabel = (len > 0 ? labels.back() : 0);
          last.weight = (is_first ? t.weight : Weight::One());
          last.nextstate = t.nextstate;
          ofst->AddArc(tail, last);
        }
      }
      // Release this state's arcs now, since ofst is allocating concurrently.
      if (destroy) vector<TempArc>().swap(pending);
    }
    if (destroy) {
      FreeOutputStates();
      repository_.Destroy();
    }
  }


  // Initializer.  After initializing the object you will typically
  // call Determinize() and then call one of the Output functions.
  // Note: ifst.Copy() will generally do a
  // shallow copy.  We do it like this for memory safety, rather than
  // keeping a reference or pointer to ifst_.
  LatticeDeterminizerPruned(const ExpandedFst<Arc> &ifst,
                            double beam,
                            DeterminizeLatticePrunedOptions opts):
      // The arc/element counters start at zero.  ifst.Copy() gives this object
      // its own FST handle, which FreeMostMemory() later deletes.
      // NOTE(review): equal_ is built from the *member* opts_ (not the
      // parameter), so this presumably relies on opts_ being declared before
      // equal_ in the class -- confirm against the member declarations.
      num_arcs_(0), num_elems_(0), ifst_(ifst.Copy()), beam_(beam), opts_(opts),
      equal_(opts_.delta), determinized_(false),
      // Both subset hashes share hasher_ and equal_; the leading 3 is
      // presumably the initial bucket-count hint -- TODO confirm.
      minimal_hash_(3, hasher_, equal_), initial_hash_(3, hasher_, equal_) {
    KALDI_ASSERT(Weight::Properties() & kIdempotent); // this algorithm won't
    // work correctly otherwise.
  }

  void FreeOutputStates() {
    for (size_t i = 0; i < output_states_.size(); i++)
      delete output_states_[i];
    vector<OutputState*> temp;
    temp.swap(output_states_);
  }

  // frees all memory except the info (in output_states_[ ]->arcs)
  // that we need to output the FST.
  void FreeMostMemory() {
    if (ifst_) {
      delete ifst_;
      ifst_ = NULL;
    }
    { MinimalSubsetHash tmp; tmp.swap(minimal_hash_); }

    for (size_t i = 0; i < output_states_.size(); i++) {
      vector<Element> empty_subset;
      empty_subset.swap(output_states_[i]->minimal_subset);
    }

    for (typename InitialSubsetHash::iterator iter = initial_hash_.begin();
         iter != initial_hash_.end(); ++iter)
      delete iter->first;
    { InitialSubsetHash tmp; tmp.swap(initial_hash_); }
    for (size_t i = 0; i < output_states_.size(); i++) {
      vector<Element> tmp;
      tmp.swap(output_states_[i]->minimal_subset);
    }
    { vector<char> tmp;  tmp.swap(isymbol_or_final_); }
    { // Free up the queue.  I'm not sure how to make sure all
      // the memory is really freed (no swap() function)... doesn't really
      // matter much though.
      while (!queue_.empty()) {
        Task *t = queue_.top();
        delete t;
        queue_.pop();
      }
    }
    { vector<pair<Label, Element> > tmp; tmp.swap(all_elems_tmp_); }
  }

  ~LatticeDeterminizerPruned() {
    // FreeMostMemory() has to run first: it walks output_states_ to empty each
    // state's minimal_subset, and FreeOutputStates() deletes those states.
    FreeMostMemory();
    FreeOutputStates();
    // rest is deleted by destructors.
  }

  void RebuildRepository() { // rebuild the string repository,
    // freeing stuff we don't need.. we call this when memory usage
    // passes a supplied threshold.  We need to accumulate all the
    // strings we need the repository to "remember", then tell it
    // to clean the repository.
    std::vector<StringId> needed_strings;
    for (size_t i = 0; i < output_states_.size(); i++) {
      AddStrings(output_states_[i]->minimal_subset, &needed_strings);
      for (size_t j = 0; j < output_states_[i]->arcs.size(); j++)
        needed_strings.push_back(output_states_[i]->arcs[j].string);
    }

    { // the queue doesn't allow us access to the underlying vector,
      // so we have to resort to a temporary collection.
      std::vector<Task*> tasks;
      while (!queue_.empty()) {
        Task *task = queue_.top();
        queue_.pop();
        tasks.push_back(task);
        AddStrings(task->subset, &needed_strings);
      }
      for (size_t i = 0; i < tasks.size(); i++)
        queue_.push(tasks[i]);
    }

    // the following loop covers strings present in initial_hash_.
    for (typename InitialSubsetHash::const_iterator
             iter = initial_hash_.begin();
         iter != initial_hash_.end(); ++iter) {
      const vector<Element> &vec = *(iter->first);
      Element elem = iter->second;
      AddStrings(vec, &needed_strings);
      needed_strings.push_back(elem.string);
    }
    std::sort(needed_strings.begin(), needed_strings.end());
    needed_strings.erase(std::unique(needed_strings.begin(),
                                     needed_strings.end()),
                         needed_strings.end()); // uniq the strings.
    KALDI_LOG << "Rebuilding repository.";

    repository_.Rebuild(needed_strings);
  }

  bool CheckMemoryUsage() {
    int32 repo_size = repository_.MemSize(),
        arcs_size = num_arcs_ * sizeof(TempArc),
        elems_size = num_elems_ * sizeof(Element),
        total_size = repo_size + arcs_size + elems_size;
    if (opts_.max_mem > 0 && total_size > opts_.max_mem) { // We passed the memory threshold.
      // This is usually due to the repository getting large, so we
      // clean this out.
      RebuildRepository();
      int32 new_repo_size = repository_.MemSize(),
          new_total_size = new_repo_size + arcs_size + elems_size;

      KALDI_VLOG(2) << "Rebuilt repository in determinize-lattice: repository shrank from "
                    << repo_size << " to " << new_repo_size << " bytes (approximately)";

      if (new_total_size > static_cast<int32>(opts_.max_mem * 0.8)) {
        // Rebuilding didn't help enough-- we need a margin to stop
        // having to rebuild too often.  We'll just return to the user at
        // this point, with a partial lattice that's pruned tighter than
        // the specified beam.  Here we figure out what the effective
        // beam was.
        double effective_beam = beam_;
        if (!queue_.empty()) { // Note: queue should probably not be empty; we're
          // just being paranoid here.
          Task *task = queue_.top();
          double total_weight = backward_costs_[ifst_->Start()]; // best weight of FST.
          effective_beam = task->priority_cost - total_weight;
        }
        KALDI_WARN << "Did not reach requested beam in determinize-lattice: "
                   << "size exceeds maximum " << opts_.max_mem
                   << " bytes; (repo,arcs,elems) = (" << repo_size << ","
                   << arcs_size << "," << elems_size
                   << "), after rebuilding, repo size was " << new_repo_size
                   << ", effective beam was " << effective_beam
                   << " vs. requested beam " << beam_;
        return false;
      }
    }
    return true;
  }

  bool Determinize(double *effective_beam) {
    KALDI_ASSERT(!determinized_);
    // This determinizes the input fst but leaves it in the "special format"
    // in "output_arcs_".  Must be called after Initialize().  To get the
    // output, call one of the Output routines.

    InitializeDeterminization(); // some start-up tasks.
    while (!queue_.empty()) {
      Task *task = queue_.top();
      // Note: the queue contains only tasks that are "within the beam".
      // We also have to check whether we have reached one of the user-specified
      // maximums, of estimated memory, arcs, or states.  The condition for
      // ending is:
      // num-states is more than user specified, OR
      // num-arcs is more than user specified, OR
      // memory passed a user-specified threshold and cleanup failed
      //  to get it below that threshold.
      size_t num_states = output_states_.size();
      if ((opts_.max_states > 0 && num_states > opts_.max_states) ||
          (opts_.max_arcs > 0 && num_arcs_ > opts_.max_arcs) ||
          (num_states % 10 == 0 && !CheckMemoryUsage())) { // note: at some point
        // it was num_states % 100, not num_states % 10, but I encountered an example
        // where memory was exhausted before we reached state #100.
        KALDI_VLOG(1) << "Lattice determinization terminated but not "
                      << " because of lattice-beam.  (#states, #arcs) is ( "
                      << output_states_.size() << ", " << num_arcs_
                      << " ), versus limits ( " << opts_.max_states << ", "
                      << opts_.max_arcs << " ) (else, may be memory limit).";
        break;
        // we terminate the determinization here-- whatever we already expanded is
        // what we'll return...  because we expanded stuff in order of total
        // (forward-backward) weight, the stuff we returned first is the most
        // important.
      }
      queue_.pop();
      ProcessTransition(task->state, task->label, &(task->subset));
      delete task;
    }
    determinized_ = true;
    if (effective_beam != NULL) {
      if (queue_.empty()) *effective_beam = beam_;
      else
        *effective_beam = queue_.top()->priority_cost -
            backward_costs_[ifst_->Start()];
    }
    return (queue_.empty()); // return success if queue was empty, i.e. we processed
    // all tasks and did not break out of the loop early due to reaching a memory,
    // arc or state limit.
  }
 private:

  typedef typename Arc::Label Label;
  typedef typename Arc::StateId StateId;  // use this when we don't know if it's input or output.
  typedef typename Arc::StateId InputStateId;  // state in the input FST.
  typedef typename Arc::StateId OutputStateId;  // same as above but distinguish
                                                // states in output Fst.

  typedef LatticeStringRepository<IntType> StringRepositoryType;
  typedef const typename StringRepositoryType::Entry* StringId;

  // Element of a subset [of original states]
  struct Element {
    StateId state;    // normally an InputStateId; in one case an OutputStateId.
    StringId string;  // handle of a string in the repository.
    Weight weight;

    // Two elements differ if any of the three fields differs.
    bool operator != (const Element &other) const {
      if (state != other.state) return true;
      if (string != other.string) return true;
      return weight != other.weight;
    }
    // Ordering on state id only; meant solely for the priority_queue inside
    // EpsilonClosure().
    bool operator > (const Element &other) const {
      return other.state < state;
    }
    // Ordering on state id only; meant solely for the sort inside
    // EpsilonClosure().
    bool operator < (const Element &other) const {
      return other.state > state;
    }
  };

  // Arcs in the format we temporarily create in this class (a representation, essentially of
  // a Gallic Fst).
  struct TempArc {
    Label ilabel;  // input label; ProcessFinal() sets it to 0 for final-weight entries.
    StringId string;  // Look it up in the StringRepository, it's a sequence of Labels.
    OutputStateId nextstate;  // destination state, or kNoStateId when this
                              // entry really encodes a final weight.
    Weight weight;  // weight of the arc, or the final weight itself.
  };

  // Hashing function used in hash of subsets.
  // A subset is a pointer to vector<Element>.
  // The Elements are in sorted order on state id, and without repeated states.
  // Because the order of Elements is fixed, we can use a hashing function that is
  // order-dependent.  However the weights are not included in the hashing function--
  // we hash subsets that differ only in weight to the same key.  This is not optimal
  // in terms of the O(N) performance but typically if we have a lot of determinized
  // states that differ only in weight then the input probably was pathological in some way,
  // or even non-determinizable.
  //   We don't quantize the weights, in order to avoid inexactness in simple cases.
  // Instead we apply the delta when comparing subsets for equality, and allow a small
  // difference.

  class SubsetKey {
   public:
    size_t operator ()(const vector<Element> * subset) const {  // hashes only the state and string.
      size_t hash = 0, factor = 1;
      for (typename vector<Element>::const_iterator iter= subset->begin(); iter != subset->end(); ++iter) {
        hash *= factor;
        hash += iter->state + reinterpret_cast<size_t>(iter->string);
        factor *= 23531;  // these numbers are primes.
      }
      return hash;
    }
  };

  // This is the equality operator on subsets.  It checks for exact match on state-id
  // and string, and approximate match on weights.
  class SubsetEqual {
   public:
    bool operator ()(const vector<Element> * s1, const vector<Element> * s2) const {
      size_t sz = s1->size();
      KALDI_ASSERT(sz>=0);
      if (sz != s2->size()) return false;
      typename vector<Element>::const_iterator iter1 = s1->begin(),
          iter1_end = s1->end(), iter2=s2->begin();
      for (; iter1 < iter1_end; ++iter1, ++iter2) {
        if (iter1->state != iter2->state ||
           iter1->string != iter2->string ||
            ! ApproxEqual(iter1->weight, iter2->weight, delta_)) return false;
      }
      return true;
    }
    float delta_;
    SubsetEqual(float delta): delta_(delta) {}
    SubsetEqual(): delta_(kDelta) {}
  };

  // Operator that says whether two Elements have the same states.
  // Used only for debug.
  class SubsetEqualStates {
   public:
    bool operator ()(const vector<Element> * s1, const vector<Element> * s2) const {
      size_t sz = s1->size();
      KALDI_ASSERT(sz>=0);
      if (sz != s2->size()) return false;
      typename vector<Element>::const_iterator iter1 = s1->begin(),
          iter1_end = s1->end(), iter2=s2->begin();
      for (; iter1 < iter1_end; ++iter1, ++iter2) {
        if (iter1->state != iter2->state) return false;
      }
      return true;
    }
  };

  // Define the hash type we use to map subsets (in minimal
  // representation) to OutputStateId.
  typedef unordered_map<const vector<Element>*, OutputStateId,
                        SubsetKey, SubsetEqual> MinimalSubsetHash;

  // Define the hash type we use to map subsets (in initial
  // representation) to OutputStateId, together with an
  // extra weight. [note: we interpret the Element.state in here
  // as an OutputStateId even though it's declared as InputStateId;
  // these types are the same anyway].
  typedef unordered_map<const vector<Element>*, Element,
                        SubsetKey, SubsetEqual> InitialSubsetHash;


  // converts the representation of the subset from canonical (all states) to
  // minimal (only states with output symbols on arcs leaving them, and final
  // states).  Output is not necessarily normalized, even if input_subset was.
  void ConvertToMinimal(vector<Element> *subset) {
    // In-place filter: keep only the elements whose state passes
    // IsIsymbolOrFinal(), preserving their relative order, then shrink the
    // vector to the number kept.
    KALDI_ASSERT(!subset->empty());
    vector<Element> &elems = *subset;
    const size_t total = elems.size();
    size_t kept = 0;
    for (size_t i = 0; i < total; ++i) {
      if (IsIsymbolOrFinal(elems[i].state)) {
        elems[kept] = elems[i];
        ++kept;
      }
    }
    elems.resize(kept);
  }

  // Takes a minimal, normalized subset, and converts it to an OutputStateId.
  // Involves a hash lookup, and possibly adding a new OutputStateId.
  // If it creates a new OutputStateId, it creates a new record for it, works
  // out its final-weight, and puts stuff on the queue relating to its
  // transitions.
  OutputStateId MinimalToStateId(const vector<Element> &subset,
                                 const double forward_cost) {
    typename MinimalSubsetHash::const_iterator hit =
        minimal_hash_.find(&subset);

    if (hit == minimal_hash_.end()) {
      // Unseen subset: allocate a new output state for it.  The hash key
      // points at the copy of the subset owned by the new state.
      const OutputStateId fresh_id =
          static_cast<OutputStateId>(output_states_.size());
      OutputState *fresh = new OutputState(subset, forward_cost);
      minimal_hash_[&(fresh->minimal_subset)] = fresh_id;
      output_states_.push_back(fresh);
      num_elems_ += subset.size();
      // The new state is processed immediately rather than queued: work out
      // its final weight, then its transitions (which is what puts tasks on
      // the queue).
      ProcessFinal(fresh_id);
      ProcessTransitions(fresh_id);
      return fresh_id;
    }

    // Known subset: return its id.  The comparison below is only a sanity
    // check on the algorithm; with large weights it can trip due to roundoff.
    const OutputStateId known_id = hit->second;
    const OutputState &known = *(output_states_[known_id]);
    if (forward_cost < known.forward_cost - 0.1) {
      KALDI_WARN << "New cost is less (check the difference is small) "
                 << forward_cost << ", "
                 << known.forward_cost;
    }
    return known_id;
  }


  // Given a normalized initial subset of elements (i.e. before epsilon closure),
  // compute the corresponding output-state.
  OutputStateId InitialToStateId(const vector<Element> &subset_in,
                                 double forward_cost,
                                 Weight *remaining_weight,
                                 StringId *common_prefix) {
    // Fast path: this initial subset has been seen before, so the output
    // state and the factored-out (weight, string) are already cached.
    typename InitialSubsetHash::const_iterator cached =
        initial_hash_.find(&subset_in);
    if (cached != initial_hash_.end()) {
      const Element &memo = cached->second;
      *remaining_weight = memo.weight;
      *common_prefix = memo.string;
      if (memo.weight == Weight::Zero())
        KALDI_WARN << "Zero weight!";
      return memo.state;
    }

    // Slow path.  Work on a copy: follow epsilons, reduce to the minimal
    // representation, then normalize.  (The normalized canonical subset is
    // never computed, only the normalized minimal one.)
    vector<Element> work(subset_in);
    EpsilonClosure(&work);
    ConvertToMinimal(&work);

    // "memo" collects what goes into initial_hash_: the weight and string
    // removed by normalization, plus the resulting output-state id.
    Element memo;
    NormalizeSubset(&work, &memo.weight, &memo.string);

    forward_cost += ConvertToCost(memo.weight);
    memo.state = MinimalToStateId(work, forward_cost);

    *remaining_weight = memo.weight;
    *common_prefix = memo.string;
    if (memo.weight == Weight::Zero())
      KALDI_WARN << "Zero weight!";

    // Cache under a heap-allocated copy of the *initial* subset, so the next
    // lookup can skip the epsilon closure and everything after it.
    vector<Element> *key = new vector<Element>(subset_in);
    initial_hash_[key] = memo;
    num_elems_ += key->size(); // keep track of memory usage.
    return memo.state;
  }

  // returns the Compare value (-1 if a < b, 0 if a == b, 1 if a > b) according
  // to the ordering we defined on strings for the CompactLatticeWeightTpl.
  // see function
  // inline int Compare (const CompactLatticeWeightTpl<WeightType,IntType> &w1,
  //                     const CompactLatticeWeightTpl<WeightType,IntType> &w2)
  // in lattice-weight.h.
  // this is the same as that, but optimized for our data structures.
  inline int Compare(const Weight &a_w, StringId a_str,
                     const Weight &b_w, StringId b_str) const {
    int weight_comp = fst::Compare(a_w, b_w);
    if (weight_comp != 0) return weight_comp;
    // now comparing strings.
    if (a_str == b_str) return 0;
    vector<IntType> a_vec, b_vec;
    repository_.ConvertToVector(a_str, &a_vec);
    repository_.ConvertToVector(b_str, &b_vec);
    // First compare their lengths.
    int a_len = a_vec.size(), b_len = b_vec.size();
    // use opposite order on the string lengths (c.f. Compare in
    // lattice-weight.h)
    if (a_len > b_len) return -1;
    else if (a_len < b_len) return 1;
    for(int i = 0; i < a_len; i++) {
      if (a_vec[i] < b_vec[i]) return -1;
      else if (a_vec[i] > b_vec[i]) return 1;
    }
    KALDI_ASSERT(0); // because we checked if a_str == b_str above, shouldn't reach here
    return 0;
  }

  // This function computes epsilon closure of subset of states by following epsilon links.
  // Called by InitialToStateId and Initialize.
  // Has no side effects except on the string repository.  The "output_subset" is not
  // necessarily normalized (in the sense of there being no common substring), unless
  // input_subset was.
  void EpsilonClosure(vector<Element> *subset) {
    // Replaces *subset by its closure under input-epsilon arcs of ifst_.
    // For every reachable state exactly one Element is kept: the one with the
    // best (weight, string) found.  On return *subset is sorted by state id.
    //
    // at input, subset must have only one example of each StateId.  [will still
    // be so at output].  This function follows input-epsilons, and augments the
    // subset accordingly.

    // Work queue ordered by Element::operator>, i.e. smallest state id first.
    std::priority_queue<Element, vector<Element>, greater<Element> > queue;
    // Best Element found so far for each state.
    unordered_map<InputStateId, Element> cur_subset;
    typedef typename unordered_map<InputStateId, Element>::iterator MapIter;
    typedef typename vector<Element>::const_iterator VecIter;

    // Seed both the queue and the map with the input elements.
    for (VecIter iter = subset->begin(); iter != subset->end(); ++iter) {
      queue.push(*iter);
      cur_subset[iter->state] = *iter;
    }

    // find whether input fst is known to be sorted on input label.
    bool sorted = ((ifst_->Properties(kILabelSorted, false) & kILabelSorted) != 0);
    bool replaced_elems = false; // relates to an optimization, see below.
    int counter = 0; // stops infinite loops here for non-lattice-determinizable input
    // (e.g. input with negative-cost epsilon loops); useful in testing.
    while (queue.size() != 0) {
      Element elem = queue.top();
      queue.pop();

      // The next if-statement is a kind of optimization.  It's to prevent us
      // unnecessarily repeating the processing of a state.  "cur_subset" always
      // contains only one Element with a particular state.  The issue is that
      // whenever we modify the Element corresponding to that state in "cur_subset",
      // both the new (optimal) and old (less-optimal) Element will still be in
      // "queue".  The next if-statement stops us from wasting compute by
      // processing the old Element.
      if (replaced_elems && cur_subset[elem.state] != elem)
        continue;
      // Guard against runaway loops; only active when opts_.max_loop > 0.
      if (opts_.max_loop > 0 && counter++ > opts_.max_loop) {
        KALDI_ERR << "Lattice determinization aborted since looped more than "
                  << opts_.max_loop << " times during epsilon closure.";
      }
      for (ArcIterator<ExpandedFst<Arc> > aiter(*ifst_, elem.state); !aiter.Done(); aiter.Next()) {
        const Arc &arc = aiter.Value();
        if (sorted && arc.ilabel != 0) break;  // Break from the loop: due to sorting there will be no
        // more transitions with epsilons as input labels.
        if (arc.ilabel == 0
            && arc.weight != Weight::Zero()) {  // Epsilon transition.
          Element next_elem;
          next_elem.state = arc.nextstate;
          next_elem.weight = Times(elem.weight, arc.weight);
          // next_elem.string is not set up yet... create it only
          // when we know we need it (this is an optimization)

          MapIter iter = cur_subset.find(next_elem.state);
          if (iter == cur_subset.end()) {
            // was no such StateId: insert and add to queue.
            // An epsilon output label leaves the string unchanged; otherwise
            // the label is appended via the repository.
            next_elem.string = (arc.olabel == 0 ? elem.string :
                                repository_.Successor(elem.string, arc.olabel));
            cur_subset[next_elem.state] = next_elem;
            queue.push(next_elem);
          } else {
            // was not inserted because one already there.  In normal
            // determinization we'd add the weights.  Here, we find which one
            // has the better weight, and keep its corresponding string.
            int comp = fst::Compare(next_elem.weight, iter->second.weight);
            if (comp == 0) { // A tie on weights.  This should be a rare case;
                             // we don't optimize for it.
              // Break the tie with the member Compare(), which also looks at
              // the strings; that requires building next_elem.string now.
              next_elem.string = (arc.olabel == 0 ? elem.string :
                                  repository_.Successor(elem.string,
                                                        arc.olabel));
              comp = Compare(next_elem.weight, next_elem.string,
                             iter->second.weight, iter->second.string);
            }
            if(comp == 1) { // next_elem is better, so use its (weight, string)
              next_elem.string = (arc.olabel == 0 ? elem.string :
                                  repository_.Successor(elem.string, arc.olabel));
              iter->second.string = next_elem.string;
              iter->second.weight = next_elem.weight;
              // Re-queue so the improvement propagates; the superseded entry
              // still in the queue is skipped by the replaced_elems check.
              queue.push(next_elem);
              replaced_elems = true;
            }
            // else it is the same or worse, so use original one.
          }
        }
      }
    }

    { // copy cur_subset to subset.
      subset->clear();
      subset->reserve(cur_subset.size());
      MapIter iter = cur_subset.begin(), end = cur_subset.end();
      for (; iter != end; ++iter) subset->push_back(iter->second);
      // sort by state ID, because the subset hash function is order-dependent(see SubsetKey)
      std::sort(subset->begin(), subset->end());
    }
  }


  // This function works out the final-weight of the determinized state.
  // called by ProcessSubset.
  // Has no side effects except on the variable repository_, and
  // output_states_[output_state_id].arcs

  void ProcessFinal(OutputStateId output_state_id) {
    OutputState &out = *(output_states_[output_state_id]);
    const vector<Element> &members = out.minimal_subset;
    // "members" is not required to be non-empty here, so that is not checked.

    // Track the best (weight, string) over all members that are final in the
    // input.  best_string is initialized only to keep the compiler quiet; it
    // is not read unless found_final has been set.
    bool found_final = false;
    Weight best_weight = Weight::Zero();
    StringId best_string = repository_.EmptyString();

    const size_t n = members.size();
    for (size_t i = 0; i < n; ++i) {
      const Element &m = members[i];
      Weight cand_weight = Times(m.weight, ifst_->Final(m.state));
      StringId cand_string = m.string;
      if (cand_weight != Weight::Zero()) {
        // Accept the first candidate unconditionally; afterwards only one
        // that Compare() ranks strictly higher than the current best.
        if (!found_final || Compare(cand_weight, cand_string,
                                    best_weight, best_string) == 1) {
          found_final = true;
          best_weight = cand_weight;
          best_string = cand_string;
        }
      }
    }

    // The final weight is recorded only if it falls inside the pruning
    // cutoff.  It is stored as a TempArc with no destination state, alongside
    // the ordinary transitions.
    if (found_final &&
        ConvertToCost(best_weight) + out.forward_cost <= cutoff_) {
      TempArc final_entry;
      final_entry.ilabel = 0;
      final_entry.string = best_string;
      final_entry.nextstate = kNoStateId;  // marks this entry as a final weight.
      final_entry.weight = best_weight;
      out.arcs.push_back(final_entry);
      num_arcs_++;
    }
  }

  // NormalizeSubset normalizes the subset "elems" by
  // removing any common string prefix (putting it in common_str),
  // and dividing by the total weight (putting it in tot_weight).
  void NormalizeSubset(vector<Element> *elems,
                       Weight *tot_weight,
                       StringId *common_str) {
    // Factors out of "elems" the Plus()-sum of all weights (returned in
    // *tot_weight) and the longest common string prefix (returned in
    // *common_str); each element is left-divided by the sum and has the
    // prefix stripped from its string.
    typedef typename vector<Element>::iterator ElemIter;

    if (elems->empty()) {
      // Nothing to normalize: warn and hand back default outputs.
      KALDI_WARN << "empty subset";
      *tot_weight = Weight::Zero();
      *common_str = repository_.EmptyString();
      return;
    }

    const ElemIter first = elems->begin(), last = elems->end();

    // Seed the accumulators from the first element, then fold in the rest.
    vector<IntType> shared_prefix;
    repository_.ConvertToVector(first->string, &shared_prefix);
    Weight weight = first->weight;
    for (ElemIter it = first + 1; it != last; ++it) {
      weight = Plus(weight, it->weight);
      repository_.ReduceToCommonPrefix(it->string, &shared_prefix);
    }
    // Arcs with zero weights were ignored when the subset was built, so the
    // total should never be Zero here.
    KALDI_ASSERT(weight != Weight::Zero());

    const size_t shared_len = shared_prefix.size();
    for (ElemIter it = first; it != last; ++it) {
      it->weight = Divide(it->weight, weight, DIVIDE_LEFT);
      it->string = repository_.RemovePrefix(it->string, shared_len);
    }

    *tot_weight = weight;
    *common_str = repository_.ConvertFromVector(shared_prefix);
  }

  // Take a subset of Elements that is sorted on state, and
  // merge any Elements that have the same state (taking the best
  // (weight, string) pair in the semiring).
  void MakeSubsetUnique(vector<Element> *subset) {
    // In-place compaction of a state-sorted subset: each run of Elements that
    // share a state collapses to a single Element carrying the best
    // (weight, string) pair of the run.

    // This KALDI_ASSERT is a cheap spot-check (first two entries only) that
    // the subset is sorted on state; it will usually fail if it is not.
    KALDI_ASSERT(subset->size() < 2 || (*subset)[0].state <= (*subset)[1].state);

    vector<Element> &elems = *subset;
    const size_t num_in = elems.size();
    size_t read = 0,   // next element to examine.
        write = 0;     // slot where the next surviving element goes.
    while (read < num_in) {
      if (read != write) elems[write] = elems[read];
      Element &kept = elems[write];
      // Absorb every following element that has the same state.
      for (++read; read < num_in && elems[read].state == kept.state; ++read) {
        const Element &rival = elems[read];
        if (Compare(rival.weight, rival.string,
                    kept.weight, kept.string) == 1) {
          // rival > kept in the semiring, so rival's payload wins.
          kept.string = rival.string;
          kept.weight = rival.weight;
        }
      }
      ++write;
    }
    elems.resize(write);
  }

  // ProcessTransition was called from "ProcessTransitions" in the non-pruned
  // code, but now we in effect put the calls to ProcessTransition on a priority
  // queue, and it gets called directly from Determinize().  This function
  // processes a transition from state "ostate_id".  The set "subset" of Elements
  // represents a set of next-states with associated weights and strings, each
  // one arising from an arc from some state in a determinized-state; the
  // next-states are unique (there is only one Element associated with each).
  void ProcessTransition(OutputStateId ostate_id, Label ilabel, vector<Element> *subset) {
    // Step 1: pull the total weight and common string prefix out of the
    // subset; these become (part of) the weight and string of the new arc.
    Weight arc_weight;
    StringId arc_string;
    NormalizeSubset(subset, &arc_weight, &arc_string);

    // Forward cost of the destination = forward cost of the source state plus
    // the cost of the weight we just factored out.
    const double dest_forward_cost =
        output_states_[ostate_id]->forward_cost + ConvertToCost(arc_weight);

    // Step 2: map the normalized subset to an output state (InitialToStateId
    // creates it if necessary).  It may hand back a further weight and string,
    // which we fold into the arc.
    Weight extra_weight;
    StringId extra_string;
    const OutputStateId dest = InitialToStateId(*subset,
                                                dest_forward_cost,
                                                &extra_weight,
                                                &extra_string);
    arc_string = repository_.Concatenate(arc_string, extra_string);
    arc_weight = Times(arc_weight, extra_weight);

    // Step 3: record the arc on the source state.
    TempArc new_arc;
    new_arc.ilabel = ilabel;
    new_arc.nextstate = dest;
    new_arc.string = arc_string;
    new_arc.weight = arc_weight;
    output_states_[ostate_id]->arcs.push_back(new_arc);
    num_arcs_++;
  }


  // "less than" operator for pair<Label, Element>.   Used in ProcessTransitions.
  // Lexicographical order, which only compares the state when ordering the
  // "Element" member of the pair.

  class PairComparator {
   public:
    // Strict weak ordering on (label, element) pairs: primary key is the
    // label, secondary key is the element's state.  The element's weight and
    // string take no part in the comparison.
    inline bool operator () (const pair<Label, Element> &p1, const pair<Label, Element> &p2) {
      if (p1.first != p2.first) return p1.first < p2.first;
      return p1.second.state < p2.second.state;
    }
  };


  // ProcessTransitions processes emitting transitions (transitions with
  // ilabels) out of this subset of states.  It actually only creates records
  // ("Task") that get added to the queue.  The transitions will be processed in
  // priority order from Determinize().  This function does not consider final
  // states.  Partitions the emitting transitions up by ilabel (by sorting on
  // ilabel), and for each unique ilabel, it creates a Task record that contains
  // the information we need to process the transition.

  void ProcessTransitions(OutputStateId output_state_id) {
    typedef pair<Label, Element> LabeledElement;

    const OutputState &src_state = *(output_states_[output_state_id]);
    // The minimal subset may legitimately be empty (unreachable parts of the
    // graph), so no non-emptiness check here.
    const vector<Element> &sources = src_state.minimal_subset;
    // Scratch vector kept as a class member to avoid repeated allocation; it
    // is expected to be empty on entry and is cleared again on exit.
    vector<LabeledElement> &pending = all_elems_tmp_;

    // Phase 1: gather one (ilabel, next-Element) entry for every arc with a
    // non-epsilon input label and non-Zero weight leaving any source state.
    for (size_t i = 0; i < sources.size(); i++) {
      const Element &src = sources[i];
      ArcIterator<ExpandedFst<Arc> > aiter(*ifst_, src.state);
      for (; !aiter.Done(); aiter.Next()) {
        const Arc &arc = aiter.Value();
        if (arc.ilabel == 0) continue;  // epsilons are not handled here.
        const bool has_weight = (arc.weight != Weight::Zero());
        if (!has_weight) continue;

        LabeledElement entry;
        entry.first = arc.ilabel;
        entry.second.state = arc.nextstate;
        entry.second.weight = Times(src.weight, arc.weight);
        // An output epsilon leaves the string unchanged; otherwise extend it.
        entry.second.string = (arc.olabel == 0)
            ? src.string
            : repository_.Successor(src.string, arc.olabel);
        pending.push_back(entry);
      }
    }

    // Phase 2: sort on input label, then on state, so that entries sharing an
    // input label are contiguous.
    std::sort(pending.begin(), pending.end(), PairComparator());

    // Phase 3: turn each run of equal labels into a Task.  (The non-pruned
    // code called ProcessTransition directly at this point; here the work is
    // deferred via the priority queue.)
    const size_t num_pending = pending.size();
    size_t pos = 0;
    while (pos < num_pending) {
      const Label ilabel = pending[pos].first;
      Task *task = new Task;
      task->state = output_state_id;
      task->label = ilabel;

      // Best (lowest) cost-to-go over the elements of this run, excluding the
      // forward cost of the source state, which is added afterwards.
      double best_in_group = std::numeric_limits<double>::infinity();
      for (; pos < num_pending && pending[pos].first == ilabel; pos++) {
        const Element &elem = pending[pos].second;
        task->subset.push_back(elem);
        best_in_group = std::min(best_in_group,
                                 ConvertToCost(elem.weight) +
                                 backward_costs_[elem.state]);
      }

      // With the forward cost included, priority_cost is comparable to a
      // total-path cost through the input FST (kept as a plain double).
      task->priority_cost = best_in_group + src_state.forward_cost;

      if (task->priority_cost > cutoff_) {
        // Beyond the pruning cutoff: this task would never be processed.
        delete task;
        continue;
      }

      MakeSubsetUnique(&(task->subset));  // merge Elements with equal state.
      queue_.push(task);  // the queue keeps tasks in priority order.

      // Sanity check: no task should be noticeably cheaper than the best
      // complete path through the input.
      const double best_cost = backward_costs_[ifst_->Start()];
      const double tolerance = 0.01 + 1.0e-04 * std::abs(best_cost);
      if (task->priority_cost < best_cost - tolerance) {
        KALDI_WARN << "Cost below best cost was encountered:"
                   << task->priority_cost << " < " << best_cost;
      }
    }

    pending.clear();  // leave the shared scratch vector empty for next time.
  }


  bool IsIsymbolOrFinal(InputStateId state) {
    // Returns true if this state of the input FST either has a non-Zero
    // final-weight, or has at least one outgoing arc with a non-epsilon input
    // label and a non-Zero weight.  Answers are memoized in
    // isymbol_or_final_, which is grown on demand.
    KALDI_ASSERT(state >= 0);
    if (isymbol_or_final_.size() <= state)
      isymbol_or_final_.resize(state+1, static_cast<char>(OSF_UNKNOWN));

    const char yes = static_cast<char>(OSF_YES),
        no = static_cast<char>(OSF_NO);

    // Fast path: answer already cached.
    const char cached = isymbol_or_final_[state];
    if (cached == yes) return true;
    if (cached == no) return false;

    // Slow path: work it out, then remember it.
    char verdict = (ifst_->Final(state) != Weight::Zero()) ? yes : no;
    for (ArcIterator<ExpandedFst<Arc> > aiter(*ifst_, state);
         !aiter.Done();
         aiter.Next()) {
      const Arc &arc = aiter.Value();
      if (arc.ilabel != 0 && arc.weight != Weight::Zero()) {
        verdict = yes;
        break;
      }
    }
    isymbol_or_final_[state] = verdict;
    return verdict == yes;
  }

  void ComputeBackwardWeight() {
    // Sets up the backward_costs_ array, and the cutoff_ variable.
    KALDI_ASSERT(beam_ > 0);

    // Only handle the toplogically sorted case.
    backward_costs_.resize(ifst_->NumStates());
    for (StateId s = ifst_->NumStates() - 1; s >= 0; s--) {
      double &cost = backward_costs_[s];
      cost = ConvertToCost(ifst_->Final(s));
      for (ArcIterator<ExpandedFst<Arc> > aiter(*ifst_, s);
           !aiter.Done(); aiter.Next()) {
        const Arc &arc = aiter.Value();
        cost = std::min(cost,
                        ConvertToCost(arc.weight) + backward_costs_[arc.nextstate]);
      }
    }

    if (ifst_->Start() == kNoStateId) return; // we'll be returning
    // an empty FST.

    double best_cost = backward_costs_[ifst_->Start()];
    if (best_cost == std::numeric_limits<double>::infinity())
      KALDI_WARN << "Total weight of input lattice is zero.";
    cutoff_ = best_cost + beam_;
  }

  void InitializeDeterminization() {
    // Prepares the object for determinization: checks the input is
    // topologically sorted, computes backward_costs_ and cutoff_, optionally
    // pre-sizes the two subset hashes, and (if the input has a start state)
    // creates output state 0 and queues its outgoing transitions.
    //
    // We insist that the input lattice be topologically sorted.  This is not a
    // fundamental limitation of the algorithm (which in principle should be
    // applicable to even cyclic FSTs), but it helps us more efficiently
    // compute the backward_costs_ array.  There may be some other reason we
    // require this, that escapes me at the moment.
    KALDI_ASSERT(ifst_->Properties(kTopSorted, true) != 0);
    ComputeBackwardWeight();
    // The pre-sizing below is compiled out for GCC 4.0 only.
#if !(__GNUC__ == 4 && __GNUC_MINOR__ == 0)
    if(ifst_->Properties(kExpanded, false) != 0) { // if we know the number of
      // states in ifst_, it might be a bit more efficient
      // to pre-size the hashes so we're not constantly rebuilding them.
      StateId num_states =
          down_cast<const ExpandedFst<Arc>*, const Fst<Arc> >(ifst_)->NumStates();
      minimal_hash_.rehash(num_states/2 + 3);
      initial_hash_.rehash(num_states/2 + 3);
    }
#endif
    InputStateId start_id = ifst_->Start();
    if (start_id != kNoStateId) {
      /* Create determinized-state corresponding to the start state....
         Unlike all the other states, we don't "normalize" the representation
         of this determinized-state before we put it into minimal_hash_.  This is actually
         what we want, as otherwise we'd have problems dealing with any extra weight
         and string and might have to create a "super-initial" state which would make
         the output nondeterministic.  Normalization is only needed to make the
         determinized output more minimal anyway, it's not needed for correctness.
         Note, we don't put anything in the initial_hash_.  The initial_hash_ is only
         a lookaside buffer anyway, so this isn't a problem-- it will get populated
         later if it needs to be.
      */
      vector<Element> subset(1);
      subset[0].state = start_id;
      subset[0].weight = Weight::One();
      subset[0].string = repository_.EmptyString();  // Id of empty sequence.
      EpsilonClosure(&subset); // follow through epsilon-input links
      ConvertToMinimal(&subset); // remove all but final states and
      // states with input-labels on arcs out of them.
      // The second constructor argument is the forward cost of this
      // determinized state, i.e. the minimal cost from the start of the
      // determinized FST to this state.  It is 0 here (the cost counterpart of
      // Weight::One()) because this is the start state.
      OutputState *initial_state = new OutputState(subset, 0);
      KALDI_ASSERT(output_states_.empty());
      output_states_.push_back(initial_state);
      num_elems_ += subset.size();
      OutputStateId initial_state_id = 0;
      // The hash key is a pointer to the subset stored inside the OutputState
      // that was just pushed onto output_states_.
      minimal_hash_[&(initial_state->minimal_subset)] = initial_state_id;
      ProcessFinal(initial_state_id);
      ProcessTransitions(initial_state_id); // this will add tasks to
      // the queue, which we'll start processing in Determinize().
    }
  }

  KALDI_DISALLOW_COPY_AND_ASSIGN(LatticeDeterminizerPruned);

  struct OutputState {
    vector<Element> minimal_subset;
    vector<TempArc> arcs; // arcs out of the state-- those that have been processed.
    // Note: the final-weight is included here with kNoStateId as the state id.  We
    // always process the final-weight regardless of the beam; when producing the
    // output we may have to ignore some of these.
    double forward_cost; // Represents minimal cost from start-state
    // to this state.  Used in prioritization of tasks, and pruning.
    // Note: we know this minimal cost from when we first create the OutputState;
    // this is because of the priority-queue we use, that ensures that the
    // "best" path into the state will be expanded first.
    OutputState(const vector<Element> &minimal_subset,
                double forward_cost): minimal_subset(minimal_subset),
                                      forward_cost(forward_cost) { }
  };

  vector<OutputState*> output_states_; // All the info about the output states,
  // indexed by OutputStateId.

  int num_arcs_; // keep track of memory usage: number of arcs in output_states_[ ]->arcs
  int num_elems_; // keep track of memory usage: number of elems in output_states_ and
  // the keys of initial_hash_

  const ExpandedFst<Arc> *ifst_; // The input FST being determinized.
  std::vector<double> backward_costs_; // This vector stores, for every state in ifst_,
  // the minimal cost to the end-state (i.e. the sum of weights; they are guaranteed to
  // have "take-the-minimum" semantics).  We get the double from the ConvertToCost()
  // function on the lattice weights.  Filled in by ComputeBackwardWeight().

  double beam_; // Pruning beam; ComputeBackwardWeight() asserts it is > 0.
  double cutoff_; // beam plus total-weight of input (and note, the weight is
  // guaranteed to be "tropical-like" so the sum does represent a min-cost).
  // Assigned in ComputeBackwardWeight() when the input has a start state.

  DeterminizeLatticePrunedOptions opts_;
  SubsetKey hasher_;  // object that computes keys-- has no data members.
  SubsetEqual equal_;  // object that compares subsets-- only data member is delta_.
  bool determinized_; // set to true when user called Determinize(); used to make
  // sure this object is used correctly.
  MinimalSubsetHash minimal_hash_;  // hash from Subset to OutputStateId.  Subset is in
                                    // "minimal representation" (it only includes final
                                    // states and states with a nonzero ilabel on an arc
                                    // out of them).  Owns the pointers
                                    // in its keys.
                                    // NOTE(review): InitializeDeterminization() inserts a
                                    // key that points into an OutputState's
                                    // minimal_subset, so key ownership may really lie
                                    // with output_states_ -- confirm.
  InitialSubsetHash initial_hash_;   // hash from Subset to Element, which
                                     // represents the OutputStateId together
                                     // with an extra weight and string.  Subset
                                     // is "initial representation".  The extra
                                     // weight and string is needed because after
                                     // we convert to minimal representation and
                                     // normalize, there may be an extra weight
                                     // and string.  Owns the pointers
                                     // in its keys.

  // A deferred call to ProcessTransition: one input label leaving one
  // determinized state, together with the (not yet normalized) destination
  // subset.  Tasks are created in ProcessTransitions() and held in queue_.
  struct Task {
    OutputStateId state; // State from which we're processing the transition.
    Label label; // Label on the transition we're processing out of this state.
    vector<Element> subset; // Weighted subset of states (with strings)-- not normalized.
    double priority_cost; // Cost used in deciding priority of tasks.  Note:
    // we assume there is a ConvertToCost() function that converts the semiring to double.
    // ProcessTransitions() sets it to the source state's forward_cost plus the
    // minimum over the subset of (element cost + backward cost of its state).
  };

  struct TaskCompare {
    // Plays the role of operator < for std::priority_queue: yields nonzero
    // when t1 is "worse" (has the higher priority_cost) than t2, so that the
    // task with the lowest cost ends up at the top of the queue.
    inline int operator() (const Task *t1, const Task *t2) {
      return (t2->priority_cost < t1->priority_cost);
    }
  };

  // This priority queue contains "Task"s to be processed; these correspond
  // to transitions out of determinized states.  We process these in priority
  // order according to the best weight of any path passing through these
  // determinized states... it's possible to work this out.
  // The queue holds raw pointers allocated with "new" in ProcessTransitions().
  std::priority_queue<Task*, vector<Task*>, TaskCompare> queue_;

  vector<pair<Label, Element> > all_elems_tmp_; // temporary vector used in ProcessTransitions;
  // it is cleared at the end of each call so that it stays empty between calls.

  // Cache states for isymbol_or_final_: not yet computed / known false / known true.
  enum IsymbolOrFinal { OSF_UNKNOWN = 0, OSF_NO = 1, OSF_YES = 2 };

  vector<char> isymbol_or_final_; // A kind of cache; it says whether
  // each state is (emitting or final) where emitting means it has at least one
  // outgoing arc with a non-epsilon input label (and non-Zero weight).
  // Only accessed by IsIsymbolOrFinal()

  LatticeStringRepository<IntType> repository_;  // defines a compact and fast way of
  // storing sequences of labels.

  // Appends the string id of every Element in "vec" to *needed_strings, in
  // order; *needed_strings is not cleared first.
  void AddStrings(const vector<Element> &vec,
                  vector<StringId> *needed_strings) {
    const size_t count = vec.size();
    for (size_t i = 0; i < count; i++)
      needed_strings->push_back(vec[i].string);
  }
};


// normally Weight would be LatticeWeight<float> (which has two floats),
// or possibly TropicalWeightTpl<float>, and IntType would be int32.
// Caution: there are two versions of the function DeterminizeLatticePruned,
// with identical code but different output FST types.
template<class Weight, class IntType>
bool DeterminizeLatticePruned(
    const ExpandedFst<ArcTpl<Weight> >&ifst,
    double beam,
    MutableFst<ArcTpl<CompactLatticeWeightTpl<Weight, IntType> > >*ofst,
    DeterminizeLatticePrunedOptions opts) {
  ofst->SetInputSymbols(ifst.InputSymbols());
  ofst->SetOutputSymbols(ifst.OutputSymbols());
  if (ifst.NumStates() == 0) {
    ofst->DeleteStates();
    return true;
  }
  KALDI_ASSERT(opts.retry_cutoff >= 0.0 && opts.retry_cutoff < 1.0);
  int32 max_num_iters = 10;  // avoid the potential for infinite loops if
                             // retrying.
  VectorFst<ArcTpl<Weight> > temp_fst;

  for (int32 iter = 0; iter < max_num_iters; iter++) {
    LatticeDeterminizerPruned<Weight, IntType> det(iter == 0 ? ifst : temp_fst,
                                                   beam, opts);
    double effective_beam;
    bool ans = det.Determinize(&effective_beam);
    // if it returns false it will typically still produce reasonable output,
    // just with a narrower beam than "beam".  If the user specifies an infinite
    // beam we don't do this beam-narrowing.
    if (effective_beam >= beam * opts.retry_cutoff ||
        beam == std::numeric_limits<double>::infinity() ||
        iter + 1 == max_num_iters) {
      det.Output(ofst);
      return ans;
    } else {
      // The code below to set "beam" is a heuristic.
      // If effective_beam is very small, we want to reduce by a lot.
      // But never change the beam by more than a factor of two.
      if (effective_beam < 0.0) effective_beam = 0.0;
      double new_beam = beam * sqrt(effective_beam / beam);
      if (new_beam < 0.5 * beam) new_beam = 0.5 * beam;
      beam = new_beam;
      if (iter == 0) temp_fst = ifst;
      kaldi::PruneLattice(beam, &temp_fst);
      KALDI_LOG << "Pruned state-level lattice with beam " << beam
                << " and retrying determinization with that beam.";
    }
  }
  return false; // Suppress compiler warning; this code is unreachable.
}


// normally Weight would be LatticeWeight<float> (which has two floats),
// or possibly TropicalWeightTpl<float>, and IntType would be int32.
// Caution: there are two versions of the function DeterminizeLatticePruned,
// with identical code but different output FST types.
template<class Weight>
bool DeterminizeLatticePruned(const ExpandedFst<ArcTpl<Weight> > &ifst,
                              double beam,
                              MutableFst<ArcTpl<Weight> > *ofst,
                              DeterminizeLatticePrunedOptions opts) {
  // Pruned determinization of "ifst" into "ofst", which has the same arc type
  // as the input.  If the determinizer only achieves an effective beam below
  // beam * opts.retry_cutoff, the input is pruned with a smaller beam and
  // determinization is retried, up to max_num_iters attempts.  Returns
  // whatever the final Determinize() call returned (true for an empty input).
  typedef int32 IntType;
  ofst->SetInputSymbols(ifst.InputSymbols());
  ofst->SetOutputSymbols(ifst.OutputSymbols());
  KALDI_ASSERT(opts.retry_cutoff >= 0.0 && opts.retry_cutoff < 1.0);
  if (ifst.NumStates() == 0) {
    ofst->DeleteStates();
    return true;
  }
  int32 max_num_iters = 10;  // avoid the potential for infinite loops if
                             // retrying.
  VectorFst<ArcTpl<Weight> > temp_fst;

  for (int32 iter = 0; iter < max_num_iters; iter++) {
    LatticeDeterminizerPruned<Weight, IntType> det(iter == 0 ? ifst : temp_fst,
                                                   beam, opts);
    double effective_beam;
    bool ans = det.Determinize(&effective_beam);
    // if it returns false it will typically still
    // produce reasonable output, just with a
    // narrower beam than "beam".  If the user specifies an infinite beam we
    // don't do the beam-narrowing: the heuristic below would compute
    // inf * sqrt(x / inf) = inf * 0 = NaN and then prune with a NaN beam.
    // This matches the CompactLattice-output overload of this function.
    if (effective_beam >= beam * opts.retry_cutoff ||
        beam == std::numeric_limits<double>::infinity() ||
        iter + 1 == max_num_iters) {
      det.Output(ofst);
      return ans;
    } else {
      // The code below to set "beam" is a heuristic.
      // If effective_beam is very small, we want to reduce by a lot.
      // But never change the beam by more than a factor of two.
      if (effective_beam < 0)
        effective_beam = 0;
      double new_beam = beam * sqrt(effective_beam / beam);
      if (new_beam < 0.5 * beam) new_beam = 0.5 * beam;
      KALDI_WARN << "Effective beam " << effective_beam << " was less than beam "
                 << beam << " * cutoff " << opts.retry_cutoff << ", pruning raw "
                 << "lattice with new beam " << new_beam << " and retrying.";
      beam = new_beam;
      // Copy the input only once; later retries keep pruning the same copy.
      if (iter == 0) temp_fst = ifst;
      kaldi::PruneLattice(beam, &temp_fst);
    }
  }
  return false; // Suppress compiler warning; this code is unreachable.
}

// template<class Weight>
// typename ArcTpl<Weight>::Label DeterminizeLatticeInsertPhones(
//     const kaldi::TransitionModel &trans_model,
//     MutableFst<ArcTpl<Weight> > *fst) {
//   // Define some types.
//   typedef ArcTpl<Weight> Arc;
//   typedef typename Arc::StateId StateId;
//   typedef typename Arc::Label Label;
//
//   // Work out the first phone symbol. This is more related to the phone
//   // insertion function, so we put it here and make it the returning value of
//   // DeterminizeLatticeInsertPhones().
//   Label first_phone_label = HighestNumberedInputSymbol(*fst) + 1;
//
//   // Insert phones here.
//   for (StateIterator<MutableFst<Arc> > siter(*fst);
//        !siter.Done(); siter.Next()) {
//     StateId state = siter.Value();
//     if (state == fst->Start())
//       continue;
//     for (MutableArcIterator<MutableFst<Arc> > aiter(fst, state);
//          !aiter.Done(); aiter.Next()) {
//       Arc arc = aiter.Value();
//
//       // Note: the words are on the input symbol side and transition-id's are on
//       // the output symbol side.
//       if ((arc.olabel != 0)
//           && (trans_model.TransitionIdToHmmState(arc.olabel) == 0)
//           && (!trans_model.IsSelfLoop(arc.olabel))) {
//         Label phone =
//             static_cast<Label>(trans_model.TransitionIdToPhone(arc.olabel));
//
//         // Skips <eps>.
//         KALDI_ASSERT(phone != 0);
//
//         if (arc.ilabel == 0) {
//           // If there is no word on the arc, insert the phone directly.
//           arc.ilabel = first_phone_label + phone;
//         } else {
//           // Otherwise, add an additional arc.
//           StateId additional_state = fst->AddState();
//           StateId next_state = arc.nextstate;
//           arc.nextstate = additional_state;
//           fst->AddArc(additional_state,
//                       Arc(first_phone_label + phone, 0,
//                           Weight::One(), next_state));
//         }
//       }
//
//       aiter.SetValue(arc);
//     }
//   }
//
//   return first_phone_label;
// }
//
// template<class Weight>
// void DeterminizeLatticeDeletePhones(
//     typename ArcTpl<Weight>::Label first_phone_label,
//     MutableFst<ArcTpl<Weight> > *fst) {
//   // Define some types.
//   typedef ArcTpl<Weight> Arc;
//   typedef typename Arc::StateId StateId;
//   typedef typename Arc::Label Label;
//
//   // Delete phones here.
//   for (StateIterator<MutableFst<Arc> > siter(*fst);
//        !siter.Done(); siter.Next()) {
//     StateId state = siter.Value();
//     for (MutableArcIterator<MutableFst<Arc> > aiter(fst, state);
//          !aiter.Done(); aiter.Next()) {
//       Arc arc = aiter.Value();
//
//       if (arc.ilabel >= first_phone_label)
//         arc.ilabel = 0;
//
//       aiter.SetValue(arc);
//     }
//   }
// }
// instantiate for type LatticeWeight
// template
// void DeterminizeLatticeDeletePhones(
//     ArcTpl<kaldi::LatticeWeight>::Label first_phone_label,
//     MutableFst<ArcTpl<kaldi::LatticeWeight> > *fst);
//
// /** This function does a first pass determinization with phone symbols inserted
//     at phone boundary. It uses a transition model to work out the transition-id
//     to phone map. First, phones will be inserted into the word level lattice.
//     Second, determinization will be applied on top of the phone + word lattice.
//     Finally, the inserted phones will be removed, converting the lattice back to
//     a word level lattice. The output lattice of this pass is not deterministic,
//     since we remove the phone symbols as a last step. It is supposed to be
//     followed by another pass of determinization at the word level. It could also
//     be useful for some other applications such as fMLLR estimation, confidence
//     estimation, discriminative training, etc.
// */
// template<class Weight, class IntType>
// bool DeterminizeLatticePhonePrunedFirstPass(
//     const kaldi::TransitionModel &trans_model,
//     double beam,
//     MutableFst<ArcTpl<Weight> > *fst,
//     const DeterminizeLatticePrunedOptions &opts) {
//   // First, insert the phones.
//   typename ArcTpl<Weight>::Label first_phone_label =
//       DeterminizeLatticeInsertPhones(trans_model, fst);
//   TopSort(fst);
//
//   // Second, do determinization with phone inserted.
//   bool ans = DeterminizeLatticePruned<Weight>(*fst, beam, fst, opts);
//
//   // Finally, remove the inserted phones.
//   DeterminizeLatticeDeletePhones(first_phone_label, fst);
//   TopSort(fst);
//
//   return ans;
// }
//
// // "Destructive" version of DeterminizeLatticePhonePruned() where the input
// // lattice might be modified.
// template<class Weight, class IntType>
// bool DeterminizeLatticePhonePruned(
//     const kaldi::TransitionModel &trans_model,
//     MutableFst<ArcTpl<Weight> > *ifst,
//     double beam,
//     MutableFst<ArcTpl<CompactLatticeWeightTpl<Weight, IntType> > > *ofst,
//     DeterminizeLatticePhonePrunedOptions opts) {
//   // Returning status.
//   bool ans = true;
//
//   // Make sure at least one of opts.phone_determinize and opts.word_determinize
//   // is not false, otherwise calling this function doesn't make any sense.
//   if ((opts.phone_determinize || opts.word_determinize) == false) {
//     KALDI_WARN << "Both --phone-determinize and --word-determinize are set to "
//                << "false, copying lattice without determinization.";
//     // We are expecting the words on the input side.
//     ConvertLattice<Weight, IntType>(*ifst, ofst, false);
//     return ans;
//   }
//
//   // Determinization options.
//   DeterminizeLatticePrunedOptions det_opts;
//   det_opts.delta = opts.delta;
//   det_opts.max_mem = opts.max_mem;
//
//   // If --phone-determinize is true, do the determinization on phone + word
//   // lattices.
//   if (opts.phone_determinize) {
//     KALDI_VLOG(3) << "Doing first pass of determinization on phone + word "
//                   << "lattices.";
//     ans = DeterminizeLatticePhonePrunedFirstPass<Weight, IntType>(
//         trans_model, beam, ifst, det_opts) && ans;
//
//     // If --word-determinize is false, we've finished the job and return here.
//     if (!opts.word_determinize) {
//       // We are expecting the words on the input side.
//       ConvertLattice<Weight, IntType>(*ifst, ofst, false);
//       return ans;
//     }
//   }
//
//   // If --word-determinize is true, do the determinization on word lattices.
//   if (opts.word_determinize) {
//     KALDI_VLOG(3) << "Doing second pass of determinization on word lattices.";
//     ans = DeterminizeLatticePruned<Weight, IntType>(
//         *ifst, beam, ofst, det_opts) && ans;
//   }
//
//   // If --minimize is true, push and minimize after determinization.
//   if (opts.minimize) {
//     KALDI_VLOG(3) << "Pushing and minimizing on word lattices.";
//     ans = PushCompactLatticeStrings<Weight, IntType>(ofst) && ans;
//     ans = PushCompactLatticeWeights<Weight, IntType>(ofst) && ans;
//     ans = MinimizeCompactLattice<Weight, IntType>(ofst) && ans;
//   }
//
//   return ans;
// }
//
// // Normal verson of DeterminizeLatticePhonePruned(), where the input lattice
// // will be kept as unchanged.
// template<class Weight, class IntType>
// bool DeterminizeLatticePhonePruned(
//     const kaldi::TransitionModel &trans_model,
//     const ExpandedFst<ArcTpl<Weight> > &ifst,
//     double beam,
//     MutableFst<ArcTpl<CompactLatticeWeightTpl<Weight, IntType> > > *ofst,
//     DeterminizeLatticePhonePrunedOptions opts) {
//   VectorFst<ArcTpl<Weight> > temp_fst(ifst);
//   return DeterminizeLatticePhonePruned(trans_model, &temp_fst,
//                                        beam, ofst, opts);
// }
//
// bool DeterminizeLatticePhonePrunedWrapper(
//     const kaldi::TransitionModel &trans_model,
//     MutableFst<kaldi::LatticeArc> *ifst,
//     double beam,
//     MutableFst<kaldi::CompactLatticeArc> *ofst,
//     DeterminizeLatticePhonePrunedOptions opts) {
//   bool ans = true;
//   Invert(ifst);
//   if (ifst->Properties(fst::kTopSorted, true) == 0) {
//     if (!TopSort(ifst)) {
//       // Cannot topologically sort the lattice -- determinization will fail.
//       KALDI_ERR << "Topological sorting of state-level lattice failed (probably"
//                 << " your lexicon has empty words or your LM has epsilon cycles"
//                 << ").";
//     }
//   }
//   ILabelCompare<kaldi::LatticeArc> ilabel_comp;
//   ArcSort(ifst, ilabel_comp);
//   ans = DeterminizeLatticePhonePruned<kaldi::LatticeWeight, kaldi::int32>(
//       trans_model, ifst, beam, ofst, opts);
//   Connect(ofst);
//   return ans;
// }

// Explicit instantiations of the templates for the types we might need.
// Note: only the two DeterminizeLatticePruned overloads are instantiated
// here, each for a single type (kaldi::LatticeWeight); the
// DeterminizeLatticePhonePruned instantiations are commented out below.

// Overload whose output is a MutableFst<kaldi::CompactLatticeArc>.
template
bool DeterminizeLatticePruned<kaldi::LatticeWeight>(
    const ExpandedFst<kaldi::LatticeArc> &ifst,
    double prune,
    MutableFst<kaldi::CompactLatticeArc> *ofst,
    DeterminizeLatticePrunedOptions opts);

// Overload whose output is a MutableFst<kaldi::LatticeArc>.
template
bool DeterminizeLatticePruned<kaldi::LatticeWeight>(
    const ExpandedFst<kaldi::LatticeArc> &ifst,
    double prune,
    MutableFst<kaldi::LatticeArc> *ofst,
    DeterminizeLatticePrunedOptions opts);

// template
// bool DeterminizeLatticePhonePruned<kaldi::LatticeWeight, kaldi::int32>(
//     const kaldi::TransitionModel &trans_model,
//     const ExpandedFst<kaldi::LatticeArc> &ifst,
//     double prune,
//     MutableFst<kaldi::CompactLatticeArc> *ofst,
//     DeterminizeLatticePhonePrunedOptions opts);
//
// template
// bool DeterminizeLatticePhonePruned<kaldi::LatticeWeight, kaldi::int32>(
//     const kaldi::TransitionModel &trans_model,
//     MutableFst<kaldi::LatticeArc> *ifst,
//     double prune,
//     MutableFst<kaldi::CompactLatticeArc> *ofst,
//     DeterminizeLatticePhonePrunedOptions opts);

}


================================================
FILE: runtime/engine/kaldi/lat/determinize-lattice-pruned.h
================================================
// lat/determinize-lattice-pruned.h

// Copyright 2009-2012  Microsoft Corporation
//           2012-2013  Johns Hopkins University (Author: Daniel Povey)
//                2014  Guoguo Chen

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_LAT_DETERMINIZE_LATTICE_PRUNED_H_
#define KALDI_LAT_DETERMINIZE_LATTICE_PRUNED_H_
#include <fst/fstlib.h>
#include <fst/fst-decl.h>
#include <algorithm>
#include <map>
#include <set>
#include <vector>
#include "fstext/lattice-weight.h"
// #include "hmm/transition-model.h"
#include "util/options-itf.h"
#include "lat/kaldi-lattice.h"

namespace fst {

/// \addtogroup fst_extensions
///  @{


// For example of usage, see test-determinize-lattice-pruned.cc

/*
   DeterminizeLatticePruned implements a special form of determinization with
   epsilon removal, optimized for a phase of lattice generation.  This algorithm
   also does pruning at the same time-- the combination is more efficient as it
   sometimes prevents us from creating a lot of states that would later be pruned
   away.  This allows us to increase the lattice-beam and not have the algorithm
   blow up.  Also, because our algorithm processes states in order from those
   that appear on high-scoring paths down to those that appear on low-scoring
   paths, we can easily terminate the algorithm after a certain specified number
   of states or arcs.

   The input is an FST with weight-type BaseWeightType (usually a pair of floats,
   with a lexicographical type of order, such as LatticeWeightTpl<float>).
   Typically this would be a state-level lattice, with input symbols equal to
   words, and output-symbols equal to p.d.f's (so like the inverse of HCLG).  Imagine representing this as an
   acceptor of type CompactLatticeWeightTpl<float>, in which the input/output
   symbols are words, and the weights contain the original weights together with
   strings (with zero or one symbol in them) containing the original output labels
   (the p.d.f.'s).  We determinize this using acceptor determinization with
   epsilon removal.  Remember (from lattice-weight.h) that
   CompactLatticeWeightTpl has a special kind of semiring where we always take
   the string corresponding to the best cost (of type BaseWeightType), and
   discard the other.  This corresponds to taking the best output-label sequence
   (of p.d.f.'s) for each input-label sequence (of words).  We couldn't use the
   Gallic weight for this, or it would die as soon as it detected that the input
   FST was non-functional.  In our case, any acyclic FST (and many cyclic ones)
   can be determinized.
   We assume that there is a function
      Compare(const BaseWeightType &a, const BaseWeightType &b)
   that returns (-1, 0, 1) according to whether (a < b, a == b, a > b) in the
   total order on the BaseWeightType... this information should be the
   same as NaturalLess would give, but it's more efficient to do it this way.
   You can define this for things like TropicalWeight if you need to instantiate
   this class for that weight type.

   We implement this determinization in a special way to make it efficient for
   the types of FSTs that we will apply it to.  One issue is that if we
   explicitly represent the strings (in CompactLatticeWeightTpl) as vectors of
   type vector<IntType>, the algorithm takes time quadratic in the length of
   words (in states), because propagating each arc involves copying a whole
   vector (of integers representing p.d.f.'s).  Instead we use a hash structure
   where each string is a pointer (Entry*), and uses a hash from (Entry*,
   IntType), to the successor string (and a way to get the latest IntType and the
   ancestor Entry*).  [this is the class LatticeStringRepository].

   Another issue is that rather than representing a determinized-state as a
   collection of (state, weight), we represent it in a couple of reduced forms.
   Suppose a determinized-state is a collection of (state, weight) pairs; call
   this the "canonical representation".  Note: these collections are always
   normalized to remove any common weight and string part.  Define end-states as
   the subset of states that have an arc out of them with a label on, or are
   final.  If we represent a determinized-state as the set of just its (end-state,
   weight) pairs, this will be a valid and more compact representation, and will
   lead to a smaller set of determinized states (like early minimization).  Call
   this collection of (end-state, weight) pairs the "minimal representation".  As
   a mechanism to reduce compute, we can also consider another representation.
   In the determinization algorithm, we start off with a set of (begin-state,
   weight) pairs (where the "begin-states" are initial or have a label on the
   transition into them), and the "canonical representation" consists of the
   epsilon-closure of this set (i.e. follow epsilons).  Call this set of
   (begin-state, weight) pairs, appropriately normalized, the "initial
   representation".  If two initial representations are the same, the "canonical
   representation" and hence the "minimal representation" will be the same.  We
   can use this to reduce compute.  Note that if two initial representations are
   different, this does not preclude the other representations from being the same.

*/


/// Options for DeterminizeLatticePruned().  The constructor sets the
/// defaults; Register() exposes every field as a named option on a
/// kaldi::OptionsItf.
struct DeterminizeLatticePrunedOptions {
  float delta; // A small offset used to measure equality of weights.
  int max_mem; // If >0, determinization will fail and return false
  // when the algorithm's (approximate) memory consumption crosses this threshold.
  int max_loop; // If >0, can be used to detect non-determinizable input
  // (a case that wouldn't be caught by max_mem).
  int max_states; // Maximum number of states in the output FST; default -1.
  // NOTE(review): presumably a value <= 0 means "no limit" -- confirm in the .cc.
  int max_arcs; // Maximum number of arcs in the output FST (total, not per
  // state); default -1.  NOTE(review): presumably <= 0 means "no limit".
  float retry_cutoff; // Controls pruning the un-determinized lattice and
  // retrying determinization; see the "retry-cutoff" help text below.
  DeterminizeLatticePrunedOptions(): delta(kDelta),
                                     max_mem(-1),
                                     max_loop(-1),
                                     max_states(-1),
                                     max_arcs(-1),
                                     retry_cutoff(0.5) { }
  /// Registers each field with "opts" under its command-line name.
  void Register (kaldi::OptionsItf *opts) {
    opts->Register("delta", &delta, "Tolerance used in determinization");
    opts->Register("max-mem", &max_mem, "Maximum approximate memory usage in "
                   "determinization (real usage might be many times this)");
    // Help text fixed: the closing parenthesis was missing.
    opts->Register("max-arcs", &max_arcs, "Maximum number of arcs in "
                   "output FST (total, not per state)");
    // Help text fixed: it previously described this option as a limit on
    // arcs (copy-paste of the max-arcs text) and had an unclosed parenthesis.
    opts->Register("max-states", &max_states, "Maximum number of states in "
                   "output FST");
    opts->Register("max-loop", &max_loop, "Option used to detect a particular "
                   "type of determinization failure, typically due to invalid input "
                   "(e.g., negative-cost loops)");
    opts->Register("retry-cutoff", &retry_cutoff, "Controls pruning un-determinized "
                   "lattice and retrying determinization: if effective-beam < "
                   "retry-cutoff * beam, we prune the raw lattice and retry.  Avoids "
                   "ever getting empty output for long segments.");
  }
};

/// Options for the phone-pruned (two-pass) lattice determinization.
struct DeterminizeLatticePhonePrunedOptions {
  float delta;             // Small offset used to measure equality of weights.
  int max_mem;             // If > 0, determinization fails and returns false
                           // once the algorithm's (approximate) memory
                           // consumption crosses this threshold.
  bool phone_determinize;  // If true, run a first determinization pass on
                           // both phones and words.
  bool word_determinize;   // If true, run a second determinization pass on
                           // words only.
  bool minimize;           // If true, push and minimize after determinization.

  DeterminizeLatticePhonePrunedOptions()
      : delta(kDelta),
        max_mem(50000000),
        phone_determinize(true),
        word_determinize(true),
        minimize(false) {}

  /// Registers each field with "opts" under its command-line name.
  void Register (kaldi::OptionsItf *opts) {
    opts->Register("delta", &delta,
                   "Tolerance used in determinization");
    opts->Register("max-mem", &max_mem,
                   "Maximum approximate memory usage in determinization "
                   "(real usage might be many times this).");
    opts->Register("phone-determinize", &phone_determinize,
                   "If true, do an initial pass of determinization on both "
                   "phones and words (see also --word-determinize)");
    opts->Register("word-determinize", &word_determinize,
                   "If true, do a second pass of determinization on words "
                   "only (see also --phone-determinize)");
    opts->Register("minimize", &minimize,
                   "If true, push and minimize after determinization.");
  }
};

/**
    This function implements the normal version of DeterminizeLattice, in which the
    output strings are represented using sequences of arcs, where all but the
    first one has an epsilon on the input side.  It also prunes using the beam
    in the "prune" parameter.  The input FST must be topologically sorted in order
    for the algorithm to work. For efficiency it is recommended to sort ilabel as well.
    Returns true on success, and false if it had to terminate the determinization
    earlier than specified by the "prune" beam-- that is, if it terminated because
    of the max_mem, max_loop or max_arcs constraints in the options.
    NOTE(review): the options also carry max_states; presumably hitting it is
    reported the same way -- confirm against the implementation.
    CAUTION: you may want to use the version below which outputs to CompactLattice.

    @param [in] ifst   Input FST (topologically sorted).
    @param [in] prune  Pruning beam.
    @param [out] ofst  Receives the determinized FST, with the same arc type
                       as the input.
    @param [in] opts   Determinization options; defaults to a
                       default-constructed DeterminizeLatticePrunedOptions.
*/
template<class Weight>
bool DeterminizeLatticePruned(
    const ExpandedFst<ArcTpl<Weight> > &ifst,
    double prune,
    MutableFst<ArcTpl<Weight> > *ofst,
    DeterminizeLatticePrunedOptions opts = DeterminizeLatticePrunedOptions());


/*  This is a version of DeterminizeLattice with a slightly more "natural" output format,
    where the output sequences are encoded using the CompactLatticeArcTpl template
    (i.e. the sequences of output symbols are represented directly as strings).
    The input FST must be topologically sorted in order for the algorithm to work.
    For efficiency it is recommended to sort the ilabel for the input FST as well.
    Returns true on normal success, and false if it had to terminate the determinization
    earlier than specified by the "prune" beam-- that is, if it terminated because
    of the max_mem, max_loop or max_arcs constraints in the options.
    CAUTION: if Lattice is the input, you need to Invert() before calling this,
    so words are on the input side.

    ifst:  input FST (topologically sorted, words on the input side).
    prune: pruning beam.
    ofst:  receives the determinized output, whose arc weights are
           CompactLatticeWeightTpl<Weight, IntType>.
    opts:  determinization options; defaults to a default-constructed
           DeterminizeLatticePrunedOptions.
*/
template<class Weight, class IntType>
bool DeterminizeLatticePruned(
    const ExpandedFst<ArcTpl<Weight> >&ifst,
    double prune,
    MutableFst<ArcTpl<CompactLatticeWeightTpl<Weight, IntType> > > *ofst,
    DeterminizeLatticePrunedOptions opts = DeterminizeLatticePrunedOptions());

// /** This function takes in lattices and inserts phones at phone boundaries. It
//     uses the transition model to work out the transition_id to phone map. The
//     returning value is the starting index of the phone label. Typically we pick
//     (maximum_output_label_index + 1) as this value. The inserted phones are then
//     mapped to (returning_value + original_phone_label) in the new lattice. The
//     returning value will be used by DeterminizeLatticeDeletePhones() where it
//     works out the phones according to this value.
// */
// template<class Weight>
// typename ArcTpl<Weight>::Label DeterminizeLatticeInsertPhones(
//     const kaldi::TransitionModel &trans_model,
//     MutableFst<ArcTpl<Weight> > *fst);
//
// /** This function takes in lattices and deletes "phones" from them. The "phones"
//     here are actually any label that is larger than first_phone_label because
//     when we insert phones into the lattice, we map the original phone label to
//     (first_phone_label + original_phone_label). It is supposed to be used
//     together with DeterminizeLatticeInsertPhones()
// */
// template<class Weight>
// void DeterminizeLatticeDeletePhones(
//     typename ArcTpl<Weight>::Label first_phone_label,
//     MutableFst<ArcTpl<Weight> > *fst);
//
// /** This function is a wrapper of DeterminizeLatticePhonePrunedFirstPass() and
//     DeterminizeLatticePruned(). If --phone-determinize is set to true, it first
//     calls DeterminizeLatticePhonePrunedFirstPass() to do the initial pass of
//     determinization on the phone + word lattices. If --word-determinize is set
//     true, it then does a second pass of determinization on the word lattices by
//     calling DeterminizeLatticePruned(). If both are set to false, then it gives
//     a warning and copying the lattices without determinization.
//
//     Note: the point of doing first a phone-level determinization pass and then
//     a word-level determinization pass is that it allows us to determinize
//     deeper lattices without "failing early" and returning a too-small lattice
//     due to the max-mem constraint.  The result should be the same as word-level
//     determinization in general, but for deeper lattices it is a bit faster,
//     despite the fact that we now have two passes of determinization by default.
// */
// template<class Weight, class IntType>
// bool DeterminizeLatticePhonePruned(
//     const kaldi::TransitionModel &trans_model,
//     const ExpandedFst<ArcTpl<Weight> > &ifst,
//     double prune,
//     MutableFst<ArcTpl<CompactLatticeWeightTpl<Weight, IntType> > > *ofst,
//     DeterminizeLatticePhonePrunedOptions opts
//       = DeterminizeLatticePhonePrunedOptions());
//
// /** "Destructive" version of DeterminizeLatticePhonePruned() where the input
//     lattice might be changed.
// */
// template<class Weight, class IntType>
// bool DeterminizeLatticePhonePruned(
//     const kaldi::TransitionModel &trans_model,
//     MutableFst<ArcTpl<Weight> > *ifst,
//     double prune,
//     MutableFst<ArcTpl<CompactLatticeWeightTpl<Weight, IntType> > > *ofst,
//     DeterminizeLatticePhonePrunedOptions opts
//       = DeterminizeLatticePhonePrunedOptions());
//
// /** This function is a wrapper of DeterminizeLatticePhonePruned() that works for
//     Lattice type FSTs.  It simplifies the calling process by calling
//     TopSort() Invert() and ArcSort() for you.
//     Unlike other determinization routines, the function
//     requires "ifst" to have transition-id's on the input side and words on the
//     output side.
//     This function can be used as the top-level interface to all the determinization
//     code.
// */
// bool DeterminizeLatticePhonePrunedWrapper(
//     const kaldi::TransitionModel &trans_model,
//     MutableFst<kaldi::LatticeArc> *ifst,
//     double prune,
//     MutableFst<kaldi::CompactLatticeArc> *ofst,
//     DeterminizeLatticePhonePrunedOptions opts
//       = DeterminizeLatticePhonePrunedOptions());

/// @} end "addtogroup fst_extensions"

} // end namespace fst

#endif


================================================
FILE: runtime/engine/kaldi/lat/kaldi-lattice.cc
================================================
// lat/kaldi-lattice.cc

// Copyright 2009-2011     Microsoft Corporation
//                2013     Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


#include "lat/kaldi-lattice.h"
#include "fst/script/print-impl.h"

namespace kaldi {

/// Builds a newly allocated CompactLattice from *ifst using ConvertLattice()
/// and then deletes ifst, so ownership of the input is consumed.  A NULL
/// input produces a NULL result.  The caller owns the returned object.
/// (Despite its name, OrigWeightType is the template argument handed to
/// fst::VectorFst.)
template<class OrigWeightType>
CompactLattice* ConvertToCompactLattice(fst::VectorFst<OrigWeightType> *ifst) {
  CompactLattice *converted = NULL;
  if (ifst != NULL) {
    converted = new CompactLattice();
    ConvertLattice(*ifst, converted);
    delete ifst;  // the input is always released once it has been converted.
  }
  return converted;
}

// Specialization used when the input already is a CompactLattice: no type
// conversion is needed, so (for efficiency) the same pointer is handed back.
// Nothing is copied or deleted here; a NULL input is returned as NULL.
template<>
CompactLattice* ConvertToCompactLattice(CompactLattice *ifst) {
  return ifst;
}

/// Builds a newly allocated Lattice from *ifst using ConvertLattice() and
/// then deletes ifst, so ownership of the input is consumed.  A NULL input
/// produces a NULL result.  The caller owns the returned object.
/// (Despite its name, OrigWeightType is the template argument handed to
/// fst::VectorFst.)
template<class OrigWeightType>
Lattice* ConvertToLattice(fst::VectorFst<OrigWeightType> *ifst) {
  Lattice *converted = NULL;
  if (ifst != NULL) {
    converted = new Lattice();
    ConvertLattice(*ifst, converted);
    delete ifst;  // the input is always released once it has been converted.
  }
  return converted;
}

// Specialization used when the input already is a Lattice: no type
// conversion is needed, so (for efficiency) the same pointer is handed back.
// Nothing is copied or deleted here; a NULL input is returned as NULL.
template<>
Lattice* ConvertToLattice(Lattice *ifst) {
  return ifst;
}


bool WriteCompactLattice(std::ostream &os, bool binary,
                         const CompactLattice &t) {
  if (binary) {
    fst::FstWriteOptions opts;
    // Leave all the options default.  Normally these lattices wouldn't have any
    // osymbols/isymbols so no point directing it not to write them (who knows what
    // we'd want to if we had them).
    return t.Write(os, opts);
  } else {
    // Text-mode output.  Note: we expect that t.InputSymbols() and
    // t.OutputSymbols() would always return NULL.  The corresponding input
    // routine would not work if the FST actually had symbols attached.
    // Write a newline after the key, so the first line of the FST appears
    // on its own line.
    os << '\n';
    bool acceptor = true, write_one = false;
    fst::FstPrinter<CompactLatticeArc> printer(t, t.InputSymbols(),
                                               t.OutputSymbols(),
                                               NULL, acceptor, write_one, "\t");
    printer.Print(&os, "<unknown>");
    if (os.fail())
      KALDI_WARN << "Stream failure detected.";
    // Write another newline as a terminating character.  The read routine will
    // detect this [this is a Kaldi mechanism, not somethig in the original
    // OpenFst code].
    os << '\n';
    return os.good();
  }
}

/// LatticeReader provides (static) functions for reading both Lattice
/// and CompactLattice, in text form.
class LatticeReader {
  typedef LatticeArc Arc;
  typedef LatticeWeight Weight;
  typedef CompactLatticeArc CArc;
  typedef CompactLatticeWeight CWeight;
  typedef Arc::Label Label;
  typedef Arc::StateId StateId;
 public:
  // everything is static in this class.

  /** This function reads from the FST text format; it does not know in advance
      whether it's a Lattice or CompactLattice in the stream so it tries to
      read both formats until it becomes clear which is the correct one.

      Reading stops at the first empty line (the archive terminator) or at
      end of stream.  In the returned pair, a member is NULL if the text could
      not be parsed in that format; both may be non-NULL if every line was
      valid in both formats.  The caller owns any non-NULL pointer.  If a
      line is malformed (more than 5 fields, an unparsable or negative state
      id, or a line valid in neither format) a warning is logged and both
      members are NULL.
  */
  static std::pair<Lattice*, CompactLattice*> ReadText(
      std::istream &is) {
    typedef std::pair<Lattice*, CompactLattice*> PairT;
    using std::string;
    using std::vector;
    Lattice *fst = new Lattice();
    CompactLattice *cfst = new CompactLattice();
    string line;
    size_t nline = 0;
    string separator = FLAGS_fst_field_separator + "\r\n";
    while (std::getline(is, line)) {
      nline++;
      vector<string> col;
      // on Windows we'll write in text and read in binary mode.
      SplitStringToVector(line, separator.c_str(), true, &col);
      if (col.size() == 0) break; // Empty line is a signal to stop, in our
      // archive format.
      if (col.size() > 5) {
        KALDI_WARN << "Reading lattice: bad line in FST: " << line;
        delete fst;
        delete cfst;
        return PairT(static_cast<Lattice*>(NULL),
                     static_cast<CompactLattice*>(NULL));
      }
      StateId s;
      // A negative state id parses as an integer but is not a valid state:
      // the growth loops below would not add it, and SetStart/SetFinal/AddArc
      // would then index the state vector out of bounds.  Reject it here.
      if (!ConvertStringToInteger(col[0], &s) || s < 0) {
        KALDI_WARN << "FstCompiler: bad line in FST: " << line;
        delete fst;
        delete cfst;
        return PairT(static_cast<Lattice*>(NULL),
                     static_cast<CompactLattice*>(NULL));
      }
      if (fst)
        while (s >= fst->NumStates())
          fst->AddState();
      if (cfst)
        while (s >= cfst->NumStates())
          cfst->AddState();
      if (nline == 1) {  // the source state of the first line is the start.
        if (fst) fst->SetStart(s);
        if (cfst) cfst->SetStart(s);
      }

      if (fst) { // we still have fst; try to read that arc.
        bool ok = true;
        Arc arc;
        Weight w;
        StateId d = s;
        switch (col.size()) {
          case 1 :
            fst->SetFinal(s, Weight::One());
            break;
          case 2:
            if (!StrToWeight(col[1], true, &w)) ok = false;
            else fst->SetFinal(s, w);
            break;
          case 3: // 3 columns not ok for Lattice format; it's not an acceptor.
            ok = false;
            break;
          case 4:
            // A negative destination state is treated as a parse failure.
            ok = ConvertStringToInteger(col[1], &arc.nextstate) &&
                arc.nextstate >= 0 &&
                ConvertStringToInteger(col[2], &arc.ilabel) &&
                ConvertStringToInteger(col[3], &arc.olabel);
            if (ok) {
              d = arc.nextstate;
              arc.weight = Weight::One();
              fst->AddArc(s, arc);
            }
            break;
          case 5:
            ok = ConvertStringToInteger(col[1], &arc.nextstate) &&
                arc.nextstate >= 0 &&
                ConvertStringToInteger(col[2], &arc.ilabel) &&
                ConvertStringToInteger(col[3], &arc.olabel) &&
                StrToWeight(col[4], false, &arc.weight);
            if (ok) {
              d = arc.nextstate;
              fst->AddArc(s, arc);
            }
            break;
          default:
            ok = false;
        }
        // Make sure the destination state exists.
        while (d >= fst->NumStates())
          fst->AddState();
        if (!ok) {  // this line is not valid Lattice text: give up on fst.
          delete fst;
          fst = NULL;
        }
      }
      if (cfst) {
        bool ok = true;
        CArc arc;
        CWeight w;
        StateId d = s;
        switch (col.size()) {
          case 1 :
            cfst->SetFinal(s, CWeight::One());
            break;
          case 2:
            if (!StrToCWeight(col[1], true, &w)) ok = false;
            else cfst->SetFinal(s, w);
            break;
          case 3: // compact-lattice is acceptor format: state, next-state, label.
            ok = ConvertStringToInteger(col[1], &arc.nextstate) &&
                arc.nextstate >= 0 &&
                ConvertStringToInteger(col[2], &arc.ilabel);
            if (ok) {
              d = arc.nextstate;
              arc.olabel = arc.ilabel;
              arc.weight = CWeight::One();
              cfst->AddArc(s, arc);
            }
            break;
          case 4:
            ok = ConvertStringToInteger(col[1], &arc.nextstate) &&
                arc.nextstate >= 0 &&
                ConvertStringToInteger(col[2], &arc.ilabel) &&
                StrToCWeight(col[3], false, &arc.weight);
            if (ok) {
              d = arc.nextstate;
              arc.olabel = arc.ilabel;
              cfst->AddArc(s, arc);
            }
            break;
          case 5: default:
            ok = false;
        }
        // Make sure the destination state exists.
        while (d >= cfst->NumStates())
          cfst->AddState();
        if (!ok) {  // not valid CompactLattice text: give up on cfst.
          delete cfst;
          cfst = NULL;
        }
      }
      if (!fst && !cfst) {
        KALDI_WARN << "Bad line in lattice text format: " << line;
        // read until we get an empty line, so at least we
        // have a chance to read the next one (although this might
        // be a bit futile since the calling code will get unhappy
        // about failing to read this one.
        while (std::getline(is, line)) {
          SplitStringToVector(line, separator.c_str(), true, &col);
          if (col.empty()) break;
        }
        return PairT(static_cast<Lattice*>(NULL),
                     static_cast<CompactLattice*>(NULL));
      }
    }
    return PairT(fst, cfst);
  }

  /// Parses "s" into *w using the weight's operator>>.  Returns false if the
  /// extraction fails, or if the result equals Weight::Zero() while
  /// allow_zero is false; otherwise returns true.
  static bool StrToWeight(const std::string &s, bool allow_zero, Weight *w) {
    std::istringstream strm(s);
    strm >> *w;
    if (!strm || (!allow_zero && *w == Weight::Zero())) {
      return false;
    }
    return true;
  }

  /// Same as StrToWeight(), but for compact-lattice weights (CWeight).
  static  bool StrToCWeight(const std::string &s, bool allow_zero, CWeight *w) {
    std::istringstream strm(s);
    strm >> *w;
    if (!strm || (!allow_zero && *w == CWeight::Zero())) {
      return false;
    }
    return true;
  }
};


/// Reads one lattice in text form from "is" and returns it as a
/// CompactLattice owned by the caller, or NULL if nothing could be parsed.
/// If the text only parsed as a Lattice, it is converted.
CompactLattice *ReadCompactLatticeText(std::istream &is) {
  std::pair<Lattice*, CompactLattice*> parsed = LatticeReader::ReadText(is);
  Lattice *plain = parsed.first;
  CompactLattice *compact = parsed.second;
  if (compact != NULL) {
    // Already in the requested format; drop the other parse (may be NULL).
    delete plain;
    return compact;
  }
  if (plain == NULL)
    return NULL;
  // note: ConvertToCompactLattice frees its input.
  return ConvertToCompactLattice(plain);
}


/// Reads one lattice in text form from "is" and returns it as a Lattice
/// owned by the caller, or NULL if nothing could be parsed.  If the text
/// only parsed as a CompactLattice, it is converted.
Lattice *ReadLatticeText(std::istream &is) {
  std::pair<Lattice*, CompactLattice*> parsed = LatticeReader::ReadText(is);
  Lattice *plain = parsed.first;
  CompactLattice *compact = parsed.second;
  if (plain != NULL) {
    // Already in the requested format; drop the other parse (may be NULL).
    delete compact;
    return plain;
  }
  if (compact == NULL)
    return NULL;
  // note: ConvertToLattice frees its input.
  return ConvertToLattice(compact);
}

bool ReadCompactLattice(std::istream &is, bool binary,
                        CompactLattice **clat) {
  KALDI_ASSERT(*clat == NULL);
  if (binary) {
    fst::FstHeader hdr;
    if (!hdr.Read(is, "<unknown>")) {
      KALDI_WARN << "Reading compact lattice: error reading FST header.";
      return false;
    }
    if (hdr.FstType() != "vector") {
      KALDI_WARN << "Reading compact lattice: unsupported FST type: "
                 << hdr.FstType();
      return false;
    }
    fst::FstReadOptions ropts("<unspecified>",
                              &hdr);

    typedef fst::CompactLatticeWeightTpl<fst::LatticeWeightTpl<float>, int32> T1;
    typedef fst::CompactLatticeWeightTpl<fst::LatticeWeightTpl<double>, int32> T2;
    typedef fst::LatticeWeightTpl<float> T3;
    typedef fst::LatticeWeightTpl<double> T4;
    typedef fst::VectorFst<fst::ArcTpl<T1> > F1;
    typedef fst::VectorFst<fst::ArcTpl<T2> > F2;
    typedef fst::VectorFst<fst::ArcTpl<T3> > F3;
    typedef fst::VectorFst<fst::ArcTpl<T4> > F4;

    CompactLattice *ans = NULL;
    if (hdr.ArcType() == T1::Type()) {
      ans = ConvertToCompactLattice(F1::Read(is, ropts));
    } else if (hdr.ArcType() == T2::Type()) {
      ans = ConvertToCompactLattice(F2::Read(is, ropts));
    } else if (hdr.ArcType() == T3::Type()) {
      ans = ConvertToCompactLattice(F3::Read(is, ropts));
    } else if (hdr.ArcType() == T4::Type()) {
      ans = ConvertToCompactLattice(F4::Read(is, ropts));
    } else {
      KALDI_WARN << "FST with arc type " << hdr.ArcType()
                 << " cannot be converted to CompactLattice.\n";
      return false;
    }
    if (ans == NULL) {
      KALDI_WARN << "Error reading compact lattice (after reading header).";
      return false;
    }
    *clat = ans;
    return true;
  } else {
    // The next line would normally consume the \r on Windows, plus any
    // extra spaces that might have got in there somehow.
    while (std::isspace(is.peek()) && is.peek() != '\n') is.get();
    if (is.peek() == '\n') is.get(); // consume the newline.
    else { // saw spaces but no newline.. this is not expected.
      KALDI_WARN << "Reading compact lattice: unexpected sequence of spaces "
                 << " at file position " << is.tellg();
      return false;
    }
    *clat = ReadCompactLatticeText(is); // that routine will warn on error.
    return (*clat != NULL);
  }
}


/// Clears the held object, then peeks at the first character of "is" to
/// decide between text and binary form and reads a compact lattice into t_.
/// Returns false (after logging a warning) at end of stream or when the
/// first character is neither whitespace nor the FST magic byte.
bool CompactLatticeHolder::Read(std::istream &is) {
  Clear(); // in case anything currently stored.
  const int first = is.peek();
  if (first == -1) {
    KALDI_WARN << "End of stream detected reading CompactLattice.";
    return false;
  }
  if (isspace(first)) {
    // The text form of the lattice begins with space (normally, '\n').  The
    // binary form cannot, because it starts with the FST Type(), which is
    // not space.
    return ReadCompactLattice(is, false, &t_);
  }
  if (first == 214) {
    // 214 (\326 octal) is the first char of the FST magic number on
    // little-endian machines, which is all we support.
    return ReadCompactLattice(is, true, &t_);
  }
  KALDI_WARN << "Reading compact lattice: does not appear to be an FST "
             << " [non-space but no magic number detected], file pos is "
             << is.tellg();
  return false;
}

bool WriteLattice(std::ostream &os, bool binary, const Lattice &t) {
  if (binary) {
    fst::FstWriteOptions opts;
    // Leave all the options default.  Normally these lattices wouldn't have any
    // osymbols/isymbols so no point directing it not to write them (who knows what
    // we'd want to do if we had them).
    return t.Write(os, opts);
  } else {
    // Text-mode output.  Note: we expect that t.InputSymbols() and
    // t.OutputSymbols() would always return NULL.  The corresponding input
    // routine would not work if the FST actually had symbols attached.
    // Write a newline after the key, so the first line of the FST appears
    // on its own line.
    os << '\n';
    bool acceptor = false, write_one = false;
    fst::FstPrinter<LatticeArc> printer(t, t.InputSymbols(),
                                        t.OutputSymbols(),
                                        NULL, acceptor, write_one, "\t");
    printer.Print(&os, "<unknown>");
    if (os.fail())
      KALDI_WARN << "Stream failure detected.";
    // Write another newline as a terminating character.  The read routine will
    // detect this [this is a Kaldi mechanism, not something in the original
    // OpenFst code].
    os << '\n';
    return os.good();
  }
}

// Reads a Lattice from "is" and stores a newly created object in *lat.
//
//   is      Stream positioned at the start of the lattice.
//   binary  If true, the stream holds an OpenFst binary "vector" FST; if
//           false, it holds the newline-delimited text form produced by
//           WriteLattice().
//   lat     Output; *lat must be NULL on entry (asserted).  On success it
//           points to the lattice that was read; the caller is responsible
//           for it (LatticeHolder, for instance, deletes it in Clear()).
//
// In binary mode the stored arc type may be any of the four lattice weight
// variants (compact or not, float or double); the FST is converted to a
// Lattice with ConvertToLattice().
//
// Returns true on success.  On any failure a warning is logged and false is
// returned; no exception is thrown.
bool ReadLattice(std::istream &is, bool binary,
                 Lattice **lat) {
  KALDI_ASSERT(*lat == NULL);
  if (binary) {
    fst::FstHeader hdr;
    if (!hdr.Read(is, "<unknown>")) {
      KALDI_WARN << "Reading lattice: error reading FST header.";
      return false;
    }
    if (hdr.FstType() != "vector") {
      KALDI_WARN << "Reading lattice: unsupported FST type: "
                 << hdr.FstType();
      return false;
    }
    // The header has already been consumed, so hand it to the reader.
    fst::FstReadOptions ropts("<unspecified>",
                              &hdr);

    typedef fst::CompactLatticeWeightTpl<fst::LatticeWeightTpl<float>, int32> T1;
    typedef fst::CompactLatticeWeightTpl<fst::LatticeWeightTpl<double>, int32> T2;
    typedef fst::LatticeWeightTpl<float> T3;
    typedef fst::LatticeWeightTpl<double> T4;
    typedef fst::VectorFst<fst::ArcTpl<T1> > F1;
    typedef fst::VectorFst<fst::ArcTpl<T2> > F2;
    typedef fst::VectorFst<fst::ArcTpl<T3> > F3;
    typedef fst::VectorFst<fst::ArcTpl<T4> > F4;

    // Dispatch on the arc type recorded in the header.
    Lattice *ans = NULL;
    if (hdr.ArcType() == T1::Type()) {
      ans = ConvertToLattice(F1::Read(is, ropts));
    } else if (hdr.ArcType() == T2::Type()) {
      ans = ConvertToLattice(F2::Read(is, ropts));
    } else if (hdr.ArcType() == T3::Type()) {
      ans = ConvertToLattice(F3::Read(is, ropts));
    } else if (hdr.ArcType() == T4::Type()) {
      ans = ConvertToLattice(F4::Read(is, ropts));
    } else {
      KALDI_WARN << "FST with arc type " << hdr.ArcType()
                 << " cannot be converted to Lattice.\n";
      return false;
    }
    if (ans == NULL) {
      KALDI_WARN << "Error reading lattice (after reading header).";
      return false;
    }
    *lat = ans;
    return true;
  } else {
    // The next line would normally consume the \r on Windows, plus any
    // extra spaces that might have got in there somehow.
    while (std::isspace(is.peek()) && is.peek() != '\n') is.get();
    if (is.peek() == '\n') is.get(); // consume the newline.
    else { // saw spaces but no newline.. this is not expected.
      // This is the plain-lattice reader, so the message says "lattice"
      // rather than "compact lattice".
      KALDI_WARN << "Reading lattice: unexpected sequence of spaces "
                 << " at file position " << is.tellg();
      return false;
    }
    *lat = ReadLatticeText(is); // that routine will warn on error.
    return (*lat != NULL);
  }
}


/* Reads one Lattice from "is" into this holder, replacing whatever was held
   before.  No separate binary-mode marker is consulted here; instead the
   first byte of the stream is inspected to work out whether the data is in
   text or binary form.
   Returns true on success; on failure a warning is logged and false is
   returned.
 */
bool LatticeHolder::Read(std::istream &is) {
  Clear(); // in case anything currently stored.
  int c = is.peek();
  if (c == -1) {
    KALDI_WARN << "End of stream detected reading Lattice.";
    return false;
  } else if (isspace(c)) { // The text form of the lattice begins
    // with space (normally, '\n'), so this means it's text (the binary form
    // cannot begin with space because it starts with the FST Type() which is not
    // space).
    return ReadLattice(is, false, &t_);
  } else if (c != 214) { // 214 is first char of FST magic number,
    // on little-endian machines which is all we support (\326 octal)
    // This holder reads plain lattices, so the message says "lattice" rather
    // than "compact lattice".
    KALDI_WARN << "Reading lattice: does not appear to be an FST "
               << " [non-space but no magic number detected], file pos is "
               << is.tellg();
    return false;
  } else {
    return ReadLattice(is, true, &t_);
  }
}


} // end namespace kaldi


================================================
FILE: runtime/engine/kaldi/lat/kaldi-lattice.h
================================================
// lat/kaldi-lattice.h

// Copyright 2009-2011  Microsoft Corporation

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


#ifndef KALDI_LAT_KALDI_LATTICE_H_
#define KALDI_LAT_KALDI_LATTICE_H_

#include "fstext/fstext-lib.h"
#include "base/kaldi-common.h"
// #include "util/common-utils.h"


namespace kaldi {
// will import some things above...

// Weight type used on the arcs of a (non-compact) lattice, instantiated with
// Kaldi's BaseFloat.
typedef fst::LatticeWeightTpl<BaseFloat> LatticeWeight;

// Weight type used on the arcs of a compact lattice: built from a
// LatticeWeight and parameterized on int32.
// careful: kaldi::int32 is not always the same C type as fst::int32
typedef fst::CompactLatticeWeightTpl<LatticeWeight, int32> CompactLatticeWeight;

// Common-divisor functor type matching CompactLatticeWeight.
typedef fst::CompactLatticeWeightCommonDivisorTpl<LatticeWeight, int32>
  CompactLatticeWeightCommonDivisor;

// Arc type whose weight is a LatticeWeight.
typedef fst::ArcTpl<LatticeWeight> LatticeArc;

// Arc type whose weight is a CompactLatticeWeight.
typedef fst::ArcTpl<CompactLatticeWeight> CompactLatticeArc;

// A lattice: an OpenFst VectorFst over LatticeArc.
typedef fst::VectorFst<LatticeArc> Lattice;

// A compact lattice: an OpenFst VectorFst over CompactLatticeArc.
typedef fst::VectorFst<CompactLatticeArc> CompactLattice;

// The following functions for writing and reading lattices in binary or text
// form are provided here in case you need to include lattices in larger,
// Kaldi-type objects with their own Read and Write functions.  Caution: these
// functions return false on stream failure rather than throwing an exception as
// most similar Kaldi functions would do.

// Writes "clat" to "os" in binary or text form, as selected by "binary".
// Returns true on success.
bool WriteCompactLattice(std::ostream &os, bool binary,
                         const CompactLattice &clat);
// Writes "lat" to "os" in binary or text form, as selected by "binary".
// Returns true on success.
bool WriteLattice(std::ostream &os, bool binary,
                  const Lattice &lat);

// Reads a compact lattice from "is" and, on success, stores a pointer to it
// in *clat and returns true.
// the following function requires that *clat be
// NULL when called.
bool ReadCompactLattice(std::istream &is, bool binary,
                        CompactLattice **clat);
// Reads a lattice from "is" and, on success, stores a pointer to it in *lat
// and returns true.
// the following function requires that *lat be
// NULL when called.
bool ReadLattice(std::istream &is, bool binary,
                 Lattice **lat);


class CompactLatticeHolder {
 public:
  typedef CompactLattice T;

  CompactLatticeHolder() { t_ = NULL; }

  static bool Write(std::ostream &os, bool binary, const T &t) {
    // Note: we don't include the binary-mode header when writing
    // this object to disk; this ensures that if we write to single
    // files, the result can be read by OpenFst.
    return WriteCompactLattice(os, binary, t);
  }

  bool Read(std::istream &is);

  static bool IsReadInBinary() { return true; }

  T &Value() {
    KALDI_ASSERT(t_ != NULL && "Called Value() on empty CompactLatticeHolder");
    return *t_;
  }

  void Clear() { delete t_; t_ = NULL; }

  void Swap(CompactLatticeHolder *other) {
    std::swap(t_, other->t_);
  }

  bool ExtractRange(const CompactLatticeHolder &other, const std::string &range) {
    KALDI_ERR << "ExtractRange is not defined for this type of holder.";
    return false;
  }

  ~CompactLatticeHolder() { Clear(); }
 private:
  T *t_;
};

class LatticeHolder {
 public:
  typedef Lattice T;

  LatticeHolder() { t_ = NULL; }

  static bool Write(std::ostream &os, bool binary, const T &t) {
    // Note: we don't include the binary-mode header when writing
    // this object to disk; this ensures that if we write to single
    // files, the result can be read by OpenFst.
    return WriteLattice(os, binary, t);
  }

  bool Read(std::istream &is);

  static bool IsReadInBinary() { return true; }

  T &Value() {
    KALDI_ASSERT(t_ != NULL && "Called Value() on empty LatticeHolder");
    return *t_;
  }

  void Clear() {  delete t_; t_ = NULL; }

  void Swap(LatticeHolder *other) {
    std::swap(t_, other->t_);
  }

  bool ExtractRange(const LatticeHolder &other, const std::string &range) {
    KALDI_ERR << "ExtractRange is not defined for this type of holder.";
    return false;
  }

  ~LatticeHolder() { Clear(); }
 private:
  T *t_;
};

// typedef TableWriter<LatticeHolder> LatticeWriter;
// typedef SequentialTableReader<LatticeHolder> SequentialLatticeReader;
// typedef RandomAccessTableReader<LatticeHolder> RandomAccessLatticeReader;
//
// typedef TableWriter<CompactLatticeHolder> CompactLatticeWriter;
// typedef SequentialTableReader<CompactLatticeHolder> SequentialCompactLatticeReader;
// typedef RandomAccessTableReader<CompactLatticeHolder> RandomAccessCompactLatticeReader;


} // namespace kaldi

#endif  // KALDI_LAT_KALDI_LATTICE_H_


================================================
FILE: runtime/engine/kaldi/lat/lattice-functions.cc
================================================
// lat/lattice-functions.cc

// Copyright 2009-2011  Saarland University (Author: Arnab Ghoshal)
//           2012-2013  Johns Hopkins University (Author: Daniel Povey);  Chao Weng;
//                      Bagher BabaAli
//                2013  Cisco Systems (author: Neha Agrawal) [code modified
//                      from original code in ../gmmbin/gmm-rescore-lattice.cc]
//                2014  Guoguo Chen

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


#include "lat/lattice-functions.h"
// #include "hmm/transition-model.h"
// #include "util/stl-utils.h"
#include "base/kaldi-math.h"
// #include "hmm/hmm-utils.h"

namespace kaldi {
using std::map;
using std::vector;

// void GetPerFrameAcousticCosts(const Lattice &nbest,
//                               Vector<BaseFloat> *per_frame_loglikes) {
//   using namespace fst;
//   typedef Lattice::Arc::Weight Weight;
//   vector<BaseFloat> loglikes;
//
//   int32 cur_state = nbest.Start();
//   int32 prev_frame = -1;
//   BaseFloat eps_acwt = 0.0;
//   while(1) {
//     Weight w = nbest.Final(cur_state);
//     if (w != Weight::Zero()) {
//       KALDI_ASSERT(nbest.NumArcs(cur_state) == 0);
//       if (per_frame_loglikes != NULL)  {
//         SubVector<BaseFloat> subvec(&(loglikes[0]), loglikes.size());
//         Vector<BaseFloat> vec(subvec);
//         *per_frame_loglikes = vec;
//       }
//       break;
//     } else {
//       KALDI_ASSERT(nbest.NumArcs(cur_state) == 1);
//       fst::ArcIterator<Lattice> iter(nbest, cur_state);
//       const Lattice::Arc &arc = iter.Value();
//       BaseFloat acwt = arc.weight.Value2();
//       if (arc.ilabel != 0) {
//         if (eps_acwt > 0) {
//           acwt += eps_acwt;
//           eps_acwt = 0.0;
//         }
//         loglikes.push_back(acwt);
//         prev_frame++;
//       } else if (acwt == acwt){
//         if (prev_frame > -1) {
//           loglikes[prev_frame] += acwt;
//         } else {
//           eps_acwt += acwt;
//         }
//       }
//       cur_state = arc.nextstate;
//     }
//   }
// }
//
// int32 LatticeStateTimes(const Lattice &lat, vector<int32> *times) {
//   if (!lat.Properties(fst::kTopSorted, true))
//     KALDI_ERR << "Input lattice must be topologically sorted.";
//   KALDI_ASSERT(lat.Start() == 0);
//   int32 num_states = lat.NumStates();
//   times->clear();
//   times->resize(num_states, -1);
//   (*times)[0] = 0;
//   for (int32 state = 0; state < num_states; state++) {
//     int32 cur_time = (*times)[state];
//     for (fst::ArcIterator<Lattice> aiter(lat, state); !aiter.Done();
//         aiter.Next()) {
//       const LatticeArc &arc = aiter.Value();
//
//       if (arc.ilabel != 0) {  // Non-epsilon input label on arc
//         // next time instance
//         if ((*times)[arc.nextstate] == -1) {
//           (*times)[arc.nextstate] = cur_time + 1;
//         } else {
//           KALDI_ASSERT((*times)[arc.nextstate] == cur_time + 1);
//         }
//       } else {  // epsilon input label on arc
//         // Same time instance
//         if ((*times)[arc.nextstate] == -1)
//           (*times)[arc.nextstate] = cur_time;
//         else
//           KALDI_ASSERT((*times)[arc.nextstate] == cur_time);
//       }
//     }
//   }
//   return (*std::max_element(times->begin(), times->end()));
// }
//
// int32 CompactLatticeStateTimes(const CompactLattice &lat,
//                                vector<int32> *times) {
//   if (!lat.Properties(fst::kTopSorted, true))
//     KALDI_ERR << "Input lattice must be topologically sorted.";
//   KALDI_ASSERT(lat.Start() == 0);
//   int32 num_states = lat.NumStates();
//   times->clear();
//   times->resize(num_states, -1);
//   (*times)[0] = 0;
//   int32 utt_len = -1;
//   for (int32 state = 0; state < num_states; state++) {
//     int32 cur_time = (*times)[state];
//     for (fst::ArcIterator<CompactLattice> aiter(lat, state); !aiter.Done();
//         aiter.Next()) {
//       const CompactLatticeArc &arc = aiter.Value();
//       int32 arc_len = static_cast<int32>(arc.weight.String().size());
//       if ((*times)[arc.nextstate] == -1)
//         (*times)[arc.nextstate] = cur_time + arc_len;
//       else
//         KALDI_ASSERT((*times)[arc.nextstate] == cur_time + arc_len);
//     }
//     if (lat.Final(state) != CompactLatticeWeight::Zero()) {
//       int32 this_utt_len = (*times)[state] + lat.Final(state).String().size();
//       if (utt_len == -1) utt_len = this_utt_len;
//       else {
//         if (this_utt_len != utt_len) {
//           KALDI_WARN << "Utterance does not "
//               "seem to have a consistent length.";
//           utt_len = std::max(utt_len, this_utt_len);
//         }
//       }
//     }
//   }
//   if (utt_len == -1) {
//     KALDI_WARN << "Utterance does not have a final-state.";
//     return 0;
//   }
//   return utt_len;
// }
//
// bool ComputeCompactLatticeAlphas(const CompactLattice &clat,
//                                  vector<double> *alpha) {
//   using namespace fst;
//
//   // typedef the arc, weight types
//   typedef CompactLattice::Arc Arc;
//   typedef Arc::Weight Weight;
//   typedef Arc::StateId StateId;
//
//   //Make sure the lattice is topologically sorted.
//   if (clat.Properties(fst::kTopSorted, true) == 0) {
//     KALDI_WARN << "Input lattice must be topologically sorted.";
//     return false;
//   }
//   if (clat.Start() != 0) {
//     KALDI_WARN << "Input lattice must start from state 0.";
//     return false;
//   }
//
//   int32 num_states = clat.NumStates();
//   (*alpha).resize(0);
//   (*alpha).resize(num_states, kLogZeroDouble);
//
//   // Now propagate alphas forward. Note that we don't add the weight of the
//   // final state to alpha[final_state] -- we account for it in beta[final_state];
//   (*alpha)[0] = 0.0;
//   for (StateId s = 0; s < num_states; s++) {
//     double this_alpha = (*alpha)[s];
//     for (ArcIterator<CompactLattice> aiter(clat, s);
//          !aiter.Done(); aiter.Next()) {
//       const Arc &arc = aiter.Value();
//       double arc_like = -(arc.weight.Weight().Value1() +
//                           arc.weight.Weight().Value2());
//       (*alpha)[arc.nextstate] = LogAdd((*alpha)[arc.nextstate],
//                                        this_alpha + arc_like);
//     }
//   }
//
//   return true;
// }
//
// bool ComputeCompactLatticeBetas(const CompactLattice &clat,
//                                 vector<double> *beta) {
//   using namespace fst;
//
//   // typedef the arc, weight types
//   typedef CompactLattice::Arc Arc;
//   typedef Arc::Weight Weight;
//   typedef Arc::StateId StateId;
//
//   // Make sure the lattice is topologically sorted.
//   if (clat.Properties(fst::kTopSorted, true) == 0) {
//     KALDI_WARN << "Input lattice must be topologically sorted.";
//     return false;
//   }
//   if (clat.Start() != 0) {
//     KALDI_WARN << "Input lattice must start from state 0.";
//     return false;
//   }
//
//   int32 num_states = clat.NumStates();
//   (*beta).resize(0);
//   (*beta).resize(num_states, kLogZeroDouble);
//
//   // Now propagate betas backward. Note that beta[final_state] contains the
//   // weight of the final state in the lattice -- compare that with alpha.
//   for (StateId s = num_states-1; s >= 0; s--) {
//     Weight f = clat.Final(s);
//     double this_beta = -(f.Weight().Value1()+f.Weight().Value2());
//     for (ArcIterator<CompactLattice> aiter(clat, s);
//          !aiter.Done(); aiter.Next()) {
//       const Arc &arc = aiter.Value();
//       double arc_like = -(arc.weight.Weight().Value1() +
//                           arc.weight.Weight().Value2());
//       double arc_beta = (*beta)[arc.nextstate] + arc_like;
//       this_beta = LogAdd(this_beta, arc_beta);
//     }
//     (*beta)[s] = this_beta;
//   }
//
//   return true;
// }

// Beam-prunes the lattice "*lat" in place.
//
// A forward pass computes, for every state, the lowest cost of reaching it
// from the start state, together with the lowest cost of any complete path
// (best_final_cost).  A backward pass then removes every final-prob and every
// arc whose best complete path through it costs more than
// best_final_cost + beam.  Arcs are removed by redirecting them to a newly
// added non-final state, after which fst::Connect() discards whatever is no
// longer on a path from the start state to a final state.
//
//   beam  Pruning beam; must be > 0 (asserted).
//   lat   Lattice to prune.  It is topologically sorted first if it is not
//         already marked as such.
//
// Returns true if the lattice is non-empty after pruning.  Returns false if
// the lattice has cycles, has no states, has no valid start state, or ends
// up empty.
template<class LatType>  // could be Lattice or CompactLattice
bool PruneLattice(BaseFloat beam, LatType *lat) {
  typedef typename LatType::Arc Arc;
  typedef typename Arc::Weight Weight;
  typedef typename Arc::StateId StateId;

  KALDI_ASSERT(beam > 0.0);
  if (!lat->Properties(fst::kTopSorted, true)) {
    if (fst::TopSort(lat) == false) {
      KALDI_WARN << "Cycles detected in lattice";
      return false;
    }
  }
  // We assume states before "start" are not reachable, since
  // the lattice is topologically sorted.
  int32 start = lat->Start();
  int32 num_states = lat->NumStates();
  if (num_states == 0) return false;
  // A lattice can contain states without having a start state, in which case
  // Start() is negative.  Indexing forward_cost with it would be out of
  // bounds, and nothing is reachable anyway, so treat it as a failure.
  if (start < 0 || start >= num_states) {
    KALDI_WARN << "Lattice has no valid start state";
    return false;
  }
  std::vector<double> forward_cost(num_states,
                                   std::numeric_limits<double>::infinity());  // viterbi forward.
  forward_cost[start] = 0.0; // lattice can't have cycles so couldn't be
  // less than this.
  double best_final_cost = std::numeric_limits<double>::infinity();
  // Update the forward probs.
  // Thanks to Jing Zheng for finding a bug here.
  for (int32 state = 0; state < num_states; state++) {
    double this_forward_cost = forward_cost[state];
    for (fst::ArcIterator<LatType> aiter(*lat, state);
         !aiter.Done();
         aiter.Next()) {
      const Arc &arc(aiter.Value());
      StateId nextstate = arc.nextstate;
      // Topological order: every arc must go to a later state.
      KALDI_ASSERT(nextstate > state && nextstate < num_states);
      double next_forward_cost = this_forward_cost +
          ConvertToCost(arc.weight);
      if (forward_cost[nextstate] > next_forward_cost)
        forward_cost[nextstate] = next_forward_cost;
    }
    Weight final_weight = lat->Final(state);
    double this_final_cost = this_forward_cost +
        ConvertToCost(final_weight);
    if (this_final_cost < best_final_cost)
      best_final_cost = this_final_cost;
  }
  // Added after num_states was recorded, so the loops below never visit it.
  int32 bad_state = lat->AddState(); // this state is not final.
  double cutoff = best_final_cost + beam;

  // Go backwards updating the backward probs (which share memory with the
  // forward probs), and pruning arcs and deleting final-probs.  We prune arcs
  // by making them point to the non-final state "bad_state".  We'll then use
  // Trim() to remove unnecessary arcs and states.  [this is just easier than
  // doing it ourselves.]
  std::vector<double> &backward_cost(forward_cost);
  for (int32 state = num_states - 1; state >= 0; state--) {
    // Read the forward cost before this entry is overwritten with the
    // backward cost at the bottom of the loop.
    double this_forward_cost = forward_cost[state];
    double this_backward_cost = ConvertToCost(lat->Final(state));
    if (this_backward_cost + this_forward_cost > cutoff
        && this_backward_cost != std::numeric_limits<double>::infinity())
      lat->SetFinal(state, Weight::Zero());
    for (fst::MutableArcIterator<LatType> aiter(lat, state);
         !aiter.Done();
         aiter.Next()) {
      Arc arc(aiter.Value());
      StateId nextstate = arc.nextstate;
      KALDI_ASSERT(nextstate > state && nextstate < num_states);
      // nextstate > state, so backward_cost[nextstate] already holds that
      // state's backward cost.
      double arc_cost = ConvertToCost(arc.weight),
          arc_backward_cost = arc_cost + backward_cost[nextstate],
          this_fb_cost = this_forward_cost + arc_backward_cost;
      if (arc_backward_cost < this_backward_cost)
        this_backward_cost = arc_backward_cost;
      if (this_fb_cost > cutoff) { // Prune the arc.
        arc.nextstate = bad_state;
        aiter.SetValue(arc);
      }
    }
    backward_cost[state] = this_backward_cost;
  }
  fst::Connect(lat);
  return (lat->NumStates() > 0);
}

// Explicitly instantiate the PruneLattice template for the two lattice types,
// Lattice and CompactLattice, so that the definition in this file is compiled
// for both.
template bool PruneLattice(BaseFloat beam, Lattice *lat);
template bool PruneLattice(BaseFloat beam, CompactLattice *lat);


// BaseFloat LatticeForwardBackward(const Lattice &lat, Posterior *post,
//                                  double *acoustic_like_sum) {
//   // Note, Posterior is defined as follows:  Indexed [frame], then a list
//   // of (transition-id, posterior-probability) pairs.
//   // typedef std::vector<std::vector<std::pair<int32, BaseFloat> > > Posterior;
//   using namespace fst;
//   typedef Lattice::Arc Arc;
//   typedef Arc::Weight Weight;
//   typedef Arc::StateId StateId;
//
//   if (acoustic_like_sum) *acoustic_like_sum = 0.0;
//
//   // Make sure the lattice is topologically sorted.
//   if (lat.Properties(fst::kTopSorted, true) == 0)
//     KALDI_ERR << "Input lattice must be topologically sorted.";
//   KALDI_ASSERT(lat.Start() == 0);
//
//   int32 num_states = lat.NumStates();
//   vector<int32> state_times;
//   int32 max_time = LatticeStateTimes(lat, &state_times);
//   std::vector<double> alpha(num_states, kLogZeroDouble);
//   std::vector<double> &beta(alpha); // we re-use the same memory for
//   // this, but it's semantically distinct so we name it differently.
//   double tot_forward_prob = kLogZeroDouble;
//
//   post->clear();
//   post->resize(max_time);
//
//   alpha[0] = 0.0;
//   // Propagate alphas forward.
//   for (StateId s = 0; s < num_states; s++) {
//     double this_alpha = alpha[s];
//     for (ArcIterator<Lattice> aiter(lat, s); !aiter.Done(); aiter.Next()) {
//       const Arc &arc = aiter.Value();
//       double arc_like = -ConvertToCost(arc.weight);
//       alpha[arc.nextstate] = LogAdd(alpha[arc.nextstate], this_alpha + arc_like);
//     }
//     Weight f = lat.Final(s);
//     if (f != Weight::Zero()) {
//       double final_like = this_alpha - (f.Value1() + f.Value2());
//       tot_forward_prob = LogAdd(tot_forward_prob, final_like);
//       KALDI_ASSERT(state_times[s] == max_time &&
//                    "Lattice is inconsistent (final-prob not at max_time)");
//     }
//   }
//   for (StateId s = num_states-1; s >= 0; s--) {
//     Weight f = lat.Final(s);
//     double this_beta = -(f.Value1() + f.Value2());
//     for (ArcIterator<Lattice> aiter(lat, s); !aiter.Done(); aiter.Next()) {
//       const Arc &arc = aiter.Value();
//       double arc_like = -ConvertToCost(arc.weight),
//           arc_beta = beta[arc.nextstate] + arc_like;
//       this_beta = LogAdd(this_beta, arc_beta);
//       int32 transition_id = arc.ilabel;
//
//       // The following "if" is an optimization to avoid un-needed exp().
//       if (transition_id != 0 || acoustic_like_sum != NULL) {
//         double posterior = Exp(alpha[s] + arc_beta - tot_forward_prob);
//
//         if (transition_id != 0) // Arc has a transition-id on it [not epsilon]
//           (*post)[state_times[s]].push_back(std::make_pair(transition_id,
//                                                            static_cast<kaldi::BaseFloat>(posterior)));
//         if (acoustic_like_sum != NULL)
//           *acoustic_like_sum -= posterior * arc.weight.Value2();
//       }
//     }
//     if (acoustic_like_sum != NULL && f != Weight::Zero()) {
//       double final_logprob = - ConvertToCost(f),
//           posterior = Exp(alpha[s] + final_logprob - tot_forward_prob);
//       *acoustic_like_sum -= posterior * f.Value2();
//     }
//     beta[s] = this_beta;
//   }
//   double tot_backward_prob = beta[0];
//   if (!ApproxEqual(tot_forward_prob, tot_backward_prob, 1e-8)) {
//     KALDI_WARN << "Total forward probability over lattice = " << tot_forward_prob
//               << ", while total backward probability = " << tot_backward_prob;
//   }
//   // Now combine any posteriors with the same transition-id.
//   for (int32 t = 0; t < max_time; t++)
//     MergePairVectorSumming(&((*post)[t]));
//   return tot_backward_prob;
// }
//
//
// void LatticeActivePhones(const Lattice &lat, const TransitionModel &trans,
//                          const vector<int32> &silence_phones,
//                          vector< std::set<int32> > *active_phones) {
//   KALDI_ASSERT(IsSortedAndUniq(silence_phones));
//   vector<int32> state_times;
//   int32 num_states = lat.NumStates();
//   int32 max_time = LatticeStateTimes(lat, &state_times);
//   active_phones->clear();
//   active_phones->resize(max_time);
//   for (int32 state = 0; state < num_states; state++) {
//     int32 cur_time = state_times[state];
//     for (fst::ArcIterator<Lattice> aiter(lat, state); !aiter.Done();
//         aiter.Next()) {
//       const LatticeArc &arc = aiter.Value();
//       if (arc.ilabel != 0) {  // Non-epsilon arc
//         int32 phone = trans.TransitionIdToPhone(arc.ilabel);
//         if (!std::binary_search(silence_phones.begin(),
//                                 silence_phones.end(), phone))
//           (*active_phones)[cur_time].insert(phone);
//       }
//     }  // end looping over arcs
//   }  // end looping over states
// }
//
// void ConvertLatticeToPhones(const TransitionModel &trans,
//                             Lattice *lat) {
//   typedef LatticeArc Arc;
//   int32 num_states = lat->NumStates();
//   for (int32 state = 0; state < num_states; state++) {
//     for (fst::MutableArcIterator<Lattice> aiter(lat, state); !aiter.Done();
//         aiter.Next()) {
//       Arc arc(aiter.Value());
//       arc.olabel = 0; // remove any word.
//       if ((arc.ilabel != 0) // has a transition-id on input..
//           && (trans.TransitionIdToHmmState(arc.ilabel) == 0)
//           && (!trans.IsSelfLoop(arc.ilabel))) {
//          // && trans.IsFinal(arc.ilabel)) // there is one of these per phone...
//         arc.olabel = trans.TransitionIdToPhone(arc.ilabel);
//       }
//       aiter.SetValue(arc);
//     }  // end looping over arcs
//   }  // end looping over states
// }
//
//
// static inline double LogAddOrMax(bool viterbi, double a, double b) {
//   if (viterbi)
//     return std::max(a, b);
//   else
//     return LogAdd(a, b);
// }
//
// template<typename LatticeType>
// double ComputeLatticeAlphasAndBetas(const LatticeType &lat,
//                                     bool viterbi,
//                                     vector<double> *alpha,
//                                     vector<double> *beta) {
//   typedef typename LatticeType::Arc Arc;
//   typedef typename Arc::Weight Weight;
//   typedef typename Arc::StateId StateId;
//
//   StateId num_states = lat.NumStates();
//   KALDI_ASSERT(lat.Properties(fst::kTopSorted, true) == fst::kTopSorted);
//   KALDI_ASSERT(lat.Start() == 0);
//   alpha->clear();
//   beta->clear();
//   alpha->resize(num_states, kLogZeroDouble);
//   beta->resize(num_states, kLogZeroDouble);
//
//   double tot_forward_prob = kLogZeroDouble;
//   (*alpha)[0] = 0.0;
//   // Propagate alphas forward.
//   for (StateId s = 0; s < num_states; s++) {
//     double this_alpha = (*alpha)[s];
//     for (fst::ArcIterator<LatticeType> aiter(lat, s); !aiter.Done();
//          aiter.Next()) {
//       const Arc &arc = aiter.Value();
//       double arc_like = -ConvertToCost(arc.weight);
//       (*alpha)[arc.nextstate] = LogAddOrMax(viterbi, (*alpha)[arc.nextstate],
//                                                 this_alpha + arc_like);
//     }
//     Weight f = lat.Final(s);
//     if (f != Weight::Zero()) {
//       double final_like = this_alpha - ConvertToCost(f);
//       tot_forward_prob = LogAddOrMax(viterbi, tot_forward_prob, final_like);
//     }
//   }
//   for (StateId s = num_states-1; s >= 0; s--) { // it's guaranteed signed.
//     double this_beta = -ConvertToCost(lat.Final(s));
//     for (fst::ArcIterator<LatticeType> aiter(lat, s); !aiter.Done();
//          aiter.Next()) {
//       const Arc &arc = aiter.Value();
//       double arc_like = -ConvertToCost(arc.weight),
//           arc_beta = (*beta)[arc.nextstate] + arc_like;
//       this_beta = LogAddOrMax(viterbi, this_beta, arc_beta);
//     }
//     (*beta)[s] = this_beta;
//   }
//   double tot_backward_prob = (*beta)[lat.Start()];
//   if (!ApproxEqual(tot_forward_prob, tot_backward_prob, 1e-8)) {
//     KALDI_WARN << "Total forward probability over lattice = " << tot_forward_prob
//                << ", while total backward probability = " << tot_backward_prob;
//   }
//   // Split the difference when returning... they should be the same.
//   return 0.5 * (tot_backward_prob + tot_forward_prob);
// }
//
// // instantiate the template for Lattice and CompactLattice
// template
// double ComputeLatticeAlphasAndBetas(const Lattice &lat,
//                                     bool viterbi,
//                                     vector<double> *alpha,
//                                     vector<double> *beta);
//
// template
// double ComputeLatticeAlphasAndBetas(const CompactLattice &lat,
//                                     bool viterbi,
//                                     vector<double> *alpha,
//                                     vector<double> *beta);
//
//
//
// /// This is used in CompactLatticeLimitDepth.
// struct LatticeArcRecord {
//   BaseFloat logprob; // logprob <= 0 is the best Viterbi logprob of this arc,
//                      // minus the overall best-cost of the lattice.
//   CompactLatticeArc::StateId state; // state in the lattice.
//   size_t arc; // arc index within the state.
//   bool operator < (const LatticeArcRecord &other) const {
//     return logprob < other.logprob;
//   }
// };
//
// void CompactLatticeLimitDepth(int32 max_depth_per_frame,
//                               CompactLattice *clat) {
//   typedef CompactLatticeArc Arc;
//   typedef Arc::Weight Weight;
//   typedef Arc::StateId StateId;
//
//   if (clat->Start() == fst::kNoStateId) {
//     KALDI_WARN << "Limiting depth of empty lattice.";
//     return;
//   }
//   if (clat->Properties(fst::kTopSorted, true) == 0) {
//     if (!TopSort(clat))
//       KALDI_ERR << "Topological sorting of lattice failed.";
//   }
//
//   vector<int32> state_times;
//   int32 T = CompactLatticeStateTimes(*clat, &state_times);
//
//   // The alpha and beta quantities here are "viterbi" alphas and beta.
//   std::vector<double> alpha;
//   std::vector<double> beta;
//   bool viterbi = true;
//   double best_prob = ComputeLatticeAlphasAndBetas(*clat, viterbi,
//                                                   &alpha, &beta);
//
//   std::vector<std::vector<LatticeArcRecord> > arc_records(T);
//
//   StateId num_states = clat->NumStates();
//   for (StateId s = 0; s < num_states; s++) {
//     for (fst::ArcIterator<CompactLattice> aiter(*clat, s); !aiter.Done();
//          aiter.Next()) {
//       const Arc &arc = aiter.Value();
//       LatticeArcRecord arc_record;
//       arc_record.state = s;
//       arc_record.arc = aiter.Position();
//       arc_record.logprob =
//           (alpha[s] + beta[arc.nextstate] - ConvertToCost(arc.weight))
//            - best_prob;
//       KALDI_ASSERT(arc_record.logprob < 0.1); // Should be zero or negative.
//       int32 num_frames = arc.weight.String().size(), start_t = state_times[s];
//       for (int32 t = start_t; t < start_t + num_frames; t++) {
//         KALDI_ASSERT(t < T);
//         arc_records[t].push_back(arc_record);
//       }
//     }
//   }
//   StateId dead_state = clat->AddState(); // A non-coaccessible state which we
//                                          // use to remove arcs (make them end
//                                          // there).
//   size_t max_depth = max_depth_per_frame;
//   for (int32 t = 0; t < T; t++) {
//     size_t size = arc_records[t].size();
//     if (size > max_depth) {
//       // we sort from worst to best, so we keep the later-numbered ones,
//       // and delete the lower-numbered ones.
//       size_t cutoff = size - max_depth;
//       std::nth_element(arc_records[t].begin(),
//                        arc_records[t].begin() + cutoff,
//                        arc_records[t].end());
//       for (size_t index = 0; index < cutoff; index++) {
//         LatticeArcRecord record(arc_records[t][index]);
//         fst::MutableArcIterator<CompactLattice> aiter(clat, record.state);
//         aiter.Seek(record.arc);
//         Arc arc = aiter.Value();
//         if (arc.nextstate != dead_state) { // not already killed.
//           arc.nextstate = dead_state;
//           aiter.SetValue(arc);
//         }
//       }
//     }
//   }
//   Connect(clat);
//   TopSortCompactLatticeIfNeeded(clat);
// }
//
//
// void TopSortCompactLatticeIfNeeded(CompactLattice *clat) {
//   if (clat->Properties(fst::kTopSorted, true) == 0) {
//     if (fst::TopSort(clat) == false) {
//       KALDI_ERR << "Topological sorting failed";
//     }
//   }
// }
//
// void TopSortLatticeIfNeeded(Lattice *lat) {
//   if (lat->Properties(fst::kTopSorted, true) == 0) {
//     if (fst::TopSort(lat) == false) {
//       KALDI_ERR << "Topological sorting failed";
//     }
//   }
// }
//
//
// /// Returns the depth of the lattice, defined as the average number of
// /// arcs crossing any given frame.  Returns 1 for empty lattices.
// /// Requires that input is topologically sorted.
// BaseFloat CompactLatticeDepth(const CompactLattice &clat,
//                               int32 *num_frames) {
//   typedef CompactLattice::Arc::StateId StateId;
//   if (clat.Properties(fst::kTopSorted, true) == 0) {
//     KALDI_ERR << "Lattice input to CompactLatticeDepth was not topologically "
//               << "sorted.";
//   }
//   if (clat.Start() == fst::kNoStateId) {
//     *num_frames = 0;
//     return 1.0;
//   }
//   size_t num_arc_frames = 0;
//   int32 t;
//   {
//     vector<int32> state_times;
//     t = CompactLatticeStateTimes(clat, &state_times);
//   }
//   if (num_frames != NULL)
//     *num_frames = t;
//   for (StateId s = 0; s < clat.NumStates(); s++) {
//     for (fst::ArcIterator<CompactLattice> aiter(clat, s); !aiter.Done();
//          aiter.Next()) {
//       const CompactLatticeArc &arc = aiter.Value();
//       num_arc_frames += arc.weight.String().size();
//     }
//     num_arc_frames += clat.Final(s).String().size();
//   }
//   return num_arc_frames / static_cast<BaseFloat>(t);
// }
//
//
// void CompactLatticeDepthPerFrame(const CompactLattice &clat,
//                                  std::vector<int32> *depth_per_frame) {
//   typedef CompactLattice::Arc::StateId StateId;
//   if (clat.Properties(fst::kTopSorted, true) == 0) {
//     KALDI_ERR << "Lattice input to CompactLatticeDepthPerFrame was not "
//               << "topologically sorted.";
//   }
//   if (clat.Start() == fst::kNoStateId) {
//     depth_per_frame->clear();
//     return;
//   }
//   vector<int32> state_times;
//   int32 T = CompactLatticeStateTimes(clat, &state_times);
//
//   depth_per_frame->clear();
//   if (T <= 0) {
//     return;
//   } else {
//     depth_per_frame->resize(T, 0);
//     for (StateId s = 0; s < clat.NumStates(); s++) {
//       int32 start_time = state_times[s];
//       for (fst::ArcIterator<CompactLattice> aiter(clat, s); !aiter.Done();
//            aiter.Next()) {
//         const CompactLatticeArc &arc = aiter.Value();
//         int32 len = arc.weight.String().size();
//         for (int32 t = start_time; t < start_time + len; t++) {
//           KALDI_ASSERT(t < T);
//           (*depth_per_frame)[t]++;
//         }
//       }
//       int32 final_len = clat.Final(s).String().size();
//       for (int32 t = start_time; t < start_time + final_len; t++) {
//         KALDI_ASSERT(t < T);
//         (*depth_per_frame)[t]++;
//       }
//     }
//   }
// }
//
//
//
// void ConvertCompactLatticeToPhones(const TransitionModel &trans,
//                                    CompactLattice *clat) {
//   typedef CompactLatticeArc Arc;
//   typedef Arc::Weight Weight;
//   int32 num_states = clat->NumStates();
//   for (int32 state = 0; state < num_states; state++) {
//     for (fst::MutableArcIterator<CompactLattice> aiter(clat, state);
//          !aiter.Done();
//          aiter.Next()) {
//       Arc arc(aiter.Value());
//       std::vector<int32> phone_seq;
//       const std::vector<int32> &tid_seq = arc.weight.String();
//       for (std::vector<int32>::const_iterator iter = tid_seq.begin();
//            iter != tid_seq.end(); ++iter) {
//         if (trans.IsFinal(*iter))// note: there is one of these per phone...
//           phone_seq.push_back(trans.TransitionIdToPhone(*iter));
//       }
//       arc.weight.SetString(phone_seq);
//       aiter.SetValue(arc);
//     } // end looping over arcs
//     Weight f = clat->Final(state);
//     if (f != Weight::Zero()) {
//       std::vector<int32> phone_seq;
//       const std::vector<int32> &tid_seq = f.String();
//       for (std::vector<int32>::const_iterator iter = tid_seq.begin();
//            iter != tid_seq.end(); ++iter) {
//         if (trans.IsFinal(*iter))// note: there is one of these per phone...
//           phone_seq.push_back(trans.TransitionIdToPhone(*iter));
//       }
//       f.SetString(phone_seq);
//       clat->SetFinal(state, f);
//     }
//   }  // end looping over states
// }
//
// bool LatticeBoost(const TransitionModel &trans,
//                   const std::vector<int32> &alignment,
//                   const std::vector<int32> &silence_phones,
//                   BaseFloat b,
//                   BaseFloat max_silence_error,
//                   Lattice *lat) {
//   TopSortLatticeIfNeeded(lat);
//
//   // get all stored properties (test==false means don't test if not known).
//   uint64 props = lat->Properties(fst::kFstProperties,
//                                  false);
//
//   KALDI_ASSERT(IsSortedAndUniq(silence_phones));
//   KALDI_ASSERT(max_silence_error >= 0.0 && max_silence_error <= 1.0);
//   vector<int32> state_times;
//   int32 num_states = lat->NumStates();
//   int32 num_frames = LatticeStateTimes(*lat, &state_times);
//   KALDI_ASSERT(num_frames == static_cast<int32>(alignment.size()));
//   for (int32 state = 0; state < num_states; state++) {
//     int32 cur_time = state_times[state];
//     for (fst::MutableArcIterator<Lattice> aiter(lat, state); !aiter.Done();
//          aiter.Next()) {
//       LatticeArc arc = aiter.Value();
//       if (arc.ilabel != 0) {  // Non-epsilon arc
//         if (arc.ilabel < 0 || arc.ilabel > trans.NumTransitionIds()) {
//           KALDI_WARN << "Lattice has out-of-range transition-ids: "
//                      << "lattice/model mismatch?";
//           return false;
//         }
//         int32 phone = trans.TransitionIdToPhone(arc.ilabel),
//             ref_phone = trans.TransitionIdToPhone(alignment[cur_time]);
//         BaseFloat frame_error;
//         if (phone == ref_phone) {
//           frame_error = 0.0;
//         } else { // an error...
//           if (std::binary_search(silence_phones.begin(), silence_phones.end(), phone))
//             frame_error = max_silence_error;
//           else
//             frame_error = 1.0;
//         }
//         BaseFloat delta_cost = -b * frame_error; // negative cost if
//         // frame is wrong, to boost likelihood of arcs with errors on them.
//         // Add this cost to the graph part.
//         arc.weight.SetValue1(arc.weight.Value1() + delta_cost);
//         aiter.SetValue(arc);
//       }
//     }
//   }
//   // All we changed is the weights, so any properties that were
//   // known before, are still known, except for whether or not the
//   // lattice was weighted.
//   lat->SetProperties(props,
//                      ~(fst::kWeighted|fst::kUnweighted));
//
//   return true;
// }
//
//
//
// BaseFloat LatticeForwardBackwardMpeVariants(
//     const TransitionModel &trans,
//     const std::vector<int32> &silence_phones,
//     const Lattice &lat,
//     const std::vector<int32> &num_ali,
//     std::string criterion,
//     bool one_silence_class,
//     Posterior *post) {
//   using namespace fst;
//   typedef Lattice::Arc Arc;
//   typedef Arc::Weight Weight;
//   typedef Arc::StateId StateId;
//
//   KALDI_ASSERT(criterion == "mpfe" || criterion == "smbr");
//   bool is_mpfe = (criterion == "mpfe");
//
//   if (lat.Properties(fst::kTopSorted, true) == 0)
//     KALDI_ERR << "Input lattice must be topologically sorted.";
//   KALDI_ASSERT(lat.Start() == 0);
//
//   int32 num_states = lat.NumStates();
//   vector<int32> state_times;
//   int32 max_time = LatticeStateTimes(lat, &state_times);
//   KALDI_ASSERT(max_time == static_cast<int32>(num_ali.size()));
//   std::vector<double> alpha(num_states, kLogZeroDouble),
//       alpha_smbr(num_states, 0), //forward variable for sMBR
//       beta(num_states, kLogZeroDouble),
//       beta_smbr(num_states, 0); //backward variable for sMBR
//
//   double tot_forward_prob = kLogZeroDouble;
//   double tot_forward_score = 0;
//
//   post->clear();
//   post->resize(max_time);
//
//   alpha[0] = 0.0;
//   // First Pass Forward,
//   for (StateId s = 0; s < num_states; s++) {
//     double this_alpha = alpha[s];
//     for (ArcIterator<Lattice> aiter(lat, s); !aiter.Done(); aiter.Next()) {
//       const Arc &arc = aiter.Value();
//       double arc_like = -ConvertToCost(arc.weight);
//       alpha[arc.nextstate] = LogAdd(alpha[arc.nextstate], this_alpha + arc_like);
//     }
//     Weight f = lat.Final(s);
//     if (f != Weight::Zero()) {
//       double final_like = this_alpha - (f.Value1() + f.Value2());
//       tot_forward_prob = LogAdd(tot_forward_prob, final_like);
//       KALDI_ASSERT(state_times[s] == max_time &&
//                    "Lattice is inconsistent (final-prob not at max_time)");
//     }
//   }
//   // First Pass Backward,
//   for (StateId s = num_states-1; s >= 0; s--) {
//     Weight f = lat.Final(s);
//     double this_beta = -(f.Value1() + f.Value2());
//     for (ArcIterator<Lattice> aiter(lat, s); !aiter.Done(); aiter.Next()) {
//       const Arc &arc = aiter.Value();
//       double arc_like = -ConvertToCost(arc.weight),
//           arc_beta = beta[arc.nextstate] + arc_like;
//       this_beta = LogAdd(this_beta, arc_beta);
//     }
//     beta[s] = this_beta;
//   }
//   // First Pass Forward-Backward Check
//   double tot_backward_prob = beta[0];
//   // we may need to loosen this tolerance; currently 1e-6 (was 1e-8)
//   if (!ApproxEqual(tot_forward_prob, tot_backward_prob, 1e-6)) {
//     KALDI_ERR << "Total forward probability over lattice = " << tot_forward_prob
//               << ", while total backward probability = " << tot_backward_prob;
//   }
//
//   alpha_smbr[0] = 0.0;
//   // Second Pass Forward, calculate forward for MPFE/SMBR
//   for (StateId s = 0; s < num_states; s++) {
//     double this_alpha = alpha[s];
//     for (ArcIterator<Lattice> aiter(lat, s); !aiter.Done(); aiter.Next()) {
//       const Arc &arc = aiter.Value();
//       double arc_like = -ConvertToCost(arc.weight);
//       double frame_acc = 0.0;
//       if (arc.ilabel != 0) {
//         int32 cur_time = state_times[s];
//         int32 phone = trans.TransitionIdToPhone(arc.ilabel),
//             ref_phone = trans.TransitionIdToPhone(num_ali[cur_time]);
//         bool phone_is_sil = std::binary_search(silence_phones.begin(),
//                                                silence_phones.end(),
//                                                phone),
//             ref_phone_is_sil = std::binary_search(silence_phones.begin(),
//                                                   silence_phones.end(),
//                                                   ref_phone),
//             both_sil = phone_is_sil && ref_phone_is_sil;
//         if (!is_mpfe) { // smbr.
//           int32 pdf = trans.TransitionIdToPdf(arc.ilabel),
//               ref_pdf = trans.TransitionIdToPdf(num_ali[cur_time]);
//           if (!one_silence_class)  // old behavior
//             frame_acc = (pdf == ref_pdf && !phone_is_sil) ? 1.0 : 0.0;
//           else
//             frame_acc = (pdf == ref_pdf || both_sil) ? 1.0 : 0.0;
//         } else {
//           if (!one_silence_class)  // old behavior
//             frame_acc = (phone == ref_phone && !phone_is_sil) ? 1.0 : 0.0;
//           else
//             frame_acc = (phone == ref_phone || both_sil) ? 1.0 : 0.0;
//         }
//       }
//       double arc_scale = Exp(alpha[s] + arc_like - alpha[arc.nextstate]);
//       alpha_smbr[arc.nextstate] += arc_scale * (alpha_smbr[s] + frame_acc);
//     }
//     Weight f = lat.Final(s);
//     if (f != Weight::Zero()) {
//       double final_like = this_alpha - (f.Value1() + f.Value2());
//       double arc_scale = Exp(final_like - tot_forward_prob);
//       tot_forward_score += arc_scale * alpha_smbr[s];
//       KALDI_ASSERT(state_times[s] == max_time &&
//                    "Lattice is inconsistent (final-prob not at max_time)");
//     }
//   }
//   // Second Pass Backward, collect Mpe style posteriors
//   for (StateId s = num_states-1; s >= 0; s--) {
//     for (ArcIterator<Lattice> aiter(lat, s); !aiter.Done(); aiter.Next()) {
//       const Arc &arc = aiter.Value();
//       double arc_like = -ConvertToCost(arc.weight),
//           arc_beta = beta[arc.nextstate] + arc_like;
//       double frame_acc = 0.0;
//       int32 transition_id = arc.ilabel;
//       if (arc.ilabel != 0) {
//         int32 cur_time = state_times[s];
//         int32 phone = trans.TransitionIdToPhone(arc.ilabel),
//             ref_phone = trans.TransitionIdToPhone(num_ali[cur_time]);
//         bool phone_is_sil = std::binary_search(silence_phones.begin(),
//                                                silence_phones.end(), phone),
//             ref_phone_is_sil = std::binary_search(silence_phones.begin(),
//                                                   silence_phones.end(),
//                                                   ref_phone),
//             both_sil = phone_is_sil && ref_phone_is_sil;
//         if (!is_mpfe) { // smbr.
//           int32 pdf = trans.TransitionIdToPdf(arc.ilabel),
//               ref_pdf = trans.TransitionIdToPdf(num_ali[cur_time]);
//           if (!one_silence_class)  // old behavior
//             frame_acc = (pdf == ref_pdf && !phone_is_sil) ? 1.0 : 0.0;
//           else
//             frame_acc = (pdf == ref_pdf || both_sil) ? 1.0 : 0.0;
//         } else {
//           if (!one_silence_class)  // old behavior
//             frame_acc = (phone == ref_phone && !phone_is_sil) ? 1.0 : 0.0;
//           else
//             frame_acc = (phone == ref_phone || both_sil) ? 1.0 : 0.0;
//         }
//       }
//       double arc_scale = Exp(beta[arc.nextstate] + arc_like - beta[s]);
//       // check arc_scale NAN,
//       // this is to prevent partial paths in Lattices
//       // i.e., paths don't survive to the final state
//       if (KALDI_ISNAN(arc_scale)) arc_scale = 0;
//       beta_smbr[s] += arc_scale * (beta_smbr[arc.nextstate] + frame_acc);
//
//       if (transition_id != 0) { // Arc has a transition-id on it [not epsilon]
//         double posterior = Exp(alpha[s] + arc_beta - tot_forward_prob);
//         double acc_diff = alpha_smbr[s] + frame_acc + beta_smbr[arc.nextstate]
//                                - tot_forward_score;
//         double posterior_smbr = posterior * acc_diff;
//         (*post)[state_times[s]].push_back(std::make_pair(transition_id,
//                                                          static_cast<BaseFloat>(posterior_smbr)));
//       }
//     }
//   }
//
//   //Second Pass Forward Backward check
//   double tot_backward_score = beta_smbr[0];  // Initial state id == 0
//   // we may need to loosen this tolerance (1e-5/1e-4)
//   if (!ApproxEqual(tot_forward_score, tot_backward_score, 1e-4)) {
//     KALDI_ERR << "Total forward score over lattice = " << tot_forward_score
//               << ", while total backward score = " << tot_backward_score;
//   }
//
//   // Output the computed posteriors
//   for (int32 t = 0; t < max_time; t++)
//     MergePairVectorSumming(&((*post)[t]));
//   return tot_forward_score;
// }
//
// bool CompactLatticeToWordAlignment(const CompactLattice &clat,
//                                    std::vector<int32> *words,
//                                    std::vector<int32> *begin_times,
//                                    std::vector<int32> *lengths) {
//   words->clear();
//   begin_times->clear();
//   lengths->clear();
//   typedef CompactLattice::Arc Arc;
//   typedef Arc::Label Label;
//   typedef CompactLattice::StateId StateId;
//   typedef CompactLattice::Weight Weight;
//   using namespace fst;
//   StateId state = clat.Start();
//   int32 cur_time = 0;
//   if (state == kNoStateId) {
//     KALDI_WARN << "Empty lattice.";
//     return false;
//   }
//   while (1) {
//     Weight final = clat.Final(state);
//     size_t num_arcs = clat.NumArcs(state);
//     if (final != Weight::Zero()) {
//       if (num_arcs != 0) {
//         KALDI_WARN << "Lattice is not linear.";
//         return false;
//       }
//       if (! final.String().empty()) {
//         KALDI_WARN << "Lattice has alignments on final-weight: probably "
//             "was not word-aligned (alignments will be approximate)";
//       }
//       return true;
//     } else {
//       if (num_arcs != 1) {
//         KALDI_WARN << "Lattice is not linear: num-arcs = " << num_arcs;
//         return false;
//       }
//       fst::ArcIterator<CompactLattice> aiter(clat, state);
//       const Arc &arc = aiter.Value();
//       Label word_id = arc.ilabel; // Note: ilabel==olabel, since acceptor.
//       // Also note: word_id may be zero; we output it anyway.
//       int32 length = arc.weight.String().size();
//       words->push_back(word_id);
//       begin_times->push_back(cur_time);
//       lengths->push_back(length);
//       cur_time += length;
//       state = arc.nextstate;
//     }
//   }
// }
//
//
// bool CompactLatticeToWordProns(
//     const TransitionModel &tmodel,
//     const CompactLattice &clat,
//     std::vector<int32> *words,
//     std::vector<int32> *begin_times,
//     std::vector<int32> *lengths,
//     std::vector<std::vector<int32> > *prons,
//     std::vector<std::vector<int32> > *phone_lengths) {
//   words->clear();
//   begin_times->clear();
//   lengths->clear();
//   prons->clear();
//   phone_lengths->clear();
//   typedef CompactLattice::Arc Arc;
//   typedef Arc::Label Label;
//   typedef CompactLattice::StateId StateId;
//   typedef CompactLattice::Weight Weight;
//   using namespace fst;
//   StateId state = clat.Start();
//   int32 cur_time = 0;
//   if (state == kNoStateId) {
//     KALDI_WARN << "Empty lattice.";
//     return false;
//   }
//   while (1) {
//     Weight final = clat.Final(state);
//     size_t num_arcs = clat.NumArcs(state);
//     if (final != Weight::Zero()) {
//       if (num_arcs != 0) {
//         KALDI_WARN << "Lattice is not linear.";
//         return false;
//       }
//       if (! final.String().empty()) {
//         KALDI_WARN << "Lattice has alignments on final-weight: probably "
//             "was not word-aligned (alignments will be approximate)";
//       }
//       return true;
//     } else {
//       if (num_arcs != 1) {
//         KALDI_WARN << "Lattice is not linear: num-arcs = " << num_arcs;
//         return false;
//       }
//       fst::ArcIterator<CompactLattice> aiter(clat, state);
//       const Arc &arc = aiter.Value();
//       Label word_id = arc.ilabel; // Note: ilabel==olabel, since acceptor.
//       // Also note: word_id may be zero; we output it anyway.
//       int32 length = arc.weight.String().size();
//       words->push_back(word_id);
//       begin_times->push_back(cur_time);
//       lengths->push_back(length);
//       const std::vector<int32> &arc_alignment = arc.weight.String();
//       std::vector<std::vector<int32> > split_alignment;
//       SplitToPhones(tmodel, arc_alignment, &split_alignment);
//       std::vector<int32> phones(split_alignment.size());
//       std::vector<int32> plengths(split_alignment.size());
//       for (size_t i = 0; i < split_alignment.size(); i++) {
//         KALDI_ASSERT(!split_alignment[i].empty());
//         phones[i] = tmodel.TransitionIdToPhone(split_alignment[i][0]);
//         plengths[i] = split_alignment[i].size();
//       }
//       prons->push_back(phones);
//       phone_lengths->push_back(plengths);
//
//       cur_time += length;
//       state = arc.nextstate;
//     }
//   }
// }
//
//
//
// void CompactLatticeShortestPath(const CompactLattice &clat,
//                                 CompactLattice *shortest_path) {
//   using namespace fst;
//   if (clat.Properties(fst::kTopSorted, true) == 0) {
//     CompactLattice clat_copy(clat);
//     if (!TopSort(&clat_copy))
//       KALDI_ERR << "Was not able to topologically sort lattice (cycles found?)";
//     CompactLatticeShortestPath(clat_copy, shortest_path);
//     return;
//   }
//   // Now we can assume it's topologically sorted.
//   shortest_path->DeleteStates();
//   if (clat.Start() == kNoStateId) return;
//   typedef CompactLatticeArc Arc;
//   typedef Arc::StateId StateId;
//   typedef CompactLatticeWeight Weight;
//   vector<std::pair<double, StateId> > best_cost_and_pred(clat.NumStates() + 1);
//   StateId superfinal = clat.NumStates();
//   for (StateId s = 0; s <= clat.NumStates(); s++) {
//     best_cost_and_pred[s].first = std::numeric_limits<double>::infinity();
//     best_cost_and_pred[s].second = fst::kNoStateId;
//   }
//   best_cost_and_pred[clat.Start()].first = 0;
//   for (StateId s = 0; s < clat.NumStates(); s++) {
//     double my_cost = best_cost_and_pred[s].first;
//     for (ArcIterator<CompactLattice> aiter(clat, s);
//          !aiter.Done();
//          aiter.Next()) {
//       const Arc &arc = aiter.Value();
//       double arc_cost = ConvertToCost(arc.weight),
//           next_cost = my_cost + arc_cost;
//       if (next_cost < best_cost_and_pred[arc.nextstate].first) {
//         best_cost_and_pred[arc.nextstate].first = next_cost;
//         best_cost_and_pred[arc.nextstate].second = s;
//       }
//     }
//     double final_cost = ConvertToCost(clat.Final(s)),
//         tot_final = my_cost + final_cost;
//     if (tot_final < best_cost_and_pred[superfinal].first) {
//       best_cost_and_pred[superfinal].first = tot_final;
//       best_cost_and_pred[superfinal].second = s;
//     }
//   }
//   std::vector<StateId> states; // states on best path.
//   StateId cur_state = superfinal, start_state = clat.Start();
//   while (cur_state != start_state) {
//     StateId prev_state = best_cost_and_pred[cur_state].second;
//     if (prev_state == kNoStateId) {
//       KALDI_WARN << "Failure in best-path algorithm for lattice (infinite costs?)";
//       return; // return empty best-path.
//     }
//     states.push_back(prev_state);
//     KALDI_ASSERT(cur_state != prev_state && "Lattice with cycles");
//     cur_state = prev_state;
//   }
//   std::reverse(states.begin(), states.end());
//   for (size_t i = 0; i < states.size(); i++)
//     shortest_path->AddState();
//   for (StateId s = 0; static_cast<size_t>(s) < states.size(); s++) {
//     if (s == 0) shortest_path->SetStart(s);
//     if (static_cast<size_t>(s + 1) < states.size()) { // transition to next state.
//       bool have_arc = false;
//       Arc cur_arc;
//       for (ArcIterator<CompactLattice> aiter(clat, states[s]);
//            !aiter.Done();
//            aiter.Next()) {
//         const Arc &arc = aiter.Value();
//         if (arc.nextstate == states[s+1]) {
//           if (!have_arc ||
//               ConvertToCost(arc.weight) < ConvertToCost(cur_arc.weight)) {
//             cur_arc = arc;
//             have_arc = true;
//           }
//         }
//       }
//       KALDI_ASSERT(have_arc && "Code error.");
//       shortest_path->AddArc(s, Arc(cur_arc.ilabel, cur_arc.olabel,
//                                    cur_arc.weight, s+1));
//     } else { // final-prob.
//       shortest_path->SetFinal(s, clat.Final(states[s]));
//     }
//   }
// }
//
//
// void ExpandCompactLattice(const CompactLattice &clat,
//                           double epsilon,
//                           CompactLattice *expand_clat) {
//   using namespace fst;
//   typedef CompactLattice::Arc Arc;
//   typedef Arc::Weight Weight;
//   typedef Arc::StateId StateId;
//   typedef std::pair<StateId, StateId> StatePair;
//   typedef unordered_map<StatePair, StateId, PairHasher<StateId> > MapType;
//   typedef MapType::iterator IterType;
//
//   if (clat.Start() == kNoStateId) return;
//   // Make sure the input lattice is topologically sorted.
//   if (clat.Properties(kTopSorted, true) == 0) {
//     CompactLattice clat_copy(clat);
//     KALDI_LOG << "Topsort this lattice.";
//     if (!TopSort(&clat_copy))
//       KALDI_ERR << "Was not able to topologically sort lattice (cycles found?)";
//     ExpandCompactLattice(clat_copy, epsilon, expand_clat);
//     return;
//   }
//
//   // Compute backward logprobs betas for the expanded lattice.
//   // Note: the backward logprobs in the original lattice <clat> and the
//   // expanded lattice <expand_clat> are the same.
//   int32 num_states = clat.NumStates();
//   std::vector<double> beta(num_states, kLogZeroDouble);
//   ComputeCompactLatticeBetas(clat, &beta);
//   double tot_backward_logprob = beta[0];
//   std::vector<double> alpha;
//   alpha.push_back(0.0);
//   expand_clat->DeleteStates();
//   MapType state_map; // Map from state pair (orig_state, copy_state) to
//   // copy_state, where orig_state is a state in the original lattice, and
//   // copy_state is its corresponding one in the expanded lattice.
//   unordered_map<StateId, StateId> states; // Map from orig_state to its
//   // copy_state for states with incoming arcs' posteriors <= epsilon.
//   std::queue<StatePair> state_queue;
//
//   // Set start state in the expanded lattice.
//   StateId start_state = expand_clat->AddState();
//   expand_clat->SetStart(start_state);
//   StatePair start_pair(clat.Start(), start_state);
//   state_queue.push(start_pair);
//   std::pair<IterType, bool> result =
//     state_map.insert(std::make_pair(start_pair, start_state));
//   KALDI_ASSERT(result.second == true);
//
//   // Expand <clat> and update forward logprobs alphas in <expand_clat>.
//   while (!state_queue.empty()) {
//     StatePair s = state_queue.front();
//     StateId s1 = s.first,
//             s2 = s.second;
//     state_queue.pop();
//
//     Weight f = clat.Final(s1);
//     if (f != Weight::Zero()) {
//       KALDI_ASSERT(state_map.find(s) != state_map.end());
//       expand_clat->SetFinal(state_map[s], f);
//     }
//
//     for (ArcIterator<CompactLattice> aiter(clat, s1);
//          !aiter.Done(); aiter.Next()) {
//       const Arc &arc = aiter.Value();
//       StateId orig_state = arc.nextstate;
//       double arc_like = -ConvertToCost(arc.weight),
//              this_alpha = alpha[s2] + arc_like,
//              arc_post = Exp(this_alpha + beta[orig_state] -
//                             tot_backward_logprob);
//       // Generate the expanded lattice.
//       StateId copy_state;
//       if (arc_post > epsilon) {
//         copy_state = expand_clat->AddState();
//         StatePair next_pair(orig_state, copy_state);
//         std::pair<IterType, bool> result =
//           state_map.insert(std::make_pair(next_pair, copy_state));
//         KALDI_ASSERT(result.second == true);
//         state_queue.push(next_pair);
//       } else {
//         unordered_map<StateId, StateId>::iterator iter = states.find(orig_state);
//         if (iter == states.end() ) { // The counterpart state of orig_state
//                                    // has not been created in <expand_clat> yet.
//           copy_state = expand_clat->AddState();
//           StatePair next_pair(orig_state, copy_state);
//           std::pair<IterType, bool> result =
//             state_map.insert(std::make_pair(next_pair, copy_state));
//           KALDI_ASSERT(result.second == true);
//           state_queue.push(next_pair);
//           states[orig_state] = copy_state;
//         } else {
//           copy_state = iter->second;
//         }
//       }
//       // Create an arc from state_map[s] to copy_state in the expanded lattice.
//       expand_clat->AddArc(state_map[s], Arc(arc.ilabel, arc.olabel, arc.weight,
//                                             copy_state));
//       // Compute forward logprobs alpha for the expanded lattice.
//       if ((alpha.size() - 1) < copy_state) { // The first time to compute alpha
//                                              // for copy_state in <expand_clat>.
//         alpha.push_back(this_alpha);
//       } else { // Accumulate alpha.
//         alpha[copy_state] = LogAdd(alpha[copy_state], this_alpha);
//       }
//     }
//   } // end while
// }
//
//
// void CompactLatticeBestCostsAndTracebacks(
//     const CompactLattice &clat,
//     CostTraceType *forward_best_cost_and_pred,
//     CostTraceType *backward_best_cost_and_pred) {
//
//   // typedef the arc, weight types
//   typedef CompactLatticeArc Arc;
//   typedef Arc::Weight Weight;
//   typedef Arc::StateId StateId;
//
//   forward_best_cost_and_pred->clear();
//   backward_best_cost_and_pred->clear();
//   forward_best_cost_and_pred->resize(clat.NumStates());
//   backward_best_cost_and_pred->resize(clat.NumStates());
//   // Initialize the cost and predecessor state for each state.
//   for (StateId s = 0; s < clat.NumStates(); s++) {
//     (*forward_best_cost_and_pred)[s].first =
//                                         std::numeric_limits<double>::infinity();
//     (*backward_best_cost_and_pred)[s].first =
//                                         std::numeric_limits<double>::infinity();
//     (*forward_best_cost_and_pred)[s].second = fst::kNoStateId;
//     (*backward_best_cost_and_pred)[s].second = fst::kNoStateId;
//   }
//
//   StateId start_state = clat.Start();
//   (*forward_best_cost_and_pred)[start_state].first = 0;
//   // Traverse the lattice forward to compute the best cost from the start
//   // state to each state and the best predecessor state of each state.
//   for (StateId s = 0; s < clat.NumStates(); s++) {
//     double cur_cost = (*forward_best_cost_and_pred)[s].first;
//     for (fst::ArcIterator<CompactLattice> aiter(clat, s);
//          !aiter.Done(); aiter.Next()) {
//       const Arc &arc = aiter.Value();
//       double next_cost = cur_cost + ConvertToCost(arc.weight);
//       if (next_cost < (*forward_best_cost_and_pred)[arc.nextstate].first) {
//         (*forward_best_cost_and_pred)[arc.nextstate].first = next_cost;
//         (*forward_best_cost_and_pred)[arc.nextstate].second = s;
//       }
//     }
//   }
//   // Traverse the lattice backward to compute the best cost from a final
//   // state to each state and the best predecessor state of each state
//   // (i.e. the next state along the best outgoing arc).
//   for (StateId s = clat.NumStates() - 1; s >= 0; s--) {
//     double this_cost = ConvertToCost(clat.Final(s));
//     for (fst::ArcIterator<CompactLattice> aiter(clat, s);
//          !aiter.Done(); aiter.Next()) {
//       const Arc &arc = aiter.Value();
//       double next_cost = (*backward_best_cost_and_pred)[arc.nextstate].first +
//         ConvertToCost(arc.weight);
//       if (next_cost < this_cost) {
//         this_cost = next_cost;
//         (*backward_best_cost_and_pred)[s].second = arc.nextstate;
//       }
//     }
//     (*backward_best_cost_and_pred)[s].first = this_cost;
//   }
// }
//
//
// void AddNnlmScoreToCompactLattice(const MapT &nnlm_scores,
//                                   CompactLattice *clat) {
//   if (clat->Start() == fst::kNoStateId) return;
//   // Make sure the input lattice is topologically sorted.
//   if (clat->Properties(fst::kTopSorted, true) == 0) {
//     KALDI_LOG << "Topsort this lattice.";
//     if (!TopSort(clat))
//       KALDI_ERR << "Was not able to topologically sort lattice (cycles found?)";
//     AddNnlmScoreToCompactLattice(nnlm_scores, clat);
//     return;
//   }
//
//   // typedef the arc, weight types
//   typedef CompactLatticeArc Arc;
//   typedef Arc::Weight Weight;
//   typedef Arc::StateId StateId;
//   typedef std::pair<int32, int32> StatePair;
//
//   int32 num_states = clat->NumStates();
//   unordered_map<StatePair, bool, PairHasher<int32> > final_state_check;
//   for (StateId s = 0; s < num_states; s++) {
//     for (fst::MutableArcIterator<CompactLattice> aiter(clat, s);
//          !aiter.Done(); aiter.Next()) {
//       Arc arc(aiter.Value());
//       StatePair arc_index = std::make_pair(static_cast<int32>(s),
//                                            static_cast<int32>(arc.nextstate));
//       MapT::const_iterator it = nnlm_scores.find(arc_index);
//       double nnlm_score;
//       if (it != nnlm_scores.end())
//         nnlm_score = it->second;
//       else
//         KALDI_ERR << "Some arc does not have neural language model score.";
//       if (arc.ilabel != 0) { // if there is a word on this arc
//         LatticeWeight weight = arc.weight.Weight();
//         // Add associated neural LM score to each arc.
//         weight.SetValue1(weight.Value1() + nnlm_score);
//         arc.weight.SetWeight(weight);
//         aiter.SetValue(arc);
//       }
//       Weight clat_final = clat->Final(arc.nextstate);
//       StatePair final_pair = std::make_pair(arc.nextstate, arc.nextstate);
//       // Add neural LM scores to each final state only once.
//       if (clat_final != CompactLatticeWeight::Zero() &&
//           final_state_check.find(final_pair) == final_state_check.end()) {
//         MapT::const_iterator final_it = nnlm_scores.find(final_pair);
//         double final_nnlm_score = 0.0;
//         if (final_it != nnlm_scores.end())
//           final_nnlm_score = final_it->second;
//         // Add neural LM scores to the final weight.
//         Weight final_weight(LatticeWeight(clat_final.Weight().Value1() +
//                                           final_nnlm_score,
//                                           clat_final.Weight().Value2()),
//                                           clat_final.String());
//         clat->SetFinal(arc.nextstate, final_weight);
//         final_state_check[final_pair] = true;
//       }
//     } // end looping over arcs
//   } // end looping over states
// }
//
// void AddWordInsPenToCompactLattice(BaseFloat word_ins_penalty,
//                                    CompactLattice *clat) {
//   typedef CompactLatticeArc Arc;
//   int32 num_states = clat->NumStates();
//
//   //scan the lattice
//   for (int32 state = 0; state < num_states; state++) {
//     for (fst::MutableArcIterator<CompactLattice> aiter(clat, state);
//          !aiter.Done(); aiter.Next()) {
//
//       Arc arc(aiter.Value());
//
//       if (arc.ilabel != 0) { // if there is a word on this arc
//         LatticeWeight weight = arc.weight.Weight();
//         // add word insertion penalty to lattice
//         weight.SetValue1( weight.Value1() + word_ins_penalty);
//         arc.weight.SetWeight(weight);
//         aiter.SetValue(arc);
//       }
//     } // end looping over arcs
//   }  // end looping over states
// }
//
// struct ClatRescoreTuple {
//   ClatRescoreTuple(int32 state, int32 arc, int32 tid):
//       state_id(state), arc_id(arc), tid(tid) { }
//   int32 state_id;
//   int32 arc_id;
//   int32 tid;
// };
//
// /** RescoreCompactLatticeInternal is the internal code for both
//     RescoreCompactLattice and RescoreCompactLatticeSpeedup.  For
//     RescoreCompactLattice, "tmodel" will be NULL and speedup_factor will be 1.0.
//  */
// bool RescoreCompactLatticeInternal(
//     const TransitionModel *tmodel,
//     BaseFloat speedup_factor,
//     DecodableInterface *decodable,
//     CompactLattice *clat) {
//   KALDI_ASSERT(speedup_factor >= 1.0);
//   if (clat->NumStates() == 0) {
//     KALDI_WARN << "Rescoring empty lattice";
//     return false;
//   }
//   if (!clat->Properties(fst::kTopSorted, true)) {
//     if (fst::TopSort(clat) == false) {
//       KALDI_WARN << "Cycles detected in lattice.";
//       return false;
//     }
//   }
//   std::vector<int32> state_times;
//   int32 utt_len = kaldi::CompactLatticeStateTimes(*clat, &state_times);
//
//   std::vector<std::vector<ClatRescoreTuple> > time_to_state(utt_len);
//
//   int32 num_states = clat->NumStates();
//   KALDI_ASSERT(num_states == state_times.size());
//   for (size_t state = 0; state < num_states; state++) {
//     KALDI_ASSERT(state_times[state] >= 0);
//     int32 t = state_times[state];
//     int32 arc_id = 0;
//     for (fst::MutableArcIterator<CompactLattice> aiter(clat, state);
//          !aiter.Done(); aiter.Next(), arc_id++) {
//       CompactLatticeArc arc = aiter.Value();
//       std::vector<int32> arc_string = arc.weight.String();
//
//       for (size_t offset = 0; offset < arc_string.size(); offset++) {
//         if (t < utt_len) { // end state may be past this..
//           int32 tid = arc_string[offset];
//           time_to_state[t+offset].push_back(ClatRescoreTuple(state, arc_id, tid));
//         } else {
//           if (t != utt_len) {
//             KALDI_WARN << "There appears to be lattice/feature mismatch, "
//                        << "aborting.";
//             return false;
//           }
//         }
//       }
//     }
//     if (clat->Final(state) != CompactLatticeWeight::Zero()) {
//       arc_id = -1;
//       std::vector<int32> arc_string = clat->Final(state).String();
//       for (size_t offset = 0; offset < arc_string.size(); offset++) {
//         KALDI_ASSERT(t + offset < utt_len); // already checked in
//         // CompactLatticeStateTimes, so would be code error.
//         time_to_state[t+offset].push_back(
//             ClatRescoreTuple(state, arc_id, arc_string[offset]));
//       }
//     }
//   }
//
//   for (int32 t = 0; t < utt_len; t++) {
//     if ((t < utt_len - 1) && decodable->IsLastFrame(t)) {
//       KALDI_WARN << "Features are too short for lattice: utt-len is "
//                  << utt_len << ", " << t << " is last frame";
//       return false;
//     }
//     // frame_scale is the scale we put on the computed acoustic probs for this
//     // frame.  It will always be 1.0 if tmodel == NULL (i.e. if we are not doing
//     // the "speedup" code).  For frames with multiple pdf-ids it will be one.
//     // For frames with only one pdf-id, it will equal speedup_factor (>=1.0)
//     // with probability 1.0 / speedup_factor, and zero otherwise.  If it is zero,
//     // we can avoid computing the probabilities.
//     BaseFloat frame_scale = 1.0;
//     KALDI_ASSERT(!time_to_state[t].empty());
//     if (tmodel != NULL) {
//       int32 pdf_id = tmodel->TransitionIdToPdf(time_to_state[t][0].tid);
//       bool frame_has_multiple_pdfs = false;
//       for (size_t i = 1; i < time_to_state[t].size(); i++) {
//         if (tmodel->TransitionIdToPdf(time_to_state[t][i].tid) != pdf_id) {
//           frame_has_multiple_pdfs = true;
//           break;
//         }
//       }
//       if (frame_has_multiple_pdfs) {
//         frame_scale = 1.0;
//       } else {
//         if (WithProb(1.0 / speedup_factor)) {
//           frame_scale = speedup_factor;
//         } else {
//           frame_scale = 0.0;
//         }
//       }
//       if (frame_scale == 0.0)
//         continue; // the code below would be pointless.
//     }
//
//     for (size_t i = 0; i < time_to_state[t].size(); i++) {
//       int32 state = time_to_state[t][i].state_id;
//       int32 arc_id = time_to_state[t][i].arc_id;
//       int32 tid = time_to_state[t][i].tid;
//
//       if (arc_id == -1) { // Final state
//         // Access the trans_id
//         CompactLatticeWeight curr_clat_weight = clat->Final(state);
//
//         // Calculate likelihood
//         BaseFloat log_like = decodable->LogLikelihood(t, tid) * frame_scale;
//         // update weight
//         CompactLatticeWeight new_clat_weight = curr_clat_weight;
//         LatticeWeight new_lat_weight = new_clat_weight.Weight();
//         new_lat_weight.SetValue2(-log_like + curr_clat_weight.Weight().Value2());
//         new_clat_weight.SetWeight(new_lat_weight);
//         clat->SetFinal(state, new_clat_weight);
//       } else {
//         fst::MutableArcIterator<CompactLattice> aiter(clat, state);
//
//         aiter.Seek(arc_id);
//         CompactLatticeArc arc = aiter.Value();
//
//         // Calculate likelihood
//         BaseFloat log_like = decodable->LogLikelihood(t, tid) * frame_scale;
//         // update weight
//         LatticeWeight new_weight = arc.weight.Weight();
//         new_weight.SetValue2(-log_like + arc.weight.Weight().Value2());
//         arc.weight.SetWeight(new_weight);
//         aiter.SetValue(arc);
//       }
//     }
//   }
//   return true;
// }
//
//
// bool RescoreCompactLatticeSpeedup(
//     const TransitionModel &tmodel,
//     BaseFloat speedup_factor,
//     DecodableInterface *decodable,
//     CompactLattice *clat) {
//   return RescoreCompactLatticeInternal(&tmodel, speedup_factor, decodable, clat);
// }
//
// bool RescoreCompactLattice(DecodableInterface *decodable,
//                            CompactLattice *clat) {
//   return RescoreCompactLatticeInternal(NULL, 1.0, decodable, clat);
// }
//
//
// bool RescoreLattice(DecodableInterface *decodable,
//                     Lattice *lat) {
//   if (lat->NumStates() == 0) {
//     KALDI_WARN << "Rescoring empty lattice";
//     return false;
//   }
//   if (!lat->Properties(fst::kTopSorted, true)) {
//     if (fst::TopSort(lat) == false) {
//       KALDI_WARN << "Cycles detected in lattice.";
//       return false;
//     }
//   }
//   std::vector<int32> state_times;
//   int32 utt_len = kaldi::LatticeStateTimes(*lat, &state_times);
//
//   std::vector<std::vector<int32> > time_to_state(utt_len );
//
//   int32 num_states = lat->NumStates();
//   KALDI_ASSERT(num_states == state_times.size());
//   for (size_t state = 0; state < num_states; state++) {
//     int32 t = state_times[state];
//     // Don't check t >= 0 because non-accessible states could have t = -1.
//     KALDI_ASSERT(t <= utt_len);
//     if (t >= 0 && t < utt_len)
//       time_to_state[t].push_back(state);
//   }
//
//   for (int32 t = 0; t < utt_len; t++) {
//     if ((t < utt_len - 1) && decodable->IsLastFrame(t)) {
//       KALDI_WARN << "Features are too short for lattice: utt-len is "
//                  << utt_len << ", " << t << " is last frame";
//       return false;
//     }
//     for (size_t i = 0; i < time_to_state[t].size(); i++) {
//       int32 state = time_to_state[t][i];
//       for (fst::MutableArcIterator<Lattice> aiter(lat, state);
//            !aiter.Done(); aiter.Next()) {
//         LatticeArc arc = aiter.Value();
//         if (arc.ilabel != 0) {
//           int32 trans_id = arc.ilabel; // Note: it doesn't necessarily
//           // have to be a transition-id, just whatever the Decodable
//           // object is expecting, but it's normally a transition-id.
//
//           BaseFloat log_like = decodable->LogLikelihood(t, trans_id);
//           arc.weight.SetValue2(-log_like + arc.weight.Value2());
//           aiter.SetValue(arc);
//         }
//       }
//     }
//   }
//   return true;
// }
//
//
// BaseFloat LatticeForwardBackwardMmi(
//     const TransitionModel &tmodel,
//     const Lattice &lat,
//     const std::vector<int32> &num_ali,
//     bool drop_frames,
//     bool convert_to_pdf_ids,
//     bool cancel,
//     Posterior *post) {
//   // First compute the MMI posteriors.
//
//   Posterior den_post;
//   BaseFloat ans = LatticeForwardBackward(lat,
//                                          &den_post,
//                                          NULL);
//
//   Posterior num_post;
//   AlignmentToPosterior(num_ali, &num_post);
//
//   // Now negate the MMI posteriors and add the numerator
//   // posteriors.
//   ScalePosterior(-1.0, &den_post);
//
//   if (convert_to_pdf_ids) {
//     Posterior num_tmp;
//     ConvertPosteriorToPdfs(tmodel, num_post, &num_tmp);
//     num_tmp.swap(num_post);
//     Posterior den_tmp;
//     ConvertPosteriorToPdfs(tmodel, den_post, &den_tmp);
//     den_tmp.swap(den_post);
//   }
//
//   MergePosteriors(num_post, den_post,
//                   cancel, drop_frames, post);
//
//   return ans;
// }
//
//
// int32 LongestSentenceLength(const Lattice &lat) {
//   typedef Lattice::Arc Arc;
//   typedef Arc::Label Label;
//   typedef Arc::StateId StateId;
//
//   if (lat.Properties(fst::kTopSorted, true) == 0) {
//     Lattice lat_copy(lat);
//     if (!TopSort(&lat_copy))
//       KALDI_ERR << "Was not able to topologically sort lattice (cycles found?)";
//     return LongestSentenceLength(lat_copy);
//   }
//   std::vector<int32> max_length(lat.NumStates(), 0);
//   int32 lattice_max_length = 0;
//   for (StateId s = 0; s < lat.NumStates(); s++) {
//     int32 this_max_length = max_length[s];
//     for (fst::ArcIterator<Lattice> aiter(lat, s); !aiter.Done(); aiter.Next()) {
//       const Arc &arc = aiter.Value();
//       bool arc_has_word = (arc.olabel != 0);
//       StateId nextstate = arc.nextstate;
//       KALDI_ASSERT(static_cast<size_t>(nextstate) < max_length.size());
//       if (arc_has_word) {
//         // A lattice should ideally not have cycles anyway; a cycle with a word
//         // on it is something very bad.
//         KALDI_ASSERT(nextstate > s && "Lattice has cycles with words on.");
//         max_length[nextstate] = std::max(max_length[nextstate],
//                                          this_max_length + 1);
//       } else {
//         max_length[nextstate] = std::max(max_length[nextstate],
//                                          this_max_length);
//       }
//     }
//     if (lat.Final(s) != LatticeWeight::Zero())
//       lattice_max_length = std::max(lattice_max_length, max_length[s]);
//   }
//   return lattice_max_length;
// }
//
// int32 LongestSentenceLength(const CompactLattice &clat) {
//   typedef CompactLattice::Arc Arc;
//   typedef Arc::Label Label;
//   typedef Arc::StateId StateId;
//
//   if (clat.Properties(fst::kTopSorted, true) == 0) {
//     CompactLattice clat_copy(clat);
//     if (!TopSort(&clat_copy))
//       KALDI_ERR << "Was not able to topologically sort lattice (cycles found?)";
//     return LongestSentenceLength(clat_copy);
//   }
//   std::vector<int32> max_length(clat.NumStates(), 0);
//   int32 lattice_max_length = 0;
//   for (StateId s = 0; s < clat.NumStates(); s++) {
//     int32 this_max_length = max_length[s];
//     for (fst::ArcIterator<CompactLattice> aiter(clat, s);
//          !aiter.Done(); aiter.Next()) {
//       const Arc &arc = aiter.Value();
//       bool arc_has_word = (arc.ilabel != 0); // note: olabel == ilabel.
//       // also note: for normal CompactLattice, e.g. as produced by
//       // determinization, all arcs will have nonzero labels, but the user might
//       // decide to replace some of the labels with zero for some reason, and we
//       // want to support this.
//       StateId nextstate = arc.nextstate;
//       KALDI_ASSERT(static_cast<size_t>(nextstate) < max_length.size());
//       KALDI_ASSERT(nextstate > s && "CompactLattice has cycles");
//       if (arc_has_word)
//         max_length[nextstate] = std::max(max_length[nextstate],
//                                          this_max_length + 1);
//       else
//         max_length[nextstate] = std::max(max_length[nextstate],
//                                          this_max_length);
//     }
//     if (clat.Final(s) != CompactLatticeWeight::Zero())
//       lattice_max_length = std::max(lattice_max_length, max_length[s]);
//   }
//   return lattice_max_length;
// }
//
// void ComposeCompactLatticeDeterministic(
//     const CompactLattice& clat,
//     fst::DeterministicOnDemandFst<fst::StdArc>* det_fst,
//     CompactLattice* composed_clat) {
//   // fst::StdArc and CompactLatticeArc have the same StateId type.
//   typedef fst::StdArc::StateId StateId;
//   typedef fst::StdArc::Weight Weight1;
//   typedef CompactLatticeArc::Weight Weight2;
//   typedef std::pair<StateId, StateId> StatePair;
//   typedef unordered_map<StatePair, StateId, PairHasher<StateId> > MapType;
//   typedef MapType::iterator IterType;
//
//   // Empties the output FST.
//   KALDI_ASSERT(composed_clat != NULL);
//   composed_clat->DeleteStates();
//
//   MapType state_map;
//   std::queue<StatePair> state_queue;
//
//   // Sets start state in <composed_clat>.
//   StateId start_state = composed_clat->AddState();
//   StatePair start_pair(clat.Start(), det_fst->Start());
//   composed_clat->SetStart(start_state);
//   state_queue.push(start_pair);
//   std::pair<IterType, bool> result =
//       state_map.insert(std::make_pair(start_pair, start_state));
//   KALDI_ASSERT(result.second == true);
//
//   // Starts composition here.
//   while (!state_queue.empty()) {
//     // Gets the first state in the queue.
//     StatePair s = state_queue.front();
//     StateId s1 = s.first;
//     StateId s2 = s.second;
//     state_queue.pop();
//
//
//     Weight2 clat_final = clat.Final(s1);
//     if (clat_final.Weight().Value1() !=
//         std::numeric_limits<BaseFloat>::infinity()) {
//       // Test for whether the final-prob of state s1 was zero.
//       Weight1 det_fst_final = det_fst->Final(s2);
//       if (det_fst_final.Value() !=
//           std::numeric_limits<BaseFloat>::infinity()) {
//         // Test for whether the final-prob of state s2 was zero.  If neither
//         // source-state final prob was zero, then we should create final state
//         // in fst_composed. We compute the product manually since this is more
//         // efficient.
//         Weight2 final_weight(LatticeWeight(clat_final.Weight().Value1() +
//                                            det_fst_final.Value(),
//                                            clat_final.Weight().Value2()),
//                              clat_final.String());
//         // we can assume final_weight is not Zero(), since neither of
//         // the sources was zero.
//         KALDI_ASSERT(state_map.find(s) != state_map.end());
//         composed_clat->SetFinal(state_map[s], final_weight);
//       }
//     }
//
//     // Loops over pair of edges at s1 and s2.
//     for (fst::ArcIterator<CompactLattice> aiter(clat, s1);
//          !aiter.Done(); aiter.Next()) {
//       const CompactLatticeArc& arc1 = aiter.Value();
//       fst::StdArc arc2;
//       StateId next_state1 = arc1.nextstate, next_state2;
//       bool matched = false;
//
//       if (arc1.olabel == 0) {
//         // If the symbol on <arc1> is <epsilon>, we transit to the next state
//         // for <clat>, but keep <det_fst> at the current state.
//         matched = true;
//         next_state2 = s2;
//       } else {
//         // Otherwise try to find the matched arc in <det_fst>.
//         matched = det_fst->GetArc(s2, arc1.olabel, &arc2);
//         if (matched) {
//           next_state2 = arc2.nextstate;
//         }
//       }
//
//       // If matched arc is found in <det_fst>, then we have to add new arcs to
//       // <composed_clat>.
//       if (matched) {
//         StatePair next_state_pair(next_state1, next_state2);
//         IterType siter = state_map.find(next_state_pair);
//         StateId next_state;
//
//         // Adds composed state to <state_map>.
//         if (siter == state_map.end()) {
//           // If the composed state has not been created yet, create it.
//           next_state = composed_clat->AddState();
//           std::pair<const StatePair, StateId> next_state_map(next_state_pair,
//                                                              next_state);
//           std::pair<IterType, bool> result = state_map.insert(next_state_map);
//           KALDI_ASSERT(result.second);
//           state_queue.push(next_state_pair);
//         } else {
//           // If the composed state is already in <state_map>, we can directly
//           // use that.
//           next_state = siter->second;
//         }
//
//         // Adds arc to <composed_clat>.
//         if (arc1.olabel == 0) {
//           composed_clat->AddArc(state_map[s],
//                                 CompactLatticeArc(arc1.ilabel, 0,
//                                                   arc1.weight, next_state));
//         } else {
//           Weight2 composed_weight(
//               LatticeWeight(arc1.weight.Weight().Value1() +
//                             arc2.weight.Value(),
//                             arc1.weight.Weight().Value2()),
//               arc1.weight.String());
//           composed_clat->AddArc(state_map[s],
//                                 CompactLatticeArc(arc1.ilabel, arc2.olabel,
//                                                   composed_weight, next_state));
//         }
//       }
//     }
//   }
//   fst::Connect(composed_clat);
// }
//
//
// void ComputeAcousticScoresMap(
//     const Lattice &lat,
//     unordered_map<std::pair<int32, int32>, std::pair<BaseFloat, int32>,
//                                         PairHasher<int32> > *acoustic_scores) {
//   // typedef the arc, weight types
//   typedef Lattice::Arc Arc;
//   typedef Arc::Weight LatticeWeight;
//   typedef Arc::StateId StateId;
//
//   acoustic_scores->clear();
//
//   std::vector<int32> state_times;
//   LatticeStateTimes(lat, &state_times);   // Assumes the input is top sorted
//
//   KALDI_ASSERT(lat.Start() == 0);
//
//   for (StateId s = 0; s < lat.NumStates(); s++) {
//     int32 t = state_times[s];
//     for (fst::ArcIterator<Lattice> aiter(lat, s); !aiter.Done();
//           aiter.Next()) {
//       const Arc &arc = aiter.Value();
//       const LatticeWeight &weight = arc.weight;
//
//       int32 tid = arc.ilabel;
//
//       if (tid != 0) {
//         unordered_map<std::pair<int32, int32>, std::pair<BaseFloat, int32>,
//           PairHasher<int32> >::iterator it = acoustic_scores->find(std::make_pair(t, tid));
//         if (it == acoustic_scores->end()) {
//           acoustic_scores->insert(std::make_pair(std::make_pair(t, tid),
//                                           std::make_pair(weight.Value2(), 1)));
//         } else {
//           if (it->second.second == 2
//                 && it->second.first / it->second.second != weight.Value2()) {
//             KALDI_VLOG(2) << "Transitions on the same frame have different "
//                           << "acoustic costs for tid " << tid << "; "
//                           << it->second.first / it->second.second
//                           << " vs " << weight.Value2();
//           }
//           it->second.first += weight.Value2();
//           it->second.second++;
//         }
//       } else {
//         // Arcs with epsilon input label (tid) must have 0 acoustic cost
//         KALDI_ASSERT(weight.Value2() == 0);
//       }
//     }
//
//     LatticeWeight f = lat.Final(s);
//     if (f != LatticeWeight::Zero()) {
//       // Final acoustic cost must be 0 as we are reading from
//       // non-determinized, non-compact lattice
//       KALDI_ASSERT(f.Value2() == 0.0);
//     }
//   }
// }
//
// void ReplaceAcousticScoresFromMap(
//     const unordered_map<std::pair<int32, int32>, std::pair<BaseFloat, int32>,
//                                         PairHasher<int32> > &acoustic_scores,
//     Lattice *lat) {
//   // typedef the arc, weight types
//   typedef Lattice::Arc Arc;
//   typedef Arc::Weight LatticeWeight;
//   typedef Arc::StateId StateId;
//
//   TopSortLatticeIfNeeded(lat);
//
//   std::vector<int32> state_times;
//   LatticeStateTimes(*lat, &state_times);
//
//   KALDI_ASSERT(lat->Start() == 0);
//
//   for (StateId s = 0; s < lat->NumStates(); s++) {
//     int32 t = state_times[s];
//     for (fst::MutableArcIterator<Lattice> aiter(lat, s);
//          !aiter.Done(); aiter.Next()) {
//       Arc arc(aiter.Value());
//
//       int32 tid = arc.ilabel;
//       if (tid != 0) {
//         unordered_map<std::pair<int32, int32>, std::pair<BaseFloat, int32>,
//           PairHasher<int32> >::const_iterator it = acoustic_scores.find(std::make_pair(t, tid));
//         if (it == acoustic_scores.end()) {
//           KALDI_ERR << "Could not find tid " << tid << " at time " << t
//                     << " in the acoustic scores map.";
//         } else {
//           arc.weight.SetValue2(it->second.first / it->second.second);
//         }
//       } else {
//         // For epsilon arcs, set acoustic cost to 0.0
//         arc.weight.SetValue2(0.0);
//       }
//       aiter.SetValue(arc);
//     }
//
//     LatticeWeight f = lat->Final(s);
//     if (f != LatticeWeight::Zero()) {
//       // Set final acoustic cost to 0.0
//       f.SetValue2(0.0);
//       lat->SetFinal(s, f);
//     }
//   }
// }

}  // namespace kaldi


================================================
FILE: runtime/engine/kaldi/lat/lattice-functions.h
================================================
// lat/lattice-functions.h

// Copyright 2009-2012   Saarland University (author: Arnab Ghoshal)
//           2012-2013   Johns Hopkins University (Author: Daniel Povey);
//                       Bagher BabaAli
//                2014   Guoguo Chen

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


#ifndef KALDI_LAT_LATTICE_FUNCTIONS_H_
#define KALDI_LAT_LATTICE_FUNCTIONS_H_

#include <vector>
#include <map>

#include "base/kaldi-common.h"
// #include "hmm/posterior.h"
#include "fstext/fstext-lib.h"
// #include "hmm/transition-model.h"
#include "lat/kaldi-lattice.h"
// #include "itf/decodable-itf.h"

namespace kaldi {

// /**
//    This function extracts the per-frame log likelihoods from a linear
//    lattice (which we refer to as an 'nbest' lattice elsewhere in Kaldi code).
//    The dimension of *per_frame_loglikes will be set to the
//    number of input symbols in 'nbest'.  The elements of
//    '*per_frame_loglikes' will be set to the .Value2() elements of the lattice
//    weights, which represent the acoustic costs; you may want to scale this
//    vector afterward by -1/acoustic_scale to get the original loglikes.
//    If there are acoustic costs on input-epsilon arcs or the final-prob in 'nbest'
//    (and this should not normally be the case in situations where it makes
//    sense to call this function), they will be added to the cost of the
//    preceding input symbol, or the following input symbol for input-epsilons
//    encountered prior to any input symbol.  If 'nbest' has no input symbols,
//    'per_frame_loglikes' will be set to the empty vector.
// **/
// void GetPerFrameAcousticCosts(const Lattice &nbest,
//                               Vector<BaseFloat> *per_frame_loglikes);
//
// /// This function iterates over the states of a topologically sorted lattice and
// /// counts the time instance corresponding to each state. The times are returned
// /// in a vector of integers 'times' which is resized to have a size equal to the
// /// number of states in the lattice. The function also returns the maximum time
// /// in the lattice (this will equal the number of frames in the file).
// int32 LatticeStateTimes(const Lattice &lat, std::vector<int32> *times);
//
// /// As LatticeStateTimes, but in the CompactLattice format.  Note: must
// /// be topologically sorted.  Returns length of the utterance in frames, which
// /// might not be the same as the maximum time in the lattice, due to frames
// /// in the final-prob.
// int32 CompactLatticeStateTimes(const CompactLattice &clat,
//                                std::vector<int32> *times);
//
// /// This function does the forward-backward over lattices and computes the
// /// posterior probabilities of the arcs. It returns the total log-probability
// /// of the lattice.  The Posterior quantities contain pairs of (transition-id, weight)
// /// on each frame.
// /// If the pointer "acoustic_like_sum" is provided, this value is set to
// /// the sum over the arcs, of the posterior of the arc times the
// /// acoustic likelihood [i.e. negated acoustic score] on that link.
// /// This is used in combination with other quantities to work out
// /// the objective function in MMI discriminative training.
// BaseFloat LatticeForwardBackward(const Lattice &lat,
//                                  Posterior *arc_post,
//                                  double *acoustic_like_sum = NULL);
//
// // This function is something similar to LatticeForwardBackward(), but it is on
// // the CompactLattice lattice format. Also we only need the alpha in the forward
// // path, not the posteriors.
// bool ComputeCompactLatticeAlphas(const CompactLattice &lat,
//                                  std::vector<double> *alpha);
//
// // A sibling of the function ComputeCompactLatticeAlphas()... We compute the
// // beta from the backward path here.
// bool ComputeCompactLatticeBetas(const CompactLattice &lat,
//                                 std::vector<double> *beta);
//
//
// // Computes (normal or Viterbi) alphas and betas; returns (total-prob, or
// // best-path negated cost) Note: in either case, the alphas and betas are
// // negated costs.  Requires that lat be topologically sorted.  This code
// // will work for either CompactLattice or Lattice.
// template<typename LatticeType>
// double ComputeLatticeAlphasAndBetas(const LatticeType &lat,
//                                     bool viterbi,
//                                     std::vector<double> *alpha,
//                                     std::vector<double> *beta);
//
//
// /// Topologically sort the compact lattice if not already topologically sorted.
// /// Will crash if the lattice cannot be topologically sorted.
// void TopSortCompactLatticeIfNeeded(CompactLattice *clat);
//
//
// /// Topologically sort the lattice if not already topologically sorted.
// /// Will crash if lattice cannot be topologically sorted.
// void TopSortLatticeIfNeeded(Lattice *clat);
//
// /// Returns the depth of the lattice, defined as the average number of arcs (or
// /// final-prob strings) crossing any given frame.  Returns 1 for empty lattices.
// /// Requires that clat is topologically sorted!
// BaseFloat CompactLatticeDepth(const CompactLattice &clat,
//                               int32 *num_frames = NULL);
//
// /// This function returns, for each frame, the number of arcs crossing that
// /// frame.
// void CompactLatticeDepthPerFrame(const CompactLattice &clat,
//                                  std::vector<int32> *depth_per_frame);
//
//
// /// This function limits the depth of the lattice, per frame: that means, it
// /// does not allow more than a specified number of arcs active on any given
// /// frame.  This can be used to reduce the size of the "very deep" portions of
// /// the lattice.
// void CompactLatticeLimitDepth(int32 max_arcs_per_frame,
//                               CompactLattice *clat);
//
//
// /// Given a lattice, and a transition model to map pdf-ids to phones,
// /// outputs for each frame the set of phones active on that frame.  If
// /// sil_phones (which must be sorted and uniq) is nonempty, it excludes
// /// phones in this list.
// void LatticeActivePhones(const Lattice &lat, const TransitionModel &trans,
//                          const std::vector<int32> &sil_phones,
//                          std::vector<std::set<int32> > *active_phones);
//
// /// Given a lattice, and a transition model to map pdf-ids to phones,
// /// replace the output symbols (presumably words), with phones; we
// /// use the TransitionModel to work out the phone sequence.  Note
// /// that the phone labels are not exactly aligned with the phone
// /// boundaries.  We put a phone label to coincide with any transition
// /// to the final, nonemitting state of a phone (this state always exists,
// /// we ensure this in HmmTopology::Check()).  This would be the last
// /// transition-id in the phone if reordering is not done (but typically
// /// we do reorder).
// /// Also see PhoneAlignLattice, in phone-align-lattice.h.
// void ConvertLatticeToPhones(const TransitionModel &trans_model,
//                             Lattice *lat);

/// Prunes a lattice or compact lattice.  Returns true on success, false if
/// there was some kind of failure.
///
/// This is only the declaration of the template; LatticeType is the lattice
/// type being pruned (per the sentence above, a lattice or a compact lattice).
///
///   @param [in] beam   Pruning beam.  NOTE(review): presumably a cost margin
///                      relative to the best path -- confirm against the
///                      definition, which is not part of this header section.
///   @param [in,out] lat  Lattice to prune.  It is passed by non-const
///                      pointer, so presumably it is modified in place --
///                      TODO confirm.
template<class LatticeType>
bool PruneLattice(BaseFloat beam, LatticeType *lat);

//
// /// Given a lattice, and a transition model to map pdf-ids to phones,
// /// replace the sequences of transition-ids with sequences of phones.
// /// Note that this is different from ConvertLatticeToPhones, in that
// /// we replace the transition-ids not the words.
// void ConvertCompactLatticeToPhones(const TransitionModel &trans_model,
//                                    CompactLattice *clat);
//
// /// Boosts LM probabilities by b * [number of frame errors]; equivalently, adds
// /// -b*[number of frame errors] to the graph-component of the cost of each arc/path.
// /// There is a frame error if a particular transition-id on a particular frame
// /// corresponds to a phone not matching transcription's alignment for that frame.
// /// This is used in "margin-inspired" discriminative training, esp. Boosted MMI.
// /// The TransitionModel is used to map transition-ids in the lattice
// /// input-side to phones; the phones appearing in
// /// "silence_phones" are treated specially in that we replace the frame error f
// /// (either zero or 1) for a frame, with the minimum of f or max_silence_error.
// /// For the normal recipe, max_silence_error would be zero.
// /// Returns true on success, false if there was some kind of mismatch.
// /// At input, silence_phones must be sorted and unique.
// bool LatticeBoost(const TransitionModel &trans,
//                   const std::vector<int32> &alignment,
//                   const std::vector<int32> &silence_phones,
//                   BaseFloat b,
//                   BaseFloat max_silence_error,
//                   Lattice *lat);
//
//
// /**
//    This function implements either the MPFE (minimum phone frame error) or SMBR
//    (state-level minimum bayes risk) forward-backward, depending on whether
//    "criterion" is "mpfe" or "smbr".  It returns the MPFE
//    criterion or SMBR criterion for this utterance, and outputs the posteriors (which
//    may be positive or negative) into "post".
//
//    @param [in] trans    The transition model. Used to map the
//                         transition-ids to phones or pdfs.
//    @param [in] silence_phones   A list of integer ids of silence phones. The
//                         silence frames i.e. the frames where num_ali
//                         corresponds to a silence phones are treated specially.
//                         The behavior is determined by 'one_silence_class'
//                         being false (traditional behavior) or true.
//                         Usually in our setup, several phones including
//                         the silence, vocalized noise, non-spoken noise
//                         and unk are treated as "silence phones"
//    @param [in] lat      The denominator lattice
//    @param [in] num_ali  The numerator alignment
//    @param [in] criterion    The objective function. Must be "mpfe" or "smbr"
//                         for MPFE (minimum phone frame error) or sMBR
//                         (state minimum bayes risk) training.
//    @param [in] one_silence_class   Determines how the silence frames are treated.
//                         Setting this to false gives the old traditional behavior,
//                         where the silence frames (according to num_ali) are
//                         treated as incorrect. However, this means that the
//                         insertions are not penalized by the objective.
//                         Setting this to true gives the new behaviour, where we
//                         treat silence as any other phone, except that all pdfs
//                         of silence phones are collapsed into a single class for
//                         the frame-error computation. This can possibly reduce
//                         the insertions in the trained model. This is closer to
//                         the WER metric that we actually care about, since WER is
//                         generally computed after filtering out noises, but
//                         does penalize insertions.
//     @param [out] post   The "MBR posteriors" i.e. derivatives w.r.t to the
//                         pseudo log-likelihoods of states at each frame.
// */
// BaseFloat LatticeForwardBackwardMpeVariants(
//     const TransitionModel &trans,
//     const std::vector<int32> &silence_phones,
//     const Lattice &lat,
//     const std::vector<int32> &num_ali,
//     std::string criterion,
//     bool one_silence_class,
//     Posterior *post);
//
// /**
//    This function can be used to compute posteriors for MMI, with a positive contribution
//    for the numerator and a negative one for the denominator.  This function is not actually
//    used in our normal MMI training recipes, where it's instead done using various command
//    line programs that each do a part of the job.  This function was written for use in
//    neural-net MMI training.
//
//    @param [in] trans    The transition model. Used to map the
//                         transition-ids to phones or pdfs.
//    @param [in] lat      The denominator lattice
//    @param [in] num_ali  The numerator alignment
//    @param [in] drop_frames   If "drop_frames" is true, it will not compute any
//                         posteriors on frames where the num and den have disjoint
//                         pdf-ids.
//    @param [in] convert_to_pdf_ids   If "convert_to_pdfs_ids" is true, it will
//                         convert the output to be at the level of pdf-ids, not
//                         transition-ids.
//    @param [in] cancel   If "cancel" is true, it will cancel out any positive and
//                         negative parts from the same transition-id (or pdf-id,
//                         if convert_to_pdf_ids == true).
//    @param [out] arc_post   The output MMI posteriors of transition-ids (or
//                         pdf-ids if convert_to_pdf_ids == true) at each frame
//                         i.e. the difference between the numerator
//                         and denominator posteriors.
//
//    It returns the forward-backward likelihood of the lattice. */
// BaseFloat LatticeForwardBackwardMmi(
//     const TransitionModel &trans,
//     const Lattice &lat,
//     const std::vector<int32> &num_ali,
//     bool drop_frames,
//     bool convert_to_pdf_ids,
//     bool cancel,
//     Posterior *arc_post);
//
//
// /// This function takes a CompactLattice that should only contain a single
// /// linear sequence (e.g. derived from lattice-1best), and that should have been
// /// processed so that the arcs in the CompactLattice align correctly with the
// /// word boundaries (e.g. by lattice-align-words).  It outputs 3 vectors of the
// /// same size, which give, for each word in the lattice (in sequence), the word
// /// label and the begin time and length in frames.  This is done even for zero
// /// (epsilon) words, generally corresponding to optional silence-- if you don't
// /// want them, just ignore them in the output.
// /// This function will print a warning and return false, if the lattice
// /// did not have the correct format (e.g. if it is empty or it is not
// /// linear).
// bool CompactLatticeToWordAlignment(const CompactLattice &clat,
//                                    std::vector<int32> *words,
//                                    std::vector<int32> *begin_times,
//                                    std::vector<int32> *lengths);
//
// /// This function takes a CompactLattice that should only contain a single
// /// linear sequence (e.g. derived from lattice-1best), and that should have been
// /// processed so that the arcs in the CompactLattice align correctly with the
// /// word boundaries (e.g. by lattice-align-words).  It outputs 4 vectors of the
// /// same size, which give, for each word in the lattice (in sequence), the word
// /// label, the begin time and length in frames, and the pronunciation (sequence
// /// of phones).  This is done even for zero words, corresponding to optional
// /// silences -- if you don't want them, just ignore them in the output.
// /// This function will print a warning and return false, if the lattice
// /// did not have the correct format (e.g. if it is empty or it is not
// /// linear).
// bool CompactLatticeToWordProns(
//     const TransitionModel &tmodel,
//     const CompactLattice &clat,
//     std::vector<int32> *words,
//     std::vector<int32> *begin_times,
//     std::vector<int32> *lengths,
//     std::vector<std::vector<int32> > *prons,
//     std::vector<std::vector<int32> > *phone_lengths);
//
//
// /// A form of the shortest-path/best-path algorithm that's specially coded for
// /// CompactLattice.  Requires that clat be acyclic.
// void CompactLatticeShortestPath(const CompactLattice &clat,
//                                 CompactLattice *shortest_path);
//
// /// This function expands a CompactLattice to ensure high-probability paths
// /// have unique histories. Arcs with posteriors larger than epsilon get split.
// void ExpandCompactLattice(const CompactLattice &clat,
//                           double epsilon,
//                           CompactLattice *expand_clat);
//
// /// For each state, compute forward and backward best (viterbi) costs and its
// /// traceback states (for generating best paths later). The forward best cost
// /// for a state is the cost of the best path from the start state to the state.
// /// The traceback state of this state is its predecessor state in the best path.
// /// The backward best cost for a state is the cost of the best path from the
// /// state to a final one. Its traceback state is the successor state in the best
// /// path in the forward direction.
// /// Note: final weights of states are in backward_best_cost_and_pred.
// /// Requires the input CompactLattice clat be acyclic.
// typedef std::vector<std::pair<double,
//         CompactLatticeArc::StateId> > CostTraceType;
// void CompactLatticeBestCostsAndTracebacks(
//     const CompactLattice &clat,
//     CostTraceType *forward_best_cost_and_pred,
//     CostTraceType *backward_best_cost_and_pred);
//
// /// This function adds estimated neural language model scores of words in a
// /// minimal list of hypotheses that covers a lattice, to the graph scores on the
// /// arcs. The list of hypotheses are generated by latbin/lattice-path-cover.
// typedef unordered_map<std::pair<int32, int32>, double, PairHasher<int32> > MapT;
// void AddNnlmScoreToCompactLattice(const MapT &nnlm_scores,
//                                   CompactLattice *clat);
//
// /// This function adds the word insertion penalty to graph score of each word
// /// in the compact lattice
// void AddWordInsPenToCompactLattice(BaseFloat word_ins_penalty,
//                                    CompactLattice *clat);
//
// /// This function *adds* the negated scores obtained from the Decodable object,
// /// to the acoustic scores on the arcs.  If you want to replace them, you should
// /// use ScaleCompactLattice to first set the acoustic scores to zero.  Returns
// /// true on success, false on error (typically some kind of mismatched inputs).
// bool RescoreCompactLattice(DecodableInterface *decodable,
//                            CompactLattice *clat);
//
//
// /// This function returns the number of words in the longest sentence in a
// /// Lattice (i.e. the maximum of any path, of the count of
// /// olabels on that path).
// int32 LongestSentenceLength(const Lattice &lat);
//
// /// This function returns the number of words in the longest sentence in a
// /// CompactLattice, i.e. the maximum of any path, of the count of
// /// labels on that path... note, in CompactLattice, the ilabels and olabels
// /// are identical because it is an acceptor.
// int32 LongestSentenceLength(const CompactLattice &lat);
//
//
// /// This function is like RescoreCompactLattice, but it is modified to avoid
// /// computing probabilities on most frames where all the pdf-ids are the same.
// /// (it needs the transition-model to work out whether two transition-ids map to
// /// the same pdf-id, and it assumes that the lattice has transition-ids on it).
// /// The naive thing would be to just set all probabilities to zero on frames
// /// where all the pdf-ids are the same (because this value won't affect the
// /// lattice posterior).  But this would become confusing when we compute
// /// corpus-level diagnostics such as the MMI objective function.  Instead,
// /// imagine speedup_factor = 100 (it must be >= 1.0)... with probability (1.0 /
// /// speedup_factor) we compute those likelihoods and multiply them by
// /// speedup_factor; otherwise we set them to zero.  This gives the right
// /// expected probability so our corpus-level diagnostics will be about right.
// bool RescoreCompactLatticeSpeedup(
//     const TransitionModel &tmodel,
//     BaseFloat speedup_factor,
//     DecodableInterface *decodable,
//     CompactLattice *clat);
//
//
// /// This function *adds* the negated scores obtained from the Decodable object,
// /// to the acoustic scores on the arcs.  If you want to replace them, you should
// /// use ScaleCompactLattice to first set the acoustic scores to zero.  Returns
// /// true on success, false on error (e.g. some kind of mismatched inputs).
// /// The input labels, if nonzero, are interpreted as transition-ids or whatever
// /// other index the Decodable object expects.
// bool RescoreLattice(DecodableInterface *decodable,
//                     Lattice *lat);
//
// /// This function Composes a CompactLattice format lattice with a
// /// DeterministicOnDemandFst<fst::StdFst> format fst, and outputs another
// /// CompactLattice format lattice. The first element (the one that corresponds
// /// to LM weight) in CompactLatticeWeight is used for composition.
// ///
// /// Note that the DeterministicOnDemandFst interface is not "const", therefore
// /// we cannot use "const" for <det_fst>.
// void ComposeCompactLatticeDeterministic(
//     const CompactLattice& clat,
//     fst::DeterministicOnDemandFst<fst::StdArc>* det_fst,
//     CompactLattice* composed_clat);
//
// /// This function computes the mapping from the pair
// /// (frame-index, transition-id) to the pair
// /// (sum-of-acoustic-scores, num-of-occurrences) over all occurrences of the
// /// transition-id in that frame.
// /// frame-index in the lattice.
// /// This function is useful for retaining the acoustic scores in a
// /// non-compact lattice after a process like determinization where the
// /// frame-level acoustic scores are typically lost.
// /// The function ReplaceAcousticScoresFromMap is used to restore the
// /// acoustic scores computed by this function.
// ///
// ///   @param [in] lat   Input lattice. Expected to be top-sorted. Otherwise the
// ///                     function will crash.
// ///   @param [out] acoustic_scores
// ///                     Pointer to a map from the pair (frame-index,
// ///                     transition-id) to a pair (sum-of-acoustic-scores,
// ///                     num-of-occurrences).
// ///                     Usually the acoustic scores for a pdf-id (and hence
// ///                     transition-id) on a frame will be the same for all the
// ///                     occurrences of the pdf-id in that frame.
// ///                     But if not, we will take the average of the acoustic
// ///                     scores. Hence, we store both the sum-of-acoustic-scores
// ///                     and the num-of-occurrences of the transition-id in that
// ///                     frame.
// void ComputeAcousticScoresMap(
//     const Lattice &lat,
//     unordered_map<std::pair<int32, int32>, std::pair<BaseFloat, int32>,
//                                         PairHasher<int32> > *acoustic_scores);
//
// /// This function restores acoustic scores computed using the function
// /// ComputeAcousticScoresMap into the lattice.
// ///
// ///   @param [in] acoustic_scores
// ///                      A map from the pair (frame-index, transition-id) to a
// ///                      pair (sum-of-acoustic-scores, num-of-occurrences) of
// ///                      the occurrences of the transition-id in that frame.
// ///                      See the comments for ComputeAcousticScoresMap for
// ///                      details.
// ///   @param [out] lat   Pointer to the output lattice.
// void ReplaceAcousticScoresFromMap(
//     const unordered_map<std::pair<int32, int32>, std::pair<BaseFloat, int32>,
//                                         PairHasher<int32> > &acoustic_scores,
//     Lattice *lat);

}  // namespace kaldi

#endif  // KALDI_LAT_LATTICE_FUNCTIONS_H_


================================================
FILE: runtime/engine/kaldi/lm/CMakeLists.txt
================================================

# kaldi-lm: library target built from the two ARPA language-model sources
# listed below.  No library type (STATIC/SHARED) is given here, so the type is
# decided by the build's defaults.
add_library(kaldi-lm
  arpa-file-parser.cc
  arpa-lm-compiler.cc
)
# kaldi-util is linked PUBLIC, so it is also propagated to targets that link
# against kaldi-lm.
target_link_libraries(kaldi-lm PUBLIC kaldi-util)

================================================
FILE: runtime/engine/kaldi/lm/arpa-file-parser.cc
================================================
// lm/arpa-file-parser.cc

// Copyright 2014  Guoguo Chen
// Copyright 2016  Smart Action Company LLC (kkm)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include <fst/fstlib.h>

#include <sstream>

#include "base/kaldi-error.h"
#include "base/kaldi-math.h"
#include "lm/arpa-file-parser.h"
#include "util/text-utils.h"

namespace kaldi {

// Stores a copy of the options and the (non-owned, possibly NULL) symbol
// table pointer, and zeroes the line and warning counters.  No parsing
// happens here; see Read().
ArpaFileParser::ArpaFileParser(const ArpaParseOptions& options,
                               fst::SymbolTable* symbols)
    : options_(options),
      symbols_(symbols),
      line_number_(0),
      warning_count_(0) {}

// Nothing to release here: the destructor body is empty, and the symbol table
// pointer handed to the constructor is not deleted by the parser.
ArpaFileParser::~ArpaFileParser() {}

// Strips trailing spaces, tabs, carriage returns and newlines from *str in
// place.  A string made up only of those characters (or an empty string)
// ends up empty; leading and interior whitespace is left alone.
void TrimTrailingWhitespace(std::string *str) {
  static const char kTrailingChars[] = " \n\r\t";
  const std::string::size_type last_kept =
      str->find_last_not_of(kTrailingChars);
  if (last_kept == std::string::npos) {
    // Nothing but whitespace (or nothing at all): drop everything.
    str->clear();
    return;
  }
  // Cut right after the last character that is not whitespace.
  str->erase(last_kept + 1);
}

void ArpaFileParser::Read(std::istream &is) {
  // Argument sanity checks.
  if (options_.bos_symbol <= 0 || options_.eos_symbol <= 0 ||
      options_.bos_symbol == options_.eos_symbol)
    KALDI_ERR << "BOS and EOS symbols are required, must not be epsilons, and "
              << "differ from each other. Given:"
              << " BOS=" << options_.bos_symbol
              << " EOS=" << options_.eos_symbol;
  if (symbols_ != NULL &&
      options_.oov_handling == ArpaParseOptions::kReplaceWithUnk &&
      (options_.unk_symbol <= 0 ||
       options_.unk_symbol == options_.bos_symbol ||
       options_.unk_symbol == options_.eos_symbol))
    KALDI_ERR << "When symbol table is given and OOV mode is kReplaceWithUnk, "
              << "UNK symbol is required, must not be epsilon, and "
              << "differ from both BOS and EOS symbols. Given:"
              << " UNK=" << options_.unk_symbol
              << " BOS=" << options_.bos_symbol
              << " EOS=" << options_.eos_symbol;
  if (symbols_ != NULL && symbols_->Find(options_.bos_symbol).empty())
    KALDI_ERR << "BOS symbol must exist in symbol table";
  if (symbols_ != NULL && symbols_->Find(options_.eos_symbol).empty())
    KALDI_ERR << "EOS symbol must exist in symbol table";
  if (symbols_ != NULL && options_.unk_symbol > 0 &&
      symbols_->Find(options_.unk_symbol).empty())
    KALDI_ERR << "UNK symbol must exist in symbol table";

  ngram_counts_.clear();
  line_number_ = 0;
  warning_count_ = 0;
  current_line_.clear();

#define PARSE_ERR KALDI_ERR << LineReference() << ": "

  // Give derived class an opportunity to prepare its state.
  ReadStarted();

  // Processes "\data\" section.
  bool keyword_found = false;
  while (++line_number_, getline(is, current_line_) && !is.eof()) {
    if (current_line_.find_first_not_of(" \t\n\r") == std::string::npos) {
      continue;
    }

    TrimTrailingWhitespace(&current_line_);

    // Continue skipping lines until the \data\ marker alone on a line is found.
    if (!keyword_found) {
      if (current_line_ == "\\data\\") {
        KALDI_LOG << "Reading \\data\\ section.";
        keyword_found = true;
      }
      continue;
    }

    if (current_line_[0] == '\\') break;

    // Enters "\data\" section, and looks for patterns like "ngram 1=1000",
    // which means there are 1000 unigrams.
    std::size_t equal_symbol_pos = current_line_.find("=");
    if (equal_symbol_pos != std::string::npos)
      // Guaranteed spaces around the "=".
      current_line_.replace(equal_symbol_pos, 1, " = ");
    std::vector<std::string> col;
    SplitStringToVector(current_line_, " \t", true, &col);
    if (col.size() == 4 && col[0] == "ngram" && col[2] == "=") {
      int32 order, ngram_count = 0;
      if (!ConvertStringToInteger(col[1], &order) ||
          !ConvertStringToInteger(col[3], &ngram_count)) {
        PARSE_ERR << "cannot parse ngram count";
      }
      if (ngram_counts_.size() <= order) {
        ngram_counts_.resize(order);
      }
      ngram_counts_[order - 1] = ngram_count;
    } else {
      KALDI_WARN << LineReference()
                 << ": uninterpretable line in \\data\\ section";
    }
  }

  if (ngram_counts_.size() == 0)
    PARSE_ERR << "\\data\\ section missing or empty.";

  // Signal that grammar order and n-gram counts are known.
  HeaderAvailable();

  NGram ngram;
  ngram.words.reserve(ngram_counts_.size());

  // Processes "\N-grams:" section.
  for (int32 cur_order = 1; cur_order <= ngram_counts_.size(); ++cur_order) {
    // Skips n-grams with zero count.
    if (ngram_counts_[cur_order - 1] == 0)
      KALDI_WARN << "Zero ngram count in ngram order " << cur_order
                 << "(look for 'ngram " << cur_order << "=0' in the \\data\\ "
                 << " section). There is possibly a problem with the file.";

    // Must be looking at a \k-grams: directive at this point.
    std::ostringstream keyword;
    keyword << "\\" << cur_order << "-grams:";
    if (current_line_ != keyword.str()) {
      PARSE_ERR << "invalid directive, expecting '" << keyword.str() << "'";
    }
    KALDI_LOG << "Reading " << current_line_ << " section.";

    int32 ngram_count = 0;
    while (++line_number_, getline(is, current_line_) && !is.eof()) {
      if (current_line_.find_first_not_of(" \n\t\r") == std::string::npos) {
        continue;
      }
      if (current_line_[0] == '\\') {
        TrimTrailingWhitespace(&current_line_);
        std::ostringstream next_keyword;
        next_keyword << "\\" << cur_order + 1 << "-grams:";
        if ((current_line_ != next_keyword.str()) &&
            (current_line_ != "\\end\\")) {
          if (ShouldWarn()) {
            KALDI_WARN << "ignoring possible directive '" << current_line_
                       << "' expecting '" << next_keyword.str() << "'";

            if (warning_count_ > 0 &&
                warning_count_ > static_cast<uint32>(options_.max_warnings)) {
              KALDI_WARN << "Of " << warning_count_ << " parse warnings, "
                         << options_.max_warnings << " were reported. "
                         << "Run program with --max-arpa-warnings=-1 "
                         << "to see all warnings";
            }
          }
        } else {
          break;
        }
      }

      std::vector<std::string> col;
      SplitStringToVector(current_line_, " \t", true, &col);

      if (col.size() < 1 + cur_order ||
          col.size() > 2 + cur_order ||
          (cur_order == ngram_counts_.size() && col.size() != 1 + cur_order)) {
        PARSE_ERR << "Invalid n-gram data line";
      }
      ++ngram_count;

      // Parse out n-gram logprob and, if present, backoff weight.
      if (!ConvertStringToReal(col[0], &ngram.logprob)) {
        PARSE_ERR << "invalid n-gram logprob '" << col[0] << "'";
      }
      ngram.backoff = 0.0;
      if (col.size() > cur_order + 1) {
        if (!ConvertStringToReal(col[cur_order + 1], &ngram.backoff))
          PARSE_ERR << "invalid backoff weight '" << col[cur_order + 1] << "'";
      }
      // Convert to natural log.
      ngram.logprob *= M_LN10;
      ngram.backoff *= M_LN10;

      ngram.words.resize(cur_order);
      bool skip_ngram = false;
      for (int32 index = 0; !skip_ngram && index < cur_order; ++index) {
        int32 word;
        if (symbols_) {
          // Symbol table provided, so symbol labels are expected.
          if (options_.oov_handling == ArpaParseOptions::kAddToSymbols) {
            word = symbols_->AddSymbol(col[1 + index]);
          } else {
            word = symbols_->Find(col[1 + index]);
            if (word == -1) { // fst::kNoSymbol
              switch (options_.oov_handling) {
                case ArpaParseOptions::kReplaceWithUnk:
                  word = options_.unk_symbol;
                  break;
                case ArpaParseOptions::kSkipNGram:
                  if (ShouldWarn())
                    KALDI_WARN << LineReference() << " skipped: word '"
                               << col[1 + index] << "' not in symbol table";
                  skip_ngram = true;
                  break;
                default:
                  PARSE_ERR << "word '"  << col[1 + index]
                            << "' not in symbol table";
              }
            }
          }
        } else {
          // Symbols not provided, LM file should contain integers.
          if (!ConvertStringToInteger(col[1 + index], &word) || word < 0) {
            PARSE_ERR << "invalid symbol '" << col[1 + index] << "'";
          }
        }
        // Whichever way we got it, an epsilon is invalid.
        if (word == 0) {
          PARSE_ERR << "epsilon symbol '" << col[1 + index]
                    << "' is illegal in ARPA LM";
        }
        ngram.words[index] = word;
      }
      if (!skip_ngram) {
        ConsumeNGram(ngram);
      }
    }
    if (ngram_count > ngram_counts_[cur_order - 1]) {
      PARSE_ERR << "header said there would be " << ngram_counts_[cur_order - 1]
                << " n-grams of order " << cur_order
                << ", but we saw more already.";
    }
  }

  if (current_line_ != "\\end\\") {
    PARSE_ERR << "invalid or unexpected directive line, expecting \\end\\";
  }

  if (warning_count_ > 0 &&
      warning_count_ > static_cast<uint32>(options_.max_warnings)) {
    KALDI_WARN << "Of " << warning_count_ << " parse warnings, "
               << options_.max_warnings << " were reported. Run program with "
               << "--max-arpa-warnings=-1 to see all warnings";
  }

  current_line_.clear();
  ReadComplete();

#undef PARSE_ERR
}

// Builds the "line N [text]" prefix used in parse diagnostics, from the
// current line number and the current line's contents.
std::string ArpaFileParser::LineReference() const {
  std::ostringstream where;
  where << "line " << line_number_;
  where << " [" << current_line_ << "]";
  return where.str();
}

// Counts one more warning and reports whether it should actually be printed,
// i.e. whether the running count is still within options_.max_warnings
// (compared after casting max_warnings to uint32).
// A counter equal to -1 is left untouched and always yields false.
// NOTE(review): that looks like a saturation guard so the counter never
// wraps around -- confirm against the declaration of warning_count_.
bool ArpaFileParser::ShouldWarn() {
  if (warning_count_ == -1) {
    return false;
  }
  ++warning_count_;
  return warning_count_ <= static_cast<uint32>(options_.max_warnings);
}

}  // namespace kaldi


================================================
FILE: runtime/engine/kaldi/lm/arpa-file-parser.h
================================================
// lm/arpa-file-parser.h

// Copyright 2014  Guoguo Chen
// Copyright 2016  Smart Action Company LLC (kkm)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_LM_ARPA_FILE_PARSER_H_
#define KALDI_LM_ARPA_FILE_PARSER_H_

#include <fst/fst-decl.h>

#include <string>
#include <vector>

#include "base/kaldi-types.h"
#include "util/options-itf.h"

namespace kaldi {

/**
  Settings consumed by ArpaFileParser: the ids of the special symbols, the
  policy for out-of-vocabulary words, and the cap on reported warnings.
*/
struct ArpaParseOptions {
  /// What to do with a word that is not found in the symbol table.
  enum OovHandling {
    kRaiseError,      ///< Abort on OOV words
    kAddToSymbols,    ///< Add novel words to the symbol table.
    kReplaceWithUnk,  ///< Replace OOV words with <unk>.
    kSkipNGram        ///< Skip n-gram with OOV word and continue.
  };

  /// Defaults: every symbol id is -1 (unset), OOV words raise an error, and
  /// at most 30 warnings are reported.
  ArpaParseOptions()
      : bos_symbol(-1),
        eos_symbol(-1),
        unk_symbol(-1),
        oov_handling(kRaiseError),
        max_warnings(30) {}

  /// Exposes the command-line configurable part of these options.
  void Register(OptionsItf *opts) {
    // Only max_warnings is registered.  The remaining fields are handled by
    // the client programs themselves, because some of them take integer
    // symbols while others are given words on their command line.
    opts->Register("max-arpa-warnings", &max_warnings,
                   "Maximum warnings to report on ARPA parsing, "
                   "0 to disable, -1 to show all");
  }

  int32 bos_symbol;          ///< Symbol for <s>, Required non-epsilon.
  int32 eos_symbol;          ///< Symbol for </s>, Required non-epsilon.
  int32 unk_symbol;          ///< Symbol for <unk>, Required for kReplaceWithUnk.
  OovHandling oov_handling;  ///< How to handle OOV words in the file.
  int32 max_warnings;        ///< Maximum warnings to report, <0 unlimited.
};

/**
   A parsed n-gram from ARPA LM file.
*/
struct NGram {
  /// Creates an empty n-gram with zero log-probability and zero backoff.
  NGram() : words(), logprob(0.0f), backoff(0.0f) {}

  std::vector<int32> words;  ///< Word symbols, ordered left to right.
  float logprob;             ///< Log-probability of this n-gram.
  float backoff;             ///< Log backoff weight of this n-gram; stays
                             ///< at zero when none is specified.
};

/**
    ArpaFileParser is an abstract base class for ARPA LM file conversion.

    See ConstArpaLmBuilder and ArpaLmCompiler for usage examples.
*/
// Abstract base for consumers of ARPA LM files. Derived classes implement
// ConsumeNGram() and may override the ReadStarted() / HeaderAvailable() /
// ReadComplete() hooks; Read() is the public entry point. The member
// functions declared without a body here are defined outside this header.
class ArpaFileParser {
 public:
  /// Constructs the parser with the given options and optional symbol table.
  /// If symbol table is provided, then the file should contain text n-grams,
  /// and the words are mapped to symbols through it. bos_symbol and
  /// eos_symbol in the options structure must be valid symbols in the table,
  /// and so must be unk_symbol if provided. The table is not owned by the
  /// parser, but may be augmented, if oov_handling is set to kAddToSymbols.
  /// If symbol table is a null pointer, the file should contain integer
  /// symbol values, and oov_handling has no effect. bos_symbol and eos_symbol
  /// must be valid symbols still.
  ArpaFileParser(const ArpaParseOptions& options, fst::SymbolTable* symbols);
  virtual ~ArpaFileParser();

  /// Read ARPA LM file from a stream.
  void Read(std::istream &is);

  /// Parser options.
  const ArpaParseOptions& Options() const { return options_; }

 protected:
  /// Override called before reading starts. This is the point to prepare
  /// any state in the derived class.
  virtual void ReadStarted() { }

  /// Override function called to signal that ARPA header with the expected
  /// number of n-grams has been read, and NgramCounts() is now valid.
  virtual void HeaderAvailable() { }

  /// Pure override that must be implemented to process current n-gram. The
  /// n-grams are sent in the file order, which guarantees that all
  /// (k-1)-grams are processed before the first k-gram is.
  virtual void ConsumeNGram(const NGram&) = 0;

  /// Override function called after the last n-gram has been consumed.
  virtual void ReadComplete() { }

  /// Read-only access to symbol table. Not owned, do not make public.
  /// May be a null pointer (see the constructor contract above).
  const fst::SymbolTable* Symbols() const { return symbols_; }

  /// Inside ConsumeNGram(), provides the current line number.
  int32 LineNumber() const { return line_number_; }

  /// Inside ConsumeNGram(), returns a formatted reference to the line being
  /// compiled, to print out as part of diagnostics.
  std::string LineReference() const;

  /// Increments warning count, and returns true if a warning should be
  /// printed or false if the count has exceeded the set maximum.
  bool ShouldWarn();

  /// N-gram counts. Valid from the point when HeaderAvailable() is called.
  const std::vector<int32>& NgramCounts() const { return ngram_counts_; }

 private:
  ArpaParseOptions options_;   // copy of the options passed at construction.
  fst::SymbolTable* symbols_;  // the pointer is not owned here.
  int32 line_number_;          // value returned by LineNumber().
  uint32 warning_count_;       // presumably the counter behind ShouldWarn();
                               // its definition is not in this header.
  std::string current_line_;   // presumably the text used by LineReference();
                               // its definition is not in this header.
  std::vector<int32> ngram_counts_;  // value returned by NgramCounts().
};

}  // namespace kaldi

#endif  // KALDI_LM_ARPA_FILE_PARSER_H_


================================================
FILE: runtime/engine/kaldi/lm/arpa-lm-compiler.cc
================================================
// lm/arpa-lm-compiler.cc

// Copyright 2009-2011 Gilles Boulianne
// Copyright 2016 Smart Action LLC (kkm)
// Copyright 2017 Xiaohui Zhang

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <limits>
#include <sstream>
#include <utility>

#include "base/kaldi-math.h"
#include "lm/arpa-lm-compiler.h"
#include "util/stl-utils.h"
#include "util/text-utils.h"
#include "fstext/remove-eps-local.h"

namespace kaldi {

// Type-erased interface to the templated ArpaLmCompilerImpl<HistKey>, so
// that ArpaLmCompiler can hold either key flavour behind a single pointer.
class ArpaLmCompilerImplInterface {
 public:
  // Virtual so that deleting through the interface pointer destroys the
  // concrete implementation.
  virtual ~ArpaLmCompilerImplInterface() { }
  // Adds one n-gram to the FST under construction. is_highest tells whether
  // the n-gram is of the highest order present in the model.
  virtual void ConsumeNGram(const NGram& ngram, bool is_highest) = 0;
};

namespace {

typedef int32 StateId;
typedef int32 Symbol;

// GeneralHistKey can represent state history in an arbitrarily large n
// n-gram model with symbol ids fitting int32.
class GeneralHistKey {
 public:
  // Construct key from being and end iterators.
  template<class InputIt>
  GeneralHistKey(InputIt begin, InputIt end) : vector_(begin, end) { }
  // Construct empty history key.
  GeneralHistKey() : vector_() { }
  // Return tails of the key as a GeneralHistKey. The tails of an n-gram
  // w[1..n] is the sequence w[2..n] (and the heads is w[1..n-1], but the
  // key class does not need this operartion).
  GeneralHistKey Tails() const {
    return GeneralHistKey(vector_.begin() + 1, vector_.end());
  }
  // Keys are equal if represent same state.
  friend bool operator==(const GeneralHistKey& a, const GeneralHistKey& b) {
    return a.vector_ == b.vector_;
  }
  // Public typename HashType for hashing.
  struct HashType : public std::unary_function<GeneralHistKey, size_t> {
    size_t operator()(const GeneralHistKey& key) const {
      return VectorHasher<Symbol>().operator()(key.vector_);
    }
  };

 private:
  std::vector<Symbol> vector_;
};

// OptimizedHistKey combines 3 21-bit symbol ID values into one 64-bit
// machine word, allowing significant memory reduction and some runtime
// benefit over GeneralHistKey. Since 3 symbols are enough to track history
// in a 4-gram model, this optimized key is used for smaller models with up
// to 4-gram and symbol values up to 2^21-1.
//
// The class does not validate its input: the caller must make sure that at
// most 3 symbols are packed and that each of them fits into kShift bits.
//
// See GeneralHistKey for interface requirements of a key class.
class OptimizedHistKey {
 public:
  enum {
    kShift = 21,  // 21 * 3 = 63 bits for data.
    kMaxData = (1 << kShift) - 1
  };
  // Pack the symbols in [begin, end): the first symbol occupies the lowest
  // kShift bits, each following one the next higher kShift bits.
  template<class InputIt>
  OptimizedHistKey(InputIt begin, InputIt end) : data_(0) {
    for (uint32 shift = 0; begin != end; ++begin, shift += kShift) {
      data_ |= static_cast<uint64>(*begin) << shift;
    }
  }
  // Construct empty history key.
  OptimizedHistKey() : data_(0) { }
  // Drop the first (lowest) symbol. The tails of the empty key is empty.
  OptimizedHistKey Tails() const {
    return OptimizedHistKey(data_ >> kShift);
  }
  // Keys are equal if their packed words are equal.
  friend bool operator==(const OptimizedHistKey& a, const OptimizedHistKey& b) {
    return a.data_ == b.data_;
  }
  // Hash functor returning the packed word itself. Deliberately a plain
  // functor: std::unary_function is deprecated since C++11 and removed in
  // C++17, and unordered_map only needs operator().
  struct HashType {
    size_t operator()(const OptimizedHistKey& key) const { return key.data_; }
  };

 private:
  // Construct directly from an already packed word; used by Tails().
  explicit OptimizedHistKey(uint64 data) : data_(data) { }
  uint64 data_;  // Up to 3 symbols of kShift bits each.
};

}  // namespace

// Implementation of the ARPA-to-FST compilation, parameterized by the class
// used as the n-gram history key (GeneralHistKey or OptimizedHistKey).
// Keeps a map from history key to FST state and adds states and arcs to the
// FST as n-grams are consumed.
template <class HistKey>
class ArpaLmCompilerImpl : public ArpaLmCompilerImplInterface {
 public:
  // parent supplies the BOS/EOS symbols and warning helpers; fst is the FST
  // being built; sub_eps is the label put on the input side of backoff arcs
  // (0 means no disambiguation symbol is in use).
  ArpaLmCompilerImpl(ArpaLmCompiler* parent, fst::StdVectorFst* fst,
                     Symbol sub_eps);

  // Adds the states/arcs for one n-gram; see the definition for details.
  virtual void ConsumeNGram(const NGram &ngram, bool is_highest);

 private:
  // Finds or creates the state for key, making sure it has a backoff arc.
  StateId AddStateWithBackoff(HistKey key, float backoff);
  // Adds a backoff arc from state to the longest existing suffix of key.
  void CreateBackoff(HistKey key, StateId state, float weight);

  ArpaLmCompiler *parent_;  // Not owned.
  fst::StdVectorFst* fst_;  // Not owned.
  Symbol bos_symbol_;       // Copied from the parent's options.
  Symbol eos_symbol_;       // Copied from the parent's options.
  Symbol sub_eps_;          // Input label of backoff arcs.

  // Common final state for </s> arcs; the constructor creates it only when
  // sub_eps_ == 0, and it is only read in that mode.
  StateId eos_state_;
  typedef unordered_map<HistKey, StateId,
                        typename HistKey::HashType> HistoryMap;
  HistoryMap history_;  // History key -> FST state; always holds the 0-gram.
};

// Sets up the compiler state: copies the BOS/EOS symbols from the parent's
// options, creates the 0-gram state and, when </s> is kept as a real symbol
// (sub_eps == 0), the shared final state for </s> arcs.
//
//   parent   provides options and diagnostics; not owned.
//   fst      FST to populate; not owned.
//   sub_eps  input label for backoff arcs, 0 if no disambiguation symbol.
template <class HistKey>
ArpaLmCompilerImpl<HistKey>::ArpaLmCompilerImpl(
    ArpaLmCompiler* parent, fst::StdVectorFst* fst, Symbol sub_eps)
    : parent_(parent), fst_(fst), bos_symbol_(parent->Options().bos_symbol),
      eos_symbol_(parent->Options().eos_symbol), sub_eps_(sub_eps),
      // Give the member a defined value even in the mode where no shared
      // </s> state is created, so that it never holds an indeterminate id.
      eos_state_(fst::kNoStateId) {
  // The algorithm maintains state per history. The 0-gram is a special state
  // for empty history. All unigrams (including BOS) backoff into this state.
  StateId zerogram = fst_->AddState();
  history_[HistKey()] = zerogram;

  // Also, if </s> is not treated as epsilon, create a common end state for
  // all transitions accepting the </s>, since they do not back off. This small
  // optimization saves about 2% states in an average grammar.
  if (sub_eps_ == 0) {
    eos_state_ = fst_->AddState();
    fst_->SetFinal(eos_state_, 0);
  }
}

// Adds the FST states and arcs for a single n-gram.
//
//   ngram       the n-gram to add; must contain at least one word.
//   is_highest  true if the n-gram is of the highest order in the model.
//
// An n-gram whose (n-1)-gram history is unknown is skipped with a warning.
// An n-gram ending in <eps> (0) or in the backoff label is a fatal error.
template <class HistKey>
void ArpaLmCompilerImpl<HistKey>::ConsumeNGram(const NGram &ngram,
                                               bool is_highest) {
  // Generally, we do the following. Suppose we are adding an n-gram "A B
  // C". Then find the node for "A B", add a new node for "A B C", and connect
  // them with the arc accepting "C" with the specified weight. Also, add a
  // backoff arc from the new "A B C" node to its backoff state "B C".
  //
  // Two notable exceptions are the highest order n-grams, and final n-grams.
  //
  // When adding a highest order n-gram (e. g., our "A B C" is in a 3-gram LM),
  // the following optimization is performed. There is no point adding a node
  // for "A B C" with a "C" arc from "A B", since there will be no other
  // arcs ingoing to this node, and an epsilon backoff arc into the backoff
  // model "B C", with the weight of \bar{1}. To save a node, create an arc
  // accepting "C" directly from "A B" to "B C". This saves as many nodes
  // as there are the highest order n-grams, which is typically about half
  // the size of a large 3-gram model.
  //
  // Indeed, this does not apply to n-grams ending in EOS, since they do not
  // back off. These are special, as they do not have a back-off state, and
  // the node for "(..anything..) </s>" is always final. These are handled
  // in one of the two possible ways. If symbols <s> and </s> are being
  // replaced by epsilons, neither node nor arc is created, and the logprob
  // of the n-gram is applied to its source node as final weight. If <s> and
  // </s> are preserved, then a special final node for </s> is allocated and
  // used as the destination of the "</s>" acceptor arc.

  // Both end() - 1 and back() below are undefined on an empty n-gram.
  KALDI_ASSERT(!ngram.words.empty());

  HistKey heads(ngram.words.begin(), ngram.words.end() - 1);
  typename HistoryMap::iterator source_it = history_.find(heads);
  if (source_it == history_.end()) {
    // There was no "A B", therefore the probability of "A B C" is zero.
    // Print a warning and discard current n-gram.
    if (parent_->ShouldWarn())
      KALDI_WARN << parent_->LineReference()
                 << " skipped: no parent (n-1)-gram exists";
    return;
  }

  StateId source = source_it->second;
  StateId dest;
  Symbol sym = ngram.words.back();
  float weight = -ngram.logprob;  // Arc weights are negated log-probs.
  if (sym == sub_eps_ || sym == 0) {
    KALDI_ERR << " <eps> or disambiguation symbol " << sym
              << " found in the ARPA file. ";
  }
  if (sym == eos_symbol_) {
    if (sub_eps_ == 0) {
      // Keep </s> as a real symbol when not substituting.
      dest = eos_state_;
    } else {
      // Treat </s> as if it was epsilon: mark source final, with the weight
      // of the n-gram.
      fst_->SetFinal(source, weight);
      return;
    }
  } else {
    // For the highest order n-gram, this may find an existing state, for
    // non-highest, will create one (unless there are duplicate n-grams
    // in the grammar, which cannot be reliably detected if highest order,
    // so we better do not do that at all).
    dest = AddStateWithBackoff(
        HistKey(ngram.words.begin() + (is_highest ? 1 : 0),
                ngram.words.end()),
        -ngram.backoff);
  }

  if (sym == bos_symbol_) {
    weight = 0;  // Accepting <s> is always free.
    if (sub_eps_ == 0) {
      // <s> is a real symbol, only accepted in the start state.
      source = fst_->AddState();
      fst_->SetStart(source);
    } else {
      // The new state for <s> unigram history *is* the start state.
      fst_->SetStart(dest);
      return;
    }
  }

  // Add arc from source to dest, whichever way it was found.
  fst_->AddArc(source, fst::StdArc(sym, sym, weight, dest));
}

// Returns the state that represents the history `key`, creating it on first
// use. `key` is the n-gram itself for all orders but the highest, and the
// tails of the n-gram for the highest order (a consequence of the
// chain-collapsing optimization performed by ConsumeNGram). A newly created
// state immediately receives its backoff arc, weighted with `backoff`.
template <class HistKey>
StateId ArpaLmCompilerImpl<HistKey>::AddStateWithBackoff(HistKey key,
                                                         float backoff) {
  typename HistoryMap::iterator known = history_.find(key);
  if (known == history_.end()) {
    // First time this history is seen: allocate the state, register it in
    // the map, then attach the backoff arc.
    const StateId fresh = fst_->AddState();
    history_[key] = fresh;
    CreateBackoff(key.Tails(), fresh, backoff);
    return fresh;
  }
  // Invariant: a state that is present in the map already has its backoff
  // arc in the FST, so nothing more is needed.
  return known->second;
}

// Adds the backoff arc leaving `state`. `key` names the preferred backoff
// destination, which may not exist; in that case progressively shorter
// histories are tried until an existing one is found. The search always
// terminates because the 0-gram (empty history) state is always in the map.
template <class HistKey>
inline void ArpaLmCompilerImpl<HistKey>::CreateBackoff(
    HistKey key, StateId state, float weight) {
  typename HistoryMap::iterator target = history_.find(key);
  for (; target == history_.end(); target = history_.find(key)) {
    key = key.Tails();
  }

  // The input label is either <eps> or #0, depending on the epsilon
  // substitution mode, while the output label is always <eps>. This is the
  // only case when input and output label may differ.
  const StateId backoff_state = target->second;
  fst_->AddArc(state, fst::StdArc(sub_eps_, 0, weight, backoff_state));
}

// Releases the owned implementation object, if one was ever created.
ArpaLmCompiler::~ArpaLmCompiler() {
  // Deleting a null pointer is a no-op, so no explicit check is needed.
  delete impl_;
}

void ArpaLmCompiler::HeaderAvailable() {
  KALDI_ASSERT(impl_ == NULL);
  // Use optimized implementation if the grammar is 4-gram or less, and the
  // maximum attained symbol id will fit into the optimized range.
  int64 max_symbol = 0;
  if (Symbols() != NULL)
    max_symbol = Symbols()->AvailableKey() - 1;
  // If augmenting the symbol table, assume the worst case when all words in
  // the model being read are novel.
  if (Options().oov_handling == ArpaParseOptions::kAddToSymbols)
    max_symbol += NgramCounts()[0];

  if (NgramCounts().size() <= 4 && max_symbol < OptimizedHistKey::kMaxData) {
    impl_ = new ArpaLmCompilerImpl<OptimizedHistKey>(this, &fst_, sub_eps_);
  } else {
    impl_ = new ArpaLmCompilerImpl<GeneralHistKey>(this, &fst_, sub_eps_);
    KALDI_LOG << "Reverting to slower state tracking because model is large: "
              << NgramCounts().size() << "-gram with symbols up to "
              << max_symbol;
  }
}

void ArpaLmCompiler::ConsumeNGram(const NGram &ngram) {
  // <s> is invalid in tails, </s> in heads of an n-gram.
  for (int i = 0; i < ngram.words.size(); ++i) {
    if ((i > 0 && ngram.words[i] == Options().bos_symbol) ||
        (i + 1 < ngram.words.size()
         && ngram.words[i] == Options().eos_symbol)) {
      if (ShouldWarn())
        KALDI_WARN << LineReference()
                   << " skipped: n-gram has invalid BOS/EOS placement";
      return;
    }
  }

  bool is_highest = ngram.words.size() == NgramCounts().size();
  impl_->ConsumeNGram(ngram, is_highest);
}

void ArpaLmCompiler::RemoveRedundantStates() {
  fst::StdArc::Label backoff_symbol = sub_eps_;
  if (backoff_symbol == 0) {
    // The method of removing redundant states implemented in this function
    // leads to slow determinization of L o G when people use the older style of
    // usage of arpa2fst where the --disambig-symbol option was not specified.
    // The issue seems to be that it creates a non-deterministic FST, while G is
    // supposed to be deterministic.  By 'return'ing below, we just disable this
    // method if people were using an older script.  This method isn't really
    // that consequential anyway, and people will move to the newer-style
    // scripts (see current utils/format_lm.sh), so this isn't much of a
    // problem.
    return;
  }

  fst::StdArc::StateId num_states = fst_.NumStates();


  // replace the #0 symbols on the input of arcs out of redundant states (states
  // that are not final and have only a backoff arc leaving them), with <eps>.
  for (fst::StdArc::StateId state = 0; state < num_states; state++) {
    if (fst_.NumArcs(state) == 1 && fst_.Final(state) == fst::TropicalWeight::Zero()) {
      fst::MutableArcIterator<fst::StdVectorFst> iter(&fst_, state);
      fst::StdArc arc = iter.Value();
      if (arc.ilabel == backoff_symbol) {
        arc.ilabel = 0;
        iter.SetValue(arc);
      }
    }
  }

  // we could call fst::RemoveEps, and it would have the same effect in normal
  // cases, where backoff_symbol != 0 and there are no epsilons in unexpected
  // places, but RemoveEpsLocal is a bit safer in case something weird is going
  // on; it guarantees not to blow up the FST.
  fst::RemoveEpsLocal(&fst_);
  KALDI_LOG << "Reduced num-states from " << num_states << " to "
            << fst_.NumStates();
}

void ArpaLmCompiler::Check() const {
  if (fst_.Start() == fst::kNoStateId) {
    KALDI_ERR << "Arpa file did not contain the beginning-of-sentence symbol "
              << Symbols()->Find(Options().bos_symbol) << ".";
  }
}

void ArpaLmCompiler::ReadComplete() {
  fst_.SetInputSymbols(Symbols());
  fst_.SetOutputSymbols(Symbols());
  RemoveRedundantStates();
  Check();
}

}  // namespace kaldi


================================================
FILE: runtime/engine/kaldi/lm/arpa-lm-compiler.h
================================================
// lm/arpa-lm-compiler.h

// Copyright 2009-2011 Gilles Boulianne
// Copyright 2016 Smart Action LLC (kkm)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_LM_ARPA_LM_COMPILER_H_
#define KALDI_LM_ARPA_LM_COMPILER_H_

#include <fst/fstlib.h>

#include "lm/arpa-file-parser.h"

namespace kaldi {

class ArpaLmCompilerImplInterface;

// Compiles an ARPA language model into an FST. The ARPA file is read
// through the inherited ArpaFileParser interface; the resulting FST is then
// available through Fst() / MutableFst().
//
// NOTE(review): the class owns impl_ and deletes it in the destructor, but
// declares neither a copy constructor nor an assignment operator, so a copy
// of an object with a non-null impl_ would delete the same pointer twice.
// Objects of this class should not be copied.
class ArpaLmCompiler : public ArpaFileParser {
 public:
  // options and symbols are forwarded to ArpaFileParser. sub_eps is the
  // label placed on the input side of backoff arcs; 0 means that no
  // disambiguation symbol is used.
  ArpaLmCompiler(const ArpaParseOptions& options, int sub_eps,
                 fst::SymbolTable* symbols)
      : ArpaFileParser(options, symbols),
        sub_eps_(sub_eps), impl_(NULL) {
  }
  ~ArpaLmCompiler();

  // Read-only and mutable access to the compiled FST.
  const fst::StdVectorFst& Fst() const { return fst_; }
  fst::StdVectorFst* MutableFst() { return &fst_; }

 protected:
  // ArpaFileParser overrides.
  virtual void HeaderAvailable();
  virtual void ConsumeNGram(const NGram& ngram);
  virtual void ReadComplete();


 private:
  // this function removes states that only have a backoff arc coming
  // out of them.
  void RemoveRedundantStates();
  // Raises an error if the compiled FST has no start state.
  void Check() const;

  int sub_eps_;  // Input label of backoff arcs, 0 if none.
  ArpaLmCompilerImplInterface* impl_;  // Owned.
  fst::StdVectorFst fst_;  // The FST being built.
  template <class HistKey> friend class ArpaLmCompilerImpl;
};

}  // namespace kaldi

#endif  // KALDI_LM_ARPA_LM_COMPILER_H_


================================================
FILE: runtime/engine/kaldi/lmbin/CMakeLists.txt
================================================

# Build the arpa2fst command-line tool from arpa2fst.cc in this directory.
add_executable(arpa2fst ${CMAKE_CURRENT_SOURCE_DIR}/arpa2fst.cc)
# Headers are resolved relative to both SPEECHX_ROOT and SPEECHX_ROOT/kaldi.
target_include_directories(arpa2fst PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
# Link against the kaldi-lm library plus glog, gflags and fst.
target_link_libraries(arpa2fst PUBLIC kaldi-lm glog gflags fst)


================================================
FILE: runtime/engine/kaldi/lmbin/arpa2fst.cc
================================================
// bin/arpa2fst.cc
//
// Copyright 2009-2011  Gilles Boulianne.
//
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include <string>

#include "lm/arpa-lm-compiler.h"
#include "util/kaldi-io.h"
#include "util/parse-options.h"

// Entry point of the arpa2fst tool.
//
// Parses the command line, obtains a symbol table (read from a file or newly
// created), compiles the ARPA language model into an FST with
// ArpaLmCompiler, optionally ilabel-sorts it, optionally writes the symbol
// table, and writes the FST.
//
// Exits with status 1 after printing the usage when the number of positional
// arguments is neither 1 nor 2; returns -1 when a std::exception is caught.
int main(int argc, char *argv[]) {
    using namespace kaldi;  // NOLINT
    try {
        const char *usage =
            "Convert an ARPA format language model into an FST\n"
            "Usage: arpa2fst [opts] <input-arpa> <output-fst>\n"
            " e.g.: arpa2fst --disambig-symbol=#0 --read-symbol-table="
            "data/lang/words.txt lm/input.arpa G.fst\n\n"
            "Note: When called without switches, the output G.fst will "
            "contain\n"
            "an embedded symbol table. This is compatible with the way a "
            "previous\n"
            "version of arpa2fst worked.\n";

        ParseOptions po(usage);

        // Registers --max-arpa-warnings.
        ArpaParseOptions options;
        options.Register(&po);

        // Option flags.
        std::string bos_symbol = "<s>";
        std::string eos_symbol = "</s>";
        std::string disambig_symbol;
        std::string read_syms_filename;
        std::string write_syms_filename;
        bool keep_symbols = false;
        bool ilabel_sort = true;

        po.Register("bos-symbol", &bos_symbol, "Beginning of sentence symbol");
        po.Register("eos-symbol", &eos_symbol, "End of sentence symbol");
        po.Register(
            "disambig-symbol",
            &disambig_symbol,
            "Disambiguator. If provided (e. g. #0), used on input side of "
            "backoff links, and <s> and </s> are replaced with epsilons");
        po.Register("read-symbol-table",
                    &read_syms_filename,
                    "Use existing symbol table");
        po.Register("write-symbol-table",
                    &write_syms_filename,
                    "Write generated symbol table to a file");
        po.Register(
            "keep-symbols",
            &keep_symbols,
            "Store symbol table with FST. Symbols always saved to FST if "
            "symbol tables are neither read or written (otherwise symbols "
            "would be lost entirely)");
        po.Register("ilabel-sort", &ilabel_sort, "Ilabel-sort the output FST");

        po.Read(argc, argv);

        // The output FST argument is optional.
        if (po.NumArgs() != 1 && po.NumArgs() != 2) {
            po.PrintUsage();
            exit(1);
        }
        std::string arpa_rxfilename = po.GetArg(1),
                    fst_wxfilename = po.GetOptArg(2);

        // Stays 0 when --disambig-symbol is not given; it is passed to the
        // compiler below as the label for the input side of backoff arcs.
        int64 disambig_symbol_id = 0;

        fst::SymbolTable *symbols;
        if (!read_syms_filename.empty()) {
            // Use existing symbols. Required symbols must be in the table.
            kaldi::Input kisym(read_syms_filename);
            symbols = fst::SymbolTable::ReadText(
                kisym.Stream(), PrintableWxfilename(read_syms_filename));
            if (symbols == NULL)
                KALDI_ERR << "Could not read symbol table from file "
                          << read_syms_filename;

            // With a fixed table, n-grams containing unknown words are
            // skipped instead of extending the table.
            options.oov_handling = ArpaParseOptions::kSkipNGram;
            if (!disambig_symbol.empty()) {
                disambig_symbol_id = symbols->Find(disambig_symbol);
                if (disambig_symbol_id == -1)  // fst::kNoSymbol
                    KALDI_ERR << "Symbol table " << read_syms_filename
                              << " has no symbol for " << disambig_symbol;
            }
        } else {
            // Create a new symbol table and populate it from ARPA file.
            symbols = new fst::SymbolTable(PrintableWxfilename(fst_wxfilename));
            options.oov_handling = ArpaParseOptions::kAddToSymbols;
            // <eps> gets id 0. The disambiguation symbol, if any, is added
            // next, before <s> and </s>, so the order of these calls
            // determines the ids in the generated table.
            symbols->AddSymbol("<eps>", 0);
            if (!disambig_symbol.empty()) {
                disambig_symbol_id = symbols->AddSymbol(disambig_symbol);
            }
        }

        // Add or use existing BOS and EOS.
        // NOTE(review): this runs for a table read from a file as well;
        // presumably AddSymbol returns the existing id when the symbol is
        // already present and otherwise extends the table - confirm.
        options.bos_symbol = symbols->AddSymbol(bos_symbol);
        options.eos_symbol = symbols->AddSymbol(eos_symbol);

        // If producing new (not reading existing) symbols and not saving them,
        // need to keep symbols with FST, otherwise they would be lost.
        if (read_syms_filename.empty() && write_syms_filename.empty())
            keep_symbols = true;

        // Actually compile LM.
        KALDI_ASSERT(symbols != NULL);
        ArpaLmCompiler lm_compiler(options, disambig_symbol_id, symbols);
        {
            // Scoped so that the input is released as soon as reading ends.
            Input ki(arpa_rxfilename);
            lm_compiler.Read(ki.Stream());
        }

        // Sort the FST in-place if requested by options.
        if (ilabel_sort) {
            fst::ArcSort(lm_compiler.MutableFst(), fst::StdILabelCompare());
        }

        // Write symbols if requested.
        if (!write_syms_filename.empty()) {
            kaldi::Output kosym(write_syms_filename, false);
            symbols->WriteText(kosym.Stream());
        }

        // Write LM FST.
        bool write_binary = true, write_header = false;
        kaldi::Output kofst(fst_wxfilename, write_binary, write_header);
        fst::FstWriteOptions wopts(PrintableWxfilename(fst_wxfilename));
        // Embed the symbol tables in the FST only when keep_symbols is set.
        wopts.write_isymbols = wopts.write_osymbols = keep_symbols;
        lm_compiler.Fst().Write(kofst.Stream(), wopts);

        // NOTE(review): not reached when an exception is thrown above; the
        // table is then not freed before main returns.
        delete symbols;
    } catch (const std::exception &e) {
        std::cerr << e.what();
        return -1;
    }
}


================================================
FILE: runtime/engine/kaldi/util/CMakeLists.txt
================================================
# The kaldi-util library, built from the utility sources listed below.
add_library(kaldi-util
  kaldi-holder.cc
  kaldi-io.cc
  kaldi-semaphore.cc
  kaldi-table.cc
  kaldi-thread.cc
  parse-options.cc
  simple-io-funcs.cc
  simple-options.cc
  text-utils.cc
)
# PUBLIC: kaldi-base and kaldi-matrix are also propagated to every target
# that links against kaldi-util.
target_link_libraries(kaldi-util PUBLIC kaldi-base kaldi-matrix)

================================================
FILE: runtime/engine/kaldi/util/basic-filebuf.h
================================================
///////////////////////////////////////////////////////////////////////////////
// This is a modified version of the std::basic_filebuf from libc++
// (http://libcxx.llvm.org/).
// It allows one to create basic_filebuf from an existing FILE* handle or file
// descriptor.
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source License licenses. See LICENSE.TXT for details (included at the
// bottom).
///////////////////////////////////////////////////////////////////////////////
#ifndef KALDI_UTIL_BASIC_FILEBUF_H_
#define KALDI_UTIL_BASIC_FILEBUF_H_

///////////////////////////////////////////////////////////////////////////////
#include <fstream>
#include <cstdio>
#include <cstring>
#include <string>
#include <algorithm>

///////////////////////////////////////////////////////////////////////////////
namespace kaldi {
///////////////////////////////////////////////////////////////////////////////
// Stream buffer backed by a C FILE*. In addition to opening a file by name,
// it can be attached to an already existing FILE* handle or to a file
// descriptor (see the open() overloads). The member functions declared
// without a body are defined further down in this file.
template <typename CharT, typename Traits = std::char_traits<CharT> >
class basic_filebuf : public std::basic_streambuf<CharT, Traits> {
 public:
    typedef CharT                            char_type;
    typedef Traits                           traits_type;
    typedef typename traits_type::int_type   int_type;
    typedef typename traits_type::pos_type   pos_type;
    typedef typename traits_type::off_type   off_type;
    typedef typename traits_type::state_type state_type;

    basic_filebuf();
    basic_filebuf(basic_filebuf&& rhs);
    virtual ~basic_filebuf();

    basic_filebuf& operator=(basic_filebuf&& rhs);
    void swap(basic_filebuf& rhs);

    bool is_open() const;
    // Open by file name, from a file descriptor, or from a FILE* handle.
    basic_filebuf* open(const char* s, std::ios_base::openmode mode);
    basic_filebuf* open(const std::string& s, std::ios_base::openmode mode);
    basic_filebuf* open(int fd, std::ios_base::openmode mode);
    basic_filebuf* open(FILE* f, std::ios_base::openmode mode);
    basic_filebuf* close();

    // Underlying FILE* handle; null when no file is attached.
    FILE* file() { return this->_M_file; }
    // Descriptor of the underlying file, or -1 when no file is attached.
    // fileno() must not be handed a null stream, hence the check.
    int fd() { return this->_M_file ? fileno(this->_M_file) : -1; }

 protected:
    // std::basic_streambuf overrides.
    int_type underflow() override;
    int_type pbackfail(int_type c = traits_type::eof()) override;
    int_type overflow(int_type c = traits_type::eof()) override;
    std::basic_streambuf<char_type, traits_type>*
      setbuf(char_type* s, std::streamsize n) override;
    pos_type seekoff(off_type off, std::ios_base::seekdir way,
                     std::ios_base::openmode wch =
                     std::ios_base::in | std::ios_base::out) override;
    pos_type seekpos(pos_type sp,
                     std::ios_base::openmode wch =
                     std::ios_base::in | std::ios_base::out) override;
    int sync() override;
    void imbue(const std::locale& loc) override;

 protected:
    // NOTE(review): the roles below are inferred from the member names and
    // from the constructors visible in this file - confirm against the
    // member function definitions.
    char*       _M_extbuf;      // buffer of raw bytes; may point at
                                // _M_extbuf_min (see the move constructor).
    const char* _M_extbufnext;  // position inside _M_extbuf.
    const char* _M_extbufend;   // end position inside _M_extbuf.
    char _M_extbuf_min[8];      // small in-object storage for _M_extbuf.
    size_t _M_ebs;              // presumably the size of _M_extbuf.
    char_type* _M_intbuf;       // buffer of char_type elements.
    size_t _M_ibs;              // presumably the size of _M_intbuf.
    FILE* _M_file;              // underlying handle, null when closed.
    // Conversion facet taken from the buffer's locale, null if it has none.
    const std::codecvt<char_type, char, state_type>* _M_cv;
    state_type _M_st;           // conversion state.
    state_type _M_st_last;      // presumably a saved conversion state.
    std::ios_base::openmode _M_om;  // presumably the mode passed to open().
    std::ios_base::openmode _M_cm;  // presumably the current I/O direction.
    bool _M_owns_eb;            // presumably: _M_extbuf is owned by us.
    bool _M_owns_ib;            // presumably: _M_intbuf is owned by us.
    bool _M_always_noconv;      // cached _M_cv->always_noconv().

    const char* _M_get_mode(std::ios_base::openmode mode);
    bool _M_read_mode();
    void _M_write_mode();
};

///////////////////////////////////////////////////////////////////////////////
// Default constructor: creates a buffer with no file attached, caches the
// code conversion facet of the buffer's locale (when it has one) and sets up
// the default 4096-element buffering through setbuf().
template <class CharT, class Traits>
basic_filebuf<CharT, Traits>::basic_filebuf()
    : _M_extbuf(nullptr),
      _M_extbufnext(nullptr),
      _M_extbufend(nullptr),
      _M_ebs(0),
      _M_intbuf(nullptr),
      _M_ibs(0),
      _M_file(nullptr),
      _M_cv(nullptr),
      _M_st(),
      _M_st_last(),
      _M_om(std::ios_base::openmode(0)),
      _M_cm(std::ios_base::openmode(0)),
      _M_owns_eb(false),
      _M_owns_ib(false),
      _M_always_noconv(false) {
    typedef std::codecvt<char_type, char, state_type> codecvt_type;

    const std::locale current_locale = this->getloc();
    if (std::has_facet<codecvt_type>(current_locale)) {
        _M_cv = &std::use_facet<codecvt_type>(current_locale);
        _M_always_noconv = _M_cv->always_noconv();
    }

    // A null buffer pointer lets setbuf() pick its own storage.
    this->setbuf(nullptr, 4096);
}

///////////////////////////////////////////////////////////////////////////////
// Move constructor: takes over the FILE, the buffers and the conversion
// state of `rhs`, then leaves `rhs` closed and without buffers.
//
// The streambuf base is copy-constructed from `rhs`; the get/put areas are
// then re-established below relative to this object's own buffers.
// Note that rhs._M_cv and rhs._M_always_noconv are left unchanged.
template <class CharT, class Traits>
basic_filebuf<CharT, Traits>::basic_filebuf(basic_filebuf&& rhs)
    : std::basic_streambuf<CharT, Traits>(rhs) {
    // The small inline buffer lives inside the object and cannot be stolen:
    // point at our own _M_extbuf_min and keep the same relative offsets.
    // NOTE(review): the bytes held in rhs._M_extbuf_min are not copied here
    // -- confirm no pending data can live there at move time.
    if (rhs._M_extbuf == rhs._M_extbuf_min) {
        _M_extbuf = _M_extbuf_min;
        _M_extbufnext = _M_extbuf + (rhs._M_extbufnext - rhs._M_extbuf);
        _M_extbufend = _M_extbuf + (rhs._M_extbufend - rhs._M_extbuf);
    } else {
        // Heap or user-supplied buffer: simply take over the pointers.
        _M_extbuf = rhs._M_extbuf;
        _M_extbufnext = rhs._M_extbufnext;
        _M_extbufend = rhs._M_extbufend;
    }
    // Plain member-wise transfer of the remaining state.
    _M_ebs = rhs._M_ebs;
    _M_intbuf = rhs._M_intbuf;
    _M_ibs = rhs._M_ibs;
    _M_file = rhs._M_file;
    _M_cv = rhs._M_cv;
    _M_st = rhs._M_st;
    _M_st_last = rhs._M_st_last;
    _M_om = rhs._M_om;
    _M_cm = rhs._M_cm;
    _M_owns_eb = rhs._M_owns_eb;
    _M_owns_ib = rhs._M_owns_ib;
    _M_always_noconv = rhs._M_always_noconv;
    // Rebuild the put area (if rhs had one), otherwise the get area, on top
    // of whichever buffer (internal or external) rhs was using, preserving
    // the current position and the area length.
    if (rhs.pbase()) {
        if (rhs.pbase() == rhs._M_intbuf)
            this->setp(_M_intbuf, _M_intbuf + (rhs. epptr() - rhs.pbase()));
        else
            this->setp(reinterpret_cast<char_type*>(_M_extbuf),
                       reinterpret_cast<char_type*>(_M_extbuf)
                       + (rhs. epptr() - rhs.pbase()));
        // setp() resets pptr() to pbase(); restore the write position.
        this->pbump(rhs. pptr() - rhs.pbase());
    } else if (rhs.eback()) {
        if (rhs.eback() == rhs._M_intbuf)
            this->setg(_M_intbuf, _M_intbuf + (rhs.gptr() - rhs.eback()),
                                  _M_intbuf + (rhs.egptr() - rhs.eback()));
        else
            this->setg(reinterpret_cast<char_type*>(_M_extbuf),
                       reinterpret_cast<char_type*>(_M_extbuf) +
                       (rhs.gptr() - rhs.eback()),
                       reinterpret_cast<char_type*>(_M_extbuf) +
                       (rhs.egptr() - rhs.eback()));
    }
    // Leave the source without file, buffers or ownership flags so that its
    // destructor does not free anything we now own.
    rhs._M_extbuf = nullptr;
    rhs._M_extbufnext = nullptr;
    rhs._M_extbufend = nullptr;
    rhs._M_ebs = 0;
    rhs._M_intbuf = nullptr;
    rhs._M_ibs = 0;
    rhs._M_file = nullptr;
    rhs._M_st = state_type();
    rhs._M_st_last = state_type();
    rhs._M_om = std::ios_base::openmode(0);
    rhs._M_cm = std::ios_base::openmode(0);
    rhs._M_owns_eb = false;
    rhs._M_owns_ib = false;
    rhs.setg(0, 0, 0);
    rhs.setp(0, 0);
}

///////////////////////////////////////////////////////////////////////////////
// Move assignment: closes whatever this buffer currently has open and then
// exchanges the complete state with `rhs`. Returns *this.
template <class CharT, class Traits>
inline
basic_filebuf<CharT, Traits>&
basic_filebuf<CharT, Traits>::operator=(basic_filebuf&& rhs) {
    // The result of close() is intentionally not inspected.
    this->close();
    this->swap(rhs);
    return *this;
}

///////////////////////////////////////////////////////////////////////////////
// Destructor: frees only the buffers this object allocated itself.
//
// The call to close() is disabled in this version (it used to be wrapped in
// a try { close(); } catch (...) { } block), so the destructor neither
// flushes nor closes the underlying FILE.
template <class CharT, class Traits>
basic_filebuf<CharT, Traits>::~basic_filebuf() {
    if (_M_owns_eb) {
        delete[] _M_extbuf;
    }
    if (_M_owns_ib) {
        delete[] _M_intbuf;
    }
}

///////////////////////////////////////////////////////////////////////////////
// Exchanges the complete state of *this and `rhs`.
//
// The only non-trivial part is the small inline buffer _M_extbuf_min: it is
// stored inside each object, so a pointer to it must never be handed to the
// other object. Pointers into an inline buffer are therefore re-based onto
// the new owner's own inline buffer instead of being swapped.
// NOTE(review): the contents of the two inline buffers are not exchanged,
// only the pointers/offsets -- confirm that is acceptable for callers.
template <class CharT, class Traits>
void
basic_filebuf<CharT, Traits>::swap(basic_filebuf& rhs) {
    // Swap the base-class part (get/put area pointers and locale).
    std::basic_streambuf<char_type, traits_type>::swap(rhs);
    if (_M_extbuf != _M_extbuf_min && rhs._M_extbuf != rhs._M_extbuf_min) {
        // Neither side uses its inline buffer: a plain pointer swap is fine.
        std::swap(_M_extbuf, rhs._M_extbuf);
        std::swap(_M_extbufnext, rhs._M_extbufnext);
        std::swap(_M_extbufend, rhs._M_extbufend);
    } else {
        // At least one side uses its inline buffer. Record the next/end
        // positions as offsets so they can be re-applied to the new bases.
        ptrdiff_t ln = _M_extbufnext - _M_extbuf;
        ptrdiff_t le = _M_extbufend - _M_extbuf;
        ptrdiff_t rn = rhs._M_extbufnext - rhs._M_extbuf;
        ptrdiff_t re = rhs._M_extbufend - rhs._M_extbuf;
        if (_M_extbuf == _M_extbuf_min && rhs._M_extbuf != rhs._M_extbuf_min) {
            // We were inline, rhs was not: take rhs's buffer and make rhs
            // use its own inline buffer.
            _M_extbuf = rhs._M_extbuf;
            rhs._M_extbuf = rhs._M_extbuf_min;
        } else if (_M_extbuf != _M_extbuf_min &&
            rhs._M_extbuf == rhs._M_extbuf_min) {
            // Mirror case: rhs was inline, we were not.
            rhs._M_extbuf = _M_extbuf;
            _M_extbuf = _M_extbuf_min;
        }
        // (If both were inline, the base pointers stay as they are.)
        // Apply the other side's offsets to the new base pointers.
        _M_extbufnext = _M_extbuf + rn;
        _M_extbufend = _M_extbuf + re;
        rhs._M_extbufnext = rhs._M_extbuf + ln;
        rhs._M_extbufend = rhs._M_extbuf + le;
    }
    // The remaining members can be swapped directly.
    std::swap(_M_ebs, rhs._M_ebs);
    std::swap(_M_intbuf, rhs._M_intbuf);
    std::swap(_M_ibs, rhs._M_ibs);
    std::swap(_M_file, rhs._M_file);
    std::swap(_M_cv, rhs._M_cv);
    std::swap(_M_st, rhs._M_st);
    std::swap(_M_st_last, rhs._M_st_last);
    std::swap(_M_om, rhs._M_om);
    std::swap(_M_cm, rhs._M_cm);
    std::swap(_M_owns_eb, rhs._M_owns_eb);
    std::swap(_M_owns_ib, rhs._M_owns_ib);
    std::swap(_M_always_noconv, rhs._M_always_noconv);
    // After the base-class swap our get/put area may still point into rhs's
    // inline buffer; if so, re-base it onto our own inline buffer while
    // keeping the current position (n) and the area length (e).
    if (this->eback() == reinterpret_cast<char_type*>(rhs._M_extbuf_min)) {
        ptrdiff_t n = this->gptr() - this->eback();
        ptrdiff_t e = this->egptr() - this->eback();
        this->setg(reinterpret_cast<char_type*>(_M_extbuf_min),
                   reinterpret_cast<char_type*>(_M_extbuf_min) + n,
                   reinterpret_cast<char_type*>(_M_extbuf_min) + e);
    } else if (this->pbase() ==
               reinterpret_cast<char_type*>(rhs._M_extbuf_min)) {
        ptrdiff_t n = this->pptr() - this->pbase();
        ptrdiff_t e = this->epptr() - this->pbase();
        this->setp(reinterpret_cast<char_type*>(_M_extbuf_min),
                   reinterpret_cast<char_type*>(_M_extbuf_min) + e);
        this->pbump(n);
    }
    // Same fix-up for rhs, whose areas may still point into our inline
    // buffer.
    if (rhs.eback() == reinterpret_cast<char_type*>(_M_extbuf_min)) {
        ptrdiff_t n = rhs.gptr() - rhs.eback();
        ptrdiff_t e = rhs.egptr() - rhs.eback();
        rhs.setg(reinterpret_cast<char_type*>(rhs._M_extbuf_min),
                 reinterpret_cast<char_type*>(rhs._M_extbuf_min) + n,
                 reinterpret_cast<char_type*>(rhs._M_extbuf_min) + e);
    } else if (rhs.pbase() == reinterpret_cast<char_type*>(_M_extbuf_min)) {
        ptrdiff_t n = rhs.pptr() - rhs.pbase();
        ptrdiff_t e = rhs.epptr() - rhs.pbase();
        rhs.setp(reinterpret_cast<char_type*>(rhs._M_extbuf_min),
                 reinterpret_cast<char_type*>(rhs._M_extbuf_min) + e);
        rhs.pbump(n);
    }
}

///////////////////////////////////////////////////////////////////////////////
// Non-member swap: forwards to the member function x.swap(y).
template <class CharT, class Traits>
inline void swap(basic_filebuf<CharT, Traits>& x,
                 basic_filebuf<CharT, Traits>& y) {
    x.swap(y);
}

///////////////////////////////////////////////////////////////////////////////
// Returns true when a FILE is currently attached to this buffer.
template <class CharT, class Traits>
inline bool basic_filebuf<CharT, Traits>::is_open() const {
    return !(_M_file == nullptr);
}

///////////////////////////////////////////////////////////////////////////////
// Translates an iostream open mode into the matching fopen()/fdopen() mode
// string. `ate` is ignored here (it only affects the initial position).
// Returns nullptr for every flag combination that has no stdio equivalent.
template <class CharT, class Traits>
const char* basic_filebuf<CharT, Traits>::
_M_get_mode(std::ios_base::openmode mode) {
    // "| 0" turns the flag set into a plain integer usable in a switch.
    const auto flags = (mode & ~std::ios_base::ate) | 0;
    // The binary flag only appends 'b'; pick the base mode without it.
    const bool binary = (flags & std::ios_base::binary) != 0;

    switch (flags & ~std::ios_base::binary) {
    case std::ios_base::out:
    case std::ios_base::out | std::ios_base::trunc:
        return binary ? "wb" : "w";

    case std::ios_base::app:
    case std::ios_base::out | std::ios_base::app:
        return binary ? "ab" : "a";

    case std::ios_base::in:
        return binary ? "rb" : "r";

    case std::ios_base::in | std::ios_base::out:
        return binary ? "r+b" : "r+";

    case std::ios_base::in | std::ios_base::out | std::ios_base::trunc:
        return binary ? "w+b" : "w+";

    case std::ios_base::in | std::ios_base::app:
    case std::ios_base::in | std::ios_base::out | std::ios_base::app:
        return binary ? "a+b" : "a+";

    default:
        // Includes "binary" on its own and any unsupported combination.
        return nullptr;
    }
}

///////////////////////////////////////////////////////////////////////////////
// Opens the file named `s` with fopen(), using the stdio mode derived from
// `mode`.
//
// Returns `this` on success. Returns nullptr when a file is already open,
// when `mode` has no stdio equivalent, when fopen() fails, or when `ate`
// was requested and seeking to the end of the file fails (the file is
// closed again in that last case).
template <class CharT, class Traits>
basic_filebuf<CharT, Traits>*
basic_filebuf<CharT, Traits>::
open(const char* s, std::ios_base::openmode mode) {
    if (_M_file != nullptr)
        return nullptr;

    const char* stdio_mode = _M_get_mode(mode);
    if (stdio_mode == nullptr)
        return nullptr;

    _M_file = fopen(s, stdio_mode);
    if (_M_file == nullptr)
        return nullptr;

    _M_om = mode;
    // "at end": position at the end of the file right after opening.
    if ((mode & std::ios_base::ate) && fseek(_M_file, 0, SEEK_END)) {
        fclose(_M_file);
        _M_file = nullptr;
        return nullptr;
    }
    return this;
}

///////////////////////////////////////////////////////////////////////////////
// Convenience overload: opens the file whose name is held in a std::string
// by forwarding to the const char* overload.
template <class CharT, class Traits>
inline
basic_filebuf<CharT, Traits>*
basic_filebuf<CharT, Traits>::open(const std::string& s,
    std::ios_base::openmode mode) {
    const char* path = s.c_str();
    return open(path, mode);
}

///////////////////////////////////////////////////////////////////////////////
// Attaches this buffer to an already-open file descriptor via fdopen().
//
// Returns `this` on success. Returns nullptr, leaving the buffer's state
// untouched, when `mode` has no stdio equivalent or when fdopen() fails.
// Previously a failed fdopen() stored a null FILE* and still reported
// success, so callers could not detect the error.
//
// Unlike the file-name overload, this one does not check whether a file is
// already attached.
template <class CharT, class Traits>
basic_filebuf<CharT, Traits>*
basic_filebuf<CharT, Traits>::open(int fd, std::ios_base::openmode mode) {
    const char* md = this->_M_get_mode(mode);
    if (md == nullptr)
        return nullptr;

    FILE* f = fdopen(fd, md);
    if (f == nullptr)
        return nullptr;  // fdopen() failed: do not pretend we are open.

    this->_M_file = f;
    this->_M_om = mode;
    return this;
}

///////////////////////////////////////////////////////////////////////////////
// Attaches this buffer to an existing FILE* and records `mode` as the open
// mode. No validation is performed: `f` is stored as given, any previously
// attached FILE is simply overwritten, and `this` is always returned.
template <class CharT, class Traits>
basic_filebuf<CharT, Traits>*
basic_filebuf<CharT, Traits>::open(FILE* f, std::ios_base::openmode mode) {
    _M_file = f;
    _M_om = mode;
    return this;
}

///////////////////////////////////////////////////////////////////////////////
// Flushes pending data with sync() and closes the attached FILE.
//
// Returns `this` when a file was open and both sync() and fclose()
// succeeded, nullptr otherwise (including when no file was open).
// _M_file is reset only if fclose() reports success.
template <class CharT, class Traits>
basic_filebuf<CharT, Traits>*
basic_filebuf<CharT, Traits>::close() {
    if (_M_file == nullptr)
        return nullptr;

    basic_filebuf<CharT, Traits>* result = this;

    // If sync() throws, the guard still closes the FILE during unwinding.
    std::unique_ptr<FILE, int(*)(FILE*)> guard(_M_file, fclose);
    if (sync() != 0)
        result = nullptr;

    // Normal path: take the FILE back from the guard and close it here so
    // that the return value of fclose() can be inspected.
    if (fclose(guard.release()) != 0)
        result = nullptr;
    else
        _M_file = nullptr;

    return result;
}

///////////////////////////////////////////////////////////////////////////////
// Refills the get area from the file when it is exhausted and returns the
// character at the current read position without consuming it.
//
// Returns traits_type::eof() when no file is attached or when nothing more
// could be read/converted. Throws std::bad_cast if a conversion is needed
// but no codecvt facet is available.
template <class CharT, class Traits>
typename basic_filebuf<CharT, Traits>::int_type
basic_filebuf<CharT, Traits>::underflow() {
    if (_M_file == nullptr)
        return traits_type::eof();
    // Switch to read mode; `initial` is true when the mode was just changed,
    // i.e. the get area holds no previously read characters yet.
    bool initial = _M_read_mode();
    // One-character stand-in get area used when no buffer is installed.
    char_type buf;
    if (this->gptr() == nullptr)
        this->setg(&buf, &buf+1, &buf+1);
    // Number of already-read characters kept at the front of the buffer so
    // they remain available for putback: at most 4 and at most half of the
    // get area; none right after a mode switch.
    const size_t unget_sz = initial ? 0 : std::
      min<size_t>((this->egptr() - this->eback()) / 2, 4);
    int_type c = traits_type::eof();
    if (this->gptr() == this->egptr()) {
        // Get area exhausted: move the putback characters to the front.
        memmove(this->eback(), this->egptr() - unget_sz,
            unget_sz * sizeof(char_type));
        if (_M_always_noconv) {
            // No conversion: read straight into the get area, after the
            // putback region.
            size_t nmemb = static_cast<size_t>
              (this->egptr() - this->eback() - unget_sz);
            nmemb = fread(this->eback() + unget_sz, 1, nmemb, _M_file);
            if (nmemb != 0) {
                this->setg(this->eback(),
                           this->eback() + unget_sz,
                           this->eback() + unget_sz + nmemb);
                c = traits_type::to_int_type(*this->gptr());
            }
        } else {
            // Conversion needed: move the not-yet-converted external bytes
            // to the front of the external buffer, ...
            memmove(_M_extbuf, _M_extbufnext, _M_extbufend - _M_extbufnext);
            _M_extbufnext = _M_extbuf + (_M_extbufend - _M_extbufnext);
            _M_extbufend = _M_extbuf +
              (_M_extbuf == _M_extbuf_min ? sizeof(_M_extbuf_min) : _M_ebs);
            // ... read at most as many bytes as fit in both the external
            // buffer and the internal buffer (minus the putback region), ...
            size_t nmemb = std::min(static_cast<size_t>(_M_ibs - unget_sz),
                                    static_cast<size_t>
                                    (_M_extbufend - _M_extbufnext));
            std::codecvt_base::result r;
            // Remember the conversion state before this chunk; sync() uses
            // it to work out the file position.
            _M_st_last = _M_st;
            size_t nr = fread(
                reinterpret_cast<void*>(const_cast<char_type*>(_M_extbufnext)),
                1, nmemb, _M_file);
            if (nr != 0) {
                if (!_M_cv)
                    throw std::bad_cast();
                _M_extbufend = _M_extbufnext + nr;
                char_type*  inext;
                // ... and convert external bytes into internal characters
                // placed after the putback region.
                r = _M_cv->in(_M_st, _M_extbuf, _M_extbufend, _M_extbufnext,
                              this->eback() + unget_sz,
                              this->eback() + _M_ibs, inext);
                if (r == std::codecvt_base::noconv) {
                    // Facet says no conversion is required: expose the
                    // external buffer directly as the get area.
                    this->setg(reinterpret_cast<char_type*>(_M_extbuf),
                               reinterpret_cast<char_type*>(_M_extbuf),
                               const_cast<char_type*>(_M_extbufend));
                    c = traits_type::to_int_type(*this->gptr());
                } else if (inext != this->eback() + unget_sz) {
                    // At least one character was produced.
                    this->setg(this->eback(), this->eback() + unget_sz, inext);
                    c = traits_type::to_int_type(*this->gptr());
                }
            }
        }
    } else {
        // Characters are still available: just peek at the next one.
        c = traits_type::to_int_type(*this->gptr());
    }
    // Never leave the get area pointing at the local stand-in buffer.
    if (this->eback() == &buf)
        this->setg(0, 0, 0);
    return c;
}

///////////////////////////////////////////////////////////////////////////////
// Puts one character back into the get area.
//
// Fails (returns eof) when no file is attached or when the read position is
// already at the start of the get area. With c == eof the position is just
// moved back by one. Otherwise the position is moved back and `c` stored,
// which is only permitted when the file was opened for output or when `c`
// equals the character that is already there.
template <class CharT, class Traits>
typename basic_filebuf<CharT, Traits>::int_type
basic_filebuf<CharT, Traits>::pbackfail(int_type c) {
    if (_M_file == nullptr || !(this->eback() < this->gptr()))
        return traits_type::eof();

    if (traits_type::eq_int_type(c, traits_type::eof())) {
        // Plain "unget": step back without modifying the buffer.
        this->gbump(-1);
        return traits_type::not_eof(c);
    }

    if ((_M_om & std::ios_base::out) ||
        traits_type::eq(traits_type::to_char_type(c), this->gptr()[-1])) {
        this->gbump(-1);
        *this->gptr() = traits_type::to_char_type(c);
        return c;
    }

    // A different character cannot be put back into an input-only file.
    return traits_type::eof();
}

///////////////////////////////////////////////////////////////////////////////
// Writes the contents of the put area (plus `c`, unless it is eof) to the
// file, converting through the codecvt facet when necessary, and empties
// the put area.
//
// Returns traits_type::not_eof(c) on success and traits_type::eof() when no
// file is attached, when a write falls short, or when conversion fails.
// Throws std::bad_cast if a conversion is needed but no facet is available.
template <class CharT, class Traits>
typename basic_filebuf<CharT, Traits>::int_type
basic_filebuf<CharT, Traits>::overflow(int_type c) {
    if (_M_file == nullptr)
        return traits_type::eof();
    _M_write_mode();
    // One-character stand-in put area used when no buffer is installed.
    char_type buf;
    // Remember the real put area so it can be restored after flushing.
    char_type* pb_save = this->pbase();
    char_type* epb_save = this->epptr();
    if (!traits_type::eq_int_type(c, traits_type::eof())) {
        if (this->pptr() == nullptr)
            this->setp(&buf, &buf+1);
        // Append the overflowing character so it is flushed with the rest.
        *this->pptr() = traits_type::to_char_type(c);
        this->pbump(1);
    }
    if (this->pptr() != this->pbase()) {
        if (_M_always_noconv) {
            // No conversion: write the put area as is.
            size_t nmemb = static_cast<size_t>(this->pptr() - this->pbase());
            if (fwrite(this->pbase(), sizeof(char_type),
                  nmemb, _M_file) != nmemb)
                return traits_type::eof();
        } else {
            char* extbe = _M_extbuf;
            std::codecvt_base::result r;
            // Convert and write in chunks until the whole put area has been
            // consumed ("partial" means there is more left to convert).
            do {
                if (!_M_cv)
                    throw std::bad_cast();
                const char_type* e;
                r = _M_cv->out(_M_st, this->pbase(), this->pptr(), e,
                               _M_extbuf, _M_extbuf + _M_ebs, extbe);
                // No input consumed at all: cannot make progress.
                if (e == this->pbase())
                    return traits_type::eof();
                if (r == std::codecvt_base::noconv) {
                    // Facet requires no conversion: write the characters
                    // directly.
                    size_t nmemb = static_cast<size_t>
                      (this->pptr() - this->pbase());
                    if (fwrite(this->pbase(), 1, nmemb, _M_file) != nmemb)
                        return traits_type::eof();
                } else if (r == std::codecvt_base::ok ||
                    r == std::codecvt_base::partial) {
                    // Write the converted bytes produced so far.
                    size_t nmemb = static_cast<size_t>(extbe - _M_extbuf);
                    if (fwrite(_M_extbuf, 1, nmemb, _M_file) != nmemb)
                        return traits_type::eof();
                    if (r == std::codecvt_base::partial) {
                        // Shrink the put area to the unconverted remainder
                        // [e, pptr) and mark it as completely filled.
                        this->setp(const_cast<char_type*>(e),
                            this->pptr());
                        this->pbump(this->epptr() - this->pbase());
                    }
                } else {
                    // Conversion error.
                    return traits_type::eof();
                }
            } while (r == std::codecvt_base::partial);
        }
        // Everything flushed: restore the original (now empty) put area.
        this->setp(pb_save, epb_save);
    }
    return traits_type::not_eof(c);
}

///////////////////////////////////////////////////////////////////////////////
// Installs new buffering of size `n`, optionally using the caller-supplied
// storage `s`. Existing get/put areas are discarded and buffers owned by
// this object are freed first. Always returns `this`.
//
// External (byte) buffer:
//   * n not larger than the inline buffer -> use the inline buffer;
//   * otherwise `s` itself when no conversion is needed and `s` is non-null;
//   * otherwise a freshly allocated array of n bytes.
// Internal (character) buffer, only used when conversion is needed:
//   * `s` when it is non-null, otherwise a freshly allocated array; its
//     size is n but never less than the size of the inline buffer.
template <class CharT, class Traits>
std::basic_streambuf<CharT, Traits>*
basic_filebuf<CharT, Traits>::setbuf(char_type* s, std::streamsize n) {
    this->setg(nullptr, nullptr, nullptr);
    this->setp(nullptr, nullptr);

    // Drop whatever we allocated ourselves earlier.
    if (_M_owns_eb) {
        delete[] _M_extbuf;
    }
    if (_M_owns_ib) {
        delete[] _M_intbuf;
    }

    // --- external buffer -------------------------------------------------
    _M_ebs = n;
    if (_M_ebs <= sizeof(_M_extbuf_min)) {
        _M_extbuf = _M_extbuf_min;
        _M_ebs = sizeof(_M_extbuf_min);
        _M_owns_eb = false;
    } else if (_M_always_noconv && s) {
        _M_extbuf = reinterpret_cast<char*>(s);
        _M_owns_eb = false;
    } else {
        _M_extbuf = new char[_M_ebs];
        _M_owns_eb = true;
    }

    // --- internal buffer -------------------------------------------------
    if (_M_always_noconv) {
        // Characters are read/written directly through the external buffer.
        _M_ibs = 0;
        _M_intbuf = 0;
        _M_owns_ib = false;
    } else {
        _M_ibs = std::max<std::streamsize>(n, sizeof(_M_extbuf_min));
        if (s && _M_ibs >= sizeof(_M_extbuf_min)) {
            _M_intbuf = s;
            _M_owns_ib = false;
        } else {
            _M_intbuf = new char_type[_M_ibs];
            _M_owns_ib = true;
        }
    }
    return this;
}

///////////////////////////////////////////////////////////////////////////////
// Repositions the file by `off` characters relative to `way` and returns the
// resulting position, tagged with the current conversion state.
//
// Returns pos_type(off_type(-1)) when no file is attached, when a non-zero
// offset is requested with a codecvt whose encoding() is not positive, when
// sync() fails, when `way` is not beg/cur/end, or when the seek itself
// fails. Throws std::bad_cast when there is no codecvt facet.
// The openmode argument is ignored.
template <class CharT, class Traits>
typename basic_filebuf<CharT, Traits>::pos_type
basic_filebuf<CharT, Traits>::seekoff(off_type off, std::ios_base::seekdir way,
                                      std::ios_base::openmode) {
    if (!_M_cv)
        throw std::bad_cast();

    const pos_type failed = pos_type(off_type(-1));
    const int width = _M_cv->encoding();
    if (_M_file == nullptr || (width <= 0 && off != 0) || sync())
        return failed;

    // From here on: width > 0 || off == 0.
    int whence;
    if (way == std::ios_base::beg)
        whence = SEEK_SET;
    else if (way == std::ios_base::cur)
        whence = SEEK_CUR;
    else if (way == std::ios_base::end)
        whence = SEEK_END;
    else
        return failed;

    // Character offsets become byte offsets via the fixed encoding width.
#if _WIN32
    const int rc = fseek(_M_file, width > 0 ? width * off : 0, whence);
#else
    const int rc = fseeko(_M_file, width > 0 ? width * off : 0, whence);
#endif
    if (rc != 0)
        return failed;

#if _WIN32
    pos_type where = ftell(_M_file);
#else
    pos_type where = ftello(_M_file);
#endif
    where.state(_M_st);
    return where;
}

///////////////////////////////////////////////////////////////////////////////
// Moves the file to the absolute position `sp` and adopts the conversion
// state stored in it. Returns `sp` on success and pos_type(off_type(-1))
// when no file is attached, sync() fails, or the seek fails.
// The openmode argument is ignored.
template <class CharT, class Traits>
typename basic_filebuf<CharT, Traits>::pos_type
basic_filebuf<CharT, Traits>::seekpos(pos_type sp, std::ios_base::openmode) {
    if (_M_file == nullptr || sync())
        return pos_type(off_type(-1));

#if _WIN32
    const int rc = fseek(_M_file, sp, SEEK_SET);
#else
    const int rc = fseeko(_M_file, sp, SEEK_SET);
#endif
    if (rc != 0)
        return pos_type(off_type(-1));

    _M_st = sp.state();
    return sp;
}

///////////////////////////////////////////////////////////////////////////////
// Synchronizes the buffer with the underlying FILE.
//
// In write mode: flushes the put area through overflow(), writes any shift
// sequence the codecvt facet requires (unshift) and calls fflush().
// In read mode: seeks the FILE back by the number of bytes that were read
// from it but not yet consumed by the caller, then drops the get area and
// leaves read mode.
//
// Returns 0 on success (also when no file is attached) and -1 on failure.
// Throws std::bad_cast when a file is attached but there is no codecvt
// facet.
//
// Fix: the _WIN32 branch referred to a non-existent member `_M_file_`,
// which made this function fail to compile on Windows; it now uses
// `_M_file` like the non-Windows branch.
template <class CharT, class Traits>
int
basic_filebuf<CharT, Traits>::sync() {
    if (_M_file == nullptr)
        return 0;
    if (!_M_cv)
        throw std::bad_cast();
    if (_M_cm & std::ios_base::out) {
        // Flush pending characters first.
        if (this->pptr() != this->pbase())
            if (overflow() == traits_type::eof())
                return -1;
        // Emit the bytes needed to return to the initial shift state; loop
        // while the facet reports that the output buffer was too small.
        std::codecvt_base::result r;
        do {
            char* extbe;
            r = _M_cv->unshift(_M_st, _M_extbuf, _M_extbuf + _M_ebs, extbe);
            size_t nmemb = static_cast<size_t>(extbe - _M_extbuf);
            if (fwrite(_M_extbuf, 1, nmemb, _M_file) != nmemb)
                return -1;
        } while (r == std::codecvt_base::partial);
        if (r == std::codecvt_base::error)
            return -1;
        if (fflush(_M_file))
            return -1;
    } else if (_M_cm & std::ios_base::in) {
        // c = number of bytes the FILE is ahead of the logical position.
        off_type c;
        state_type state = _M_st_last;
        bool update_st = false;
        if (_M_always_noconv) {
            // One byte per character: just the unread part of the get area.
            c = this->egptr() - this->gptr();
        } else {
            int width = _M_cv->encoding();
            // External bytes read but not yet converted ...
            c = _M_extbufend - _M_extbufnext;
            if (width > 0) {
                // ... plus the unread characters at a fixed width each.
                c += width * (this->egptr() - this->gptr());
            } else {
                if (this->gptr() != this->egptr()) {
                    // Variable-width encoding: ask the facet how many
                    // external bytes the consumed characters correspond to,
                    // starting from the state saved before the last refill.
                    const int off = _M_cv->length(state, _M_extbuf,
                                                  _M_extbufnext,
                                                  this->gptr() - this->eback());
                    c += _M_extbufnext - _M_extbuf - off;
                    update_st = true;
                }
            }
        }
#if _WIN32
        if (fseek(_M_file, -c, SEEK_CUR))
            return -1;
#else
        if (fseeko(_M_file, -c, SEEK_CUR))
            return -1;
#endif
        if (update_st)
            _M_st = state;
        // Discard all buffered input and leave read mode.
        _M_extbufnext = _M_extbufend = _M_extbuf;
        this->setg(0, 0, 0);
        _M_cm = std::ios_base::openmode(0);
    }
    return 0;
}

///////////////////////////////////////////////////////////////////////////////
// Adopts the codecvt facet of `loc`. The buffer is synchronized first; if
// the new facet differs from the old one in whether conversion is needed,
// the get/put areas are dropped and the buffers are rearranged accordingly.
template <class CharT, class Traits>
void
basic_filebuf<CharT, Traits>::imbue(const std::locale& loc) {
    typedef std::codecvt<char_type, char, state_type> codecvt_type;

    sync();
    _M_cv = &std::use_facet<codecvt_type>(loc);

    const bool was_noconv = _M_always_noconv;
    _M_always_noconv = _M_cv->always_noconv();
    if (was_noconv == _M_always_noconv)
        return;  // Same buffering scheme as before: nothing to rearrange.

    this->setg(nullptr, nullptr, nullptr);
    this->setp(nullptr, nullptr);

    // Invariant (per the original author): char_type is char, otherwise we
    // could not get here.
    if (_M_always_noconv) {
        // Conversion no longer needed: the internal buffer takes over the
        // role of the external buffer and the old external one is dropped.
        if (_M_owns_eb) {
            delete[] _M_extbuf;
        }
        _M_owns_eb = _M_owns_ib;
        _M_ebs = _M_ibs;
        _M_extbuf = reinterpret_cast<char*>(_M_intbuf);
        _M_ibs = 0;
        _M_intbuf = nullptr;
        _M_owns_ib = false;
        return;
    }

    // Conversion is now needed: an internal buffer must be obtained.
    _M_ibs = _M_ebs;
    if (!_M_owns_eb && _M_extbuf != _M_extbuf_min) {
        // The external buffer was supplied by the user: reuse it as the
        // internal buffer and allocate a new external one.
        _M_intbuf = reinterpret_cast<char_type*>(_M_extbuf);
        _M_owns_ib = false;
        _M_extbuf = new char[_M_ebs];
        _M_owns_eb = true;
    } else {
        _M_intbuf = new char_type[_M_ibs];
        _M_owns_ib = true;
    }
}

///////////////////////////////////////////////////////////////////////////////
// Puts the buffer into read mode if it is not already in it.
//
// On a switch the put area is removed and an "exhausted" get area (read
// position at the end) is installed over the external buffer when no
// conversion is needed, or over the internal buffer otherwise.
// Returns true when the mode was switched, false when it was already "in".
template <class CharT, class Traits>
bool
basic_filebuf<CharT, Traits>::_M_read_mode() {
    if (_M_cm & std::ios_base::in)
        return false;

    this->setp(nullptr, nullptr);
    if (_M_always_noconv) {
        char_type* const first = reinterpret_cast<char_type*>(_M_extbuf);
        char_type* const last = reinterpret_cast<char_type*>(_M_extbuf) + _M_ebs;
        this->setg(first, last, last);
    } else {
        char_type* const last = _M_intbuf + _M_ibs;
        this->setg(_M_intbuf, last, last);
    }
    _M_cm = std::ios_base::in;
    return true;
}

///////////////////////////////////////////////////////////////////////////////
// Puts the buffer into write mode if it is not already in it.
//
// On a switch the get area is removed. When the external buffer is larger
// than the inline one, a put area is installed over the external buffer (no
// conversion) or the internal buffer (conversion), each time leaving the
// last slot outside the area; otherwise no put area is installed at all.
template <class CharT, class Traits>
void
basic_filebuf<CharT, Traits>::_M_write_mode() {
    if (_M_cm & std::ios_base::out)
        return;

    this->setg(nullptr, nullptr, nullptr);
    if (_M_ebs <= sizeof(_M_extbuf_min)) {
        this->setp(nullptr, nullptr);
    } else if (_M_always_noconv) {
        char_type* const first = reinterpret_cast<char_type*>(_M_extbuf);
        this->setp(first,
                   reinterpret_cast<char_type*>(_M_extbuf) + (_M_ebs - 1));
    } else {
        this->setp(_M_intbuf, _M_intbuf + (_M_ibs - 1));
    }
    _M_cm = std::ios_base::out;
}

///////////////////////////////////////////////////////////////////////////////
}

///////////////////////////////////////////////////////////////////////////////
#endif  // KALDI_UTIL_BASIC_FILEBUF_H_

///////////////////////////////////////////////////////////////////////////////

/*
 * ============================================================================
 * libc++ License
 * ============================================================================
 *
 * The libc++ library is dual licensed under both the University of Illinois
 * "BSD-Like" license and the MIT license.  As a user of this code you may
 * choose to use it under either license.  As a contributor, you agree to allow
 * your code to be used under both.
 *
 * Full text of the relevant licenses is included below.
 *
 * ============================================================================
 *
 * University of Illinois/NCSA
 * Open Source License
 *
 * Copyright (c) 2009-2014 by the contributors listed in CREDITS.TXT (included below)
 *
 * All rights reserved.
 *
 * Developed by:
 *
 *     LLVM Team
 *
 *     University of Illinois at Urbana-Champaign
 *
 *     http://llvm.org
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal with
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 *     * Redistributions of source code must retain the above copyright notice,
 *       this list of conditions and the following disclaimers.
 *
 *     * Redistributions in binary form must reproduce the above copyright notice,
 *       this list of conditions and the following disclaimers in the
 *       documentation and/or other materials provided with the distribution.
 *
 *     * Neither the names of the LLVM Team, University of Illinois at
 *       Urbana-Champaign, nor the names of its contributors may be used to
 *       endorse or promote products derived from this Software without specific
 *       prior written permission.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
 * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
 * SOFTWARE.
 *
 * ==============================================================================
 *
 * Copyright (c) 2009-2014 by the contributors listed in CREDITS.TXT (included below)
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 * ==============================================================================
 *
 * This file is a partial list of people who have contributed to the LLVM/libc++
 * project.  If you have contributed a patch or made some other contribution to
 * LLVM/libc++, please submit a patch to this file to add yourself, and it will be
 * done!
 *
 * The list is sorted by surname and formatted to allow easy grepping and
 * beautification by scripts.  The fields are: name (N), email (E), web-address
 * (W), PGP key ID and fingerprint (P), description (D), and snail-mail address
 * (S).
 *
 * N: Saleem Abdulrasool
 * E: compnerd@compnerd.org
 * D: Minor patches and Linux fixes.
 *
 * N: Dimitry Andric
 * E: dimitry@andric.com
 * D: Visibility fixes, minor FreeBSD portability patches.
 *
 * N: Holger Arnold
 * E: holgerar@gmail.com
 * D: Minor fix.
 *
 * N: Ruben Van Boxem
 * E: vanboxem dot ruben at gmail dot com
 * D: Initial Windows patches.
 *
 * N: David Chisnall
 * E: theraven at theravensnest dot org
 * D: FreeBSD and Solaris ports, libcxxrt support, some atomics work.
 *
 * N: Marshall Clow
 * E: mclow.lists@gmail.com
 * E: marshall@idio.com
 * D: C++14 support, patches and bug fixes.
 *
 * N: Bill Fisher
 * E: william.w.fisher@gmail.com
 * D: Regex bug fixes.
 *
 * N: Matthew Dempsky
 * E: matthew@dempsky.org
 * D: Minor patches and bug fixes.
 *
 * N: Google Inc.
 * D: Copyright owner and contributor of the CityHash algorithm
 *
 * N: Howard Hinnant
 * E: hhinnant@apple.com
 * D: Architect and primary author of libc++
 *
 * N: Hyeon-bin Jeong
 * E: tuhertz@gmail.com
 * D: Minor patches and bug fixes.
 *
 * N: Argyrios Kyrtzidis
 * E: kyrtzidis@apple.com
 * D: Bug fixes.
 *
 * N: Bruce Mitchener, Jr.
 * E: bruce.mitchener@gmail.com
 * D: Emscripten-related changes.
 *
 * N: Michel Morin
 * E: mimomorin@gmail.com
 * D: Minor patches to is_convertible.
 *
 * N: Andrew Morrow
 * E: andrew.c.morrow@gmail.com
 * D: Minor patches and Linux fixes.
 *
 * N: Arvid Picciani
 * E: aep at exys dot org
 * D: Minor patches and musl port.
 *
 * N: Bjorn Reese
 * E: breese@users.sourceforge.net
 * D: Initial regex prototype
 *
 * N: Nico Rieck
 * E: nico.rieck@gmail.com
 * D: Windows fixes
 *
 * N: Jonathan Sauer
 * D: Minor patches, mostly related to constexpr
 *
 * N: Craig Silverstein
 * E: csilvers@google.com
 * D: Implemented Cityhash as the string hash function on 64-bit machines
 *
 * N: Richard Smith
 * D: Minor patches.
 *
 * N: Joerg Sonnenberger
 * E: joerg@NetBSD.org
 * D: NetBSD port.
 *
 * N: Stephan Tolksdorf
 * E: st@quanttec.com
 * D: Minor <atomic> fix
 *
 * N: Michael van der Westhuizen
 * E: r1mikey at gmail dot com
 *
 * N: Klaas de Vries
 * E: klaas at klaasgaaf dot nl
 * D: Minor bug fix.
 *
 * N: Zhang Xiongpang
 * E: zhangxiongpang@gmail.com
 * D: Minor patches and bug fixes.
 *
 * N: Xing Xue
 * E: xingxue@ca.ibm.com
 * D: AIX port
 *
 * N: Zhihao Yuan
 * E: lichray@gmail.com
 * D: Standard compatibility fixes.
 *
 * N: Jeffrey Yasskin
 * E: jyasskin@gmail.com
 * E: jyasskin@google.com
 * D: Linux fixes.
 */


================================================
FILE: runtime/engine/kaldi/util/common-utils.h
================================================
// util/common-utils.h

// Copyright 2009-2011 Microsoft Corporation

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//  http://www.apache.org/licenses/LICENSE-2.0

// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_UTIL_COMMON_UTILS_H_
#define KALDI_UTIL_COMMON_UTILS_H_

#include "base/kaldi-common.h"
#include "util/parse-options.h"
#include "util/kaldi-io.h"
#include "util/simple-io-funcs.h"
#include "util/kaldi-holder.h"
#include "util/kaldi-table.h"
#include "util/table-types.h"
#include "util/text-utils.h"

#endif  // KALDI_UTIL_COMMON_UTILS_H_


================================================
FILE: runtime/engine/kaldi/util/const-integer-set-inl.h
================================================
// util/const-integer-set-inl.h

// Copyright 2009-2011     Microsoft Corporation

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


#ifndef KALDI_UTIL_CONST_INTEGER_SET_INL_H_
#define KALDI_UTIL_CONST_INTEGER_SET_INL_H_

// Do not include this file directly.  It is included by const-integer-set.h


namespace kaldi {

// Rebuilds the lookup state (bounds, contiguous_/quick_ flags and the
// quick_set_ bitmap) from slow_set_.  The code takes slow_set_.front() and
// slow_set_.back() as the smallest and largest members, i.e. it assumes that
// slow_set_ is already sorted.
template<class I>
void ConstIntegerSet<I>::InitInternal() {
  KALDI_ASSERT_IS_INTEGER_TYPE(I);
  quick_set_.clear();  // just in case we previously had data.
  contiguous_ = false;
  quick_ = false;
  if (slow_set_.empty()) {
    // Empty set: use bounds with lowest > highest, so that the range check
    // in count() rejects every query.
    lowest_member_ = (I) 1;
    highest_member_ = (I) 0;
    return;
  }
  lowest_member_ = slow_set_.front();
  highest_member_ = slow_set_.back();
  // Number of integers in [lowest_member_, highest_member_].  The arithmetic
  // is done in size_t (which is modular) rather than in I, so that
  // "highest_member_ + 1" cannot overflow when the set contains the largest
  // value representable in I.
  size_t range = static_cast<size_t>(highest_member_) -
      static_cast<size_t>(lowest_member_) + 1;
  if (range == slow_set_.size()) {
    // Every value between the bounds is present, so the range check in
    // count() is sufficient on its own.
    contiguous_ = true;
  } else if (range < slow_set_.size() * 8 * sizeof(I)) {
    // It is more compact to store the set as an array of bool
    // (assuming 1 bit per element) than as the vector of I.
    quick_set_.resize(range, false);
    for (size_t i = 0; i < slow_set_.size(); i++)
      quick_set_[slow_set_[i] - lowest_member_] = true;
    quick_ = true;
  }
  // Otherwise neither flag is set and count() uses binary search on
  // slow_set_.
}

// Returns 1 if i is a member of the set and 0 otherwise.
template<class I>
int ConstIntegerSet<I>::count(I i) const {
  // Anything outside [lowest_member_, highest_member_] cannot be a member.
  if (i < lowest_member_ || i > highest_member_) return 0;
  // Contiguous set: being inside the bounds is enough.
  if (contiguous_) return 1;
  // Bitmap available: look up the flag at the offset from the lowest member.
  if (quick_) return quick_set_[i-lowest_member_] ? 1 : 0;
  // Fall back to a binary search of the member vector.
  const bool found =
      std::binary_search(slow_set_.begin(), slow_set_.end(), i);
  return found ? 1 : 0;
}

// Writes the set to os by passing the member vector slow_set_ to
// WriteIntegerVector; `binary` is forwarded to that call unchanged.
template<class I>
void ConstIntegerSet<I>::Write(std::ostream &os, bool binary) const {
  WriteIntegerVector(os, binary, slow_set_);
}

// Reads an integer vector from is into slow_set_ (via ReadIntegerVector) and
// then rebuilds the lookup state with InitInternal().
// NOTE(review): the vector is not sorted or de-duplicated here, while
// InitInternal() uses front()/back() as the bounds; presumably the stream is
// expected to contain what Write() produced -- confirm.
template<class I>
void ConstIntegerSet<I>::Read(std::istream &is, bool binary) {
  ReadIntegerVector(is, binary, &slow_set_);
  InitInternal();
}


}  // end namespace kaldi

#endif  // KALDI_UTIL_CONST_INTEGER_SET_INL_H_


================================================
FILE: runtime/engine/kaldi/util/const-integer-set.h
================================================
// util/const-integer-set.h

// Copyright 2009-2011     Microsoft Corporation

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


#ifndef KALDI_UTIL_CONST_INTEGER_SET_H_
#define KALDI_UTIL_CONST_INTEGER_SET_H_
#include <vector>
#include <set>
#include <algorithm>
#include <limits>
#include <cassert>
#include "util/stl-utils.h"

  /* ConstIntegerSet is a way to efficiently test whether something is in a
     supplied set of integers.  It can be initialized from a vector or set, but
     never changed after that.  It either uses a sorted vector or an array of
     bool, depending on the input.  It behaves like a const version of an STL
     set, with only a subset of the functionality (count(), begin(), end(),
     size() and empty()); initialization and I/O are done through the
     Kaldi-style members Init(), Read() and Write().

     Note that we could get rid of the member slow_set_, but we'd have to
     do more work to implement an iterator type.  This would save memory.
  */

namespace kaldi {

// Immutable set of integers of type I with fast membership tests.  The
// members are kept in a sorted vector (slow_set_); InitInternal() decides
// whether lookups can be answered from the bounds alone (contiguous_), from a
// bitmap (quick_ / quick_set_) or need a binary search.
template<class I> class ConstIntegerSet {
 public:
  // Creates an empty set.  The bounds are chosen with lowest > highest so
  // that count() rejects every query.  Both strategy flags are initialized
  // as well, so that no member is left indeterminate if the object is
  // assigned from before Init() or Read() is called.
  ConstIntegerSet(): lowest_member_(1), highest_member_(0),
                     contiguous_(false), quick_(false) { }

  // (Re)initializes the set from a vector, which need not be sorted or
  // unique: a copy of it is sorted and de-duplicated with SortAndUniq().
  void Init(const std::vector<I> &input) {
    slow_set_ = input;
    SortAndUniq(&slow_set_);
    InitInternal();
  }

  // (Re)initializes the set from a std::set.
  void Init(const std::set<I> &input) {
    CopySetToVector(input, &slow_set_);
    InitInternal();
  }

  // Constructs the set from a vector; equivalent to Init(input).
  explicit ConstIntegerSet(const std::vector<I> &input): slow_set_(input) {
    SortAndUniq(&slow_set_);
    InitInternal();
  }
  // Constructs the set from a std::set; equivalent to Init(input).
  explicit ConstIntegerSet(const std::set<I> &input) {
    CopySetToVector(input, &slow_set_);
    InitInternal();
  }
  // Copy constructor: copies the member vector and rebuilds the lookup
  // state from it instead of copying the derived members.
  explicit ConstIntegerSet(const ConstIntegerSet<I> &other):
                           slow_set_(other.slow_set_) {
    InitInternal();
  }

  int count(I i) const;  // returns 1 or 0.

  // Read-only iteration over the members, in the order of slow_set_.
  typedef typename std::vector<I>::const_iterator iterator;
  iterator begin() const { return slow_set_.begin(); }
  iterator end() const { return slow_set_.end(); }
  size_t size() const { return slow_set_.size(); }
  bool empty() const { return slow_set_.empty(); }

  void Write(std::ostream &os, bool binary) const;
  void Read(std::istream &is, bool binary);

 private:
  I lowest_member_;   // smallest member (1 if the set is empty).
  I highest_member_;  // largest member (0 if the set is empty).
  bool contiguous_;   // true if every value between the bounds is a member.
  bool quick_;        // true if quick_set_ is in use.
  std::vector<bool> quick_set_;  // membership flags, indexed by
                                 // (value - lowest_member_).
  std::vector<I> slow_set_;      // the members themselves.
  void InitInternal();
};

}  // end namespace kaldi

#include "util/const-integer-set-inl.h"

#endif  // KALDI_UTIL_CONST_INTEGER_SET_H_


================================================
FILE: runtime/engine/kaldi/util/edit-distance-inl.h
================================================
// util/edit-distance-inl.h

// Copyright 2009-2011  Microsoft Corporation;  Haihua Xu;  Yanmin Qian

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//  http://www.apache.org/licenses/LICENSE-2.0

// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_UTIL_EDIT_DISTANCE_INL_H_
#define KALDI_UTIL_EDIT_DISTANCE_INL_H_
#include <algorithm>
#include <utility>
#include <vector>
#include "util/stl-utils.h"

namespace kaldi {

// Returns the Levenshtein edit distance between the sequences a and b, with
// unit cost for each substitution, insertion and deletion.  Elements are
// compared with operator==.
template<class T>
int32 LevenshteinEditDistance(const std::vector<T> &a,
                              const std::vector<T> &b) {
  // Algorithm:
  //  write A and B for the sequences, with elements a_0 ..
  //  let |A| = M and |B| = N be the lengths, and have
  //  elements a_0 ... a_{M-1} and b_0 ... b_{N-1}.
  //  We are computing the recursion
  //     E(m, n) = min(  E(m-1, n-1) + (1-delta(a_{m-1}, b_{n-1})),
  //                    E(m-1, n) + 1,
  //                    E(m, n-1) + 1).
  //  where E(m, n) is defined for m = 0..M and n = 0..N and out-of-
  //  bounds quantities are considered to be infinity (i.e. the
  //  recursion does not visit them).

  // We do this computation using a vector e of size N+1.
  // The outer iterations range over m = 0..M.

  const int32 M = static_cast<int32>(a.size()),
      N = static_cast<int32>(b.size());
  std::vector<int32> e(N+1);      // row E(m-1, .)
  std::vector<int32> e_tmp(N+1);  // row E(m, .) being computed
  // initialize e with E(0, n) = n.
  for (int32 n = 0; n <= N; n++)
    e[n] = n;
  for (int32 m = 1; m <= M; m++) {
    // computing E(m, .) from E(m-1, .)
    // handle special case n = 0:
    e_tmp[0] = e[0] + 1;

    for (int32 n = 1; n <= N; n++) {
      int32 term1 = e[n-1] + (a[m-1] == b[n-1] ? 0 : 1);
      int32 term2 = e[n] + 1;
      int32 term3 = e_tmp[n-1] + 1;
      e_tmp[n] = std::min(term1, std::min(term2, term3));
    }
    // Every entry of e_tmp is rewritten on the next iteration before it is
    // read, so the rows can be exchanged in O(1) instead of copied.
    e.swap(e_tmp);
  }
  return e.back();
}
//
// Error counts carried along one cell of the edit-distance recursion in the
// five-argument LevenshteinEditDistance() below.
struct error_stats {
  int32 ins_num;     // number of insertions counted so far.
  int32 del_num;     // number of deletions counted so far.
  int32 sub_num;     // number of substitutions counted so far.
  int32 total_cost;  // minimum total cost to the current alignment.
};
// Note that both hyp and ref should not contain noise word in
// the following implementation.

// Computes the edit distance between ref and hyp and the error breakdown of
// the alignment that was selected.
//   ref, hyp        sequences to compare; elements are compared with
//                   operator!=.
//   ins, del, sub   outputs: number of insertions, deletions and
//                   substitutions.  They are written unconditionally, so
//                   they must not be NULL.
// Returns the total cost (unit cost per error).
template<class T>
int32 LevenshteinEditDistance(const std::vector<T> &ref,
                              const std::vector<T> &hyp,
                              int32 *ins, int32 *del, int32 *sub) {
  // temp sequence to remember error type and stats.
  // e holds the row for the previous hypothesis position, cur_e the row
  // that is being computed; both are indexed by reference position.
  std::vector<error_stats> e(ref.size()+1);
  std::vector<error_stats> cur_e(ref.size()+1);
  // initialize the first hypothesis aligned to the reference at each
  // position:[hyp_index =0][ref_index]
  // (with no hypothesis symbols consumed, every reference symbol is a
  // deletion).
  for (size_t i = 0; i < e.size(); i++) {
    e[i].ins_num = 0;
    e[i].sub_num = 0;
    e[i].del_num = i;
    e[i].total_cost = i;
  }

  // for other alignments
  for (size_t hyp_index = 1; hyp_index <= hyp.size(); hyp_index++) {
    // No reference symbols consumed: one more insertion.
    cur_e[0] = e[0];
    cur_e[0].ins_num++;
    cur_e[0].total_cost++;
    for (size_t ref_index = 1; ref_index <= ref.size(); ref_index++) {
      const bool mismatch = (hyp[hyp_index-1] != ref[ref_index-1]);
      int32 ins_err = e[ref_index].total_cost + 1;
      int32 del_err = cur_e[ref_index-1].total_cost + 1;
      int32 sub_err = e[ref_index-1].total_cost + (mismatch ? 1 : 0);

      // Tie-breaking: the diagonal step is taken only if it is strictly
      // cheaper than both alternatives; otherwise a deletion is taken only
      // if it is strictly cheaper than an insertion.
      if (sub_err < ins_err && sub_err < del_err) {
        cur_e[ref_index] = e[ref_index-1];
        if (mismatch)
          cur_e[ref_index].sub_num++;  // substitution error should be increased
        cur_e[ref_index].total_cost = sub_err;
      } else if (del_err < ins_err) {
        cur_e[ref_index] = cur_e[ref_index-1];
        cur_e[ref_index].total_cost = del_err;
        cur_e[ref_index].del_num++;    // deletion number is increased.
      } else {
        cur_e[ref_index] = e[ref_index];
        cur_e[ref_index].total_cost = ins_err;
        cur_e[ref_index].ins_num++;    // insertion number is increased.
      }
    }
    // alternate for the next recursion.  Every entry of cur_e is rewritten
    // on the next iteration before it is read, so the rows can be exchanged
    // in O(1) instead of copied.
    e.swap(cur_e);
  }
  const error_stats &result = e.back();
  *ins = result.ins_num;
  *del = result.del_num;
  *sub = result.sub_num;
  return result.total_cost;
}

// Computes the edit distance between a and b and the alignment behind it.
//   a, b         sequences to align; neither may contain eps_symbol.
//   eps_symbol   placeholder used in the output where a symbol of one
//                sequence is aligned to nothing in the other.
//   output       receives the aligned (symbol of a, symbol of b) pairs in
//                left-to-right order; must not be NULL.
// Returns the edit distance.
template<class T>
int32 LevenshteinAlignment(const std::vector<T> &a,
                           const std::vector<T> &b,
                           T eps_symbol,
                           std::vector<std::pair<T, T> > *output) {
  // Check inputs:
  KALDI_ASSERT(output != NULL);
  for (size_t i = 0; i < a.size(); i++) KALDI_ASSERT(a[i] != eps_symbol);
  for (size_t i = 0; i < b.size(); i++) KALDI_ASSERT(b[i] != eps_symbol);
  output->clear();

  // This is very memory-inefficiently implemented using a vector of vectors:
  // cost[m][n] is the distance between the first m symbols of a and the
  // first n symbols of b.
  const size_t M = a.size(), N = b.size();
  std::vector<std::vector<int32> > cost(M + 1, std::vector<int32>(N + 1));
  for (size_t col = 0; col <= N; col++)
    cost[0][col] = col;
  for (size_t row = 1; row <= M; row++) {
    cost[row][0] = cost[row-1][0] + 1;
    for (size_t col = 1; col <= N; col++) {
      int32 diag = cost[row-1][col-1] + (a[row-1] == b[col-1] ? 0 : 1);
      int32 up = cost[row-1][col] + 1;    // deletion; assumes a == ref,
                                          // b == hyp.
      int32 left = cost[row][col-1] + 1;  // insertion.
      cost[row][col] = std::min(diag, std::min(up, left));
    }
  }

  // Trace back from (M, N) to (0, 0); this yields the pairs in
  // time-reversed order.
  size_t m = M, n = N;
  while (m != 0 || n != 0) {
    // By default stay in place on both axes; each case below moves back on
    // at least one of them.
    size_t prev_m = m, prev_n = n;
    if (m == 0) {
      prev_n = n - 1;
    } else if (n == 0) {
      prev_m = m - 1;
    } else {
      int32 diag = cost[m-1][n-1] + (a[m-1] == b[n-1] ? 0 : 1);
      int32 up = cost[m-1][n] + 1;
      int32 left = cost[m][n-1] + 1;
      if (diag <= std::min(up, left)) {
        // choose sub_or_ok if all else equal.
        prev_m = m - 1;
        prev_n = n - 1;
      } else if (up <= left) {
        // choose del over ins if equal.
        prev_m = m - 1;
      } else {
        prev_n = n - 1;
      }
    }
    // An axis that did not move contributes epsilon to the pair.
    T a_sym, b_sym;
    if (prev_m == m) a_sym = eps_symbol;
    else a_sym = a[prev_m];
    if (prev_n == n) b_sym = eps_symbol;
    else b_sym = b[prev_n];
    output->push_back(std::make_pair(a_sym, b_sym));
    m = prev_m;
    n = prev_n;
  }
  ReverseVector(output);
  return cost[M][N];
}


}  // end namespace kaldi

#endif  // KALDI_UTIL_EDIT_DISTANCE_INL_H_


================================================
FILE: runtime/engine/kaldi/util/edit-distance.h
================================================
// util/edit-distance.h

// Copyright 2009-2011     Microsoft Corporation;  Haihua Xu

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


#ifndef KALDI_UTIL_EDIT_DISTANCE_H_
#define KALDI_UTIL_EDIT_DISTANCE_H_
#include <vector>
#include <set>
#include <algorithm>
#include <limits>
#include <cassert>
#include <utility>
#include "util/edit-distance-inl.h"
#include "base/kaldi-types.h"

namespace kaldi {

// Compute the edit-distance between two strings.
template<class T>
int32 LevenshteinEditDistance(const std::vector<T> &a,
                              const std::vector<T> &b);


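// Edit-distance computation that also reports the error breakdown.
// Returns the total edit distance between ref and hyp, and writes the
// numbers of insertions, deletions and substitutions of the selected
// alignment to *ins, *del and *sub.
// Note: noise words must be filtered out of the hypothesis and reference
// sequences before this function is called.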
template<class T>
int32 LevenshteinEditDistance(const std::vector<T> &ref,
                              const std::vector<T> &hyp,
                              int32 *ins, int32 *del, int32 *sub);

// This version of the edit-distance computation outputs the alignment
// between the two.  This is a vector of pairs of (symbol a, symbol b).
// The epsilon symbol (eps_symbol) must not occur in sequences a or b.
// Where one aligned to no symbol in the other (insertion or deletion),
// epsilon will be the corresponding member of the pair.
// It returns the edit-distance between the two strings.

template<class T>
int32 LevenshteinAlignment(const std::vector<T> &a,
                           const std::vector<T> &b,
                           T eps_symbol,
                           std::vector<std::pair<T, T> > *output);

}  // end namespace kaldi

#endif  // KALDI_UTIL_EDIT_DISTANCE_H_


================================================
FILE: runtime/engine/kaldi/util/hash-list-inl.h
================================================
// util/hash-list-inl.h

// Copyright 2009-2011   Microsoft Corporation
//                2013   Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


#ifndef KALDI_UTIL_HASH_LIST_INL_H_
#define KALDI_UTIL_HASH_LIST_INL_H_

// Do not include this file directly.  It is included by hash-list.h


namespace kaldi {

// Creates an empty HashList with zero hash buckets: no stored list, no
// active bucket and an empty free list.
template<class I, class T> HashList<I, T>::HashList()
    : list_head_(NULL),
      bucket_list_tail_(static_cast<size_t>(-1)),  // invalid.
      hash_size_(0),
      freed_head_(NULL) {
}

// Sets the number of hash buckets in use to `size`.  The bucket array is
// grown if it is smaller than that; it is never shrunk here.  Asserts that
// the hash is currently empty.
template<class I, class T> void HashList<I, T>::SetSize(size_t size) {
  hash_size_ = size;
  // make sure empty.
  KALDI_ASSERT(list_head_ == NULL &&
               bucket_list_tail_ == static_cast<size_t>(-1));
  const bool need_more_buckets = (buckets_.size() < size);
  if (need_more_buckets) {
    buckets_.resize(size, HashBucket(0, NULL));
  }
}

// Clears the hashtable and gives ownership of the currently contained list
// to the user.  Only the buckets on the active-bucket chain are visited.
template<class I, class T>
typename HashList<I, T>::Elem* HashList<I, T>::Clear() {
  size_t bucket_index = bucket_list_tail_;
  while (bucket_index != static_cast<size_t>(-1)) {
    HashBucket &bucket = buckets_[bucket_index];
    bucket.last_elem = NULL;  // this is how we indicate "empty".
    bucket_index = bucket.prev_bucket;
  }
  bucket_list_tail_ = static_cast<size_t>(-1);
  // Detach the element list and return its head to the caller.
  Elem *released_head = list_head_;
  list_head_ = NULL;
  return released_head;
}

// Returns the head of the currently stored element list (NULL if the list is
// empty) without modifying the object.
template<class I, class T>
const typename HashList<I, T>::Elem* HashList<I, T>::GetList() const {
  return list_head_;
}

// Puts e back on the free list so that New() can hand it out again.  This
// only relinks e->tail onto freed_head_; it does not touch the hash buckets
// or the stored list.
template<class I, class T>
inline void HashList<I, T>::Delete(Elem *e) {
  e->tail = freed_head_;
  freed_head_ = e;
}

// Looks up `key` through its hash bucket.  Returns the first element of the
// bucket whose key matches, or NULL if there is none.
template<class I, class T>
inline typename HashList<I, T>::Elem* HashList<I, T>::Find(I key) {
  const size_t index = static_cast<size_t>(key) % hash_size_;
  HashBucket &bucket = buckets_[index];
  if (bucket.last_elem == NULL)
    return NULL;  // empty bucket.

  // The elements of a bucket are consecutive in the list.  They start right
  // after the last element of the previous active bucket (or at the list
  // head if there is no such bucket) and end at this bucket's last element.
  Elem *cur;
  if (bucket.prev_bucket == static_cast<size_t>(-1))
    cur = list_head_;
  else
    cur = buckets_[bucket.prev_bucket].last_elem->tail;
  Elem *const stop = bucket.last_elem->tail;
  while (cur != stop) {
    if (cur->key == key) return cur;
    cur = cur->tail;
  }
  return NULL;  // Not found.
}

// Returns an unused Elem taken from the free list.  If the free list is
// empty, a block of allocate_block_size_ elements is allocated first, chained
// into the free list and recorded in allocated_ so that the destructor can
// release it.
template<class I, class T>
inline typename HashList<I, T>::Elem* HashList<I, T>::New() {
  if (freed_head_ == NULL) {
    Elem *block = new Elem[allocate_block_size_];
    // Link each element of the block to its successor; the last one ends
    // the chain.
    for (size_t i = 0; i+1 < allocate_block_size_; i++)
      block[i].tail = block+i+1;
    block[allocate_block_size_-1].tail = NULL;
    freed_head_ = block;
    allocated_.push_back(block);
  }
  // Pop the first element off the free list.
  Elem *ans = freed_head_;
  freed_head_ = ans->tail;
  return ans;
}

// Releases all allocated blocks.  Before doing so it counts the elements on
// the free list; if that differs from the number of elements allocated, some
// elements were never handed back with Delete() and a warning is printed.
template<class I, class T>
HashList<I, T>::~HashList() {
  // First test whether we had any memory leak within the
  // HashList, i.e. things for which the user did not call Delete().
  size_t num_in_list = 0;
  for (Elem *e = freed_head_; e != NULL; e = e->tail)
    ++num_in_list;

  // Every block holds exactly allocate_block_size_ elements.
  const size_t num_allocated = allocated_.size() * allocate_block_size_;
  for (size_t i = 0; i < allocated_.size(); i++)
    delete[] allocated_[i];

  if (num_in_list != num_allocated) {
    KALDI_WARN << "Possible memory leak: " << num_in_list
               << " != " << num_allocated
               << ": you might have forgotten to call Delete on "
               << "some Elems";
  }
}

// Inserts (key, val) unless an element with this key is already stored.
// Returns the existing element in that case (its value is left untouched),
// otherwise the newly inserted element.
template<class I, class T>
inline typename HashList<I, T>::Elem* HashList<I, T>::Insert(I key, T val) {
  const size_t index = static_cast<size_t>(key) % hash_size_;
  HashBucket &bucket = buckets_[index];
  const bool occupied = (bucket.last_elem != NULL);

  // Check the element is existing or not.
  if (occupied) {
    Elem *cur;
    if (bucket.prev_bucket == static_cast<size_t>(-1))
      cur = list_head_;
    else
      cur = buckets_[bucket.prev_bucket].last_elem->tail;
    Elem *const stop = bucket.last_elem->tail;
    for (; cur != stop; cur = cur->tail)
      if (cur->key == key) return cur;
  }

  // This is a new element. Insert it.
  Elem *elem = New();
  elem->key = key;
  elem->val = val;

  if (occupied) {
    // Already-occupied bucket.  Insert at tail of list of elements within
    // the bucket.
    elem->tail = bucket.last_elem->tail;
    bucket.last_elem->tail = elem;
    bucket.last_elem = elem;
    return elem;
  }

  // Unoccupied bucket.  Insert at head of bucket list (which is tail of
  // regular list, they go in opposite directions).
  if (bucket_list_tail_ == static_cast<size_t>(-1)) {
    // list was empty so this is the first elem.
    KALDI_ASSERT(list_head_ == NULL);
    list_head_ = elem;
  } else {
    // link in to the chain of Elems
    buckets_[bucket_list_tail_].last_elem->tail = elem;
  }
  elem->tail = NULL;
  bucket.last_elem = elem;
  bucket.prev_bucket = bucket_list_tail_;
  bucket_list_tail_ = index;
  return elem;
}

// Inserts one more element with a key that is already present, keeping the
// elements that share a key next to each other in the list.  The new element
// is linked in directly after the first element carrying that key (or after
// the bucket's last element, if that one carries the key).  Asserts that the
// bucket is non-empty and that the key is found in it.
template<class I, class T>
void HashList<I, T>::InsertMore(I key, T val) {
  size_t index = (static_cast<size_t>(key) % hash_size_);
  HashBucket &bucket = buckets_[index];
  Elem *elem = New();
  elem->key = key;
  elem->val = val;

  KALDI_ASSERT(bucket.last_elem != NULL);  // assume one element is already here
  if (bucket.last_elem->key == key) {  // standard behavior: add as last element
    elem->tail = bucket.last_elem->tail;
    bucket.last_elem->tail = elem;
    bucket.last_elem = elem;
    return;
  }
  Elem *e = (bucket.prev_bucket == static_cast<size_t>(-1) ?
             list_head_ : buckets_[bucket.prev_bucket].last_elem->tail);
  // find place to insert in linked list
  Elem *const bucket_end = bucket.last_elem->tail;
  while (e != bucket_end && e->key != key) e = e->tail;
  // not found? - should not happen.  The end-of-bucket test comes first
  // because bucket_end is NULL when this bucket is the last one in the list,
  // and e must not be dereferenced in that case.
  KALDI_ASSERT(e != bucket_end && e->key == key);
  elem->tail = e->tail;
  e->tail = elem;
}


}  // end namespace kaldi

#endif  // KALDI_UTIL_HASH_LIST_INL_H_


================================================
FILE: runtime/engine/kaldi/util/hash-list.h
================================================
// util/hash-list.h

// Copyright 2009-2011   Microsoft Corporation
//                2013   Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


#ifndef KALDI_UTIL_HASH_LIST_H_
#define KALDI_UTIL_HASH_LIST_H_
#include <vector>
#include <set>
#include <algorithm>
#include <limits>
#include <cassert>
#include "util/stl-utils.h"


/* This header provides utilities for a structure that's used in a decoder (but
   is quite generic in nature so we implement and test it separately).
   Basically it's a singly-linked list, but implemented in such a way that we
   can quickly search for elements in the list.  We give it a slightly richer
   interface than just a hash and a list.  The idea is that we want to separate
   the hash part and the list part: basically, in the decoder, we want to have a
   single hash for the current frame and the next frame, because by the time we
   need to access the hash for the next frame we no longer need the hash for the
   previous frame.  So we have an operation that clears the hash but leaves the
   list structure intact.  We also control memory management inside this object,
   to avoid repeated new's/deletes.

   See hash-list-test.cc for an example of how to use this object.
*/


namespace kaldi {

// A singly-linked list of (key, value) elements combined with a hash over
// the keys, so that elements of the list can be found quickly.  The hash can
// be cleared while the list itself is handed over to the caller, and element
// memory is pooled inside the object.
template<class I, class T> class HashList {
 public:
  struct Elem {
    I key;
    T val;
    Elem *tail;  // next element in the list (or in the free list).
  };

  /// Constructor takes no arguments.
  /// Call SetSize to inform it of the likely size.
  HashList();

  /// Clears the hash and gives the head of the current list to the user;
  /// ownership is transferred to the user (the user must call Delete()
  /// for each element in the list, at his/her leisure).
  Elem *Clear();

  /// Gives the head of the current list to the user.  Ownership retained in the
  /// class.  Caution: in December 2013 the return type was changed to const
  /// Elem* and this function was made const.  You may need to change some types
  /// of local Elem* variables to const if this produces compilation errors.
  const Elem *GetList() const;

  /// Think of this like delete().  It is to be called for each Elem in turn
  /// after you "obtained ownership" by doing Clear().  This is not the opposite
  /// of Insert, it is the opposite of New.  It's really a memory operation.
  inline void Delete(Elem *e);

  /// This should probably not be needed to be called directly by the user.
  /// Think of it as opposite
  /// to Delete();
  inline Elem *New();

  /// Find tries to find this element in the current list using the hashtable.
  /// It returns NULL if not present.  The Elem it returns is not owned by the
  /// user, it is part of the internal list owned by this object, but the user
  /// is free to modify the "val" element.
  inline Elem *Find(I key);

  /// Insert inserts a new element into the hashtable/stored list.
  /// Because element keys in a hashtable are unique, this operation checks
  /// whether each inserted element has a key equivalent to the one of an
  /// element already in the hashtable. If so, the element is not inserted,
  /// returning a pointer to this existing element.
  inline Elem *Insert(I key, T val);

  /// InsertMore inserts another element with the same key into the
  /// hashtable/stored list.
  /// By calling this, the user asserts that one element with that key is
  /// already present.
  /// The element is inserted in such a way that all elements with the same
  /// key follow each other.
  /// Find() will return the first one of the elements with the same key.
  inline void InsertMore(I key, T val);

  /// SetSize tells the object how many hash buckets to allocate (should
  /// typically be at least twice the number of objects we expect to go in the
  /// structure, for fastest performance).  It must be called while the hash
  /// is empty (e.g. after Clear() or after initializing the object, but before
  /// adding anything to the hash).
  void SetSize(size_t sz);

  /// Returns current number of hash buckets.  Declared const so that it can
  /// also be called on a const HashList.
  inline size_t Size() const { return hash_size_; }

  ~HashList();
 private:

  struct HashBucket {
    size_t prev_bucket;  // index of the bucket that became active just before
    // this one (static_cast<size_t>(-1) if this was the first one).  Note:
    // list of buckets goes in opposite direction to list of Elems.
    Elem *last_elem;  // pointer to last element in this bucket (NULL if empty)
    inline HashBucket(size_t i, Elem *e): prev_bucket(i), last_elem(e) {}
  };

  Elem *list_head_;  // head of currently stored list.
  size_t bucket_list_tail_;  // tail of list of active hash buckets.

  size_t hash_size_;  // number of hash buckets.

  std::vector<HashBucket> buckets_;

  Elem *freed_head_;  // head of list of currently freed elements. [ready for
  // allocation]

  std::vector<Elem*> allocated_;  // list of allocated blocks.

  static const size_t allocate_block_size_ = 1024;  // Number of Elements to
  // allocate in one block.  Must be largish so storing allocated_ doesn't
  // become a problem.
};


}  // end namespace kaldi

#include "util/hash-list-inl.h"

#endif  // KALDI_UTIL_HASH_LIST_H_


================================================
FILE: runtime/engine/kaldi/util/kaldi-cygwin-io-inl.h
================================================
// util/kaldi-cygwin-io-inl.h

// Copyright 2015 Smart Action Company LLC (author: Kirill Katsnelson)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//  http://www.apache.org/licenses/LICENSE-2.0

// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_UTIL_KALDI_CYGWIN_IO_INL_H_
#define KALDI_UTIL_KALDI_CYGWIN_IO_INL_H_

#ifndef _MSC_VER
#error This is a Windows-compatibility file. Something went wery wrong.
#endif

#include <string>

// This file is included only into kaldi-io.cc, and only if
// KALDI_CYGWIN_COMPAT is enabled.
//
// The routines map unix-ey paths passed to Windows programs from shell
// scripts in egs. Since shell scripts run under cygwin, they use cygwin's
// own mount table and a mapping to the file system. It is quite possible to
// create quite an intricate mapping that only own cygwin API would be able
// to untangle. Unfortunately, the API to map between filenames is not
// available to non-cygwin programs. Running cygpath for every file operation
// would as well be cumbersome. So this is only a simplistic path resolution,
// assuming that the default cygwin prefix /cygdrive is used, and that all
// resolved unix-style full paths end up prefixed with /cygdrive. This is
// quite a sensible approach. We'll also try to map /dev/null and /tmp/**,
// die on all other /dev/** and warn about all other rooted paths.

namespace kaldi {

// Returns true if str starts with pfx.  An empty pfx is a prefix of every
// string.
static bool prefixp(const std::string& pfx, const std::string& str) {
  const std::string::size_type n = pfx.length();
  if (str.length() < n) return false;
  // Compare the first n characters of str against the whole of pfx.
  return str.compare(0, n, pfx) == 0;
}

// Path prefix under which drives are expected to be mounted (the default
// cygwin prefix, see the comment at the top of this file).
static std::string cygprefix("/cygdrive/");

// Maps a unix-style path to a native Windows path, for everything except
// /tmp (which MapCygwinPath() handles before calling this function):
//  - paths starting with "//" and paths not starting with "/" are returned
//    unchanged;
//  - "/dev/null" becomes "\\.\nul"; any other /dev/ path is reported through
//    KALDI_ERR;
//  - "/cygdrive/X" and "/cygdrive/X/..." become "X:/" and "X:/...";
//  - any other rooted path is returned unchanged after a warning.
static std::string MapCygwinPathNoTmp(const std::string &filename) {
  // UNC(?), relative, native Windows and empty paths are ok already.
  if (prefixp("//", filename) || !prefixp("/", filename))
    return filename;

  // /dev/...
  if (filename == "/dev/null")
    return "\\\\.\\nul";
  if (prefixp("/dev/", filename)) {
      KALDI_ERR << "Unable to resolve path '" << filename
                << "' - only have /dev/null here.";
      return "\\\\.\\invalid";
  }

  // /cygdrive/?[/....]
  // preflen is unsigned so that the size comparisons below do not mix signed
  // and unsigned operands.
  const std::string::size_type preflen = cygprefix.size();
  if (prefixp(cygprefix, filename)
      && filename.size() >= preflen + 1
      // The cast matters: calling isalpha() with a negative char value is
      // undefined behavior.
      && isalpha(static_cast<unsigned char>(filename[preflen]))
      && (filename.size() == preflen + 1 || filename[preflen + 1] == '/')) {
    // Drive letter, a colon, then the rest of the path ("/" if there is
    // nothing after the drive letter).
    return std::string() + filename[preflen] + ':' +
       (filename.size() > preflen + 1 ? filename.substr(preflen + 1) : "/");
  }

  KALDI_WARN << "Unable to resolve path '" << filename
             << "' - cannot map unix prefix. "
             << "Will go on, but breakage will likely ensue.";
  return filename;
}

// extern for unit testing.
// Maps a unix-style path to a native Windows path.  "/tmp" and "/tmp/..."
// are redirected to the directory named by the environment variable TMP (or
// TEMP if TMP is not set); everything else is passed to
// MapCygwinPathNoTmp().
std::string MapCygwinPath(const std::string &filename) {
  // /tmp[/....]
  const bool under_tmp = (filename == "/tmp" || prefixp("/tmp/", filename));
  if (!under_tmp)
    return MapCygwinPathNoTmp(filename);

  char *tmpdir = std::getenv("TMP");
  if (tmpdir == nullptr) {
    tmpdir = std::getenv("TEMP");
    if (tmpdir == nullptr) {
      KALDI_ERR << "Unable to resolve path '" << filename
                << "' - unable to find temporary directory. Set TMP.";
      return filename;
    }
  }
  // Map the value of tmpdir again, as cygwin environment actually may contain
  // unix-style paths.  substr(4) is the part of filename after "/tmp".
  const std::string remapped = std::string(tmpdir) + filename.substr(4);
  return MapCygwinPathNoTmp(remapped);
}

// popen() replacement that runs the command line through cygwin's bash.exe.
// This is needed because some piped commands are cygwin links (e. g. fgrep
// is a soft link to grep) and others are #!-files, such as gunzip (a shell
// script invoking gzip) or kaldi's own run.pl (a perl script).
//
// _popen launches cmd.exe, or whichever shell COMSPEC names, and appends a
// hardcoded " /c " to it, so pointing COMSPEC at bash.exe does not work.
// Instead the command is quoted and handed to bash through its -c switch.
static FILE *CygwinCompatPopen(const char* command, const char* mode) {
  // BASH_EXE may hold the full path to bash.exe, which makes the launch
  // marginally faster. A path containing spaces will not work, but no sane
  // person would install cygwin into a space-ridden path.
  const char* bash_path = std::getenv("BASH_EXE");
  std::string quoted = (bash_path != nullptr) ? bash_path : "bash.exe";
  quoted.append(" -c \"");

  // Copy the command, doubling every embedded double quote.
  for (const char* p = command; *p != '\0'; ++p) {
    if (*p == '\"')
      quoted.push_back('\"');
    quoted.push_back(*p);
  }
  quoted.push_back('\"');

  return _popen(quoted.c_str(), mode);
}

}  // namespace kaldi

#endif  // KALDI_UTIL_KALDI_CYGWIN_IO_INL_H_


================================================
FILE: runtime/engine/kaldi/util/kaldi-holder-inl.h
================================================
// util/kaldi-holder-inl.h

// Copyright 2009-2011     Microsoft Corporation
//                2016     Xiaohui Zhang

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


#ifndef KALDI_UTIL_KALDI_HOLDER_INL_H_
#define KALDI_UTIL_KALDI_HOLDER_INL_H_

#include <algorithm>
#include <vector>
#include <utility>
#include <string>

#include "base/kaldi-utils.h"
#include "util/kaldi-io.h"
#include "util/text-utils.h"
#include "matrix/kaldi-matrix.h"

namespace kaldi {

/// \addtogroup holders
/// @{


// KaldiObjectHolder is valid only for Kaldi objects with
// copy constructors, default constructors, and "normal"
// Kaldi Write and Read functions.  E.g. it works for
// Matrix and Vector.
template<class KaldiType> class KaldiObjectHolder {
 public:
  typedef KaldiType T;

  // Starts out holding no object.
  KaldiObjectHolder(): t_(NULL) { }

  // Writes `t` to `os` via T::Write(), preceded by whatever
  // InitKaldiOutputStream() emits. Returns false if T::Write() throws a
  // std::exception or the stream is not good afterwards.
  static bool Write(std::ostream &os, bool binary, const T &t) {
    InitKaldiOutputStream(os, binary);  // Puts binary header if binary mode.
    try {
      t.Write(os, binary);
      return os.good();
    } catch(const std::exception &e) {
      KALDI_WARN << "Exception caught writing Table object. " << e.what();
      return false;  // Write failure.
    }
  }

  // Releases the held object, if any.
  void Clear() {
    if (t_) {
      delete t_;
      t_ = NULL;
    }
  }

  // Reads into the holder. Returns false if the stream header cannot be
  // read or if T::Read() throws a std::exception; in the latter case the
  // holder is left empty.
  bool Read(std::istream &is) {
    // Don't want any existing state to complicate the read function: get new
    // object.  The pointer is nulled between delete and new so that a
    // throwing allocation/constructor cannot leave t_ dangling (which would
    // make the destructor delete the old object a second time).
    delete t_;
    t_ = NULL;
    t_ = new T;
    bool is_binary;
    if (!InitKaldiInputStream(is, &is_binary)) {
      KALDI_WARN << "Reading Table object, failed reading binary header\n";
      return false;
    }
    try {
      t_->Read(is, is_binary);
      return true;
    } catch(const std::exception &e) {
      KALDI_WARN << "Exception caught reading Table object. " << e.what();
      delete t_;
      t_ = NULL;
      return false;
    }
  }

  // Kaldi objects always have the stream open in binary mode for
  // reading.
  static bool IsReadInBinary() { return true; }

  // Returns the held object; it is a code error to call this while the
  // holder is empty.
  T &Value() {
    // code error if !t_.
    if (!t_) KALDI_ERR << "KaldiObjectHolder::Value() called wrongly.";
    return *t_;
  }

  // Exchanges the held objects of the two holders.
  void Swap(KaldiObjectHolder<T> *other) {
    // the t_ values are pointers so this is a shallow swap.
    std::swap(t_, other->t_);
  }

  // Replaces the held object with the part of `other`'s object selected by
  // `range`, as computed by ExtractObjectRange(). `other` must hold an
  // object. Returns the result of ExtractObjectRange().
  bool ExtractRange(const KaldiObjectHolder<T> &other,
                    const std::string &range) {
    KALDI_ASSERT(other.t_ != NULL);
    // Same delete / null / new sequence as in Read(), for the same reason.
    delete t_;
    t_ = NULL;
    t_ = new T;
    // this call will fail for most object types.
    return ExtractObjectRange(*(other.t_), range, t_);
  }

  ~KaldiObjectHolder() { delete t_; }
 private:
  KALDI_DISALLOW_COPY_AND_ASSIGN(KaldiObjectHolder);
  T *t_;  // Owned; NULL while the holder is empty.
};


// BasicHolder is valid for float, double, bool, and integer
// types.  There will be a compile time error otherwise, because
// we make sure that the {Write, Read}BasicType functions do not
// get instantiated for other types.

template<class BasicType> class BasicHolder {
 public:
  typedef BasicType T;

  // The held value starts out as -1 converted to T.
  BasicHolder(): t_(static_cast<T>(-1)) { }

  // Writes one value with WriteBasicType(); in text mode a newline follows
  // it. Returns false if an exception is thrown or the stream is not good
  // afterwards.
  static bool Write(std::ostream &os, bool binary, const T &t) {
    InitKaldiOutputStream(os, binary);  // Puts binary header if binary mode.
    try {
      WriteBasicType(os, binary, t);
      // The newline makes the text format more readable and easier to
      // manipulate.
      if (!binary)
        os << '\n';
      return os.good();
    } catch(const std::exception &err) {
      KALDI_WARN << "Exception caught writing Table object. " << err.what();
      return false;  // Write failure.
    }
  }

  // Nothing to release for a plain value.
  void Clear() { }

  // Reads one value into the holder. In text mode the value must sit on a
  // line of its own: an empty line before it, or anything other than
  // whitespace between it and the newline, is rejected.
  bool Read(std::istream &is) {
    bool is_binary;
    if (!InitKaldiInputStream(is, &is_binary)) {
      KALDI_WARN << "Reading Table object [integer type], failed reading binary"
          " header\n";
      return false;
    }
    try {
      // The two text-mode checks below only exist to catch errors; the class
      // would work without them.
      if (!is_binary) {
        SkipSpaceBeforeNewline(is);
        if (is.peek() == '\n') {
          // An empty line before the token is a more-likely-than-average
          // mistake, and ReadBasicType would silently eat it up.
          KALDI_WARN << "Found newline but expected basic type.";
          return false;
        }
      }

      ReadBasicType(is, is_binary, &t_);

      if (!is_binary) {
        // Make sure there is a newline after the value.
        SkipSpaceBeforeNewline(is);
        if (is.peek() != '\n') {
          KALDI_WARN << "BasicHolder::Read, expected newline, got "
                     << CharToString(is.peek()) << ", position " << is.tellg();
          return false;
        }
        is.get();  // Consume the newline.
      }
      return true;
    } catch(const std::exception &err) {
      KALDI_WARN << "Exception caught reading Table object. " << err.what();
      return false;
    }
  }

  // Objects read/written with the Kaldi I/O functions always have the stream
  // open in binary mode for reading.
  static bool IsReadInBinary() { return true; }

  // Gives access to the held value.
  T &Value() {
    return t_;
  }

  // Exchanges the held values of the two holders.
  void Swap(BasicHolder<T> *other) {
    std::swap(t_, other->t_);
  }

  // Ranges make no sense for a single value; always reports an error.
  bool ExtractRange(const BasicHolder<T> &other, const std::string &range) {
    KALDI_ERR << "ExtractRange is not defined for this type of holder.";
    return false;
  }

  ~BasicHolder() { }
 private:
  KALDI_DISALLOW_COPY_AND_ASSIGN(BasicHolder);

  // Discards whitespace characters from `is`, stopping in front of the first
  // newline or the first non-whitespace character (or at end of input).
  static void SkipSpaceBeforeNewline(std::istream &is) {
    for (;;) {
      int next = is.peek();
      if (!isspace(next) || next == static_cast<int>('\n'))
        break;
      is.get();
    }
  }

  T t_;
};


/// A Holder for a vector of basic types, e.g.
/// std::vector<int32>, std::vector<float>, and so on.
/// Note: a basic type is defined as a type for which ReadBasicType
/// and WriteBasicType are implemented, i.e. integer and floating
/// types, and bool.
template<class BasicType> class BasicVectorHolder {
 public:
  typedef std::vector<BasicType> T;

  BasicVectorHolder() { }

  static bool Write(std::ostream &os, bool binary, const T &t) {
    InitKaldiOutputStream(os, binary);  // Puts binary header if binary mode.
    try {
      if (binary) {  // need to write the size, in binary mode.
        KALDI_ASSERT(static_cast<size_t>(static_cast<int32>(t.size())) ==
                     t.size());
        // Or this Write routine cannot handle such a large vector.
        // use int32 because it's fixed size regardless of compilation.
        // change to int64 (plus in Read function) if this becomes a problem.
        WriteBasicType(os, binary, static_cast<int32>(t.size()));
        for (typename std::vector<BasicType>::const_iterator iter = t.begin();
             iter != t.end(); ++iter)
          WriteBasicType(os, binary, *iter);

      } else {
        for (typename std::vector<BasicType>::const_iterator iter = t.begin();
             iter != t.end(); ++iter)
          WriteBasicType(os, binary, *iter);
        os << '\n';  // Makes output format more readable and
        // easier to manipulate.  In text mode, this function writes something
        // like "1 2 3\n".
      }
      return os.good();
    } catch(const std::exception &e) {
      KALDI_WARN << "Exception caught writing Table object (BasicVector). "
                 << e.what();
      return false;  // Write failure.
    }
  }

  void Clear() { t_.clear(); }

  // Reads into the holder.
  bool Read(std::istream &is) {
    t_.clear();
    bool is_binary;
    if (!InitKaldiInputStream(is, &is_binary)) {
      KALDI_WARN << "Reading Table object [integer type], failed reading binary"
          " header\n";
      return false;
    }
    if (!is_binary) {
      // In text mode, we terminate with newline.
      std::string line;
      getline(is, line);  // this will discard the \n, if present.
      if (is.fail()) {
        KALDI_WARN << "BasicVectorHolder::Read, error reading line " <<
            (is.eof() ? "[eof]" : "");
        return false;  // probably eof.  fail in any case.
      }
      std::istringstream line_is(line);
      try {
        while (1) {
          line_is >> std::ws;  // eat up whitespace.
          if (line_is.eof()) break;
          BasicType bt;
          ReadBasicType(line_is, false, &bt);
          t_.push_back(bt);
        }
        return true;
      } catch(const std::exception &e) {
        KALDI_WARN << "BasicVectorHolder::Read, could not interpret line: "
                   << "'" << line << "'" << "\n" << e.what();
        return false;
      }
    } else {  // binary mode.
      size_t filepos = is.tellg();
      try {
        int32 size;
        ReadBasicType(is, true, &size);
        t_.resize(size);
        for (typename std::vector<BasicType>::iterator iter = t_.begin();
             iter != t_.end();
             ++iter) {
          ReadBasicType(is, true, &(*iter));
        }
        return true;
      } catch(...) {
        KALDI_WARN << "BasicVectorHolder::Read, read error or unexpected data"
            " at archive entry beginning at file position " << filepos;
        return false;
      }
    }
  }

  // Objects read/written with the Kaldi I/O functions always have the stream
  // open in binary mode for reading.
  static bool IsReadInBinary() { return true; }

  T &Value() { return t_; }

  void Swap(BasicVectorHolder<BasicType> *other) {
    t_.swap(other->t_);
  }

  bool ExtractRange(const BasicVectorHolder<BasicType> &other,
                    const std::string &range) {
    KALDI_ERR << "ExtractRange is not defined for this type of holder.";
    return false;
  }

  ~BasicVectorHolder() { }
 private:
  KALDI_DISALLOW_COPY_AND_ASSIGN(BasicVectorHolder);
  T t_;
};


/// BasicVectorVectorHolder is a Holder for a vector of vector of
/// a basic type, e.g. std::vector<std::vector<int32> >.
/// Note: a basic type is defined as a type for which ReadBasicType
/// and WriteBasicType are implemented, i.e. integer and floating
/// types, and bool.
template<class BasicType> class BasicVectorVectorHolder {
 public:
  typedef std::vector<std::vector<BasicType> > T;

  BasicVectorVectorHolder() { }

  // Writes a vector of vectors. Binary mode writes the outer size, then for
  // each inner vector its size followed by its elements (sizes as int32).
  // Text mode writes a single line in which every inner vector is terminated
  // by a semicolon. Returns false if an exception is thrown or the stream is
  // not good afterwards.
  static bool Write(std::ostream &os, bool binary, const T &t) {
    InitKaldiOutputStream(os, binary);  // Puts binary header if binary mode.
    try {
      if (binary) {  // need to write the size, in binary mode.
        KALDI_ASSERT(static_cast<size_t>(static_cast<int32>(t.size())) ==
                     t.size());
        // Or this Write routine cannot handle such a large vector.
        // use int32 because it's fixed size regardless of compilation.
        // change to int64 (plus in Read function) if this becomes a problem.
        WriteBasicType(os, binary, static_cast<int32>(t.size()));
        for (typename std::vector<std::vector<BasicType> >::const_iterator
                 iter = t.begin();
             iter != t.end(); ++iter) {
          KALDI_ASSERT(static_cast<size_t>(static_cast<int32>(iter->size()))
                       == iter->size());
          WriteBasicType(os, binary, static_cast<int32>(iter->size()));
          for (typename std::vector<BasicType>::const_iterator
                   iter2 = iter->begin();
               iter2 != iter->end(); ++iter2) {
            WriteBasicType(os, binary, *iter2);
          }
        }
      } else {  // text mode...
        // In text mode, we write out something like (for integers):
        // "1 2 3 ; 4 5 ; 6 ; ; 7 8 9 ;\n"
        // where the semicolon is a terminator, not a separator
        // (a separator would cause ambiguity between an
        // empty list, and a list containing a single empty list).
        for (typename std::vector<std::vector<BasicType> >::const_iterator
                 iter = t.begin();
             iter != t.end();
             ++iter) {
          for (typename std::vector<BasicType>::const_iterator
                   iter2 = iter->begin();
               iter2 != iter->end(); ++iter2)
            WriteBasicType(os, binary, *iter2);
          os << "; ";
        }
        os << '\n';
      }
      return os.good();
    } catch(const std::exception &e) {
      KALDI_WARN << "Exception caught writing Table object. " << e.what();
      return false;  // Write failure.
    }
  }

  // Empties the held vector of vectors.
  void Clear() { t_.clear(); }

  // Reads into the holder, replacing the previous content. Returns false on
  // header failure, premature end of input, malformed text or a read error.
  bool Read(std::istream &is) {
    t_.clear();
    bool is_binary;
    if (!InitKaldiInputStream(is, &is_binary)) {
      KALDI_WARN << "Failed reading binary header\n";
      return false;
    }
    if (!is_binary) {
      // In text mode, we terminate with newline.
      try {  // catching errors from ReadBasicType..
        std::vector<BasicType> v;  // temporary vector
        while (1) {
          int i = is.peek();
          if (i == -1) {
            KALDI_WARN << "Unexpected EOF";
            return false;
          } else if (static_cast<char>(i) == '\n') {
            // Elements still pending in v mean the last inner vector was
            // never closed with a semicolon.
            if (!v.empty()) {
              KALDI_WARN << "No semicolon before newline (wrong format)";
              return false;
            } else {
              is.get();
              return true;
            }
          } else if (std::isspace(i)) {
            is.get();
          } else if (static_cast<char>(i) == ';') {
            // Semicolon closes the current inner vector (possibly empty).
            t_.push_back(v);
            v.clear();
            is.get();
          } else {  // some object we want to read...
            BasicType b;
            ReadBasicType(is, false, &b);  // throws on error.
            v.push_back(b);
          }
        }
      } catch(const std::exception &e) {
        KALDI_WARN << "BasicVectorVectorHolder::Read, read error. " << e.what();
        return false;
      }
    } else {  // binary mode.
      size_t filepos = is.tellg();  // Remembered for the warning only.
      try {
        int32 size;
        ReadBasicType(is, true, &size);
        t_.resize(size);
        for (typename std::vector<std::vector<BasicType> >::iterator
                 iter = t_.begin();
             iter != t_.end();
             ++iter) {
          int32 size2;
          ReadBasicType(is, true, &size2);
          iter->resize(size2);
          for (typename std::vector<BasicType>::iterator iter2 = iter->begin();
               iter2 != iter->end();
               ++iter2)
            ReadBasicType(is, true, &(*iter2));
        }
        return true;
      } catch(...) {
        KALDI_WARN << "Read error or unexpected data at archive entry beginning"
            " at file position " << filepos;
        return false;
      }
    }
  }

  // Objects read/written with the Kaldi I/O functions always have the stream
  // open in binary mode for reading.
  static bool IsReadInBinary() { return true; }

  // Gives access to the held vector of vectors.
  T &Value() {  return t_; }

  // Exchanges the held content of the two holders.
  void Swap(BasicVectorVectorHolder<BasicType> *other) {
    t_.swap(other->t_);
  }

  // Ranges are not supported by this holder; always reports an error.
  // `other` is taken by const reference, matching the ExtractRange()
  // signature of the other holder classes in this file; callers passing a
  // non-const holder are unaffected.
  bool ExtractRange(const BasicVectorVectorHolder<BasicType> &other,
                    const std::string &range) {
    KALDI_ERR << "ExtractRange is not defined for this type of holder.";
    return false;
  }

  ~BasicVectorVectorHolder() { }
 private:
  KALDI_DISALLOW_COPY_AND_ASSIGN(BasicVectorVectorHolder);
  T t_;
};


/// BasicPairVectorHolder is a Holder for a vector of pairs of
/// a basic type, e.g. std::vector<std::pair<int32, int32> >.
/// Note: a basic type is defined as a type for which ReadBasicType
/// and WriteBasicType are implemented, i.e. integer and floating
/// types, and bool.
template<class BasicType> class BasicPairVectorHolder {
 public:
  typedef std::vector<std::pair<BasicType, BasicType> > T;

  BasicPairVectorHolder() { }

  static bool Write(std::ostream &os, bool binary, const T &t) {
    InitKaldiOutputStream(os, binary);  // Puts binary header if binary mode.
    try {
      if (binary) {  // need to write the size, in binary mode.
        KALDI_ASSERT(static_cast<size_t>(static_cast<int32>(t.size())) ==
                     t.size());
        // Or this Write routine cannot handle such a large vector.
        // use int32 because it's fixed size regardless of compilation.
        // change to int64 (plus in Read function) if this becomes a problem.
        WriteBasicType(os, binary, static_cast<int32>(t.size()));
        for (typename T::const_iterator iter = t.begin();
             iter != t.end(); ++iter) {
          WriteBasicType(os, binary, iter->first);
          WriteBasicType(os, binary, iter->second);
        }
      } else {  // text mode...
        // In text mode, we write out something like (for integers):
        // "1 2 ; 4 5 ; 6 7 ; 8 9 \n"
        // where the semicolon is a separator, not a terminator.
        for (typename T::const_iterator iter = t.begin();
             iter != t.end();) {
          WriteBasicType(os, binary, iter->first);
          WriteBasicType(os, binary, iter->second);
          ++iter;
          if (iter != t.end())
            os << "; ";
        }
        os << '\n';
      }
      return os.good();
    } catch(const std::exception &e) {
      KALDI_WARN << "Exception caught writing Table object. " << e.what();
      return false;  // Write failure.
    }
  }

  void Clear() { t_.clear(); }

  // Reads into the holder.
  bool Read(std::istream &is) {
    t_.clear();
    bool is_binary;
    if (!InitKaldiInputStream(is, &is_binary)) {
      KALDI_WARN << "Reading Table object [integer type], failed reading binary"
          " header\n";
      return false;
    }
    if (!is_binary) {
      // In text mode, we terminate with newline.
      try {  // catching errors from ReadBasicType..
        std::vector<BasicType> v;  // temporary vector
        while (1) {
          int i = is.peek();
          if (i == -1) {
            KALDI_WARN << "Unexpected EOF";
            return false;
          } else if (static_cast<char>(i) == '\n') {
            if (t_.empty() && v.empty()) {
              is.get();
              return true;
            } else if (v.size() == 2) {
              t_.push_back(std::make_pair(v[0], v[1]));
              is.get();
              return true;
            } else {
              KALDI_WARN << "Unexpected newline, reading vector<pair<?> >; got "
                         << v.size() << " elements, expected 2.";
              return false;
            }
          } else if (std::isspace(i)) {
            is.get();
          } else if (static_cast<char>(i) == ';') {
            if (v.size() != 2) {
              KALDI_WARN << "Wrong input format, reading vector<pair<?> >; got "
                         << v.size() << " elements, expected 2.";
              return false;
            }
            t_.push_back(std::make_pair(v[0], v[1]));
            v.clear();
            is.get();
          } else {  // some object we want to read...
            BasicType b;
            ReadBasicType(is, false, &b);  // throws on error.
            v.push_back(b);
          }
        }
      } catch(const std::exception &e) {
        KALDI_WARN << "BasicPairVectorHolder::Read, read error. " << e.what();
        return false;
      }
    } else {  // binary mode.
      size_t filepos = is.tellg();
      try {
        int32 size;
        ReadBasicType(is, true, &size);
        t_.resize(size);
        for (typename T::iterator iter = t_.begin();
             iter != t_.end();
             ++iter) {
          ReadBasicType(is, true, &(iter->first));
          ReadBasicType(is, true, &(iter->second));
        }
        return true;
      } catch(...) {
        KALDI_WARN << "BasicVectorHolder::Read, read error or unexpected data"
            " at archive entry beginning at file position " << filepos;
        return false;
      }
    }
  }

  // Objects read/written with the Kaldi I/O functions always have the stream
  // open in binary mode for reading.
  static bool IsReadInBinary() { return true; }

  T &Value() {  return t_; }

  void Swap(BasicPairVectorHolder<BasicType> *other) {
    t_.swap(other->t_);
  }

  bool ExtractRange(const BasicPairVectorHolder<BasicType> &other,
                    const std::string &range) {
    KALDI_ERR << "ExtractRange is not defined for this type of holder.";
    return false;
  }

  ~BasicPairVectorHolder() { }
 private:
  KALDI_DISALLOW_COPY_AND_ASSIGN(BasicPairVectorHolder);
  T t_;
};


// We define a Token as a nonempty, printable, whitespace-free std::string.
// The binary and text formats here are the same (newline-terminated)
// and as such we don't bother with the binary-mode headers.
class TokenHolder {
 public:
  typedef std::string T;

  TokenHolder() {}

  // Writes the token followed by a newline; the binary flag is ignored
  // because the format is the same in both modes. `t` must satisfy
  // IsToken(). Returns whether the stream is good afterwards.
  static bool Write(std::ostream &os, bool, const T &t) {  // ignore binary-mode
    KALDI_ASSERT(IsToken(t));
    os << t << '\n';
    return os.good();
  }

  // Empties the held string.
  void Clear() { t_.clear(); }

  // Reads into the holder: one token, optionally followed by whitespace,
  // then a newline (which is consumed). Returns false if extraction fails or
  // something other than a newline follows the token.
  bool Read(std::istream &is) {
    is >> t_;
    if (is.fail()) return false;
    // Keep the peeked value as int: peek() returns int (possibly EOF), and
    // isspace() has undefined behavior for negative values other than EOF,
    // which is what a byte >= 0x80 becomes when squeezed into a signed char.
    int c;
    while (isspace(c = is.peek()) && c != static_cast<int>('\n')) is.get();
    if (is.peek() != '\n') {
      KALDI_WARN << "TokenHolder::Read, expected newline, got char "
        << CharToString(is.peek())
        << ", at stream pos " << is.tellg();
      return false;
    }
    is.get();  // get '\n'
    return true;
  }


  // Since this is fundamentally a text format, read in text mode (would work
  // fine either way, but doing it this way will exercise more of the code).
  static bool IsReadInBinary() { return false; }

  // Gives access to the held token.
  T &Value() { return t_; }

  ~TokenHolder() { }

  // Exchanges the held strings of the two holders.
  void Swap(TokenHolder *other) {
    t_.swap(other->t_);
  }

  // Ranges are not supported by this holder; always reports an error.
  bool ExtractRange(const TokenHolder &other,
                    const std::string &range) {
    KALDI_ERR << "ExtractRange is not defined for this type of holder.";
    return false;
  }

 private:
  KALDI_DISALLOW_COPY_AND_ASSIGN(TokenHolder);
  T t_;
};

// A Token is a nonempty, whitespace-free std::string.
// Class TokenVectorHolder is a Holder class for vectors of these.
class TokenVectorHolder {
 public:
  typedef std::vector<std::string> T;

  TokenVectorHolder() { }

  static bool Write(std::ostream &os, bool, const T &t) {  // ignore binary-mode
    for (std::vector<std::string>::const_iterator iter = t.begin();
         iter != t.end();
         ++iter) {
      KALDI_ASSERT(IsToken(*iter));  // make sure it's whitespace-free,
      // printable and nonempty.
      os << *iter << ' ';
    }
    os << '\n';
    return os.good();
  }

  void Clear() { t_.clear(); }


  // Reads into the holder.
  bool Read(std::istream &is) {
    t_.clear();

    // there is no binary/non-binary mode.

    std::string line;
    getline(is, line);  // this will discard the \n, if present.
    if (is.fail()) {
      KALDI_WARN << "BasicVectorHolder::Read, error reading line " << (is.eof()
                                                                       ? "[eof]" : "");
      return false;  // probably eof.  fail in any case.
    }
    const char *white_chars = " \t\n\r\f\v";
    SplitStringToVector(line, white_chars, true, &t_);  // true== omit
    // empty strings e.g. between spaces.
    return true;
  }

  // Read in text format since it's basically a text-mode thing.. doesn't really
  // matter, it would work either way since we ignore the extra '\r'.
  static bool IsReadInBinary() { return false; }

  T &Value() { return t_; }

  void Swap(TokenVectorHolder *other) {
    t_.swap(other->t_);
  }

  bool ExtractRange(const TokenVectorHolder &other,
                    const std::string &range) {
    KALDI_ERR << "ExtractRange is not defined for this type of holder.";
    return false;
  }

 private:
  KALDI_DISALLOW_COPY_AND_ASSIGN(TokenVectorHolder);
  T t_;
};


//class HtkMatrixHolder {
 //public:
  //typedef std::pair<Matrix<BaseFloat>, HtkHeader> T;

  //HtkMatrixHolder() {}

  //static bool Write(std::ostream &os, bool binary, const T &t) {
    //if (!binary)
      //KALDI_ERR << "Non-binary HTK-format write not supported.";
    //bool ans = WriteHtk(os, t.first, t.second);
    //if (!ans)
      //KALDI_WARN << "Error detected writing HTK-format matrix.";
    //return ans;
  //}

  //void Clear() { t_.first.Resize(0, 0); }

  //// Reads into the holder.
  //bool Read(std::istream &is) {
    //bool ans = ReadHtk(is, &t_.first, &t_.second);
    //if (!ans) {
      //KALDI_WARN << "Error detected reading HTK-format matrix.";
      //return false;
    //}
    //return ans;
  //}

  //// HTK-format matrices only read in binary.
  //static bool IsReadInBinary() { return true; }

  //T &Value() { return t_; }

  //void Swap(HtkMatrixHolder *other) {
    //t_.first.Swap(&(other->t_.first));
    //std::swap(t_.second, other->t_.second);
  //}

  //bool ExtractRange(const HtkMatrixHolder &other,
                    //const std::string &range) {
    //KALDI_ERR << "ExtractRange is not defined for this type of holder.";
    //return false;
  //}
  //// Default destructor.
 //private:
  //KALDI_DISALLOW_COPY_AND_ASSIGN(HtkMatrixHolder);
  //T t_;
//};

// SphinxMatrixHolder can be used to read and write feature files in
// CMU Sphinx format. 13-dimensional big-endian features are assumed.
// The ultimate reference is SphinxBase's source code (for example see
// feat_s2mfc_read() in src/libsphinxbase/feat/feat.c).
// We can't fully automate the detection of machine/feature file endianness
// mismatch here, because for this Sphinx relies on comparing the feature
// file's size with the number recorded in its header. We are working with
// streams, however (what happens if this is a Kaldi archive?). This should
// be no problem, because the usage help of Sphinx' "wave2feat" for example
// says that Sphinx features are always big endian.
// Note: the kFeatDim defaults to 13, see forward declaration in kaldi-holder.h
//template<int kFeatDim> class SphinxMatrixHolder {
 //public:
  //typedef Matrix<BaseFloat> T;

  //SphinxMatrixHolder() {}

  //void Clear() { feats_.Resize(0, 0); }

  //// Writes Sphinx-format features
  //static bool Write(std::ostream &os, bool binary, const T &m) {
    //if (!binary) {
      //KALDI_WARN << "SphinxMatrixHolder can't write Sphinx features in text ";
      //return false;
    //}

    //int32 size = m.NumRows() * m.NumCols();
    //if (MachineIsLittleEndian())
      //KALDI_SWAP4(size);
    //// write the header
    //os.write(reinterpret_cast<char*> (&size), sizeof(size));

    //for (MatrixIndexT i = 0; i < m.NumRows(); i++) {
      //std::vector<float32> tmp(m.NumCols());
      //for (MatrixIndexT j = 0; j < m.NumCols(); j++) {
        //tmp[j] = static_cast<float32>(m(i, j));
        //if (MachineIsLittleEndian())
          //KALDI_SWAP4(tmp[j]);
      //}
      //os.write(reinterpret_cast<char*>(&(tmp[0])),
               //tmp.size() * 4);
    //}
    //return true;
  //}

  //// Reads the features into a Kaldi Matrix
  //bool Read(std::istream &is) {
    //int32 nmfcc;

    //is.read(reinterpret_cast<char*> (&nmfcc), sizeof(nmfcc));
    //if (MachineIsLittleEndian())
      //KALDI_SWAP4(nmfcc);
    //KALDI_VLOG(2) << "#feats: " << nmfcc;
    //int32 nfvec = nmfcc / kFeatDim;
    //if ((nmfcc % kFeatDim) != 0) {
      //KALDI_WARN << "Sphinx feature count is inconsistent with vector length ";
      //return false;
    //}

    //feats_.Resize(nfvec, kFeatDim);
    //for (MatrixIndexT i = 0; i < feats_.NumRows(); i++) {
      //if (sizeof(BaseFloat) == sizeof(float32)) {
        //is.read(reinterpret_cast<char*> (feats_.RowData(i)),
                //kFeatDim * sizeof(float32));
        //if (!is.good()) {
          //KALDI_WARN << "Unexpected error/EOF while reading Sphinx features ";
          //return false;
        //}
        //if (MachineIsLittleEndian()) {
          //for (MatrixIndexT j = 0; j < kFeatDim; j++)
            //KALDI_SWAP4(feats_(i, j));
        //}
      //} else {  // KALDI_DOUBLEPRECISION=1
        //float32 tmp[kFeatDim];
        //is.read(reinterpret_cast<char*> (tmp), sizeof(tmp));
        //if (!is.good()) {
          //KALDI_WARN << "Unexpected error/EOF while reading Sphinx features ";
          //return false;
        //}
        //for (MatrixIndexT j = 0; j < kFeatDim; j++) {
          //if (MachineIsLittleEndian())
            //KALDI_SWAP4(tmp[j]);
          //feats_(i, j) = static_cast<BaseFloat>(tmp[j]);
        //}
      //}
    //}

    //return true;
  //}

  //// Only read in binary
  //static bool IsReadInBinary() { return true; }

  //T &Value() { return feats_; }

  //void Swap(SphinxMatrixHolder *other) {
    //feats_.Swap(&(other->feats_));
  //}

  //bool ExtractRange(const SphinxMatrixHolder &other,
                    //const std::string &range) {
    //KALDI_ERR << "ExtractRange is not defined for this type of holder.";
    //return false;
  //}

 //private:
  //KALDI_DISALLOW_COPY_AND_ASSIGN(SphinxMatrixHolder);
  //T feats_;
//};


/// @} end "addtogroup holders"

}  // end namespace kaldi


#endif  // KALDI_UTIL_KALDI_HOLDER_INL_H_


================================================
FILE: runtime/engine/kaldi/util/kaldi-holder.cc
================================================
// util/kaldi-holder.cc

// Copyright 2009-2011     Microsoft Corporation
//                2016     Xiaohui Zhang

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "util/kaldi-holder.h"
#include "matrix/kaldi-matrix.h"

namespace kaldi {

// Parses a matrix range specifier of the form r1:r2,c1:c2 (zero-based,
// inclusive on both ends) for a matrix with 'rows' rows and 'cols' columns.
//
// A side written as a bare ":" selects the full extent: rows 0 .. rows-1 or
// columns 0 .. cols-1.  If the ",c1:c2" part is left out entirely, all
// columns are selected.
// Examples of valid ranges: 0:39,: or :,5:10 or 0:39
// NOTE(review): whether a half-specified pair such as ":3" or "5:" is
// accepted depends on how SplitStringToIntegers treats the empty field --
// confirm before relying on it.
//
// On success row_range and col_range each hold exactly two values,
// {first, last}.  The last row may exceed rows-1 by up to the length
// tolerance below (a warning is printed in that case); callers are expected
// to clamp it.  The first row must lie inside the matrix.
//
// Any invalid specifier is reported through KALDI_ERR; the 'return false'
// statements after it only matter if KALDI_ERR returns.
//
// NOTE(review): row_range and col_range are not cleared on entry, so they are
// presumably expected to be empty when this is called -- verify against
// callers.
bool ParseMatrixRangeSpecifier(const std::string &range,
                               const int rows, const int cols,
                               std::vector<int32> *row_range,
                               std::vector<int32> *col_range) {
  if (range.empty()) {
    KALDI_ERR << "Empty range specifier.";
    return false;
  }
  // Split into the row part and the (optional) column part.
  std::vector<std::string> splits;
  SplitStringToVector(range, ",", false, &splits);
  if (!((splits.size() == 1 && !splits[0].empty()) ||
        (splits.size() == 2  && !splits[0].empty() && !splits[1].empty()))) {
    KALDI_ERR << "Invalid range specifier for matrix: " << range;
    return false;
  }

  bool status = true;

  if (splits[0] != ":")
    status = SplitStringToIntegers(splits[0], ":", false, row_range);

  if (splits.size() == 2 && splits[1] != ":") {
    status = status && SplitStringToIntegers(splits[1], ":", false, col_range);
  }
  // An empty output vector at this point means "whole extent".
  if (row_range->size() == 0) {
    row_range->push_back(0);
    row_range->push_back(rows - 1);
  }
  if (col_range->size() == 0) {
    col_range->push_back(0);
    col_range->push_back(cols - 1);
  }

  // Length tolerance of 3 -- 2 to account for edge effects when
  // frame-length is 25ms and frame-shift is 10ms, and 1 for rounding effects
  // since segments are usually retained up to 2 decimal places.
  int32 length_tolerance = 3;
  // The tolerance applies to the *end* row only.  The start row has to be a
  // real row of the matrix: otherwise the range contains no row at all and
  // the callers' clamped row count, min(r2, rows - 1) - r1 + 1, would be
  // zero or negative.
  if (!(status && row_range->size() == 2 && col_range->size() == 2 &&
        row_range->at(0) >= 0 && row_range->at(0) <= row_range->at(1) &&
        row_range->at(0) < rows &&
        row_range->at(1) < rows + length_tolerance &&
        col_range->at(0) >=0 &&
        col_range->at(0) <= col_range->at(1) && col_range->at(1) < cols)) {
    KALDI_ERR << "Invalid range specifier: " << range
              << " for matrix of size " << rows
              << "x" << cols;
    return false;
  }

  if (row_range->at(1) >= rows)
    KALDI_WARN << "Row range " << row_range->at(0) << ":" << row_range->at(1)
               << " goes beyond the number of rows of the "
               << "matrix " << rows;
  return status;
}

/*bool ExtractObjectRange(const GeneralMatrix &input, const std::string &range,
                        GeneralMatrix *output) {
  // We just inspect input's type and forward to the correct implementation
  // if available. For kSparseMatrix, we do just fairly inefficient conversion
  // to a full matrix.
  Matrix<BaseFloat> output_mat;
  if (input.Type() == kFullMatrix) {
    const Matrix<BaseFloat> &in = input.GetFullMatrix();
    ExtractObjectRange(in, range, &output_mat);
  } else if (input.Type() == kCompressedMatrix) {
    const CompressedMatrix &in = input.GetCompressedMatrix();
    ExtractObjectRange(in, range, &output_mat);
  } else {
    KALDI_ASSERT(input.Type() == kSparseMatrix);
    // NOTE: this is fairly inefficient, so if this happens to be bottleneck
    // it should be re-implemented more efficiently.
    Matrix<BaseFloat> input_mat;
    input.GetMatrix(&input_mat);
    ExtractObjectRange(input_mat, range, &output_mat);
  }
  output->Clear();
  output->SwapFullMatrix(&output_mat);
  return true;
}

template<class Real>
bool ExtractObjectRange(const CompressedMatrix &input, const std::string &range,
                        Matrix<Real> *output) {
  std::vector<int32> row_range, col_range;

  if (!ParseMatrixRangeSpecifier(range, input.NumRows(), input.NumCols(),
                                 &row_range, &col_range)) {
    KALDI_ERR << "Could not parse range specifier \"" << range << "\".";
  }

  int32 row_size = std::min(row_range[1], input.NumRows() - 1)
                   - row_range[0] + 1,
        col_size = col_range[1] - col_range[0] + 1;

  output->Resize(row_size, col_size, kUndefined);
  input.CopyToMat(row_range[0], col_range[0], output);
  return true;
}

// template instantiation
template bool ExtractObjectRange(const CompressedMatrix &, const std::string &,
                                 Matrix<float> *);
template bool ExtractObjectRange(const CompressedMatrix &, const std::string &,
                                 Matrix<double> *);

*/
template<class Real>
bool ExtractObjectRange(const Matrix<Real> &input, const std::string &range,
                        Matrix<Real> *output) {
  std::vector<int32> row_range, col_range;

  if (!ParseMatrixRangeSpecifier(range, input.NumRows(), input.NumCols(),
                                 &row_range, &col_range)) {
    KALDI_ERR << "Could not parse range specifier \"" << range << "\".";
  }

  int32 row_size = std::min(row_range[1], input.NumRows() - 1)
                   - row_range[0] + 1,
        col_size = col_range[1] - col_range[0] + 1;
  output->Resize(row_size, col_size, kUndefined);
  output->CopyFromMat(input.Range(row_range[0], row_size,
                                  col_range[0], col_size));
  return true;
}

// template instantiation
template bool ExtractObjectRange(const Matrix<double> &, const std::string &,
                                 Matrix<double> *);
template bool ExtractObjectRange(const Matrix<float> &, const std::string &,
                                 Matrix<float> *);

// Copies the elements of 'input' selected by 'range' into *output.
//
// 'range' is the text that was inside the square brackets: either "i1:i2"
// (zero-based, inclusive) or a bare ":" meaning the whole vector.  A comma is
// not allowed.  The end index may exceed Dim()-1 by up to the length
// tolerance below; it is then clamped to the last element and a warning is
// printed.  The start index may not exceed Dim().
//
// Invalid specifiers are reported through KALDI_ERR; the 'return false'
// statements after it only matter if KALDI_ERR returns.  Returns true after
// *output has been resized and filled.
template<class Real>
bool ExtractObjectRange(const Vector<Real> &input, const std::string &range,
                        Vector<Real> *output) {
  if (range.empty()) {
    KALDI_ERR << "Empty range specifier.";
    return false;
  }
  // A vector range has a single part; splitting on ',' must give one piece.
  std::vector<std::string> splits;
  SplitStringToVector(range, ",", false, &splits);
  if (!((splits.size() == 1 && !splits[0].empty()))) {
    KALDI_ERR << "Invalid range specifier for vector: " << range;
    return false;
  }
  std::vector<int32> index_range;
  bool status = true;
  if (splits[0] != ":")
    status = SplitStringToIntegers(splits[0], ":", false, &index_range);

  // Nothing parsed (bare ":"): select the whole vector.
  if (index_range.size() == 0) {
    index_range.push_back(0);
    index_range.push_back(input.Dim() - 1);
  }

  // Length tolerance of 3 -- 2 to account for edge effects when
  // frame-length is 25ms and frame-shift is 10ms, and 1 for rounding effects
  // since segments are usually retained up to 2 decimal places.
  int32 length_tolerance = 3;
  // The tolerance is meant for the end index.  A start index greater than
  // Dim() is rejected: with the end clamped to Dim()-1 it would make the
  // element count computed below negative.  (A start equal to Dim() still
  // produces a zero-length result, as before.)
  if (!(status && index_range.size() == 2 &&
        index_range[0] >= 0 && index_range[0] <= index_range[1] &&
        index_range[0] <= input.Dim() &&
        index_range[1] < input.Dim() + length_tolerance)) {
    KALDI_ERR << "Invalid range specifier: " << range
              << " for vector of size " << input.Dim();
    return false;
  }

  if (index_range[1] >= input.Dim())
    KALDI_WARN << "Range " << index_range[0] << ":" << index_range[1]
               << " goes beyond the vector dimension " << input.Dim();
  // Clamp the inclusive end index to the vector before counting elements.
  int32 size = std::min(index_range[1], input.Dim() - 1) - index_range[0] + 1;
  output->Resize(size, kUndefined);
  output->CopyFromVec(input.Range(index_range[0], size));
  return true;
}

// template instantiation
template bool ExtractObjectRange(const Vector<double> &, const std::string &,
                                 Vector<double> *);
template bool ExtractObjectRange(const Vector<float> &, const std::string &,
                                 Vector<float> *);

// Splits a string such as "1.ark:100[1:2,2:10]" into the part before the '['
// (stored in *data_rxfilename) and the text between the brackets (stored in
// *range, without the closing ']').
//
// The input must be non-empty and end in ']'; otherwise an error is raised
// through KALDI_ERR.  Returns false, leaving the outputs untouched, unless
// splitting on '[' gives exactly two pieces with a non-empty first piece and
// a second piece longer than just "]".
bool ExtractRangeSpecifier(const std::string &rxfilename_with_range,
                           std::string *data_rxfilename,
                           std::string *range) {
  const size_t total_len = rxfilename_with_range.size();
  if (total_len == 0 || rxfilename_with_range[total_len - 1] != ']')
    KALDI_ERR << "ExtractRangeRspecifier called wrongly.";

  std::vector<std::string> parts;
  SplitStringToVector(rxfilename_with_range, "[", false, &parts);

  const bool well_formed =
      parts.size() == 2 && !parts[0].empty() && parts[1].size() > 1;
  if (!well_formed)
    return false;

  const std::string &bracketed = parts[1];
  *data_rxfilename = parts[0];
  // Drop the trailing ']'.
  *range = bracketed.substr(0, bracketed.size() - 1);
  return true;
}

}  // end namespace kaldi


================================================
FILE: runtime/engine/kaldi/util/kaldi-holder.h
================================================
// util/kaldi-holder.h

// Copyright 2009-2011     Microsoft Corporation
//                2016     Johns Hopkins University (author: Daniel Povey)
//                2016     Xiaohui Zhang

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


#ifndef KALDI_UTIL_KALDI_HOLDER_H_
#define KALDI_UTIL_KALDI_HOLDER_H_

#include <algorithm>
#include "util/kaldi-io.h"
#include "util/text-utils.h"
#include "matrix/kaldi-vector.h"

namespace kaldi {


// The Table class uses a Holder class to wrap objects, and make them behave
// in a "normalized" way w.r.t. reading and writing, so the Table class can
// be template-ized without too much trouble. Look below this
// comment (search for GenericHolder) to see what it looks like.
//
//  Requirements of the holder class:
//
// They can only contain objects that can be read/written without external
// information; other objects cannot be stored in this type of archive.
//
// In terms of what functions it should have, see GenericHolder below.
// It is just for documentation.
//
// (1) Requirements of the Read and Write functions
//
// The Read and Write functions should have the property that in a longer
// file, if the Read function is started from where the Write function started
// writing, it should go to where the Write function stopped writing, in either
// text or binary mode (but it's OK if it doesn't eat up trailing space).
//
//     [Desirable property: when writing in text mode the output should contain
//      exactly one newline, at the end of the output; this makes it easier to
//      manipulate]
//
//     [Desirable property for classes: the output should just be a binary-mode
//      header (if in binary mode and it's a Kaldi object, or no header
//      otherwise), and then the output of Object.Write().  This means that when
//      written to individual files with the scp: type of wspecifier, we can
//      read the individual files in the "normal" Kaldi way by reading the
//      binary header and then the object.]
//
//
// The Write function takes a 'binary' argument.  In general, each object will
// have two formats: text and binary.  However, it's permitted to throw() if
// asked to read in the text format if there is none.  The file will be open, if
// the file system has binary/text modes, in the corresponding mode.  However,
// the object should have a file-mode in which it can read either text or binary
// output.  It announces this via the static IsReadInBinary() function.  This
// will generally be the binary mode and it means that where necessary, in text
// formats, we must ignore \r characters.
//
// Memory requirements: if it allocates memory, the destructor should
// free that memory.  Copying and assignment of Holder objects may be
// disallowed as the Table code never does this.


/// GenericHolder serves to document the requirements of the Holder interface;
/// it's not intended to be used.  It wraps a single object of type T (member
/// t_) and shows the set of members a Holder class is expected to provide.
template<class SomeType> class GenericHolder {
 public:
  typedef SomeType T;

  /// Must have a constructor that takes no arguments.
  GenericHolder() { }

  /// Write() writes this object of type T.  Possibly also writes a binary-mode
  /// header so that the Read function knows which mode to read in (since the
  /// Read function does not get this information).  It's a static member so we
  /// can write those not inside this class (can use this function with Value()
  /// to write from this class).  The Write method may throw if it cannot write
  /// the object in the given (binary/non-binary) mode.  The holder object can
  /// assume the stream has been opened in the given mode (where relevant).  The
  /// object can write the data how it likes.
  /// (Declared only; no definition is given here.)
  static bool Write(std::ostream &os, bool binary, const T &t);

  /// Reads into the holder.  Must work out from the stream (which will be
  /// opened on Windows in binary mode if the IsReadInBinary() function of this
  /// class returns true, and text mode otherwise) whether the actual data is
  /// binary or not (usually via reading the Kaldi binary-mode header).
  /// We put the responsibility for reading the Kaldi binary-mode header in the
  /// Read function (rather than making the binary mode an argument to this
  /// function), so that for non-Kaldi binary files we don't have to write the
  /// header, which would prevent the file being read by non-Kaldi programs
  /// (e.g. if we write to individual files using an scp).
  /// Read must deallocate any existing data we have here, if applicable (must
  /// not assume the object was newly constructed).
  /// Returns true on success.
  /// If Read() returns false, the contents of this object and hence the value
  /// returned by Value() may be undefined.
  /// (Declared only; no definition is given here.)
  bool Read(std::istream &is);

  /// IsReadInBinary() will return true if the object wants the file to be
  /// opened in binary for reading (if the file system has binary/text modes),
  /// and false otherwise.  Static function.  Kaldi objects always return true
  /// as they always read in binary mode.  Note that we must be able to read, in
  /// this mode, objects written in both text and binary mode by Write (which
  /// may mean ignoring "\r" characters).  I doubt we will ever want this
  /// function to return false.
  static bool IsReadInBinary() { return true; }

  /// Returns the value of the object held here.  Will only
  /// ever be called if Read() has been previously called and it returned
  /// true (so OK to throw exception if no object was read).
  T &Value() { return t_; }  // if t is a pointer, would return *t_;

  /// The Clear() function doesn't have to do anything.  Its purpose is to
  /// allow the object to free resources if they're no longer needed.
  void Clear() { }

  /// This swaps the objects held by *this and *other (preferably a shallow
  /// swap).  Note, this is just an example.  The swap is with the *same type*
  /// of holder, not with some nonexistent base-class (remember, GenericHolder is
  /// an example for documentation, not a base-class).
  void Swap(GenericHolder<T> *other) { std::swap(t_, other->t_); }

  /// At the time of writing this will only do something meaningful for a
  /// KaldiObjectHolder holding matrix objects, in order to extract a holder
  /// holding a sub-matrix specified by 'range', e.g. [0:3,2:10], like in Matlab
  /// but with zero-based indexing. It returns true with successful extraction
  /// of the range, false if the range was invalid or outside the bounds of the
  /// matrix.  For other types of holder it just throws an error.
  bool ExtractRange(const GenericHolder<T> &other, const std::string &range) {
    KALDI_ERR << "ExtractRange is not defined for this type of holder.";
    return false;
  }

  /// If the object held pointers, the destructor would free them.
  ~GenericHolder() { }

 private:
  // Holders are not meant to be copied or assigned.
  KALDI_DISALLOW_COPY_AND_ASSIGN(GenericHolder);
  T t_;  // t_ may alternatively be of type T*.
};


// See kaldi-holder-inl.h for examples of some actual Holder
// classes and templates.


// The following two typedefs should probably be in their own file, but they're
// here until there are enough of them to warrant their own header.


/// \addtogroup holders
/// @{

/// KaldiObjectHolder works for Kaldi objects that have the "standard" Read
/// and Write functions, and a copy constructor.
template<class KaldiType> class KaldiObjectHolder;

/// BasicHolder is valid for float, double, bool, and integer
/// types.  There will be a compile time error otherwise, because
/// we make sure that the {Write, Read}BasicType functions do not
/// get instantiated for other types.
template<class BasicType> class BasicHolder;


// A Holder for a vector of basic types, e.g.
// std::vector<int32>, std::vector<float>, and so on.
// Note: a basic type is defined as a type for which ReadBasicType
// and WriteBasicType are implemented, i.e. integer and floating
// types, and bool.
template<class BasicType> class BasicVectorHolder;


// A holder for vectors of vectors of basic types, e.g.
// std::vector<std::vector<int32> >, and so on.
// Note: a basic type is defined as a type for which ReadBasicType
// and WriteBasicType are implemented, i.e. integer and floating
// types, and bool.
template<class BasicType> class BasicVectorVectorHolder;

// A holder for vectors of pairs of basic types, e.g.
// std::vector<std::pair<int32, int32> >, and so on.
// Note: a basic type is defined as a type for which ReadBasicType
// and WriteBasicType are implemented, i.e. integer and floating
// types, and bool.  Text format is (e.g. for integers),
// "1 12 ; 43 61 ; 17 8 \n"
template<class BasicType> class BasicPairVectorHolder;

/// We define a Token (not a typedef, just a word) as a nonempty, printable,
/// whitespace-free std::string.  The binary and text formats here are the same
/// (newline-terminated) and as such we don't bother with the binary-mode
/// headers.
class TokenHolder;

/// Class TokenVectorHolder is a Holder class for vectors of Tokens
/// (T == std::string).
class TokenVectorHolder;

/// A class for reading/writing HTK-format matrices.
/// T == std::pair<Matrix<BaseFloat>, HtkHeader>
//class HtkMatrixHolder;

/// A class for reading/writing Sphinx format matrices.
//template<int kFeatDim = 13> class SphinxMatrixHolder;

/// This templated function exists so that we can write .scp files with
/// 'object ranges' specified: the canonical example is a [first:last] range
/// of rows of a matrix, or [first-row:last-row,first-column:last-column]
/// of a matrix.  We can also support [begin-time:end-time] of a wave
/// file.  The string 'range' is whatever is in the square brackets; it is
/// parsed inside this function.
/// This function returns true if the partial object was successfully extracted,
/// and false if there was an error such as an invalid range.
/// The generic version of this function just fails; we overload the template
/// whenever we need it for a specific class.
template <class T>
bool ExtractObjectRange(const T &input, const std::string &range, T *output) {
  // Generic fallback: no argument is inspected, an error is always raised.
  KALDI_ERR << "Ranges not supported for objects of this type.";
  return false;
}

/// The template is specialized with a version that actually does something,
/// for types Matrix<float> and Matrix<double>.  We can later add versions of
/// this template for other types, such as Vector, which can meaningfully
/// have ranges extracted.
template <class Real>
bool ExtractObjectRange(const Matrix<Real> &input, const std::string &range,
                        Matrix<Real> *output);

/// The template is specialized for types Vector<float> and Vector<double>.
template <class Real>
bool ExtractObjectRange(const Vector<Real> &input, const std::string &range,
                        Vector<Real> *output);

/// GeneralMatrix is always of type BaseFloat
//bool ExtractObjectRange(const GeneralMatrix &input, const std::string &range,
 //                       GeneralMatrix *output);

/// CompressedMatrix is always of the type BaseFloat but it is more
/// efficient to provide template as it uses CompressedMatrix's own
/// conversion to Matrix<Real>
//template <class Real>
//bool ExtractObjectRange(const CompressedMatrix &input, const std::string &range,
 //                       Matrix<Real> *output);

// In SequentialTableReaderScriptImpl and RandomAccessTableReaderScriptImpl, for
// cases where the scp contained 'range specifiers' (things in square brackets
// identifying parts of objects like matrices), use this function to separate
// the input string 'rxfilename_with_range' (e.g "1.ark:100[1:2,2:10]") into the data_rxfilename
// (e.g. "1.ark:100") and the optional range specifier which will be everything
// inside the square brackets.  It returns true if everything seems OK, and
// false if for example the string contained more than one '['.  This function
// should only be called if 'rxfilename_with_range' ends in ']', otherwise it is an error.
bool ExtractRangeSpecifier(const std::string &rxfilename_with_range,
                           std::string *data_rxfilename,
                           std::string *range);


/// @} end "addtogroup holders"


}  // end namespace kaldi

#include "util/kaldi-holder-inl.h"

#endif  // KALDI_UTIL_KALDI_HOLDER_H_


================================================
FILE: runtime/engine/kaldi/util/kaldi-io-inl.h
================================================
// util/kaldi-io-inl.h

// Copyright 2009-2011 Microsoft Corporation

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//  http://www.apache.org/licenses/LICENSE-2.0

// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_UTIL_KALDI_IO_INL_H_
#define KALDI_UTIL_KALDI_IO_INL_H_

#include<string>

namespace kaldi {

// Opens 'rxfilename' by forwarding to OpenInternal() with its second argument
// set to true, passing 'binary' through unchanged.  Returns whatever
// OpenInternal() returns.
bool Input::Open(const std::string &rxfilename, bool *binary) {
  const bool opened = OpenInternal(rxfilename, true, binary);
  return opened;
}

// Opens 'rxfilename' by forwarding to OpenInternal() with its second argument
// set to false and no output pointer (NULL).  Returns whatever
// OpenInternal() returns.
bool Input::OpenTextMode(const std::string &rxfilename) {
  const bool opened = OpenInternal(rxfilename, false, NULL);
  return opened;
}

// Returns true iff this Input currently has an implementation object
// (impl_ is non-NULL).
bool Input::IsOpen() {
  if (impl_ == NULL)
    return false;
  return true;
}

// Returns true iff this Output currently has an implementation object
// (impl_ is non-NULL).
bool Output::IsOpen() {
  if (impl_ == NULL)
    return false;
  return true;
}


}  // end namespace kaldi.


#endif  // KALDI_UTIL_KALDI_IO_INL_H_


================================================
FILE: runtime/engine/kaldi/util/kaldi-io.cc
================================================
// util/kaldi-io.cc

// Copyright 2009-2011  Microsoft Corporation;  Jan Silovsky
//                2016  Xiaohui Zhang

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//  http://www.apache.org/licenses/LICENSE-2.0

// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "util/kaldi-io.h"
#include <errno.h>
#include <cstdlib>
#include "base/kaldi-math.h"
#include "util/text-utils.h"
#include "util/parse-options.h"
#include "util/kaldi-holder.h"
#include "util/kaldi-pipebuf.h"
#include "util/kaldi-table.h"  // for Classify{W,R}specifier
#include <stdio.h>
#include <stdlib.h>

#ifdef KALDI_CYGWIN_COMPAT
#include "util/kaldi-cygwin-io-inl.h"
#define MapOsPath(x) MapCygwinPath(x)
#else  // KALDI_CYGWIN_COMPAT
#define MapOsPath(x) x
#endif  // KALDI_CYGWIN_COMPAT


// When compiling with MSVC, define a file-local popen() so the code below can
// call popen() uniformly.  It forwards to kaldi::CygwinCompatPopen() when
// KALDI_CYGWIN_COMPAT is defined, and to _popen() otherwise.
#if defined(_MSC_VER) 
static FILE *popen(const char* command, const char* mode) {
#ifdef KALDI_CYGWIN_COMPAT
  return kaldi::CygwinCompatPopen(command, mode);
#else  // KALDI_CYGWIN_COMPAT
  return _popen(command, mode);
#endif  // KALDI_CYGWIN_COMPAT
}
#endif  // _MSC_VER

namespace kaldi {

#ifndef _MSC_VER  // on VS, we don't need this type.
// Stream-buffer type that PipeOutputImpl::Open() constructs around the FILE*
// returned by popen().
// could replace basic_pipebuf<char> with stdio_filebuf<char> on some platforms.
// Would mean we could use less of our own code.
typedef basic_pipebuf<char> PipebufType;
#endif
}

namespace kaldi {

// Returns a form of 'rxfilename' suitable for printing in messages:
// "standard input" for "" or "-", otherwise the result of
// ParseOptions::Escape() on the name.
std::string PrintableRxfilename(const std::string &rxfilename) {
  const bool is_stdin = rxfilename.empty() || rxfilename == "-";
  if (is_stdin)
    return "standard input";

  // If this call to Escape later causes compilation issues,
  // just replace it with "return rxfilename"; it's only a
  // pretty-printing issue.
  return ParseOptions::Escape(rxfilename);
}


// Returns a form of 'wxfilename' suitable for printing in messages:
// "standard output" for "" or "-", otherwise the result of
// ParseOptions::Escape() on the name.
std::string PrintableWxfilename(const std::string &wxfilename) {
  const bool is_stdout = wxfilename.empty() || wxfilename == "-";
  if (is_stdout)
    return "standard output";

  // If this call to Escape later causes compilation issues,
  // just replace it with "return wxfilename"; it's only a
  // pretty-printing issue.
  return ParseOptions::Escape(wxfilename);
}


// Classifies an output filename ("wxfilename") by inspecting its text only.
//
// Returns:
//   kStandardOutput  for "" or "-";
//   kPipeOutput      if it starts with '|';
//   kNoOutput        if it has leading/trailing whitespace, ends with '|',
//                    looks like a table specifier (e.g. "ark:..."), ends in
//                    ":<digits>" (a file offset), or contains a '|' anywhere
//                    else;
//   kFileOutput      otherwise.
OutputType ClassifyWxfilename(const std::string &filename) {
  const char *c = filename.c_str();
  size_t length = filename.length();
  char first_char = c[0],
      last_char = (length == 0 ? '\0' : c[filename.length()-1]);
  // isspace()/isdigit() have undefined behavior for negative arguments other
  // than EOF, and plain char may be signed; filenames can contain non-ASCII
  // bytes, so always classify the unsigned value.
  const unsigned char first_uc = static_cast<unsigned char>(first_char),
      last_uc = static_cast<unsigned char>(last_char);

  // if 'filename' is "" or "-", return kStandardOutput.
  if (length == 0 || (length == 1 && first_char == '-'))
    return kStandardOutput;
  else if (first_char == '|') return kPipeOutput;  // An output pipe like "|blah".
  else if (isspace(first_uc) || isspace(last_uc) || last_char == '|') {
      return kNoOutput;  // Leading or trailing space: can't interpret this.
                         // Final '|' would represent an input pipe, not an
                         // output pipe.
  } else if ((first_char == 'a' || first_char == 's') &&
             strchr(c, ':') != NULL &&
             (ClassifyWspecifier(filename, NULL, NULL, NULL) != kNoWspecifier ||
              ClassifyRspecifier(filename, NULL, NULL) != kNoRspecifier)) {
    // e.g. ark:something or scp:something... this is almost certainly a
    // scripting error, so call it an error rather than treating it as a file.
    // In practice in modern kaldi scripts all (r,w)filenames begin with "ark"
    // or "scp", even though technically speaking options like "b", "t", "s" or
    // "cs" can appear before the ark or scp, like "b,ark".  For efficiency,
    // and because this code is really just a nicety to catch errors earlier
    // than they would otherwise be caught, we only call those extra functions
    // for filenames beginning with 'a' or 's'.
    return kNoOutput;
  } else if (isdigit(last_uc)) {
    // This could be a file, but we have to see if it's an offset into a file
    // (like foo.ark:4314328), which is not allowed for writing (but is
    // allowed for reading).  This eliminates some things which would be
    // valid UNIX filenames but are not allowed by Kaldi.  (Even if we allowed
    // such filenames for writing, we wouldn't be able to correctly read them).
    // Walk back over the trailing digits, stopping at the first character.
    const char *d = c + length - 1;
    while (isdigit(static_cast<unsigned char>(*d)) && d > c) d--;
    if (*d == ':') return kNoOutput;
    // else it could still be a filename; continue to the next check.
  }

  // At this point it matched no other pattern so we assume a filename, but we
  // check for internal '|' as it's a common source of errors to have pipe
  // commands without the pipe in the right place.  Say that it can't be
  // classified.
  if (strchr(c, '|') != NULL) {
    KALDI_WARN << "Trying to classify wxfilename with pipe symbol in the"
        " wrong place (pipe without | at the beginning?): " <<
        filename;
    return kNoOutput;
  }
  return kFileOutput;  // It matched no other pattern: assume it's a filename.
}


// Classifies an input filename ("rxfilename") by inspecting its text only.
//
// Returns:
//   kStandardInput    for "" or "-";
//   kPipeInput        if it ends with '|';
//   kOffsetFileInput  if it ends in ":<digits>" (e.g. some_file:12345);
//   kNoInput          if it starts with '|', has leading/trailing
//                     whitespace, looks like a table specifier
//                     (e.g. "ark:..."), or contains a '|' anywhere else;
//   kFileInput        otherwise.
InputType ClassifyRxfilename(const std::string &filename) {
  const char *c = filename.c_str();
  size_t length = filename.length();
  char first_char = c[0],
      last_char = (length == 0 ? '\0' : c[filename.length()-1]);
  // isspace()/isdigit() have undefined behavior for negative arguments other
  // than EOF, and plain char may be signed; filenames can contain non-ASCII
  // bytes, so always classify the unsigned value.
  const unsigned char first_uc = static_cast<unsigned char>(first_char),
      last_uc = static_cast<unsigned char>(last_char);

  // if 'filename' is "" or "-", return kStandardInput.
  if (length == 0 || (length == 1 && first_char == '-')) {
    return kStandardInput;
  } else if (first_char == '|') {
    return kNoInput;  // An output pipe like "|blah": not
                      // valid for input.
  } else if (last_char == '|') {
    return kPipeInput;
  } else if (isspace(first_uc) || isspace(last_uc)) {
    return kNoInput;  // We don't allow leading or trailing space in a filename.
  } else if ((first_char == 'a' || first_char == 's') &&
             strchr(c, ':') != NULL &&
            (ClassifyWspecifier(filename, NULL, NULL, NULL) != kNoWspecifier ||
             ClassifyRspecifier(filename, NULL, NULL) != kNoRspecifier)) {
    // e.g. ark:something or scp:something... this is almost certainly a
    // scripting error, so call it an error rather than treating it as a file.
    // In practice in modern kaldi scripts all (r,w)filenames begin with "ark"
    // or "scp", even though technically speaking options like "b", "t", "s" or
    // "cs" can appear before the ark or scp, like "b,ark".  For efficiency,
    // and because this code is really just a nicety to catch errors earlier
    // than they would otherwise be caught, we only call those extra functions
    // for filenames beginning with 'a' or 's'.
    return kNoInput;
  } else if (isdigit(last_uc)) {
    // Walk back over the trailing digits, stopping at the first character.
    const char *d = c + length - 1;
    while (isdigit(static_cast<unsigned char>(*d)) && d > c) d--;
    if (*d == ':') return kOffsetFileInput;  // Filename is like
                                             // some_file:12345
    // otherwise it could still be a filename; continue to the next check.
  }


  // At this point it matched no other pattern so we assume a filename, but
  // we check for '|' as it's a common source of errors to have pipe
  // commands without the pipe in the right place.  Say that it can't be
  // classified in this case.
  if (strchr(c, '|') != NULL) {
    KALDI_WARN << "Trying to classify rxfilename with pipe symbol in the"
        " wrong place (pipe without | at the end?): " << filename;
    return kNoInput;
  }
  return kFileInput;  // It matched no other pattern: assume it's a filename.
}

// Abstract interface implemented by the concrete output back-ends in this
// file (file, standard output, pipe).
class OutputImplBase {
 public:
  virtual ~OutputImplBase() { }

  // Opens the output named by 'filename' as a plain stream (no header is
  // written) and returns true on success.  Must not be called on a stream
  // that is already open.
  virtual bool Open(const std::string &filename, bool binary) = 0;

  // Returns the stream to write to.
  virtual std::ostream &Stream() = 0;

  // Closes the output; the return value reports success.
  virtual bool Close() = 0;
};


// Output back-end that writes to a file through a std::ofstream.
class FileOutputImpl: public OutputImplBase {
 public:
  // Opens 'filename' for writing, in binary mode if 'binary' is true.  The
  // path is passed through MapOsPath() first.  Raises an error if a file is
  // already open; returns true iff the stream ended up open.
  virtual bool Open(const std::string &filename, bool binary) {
    if (os_.is_open()) KALDI_ERR << "FileOutputImpl::Open(), "
                                << "open called on already open file.";
    filename_ = filename;
    std::ios_base::openmode mode = std::ios_base::out;
    if (binary)
      mode |= std::ios_base::binary;
    os_.open(MapOsPath(filename_).c_str(), mode);
    return os_.is_open();
  }

  // Returns the underlying stream; raises an error if no file is open.
  virtual std::ostream &Stream() {
    // I believe this error can only arise from coding error.
    if (!os_.is_open())
      KALDI_ERR << "FileOutputImpl::Stream(), file is not open.";
    return os_;
  }

  // Closes the file; raises an error if it is not open.  Returns true iff
  // the stream is not in a failed state after closing.
  virtual bool Close() {
    // I believe this error can only arise from coding error.
    if (!os_.is_open())
      KALDI_ERR << "FileOutputImpl::Close(), file is not open.";
    os_.close();
    const bool failed = os_.fail();
    return !failed;
  }

  // Closes the file if it is still open, raising an error if that fails.
  virtual ~FileOutputImpl() {
    if (!os_.is_open())
      return;
    os_.close();
    if (os_.fail())
      KALDI_ERR << "Error closing output file " << filename_;
  }

 private:
  std::string filename_;  // name given to Open(), kept for error messages.
  std::ofstream os_;
};

// Output back-end that writes to std::cout.  It only tracks an "open" flag;
// the 'filename' argument of Open() is not used.
class StandardOutputImpl: public OutputImplBase {
 public:
  StandardOutputImpl(): is_open_(false) { }

  // Marks standard output as open if std::cout is in a good state, and
  // returns that state.  Under MSVC the mode of stdout is first set to
  // binary or text according to 'binary'.  Raises an error if already open.
  virtual bool Open(const std::string &filename, bool binary) {
    if (is_open_) KALDI_ERR << "StandardOutputImpl::Open(), "
                     "open called on already open file.";
#ifdef _MSC_VER
    _setmode(_fileno(stdout), binary ? _O_BINARY : _O_TEXT);
#endif
    const bool usable = std::cout.good();
    is_open_ = usable;
    return usable;
  }

  // Returns std::cout; raises an error if Open() has not succeeded.
  virtual std::ostream &Stream() {
    // I believe this error can only arise from coding error.
    if (!is_open_)
      KALDI_ERR << "StandardOutputImpl::Stream(), object not initialized.";
    return std::cout;
  }

  // Marks the output closed and flushes std::cout; raises an error if it was
  // not open.  Returns true iff std::cout is not in a failed state.
  virtual bool Close() {
    if (!is_open_)
      KALDI_ERR << "StandardOutputImpl::Close(), file is not open.";
    is_open_ = false;
    std::cout.flush();
    const bool failed = std::cout.fail();
    return !failed;
  }

  // Flushes std::cout if still open, raising an error if the stream failed.
  virtual ~StandardOutputImpl() {
    if (!is_open_)
      return;
    std::cout.flush();
    if (std::cout.fail())
      KALDI_ERR << "Error writing to standard output";
  }

 private:
  bool is_open_;
};

// Output implementation that writes to the standard input of a command
// started with popen().  The wxfilename has the form "|command".
class PipeOutputImpl: public OutputImplBase {
 public:
  PipeOutputImpl(): f_(NULL), os_(NULL) {
#ifndef _MSC_VER
    // Keep the buffer pointer well-defined until Open() allocates it, so it
    // is never left holding an indeterminate value.
    fb_ = NULL;
#endif
  }

  // Starts the command that follows the leading '|' and attaches a C++
  // stream to the resulting pipe.  Returns false (after a warning) if the
  // pipe could not be created, otherwise whether the new stream is good.
  virtual bool Open(const std::string &wxfilename, bool binary) {
    filename_ = wxfilename;
    KALDI_ASSERT(f_ == NULL);  // Make sure closed.
    KALDI_ASSERT(wxfilename.length() != 0 && wxfilename[0] == '|');  // should
    // start with '|'
    std::string cmd_name(wxfilename, 1);  // the command, without the '|'.
#if defined(_MSC_VER) || defined(__CYGWIN__)
    f_ = popen(cmd_name.c_str(), (binary ? "wb" : "w"));
#else
    f_ = popen(cmd_name.c_str(), "w");
#endif
    if (!f_) {  // Failure.
      KALDI_WARN << "Failed opening pipe for writing, command is: "
                 << cmd_name << ", errno is " << strerror(errno);
      return false;
    } else {
#ifndef _MSC_VER
      fb_ = new PipebufType(f_,  // Using this constructor won't make the
                                 // destructor try to close the stream when
                                 // we're done.
                                  (binary ? std::ios_base::out|
                                   std::ios_base::binary
                                   :std::ios_base::out));
      KALDI_ASSERT(fb_ != NULL);  // or would be alloc error.
      os_ = new std::ostream(fb_);
#else
      os_ = new std::ofstream(f_);
#endif
      return os_->good();
    }
  }

  // Returns the stream attached to the pipe; calling this before Open() is
  // treated as a coding error.
  virtual std::ostream &Stream() {
    if (os_ == NULL) KALDI_ERR << "PipeOutputImpl::Stream(),"
                                  " object not initialized.";
    // I believe this error can only arise from coding error.
    return *os_;
  }

  // Flushes and destroys the stream, then closes the pipe.  The return value
  // only reflects whether the flush succeeded; a nonzero status from the
  // command is reported with a warning but does not change the result.
  virtual bool Close() {
    if (os_ == NULL) KALDI_ERR << "PipeOutputImpl::Close(), file is not open.";
    bool ok = true;
    os_->flush();
    if (os_->fail()) ok = false;
    delete os_;
    os_ = NULL;
    int status;
#ifdef _MSC_VER
    status = _pclose(f_);
#else
    status = pclose(f_);
#endif
    if (status)
      KALDI_WARN << "Pipe " << filename_ << " had nonzero return status "
                 << status;
    f_ = NULL;
#ifndef _MSC_VER
    delete fb_;
    fb_ = NULL;
#endif
    return ok;
  }

  // Closes the pipe if it is still open, raising an error if that fails.
  virtual ~PipeOutputImpl() {
    if (os_) {
      if (!Close())
        KALDI_ERR << "Error writing to pipe " << PrintableWxfilename(filename_);
    }
  }
 private:
  std::string filename_;  // the full wxfilename, including the leading '|'.
  FILE *f_;               // handle returned by popen(); NULL when closed.
#ifndef _MSC_VER
  PipebufType *fb_;       // stream buffer wrapping f_; NULL when closed.
#endif
  std::ostream *os_;      // stream handed out by Stream(); NULL when closed.
};


// Abstract interface shared by the input-stream implementations defined in
// this file (file, standard input, pipe and offset-into-file).
class InputImplBase {
 public:
  // Open will open it as a file, and return true on success.
  // It may be called twice only for kOffsetFileInput; otherwise, when
  // reopening, a new implementation object is created instead, to avoid
  // having to deal with the extra hassle of reopening with the
  // same object.
  // Note that we will call Open with true (binary) even
  // for text-mode Kaldi files; the only actual text-mode input
  // is for non-Kaldi files.
  virtual bool Open(const std::string &filename, bool binary) = 0;
  // Returns the stream that callers read from.
  virtual std::istream &Stream() = 0;
  virtual int32 Close() = 0;  // Returns a status code.  For input streams we
                              // only need to check failure on close in the
                              // case of kPipeInput.
  virtual InputType MyType() = 0;  // Needed because if it's kOffsetFileInput,
                                   // we may call Open twice on the same
                                   // object (has efficiency benefits).

  // Virtual so that implementations can be deleted through a base pointer.
  virtual ~InputImplBase() { }
};

// Input implementation that reads from an ordinary file through an
// std::ifstream.
class FileInputImpl: public InputImplBase {
 public:
  // Opens `filename` for reading, in binary mode if `binary` is true.
  // Returns true if the file is open afterwards; raises an error if this
  // object already has an open file.
  virtual bool Open(const std::string &filename, bool binary) {
    if (is_.is_open()) {
      KALDI_ERR << "FileInputImpl::Open(), "
                << "open called on already open file.";
    }
    std::ios_base::openmode mode = std::ios_base::in;
    if (binary) mode |= std::ios_base::binary;
    is_.open(MapOsPath(filename).c_str(), mode);
    return is_.is_open();
  }

  // Returns the underlying stream; calling this without an open file is
  // treated as a coding error.
  virtual std::istream &Stream() {
    if (!is_.is_open()) {
      KALDI_ERR << "FileInputImpl::Stream(), file is not open.";
    }
    return is_;
  }

  // Closes the file.  The close status is deliberately not checked, so the
  // result is always zero.
  virtual int32 Close() {
    if (!is_.is_open()) {
      KALDI_ERR << "FileInputImpl::Close(), file is not open.";
    }
    is_.close();
    return 0;
  }

  virtual InputType MyType() { return kFileInput; }

  // Nothing to do: the stream closes itself, and a failure while closing an
  // input file is of no interest.
  virtual ~FileInputImpl() { }

 private:
  std::ifstream is_;
};


// Input implementation that reads from std::cin.
class StandardInputImpl: public InputImplBase {
 public:
  StandardInputImpl(): is_open_(false) { }

  // "Opens" standard input.  The filename is not used; on MSVC the mode of
  // stdin is switched to binary or text as requested.  Always returns true:
  // good() is not consulted because it would be false at end-of-file, which
  // may be perfectly valid input.
  virtual bool Open(const std::string &filename, bool binary) {
    if (is_open_) {
      KALDI_ERR << "StandardInputImpl::Open(), "
                   "open called on already open file.";
    }
#ifdef _MSC_VER
    _setmode(_fileno(stdin), binary ? _O_BINARY : _O_TEXT);
#endif
    is_open_ = true;
    return true;
  }

  // Returns std::cin; calling this before Open() is treated as a coding
  // error.
  virtual std::istream &Stream() {
    if (!is_open_) {
      KALDI_ERR << "StandardInputImpl::Stream(), object not initialized.";
    }
    return std::cin;
  }

  virtual InputType MyType() { return kStandardInput; }

  // Marks the object as closed; the result is always zero.
  virtual int32 Close() {
    if (!is_open_) {
      KALDI_ERR << "StandardInputImpl::Close(), file is not open.";
    }
    is_open_ = false;
    return 0;
  }

  virtual ~StandardInputImpl() { }

 private:
  bool is_open_;  // true between Open() and Close().
};

// Input implementation that reads the standard output of a command started
// with popen().  The rxfilename has the form "command|".
class PipeInputImpl: public InputImplBase {
 public:
  PipeInputImpl(): f_(NULL), is_(NULL) {
#ifndef _MSC_VER
    // Keep the buffer pointer well-defined until Open() allocates it, so it
    // is never left holding an indeterminate value.
    fb_ = NULL;
#endif
  }

  // Starts the command that precedes the trailing '|' and attaches a C++
  // stream to the resulting pipe.  Returns false if the pipe could not be
  // created or the new stream is in a failed state; an empty pipe only
  // produces a warning.
  virtual bool Open(const std::string &rxfilename, bool binary) {
    filename_ = rxfilename;
    KALDI_ASSERT(f_ == NULL);  // Make sure closed.
    KALDI_ASSERT(rxfilename.length() != 0 &&
           rxfilename[rxfilename.length()-1] == '|');  // should end with '|'
    // The command is everything except the final '|'.
    std::string cmd_name(rxfilename, 0, rxfilename.length()-1);
#if defined(_MSC_VER) || defined(__CYGWIN__)
    f_ = popen(cmd_name.c_str(), (binary ? "rb" : "r"));
#else
    f_ = popen(cmd_name.c_str(), "r");
#endif

    if (!f_) {  // Failure.
      KALDI_WARN << "Failed opening pipe for reading, command is: "
                 << cmd_name << ", errno is " << strerror(errno);
      return false;
    } else {
#ifndef _MSC_VER
      fb_ = new PipebufType(f_,  // Using this constructor won't lead the
                                 // destructor to close the stream.
                                 (binary ? std::ios_base::in|
                                  std::ios_base::binary
                                  :std::ios_base::in));
      KALDI_ASSERT(fb_ != NULL);  // or would be alloc error.
      is_ = new std::istream(fb_);
#else
      is_ = new std::ifstream(f_);
#endif
      if (is_->fail() || is_->bad()) return false;
      if (is_->eof()) {
        KALDI_WARN << "Pipe opened with command "
                   << PrintableRxfilename(rxfilename)
                   << " is empty.";
        // don't return false: empty may be valid.
      }
      return true;
    }
  }

  // Returns the stream attached to the pipe; calling this before Open() is
  // treated as a coding error.
  virtual std::istream &Stream() {
    if (is_ == NULL)
      KALDI_ERR << "PipeInputImpl::Stream(), object not initialized.";
    // I believe this error can only arise from coding error.
    return *is_;
  }

  // Destroys the stream, closes the pipe and returns the status reported by
  // pclose(); a nonzero status is also logged as a warning.
  virtual int32 Close() {
    if (is_ == NULL)
      KALDI_ERR << "PipeInputImpl::Close(), file is not open.";
    delete is_;
    is_ = NULL;
    int32 status;
#ifdef _MSC_VER
    status = _pclose(f_);
#else
    status = pclose(f_);
#endif
    if (status)
      KALDI_WARN << "Pipe " << filename_ << " had nonzero return status "
                 << status;
    f_ = NULL;
#ifndef _MSC_VER
    delete fb_;
    fb_ = NULL;
#endif
    return status;
  }

  // Closes the pipe if it is still open; the status is ignored.
  virtual ~PipeInputImpl() {
    if (is_)
      Close();
  }
  virtual InputType MyType() { return kPipeInput; }
 private:
  std::string filename_;  // the full rxfilename, including the trailing '|'.
  FILE *f_;               // handle returned by popen(); NULL when closed.
#ifndef _MSC_VER
  PipebufType *fb_;       // stream buffer wrapping f_; NULL when closed.
#endif
  std::istream *is_;      // stream handed out by Stream(); NULL when closed.
};

/*
#else

// Just have an empty implementation of the pipe input that crashes if
// called.
class PipeInputImpl: public InputImplBase {
 public:
  PipeInputImpl() { KALDI_ASSERT(0 && "Pipe input not yet supported on this
  platform."); }
  virtual bool Open(const std::string, bool) { return 0; }
  virtual std::istream &Stream() const { return NULL; }
  virtual void Close() {}
  virtual InputType MyType() { return kPipeInput; }
};

#endif
*/

// Input implementation for rxfilenames of the form "filename:offset"
// (kOffsetFileInput).  It differs from the other input implementations in
// that Open() may be called on an object that is already open: if the file
// name and mode are unchanged it only seeks to the new offset.
class OffsetFileInputImpl: public InputImplBase {
 public:
  // splits a filename like /my/file:123 into /my/file and the
  // number 123.  Crashes if not this format.
  static void SplitFilename(const std::string &rxfilename,
                            std::string *filename,
                            size_t *offset) {
    size_t pos = rxfilename.find_last_of(':');
    KALDI_ASSERT(pos != std::string::npos);  // would indicate error in calling
    // code, as the filename is supposed to be of the correct form at this
    // point.
    *filename = std::string(rxfilename, 0, pos);
    std::string number(rxfilename, pos+1);
    bool ans = ConvertStringToInteger(number, offset);
    if (!ans)
      KALDI_ERR << "Cannot get offset from filename " << rxfilename
                << " (possibly you compiled in 32-bit and have a >32-bit"
                << " byte offset into a file; you'll have to compile 64-bit.";
  }

  // Positions the open stream at byte `offset` from the start of the file.
  // Returns true on success.  If an actual seek fails, the file is closed
  // and false is returned.
  bool Seek(size_t offset) {
    size_t cur_pos = is_.tellg();
    if (cur_pos == offset) return true;
    else if (cur_pos<offset && cur_pos+100 > offset) {
      // We're close enough that it may be faster to just
      // read that data, rather than seek.
      for (size_t i = cur_pos; i < offset; i++)
        is_.get();
      return (is_.tellg() == std::streampos(offset));
    }
    // Try to actually seek.
    is_.seekg(offset, std::ios_base::beg);
    if (is_.fail()) {  // failbit or badbit is set [error happened]
      is_.close();
      return false;  // failure.
    } else {
      is_.clear();  // Clear any failure bits (e.g. eof).
      return true;  // success.
    }
  }

  // This Open routine is unusual in that it is designed to work even
  // if it was already open.  This for efficiency when seeking multiple
  // times.  Returns true if the file is open and positioned at the
  // requested offset.
  virtual bool Open(const std::string &rxfilename, bool binary) {
    if (is_.is_open()) {
      // We are opening when we have an already-open file.
      // We may have to seek within this file, or else close it and
      // open a different one.
      std::string tmp_filename;
      size_t offset;
      SplitFilename(rxfilename, &tmp_filename, &offset);
      if (tmp_filename == filename_ && binary == binary_) {  // Just seek
        is_.clear();  // clear fail bit, etc.
        return Seek(offset);
      } else {
        is_.close();  // don't bother checking error status of is_.
        filename_ = tmp_filename;
        // Record the mode the file is now opened in; otherwise the
        // "same file and mode" test above would later compare against the
        // mode of a previously opened file.
        binary_ = binary;
        is_.open(MapOsPath(filename_).c_str(),
                 binary ? std::ios_base::in | std::ios_base::binary
                        : std::ios_base::in);
        if (!is_.is_open()) return false;
        else
          return Seek(offset);
      }
    } else {
      size_t offset;
      SplitFilename(rxfilename, &filename_, &offset);
      binary_ = binary;
      is_.open(MapOsPath(filename_).c_str(),
                binary ? std::ios_base::in | std::ios_base::binary
                      : std::ios_base::in);
      if (!is_.is_open()) return false;
      else
        return Seek(offset);
    }
  }

  // Returns the underlying stream; calling this without an open file is
  // treated as a coding error.
  virtual std::istream &Stream() {
    if (!is_.is_open())
      KALDI_ERR << "OffsetFileInputImpl::Stream(), file is not open.";
    // I believe this error can only arise from coding error.
    return is_;
  }

  // Closes the file.  The close status is not checked, so the result is
  // always zero.
  virtual int32 Close() {
    if (!is_.is_open())
      KALDI_ERR << "OffsetFileInputImpl::Close(), file is not open.";
    // I believe this error can only arise from coding error.
    is_.close();
    // Don't check status.
    return 0;
  }

  virtual InputType MyType() { return kOffsetFileInput; }

  virtual ~OffsetFileInputImpl() {
    // Stream will automatically be closed, and we don't care about
    // whether it fails.
  }
 private:
  std::string filename_;  // the actual filename
  bool binary_;  // true if was opened in binary mode.
  std::ifstream is_;
};


// Opens the given wxfilename right away; raises an error if that fails.
Output::Output(const std::string &wxfilename, bool binary,
               bool write_header):impl_(NULL) {
  if (Open(wxfilename, binary, write_header)) return;

  // Opening failed: drop any implementation object that is still around
  // before reporting the error.
  if (impl_ != NULL) {
    delete impl_;
    impl_ = NULL;
  }
  KALDI_ERR << "Error opening output stream " <<
      PrintableWxfilename(wxfilename);
}

bool Output::Close() {
  if (!impl_) {
    return false;  // error to call Close if not open.
  } else {
    bool ans = impl_->Close();
    delete impl_;
    impl_ = NULL;
    return ans;
  }
}

// Closes the stream if it is still open and raises an error if closing
// failed; for plain files the message hints at a full disk.
Output::~Output() {
  if (impl_ == NULL) return;

  const bool closed_ok = impl_->Close();
  delete impl_;
  impl_ = NULL;
  if (closed_ok) return;

  KALDI_ERR << "Error closing output file "
            << PrintableWxfilename(filename_)
            << (ClassifyWxfilename(filename_) == kFileOutput ?
                " (disk full?)" : "");
}

// Returns the stream of the current implementation object; raises an error
// if nothing is open.
std::ostream &Output::Stream() {
  if (impl_ == NULL) {
    KALDI_ERR << "Output::Stream() called but not open.";
  }
  return impl_->Stream();
}

// Opens `wxfn` for writing, closing any stream that is currently open.
// Returns true on success; on failure no implementation object is kept.
// Raises an error only if a previously open stream could not be closed.
bool Output::Open(const std::string &wxfn, bool binary, bool header) {
  // Throw here rather than return status, as it's an error about something
  // else: a caller who wanted to avoid the exception could have called
  // Close() beforehand.
  if (IsOpen() && !Close()) {
    KALDI_ERR << "Output::Open(), failed to close output stream: "
              << PrintableWxfilename(filename_);
  }

  filename_ = wxfn;

  OutputType type = ClassifyWxfilename(wxfn);
  KALDI_ASSERT(impl_ == NULL);

  // Pick the implementation that matches the kind of wxfilename.
  switch (type) {
    case kFileOutput:
      impl_ = new FileOutputImpl();
      break;
    case kStandardOutput:
      impl_ = new StandardOutputImpl();
      break;
    case kPipeOutput:
      impl_ = new PipeOutputImpl();
      break;
    default: {  // type == kNoOutput
      KALDI_WARN << "Invalid output filename format "<<
          PrintableWxfilename(wxfn);
      return false;
    }
  }

  bool opened = impl_->Open(wxfn, binary);
  if (opened && header) {
    InitKaldiOutputStream(impl_->Stream(), binary);
    opened = impl_->Stream().good();  // still OK after writing the header?
  }
  if (!opened) {
    delete impl_;
    impl_ = NULL;
  }
  return opened;
}


// Opens the given rxfilename right away; raises an error if that fails.
Input::Input(const std::string &rxfilename, bool *binary): impl_(NULL) {
  const bool opened = Open(rxfilename, binary);
  if (!opened) {
    KALDI_ERR << "Error opening input stream "
              << PrintableRxfilename(rxfilename);
  }
}

// Closes and destroys the current implementation object, returning the
// status it reports.  Returns zero if nothing was open.
int32 Input::Close() {
  if (impl_ == NULL) return 0;

  const int32 status = impl_->Close();
  delete impl_;
  impl_ = NULL;
  return status;
}

// Opens `rxfilename` with the file-system mode given by `file_binary`.  If
// `contents_binary` is non-NULL the Kaldi header is read as well and the
// result of that read is returned.  Returns false if the name is invalid or
// the stream could not be opened; in those cases no implementation object
// is kept.
bool Input::OpenInternal(const std::string &rxfilename,
                         bool file_binary,
                         bool *contents_binary) {
  InputType type = ClassifyRxfilename(rxfilename);

  // An already-open offset-file implementation is reused for another
  // offset-file name, because if the files are the same it can just seek.
  const bool reuse_impl = IsOpen() && type == kOffsetFileInput &&
                          impl_->MyType() == kOffsetFileInput;

  if (!reuse_impl) {
    if (IsOpen()) Close();  // May have to close the stream first.

    switch (type) {
      case kFileInput:
        impl_ = new FileInputImpl();
        break;
      case kStandardInput:
        impl_ = new StandardInputImpl();
        break;
      case kPipeInput:
        impl_ = new PipeInputImpl();
        break;
      case kOffsetFileInput:
        impl_ = new OffsetFileInputImpl();
        break;
      default: {  // type == kNoInput
        KALDI_WARN << "Invalid input filename format "<<
            PrintableRxfilename(rxfilename);
        return false;
      }
    }
  }

  if (!impl_->Open(rxfilename, file_binary)) {
    delete impl_;
    impl_ = NULL;
    return false;
  }

  // read the binary header, if requested.
  if (contents_binary == NULL) return true;
  return InitKaldiInputStream(impl_->Stream(), contents_binary);
}


// Closes the stream if one is still open; the close status is ignored.
Input::~Input() {
  if (impl_ != NULL) {
    Close();
  }
}


// Returns the stream of the current implementation object; raises an error
// if nothing is open.
std::istream &Input::Stream() {
  if (!IsOpen()) {
    KALDI_ERR << "Input::Stream(), not open.";
  }
  return impl_->Stream();
}


// Reads a single-precision matrix from `filename`.  A name ending in ']' is
// treated as carrying a range, like foo.ark:4312423[20:30] (the bit in
// square brackets is the range): the whole matrix is read into a temporary
// and only the requested part is extracted into *m.
template <> void ReadKaldiObject(const std::string &filename,
                                 Matrix<float> *m) {
  const bool looks_ranged =
      !filename.empty() && *filename.rbegin() == ']';

  if (!looks_ranged) {
    // The normal case, there is no range.
    bool binary_in;
    Input ki(filename, &binary_in);
    m->Read(ki.Stream(), binary_in);
    return;
  }

  std::string rxfilename, range;
  if (!ExtractRangeSpecifier(filename, &rxfilename, &range)) {
    KALDI_ERR << "Could not make sense of possible range specifier in filename "
              << "while reading matrix: " << filename;
  }
  Matrix<float> temp;
  bool binary_in;
  Input ki(rxfilename, &binary_in);
  temp.Read(ki.Stream(), binary_in);
  if (!ExtractObjectRange(temp, range, m)) {
    KALDI_ERR << "Error extracting range of object: " << filename;
  }
}

// Reads a double-precision matrix from `filename`.  A name ending in ']' is
// treated as carrying a range, like foo.ark:4312423[20:30] (the bit in
// square brackets is the range): the whole matrix is read into a temporary
// and only the requested part is extracted into *m.
template <> void ReadKaldiObject(const std::string &filename,
                                 Matrix<double> *m) {
  const bool looks_ranged =
      !filename.empty() && *filename.rbegin() == ']';

  if (!looks_ranged) {
    // The normal case, there is no range.
    bool binary_in;
    Input ki(filename, &binary_in);
    m->Read(ki.Stream(), binary_in);
    return;
  }

  std::string rxfilename, range;
  if (!ExtractRangeSpecifier(filename, &rxfilename, &range)) {
    KALDI_ERR << "Could not make sense of possible range specifier in filename "
              << "while reading matrix: " << filename;
  }
  Matrix<double> temp;
  bool binary_in;
  Input ki(rxfilename, &binary_in);
  temp.Read(ki.Stream(), binary_in);
  if (!ExtractObjectRange(temp, range, m)) {
    KALDI_ERR << "Error extracting range of object: " << filename;
  }
}


}  // end namespace kaldi


================================================
FILE: runtime/engine/kaldi/util/kaldi-io.h
================================================
// util/kaldi-io.h

// Copyright 2009-2011  Microsoft Corporation;  Jan Silovsky
//                2016  Xiaohui Zhang

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//  http://www.apache.org/licenses/LICENSE-2.0

// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_UTIL_KALDI_IO_H_
#define KALDI_UTIL_KALDI_IO_H_

#ifdef _MSC_VER
# include <fcntl.h>
# include <io.h>
#endif
#include <cctype>  // For isspace.
#include <limits>
#include <string>
#include "base/kaldi-common.h"
#include "matrix/kaldi-matrix.h"


namespace kaldi {

class OutputImplBase;  // Forward decl; defined in a .cc file
class InputImplBase;  // Forward decl; defined in a .cc file

/// \addtogroup io_group
/// @{

// The Output and Input classes handle stream-opening for "extended" filenames
// that include actual files, standard-input/standard-output, pipes, and
// offsets into actual files.  They also handle reading and writing the
// binary-mode headers for Kaldi files, where applicable.  The classes have
// versions of the Open routines that throw and do not throw, depending whether
// the calling code wants to catch the errors or not; there are also versions
// that write (or do not write) the Kaldi binary-mode header that says if it's
// binary mode.  Generally files that contain Kaldi objects will have the header
// on, so we know upon reading them whether they have the header.  So you would
// use the OpenWithHeader routines for these (or the constructor); but other
// types of objects (e.g. FSTs) would have files without a header so you would
// use OpenNoHeader.

// We now document the types of extended filenames that we use.
//
// A "wxfilename"  is an extended filename for writing. It can take three forms:
// (1) Filename: e.g.    "/some/filename", "./a/b/c", "c:\Users\dpovey\My
//                        Documents\\boo"
//          (whatever the actual file-system interprets)
// (2) Standard output:  "" or "-"
// (3) A pipe: e.g. "| gzip -c > /tmp/abc.gz"
//
//
// A "rxfilename" is an extended filename for reading.  It can take four forms:
// (1) An actual filename, whatever the file-system can read, e.g. "/my/file".
// (2) Standard input: "" or "-"
// (3) A pipe: e.g.  "gunzip -c /tmp/abc.gz |"
// (4) An offset into a file, e.g.: "/mnt/blah/data/1.ark:24871"
//   [these are created by the Table and TableWriter classes; I may also write
//    a program that creates them for arbitrary files]
//


// Typical usage:
// ...
// bool binary;
// MyObject.Write(Output(some_filename, binary).Stream(), binary);
//
// ... more extensive example:
// {
//    Output ko(some_filename, binary);
//    MyObject1.Write(ko.Stream(), binary);
//    MyObject2.Write(ko.Stream(), binary);
// }


// The kinds of destination a wxfilename can be classified as.
enum OutputType {
  kNoOutput,  // invalid wxfilename.
  kFileOutput,  // normal filename.
  kStandardOutput,  // the empty string or "-".
  kPipeOutput  // a pipe, e.g. "| gzip -c > /tmp/abc.gz".
};

/// ClassifyWxfilename interprets filenames as follows:
///  - kNoOutput: invalid filenames (leading or trailing space, things that look
///     like wspecifiers and rspecifiers or like pipes to read from, with
///     trailing |).
///  - kFileOutput: Normal filenames
///  - kStandardOutput: The empty string or "-", interpreted as standard output
///  - kPipeOutput: pipes, e.g. "| gzip -c > /tmp/abc.gz"
OutputType ClassifyWxfilename(const std::string &wxfilename);

// The kinds of source a rxfilename can be classified as.
enum InputType {
  kNoInput,  // invalid rxfilename.
  kFileInput,  // normal filename.
  kStandardInput,  // the empty string or "-".
  kOffsetFileInput,  // offset into a file, e.g. /some/filename:12970.
  kPipeInput  // a pipe, e.g. "gunzip -c /tmp/abc.gz |".
};

/// ClassifyRxfilename interprets filenames for reading as follows:
///  - kNoInput: invalid filenames (leading or trailing space, things that
///       look like wspecifiers and rspecifiers or pipes to write to,
///       with leading |).
///  - kFileInput: normal filenames
///  - kStandardInput: the empty string or "-"
///  - kPipeInput: e.g. "gunzip -c /tmp/abc.gz |"
///  - kOffsetFileInput: offsets into files, e.g.  /some/filename:12970
InputType ClassifyRxfilename(const std::string &rxfilename);


// Handle for an output stream identified by a wxfilename (file, standard
// output or pipe).
class Output {
 public:
  // The normal constructor, provided for convenience.
  // Equivalent to calling with default constructor then Open()
  // with these arguments, except that it throws if opening fails.
  Output(const std::string &filename, bool binary, bool write_header = true);

  // Creates an object with no stream open.
  Output(): impl_(NULL) {}

  /// This opens the stream, with the given mode (binary or text).  It returns
  /// true on success and false on failure.  However, it will throw if something
  /// was already open and could not be closed (to avoid this, call Close()
  /// first).  If write_header == true and binary == true, it writes the Kaldi
  /// binary-mode header ('\0' then 'B').  You may call Open even if it is
  /// already open; it will close the existing stream and reopen (however if
  /// closing the old stream failed it will throw).
  bool Open(const std::string &wxfilename, bool binary, bool write_header);

  inline bool IsOpen();  // return true if we have an open stream.  Does not
  // imply stream is good for writing.

  std::ostream &Stream();  // will throw if not open; else returns stream.

  // Close closes the stream and returns true on success; it returns false if
  // closing failed or if no stream was open.  Calling Close is never
  // necessary unless you want to avoid exceptions being thrown.
  // NOTE(review): the original text here also mentioned an efficiency cost
  // "when using offsets into files, and using the same Input object"; that
  // remark appears to concern the Input class rather than Output -- confirm.
  bool Close();

  // This will throw if stream could not be closed (to check error status,
  // call Close()).
  ~Output();

 private:
  OutputImplBase *impl_;  // non-NULL if open.
  std::string filename_;  // the wxfilename most recently passed to Open().
  KALDI_DISALLOW_COPY_AND_ASSIGN(Output);
};


// bool binary_in;
// Input ki(some_filename, &binary_in);
// MyObject.Read(ki.Stream(), binary_in);
//
// ... more extensive example:
//
// {
//    bool binary_in;
//    Input ki(some_filename, &binary_in);
//    MyObject1.Read(ki.Stream(), binary_in);
//    MyObject2.Read(ki.Stream(), binary_in);
// }
// Note that to catch errors you need to use try.. catch.
// Input communicates errors by throwing exceptions.


// Input interprets four kinds of filenames:
//  (1) Normal filenames
//  (2) The empty string or "-", interpreted as standard input
//  (3) A pipe: e.g.  "gunzip -c /tmp/abc.gz |"
//  (4) Offsets into [real] files, e.g. "/my/filename:12049"
// The last one has no correspondence in Output.


// Handle for an input stream identified by a rxfilename (file, standard
// input, pipe or offset into a file).
class Input {
 public:
  /// The normal constructor.  Opens the stream in binary mode.
  /// Equivalent to calling the default constructor followed by Open(); then,
  /// if contents_binary != NULL, it reads the Kaldi binary-mode header and
  /// puts the result in *contents_binary.  It throws on error.
  Input(const std::string &rxfilename, bool *contents_binary = NULL);

  // Creates an object with no stream open.
  Input(): impl_(NULL) {}

  // Open opens the stream for reading (the mode, where relevant, is binary; use
  // OpenTextMode for text-mode, we made this a separate function rather than a
  // boolean argument, to avoid confusion with Kaldi's text/binary distinction,
  // since reading in the file system's text mode is unusual.)  If
  // contents_binary != NULL, it reads the binary-mode header and puts it in
  // *contents_binary.  Returns true on success.  If it returns false it will
  // not be open.  You may call Open even if it is already open; it will close
  // the existing stream and reopen (however if closing the old stream failed it
  // will throw).
  inline bool Open(const std::string &rxfilename, bool *contents_binary = NULL);

  // As Open but (if the file system has text/binary modes) opens in text mode;
  // you shouldn't ever have to use this as in Kaldi we read even text files in
  // binary mode (and ignore the \r).
  inline bool OpenTextMode(const std::string &rxfilename);

  // Return true if currently open for reading and Stream() will
  // succeed.  Does not guarantee that the stream is good.
  inline bool IsOpen();

  // It is never necessary or helpful to call Close, except if
  // you are concerned about too many filehandles being open.
  // Close does not throw. It returns the exit code as int32
  // in the case of a pipe [kPipeInput], and always zero otherwise.
  int32 Close();

  // Returns the underlying stream. Throws if !IsOpen()
  std::istream &Stream();

  // Destructor does not throw: input streams may legitimately fail so we
  // don't worry about the status when we close them.
  ~Input();
 private:
  // Shared implementation of the Open routines: file_binary selects the
  // file-system mode, and contents_binary (if non-NULL) receives the result
  // of reading the Kaldi header.
  bool OpenInternal(const std::string &rxfilename, bool file_binary,
                    bool *contents_binary);
  InputImplBase *impl_;  // NULL when no stream is open.
  KALDI_DISALLOW_COPY_AND_ASSIGN(Input);
};

template <class C> void ReadKaldiObject(const std::string &filename,
                                        C *c) {
  bool binary_in;
  Input ki(filename, &binary_in);
  c->Read(ki.Stream(), binary_in);
}

// Specialize the template for reading matrices, because we want to be able to
// support reading 'ranges' (row and column ranges), like foo.mat[10:20].
template <> void ReadKaldiObject(const std::string &filename,
                                 Matrix<float> *m);


template <> void ReadKaldiObject(const std::string &filename,
                                 Matrix<double> *m);


template <class C> inline void WriteKaldiObject(const C &c,
                                                const std::string &filename,
                                                bool binary) {
  Output ko(filename, binary);
  c.Write(ko.Stream(), binary);
}

/// PrintableRxfilename turns the rxfilename into a more human-readable
/// form for error reporting, i.e. it does quoting and escaping and
/// replaces "" or "-" with "standard input".
std::string PrintableRxfilename(const std::string &rxfilename);

/// PrintableWxfilename turns the wxfilename into a more human-readable
/// form for error reporting, i.e. it does quoting and escaping and
/// replaces "" or "-" with "standard output".
std::string PrintableWxfilename(const std::string &wxfilename);

/// @}

}  // end namespace kaldi.

#include "util/kaldi-io-inl.h"

#endif  // KALDI_UTIL_KALDI_IO_H_


================================================
FILE: runtime/engine/kaldi/util/kaldi-pipebuf.h
================================================
// util/kaldi-pipebuf.h

// Copyright 2009-2011  Ondrej Glembek

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//  http://www.apache.org/licenses/LICENSE-2.0

// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


/** @file kaldi-pipebuf.h
 *  This is a Kaldi C++ Library header.
 */

#ifndef KALDI_UTIL_KALDI_PIPEBUF_H_
#define KALDI_UTIL_KALDI_PIPEBUF_H_

#include<string>
#if !defined(_LIBCPP_VERSION)  // libc++
#include <fstream>
#else
#include "util/basic-filebuf.h"
#endif

namespace kaldi {
// This class provides a way to initialize a filebuf with a FILE* pointer
// directly; it will not close the file pointer when it is deleted.
// The C++ standard does not allow implementations of C++ to provide
// this constructor within basic_filebuf, which makes it hard to deal
// with pipes using completely native C++.  This is a workaround

#ifdef _MSC_VER
#elif defined(_LIBCPP_VERSION)  // libc++
// Variant used when building against libc++: a stream buffer that attaches
// itself to an existing FILE* by calling open(fptr, mode) on the
// basic_filebuf it derives from.
template<class CharType, class Traits = std::char_traits<CharType> >
class basic_pipebuf : public basic_filebuf<CharType, Traits> {
 public:
  typedef basic_pipebuf<CharType, Traits>   ThisType;

 public:
  // Attaches the buffer to `fptr`; only a warning is issued if the buffer
  // does not report being open afterwards.
  basic_pipebuf(FILE *fptr, std::ios_base::openmode mode)
      : basic_filebuf<CharType, Traits>() {
    this->open(fptr, mode);
    if (this->is_open()) return;
    // probably indicates code error, if the fptr was good.
    KALDI_WARN << "Error initializing pipebuf";
  }
};  // class basic_pipebuf
#else
// Variant used when the compiler is not MSVC and the standard library is not
// libc++: a std::basic_filebuf that is attached to an existing FILE*.
// NOTE(review): the _M_* members used below look like libstdc++
// implementation details; confirm before building against another library.
template<class CharType, class Traits = std::char_traits<CharType> >
class basic_pipebuf : public std::basic_filebuf<CharType, Traits> {
 public:
  typedef basic_pipebuf<CharType, Traits>   ThisType;

 public:
  // Attaches the buffer to `fptr` with the given open mode.  If the buffer
  // does not report being open afterwards, a warning is issued and the
  // remaining set-up is skipped.
  basic_pipebuf(FILE *fptr, std::ios_base::openmode mode)
      : std::basic_filebuf<CharType, Traits>() {
    // Hand the already-open FILE* to the underlying file object.
    this->_M_file.sys_open(fptr, mode);
    if (!this->is_open()) {
      KALDI_WARN << "Error initializing pipebuf";  // probably indicates
      // code error, if the fptr was good.
      return;
    }
    // Fill in the mode and buffer bookkeeping by hand, with a BUFSIZ-sized
    // internal buffer and neither reading nor writing in progress.
    this->_M_mode = mode;
    this->_M_buf_size = BUFSIZ;
    this->_M_allocate_internal_buffer();
    this->_M_reading = false;
    this->_M_writing = false;
    this->_M_set_buffer(-1);
  }
};  // class basic_pipebuf
#endif  // _MSC_VER

}  // namespace kaldi

#endif  // KALDI_UTIL_KALDI_PIPEBUF_H_


================================================
FILE: runtime/engine/kaldi/util/kaldi-semaphore.cc
================================================
// util/kaldi-semaphore.cc

// Copyright 2012  Karel Vesely (Brno University of Technology)
//           2017  Dogan Can (University of Southern California)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


#include "base/kaldi-error.h"
#include "util/kaldi-semaphore.h"

namespace kaldi {

// Creates a semaphore whose counter starts at 'count'; a negative initial
// value is rejected by the assertion.
Semaphore::Semaphore(int32 count) : count_(count) {
  KALDI_ASSERT(count >= 0);
}

// Nothing to release explicitly; the members clean up after themselves.
Semaphore::~Semaphore() = default;

// Non-blocking acquire: if the counter is non-zero, decrements it and returns
// true; otherwise leaves it untouched and returns false.
bool Semaphore::TryWait() {
  std::lock_guard<std::mutex> guard(mutex_);
  if (count_ == 0)
    return false;
  --count_;
  return true;
}

void Semaphore::Wait() {
  std::unique_lock<std::mutex> lock(mutex_);
  while(!count_)
    condition_variable_.wait(lock);
  count_--;
}

// Release: increments the counter and wakes one waiter, both while holding
// the mutex.
void Semaphore::Signal() {
  std::lock_guard<std::mutex> guard(mutex_);
  ++count_;
  condition_variable_.notify_one();
}

} // namespace kaldi


================================================
FILE: runtime/engine/kaldi/util/kaldi-semaphore.h
================================================
// util/kaldi-semaphore.h

// Copyright 2012  Karel Vesely (Brno University of Technology)
//           2017  Dogan Can (University of Southern California)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


#ifndef KALDI_THREAD_KALDI_SEMAPHORE_H_
#define KALDI_THREAD_KALDI_SEMAPHORE_H_ 1

#include <mutex>
#include <condition_variable>

namespace kaldi {

// A counting semaphore declared in terms of a std::mutex and a
// std::condition_variable.  Copying and assignment are disallowed.
// NOTE(review): this header uses int32 and KALDI_DISALLOW_COPY_AND_ASSIGN
// without including anything that declares them, so it presumably relies on
// the including file having pulled in the Kaldi base headers first.
class Semaphore {
 public:
  // 'count' is the initial value of the counter (0 by default).
  Semaphore(int32 count = 0);

  ~Semaphore();

  bool TryWait();  ///< Returns true if Wait() goes through
  void Wait();     ///< decrease the counter
  void Signal();   ///< increase the counter

 private:
  int32 count_;    ///< the semaphore counter, 0 means block on Wait()

  std::mutex mutex_;
  std::condition_variable condition_variable_;
  KALDI_DISALLOW_COPY_AND_ASSIGN(Semaphore);
};

} //namespace

#endif // KALDI_THREAD_KALDI_SEMAPHORE_H_


================================================
FILE: runtime/engine/kaldi/util/kaldi-table-inl.h
================================================
// util/kaldi-table-inl.h

// Copyright 2009-2011    Microsoft Corporation
//                2013    Johns Hopkins University (author: Daniel Povey)
//                2016    Xiaohui Zhang

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


#ifndef KALDI_UTIL_KALDI_TABLE_INL_H_
#define KALDI_UTIL_KALDI_TABLE_INL_H_

#include <algorithm>
#include <string>
#include <thread>
#include <utility>
#include <vector>
#include <errno.h>
#include "util/kaldi-io.h"
#include "util/kaldi-holder.h"
#include "util/text-utils.h"
#include "util/stl-utils.h"  // for StringHasher.
#include "util/kaldi-semaphore.h"


namespace kaldi {

/// \addtogroup table_impl_types
/// @{

// Abstract interface shared by the sequential table-reader implementations
// in this file (script-file, archive and background-thread variants).
// 'Holder' supplies the value type T and the object that stores it.
template<class Holder> class SequentialTableReaderImplBase {
 public:
  typedef typename Holder::T T;
  // Note that Open takes rxfilename not rspecifier.  Open will only be
  // called on a just-allocated object.
  virtual bool Open(const std::string &rxfilename) = 0;
  // Done() should be called on a successfully opened, not-closed object.
  // Only throws if called at the wrong time (i.e. code error).
  virtual bool Done() const = 0;
  // Returns true if the reader is open [i.e. Open() succeeded and
  // the user has not called Close()].
  virtual bool IsOpen() const = 0;
  // Returns the current key; it is valid to call this if Done() returned false.
  // Only throws on code error (i.e. called at the wrong time).
  virtual std::string Key() = 0;
  // Returns the value associated with the current key.  Valid to call it if
  // Done() returned false.  It throws if the value could not be read.  [However
  // if you use the ,p modifier it will never throw, unless you call it at the
  // wrong time, i.e. unless there is a code error.]
  virtual T &Value() = 0;
  // Releases the value held for the current key, if one is held.
  virtual void FreeCurrent() = 0;
  // Move to the next object.  This won't throw unless called wrongly [e.g. on
  // a non-open archive].
  virtual void Next() = 0;
  // Close the table.  Returns its status as bool so it won't throw, unless
  // called wrongly [i.e. on a non-open archive].
  virtual bool Close() = 0;
  // SwapHolder() is not part of the public interface of SequentialTableReader.
  // It should be called when it would be valid to call Value() or FreeCurrent()
  // (i.e. when a value is stored), and after this it's not valid to get the
  // value any more until you call Next().  It swaps the contents of
  // this->holder_ with those of 'other_holder'.  It's needed as part of how
  // we implement SequentialTableReaderBackgroundImpl.
  virtual void SwapHolder(Holder *other_holder) = 0;
  SequentialTableReaderImplBase() { }
  // Virtual so that implementations are destroyed correctly through a base
  // pointer.  The script and archive implementations in this file raise
  // KALDI_ERR from their destructors if they are still open and Close()
  // reports failure.
  virtual ~SequentialTableReaderImplBase() { }  // throws.
 private:
  KALDI_DISALLOW_COPY_AND_ASSIGN(SequentialTableReaderImplBase);
};

// This is the implementation for SequentialTableReader
// when it's actually a script file.
template<class Holder>  class SequentialTableReaderScriptImpl:
      public SequentialTableReaderImplBase<Holder> {
 public:
  typedef typename Holder::T T;

  // A freshly constructed reader is in state kUninitialized until Open() is
  // called.
  SequentialTableReaderScriptImpl(): state_(kUninitialized) { }

  // Opens the script file named by 'rspecifier' and advances to its first
  // entry.  May be called from any state: unless the reader is in
  // kUninitialized or kError, the previous input is closed first, and a
  // failure of that Close() raises KALDI_ERR.
  // Returns false if the script file cannot be opened, turns out to be
  // binary, or reading the first line leaves the reader in kError; an empty
  // script file is not an error.
  virtual bool Open(const std::string &rspecifier) {
    const bool previous_input_open =
        (state_ != kUninitialized && state_ != kError);
    if (previous_input_open && !Close()) {
      // Call Close() yourself to suppress this exception.
      KALDI_ERR << "Error closing previous input: "
                << "rspecifier was " << rspecifier_;
    }
    rspecifier_ = rspecifier;
    RspecifierType rs = ClassifyRspecifier(rspecifier, &script_rxfilename_,
                                           &opts_);
    KALDI_ASSERT(rs == kScriptRspecifier);

    bool is_binary;
    if (!script_input_.Open(script_rxfilename_, &is_binary)) {
      // Failure on Open.
      KALDI_WARN << "Failed to open script file "
                 << PrintableRxfilename(script_rxfilename_);
      state_ = kUninitialized;
      return false;
    }
    if (is_binary) {
      KALDI_WARN << "Script file should not be binary file.";
      SetErrorState();
      return false;
    }
    state_ = kFileStart;
    Next();
    // Any status other than kError, including kEof, is OK from the point of
    // view of Open().
    return state_ != kError;
  }

  // True in the states kEof, kHaveScpLine, kHaveObject and kHaveRange; false
  // in kUninitialized and kError.  Any other state (i.e. kFileStart, which we
  // never return to the user in) is a code error.
  virtual bool IsOpen() const {
    if (state_ == kEof || state_ == kHaveScpLine ||
        state_ == kHaveObject || state_ == kHaveRange)
      return true;
    if (state_ == kUninitialized || state_ == kError)
      return false;
    KALDI_ERR << "IsOpen() called on invalid object.";
    return false;
  }

  // False while an scp line (with or without its object) is available; true
  // at kEof and also at kError.  The error condition counts as Done(); the
  // destructor/Close() will inform the user of the error.
  virtual bool Done() const {
    if (state_ == kHaveScpLine || state_ == kHaveObject ||
        state_ == kHaveRange)
      return false;
    if (state_ == kEof || state_ == kError)
      return true;
    KALDI_ERR << "Done() called on TableReader object at the wrong"
        " time.";
    return false;
  }

  // Returns the key of the current scp line.  Valid whenever Done() returns
  // false; calling it in any other state is a coding error.
  virtual std::string Key() {
    const bool have_key = (state_ == kHaveScpLine || state_ == kHaveObject ||
                           state_ == kHaveRange);
    if (!have_key) {
      KALDI_ERR << "Key() called on TableReader object at the wrong time.";
    }
    return key_;
  }

  // Returns the value for the current key, loading the object (and
  // extracting the requested range, if any) first when that has not been
  // done yet.  Raises KALDI_ERR if the object cannot be loaded.
  T &Value() {
    if (!EnsureObjectLoaded())
      KALDI_ERR << "Failed to load object from "
                << PrintableRxfilename(data_rxfilename_)
                << " (to suppress this error, add the permissive "
                << "(p, ) option to the rspecifier.)";
    // Because EnsureObjectLoaded() returned with success, we know
    // that if range_ is nonempty (i.e. a range was requested), the
    // state will be kHaveRange.
    if (state_ == kHaveRange) {
      return range_holder_.Value();
    } else {
      KALDI_ASSERT(state_ == kHaveObject);
      return holder_.Value();
    }
  }

  // Drops the most specific thing currently held: the range object when in
  // kHaveRange (the base object is kept), otherwise the base object.  In any
  // other state only a warning is printed.
  void FreeCurrent() {
    switch (state_) {
      case kHaveObject:
        holder_.Clear();
        state_ = kHaveScpLine;
        break;
      case kHaveRange:
        range_holder_.Clear();
        state_ = kHaveObject;
        break;
      default:
        KALDI_WARN << "FreeCurrent called at the wrong time.";
    }
  }

  // Hands the current value over to 'other_holder' by swapping holders.
  void SwapHolder(Holder *other_holder) {
    // Value() makes sure an object is loaded (it dies with KALDI_ERR
    // otherwise); its result is deliberately discarded, the cast to void
    // keeps the compiler quiet.
    (void) Value();
    // From here on we know an object was loaded successfully and that, if a
    // range was specified, it sits in range_holder_.
    switch (state_) {
      case kHaveObject:
        holder_.Swap(other_holder);
        state_ = kHaveScpLine;
        break;
      case kHaveRange:
        range_holder_.Swap(other_holder);
        // We still have the base object, just no range any more.
        state_ = kHaveObject;
        break;
      default:
        KALDI_ERR << "Code error";
    }
    // Whatever is now left in range_holder_ or holder_ may be junk, which is
    // harmless.  Clear() is intentionally not called: this function has to
    // stay lightweight for the 'bg' feature to work well.
  }

  // Advances to the next object.
  // It returns only in one of these situations:
  //  - Done() is true, i.e. the state is kError or kEof;
  //  - in non-permissive mode, as soon as the next scp line has been read
  //    (state kHaveScpLine or kHaveObject);
  //  - in permissive mode, only once an object has actually been loaded,
  //    i.e. (kHaveObject and range_.empty()) or kHaveRange.
  void Next() {
    for (;;) {
      NextScpLine();
      // Non-permissive mode stops at the next key; Value() will crash later
      // if the object on that scp line cannot be read.
      if (Done() || !opts_.permissive)
        return;
      // Permissive mode treats keys whose scp entry cannot be read as
      // nonexistent, so we have to try reading right away and move on to the
      // next scp line on failure.
      if (EnsureObjectLoaded())
        return;
    }
  }

  // Closes the reader and leaves it in state kUninitialized.
  //
  // Must be called on an open reader (IsOpen() true, i.e. state kEof,
  // kHaveScpLine, kHaveObject or kHaveRange); otherwise KALDI_ERR is raised
  // before anything is touched.  The precondition is checked first so that a
  // wrong call does not tear down the streams and holders as a side effect
  // (this matches SequentialTableReaderArchiveImpl::Close()).
  //
  // Returns false only when the reader was at kEof, script_input_.Close()
  // reported a nonzero status (e.g. a pipe that ended with error status) and
  // permissive mode is off; this is so that errors from programs invoked via
  // a pipe can be caught.
  //
  // NOTE(review): NextScpLine() already closes script_input_ (discarding the
  // status) when it reaches end of file, so on entry in state kEof the script
  // input appears to be closed already and 'status' stays 0 -- confirm
  // whether the pipe's exit status is meant to be propagated from there.
  virtual bool Close() {
    if (!this->IsOpen())
      KALDI_ERR << "Close() called on input that was not open.";
    int32 status = 0;
    if (script_input_.IsOpen())
      status = script_input_.Close();
    if (data_input_.IsOpen())
      data_input_.Close();
    range_holder_.Clear();
    holder_.Clear();
    StateType old_state = state_;
    state_ = kUninitialized;
    // The kError test is kept for safety; IsOpen() is false in kError, so
    // the check at the top normally prevents reaching here in that state.
    if (old_state == kError || (old_state == kEof && status != 0)) {
      if (opts_.permissive) {
        KALDI_WARN << "Close() called on scp file with read error, ignoring the"
            " error because permissive mode specified.";
        return true;
      } else {
        return false;  // User will do something with the error status.
      }
    } else {
      return true;
    }
    // State on entry                                   Result
    // kHaveScpLine / kHaveObject / kHaveRange           true
    // kEof, status == 0                                 true
    // kEof, status != 0, opts_.permissive               true (with warning)
    // kEof, status != 0, !opts_.permissive              false
    // kUninitialized / kError / kFileStart              KALDI_ERR
  }

  // Closes the reader if the user has not done so; a failing Close() is
  // reported with KALDI_ERR.  Call Close() yourself beforehand to handle the
  // status without the exception.
  virtual ~SequentialTableReaderScriptImpl() {
    if (!this->IsOpen())
      return;
    if (!Close()) {
      KALDI_ERR << "TableReader: reading script file failed: from scp "
                << PrintableRxfilename(script_rxfilename_);
    }
  }
 private:

  // EnsureObjectLoaded() makes sure that whatever the current key refers to
  // (the object, plus the object range if one was requested) is fully loaded.
  // Returns true on success (we have the object) and false on failure.
  //
  // Possible entry states: kHaveScpLine, kHaveObject, kHaveRange.
  // Possible exit states:  kHaveScpLine, kHaveObject, kHaveRange.
  //
  // The return status carries information that the exit state alone does
  // not: if the object could not be loaded we stay in kHaveScpLine and return
  // false; if a range was requested but could not be extracted we end up in
  // kHaveObject and return false.
  bool EnsureObjectLoaded() {
    switch (state_) {
      case kHaveScpLine: case kHaveObject: case kHaveRange:
        break;
      default:
        KALDI_ERR << "Invalid state (code error)";
    }

    if (state_ == kHaveScpLine) {  // the object still has to go into holder_.
      // NULL means the binary-mode header is not read.
      const bool opened = Holder::IsReadInBinary()
          ? data_input_.Open(data_rxfilename_, NULL)
          : data_input_.OpenTextMode(data_rxfilename_);
      if (!opened) {
        KALDI_WARN << "Failed to open file "
                   << PrintableRxfilename(data_rxfilename_);
        return false;
      }
      if (!holder_.Read(data_input_.Stream())) {
        // holder_ will not contain data.
        KALDI_WARN << "Failed to load object from "
                   << PrintableRxfilename(data_rxfilename_);
        return false;
      }
      state_ = kHaveObject;
    }

    // The state is now either kHaveObject or kHaveRange.
    if (range_.empty()) {
      // Without a range specifier we can never be in kHaveRange.
      KALDI_ASSERT(state_ == kHaveObject);
      return true;
    }
    if (state_ == kHaveRange) {
      // The range was extracted earlier; nothing left to do.
      return true;
    }
    // range_ is nonempty and state_ is kHaveObject, so try to extract the
    // range object.  ExtractRange() throws with KALDI_ERR if the object type
    // does not support ranges.
    if (range_holder_.ExtractRange(holder_, range_)) {
      state_ = kHaveRange;
      return true;
    }
    KALDI_WARN  << "Failed to load object from "
                << PrintableRxfilename(data_rxfilename_)
                << "[" << range_ << "]";
    return false;
  }

  // Puts the reader into state kError and lets go of everything it holds:
  // Close() is called on both Input objects and both holders are cleared.
  void SetErrorState() {
    state_ = kError;
    script_input_.Close();
    data_input_.Close();
    holder_.Clear();
    range_holder_.Clear();
  }

  // Reads the next line of the script file and parses it into key_,
  // data_rxfilename_ and range_.
  // Possible entry states: kHaveObject, kHaveRange, kHaveScpLine, kFileStart.
  // Possible exit states: kEof, kError, kHaveScpLine, kHaveObject.
  // An object already held in holder_ is kept (state stays kHaveObject) when
  // the new line refers to the same data_rxfilename_ as the previous one.
  void NextScpLine() {
    switch (state_) {  // Check and simplify the state.
      case kHaveRange:
        range_holder_.Clear();
        state_ = kHaveObject;
        break;
      case kHaveScpLine: case kHaveObject: case kFileStart: break;
      default:
        // No other states are valid to call Next() from.
        KALDI_ERR << "Reading script file: Next called wrongly.";
    }
    // At this point the state is kHaveObject, kHaveScpLine, or kFileStart.
    std::string line;
    if (!getline(script_input_.Stream(), line)) {
      // There is nothing more in the scp file.  Might as well close the
      // input streams as we don't need them.
      // NOTE(review): the status returned by script_input_.Close() is
      // dropped here -- confirm that is intended.
      state_ = kEof;
      script_input_.Close();
      if (data_input_.IsOpen())
        data_input_.Close();
      holder_.Clear();  // clear the holder if it was nonempty.
      range_holder_.Clear();  // clear the range holder if it was nonempty.
      return;
    }

    // After extracting "key" from "line", we put the rest of "line" into
    // "rest", and then extract data_rxfilename (e.g. 1.ark:100) and possibly
    // the range specifier (e.g. [1:2,2:10]) from "rest".
    std::string data_rxfilename, rest;
    SplitStringOnFirstSpace(line, &key_, &rest);
    if (key_.empty() || rest.empty()) {
      KALDI_WARN << "We got an invalid line in the scp file. "
                 << "It should look like: some_key 1.ark:10, got: "
                 << line;
      SetErrorState();
      return;
    }

    // Got a valid line.
    if (rest.back() == ']') {
      if (!ExtractRangeSpecifier(rest, &data_rxfilename, &range_)) {
        // The rspecifier is quoted on both sides (the closing quote used to
        // be missing).
        KALDI_WARN << "Reading rspecifier '" << rspecifier_
                   << "', cannot make sense of scp line "
                   << line;
        SetErrorState();
        return;
      }
    } else {
      data_rxfilename = rest;
      range_ = "";
    }

    bool filenames_equal = (data_rxfilename_ == data_rxfilename);
    if (!filenames_equal)
      data_rxfilename_ = data_rxfilename;
    if (state_ == kHaveObject) {
      if (!filenames_equal) {
        holder_.Clear();
        state_ = kHaveScpLine;
      }
      // else leave state_ at kHaveObject and leave the object in the holder.
    } else {
      state_ = kHaveScpLine;
    }
  }

  std::string rspecifier_;  // the rspecifier that this class was opened with.
  RspecifierOptions opts_;  // options parsed from the rspecifier by
                            // ClassifyRspecifier() in Open().
  std::string script_rxfilename_;  // rxfilename of the script file.

  Input script_input_;  // Input object for the .scp file
  Input data_input_;   // Input object for the entries in the script file;
                       // we make this a class member instead of a local variable,
                       // so that rspecifiers of the form filename:byte-offset,
                       // e.g. foo.ark:12345, can be handled using fseek().

  Holder holder_;       // Holds the object.
  Holder range_holder_; // Holds the partial object corresponding to the object
                        // range specifier 'range_'; this is only used when
                        // 'range_' is specified, i.e. when the .scp file
                        // contains lines of the form rspecifier[range], like
                        // foo.ark:242[0:9] (representing a row range of a
                        // matrix).


  std::string key_;  // the key of the current scp line we're processing
  std::string data_rxfilename_;  // the rxfilename corresponding to the current key
  std::string range_;  // the range of object corresponding to the current key, if an
                       // object range was specified in the script file, else "".

  enum StateType {
    //  Summary of the states this object can be in (state_).
    //  The four yes/no columns answer, in order:
    //
    //                (*) Does holder_ contain the object corresponding to
    //                    data_rxfilename_ ?
    //                    (*) Does range_holder_ contain a range object?
    //                         (*) is script_input_ open?
    //                             (*) are key_, data_rxfilename_ and range_ [if applicable] set?
    //
    kUninitialized, // no  no  no  no            Uninitialized or closed object.
    kFileStart,     // no  no  yes no            We just opened the .scp file (we'll never be in this
                    //                           state when a user-visible function is called.)
    kEof,           // no  no  no  no            We did Next() and found eof in script file.
    kError,         // no  no  no  no            Error reading or parsing script file.
    kHaveScpLine,   // no  no  yes yes           Have a line of the script file but nothing else.
    kHaveObject,    // yes no  yes yes           holder_ contains an object but range_holder_ does not.
    kHaveRange,     // yes yes yes yes           we have the range object in range_holder_ (implies
                    //                           range_ nonempty).
  } state_;


};


// This is the implementation for SequentialTableReader
// when it's an archive.  Note that the archive format is:
// key1 [space] object1 key2 [space]
// object2 ... eof.
// "object1" is the output of the Holder::Write function and will
// typically contain a binary header (in binary mode) and then
// the output of object.Write(os, binary).
// The archive itself does not care whether it is in binary
// or text mode, for reading purposes.

template<class Holder>  class SequentialTableReaderArchiveImpl:
      public SequentialTableReaderImplBase<Holder> {
 public:
  typedef typename Holder::T T;

  // A freshly constructed reader is in state kUninitialized until Open() is
  // called.
  SequentialTableReaderArchiveImpl(): state_(kUninitialized) { }

  // Opens the archive named by 'rspecifier' and reads its first entry.
  // If the reader is not in kUninitialized, the previous input is closed
  // first; a failure of that Close() is a warning in permissive mode and
  // KALDI_ERR otherwise.
  // Returns false (leaving the state at kUninitialized) if the stream cannot
  // be opened or the first entry cannot be read; an empty archive is fine.
  virtual bool Open(const std::string &rspecifier) {
    if (state_ != kUninitialized && !Close()) {
      // Call Close() yourself to suppress this exception.
      if (opts_.permissive) {
        KALDI_WARN << "Error closing previous input "
            "(only warning, since permissive mode).";
      } else {
        KALDI_ERR << "Error closing previous input.";
      }
    }
    rspecifier_ = rspecifier;
    RspecifierType rs = ClassifyRspecifier(rspecifier,
                                           &archive_rxfilename_,
                                           &opts_);
    KALDI_ASSERT(rs == kArchiveRspecifier);

    // NULL means don't expect a binary-mode header.
    const bool opened = Holder::IsReadInBinary()
        ? input_.Open(archive_rxfilename_, NULL)
        : input_.OpenTextMode(archive_rxfilename_);
    if (!opened) {
      KALDI_WARN << "Failed to open stream "
                 << PrintableRxfilename(archive_rxfilename_);
      state_ = kUninitialized;  // Failure on Open
      return false;  // User should print the error message.
    }

    state_ = kFileStart;
    Next();
    if (state_ == kError) {
      KALDI_WARN << "Error beginning to read archive file (wrong filename?): "
                 << PrintableRxfilename(archive_rxfilename_);
      input_.Close();
      state_ = kUninitialized;
      return false;
    }
    KALDI_ASSERT(state_ == kHaveObject || state_ == kEof);
    return true;
  }

  // Reads the next "key object" pair from the archive.
  // Valid entry states: kHaveObject, kFileStart, kFreedObject.
  // Exit states: kHaveObject on success, kEof at end of archive, kError on a
  // stream error, a malformed separator after the key, or a failed read.
  virtual void Next() {
    if (state_ == kHaveObject) {
      holder_.Clear();
    } else if (state_ != kFileStart && state_ != kFreedObject) {
      KALDI_ERR << "Next() called wrongly.";
    }

    std::istream &is = input_.Stream();
    // Clear any fail bits that may have been set... just in case this
    // happened in the Read function.
    is.clear();
    is >> key_;  // This eats up any leading whitespace and gets the string.
    if (is.eof()) {
      state_ = kEof;
      return;
    }
    if (is.fail()) {
      // This shouldn't really happen, barring file-system errors.
      KALDI_WARN << "Error reading archive "
                 << PrintableRxfilename(archive_rxfilename_);
      state_ = kError;
      return;
    }

    // We expect a space ' ' after the key.  Tab [which is consumed] and
    // newline [which is not] are tolerated too, so that archives written by
    // scripts that are not fully aware of the format can still be read.
    const int c = is.peek();
    const bool separator_ok = (c == ' ' || c == '\t' || c == '\n');
    if (!separator_ok) {
      KALDI_WARN << "Invalid archive file format: expected space after key "
                 << key_ << ", got character "
                 << CharToString(static_cast<char>(is.peek())) << ", reading "
                 << PrintableRxfilename(archive_rxfilename_);
      state_ = kError;
      return;
    }
    if (c != '\n')
      is.get();  // Consume the space or tab.

    if (!holder_.Read(is)) {
      KALDI_WARN << "Object read failed, reading archive "
                 << PrintableRxfilename(archive_rxfilename_);
      state_ = kError;
      return;
    }
    state_ = kHaveObject;
  }

  // True in kEof, kError, kHaveObject and kFreedObject; false in
  // kUninitialized.  kFileStart is not a valid state for the user to call
  // something on, so it is treated as a code error.
  virtual bool IsOpen() const {
    if (state_ == kUninitialized)
      return false;
    if (state_ == kEof || state_ == kError ||
        state_ == kHaveObject || state_ == kFreedObject)
      return true;
    KALDI_ERR << "IsOpen() called on invalid object.";
    return false;
  }

  // False while an object is held; true at kEof and kError.  The error state
  // counts as Done(), but the destructor will fail unless the status is
  // checked with Close().  Any other state is a code error.
  virtual bool Done() const {
    if (state_ == kHaveObject)
      return false;
    if (state_ == kEof || state_ == kError)
      return true;
    KALDI_ERR << "Done() called on TableReader object at the wrong time.";
    return false;
  }

  // Returns the key of the current entry.  Valid whenever Done() returns
  // false, i.e. only in state kHaveObject; anything else is a coding error.
  virtual std::string Key() {
    if (state_ != kHaveObject) {
      KALDI_ERR << "Key() called on TableReader object at the wrong time.";
    }
    return key_;
  }

  // Returns the object of the current entry.  Only valid in state
  // kHaveObject; anything else is a coding error.
  T &Value() {
    if (state_ != kHaveObject) {
      KALDI_ERR << "Value() called on TableReader object at the wrong time.";
    }
    return holder_.Value();
  }

  // Releases the object of the current entry and moves to kFreedObject.
  // In any state other than kHaveObject only a warning is printed.
  virtual void FreeCurrent() {
    if (state_ != kHaveObject) {
      KALDI_WARN << "FreeCurrent called at the wrong time.";
    } else {
      holder_.Clear();
      state_ = kFreedObject;
    }
  }

  // Hands the current object over to 'other_holder' by swapping holders and
  // moves to kFreedObject.
  void SwapHolder(Holder *other_holder) {
    // Value() checks that we have a value; its result is deliberately
    // discarded and the cast to void keeps the compiler quiet.
    (void) Value();
    if (state_ != kHaveObject) {
      KALDI_ERR << "SwapHolder called at the wrong time "
                   "(error related to ',bg' modifier).";
    } else {
      holder_.Swap(other_holder);
      state_ = kFreedObject;
    }
  }

  virtual bool Close() {
    // To clean up, Close() also closes the Input object if
    // it's open.  It will succeed if the stream was not in an error state,
    // and the Input object isn't in an error state  we've found eof in the archive.
    if (!this->IsOpen())
      KALDI_ERR << "Close() called on TableReader twice or otherwise wrongly.";
    int32 status = 0;
    if (input_.IsOpen())
      status = input_.Close();
    if (state_ == kHaveObject)
      holder_.Clear();
    StateType old_state = state_;
    state_ = kUninitialized;
    if (old_state == kError || (old_state == kEof && status != 0)) {
      if (opts_.permissive) {
        KALDI_WARN << "Error detected closing TableReader for archive "
                   << PrintableRxfilename(archive_rxfilename_)
                   << " but ignoring "
                   << "it as permissive mode specified.";
        return true;
      } else {
        return false;
      }
    } else {
      return true;
    }
  }

  // Closes the reader if the user has not done so; a failing Close() is
  // reported with KALDI_ERR.  Call Close() yourself beforehand to handle the
  // status without the exception.
  virtual ~SequentialTableReaderArchiveImpl() {
    if (!this->IsOpen())
      return;
    if (!Close()) {
      KALDI_ERR << "TableReader: error detected closing archive "
                << PrintableRxfilename(archive_rxfilename_);
    }
  }
 private:
  Input input_;  // Input object for the archive
  Holder holder_;     // Holds the object.
  std::string key_;   // Key of the entry most recently read by Next().
  std::string rspecifier_;  // The rspecifier passed to the last Open().
  std::string archive_rxfilename_;  // rxfilename part of rspecifier_.
  RspecifierOptions opts_;  // Options parsed from rspecifier_.
  // The state of the reading process.  The two columns say whether holder_
  // has an object and whether input_ is open.
  // NOTE(review): Next() sets kEof and kError without closing input_ (Close()
  // does that later), so "no" in the last column for those two states looks
  // inaccurate while the reader has not been closed -- confirm.
  enum StateType {  //  [The state of the reading process]        [does holder_ [is input_
    //                                                     have object]   open]
    kUninitialized,  // Uninitialized or closed.                  no         no
    kFileStart,      // [state we use internally: just opened.]   no         yes
    kEof,     // We did Next() and found eof in archive           no         no
    kError,   // Some other error                                 no         no
    kHaveObject,  // We read the key and the object after it.     yes        yes
    kFreedObject,  // The user called FreeCurrent().              no         yes
  } state_;
};

// This is for when someone adds the 'bg' modifier; it wraps around the basic
// implementation and allows it to do the reading in a background thread.
// Sequential reader that wraps another SequentialTableReaderImplBase and
// drives that reader's Next() from a background thread.  The foreground
// (consumer) thread obtains each element through Next(), which copies the key
// and swaps the object out of the wrapped reader.  The two threads hand over
// control with a pair of semaphores:
//   - consumer_sem_ is signalled by the background thread when an element (or
//     end-of-data) is ready, and waited on by Next()/Close();
//   - producer_sem_ is signalled by Next()/Close() and waited on by the
//     background thread before it reads the next element.
template<class Holder>
class SequentialTableReaderBackgroundImpl:
      public SequentialTableReaderImplBase<Holder> {
 public:
  typedef typename Holder::T T;

  // Stores the wrapped reader.  This object deletes base_reader in Close()
  // (and in the error path of RunInBackground()), so it owns the pointer.
  SequentialTableReaderBackgroundImpl(
      SequentialTableReaderImplBase<Holder> *base_reader):
      base_reader_(base_reader) {}

  // This function ignores the rxfilename argument.
  // We use the same function signature as the regular Open(),
  // for convenience.
  // It asserts that the wrapped reader is already open, starts the background
  // thread, and (if there is data) calls Next() once so that the first
  // key/object is available.  It always returns true.
  virtual bool Open(const std::string &rxfilename) {
    KALDI_ASSERT(base_reader_ != NULL &&
                 base_reader_->IsOpen());  // or code error.
    {
      thread_ = std::thread(SequentialTableReaderBackgroundImpl<Holder>::run,
                            this);
    }

    // If the wrapped reader has nothing to read, key_ stays empty and Done()
    // reports true straight away.
    if (!base_reader_->Done())
      Next();
    return true;
  }

  virtual bool IsOpen() const {
    // Close() sets base_reader_ to NULL, and we never initialize this object
    // with a non-open base_reader_, so no need to check if it's open.
    return base_reader_ != NULL;
  }

  // Body of the background (producer) thread; see run().
  void RunInBackground() {
    try {
      // This function is called in the background thread.  The whole point of
      // the background thread is that we don't want to do the actual reading
      // (inside Next()) in the foreground.
      while (base_reader_ != NULL && !base_reader_->Done()) {
        // Tell the consumer that the wrapped reader currently holds an element.
        consumer_sem_.Signal();
        // Here is where the consumer process (parent thread) gets to do its
        // stuff.  Principally it calls SwapHolder()-- a shallow swap that is
        // cheap.
        producer_sem_.Wait();
        // we check that base_reader_ is not NULL in case Close() was
        // called in the main thread.
        if (base_reader_ != NULL)
          base_reader_->Next();   //  here is where the work happens.
      }
      // this signal will be waited on in the Next() function of the foreground
      // thread if it is still running, or Close() otherwise.
      consumer_sem_.Signal();
      // this signal may be waited on in Close().
      consumer_sem_.Signal();
    } catch (...) {
      // There is nothing we called above that could potentially throw due to
      // user data.  So we treat reaching this point as a code-error condition.
      // Closing base_reader_ will trigger an exception in Next() in the main
      // thread when it checks that base_reader_->IsOpen().
      // NOTE(review): base_reader_ is dereferenced here without a NULL check.
      if (base_reader_->IsOpen()) {
        base_reader_->Close();
        delete base_reader_;
        base_reader_ = NULL;
      }
      // Wake the consumer so that its Next() can detect the error.
      consumer_sem_.Signal();
      return;
    }
  }
  // Static trampoline passed to std::thread in Open(); it simply forwards to
  // RunInBackground() on the given object.
  static void run(SequentialTableReaderBackgroundImpl<Holder> *object) {
    object->RunInBackground();
  }
  // True when there is no current element: key_ is empty before the first
  // successful Next() and after Next() finds the wrapped reader Done().
  virtual bool Done() const {
    return key_.empty();
  }
  // Returns the key of the current element; raises an error (KALDI_ERR) if
  // there is no current element.
  virtual std::string Key() {
    if (key_.empty())
      KALDI_ERR << "Calling Key() at the wrong time.";
    return key_;
  }
  // Returns the object of the current element, as held by this class's own
  // holder_; raises an error (KALDI_ERR) if there is no current element.
  virtual T &Value() {
    if (key_.empty())
      KALDI_ERR << "Calling Value() at the wrong time.";
    return holder_.Value();
  }
  // Not supported on the background wrapper: always raises an error.  The
  // argument is unused.
  void SwapHolder(Holder *other_holder) {
    KALDI_ERR << "SwapHolder() should not be called on this class.";
  }
  // Releases the current object by clearing holder_; raises an error if there
  // is no current element.
  virtual void FreeCurrent() {
    if (key_.empty())
      KALDI_ERR << "Calling FreeCurrent() at the wrong time.";
    // note: ideally a call to Value() should crash if you have just called
    // FreeCurrent().  For typical holders such as KaldiObjectHolder this will
    // happen inside the holder_.Value() call.  This won't be the case for all
    // holders, but it's not a great loss (just a missed opportunity to spot a
    // code error).
    holder_.Clear();
  }
  // Called in the foreground thread.  Waits until the background thread has
  // an element (or end-of-data) ready, takes it over, and then lets the
  // background thread continue reading.
  virtual void Next() {
    consumer_sem_.Wait();
    // base_reader_ is NULL / not open here only if the background thread hit
    // its catch-block (or the object was closed).
    if (base_reader_ == NULL || !base_reader_->IsOpen())
      KALDI_ERR << "Error detected (likely code error) in background "
                << "reader (',bg' option)";
    if (base_reader_->Done()) {
      // there is nothing else to read.
      key_ = "";
    } else {
      key_ = base_reader_->Key();
      base_reader_->SwapHolder(&holder_);
    }
    // this Signal() tells the producer thread, in the background,
    // that it's now safe to read the next value.
    producer_sem_.Signal();
  }

  // note: we can be sure that Close() won't be called twice, as the TableReader
  // object will delete this object after calling Close.
  // Closes and deletes the wrapped reader, then joins the background thread.
  // Returns the wrapped reader's Close() status; an exception thrown by that
  // Close() is swallowed and reported as false.
  virtual bool Close() {
    KALDI_ASSERT(base_reader_ != NULL && thread_.joinable());
    // wait until the producer thread is idle.
    consumer_sem_.Wait();
    bool ans = true;
    try {
      ans = base_reader_->Close();
    } catch (...) {
      ans = false;
    }
    delete base_reader_;
    // setting base_reader_ to NULL will cause the loop in the producer thread
    // to exit.
    base_reader_ = NULL;
    // Release the producer in case it is blocked in producer_sem_.Wait().
    producer_sem_.Signal();

    thread_.join();
    return ans;
  }
  // Closes the reader if it is still open, raising an error if that fails.
  // NOTE(review): thread_ is only joined inside Close(); when base_reader_ has
  // already been set to NULL by the catch-block of RunInBackground(), Close()
  // is skipped here and thread_ is destroyed without an explicit join.
  ~SequentialTableReaderBackgroundImpl() {
    if (base_reader_) {
      if (!Close()) {
        KALDI_ERR << "Error detected closing background reader "
                  << "(relates to ',bg' modifier)";
      }
    }
  }
 private:
  std::string key_;   // key of the current element; empty means "no element".
  Holder holder_;     // holds the current object (swapped out of base_reader_).
  // I couldn't figure out what to call these semaphores.  consumer_sem_ is the
  // one that the consumer (main thread) waits on; producer_sem_ is the one
  // that the producer (background thread) waits on.
  Semaphore consumer_sem_;
  Semaphore producer_sem_;
  std::thread thread_;  // the background thread, started in Open().
  // The wrapped reader; set to NULL once it has been closed and deleted.
  SequentialTableReaderImplBase<Holder> *base_reader_;

};

// Constructs the reader.  An empty rspecifier leaves the object closed; any
// other value is opened immediately, and failure to open is reported through
// KALDI_ERR.
template<class Holder>
SequentialTableReader<Holder>::SequentialTableReader(const std::string
                                                     &rspecifier): impl_(NULL) {
  if (rspecifier.empty())
    return;  // nothing to open yet; the caller may call Open() later.
  const bool opened = Open(rspecifier);
  if (!opened)
    KALDI_ERR << "Error constructing TableReader: rspecifier is " << rspecifier;
}

// Opens the table described by rspecifier, closing any previously open table
// first (an error closing it is raised via KALDI_ERR).  The rspecifier is
// classified as archive or script and the matching implementation object is
// created; if the options request background reading, that implementation is
// wrapped in a SequentialTableReaderBackgroundImpl.
// Returns true on success, false if the rspecifier is invalid or the
// underlying object could not be opened.
template<class Holder>
bool SequentialTableReader<Holder>::Open(const std::string &rspecifier) {
  if (IsOpen() && !Close())
    KALDI_ERR << "Could not close previously open object.";
  // From here on impl_ is NULL.

  RspecifierOptions opts;
  const RspecifierType kind = ClassifyRspecifier(rspecifier, NULL, &opts);
  if (kind == kArchiveRspecifier) {
    impl_ = new SequentialTableReaderArchiveImpl<Holder>();
  } else if (kind == kScriptRspecifier) {
    impl_ = new SequentialTableReaderScriptImpl<Holder>();
  } else {  // kNoRspecifier or anything unexpected.
    KALDI_WARN << "Invalid rspecifier " << rspecifier;
    return false;
  }

  const bool base_opened = impl_->Open(rspecifier);
  if (!base_opened) {
    // The sub-object will have printed warnings; we never keep a non-open
    // implementation object around.
    delete impl_;
    impl_ = NULL;
    return false;
  }

  if (!opts.background)
    return true;

  // Wrap the (already open) reader so that reading happens in a background
  // thread.  The rxfilename is ignored in this Open() call, which should only
  // return false on code error.
  impl_ = new SequentialTableReaderBackgroundImpl<Holder>(impl_);
  return impl_->Open("");
}

// Closes the reader and returns the status reported by the implementation
// object.  The implementation object is always destroyed afterwards, because
// IsOpen() is defined as "impl_ is non-NULL".
template<class Holder>
bool SequentialTableReader<Holder>::Close() {
  this->CheckImpl();
  const bool closed_ok = this->impl_->Close();
  delete this->impl_;
  this->impl_ = NULL;
  return closed_ok;
}


// The reader is open exactly when it owns an implementation object: that
// object is deleted whenever it is not open, so the IsOpen() functions of the
// Impl classes are not needed here.
template<class Holder>
bool SequentialTableReader<Holder>::IsOpen() const {
  const bool has_impl = (this->impl_ != NULL);
  return has_impl;
}

// Returns the key of the current element.  CheckImpl() validates that an
// implementation object exists; the implementation's Key() may itself throw
// if called wrongly in other ways (e.g. at eof).
template<class Holder>
std::string SequentialTableReader<Holder>::Key() {
  this->CheckImpl();
  std::string current_key = this->impl_->Key();
  return current_key;
}


// Forwards to the implementation's FreeCurrent() after CheckImpl() has
// validated the implementation object.
template<class Holder>
void SequentialTableReader<Holder>::FreeCurrent() {
  this->CheckImpl();
  this->impl_->FreeCurrent();
}


// Returns a reference to the current object, as provided by the
// implementation.  The implementation's Value() may throw.
template<class Holder>
typename SequentialTableReader<Holder>::T &
SequentialTableReader<Holder>::Value() {
  this->CheckImpl();
  T &current_value = this->impl_->Value();
  return current_value;
}


// Advances to the next element by forwarding to the implementation's Next()
// after CheckImpl() has validated the implementation object.
template<class Holder>
void SequentialTableReader<Holder>::Next() {
  this->CheckImpl();
  this->impl_->Next();
}

// Reports whether the implementation has run out of elements.
template<class Holder>
bool SequentialTableReader<Holder>::Done() {
  this->CheckImpl();
  const bool finished = this->impl_->Done();
  return finished;
}


// Destroys the implementation object, if there is one.  Note that the
// destructor of the implementation object may throw.
template<class Holder>
SequentialTableReader<Holder>::~SequentialTableReader() {
  if (impl_ != NULL)
    delete impl_;
}


// Abstract interface shared by the TableWriter implementation classes.
// T is the type of the objects written, as defined by the Holder.
template<class Holder> class TableWriterImplBase {
 public:
  typedef typename Holder::T T;

  // Opens the writer for the given wspecifier; returns true on success.
  virtual bool Open(const std::string &wspecifier) = 0;

  // Write returns true on success, false on failure, but
  // some errors may not be detected until we call Close().
  // It throws (via KALDI_ERR) if called wrongly.  We could
  // have just thrown on all errors, since this is what
  // TableWriter does; it was designed this way because originally
  // TableWriter::Write returned an exit status.
  virtual bool Write(const std::string &key, const T &value) = 0;

  // Flush will flush any archive; it does not return error status,
  //  any errors will be reported on the next Write or Close.
  virtual void Flush() = 0;

  // Closes the writer; returns false if an error is detected.
  virtual bool Close() = 0;

  // Returns true if the writer is currently open.
  virtual bool IsOpen() const = 0;

  // May throw on write error if Close was not called.
  virtual ~TableWriterImplBase() { }

  TableWriterImplBase() { }
 private:
  // Implementation objects are neither copyable nor assignable.
  KALDI_DISALLOW_COPY_AND_ASSIGN(TableWriterImplBase);
};


// The implementation of TableWriter we use when writing directly
// to an archive with no associated scp.
// TableWriter implementation that writes "key object" records straight into a
// single archive stream (no scp file is produced).
template<class Holder>
class TableWriterArchiveImpl: public TableWriterImplBase<Holder> {
 public:
  typedef typename Holder::T T;

  // Opens the archive named in wspecifier.  A writer that is already open is
  // closed first; it is an error (KALDI_ERR) if that writer is in the
  // write-error state or cannot be closed cleanly, since the user may not
  // have noticed that failure.  Returns false if the output cannot be opened.
  virtual bool Open(const std::string &wspecifier) {
    if (state_ == kWriteError)
      KALDI_ERR << "Opening stream, already open with write error.";
    if (state_ != kUninitialized) {
      if (!Close())
        KALDI_ERR << "Opening stream, error closing previously open stream.";
    }

    wspecifier_ = wspecifier;
    WspecifierType type = ClassifyWspecifier(wspecifier,
                                             &archive_wxfilename_,
                                             NULL,
                                             &opts_);
    KALDI_ASSERT(type == kArchiveWspecifier);  // or wrongly called.

    // The final 'false' means: do not write a binary header.  On failure the
    // stream is not open; the caller reports the error (we return bool), so
    // nothing is printed here.
    const bool opened = output_.Open(archive_wxfilename_, opts_.binary, false);
    state_ = opened ? kOpen : kUninitialized;
    return opened;
  }

  // True while the underlying stream is open, which includes the write-error
  // state.
  virtual bool IsOpen() const {
    if (state_ == kUninitialized)
      return false;
    if (state_ == kOpen || state_ == kWriteError)
      return true;
    KALDI_ERR << "IsOpen() called on TableWriter in invalid state.";
    return false;
  }

  // Writes one (key, value) record.  Returns true on success and false on
  // failure, although some errors may only be detected by Close().  Throws
  // (KALDI_ERR) if the writer is not open or the key is not a valid token.
  virtual bool Write(const std::string &key, const T &value) {
    if (state_ == kWriteError) {
      // The user should already know about the problem from the previous
      // call to Write.
      KALDI_WARN << "Attempting to write to invalid stream.";
      return false;
    }
    if (state_ != kOpen)
      KALDI_ERR << "Write called on invalid stream";

    if (!IsToken(key))  // e.g. empty string or has spaces...
      KALDI_ERR << "Using invalid key " << key;

    std::ostream &os = output_.Stream();
    os << key << ' ';
    const bool wrote = Holder::Write(os, opts_.binary, value);
    if (!wrote) {
      KALDI_WARN << "Write failure to "
                 << PrintableWxfilename(archive_wxfilename_);
      state_ = kWriteError;
      return false;
    }
    // Defensive: never report success while in the error state, because the
    // archive may be corrupted and unreadable after a failed Write.
    if (state_ == kWriteError)
      return false;

    if (opts_.flush)
      Flush();
    return true;
  }

  // Flushes the archive stream.  No error status is returned; any error will
  // show up on the next Write or Close.
  virtual void Flush() {
    if (state_ == kOpen || state_ == kWriteError) {
      output_.Stream().flush();  // Don't check error status.
      return;
    }
    KALDI_WARN << "Flush called on not-open writer.";
  }

  // Closes the archive stream and resets the writer to the uninitialized
  // state.  Returns false if closing the stream failed or if an earlier Write
  // had failed.  Throws (KALDI_ERR) if the writer was not open.
  virtual bool Close() {
    if (!(this->IsOpen() && output_.IsOpen()))
      KALDI_ERR << "Close called on a stream that was not open."
                << this->IsOpen() << ", " << output_.IsOpen();

    const bool stream_closed = output_.Close();
    const bool had_write_error = (state_ == kWriteError);
    state_ = kUninitialized;  // whatever happened, we are no longer open.

    if (!stream_closed) {
      KALDI_WARN << "Error closing stream: wspecifier is " << wspecifier_;
      return false;
    }
    if (had_write_error) {
      KALDI_WARN << "Closing writer in error state: wspecifier is "
                 << wspecifier_;
      return false;
    }
    return true;
  }

  TableWriterArchiveImpl(): state_(kUninitialized) {}

  // May throw on write error if Close was not called.
  virtual ~TableWriterArchiveImpl() {
    if (IsOpen() && !Close())
      KALDI_ERR << "At TableWriter destructor: Write failed or stream close "
                << "failed: wspecifier is "<<  wspecifier_;
  }

 private:
  Output output_;                   // the archive stream.
  WspecifierOptions opts_;          // options parsed from the wspecifier.
  std::string wspecifier_;          // kept for diagnostics.
  std::string archive_wxfilename_;  // where the archive is written.
  enum {               // is stream open?
    kUninitialized,    // no
    kOpen,             // yes
    kWriteError,       // yes
  } state_;
};


// The implementation of TableWriter we use when writing to
// individual files (more generally, wxfilenames) specified
// in an scp file that we read.

// Note: the code for this class is similar to
// RandomAccessTableReaderScriptImpl; try to keep them in sync.

// TableWriter implementation for script (scp) wspecifiers: the script file is
// read up front, and each Write() looks up the output filename for its key in
// that script and writes the object to that file.
template<class Holder>
class TableWriterScriptImpl: public TableWriterImplBase<Holder> {
 public:
  typedef typename Holder::T T;

  TableWriterScriptImpl(): last_found_(0), state_(kUninitialized) {}

  // Reads and validates the script file named in wspecifier.
  // Returns true on success.  Returns false (leaving the object in the
  // kNotReadScript state with an empty script_) if the script file cannot be
  // read or contains a duplicate key.  Throws (KALDI_ERR) if the writer is
  // already open.
  virtual bool Open(const std::string &wspecifier) {
    switch (state_) {
      case kReadScript:
        KALDI_ERR << " Opening already open TableWriter: call Close first.";
      case kUninitialized: case kNotReadScript:
        break;
    }
    wspecifier_ = wspecifier;
    WspecifierType ws = ClassifyWspecifier(wspecifier,
                                           NULL,
                                           &script_rxfilename_,
                                           &opts_);
    KALDI_ASSERT(ws == kScriptWspecifier);  // or wrongly called.
    KALDI_ASSERT(script_.empty());  // no way it could be nonempty at this point.

    if (!ReadScriptFile(script_rxfilename_,
                         true,  // print any warnings
                         &script_)) {  // error reading script file or invalid
                                       // format
      // Discard anything that was read before the failure: a later Open() is
      // allowed from the kNotReadScript state and asserts that script_ is
      // empty.
      script_.clear();
      state_ = kNotReadScript;
      return false;  // no need to print further warnings.  user gets the error.
    }
    std::sort(script_.begin(), script_.end());
    for (size_t i = 0; i+1 < script_.size(); i++) {
      if (script_[i].first.compare(script_[i+1].first) >= 0) {
        // script[i] not < script[i+1] in lexical order...
        KALDI_WARN << "Script file " << PrintableRxfilename(script_rxfilename_)
                   << " contains duplicate key " << script_[i].first;
        // Same reasoning as above: keep the "script_ is empty unless open"
        // invariant so that the object can be re-opened.
        script_.clear();
        state_ = kNotReadScript;
        return false;
      }
    }
    state_ = kReadScript;
    return true;
  }

  // The writer is open once the script has been read successfully.
  virtual bool IsOpen() const {  return (state_ == kReadScript);  }

  // Forgets the script and returns to the uninitialized state.  Always
  // returns true; throws (KALDI_ERR) if the writer was not open.
  virtual bool Close() {
    if (!IsOpen())
      KALDI_ERR << "Close() called on TableWriter that was not open.";
    state_ = kUninitialized;
    last_found_ = 0;
    script_.clear();
    return true;
  }

  // Writes value to the file that the script associates with key.
  // Returns true on success and false on failure (no script entry for the key
  // in non-permissive mode, or the file could not be opened, written or
  // closed).  Throws (KALDI_ERR) if the writer is not open or the key is not
  // a valid token.
  virtual bool Write(const std::string &key, const T &value) {
    if (!IsOpen())
      KALDI_ERR << "Write called on invalid stream";

    if (!IsToken(key))  // e.g. empty string or has spaces...
      KALDI_ERR << "Using invalid key " << key;

    std::string wxfilename;
    if (!LookupFilename(key, &wxfilename)) {
      if (opts_.permissive) {
        return true;  // In permissive mode, it's as if we're writing to
                     // /dev/null for missing keys.
      } else {
        KALDI_WARN << "Script file "
                   << PrintableRxfilename(script_rxfilename_)
                   << " has no entry for key " <<key;
        return false;
      }
    }
    Output output;
    if (!output.Open(wxfilename, opts_.binary, false)) {
      // Open in the text/binary mode (on Windows) given by member var. "binary"
      // (obtained from wspecifier), but do not put the binary-mode header (it
      // will be written, if needed, by the Holder::Write function.)
      KALDI_WARN << "Failed to open stream: "
                 << PrintableWxfilename(wxfilename);
      return false;
    }
    if (!Holder::Write(output.Stream(), opts_.binary, value)
        || !output.Close()) {
      KALDI_WARN << "Failed to write data to "
                 << PrintableWxfilename(wxfilename);

      return false;
    }
    return true;
  }

  // Flush does nothing in this implementation, there is nothing to flush.
  virtual void Flush() { }


  virtual ~TableWriterScriptImpl() {
    // Nothing to do in destructor.
  }

 private:
  // Looks up the filename for key in the sorted script_.  On success stores
  // it in *wxfilename, remembers the position in last_found_ and returns
  // true; otherwise returns false.
  // Note: this function is almost the same as in
  // RandomAccessTableReaderScriptImpl.
  bool LookupFilename(const std::string &key, std::string *wxfilename) {
    // First, an optimization: if we're going consecutively, this will
    // make the lookup very fast.
    last_found_++;
    if (last_found_ < script_.size() && script_[last_found_].first == key) {
      *wxfilename = script_[last_found_].second;
      return true;
    }
    std::pair<std::string, std::string> pr(key, "");  // Important that ""
    // compares less than or equal to any string, so lower_bound points to the
    // element that has the same key.
    typedef typename std::vector<std::pair<std::string, std::string> >
                     ::const_iterator IterType;
    IterType iter = std::lower_bound(script_.begin(), script_.end(), pr);
    if (iter != script_.end() && iter->first == key) {
      last_found_ = iter - script_.begin();
      *wxfilename = iter->second;
      return true;
    } else {
      return false;
    }
  }


  WspecifierOptions opts_;         // options parsed from the wspecifier.
  std::string wspecifier_;         // the wspecifier passed to Open().
  std::string script_rxfilename_;  // the script file we read.

  // the script_ variable contains pairs of (key, filename), sorted using
  // std::sort.  This can be used with binary_search to look up filenames for
  // writing.  If this becomes inefficient we can use std::unordered_map (but I
  // suspect this wouldn't be significantly faster & would use more memory).
  // If memory becomes a problem here, the user should probably be passing
  // only the relevant part of the scp file rather than expecting us to get too
  // clever in the code.
  // Invariant: script_ is empty whenever the writer is not open.
  std::vector<std::pair<std::string, std::string> > script_;
  size_t last_found_;  // This is for an optimization used in LookupFilename.

  enum {
    kUninitialized,
    kReadScript,
    kNotReadScript,  // read of script failed.
  } state_;
};


// The implementation of TableWriter we use when writing directly
// to an archive plus an associated scp.
// TableWriter implementation that writes every object to an archive and, for
// each object, also writes a "key archive:offset" line to a script file.
template<class Holder>
class TableWriterBothImpl: public TableWriterImplBase<Holder> {
 public:
  typedef typename Holder::T T;

  // Opens both the archive and the script file named in wspecifier.
  // A writer that is already open is closed first; it is an error
  // (KALDI_ERR) if it is in the write-error state or cannot be closed
  // cleanly.  Returns false if either output cannot be opened, in which case
  // neither is left open.
  virtual bool Open(const std::string &wspecifier) {
    switch (state_) {
      case kUninitialized:
        break;
      case kWriteError:
        KALDI_ERR << "Opening stream, already open with write error.";
      case kOpen: default:
        if (!Close())  // throw because this error may not have been previously
                       // detected by user.
          KALDI_ERR << "Opening stream, error closing previously open stream.";
    }
    wspecifier_ = wspecifier;
    WspecifierType ws = ClassifyWspecifier(wspecifier,
                                           &archive_wxfilename_,
                                           &script_wxfilename_,
                                           &opts_);
    KALDI_ASSERT(ws == kBothWspecifier);  // or wrongly called.
    if (ClassifyWxfilename(archive_wxfilename_) != kFileOutput)
      KALDI_WARN << "When writing to both archive and script, the script file "
          "will generally not be interpreted correctly unless the archive is "
          "an actual file: wspecifier = " << wspecifier;

    if (!archive_output_.Open(archive_wxfilename_, opts_.binary, false)) {
      // false means no binary header.
      state_ = kUninitialized;
      return false;
    }
    if (!script_output_.Open(script_wxfilename_, false, false)) {  // first
      // false means text mode: script files always text-mode.   second false
      //  means don't write header (doesn't matter for text mode).
      archive_output_.Close();  // Don't care about status: error anyway.
      state_ = kUninitialized;
      return false;
    }
    state_ = kOpen;
    return true;
  }

  // True while the streams are open, which includes the write-error state.
  virtual bool IsOpen() const {
    switch (state_) {
      case kUninitialized: return false;
      case kOpen: case kWriteError: return true;
      default: KALDI_ERR << "IsOpen() called on TableWriter in invalid state.";
    }
    return false;
  }

  // Builds the script-file entry for an object that starts at streampos in
  // the archive: the archive filename followed by ':' and the offset.
  // Asserts that the position is valid (i.e. does not print as -1).
  void MakeFilename(typename std::ostream::pos_type streampos,
                    std::string *output) const {
    std::ostringstream ss;
    ss << ':' << streampos;
    KALDI_ASSERT(ss.str() != ":-1");
    *output = archive_wxfilename_ + ss.str();

    // e.g. /some/file:12302.
    // Note that we warned if archive_wxfilename_ is not an actual filename;
    // the philosophy is we give the user rope and if they want to hang
    // themselves, with it, fine.
  }

  // Writes one (key, value) record to the archive and the matching line to
  // the script file.  Returns true on success, false on failure, but some
  // errors may not be detected till we call Close().  Throws (KALDI_ERR) if
  // the writer is not open or the key is not a valid token.
  virtual bool Write(const std::string &key, const T &value) {
    switch (state_) {
      case kOpen: break;
      case kWriteError:
        // user should have known from the last
        // call to Write that there was a problem.  Warn about it.
        KALDI_WARN << "Writing to non-open TableWriter object.";
        return false;
      case kUninitialized: default:
        KALDI_ERR << "Write called on invalid stream";
    }
    // state_ is kOpen from here on: the kWriteError case returned above, and
    // every failure below returns immediately after setting kWriteError.
    if (!IsToken(key))  // e.g. empty string or has spaces...
      KALDI_ERR << "Using invalid key " << key;
    std::ostream &archive_os = archive_output_.Stream();
    archive_os << key << ' ';
    typename std::ostream::pos_type archive_os_pos = archive_os.tellp();
    // position at start of Write() to archive.  We will record this in the
    // script file.
    std::string offset_rxfilename;  // rxfilename with offset into the archive,
    // e.g. some_archive_name.ark:431541423
    MakeFilename(archive_os_pos, &offset_rxfilename);

    // Write to the script file first.
    // The idea is that we want to get all the information possible into the
    // script file, to make it easier to unwind errors later.
    std::ostream &script_os = script_output_.Stream();
    script_os << key << ' ' << offset_rxfilename << '\n';

    if (!Holder::Write(archive_os, opts_.binary, value)) {
      KALDI_WARN << "Write failure to "
                 << PrintableWxfilename(archive_wxfilename_);
      state_ = kWriteError;
      return false;
    }

    if (script_os.fail()) {
      KALDI_WARN << "Write failure to script file detected: "
                 << PrintableWxfilename(script_wxfilename_);
      state_ = kWriteError;
      return false;
    }

    if (archive_os.fail()) {
      KALDI_WARN << "Write failure to archive file detected: "
                 << PrintableWxfilename(archive_wxfilename_);
      state_ = kWriteError;
      return false;
    }

    if (opts_.flush)
      Flush();
    return true;
  }

  // Flush will flush any archive; it does not return error status,
  //  any errors will be reported on the next Write or Close.
  virtual void Flush() {
    switch (state_) {
      case kWriteError: case kOpen:
        archive_output_.Stream().flush();  // Don't check error status.
        script_output_.Stream().flush();  // Don't check error status.
        return;
      default:
        KALDI_WARN << "Flush called on not-open writer.";
    }
  }

  // Closes whichever of the two streams are open and resets the writer to
  // the uninitialized state.  Returns false if either close failed or if an
  // earlier Write had failed.  Throws (KALDI_ERR) if the writer was not open.
  virtual bool Close() {
    if (!this->IsOpen())
      KALDI_ERR << "Close called on a stream that was not open.";
    bool close_success = true;
    if (archive_output_.IsOpen())
      if (!archive_output_.Close()) close_success = false;
    if (script_output_.IsOpen())
      if (!script_output_.Close()) close_success = false;
    bool ans = close_success && (state_ != kWriteError);
    state_ = kUninitialized;
    return ans;
  }

  TableWriterBothImpl(): state_(kUninitialized) {}

  // May throw on write error if Close() was not called.
  // User can get the error status by calling Close().
  virtual ~TableWriterBothImpl() {
    if (!IsOpen()) return;
    else if (!Close())
      KALDI_ERR << "Write failed or stream close failed: "
                << wspecifier_;
  }

 private:
  Output archive_output_;           // the archive stream.
  Output script_output_;            // the script (scp) stream, text mode.
  WspecifierOptions opts_;          // options parsed from the wspecifier.
  std::string archive_wxfilename_;  // where the archive is written.
  std::string script_wxfilename_;   // where the script file is written.
  std::string wspecifier_;          // kept for diagnostics.
  enum {               // is stream open?
    kUninitialized,    // no
    kOpen,             // yes
    kWriteError,       // yes
  } state_;
};


// Constructs the writer.  An empty wspecifier leaves the object closed; any
// other value is opened immediately, and failure to open is reported through
// KALDI_ERR (together with the current errno text, in case it helps).
template<class Holder>
TableWriter<Holder>::TableWriter(const std::string &wspecifier): impl_(NULL) {
  if (wspecifier.empty())
    return;  // nothing to open yet; the caller may call Open() later.
  if (!Open(wspecifier))
    KALDI_ERR << "Failed to open table for writing with wspecifier: " << wspecifier
              << ": errno (in case it's relevant) is: " << strerror(errno);
}

// The writer is open exactly when it owns an implementation object.
template<class Holder>
bool TableWriter<Holder>::IsOpen() const {
  const bool has_impl = (this->impl_ != NULL);
  return has_impl;
}


// Opens the table described by wspecifier, closing any previously open writer
// first (failure to close it is raised via KALDI_ERR; call Close() yourself
// beforehand to avoid that exception).  The wspecifier is classified and the
// matching implementation object is created and opened.
// Returns true on success, false if the wspecifier is invalid or the
// implementation could not be opened.
template<class Holder>
bool TableWriter<Holder>::Open(const std::string &wspecifier) {
  if (IsOpen() && !Close())
    KALDI_ERR << "Failed to close previously open writer.";
  KALDI_ASSERT(impl_ == NULL);

  const WspecifierType kind = ClassifyWspecifier(wspecifier, NULL, NULL, NULL);
  if (kind == kBothWspecifier) {
    impl_ = new TableWriterBothImpl<Holder>();
  } else if (kind == kArchiveWspecifier) {
    impl_ = new TableWriterArchiveImpl<Holder>();
  } else if (kind == kScriptWspecifier) {
    impl_ = new TableWriterScriptImpl<Holder>();
  } else {  // kNoWspecifier or anything unexpected.
    KALDI_WARN << "ClassifyWspecifier: invalid wspecifier " << wspecifier;
    return false;
  }

  if (!impl_->Open(wspecifier)) {
    // The class will have printed a more specific warning; we never keep a
    // non-open implementation object around.
    delete impl_;
    impl_ = NULL;
    return false;
  }
  return true;
}

// Writes one (key, value) pair through the implementation object and raises
// an error (KALDI_ERR) if the implementation reports failure; a more specific
// warning will have been printed in the implementation's Write function.
template<class Holder>
void TableWriter<Holder>::Write(const std::string &key,
                                const T &value) const {
  CheckImpl();
  const bool written = impl_->Write(key, value);
  if (!written)
    KALDI_ERR << "Error in TableWriter::Write";
}

// Forwards to the implementation's Flush() after CheckImpl() has validated
// the implementation object.
template<class Holder>
void TableWriter<Holder>::Flush() {
  this->CheckImpl();
  this->impl_->Flush();
}

// Closes the writer and returns the status reported by the implementation
// object.  The implementation object is always destroyed afterwards, because
// IsOpen() is defined as "impl_ is non-NULL".
template<class Holder>
bool TableWriter<Holder>::Close() {
  this->CheckImpl();
  const bool closed_ok = this->impl_->Close();
  delete this->impl_;
  this->impl_ = NULL;
  return closed_ok;
}

// Destructor: closes the writer if it is still open and raises an error
// (KALDI_ERR) if that close reports failure.
template<class Holder>
TableWriter<Holder>::~TableWriter() {
  if (!IsOpen())
    return;  // nothing to close.
  const bool closed_ok = Close();
  if (!closed_ok) {
    KALDI_ERR << "Error closing TableWriter [in destructor].";
  }
}


// Types of RandomAccessTableReader:
// In principle, we would like to have four types of RandomAccessTableReader:
//  the 4 combinations  [scp, archive], [seekable, not-seekable],
// where if something is seekable we only store a file offset.  However,
// it seems sufficient for now to only implement two of these, in both
// cases assuming it's not seekable so we never store file offsets and always
// store either the scp line or the data in the archive.  The reasons are:
// (1)
// For scp files, storing the actual entry is not that much more expensive
// than storing the file offsets (since the entries are just filenames), and
// avoids a lot of fseek operations that might be expensive.
// (2)
// For archive files, there is no real reason, if you have the archive file
// on disk somewhere, why you wouldn't access it via its associated scp.
// [i.e. write it as ark, scp].  The main reason to read archives directly
// is if they are part of a pipe, and in this case it's not seekable, so
// we implement only this case.
//
// Note that we will rarely in practice have to keep in memory everything in
// the archive, as long as things are only read once from the archive (the
// "o, " or "once" option) and as long as we keep our keys in sorted order;
// to take advantage of this we need the "s, " (sorted) option, so we would
// read archives as e.g. "s, o, ark:-" (this is the rspecifier we would use if
// it was the standard input and these conditions held).

// Abstract interface shared by the RandomAccessTableReader implementation
// classes.  T is the type of the objects read, as defined by the Holder.
template<class Holder> class RandomAccessTableReaderImplBase {
 public:
  typedef typename Holder::T T;

  // Opens the reader for the given rspecifier; returns true on success.
  virtual bool Open(const std::string &rspecifier) = 0;

  // Returns true if the table has an entry for key.
  virtual bool HasKey(const std::string &key) = 0;

  // Returns the object stored under key.
  virtual const T &Value(const std::string &key) = 0;

  // Closes the reader; returns a success/failure status.
  virtual bool Close() = 0;

  virtual ~RandomAccessTableReaderImplBase() {}
};


// Implementation of RandomAccessTableReader for a script file; for simplicity
// we just read it in all in one go, as it's unlikely someone would generate
// this from a pipe.  In principle we could read it on-demand as for the
// archives, but this would probably be overkill.

// Note: the code for this class is similar to TableWriterScriptImpl:
// try to keep them in sync.
template<class Holder>
class RandomAccessTableReaderScriptImpl:
      public RandomAccessTableReaderImplBase<Holder> {
 public:
  typedef typename Holder::T T;

  RandomAccessTableReaderScriptImpl(): last_found_(0), state_(kUninitialized) {}

  // Opens the script file named by 'rspecifier' and reads all of its
  // (key, rxfilename) entries into script_.  Returns true on success.
  // Returns false, leaving the object in state kNotReadScript (from which
  // Open() may be called again), if the script file could not be read, if it
  // contains a duplicate key, or if the sorted option was given but the keys
  // are not in sorted order.  It is an error to call this on an object that is
  // already open.
  virtual bool Open(const std::string &rspecifier) {
    switch (state_) {
      case kNotHaveObject: case kHaveObject: case kHaveRange:
        KALDI_ERR << " Opening already open RandomAccessTableReader:"
                     " call Close first.";
      case kUninitialized: case kNotReadScript:
        break;
    }
    rspecifier_ = rspecifier;
    RspecifierType rs = ClassifyRspecifier(rspecifier,
                                           &script_rxfilename_,
                                           &opts_);
    KALDI_ASSERT(rs == kScriptRspecifier);  // or wrongly called.
    // script_ is empty here because Close() and every failure path below
    // clear it.
    KALDI_ASSERT(script_.empty());

    if (!ReadScriptFile(script_rxfilename_,
                        true,  // print any warnings
                        &script_)) {  // error reading script file or invalid
                                      // format
      // Discard anything that may have been read before the failure, so that a
      // later call to Open() starts from an empty script_.
      script_.clear();
      state_ = kNotReadScript;
      return false;  // no need to print further warnings.  user gets the error.
    }

    // If opts_.sorted, the user has asserted that the keys are already sorted.
    // Although we could easily sort them, we want to let the user know of this
    // mistake.  This same mistake could have serious effects if used with an
    // archive rather than a script.
    if (!opts_.sorted)
      std::sort(script_.begin(), script_.end());
    for (size_t i = 0; i + 1 < script_.size(); i++) {
      if (script_[i].first.compare(script_[i+1].first) >= 0) {
        // script[i] not < script[i+1] in lexical order...
        bool same = (script_[i].first == script_[i+1].first);
        KALDI_WARN << "Script file " << PrintableRxfilename(script_rxfilename_)
                   << (same ? " contains duplicate key: " :
                       " is not sorted (remove s, option or add ns, option):"
                       " key is ") << script_[i].first;
        // Leave script_ empty: kNotReadScript allows Open() to be called
        // again, and Open() requires script_ to be empty on entry.
        script_.clear();
        state_ = kNotReadScript;
        return false;
      }
    }
    state_ = kNotHaveObject;
    key_ = "";  // make sure we don't have a key set
    return true;
  }

  // Returns true if the script file has been successfully read and Close() has
  // not been called since.
  virtual bool IsOpen() const {
    return  (state_ == kNotHaveObject || state_ == kHaveObject ||
             state_ == kHaveRange);
  }

  // Releases any loaded object and the in-memory copy of the script file, and
  // returns the object to the kUninitialized state.  It is an error to call
  // this when the object is not open.  Always returns true.
  virtual bool Close() {
    if (!IsOpen())
      KALDI_ERR << "Close() called on RandomAccessTableReader that was not"
                   " open.";
    holder_.Clear();
    range_holder_.Clear();
    state_ = kUninitialized;
    last_found_ = 0;
    script_.clear();
    key_ = "";
    range_ = "";
    data_rxfilename_ = "";
    // This cannot fail because any errors of a "global" nature would have been
    // detected when we did Open().  With archives it's different.
    return true;
  }

  // Returns true if 'key' is present in the script file.  In permissive mode
  // it additionally requires that the object can actually be loaded.
  virtual bool HasKey(const std::string &key) {
    bool preload = opts_.permissive;
    // In permissive mode, we have to check that we can read
    // the scp entry before we assert that the key is there.
    return HasKeyInternal(key, preload);
  }


  // Returns the object for 'key', loading it if it is not the one currently
  // held.  Raises an error if the key is not in the script file or the object
  // cannot be loaded.  The returned reference refers to an object owned by
  // this reader (holder_ or range_holder_), which later calls that load a
  // different object will clear.
  virtual const T&  Value(const std::string &key) {
    if (!HasKeyInternal(key, true)) // true == preload.
      KALDI_ERR << "Could not get item for key " << key
                << ", rspecifier is " << rspecifier_ << " [to ignore this, "
                 << "add the p, (permissive) option to the rspecifier.";
    KALDI_ASSERT(key_ == key);
    if (state_ == kHaveObject) {
      return holder_.Value();
    } else {
      KALDI_ASSERT(state_ == kHaveRange);
      return range_holder_.Value();
    }
  }

  virtual ~RandomAccessTableReaderScriptImpl() { }

 private:

  // HasKeyInternal when called with preload == false just tells us whether the
  // key is in the scp.  With preload == true, which happens when the ,p
  // (permissive) option is given in the rspecifier (or when called from
  // Value()), it will also check that we can preload the object from disk
  // (loading from the rxfilename in the scp), and only return true if we can.
  // This function is called both from HasKey and from Value().
  virtual bool HasKeyInternal(const std::string &key, bool preload) {
    switch (state_) {
      case kUninitialized: case kNotReadScript:
        KALDI_ERR << "HasKey called on RandomAccessTableReader object that is"
                     " not open.";
      case kHaveObject:
        // holder_ is only "the answer" for key_ if no range was requested.
        if (key == key_ && range_.empty())
          return true;
        break;
      case kHaveRange:
        if (key == key_)
          return true;
        break;
      case kNotHaveObject: default: break;
    }
    KALDI_ASSERT(IsToken(key));
    size_t key_pos = 0;
    if (!LookupKey(key, &key_pos)) {
      return false;
    } else {
      if (!preload) {
        return true;  // we have the key, and were not asked to verify that the
                      // object could be read.
      } else {  // preload specified, so we have to attempt to pre-load the
                // object before returning.
        const std::string &entry = script_[key_pos].second;
        std::string data_rxfilename, range; // We will split
        // 'entry' (e.g. "1.ark:100[0:2]" into data_rxfilename
        // (e.g. "1.ark:100") and range (if any), e.g. "0:2".
        // The emptiness test guards the index into the last character.
        if (!entry.empty() && entry[entry.size()-1] == ']') {
          if(!ExtractRangeSpecifier(entry,
                                    &data_rxfilename,
                                    &range)) {
            KALDI_ERR << "TableReader: failed to parse range in '"
                      << entry << "'";
          }
        } else {
          data_rxfilename = entry;
        }
        if (state_ == kHaveRange) {
          if (data_rxfilename_ == data_rxfilename && range_ == range) {
            // the odd situation where two keys had the same rxfilename and range:
            // just change the key and keep the object.
            key_ = key;
            return true;
          } else {
            range_holder_.Clear();
            state_ = kHaveObject;
          }
        }
        // OK, at this point the state will be kHaveObject or kNotHaveObject.
        if (state_ == kHaveObject) {
          if (data_rxfilename_ != data_rxfilename) {
            // clear out the object.
            state_ = kNotHaveObject;
            holder_.Clear();
          }
        }
        // At this point we can safely switch to the new key, data_rxfilename
        // and range, and we know that if we have an object, it will already be
        // the correct one.  The state is now kHaveObject or kNotHaveObject.
        key_ = key;
        data_rxfilename_ = data_rxfilename;
        range_ = range;
        if (state_ == kNotHaveObject) {
          // we need to read the object.
          if (!input_.Open(data_rxfilename)) {
            KALDI_WARN << "Error opening stream "
                       << PrintableRxfilename(data_rxfilename);
            return false;
          } else {
            if (holder_.Read(input_.Stream())) {
              state_ = kHaveObject;
            } else {
              KALDI_WARN << "Error reading object from "
                  "stream " << PrintableRxfilename(data_rxfilename);
              return false;
            }
          }
        }
        // At this point the state is kHaveObject.
        if (range.empty())
          return true;  // we're done: no range was requested.
        if (range_holder_.ExtractRange(holder_, range)) {
          state_ = kHaveRange;
          return true;
        } else {
          KALDI_WARN  << "Failed to load object from "
                      << PrintableRxfilename(data_rxfilename)
                      << "[" << range << "]";
          // leave state at kHaveObject.
          return false;
        }
      }
    }
  }

  // This function attempts to look up the key "key" in the sorted array
  // script_.  If it was found it returns true and puts the array offset into
  // 'script_offset'; otherwise it returns false.
  bool LookupKey(const std::string &key, size_t *script_offset) {
    // First, an optimization: if we're going consecutively, this will
    // make the lookup very fast.  Since we may call HasKey and then
    // Value(), which both may look up the key, we test if either the
    // current or next position are correct.
    if (last_found_ < script_.size() && script_[last_found_].first == key) {
      *script_offset = last_found_;
      return true;
    }
    // Probe the following position, but only commit to it on a hit, so that
    // repeated misses cannot push last_found_ ever further past the array.
    const size_t next = last_found_ + 1;
    if (next < script_.size() && script_[next].first == key) {
      last_found_ = *script_offset = next;
      return true;
    }
    std::pair<std::string, std::string> pr(key, "");  // Important that ""
    // compares less than or equal to any string, so lower_bound points to the
    // element that has the same key.
    typedef typename std::vector<std::pair<std::string, std::string> >
                     ::const_iterator IterType;
    IterType iter = std::lower_bound(script_.begin(), script_.end(), pr);
    if (iter != script_.end() && iter->first == key) {
      last_found_ = *script_offset = iter - script_.begin();
      return true;
    } else {
      return false;
    }
  }


  Input input_;  // Use the same input_ object for reading each file, in case
                 // the scp specifies offsets in an archive so we can keep the
                 // same file open.
  RspecifierOptions opts_;
  std::string rspecifier_;  // rspecifier used to open this object; used in
                            // debug messages
  std::string script_rxfilename_;  // rxfilename of script file that we read.

  std::string key_;  // The current key of the object that we have, but see the
                     // notes regarding state_ for more explanation of the
                     // semantics.

  Holder holder_;
  Holder range_holder_; // Holds the partial object corresponding to the object
                        // range specifier 'range_'. this is only used when
                        // 'range_' is specified.
  std::string range_; // range within which we read the object from holder_.
                      // If key_ is set, always correspond to the key.
  std::string data_rxfilename_;  // the rxfilename corresponding to key_,
                                 // always set when key_ is set.


  // the script_ variable contains pairs of (key, filename), sorted using
  // std::sort.  This can be used with binary_search to look up filenames for
  // writing.  If this becomes inefficient we can use std::unordered_map (but I
  // suspect this wouldn't be significantly faster & would use more memory).
  // If memory becomes a problem here, the user should probably be passing
  // only the relevant part of the scp file rather than expecting us to get too
  // clever in the code.
  std::vector<std::pair<std::string, std::string> > script_;
  size_t last_found_;  // This is for an optimization used in LookupKey: the
                       // offset into script_ of the most recently found key.

  enum {
    //                   (*) is script_ set up?
    //                          (*) does holder_ contain an object?
    //                               (*) does range_holder_ contain an object?
    //
    //
    kUninitialized,  //    no    no    no
    kNotReadScript,  //    no    no    no
    kNotHaveObject,  //    yes   no    no
    kHaveObject,     //    yes   yes   no
    kHaveRange,      //    yes   yes   yes

    // If we are in a state where holder_ contains an object, it always contains
    // the object from 'key_', and the corresponding rxfilename is always
    // 'data_rxfilename_'.  If range_holder_ contains an object, it always
    // corresponds to the range 'range_' of the object in 'holder_', and always
    // corresponds to the current key.
  } state_;
};


// This is the base-class (with some implemented functions) for the
// implementations of RandomAccessTableReader when it's an archive.  This
// base-class handles opening the files, storing the state of the reading
// process, and loading objects.  This is the only case in which we have
// an intermediate class in the hierarchy between the virtual ImplBase
// class and the actual Impl classes.
// The child classes vary in the assumptions regarding sorting, etc.

template<class Holder>
class RandomAccessTableReaderArchiveImplBase:
      public RandomAccessTableReaderImplBase<Holder> {
 public:
  typedef typename Holder::T T;

  RandomAccessTableReaderArchiveImplBase(): holder_(NULL),
                                            state_(kUninitialized) { }

  // Opens the archive named in 'rspecifier'.  If this reader is already open
  // it is closed first, and a failure of that close is raised as an error.
  // Returns false, with a warning, if the stream cannot be opened.
  virtual bool Open(const std::string &rspecifier) {
    // Call Close() yourself beforehand to suppress this exception.
    if (state_ != kUninitialized && !this->Close())
      KALDI_ERR << "Error closing previous input.";

    rspecifier_ = rspecifier;
    RspecifierType rs = ClassifyRspecifier(rspecifier, &archive_rxfilename_,
                                           &opts_);
    KALDI_ASSERT(rs == kArchiveRspecifier);

    // For binary-mode holders, the NULL argument means we don't expect a
    // binary-mode header.
    const bool opened = Holder::IsReadInBinary()
        ? input_.Open(archive_rxfilename_, NULL)
        : input_.OpenTextMode(archive_rxfilename_);

    if (!opened) {
      KALDI_WARN << "Failed to open stream "
                 << PrintableRxfilename(archive_rxfilename_);
      state_ = kUninitialized;  // Failure on Open
      return false;  // User should print the error message.
    }
    state_ = kNoObject;
    return true;
  }

  // Attempts to read the next (key, object) pair from the archive.  Must be
  // called in state kNoObject.  On success the state becomes kHaveObject and
  // cur_key_ / holder_ contain the key and the value; otherwise the state
  // becomes kEof or kError and holder_ stays NULL.
  void ReadNextObject() {
    // A wrong state here means a code error in this class or a child class.
    if (state_ != kNoObject)
      KALDI_ERR << "ReadNextObject() called from wrong state.";

    std::istream &is = input_.Stream();
    // Clear any fail bits that the previous Read may have left set.
    is.clear();
    // Skips leading whitespace and reads the key token.
    is >> cur_key_;
    if (is.eof()) {
      state_ = kEof;
      return;
    }
    if (is.fail()) {
      // Not expected to happen, barring file-system errors.
      KALDI_WARN << "Error reading archive: rspecifier is " << rspecifier_;
      state_ = kError;
      return;
    }

    // The key must be followed by a space.  Tab and newline are accepted too,
    // so that archives produced by scripts unaware of the exact format can
    // still be read.
    const int c = is.peek();
    const bool separator_ok = (c == ' ' || c == '\t' || c == '\n');
    if (!separator_ok) {
      KALDI_WARN << "Invalid archive file format: expected space after key "
                 <<cur_key_
                 <<", got character "
                 << CharToString(static_cast<char>(is.peek()))
                 << ", reading archive "
                 << PrintableRxfilename(archive_rxfilename_);
      state_ = kError;
      return;
    }
    // Consume the space or tab; a newline is left in the stream.
    if (c != '\n')
      is.get();

    holder_ = new Holder;
    if (!holder_->Read(is)) {
      KALDI_WARN << "Object read failed, reading archive "
                 << PrintableRxfilename(archive_rxfilename_);
      state_ = kError;
      delete holder_;
      holder_ = NULL;
      return;
    }
    state_ = kHaveObject;
  }

  // True in every state except kUninitialized.  An unrecognized state value
  // is raised as an error.
  virtual bool IsOpen() const {
    if (state_ == kUninitialized)
      return false;
    if (state_ == kNoObject || state_ == kHaveObject ||
        state_ == kEof || state_ == kError)
      return true;
    KALDI_ERR << "IsOpen() called on invalid object.";
    return false;
  }

  // The shared part of the cleanup, called from the child classes' virtual
  // Close() functions.  Returns false if the reader was in the error state,
  // unless permissive mode was requested, in which case it warns and returns
  // true.
  bool CloseInternal() {
    if (!this->IsOpen())
      KALDI_ERR << "Close() called on TableReader twice or otherwise wrongly.";
    if (input_.IsOpen())
      input_.Close();

    // holder_ is non-NULL exactly when we are in state kHaveObject.
    if (state_ == kHaveObject) {
      KALDI_ASSERT(holder_ != NULL);
      delete holder_;
      holder_ = NULL;
    } else {
      KALDI_ASSERT(holder_ == NULL);
    }

    const bool had_error = (state_ == kError);
    state_ = kUninitialized;
    if (!had_error)
      return true;
    if (opts_.permissive) {
      KALDI_WARN << "Error state detected closing reader.  "
                 << "Ignoring it because you specified permissive mode.";
      return true;
    }
    return false;
  }

  ~RandomAccessTableReaderArchiveImplBase() {
    // Calling CloseInternal() is the child class's responsibility; by now it
    // must already have happened.
    KALDI_ASSERT(state_ == kUninitialized && holder_ == NULL);
  }
 private:
  Input input_;       // The stream we read the archive from.
 protected:
  // Everything below is used directly by the child classes.

  std::string cur_key_;   // Key of the object just read (valid in state
                          // kHaveObject).
  Holder *holder_;    // The object just read (non-NULL only in state
                      // kHaveObject).

  std::string rspecifier_;
  std::string archive_rxfilename_;
  RspecifierOptions opts_;

  enum {  //  [The state of the reading process]        [does holder_ [is input_
    //                                                      have object]   open]
    kUninitialized,  // Uninitialized or closed                   no         no
    kNoObject,      // Do not have object in holder_              no         yes
    kHaveObject,    // Have object in holder_                     yes        yes
    kEof,           // End of file                                no         yes
    kError,         // Some kind of error-state in the reading.   no         yes
  } state_;
};


// RandomAccessTableReaderDSortedArchiveImpl (DSorted for "doubly sorted") is
// the implementation for random-access reading of archives when both the
// archive, and the calling code, are in sorted order (i.e. we ask for the keys
// in sorted order).  This is when the s and cs options are both given.  It only
// ever has to keep one object in memory.  It inherits from
// RandomAccessTableReaderArchiveImplBase, which implements the common parts of
// RandomAccessTableReader that are used when it's an archive we're reading
// from.

template<class Holder>
class RandomAccessTableReaderDSortedArchiveImpl:
      public RandomAccessTableReaderArchiveImplBase<Holder> {
  using RandomAccessTableReaderArchiveImplBase<Holder>::kUninitialized;
  using RandomAccessTableReaderArchiveImplBase<Holder>::kHaveObject;
  using RandomAccessTableReaderArchiveImplBase<Holder>::kNoObject;
  using RandomAccessTableReaderArchiveImplBase<Holder>::kEof;
  using RandomAccessTableReaderArchiveImplBase<Holder>::kError;
  using RandomAccessTableReaderArchiveImplBase<Holder>::state_;
  using RandomAccessTableReaderArchiveImplBase<Holder>::opts_;
  using RandomAccessTableReaderArchiveImplBase<Holder>::cur_key_;
  using RandomAccessTableReaderArchiveImplBase<Holder>::holder_;
  using RandomAccessTableReaderArchiveImplBase<Holder>::rspecifier_;
  using RandomAccessTableReaderArchiveImplBase<Holder>::archive_rxfilename_;
  using RandomAccessTableReaderArchiveImplBase<Holder>::ReadNextObject;
 public:
  typedef typename Holder::T T;

  RandomAccessTableReaderDSortedArchiveImpl() { }

  // Closes the reader.  Returns the result of the base-class CloseInternal().
  virtual bool Close() {
    // Forget the last key requested from the archive we are closing; otherwise
    // the called-in-sorted-order check in FindKeyInternal() would compare the
    // first key requested after a subsequent Open() against a key belonging to
    // the previous archive.
    last_requested_key_.clear();
    // Everything else is handled by the generic base-class cleanup.
    return this->CloseInternal();
  }

  // Returns true if 'key' is found in the archive, reading forward as far as
  // necessary.  Keys must be requested in sorted order.
  virtual bool HasKey(const std::string &key) {
    return FindKeyInternal(key);
  }

  // Returns the object stored under 'key'; raises an error if there is no such
  // key.  The reference refers to the single object this reader holds, which
  // is deleted when a later call reads past it or the reader is closed.
  virtual const T & Value(const std::string &key) {
    if (!FindKeyInternal(key)) {
      KALDI_ERR << "Value() called but no such key " << key
                << " in archive " << PrintableRxfilename(archive_rxfilename_);
    }
    KALDI_ASSERT(this->state_ == kHaveObject && key == this->cur_key_
                 && holder_ != NULL);
    return this->holder_->Value();
  }

  virtual ~RandomAccessTableReaderDSortedArchiveImpl() {
    if (this->IsOpen())
      if (!Close())  // more specific warning will already have been printed.
        // we are in some kind of error state & user did not find out by
        // calling Close().
        KALDI_ERR << "Error closing RandomAccessTableReader: rspecifier is "
                  << rspecifier_;
  }
 private:
  // FindKeyInternal tries to find the key by calling "ReadNextObject()"
  // as many times as necessary till we get to it.  It is called from
  // both HasKey() and Value().
  bool FindKeyInternal(const std::string &key) {
    // First check that the user is calling us right: should be
    // in sorted order.  If not, error.
    if (!last_requested_key_.empty()) {
      if (key.compare(last_requested_key_) < 0) {  // key < last_requested_key_
        KALDI_ERR << "You provided the \"cs\" option "
                  << "but are not calling with keys in sorted order: "
                  << key << " < " << last_requested_key_ << ": rspecifier is "
                  << rspecifier_;
      }
    }
    // last_requested_key_ is just for debugging of order of calling.
    last_requested_key_ = key;

    if (state_ == kNoObject)
      ReadNextObject();  // This can only happen
      // once, the first time someone calls HasKey() or Value().  We don't
      // do it in the initializer to stop the program hanging too soon,
      // if reading from a pipe.

    if (state_ == kEof || state_ == kError) return false;

    if (state_ == kUninitialized)
      KALDI_ERR << "Trying to access a RandomAccessTableReader object that is"
                   " not open.";

    std::string prev_key;  // Key of the object read just before the current
    // one; used to check that the archive we're reading is in sorted order.
    while (1) {
      KALDI_ASSERT(state_ == kHaveObject);
      int compare = key.compare(cur_key_);
      if (compare == 0) {  // key == cur_key_
        return true;  // we got it..
      } else if (compare < 0) {  // key < cur_key_, so we already read past the
        // place where we want to be.  This implies that we will never find it
        // [due to the sorting etc., this means it just isn't in the archive].
        return false;
      } else {  // compare > 0, key > cur_key_.  We need to read further ahead.
        prev_key = cur_key_;
        // read next object.. we have to set state to kNoObject first.
        KALDI_ASSERT(holder_ != NULL);
        delete holder_;
        holder_ = NULL;
        state_ = kNoObject;
        ReadNextObject();
        if (state_ != kHaveObject)
          return false;  // eof or read error.
        if (cur_key_.compare(prev_key) <= 0) {
          KALDI_ERR << "You provided the \"s\" option "
                    << " (sorted order), but keys are out of order or"
                       " duplicated: "
                    << prev_key << " is followed by " << cur_key_
                    << ": rspecifier is " << rspecifier_;
        }
      }
    }
  }

  /// Last string provided to HasKey() or Value(); reset by Close().
  std::string last_requested_key_;
};

// RandomAccessTableReaderSortedArchiveImpl is for random-access reading of
// archives when the user specified the sorted (s) option but not the
// called-sorted (cs) option.
template<class Holder>
class RandomAccessTableReaderSortedArchiveImpl:
      public RandomAccessTableReaderArchiveImplBase<Holder> {
  using RandomAccessTableReaderArchiveImplBase<Holder>::kUninitialized;
  using RandomAccessTableReaderArchiveImplBase<Holder>::kHaveObject;
  using RandomAccessTableReaderArchiveImplBase<Holder>::kNoObject;
  using RandomAccessTableReaderArchiveImplBase<Holder>::kEof;
  using RandomAccessTableReaderArchiveImplBase<Holder>::kError;
  using RandomAccessTableReaderArchiveImplBase<Holder>::state_;
  using RandomAccessTableReaderArchiveImplBase<Holder>::opts_;
  using RandomAccessTableReaderArchiveImplBase<Holder>::cur_key_;
  using RandomAccessTableReaderArchiveImplBase<Holder>::holder_;
  using RandomAccessTableReaderArchiveImplBase<Holder>::rspecifier_;
  using RandomAccessTableReaderArchiveImplBase<Holder>::archive_rxfilename_;
  using RandomAccessTableReaderArchiveImplBase<Holder>::ReadNextObject;

 public:
  typedef typename Holder::T T;

  RandomAccessTableReaderSortedArchiveImpl():
      last_found_index_(static_cast<size_t>(-1)),
      pending_delete_(static_cast<size_t>(-1)) { }

  // Frees every object read so far, resets the cached indexes, and then does
  // the shared base-class cleanup, whose result is returned.
  virtual bool Close() {
    typedef typename std::vector<std::pair<std::string, Holder*> >::iterator
        PairIter;
    for (PairIter it = seen_pairs_.begin(); it != seen_pairs_.end(); ++it)
      delete it->second;
    seen_pairs_.clear();

    const size_t no_index = static_cast<size_t>(-1);
    pending_delete_ = no_index;
    last_found_index_ = no_index;

    return this->CloseInternal();
  }

  // Returns true if 'key' is in the archive, reading ahead if needed.  With
  // the once (o) option it is an error to ask about a key whose object has
  // already been handed out by Value() and freed.
  virtual bool HasKey(const std::string &key) {
    HandlePendingDelete();
    size_t pos;
    const bool found = FindKeyInternal(key, &pos);
    // The test on opts_.once is only for efficiency: a NULL holder can only
    // arise when that option is set.
    if (found && opts_.once && seen_pairs_[pos].second == NULL) {
      KALDI_ERR << "Error: HasKey called after Value() already called for "
                << " that key, and once (o) option specified: rspecifier is "
                << rspecifier_;
    }
    return found;
  }

  // Returns the object stored under 'key'.  Raises an error if the key is not
  // in the archive, or if its object has already been freed under the once
  // (o) option.  With that option the returned object is freed on the next
  // call to HasKey() or Value().
  virtual const T & Value(const std::string &key) {
    HandlePendingDelete();
    size_t pos;
    if (!FindKeyInternal(key, &pos)) {
      KALDI_ERR << "Value() called but no such key " << key
                << " in archive " << PrintableRxfilename(archive_rxfilename_);
    }
    Holder *held = seen_pairs_[pos].second;
    if (held == NULL) {  // only possible with the once option.
      KALDI_ERR << "Error: Value() called more than once for key "
                << key << " and once (o) option specified: rspecifier is "
                << rspecifier_;
    }
    // Remember this slot so that it is freed at the start of the next call.
    if (opts_.once)
      pending_delete_ = pos;
    return held->Value();
  }

  virtual ~RandomAccessTableReaderSortedArchiveImpl() {
    if (this->IsOpen())
      if (!Close())  // more specific warning will already have been printed.
        // we are in some kind of error state & user did not find out by
        // calling Close().
        KALDI_ERR << "Error closing RandomAccessTableReader: rspecifier is "
                  << rspecifier_;
  }
 private:
  // Frees the object marked by the previous Value() call, if there is one.
  // The key stays in seen_pairs_; only the Holder is deleted.
  void HandlePendingDelete() {
    const size_t no_index = static_cast<size_t>(-1);
    if (pending_delete_ == no_index)
      return;
    KALDI_ASSERT(pending_delete_ < seen_pairs_.size());
    KALDI_ASSERT(seen_pairs_[pending_delete_].second != NULL);
    delete seen_pairs_[pending_delete_].second;
    seen_pairs_[pending_delete_].second = NULL;
    pending_delete_ = no_index;
  }

  // Looks for 'key' among the pairs read so far (seen_pairs_), first reading
  // ahead in the archive as far as is needed to decide whether the key is
  // present.  On success returns true and writes the key's position in
  // seen_pairs_ to 'index'; otherwise returns false.
  // On return the state is kNoObject, kEof or kError.
  // No check is made here for keys whose object was already given out under
  // the "once" option; that is up to the caller.
  bool FindKeyInternal(const std::string &key, size_t *index) {
    // Fast path: the same key as the previous successful lookup.
    if (last_found_index_ < seen_pairs_.size()
        && seen_pairs_[last_found_index_].first == key) {
      *index = last_found_index_;
      return true;
    }

    if (state_ == kUninitialized)
      KALDI_ERR << "Trying to access a RandomAccessTableReader object that is"
                   " not open.";

    // The state at this point is kNoObject, kEof or kError; kHaveObject only
    // ever exists momentarily, right after ReadNextObject().
    //
    // Keep reading while the stream may still have data and the wanted key is
    // greater than the greatest key seen so far (seen_pairs_ is ordered from
    // least to greatest), or nothing has been seen yet.
    bool read_ahead = false;
    while (state_ == kNoObject &&
           (seen_pairs_.empty() ||
            key.compare(seen_pairs_.back().first) > 0)) {
      read_ahead = true;
      ReadNextObject();
      if (state_ != kHaveObject)
        continue;  // eof or error: the loop condition now fails.

      // Sanity check: the new key must be strictly greater than the previous
      // one, since the user told us the archive is sorted.
      if (!seen_pairs_.empty() &&
          cur_key_.compare(seen_pairs_.back().first) <= 0) {
        KALDI_ERR << "You provided the sorted (s) option but keys in archive "
                  << PrintableRxfilename(archive_rxfilename_) << " are not "
                  << "in sorted order: " << seen_pairs_.back().first
                  << " is followed by " << cur_key_;
      }
      KALDI_ASSERT(holder_ != NULL);
      // Ownership of the object moves from holder_ into seen_pairs_.
      seen_pairs_.push_back(std::make_pair(cur_key_, holder_));
      holder_ = NULL;
      state_ = kNoObject;
    }

    if (read_ahead) {
      // We stopped reading as soon as we reached or passed 'key', so only the
      // most recently read pair can match.
      const bool hit = (!seen_pairs_.empty() &&
                        seen_pairs_.back().first == key);
      if (hit)
        last_found_index_ = *index = seen_pairs_.size() - 1;
      return hit;
    }

    // No read-ahead was needed, so binary-search the pairs we already have.
    typedef typename std::vector<std::pair<std::string, Holder*> >::iterator
        PairIter;
    std::pair<std::string, Holder*> probe(key, static_cast<Holder*>(NULL));
    PairIter where = std::lower_bound(seen_pairs_.begin(), seen_pairs_.end(),
                                      probe, PairCompare());
    if (where == seen_pairs_.end() || !(key == where->first))
      return false;
    last_found_index_ = *index = (where - seen_pairs_.begin());
    return true;
  }

  // The (key, object) pairs read so far.  Every key read is kept, but under
  // the once option the Holder may have been deleted and its pointer set to
  // NULL.
  std::vector<std::pair<std::string, Holder*> > seen_pairs_;
  size_t last_found_index_;  // Position of the last key found, so that
  // repeated lookups of the same key (which are common) skip the search.
  size_t pending_delete_;  // Under the once option, the position in
  // seen_pairs_ of the object to free on the next call; otherwise
  // static_cast<size_t>(-1).
  struct PairCompare {
    // Less-than on (key, Holder) pairs; only the keys are compared.
    inline bool operator() (const std::pair<std::string, Holder*> &lhs,
                            const std::pair<std::string, Holder*> &rhs) {
      return lhs.first < rhs.first;
    }
  };
};


// RandomAccessTableReaderUnsortedArchiveImpl is for random-access reading of
// archives when the user does not specify the sorted (s) option (in this case
// the called-sorted, or "cs" option, is ignored).  This is the least efficient
// of the random access archive readers, in general, but it can be as efficient
// as the others, in speed, memory and latency, if the "once" option is
// specified and it happens that the keys of the archive are the same as the
// keys the code is called with (to HasKey() and Value()), and in the same
// order.  However, if you ask it for a key that's not present it will have to
// read the archive till the end and store it all in memory.

template<class Holder>
class RandomAccessTableReaderUnsortedArchiveImpl:
      public RandomAccessTableReaderArchiveImplBase<Holder> {
  using RandomAccessTableReaderArchiveImplBase<Holder>::kUninitialized;
  using RandomAccessTableReaderArchiveImplBase<Holder>::kHaveObject;
  using RandomAccessTableReaderArchiveImplBase<Holder>::kNoObject;
  using RandomAccessTableReaderArchiveImplBase<Holder>::kEof;
  using RandomAccessTableReaderArchiveImplBase<Holder>::kError;
  using RandomAccessTableReaderArchiveImplBase<Holder>::state_;
  using RandomAccessTableReaderArchiveImplBase<Holder>::opts_;
  using RandomAccessTableReaderArchiveImplBase<Holder>::cur_key_;
  using RandomAccessTableReaderArchiveImplBase<Holder>::holder_;
  using RandomAccessTableReaderArchiveImplBase<Holder>::rspecifier_;
  using RandomAccessTableReaderArchiveImplBase<Holder>::archive_rxfilename_;
  using RandomAccessTableReaderArchiveImplBase<Holder>::ReadNextObject;

  typedef typename Holder::T T;

  // Maps every key read so far (and not yet discarded) to the Holder that owns
  // its object.  Declared before the member functions so that the private
  // helper below can name MapType::iterator in its signature.
  typedef unordered_map<std::string, Holder*, StringHasher>  MapType;

 public:
  RandomAccessTableReaderUnsortedArchiveImpl(): to_delete_iter_(map_.end()),
                                                to_delete_iter_valid_(false) {
    map_.max_load_factor(0.5);  // make it quite empty -> quite efficient.
    // default seems to be 1.
  }

  // Frees every Holder still stored in map_, resets the "once" bookkeeping and
  // then delegates to CloseInternal() of the base class, whose result is
  // returned.
  virtual bool Close() {
    for (typename MapType::iterator iter = map_.begin();
        iter != map_.end();
        ++iter) {
      delete iter->second;
    }
    map_.clear();
    first_deleted_string_ = "";
    to_delete_iter_valid_ = false;
    return this->CloseInternal();
  }

  // Returns true if "key" is already cached or can be found by reading further
  // into the archive.  Any objects read on the way are kept in map_.
  virtual bool HasKey(const std::string &key) {
    HandlePendingDelete();
    return FindKeyInternal(key, NULL);
  }

  // Returns the object stored under "key"; raises KALDI_ERR if there is no such
  // key.  With the "once" option the returned object is released at the start
  // of the next HasKey()/Value() call (see HandlePendingDelete()).
  virtual const T & Value(const std::string &key) {
    HandlePendingDelete();
    const T *ans_ptr = NULL;
    if (!FindKeyInternal(key, &ans_ptr))
      KALDI_ERR << "Value() called but no such key " << key
                << " in archive " << PrintableRxfilename(archive_rxfilename_);
    return *ans_ptr;
  }

  virtual ~RandomAccessTableReaderUnsortedArchiveImpl() {
    if (this->IsOpen())
      if (!Close())  // more specific warning will already have been printed.
        // we are in some kind of error state & user did not find out by
        // calling Close().
        KALDI_ERR << "Error closing RandomAccessTableReader: rspecifier is "
                  << rspecifier_;
  }

 private:
  // If a previous Value() call marked an element for deletion (only happens
  // when opts_.once is true), frees its Holder and removes it from map_.  The
  // first key ever removed this way is remembered in first_deleted_string_.
  void HandlePendingDelete() {
    if (to_delete_iter_valid_) {
      to_delete_iter_valid_ = false;
      delete to_delete_iter_->second;  // Delete Holder object.
      // The key is read before erase(), while the map element still exists.
      if (first_deleted_string_.length() == 0)
        first_deleted_string_ = to_delete_iter_->first;
      map_.erase(to_delete_iter_);  // delete that element.
    }
  }

  // Writes the address of the object held by *iter to *value_ptr.  If
  // opts_.once is set, the value won't be needed again, so the element is
  // marked to be erased by the next HandlePendingDelete() call.
  void HandOutValue(typename MapType::iterator iter, const T **value_ptr) {
    *value_ptr = &(iter->second->Value());
    if (opts_.once) {
      to_delete_iter_ = iter;  // pending delete.
      KALDI_ASSERT(!to_delete_iter_valid_);
      to_delete_iter_valid_ = true;
    }
  }

  // FindKeyInternal tries to find the key in the map "map_".
  // If it is not already there, it reads ahead either until it finds the
  // key, or until no further object can be read (state_ leaves kNoObject).
  // Every object read on the way is stored in map_.
  //
  // If called with value_ptr == NULL it assumes it's called from HasKey() and
  // only reports whether the key exists.  If called with value_ptr != NULL it
  // assumes it's called from Value(): on success it puts the address of the
  // object in *value_ptr, and if opts_.once == true it marks that element of
  // the map to be deleted.
  //
  // Returns false if the key was not found (Value() turns that into an error).
  // Raises KALDI_ERR on a duplicate key in the archive, and when opts_.once is
  // set and the missing key equals the first key that was already discarded.
  //
  // Both callers run HandlePendingDelete() first, so to_delete_iter_ is not
  // pending while new elements are inserted here.
  bool FindKeyInternal(const std::string &key, const T **value_ptr = NULL) {
    typename MapType::iterator iter = map_.find(key);
    if (iter != map_.end()) {  // Found in the map...
      if (value_ptr != NULL)  // called from Value().
        HandOutValue(iter, value_ptr);
      return true;
    }
    while (state_ == kNoObject) {
      ReadNextObject();
      if (state_ == kHaveObject) {  // Successfully read object.
        state_ = kNoObject;  // we are about to transfer ownership
        // of the object in holder_ to map_.
        // Insert it into map_.
        std::pair<typename MapType::iterator, bool> pr =
            map_.insert(typename MapType::value_type(cur_key_, holder_));

        if (!pr.second) {  // Was not inserted-- previous element w/ same key
          delete holder_;  // map was not changed, no ownership transferred.
          holder_ = NULL;
          // Printed the same way as the "no such key" error in Value().
          KALDI_ERR << "Error in RandomAccessTableReader: duplicate key "
                    << cur_key_ << " in archive "
                    << PrintableRxfilename(archive_rxfilename_);
        }
        holder_ = NULL;  // ownership transferred to map_.
        if (cur_key_ == key) {  // the one we wanted..
          if (value_ptr != NULL)  // called from Value().
            HandOutValue(pr.first, value_ptr);
          return true;
        }
      }
    }
    if (opts_.once && key == first_deleted_string_) {
      KALDI_ERR << "You specified the once (o) option but "
                << "you are calling using key " << key
                << " more than once: rspecifier is " << rspecifier_;
    }
    return false;  // We read the entire archive (or got to error state) and
    // didn't find it.
  }

  MapType map_;

  typename MapType::iterator to_delete_iter_;
  bool to_delete_iter_valid_;

  std::string first_deleted_string_;  // keep the first string we deleted
  // from map_ (if opts_.once == true).  It's for an inexact spot-check that the
  // "once" option isn't being used incorrectly.
};


// Constructs a reader and, unless "rspecifier" is the empty string, opens it.
// An empty rspecifier leaves the reader closed; a failed Open() raises
// KALDI_ERR.
template<class Holder>
RandomAccessTableReader<Holder>::RandomAccessTableReader(
    const std::string &rspecifier): impl_(NULL) {
  if (rspecifier.empty()) return;  // nothing to open.
  if (!Open(rspecifier)) {
    KALDI_ERR << "Error opening RandomAccessTableReader object "
        " (rspecifier is: " << rspecifier << ")";
  }
}

// Opens the table described by "rspecifier".  Picks the implementation class
// from the rspecifier type and its sorted / called-sorted options, then asks
// that implementation to open the table.  Returns false (leaving the reader
// closed) if the rspecifier is invalid or the implementation fails to open;
// raises KALDI_ERR if the reader is already open.
template<class Holder>
bool RandomAccessTableReader<Holder>::Open(const std::string &rspecifier) {
  if (IsOpen()) {
    KALDI_ERR << "Already open.";
  }
  RspecifierOptions opts;
  const RspecifierType rs = ClassifyRspecifier(rspecifier, NULL, &opts);
  if (rs == kScriptRspecifier) {
    impl_ = new RandomAccessTableReaderScriptImpl<Holder>();
  } else if (rs == kArchiveRspecifier) {
    if (!opts.sorted) {
      // Without "s" the called-sorted option plays no role in the choice.
      impl_ = new RandomAccessTableReaderUnsortedArchiveImpl<Holder>();
    } else if (opts.called_sorted) {  // "doubly" sorted case.
      impl_ = new RandomAccessTableReaderDSortedArchiveImpl<Holder>();
    } else {
      impl_ = new RandomAccessTableReaderSortedArchiveImpl<Holder>();
    }
  } else {  // kNoRspecifier, or any other value.
    KALDI_WARN << "Invalid rspecifier: "
               << rspecifier;
    return false;
  }
  if (impl_->Open(rspecifier)) return true;
  // Opening failed; a warning will already have been printed.
  delete impl_;
  impl_ = NULL;
  return false;
}

// Reports whether the open table contains "key".  Raises KALDI_ERR if the
// reader is not open or if "key" fails the IsToken() check.
template<class Holder>
bool RandomAccessTableReader<Holder>::HasKey(const std::string &key) {
  CheckImpl();
  const bool key_is_token = IsToken(key);
  if (!key_is_token) {
    KALDI_ERR << "Invalid key \"" << key << '"';
  }
  return impl_->HasKey(key);
}


// Returns the object stored under "key" by forwarding to the implementation.
// Raises KALDI_ERR (via CheckImpl) if the reader is not open.
template<class Holder>
const typename RandomAccessTableReader<Holder>::T&
RandomAccessTableReader<Holder>::Value(const std::string &key) {
  CheckImpl();  // dies if there is no implementation object.
  const T &value = impl_->Value(key);
  return value;
}

// Closes the table and destroys the implementation object, so the reader can
// be opened again afterwards.  Returns whatever the implementation's Close()
// reported; raises KALDI_ERR (via CheckImpl) if the reader was not open.
template<class Holder>
bool RandomAccessTableReader<Holder>::Close() {
  CheckImpl();
  const bool closed_ok = impl_->Close();
  // The implementation is released regardless of the close status.
  delete impl_;
  impl_ = NULL;
  return closed_ok;
}

// Closes the reader if it is still open.  A failing Close() is reported with
// KALDI_ERR; call Close() yourself beforehand to stop this being thrown.
template<class Holder>
RandomAccessTableReader<Holder>::~RandomAccessTableReader() {
  if (!IsOpen()) return;  // already closed, or never opened.
  if (!Close()) {
    KALDI_ERR << "failure detected in destructor.";
  }
}

// Verifies that an implementation object exists (impl_ is non-NULL); raises
// KALDI_ERR otherwise.
template<class Holder>
void SequentialTableReader<Holder>::CheckImpl() const {
  if (impl_) return;
  KALDI_ERR << "Trying to use empty SequentialTableReader (perhaps you "
            << "passed the empty string as an argument to a program?)";
}

// Verifies that an implementation object exists (impl_ is non-NULL); raises
// KALDI_ERR otherwise.
template<class Holder>
void RandomAccessTableReader<Holder>::CheckImpl() const {
  if (impl_) return;
  KALDI_ERR << "Trying to use empty RandomAccessTableReader (perhaps you "
            << "passed the empty string as an argument to a program?)";
}

// Verifies that an implementation object exists (impl_ is non-NULL); raises
// KALDI_ERR otherwise.
template<class Holder>
void TableWriter<Holder>::CheckImpl() const {
  if (impl_) return;
  KALDI_ERR << "Trying to use empty TableWriter (perhaps you "
            << "passed the empty string as an argument to a program?)";
}

// Builds the table reader from "table_rxfilename" and the key-mapping reader
// from "utt2spk_rxfilename".  If the table name is empty the mapping reader is
// handed an empty name too, whatever "utt2spk_rxfilename" is.  The mapping
// file name is stored as given; HasKey() and Value() print it in their error
// messages.
template<class Holder>
RandomAccessTableReaderMapped<Holder>::RandomAccessTableReaderMapped(
    const std::string &table_rxfilename,
    const std::string &utt2spk_rxfilename):
    reader_(table_rxfilename),
    token_reader_(table_rxfilename.empty() ? std::string() :
                                             utt2spk_rxfilename),
    utt2spk_rxfilename_(utt2spk_rxfilename) { }

// (Re)opens the mapped reader.  Any readers that are currently open are closed
// first.  "table_rxfilename" must be non-empty; "utt2spk_rxfilename" may be
// empty, in which case no key mapping is applied.
// Returns false if either table fails to open (the main reader is closed again
// if only the mapping table failed), true on success.
// On success the stored mapping file name is refreshed, so that the error
// messages printed by HasKey() and Value() name the map that is actually in
// use rather than the one given at construction time.
template<class Holder>
bool RandomAccessTableReaderMapped<Holder>::Open(
    const std::string &table_rxfilename,
    const std::string &utt2spk_rxfilename) {
  if (reader_.IsOpen()) reader_.Close();
  if (token_reader_.IsOpen()) token_reader_.Close();
  KALDI_ASSERT(!table_rxfilename.empty());
  if (!reader_.Open(table_rxfilename)) return false;  // will have printed
  // warning internally, probably.
  if (!utt2spk_rxfilename.empty()) {
    if (!token_reader_.Open(utt2spk_rxfilename)) {
      reader_.Close();
      return false;
    }
  }
  utt2spk_rxfilename_ = utt2spk_rxfilename;  // keep diagnostics in sync.
  return true;
}


// Reports whether the table has an entry for "utt".  When a mapping table is
// open, "utt" is first translated through it and the mapped key is looked up
// instead; a key missing from the mapping table raises KALDI_ERR.
template<class Holder>
bool RandomAccessTableReaderMapped<Holder>::HasKey(const std::string &utt) {
  // We don't check IsOpen on reader_; the call goes through to the member
  // variable, which will crash with a more informative error message than
  // we can give here, as we don't any longer know the rxfilename.
  if (!token_reader_.IsOpen()) {  // No mapping: look the key up directly.
    return reader_.HasKey(utt);
  }
  // We need to map the key from utt to spk.
  if (!token_reader_.HasKey(utt)) {
    KALDI_ERR << "Attempting to read key " << utt << ", which is not present "
              << "in utt2spk map or similar map being read from "
              << PrintableRxfilename(utt2spk_rxfilename_);
  }
  const std::string &spk = token_reader_.Value(utt);
  return reader_.HasKey(spk);
}

// Returns the object for "utt".  When a mapping table is open, "utt" is first
// translated through it and the object stored under the mapped key is
// returned; a key missing from the mapping table raises KALDI_ERR.
template<class Holder>
const typename Holder::T& RandomAccessTableReaderMapped<Holder>::Value(
    const std::string &utt) {
  if (!token_reader_.IsOpen()) {  // No mapping: look the key up directly.
    return reader_.Value(utt);
  }
  // We need to map the key from utt to spk.
  if (!token_reader_.HasKey(utt)) {
    KALDI_ERR << "Attempting to read key " << utt << ", which is not present "
              << "in utt2spk map or similar map being read from "
              << PrintableRxfilename(utt2spk_rxfilename_);
  }
  const std::string &spk = token_reader_.Value(utt);
  return reader_.Value(spk);
}


/// @}

}  // end namespace kaldi


#endif  // KALDI_UTIL_KALDI_TABLE_INL_H_


================================================
FILE: runtime/engine/kaldi/util/kaldi-table.cc
================================================
// util/kaldi-table.cc

// Copyright 2009-2011  Microsoft Corporation

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//  http://www.apache.org/licenses/LICENSE-2.0

// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "util/kaldi-table.h"
#include "util/text-utils.h"

namespace kaldi {


bool ReadScriptFile(const std::string &rxfilename,
                    bool warn,
                    std::vector<std::pair<std::string, std::string> >
                    *script_out) {
  bool is_binary;
  Input input;

  if (!input.Open(rxfilename, &is_binary)) {
    if (warn) KALDI_WARN << "Error opening script file: " <<
                 PrintableRxfilename(rxfilename);
    return false;
  }
  if (is_binary) {
    if (warn) KALDI_WARN << "Error: script file appears to be binary: " <<
                 PrintableRxfilename(rxfilename);
    return false;
  }

  bool ans = ReadScriptFile(input.Stream(), warn, script_out);
  if (warn && !ans)
    KALDI_WARN << "[script file was: " << PrintableRxfilename(rxfilename) <<
                  "]";
  return ans;
}

// Parses a script file from "is", one "key rest-of-line" entry per line, and
// appends each entry to *script_out.  Stops and returns false at the first
// line that is empty or whose key or remainder is empty; entries appended
// before that point stay in *script_out.  Returns true once the stream is
// exhausted.  Warnings are printed only when "warn" is true.
bool ReadScriptFile(std::istream &is,
                    bool warn,
                    std::vector<std::pair<std::string, std::string> >
                    *script_out) {
  KALDI_ASSERT(script_out != NULL);
  std::string line;
  // line_number is 1-based and only used for diagnostics.
  for (int line_number = 1; std::getline(is, line); line_number++) {
    if (line.c_str()[0] == '\0') {
      if (warn) {
        KALDI_WARN << "Empty " << line_number << "'th line in script file";
      }
      return false;  // Empty line so invalid scp file format..
    }

    std::string key, rest;
    SplitStringOnFirstSpace(line, &key, &rest);

    const bool malformed = key.empty() || rest.empty();
    if (malformed) {
      if (warn) {
        KALDI_WARN << "Invalid " << line_number << "'th line in script file"
                          <<":\"" << line << '"';
      }
      return false;
    }
    script_out->push_back(std::pair<std::string, std::string>(key, rest));
  }
  return true;
}

// Writes "script" to "os", one "key value\n" line per entry.
// Returns false (after printing a warning) if the stream is not good on entry
// or after writing, if a key fails the IsToken() check, or if a value contains
// a newline or starts/ends with whitespace.  Entries preceding an invalid one
// will already have been written.
bool WriteScriptFile(std::ostream &os,
                     const std::vector<std::pair<std::string, std::string> >
                     &script) {
  if (!os.good()) {
    KALDI_WARN << "WriteScriptFile: attempting to write to invalid stream.";
    return false;
  }
  std::vector<std::pair<std::string, std::string> >::const_iterator iter;
  for (iter = script.begin(); iter != script.end(); ++iter) {
    const std::string &key = iter->first;
    const std::string &value = iter->second;
    if (!IsToken(key)) {
      KALDI_WARN << "WriteScriptFile: using invalid token \"" << key <<
                    '"';
      return false;
    }
    // isspace() must not be given a negative value other than EOF, which is
    // what a plain (signed) char holding a non-ASCII byte would be; convert
    // through unsigned char first.
    const bool space_at_edge =
        value.length() != 0 &&
        (isspace(static_cast<unsigned char>(value[0])) ||
         isspace(static_cast<unsigned char>(value[value.length()-1])));
    if (value.find('\n') != std::string::npos || space_at_edge) {
      // second part contains newline or leading or trailing space.
      KALDI_WARN << "WriteScriptFile: attempting to write invalid line \"" <<
                    value << '"';
      return false;
    }
    os << key << ' ' << value << '\n';
  }
  if (!os.good()) {
    KALDI_WARN << "WriteScriptFile: stream in error state.";
    return false;
  }
  return true;
}

// Opens "wxfilename" for text output (no binary-mode header) and writes
// "script" to it via the ostream overload.  Failure to open or to write is
// reported with KALDI_ERR.
bool WriteScriptFile(const std::string &wxfilename,
                     const std::vector<std::pair<std::string, std::string> >
                     &script) {
  Output output;
  const bool binary = false;        // script files are written as text...
  const bool write_header = false;  // ...and carry no binary-mode header.
  if (!output.Open(wxfilename, binary, write_header)) {
    KALDI_ERR << "Error opening output stream for script file: "
              << PrintableWxfilename(wxfilename);
    return false;
  }
  const bool written = WriteScriptFile(output.Stream(), script);
  if (!written) {
    KALDI_ERR << "Error writing script file to stream "
              << PrintableWxfilename(wxfilename);
    return false;
  }
  return true;
}


// Classifies "wspecifier" and, through the non-NULL output pointers, returns
// the archive / script filenames and the parsed options.
//
//  Examples:
//  ark,t:wxfilename -> kArchiveWspecifier
//  ark,b:wxfilename -> kArchiveWspecifier
//  scp,t:rxfilename -> kScriptWspecifier
//  ark,scp,t:filename, wxfilename -> kBothWspecifier
//  ark,scp:filename, wxfilename ->  kBothWspecifier
//  Note we can include the flush option (f) or no-flush (nf)
// anywhere: e.g.
//  ark,scp,f:filename, wxfilename ->  kBothWspecifier
// or:
//  scp,t,nf:rxfilename -> kScriptWspecifier
//
// Returns kNoWspecifier if there is no ':', if the string ends in whitespace,
// if an option before the ':' is not recognised, if "ark" or "scp" is
// repeated or "scp" precedes "ark", or if the "ark,scp" form has no ','
// after the ':'.  Both filename outputs are cleared on entry; *opts is reset
// to its defaults before the options are parsed and may have been partly
// updated when kNoWspecifier is returned.
WspecifierType ClassifyWspecifier(const std::string &wspecifier,
                                  std::string *archive_wxfilename,
                                  std::string *script_wxfilename,
                                  WspecifierOptions *opts) {
  if (archive_wxfilename) archive_wxfilename->clear();
  if (script_wxfilename) script_wxfilename->clear();

  size_t pos = wspecifier.find(':');
  if (pos == std::string::npos) return kNoWspecifier;
  // The string contains ':' so it is non-empty and rbegin() is valid.  The
  // character is converted through unsigned char because isspace() must not be
  // given a negative value (a non-ASCII byte in a signed char).
  if (isspace(static_cast<unsigned char>(*(wspecifier.rbegin()))))
    return kNoWspecifier;  // Trailing space disallowed.

  std::string before_colon(wspecifier, 0, pos), after_colon(wspecifier, pos+1);

  // Split the part before ':' using the delimiter string ", ".
  // NOTE(review): presumably every character of ", " acts as a delimiter;
  // confirm against SplitStringToVector.
  std::vector<std::string> split_first_part;
  SplitStringToVector(before_colon, ", ", false, &split_first_part);  // false==
  // don't omit empty strings between commas.

  WspecifierType ws = kNoWspecifier;

  if (opts != NULL)
    *opts = WspecifierOptions();  // Make sure all the defaults are as in the
                                  // default constructor of the options class.

  for (size_t i = 0; i < split_first_part.size(); i++) {
    const std::string &str = split_first_part[i];  // e.g. "b", "t", "f", "ark",
    // "scp".
    if (str == "b") {
      if (opts) opts->binary = true;
    } else if (str == "f") {
      if (opts) opts->flush = true;
    } else if (str == "nf") {
      if (opts) opts->flush = false;
    } else if (str == "t") {
      if (opts) opts->binary = false;
    } else if (str == "p") {
      if (opts) opts->permissive = true;
    } else if (str == "ark") {
      if (ws == kNoWspecifier) ws = kArchiveWspecifier;
      else
        return kNoWspecifier;  // We do not allow "scp, ark", only "ark,
      // scp".
    } else if (str == "scp") {
      if (ws == kNoWspecifier) ws = kScriptWspecifier;
      else if (ws == kArchiveWspecifier) ws = kBothWspecifier;
      else
        return kNoWspecifier;  // repeated "scp" option: invalid.
    } else {
      return kNoWspecifier;  // Could not interpret this option.
    }
  }

  switch (ws) {
    case kArchiveWspecifier:
      if (archive_wxfilename)
        *archive_wxfilename = after_colon;
      break;
    case kScriptWspecifier:
      if (script_wxfilename)
        *script_wxfilename = after_colon;
      break;
    case kBothWspecifier:
      // Archive name is everything up to the first comma, script name is
      // everything after it.
      pos = after_colon.find(',');  // first comma.
      if (pos == std::string::npos) return kNoWspecifier;
      if (archive_wxfilename)
        *archive_wxfilename = std::string(after_colon, 0, pos);
      if (script_wxfilename)
        *script_wxfilename = std::string(after_colon, pos+1);
      break;
    case kNoWspecifier: default: break;
  }
  return ws;
}


// Classifies "rspecifier" and, through the non-NULL output pointers, returns
// the rxfilename (the text after the first ':') and the parsed options.
//
// Examples
// ark:rxfilename  ->  kArchiveRspecifier
// scp:rxfilename  -> kScriptRspecifier
//
// We also allow the meaningless prefixes b, and t,
// plus the options o (once), no (not-once),
// s (sorted) and ns (not-sorted), cs (called-sorted) and ncs,
// p (permissive) and np (not-permissive), and bg (background).
// so the following would be valid:
//
// o, b, np, ark:rxfilename  ->  kArchiveRspecifier
//
// Examples:
//
// b, ark:rxfilename  ->  kArchiveRspecifier
// t, ark:rxfilename  ->  kArchiveRspecifier
// b, scp:rxfilename  -> kScriptRspecifier
// t, no, s, scp:rxfilename  -> kScriptRspecifier
// t, ns, scp:rxfilename  -> kScriptRspecifier
//
// NOTE(review): the examples are written with ", " between options; whether a
// space after the comma is really accepted depends on how SplitStringToVector
// treats the resulting empty fields -- confirm before relying on it.
//
// Unlike wspecifiers there is no "f" (flush) option here: any option not
// listed above makes the rspecifier invalid.
//
// Improperly formed Rspecifiers will be classified as kNoRspecifier.  *opts is
// reset to its defaults on entry and may have been partly updated when
// kNoRspecifier is returned.
RspecifierType ClassifyRspecifier(const std::string &rspecifier,
                                  std::string *rxfilename,
                                  RspecifierOptions *opts) {
  if (rxfilename) rxfilename->clear();

  if (opts != NULL)
    *opts = RspecifierOptions();  // Make sure all the defaults are as in the
                                  // default constructor of the options class.

  size_t pos = rspecifier.find(':');
  if (pos == std::string::npos) return kNoRspecifier;

  // The string contains ':' so it is non-empty and rbegin() is valid.  The
  // character is converted through unsigned char because isspace() must not be
  // given a negative value (a non-ASCII byte in a signed char).
  if (isspace(static_cast<unsigned char>(*(rspecifier.rbegin()))))
    return kNoRspecifier;  // Trailing space disallowed.

  std::string before_colon(rspecifier, 0, pos),
      after_colon(rspecifier, pos+1);

  // Split the part before ':' using the delimiter string ", ".
  std::vector<std::string> split_first_part;
  SplitStringToVector(before_colon, ", ", false, &split_first_part);  // false==
  // don't omit empty strings between commas.

  RspecifierType rs = kNoRspecifier;

  for (size_t i = 0; i < split_first_part.size(); i++) {
    const std::string &str = split_first_part[i];  // e.g. "b", "t", "o", "ark",
    // "scp".
    if (str == "b" || str == "t") {
      // Ignore these options.  They are accepted so we can use the same
      // specifiers for rspecifiers and wspecifiers.
    } else if (str == "o") {
      if (opts) opts->once = true;
    } else if (str == "no") {
      if (opts) opts->once = false;
    } else if (str == "p") {
      if (opts) opts->permissive = true;
    } else if (str == "np") {
      if (opts) opts->permissive = false;
    } else if (str == "s") {
      if (opts) opts->sorted = true;
    } else if (str == "ns") {
      if (opts) opts->sorted = false;
    } else if (str == "cs") {
      if (opts) opts->called_sorted = true;
    } else if (str == "ncs") {
      if (opts) opts->called_sorted = false;
    } else if (str == "bg") {
      if (opts) opts->background = true;
    } else if (str == "ark") {
      if (rs == kNoRspecifier) rs = kArchiveRspecifier;
      else
        return kNoRspecifier;  // Repeated or combined ark and scp options
      // invalid.
    } else if (str == "scp") {
      if (rs == kNoRspecifier) rs = kScriptRspecifier;
      else
        return kNoRspecifier;  // Repeated or combined ark and scp options
      // invalid.
    } else {
      return kNoRspecifier;  // Could not interpret this option.
    }
  }
  if ((rs == kArchiveRspecifier || rs == kScriptRspecifier)
     && rxfilename != NULL)
    *rxfilename = after_colon;
  return rs;
}


}  // end namespace kaldi


================================================
FILE: runtime/engine/kaldi/util/kaldi-table.h
================================================
// util/kaldi-table.h

// Copyright 2009-2011    Microsoft Corporation
//                2013    Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_UTIL_KALDI_TABLE_H_
#define KALDI_UTIL_KALDI_TABLE_H_

#include <string>
#include <vector>
#include <utility>

#include "base/kaldi-common.h"
#include "util/kaldi-holder.h"

namespace kaldi {

// Forward declarations
template<class Holder> class RandomAccessTableReaderImplBase;
template<class Holder>  class SequentialTableReaderImplBase;
template<class Holder>  class TableWriterImplBase;

/// \addtogroup table_group
/// @{

// This header defines the Table classes (RandomAccessTableReader,
// SequentialTableReader and TableWriter) and explains what the Holder classes,
// which the Table class requires as a template argument, are like.  It also
// explains the "rspecifier" and "wspecifier" concepts (these are strings that
// explain how to read/write objects via archives or scp files).  A table is
// conceptually a collection of objects of a particular type T indexed by keys
// of type std::string (these Keys additionally have an order within
// each table).
// The Table classes are templated on a type (call it Holder) such that
// Holder::T is a typedef equal to T.

// see kaldi-holder.h for detail on the Holder classes.

typedef std::vector<std::string> KeyList;

// Documentation for "wspecifier"
// "wspecifier" describes how we write a set of objects indexed by keys.
// The basic, unadorned wspecifiers are as follows:
//
//  ark:wxfilename
//  scp:rxfilename
//  ark,scp:filename,wxfilename
//  ark,scp:filename,wxfilename
//
//
//  We also allow the following modifiers:
//  t means text mode.
//  b means binary mode.
//  f means flush the stream after writing each entry.
//   (nf means don't flush, and the default is not to flush).
//  p means permissive mode, when writing to an "scp" file only (will ignore
//     missing scp entries, i.e. won't write anything for those files but will
//     return success status).
//
//  So the following are valid wspecifiers:
//  ark,b,f:foo
//  "ark,b,b:| gzip -c > foo"
//  "ark,scp,t,nf:foo.ark,|gzip -c > foo.scp.gz"
//  ark,b:-
//
//  The meanings of rxfilename and wxfilename are as described in
//  kaldi-io.h (they are filenames but include pipes, stdin/stdout
//  and so on); filename is a regular filename.
//

//  The ark:wxfilename type of wspecifier instructs the class to
//  write directly to an archive.  For small objects (e.g. lists of ints),
//  the text archive format will generally be human readable with one line
//  per entry in the archive.
//
//  The type "scp:xfilename" refers to an scp file which should
//  already exist on disk, and tells us where to write the data for
//  each key (usually an actual file); each line of the scp file
//  would be:
//   key xfilename
//
//  The type ark,scp:filename,wxfilename means
//  we write both an archive and an scp file that specifies offsets into the
//  archive, with lines like:
//    key filename:12407
//  where the number is the byte offset into the file.
//  In this case we restrict the archive-filename to be an actual filename,
//  as we can't see a situation where an extended filename would make sense
//  for this (we can't fseek() in pipes).

// Result of classifying a wspecifier string; see ClassifyWspecifier().
enum WspecifierType  {
  kNoWspecifier = 0,       // not a valid wspecifier.
  kArchiveWspecifier = 1,  // "ark": write to an archive.
  kScriptWspecifier = 2,   // "scp": write according to a script file.
  kBothWspecifier = 3      // "ark,scp": write an archive plus a script file.
};

// Options parsed from the part of a wspecifier that precedes the ':'.
struct WspecifierOptions {
  bool binary;      // set by "b", cleared by "t".
  bool flush;       // set by "f", cleared by "nf".
  bool permissive;  // set by "p"; will ignore absent scp entries.

  // Defaults: binary mode, no flushing, not permissive.
  WspecifierOptions()
      : binary(true),
        flush(false),
        permissive(false) { }
};

// ClassifyWspecifier returns the type of the wspecifier string,
// and (if pointers are non-NULL) outputs the extra information
// about the options, and the script and archive
// filenames.
WspecifierType ClassifyWspecifier(const std::string &wspecifier,
                                  std::string *archive_wxfilename,
                                  std::string *script_wxfilename,
                                  WspecifierOptions *opts);

// ReadScriptFile reads an .scp file in its entirety, and appends its entries
// (in the order they appear in the scp file) to *script_out, which contains
// pairs of (key, xfilename).  The .scp
// file format is: on each line, key xfilename
// where xfilename means rxfilename or wxfilename, and may contain internal
// spaces (we trim away any leading or trailing space).  The key is space-free.
// ReadScriptFile returns true if the format was valid (empty files
// are valid).
// If 'print_warnings', it will print out warning messages that explain what
// kind of error there was.
bool ReadScriptFile(const std::string &rxfilename,
                    bool print_warnings,
                    std::vector<std::pair<std::string, std::string> >
                    *script_out);

// This version of ReadScriptFile works from an istream.
bool ReadScriptFile(std::istream &is,
                    bool print_warnings,
                    std::vector<std::pair<std::string, std::string> >
                    *script_out);

// Writes, for each entry in script, the first element, then ' ', then the
// second element then '\n'.  Checks that the keys (first elements of pairs) are
// valid tokens (nonempty, no whitespace), and the values (second elements of
// pairs) are newline-free and contain no leading or trailing space.  Returns
// true on success.
bool WriteScriptFile(const std::string &wxfilename,
                     const std::vector<std::pair<std::string, std::string> >
                     &script);

// This version writes to an ostream.
bool WriteScriptFile(std::ostream &os,
                     const std::vector<std::pair<std::string, std::string> >
                     &script);

// Documentation for "rspecifier"
// "rspecifier" describes how we read a set of objects indexed by keys.
// The possibilities are:
//
// ark:rxfilename
// scp:rxfilename
//
// We also allow various modifiers:
//   o   means the program will only ask for each key once, which enables
//       the reader to discard already-asked-for values.
//   s   means the keys are sorted on input (means we don't have to read till
//       eof if someone asked for a key that wasn't there).
//   cs  means that it is called in sorted order (we are generally asserting
//       this based on knowledge of how the program works).
//   p   means "permissive", and causes it to skip over keys whose corresponding
//       scp-file entries cannot be read. [and to ignore errors in archives and
//       script files, and just consider the "good" entries].
//       We allow the negation of the options above, as in no, ns, np,
//       but these aren't currently very useful (just equivalent to omitting the
//       corresponding option).
//       [any of the above options can be prefixed by n to negate them, e.g. no,
//       ns, ncs, np; but these aren't currently useful as you could just omit
//       the option].
//   bg means "background".  It currently has no effect for random-access readers,
//       but for sequential readers it will cause it to "read ahead" to the next
//       value, in a background thread.  Recommended when reading larger objects
//       such as neural-net training examples, especially when you want to
//       maximize GPU usage.
//
//   b   is ignored [for scripting convenience]
//   t   is ignored [for scripting convenience]
//
//
//  So for instance the following would be a valid rspecifier:
//
//   "o, s, p, ark:gunzip -c foo.gz|"

// Options parsed from the part of an rspecifier that precedes the ':'.
struct  RspecifierOptions {
  // The first three options only make a difference for the
  // RandomAccessTableReader class.

  // "o": we assert that the program will only ask for each key once.
  bool once;
  // "s": we assert that the keys are sorted.
  bool sorted;
  // "cs": we assert that the HasKey() and Value() functions will also be
  // called in sorted order.  [this implies "once" but not vice versa].
  bool called_sorted;
  // "p": when reading from scp files, entries that can't be read are treated
  // as if the corresponding key were not there.  For archive files it will
  // suppress errors getting thrown if the archive is corrupted and can't be
  // read to the end.
  bool permissive;
  // "bg": for sequential readers, read ahead to the next object in a
  // background thread.
  bool background;

  // Every option is off by default.
  RspecifierOptions()
      : once(false),
        sorted(false),
        called_sorted(false),
        permissive(false),
        background(false) { }
};

// Result of classifying an rspecifier string; see ClassifyRspecifier().
enum RspecifierType  {
  kNoRspecifier = 0,       // not a valid rspecifier.
  kArchiveRspecifier = 1,  // "ark": read from an archive.
  kScriptRspecifier = 2    // "scp": read via a script file.
};

RspecifierType ClassifyRspecifier(const std::string &rspecifier,
                                  std::string *rxfilename,
                                  RspecifierOptions *opts);


/// Allows random access to a collection
/// of objects in an archive or script file; see \ref io_sec_tables.
template<class Holder>
class RandomAccessTableReader {
 public:
  typedef typename Holder::T T;

  // Builds a reader that is not attached to any table yet.
  RandomAccessTableReader(): impl_(NULL) { }

  // Equivalent to the default constructor followed by Open(), except that an
  // error is reported by throwing.
  explicit RandomAccessTableReader(const std::string &rspecifier);

  // Copy-construction is allowed only from a reader that has not been opened
  // (this is what makes the class usable inside an stl vector); the copy is
  // itself unopened.
  RandomAccessTableReader(const RandomAccessTableReader<Holder> &other)
      : impl_(NULL) {
    KALDI_ASSERT(other.impl_ == NULL);
  }

  ~RandomAccessTableReader();

  // Opens the table.
  bool Open(const std::string &rspecifier);

  // True once a table is open.
  bool IsOpen() const { return impl_ != NULL; }

  // Closes the table [throws if it was not open].  Returns true on success,
  // and false if we were reading an archive and discovered an error in it.
  bool Close();

  // Tells whether the table has this key.  With the "permissive" (p) read
  // option, this returns false for keys whose scp entry cannot be read.
  bool HasKey(const std::string &key);

  // Returns the object stored under "key".  May throw if you are reading an
  // scp file without the "permissive" (p) option and the entry cannot be
  // read; typically you won't want to catch this error.
  const T &Value(const std::string &key);

 private:
  // Assignment is disallowed (declared, never defined).
  RandomAccessTableReader &operator=(const RandomAccessTableReader<Holder>&);

  // Verifies that impl_ is non-NULL; otherwise prints an error message and
  // dies (with KALDI_ERR).
  void CheckImpl() const;

  RandomAccessTableReaderImplBase<Holder> *impl_;
};


/// A templated class for reading objects sequentially from an archive or script
/// file; see \ref io_sec_tables.
template<class Holder>
class SequentialTableReader {
 public:
  typedef typename Holder::T T;

  // Builds a reader that is not attached to any table yet.
  SequentialTableReader(): impl_(NULL) { }

  // Equivalent to the default constructor followed by Open(), except that an
  // error is reported by throwing.
  explicit SequentialTableReader(const std::string &rspecifier);

  // Copy-construction is allowed only from a reader that has not been opened
  // (this is what makes the class usable inside an stl vector); the copy is
  // itself unopened.
  SequentialTableReader(const SequentialTableReader<Holder> &other)
      : impl_(NULL) {
    KALDI_ASSERT(other.impl_ == NULL);
  }

  // The destructor may throw.  That is deliberate: it is how an error is
  // signalled to a user who never called Close().  Without it there would be
  // no way to tell whether Done() became true because the archive or script
  // really ended, or because an error prevented further reading.
  ~SequentialTableReader();

  // Opens the table and returns the exit status; it does, however, throw if a
  // previously open stream was in an error state.  Calling Close() first
  // avoids that; in any case, calling Open() more than once is rarely needed.
  bool Open(const std::string &rspecifier);

  // True if the table is open for reading (this does not imply the stream is
  // in a good state).
  bool IsOpen() const;

  // True when there is nothing more to read.  This also becomes true when an
  // error stops us from reading further; detect that case by calling Close()
  // and checking its return status, otherwise the destructor will throw.
  inline bool Done();

  // Key of the current entry.  Only valid if Done() returned false.
  inline std::string Key();

  // Reference to the current value.  Only valid if Done() returned false, and
  // only until the next call to this object.  Throws if you are reading an
  // scp file without the "permissive" (p) option and the file cannot be read.
  // [With the permissive option, a file that cannot be read behaves as if its
  // key did not exist.]  You probably would not want to catch this exception;
  // the user can simply add the p option to the rspecifier.  The reference is
  // non-const so that callers can, e.g., shallow-swap the held object rather
  // than make a redundant copy.
  T &Value();

  // Memory optimization for large objects: tells the class to deallocate the
  // current value.  Any reference obtained from Value() is invalidated.
  void FreeCurrent();

  // Advances to the next key.  Never throws: any error makes Done() return
  // true, and the destructor will then throw unless Close() is called.
  void Next();

  // Returns false (failure) if Done() became true because of an error
  // condition (e.g. an error or early termination in the archive) rather than
  // because reading truly finished.  If there was an error and Close() is not
  // called, the destructor will fail.
  bool Close();

 private:
  // Assignment is disallowed (declared, never defined).
  SequentialTableReader &operator = (const SequentialTableReader<Holder>&);

  // Verifies that impl_ is non-NULL; otherwise prints an error message and
  // dies (with KALDI_ERR).
  void CheckImpl() const;

  SequentialTableReaderImplBase<Holder> *impl_;
};


/// A templated class for writing objects to an
/// archive or script file; see \ref io_sec_tables.
template<class Holder>
class TableWriter {
 public:
  typedef typename Holder::T T;

  // Builds a writer that is not attached to any table yet.
  TableWriter(): impl_(NULL) { }

  // Equivalent to the default constructor followed by Open(), except that an
  // error is reported by throwing.  See the wspecifier documentation earlier
  // in this header.
  explicit TableWriter(const std::string &wspecifier);

  // Copy-construction is allowed only from a writer that has not been opened
  // (this is what makes the class usable inside an stl vector); the copy is
  // itself unopened.
  TableWriter(const TableWriter &other)
      : impl_(NULL) {
    KALDI_ASSERT(other.impl_ == NULL);
  }

  ~TableWriter();

  // Opens the table (see the wspecifier documentation earlier in this
  // header).  A return value of true means the table is open.
  bool Open(const std::string &wspecifier);

  // True if open for writing.
  bool IsOpen() const;

  // Writes one object.  On error, throws KaldiFatalError via the KALDI_ERR
  // macro.
  inline void Write(const std::string &key, const T &value) const;

  // Flushes any archive.  It neither returns an error status nor throws;
  // errors show up on the next Write() or Close().  Useful when writing to a
  // command in a pipe and good CPU utilization matters.
  void Flush();

  // Calling Close() is optional because the destructor closes the table.  It
  // is mainly useful for handling error states yourself, since the destructor
  // throws on error if Close() was not called.
  bool Close();

 private:
  // Assignment is disallowed (declared, never defined).
  TableWriter &operator = (const TableWriter&);

  // Verifies that impl_ is non-NULL; otherwise prints an error message and
  // dies (with KALDI_ERR).
  void CheckImpl() const;

  TableWriterImplBase<Holder> *impl_;
};


/// This class is for when you are reading something in random access, but
/// it may actually be stored per-speaker (or something similar) but the
/// keys you're using are per utterance.  So you also provide an "rxfilename"
/// for a file containing lines like
/// utt1 spk1
/// utt2 spk1
/// utt3 spk1
/// and so on.  Note: this is optional; if it is an empty string, we just won't
/// do the mapping.  Also, "table_rxfilename" may be the empty string (as for
/// a regular table), in which case the table just won't be opened.
/// We provide only the most frequently used of the functions of
/// RandomAccessTableReader.

template<class Holder>
class RandomAccessTableReaderMapped {
 public:
  typedef typename Holder::T T;

  // Leaves both underlying readers unopened.
  RandomAccessTableReaderMapped() {}

  /// Note: "utt2spk_rxfilename" will normally be an rxfilename for an
  /// utterance-to-speaker map, but the code is general and accepts any map.
  RandomAccessTableReaderMapped(const std::string &table_rxfilename,
                                const std::string &utt2spk_rxfilename);

  // Copying: the default copy-constructor does what we want.  It crashes for
  // already-opened readers, because it invokes the copy-constructors of the
  // member variables.

  /// Note: when calling Open, utt2spk_rxfilename may be empty.
  bool Open(const std::string &table_rxfilename,
            const std::string &utt2spk_rxfilename);

  // Open/closed state is that of the underlying table reader.
  bool IsOpen() const {
    return reader_.IsOpen();
  }

  // Closes the underlying table reader and forwards its status.
  bool Close() {
    return reader_.Close();
  }

  bool HasKey(const std::string &key);
  const T &Value(const std::string &key);

 private:
  // Assignment is disallowed (declared, never defined).
  RandomAccessTableReaderMapped &operator =
    (const RandomAccessTableReaderMapped<Holder>&);

  RandomAccessTableReader<Holder> reader_;
  RandomAccessTableReader<TokenHolder> token_reader_;
  std::string utt2spk_rxfilename_;  // Used only in diagnostic messages.
};


/// @} end "addtogroup table_group"
}  // end namespace kaldi

#include "util/kaldi-table-inl.h"

#endif  // KALDI_UTIL_KALDI_TABLE_H_


================================================
FILE: runtime/engine/kaldi/util/kaldi-thread.cc
================================================
// util/kaldi-thread.cc

// Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
//                 Frantisek Skala

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "base/kaldi-common.h"
#include "util/kaldi-thread.h"

namespace kaldi {
// Definition of the global thread-count setting that util/kaldi-thread.h
// declares "extern"; RunMultiThreaded() passes it to MultiThreader.
int32 g_num_threads = 8;  // Initialize this global variable.

// Out-of-line definition of the virtual destructor.  The base class itself
// has nothing to clean up, so the body is intentionally empty.
MultiThreadable::~MultiThreadable() {}


}  // end namespace kaldi


================================================
FILE: runtime/engine/kaldi/util/kaldi-thread.h
================================================
// util/kaldi-thread.h

// Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
//                 Frantisek Skala
//           2017  University of Southern California (Author: Dogan Can)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_THREAD_KALDI_THREAD_H_
#define KALDI_THREAD_KALDI_THREAD_H_ 1

#include <thread>
#include "util/options-itf.h"
#include "util/kaldi-semaphore.h"

// This header provides convenient mechanisms for parallelization.
//
// The class MultiThreader, and the function RunMultiThreaded provide a
// mechanism to run a specified number of jobs in parallel and wait for them
// all to finish. They accept objects of some class C that derives from the
// base class MultiThreadable. C needs to define the operator () that takes
// no arguments. See ExampleClass below.
//
// The class TaskSequencer addresses a different problem typically encountered
// in Kaldi command-line programs that process a sequence of items. The items
// to be processed are coming in. They are all of different sizes, e.g.
// utterances with different numbers of frames. We would like them to be
// processed in parallel to make good use of the threads available but they
// must be output in the same order they came in. Here, we again accept objects
// of some class C with an operator () that takes no arguments. C may also have
// a destructor with side effects (typically some kind of output).
// TaskSequencer is responsible for running the jobs in parallel. It has a
// function Run() that will accept a new object of class C; this will block
// until a thread is free, at which time it will spawn a thread that starts
// running the operator () of the object. When threads are finished running,
// the objects will be deleted. TaskSequencer guarantees that the destructors
// will be called sequentially (not in parallel) and in the same order the
// objects were given to the Run() function, so that it is safe for the
// destructor to have side effects such as outputting data.
// Note: the destructor of TaskSequencer will wait for any remaining jobs that
// are still running and will call the destructors.


namespace kaldi {

extern int32 g_num_threads;  // Maximum number of threads, for programs that
// use threads (not many of them do; the SGMM update program is one example).
// This is 8 by default.  You can change this on the command line, where
// used, with --num-threads.  Programs that think they will use threads
// should register it with their ParseOptions, as something like:
// po.Register("num-threads", &g_num_threads, "Number of threads to use.");

class MultiThreadable {
  // To create a function object that does part of the job, inherit from this
  // class, implement a copy constructor calling the default copy constructor
  // of this base class (so that thread_id_ and num_threads_ are copied to new
  // instances), and finally implement the operator() that does part of the job
  // based on thread_id_ and num_threads_ variables.
  // Note: example implementations are in util/kaldi-thread-test.cc
 public:
  virtual void operator() () = 0;
  // Does the main function of the class
  //  Subclasses have to redefine this
  virtual ~MultiThreadable();
  // Optional destructor.  Note: the destructor of the object passed by the user
  // will also be called, so watch out.

 public:
  // Do not redeclare thread_id_ and num_threads_ in derived classes.
  int32 thread_id_;  // 0 <= thread_id_ < num_threads_
  int32 num_threads_;

 private:
  // Have additional member variables as needed.
};


class ExampleClass: public MultiThreadable {
  // Skeleton showing the shape of a class that can be handed to
  // MultiThreader / RunMultiThreaded.
 public:
  // Typically there will be an initializer that takes arguments.
  ExampleClass(int32 *foo);

  // A copy constructor is also needed; some example classes use the default
  // version of this.
  ExampleClass(const ExampleClass &other);

  // Does the main function of the class.  An implementation will typically
  // want to look at the member variables thread_id_ and num_threads_, which
  // are inherited from MultiThreadable.
  void operator() () {
  }

  // Optional destructor.  Sometimes useful things happen here, for example
  // summing up of certain quantities.  See code that uses RunMultiThreaded
  // for examples.
  ~ExampleClass() {
  }

 private:
  // Have additional member variables as needed.
};


template<class C>
class MultiThreader {
  // Runs copies of a job object of class C, one per thread, and joins all of
  // the threads when this object is destroyed.
 public:
  // Makes max(1, num_threads) copies of "c_in".  For num_threads == 0 the
  // single copy is run right here in the calling thread; otherwise each copy
  // is told its index and the total count and is started in its own thread.
  MultiThreader(int32 num_threads, const C &c_in) :
    threads_(std::max<int32>(1, num_threads)),
    cvec_(std::max<int32>(1, num_threads), c_in) {
    if (num_threads != 0) {
      const int32 pool_size = threads_.size();
      for (int32 t = 0; t < pool_size; t++) {
        C &job = cvec_[t];
        job.thread_id_ = t;
        job.num_threads_ = pool_size;
        // std::ref makes the thread run the element stored in cvec_ itself
        // rather than a copy of it.
        threads_[t] = std::thread(std::ref(job));
      }
      return;
    }
    // Special case num_threads == 0: behaves like num_threads == 1 but
    // without creating extra threads.  This can be useful in GPU computations
    // where threads cannot be used.
    C &only_job = cvec_[0];
    only_job.thread_id_ = 0;
    only_job.num_threads_ = 1;
    only_job();
  }

  // Waits for every thread that was actually started.
  ~MultiThreader() {
    typedef std::vector<std::thread>::iterator ThreadIter;
    for (ThreadIter it = threads_.begin(); it != threads_.end(); ++it) {
      if (it->joinable()) {
        it->join();
      }
    }
  }

 private:
  std::vector<std::thread> threads_;
  std::vector<C> cvec_;
};

/// Here, class C should inherit from MultiThreadable.  Note: if you want to
/// control the number of threads yourself, or need to do something in the main
/// thread of the program while the objects exist, just initialize the
/// MultiThreader<C> object yourself.
template<class C> void RunMultiThreaded(const C &c_in) {
  // All the work happens in the constructor and destructor of this local
  // object: construction starts the jobs (g_num_threads of them) and the
  // destructor, run when the function returns, joins them.
  MultiThreader<C> runner(g_num_threads, c_in);
}


struct TaskSequencerConfig {
  // Options consumed by the TaskSequencer constructor.

  // Number of threads doing computation at once (default 1).
  int32 num_threads;

  // Upper bound on threads alive at once; values <= 0 (the default is 0) make
  // TaskSequencer use num_threads + 20 instead.
  int32 num_threads_total;

  TaskSequencerConfig()
      : num_threads(1),
        num_threads_total(0) {}

  // Registers both fields as command-line options on "opts".
  void Register(OptionsItf *opts) {
    opts->Register(
        "num-threads", &num_threads,
        "Number of actively processing "
        "threads to run in parallel");
    opts->Register(
        "num-threads-total", &num_threads_total,
        "Total number of "
        "threads, including those that are waiting on other threads "
        "to produce their output.  Controls memory use.  If <= 0, "
        "defaults to --num-threads plus 20.  Otherwise, must "
        "be >= num-threads.");
  }
};

// C should have an operator () taking no arguments, that does some kind
// of computation, and a destructor that produces some kind of output (the
// destructors will be run sequentially, in the same order as Run was called).
template<class C>
class TaskSequencer {
  // Runs task objects of class C in separate threads while guaranteeing that
  // the objects are deleted (i.e. their destructors run) one at a time, in the
  // order in which they were passed to Run().
 public:
  // Stores config.num_threads and initializes the two semaphores:
  // threads_avail_ with config.num_threads, and tot_threads_avail_ with
  // config.num_threads_total if that is positive, else config.num_threads + 20.
  // Asserts that num_threads_total, when given (> 0), is >= num_threads.
  // No thread is started here.
  TaskSequencer(const TaskSequencerConfig &config):
      num_threads_(config.num_threads),
      threads_avail_(config.num_threads),
      tot_threads_avail_(config.num_threads_total > 0 ? config.num_threads_total :
                         config.num_threads + 20),
      thread_list_(NULL) {
    KALDI_ASSERT((config.num_threads_total <= 0 ||
                  config.num_threads_total >= config.num_threads) &&
                 "num-threads-total, if specified, must be >= num-threads");
  }

  /// This function takes ownership of the pointer "c", and will delete it
  /// in the same sequence as Run was called on the jobs.
  void Run(C *c) {
    // With num_threads_ == 0 no thread is created: the task is run and deleted
    // synchronously in the calling thread.
    if (num_threads_ == 0) {
      (*c)();
      delete c;
      return;
    }

    threads_avail_.Wait(); // wait till we have a thread for computation free.
    tot_threads_avail_.Wait(); // this ensures we don't have too many threads
    // waiting on I/O, and consume too much memory.

    // put the new RunTaskArgsList object at head of the singly
    // linked list thread_list_.  The previous head becomes its "tail", which
    // is what the new thread will join on before deleting its own task.
    thread_list_ = new RunTaskArgsList(this, c, thread_list_);
    thread_list_->thread = std::thread(TaskSequencer<C>::RunTask,
                                       thread_list_);
  }

  void Wait() { // You call this at the end if it's more convenient
    // than waiting for the destructor.  It waits for all tasks to finish.
    // Joining only the head of the list is enough, because each thread joins
    // its predecessor (its "tail") before it exits.
    if (thread_list_ != NULL) {
      thread_list_->thread.join();
      KALDI_ASSERT(thread_list_->tail == NULL); // thread would not
      // have exited without setting tail to NULL.
      delete thread_list_;
      thread_list_ = NULL;
    }
  }

  /// The destructor waits for the last thread to exit.
  ~TaskSequencer() {
    Wait();
  }
 private:
  // One node per task submitted through Run(); the nodes form a singly linked
  // list from the most recently submitted task back towards older ones.
  struct RunTaskArgsList {
    TaskSequencer *me; // Think of this as a "this" pointer.
    C *c; // The task to run; deleted (and set to NULL) by RunTask().
    std::thread thread; // The thread executing RunTask() for this node.
    RunTaskArgsList *tail; // Node of the previously submitted task, or NULL.
    RunTaskArgsList(TaskSequencer *me, C *c, RunTaskArgsList *tail):
        me(me), c(c), tail(tail) {}
  };
  // This static function gets run in the threads that we create.
  static void RunTask(RunTaskArgsList *args) {
    // (1) run the job.
    (*(args->c))(); // call operator () on args->c, which does the computation.
    args->me->threads_avail_.Signal(); // Signal that the compute-intensive
    // part of the thread is done (we want to run no more than
    // config_.num_threads of these.)

    // (2) we want to destroy the object "c" now, by deleting it.  But for
    //     correct sequencing (this is the whole point of this class, it
    //     is intended to ensure the output of the program is in correct order),
    //     we first wait till the previous thread, whose details will be in "tail",
    //     is finished.
    if (args->tail != NULL) {
      args->tail->thread.join();
    }

    delete args->c; // delete the object "c".  This may cause some output,
    // e.g. to a stream.  We don't need to worry about concurrent access to
    // the output stream, because each thread waits for the previous thread
    // to be done, before doing this.  So there is no risk of concurrent
    // access.
    args->c = NULL;

    if (args->tail != NULL) {
      KALDI_ASSERT(args->tail->tail == NULL); // Because we already
      // did join on args->tail->thread, which means that
      // thread was done, and before it exited, it would have
      // deleted and set to NULL its tail (which is the next line of code).
      delete args->tail;
      args->tail = NULL;
    }
    // At this point we are exiting from the thread.  Signal the
    // "tot_threads_avail_" semaphore, which is used to limit the total number
    // of threads that are alive, including not only those that are in active
    // computation in c->operator (), but also those that are waiting on I/O or
    // on other threads.
    args->me->tot_threads_avail_.Signal();
  }

  int32 num_threads_; // copy of config.num_threads (since Semaphore doesn't store original count)

  Semaphore threads_avail_; // Initialized to the number of threads we are
  // supposed to run with; the function Run() waits on this.

  Semaphore tot_threads_avail_; // We use this semaphore to ensure we don't
  // consume too much memory...
  RunTaskArgsList *thread_list_; // Head of the list: the node of the most
  // recently submitted task, or NULL if none is pending.

};

} // namespace kaldi

#endif  // KALDI_THREAD_KALDI_THREAD_H_


================================================
FILE: runtime/engine/kaldi/util/options-itf.h
================================================
// itf/options-itf.h

// Copyright 2013  Tanel Alumae, Tallinn University of Technology

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_ITF_OPTIONS_ITF_H_
#define KALDI_ITF_OPTIONS_ITF_H_ 1
#include "base/kaldi-common.h"

namespace kaldi {

class OptionsItf {
  // Abstract interface for registering named options.  There is one pure
  // virtual Register() overload per supported value type; each takes the
  // option name, a pointer to the variable backing the option, and a
  // documentation string.
 public:
  virtual ~OptionsItf() {}

  virtual void Register(const std::string &name, bool *ptr,
                        const std::string &doc) = 0;
  virtual void Register(const std::string &name, int32 *ptr,
                        const std::string &doc) = 0;
  virtual void Register(const std::string &name, uint32 *ptr,
                        const std::string &doc) = 0;
  virtual void Register(const std::string &name, float *ptr,
                        const std::string &doc) = 0;
  virtual void Register(const std::string &name, double *ptr,
                        const std::string &doc) = 0;
  virtual void Register(const std::string &name, std::string *ptr,
                        const std::string &doc) = 0;
};

}  // namespace Kaldi

#endif  // KALDI_ITF_OPTIONS_ITF_H_


================================================
FILE: runtime/engine/kaldi/util/parse-options.cc
================================================
// util/parse-options.cc

// Copyright 2009-2011  Karel Vesely;  Microsoft Corporation;
//                      Saarland University (Author: Arnab Ghoshal);
// Copyright 2012-2013  Johns Hopkins University (Author: Daniel Povey);
//                      Frantisek Skala;  Arnab Ghoshal
// Copyright 2013       Tanel Alumae
//
// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include <iostream>
#include <iomanip>
#include <fstream>
#include <algorithm>
#include <cstdlib>
#include <cassert>
#include <cstring>

#include "util/parse-options.h"
#include "util/text-utils.h"
#include "base/kaldi-common.h"

namespace kaldi {


// Builds a ParseOptions that forwards its registrations, under "prefix", to
// another options object.  If "other" is itself a forwarding ParseOptions,
// this object forwards straight to that one's target and the two prefixes are
// chained as "outer.inner".
ParseOptions::ParseOptions(const std::string &prefix,
                           OptionsItf *other):
    print_args_(false), help_(false), usage_(""), argc_(0), argv_(NULL) {
  ParseOptions *parent = dynamic_cast<ParseOptions*>(other);

  // Choose the object that will finally receive the registrations.
  if (parent == NULL || parent->other_parser_ == NULL) {
    other_parser_ = other;
  } else {
    // we get here if this constructor is used twice, recursively.
    other_parser_ = parent->other_parser_;
  }

  // Choose the prefix, chaining it onto the parent's prefix when it has one.
  if (parent == NULL || parent->prefix_ == "") {
    prefix_ = prefix;
  } else {
    prefix_ = parent->prefix_ + std::string(".") + prefix;
  }
}

void ParseOptions::Register(const std::string &name,
                            bool *ptr, const std::string &doc) {
  RegisterTmpl(name, ptr, doc);
}

void ParseOptions::Register(const std::string &name,
                            int32 *ptr, const std::string &doc) {
  RegisterTmpl(name, ptr, doc);
}

void ParseOptions::Register(const std::string &name,
                            uint32 *ptr, const std::string &doc) {
  RegisterTmpl(name, ptr, doc);
}

void ParseOptions::Register(const std::string &name,
                            float *ptr, const std::string &doc) {
  RegisterTmpl(name, ptr, doc);
}

void ParseOptions::Register(const std::string &name,
                            double *ptr, const std::string &doc) {
  RegisterTmpl(name, ptr, doc);
}

void ParseOptions::Register(const std::string &name,
                            std::string *ptr, const std::string &doc) {
  RegisterTmpl(name, ptr, doc);
}

// old-style, used for registering application-specific parameters
// Registers locally when this object is a stand-alone parser; otherwise
// forwards to the other parser under the name "prefix.name".
template<typename T>
void ParseOptions::RegisterTmpl(const std::string &name, T *ptr,
                                const std::string &doc) {
  if (other_parser_ != NULL) {
    KALDI_ASSERT(prefix_ != "" &&
                 "Cannot use empty prefix when registering with prefix.");
    // name becomes prefix.name
    std::string qualified_name = prefix_;
    qualified_name += '.';
    qualified_name += name;
    other_parser_->Register(qualified_name, ptr, doc);
    return;
  }
  // Stand-alone parser: register as a non-standard (application) option.
  this->RegisterCommon(name, ptr, doc, false);
}

// does the common part of the job of registering a parameter
// Does the common part of the job of registering a parameter: checks the
// pointer, normalizes the option name into the key used by the internal maps,
// and then calls the type-specific RegisterSpecific() overload.
//
// If an option with the same normalized key is already registered, a warning
// is printed and the new registration is skipped, as the warning text states.
// Previously the code fell through and registered again anyway, which
// overwrote the stored documentation and, when the two registrations used
// different value types, left the same key in two of the typed maps.
template<typename T>
void ParseOptions::RegisterCommon(const std::string &name, T *ptr,
                                  const std::string &doc, bool is_standard) {
  KALDI_ASSERT(ptr != NULL);
  std::string idx = name;
  NormalizeArgName(&idx);
  if (doc_map_.find(idx) != doc_map_.end()) {
    KALDI_WARN << "Registering option twice, ignoring second time: " << name;
    return;  // keep the first registration untouched
  }
  this->RegisterSpecific(name, idx, ptr, doc, is_standard);
}

// used to register standard parameters (those that are present in all of the
// applications)
// Thin wrapper over RegisterCommon() that marks the option as "standard".
template<typename T>
void ParseOptions::RegisterStandard(const std::string &name, T *ptr,
                            const std::string &doc) {
  const bool is_standard = true;
  this->RegisterCommon(name, ptr, doc, is_standard);
}

// Type-specific registration.  Every overload does the same two things:
// stores the variable's pointer in the map for its type under key "idx", and
// stores a DocInfo whose text is the caller's doc string followed by the type
// and the variable's current value (shown as the default).

void ParseOptions::RegisterSpecific(const std::string &name,
                                    const std::string &idx,
                                    bool *b,
                                    const std::string &doc,
                                    bool is_standard) {
  bool_map_[idx] = b;
  std::string annotated = doc + " (bool, default = ";
  annotated += ((*b) ? "true)" : "false)");
  doc_map_[idx] = DocInfo(name, annotated, is_standard);
}

void ParseOptions::RegisterSpecific(const std::string &name,
                                    const std::string &idx,
                                    int32 *i,
                                    const std::string &doc,
                                    bool is_standard) {
  int_map_[idx] = i;
  std::ostringstream annotated;
  annotated << doc;
  annotated << " (int, default = " << *i << ")";
  doc_map_[idx] = DocInfo(name, annotated.str(), is_standard);
}

void ParseOptions::RegisterSpecific(const std::string &name,
                                    const std::string &idx,
                                    uint32 *u,
                                    const std::string &doc,
                                    bool is_standard) {
  uint_map_[idx] = u;
  std::ostringstream annotated;
  annotated << doc;
  annotated << " (uint, default = " << *u << ")";
  doc_map_[idx] = DocInfo(name, annotated.str(), is_standard);
}

void ParseOptions::RegisterSpecific(const std::string &name,
                                    const std::string &idx,
                                    float *f,
                                    const std::string &doc,
                                    bool is_standard) {
  float_map_[idx] = f;
  std::ostringstream annotated;
  annotated << doc;
  annotated << " (float, default = " << *f << ")";
  doc_map_[idx] = DocInfo(name, annotated.str(), is_standard);
}

void ParseOptions::RegisterSpecific(const std::string &name,
                                    const std::string &idx,
                                    double *f,
                                    const std::string &doc,
                                    bool is_standard) {
  double_map_[idx] = f;
  std::ostringstream annotated;
  annotated << doc;
  annotated << " (double, default = " << *f << ")";
  doc_map_[idx] = DocInfo(name, annotated.str(), is_standard);
}

void ParseOptions::RegisterSpecific(const std::string &name,
                                    const std::string &idx,
                                    std::string *s,
                                    const std::string &doc,
                                    bool is_standard) {
  string_map_[idx] = s;
  std::string annotated = doc + " (string, default = \"";
  annotated += *s;
  annotated += "\")";
  doc_map_[idx] = DocInfo(name, annotated, is_standard);
}
void ParseOptions::DisableOption(const std::string &name) {
  if (argv_ != NULL)
    KALDI_ERR << "DisableOption must not be called after calling Read().";
  if (doc_map_.erase(name) == 0)
    KALDI_ERR << "Option " << name
              << " was not registered so cannot be disabled: ";
  bool_map_.erase(name);
  int_map_.erase(name);
  uint_map_.erase(name);
  float_map_.erase(name);
  double_map_.erase(name);
  string_map_.erase(name);
}


// Number of entries currently held in positional_args_.
int ParseOptions::NumArgs() const {
  const int num_positional = positional_args_.size();
  return num_positional;
}

// Returns the i'th positional argument.  Indices are 1-based: valid values
// are 1 .. NumArgs().  Any other index is reported with KALDI_ERR.
std::string ParseOptions::GetArg(int i) const {
  const int num_positional = static_cast<int>(positional_args_.size());
  const bool in_range = (i >= 1 && i <= num_positional);
  // use KALDI_ERR if code error
  if (!in_range)
    KALDI_ERR << "ParseOptions::GetArg, invalid index " << i;
  return positional_args_[i - 1];
}

// Shell flavours for which command lines can be quoted.  Bash is the only
// one; we currently do not support any other options.  The enumerator value
// is also used as an array index in MustBeQuoted().
enum ShellType { kBash = 0 };

// The shell type to quote for.  This can be changed in the code if it ever
// does need to be changed (as it's unlikely that one compilation of this
// tool-set would use both shells).
static ShellType kShellType = kBash;

// Returns true if "str" has to be quoted/escaped before it can be pasted
// into a shell (mainly thinking of bash, but should work for others).
// This is for the convenience of the user, so that command lines printed by
// ParseOptions::Read (with --print-args=true) are paste-able into the shell
// and will run.  If you use a different type of shell, it might be necessary
// to change this function.  It is mostly a cosmetic issue, as it only affects
// how the program echoes its command-line arguments to the screen.
//
// Rules: the empty string must always be quoted; otherwise the string is
// safe only if every character is alphanumeric or one of a small set of
// punctuation characters known not to be interpreted by the shell.
static bool MustBeQuoted(const std::string &str, ShellType st) {
  // Only Bash is supported (for the moment).
  KALDI_ASSERT(st == kBash && "Invalid shell type.");

  const char *c = str.c_str();
  if (*c == '\0') return true;  // Must quote empty string.

  const char *ok_chars[2];

  // These seem not to be interpreted as long as there are no other "bad"
  // characters involved (e.g. "," would be interpreted as part of something
  // like a{b,c}, but not on its own).
  ok_chars[kBash] = "[]~#^_-+=:.,/";

  // Just want to make sure that a space character doesn't get automatically
  // inserted here via an automated style-checking script, like it did before.
  KALDI_ASSERT(!strchr(ok_chars[kBash], ' '));

  for (; *c != '\0'; c++) {
    // The <cctype> classification functions require a value representable
    // as unsigned char (or EOF).  Plain char may be signed, so bytes >= 0x80
    // (e.g. UTF-8 sequences) must be converted first to avoid undefined
    // behaviour.
    if (isalnum(static_cast<unsigned char>(*c))) continue;

    // For non-alphanumeric characters we have a list of characters which
    // are OK.  All others are forbidden (this is easier since the shell
    // interprets most non-alphanumeric characters).  *c is never '\0' here,
    // so strchr cannot match the terminator of ok_chars.
    if (strchr(ok_chars[st], *c) == NULL) return true;
  }
  return false;  // The string was OK. No quoting or escaping.
}

// Produces a quoted and escaped copy of "str", which the caller has already
// determined to need escaping.  The goal is that pasting the result into a
// shell of type "st" (only bash for now) hands the program exactly the
// original string.
//
// Quoting rules:
//  - By default the string is wrapped in single quotes, and each embedded
//    single quote is written as '\'' (close the quote, emit an escaped
//    quote, reopen the quote); e.g. echo 'a'\''b' prints a'b.
//  - If the string contains a single quote and none of the characters
//    "`$\ (which stay special inside double quotes), it is wrapped in double
//    quotes instead, which then needs no escaping at all.
//    e.g. see http://www.redhat.com/mirrors/LDP/LDP/abs/html/quotingvar.html
static std::string QuoteAndEscape(const std::string &str, ShellType st) {
  // Only Bash is supported (for the moment).
  KALDI_ASSERT(st == kBash && "Invalid shell type.");

  const char *raw = str.c_str();
  const bool has_single_quote = (strchr(raw, '\'') != NULL);
  const bool safe_in_double_quotes = (strpbrk(raw, "\"`$\\") == NULL);
  const bool use_double_quotes = has_single_quote && safe_in_double_quotes;

  const char quote_char = use_double_quotes ? '"' : '\'';
  // In the double-quoted case the escape sequence should never be needed,
  // because the string is known not to contain a double quote.
  const char *escape_str = use_double_quotes ? "\\\"" : "'\\''";

  std::string ans(1, quote_char);
  for (const char *p = raw; *p != '\0'; ++p) {
    if (*p == quote_char)
      ans += escape_str;
    else
      ans += *p;
  }
  ans += quote_char;
  return ans;
}

// static function
// Returns "str" unchanged when it is safe to paste into the shell, otherwise
// a quoted and escaped version of it.
std::string ParseOptions::Escape(const std::string &str) {
  if (!MustBeQuoted(str, kShellType)) {
    return str;
  }
  return QuoteAndEscape(str, kShellType);
}


// Parses the command line in three stages:
//   1. a first pass over the leading arguments that only reacts to --config
//      (reads the file immediately) and --help (prints usage and exits);
//   2. a second pass that applies every leading "--key[=value]" argument via
//      SetOption(), so command-line values override config-file values;
//   3. everything that remains is stored as positional arguments.
// A lone "--" ends the named options.  An unknown option prints the usage
// message and raises KALDI_ERR.  Returns the index reached after the
// positional loop has consumed the remaining arguments.
int ParseOptions::Read(int argc, const char *const argv[]) {
  // Remember the command line; PrintUsage() echoes it, and DisableOption()
  // uses a non-NULL argv_ to detect that Read() has already been called.
  argc_ = argc;
  argv_ = argv;
  std::string key, value;
  int i;
  if (argc > 0) {
    // set global "const char*" g_program_name (name of the program)
    // so it can be printed out in error messages;
    // it's useful because often the stderr of different programs will
    // be mixed together in the same log file.
    // Only the part after the last path separator is used.
#ifdef _MSC_VER
    const char *c = strrchr(argv[0], '\\');
#else
    const char *c = strrchr(argv[0], '/');
#endif
    SetProgramName(c == NULL ? argv[0] : c + 1);
  }
  // first pass: look for config parameter, look for priority
  // Note: this pass does not stop at the first positional argument, only at
  // a lone "--" or at the end of argv.
  for (i = 1; i < argc; i++) {
    if (std::strncmp(argv[i], "--", 2) == 0) {
      if (std::strcmp(argv[i], "--") == 0) {
        // a lone "--" marks the end of named options
        break;
      }
      bool has_equal_sign;
      SplitLongArg(argv[i], &key, &value, &has_equal_sign);
      NormalizeArgName(&key);
      Trim(&value);
      if (key.compare("config") == 0) {
        // Config files are applied here, before any command-line option is
        // set in the second pass.
        ReadConfigFile(value);
      }
      if (key.compare("help") == 0) {
        // Any occurrence of the key triggers the usage message; the value
        // (if one was given) is not examined.
        PrintUsage();
        exit(0);
      }
    }
  }
  bool double_dash_seen = false;
  // second pass: add the command line options
  for (i = 1; i < argc; i++) {
    if (std::strncmp(argv[i], "--", 2) == 0) {
      if (std::strcmp(argv[i], "--") == 0) {
        // A lone "--" marks the end of named options.
        // Skip that option and break the processing of named options
        i += 1;
        double_dash_seen = true;
        break;
      }
      bool has_equal_sign;
      SplitLongArg(argv[i], &key, &value, &has_equal_sign);
      NormalizeArgName(&key);
      Trim(&value);
      if (!SetOption(key, value, has_equal_sign)) {
        // SetOption returns false only when the key is not registered.
        PrintUsage(true);
        KALDI_ERR << "Invalid option " << argv[i];
      }
    } else {
      // First argument that does not start with "--": named options end.
      break;
    }
  }

  // process remaining arguments as positional
  for (; i < argc; i++) {
    // The first lone "--" among the remaining arguments is dropped (unless
    // one was already consumed above); later ones are kept as positional.
    if ((std::strcmp(argv[i], "--") == 0) && !double_dash_seen) {
      double_dash_seen = true;
    } else {
      positional_args_.push_back(std::string(argv[i]));
    }
  }

  // if the user did not suppress this with --print-args = false....
  if (print_args_) {
    // Build the whole line first so it reaches stderr in a single write.
    std::ostringstream strm;
    for (int j = 0; j < argc; j++)
      strm << Escape(argv[j]) << " ";
    strm << '\n';
    std::cerr << strm.str() << std::flush;
  }
  return i;
}


void ParseOptions::PrintUsage(bool print_command_line) {
  std::cerr << '\n' << usage_ << '\n';
  DocMapType::iterator it;
  // first we print application-specific options
  bool app_specific_header_printed = false;
  for (it = doc_map_.begin(); it != doc_map_.end(); ++it) {
    if (it->second.is_standard_ == false) {  // application-specific option
      if (app_specific_header_printed == false) {  // header was not yet printed
        std::cerr << "Options:" << '\n';
        app_specific_header_printed = true;
      }
      std::cerr << "  --" << std::setw(25) << std::left << it->second.name_
          << " : " << it->second.use_msg_ << '\n';
    }
  }
  if (app_specific_header_printed == true) {
    std::cerr << '\n';
  }

  // then the standard options
  std::cerr << "Standard options:" << '\n';
  for (it = doc_map_.begin(); it != doc_map_.end(); ++it) {
    if (it->second.is_standard_ == true) {  // we have standard option
      std::cerr << "  --" << std::setw(25) << std::left << it->second.name_
          << " : " << it->second.use_msg_ << '\n';
    }
  }
  std::cerr << '\n';
  if (print_command_line) {
    std::ostringstream strm;
    strm << "Command line was: ";
    for (int j = 0; j < argc_; j++)
      strm << Escape(argv_[j]) << " ";
    strm << '\n';
    std::cerr << strm.str() << std::flush;
  }
}

void ParseOptions::PrintConfig(std::ostream &os) {
  os << '\n' << "[[ Configuration of UI-Registered options ]]" << '\n';
  std::string key;
  DocMapType::iterator it;
  for (it = doc_map_.begin(); it != doc_map_.end(); ++it) {
    key = it->first;
    os << it->second.name_ << " = ";
    if (bool_map_.end() != bool_map_.find(key)) {
      os << (*bool_map_[key] ? "true" : "false");
    } else if (int_map_.end() != int_map_.find(key)) {
      os << (*int_map_[key]);
    } else if (uint_map_.end() != uint_map_.find(key)) {
      os << (*uint_map_[key]);
    } else if (float_map_.end() != float_map_.find(key)) {
      os << (*float_map_[key]);
    } else if (double_map_.end() != double_map_.find(key)) {
      os << (*double_map_[key]);
    } else if (string_map_.end() != string_map_.find(key)) {
      os << "'" << *string_map_[key] << "'";
    } else {
      KALDI_ERR << "PrintConfig: unrecognized option " << key << "[code error]";
    }
    os << '\n';
  }
  os << '\n';
}


// Reads option settings from a config file.  Everything from the first '#'
// on a line is treated as a comment, blank lines are skipped, and every
// remaining line must have the command-line form "--key[=value]".  Each
// setting is applied through SetOption(); an unopenable file, a malformed
// line or an unregistered option raises KALDI_ERR.
void ParseOptions::ReadConfigFile(const std::string &filename) {
  std::ifstream is(filename.c_str(), std::ifstream::in);
  if (!is.good()) {
    KALDI_ERR << "Cannot open config file: " << filename;
  }

  std::string line, key, value;
  // line_number is 1-based and counts every physical line, including the
  // blank and comment-only ones that are skipped.
  for (int32 line_number = 1; std::getline(is, line); ++line_number) {
    // Strip the comment, then surrounding whitespace.
    const size_t comment_start = line.find('#');
    if (comment_start != std::string::npos) {
      line.erase(comment_start);
    }
    Trim(&line);
    if (line.empty()) continue;

    if (line.compare(0, 2, "--") != 0) {
      KALDI_ERR << "Reading config file " << filename
                << ": line " << line_number << " does not look like a line "
                << "from a Kaldi command-line program's config file: should "
                << "be of the form --x=y.  Note: config files intended to "
                << "be sourced by shell scripts lack the '--'.";
    }

    // Parse and apply the option exactly as for a command-line argument.
    bool has_equal_sign;
    SplitLongArg(line, &key, &value, &has_equal_sign);
    NormalizeArgName(&key);
    Trim(&value);
    const bool recognized = SetOption(key, value, has_equal_sign);
    if (!recognized) {
      PrintUsage(true);
      KALDI_ERR << "Invalid option " << line << " in config file " << filename;
    }
  }
}


// Splits an argument of the form --key=value, --key= or --key into its key
// and value.  "has_equal_sign" records whether an '=' was present, which the
// caller needs in order to tell "--x" from "--x=".  An argument with an
// empty key ("--=value") prints the usage message and raises KALDI_ERR.
void ParseOptions::SplitLongArg(const std::string &in,
                                std::string *key,
                                std::string *value,
                                bool *has_equal_sign) {
  KALDI_ASSERT(in.substr(0, 2) == "--");  // precondition.

  const size_t kPrefixLen = 2;  // length of the leading "--"
  const size_t eq_pos = in.find('=');

  if (eq_pos == kPrefixLen) {
    // Empty keys are not allowed: --=value
    PrintUsage(true);
    KALDI_ERR << "Invalid option (no key): " << in;
  } else if (eq_pos != std::string::npos) {
    // Normal case: --option=value
    *key = in.substr(kPrefixLen, eq_pos - kPrefixLen);
    *value = in.substr(eq_pos + 1);
    *has_equal_sign = true;
  } else {
    // No '=' at all; this form is allowed for bools.  The value defaults to
    // empty and is interpreted by the caller depending on the option type.
    *key = in.substr(kPrefixLen);
    *value = "";
    *has_equal_sign = false;
  }
}


// Normalizes an option name in place so that different spellings of the same
// option compare equal: every '_' becomes '-' and every other character is
// lower-cased.  The name must not be empty (checked with KALDI_ASSERT).
void ParseOptions::NormalizeArgName(std::string *str) {
  for (std::string::iterator it = str->begin(); it != str->end(); ++it) {
    if (*it == '_') {
      *it = '-';  // convert _ to -
    } else {
      // std::tolower requires a value representable as unsigned char (or
      // EOF).  Plain char may be signed, so convert first; passing a
      // negative char directly is undefined behaviour.
      *it = static_cast<char>(std::tolower(static_cast<unsigned char>(*it)));
    }
  }

  KALDI_ASSERT(str->length() > 0);
}


// Assigns "value" to the registered option "key", converting it to the
// option's type.  Returns false if no option with that key is registered,
// true otherwise.  Invalid values raise KALDI_ERR (directly or through the
// To*() converters).  "--x=" is rejected for booleans and "--y" (without an
// equals sign) is rejected for strings.
bool ParseOptions::SetOption(const std::string &key,
                             const std::string &value,
                             bool has_equal_sign) {
  // The maps are consulted in a fixed order: bool, int, uint, float,
  // double, string.
  if (bool_map_.count(key) != 0) {
    if (has_equal_sign && value == "") {
      KALDI_ERR << "Invalid option --" << key << "=";
    }
    *(bool_map_[key]) = ToBool(value);
    return true;
  }
  if (int_map_.count(key) != 0) {
    *(int_map_[key]) = ToInt(value);
    return true;
  }
  if (uint_map_.count(key) != 0) {
    *(uint_map_[key]) = ToUint(value);
    return true;
  }
  if (float_map_.count(key) != 0) {
    *(float_map_[key]) = ToFloat(value);
    return true;
  }
  if (double_map_.count(key) != 0) {
    *(double_map_[key]) = ToDouble(value);
    return true;
  }
  if (string_map_.count(key) != 0) {
    if (!has_equal_sign) {
      KALDI_ERR << "Invalid option --" << key
                << " (option format is --x=y).";
    }
    *(string_map_[key]) = value;
    return true;
  }
  return false;
}


// Converts the textual value of a boolean option to bool.
// Accepted (case-insensitively): "true", "t", "1" and the empty string for
// true; "false", "f" and "0" for false.  Anything else prints the usage
// message and raises KALDI_ERR.
bool ParseOptions::ToBool(std::string str) {
  // Lower-case in place.  tolower() requires a value representable as
  // unsigned char (or EOF); plain char may be signed, so each character is
  // converted first instead of handing ::tolower to std::transform directly,
  // which is undefined behaviour for bytes >= 0x80.
  for (std::string::iterator it = str.begin(); it != str.end(); ++it) {
    *it = static_cast<char>(::tolower(static_cast<unsigned char>(*it)));
  }

  // allow "" as a valid option for "true", so that --x is the same as --x=true
  if ((str.compare("true") == 0) || (str.compare("t") == 0)
      || (str.compare("1") == 0) || (str.compare("") == 0)) {
    return true;
  }
  if ((str.compare("false") == 0) || (str.compare("f") == 0)
      || (str.compare("0") == 0)) {
    return false;
  }
  // if it is neither true nor false:
  PrintUsage(true);
  KALDI_ERR << "Invalid format for boolean argument [expected true or false]: "
            << str;
  return false;  // never reached
}


// Converts the textual value of an int32 option; raises KALDI_ERR if the
// text is not accepted by ConvertStringToInteger.
int32 ParseOptions::ToInt(const std::string &str) {
  int32 parsed;
  const bool ok = ConvertStringToInteger(str, &parsed);
  if (!ok) {
    KALDI_ERR << "Invalid integer option \"" << str << "\"";
  }
  return parsed;
}

// Converts the textual value of a uint32 option; raises KALDI_ERR if the
// text is not accepted by ConvertStringToInteger.
uint32 ParseOptions::ToUint(const std::string &str) {
  uint32 parsed;
  const bool ok = ConvertStringToInteger(str, &parsed);
  if (!ok) {
    KALDI_ERR << "Invalid integer option \"" << str << "\"";
  }
  return parsed;
}

// Converts the textual value of a float option; raises KALDI_ERR if the
// text is not accepted by ConvertStringToReal.
float ParseOptions::ToFloat(const std::string &str) {
  float parsed;
  const bool ok = ConvertStringToReal(str, &parsed);
  if (!ok) {
    KALDI_ERR << "Invalid floating-point option \"" << str << "\"";
  }
  return parsed;
}

// Converts the textual value of a double option; raises KALDI_ERR if the
// text is not accepted by ConvertStringToReal.
double ParseOptions::ToDouble(const std::string &str) {
  double parsed;
  const bool ok = ConvertStringToReal(str, &parsed);
  if (!ok) {
    KALDI_ERR << "Invalid floating-point option \"" << str << "\"";
  }
  return parsed;
}

// Explicit instantiations of the ParseOptions member templates for each of
// the six option types handled in this file: bool, int32, uint32, float,
// double and std::string.
// NOTE(review): parse-options.h only declares these templates, so these
// instantiations are presumably what makes them linkable from other
// translation units -- confirm against the template definitions.

// RegisterTmpl: registration of program-specific options.
template void ParseOptions::RegisterTmpl(const std::string &name, bool *ptr,
                            const std::string &doc);
template void ParseOptions::RegisterTmpl(const std::string &name, int32 *ptr,
                            const std::string &doc);
template void ParseOptions::RegisterTmpl(const std::string &name, uint32 *ptr,
                            const std::string &doc);
template void ParseOptions::RegisterTmpl(const std::string &name, float *ptr,
                            const std::string &doc);
template void ParseOptions::RegisterTmpl(const std::string &name, double *ptr,
                            const std::string &doc);
template void ParseOptions::RegisterTmpl(const std::string &name,
                            std::string *ptr, const std::string &doc);

// RegisterStandard: registration of the options shared by all programs.
template void ParseOptions::RegisterStandard(const std::string &name,
                            bool *ptr,
                            const std::string &doc);
template void ParseOptions::RegisterStandard(const std::string &name,
                            int32 *ptr,
                            const std::string &doc);
template void ParseOptions::RegisterStandard(const std::string &name,
                            uint32 *ptr,
                            const std::string &doc);
template void ParseOptions::RegisterStandard(const std::string &name,
                            float *ptr,
                            const std::string &doc);
template void ParseOptions::RegisterStandard(const std::string &name,
                            double *ptr,
                            const std::string &doc);
template void ParseOptions::RegisterStandard(const std::string &name,
                            std::string *ptr,
                            const std::string &doc);

// RegisterCommon: the part of registration shared by both kinds of option.
template void ParseOptions::RegisterCommon(const std::string &name,
                            bool *ptr,
                            const std::string &doc, bool is_standard);
template void ParseOptions::RegisterCommon(const std::string &name,
                            int32 *ptr,
                            const std::string &doc, bool is_standard);
template void ParseOptions::RegisterCommon(const std::string &name,
                            uint32 *ptr,
                            const std::string &doc, bool is_standard);
template void ParseOptions::RegisterCommon(const std::string &name,
                            float *ptr,
                            const std::string &doc, bool is_standard);
template void ParseOptions::RegisterCommon(const std::string &name,
                            double *ptr,
                            const std::string &doc, bool is_standard);
template void ParseOptions::RegisterCommon(const std::string &name,
                            std::string *ptr,
                            const std::string &doc, bool is_standard);

}  // namespace kaldi


================================================
FILE: runtime/engine/kaldi/util/parse-options.h
================================================
// util/parse-options.h

// Copyright 2009-2011  Karel Vesely;  Microsoft Corporation;
//                      Saarland University (Author: Arnab Ghoshal);
// Copyright 2012-2013  Frantisek Skala;  Arnab Ghoshal

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_UTIL_PARSE_OPTIONS_H_
#define KALDI_UTIL_PARSE_OPTIONS_H_

#include <map>
#include <string>
#include <vector>

#include "base/kaldi-common.h"
#include "util/options-itf.h"

namespace kaldi {

/// The class ParseOptions is for parsing command-line options; see
/// \ref parse_options for more documentation.
///
/// Typical use: construct with a usage string, Register() every option
/// (each call stores a pointer to the caller's variable), call Read(), then
/// fetch positional arguments with NumArgs() / GetArg().
class ParseOptions : public OptionsItf {
 public:
  /// Creates a parser and registers the standard options --config,
  /// --print-args, --help and --verbose.
  /// Note: "usage" is stored as a raw pointer and is NOT copied, so the
  /// string it points to must stay alive for as long as this object may
  /// print its usage message.
  explicit ParseOptions(const char *usage) :
    print_args_(true), help_(false), usage_(usage), argc_(0), argv_(NULL),
    prefix_(""), other_parser_(NULL) {
#if !defined(_MSC_VER) && !defined(__CYGWIN__) // This is just a convenient place to set the stderr to line
    setlinebuf(stderr);  // buffering mode, since it's called at program start.
#endif  // This helps ensure different programs' output is not mixed up.
    RegisterStandard("config", &config_, "Configuration file to read (this "
                     "option may be repeated)");
    RegisterStandard("print-args", &print_args_,
                     "Print the command line arguments (to stderr)");
    RegisterStandard("help", &help_, "Print out usage message");
    RegisterStandard("verbose", &g_kaldi_verbose_level,
                     "Verbose level (higher->more logging)");
  }

  /**
    This is a constructor for the special case where some options are
    registered with a prefix to avoid conflicts.  The object thus created will
    only be used temporarily to register an options class with the original
    options parser (which is passed as the *other pointer) using the given
    prefix.  It should not be used for any other purpose, and the prefix must
    not be the empty string.  It seems to be the least bad way of implementing
    options with prefixes at this point.
    Example of usage is:
     ParseOptions po;  // original ParseOptions object
     ParseOptions po_mfcc("mfcc", &po); // object with prefix.
     MfccOptions mfcc_opts;
     mfcc_opts.Register(&po_mfcc);
    The options will now get registered as, e.g., --mfcc.frame-shift=10.0
    instead of just --frame-shift=10.0
   */
  ParseOptions(const std::string &prefix, OptionsItf *other);

  ~ParseOptions() {}

  // Methods from the interface.  One overload per supported option type;
  // "ptr" is the variable that receives the parsed value and "doc" is the
  // text shown in the usage message.
  void Register(const std::string &name,
                bool *ptr, const std::string &doc);
  void Register(const std::string &name,
                int32 *ptr, const std::string &doc);
  void Register(const std::string &name,
                uint32 *ptr, const std::string &doc);
  void Register(const std::string &name,
                float *ptr, const std::string &doc);
  void Register(const std::string &name,
                double *ptr, const std::string &doc);
  void Register(const std::string &name,
                std::string *ptr, const std::string &doc);

  /// If called after registering an option and before calling
  /// Read(), disables that option from being used.  Will crash
  /// at runtime if that option had not been registered.
  void DisableOption(const std::string &name);

  /// This one is used for registering standard parameters of all the programs
  template<typename T>
  void RegisterStandard(const std::string &name,
                        T *ptr, const std::string &doc);

  /**
    Parses the command line options and fills the ParseOptions-registered
    variables. This must be called after all the variables were registered!!!

    Initially the variables have implicit values,
    then the config file values are set-up,
    finally the command line values given.
    Returns the first position in argv that was not used.
    [typically not useful: use NumArgs() and GetArg(). ]
   */
  int Read(int argc, const char *const *argv);

  /// Prints the usage documentation [provided in the constructor].
  /// If print_command_line is true, the command line recorded by Read() is
  /// printed as well.
  void PrintUsage(bool print_command_line = false);
  /// Prints the actual configuration of all the registered variables
  void PrintConfig(std::ostream &os);

  /// Reads the options values from a config file.  Must be called after
  /// registering all options.  This is usually used internally after the
  /// standard --config option is used, but it may also be called from a
  /// program.
  void ReadConfigFile(const std::string &filename);

  /// Number of positional parameters (c.f. argc-1).
  int NumArgs() const;

  /// Returns one of the positional parameters; 1-based indexing for argc/argv
  /// compatibility. Will crash if param is not >=1 and <=NumArgs().
  std::string GetArg(int param) const;

  /// Like GetArg(), but returns the empty string when param is greater than
  /// NumArgs(), for optional trailing arguments.
  std::string GetOptArg(int param) const {
    return (param <= NumArgs() ? GetArg(param) : "");
  }

  /// The following function will return a possibly quoted and escaped
  /// version of "str", according to the current shell.  Currently
  /// this is just hardwired to bash.  It's useful for debug output.
  static std::string Escape(const std::string &str);

 private:
  /// Template to register various variable types,
  /// used for program-specific parameters
  template<typename T>
  void RegisterTmpl(const std::string &name, T *ptr, const std::string &doc);

  // Following functions do just the datatype-specific part of the job.
  // "name" is the option name shown in the usage message and "idx" is the
  // key under which the option is stored in the maps below.
  /// Register boolean variable
  void RegisterSpecific(const std::string &name, const std::string &idx,
                        bool *b, const std::string &doc, bool is_standard);
  /// Register int32 variable
  void RegisterSpecific(const std::string &name, const std::string &idx,
                        int32 *i, const std::string &doc, bool is_standard);
  /// Register unsigned int32 variable
  void RegisterSpecific(const std::string &name, const std::string &idx,
                        uint32 *u,
                        const std::string &doc, bool is_standard);
  /// Register float variable
  void RegisterSpecific(const std::string &name, const std::string &idx,
                        float *f, const std::string &doc, bool is_standard);
  /// Register double variable [useful as we change BaseFloat type].
  void RegisterSpecific(const std::string &name, const std::string &idx,
                        double *f, const std::string &doc, bool is_standard);
  /// Register string variable
  void RegisterSpecific(const std::string &name, const std::string &idx,
                        std::string *s, const std::string &doc,
                        bool is_standard);

  /// Does the actual job for both kinds of parameters
  /// Does the common part of the job for all datatypes,
  /// then calls RegisterSpecific
  template<typename T>
  void RegisterCommon(const std::string &name,
                      T *ptr, const std::string &doc, bool is_standard);

  /// Set option with name "key" to "value".  Returns false if no option
  /// named "key" is registered; will crash if the value cannot be converted.
  /// "has_equal_sign" is used to allow --x for a boolean option x,
  /// and --y=, for a string option y.
  bool SetOption(const std::string &key, const std::string &value,
                 bool has_equal_sign);

  // Converters from the textual value of an option to its typed value.
  bool ToBool(std::string str);
  int32 ToInt(const std::string &str);
  uint32 ToUint(const std::string &str);
  float ToFloat(const std::string &str);
  double ToDouble(const std::string &str);

  // maps for option variables: option key -> pointer to the registered
  // variable, one map per supported type.
  std::map<std::string, bool*> bool_map_;
  std::map<std::string, int32*> int_map_;
  std::map<std::string, uint32*> uint_map_;
  std::map<std::string, float*> float_map_;
  std::map<std::string, double*> double_map_;
  std::map<std::string, std::string*> string_map_;

  /**
     Structure for options' documentation
   */
  struct DocInfo {
    DocInfo() {}
    DocInfo(const std::string &name, const std::string &usemsg)
      : name_(name), use_msg_(usemsg), is_standard_(false) {}
    DocInfo(const std::string &name, const std::string &usemsg,
            bool is_standard)
      : name_(name), use_msg_(usemsg),  is_standard_(is_standard) {}

    std::string name_;     // option name as shown in the usage message
    std::string use_msg_;  // documentation text shown next to the name
    bool is_standard_;     // true for options shared by all programs
  };
  typedef std::map<std::string, DocInfo> DocMapType;
  DocMapType doc_map_;  ///< map for the documentation

  bool print_args_;     ///< variable for the implicit --print-args parameter
  bool help_;           ///< variable for the implicit --help parameter
  std::string config_;  ///< variable for the implicit --config parameter
  std::vector<std::string> positional_args_;  ///< filled in by Read()
  const char *usage_;   ///< usage text; not owned, see the constructor
  int argc_;            ///< argc as passed to Read(); 0 before Read()
  const char *const *argv_;  ///< argv as passed to Read(); NULL before Read()

  /// These members are not normally used. They are only used when the object
  /// is constructed with a prefix
  std::string prefix_;
  OptionsItf *other_parser_;
 protected:
  /// SplitLongArg parses an argument of the form --a=b, --a=, or --a,
  /// and sets "has_equal_sign" to true if an equals-sign was parsed.
  /// This is needed in order to correctly allow --x for a boolean option
  /// x, and --y= for a string option y, and to disallow --x= and --y.
  void SplitLongArg(const std::string &in, std::string *key,
                    std::string *value, bool *has_equal_sign);

  /// Normalizes an option name in place ('_' becomes '-', and the name is
  /// lower-cased) so that equivalent spellings map to the same key.
  void NormalizeArgName(std::string *str);
};

/// This template is provided for convenience in reading config classes from
/// files; this is not the standard way to read configuration options, but may
/// occasionally be needed.  This function assumes the config has a function
/// "void Register(OptionsItf *opts)" which it can call to register the
/// ParseOptions object.
///
/// @param config_filename  Path of the config file to read.
/// @param c                Config object whose registered members are filled
///                         in from the file.
/// Errors (unreadable file, malformed line, unknown option) are reported by
/// ParseOptions::ReadConfigFile.
template<class C> void ReadConfigFromFile(const std::string &config_filename,
                                          C *c) {
  // ParseOptions keeps the usage text as a raw pointer, so the string must
  // outlive "po".  Passing the c_str() of a temporary would leave a dangling
  // pointer that is dereferenced if an error makes po print its usage.
  const std::string usage = "Parsing config from '" + config_filename + "'";
  ParseOptions po(usage.c_str());
  c->Register(&po);
  po.ReadConfigFile(config_filename);
}

/// This variant of the template ReadConfigFromFile is for if you need to read
/// two config classes from the same file.
///
/// @param conf  Path of the config file to read.
/// @param c1    First config object; must provide Register(OptionsItf*).
/// @param c2    Second config object; must provide Register(OptionsItf*).
/// Errors (unreadable file, malformed line, unknown option) are reported by
/// ParseOptions::ReadConfigFile.
template<class C1, class C2> void ReadConfigsFromFile(const std::string &conf,
                                                      C1 *c1, C2 *c2) {
  // ParseOptions keeps the usage text as a raw pointer, so the string must
  // outlive "po".  Passing the c_str() of a temporary would leave a dangling
  // pointer that is dereferenced if an error makes po print its usage.
  const std::string usage = "Parsing config from '" + conf + "'";
  ParseOptions po(usage.c_str());
  c1->Register(&po);
  c2->Register(&po);
  po.ReadConfigFile(conf);
}


}  // namespace kaldi

#endif  // KALDI_UTIL_PARSE_OPTIONS_H_


================================================
FILE: runtime/engine/kaldi/util/simple-io-funcs.cc
================================================
// util/simple-io-funcs.cc

// Copyright 2009-2011  Microsoft Corporation

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//  http://www.apache.org/licenses/LICENSE-2.0

// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "util/simple-io-funcs.h"
#include "util/text-utils.h"

namespace kaldi {

bool WriteIntegerVectorSimple(const std::string &wxfilename,
                              const std::vector<int32> &list) {
  kaldi::Output ko;
  // false, false is: text-mode, no Kaldi header.
  if (!ko.Open(wxfilename, false, false)) return false;
  for (size_t i = 0; i < list.size(); i++) ko.Stream() << list[i] << '\n';
  return ko.Close();
}

bool ReadIntegerVectorSimple(const std::string &rxfilename,
                             std::vector<int32> *list) {
  kaldi::Input ki;
  if (!ki.OpenTextMode(rxfilename)) return false;
  std::istream &is = ki.Stream();
  int32 i;
  list->clear();
  while ( !(is >> i).fail() )
    list->push_back(i);
  is >> std::ws;
  return is.eof();  // should be eof, or junk at end of file.
}

bool WriteIntegerVectorVectorSimple(const std::string &wxfilename,
                                 const std::vector<std::vector<int32> > &list) {
  kaldi::Output ko;
  // false, false is: text-mode, no Kaldi header.
  if (!ko.Open(wxfilename, false, false)) return false;
  std::ostream &os = ko.Stream();
  for (size_t i = 0; i < list.size(); i++) {
    for (size_t j = 0; j < list[i].size(); j++) {
      os << list[i][j];
      if (j+1 < list[i].size()) os << ' ';
    }
    os << '\n';
  }
  return ko.Close();
}

// Reads "rxfilename" (opened in text mode) line by line; each line becomes
// one inner vector of "list", holding the integers found on that line
// (separated by spaces, tabs or carriage returns).  Returns false if the
// input cannot be opened or a line cannot be parsed; in the latter case
// "list" is left empty.
bool ReadIntegerVectorVectorSimple(const std::string &rxfilename,
                                   std::vector<std::vector<int32> > *list) {
  kaldi::Input ki;
  if (!ki.OpenTextMode(rxfilename)) {
    return false;
  }
  std::istream &is = ki.Stream();
  list->clear();
  for (std::string line; std::getline(is, line); ) {
    std::vector<int32> row;
    const bool parsed = SplitStringToIntegers(line, " \t\r", true, &row);
    if (parsed) {
      list->push_back(row);
    } else {
      list->clear();
      return false;
    }
  }
  // If we're not at EOF, something weird happened.
  return is.eof();
}


}  // end namespace kaldi


================================================
FILE: runtime/engine/kaldi/util/simple-io-funcs.h
================================================
// util/simple-io-funcs.h

// Copyright 2009-2011  Microsoft Corporation;  Jan Silovsky

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//  http://www.apache.org/licenses/LICENSE-2.0

// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#ifndef KALDI_UTIL_SIMPLE_IO_FUNCS_H_
#define KALDI_UTIL_SIMPLE_IO_FUNCS_H_

#include <string>
#include <vector>
#include "util/kaldi-io.h"

// This header contains some utilities for reading and writing some common,
// simple text formats: integers in files, one per line, and integers in
// files, possibly multiple per line. These are not really fully native Kaldi
// formats; they are mostly for small files that might be generated by
// scripts, and can be read all at one time. For longer files of this type,
// we would probably use the Table code.

namespace kaldi {

/// WriteIntegerVectorSimple attempts to write this list of integers, one per
/// line, to the given file, in text format.
/// Returns true if succeeded.
bool WriteIntegerVectorSimple(const std::string &wxfilename,
                              const std::vector<int32> &v);

/// ReadIntegerVectorSimple attempts to read this list of integers, one per
/// line, from the given file, in text format.  Any previous contents of *v
/// are discarded.
/// Returns true if succeeded.
bool ReadIntegerVectorSimple(const std::string &rxfilename,
                             std::vector<int32> *v);

// The following two functions write and read a file format like:
// 1 2
// 3
//
// 4 5 6
// etc.
// i.e. one line per inner vector, with the integers on a line separated by
// spaces; an empty line corresponds to an empty inner vector.

/// Writes v to the given file in the format described above.
/// Returns true if succeeded.
bool WriteIntegerVectorVectorSimple(const std::string &wxfilename,
                                    const std::vector<std::vector<int32> > &v);

/// Reads a file in the format described above into *v, discarding any
/// previous contents.  Returns true if succeeded; if a line cannot be parsed,
/// returns false and leaves *v empty.
bool ReadIntegerVectorVectorSimple(const std::string &rxfilename,
                                   std::vector<std::vector<int32> > *v);


}  // end namespace kaldi.


#endif  // KALDI_UTIL_SIMPLE_IO_FUNCS_H_


================================================
FILE: runtime/engine/kaldi/util/simple-options.cc
================================================
// util/simple-options.cc

// Copyright 2013  Tanel Alumae, Tallinn University of Technology

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


#include "util/simple-options.h"


namespace kaldi {

void SimpleOptions::Register(const std::string &name,
                             bool *value,
                             const std::string &doc) {
  bool_map_[name] = value;
  option_info_list_.push_back(std::make_pair(name, OptionInfo(doc, kBool)));
}

void SimpleOptions::Register(const std::string &name,
                             int32 *value,
                             const std::string &doc) {
  int_map_[name] = value;
  option_info_list_.push_back(std::make_pair(name, OptionInfo(doc, kInt32)));
}

void SimpleOptions::Register(const std::string &name,
                             uint32 *value,
                             const std::string &doc) {
  uint_map_[name] = value;
  option_info_list_.push_back(std::make_pair(name, OptionInfo(doc, kUint32)));
}

void SimpleOptions::Register(const std::string &name,
                             float *value,
                             const std::string &doc) {
  float_map_[name] = value;
  option_info_list_.push_back(std::make_pair(name, OptionInfo(doc, kFloat)));
}

void SimpleOptions::Register(const std::string &name,
                             double *value,
                             const std::string &doc) {
  double_map_[name] = value;
  option_info_list_.push_back(std::make_pair(name, OptionInfo(doc, kDouble)));
}

void SimpleOptions::Register(const std::string &name,
                             std::string *value,
                             const std::string &doc) {
  string_map_[name] = value;
  option_info_list_.push_back(std::make_pair(name, OptionInfo(doc, kString)));
}

// Shared helper for the SetOption() overloads.  Looks `key` up in `some_map`
// and, if present, writes `value` through the stored pointer.
// Returns true iff the key was found; the map itself is never modified.
template<typename T>
static bool SetOptionImpl(const std::string &key, const T &value,
                          std::map<std::string, T*> &some_map) {
  typename std::map<std::string, T*>::iterator pos = some_map.find(key);
  if (pos == some_map.end())
    return false;
  *(pos->second) = value;
  return true;
}

// Sets a bool option; only bool_map_ is consulted.
bool SimpleOptions::SetOption(const std::string &key, const bool &value) {
  return SetOptionImpl(key, value, bool_map_);
}

// Sets an integer option.  The int32 options are tried first; if `key` is not
// registered there, the value is cast to uint32 and the uint32 options are
// tried.
bool SimpleOptions::SetOption(const std::string &key, const int32 &value) {
  return SetOptionImpl(key, value, int_map_) ||
         SetOptionImpl(key, static_cast<uint32>(value), uint_map_);
}

// Mirror image of the int32 overload: uint32 options first, then the value
// cast to int32 against the int32 options.
bool SimpleOptions::SetOption(const std::string &key, const uint32 &value) {
  return SetOptionImpl(key, value, uint_map_) ||
         SetOptionImpl(key, static_cast<int32>(value), int_map_);
}

// Sets a floating-point option.  The float options are tried first; if `key`
// is not registered there, the value is widened to double and the double
// options are tried.
bool SimpleOptions::SetOption(const std::string &key, const float &value) {
  return SetOptionImpl(key, value, float_map_) ||
         SetOptionImpl(key, static_cast<double>(value), double_map_);
}

// Mirror image of the float overload: double options first, then the value
// narrowed to float against the float options.
bool SimpleOptions::SetOption(const std::string &key, const double &value) {
  return SetOptionImpl(key, value, double_map_) ||
         SetOptionImpl(key, static_cast<float>(value), float_map_);
}

// Sets a string option; only string_map_ is consulted.
bool SimpleOptions::SetOption(const std::string &key,
                              const std::string &value) {
  return SetOptionImpl(key, value, string_map_);
}

// Sets a string option from a C string (so that string literals pick this
// overload rather than the bool one).
// Returns false if `value` is a null pointer or if `key` is not a registered
// string option; returns true once the option has been written.
bool SimpleOptions::SetOption(const std::string &key, const char *value) {
  // Constructing a std::string from a null pointer is undefined behaviour,
  // so treat it as "nothing to set" rather than crashing.
  if (!value)
    return false;
  std::string str_value = std::string(value);
  return SetOptionImpl(key, str_value, string_map_);
}


// Shared helper for the GetOption() overloads.  If `key` is present in
// `some_map`, copies the value the stored pointer refers to into `*value`
// and returns true; otherwise leaves `*value` untouched and returns false.
template<typename T>
static bool GetOptionImpl(const std::string &key, T *value,
                          std::map<std::string, T*> &some_map) {
  typename std::map<std::string, T*>::iterator hit = some_map.find(key);
  const bool found = (hit != some_map.end());
  if (found)
    *value = *(hit->second);
  return found;
}

// GetOption() overloads.  Each one looks `key` up only in the map whose value
// type matches `value` (unlike SetOption(), there is no fallback to a related
// numeric type), copies the current option value into `*value` and returns
// true; returns false, leaving `*value` unchanged, if the key is not found.

// Reads a bool option.
bool SimpleOptions::GetOption(const std::string &key, bool *value) {
  return GetOptionImpl(key, value, bool_map_);
}

// Reads an int32 option.
bool SimpleOptions::GetOption(const std::string &key, int32 *value) {
  return GetOptionImpl(key, value, int_map_);
}

// Reads a uint32 option.
bool SimpleOptions::GetOption(const std::string &key, uint32 *value) {
  return GetOptionImpl(key, value, uint_map_);
}

// Reads a float option.
bool SimpleOptions::GetOption(const std::string &key, float *value) {
  return GetOptionImpl(key, value, float_map_);
}

// Reads a double option.
bool SimpleOptions::GetOption(const std::string &key, double *value) {
  return GetOptionImpl(key, value, double_map_);
}

// Reads a std::string option.
bool SimpleOptions::GetOption(const std::string &key, std::string *value) {
  return GetOptionImpl(key, value, string_map_);
}

// Returns a copy (by value) of the list of (name, OptionInfo) entries, in the
// order in which the Register() calls appended them.
std::vector<std::pair<std::string, SimpleOptions::OptionInfo> >
SimpleOptions::GetOptionInfoList() {
  return option_info_list_;
}

bool SimpleOptions::GetOptionType(const std::string &key, OptionType *type) {
  for (std::vector <std::pair<std::string,
      OptionInfo> >::iterator dx = option_info_list_.begin();
      dx != option_info_list_.end(); dx++) {
    std::pair<std::string, SimpleOptions::OptionInfo> info_pair = (*dx);
    if (info_pair.first == key) {
      *type = info_pair.second.type;
      return true;
    }
  }
  return false;
}


}  // namespace kaldi


================================================
FILE: runtime/engine/kaldi/util/simple-options.h
================================================
// util/simple-options.h

// Copyright 2013  Tanel Alumae, Tallinn University of Technology

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_UTIL_SIMPLE_OPTIONS_H_
#define KALDI_UTIL_SIMPLE_OPTIONS_H_

#include <map>
#include <string>
#include <vector>

#include "base/kaldi-common.h"
#include "util/options-itf.h"

namespace kaldi {


/// The class SimpleOptions is an implementation of OptionsItf that allows
/// setting and getting option values programmatically, i.e., via getter
/// and setter methods. It doesn't provide any command line parsing
/// functionality.
/// The class ParseOptions should be used for command-line options.
///
/// Register() stores only a pointer to the caller's variable; SetOption() and
/// GetOption() later write and read through that pointer.
class SimpleOptions : public OptionsItf {
 public:
  SimpleOptions() {
  }

  virtual ~SimpleOptions() {
  }

  // Methods from the interface
  // Each overload records `ptr` under `name` in the map for its type and
  // appends (name, OptionInfo(doc, <type>)) to the option info list.
  void Register(const std::string &name, bool *ptr, const std::string &doc);
  void Register(const std::string &name, int32 *ptr, const std::string &doc);
  void Register(const std::string &name, uint32 *ptr, const std::string &doc);
  void Register(const std::string &name, float *ptr, const std::string &doc);
  void Register(const std::string &name, double *ptr, const std::string &doc);
  void Register(const std::string &name, std::string *ptr,
                const std::string &doc);

  // set option with the specified key, return true if successful
  // (i.e. an option with that key was registered with a compatible type).
  // The numeric overloads fall back to the sibling type when the key is not
  // registered under the exact type: int32 <-> uint32 and float <-> double,
  // converting the value with static_cast.
  bool SetOption(const std::string &key, const bool &value);
  bool SetOption(const std::string &key, const int32 &value);
  bool SetOption(const std::string &key, const uint32 &value);
  bool SetOption(const std::string &key, const float &value);
  bool SetOption(const std::string &key, const double &value);
  bool SetOption(const std::string &key, const std::string &value);
  // C-string convenience overload; sets a std::string option.
  bool SetOption(const std::string &key, const char* value);

  // get option with the specified key and put to 'value',
  // return true if successful
  // (only the map matching the exact type of 'value' is searched).
  bool GetOption(const std::string &key, bool *value);
  bool GetOption(const std::string &key, int32 *value);
  bool GetOption(const std::string &key, uint32 *value);
  bool GetOption(const std::string &key, float *value);
  bool GetOption(const std::string &key, double *value);
  bool GetOption(const std::string &key, std::string *value);

  // Tag identifying which Register() overload an option was registered with.
  enum OptionType {
    kBool,
    kInt32,
    kUint32,
    kFloat,
    kDouble,
    kString
  };

  // Per-option metadata: the documentation string passed to Register() and
  // the type of the registered variable.
  struct OptionInfo {
    OptionInfo(const std::string &doc, OptionType type) :
      doc(doc), type(type) {
    }
    std::string doc;
    OptionType type;
  };

  // Returns a copy of the (name, OptionInfo) list, in registration order.
  std::vector<std::pair<std::string, OptionInfo> > GetOptionInfoList();

  /*
   * Puts the type of the option with name 'key' in the argument 'type'.
   * Return true if such option is found, false otherwise.
   */
  bool GetOptionType(const std::string &key, OptionType *type);

 private:

  // One entry per Register() call, in call order.
  std::vector<std::pair<std::string, OptionInfo> > option_info_list_;

  // maps for option variables
  // (option name -> pointer to the variable supplied to Register()).
  std::map<std::string, bool*> bool_map_;
  std::map<std::string, int32*> int_map_;
  std::map<std::string, uint32*> uint_map_;
  std::map<std::string, float*> float_map_;
  std::map<std::string, double*> double_map_;
  std::map<std::string, std::string*> string_map_;
};

}  // namespace kaldi

#endif  // KALDI_UTIL_SIMPLE_OPTIONS_H_


================================================
FILE: runtime/engine/kaldi/util/stl-utils.h
================================================
// util/stl-utils.h

// Copyright 2009-2011  Microsoft Corporation;  Saarland University

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_UTIL_STL_UTILS_H_
#define KALDI_UTIL_STL_UTILS_H_

#include <unordered_map>
#include <unordered_set>
using std::unordered_map;
using std::unordered_set;

#include <algorithm>
#include <map>
#include <set>
#include <string>
#include <vector>
#include "base/kaldi-common.h"

namespace kaldi {

/// Sorts the vector in ascending order and then drops every element that
/// compares equal to its predecessor, so each value remains exactly once.
template<typename T>
inline void SortAndUniq(std::vector<T> *vec) {
  std::sort(vec->begin(), vec->end());
  typename std::vector<T>::iterator first_dup =
      std::unique(vec->begin(), vec->end());
  vec->erase(first_dup, vec->end());
}


/// Returns true if the vector is in non-decreasing order according to
/// operator<, i.e. no element is less than the one before it.  Empty and
/// single-element vectors count as sorted.
template<typename T>
inline bool IsSorted(const std::vector<T> &vec) {
  return std::is_sorted(vec.begin(), vec.end());
}


/// Returns true if the vector is strictly increasing, i.e. it is sorted and
/// no value occurs twice.  Empty and single-element vectors qualify.
template<typename T>
inline bool IsSortedAndUniq(const std::vector<T> &vec) {
  // Search for the first adjacent pair that breaks strict ordering; if there
  // is none, the whole vector is sorted and duplicate-free.
  return std::adjacent_find(vec.begin(), vec.end(),
                            [](const T &prev, const T &next) {
                              return next <= prev;
                            }) == vec.end();
}


/// Removes duplicate elements from a sorted list.
/// `vec` must be non-NULL and already sorted; consecutive elements that
/// compare equal are collapsed into one.
template<typename T>
inline void Uniq(std::vector<T> *vec) {  // must be already sorted.
  // Check the pointer first: the paranoid check below dereferences it.
  KALDI_ASSERT(vec);
  KALDI_PARANOID_ASSERT(IsSorted(*vec));
  vec->erase(std::unique(vec->begin(), vec->end()), vec->end());
}

/// Copies the elements of a std::set into a vector.  The vector is resized
/// to the size of the set and receives the elements in the set's iteration
/// order, i.e. from lowest to highest.
template<class T>
void CopySetToVector(const std::set<T> &s, std::vector<T> *v) {
  KALDI_ASSERT(v != NULL);
  v->resize(s.size());
  std::copy(s.begin(), s.end(), v->begin());
}

/// Overload for unordered_set: the vector is resized to the size of the set
/// and receives the elements in whatever order the set iterates them.
template<class T>
void CopySetToVector(const unordered_set<T> &s, std::vector<T> *v) {
  KALDI_ASSERT(v != NULL);
  v->resize(s.size());
  std::copy(s.begin(), s.end(), v->begin());
}


/// Copies every (key, value) entry of a map into a vector of pairs, in the
/// map's iteration (key) order.  The vector is resized to the map's size.
template<class A, class B>
void CopyMapToVector(const std::map<A, B> &m,
                     std::vector<std::pair<A, B> > *v) {
  KALDI_ASSERT(v != NULL);
  v->resize(m.size());
  size_t slot = 0;
  for (const auto &entry : m) {
    // The map stores pair<const A, B>; rebuild a pair<A, B> for assignment.
    (*v)[slot++] = std::make_pair(entry.first, entry.second);
  }
}

/// Copies the keys of a map into a vector, in the map's iteration (key)
/// order.  The vector is resized to the map's size.
template<class A, class B>
void CopyMapKeysToVector(const std::map<A, B> &m, std::vector<A> *v) {
  KALDI_ASSERT(v != NULL);
  v->resize(m.size());
  size_t slot = 0;
  for (const auto &entry : m)
    (*v)[slot++] = entry.first;
}

/// Copies the mapped values of a map into a vector, ordered by the map's
/// iteration (key) order.  The vector is resized to the map's size.
template<class A, class B>
void CopyMapValuesToVector(const std::map<A, B> &m, std::vector<B> *v) {
  KALDI_ASSERT(v != NULL);
  v->resize(m.size());
  size_t slot = 0;
  for (const auto &entry : m)
    (*v)[slot++] = entry.second;
}

/// Replaces the contents of `*s` with the keys of the map.
template<class A, class B>
void CopyMapKeysToSet(const std::map<A, B> &m, std::set<A> *s) {
  KALDI_ASSERT(s != NULL);
  s->clear();
  // Keys arrive in increasing order, so end() is the insertion hint.
  for (const auto &entry : m)
    s->insert(s->end(), entry.first);
}

/// Replaces the contents of `*s` with the mapped values of the map
/// (values that occur several times end up in the set once).
template<class A, class B>
void CopyMapValuesToSet(const std::map<A, B> &m, std::set<B> *s) {
  KALDI_ASSERT(s != NULL);
  s->clear();
  // end() is passed as the insertion hint for every value.
  for (const auto &entry : m) {
    s->insert(s->end(), entry.second);
  }
}


/// Replaces the contents of `*s` with the elements of the vector.
template<class A>
void CopyVectorToSet(const std::vector<A> &v, std::set<A> *s) {
  KALDI_ASSERT(s != NULL);
  s->clear();
  // s->end() is a hint that pays off if v was sorted; the result is correct
  // either way.
  for (const A &element : v) {
    s->insert(s->end(), element);
  }
}

/// Calls delete on every non-NULL pointer held in `*v` and overwrites each
/// deleted entry with NULL.  The size of the vector does not change.
template<class A>
void DeletePointers(std::vector<A*> *v) {
  KALDI_ASSERT(v != NULL);
  const size_t count = v->size();
  for (size_t i = 0; i < count; i++) {
    A *&slot = (*v)[i];
    if (slot == NULL)
      continue;
    delete slot;
    slot = NULL;  // guard against accidental reuse / double delete.
  }
}

/// Returns true if at least one entry of the vector is a NULL pointer
/// (false for an empty vector).
template<class A>
bool ContainsNullPointers(const std::vector<A*> &v) {
  A *const null_ptr = static_cast<A*>(NULL);
  return std::find(v.begin(), v.end(), null_ptr) != v.end();
}

/// Copies the contents of a vector of one element type into a vector of
/// another element type, converting each element with static_cast<B>.
/// `*vec_out` is resized to the size of `vec_in`.
template<typename A, typename B>
void CopyVectorToVector(const std::vector<A> &vec_in, std::vector<B> *vec_out) {
  KALDI_ASSERT(vec_out != NULL);
  const size_t count = vec_in.size();
  vec_out->resize(count);
  for (size_t idx = 0; idx != count; ++idx) {
    const A &source = vec_in[idx];
    (*vec_out)[idx] = static_cast<B>(source);
  }
}

/// Hash functor for std::vector<Int>, where Int is an integer type.
/// The hash starts at 0 and, for each element in order, is multiplied by
/// kPrime and then has the element added (arithmetic is done in size_t).
template<typename Int>
struct VectorHasher {  // hashing function for vector<Int>.
  size_t operator()(const std::vector<Int> &x) const noexcept {
    size_t hash = 0;
    for (const Int &element : x) {
      hash *= kPrime;
      hash += element;
    }
    return hash;
  }
  VectorHasher() {  // Check we're instantiated with an integer type.
    KALDI_ASSERT_IS_INTEGER_TYPE(Int);
  }
 private:
  static const int kPrime = 7853;
};

/// A hashing function-object for pairs of ints.
/// The hash is first + second * 7853, evaluated in size_t.
template<typename Int1, typename Int2 = Int1>
struct PairHasher {  // hashing function for pair<int>
  size_t operator()(const std::pair<Int1, Int2> &x) const noexcept {
    // 7853 was chosen at random from a list of primes.
    // Convert to size_t *before* multiplying/adding: doing the arithmetic in
    // the (possibly signed) element type can overflow, which is undefined
    // behaviour for signed integers.  Unsigned arithmetic simply wraps, and
    // yields the same value as before whenever the old expression did not
    // overflow.
    return static_cast<size_t>(x.first) +
           static_cast<size_t>(x.second) * 7853;
  }
  PairHasher() {  // Check we're instantiated with an integer type.
    KALDI_ASSERT_IS_INTEGER_TYPE(Int1);
    KALDI_ASSERT_IS_INTEGER_TYPE(Int2);
  }
};


/// Hash functor for std::string.  Starting from 0, every character of the
/// string (in order, over its full length) updates the hash as
/// hash = hash * kPrime + character, with the arithmetic done in size_t.
struct StringHasher {  // hashing function for std::string
  size_t operator()(const std::string &str) const noexcept {
    size_t hash = 0;
    for (std::string::const_iterator ch = str.begin(); ch != str.end(); ++ch) {
      hash *= kPrime;
      hash += *ch;
    }
    return hash;
  }
 private:
  static const int kPrime = 7853;
};

/// Reverses the order of the elements of `*vec` in place.
template<typename T>
inline void ReverseVector(std::vector<T> *vec) {
  KALDI_ASSERT(vec != NULL);
  std::reverse(vec->begin(), vec->end());
}


/// Comparator object for pairs that compares only the first member of each
/// pair (the second member is ignored).
/// operator() returns true iff p1.first < p2.first.  It is const-qualified so
/// the comparator can also be invoked through a const object, as ordered
/// containers and some algorithms require.
template<class A, class B>
struct CompareFirstMemberOfPair {
  inline bool operator() (const std::pair<A, B> &p1,
                          const std::pair<A, B> &p2) const {
    return p1.first < p2.first;
  }
};

/// For a vector of pair<I, F> where I is an integer and F a floating-point or
/// integer type, this function sorts a vector of type vector<pair<I, F> > on
/// the I value and then merges elements with equal I values, summing these over
/// the F component and then removing any F component with zero value.  This
/// is for where the vector of pairs represents a map from the integer to float
/// component, with an "adding" type of semantics for combining the elements.
/// The operation is done in place; an empty vector is left untouched.
template<typename I, typename F>
inline void MergePairVectorSumming(std::vector<std::pair<I, F> > *vec) {
  KALDI_ASSERT_IS_INTEGER_TYPE(I);
  // Nothing to do for an empty vector.  Returning here also keeps the
  // `in + 1` below from stepping past end() when begin() == end().
  if (vec->empty())
    return;
  CompareFirstMemberOfPair<I, F> c;
  std::sort(vec->begin(), vec->end(), c);  // sort on 1st element.
  // `in` reads the sorted input, `out` marks where the next merged element
  // is written; out never gets ahead of in.
  typename std::vector<std::pair<I, F> >::iterator out = vec->begin(),
      in = vec->begin(), end = vec->end();
  // special case: while there is nothing to be changed, skip over
  // initial input (avoids unnecessary copying).
  while (in + 1 < end && in[0].first != in[1].first && in[0].second != 0.0) {
    in++;
    out++;
  }
  while (in < end) {
    // We reach this point only at the first element of
    // each stretch of identical .first elements.
    *out = *in;
    ++in;
    while (in < end && in->first == out->first) {
      out->second += in->second;  // this is the merge operation.
      ++in;
    }
    if (out->second != static_cast<F>(0))  // Don't keep zero elements.
      out++;
  }
  // Everything from `out` onwards is stale input; drop it.
  vec->erase(out, end);
}

}  // namespace kaldi

#endif  // KALDI_UTIL_STL_UTILS_H_


================================================
FILE: runtime/engine/kaldi/util/table-types.h
================================================
// util/table-types.h

// Copyright 2009-2011     Microsoft Corporation

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


#ifndef KALDI_UTIL_TABLE_TYPES_H_
#define KALDI_UTIL_TABLE_TYPES_H_
#include "base/kaldi-common.h"
#include "util/kaldi-table.h"
#include "util/kaldi-holder.h"
#include "matrix/kaldi-matrix.h"
#include "matrix/kaldi-vector.h"

namespace kaldi {

// This header defines typedefs that are specific instantiations of
// the Table types.
//
// Naming scheme used throughout: for each holder type there is a
// <Name>Writer (TableWriter), a Sequential<Name>Reader
// (SequentialTableReader), a RandomAccess<Name>Reader
// (RandomAccessTableReader) and, for some types, a
// RandomAccess<Name>ReaderMapped (RandomAccessTableReaderMapped).
//
// NOTE(review): the typedefs for CompressedMatrix, CuMatrix, CuVector and
// GeneralMatrix are commented out below -- presumably those classes are not
// part of this copy of the code; confirm before re-enabling them.

/// \addtogroup table_types
/// @{

// Tables of Matrix<BaseFloat>.
typedef TableWriter<KaldiObjectHolder<Matrix<BaseFloat> > >
                    BaseFloatMatrixWriter;
typedef SequentialTableReader<KaldiObjectHolder<Matrix<BaseFloat> > >
                             SequentialBaseFloatMatrixReader;
typedef RandomAccessTableReader<KaldiObjectHolder<Matrix<BaseFloat> > >
                                RandomAccessBaseFloatMatrixReader;
typedef RandomAccessTableReaderMapped<KaldiObjectHolder<Matrix<BaseFloat> > >
                                      RandomAccessBaseFloatMatrixReaderMapped;

// Tables of Matrix<double>.
typedef TableWriter<KaldiObjectHolder<Matrix<double> > >
                                      DoubleMatrixWriter;
typedef SequentialTableReader<KaldiObjectHolder<Matrix<double> > >
                              SequentialDoubleMatrixReader;
typedef RandomAccessTableReader<KaldiObjectHolder<Matrix<double> > >
                                RandomAccessDoubleMatrixReader;
typedef RandomAccessTableReaderMapped<KaldiObjectHolder<Matrix<double> > >
                                      RandomAccessDoubleMatrixReaderMapped;

//typedef TableWriter<KaldiObjectHolder<CompressedMatrix> >
                                      //CompressedMatrixWriter;

// Tables of Vector<BaseFloat>.
typedef TableWriter<KaldiObjectHolder<Vector<BaseFloat> > >
                                      BaseFloatVectorWriter;
typedef SequentialTableReader<KaldiObjectHolder<Vector<BaseFloat> > >
                              SequentialBaseFloatVectorReader;
typedef RandomAccessTableReader<KaldiObjectHolder<Vector<BaseFloat> > >
                                RandomAccessBaseFloatVectorReader;
typedef RandomAccessTableReaderMapped<KaldiObjectHolder<Vector<BaseFloat> > >
                                      RandomAccessBaseFloatVectorReaderMapped;

// Tables of Vector<double>.
typedef TableWriter<KaldiObjectHolder<Vector<double> > >
                                      DoubleVectorWriter;
typedef SequentialTableReader<KaldiObjectHolder<Vector<double> > >
                              SequentialDoubleVectorReader;
typedef RandomAccessTableReader<KaldiObjectHolder<Vector<double> > >
                                RandomAccessDoubleVectorReader;

//typedef TableWriter<KaldiObjectHolder<CuMatrix<BaseFloat> > >
                                      //BaseFloatCuMatrixWriter;
//typedef SequentialTableReader<KaldiObjectHolder<CuMatrix<BaseFloat> > >
                              //SequentialBaseFloatCuMatrixReader;
//typedef RandomAccessTableReader<KaldiObjectHolder<CuMatrix<BaseFloat> > >
                                //RandomAccessBaseFloatCuMatrixReader;
//typedef RandomAccessTableReaderMapped<KaldiObjectHolder<CuMatrix<BaseFloat> > >
                                      //RandomAccessBaseFloatCuMatrixReaderMapped;

//typedef TableWriter<KaldiObjectHolder<CuMatrix<double> > >
                                      //DoubleCuMatrixWriter;
//typedef SequentialTableReader<KaldiObjectHolder<CuMatrix<double> > >
                              //SequentialDoubleCuMatrixReader;
//typedef RandomAccessTableReader<KaldiObjectHolder<CuMatrix<double> > >
                                //RandomAccessDoubleCuMatrixReader;
//typedef RandomAccessTableReaderMapped<KaldiObjectHolder<CuMatrix<double> > >
                                      //RandomAccessDoubleCuMatrixReaderMapped;

//typedef TableWriter<KaldiObjectHolder<CuVector<BaseFloat> > >
                    //BaseFloatCuVectorWriter;
//typedef SequentialTableReader<KaldiObjectHolder<CuVector<BaseFloat> > >
                              //SequentialBaseFloatCuVectorReader;
//typedef RandomAccessTableReader<KaldiObjectHolder<CuVector<BaseFloat> > >
                                //RandomAccessBaseFloatCuVectorReader;
//typedef RandomAccessTableReaderMapped<KaldiObjectHolder<CuVector<BaseFloat> > >
                                      //RandomAccessBaseFloatCuVectorReaderMapped;

//typedef TableWriter<KaldiObjectHolder<CuVector<double> > >
                    //DoubleCuVectorWriter;
//typedef SequentialTableReader<KaldiObjectHolder<CuVector<double> > >
                              //SequentialDoubleCuVectorReader;
//typedef RandomAccessTableReader<KaldiObjectHolder<CuVector<double> > >
                                //RandomAccessDoubleCuVectorReader;


// Tables of a single int32.
typedef TableWriter<BasicHolder<int32> >  Int32Writer;
typedef SequentialTableReader<BasicHolder<int32> >  SequentialInt32Reader;
typedef RandomAccessTableReader<BasicHolder<int32> >  RandomAccessInt32Reader;

// Tables held by BasicVectorHolder<int32>.
typedef TableWriter<BasicVectorHolder<int32> >  Int32VectorWriter;
typedef SequentialTableReader<BasicVectorHolder<int32> >
                              SequentialInt32VectorReader;
typedef RandomAccessTableReader<BasicVectorHolder<int32> >
                                RandomAccessInt32VectorReader;

// Tables held by BasicVectorVectorHolder<int32>.
typedef TableWriter<BasicVectorVectorHolder<int32> >  Int32VectorVectorWriter;
typedef SequentialTableReader<BasicVectorVectorHolder<int32> >
                              SequentialInt32VectorVectorReader;
typedef RandomAccessTableReader<BasicVectorVectorHolder<int32> >
                                RandomAccessInt32VectorVectorReader;

// Tables held by BasicPairVectorHolder<int32>.
typedef TableWriter<BasicPairVectorHolder<int32> >  Int32PairVectorWriter;
typedef SequentialTableReader<BasicPairVectorHolder<int32> >
                              SequentialInt32PairVectorReader;
typedef RandomAccessTableReader<BasicPairVectorHolder<int32> >
                                RandomAccessInt32PairVectorReader;

// Tables held by BasicPairVectorHolder<BaseFloat>.
typedef TableWriter<BasicPairVectorHolder<BaseFloat> >
                    BaseFloatPairVectorWriter;
typedef SequentialTableReader<BasicPairVectorHolder<BaseFloat> >
                              SequentialBaseFloatPairVectorReader;
typedef RandomAccessTableReader<BasicPairVectorHolder<BaseFloat> >
                                RandomAccessBaseFloatPairVectorReader;

// Tables of a single BaseFloat.
typedef TableWriter<BasicHolder<BaseFloat> >  BaseFloatWriter;
typedef SequentialTableReader<BasicHolder<BaseFloat> >
                              SequentialBaseFloatReader;
typedef RandomAccessTableReader<BasicHolder<BaseFloat> >
                                RandomAccessBaseFloatReader;
typedef RandomAccessTableReaderMapped<BasicHolder<BaseFloat> >
                                      RandomAccessBaseFloatReaderMapped;

// Tables of a single double.
typedef TableWriter<BasicHolder<double> >  DoubleWriter;
typedef SequentialTableReader<BasicHolder<double> >  SequentialDoubleReader;
typedef RandomAccessTableReader<BasicHolder<double> >  RandomAccessDoubleReader;

// Tables of a single bool.
typedef TableWriter<BasicHolder<bool> >  BoolWriter;
typedef SequentialTableReader<BasicHolder<bool> >  SequentialBoolReader;
typedef RandomAccessTableReader<BasicHolder<bool> >  RandomAccessBoolReader;

/// TokenWriter is a writer specialized for std::string where the strings
/// are nonempty and whitespace-free.   T == std::string
typedef TableWriter<TokenHolder> TokenWriter;
typedef SequentialTableReader<TokenHolder> SequentialTokenReader;
typedef RandomAccessTableReader<TokenHolder> RandomAccessTokenReader;


/// TokenVectorWriter is a writer specialized for sequences of
/// std::string where the strings are nonempty and whitespace-free.
/// T == std::vector<std::string>
typedef TableWriter<TokenVectorHolder> TokenVectorWriter;
// Ditto for SequentialTokenVectorReader.
typedef SequentialTableReader<TokenVectorHolder> SequentialTokenVectorReader;
typedef RandomAccessTableReader<TokenVectorHolder>
                                RandomAccessTokenVectorReader;


//typedef TableWriter<KaldiObjectHolder<GeneralMatrix> >
//                                      GeneralMatrixWriter;
//typedef SequentialTableReader<KaldiObjectHolder<GeneralMatrix> >
 //                             SequentialGeneralMatrixReader;
//typedef RandomAccessTableReader<KaldiObjectHolder<GeneralMatrix> >
 //                               RandomAccessGeneralMatrixReader;
//typedef RandomAccessTableReaderMapped<KaldiObjectHolder<GeneralMatrix> >
 //                                     RandomAccessGeneralMatrixReaderMapped;


/// @}

// Note: for FST reader/writer, see ../fstext/fstext-utils.h
// [not done yet].

}  // end namespace kaldi


#endif  // KALDI_UTIL_TABLE_TYPES_H_


================================================
FILE: runtime/engine/kaldi/util/text-utils.cc
================================================
// util/text-utils.cc

// Copyright 2009-2011  Saarland University;  Microsoft Corporation

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//  http://www.apache.org/licenses/LICENSE-2.0

// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#include "util/text-utils.h"
#include <limits>
#include <map>
#include <algorithm>
#include "base/kaldi-common.h"

namespace kaldi {


// Splits `full` on any of the characters in `delim` (via
// SplitStringToVector) and converts every piece to a floating-point value of
// type F with ConvertStringToReal.
// - If `full` starts with a NUL character (in particular, if it is empty),
//   `*out` is cleared and true is returned.
// - Returns false as soon as one piece fails to convert; in that case `*out`
//   has already been resized to the number of pieces and holds the values
//   converted so far.
// - Returns true if every piece converted.
template<class F>
bool SplitStringToFloats(const std::string &full,
                         const char *delim,
                         bool omit_empty_strings,  // typically false
                         std::vector<F> *out) {
  KALDI_ASSERT(out != NULL);
  if (full.c_str()[0] == '\0') {
    out->clear();
    return true;
  }
  std::vector<std::string> pieces;
  SplitStringToVector(full, delim, omit_empty_strings, &pieces);
  out->resize(pieces.size());
  typename std::vector<F>::iterator dst = out->begin();
  for (std::vector<std::string>::const_iterator src = pieces.begin();
       src != pieces.end(); ++src, ++dst) {
    F parsed = 0;
    if (!ConvertStringToReal(*src, &parsed))
      return false;
    *dst = parsed;
  }
  return true;
}

// Explicit instantiations of the template above for float and double, so the
// definitions are emitted in this translation unit.
template
bool SplitStringToFloats(const std::string &full,
                         const char *delim,
                         bool omit_empty_strings,
                         std::vector<float> *out);
template
bool SplitStringToFloats(const std::string &full,
                         const char *delim,
                         bool omit_empty_strings,
                         std::vector<double> *out);

/// Splits 'full' at every occurrence of any character in 'delim'.
/// When 'omit_empty_strings' is false every piece is kept, so n delimiters
/// yield n+1 strings (an empty input yields one empty string).  When it is
/// true, empty pieces are dropped.
void SplitStringToVector(const std::string &full, const char *delim,
                         bool omit_empty_strings,
                         std::vector<std::string> *out) {
  out->clear();
  const size_t len = full.size();
  size_t begin = 0;
  for (;;) {
    const size_t hit = full.find_first_of(delim, begin);
    // The piece [begin, hit) is empty if the delimiter is right at 'begin',
    // or if 'begin' already sits at the end of the string (trailing
    // delimiter or empty input).
    const bool piece_is_empty = (hit == begin) || (begin == len);
    if (!(omit_empty_strings && piece_is_empty)) {
      // If hit == npos the length wraps around; substr() clamps it to the
      // end of the string.
      out->push_back(full.substr(begin, hit - begin));
    }
    if (hit == std::string::npos) break;
    begin = hit + 1;
  }
}

/// Joins the elements of 'vec_in' into '*str_out', separated by 'delim'.
///
/// If 'omit_empty_strings' is true, empty elements are skipped entirely and
/// exactly one delimiter is written between each pair of consecutive kept
/// elements.  If it is false, every element (including empty ones) takes
/// part in the join.
///
/// The result is built in a temporary and swapped into '*str_out', so
/// '*str_out' may safely be one of the input strings.
void JoinVectorToString(const std::vector<std::string> &vec_in,
                        const char *delim, bool omit_empty_strings,
                        std::string *str_out) {
  std::string joined;
  bool wrote_any = false;
  for (size_t i = 0; i < vec_in.size(); i++) {
    if (omit_empty_strings && vec_in[i].empty())
      continue;
    // Emit the delimiter *before* every kept element except the first.
    // Deciding based on the next element (as was done previously) lost the
    // delimiter whenever an omitted empty string sat between two kept ones,
    // e.g. {"a", "", "b"} was joined as "ab".
    if (wrote_any)
      joined.append(delim);
    joined.append(vec_in[i]);
    wrote_any = true;
  }
  str_out->swap(joined);
}

/// Strips leading and trailing whitespace (space, \t, \n, \r, \f, \v) from
/// '*str' in place.  A string made only of whitespace becomes empty.
void Trim(std::string *str) {
  static const char kWhitespace[] = " \t\n\r\f\v";

  const std::string::size_type first = str->find_first_not_of(kWhitespace);
  if (first == std::string::npos) {
    // Nothing but whitespace (or already empty).
    str->clear();
    return;
  }
  const std::string::size_type last = str->find_last_not_of(kWhitespace);
  str->erase(last + 1);   // drop the tail first so 'first' stays valid.
  str->erase(0, first);
}

/// Returns true if 'token' is nonempty and contains no ASCII character that
/// is unprintable or whitespace.  Non-ASCII bytes are accepted (so accented
/// characters are not rejected), with the exception of byte 255 ("nbsp", a
/// form of space), which is rejected when it is unprintable or whitespace.
bool IsToken(const std::string &token) {
  if (token.empty()) return false;
  for (std::string::const_iterator it = token.begin();
       it != token.end(); ++it) {
    const unsigned char ch = static_cast<unsigned char>(*it);
    const bool unprintable_or_space = !isprint(ch) || isspace(ch);
    const bool ascii_or_nbsp = isascii(ch) || ch == (unsigned char)255;
    if (unprintable_or_space && ascii_or_nbsp)
      return false;
  }
  return true;
}


void SplitStringOnFirstSpace(const std::string &str,
                             std::string *first,
                             std::string *rest) {
  const char *white_chars = " \t\n\r\f\v";
  typedef std::string::size_type I;
  const I npos = std::string::npos;
  I first_nonwhite = str.find_first_not_of(white_chars);
  if (first_nonwhite == npos) {
    first->clear();
    rest->clear();
    return;
  }
  // next_white is first whitespace after first nonwhitespace.
  I next_white = str.find_first_of(white_chars, first_nonwhite);

  if (next_white == npos) {  // no more whitespace...
    *first = std::string(str, first_nonwhite);
    rest->clear();
    return;
  }
  I next_nonwhite = str.find_first_not_of(white_chars, next_white);
  if (next_nonwhite == npos) {
    *first = std::string(str, first_nonwhite, next_white-first_nonwhite);
    rest->clear();
    return;
  }

  I last_nonwhite = str.find_last_not_of(white_chars);
  KALDI_ASSERT(last_nonwhite != npos);  // or coding error.

  *first = std::string(str, first_nonwhite, next_white-first_nonwhite);
  *rest = std::string(str, next_nonwhite, last_nonwhite+1-next_nonwhite);
}

/// Returns true if 'line' contains no '\n', does not start or end with
/// whitespace, and consists only of printable characters (as judged by
/// isprint() in the current C locale).  The empty string is a valid line.
bool IsLine(const std::string &line) {
  if (line.find('\n') != std::string::npos) return false;
  if (line.empty()) return true;
  // The <cctype> classifiers are only defined for EOF and values
  // representable as unsigned char, so every char is converted before the
  // call; a plain (possibly negative) char would be undefined behaviour for
  // bytes >= 0x80.
  if (isspace(static_cast<unsigned char>(*(line.begin())))) return false;
  if (isspace(static_cast<unsigned char>(*(line.rbegin())))) return false;
  std::string::const_iterator iter = line.begin(), end = line.end();
  for (; iter != end; ++iter)
    if (!isprint(static_cast<unsigned char>(*iter))) return false;
  return true;
}

// Thin wrapper around a std::istream reference (not owned) whose
// operator>> reads a single value of type T.  In addition to what the
// underlying stream extraction accepts, it recognises the textual forms of
// infinity and NaN listed in ParseOnFail().  Failure is reported through the
// failbit of the wrapped stream.
template <class T>
class NumberIstream{
 public:
  explicit NumberIstream(std::istream &i) : in_(i) {}

  // Reads one value into x.  Does nothing if the stream is not good() on
  // entry.  The ordinary extraction is accepted only when nothing but
  // spaces follows the number; in every other case (extraction failed, or
  // something else follows) the input is re-examined by ParseOnFail().
  NumberIstream & operator >> (T &x) {
    if (!in_.good()) return *this;
    in_ >> x;
    if (!in_.fail() && RemainderIsOnlySpaces()) return *this;
    return ParseOnFail(&x);
  }

 private:
  std::istream &in_;

  // Returns false if another token can be read from the stream and it
  // contains a character other than ' '.  Otherwise clears the stream's
  // error flags (so that hitting end-of-input here is not seen as a
  // failure by the caller) and returns true.
  bool RemainderIsOnlySpaces() {
    // Only attempt the read when the current position can be obtained;
    // tellg() yields pos_type(-1) when it cannot.
    if (in_.tellg() != std::istream::pos_type(-1)) {
      std::string rem;
      in_ >> rem;

      if (rem.find_first_not_of(' ') != std::string::npos) {
        // something other than spaces follows.
        return false;
      }
    }

    in_.clear();
    return true;
  }

  // Fallback parser: rewinds the stream to its beginning, reads the input
  // as a single string token and, case-insensitively, maps it to +/-inf or
  // NaN.  Sets failbit if the input is not exactly one token or the token
  // is not in the table; *x is only written on success.
  NumberIstream & ParseOnFail(T *x) {
    std::string str;
    in_.clear();
    in_.seekg(0);
    // If the stream is broken even before trying
    // to read from it or if there are many tokens,
    // it's pointless to try.
    if (!(in_ >> str) || !RemainderIsOnlySpaces()) {
      in_.setstate(std::ios_base::failbit);
      return *this;
    }

    std::map<std::string, T> inf_nan_map;
    // we'll keep just uppercase values.
    inf_nan_map["INF"] = std::numeric_limits<T>::infinity();
    inf_nan_map["+INF"] = std::numeric_limits<T>::infinity();
    inf_nan_map["-INF"] = - std::numeric_limits<T>::infinity();
    inf_nan_map["INFINITY"] = std::numeric_limits<T>::infinity();
    inf_nan_map["+INFINITY"] = std::numeric_limits<T>::infinity();
    inf_nan_map["-INFINITY"] = - std::numeric_limits<T>::infinity();
    inf_nan_map["NAN"] = std::numeric_limits<T>::quiet_NaN();
    inf_nan_map["+NAN"] = std::numeric_limits<T>::quiet_NaN();
    inf_nan_map["-NAN"] = - std::numeric_limits<T>::quiet_NaN();
    // MSVC
    inf_nan_map["1.#INF"] = std::numeric_limits<T>::infinity();
    inf_nan_map["-1.#INF"] = - std::numeric_limits<T>::infinity();
    inf_nan_map["1.#QNAN"] = std::numeric_limits<T>::quiet_NaN();
    inf_nan_map["-1.#QNAN"] = - std::numeric_limits<T>::quiet_NaN();

    // Upper-case the token so the lookup is case-insensitive.
    std::transform(str.begin(), str.end(), str.begin(), ::toupper);

    if (inf_nan_map.find(str) != inf_nan_map.end()) {
      *x = inf_nan_map[str];
    } else {
      in_.setstate(std::ios_base::failbit);
    }

    return *this;
  }
};


template <typename T>
bool ConvertStringToReal(const std::string &str,
                         T *out) {
  std::istringstream iss(str);

  NumberIstream<T> i(iss);

  i >> *out;

  if (iss.fail()) {
    // Number conversion failed.
    return false;
  }

  return true;
}

template
bool ConvertStringToReal(const std::string &str,
                         float *out);
template
bool ConvertStringToReal(const std::string &str,
                         double *out);


/*
  Helper of StringsApproxEqual.  It walks the two C strings 'a' and 'b' in
  lock-step; conceptually it is a tail-recursive comparison, written as a
  loop so that long strings cannot overflow the stack.

  'places_into_number' tracks where we are relative to a decimal point:
    -1   we are not inside the digits that follow a '.';
     0   the previous (matching) character was a '.', i.e. we point at the
         first place after the decimal point;
     k   k matching digits after the '.' have been consumed.
  It is reset to -1 when a matching non-digit is consumed.

  When the current characters differ, the difference is tolerated in two
  cases only:
    * at least 'decimal_places_tolerance' places after the decimal point
      have been seen and at least one of the two characters is a digit: the
      digit(s) are skipped;
    * we are after a decimal point and one side has a '0' where the other
      has a non-digit: the '0' is skipped, so that e.g. "0.1" compares
      equal to "0.100001".
  Any other difference makes the strings unequal.
 */
bool StringsApproxEqualInternal(const char *a, const char *b,
                                int32 decimal_places_tolerance,
                                int32 places_into_number) {
  for (;;) {
    const char ca = *a, cb = *b;

    if (ca == cb) {
      if (ca == '\0')
        return true;  // both strings ended together.
      if (places_into_number < 0) {
        if (ca == '.')
          places_into_number = 0;
      } else if (isdigit(ca)) {
        places_into_number++;
      } else {
        places_into_number = -1;
      }
      a++;
      b++;
      continue;
    }

    if (places_into_number >= decimal_places_tolerance &&
        (isdigit(ca) || isdigit(cb))) {
      // we're potentially willing to accept this difference between the
      // strings; at least one of the two pointers advances.
      if (isdigit(ca)) a++;
      if (isdigit(cb)) b++;
      continue;
    }

    if (places_into_number >= 0 &&
        ((ca == '0' && !isdigit(cb)) || (cb == '0' && !isdigit(ca)))) {
      // one number has an extra '0' after the decimal point where the other
      // has already ended.
      if (ca == '0')
        a++;
      else
        b++;
      places_into_number++;
      continue;
    }

    return false;
  }
}


bool StringsApproxEqual(const std::string &a,
                        const std::string &b,
                        int32 decimal_places_tolerance) {
  return StringsApproxEqualInternal(a.c_str(), b.c_str(),
                                    decimal_places_tolerance, -1);
}


// Parses one config line of the form
//   [first-token] key1=value1 key2='quoted value' key3=value with spaces ...
// into first_token_ and data_ (key -> (value, consumed=false)).
//
// Returns false, leaving the object in a partially-filled state, if
//  - the line is empty or contains only whitespace,
//  - the first token is not a valid name (see IsValidName()),
//  - some non-whitespace text is not of the form key=value, or a key is not
//    a valid name,
//  - a quoted value has no closing quote (a warning is also printed).
//
// Values may be quoted with ' or " (no escaping).  An unquoted value extends
// up to the whitespace that precedes the next "key=", or to the end of the
// line, with trailing whitespace removed; it may therefore contain spaces
// but not '='.  If a key occurs more than once, the first occurrence is
// kept, because std::map::insert() does not overwrite existing entries.
bool ConfigLine::ParseLine(const std::string &line) {
  data_.clear();
  whole_line_ = line;
  if (line.size() == 0) return false;   // Empty line
  size_t pos = 0, size = line.size();
  // Note: in all loops below the bounds check comes before the character is
  // inspected, and characters are converted to unsigned char before being
  // handed to isspace(), which is undefined for negative values.
  while (pos < size && isspace(static_cast<unsigned char>(line[pos]))) pos++;
  if (pos == size)
    return false;  // whitespace-only line
  size_t first_token_start_pos = pos;
  // first get first_token_.
  while (pos < size && !isspace(static_cast<unsigned char>(line[pos]))) {
    if (line[pos] == '=') {
      // If the first block of non-whitespace looks like "foo-bar=...",
      // then we ignore it: there is no initial token, and FirstToken()
      // is empty.
      pos = first_token_start_pos;
      break;
    }
    pos++;
  }
  first_token_ = std::string(line, first_token_start_pos, pos - first_token_start_pos);
  // first_token_ is expected to be either empty or something like
  // "component-node", which actually is a slightly more restrictive set of
  // strings than IsValidName() checks for; this is a convenient way to
  // check it.
  if (!first_token_.empty() && !IsValidName(first_token_))
    return false;

  while (pos < size) {
    if (isspace(static_cast<unsigned char>(line[pos]))) {
      pos++;
      continue;
    }

    // OK, at this point we know that we are pointing at nonspace.
    size_t next_equals_sign = line.find_first_of("=", pos);
    if (next_equals_sign == pos || next_equals_sign == std::string::npos) {
      // we're looking for something like 'key=value'.  If there is no equals sign,
      // or it's not preceded by something, it's a parsing failure.
      return false;
    }
    std::string key(line, pos, next_equals_sign - pos);
    if (!IsValidName(key)) return false;

    // Character right after '=', or '\0' if '=' is the last character.
    const char after_equals =
        (next_equals_sign + 1 < size) ? line[next_equals_sign + 1] : '\0';

    // handle any quotes.  we support key='blah blah' or key="foo bar".
    // no escaping is supported.
    if (after_equals == '\'' || after_equals == '"') {
      char my_quote = after_equals;
      size_t next_quote = line.find_first_of(my_quote, next_equals_sign + 2);
      if (next_quote == std::string::npos) {  // no matching quote was found.
        KALDI_WARN << "No matching quote for " << my_quote << " in config line '"
                   << line << "'";
        return false;
      } else {
        std::string value(line, next_equals_sign + 2,
                          next_quote - next_equals_sign - 2);
        data_.insert(std::make_pair(key, std::make_pair(value, false)));
        pos = next_quote + 1;
        continue;
      }
    } else {
      // we want to be able to parse something like "... input=Offset(a, -1) foo=bar":
      // in general, config values with spaces in them, even without quoting.

      size_t next_next_equals_sign = line.find_first_of("=", next_equals_sign + 1),
          terminating_space = size;

      if (next_next_equals_sign != std::string::npos) {  // found a later equals sign.
        // The value ends at the last blank before the next "key=".
        size_t preceding_space = line.find_last_of(" \t", next_next_equals_sign);
        if (preceding_space != std::string::npos &&
            preceding_space > next_equals_sign)
          terminating_space = preceding_space;
      }
      // Strip trailing whitespace from the value.  This cannot move past the
      // '=' at next_equals_sign, since '=' is not whitespace.
      while (terminating_space > 0 &&
             isspace(static_cast<unsigned char>(line[terminating_space - 1])))
        terminating_space--;

      std::string value(line, next_equals_sign + 1,
                        terminating_space - (next_equals_sign + 1));
      data_.insert(std::make_pair(key, std::make_pair(value, false)));
      pos = terminating_space;
    }
  }
  return true;
}

// Looks up 'key' and, if present, copies its value verbatim into '*value'
// and marks the entry as consumed.  Returns false (leaving '*value'
// untouched) if the key is absent.
bool ConfigLine::GetValue(const std::string &key, std::string *value) {
  KALDI_ASSERT(value != NULL);
  // data_ is a std::map, so use its logarithmic lookup rather than scanning
  // every entry.
  std::map<std::string, std::pair<std::string, bool> >::iterator it =
      data_.find(key);
  if (it == data_.end())
    return false;
  *value = (it->second).first;
  (it->second).second = true;
  return true;
}

// Looks up 'key' and converts its value to a floating-point number with
// ConvertStringToReal().  Returns false if the key is absent or the value
// cannot be converted; the entry is marked as consumed only on success.
bool ConfigLine::GetValue(const std::string &key, BaseFloat *value) {
  KALDI_ASSERT(value != NULL);
  // data_ is a std::map, so use its logarithmic lookup rather than scanning
  // every entry.
  std::map<std::string, std::pair<std::string, bool> >::iterator it =
      data_.find(key);
  if (it == data_.end())
    return false;
  if (!ConvertStringToReal((it->second).first, value))
    return false;
  (it->second).second = true;
  return true;
}

// Looks up 'key' and converts its value to an integer with
// ConvertStringToInteger().  Returns false if the key is absent or the
// value cannot be converted; the entry is marked as consumed only on
// success.
bool ConfigLine::GetValue(const std::string &key, int32 *value) {
  KALDI_ASSERT(value != NULL);
  // data_ is a std::map, so use its logarithmic lookup rather than scanning
  // every entry.
  std::map<std::string, std::pair<std::string, bool> >::iterator it =
      data_.find(key);
  if (it == data_.end())
    return false;
  if (!ConvertStringToInteger((it->second).first, value))
    return false;
  (it->second).second = true;
  return true;
}

// Looks up 'key' and parses its value as a list of integers separated by
// ':' or ',' (empty fields are ignored).  '*value' is always cleared first.
// Returns false if the key is absent or the list cannot be parsed; the
// entry is marked as consumed only on success.
bool ConfigLine::GetValue(const std::string &key, std::vector<int32> *value) {
  KALDI_ASSERT(value != NULL);
  value->clear();
  // data_ is a std::map, so use its logarithmic lookup rather than scanning
  // every entry.
  std::map<std::string, std::pair<std::string, bool> >::iterator it =
      data_.find(key);
  if (it == data_.end())
    return false;
  if (!SplitStringToIntegers((it->second).first, ":,", true, value)) {
    // KALDI_WARN << "Bad option " << (it->second).first;
    return false;
  }
  (it->second).second = true;
  return true;
}

// Looks up 'key' and interprets its value as a boolean, looking only at the
// first character: 'T'/'t' means true and 'F'/'f' means false.  Returns
// false if the key is absent, the value is empty, or it starts with any
// other character; the entry is marked as consumed only on success.
bool ConfigLine::GetValue(const std::string &key, bool *value) {
  KALDI_ASSERT(value != NULL);
  // data_ is a std::map, so use its logarithmic lookup rather than scanning
  // every entry.
  std::map<std::string, std::pair<std::string, bool> >::iterator it =
      data_.find(key);
  if (it == data_.end())
    return false;
  if ((it->second).first.size() == 0) return false;
  switch (((it->second).first)[0]) {
    case 'F':
    case 'f':
      *value = false;
      break;
    case 'T':
    case 't':
      *value = true;
      break;
    default:
      return false;
  }
  (it->second).second = true;
  return true;
}

// Returns true if at least one key=value pair of this line has not been
// marked as consumed by one of the GetValue() functions.
bool ConfigLine::HasUnusedValues() const {
  typedef std::map<std::string, std::pair<std::string, bool> >::const_iterator
      ConstIter;
  ConstIter it = data_.begin();
  const ConstIter last = data_.end();
  while (it != last) {
    const bool consumed = it->second.second;
    if (!consumed) return true;
    ++it;
  }
  return false;
}

// Returns the key=value pairs that have not been consumed, in key order,
// formatted as "key1=value1 key2=value2"; empty if everything was consumed.
std::string ConfigLine::UnusedValues() const {
  typedef std::map<std::string, std::pair<std::string, bool> >::const_iterator
      ConstIter;
  std::string result;
  for (ConstIter it = data_.begin(); it != data_.end(); ++it) {
    const std::pair<std::string, bool> &entry = it->second;
    if (entry.second) continue;  // already consumed.
    if (!result.empty())
      result += " ";
    result += it->first + "=" + entry.first;
  }
  return result;
}

// Two-token variant of ExpectToken: accepts either "token1 token2" or just
// "token2" on the stream.  This is useful in Read functions where the first
// token may already have been consumed by the caller.  Anything else is
// reported through KALDI_ERR.
void ExpectOneOrTwoTokens(std::istream &is, bool binary,
                          const std::string &token1,
                          const std::string &token2) {
  KALDI_ASSERT(token1 != token2);
  std::string observed;
  ReadToken(is, binary, &observed);
  if (observed == token1) {
    // The optional first token was present; the second one must follow.
    ExpectToken(is, binary, token2);
    return;
  }
  if (observed == token2)
    return;
  KALDI_ERR << "Expecting token " << token1 << " or " << token2
            << " but got " << observed;
}


/// Returns true if 'name' is nonempty, starts with a letter or '_', and
/// otherwise contains only letters, digits, '_', '-' or '.' (letters and
/// digits as classified by isalpha()/isalnum() in the current C locale).
bool IsValidName(const std::string &name) {
  if (name.size() == 0) return false;
  for (size_t i = 0; i < name.size(); i++) {
    // <cctype> classifiers are undefined for negative arguments other than
    // EOF, so bytes >= 0x80 must be converted to unsigned char first.
    const unsigned char c = static_cast<unsigned char>(name[i]);
    if (i == 0 && !isalpha(c) && c != '_')
      return false;
    if (!isalnum(c) && c != '_' && c != '-' && c != '.')
      return false;
  }
  return true;
}

// Reads 'is' line by line and appends to '*lines' every line that still has
// content once the comment (everything from the first '#') and the
// surrounding spaces/tabs have been removed.  Empty lines, blank lines and
// comment-only lines are skipped.
void ReadConfigLines(std::istream &is,
                    std::vector<std::string> *lines) {
  KALDI_ASSERT(lines != NULL);
  static const char kBlanks[] = " \t";
  for (std::string line; std::getline(is, line); ) {
    if (line.empty()) continue;
    const size_t content_begin = line.find_first_not_of(kBlanks);
    const size_t comment_pos = line.find_first_of('#');
    // Skip lines that are blank or whose first non-blank character starts a
    // comment.
    if (content_begin == std::string::npos || content_begin == comment_pos)
      continue;
    // Last character of the content, searching backwards from just before
    // the comment.  With no comment, comment_pos is npos and the search
    // simply starts from the end of the line.
    const size_t content_last =
        line.find_last_not_of(kBlanks, comment_pos - 1);
    KALDI_ASSERT(content_last >= content_begin);
    lines->push_back(
        line.substr(content_begin, content_last - content_begin + 1));
  }
}

// Parses every string of 'lines' into the ConfigLine at the same index of
// '*config_lines' (which is resized accordingly).  A line that fails to
// parse is reported through KALDI_ERR.
void ParseConfigLines(const std::vector<std::string> &lines,
                      std::vector<ConfigLine> *config_lines) {
  const size_t num_lines = lines.size();
  config_lines->resize(num_lines);
  for (size_t i = 0; i != num_lines; ++i) {
    if ((*config_lines)[i].ParseLine(lines[i]))
      continue;
    KALDI_ERR << "Error parsing config line: " << lines[i];
  }
}


}  // end namespace kaldi


================================================
FILE: runtime/engine/kaldi/util/text-utils.h
================================================
// util/text-utils.h

// Copyright 2009-2011  Saarland University;  Microsoft Corporation

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef KALDI_UTIL_TEXT_UTILS_H_
#define KALDI_UTIL_TEXT_UTILS_H_

#include <errno.h>
#include <string>
#include <algorithm>
#include <map>
#include <set>
#include <vector>
#include <limits>
#include "base/kaldi-common.h"


namespace kaldi {

/// Split a string using any of the single character delimiters.
/// If omit_empty_strings == true, the output will contain any
/// nonempty strings after splitting on any of the
/// characters in the delimiter.  If omit_empty_strings == false,
/// the output will contain n+1 strings if there are n characters
/// in the set "delim" within the input string.  In this case
/// the empty string is split to a single empty string.
void SplitStringToVector(const std::string &full, const char *delim,
                         bool omit_empty_strings,
                         std::vector<std::string> *out);

/// Joins the elements of a vector of strings into a single string using
/// "delim" as the delimiter. If omit_empty_strings == true, any empty strings
/// in the vector are skipped. A vector of empty strings results in an empty
/// string on the output.
void JoinVectorToString(const std::vector<std::string> &vec_in,
                        const char *delim, bool omit_empty_strings,
                        std::string *str_out);

/**
  \brief Split a string (e.g. 1:2:3) into a vector of integers.

  \param [in]  delim  String containing a list of characters, any of which
                      is allowed as a delimiter.
  \param [in] omit_empty_strings If true, empty strings between delimiters are
                      allowed and will not produce an output integer; if false,
                      instances of characters in 'delim' that are consecutive or
                      at the start or end of the string would be an error.
                      You'll normally want this to be true if 'delim' consists
                      of spaces, and false otherwise.
  \param [out] out   The output list of integers.
*/
template<class I>
bool SplitStringToIntegers(const std::string &full,
                           const char *delim,
                           bool omit_empty_strings,  // typically false [but
                                                     // should probably be true
                                                     // if "delim" is spaces].
                           std::vector<I> *out) {
  KALDI_ASSERT(out != NULL);
  KALDI_ASSERT_IS_INTEGER_TYPE(I);
  if (*(full.c_str()) == '\0') {
    out->clear();
    return true;
  }
  std::vector<std::string> split;
  SplitStringToVector(full, delim, omit_empty_strings, &split);
  out->resize(split.size());
  for (size_t i = 0; i < split.size(); i++) {
    const char *this_str = split[i].c_str();
    char *end = NULL;
    int64 j = 0;
    j = KALDI_STRTOLL(this_str, &end);
    if (end == this_str || *end != '\0') {
      out->clear();
      return false;
    } else {
      I jI = static_cast<I>(j);
      if (static_cast<int64>(jI) != j) {
        // output type cannot fit this integer.
        out->clear();
        return false;
      }
      (*out)[i] = jI;
    }
  }
  return true;
}

// This is defined for F = float and double.
template<class F>
bool SplitStringToFloats(const std::string &full,
                         const char *delim,
                         bool omit_empty_strings,  // typically false
                         std::vector<F> *out);


/// Converts a string into an integer via strtoll and returns false if there was
/// any kind of problem (i.e. the string was not an integer or contained extra
/// non-whitespace junk, or the integer was too large to fit into the type it is
/// being converted into, or it was negative while the target type is
/// unsigned).  Trailing whitespace after the number is allowed.  Only sets
/// *out if everything was OK and it returns true.  Note: errno is reset to 0
/// before the conversion.
template<class Int>
bool ConvertStringToInteger(const std::string &str,
                            Int *out) {
  KALDI_ASSERT_IS_INTEGER_TYPE(Int);
  const char *this_str = str.c_str();
  char *end = NULL;
  errno = 0;
  int64 i = KALDI_STRTOLL(this_str, &end);
  if (end != this_str) {
    // Skip trailing whitespace.  The cast is required because isspace() is
    // undefined for negative arguments, which is what a byte >= 0x80 is when
    // plain char is signed.
    while (isspace(static_cast<unsigned char>(*end))) end++;
  }
  if (end == this_str || *end != '\0' || errno != 0)
    return false;
  Int iInt = static_cast<Int>(i);
  if (static_cast<int64>(iInt) != i ||
      (i < 0 && !std::numeric_limits<Int>::is_signed)) {
    return false;
  }
  *out = iInt;
  return true;
}


/// ConvertStringToReal converts a string into either float or double
/// and returns false if there was any kind of problem (i.e. the string
/// was not a floating point number or contained extra non-whitespace junk).
/// Be careful- this function will successfully read inf's or nan's.
template <typename T>
bool ConvertStringToReal(const std::string &str,
                         T *out);

/// Removes the beginning and trailing whitespaces from a string
void Trim(std::string *str);


/// Removes leading and trailing white space from the string, then splits on the
/// first section of whitespace found (if present), putting the part before the
/// whitespace in "first" and the rest in "rest".  If there is no such space,
/// everything that remains after removing leading and trailing whitespace goes
/// in "first".
void SplitStringOnFirstSpace(const std::string &line,
                             std::string *first,
                             std::string *rest);


/// Returns true if "token" is nonempty, and all characters are
/// printable and whitespace-free.
bool IsToken(const std::string &token);


/// Returns true if "line" is free of \n characters and unprintable
/// characters, and does not contain leading or trailing whitespace.
bool IsLine(const std::string &line);


/**
   This function returns true when two text strings are approximately equal, and
   false when they are not.  The definition of 'equal' is normal string
   equality, except that two substrings like "0.31134" and "0.311341" would be
   considered equal.  'decimal_places_tolerance' controls how many digits after
   the '.' have to match up.
   E.g. StringsApproxEqual("hello 0.23 there", "hello 0.24 there", 2) would
   return false because there is a difference in the 2nd decimal, but with
   an argument of 1 it would return true.
 */
bool StringsApproxEqual(const std::string &a,
                        const std::string &b,
                        int32 decimal_places_check = 2);

/**
   This class is responsible for parsing input like
    hi-there xx=yyy a=b c empty= f-oo=Append(bar, sss) ba_z=123 bing='a b c' baz="a b c d='a b' e"
   and giving you access to the fields, in this case

   FirstToken() == "hi-there", and key->value pairs:

   xx->yyy, a->"b c", empty->"", f-oo->"Append(bar, sss)", ba_z->"123",
   bing->"a b c", baz->"a b c d='a b' e"

   The first token is optional, if the line started with a key-value pair then
   FirstToken() will be empty.

   Note: it can parse value fields with space inside them only if they are free of the '='
   character.  If values are going to contain the '=' character, you need to quote them
   with either single or double quotes.

   Keys are checked with IsValidName(): they may contain -_.a-zA-Z0-9, but
   must begin with a-zA-Z_.
 */
class ConfigLine {
 public:
  // Tries to parse the line as a config-file line.  Returns false
  // if it could not for some reason, e.g. parsing failure.  In most cases
  // prints no warnings; the user should do this.  Does not expect comments.
  bool ParseLine(const std::string &line);

  // the GetValue functions are overloaded for various types.  They return true
  // if the key exists with value that can be converted to that type, and false
  // otherwise.  They also mark the key-value pair as having been read.  It is
  // not an error to read values twice.
  bool GetValue(const std::string &key, std::string *value);
  bool GetValue(const std::string &key, BaseFloat *value);
  bool GetValue(const std::string &key, int32 *value);
  // Values may be separated by ":" or by ",".
  bool GetValue(const std::string &key, std::vector<int32> *value);
  // Only the first character of the value is examined: 't'/'T' or 'f'/'F'.
  bool GetValue(const std::string &key, bool *value);

  // Returns true if some key=value pair was never read via GetValue().
  bool HasUnusedValues() const;
  /// returns e.g. foo=bar xxx=yyy if foo and xxx were not consumed by one
  /// of the GetValue() functions.
  std::string UnusedValues() const;

  // The optional leading token of the line (empty if there was none).
  const std::string &FirstToken() const { return first_token_; }

  // Returns a copy of the line exactly as it was passed to ParseLine().
  // Const-qualified so that it can also be called on a const ConfigLine.
  const std::string WholeLine() const { return whole_line_; }
  // use default assignment operator and copy constructor.
 private:
  std::string whole_line_;
  // the first token of the line, e.g. if line is
  // foo-bar baz=bing
  // then first_token_ would be "foo-bar".
  std::string first_token_;

  // data_ maps from key to (value, is-this-value-consumed?).
  std::map<std::string, std::pair<std::string, bool> > data_;

};

/// This function is like ExpectToken but for two tokens, and it will either
/// accept token1 and then token2, or just token2.  This is useful in Read
/// functions where the first token may already have been consumed.
void ExpectOneOrTwoTokens(std::istream &is, bool binary,
                          const std::string &token1,
                          const std::string &token2);


/**
   This function reads in a config file and *appends* its contents to a vector of
   lines; it is responsible for removing comments (anything after '#') and
   stripping out any lines that contain only whitespace after comment removal.
 */
void ReadConfigLines(std::istream &is,
                     std::vector<std::string> *lines);


/**
   This function converts config-lines from a simple sequence of strings
   as output by ReadConfigLines(), into a sequence of first-tokens and
   name-value pairs.  The general format is:
      "command-type bar=baz xx=yyy"
   etc., although there are subtleties as to what exactly is allowed, see
   documentation for class ConfigLine for details.
   This function will die if there was a parsing failure.
 */
void ParseConfigLines(const std::vector<std::string> &lines,
                      std::vector<ConfigLine> *config_lines);


/// Returns true if 'name' would be a valid name for a component or node in a
/// nnet3Nnet.  This is a nonempty string beginning with A-Za-z_, and containing only
/// '-', '_', '.', A-Z, a-z, or 0-9.
bool IsValidName(const std::string &name);

}  // namespace kaldi

#endif  // KALDI_UTIL_TEXT_UTILS_H_


================================================
FILE: runtime/engine/vad/CMakeLists.txt
================================================
# Add this directory to the header search path, so that targets defined in
# the subdirectories added below can include headers relative to it.
include_directories(${CMAKE_CURRENT_SOURCE_DIR})

# Process nnet/CMakeLists.txt.
add_subdirectory(nnet)

# Process interface/CMakeLists.txt.
add_subdirectory(interface)

================================================
FILE: runtime/engine/vad/frontend/wav.h
================================================
// Copyright (c) 2016 Personal (Binbin Zhang)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <iostream>
#include <string>
#include <vector>

namespace wav {

// In-memory image of the header at the start of a WAV file.  It is read and
// written as raw bytes (fread/fwrite/memcpy of sizeof(WavHeader)), so the
// order and sizes of the fields define the file layout.
// NOTE(review): the code using it assumes the struct occupies exactly 44
// bytes without padding (WavWriter copies a 44-byte template over it).
struct WavHeader {
    char riff[4];  // "RIFF"
    // WavWriter stores sizeof(WavHeader) - 8 + data_size here.
    unsigned int size;
    char wav[4];  // "WAVE"
    char fmt[4];  // "fmt "
    // Size of the fmt chunk; WavReader rejects values below 16 and skips the
    // extra bytes when it is larger than 16.
    unsigned int fmt_size;
    // Format tag; WavWriter's header template sets it to 1.
    uint16_t format;
    // Number of interleaved channels.
    uint16_t channels;
    // Sample rate, stored as given to WavWriter.
    unsigned int sample_rate;
    // WavWriter stores sample_rate * channels * (bit / 8) here.
    unsigned int bytes_per_second;
    // WavWriter stores channels * (bit / 8) here.
    uint16_t block_size;
    // Bits per sample; WavReader supports 8, 16 and 32.
    uint16_t bit;
    char data[4];  // "data"
    // Size in bytes of the sample data that follows the header.
    unsigned int data_size;
};

class WavReader {
  public:
    WavReader() : data_(nullptr) {}
    explicit WavReader(const std::string& filename) { Open(filename); }

    bool Open(const std::string& filename) {
        FILE* fp = fopen(filename.c_str(), "rb");
        if (NULL == fp) {
            std::cout << "Error in read " << filename;
            return false;
        }

        WavHeader header;
        fread(&header, 1, sizeof(header), fp);
        if (header.fmt_size < 16) {
            fprintf(stderr,
                    "WaveData: expect PCM format data "
                    "to have fmt chunk of at least size 16.\n");
            return false;
        } else if (header.fmt_size > 16) {
            int offset = 44 - 8 + header.fmt_size - 16;
            fseek(fp, offset, SEEK_SET);
            fread(header.data, 8, sizeof(char), fp);
        }
        // check "riff" "WAVE" "fmt " "data"

        // Skip any sub-chunks between "fmt" and "data".  Usually there will
        // be a single "fact" sub chunk, but on Windows there can also be a
        // "list" sub chunk.
        while (0 != strncmp(header.data, "data", 4)) {
            // We will just ignore the data in these chunks.
            fseek(fp, header.data_size, SEEK_CUR);
            // read next sub chunk
            fread(header.data, 8, sizeof(char), fp);
        }

        num_channel_ = header.channels;
        sample_rate_ = header.sample_rate;
        bits_per_sample_ = header.bit;
        int num_data = header.data_size / (bits_per_sample_ / 8);
        data_ = new float[num_data];  // Create 1-dim array
        num_samples_ = num_data / num_channel_;

        for (int i = 0; i < num_data; ++i) {
            switch (bits_per_sample_) {
                case 8: {
                    char sample;
                    fread(&sample, 1, sizeof(char), fp);
                    data_[i] = static_cast<float>(sample);
                    break;
                }
                case 16: {
                    int16_t sample;
                    fread(&sample, 1, sizeof(int16_t), fp);
                    // std::cout << sample;
                    data_[i] = static_cast<float>(sample);
                    // std::cout << data_[i];
                    break;
                }
                case 32: {
                    int sample;
                    fread(&sample, 1, sizeof(int), fp);
                    data_[i] = static_cast<float>(sample);
                    break;
                }
                default:
                    fprintf(stderr, "unsupported quantization bits");
                    exit(1);
            }
        }
        fclose(fp);
        return true;
    }

    int num_channel() const { return num_channel_; }
    int sample_rate() const { return sample_rate_; }
    int bits_per_sample() const { return bits_per_sample_; }
    int num_samples() const { return num_samples_; }
    const float* data() const { return data_; }

  private:
    int num_channel_;
    int sample_rate_;
    int bits_per_sample_;
    int num_samples_;  // sample points per channel
    float* data_;
};

// Writes a buffer of float samples to a PCM WAV file with a single 44-byte
// RIFF/fmt/data header.
//
// The sample buffer is borrowed, not owned: it must stay valid until Write()
// returns and is never freed by this class. Samples are read as
// data[i * num_channel + j] (sample point i, channel j) and truncated to the
// requested integer width.
class WavWriter {
  public:
    // data            : num_samples * num_channel float values.
    // num_samples     : sample points per channel.
    // num_channel     : number of channels.
    // sample_rate     : value stored in the header's sample_rate field.
    // bits_per_sample : 8, 16 or 32; other values make Write() fail.
    WavWriter(const float* data,
              int num_samples,
              int num_channel,
              int sample_rate,
              int bits_per_sample)
        : data_(data),
          num_samples_(num_samples),
          num_channel_(num_channel),
          sample_rate_(sample_rate),
          bits_per_sample_(bits_per_sample) {}

    // Writes header and samples to `filename`. On failure (unsupported bit
    // depth or the file cannot be opened) a message is printed to stderr and
    // nothing is written.
    void Write(const std::string& filename) {
        // Reject depths the sample loop below cannot encode; otherwise the
        // file would contain a header announcing data that is never written.
        if (bits_per_sample_ != 8 && bits_per_sample_ != 16 &&
            bits_per_sample_ != 32) {
            fprintf(stderr,
                    "unsupported quantization bits: %d\n",
                    bits_per_sample_);
            return;
        }

        // "wb": the payload is binary. In text mode some platforms translate
        // 0x0A bytes, which corrupts both header and samples.
        FILE* fp = fopen(filename.c_str(), "wb");
        if (fp == NULL) {
            fprintf(stderr, "failed to open %s for writing\n", filename.c_str());
            return;
        }

        // init char 'riff' 'WAVE' 'fmt ' 'data'
        // NOTE(review): this assumes sizeof(WavHeader) == 44 — confirm
        // against the struct definition.
        WavHeader header;
        char wav_header[44] = {
            0x52, 0x49, 0x46, 0x46, 0x00, 0x00, 0x00, 0x00, 0x57, 0x41, 0x56,
            0x45, 0x66, 0x6d, 0x74, 0x20, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x64, 0x61, 0x74, 0x61, 0x00, 0x00, 0x00, 0x00};
        memcpy(&header, wav_header, sizeof(header));

        const int bytes_per_sample = bits_per_sample_ / 8;
        header.channels = num_channel_;
        header.bit = bits_per_sample_;
        header.sample_rate = sample_rate_;
        header.data_size = num_samples_ * num_channel_ * bytes_per_sample;
        // RIFF chunk size excludes the 8-byte "RIFF"+size prefix.
        header.size = sizeof(header) - 8 + header.data_size;
        header.bytes_per_second =
            sample_rate_ * num_channel_ * bytes_per_sample;
        header.block_size = num_channel_ * bytes_per_sample;

        fwrite(&header, 1, sizeof(header), fp);

        for (int i = 0; i < num_samples_; ++i) {
            for (int j = 0; j < num_channel_; ++j) {
                const float value = data_[i * num_channel_ + j];
                switch (bits_per_sample_) {
                    case 8: {
                        char sample = static_cast<char>(value);
                        fwrite(&sample, 1, sizeof(sample), fp);
                        break;
                    }
                    case 16: {
                        int16_t sample = static_cast<int16_t>(value);
                        fwrite(&sample, 1, sizeof(sample), fp);
                        break;
                    }
                    case 32: {
                        int sample = static_cast<int>(value);
                        fwrite(&sample, 1, sizeof(sample), fp);
                        break;
                    }
                }
            }
        }
        fclose(fp);
    }

  private:
    const float* data_;  // borrowed sample buffer, never freed here
    int num_samples_;    // sample points per channel
    int num_channel_;
    int sample_rate_;
    int bits_per_sample_;
};

}  // namespace wav


================================================
FILE: runtime/engine/vad/interface/CMakeLists.txt
================================================
# Shared library that exposes the C API implemented in vad_interface.cc.
set(srcs 
    vad_interface.cc 
)

add_library(pps_vad_interface SHARED ${srcs})
# PUBLIC: targets linking pps_vad_interface also link pps_vad and extern_glog.
target_link_libraries(pps_vad_interface PUBLIC pps_vad extern_glog)


# Demo executable built from vad_interface_main.cc on top of the library.
set(bin_name vad_interface_main)
add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
target_link_libraries(${bin_name} pps_vad_interface)
# set_target_properties(${bin_name} PROPERTIES PUBLIC_HEADER "vad_interface.h;../frontend/wav.h")

# DEST_DIR is this directory's path relative to ENGINE_ROOT; it is reused so
# the installed header and demo source mirror the source tree layout.
file(RELATIVE_PATH DEST_DIR ${ENGINE_ROOT} ${CMAKE_CURRENT_SOURCE_DIR})
install(TARGETS pps_vad_interface DESTINATION lib)
install(FILES vad_interface.h DESTINATION include/${DEST_DIR})

# Install the demo binary, and ship its source as a usage example.
install(TARGETS vad_interface_main 
        RUNTIME DESTINATION bin
        LIBRARY DESTINATION lib
        ARCHIVE DESTINATION lib
        PUBLIC_HEADER DESTINATION include/${DEST_DIR}
)
install(FILES vad_interface_main.cc DESTINATION demo/${DEST_DIR})

================================================
FILE: runtime/engine/vad/interface/vad_interface.cc
================================================
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include "vad/interface/vad_interface.h"

#include "common/base/config.h"
#include "vad/nnet/vad.h"


// Creates a VAD instance from the configuration file at `conf_path`.
//
// Missing keys fall back to the defaults passed to conf.Read() below.
// Returns an opaque handle that must be released with
// PPSVadDestroyInstance(), or nullptr when `conf_path` is null or
// construction fails.
PPSHandle_t PPSVadCreateInstance(const char* conf_path) {
    if (conf_path == nullptr) {
        printf("conf_path is null\n");
        return nullptr;
    }

    ppspeech::Vad* model = nullptr;
    // This function has C linkage, so no C++ exception may leave it. Any
    // failure is turned into a null handle and the partially built model is
    // released.
    try {
        Config conf(conf_path);
        ppspeech::VadNnetConf nnet_conf;
        nnet_conf.sr = conf.Read("sr", 16000);
        nnet_conf.frame_ms = conf.Read("frame_ms", 32);
        nnet_conf.threshold = conf.Read("threshold", 0.45f);
        nnet_conf.beam = conf.Read("beam", 0.15f);
        nnet_conf.min_silence_duration_ms =
            conf.Read("min_silence_duration_ms", 200);
        nnet_conf.speech_pad_left_ms = conf.Read("speech_pad_left_ms", 0);
        nnet_conf.speech_pad_right_ms = conf.Read("speech_pad_right_ms", 0);

        nnet_conf.model_file_path = conf.Read("model_path", std::string(""));
        nnet_conf.param_file_path = conf.Read("param_path", std::string(""));
        // NOTE(review): num_cpu_thread is read here, but the Vad constructor
        // pins the ORT thread counts to 1 and SetConfig() does not forward it.
        nnet_conf.num_cpu_thread = conf.Read("num_cpu_thread", 1);

        model = new ppspeech::Vad(nnet_conf.model_file_path);

        // custom config, but must be set before init
        model->SetConfig(nnet_conf);
        model->Init();
    } catch (...) {
        printf("create vad instance failed\n");
        delete model;
        return nullptr;
    }

    return static_cast<PPSHandle_t>(model);
}


// Releases a VAD instance created by PPSVadCreateInstance(). A null handle
// is accepted. Always returns 0.
int PPSVadDestroyInstance(PPSHandle_t instance) {
    // `delete` on a null pointer is a no-op, so no explicit guard is needed.
    delete static_cast<ppspeech::Vad*>(instance);
    return 0;
}

// Returns the model's window size in samples, or -1 for a null handle.
int PPSVadChunkSizeSamples(PPSHandle_t instance) {
    if (instance == nullptr) {
        printf("instance is null\n");
        return -1;
    }

    auto* vad = static_cast<ppspeech::Vad*>(instance);
    return vad->WindowSizeSamples();
}

// Runs one chunk of audio through the model and returns the resulting state.
//
// instance    : handle from PPSVadCreateInstance().
// chunk       : `num_element` float samples; copied before inference.
// num_element : number of samples in `chunk`, must be positive.
//
// Returns PPS_VAD_ILLEGAL for a null handle, a null/empty chunk, or when
// inference fails.
PPSVadState_t PPSVadFeedForward(PPSHandle_t instance,
                                float* chunk,
                                int num_element) {
    ppspeech::Vad* model = static_cast<ppspeech::Vad*>(instance);
    if (model == nullptr) {
        printf("instance is null\n");
        return PPS_VAD_ILLEGAL;
    }
    // Building a vector from a null pointer or a negative length is
    // undefined behaviour; reject such input up front.
    if (chunk == nullptr || num_element <= 0) {
        printf("chunk is null or empty\n");
        return PPS_VAD_ILLEGAL;
    }

    std::vector<float> chunk_in(chunk, chunk + num_element);
    if (!model->ForwardChunk(chunk_in)) {
        printf("forward chunk failed\n");
        return PPS_VAD_ILLEGAL;
    }
    // Vad::State and PPSVadState_t list their states in the same order, so
    // the numeric value carries over directly.
    ppspeech::Vad::State s = model->Postprocess();
    return static_cast<PPSVadState_t>(s);
}

// Clears the instance's streaming state so a new audio stream can be fed.
// Returns 0 on success, -1 for a null handle.
int PPSVadReset(PPSHandle_t instance) {
    if (instance == nullptr) {
        printf("instance is null\n");
        return -1;
    }

    static_cast<ppspeech::Vad*>(instance)->Reset();
    return 0;
}

// Writes the detected speech segments into `result` (at most `max_len` bytes
// including the terminating NUL) and returns the value reported by
// Vad::GetResult(). Returns -1 for a null handle or a negative `max_len`.
int PPSVadGetResult(PPSHandle_t instance, char* result, int max_len) {
    ppspeech::Vad* model = static_cast<ppspeech::Vad*>(instance);
    if (model == nullptr) {
        printf("instance is null\n");
        return -1;
    }
    // A negative length would turn into a huge size_t once it reaches
    // snprintf and allow writing past the caller's buffer.
    if (result != nullptr && max_len < 0) {
        printf("max_len is negative\n");
        return -1;
    }
    return model->GetResult(result, max_len);
}

================================================
FILE: runtime/engine/vad/interface/vad_interface.h
================================================
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#ifdef __cplusplus
extern "C" {
#endif

// Opaque handle to a VAD instance; obtained from PPSVadCreateInstance().
typedef void* PPSHandle_t;

// State reported for each chunk passed to PPSVadFeedForward().
typedef enum {
    PPS_VAD_ILLEGAL = 0,  // error
    PPS_VAD_SIL,          // silence
    PPS_VAD_START,        // start speech
    PPS_VAD_SPEECH,       // in speech
    PPS_VAD_END,          // end speech
    PPS_VAD_NUMSTATES,    // number of states
} PPSVadState_t;

// Builds a VAD instance from the config file at `conf_path` and returns its
// handle. Release it with PPSVadDestroyInstance().
PPSHandle_t PPSVadCreateInstance(const char* conf_path);

// Destroys an instance. A null handle is accepted. Returns 0.
int PPSVadDestroyInstance(PPSHandle_t instance);

// Clears the streaming state of the instance. Returns 0, or -1 when
// `instance` is null.
int PPSVadReset(PPSHandle_t instance);

// Returns the model's window size in samples, or -1 when `instance` is null.
int PPSVadChunkSizeSamples(PPSHandle_t instance);

// Feeds `num_element` float samples from `chunk` to the model and returns the
// state for that chunk; PPS_VAD_ILLEGAL when `instance` is null or inference
// fails.
PPSVadState_t PPSVadFeedForward(PPSHandle_t instance,
                                float* chunk,
                                int num_element);

// Formats the detected segments as a list of {"s": start, "e": end} entries
// into `result`, writing at most `max_len` bytes. Returns -1 when `instance`
// is null.
int PPSVadGetResult(PPSHandle_t instance, char* result, int max_len);
#ifdef __cplusplus
}
#endif  // __cplusplus

================================================
FILE: runtime/engine/vad/interface/vad_interface_main.cc
================================================
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include <iostream>
#include <vector>

#include <fstream>
#include "common/base/common.h"
#include "vad/frontend/wav.h"
#include "vad/interface/vad_interface.h"

int main(int argc, char* argv[]) {
    if (argc < 3) {
        std::cout << "Usage: vad_interface_main path/to/config wav.scp"
                     "run_option, "
                     "e.g ./vad_interface_main config wav.scp"
                  << std::endl;
        return -1;
    }

    std::string config_path = argv[1];
    std::string wav_scp = argv[2];

    PPSHandle_t handle = PPSVadCreateInstance(config_path.c_str());

    std::ifstream fp_wav(wav_scp);
    std::string line = "";
    while(getline(fp_wav, line)){
        std::vector<float> inputWav;  // [0, 1]
        wav::WavReader wav_reader = wav::WavReader(line);
        auto sr = wav_reader.sample_rate();
        CHECK(sr == 16000) << " sr is " << sr << " expect 16000";

        auto num_samples = wav_reader.num_samples();
        inputWav.resize(num_samples);
        for (int i = 0; i < num_samples; i++) {
            inputWav[i] = wav_reader.data()[i] / 32768;
        }

        ppspeech::Timer timer;
        int window_size_samples = PPSVadChunkSizeSamples(handle);
        for (int64_t j = 0; j < num_samples; j += window_size_samples) {
            auto start = j;
            auto end = start + window_size_samples >= num_samples
                        ? num_samples
                        : start + window_size_samples;
            std::vector<float> r(window_size_samples, 0);
            auto current_chunk_size = end - start;
            memcpy(r.data(), inputWav.data() + start, current_chunk_size * sizeof(float));

            PPSVadState_t s = PPSVadFeedForward(handle, r.data(), r.size());
        }

        std::cout << "RTF=" << timer.Elapsed() / double(num_samples / sr)
                << std::endl;

        char result[10240] = {0};
        PPSVadGetResult(handle, result, 10240);
        std::cout << line << " " << result << std::endl;
        
        PPSVadReset(handle);
        // getchar();
    }
    PPSVadDestroyInstance(handle);
    return 0;
}


================================================
FILE: runtime/engine/vad/nnet/CMakeLists.txt
================================================
# Core VAD library built from vad.cc.
set(srcs 
    vad.cc 
)

add_library(pps_vad ${srcs})
# PUBLIC: targets linking pps_vad also link FastDeploy, common and glog.
target_link_libraries(pps_vad PUBLIC ${FASTDEPLOY_LIBS} common extern_glog)


# Stand-alone test executable built from vad_nnet_main.cc.
set(bin_name vad_nnet_main)
add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
target_link_libraries(${bin_name} pps_vad)

file(RELATIVE_PATH DEST_DIR ${ENGINE_ROOT} ${CMAKE_CURRENT_SOURCE_DIR})
install(TARGETS pps_vad DESTINATION lib)
# The glog target is installed under a different name on Android than on
# other platforms.
if(ANDROID)
    install(TARGETS extern_glog DESTINATION lib)
else() # UNIX
    install(TARGETS glog DESTINATION lib)
endif()


================================================
FILE: runtime/engine/vad/nnet/vad.cc
================================================
// Copyright (c) 2023 Chen Qianhe Authors. All Rights Reserved.
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "vad/nnet/vad.h"

#include <cstring>
#include <iomanip>

#include "common/base/common.h"


namespace ppspeech {

// Stores the runtime configuration for the model at `model_file`.
//
// `custom_option` is copied first; device, backend, model format, ORT
// settings and model path are then overwritten with the values below.
Vad::Vad(const std::string& model_file,
         const fastdeploy::RuntimeOption&
             custom_option /* = fastdeploy::RuntimeOption() */) {
    // Backends accepted per device type.
    valid_cpu_backends = {fastdeploy::Backend::ORT,
                          fastdeploy::Backend::OPENVINO};
    valid_gpu_backends = {fastdeploy::Backend::ORT, fastdeploy::Backend::TRT};

    runtime_option = custom_option;

    // CPU + ONNX Runtime, ONNX model format.
    runtime_option.UseCpu();
    runtime_option.UseOrtBackend();
    runtime_option.model_format = fastdeploy::ModelFormat::ONNX;

    // Graph optimization level and single-threaded execution.
    auto& ort = runtime_option.ort_option;
    ort.graph_optimization_level = 99;
    ort.intra_op_num_threads = 1;
    ort.inter_op_num_threads = 1;

    runtime_option.model_file = model_file;
}

// Runs Initialize() while holding init_lock_. The result of Initialize() is
// not reported to the caller.
void Vad::Init() {
    const std::lock_guard<std::mutex> guard(init_lock_);
    (void)Initialize();
}

// Name under which this model identifies itself.
std::string Vad::ModelName() const {
    return std::string("VAD");
}

void Vad::SetConfig(const VadNnetConf conf) {
    SetConfig(conf.sr,
              conf.frame_ms,
              conf.threshold,
              conf.beam,
              conf.min_silence_duration_ms,
              conf.speech_pad_left_ms,
              conf.speech_pad_right_ms);
}

// Sets the streaming parameters. Must be called before Init().
//
// sr                      : sample rate in Hz.
// frame_ms                : window length in milliseconds.
// threshold               : probability at or above which speech starts.
// beam                    : while in speech, the probability may drop to
//                           threshold - beam before the frame counts as
//                           silence.
// min_silence_duration_ms : silence needed to close a speech segment.
// speech_pad_left_ms      : padding subtracted from a segment start.
// speech_pad_right_ms     : padding added to a segment end.
//
// Throws std::runtime_error when the model is already initialized.
void Vad::SetConfig(const int& sr,
                    const int& frame_ms,
                    const float& threshold,
                    const float& beam,
                    const int& min_silence_duration_ms,
                    const int& speech_pad_left_ms,
                    const int& speech_pad_right_ms) {
    if (initialized_) {
        fastdeploy::FDERROR << "SetConfig must be called before init"
                            << std::endl;
        throw std::runtime_error("SetConfig must be called before init");
    }
    sample_rate_ = sr;
    // Integer samples per millisecond; every *_ms value is converted with it.
    sr_per_ms_ = sr / 1000;
    threshold_ = threshold;
    beam_ = beam;
    frame_ms_ = frame_ms;
    min_silence_samples_ = min_silence_duration_ms * sr_per_ms_;
    speech_pad_left_samples_ = speech_pad_left_ms * sr_per_ms_;
    speech_pad_right_samples_ = speech_pad_right_ms * sr_per_ms_;

    // init chunk size
    window_size_samples_ = frame_ms * sr_per_ms_;
    current_chunk_size_ = window_size_samples_;

    // Log the real sample rate under "sr=" (not samples-per-ms) and end the
    // line with std::endl like the other log statements in this file.
    fastdeploy::FDINFO << "sr=" << sample_rate_ << " threshold=" << threshold_
                       << " beam=" << beam_ << " frame_ms=" << frame_ms_
                       << " min_silence_duration_ms=" << min_silence_duration_ms
                       << " speech_pad_left_ms=" << speech_pad_left_ms
                       << " speech_pad_right_ms=" << speech_pad_right_ms
                       << std::endl;
}

// Returns the detector to its initial streaming state: zeroed recurrent
// buffers, no active speech segment, empty segment and state history.
void Vad::Reset() {
    // Recurrent state buffers back to all-zero.
    const size_t h_bytes = h_.size() * sizeof(float);
    const size_t c_bytes = c_.size() * sizeof(float);
    std::memset(h_.data(), 0, h_bytes);
    std::memset(c_.data(), 0, c_bytes);

    // Segment tracking.
    current_sample_ = 0;
    temp_end_ = 0;
    triggerd_ = false;

    // Accumulated results.
    states_.clear();
    speechEnd_.clear();
    speechStart_.clear();
}

// Allocates tensor holders and state buffers, resets the streaming state and
// initializes the inference runtime.
//
// Returns false when the runtime cannot be initialized; `initialized_` is
// set only on success.
bool Vad::Initialize() {
    // input & output holder
    inputTensors_.resize(4);
    outputTensors_.resize(3);

    // input shape: {1, window_size_samples_}. Clear first so that calling
    // Init() more than once cannot append extra dimensions.
    input_node_dims_.clear();
    input_node_dims_.emplace_back(1);
    input_node_dims_.emplace_back(window_size_samples_);
    // sr buffer
    sr_.resize(1);
    sr_[0] = sample_rate_;
    // hidden state buffer
    h_.resize(size_hc_);
    c_.resize(size_hc_);

    Reset();

    // InitRuntime
    if (!InitRuntime()) {
        fastdeploy::FDERROR << "Failed to initialize fastdeploy backend."
                            << std::endl;
        return false;
    }

    initialized_ = true;

    // End the line with std::endl like the FDERROR statement above.
    fastdeploy::FDINFO << "init done." << std::endl;
    return true;
}

bool Vad::ForwardChunk(std::vector<float>& chunk) {
    // last chunk may not be window_size_samples_
    input_node_dims_.back() = chunk.size();
    assert(window_size_samples_ >= chunk.size());
    current_chunk_size_ = chunk.size();

    inputTensors_[0].name = "input";
    inputTensors_[0].SetExternalData(
        input_node_dims_, fastdeploy::FDDataType::FP32, chunk.data());
    inputTensors_[1].name = "sr";
    inputTensors_[1].SetExternalData(
        sr_node_dims_, fastdeploy::FDDataType::INT64, sr_.data());
    inputTensors_[2].name = "h";
    inputTensors_[2].SetExternalData(
        hc_node_dims_, fastdeploy::FDDataType::FP32, h_.data());
    inputTensors_[3].name = "c";
    inputTensors_[3].SetExternalData(
        hc_node_dims_, fastdeploy::FDDataType::FP32, c_.data());

    if (!Infer(inputTensors_, &outputTensors_)) {
        return false;
    }

    // Push forward sample index
    current_sample_ += current_chunk_size_;
    return true;
}

// Consumes the outputs of the last ForwardChunk() call: stores the speech
// probability, copies the new recurrent state into h_/c_, advances the
// segment state machine and appends the resulting state to states_.
//
// State machine (p = speech probability):
//   not in speech, p <  threshold         -> SIL
//   not in speech, p >= threshold         -> START  (records segment start)
//   in speech,     p >= threshold - beam  -> SPEECH
//   in speech,     p <  threshold - beam  -> SIL while the silence is shorter
//                                            than min_silence_samples_, else
//                                            END (records segment end)
//   anything else (e.g. p is NaN)         -> ILLEGAL
//
// Returns a reference to the state just appended.
const Vad::State& Vad::Postprocess() {
    // update prob, h, c
    outputProb_ = *(float*)outputTensors_[0].Data();
    auto* hn = static_cast<float*>(outputTensors_[1].MutableData());
    std::memcpy(h_.data(), hn, h_.size() * sizeof(float));
    auto* cn = static_cast<float*>(outputTensors_[2].MutableData());
    std::memcpy(c_.data(), cn, c_.size() * sizeof(float));

    if (outputProb_ < threshold_ && !triggerd_) {
        // 1. Silence
#ifdef PPS_DEBUG
        DLOG(INFO) << "{ silence: " << 1.0 * current_sample_ / sample_rate_
                   << " s; prob: " << outputProb_ << " }";
#endif
        states_.emplace_back(Vad::State::SIL);
    } else if (outputProb_ >= threshold_ && !triggerd_) {
        // 2. Start
        triggerd_ = true;
        // Compute in a wide signed type so that left padding larger than the
        // chunk start clamps to 0 without relying on unsigned wrap-around.
        const int64_t start_sample =
            static_cast<int64_t>(current_sample_) -
            static_cast<int64_t>(current_chunk_size_) -
            static_cast<int64_t>(speech_pad_left_samples_);
        speech_start_ =
            start_sample < 0 ? 0u : static_cast<unsigned int>(start_sample);
        float start_sec = 1.0 * speech_start_ / sample_rate_;
        speechStart_.emplace_back(start_sec);
#ifdef PPS_DEBUG
        DLOG(INFO) << "{ speech start: " << start_sec
                   << " s; prob: " << outputProb_ << " }";
#endif
        states_.emplace_back(Vad::State::START);
    } else if (outputProb_ >= threshold_ - beam_ && triggerd_) {
        // 3. Continue

        if (temp_end_ != 0) {
            // speech prob relaxation, speech continues again
#ifdef PPS_DEBUG
            DLOG(INFO)
                << "{ speech fake end(sil < min_silence_ms) to continue: "
                << 1.0 * current_sample_ / sample_rate_
                << " s; prob: " << outputProb_ << " }";
#endif
            temp_end_ = 0;
        } else {
            // speech prob relaxation, keep tracking speech
#ifdef PPS_DEBUG
            DLOG(INFO) << "{ speech continue: "
                       << 1.0 * current_sample_ / sample_rate_
                       << " s; prob: " << outputProb_ << " }";
#endif
        }

        states_.emplace_back(Vad::State::SPEECH);
    } else if (outputProb_ < threshold_ - beam_ && triggerd_) {
        // 4. End
        if (temp_end_ == 0) {
            // Remember where the candidate silence began.
            temp_end_ = current_sample_;
        }

        // check possible speech end
        if (current_sample_ - temp_end_ < min_silence_samples_) {
            // a. silence < min_silence_samples, continue speaking
#ifdef PPS_DEBUG
            DLOG(INFO) << "{ speech fake end(sil < min_silence_ms): "
                       << 1.0 * current_sample_ / sample_rate_
                       << " s; prob: " << outputProb_ << " }";
#endif
            states_.emplace_back(Vad::State::SIL);
        } else {
            // b. silence >= min_silence_samples, end speaking
            speech_end_ = current_sample_ + speech_pad_right_samples_;
            temp_end_ = 0;
            triggerd_ = false;
            auto end_sec = 1.0 * speech_end_ / sample_rate_;
            speechEnd_.emplace_back(end_sec);
#ifdef PPS_DEBUG
            DLOG(INFO) << "{ speech end: " << end_sec
                       << " s; prob: " << outputProb_ << " }";
#endif
            states_.emplace_back(Vad::State::END);
        }
    } else {
        // No comparison above holds (a NaN probability). Record an explicit
        // error state so states_.back() below is always valid.
        states_.emplace_back(Vad::State::ILLEGAL);
    }

    return states_.back();
}

// Formats a duration given in seconds as "hours:minutes:seconds".
// Hours and minutes are whole numbers, seconds keeps its fractional part;
// no zero padding is applied.
std::string Vad::ConvertTime(float time_s) const {
    // Whole hours.
    const float hours_f = time_s / 60 / 60;
    const int hours = (int)hours_f;

    // Whole minutes left after the full hours are removed.
    const float total_minutes = time_s / 60;
    const int minutes = (total_minutes >= 60)
                            ? (int)(total_minutes - 60 * (double)hours)
                            : (int)total_minutes;

    // Seconds left after whole hours and minutes are removed.
    const float accounted_s = (60 * 60 * hours) + (60 * minutes);
    const float seconds = time_s - accounted_s;

    // Output format.
    std::stringstream out;
    out << hours << ":" << minutes << ":" << seconds;
    return out.str();
}

// Formats the speech segments detected so far as a JSON-style array
//   [{"s":"h:m:s","e":"h:m:s"},...]
// and copies it into `result`, truncated to `max_len` bytes including the
// terminating NUL.
//
// A segment that has started but not ended yet is reported as ending at the
// current audio position. This is computed on the fly and does not touch the
// stored segment lists, so the call is safe in the middle of a stream.
//
// removeThreshold, expandHeadThreshold, expandTailThreshold and
// mergeThreshold are currently unused.
//
// Returns 0; when no segment exists nothing is written. Returns -1 when
// `max_len` is negative.
int Vad::GetResult(char* result, int max_len,
    float removeThreshold,
    float expandHeadThreshold,
    float expandTailThreshold,
    float mergeThreshold) const {
    (void)removeThreshold;
    (void)expandHeadThreshold;
    (void)expandTailThreshold;
    (void)mergeThreshold;

    if (speechStart_.empty() && speechEnd_.empty()) {
        return 0;
    }

    // Audio position in seconds; used as the end of a still-open segment.
    const float audioLength = 1.0 * current_sample_ / sample_rate_;

    std::string json = "[";
    for (size_t i = 0; i < speechStart_.size(); ++i) {
        const float end_s =
            i < speechEnd_.size() ? speechEnd_[i] : audioLength;
        // Separator goes in front of every entry but the first, so no
        // trailing comma has to be removed afterwards.
        if (i != 0) {
            json += ",";
        }
        json += "{\"s\":\"" + ConvertTime(speechStart_[i]) + "\",\"e\":\"" +
                ConvertTime(end_s) + "\"}";
    }
    json += "]";

    if (result == NULL) {
        DLOG(INFO) << "result is NULL";
        return 0;
    }
    if (max_len < 0) {
        // A negative size would be converted to a huge size_t by snprintf.
        DLOG(INFO) << "max_len is negative";
        return -1;
    }
    snprintf(result, max_len, "%s", json.c_str());
    return 0;
}

// Streams a short bracketed tag for a VAD state; values outside the known
// states print as "[ILL]".
std::ostream& operator<<(std::ostream& os, const Vad::State& s) {
    const char* tag = "[ILL]";  // illegal state unless matched below
    switch (s) {
        case Vad::State::SIL:
            tag = "[SIL]";
            break;
        case Vad::State::START:
            tag = "[STA]";
            break;
        case Vad::State::SPEECH:
            tag = "[SPE]";
            break;
        case Vad::State::END:
            tag = "[END]";
            break;
        default:
            break;
    }
    os << tag;
    return os;
}

}  // namespace ppspeech

================================================
FILE: runtime/engine/vad/nnet/vad.h
================================================
// Copyright (c) 2023 Chen Qianhe Authors. All Rights Reserved.
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <iostream>
#include <mutex>
#include <vector>

#include "fastdeploy/fastdeploy_model.h"
#include "fastdeploy/runtime.h"
#include "vad/frontend/wav.h"

namespace ppspeech {

// Plain configuration record for the VAD model. Members have no default
// initializers, so a default-constructed instance must be filled in before
// use.
struct VadNnetConf {
    // wav
    int sr;                       // sample rate in Hz
    int frame_ms;                 // window length in milliseconds
    float threshold;              // speech-start probability threshold
    float beam;                   // margin below threshold tolerated while in speech
    int min_silence_duration_ms;  // silence required to end a segment
    int speech_pad_left_ms;       // padding applied before a segment start
    int speech_pad_right_ms;      // padding applied after a segment end

    // model
    std::string model_file_path;
    std::string param_file_path;
    std::string dict_file_path;
    int num_cpu_thread;   // CPU thread count; NOTE(review): not forwarded by
                          // Vad::SetConfig(const VadNnetConf)
    std::string backend;  // ort,lite, etc.
};

class Vad : public fastdeploy::FastDeployModel {
  public:
    enum class State { ILLEGAL = 0, SIL, START, SPEECH, END };
    friend std::ostream& operator<<(std::ostream& os, const Vad::State& s);

    Vad(const std::string& model_file,
        const fastdeploy::RuntimeOption& custom_option =
            fastdeploy::RuntimeOption());

    virtual ~Vad() {}

    void Init();

    void Reset();

    void SetConfig(const int& sr,
                   const int& frame_ms,
                   const float& threshold,
                   const float& beam,
                   const int& min_silence_duration_ms,
                   const int& speech_pad_left_ms,
                   const int& speech_pad_right_ms);
    void SetConfig(const VadNnetConf conf);

    bool ForwardChunk(std::vector<float>& chunk);

    const State& Postprocess();

    int GetResult(char* result, int max_len,
        float removeThreshold = 0.0,
        float expandHeadThreshold = 0.0,
        float expandTailThreshold = 0,
        float mergeThreshold = 0.0) const;

    const std::vector<State> GetStates() const { return states_; }

    int SampleRate() const { return sample_rate_; }

    int FrameMs() const { return frame_ms_; }
    int64_t WindowSizeSamples() const { return window_size_samples_; }

    float Threshold() const { return threshold_; }

    int MinSilenceDurationMs() const {
        return min_silence_samples_ / sample_rate_;
    }
    int SpeechPadLeftMs() const {
        return speech_pad_left_samples_ / sample_rate_;
    }
    int SpeechPadRightMs() const {
        return speech_pad_right_samples_ / sample_rate_;
    }

    int MinSilenceSamples() const { return min_silence_samples_; }
    int SpeechPadLeftSamples() const { return speech_pad_left_samples_; }
    int SpeechPadRightSamples() const { return speech_pad_right_samples_; }

    std::string ModelName() const override;

  private:
    bool Initialize();
    std::string ConvertTime(float time_s) const;

  private:
    std::mutex init_lock_;
    bool initialized_{false};

    // input and output
    std::vector<fastdeploy::FDTensor> inputTensors_;
    std::vector<fastdeploy::FDTensor> outputTensors_;

    // model states
    bool triggerd_ = false;
    unsigned int speech_start_ = 0;
    unsigned int speech_end_ = 0;
    unsigned int temp_end_ = 0;
    unsigned int current_sample_ = 0;
    unsigned int current_chunk_size_ = 0;
    // MAX 4294967295 samples / 8sample per ms / 1000 / 60 = 8947 minutes
    float outputProb_;

    std::vector<float> speechStart_;
    mutable std::vector<float> speechEnd_;

    std::vector<State> states_;

    /* ========================================================================
     */
    int sample_rate_ = 16000;
    int frame_ms_ = 32;  // 32, 64, 96 for 16k
    float threshold_ = 0.5f;
    float beam_ = 0.15f;

    int64_t window_size_samples_;  // support 256 512 768 for 8k; 512 1024 1536
                                   // for 16k.
    int sr_per_ms_;                // support 8 or 16
    int min_silence_samples_;      // sr_per_ms_ * frame_ms_
    int speech_pad_left_samples_{0};   // usually 250ms
    int speech_pad_right_samples_{0};  // usually 0

    /* ========================================================================
     */
    std::vector<int64_t> sr_;
    const size_t size_hc_ = 2 * 1 * 64;  // It's FIXED.
    std::vector<float> h_;
    std::vector<float> c_;

    std::vector<int64_t> input_node_dims_;
    const std::vector<int64_t> sr_node_dims_ = {1};
    const std::vector<int64_t> hc_node_dims_ = {2, 1, 64};
};

}  // namespace ppspeech

================================================
FILE: runtime/engine/vad/nnet/vad_nnet_main.cc
================================================
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include "common/base/common.h"
#include "vad/nnet/vad.h"

// Stand-alone VAD test driver.
//
// argv[1]: path to the VAD model file.
// argv[2]: path to a 16 kHz wav file.
//
// Prints the per-window state sequence followed by the real-time factor.
// Returns 0 on success and -1 on any failure.
int main(int argc, char* argv[]) {
    if (argc < 3) {
        std::cout << "Usage: vad_nnet_main path/to/model path/to/audio, "
                     "e.g ./vad_nnet_main silero_vad.onnx sample.wav"
                  << std::endl;
        return -1;
    }

    std::string model_file = argv[1];
    std::string audio_file = argv[2];

    int sr = 16000;
    ppspeech::Vad vad(model_file);
    // custom config, but must be set before init
    vad.SetConfig(sr, 32, 0.5f, 0.15, 200, 0, 0);
    vad.Init();

    wav::WavReader wav_reader(audio_file);
    // Checked at runtime: an assert would vanish in release builds and let a
    // mismatched sample rate through silently.
    if (wav_reader.sample_rate() != sr) {
        std::cerr << "sr is " << wav_reader.sample_rate() << " expect " << sr
                  << std::endl;
        return -1;
    }

    // num_samples() counts sample points per channel while data() holds all
    // channels, so only channel 0 of each sample point is taken.
    const int num_samples = wav_reader.num_samples();
    const size_t num_channel = wav_reader.num_channel();
    // NOTE(review): dividing by 32768 presumes 16-bit input — confirm.
    std::vector<float> inputWav(num_samples > 0 ? num_samples : 0);
    for (size_t i = 0; i < inputWav.size(); i++) {
        inputWav[i] = wav_reader.data()[i * num_channel] / 32768;
    }

    ppspeech::Timer timer;
    const int window_size_samples = vad.WindowSizeSamples();
    if (window_size_samples <= 0) {
        std::cerr << "invalid window size: " << window_size_samples
                  << std::endl;
        return -1;
    }
    for (int64_t start = 0; start < num_samples;
         start += window_size_samples) {
        const int64_t end = start + window_size_samples >= num_samples
                                ? num_samples
                                : start + window_size_samples;

        // The last window may be shorter than window_size_samples.
        std::vector<float> r(inputWav.begin() + start, inputWav.begin() + end);

        if (!vad.ForwardChunk(r)) {
            std::cerr << "Failed to inference while using model:"
                      << vad.ModelName() << "." << std::endl;
            // Non-zero exit code; `return false` would report success (0).
            return -1;
        }

        ppspeech::Vad::State s = vad.Postprocess();
        std::cout << s << " ";
    }
    std::cout << std::endl;

    // Divide in floating point: the integer quotient num_samples / sr is 0
    // for any clip shorter than one second.
    if (num_samples > 0) {
        std::cout << "RTF="
                  << timer.Elapsed() / (static_cast<double>(num_samples) / sr)
                  << std::endl;
    }
    std::cout << "\b\b " << std::endl;

    vad.Reset();

    return 0;
}


================================================
FILE: runtime/examples/.gitignore
================================================
*.ark
*.scp
paddle_asr_model/


================================================
FILE: runtime/examples/README.md
================================================
# Examples for SpeechX

> `u2pp_ol` is recommended.

* `u2pp_ol` - u2++ streaming asr test under `aishell-1` test dataset.
* `ds2_ol` - ds2 streaming test under `aishell-1` test dataset. 


## How to run  

### Create env

Use `tools/env.sh` under `speechx` to create the Python environment.

```
bash tools/env.sh
```

Activate the environment before running an example.
```
. venv/bin/activate
```

### Play with example

`run.sh` is the entry point for every example.

Example to play `u2pp_ol`:

```
pushd u2pp_ol/wenetspeech
bash run.sh --stop_stage 4
```

## Display Model with [Netron](https://github.com/lutzroeder/netron)  

If you have a model, you can use this command to display its graph.

For example:
```
pip install netron
netron exp/deepspeech2_online/checkpoints/avg_1.jit.pdmodel  --port 8022 --host 10.21.55.20
```

## For Developer  

> Reminder: For developers only; make sure you know what it does.

* codelab - for speechx developers; used for testing.


================================================
FILE: runtime/examples/android/VadJni/.gitignore
================================================
*.iml
.gradle
/local.properties
/.idea/caches
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
/.idea/navEditor.xml
/.idea/assetWizardSettings.xml
.DS_Store
/build
/captures
.externalNativeBuild
.cxx
local.properties


================================================
FILE: runtime/examples/android/VadJni/app/.gitignore
================================================
/build
/cache


================================================
FILE: runtime/examples/android/VadJni/app/build.gradle
================================================
plugins {
    id 'com.android.application'
}

// Android application module configuration for the VAD JNI demo.
android {
    namespace 'com.baidu.paddlespeech.vadjni'
    compileSdk 33
    ndkVersion '23.1.7779620'

    defaultConfig {
        applicationId "com.baidu.paddlespeech.vadjni"
        minSdk 21
        targetSdk 33
        versionCode 1
        versionName "1.0"

        testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"

        // Arguments handed to CMake for the native build; the platform level
        // matches minSdk and only arm64-v8a is built.
        externalNativeBuild {
            cmake {
                arguments '-DANDROID_PLATFORM=android-21', '-DANDROID_STL=c++_shared', "-DANDROID_TOOLCHAIN=clang"
                abiFilters 'arm64-v8a'
                cppFlags "-std=c++11"
            }
        }
    }

    buildTypes {
        release {
            // Code shrinking is off; the proguard files are listed but only
            // take effect if minifyEnabled is turned on.
            minifyEnabled false
            proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
        }
    }
    compileOptions {
        sourceCompatibility JavaVersion.VERSION_1_8
        targetCompatibility JavaVersion.VERSION_1_8
    }
    // Location and version of the CMake project that builds the native code.
    externalNativeBuild {
        cmake {
            path file('src/main/cpp/CMakeLists.txt')
            version '3.22.1'
        }
    }
    buildFeatures {
        viewBinding true
    }
    sourceSets {
        main {
            // Prebuilt native libraries are picked up from the libs directory.
            jniLibs.srcDirs = ['libs']
        }
    }
}

// Libraries used by the app, its unit tests and its instrumented tests.
dependencies {
    // Dependency on local binaries
    implementation fileTree(dir: 'libs', include: ['*.jar'])
    // Dependency on a remote binary
    implementation 'androidx.appcompat:appcompat:1.4.1'
    implementation 'com.google.android.material:material:1.5.0'
    implementation 'androidx.constraintlayout:constraintlayout:2.1.3'
    // Unit tests
    testImplementation 'junit:junit:4.13.2'
    // Instrumented tests
    androidTestImplementation 'androidx.test.ext:junit:1.1.3'
    androidTestImplementation 'androidx.test.espresso:espresso-core:3.4.0'
}

// List of prebuilt C++ SDK archives consumed by the downloadAndExtractLibs task.
// Each entry is a map with:
//   'src'  - URL of the .tgz archive to download
//   'dest' - directory the archive is extracted into
//   'name' - final directory name of the SDK under 'dest'
// The only entry is currently commented out, so the list is empty and the
// task's loop performs no download or extraction.
def CXX_LIB = [
//        [
//                'src' : 'https://bj.bcebos.com/fastdeploy/dev/android/fastdeploy-android-with-text-0.0.0-shared.tgz',
//                'dest': 'libs',
//                'name': 'fastdeploy-android-latest-shared-dev'
//        ]
]

// Downloads every archive listed in CXX_LIB into a local "cache" directory,
// extracts it into the entry's 'dest' directory and renames the extracted
// directory to the entry's 'name'. Entries whose SDK directory already exists
// are left untouched.
task downloadAndExtractLibs(type: DefaultTask) {
    doFirst {
        println "[INFO] Downloading and extracting fastdeploy android c++ lib ..."
    }
    doLast {
        // Downloaded archives are kept here so later builds can reuse them.
        String cachePath = "cache"
        if (!file("${cachePath}").exists()) {
            mkdir "${cachePath}"
        }

        CXX_LIB.eachWithIndex { lib, index ->

            String[] libPaths = lib.src.split("/")
            String sdkName = lib.name
            // Last URL path segment, i.e. the archive file name.
            String libName = libPaths[libPaths.length - 1]
            // Drop everything from the character before the first "tgz"
            // onwards (the ".tgz" suffix for a name like "foo.tgz").
            // NOTE(review): a file name without "tgz" makes indexOf return -1
            // and substring throw - confirm all sources end in ".tgz".
            libName = libName.substring(0, libName.indexOf("tgz") - 1)
            String cacheName = cachePath + "/" + "${libName}.tgz"

            // libDir: directory the archive is expected to unpack to;
            // sdkDir: final directory name requested by the entry.
            String libDir = lib.dest + "/" + libName
            String sdkDir = lib.dest + "/" + sdkName

            boolean copyFiles = false
            if (!file("${sdkDir}").exists()) {
                // Download lib and rename to sdk name later.
                if (!file("${cacheName}").exists()) {
                    println "[INFO] Downloading ${lib.src} -> ${cacheName}"
                    ant.get(src: lib.src, dest: file("${cacheName}"))
                }
                copyFiles = true
            }

            if (copyFiles) {
                println "[INFO] Taring ${cacheName} -> ${libDir}"
                // Extract the cached archive into the destination directory.
                copy { from(tarTree("${cacheName}")) into("${lib.dest}") }
                if (!libName.equals(sdkName)) {
                    // Move the extracted tree to its final SDK name:
                    // remove any stale target, copy, then delete the source.
                    if (file("${sdkDir}").exists()) {
                        delete("${sdkDir}")
                        println "[INFO] Remove old ${sdkDir}"
                    }
                    mkdir "${sdkDir}"
                    println "[INFO] Coping ${libDir} -> ${sdkDir}"
                    copy { from("${libDir}") into("${sdkDir}") }
                    delete("${libDir}")
                    println "[INFO] Removed ${libDir}"
                    println "[INFO] Update ${sdkDir} done!"
                }
            } else {
                // The SDK directory is already present; nothing is refreshed.
                println "[INFO] ${sdkDir} already exists!"
                println "[WARN] Please delete ${cacheName} and ${sdkDir} " +
                        "if you want to UPDATE ${sdkName} c++ lib. Then, rebuild this sdk."
            }
        }
    }
}

// Run the download/extract step before the Android preBuild task.
preBuild.dependsOn downloadAndExtractLibs

================================================
FILE: runtime/examples/android/VadJni/app/libs/.gitkeep
================================================


================================================
FILE: runtime/examples/android/VadJni/app/proguard-rules.pro
================================================
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
#   http://developer.android.com/guide/developing/tools/proguard.html

# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
#   public *;
#}

# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable

# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile

================================================
FILE: runtime/examples/android/VadJni/app/src/androidTest/java/com/baidu/paddlespeech/vadjni/ExampleInstrumentedTest.java
================================================
package com.baidu.paddlespeech.vadjni;

import android.content.Context;

import androidx.test.platform.app.InstrumentationRegistry;
import androidx.test.ext.junit.runners.AndroidJUnit4;

import org.junit.Test;
import org.junit.runner.RunWith;

import static org.junit.Assert.*;

/**
 * On-device (instrumented) smoke test for the VadJni sample application.
 *
 * @see <a href="http://d.android.com/tools/testing">Testing documentation</a>
 */
@RunWith(AndroidJUnit4.class)
public class ExampleInstrumentedTest {
    /**
     * Checks that the context of the app under test reports the expected
     * package name.
     */
    @Test
    public void useAppContext() {
        final String expectedPackage = "com.baidu.paddlespeech.vadjni";
        final Context targetContext =
                InstrumentationRegistry.getInstrumentation().getTargetContext();
        final String actualPackage = targetContext.getPackageName();
        assertEquals(expectedPackage, actualPackage);
    }
}

================================================
FILE: runtime/examples/android/VadJni/app/src/main/AndroidManifest.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools">

    <application
        android:allowBackup="true"
        android:dataExtractionRules="@xml/data_extraction_rules"
        android:fullBackupContent="@xml/backup_rules"
        android:icon="@mipmap/ic_launcher"
        android:label="@string/app_name"
        android:supportsRtl="true"
        android:theme="@style/Theme.VadJni"
        tools:targetApi="31">
        <activity
            android:name=".MainActivity"
            android:exported="true">
            <intent-filter>
                <action android:name="android.intent.action.MAIN" />

                <category android:name="android.intent.category.LAUNCHER" />
            </intent-filter>
        </activity>
    </application>

</manifest>

================================================
FILE: runtime/examples/android/VadJni/app/src/main/assets/.gitkeep
================================================


================================================
FILE: runtime/examples/android/VadJni/app/src/main/cpp/CMakeLists.txt
================================================
# For more information about using CMake with Android Studio, read the
# documentation: https://d.android.com/studio/projects/add-native-code.html

# Sets the minimum version of CMake required to build the native library.

cmake_minimum_required(VERSION 3.22.1)

# Declares and names the project.

project("vadjni")


# Directory holding the prebuilt PaddleSpeech/FastDeploy binaries for the ABI
# being built. This script lives in app/src/main/cpp, so the path resolves to
# app/libs/<ANDROID_ABI>.
set(PPS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../../../libs/${ANDROID_ABI})

# Makes headers next to this script (e.g. vad_interface.h) visible to the sources.
include_directories(${CMAKE_CURRENT_SOURCE_DIR})

# Creates and names a library, sets it as either STATIC
# or SHARED, and provides the relative paths to its source code.
# You can define multiple libraries, and CMake builds them for you.
# Gradle automatically packages shared libraries with your APK.

add_library( # Sets the name of the library.
        vadjni

        # Sets the library as a shared library.
        SHARED

        # Provides a relative path to your source file(s).
        native-lib.cpp)

# Searches for a specified prebuilt library and stores the path as a
# variable. Because CMake includes system libraries in the search path by
# default, you only need to specify the name of the public NDK library
# you want to add. CMake verifies that the library exists before
# completing its build.

find_library( # Sets the name of the path variable.
        log-lib

        # Specifies the name of the NDK library that
        # you want CMake to locate.
        log)

# Specifies libraries CMake should link to your target library. You
# can link multiple libraries, such as libraries you define in this
# build script, prebuilt third-party libraries, or system libraries.

# Printed at configure time to help diagnose a wrong prebuilt-library path.
message(STATUS "PPS_DIR=${PPS_DIR}")
target_link_libraries( # Specifies the target library.
        vadjni
        # Prebuilt shared libraries, referenced by full path under PPS_DIR.
        ${PPS_DIR}/libfastdeploy.so
        ${PPS_DIR}/libonnxruntime.so
        # Prebuilt static libraries, referenced by full path under PPS_DIR.
        ${PPS_DIR}/libgflags_nothreads.a
        ${PPS_DIR}/libbase.a
        ${PPS_DIR}/libpps_vad.a
        ${PPS_DIR}/libpps_vad_interface.a
        # Links the target library to the log library
        # included in the NDK.
        ${log-lib})

================================================
FILE: runtime/examples/android/VadJni/app/src/main/cpp/native-lib.cpp
================================================

#include <string>
#include "vad_interface.h"
#include <jni.h>

// JNI entry point for MainActivity.stringFromJNI(): returns a fixed greeting
// as a new Java string.
extern "C"
JNIEXPORT jstring JNICALL
Java_com_baidu_paddlespeech_vadjni_MainActivity_stringFromJNI(
        JNIEnv* env,
        jobject /* this */) {
    static const char kGreeting[] = "Hello from C++";
    return env->NewStringUTF(kGreeting);
}

// JNI entry point for MainActivity.createInstance(String).
//
// Converts the Java config path to a modified-UTF-8 C string, forwards it to
// PPSVadCreateInstance() and returns the resulting handle as a jlong.
//
// Returns 0 when conf_path is null or when the string characters cannot be
// obtained (in which case the JVM has a pending exception); otherwise the
// value returned by PPSVadCreateInstance() reinterpreted as an integer.
extern "C"
JNIEXPORT jlong JNICALL
Java_com_baidu_paddlespeech_vadjni_MainActivity_createInstance(
        JNIEnv* env,
        jobject thiz,
        jstring conf_path){
    (void)thiz;  // unused

    // Passing a null jstring to GetStringUTFChars is invalid JNI usage.
    if (conf_path == nullptr) {
        return 0;
    }

    const char* path = env->GetStringUTFChars(conf_path, nullptr);
    if (path == nullptr) {
        // Allocation failed; an OutOfMemoryError is already pending.
        return 0;
    }

    PPSHandle_t handle = PPSVadCreateInstance(path);

    // Every successful GetStringUTFChars must be paired with a release,
    // otherwise the character buffer leaks.
    // NOTE(review): assumes PPSVadCreateInstance does not keep `path` beyond
    // the call - confirm against the library implementation.
    env->ReleaseStringUTFChars(conf_path, path);

    return reinterpret_cast<jlong>(handle);
}


// JNI entry point for MainActivity.destroyInstance(long): reinterprets the
// jlong as a PPSHandle_t and returns the result of PPSVadDestroyInstance().
extern "C"
JNIEXPORT jint JNICALL
Java_com_baidu_paddlespeech_vadjni_MainActivity_destroyInstance(JNIEnv *env, jobject thiz,
                                                                jlong instance) {
    PPSHandle_t vad = reinterpret_cast<PPSHandle_t>(instance);
    const int status = PPSVadDestroyInstance(vad);
    return static_cast<jint>(status);
}
// JNI entry point for MainActivity.reset(long): reinterprets the jlong as a
// PPSHandle_t and returns the result of PPSVadReset().
extern "C"
JNIEXPORT jint JNICALL
Java_com_baidu_paddlespeech_vadjni_MainActivity_reset(JNIEnv *env, jobject thiz, jlong instance) {
    PPSHandle_t vad = reinterpret_cast<PPSHandle_t>(instance);
    const int status = PPSVadReset(vad);
    return static_cast<jint>(status);
}
// JNI entry point for MainActivity.chunkSizeSamples(long): reinterprets the
// jlong as a PPSHandle_t and returns the result of PPSVadChunkSizeSamples().
extern "C"
JNIEXPORT jint JNICALL
Java_com_baidu_paddlespeech_vadjni_MainActivity_chunkSizeSamples(JNIEnv *env, jobject thiz,
                                                                 jlong instance) {
    PPSHandle_t vad = reinterpret_cast<PPSHandle_t>(instance);
    const int samples = PPSVadChunkSizeSamples(vad);
    return static_cast<jint>(samples);
}
// JNI entry point for MainActivity.feedForward(long, float[]).
//
// Passes the elements of `chunk` to PPSVadFeedForward() together with the
// array length and returns the resulting PPSVadState_t as a jint.
//
// Returns PPS_VAD_ILLEGAL (0, the header's error state) when `chunk` is null
// or its elements cannot be obtained (a Java exception is then pending).
extern "C"
JNIEXPORT jint JNICALL
Java_com_baidu_paddlespeech_vadjni_MainActivity_feedForward(JNIEnv *env, jobject thiz,
                                                            jlong instance, jfloatArray chunk) {
    (void)thiz;  // unused

    // Passing a null array to the JNI array accessors is invalid usage.
    if (chunk == nullptr) {
        return static_cast<jint>(PPS_VAD_ILLEGAL);
    }

    PPSHandle_t handle = reinterpret_cast<PPSHandle_t>(instance);
    jsize num_elms = env->GetArrayLength(chunk);
    jfloat* chunk_ptr = env->GetFloatArrayElements(chunk, nullptr);
    if (chunk_ptr == nullptr) {
        // Allocation failed; an OutOfMemoryError is already pending.
        return static_cast<jint>(PPS_VAD_ILLEGAL);
    }

    jint state = static_cast<jint>(PPSVadFeedForward(
            handle, reinterpret_cast<float*>(chunk_ptr), static_cast<int>(num_elms)));

    // Every GetFloatArrayElements must be paired with a release, otherwise the
    // copy (or pin) leaks on each call. JNI_ABORT frees the buffer without
    // copying it back, matching the previous behavior where a copied buffer
    // was never written back to the Java array.
    env->ReleaseFloatArrayElements(chunk, chunk_ptr, JNI_ABORT);

    return state;
}

================================================
FILE: runtime/examples/android/VadJni/app/src/main/cpp/vad_interface.h
================================================
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

// C-linkage API of the PaddleSpeech VAD library; usable from both C and C++.
#ifdef __cplusplus
extern "C" {
#endif

// Handle to a VAD instance, passed to every function below. It is an untyped
// pointer; callers should treat its contents as opaque.
typedef void* PPSHandle_t;

// State reported by PPSVadFeedForward().
typedef enum {
    PPS_VAD_ILLEGAL = 0,  // error
    PPS_VAD_SIL,          // silence
    PPS_VAD_START,        // start speech
    PPS_VAD_SPEECH,       // in speech
    PPS_VAD_END,          // end speech
    PPS_VAD_NUMSTATES,    // number of states
} PPSVadState_t;

// Creates a VAD instance from the configuration file at `conf_path` and
// returns its handle.
// NOTE(review): failure behavior (e.g. a null handle) is not visible from this
// header - confirm against the implementation.
PPSHandle_t PPSVadCreateInstance(const char* conf_path);

// Destroys an instance obtained from PPSVadCreateInstance().
// NOTE(review): meaning of the int return value is not documented here -
// presumably a status code; confirm.
int PPSVadDestroyInstance(PPSHandle_t instance);

// Resets the given instance.
// NOTE(review): presumably clears streaming state between utterances and
// returns a status code - confirm against the implementation.
int PPSVadReset(PPSHandle_t instance);

// Returns the chunk size, in samples, for the given instance.
// NOTE(review): presumably the `num_element` expected by PPSVadFeedForward() -
// confirm.
int PPSVadChunkSizeSamples(PPSHandle_t instance);

// Feeds `num_element` float samples starting at `chunk` to the instance and
// returns the resulting VAD state (PPS_VAD_ILLEGAL denotes an error).
// NOTE(review): sample rate / value range of `chunk` are not specified here.
PPSVadState_t PPSVadFeedForward(PPSHandle_t instance,
                                float* chunk,
                                int num_element);

#ifdef __cplusplus
}
#endif  // __cplusplus

================================================
FILE: runtime/examples/android/VadJni/app/src/main/java/com/baidu/paddlespeech/vadjni/MainActivity.java
================================================
package com.baidu.paddlespeech.vadjni;

import androidx.appcompat.app.AppCompatActivity;

import android.os.Bundle;
import android.widget.Button;
import android.widget.TextView;

import com.baidu.paddlespeech.vadjni.databinding.ActivityMainBinding;

/**
 * Single-screen activity of the VadJni sample. It loads the 'vadjni' native
 * library and declares the Java side of the JNI bindings implemented in
 * native-lib.cpp.
 */
public class MainActivity extends AppCompatActivity {

    // The native library must be loaded before any native method is invoked,
    // so it is done once when this class is initialized.
    static {
        System.loadLibrary("vadjni");
    }

    // View binding generated from the activity_main layout.
    private ActivityMainBinding binding;
    // Slot for a native VAD handle; not assigned anywhere in this class yet.
    private long instance;

    /**
     * Inflates the layout and shows the greeting returned by the native
     * library in the sample text view.
     */
    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);

        binding = ActivityMainBinding.inflate(getLayoutInflater());
        setContentView(binding.getRoot());

        // Display the string produced by the native side.
        final TextView greetingView = binding.sampleText;
        greetingView.setText(stringFromJNI());

        // Looked up but not wired to any listener yet.
        final Button loadWavButton = binding.loadWav;
    }

    /**
     * Returns a greeting string built by the 'vadjni' native library that is
     * packaged with this application.
     */
    public native String stringFromJNI();

    /**
     * Creates a native VAD instance from the given configuration path.
     *
     * @return the native handle encoded as a long
     */
    public static native long createInstance(String config_path);

    /** Destroys the native VAD instance identified by {@code instance}. */
    public static native int destroyInstance(long instance);

    /** Resets the native VAD instance identified by {@code instance}. */
    public static native int reset(long instance);

    /** Returns the chunk size, in samples, reported by the native instance. */
    public static native int chunkSizeSamples(long instance);

    /**
     * Feeds one chunk of samples to the native instance.
     *
     * @return the native VAD state value as an int
     */
    public static native int feedForward(long instance, float[] chunk);
}

================================================
FILE: runtime/examples/android/VadJni/app/src/main/res/drawable/ic_launcher_background.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path
        android:fillColor="#3DDC84"
        android:pathData="M0,0h108v108h-108z" />
    <path
        android:fillColor="#00000000"
        android:pathData="M9,0L9,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,0L19,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,0L29,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,0L39,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,0L49,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,0L59,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,0L69,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,0L79,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M89,0L89,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M99,0L99,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,9L108,9"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,19L108,19"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,29L108,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,39L108,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,49L108,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,59L108,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,69L108,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,79L108,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,89L108,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,99L108,99"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,29L89,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,39L89,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,49L89,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,59L89,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,69L89,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,79L89,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,19L29,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,19L39,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,19L49,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,19L59,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,19L69,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,19L79,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
</vector>


================================================
FILE: runtime/examples/android/VadJni/app/src/main/res/drawable-v24/ic_launcher_foreground.xml
================================================
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:aapt="http://schemas.android.com/aapt"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path android:pathData="M31,63.928c0,0 6.4,-11 12.1,-13.1c7.2,-2.6 26,-1.4 26,-1.4l38.1,38.1L107,108.928l-32,-1L31,63.928z">
        <aapt:attr name="android:fillColor">
            <gradient
                android:endX="85.84757"
                android:endY="92.4963"
                android:startX="42.9492"
                android:startY="49.59793"
                android:type="linear">
                <item
                    android:color="#44000000"
                    android:offset="0.0" />
                <item
                    android:color="#00000000"
                    android:offset="1.0" />
            </gradient>
        </aapt:attr>
    </path>
    <path
        android:fillColor="#FFFFFF"
        android:fillType="nonZero"
        android:pathData="M65.3,45.828l3.8,-6.6c0.2,-0.4 0.1,-0.9 -0.3,-1.1c-0.4,-0.2 -0.9,-0.1 -1.1,0.3l-3.9,6.7c-6.3,-2.8 -13.4,-2.8 -19.7,0l-3.9,-6.7c-0.2,-0.4 -0.7,-0.5 -1.1,-0.3C38.8,38.328 38.7,38.828 38.9,39.228l3.8,6.6C36.2,49.428 31.7,56.028 31,63.928h46C76.3,56.028 71.8,49.428 65.3,45.828zM43.4,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2c-0.3,-0.7 -0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C45.3,56.528 44.5,57.328 43.4,57.328L43.4,57.328zM64.6,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2s-0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C66.5,56.528 65.6,57.328 64.6,57.328L64.6,57.328z"
        android:strokeWidth="1"
        android:strokeColor="#00000000" />
</vector>

================================================
FILE: runtime/examples/android/VadJni/app/src/main/res/layout/activity_main.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    tools:context=".MainActivity">

    <TextView
        android:id="@+id/sample_text"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"
        android:text="Hello World!"
        app:layout_constraintBottom_toBottomOf="parent"
        app:layout_constraintEnd_toEndOf="parent"
        app:layout_constraintStart_toStartOf="parent"
        app:layout_constraintTop_toTopOf="parent"
        app:layout_constraintVertical_bias="0.483" />

    <Button
        android:id="@+id/load_wav"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"
        android:text="Load Wav"
        tools:layout_editor_absoluteX="150dp"
        tools:layout_editor_absoluteY="429dp" />

</androidx.constraintlayout.widget.ConstraintLayout>

================================================
FILE: runtime/examples/android/VadJni/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: runtime/examples/android/VadJni/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: runtime/examples/android/VadJni/app/src/main/res/mipmap-anydpi-v33/ic_launcher.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
    <monochrome android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: runtime/examples/android/VadJni/app/src/main/res/values/colors.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>
    <color name="purple_200">#FFBB86FC</color>
    <color name="purple_500">#FF6200EE</color>
    <color name="purple_700">#FF3700B3</color>
    <color name="teal_200">#FF03DAC5</color>
    <color name="teal_700">#FF018786</color>
    <color name="black">#FF000000</color>
    <color name="white">#FFFFFFFF</color>
</resources>

================================================
FILE: runtime/examples/android/VadJni/app/src/main/res/values/strings.xml
================================================
<resources>
    <string name="app_name">VadJni</string>
</resources>

================================================
FILE: runtime/examples/android/VadJni/app/src/main/res/values/themes.xml
================================================
<resources xmlns:tools="http://schemas.android.com/tools">
    <!-- Base application theme. -->
    <style name="Theme.VadJni" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
        <!-- Primary brand color. -->
        <item name="colorPrimary">@color/purple_500</item>
        <item name="colorPrimaryVariant">@color/purple_700</item>
        <item name="colorOnPrimary">@color/white</item>
        <!-- Secondary brand color. -->
        <item name="colorSecondary">@color/teal_200</item>
        <item name="colorSecondaryVariant">@color/teal_700</item>
        <item name="colorOnSecondary">@color/black</item>
        <!-- Status bar color. -->
        <item name="android:statusBarColor">?attr/colorPrimaryVariant</item>
        <!-- Customize your theme here. -->
    </style>
</resources>

================================================
FILE: runtime/examples/android/VadJni/app/src/main/res/values-night/themes.xml
================================================
<resources xmlns:tools="http://schemas.android.com/tools">
    <!-- Base application theme. -->
    <style name="Theme.VadJni" parent="Theme.MaterialComponents.DayNight.DarkActionBar">
        <!-- Primary brand color. -->
        <item name="colorPrimary">@color/purple_200</item>
        <item name="colorPrimaryVariant">@color/purple_700</item>
        <item name="colorOnPrimary">@color/black</item>
        <!-- Secondary brand color. -->
        <item name="colorSecondary">@color/teal_200</item>
        <item name="colorSecondaryVariant">@color/teal_200</item>
        <item name="colorOnSecondary">@color/black</item>
        <!-- Status bar color. -->
        <item name="android:statusBarColor">?attr/colorPrimaryVariant</item>
        <!-- Customize your theme here. -->
    </style>
</resources>

================================================
FILE: runtime/examples/android/VadJni/app/src/main/res/xml/backup_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample backup rules file; uncomment and customize as necessary.
   See https://developer.android.com/guide/topics/data/autobackup
   for details.
   Note: This file is ignored for devices older than API 31
   See https://developer.android.com/about/versions/12/backup-restore
-->
<full-backup-content>
    <!--
   <include domain="sharedpref" path="."/>
   <exclude domain="sharedpref" path="device.xml"/>
-->
</full-backup-content>

================================================
FILE: runtime/examples/android/VadJni/app/src/main/res/xml/data_extraction_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample data extraction rules file; uncomment and customize as necessary.
   See https://developer.android.com/about/versions/12/backup-restore#xml-changes
   for details.
-->
<data-extraction-rules>
    <cloud-backup>
        <!-- TODO: Use <include> and <exclude> to control what is backed up.
        <include .../>
        <exclude .../>
        -->
    </cloud-backup>
    <!--
    <device-transfer>
        <include .../>
        <exclude .../>
    </device-transfer>
    -->
</data-extraction-rules>

================================================
FILE: runtime/examples/android/VadJni/build.gradle
================================================
// Top-level build file where you can add configuration options common to all sub-projects/modules.
// Pins the Android Gradle plugin version for all modules. 'apply false' only
// makes the plugins available; each module applies the one it needs in its own
// build.gradle (the app module applies 'com.android.application').
plugins {
    id 'com.android.application' version '7.4.2' apply false
    id 'com.android.library' version '7.4.2' apply false
}

================================================
FILE: runtime/examples/android/VadJni/gradle/wrapper/gradle-wrapper.properties
================================================
#Wed Mar 08 19:23:10 CST 2023
distributionBase=GRADLE_USER_HOME
distributionUrl=https\://services.gradle.org/distributions/gradle-7.5-bin.zip
distributionPath=wrapper/dists
zipStorePath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME


================================================
FILE: runtime/examples/android/VadJni/gradle.properties
================================================
# Project-wide Gradle settings.
# IDE (e.g. Android Studio) users:
# Gradle settings configured through the IDE *will override*
# any settings specified in this file.
# For more details on how to configure your build environment visit
# http://www.gradle.org/docs/current/userguide/build_environment.html
# Specifies the JVM arguments used for the daemon process.
# The setting is particularly useful for tweaking memory settings.
org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
# When configured, Gradle will run in incubating parallel mode.
# This option should only be used with decoupled projects. More details, visit
# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
# org.gradle.parallel=true
# AndroidX package structure to make it clearer which packages are bundled with the
# Android operating system, and which are packaged with your app's APK
# https://developer.android.com/topic/libraries/support-library/androidx-rn
android.useAndroidX=true
# Enables namespacing of each library's R class so that its R class includes only the
# resources declared in the library itself and none from the library's dependencies,
# thereby reducing the size of the R class for that library
android.nonTransitiveRClass=true

================================================
FILE: runtime/examples/android/VadJni/gradlew
================================================
#!/usr/bin/env sh

#
# Copyright 2015 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

##############################################################################
##
##  Gradle start up script for UN*X
##
##############################################################################

# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
        PRG="$link"
    else
        PRG=`dirname "$PRG"`"/$link"
    fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null

APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

# Print a warning: all arguments, joined into a single string, are written to
# standard output. Execution continues afterwards.
warn () {
    echo "$*"
}

# Print a fatal error and terminate the script: the arguments, joined into a
# single string, are written to standard output surrounded by blank lines,
# then the script exits with status 1.
die () {
    echo
    echo "$*"
    echo
    exit 1
}

# OS specific support (must be 'true' or 'false').
# Every flag starts out false; at most one is switched on according to the
# prefix of the `uname` output.
cygwin=false msys=false darwin=false nonstop=false
case "`uname`" in
  CYGWIN* )  cygwin=true ;;
  Darwin* )  darwin=true ;;
  MINGW* )   msys=true ;;
  NONSTOP* ) nonstop=true ;;
esac

# The wrapper jar is the only classpath entry needed to bootstrap Gradle.
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar


# Determine the Java command to use to start the JVM.
# With JAVA_HOME set, the executable must exist inside it; otherwise fall back
# to whatever `java` the PATH provides.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD="$JAVA_HOME/jre/sh/java"
    else
        JAVACMD="$JAVA_HOME/bin/java"
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD="java"
    # `command -v` is the POSIX way to probe the PATH; `which` is an external
    # tool that is not guaranteed to be installed.
    command -v java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
# Skipped on Cygwin, Darwin and NonStop. MAX_FD set to "maximum"/"max" means
# "raise the soft limit up to the hard limit"; any other value is used as-is.
if [ "$cygwin" = "false" ] && [ "$darwin" = "false" ] && [ "$nonstop" = "false" ] ; then
    if MAX_FD_LIMIT=`ulimit -H -n` ; then
        if [ "$MAX_FD" = "maximum" ] || [ "$MAX_FD" = "max" ] ; then
            MAX_FD="$MAX_FD_LIMIT"
        fi
        if ! ulimit -n $MAX_FD ; then
            warn "Could not set maximum file descriptor limit: $MAX_FD"
        fi
    else
        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
    fi
fi

# For Darwin, add options to specify how the application appears in the dock
# (dock name and icon are appended to GRADLE_OPTS as quoted JVM arguments).
if [ "$darwin" = "true" ] ; then
    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`

    JAVACMD=`cygpath --unix "$JAVACMD"`

    # We build the pattern for arguments to be converted via cygpath
    # (an alternation of every top-level directory under /).
    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
    SEP=""
    for dir in $ROOTDIRSRAW ; do
        ROOTDIRS="$ROOTDIRS$SEP$dir"
        SEP="|"
    done
    OURCYGPATTERN="(^($ROOTDIRS))"
    # Add a user-defined pattern to the cygpath arguments
    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
    fi
    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    i=0
    for arg in "$@" ; do
        # `grep -E` replaces the obsolescent `egrep` alias.
        # CHECK: the argument looks like a path below one of the root directories.
        CHECK=`echo "$arg"|grep -E -c "$OURCYGPATTERN" -`
        # CHECK2: the argument starts with "-", i.e. it is an option.
        CHECK2=`echo "$arg"|grep -E -c "^-"`

        # Only path-like arguments that are not options are converted.
        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then
            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
        else
            eval `echo args$i`="\"$arg\""
        fi
        i=`expr $i + 1`
    done
    # NOTE(review): only 0-9 arguments are handled here; with 10 or more no
    # branch matches and the positional parameters are left unconverted.
    case $i in
        0) set -- ;;
        1) set -- "$args0" ;;
        2) set -- "$args0" "$args1" ;;
        3) set -- "$args0" "$args1" "$args2" ;;
        4) set -- "$args0" "$args1" "$args2" "$args3" ;;
        5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
        6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
        7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
        8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
        9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
    esac
fi

# Escape application args
# Emits each argument wrapped in single quotes, with embedded single quotes
# rewritten as '\'' and a trailing " \" line continuation, so the output can
# later be passed through `eval` and come back as the original arguments.
save () {
    # sed: escape every ', prefix the first line with ', and append "' \" to
    # the last line of the argument (arguments may contain newlines).
    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
    # Final line holding a single space terminates the continuation chain.
    echo " "
}
# Capture the script's own arguments in eval-safe quoted form.
APP_ARGS=`save "$@"`

# Collect all arguments for the java command, following the shell quoting and substitution rules
# (the option variables are deliberately unquoted so eval re-parses the quotes
# embedded in them).
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

# Replace this shell with the JVM running the Gradle wrapper main class.
exec "$JAVACMD" "$@"


================================================
FILE: runtime/examples/android/VadJni/gradlew.bat
================================================
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem      https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem

@rem Command echoing is switched off unless DEBUG is set.
@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

@rem DIRNAME is the drive and path of this script; APP_BASE_NAME is its file name without extension.
set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

@rem No JAVA_HOME: probe the PATH by running "java -version" and checking its exit code.
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto execute

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
@rem Strip any double quotes from JAVA_HOME before building the executable path.
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto execute

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar


@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*

:end
@rem End local scope for the variables with windows NT shell
@rem A zero exit code from Java skips the failure handling below.
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega


================================================
FILE: runtime/examples/android/VadJni/settings.gradle
================================================
// Repositories used to resolve Gradle plugins.
pluginManagement {
    repositories {
        google()
        mavenCentral()
        gradlePluginPortal()
    }
}
// Repositories used to resolve project dependencies. FAIL_ON_PROJECT_REPOS makes
// the build fail if a module declares its own repositories instead of using these.
dependencyResolutionManagement {
    repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
    repositories {
        google()
        mavenCentral()
    }
}
// Root project name and the single application module that makes up the build.
rootProject.name = "VadJni"
include ':app'


================================================
FILE: runtime/examples/audio_classification/README.md
================================================
# audio classification

This directory provides an audio classification example that runs on CPU.

## conf
`conf` is the configuration file passed to the engine:

    [CONF]
    wav_normal=true
    wav_normal_type=linear
    wav_norm_mul_factor=1.0
    model_path=./inference.onnx
    param_path=
    dict_path=./label_list
    num_cpu_thread=1
    samp_freq=32000
    frame_length_ms=32
    frame_shift_ms=10
    num_bins=64
    low_freq=50
    high_freq=14000
    dither=0.0
## label_list
`label_list` contains the labels the model can output, one per line:

    Dog
    Rooster
    Pig
    Cow
    Frog
    Cat
    Hen
    Insects (flying)
    Sheep
    Crow
    Rain
    Sea waves
    Crackling fire
    .....
## scp && test.wav
`scp` is the input list for the engine; each line in `scp` refers to one wav file (for example `test.wav`).
## execute
Run the engine from this directory:

    ../../build/Linux/x86_64/engine/audio_classification/nnet/panns_nnet_main --conf_path=./conf --scp_path=./scp --topk=1

Usage: `panns_nnet_main --conf_path=<conf> --scp_path=<scp> --topk=<k>`

The output looks like this:

    wav_normal = true
    wav_normal_type = linear
    wav_norm_mul_factor = 1.0
    model_path = ./inference.onnx
    param_path = 
    dict_path = ./label_list
    num_cpu_thread = 1
    samp_freq = 32000
    frame_length_ms = 32
    frame_shift_ms = 10
    num_bins = 64
    low_freq = 50
    high_freq = 14000
    dither = 0.0
    [INFO] fastdeploy/runtime/runtime.cc(293)::CreateOrtBackend     Runtime initialized with Backend::ORT in Device::CPU.
    --- Init FastDeploy Runitme Done! 
    --- Model:  ./inference.onnx
    test.wav{"Clock alarm":"16.5309"}
## android demo
### install
#### copy lib & interface
Build the Android libraries from the `runtime` directory, then copy the shared libraries and the interface header into the demo project:

    cd ../../
    sh build_android.sh
    cp build/Android/arm64-v8a-api-21/cls-android-out/*.so examples/audio_classification/android_demo/app/src/main/cpp/jniLibs/arm64-v8a/
    cp build/Android/arm64-v8a-api-21/cls-android-out/panns_interface.h examples/audio_classification/android_demo/app/src/main/cpp/includes/

#### set path
Push the resources to the Android phone:

1. Change the resource paths in `conf` to absolute paths on the device, such as:

    [CONF]
    wav_normal=true
    wav_normal_type=linear
    wav_norm_mul_factor=1.0
    model_path=/data/local/tmp/inference.onnx
    param_path=
    dict_path=/data/local/tmp/label_list
    num_cpu_thread=1
    samp_freq=32000
    frame_length_ms=32
    frame_shift_ms=10
    num_bins=64
    low_freq=50
    high_freq=14000
    dither=0.0
2. adb push conf label_list scp test.wav /data/local/tmp/
3. set resource path in android demo(android_demo/app/src/main/cpp/native-lib.cpp) to actual path, such as:

        std::string conf_path = "/data/local/tmp/conf";
        std::string wav_path = "/data/local/tmp/test.wav";

4. execute android_demo in android studio


================================================
FILE: runtime/examples/audio_classification/android_demo/.gitignore
================================================
*.iml
.gradle
/local.properties
/.idea/caches
/.idea/libraries
/.idea/modules.xml
/.idea/workspace.xml
/.idea/navEditor.xml
/.idea/assetWizardSettings.xml
.DS_Store
/build
/captures
.externalNativeBuild
.cxx
local.properties


================================================
FILE: runtime/examples/audio_classification/android_demo/app/.gitignore
================================================
/build

================================================
FILE: runtime/examples/audio_classification/android_demo/app/build.gradle
================================================
// Plugins applied to the app module: the Android application plugin and Kotlin for Android.
plugins {
    id 'com.android.application'
    id 'org.jetbrains.kotlin.android'
}

// Android build configuration for the audio classification demo app.
android {
    namespace 'com.baidu.paddlespeech.cls'
    compileSdk 32

    defaultConfig {
        applicationId "com.baidu.paddlespeech.cls"
        minSdk 28
        targetSdk 32
        versionCode 1
        versionName "1.0"

        testInstrumentationRunner "android.support.test.runner.AndroidJUnitRunner"
        externalNativeBuild {
            cmake {
                cppFlags ''
            }
        }
        // Restrict the NDK build to the listed CPU architectures (ABIs).
        ndk {
            abiFilters 'arm64-v8a'
        }
    }

    buildTypes {
        release {
            minifyEnabled false
            proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
        }
    }
    compileOptions {
        sourceCompatibility JavaVersion.VERSION_1_8
        targetCompatibility JavaVersion.VERSION_1_8
    }
    kotlinOptions {
        jvmTarget = '1.8'
    }
    // Native code is built by CMake from the script under src/main/cpp.
    externalNativeBuild {
        cmake {
            path file('src/main/cpp/CMakeLists.txt')
            version '3.22.1'
        }
    }
    // The external .so libraries are kept under src/main/cpp/jniLibs, so that folder is
    // registered as the jniLibs directory to get them packaged into the app; otherwise
    // the app fails at runtime because the .so libraries cannot be found.
    sourceSets {
        main {
            jniLibs.srcDirs = ['src/main/cpp/jniLibs']
            resources { srcDirs = ['src/main/cpp/resources'] }
        }
    }
}

// Library dependencies. The app uses the Android support libraries
// (com.android.support.*), matching the android.support imports in the sources.
dependencies {

    //noinspection GradleCompatible
    implementation 'com.android.support:appcompat-v7:28.0.0'
    implementation 'com.android.support.constraint:constraint-layout:2.0.4'
    // Local unit tests.
    testImplementation 'junit:junit:4.13.2'
    // Instrumented (on-device) tests.
    androidTestImplementation 'com.android.support.test:runner:1.0.2'
    androidTestImplementation 'com.android.support.test.espresso:espresso-core:3.0.2'
}

================================================
FILE: runtime/examples/audio_classification/android_demo/app/proguard-rules.pro
================================================
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
#   http://developer.android.com/guide/developing/tools/proguard.html

# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
#   public *;
#}

# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable

# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile

================================================
FILE: runtime/examples/audio_classification/android_demo/app/src/androidTest/java/com/example/cls/ExampleInstrumentedTest.kt
================================================
package com.example.cls

import android.support.test.InstrumentationRegistry
import android.support.test.runner.AndroidJUnit4

import org.junit.Test
import org.junit.runner.RunWith

import org.junit.Assert.*

/**
 * Instrumented test, which will execute on an Android device.
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
@RunWith(AndroidJUnit4::class)
class ExampleInstrumentedTest {
    /**
     * Checks that the app under test is installed under the expected package name.
     *
     * The expected value must match the `applicationId` declared in
     * app/build.gradle, not the package of this test class.
     */
    @Test
    fun useAppContext() {
        // Context of the app under test.
        val appContext = InstrumentationRegistry.getInstrumentation().targetContext
        assertEquals("com.baidu.paddlespeech.cls", appContext.packageName)
    }
}

================================================
FILE: runtime/examples/audio_classification/android_demo/app/src/main/AndroidManifest.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools" >

    <!-- Application-wide settings: backup rules, launcher icons, label and theme. -->
    <application
        android:allowBackup="true"
        android:dataExtractionRules="@xml/data_extraction_rules"
        android:fullBackupContent="@xml/backup_rules"
        android:icon="@mipmap/ic_launcher"
        android:label="@string/app_name"
        android:roundIcon="@mipmap/ic_launcher_round"
        android:supportsRtl="true"
        android:theme="@style/Theme.Cls"
        tools:targetApi="31" >
        <!-- MainActivity is the only activity and the launcher entry point. -->
        <activity
            android:name=".MainActivity"
            android:exported="true" >
            <intent-filter>
                <action android:name="android.intent.action.MAIN" />

                <category android:name="android.intent.category.LAUNCHER" />
            </intent-filter>

            <!-- NOTE(review): lib_name is left empty; MainActivity loads "native-lib" itself. -->
            <meta-data
                android:name="android.app.lib_name"
                android:value="" />
        </activity>
    </application>

</manifest>

================================================
FILE: runtime/examples/audio_classification/android_demo/app/src/main/cpp/CMakeLists.txt
================================================

# For more information about using CMake with Android Studio, read the
# documentation: https://d.android.com/studio/projects/add-native-code.html

# Sets the minimum version of CMake required to build the native library.

cmake_minimum_required(VERSION 3.18.1)

# Declares and names the project.

project("cls")

# Header search path. This script already lives in src/main/cpp, so the
# directory itself is added (headers are included as "includes/...").
include_directories(${CMAKE_CURRENT_SOURCE_DIR})

# Location of the prebuilt .so libraries, one sub-directory per ABI.
set(distribution_DIR ${CMAKE_SOURCE_DIR}/jniLibs)

# Creates and names a library, sets it as either STATIC
# or SHARED, and provides the relative paths to its source code.
# You can define multiple libraries, and CMake builds them for you.
# Gradle automatically packages shared libraries with your APK.

add_library( # Sets the name of the library.
        native-lib

        # Sets the library as a shared library.
        SHARED

        # Provides a relative path to your source file(s).
        ${CMAKE_CURRENT_SOURCE_DIR}/native-lib.cpp )

# Searches for a specified prebuilt library and stores the path as a
# variable. Because CMake includes system libraries in the search path by
# default, you only need to specify the name of the public NDK library
# you want to add. CMake verifies that the library exists before
# completing its build.

find_library( # Sets the name of the path variable.
        log-lib

        # Specifies the name of the NDK library that
        # you want CMake to locate.
        log )

# Specifies libraries CMake should link to your target library. You
# can link multiple libraries, such as libraries you define in this
# build script, prebuilt third-party libraries, or system libraries.

target_link_libraries( # Specifies the target library.
        native-lib
        # Prebuilt libraries for the ABI currently being built.
        ${distribution_DIR}/${ANDROID_ABI}/libc++_shared.so
        ${distribution_DIR}/${ANDROID_ABI}/libcls.so
        ${distribution_DIR}/${ANDROID_ABI}/libfastdeploy.so
        ${distribution_DIR}/${ANDROID_ABI}/libonnxruntime.so
        # Links the target library to the log library
        # included in the NDK.
        ${log-lib}
        )


================================================
FILE: runtime/examples/audio_classification/android_demo/app/src/main/cpp/includes/panns_interface.h
================================================
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

// C-style entry points of the audio classification (PANNs) engine.
// The engine is addressed through an opaque void* handle.
// NOTE(review): the meaning of the int return codes is not visible in this
// header - confirm against the library implementation.
namespace ppspeech {

// Creates an engine instance from the configuration file at conf_path and
// returns its handle.
void* ClsCreateInstance(const char* conf_path);
// Destroys an instance previously returned by ClsCreateInstance.
int ClsDestroyInstance(void* instance);
// Classifies the wav file at wav_path and writes the result text into
// `result`, a caller-provided buffer of result_max_len bytes.
// topk presumably selects how many top labels are reported - TODO confirm.
int ClsFeedForward(void* instance,
                   const char* wav_path,
                   int topk,
                   char* result,
                   int result_max_len);
// Resets the given instance.
// NOTE(review): which state is cleared is not visible here.
int ClsReset(void* instance);
}  // namespace ppspeech

================================================
FILE: runtime/examples/audio_classification/android_demo/app/src/main/cpp/native-lib.cpp
================================================
// Write C++ code here.
//
// Do not forget to dynamically load the C++ library into your application.
//
// For instance,
//
// In MainActivity.java:
//    static {
//       System.loadLibrary("mysotest");
//    }
//
// Or, in MainActivity.kt:
//    companion object {
//      init {
//         System.loadLibrary("mysotest")
//      }
//    }

#include <jni.h>
#include <stdlib.h>
#include <iostream>
#include "includes/panns_interface.h"

// If you would rather not include the header, comment out the #include above
// and uncomment the declaration below instead.
//string getStringFromSoLibrary();

// Handle of the single classification engine shared by all JNI entry points
// in this file; nullptr while no engine exists.
void* cls_instance = nullptr;

// JNI: MainActivity.nClsCreateInstance().
// (Re)creates the global classification engine from the configuration file on
// the device. Returns JNI_TRUE when an engine handle is available afterwards,
// JNI_FALSE when the library handed back a null handle.
extern "C"
JNIEXPORT jboolean JNICALL Java_com_baidu_paddlespeech_cls_MainActivity_nClsCreateInstance(JNIEnv *env, jobject instance)
{
    // Release any engine left over from an earlier call before creating a new one.
    if (cls_instance != nullptr) {
        ppspeech::ClsDestroyInstance(cls_instance);
        cls_instance = nullptr;
    }
    // Location the README tells users to push `conf` to (adb push ... /data/local/tmp/).
    // Change it here if the resources are stored elsewhere on the device.
    std::string conf_path = "/data/local/tmp/conf";
    cls_instance = ppspeech::ClsCreateInstance(conf_path.c_str());
    // Report the real outcome instead of unconditionally claiming success.
    return cls_instance != nullptr ? JNI_TRUE : JNI_FALSE;
}

// JNI: MainActivity.nClsDestroyInstance().
// Destroys the global engine if one exists and clears the handle.
// Always returns true.
extern "C"
JNIEXPORT jboolean JNICALL Java_com_baidu_paddlespeech_cls_MainActivity_nClsDestroyInstance(JNIEnv *env, jobject instance){
    // Nothing to tear down when no engine was created.
    if (cls_instance == nullptr) {
        return true;
    }
    ppspeech::ClsDestroyInstance(cls_instance);
    cls_instance = nullptr;
    return true;
}

// JNI: MainActivity.nClsFeedForward().
// Classifies the test wav file with the global engine (top-1) and returns the
// result text as a Java string. Returns an empty string when no engine has
// been created, so the Kotlin side (declared as non-null String) never
// receives null.
extern "C"
JNIEXPORT jstring JNICALL Java_com_baidu_paddlespeech_cls_MainActivity_nClsFeedForward(JNIEnv *env, jobject instance){
    if (cls_instance == nullptr) {
        return env->NewStringUTF("");
    }
    char result[1024] = {0};
    // Location the README tells users to push `test.wav` to (adb push ... /data/local/tmp/).
    // Change it here if the resources are stored elsewhere on the device.
    std::string wav_path = "/data/local/tmp/test.wav";
    // NOTE(review): the meaning of the return code is not visible from here, so it
    // is not interpreted; on failure `result` presumably stays empty - confirm.
    ppspeech::ClsFeedForward(cls_instance, wav_path.c_str(), 1, result, sizeof(result));
    // Guarantee termination even if the library filled the whole buffer.
    result[sizeof(result) - 1] = '\0';
    return env->NewStringUTF(result);
}

// JNI: MainActivity.nClsReset().
// Resets the global engine if one exists. Always returns true.
extern "C"
JNIEXPORT jboolean JNICALL Java_com_baidu_paddlespeech_cls_MainActivity_nClsReset(JNIEnv *env, jobject instance){
    // Without an engine there is nothing to reset.
    if (cls_instance == nullptr) {
        return true;
    }
    ppspeech::ClsReset(cls_instance);
    return true;
}

================================================
FILE: runtime/examples/audio_classification/android_demo/app/src/main/java/com/example/cls/MainActivity.kt
================================================
package com.baidu.paddlespeech.cls

import android.support.v7.app.AppCompatActivity
import android.os.Bundle
import android.widget.TextView

/**
 * Single screen of the demo: runs one classification through the native
 * library when the activity is created and shows the returned text.
 */
class MainActivity : AppCompatActivity() {

    companion object {
        // Load the JNI bridge once, when the class is first used.
        init {
            System.loadLibrary("native-lib")
        }
    }

    // Text view that displays the classification result.
    private lateinit var resultView: TextView

    // Native entry points implemented in native-lib.cpp.
    external fun nClsCreateInstance(): Boolean
    external fun nClsFeedForward(): String
    external fun nClsReset(): Boolean
    external fun nClsDestroyInstance(): Boolean

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.activity_main)
        resultView = findViewById<TextView>(R.id.tv_content)

        // Engine life cycle: create, classify, reset, destroy.
        nClsCreateInstance()
        resultView.text = nClsFeedForward()
        nClsReset()
        nClsDestroyInstance()
    }
}

================================================
FILE: runtime/examples/audio_classification/android_demo/app/src/main/res/drawable/ic_launcher_background.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path
        android:fillColor="#3DDC84"
        android:pathData="M0,0h108v108h-108z" />
    <path
        android:fillColor="#00000000"
        android:pathData="M9,0L9,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,0L19,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,0L29,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,0L39,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,0L49,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,0L59,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,0L69,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,0L79,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M89,0L89,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M99,0L99,108"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,9L108,9"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,19L108,19"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,29L108,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,39L108,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,49L108,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,59L108,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,69L108,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,79L108,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,89L108,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M0,99L108,99"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,29L89,29"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,39L89,39"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,49L89,49"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,59L89,59"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,69L89,69"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M19,79L89,79"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M29,19L29,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M39,19L39,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M49,19L49,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M59,19L59,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M69,19L69,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
    <path
        android:fillColor="#00000000"
        android:pathData="M79,19L79,89"
        android:strokeWidth="0.8"
        android:strokeColor="#33FFFFFF" />
</vector>


================================================
FILE: runtime/examples/audio_classification/android_demo/app/src/main/res/drawable-v24/ic_launcher_foreground.xml
================================================
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:aapt="http://schemas.android.com/aapt"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path android:pathData="M31,63.928c0,0 6.4,-11 12.1,-13.1c7.2,-2.6 26,-1.4 26,-1.4l38.1,38.1L107,108.928l-32,-1L31,63.928z">
        <aapt:attr name="android:fillColor">
            <gradient
                android:endX="85.84757"
                android:endY="92.4963"
                android:startX="42.9492"
                android:startY="49.59793"
                android:type="linear">
                <item
                    android:color="#44000000"
                    android:offset="0.0" />
                <item
                    android:color="#00000000"
                    android:offset="1.0" />
            </gradient>
        </aapt:attr>
    </path>
    <path
        android:fillColor="#FFFFFF"
        android:fillType="nonZero"
        android:pathData="M65.3,45.828l3.8,-6.6c0.2,-0.4 0.1,-0.9 -0.3,-1.1c-0.4,-0.2 -0.9,-0.1 -1.1,0.3l-3.9,6.7c-6.3,-2.8 -13.4,-2.8 -19.7,0l-3.9,-6.7c-0.2,-0.4 -0.7,-0.5 -1.1,-0.3C38.8,38.328 38.7,38.828 38.9,39.228l3.8,6.6C36.2,49.428 31.7,56.028 31,63.928h46C76.3,56.028 71.8,49.428 65.3,45.828zM43.4,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2c-0.3,-0.7 -0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C45.3,56.528 44.5,57.328 43.4,57.328L43.4,57.328zM64.6,57.328c-0.8,0 -1.5,-0.5 -1.8,-1.2s-0.1,-1.5 0.4,-2.1c0.5,-0.5 1.4,-0.7 2.1,-0.4c0.7,0.3 1.2,1 1.2,1.8C66.5,56.528 65.6,57.328 64.6,57.328L64.6,57.328z"
        android:strokeWidth="1"
        android:strokeColor="#00000000" />
</vector>

================================================
FILE: runtime/examples/audio_classification/android_demo/app/src/main/res/layout/activity_main.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<android.support.constraint.ConstraintLayout
    xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    tools:context=".MainActivity">
    <!-- Constrained to all four parent edges; MainActivity replaces the
         placeholder text with the classification result. -->
    <TextView
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"
        android:text="Hello World!"
        android:id="@+id/tv_content"
        app:layout_constraintBottom_toBottomOf="parent"
        app:layout_constraintStart_toStartOf="parent"
        app:layout_constraintEnd_toEndOf="parent"
        app:layout_constraintTop_toTopOf="parent" />

  </android.support.constraint.ConstraintLayout>

================================================
FILE: runtime/examples/audio_classification/android_demo/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: runtime/examples/audio_classification/android_demo/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<adaptive-icon xmlns:android="http://schemas.android.com/apk/res/android">
    <background android:drawable="@drawable/ic_launcher_background" />
    <foreground android:drawable="@drawable/ic_launcher_foreground" />
</adaptive-icon>

================================================
FILE: runtime/examples/audio_classification/android_demo/app/src/main/res/values/colors.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>
    <color name="purple_200">#FFBB86FC</color>
    <color name="purple_500">#FF6200EE</color>
    <color name="purple_700">#FF3700B3</color>
    <color name="teal_200">#FF03DAC5</color>
    <color name="teal_700">#FF018786</color>
    <color name="black">#FF000000</color>
    <color name="white">#FFFFFFFF</color>
</resources>

================================================
FILE: runtime/examples/audio_classification/android_demo/app/src/main/res/values/strings.xml
================================================
<resources>
    <string name="app_name">cls</string>
</resources>

================================================
FILE: runtime/examples/audio_classification/android_demo/app/src/main/res/values/themes.xml
================================================
<resources xmlns:tools="http://schemas.android.com/tools">
    <!-- Base application theme. -->
    <style name="Theme.Cls" parent="Theme.AppCompat.Light.DarkActionBar">
        <!-- Primary brand color. -->
        <item name="colorPrimary">@color/purple_500</item>
        <item name="colorPrimaryDark">@color/purple_700</item>
        <item name="colorAccent">@color/teal_200</item>
        <!-- Customize your theme here. -->
    </style>
</resources>

================================================
FILE: runtime/examples/audio_classification/android_demo/app/src/main/res/values-night/themes.xml
================================================
<resources xmlns:tools="http://schemas.android.com/tools">
    <!-- Base application theme. -->
    <style name="Theme.Cls" parent="Theme.AppCompat.Light.DarkActionBar">
        <!-- Primary brand color. -->
        <item name="colorPrimary">@color/purple_200</item>
        <item name="colorPrimaryDark">@color/purple_700</item>
        <item name="colorAccent">@color/teal_200</item>
        <!-- Customize your theme here. -->
    </style>
</resources>

================================================
FILE: runtime/examples/audio_classification/android_demo/app/src/main/res/xml/backup_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample backup rules file; uncomment and customize as necessary.
   See https://developer.android.com/guide/topics/data/autobackup
   for details.
   Note: This file is ignored for devices older that API 31
   See https://developer.android.com/about/versions/12/backup-restore
-->
<full-backup-content>
    <!--
   <include domain="sharedpref" path="."/>
   <exclude domain="sharedpref" path="device.xml"/>
-->
</full-backup-content>

================================================
FILE: runtime/examples/audio_classification/android_demo/app/src/main/res/xml/data_extraction_rules.xml
================================================
<?xml version="1.0" encoding="utf-8"?><!--
   Sample data extraction rules file; uncomment and customize as necessary.
   See https://developer.android.com/about/versions/12/backup-restore#xml-changes
   for details.
-->
<data-extraction-rules>
    <cloud-backup>
        <!-- TODO: Use <include> and <exclude> to control what is backed up.
        <include .../>
        <exclude .../>
        -->
    </cloud-backup>
    <!--
    <device-transfer>
        <include .../>
        <exclude .../>
    </device-transfer>
    -->
</data-extraction-rules>

================================================
FILE: runtime/examples/audio_classification/android_demo/build.gradle
================================================
// Top-level build file where you can add configuration options common to all sub-projects/modules.
plugins {
    // Each plugin is declared with a pinned version and `apply false`, i.e. it is
    // not applied to this root project by this block.
    // NOTE(review): presumably the sub-projects (e.g. :app) apply them — confirm.
    id 'com.android.application' version '7.3.1' apply false
    id 'com.android.library' version '7.3.1' apply false
    id 'org.jetbrains.kotlin.android' version '1.7.20' apply false
}

================================================
FILE: runtime/examples/audio_classification/android_demo/gradle/wrapper/gradle-wrapper.properties
================================================
#Mon Jan 16 16:37:04 CST 2023
distributionBase=GRADLE_USER_HOME
distributionUrl=https\://services.gradle.org/distributions/gradle-7.4-bin.zip
distributionPath=wrapper/dists
zipStorePath=wrapper/dists
zipStoreBase=GRADLE_USER_HOME


================================================
FILE: runtime/examples/audio_classification/android_demo/gradle.properties
================================================
# Project-wide Gradle settings.
# IDE (e.g. Android Studio) users:
# Gradle settings configured through the IDE *will override*
# any settings specified in this file.
# For more details on how to configure your build environment visit
# http://www.gradle.org/docs/current/userguide/build_environment.html
# Specifies the JVM arguments used for the daemon process.
# The setting is particularly useful for tweaking memory settings.
org.gradle.jvmargs=-Xmx2048m -Dfile.encoding=UTF-8
# When configured, Gradle will run in incubating parallel mode.
# This option should only be used with decoupled projects. More details, visit
# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects
# org.gradle.parallel=true
# Kotlin code style for this project: "official" or "obsolete":
kotlin.code.style=official
# Enables namespacing of each library's R class so that its R class includes only the
# resources declared in the library itself and none from the library's dependencies,
# thereby reducing the size of the R class for that library
android.nonTransitiveRClass=true

================================================
FILE: runtime/examples/audio_classification/android_demo/gradlew
================================================
#!/usr/bin/env sh

#
# Copyright 2015 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

##############################################################################
##
##  Gradle start up script for UN*X
##
##############################################################################

# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
    ls=`ls -ld "$PRG"`
    link=`expr "$ls" : '.*-> \(.*\)$'`
    if expr "$link" : '/.*' > /dev/null; then
        PRG="$link"
    else
        PRG=`dirname "$PRG"`"/$link"
    fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null

APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`

# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

# Print all arguments as a single message on stdout.
warn () {
    echo "$*"
}

# Print all arguments as a single message on stdout, surrounded by blank
# lines, then terminate the script with exit status 1.
die () {
    echo
    echo "$*"
    echo
    exit 1
}

# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
  CYGWIN* )
    cygwin=true
    ;;
  Darwin* )
    darwin=true
    ;;
  MINGW* )
    msys=true
    ;;
  NONSTOP* )
    nonstop=true
    ;;
esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar


# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
        # IBM's JDK on AIX uses strange locations for the executables
        JAVACMD="$JAVA_HOME/jre/sh/java"
    else
        JAVACMD="$JAVA_HOME/bin/java"
    fi
    if [ ! -x "$JAVACMD" ] ; then
        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
    fi
else
    JAVACMD="java"
    which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.

Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi

# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
    MAX_FD_LIMIT=`ulimit -H -n`
    if [ $? -eq 0 ] ; then
        if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
            MAX_FD="$MAX_FD_LIMIT"
        fi
        ulimit -n $MAX_FD
        if [ $? -ne 0 ] ; then
            warn "Could not set maximum file descriptor limit: $MAX_FD"
        fi
    else
        warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
    fi
fi

# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
    GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
    APP_HOME=`cygpath --path --mixed "$APP_HOME"`
    CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`

    JAVACMD=`cygpath --unix "$JAVACMD"`

    # We build the pattern for arguments to be converted via cygpath
    ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
    SEP=""
    for dir in $ROOTDIRSRAW ; do
        ROOTDIRS="$ROOTDIRS$SEP$dir"
        SEP="|"
    done
    OURCYGPATTERN="(^($ROOTDIRS))"
    # Add a user-defined pattern to the cygpath arguments
    if [ "$GRADLE_CYGPATTERN" != "" ] ; then
        OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
    fi
    # Now convert the arguments - kludge to limit ourselves to /bin/sh
    i=0
    for arg in "$@" ; do
        CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
        CHECK2=`echo "$arg"|egrep -c "^-"`                                 ### Determine if an option

        if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then                    ### Added a condition
            eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
        else
            eval `echo args$i`="\"$arg\""
        fi
        i=`expr $i + 1`
    done
    case $i in
        0) set -- ;;
        1) set -- "$args0" ;;
        2) set -- "$args0" "$args1" ;;
        3) set -- "$args0" "$args1" "$args2" ;;
        4) set -- "$args0" "$args1" "$args2" "$args3" ;;
        5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
        6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
        7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
        8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
        9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
    esac
fi

# Escape application args
# For every argument, `save` emits the argument wrapped in single quotes:
# embedded single quotes are rewritten as '\'' , an opening quote is inserted
# at the start of the first line and a closing quote followed by " \" is
# appended to the last line. A final line holding a single space is printed
# after all arguments.
save () {
    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
    echo " "
}
# Quoted form of the script's own arguments, captured as one string.
APP_ARGS=`save "$@"`

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

exec "$JAVACMD" "$@"


================================================
FILE: runtime/examples/audio_classification/android_demo/gradlew.bat
================================================
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem      https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem

@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem  Gradle startup script for Windows
@rem
@rem ##########################################################################

@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal

set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"

@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto execute

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto execute

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.

goto fail

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar


@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*

:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd

:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if  not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1

:mainEnd
if "%OS%"=="Windows_NT" endlocal

:omega


================================================
FILE: runtime/examples/audio_classification/android_demo/settings.gradle
================================================
// Repositories used to resolve Gradle plugins.
pluginManagement {
    repositories {
        gradlePluginPortal()
        google()
        mavenCentral()
    }
}
// Repositories used to resolve project dependencies, declared centrally here.
dependencyResolutionManagement {
    // FAIL_ON_PROJECT_REPOS: repositories are expected to be declared only in
    // this settings file rather than in individual project build files.
    repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS)
    repositories {
        google()
        mavenCentral()
    }
}
// Root project name and the single included module.
rootProject.name = "cls"
include ':app'


================================================
FILE: runtime/examples/audio_classification/conf
================================================
[CONF]
wav_normal=true
wav_normal_type=linear
wav_norm_mul_factor=1.0
model_path=./inference.onnx
param_path=
dict_path=./label_list
num_cpu_thread=1
samp_freq=32000
frame_length_ms=32
frame_shift_ms=10
num_bins=64
low_freq=50
high_freq=14000
dither=0.0


================================================
FILE: runtime/examples/audio_classification/label_list
================================================
Dog
Rooster
Pig
Cow
Frog
Cat
Hen
Insects (flying)
Sheep
Crow
Rain
Sea waves
Crackling fire
Crickets
Chirping birds
Water drops
Wind
Pouring water
Toilet flush
Thunderstorm
Crying baby
Sneezing
Clapping
Breathing
Coughing
Footsteps
Laughing
Brushing teeth
Snoring
Drinking, sipping
Door knock
Mouse click
Keyboard typing
Door, wood creaks
Can opening
Washing machine
Vacuum cleaner
Clock alarm
Clock tick
Glass breaking
Helicopter
Chainsaw
Siren
Car horn
Engine
Train
Church bells
Airplane
Fireworks
Hand saw


================================================
FILE: runtime/examples/audio_classification/scp
================================================
test.wav


================================================
FILE: runtime/examples/codelab/README.md
================================================
# Codelab

> The examples below are for development and offline testing.
> Do not run them unless you know what they do.

* nnet
* feat
* decoder
* u2


================================================
FILE: runtime/examples/codelab/decoder/.gitignore
================================================
data
exp


================================================
FILE: runtime/examples/codelab/decoder/README.md
================================================
# ASR Decoder

ASR decoder test binaries. We use these binaries to test the CTC beam search decoder and the WFST decoder.

* decoder_test_main.cc
feeds the nnet output log-probabilities and tests only the decoder.

* offline_decoder_sliding_chunk_main.cc
feeds streaming audio features and decodes in a streaming manner.

* offline_wfst_decoder_main.cc
feeds streaming audio features and decodes with WFST in a streaming manner.


================================================
FILE: runtime/examples/codelab/decoder/path.sh
================================================
# This contains the locations of the built binaries required for running the examples.
# It is meant to be sourced from the example directory, because every path is
# derived from $PWD.

SPEECHX_ROOT=$PWD/../../../
SPEECHX_BUILD=$SPEECHX_ROOT/build/speechx

SPEECHX_TOOLS=$SPEECHX_ROOT/tools
TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin

# Only report a missing build tree; do not abort, so that the calling script
# can still decide what to do. Quoted so paths with spaces are tested correctly.
[ -d "$SPEECHX_BUILD" ] || { echo "Error: 'build/speechx' directory not found. please ensure that the project build successfully"; }

# Force the C locale for every tool started by the examples. The variable was
# previously misspelled as LC_AL, which nothing reads.
export LC_ALL=C

# Make the decoder and audio frontend binaries, plus valgrind, available on PATH.
SPEECHX_BIN=$SPEECHX_ROOT/build/speechx/decoder:$SPEECHX_ROOT/build/speechx/frontend/audio
export PATH=$PATH:$SPEECHX_BIN:$TOOLS_BIN


================================================
FILE: runtime/examples/codelab/decoder/run.sh
================================================
#!/bin/bash
# Decoder codelab driver: builds the runtime when needed, downloads the
# DeepSpeech2 online model, a test wav and a language model, then converts the
# CMVN statistics, extracts linear spectrogram features and runs the CTC beam
# search decoder on them.
set +x
set -e

. ./path.sh

# 1. compile
# Build only when the build tree declared in path.sh is missing. The previous
# guard tested the undefined SPEECHX_EXAMPLES variable, which made `[ ! -d ]`
# always false, so the build step could never run.
if [ ! -d "${SPEECHX_BUILD}" ]; then
    pushd "${SPEECHX_ROOT}"
    bash build.sh
    popd
fi

# input
mkdir -p data
data=$PWD/data
ckpt_dir=$data/model
model_dir=$ckpt_dir/exp/deepspeech2_online/checkpoints/
vocb_dir=$ckpt_dir/data/lang_char/

lm=$data/zh_giga.no_cna_cmn.prune01244.klm

# output
exp_dir=./exp
mkdir -p $exp_dir

# 2. download model
# The tarball itself is used as the "already downloaded" marker.
if [[ ! -f data/model/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz ]]; then
    mkdir -p data/model
    pushd data/model
    wget -c https://paddlespeech.cdn.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
    tar xzfv asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
    popd
fi

# produce wav scp
if [ ! -f data/wav.scp ]; then
    pushd data
    wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav
    echo "utt1 " $PWD/zh.wav > wav.scp
    popd
fi

# download lm
if [ ! -f $lm ]; then
    pushd data
    wget -c https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm
    popd
fi

feat_wspecifier=$exp_dir/feats.ark
cmvn=$exp_dir/cmvn.ark

export GLOG_logtostderr=1

# dump json cmvn to kaldi
cmvn_json2kaldi_main \
    --json_file  $ckpt_dir/data/mean_std.json \
    --cmvn_write_path $cmvn \
    --binary=false
echo "convert json cmvn to kaldi ark."


# generate linear feature as streaming
compute_linear_spectrogram_main \
    --wav_rspecifier=scp:$data/wav.scp \
    --feature_wspecifier=ark,t:$feat_wspecifier \
    --cmvn_file=$cmvn
echo "compute linear spectrogram feature."

# run ctc beam search decoder as streaming
ctc_beam_search_decoder_main \
    --result_wspecifier=ark,t:$exp_dir/result.txt \
    --feature_rspecifier=ark:$feat_wspecifier \
    --model_path=$model_dir/avg_1.jit.pdmodel \
    --param_path=$model_dir/avg_1.jit.pdiparams \
    --dict_file=$vocb_dir/vocab.txt \
    --lm_path=$lm


================================================
FILE: runtime/examples/codelab/decoder/valgrind.sh
================================================
#!/bin/bash

# this script is for memory check, so please run ./run.sh first.
# It runs the decoder binary under valgrind memcheck with full leak reporting.

set +x
set -e

. ./path.sh

if [ ! -d "${SPEECHX_TOOLS}/valgrind/install" ]; then
  # bash's builtin echo does not interpret "\n", so the stray escape was dropped.
  echo "please install valgrind in the speechx tools dir."
  exit 1
fi

# NOTE(review): these locations differ from the ones produced by ./run.sh
# (data/model/... and ./exp/...) — confirm they are still the intended inputs.
model_dir=../paddle_asr_model
feat_wspecifier=./feats.ark
cmvn=./cmvn.ark

# Flag and file names follow run.sh in this directory: the feature flag is
# --feature_rspecifier (was misspelled --feature_respecifier) and the
# parameter file extension is .pdiparams (was .pdparams).
# NOTE(review): run.sh invokes ctc_beam_search_decoder_main; verify that
# offline_decoder_main is still a built target.
valgrind --tool=memcheck --track-origins=yes --leak-check=full --show-leak-kinds=all \
  offline_decoder_main \
  --feature_rspecifier=ark:$feat_wspecifier \
  --model_path=$model_dir/avg_1.jit.pdmodel \
  --param_path=$model_dir/avg_1.jit.pdiparams \
  --dict_file=$model_dir/vocab.txt \
  --lm_path=$model_dir/avg_1.jit.klm


================================================
FILE: runtime/examples/codelab/feat/.gitignore
================================================
data
exp


================================================
FILE: runtime/examples/codelab/feat/README.md
================================================
# Deepspeech2 Streaming Audio Feature

ASR audio feature test binaries. We use these binaries to test linear/fbank/mfcc ASR features in a streaming manner.

* compute_linear_spectrogram_main.cc

compute linear spectrogram without db norm in streaming manner.


================================================
FILE: runtime/examples/codelab/feat/path.sh
================================================
# This contains the locations of the built binaries required for running the examples.
# It is meant to be sourced from the example directory, because every path is
# derived from $PWD.

SPEECHX_ROOT=$PWD/../../../
SPEECHX_BUILD=$SPEECHX_ROOT/build/speechx

SPEECHX_TOOLS=$SPEECHX_ROOT/tools
TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin

# Only report a missing build tree; do not abort, so that the calling script
# can still decide what to do. Quoted so paths with spaces are tested correctly.
[ -d "$SPEECHX_BUILD" ] || { echo "Error: 'build/speechx' directory not found. please ensure that the project build successfully"; }

# Force the C locale for every tool started by the examples. The variable was
# previously misspelled as LC_AL, which nothing reads.
export LC_ALL=C

# Make the decoder and audio frontend binaries, plus valgrind, available on PATH.
SPEECHX_BIN=$SPEECHX_ROOT/build/speechx/decoder:$SPEECHX_ROOT/build/speechx/frontend/audio
export PATH=$PATH:$SPEECHX_BIN:$TOOLS_BIN


================================================
FILE: runtime/examples/codelab/feat/run.sh
================================================
#!/bin/bash
# Feature codelab driver: builds the runtime when needed, downloads the
# DeepSpeech2 online model and a test wav, converts the CMVN statistics and
# then computes linear spectrogram and fbank features.
set +x
set -e

. ./path.sh

# 1. compile
# Build only when the build tree declared in path.sh is missing. The previous
# guard tested the undefined SPEECHX_EXAMPLES variable, which made `[ ! -d ]`
# always false, so the build step could never run.
if [ ! -d "${SPEECHX_BUILD}" ]; then
    pushd "${SPEECHX_ROOT}"
    bash build.sh
    popd
fi

# 2. download model
# The tarball itself is used as the "already downloaded" marker.
if [ ! -e data/model/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz ]; then
    mkdir -p data/model
    pushd data/model
    wget -c https://paddlespeech.cdn.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
    tar xzfv asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
    popd
fi

# produce wav scp
if [ ! -f data/wav.scp ]; then
    mkdir -p data
    pushd data
    wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav
    echo "utt1 " $PWD/zh.wav > wav.scp
    popd
fi


# input
data_dir=./data
exp_dir=./exp
model_dir=$data_dir/model/

mkdir -p $exp_dir


# 3. run feat
export GLOG_logtostderr=1

cmvn_json2kaldi_main \
    --json_file=$model_dir/data/mean_std.json \
    --cmvn_write_path=$exp_dir/cmvn.ark \
    --binary=false
echo "convert json cmvn to kaldi ark."


compute_linear_spectrogram_main \
    --wav_rspecifier=scp:$data_dir/wav.scp \
    --feature_wspecifier=ark,t:$exp_dir/feats.ark \
    --cmvn_file=$exp_dir/cmvn.ark
echo "compute linear spectrogram feature."

compute_fbank_main \
    --num_bins=161 \
    --wav_rspecifier=scp:$data_dir/wav.scp \
    --feature_wspecifier=ark,t:$exp_dir/fbank.ark \
    --cmvn_file=$exp_dir/cmvn.ark
echo "compute fbank feature."


================================================
FILE: runtime/examples/codelab/feat/valgrind.sh
================================================
#!/bin/bash

# this script is for memory check, so please run ./run.sh first.
# It runs the linear spectrogram binary under valgrind memcheck with full
# leak reporting.

set +x
set -e

. ./path.sh

if [ ! -d "${SPEECHX_TOOLS}/valgrind/install" ]; then
  # bash's builtin echo does not interpret "\n", so the stray escape was dropped.
  echo "please install valgrind in the speechx tools dir."
  exit 1
fi

# NOTE(review): these locations differ from the ones produced by ./run.sh
# (./data/wav.scp and ./exp/...) — confirm they are still the intended inputs.
model_dir=../paddle_asr_model
feat_wspecifier=./feats.ark
cmvn=./cmvn.ark

# run.sh passes the CMVN statistics to this binary via --cmvn_file;
# --cmvn_write_path is the output flag of cmvn_json2kaldi_main.
valgrind --tool=memcheck --track-origins=yes --leak-check=full --show-leak-kinds=all \
  compute_linear_spectrogram_main \
  --wav_rspecifier=scp:$model_dir/wav.scp \
  --feature_wspecifier=ark,t:$feat_wspecifier \
  --cmvn_file=$cmvn


================================================
FILE: runtime/examples/codelab/nnet/.gitignore
================================================
data
exp


================================================
FILE: runtime/examples/codelab/nnet/README.md
================================================
# Deepspeech2 Streaming NNet Test

Used for the DS2 streaming nnet inference test.


================================================
FILE: runtime/examples/codelab/nnet/path.sh
================================================
# This contains the locations of the built binaries required for running the examples.
# It is meant to be sourced from the example directory, because every path is
# derived from $PWD.

SPEECHX_ROOT=$PWD/../../../
SPEECHX_BUILD=$SPEECHX_ROOT/build/speechx

SPEECHX_TOOLS=$SPEECHX_ROOT/tools
TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin

# Only report a missing build tree; do not abort, so that the calling script
# can still decide what to do. Quoted so paths with spaces are tested correctly.
[ -d "$SPEECHX_BUILD" ] || { echo "Error: 'build/speechx' directory not found. please ensure that the project build successfully"; }

# Force the C locale for every tool started by the examples. The variable was
# previously misspelled as LC_AL, which nothing reads.
export LC_ALL=C

# Make the nnet codelab binaries, plus valgrind, available on PATH.
SPEECHX_BIN=$SPEECHX_BUILD/codelab/nnet
export PATH=$PATH:$SPEECHX_BIN:$TOOLS_BIN


================================================
FILE: runtime/examples/codelab/nnet/run.sh
================================================
#!/bin/bash
# NNet codelab driver: builds the runtime when needed, downloads the
# DeepSpeech2 online model and runs the ds2 model test binary on it.
set +x
set -e

. ./path.sh

# 1. compile
# Build only when the build tree declared in path.sh is missing. The previous
# guard tested the undefined SPEECHX_EXAMPLES variable, which made `[ ! -d ]`
# always false, so the build step could never run.
if [ ! -d "${SPEECHX_BUILD}" ]; then
    pushd "${SPEECHX_ROOT}"
    bash build.sh
    popd
fi

# 2. download model
# The tarball itself is used as the "already downloaded" marker.
if [ ! -f data/model/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz ]; then
    mkdir -p data/model
    pushd data/model
    wget -c https://paddlespeech.cdn.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
    tar xzfv asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
    popd
fi

ckpt_dir=./data/model
model_dir=$ckpt_dir/exp/deepspeech2_online/checkpoints/

ds2_model_test_main \
    --model_path=$model_dir/avg_1.jit.pdmodel \
    --param_path=$model_dir/avg_1.jit.pdiparams


================================================
FILE: runtime/examples/codelab/nnet/valgrind.sh
================================================
#!/bin/bash

# this script is for memory check, so please run ./run.sh first.
# It runs the ds2 model test binary under valgrind memcheck with full leak
# reporting, on the model downloaded by run.sh.

set +x
set -e

. ./path.sh

if [ ! -d "${SPEECHX_TOOLS}/valgrind/install" ]; then
  # bash's builtin echo does not interpret "\n", so the stray escape was dropped.
  echo "please install valgrind in the speechx tools dir."
  exit 1
fi

ckpt_dir=./data/model
model_dir=$ckpt_dir/exp/deepspeech2_online/checkpoints/

# The parameter file is avg_1.jit.pdiparams, the same file run.sh loads from
# this model_dir (the previous .pdparams name did not match it).
valgrind --tool=memcheck --track-origins=yes --leak-check=full --show-leak-kinds=all \
  ds2_model_test_main \
  --model_path=$model_dir/avg_1.jit.pdmodel \
  --param_path=$model_dir/avg_1.jit.pdiparams


================================================
FILE: runtime/examples/codelab/u2/.gitignore
================================================
data


================================================
FILE: runtime/examples/codelab/u2/README.md
================================================
# u2/u2pp Streaming Test


================================================
FILE: runtime/examples/codelab/u2/local/decode.sh
================================================
#!/bin/bash
# Run the u2 CTC prefix beam search decoder on the fbank features stored in
# exp/ and write the recognition result into the same directory.
set +x
set -e

. path.sh

data_root=data
out_dir=exp
mkdir -p "${out_dir}"
model_root="${data_root}/model/asr1_chunk_conformer_u2pp_wenetspeech_static_1.1.0.model/"

# Collect the decoder flags first, then launch the binary in one call.
decoder_flags=(
    "--model_path=${model_root}/export.jit"
    "--nnet_decoder_chunk=16"
    "--receptive_field_length=7"
    "--subsampling_rate=4"
    "--vocab_path=${model_root}/unit.txt"
    "--feature_rspecifier=ark,t:${out_dir}/fbank.ark"
    "--result_wspecifier=ark,t:${out_dir}/result.ark"
)
ctc_prefix_beam_search_decoder_main "${decoder_flags[@]}"

echo "u2 ctc prefix beam search decode."


================================================
FILE: runtime/examples/codelab/u2/local/feat.sh
================================================
#!/bin/bash
# Convert the model's JSON CMVN statistics to a Kaldi ark file, then compute
# 80-bin fbank features for the utterances listed in data/wav.scp.
# Command tracing is enabled, and the script stops at the first failing command.
set -x
set -e

. path.sh

data=data
exp=exp
mkdir -p $exp
ckpt_dir=./data/model
model_dir=$ckpt_dir/asr1_chunk_conformer_u2pp_wenetspeech_static_1.1.0.model/


# Writes $exp/cmvn.ark in text form (--binary=false).
cmvn_json2kaldi_main \
    --json_file  $model_dir/mean_std.json \
    --cmvn_write_path $exp/cmvn.ark \
    --binary=false

echo "convert json cmvn to kaldi ark."

# Reads the CMVN file written above and writes $exp/fbank.ark as a text archive.
compute_fbank_main \
    --num_bins 80 \
    --wav_rspecifier=scp:$data/wav.scp \
    --cmvn_file=$exp/cmvn.ark \
    --feature_wspecifier=ark,t:$exp/fbank.ark

echo "compute fbank feature."


================================================
FILE: runtime/examples/codelab/u2/local/nnet.sh
================================================
#!/bin/bash
# Run the u2 nnet binary on the fbank features in exp/ and write the encoder
# outputs and log-probabilities as text archives.
# Command tracing is enabled, and the script stops at the first failing command.
set -x
set -e

. path.sh

data=data
exp=exp
mkdir -p $exp
ckpt_dir=./data/model
model_dir=$ckpt_dir/asr1_chunk_conformer_u2pp_wenetspeech_static_1.1.0.model/

# Input is $exp/fbank.ark, which local/feat.sh writes.
u2_nnet_main \
    --model_path=$model_dir/export.jit \
    --feature_rspecifier=ark,t:$exp/fbank.ark \
    --nnet_decoder_chunk=16 \
    --receptive_field_length=7 \
    --subsampling_rate=4 \
    --acoustic_scale=1.0 \
    --nnet_encoder_outs_wspecifier=ark,t:$exp/encoder_outs.ark \
    --nnet_prob_wspecifier=ark,t:$exp/logprobs.ark
echo "u2 nnet decode."


================================================
FILE: runtime/examples/codelab/u2/local/recognizer.sh
================================================
#!/bin/bash
# Run the end-to-end u2 recognizer on the utterances listed in data/wav.scp,
# using the CMVN file in exp/, and write the result into exp/.
set -e

. path.sh

data_root=data
out_dir=exp
mkdir -p "${out_dir}"
model_root="./data/model/asr1_chunk_conformer_u2pp_wenetspeech_static_1.1.0.model/"

# Collect the recognizer flags first, then launch the binary in one call.
recognizer_flags=(
    "--use_fbank=true"
    "--num_bins=80"
    "--cmvn_file=${out_dir}/cmvn.ark"
    "--model_path=${model_root}/export.jit"
    "--nnet_decoder_chunk=16"
    "--receptive_field_length=7"
    "--subsampling_rate=4"
    "--vocab_path=${model_root}/unit.txt"
    "--wav_rspecifier=scp:${data_root}/wav.scp"
    "--result_wspecifier=ark,t:${out_dir}/result.ark"
)
u2_recognizer_main "${recognizer_flags[@]}"


================================================
FILE: runtime/examples/codelab/u2/path.sh
================================================
# This contains the locations of the built binaries required for running the examples.
# It is meant to be sourced from the example directory, because every path is
# derived from $PWD.

unset GREP_OPTIONS

SPEECHX_ROOT=$PWD/../../../
SPEECHX_BUILD=$SPEECHX_ROOT/build/speechx

SPEECHX_TOOLS=$SPEECHX_ROOT/tools
TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin

# Only report a missing build tree; do not abort, so that the calling script
# can still decide what to do. Quoted so paths with spaces are tested correctly.
[ -d "$SPEECHX_BUILD" ] || { echo "Error: 'build/speechx' directory not found. please ensure that the project build successfully"; }

# Force the C locale for every tool started by the examples. The variable was
# previously misspelled as LC_AL, which nothing reads.
export LC_ALL=C

# Make valgrind and the nnet, decoder, audio frontend and recognizer binaries
# available on PATH.
export PATH=$PATH:$TOOLS_BIN:$SPEECHX_BUILD/nnet:$SPEECHX_BUILD/decoder:$SPEECHX_BUILD/frontend/audio:$SPEECHX_BUILD/recognizer

# Ask the installed paddle package for its `libs` and `fluid` directories and
# prepend both to the dynamic loader search path.
PADDLE_LIB_PATH=$(python -c "import os; import paddle; include_dir=paddle.sysconfig.get_include(); paddle_dir=os.path.split(include_dir)[0]; libs_dir=os.path.join(paddle_dir, 'libs'); fluid_dir=os.path.join(paddle_dir, 'fluid'); out=':'.join([libs_dir, fluid_dir]); print(out);")
export LD_LIBRARY_PATH=$PADDLE_LIB_PATH:$LD_LIBRARY_PATH


================================================
FILE: runtime/examples/codelab/u2/run.sh
================================================
#!/bin/bash
# u2/u2pp codelab driver: builds the runtime when needed, downloads the
# conformer u2pp model and a test wav, then runs the feature, nnet and decode
# stages from ./local in that order.
set -x
set -e

. ./path.sh

# 1. compile
# Build only when the build tree declared in path.sh is missing. The previous
# guard tested the undefined SPEECHX_EXAMPLES variable, which made `[ ! -d ]`
# always false, so the build step could never run.
if [ ! -d "${SPEECHX_BUILD}" ]; then
    pushd "${SPEECHX_ROOT}"
    bash build.sh
    popd
fi

# 2. download model
# The tarball itself is used as the "already downloaded" marker.
if [ ! -f data/model/asr1_chunk_conformer_u2pp_wenetspeech_static_1.1.0.model.tar.gz ]; then
    mkdir -p data/model
    pushd data/model
    wget -c https://paddlespeech.cdn.bcebos.com/s2t/wenetspeech/asr1/static/asr1_chunk_conformer_u2pp_wenetspeech_static_1.1.0.model.tar.gz
    tar xzfv asr1_chunk_conformer_u2pp_wenetspeech_static_1.1.0.model.tar.gz
    popd
fi

# produce wav scp
if [ ! -f data/wav.scp ]; then
    mkdir -p data
    pushd data
    wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav
    echo "utt1 " $PWD/zh.wav > wav.scp
    popd
fi

# These variables are not exported; each ./local script defines its own copies.
data=data
exp=exp
mkdir -p $exp
ckpt_dir=./data/model
model_dir=$ckpt_dir/asr1_chunk_conformer_u2pp_wenetspeech_static_1.1.0.model/


./local/feat.sh

./local/nnet.sh

./local/decode.sh


================================================
FILE: runtime/examples/custom_asr/README.md
================================================
# Customized ASR

## Introduction
These scripts are tutorials that show you how to build your own decoding graph.

e.g.:
* G with slot: 打车到 "address_slot"。
![](https://ai-studio-static-online.cdn.bcebos.com/28d9ef132a7f47a895a65ae9e5c4f55b8f472c9f3dd24be8a2e66e0b88b173a4)

* This is the address slot WFST; you can add the addresses that you want to recognize.
![](https://ai-studio-static-online.cdn.bcebos.com/47c89100ef8c465bac733605ffc53d76abefba33d62f4d818d351f8cea3c8fe2)

* After the replace operation, G = fstreplace(G_with_slot, address_slot), we get the customized graph.
![](https://ai-studio-static-online.cdn.bcebos.com/60a3095293044f10b73039ab10c7950d139a6717580a44a3ba878c6e74de402b)

These operations are implemented in the scripts; please check them out. We will release more detailed scripts later.

## How to run

```
bash run.sh
```

## Results

### CTC WFST

```
Overall -> 1.23 % N=1134 C=1126 S=6 D=2 I=6
Mandarin -> 1.24 % N=1132 C=1124 S=6 D=2 I=6
English -> 0.00 % N=2 C=2 S=0 D=0 I=0
```


================================================
FILE: runtime/examples/custom_asr/local/compile_lexicon_token_fst.sh
================================================
#!/bin/bash
# Copyright 2015       Yajie Miao    (Carnegie Mellon University)

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
# THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
# WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
# MERCHANTABLITY OR NON-INFRINGEMENT.
# See the Apache 2 License for the specific language governing permissions and
# limitations under the License.

# This script compiles the lexicon and CTC tokens into FSTs. FST compiling slightly differs between the
# phoneme and character-based lexicons.
#
# Positional arguments: <dict-src-dir> <tmp-dir> <lang-dir>.
# Files read from <dict-src-dir> below: units.txt and lexicon.txt.
# Files written to <lang-dir>: units.txt (copy), tokens.txt, words.txt, T.fst, L.fst.
# Intermediate files are written to <tmp-dir>.
set -eo pipefail
. utils/parse_options.sh

if [ $# -ne 3 ]; then
  echo "usage: utils/fst/compile_lexicon_token_fst.sh <dict-src-dir> <tmp-dir> <lang-dir>"
  echo "e.g.: utils/fst/compile_lexicon_token_fst.sh data/local/dict data/local/lang_tmp data/lang"
  echo "<dict-src-dir> should contain the following files:"
  echo "lexicon.txt lexicon_numbers.txt units.txt"
  echo "options: "
  exit 1;
fi

srcdir=$1
tmpdir=$2
dir=$3
mkdir -p $dir $tmpdir

# path.sh is optional; it is sourced only when present in the current directory.
[ -f path.sh ] && . ./path.sh

cp $srcdir/units.txt $dir

# Add probabilities to lexicon entries. There is in fact no point of doing this here since all the entries have 1.0.
# But utils/make_lexicon_fst.pl requires a probabilistic version, so we just leave it as it is.
perl -ape 's/(\S+\s+)(.+)/${1}1.0\t$2/;' < $srcdir/lexicon.txt > $tmpdir/lexiconp.txt || exit 1;

# Add disambiguation symbols to the lexicon. This is necessary for determinizing the composition of L.fst and G.fst.
# Without these symbols, determinization will fail.
# default first disambiguation is #1
# NOTE(review): the script's stdout is presumably the number of disambiguation
# symbols it added — confirm against utils/fst/add_lex_disambig.pl.
ndisambig=`utils/fst/add_lex_disambig.pl $tmpdir/lexiconp.txt $tmpdir/lexiconp_disambig.txt`
# add #0 (#0 reserved for symbol in grammar).
ndisambig=$[$ndisambig+1];

# One symbol per line: #0 .. #$ndisambig.
( for n in `seq 0 $ndisambig`; do echo '#'$n; done ) > $tmpdir/disambig.list

# Get the full list of CTC tokens used in FST. These tokens include <eps>, the blank <blk>,
# the actual model unit, and the disambiguation symbols.
# tokens.txt maps each token to a 0-based id in this order: <eps>, the first
# column of units.txt, then the disambiguation symbols.
cat $srcdir/units.txt | awk '{print $1}' > $tmpdir/units.list
(echo '<eps>';) | cat - $tmpdir/units.list $tmpdir/disambig.list | awk '{print $1 " " (NR-1)}' > $dir/tokens.txt

# ctc_token_fst_corrected is too big and too slow for character based chinese modeling,
# so here just use simple ctc_token_fst
utils/fst/ctc_token_fst.py --token_file $dir/tokens.txt | \
  fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/tokens.txt --keep_isymbols=false --keep_osymbols=false | \
  fstarcsort --sort_type=olabel > $dir/T.fst || exit 1;

# Encode the words with indices. Will be used in lexicon and language model FST compiling.
# words.txt layout: "<eps> 0", the sorted first column of lexiconp.txt numbered
# from 1, then #0, <s>, </s> and ROOT taking the next four ids.
cat $tmpdir/lexiconp.txt | awk '{print $1}' | sort | awk '
  BEGIN {
    print "<eps> 0";
  }
  {
    printf("%s %d\n", $1, NR);
  }
  END {
    printf("#0 %d\n", NR+1);
    printf("<s> %d\n", NR+2);
    printf("</s> %d\n", NR+3);
    printf("ROOT %d\n", NR+4);
  }' > $dir/words.txt || exit 1;

# Now compile the lexicon FST. Depending on the size of your lexicon, it may take some time.
# Look up the integer ids of the #0 symbol on the token side and on the word
# side; they are passed to fstaddselfloops below.
token_disambig_symbol=`grep \#0 $dir/tokens.txt | awk '{print $2}'`
word_disambig_symbol=`grep \#0 $dir/words.txt | awk '{print $2}'`

utils/fst/make_lexicon_fst.pl --pron-probs $tmpdir/lexiconp_disambig.txt 0 "sil" '#'$ndisambig | \
  fstcompile --isymbols=$dir/tokens.txt --osymbols=$dir/words.txt \
  --keep_isymbols=false --keep_osymbols=false |   \
  fstaddselfloops  "echo $token_disambig_symbol |" "echo $word_disambig_symbol |" | \
  fstarcsort --sort_type=olabel > $dir/L.fst || exit 1;

echo "Lexicon and Token FSTs compiling succeeded"


================================================
FILE: runtime/examples/custom_asr/local/mk_slot_graph.sh
================================================
#!/bin/bash

# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License

# Build the slot sub-grammars as binary FSTs.
#
# Usage: local/mk_slot_graph.sh <graph-slot-dir> <out-dir>
#   <graph-slot-dir>  directory holding time/date/money/year _slot.txt (text FSTs)
#   <out-dir>         directory receiving the *_slot.txt / *_slot.fst files;
#                     the word symbol table is read from <out-dir>/../lang/words.txt
graph_slot=$1
dir=$2

if [ -f path.sh ]; then
  . ./path.sh
fi

sym=$dir/../lang/words.txt
# Input and output labels of every slot FST share the word symbol table.
symbol_opts="--isymbols=$sym --osymbols=$sym"

# The address slot is defined inline as a text FST
# (<src-state> <dst-state> <ilabel> <olabel>; a lone state id marks a final state).
cat > $dir/address_slot.txt <<EOF
0 1 南山 南山
0 1 南京 南京
0 1 光明 光明   
0 1 龙岗 龙岗
0 1 北苑 北苑
0 1 北京 北京
0 1 酒店 酒店
0 1 合肥 合肥
0 1 望京搜后 望京搜后
0 1 地铁站 地铁站
0 1 海淀黄庄 海淀黄庄
0 1 佛山 佛山
0 1 广州 广州
0 1 苏州 苏州
0 1 百度大厦 百度大厦
0 1 龙泽苑东区 龙泽苑东区
0 1 首都机场 首都机场
0 1 朝来家园 朝来家园
0 1 深大 深大
0 1 双龙 双龙
0 1 公司 公司
0 1 上海 上海
0 1 家 家
0 1 机场 机场
0 1 华祝 华祝
0 1 上海虹桥 上海虹桥
0 2 检验 检验
2 1 中心 中心
0 3 苏州 苏州
3 1 街 街
3 8 高铁 高铁
8 1 站 站
0 4 杭州 杭州
4 1 东站 东站
4 1 <eps> <eps>
0 5 上海 上海
0 5 北京 北京
0 5 合肥 合肥
5 1 南站 南站
0 6 立水 立水
6 1 桥 桥
0 7 青岛 青岛
7 1 站 站
1
EOF

# Compile the inline address slot first, then the slots shipped in <graph-slot-dir>.
fstcompile $symbol_opts $dir/address_slot.txt $dir/address_slot.fst
for slot in time date money year; do
  fstcompile $symbol_opts $graph_slot/${slot}_slot.txt $dir/${slot}_slot.fst
done


================================================
FILE: runtime/examples/custom_asr/local/mk_tlg_with_slot.sh
================================================
#!/bin/bash

# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License

# Build G.fst (n-gram LM with the slot non-terminals expanded) and compose it
# with L.fst and T.fst into the final decoding graph TLG.fst.
#
# Usage: local/mk_tlg_with_slot.sh <lm-dir> <lang-dir> <tgt-lang-dir>
#   <lm-dir>        contains lm.arpa
#   <lang-dir>      contains words.txt, L.fst and T.fst
#   <tgt-lang-dir>  contains the *_slot.fst files; receives G_with_slot.fst, G.fst, TLG.fst
if [ $# -ne 3 ]; then
  echo "usage: $0 <lm-dir> <lang-dir> <tgt-lang-dir>"
  exit 1;
fi

lm=$1
lang=$2
tgt_lang=$3

unset GREP_OPTIONS

sym=$lang/words.txt
arpa_lm=$lm/lm.arpa

# Print the integer id of word symbol $1 from $sym.
# The match is exact on the first column, so a vocabulary entry that merely
# contains the symbol as a substring can never produce a second id.
word2int() {
  awk -v w="$1" '$1 == w { print $2; exit }' $sym
}

# Compose the language model to FST
cat $arpa_lm | \
   grep -v '<s> <s>' | \
   grep -v '</s> <s>' | \
   grep -v '</s> </s>' | \
   grep -v -i '<unk>' | \
   grep -v -i '<spoken_noise>' | \
   arpa2fst --read-symbol-table=$sym --keep-symbols=true - | fstprint | \
   utils/fst/eps2disambig.pl | utils/fst/s2eps.pl | fstcompile --isymbols=$sym \
     --osymbols=$sym --keep_isymbols=false --keep_osymbols=false | \
    fstrmepsilon | fstarcsort --sort_type=ilabel > $tgt_lang/G_with_slot.fst || exit 1;

root_label=`word2int ROOT`
address_slot_label=`word2int '<ADDRESS_SLOT>'`
time_slot_label=`word2int '<TIME_SLOT>'`
date_slot_label=`word2int '<DATE_SLOT>'`
money_slot_label=`word2int '<MONEY_SLOT>'`
year_slot_label=`word2int '<YEAR_SLOT>'`

# An empty label would silently shift the fstreplace argument list, so stop early.
for label in "$root_label" "$address_slot_label" "$time_slot_label" \
             "$date_slot_label" "$money_slot_label" "$year_slot_label"; do
  if [ -z "$label" ]; then
    echo "$0: ROOT or one of the <*_SLOT> symbols is missing from $sym"
    exit 1;
  fi
done

# Informational only: the exit status is not checked.
fstisstochastic $tgt_lang/G_with_slot.fst

# Replace every slot label in the LM graph by the corresponding slot FST.
fstreplace --epsilon_on_replace $tgt_lang/G_with_slot.fst \
  $root_label $tgt_lang/address_slot.fst $address_slot_label \
  $tgt_lang/date_slot.fst $date_slot_label \
  $tgt_lang/money_slot.fst $money_slot_label \
  $tgt_lang/time_slot.fst $time_slot_label \
  $tgt_lang/year_slot.fst $year_slot_label $tgt_lang/G.fst || exit 1;

fstisstochastic $tgt_lang/G.fst

# Compose the token, lexicon and language-model FST into the final decoding graph
fsttablecompose $lang/L.fst $tgt_lang/G.fst | fstdeterminizestar --use-log=true | \
    fstminimizeencoded | fstarcsort --sort_type=ilabel > $tgt_lang/LG.fst || exit 1;
fsttablecompose $lang/T.fst $tgt_lang/LG.fst > $tgt_lang/TLG.fst || exit 1;
rm $tgt_lang/LG.fst

echo "Composing decoding graph TLG.fst succeeded"

================================================
FILE: runtime/examples/custom_asr/local/train_lm_with_slot.sh
================================================
#!/bin/bash

# To be run from one directory above this script.
#
# Usage: local/train_lm_with_slot.sh [<work-dir>]
# Trains a 3-gram ARPA LM with SRILM from
#   <work-dir>/train_text
#   <work-dir>/local/dict/lexicon.txt
# and writes the results to <work-dir>/local/lm (lm.arpa, wordlist, ...).
. ./path.sh
src=${1:-ds2_graph_with_slot}
# run.sh keeps its work directory below exp/ (exp/ds2_graph_with_slot). When no
# directory is given and the historical location has no training text, use that one.
if [ $# -eq 0 ] && [ ! -f "$src/train_text" ] && [ -f "exp/$src/train_text" ]; then
  src=exp/$src
fi
text=$src/train_text
lexicon=$src/local/dict/lexicon.txt

dir=$src/local/lm
mkdir -p $dir

# Fail early when an input is missing. (This used to test the unset "$x",
# which made the check a no-op.)
for f in "$text" "$lexicon"; do
  [ ! -f "$f" ] && echo "$0: No such file $f" && exit 1;
done

# Check SRILM tools
if ! which ngram-count > /dev/null; then
  pushd $MAIN_ROOT/tools
  make srilm.done
  popd
fi

cleantext=$dir/text.no_oov

# Replace every token that is not a lexicon entry by <SPOKEN_NOISE>.
cat $text | awk -v lex=$lexicon 'BEGIN{while((getline<lex) >0){ seen[$1]=1; } }
  {for(n=1; n<=NF;n++) {  if (seen[$n]) { printf("%s ", $n); } else {printf("<SPOKEN_NOISE> ");} } printf("\n");}' \
  > $cleantext || exit 1;

# Word counts over the cleaned text; the first field of each line is skipped.
cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | sort | uniq -c | \
   sort -nr > $dir/word.counts || exit 1;
# Get counts from acoustic training transcripts, and add  one-count
# for each word in the lexicon (but not silence, we don't want it
# in the LM-- we'll add it optionally later).
cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \
  cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \
   sort | uniq -c | sort -nr > $dir/unigram.counts || exit 1;

# filter the words which are not in the text
# (a count of 1 comes only from the lexicon pass above)
cat $dir/unigram.counts | awk '$1>1{print $0}' | awk '{print $2}' | cat - <(echo "<s>"; echo "</s>" ) > $dir/wordlist

# kaldi_lm results
mkdir -p $dir
cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n<NF) printf " "; else print ""; }}' > $dir/train

ngram-count -text $dir/train -order 3 -limit-vocab -vocab $dir/wordlist -unk \
  -map-unk "<UNK>" -gt3max 0 -gt2max 0 -gt1max 0 -lm $dir/lm.arpa

#ngram-count -text $dir/train -order 3 -limit-vocab -vocab $dir/wordlist -unk \
#  -map-unk "<UNK>" -lm $dir/lm2.arpa

================================================
FILE: runtime/examples/custom_asr/path.sh
================================================
# This contains the locations of the binaries required for running the examples.
# It is meant to be sourced from the example directory (paths are derived from $PWD).

MAIN_ROOT=`realpath $PWD/../../../`
RUNTIME_ROOT=`realpath $MAIN_ROOT/runtime`
RUNTIME_EXAMPLES=$RUNTIME_ROOT/build/examples

# Byte-wise collation, so that `sort` output is the same on every machine.
# (Was misspelt as LC_AL, which no tool reads.)
export LC_ALL=C

# srilm
export LIBLBFGS=${MAIN_ROOT}/tools/liblbfgs-1.10
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:${LIBLBFGS}/lib/.libs
export SRILM=${MAIN_ROOT}/tools/srilm

# kaldi lm
KALDI_DIR=$RUNTIME_ROOT/build/engine/kaldi/
OPENFST_DIR=$RUNTIME_ROOT/fc_patch/openfst-build/src
# SPEECHX_EXAMPLES is not defined in this file; honour it when the caller exports
# it, otherwise fall back to RUNTIME_EXAMPLES.
# NOTE(review): presumably RUNTIME_EXAMPLES is the renamed SPEECHX_EXAMPLES - confirm
# that the decoder binaries really live under $RUNTIME_EXAMPLES/ds2_ol/decoder.
export PATH=${PATH}:${SRILM}/bin:${SRILM}/bin/i686-m64:$KALDI_DIR/lmbin:$KALDI_DIR/fstbin:$OPENFST_DIR/bin:${SPEECHX_EXAMPLES:-$RUNTIME_EXAMPLES}/ds2_ol/decoder


================================================
FILE: runtime/examples/custom_asr/run.sh
================================================
#!/bin/bash
# Staged recipe: build a TLG decoding graph whose language model contains slot
# non-terminals (<ADDRESS_SLOT>, <TIME_SLOT>, ...) and test it with recognizer_test_main.
# Stages 1..4 run when they fall inside [stage, stop_stage].
set +x
set -e

export GLOG_logtostderr=1

. ./path.sh || exit 1;

# ds2 means deepspeech2 (acoustic model type)
dir=$PWD/exp/ds2_graph_with_slot
data=$PWD/data
stage=0
stop_stage=10

mkdir -p $dir

model_dir=$PWD/resource/model
vocab=$model_dir/vocab.txt
cmvn=$data/cmvn.ark
text_with_slot=$data/text_with_slot
resource=$PWD/resource
# download resource
# (the presence of $cmvn is used as the marker that the archive was already unpacked)
if [ ! -f $cmvn ]; then
    wget -c https://paddlespeech.cdn.bcebos.com/s2t/paddle_asr_online/resource.tar.gz
    tar xzfv resource.tar.gz
    ln -s ./resource/data .
fi

# Stage 1: prepare the unit list, the LM training text and the lexicon.
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
  # make dict
  unit_file=$vocab
  mkdir -p $dir/local/dict
  cp $unit_file $dir/local/dict/units.txt
  cp $text_with_slot $dir/train_text
  utils/fst/prepare_dict.py --unit_file $unit_file --in_lexicon $data/lexicon.txt \
    --out_lexicon $dir/local/dict/lexicon.txt
  # add slot to lexicon, just in case the lm training script filter the slot.
  echo "<MONEY_SLOT> 一" >> $dir/local/dict/lexicon.txt
  echo "<DATE_SLOT> 一" >> $dir/local/dict/lexicon.txt
  echo "<ADDRESS_SLOT> 一" >> $dir/local/dict/lexicon.txt
  echo "<YEAR_SLOT> 一" >> $dir/local/dict/lexicon.txt
  echo "<TIME_SLOT> 一" >> $dir/local/dict/lexicon.txt
fi

# Stage 2: train the n-gram LM on the text that contains the slot symbols.
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
  # train lm
  lm=$dir/local/lm
  mkdir -p $lm
  # this script is different with the common lm training script
  # NOTE(review): it is invoked without arguments here and picks its own working
  # directory; make sure that resolves to $dir, because stage 3 reads
  # $dir/local/lm/lm.arpa.
  local/train_lm_with_slot.sh
fi

# Stage 3: compile T.fst / L.fst, the slot FSTs, and compose TLG.fst.
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
  # make T & L
  local/compile_lexicon_token_fst.sh $dir/local/dict $dir/local/tmp $dir/local/lang
  mkdir -p $dir/local/lang_test
  # make slot graph
  local/mk_slot_graph.sh $resource/graph $dir/local/lang_test
  # make TLG
  local/mk_tlg_with_slot.sh $dir/local/lm $dir/local/lang $dir/local/lang_test || exit 1;
  # keep the final graph next to words.txt, where stage 4 looks for it
  mv $dir/local/lang_test/TLG.fst $dir/local/lang/
fi

# Stage 4: decode $data/wav.scp with the new graph and score against data/wav.trans.
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
  # test TLG
  model_dir=$PWD/resource/model
  cmvn=$data/cmvn.ark
  wav_scp=$data/wav.scp
  graph=$dir/local/lang

  recognizer_test_main \
    --wav_rspecifier=scp:$wav_scp \
    --cmvn_file=$cmvn \
    --use_fbank=true \
    --model_path=$model_dir/avg_10.jit.pdmodel \
    --param_path=$model_dir/avg_10.jit.pdiparams \
    --model_cache_shapes="5-1-2048,5-1-2048" \
    --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
    --word_symbol_table=$graph/words.txt \
    --graph_path=$graph/TLG.fst --max_active=7500 \
    --acoustic_scale=12 \
    --result_wspecifier=ark,t:./exp/result_run.txt

    # the data/wav.trans is the label.
    utils/compute-wer.py --char=1 --v=1 data/wav.trans exp/result_run.txt > exp/wer_run
    # show the summary at the end of the WER report
    tail -n 7 exp/wer_run
fi


================================================
FILE: runtime/examples/text_lm/.gitignore
================================================
data


================================================
FILE: runtime/examples/text_lm/README.md
================================================
# Text PreProcess for building ngram LM

## Input

```
data/
|-- text
```

The input file is Kaldi-style: each line starts with the utterance ID (`utt`), followed by the text:
```
Y0000000000_--5llN02F84_S00000  怎么样这些日子住得还习惯吧
Y0000000000_--5llN02F84_S00002  挺好的
Y0000000000_--5llN02F84_S00003  对了美静这段日子经常不和我们一起用餐
Y0000000000_--5llN02F84_S00004  是不是对我回来有什么想法啊
Y0000000000_--5llN02F84_S00005  哪有的事啊
Y0000000000_--5llN02F84_S00006  她这两天挺累的身体也不太舒服
Y0000000000_--5llN02F84_S00007  我让她多睡一会那就好如果要是觉得不方便
Y0000000000_--5llN02F84_S00009  我就搬出去住
Y0000000000_--5llN02F84_S00010  你看你这个人你就是疑心太重
Y0000000000_--5llN02F84_S00011  你现在多好一切都井然有序的
```


## Output

```
data/
`-- text.tn
```

The output file looks like this:

```
BAC009S0002W0122 而 对 楼市 成交 抑制 作用 最 大 的 限 购
BAC009S0002W0123 也 成为 地方 政府 的 眼中 钉
BAC009S0002W0124 自 六月 底 呼和浩特 市 率先 宣布 取消 限 购 后
BAC009S0002W0125 各地 政府 便 纷纷 跟进
BAC009S0002W0126 仅 一 个 多 月 的 时间 里
BAC009S0002W0127 除了 北京 上海 广州 深圳 四 个 一 线 城市 和 三亚 之外
BAC009S0002W0128 四十六 个 限 购 城市 当中
BAC009S0002W0129 四十一 个 已 正式 取消 或 变相 放松 了 限 购
BAC009S0002W0130 财政 金融 政策 紧随 其后 而来
```


================================================
FILE: runtime/examples/text_lm/local/mmseg.py
================================================
#!/usr/bin/env python3
# modify from https://sites.google.com/site/homepageoffuyanwei/Home/remarksandexcellentdiscussion/page-2


class Word:
    """A dictionary word: its text, its frequency and its length in characters."""

    def __init__(self, text='', freq=0):
        self.text, self.freq = text, freq
        # Callers may overwrite `length` (e.g. with -1) to flag a placeholder word.
        self.length = len(self.text)


class Chunk:
    """A candidate segmentation consisting of one to three consecutive words."""

    def __init__(self, w1, w2=None, w3=None):
        # The first word is always kept; the optional ones only when truthy.
        self.words = [w1] + [w for w in (w2, w3) if w]

    # Total number of characters covered by the chunk.
    def totalWordLength(self):
        return sum(len(w.text) for w in self.words)

    # Average word length of the chunk.
    def averageWordLength(self):
        return float(self.totalWordLength()) / float(len(self.words))

    # Spread of the word lengths: the sum of squared deviations from the
    # average length (neither divided by the word count nor square-rooted).
    def standardDeviation(self):
        mean = self.averageWordLength()
        spread = 0.0
        for w in self.words:
            delta = len(w.text) - mean
            spread += float(delta) * float(delta)
        return spread

    # Sum of the word frequencies ("degree of morphemic freedom").
    def wordFrequency(self):
        total = 0
        for w in self.words:
            total += w.freq
        return total


class ComplexCompare:
    """The four MMSEG disambiguation rules, each a filter over candidate chunks."""

    def takeHightest(self, chunks, comparator):
        """Return the chunks that tie for the best score under `comparator`.

        `comparator(a, b)` is positive when `a` beats `b`. The list is
        reordered in place so that the winners occupy its head.
        """
        keep = 1
        for idx in range(1, len(chunks)):
            verdict = comparator(chunks[idx], chunks[0])
            if verdict > 0:
                # Strictly better than the current best: restart the winner list.
                keep = 0
            if verdict >= 0:
                chunks[keep], chunks[idx] = chunks[idx], chunks[keep]
                keep += 1
        return chunks[:keep]

    # The four functions below are the four filtering rules of the MMSEG
    # algorithm - the core of the method.

    # Rule 1: maximum matching (largest total length).
    def mmFilter(self, chunks):
        return self.takeHightest(
            chunks, lambda a, b: a.totalWordLength() - b.totalWordLength())

    # Rule 2: largest average word length.
    def lawlFilter(self, chunks):
        return self.takeHightest(
            chunks, lambda a, b: a.averageWordLength() - b.averageWordLength())

    # Rule 3: smallest variance of word lengths.
    def svmlFilter(self, chunks):
        return self.takeHightest(
            chunks, lambda a, b: b.standardDeviation() - a.standardDeviation())

    # Rule 4: largest summed word frequency.
    def logFreqFilter(self, chunks):
        return self.takeHightest(
            chunks, lambda a, b: a.wordFrequency() - b.wordFrequency())


# Word and character dictionaries, shared by the whole module.
# dictWord maps a word (or single character) to a (length, frequency) tuple;
# maxWordLength is the length of the longest entry loaded so far.
dictWord = {}
maxWordLength = 0


def loadDictChars(filepath, encoding=None):
    """Load the character dictionary into the module-level `dictWord`.

    Every non-blank line must consist of two whitespace-separated fields,
    `<frequency> <character>`. Each entry is stored as
    `dictWord[character] = (len(character), frequency)` and `maxWordLength`
    is raised to the longest entry seen.

    Args:
        filepath: path of the dictionary file.
        encoding: text encoding passed to `open`; `None` (the default) keeps
            the platform default, as before.

    Raises:
        ValueError: if a non-blank line does not have exactly two fields or
            the frequency is not an integer.
    """
    global maxWordLength
    # The context manager closes the file even when a malformed line raises.
    with open(filepath, encoding=encoding) as fsock:
        for line in fsock:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            freq, word = line.split()
            word = word.strip()
            dictWord[word] = (len(word), int(freq))
            maxWordLength = max(maxWordLength, len(word))


def loadDictWords(filepath, encoding=None):
    """Load the word dictionary into the module-level `dictWord`.

    Every non-blank line is taken, stripped, as one word and stored as
    `dictWord[word] = (len(word), 0)`; `maxWordLength` is raised to the
    longest entry seen.

    Args:
        filepath: path of the dictionary file.
        encoding: text encoding passed to `open`; `None` (the default) keeps
            the platform default, as before.
    """
    global maxWordLength
    # The context manager closes the file even if reading fails half-way.
    with open(filepath, encoding=encoding) as fsock:
        for line in fsock:
            word = line.strip()
            if not word:
                # A blank line is not a word; do not register an empty key.
                continue
            dictWord[word] = (len(word), 0)
            maxWordLength = max(maxWordLength, len(word))


# Look `word` up in dictWord: returns a Word carrying the stored frequency,
# or None when the word is not in the dictionary.
def getDictWord(word):
    entry = dictWord.get(word)
    return Word(word, entry[1]) if entry else None


# Load both dictionaries from the `data` directory that sits next to this file.
def run():
    import os
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    loadDictChars(os.path.join(data_dir, 'chars.dic'))
    loadDictWords(os.path.join(data_dir, 'words.dic'))


class Analysis:
    """MMSEG segmenter over one text.

    Iterating over an instance (or calling `getNextToken` repeatedly) yields
    string tokens in which every word is followed by a '/'. Runs of Chinese
    characters are segmented with the dictionary and the four
    `ComplexCompare` rules; other runs are split on whitespace/punctuation.
    """

    def __init__(self, text):
        self.text = text
        self.cacheSize = 3
        self.pos = 0
        self.textLength = len(self.text)
        self.cache = []
        self.cacheIndex = 0
        self.complexCompare = ComplexCompare()

        # Small ring cache of [position, candidate words]; position -1 never
        # matches a real cursor, so the initial entries are never returned.
        for i in range(self.cacheSize):
            self.cache.append([-1, Word()])

        # Make sure the dictionaries are loaded only once.
        if not dictWord:
            run()

    def __iter__(self):
        while True:
            token = self.getNextToken()
            if token is None:
                # End the generator with `return`: since PEP 479 a
                # `raise StopIteration` inside a generator body is turned
                # into a RuntimeError.
                return
            yield token

    def getNextChar(self):
        return self.text[self.pos]

    # Is `character` a Chinese character (Chinese punctuation excluded)?
    def isChineseChar(self, character):
        return 0x4e00 <= ord(character) < 0x9fa6

    # Is `ch` a printable ASCII character other than whitespace/punctuation?
    def isASCIIChar(self, ch):
        import string
        if ch in string.whitespace:
            return False
        if ch in string.punctuation:
            return False
        return ch in string.printable

    # Return the next segmentation result, or None at the end of the text.
    def getNextToken(self):
        while self.pos < self.textLength:
            if self.isChineseChar(self.getNextChar()):
                token = self.getChineseWords()
            else:
                # The trailing '/' makes this token non-empty even when no
                # ASCII word was found (e.g. the text was only punctuation).
                token = self.getASCIIWords() + '/'
            if len(token) > 0:
                return token
        return None

    # Cut out one non-Chinese word.
    def getASCIIWords(self):
        # Skip pre-word whitespaces and punctuations
        # (Chinese as well as ASCII ones).
        while self.pos < self.textLength:
            ch = self.getNextChar()
            if self.isASCIIChar(ch) or self.isChineseChar(ch):
                break
            self.pos += 1
        # Start position of the ASCII word.
        start = self.pos

        # Find the end position of the ASCII word.
        while self.pos < self.textLength:
            ch = self.getNextChar()
            if not self.isASCIIChar(ch):
                break
            self.pos += 1
        end = self.pos

        # Skip the whitespace and punctuation that follow the word.
        while self.pos < self.textLength:
            ch = self.getNextChar()
            if self.isASCIIChar(ch) or self.isChineseChar(ch):
                break
            self.pos += 1

        # Return the ASCII word.
        return self.text[start:end]

    # Cut out Chinese words at the cursor, disambiguating with the four rules.
    def getChineseWords(self):
        chunks = self.createChunks()
        if len(chunks) > 1:
            chunks = self.complexCompare.mmFilter(chunks)
        if len(chunks) > 1:
            chunks = self.complexCompare.lawlFilter(chunks)
        if len(chunks) > 1:
            chunks = self.complexCompare.svmlFilter(chunks)
        if len(chunks) > 1:
            chunks = self.complexCompare.logFreqFilter(chunks)
        if len(chunks) == 0:
            return ''

        # Only one segmentation is left (or the first of the remaining ties).
        words = chunks[0].words
        token = ""
        length = 0
        for x in words:
            if x.length == -1:
                # Placeholder: the character here had no dictionary match
                # (it may not even be Chinese). Stop, so that this character
                # and everything after it are handled by the next call
                # instead of being dropped or emitted at the wrong position.
                break
            token += x.text + "/"
            length += len(x.text)
        if length == 0:
            # The chunk starts with a placeholder: the Chinese character at
            # the cursor is not in the dictionary. Emit it as a one-character
            # word so the cursor always moves forward (otherwise
            # getNextToken would loop forever on this position).
            token = self.text[self.pos] + "/"
            length = 1
        self.pos += length
        return token

    # Enumerate the candidate chunks with three nested loops
    # (this could also be written recursively).
    def createChunks(self):
        chunks = []
        originalPos = self.pos
        words1 = self.getMatchChineseWords()

        for word1 in words1:
            self.pos += len(word1.text)
            if self.pos < self.textLength:
                words2 = self.getMatchChineseWords()
                for word2 in words2:
                    self.pos += len(word2.text)
                    if self.pos < self.textLength:
                        words3 = self.getMatchChineseWords()
                        for word3 in words3:
                            # print(word3.length, word3.text)
                            if word3.length == -1:
                                chunk = Chunk(word1, word2)
                            else:
                                chunk = Chunk(word1, word2, word3)
                            chunks.append(chunk)
                    elif self.pos == self.textLength:
                        chunks.append(Chunk(word1, word2))
                    self.pos -= len(word2.text)
            elif self.pos == self.textLength:
                chunks.append(Chunk(word1))
            self.pos -= len(word1.text)

        self.pos = originalPos
        return chunks

    # Collect every dictionary word that starts at the cursor
    # (forward matching, up to maxWordLength characters).
    def getMatchChineseWords(self):
        # use cache,check it
        for i in range(self.cacheSize):
            if self.cache[i][0] == self.pos:
                return self.cache[i][1]

        originalPos = self.pos
        words = []
        index = 0
        while self.pos < self.textLength:
            if index >= maxWordLength:
                break
            if not self.isChineseChar(self.getNextChar()):
                break
            self.pos += 1
            index += 1

            text = self.text[originalPos:self.pos]
            word = getDictWord(text)
            if word:
                words.append(word)

        self.pos = originalPos
        # No word found: insert a placeholder 'X' whose length is marked as -1.
        if not words:
            word = Word()
            word.length = -1
            word.text = 'X'
            words.append(word)

        self.cache[self.cacheIndex] = (self.pos, words)
        self.cacheIndex += 1
        if self.cacheIndex >= self.cacheSize:
            self.cacheIndex = 0
        return words


if __name__ == "__main__":

    def cuttest(text):
        """Segment `text`, print the '/'-separated result and a separator line.

        A failure inside the segmenter is reported on stderr (the part that
        was segmented so far is still printed) so that the remaining samples
        keep running without hiding the error.
        """
        import sys

        tmp = ""
        try:
            analysis = Analysis(text)
            token = analysis.getNextToken()
            while token is not None:
                tmp += token
                token = analysis.getNextToken()
        except Exception as e:
            print(
                "segmentation failed for %r: %r" % (text, e), file=sys.stderr)

        print(tmp)
        print("================================")

    cuttest(u"研究生命来源")
    cuttest(u"南京市长江大桥欢迎您")
    cuttest(u"请把手抬高一点儿")
    cuttest(u"长春市长春节致词。")
    cuttest(u"长春市长春药店。")
    cuttest(u"我的和服务必在明天做好。")
    cuttest(u"我发现有很多人喜欢他。")
    cuttest(u"我喜欢看电视剧大长今。")
    cuttest(u"半夜给拎起来陪看欧洲杯糊着两眼半晌没搞明白谁和谁踢。")
    cuttest(u"李智伟高高兴兴以及王晓薇出去玩，后来智伟和晓薇又单独去玩了。")
    cuttest(u"一次性交出去很多钱。 ")
    cuttest(u"这是一个伸手不见五指的黑夜。我叫孙悟空，我爱北京，我爱Python和C++。")
    cuttest(u"我不喜欢日本和服。")
    cuttest(u"雷猴回归人间。")
    cuttest(u"工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作")
    cuttest(u"我需要廉租房")
    cuttest(u"永和服装饰品有限公司")
    cuttest(u"我爱北京天安门")
    cuttest(u"abc")
    cuttest(u"隐马尔可夫")
    cuttest(u"雷猴是个好网站")
    cuttest(u"“Microsoft”一词由“MICROcomputer（微型计算机）”和“SOFTware（软件）”两部分组成")
    cuttest(u"草泥马和欺实马是今年的流行词汇")
    cuttest(u"伊藤洋华堂总府店")
    cuttest(u"中国科学院计算技术研究所")
    cuttest(u"罗密欧与朱丽叶")
    cuttest(u"我购买了道具和服装")
    cuttest(u"PS: 我觉得开源有一个好处，就是能够敦促自己不断改进，避免敞帚自珍")
    cuttest(u"湖北省石首市")
    cuttest(u"总经理完成了这件事情")
    cuttest(u"电脑修好了")
    cuttest(u"做好了这件事情就一了百了了")
    cuttest(u"人们审美的观点是不同的")
    cuttest(u"我们买了一个美的空调")
    cuttest(u"线程初始化时我们要注意")
    cuttest(u"一个分子是由好多原子组织成的")
    cuttest(u"祝你马到功成")
    cuttest(u"他掉进了无底洞里")
    cuttest(u"中国的首都是北京")
    cuttest(u"孙君意")
    cuttest(u"外交部发言人马朝旭")
    cuttest(u"领导人会议和第四届东亚峰会")
    cuttest(u"在过去的这五年")
    cuttest(u"还需要很长的路要走")
    cuttest(u"60周年首都阅兵")
    cuttest(u"你好人们审美的观点是不同的")
    cuttest(u"买水果然后来世博园")
    cuttest(u"买水果然后去世博园")
    cuttest(u"但是后来我才知道你是对的")
    cuttest(u"存在即合理")
    cuttest(u"的的的的的在的的的的就以和和和")
    cuttest(u"I love你，不以为耻，反以为rong")
    cuttest(u" ")
    cuttest(u"")
    cuttest(u"hello你好人们审美的观点是不同的")
    cuttest(u"很好但主要是基于网页形式")
    cuttest(u"hello你好人们审美的观点是不同的")
    cuttest(u"为什么我不能拥有想要的生活")
    cuttest(u"后来我才")
    cuttest(u"此次来中国是为了")
    cuttest(u"使用了它就可以解决一些问题")
    cuttest(u",使用了它就可以解决一些问题")
    cuttest(u"其实使用了它就可以解决一些问题")
    cuttest(u"好人使用了它就可以解决一些问题")
    cuttest(u"是因为和国家")
    cuttest(u"老年搜索还支持")
    cuttest(
        u"干脆就把那部蒙人的闲法给废了拉倒！RT @laoshipukong : 27日，全国人大常委会第三次审议侵权责任法草案，删除了有关医疗损害责任“举证倒置”的规定。在医患纠纷中本已处于弱势地位的消费者由此将陷入万劫不复的境地。 "
    )
    cuttest("2022年12月30日是星期几？")
    cuttest("二零二二年十二月三十日是星期几？")


================================================
FILE: runtime/examples/text_lm/path.sh
================================================
# Environment for the text_lm example; source it from the example directory
# (the roots are derived from $PWD).
MAIN_ROOT=`realpath $PWD/../../../`
ENGINE_ROOT=`realpath $MAIN_ROOT/runtime`

# Use the C locale for all tools. (Was misspelt as LC_AL, which no tool reads.)
export LC_ALL=C


================================================
FILE: runtime/examples/text_lm/run.sh
================================================
#!/bin/bash
# Text normalization and word segmentation of data/text for n-gram LM training.
# The variables below (stage, stop_stage, has_key, token_type) can be overridden
# on the command line through utils/parse_options.sh.
set -eo pipefail

. path.sh

stage=0
stop_stage=100
has_key=true
token_type=word

. utils/parse_options.sh || exit -1;

text=data/text

if [ ! -f $text ]; then
    # Report the path that was actually checked (this used to print "$1").
    echo "$0: Not find $text";
    exit -1;
fi

if [ $stage -le 0 ] && [ $stop_stage -ge 0 ];then
    echo "text tn & wordseg preprocess"
    # start from a clean output file
    rm -rf ${text}.tn
    python3 utils/zh_tn.py --has_key $has_key --token_type $token_type ${text} ${text}.tn
fi

================================================
FILE: runtime/examples/u2pp_ol/README.md
================================================
# U2/U2++ Streaming ASR

## Examples

* `wenetspeech` - Streaming Decoding with wenetspeech u2/u2++ model. Using aishell test data for testing.    


================================================
FILE: runtime/examples/u2pp_ol/wenetspeech/.gitignore
================================================
data
exp


================================================
FILE: runtime/examples/u2pp_ol/wenetspeech/README.md
================================================
# U2/U2++ Streaming ASR 

A C++ deployment example for the `PaddleSpeech/examples/wenetspeech/asr1` recipe. The model is the static model produced by `export`; to learn how to export it, see [here](../../../../examples/wenetspeech/asr1/). If you just want to use an already exported model, `run.sh` downloads one; the model link is in `run.sh`.

This example demonstrates how to use the u2/u2++ model to recognize `wav` files and compute the `CER`. AISHELL-1 is used as test data.

## Testing with Aishell Test Data

## Source path.sh

```bash
. path.sh
```

The SpeechX binaries are located under `$SPEECHX_BUILD` (run `echo $SPEECHX_BUILD` to see the path); see `path.sh` for more information.


### Download dataset and model

```
./run.sh --stop_stage 0
```

### process `cmvn` and compute feature

```bash
./run.sh --stage 1 --stop_stage 1
```

If you only want to convert the `cmvn` file format, use this command:

```bash 
./local/feat.sh --stage 1 --stop_stage 1
```

### Decoding using `feature` input

```
./run.sh --stage 2 --stop_stage 2
```

### Decoding using `wav` input

```
./run.sh --stage 3 --stop_stage 3
```

This stage uses `u2_recognizer_main` to recognize wav files.

The input is an `scp` file that looks like this:
```text
# head data/split1/1/aishell_test.scp 
BAC009S0764W0121        /workspace/PaddleSpeech/runtime/examples/u2pp_ol/wenetspeech/data/test/S0764/BAC009S0764W0121.wav
BAC009S0764W0122        /workspace/PaddleSpeech/runtime/examples/u2pp_ol/wenetspeech/data/test/S0764/BAC009S0764W0122.wav
...
BAC009S0764W0125        /workspace/PaddleSpeech/runtime/examples/u2pp_ol/wenetspeech/data/test/S0764/BAC009S0764W0125.wav
```

If you want to recognize a single wav, create an `scp` file like this:
```text
key  path/to/wav/file
```

Then pass it via the `--wav_rspecifier=` flag of the `u2_recognizer_main` binary. For the meaning of the other flags, see `--help`:
```bash
u2_recognizer_main --help
```

For an example of using the `u2_recognizer_main` binary, see `local/recognizer.sh`.

### Decoding with `wav` using quant model

`local/recognizer_quant.sh` is the same as `local/recognizer.sh`, but uses the quantized model.


## Results

Please see [here](./RESULTS.md).


================================================
FILE: runtime/examples/u2pp_ol/wenetspeech/RESULTS.md
================================================
# aishell test

7176 utts, duration 36108.9 sec.

## U2++ Attention Rescore

> Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz, support `avx512_vnni`
> RTF with feature and decoder which is more end to end.

### FP32

`local/recognizer.sh`

#### CER

```
Overall -> 5.75 % N=104765 C=99035 S=5587 D=143 I=294
Mandarin -> 5.75 % N=104762 C=99035 S=5584 D=143 I=294
English -> 0.00 % N=0 C=0 S=0 D=0 I=0
Other -> 100.00 % N=3 C=0 S=3 D=0 I=0
```

#### RTF 

```
I1027 10:52:38.662868 51665 recognizer_main.cc:122] total wav duration is: 36108.9 sec
I1027 10:52:38.662858 51665 recognizer_main.cc:121] total cost:9577.31 sec
I1027 10:52:38.662876 51665 recognizer_main.cc:123] RTF is: 0.265234
```

### INT8

`local/recognizer_quant.sh`

#### CER

```
Overall -> 5.83 % N=104765 C=98943 S=5675 D=147 I=286
Mandarin -> 5.83 % N=104762 C=98943 S=5672 D=147 I=286
English -> 0.00 % N=0 C=0 S=0 D=0 I=0
Other -> 100.00 % N=3 C=0 S=3 D=0 I=0
```

#### RTF 

```
I1110 09:59:52.551712 37249 u2_recognizer_main.cc:122] total wav duration is: 36108.9 sec
I1110 09:59:52.551717 37249 u2_recognizer_main.cc:123] total decode cost:9737.63 sec
I1110 09:59:52.551723 37249 u2_recognizer_main.cc:124] RTF is: 0.269674
```

### TLG decoder without attention rescore

`local/recognizer_wfst.sh`

#### CER

```
Overall -> 4.73 % N=104765 C=100001 S=4283 D=481 I=187
Mandarin -> 4.72 % N=104762 C=100001 S=4280 D=481 I=187
Other -> 100.00 % N=3 C=0 S=3 D=0 I=0
```

#### RTF
```
I0417 08:07:15.300631 75784 recognizer_main.cc:113] total wav duration is: 36108.9 sec
I0417 08:07:15.300642 75784 recognizer_main.cc:114] total decode cost:10247.7 sec
I0417 08:07:15.300648 75784 recognizer_main.cc:115] total rescore cost:908.228 sec
I0417 08:07:15.300653 75784 recognizer_main.cc:116] RTF is: 0.283
```


================================================
FILE: runtime/examples/u2pp_ol/wenetspeech/local/aishell_train_lms.sh
================================================
#!/bin/bash

# To be run from one directory above this script.
#
# Trains a 3-gram ARPA LM with SRILM and reports its perplexity on a held-out set.
. ./path.sh

nj=40
text=data/local/lm/text
lexicon=data/local/dict/lexicon.txt

# Fail early when an input is missing. (This used to test the unset "$x",
# which made the check a no-op.)
for f in "$text" "$lexicon"; do
  [ ! -f "$f" ] && echo "$0: No such file $f" && exit 1;
done

# Check SRILM tools
if ! which ngram-count > /dev/null; then
    echo "srilm tools are not found, please download it and install it from: "
    echo "http://www.speech.sri.com/projects/srilm/download.html"
    echo "Then add the tools to your PATH"
    exit 1
fi

# This script takes no arguments.  It assumes you have already run
# aishell_data_prep.sh.
# It takes as input the files
# data/local/lm/text
# data/local/dict/lexicon.txt
dir=data/local/lm
mkdir -p $dir

cleantext=$dir/text.no_oov

# oov to <SPOKEN_NOISE>
# lexicon line: word char0 ... charn
# text line: utt word0 ... wordn -> line: <SPOKEN_NOISE> word0 ... wordn
text_dir=$(dirname $text)
split_name=$(basename $text)
./local/split_data.sh $text_dir $text $split_name $nj

utils/run.pl JOB=1:$nj $text_dir/split${nj}/JOB/${split_name}.no_oov.log \
  cat ${text_dir}/split${nj}/JOB/${split_name} \| awk -v lex=$lexicon 'BEGIN{while((getline<lex) >0){ seen[$1]=1; } }
    {for(n=1; n<=NF;n++) {  if (seen[$n]) { printf("%s ", $n); } else {printf("<SPOKEN_NOISE> ");} } printf("\n");}' \
    \> ${text_dir}/split${nj}/JOB/${split_name}.no_oov || exit 1;
cat ${text_dir}/split${nj}/*/${split_name}.no_oov  > $cleantext

# compute word counts, sort in descending order
# line: count word
cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | sort --parallel=`nproc` | uniq -c | \
   sort --parallel=`nproc` -nr > $dir/word.counts || exit 1;

# Get counts from acoustic training transcripts, and add  one-count
# for each word in the lexicon (but not silence, we don't want it
# in the LM-- we'll add it optionally later).
cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \
  cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \
   sort --parallel=`nproc` | uniq -c | sort --parallel=`nproc` -nr > $dir/unigram.counts || exit 1;

# word with <s> </s>
cat $dir/unigram.counts | awk '{print $2}' | cat - <(echo "<s>"; echo "</s>" ) > $dir/wordlist

# hold out to compute ppl
heldout_sent=10000 # Don't change this if you want result to be comparable with kaldi_lm results

mkdir -p $dir
# The first $heldout_sent sentences are held out ...
cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n<NF) printf " "; else print ""; }}' | \
  head -$heldout_sent > $dir/heldout
# ... and training starts at the sentence after them. `tail -n +K` begins AT
# line K, so K must be heldout_sent+1; otherwise the last held-out sentence
# would also be part of the training data.
cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n<NF) printf " "; else print ""; }}' | \
  tail -n +$((heldout_sent + 1)) > $dir/train

ngram-count -text $dir/train -order 3 -limit-vocab -vocab $dir/wordlist -unk \
  -map-unk "<UNK>" -kndiscount -interpolate -lm $dir/lm.arpa
ngram -lm $dir/lm.arpa -ppl $dir/heldout

================================================
FILE: runtime/examples/u2pp_ol/wenetspeech/local/decode.sh
================================================
#!/bin/bash
# Decode pre-computed fbank features with the CTC prefix beam search decoder
# (one run.pl job per data split), merge the per-job results and score them.
set -e

. path.sh

# Experiment layout.
data=data
exp=exp
nj=20
mkdir -p ${exp}

# Model directory and reference transcript.
ckpt_dir=./data/model
model_dir=${ckpt_dir}/asr1_chunk_conformer_u2pp_wenetspeech_static_1.3.0.model/
text=${data}/test/text

# Per-job directory; JOB is the placeholder declared by "JOB=1:$nj" below.
job_dir=${data}/split${nj}/JOB

utils/run.pl JOB=1:${nj} ${job_dir}/decoder.log \
ctc_prefix_beam_search_decoder_main \
    --model_path=${model_dir}/export.jit \
    --word_symbol_table=${model_dir}/unit.txt \
    --nnet_decoder_chunk=16 \
    --receptive_field_length=7 \
    --subsampling_rate=4 \
    --feature_rspecifier=scp:${job_dir}/fbank.scp \
    --result_wspecifier=ark,t:${job_dir}/result_decode.ark

# Merge every job's hypotheses, compute the character-level error report
# and show its last lines.
hyp=${exp}/aishell.decode.rsl
report=${exp}/aishell.decode.err
cat ${data}/split${nj}/*/result_decode.ark > ${hyp}
utils/compute-wer.py --char=1 --v=1 ${text} ${hyp} > ${report}
tail -n 7 ${report}


================================================
FILE: runtime/examples/u2pp_ol/wenetspeech/local/feat.sh
================================================
#!/bin/bash
# Compute fbank features for the AISHELL test wav list, one run.pl job per split.
set -e

. path.sh

# Defaults assigned before utils/parse_options.sh is sourced
# (presumably overridable from the command line -- confirm in parse_options.sh).
nj=20
stage=-1
stop_stage=100

. utils/parse_options.sh

data=data
exp=exp
mkdir -p ${exp}

ckpt_dir=./data/model
model_dir=${ckpt_dir}/asr1_chunk_conformer_u2pp_wenetspeech_static_1.3.0.model/
aishell_wav_scp=aishell_test.scp

# Per-job directory; JOB is the placeholder declared by "JOB=1:$nj" below.
job_dir=${data}/split${nj}/JOB

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # Split the wav list into ${nj} pieces under ${data}/split${nj}/<n>/.
    ./local/split_data.sh ${data} ${data}/${aishell_wav_scp} ${aishell_wav_scp} ${nj}

    # Each job reads its own wav list and writes fbank.ark / fbank.scp next to it.
    utils/run.pl JOB=1:${nj} ${job_dir}/feat.log \
    compute_fbank_main \
        --num_bins 80 \
        --cmvn_file=${model_dir}/mean_std.json \
        --streaming_chunk=36 \
        --wav_rspecifier=scp:${job_dir}/${aishell_wav_scp} \
        --feature_wspecifier=ark,scp:${job_dir}/fbank.ark,${job_dir}/fbank.scp

    echo "compute fbank feature."
fi


================================================
FILE: runtime/examples/u2pp_ol/wenetspeech/local/nnet.sh
================================================
#!/bin/bash
# Run the u2 nnet forward pass over pre-computed fbank features, one run.pl job
# per data split, and collect encoder outputs and log-probabilities in $exp.
set -e

. path.sh

nj=20
data=data
exp=exp

mkdir -p "$exp"
ckpt_dir=./data/model
model_dir=$ckpt_dir/asr1_chunk_conformer_u2pp_wenetspeech_static_1.3.0.model/

# Per-job directory; JOB is the placeholder declared by "JOB=1:$nj" below.
job_dir=$data/split${nj}/JOB

# Every job writes its own archives. Previously all $nj parallel jobs wrote to
# the same $exp/encoder_outs.ark and $exp/logprobs.ark and overwrote each other.
utils/run.pl JOB=1:$nj "$job_dir/nnet.log" \
u2_nnet_main \
    --model_path=$model_dir/export.jit \
    --vocab_path=$model_dir/unit.txt \
    --feature_rspecifier=ark,t:$job_dir/fbank.ark \
    --nnet_decoder_chunk=16 \
    --receptive_field_length=7 \
    --subsampling_rate=4 \
    --acoustic_scale=1.0 \
    --nnet_encoder_outs_wspecifier=ark,t:$job_dir/encoder_outs.ark \
    --nnet_prob_wspecifier=ark,t:$job_dir/logprobs.ark

# Text archives can simply be concatenated; keep the original output locations.
cat "$data"/split${nj}/*/encoder_outs.ark > "$exp/encoder_outs.ark"
cat "$data"/split${nj}/*/logprobs.ark > "$exp/logprobs.ark"
echo "u2 nnet decode."


================================================
FILE: runtime/examples/u2pp_ol/wenetspeech/local/recognizer.sh
================================================
#!/bin/bash
# Recognize the AISHELL test wavs end-to-end (fbank + u2pp model) with
# recognizer_main, one run.pl job per split, then score the merged result.
set -e

# Defaults assigned before utils/parse_options.sh is sourced.
data=data
exp=exp
nj=20

. utils/parse_options.sh

mkdir -p ${exp}
ckpt_dir=./data/model
model_dir=${ckpt_dir}/asr1_chunk_conformer_u2pp_wenetspeech_static_1.3.0.model/
aishell_wav_scp=aishell_test.scp
text=${data}/test/text

# Split the wav list into ${nj} pieces under ${data}/split${nj}/<n>/.
./local/split_data.sh ${data} ${data}/${aishell_wav_scp} ${aishell_wav_scp} ${nj}

# Per-job directory; JOB is the placeholder declared by "JOB=1:$nj" below.
job_dir=${data}/split${nj}/JOB

utils/run.pl JOB=1:${nj} ${job_dir}/recognizer.log \
recognizer_main \
    --use_fbank=true \
    --num_bins=80 \
    --cmvn_file=${model_dir}/mean_std.json \
    --model_path=${model_dir}/export.jit \
    --word_symbol_table=${model_dir}/unit.txt \
    --nnet_decoder_chunk=16 \
    --receptive_field_length=7 \
    --subsampling_rate=4 \
    --wav_rspecifier=scp:${job_dir}/${aishell_wav_scp} \
    --result_wspecifier=ark,t:${job_dir}/result_recognizer.ark


# Merge the per-job hypotheses and compute the character-level error report.
hyp=${exp}/aishell_recognizer
cat ${data}/split${nj}/*/result_recognizer.ark > ${hyp}
utils/compute-wer.py --char=1 --v=1 ${text} ${hyp} > ${exp}/aishell.recognizer.err
echo "recognizer test have finished!!!"
echo "please checkout in $exp/aishell.recognizer.err"
tail -n 7 ${exp}/aishell.recognizer.err


================================================
FILE: runtime/examples/u2pp_ol/wenetspeech/local/recognizer_fastdeploy.sh
================================================
#!/bin/bash
# Recognize the AISHELL test wavs with recognizer_main using the ONNX model
# (--with_onnx_model=true), one run.pl job per split, then score the result.
set -e

# Defaults assigned before utils/parse_options.sh is sourced.
data=data
exp=exp
nj=20

. utils/parse_options.sh

mkdir -p ${exp}
ckpt_dir=./data/model
model_dir=${ckpt_dir}/onnx_model/
aishell_wav_scp=aishell_test.scp
text=${data}/test/text

# Split the wav list into ${nj} pieces under ${data}/split${nj}/<n>/.
./local/split_data.sh ${data} ${data}/${aishell_wav_scp} ${aishell_wav_scp} ${nj}

# Per-job directory; JOB is the placeholder declared by "JOB=1:$nj" below.
job_dir=${data}/split${nj}/JOB

utils/run.pl JOB=1:${nj} ${job_dir}/recognizer.fd.log \
recognizer_main \
    --use_fbank=true \
    --num_bins=80 \
    --model_path=${model_dir} \
    --word_symbol_table=${model_dir}/unit.txt \
    --nnet_decoder_chunk=16 \
    --receptive_field_length=7 \
    --subsampling_rate=4 \
    --with_onnx_model=true \
    --wav_rspecifier=scp:${job_dir}/${aishell_wav_scp} \
    --result_wspecifier=ark,t:${job_dir}/recognizer.fd.rsl.ark


# Merge the per-job hypotheses and compute the character-level error report.
hyp=${exp}/aishell.recognizer.fd.rsl
cat ${data}/split${nj}/*/recognizer.fd.rsl.ark > ${hyp}
utils/compute-wer.py --char=1 --v=1 ${text} ${hyp} > ${exp}/aishell.recognizer.fd.err
echo "recognizer fd test have finished!!!"
echo "please checkout in $exp/aishell.recognizer.fd.err"
tail -n 7 ${exp}/aishell.recognizer.fd.err


================================================
FILE: runtime/examples/u2pp_ol/wenetspeech/local/recognizer_quant.sh
================================================
#!/bin/bash
# Recognize the AISHELL test wavs with recognizer_main using the quantized
# u2pp model, one run.pl job per split, then score the merged result.
set -e

# Defaults assigned before utils/parse_options.sh is sourced.
data=data
exp=exp
nj=20

. utils/parse_options.sh

mkdir -p ${exp}
ckpt_dir=./data/model
model_dir=${ckpt_dir}/asr1_chunk_conformer_u2pp_wenetspeech_static_quant_1.3.0.model/
aishell_wav_scp=aishell_test.scp
text=${data}/test/text

# Split the wav list into ${nj} pieces under ${data}/split${nj}/<n>/.
./local/split_data.sh ${data} ${data}/${aishell_wav_scp} ${aishell_wav_scp} ${nj}

# Per-job directory; JOB is the placeholder declared by "JOB=1:$nj" below.
job_dir=${data}/split${nj}/JOB

utils/run.pl JOB=1:${nj} ${job_dir}/recognizer.quant.log \
recognizer_main \
    --use_fbank=true \
    --num_bins=80 \
    --cmvn_file=${model_dir}/mean_std.json \
    --model_path=${model_dir}/export \
    --word_symbol_table=${model_dir}/unit.txt \
    --nnet_decoder_chunk=16 \
    --receptive_field_length=7 \
    --subsampling_rate=4 \
    --wav_rspecifier=scp:${job_dir}/${aishell_wav_scp} \
    --result_wspecifier=ark,t:${job_dir}/recognizer.quant.rsl.ark


# Merge the per-job hypotheses and compute the character-level error report.
hyp=${exp}/aishell.recognizer.quant.rsl
cat ${data}/split${nj}/*/recognizer.quant.rsl.ark > ${hyp}
utils/compute-wer.py --char=1 --v=1 ${text} ${hyp} > ${exp}/aishell.recognizer.quant.err
echo "recognizer quant test have finished!!!"
echo "please checkout in $exp/aishell.recognizer.quant.err"
tail -n 7 ${exp}/aishell.recognizer.quant.err


================================================
FILE: runtime/examples/u2pp_ol/wenetspeech/local/recognizer_wfst.sh
================================================
#!/bin/bash
# Recognize the AISHELL test wavs with recognizer_main using a TLG decoding
# graph, one run.pl job per split, then score the merged result.
set -e

# Defaults assigned before utils/parse_options.sh is sourced.
data=data
exp=exp
nj=20

. utils/parse_options.sh

mkdir -p ${exp}
ckpt_dir=./data/model
model_dir=${ckpt_dir}/asr1_chunk_conformer_u2pp_wenetspeech_static_1.3.0.model/
aishell_wav_scp=aishell_test.scp
text=${data}/test/text

# Split the wav list into ${nj} pieces under ${data}/split${nj}/<n>/.
./local/split_data.sh ${data} ${data}/${aishell_wav_scp} ${aishell_wav_scp} ${nj}

# Decoding graph and its word table.
lang_dir=./data/lang_test/
graph=${lang_dir}/TLG.fst
word_table=${lang_dir}/words.txt

if [ ! -f ${graph} ]; then
    # Download a prebuilt graph; to build it yourself see local/run_build_tlg.sh.
    mkdir -p ${lang_dir}
    pushd ${lang_dir}
    wget -c https://paddlespeech.cdn.bcebos.com/speechx/examples/ngram/zh/tlg.zip
    unzip tlg.zip
    popd
fi

# Per-job directory; JOB is the placeholder declared by "JOB=1:$nj" below.
job_dir=${data}/split${nj}/JOB

utils/run.pl JOB=1:${nj} ${job_dir}/recognizer_wfst.log \
recognizer_main \
    --use_fbank=true \
    --num_bins=80 \
    --cmvn_file=${model_dir}/mean_std.json \
    --model_path=${model_dir}/export.jit \
    --graph_path=${graph} \
    --word_symbol_table=${word_table} \
    --nnet_decoder_chunk=16 \
    --receptive_field_length=7 \
    --subsampling_rate=4 \
    --wav_rspecifier=scp:${job_dir}/${aishell_wav_scp} \
    --rescoring_weight=0.0 \
    --acoustic_scale=2 \
    --result_wspecifier=ark,t:${job_dir}/result_recognizer_wfst.ark


# Merge the per-job hypotheses and compute the character-level error report.
hyp=${exp}/aishell_recognizer_wfst
cat ${data}/split${nj}/*/result_recognizer_wfst.ark > ${hyp}
utils/compute-wer.py --char=1 --v=1 ${text} ${hyp} > ${exp}/aishell.recognizer_wfst.err
echo "recognizer test have finished!!!"
echo "please checkout in $exp/aishell.recognizer_wfst.err"
tail -n 7 ${exp}/aishell.recognizer_wfst.err


================================================
FILE: runtime/examples/u2pp_ol/wenetspeech/local/recognizer_wfst_fastdeploy.sh
================================================
#!/bin/bash
# Recognize the AISHELL test wavs with recognizer_main using the model under
# data/model/onnx_model and a TLG decoding graph, one run.pl job per split,
# then score the merged result.
set -e

# Defaults assigned before utils/parse_options.sh is sourced.
data=data
exp=exp
nj=20

. utils/parse_options.sh

mkdir -p ${exp}
ckpt_dir=./data/model
model_dir=${ckpt_dir}/onnx_model/
aishell_wav_scp=aishell_test.scp
text=${data}/test/text

# Split the wav list into ${nj} pieces under ${data}/split${nj}/<n>/.
./local/split_data.sh ${data} ${data}/${aishell_wav_scp} ${aishell_wav_scp} ${nj}

# Decoding graph and its word table.
lang_dir=./data/lang_test/
graph=${lang_dir}/TLG.fst
word_table=${lang_dir}/words.txt

if [ ! -f ${graph} ]; then
    # Download a prebuilt graph; to build it yourself see local/run_build_tlg.sh.
    mkdir -p ${lang_dir}
    pushd ${lang_dir}
    wget -c https://paddlespeech.cdn.bcebos.com/speechx/examples/ngram/zh/tlg.zip
    unzip tlg.zip
    popd
fi

# Per-job directory; JOB is the placeholder declared by "JOB=1:$nj" below.
job_dir=${data}/split${nj}/JOB

utils/run.pl JOB=1:${nj} ${job_dir}/recognizer_wfst_fd.log \
recognizer_main \
    --use_fbank=true \
    --num_bins=80 \
    --model_path=${model_dir} \
    --graph_path=${graph} \
    --word_symbol_table=${word_table} \
    --nnet_decoder_chunk=16 \
    --receptive_field_length=7 \
    --subsampling_rate=4 \
    --wav_rspecifier=scp:${job_dir}/${aishell_wav_scp} \
    --rescoring_weight=0.0 \
    --acoustic_scale=2 \
    --result_wspecifier=ark,t:${job_dir}/result_recognizer_wfst_fd.ark


# Merge the per-job hypotheses and compute the character-level error report.
hyp=${exp}/aishell_recognizer_wfst_fd
cat ${data}/split${nj}/*/result_recognizer_wfst_fd.ark > ${hyp}
utils/compute-wer.py --char=1 --v=1 ${text} ${hyp} > ${exp}/aishell.recognizer_wfst_fd.err
echo "recognizer test have finished!!!"
echo "please checkout in $exp/aishell.recognizer_wfst_fd.err"
tail -n 7 ${exp}/aishell.recognizer_wfst_fd.err


================================================
FILE: runtime/examples/u2pp_ol/wenetspeech/local/run_build_tlg.sh
================================================
#!/bin/bash
# Build the TLG decoding graph used by the WFST recognizer:
#   stage -1: download a prebuilt n-gram package
#   stage  0: prepare units and lexicon
#   stage  1: train the n-gram LM
#   stage  2: compile the T/L FSTs and compose TLG
set -eo pipefail

#. path.sh

# NOTE: this vocab belongs to the acoustic model below; a different acoustic
# model comes with a different vocab.
ckpt_dir=data/model/asr1_chunk_conformer_u2pp_wenetspeech_static_1.3.0.model
unit=${ckpt_dir}/vocab.txt       # vocab file, line: char/spm_piece

stage=2
stop_stage=100
corpus=aishell
lexicon=data/lexicon.txt  # line: word ph0 ... phn, aishell/resource_aishell/lexicon.txt
text=data/text            # line: utt text, aishell/data_aishell/transcript/aishell_transcript_v0.8.txt filter by data/train/text

. utils/parse_options.sh

data=${PWD}/data
mkdir -p ${data}

# stage -1: fetch and unpack the n-gram package unless the tarball is already there.
if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ] && [ ! -f ${data}/speech.ngram.zh.tar.gz ]; then
    pushd ${data}
    wget -c http://paddlespeech.cdn.bcebos.com/speechx/examples/ngram/zh/speech.ngram.zh.tar.gz
    tar xvzf speech.ngram.zh.tar.gz
    popd
fi

# The vocab file is mandatory for every later stage.
[ -f ${unit} ] || {
    echo "$0: No such file $unit"
    exit 1;
}

# SRILM provides ngram-count; build it from the tools directory when absent.
# NOTE(review): MAIN_ROOT is not set in this script (path.sh is commented out) -- confirm.
if ! which ngram-count; then
    pushd ${MAIN_ROOT}/tools
    make srilm.done
    popd
fi

echo "done."
dict_dir=data/local/dict
mkdir -p ${dict_dir}
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    # Units file, line: char/spm_piece
    cp ${unit} ${dict_dir}/units.txt

    # Derive a lexicon from the transcript when none is provided.
    if [ ! -f ${lexicon} ]; then
        utils/text_to_lexicon.py --has_key true --text ${text} --lexicon ${lexicon}
        echo "Generate $lexicon from $text"
    fi

    # Filter by vocab.
    # line: word ph0 ... phn -> line: word char0 ... charn
    utils/fst/prepare_dict.py \
        --unit_file ${unit} \
        --in_lexicon ${lexicon} \
        --out_lexicon ${dict_dir}/lexicon.txt
fi

lm=data/local/lm
mkdir -p ${lm}

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # Train the n-gram LM on the transcript.
    cp ${text} ${lm}/text
    local/aishell_train_lms.sh
    echo "build LM done."
fi

# build TLG
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # build T & L
    utils/fst/compile_lexicon_token_fst.sh \
        ${dict_dir} data/local/tmp data/local/lang

    # build G & TLG
    utils/fst/make_tlg.sh data/local/lm data/local/lang data/lang_test || exit 1;
fi


================================================
FILE: runtime/examples/u2pp_ol/wenetspeech/local/split_data.sh
================================================
#!/usr/bin/env bash
# Split a list file into <num-split> pieces.
#
# Usage: split_data.sh <data-dir> <scp-file> <split-name> <num-split>
# Piece n is written to <data-dir>/split<num-split>/<n>/<split-name>.

set -eo pipefail

data=$1
scp=$2
split_name=$3
numsplit=$4

if [[ ! ${numsplit} -gt 0 ]]; then
  echo "$0: Invalid num-split argument";
  exit 1;
fi

# Root directory that holds the numbered sub-directories.
split_root=${data}/split${numsplit}

# One directory and one output file per piece.
directories=$(for n in $(seq ${numsplit}); do echo ${split_root}/${n}; done)
scp_splits=$(for n in $(seq ${numsplit}); do echo ${split_root}/${n}/${split_name}; done)

# Create all directories at once; if that fails (e.g. the argument list is
# too long), fall back to creating them one by one.
if ! mkdir -p ${directories} >&/dev/null; then
  for n in $(seq ${numsplit}); do
    mkdir -p ${split_root}/${n}
  done
fi

echo "utils/split_scp.pl $scp $scp_splits"
utils/split_scp.pl ${scp} ${scp_splits}


================================================
FILE: runtime/examples/u2pp_ol/wenetspeech/path.sh
================================================
# Environment setup for the examples: locations of the built binaries.
# Meant to be sourced (". path.sh") from the example directory.

unset GREP_OPTIONS

ENGINE_ROOT=$PWD/../../../
ENGINE_BUILD=$ENGINE_ROOT/build/Linux/x86_64/engine/asr

ENGINE_TOOLS=$ENGINE_ROOT/tools
TOOLS_BIN=$ENGINE_TOOLS/valgrind/install/bin

# Only warn here: run.sh may still build the runtime when the directory is missing.
[ -d $ENGINE_BUILD ] || { echo "Error: 'build/runtime' directory not found. please ensure that the project build successfully"; }

# Fixed typo: the variable was spelled LC_AL, which no program reads.
export LC_ALL=C

export PATH=$PATH:$TOOLS_BIN:$ENGINE_BUILD/nnet:$ENGINE_BUILD/decoder:$ENGINE_BUILD/../common/frontend/audio:$ENGINE_BUILD/recognizer:../../../fc_patch/openfst/bin:$ENGINE_BUILD/../kaldi/fstbin:$ENGINE_BUILD/../kaldi/lmbin

#PADDLE_LIB_PATH=$(python -c "import os; import paddle; include_dir=paddle.sysconfig.get_include(); paddle_dir=os.path.split(include_dir)[0]; libs_dir=os.path.join(paddle_dir, 'libs'); fluid_dir=os.path.join(paddle_dir, 'fluid'); out=':'.join([libs_dir, fluid_dir]); print(out);")
# Prepend PADDLE_LIB_PATH only when it is set: an empty leading entry in
# LD_LIBRARY_PATH would make the loader search the current directory.
export LD_LIBRARY_PATH=${PADDLE_LIB_PATH:+$PADDLE_LIB_PATH:}$LD_LIBRARY_PATH


================================================
FILE: runtime/examples/u2pp_ol/wenetspeech/run.sh
================================================
#!/bin/bash
# Demo pipeline for the u2pp wenetspeech streaming ASR runtime:
#   stage 0: download the models and the test data
#   stage 1: recognize the test wavs with the float model
#   stage 2: recognize the test wavs with the quantized model
#   stage 3: recognize the test wavs with the WFST (TLG) decoder
set -e

. path.sh

nj=40
stage=-1
stop_stage=100

. utils/parse_options.sh

# input
data=data
exp=exp
mkdir -p $exp $data
aishell_wav_scp=aishell_test.scp

# 1. compile
# path.sh defines ENGINE_ROOT / ENGINE_BUILD. The previous check used the
# undefined SPEECHX_BUILD / SPEECHX_ROOT, so the build step could never run.
if [ ! -d "${ENGINE_BUILD}" ]; then
    pushd "${ENGINE_ROOT}"
    bash build.sh
    popd
fi

ckpt_dir=$data/model

if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ];then
    #  download u2pp model
    if [ ! -f $ckpt_dir/asr1_chunk_conformer_u2pp_wenetspeech_static_1.3.0.model.tar.gz ]; then
        mkdir -p $ckpt_dir
        pushd $ckpt_dir

        wget -c https://paddlespeech.cdn.bcebos.com/s2t/wenetspeech/asr1/static/asr1_chunk_conformer_u2pp_wenetspeech_static_1.3.0.model.tar.gz
        tar xzfv asr1_chunk_conformer_u2pp_wenetspeech_static_1.3.0.model.tar.gz

        popd
    fi

    # download u2pp quant model
    if [ ! -f $ckpt_dir/asr1_chunk_conformer_u2pp_wenetspeech_static_quant_1.3.0.model.tar.gz ]; then
        mkdir -p $ckpt_dir
        pushd $ckpt_dir

        wget -c https://paddlespeech.cdn.bcebos.com/s2t/wenetspeech/asr1/static/asr1_chunk_conformer_u2pp_wenetspeech_static_quant_1.3.0.model.tar.gz
        tar xzfv asr1_chunk_conformer_u2pp_wenetspeech_static_quant_1.3.0.model.tar.gz

        popd
    fi

    # single test wav and its scp (use $data rather than a literal "data")
    if [ ! -f $data/wav.scp ]; then
        mkdir -p $data
        pushd $data
        wget -c https://paddlespeech.cdn.bcebos.com/PaddleAudio/zh.wav
        echo "utt1 " $PWD/zh.wav > wav.scp
        popd 
    fi

    # aishell test set: download, then build "<utt_id> <abs wav path>" scp
    if [ ! -d $data/test ]; then
        pushd $data
        wget -c https://paddlespeech.cdn.bcebos.com/s2t/paddle_asr_online/aishell_test.zip
        unzip  aishell_test.zip
        popd

        realpath $data/test/*/*.wav > $data/wavlist
        # utt id = wav file name without directory and extension
        awk -F '/' '{ print $(NF) }' $data/wavlist | awk -F '.' '{ print $1 }' > $data/utt_id
        paste $data/utt_id $data/wavlist > $data/$aishell_wav_scp
    fi
fi

if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    # decode with wav input
    ./local/recognizer.sh
fi

if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
    # decode with wav input using the quantized model
    ./local/recognizer_quant.sh
fi

if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # decode with wfst 
    ./local/recognizer_wfst.sh
fi


================================================
FILE: runtime/examples/vad/.gitignore
================================================
data


================================================
FILE: runtime/examples/vad/README.md
================================================
# Silero VAD - pre-trained enterprise-grade Voice Activity Detector

This directory provides VAD models on CPU/GPU.

![](https://user-images.githubusercontent.com/36505480/198026365-8da383e0-5398-4a12-b7f8-22c2c0059512.png)


## VAD Interface

For the VAD interface, please see [engine/vad/interface](../../engine/vad/interface/).

### Create Handle

```c++
PPSHandle_t PPSVadCreateInstance(const char* conf_path);
```

### Destroy Handle

```c++
int PPSVadDestroyInstance(PPSHandle_t instance);
```

### Reset Vad State

```c++
int PPSVadReset(PPSHandle_t instance);
```

Reset Vad state before processing next `wav`.

### Get Chunk Size

```c++
int PPSVadChunkSizeSamples(PPSHandle_t instance);
```

This API returns the chunk size in samples.
When running forward, feed `chunk size` samples per call, except for the last chunk.

### Vad Forward

```c++
PPSVadState_t PPSVadFeedForward(PPSHandle_t instance,
                                float* chunk,
                                int num_element);
```

Vad has below states:
```c++
typedef enum {
    PPS_VAD_ILLEGAL = 0,  // error
    PPS_VAD_SIL,          // silence
    PPS_VAD_START,        // start speech
    PPS_VAD_SPEECH,       // in speech
    PPS_VAD_END,          // end speech
    PPS_VAD_NUMSTATES,    // number of states
} PPSVadState_t;
```

If an error occurs, `PPSVadFeedForward` returns the `PPS_VAD_ILLEGAL` state.


## Linux

### Build Runtime
```bash
# cd /path/to/paddlespeech/runtime
cmake -B build -DBUILD_SHARED_LIBS=OFF -DWITH_ASR=OFF -DWITH_CLS=OFF -DWITH_VAD=ON
cmake --build build
```

Since VAD uses the FastDeploy runtime, if you already have another FastDeploy library, you can build with this command:

```bash
# cd /path/to/paddlespeech/runtime
cmake -B build -DBUILD_SHARED_LIBS=OFF -DWITH_ASR=OFF -DWITH_CLS=OFF -DWITH_VAD=ON -DFASTDEPLOY_INSTALL_DIR=/workspace//paddle/FastDeploy/build/Linux/x86_64/install
cmake --build build
```

`FASTDEPLOY_INSTALL_DIR` (passed as `-DFASTDEPLOY_INSTALL_DIR=...`) is the directory of the FastDeploy library.

### Run Demo

After a successful build, run the demo from this example directory:

```bash 
bash run.sh
```

The output looks like this:

```bash
/workspace//PaddleSpeech/runtime/engine/vad/nnet/vad.cc(88)::SetConfig  sr=16 threshold=0.5 beam=0.15 frame_ms=32 min_silence_duration_ms=200 speech_pad_left_ms=0 speech_pad_right_ms=0[INFO] fastdeploy/runtime/runtime.cc(293)::CreateOrtBackend     Runtime initialized with Backend::ORT in Device::CPU./workspace//PaddleSpeech/runtime/engine/vad/nnet/vad.cc(137)::Initialize        init done.[SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [STA] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [END] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [STA] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SIL] [SIL] [SIL] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [END] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [STA] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [END] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [STA] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] 
[SIL] [SIL] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [END] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] 
RTF=0.00774591
speak start: 0.32 s, end: 2.464 s | speak start: 3.296 s, end: 4.64 s | speak start: 5.408 s, end: 7.872 s | speak start: 8.192 s, end: 10.72 s   
vad_nnet_main done!
sr = 16000
frame_ms = 32
threshold = 0.5
beam = 0.15
min_silence_duration_ms = 200
speech_pad_left_ms = 0
speech_pad_right_ms = 0
model_path = ./data/silero_vad/silero_vad.onnx
param_path = (default)num_cpu_thread = 1(default)/workspace//PaddleSpeech/runtime/engine/vad/nnet/vad.cc(88)::SetConfig  sr=16 threshold=0.5 beam=0.15 frame_ms=32 min_silence_duration_ms=200 speech_pad_left_ms=0 speech_pad_right_ms=0[INFO] fastdeploy/runtime/runtime.cc(293)::CreateOrtBackend     Runtime initialized with Backend::ORT in Device::CPU./workspace//PaddleSpeech/runtime/engine/vad/nnet/vad.cc(137)::Initialize        init done.
1 1 1 1 1 1 1 1 1 1 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 2 3 3 3 3 3 3 3 3 3 3 3 1 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 
RTF=0.00778218
vad_interface_main done!
```

## Android

To use this on Android, set up your `NDK` environment first, then run:

```bash
# cd /path/to/paddlespeech/runtime
bash build_android.sh
```

## Result

| Arch | RTF | Runtime Size |
|--|--|--|
| x86_64    | 0.00778218 |  |
| arm64-v8a | 0.00744745 | ~10.532MB |

## Machine Information

#### x86_64

The environment as below:

```text
Architecture:        x86_64
CPU op-mode(s):      32-bit, 64-bit
Byte Order:          Little Endian
CPU(s):              80
On-line CPU(s) list: 0-79
Thread(s) per core:  2
Core(s) per socket:  20
Socket(s):           2
NUMA node(s):        2
Vendor ID:           GenuineIntel
CPU family:          6
Model:               85
Model name:          Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz
Stepping:            7
CPU MHz:             2599.998
BogoMIPS:            5199.99
Hypervisor vendor:   KVM
Virtualization type: full
L1d cache:           32K
L1i cache:           32K
L2 cache:            1024K
L3 cache:            33792K
NUMA node0 CPU(s):   0-39
NUMA node1 CPU(s):   40-79
Flags:               fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon rep_good nopl xtopology nonstop_tsc eagerfpu pni pclmulqdq monitor ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb ibrs_enhanced fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 arat umip pku ospke avx512_vnni spec_ctrl arch_capabilities
```

#### arm64-v8a

```text
Processor       : AArch64 Processor rev 14 (aarch64)
processor       : 0
BogoMIPS        : 38.40
Features        : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop
CPU implementer : 0x51
CPU architecture: 8
CPU variant     : 0xd
CPU part        : 0x805
CPU revision    : 14

processor       : 1
BogoMIPS        : 38.40
Features        : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop
CPU implementer : 0x51
CPU architecture: 8
CPU variant     : 0xd
CPU part        : 0x805
CPU revision    : 14

processor       : 2
BogoMIPS        : 38.40
Features        : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop
CPU implementer : 0x51
CPU architecture: 8
CPU variant     : 0xd
CPU part        : 0x805
CPU revision    : 14

processor       : 3
BogoMIPS        : 38.40
Features        : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop
CPU implementer : 0x51
CPU architecture: 8
CPU variant     : 0xd
CPU part        : 0x805
CPU revision    : 14

processor       : 4
BogoMIPS        : 38.40
Features        : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop
CPU implementer : 0x51
CPU architecture: 8
CPU variant     : 0xd
CPU part        : 0x804
CPU revision    : 14

processor       : 5
BogoMIPS        : 38.40
Features        : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop
CPU implementer : 0x51
CPU architecture: 8
CPU variant     : 0xd
CPU part        : 0x804
CPU revision    : 14

processor       : 6
BogoMIPS        : 38.40
Features        : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop
CPU implementer : 0x51
CPU architecture: 8
CPU variant     : 0xd
CPU part        : 0x804
CPU revision    : 14

processor       : 7
BogoMIPS        : 38.40
Features        : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop
CPU implementer : 0x51
CPU architecture: 8
CPU variant     : 0xd
CPU part        : 0x804
CPU revision    : 14

Hardware        : Qualcomm Technologies, Inc SM8150
```


## Download Pre-trained ONNX Model

For developers' testing, the exported VAD model is provided below and can be downloaded directly.

| Model                                                        | Size  | Notes                                                        |
| :----------------------------------------------------------- | :---- | :----------------------------------------------------------- |
| [silero-vad](https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz) | 1.8MB | This model file is sourced from [snakers4/silero-vad](https://github.com/snakers4/silero-vad), MIT License |


## FastDeploy Runtime

For FastDeploy software and hardware requirements and pre-built libraries, please see [FastDeploy](https://github.com/PaddlePaddle/FastDeploy):

- 1. [FastDeploy Environment Requirements](https://github.com/PaddlePaddle/FastDeploy/docs/en/build_and_install/download_prebuilt_libraries.md).
- 2. [FastDeploy Precompiled Library](https://github.com/PaddlePaddle/FastDeploy/docs/en/build_and_install/download_prebuilt_libraries.md).


## Reference
* https://github.com/snakers4/silero-vad
* https://github.com/PaddlePaddle/FastDeploy/blob/develop/examples/audio/silero-vad/README.md


================================================
FILE: runtime/examples/vad/conf/vad.ini
================================================
[model]
model_path=./data/silero_vad/silero_vad.onnx

[vad]
sr = 16000 # 16k
frame_ms = 32 # 32, 64, 96 for 16k
threshold = 0.5
beam = 0.15
min_silence_duration_ms = 200
speech_pad_left_ms = 0
speech_pad_right_ms = 0


================================================
FILE: runtime/examples/vad/local/build.sh
================================================
# Configure a CMake build tree in ./build for Android (arm64-v8a, android-21)
# using the NDK toolchain file and a FastDeploy Android package.

# NDK location and the toolchain file it ships.
ANDROID_NDK=/workspace/zhanghui/android-sdk/android-ndk-r25c
TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake
ANDROID_TOOLCHAIN=clang

# FastDeploy Android package to build against.
FASTDEPLOY_INSTALL_DIR=./fdlib/fastdeploy-android-1.0.3-shared/

cmake -B build \
      -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \
      -DCMAKE_BUILD_TYPE=Release \
      -DANDROID_ABI=arm64-v8a \
      -DANDROID_NDK=${ANDROID_NDK} \
      -DANDROID_PLATFORM=android-21 \
      -DANDROID_STL=c++_shared \
      -DANDROID_TOOLCHAIN=${ANDROID_TOOLCHAIN} \
      -DFASTDEPLOY_INSTALL_DIR=${FASTDEPLOY_INSTALL_DIR} \
      -Wno-dev


================================================
FILE: runtime/examples/vad/local/build_android.sh
================================================
# Configure and build the runtime for Android with the NDK CMake toolchain.
#
# ANDROID_NDK and FASTDEPLOY_INSTALL_DIR may be supplied through the
# environment; the previous hard-coded values are kept as defaults.

# Stop at the first failing step so that a failed mkdir/cd/cmake does not
# run the following commands in the wrong directory or on a broken tree.
set -e

ANDROID_NDK=${ANDROID_NDK:-/workspace/zhanghui/android-sdk/android-ndk-r25c}
FASTDEPLOY_INSTALL_DIR=${FASTDEPLOY_INSTALL_DIR:-./fdlib/fastdeploy-android-1.0.4-shared/}

# Setting up Android toolchain
ANDROID_ABI=arm64-v8a  # 'arm64-v8a', 'armeabi-v7a'
ANDROID_PLATFORM="android-21"  # API >= 21
ANDROID_STL=c++_shared  # 'c++_shared', 'c++_static'
ANDROID_TOOLCHAIN=clang  # 'clang' only
TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake

# Fail early with a clear message when the NDK path is wrong.
if [ ! -f "${TOOLCHAIN_FILE}" ]; then
    echo "$0: Android toolchain file not found: ${TOOLCHAIN_FILE} (set ANDROID_NDK)"
    exit 1
fi

# Create build directory
BUILD_ROOT=build/Android
BUILD_DIR=${BUILD_ROOT}/${ANDROID_ABI}-api-21
#FASDEPLOY_INSTALL_DIR="${BUILD_DIR}/install"
#mkdir build && mkdir ${BUILD_ROOT} && mkdir ${BUILD_DIR}
mkdir -p "${BUILD_DIR}"
cd "${BUILD_DIR}"

# CMake configuration with Android toolchain; the source tree is three
# levels above the build directory.
cmake -DCMAKE_TOOLCHAIN_FILE="${TOOLCHAIN_FILE}" \
      -DCMAKE_BUILD_TYPE=MinSizeRel \
      -DANDROID_ABI=${ANDROID_ABI} \
      -DANDROID_NDK="${ANDROID_NDK}" \
      -DANDROID_PLATFORM=${ANDROID_PLATFORM} \
      -DANDROID_STL=${ANDROID_STL} \
      -DANDROID_TOOLCHAIN=${ANDROID_TOOLCHAIN} \
      -DFASTDEPLOY_INSTALL_DIR="${FASTDEPLOY_INSTALL_DIR}" \
      -Wno-dev ../../..

# Build the configured targets
make -j8


================================================
FILE: runtime/examples/vad/local/decode.sh
================================================
[File too large to display: 343 B]

================================================
FILE: runtime/examples/vad/local/download.sh
================================================
#!/bin/bash
# Download the silero VAD model and a sample wav into ./data.

# Abort on the first failure: without this, a failed model download or
# extraction was hidden by the exit status of the final wget.
set -e

mkdir -p data
cd data

# -c resumes a partial download and skips a complete one.
wget -c https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz

# Extract only when the model directory is not there yet.
test -e silero_vad || tar zxvf silero_vad.tgz

wget -c https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad_sample.wav


================================================
FILE: runtime/examples/vad/path.sh
================================================
# Environment setup for the VAD example: locations of the built binaries.
# Meant to be sourced (". path.sh") from the example directory.

unset GREP_OPTIONS

ENGINE_ROOT=$PWD/../../
ENGINE_BUILD=$ENGINE_ROOT/build/Linux/x86_64/engine/vad

ENGINE_TOOLS=$ENGINE_ROOT/tools
TOOLS_BIN=$ENGINE_TOOLS/valgrind/install/bin

# Only warn here: run.sh builds the runtime when the directory is missing.
[ -d $ENGINE_BUILD ] || { echo "Error: 'build/runtime' directory not found. please ensure that the project build successfully"; }

# Fixed typo: the variable was spelled LC_AL, which no program reads.
export LC_ALL=C

export PATH=$PATH:$TOOLS_BIN:$ENGINE_BUILD/nnet:$ENGINE_BUILD/interface

# Prepend PADDLE_LIB_PATH only when it is set: an empty leading entry in
# LD_LIBRARY_PATH would make the loader search the current directory.
export LD_LIBRARY_PATH=${PADDLE_LIB_PATH:+$PADDLE_LIB_PATH:}$LD_LIBRARY_PATH


================================================
FILE: runtime/examples/vad/run.sh
================================================
#!/bin/bash
# VAD demo driver:
#   stage 0: download the model and the sample wav
#   stage 1: run the decode script
set -e

. path.sh

# Defaults assigned before utils/parse_options.sh is sourced.
nj=40
stage=-1
stop_stage=100

. utils/parse_options.sh

# Working directories.
data=data
exp=exp
mkdir -p ${exp} ${data}

# Build the runtime first when the build directory from path.sh is missing.
if [ ! -d ${ENGINE_BUILD} ]; then
    pushd ${ENGINE_ROOT} 
    bash build.sh

    # for Android armv8/armv7 use instead:
    # bash build_android.sh
    popd
fi


# stage 0: fetch model and sample audio
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
    ./local/download.sh
fi


# stage 1: run VAD on the sample
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
    ./local/decode.sh 
fi


================================================
FILE: runtime/examples/vad/vad-android-demo/.gradle/6.1.1/gc.properties
================================================


================================================
FILE: runtime/examples/vad/vad-android-demo/.gradle/buildOutputCleanup/cache.properties
================================================
#Wed Mar 29 16:03:45 CST 2023
gradle.version=6.1.1


================================================
FILE: runtime/examples/vad/vad-android-demo/.gradle/vcs-1/gc.properties
================================================


================================================
FILE: runtime/examples/vad/vad-android-demo/LICENSE.md
================================================
Copyright 2019 Georgiy Konovalov

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and 
associated documentation files (the "Software"), to deal in the Software without restriction, 
including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, 
and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, 
subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial 
portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT 
NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

================================================
FILE: runtime/examples/vad/vad-android-demo/README
================================================
'''
INSTALL
'''
执行目录为speechx根目录
sh build_android.sh
cp build/Android/arm64-v8a-api-21/vad-android-out/*.so examples/vad/vad-android-demo/vad/src/main/cpp/jniLibs/arm64-v8a
cp build/Android/arm64-v8a-api-21/vad-android-out/vad_interface.h examples/vad/vad-android-demo/vad/src/main/cpp/includes/

'''
USAGE
'''
1. 根据vad资源实际放在手机中的位置，更改vad-android-demo/app/src/main/cpp/vad/src/main/cpp/native-lib.cpp中的conf_path、fp_pcm_name、fp_log_name
2. 在AndroidStudio中编译执行


================================================
FILE: runtime/examples/vad/vad-android-demo/README.md
================================================
This VAD library can process audio in real-time utilizing 
[Gaussian Mixture Model](http://en.wikipedia.org/wiki/Mixture_model#Gaussian_mixture_model) (GMM)
which helps identify the presence of human speech in an audio sample that contains a mixture of speech 
and noise. VAD works offline and all processing is done on the device.
  
Library based on 
[WebRTC VAD](https://chromium.googlesource.com/external/webrtc/+/branch-heads/43/webrtc/common_audio/vad/) 
from Google which is reportedly one of the best available: it's fast, modern and free.
This algorithm has found wide adoption and has recently become one of 
the gold standards for delay-sensitive scenarios like web-based interaction.
  
If you are looking for higher accuracy and faster processing time, I recommend using Deep Neural 
Networks (DNN). For reference, please see the following paper with a 
[DNN vs GMM](https://www.microsoft.com/en-us/research/uploads/prod/2018/02/KoPhiliposeTashevZarar_ICASSP_2018.pdf)
comparison.

<p align="center">
<img src="https://raw.githubusercontent.com/gkonovalov/android-vad/master/demo.gif" alt="drawing" height="400"/>
</p>

## Parameters
The VAD library only accepts a 16-bit mono PCM audio stream and works with the following Sample Rates, Frame Sizes and Classifiers. 
<table>
<tr>
<td>
&nbsp

| Valid Sample Rate  | Valid Frame Size  |   
|:-------------------|:------------------|   
| 8000Hz             | 80, 160, 240      |  
| 16000Hz            | 160, 320, 480     |   
| 32000Hz            | 320, 640, 960     |   
| 48000Hz            | 480, 960, 1440    |   
</td>
<td>
&nbsp

| Valid Classifiers |
|:------------------|
| NORMAL            |
| LOW_BITRATE       |
| AGGRESSIVE        |
| VERY_AGGRESSIVE   |
</td>
</tr>
</table>


**Silence duration (ms)** - this parameter is used in the Continuous Speech detector;
its value defines the necessary and sufficient 
duration of negative results to recognize the audio as silence.
 
**Voice duration (ms)** - this parameter is used in the Continuous Speech detector;
its value defines the necessary and sufficient 
duration of positive results to recognize the audio as speech.

Recommended parameters:
* Sample Rate - **16KHz**,
* Frame Size - **160**,
* Mode - **VERY_AGGRESSIVE**,
* Silence Duration - **500ms**,
* Voice Duration - **500ms**;

## Usage
VAD supports 2 different ways of detecting speech:
1. Continuous Speech listener was designed to detect long utterances 
without returning false positive results when user makes pauses between 
sentences.
```java
 Vad vad = new Vad(VadConfig.newBuilder()
                .setSampleRate(VadConfig.SampleRate.SAMPLE_RATE_16K)
                .setFrameSize(VadConfig.FrameSize.FRAME_SIZE_160)
                .setMode(VadConfig.Mode.VERY_AGGRESSIVE)
                .setSilenceDurationMillis(500)
                .setVoiceDurationMillis(500)
                .build());

        vad.start();
        
        vad.addContinuousSpeechListener(short[] audioFrame, new VadListener() {
            @Override
            public void onSpeechDetected() {
                //speech detected!
            }

            @Override
            public void onNoiseDetected() {
                //noise detected!
            }
        });
        
        vad.stop();
```

2. Speech detector was designed to detect speech/noise in small audio 
frames and return result for every frame. This method will not work for 
long utterances.
```java
 Vad vad = new Vad(VadConfig.newBuilder()
                .setSampleRate(VadConfig.SampleRate.SAMPLE_RATE_16K)
                .setFrameSize(VadConfig.FrameSize.FRAME_SIZE_160)
                .setMode(VadConfig.Mode.VERY_AGGRESSIVE)
                .build());

        vad.start();
        
        boolean isSpeech = vad.isSpeech(short[] audioFrame);
        
        vad.stop();
```
## Requirements
Android VAD supports Android 4.1 (Jelly Bean) and later.

## Development

To open the project in Android Studio:

1. Go to *File* menu or the *Welcome Screen*
2. Click on *Open...*
3. Navigate to VAD's root directory.
4. Select `settings.gradle`

## Download
[![](https://jitpack.io/v/gkonovalov/android-vad.svg)](https://jitpack.io/#gkonovalov/android-vad)


Gradle is the only supported build configuration, so just add the dependency to your project `build.gradle` file:
1. Add it in your root build.gradle at the end of repositories:
```groovy
allprojects {
   repositories {
     maven { url 'https://jitpack.io' }
   }
}
```

2. Add the dependency
```groovy
dependencies {
    implementation 'com.github.gkonovalov:android-vad:1.0.1'
}
```

You also can download precompiled AAR library and APK files from GitHub's [releases page](https://github.com/gkonovalov/android-vad/releases).

------------
Georgiy Konovalov 2021 (c) [MIT License](https://opensource.org/licenses/MIT)

================================================
FILE: runtime/examples/vad/vad-android-demo/build.gradle
================================================
// Top-level build file where you can add configuration options common to all sub-projects/modules.

// Repositories and classpath used to resolve the Gradle plugins of the build itself.
buildscript {
    repositories {
        google()
        jcenter()
        maven { url 'https://jitpack.io' }
    }

    dependencies {
        // Android Gradle plugin.
        classpath 'com.android.tools.build:gradle:4.0.2'
        // android-maven Gradle plugin.
        classpath 'com.github.dcendents:android-maven-gradle-plugin:2.1'
        // NOTE: Do not place your application dependencies here; they belong
        // in the individual module build.gradle files
    }
}

// Repositories shared by every project of this build for dependency resolution.
allprojects {
    repositories {
        google()
        jcenter()
        maven { url 'https://jitpack.io' }
    }
}

// `clean` deletes the build directory of the root project.
task clean(type: Delete) {
    delete rootProject.buildDir
}


================================================
FILE: runtime/examples/vad/vad-android-demo/example/.gitignore
================================================
/build


================================================
FILE: runtime/examples/vad/vad-android-demo/example/build.gradle
================================================
// Build configuration of the example application module.
apply plugin: 'com.android.application'

android {
    compileSdkVersion 29
    buildToolsVersion "29.0.2"
    defaultConfig {
        applicationId "com.konovalov.vad.example"
        minSdkVersion 16
        targetSdkVersion 29
        versionCode 2
        versionName "1.0.1"

        // Base name of the produced archives: "Android-VAD-v" followed by versionName.
        setProperty("archivesBaseName", "Android-VAD-v" + versionName)

        testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
    }
    buildTypes {
        release {
            // Minification is turned off for release builds; the ProGuard files are still declared.
            minifyEnabled false
            proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
        }
    }
}

dependencies {
    // Any jar dropped into the module's libs/ directory.
    implementation fileTree(dir: 'libs', include: ['*.jar'])
    implementation 'androidx.appcompat:appcompat:1.3.0'
    implementation 'androidx.constraintlayout:constraintlayout:2.0.4'

    implementation 'com.airbnb.android:lottie:3.4.0'
    implementation 'org.permissionsdispatcher:permissionsdispatcher:4.8.0'
    annotationProcessor 'org.permissionsdispatcher:permissionsdispatcher-processor:4.8.0'
    // The sibling ':vad' module of this build.
    implementation project(path: ':vad')

    // Test-only dependencies (local unit tests and instrumented tests).
    testImplementation 'junit:junit:4.12'
    androidTestImplementation 'androidx.test:runner:1.3.0'
    androidTestImplementation 'androidx.test.espresso:espresso-core:3.3.0'
    implementation 'com.google.android.material:material:1.3.0'
}


================================================
FILE: runtime/examples/vad/vad-android-demo/example/local.properties
================================================
## This file must *NOT* be checked into Version Control Systems,
# as it contains information specific to your local configuration.
#
# Location of the SDK. This is only used by Gradle.
# For customization when using a Version Control System, please read the
# header note.
#Thu Mar 30 16:37:21 CST 2023
sdk.dir=/Users/masimeng/Library/Android/sdk
ndk.dir=/Users/masimeng/Library/Android/sdk/ndk/23.1.7779620/


================================================
FILE: runtime/examples/vad/vad-android-demo/example/proguard-rules.pro
================================================
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
#   http://developer.android.com/guide/developing/tools/proguard.html

# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
#   public *;
#}

# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable

# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile


================================================
FILE: runtime/examples/vad/vad-android-demo/example/src/androidTest/java/com/konovalov/vad/example/ExampleInstrumentedTest.java
================================================
package com.konovalov.vad.example;

import android.content.Context;

import androidx.test.platform.app.InstrumentationRegistry;
import androidx.test.runner.AndroidJUnit4;

import org.junit.Test;
import org.junit.runner.RunWith;

import static org.junit.Assert.*;

/**
 * Instrumented test (runs on an Android device) that checks the package name
 * reported by the context of the application under test.
 *
 * @see <a href="http://d.android.com/tools/testing">Testing documentation</a>
 */
@RunWith(AndroidJUnit4.class)
public class ExampleInstrumentedTest {
    @Test
    public void useAppContext() {
        // Resolve the context of the app under test, then read its package name.
        final Context targetContext = InstrumentationRegistry.getInstrumentation().getTargetContext();
        final String actualPackage = targetContext.getPackageName();

        assertEquals("com.konovalov.vad.example", actualPackage);
    }
}


================================================
FILE: runtime/examples/vad/vad-android-demo/example/src/main/AndroidManifest.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<!-- Manifest of the VAD example application (package com.konovalov.vad.example). -->
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:tools="http://schemas.android.com/tools"
    package="com.konovalov.vad.example">

    <!-- Permissions requested by the application. -->
    <uses-permission android:name="android.permission.RECORD_AUDIO" />
    <uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE"></uses-permission>
    <!-- tools:ignore silences the "ProtectedPermissions" tooling check for this entry. -->
    <uses-permission android:name="android.permission.MOUNT_UNMOUNT_FILESYSTEMS"
        tools:ignore="ProtectedPermissions"></uses-permission>

    <application
        android:allowBackup="true"
        android:icon="@mipmap/ic_launcher"
        android:label="@string/app_name"
        android:roundIcon="@mipmap/ic_launcher_round"
        android:screenOrientation="portrait"
        android:theme="@style/AppTheme.NoActionBar">
        <!-- MainActivity is the launcher entry point (MAIN action + LAUNCHER category). -->
        <activity android:name=".MainActivity">
            <intent-filter>
                <action android:name="android.intent.action.MAIN" />

                <category android:name="android.intent.category.LAUNCHER" />
            </intent-filter>
        </activity>
    </application>

</manifest>

================================================
FILE: runtime/examples/vad/vad-android-demo/example/src/main/java/com/konovalov/vad/example/MainActivity.java
================================================
[File too large to display: 7.4 KB]

================================================
FILE: runtime/examples/vad/vad-android-demo/example/src/main/java/com/konovalov/vad/example/recorder/VoiceRecorder.java
================================================
package com.konovalov.vad.example.recorder;

import android.media.AudioFormat;
import android.media.AudioRecord;
import android.media.MediaRecorder;
import android.util.Log;

import com.konovalov.vad.example.recorder.VoiceRecorderConfig;

import com.konovalov.vad.Vad;
//import com.konovalov.vad.VadConfig;
import com.konovalov.vad.VadListener;

import static android.media.AudioFormat.CHANNEL_IN_MONO;
import static android.media.AudioFormat.CHANNEL_IN_STEREO;

import java.io.File;

/**
 * Created by George Konovalov on 11/16/2019.
 */

public class VoiceRecorder {
    private static final int PCM_CHANNEL = CHANNEL_IN_MONO;
    private static final int PCM_ENCODING_BIT = AudioFormat.ENCODING_PCM_16BIT;

    private VoiceRecorderConfig config;
    private Vad vad;
    private AudioRecord audioRecord;
    private Listener callback;
    private Thread thread;

    private boolean isListening = false;

    private static final String TAG = VoiceRecorder.class.getSimpleName();

    public VoiceRecorder(Listener callback, VoiceRecorderConfig config) {
        this.callback = callback;
        this.config = config;
        this.vad = new Vad();
    }

    public void updateConfig(VoiceRecorderConfig config) {
        this.config = config;
    }

    public void start() {
        stop();
        audioRecord = createAudioRecord();
        if (audioRecord != null) {
            isListening = true;
            audioRecord.startRecording();

            thread = new Thread(new ProcessVoice());
            thread.start();
            vad.start();
        } else {
            Log.w(TAG, "Failed start Voice Recorder!");
        }
    }


    public void stop() {
        isListening = false;
        if (thread != null) {
            thread.interrupt();
            thread = null;
        }
        if (audioRecord != null) {
            try {
                audioRecord.release();
            } catch (Exception e) {
                Log.e(TAG, "Error stop AudioRecord ", e);
            }
            audioRecord = null;
        }
        if (vad != null) {
            vad.stop();
        }
    }


    private AudioRecord createAudioRecord() {
        try {
            final int minBufSize = AudioRecord.getMinBufferSize(config.getSampleRate().getValue(), PCM_CHANNEL, PCM_ENCODING_BIT);
            int frame_size = config.getFrameSize().getValue();
            if (minBufSize > frame_size) {
                Log.e(TAG, "minBufSize > frame_size");
                return null;
            }
            Log.i(TAG, "minBufSize : " + minBufSize);
            final AudioRecord audioRecord = new AudioRecord(MediaRecorder.AudioSource.MIC, config.getSampleRate().getValue(), PCM_CHANNEL, PCM_ENCODING_BIT, frame_size);
            Log.i(TAG, "config.getSampleRate().getValue() : " + config.getSampleRate().getValue());

            if (audioRecord.getState() == AudioRecord.STATE_INITIALIZED) {
                return audioRecord;
            } else {
                audioRecord.release();
            }
        } catch (IllegalArgumentException e) {
            Log.e(TAG, "Error can't create AudioRecord ", e);
        }

        return null;
    }

    private int getNumberOfChannels() {
        switch (PCM_CHANNEL) {
            case CHANNEL_IN_MONO:
                return 1;
            case CHANNEL_IN_STEREO:
                return 2;
        }
        return 1;
    }

    private class ProcessVoice implements Runnable {

        @Override
        public void run() {
            android.os.Process.setThreadPriority(android.os.Process.THREAD_PRIORITY_AUDIO);
            final int minBufSize = AudioRecord.getMinBufferSize(config.getSampleRate().getValue(), PCM_CHANNEL, PCM_ENCODING_BIT);
            while (!Thread.interrupted() && isListening && audioRecord != null) {
                short[] buffer = new short[config.getFrameSize().getValue()];
                audioRecord.read(buffer, 0, buffer.length);
                detectSpeech(buffer);
            }
        }

        private void detectSpeech(short[] buffer) {
            vad.addContinuousSpeechListener(buffer, new VadListener() {
                @Override
                public void onSpeechDetected() {
                    callback.onSpeechDetected();
                }

                @Override
                public void onNoiseDetected() {
                    callback.onNoiseDetected();
                }
            });
        }
    }

    public interface Listener {
        void onSpeechDetected();

        void onNoiseDetected();
    }

}


================================================
FILE: runtime/examples/vad/vad-android-demo/example/src/main/java/com/konovalov/vad/example/recorder/VoiceRecorderConfig.java
================================================
package com.konovalov.vad.example.recorder;


/**
 * Created by George Konovalov on 11/16/2019.
 *
 * Recording parameters: sample rate, mode, frame size and the voice/silence
 * durations in milliseconds. Instances are normally created through
 * {@link #newBuilder()}, which supplies a default for every field.
 */

public class VoiceRecorderConfig {
    private SampleRate sampleRate;
    private Mode mode;
    private FrameSize frameSize;
    private int voiceDurationMillis;
    private int silenceDurationMillis;

    /**
     * Creates an empty configuration: enum fields are {@code null} and durations
     * are 0 until the setters are called. Prefer {@link #newBuilder()}.
     */
    public VoiceRecorderConfig() {
    }

    /** Copies every value from the given builder. */
    public VoiceRecorderConfig(Builder builder) {
        this.voiceDurationMillis = builder.voiceDurationMillis;
        this.silenceDurationMillis = builder.silenceDurationMillis;
        this.sampleRate = builder.sampleRate;
        this.frameSize = builder.frameSize;
        this.mode = builder.mode;
    }

    public SampleRate getSampleRate() {
        return sampleRate;
    }

    public Mode getMode() {
        return mode;
    }

    public FrameSize getFrameSize() {
        return frameSize;
    }

    public int getVoiceDurationMillis() {
        return voiceDurationMillis;
    }

    public int getSilenceDurationMillis() {
        return silenceDurationMillis;
    }

    public void setSampleRate(SampleRate sampleRate) {
        this.sampleRate = sampleRate;
    }

    public void setMode(Mode mode) {
        this.mode = mode;
    }

    public void setFrameSize(FrameSize frameSize) {
        this.frameSize = frameSize;
    }

    public void setVoiceDurationMillis(int voiceDurationMillis) {
        this.voiceDurationMillis = voiceDurationMillis;
    }

    public void setSilenceDurationMillis(int silenceDurationMillis) {
        this.silenceDurationMillis = silenceDurationMillis;
    }

    /** @return a builder pre-populated with the default values */
    public static Builder newBuilder() {
        return new Builder();
    }

    /** Fluent builder; every field starts with a usable default. */
    public static class Builder {
        private SampleRate sampleRate = SampleRate.SAMPLE_RATE_16K;
        private Mode mode = Mode.VERY_AGGRESSIVE;
        // Defaulted like the other fields so that build() never yields a null frame size.
        private FrameSize frameSize = FrameSize.FRAME_SIZE_1536;
        private int voiceDurationMillis = 500;
        private int silenceDurationMillis = 500;

        private Builder() {
        }

        public Builder setSampleRate(SampleRate sampleRate) {
            this.sampleRate = sampleRate;
            return this;
        }

        public Builder setMode(Mode mode) {
            this.mode = mode;
            return this;
        }

        public Builder setFrameSize(FrameSize frameSize) {
            this.frameSize = frameSize;
            return this;
        }

        public Builder setVoiceDurationMillis(int voiceDurationMillis) {
            this.voiceDurationMillis = voiceDurationMillis;
            return this;
        }

        public Builder setSilenceDurationMillis(int silenceDurationMillis) {
            this.silenceDurationMillis = silenceDurationMillis;
            return this;
        }

        public VoiceRecorderConfig build() {
            return new VoiceRecorderConfig(this);
        }
    }

    /** Supported sample rates, in Hz. */
    public enum SampleRate {
        SAMPLE_RATE_16K(16000);

        private final int sampleRate;

        public int getValue() {
            return sampleRate;
        }

        SampleRate(int sampleRate) {
            this.sampleRate = sampleRate;
        }
    }

    /** Detection modes, each carrying an integer code. */
    public enum Mode {
        NORMAL(0),
        LOW_BITRATE(1),
        AGGRESSIVE(2),
        VERY_AGGRESSIVE(3);

        private final int mode;

        public int getValue() {
            return mode;
        }

        Mode(int mode) {
            this.mode = mode;
        }
    }

    /** Supported frame sizes. */
    public enum FrameSize {
        FRAME_SIZE_1536(1536);

        private final int frameSize;

        public int getValue() {
            return frameSize;
        }

        FrameSize(int frameSize) {
            this.frameSize = frameSize;
        }
    }


}


================================================
FILE: runtime/examples/vad/vad-android-demo/example/src/main/res/drawable/ic_launcher_background.xml
================================================
[File too large to display: 5.5 KB]

================================================
FILE: runtime/examples/vad/vad-android-demo/example/src/main/res/drawable-v24/ic_launcher_foreground.xml
================================================
<vector xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:aapt="http://schemas.android.com/aapt"
    android:width="108dp"
    android:height="108dp"
    android:viewportWidth="108"
    android:viewportHeight="108">
    <path
        android:fillType="evenOdd"
        android:pathData="M32,64C32,64 38.39,52.99 44.13,50.95C51.37,48.37 70.14,49.57 70.14,49.57L108.26,87.69L108,109.01L75.97,107.97L32,64Z"
        android:strokeWidth="1"
        android:strokeColor="#00000000">
        <aapt:attr name="android:fillColor">
            <gradient
                android:endX="78.5885"
                android:endY="90.9159"
                android:startX="48.7653"
                android:startY="61.0927"
                android:type="linear">
                <item
                    android:color="#44000000"
                    android:offset="0.0" />
                <item
                    android:color="#00000000"
                    android:offset="1.0" />
            </gradient>
        </aapt:attr>
    </path>
    <path
        android:fillColor="#FFFFFF"
        android:fillType="nonZero"
        android:pathData="M66.94,46.02L66.94,46.02C72.44,50.07 76,56.61 76,64L32,64C32,56.61 35.56,50.11 40.98,46.06L36.18,41.19C35.45,40.45 35.45,39.3 36.18,38.56C36.91,37.81 38.05,37.81 38.78,38.56L44.25,44.05C47.18,42.57 50.48,41.71 54,41.71C57.48,41.71 60.78,42.57 63.68,44.05L69.11,38.56C69.84,37.81 70.98,37.81 71.71,38.56C72.44,39.3 72.44,40.45 71.71,41.19L66.94,46.02ZM62.94,56.92C64.08,56.92 65,56.01 65,54.88C65,53.76 64.08,52.85 62.94,52.85C61.8,52.85 60.88,53.76 60.88,54.88C60.88,56.01 61.8,56.92 62.94,56.92ZM45.06,56.92C46.2,56.92 47.13,56.01 47.13,54.88C47.13,53.76 46.2,52.85 45.06,52.85C43.92,52.85 43,53.76 43,54.88C43,56.01 43.92,56.92 45.06,56.92Z"
        android:strokeWidth="1"
        android:strokeColor="#00000000" />
</vector>


================================================
FILE: runtime/examples/vad/vad-android-demo/example/src/main/res/layout/activity_main.xml
================================================
[File too large to display: 5.0 KB]

================================================
FILE: runtime/examples/vad/vad-android-demo/example/src/main/res/mipmap-anydpi-v26/ic_launcher.xml
================================================
[File too large to display: 272 B]

================================================
FILE: runtime/examples/vad/vad-android-demo/example/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml
================================================
[File too large to display: 272 B]

================================================
FILE: runtime/examples/vad/vad-android-demo/example/src/main/res/values/colors.xml
================================================
<?xml version="1.0" encoding="utf-8"?>
<resources>
    <color name="colorPrimary">#000000</color>
    <color name="colorPrimaryDark">#00574B</color>
    <color name="colorAccent">#FFFFFF</color>
    <color name="colorPrimaryText">#FFFFFF</color>
</resources>


================================================
FILE: runtime/examples/vad/vad-android-demo/example/src/main/res/values/strings.xml
================================================
<resources>
    <string name="app_name">Android VAD</string>
    <string name="speech_detected">Speech detected!</string>
    <string name="noise_detected">Noise detected!</string>
    <string name="press_button_to_start">Press button to start VAD!</string>
    <string name="start">Start</string>
    <string name="stop">Stop</string>
    <string name="sample_rate">Sample Rate:</string>
    <string name="frame_size">Frame Size:</string>
    <string name="mode">Mode:</string>
</resources>


================================================
FILE: runtime/examples/vad/vad-android-demo/example/src/main/res/values/styles.xml
================================================
[File too large to display: 673 B]

================================================
FILE: runtime/examples/vad/vad-android-demo/gradle/wrapper/gradle-wrapper.properties
================================================
[File too large to display: 232 B]

================================================
FILE: runtime/examples/vad/vad-android-demo/gradle.properties
================================================
[File too large to display: 1.0 KB]

================================================
FILE: runtime/examples/vad/vad-android-demo/gradlew
================================================
[File too large to display: 5.2 KB]

================================================
FILE: runtime/examples/vad/vad-android-demo/gradlew.bat
================================================
[File too large to display: 2.1 KB]

================================================
FILE: runtime/examples/vad/vad-android-demo/local.properties
================================================
## This file must *NOT* be checked into Version Control Systems,
# as it contains information specific to your local configuration.
#
# Location of the SDK. This is only used by Gradle.
# For customization when using a Version Control System, please read the
# header note.
#Wed Mar 29 16:03:45 CST 2023
sdk.dir=/Users/masimeng/Library/Android/sdk
ndk.dir=/Users/masimeng/Library/Android/sdk/ndk/20.0.5594570/


================================================
FILE: runtime/examples/vad/vad-android-demo/settings.gradle
================================================
[File too large to display: 50 B]

================================================
FILE: runtime/examples/vad/vad-android-demo/vad/.gitignore
================================================
[File too large to display: 7 B]

================================================
FILE: runtime/examples/vad/vad-android-demo/vad/build.gradle
================================================
[File too large to display: 1.6 KB]

================================================
FILE: runtime/examples/vad/vad-android-demo/vad/consumer-rules.pro
================================================


================================================
FILE: runtime/examples/vad/vad-android-demo/vad/proguard-rules.pro
================================================
[File too large to display: 751 B]

================================================
FILE: runtime/examples/vad/vad-android-demo/vad/src/androidTest/java/com/konovalov/vad/ExampleInstrumentedTest.java
================================================
[File too large to display: 755 B]

================================================
FILE: runtime/examples/vad/vad-android-demo/vad/src/main/AndroidManifest.xml
================================================
[File too large to display: 104 B]

================================================
FILE: runtime/examples/vad/vad-android-demo/vad/src/main/cpp/CMakeLists.txt
================================================
[File too large to display: 2.1 KB]

================================================
FILE: runtime/examples/vad/vad-android-demo/vad/src/main/cpp/includes/vad_interface.h
================================================
[File too large to display: 1.3 KB]

================================================
FILE: runtime/examples/vad/vad-android-demo/vad/src/main/cpp/native-lib.cpp
================================================
[File too large to display: 4.4 KB]

================================================
FILE: runtime/examples/vad/vad-android-demo/vad/src/main/java/com/konovalov/vad/Vad.java
================================================
[File too large to display: 2.4 KB]

================================================
FILE: runtime/examples/vad/vad-android-demo/vad/src/main/java/com/konovalov/vad/VadListener.java
================================================
[File too large to display: 175 B]

================================================
FILE: runtime/examples/vad/vad-android-demo/vad/src/main/res/values/strings.xml
================================================
[File too large to display: 66 B]

================================================
FILE: runtime/patch/CPPLINT.cfg
================================================
[File too large to display: 17 B]

================================================
FILE: runtime/patch/README.md
================================================
[File too large to display: 61 B]

================================================
FILE: runtime/patch/openfst/src/include/fst/flags.h
================================================
[File too large to display: 6.6 KB]

================================================
FILE: runtime/patch/openfst/src/include/fst/log.h
================================================
[File too large to display: 2.2 KB]

================================================
FILE: runtime/patch/openfst/src/lib/flags.cc
================================================
[File too large to display: 5.5 KB]

================================================
FILE: runtime/tools/clang-format.sh
================================================
[File too large to display: 107 B]

================================================
FILE: runtime/tools/setup_valgrind.sh
================================================
[File too large to display: 413 B]

================================================
FILE: runtime/tools/venv.sh
================================================
[File too large to display: 83 B]

================================================
FILE: setup.cfg
================================================
[File too large to display: 180 B]

================================================
FILE: setup.py
================================================
[File too large to display: 10.3 KB]

================================================
FILE: tests/benchmark/conformer/README.md
================================================
[File too large to display: 1.9 KB]

================================================
FILE: tests/benchmark/conformer/prepare.sh
================================================
[File too large to display: 170 B]

================================================
FILE: tests/benchmark/conformer/run.sh
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/benchmark/conformer/run_benchmark.sh
================================================
[File too large to display: 3.1 KB]

================================================
FILE: tests/benchmark/pwgan/README.md
================================================
[File too large to display: 338 B]

================================================
FILE: tests/benchmark/pwgan/run_all.sh
================================================
[File too large to display: 3.2 KB]

================================================
FILE: tests/benchmark/pwgan/run_benchmark.sh
================================================
[File too large to display: 3.1 KB]

================================================
FILE: tests/chains/ds2/README.md
================================================
[File too large to display: 125 B]

================================================
FILE: tests/chains/ds2/ds2_params_lite_train_infer.txt
================================================
[File too large to display: 1.4 KB]

================================================
FILE: tests/chains/ds2/ds2_params_whole_train_infer.txt
================================================
[File too large to display: 1.5 KB]

================================================
FILE: tests/chains/ds2/lite_train_infer.sh
================================================
[File too large to display: 250 B]

================================================
FILE: tests/chains/ds2/prepare.sh
================================================
[File too large to display: 1.9 KB]

================================================
FILE: tests/chains/ds2/speedyspeech_params_lite.txt
================================================
[File too large to display: 1.9 KB]

================================================
FILE: tests/chains/ds2/test.sh
================================================
[File too large to display: 15.2 KB]

================================================
FILE: tests/chains/ds2/whole_train_infer.sh
================================================
[File too large to display: 244 B]

================================================
FILE: tests/chains/speedyspeech/README.md
================================================
[File too large to display: 209 B]

================================================
FILE: tests/chains/speedyspeech/infer.sh
================================================
[File too large to display: 81 B]

================================================
FILE: tests/chains/speedyspeech/lite_train_infer.sh
================================================
[File too large to display: 216 B]

================================================
FILE: tests/chains/speedyspeech/prepare.sh
================================================
[File too large to display: 2.0 KB]

================================================
FILE: tests/chains/speedyspeech/speedyspeech_params_lite_multi_gpu.txt
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/chains/speedyspeech/speedyspeech_params_lite_single_gpu.txt
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tests/chains/speedyspeech/speedyspeech_params_whole_multi_gpu.txt
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/chains/speedyspeech/speedyspeech_params_whole_single_gpu.txt
================================================
[File too large to display: 2.2 KB]

================================================
FILE: tests/chains/speedyspeech/test.sh
================================================
[File too large to display: 15.0 KB]

================================================
FILE: tests/chains/speedyspeech/whole_train_infer.sh
================================================
[File too large to display: 221 B]

================================================
FILE: tests/test_tipc/barrier.sh
================================================
[File too large to display: 246 B]

================================================
FILE: tests/test_tipc/benchmark_train.sh
================================================
[File too large to display: 9.5 KB]

================================================
FILE: tests/test_tipc/common_func.sh
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/test_tipc/configs/conformer/train_infer_python.txt
================================================
[File too large to display: 1.2 KB]

================================================
FILE: tests/test_tipc/configs/mdtc/train_infer_python.txt
================================================
[File too large to display: 895 B]

================================================
FILE: tests/test_tipc/configs/pwgan/train_infer_python.txt
================================================
[File too large to display: 1.3 KB]

================================================
FILE: tests/test_tipc/conformer/scripts/aishell_tiny.py
================================================
[File too large to display: 5.5 KB]

================================================
FILE: tests/test_tipc/docs/benchmark_train.md
================================================
[File too large to display: 2.5 KB]

================================================
FILE: tests/test_tipc/prepare.sh
================================================
[File too large to display: 4.8 KB]

================================================
FILE: tests/test_tipc/test_train_inference_python.sh
================================================
[File too large to display: 18.0 KB]

================================================
FILE: tests/unit/asr/deepspeech2_model_test.py
================================================
[File too large to display: 3.4 KB]

================================================
FILE: tests/unit/asr/deepspeech2_online_model_test.py
================================================
[File too large to display: 9.7 KB]

================================================
FILE: tests/unit/asr/deepspeech2_online_model_test.sh
================================================
[File too large to display: 171 B]

================================================
FILE: tests/unit/asr/error_rate_test.py
================================================
[File too large to display: 4.9 KB]

================================================
FILE: tests/unit/asr/mask_test.py
================================================
[File too large to display: 1.9 KB]

================================================
FILE: tests/unit/asr/reverse_pad_list.py
================================================
[File too large to display: 6.0 KB]

================================================
FILE: tests/unit/asr/u2_model_test.py
================================================
[File too large to display: 5.4 KB]

================================================
FILE: tests/unit/audiotools/core/test_audio_signal.py
================================================
[File too large to display: 19.8 KB]

================================================
FILE: tests/unit/audiotools/core/test_bands.py
================================================
[File too large to display: 1.5 KB]

================================================
FILE: tests/unit/audiotools/core/test_display.py
================================================
[File too large to display: 1.5 KB]

================================================
FILE: tests/unit/audiotools/core/test_dsp.py
================================================
[File too large to display: 6.2 KB]

================================================
FILE: tests/unit/audiotools/core/test_effects.py
================================================
[File too large to display: 10.5 KB]

================================================
FILE: tests/unit/audiotools/core/test_fftconv.py
================================================
[File too large to display: 2.8 KB]

================================================
FILE: tests/unit/audiotools/core/test_grad.py
================================================
[File too large to display: 4.8 KB]

================================================
FILE: tests/unit/audiotools/core/test_highpass.py
================================================
[File too large to display: 3.3 KB]

================================================
FILE: tests/unit/audiotools/core/test_loudness.py
================================================
[File too large to display: 7.9 KB]

================================================
FILE: tests/unit/audiotools/core/test_lowpass.py
================================================
[File too large to display: 3.7 KB]

================================================
FILE: tests/unit/audiotools/core/test_util.py
================================================
[File too large to display: 4.3 KB]

================================================
FILE: tests/unit/audiotools/data/test_datasets.py
================================================
[File too large to display: 6.2 KB]

================================================
FILE: tests/unit/audiotools/data/test_preprocess.py
================================================
[File too large to display: 1.1 KB]

================================================
FILE: tests/unit/audiotools/data/test_transforms.py
================================================
[File too large to display: 14.4 KB]

================================================
FILE: tests/unit/audiotools/ml/test_decorators.py
================================================
[File too large to display: 3.2 KB]

================================================
FILE: tests/unit/audiotools/ml/test_model.py
================================================
[File too large to display: 2.4 KB]

================================================
FILE: tests/unit/audiotools/test_audiotools.sh
================================================
[File too large to display: 235 B]

================================================
FILE: tests/unit/audiotools/test_post.py
================================================
[File too large to display: 860 B]

================================================
FILE: tests/unit/ci.sh
================================================
[File too large to display: 865 B]

================================================
FILE: tests/unit/cli/aishell_test_prepare.py
================================================
[File too large to display: 5.0 KB]

================================================
FILE: tests/unit/cli/calc_RTF_CER_by_aishell.sh
================================================
[File too large to display: 1.0 KB]

================================================
FILE: tests/unit/cli/path.sh
================================================
[File too large to display: 346 B]

================================================
FILE: tests/unit/cli/test_cli.sh
================================================
[File too large to display: 6.8 KB]

================================================
FILE: tests/unit/doc/test_cli.md
================================================
[File too large to display: 666 B]

================================================
FILE: tests/unit/server/offline/change_yaml.py
================================================
[File too large to display: 3.8 KB]

================================================
FILE: tests/unit/server/offline/conf/application.yaml
================================================
[File too large to display: 4.8 KB]

================================================
FILE: tests/unit/server/offline/test_server_client.sh
================================================
[File too large to display: 7.6 KB]

================================================
FILE: tests/unit/server/online/tts/check_server/change_yaml.py
================================================
[File too large to display: 3.3 KB]

================================================
FILE: tests/unit/server/online/tts/check_server/conf/application.yaml
================================================
[File too large to display: 2.9 KB]

================================================
FILE: tests/unit/server/online/tts/check_server/test.sh
================================================
[File too large to display: 10.9 KB]

================================================
FILE: tests/unit/server/online/tts/check_server/test_all.sh
================================================
[File too large to display: 1.0 KB]

================================================
FILE: tests/unit/server/online/tts/check_server/tts_online_application.yaml
================================================
[File too large to display: 2.9 KB]

================================================
FILE: tests/unit/server/online/tts/test_server/test_http_client.py
================================================
[File too large to display: 6.3 KB]

================================================
FILE: tests/unit/tts/test_data_table.py
================================================
[File too large to display: 925 B]

================================================
FILE: tests/unit/tts/test_enfrontend.py
================================================
[File too large to display: 927 B]

================================================
FILE: tests/unit/tts/test_expansion.py
================================================
[File too large to display: 970 B]

================================================
FILE: tests/unit/tts/test_fftconv1d.py
================================================
[File too large to display: 4.6 KB]

================================================
FILE: tests/unit/tts/test_losses.py
================================================
[File too large to display: 2.0 KB]

================================================
FILE: tests/unit/tts/test_mixfrontend.py
================================================
[File too large to display: 6.0 KB]

================================================
FILE: tests/unit/tts/test_optimizer.py
================================================
[File too large to display: 1.2 KB]

================================================
FILE: tests/unit/tts/test_pwg.py
================================================
[File too large to display: 7.5 KB]

================================================
FILE: tests/unit/tts/test_raise.py
================================================
[File too large to display: 1.8 KB]

================================================
FILE: tests/unit/tts/test_reporter.py
================================================
[File too large to display: 1.6 KB]

================================================
FILE: tests/unit/tts/test_snapshot.py
================================================
[File too large to display: 1.8 KB]

================================================
FILE: tests/unit/tts/test_ssml.py
================================================
[File too large to display: 4.0 KB]

================================================
FILE: tests/unit/tts/test_stft.py
================================================
[File too large to display: 1.1 KB]

================================================
FILE: tests/unit/tts/test_to_static.py
================================================
[File too large to display: 1.8 KB]

================================================
FILE: tests/unit/vector/conftest.py
================================================
[File too large to display: 1.0 KB]

================================================
FILE: tests/unit/vector/test_augment.py
================================================
[File too large to display: 4.8 KB]

================================================
FILE: third_party/README.md
================================================
[File too large to display: 579 B]

================================================
FILE: third_party/__init__.py
================================================


================================================
FILE: third_party/ctc_decoders/.gitignore
================================================
[File too large to display: 127 B]

================================================
FILE: third_party/ctc_decoders/COPYING.APACHE2.0
================================================
[File too large to display: 11.1 KB]

================================================
FILE: third_party/ctc_decoders/COPYING.LESSER.3
================================================
[File too large to display: 7.5 KB]

================================================
FILE: third_party/ctc_decoders/LICENSE
================================================
[File too large to display: 329 B]

================================================
FILE: third_party/ctc_decoders/__init__.py
================================================
[File too large to display: 610 B]

================================================
FILE: third_party/ctc_decoders/ctc_beam_search_decoder.cpp
================================================
[File too large to display: 22.7 KB]

================================================
FILE: third_party/ctc_decoders/ctc_beam_search_decoder.h
================================================
[File too large to display: 5.6 KB]

================================================
FILE: third_party/ctc_decoders/ctc_greedy_decoder.cpp
================================================
[File too large to display: 2.2 KB]

================================================
FILE: third_party/ctc_decoders/ctc_greedy_decoder.h
================================================
[File too large to display: 1.2 KB]

================================================
FILE: third_party/ctc_decoders/decoder_utils.cpp
================================================
[File too large to display: 6.0 KB]

================================================
FILE: third_party/ctc_decoders/decoder_utils.h
================================================
[File too large to display: 3.8 KB]

================================================
FILE: third_party/ctc_decoders/decoders.i
================================================
[File too large to display: 1.2 KB]

================================================
FILE: third_party/ctc_decoders/path_trie.cpp
================================================
[File too large to display: 5.1 KB]

================================================
FILE: third_party/ctc_decoders/path_trie.h
================================================
[File too large to display: 2.3 KB]

================================================
FILE: third_party/ctc_decoders/scorer.cpp
================================================
[File too large to display: 7.1 KB]

================================================
FILE: third_party/ctc_decoders/scorer.h
================================================
[File too large to display: 3.2 KB]

================================================
FILE: third_party/ctc_decoders/setup.py
================================================
[File too large to display: 4.3 KB]

================================================
FILE: third_party/ctc_decoders/setup.sh
================================================
[File too large to display: 607 B]

================================================
FILE: third_party/install.sh
================================================
[File too large to display: 202 B]

================================================
FILE: third_party/install_win_ctc.bat
================================================
[File too large to display: 453 B]

================================================
FILE: third_party/python_kaldi_features/.gitignore
================================================
[File too large to display: 46 B]

================================================
FILE: third_party/python_kaldi_features/LICENSE
================================================
[File too large to display: 1.1 KB]

================================================
FILE: third_party/python_kaldi_features/MANIFEST
================================================
[File too large to display: 152 B]

================================================
FILE: third_party/python_kaldi_features/README.rst
================================================
[File too large to display: 2.4 KB]

================================================
FILE: third_party/python_kaldi_features/docs/Makefile
================================================
[File too large to display: 3.1 KB]

================================================
FILE: third_party/python_kaldi_features/docs/make.bat
================================================
[File too large to display: 3.0 KB]

================================================
FILE: third_party/python_kaldi_features/docs/source/conf.py
================================================
[File too large to display: 6.4 KB]

================================================
FILE: third_party/python_kaldi_features/docs/source/index.rst
================================================
[File too large to display: 1.8 KB]

================================================
FILE: third_party/python_kaldi_features/example.py
================================================
[File too large to display: 1.3 KB]

================================================
FILE: third_party/python_kaldi_features/python_speech_features/__init__.py
================================================
[File too large to display: 20 B]

================================================
FILE: third_party/python_kaldi_features/python_speech_features/base.py
================================================
[File too large to display: 9.1 KB]

================================================
FILE: third_party/python_kaldi_features/python_speech_features/base_orig.py
================================================
[File too large to display: 10.6 KB]

================================================
FILE: third_party/python_kaldi_features/python_speech_features/sigproc.py
================================================
[File too large to display: 6.8 KB]

================================================
FILE: third_party/python_kaldi_features/python_speech_features/sigproc_orig.py
================================================
[File too large to display: 6.0 KB]

================================================
FILE: third_party/python_kaldi_features/requirements.txt
================================================
[File too large to display: 17 B]

================================================
FILE: third_party/python_kaldi_features/setup.py
================================================
[File too large to display: 590 B]

================================================
FILE: third_party/python_kaldi_features/test/test_sigproc.py
================================================
[File too large to display: 1.1 KB]

================================================
FILE: tools/Dockerfile
================================================
[File too large to display: 153 B]

================================================
FILE: tools/Makefile
================================================
[File too large to display: 3.3 KB]

================================================
FILE: tools/extras/README.md
================================================
[File too large to display: 144 B]

================================================
FILE: tools/extras/install_autolog.sh
================================================
[File too large to display: 350 B]

================================================
FILE: tools/extras/install_gcc.sh
================================================
[File too large to display: 397 B]

================================================
FILE: tools/extras/install_kaldi.sh
================================================
[File too large to display: 987 B]

================================================
FILE: tools/extras/install_kenlm.sh
================================================
[File too large to display: 533 B]

================================================
FILE: tools/extras/install_liblbfgs.sh
================================================
[File too large to display: 921 B]

================================================
FILE: tools/extras/install_mfa_v1.sh
================================================
[File too large to display: 246 B]

================================================
FILE: tools/extras/install_mfa_v2.sh
================================================
[File too large to display: 372 B]

================================================
FILE: tools/extras/install_miniconda.sh
================================================
[File too large to display: 757 B]

================================================
FILE: tools/extras/install_mkl.sh
================================================
[File too large to display: 9.8 KB]

================================================
FILE: tools/extras/install_ngram.sh
================================================
[File too large to display: 660 B]

================================================
FILE: tools/extras/install_openblas.sh
================================================
[File too large to display: 1.0 KB]

================================================
FILE: tools/extras/install_openfst.sh
================================================
[File too large to display: 1.4 KB]

================================================
FILE: tools/extras/install_pynini.sh
================================================
[File too large to display: 375 B]

================================================
FILE: tools/extras/install_sclite.sh
================================================
[File too large to display: 864 B]

================================================
FILE: tools/extras/install_soundfile.sh
================================================
[File too large to display: 492 B]

================================================
FILE: tools/extras/install_sox.sh
================================================
[File too large to display: 302 B]

================================================
FILE: tools/extras/install_srilm.sh
================================================
[File too large to display: 2.8 KB]

================================================
FILE: tools/extras/install_venv.sh
================================================
[File too large to display: 761 B]

================================================
FILE: tools/extras/srilm.patch
================================================
[File too large to display: 580 B]

================================================
FILE: tools/get_contributors.ipynb
================================================
[File too large to display: 4.1 KB]

================================================
FILE: tools/pre_commit.sh
================================================
[File too large to display: 2.1 KB]

================================================
FILE: tools/release_note.py
================================================
[File too large to display: 6.1 KB]

================================================
FILE: tools/setup_anaconda.sh
================================================
[File too large to display: 1.5 KB]

================================================
FILE: tools/watermark.py
================================================
[File too large to display: 709 B]

================================================
FILE: utils/DER.py
================================================
[File too large to display: 6.0 KB]

================================================
FILE: utils/README.md
================================================
[File too large to display: 163 B]

================================================
FILE: utils/__init__.py
================================================
[File too large to display: 610 B]

================================================
FILE: utils/addjson.py
================================================
[File too large to display: 4.8 KB]

================================================
FILE: utils/apply-cmvn.py
================================================
[File too large to display: 4.8 KB]

================================================
FILE: utils/avg.sh
================================================
[File too large to display: 612 B]

================================================
FILE: utils/avg_model.py
================================================
[File too large to display: 734 B]

================================================
FILE: utils/build_kenlm_model_from_arpa.sh
================================================
[File too large to display: 1.1 KB]

================================================
FILE: utils/build_vocab.py
================================================
[File too large to display: 828 B]

================================================
FILE: utils/caculate_rtf.py
================================================
[File too large to display: 2.1 KB]

================================================
FILE: utils/compute-cmvn-stats.py
================================================
[File too large to display: 6.1 KB]

================================================
FILE: utils/compute-wer.py
================================================
[File too large to display: 155 B]

================================================
FILE: utils/compute_mean_std.py
================================================
[File too large to display: 817 B]

================================================
FILE: utils/compute_statistics.py
================================================
[File too large to display: 3.5 KB]

================================================
FILE: utils/copy-feats.py
================================================
[File too large to display: 3.4 KB]

================================================
FILE: utils/data2json.sh
================================================
[File too large to display: 4.8 KB]

================================================
FILE: utils/dump.sh
================================================
[File too large to display: 2.6 KB]

================================================
FILE: utils/dump_manifest.py
================================================
[File too large to display: 2.1 KB]

================================================
FILE: utils/duration_from_maniefst.sh
================================================
[File too large to display: 145 B]

================================================
FILE: utils/espnet_json_to_manifest.py
================================================
[File too large to display: 756 B]

================================================
FILE: utils/feat-to-shape.py
================================================
[File too large to display: 2.8 KB]

================================================
FILE: utils/feat_to_shape.sh
================================================
[File too large to display: 1.7 KB]

================================================
FILE: utils/filter.py
================================================
[File too large to display: 1.5 KB]

================================================
FILE: utils/filter_scp.pl
================================================
[File too large to display: 2.9 KB]

================================================
FILE: utils/format_data.py
================================================
[File too large to display: 780 B]

================================================
FILE: utils/format_rsl.py
================================================
[File too large to display: 713 B]

================================================
FILE: utils/format_triplet_data.py
================================================
[File too large to display: 3.7 KB]

================================================
FILE: utils/fst/add_lex_disambig.pl
================================================
[File too large to display: 6.6 KB]

================================================
FILE: utils/fst/compile_lexicon_token_fst.sh
================================================
[File too large to display: 3.7 KB]

================================================
FILE: utils/fst/ctc_token_fst.py
================================================
[File too large to display: 1.4 KB]

================================================
FILE: utils/fst/ctc_token_fst_corrected.py
================================================
[File too large to display: 2.0 KB]

================================================
FILE: utils/fst/eps2disambig.pl
================================================
[File too large to display: 1002 B]

================================================
FILE: utils/fst/make_lexicon_fst.pl
================================================
[File too large to display: 5.7 KB]

================================================
FILE: utils/fst/make_tlg.sh
================================================
[File too large to display: 2.2 KB]

================================================
FILE: utils/fst/prepare_dict.py
================================================
[File too large to display: 3.5 KB]

================================================
FILE: utils/fst/remove_oovs.pl
================================================
[File too large to display: 1.4 KB]

================================================
FILE: utils/fst/rnnt_token_fst.py
================================================
[File too large to display: 993 B]

================================================
FILE: utils/fst/s2eps.pl
================================================
[File too large to display: 1008 B]

================================================
FILE: utils/gen_duration_from_textgrid.py
================================================
[File too large to display: 4.1 KB]

================================================
FILE: utils/generate_infer_yaml.py
================================================
[File too large to display: 6.5 KB]

================================================
FILE: utils/json2trn.py
================================================
[File too large to display: 3.2 KB]

================================================
FILE: utils/link_wav.py
================================================
[File too large to display: 3.2 KB]

================================================
FILE: utils/log.sh
================================================
[File too large to display: 316 B]

================================================
FILE: utils/manifest_key_value.py
================================================
[File too large to display: 2.6 KB]

================================================
FILE: utils/md-eval.pl
================================================
[File too large to display: 105.7 KB]

================================================
FILE: utils/merge_scp2json.py
================================================
[File too large to display: 9.7 KB]

================================================
FILE: utils/ngram_train.sh
================================================
[File too large to display: 573 B]

================================================
FILE: utils/pack_model.sh
================================================
[File too large to display: 3.4 KB]

================================================
FILE: utils/parallel/run.pl
================================================
[File too large to display: 12.5 KB]

================================================
FILE: utils/parse_options.sh
================================================
[File too large to display: 3.6 KB]

================================================
FILE: utils/pd_env_collect.sh
================================================
[File too large to display: 4.2 KB]

================================================
FILE: utils/profile.sh
================================================
[File too large to display: 600 B]

================================================
FILE: utils/reduce_data_dir.sh
================================================
[File too large to display: 2.2 KB]

================================================
FILE: utils/remove_longshortdata.py
================================================
[File too large to display: 3.1 KB]

================================================
FILE: utils/remove_longshortdata.sh
================================================
[File too large to display: 1.9 KB]

================================================
FILE: utils/score_sclite.sh
================================================
[File too large to display: 4.6 KB]

================================================
FILE: utils/scp2json.py
================================================
[File too large to display: 1.4 KB]

================================================
FILE: utils/show_results.sh
================================================
[File too large to display: 1.7 KB]

================================================
FILE: utils/spk2utt_to_utt2spk.pl
================================================
[File too large to display: 835 B]

================================================
FILE: utils/split_data.sh
================================================
[File too large to display: 2.5 KB]

================================================
FILE: utils/split_json.sh
================================================
[File too large to display: 602 B]

================================================
FILE: utils/split_scp.pl
================================================
[File too large to display: 8.0 KB]

================================================
FILE: utils/spm_decode
================================================
[File too large to display: 1.4 KB]

================================================
FILE: utils/spm_encode
================================================
[File too large to display: 3.4 KB]

================================================
FILE: utils/spm_train
================================================
[File too large to display: 346 B]

================================================
FILE: utils/tarball.sh
================================================
[File too large to display: 601 B]

================================================
FILE: utils/text2token.py
================================================
[File too large to display: 3.9 KB]

================================================
FILE: utils/text_to_lexicon.py
================================================
[File too large to display: 1.1 KB]

================================================
FILE: utils/tokenizer.perl
================================================
[File too large to display: 18.0 KB]

================================================
FILE: utils/train_arpa_with_kenlm.sh
================================================
[File too large to display: 2.3 KB]

================================================
FILE: utils/update_json.sh
================================================
[File too large to display: 2.6 KB]

================================================
FILE: utils/utility.sh
================================================
[File too large to display: 488 B]

================================================
FILE: utils/utt2spk_to_spk2utt.pl
================================================
[File too large to display: 1.2 KB]

================================================
FILE: utils/zh_tn.py
================================================
[File too large to display: 66.2 KB]